Repository: alibaba/zvec
Branch: main
Commit: b49833bf56a0
Files: 988
Total size: 10.5 MB

Directory structure:
gitextract_w52102_2/

├── .clang-format
├── .git/
│   ├── HEAD
│   ├── config
│   ├── description
│   ├── hooks/
│   │   ├── applypatch-msg.sample
│   │   ├── commit-msg.sample
│   │   ├── fsmonitor-watchman.sample
│   │   ├── post-update.sample
│   │   ├── pre-applypatch.sample
│   │   ├── pre-commit.sample
│   │   ├── pre-merge-commit.sample
│   │   ├── pre-push.sample
│   │   ├── pre-rebase.sample
│   │   ├── pre-receive.sample
│   │   ├── prepare-commit-msg.sample
│   │   ├── push-to-checkout.sample
│   │   ├── sendemail-validate.sample
│   │   └── update.sample
│   ├── index
│   ├── info/
│   │   └── exclude
│   ├── logs/
│   │   ├── HEAD
│   │   └── refs/
│   │       ├── heads/
│   │       │   └── main
│   │       └── remotes/
│   │           └── origin/
│   │               └── HEAD
│   ├── objects/
│   │   └── pack/
│   │       ├── pack-2b5e15ebe928a592991dc24c7ae7e8dc9e3500dc.idx
│   │       ├── pack-2b5e15ebe928a592991dc24c7ae7e8dc9e3500dc.pack
│   │       ├── pack-2b5e15ebe928a592991dc24c7ae7e8dc9e3500dc.promisor
│   │       └── pack-2b5e15ebe928a592991dc24c7ae7e8dc9e3500dc.rev
│   ├── packed-refs
│   ├── refs/
│   │   ├── heads/
│   │   │   └── main
│   │   └── remotes/
│   │       └── origin/
│   │           └── HEAD
│   └── shallow
├── .github/
│   ├── ISSUE_TEMPLATE/
│   │   ├── benchmark.yml
│   │   ├── bug_report.yml
│   │   ├── config.yml
│   │   ├── enhancement.yml
│   │   ├── feature_request.yml
│   │   ├── integration.yml
│   │   └── profiling.yml
│   ├── codecov.yml
│   ├── dependabot.yml
│   └── workflows/
│       ├── 01-ci-pipeline.yml
│       ├── 02-lint-check.yml
│       ├── 03-macos-linux-build.yml
│       ├── 04-android-build.yml
│       ├── _build_wheel_job.yml
│       ├── build_test_wheel.yml
│       ├── build_wheel.yml
│       ├── continuous_bench.yml
│       ├── docker/
│       │   └── Dockerfile.linux_x64_glibc228
│       ├── nightly_coverage.yml
│       └── scripts/
│           └── run_vdb.sh
├── .gitignore
├── .gitmodules
├── CMakeLists.txt
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── cmake/
│   ├── bazel.cmake
│   ├── option.cmake
│   └── utils.cmake
├── examples/
│   └── c++/
│       ├── CMakeLists.txt
│       ├── ailego/
│       │   └── main.cc
│       ├── core/
│       │   └── main.cc
│       └── db/
│           └── main.cc
├── pyproject.toml
├── python/
│   ├── tests/
│   │   ├── detail/
│   │   │   ├── distance_helper.py
│   │   │   ├── doc_helper.py
│   │   │   ├── fixture_helper.py
│   │   │   ├── params_helper.py
│   │   │   ├── support_helper.py
│   │   │   ├── test_collection_concurrency.py
│   │   │   ├── test_collection_create_and_open.py
│   │   │   ├── test_collection_ddl.py
│   │   │   ├── test_collection_dml.py
│   │   │   ├── test_collection_dql.py
│   │   │   ├── test_collection_exception.py
│   │   │   ├── test_collection_open.py
│   │   │   ├── test_collection_recall.py
│   │   │   └── test_db_config.py
│   │   ├── test_collection.py
│   │   ├── test_collection_hnsw_rabitq.py
│   │   ├── test_convert.py
│   │   ├── test_doc.py
│   │   ├── test_embedding.py
│   │   ├── test_params.py
│   │   ├── test_query_executor.py
│   │   ├── test_reranker.py
│   │   ├── test_schema.py
│   │   ├── test_typing.py
│   │   └── test_util.py
│   └── zvec/
│       ├── __init__.py
│       ├── __init__.pyi
│       ├── common/
│       │   ├── __init__.py
│       │   └── constants.py
│       ├── executor/
│       │   ├── __init__.py
│       │   └── query_executor.py
│       ├── extension/
│       │   ├── __init__.py
│       │   ├── bm25_embedding_function.py
│       │   ├── embedding_function.py
│       │   ├── http_embedding_function.py
│       │   ├── jina_embedding_function.py
│       │   ├── jina_function.py
│       │   ├── multi_vector_reranker.py
│       │   ├── openai_embedding_function.py
│       │   ├── openai_function.py
│       │   ├── qwen_embedding_function.py
│       │   ├── qwen_function.py
│       │   ├── qwen_rerank_function.py
│       │   ├── rerank_function.py
│       │   ├── sentence_transformer_embedding_function.py
│       │   ├── sentence_transformer_function.py
│       │   └── sentence_transformer_rerank_function.py
│       ├── model/
│       │   ├── __init__.py
│       │   ├── collection.py
│       │   ├── convert.py
│       │   ├── doc.py
│       │   ├── param/
│       │   │   ├── __init__.py
│       │   │   ├── __init__.pyi
│       │   │   └── vector_query.py
│       │   └── schema/
│       │       ├── __init__.py
│       │       ├── __init__.pyi
│       │       ├── collection_schema.py
│       │       └── field_schema.py
│       ├── py.typed
│       ├── tool/
│       │   ├── __init__.py
│       │   └── util.py
│       ├── typing/
│       │   ├── __init__.py
│       │   ├── __init__.pyi
│       │   └── enum.py
│       └── zvec.py
├── scripts/
│   ├── README.md
│   ├── build_android.sh
│   └── gcov.sh
├── src/
│   ├── CMakeLists.txt
│   ├── ailego/
│   │   ├── CMakeLists.txt
│   │   ├── algorithm/
│   │   │   ├── binary_quantizer.cc
│   │   │   ├── binary_quantizer.h
│   │   │   ├── integer_quantizer.cc
│   │   │   ├── integer_quantizer.h
│   │   │   ├── kmeans.h
│   │   │   └── lloyd_cluster.h
│   │   ├── buffer/
│   │   │   ├── buffer_manager.cc
│   │   │   └── buffer_pool.cc
│   │   ├── container/
│   │   │   ├── bitmap.cc
│   │   │   ├── bitmap.h
│   │   │   ├── bloom_filter.h
│   │   │   ├── params.cc
│   │   │   ├── reservoir.h
│   │   │   └── vector_array.h
│   │   ├── encoding/
│   │   │   └── json/
│   │   │       └── mod_json.c
│   │   ├── hash/
│   │   │   └── crc32c.cc
│   │   ├── internal/
│   │   │   ├── cpu_features.cc
│   │   │   └── cpu_features.h
│   │   ├── io/
│   │   │   ├── file.cc
│   │   │   ├── file_lock.cc
│   │   │   ├── file_lock.h
│   │   │   └── file_writer.h
│   │   ├── logger/
│   │   │   └── logger.cc
│   │   ├── math/
│   │   │   ├── cosine_distance_matrix.h
│   │   │   ├── distance.h
│   │   │   ├── distance_matrix.h
│   │   │   ├── distance_matrix_accum_fp16.i
│   │   │   ├── distance_matrix_accum_fp32.i
│   │   │   ├── distance_matrix_accum_int4.i
│   │   │   ├── distance_matrix_accum_int8.i
│   │   │   ├── distance_matrix_euclidean_utility.i
│   │   │   ├── distance_matrix_fp16.i
│   │   │   ├── distance_matrix_fp32.i
│   │   │   ├── distance_matrix_inner_product_utility.i
│   │   │   ├── distance_matrix_int32.i
│   │   │   ├── distance_matrix_int64.i
│   │   │   ├── distance_matrix_mips_utility.i
│   │   │   ├── distance_matrix_popcnt.i
│   │   │   ├── distance_utility.h
│   │   │   ├── euclidean_distance_matrix.h
│   │   │   ├── euclidean_distance_matrix_fp16_avx.cc
│   │   │   ├── euclidean_distance_matrix_fp16_avx512.cc
│   │   │   ├── euclidean_distance_matrix_fp16_avx512fp16.cc
│   │   │   ├── euclidean_distance_matrix_fp16_dispatch.cc
│   │   │   ├── euclidean_distance_matrix_fp16_neon.cc
│   │   │   ├── euclidean_distance_matrix_fp32_avx.cc
│   │   │   ├── euclidean_distance_matrix_fp32_avx512.cc
│   │   │   ├── euclidean_distance_matrix_fp32_dispatch.cc
│   │   │   ├── euclidean_distance_matrix_fp32_neon.cc
│   │   │   ├── euclidean_distance_matrix_fp32_sse.cc
│   │   │   ├── euclidean_distance_matrix_int4_avx2.cc
│   │   │   ├── euclidean_distance_matrix_int4_dispatch.cc
│   │   │   ├── euclidean_distance_matrix_int4_sse.cc
│   │   │   ├── euclidean_distance_matrix_int8_avx2.cc
│   │   │   ├── euclidean_distance_matrix_int8_dispatch.cc
│   │   │   ├── euclidean_distance_matrix_int8_sse.cc
│   │   │   ├── euclidean_distance_matrix_scalar.cc
│   │   │   ├── hamming_distance_matrix.cc
│   │   │   ├── hamming_distance_matrix.h
│   │   │   ├── inner_product_matrix.h
│   │   │   ├── inner_product_matrix_fp16_avx.cc
│   │   │   ├── inner_product_matrix_fp16_avx512.cc
│   │   │   ├── inner_product_matrix_fp16_avx512fp16.cc
│   │   │   ├── inner_product_matrix_fp16_dispatch.cc
│   │   │   ├── inner_product_matrix_fp16_neon.cc
│   │   │   ├── inner_product_matrix_fp32_avx.cc
│   │   │   ├── inner_product_matrix_fp32_avx512.cc
│   │   │   ├── inner_product_matrix_fp32_dispatch.cc
│   │   │   ├── inner_product_matrix_fp32_neon.cc
│   │   │   ├── inner_product_matrix_fp32_sse.cc
│   │   │   ├── inner_product_matrix_int4_avx2.cc
│   │   │   ├── inner_product_matrix_int4_dispatch.cc
│   │   │   ├── inner_product_matrix_int4_sse.cc
│   │   │   ├── inner_product_matrix_int8_avx2.cc
│   │   │   ├── inner_product_matrix_int8_dispatch.cc
│   │   │   ├── inner_product_matrix_int8_sse.cc
│   │   │   ├── inner_product_matrix_scalar.cc
│   │   │   ├── matrix_define.i
│   │   │   ├── matrix_utility.i
│   │   │   ├── mips_euclidean_distance_matrix.h
│   │   │   ├── mips_euclidean_distance_matrix_fp16_avx.cc
│   │   │   ├── mips_euclidean_distance_matrix_fp16_avx512.cc
│   │   │   ├── mips_euclidean_distance_matrix_fp16_dispatch.cc
│   │   │   ├── mips_euclidean_distance_matrix_fp16_neon.cc
│   │   │   ├── mips_euclidean_distance_matrix_fp32_avx.cc
│   │   │   ├── mips_euclidean_distance_matrix_fp32_avx512.cc
│   │   │   ├── mips_euclidean_distance_matrix_fp32_dispatch.cc
│   │   │   ├── mips_euclidean_distance_matrix_fp32_neon.cc
│   │   │   ├── mips_euclidean_distance_matrix_fp32_sse.cc
│   │   │   ├── mips_euclidean_distance_matrix_int4_avx2.cc
│   │   │   ├── mips_euclidean_distance_matrix_int4_dispatch.cc
│   │   │   ├── mips_euclidean_distance_matrix_int4_sse.cc
│   │   │   ├── mips_euclidean_distance_matrix_int8_avx2.cc
│   │   │   ├── mips_euclidean_distance_matrix_int8_dispatch.cc
│   │   │   ├── mips_euclidean_distance_matrix_int8_sse.cc
│   │   │   ├── mips_euclidean_distance_matrix_scalar.cc
│   │   │   ├── norm1_matrix.h
│   │   │   ├── norm1_matrix_fp16.cc
│   │   │   ├── norm1_matrix_fp32.cc
│   │   │   ├── norm2_matrix.h
│   │   │   ├── norm2_matrix_fp16.cc
│   │   │   ├── norm2_matrix_fp32.cc
│   │   │   ├── norm_matrix.h
│   │   │   ├── norm_matrix_fp16.i
│   │   │   ├── norm_matrix_fp32.i
│   │   │   ├── normalizer.cc
│   │   │   └── normalizer.h
│   │   ├── math_batch/
│   │   │   ├── cosine_distance_batch.h
│   │   │   ├── distance_batch.h
│   │   │   ├── inner_product_distance_batch.h
│   │   │   ├── inner_product_distance_batch_dispatch.cc
│   │   │   ├── inner_product_distance_batch_impl_fp16_avx2.cc
│   │   │   ├── inner_product_distance_batch_impl_fp16_avx512.cc
│   │   │   ├── inner_product_distance_batch_impl_fp16_avx512fp16.cc
│   │   │   ├── inner_product_distance_batch_impl_fp32_avx2.cc
│   │   │   ├── inner_product_distance_batch_impl_int8_avx2.cc
│   │   │   └── inner_product_distance_batch_impl_int8_avx512fp16.cc
│   │   ├── parallel/
│   │   │   ├── lock.h
│   │   │   ├── multi_thread_list.h
│   │   │   ├── semaphore.h
│   │   │   └── thread_pool.cc
│   │   ├── pattern/
│   │   │   ├── defer.h
│   │   │   └── scope_guard.h
│   │   ├── utility/
│   │   │   ├── bit_string_helper.h
│   │   │   ├── bitset_helper.cc
│   │   │   ├── bitset_helper.h
│   │   │   ├── concurrency_helper.cc
│   │   │   ├── concurrency_helper.h
│   │   │   ├── dl_helper.cc
│   │   │   ├── dl_helper.h
│   │   │   ├── file_helper.cc
│   │   │   ├── float_helper.cc
│   │   │   ├── math_helper.h
│   │   │   ├── matrix_helper.h
│   │   │   ├── memory_helper.cc
│   │   │   ├── memory_helper.h
│   │   │   ├── string_helper.cc
│   │   │   └── time_helper.cc
│   │   ├── version.cc
│   │   ├── version.h
│   │   └── version.i
│   ├── binding/
│   │   ├── CMakeLists.txt
│   │   └── python/
│   │       ├── CMakeLists.txt
│   │       ├── binding.cc
│   │       ├── exports.mac
│   │       ├── include/
│   │       │   ├── python_collection.h
│   │       │   ├── python_config.h
│   │       │   ├── python_doc.h
│   │       │   ├── python_param.h
│   │       │   ├── python_schema.h
│   │       │   └── python_type.h
│   │       ├── model/
│   │       │   ├── common/
│   │       │   │   └── python_config.cc
│   │       │   ├── param/
│   │       │   │   └── python_param.cc
│   │       │   ├── python_collection.cc
│   │       │   ├── python_doc.cc
│   │       │   └── schema/
│   │       │       └── python_schema.cc
│   │       └── typing/
│   │           └── python_type.cc
│   ├── core/
│   │   ├── CMakeLists.txt
│   │   ├── algorithm/
│   │   │   ├── CMakeLists.txt
│   │   │   ├── cluster/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── cluster_params.h
│   │   │   │   ├── kmeans_cluster.cc
│   │   │   │   ├── linear_seeker.cc
│   │   │   │   ├── linear_seeker.h
│   │   │   │   ├── opt_kmeans_cluster.cc
│   │   │   │   ├── seeker.h
│   │   │   │   ├── stratified_cluster.cc
│   │   │   │   ├── stratified_cluster_trainer.cc
│   │   │   │   ├── stratified_cluster_trainer.h
│   │   │   │   └── vector_mean.h
│   │   │   ├── flat/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── flat_builder.cc
│   │   │   │   ├── flat_builder.h
│   │   │   │   ├── flat_distance_matrix.h
│   │   │   │   ├── flat_index_format.h
│   │   │   │   ├── flat_searcher.cc
│   │   │   │   ├── flat_searcher.h
│   │   │   │   ├── flat_searcher_context.h
│   │   │   │   ├── flat_searcher_provider.h
│   │   │   │   ├── flat_streamer.cc
│   │   │   │   ├── flat_streamer.h
│   │   │   │   ├── flat_streamer_context.h
│   │   │   │   ├── flat_streamer_dumper.h
│   │   │   │   ├── flat_streamer_entity.cc
│   │   │   │   ├── flat_streamer_entity.h
│   │   │   │   ├── flat_streamer_provider.h
│   │   │   │   └── flat_utility.h
│   │   │   ├── flat_sparse/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── flat_sparse_builder.cc
│   │   │   │   ├── flat_sparse_builder.h
│   │   │   │   ├── flat_sparse_context.cc
│   │   │   │   ├── flat_sparse_context.h
│   │   │   │   ├── flat_sparse_entity.h
│   │   │   │   ├── flat_sparse_index_format.h
│   │   │   │   ├── flat_sparse_provider.h
│   │   │   │   ├── flat_sparse_search.h
│   │   │   │   ├── flat_sparse_searcher.cc
│   │   │   │   ├── flat_sparse_searcher.h
│   │   │   │   ├── flat_sparse_searcher_entity.cc
│   │   │   │   ├── flat_sparse_searcher_entity.h
│   │   │   │   ├── flat_sparse_streamer.cc
│   │   │   │   ├── flat_sparse_streamer.h
│   │   │   │   ├── flat_sparse_streamer_entity.cc
│   │   │   │   ├── flat_sparse_streamer_entity.h
│   │   │   │   └── flat_sparse_utility.h
│   │   │   ├── hnsw/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── hnsw_algorithm.cc
│   │   │   │   ├── hnsw_algorithm.h
│   │   │   │   ├── hnsw_builder.cc
│   │   │   │   ├── hnsw_builder.h
│   │   │   │   ├── hnsw_builder_entity.cc
│   │   │   │   ├── hnsw_builder_entity.h
│   │   │   │   ├── hnsw_chunk.cc
│   │   │   │   ├── hnsw_chunk.h
│   │   │   │   ├── hnsw_context.cc
│   │   │   │   ├── hnsw_context.h
│   │   │   │   ├── hnsw_dist_calculator.h
│   │   │   │   ├── hnsw_entity.cc
│   │   │   │   ├── hnsw_entity.h
│   │   │   │   ├── hnsw_index_hash.h
│   │   │   │   ├── hnsw_index_provider.h
│   │   │   │   ├── hnsw_params.h
│   │   │   │   ├── hnsw_searcher.cc
│   │   │   │   ├── hnsw_searcher.h
│   │   │   │   ├── hnsw_searcher_entity.cc
│   │   │   │   ├── hnsw_searcher_entity.h
│   │   │   │   ├── hnsw_streamer.cc
│   │   │   │   ├── hnsw_streamer.h
│   │   │   │   ├── hnsw_streamer_entity.cc
│   │   │   │   └── hnsw_streamer_entity.h
│   │   │   ├── hnsw_rabitq/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── hnsw_rabitq_algorithm.cc
│   │   │   │   ├── hnsw_rabitq_algorithm.h
│   │   │   │   ├── hnsw_rabitq_builder.cc
│   │   │   │   ├── hnsw_rabitq_builder.h
│   │   │   │   ├── hnsw_rabitq_builder_entity.cc
│   │   │   │   ├── hnsw_rabitq_builder_entity.h
│   │   │   │   ├── hnsw_rabitq_chunk.cc
│   │   │   │   ├── hnsw_rabitq_chunk.h
│   │   │   │   ├── hnsw_rabitq_context.cc
│   │   │   │   ├── hnsw_rabitq_context.h
│   │   │   │   ├── hnsw_rabitq_dist_calculator.cc
│   │   │   │   ├── hnsw_rabitq_dist_calculator.h
│   │   │   │   ├── hnsw_rabitq_entity.cc
│   │   │   │   ├── hnsw_rabitq_entity.h
│   │   │   │   ├── hnsw_rabitq_index_hash.h
│   │   │   │   ├── hnsw_rabitq_index_provider.h
│   │   │   │   ├── hnsw_rabitq_params.h
│   │   │   │   ├── hnsw_rabitq_query_algorithm.cc
│   │   │   │   ├── hnsw_rabitq_query_algorithm.h
│   │   │   │   ├── hnsw_rabitq_query_entity.h
│   │   │   │   ├── hnsw_rabitq_register.cc
│   │   │   │   ├── hnsw_rabitq_searcher.cc
│   │   │   │   ├── hnsw_rabitq_searcher.h
│   │   │   │   ├── hnsw_rabitq_searcher_entity.cc
│   │   │   │   ├── hnsw_rabitq_searcher_entity.h
│   │   │   │   ├── hnsw_rabitq_streamer.cc
│   │   │   │   ├── hnsw_rabitq_streamer.h
│   │   │   │   ├── hnsw_rabitq_streamer_entity.cc
│   │   │   │   ├── hnsw_rabitq_streamer_entity.h
│   │   │   │   ├── rabitq_converter.cc
│   │   │   │   ├── rabitq_converter.h
│   │   │   │   ├── rabitq_params.h
│   │   │   │   ├── rabitq_reformer.cc
│   │   │   │   ├── rabitq_reformer.h
│   │   │   │   ├── rabitq_utils.cc
│   │   │   │   └── rabitq_utils.h
│   │   │   ├── hnsw_sparse/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── hnsw_sparse_algorithm.cc
│   │   │   │   ├── hnsw_sparse_algorithm.h
│   │   │   │   ├── hnsw_sparse_builder.cc
│   │   │   │   ├── hnsw_sparse_builder.h
│   │   │   │   ├── hnsw_sparse_builder_entity.cc
│   │   │   │   ├── hnsw_sparse_builder_entity.h
│   │   │   │   ├── hnsw_sparse_chunk.cc
│   │   │   │   ├── hnsw_sparse_chunk.h
│   │   │   │   ├── hnsw_sparse_context.cc
│   │   │   │   ├── hnsw_sparse_context.h
│   │   │   │   ├── hnsw_sparse_dist_calculator.h
│   │   │   │   ├── hnsw_sparse_entity.cc
│   │   │   │   ├── hnsw_sparse_entity.h
│   │   │   │   ├── hnsw_sparse_index_hash.h
│   │   │   │   ├── hnsw_sparse_index_provider.h
│   │   │   │   ├── hnsw_sparse_params.h
│   │   │   │   ├── hnsw_sparse_searcher.cc
│   │   │   │   ├── hnsw_sparse_searcher.h
│   │   │   │   ├── hnsw_sparse_searcher_entity.cc
│   │   │   │   ├── hnsw_sparse_searcher_entity.h
│   │   │   │   ├── hnsw_sparse_streamer.cc
│   │   │   │   ├── hnsw_sparse_streamer.h
│   │   │   │   ├── hnsw_sparse_streamer_entity.cc
│   │   │   │   └── hnsw_sparse_streamer_entity.h
│   │   │   └── ivf/
│   │   │       ├── CMakeLists.txt
│   │   │       ├── ivf_builder.cc
│   │   │       ├── ivf_builder.h
│   │   │       ├── ivf_centroid_index.cc
│   │   │       ├── ivf_centroid_index.h
│   │   │       ├── ivf_distance_calculator.cc
│   │   │       ├── ivf_distance_calculator.h
│   │   │       ├── ivf_dumper.cc
│   │   │       ├── ivf_dumper.h
│   │   │       ├── ivf_entity.cc
│   │   │       ├── ivf_entity.h
│   │   │       ├── ivf_index_format.h
│   │   │       ├── ivf_index_provider.h
│   │   │       ├── ivf_params.h
│   │   │       ├── ivf_searcher.cc
│   │   │       ├── ivf_searcher.h
│   │   │       ├── ivf_searcher_context.h
│   │   │       ├── ivf_streamer.cc
│   │   │       ├── ivf_streamer.h
│   │   │       └── ivf_utility.h
│   │   ├── framework/
│   │   │   ├── CMakeLists.txt
│   │   │   ├── index_cluster.cc
│   │   │   ├── index_context.cc
│   │   │   ├── index_converter.cc
│   │   │   ├── index_error.cc
│   │   │   ├── index_factory.cc
│   │   │   ├── index_flow.cc
│   │   │   ├── index_helper.cc
│   │   │   ├── index_logger.cc
│   │   │   ├── index_mapping.cc
│   │   │   ├── index_meta.cc
│   │   │   ├── index_plugin.cc
│   │   │   └── index_version.cc
│   │   ├── interface/
│   │   │   ├── CMakeLists.txt
│   │   │   ├── index.cc
│   │   │   ├── index_factory.cc
│   │   │   ├── index_param.cc
│   │   │   ├── indexes/
│   │   │   │   ├── flat_index.cc
│   │   │   │   ├── hnsw_index.cc
│   │   │   │   ├── hnsw_rabitq_index.cc
│   │   │   │   └── ivf_index.cc
│   │   │   └── utils/
│   │   │       └── utils.h
│   │   ├── metric/
│   │   │   ├── CMakeLists.txt
│   │   │   ├── cosine_metric.cc
│   │   │   ├── euclidean_metric.cc
│   │   │   ├── hamming_metric.cc
│   │   │   ├── inner_product_metric.cc
│   │   │   ├── metric_params.h
│   │   │   ├── mips_euclidean_metric.cc
│   │   │   ├── quantized_integer_metric.cc
│   │   │   ├── quantized_integer_metric_batch.h
│   │   │   └── quantized_integer_metric_matrix.h
│   │   ├── mixed_reducer/
│   │   │   ├── CMakeLists.txt
│   │   │   ├── mixed_reducer_params.h
│   │   │   ├── mixed_streamer_reducer.cc
│   │   │   └── mixed_streamer_reducer.h
│   │   ├── quantizer/
│   │   │   ├── CMakeLists.txt
│   │   │   ├── binary_converter.cc
│   │   │   ├── binary_reformer.cc
│   │   │   ├── cosine_converter.cc
│   │   │   ├── cosine_reformer.cc
│   │   │   ├── half_float_converter.cc
│   │   │   ├── half_float_reformer.cc
│   │   │   ├── integer_quantizer_converter.cc
│   │   │   ├── integer_quantizer_reformer.cc
│   │   │   ├── mips_converter.cc
│   │   │   ├── mips_reformer.cc
│   │   │   ├── quantizer_params.h
│   │   │   └── record_quantizer.h
│   │   └── utility/
│   │       ├── CMakeLists.txt
│   │       ├── basic_refiner.cc
│   │       ├── buffer_storage.cc
│   │       ├── file_dumper.cc
│   │       ├── file_read_storage.cc
│   │       ├── memory_dumper.cc
│   │       ├── memory_read_storage.cc
│   │       ├── mmap_file_read_storage.cc
│   │       ├── mmap_file_storage.cc
│   │       ├── sparse_utility.h
│   │       ├── utility_params.h
│   │       └── visit_filter.h
│   ├── db/
│   │   ├── CMakeLists.txt
│   │   ├── collection.cc
│   │   ├── common/
│   │   │   ├── CMakeLists.txt
│   │   │   ├── cgroup_util.cc
│   │   │   ├── cgroup_util.h
│   │   │   ├── concurrent_roaring_bitmap.cc
│   │   │   ├── concurrent_roaring_bitmap.h
│   │   │   ├── config.cc
│   │   │   ├── constants.h
│   │   │   ├── error_code.cc
│   │   │   ├── error_code.h
│   │   │   ├── file_helper.cc
│   │   │   ├── file_helper.h
│   │   │   ├── global_resource.cc
│   │   │   ├── global_resource.h
│   │   │   ├── glogger.h
│   │   │   ├── logger.h
│   │   │   ├── profiler.h
│   │   │   ├── rocbsdb_context.cc
│   │   │   ├── rocksdb_context.h
│   │   │   ├── status.cc
│   │   │   ├── typedef.h
│   │   │   ├── utils.cc
│   │   │   └── utils.h
│   │   ├── index/
│   │   │   ├── CMakeLists.txt
│   │   │   ├── column/
│   │   │   │   ├── column_indexer.h
│   │   │   │   ├── common/
│   │   │   │   │   └── index_results.h
│   │   │   │   ├── inverted_column/
│   │   │   │   │   ├── inverted_codec.h
│   │   │   │   │   ├── inverted_column_indexer.h
│   │   │   │   │   ├── inverted_column_indexer_search.cc
│   │   │   │   │   ├── inverted_column_indexer_util.cc
│   │   │   │   │   ├── inverted_column_indexer_write.cc
│   │   │   │   │   ├── inverted_doc_range.h
│   │   │   │   │   ├── inverted_indexer.cc
│   │   │   │   │   ├── inverted_indexer.h
│   │   │   │   │   ├── inverted_rocksdb_merger.h
│   │   │   │   │   └── inverted_search_result.h
│   │   │   │   └── vector_column/
│   │   │   │       ├── combined_vector_column_indexer.cc
│   │   │   │       ├── combined_vector_column_indexer.h
│   │   │   │       ├── engine_helper.hpp
│   │   │   │       ├── vector_column_indexer.cc
│   │   │   │       ├── vector_column_indexer.h
│   │   │   │       ├── vector_column_params.h
│   │   │   │       └── vector_index_results.h
│   │   │   ├── common/
│   │   │   │   ├── delete_store.h
│   │   │   │   ├── doc.cc
│   │   │   │   ├── id_map.cc
│   │   │   │   ├── id_map.h
│   │   │   │   ├── index_filter.h
│   │   │   │   ├── index_params.cc
│   │   │   │   ├── meta.h
│   │   │   │   ├── proto_converter.cc
│   │   │   │   ├── proto_converter.h
│   │   │   │   ├── schema.cc
│   │   │   │   ├── stats.cc
│   │   │   │   ├── type_helper.cc
│   │   │   │   ├── type_helper.h
│   │   │   │   ├── version_manager.cc
│   │   │   │   └── version_manager.h
│   │   │   ├── segment/
│   │   │   │   ├── column_merging_reader.cc
│   │   │   │   ├── column_merging_reader.h
│   │   │   │   ├── segment.cc
│   │   │   │   ├── segment.h
│   │   │   │   ├── segment_helper.cc
│   │   │   │   ├── segment_helper.h
│   │   │   │   ├── segment_manager.cc
│   │   │   │   ├── segment_manager.h
│   │   │   │   ├── sql_expr_parser.cc
│   │   │   │   └── sql_expr_parser.h
│   │   │   └── storage/
│   │   │       ├── arrow_ipc_writer.cc
│   │   │       ├── arrow_ipc_writer.h
│   │   │       ├── base_forward_store.h
│   │   │       ├── bufferpool_forward_store.cc
│   │   │       ├── bufferpool_forward_store.h
│   │   │       ├── chunked_file_writer.cc
│   │   │       ├── chunked_file_writer.h
│   │   │       ├── forward_writer.cc
│   │   │       ├── forward_writer.h
│   │   │       ├── lazy_record_batch_reader.h
│   │   │       ├── memory_forward_store.cc
│   │   │       ├── memory_forward_store.h
│   │   │       ├── mmap_forward_store.cc
│   │   │       ├── mmap_forward_store.h
│   │   │       ├── parquet_writer.cc
│   │   │       ├── parquet_writer.h
│   │   │       ├── store_helper.h
│   │   │       └── wal/
│   │   │           ├── local_wal_file.cc
│   │   │           ├── local_wal_file.h
│   │   │           ├── wal_file.cc
│   │   │           └── wal_file.h
│   │   ├── proto/
│   │   │   └── zvec.proto
│   │   └── sqlengine/
│   │       ├── CMakeLists.txt
│   │       ├── analyzer/
│   │       │   ├── query_analyzer.cc
│   │       │   ├── query_analyzer.h
│   │       │   ├── query_field_info.cc
│   │       │   ├── query_field_info.h
│   │       │   ├── query_info.cc
│   │       │   ├── query_info.h
│   │       │   ├── query_info_helper.cc
│   │       │   ├── query_info_helper.h
│   │       │   ├── query_node.cc
│   │       │   ├── query_node.h
│   │       │   ├── query_node_walker.cc
│   │       │   ├── query_node_walker.h
│   │       │   ├── query_orderby_info.cc
│   │       │   ├── query_orderby_info.h
│   │       │   ├── simple_rewriter.cc
│   │       │   └── simple_rewriter.h
│   │       ├── antlr/
│   │       │   ├── SQLLexer.g4
│   │       │   ├── SQLParser.g4
│   │       │   ├── gen/
│   │       │   │   ├── SQLLexer.cc
│   │       │   │   ├── SQLLexer.h
│   │       │   │   ├── SQLLexer.interp
│   │       │   │   ├── SQLLexer.tokens
│   │       │   │   ├── SQLParser.cc
│   │       │   │   ├── SQLParser.h
│   │       │   │   ├── SQLParser.interp
│   │       │   │   ├── SQLParser.tokens
│   │       │   │   ├── SQLParserBaseListener.cc
│   │       │   │   ├── SQLParserBaseListener.h
│   │       │   │   ├── SQLParserListener.cc
│   │       │   │   └── SQLParserListener.h
│   │       │   └── gen_parser.sh
│   │       ├── common/
│   │       │   ├── generic_node.h
│   │       │   ├── group_by.h
│   │       │   ├── util.cc
│   │       │   └── util.h
│   │       ├── parser/
│   │       │   ├── base_info.h
│   │       │   ├── case_changing_charstream.h
│   │       │   ├── error_verbose_listener.h
│   │       │   ├── node.cc
│   │       │   ├── node.h
│   │       │   ├── orderby_elem_info.h
│   │       │   ├── query_parser.cc
│   │       │   ├── query_parser.h
│   │       │   ├── select_info.cc
│   │       │   ├── select_info.h
│   │       │   ├── selected_elem_info.cc
│   │       │   ├── selected_elem_info.h
│   │       │   ├── sql_info.cc
│   │       │   ├── sql_info.h
│   │       │   ├── sql_info_helper.cc
│   │       │   ├── sql_info_helper.h
│   │       │   ├── zvec_cached_sql_parser.cc
│   │       │   ├── zvec_cached_sql_parser.h
│   │       │   ├── zvec_parser.cc
│   │       │   ├── zvec_parser.h
│   │       │   ├── zvec_sql_parser.cc
│   │       │   └── zvec_sql_parser.h
│   │       ├── planner/
│   │       │   ├── doc_filter.cc
│   │       │   ├── doc_filter.h
│   │       │   ├── invert_recall_node.cc
│   │       │   ├── invert_recall_node.h
│   │       │   ├── invert_search.cc
│   │       │   ├── invert_search.h
│   │       │   ├── op_register.cc
│   │       │   ├── op_register.h
│   │       │   ├── ops/
│   │       │   │   ├── check_not_filtered_op.cc
│   │       │   │   ├── check_not_filtered_op.h
│   │       │   │   ├── contain_op.cc
│   │       │   │   ├── contain_op.h
│   │       │   │   ├── fetch_vector_op.cc
│   │       │   │   └── fetch_vector_op.h
│   │       │   ├── optimizer.cc
│   │       │   ├── optimizer.h
│   │       │   ├── plan_info.cc
│   │       │   ├── plan_info.h
│   │       │   ├── query_planner.cc
│   │       │   ├── query_planner.h
│   │       │   ├── segment_node.cc
│   │       │   ├── segment_node.h
│   │       │   ├── vector_recall_node.cc
│   │       │   └── vector_recall_node.h
│   │       ├── sqlengine.cc
│   │       ├── sqlengine.h
│   │       ├── sqlengine_impl.cc
│   │       └── sqlengine_impl.h
│   ├── include/
│   │   └── zvec/
│   │       ├── ailego/
│   │       │   ├── buffer/
│   │       │   │   ├── buffer_manager.h
│   │       │   │   ├── buffer_pool.h
│   │       │   │   └── concurrentqueue.h
│   │       │   ├── container/
│   │       │   │   ├── blob.h
│   │       │   │   ├── cube.h
│   │       │   │   ├── heap.h
│   │       │   │   ├── hypercube.h
│   │       │   │   ├── params.h
│   │       │   │   └── vector.h
│   │       │   ├── encoding/
│   │       │   │   ├── json/
│   │       │   │   │   ├── mod_json.h
│   │       │   │   │   └── mod_json_plus.h
│   │       │   │   └── json.h
│   │       │   ├── hash/
│   │       │   │   ├── crc32c.h
│   │       │   │   └── jump_hash.h
│   │       │   ├── internal/
│   │       │   │   └── platform.h
│   │       │   ├── io/
│   │       │   │   ├── file.h
│   │       │   │   └── mmap_file.h
│   │       │   ├── logger/
│   │       │   │   └── logger.h
│   │       │   ├── math_batch/
│   │       │   │   └── utils.h
│   │       │   ├── parallel/
│   │       │   │   ├── thread_pool.h
│   │       │   │   └── thread_queue.h
│   │       │   ├── pattern/
│   │       │   │   ├── closure.h
│   │       │   │   ├── expected.hpp
│   │       │   │   ├── factory.h
│   │       │   │   └── singleton.h
│   │       │   ├── string/
│   │       │   │   ├── string_concat_helper.h
│   │       │   │   └── string_view.h
│   │       │   └── utility/
│   │       │       ├── file_helper.h
│   │       │       ├── float_helper.h
│   │       │       ├── string_helper.h
│   │       │       ├── string_helper_impl.h
│   │       │       ├── time_helper.h
│   │       │       └── type_helper.h
│   │       ├── core/
│   │       │   ├── framework/
│   │       │   │   ├── index_builder.h
│   │       │   │   ├── index_bundle.h
│   │       │   │   ├── index_cluster.h
│   │       │   │   ├── index_context.h
│   │       │   │   ├── index_converter.h
│   │       │   │   ├── index_document.h
│   │       │   │   ├── index_dumper.h
│   │       │   │   ├── index_error.h
│   │       │   │   ├── index_factory.h
│   │       │   │   ├── index_features.h
│   │       │   │   ├── index_filter.h
│   │       │   │   ├── index_flow.h
│   │       │   │   ├── index_format.h
│   │       │   │   ├── index_framework.h
│   │       │   │   ├── index_groupby.h
│   │       │   │   ├── index_helper.h
│   │       │   │   ├── index_holder.h
│   │       │   │   ├── index_logger.h
│   │       │   │   ├── index_mapping.h
│   │       │   │   ├── index_memory.h
│   │       │   │   ├── index_meta.h
│   │       │   │   ├── index_metric.h
│   │       │   │   ├── index_module.h
│   │       │   │   ├── index_packer.h
│   │       │   │   ├── index_plugin.h
│   │       │   │   ├── index_provider.h
│   │       │   │   ├── index_reducer.h
│   │       │   │   ├── index_refiner.h
│   │       │   │   ├── index_reformer.h
│   │       │   │   ├── index_runner.h
│   │       │   │   ├── index_searcher.h
│   │       │   │   ├── index_segment_storage.h
│   │       │   │   ├── index_stats.h
│   │       │   │   ├── index_storage.h
│   │       │   │   ├── index_streamer.h
│   │       │   │   ├── index_threads.h
│   │       │   │   ├── index_trainer.h
│   │       │   │   ├── index_unpacker.h
│   │       │   │   └── index_version.h
│   │       │   └── interface/
│   │       │       ├── constants.h
│   │       │       ├── index.h
│   │       │       ├── index_factory.h
│   │       │       ├── index_param.h
│   │       │       └── index_param_builders.h
│   │       ├── db/
│   │       │   ├── collection.h
│   │       │   ├── config.h
│   │       │   ├── doc.h
│   │       │   ├── index_params.h
│   │       │   ├── options.h
│   │       │   ├── query_params.h
│   │       │   ├── schema.h
│   │       │   ├── stats.h
│   │       │   ├── status.h
│   │       │   └── type.h
│   │       └── turbo/
│   │           └── turbo.h
│   └── turbo/
│       ├── CMakeLists.txt
│       ├── avx512_vnni/
│       │   └── record_quantized_int8/
│       │       ├── common.h
│       │       ├── cosine.cc
│       │       ├── cosine.h
│       │       ├── squared_euclidean.cc
│       │       └── squared_euclidean.h
│       └── turbo.cc
├── tests/
│   ├── CMakeLists.txt
│   ├── ailego/
│   │   ├── CMakeLists.txt
│   │   ├── algorithm/
│   │   │   ├── integer_quantizer_test.cc
│   │   │   └── kmeans_test.cc
│   │   ├── buffer/
│   │   │   └── buffer_manager_test.cc
│   │   ├── container/
│   │   │   ├── bitmap_test.cc
│   │   │   ├── blob_test.cc
│   │   │   ├── bloom_filter_test.cc
│   │   │   ├── cube_test.cc
│   │   │   ├── heap_test.cc
│   │   │   ├── hypercube_test.cc
│   │   │   ├── params_test.cc
│   │   │   ├── reservoir_test.cc
│   │   │   ├── vector_array_test.cc
│   │   │   └── vector_test.cc
│   │   ├── encoding/
│   │   │   └── json_parse_test.cc
│   │   ├── hash/
│   │   │   ├── crc32c_test.cc
│   │   │   └── jump_hash_test.cc
│   │   ├── internal/
│   │   │   └── cpu_features_test.cc
│   │   ├── io/
│   │   │   ├── file_lock_test.cc
│   │   │   ├── file_test.cc
│   │   │   └── mmap_file_test.cc
│   │   ├── logger/
│   │   │   └── logger_test.cc
│   │   ├── math/
│   │   │   ├── cosine_distance_matrix_fp16_test.cc
│   │   │   ├── cosine_distance_matrix_fp32_test.cc
│   │   │   ├── cosine_distance_matrix_int8_test.cc
│   │   │   ├── euclidean_distance_matrix_fp16_test.cc
│   │   │   ├── euclidean_distance_matrix_fp32_test.cc
│   │   │   ├── euclidean_distance_matrix_int4_test.cc
│   │   │   ├── euclidean_distance_matrix_int8_test.cc
│   │   │   ├── hamming_distance_matrix_test.cc
│   │   │   ├── inner_product_matrix_fp16_test.cc
│   │   │   ├── inner_product_matrix_fp32_test.cc
│   │   │   ├── inner_product_matrix_int4_test.cc
│   │   │   ├── inner_product_matrix_int8_test.cc
│   │   │   ├── mips_euclidean_distance_matrix_fp16_test.cc
│   │   │   ├── mips_euclidean_distance_matrix_fp32_test.cc
│   │   │   ├── mips_euclidean_distance_matrix_int4_test.cc
│   │   │   ├── mips_euclidean_distance_matrix_int8_test.cc
│   │   │   ├── norm_matrix_fp16_test.cc
│   │   │   ├── norm_matrix_fp32_test.cc
│   │   │   ├── norm_matrix_int4_test.cc
│   │   │   ├── norm_matrix_int8_test.cc
│   │   │   └── normalizer_test.cc
│   │   ├── parallel/
│   │   │   ├── lock_test.cc
│   │   │   ├── multi_thread_list_test.cc
│   │   │   ├── semaphore_test.cc
│   │   │   ├── thread_pool_test.cc
│   │   │   └── thread_queue_test.cc
│   │   ├── pattern/
│   │   │   ├── closure_test.cc
│   │   │   ├── factory_test.cc
│   │   │   ├── scope_guard_test.cc
│   │   │   └── singleton_test.cc
│   │   ├── utility/
│   │   │   ├── bit_string_helper_test.cc
│   │   │   ├── bitset_helper_test.cc
│   │   │   ├── dl_helper_test.cc
│   │   │   ├── float_helper_test.cc
│   │   │   ├── matrix_helper_test.cc
│   │   │   ├── memory_helper_test.cc
│   │   │   ├── string_helper_test.cc
│   │   │   ├── time_helper_test.cc
│   │   │   └── type_helper_test.cc
│   │   └── version_test.cc
│   ├── core/
│   │   ├── CMakeLists.txt
│   │   ├── algorithm/
│   │   │   ├── CMakeLists.txt
│   │   │   ├── cluster/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── kmeans_cluster_test.cc
│   │   │   │   └── opt_kmeans_cluster_test.cc
│   │   │   ├── flat/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── flat_builder_test.cc
│   │   │   │   ├── flat_searcher_test.cpp
│   │   │   │   ├── flat_streamer_buffer_test.cc
│   │   │   │   ├── flat_streamer_buffer_time_test.cc
│   │   │   │   └── flat_streamer_test.cc
│   │   │   ├── flat_sparse/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── flat_sparse_builder_test.cc
│   │   │   │   ├── flat_sparse_searcher_test.cc
│   │   │   │   ├── flat_sparse_streamer_buffer_test.cc
│   │   │   │   └── flat_sparse_streamer_test.cc
│   │   │   ├── hnsw/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── hnsw_builder_test.cc
│   │   │   │   ├── hnsw_searcher_test.cpp
│   │   │   │   ├── hnsw_streamer_buffer_test.cc
│   │   │   │   └── hnsw_streamer_test.cc
│   │   │   ├── hnsw_rabitq/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── hnsw_rabitq_builder_test.cc
│   │   │   │   ├── hnsw_rabitq_searcher_test.cc
│   │   │   │   └── hnsw_rabitq_streamer_test.cc
│   │   │   ├── hnsw_sparse/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── hnsw_sparse_builder_test.cc
│   │   │   │   ├── hnsw_sparse_searcher_test.cpp
│   │   │   │   ├── hnsw_sparse_streamer_buffer_test.cpp
│   │   │   │   └── hnsw_sparse_streamer_test.cc
│   │   │   └── ivf/
│   │   │       ├── CMakeLists.txt
│   │   │       ├── ivf_builder_test.cc
│   │   │       └── ivf_searcher_test.cc
│   │   ├── framework/
│   │   │   └── CMakeLists.txt
│   │   ├── interface/
│   │   │   ├── CMakeLists.txt
│   │   │   └── index_interface_test.cc
│   │   ├── metric/
│   │   │   ├── CMakeLists.txt
│   │   │   ├── cosine_metric_test.cc
│   │   │   ├── euclidean_metric_test.cc
│   │   │   ├── hamming_metric_test.cc
│   │   │   ├── inner_product_metric_test.cc
│   │   │   └── quantized_integer_metric_test.cc
│   │   ├── quantizer/
│   │   │   ├── CMakeLists.txt
│   │   │   ├── half_float_reformer_test.cc
│   │   │   └── integer_quantizer_reformer_test.cc
│   │   └── utility/
│   │       ├── CMakeLists.txt
│   │       ├── buffer_storage_test.cpp
│   │       ├── file_dumper_test.cc
│   │       ├── memory_dumper_test.cc
│   │       ├── mmap_file_container_test.cc
│   │       └── mmap_file_storage_test.cpp
│   └── db/
│       ├── CMakeLists.txt
│       ├── collection_test.cc
│       ├── common/
│       │   ├── CMakeLists.txt
│       │   ├── config_test.cc
│       │   └── status_test.cc
│       ├── crash_recovery/
│       │   ├── CMakeLists.txt
│       │   ├── data_generator.cc
│       │   ├── utility.h
│       │   └── write_recovery_test.cc
│       ├── index/
│       │   ├── CMakeLists.txt
│       │   ├── column/
│       │   │   ├── inverted_column/
│       │   │   │   ├── inverted_column_indexer_array_numbers_test.cc
│       │   │   │   ├── inverted_column_indexer_bool_test.cc
│       │   │   │   ├── inverted_column_indexer_cyclic_numbers_test.cc
│       │   │   │   ├── inverted_column_indexer_sequential_numbers_test.cc
│       │   │   │   ├── inverted_column_indexer_string_test.cc
│       │   │   │   └── inverted_indexer_util_test.cc
│       │   │   └── vector_column_indexer_test.cc
│       │   ├── common/
│       │   │   ├── db_proto_converter_test.cc
│       │   │   ├── db_type_helper_test.cc
│       │   │   ├── doc_test.cc
│       │   │   ├── index_params_test.cc
│       │   │   ├── meta_test.cc
│       │   │   ├── query_params_test.cc
│       │   │   ├── schema_test.cc
│       │   │   └── version_manager_test.cc
│       │   ├── segment/
│       │   │   ├── column_merging_reader_test.cc
│       │   │   ├── segment_helper_test.cc
│       │   │   ├── segment_test.cc
│       │   │   ├── sql_expr_parser_test.cc
│       │   │   └── sql_expr_validator_test.cc
│       │   ├── storage/
│       │   │   ├── arrow_ipc_writer_test.cc
│       │   │   ├── bufferpool_store_test.cc
│       │   │   ├── mem_store_test.cc
│       │   │   ├── mmap_store_test.cc
│       │   │   ├── parquet_writer_test.cc
│       │   │   └── wal_file_test.cc
│       │   └── utils/
│       │       ├── utils.cc
│       │       └── utils.h
│       └── sqlengine/
│           ├── CMakeLists.txt
│           ├── contain_test.cc
│           ├── forward_recall_test.cc
│           ├── invert_recall_test.cc
│           ├── like_test.cc
│           ├── mock_segment.h
│           ├── optimizer_test.cc
│           ├── query_info_test.cc
│           ├── recall_base.h
│           ├── simple_rewriter_test.cc
│           ├── sqlengine_test.cc
│           ├── test_helper.h
│           └── vector_recall_test.cc
├── thirdparty/
│   ├── CMakeLists.txt
│   ├── CRoaring/
│   │   └── CMakeLists.txt
│   ├── RaBitQ-Library/
│   │   └── CMakeLists.txt
│   ├── antlr/
│   │   ├── CMakeLists.txt
│   │   └── antlr4.patch
│   ├── arrow/
│   │   ├── CMakeLists.txt
│   │   ├── arrow.android.patch
│   │   └── arrow.patch
│   ├── gflags/
│   │   └── CMakeLists.txt
│   ├── glog/
│   │   ├── CMakeLists.txt
│   │   ├── glog.android.patch
│   │   └── glog.patch
│   ├── googletest/
│   │   └── CMakeLists.txt
│   ├── lz4/
│   │   └── CMakeLists.txt
│   ├── magic_enum/
│   │   └── CMakeLists.txt
│   ├── protobuf/
│   │   └── CMakeLists.txt
│   ├── rocksdb/
│   │   ├── CMakeLists.txt
│   │   └── rocksdb.android.patch
│   ├── sparsehash/
│   │   ├── CMakeLists.txt
│   │   └── sparseconfig.h
│   └── yaml-cpp/
│       └── CMakeLists.txt
└── tools/
    ├── CMakeLists.txt
    └── core/
        ├── CMakeLists.txt
        ├── README.md
        ├── bench.cc
        ├── bench_original.cc
        ├── bench_result.h
        ├── convert_cohere_parquet.py
        ├── filter_result_cache.h
        ├── flow.h
        ├── helper.h
        ├── index_meta_helper.h
        ├── local_builder.cc
        ├── local_builder_original.cc
        ├── meta_segment_common.h
        ├── recall.cc
        ├── recall_original.cc
        ├── txt2vecs.cc
        ├── txt_input_reader.h
        ├── vecs_common.h
        ├── vecs_index_holder.h
        └── vecs_reader.h

================================================
FILE CONTENTS
================================================

================================================
FILE: .clang-format
================================================
# http://clang.llvm.org/docs/ClangFormatStyleOptions.html
# Defines the Google C++ style for automatic reformatting.
BasedOnStyle: Google
MaxEmptyLinesToKeep: 2
DerivePointerAlignment: false
PointerAlignment: Right
AllowShortFunctionsOnASingleLine: Empty
IncludeBlocks: Merge
IncludeCategories:
  - Regex:           '^<linux/[0-9A-Za-z]+'
    Priority:        100
  - Regex:           '^<mach/[0-9A-Za-z]+'
    Priority:        101
  - Regex:           '^<mach-o/[0-9A-Za-z]+'
    Priority:        102
  - Regex:           '^<sys/[0-9A-Za-z]+'
    Priority:        103
  - Regex:           '^<[0-9A-Za-z]+\.h>$'
    Priority:        200
  - Regex:           '^<[0-9A-Za-z_]+>$'
    Priority:        201
  - Regex:           '^<[0-9A-Za-z_]+\.[0-9A-Za-z]+>$'
    Priority:        202
  - Regex:           '^<[0-9A-Za-z_]+/[0-9A-Za-z]+'
    Priority:        203
  - Regex:           '^\"[0-9A-Za-z_]+/[0-9A-Za-z]+'
    Priority:        300
  - Regex:           '^\"[0-9A-Za-z_]+\.[0-9A-Za-z]+\"$'
    Priority:        301
  - Regex:           '.*'
    Priority:        1000


================================================
FILE: .git/HEAD
================================================
ref: refs/heads/main


================================================
FILE: .git/config
================================================
[core]
	repositoryformatversion = 1
	filemode = true
	bare = false
	logallrefupdates = true
[remote "origin"]
	url = https://github.com/alibaba/zvec
	tagOpt = --no-tags
	fetch = +refs/heads/main:refs/remotes/origin/main
	promisor = true
	partialclonefilter = blob:limit=1048576
[branch "main"]
	remote = origin
	merge = refs/heads/main


================================================
FILE: .git/description
================================================
Unnamed repository; edit this file 'description' to name the repository.


================================================
FILE: .git/hooks/applypatch-msg.sample
================================================
#!/bin/sh
#
# An example hook script to check the commit log message taken by
# applypatch from an e-mail message.
#
# The hook should exit with non-zero status after issuing an
# appropriate message if it wants to stop the commit.  The hook is
# allowed to edit the commit message file.
#
# To enable this hook, rename this file to "applypatch-msg".

# Source git's shell setup script before locating the sibling hook.
. git-sh-setup

# When a commit-msg hook is installed and executable, hand control over to
# it with every argument this hook received; its exit status becomes ours.
hook_path="$(git rev-parse --git-path hooks/commit-msg)"
if test -x "$hook_path"
then
	exec "$hook_path" ${1+"$@"}
fi

# No commit-msg hook to delegate to: report success.
:


================================================
FILE: .git/hooks/commit-msg.sample
================================================
#!/bin/sh
#
# An example hook script to check the commit log message.
# Called by "git commit" with one argument, the name of the file
# that has the commit message.  The hook should exit with non-zero
# status after issuing an appropriate message if it wants to stop the
# commit.  The hook is allowed to edit the commit message file.
#
# To enable this hook, rename this file to "commit-msg".

# Uncomment the below to add a Signed-off-by line to the message.
# Doing this in a hook is a bad idea in general, but the prepare-commit-msg
# hook is more suited to it.
#
# SOB=$(git var GIT_AUTHOR_IDENT | sed -n 's/^\(.*>\).*$/Signed-off-by: \1/p')
# grep -qs "^$SOB" "$1" || echo "$SOB" >> "$1"

# This example catches duplicate Signed-off-by lines.

# Gather every Signed-off-by line from the message file ($1), count the
# identical ones, and drop those that occur exactly once.  Whatever is
# left over is a duplicated sign-off.
duplicates="$(grep '^Signed-off-by: ' "$1" |
	 sort | uniq -c | sed -e '/^[ 	]*1[ 	]/d')"
if test "" != "$duplicates"
then
	echo >&2 Duplicate Signed-off-by lines.
	exit 1
fi


================================================
FILE: .git/hooks/fsmonitor-watchman.sample
================================================
#!/usr/bin/perl

use strict;
use warnings;
use IPC::Open2;

# An example hook script to integrate Watchman
# (https://facebook.github.io/watchman/) with git to speed up detecting
# new and modified files.
#
# The hook is passed a version (currently 2) and last update token
# formatted as a string and outputs to stdout a new update token and
# all files that have been modified since the update token. Paths must
# be relative to the root of the working tree and separated by a single NUL.
#
# To enable this hook, rename this file to "query-watchman" and set
# 'git config core.fsmonitor .git/hooks/query-watchman'
#
# Arguments supplied by git: the hook interface version and the update
# token handed back by the previous invocation of this hook.
my ($version, $last_update_token) = @ARGV;

# Uncomment for debugging
# print STDERR "$0 $version $last_update_token\n";

# Check the hook interface version
if ($version ne 2) {
	die "Unsupported query-fsmonitor hook version '$version'.\n" .
	    "Falling back to scanning...\n";
}

# Current working directory; used as the watchman root in every watchman
# command issued by the subs below.
my $git_work_tree = get_working_dir();

# How many times is_work_tree_watched() may still ask watchman to start
# watching the work tree and re-run the query.
my $retry = 1;

# Name of the JSON module used to decode watchman responses: JSON::XS when
# it can be loaded, otherwise JSON::PP.
my $json_pkg;
eval {
	require JSON::XS;
	$json_pkg = "JSON::XS";
	1;
} or do {
	require JSON::PP;
	$json_pkg = "JSON::PP";
};

# Entry point: everything above is setup, the subs below do the work.
launch_watchman();

# Runs the watchman query and prints its clock token and file list, unless
# is_work_tree_watched() reports that it has already produced the output.
sub launch_watchman {
	my $reply = watchman_query();
	output_result($reply->{clock}, @{$reply->{files}})
		if is_work_tree_watched($reply);
}

# Writes the hook result to STDOUT as UTF-8: the clock token, a NUL byte,
# then the given paths separated by single NUL bytes.
sub output_result {
	my ($token, @paths) = @_;

	# Uncomment for debugging watchman output
	# open (my $fh, ">", ".git/watchman-output.out");
	# binmode $fh, ":utf8";
	# print $fh "$token\n@paths\n";
	# close $fh;

	binmode STDOUT, ":utf8";
	print $token, "\0";
	print join("\0", @paths);
}

# Runs `watchman clock` for the work tree and returns the decoded JSON
# response.  Dies when the command exits with a non-zero status.
sub watchman_clock {
	my $raw_json = qx/watchman clock "$git_work_tree"/;
	if ($? != 0) {
		die "Failed to get clock id on '$git_work_tree'.\n" .
			"Falling back to scanning...\n";
	}

	my $decoder = $json_pkg->new->utf8;
	return $decoder->decode($raw_json);
}

# Sends one "query" command to `watchman -j --no-pretty` through a pipe
# pair and returns the decoded JSON response.  Dies when the process cannot
# be started, when it prints nothing, or when its output does not start
# with "{".
sub watchman_query {
	my $pid = open2(\*CHLD_OUT, \*CHLD_IN, 'watchman -j --no-pretty')
	or die "open2() failed: $!\n" .
	"Falling back to scanning...\n";

	# In the query expression below we're asking for names of files that
	# changed since $last_update_token but not from the .git folder.
	#
	# To accomplish this, we're using the "since" generator to use the
	# recency index to select candidate nodes and "fields" to limit the
	# output to file names only. Then we're using the "expression" term to
	# further constrain the results.
	#
	# The "since" term is only added when the token starts with "c";
	# any other token yields a query without a "since" term.
	my $last_update_line = "";
	if (substr($last_update_token, 0, 1) eq "c") {
		$last_update_token = "\"$last_update_token\"";
		$last_update_line = qq[\n"since": $last_update_token,];
	}
	# The heredoc terminator is a tab followed by END.
	my $query = <<"	END";
		["query", "$git_work_tree", {$last_update_line
			"fields": ["name"],
			"expression": ["not", ["dirname", ".git"]]
		}]
	END

	# Uncomment for debugging the watchman query
	# open (my $fh, ">", ".git/watchman-query.json");
	# print $fh $query;
	# close $fh;

	# Send the query, close the write end, then slurp the whole reply
	# ($/ is undefined inside the do block).
	print CHLD_IN $query;
	close CHLD_IN;
	my $response = do {local $/; <CHLD_OUT>};

	# Uncomment for debugging the watch response
	# open ($fh, ">", ".git/watchman-response.json");
	# print $fh $response;
	# close $fh;

	die "Watchman: command returned no output.\n" .
	"Falling back to scanning...\n" if $response eq "";
	die "Watchman: command returned invalid output: $response\n" .
	"Falling back to scanning...\n" unless $response =~ /^\{/;

	return $json_pkg->new->utf8->decode($response);
}

# Decides whether the watchman query response in $output can be printed
# by the caller.
#
# Returns 1 when $output carries no error.
#
# When watchman reports that the work tree is not watched and the retry
# budget in $retry is not yet used up, this sub asks watchman to watch the
# tree, prints the current clock with "/" as the only path, re-runs the
# query once through launch_watchman(), and returns 0 because the output
# has already been written.
#
# Any other error, or an error reported by the "watch" or "clock"
# commands, dies with a "Falling back to scanning..." message.
sub is_work_tree_watched {
	my ($output) = @_;
	my $error = $output->{error};
	if ($retry > 0 and $error and $error =~ m/unable to resolve root .* directory (.*) is not watched/) {
		$retry--;
		my $response = qx/watchman watch "$git_work_tree"/;
		die "Failed to make watchman watch '$git_work_tree'.\n" .
		    "Falling back to scanning...\n" if $? != 0;
		$output = $json_pkg->new->utf8->decode($response);
		$error = $output->{error};
		die "Watchman: $error.\n" .
		"Falling back to scanning...\n" if $error;

		# Uncomment for debugging watchman output
		# open (my $fh, ">", ".git/watchman-output.out");
		# close $fh;

		# Watchman will always return all files on the first query so
		# return the fast "everything is dirty" flag to git and do the
		# Watchman query just to get it over with now so we won't pay
		# the cost in git to look up each individual file.
		my $o = watchman_clock();

		# Inspect the clock response itself.  The "watch" response held
		# in $output was already validated above, so re-reading its error
		# field here would never detect a failed clock request.
		$error = $o->{error};

		die "Watchman: $error.\n" .
		"Falling back to scanning...\n" if $error;

		output_result($o->{clock}, ("/"));
		$last_update_token = $o->{clock};

		# Best effort: a failure in the follow-up query is swallowed by
		# eval because the "everything is dirty" result is already out.
		eval { launch_watchman() };
		return 0;
	}

	die "Watchman: $error.\n" .
	"Falling back to scanning...\n" if $error;

	return 1;
}

# Returns the current working directory.  When $^O matches msys or cygwin
# the path comes from Win32::GetCwd() with backslashes turned into forward
# slashes; everywhere else it comes from Cwd::cwd().
sub get_working_dir {
	if ($^O =~ /msys|cygwin/) {
		my $win_cwd = Win32::GetCwd();
		$win_cwd =~ tr/\\/\//;
		return $win_cwd;
	}

	require Cwd;
	my $cwd = Cwd::cwd();
	return $cwd;
}


================================================
FILE: .git/hooks/post-update.sample
================================================
#!/bin/sh
#
# An example hook script to prepare a packed repository for use over
# dumb transports.
#
# To enable this hook, rename this file to "post-update".

# Replace this shell with "git update-server-info", so the hook's exit
# status is whatever that command returns.
exec git update-server-info


================================================
FILE: .git/hooks/pre-applypatch.sample
================================================
#!/bin/sh
#
# An example hook script to verify what is about to be committed
# by applypatch from an e-mail message.
#
# The hook should exit with non-zero status after issuing an
# appropriate message if it wants to stop the commit.
#
# To enable this hook, rename this file to "pre-applypatch".

# Source git's shell setup script before locating the sibling hook.
. git-sh-setup

# When a pre-commit hook is installed and executable, hand control over to
# it with every argument this hook received; its exit status becomes ours.
hook_path="$(git rev-parse --git-path hooks/pre-commit)"
if test -x "$hook_path"
then
	exec "$hook_path" ${1+"$@"}
fi

# No pre-commit hook to delegate to: report success.
:


================================================
FILE: .git/hooks/pre-commit.sample
================================================
#!/bin/sh
#
# An example hook script to verify what is about to be committed.
# Called by "git commit" with no arguments.  The hook should
# exit with non-zero status after issuing an appropriate message if
# it wants to stop the commit.
#
# To enable this hook, rename this file to "pre-commit".

# Diff against HEAD when it resolves; on the initial commit there is no
# HEAD yet, so fall back to an empty tree object.
against=HEAD
git rev-parse --verify HEAD >/dev/null 2>&1 ||
	against=$(git hash-object -t tree /dev/null)

# Set hooks.allownonascii to true to permit non-ASCII filenames.
allownonascii=$(git config --type=bool hooks.allownonascii)

# From here on everything written to stdout goes to stderr.
exec 1>&2

# Cross platform projects tend to avoid non-ASCII filenames, so refuse to
# add them unless explicitly allowed.  The printable ASCII range runs from
# the space character to tilde; deleting those bytes (and the NUL
# separators) from the list of added names leaves only offending bytes.
if [ "$allownonascii" != "true" ]
then
	# The brackets around the tr range are fine here (and required for
	# portability to Solaris 10's /usr/bin/tr) because the square bracket
	# bytes themselves fall inside the designated range.
	nonascii_bytes=$(git diff-index --cached --name-only --diff-filter=A -z $against |
	  LC_ALL=C tr -d '[ -~]\0' | wc -c)
	if test $nonascii_bytes != 0
	then
		cat <<\EOF
Error: Attempt to add a non-ASCII file name.

This can cause problems if you want to work with people on other platforms.

To be portable it is advisable to rename the file.

If you know what you are doing you can disable this check using:

  git config hooks.allownonascii true
EOF
		exit 1
	fi
fi

# Finish with the whitespace check: it prints the offending file names and
# its exit status decides whether the commit proceeds.
exec git diff-index --check --cached $against --


================================================
FILE: .git/hooks/pre-merge-commit.sample
================================================
#!/bin/sh
#
# An example hook script to verify what is about to be committed.
# Called by "git merge" with no arguments.  The hook should
# exit with non-zero status after issuing an appropriate message to
# stderr if it wants to stop the merge commit.
#
# To enable this hook, rename this file to "pre-merge-commit".

# Source git's shell setup script; the check below reads $GIT_DIR.
. git-sh-setup

# Reuse the pre-commit hook for merge commits when it is installed and
# executable; its exit status becomes ours.
if test -x "$GIT_DIR/hooks/pre-commit"
then
	exec "$GIT_DIR/hooks/pre-commit"
fi

# No pre-commit hook to delegate to: report success.
:


================================================
FILE: .git/hooks/pre-push.sample
================================================
#!/bin/sh

# An example hook script to verify what is about to be pushed.  Called by "git
# push" after it has checked the remote status, but before anything has been
# pushed.  If this script exits with a non-zero status nothing will be pushed.
#
# This hook is called with the following parameters:
#
# $1 -- Name of the remote to which the push is being done
# $2 -- URL to which the push is being done
#
# If pushing without using a named remote those arguments will be equal.
#
# Information about the commits which are being pushed is supplied as lines to
# the standard input in the form:
#
#   <local ref> <local oid> <remote ref> <remote oid>
#
# This sample shows how to prevent push of commits where the log message starts
# with "WIP" (work in progress).

remote="$1"
url="$2"

# An object id made only of zeros, as long as the ids this repository
# uses; it marks a ref that does not exist on one side of the push.
null_oid=$(git hash-object --stdin </dev/null | tr '[0-9a-f]' '0')

while read local_ref local_oid remote_ref remote_oid
do
	# Deleting a remote ref pushes no commits, so there is nothing to check.
	if test "$local_oid" = "$null_oid"
	then
		continue
	fi

	if test "$remote_oid" != "$null_oid"
	then
		# Update to existing branch, examine new commits
		revs="$remote_oid..$local_oid"
	else
		# New branch, examine all commits
		revs="$local_oid"
	fi

	# Refuse the push as soon as one commit message starts with "WIP".
	wip_commit=$(git rev-list -n 1 --grep '^WIP' "$revs")
	if test -n "$wip_commit"
	then
		echo >&2 "Found WIP commit in $local_ref, not pushing"
		exit 1
	fi
done

exit 0


================================================
FILE: .git/hooks/pre-rebase.sample
================================================
#!/bin/sh
#
# Copyright (c) 2006, 2008 Junio C Hamano
#
# The "pre-rebase" hook is run just before "git rebase" starts doing
# its job, and can prevent the command from running by exiting with
# non-zero status.
#
# The hook is called with the following parameters:
#
# $1 -- the upstream the series was forked from.
# $2 -- the branch being rebased (or empty when rebasing the current branch).
#
# This sample shows how to prevent topic branches that are already
# merged to 'next' branch from getting rebased, because allowing it
# would result in rebasing already published history.

# Name of the branch that published topics get merged into.
publish=next
# Upstream the series was forked from.  Not referenced again in the
# checks below.
basebranch="$1"
# With two arguments the second names the branch being rebased; otherwise
# the current branch is used.
if test "$#" = 2
then
	topic="refs/heads/$2"
else
	topic=`git symbolic-ref HEAD` ||
	exit 0 ;# we do not interrupt rebasing detached HEAD
fi

# Only branches matching refs/heads/??/* (a two-character directory
# prefix followed by a name) are checked; everything else may be rebased.
case "$topic" in
refs/heads/??/*)
	;;
*)
	exit 0 ;# we do not interrupt others.
	;;
esac

# Now we are dealing with a topic branch being rebased
# on top of master.  Is it OK to rebase it?

# Does the topic really exist?
git show-ref -q "$topic" || {
	echo >&2 "No such branch $topic"
	exit 1
}

# Is topic fully merged to master?
# not_in_master holds one "<id> <subject>" line per topic commit that
# master does not contain.
not_in_master=`git rev-list --pretty=oneline ^master "$topic"`
if test -z "$not_in_master"
then
	echo >&2 "$topic is fully merged to master; better remove it."
	exit 1 ;# we could allow it, but there is no point.
fi

# Is topic ever merged to next?  If so you should not be rebasing it.
# The two lists are equal exactly when excluding the topic removes no
# commit from what the publish branch has on top of master.
only_next_1=`git rev-list ^master "^$topic" ${publish} | sort`
only_next_2=`git rev-list ^master           ${publish} | sort`
if test "$only_next_1" = "$only_next_2"
then
	# Topic never reached the publish branch: allow the rebase unless
	# master has nothing the topic lacks.
	not_in_topic=`git rev-list "^$topic" master`
	if test -z "$not_in_topic"
	then
		echo >&2 "$topic is already up to date with master"
		exit 1 ;# we could allow it, but there is no point.
	else
		exit 0
	fi
else
	# Topic commits are already in the publish branch: refuse, and let
	# the perl snippet list every commit from $not_in_master whose id is
	# absent from $not_in_next, under a one-line heading on stderr.
	not_in_next=`git rev-list --pretty=oneline ^${publish} "$topic"`
	/usr/bin/perl -e '
		my $topic = $ARGV[0];
		my $msg = "* $topic has commits already merged to public branch:\n";
		my (%not_in_next) = map {
			/^([0-9a-f]+) /;
			($1 => 1);
		} split(/\n/, $ARGV[1]);
		for my $elem (map {
				/^([0-9a-f]+) (.*)$/;
				[$1 => $2];
			} split(/\n/, $ARGV[2])) {
			if (!exists $not_in_next{$elem->[0]}) {
				if ($msg) {
					print STDERR $msg;
					undef $msg;
				}
				print STDERR " $elem->[1]\n";
			}
		}
	' "$topic" "$not_in_next" "$not_in_master"
	exit 1
fi

<<\DOC_END

This sample hook safeguards topic branches that have been
published from being rewound.

The workflow assumed here is:

 * Once a topic branch forks from "master", "master" is never
   merged into it again (either directly or indirectly).

 * Once a topic branch is fully cooked and merged into "master",
   it is deleted.  If you need to build on top of it to correct
   earlier mistakes, a new topic branch is created by forking at
   the tip of the "master".  This is not strictly necessary, but
   it makes it easier to keep your history simple.

 * Whenever you need to test or publish your changes to topic
   branches, merge them into "next" branch.

The script, being an example, hardcodes the publish branch name
to be "next", but it is trivial to make it configurable via
$GIT_DIR/config mechanism.

With this workflow, you would want to know:

(1) ... if a topic branch has ever been merged to "next".  Young
    topic branches can have stupid mistakes you would rather
    clean up before publishing, and things that have not been
    merged into other branches can be easily rebased without
    affecting other people.  But once it is published, you would
    not want to rewind it.

(2) ... if a topic branch has been fully merged to "master".
    Then you can delete it.  More importantly, you should not
    build on top of it -- other people may already want to
    change things related to the topic as patches against your
    "master", so if you need further changes, it is better to
    fork the topic (perhaps with the same name) afresh from the
    tip of "master".

Let's look at this example:

		   o---o---o---o---o---o---o---o---o---o "next"
		  /       /           /           /
		 /   a---a---b A     /           /
		/   /               /           /
	       /   /   c---c---c---c B         /
	      /   /   /             \         /
	     /   /   /   b---b C     \       /
	    /   /   /   /             \     /
    ---o---o---o---o---o---o---o---o---o---o---o "master"


A, B and C are topic branches.

 * A has one fix since it was merged up to "next".

 * B has finished.  It has been fully merged up to "master" and "next",
   and is ready to be deleted.

 * C has not merged to "next" at all.

We would want to allow C to be rebased, refuse A, and encourage
B to be deleted.

To compute (1):

	git rev-list ^master ^topic next
	git rev-list ^master        next

	if these match, topic has not merged in next at all.

To compute (2):

	git rev-list master..topic

	if this is empty, it is fully merged to "master".

DOC_END


================================================
FILE: .git/hooks/pre-receive.sample
================================================
#!/bin/sh
#
# An example hook script to make use of push options.
# The example simply echoes all push options that start with 'echoback='
# and rejects all pushes when the "reject" push option is used.
#
# To enable this hook, rename this file to "pre-receive".

# Nothing to do when the push carried no push options.
test -n "$GIT_PUSH_OPTION_COUNT" || exit 0

# Walk GIT_PUSH_OPTION_0 .. GIT_PUSH_OPTION_<count-1>.
idx=0
while test "$idx" -lt "$GIT_PUSH_OPTION_COUNT"
do
	# Indirect lookup of the variable named GIT_PUSH_OPTION_<idx>.
	eval "opt=\$GIT_PUSH_OPTION_$idx"
	case "$opt" in
	reject)
		# The "reject" option refuses the whole push.
		exit 1
		;;
	echoback=*)
		# Echo back everything after the first "=" on stderr.
		echo "echo from the pre-receive-hook: ${opt#*=}" >&2
		;;
	esac
	idx=$((idx + 1))
done


================================================
FILE: .git/hooks/prepare-commit-msg.sample
================================================
#!/bin/sh
#
# An example hook script to prepare the commit log message.
# Called by "git commit" with the name of the file that has the
# commit message, followed by the description of the commit
# message's source.  The hook's purpose is to edit the commit
# message file.  If the hook fails with a non-zero status,
# the commit is aborted.
#
# To enable this hook, rename this file to "prepare-commit-msg".

# This hook includes three examples. The first one removes the
# "# Please enter the commit message..." help message.
#
# The second inserts the output of "git diff --name-status -r"
# into the message, just before the "git status" output.  It is
# commented out because it doesn't cope with --amend or with squashed
# commits.
#
# The third example adds a Signed-off-by line to the message, that can
# still be edited.  This is rarely a good idea.

# Positional arguments passed by git.  Only COMMIT_MSG_FILE is used by the
# active code; COMMIT_SOURCE and SHA1 are referenced solely by the
# commented-out examples further down.
COMMIT_MSG_FILE=$1
COMMIT_SOURCE=$2
SHA1=$3

# First example: edit the message file in place (keeping a .bak copy) and
# drop every line from the one matching "<any character> Please enter the
# commit message" through the next line that consists of "#" alone.
# NOTE(review): the leading "." presumably allows for a comment character
# other than "#" -- confirm.
/usr/bin/perl -i.bak -ne 'print unless(m/^. Please enter the commit message/..m/^#$/)' "$COMMIT_MSG_FILE"

# case "$COMMIT_SOURCE,$SHA1" in
#  ,|template,)
#    /usr/bin/perl -i.bak -pe '
#       print "\n" . `git diff --cached --name-status -r`
# 	 if /^#/ && $first++ == 0' "$COMMIT_MSG_FILE" ;;
#  *) ;;
# esac

# SOB=$(git var GIT_COMMITTER_IDENT | sed -n 's/^\(.*>\).*$/Signed-off-by: \1/p')
# git interpret-trailers --in-place --trailer "$SOB" "$COMMIT_MSG_FILE"
# if test -z "$COMMIT_SOURCE"
# then
#   /usr/bin/perl -i.bak -pe 'print "\n" if !$first_line++' "$COMMIT_MSG_FILE"
# fi


================================================
FILE: .git/hooks/push-to-checkout.sample
================================================
#!/bin/sh

# An example hook script to update a checked-out tree on a git push.
#
# This hook is invoked by git-receive-pack(1) when it reacts to git
# push and updates reference(s) in its repository, and when the push
# tries to update the branch that is currently checked out and the
# receive.denyCurrentBranch configuration variable is set to
# updateInstead.
#
# By default, such a push is refused if the working tree and the index
# of the remote repository has any difference from the currently
# checked out commit; when both the working tree and the index match
# the current commit, they are updated to match the newly pushed tip
# of the branch. This hook is to be used to override the default
# behaviour; however the code below reimplements the default behaviour
# as a starting point for convenient modification.
#
# The hook receives the commit with which the tip of the current
# branch is going to be updated:
# First hook argument: the commit the checked-out branch is being moved to.
commit=$1

# It can exit with a non-zero status to refuse the push (when it does
# so, it must not modify the index or the working tree).
#
# die MESSAGE...: print the arguments, joined into one line, on stderr and
# exit with status 1.
die () {
	echo >&2 "$*"
	exit 1
}

# Or it can make any necessary changes to the working tree and to the
# index to bring them to the desired state when the tip of the current
# branch is updated to the new commit, and exit with a zero status.
#
# For example, the hook can simply run git read-tree -u -m HEAD "$1"
# in order to emulate git fetch that is run in the reverse direction
# with git push, as the two-tree form of git read-tree -u -m is
# essentially the same as git switch or git checkout that switches
# branches while keeping the local changes in the working tree that do
# not interfere with the difference between the branches.

# The below is a more-or-less exact translation to shell of the C code
# for the default behaviour for git's push-to-checkout hook defined in
# the push_to_deploy() function in builtin/receive-pack.c.
#
# Note that the hook will be executed from the repository directory,
# not from the working tree, so if you want to perform operations on
# the working tree, you will have to adapt your code accordingly, e.g.
# by adding "cd .." or using relative paths.

# Refresh the index first; if that fails we cannot judge whether the
# working tree is up to date.
git update-index -q --ignore-submodules --refresh ||
	die "Up-to-date check failed"

# Refuse the push when the working tree differs from the index.
git diff-files --quiet --ignore-submodules -- ||
	die "Working directory has unstaged changes"

# Choose what the index is compared against: HEAD when it names an
# existing object, otherwise the hash of an empty tree.  This is a rough
# translation of:
#
#   head_has_history() ? "HEAD" : EMPTY_TREE_SHA1_HEX
head=HEAD
git cat-file -e HEAD 2>/dev/null ||
	head=$(git hash-object -t tree --stdin </dev/null)

# Refuse the push when the index differs from that tree.
git diff-index --quiet --cached --ignore-submodules $head -- ||
	die "Working directory has staged changes"

# Everything is clean: bring the index and the working tree to the pushed
# commit.
git read-tree -u -m "$commit" ||
	die "Could not update working tree to new HEAD"


================================================
FILE: .git/hooks/sendemail-validate.sample
================================================
#!/bin/sh

# An example hook script to validate a patch (and/or patch series) before
# sending it via email.
#
# The hook should exit with non-zero status after issuing an appropriate
# message if it wants to prevent the email(s) from being sent.
#
# To enable this hook, rename this file to "sendemail-validate".
#
# By default, it will only check that the patch(es) can be applied on top of
# the default upstream branch without conflicts in a secondary worktree. After
# validation (successful or not) of the last patch of a series, the worktree
# will be deleted.
#
# The following config variables can be set to change the default remote and
# remote ref that are used to apply the patches against:
#
#   sendemail.validateRemote (default: origin)
#   sendemail.validateRemoteRef (default: HEAD)
#
# Replace the TODO placeholders with appropriate checks according to your
# needs.

# Check the cover letter stored in the file named by $1.
# Placeholder implementation: runs no checks and always returns 0.
validate_cover_letter () {
	file=$1
	# TODO: Replace with appropriate checks (e.g. spell checking).
	return 0
}

# Check the patch stored in the file named by $1 by applying it with
# "git am -3" in the current directory.  Returns git am's exit status when
# the patch fails to apply, and 0 otherwise.
validate_patch () {
	file=$1
	# Ensure that the patch applies without conflicts.
	git am -3 "$file" || return $?
	# TODO: Replace with appropriate checks for this patch
	# (e.g. checkpatch.pl).
	return 0
}

# Check the patch series as a whole.
# Placeholder implementation: runs no checks and always returns 0.
validate_series () {
	# TODO: Replace with appropriate checks for the whole series
	# (e.g. quick build, coding style checks, etc.).
	return 0
}

# main -------------------------------------------------------------------------

# Prepare the validation worktree.  For the first file of a series
# (GIT_SENDEMAIL_FILE_COUNTER is 1) a temporary directory is created, a
# detached worktree of refs/remotes/<remote>/<ref> is checked out into it,
# and its path is stored in the sendemail.validateWorktree config entry.
# For every later file the path is read back from that entry.  If any
# step fails, an error is printed and the hook exits with status 1.
if test "$GIT_SENDEMAIL_FILE_COUNTER" = 1
then
	remote=$(git config --default origin --get sendemail.validateRemote) &&
	ref=$(git config --default HEAD --get sendemail.validateRemoteRef) &&
	worktree=$(mktemp --tmpdir -d sendemail-validate.XXXXXXX) &&
	git worktree add -fd --checkout "$worktree" "refs/remotes/$remote/$ref" &&
	git config --replace-all sendemail.validateWorktree "$worktree"
else
	worktree=$(git config --get sendemail.validateWorktree)
fi || {
	echo "sendemail-validate: error: failed to prepare worktree" >&2
	exit 1
}

# NOTE(review): presumably cleared so that the git commands below locate
# the repository from the worktree directory instead of the inherited
# environment -- confirm.
unset GIT_DIR GIT_WORK_TREE
cd "$worktree" &&

# A file containing a line that starts with "diff --git " is validated as
# a patch; any other file is validated as a cover letter.
if grep -q "^diff --git " "$1"
then
	validate_patch "$1"
else
	validate_cover_letter "$1"
fi &&

# Reached only when every step above succeeded.  For the last file of the
# series: drop the stored worktree path, register removal of the worktree
# on script exit, then run the whole-series check.
if test "$GIT_SENDEMAIL_FILE_COUNTER" = "$GIT_SENDEMAIL_FILE_TOTAL"
then
	git config --unset-all sendemail.validateWorktree &&
	trap 'git worktree remove -ff "$worktree"' EXIT &&
	validate_series
fi


================================================
FILE: .git/hooks/update.sample
================================================
#!/bin/sh
#
# An example hook script to block unannotated tags from entering.
# Called by "git receive-pack" with arguments: refname sha1-old sha1-new
#
# To enable this hook, rename this file to "update".
#
# Config
# ------
# hooks.allowunannotated
#   This boolean sets whether unannotated tags will be allowed into the
#   repository.  By default they won't be.
# hooks.allowdeletetag
#   This boolean sets whether deleting tags will be allowed in the
#   repository.  By default they won't be.
# hooks.allowmodifytag
#   This boolean sets whether a tag may be modified after creation. By default
#   it won't be.
# hooks.allowdeletebranch
#   This boolean sets whether deleting branches will be allowed in the
#   repository.  By default they won't be.
# hooks.denycreatebranch
#   This boolean sets whether remotely creating branches will be denied
#   in the repository.  By default this is allowed.
#

# --- Command line
# Positional arguments: the ref being updated, its old object id and the
# proposed new object id.
refname="$1"
oldrev="$2"
newrev="$3"

# --- Safety check
# The hook relies on GIT_DIR being set in its environment; refuse to run
# without it.
if [ -z "$GIT_DIR" ]; then
	echo "Don't run this script from the command line." >&2
	echo " (if you want, you could supply GIT_DIR then run" >&2
	echo "  $0 <ref> <oldrev> <newrev>)" >&2
	exit 1
fi

# All three arguments are required.  Separate tests joined with || are used
# instead of the obsolescent "-o" operator, whose parsing is ambiguous when
# an operand looks like a test operator.
if [ -z "$refname" ] || [ -z "$oldrev" ] || [ -z "$newrev" ]; then
	echo "usage: $0 <ref> <oldrev> <newrev>" >&2
	exit 1
fi

# --- Config
# Policy switches, read as booleans from the repository configuration.
# Tag policies first, then branch policies.
allowunannotated=$(git config --type=bool hooks.allowunannotated)
allowdeletetag=$(git config --type=bool hooks.allowdeletetag)
allowmodifytag=$(git config --type=bool hooks.allowmodifytag)
allowdeletebranch=$(git config --type=bool hooks.allowdeletebranch)
denycreatebranch=$(git config --type=bool hooks.denycreatebranch)

# Reject the update when the first line of the description file is empty
# or still starts with the "Unnamed repository" placeholder.
projectdesc=$(sed 1q "$GIT_DIR/description")
case "$projectdesc" in
"" | "Unnamed repository"*)
	echo "*** Project description file hasn't been set" >&2
	exit 1
	;;
esac

# --- Check types
# Classify the update and apply the matching policy.  Every rejected case
# prints a "***" message to stderr and exits with status 1; accepted cases
# fall through to the final "exit 0".
#
# "zero" is an all-zero object id: the hash of empty input with every hex
# digit replaced by 0, so its length follows the repository's hash format.
# If $newrev is 0000...0000, it's a commit to delete a ref.
zero=$(git hash-object --stdin </dev/null | tr '[0-9a-f]' '0')
if [ "$newrev" = "$zero" ]; then
	newrev_type=delete
else
	# Quoted so the value is always passed as exactly one argument.
	newrev_type=$(git cat-file -t "$newrev")
fi

case "$refname","$newrev_type" in
	refs/tags/*,commit)
		# un-annotated tag
		short_refname=${refname##refs/tags/}
		if [ "$allowunannotated" != "true" ]; then
			echo "*** The un-annotated tag, $short_refname, is not allowed in this repository" >&2
			echo "*** Use 'git tag [ -a | -s ]' for tags you want to propagate." >&2
			exit 1
		fi
		;;
	refs/tags/*,delete)
		# delete tag
		if [ "$allowdeletetag" != "true" ]; then
			echo "*** Deleting a tag is not allowed in this repository" >&2
			exit 1
		fi
		;;
	refs/tags/*,tag)
		# annotated tag: rejected when the ref already resolves and
		# modifying tags has not been allowed
		if [ "$allowmodifytag" != "true" ] && git rev-parse "$refname" >/dev/null 2>&1
		then
			echo "*** Tag '$refname' already exists." >&2
			echo "*** Modifying a tag is not allowed in this repository." >&2
			exit 1
		fi
		;;
	refs/heads/*,commit)
		# branch: an all-zero old id means the branch is being created.
		# Two tests joined with && replace the obsolescent "-a" operator.
		if [ "$oldrev" = "$zero" ] && [ "$denycreatebranch" = "true" ]; then
			echo "*** Creating a branch is not allowed in this repository" >&2
			exit 1
		fi
		;;
	refs/heads/*,delete)
		# delete branch
		if [ "$allowdeletebranch" != "true" ]; then
			echo "*** Deleting a branch is not allowed in this repository" >&2
			exit 1
		fi
		;;
	refs/remotes/*,commit)
		# tracking branch
		;;
	refs/remotes/*,delete)
		# delete tracking branch
		if [ "$allowdeletebranch" != "true" ]; then
			echo "*** Deleting a tracking branch is not allowed in this repository" >&2
			exit 1
		fi
		;;
	*)
		# Anything else (is there anything else?)
		echo "*** Update hook: unknown type of update to ref $refname of type $newrev_type" >&2
		exit 1
		;;
esac

# --- Finished
exit 0


================================================
FILE: .git/info/exclude
================================================
# git ls-files --others --exclude-from=.git/info/exclude
# Lines that start with '#' are comments.
# For a project mostly in C, the following would be a good set of
# exclude patterns (uncomment them if you want to use them):
# *.[oa]
# *~


================================================
FILE: .git/logs/HEAD
================================================
0000000000000000000000000000000000000000 b49833bf56a0e102b8ac1ff95ed7766545f5bd1e appuser <appuser@6f4aff7aca96.(none)> 1774064477 +0000	clone: from https://github.com/alibaba/zvec


================================================
FILE: .git/logs/refs/heads/main
================================================
0000000000000000000000000000000000000000 b49833bf56a0e102b8ac1ff95ed7766545f5bd1e appuser <appuser@6f4aff7aca96.(none)> 1774064477 +0000	clone: from https://github.com/alibaba/zvec


================================================
FILE: .git/logs/refs/remotes/origin/HEAD
================================================
0000000000000000000000000000000000000000 b49833bf56a0e102b8ac1ff95ed7766545f5bd1e appuser <appuser@6f4aff7aca96.(none)> 1774064477 +0000	clone: from https://github.com/alibaba/zvec


================================================
FILE: .git/objects/pack/pack-2b5e15ebe928a592991dc24c7ae7e8dc9e3500dc.promisor
================================================
b49833bf56a0e102b8ac1ff95ed7766545f5bd1e refs/heads/main


================================================
FILE: .git/packed-refs
================================================
# pack-refs with: peeled fully-peeled sorted 
b49833bf56a0e102b8ac1ff95ed7766545f5bd1e refs/remotes/origin/main


================================================
FILE: .git/refs/heads/main
================================================
b49833bf56a0e102b8ac1ff95ed7766545f5bd1e


================================================
FILE: .git/refs/remotes/origin/HEAD
================================================
ref: refs/remotes/origin/main


================================================
FILE: .git/shallow
================================================
b49833bf56a0e102b8ac1ff95ed7766545f5bd1e


================================================
FILE: .github/ISSUE_TEMPLATE/benchmark.yml
================================================
name: Benchmarking
description: Add, update, or fix benchmark cases for zvec
title: "[Benchmark]: "
labels: ["benchmark"]
body:
  - type: markdown
    attributes:
      value: |
        Use this for benchmark-related work: new test cases, CI integration, or performance regression tracking.
  
  - type: input
    id: benchmark_type
    attributes:
      label: Benchmark Type
      description: e.g., filtered search, batch insert, recall@k, ARM64 vs x86
    validations:
      required: true
  
  - type: textarea
    id: goal
    attributes:
      label: Goal
      description: What performance aspect are you measuring or improving?
    validations:
      required: true
  
  - type: textarea
    id: methodology
    attributes:
      label: Methodology
      description: Dataset, query size, hardware, metrics (latency, throughput, memory)
    validations:
      required: true
  
  - type: textarea
    id: baseline
    attributes:
      label: Baseline (if applicable)
      description: Current performance numbers or competing systems for comparison.
    validations:
      required: false
  
  - type: textarea
    id: ci_integration
    attributes:
      label: CI Integration Plan
      description: Should this run in CI? How often?
    validations:
      required: false

================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.yml
================================================
name: Bug Report
description: Report a bug or unexpected behavior (e.g., crash, incorrect vector query, memory leak)
title: "[Bug]: "
labels: ["bug", "triage"]
body:
  - type: markdown
    attributes:
      value: |
        Thank you for reporting! Please provide detailed info so we can reproduce and fix it quickly.

  - type: textarea
    id: description
    attributes:
      label: Description
      description: What happened? What did you expect?
      placeholder: |
        e.g. "Query with vector field crashes when using Zvec Python API"
    validations:
      required: true

  - type: textarea
    id: steps_to_reproduce
    attributes:
      label: Steps to Reproduce
      description: Exact steps to trigger the issue (code snippets welcome)
      placeholder: |
        1. Build Zvec with CMake (Debug/Release)
        2. Run Python script: `python test.py`
        3. Call `collection.query(VectorQuery())`
        4. Process segfaults / hangs / returns wrong results
      render: python
    validations:
      required: true

  - type: textarea
    id: logs_or_trace
    attributes:
      label: Logs / Stack Trace
      description: Paste relevant logs, LLDB/GDB backtrace, or CI failures
      placeholder: |
        Thread 1 "python" received signal SIGSEGV, Segmentation fault.
        0x0000000104a2c3f0 in std::__1::shared_ptr<...>::...
      render: shell
    validations:
      required: false

  - type: input
    id: os
    attributes:
      label: Operating System
      placeholder: macOS 14 (M1), Ubuntu 22.04, Windows 11 (WSL2)
    validations:
      required: true

  - type: input
    id: build_env
    attributes:
      label: Build & Runtime Environment
      description: Compiler, CMake, Python, key dependencies
      placeholder: |
        clang 15.0.0, CMake 4.1.2, Python 3.11.9, magic_enum v0.9.7 (via git submodule)
    validations:
      required: true

  - type: checkboxes
    id: additional_context
    attributes:
      label: Additional Context
      options:
        - label: I've checked `git status` — no uncommitted submodule changes
        - label: I built with `CMAKE_BUILD_TYPE=Debug`
        - label: This occurs with or without `COVERAGE=ON`
        - label: The issue involves Python ↔ C++ integration (pybind11)

================================================
FILE: .github/ISSUE_TEMPLATE/config.yml
================================================
blank_issues_enabled: false
contact_links:
  - name: Documentation
    url: https://zvec.org/en/
    about: Check the quickstart, build guide, and API docs first.

  - name: Python API Examples
    url: https://zvec.org/en/docs/quickstart/
    about: See working usage examples.


================================================
FILE: .github/ISSUE_TEMPLATE/enhancement.yml
================================================
name: Enhancement
description: Improve an existing feature or component
title: "[Enhance]: "
labels: ["enhancement"]
body:
  - type: markdown
    attributes:
      value: |
        This template is for improving existing functionality (e.g., performance, usability, robustness).
  
  - type: input
    id: component
    attributes:
      label: Affected Component
      description: e.g., HNSW index, buffer manager, Python API
    validations:
      required: true
  
  - type: textarea
    id: current
    attributes:
      label: Current Behavior
      description: What is the current state and its limitations?
    validations:
      required: true
  
  - type: textarea
    id: desired
    attributes:
      label: Desired Improvement
      description: What should be improved and how?
    validations:
      required: true
  
  - type: textarea
    id: impact
    attributes:
      label: Impact
      description: How will this benefit users? (e.g., faster queries, lower memory, easier integration)
    validations:
      required: true

================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.yml
================================================
name: Feature Request
description: Suggest a new feature or improvement (e.g., better memory control, new query option)
title: "[Feature]: "
labels: ["feature"]
body:
  - type: markdown
    attributes:
      value: |
        Thanks for your idea! Help us understand the motivation and scope.

  - type: textarea
    id: problem_or_motivation
    attributes:
      label: Problem / Motivation
      description: What problem does this solve? Why is it needed?
      placeholder: |
        e.g. "Current vector queries don't allow filtering by metadata + distance threshold at once"
    validations:
      required: true

  - type: textarea
    id: proposed_solution
    attributes:
      label: Proposed Solution
      description: How should it work? API sketch or pseudocode welcome.
      placeholder: |
        Add `filter=` and `max_distance=` args to `Zvec.query()`:
        ```python
        results = db.query(vector, filter="category == 'A'", max_distance=0.5)
        ```
      render: python
    validations:
      required: false

  - type: textarea
    id: alternatives
    attributes:
      label: Alternatives Considered
      description: Are there workarounds? Why not use them?
    validations:
      required: false

  # Dropdown options must be plain strings; the `- label:` mapping form is
  # only accepted by `type: checkboxes`.
  - type: dropdown
    id: impact_area
    attributes:
      label: Affected Area
      multiple: true
      options:
        - "C++ Core (storage, indexing)"
        - "Python API / Bindings"
        - "Build System (CMake, Homebrew pkg)"
        - "Testing / CI / Coverage"
        - "Documentation"
    validations:
      required: false

================================================
FILE: .github/ISSUE_TEMPLATE/integration.yml
================================================
name: Ecosystem Integration
description: Integrate zvec with external frameworks (e.g., LangChain, LlamaIndex)
title: "[Integration]: "
labels: ["integration"]
body:
  - type: input
    id: framework
    attributes:
      label: Target Framework
      description: e.g., LangChain, LlamaIndex, Haystack
    validations:
      required: true
  
  - type: textarea
    id: motivation
    attributes:
      label: Motivation
      description: Why integrate with this framework? Who benefits?
    validations:
      required: true
  
  - type: textarea
    id: interface
    attributes:
      label: Required Interface
      description: What adapter or interface must be implemented? (e.g., VectorStore base class)
    validations:
      required: true
  
  - type: textarea
    id: reference
    attributes:
      label: Reference Implementations
      description: Links to similar integrations in other vector DBs.
    validations:
      required: false

================================================
FILE: .github/ISSUE_TEMPLATE/profiling.yml
================================================
name: Profiling / Investigation
description: Profile performance, compatibility, or behavior in a specific scenario
title: "[Profile]: "
labels: ["profile"]
body:
  - type: markdown
    attributes:
      value: |
        Use this for tasks like performance profiling, architecture compatibility checks, or feasibility studies.
  
  - type: input
    id: scenario
    attributes:
      label: Target Scenario
      description: e.g., ARM64 deployment, high-concurrency load, large dataset ingestion
    validations:
      required: true
  
  - type: textarea
    id: objective
    attributes:
      label: Objective
      description: What do you want to learn or validate?
    validations:
      required: true
  
  - type: textarea
    id: methodology
    attributes:
      label: Proposed Methodology
      description: How will you conduct the investigation? (tools, metrics, test data)
    validations:
      required: true
  
  - type: textarea
    id: expected_outcome
    attributes:
      label: Expected Outcome
      description: What deliverables are expected? (e.g., report, optimization PR, benchmark results)
    validations:
      required: true

================================================
FILE: .github/codecov.yml
================================================
codecov:
  require_ci_to_pass: true

coverage:
  precision: 2
  round: down
  range: "60...75"

  status:
    project:
      default: false
    patch:
      default: false


parsers:
  gcov:
    branch_detection:
      conditional: true
      loop: true
      method: false
      macro: false

comment:
  require_changes: false
  layout: "reach,diff,flags,tree"
  behavior: default


ignore:
  - "thirdparty/"
  - "tests/"

================================================
FILE: .github/dependabot.yml
================================================
version: 2
updates:
  # GitHub Actions dependencies
  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      interval: "weekly"
      day: "monday"
      time: "02:00"
      timezone: "Asia/Shanghai"
    labels:
      - "dependencies"
      - "github-actions"
    commit-message:
      prefix: "ci"
      include: "scope"
    open-pull-requests-limit: 5


================================================
FILE: .github/workflows/01-ci-pipeline.yml
================================================
name: Main

on:
  push:
    branches: [ "main" ]
    paths-ignore:
      - '**.md'
  merge_group:
  pull_request:
    branches: [ "main" ]
    paths-ignore:
      - '**.md'
  workflow_dispatch:

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  # Code quality checks (fast, run first)
  lint:
    uses: ./.github/workflows/02-lint-check.yml

  # Main build and test matrix
  build-and-test-macos-arm64:
    name: Build & Test (macos-arm64)
    needs: lint
    uses: ./.github/workflows/03-macos-linux-build.yml
    with:
      platform: macos-arm64
      os: macos-15

  build-and-test-linux-arm64:
    name: Build & Test (linux-arm64)
    needs: lint
    uses: ./.github/workflows/03-macos-linux-build.yml
    with:
      platform: linux-arm64
      os: ubuntu-24.04-arm

  build-and-test-linux-x64:
    name: Build & Test (linux-x64)
    needs: lint
    uses: ./.github/workflows/03-macos-linux-build.yml
    with:
      platform: linux-x64
      os: ubuntu-24.04

  build-android:
    name: Build & Test (android)
    needs: lint
    uses: ./.github/workflows/04-android-build.yml


================================================
FILE: .github/workflows/02-lint-check.yml
================================================
name: Lint

on:
  workflow_call:

jobs:
  lint:
    name: Code Quality Checks
    runs-on: ubuntu-24.04
    steps:
      - name: Checkout code
        uses: actions/checkout@v6

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: '3.10'
          cache: 'pip'
          cache-dependency-path: 'pyproject.toml'

      - name: Install linting tools
        run: |
          python -m pip install --upgrade pip \
            ruff==v0.14.4 \
            clang-format==18.1.8
        shell: bash

      - name: Run Ruff Linter
        run: python -m ruff check .
        shell: bash

      - name: Run Ruff Formatter Check
        run: python -m ruff format --check .
        shell: bash

      - name: Run clang-format Check
        run: |
          CPP_FILES=$(find . -type f \( -name "*.cpp" -o -name "*.h" -o -name "*.hpp" -o -name "*.cc" -o -name "*.cxx" \) \
            ! -path "./build/*" \
            ! -path "./tests/*" \
            ! -path "./scripts/*" \
            ! -path "./python/*" \
            ! -path "./thirdparty/*" \
            ! -path "./.git/*")

          if [ -z "$CPP_FILES" ]; then
            echo "No C++ files found to check."
            exit 0
          fi

          clang-format --dry-run --Werror $CPP_FILES
        shell: bash


================================================
FILE: .github/workflows/03-macos-linux-build.yml
================================================
name: MacOS & Linux Build

on:
  workflow_call:
    inputs:
      platform:
        description: 'Platform identifier'
        required: true
        type: string
      os:
        description: 'GitHub Actions runner OS'
        required: true
        type: string

permissions:
  contents: read

jobs:
  # Build and test matrix (parallel execution)
  build-and-test:
    name: Build & Test (${{ inputs.platform }})
    runs-on: ${{ inputs.os }}
    
    strategy:
      fail-fast: false
      matrix:
        include:
          - os: ${{ inputs.os }}
            platform: ${{ inputs.platform }}
            arch_flag: ""  # Use appropriate architecture

    steps:
      - name: Checkout code
        uses: actions/checkout@v6
        with:
          submodules: recursive

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: '3.10'
          cache: 'pip'
          cache-dependency-path: 'pyproject.toml'

      - name: Set up environment variables
        run: |
          # Set number of processors for parallel builds
          if [[ "${{ matrix.platform }}" == "macos-arm64" ]]; then
            NPROC=$(sysctl -n hw.ncpu 2>/dev/null || echo 2)
          else
            NPROC=$(nproc 2>/dev/null || echo 2)
          fi
          echo "NPROC=$NPROC" >> $GITHUB_ENV
          echo "Using $NPROC parallel jobs for builds"
          
          # Add Python user base bin to PATH for pip-installed CLI tools
          echo "$(python -c 'import site; print(site.USER_BASE)')/bin" >> $GITHUB_PATH
        shell: bash

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip \
            pybind11==3.0 \
            cmake==3.30.0 \
            ninja==1.11.1 \
            pytest \
            scikit-build-core \
            setuptools_scm
        shell: bash

      - name: Build from source
        run: |
          cd "$GITHUB_WORKSPACE"
          
          CMAKE_GENERATOR="Unix Makefiles" \
          CMAKE_BUILD_PARALLEL_LEVEL="$NPROC" \
          python -m pip install -v . \
            --no-build-isolation \
            --config-settings='cmake.define.BUILD_TOOLS="ON"' \
            ${{ matrix.arch_flag }}
        shell: bash

      - name: Run C++ Tests
        run: |
          cd "$GITHUB_WORKSPACE/build"
          make unittest -j$NPROC
        shell: bash

      - name: Run Python Tests
        run: |
          cd "$GITHUB_WORKSPACE"
          python -m pytest python/tests/
        shell: bash

      - name: Run C++ Examples
        run: |
          cd "$GITHUB_WORKSPACE/examples/c++"
          mkdir build && cd build
          cmake .. -DCMAKE_BUILD_TYPE=Release
          make -j $NPROC
          ./db-example
          ./core-example
          ./ailego-example
        shell: bash


================================================
FILE: .github/workflows/04-android-build.yml
================================================
name: Android Cross Build

on:
  workflow_call:

permissions:
  contents: read

jobs:
  build-android:
    # sdkmanager and other Android tools are x86-only, so ARM runners fail with exit code 1.
    # Use an x86 image so that the setup-android action can install the SDK.
    runs-on: ubuntu-24.04
    strategy:
      fail-fast: false
      matrix:
        abi: [x86_64]
        api: [21]

    steps:
      - name: Checkout
        uses: actions/checkout@v6

      - name: Cache dependencies
        uses: actions/cache@v5
        with:
          path: |
            ~/.ccache
          key: ${{ runner.os }}-dependencies-cache-${{ hashFiles('**/CMakeLists.txt', 'thirdparty/**') }}-stl-fix

      - name: Install dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y --no-install-recommends \
            cmake ninja-build git ca-certificates python3 \
            build-essential make ccache

      - name: Setup Java 17
        uses: actions/setup-java@v5
        with:
          distribution: temurin
          java-version: '17'

      - name: Setup Android SDK
        uses: android-actions/setup-android@v3

      - name: Install NDK (side by side)
        shell: bash
        run: |
          sdkmanager "ndk;26.1.10909125"

      - name: Cache host protoc build
        uses: actions/cache@v5
        with:
          path: build-host
          key: ${{ runner.os }}-host-protoc-${{ hashFiles('src/**', 'CMakeLists.txt') }}-stl-fix
          restore-keys: |
            ${{ runner.os }}-host-protoc-

      - name: Use host env to compile protoc
        shell: bash
        run: |
          git submodule update --init
          if [ ! -d "build-host" ]; then
            export CCACHE_BASEDIR="$GITHUB_WORKSPACE"
            export CCACHE_NOHASHDIR=1
            export CCACHE_SLOPPINESS=clang_index_store,file_stat_matches,include_file_mtime,locale,time_macros

            cmake -S . -B build-host -G Ninja \
              -DCMAKE_C_COMPILER_LAUNCHER=ccache \
              -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
            cmake --build build-host --target protoc --parallel
          else
            echo "Using cached host protoc build"
          fi

      - name: Cache Android build
        uses: actions/cache@v5
        with:
          path: build-android-${{ matrix.abi }}
          key: ${{ runner.os }}-android-build-${{ matrix.abi }}-${{ hashFiles('src/**', 'CMakeLists.txt', 'cmake/**', 'thirdparty/**') }}-stl-fix-3

      - name: Configure and Build
        shell: bash
        run: |
          git submodule foreach --recursive 'git stash --include-untracked'

          export ANDROID_SDK_ROOT="$ANDROID_HOME"
          export ANDROID_NDK_HOME="$ANDROID_SDK_ROOT/ndk/26.1.10909125"

          export CCACHE_BASEDIR="$GITHUB_WORKSPACE"
          export CCACHE_NOHASHDIR=1
          export CCACHE_SLOPPINESS=clang_index_store,file_stat_matches,include_file_mtime,locale,time_macros

          if [ ! -d "build-android-${{ matrix.abi }}" ]; then
            cmake -S . -B build-android-${{ matrix.abi }} -G Ninja \
              -DCMAKE_BUILD_TYPE=Release \
              -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK_HOME/build/cmake/android.toolchain.cmake" \
              -DANDROID_ABI=${{ matrix.abi }} \
              -DANDROID_PLATFORM=android-${{ matrix.api }} \
              -DANDROID_STL=c++_static \
              -DBUILD_PYTHON_BINDINGS=OFF \
              -DENABLE_NATIVE=OFF \
              -DAUTO_DETECT_ARCH=OFF \
              -DBUILD_TOOLS=OFF \
              -DGLOBAL_CC_PROTOBUF_PROTOC="$GITHUB_WORKSPACE/build-host/bin/protoc" \
              -DCMAKE_C_COMPILER_LAUNCHER=ccache \
              -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
              -DCMAKE_VERBOSE_MAKEFILE=ON
            cmake --build build-android-${{ matrix.abi }} --parallel --verbose
          else
            echo "Using cached Android build directory"
          fi

      - name: Cache examples build
        uses: actions/cache@v5
        with:
          path: examples/c++/build-android-examples-${{ matrix.abi }}
          key: ${{ runner.os }}-examples-build-${{ matrix.abi }}-${{ hashFiles('examples/c++/**', 'CMakeLists.txt', 'src/**') }}-stl-fix-3

      - name: Build examples
        shell: bash
        run: |
          export ANDROID_SDK_ROOT="$ANDROID_HOME"
          export ANDROID_NDK_HOME="$ANDROID_SDK_ROOT/ndk/26.1.10909125"

          if [ ! -d "examples/c++/build-android-examples-${{ matrix.abi }}" ]; then
            cmake -S examples/c++ -B examples/c++/build-android-examples-${{ matrix.abi }} -G Ninja \
              -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK_HOME/build/cmake/android.toolchain.cmake" \
              -DANDROID_ABI=${{ matrix.abi }} \
              -DANDROID_PLATFORM=android-${{ matrix.api }} \
              -DANDROID_STL=c++_static \
              -DCMAKE_BUILD_TYPE=Release \
              -DCMAKE_INTERPROCEDURAL_OPTIMIZATION=ON \
              -DHOST_BUILD_DIR="build-android-${{ matrix.abi }}" \
              -DCMAKE_C_COMPILER_LAUNCHER=ccache \
              -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
            cmake --build examples/c++/build-android-examples-${{ matrix.abi }} --parallel
          else
            echo "Using cached examples build"
          fi

      - name: Run on Android emulator (${{ matrix.abi }}) and verify
        uses: reactivecircus/android-emulator-runner@v2
        with:
          api-level: ${{ matrix.api }}
          arch: ${{ matrix.abi }}
          # target: google_apis
          # emulator-options: -no-window -gpu swiftshader_indirect -noaudio -no-boot-anim -netdelay none -netspeed full
          # disable-animations: true
          script: |
            adb wait-for-device

            echo "Device ABI:"
            adb shell getprop ro.product.cpu.abi
            adb shell getprop ro.product.cpu.abilist

            echo "=== CPU ISA / Instruction Set Support ==="
            echo "--- /proc/cpuinfo flags ---"
            adb shell 'cat /proc/cpuinfo | grep -E "^(Features|flags)"'

            echo "Checking binary sizes:"
            ls -lah examples/c++/build-android-examples-${{ matrix.abi }}/

            # Push executables to device
            adb push examples/c++/build-android-examples-${{ matrix.abi }}/ailego-example /data/local/tmp/
            adb push examples/c++/build-android-examples-${{ matrix.abi }}/core-example /data/local/tmp/
            adb push examples/c++/build-android-examples-${{ matrix.abi }}/db-example /data/local/tmp/

            adb shell chmod 755 /data/local/tmp/ailego-example
            adb shell chmod 755 /data/local/tmp/core-example
            adb shell chmod 755 /data/local/tmp/db-example

            echo "File info on device:"
            adb shell ls -la /data/local/tmp/ailego-example
            adb shell ls -la /data/local/tmp/core-example
            adb shell ls -la /data/local/tmp/db-example

            echo "Running ailego example:"
            adb shell 'cd /data/local/tmp && ./ailego-example'

            echo "Running core example:"
            adb shell 'cd /data/local/tmp && ./core-example'

            echo "Running db example:"
            adb shell 'cd /data/local/tmp && ./db-example'


================================================
FILE: .github/workflows/_build_wheel_job.yml
================================================
name: "(Reusable) Build, Publish and Smoke-test a Wheel"

on:
  workflow_call:
    inputs:
      runner:
        description: "GitHub Actions runner label"
        required: true
        type: string
      pypi_repository_url:
        description: "PyPI repository URL (empty string means official PyPI)"
        required: false
        type: string
        default: ""
    secrets:
      PYPI_API_TOKEN:
        required: true

jobs:
  build_publish_test:
    name: Build / publish / smoke-test on ${{ inputs.runner }}
    runs-on: ${{ inputs.runner }}
    permissions:
      contents: read

    steps:
      - name: Checkout code
        uses: actions/checkout@v6
        with:
          submodules: recursive

      - name: Set up Python (for cibuildwheel controller)
        uses: actions/setup-python@v6
        with:
          python-version: '3.11'

      - name: Install cibuildwheel
        run: |
          pip install --upgrade pip
          pip install cibuildwheel==3.4.0

      - name: Build wheels using cibuildwheel
        run: |
          python -m cibuildwheel --output-dir wheelhouse
          # Save list of built wheels for publishing
          ls wheelhouse/*.whl | tee $GITHUB_STEP_SUMMARY
          echo "wheels=$(ls wheelhouse/*.whl | tr '\n' ' ')" >> $GITHUB_ENV

      - name: Publish to PyPI
        if: success() && github.event_name == 'workflow_dispatch'
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
          TWINE_REPOSITORY_URL: ${{ inputs.pypi_repository_url }}
        run: |
          pip install twine
          twine upload --skip-existing --verbose wheelhouse/*.whl

      # Installs the freshly published version from the package index into a clean
      # virtualenv and imports it. Only runs for manually dispatched workflows,
      # i.e. the same condition under which the publish step uploads the wheels.
      - name: Smoke test from PyPI
        if: success() && github.event_name == 'workflow_dispatch'
        shell: bash
        env:
          PYPI_REPOSITORY_URL: ${{ inputs.pypi_repository_url }}
        run: |
          # Extract version from wheel filename (e.g. zvec-0.2.1.dev24-cp311-...whl -> 0.2.1.dev24)
          WHEEL_FILE=$(ls wheelhouse/zvec-*.whl | head -1)
          ZVEC_VERSION=$(basename "$WHEEL_FILE" | sed 's/zvec-\([^-]*\)-.*/\1/')

          # Build index-url flags: use TestPyPI when repository URL is set, otherwise official PyPI
          if [ -n "$PYPI_REPOSITORY_URL" ]; then
            INDEX_NAME="TestPyPI"
            INDEX_FLAGS="--index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/"
          else
            INDEX_NAME="PyPI"
            INDEX_FLAGS=""
          fi
          echo "Waiting for zvec==$ZVEC_VERSION to become available on $INDEX_NAME..."

          # Poll until the version is available (max 5 minutes)
          # INDEX_FLAGS is intentionally unquoted so it splits into separate pip arguments.
          FOUND=0
          for i in $(seq 1 30); do
            if pip install $INDEX_FLAGS --dry-run "zvec==$ZVEC_VERSION" > /dev/null 2>&1; then
              echo "Version $ZVEC_VERSION is available."
              FOUND=1
              break
            fi
            echo "Attempt $i/30: not yet available, retrying in 10s..."
            sleep 10
          done

          if [ "$FOUND" -eq 0 ]; then
            echo "ERROR: Timed out (5 min) waiting for zvec==$ZVEC_VERSION on $INDEX_NAME. Aborting smoke test."
            exit 1
          fi

          # Create a clean venv and install
          python -m venv test_env
          source test_env/bin/activate
          pip install --upgrade pip
          pip install $INDEX_FLAGS "zvec==$ZVEC_VERSION"
          # Run a simple smoke test
          python -c "import zvec; print('Import OK:', zvec.__version__)"


================================================
FILE: .github/workflows/build_test_wheel.yml
================================================
name: Build Test PyPi Wheels

on:
  workflow_dispatch:

permissions:
  contents: read

jobs:
  build_wheels_linux_x64:
    name: Build wheels on ubuntu-24.04 (x64) for TestPyPi
    uses: ./.github/workflows/_build_wheel_job.yml
    with:
      runner: ubuntu-24.04
      pypi_repository_url: https://test.pypi.org/legacy/
    secrets:
      PYPI_API_TOKEN: ${{ secrets.TEST_PYPI_API_TOKEN }}

  build_wheels_linux_arm64:
    name: Build wheels on ubuntu-24.04-arm (arm64) for TestPyPi
    uses: ./.github/workflows/_build_wheel_job.yml
    with:
      runner: ubuntu-24.04-arm
      pypi_repository_url: https://test.pypi.org/legacy/
    secrets:
      PYPI_API_TOKEN: ${{ secrets.TEST_PYPI_API_TOKEN }}

  build_wheels_macos_arm64:
    name: Build wheels on macos-15 (arm64) for TestPyPi
    uses: ./.github/workflows/_build_wheel_job.yml
    with:
      runner: macos-15
      pypi_repository_url: https://test.pypi.org/legacy/
    secrets:
      PYPI_API_TOKEN: ${{ secrets.TEST_PYPI_API_TOKEN }}


================================================
FILE: .github/workflows/build_wheel.yml
================================================
name: Build Wheels

on:
  workflow_dispatch:

permissions:
  contents: read

jobs:
  build_wheels_linux_x64:
    name: Build wheels on ubuntu-24.04 (x64) for PyPi
    uses: ./.github/workflows/_build_wheel_job.yml
    with:
      runner: ubuntu-24.04
    secrets:
      PYPI_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }}

  build_wheels_linux_arm64:
    name: Build wheels on ubuntu-24.04-arm (arm64) for PyPi
    uses: ./.github/workflows/_build_wheel_job.yml
    with:
      runner: ubuntu-24.04-arm
    secrets:
      PYPI_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }}

  build_wheels_macos_arm64:
    name: Build wheels on macos-15 (arm64) for PyPi
    uses: ./.github/workflows/_build_wheel_job.yml
    with:
      runner: macos-15
    secrets:
      PYPI_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }}


================================================
FILE: .github/workflows/continuous_bench.yml
================================================
name: Continuous Benchmark
on:
  push:
    branches: [ "main", "ci/continuous_bench_squash" ]
    paths-ignore:
      - '**.md'
  workflow_dispatch:

concurrency:
  group: cb-${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read

jobs:
  benchmark:
    runs-on: vdbbench
    steps:
      - uses: actions/checkout@v6

      - name: Run VectorDBBench
        env:
          DATABASE_URL: ${{ secrets.DATABASE_URL }}
        run: |
          bash .github/workflows/scripts/run_vdb.sh

================================================
FILE: .github/workflows/docker/Dockerfile.linux_x64_glibc228
================================================
# =============================================================================
# Dockerfile.linux_x64_glibc228
# Purpose: Ubuntu 18.10 gcc-9 + glibc 2.28 + CMake 3.30.0 + PyBind11 build environment
# Warning: ubuntu:18.10 is EOL; use only for glibc 2.28 compatibility testing.
# =============================================================================

# Use official Ubuntu 18.10 (Cosmic Cuttlefish)
# glibc version: 2.28 (confirmed via `ldd --version`)
FROM ubuntu:18.10

# Replace Ubuntu mirror with old-releases.ubuntu.com for older glibc compatibility
RUN sed -i 's|http://\(.*\)/ubuntu|http://old-releases.ubuntu.com/ubuntu|g' /etc/apt/sources.list && \
    sed -i 's|http://security.ubuntu.com/ubuntu|http://old-releases.ubuntu.com/ubuntu|g' /etc/apt/sources.list

# Add Ubuntu 20.04 (focal) repo for GCC 9 ONLY
RUN echo "deb http://archive.ubuntu.com/ubuntu/ focal main universe" >> /etc/apt/sources.list && \
    echo "deb http://security.ubuntu.com/ubuntu/ focal-security main universe" >> /etc/apt/sources.list

# Prevent interactive prompts during package installation and pin the timezone to UTC
ENV DEBIAN_FRONTEND=noninteractive \
    TZ=Etc/UTC

# Create non-root user for safety (optional but recommended)
RUN useradd -m -u 1000 builder && \
    mkdir -p /workspace && chown builder:builder /workspace

# Install base system dependencies
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        gcc-9 g++-9 \
        ninja-build git curl ca-certificates vim wget lcov gnupg clang-format-18\
        rsync lsb-release \
        uuid-dev zlib1g-dev libssl-dev libffi-dev \
        pybind11-dev && \
    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 90 \
                        --slave /usr/bin/g++ g++ /usr/bin/g++-9 && \
    rm -rf /var/lib/apt/lists/*

# Install Miniforge (Conda) as root, then assign to builder
ENV MINIFORGE_VERSION="latest"
ENV MINIFORGE_HOME="/opt/miniforge3"

RUN curl -sSL "https://github.com/conda-forge/miniforge/releases/${MINIFORGE_VERSION}/download/Miniforge3-Linux-x86_64.sh" -o miniforge.sh && \
    bash miniforge.sh -b -p ${MINIFORGE_HOME} && \
    rm miniforge.sh && \
    chown -R builder:builder ${MINIFORGE_HOME}

# Switch to non-root user
USER builder
ENV PATH="${MINIFORGE_HOME}/bin:${PATH}"
WORKDIR /workspace

# Create conda envs for supported Python versions.
# The cache cleanup runs in the same RUN instruction: a separate `RUN conda clean`
# cannot shrink the image because the package cache would already be stored in
# the layer produced by the `conda create` commands.
RUN conda create -n py310 python=3.10 -y && \
    conda create -n py311 python=3.11 -y && \
    conda create -n py312 python=3.12 -y && \
    conda clean --all -f -y

# Install CMake 3.30.0 from Kitware official binary
# Ref: https://github.com/Kitware/CMake/releases/tag/v3.30.0
# The archive is unpacked in /tmp/cmake and its contents are moved into
# /home/builder/.local. The tarball is removed before the `mv *` so it is not
# copied into the install prefix, and `curl -f` makes an HTTP error fail the
# build instead of saving the error page as cmake.tar.gz.
RUN mkdir -p /tmp/cmake && cd /tmp/cmake && \
    curl -fsSL -o cmake.tar.gz \
        "https://github.com/Kitware/CMake/releases/download/v3.30.0/cmake-3.30.0-linux-x86_64.tar.gz" && \
    tar -xzf cmake.tar.gz --strip-components=1 -C /tmp/cmake && \
    rm -f cmake.tar.gz && \
    mkdir -p /home/builder/.local && \
    mv * /home/builder/.local/ && \
    chown -R builder:builder /home/builder/.local && \
    rm -rf /tmp/cmake

# Add CMake to PATH
ENV PATH="/home/builder/.local/bin:${PATH}"

# Verify installations
RUN cmake --version && \
    conda info && \
    conda env list && \
    python --version && \
    gcc --version && \
    ldd --version | head -n1

# Final setup
WORKDIR /workspace

================================================
FILE: .github/workflows/nightly_coverage.yml
================================================
name: Nightly Coverage Report

on:
  schedule:
    # Runs daily at 00:00 CST (China Standard Time) = 16:00 UTC
    - cron: '0 16 * * *'

  workflow_dispatch:

permissions:
  contents: read

jobs:
  coverage:
    name: Nightly Coverage Report
    runs-on: ubuntu-24.04

    strategy:
      matrix:
        python-version: ['3.10']
      fail-fast: false

    steps:
      - name: Checkout code
        uses: actions/checkout@v6
        with:
          ref: main  # Always use main for nightly
          submodules: recursive

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
          cache-dependency-path: 'pyproject.toml'

      - name: Set up environment variables
        run: |
          # Set number of processors for parallel builds
          NPROC=$(nproc 2>/dev/null || echo 2)
          echo "NPROC=$NPROC" >> $GITHUB_ENV
          echo "Using $NPROC parallel jobs for builds"
          
          # Add Python user base bin to PATH for pip-installed CLI tools
          echo "$(python -c 'import site; print(site.USER_BASE)')/bin" >> $GITHUB_PATH
        shell: bash

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip \
            cmake==3.30.0 \
            ninja==1.11.1 \
            pytest \
            pytest-cov \
            scikit-build-core \
            setuptools_scm
        shell: bash

      - name: Build with COVERAGE config
        run: |
          cd "$GITHUB_WORKSPACE"
          
          CMAKE_GENERATOR="Unix Makefiles" \
          CMAKE_BUILD_PARALLEL_LEVEL="$NPROC" \
          python -m pip install -v . \
            --no-build-isolation \
            --config-settings="cmake.build-type=COVERAGE" \
            --config-settings='cmake.define.ENABLE_ZEN3="ON"'
        shell: bash

      - name: Run Python Tests with Coverage
        run: |
          cd "$GITHUB_WORKSPACE"
          python -m pytest python/tests/ --cov=zvec --cov-report=xml
        shell: bash

      - name: Run C++ Tests and Generate Coverage
        run: |
          cd "$GITHUB_WORKSPACE/build"
          make unittest -j$NPROC
          cd "$GITHUB_WORKSPACE"
          # Ensure gcov.sh is executable
          chmod +x scripts/gcov.sh
          bash scripts/gcov.sh -k
        shell: bash

      - name: Upload Coverage to Codecov
        uses: codecov/codecov-action@v5
        with:
          files: ./proxima-zvec-filtered.lcov.info,./coverage.xml
          flags: python,cpp,nightly
          name: nightly-linux-py${{ matrix.python-version }}
          token: ${{ secrets.CODECOV_TOKEN }}


================================================
FILE: .github/workflows/scripts/run_vdb.sh
================================================
# Continuous-benchmark driver.
#
# Builds zvec from the checked-out workspace, runs VectorDBBench for every
# (case type, quantize type) combination, extracts the headline metrics from
# the result JSON with jq and pushes them to a metrics endpoint via curl.
#
# Reads GITHUB_REF_NAME, GITHUB_WORKFLOW_SHA and GITHUB_WORKSPACE from the
# environment and expects VectorDBBench to be checked out at /opt/VectorDBBench.
set -e
# Without pipefail the exit status of `vectordbbench ... | tee` is tee's, so a
# failed benchmark run would not stop the script.
set -o pipefail

QUANTIZE_TYPE_LIST="int8 int4 fp16 fp32"
CASE_TYPE_LIST="Performance768D1M Performance768D10M Performance1536D500K" # respectively test cosine, ip # Performance960D1M l2 metrics
LOG_FILE="bench.log"
DATE=$(date +%Y-%m-%d_%H-%M-%S)
NPROC=$(nproc 2>/dev/null || getconf _NPROCESSORS_ONLN 2>/dev/null || echo 2)

# COMMIT_ID = branch-date-sha
COMMIT_ID=${GITHUB_REF_NAME}-"$DATE"-$(echo ${GITHUB_WORKFLOW_SHA} | cut -c1-8)
# Branch names may contain `/`, which is replaced with `_`.
COMMIT_ID=$(echo "$COMMIT_ID" | sed 's/\//_/g')
echo "COMMIT_ID: $COMMIT_ID"
echo "GITHUB_WORKFLOW_SHA: $GITHUB_WORKFLOW_SHA"
echo "workspace: $GITHUB_WORKSPACE"
DB_LABEL_PREFIX="Zvec16c64g-$COMMIT_ID"

# install zvec
git submodule update --init

# for debug
#cd ..
#export SKBUILD_BUILD_DIR="$GITHUB_WORKSPACE/../build"
pwd

python3 -m venv .venv
source .venv/bin/activate
pip install cmake ninja psycopg2-binary loguru fire
pip install -e /opt/VectorDBBench

CMAKE_GENERATOR="Unix Makefiles" \
CMAKE_BUILD_PARALLEL_LEVEL="$NPROC" \
pip install -v "$GITHUB_WORKSPACE"

for CASE_TYPE in $CASE_TYPE_LIST; do
    echo "Running VectorDBBench for $CASE_TYPE"
    DATASET_DESC=""
    if [ "$CASE_TYPE" == "Performance768D1M" ]; then
        DATASET_DESC="Performance768D1M - Cohere Cosine"
    elif [ "$CASE_TYPE" == "Performance768D10M" ]; then
        DATASET_DESC="Performance768D10M - Cohere Cosine"
    else
        DATASET_DESC="Performance1536D500K - OpenAI IP"
    fi

    for QUANTIZE_TYPE in $QUANTIZE_TYPE_LIST; do
        DB_LABEL="$DB_LABEL_PREFIX-$CASE_TYPE-$QUANTIZE_TYPE"
        echo "Running VectorDBBench for $DB_LABEL"

        VDB_PARAMS="--path ${DB_LABEL} --db-label ${DB_LABEL} --case-type ${CASE_TYPE} --num-concurrency 12,14,16,18,20"
        if [ "$CASE_TYPE" == "Performance768D1M" ]; then
            VDB_PARAMS="${VDB_PARAMS} --m 15 --ef-search 180"
        elif [ "$CASE_TYPE" == "Performance768D10M" ]; then
            VDB_PARAMS="${VDB_PARAMS} --m 50 --ef-search 118 --is-using-refiner"
        else #Performance1536D500K using default params + refiner to monitor performance degradation
            VDB_PARAMS="${VDB_PARAMS} --m 50 --ef-search 100 --is-using-refiner"
        fi

        # VDB_PARAMS is intentionally unquoted so it splits into separate arguments.
        if [ "$QUANTIZE_TYPE" == "fp32" ]; then
            vectordbbench zvec ${VDB_PARAMS} 2>&1 | tee "$LOG_FILE"
        else
            vectordbbench zvec ${VDB_PARAMS} --quantize-type "${QUANTIZE_TYPE}" 2>&1 | tee "$LOG_FILE"
        fi

        # jq needs exactly one path: keep only the last match and fail with a
        # clear message when the log does not name an existing result file.
        RESULT_JSON_PATH=$(grep -o "/opt/VectorDBBench/.*\.json" "$LOG_FILE" | tail -n 1 || true)
        if [ ! -f "$RESULT_JSON_PATH" ]; then
            echo "ERROR: no result JSON found in $LOG_FILE for $DB_LABEL" >&2
            exit 1
        fi
        QPS=$(jq -r '.results[0].metrics.qps' "$RESULT_JSON_PATH")
        RECALL=$(jq -r '.results[0].metrics.recall' "$RESULT_JSON_PATH")
        LATENCY_P99=$(jq -r '.results[0].metrics.serial_latency_p99' "$RESULT_JSON_PATH")
        LOAD_DURATION=$(jq -r '.results[0].metrics.load_duration' "$RESULT_JSON_PATH")

        #quote the var to avoid space in the label
        label_list="case_type=\"${CASE_TYPE}\",dataset_desc=\"${DATASET_DESC}\",db_label=\"${DB_LABEL}\",commit=\"${COMMIT_ID}\",date=\"${DATE}\",quantize_type=\"${QUANTIZE_TYPE}\""
        # replace `/` with `_` in label_list
        label_list=$(echo "$label_list" | sed 's/\//_/g')
        cat <<EOF > prom_metrics.txt
        # TYPE vdb_bench_qps gauge
        vdb_bench_qps{$label_list} $QPS
        # TYPE vdb_bench_recall gauge
        vdb_bench_recall{$label_list} $RECALL
        # TYPE vdb_bench_latency_p99 gauge
        vdb_bench_latency_p99{$label_list} $LATENCY_P99
        # TYPE vdb_bench_load_duration gauge
        vdb_bench_load_duration{$label_list} $LOAD_DURATION
EOF
        echo "prom_metrics:"
        cat prom_metrics.txt
        curl --data-binary @prom_metrics.txt "http://47.93.34.27:9091/metrics/job/benchmarks-${CASE_TYPE}/case_type/${CASE_TYPE}/quantize_type/${QUANTIZE_TYPE}" -v
    done
done

================================================
FILE: .gitignore
================================================
.*
*~
bazel-*
build*
bin/*
lib/*
var/*
venv*
tests/integration/conf/*
tests/de_integration/conf/*
**/__pycache__/*
tests/bench/log/*
tests/integration/integration
tests/integration/log
tests/integration/*.log
tests/de_integration/log
tests/de_integration/*.log
!.git*
!.clang-format
!.circleci
!.drone.yml
sdk/python/dist/
compile_commands.json
dist
html
*.lcov.info

# Dependencies
/node_modules

# Production
/build

# Generated files
.docusaurus
.cache-loader

# Misc
.DS_Store
.env.local
.env.development.local
.env.test.local
.env.production.local

npm-debug.log*
yarn-debug.log*
yarn-error.log*

allure-*

!build_android.sh

================================================
FILE: .gitmodules
================================================
[submodule "thirdparty/googletest/googletest-1.10.0"]
	path = thirdparty/googletest/googletest-1.10.0
	url = https://github.com/google/googletest.git
[submodule "thirdparty/sparsehash/sparsehash-2.0.4"]
	path = thirdparty/sparsehash/sparsehash-2.0.4
	url = https://github.com/sparsehash/sparsehash.git
	ignore = untracked
[submodule "thirdparty/gflags/gflags-2.2.2"]
	path = thirdparty/gflags/gflags-2.2.2
	url = https://github.com/gflags/gflags.git
[submodule "thirdparty/rocksdb/rocksdb-8.1.1"]
	path = thirdparty/rocksdb/rocksdb-8.1.1
	url = https://github.com/facebook/rocksdb.git
	ignore = all
[submodule "thirdparty/yaml-cpp/yaml-cpp-0.6.3"]
	path = thirdparty/yaml-cpp/yaml-cpp-0.6.3
	url = https://github.com/jbeder/yaml-cpp.git
[submodule "thirdparty/arrow/apache-arrow-21.0.0"]
	path = thirdparty/arrow/apache-arrow-21.0.0
	url = https://github.com/apache/arrow.git
	ignore = all
[submodule "thirdparty/CRoaring/CRoaring-2.0.4"]
	path = thirdparty/CRoaring/CRoaring-2.0.4
	url = https://github.com/RoaringBitmap/CRoaring.git
[submodule "thirdparty/glog/glog-0.5.0"]
	path = thirdparty/glog/glog-0.5.0
	url = https://github.com/google/glog.git
	ignore = all
[submodule "thirdparty/protobuf/protobuf-3.21.12"]
	path = thirdparty/protobuf/protobuf-3.21.12
	url = https://github.com/protocolbuffers/protobuf.git
[submodule "thirdparty/lz4/lz4-1.9.4"]
	path = thirdparty/lz4/lz4-1.9.4
	url = https://github.com/lz4/lz4.git
[submodule "thirdparty/antlr/antlr4"]
	path = thirdparty/antlr/antlr4
	url = https://github.com/antlr/antlr4.git
	ignore = all
[submodule "thirdparty/magic_enum/magic_enum-0.9.7"]
	path = thirdparty/magic_enum/magic_enum-0.9.7
	url = https://github.com/Neargye/magic_enum.git
	ignore = all
[submodule "thirdparty/RaBitQ-Library/RaBitQ-Library-0.1"]
	path = thirdparty/RaBitQ-Library/RaBitQ-Library-0.1
	url = https://github.com/VectorDB-NTU/RaBitQ-Library.git


================================================
FILE: CMakeLists.txt
================================================
cmake_minimum_required(VERSION 3.13)
cmake_policy(SET CMP0077 NEW)
project(zvec)
set(CC_CXX_STANDARD 17)

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Werror=return-type")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Werror=return-type")

if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-as-needed")
    set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-as-needed")
endif()

if(NOT DEFINED PROJECT_ROOT_DIR OR NOT PROJECT_ROOT_DIR)
    set(PROJECT_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR} CACHE PATH "Root directory of the project" FORCE)
endif()

message(STATUS "PROJECT_ROOT_DIR = ${PROJECT_ROOT_DIR}")

include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

if(NOT ANDROID AND AUTO_DETECT_ARCH AND CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|i686|i386|x64")
  setup_compiler_march_for_x86(MATH_MARCH_FLAG_SSE MATH_MARCH_FLAG_AVX2 MATH_MARCH_FLAG_AVX512 MATH_MARCH_FLAG_AVX512FP16)
  message(STATUS "best compiler march, sse: " ${MATH_MARCH_FLAG_SSE} ", avx2: " ${MATH_MARCH_FLAG_AVX2} ", avx512: " ${MATH_MARCH_FLAG_AVX512} ", avx512fp16: " ${MATH_MARCH_FLAG_AVX512FP16})
endif()

include_directories(${PROJECT_ROOT_DIR}/src/include)
include_directories(${PROJECT_ROOT_DIR}/src)

option(BUILD_PYTHON_BINDINGS "Build Python bindings using pybind11" OFF)
message(STATUS "BUILD_PYTHON_BINDINGS:${BUILD_PYTHON_BINDINGS}")

option(BUILD_TOOLS "Build tools" ON)
message(STATUS "BUILD_TOOLS:${BUILD_TOOLS}")

# RaBitQ configuration.
# Sets RABITQ_SUPPORTED (ON/OFF), the matching -DRABITQ_SUPPORTED=<1|0> compile
# definition and, when supported, RABITQ_ARCH_FLAG.
option(RABITQ_ENABLE_AVX512 "Compile RaBitQ with AVX-512 support" OFF)

# RaBitQ is only enabled for non-Android Linux builds targeting x86_64.
if(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64|AMD64" AND NOT ANDROID)
  include(CheckCCompilerFlag)

  # Probe the C compiler for the AVX2 and AVX-512 (F/BW/VL) flags.
  check_c_compiler_flag("-mavx2" COMPILER_SUPPORTS_AVX2)
  check_c_compiler_flag("-mavx512f -mavx512bw -mavx512vl" COMPILER_SUPPORTS_AVX512)

  if(COMPILER_SUPPORTS_AVX2 OR COMPILER_SUPPORTS_AVX512)
    set(RABITQ_SUPPORTED ON)
    add_definitions(-DRABITQ_SUPPORTED=1)
    # AVX-512 is used only when requested via RABITQ_ENABLE_AVX512 and accepted
    # by the compiler; otherwise the AVX2 flag is used.
    # NOTE(review): MATH_MARCH_FLAG_AVX512 / MATH_MARCH_FLAG_AVX2 are filled in by
    # setup_compiler_march_for_x86() above, which is only called when
    # AUTO_DETECT_ARCH is set - presumably RABITQ_ARCH_FLAG is empty otherwise;
    # confirm this is intended.
    if(RABITQ_ENABLE_AVX512 AND COMPILER_SUPPORTS_AVX512)
      add_definitions(-DRABITQ_COMPILED_AVX512=1)
      set(RABITQ_ARCH_FLAG "${MATH_MARCH_FLAG_AVX512}")
    else()
      set(RABITQ_ARCH_FLAG "${MATH_MARCH_FLAG_AVX2}")
    endif()
  else()
    set(RABITQ_SUPPORTED OFF)
    add_definitions(-DRABITQ_SUPPORTED=0)
    message(STATUS "RaBitQ support disabled - compiler does not support AVX2 or AVX-512")
  endif()
else()
  set(RABITQ_SUPPORTED OFF)
  add_definitions(-DRABITQ_SUPPORTED=0)
  message(STATUS "RaBitQ support disabled - only supported on Linux x86_64")
endif()
# RABITQ_ARCH_FLAG is not set by the disabled branches above.
message(STATUS "RABITQ_ARCH_FLAG: ${RABITQ_ARCH_FLAG}")

option(USE_OSS_MIRROR "Use OSS mirror for faster third-party downloads" OFF)
if(DEFINED ENV{USE_OSS_MIRROR} AND NOT "$ENV{USE_OSS_MIRROR}" STREQUAL "")
  set(USE_OSS_MIRROR "$ENV{USE_OSS_MIRROR}" CACHE BOOL "Use OSS mirror for faster third-party downloads" FORCE)
endif()
message(STATUS "USE_OSS_MIRROR:${USE_OSS_MIRROR}")

cc_directory(thirdparty)
cc_directories(src)
cc_directories(tests)

if(BUILD_TOOLS)
    cc_directories(tools)
endif()

git_version(GIT_SRCS_VER ${PROJECT_ROOT_DIR})
set(CPACK_PACKAGE_VERSION ${GIT_SRCS_VER})
set(CPACK_PACKAGE_NAME zvec)
include(CPack)

if(BUILD_PYTHON_BINDINGS)
    if(APPLE)
        set(CMAKE_STRIP "")
        message(STATUS "Disabled strip on macOS to preserve code signature")
    endif()

    include(GNUInstallDirs)
    if(DEFINED SKBUILD_PLATLIB_DIR)
        set(ZVEC_PY_INSTALL_DIR "${SKBUILD_PLATLIB_DIR}")
    elseif(DEFINED Python_SITEARCH)
        set(ZVEC_PY_INSTALL_DIR "${Python_SITEARCH}")
    else()
        set(ZVEC_PY_INSTALL_DIR "${CMAKE_INSTALL_LIBDIR}")
    endif()

    message(STATUS "Zvec install path: ${ZVEC_PY_INSTALL_DIR}")
    install(TARGETS _zvec LIBRARY DESTINATION ${ZVEC_PY_INSTALL_DIR})
endif()


================================================
FILE: CODE_OF_CONDUCT.md
================================================
# Zvec Code of Conduct

## Our Pledge

We pledge to foster an open, respectful, and harassment-free environment for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, education, socioeconomic status, nationality, personal appearance, race, religion, or sexual identity and orientation.

## Expected Behavior

- Use welcoming and inclusive language
- Respect differing viewpoints and experiences
- Gracefully accept constructive criticism
- Focus on what is best for the community
- Show empathy and kindness toward others

## Unacceptable Behavior

- Harassment, intimidation, or discriminatory conduct
- Trolling, insulting, or derogatory comments
- Public or private harassment
- Publishing others’ private information without consent
- Any conduct that would reasonably be considered inappropriate in a professional setting

## Enforcement

Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team
at **zvec@alibaba-inc.com**. All complaints will be reviewed and investigated promptly
and fairly.

The project team is obligated to respect the privacy and security of the reporter.

Consequences may include:
- A formal warning
- Temporary or permanent ban from project spaces
- Removal of contributions (e.g. comments, PRs)

## Attribution

This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 2.1, available at  
https://www.contributor-covenant.org/version/2/1/code_of_conduct.html

[homepage]: https://www.contributor-covenant.org

================================================
FILE: CONTRIBUTING.md
================================================
# Contributing to Zvec

First off, thank you for considering contributing to Zvec! 🙌  
Whether you're reporting a bug, proposing a feature, improving documentation, or submitting code — every contribution helps make Zvec better.

## Code of Conduct

By participating, you agree to abide by our [Code of Conduct](CODE_OF_CONDUCT.md). Please be respectful, collaborative, and inclusive.

---

## Development Setup

### Prerequisites
- Python 3.10 - 3.12
- CMake ≥ 3.26, < 4.0 (`cmake --version`)
- A C++17-compatible compiler (e.g., `g++-11+`, `clang++`, Apple Clang on macOS)

### Clone & Initialize

```bash
git clone --recursive https://github.com/alibaba/zvec.git
cd zvec
```

> 💡 **Tip**  
> - Forgot `--recursive`? Run:  
>   ```bash
>   git submodule update --init --recursive
>   ```
> - Set up pre-commit hooks:  
>   ```bash
>   pip install pre-commit && pre-commit install
>   ```

### Build from Source (Editable Install)
```bash
pip install -e ".[dev]"
# This installs dev dependencies (pytest, ruff, etc.) and builds the C++ extension in-place
```

> ✅ Verify:
> ```bash
> python -c "import zvec; print('Success!')"
> ```

---

## Testing

### Run All Tests
```bash
pytest python/tests/ -v
```

### Run with Coverage (Debug/CI)
```bash
pytest python/tests/ --cov=zvec --cov-report=term-missing
```

> 🔎 See full rules in `[tool.ruff]` section of `pyproject.toml`.

---

## Build Customization

You can control build behavior via environment variables or `pyproject.toml`:

| Option | How to Set | Description |
|--------|------------|-------------|
| **Build Type** | `CMAKE_BUILD_TYPE=Debug` | `Debug`, `Release`, or `Coverage` (for gcov/lcov) |
| **Generator** | `CMAKE_GENERATOR="Unix Makefiles"` | Default: `Ninja`; use Make if preferred |
| **AVX-512** | `ENABLE_SKYLAKE_AVX512=ON` | Enable AVX-512 optimizations (x86_64 only) |

Example (Debug + Make):
```bash
CMAKE_BUILD_TYPE=Debug CMAKE_GENERATOR="Unix Makefiles" pip install -v .
```

---

## Submitting Changes

1. Fork the repo and create a feature branch (`feat/...`, `fix/...`, `docs/...`)
2. Write clear commit messages (e.g., `fix(query): handle null vector in dense_fp32`)
3. Ensure tests pass & linter is clean
4. Open a Pull Request to `main`
5. Link related issue (e.g., `Closes #123`)

✅ **PRs should include**:
- Test coverage for new behavior
- Updates to documentation (if applicable)
- Reasoning behind non-obvious design choices

---

## Documentation

- User guides: `docs/` (built with MkDocs)
- API reference: generated from docstrings (follow [Google style](https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings))
- Build & deploy: `mkdocs serve` / `mkdocs build`

---

## Need Help

- Browse [existing issues](https://github.com/alibaba/zvec/issues)
- For sensitive/security issues: email `zvec@alibaba-inc.com`

---

✨ Thanks again for being part of Zvec!


================================================
FILE: LICENSE
================================================
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.

================================================
FILE: README.md
================================================
<div align="center">
  <picture>
    <source media="(prefers-color-scheme: dark)" srcset="https://zvec.oss-cn-hongkong.aliyuncs.com/logo/github_log_2.svg" />
    <img src="https://zvec.oss-cn-hongkong.aliyuncs.com/logo/github_logo_1.svg" width="400" alt="zvec logo" />
  </picture>
</div>

<p align="center">
  <a href="https://codecov.io/github/alibaba/zvec"><img src="https://codecov.io/github/alibaba/zvec/graph/badge.svg?token=O81CT45B66" alt="Code Coverage"/></a>
  <a href="https://github.com/alibaba/zvec/actions/workflows/01-ci-pipeline.yml"><img src="https://github.com/alibaba/zvec/actions/workflows/01-ci-pipeline.yml/badge.svg?branch=main" alt="Main"/></a>
  <a href="https://github.com/alibaba/zvec/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-Apache%202.0-blue.svg" alt="License"/></a>
  <a href="https://pypi.org/project/zvec/"><img src="https://img.shields.io/pypi/v/zvec.svg" alt="PyPI Release"/></a>
  <a href="https://pypi.org/project/zvec/"><img src="https://img.shields.io/pypi/pyversions/zvec.svg" alt="Python Versions"/></a>
  <a href="https://www.npmjs.com/package/@zvec/zvec"><img src="https://img.shields.io/npm/v/@zvec/zvec.svg" alt="npm Release"/></a>
</p>

<p align="center">
  <a href="https://trendshift.io/repositories/20830" target="_blank"><img src="https://trendshift.io/api/badge/repositories/20830" alt="alibaba%2Fzvec | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
</p>

<p align="center">
  <a href="https://zvec.org/en/docs/quickstart/">🚀 <strong>Quickstart</strong> </a> |
  <a href="https://zvec.org/en/">🏠 <strong>Home</strong> </a> |
  <a href="https://zvec.org/en/docs/">📚 <strong>Docs</strong> </a> |
  <a href="https://zvec.org/en/docs/benchmarks/">📊 <strong>Benchmarks</strong> </a> |
  <a href="https://deepwiki.com/alibaba/zvec">🔎 <strong>DeepWiki</strong> </a> |
  <a href="https://discord.gg/rKddFBBu9z">🎮 <strong>Discord</strong> </a>
</p>

**Zvec** is an open-source, in-process vector database — lightweight, lightning-fast, and designed to embed directly into applications. Built on **Proxima** (Alibaba's battle-tested vector search engine), it delivers production-grade, low-latency, scalable similarity search with minimal setup.

## 💫 Features

- **Blazing Fast**: Searches billions of vectors in milliseconds.
- **Simple, Just Works**: [Install](#-installation) and start searching in seconds. No servers, no config, no fuss.
- **Dense + Sparse Vectors**: Work with both dense and sparse embeddings, with native support for multi-vector queries in a single call.
- **Hybrid Search**: Combine semantic similarity with structured filters for precise results.
- **Runs Anywhere**: As an in-process library, Zvec runs wherever your code runs — notebooks, servers, CLI tools, or even edge devices.

## 📦 Installation

### [Python](https://pypi.org/project/zvec/)

**Requirements**: Python 3.10 - 3.12

```bash
pip install zvec
```

### [Node.js](https://www.npmjs.com/package/@zvec/zvec)

```bash
npm install @zvec/zvec
```

### ✅ Supported Platforms

- Linux (x86_64, ARM64)
- macOS (ARM64)

### 🛠️ Building from Source

If you prefer to build Zvec from source, please check the [Building from Source](https://zvec.org/en/docs/build/) guide.

## ⚡ One-Minute Example

```python
import zvec

# Define collection schema
schema = zvec.CollectionSchema(
    name="example",
    vectors=zvec.VectorSchema("embedding", zvec.DataType.VECTOR_FP32, 4),
)

# Create collection
collection = zvec.create_and_open(path="./zvec_example", schema=schema)

# Insert documents
collection.insert([
    zvec.Doc(id="doc_1", vectors={"embedding": [0.1, 0.2, 0.3, 0.4]}),
    zvec.Doc(id="doc_2", vectors={"embedding": [0.2, 0.3, 0.4, 0.1]}),
])

# Search by vector similarity
results = collection.query(
    zvec.VectorQuery("embedding", vector=[0.4, 0.3, 0.3, 0.1]),
    topk=10
)

# Results: list of {'id': str, 'score': float, ...}, sorted by relevance
print(results)
```

## 📈 Performance at Scale

Zvec delivers exceptional speed and efficiency, making it ideal for demanding production workloads.

<img src="https://zvec.oss-cn-hongkong.aliyuncs.com/qps_10M.svg" width="800" alt="Zvec Performance Benchmarks" />

For detailed benchmark methodology, configurations, and complete results, please see our [Benchmarks documentation](https://zvec.org/en/docs/benchmarks/).

## 🤝 Join Our Community

<div align="center">

Stay updated and get support — scan or click:

<div align="center">

| 💬 DingTalk | 📱 WeChat | 🎮 Discord |
|:---:|:---:|:---:|
| <img src="https://zvec.oss-cn-hongkong.aliyuncs.com/qrcode/dingding.png" width="150"/> | <img src="https://zvec.oss-cn-hongkong.aliyuncs.com/qrcode/wechat.png?v=3" width="150"/> | [![Discord](https://img.shields.io/badge/Discord-Join%20Server-5865F2?style=for-the-badge&logo=discord&logoColor=white)](https://discord.gg/rKddFBBu9z) |
| Scan to join | Scan to join | Click to join |

</div>

</div>

## ❤️ Contributing

We welcome and appreciate contributions from the community! Whether you're fixing a bug, adding a feature, or improving documentation, your help makes Zvec better for everyone.

Check out our [Contributing Guide](./CONTRIBUTING.md) to get started!


================================================
FILE: cmake/bazel.cmake
================================================
##
##  The following functions are available for use in a user's CMakeLists.txt:
##

##  1. Functions for C/C++
##
##  1.1. Add a subdirectory to the build
##    cc_directory(<source_dir> [binary_dir])
##
##  1.2. Add subdirectories to the build
##    cc_directories(<source_dir1> [source_dir2 ...])
##
##  1.3. Build a C/C++ static or shared library
##    cc_library(
##        NAME <name>
##        [STATIC] [SHARED] [STRICT] [ALWAYS_LINK] [EXCLUDE] [PACKED] [SRCS_NO_GLOB]
##        SRCS <file1> [file2 ...]
##        [INCS dir1 ...]
##        [PUBINCS public_dir1 ...]
##        [DEFS DEF1=1 ...]
##        [LIBS lib1 ...]
##        [CFLAGS flag1 ...]
##        [CXXFLAGS flag1 ...]
##        [LDFLAGS flag1 ...]
##        [DEPS target1 ...]
##        [PACKED_EXCLUDES pattern1 ...]
##        [VERSION <version>]
##      )
##
##  1.4. Build a C/C++ executable program
##    cc_binary(
##        NAME <name>
##        [STRICT] [PACKED]
##        SRCS <file1> [file2 ...]
##        [INCS dir1 ...]
##        [DEFS DEF1=1 ...]
##        [LIBS lib1 ...]
##        [CFLAGS flag1 ...]
##        [CXXFLAGS flag1 ...]
##        [LDFLAGS flag1 ...]
##        [DEPS target1 ...]
##        [VERSION <version>]
##      )
##
##  1.5. Build a C/C++ executable test program
##    cc_test(
##        NAME <name>
##        [STRICT]
##        SRCS <file1> [file2 ...]
##        [INCS dir1 ...]
##        [DEFS DEF1=1 ...]
##        [LIBS lib1 ...]
##        [CFLAGS flag1 ...]
##        [CXXFLAGS flag1 ...]
##        [LDFLAGS flag1 ...]
##        [DEPS target1 ...]
##        [ARGS args1 ...]
##        [VERSION <version>]
##      )
##
##  1.6. Add existing test cases to a test suite
##    cc_test_suite(<suite_name> [test_name ...])
##
##  1.7. Import a C/C++ static or shared library
##    cc_import(
##        NAME <name>
##        [STATIC | SHARED] [PACKED]
##        PATH <file>
##        [INCS dir1 ...]
##        [PUBINCS public_dir1 ...]
##        [DEPS target1 ...]
##        [IMPLIB <file>]
##        [PACKED_EXCLUDES pattern1 ...]
##      )
##
##  1.8. Import a C/C++ interface library
##    cc_interface(
##        NAME <name>
##        [PACKED]
##        [INCS dir1 ...]
##        [PUBINCS public_dir1 ...]
##        [DEPS target1 ...]
##        [PACKED_EXCLUDES pattern1 ...]
##      )
##
##  1.9. Build a C/C++ executable google test program
##    cc_gtest(
##        NAME <name>
##        [STRICT]
##        SRCS <file1> [file2 ...]
##        [INCS dir1 ...]
##        [DEFS DEF1=1 ...]
##        [LIBS lib1 ...]
##        [CFLAGS flag1 ...]
##        [CXXFLAGS flag1 ...]
##        [LDFLAGS flag1 ...]
##        [DEPS target1 ...]
##        [ARGS args1 ...]
##        [VERSION <version>]
##      )
##
##  1.10. Build a C/C++ executable google mock program
##    cc_gmock(
##        NAME <name>
##        [STRICT]
##        SRCS <file1> [file2 ...]
##        [INCS dir1 ...]
##        [DEFS DEF1=1 ...]
##        [LIBS lib1 ...]
##        [CFLAGS flag1 ...]
##        [CXXFLAGS flag1 ...]
##        [LDFLAGS flag1 ...]
##        [DEPS target1 ...]
##        [ARGS args1 ...]
##        [VERSION <version>]
##      )
##
##  1.11. Build a C++ protobuf static or shared library
##    cc_proto_library(
##        NAME <name>
##        [STATIC] [SHARED] [STRICT] [EXCLUDE] [PACKED]
##        SRCS <file1.proto> [file2.proto ...]
##        [PROTOROOT path]
##        [CXXFLAGS flag1 ...]
##        [LDFLAGS flag1 ...]
##        [DEPS target1 ...]
##        [VERSION <version>]
##        [PROTOBUF_VERSION <Protobuf version>]
##      )
##

##  2. Functions for CUDA
##
##  2.1. Add a subdirectory to the build
##    cuda_directory(<source_dir> [binary_dir])
##
##  2.2. Add subdirectories to the build
##    cuda_directories(<source_dir1> [source_dir2 ...])
##
##  2.3. Build a CUDA static or shared library
##    cuda_library(
##        NAME <name>
##        [STATIC] [SHARED] [STRICT] [ALWAYS_LINK] [EXCLUDE] [PACKED]
##        SRCS <file1> [file2 ...]
##        [INCS dir1 ...]
##        [PUBINCS public_dir1 ...]
##        [DEFS DEF1=1 ...]
##        [LIBS lib1 ...]
##        [CFLAGS flag1 ...]
##        [CXXFLAGS flag1 ...]
##        [CUDAFLAGS flag1 ...]
##        [LDFLAGS flag1 ...]
##        [DEPS target1 ...]
##        [PACKED_EXCLUDES pattern1 ...]
##        [VERSION <version>]
##      )
##
##  2.4. Build a CUDA executable program
##    cuda_binary(
##        NAME <name>
##        [STRICT] [PACKED]
##        SRCS <file1> [file2 ...]
##        [INCS dir1 ...]
##        [DEFS DEF1=1 ...]
##        [LIBS lib1 ...]
##        [CFLAGS flag1 ...]
##        [CXXFLAGS flag1 ...]
##        [CUDAFLAGS flag1 ...]
##        [LDFLAGS flag1 ...]
##        [DEPS target1 ...]
##        [VERSION <version>]
##      )
##
##  2.5. Build a CUDA executable test program
##    cuda_test(
##        NAME <name>
##        [STRICT]
##        SRCS <file1> [file2 ...]
##        [INCS dir1 ...]
##        [DEFS DEF1=1 ...]
##        [LIBS lib1 ...]
##        [CFLAGS flag1 ...]
##        [CXXFLAGS flag1 ...]
##        [CUDAFLAGS flag1 ...]
##        [LDFLAGS flag1 ...]
##        [DEPS target1 ...]
##        [ARGS args1 ...]
##        [VERSION <version>]
##      )
##
##  2.6. Add existing test cases to a test suite
##    cuda_test_suite(<suite_name> [test_name ...])
##
##  2.7. Import a C/C++/CUDA static or shared library
##    cuda_import(
##        NAME <name>
##        [STATIC | SHARED] [PACKED]
##        PATH <file>
##        [INCS dir1 ...]
##        [PUBINCS public_dir1 ...]
##        [DEPS target1 ...]
##        [IMPLIB <file>]
##        [PACKED_EXCLUDES pattern1 ...]
##      )
##
##  2.8. Import a C/C++/CUDA interface library
##    cuda_interface(
##        NAME <name>
##        [PACKED]
##        [INCS dir1 ...]
##        [PUBINCS public_dir1 ...]
##        [DEPS target1 ...]
##        [PACKED_EXCLUDES pattern1 ...]
##      )
##
##  2.9. Build a CUDA executable google test program
##    cuda_gtest(
##        NAME <name>
##        [STRICT]
##        SRCS <file1> [file2 ...]
##        [INCS dir1 ...]
##        [DEFS DEF1=1 ...]
##        [LIBS lib1 ...]
##        [CFLAGS flag1 ...]
##        [CXXFLAGS flag1 ...]
##        [CUDAFLAGS flag1 ...]
##        [LDFLAGS flag1 ...]
##        [DEPS target1 ...]
##        [ARGS args1 ...]
##        [VERSION <version>]
##      )
##
##  2.10. Build a CUDA executable google mock program
##    cuda_gmock(
##        NAME <name>
##        [STRICT]
##        SRCS <file1> [file2 ...]
##        [INCS dir1 ...]
##        [DEFS DEF1=1 ...]
##        [LIBS lib1 ...]
##        [CFLAGS flag1 ...]
##        [CXXFLAGS flag1 ...]
##        [CUDAFLAGS flag1 ...]
##        [LDFLAGS flag1 ...]
##        [DEPS target1 ...]
##        [ARGS args1 ...]
##        [VERSION <version>]
##      )
##

##  3. Utility functions
##
##  3.1. Download a git repository
##    git_repository(
##        NAME <name>
##        URL <url>
##        [TAG <tag>]
##        [PATH <local path>]
##      )
##
##  3.2. Download a hg repository
##    hg_repository(
##        NAME <name>
##        URL <url>
##        [TAG <tag>]
##        [PATH <local path>]
##      )
##
##  3.3. Download a svn repository
##    svn_repository(
##        NAME <name>
##        URL <url>
##        [REV <rev>]
##        [PATH <local path>]
##      )
##
##  3.4. Download a http archive
##    http_archive(
##        NAME <name>
##        URL <url>
##        [SHA256 <sha256 value> | SHA1 <sha1 value> | MD5 <md5 value>]
##        [PATH <local path>]
##      )
##
##  3.5. Retrieve a version string from GIT
##    git_version(
##        <result variable>
##        <repository path>
##      )
##
##  3.6. Retrieve a version string from HG
##    hg_version(
##        <result variable>
##        <repository path>
##      )
##
##  3.7. Retrieve a version string from SVN
##    svn_version(
##        <result variable>
##        <repository path>
##      )
##

# Minimum CMake version accepted by this helper module.
# NOTE(review): later parts of this file use continue() and the
# $<COMPILE_LANGUAGE:...> generator expression, which presumably need a
# CMake release newer than 3.1 -- confirm the real minimum before relying
# on this value.
cmake_minimum_required(VERSION 3.1 FATAL_ERROR)
# Presumably pulled in for cmake_parse_arguments(), matching the keyword-style
# signatures (NAME/SRCS/DEPS/...) documented in the header above.
include(CMakeParseArguments)

# Using AppleClang instead of Clang (Compiler id)
# The policy is only set when the running CMake knows about it.
if(POLICY CMP0025)
  cmake_policy(SET CMP0025 NEW)
endif()

# Enable unit testing
enable_testing()

# Add a convenience `unittest` target that runs CTest for the active build
# configuration and prints the output of failing tests. The target is only
# created once, so including this file from several places is harmless.
if(NOT TARGET unittest)
  add_custom_target(
      unittest
      COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure
      # $<CONFIG> replaces the deprecated $<CONFIGURATION> spelling.
      --build-config $<CONFIG>
    )
endif()

# Default output directories for build artifacts. A value that is already set
# (for example by the including project) is left untouched.
#   ARCHIVE / LIBRARY artifacts -> <project binary dir>/lib
#   RUNTIME artifacts           -> <project binary dir>/bin
foreach(_OUTPUT_KIND ARCHIVE LIBRARY)
  if(NOT CMAKE_${_OUTPUT_KIND}_OUTPUT_DIRECTORY)
    set(CMAKE_${_OUTPUT_KIND}_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
  endif()
endforeach()
unset(_OUTPUT_KIND)

if(NOT CMAKE_RUNTIME_OUTPUT_DIRECTORY)
  set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin)
endif()

# RPATH settings
#
# On Darwin the install RPATH is expressed relative to @loader_path; on every
# other system the build RPATH is skipped and binaries are built directly with
# an $ORIGIN-relative install RPATH (lib64 is searched first on 64-bit builds).
#
# The conditions reference the variables by name instead of expanding them
# with ${...}: an unquoted expansion yields a malformed if() when the variable
# is empty and is re-evaluated as a variable name when it is not.
set(CMAKE_MACOSX_RPATH ON)
if(NOT CMAKE_SYSTEM_NAME MATCHES "Darwin")
  set(CMAKE_SKIP_BUILD_RPATH ON)
  set(CMAKE_BUILD_WITH_INSTALL_RPATH ON)
  if(CMAKE_SIZEOF_VOID_P EQUAL 8)
    set(CMAKE_INSTALL_RPATH "$ORIGIN/../lib64:$ORIGIN/../lib:$ORIGIN")
  else()
    set(CMAKE_INSTALL_RPATH "$ORIGIN/../lib:$ORIGIN")
  endif()
else()
  set(CMAKE_INSTALL_RPATH "@loader_path/../lib:@loader_path")
endif()

# Standard installation directories. Each "<KEY>|<default>" entry below sets
# CMAKE_INSTALL_<KEY> to <default> unless the variable already holds a value.
foreach(_INSTALL_DIR_SPEC "LIBDIR|lib" "BINDIR|bin" "INCDIR|include" "ETCDIR|etc")
  string(REPLACE "|" ";" _INSTALL_DIR_PAIR "${_INSTALL_DIR_SPEC}")
  list(GET _INSTALL_DIR_PAIR 0 _INSTALL_DIR_KEY)
  list(GET _INSTALL_DIR_PAIR 1 _INSTALL_DIR_DEFAULT)
  if(NOT CMAKE_INSTALL_${_INSTALL_DIR_KEY})
    set(CMAKE_INSTALL_${_INSTALL_DIR_KEY} ${_INSTALL_DIR_DEFAULT})
  endif()
endforeach()
unset(_INSTALL_DIR_SPEC)
unset(_INSTALL_DIR_PAIR)
unset(_INSTALL_DIR_KEY)
unset(_INSTALL_DIR_DEFAULT)

# Global build toggles: emit compile_commands.json, build position-independent
# code, and export all symbols from Windows shared libraries.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)

# CLANG_USE_LIBCXX defaults to ON for Apple and Android builds, OFF elsewhere.
set(_CLANG_USE_LIBCXX_DEFAULT OFF)
if(APPLE OR ANDROID)
  set(_CLANG_USE_LIBCXX_DEFAULT ON)
endif()
option(
    CLANG_USE_LIBCXX
    "Use libc++ instead of libstdc++"
    ${_CLANG_USE_LIBCXX_DEFAULT}
  )
unset(_CLANG_USE_LIBCXX_DEFAULT)

# Translate the option into the -stdlib= flag used by the Clang-only entries
# of the compile and link flag lists below.
set(CLANG_STDLIB_OPTION "-stdlib=libstdc++")
if(CLANG_USE_LIBCXX)
  set(CLANG_STDLIB_OPTION "-stdlib=libc++")
endif()

if(MSVC)
  # Rewrite CMake's default flag variables: switch the static runtime (/MT)
  # to the DLL runtime (/MD) and strip any /W<n> warning level so the
  # per-target flag lists defined below decide the warning level.
  set(
      _COMPILER_FLAGS
      CMAKE_CXX_FLAGS
      CMAKE_CXX_FLAGS_DEBUG
      CMAKE_CXX_FLAGS_RELEASE
      CMAKE_CXX_FLAGS_RELWITHDEBINFO
      CMAKE_CXX_FLAGS_MINSIZEREL
      CMAKE_C_FLAGS
      CMAKE_C_FLAGS_DEBUG
      CMAKE_C_FLAGS_RELEASE
      CMAKE_C_FLAGS_RELWITHDEBINFO
      CMAKE_C_FLAGS_MINSIZEREL
    )
  foreach(_FLAGS_VAR ${_COMPILER_FLAGS})
    string(REPLACE "/MT" "/MD" ${_FLAGS_VAR} "${${_FLAGS_VAR}}")
    string(REGEX REPLACE "/W[0-9]" "" ${_FLAGS_VAR} "${${_FLAGS_VAR}}")
  endforeach()
  unset(_COMPILER_FLAGS)
  add_definitions(-D_CRT_SECURE_NO_WARNINGS)
  # Build shared library as default
  set(BUILD_SHARED_LIBS ON)
else()
  # Coloured diagnostics for Clang / AppleClang / GNU; Clang additionally
  # receives the selected -stdlib= option. The compiler is identified through
  # C_COMPILER_ID for both languages.
  set(
      _COMPILER_FLAGS
      "$<$<C_COMPILER_ID:Clang>:-fcolor-diagnostics;${CLANG_STDLIB_OPTION}>"
      "$<$<C_COMPILER_ID:AppleClang>:-fcolor-diagnostics>"
      "$<$<C_COMPILER_ID:GNU>:-fdiagnostics-color=always>"
    )
  # Apply the same flag set to C and C++ translation units.
  foreach(_FLAGS_LANG C CXX)
    add_compile_options("$<$<COMPILE_LANGUAGE:${_FLAGS_LANG}>:${_COMPILER_FLAGS}>")
  endforeach()
  unset(_COMPILER_FLAGS)
endif()

# The custom ASAN and COVERAGE configurations start from the DEBUG
# configuration: every per-config compiler and linker flag variable is seeded
# with its *_DEBUG counterpart.
foreach(_CUSTOM_CONFIG ASAN COVERAGE)
  foreach(
      _FLAGS_PREFIX
      CMAKE_C_FLAGS
      CMAKE_CXX_FLAGS
      CMAKE_EXE_LINKER_FLAGS
      CMAKE_SHARED_LINKER_FLAGS
      CMAKE_STATIC_LINKER_FLAGS
      CMAKE_MODULE_LINKER_FLAGS
    )
    set(${_FLAGS_PREFIX}_${_CUSTOM_CONFIG} ${${_FLAGS_PREFIX}_DEBUG})
  endforeach()
endforeach()

# C/C++ ASAN and COVERAGE compile flags.
# Each entry is active only for its build configuration and for one C++
# compiler id. Clang, AppleClang and GNU share the same spelling; MSVC has its
# own address-sanitizer switch and no coverage entry.
set(BAZEL_CC_ASAN_COMPILE_FLAGS "")
set(BAZEL_CC_COVERAGE_COMPILE_FLAGS "")
foreach(_CC_ID Clang AppleClang GNU)
  list(
      APPEND BAZEL_CC_ASAN_COMPILE_FLAGS
      "$<$<CONFIG:ASAN>:$<$<CXX_COMPILER_ID:${_CC_ID}>:-fsanitize=address>>"
    )
  list(
      APPEND BAZEL_CC_COVERAGE_COMPILE_FLAGS
      "$<$<CONFIG:COVERAGE>:$<$<CXX_COMPILER_ID:${_CC_ID}>:--coverage>>"
    )
endforeach()
list(
    APPEND BAZEL_CC_ASAN_COMPILE_FLAGS
    "$<$<CONFIG:ASAN>:$<$<CXX_COMPILER_ID:MSVC>:/fsanitize=address>>"
  )

# C/C++ strict compile flags.
# Only the GNU entry depends on the compiler version: when the C++ compiler
# version is greater than 7.0 it uses -Wshadow-local, otherwise -Wshadow.
if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0)
  set(_GNU_SHADOW_FLAG "-Wshadow-local")
else()
  set(_GNU_SHADOW_FLAG "-Wshadow")
endif()

set(
    BAZEL_CC_STRICT_COMPILE_FLAGS
    "$<$<CXX_COMPILER_ID:Clang>:-Wall;-Wextra;-Wshadow>"
    "$<$<CXX_COMPILER_ID:AppleClang>:-Wall;-Wextra;-Wshadow>"
    "$<$<CXX_COMPILER_ID:GNU>:-Wall;-Wextra;${_GNU_SHADOW_FLAG};-Wno-misleading-indentation>"
    "$<$<CXX_COMPILER_ID:MSVC>:/W4>"
    ${BAZEL_CC_ASAN_COMPILE_FLAGS}
    ${BAZEL_CC_COVERAGE_COMPILE_FLAGS}
  )
unset(_GNU_SHADOW_FLAG)


# C/C++ strict link flags: the Clang standard-library selection plus the
# ASAN / COVERAGE switches, which are passed at link time as well.
set(
    BAZEL_CC_STRICT_LINK_FLAGS
    "$<$<CXX_COMPILER_ID:Clang>:${CLANG_STDLIB_OPTION}>"
    ${BAZEL_CC_ASAN_COMPILE_FLAGS}
    ${BAZEL_CC_COVERAGE_COMPILE_FLAGS}
  )

# C/C++ unstrict compile flags: plain -Wall (or /W3 on MSVC) followed by the
# ASAN / COVERAGE switches.
set(BAZEL_CC_UNSTRICT_COMPILE_FLAGS "")
foreach(_CC_ID Clang AppleClang GNU)
  list(
      APPEND BAZEL_CC_UNSTRICT_COMPILE_FLAGS
      "$<$<CXX_COMPILER_ID:${_CC_ID}>:-Wall>"
    )
endforeach()
list(
    APPEND BAZEL_CC_UNSTRICT_COMPILE_FLAGS
    "$<$<CXX_COMPILER_ID:MSVC>:/W3>"
    ${BAZEL_CC_ASAN_COMPILE_FLAGS}
    ${BAZEL_CC_COVERAGE_COMPILE_FLAGS}
  )

# C/C++ unstrict link flags: identical to the strict link flags.
set(BAZEL_CC_UNSTRICT_LINK_FLAGS "${BAZEL_CC_STRICT_LINK_FLAGS}")

# CUDA strict / unstrict compile flags.
# Host C and C++ sources get warning flags selected by the matching
# <LANG>_COMPILER_ID; CUDA sources get -G in the DEBUG configuration.
#   strict   : -Wall -Wextra -Wshadow (MSVC: /W4)
#   unstrict : -Wall                  (MSVC: /W3)
set(BAZEL_CUDA_STRICT_COMPILE_FLAGS "")
set(BAZEL_CUDA_UNSTRICT_COMPILE_FLAGS "")
foreach(_HOST_LANG C CXX)
  foreach(_HOST_ID Clang AppleClang GNU)
    list(
        APPEND BAZEL_CUDA_STRICT_COMPILE_FLAGS
        "$<$<COMPILE_LANGUAGE:${_HOST_LANG}>:$<$<${_HOST_LANG}_COMPILER_ID:${_HOST_ID}>:-Wall;-Wextra;-Wshadow>>"
      )
    list(
        APPEND BAZEL_CUDA_UNSTRICT_COMPILE_FLAGS
        "$<$<COMPILE_LANGUAGE:${_HOST_LANG}>:$<$<${_HOST_LANG}_COMPILER_ID:${_HOST_ID}>:-Wall>>"
      )
  endforeach()
  list(
      APPEND BAZEL_CUDA_STRICT_COMPILE_FLAGS
      "$<$<COMPILE_LANGUAGE:${_HOST_LANG}>:$<$<${_HOST_LANG}_COMPILER_ID:MSVC>:/W4>>"
    )
  list(
      APPEND BAZEL_CUDA_UNSTRICT_COMPILE_FLAGS
      "$<$<COMPILE_LANGUAGE:${_HOST_LANG}>:$<$<${_HOST_LANG}_COMPILER_ID:MSVC>:/W3>>"
    )
endforeach()
list(
    APPEND BAZEL_CUDA_STRICT_COMPILE_FLAGS
    "$<$<CONFIG:DEBUG>:$<$<COMPILE_LANGUAGE:CUDA>:-G>>"
  )
list(
    APPEND BAZEL_CUDA_UNSTRICT_COMPILE_FLAGS
    "$<$<CONFIG:DEBUG>:$<$<COMPILE_LANGUAGE:CUDA>:-G>>"
  )

# CUDA strict / unstrict link flags (none)
set(BAZEL_CUDA_STRICT_LINK_FLAGS "")
set(BAZEL_CUDA_UNSTRICT_LINK_FLAGS "")

## Find workspace directory
##
## Walks upward from CMAKE_CURRENT_SOURCE_DIR looking for a directory that
## contains a `Workspace.cmake` file.
##
##   _RESULT  Name of the caller's variable that receives the directory.
##            It is only written when a workspace is found; otherwise the
##            caller's variable is left as it was.
##
## The walk stops once a directory is its own parent (the filesystem root),
## so the root directory itself is not probed.
function(_find_workspace_directory _RESULT)
  set(_CURRENT_WORKSPACE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
  get_filename_component(
      _PARENT_WORKSPACE_DIR "${_CURRENT_WORKSPACE_DIR}" DIRECTORY
    )
  while(NOT ("${_CURRENT_WORKSPACE_DIR}" STREQUAL "${_PARENT_WORKSPACE_DIR}"))
    if(EXISTS "${_CURRENT_WORKSPACE_DIR}/Workspace.cmake")
      set(${_RESULT} "${_CURRENT_WORKSPACE_DIR}" PARENT_SCOPE)
      # PARENT_SCOPE does not update this function's own copy of the result
      # variable, so report the directory that was actually found.
      message(STATUS "Found workspace at ${_CURRENT_WORKSPACE_DIR}")
      break()
    endif()

    # Move one level up
    set(_CURRENT_WORKSPACE_DIR "${_PARENT_WORKSPACE_DIR}")
    get_filename_component(
        _PARENT_WORKSPACE_DIR "${_CURRENT_WORKSPACE_DIR}" DIRECTORY
      )
  endwhile()
endfunction()

## Retrieve absolute paths
##
##   _RESULT  Name of the caller's variable that receives the list.
##   ARGN     Paths to convert. Entries that are already absolute are kept
##            unchanged; the others are converted with
##            get_filename_component(... ABSOLUTE).
##
## With no paths given, the result is set to an empty value.
function(_absolute_paths _RESULT)
  # Start from an empty list: function scopes inherit the caller's variables,
  # so a FILEPATHS defined by the caller would otherwise leak into the result.
  set(FILEPATHS "")
  foreach(FILEPATH ${ARGN})
    if(NOT IS_ABSOLUTE "${FILEPATH}")
      get_filename_component(FILEPATH "${FILEPATH}" ABSOLUTE)
    endif()
    list(APPEND FILEPATHS "${FILEPATH}")
  endforeach()
  set(${_RESULT} "${FILEPATHS}" PARENT_SCOPE)
endfunction()

## Add both shared and static library
##
## Compiles the sources once into the object library `<name>_objects`, then
## creates `<name>_static` (STATIC) and `<name>` (SHARED) from those objects.
##
##   _NAME    Base target name.
##   _OPTION  Extra add_library() option forwarded to all three targets
##            (may be empty).
##   ARGN     Source files of the object library.
##
## This is a macro, so it runs in the caller's scope.
macro(_add_library _NAME _OPTION)
  add_library(${_NAME}_objects OBJECT ${_OPTION} ${ARGN})
  add_library(
      ${_NAME}_static STATIC ${_OPTION}
      "$<TARGET_OBJECTS:${_NAME}_objects>"
    )
  add_library(
      ${_NAME} SHARED ${_OPTION}
      "$<TARGET_OBJECTS:${_NAME}_objects>"
    )
  # Building the shared target also builds the static one.
  add_dependencies(${_NAME} ${_NAME}_static)
  if(NOT MSVC)
    # Outside MSVC the static library uses the same base file name as the
    # shared library.
    set_target_properties(
        ${_NAME}_static PROPERTIES OUTPUT_NAME ${_NAME}
      )
  endif()
endmacro()

## Link dependencies
##
## For every entry of ARGN that names an existing target, makes `_NAME` depend
## on it (build order only) and adds that target's
## INTERFACE_INCLUDE_DIRECTORIES to `_NAME` as PRIVATE include directories.
## Entries that are not targets are ignored.
##
##   _NAME  Target that receives the dependencies.
##   ARGN   Candidate dependency names.
function(_targets_link_dependencies _NAME)
  # Start from empty lists: function scopes inherit the caller's variables,
  # so stale LIBS_DEPS / LIBS_INCS values would otherwise be picked up.
  set(LIBS_DEPS "")
  set(LIBS_INCS "")
  foreach(LIB ${ARGN})
    if(TARGET ${LIB})
      list(APPEND LIBS_DEPS ${LIB})
      list(
          APPEND LIBS_INCS
          "$<TARGET_PROPERTY:${LIB},INTERFACE_INCLUDE_DIRECTORIES>"
        )
    endif()
  endforeach()

  if(LIBS_DEPS)
    add_dependencies(${_NAME} ${LIBS_DEPS})
    target_include_directories(${_NAME} PRIVATE "${LIBS_INCS}")
  endif()
endfunction()

## Link libraries
##
## Usage: _target_link_libraries(<target> [<lib>...])
##
## Links <lib>... into <target>. Every target reachable from the given
## libraries whose ALWAYS_LINK property is set is linked with the platform's
## "whole archive" mechanism; everything else is passed to
## target_link_libraries() unchanged.
function(_target_link_libraries _NAME)
  # Helper: walk LIB_LIST and, recursively, the INTERFACE_LINK_LIBRARIES and
  # LINK_LIBRARIES of each target in it, and return in RESULT_VAR the targets
  # whose ALWAYS_LINK property is set. _COLLECT_ALWAYS_LINK_VISITED records
  # the targets already visited so each one is processed once.
  function(_collect_always_link_libs LIB_LIST RESULT_VAR)
    if(NOT _COLLECT_ALWAYS_LINK_VISITED)
      set(_COLLECT_ALWAYS_LINK_VISITED "" PARENT_SCOPE)
    endif()

    set(LOCAL_RESULT "")
    foreach(LIB ${LIB_LIST})
      # Plain library names / flags have no properties to inspect.
      if(NOT TARGET ${LIB})
        continue()
      endif()

      list(FIND _COLLECT_ALWAYS_LINK_VISITED ${LIB} ALREADY_VISITED)
      if(NOT ALREADY_VISITED EQUAL -1)
        continue()
      endif()

      # Mark as visited locally and publish the updated list to the caller's
      # scope, so the caller sees what this invocation visited.
      list(APPEND _COLLECT_ALWAYS_LINK_VISITED ${LIB})
      set(_COLLECT_ALWAYS_LINK_VISITED "${_COLLECT_ALWAYS_LINK_VISITED}" PARENT_SCOPE)

      get_target_property(ALWAYS_LINK ${LIB} ALWAYS_LINK)
      if(ALWAYS_LINK)
        list(APPEND LOCAL_RESULT ${LIB})
      endif()

      # Recurse into the libraries this target exposes to its consumers.
      get_target_property(DEP_LIBS ${LIB} INTERFACE_LINK_LIBRARIES)
      if(DEP_LIBS)
        _collect_always_link_libs("${DEP_LIBS}" DEP_ALWAYS_LINK_LIBS)
        list(APPEND LOCAL_RESULT ${DEP_ALWAYS_LINK_LIBS})
      endif()

      # Recurse into the libraries this target links itself.
      get_target_property(LINK_LIBS ${LIB} LINK_LIBRARIES)
      if(LINK_LIBS)
        _collect_always_link_libs("${LINK_LIBS}" LINK_ALWAYS_LINK_LIBS)
        list(APPEND LOCAL_RESULT ${LINK_ALWAYS_LINK_LIBS})
      endif()
    endforeach()

    list(REMOVE_DUPLICATES LOCAL_RESULT)
    set(${RESULT_VAR} "${LOCAL_RESULT}" PARENT_SCOPE)
  endfunction()

  # All ALWAYS_LINK targets reachable from the requested libraries.
  _collect_always_link_libs("${ARGN}" ALL_ALWAYS_LINK_LIBS)

  # Libraries to link: the requested ones plus any ALWAYS_LINK target that
  # was only reached transitively.
  set(ALL_LIBS_TO_PROCESS ${ARGN})
  foreach(ALWAYS_LIB ${ALL_ALWAYS_LINK_LIBS})
    list(FIND ARGN ${ALWAYS_LIB} FOUND_INDEX)
    if(FOUND_INDEX EQUAL -1)
      list(APPEND ALL_LIBS_TO_PROCESS ${ALWAYS_LIB})
    endif()
  endforeach()

  list(REMOVE_DUPLICATES ALL_LIBS_TO_PROCESS)

  foreach(LIB ${ALL_LIBS_TO_PROCESS})
    # Non-target entries are passed through as-is.
    if(NOT TARGET ${LIB})
      list(APPEND LINK_LIBS ${LIB})
      continue()
    endif()

    # Targets without ALWAYS_LINK are linked normally.
    list(FIND ALL_ALWAYS_LINK_LIBS ${LIB} IS_ALWAYS_LINK)
    if(IS_ALWAYS_LINK EQUAL -1)
      list(APPEND LINK_LIBS ${LIB})
      continue()
    endif()

    # ALWAYS_LINK target: wrap it in the platform's whole-archive option.
    if(NOT MSVC)
      if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
        list(APPEND LINK_LIBS -Wl,--whole-archive ${LIB} -Wl,--no-whole-archive)
      else()
        list(APPEND LINK_LIBS -Wl,-force_load ${LIB})
      endif()
    else()
      # Microsoft Visual C++
      # The library is referenced through its file path here rather than its
      # target name, so its interface link libraries, the build dependency
      # and its interface include directories are added explicitly below.
      list(APPEND LINK_LIBS /WHOLEARCHIVE:$<TARGET_FILE:${LIB}>)
      get_target_property(OTHER_LINK_LIBS ${LIB} INTERFACE_LINK_LIBRARIES)
      if(OTHER_LINK_LIBS)
        foreach(OTHER_LIB ${OTHER_LINK_LIBS})
          # Skip libraries that the outer loop handles on its own.
          list(FIND ALL_LIBS_TO_PROCESS ${OTHER_LIB} FOUND_INDEX)
          if(FOUND_INDEX EQUAL -1)
            list(APPEND LINK_LIBS ${OTHER_LIB})
          endif()
        endforeach()
      endif()
      list(APPEND LIBS_DEPS ${LIB})
      list(
          APPEND LIBS_INCS
          "$<TARGET_PROPERTY:${LIB},INTERFACE_INCLUDE_DIRECTORIES>"
        )
    endif()
  endforeach()

  target_link_libraries(${_NAME} ${LINK_LIBS})
  # LIBS_DEPS is only filled by the MSVC branch above.
  if(LIBS_DEPS)
    add_dependencies(${_NAME} ${LIBS_DEPS})
    target_include_directories(${_NAME} PRIVATE "${LIBS_INCS}")
  endif()
endfunction()

## Add a subdirectory to the build
##
## Usage: cc_directory(<source_dir> [<add_subdirectory args>...])
##
## Thin wrapper: every argument is passed straight through to
## add_subdirectory().
function(cc_directory)
  add_subdirectory(${ARGN})
endfunction()

## Add subdirectories to the build
##
## Usage: cc_directories(<source_dir>...)
##
## Calls add_subdirectory() once per argument, in the order given.
function(cc_directories)
  foreach(SRC_DIR ${ARGN})
    add_subdirectory(${SRC_DIR})
  endforeach()
endfunction()

## Set the properties of target
##
## Usage:
##   _cc_target_properties(
##       NAME <target> [STRICT] [ALWAYS_LINK]
##       [VERSION <version>] [C_STANDARD <std>] [CXX_STANDARD <std>]
##       [INCS <dir>...] [PUBINCS <dir>...] [DEFS <def>...] [LIBS <lib>...]
##       [CFLAGS <flag>...] [CXXFLAGS <flag>...] [LDFLAGS <flag>...]
##       [DEPS <target>...]
##     )
##
## Applies compile/link flags, definitions, include directories, libraries,
## dependencies, version and language-standard properties to an existing
## target. The language standard is taken from C_STANDARD / CXX_STANDARD when
## given, otherwise from the CC_C_STANDARD / CC_CXX_STANDARD variables visible
## to this function, otherwise it defaults to C99 / C++11.
function(_cc_target_properties)
  cmake_parse_arguments(
      CC_ARGS "STRICT;ALWAYS_LINK" "NAME;VERSION;C_STANDARD;CXX_STANDARD"
      "INCS;PUBINCS;DEFS;LIBS;CFLAGS;CXXFLAGS;LDFLAGS;DEPS" ${ARGN}
    )

  if(NOT CC_ARGS_NAME)
    message(FATAL_ERROR "No target name provided.")
  endif()

  # Only these target types accept target_link_libraries() here; object
  # libraries are handled through _targets_link_dependencies() instead.
  get_target_property(TARGET_TYPE ${CC_ARGS_NAME} TYPE)
  set(TARGET_LINKABLE FALSE)
  if(("${TARGET_TYPE}" STREQUAL "SHARED_LIBRARY") OR
      ("${TARGET_TYPE}" STREQUAL "STATIC_LIBRARY") OR
      ("${TARGET_TYPE}" STREQUAL "EXECUTABLE"))
    set(TARGET_LINKABLE TRUE)
  endif()

  # ALWAYS_LINK is recorded as a custom target property; it is read back by
  # _target_link_libraries() when an executable links this library.
  if(CC_ARGS_ALWAYS_LINK)
    if(("${TARGET_TYPE}" STREQUAL "STATIC_LIBRARY") OR
        ("${TARGET_TYPE}" STREQUAL "OBJECT_LIBRARY"))
      set_property(TARGET ${CC_ARGS_NAME} PROPERTY ALWAYS_LINK TRUE)
    endif()
  endif()

  # Set the warning level of compiling
  if(CC_ARGS_STRICT)
    target_compile_options(
        ${CC_ARGS_NAME} PRIVATE "${BAZEL_CC_STRICT_COMPILE_FLAGS}"
      )
    if(TARGET_LINKABLE)
      target_link_libraries(${CC_ARGS_NAME} "${BAZEL_CC_STRICT_LINK_FLAGS}")
    endif()
  else()
    target_compile_options(
        ${CC_ARGS_NAME} PRIVATE "${BAZEL_CC_UNSTRICT_COMPILE_FLAGS}"
      )
    if(TARGET_LINKABLE)
      target_link_libraries(${CC_ARGS_NAME} "${BAZEL_CC_UNSTRICT_LINK_FLAGS}")
    endif()
  endif()

  if(CC_ARGS_DEFS)
    target_compile_definitions(${CC_ARGS_NAME} PRIVATE "${CC_ARGS_DEFS}")
  endif()

  # Language-specific flags are attached per compile language.
  if(CC_ARGS_CFLAGS OR CC_ARGS_CXXFLAGS)
    target_compile_options(
        ${CC_ARGS_NAME} PRIVATE
        "$<$<COMPILE_LANGUAGE:C>:${CC_ARGS_CFLAGS}>"
        "$<$<COMPILE_LANGUAGE:CXX>:${CC_ARGS_CXXFLAGS}>"
      )
  endif()

  # LINK_FLAGS is a single space-separated string, not a list.
  if(CC_ARGS_LDFLAGS)
    string(REPLACE ";" " " CC_ARGS_LDFLAGS "${CC_ARGS_LDFLAGS}")
    set_property(
        TARGET ${CC_ARGS_NAME} PROPERTY LINK_FLAGS "${CC_ARGS_LDFLAGS}"
      )
  endif()

  if(CC_ARGS_INCS)
    _absolute_paths(INC_DIRS ${CC_ARGS_INCS})
    target_include_directories(${CC_ARGS_NAME} PRIVATE "${INC_DIRS}")
  endif()

  if(BAZEL_WORKSPACE_DIR)
    target_include_directories(${CC_ARGS_NAME} PRIVATE "${BAZEL_WORKSPACE_DIR}")
  endif()

  if(CC_ARGS_PUBINCS)
    _absolute_paths(INC_DIRS ${CC_ARGS_PUBINCS})
    target_include_directories(${CC_ARGS_NAME} PUBLIC "${INC_DIRS}")
  endif()

  if(CC_ARGS_LIBS)
    if(NOT TARGET_LINKABLE)
      _targets_link_dependencies(${CC_ARGS_NAME} ${CC_ARGS_LIBS})
    else()
      # Executables go through the ALWAYS_LINK-aware helper.
      if ("${TARGET_TYPE}" STREQUAL "EXECUTABLE")
        _target_link_libraries(${CC_ARGS_NAME} "${CC_ARGS_LIBS}")
      else()
        target_link_libraries(${CC_ARGS_NAME} "${CC_ARGS_LIBS}")
      endif()
    endif()
  endif()

  # Expand the list unquoted so that each dependency is its own argument;
  # a quoted list would be taken as one target name containing ';'.
  if(CC_ARGS_DEPS)
    add_dependencies(${CC_ARGS_NAME} ${CC_ARGS_DEPS})
  endif()

  if(CC_ARGS_VERSION)
    set_property(
        TARGET ${CC_ARGS_NAME} PROPERTY VERSION "${CC_ARGS_VERSION}"
      )
  endif()

  # Resolve the language standards: explicit argument first, then the
  # CC_C_STANDARD / CC_CXX_STANDARD variable if one is visible, then default.
  if(CC_ARGS_C_STANDARD)
    set(CC_C_STANDARD "${CC_ARGS_C_STANDARD}")
  elseif(NOT CC_C_STANDARD)
    set(CC_C_STANDARD 99)
  endif()

  if(CC_ARGS_CXX_STANDARD)
    set(CC_CXX_STANDARD "${CC_ARGS_CXX_STANDARD}")
  elseif(NOT CC_CXX_STANDARD)
    set(CC_CXX_STANDARD 11)
  endif()

  set_target_properties(
      ${CC_ARGS_NAME} PROPERTIES DEFINE_SYMBOL ""
      C_STANDARD ${CC_C_STANDARD} CXX_STANDARD ${CC_CXX_STANDARD}
      C_STANDARD_REQUIRED ON C_EXTENSIONS ON
      CXX_STANDARD_REQUIRED ON CXX_EXTENSIONS OFF
      WINDOWS_EXPORT_ALL_SYMBOLS ON
    )
endfunction()

## Build a C/C++ static or shared library
##
## Options:     STATIC, SHARED, EXCLUDE, PACKED, SRCS_NO_GLOB
## One-value:   NAME, VERSION
## Multi-value: SRCS, INCS, PUBINCS, DEFS, LIBS, CFLAGS, CXXFLAGS, LDFLAGS,
##              DEPS, PACKED_EXCLUDES
##
## - STATIC and SHARED together create <NAME> (shared), <NAME>_static and
##   <NAME>_objects via _add_library(); alone they select the library type;
##   with neither, the type is left to add_library().
## - SRCS entries are globbed only when they are relative and contain '*' or
##   '?'; with SRCS_NO_GLOB they are used exactly as given.
## - VERSION adds the definition <NAME with '-' -> '_'>_VERSION="<version>".
## - PACKED installs the libraries and the headers found under PUBINCS.
## - Arguments not listed above are forwarded to _cc_target_properties().
function(cc_library)
  cmake_parse_arguments(
      CC_ARGS
      "STATIC;SHARED;EXCLUDE;PACKED;SRCS_NO_GLOB"
      "NAME;VERSION"
      "SRCS;INCS;PUBINCS;DEFS;LIBS;CFLAGS;CXXFLAGS;LDFLAGS;DEPS;PACKED_EXCLUDES"
      ${ARGN}
  )

  if(NOT CC_ARGS_NAME)
    message(FATAL_ERROR "No target name provided.")
  endif()

  # ---- Resolve the list of source files --------------------------------
  if(NOT CC_ARGS_SRCS_NO_GLOB)
    set(_lib_sources "")
    foreach(_entry IN LISTS CC_ARGS_SRCS)
      if(NOT IS_ABSOLUTE "${_entry}" AND "${_entry}" MATCHES "[*?]")
        # Relative wildcard pattern: expand it now.
        file(GLOB _matches ${_entry})
        list(APPEND _lib_sources ${_matches})
      else()
        # Absolute path or plain file name: keep verbatim.
        list(APPEND _lib_sources "${_entry}")
      endif()
    endforeach()
    if(NOT _lib_sources)
      message(FATAL_ERROR "No source files found for ${CC_ARGS_NAME} after globbing.")
    endif()
  else()
    set(_lib_sources ${CC_ARGS_SRCS})
    if(NOT _lib_sources)
      message(FATAL_ERROR "No source files provided for ${CC_ARGS_NAME} (SRCS_NO_GLOB mode).")
    endif()
  endif()

  # ---- Version macro ----------------------------------------------------
  if(CC_ARGS_VERSION)
    string(REPLACE "-" "_" _macro_stem "${CC_ARGS_NAME}")
    list(APPEND CC_ARGS_DEFS ${_macro_stem}_VERSION="${CC_ARGS_VERSION}")
  endif()

  if(CC_ARGS_EXCLUDE)
    set(_exclude_flag EXCLUDE_FROM_ALL)
  endif()

  # ---- Create the library target(s) --------------------------------------
  if(CC_ARGS_SHARED)
    if(CC_ARGS_STATIC)
      _add_library(${CC_ARGS_NAME} "${_exclude_flag}" ${_lib_sources})
    else()
      add_library(${CC_ARGS_NAME} SHARED ${_exclude_flag} ${_lib_sources})
    endif()
  elseif(CC_ARGS_STATIC)
    add_library(${CC_ARGS_NAME} STATIC ${_exclude_flag} ${_lib_sources})
  else()
    add_library(${CC_ARGS_NAME} ${_exclude_flag} ${_lib_sources})
  endif()

  # ---- Configure the helper targets, if _add_library() created them ------
  # The object library receives the public include directories as ordinary
  # (private) include directories.
  if(TARGET ${CC_ARGS_NAME}_objects)
    _cc_target_properties(
        NAME "${CC_ARGS_NAME}_objects"
        INCS "${CC_ARGS_INCS};${CC_ARGS_PUBINCS}"
        DEFS "${CC_ARGS_DEFS}"
        LIBS "${CC_ARGS_LIBS}"
        CFLAGS "${CC_ARGS_CFLAGS}"
        CXXFLAGS "${CC_ARGS_CXXFLAGS}"
        LDFLAGS "${CC_ARGS_LDFLAGS}"
        DEPS "${CC_ARGS_DEPS}"
        "${CC_ARGS_UNPARSED_ARGUMENTS}"
    )
  endif()

  if(TARGET ${CC_ARGS_NAME}_static)
    _cc_target_properties(
        NAME "${CC_ARGS_NAME}_static"
        INCS "${CC_ARGS_INCS}"
        PUBINCS "${CC_ARGS_PUBINCS}"
        DEFS "${CC_ARGS_DEFS}"
        LIBS "${CC_ARGS_LIBS}"
        CFLAGS "${CC_ARGS_CFLAGS}"
        CXXFLAGS "${CC_ARGS_CXXFLAGS}"
        LDFLAGS "${CC_ARGS_LDFLAGS}"
        DEPS "${CC_ARGS_DEPS}"
        "${CC_ARGS_UNPARSED_ARGUMENTS}"
    )
    if(CC_ARGS_PACKED)
      install(
        TARGETS ${CC_ARGS_NAME}_static
        ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"
      )
    endif()
  endif()

  # ---- Configure the main target -----------------------------------------
  _cc_target_properties(
      NAME "${CC_ARGS_NAME}"
      INCS "${CC_ARGS_INCS}"
      PUBINCS "${CC_ARGS_PUBINCS}"
      DEFS "${CC_ARGS_DEFS}"
      LIBS "${CC_ARGS_LIBS}"
      CFLAGS "${CC_ARGS_CFLAGS}"
      CXXFLAGS "${CC_ARGS_CXXFLAGS}"
      LDFLAGS "${CC_ARGS_LDFLAGS}"
      DEPS "${CC_ARGS_DEPS}"
      VERSION "${CC_ARGS_VERSION}"
      "${CC_ARGS_UNPARSED_ARGUMENTS}"
  )

  # ---- Packaging ----------------------------------------------------------
  if(NOT CC_ARGS_PACKED)
    return()
  endif()

  install(
      TARGETS ${CC_ARGS_NAME}
      ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"
      LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
  )
  if(CC_ARGS_PUBINCS)
    # Each PACKED_EXCLUDES entry becomes "PATTERN <entry> EXCLUDE".
    foreach(_skip_pattern ${CC_ARGS_PACKED_EXCLUDES})
      list(APPEND PATTERN_EXCLUDES PATTERN "${_skip_pattern}" EXCLUDE)
    endforeach()
    install(
        DIRECTORY ${CC_ARGS_PUBINCS} DESTINATION ${CMAKE_INSTALL_INCDIR}
        FILES_MATCHING PATTERN "*.h" PATTERN "*.hpp" PATTERN "*.hxx"
        ${PATTERN_EXCLUDES}
    )
  endif()
endfunction()

## Build a C/C++ executable program
##
## Options:     PACKED
## One-value:   NAME, VERSION
## Multi-value: SRCS, INCS, DEFS, LIBS, CFLAGS, CXXFLAGS, LDFLAGS, DEPS
##
## - SRCS entries are expanded with file(GLOB); it is a fatal error when
##   nothing matches.
## - VERSION adds the definition <NAME with '-' -> '_'>_VERSION="<version>".
## - PACKED installs the executable into CMAKE_INSTALL_BINDIR.
## - Arguments not listed above are forwarded to _cc_target_properties().
function(cc_binary)
  cmake_parse_arguments(
      CC_ARGS "PACKED" "NAME;VERSION"
     "SRCS;INCS;DEFS;LIBS;CFLAGS;CXXFLAGS;LDFLAGS;DEPS" ${ARGN}
    )

  if(NOT CC_ARGS_NAME)
    message(FATAL_ERROR "No target name provided.")
  endif()

  file(GLOB CC_ARGS_SRCS ${CC_ARGS_SRCS})
  if(NOT CC_ARGS_SRCS)
    message(FATAL_ERROR "No source files found of ${CC_ARGS_NAME}.")
  endif()

  if(CC_ARGS_VERSION)
    string(REPLACE "-" "_" MACRO_PREFIX "${CC_ARGS_NAME}")
    list(APPEND CC_ARGS_DEFS ${MACRO_PREFIX}_VERSION="${CC_ARGS_VERSION}")
  endif()
  add_executable(${CC_ARGS_NAME} ${CC_ARGS_SRCS})

  if(CC_ARGS_PACKED)
    install(
        TARGETS ${CC_ARGS_NAME} RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}"
      )
  endif()

  _cc_target_properties(
      NAME "${CC_ARGS_NAME}"
      INCS "${CC_ARGS_INCS}"
      DEFS "${CC_ARGS_DEFS}"
      LIBS "${CC_ARGS_LIBS}"
      CFLAGS "${CC_ARGS_CFLAGS}"
      CXXFLAGS "${CC_ARGS_CXXFLAGS}"
      LDFLAGS "${CC_ARGS_LDFLAGS}"
      DEPS "${CC_ARGS_DEPS}"
      VERSION "${CC_ARGS_VERSION}"
      "${CC_ARGS_UNPARSED_ARGUMENTS}"
    )
endfunction()

## Build a C/C++ executable test program
##
## One-value:   NAME, VERSION
## Multi-value: SRCS, INCS, DEFS, LIBS, CFLAGS, CXXFLAGS, LDFLAGS, DEPS, ARGS
##
## Creates the executable <NAME> (excluded from the default build), makes the
## existing 'unittest' target depend on it, adds a custom target
## 'unittest.<NAME>' that runs it, and registers it with add_test(). ARGS are
## passed to the test program on its command line. The program runs with
## PROJECT_BINARY_DIR as working directory.
## Arguments not listed above are forwarded to _cc_target_properties().
function(cc_test)
  cmake_parse_arguments(
      CC_ARGS "" "NAME;VERSION"
      "SRCS;INCS;DEFS;LIBS;CFLAGS;CXXFLAGS;LDFLAGS;DEPS;ARGS" ${ARGN}
    )

  if(NOT CC_ARGS_NAME)
    message(FATAL_ERROR "No target name provided.")
  endif()

  file(GLOB CC_ARGS_SRCS ${CC_ARGS_SRCS})
  if(NOT CC_ARGS_SRCS)
    message(FATAL_ERROR "No source files found of ${CC_ARGS_NAME}.")
  endif()

  if(CC_ARGS_VERSION)
    string(REPLACE "-" "_" MACRO_PREFIX "${CC_ARGS_NAME}")
    list(APPEND CC_ARGS_DEFS ${MACRO_PREFIX}_VERSION="${CC_ARGS_VERSION}")
  endif()
  add_executable(${CC_ARGS_NAME} EXCLUDE_FROM_ALL ${CC_ARGS_SRCS})

  _cc_target_properties(
      NAME "${CC_ARGS_NAME}"
      INCS "${CC_ARGS_INCS}"
      DEFS "${CC_ARGS_DEFS}"
      LIBS "${CC_ARGS_LIBS}"
      CFLAGS "${CC_ARGS_CFLAGS}"
      CXXFLAGS "${CC_ARGS_CXXFLAGS}"
      LDFLAGS "${CC_ARGS_LDFLAGS}"
      DEPS "${CC_ARGS_DEPS}"
      "${CC_ARGS_UNPARSED_ARGUMENTS}"
    )
  add_dependencies(unittest ${CC_ARGS_NAME})

  # ARGS is expanded unquoted so that every entry becomes its own command-line
  # argument and an empty ARGS adds no argument at all (a quoted list would be
  # passed as one argument, or as an empty string when there are no ARGS).
  add_custom_target(
      unittest.${CC_ARGS_NAME}
      COMMAND $<TARGET_FILE:${CC_ARGS_NAME}> ${CC_ARGS_ARGS}
      WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
      DEPENDS ${CC_ARGS_NAME}
    )
  add_test(
      NAME ${CC_ARGS_NAME}
      COMMAND $<TARGET_FILE:${CC_ARGS_NAME}> ${CC_ARGS_ARGS}
      WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
    )
endfunction()

## Add existing test cases to a test suite
##
## Usage: cc_test_suite(<suite> [<test>...])
##
## Ensures the custom target 'unittest.<suite>' exists and makes it depend on
## 'unittest.<test>' for every listed test.
function(cc_test_suite _NAME)
  set(_suite_target unittest.${_NAME})

  # Create the suite target on first use.
  if(NOT TARGET ${_suite_target})
    add_custom_target(${_suite_target} COMMAND "")
  endif()

  # Map each test name onto its 'unittest.<test>' runner target.
  foreach(_case ${ARGN})
    list(APPEND TEST_TARGETS unittest.${_case})
  endforeach()

  if(NOT TEST_TARGETS)
    return()
  endif()
  add_dependencies(${_suite_target} ${TEST_TARGETS})
endfunction()

## Import a C/C++ static or shared library
##
## Options:     STATIC, SHARED, PACKED
## One-value:   NAME, PATH, IMPLIB
## Multi-value: INCS, PUBINCS, DEPS, PACKED_EXCLUDES
##
## Creates the IMPORTED GLOBAL library <NAME> (SHARED, STATIC, or UNKNOWN when
## neither option is given) located at PATH, which is expanded with
## file(GLOB). INCS and PUBINCS both end up in INTERFACE_INCLUDE_DIRECTORIES.
## With MSVC and SHARED, the import library defaults to PATH with its
## '.dll' suffix replaced by '.lib' unless IMPLIB is given.
function(cc_import)
  cmake_parse_arguments(
      CC_ARGS "STATIC;SHARED;PACKED"
      "NAME;PATH;IMPLIB" "INCS;PUBINCS;DEPS;PACKED_EXCLUDES" ${ARGN}
    )

  if(NOT CC_ARGS_NAME)
    message(FATAL_ERROR "No target name provided.")
  endif()

  file(GLOB CC_ARGS_PATH ${CC_ARGS_PATH})
  if(NOT CC_ARGS_PATH)
    message(FATAL_ERROR "No imported target file found of ${CC_ARGS_NAME}.")
  endif()
  if(MSVC AND CC_ARGS_SHARED AND NOT CC_ARGS_IMPLIB)
    # The dot is escaped so that only a real ".dll" suffix is rewritten.
    string(REGEX REPLACE
        "\\.[Dd][Ll][Ll]$" ".lib" CC_ARGS_IMPLIB ${CC_ARGS_PATH}
      )
  endif()

  if(CC_ARGS_SHARED)
    add_library(${CC_ARGS_NAME} SHARED IMPORTED GLOBAL)
  elseif(CC_ARGS_STATIC)
    add_library(${CC_ARGS_NAME} STATIC IMPORTED GLOBAL)
  else()
    add_library(${CC_ARGS_NAME} UNKNOWN IMPORTED GLOBAL)
  endif()

  set_property(
      TARGET ${CC_ARGS_NAME} PROPERTY IMPORTED_LOCATION ${CC_ARGS_PATH}
    )
  if(MSVC AND CC_ARGS_SHARED)
    set_property(
        TARGET ${CC_ARGS_NAME} PROPERTY IMPORTED_IMPLIB ${CC_ARGS_IMPLIB}
      )
  endif()

  if(CC_ARGS_INCS)
    _absolute_paths(INC_DIRS ${CC_ARGS_INCS})
    foreach(INC_DIR ${INC_DIRS})
      set_property(
          TARGET ${CC_ARGS_NAME} APPEND PROPERTY
          INTERFACE_INCLUDE_DIRECTORIES "${INC_DIR}"
        )
    endforeach()
  endif()

  if(CC_ARGS_PUBINCS)
    _absolute_paths(INC_DIRS ${CC_ARGS_PUBINCS})
    foreach(INC_DIR ${INC_DIRS})
      set_property(
          TARGET ${CC_ARGS_NAME} APPEND PROPERTY
          INTERFACE_INCLUDE_DIRECTORIES "${INC_DIR}"
        )
    endforeach()
  endif()

  # Expand the list unquoted so that each dependency is its own argument;
  # a quoted list would be taken as one target name containing ';'.
  if(CC_ARGS_DEPS)
    add_dependencies(${CC_ARGS_NAME} ${CC_ARGS_DEPS})
  endif()

  if(CC_ARGS_PACKED)
    # NOTE(review): install(TARGETS) is applied to an IMPORTED target here;
    # confirm that the CMake versions in use accept this.
    install(
        TARGETS ${CC_ARGS_NAME}
        ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"
        LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
      )
    if(CC_ARGS_PUBINCS)
      # Each PACKED_EXCLUDES entry becomes "PATTERN <entry> EXCLUDE".
      foreach(PACKED_EXCLUDE ${CC_ARGS_PACKED_EXCLUDES})
        list(APPEND PATTERN_EXCLUDES "PATTERN;${PACKED_EXCLUDE};EXCLUDE")
      endforeach()
      install(
          DIRECTORY ${CC_ARGS_PUBINCS} DESTINATION ${CMAKE_INSTALL_INCDIR}
          FILES_MATCHING PATTERN "*.h" PATTERN "*.hpp" PATTERN "*.hxx"
          ${PATTERN_EXCLUDES}
        )
    endif()
  endif()
endfunction()

## Import a C/C++ interface library
##
## Options:     PACKED
## One-value:   NAME
## Multi-value: INCS, PUBINCS, DEPS, PACKED_EXCLUDES
##
## Creates the INTERFACE library <NAME>. INCS and PUBINCS are both added as
## INTERFACE include directories; DEPS become target dependencies. With
## PACKED, the headers found under PUBINCS are installed.
function(cc_interface)
  cmake_parse_arguments(
      CC_ARGS "PACKED" "NAME" "INCS;PUBINCS;DEPS;PACKED_EXCLUDES" ${ARGN}
    )

  if(NOT CC_ARGS_NAME)
    message(FATAL_ERROR "No target name provided.")
  endif()

  # add_library() accepts GLOBAL only together with IMPORTED, so a plain
  # INTERFACE library is created here.
  add_library(${CC_ARGS_NAME} INTERFACE)
  if(CC_ARGS_INCS)
    _absolute_paths(INC_DIRS ${CC_ARGS_INCS})
    target_include_directories(${CC_ARGS_NAME} INTERFACE "${INC_DIRS}")
  endif()

  if(CC_ARGS_PUBINCS)
    _absolute_paths(INC_DIRS ${CC_ARGS_PUBINCS})
    target_include_directories(${CC_ARGS_NAME} INTERFACE "${INC_DIRS}")
  endif()

  # Expand the list unquoted so that each dependency is its own argument;
  # a quoted list would be taken as one target name containing ';'.
  if(CC_ARGS_DEPS)
    add_dependencies(${CC_ARGS_NAME} ${CC_ARGS_DEPS})
  endif()

  if(CC_ARGS_PACKED AND CC_ARGS_PUBINCS)
    # Each PACKED_EXCLUDES entry becomes "PATTERN <entry> EXCLUDE".
    foreach(PACKED_EXCLUDE ${CC_ARGS_PACKED_EXCLUDES})
      list(APPEND PATTERN_EXCLUDES "PATTERN;${PACKED_EXCLUDE};EXCLUDE")
    endforeach()
    install(
        DIRECTORY ${CC_ARGS_PUBINCS} DESTINATION ${CMAKE_INSTALL_INCDIR}
        FILES_MATCHING PATTERN "*.h" PATTERN "*.hpp" PATTERN "*.hxx"
        ${PATTERN_EXCLUDES}
      )
  endif()
endfunction()

## Find gtest library
##
## Publishes the cache variables FIND_GTEST_INCS and FIND_GTEST_LIBS.
## When both 'gtest' and 'gtest_main' exist as targets they are used by name
## (with no extra include directories); otherwise find_package(GTest) is
## required to succeed. Does nothing when both variables are already defined.
function(_find_gtest)
  if(DEFINED FIND_GTEST_LIBS AND DEFINED FIND_GTEST_INCS)
    return()
  endif()

  if(TARGET gtest AND TARGET gtest_main)
    # Find gtest using target names
    set(_gtest_incs "")
    set(_gtest_libs "gtest;gtest_main")
  else()
    # Find gtest using 'find_package'
    find_package(GTest REQUIRED)
    set(_gtest_incs "${GTEST_INCLUDE_DIRS}")
    set(_gtest_libs "${GTEST_BOTH_LIBRARIES}")
  endif()

  set(FIND_GTEST_INCS "${_gtest_incs}" CACHE STRING "GTest includes")
  set(FIND_GTEST_LIBS "${_gtest_libs}" CACHE STRING "GTest libraries")
endfunction()

## Build a C/C++ executable google test program
##
## One-value:   NAME, VERSION
## Multi-value: SRCS, INCS, DEFS, LIBS, CFLAGS, CXXFLAGS, LDFLAGS, DEPS, ARGS
##
## Same as cc_test(), with the GTest include directories and libraries found
## by _find_gtest() appended to INCS and LIBS. Only the keywords listed above
## are passed on to cc_test().
function(cc_gtest)
  cmake_parse_arguments(
    CC_ARGS "" "NAME;VERSION"
    "SRCS;INCS;DEFS;LIBS;CFLAGS;CXXFLAGS;LDFLAGS;DEPS;ARGS" ${ARGN}
  )

  # Make FIND_GTEST_INCS / FIND_GTEST_LIBS available.
  _find_gtest()
  set(_gtest_all_incs "${CC_ARGS_INCS};${FIND_GTEST_INCS}")
  set(_gtest_all_libs "${CC_ARGS_LIBS};${FIND_GTEST_LIBS}")

  cc_test(
      NAME "${CC_ARGS_NAME}"
      VERSION "${CC_ARGS_VERSION}"
      SRCS "${CC_ARGS_SRCS}"
      INCS "${_gtest_all_incs}"
      DEFS "${CC_ARGS_DEFS}"
      LIBS "${_gtest_all_libs}"
      CFLAGS "${CC_ARGS_CFLAGS}"
      CXXFLAGS "${CC_ARGS_CXXFLAGS}"
      LDFLAGS "${CC_ARGS_LDFLAGS}"
      DEPS "${CC_ARGS_DEPS}"
      ARGS "${CC_ARGS_ARGS}"
    )
endfunction()

## Find gmock library
##
## Publishes the cache variables FIND_GMOCK_INCS and FIND_GMOCK_LIBS.
## When both 'gmock' and 'gmock_main' exist as targets they are used by name
## (with no extra include directories); otherwise find_package(GMock) and
## find_package(GTest) are both required to succeed. Does nothing when both
## variables are already defined.
function(_find_gmock)
  if(DEFINED FIND_GMOCK_LIBS AND DEFINED FIND_GMOCK_INCS)
    return()
  endif()

  if(TARGET gmock AND TARGET gmock_main)
    # Find gmock using target names
    set(_gmock_incs "")
    set(_gmock_libs "gmock;gmock_main")
  else()
    # Find gmock/gtest using 'find_package'
    find_package(GMock REQUIRED)
    find_package(GTest REQUIRED)
    set(_gmock_incs "${GMOCK_INCLUDE_DIRS};${GTEST_INCLUDE_DIRS}")
    set(_gmock_libs "${GMOCK_BOTH_LIBRARIES};${GTEST_LIBRARIES}")
  endif()

  set(FIND_GMOCK_INCS "${_gmock_incs}" CACHE STRING "GMock includes")
  set(FIND_GMOCK_LIBS "${_gmock_libs}" CACHE STRING "GMock libraries")
endfunction()

## Build a C/C++ executable google mock program
##
## One-value:   NAME, VERSION
## Multi-value: SRCS, INCS, DEFS, LIBS, CFLAGS, CXXFLAGS, LDFLAGS, DEPS, ARGS
##
## Same as cc_test(), with the GMock include directories and libraries found
## by _find_gmock() appended to INCS and LIBS. Only the keywords listed above
## are passed on to cc_test().
function(cc_gmock)
  cmake_parse_arguments(
    CC_ARGS "" "NAME;VERSION"
    "SRCS;INCS;DEFS;LIBS;CFLAGS;CXXFLAGS;LDFLAGS;DEPS;ARGS" ${ARGN}
  )

  # Make FIND_GMOCK_INCS / FIND_GMOCK_LIBS available.
  _find_gmock()
  set(_gmock_all_incs "${CC_ARGS_INCS};${FIND_GMOCK_INCS}")
  set(_gmock_all_libs "${CC_ARGS_LIBS};${FIND_GMOCK_LIBS}")

  cc_test(
      NAME "${CC_ARGS_NAME}"
      VERSION "${CC_ARGS_VERSION}"
      SRCS "${CC_ARGS_SRCS}"
      INCS "${_gmock_all_incs}"
      DEFS "${CC_ARGS_DEFS}"
      LIBS "${_gmock_all_libs}"
      CFLAGS "${CC_ARGS_CFLAGS}"
      CXXFLAGS "${CC_ARGS_CXXFLAGS}"
      LDFLAGS "${CC_ARGS_LDFLAGS}"
      DEPS "${CC_ARGS_DEPS}"
      ARGS "${CC_ARGS_ARGS}"
    )
endfunction()

## Find protobuf library
##
## Usage: _find_protobuf(<version>)   (<version> may be empty)
##
## Publishes three cache variables, each suffixed with <version>:
##   CC_PROTOBUF_PROTOC_<version>  the protobuf compiler
##   CC_PROTOBUF_INCS_<version>    protobuf include directories
##   CC_PROTOBUF_LIBS_<version>    protobuf libraries
## Uses find_package(Protobuf) unless both 'protoc' and 'libprotobuf' exist
## as targets; in that case the targets are used and their VERSION property
## is checked against <version>. Does nothing when the PROTOC variable for
## <version> is already defined.
function(_find_protobuf _VERSION)
  set(_protoc_key CC_PROTOBUF_PROTOC_${_VERSION})
  set(_incs_key CC_PROTOBUF_INCS_${_VERSION})
  set(_libs_key CC_PROTOBUF_LIBS_${_VERSION})

  if(DEFINED ${_protoc_key})
    return()
  endif()

  # Find protobuf using 'find_package'
  if(NOT TARGET protoc OR NOT TARGET libprotobuf)
    find_package(Protobuf ${_VERSION} REQUIRED)
    set(${_protoc_key} "${PROTOBUF_PROTOC_EXECUTABLE}" CACHE PATH "Protobuf compiler")
    set(${_incs_key} "${PROTOBUF_INCLUDE_DIRS}" CACHE STRING "Protobuf includes")
    set(${_libs_key} "${PROTOBUF_LIBRARIES}" CACHE STRING "Protobuf libraries")
    return()
  endif()

  # Find protobuf using target names
  get_target_property(_protoc_ver protoc VERSION)
  get_target_property(_libprotobuf_ver libprotobuf VERSION)

  # Enforce the minimum version only when one was requested.
  if(_VERSION)
    if(${_protoc_ver} VERSION_LESS ${_VERSION})
      message(
          FATAL_ERROR
          "The 'protoc' version is ${_protoc_ver}, less than ${_VERSION}."
        )
    endif()
    if(${_libprotobuf_ver} VERSION_LESS ${_VERSION})
      message(
          FATAL_ERROR
          "The 'libprotobuf' version is ${_libprotobuf_ver}, "
          "less than ${_VERSION}."
        )
    endif()
  endif()

  message(STATUS "Found binary 'protoc ${_protoc_ver}'")
  message(STATUS "Found library 'libprotobuf ${_libprotobuf_ver}'")

  set(${_protoc_key} "$<TARGET_FILE:protoc>" CACHE PATH "Protobuf compiler")

  # The include directory is taken to be '../src' relative to the directory
  # in which the 'protoc' target was defined.
  get_target_property(_protoc_src_dir protoc SOURCE_DIR)
  get_filename_component(_protoc_inc_dir ${_protoc_src_dir}/../src ABSOLUTE)
  set(${_incs_key} "${_protoc_inc_dir}" CACHE STRING "Protobuf includes")

  set(${_libs_key} libprotobuf CACHE STRING "Protobuf libraries")
endfunction()

## Build a C++ protobuf static or shared library
##
## Options:     STATIC, SHARED, EXCLUDE, PACKED
## One-value:   NAME, VERSION, PROTOROOT, PROTOBUF_VERSION
## Multi-value: SRCS, CXXFLAGS, LDFLAGS, DEPS
##
## - SRCS are .proto files (expanded with file(GLOB)) that must live below
##   PROTOROOT, which defaults to CMAKE_CURRENT_SOURCE_DIR.
## - Each .proto file is compiled by protoc into <file>.pb.cc / <file>.pb.h
##   under CMAKE_CURRENT_BINARY_DIR (Python output is written to the same
##   directory), after which a generated script wraps both files in GCC
##   diagnostic pragmas that silence -Wshadow and -Wunused-parameter.
## - VERSION adds the definition <NAME with '-' -> '_'>_VERSION="<version>".
## - The protoc found by _find_protobuf() can be overridden with the
##   GLOBAL_CC_PROTOBUF_PROTOC variable.
## - Arguments not listed above are forwarded to _cc_target_properties().
function(cc_proto_library)
  cmake_parse_arguments(
      CC_ARGS "STATIC;SHARED;EXCLUDE;PACKED"
      "NAME;VERSION;PROTOROOT;PROTOBUF_VERSION"
      "SRCS;CXXFLAGS;LDFLAGS;DEPS" ${ARGN}
    )

  _find_protobuf("${CC_ARGS_PROTOBUF_VERSION}")
  set(CC_PROTOBUF_PROTOC ${CC_PROTOBUF_PROTOC_${CC_ARGS_PROTOBUF_VERSION}})
  if(DEFINED GLOBAL_CC_PROTOBUF_PROTOC)
    set(CC_PROTOBUF_PROTOC ${GLOBAL_CC_PROTOBUF_PROTOC})
  endif()
  set(CC_PROTOBUF_INCS ${CC_PROTOBUF_INCS_${CC_ARGS_PROTOBUF_VERSION}})
  set(CC_PROTOBUF_LIBS ${CC_PROTOBUF_LIBS_${CC_ARGS_PROTOBUF_VERSION}})

  if(NOT CC_ARGS_NAME)
    message(FATAL_ERROR "No target name provided.")
  endif()

  file(GLOB CC_ARGS_SRCS ${CC_ARGS_SRCS})
  if(NOT CC_ARGS_SRCS)
    message(FATAL_ERROR "No source files found of ${CC_ARGS_NAME}.")
  endif()

  # DEFS is not a keyword of this function, so the definitions are collected
  # in a fresh local list and forwarded to _cc_target_properties() below.
  set(PROTO_DEFS "")
  if(CC_ARGS_VERSION)
    string(REPLACE "-" "_" MACRO_PREFIX "${CC_ARGS_NAME}")
    list(APPEND PROTO_DEFS ${MACRO_PREFIX}_VERSION="${CC_ARGS_VERSION}")
  endif()

  if(CC_ARGS_EXCLUDE)
    set(EXCLUDE_OPTION EXCLUDE_FROM_ALL)
  endif()

  set(PROTO_ROOT ${CMAKE_CURRENT_SOURCE_DIR})
  if(CC_ARGS_PROTOROOT)
    get_filename_component(PROTO_ROOT ${CC_ARGS_PROTOROOT} ABSOLUTE)
  endif()

  # Compile proto files to C++ sources
  set(CPP_OUTPATH "${CMAKE_CURRENT_BINARY_DIR}")
  # Start empty so a CC_SRCS variable of the calling scope cannot leak in.
  set(CC_SRCS "")
  foreach(PROTO_FILE ${CC_ARGS_SRCS})
    get_filename_component(PROTO_FILE ${PROTO_FILE} ABSOLUTE)

    if(NOT ${PROTO_FILE} MATCHES "\\.proto$$")
      message(FATAL_ERROR "Unrecognized proto file ${PROTO_FILE}")
    endif()
    if(NOT ${PROTO_FILE} MATCHES "^${PROTO_ROOT}")
      message(FATAL_ERROR "'${PROTO_FILE}' NOT IN '${PROTO_ROOT}'")
    endif()

    # Path of the proto file relative to PROTO_ROOT, without its extension;
    # the generated files mirror this layout below CPP_OUTPATH.
    string(
        REGEX REPLACE "^${PROTO_ROOT}(/?)" "" ROOT_CLEANED_FILE ${PROTO_FILE}
      )
    string(REGEX REPLACE "\\.proto$$" "" EXT_CLEANED_FILE ${ROOT_CLEANED_FILE})
    set(CPP_FILE "${CPP_OUTPATH}/${EXT_CLEANED_FILE}.pb.cc")
    set(HDR_FILE "${CPP_OUTPATH}/${EXT_CLEANED_FILE}.pb.h")
    set(INJ_FILE "${CPP_OUTPATH}/${EXT_CLEANED_FILE}.pb.cmake")
    file(RELATIVE_PATH REL_CPP_FILE ${CMAKE_BINARY_DIR} ${CPP_FILE})

    # Post-processing script, run after protoc: rewrites the generated .pb.cc
    # and .pb.h so their content is enclosed in GCC diagnostic push/pop
    # pragmas.
    set(INJECTED_SCRIPT
        "foreach(SRC ${EXT_CLEANED_FILE}.pb.cc ${EXT_CLEANED_FILE}.pb.h)\n"
        "  file(READ \$\{SRC\} SRC_CODE)\n"
        "  file(REMOVE \$\{SRC\})\n"
        "  file(APPEND \$\{SRC\} \"#ifdef __GNUC__\\n\")\n"
        "  file(APPEND \$\{SRC\} \"#pragma GCC diagnostic push\\n\")\n"
        "  file(APPEND \$\{SRC\} \"#pragma GCC diagnostic ignored \\\"-Wshadow\\\"\\n\")\n"
        "  file(APPEND \$\{SRC\} \"#pragma GCC diagnostic ignored \\\"-Wunused-parameter\\\"\\n\")\n"
        "  file(APPEND \$\{SRC\} \"#endif\\n\\n\")\n"
        "  file(APPEND \$\{SRC\} \"\$\{SRC_CODE\}\")\n"
        "  file(APPEND \$\{SRC\} \"\\n#ifdef __GNUC__\\n\")\n"
        "  file(APPEND \$\{SRC\} \"#pragma GCC diagnostic pop\\n\")\n"
        "  file(APPEND \$\{SRC\} \"#endif\\n\")\n"
        "endforeach()\n"
      )
    file(WRITE "${INJ_FILE}" ${INJECTED_SCRIPT})

    add_custom_command(
        OUTPUT "${CPP_FILE}" "${HDR_FILE}"
        # COMMAND ${CMAKE_COMMAND} -E make_directory ${CPP_OUTPATH}
        COMMAND ${CC_PROTOBUF_PROTOC}
        --cpp_out "${CPP_OUTPATH}" --python_out "${CPP_OUTPATH}"
        --proto_path "${PROTO_ROOT}" --proto_path "${CC_PROTOBUF_INCS}" "${PROTO_FILE}"

        COMMAND ${CMAKE_COMMAND} -P "${INJ_FILE}"
        DEPENDS "${PROTO_FILE}"
        COMMENT "Generating CXX source ${REL_CPP_FILE}"
        VERBATIM
      )
    list(APPEND CC_SRCS "${CPP_FILE}" "${HDR_FILE}")
  endforeach()

  # Compile C++ sources
  if(CC_ARGS_SHARED AND CC_ARGS_STATIC)
    _add_library(${CC_ARGS_NAME} "${EXCLUDE_OPTION}" "${CC_SRCS}")
  elseif(CC_ARGS_SHARED)
    add_library(${CC_ARGS_NAME} SHARED ${EXCLUDE_OPTION} ${CC_SRCS})
  elseif(CC_ARGS_STATIC)
    add_library(${CC_ARGS_NAME} STATIC ${EXCLUDE_OPTION} ${CC_SRCS})
  else()
    add_library(${CC_ARGS_NAME} ${EXCLUDE_OPTION} ${CC_SRCS})
  endif()

  if(TARGET ${CC_ARGS_NAME}_objects)
    _cc_target_properties(
        NAME "${CC_ARGS_NAME}_objects"
        INCS "${CPP_OUTPATH};${CC_PROTOBUF_INCS}"
        DEFS "${PROTO_DEFS}"
        LIBS "${CC_PROTOBUF_LIBS}"
        CXXFLAGS "${CC_ARGS_CXXFLAGS}"
        LDFLAGS "${CC_ARGS_LDFLAGS}"
        DEPS "${CC_ARGS_DEPS}"
        "${CC_ARGS_UNPARSED_ARGUMENTS}"
      )
  endif()

  if(TARGET ${CC_ARGS_NAME}_static)
    _cc_target_properties(
        NAME "${CC_ARGS_NAME}_static"
        PUBINCS "${CPP_OUTPATH};${CC_PROTOBUF_INCS}"
        DEFS "${PROTO_DEFS}"
        LIBS "${CC_PROTOBUF_LIBS}"
        CXXFLAGS "${CC_ARGS_CXXFLAGS}"
        LDFLAGS "${CC_ARGS_LDFLAGS}"
        DEPS "${CC_ARGS_DEPS}"
        "${CC_ARGS_UNPARSED_ARGUMENTS}"
      )
    if(CC_ARGS_PACKED)
      install(
          TARGETS ${CC_ARGS_NAME}_static
          ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"
        )
    endif()
  endif()

  _cc_target_properties(
      NAME "${CC_ARGS_NAME}"
      PUBINCS "${CPP_OUTPATH};${CC_PROTOBUF_INCS}"
      DEFS "${PROTO_DEFS}"
      LIBS "${CC_PROTOBUF_LIBS}"
      CXXFLAGS "${CC_ARGS_CXXFLAGS}"
      LDFLAGS "${CC_ARGS_LDFLAGS}"
      DEPS "${CC_ARGS_DEPS}"
      VERSION "${CC_ARGS_VERSION}"
      "${CC_ARGS_UNPARSED_ARGUMENTS}"
    )
  if(CC_ARGS_PACKED)
    install(
        TARGETS ${CC_ARGS_NAME}
        ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"
        LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
      )
  endif()
endfunction()

## Add a subdirectory to the build (requires a CUDA compiler)
##
## Same as cc_directory(), but aborts the configuration when
## CMAKE_CUDA_COMPILER is not set.
function(cuda_directory)
  if(CMAKE_CUDA_COMPILER)
    cc_directory(${ARGN})
  else()
    message(FATAL_ERROR "No CUDA language supported.")
  endif()
endfunction()

## Add subdirectories to the build (requires a CUDA compiler)
##
## Same as cc_directories(), but aborts the configuration when
## CMAKE_CUDA_COMPILER is not set.
function(cuda_directories)
  if(CMAKE_CUDA_COMPILER)
    cc_directories(${ARGN})
  else()
    message(FATAL_ERROR "No CUDA language supported.")
  endif()
endfunction()

## Set the properties of cuda target
##
## Usage:
##   _cuda_target_properties(
##       NAME <target> [STRICT] [ALWAYS_LINK]
##       [VERSION <version>] [C_STANDARD <std>] [CXX_STANDARD <std>]
##       [INCS <dir>...] [PUBINCS <dir>...] [DEFS <def>...] [LIBS <lib>...]
##       [CFLAGS <flag>...] [CXXFLAGS <flag>...] [CUDAFLAGS <flag>...]
##       [LDFLAGS <flag>...] [DEPS <target>...]
##     )
##
## CUDA counterpart of _cc_target_properties(): applies compile/link flags,
## definitions, include directories, libraries, dependencies, version and
## language-standard properties to an existing target. The C/C++ standard is
## taken from C_STANDARD / CXX_STANDARD when given, otherwise from the
## CUDA_C_STANDARD / CUDA_CXX_STANDARD variables visible to this function,
## otherwise it defaults to C99 / C++11. CUDA_STANDARD is always 11.
function(_cuda_target_properties)
  cmake_parse_arguments(
      CUDA_ARGS "STRICT;ALWAYS_LINK" "NAME;VERSION;C_STANDARD;CXX_STANDARD"
      "INCS;PUBINCS;DEFS;LIBS;CFLAGS;CXXFLAGS;CUDAFLAGS;LDFLAGS;DEPS" ${ARGN}
    )

  if(NOT CUDA_ARGS_NAME)
    message(FATAL_ERROR "No target name provided.")
  endif()

  # Only these target types accept target_link_libraries() here; object
  # libraries are handled through _targets_link_dependencies() instead.
  get_target_property(TARGET_TYPE ${CUDA_ARGS_NAME} TYPE)
  set(TARGET_LINKABLE FALSE)
  if(("${TARGET_TYPE}" STREQUAL "SHARED_LIBRARY") OR
      ("${TARGET_TYPE}" STREQUAL "STATIC_LIBRARY") OR
      ("${TARGET_TYPE}" STREQUAL "EXECUTABLE"))
    set(TARGET_LINKABLE TRUE)
  endif()

  # ALWAYS_LINK is recorded as a custom target property; it is read back by
  # _target_link_libraries() when an executable links this library.
  if(CUDA_ARGS_ALWAYS_LINK)
    if(("${TARGET_TYPE}" STREQUAL "STATIC_LIBRARY") OR
        ("${TARGET_TYPE}" STREQUAL "OBJECT_LIBRARY"))
      set_property(TARGET ${CUDA_ARGS_NAME} PROPERTY ALWAYS_LINK TRUE)
    endif()
  endif()

  # Set the warning level of compiling
  if(CUDA_ARGS_STRICT)
    target_compile_options(
        ${CUDA_ARGS_NAME} PRIVATE "${BAZEL_CUDA_STRICT_COMPILE_FLAGS}"
      )
    if(TARGET_LINKABLE)
      target_link_libraries(
          ${CUDA_ARGS_NAME} "${BAZEL_CUDA_STRICT_LINK_FLAGS}"
        )
    endif()
  else()
    target_compile_options(
        ${CUDA_ARGS_NAME} PRIVATE "${BAZEL_CUDA_UNSTRICT_COMPILE_FLAGS}"
      )
    if(TARGET_LINKABLE)
      target_link_libraries(
          ${CUDA_ARGS_NAME} "${BAZEL_CUDA_UNSTRICT_LINK_FLAGS}"
        )
    endif()
  endif()

  # Tell the CUDA compiler which host compiler to use.
  target_compile_options(
      ${CUDA_ARGS_NAME} PRIVATE
      "$<$<COMPILE_LANGUAGE:CUDA>:-ccbin=${CMAKE_CXX_COMPILER}>"
    )

  if(CUDA_ARGS_DEFS)
    target_compile_definitions(${CUDA_ARGS_NAME} PRIVATE "${CUDA_ARGS_DEFS}")
  endif()

  # Language-specific flags are attached per compile language.
  if(CUDA_ARGS_CFLAGS OR CUDA_ARGS_CXXFLAGS OR CUDA_ARGS_CUDAFLAGS)
    target_compile_options(
        ${CUDA_ARGS_NAME} PRIVATE
        "$<$<COMPILE_LANGUAGE:C>:${CUDA_ARGS_CFLAGS}>"
        "$<$<COMPILE_LANGUAGE:CXX>:${CUDA_ARGS_CXXFLAGS}>"
        "$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_ARGS_CUDAFLAGS}>"
      )
  endif()

  # LINK_FLAGS is a single space-separated string, not a list.
  if(CUDA_ARGS_LDFLAGS)
    string(REPLACE ";" " " CUDA_ARGS_LDFLAGS "${CUDA_ARGS_LDFLAGS}")
    set_property(
        TARGET ${CUDA_ARGS_NAME} PROPERTY LINK_FLAGS "${CUDA_ARGS_LDFLAGS}"
      )
  endif()

  if(CUDA_ARGS_INCS)
    _absolute_paths(INC_DIRS ${CUDA_ARGS_INCS})
    target_include_directories(${CUDA_ARGS_NAME} PRIVATE "${INC_DIRS}")
  endif()

  target_include_directories(
      ${CUDA_ARGS_NAME} PRIVATE "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}"
    )

  if(BAZEL_WORKSPACE_DIR)
    target_include_directories(
        ${CUDA_ARGS_NAME} PRIVATE "${BAZEL_WORKSPACE_DIR}"
      )
  endif()

  if(CUDA_ARGS_PUBINCS)
    _absolute_paths(INC_DIRS ${CUDA_ARGS_PUBINCS})
    target_include_directories(${CUDA_ARGS_NAME} PUBLIC "${INC_DIRS}")
  endif()

  if(CUDA_ARGS_LIBS)
    if(NOT TARGET_LINKABLE)
      _targets_link_dependencies(${CUDA_ARGS_NAME} ${CUDA_ARGS_LIBS})
    else()
      # Executables go through the ALWAYS_LINK-aware helper.
      if ("${TARGET_TYPE}" STREQUAL "EXECUTABLE")
        _target_link_libraries(${CUDA_ARGS_NAME} "${CUDA_ARGS_LIBS}")
      else()
        target_link_libraries(${CUDA_ARGS_NAME} "${CUDA_ARGS_LIBS}")
      endif()
    endif()
  endif()

  # Expand the list unquoted so that each dependency is its own argument;
  # a quoted list would be taken as one target name containing ';'.
  if(CUDA_ARGS_DEPS)
    add_dependencies(${CUDA_ARGS_NAME} ${CUDA_ARGS_DEPS})
  endif()

  if(CUDA_ARGS_VERSION)
    set_property(
        TARGET ${CUDA_ARGS_NAME} PROPERTY VERSION "${CUDA_ARGS_VERSION}"
      )
  endif()

  # Resolve the language standards: explicit argument first, then the
  # CUDA_C_STANDARD / CUDA_CXX_STANDARD variable if one is visible, then
  # default.
  if(CUDA_ARGS_C_STANDARD)
    set(CUDA_C_STANDARD "${CUDA_ARGS_C_STANDARD}")
  elseif(NOT CUDA_C_STANDARD)
    set(CUDA_C_STANDARD 99)
  endif()

  if(CUDA_ARGS_CXX_STANDARD)
    set(CUDA_CXX_STANDARD "${CUDA_ARGS_CXX_STANDARD}")
  elseif(NOT CUDA_CXX_STANDARD)
    set(CUDA_CXX_STANDARD 11)
  endif()

  set_target_properties(
      ${CUDA_ARGS_NAME} PROPERTIES DEFINE_SYMBOL ""
      C_STANDARD ${CUDA_C_STANDARD} CXX_STANDARD ${CUDA_CXX_STANDARD}
      C_STANDARD_REQUIRED ON C_EXTENSIONS ON
      CXX_STANDARD_REQUIRED ON CXX_EXTENSIONS OFF
      CUDA_STANDARD 11 CUDA_STANDARD_REQUIRED ON CUDA_EXTENSIONS OFF
      WINDOWS_EXPORT_ALL_SYMBOLS ON
    )
endfunction()

## Build a CUDA static or shared library
##
## Options:     STATIC, SHARED, EXCLUDE, PACKED
## One-value:   NAME, VERSION
## Multi-value: SRCS, INCS, PUBINCS, DEFS, LIBS, CFLAGS, CXXFLAGS, CUDAFLAGS,
##              LDFLAGS, DEPS, PACKED_EXCLUDES (PACKED_EXCS is accepted as an
##              alias)
##
## - Fails when CMAKE_CUDA_COMPILER is not set.
## - STATIC and SHARED together create <NAME> (shared), <NAME>_static and
##   <NAME>_objects via _add_library(); alone they select the library type;
##   with neither, the type is left to add_library().
## - SRCS entries are expanded with file(GLOB).
## - VERSION adds the definition <NAME with '-' -> '_'>_VERSION="<version>".
## - PACKED installs the libraries and the headers (*.h, *.hpp, *.hxx, *.cuh)
##   found under PUBINCS, skipping the PACKED_EXCLUDES patterns.
## - Arguments not listed above are forwarded to _cuda_target_properties().
function(cuda_library)
  if(NOT CMAKE_CUDA_COMPILER)
    message(FATAL_ERROR "No CUDA language supported.")
  endif()

  cmake_parse_arguments(
      CUDA_ARGS "STATIC;SHARED;EXCLUDE;PACKED" "NAME;VERSION"
      "SRCS;INCS;PUBINCS;DEFS;LIBS;CFLAGS;CXXFLAGS;CUDAFLAGS;LDFLAGS;DEPS;PACKED_EXCLUDES;PACKED_EXCS"
      ${ARGN}
    )

  if(NOT CUDA_ARGS_NAME)
    message(FATAL_ERROR "No target name provided.")
  endif()

  file(GLOB CUDA_ARGS_SRCS ${CUDA_ARGS_SRCS})
  if(NOT CUDA_ARGS_SRCS)
    message(FATAL_ERROR "No source files found of ${CUDA_ARGS_NAME}.")
  endif()

  if(CUDA_ARGS_VERSION)
    string(REPLACE "-" "_" MACRO_PREFIX "${CUDA_ARGS_NAME}")
    list(APPEND CUDA_ARGS_DEFS ${MACRO_PREFIX}_VERSION="${CUDA_ARGS_VERSION}")
  endif()

  if(CUDA_ARGS_EXCLUDE)
    set(EXCLUDE_OPTION EXCLUDE_FROM_ALL)
  endif()

  if(CUDA_ARGS_SHARED AND CUDA_ARGS_STATIC)
    _add_library(${CUDA_ARGS_NAME} "${EXCLUDE_OPTION}" "${CUDA_ARGS_SRCS}")
  elseif(CUDA_ARGS_SHARED)
    add_library(${CUDA_ARGS_NAME} SHARED ${EXCLUDE_OPTION} ${CUDA_ARGS_SRCS})
  elseif(CUDA_ARGS_STATIC)
    add_library(${CUDA_ARGS_NAME} STATIC ${EXCLUDE_OPTION} ${CUDA_ARGS_SRCS})
  else()
    add_library(${CUDA_ARGS_NAME} ${EXCLUDE_OPTION} ${CUDA_ARGS_SRCS})
  endif()

  # The object library receives the public include directories as ordinary
  # (private) include directories.
  if(TARGET ${CUDA_ARGS_NAME}_objects)
    _cuda_target_properties(
        NAME "${CUDA_ARGS_NAME}_objects"
        INCS "${CUDA_ARGS_INCS};${CUDA_ARGS_PUBINCS}"
        DEFS "${CUDA_ARGS_DEFS}"
        LIBS "${CUDA_ARGS_LIBS}"
        CFLAGS "${CUDA_ARGS_CFLAGS}"
        CXXFLAGS "${CUDA_ARGS_CXXFLAGS}"
        CUDAFLAGS "${CUDA_ARGS_CUDAFLAGS}"
        LDFLAGS "${CUDA_ARGS_LDFLAGS}"
        DEPS "${CUDA_ARGS_DEPS}"
        "${CUDA_ARGS_UNPARSED_ARGUMENTS}"
      )
  endif()

  if(TARGET ${CUDA_ARGS_NAME}_static)
    _cuda_target_properties(
        NAME "${CUDA_ARGS_NAME}_static"
        INCS "${CUDA_ARGS_INCS}"
        PUBINCS "${CUDA_ARGS_PUBINCS}"
        DEFS "${CUDA_ARGS_DEFS}"
        LIBS "${CUDA_ARGS_LIBS}"
        CFLAGS "${CUDA_ARGS_CFLAGS}"
        CXXFLAGS "${CUDA_ARGS_CXXFLAGS}"
        CUDAFLAGS "${CUDA_ARGS_CUDAFLAGS}"
        LDFLAGS "${CUDA_ARGS_LDFLAGS}"
        DEPS "${CUDA_ARGS_DEPS}"
        "${CUDA_ARGS_UNPARSED_ARGUMENTS}"
      )
    if(CUDA_ARGS_PACKED)
      install(
          TARGETS ${CUDA_ARGS_NAME}_static
          ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"
        )
    endif()
  endif()

  _cuda_target_properties(
      NAME "${CUDA_ARGS_NAME}"
      INCS "${CUDA_ARGS_INCS}"
      PUBINCS "${CUDA_ARGS_PUBINCS}"
      DEFS "${CUDA_ARGS_DEFS}"
      LIBS "${CUDA_ARGS_LIBS}"
      CFLAGS "${CUDA_ARGS_CFLAGS}"
      CXXFLAGS "${CUDA_ARGS_CXXFLAGS}"
      CUDAFLAGS "${CUDA_ARGS_CUDAFLAGS}"
      LDFLAGS "${CUDA_ARGS_LDFLAGS}"
      DEPS "${CUDA_ARGS_DEPS}"
      VERSION "${CUDA_ARGS_VERSION}"
      "${CUDA_ARGS_UNPARSED_ARGUMENTS}"
    )
  if(CUDA_ARGS_PACKED)
    install(
        TARGETS ${CUDA_ARGS_NAME}
        ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"
        LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
      )
    if(CUDA_ARGS_PUBINCS)
      # Build the exclusion rules from the parsed keywords. Both spellings
      # are honoured; each entry becomes "PATTERN <entry> EXCLUDE".
      set(PATTERN_EXCLUDES "")
      foreach(PACKED_EXCLUDE
          ${CUDA_ARGS_PACKED_EXCLUDES} ${CUDA_ARGS_PACKED_EXCS})
        list(APPEND PATTERN_EXCLUDES "PATTERN;${PACKED_EXCLUDE};EXCLUDE")
      endforeach()
      install(
          DIRECTORY ${CUDA_ARGS_PUBINCS} DESTINATION ${CMAKE_INSTALL_INCDIR}
          FILES_MATCHING PATTERN "*.h" PATTERN "*.hpp"
          PATTERN "*.hxx" PATTERN "*.cuh"
          ${PATTERN_EXCLUDES}
        )
    endif()
  endif()
endfunction()

## Build a CUDA executable program
##
## Keywords:
##   PACKED   install the executable into CMAKE_INSTALL_BINDIR
##   NAME     target name (mandatory)
##   VERSION  optional; also added as the definition
##            <NAME with '-' replaced by '_'>_VERSION="<VERSION>"
##   SRCS     glob expressions for the sources (mandatory)
##   INCS, DEFS, LIBS, CFLAGS, CXXFLAGS, CUDAFLAGS, LDFLAGS, DEPS
##            forwarded to _cuda_target_properties
function(cuda_binary)
  if(NOT CMAKE_CUDA_COMPILER)
    message(FATAL_ERROR "No CUDA language supported.")
  endif()

  set(_list_keywords SRCS INCS DEFS LIBS CFLAGS CXXFLAGS CUDAFLAGS LDFLAGS DEPS)
  cmake_parse_arguments(
      CUDA_ARGS "PACKED" "NAME;VERSION" "${_list_keywords}" ${ARGN}
    )

  if(NOT CUDA_ARGS_NAME)
    message(FATAL_ERROR "No target name privated.")
  endif()

  # Resolve the source patterns; nothing matched means nothing to build.
  file(GLOB CUDA_ARGS_SRCS ${CUDA_ARGS_SRCS})
  if(NOT CUDA_ARGS_SRCS)
    message(FATAL_ERROR "No source files found of ${CUDA_ARGS_NAME}.")
  endif()

  add_executable(${CUDA_ARGS_NAME} ${CUDA_ARGS_SRCS})
  if(CUDA_ARGS_PACKED)
    install(
        TARGETS ${CUDA_ARGS_NAME}
        RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}"
      )
  endif()

  # Expose the version string to the sources as a preprocessor definition.
  if(CUDA_ARGS_VERSION)
    string(REPLACE "-" "_" MACRO_PREFIX "${CUDA_ARGS_NAME}")
    list(APPEND CUDA_ARGS_DEFS ${MACRO_PREFIX}_VERSION="${CUDA_ARGS_VERSION}")
  endif()

  _cuda_target_properties(
      NAME "${CUDA_ARGS_NAME}"
      INCS "${CUDA_ARGS_INCS}"
      DEFS "${CUDA_ARGS_DEFS}"
      LIBS "${CUDA_ARGS_LIBS}"
      CFLAGS "${CUDA_ARGS_CFLAGS}"
      CXXFLAGS "${CUDA_ARGS_CXXFLAGS}"
      CUDAFLAGS "${CUDA_ARGS_CUDAFLAGS}"
      LDFLAGS "${CUDA_ARGS_LDFLAGS}"
      DEPS "${CUDA_ARGS_DEPS}"
      VERSION "${CUDA_ARGS_VERSION}"
      "${CUDA_ARGS_UNPARSED_ARGUMENTS}"
    )
endfunction()

## Build a CUDA executable test program
##
## The executable is excluded from the default build, made a dependency of
## the `unittest` target, and registered both as the custom target
## `unittest.<NAME>` and as a CTest test named <NAME>. Both run the binary
## from PROJECT_BINARY_DIR.
##
## Keywords:
##   NAME     target name (mandatory)
##   VERSION  optional; added as the definition
##            <NAME with '-' replaced by '_'>_VERSION="<VERSION>"
##   SRCS     glob expressions for the sources (mandatory)
##   INCS, DEFS, LIBS, CFLAGS, CXXFLAGS, CUDAFLAGS, LDFLAGS, DEPS
##            forwarded to _cuda_target_properties
##   ARGS     command line arguments passed to the test executable
function(cuda_test)
  if(NOT CMAKE_CUDA_COMPILER)
    message(FATAL_ERROR "No CUDA language supported.")
  endif()

  cmake_parse_arguments(
      CUDA_ARGS "" "NAME;VERSION"
      "SRCS;INCS;DEFS;LIBS;CFLAGS;CXXFLAGS;CUDAFLAGS;LDFLAGS;DEPS;ARGS" ${ARGN}
    )

  if(NOT CUDA_ARGS_NAME)
    message(FATAL_ERROR "No target name privated.")
  endif()

  file(GLOB CUDA_ARGS_SRCS ${CUDA_ARGS_SRCS})
  if(NOT CUDA_ARGS_SRCS)
    message(FATAL_ERROR "No source files found of ${CUDA_ARGS_NAME}.")
  endif()

  if(CUDA_ARGS_VERSION)
    string(REPLACE "-" "_" MACRO_PREFIX "${CUDA_ARGS_NAME}")
    list(APPEND CUDA_ARGS_DEFS ${MACRO_PREFIX}_VERSION="${CUDA_ARGS_VERSION}")
  endif()
  add_executable(${CUDA_ARGS_NAME} EXCLUDE_FROM_ALL ${CUDA_ARGS_SRCS})

  _cuda_target_properties(
      NAME "${CUDA_ARGS_NAME}"
      INCS "${CUDA_ARGS_INCS}"
      DEFS "${CUDA_ARGS_DEFS}"
      LIBS "${CUDA_ARGS_LIBS}"
      CFLAGS "${CUDA_ARGS_CFLAGS}"
      CXXFLAGS "${CUDA_ARGS_CXXFLAGS}"
      CUDAFLAGS "${CUDA_ARGS_CUDAFLAGS}"
      LDFLAGS "${CUDA_ARGS_LDFLAGS}"
      DEPS "${CUDA_ARGS_DEPS}"
      "${CUDA_ARGS_UNPARSED_ARGUMENTS}"
    )
  add_dependencies(unittest ${CUDA_ARGS_NAME})

  # ARGS is expanded unquoted on purpose: each list element becomes its own
  # command line argument and an empty list adds nothing. A quoted expansion
  # would hand the program a single "a;b" argument (or an empty one).
  add_custom_target(
      unittest.${CUDA_ARGS_NAME}
      COMMAND $<TARGET_FILE:${CUDA_ARGS_NAME}> ${CUDA_ARGS_ARGS}
      WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
      DEPENDS ${CUDA_ARGS_NAME}
    )
  add_test(
      NAME ${CUDA_ARGS_NAME}
      COMMAND $<TARGET_FILE:${CUDA_ARGS_NAME}> ${CUDA_ARGS_ARGS}
      WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
    )
endfunction()

## Add existing test cases to a test suite
##
## Forwards every argument unchanged to cc_test_suite() once a CUDA compiler
## is known to be configured.
function(cuda_test_suite)
  if(CMAKE_CUDA_COMPILER)
    cc_test_suite(${ARGN})
  else()
    message(FATAL_ERROR "No CUDA language supported.")
  endif()
endfunction()

## Import a C/C++/CUDA static or shared library
##
## Creates a GLOBAL IMPORTED library target for a prebuilt binary.
##
## Options:
##   STATIC, SHARED   imported library type (UNKNOWN when neither is given)
##   PACKED           install the target and the PUBINCS headers
## One-value keywords:
##   NAME             target name (mandatory)
##   PATH             glob expression locating the library file (mandatory)
##   IMPLIB           import library for MSVC shared builds; derived from
##                    PATH by replacing the .dll suffix when omitted
## Multi-value keywords:
##   INCS, PUBINCS    directories added to INTERFACE_INCLUDE_DIRECTORIES
##   DEPS             targets this imported target depends on
##   PACKED_EXCLUDES  patterns excluded when installing the PUBINCS headers
function(cuda_import)
  if(NOT CMAKE_CUDA_COMPILER)
    message(FATAL_ERROR "No CUDA language supported.")
  endif()

  cmake_parse_arguments(
      CUDA_ARGS "STATIC;SHARED;PACKED"
      "NAME;PATH;IMPLIB" "INCS;PUBINCS;DEPS;PACKED_EXCLUDES" ${ARGN}
    )

  if(NOT CUDA_ARGS_NAME)
    message(FATAL_ERROR "No target name privated.")
  endif()

  file(GLOB CUDA_ARGS_PATH ${CUDA_ARGS_PATH})
  if(NOT CUDA_ARGS_PATH)
    message(FATAL_ERROR "No imported target file found of ${CUDA_ARGS_NAME}.")
  endif()
  if(MSVC AND CUDA_ARGS_SHARED AND NOT CUDA_ARGS_IMPLIB)
    # The dot is escaped so that only a real ".dll" suffix is rewritten.
    string(REGEX REPLACE
        "\\.[Dd][Ll][Ll]$" ".lib" CUDA_ARGS_IMPLIB ${CUDA_ARGS_PATH}
      )
  endif()

  if(CUDA_ARGS_SHARED)
    add_library(${CUDA_ARGS_NAME} SHARED IMPORTED GLOBAL)
  elseif(CUDA_ARGS_STATIC)
    add_library(${CUDA_ARGS_NAME} STATIC IMPORTED GLOBAL)
  else()
    add_library(${CUDA_ARGS_NAME} UNKNOWN IMPORTED GLOBAL)
  endif()

  set_property(
      TARGET ${CUDA_ARGS_NAME} PROPERTY IMPORTED_LOCATION ${CUDA_ARGS_PATH}
    )
  if(MSVC AND CUDA_ARGS_SHARED)
    set_property(
        TARGET ${CUDA_ARGS_NAME} PROPERTY IMPORTED_IMPLIB ${CUDA_ARGS_IMPLIB}
      )
  endif()

  if(CUDA_ARGS_INCS)
    _absolute_paths(INC_DIRS ${CUDA_ARGS_INCS})
    foreach(INC_DIR ${INC_DIRS})
      set_property(
          TARGET ${CUDA_ARGS_NAME} APPEND PROPERTY
          INTERFACE_INCLUDE_DIRECTORIES "${INC_DIR}"
        )
    endforeach()
  endif()

  if(CUDA_ARGS_PUBINCS)
    _absolute_paths(INC_DIRS ${CUDA_ARGS_PUBINCS})
    foreach(INC_DIR ${INC_DIRS})
      set_property(
          TARGET ${CUDA_ARGS_NAME} APPEND PROPERTY
          INTERFACE_INCLUDE_DIRECTORIES "${INC_DIR}"
        )
    endforeach()
  endif()

  if(CUDA_ARGS_DEPS)
    # Unquoted so that every dependency is a separate argument; a quoted list
    # would be taken as one target literally named "a;b".
    add_dependencies(${CUDA_ARGS_NAME} ${CUDA_ARGS_DEPS})
  endif()

  if(CUDA_ARGS_PACKED)
    # NOTE(review): install(TARGETS) is applied to an IMPORTED target here;
    # confirm that the supported CMake versions accept this.
    install(
        TARGETS ${CUDA_ARGS_NAME}
        ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"
        LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
      )
    if(CUDA_ARGS_PUBINCS)
      foreach(PACKED_EXCLUDE ${CUDA_ARGS_PACKED_EXCLUDES})
        list(APPEND PATTERN_EXCLUDES "PATTERN;${PACKED_EXCLUDE};EXCLUDE")
      endforeach()
      install(
          DIRECTORY ${CUDA_ARGS_PUBINCS} DESTINATION ${CMAKE_INSTALL_INCDIR}
          FILES_MATCHING PATTERN "*.h" PATTERN "*.hpp"
          PATTERN "*.hxx" PATTERN "*.cuh"
          ${PATTERN_EXCLUDES}
        )
    endif()
  endif()
endfunction()

## Import a C/C++/CUDA interface library
##
## Creates an INTERFACE library that only carries include directories and
## dependencies.
##
## Keywords:
##   PACKED           install the PUBINCS headers
##   NAME             target name (mandatory)
##   INCS, PUBINCS    directories exposed as INTERFACE include directories
##   DEPS             targets this interface target depends on
##   PACKED_EXCLUDES  patterns excluded when installing the PUBINCS headers
function(cuda_interface)
  if(NOT CMAKE_CUDA_COMPILER)
    message(FATAL_ERROR "No CUDA language supported.")
  endif()

  cmake_parse_arguments(
      CUDA_ARGS "PACKED" "NAME" "INCS;PUBINCS;DEPS;PACKED_EXCLUDES" ${ARGN}
    )

  if(NOT CUDA_ARGS_NAME)
    message(FATAL_ERROR "No target name privated.")
  endif()

  # No GLOBAL keyword: add_library() only accepts it for IMPORTED libraries
  # and rejects it on a plain INTERFACE library.
  add_library(${CUDA_ARGS_NAME} INTERFACE)
  if(CUDA_ARGS_INCS)
    _absolute_paths(INC_DIRS ${CUDA_ARGS_INCS})
    target_include_directories(${CUDA_ARGS_NAME} INTERFACE "${INC_DIRS}")
  endif()

  if(CUDA_ARGS_PUBINCS)
    _absolute_paths(INC_DIRS ${CUDA_ARGS_PUBINCS})
    target_include_directories(${CUDA_ARGS_NAME} INTERFACE "${INC_DIRS}")
  endif()

  if(CUDA_ARGS_DEPS)
    # Unquoted so that every dependency is a separate argument; a quoted list
    # would be taken as one target literally named "a;b".
    add_dependencies(${CUDA_ARGS_NAME} ${CUDA_ARGS_DEPS})
  endif()

  if(CUDA_ARGS_PACKED AND CUDA_ARGS_PUBINCS)
    foreach(PACKED_EXCLUDE ${CUDA_ARGS_PACKED_EXCLUDES})
      list(APPEND PATTERN_EXCLUDES "PATTERN;${PACKED_EXCLUDE};EXCLUDE")
    endforeach()
    install(
        DIRECTORY ${CUDA_ARGS_PUBINCS} DESTINATION ${CMAKE_INSTALL_INCDIR}
        FILES_MATCHING PATTERN "*.h" PATTERN "*.hpp"
        PATTERN "*.hxx" PATTERN "*.cuh"
        ${PATTERN_EXCLUDES}
      )
  endif()
endfunction()

## Build a C/C++/CUDA executable google test program
##
## Same keywords as cuda_test(). The include directories and libraries
## reported by _find_gtest() (FIND_GTEST_INCS / FIND_GTEST_LIBS) are appended
## to INCS and LIBS before the call is forwarded to cuda_test().
function(cuda_gtest)
  cmake_parse_arguments(
      CUDA_ARGS "" "NAME;VERSION"
      "SRCS;INCS;DEFS;LIBS;CFLAGS;CXXFLAGS;CUDAFLAGS;LDFLAGS;DEPS;ARGS" ${ARGN}
    )
  _find_gtest()

  # Caller-supplied values first, googletest additions last.
  set(_gtest_incs "${CUDA_ARGS_INCS};${FIND_GTEST_INCS}")
  set(_gtest_libs "${CUDA_ARGS_LIBS};${FIND_GTEST_LIBS}")

  cuda_test(
      NAME "${CUDA_ARGS_NAME}"
      VERSION "${CUDA_ARGS_VERSION}"
      SRCS "${CUDA_ARGS_SRCS}"
      INCS "${_gtest_incs}"
      DEFS "${CUDA_ARGS_DEFS}"
      LIBS "${_gtest_libs}"
      CFLAGS "${CUDA_ARGS_CFLAGS}"
      CXXFLAGS "${CUDA_ARGS_CXXFLAGS}"
      CUDAFLAGS "${CUDA_ARGS_CUDAFLAGS}"
      LDFLAGS "${CUDA_ARGS_LDFLAGS}"
      DEPS "${CUDA_ARGS_DEPS}"
      ARGS "${CUDA_ARGS_ARGS}"
    )
endfunction()

## Build a C/C++/CUDA executable google mock program
##
## Same keywords as cuda_test(). The include directories and libraries
## reported by _find_gmock() (FIND_GMOCK_INCS / FIND_GMOCK_LIBS) are appended
## to INCS and LIBS before the call is forwarded to cuda_test().
function(cuda_gmock)
  cmake_parse_arguments(
      CUDA_ARGS "" "NAME;VERSION"
      "SRCS;INCS;DEFS;LIBS;CFLAGS;CXXFLAGS;CUDAFLAGS;LDFLAGS;DEPS;ARGS" ${ARGN}
    )
  _find_gmock()

  # Caller-supplied values first, googlemock additions last.
  set(_gmock_incs "${CUDA_ARGS_INCS};${FIND_GMOCK_INCS}")
  set(_gmock_libs "${CUDA_ARGS_LIBS};${FIND_GMOCK_LIBS}")

  cuda_test(
      NAME "${CUDA_ARGS_NAME}"
      VERSION "${CUDA_ARGS_VERSION}"
      SRCS "${CUDA_ARGS_SRCS}"
      INCS "${_gmock_incs}"
      DEFS "${CUDA_ARGS_DEFS}"
      LIBS "${_gmock_libs}"
      CFLAGS "${CUDA_ARGS_CFLAGS}"
      CXXFLAGS "${CUDA_ARGS_CXXFLAGS}"
      CUDAFLAGS "${CUDA_ARGS_CUDAFLAGS}"
      LDFLAGS "${CUDA_ARGS_LDFLAGS}"
      DEPS "${CUDA_ARGS_DEPS}"
      ARGS "${CUDA_ARGS_ARGS}"
    )
endfunction()

## Add a subdirectory to the build
##
## Thin wrapper: all arguments are forwarded unchanged to add_subdirectory().
function(go_directory)
  add_subdirectory(${ARGN})
endfunction()

## Add subdirectories to the build
##
## Calls add_subdirectory() once for every argument, in the order given.
function(go_directories)
  foreach(SRC_DIR ${ARGN})
    add_subdirectory(${SRC_DIR})
  endforeach()
endfunction()

## Build a go executable program
##
## Defines a custom target <NAME> that runs `go build` and writes the
## executable to CMAKE_RUNTIME_OUTPUT_DIRECTORY.
##
## Keywords:
##   PACKED    install the built program into CMAKE_INSTALL_BINDIR
##   NAME      target and output file name (mandatory)
##   GOPATH    entries joined into the GOPATH environment variable
##   SRCS      glob expressions for the sources/directories (mandatory)
##   ASMFLAGS, GCFLAGS, LDFLAGS
##             passed to `go build` as -asmflags / -gcflags / -ldflags
##   DEPS      passed to DEPENDS of the custom target
function(go_binary)
  # Look for the go tool under $HOME/go, GOROOT and GOPATH (each with a bin
  # suffix) in addition to the default search locations.
  find_program(
      GO_EXECUTABLE go PATHS $ENV{HOME}/go ENV GOROOT GOPATH PATH_SUFFIXES bin
    )
  if(NOT GO_EXECUTABLE)
    message(FATAL_ERROR "No go language compiler found.")
  endif()

  cmake_parse_arguments(
      GO_ARGS "PACKED" "NAME"
      "GOPATH;SRCS;ASMFLAGS;GCFLAGS;LDFLAGS;DEPS" ${ARGN}
    )
  if(NOT GO_ARGS_NAME)
    message(FATAL_ERROR "No target name privated.")
  endif()

  file(GLOB GO_ARGS_SRCS ${GO_ARGS_SRCS})
  if(NOT GO_ARGS_SRCS)
    message(FATAL_ERROR "No source files/directories found of ${GO_ARGS_NAME}.")
  endif()

  # Turn the CMake list into a single GOPATH string: entries stay separated
  # by an (escaped) ';' on Windows and are joined with ':' everywhere else.
  if(${CMAKE_SYSTEM_NAME} MATCHES "Windows")
    string(REPLACE ";" "\;" GO_ARGS_GOPATH "${GO_ARGS_GOPATH}")
  else()
    string(REPLACE ";" ":" GO_ARGS_GOPATH "${GO_ARGS_GOPATH}")
  endif()

  set(
      GO_OUTPUT_FILE
      ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${GO_ARGS_NAME}${CMAKE_EXECUTABLE_SUFFIX}
    )
  # The path relative to the build tree is only used in the progress comment.
  file(RELATIVE_PATH GO_OUTPUT_REL_FILE ${CMAKE_BINARY_DIR} ${GO_OUTPUT_FILE})
  # `cmake -E env` sets GOPATH for the go invocation only.
  add_custom_target(
      ${GO_ARGS_NAME}
      COMMAND ${CMAKE_COMMAND} -E env GOPATH="${GO_ARGS_GOPATH}"
      "${GO_EXECUTABLE}" build -v -buildmode=exe
      -compiler=gc -gcflags="${GO_ARGS_GCFLAGS}" -asmflags="${GO_ARGS_ASMFLAGS}"
      -ldflags="${GO_ARGS_LDFLAGS}"
      -o "${GO_OUTPUT_FILE}" "${GO_ARGS_SRCS}"
      WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
      DEPENDS "${GO_ARGS_DEPS}"
      COMMENT "Building GO executable ${GO_OUTPUT_REL_FILE}"
    )
  if(GO_ARGS_PACKED)
    install(PROGRAMS ${GO_OUTPUT_FILE} DESTINATION "${CMAKE_INSTALL_BINDIR}")
  endif()
endfunction()

## Fetch content
##
## Downloads external content at configure time. A small ExternalProject
## based CMakeLists.txt is written to
## ${PROJECT_BINARY_DIR}/downloads/<NAME> and configured/built immediately.
## The custom target `external.<NAME>` repeats the same steps on demand.
##
## One-value keywords:
##   NAME               fetch name (mandatory)
##   PATH               download destination; defaults to
##                      ${CMAKE_CURRENT_SOURCE_DIR}/<NAME>
##   GIT_URL, GIT_TAG   git repository and tag
##   HG_URL, HG_TAG     mercurial repository and tag
##   SVN_URL, SVN_REV   subversion repository and revision
##   URL, URL_HASH      archive URL and its hash
## Every source field is written to the generated file, as an empty string
## when the keyword was not supplied.
##
## A failing configure or build step is reported with a warning; it does not
## abort the surrounding configuration.
function(_fetch_content)
  cmake_parse_arguments(
      DL_ARGS ""
      "NAME;PATH;GIT_URL;GIT_TAG;HG_URL;HG_TAG;SVN_URL;SVN_REV;URL;URL_HASH"
      "" ${ARGN}
    )

  if(NOT DL_ARGS_NAME)
    message(FATAL_ERROR "No fetch name privated.")
  endif()

  if(NOT DL_ARGS_PATH)
    # Download to current source directory
    set(DL_ARGS_PATH "${CMAKE_CURRENT_SOURCE_DIR}/${DL_ARGS_NAME}")
  endif()

  # Download-only project: configure, build, install and test are disabled.
  set(
      CMAKELISTS_CONTENT
      "cmake_minimum_required(VERSION 3.1)\n"
      "project(${DL_ARGS_NAME})\n"
      "include(ExternalProject)\n"
      "ExternalProject_Add(\n"
      "    ${DL_ARGS_NAME}\n"
      "    PREFIX \"external\"\n"
      "    GIT_REPOSITORY \"${DL_ARGS_GIT_URL}\"\n"
      "    GIT_TAG \"${DL_ARGS_GIT_TAG}\"\n"
      "    HG_REPOSITORY \"${DL_ARGS_HG_URL}\"\n"
      "    HG_TAG \"${DL_ARGS_HG_TAG}\"\n"
      "    SVN_REPOSITORY \"${DL_ARGS_SVN_URL}\"\n"
      "    SVN_REVISION \"${DL_ARGS_SVN_REV}\"\n"
      "    URL \"${DL_ARGS_URL}\"\n"
      "    URL_HASH \"${DL_ARGS_URL_HASH}\"\n"
      "    SOURCE_DIR \"${DL_ARGS_PATH}\"\n"
      "    BINARY_DIR \"\"\n"
      "    CONFIGURE_COMMAND \"\"\n"
      "    BUILD_COMMAND \"\"\n"
      "    INSTALL_COMMAND \"\"\n"
      "    TEST_COMMAND \"\"\n"
      "    LOG_DOWNLOAD ON\n"
      "  )\n"
    )
  set(
      CMAKELISTS_DIRECTORY
      "${PROJECT_BINARY_DIR}/downloads/${DL_ARGS_NAME}"
    )
  add_custom_target(
      external.${DL_ARGS_NAME}
      COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" . &&
              "${CMAKE_COMMAND}" --build .
      WORKING_DIRECTORY "${CMAKELISTS_DIRECTORY}"
    )

  # Write a cmake script into folder
  file(WRITE "${CMAKELISTS_DIRECTORY}/CMakeLists.txt" ${CMAKELISTS_CONTENT})

  execute_process(
      COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" .
      WORKING_DIRECTORY "${CMAKELISTS_DIRECTORY}"
      RESULT_VARIABLE DL_CONFIGURE_RESULT
    )
  if(NOT DL_CONFIGURE_RESULT EQUAL 0)
    message(
        WARNING
        "Configuring the download project of ${DL_ARGS_NAME} failed "
        "(${DL_CONFIGURE_RESULT})."
      )
  endif()

  # The build step is still attempted after a configure failure, exactly as
  # before; only the diagnostics are new.
  execute_process(
      COMMAND "${CMAKE_COMMAND}" --build .
      WORKING_DIRECTORY "${CMAKELISTS_DIRECTORY}"
      RESULT_VARIABLE DL_BUILD_RESULT
    )
  if(NOT DL_BUILD_RESULT EQUAL 0)
    message(
        WARNING
        "Downloading ${DL_ARGS_NAME} into ${DL_ARGS_PATH} failed "
        "(${DL_BUILD_RESULT})."
      )
  endif()
endfunction()

## Download a git repository
##
## Keywords (one value each):
##   NAME  fetch name (mandatory)
##   URL   repository URL (mandatory)
##   TAG   forwarded as GIT_TAG
##   PATH  download destination; a relative path is made absolute first
function(git_repository)
  cmake_parse_arguments(GIT_ARGS "" "NAME;PATH;URL;TAG" "" ${ARGN})

  if(NOT GIT_ARGS_NAME)
    message(FATAL_ERROR "No repository name privated.")
  endif()
  if(NOT GIT_ARGS_URL)
    message(FATAL_ERROR "No repository URL privated.")
  endif()

  # Only a destination that was given and is relative needs resolving.
  if(GIT_ARGS_PATH)
    if(NOT IS_ABSOLUTE ${GIT_ARGS_PATH})
      get_filename_component(GIT_ARGS_PATH ${GIT_ARGS_PATH} ABSOLUTE)
    endif()
  endif()

  _fetch_content(
      NAME "${GIT_ARGS_NAME}" PATH "${GIT_ARGS_PATH}"
      GIT_URL "${GIT_ARGS_URL}" GIT_TAG "${GIT_ARGS_TAG}"
    )
endfunction()

## Download a hg repository
##
## Keywords (one value each):
##   NAME  fetch name (mandatory)
##   URL   repository URL (mandatory)
##   TAG   forwarded as HG_TAG
##   PATH  download destination; a relative path is made absolute first
function(hg_repository)
  cmake_parse_arguments(HG_ARGS "" "NAME;PATH;URL;TAG" "" ${ARGN})

  if(NOT HG_ARGS_NAME)
    message(FATAL_ERROR "No repository name privated.")
  endif()
  if(NOT HG_ARGS_URL)
    message(FATAL_ERROR "No repository URL privated.")
  endif()

  # Only a destination that was given and is relative needs resolving.
  if(HG_ARGS_PATH)
    if(NOT IS_ABSOLUTE ${HG_ARGS_PATH})
      get_filename_component(HG_ARGS_PATH ${HG_ARGS_PATH} ABSOLUTE)
    endif()
  endif()

  _fetch_content(
      NAME "${HG_ARGS_NAME}" PATH "${HG_ARGS_PATH}"
      HG_URL "${HG_ARGS_URL}" HG_TAG "${HG_ARGS_TAG}"
    )
endfunction()

## Download a svn repository
##
## Keywords (one value each):
##   NAME  fetch name (mandatory)
##   URL   repository URL (mandatory)
##   REV   forwarded as SVN_REV
##   PATH  download destination; a relative path is made absolute first
function(svn_repository)
  cmake_parse_arguments(SVN_ARGS "" "NAME;PATH;URL;REV" "" ${ARGN})

  if(NOT SVN_ARGS_NAME)
    message(FATAL_ERROR "No repository name privated.")
  endif()
  if(NOT SVN_ARGS_URL)
    message(FATAL_ERROR "No repository URL privated.")
  endif()

  # Only a destination that was given and is relative needs resolving.
  if(SVN_ARGS_PATH)
    if(NOT IS_ABSOLUTE ${SVN_ARGS_PATH})
      get_filename_component(SVN_ARGS_PATH ${SVN_ARGS_PATH} ABSOLUTE)
    endif()
  endif()

  _fetch_content(
      NAME "${SVN_ARGS_NAME}" PATH "${SVN_ARGS_PATH}"
      SVN_URL "${SVN_ARGS_URL}" SVN_REV "${SVN_ARGS_REV}"
    )
endfunction()

## Download a http archive
##
## Keywords (one value each):
##   NAME               fetch name (mandatory)
##   URL                archive URL (mandatory)
##   PATH               download destination; a relative path is made
##                      absolute first
##   SHA256, SHA1, MD5  expected archive hash; the first one present, in this
##                      order, is forwarded as URL_HASH "<ALGO>=<value>"
function(http_archive)
  cmake_parse_arguments(HTTP_ARGS "" "NAME;PATH;URL;SHA256;SHA1;MD5" "" ${ARGN})

  if(NOT HTTP_ARGS_NAME)
    message(FATAL_ERROR "No archive name privated.")
  endif()
  if(NOT HTTP_ARGS_URL)
    message(FATAL_ERROR "No archive URL privated.")
  endif()

  # Only a destination that was given and is relative needs resolving.
  if(HTTP_ARGS_PATH)
    if(NOT IS_ABSOLUTE ${HTTP_ARGS_PATH})
      get_filename_component(HTTP_ARGS_PATH ${HTTP_ARGS_PATH} ABSOLUTE)
    endif()
  endif()

  # Strongest digest wins; the hash stays empty when none was supplied.
  set(HTTP_URL_HASH "")
  foreach(_hash_algo IN ITEMS SHA256 SHA1 MD5)
    if(HTTP_ARGS_${_hash_algo})
      set(HTTP_URL_HASH "${_hash_algo}=${HTTP_ARGS_${_hash_algo}}")
      break()
    endif()
  endforeach()

  _fetch_content(
      NAME "${HTTP_ARGS_NAME}" PATH "${HTTP_ARGS_PATH}"
      URL "${HTTP_ARGS_URL}" URL_HASH "${HTTP_URL_HASH}"
    )
endfunction()

## Retrieve a version string from GIT
##
## _RESULT       name of the variable set in the caller's scope
## _SOURCES_DIR  directory inside the git checkout (relative paths are made
##               absolute first)
##
## The result is the output of `git describe --tags`; if that fails, "g"
## followed by the short HEAD hash; if that fails too, an empty string.
function(git_version _RESULT _SOURCES_DIR)
  find_package(Git REQUIRED)

  if(NOT IS_ABSOLUTE ${_SOURCES_DIR})
    get_filename_component(_SOURCES_DIR ${_SOURCES_DIR} ABSOLUTE)
  endif()

  # git describe --tags
  execute_process(
      COMMAND "${GIT_EXECUTABLE}" describe --tags
      WORKING_DIRECTORY "${_SOURCES_DIR}"
      RESULT_VARIABLE GIT_VER_RESULT
      OUTPUT_VARIABLE GIT_VER_OUTPUT
      ERROR_VARIABLE GIT_VER_ERROR
    )
  if(GIT_VER_RESULT EQUAL 0)
    # Quoted so that empty output or output containing ';' cannot change the
    # number of arguments seen by string(STRIP).
    string(STRIP "${GIT_VER_OUTPUT}" GIT_VER_OUTPUT)
    set(${_RESULT} "${GIT_VER_OUTPUT}" PARENT_SCOPE)
    return()
  endif()

  # git rev-parse --short HEAD
  execute_process(
      COMMAND "${GIT_EXECUTABLE}" rev-parse --short HEAD
      WORKING_DIRECTORY "${_SOURCES_DIR}"
      RESULT_VARIABLE GIT_VER_RESULT
      OUTPUT_VARIABLE GIT_VER_OUTPUT
      ERROR_VARIABLE GIT_VER_ERROR
    )
  if(GIT_VER_RESULT EQUAL 0)
    string(STRIP "${GIT_VER_OUTPUT}" GIT_VER_OUTPUT)
    set(${_RESULT} "g${GIT_VER_OUTPUT}" PARENT_SCOPE)
    return()
  endif()

  set(${_RESULT} "" PARENT_SCOPE)
endfunction()

## Retrieve a version string from HG
##
## _RESULT       name of the variable set in the caller's scope
## _SOURCES_DIR  directory inside the hg checkout (relative paths are made
##               absolute first)
##
## The result is the latest tag when the working revision is exactly on it,
## otherwise "<tag>-<distance>-h<short node>". When no usable tag is reported
## (the tag text starts with "null") or the tag queries fail, the result is
## "h<short node>"; if that query fails as well, an empty string.
function(hg_version _RESULT _SOURCES_DIR)
  find_package(Hg REQUIRED)

  if(NOT IS_ABSOLUTE ${_SOURCES_DIR})
    get_filename_component(_SOURCES_DIR ${_SOURCES_DIR} ABSOLUTE)
  endif()

  # hg log -T "{latesttagdistance}" -r .
  execute_process(
      COMMAND "${HG_EXECUTABLE}" log -T "{latesttagdistance}" -r .
      WORKING_DIRECTORY "${_SOURCES_DIR}"
      RESULT_VARIABLE HG_VER_RESULT
      OUTPUT_VARIABLE HG_VER_OUTPUT
      ERROR_VARIABLE HG_VER_ERROR
    )
  if(HG_VER_RESULT EQUAL 0)
    # Quoted so that empty output or output containing ';' cannot change the
    # number of arguments seen by string(STRIP).
    string(STRIP "${HG_VER_OUTPUT}" HG_VER_OUTPUT)
    if(HG_VER_OUTPUT STREQUAL "0")
      # hg log -T "{latesttag}" -r .
      execute_process(
          COMMAND "${HG_EXECUTABLE}" log -T "{latesttag}" -r .
          WORKING_DIRECTORY "${_SOURCES_DIR}"
          RESULT_VARIABLE HG_VER_RESULT
          OUTPUT_VARIABLE HG_VER_OUTPUT
          ERROR_VARIABLE HG_VER_ERROR
        )
    else()
      # hg log -T "{latesttag}-{latesttagdistance}-h{node|short}" -r .
      execute_process(
          COMMAND "${HG_EXECUTABLE}" log
          -T "{latesttag}-{latesttagdistance}-h{node|short}" -r .
          WORKING_DIRECTORY "${_SOURCES_DIR}"
          RESULT_VARIABLE HG_VER_RESULT
          OUTPUT_VARIABLE HG_VER_OUTPUT
          ERROR_VARIABLE HG_VER_ERROR
        )
    endif()

    if(HG_VER_RESULT EQUAL 0)
      string(STRIP "${HG_VER_OUTPUT}" HG_VER_OUTPUT)
      if(NOT HG_VER_OUTPUT MATCHES "^null.*")
        set(${_RESULT} "${HG_VER_OUTPUT}" PARENT_SCOPE)
        return()
      endif()
    endif()
  endif()

  # hg log -T "h{node|short}" -r .
  execute_process(
      COMMAND "${HG_EXECUTABLE}" log -T "h{node|short}" -r .
      WORKING_DIRECTORY "${_SOURCES_DIR}"
      RESULT_VARIABLE HG_VER_RESULT
      OUTPUT_VARIABLE HG_VER_OUTPUT
      ERROR_VARIABLE HG_VER_ERROR
    )
  if(HG_VER_RESULT EQUAL 0)
    string(STRIP "${HG_VER_OUTPUT}" HG_VER_OUTPUT)
    set(${_RESULT} "${HG_VER_OUTPUT}" PARENT_SCOPE)
    return()
  endif()

  set(${_RESULT} "" PARENT_SCOPE)
endfunction()

## Retrieve a version string from SVN
##
## _RESULT       name of the variable set in the caller's scope
## _SOURCES_DIR  directory inside the svn working copy (relative paths are
##               made absolute first)
##
## The result is "r" followed by the output of
## `svn info --show-item revision`, or an empty string if the command fails.
function(svn_version _RESULT _SOURCES_DIR)
  find_package(Subversion REQUIRED)

  if(NOT IS_ABSOLUTE ${_SOURCES_DIR})
    get_filename_component(_SOURCES_DIR ${_SOURCES_DIR} ABSOLUTE)
  endif()

  # svn info --show-item revision
  execute_process(
      COMMAND "${Subversion_SVN_EXECUTABLE}" info --show-item revision
      WORKING_DIRECTORY "${_SOURCES_DIR}"
      RESULT_VARIABLE SVN_VER_RESULT
      OUTPUT_VARIABLE SVN_VER_OUTPUT
      ERROR_VARIABLE SVN_VER_ERROR
    )
  if(SVN_VER_RESULT EQUAL 0)
    # Quoted so that empty output or output containing ';' cannot change the
    # number of arguments seen by string(STRIP).
    string(STRIP "${SVN_VER_OUTPUT}" SVN_VER_OUTPUT)
    set(${_RESULT} "r${SVN_VER_OUTPUT}" PARENT_SCOPE)
    return()
  endif()

  set(${_RESULT} "" PARENT_SCOPE)
endfunction()

# Executed when this module is included: if a workspace directory is reported
# in BAZEL_WORKSPACE_DIR, include the Workspace.cmake file found in it.
# (_find_workspace_directory is defined earlier in this file; presumably it
# leaves the variable empty when no workspace is found - verify.)
_find_workspace_directory(BAZEL_WORKSPACE_DIR)
if(BAZEL_WORKSPACE_DIR)
  include("${BAZEL_WORKSPACE_DIR}/Workspace.cmake")
endif()


================================================
FILE: cmake/option.cmake
================================================
## https://en.wikipedia.org/wiki/List_of_Intel_CPU_microarchitectures  
## https://en.wikipedia.org/wiki/List_of_AMD_CPU_microarchitectures  
## https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html  

## Intel Microarchitectures
## Each option below requests one explicit -march target; all default to OFF.
option(ENABLE_NEHALEM "Enable Intel Nehalem CPU microarchitecture" OFF)
option(ENABLE_SANDYBRIDGE "Enable Intel Sandy Bridge CPU microarchitecture" OFF)
option(ENABLE_HASWELL "Enable Intel Haswell CPU microarchitecture" OFF)
option(ENABLE_BROADWELL "Enable Intel Broadwell CPU microarchitecture" OFF)
option(ENABLE_SKYLAKE "Enable Intel Skylake CPU microarchitecture" OFF)
option(ENABLE_SKYLAKE_AVX512 "Enable Intel Skylake Server CPU microarchitecture" OFF)
option(ENABLE_ICELAKE "Enable Intel Icelake CPU microarchitecture" OFF)
option(ENABLE_SAPPHIRERAPIDS "Enable Intel Sapphire Rapids Server CPU microarchitecture" OFF)
option(ENABLE_EMERALDRAPIDS "Enable Intel Emerald Rapids Server CPU microarchitecture" OFF)
option(ENABLE_GRANITERAPIDS "Enable Intel Granite Rapids Server CPU microarchitecture" OFF)

## Build for the CPU of the build machine (-march=native)
option(ENABLE_NATIVE "Enable native CPU microarchitecture" OFF)

## AMD Microarchitectures
option(ENABLE_ZEN1 "Enable AMD Zen+ Family 17h CPU microarchitecture" OFF)
option(ENABLE_ZEN2 "Enable AMD Zen 2 Family 17h CPU microarchitecture" OFF)
option(ENABLE_ZEN3 "Enable AMD Zen 3 Family 19h CPU microarchitecture" OFF)

## ARM architectures
option(ENABLE_ARMV8A "Enable ARMv8-a architecture" OFF)
option(ENABLE_ARMV8.1A "Enable ARMv8.1-a architecture" OFF)
option(ENABLE_ARMV8.2A "Enable ARMv8.2-a architecture" OFF)
option(ENABLE_ARMV8.3A "Enable ARMv8.3-a architecture" OFF)
option(ENABLE_ARMV8.4A "Enable ARMv8.4-a architecture" OFF)
option(ENABLE_ARMV8.5A "Enable ARMv8.5-a architecture" OFF)
option(ENABLE_ARMV8.6A "Enable ARMv8.6-a architecture" OFF)

## OpenMP option
option(ENABLE_OPENMP "Enable OpenMP support" OFF)

## Every option that selects an explicit architecture. Keep this list in sync
## with the option() declarations above: it drives the auto-detect switch.
set(ARCH_OPTIONS
  ENABLE_NEHALEM ENABLE_SANDYBRIDGE ENABLE_HASWELL ENABLE_BROADWELL ENABLE_SKYLAKE
  ENABLE_SKYLAKE_AVX512 ENABLE_ICELAKE ENABLE_SAPPHIRERAPIDS ENABLE_EMERALDRAPIDS ENABLE_GRANITERAPIDS
  ENABLE_ZEN1 ENABLE_ZEN2 ENABLE_ZEN3
  ENABLE_ARMV8A ENABLE_ARMV8.1A ENABLE_ARMV8.2A ENABLE_ARMV8.3A ENABLE_ARMV8.4A
  ENABLE_ARMV8.5A ENABLE_ARMV8.6A
  ENABLE_NATIVE
)

## Auto detection is on by default and is switched off as soon as any
## explicit architecture option is enabled. The set() below creates a normal
## variable; the cached option value itself is not modified.
option(AUTO_DETECT_ARCH "Auto detect CPU microarchitecture" ON)
foreach(opt IN LISTS ARCH_OPTIONS)
  if(${opt})
    set(AUTO_DETECT_ARCH OFF)
    break()
  endif()
endforeach()

include(CheckCCompilerFlag)

# Append _FLAG to the space separated flag string stored in the variable
# named by _RESULT, updating that variable in the caller's scope.
#
#   _RESULT  name of the variable holding the flags (e.g. CMAKE_C_FLAGS)
#   _FLAG    text to append
#
# - When the variable is empty (or otherwise evaluates to false) it becomes
#   exactly _FLAG.
# - When _FLAG already occurs in the current value, the variable is left
#   untouched; the existing flags are never discarded.
# - _FLAG is compared literally, so characters such as '.' or '+' in a flag
#   are not interpreted as a regular expression.
function(_AppendFlags _RESULT _FLAG)
  set(_existing "${${_RESULT}}")
  if(NOT _existing)
    set(${_RESULT} "${_FLAG}" PARENT_SCOPE)
    return()
  endif()

  string(FIND "${_existing}" "${_FLAG}" _flag_pos)
  if(_flag_pos EQUAL -1)
    set(${_RESULT} "${_existing} ${_FLAG}" PARENT_SCOPE)
  endif()
endfunction()

# Probe the C compiler for FLAG and, when accepted, add it to CMAKE_C_FLAGS
# and CMAKE_CXX_FLAGS.
#
#   FLAG         compiler flag to probe, e.g. "-march=haswell"
#   VAR_NAME     suffix for the probe result COMPILER_SUPPORT_<VAR_NAME> and
#                for the outcome variable <VAR_NAME>_ENABLED (ON/OFF)
#   OPTION_NAME  name of the user option that requested the flag; when that
#                option is on and the flag is rejected, configuration fails
#
# This is a macro, so all variables are set in the caller's scope.
macro(add_arch_flag FLAG VAR_NAME OPTION_NAME)
  check_c_compiler_flag("${FLAG}" COMPILER_SUPPORT_${VAR_NAME})
  if(COMPILER_SUPPORT_${VAR_NAME})
    foreach(_flags_var CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
      _AppendFlags(${_flags_var} "${FLAG}")
    endforeach()
    set(${VAR_NAME}_ENABLED ON)
  elseif(${OPTION_NAME})
    message(FATAL_ERROR "Compiler does not support required flag: '${FLAG}' for ${OPTION_NAME}")
  else()
    set(${VAR_NAME}_ENABLED OFF)
  endif()
endmacro()

# Auto-detect path for ARM hosts: add "-march=armv8" to the C and C++ flags
# of the caller when the C compiler accepts it, otherwise emit a warning and
# leave the flags alone.
function(_setup_armv8_march)
  set(_arch "armv8")
  check_c_compiler_flag("-march=${_arch}" _COMP_SUPP_${_arch})
  if(NOT _COMP_SUPP_${_arch})
    message(WARNING "No ARMv8 march flag supported by compiler.")
    return()
  endif()

  # _AppendFlags updates the copy in this function's scope; re-export it so
  # that the caller sees the change.
  foreach(_flags_var CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
    _AppendFlags(${_flags_var} "-march=${_arch}")
    set(${_flags_var} "${${_flags_var}}" PARENT_SCOPE)
  endforeach()
endfunction()

# Auto-detect path for x86 hosts: add "-march=x86-64" to the C and C++ flags
# of the caller when the C compiler accepts it, otherwise emit a warning and
# leave the flags alone.
function(_setup_x86_march)
  set(_arch "x86-64")
  check_c_compiler_flag("-march=${_arch}" _COMP_SUPP_${_arch})
  if(NOT _COMP_SUPP_${_arch})
    message(WARNING "No known x86 march flag supported; falling back to generic.")
    return()
  endif()

  # _AppendFlags updates the copy in this function's scope; re-export it so
  # that the caller sees the change.
  foreach(_flags_var CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
    _AppendFlags(${_flags_var} "-march=${_arch}")
    set(${_flags_var} "${${_flags_var}}" PARENT_SCOPE)
  endforeach()
endfunction()

# Report per-ISA "-march=" flags for x86 builds through four output variables
# set in the caller's scope.
#
#   VAR_NAME_SSE         always "-march=corei7"
#   VAR_NAME_AVX2        always "-march=core-avx2"
#   VAR_NAME_AVX512      first flag accepted by the C compiler out of
#                        icelake-server, skylake-avx512, core-avx2, x86-64
#   VAR_NAME_AVX512FP16  same search, starting with sapphirerapids
#
# The two probed variables are left untouched when no candidate is accepted.
function(setup_compiler_march_for_x86 VAR_NAME_SSE VAR_NAME_AVX2 VAR_NAME_AVX512 VAR_NAME_AVX512FP16)
  # Fixed baselines, no probing involved.
  set(${VAR_NAME_SSE} "-march=corei7" PARENT_SCOPE)
  set(${VAR_NAME_AVX2} "-march=core-avx2" PARENT_SCOPE)

  # AVX-512: take the first candidate the compiler understands.
  set(_avx512_march "")
  foreach(_candidate IN ITEMS "icelake-server" "skylake-avx512" "core-avx2" "x86-64")
    check_c_compiler_flag("-march=${_candidate}" _COMP_SUPP_${_candidate})
    if(_COMP_SUPP_${_candidate})
      set(_avx512_march "-march=${_candidate}")
      break()
    endif()
  endforeach()
  if(_avx512_march)
    set(${VAR_NAME_AVX512} "${_avx512_march}" PARENT_SCOPE)
  endif()

  # AVX-512 FP16: same idea with sapphirerapids as the preferred target.
  set(_avx512fp16_march "")
  foreach(_candidate IN ITEMS
      "sapphirerapids" "icelake-server" "skylake-avx512" "core-avx2" "x86-64")
    check_c_compiler_flag("-march=${_candidate}" _COMP_SUPP_${_candidate})
    if(_COMP_SUPP_${_candidate})
      set(_avx512fp16_march "-march=${_candidate}")
      break()
    endif()
  endforeach()
  if(_avx512fp16_march)
    set(${VAR_NAME_AVX512FP16} "${_avx512fp16_march}" PARENT_SCOPE)
  endif()
endfunction()

if(MSVC)
  # MSVC has no -march: enable the highest /arch: level the compiler accepts
  # and skip the rest of this file.
  set(_msvc_isa_levels "AVX512" "AVX2" "AVX" "SSE2")
  foreach(_isa IN LISTS _msvc_isa_levels)
    set(_arch_switch "/arch:${_isa}")
    check_c_compiler_flag("${_arch_switch}" _COMP_SUPP_${_isa})
    if(_COMP_SUPP_${_isa})
      foreach(_flags_var CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
        _AppendFlags(${_flags_var} "${_arch_switch}")
      endforeach()
      message(STATUS "MSVC: enabled ${_arch_switch}")
      break()
    endif()
  endforeach()
  return()
endif()

# Explicit selection: every enabled ENABLE_* option appends its own -march
# flag through add_arch_flag, which fails the configuration when the compiler
# rejects a requested flag. The options are handled in the order written
# below, which is also the order of the flags on the command line.
if(NOT AUTO_DETECT_ARCH)
  if(ENABLE_NATIVE)
    add_arch_flag("-march=native" NATIVE ENABLE_NATIVE)
  endif()

  # AMD, newest first
  if(ENABLE_ZEN3)
    add_arch_flag("-march=znver3" ZNVER3 ENABLE_ZEN3)
  endif()

  if(ENABLE_ZEN2)
    add_arch_flag("-march=znver2" ZNVER2 ENABLE_ZEN2)
  endif()

  if(ENABLE_ZEN1)
    add_arch_flag("-march=znver1" ZNVER1 ENABLE_ZEN1)
  endif()

  # Intel, newest first
  if(ENABLE_GRANITERAPIDS)
    add_arch_flag("-march=graniterapids" GRANITERAPIDS ENABLE_GRANITERAPIDS)
  endif()

  if(ENABLE_EMERALDRAPIDS)
    add_arch_flag("-march=emeraldrapids" EMERALDRAPIDS ENABLE_EMERALDRAPIDS)
  endif()

  if(ENABLE_SAPPHIRERAPIDS)
    add_arch_flag("-march=sapphirerapids" SAPPHIRERAPIDS ENABLE_SAPPHIRERAPIDS)
  endif()

  if(ENABLE_ICELAKE)
    add_arch_flag("-march=icelake-server" ICELAKE ENABLE_ICELAKE)
  endif()

  if(ENABLE_SKYLAKE_AVX512)
    add_arch_flag("-march=skylake-avx512" SKYLAKE_AVX512 ENABLE_SKYLAKE_AVX512)
  endif()

  if(ENABLE_SKYLAKE)
    add_arch_flag("-march=skylake" SKYLAKE ENABLE_SKYLAKE)
  endif()

  if(ENABLE_BROADWELL)
    add_arch_flag("-march=broadwell" BROADWELL ENABLE_BROADWELL)
  endif()

  if(ENABLE_HASWELL)
    add_arch_flag("-march=haswell" HASWELL ENABLE_HASWELL)
  endif()

  if(ENABLE_SANDYBRIDGE)
    add_arch_flag("-march=sandybridge" SANDYBRIDGE ENABLE_SANDYBRIDGE)
  endif()

  if(ENABLE_NEHALEM)
    add_arch_flag("-march=nehalem" NEHALEM ENABLE_NEHALEM)
  endif()

  # ARM, newest first. Normally only one of these options is enabled.
  # NOTE(review): nothing below restricts the selection to the highest
  # enabled level; each enabled option appends its own -march flag. Confirm
  # whether enabling several levels at once is meant to be supported.
  if(ENABLE_ARMV8.6A)
    add_arch_flag("-march=armv8.6-a" ARMV86A ENABLE_ARMV8.6A)
  endif()
  if(ENABLE_ARMV8.5A)
    add_arch_flag("-march=armv8.5-a" ARMV85A ENABLE_ARMV8.5A)
  endif()
  if(ENABLE_ARMV8.4A)
    add_arch_flag("-march=armv8.4-a" ARMV84A ENABLE_ARMV8.4A)
  endif()
  if(ENABLE_ARMV8.3A)
    add_arch_flag("-march=armv8.3-a" ARMV83A ENABLE_ARMV8.3A)
  endif()
  if(ENABLE_ARMV8.2A)
    add_arch_flag("-march=armv8.2-a" ARMV82A ENABLE_ARMV8.2A)
  endif()
  if(ENABLE_ARMV8.1A)
    add_arch_flag("-march=armv8.1-a" ARMV81A ENABLE_ARMV8.1A)
  endif()
  if(ENABLE_ARMV8A)
    add_arch_flag("-march=armv8-a" ARMV8A ENABLE_ARMV8A)
  endif()

else()
  # AUTO DETECT
  # Heuristic: pick the baseline -march for the processor family named by
  # CMAKE_SYSTEM_PROCESSOR; unknown families only produce a warning.
  if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64|ARM64")
    _setup_armv8_march()
  elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|i686|i386|x64")
    _setup_x86_march()
  else()
    message(WARNING "Unknown host architecture: ${CMAKE_SYSTEM_PROCESSOR}; no -march= set.")
  endif()
endif()

# -----------------------------
# OpenMP
# -----------------------------
# With ENABLE_OPENMP on, OpenMP becomes a hard requirement and the flags
# reported by find_package() are added to the C and C++ compile flags.
if(ENABLE_OPENMP)
  find_package(OpenMP REQUIRED)
  foreach(_omp_lang IN ITEMS C CXX)
    if(OpenMP_${_omp_lang}_FLAGS)
      _AppendFlags(CMAKE_${_omp_lang}_FLAGS "${OpenMP_${_omp_lang}_FLAGS}")
    endif()
  endforeach()
endif()


================================================
FILE: cmake/utils.cmake
================================================
# Apply a patch to a source tree at most once.
#
#   patch_name  identifier used for the marker file and in messages
#   target_dir  directory in which `patch -p1` is executed
#   patch_file  patch to apply
#
# A marker file "<target_dir>/.<patch_name>_patched" is written after a
# successful run; when it already exists the function returns immediately.
# A missing patch file or a failing `patch` run aborts the configuration.
function(apply_patch_once patch_name target_dir patch_file)
    set(mark_file "${target_dir}/.${patch_name}_patched")

    # Already applied in an earlier configure run.
    if(EXISTS "${mark_file}")
        return()
    endif()

    if(NOT EXISTS "${patch_file}")
        message(FATAL_ERROR "Patch file '${patch_file}' not found!")
    endif()

    execute_process(
        COMMAND patch -p1 -i "${patch_file}"
        WORKING_DIRECTORY "${target_dir}"
        RESULT_VARIABLE patch_result
        OUTPUT_VARIABLE patch_stdout
        ERROR_VARIABLE patch_stderr
    )

    if(NOT patch_result EQUAL 0)
        # Report the result code and both output streams: patch tools may
        # describe rejected hunks on stdout, and when the program cannot be
        # started at all the result variable holds the only explanation.
        message(FATAL_ERROR
            "Failed to apply patch '${patch_name}' to ${target_dir} "
            "(result: ${patch_result}):\n${patch_stderr}\n${patch_stdout}")
    endif()

    file(WRITE "${mark_file}" "patched")
endfunction()


================================================
FILE: examples/c++/CMakeLists.txt
================================================
# Stand-alone build of the Zvec C++ examples against an already built Zvec
# tree: headers and static libraries are picked up from the main repository
# rather than built here.
cmake_minimum_required(VERSION 3.13)
cmake_policy(SET CMP0077 NEW)
project(zvec-example-c++)
set(CMAKE_CXX_STANDARD 17)

# Enable compile_commands.json
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# --- Paths to Zvec and dependencies ---
# Allow custom host build directory, default to "build"
if(NOT DEFINED HOST_BUILD_DIR)
    set(HOST_BUILD_DIR "build")
endif()

# All paths are expressed relative to this project's build directory, three
# levels up. NOTE(review): this presumably expects the example to be
# configured in a directory directly below examples/c++ - confirm.
set(ZVEC_INCLUDE_DIR ${CMAKE_BINARY_DIR}/../../../src/include)
set(ZVEC_LIB_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/lib)
set(ZVEC_DEPENDENCY_LIB_DIR ${CMAKE_BINARY_DIR}/../../../${HOST_BUILD_DIR}/external/usr/local/lib)

# Add include and library search paths
include_directories(${ZVEC_INCLUDE_DIR})
link_directories(${ZVEC_LIB_DIR} ${ZVEC_DEPENDENCY_LIB_DIR})

# --- Determine debug/release library names ---
# Debug builds link the debug-suffixed glog, gflags and protobuf libraries.
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
    set(GLOG_LIB glogd)
    set(GFLAGS_LIB gflags_nothreads_debug)
    set(PROTOBUF_LIB protobufd)
else()
    set(GLOG_LIB glog)
    set(GFLAGS_LIB gflags_nothreads)
    set(PROTOBUF_LIB protobuf)
endif()

# --- Dependency groups ---
find_package(Threads REQUIRED)

# Libraries linked together with zvec_ailego
set(zvec_ailego_deps
    arrow
    parquet
    arrow_bundled_dependencies
    ${CMAKE_THREAD_LIBS_INIT}
    ${CMAKE_DL_LIBS}
)

# Libraries linked together with zvec_core
set(zvec_core_deps
    zvec_turbo
)

# Libraries linked together with zvec_db
set(zvec_db_deps
    roaring
    rocksdb
    arrow
    arrow_acero
    arrow_bundled_dependencies
    arrow_compute
    arrow_dataset
    parquet
    antlr4-runtime
    ${GLOG_LIB}
    ${GFLAGS_LIB}
    ${PROTOBUF_LIB}
    lz4
)

# --- Create INTERFACE targets for Zvec components ---

# Classify the platform once. Linux and Android use the same GNU-style linker
# options, Apple needs -force_load, anything else is rejected.
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
    set(_zvec_link_style "gnu")
elseif(APPLE)
    set(_zvec_link_style "apple")
elseif(ANDROID)
    set(_zvec_link_style "gnu")
else()
    message(FATAL_ERROR "Unsupported platform: ${CMAKE_SYSTEM_NAME}")
endif()

# zvec_ailego: links libzvec_ailego.a + its deps
add_library(zvec-ailego INTERFACE)
target_link_libraries(zvec-ailego INTERFACE
    -lzvec_ailego
    ${zvec_ailego_deps}
)

# zvec_core: the whole archive is linked (whole-archive / force_load), with
# its dependencies following it
add_library(zvec-core INTERFACE)
if(_zvec_link_style STREQUAL "apple")
    target_link_libraries(zvec-core INTERFACE
        -Wl,-force_load ${ZVEC_LIB_DIR}/libzvec_core.a
        zvec-ailego
        ${zvec_core_deps}
    )
else()
    target_link_libraries(zvec-core INTERFACE
        -Wl,--whole-archive
        zvec_core
        -Wl,--no-whole-archive
        -Wl,--start-group
        zvec-ailego
        ${zvec_core_deps}
        -Wl,--end-group
    )
endif()

# zvec_db: links libzvec_db.a + all deps
add_library(zvec-db INTERFACE)
if(_zvec_link_style STREQUAL "apple")
    target_link_libraries(zvec-db INTERFACE
        zvec_db
        zvec-core
        zvec-ailego
        ${zvec_db_deps}
    )
else()
    # The third-party archives are wrapped in a linker group.
    target_link_libraries(zvec-db INTERFACE
        zvec_db
        zvec-core
        zvec-ailego
        -Wl,--start-group
        ${zvec_db_deps}
        -Wl,--end-group
    )
endif()


# --- Example executables ---
# One program per component: <component>-example is built from
# <component>/main.cc and linked against the matching zvec-<component>
# interface target.
foreach(_component IN ITEMS db core ailego)
    add_executable(${_component}-example ${_component}/main.cc)
    target_link_libraries(${_component}-example PRIVATE
        zvec-${_component}
    )
endforeach()

# The db example additionally needs the Android log library.
if(ANDROID)
    target_link_libraries(db-example PRIVATE
        log
    )
endif()

# Android release builds: strip symbols and optimize for size to keep the
# executables small.
if(CMAKE_BUILD_TYPE STREQUAL "Release" AND ANDROID)
    foreach(_example IN ITEMS db-example core-example ailego-example)
        add_custom_command(TARGET ${_example} POST_BUILD
            COMMAND ${CMAKE_STRIP} "$<TARGET_FILE:${_example}>"
            COMMENT "Stripping symbols from ${_example}")
    endforeach()

    set_property(TARGET db-example core-example ailego-example
                 PROPERTY COMPILE_FLAGS "-Os")
    set_property(TARGET db-example core-example ailego-example
                 PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE)
endif()


================================================
FILE: examples/c++/ailego/main.cc
================================================
#include <iostream>
#include <string>
#include <zvec/ailego/utility/string_helper.h>

using namespace zvec;

// Prints the result of ailego::StringHelper::StartsWith for the sample string
// "hello world" and the prefix "hello".
int main() {
  std::string text{"hello world"};
  const auto has_prefix = ailego::StringHelper::StartsWith(text, "hello");

  std::cout << has_prefix << std::endl;
  return 0;
}

================================================
FILE: examples/c++/core/main.cc
================================================
#include <cstdlib>
#include <iostream>
#include <zvec/core/interface/index.h>
#include <zvec/core/interface/index_factory.h>
#include <zvec/core/interface/index_param.h>
#include <zvec/core/interface/index_param_builders.h>

using namespace zvec::core_interface;

// Dimension used for the index parameters and for every vector that this
// example adds to or queries from the index.
constexpr uint32_t kDimension = 64;
// Path handed to Index::Open; main() deletes this file before building the
// index.
const std::string index_name{"test.index"};

// Creates an index from `param`, opens it at `index_name` with MMAP storage,
// adds `doc_num` constant-valued vectors keyed 0..doc_num-1 and trains it.
//
// Returns the ready index, or nullptr (after printing a message to stdout)
// when creating, opening, adding or training fails.
Index::Pointer create_index(const BaseIndexParam::Pointer &param,
                            int doc_num = 10) {
  auto index = IndexFactory::CreateAndInitIndex(*param);
  if (!index) {
    std::cout << "Failed to create index." << std::endl;
    return nullptr;
  }

  if (index->Open(index_name,
                  StorageOptions{StorageOptions::StorageType::kMMAP, true}) !=
      0) {
    std::cout << "Failed to open index." << std::endl;
    return nullptr;
  }

  for (int doc_id = 0; doc_id < doc_num; ++doc_id) {
    // Every component of document `doc_id` holds the same value.
    const float fill_value = doc_id / 10.0f + 0.1f;
    std::vector<float> values(kDimension, fill_value);

    VectorData item;
    item.vector = DenseVector{values.data()};
    if (index->Add(item, doc_id) != 0) {
      std::cout << "Failed to add to index." << std::endl;
      return nullptr;
    }
  }

  if (index->Train() != 0) {
    std::cout << "Failed to train index." << std::endl;
    return nullptr;
  }

  return index;
}

// Builds a one-document HNSW index, runs a top-10 query against it and prints
// the key and score of the first hit.
//
// Returns 0 on success and -1 when the index cannot be built, the search
// fails, or the search returns no documents.
int main() {
  // Remove any index file left behind by a previous run. The command is
  // assembled in a std::string so its length is not bounded by a fixed buffer.
  const std::string rm_cmd = "rm -f " + index_name;
  system(rm_cmd.c_str());

  auto param = HNSWIndexParamBuilder()
                   .WithMetricType(MetricType::kInnerProduct)
                   .WithDataType(DataType::DT_FP32)
                   .WithDimension(kDimension)
                   .WithIsSparse(false)
                   .Build();
  auto index = create_index(param, 1);
  if (!index) {
    // create_index() returns nullptr on failure and has already printed which
    // step failed; dereferencing the result here would crash.
    return -1;
  }
  std::cout << "index stats: " << index->GetDocCount() << std::endl;

  // query
  auto query_param = HNSWQueryParamBuilder()
                         .with_topk(10)
                         .with_fetch_vector(true)
                         .with_ef_search(20)
                         .build();

  SearchResult result;
  VectorData query;
  std::vector<float> vector(kDimension, 0.1f);
  query.vector = DenseVector{vector.data()};
  int ret = index->Search(query, query_param, &result);
  if (ret != 0) {
    std::cout << "Failed to search index." << std::endl;
    return -1;
  }

  std::cout << "query results: " << result.doc_list_.size() << std::endl;
  if (result.doc_list_.size() == 0) {
    std::cout << "No results found." << std::endl;
    return -1;
  }

  std::cout << "key: " << result.doc_list_[0].key()
            << ", score: " << result.doc_list_[0].score() << std::endl;

  return 0;
}

================================================
FILE: examples/c++/db/main.cc
================================================
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
#include <zvec/db/collection.h>
#include <zvec/db/doc.h>
#include <zvec/db/schema.h>
#include <zvec/db/status.h>

using namespace zvec;

// Builds a Doc for `doc_id` holding one synthetic value per field of `schema`.
//
// The primary key is `pk`, or "pk_<doc_id>" when `pk` is empty. Each field's
// value is derived from `doc_id` according to the field's data type. For a
// data type with no case below, prints a message and throws
// std::runtime_error.
Doc create_doc(const uint64_t doc_id, const CollectionSchema &schema,
               std::string pk = "") {
  constexpr size_t kArrayLength = 10;       // length of fixed-size ARRAY_* values
  constexpr uint32_t kSparseEntries = 100;  // entries in each sparse vector

  // Values shared by several cases, computed once from doc_id.
  const std::string id_text = std::to_string(doc_id);
  const bool is_tenth = (doc_id % 10 == 0);
  const double shifted_id = doc_id + 0.1;

  Doc doc;
  std::string primary_key = pk.empty() ? "pk_" + id_text : pk;
  doc.set_pk(primary_key);

  for (const auto &field : schema.fields()) {
    const auto &name = field->name();

    switch (field->data_type()) {
      // ---- scalar fields ----
      case DataType::BINARY: {
        const std::string payload = "binary_" + id_text;
        doc.set<std::string>(name, payload);
        break;
      }
      case DataType::BOOL:
        doc.set<bool>(name, is_tenth);
        break;
      case DataType::INT32:
        doc.set<int32_t>(name, static_cast<int32_t>(doc_id));
        break;
      case DataType::INT64:
        doc.set<int64_t>(name, static_cast<int64_t>(doc_id));
        break;
      case DataType::UINT32:
        doc.set<uint32_t>(name, static_cast<uint32_t>(doc_id));
        break;
      case DataType::UINT64:
        doc.set<uint64_t>(name, static_cast<uint64_t>(doc_id));
        break;
      case DataType::FLOAT:
        doc.set<float>(name, static_cast<float>(doc_id));
        break;
      case DataType::DOUBLE:
        doc.set<double>(name, static_cast<double>(doc_id));
        break;
      case DataType::STRING:
        doc.set<std::string>(name, "value_" + id_text);
        break;

      // ---- array fields ----
      case DataType::ARRAY_BINARY: {
        // Unlike the other arrays, the length here is doc_id % 10.
        const size_t count = doc_id % 10;
        std::vector<std::string> blobs;
        blobs.reserve(count);
        for (size_t idx = 0; idx < count; ++idx) {
          blobs.push_back("bin_" + std::to_string(idx));
        }
        doc.set<std::vector<std::string>>(name, blobs);
        break;
      }
      case DataType::ARRAY_BOOL:
        doc.set<std::vector<bool>>(name,
                                   std::vector<bool>(kArrayLength, is_tenth));
        break;
      case DataType::ARRAY_INT32:
        doc.set<std::vector<int32_t>>(
            name,
            std::vector<int32_t>(kArrayLength, static_cast<int32_t>(doc_id)));
        break;
      case DataType::ARRAY_INT64:
        doc.set<std::vector<int64_t>>(
            name,
            std::vector<int64_t>(kArrayLength, static_cast<int64_t>(doc_id)));
        break;
      case DataType::ARRAY_UINT32:
        doc.set<std::vector<uint32_t>>(
            name,
            std::vector<uint32_t>(kArrayLength, static_cast<uint32_t>(doc_id)));
        break;
      case DataType::ARRAY_UINT64:
        doc.set<std::vector<uint64_t>>(
            name,
            std::vector<uint64_t>(kArrayLength, static_cast<uint64_t>(doc_id)));
        break;
      case DataType::ARRAY_FLOAT:
        doc.set<std::vector<float>>(
            name, std::vector<float>(kArrayLength, static_cast<float>(doc_id)));
        break;
      case DataType::ARRAY_DOUBLE:
        doc.set<std::vector<double>>(
            name,
            std::vector<double>(kArrayLength, static_cast<double>(doc_id)));
        break;
      case DataType::ARRAY_STRING:
        doc.set<std::vector<std::string>>(
            name, std::vector<std::string>(kArrayLength, "value_" + id_text));
        break;

      // ---- dense vector fields: field->dimension() copies of one value ----
      case DataType::VECTOR_BINARY32:
        doc.set<std::vector<uint32_t>>(
            name, std::vector<uint32_t>(field->dimension(),
                                        static_cast<uint32_t>(shifted_id)));
        break;
      case DataType::VECTOR_BINARY64:
        doc.set<std::vector<uint64_t>>(
            name, std::vector<uint64_t>(field->dimension(),
                                        static_cast<uint64_t>(shifted_id)));
        break;
      case DataType::VECTOR_FP32:
        doc.set<std::vector<float>>(
            name, std::vector<float>(field->dimension(),
                                     static_cast<float>(shifted_id)));
        break;
      case DataType::VECTOR_FP64:
        doc.set<std::vector<double>>(
            name, std::vector<double>(field->dimension(), shifted_id));
        break;
      case DataType::VECTOR_FP16: {
        const auto element =
            static_cast<zvec::float16_t>(static_cast<float>(shifted_id));
        doc.set<std::vector<zvec::float16_t>>(
            name, std::vector<zvec::float16_t>(field->dimension(), element));
        break;
      }
      case DataType::VECTOR_INT8:
        doc.set<std::vector<int8_t>>(
            name, std::vector<int8_t>(field->dimension(),
                                      static_cast<int8_t>(doc_id)));
        break;
      case DataType::VECTOR_INT16:
        doc.set<std::vector<int16_t>>(
            name, std::vector<int16_t>(field->dimension(),
                                       static_cast<int16_t>(doc_id)));
        break;

      // ---- sparse vector fields: indices 0..99, all with the same value ----
      case DataType::SPARSE_VECTOR_FP16: {
        using SparseFp16 =
            std::pair<std::vector<uint32_t>, std::vector<zvec::float16_t>>;
        const auto element =
            static_cast<zvec::float16_t>(static_cast<float>(shifted_id));
        SparseFp16 sparse;
        sparse.first.reserve(kSparseEntries);
        sparse.second.reserve(kSparseEntries);
        for (uint32_t idx = 0; idx < kSparseEntries; ++idx) {
          sparse.first.push_back(idx);
          sparse.second.push_back(element);
        }
        doc.set<SparseFp16>(name, sparse);
        break;
      }
      case DataType::SPARSE_VECTOR_FP32: {
        using SparseFp32 = std::pair<std::vector<uint32_t>, std::vector<float>>;
        const float element = static_cast<float>(shifted_id);
        SparseFp32 sparse;
        sparse.first.reserve(kSparseEntries);
        sparse.second.reserve(kSparseEntries);
        for (uint32_t idx = 0; idx < kSparseEntries; ++idx) {
          sparse.first.push_back(idx);
          sparse.second.push_back(element);
        }
        doc.set<SparseFp32>(name, sparse);
        break;
      }

      default:
        std::cout << "Unsupported data type: " << field->name() << std::endl;
        throw std::runtime_error("Unsupported vector data type");
    }
  }

  return doc;
}

// Builds the "demo" collection schema used by this example: three scalar
// fields (id, name, weight) plus one dense and one sparse FP32 vector field,
// both indexed with HNSW using the IP metric.
CollectionSchema::Ptr create_schema() {
  auto schema = std::make_shared<CollectionSchema>("demo");
  schema->set_max_doc_count_per_segment(1000);

  // Scalar fields. "id" and "name" carry inverted-index parameters; "weight"
  // is declared without index parameters.
  auto id_field = std::make_shared<FieldSchema>(
      "id", DataType::INT64, false, std::make_shared<InvertIndexParams>(true));
  auto name_field = std::make_shared<FieldSchema>(
      "name", DataType::STRING, false,
      std::make_shared<InvertIndexParams>(false));
  auto weight_field =
      std::make_shared<FieldSchema>("weight", DataType::FLOAT, true);

  // Vector fields: a 128-dimensional dense vector and a sparse vector.
  auto dense_field = std::make_shared<FieldSchema>(
      "dense", DataType::VECTOR_FP32, 128, false,
      std::make_shared<HnswIndexParams>(MetricType::IP));
  auto sparse_field = std::make_shared<FieldSchema>(
      "sparse", DataType::SPARSE_VECTOR_FP32, 0, false,
      std::make_shared<HnswIndexParams>(MetricType::IP));

  // Registration order matches the declaration order above.
  schema->add_field(id_field);
  schema->add_field(name_field);
  schema->add_field(weight_field);
  schema->add_field(dense_field);
  schema->add_field(sparse_field);

  return schema;
}

// End-to-end collection example: creates a fresh collection under ./demo,
// inserts one document, optimizes, runs a dense-vector query, then closes the
// collection and reopens it read-only.
//
// Returns 0 on success and -1 as soon as any step reports an error or the
// query returns no documents.
int main() {
  // Start from an empty directory so CreateAndOpen does not see old data.
  std::string path = "./demo";
  std::string rm_cmd = "rm -rf " + path;
  system(rm_cmd.c_str());

  auto schema = create_schema();
  // NOTE(review): the first member is presumably read_only_ (it is set to true
  // before the reopen below) — confirm against CollectionOptions.
  CollectionOptions options{false, true};

  auto result = Collection::CreateAndOpen(path, *schema, options);
  if (!result.has_value()) {
    std::cout << result.error().message() << std::endl;
    return -1;
  }

  std::cout << "init stats: " << result.value()->Stats().value().to_string()
            << std::endl;

  auto coll = std::move(result).value();

  // insert docs
  {
    auto doc1 = create_doc(0, *schema);
    std::vector<Doc> docs{doc1};
    auto res = coll->Insert(docs);
    if (!res.has_value()) {
      std::cout << res.error().message() << std::endl;
      return -1;
    }
    std::cout << "after insert stats " << coll->Stats().value().to_string()
              << std::endl;
  }

  // optimize
  {
    auto res = coll->Optimize();
    if (!res.ok()) {
      std::cout << res.message() << std::endl;
      return -1;
    }
    std::cout << "after optimize stats " << coll->Stats().value().to_string()
              << std::endl;
  }

  // query
  {
    VectorQuery query;
    query.topk_ = 10;
    query.field_name_ = "dense";
    query.include_vector_ = true;
    // The query vector is passed as raw bytes: 128 floats, matching the
    // dimension of the "dense" field declared in create_schema().
    std::vector<float> query_vector = std::vector<float>(128, 0.1);
    query.query_vector_.assign((char *)query_vector.data(),
                               query_vector.size() * sizeof(float));
    auto res = coll->Query(query);
    if (!res.has_value()) {
      std::cout << res.error().message() << std::endl;
      return -1;
    }
    std::cout << "query result: doc_count[" << res.value().size() << "]"
              << std::endl;
    // A successful query may still return no documents; indexing element 0 of
    // an empty result would be out of bounds.
    if (res.value().size() == 0) {
      std::cout << "No results found." << std::endl;
      return -1;
    }
    std::cout << "first doc: " << res.value()[0]->to_detail_string()
              << std::endl;
  }

  // close and reopen
  coll.reset();
  options.read_only_ = true;
  result = Collection::Open(path, options);
  if (!result.has_value()) {
    std::cout << result.error().message() << std::endl;
    return -1;
  }
  std::cout << "reopen stats: " << result.value()->Stats().value().to_string()
            << std::endl;

  return 0;
}

================================================
FILE: pyproject.toml
================================================
######################################################################################################
# Zvec: High-Performance Vector Database with PyBind11 & C++ Backend
######################################################################################################
[project]
name = "zvec"
dynamic = ["version"]
description = "A high-performance vector database engine with native C++ backend and Python bindings"
readme = "README.md"
license = { text = "Apache-2.0" }
authors = [
    { name = "zvec", email = "zvec@alibaba-inc.com" },
]
maintainers = [
    { name = "Zvec Core Team", email = "zvec@alibaba-inc.com" },
]
requires-python = ">=3.9"
classifiers = [
    "Development Status :: 3 - Alpha",
    "Intended Audience :: Developers",
    "Intended Audience :: Education",
    "Intended Audience :: Science/Research",
    "License :: OSI Approved :: Apache Software License",
    "Operating System :: POSIX :: Linux",
    "Operating System :: MacOS",
    "Programming Language :: C++",
    "Programming Language :: Python :: 3 :: Only",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Programming Language :: Python :: 3.14",
    "Topic :: Database",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Software Development :: Libraries :: Python Modules",
]
keywords = [
    "vector-database", "ann", "nearest-neighbor"
]
dependencies = [
    "numpy >=1.23",
]

[project.urls]
Homepage = "https://github.com/alibaba/zvec"
Repository = "https://github.com/alibaba/zvec"
"Bug Tracker" = "https://github.com/alibaba/zvec/issues"
"Documentation" = "https://zvec.org"

[project.optional-dependencies]
test = [
    "pytest >=8.0",
    "pytest-cov >=4.1",
    "pytest-mock >=3.12",
    "cibuildwheel == 3.4.0",
]
docs = [
    "mkdocs >=1.5",
    "mkdocs-material >=9.5",
    "mkdocstrings[python] >=0.24",
]
dev = [
    "ruff >=0.4",
    "black >=24.0",
    "mypy >=1.8",
    "pre-commit >=3.6",
    "build >=1.0",
    "twine >=4.0",
    "numpy >=1.23",
    # Inherit test deps
    "pytest >=8.0",
    "pytest-cov >=4.1",
    "pytest-mock >=3.12",
    "cibuildwheel == 3.4.0",
    # Inherit docs deps
    "mkdocs >=1.5",
    "mkdocs-material >=9.5",
    "mkdocstrings[python] >=0.24",
    "pybind11-stubgen>=2.5.5",
    "pybind11 >=3.0",
]
######################################################################################################
# BUILD SYSTEM CONFIGURATION (scikit-build-core)
######################################################################################################
[build-system]
requires = [
    "scikit-build-core >=0.11",
    "pybind11 >=3.0",
    "setuptools_scm>=8.0",
    "cmake>=3.26,<4.0",
    "ninja>=1.11",
]
build-backend = "scikit_build_core.build"

[tool.scikit-build]
# Core settings
minimum-version = "0.11"
metadata.version.provider = "scikit_build_core.metadata.setuptools_scm"

# CMake configuration
cmake.version = ">=3.26,<4.0"
ninja.version = ">=1.11"
cmake.build-type = "Release"
install.strip = true  # Strip symbols in release builds to reduce wheel size

# Build directory
build-dir = "build"

# Platform support
wheel.expand-macos-universal-tags = true
wheel.packages = ["python/zvec"]

# Source distribution
sdist.include = [
    "README.md",
    "LICENSE",
    "pyproject.toml",
    "CMakeLists.txt",
    "src/**/*",
    "stub/zvec/**/*",
    "python/zvec/py.typed",
]

# CMake defines (env-overridable)
[tool.scikit-build.cmake.define]
BUILD_TOOLS = "OFF"
BUILD_PYTHON_BINDINGS = "ON"
#CMAKE_VERBOSE_MAKEFILE = "ON"

# Setuptools config for test pypi
[tool.setuptools_scm]
local_scheme = "no-local-version"
version_scheme = "guess-next-dev"
fallback_version = "0.2.1b1"
######################################################################################################
# TESTING & QUALITY
######################################################################################################
[tool.pytest.ini_options]
minversion = "8.0"
addopts = [
    "-ra",
    "--showlocals",
    "--strict-markers",
    "--strict-config",
    "--tb=short",
]
xfail_strict = true
log_cli_level = "INFO"
filterwarnings = [
    "error",
    "ignore::pytest.PytestCacheWarning",
    # Ignore numpy deprecation warnings in tests (if any)
    "ignore:.*numpy.*:DeprecationWarning",
]
testpaths = ["python/tests"]
markers = [
    "title: Custom marker for test title/description",
    # "slow: marks tests as slow",
]

######################################################################################################
# BUILD WHEEL
######################################################################################################
[tool.cibuildwheel]
build = [
    "cp310-*",
    "cp311-*",
    "cp312-*",
    "cp313-*",
    "cp314-*",
]
build-frontend = "build"
test-requires = ["pytest", "numpy"]
test-command = "cd {project} && pytest python/tests -v --tb=short"
build-verbosity = 1

[tool.cibuildwheel.linux]
archs = ["auto"]
environment = { CMAKE_GENERATOR = "Unix Makefiles", CMAKE_BUILD_PARALLEL_LEVEL = "16" }
manylinux-x86_64-image = "manylinux_2_28"
manylinux-aarch64-image = "manylinux_2_28"
# Skip 32-bit builds and musllinux
skip = ["*-manylinux_i686", "*-musllinux*"]

[tool.cibuildwheel.macos]
archs = ["arm64"]
# CMAKE_GENERATOR and CMAKE_BUILD_PARALLEL_LEVEL cannot be inherited here:
# a platform-level `environment` overrides the top-level one entirely, so all vars must be listed here
environment = { CMAKE_GENERATOR = "Unix Makefiles", CMAKE_BUILD_PARALLEL_LEVEL = "16", MACOSX_DEPLOYMENT_TARGET = "11.0" }
######################################################################################################
# CODE QUALITY & FORMATTING (Ruff)
######################################################################################################
[tool.ruff]
target-version = "py310"
line-length = 88
exclude = [
    "build/",
    "dist/",
    ".git/",
    ".venv/",
    "venv/",
    "thirdparty",
]

[tool.ruff.lint]
extend-select = [
    "B",    # flake8-bugbear
    "I",    # isort
    "ARG",  # flake8-unused-arguments
    "C4",   # flake8-comprehensions
    "EM",   # flake8-errmsg
    "ICN",  # flake8-import-conventions
    "G",    # flake8-logging-format
    "PGH",  # pygrep-hooks
    "PIE",  # flake8-pie
    "PL",   # pylint
    "PT",   # flake8-pytest-style
    "PTH",  # flake8-use-pathlib
    "RET",  # flake8-return
    "RUF",  # Ruff-specific
    "SIM",  # flake8-simplify
    "T20",  # flake8-print
    "UP",   # pyupgrade
    "YTT",  # flake8-2020
    "EXE",  # flake8-executable
    "NPY",  # NumPy-specific
    "PD",   # pandas-vet
]
ignore = [
    "PLR0913",  # Too many arguments (common in bindings)
    "PLR2004",  # Magic value used in comparison
    "UP045", "UP007",  # Use `X | None` / `X | Y` in type annotations (breaks C++ init)
    "EM101", "EM102",  # Exception messages as literals (ok in tests/utils)
    "B008",     # Function call in argument defaults (cautiously allowed in config)
    "E731",     # Lambda assignment (used in callbacks)
    "B019",     # `functools.lru_cache` on methods (handled manually)
    "PLR0912",  # Too many branches
    "PLC0105",  # TypeVar name does not reflect its variance (e.g. contravariant)
    "RUF002",   # Ambiguous Unicode character in docstring
]
fixable = ["ALL"]
unfixable = []

# Docstring convention used by the pydocstyle rules
[tool.ruff.lint.pydocstyle]
convention = "google"  # or "numpy", "pep257"
ignore-decorators = ["typing.overload"]

[tool.ruff.lint.flake8-type-checking]
# Quote type annotations when that allows an import to move into a TYPE_CHECKING block
quote-annotations = true

[tool.ruff.lint.isort]
required-imports = ["from __future__ import annotations"]
known-first-party = ["zvec"]

[tool.ruff.lint.per-file-ignores]
"python/tests/**" = ["ALL"]
"bench/core/**" = ["ALL"]
"python/zvec/__init__.py" = [
    "F401",   # Unused import (for __all__)
    "E402",   # Module level import not at top (C++ module init order)
    "PLE0605", # Invalid format for __all__
    "RUF022", # __all__ is not sorted
]
"python/zvec/model/doc.py" = [
    "RUF023",   # `__slots__` is not sorted
]
"python/zvec/extension/**" = [
    "PLC0415",  # Import outside top-level (dynamic imports in _get_model)
]

[tool.ruff.format]
indent-style = "space"
quote-style = "double"
line-ending = "lf"
skip-magic-trailing-comma = false


================================================
FILE: python/tests/detail/distance_helper.py
================================================
import logging
import math
import numpy as np

from zvec import (
    MetricType,
    DataType,
    QuantizeType,
    Doc,
    CollectionSchema,
    FieldSchema,
    VectorSchema,
)

from typing import Dict


def is_float_equal(actual, expected, rel_tol=1e-5, abs_tol=1e-8):
    """Return True when two optional floats are equal within tolerance.

    Args:
        actual: The observed value, or None.
        expected: The reference value, or None.
        rel_tol: Relative tolerance forwarded to ``math.isclose``.
        abs_tol: Absolute tolerance forwarded to ``math.isclose``.

    Returns:
        True when both values are None or when both are numbers that
        ``math.isclose`` considers equal; False otherwise, including when
        exactly one of the two is None.
    """
    if actual is None or expected is None:
        # math.isclose raises TypeError on None, so settle missing values here:
        # they only match when both sides are missing.
        return actual is None and expected is None
    return math.isclose(actual, expected, rel_tol=rel_tol, abs_tol=abs_tol)


def is_dense_vector_equal(vec1, vec2, rtol=1e-5, atol=1e-8):
    """Return True when every element of two dense vectors is within tolerance.

    ``rtol`` and ``atol`` are the relative and absolute tolerances passed to
    NumPy's element-wise closeness check.
    """
    elementwise_close = np.isclose(vec1, vec2, rtol=rtol, atol=atol)
    return bool(elementwise_close.all())


def is_sparse_vector_equal(vec1, vec2, rtol=1e-5, atol=1e-8):
    """Return True when two sparse vectors match within tolerance.

    The vectors are mappings from index to value. They are equal when they
    hold exactly the same indices and every pair of values satisfies
    ``math.isclose`` with ``rel_tol=rtol`` and ``abs_tol=atol``.
    """
    # Different index sets can never be equal, whatever the values are.
    if set(vec1.keys()) != set(vec2.keys()):
        return False

    return all(
        math.isclose(value, vec2[index], rel_tol=rtol, abs_tol=atol)
        for index, value in vec1.items()
    )


def is_float_array_equal(arr1, arr2, rtol=1e-5, atol=1e-8):
    """Return True when two float arrays are element-wise equal within tolerance.

    The default tolerances are the single-precision ones used across this
    module (``rtol=1e-5``, ``atol=1e-8``).
    """
    tolerances = {"rtol": rtol, "atol": atol}
    return np.allclose(arr1, arr2, **tolerances)


def is_double_array_equal(arr1, arr2, rtol=1e-9, atol=1e-12):
    """Return True when two double arrays are element-wise equal within tolerance.

    The default tolerances (``rtol=1e-9``, ``atol=1e-12``) are tighter than
    the ones used for float arrays.
    """
    within_tolerance = np.isclose(arr1, arr2, rtol=rtol, atol=atol)
    return bool(np.all(within_tolerance))


def is_int_array_equal(arr1, arr2):
    """Return True when two arrays have the same shape and identical elements.

    No tolerance is applied; the comparison is delegated to
    ``np.array_equal``.
    """
    arrays_match = np.array_equal(arr1, arr2)
    return arrays_match


def cosine_distance_dense(
    vec1,
    vec2,
    dtype: DataType = DataType.VECTOR_FP32,
    quantize_type: QuantizeType = QuantizeType.UNDEFINED,
):
    """Compute the cosine distance (1 - cosine similarity) of two dense vectors.

    Inputs are first brought to the precision of the stored data: values are
    rounded through ``np.float16`` for FP16 data or FP16 quantization, and
    clamped to [-128, 127] and rounded to integers for INT8 data.

    Returns:
        ``1.0`` when either vector has zero magnitude, otherwise
        ``1.0 - cosine_similarity`` with the similarity clamped to [-1, 1].
        For the low-precision types a similarity within 1e-3 of 1.0 is
        treated as exactly 1.0, so the distance is 0.0.
    """
    uses_fp16 = dtype == DataType.VECTOR_FP16 or quantize_type == QuantizeType.FP16
    uses_int8 = dtype == DataType.VECTOR_INT8

    if uses_fp16:
        # Round every component through float16, then compute in Python floats.
        vec1 = [float(np.float16(component)) for component in vec1]
        vec2 = [float(np.float16(component)) for component in vec2]
    elif uses_int8:

        def _to_int8(value):
            # Clamp to the valid INT8 range before rounding to an integer.
            return int(round(min(max(value, -128), 127)))

        vec1 = [_to_int8(component) for component in vec1]
        vec2 = [_to_int8(component) for component in vec2]

    dot_product = sum(x * y for x, y in zip(vec1, vec2))
    norm1 = math.sqrt(sum(x * x for x in vec1))
    norm2 = math.sqrt(sum(y * y for y in vec2))

    # Zero vector case - maximum distance.
    if norm1 == 0 or norm2 == 0:
        return 1.0

    # Clamp to [-1, 1] to absorb floating-point error in the division.
    similarity = max(-1.0, min(1.0, dot_product / (norm1 * norm2)))

    # Low-precision types cannot resolve similarities very close to 1, so
    # snap them to exactly 1.0 to report a distance of 0.0.
    if (uses_fp16 or uses_int8) and abs(similarity - 1.0) < 1e-3:
        similarity = 1.0

    return 1.0 - similarity


def dp_distance_dense(
    vec1,
    vec2,
    dtype: DataType = DataType.VECTOR_FP32,
    quantize_type: QuantizeType = QuantizeType.UNDEFINED,
):
    """Compute the dot product of two dense vectors.

    For FP16 data or FP16 quantization each component is rounded through
    ``np.float16`` before multiplying. For INT8 data each component is clamped
    to [-128, 127] and rounded to an integer first. Otherwise the components
    are multiplied as given.
    """
    pairs = zip(vec1, vec2)

    if dtype == DataType.VECTOR_FP16 or quantize_type == QuantizeType.FP16:
        return sum(float(np.float16(x)) * float(np.float16(y)) for x, y in pairs)

    if dtype == DataType.VECTOR_INT8:

        def _to_int8(value):
            # Clamp to the valid INT8 range before rounding to an integer.
            return int(round(min(max(value, -128), 127)))

        return sum(_to_int8(x) * _to_int8(y) for x, y in pairs)

    return sum(x * y for x, y in pairs)


def euclidean_distance_dense(
    vec1,
    vec2,
    dtype: DataType = DataType.VECTOR_FP32,
    quantize_type: QuantizeType = QuantizeType.UNDEFINED,
):
    """Compute the squared Euclidean distance of two dense vectors.

    Despite the name, no square root is taken: the sum of squared differences
    is returned for every data type.

    For FP16 data or FP16 quantization the difference is computed in
    ``np.float16`` and squared as a Python float. For INT8 data the components
    are clamped to [-128, 127] and rounded to integers first, and each
    difference is squared as a float.
    """
    if dtype == DataType.VECTOR_FP16 or quantize_type == QuantizeType.FP16:
        # Subtract in float16, then widen to a Python float before squaring.
        deltas = [float(np.float16(x) - np.float16(y)) for x, y in zip(vec1, vec2)]
        return sum([delta * delta for delta in deltas])

    if dtype == DataType.VECTOR_INT8:

        def _to_int8(value):
            # Clamp to the valid INT8 range before rounding to an integer.
            return int(round(min(max(value, -128), 127)))

        left = [_to_int8(component) for component in vec1]
        right = [_to_int8(component) for component in vec2]
        # Square as floats rather than as integers.
        return sum(float(x - y) ** 2 for x, y in zip(left, right))

    return sum((x - y) ** 2 for x, y in zip(vec1, vec2))


def distance_dense(
    vec1,
    vec2,
    metric: MetricType,
    data_type: DataType = DataType.VECTOR_FP32,
    quantize_type: QuantizeType = QuantizeType.UNDEFINED,
):
    """Dispatch to the dense distance function that matches ``metric``.

    COSINE, L2 and IP map to ``cosine_distance_dense``,
    ``euclidean_distance_dense`` and ``dp_distance_dense`` respectively.

    Raises:
        ValueError: If ``metric`` is none of the three supported metrics.
    """
    if metric == MetricType.COSINE:
        return cosine_distance_dense(vec1, vec2, data_type, quantize_type)
    if metric == MetricType.L2:
        return euclidean_distance_dense(vec1, vec2, data_type, quantize_type)
    if metric == MetricType.IP:
        return dp_distance_dense(vec1, vec2, data_type, quantize_type)
    raise ValueError("Unsupported metric type")


def dp_distance_sparse(
    vec1,
    vec2,
    data_type: DataType = DataType.SPARSE_VECTOR_FP32,
    quantize_type: QuantizeType = QuantizeType.UNDEFINED,
):
    """Compute the dot product of two sparse vectors.

    The vectors are mappings from dimension index to value; only dimensions
    present in both contribute. For FP16 data or FP16 quantization the two
    values are converted to ``np.float16`` before being multiplied.

    The input mappings are not modified.

    Returns:
        The accumulated dot product, ``0.0`` when no dimension is shared.
    """
    use_fp16 = (
        data_type == DataType.SPARSE_VECTOR_FP16 or quantize_type == QuantizeType.FP16
    )

    dot_product = 0.0
    for dim in set(vec1.keys()) & set(vec2.keys()):
        left, right = vec1[dim], vec2[dim]
        if use_fp16:
            # Convert local copies only, so the caller's vectors keep their
            # original values and types.
            left, right = np.float16(left), np.float16(right)
        dot_product += left * right
    return dot_product


def distance(
    vec1,
    vec2,
    metric: MetricType,
    data_type: DataType,
    quantize_type: QuantizeType = QuantizeType.UNDEFINED,
):
    """Compute the distance between two vectors for the given data type.

    Sparse data types (FP32 and FP16) only support the IP metric and are
    handled by ``dp_distance_sparse``; every other data type goes through
    ``distance_dense``.

    Raises:
        ValueError: If a sparse data type is combined with a metric other
            than IP.
    """
    sparse_types = (DataType.SPARSE_VECTOR_FP32, DataType.SPARSE_VECTOR_FP16)

    if data_type not in sparse_types:
        return distance_dense(vec1, vec2, metric, data_type, quantize_type)

    if metric != MetricType.IP:
        raise ValueError("Unsupported metric type for sparse vectors")
    return dp_distance_sparse(vec1, vec2, data_type, quantize_type)


def distance_recall(
    vec1,
    vec2,
    metric: MetricType,
    data_type: DataType,
    quantize_type: QuantizeType = QuantizeType.UNDEFINED,
):
    """Compute the distance used by the recall checks.

    Sparse data types always use the sparse dot product, whatever the metric.
    FP32 and FP16 dense vectors use ``distance_dense`` with the given metric,
    as do INT8 vectors when the metric is L2 or IP. Every remaining
    combination falls back to the dense dot product.
    """
    if data_type in (DataType.SPARSE_VECTOR_FP32, DataType.SPARSE_VECTOR_FP16):
        return dp_distance_sparse(vec1, vec2, data_type, quantize_type)

    honours_metric = data_type in (DataType.VECTOR_FP32, DataType.VECTOR_FP16) or (
        data_type == DataType.VECTOR_INT8
        and metric in (MetricType.L2, MetricType.IP)
    )
    if honours_metric:
        return distance_dense(vec1, vec2, metric, data_type, quantize_type)

    return dp_distance_dense(vec1, vec2, data_type, quantize_type)


def calculate_rrf_score(rank, k=60):
    """Return the reciprocal-rank-fusion score ``1 / (k + rank + 1)``.

    ``rank`` is zero-based, so the top result with the default ``k`` scores
    ``1 / 61``.
    """
    denominator = k + rank + 1
    return 1.0 / denominator


def calculate_multi_vector_rrf_scores(query_results: Dict[str, Doc], k=60):
    """Fuse several ranked result lists into per-document RRF scores.

    Each value of ``query_results`` is iterated as a ranked sequence of docs.
    A doc's fused score is the sum of ``calculate_rrf_score(rank, k)`` over
    every list it appears in, with ``rank`` being its zero-based position.

    Returns:
        A dict mapping ``doc.id`` to the accumulated score.
    """
    rrf_scores = {}

    for ranked_docs in query_results.values():
        for position, doc in enumerate(ranked_docs):
            doc_id = doc.id
            contribution = calculate_rrf_score(position, k)
            rrf_scores[doc_id] = rrf_scores.get(doc_id, 0.0) + contribution

    return rrf_scores


def calculate_multi_vector_weighted_scores(
    query_results: Dict[str, Doc], weights: Dict[str, float], metric: MetricType
):
    """Fuse several result lists into per-document weighted scores.

    Every doc score is first normalized according to ``metric`` and then
    multiplied by the weight of the vector field it came from (1.0 when the
    field has no entry in ``weights``). Contributions of the same ``doc.id``
    are summed across all lists.

    Raises:
        ValueError: If ``metric`` is not L2, IP or COSINE.
    """

    def _normalize_score(score: float, metric: MetricType) -> float:
        # Per-metric normalization applied before weighting.
        if metric == MetricType.L2:
            return 1.0 - 2 * math.atan(score) / math.pi
        elif metric == MetricType.IP:
            return 0.5 + math.atan(score) / math.pi
        elif metric == MetricType.COSINE:
            return 1.0 - score / 2.0
        raise ValueError("Unsupported metric type")

    weighted_scores = {}

    for field_name, field_docs in query_results.items():
        field_weight = weights.get(field_name, 1.0)

        for doc in field_docs:
            doc_id = doc.id
            contribution = (_normalize_score(doc.score, metric)) * field_weight
            accumulated = weighted_scores.get(doc_id)
            weighted_scores[doc_id] = (
                contribution if accumulated is None else accumulated + contribution
            )

    return weighted_scores


def is_field_equal(field1, field2, schema: FieldSchema) -> bool:
    """Compare two scalar or array field values according to the field schema.

    Two missing values are equal; a missing value never equals a present one.
    Float and double arrays and float/double scalars are compared with
    tolerance, the remaining array types element-wise and exactly, and
    everything else with ``==``.
    """
    if field1 is None or field2 is None:
        return field1 is None and field2 is None

    kind = schema.data_type
    exact_array_types = (
        DataType.ARRAY_INT32,
        DataType.ARRAY_INT64,
        DataType.ARRAY_BOOL,
        DataType.ARRAY_STRING,
        DataType.ARRAY_UINT32,
        DataType.ARRAY_UINT64,
    )

    if kind == DataType.ARRAY_FLOAT:
        return is_float_array_equal(field1, field2)
    if kind == DataType.ARRAY_DOUBLE:
        return is_double_array_equal(field1, field2)
    if kind in exact_array_types:
        return is_int_array_equal(field1, field2)
    if kind in (DataType.FLOAT, DataType.DOUBLE):
        return is_float_equal(field1, field2)

    return field1 == field2


def is_vector_equal(vec1, vec2, schema: VectorSchema) -> bool:
    """Compare two vector values according to the vector schema.

    FP16 vectors, dense or sparse, are not compared and always count as
    equal. Sparse FP32 vectors use ``is_sparse_vector_equal``; every other
    type uses ``is_dense_vector_equal``.
    """
    vector_type = schema.data_type

    # skip fp16 vector equal
    if vector_type in (DataType.SPARSE_VECTOR_FP16, DataType.VECTOR_FP16):
        return True

    if vector_type in (DataType.SPARSE_VECTOR_FP32, DataType.SPARSE_VECTOR_FP16):
        return is_sparse_vector_equal(vec1, vec2)
    return is_dense_vector_equal(vec1, vec2)


def is_doc_equal(
    doc1: Doc,
    doc2: Doc,
    schema: CollectionSchema,
    except_score: bool = True,
    include_vector: bool = True,
) -> bool:
    """Check whether two documents carry the same id, fields and vectors.

    Args:
        doc1: First document.
        doc2: Second document.
        schema: Collection schema used to look up each field/vector schema.
        except_score: Accepted for call compatibility; not read by this
            function.
        include_vector: When ``True`` the vectors are compared as well.

    Returns:
        ``True`` when the ids match, the fields match (or are empty on both
        sides) and, if ``include_vector`` is set, the vectors match (or are
        empty on both sides). ``False`` otherwise, including when a name is
        not present in ``schema``.
    """
    if doc1.id != doc2.id:
        logging.error("doc ids are not equal")
        return False

    # Union of names so that a field/vector present on only one side is
    # still compared (against whatever the other side returns for it).
    reduce_field_names = set(doc1.field_names() + doc2.field_names())
    reduce_vector_names = set(doc1.vector_names() + doc2.vector_names())

    is_doc1_fields_empty = doc1.fields is None or doc1.fields == {}
    is_doc2_fields_empty = doc2.fields is None or doc2.fields == {}

    if is_doc1_fields_empty or is_doc2_fields_empty:
        # Empty on one side only means the documents differ.
        if is_doc1_fields_empty != is_doc2_fields_empty:
            return False
    else:
        for field_name in reduce_field_names:
            field_schema = schema.field(field_name)
            if field_schema is None:
                return False
            if not is_field_equal(
                doc1.field(field_name), doc2.field(field_name), field_schema
            ):
                logging.error(f"{field_name} are not equal")
                return False

    if include_vector:
        is_doc1_vectors_empty = doc1.vectors is None or doc1.vectors == {}
        is_doc2_vectors_empty = doc2.vectors is None or doc2.vectors == {}

        if is_doc1_vectors_empty or is_doc2_vectors_empty:
            # Compare vector emptiness on both sides; the field emptiness
            # flag must not leak into this check.
            if is_doc1_vectors_empty != is_doc2_vectors_empty:
                return False
        else:
            for vector_name in reduce_vector_names:
                vector_schema = schema.vector(vector_name)
                if vector_schema is None:
                    return False
                if not is_vector_equal(
                    doc1.vector(vector_name), doc2.vector(vector_name), vector_schema
                ):
                    return False

    return True


================================================
FILE: python/tests/detail/doc_helper.py
================================================
from zvec import CollectionSchema, Doc

from support_helper import *

import numpy as np
from typing import Literal, Optional, Union, Tuple

import random
import string
import math


def generate_constant_vector(
    i: int, dimension: int, dtype: Literal["int8", "float16", "float32"] = "float32"
):
    """Build a deterministic vector that is constant except for one position.

    Args:
        i: Seed index; determines both the fill value and the marked slot.
        dimension: Length of the returned list.
        dtype: ``"int8"`` yields integers in ``[0, 126]``; any other value
            yields floats derived from ``i % 1000``.

    Returns:
        A list of ``dimension`` values where slot ``i % dimension`` holds the
        value derived from ``i + 1`` and every other slot holds the value
        derived from ``i``.
    """
    if dtype == "int8":
        fill_value = i % 127
        marked_value = (i + 1) % 127
    else:
        fill_value = (i % 1000) / 256.0
        marked_value = ((i + 1) % 1000) / 256.0

    result = [fill_value] * dimension
    result[i % dimension] = marked_value
    return result


def generate_constant_vector_recall(
    i: int, dimension: int, dtype: Literal["int8", "float16", "float32"] = "float32"
):
    """Build a deterministic vector for recall tests, constant except one slot.

    Args:
        i: Seed index; determines both the fill value and the marked slot.
        dimension: Length of the returned list.
        dtype: ``"int8"`` yields integers in ``[0, 126]``; any other value
            yields floats computed from ``math.sin`` of the scaled index.

    Returns:
        A list of ``dimension`` values where slot ``i % dimension`` holds the
        value derived from ``i + 1`` and every other slot holds the value
        derived from ``i``.
    """
    if dtype == "int8":
        fill_value = i % 127
        marked_value = (i + 1) % 127
    else:
        fill_value = math.sin((i) * 1000) / 256.0
        marked_value = math.sin((i + 1) * 1000) / 256.0

    result = [fill_value] * dimension
    result[i % dimension] = marked_value
    return result


def generate_sparse_vector(i: int):
    """Return a single-entry sparse vector mapping index ``i`` to ``i + 0.1``."""
    sparse = {}
    sparse[i] = i + 0.1
    return sparse


def generate_vectordict(i: int, schema: CollectionSchema) -> Tuple[dict, dict]:
    """Build deterministic field and vector payloads for index ``i``.

    Args:
        i: Index that every generated value is derived from.
        schema: Collection schema; its ``fields`` and ``vectors`` decide which
            entries are generated.

    Returns:
        A ``(doc_fields, doc_vectors)`` pair of dicts keyed by field name and
        vector name respectively.

    Raises:
        ValueError: If a field or vector has a data type not handled here.
    """
    scalar_int_types = (
        DataType.INT32,
        DataType.UINT32,
        DataType.INT64,
        DataType.UINT64,
    )
    array_int_types = (
        DataType.ARRAY_INT32,
        DataType.ARRAY_UINT32,
        DataType.ARRAY_INT64,
        DataType.ARRAY_UINT64,
    )

    doc_fields = {}
    for field in schema.fields:
        data_type = field.data_type
        if data_type == DataType.BOOL:
            value = i % 2 == 0
        elif data_type in scalar_int_types:
            value = i
        elif data_type == DataType.FLOAT:
            value = float(i) + 0.1
        elif data_type == DataType.DOUBLE:
            value = float(i) + 0.11
        elif data_type == DataType.STRING:
            value = f"test_{i}"
        elif data_type == DataType.ARRAY_BOOL:
            value = [i % 2 == 0, i % 3 == 0]
        elif data_type in array_int_types:
            value = [i, i + 1, i + 2]
        elif data_type == DataType.ARRAY_FLOAT:
            value = [float(i + 0.1), float(i + 1.1), float(i + 2.1)]
        elif data_type == DataType.ARRAY_DOUBLE:
            value = [float(i + 0.11), float(i + 1.11), float(i + 2.11)]
        elif data_type == DataType.ARRAY_STRING:
            value = [f"test_{i}", f"test_{i + 1}", f"test_{i + 2}"]
        else:
            raise ValueError(f"Unsupported field type: {field.data_type}")
        doc_fields[field.name] = value

    doc_vectors = {}
    for vector in schema.vectors:
        data_type = vector.data_type
        if data_type == DataType.VECTOR_FP16:
            value = generate_constant_vector(i, vector.dimension, "float16")
        elif data_type == DataType.VECTOR_FP32:
            value = generate_constant_vector(i, vector.dimension, "float32")
        elif data_type == DataType.VECTOR_INT8:
            value = generate_constant_vector(i, vector.dimension, "int8")
        elif data_type in (DataType.SPARSE_VECTOR_FP32, DataType.SPARSE_VECTOR_FP16):
            # Both sparse precisions share the same generated payload.
            value = generate_sparse_vector(i)
        else:
            raise ValueError(f"Unsupported vector type: {vector.data_type}")
        doc_vectors[vector.name] = value

    return doc_fields, doc_vectors


def generate_vectordict_recall(i: int, schema: CollectionSchema) -> Tuple[dict, dict]:
    """Build deterministic field and vector payloads for recall tests.

    Scalar fields are generated from ``i`` directly; dense vectors come from
    ``generate_constant_vector_recall``.

    Args:
        i: Index that every generated value is derived from.
        schema: Collection schema; its ``fields`` and ``vectors`` decide which
            entries are generated.

    Returns:
        A ``(doc_fields, doc_vectors)`` pair of dicts keyed by field name and
        vector name respectively.

    Raises:
        ValueError: If a field or vector has a data type not handled here.
    """
    scalar_int_types = (
        DataType.INT32,
        DataType.UINT32,
        DataType.INT64,
        DataType.UINT64,
    )
    array_int_types = (
        DataType.ARRAY_INT32,
        DataType.ARRAY_UINT32,
        DataType.ARRAY_INT64,
        DataType.ARRAY_UINT64,
    )

    doc_fields = {}
    for field in schema.fields:
        data_type = field.data_type
        if data_type == DataType.BOOL:
            value = i % 2 == 0
        elif data_type in scalar_int_types:
            value = i
        elif data_type == DataType.FLOAT:
            value = float(i) + 0.1
        elif data_type == DataType.DOUBLE:
            value = float(i) + 0.11
        elif data_type == DataType.STRING:
            value = f"test_{i}"
        elif data_type == DataType.ARRAY_BOOL:
            value = [i % 2 == 0, i % 3 == 0]
        elif data_type in array_int_types:
            value = [i, i + 1, i + 2]
        elif data_type == DataType.ARRAY_FLOAT:
            value = [float(i + 0.1), float(i + 1.1), float(i + 2.1)]
        elif data_type == DataType.ARRAY_DOUBLE:
            value = [float(i + 0.11), float(i + 1.11), float(i + 2.11)]
        elif data_type == DataType.ARRAY_STRING:
            value = [f"test_{i}", f"test_{i + 1}", f"test_{i + 2}"]
        else:
            raise ValueError(f"Unsupported field type: {field.data_type}")
        doc_fields[field.name] = value

    doc_vectors = {}
    for vector in schema.vectors:
        data_type = vector.data_type
        if data_type == DataType.VECTOR_FP16:
            value = generate_constant_vector_recall(i, vector.dimension, "float16")
        elif data_type == DataType.VECTOR_FP32:
            value = generate_constant_vector_recall(i, vector.dimension, "float32")
        elif data_type == DataType.VECTOR_INT8:
            value = generate_constant_vector_recall(i, vector.dimension, "int8")
        elif data_type in (DataType.SPARSE_VECTOR_FP32, DataType.SPARSE_VECTOR_FP16):
            # Both sparse precisions share the same generated payload.
            value = generate_sparse_vector(i)
        else:
            raise ValueError(f"Unsupported vector type: {vector.data_type}")
        doc_vectors[vector.name] = value

    return doc_fields, doc_vectors


def generate_vectordict_update(i: int, schema: CollectionSchema) -> Tuple[dict, dict]:
    """Build the "updated" field and vector payloads for index ``i``.

    Values are derived from ``i + 1`` so they differ from what
    ``generate_vectordict`` produces for the same ``i``.

    Args:
        i: Index of the document being updated.
        schema: Collection schema; its ``fields`` and ``vectors`` decide which
            entries are generated.

    Returns:
        A ``(doc_fields, doc_vectors)`` pair of dicts keyed by field name and
        vector name respectively.

    Raises:
        ValueError: If a field or vector has a data type not handled here.
    """
    scalar_int_types = (
        DataType.INT32,
        DataType.UINT32,
        DataType.INT64,
        DataType.UINT64,
    )
    array_int_types = (
        DataType.ARRAY_INT32,
        DataType.ARRAY_UINT32,
        DataType.ARRAY_INT64,
        DataType.ARRAY_UINT64,
    )

    doc_fields = {}
    for field in schema.fields:
        data_type = field.data_type
        if data_type == DataType.BOOL:
            value = (i + 1) % 2 == 0
        elif data_type in scalar_int_types:
            value = i + 1
        elif data_type == DataType.FLOAT:
            value = float(i + 1) + 0.1
        elif data_type == DataType.DOUBLE:
            value = float(i + 1) + 0.11
        elif data_type == DataType.STRING:
            value = f"test_{i + 1}"
        elif data_type == DataType.ARRAY_BOOL:
            value = [(i + 1) % 2 == 0, (i + 1) % 3 == 0]
        elif data_type in array_int_types:
            # NOTE(review): the first element is repeated, unlike the float and
            # string arrays below which shift every element by one -- confirm
            # whether [i + 1, i + 2, i + 3] was intended.
            value = [i + 1, i + 1, i + 2]
        elif data_type == DataType.ARRAY_FLOAT:
            value = [float(i + 1.1), float(i + 2.1), float(i + 3.1)]
        elif data_type == DataType.ARRAY_DOUBLE:
            value = [float(i + 1.11), float(i + 2.11), float(i + 3.11)]
        elif data_type == DataType.ARRAY_STRING:
            value = [f"test_{i + 1}", f"test_{i + 2}", f"test_{i + 3}"]
        else:
            raise ValueError(f"Unsupported field type: {field.data_type}")
        doc_fields[field.name] = value

    doc_vectors = {}
    for vector in schema.vectors:
        data_type = vector.data_type
        if data_type == DataType.VECTOR_FP16:
            value = generate_constant_vector(i + 1, vector.dimension, "float16")
        elif data_type == DataType.VECTOR_FP32:
            value = generate_constant_vector(i + 1, vector.dimension, "float32")
        elif data_type == DataType.VECTOR_INT8:
            value = generate_constant_vector(i + 1, vector.dimension, "int8")
        elif data_type in (DataType.SPARSE_VECTOR_FP32, DataType.SPARSE_VECTOR_FP16):
            # Both sparse precisions share the same generated payload.
            value = generate_sparse_vector(i + 1)
        else:
            raise ValueError(f"Unsupported vector type: {vector.data_type}")
        doc_vectors[vector.name] = value

    return doc_fields, doc_vectors


def generate_doc(i: int, schema: CollectionSchema) -> Doc:
    """Create the deterministic ``Doc`` for index ``i``.

    The document id is ``str(i)``; fields and vectors come from
    ``generate_vectordict``.
    """
    fields, vectors = generate_vectordict(i, schema)
    return Doc(id=str(i), fields=fields, vectors=vectors)


def generate_doc_recall(i: int, schema: CollectionSchema) -> Doc:
    """Create the deterministic recall-test ``Doc`` for index ``i``.

    The document id is ``str(i)``; fields and vectors come from
    ``generate_vectordict_recall``.
    """
    fields, vectors = generate_vectordict_recall(i, schema)
    return Doc(id=str(i), fields=fields, vectors=vectors)


def generate_update_doc(i: int, schema: CollectionSchema) -> Doc:
    """Create the "updated" ``Doc`` for index ``i``.

    The document keeps the id ``str(i)`` while its fields and vectors come
    from ``generate_vectordict_update``.
    """
    fields, vectors = generate_vectordict_update(i, schema)
    return Doc(id=str(i), fields=fields, vectors=vectors)


def generate_doc_random(i, schema: CollectionSchema) -> Doc:
    """Build a pseudo-random ``Doc`` that is reproducible for a given ``i``.

    The module-level ``random`` generator is re-seeded with ``i`` before any
    value is drawn, so the same ``i`` and schema yield the same document.

    Args:
        i: Used both as the random seed and, unchanged, as the document id.
        schema: Collection schema; its ``fields`` and ``vectors`` decide which
            entries are generated. Dense vectors are sized from each vector
            schema's own ``dimension``.

    Returns:
        A ``Doc`` with one value per schema field and per schema vector.

    Raises:
        ValueError: If a field or vector has a data type not handled here.
    """
    random.seed(i)
    alphabet = string.ascii_letters + string.digits

    doc_fields = {}
    for field in schema.fields:
        data_type = field.data_type
        if data_type == DataType.BOOL:
            value = random.choice([True, False])
        elif data_type == DataType.INT32:
            value = random.randint(-2147483648, 2147483647)
        elif data_type == DataType.UINT32:
            value = random.randint(0, 4294967295)
        elif data_type == DataType.INT64:
            value = random.randint(-9223372036854775808, 9223372036854775807)
        elif data_type == DataType.UINT64:
            value = random.randint(0, 18446744073709551615)
        elif data_type == DataType.FLOAT:
            value = random.uniform(-3.4028235e38, 3.4028235e38)
        elif data_type == DataType.DOUBLE:
            value = random.uniform(-1.7976931348623157e308, 1.7976931348623157e308)
        elif data_type == DataType.STRING:
            length = random.randint(1, 999)
            value = "".join(random.choices(alphabet, k=length))
        elif data_type == DataType.ARRAY_BOOL:
            count = random.randint(0, 10)
            value = [random.choice([True, False]) for _ in range(count)]
        elif data_type == DataType.ARRAY_INT32:
            count = random.randint(0, 10)
            value = [random.randint(-2147483648, 2147483647) for _ in range(count)]
        elif data_type == DataType.ARRAY_UINT32:
            count = random.randint(0, 10)
            value = [random.randint(0, 4294967295) for _ in range(count)]
        elif data_type == DataType.ARRAY_INT64:
            count = random.randint(0, 10)
            value = [
                random.randint(-9223372036854775808, 9223372036854775807)
                for _ in range(count)
            ]
        elif data_type == DataType.ARRAY_UINT64:
            count = random.randint(0, 10)
            value = [random.randint(0, 18446744073709551615) for _ in range(count)]
        elif data_type == DataType.ARRAY_FLOAT:
            count = random.randint(0, 10)
            value = [random.uniform(-3.4028235e38, 3.4028235e38) for _ in range(count)]
        elif data_type == DataType.ARRAY_DOUBLE:
            count = random.randint(0, 10)
            value = [
                random.uniform(-1.7976931348623157e308, 1.7976931348623157e308)
                for _ in range(count)
            ]
        elif data_type == DataType.ARRAY_STRING:
            count = random.randint(0, 10)
            # The per-string length is drawn before the characters, once per
            # element, which keeps the sequence of random draws stable.
            value = [
                "".join(random.choices(alphabet, k=random.randint(1, 100)))
                for _ in range(count)
            ]
        else:
            raise ValueError(f"Unsupported field type: {field.data_type}")
        doc_fields[field.name] = value

    doc_vectors = {}
    for vector in schema.vectors:
        data_type = vector.data_type
        if data_type == DataType.VECTOR_FP16:
            dense_dtype = "float16"
        elif data_type == DataType.VECTOR_FP32:
            dense_dtype = "float32"
        elif data_type == DataType.VECTOR_INT8:
            dense_dtype = "int8"
        elif data_type in (DataType.SPARSE_VECTOR_FP32, DataType.SPARSE_VECTOR_FP16):
            dense_dtype = None
        else:
            raise ValueError(f"Unsupported vector type: {vector.data_type}")

        vector_seed = random.randint(1, 100)
        if dense_dtype is None:
            doc_vectors[vector.name] = generate_sparse_vector(vector_seed)
        else:
            # Size the vector from the schema rather than a fixed default so
            # schemas with a different dimension get matching vectors.
            doc_vectors[vector.name] = generate_constant_vector(
                vector_seed, vector.dimension, dense_dtype
            )

    return Doc(id=i, fields=doc_fields, vectors=doc_vectors)


def generate_vectordict_random(schema: CollectionSchema):
    """Build random field and vector payloads for ``schema``.

    Values are drawn from the module-level ``random`` generator without
    re-seeding it.

    Args:
        schema: Collection schema; its ``fields`` and ``vectors`` decide which
            entries are generated.

    Returns:
        A ``(doc_fields, doc_vectors)`` pair of dicts keyed by field name and
        vector name respectively.

    Raises:
        ValueError: If a field or vector has a data type not handled here.
    """
    alphabet = string.ascii_letters + string.digits

    doc_fields = {}
    for field in schema.fields:
        data_type = field.data_type
        if data_type == DataType.BOOL:
            value = random.choice([True, False])
        elif data_type == DataType.INT32:
            value = random.randint(-2147483648, 2147483647)
        elif data_type == DataType.UINT32:
            value = random.randint(0, 4294967295)
        elif data_type == DataType.INT64:
            value = random.randint(-9223372036854775808, 9223372036854775807)
        elif data_type == DataType.UINT64:
            value = random.randint(0, 18446744073709551615)
        elif data_type == DataType.FLOAT:
            value = random.uniform(-3.4028235e38, 3.4028235e38)
        elif data_type == DataType.DOUBLE:
            value = random.uniform(-1.7976931348623157e308, 1.7976931348623157e308)
        elif data_type == DataType.STRING:
            length = random.randint(1, 999)
            value = "".join(random.choices(alphabet, k=length))
        elif data_type == DataType.ARRAY_BOOL:
            count = random.randint(0, 10)
            value = [random.choice([True, False]) for _ in range(count)]
        elif data_type == DataType.ARRAY_INT32:
            count = random.randint(0, 10)
            value = [random.randint(-2147483648, 2147483647) for _ in range(count)]
        elif data_type == DataType.ARRAY_UINT32:
            count = random.randint(0, 10)
            value = [random.randint(0, 4294967295) for _ in range(count)]
        elif data_type == DataType.ARRAY_INT64:
            count = random.randint(0, 10)
            value = [
                random.randint(-9223372036854775808, 9223372036854775807)
                for _ in range(count)
            ]
        elif data_type == DataType.ARRAY_UINT64:
            count = random.randint(0, 10)
            value = [random.randint(0, 18446744073709551615) for _ in range(count)]
        elif data_type == DataType.ARRAY_FLOAT:
            count = random.randint(0, 10)
            value = [random.uniform(-3.4028235e38, 3.4028235e38) for _ in range(count)]
        elif data_type == DataType.ARRAY_DOUBLE:
            count = random.randint(0, 10)
            value = [
                random.uniform(-1.7976931348623157e308, 1.7976931348623157e308)
                for _ in range(count)
            ]
        elif data_type == DataType.ARRAY_STRING:
            count = random.randint(0, 10)
            # The per-string length is drawn before the characters, once per
            # element.
            value = [
                "".join(random.choices(alphabet, k=random.randint(1, 100)))
                for _ in range(count)
            ]
        else:
            raise ValueError(f"Unsupported field type: {field.data_type}")
        doc_fields[field.name] = value

    doc_vectors = {}
    for vector in schema.vectors:
        data_type = vector.data_type
        if data_type == DataType.VECTOR_FP16:
            dense_dtype = "float16"
        elif data_type == DataType.VECTOR_FP32:
            dense_dtype = "float32"
        elif data_type == DataType.VECTOR_INT8:
            dense_dtype = "int8"
        elif data_type in (DataType.SPARSE_VECTOR_FP32, DataType.SPARSE_VECTOR_FP16):
            dense_dtype = None
        else:
            raise ValueError(f"Unsupported vector type: {vector.data_type}")

        vector_seed = random.randint(1, 100)
        if dense_dtype is None:
            doc_vectors[vector.name] = generate_sparse_vector(vector_seed)
        else:
            doc_vectors[vector.name] = generate_constant_vector(
                vector_seed, vector.dimension, dense_dtype
            )

    return doc_fields, doc_vectors


================================================
FILE: python/tests/detail/fixture_helper.py
================================================
import pytest
import logging

from typing import Any, Generator
from zvec.typing import DataType, StatusCode, MetricType, QuantizeType
import zvec
from zvec import (
    CollectionOption,
    InvertIndexParam,
    HnswIndexParam,
    FlatIndexParam,
    IVFIndexParam,
    FieldSchema,
    VectorSchema,
    CollectionSchema,
    Collection,
    Doc,
    VectorQuery,
)

from support_helper import *


@pytest.fixture(scope="session")
def basic_schema(collection_name="test_collection") -> CollectionSchema:
    """Session-scoped fixture returning a small three-field, two-vector schema.

    The schema has indexed ``id`` (INT64) and ``name`` (STRING) fields, a
    nullable ``weight`` (FLOAT) field, a 128-dimensional FP32 ``dense`` vector
    and an FP32 ``sparse`` vector, both with HNSW index parameters. An empty
    ``collection_name`` falls back to ``"test_collection"``.
    """
    schema_name = collection_name if len(collection_name) > 0 else "test_collection"

    id_field = FieldSchema(
        "id",
        DataType.INT64,
        nullable=False,
        index_param=InvertIndexParam(enable_range_optimization=True),
    )
    name_field = FieldSchema(
        "name", DataType.STRING, nullable=False, index_param=InvertIndexParam()
    )
    weight_field = FieldSchema("weight", DataType.FLOAT, nullable=True)

    dense_vector = VectorSchema(
        "dense",
        DataType.VECTOR_FP32,
        dimension=128,
        index_param=HnswIndexParam(),
    )
    sparse_vector = VectorSchema(
        "sparse", DataType.SPARSE_VECTOR_FP32, index_param=HnswIndexParam()
    )

    return CollectionSchema(
        name=schema_name,
        fields=[id_field, name_field, weight_field],
        vectors=[dense_vector, sparse_vector],
    )


@pytest.fixture(scope="session")
def full_schema(
    nullable: bool = False,
    has_index: bool = False,
) -> CollectionSchema:
    """Session-scoped fixture returning a schema with every default field/vector.

    One field is created per entry of ``DEFAULT_SCALAR_FIELD_NAME`` and one
    vector per entry of ``DEFAULT_VECTOR_FIELD_NAME`` (both map data type to
    name). When ``has_index`` is true, fields get an inverted index with range
    optimization and vectors get default HNSW parameters; otherwise no index
    parameters are set.
    """
    if has_index:
        scalar_index_param = InvertIndexParam(enable_range_optimization=True)
        vector_index_param = HnswIndexParam()
    else:
        scalar_index_param = None
        vector_index_param = None

    scalar_fields = [
        FieldSchema(
            field_name,
            data_type,
            nullable=nullable,
            index_param=scalar_index_param,
        )
        for data_type, field_name in DEFAULT_SCALAR_FIELD_NAME.items()
    ]
    vector_fields = [
        VectorSchema(
            vector_name,
            data_type,
            dimension=DEFAULT_VECTOR_DIMENSION,
            index_param=vector_index_param,
        )
        for data_type, vector_name in DEFAULT_VECTOR_FIELD_NAME.items()
    ]

    return CollectionSchema(
        name="full_collection",
        fields=scalar_fields,
        vectors=vector_fields,
    )


@pytest.fixture(scope="function")
def full_schema_new(request) -> CollectionSchema:
    """Function-scoped fixture building the full schema for a given index setup.

    ``request.param``, when present, is unpacked as
    ``(nullable, has_index, vector_index)``; otherwise the defaults are
    ``(True, False, HnswIndexParam())``. With ``has_index`` false neither
    scalar nor vector index parameters are taken from the request.

    Vector index assignment depends on which group the requested vector index
    parameter equals:

    * first group: every vector gets the requested parameter;
    * second group: the FP16/FP32 dense vectors get it, the INT8 vector gets
      it only for the L2 variants, everything else gets default HNSW;
    * anything else: the FP16/FP32 dense vectors get it, everything else gets
      default HNSW.
    """
    if hasattr(request, "param"):
        nullable, has_index, vector_index = request.param
    else:
        nullable, has_index, vector_index = True, False, HnswIndexParam()

    if has_index:
        scalar_index_param = InvertIndexParam(enable_range_optimization=True)
        vector_index_param = vector_index
    else:
        scalar_index_param = None
        vector_index_param = None

    fields = [
        FieldSchema(
            field_name,
            data_type,
            nullable=nullable,
            index_param=scalar_index_param,
        )
        for data_type, field_name in DEFAULT_SCALAR_FIELD_NAME.items()
    ]

    def _vector_schema(vector_name, data_type, index_param):
        # Every vector in this schema shares the default dimension.
        return VectorSchema(
            vector_name,
            data_type,
            dimension=DEFAULT_VECTOR_DIMENSION,
            index_param=index_param,
        )

    float_vector_names = ["vector_fp16_field", "vector_fp32_field"]
    vectors = []

    if vector_index_param in [
        HnswIndexParam(),
        FlatIndexParam(),
        HnswIndexParam(metric_type=MetricType.IP, m=16, ef_construction=100),
        FlatIndexParam(metric_type=MetricType.IP),
    ]:
        for data_type, vector_name in DEFAULT_VECTOR_FIELD_NAME.items():
            vectors.append(_vector_schema(vector_name, data_type, vector_index_param))
    elif vector_index_param in [
        IVFIndexParam(),
        IVFIndexParam(
            metric_type=MetricType.IP, n_list=100, n_iters=10, use_soar=False
        ),
        IVFIndexParam(
            metric_type=MetricType.L2, n_list=200, n_iters=20, use_soar=True
        ),
        IVFIndexParam(
            metric_type=MetricType.COSINE, n_list=150, n_iters=15, use_soar=False
        ),
        HnswIndexParam(metric_type=MetricType.COSINE, m=24, ef_construction=150),
        HnswIndexParam(metric_type=MetricType.L2, m=32, ef_construction=200),
        FlatIndexParam(metric_type=MetricType.COSINE),
        FlatIndexParam(metric_type=MetricType.L2),
    ]:
        for data_type, vector_name in DEFAULT_VECTOR_FIELD_NAME.items():
            if vector_name in float_vector_names:
                chosen_param = vector_index_param
            elif vector_name in ["vector_int8_field"] and vector_index_param in [
                IVFIndexParam(
                    metric_type=MetricType.L2, n_list=200, n_iters=20, use_soar=True
                ),
                HnswIndexParam(metric_type=MetricType.L2, m=32, ef_construction=200),
                FlatIndexParam(metric_type=MetricType.L2),
            ]:
                chosen_param = vector_index_param
            else:
                chosen_param = HnswIndexParam()
            vectors.append(_vector_schema(vector_name, data_type, chosen_param))
    else:
        for data_type, vector_name in DEFAULT_VECTOR_FIELD_NAME.items():
            if vector_name in float_vector_names:
                chosen_param = vector_index_param
            else:
                chosen_param = HnswIndexParam()
            vectors.append(_vector_schema(vector_name, data_type, chosen_param))

    return CollectionSchema(
        name="full_collection_new",
        fields=fields,
        vectors=vectors,
    )


@pytest.fixture(scope="function")
def full_schema_ivf(request) -> CollectionSchema:
    """Function-scoped fixture building a schema with only FP16/FP32 dense vectors.

    ``request.param``, when present, is unpacked as
    ``(nullable, has_index, vector_index)``; otherwise the defaults are
    ``(True, False, IVFIndexParam())``. All default scalar fields are
    included; of the default vectors only ``vector_fp16_field`` and
    ``vector_fp32_field`` are kept. Index parameters are applied only when
    ``has_index`` is true.
    """
    if hasattr(request, "param"):
        nullable, has_index, vector_index = request.param
    else:
        nullable, has_index, vector_index = True, False, IVFIndexParam()

    if has_index:
        scalar_index_param = InvertIndexParam(enable_range_optimization=True)
        vector_index_param = vector_index
    else:
        scalar_index_param = None
        vector_index_param = None

    fields = [
        FieldSchema(
            field_name,
            data_type,
            nullable=nullable,
            index_param=scalar_index_param,
        )
        for data_type, field_name in DEFAULT_SCALAR_FIELD_NAME.items()
    ]
    vectors = [
        VectorSchema(
            vector_name,
            data_type,
            dimension=DEFAULT_VECTOR_DIMENSION,
            index_param=vector_index_param,
        )
        for data_type, vector_name in DEFAULT_VECTOR_FIELD_NAME.items()
        if vector_name in ["vector_fp16_field", "vector_fp32_field"]
    ]

    return CollectionSchema(
        name="full_collection_ivf",
        fields=fields,
        vectors=vectors,
    )


@pytest.fixture(scope="function")
def full_schema_1024(request) -> CollectionSchema:
    """Build the ``full_collection_new`` schema using ``VECTOR_DIMENSION_1024``.

    ``request.param``, when present, is unpacked as
    ``(nullable, has_index, vector_index)``; otherwise the triple
    ``(True, False, HnswIndexParam())`` is used. When ``has_index`` is falsy
    both index parameters stay ``None`` and ``vector_index`` is ignored.

    Every entry of ``DEFAULT_SCALAR_FIELD_NAME`` becomes a scalar field and
    every entry of ``DEFAULT_VECTOR_FIELD_NAME`` becomes a vector field. The
    index parameter given to each vector depends on ``vector_index_param``:

    * equal to one of the listed HNSW/Flat parameters: all vectors use it;
    * equal to one of the listed IVF parameters: the fp16/fp32 vectors use
      it, the int8 vector uses it only for the L2 and COSINE variants, and
      every remaining vector gets a fresh ``HnswIndexParam()``;
    * anything else (including ``None``): the fp16/fp32/int8 vectors use it
      and every remaining vector gets a fresh ``HnswIndexParam()``.
    """
    if hasattr(request, "param"):
        nullable, has_index, vector_index = request.param
    else:
        nullable, has_index, vector_index = True, False, HnswIndexParam()

    scalar_index_param = None
    vector_index_param = None
    if has_index:
        scalar_index_param = InvertIndexParam(enable_range_optimization=True)
        vector_index_param = vector_index

    fields = [
        FieldSchema(
            field_name,
            data_type,
            nullable=nullable,
            index_param=scalar_index_param,
        )
        for data_type, field_name in DEFAULT_SCALAR_FIELD_NAME.items()
    ]

    def make_vector(vector_name, data_type, index_param):
        # Every vector in this schema shares the same dimension constant.
        return VectorSchema(
            vector_name,
            data_type,
            dimension=VECTOR_DIMENSION_1024,
            index_param=index_param,
        )

    vectors = []

    if vector_index_param in [
        HnswIndexParam(),
        FlatIndexParam(),
        HnswIndexParam(
            metric_type=MetricType.IP,
            m=16,
            ef_construction=100,
        ),
        FlatIndexParam(
            metric_type=MetricType.IP,
        ),
    ]:
        for data_type, vector_name in DEFAULT_VECTOR_FIELD_NAME.items():
            vectors.append(make_vector(vector_name, data_type, vector_index_param))
    elif vector_index_param in [
        IVFIndexParam(),
        IVFIndexParam(
            metric_type=MetricType.IP,
            n_list=100,
            n_iters=10,
            use_soar=False,
        ),
        IVFIndexParam(
            metric_type=MetricType.L2,
            n_list=200,
            n_iters=20,
            use_soar=True,
        ),
        IVFIndexParam(
            metric_type=MetricType.COSINE,
            n_list=150,
            n_iters=15,
            use_soar=False,
        ),
    ]:
        # Only these IVF variants are also applied to the int8 vector.
        int8_uses_ivf = vector_index_param in [
            IVFIndexParam(
                metric_type=MetricType.L2,
                n_list=200,
                n_iters=20,
                use_soar=True,
            ),
            IVFIndexParam(
                metric_type=MetricType.COSINE,
                n_list=150,
                n_iters=15,
                use_soar=False,
            ),
        ]
        for data_type, vector_name in DEFAULT_VECTOR_FIELD_NAME.items():
            if vector_name in ["vector_fp16_field", "vector_fp32_field"]:
                vectors.append(
                    make_vector(vector_name, data_type, vector_index_param)
                )
            elif vector_name == "vector_int8_field" and int8_uses_ivf:
                # The original code referenced the undefined name
                # DVECTOR_DIMENSION_1024 here, which raised NameError.
                vectors.append(
                    make_vector(vector_name, data_type, vector_index_param)
                )
            else:
                vectors.append(make_vector(vector_name, data_type, HnswIndexParam()))
    else:
        for data_type, vector_name in DEFAULT_VECTOR_FIELD_NAME.items():
            if vector_name in [
                "vector_fp16_field",
                "vector_fp32_field",
                "vector_int8_field",
            ]:
                vectors.append(
                    make_vector(vector_name, data_type, vector_index_param)
                )
            else:
                vectors.append(make_vector(vector_name, data_type, HnswIndexParam()))

    return CollectionSchema(
        name="full_collection_new",
        fields=fields,
        vectors=vectors,
    )


@pytest.fixture(scope="function")
def single_vector_schema(
    data_type: DataType,
) -> CollectionSchema:
    """Build a ``full_collection`` schema holding exactly one vector field.

    The vector is named after ``data_type`` via ``DEFAULT_VECTOR_FIELD_NAME``
    and is created with ``DEFAULT_VECTOR_DIMENSION``; no scalar fields and no
    explicit index parameter are passed.
    """
    only_vector = VectorSchema(
        DEFAULT_VECTOR_FIELD_NAME[data_type],
        data_type,
        DEFAULT_VECTOR_DIMENSION,
    )
    return CollectionSchema(name="full_collection", vectors=[only_vector])


@pytest.fixture(scope="function")
def single_vector_schema_with_index_param(
    data_type: DataType, index_param
) -> CollectionSchema:
    """Build a ``full_collection`` schema with one vector and a given index.

    The vector is named after ``data_type`` via ``DEFAULT_VECTOR_FIELD_NAME``,
    uses ``DEFAULT_VECTOR_DIMENSION`` and receives ``index_param`` as the
    fourth positional argument of ``VectorSchema``.
    """
    only_vector = VectorSchema(
        DEFAULT_VECTOR_FIELD_NAME[data_type],
        data_type,
        DEFAULT_VECTOR_DIMENSION,
        index_param,
    )
    return CollectionSchema(name="full_collection", vectors=[only_vector])


def create_collection_fixture(
    collection_temp_dir, schema: CollectionSchema, collection_option: CollectionOption
) -> Generator[Any, Any, Collection]:
    """Create a collection, sanity-check it, yield it, then destroy it.

    The collection is created with ``zvec.create_and_open`` at
    ``str(collection_temp_dir)``. Before yielding, its path, schema name,
    fields, vectors and the ``read_only`` / ``enable_mmap`` options are
    asserted to match the inputs. On exit ``destroy()`` is attempted; a
    failure there is logged as a warning and not re-raised.
    """
    target_path = str(collection_temp_dir)
    coll = zvec.create_and_open(
        path=target_path,
        schema=schema,
        option=collection_option,
    )

    assert coll is not None, "Failed to create and open collection"
    assert coll.path == target_path
    assert coll.schema.name == schema.name
    assert list(coll.schema.fields) == list(schema.fields)
    assert list(coll.schema.vectors) == list(schema.vectors)
    assert coll.option.read_only == collection_option.read_only
    assert coll.option.enable_mmap == collection_option.enable_mmap

    try:
        yield coll
    finally:
        can_destroy = coll is not None and hasattr(coll, "destroy")
        if can_destroy:
            try:
                coll.destroy()
            except Exception as e:
                # Best-effort teardown: report the problem but keep going.
                logging.warning(f"Warning: failed to destroy collection: {e}")


@pytest.fixture(scope="function")
def basic_collection(
    collection_temp_dir, basic_schema, collection_option
) -> Generator[Any, Any, Collection]:
    """Yield a collection built from ``basic_schema``.

    Creation and teardown are delegated to ``create_collection_fixture``.
    """
    managed = create_collection_fixture(
        collection_temp_dir,
        schema=basic_schema,
        collection_option=collection_option,
    )
    yield from managed


@pytest.fixture(scope="function")
def collection_option():
    """Return a ``CollectionOption`` with ``read_only=False`` and ``enable_mmap=True``."""
    option = CollectionOption(read_only=False, enable_mmap=True)
    return option


@pytest.fixture(scope="function")
def collection_temp_dir(tmp_path_factory):
    """Return, as a string, the path ``test_collection_path`` inside a new ``zvec`` temp dir.

    Only the parent directory is created (by ``tmp_path_factory.mktemp``);
    this fixture does not create ``test_collection_path`` itself.
    """
    return str(tmp_path_factory.mktemp("zvec") / "test_collection_path")


@pytest.fixture(scope="function")
def full_collection(
    collection_temp_dir,
    full_schema,
    collection_option,
    nullable: bool = True,
    has_index: bool = False,
) -> Generator[Any, Any, Collection]:
    """Yield a collection built from ``full_schema``.

    Creation and teardown are delegated to ``create_collection_fixture``.
    ``nullable`` and ``has_index`` are accepted but not used in this body.
    """
    managed = create_collection_fixture(
        collection_temp_dir,
        schema=full_schema,
        collection_option=collection_option,
    )
    yield from managed


@pytest.fixture(scope="function")
def full_collection_new(
    collection_temp_dir, full_schema_new, collection_option
) -> Generator[Any, Any, Collection]:
    """Yield a collection built from ``full_schema_new``.

    Creation and teardown are delegated to ``create_collection_fixture``.
    """
    managed = create_collection_fixture(
        collection_temp_dir,
        schema=full_schema_new,
        collection_option=collection_option,
    )
    yield from managed


@pytest.fixture(scope="function")
def full_collection_ivf(
    collection_temp_dir, full_schema_ivf, collection_option
) -> Generator[Any, Any, Collection]:
    """Yield a collection built from ``full_schema_ivf``.

    Creation and teardown are delegated to ``create_collection_fixture``.
    """
    managed = create_collection_fixture(
        collection_temp_dir,
        schema=full_schema_ivf,
        collection_option=collection_option,
    )
    yield from managed


@pytest.fixture(scope="function")
def full_collection_1024(
    collection_temp_dir, full_schema_1024, collection_option
) -> Generator[Any, Any, Collection]:
    """Yield a collection built from ``full_schema_1024``.

    Creation and teardown are delegated to ``create_collection_fixture``.
    """
    managed = create_collection_fixture(
        collection_temp_dir,
        schema=full_schema_1024,
        collection_option=collection_option,
    )
    yield from managed


@pytest.fixture
def sample_field_list(nullable: bool = True, scalar_index_param=None, name_prefix=""):
    """Return one ``FieldSchema`` per entry of ``DEFAULT_SCALAR_FIELD_NAME``.

    Each field is named ``"<name_prefix>_<default name>"`` when
    ``name_prefix`` is non-empty, otherwise just the default name, and is
    created with the given ``nullable`` and ``scalar_index_param``.

    NOTE(review): all parameters carry defaults; presumably pytest does not
    inject values for them, so as a fixture the defaults apply — confirm.
    """

    def qualified(base_name):
        if len(name_prefix) > 0:
            return f"{name_prefix}_{base_name}"
        return base_name

    return [
        FieldSchema(
            qualified(field_name),
            data_type,
            nullable=nullable,
            index_param=scalar_index_param,
        )
        for data_type, field_name in DEFAULT_SCALAR_FIELD_NAME.items()
    ]


@pytest.fixture
def sample_vector_list(vector_index_param=None, name_prefix=""):
    """Return one ``VectorSchema`` per entry of ``DEFAULT_VECTOR_FIELD_NAME``.

    Each vector is named ``"<name_prefix>_<default name>"`` when
    ``name_prefix`` is non-empty, otherwise just the default name, and is
    created with ``DEFAULT_VECTOR_DIMENSION`` and ``vector_index_param``.

    NOTE(review): all parameters carry defaults; presumably pytest does not
    inject values for them, so as a fixture the defaults apply — confirm.
    """

    def qualified(base_name):
        if len(name_prefix) > 0:
            return f"{name_prefix}_{base_name}"
        return base_name

    return [
        VectorSchema(
            qualified(vector_name),
            data_type,
            dimension=DEFAULT_VECTOR_DIMENSION,
            index_param=vector_index_param,
        )
        for data_type, vector_name in DEFAULT_VECTOR_FIELD_NAME.items()
    ]


================================================
FILE: python/tests/detail/params_helper.py
================================================
from zvec import (
    CollectionOption,
    IndexOption,
    OptimizeOption,
    InvertIndexParam,
    HnswIndexParam,
    IVFIndexParam,
    FlatIndexParam,
    AlterColumnOption,
    AddColumnOption,
    DataType,
    MetricType,
    QuantizeType,
)


# Index-parameter objects grouped by vector DataType. The "VALID" name
# suggests each (data type, parameter) combination is expected to be accepted
# by the library — presumably exercised by schema/collection creation tests;
# confirm against the tests that consume this table.
VALID_VECTOR_DATA_TYPE_INDEX_PARAM_MAP = {
    # Dense FP32: default and customised HNSW, Flat and IVF parameters,
    # including variants with an explicit quantize_type.
    DataType.VECTOR_FP32: [
        HnswIndexParam(),
        HnswIndexParam(
            metric_type=MetricType.IP,
            m=16,
            ef_construction=100,
            quantize_type=QuantizeType.INT8,
        ),
        HnswIndexParam(
            metric_type=MetricType.COSINE,
            m=24,
            ef_construction=150,
            quantize_type=QuantizeType.INT4,
        ),
        HnswIndexParam(
            metric_type=MetricType.L2,
            m=32,
            ef_construction=200,
            quantize_type=QuantizeType.FP16,
        ),
        FlatIndexParam(),
        FlatIndexParam(metric_type=MetricType.IP, quantize_type=QuantizeType.INT4),
        FlatIndexParam(metric_type=MetricType.L2, quantize_type=QuantizeType.INT8),
        FlatIndexParam(metric_type=MetricType.COSINE, quantize_type=QuantizeType.FP16),
        IVFIndexParam(),
        IVFIndexParam(
            metric_type=MetricType.IP,
            quantize_type=QuantizeType.INT4,
            n_list=100,
            n_iters=10,
            use_soar=False,
        ),
        IVFIndexParam(
            metric_type=MetricType.L2,
            quantize_type=QuantizeType.INT8,
            n_list=200,
            n_iters=20,
            use_soar=True,
        ),
        IVFIndexParam(
            metric_type=MetricType.COSINE,
            quantize_type=QuantizeType.FP16,
            n_list=150,
            n_iters=15,
            use_soar=False,
        ),
    ],
    # Dense FP16: only default HNSW and Flat; the IVF entry is disabled.
    DataType.VECTOR_FP16: [
        HnswIndexParam(),
        FlatIndexParam(),
        # IVFIndexParam(),
    ],
    # Dense INT8: only default HNSW and Flat; the IVF entry is disabled.
    DataType.VECTOR_INT8: [
        HnswIndexParam(),
        FlatIndexParam(),
        # IVFIndexParam(),
    ],
    # Sparse FP32: default HNSW/Flat plus an IP HNSW variant with FP16 quantization.
    DataType.SPARSE_VECTOR_FP32: [
        HnswIndexParam(),
        FlatIndexParam(),
        HnswIndexParam(
            metric_type=MetricType.IP,
            m=16,
            ef_construction=100,
            quantize_type=QuantizeType.FP16,
        ),
    ],
    # Sparse FP16: default HNSW/Flat plus an IP HNSW variant without quantization.
    DataType.SPARSE_VECTOR_FP16: [
        HnswIndexParam(),
        FlatIndexParam(),
        HnswIndexParam(
            metric_type=MetricType.IP,
            m=16,
            ef_construction=100,
        ),
    ],
}

# The table above flattened into (data_type, param) pairs, in table order.
VALID_VECTOR_DATA_TYPE_INDEX_PARAM_MAP_PARAMS = [
    (data_type, param)
    for data_type, params in VALID_VECTOR_DATA_TYPE_INDEX_PARAM_MAP.items()
    for param in params
]

# Index-parameter objects grouped by vector DataType. The "INVALID" name
# suggests each combination is expected to be rejected by the library —
# presumably used by negative tests; confirm against the consuming tests.
INVALID_VECTOR_DATA_TYPE_INDEX_PARAM_MAP = {
    # Dense vectors: only the inverted-index parameter is listed.
    DataType.VECTOR_FP32: [
        InvertIndexParam(),
    ],
    DataType.VECTOR_FP16: [
        InvertIndexParam(),
    ],
    DataType.VECTOR_INT8: [
        InvertIndexParam(),
    ],
    # Sparse vectors: L2 HNSW, COSINE Flat, IVF and inverted-index parameters.
    DataType.SPARSE_VECTOR_FP32: [
        HnswIndexParam(metric_type=MetricType.L2),
        FlatIndexParam(metric_type=MetricType.COSINE),
        IVFIndexParam(),
        InvertIndexParam(),
    ],
    DataType.SPARSE_VECTOR_FP16: [
        HnswIndexParam(metric_type=MetricType.L2),
        FlatIndexParam(metric_type=MetricType.COSINE),
        IVFIndexParam(),
        InvertIndexParam(),
    ],
}

# The table above flattened into (data_type, param) pairs, in table order.
INVALID_VECTOR_DATA_TYPE_INDEX_PARAM_MAP_PARAMS = [
    (data_type, param)
    for data_type, params in INVALID_VECTOR_DATA_TYPE_INDEX_PARAM_MAP.items()
    for param in params
]

# Length used below to build the longest collection name in the valid list.
COLLECTION_NAME_MAX_LENGTH = 64

# Collection names grouped as "valid" — presumably expected to be accepted
# when creating a collection; confirm against the consuming tests.
COLLECTION_NAME_VALID_LIST = [
    "col",
    "C0llECTION",
    "Collection1",
    "collection_2",
    "123collection-",
    "a" * COLLECTION_NAME_MAX_LENGTH,
]

# Collection names grouped as "invalid" — presumably expected to be rejected.
# Covers one- and two-character names, empty/blank/None, special characters
# and a 65-character name (one more than COLLECTION_NAME_MAX_LENGTH).
COLLECTION_NAME_INVALID_LIST = [
    "l",
    "1C",
    "",
    " ",
    None,
    "abcdefghijklmnopqrstuvwxzy123456abcdefghijklmnopqrstuvwxzy1234561",
    "test/",
    "!@#$%^&*()test",
]

# Field names grouped as "valid" — presumably expected to be accepted.
FIELD_NAME_VALID_LIST = [
    "1",
    "12",
    "col",
    "ID",
    "name1",
    "Weigt_12-",
    "123age",
    "name_with_underscores",
    "123numeric_start",
    "name-with-dashes",
]

# Field names grouped as "invalid" — presumably expected to be rejected.
# Covers empty/blank/None, special characters, embedded spaces and a
# 33-character name (NOTE(review): presumably one over a 32-character
# limit — confirm the actual limit).
FIELD_NAME_INVALID_LIST = [
    "",
    " ",
    None,
    "abcdefghijklmnopqrstuvwxzy1234561",
    "test/",
    "!@#$%^&*()test",
    "name@with#special$chars",
    "name with spaces",
]

# Size limits referenced by the tests. NOTE(review): presumably these mirror
# limits enforced by the library — confirm they stay in sync with it.
FIELD_LIST_MAX_LENGTH = 1024
VECTOR_LIST_MAX_LENGTH = 5
DENSE_VECTOR_MAX_DIMENSION = 20000
SPARSE_VECTOR_MAX_DIMENSION = 4096

# (field count, vector count, dimension) triples grouped as "valid"; the last
# entry uses FIELD_LIST_MAX_LENGTH fields and DENSE_VECTOR_MAX_DIMENSION.
FIELD_VECTOR_LIST_DIMENSION_VALID_LIST = [
    # field_list_len, vector_list_len, dimension
    (1, 1, 1),
    (2, 2, 512),
    (512, 3, 1024),
    (1024, 4, 20000),
]

# Triples grouped as "invalid": only the dimension varies (zero, negative,
# a string, and one more than DENSE_VECTOR_MAX_DIMENSION).
FIELD_VECTOR_LIST_DIMENSION_INVALID_LIST = [
    # field_list_len, vector_list_len, dimension
    (1, 1, 0),
    (1, 1, -1),
    (1, 1, "1"),
    (1, 1, 20001),
]


# Error-message fragments. NOTE(review): presumably matched as substrings of
# exception messages raised by the library — confirm how the tests compare
# them before changing any wording.
INCOMPATIBLE_CONSTRUCTOR_ERROR_MSG = "incompatible constructor arguments"
SCHEMA_VALIDATE_ERROR_MSG = "schema validate failed"
CREATE_READ_ONLY_ERROR_MSG = "Unable to create collection with read-only mode"
INCOMPATIBLE_FUNCTION_ERROR_MSG = "incompatible function arguments"
INVALID_PATH_ERROR_MSG = "path validate failed"
INDEX_NON_EXISTENT_COLUMN_ERROR_MSG = "not found in schema"
ACCESS_DESTROYED_COLLECTION_ERROR_MSG = "is already destroyed"
COLLECTION_PATH_NOT_EXIST_ERROR_MSG = "not exist"
NOT_SUPPORT_ADD_COLUMN_ERROR_MSG = "Only support basic numeric data type"
NOT_EXIST_COLUMN_TO_DROP_ERROR_MSG = "Column not exists"


================================================
FILE: python/tests/detail/support_helper.py
================================================
from zvec import (
    CollectionOption,
    IndexOption,
    OptimizeOption,
    InvertIndexParam,
    HnswIndexParam,
    IVFIndexParam,
    FlatIndexParam,
    DataType,
    IndexType,
    QuantizeType,
)

# Scalar and array DataType members used by the tests as the set of
# supported scalar field types.
SUPPORT_SCALAR_DATA_TYPES = [
    DataType.BOOL,
    DataType.FLOAT,
    DataType.DOUBLE,
    DataType.INT32,
    DataType.INT64,
    DataType.UINT32,
    DataType.UINT64,
    DataType.STRING,
    DataType.ARRAY_BOOL,
    DataType.ARRAY_FLOAT,
    DataType.ARRAY_DOUBLE,
    DataType.ARRAY_INT32,
    DataType.ARRAY_INT64,
    DataType.ARRAY_UINT32,
    DataType.ARRAY_UINT64,
    DataType.ARRAY_STRING,
]

# Default field name for each scalar DataType (one entry per type listed in
# SUPPORT_SCALAR_DATA_TYPES).
DEFAULT_SCALAR_FIELD_NAME = {
    DataType.BOOL: "bool_field",
    DataType.FLOAT: "float_field",
    DataType.DOUBLE: "double_field",
    DataType.INT32: "int32_field",
    DataType.INT64: "int64_field",
    DataType.UINT32: "uint32_field",
    DataType.UINT64: "uint64_field",
    DataType.STRING: "string_field",
    DataType.ARRAY_BOOL: "array_bool_field",
    DataType.ARRAY_FLOAT: "array_float_field",
    DataType.ARRAY_DOUBLE: "array_double_field",
    DataType.ARRAY_INT32: "array_int32_field",
    DataType.ARRAY_INT64: "array_int64_field",
    DataType.ARRAY_UINT32: "array_uint32_field",
    DataType.ARRAY_UINT64: "array_uint64_field",
    DataType.ARRAY_STRING: "array_string_field",
}

# Index types listed as supported for scalar fields.
SUPPORT_SCALAR_INDEX_TYPES = [
    IndexType.INVERT,
]

# Dense and sparse vector DataType members used by the tests.
SUPPORT_VECTOR_DATA_TYPES = [
    DataType.VECTOR_FP16,
    DataType.VECTOR_FP32,
    DataType.VECTOR_INT8,
    DataType.SPARSE_VECTOR_FP32,
    DataType.SPARSE_VECTOR_FP16,
]

# Index types listed as supported for vector fields.
SUPPORT_VECTOR_INDEX_TYPES = [
    IndexType.FLAT,
    IndexType.HNSW,
    IndexType.IVF,
]

# Default field name for each vector DataType.
DEFAULT_VECTOR_FIELD_NAME = {
    DataType.VECTOR_FP16: "vector_fp16_field",
    DataType.VECTOR_FP32: "vector_fp32_field",
    DataType.VECTOR_INT8: "vector_int8_field",
    DataType.SPARSE_VECTOR_FP32: "sparse_vector_fp32_field",
    DataType.SPARSE_VECTOR_FP16: "sparse_vector_fp16_field",
}

# Dimension used for vectors unless a test asks for something else.
DEFAULT_VECTOR_DIMENSION = 128
# NOTE(review): despite the "1024" in the name, the value is 4 — confirm
# whether this reduction is intentional.
VECTOR_DIMENSION_1024 = 4
# Index types listed per vector DataType; IVF appears only for the FP16 and
# FP32 dense types.
SUPPORT_VECTOR_DATA_TYPE_INDEX_MAP = {
    DataType.VECTOR_FP16: [IndexType.FLAT, IndexType.HNSW, IndexType.IVF],
    DataType.VECTOR_FP32: [IndexType.FLAT, IndexType.HNSW, IndexType.IVF],
    DataType.VECTOR_INT8: [IndexType.FLAT, IndexType.HNSW],
    DataType.SPARSE_VECTOR_FP32: [IndexType.FLAT, IndexType.HNSW],
    DataType.SPARSE_VECTOR_FP16: [IndexType.FLAT, IndexType.HNSW],
}

# The map above flattened into (data_type, index_type) pairs, in map order.
SUPPORT_VECTOR_DATA_TYPE_INDEX_MAP_PARAMS = [
    (data_type, index_type)
    for data_type, index_types in SUPPORT_VECTOR_DATA_TYPE_INDEX_MAP.items()
    for index_type in index_types
]

# A default-constructed parameter object for every IndexType.
DEFAULT_INDEX_PARAMS = {
    IndexType.FLAT: FlatIndexParam(),
    IndexType.HNSW: HnswIndexParam(),
    IndexType.IVF: IVFIndexParam(),
    IndexType.INVERT: InvertIndexParam(),
}

# Quantization types listed per vector DataType; only the two FP32 vector
# types have entries.
SUPPORT_VECTOR_DATA_TYPE_QUANT_MAP = {
    DataType.VECTOR_FP32: [QuantizeType.FP16, QuantizeType.INT8, QuantizeType.INT4],
    DataType.SPARSE_VECTOR_FP32: [QuantizeType.FP16],
}

# Numeric scalar types grouped as supported for adding a column —
# presumably expected to succeed in add-column tests; confirm.
SUPPORT_ADD_COLUMN_DATA_TYPE = [
    DataType.INT32,
    DataType.UINT32,
    DataType.INT64,
    DataType.UINT64,
    DataType.FLOAT,
    DataType.DOUBLE,
]

# Bool, string and array types grouped as not supported for adding a column —
# presumably expected to fail in add-column tests; confirm.
NOT_SUPPORT_ADD_COLUMN_DATA_TYPE = [
    DataType.BOOL,
    DataType.STRING,
    DataType.ARRAY_BOOL,
    DataType.ARRAY_INT32,
    DataType.ARRAY_INT64,
    DataType.ARRAY_UINT32,
    DataType.ARRAY_UINT64,
    DataType.ARRAY_FLOAT,
    DataType.ARRAY_DOUBLE,
    DataType.ARRAY_STRING,
]


================================================
FILE: python/tests/detail/test_collection_concurrency.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import pytest
import threading
import numpy as np
import zvec

from zvec import (
    CollectionOption,
    InvertIndexParam,
    HnswIndexParam,
    Collection,
    Doc,
    DataType,
    FieldSchema,
    VectorSchema,
)


class TestCollectionConcurrency:
    """Tests that call one shared collection from several threads at once.

    Worker functions catch every exception and record it in a shared
    ``results`` list instead of raising, so a failure inside a thread only
    affects a test when the final assertions look at the recorded entries.
    """

    @pytest.fixture(scope="function")
    def test_collection(self, tmp_path_factory):
        """Fixture to create a test collection"""
        # Schema: indexed "id" and "name", an optional unindexed "weight",
        # plus one dense (128-dim) and one sparse vector, both with HNSW.
        collection_schema = zvec.CollectionSchema(
            name="test_collection",
            fields=[
                FieldSchema(
                    "id",
                    DataType.INT64,
                    nullable=False,
                    index_param=InvertIndexParam(enable_range_optimization=True),
                ),
                FieldSchema(
                    "name",
                    DataType.STRING,
                    nullable=False,
                    index_param=InvertIndexParam(),
                ),
                FieldSchema("weight", DataType.FLOAT, nullable=True),
            ],
            vectors=[
                VectorSchema(
                    "dense",
                    DataType.VECTOR_FP32,
                    dimension=128,
                    index_param=HnswIndexParam(),
                ),
                VectorSchema(
                    "sparse", DataType.SPARSE_VECTOR_FP32, index_param=HnswIndexParam()
                ),
            ],
        )

        collection_option = CollectionOption(read_only=False, enable_mmap=True)

        temp_dir = tmp_path_factory.mktemp("zvec")
        collection_path = temp_dir / "test_collection"

        coll = zvec.create_and_open(
            path=str(collection_path),
            schema=collection_schema,
            option=collection_option,
        )

        assert coll is not None, "Failed to create and open collection"

        yield coll

        # Clean up
        # A destroy() failure is only printed, never raised.
        if hasattr(coll, "destroy") and coll is not None:
            try:
                coll.destroy()
            except Exception as e:
                print(f"Warning: failed to destroy collection: {e}")

    def test_concurrent_read_write(self, test_collection: Collection):
        """Run three inserting threads and three querying threads together.

        Passes as soon as at least one insert or query completed without
        raising.
        """
        results = []

        def insert_docs(thread_id):
            """Insert five documents whose ids are derived from ``thread_id``."""
            try:
                # The "id" field is the integer formed by concatenating the
                # thread id and the index, e.g. thread 1 uses 10..14.
                docs = [
                    Doc(
                        id=f"{thread_id}_{i}",
                        fields={
                            "id": int(f"{thread_id}{i}"),
                            "name": f"thread_{thread_id}_doc_{i}",
                            "weight": float(i),
                        },
                        vectors={
                            "dense": np.random.random(128).tolist(),
                            "sparse": {1: float(i), 2: float(i * 2)},
                        },
                    )
                    for i in range(5)
                ]

                result = test_collection.insert(docs)
                results.append((thread_id, "insert", len(result)))
            except Exception as e:
                results.append((thread_id, "insert_exception", str(e)))

        def query_docs(thread_id):
            """Run one filtered query and record how many results came back."""
            try:
                result = test_collection.query(filter="id > 0", topk=10)
                results.append((thread_id, "query", len(result)))
            except Exception as e:
                results.append((thread_id, "query_exception", str(e)))

        # Create threads for concurrent operations
        threads = []

        # Start insert threads
        for i in range(3):
            thread = threading.Thread(target=insert_docs, args=(i,))
            threads.append(thread)
            thread.start()

        # Start query threads
        for i in range(3):
            thread = threading.Thread(target=query_docs, args=(i,))
            threads.append(thread)
            thread.start()

        # Wait for all threads to complete
        for thread in threads:
            thread.join()

        # Analyze results
        insert_results = [r for r in results if r[1] == "insert"]
        query_results = [r for r in results if r[1] == "query"]

        logging.info(
            f"Concurrent read/write results - Inserts: {len(insert_results)}, Queries: {len(query_results)}"
        )

        # At least some operations should succeed
        assert len(insert_results) + len(query_results) > 0

    def test_concurrent_query(self, test_collection: Collection):
        """Insert 20 documents, then query from five threads at once.

        All five queries must complete without raising.
        """
        # First insert some data
        docs = [
            Doc(
                id=f"{i}",
                fields={"id": i, "name": f"test_{i}", "weight": float(i)},
                vectors={
                    "dense": np.random.random(128).tolist(),
                    "sparse": {1: float(i), 2: float(i * 2)},
                },
            )
            for i in range(20)
        ]

        insert_result = test_collection.insert(docs)
        assert len(insert_result) == 20

        results = []

        def query_operation(thread_id):
            """Perform query operation from a thread"""
            try:
                result = test_collection.query(filter=f"id > {thread_id}", topk=5)
                results.append((thread_id, "query", len(result)))
            except Exception as e:
                results.append((thread_id, "query_exception", str(e)))

        # Create multiple threads for concurrent queries
        threads = []
        for i in range(5):
            thread = threading.Thread(target=query_operation, args=(i,))
            threads.append(thread)
            thread.start()

        # Wait for all threads to complete
        for thread in threads:
            thread.join()

        # Analyze results
        query_results = [r for r in results if r[1] == "query"]
        logging.info(f"Concurrent query results - Queries: {len(query_results)}")

        # All query operations should succeed
        assert len(query_results) == 5

    def test_concurrent_modifications(self, test_collection: Collection):
        """Insert 10 documents, then update and delete them from several threads.

        Update threads 0..2 touch ids 0-1, 2-3 and 4-5; delete threads 0..1
        target ids 2-3 and 4-5, so deletes overlap with update threads 1 and
        2. Passes as soon as one update or delete completed without raising.
        """
        # First insert some data
        docs = [
            Doc(
                id=f"{i}",
                fields={"id": i, "name": f"test_{i}", "weight": float(i)},
                vectors={
                    "dense": np.random.random(128).tolist(),
                    "sparse": {1: float(i), 2: float(i * 2)},
                },
            )
            for i in range(10)
        ]

        insert_result = test_collection.insert(docs)
        assert len(insert_result) == 10

        results = []

        def update_operation(thread_id):
            """Perform update operation from a thread"""
            try:
                # Each thread updates different documents
                update_docs = [
                    Doc(
                        id=f"{i}",
                        fields={
                            "id": i,
                            "name": f"updated_by_thread_{thread_id}",
                            "weight": float(i + thread_id),
                        },
                        vectors={
                            "dense": np.random.random(128).tolist(),
                            "sparse": {1: float(i) + 0.5, 2: float(i * 2) + 0.5},
                        },
                    )
                    for i in range(thread_id * 2, thread_id * 2 + 2)
                ]

                result = test_collection.update(update_docs)
                results.append((thread_id, "update", len(result)))
            except Exception as e:
                results.append((thread_id, "update_exception", str(e)))

        def delete_operation(thread_id):
            """Perform delete operation from a thread"""
            try:
                # Each thread deletes different documents
                delete_ids = [f"{thread_id * 2 + 2}", f"{thread_id * 2 + 3}"]
                result = test_collection.delete(delete_ids)
                results.append((thread_id, "delete", len(result)))
            except Exception as e:
                results.append((thread_id, "delete_exception", str(e)))

        # Create threads for concurrent operations
        threads = []

        # Start update threads
        for i in range(3):
            thread = threading.Thread(target=update_operation, args=(i,))
            threads.append(thread)
            thread.start()

        # Start delete threads
        for i in range(2):
            thread = threading.Thread(target=delete_operation, args=(i,))
            threads.append(thread)
            thread.start()

        # Wait for all threads to complete
        for thread in threads:
            thread.join()

        # Analyze results
        update_results = [r for r in results if r[1] == "update"]
        delete_results = [r for r in results if r[1] == "delete"]

        logging.info(
            f"Concurrent modification results - Updates: {len(update_results)}, Deletes: {len(delete_results)}"
        )

        # At least some operations should succeed
        assert len(update_results) + len(delete_results) > 0

    def test_read_write_locking(self, test_collection: Collection):
        """Insert 5 documents, then mix fetches and upserts across ten threads.

        Even-numbered threads fetch, odd-numbered threads upsert; both target
        id ``thread_id % 5``. The recorded ``results`` are not inspected; the
        test only requires that a final ``query()`` returns.
        """
        # Perform operations that should be thread-safe
        docs = [
            Doc(
                id=f"{i}",
                fields={"id": i, "name": f"test_{i}", "weight": float(i)},
                vectors={
                    "dense": np.random.random(128).tolist(),
                    "sparse": {1: float(i), 2: float(i * 2)},
                },
            )
            for i in range(5)
        ]

        # Insert data
        insert_result = test_collection.insert(docs)
        assert len(insert_result) == 5

        # Concurrent operations should not cause data corruption
        results = []

        def mixed_operation(thread_id):
            """Perform mixed operations from a thread"""
            try:
                # Mix of read and write operations
                if thread_id % 2 == 0:
                    # Read operation
                    result = test_collection.fetch([f"{thread_id % 5}"])
                    results.append((thread_id, "read", len(result)))
                else:
                    # Write operation
                    doc = Doc(
                        id=f"{thread_id % 5}",
                        fields={
                            "id": thread_id % 5,
                            "name": f"mixed_op_{thread_id}",
                            "weight": float(thread_id),
                        },
                        vectors={
                            "dense": np.random.random(128).tolist(),
                            "sparse": {1: float(thread_id), 2: float(thread_id * 2)},
                        },
                    )
                    # NOTE(review): a single Doc is passed here; if upsert
                    # returns a non-sized status object for a single document,
                    # len() raises and the write is recorded as an
                    # "exception" — confirm the return type.
                    result = test_collection.upsert(doc)
                    results.append((thread_id, "write", len(result)))
            except Exception as e:
                results.append((thread_id, "exception", str(e)))

        # Create multiple threads
        threads = []
        for i in range(10):
            thread = threading.Thread(target=mixed_operation, args=(i,))
            threads.append(thread)
            thread.start()

        # Wait for all threads to complete
        for thread in threads:
            thread.join()

        # Verify that the collection is still in a consistent state
        # The comparison below holds for any sized result, so this effectively
        # checks only that query() and len() complete without raising.
        final_result = test_collection.query()
        assert len(final_result) >= 0  # Should not crash or return corrupted data

    def test_race_condition_detection(self, test_collection: Collection):
        """Insert 10 documents, then fire 100 one-shot threads at the collection.

        Each thread picks its operation from ``op_id % 4``: insert, update,
        query or fetch. The recorded ``results`` are only counted for logging;
        the test requires that a final ``query()`` returns.
        """
        # Insert initial data
        docs = [
            Doc(
                id=f"{i}",
                fields={"id": i, "name": f"initial_{i}", "weight": float(i)},
                vectors={
                    "dense": np.random.random(128).tolist(),
                    "sparse": {1: float(i), 2: float(i * 2)},
                },
            )
            for i in range(10)
        ]

        insert_result = test_collection.insert(docs)
        assert len(insert_result) == 10

        # Perform many rapid concurrent operations
        operation_count = 100
        results = []

        def rapid_operation(op_id):
            """Perform rapid operations"""
            try:
                # Alternate between different types of operations
                # NOTE(review): insert and update receive a single Doc; if
                # they return a non-sized status object in that case, len()
                # raises and the operation is recorded as an "exception" —
                # confirm the return type.
                if op_id % 4 == 0:
                    # Insert
                    doc = Doc(
                        id=f"rapid_{op_id}",
                        fields={
                            "id": op_id,
                            "name": f"rapid_{op_id}",
                            "weight": float(op_id),
                        },
                        vectors={
                            "dense": np.random.random(128).tolist(),
                            "sparse": {1: float(op_id), 2: float(op_id * 2)},
                        },
                    )
                    result = test_collection.insert(doc)
                    results.append(("insert", len(result)))
                elif op_id % 4 == 1:
                    # Update
                    doc = Doc(
                        id=f"{op_id % 10}",
                        fields={
                            "id": op_id % 10,
                            "name": f"rapid_update_{op_id}",
                            "weight": float(op_id),
                        },
                        vectors={
                            "dense": np.random.random(128).tolist(),
                            "sparse": {1: float(op_id), 2: float(op_id * 2)},
                        },
                    )
                    result = test_collection.update(doc)
                    results.append(("update", len(result)))
                elif op_id % 4 == 2:
                    # Query
                    result = test_collection.query(filter=f"id > {op_id % 5}", topk=3)
                    results.append(("query", len(result)))
                else:
                    # Fetch
                    result = test_collection.fetch([f"{op_id % 10}"])
                    results.append(("fetch", len(result)))
            except Exception as e:
                results.append(("exception", str(e)))

        # Create many threads for rapid concurrent operations
        threads = []
        for i in range(operation_count):
            thread = threading.Thread(target=rapid_operation, args=(i,))
            threads.append(thread)
            thread.start()

        # Wait for all threads to complete
        for thread in threads:
            thread.join()

        # Verify collection is still functional
        # The comparison below holds for any sized result, so this effectively
        # checks only that query() and len() complete without raising.
        final_query = test_collection.query()
        assert len(final_query) >= 0  # Should not be corrupted

        logging.info(
            f"Rapid concurrent operations completed - Total operations: {len(results)}"
        )


================================================
FILE: python/tests/detail/test_collection_create_and_open.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import threading
import os

from distance_helper import *
from fixture_helper import *
from doc_helper import *
from params_helper import *


def check_collection_info(
    coll: Collection, schema: CollectionSchema, option: CollectionOption, path: str
):
    """Assert that an opened collection mirrors the arguments used to create it.

    Args:
        coll: Collection handle under inspection; must not be None.
        schema: Schema whose name, fields and vectors the collection must report.
        option: Option whose ``read_only`` and ``enable_mmap`` flags must match.
        path: Path the collection is expected to report.

    Raises:
        AssertionError: On the first mismatch that is found.
    """
    assert coll is not None, "Failed to create and open collection"
    assert coll.path == path

    # Schema: the name must match; fields and vectors are compared as lists.
    assert coll.schema.name == schema.name
    for member in ("fields", "vectors"):
        assert list(getattr(coll.schema, member)) == list(getattr(schema, member))

    # Option: both flags must match what the caller asked for.
    for flag in ("read_only", "enable_mmap"):
        assert getattr(coll.option, flag) == getattr(option, flag)


def check_collection_basic(coll: Collection, optimize: bool = False):
    """Insert ten generated documents and check that they can be read back.

    Args:
        coll: Collection to write to; documents are generated from its schema.
        optimize: When True, ``coll.optimize()`` is called and the fetch/query
            checks are repeated afterwards.

    Raises:
        AssertionError: If an insert fails or a count/lookup does not match.
    """
    doc_schema = coll.schema
    docs = [generate_doc(seq, doc_schema) for seq in range(10)]
    expected_count = len(docs)

    statuses = coll.insert(docs=docs)
    assert len(statuses) == expected_count
    assert all(status.ok() for status in statuses)

    assert coll.stats.doc_count == expected_count

    def _assert_all_docs_visible():
        # Ids are looked up as the string form of 0..expected_count-1.
        fetched = coll.fetch([str(seq) for seq in range(expected_count)])
        assert len(fetched) == expected_count
        for seq in range(expected_count):
            assert str(seq) in fetched

        # A query without arguments must return every inserted document.
        queried = coll.query()
        assert len(queried) == expected_count

    _assert_all_docs_visible()

    if optimize:
        coll.optimize()
        _assert_all_docs_visible()


def check_collection_full(coll: Collection):
    """Run one document through insert, fetch, query, update, upsert and delete.

    The doc_count assertions (1 after the insert, 0 after the delete) only
    hold when the collection is empty on entry.

    Args:
        coll: Collection to exercise. The update step writes ``int32_field``
            and ``vector_fp32_field``, so its schema must define both.

    Raises:
        AssertionError: If any step reports failure or an unexpected count.
    """
    doc_id = "1"
    original_doc = generate_doc(1, coll.schema)

    # Insert
    assert coll.insert(original_doc).ok()
    assert coll.stats.doc_count == 1

    # Fetch by id and compare with what was written
    fetched = coll.fetch(ids=[doc_id])
    assert len(fetched) == 1
    assert doc_id in fetched
    assert fetched[doc_id] is not None
    assert is_doc_equal(fetched[doc_id], original_doc, coll.schema)

    # Query without arguments
    assert len(coll.query()) == 1

    # Partial update of one scalar field and one vector field
    patch = Doc(
        id=doc_id,
        fields={"int32_field": 1},
        vectors={"vector_fp32_field": [0.2] * 128},
    )
    assert coll.update(patch).ok()

    # Upsert a freshly generated copy of the same document
    replacement = generate_doc(1, coll.schema)
    assert coll.upsert(replacement).ok()

    # Delete, then confirm the collection is empty again
    assert coll.delete(doc_id).ok()
    assert coll.stats.doc_count == 0


# CollectionOption flag pairs, ordered as (read_only, enable_mmap).
# Writable combinations, used by test_valid_option.
valid_collection_options = [(False, mmap_flag) for mmap_flag in (True, False)]
# Read-only combinations, used by test_invalid_option.
invalid_collection_options = [(True, mmap_flag) for mmap_flag in (True, False)]

# Rows are (field_name1, field_name2, vector_name1, vector_name2); every row
# repeats at least one name.
duplicate_names_test = [
    # Both scalar fields share a name.
    ("field1", "field1", "vector1", "vector2"),
    # Both vector fields share a name.
    ("field1", "field2", "vector1", "vector1"),
    # Each scalar field shares its name with a vector field.
    ("shared_name1", "shared_name2", "shared_name1", "shared_name2"),
]

# Names of 100 and 200 characters, used by test_invalid_long_field_names.
long_names = ["a" * 100, "b" * 200]

# Paths used by test_valid_path.
valid_path_list = [
    "/tmp/nonexistent/directory/test_collection",
    "test/collection/with/slashes",
]
# Paths used by test_invalid_path: a colon, the empty string, a trailing
# space, and punctuation characters.
invalid_path_list = [
    "invalid:path",
    "",
    "test_collection_with_spaces ",
    "test@#$%collection",
]


class TestCreateAndOpen:
    @pytest.mark.parametrize("collection_name", COLLECTION_NAME_VALID_LIST)
    def test_valid_collection_name(
        self,
        collection_temp_dir,
        collection_name,
        collection_option,
        sample_field_list,
        sample_vector_list,
    ):
        """Create a collection for each valid name and run the basic checks."""
        schema = zvec.CollectionSchema(
            name=collection_name,
            fields=sample_field_list,
            vectors=sample_vector_list,
        )

        collection = zvec.create_and_open(
            path=collection_temp_dir, schema=schema, option=collection_option
        )

        check_collection_info(
            collection, schema, collection_option, collection_temp_dir
        )
        check_collection_basic(collection)

        collection.destroy()

    @pytest.mark.parametrize("collection_name", COLLECTION_NAME_INVALID_LIST)
    def test_invalid_collection_name(
        self,
        collection_temp_dir,
        collection_name,
        collection_option,
        sample_field_list,
        sample_vector_list,
    ):
        """An invalid collection name must raise a schema validation error."""
        # Schema construction stays inside the context manager: the error may
        # be raised there or by create_and_open.
        with pytest.raises(Exception) as raised:
            rejected_schema = zvec.CollectionSchema(
                name=collection_name,
                fields=sample_field_list,
                vectors=sample_vector_list,
            )
            zvec.create_and_open(
                path=collection_temp_dir,
                schema=rejected_schema,
                option=collection_option,
            )

        message = str(raised.value)
        assert SCHEMA_VALIDATE_ERROR_MSG in message, message

    @pytest.mark.parametrize("name_prefix", FIELD_NAME_VALID_LIST)
    def test_valid_field_vector_name(
        self,
        collection_temp_dir,
        collection_option,
        name_prefix,
        sample_field_list,
        sample_vector_list,
    ):
        """Create a collection from the sample field/vector lists per prefix.

        NOTE(review): ``name_prefix`` is not referenced in this body;
        presumably the ``sample_*`` fixtures consume it - confirm in
        fixture_helper.
        """
        schema = zvec.CollectionSchema(
            name="test_collection",
            fields=sample_field_list,
            vectors=sample_vector_list,
        )

        collection = zvec.create_and_open(
            path=collection_temp_dir, schema=schema, option=collection_option
        )

        check_collection_info(
            collection, schema, collection_option, collection_temp_dir
        )
        check_collection_basic(collection)

        collection.destroy()

    @pytest.mark.parametrize("field_name", FIELD_NAME_INVALID_LIST)
    def test_invalid_field_name(
        self, collection_temp_dir, collection_option, field_name
    ):
        """An invalid scalar field name must raise a schema validation error."""
        # Everything from field construction onwards is inside the context
        # manager, so the error may come from any of these calls.
        with pytest.raises(Exception) as raised:
            bad_field = FieldSchema(field_name, DataType.STRING)
            dense_vector = VectorSchema(
                "dense",
                DataType.VECTOR_FP32,
                dimension=128,
                index_param=HnswIndexParam(),
            )

            rejected_schema = zvec.CollectionSchema(
                name="collection_name", fields=[bad_field], vectors=[dense_vector]
            )

            zvec.create_and_open(
                path=collection_temp_dir,
                schema=rejected_schema,
                option=collection_option,
            )

        message = str(raised.value)
        assert SCHEMA_VALIDATE_ERROR_MSG in message, message

    @pytest.mark.parametrize("vector_name", FIELD_NAME_INVALID_LIST)
    def test_invalid_vector_name(
        self, collection_temp_dir, collection_option, vector_name
    ):
        """An invalid vector field name must raise a schema validation error."""
        # Everything from field construction onwards is inside the context
        # manager, so the error may come from any of these calls.
        with pytest.raises(Exception) as raised:
            id_field = FieldSchema(
                "id",
                DataType.INT64,
                nullable=False,
                index_param=InvertIndexParam(enable_range_optimization=True),
            )
            bad_vector = VectorSchema(vector_name, DataType.VECTOR_FP32, dimension=128)

            rejected_schema = zvec.CollectionSchema(
                name="collection_name", fields=[id_field], vectors=[bad_vector]
            )

            zvec.create_and_open(
                path=collection_temp_dir,
                schema=rejected_schema,
                option=collection_option,
            )

        message = str(raised.value)
        assert SCHEMA_VALIDATE_ERROR_MSG in message, message

    @pytest.mark.parametrize(
        "field_list_len,vector_list_len,dimension",
        FIELD_VECTOR_LIST_DIMENSION_VALID_LIST,
    )
    def test_valid_field_vector_size_dimension(
        self,
        collection_temp_dir,
        collection_option,
        field_list_len,
        vector_list_len,
        dimension,
    ):
        """Accepted field counts, vector counts and dimensions create a collection."""
        # Nullable INT64 fields named id_0, id_1, ...
        field_list = [
            FieldSchema(f"id_{idx}", DataType.INT64, nullable=True)
            for idx in range(field_list_len)
        ]
        # HNSW-indexed FP32 vectors named dense_vector_0, dense_vector_1, ...
        vector_list = [
            VectorSchema(
                f"dense_vector_{idx}",
                DataType.VECTOR_FP32,
                dimension=dimension,
                index_param=HnswIndexParam(),
            )
            for idx in range(vector_list_len)
        ]

        schema = zvec.CollectionSchema(
            name="test_dense_vector_list", fields=field_list, vectors=vector_list
        )

        collection = zvec.create_and_open(
            path=collection_temp_dir, schema=schema, option=collection_option
        )

        check_collection_info(
            collection, schema, collection_option, collection_temp_dir
        )
        check_collection_basic(collection)

        collection.destroy()

    @pytest.mark.parametrize(
        "field_list_len,vector_list_len,dimension",
        FIELD_VECTOR_LIST_DIMENSION_INVALID_LIST,
    )
    def test_invalid_field_vector_size_dimension(
        self,
        collection_temp_dir,
        collection_option,
        vector_list_len,
        field_list_len,
        dimension,
    ):
        """Rejected field counts, vector counts or dimensions fail validation."""
        # Field and vector construction is kept inside the context manager so
        # an error raised at any stage is captured.
        with pytest.raises(Exception) as raised:
            field_list = [
                FieldSchema(f"id_{idx}", DataType.INT64, nullable=False)
                for idx in range(field_list_len)
            ]
            vector_list = [
                VectorSchema(
                    f"dense_vector_{idx}",
                    DataType.VECTOR_FP32,
                    dimension=dimension,
                    index_param=HnswIndexParam(),
                )
                for idx in range(vector_list_len)
            ]

            rejected_schema = zvec.CollectionSchema(
                name="test_dense_vector_list", fields=field_list, vectors=vector_list
            )

            zvec.create_and_open(
                path=collection_temp_dir,
                schema=rejected_schema,
                option=collection_option,
            )

        message = str(raised.value)
        assert SCHEMA_VALIDATE_ERROR_MSG in message, message

    def test_valid_single_vector_field_construction(
        self, collection_temp_dir, collection_option
    ):
        """A schema accepts one field and one vector passed without a list."""
        id_field = FieldSchema(
            "id",
            DataType.INT64,
            nullable=True,
            index_param=InvertIndexParam(enable_range_optimization=True),
        )
        dense_vector = VectorSchema(
            "dense_vector",
            DataType.VECTOR_FP32,
            dimension=128,
            index_param=HnswIndexParam(),
        )

        # Both arguments are bare objects rather than single-element lists.
        schema = zvec.CollectionSchema(
            name="test_single_dense_vector_non_list",
            fields=id_field,
            vectors=dense_vector,
        )

        collection = zvec.create_and_open(
            path=collection_temp_dir, schema=schema, option=collection_option
        )

        check_collection_info(
            collection, schema, collection_option, collection_temp_dir
        )
        check_collection_basic(collection)
        collection.destroy()

    def test_collection_concurrent_create(
        self, collection_temp_dir, basic_schema, collection_option
    ):
        """Five threads create the same path; exactly one is expected to succeed."""
        created = []
        failures = []
        guard = threading.Lock()

        def attempt_create(worker_no):
            # Record either the collection handle or the error text.
            try:
                handle = zvec.create_and_open(
                    path=collection_temp_dir,
                    schema=basic_schema,
                    option=collection_option,
                )
                with guard:
                    created.append((worker_no, handle))
            except Exception as exc:
                with guard:
                    failures.append((worker_no, str(exc)))

        workers = []
        for worker_no in range(5):
            worker = threading.Thread(target=attempt_create, args=(worker_no,))
            workers.append(worker)
            worker.start()

        for worker in workers:
            worker.join()

        assert len(created) == 1, (
            f"Expected exactly one successful creation, but got {len(created)}"
        )
        assert len(failures) == 4, (
            f"Expected exactly four failures, but got {len(failures)}"
        )

        _, winning_collection = created[0]
        assert winning_collection is not None, (
            "Successful creation should return a valid collection"
        )
        assert winning_collection.path == collection_temp_dir, (
            "Collection path mismatch"
        )

    def test_create_open_loop(
        self, collection_temp_dir, collection_option, full_schema
    ):
        """Repeat create -> drop handle -> reopen -> exercise -> destroy ten times."""
        for cycle in range(10):
            created = zvec.create_and_open(
                path=collection_temp_dir,
                schema=full_schema,
                option=collection_option,
            )
            assert created is not None, (
                f"Failed to create and open collection in cycle {cycle}"
            )
            assert created.path == collection_temp_dir, (
                f"Collection path mismatch in cycle {cycle}"
            )

            # Drop the only reference before opening the same path again.
            del created

            reopened = zvec.open(path=collection_temp_dir, option=collection_option)
            assert reopened is not None, (
                f"Failed to reopen collection in cycle {cycle}"
            )
            assert reopened.path == collection_temp_dir, (
                f"Reopened collection path mismatch in cycle {cycle}"
            )

            check_collection_full(reopened)

            # Destroy so the next cycle can create the path afresh.
            reopened.destroy()

    @pytest.mark.parametrize(
        "data_type, index_param", VALID_VECTOR_DATA_TYPE_INDEX_PARAM_MAP_PARAMS
    )
    def test_valid_vector_index_params(
        self,
        data_type,
        index_param,
        single_vector_schema_with_index_param,
        collection_temp_dir,
        collection_option,
    ):
        """A supported data type / index param pair creates a working collection.

        NOTE(review): ``data_type`` and ``index_param`` are not referenced in
        this body; presumably ``single_vector_schema_with_index_param`` is
        built from them - confirm in fixture_helper.
        """
        schema = single_vector_schema_with_index_param

        collection = zvec.create_and_open(
            path=collection_temp_dir, schema=schema, option=collection_option
        )

        check_collection_info(
            collection, schema, collection_option, collection_temp_dir
        )

        # Run the basic checks with the optimize pass enabled.
        check_collection_basic(collection, optimize=True)

    @pytest.mark.parametrize(
        "data_type, index_param", INVALID_VECTOR_DATA_TYPE_INDEX_PARAM_MAP_PARAMS
    )
    def test_invalid_vector_index_params(
        self,
        data_type,
        index_param,
        single_vector_schema_with_index_param,
        collection_temp_dir,
        collection_option,
    ):
        """An unsupported data type / index param pair fails schema validation.

        NOTE(review): ``data_type`` and ``index_param`` are not referenced in
        this body; presumably ``single_vector_schema_with_index_param`` is
        built from them - confirm in fixture_helper.
        """
        with pytest.raises(Exception) as raised:
            zvec.create_and_open(
                path=collection_temp_dir,
                schema=single_vector_schema_with_index_param,
                option=collection_option,
            )

        message = str(raised.value)
        assert SCHEMA_VALIDATE_ERROR_MSG in message, message

    def test_open_concurrent_same_path(self, tmp_path_factory, collection_option):
        """Test concurrent opening of the same collection path.

        - Multi-threading concurrency: 5 threads simultaneously open the same collection
        - Result verification: Verify that only one can open successfully, others must fail

        Handles opened by the worker threads stay open until every thread has
        been joined and the assertions have run. Closing a handle inside the
        worker could free the path while other threads are still about to
        call ``zvec.open``, which would make the "exactly one success"
        assertion timing-dependent.
        """
        # Create a temporary directory and path for the collection
        temp_dir = tmp_path_factory.mktemp("zvec")
        collection_path = temp_dir / "concurrent_open_test_collection"

        # First, create a collection that we'll try to open concurrently
        field_list = [
            FieldSchema(
                "id",
                DataType.INT64,
                nullable=False,
                index_param=InvertIndexParam(enable_range_optimization=True),
            ),
            FieldSchema(
                "name", DataType.STRING, nullable=False, index_param=InvertIndexParam()
            ),
        ]

        vector_list = [
            VectorSchema(
                "dense_vector",
                DataType.VECTOR_FP32,
                dimension=128,
                index_param=HnswIndexParam(),
            )
        ]

        collection_schema = zvec.CollectionSchema(
            name="concurrent_open_test_collection",
            fields=field_list,
            vectors=vector_list,
        )

        # Create the collection first
        coll = zvec.create_and_open(
            path=str(collection_path),
            schema=collection_schema,
            option=collection_option,
        )

        # Close the collection (when the handle offers close()) and drop the
        # reference so the worker threads can try to open it.
        if coll is not None and hasattr(coll, "close"):
            coll.close()
        del coll

        # Shared variables to collect results from threads
        results = []
        errors = []

        # Lock for thread-safe operations
        lock = threading.Lock()

        # Function to be executed by each thread
        def open_collection_thread(thread_id):
            try:
                reopened_coll = zvec.open(
                    path=str(collection_path), option=collection_option
                )
            except Exception as e:
                with lock:
                    errors.append((thread_id, str(e)))
            else:
                # Deliberately keep the handle open; it is closed only after
                # all threads have finished and the assertions have run.
                with lock:
                    results.append((thread_id, reopened_coll))

        # Create and start 5 threads
        threads = []
        for i in range(5):
            thread = threading.Thread(target=open_collection_thread, args=(i,))
            threads.append(thread)
            thread.start()

        # Wait for all threads to complete
        for thread in threads:
            thread.join()

        try:
            # Verify results:
            # 1. Only one open should succeed (exactly one collection in results)
            # 2. Others should fail (4 errors in errors)
            assert len(results) == 1, (
                f"Expected exactly one successful open, but got {len(results)}"
            )
            assert len(errors) == 4, (
                f"Expected exactly four failures, but got {len(errors)}"
            )

            # Additional verification: check that the successful open has a
            # valid collection (still open at this point).
            successful_thread_id, successful_collection = results[0]
            assert successful_collection is not None, (
                "Successful open should return a valid collection"
            )
            assert successful_collection.path == str(collection_path), (
                "Collection path mismatch"
            )
        finally:
            # Clean up every handle that was opened, even if an assertion failed.
            for _, opened_coll in results:
                if opened_coll is not None and hasattr(opened_coll, "close"):
                    opened_coll.close()

    @pytest.mark.parametrize("read_only,enable_mmap", valid_collection_options)
    def test_valid_option(
        self, collection_temp_dir, basic_schema, read_only, enable_mmap
    ):
        """Each option pair in valid_collection_options creates a usable collection."""
        requested_option = CollectionOption(
            read_only=read_only, enable_mmap=enable_mmap
        )

        collection = zvec.create_and_open(
            path=collection_temp_dir, schema=basic_schema, option=requested_option
        )

        check_collection_info(
            collection, basic_schema, requested_option, collection_temp_dir
        )
        check_collection_basic(collection)

        collection.destroy()

    def test_valid_none_option(self, collection_temp_dir, basic_schema):
        """Passing ``option=None`` to create_and_open must not raise."""
        # The return value is intentionally ignored; only the absence of an
        # exception is checked.
        zvec.create_and_open(
            path=collection_temp_dir, schema=basic_schema, option=None
        )

    @pytest.mark.parametrize("read_only,enable_mmap", invalid_collection_options)
    def test_invalid_option(
        self, collection_temp_dir, basic_schema, read_only, enable_mmap
    ):
        """Each option pair in invalid_collection_options must be rejected."""
        # The option object is built inside the context manager, as the error
        # may be raised there or by create_and_open.
        with pytest.raises(Exception) as raised:
            rejected_option = CollectionOption(
                read_only=read_only, enable_mmap=enable_mmap
            )
            zvec.create_and_open(
                path=collection_temp_dir,
                schema=basic_schema,
                option=rejected_option,
            )

        message = str(raised.value)
        assert CREATE_READ_ONLY_ERROR_MSG in message, message

    @pytest.mark.parametrize(
        "field_name1,field_name2,vector_name1,vector_name2",
        duplicate_names_test,
    )
    def test_duplicate_field_names(
        self,
        collection_temp_dir,
        collection_option,
        field_name1,
        field_name2,
        vector_name1,
        vector_name2,
    ):
        """Repeated names among fields and vectors fail schema validation."""
        # Schema construction is kept inside the context manager so an error
        # raised at any stage is captured.
        with pytest.raises(Exception) as raised:
            scalar_fields = [
                FieldSchema(
                    scalar_name,
                    DataType.INT64,
                    nullable=False,
                    index_param=InvertIndexParam(enable_range_optimization=True),
                )
                for scalar_name in (field_name1, field_name2)
            ]
            vector_fields = [
                VectorSchema(
                    vector_name,
                    DataType.VECTOR_FP32,
                    dimension=128,
                    index_param=HnswIndexParam(),
                )
                for vector_name in (vector_name1, vector_name2)
            ]

            rejected_schema = zvec.CollectionSchema(
                name="test_collection",
                fields=scalar_fields,
                vectors=vector_fields,
            )

            zvec.create_and_open(
                path=collection_temp_dir,
                schema=rejected_schema,
                option=collection_option,
            )

        message = str(raised.value)
        assert SCHEMA_VALIDATE_ERROR_MSG in message, message

    @pytest.mark.parametrize("long_name", long_names)
    def test_invalid_long_field_names(
        self, collection_option, collection_temp_dir, long_name
    ):
        """Over-long collection, field and vector names fail at creation time."""
        long_field = FieldSchema(
            long_name + "_field",
            DataType.INT64,
            nullable=False,
            index_param=InvertIndexParam(enable_range_optimization=True),
        )
        long_vector = VectorSchema(
            long_name + "_vector",
            DataType.VECTOR_FP32,
            dimension=128,
            index_param=HnswIndexParam(),
        )
        # The schema is built outside the context manager: constructing it is
        # expected to succeed, and only create_and_open should raise.
        oversized_schema = zvec.CollectionSchema(
            name=long_name, fields=[long_field], vectors=[long_vector]
        )

        with pytest.raises(Exception) as raised:
            zvec.create_and_open(
                path=collection_temp_dir,
                schema=oversized_schema,
                option=collection_option,
            )

        message = str(raised.value)
        assert SCHEMA_VALIDATE_ERROR_MSG in message, message

    def test_invalid_empty_fields_and_vectors(
        self, collection_temp_dir, collection_option
    ):
        """A schema with no fields and no vectors is rejected at creation time."""
        # The schema is built outside the context manager: constructing it is
        # expected to succeed, and only create_and_open should raise.
        empty_schema = zvec.CollectionSchema(
            name="test_collection", fields=[], vectors=[]
        )

        with pytest.raises(Exception) as raised:
            zvec.create_and_open(
                path=collection_temp_dir,
                schema=empty_schema,
                option=collection_option,
            )

        message = str(raised.value)
        assert SCHEMA_VALIDATE_ERROR_MSG in message, message

    @pytest.mark.parametrize("valid_path", valid_path_list)
    def test_valid_path(self, basic_schema, collection_option, valid_path):
        """Each path in valid_path_list can be used to create a collection."""
        # Remove anything already present at the target path before creating.
        if os.path.exists(valid_path):
            from shutil import rmtree

            rmtree(valid_path)

        collection = zvec.create_and_open(
            path=valid_path, schema=basic_schema, option=collection_option
        )

        check_collection_info(
            collection, basic_schema, collection_option, valid_path
        )

        collection.destroy()

    @pytest.mark.parametrize("invalid_path", invalid_path_list)
    def test_invalid_path(self, basic_schema, collection_option, invalid_path):
        """Each path in invalid_path_list is rejected with the invalid-path error."""
        with pytest.raises(Exception) as raised:
            zvec.create_and_open(
                path=invalid_path, schema=basic_schema, option=collection_option
            )

        message = str(raised.value)
        assert INVALID_PATH_ERROR_MSG in message, message


================================================
FILE: python/tests/detail/test_collection_ddl.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from distance_helper import *
from fixture_helper import *
from doc_helper import *
from params_helper import *


class TestDDL:
    """Collection-level checks: stats, destroy and flush."""

    def test_collection_stats(self, basic_collection: Collection):
        """A fresh basic collection reports zero docs and two complete indexes."""
        assert basic_collection.stats is not None
        stats = basic_collection.stats
        assert stats.doc_count == 0

        completeness = stats.index_completeness
        assert len(completeness) == 2
        for vector_name in ("dense", "sparse"):
            assert completeness[vector_name] == 1

    def test_collection_destroy(
        self, basic_collection: Collection, collection_temp_dir, collection_option
    ):
        """After destroy(), the handle and its path both stop being usable."""
        inserted = basic_collection.insert(generate_doc(1, basic_collection.schema))
        assert bool(inserted)
        assert inserted.ok()

        stats_before = basic_collection.stats
        assert stats_before is not None
        assert stats_before.doc_count == 1

        basic_collection.destroy()

        # Reading stats through the destroyed handle must raise.
        with pytest.raises(Exception) as stats_error:
            basic_collection.stats
        assert ACCESS_DESTROYED_COLLECTION_ERROR_MSG in str(stats_error.value)

        # Opening the destroyed path must raise as well.
        with pytest.raises(Exception) as open_error:
            zvec.open(path=collection_temp_dir, option=collection_option)
        assert COLLECTION_PATH_NOT_EXIST_ERROR_MSG in str(open_error.value)

    def test_collection_flush(self, basic_collection: Collection):
        """A document inserted before flush() can still be fetched afterwards."""
        inserted = basic_collection.insert(generate_doc(1, basic_collection.schema))
        assert bool(inserted)
        assert inserted.ok()

        basic_collection.flush()

        fetched = basic_collection.fetch(["1"])
        assert "1" in fetched
        assert fetched["1"].id == "1"


class TestIndexDDL:
    @pytest.mark.parametrize("field_name", DEFAULT_SCALAR_FIELD_NAME.values())
    @pytest.mark.parametrize("index_type", SUPPORT_SCALAR_INDEX_TYPES)
    def test_scalar_index_operation(
        self,
        full_collection: Collection,
        field_name: str,
        index_type: IndexType,
    ):
        """Exercise create_index/drop_index on a scalar field around inserts.

        Flow: insert docs 0-4, run a filter query, create the index and check
        that the same query returns the same ids, insert docs 5-7, drop the
        index, insert docs 8-9, then destroy the collection. Document counts
        are re-checked after every insert, and fetches and filter queries are
        re-checked after the second and third insert.

        Args:
            full_collection: Collection fixture the test operates on.
            field_name: Scalar field to index (parametrized).
            index_type: Index type looked up in DEFAULT_INDEX_PARAMS (parametrized).
        """
        # Insert docs 0..4 before any index is created by this test.
        docs = [generate_doc(i, full_collection.schema) for i in range(5)]

        result = full_collection.insert(docs)
        assert len(result) == 5
        for item in result:
            assert item.ok()

        stats = full_collection.stats
        assert stats is not None
        assert stats.doc_count == 5

        # Filter predicate per group of field names; the field name is prepended.
        filter_suffix_groups = (
            (("bool_field",), "= true"),
            (("double_field", "float_field"), ">= 3.0"),
            (
                ("int32_field", "int64_field", "uint32_field", "uint64_field"),
                ">= 30",
            ),
            (("string_field",), ">= 'test_3'"),
            (("array_bool_field",), "contain_any (false)"),
            (("array_double_field", "array_float_field"), "contain_any (3.0, 4.0)"),
            (
                (
                    "array_int64_field",
                    "array_int32_field",
                    "array_uint64_field",
                    "array_uint32_field",
                ),
                "contain_any (3, 4)",
            ),
            (("array_string_field",), "contain_any ('test_3', 'test_4')"),
        )
        query_filter = next(
            (
                f"{field_name} {suffix}"
                for names, suffix in filter_suffix_groups
                if field_name in names
            ),
            None,
        )
        if query_filter is None:
            # pytest.fail (unlike `assert False`) is not stripped under `python -O`
            # and matches how the unsupported index type is reported below.
            pytest.fail(f"Unsupported field type for index creation: {field_name}")

        query_result_before = full_collection.query(filter=query_filter, topk=10)

        if index_type not in DEFAULT_INDEX_PARAMS:
            pytest.fail(f"Unsupported index type for index creation: {index_type}")
        index_param = DEFAULT_INDEX_PARAMS[index_type]

        full_collection.create_index(
            field_name=field_name, index_param=index_param, option=IndexOption()
        )
        stats_after_create = full_collection.stats
        assert stats_after_create is not None
        assert stats_after_create.doc_count == 5

        # Creating the index must not change which documents the filter matches.
        query_result_after = full_collection.query(filter=query_filter, topk=10)

        assert len(query_result_before) == len(query_result_after), (
            f"Query result count mismatch for {field_name} with index type {index_type}: before={len(query_result_before)}, after={len(query_result_after)}"
        )

        before_ids = set(doc.id for doc in query_result_before)
        after_ids = set(doc.id for doc in query_result_after)
        assert before_ids == after_ids, (
            f"Query result IDs mismatch for {field_name} with index type {index_type}: before={before_ids}, after={after_ids}"
        )

        # Insert docs 5..7 while the index exists.
        new_docs = [generate_doc(i, full_collection.schema) for i in range(5, 8)]

        result = full_collection.insert(new_docs)
        assert len(result) == 3
        for item in result:
            assert item.ok()

        stats_after_insert1 = full_collection.stats
        assert stats_after_insert1 is not None
        assert stats_after_insert1.doc_count == 8

        fetched_docs = full_collection.fetch([f"{i}" for i in range(5, 8)])
        assert len(fetched_docs) == 3

        for i in range(5, 8):
            doc_id = f"{i}"
            assert doc_id in fetched_docs

        # More documents were added, so the filter must match at least as many.
        query_result = full_collection.query(filter=query_filter, topk=20)
        assert len(query_result) >= len(query_result_before)

        full_collection.drop_index(field_name=field_name)

        # Insert docs 8..9 after the index has been dropped.
        more_docs = [generate_doc(i, full_collection.schema) for i in range(8, 10)]

        result = full_collection.insert(more_docs)
        assert len(result) == 2
        for item in result:
            assert item.ok()

        stats_after_insert2 = full_collection.stats
        assert stats_after_insert2 is not None
        assert stats_after_insert2.doc_count == 10

        fetched_docs = full_collection.fetch([f"{i}" for i in range(8, 10)])
        assert len(fetched_docs) == 2

        for i in range(8, 10):
            doc_id = f"{i}"
            assert doc_id in fetched_docs

        query_result = full_collection.query(filter=query_filter, topk=20)
        assert len(query_result) >= len(query_result_before)

        final_stats = full_collection.stats
        assert final_stats is not None
        assert final_stats.doc_count == 10
        full_collection.destroy()

    @pytest.mark.parametrize("field_name", DEFAULT_SCALAR_FIELD_NAME.values())
    @pytest.mark.parametrize("index_type", SUPPORT_SCALAR_INDEX_TYPES)
    def test_duplicate_create_index(
        self, full_collection: Collection, field_name: str, index_type: IndexType
    ):
        """Create the same scalar index twice on one field.

        Inserts docs 0-9, checks that a filter query returns the same ids
        before and after the first create_index, then calls create_index a
        second time with identical arguments. The second call has no
        assertion after it; the test only requires that it does not raise.

        Args:
            full_collection: Collection fixture the test operates on.
            field_name: Scalar field to index (parametrized).
            index_type: Index type looked up in DEFAULT_INDEX_PARAMS (parametrized).
        """
        docs = [generate_doc(i, full_collection.schema) for i in range(10)]

        result = full_collection.insert(docs)
        assert bool(result)
        for item in result:
            assert item.ok()

        stats = full_collection.stats
        assert stats is not None
        assert stats.doc_count == 10

        # Filter predicate per field name; the field name is prepended below.
        scalar_suffix = {
            "bool_field": "= true",
            "double_field": ">= 3.0",
            "float_field": ">= 3.0",
            "int32_field": ">= 30",
            "int64_field": ">= 30",
            "uint32_field": ">= 30",
            "uint64_field": ">= 30",
            "string_field": ">= 'test_3'",
            "array_bool_field": "contain_any (false)",
            "array_double_field": "contain_any (3.0, 4.0)",
            "array_float_field": "contain_any (3.0, 4.0)",
            "array_int64_field": "contain_any (3, 4)",
            "array_int32_field": "contain_any (3, 4)",
            "array_uint64_field": "contain_any (3, 4)",
            "array_uint32_field": "contain_any (3, 4)",
            "array_string_field": "contain_any ('test_3', 'test_4')",
        }
        if field_name not in scalar_suffix:
            # pytest.fail (unlike `assert False`) is not stripped under `python -O`
            # and matches how the unsupported index type is reported below.
            pytest.fail(f"Unsupported field type for index creation: {field_name}")
        query_filter = f"{field_name} {scalar_suffix[field_name]}"

        query_result_before = full_collection.query(filter=query_filter, topk=5)

        if index_type not in DEFAULT_INDEX_PARAMS:
            pytest.fail(f"Unsupported index type for index creation: {index_type}")
        index_param = DEFAULT_INDEX_PARAMS[index_type]

        full_collection.create_index(
            field_name=field_name, index_param=index_param, option=IndexOption()
        )

        # Creating the index must not change which documents the filter matches.
        query_result_after = full_collection.query(filter=query_filter, topk=5)

        assert len(query_result_before) == len(query_result_after), (
            f"Query result count mismatch: before={len(query_result_before)}, after={len(query_result_after)}"
        )

        before_ids = set(doc.id for doc in query_result_before)
        after_ids = set(doc.id for doc in query_result_after)
        assert before_ids == after_ids, (
            f"Query result IDs mismatch: before={before_ids}, after={after_ids}"
        )

        # Second, identical create_index call: passing means it did not raise.
        full_collection.create_index(
            field_name=field_name, index_param=index_param, option=IndexOption()
        )

    def test_optimize(self, full_collection: Collection):
        """Docs remain fetchable after optimize() with a default OptimizeOption."""
        batch = [generate_doc(doc_no, full_collection.schema) for doc_no in range(10)]

        statuses = full_collection.insert(batch)
        assert bool(statuses)
        for status in statuses:
            assert status.ok()

        snapshot = full_collection.stats
        assert snapshot is not None
        assert snapshot.doc_count == 10

        full_collection.optimize(option=OptimizeOption())

        # Spot-check a single document after optimization.
        by_id = full_collection.fetch(["1"])
        assert "1" in by_id
        assert by_id["1"].id == "1"

    @pytest.mark.parametrize(
        "vector_type, index_type", SUPPORT_VECTOR_DATA_TYPE_INDEX_MAP_PARAMS
    )
    def test_vector_index_operation(
        self,
        full_collection: Collection,
        vector_type: DataType,
        index_type: IndexType,
    ):
        """Exercise create_index/drop_index on a vector field around inserts.

        Flow: insert docs 0-4, create the index, insert docs 5-7 and fetch
        them, drop the index, insert docs 8-9 and fetch them, then destroy the
        collection. Document counts are checked after each insert.

        Args:
            full_collection: Collection fixture the test operates on.
            vector_type: Vector data type; selects the field via
                DEFAULT_VECTOR_FIELD_NAME (parametrized).
            index_type: Index type looked up in DEFAULT_INDEX_PARAMS (parametrized).
        """
        vector_field_name = DEFAULT_VECTOR_FIELD_NAME[vector_type]
        # Shared suffix so every failure message names the parametrized case.
        case = f"for vector type {vector_type} and index type {index_type}"

        docs = [generate_doc(i, full_collection.schema) for i in range(5)]

        result = full_collection.insert(docs)
        assert len(result) == 5, (
            f"Expected 5 insertion results, got {len(result)} {case}"
        )
        for i, item in enumerate(result):
            assert item.ok(), (
                f"Before create_index, insertion result {i} is not OK {case}, result={result}"
            )

        stats = full_collection.stats
        assert stats is not None, f"stats is None {case}"
        assert stats.doc_count == 5, (
            f"Expected 5 documents, got {stats.doc_count} {case}"
        )

        if index_type not in DEFAULT_INDEX_PARAMS:
            pytest.fail(
                f"Unsupported index type {index_type} for vector type {vector_type} in test_vector_index_operation"
            )
        index_param = DEFAULT_INDEX_PARAMS[index_type]

        full_collection.create_index(
            field_name=vector_field_name,
            index_param=index_param,
            option=IndexOption(),
        )

        stats_after_create = full_collection.stats
        assert stats_after_create is not None, f"stats_after_create is None {case}"

        # Insert docs 5..7 while the index exists.
        new_docs = [generate_doc(i, full_collection.schema) for i in range(5, 8)]

        result = full_collection.insert(new_docs)
        assert len(result) == 3, (
            f"Expected 3 insertion results, got {len(result)} {case}"
        )
        for i, item in enumerate(result):
            assert item.ok(), (
                f"Before drop_index, insertion result {i} is not OK {case}, result={result}"
            )

        stats_after_insert1 = full_collection.stats
        assert stats_after_insert1 is not None, f"stats_after_insert1 is None {case}"
        assert stats_after_insert1.doc_count == 8, (
            f"Expected 8 documents, got {stats_after_insert1.doc_count} {case}"
        )

        fetched_docs = full_collection.fetch([f"{i}" for i in range(5, 8)])
        assert len(fetched_docs) == 3, (
            f"Expected 3 fetched documents, got {len(fetched_docs)} {case}"
        )

        for i in range(5, 8):
            doc_id = f"{i}"
            assert doc_id in fetched_docs, (
                f"Document ID {doc_id} not found in fetched results {case}"
            )
            assert fetched_docs[doc_id].id == doc_id, (
                f"Document {doc_id} has incorrect ID field value {case}"
            )

        full_collection.drop_index(field_name=vector_field_name)

        # Insert docs 8..9 after the index has been dropped.
        more_docs = [generate_doc(i, full_collection.schema) for i in range(8, 10)]
        result = full_collection.insert(more_docs)
        assert len(result) == 2, (
            f"Expected 2 insertion results, got {len(result)} {case}"
        )
        for i, item in enumerate(result):
            assert item.ok(), (
                f"After drop_index, insertion result {i} is not OK {case}, result={result}"
            )

        # Verify document count after the second post-index insertion.
        stats_after_insert2 = full_collection.stats
        assert stats_after_insert2 is not None, f"stats_after_insert2 is None {case}"
        assert stats_after_insert2.doc_count == 10, (
            f"Expected 10 documents, got {stats_after_insert2.doc_count} {case}"
        )

        fetched_docs = full_collection.fetch([f"{i}" for i in range(8, 10)])
        assert len(fetched_docs) == 2, (
            f"Expected 2 fetched documents, got {len(fetched_docs)} {case}"
        )

        # Verify the fetched documents carry the ids that were requested.
        for i in range(8, 10):
            doc_id = f"{i}"
            assert doc_id in fetched_docs, (
                f"Document ID {doc_id} not found in fetched results {case}"
            )
            assert fetched_docs[doc_id].id == doc_id, (
                f"Document {doc_id} has incorrect ID field value {case}"
            )

        # Final verification before tearing the collection down.
        final_stats = full_collection.stats
        assert final_stats is not None, f"final_stats is None {case}"
        assert final_stats.doc_count == 10, (
            f"Expected 10 documents, got {final_stats.doc_count} {case}"
        )
        full_collection.destroy()

    @staticmethod
    def create_collection(
        collection_path, collection_option: CollectionOption
    ) -> Collection:
        """Create and open a collection with `id`, `name` and a `dense` vector.

        Args:
            collection_path: Path passed to zvec.create_and_open.
            collection_option: Option passed to zvec.create_and_open.

        Returns:
            The opened collection; asserts that it is not None.
        """
        id_field = FieldSchema(
            "id",
            DataType.INT64,
            nullable=False,
            index_param=InvertIndexParam(enable_range_optimization=True),
        )
        name_field = FieldSchema(
            "name",
            DataType.STRING,
            nullable=True,
            index_param=InvertIndexParam(),
        )
        dense_vector = VectorSchema(
            "dense",
            DataType.VECTOR_FP32,
            dimension=128,
            index_param=HnswIndexParam(),
        )
        collection_schema = CollectionSchema(
            name="test_collection_invalid_vector_index",
            fields=[id_field, name_field],
            vectors=[dense_vector],
        )

        opened = zvec.create_and_open(
            path=collection_path, schema=collection_schema, option=collection_option
        )
        assert opened is not None, "Failed to create and open collection"
        return opened

    @staticmethod
    def check_error_message(exc_info, invalid_name):
        """Assert that the captured exception carries the expected message.

        Names whose exact type is `str` must produce
        INDEX_NON_EXISTENT_COLUMN_ERROR_MSG; any other value (None, numbers)
        must produce INCOMPATIBLE_FUNCTION_ERROR_MSG.

        Args:
            exc_info: Object whose `.value` is the raised exception.
            invalid_name: The name that was passed to the failing call.
        """
        actual_message = str(exc_info.value)
        if type(invalid_name) is str:
            expected_fragment = INDEX_NON_EXISTENT_COLUMN_ERROR_MSG
        else:
            expected_fragment = INCOMPATIBLE_FUNCTION_ERROR_MSG
        assert expected_fragment in actual_message, (
            "Error message is unreasonable: e=" + actual_message
        )

    @pytest.mark.parametrize(
        "invalid_field_name,invalid_vector_name",
        [
            ("", ""),  # Empty string
            (" ", " "),  # Space only
            ("v" * 33, "v" * 33),  # Too long (33 characters, exceeds 32)
            ("vector name", "vector name"),  # Contains space
            ("vector@name", "vector@name"),  # Contains special character
            ("vector/name", "vector/name"),  # Contains slash
            ("vector\\name", "vector\\name"),  # Contains backslash
            ("vector.name", "vector.name"),  # Contains dot
            ("vector$data", "vector$data"),  # Contains dollar sign
            ("vector+name", "vector+name"),  # Contains plus sign
            ("vector=name", "vector=name"),  # Contains equals sign
            (None, None),  # None value
            (1, 1),  # Integer instead of a string
            (1.1, 1.1),  # Float instead of a string
        ],
    )
    def test_invalid_field_and_vector_name(
        self,
        collection_temp_dir,
        collection_option: CollectionOption,
        invalid_field_name: Any,
        invalid_vector_name: Any,
    ):
        """create_index and drop_index must reject invalid field/vector names.

        Every call is expected to raise, and the message is validated with
        check_error_message. The collection is destroyed and re-created
        between the create_index phase and the drop_index phase.

        Args:
            collection_temp_dir: Path used to create the collection.
            collection_option: Option used to create the collection.
            invalid_field_name: Name passed with an InvertIndexParam (parametrized).
            invalid_vector_name: Name passed with an HnswIndexParam (parametrized).
        """
        # Phase 1: create_index with the invalid names.
        coll = self.create_collection(collection_temp_dir, collection_option)
        with pytest.raises(Exception) as exc_info:
            coll.create_index(
                field_name=invalid_vector_name,
                index_param=HnswIndexParam(),
                option=IndexOption(),
            )
        self.check_error_message(exc_info, invalid_vector_name)
        with pytest.raises(Exception) as exc_info:
            coll.create_index(
                field_name=invalid_field_name,
                index_param=InvertIndexParam(),
                option=IndexOption(),
            )
        self.check_error_message(exc_info, invalid_field_name)
        coll.destroy()

        # Phase 2: drop_index with the invalid names on a fresh collection.
        coll = self.create_collection(collection_temp_dir, collection_option)
        with pytest.raises(Exception) as exc_info:
            coll.drop_index(field_name=invalid_vector_name)
        self.check_error_message(exc_info, invalid_vector_name)
        with pytest.raises(Exception) as exc_info:
            coll.drop_index(field_name=invalid_field_name)
        self.check_error_message(exc_info, invalid_field_name)
        coll.destroy()

    @pytest.mark.parametrize(
        "field_name,vector_name",
        [
            ("2", "3"),
            ("col", "co1"),
            ("ID", "IM"),
            ("name-1", "name2"),
            ("Weigt_12", "Weigt_13"),
            ("123age", "123agl"),
        ],
    )
    def test_valid_field_and_vector_name(
        self,
        collection_temp_dir,
        collection_option: CollectionOption,
        field_name: str,
        vector_name: str,
    ):
        """Indexes can be created on fields and vectors with these names.

        Builds a schema using the parametrized names, inserts five documents,
        creates an inverted index on the field and an HNSW index on the
        vector, reads the stats and destroys the collection.
        """
        # Suffix shared by every failure message of this parametrized case.
        case = f"field_name={field_name}, vector_name={vector_name}"

        id_field = FieldSchema(
            "id",
            DataType.INT64,
            nullable=False,
            index_param=InvertIndexParam(enable_range_optimization=True),
        )
        named_field = FieldSchema(field_name, DataType.STRING, nullable=True)
        named_vector = VectorSchema(
            vector_name,
            DataType.VECTOR_FP32,
            dimension=128,
            index_param=HnswIndexParam(),
        )
        schema = zvec.CollectionSchema(
            name="test_index_names",
            fields=[id_field, named_field],
            vectors=[named_vector],
        )

        coll = zvec.create_and_open(
            path=collection_temp_dir, schema=schema, option=collection_option
        )
        assert coll is not None, f"Failed to create and open collection with {case}"

        # Five documents; each gets its own 128-element vector list.
        docs = []
        for doc_no in range(5):
            docs.append(
                Doc(
                    id=f"{doc_no}",
                    fields={"id": doc_no, field_name: f"value_{doc_no}"},
                    vectors={vector_name: [float(j % 10) for j in range(128)]},
                )
            )

        statuses = coll.insert(docs)
        assert len(statuses) == 5, (
            f"Expected 5 insertion results, got {len(statuses)} for {case}"
        )
        for status in statuses:
            assert status.ok(), f"Insertion failed for {case}: {status}"

        # Scalar index first, then the vector index.
        index_requests = (
            (field_name, InvertIndexParam),
            (vector_name, HnswIndexParam),
        )
        for target_name, param_factory in index_requests:
            coll.create_index(
                field_name=target_name,
                index_param=param_factory(),
                option=IndexOption(),
            )

        # Both create_index calls returned; stats must still be readable.
        snapshot = coll.stats
        assert snapshot is not None, f"Stats is None for {case}"

        coll.destroy()

    def test_compicated_workflow(
        self,
        collection_temp_dir,
        basic_schema: CollectionSchema,
        collection_option: CollectionOption,
    ):
        """
        End-to-end workflow on a single collection:
        1. Create collection
        2. Create index on "name"
        3. Insert doc 1
        4. Upsert doc 1
        5. Insert doc 2, then update it
        6. Fetch docs 1 and 2
        7. Query with a filter
        8. Drop the index on "name"
        9. Insert doc 3
        10. Update doc 3
        11. Upsert doc 4 (new id)
        12. Fetch docs 3 and 4
        13. Query with a filter
        14. Flush and fetch all four docs
        15. Destroy
        """

        def build_doc(pk: int, name: str, weight: float, sparse: dict) -> Doc:
            # The dense vector is drawn randomly each time a doc is built.
            return Doc(
                id=str(pk),
                fields={"id": pk, "name": name, "weight": weight},
                vectors={
                    "dense": np.random.random(128).tolist(),
                    "sparse": sparse,
                },
            )

        def assert_written(status, expected_doc_count: int):
            # A successful write is truthy, ok(), and yields the expected count.
            assert bool(status)
            assert status.ok()
            assert coll.stats.doc_count == expected_doc_count

        def assert_names(expected_names: dict):
            # Fetch the given ids and compare each document's "name" field.
            fetched = coll.fetch(list(expected_names))
            assert len(fetched) == len(expected_names)
            for pk in expected_names:
                assert pk in fetched
            for pk, expected_name in expected_names.items():
                assert fetched[pk].field("name") == expected_name

        # Step 1: Create collection
        coll = zvec.create_and_open(
            path=collection_temp_dir,
            schema=basic_schema,
            option=collection_option,
        )

        assert coll is not None, "Failed to create and open collection"
        assert coll.path == collection_temp_dir
        assert coll.schema.name == basic_schema.name
        assert coll.stats.doc_count == 0

        # Step 2: Create index
        coll.create_index(
            field_name="name", index_param=InvertIndexParam(), option=IndexOption()
        )
        stats_after_index = coll.stats
        assert stats_after_index is not None, "coll.stats is None!"

        # Step 3: Insert doc 1
        assert_written(coll.insert(build_doc(1, "test1", 80.5, {1: 1.0, 2: 2.0})), 1)

        # Step 4: Upsert the existing doc 1; the count must not grow
        assert_written(
            coll.upsert(build_doc(1, "test1_updated", 85.0, {1: 1.5, 2: 2.5})), 1
        )

        # Step 5: Insert doc 2, then update it in place
        assert_written(coll.insert(build_doc(2, "test2", 90.0, {1: 3.0, 2: 4.0})), 2)
        assert_written(
            coll.update(build_doc(2, "test2_updated", 95.0, {1: 3.5, 2: 4.5})), 2
        )

        # Step 6: Fetch docs 1 and 2
        assert_names({"1": "test1_updated", "2": "test2_updated"})

        # Step 7: Query
        matches = coll.query(filter="id >= 1", topk=10)
        assert len(matches) == 2

        # Step 8: Drop index
        coll.drop_index(field_name="name")

        # Step 9: Insert doc 3
        assert_written(coll.insert(build_doc(3, "test3", 100.0, {1: 5.0, 2: 6.0})), 3)

        # Step 10: Update doc 3
        assert_written(
            coll.update(build_doc(3, "test3_updated", 105.0, {1: 5.5, 2: 6.5})), 3
        )

        # Step 11: Upsert doc 4, which does not exist yet, so the count grows
        assert_written(coll.upsert(build_doc(4, "test4", 110.0, {1: 7.0, 2: 8.0})), 4)

        # Step 12: Fetch docs 3 and 4
        assert_names({"3": "test3_updated", "4": "test4"})

        # Step 13: Query
        matches = coll.query(filter="id >= 3", topk=10)
        assert len(matches) == 2

        # Step 14: Flush, then confirm all four docs are still fetchable
        coll.flush()
        everything = coll.fetch(["1", "2", "3", "4"])
        assert len(everything) == 4

        # Step 15: Destroy
        coll.destroy()

    @pytest.mark.parametrize(
        "data_type, index_param", VALID_VECTOR_DATA_TYPE_INDEX_PARAM_MAP_PARAMS
    )
    def test_vector_index_params(
        self,
        collection_temp_dir,
        collection_option: CollectionOption,
        data_type: DataType,
        index_param,
        single_vector_schema,
    ):
        """Check fetch and vector-query results across index create/drop cycles.

        Inserts five docs, then runs the nested check_result helper at four
        points: before any create_index call, after create_index with
        `index_param`, after drop_index, and after inserting three more docs
        and creating the index again. Finally destroys the collection.

        Args:
            collection_temp_dir: Path used to create the collection.
            collection_option: Option used to create the collection.
            data_type: Vector data type; selects the field via
                DEFAULT_VECTOR_FIELD_NAME (parametrized).
            index_param: Index parameter passed to create_index; its
                metric_type and quantize_type drive the score checks (parametrized).
            single_vector_schema: Schema fixture used to create the collection
                and to generate documents.
        """
        vector_name = DEFAULT_VECTOR_FIELD_NAME[data_type]
        dimension = DEFAULT_VECTOR_DIMENSION

        coll = zvec.create_and_open(
            path=collection_temp_dir,
            schema=single_vector_schema,
            option=collection_option,
        )

        assert coll is not None, (
            f"Failed to create and open collection, {data_type}, {index_param}"
        )

        # Keyed by string id so check_result can look up the inserted doc.
        docs = {str(i): generate_doc(i, single_vector_schema) for i in range(5)}
        result = coll.insert(docs.values())
        assert len(result) == len(docs), (
            f"Expected 5 results, got {len(result)}, {data_type}, {index_param}"
        )
        for item in result:
            assert item.ok(), f"Insertion failed for, {data_type}, {index_param}"

        # check_result reads `docs` from the enclosing scope when it is called,
        # so the in-place `docs |= new_docs` further down extends what it checks.
        def check_result(
            label: str, metric_type: MetricType, quantize_type: QuantizeType
        ):
            # Dense query is a list of ones; sparse vector types use a dict query.
            query_vector = [1] * dimension
            if data_type in [DataType.SPARSE_VECTOR_FP16, DataType.SPARSE_VECTOR_FP32]:
                query_vector = {1: 1}

            fetch_result = coll.fetch([str(i) for i in range(len(docs))])
            assert len(fetch_result) == len(docs), (
                f"{label}, Expected 5 fetched docs, got {len(fetch_result)}, {data_type}, {index_param}"
            )
            for i in range(len(docs)):
                doc_id = str(i)
                assert doc_id in fetch_result, (
                    f"{label}, Document ID '{doc_id}' not found, {data_type}, {index_param}"
                )
                fetched_doc = fetch_result[doc_id]
                # The fetched doc must equal the doc that was inserted.
                assert is_doc_equal(fetched_doc, docs[doc_id], single_vector_schema), (
                    f"{label}, doc not equal, insert: {docs[doc_id]}, fetched: {fetched_doc}, {data_type}, {index_param}"
                )

            # topk equals the number of docs, so every doc is expected back.
            query_result: list[Doc] = coll.query(
                VectorQuery(field_name=vector_name, vector=query_vector),
                include_vector=False,
                topk=len(docs),
            )
            assert len(query_result) == len(docs), (
                f"{label}, Expected {len(docs)} result, got {len(query_result)}, {data_type}, {index_param}"
            )
            inserted_ids = [str(i) for i in range(len(docs))]
            queried_ids = [doc.id for doc in query_result]
            assert set(inserted_ids) == set(queried_ids), (
                f"{label}, inserted_ids != queried_ids, insert: {inserted_ids}, query: {queried_ids}, {data_type}, {index_param}"
            )

            last_score = None
            for i, doc in enumerate(query_result):
                # Regenerate the doc from its id to obtain the vector to compare with.
                expect_doc = generate_doc(int(doc.id), single_vector_schema)
                doc_vector = expect_doc.vector(vector_name)
                expected_score = distance(
                    doc_vector,
                    query_vector,
                    metric_type,
                    data_type,
                    quantize_type,
                )
                print(f"query: {doc}, expect_core: {expected_score}")
                # Scores are compared to the expected value only without quantization.
                if quantize_type is QuantizeType.UNDEFINED:
                    assert is_float_equal(doc.score, expected_score), (
                        f"{label} top{i} pk{doc.id} score {doc.score:6f} expected:{expected_score:6f}, {data_type}, {index_param}"
                    )
                # Ordering: IP scores must be non-increasing, other metrics non-decreasing.
                if last_score is not None:
                    if metric_type == MetricType.IP:
                        assert last_score >= doc.score, (
                            f"{label}, score not sorted, last_score: {last_score}, current_score: {doc.score}, {data_type}, {index_param}"
                        )
                    else:
                        assert last_score <= doc.score, (
                            f"{label}, score not sorted, last_score: {last_score}, current_score: {doc.score}, {data_type}, {index_param}"
                        )
                last_score = doc.score

        # Before create_index the test expects metric_type=IP and no quantization.
        check_result("pre_create_index", MetricType.IP, QuantizeType.UNDEFINED)

        # Create the index, then verify with the metric/quantization from index_param.
        coll.create_index(
            field_name=vector_name,
            index_param=index_param,
            option=IndexOption(),
        )
        check_result(
            "post_create_index", index_param.metric_type, index_param.quantize_type
        )

        # After drop_index the test again expects IP and no quantization.
        coll.drop_index(field_name=vector_name)
        check_result("post_drop_index", MetricType.IP, QuantizeType.UNDEFINED)

        # Insert docs 5..7 while no index created by this test is present.
        new_docs = {str(i): generate_doc(i, single_vector_schema) for i in range(5, 8)}
        new_result = coll.insert(new_docs.values())
        assert len(new_result) == len(new_docs), (
            f"Expected {len(new_docs)} insertion results for new docs, got {len(new_result)} for vector {vector_name}"
        )
        for item in new_result:
            assert item.ok(), (
                f"New document insertion failed for vector {vector_name}: {item}"
            )
        # In-place merge so check_result now covers all eight docs.
        docs |= new_docs
        coll.create_index(
            field_name=vector_name,
            index_param=index_param,
            option=IndexOption(),
        )

        check_result(
            "post_create_index2", index_param.metric_type, index_param.quantize_type
        )
        coll.destroy()


class TestColumnDDL:
    """Column DDL tests: ``add_column``, ``alter_column`` and ``drop_column``.

    Every test receives the ``basic_collection`` fixture and asserts a document
    count of 1 after its first insert.  Documents are built with the scalar
    fields ``id``, ``name`` and ``weight`` plus a 128-element ``dense`` vector
    and a ``sparse`` vector.
    NOTE(review): the fixture and its schema are defined outside this class;
    presumably it yields an empty collection with exactly those fields - confirm.
    """

    # ------------------------------------------------------------------
    # Shared helpers (not collected by pytest: names do not start with "test")
    # ------------------------------------------------------------------
    @staticmethod
    def _make_doc(doc_id, extra_fields=None, weight_field="weight"):
        """Build the document shape shared by the tests in this class.

        Args:
            doc_id: Integer stored in the ``id`` field; ``str(doc_id)`` becomes
                the document id.
            extra_fields: Optional mapping of additional scalar fields, applied
                after the base fields.
            weight_field: Field name that receives the value ``80.5``
                (``"weight"`` unless a test renamed that column).

        Returns:
            A ``Doc`` with a random 128-element ``dense`` vector and a fixed
            two-entry ``sparse`` vector.
        """
        fields = {"id": doc_id, "name": "test", weight_field: 80.5}
        if extra_fields:
            fields.update(extra_fields)
        return Doc(
            id=str(doc_id),
            fields=fields,
            vectors={
                "dense": np.random.random(128).tolist(),
                "sparse": {1: 1.0, 2: 2.0},
            },
        )

    @staticmethod
    def _insert_ok(collection, doc):
        """Insert ``doc`` into ``collection`` and assert the call succeeded."""
        result = collection.insert(doc)
        # The single-document result is used as a status object (``ok()`` /
        # ``code()``), so the failure message must not call ``len()`` on it:
        # that could raise and hide the real failure.
        assert bool(result), f"Insert returned a falsy result: {result!r}"
        assert result.ok(), (
            f"result={result},Insert operation failed with code = {result.code()}"
        )

    @staticmethod
    def _assert_doc_count(collection, expected):
        """Assert that the collection statistics report ``expected`` documents."""
        stats = collection.stats
        assert stats is not None
        assert stats.doc_count == expected

    @staticmethod
    def _sample_value(data_type):
        """Return a literal to store in a column of ``data_type``.

        Types without a dedicated branch fall back to the integer ``1``.
        """
        if data_type == DataType.STRING:
            return "test_value"
        if data_type == DataType.ARRAY_STRING:
            return ["test_value"]
        if data_type in (DataType.ARRAY_INT32, DataType.ARRAY_INT64):
            return [1, 2, 3]
        if data_type in (DataType.ARRAY_FLOAT, DataType.ARRAY_DOUBLE):
            return [1.1, 2.2, 3.3]
        if data_type == DataType.ARRAY_BOOL:
            return [True, False]
        if data_type in (DataType.FLOAT, DataType.DOUBLE):
            return 1.5
        if data_type in (DataType.INT32, DataType.INT64):
            return 100
        if data_type == DataType.BOOL:
            return True
        return 1

    # ------------------------------------------------------------------
    # add_column
    # ------------------------------------------------------------------
    def test_add_column(self, basic_collection: Collection):
        """Add an INT32 column from an expression, then insert a doc that sets it."""
        basic_collection.add_column(
            field_schema=FieldSchema("income", DataType.INT32),
            expression="'weight' * 2",  # Simple expression
        )

        self._insert_ok(basic_collection, self._make_doc(1, {"income": 1}))
        self._assert_doc_count(basic_collection, 1)

    def test_add_column_with_default_option(self, basic_collection: Collection):
        """Add a column passing an explicitly constructed default ``AddColumnOption``."""
        basic_collection.add_column(
            field_schema=FieldSchema("test_column_default", DataType.INT32),
            expression="100",
            option=AddColumnOption(),  # Default option
        )

        # The column exists if a document carrying it can be inserted.
        self._insert_ok(
            basic_collection, self._make_doc(1, {"test_column_default": 1})
        )
        self._assert_doc_count(basic_collection, 1)

    @pytest.mark.parametrize("concurrency", [0, 1, 4, 8])
    def test_add_column_with_various_concurrency_options(
        self, basic_collection: Collection, concurrency
    ):
        """Add a column with each ``AddColumnOption(concurrency=...)`` value."""
        field_name = f"test_column_concurrent_{concurrency}"
        basic_collection.add_column(
            field_schema=FieldSchema(field_name, DataType.INT32),
            expression="100",
            option=AddColumnOption(concurrency=concurrency),
        )

        self._insert_ok(basic_collection, self._make_doc(1, {field_name: 200}))
        self._assert_doc_count(basic_collection, 1)

    @pytest.mark.parametrize("data_type", SUPPORT_ADD_COLUMN_DATA_TYPE)
    def test_add_column_valid_data_types(self, basic_collection: Collection, data_type):
        """Add a column for every supported data type and insert a matching value."""
        field_name = f"test_field_{data_type.name.lower()}"

        basic_collection.add_column(
            field_schema=FieldSchema(field_name, data_type),
            expression="1" if data_type != DataType.STRING else "'test'",
        )

        doc = self._make_doc(1, {field_name: self._sample_value(data_type)})
        self._insert_ok(basic_collection, doc)
        self._assert_doc_count(basic_collection, 1)

    @pytest.mark.parametrize("data_type", NOT_SUPPORT_ADD_COLUMN_DATA_TYPE)
    def test_add_column_invalid_data_types(
        self, basic_collection: Collection, data_type
    ):
        """``add_column`` must fail with the documented message for unsupported types."""
        field_name = f"test_field_{data_type.name.lower()}"

        # Only the call under test lives inside ``pytest.raises``.
        with pytest.raises(Exception) as exc_info:
            basic_collection.add_column(
                field_schema=FieldSchema(field_name, data_type),
                expression="1" if data_type != DataType.STRING else "'test'",
            )

        assert NOT_SUPPORT_ADD_COLUMN_ERROR_MSG in str(exc_info.value)

    @pytest.mark.parametrize("nullable", [True, False])
    def test_add_column_with_nullable_options(
        self, basic_collection: Collection, nullable
    ):
        """A nullable column accepts ``None``; a non-nullable one raises ``ValueError``."""
        field_name = f"test_field_nullable_{str(nullable).lower()}"

        basic_collection.add_column(
            field_schema=FieldSchema(field_name, DataType.INT32, nullable=nullable),
            expression="100",
        )

        # A concrete value is accepted regardless of nullability.
        self._insert_ok(basic_collection, self._make_doc(1, {field_name: 200}))
        self._assert_doc_count(basic_collection, 1)

        # A None value is only accepted when the column is nullable.
        null_doc = self._make_doc(2, {field_name: None})
        if nullable:
            self._insert_ok(basic_collection, null_doc)
        else:
            with pytest.raises(ValueError) as e:
                basic_collection.insert(null_doc)
            assert f"Field '{field_name}': expected non-nullable type" in str(e.value)

        # The rejected insert must not have changed the document count.
        self._assert_doc_count(basic_collection, 2 if nullable else 1)

    @pytest.mark.parametrize(
        "expression",
        [
            "1",  # Constant integer
            "1.5",  # Constant float
            "'test'",  # Constant string
            "id",  # Reference to existing field
            "weight * 2",  # Simple arithmetic
            "weight + id",  # Complex arithmetic
            "CASE WHEN weight > 50 THEN 1 ELSE 0 END",  # Conditional expression
        ],
    )
    def test_add_column_with_different_expressions(
        self, basic_collection: Collection, expression
    ):
        """Add an INT32 column for each kind of default-value expression."""
        import zlib

        # ``hash()`` of a str is salted per interpreter run, which made the
        # column name differ between runs; CRC32 keeps it reproducible.
        suffix = zlib.crc32(expression.encode("utf-8")) % 1000
        field_name = f"test_field_expr_{suffix}"

        basic_collection.add_column(
            field_schema=FieldSchema(field_name, DataType.INT32),
            expression=expression,
        )

        self._insert_ok(basic_collection, self._make_doc(1, {field_name: 200}))
        self._assert_doc_count(basic_collection, 1)

    def test_add_column_with_index_param(self, basic_collection: Collection):
        """Add a column whose schema carries an ``InvertIndexParam``."""
        basic_collection.add_column(
            field_schema=FieldSchema(
                "indexed_field",
                DataType.INT32,
                index_param=InvertIndexParam(enable_range_optimization=True),
            ),
            expression="id * 2",
        )

        self._insert_ok(basic_collection, self._make_doc(1, {"indexed_field": 200}))
        self._assert_doc_count(basic_collection, 1)

    @pytest.mark.parametrize(
        "field_name",
        [
            "a",  # Minimum length
            "a" * 32,  # Maximum length (32 characters)
            "valid_field_name_123",  # Alphanumeric with underscore
            "Valid-Field-Name",  # With hyphens
            "_underscore_start",  # Starting with underscore
            "field_name_with_123_numbers",  # Numbers in middle
            "FIELD_NAME_UPPERCASE",  # Uppercase
            # "field_with_nums_123_and_hyphens-456",  # Complex valid name within limit
        ],
    )
    def test_add_column_with_valid_field_names(
        self, basic_collection: Collection, field_name
    ):
        """Every listed column name must be accepted by ``add_column``."""
        basic_collection.add_column(
            field_schema=FieldSchema(field_name, DataType.INT32), expression="200"
        )

        self._insert_ok(basic_collection, self._make_doc(1, {field_name: 300}))
        self._assert_doc_count(basic_collection, 1)

    @pytest.mark.parametrize(
        "invalid_field_name",
        [
            "",  # Empty string
            " ",  # Space only
            "a" * 33,  # Too long (33 characters, exceeds 32)
            "field name",  # Contains space
            "field.name",  # Contains dot
            "field@name",  # Contains special character
            "field/name",  # Contains slash
            "field\\name",  # Contains backslash
            "field$name",  # Contains dollar sign
            "field+name",  # Contains plus sign
            "field=name",  # Contains equals sign
            None,  # None value
        ],
    )
    def test_add_column_with_invalid_field_names(
        self, basic_collection: Collection, invalid_field_name
    ):
        """Every listed column name must be rejected by ``add_column``."""
        with pytest.raises(Exception) as exc_info:
            basic_collection.add_column(
                field_schema=FieldSchema(invalid_field_name, DataType.INT32),
                expression="100",
            )

        message = str(exc_info.value)
        if invalid_field_name is None:
            assert "validate failed" in message, (
                "Error message is unreasonable: e=" + message
            )
        else:
            lowered = message.lower()
            assert "invalid" in lowered or "name" in lowered

    # ------------------------------------------------------------------
    # alter_column
    # ------------------------------------------------------------------
    def test_alter_column_rename(self, basic_collection: Collection):
        """Rename ``weight`` to ``mass`` and insert a document using the new name."""
        basic_collection.alter_column(
            old_name="weight",
            new_name="mass",
            option=AlterColumnOption(),
        )

        # The document carries "mass" instead of "weight".
        self._insert_ok(basic_collection, self._make_doc(1, weight_field="mass"))
        self._assert_doc_count(basic_collection, 1)

    def test_alter_column_non_exist(self, basic_collection: Collection):
        """Altering an unknown column must report that the column was not found."""
        with pytest.raises(Exception) as exc_info:
            basic_collection.alter_column(
                old_name="non_existing",
                new_name="new_name",
                field_schema=FieldSchema("new_name", DataType.STRING),
            )

        message = str(exc_info.value)
        assert "column non_existing not found" in message, (
            "Error message is unreasonable: e=" + message
        )

    def test_alter_column_with_default_option(self, basic_collection: Collection):
        """Rename a freshly added column with a default ``AlterColumnOption``."""
        basic_collection.add_column(
            field_schema=FieldSchema("original_field", DataType.INT32), expression="100"
        )
        basic_collection.alter_column(
            old_name="original_field",
            new_name="renamed_field",
            option=AlterColumnOption(),
        )

        self._insert_ok(basic_collection, self._make_doc(1, {"renamed_field": 200}))
        self._assert_doc_count(basic_collection, 1)

    @pytest.mark.parametrize("concurrency", [0, 1, 4, 8])
    def test_alter_column_with_various_concurrency_options(
        self, basic_collection: Collection, concurrency
    ):
        """Rename a column with each ``AlterColumnOption(concurrency=...)`` value."""
        old_field_name = f"orig_field_{concurrency}"
        new_field_name = f"modified_field_{concurrency}"

        basic_collection.add_column(
            field_schema=FieldSchema(old_field_name, DataType.INT32),
            expression="100",
        )
        basic_collection.alter_column(
            old_name=old_field_name,
            new_name=new_field_name,
            option=AlterColumnOption(concurrency=concurrency),
        )

        self._insert_ok(basic_collection, self._make_doc(1, {new_field_name: 200}))
        self._assert_doc_count(basic_collection, 1)

    @pytest.mark.parametrize(
        "old_field_name,new_field_name",
        [
            ("a", "new_a"),  # Minimum length
            (
                "abcdefghijklmnopqrstuvwxyz123456",
                "a" * 32,
            ),  # Maximum length (32 characters)
            ("valid_field_name_123", "new_valid_field"),  # Alphanumeric with underscore
            ("Valid-Field-Name", "New-Field-Name"),  # With hyphens
            ("_underscore_start", "new_underscore"),  # Starting with underscore
            ("field_name_with_123_numbers", "new_with_nums"),  # Numbers in middle
            ("FIELD_NAME_UPPERCASE", "new_uppercase"),  # Uppercase
            (
                "field_with_nums_3_and_hyphens-6",
                "new_field_hyphens",
            ),  # Complex valid name
        ],
    )
    def test_alter_column_field_name_valid(
        self, basic_collection: Collection, old_field_name, new_field_name
    ):
        """Renaming between two valid names must succeed."""
        basic_collection.add_column(
            field_schema=FieldSchema(old_field_name, DataType.INT32),
            expression="100",
        )
        basic_collection.alter_column(
            old_name=old_field_name,
            new_name=new_field_name,
            option=AlterColumnOption(),
        )

        self._insert_ok(basic_collection, self._make_doc(1, {new_field_name: 200}))
        self._assert_doc_count(basic_collection, 1)

    @pytest.mark.parametrize(
        "valid_old_name,invalid_new_name",
        [
            ("temp_field", ""),  # Empty new name
            ("temp_field", "a" * 33),  # Too long new name
            ("temp_field", "field name"),  # New name with space
            ("temp_field", "field.name"),  # New name with dot
            ("temp_field", "field@name"),  # New name with special character
            ("temp_field", "field/name"),  # New name with slash
            ("temp_field", "field\\name"),  # New name with backslash
            ("temp_field", "field$name"),  # New name with dollar sign
            ("temp_field", "field+name"),  # New name with plus sign
            ("temp_field", "field=name"),  # New name with equals sign
            ("temp_field", None),  # None new name
        ],
    )
    def test_alter_column_with_invalid_field_names(
        self, basic_collection: Collection, valid_old_name, invalid_new_name
    ):
        """Renaming a valid column to an invalid name must be rejected."""
        # Create the column named by the parameter so the add/alter pair cannot
        # drift apart if the parametrization changes.
        basic_collection.add_column(
            field_schema=FieldSchema(valid_old_name, DataType.INT32), expression="100"
        )

        # A ``None`` case is exercised as the empty string.
        new_name = "" if invalid_new_name is None else invalid_new_name
        with pytest.raises(Exception) as exc_info:
            basic_collection.alter_column(
                old_name=valid_old_name,
                new_name=new_name,
                field_schema=FieldSchema(new_name, DataType.INT32),
            )

        lowered = str(exc_info.value).lower()
        assert "invalid" in lowered or "name" in lowered or "incompatible" in lowered

    # ------------------------------------------------------------------
    # drop_column
    # ------------------------------------------------------------------
    def test_drop_column_exist(self, basic_collection: Collection):
        """After dropping a column, a document that still sets it is rejected."""
        basic_collection.add_column(
            field_schema=FieldSchema("temp_field", DataType.INT32), expression="100"
        )

        self._insert_ok(basic_collection, self._make_doc(1, {"temp_field": 1}))
        self._assert_doc_count(basic_collection, 1)

        basic_collection.drop_column("temp_field")

        stale_doc = self._make_doc(2, {"temp_field": 1})
        with pytest.raises(Exception) as exc_info:
            basic_collection.insert(stale_doc)

        assert SCHEMA_VALIDATE_ERROR_MSG in str(exc_info.value)

    def test_drop_column_non_exist(self, basic_collection: Collection):
        """Dropping an unknown column must fail with the documented message."""
        with pytest.raises(Exception) as exc_info:
            basic_collection.drop_column("non_existing_column")
        assert NOT_EXIST_COLUMN_TO_DROP_ERROR_MSG in str(exc_info.value)

    @pytest.mark.parametrize(
        "field_name",
        [
            "a",  # Minimum length
            "a" * 32,  # Maximum length (32 characters)
            "valid_field_name_123",  # Alphanumeric with underscore
            "Valid-Field-Name",  # With hyphens
            "_underscore_start",  # Starting with underscore
            "field_name_with_123_numbers",  # Numbers in middle
            "FIELD_NAME_UPPERCASE",  # Uppercase
            "field_with_nums_3_and_hyphens-6",  # Complex valid name within limit
        ],
    )
    def test_drop_column_field_name_valid(
        self, basic_collection: Collection, field_name
    ):
        """Add, use and drop a column for every valid column name."""
        basic_collection.add_column(
            field_schema=FieldSchema(field_name, DataType.INT32), expression="100"
        )

        self._insert_ok(basic_collection, self._make_doc(1, {field_name: 200}))
        self._assert_doc_count(basic_collection, 1)

        basic_collection.drop_column(field_name)

        # The dropped field is no longer part of the schema.
        stale_doc = self._make_doc(2, {field_name: 200})
        with pytest.raises(Exception) as exc_info:
            basic_collection.insert(stale_doc)

        assert SCHEMA_VALIDATE_ERROR_MSG in str(exc_info.value)


================================================
FILE: python/tests/detail/test_collection_dml.py
================================================
import logging
import pytest


from zvec import (
    CollectionOption,
    InvertIndexParam,
    HnswIndexParam,
    FieldSchema,
    VectorSchema,
    CollectionSchema,
    Collection,
    Doc,
    VectorQuery,
    StatusCode,
)
from distance_helper import *
from fixture_helper import *
from doc_helper import *

# NOTE(review): not referenced in the visible part of this module; presumably an
# upper bound used by tests further down - confirm before renaming or removing.
Maximum = 1024

# Document ids that are expected to be accepted.  The last entry is 64
# characters long (26 lowercase + 26 uppercase + 12 digits).
# NOTE(review): presumably 64 is the maximum id length - confirm against the
# validation rules, which are not visible here.
DOCID_VALID_LIST = [
    "1valid_Id",
    "123.45",
    "123abc",
    "-!@#$%+=.123abc_+",
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ123456789012",
]
# Document ids that are expected to be rejected: None, empty, blank, ids with
# characters such as "(", ")", "/" and "&", and an id much longer than the
# longest valid one.
DOCID_INVALID_LIST = [
    None,
    "",
    "()qsd123",
    " ",
    "/&AS12",
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890121",
]

# Values each scalar/array field is expected to accept, one
# (field_name, [candidate values]) pair per field.  Every group starts with
# None; numeric groups then cover zero, small values and extreme literals.
FIELD_VALUE_VALID_LIST = [
    ("bool_field", [None, True, False]),
    (
        "float_field",
        [
            None, 0.0, -1.0, 1.0,
            3.4028235e38, -3.4028235e38,  # ~ largest finite float32 magnitude
            1.17549435e-38, -1.17549435e-38,  # ~ smallest normal float32 magnitude
            float("inf"), float("-inf"),
        ],
    ),
    (
        "double_field",
        [
            None, 0.0, -1.0, 1.0,
            1.7976931348623157e308, -1.7976931348623157e308,  # largest finite double
            2.2250738585072014e-308, -2.2250738585072014e-308,  # smallest normal double
            float("inf"), float("-inf"),
        ],
    ),
    # Signed integers: zero, +/-1 and both ends of the range.
    ("int32_field", [None, 0, 1, -1, 2147483647, -2147483648]),
    ("int64_field", [None, 0, 1, -1, 9223372036854775807, -9223372036854775808]),
    # Unsigned integers: zero, one and the top of the range.
    ("uint32_field", [None, 0, 1, 4294967295]),
    ("uint64_field", [None, 0, 1, 18446744073709551615]),
    (
        "string_field",
        [None, "", "a", "test_name", "这是一个中文名称测试", "a" * 1000],
    ),
    # Array fields: None, empty array, single element, a few elements, extremes.
    (
        "array_bool_field",
        [None, [], [True], [False, True], [True, False, True, False] * 10],
    ),
    (
        "array_float_field",
        [None, [], [0.0], [1.0, 2.0, 3.0], [3.4028235e38, -3.4028235e38]],
    ),
    (
        "array_double_field",
        [
            None, [], [0.0], [1.0, 2.0, 3.0],
            [1.7976931348623157e308, -1.7976931348623157e308],
        ],
    ),
    ("array_int32_field", [None, [], [0], [1, 2, 3], [2147483647, -2147483648]]),
    (
        "array_int64_field",
        [None, [], [0], [1, 2, 3], [9223372036854775807, -9223372036854775808]],
    ),
    ("array_uint32_field", [None, [], [0], [1, 2, 3], [4294967295]]),
    ("array_uint64_field", [None, [], [0], [1, 2, 3], [18446744073709551615]]),
    (
        "array_string_field",
        [
            None, [], [""], ["a", "b", "c"],
            ["test_string", "测试字符串"],
            ["a" * 100] * 5,
        ],
    ),
]
# Values each scalar/array field is expected to reject, one
# (field_name, [candidate values]) pair per field: wrong Python types,
# containers where a scalar is required (and vice versa), integers one step
# outside the range, and arrays with a bad element.
FIELD_VALUE_INVALID_LIST = [
    ("bool_field", ["True", "False", ""]),
    ("float_field", ["invalid", [1.0], {"value": 1.0}]),
    ("double_field", ["invalid", [1.0], {"value": 1.0}]),
    # Signed integers: wrong types, then max + 1 and min - 1.
    ("int32_field", ["invalid", [1], {"value": 1}, 2147483648, -2147483649]),
    (
        "int64_field",
        ["invalid", [1], {"value": 1}, 9223372036854775808, -9223372036854775809],
    ),
    # Unsigned integers: wrong types, then max + 1 and a negative value.
    ("uint32_field", ["invalid", [1], {"value": 1}, 4294967296, -1]),
    ("uint64_field", ["invalid", [1], {"value": 1}, 18446744073709551616, -1]),
    ("string_field", [123, 12.34, True, ["array"], {"key": "value"}]),
    ("array_bool_field", [True, False, [True, "invalid"], {"key": True}]),
    (
        "array_float_field",
        [[1.0, "invalid"], [1.0, None], "invalid", [1.0, [2.0]], 1.0],
    ),
    (
        "array_double_field",
        [[1.0, "invalid"], [1.0, None], "invalid", [1.0, [2.0]], 1.0],
    ),
    ("array_int32_field", [[1, "invalid"], [1, None], "invalid", [1, [2]], 1]),
    ("array_int64_field", [[1, "invalid"], [1, None], "invalid", [1, [2]], 1]),
    (
        "array_uint32_field",
        [[1, "invalid"], [1, None], [1, -1], "invalid", [1, [2]], 1],
    ),
    (
        "array_uint64_field",
        [[1, "invalid"], [1, None], [1, -1], "invalid", [1, [2]], 1],
    ),
    (
        "array_string_field",
        [["valid", 123], ["valid", None], "invalid", [["nested"]], 123],
    ),
]

# Vector payloads each vector field is expected to accept, one
# (field_name, [candidate vectors]) pair per field.  Dense vectors have 128
# elements; sparse vectors are {index: value} dicts.  Each field gets its own
# list objects even where the values repeat.
VECTOR_VALUE_VALID_LIST = [
    (
        "vector_fp32_field",
        [
            [0.0] * 128, [1.0] * 128, [-1.0] * 128,
            [float("inf")] * 128, [float("-inf")] * 128,
            [i / 128.0 for i in range(128)],  # ascending ramp in [0, 1)
            [-i / 128.0 for i in range(128)],  # descending ramp in (-1, 0]
        ],
    ),
    (
        "vector_fp16_field",
        [
            [0.0] * 128, [1.0] * 128, [-1.0] * 128,
            [float("inf")] * 128, [float("-inf")] * 128,
            [i / 128.0 for i in range(128)],  # ascending ramp in [0, 1)
            [-i / 128.0 for i in range(128)],  # descending ramp in (-1, 0]
        ],
    ),
    ("vector_int8_field", [[100] * 128, [0] * 128, [-100] * 128]),
    (
        "sparse_vector_fp32_field",
        [
            {0: 1.0},
            {0: 0.0, 1: 1.0, 2: -1.0},
            {0: float("inf"), 1: float("-inf")},
            {i: float(i) for i in range(10)},
            {128: 1.0, 256: -1.0, 512: 0.5},  # non-contiguous indices
        ],
    ),
    (
        "sparse_vector_fp16_field",
        [
            {0: 1.0},
            {0: 0.0, 1: 1.0, 2: -1.0},
            {0: float("inf"), 1: float("-inf")},
            {i: float(i) for i in range(10)},
            {128: 1.0, 256: -1.0, 512: 0.5},  # non-contiguous indices
        ],
    ),
]
# Vector payloads each vector field is expected to reject, one
# (field_name, [candidate vectors]) pair per field.  Dense cases: None, empty,
# lengths other than 128, and non-numeric elements.  Sparse cases: None, a
# non-dict, and bad keys or values.
VECTOR_VALUE_INVALID_LIST = [
    (
        "vector_fp32_field",
        [
            None, [],
            [0.0] * 127, [0.0] * 129, [0.0] * 1000,  # wrong length
            ["invalid"], [0, 1, 2], [None] * 128,
        ],
    ),
    (
        "vector_fp16_field",
        [
            None, [],
            [0.0] * 127, [0.0] * 129, [0.0] * 1000,  # wrong length
            ["invalid"], [0, 1, 2], [None] * 128,
        ],
    ),
    (
        "vector_int8_field",
        [
            None, [],
            [1] * 127, [10] * 129, [0] * 1000,  # wrong length
            ["invalid"], [0, 1, 2], [None] * 128,
        ],
    ),
    (
        "sparse_vector_fp32_field",
        [
            None, "invalid",
            {None: 1.0}, {"0": 1.0},  # non-integer keys
            {0: "invalid"}, {0: None},  # non-numeric values
            {-1: 1.0},  # negative index
        ],
    ),
    (
        "sparse_vector_fp16_field",
        [
            None, "invalid",
            {None: 1.0}, {"0": 1.0},  # non-integer keys
            {0: "invalid"}, {0: None},  # non-numeric values
            {-1: 1.0},  # negative index
        ],
    ),
]

# Partial-update scenarios as (case label, scalar fields, vectors) triples; an
# empty dict means that part of the document is left out of the update.
# NOTE(review): the consumer is outside the visible part of this module -
# confirm how the label is used (several cases share one label).
UPDATE_PARTIAL_VALUE = [
    # Scalar fields only.
    (
        "partial_fields",
        {"string_field": "partially_updated_test", "float_field": 95.5},
        {},
    ),
    # One dense vector at a time.
    (
        "dense_vector_only",
        {},
        {"vector_fp32_field": [0.3] * 128},
    ),
    (
        "dense_vector_only",
        {},
        {"vector_fp16_field": [0.6] * 128},
    ),
    (
        "dense_vector_only",
        {},
        {"vector_int8_field": [3] * 128},
    ),
    # One sparse vector at a time.
    (
        "sparse_vector_only",
        {},
        {"sparse_vector_fp32_field": {1: 2.0, 2: 3.0, 4: 4.0}},
    ),
    (
        "sparse_vector_only",
        {},
        {"sparse_vector_fp16_field": {10: 2.1, 20: 3.1, 40: 4.1}},
    ),
    # Scalar fields together with a dense and a sparse vector.
    (
        "fields_and_vectors",
        {"string_field": "fully_updated_test", "bool_field": False},
        {
            "vector_fp32_field": [0.4] * 128,
            "sparse_vector_fp32_field": {1: 3.0, 3: 5.0},
        },
    ),
]


# ==================== helper ====================
def singledoc_and_check(
    collection: Collection, insert_doc, operator="insert", is_delete=1
):
    """Write one document and verify it through stats, fetch and vector query.

    Args:
        collection: Collection under test. It must not hold any other
            document, because the document count is asserted to be exactly 1
            after the write.
        insert_doc: Document to write. Its ``id`` and ``vectors`` drive the
            follow-up fetch and query checks.
        operator: Write API to exercise: ``"insert"``, ``"upsert"`` or
            ``"update"``.
        is_delete: When equal to 1, the document is deleted at the end and the
            collection is asserted to be empty again.

    Raises:
        ValueError: If ``operator`` is not one of the supported values.
        AssertionError: If any verification step fails.
    """
    if operator == "insert":
        result = collection.insert(insert_doc)
    elif operator == "upsert":
        result = collection.upsert(insert_doc)
    elif operator == "update":
        result = collection.update(insert_doc)
    else:
        # Fail at the point of misuse; falling through would only surface
        # later as an unbound ``result``.
        raise ValueError(
            f"operator value is error! got {operator!r}, "
            "expected 'insert', 'upsert' or 'update'"
        )

    assert bool(result)
    assert result.ok()

    stats = collection.stats
    assert stats is not None
    assert stats.doc_count == 1

    fetched_docs = collection.fetch([insert_doc.id])
    assert len(fetched_docs) == 1
    assert insert_doc.id in fetched_docs

    fetched_doc = fetched_docs[insert_doc.id]

    assert is_doc_equal(fetched_doc, insert_doc, collection.schema)
    assert hasattr(fetched_doc, "score"), "Document should have a score attribute"
    assert fetched_doc.score == 0.0, (
        "Fetch operation should return default score of 0.0"
    )

    # Query every configured vector field with the document's own vector and
    # expect the document to appear among the hits.
    for vector_field in DEFAULT_VECTOR_FIELD_NAME.values():
        if vector_field == {}:
            # Entries equal to an empty dict are skipped.
            continue

        query_result = collection.query(
            VectorQuery(field_name=vector_field, vector=insert_doc.vectors[vector_field]),
            topk=10,
        )
        assert len(query_result) > 0, (
            f"Expected at least 1 query result, but got {len(query_result)}"
        )

        found_doc = next(
            (doc for doc in query_result if doc.id == insert_doc.id), None
        )
        assert found_doc is not None, (
            f"Inserted document {insert_doc.id} not found in query results"
        )
        # NOTE(review): the two positional flags select a different comparison
        # mode than the fetch check above; confirm their meaning in
        # is_doc_equal.
        assert is_doc_equal(found_doc, insert_doc, collection.schema, True, False)

    if is_delete == 1:
        collection.delete(insert_doc.id)
        assert collection.stats.doc_count == 0, "Document should be deleted"


def updatedoc_partial_check(
    collection, update_doc_partial, update_doc_full, operator="update", is_delete=1
):
    """Apply a partial write and verify the stored document equals the full one.

    Args:
        collection: Collection under test. It is asserted to contain exactly
            one document after the write.
        update_doc_partial: Document that is actually written. Its ``id`` is
            used for the fetch, the query match and the optional delete.
        update_doc_full: Document the stored state is compared against. Its
            ``vectors`` are used as query vectors.
        operator: Write API to exercise: ``"upsert"`` or ``"update"``.
        is_delete: When equal to 1, the document is deleted at the end and the
            collection is asserted to be empty again.

    Raises:
        ValueError: If ``operator`` is not ``"upsert"`` or ``"update"``.
        AssertionError: If any verification step fails.
    """
    if operator == "upsert":
        result = collection.upsert(update_doc_partial)
    elif operator == "update":
        result = collection.update(update_doc_partial)
    else:
        # Fail at the point of misuse; falling through would only surface
        # later as an unbound ``result``.
        raise ValueError(
            f"operator value is error! got {operator!r}, "
            "expected 'upsert' or 'update'"
        )

    assert bool(result)
    assert result.ok()

    stats = collection.stats
    assert stats is not None
    assert stats.doc_count == 1

    doc_id = update_doc_partial.id
    fetched_docs = collection.fetch([doc_id])
    assert len(fetched_docs) == 1, (
        f"fetched_docs={fetched_docs},Expected 1 fetched document, but got {len(fetched_docs)}"
    )
    assert doc_id in fetched_docs, (
        f"Expected document ID {doc_id} in fetched documents"
    )

    fetched_doc = fetched_docs[doc_id]
    assert is_doc_equal(fetched_doc, update_doc_full, collection.schema)
    assert hasattr(fetched_doc, "score"), "Document should have a score attribute"
    assert fetched_doc.score == 0.0, (
        "Fetch operation should return default score of 0.0"
    )

    # Query every configured vector field with the expected full vector and
    # require the written document to be among the hits.
    for vector_field in DEFAULT_VECTOR_FIELD_NAME.values():
        if vector_field == {}:
            # Entries equal to an empty dict are skipped.
            continue

        query_result = collection.query(
            VectorQuery(
                field_name=vector_field, vector=update_doc_full.vectors[vector_field]
            ),
            topk=10,
        )
        assert len(query_result) > 0, (
            f"Expected at least 1 query result, but got {len(query_result)}"
        )

        found_doc = next((doc for doc in query_result if doc.id == doc_id), None)
        assert found_doc is not None, (
            f"Inserted document {doc_id} not found in query results"
        )
        # NOTE(review): the two positional flags select a different comparison
        # mode than the fetch check above; confirm their meaning in
        # is_doc_equal.
        assert is_doc_equal(
            found_doc, update_doc_full, collection.schema, True, False
        )

    if is_delete == 1:
        collection.delete(doc_id)
        assert collection.stats.doc_count == 0, "Document should be deleted"


def batchdoc_and_check(collection, multiple_docs, doc_num, operator="insert"):
    """Write a batch of documents and verify them through stats, fetch and query.

    Args:
        collection: Collection under test. After the write its document count
            is asserted to equal ``len(multiple_docs)``.
        multiple_docs: Documents written in a single call.
        doc_num: Number of documents in the batch; the document at index
            ``doc_num - 1`` is the one used for the vector-query check.
        operator: Write API to exercise: ``"insert"``, ``"upsert"`` or
            ``"update"``.

    Raises:
        ValueError: If ``operator`` is not one of the supported values.
        AssertionError: If any verification step fails.
    """
    if operator == "insert":
        result = collection.insert(multiple_docs)
    elif operator == "upsert":
        result = collection.upsert(multiple_docs)
    elif operator == "update":
        result = collection.update(multiple_docs)
    else:
        # Fail at the point of misuse; falling through would only surface
        # later as an unbound ``result``.
        raise ValueError(
            f"operator value is error! got {operator!r}, "
            "expected 'insert', 'upsert' or 'update'"
        )

    # One status per written document, all of them successful.
    assert len(result) == len(multiple_docs)
    for item in result:
        assert item.ok(), (
            f"result={result},Insert operation failed with code {item.code()}"
        )

    stats = collection.stats
    assert stats is not None, "Collection stats should not be None"
    assert stats.doc_count == len(multiple_docs), (
        f"Document count should be {len(multiple_docs)} after insert, but got {stats.doc_count}"
    )

    doc_ids = [doc.id for doc in multiple_docs]
    fetched_docs = collection.fetch(doc_ids)
    assert len(fetched_docs) == len(multiple_docs), (
        f"fetched_docs={fetched_docs},Expected {len(multiple_docs)} fetched documents, but got {len(fetched_docs)}"
    )

    for original_doc in multiple_docs:
        assert original_doc.id in fetched_docs, (
            f"Expected document ID {original_doc.id} in fetched documents"
        )
        fetched_doc = fetched_docs[original_doc.id]

        assert is_doc_equal(fetched_doc, original_doc, collection.schema)

        assert hasattr(fetched_doc, "score"), "Document should have a score attribute"
        assert fetched_doc.score == 0.0, (
            "Fetch operation should return default score of 0.0"
        )

    # The probe is the document at index ``doc_num - 1``; it is searched for
    # with its own vector in every configured vector field.
    probe_doc = multiple_docs[doc_num - 1]
    for vector_field in DEFAULT_VECTOR_FIELD_NAME.values():
        query_result = collection.query(
            VectorQuery(field_name=vector_field, vector=probe_doc.vectors[vector_field]),
            topk=1024,
        )
        assert len(query_result) > 0, (
            f"Expected at least 1 query result, but got {len(query_result)}"
        )

        found_doc = next((doc for doc in query_result if doc.id == probe_doc.id), None)
        assert found_doc is not None, (
            f"Inserted document {probe_doc.id} not found in query results"
        )

        # NOTE(review): the two positional flags select a different comparison
        # mode than the fetch check above; confirm their meaning in
        # is_doc_equal.
        assert is_doc_equal(found_doc, probe_doc, collection.schema, True, False)


# ==================== Tests ====================
# ----------------------------
# Collection Insert Test Case
# ----------------------------


class TestCollectionInsert:
    """Insert-path tests: single and batch writes, duplicate ids, and valid or
    invalid document ids, scalar field values and vector values."""

    def test_insert(self, full_collection: Collection):
        """A single generated document can be inserted and read back."""
        single_doc = generate_doc(1, full_collection.schema)
        singledoc_and_check(full_collection, single_doc)

    @pytest.mark.parametrize("doc_num", [1, 5, Maximum])
    def test_insert_batch(self, full_collection: Collection, doc_num):
        """Batches of several sizes can be inserted and read back."""
        multiple_docs = [
            generate_doc(i, full_collection.schema) for i in range(doc_num)
        ]
        batchdoc_and_check(full_collection, multiple_docs, doc_num)

    def test_insert_duplicate(self, full_collection: Collection):
        """Inserting the same document twice reports ALREADY_EXISTS and keeps
        the document count at 1."""
        insert_doc = generate_doc(1, full_collection.schema)

        first_status = full_collection.insert(insert_doc)
        assert first_status.code().value == 0
        assert first_status.ok()

        # Verify the document was inserted
        stats = full_collection.stats
        assert stats is not None
        assert stats.doc_count == 1

        insert_doc_duplicate = full_collection.insert(insert_doc)
        assert bool(insert_doc_duplicate)
        assert insert_doc_duplicate.code() == StatusCode.ALREADY_EXISTS, (
            f"Second insert operation should fail with ALREADY_EXISTS, but got code {insert_doc_duplicate.code()}"
        )

        stats = full_collection.stats
        assert stats is not None, "Collection stats should not be None"
        assert stats.doc_count == 1, (
            f"Document count should still be 1 after failed insert, but got {stats.doc_count}"
        )

    @pytest.mark.parametrize("doc_id", DOCID_VALID_LIST)
    def test_insert_docid_valid(self, full_collection: Collection, doc_id):
        """Every id in DOCID_VALID_LIST is accepted by insert."""
        insert_doc = generate_doc_random(doc_id, full_collection.schema)
        singledoc_and_check(full_collection, insert_doc)

    @pytest.mark.parametrize("doc_id", DOCID_INVALID_LIST)
    def test_insert_docid_invalid(self, full_collection: Collection, doc_id):
        """Every id in DOCID_INVALID_LIST makes insert raise and stores nothing."""
        insert_doc = generate_doc_random(doc_id, full_collection.schema)

        with pytest.raises(Exception) as exc_info:
            full_collection.insert(insert_doc)

        assert exc_info.value is not None
        stats = full_collection.stats
        assert stats is not None
        assert stats.doc_count == 0

    @pytest.mark.parametrize("field_name, field_values", FIELD_VALUE_VALID_LIST)
    @pytest.mark.parametrize(
        "full_schema_new",
        [(True, True, HnswIndexParam()), (False, True, HnswIndexParam())],
        indirect=True,
    )
    def test_insert_fields_valid(
        self, full_collection_new: Collection, field_name: str, field_values, request
    ):
        """Valid scalar values are accepted; ``None`` is rejected only when the
        matching schema field is not nullable."""
        # The schema fixture and the field under test are the same for every
        # value, so resolve them once instead of once per iteration.
        full_schema_params = request.getfixturevalue("full_schema_new")
        target_field = next(
            (
                field
                for field in full_schema_params.fields
                if field.name == field_name
            ),
            None,
        )
        rejects_none = bool(target_field) and not target_field.nullable

        for i, field_value in enumerate(field_values):
            doc_id = str(field_value) if field_name == "id" else str(i)
            doc_fields, doc_vectors = generate_vectordict_random(
                full_collection_new.schema
            )
            doc_fields[field_name] = field_value
            insert_doc = Doc(id=doc_id, fields=doc_fields, vectors=doc_vectors)

            if rejects_none and field_value is None:
                with pytest.raises(Exception) as exc_info:
                    full_collection_new.insert(insert_doc)
                assert exc_info.value is not None
            else:
                singledoc_and_check(full_collection_new, insert_doc)

    @pytest.mark.parametrize("field_name, field_values", FIELD_VALUE_INVALID_LIST)
    def test_insert_fields_invalid(
        self, full_collection: Collection, field_name: str, field_values
    ):
        """Each invalid scalar value makes insert raise and stores nothing."""
        for i, field_value in enumerate(field_values):
            doc_id = str(field_value) if field_name == "id" else str(i)
            doc_fields, doc_vectors = generate_vectordict_random(full_collection.schema)
            doc_fields[field_name] = field_value
            insert_doc = Doc(id=doc_id, fields=doc_fields, vectors=doc_vectors)

            with pytest.raises(Exception) as exc_info:
                full_collection.insert(insert_doc)
            assert exc_info.value is not None

            stats = full_collection.stats
            assert stats is not None
            assert stats.doc_count == 0

    @pytest.mark.parametrize("vector_field, vector_values", VECTOR_VALUE_VALID_LIST)
    def test_insert_vector_valid(
        self, full_collection: Collection, vector_field: str, vector_values
    ):
        """Each valid vector value can be inserted and read back."""
        for i, vector_value in enumerate(vector_values):
            doc_fields, doc_vectors = generate_vectordict_random(full_collection.schema)
            doc_vectors[vector_field] = vector_value
            insert_doc = Doc(id=str(i), fields=doc_fields, vectors=doc_vectors)

            singledoc_and_check(full_collection, insert_doc)

    @pytest.mark.parametrize("vector_field, vector_values", VECTOR_VALUE_INVALID_LIST)
    def test_insert_vector_invalid(
        self, full_collection: Collection, vector_field: str, vector_values
    ):
        """Each invalid vector value makes insert raise and stores nothing."""
        for i, vector_value in enumerate(vector_values):
            doc_fields, doc_vectors = generate_vectordict_random(full_collection.schema)
            doc_vectors[vector_field] = vector_value
            insert_doc = Doc(id=str(i), fields=doc_fields, vectors=doc_vectors)

            with pytest.raises(Exception) as exc_info:
                full_collection.insert(insert_doc)
            assert exc_info.value is not None

            stats = full_collection.stats
            assert stats is not None
            assert stats.doc_count == 0


class TestCollectionUpdate:
    """Update-path tests: single and batch updates, updates against an empty
    collection, valid or invalid field and vector values, and partial updates."""

    def test_update(self, full_collection: Collection):
        """An inserted document can be replaced through update."""
        insert_doc = generate_doc(1, full_collection.schema)
        singledoc_and_check(full_collection, insert_doc, is_delete=0)
        updated_doc = generate_update_doc(1, full_collection.schema)
        singledoc_and_check(full_collection, updated_doc, operator="update")

    @pytest.mark.parametrize("doc_num", [1, 5, Maximum])
    def test_update_batch(self, full_collection: Collection, doc_num):
        """A batch of inserted documents can be updated in one call."""
        multiple_docs = [
            generate_doc(i, full_collection.schema) for i in range(doc_num)
        ]
        batchdoc_and_check(full_collection, multiple_docs, doc_num)
        multiple_update_docs = [
            generate_update_doc(i, full_collection.schema) for i in range(doc_num)
        ]
        batchdoc_and_check(
            full_collection, multiple_update_docs, doc_num, operator="update"
        )

    def test_empty_collection_update(self, full_collection: Collection):
        """Updating a missing document reports NOT_FOUND and stores nothing."""
        updated_doc = generate_update_doc(1, full_collection.schema)
        result = full_collection.update(updated_doc)
        # ``result`` is a single status here, so the failure message must not
        # call len() on it.
        assert bool(result), f"Expected a status result, but got {result!r}"
        assert result.code() == StatusCode.NOT_FOUND, (
            f"Update operation should fail with NOT_FOUND, but got code {result.code()}"
        )
        fetched_docs = full_collection.fetch([updated_doc.id])
        assert len(fetched_docs) == 0

        stats = full_collection.stats
        assert stats is not None, "Collection stats should not be None"
        assert stats.doc_count == 0, (
            f"Document count should be 0, but got {stats.doc_count}"
        )

    @pytest.mark.parametrize("doc_num", [1, 5, Maximum])
    def test_empty_collection_update_batch(self, full_collection: Collection, doc_num):
        """Batch-updating missing documents reports NOT_FOUND for each one."""
        multiple_update_docs = [
            generate_update_doc(i, full_collection.schema) for i in range(doc_num)
        ]
        result = full_collection.update(multiple_update_docs)
        assert len(result) == len(multiple_update_docs), (
            f"Expected {len(multiple_update_docs)} results, but got {len(result)}"
        )
        for item in result:
            assert item.code() == StatusCode.NOT_FOUND, (
                f"Update operation should fail with NOT_FOUND, but got code {item.code()}"
            )

        stats = full_collection.stats
        assert stats is not None, "Collection stats should not be None"
        assert stats.doc_count == 0, (
            f"Document count should be 0, but got {stats.doc_count}"
        )

        doc_ids = [doc.id for doc in multiple_update_docs]
        fetched_docs = full_collection.fetch(doc_ids)
        assert len(fetched_docs) == 0

    @pytest.mark.parametrize("field_name, field_values", FIELD_VALUE_VALID_LIST)
    @pytest.mark.parametrize(
        "full_schema_new",
        [(True, True, HnswIndexParam()), (False, True, HnswIndexParam())],
        indirect=True,
    )
    def test_update_fields_valid(
        self, full_collection_new: Collection, field_name: str, field_values, request
    ):
        """Valid scalar values are accepted by update; ``None`` is rejected
        only when the matching schema field is not nullable."""
        # The schema fixture and the field under test are the same for every
        # value, so resolve them once instead of once per iteration.
        full_schema_params = request.getfixturevalue("full_schema_new")
        target_field = next(
            (
                field
                for field in full_schema_params.fields
                if field.name == field_name
            ),
            None,
        )
        rejects_none = bool(target_field) and not target_field.nullable

        for i, field_value in enumerate(field_values):
            insert_doc = generate_doc(i, full_collection_new.schema)
            singledoc_and_check(full_collection_new, insert_doc, is_delete=0)

            update_doc_fields, update_doc_vectors = generate_vectordict_random(
                full_collection_new.schema
            )
            update_doc_fields[field_name] = field_value
            update_doc = Doc(
                id=str(i), fields=update_doc_fields, vectors=update_doc_vectors
            )

            if rejects_none and field_value is None:
                # Only the call expected to fail lives inside the raises block.
                with pytest.raises(Exception) as exc_info:
                    full_collection_new.update(update_doc)
                assert exc_info.value is not None
                # Remove the seed document so the next iteration starts empty.
                full_collection_new.delete(insert_doc.id)
            else:
                singledoc_and_check(
                    full_collection_new, update_doc, operator="update", is_delete=1
                )

    @pytest.mark.parametrize("field_name, field_values", FIELD_VALUE_INVALID_LIST)
    def test_update_fields_invalid(
        self, full_collection: Collection, field_name: str, field_values
    ):
        """Each invalid scalar value makes update raise."""
        for i, field_value in enumerate(field_values):
            insert_doc = generate_doc(i, full_collection.schema)
            singledoc_and_check(full_collection, insert_doc, is_delete=0)

            update_doc_fields, update_doc_vectors = generate_vectordict_random(
                full_collection.schema
            )
            update_doc_fields[field_name] = field_value
            update_doc = Doc(
                id=str(i), fields=update_doc_fields, vectors=update_doc_vectors
            )

            with pytest.raises(Exception) as exc_info:
                full_collection.update(update_doc)
            assert exc_info.value is not None

            # Remove the seed document so the next iteration starts empty.
            full_collection.delete(insert_doc.id)
            stats = full_collection.stats
            assert stats is not None
            assert stats.doc_count == 0

    @pytest.mark.parametrize("vector_field, vector_values", VECTOR_VALUE_VALID_LIST)
    def test_update_doc_vector_valid(
        self,
        full_collection: Collection,
        collection_temp_dir,
        collection_option,
        vector_field: str,
        vector_values,
    ):
        """Each valid vector value can be written through update."""
        for i, vector_value in enumerate(vector_values):
            insert_doc = generate_doc(i, full_collection.schema)
            singledoc_and_check(full_collection, insert_doc, is_delete=0)

            update_doc_fields, update_doc_vectors = generate_vectordict_random(
                full_collection.schema
            )
            update_doc_vectors[vector_field] = vector_value
            update_doc = Doc(
                id=str(i), fields=update_doc_fields, vectors=update_doc_vectors
            )
            singledoc_and_check(full_collection, update_doc, operator="update")

    @pytest.mark.parametrize("vector_field, vector_values", VECTOR_VALUE_INVALID_LIST)
    def test_update_doc_vector_invalid(
        self,
        full_collection: Collection,
        collection_temp_dir,
        collection_option,
        vector_field: str,
        vector_values,
    ):
        """Each invalid vector value makes update raise."""
        for i, vector_value in enumerate(vector_values):
            insert_doc = generate_doc(i, full_collection.schema)
            singledoc_and_check(full_collection, insert_doc, is_delete=0)

            update_doc_fields, update_doc_vectors = generate_vectordict_random(
                full_collection.schema
            )
            update_doc_vectors[vector_field] = vector_value
            update_doc = Doc(
                id=str(i), fields=update_doc_fields, vectors=update_doc_vectors
            )

            with pytest.raises(Exception) as exc_info:
                full_collection.update(update_doc)
            assert exc_info.value is not None

            # Remove the seed document so the next iteration starts empty.
            full_collection.delete(insert_doc.id)
            stats = full_collection.stats
            assert stats is not None
            assert stats.doc_count == 0

    @pytest.mark.parametrize(
        "update_type, fields_to_update, vectors_to_update", UPDATE_PARTIAL_VALUE
    )
    def test_update_partial_fields(
        self,
        full_collection: Collection,
        collection_temp_dir,
        collection_option,
        update_type: str,
        fields_to_update: dict,
        vectors_to_update: dict,
        doc_id=1,
    ):
        """Updating a subset of fields/vectors leaves the rest of the document
        as inserted."""
        insert_doc = generate_doc(doc_id, full_collection.schema)
        singledoc_and_check(full_collection, insert_doc, is_delete=0)

        # Build the expected document from copies, so the inserted document's
        # own containers are not modified while computing the expectation.
        update_doc_fields = dict(insert_doc.fields)
        update_doc_fields.update(fields_to_update)
        update_doc_vectors = dict(insert_doc.vectors)
        update_doc_vectors.update(vectors_to_update)

        update_doc_full = Doc(
            id=str(doc_id), fields=update_doc_fields, vectors=update_doc_vectors
        )

        update_doc_partial = Doc(
            id=str(doc_id), fields=fields_to_update, vectors=vectors_to_update
        )

        updatedoc_partial_check(
            full_collection,
            update_doc_partial,
            update_doc_full,
            operator="update",
            is_delete=1,
        )


class TestCollectionUpsert:
    """Upsert-path tests: new and existing documents, batches, and valid or
    invalid document ids, scalar field values and vector values."""

    def test_new_doc_upsert(self, full_collection: Collection):
        """Upserting a document that does not exist yet stores it."""
        single_doc = generate_doc(1, full_collection.schema)
        singledoc_and_check(full_collection, single_doc, operator="upsert", is_delete=1)

    @pytest.mark.parametrize("doc_num", [1, 5, Maximum])
    def test_new_doc_upsert_batch(self, full_collection: Collection, doc_num):
        """Batches of several sizes can be upserted into an empty collection."""
        multiple_docs = [
            generate_doc(i, full_collection.schema) for i in range(doc_num)
        ]
        batchdoc_and_check(full_collection, multiple_docs, doc_num, operator="upsert")

    def test_existing_doc_upsert(self, full_collection: Collection):
        """Upserting an existing id replaces the stored document."""
        insert_doc = generate_doc(1, full_collection.schema)
        singledoc_and_check(full_collection, insert_doc, is_delete=0)
        updated_doc = generate_update_doc(1, full_collection.schema)
        singledoc_and_check(full_collection, updated_doc, operator="upsert")

    @pytest.mark.parametrize("doc_id", DOCID_VALID_LIST)
    def test_upsert_docid_valid(self, full_collection: Collection, doc_id):
        """Every id in DOCID_VALID_LIST is accepted by upsert."""
        upsert_doc = generate_doc_random(doc_id, full_collection.schema)
        singledoc_and_check(full_collection, upsert_doc, operator="upsert", is_delete=1)

    @pytest.mark.parametrize("doc_id", DOCID_INVALID_LIST)
    def test_upsert_docid_invalid(self, full_collection: Collection, doc_id):
        """Every id in DOCID_INVALID_LIST makes upsert raise and stores nothing."""
        upsert_doc = generate_doc_random(doc_id, full_collection.schema)

        with pytest.raises(Exception) as exc_info:
            full_collection.upsert(upsert_doc)
        assert exc_info.value is not None

        stats = full_collection.stats
        assert stats is not None
        assert stats.doc_count == 0

    @pytest.mark.parametrize("field_name, field_values", FIELD_VALUE_VALID_LIST)
    @pytest.mark.parametrize(
        "full_schema_new",
        [(True, True, HnswIndexParam()), (False, True, HnswIndexParam())],
        indirect=True,
    )
    def test_upsert_fields_valid(
        self, full_collection_new: Collection, field_name: str, field_values, request
    ):
        """Valid scalar values are accepted by upsert; ``None`` is rejected
        only when the matching schema field is not nullable."""
        # The schema fixture and the field under test are the same for every
        # value, so resolve them once instead of once per iteration.
        full_schema_params = request.getfixturevalue("full_schema_new")
        target_field = next(
            (
                field
                for field in full_schema_params.fields
                if field.name == field_name
            ),
            None,
        )
        rejects_none = bool(target_field) and not target_field.nullable

        for i, field_value in enumerate(field_values):
            doc_id = str(field_value) if field_name == "id" else str(i)
            doc_fields, doc_vectors = generate_vectordict_random(
                full_collection_new.schema
            )
            doc_fields[field_name] = field_value
            upsert_doc = Doc(id=doc_id, fields=doc_fields, vectors=doc_vectors)

            if rejects_none and field_value is None:
                with pytest.raises(Exception) as exc_info:
                    full_collection_new.upsert(upsert_doc)
                assert exc_info.value is not None
            else:
                singledoc_and_check(
                    full_collection_new, upsert_doc, operator="upsert", is_delete=1
                )

    @pytest.mark.parametrize("field_name, field_values", FIELD_VALUE_INVALID_LIST)
    def test_upsert_fields_invalid(
        self, full_collection: Collection, field_name: str, field_values
    ):
        """Each invalid scalar value makes upsert raise and stores nothing."""
        for i, field_value in enumerate(field_values):
            doc_id = str(field_value) if field_name == "id" else str(i)
            doc_fields, doc_vectors = generate_vectordict_random(full_collection.schema)
            doc_fields[field_name] = field_value
            upsert_doc = Doc(id=doc_id, fields=doc_fields, vectors=doc_vectors)

            with pytest.raises(Exception) as exc_info:
                full_collection.upsert(upsert_doc)
            assert exc_info.value is not None

            stats = full_collection.stats
            assert stats is not None
            assert stats.doc_count == 0

    @pytest.mark.parametrize("vector_field, vector_values", VECTOR_VALUE_VALID_LIST)
    def test_upsert_vector_valid(
        self, full_collection: Collection, vector_field: str, vector_values
    ):
        """Each valid vector value can be upserted and read back."""
        for i, vector_value in enumerate(vector_values):
            doc_fields, doc_vectors = generate_vectordict_random(full_collection.schema)
            doc_vectors[vector_field] = vector_value
            upsert_doc = Doc(id=str(i), fields=doc_fields, vectors=doc_vectors)

            singledoc_and_check(
                full_collection, upsert_doc, operator="upsert", is_delete=1
            )

    @pytest.mark.parametrize("vector_field, vector_values", VECTOR_VALUE_INVALID_LIST)
    def test_upsert_vector_invalid(
        self, full_collection: Collection, vector_field: str, vector_values
    ):
        """Each invalid vector value makes upsert raise and stores nothing."""
        for i, vector_value in enumerate(vector_values):
            doc_fields, doc_vectors = generate_vectordict_random(full_collection.schema)
            doc_vectors[vector_field] = vector_value
            upsert_doc = Doc(id=str(i), fields=doc_fields, vectors=doc_vectors)

            with pytest.raises(Exception) as exc_info:
                full_collection.upsert(upsert_doc)
            assert exc_info.value is not None

            stats = full_collection.stats
            assert stats is not None
            assert stats.doc_count == 0


class TestCollectionDelete:
    """Delete-path tests: batch deletes, unknown ids, mixed batches, deletion
    by filter expression and an empty id list."""

    @pytest.mark.parametrize("doc_num", [1, 5, Maximum])
    def test_delete_batch(self, full_collection: Collection, doc_num):
        """Deleting every inserted id returns one successful status per id."""
        docs = [generate_doc(idx, full_collection.schema) for idx in range(doc_num)]
        batchdoc_and_check(full_collection, docs, doc_num, operator="insert")

        ids_to_delete = [doc.id for doc in docs]
        statuses = full_collection.delete(ids_to_delete)
        assert len(statuses) == len(ids_to_delete)
        for status in statuses:
            assert status.ok()

    def test_delete_non_exist(self, full_collection: Collection):
        """Deleting an unknown id reports NOT_FOUND (numeric code 1)."""
        status = full_collection.delete("non_existing_id")
        assert status.code().value == 1
        assert status.code() == StatusCode.NOT_FOUND

    @pytest.mark.parametrize("doc_num", [5])
    def test_delete_batch_part_non_exist(self, full_collection: Collection, doc_num):
        """In a mixed batch, existing ids succeed and unknown ids report
        NOT_FOUND, in request order."""
        docs = [generate_doc(idx, full_collection.schema) for idx in range(doc_num)]
        batchdoc_and_check(full_collection, docs, doc_num, operator="insert")

        # Two trailing ids that were never inserted.
        ids_to_delete = [doc.id for doc in docs]
        ids_to_delete.extend([str(doc_num), str(doc_num + 1)])
        statuses = full_collection.delete(ids_to_delete)

        assert len(statuses) == len(ids_to_delete)
        for position, status in enumerate(statuses):
            if position >= doc_num:
                assert status.code().value == 1
                assert status.code() == StatusCode.NOT_FOUND
            else:
                assert status.ok()

    @pytest.mark.parametrize("doc_num", [5])
    def test_delete_by_filter(self, full_collection: Collection, doc_num):
        """delete_by_filter returns None for a filter expression."""
        docs = [generate_doc(idx, full_collection.schema) for idx in range(doc_num)]
        batchdoc_and_check(full_collection, docs, doc_num, operator="insert")

        outcome = full_collection.delete_by_filter("int32_field > 0")
        assert outcome is None

    def test_delete_empty_ids(self, full_collection: Collection):
        """Deleting an empty id list yields an empty result."""
        statuses = full_collection.delete([])
        assert len(statuses) == 0


================================================
FILE: python/tests/detail/test_collection_dql.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from zvec.typing import DataType, StatusCode, MetricType, QuantizeType
from zvec.model import Collection, Doc, VectorQuery
from zvec.model.param import (
    CollectionOption,
    InvertIndexParam,
    HnswIndexParam,
    FlatIndexParam,
    IVFIndexParam,
    HnswQueryParam,
    IVFQueryParam,
)


from zvec.model.schema import FieldSchema, VectorSchema
from zvec.extension import RrfReRanker, WeightedReRanker, QwenReRanker
from distance_helper import *

from zvec import StatusCode
from distance_helper import *
from fixture_helper import *
from doc_helper import *
from params_helper import *


# ==================== helper ====================
def batchdoc_and_check(
    collection: Collection, multiple_docs, doc_num, operator="insert"
):
    """Write a batch of documents and verify them through stats, fetch and query.

    Args:
        collection: Collection under test. After the write its document count
            is asserted to equal ``len(multiple_docs)``.
        multiple_docs: Documents written in a single call.
        doc_num: Number of documents in the batch; the document at index
            ``doc_num - 1`` is the one used for the vector-query check.
        operator: Write API to exercise: ``"insert"``, ``"upsert"`` or
            ``"update"``.

    Raises:
        ValueError: If ``operator`` is not one of the supported values.
        AssertionError: If any verification step fails.
    """
    if operator == "insert":
        result = collection.insert(multiple_docs)
    elif operator == "upsert":
        result = collection.upsert(multiple_docs)
    elif operator == "update":
        result = collection.update(multiple_docs)
    else:
        # Fail at the point of misuse; falling through would only surface
        # later as an unbound ``result``.
        raise ValueError(
            f"operator value is error! got {operator!r}, "
            "expected 'insert', 'upsert' or 'update'"
        )

    # One status per written document, all of them successful.
    assert len(result) == len(multiple_docs)
    for item in result:
        assert item.ok(), (
            f"result={result},Insert operation failed with code {item.code()}"
        )

    stats = collection.stats
    assert stats is not None, "Collection stats should not be None"
    assert stats.doc_count == len(multiple_docs), (
        f"Document count should be {len(multiple_docs)} after insert, but got {stats.doc_count}"
    )

    doc_ids = [doc.id for doc in multiple_docs]
    fetched_docs = collection.fetch(doc_ids)
    assert len(fetched_docs) == len(multiple_docs), (
        f"fetched_docs={fetched_docs},Expected {len(multiple_docs)} fetched documents, but got {len(fetched_docs)}"
    )

    for original_doc in multiple_docs:
        assert original_doc.id in fetched_docs, (
            f"Expected document ID {original_doc.id} in fetched documents"
        )
        fetched_doc = fetched_docs[original_doc.id]

        assert is_doc_equal(fetched_doc, original_doc, collection.schema)

        assert hasattr(fetched_doc, "score"), "Document should have a score attribute"
        assert fetched_doc.score == 0.0, (
            "Fetch operation should return default score of 0.0"
        )

    # The probe is the document at index ``doc_num - 1``; it is searched for
    # with its own vector in every configured vector field.
    probe_doc = multiple_docs[doc_num - 1]
    for vector_field in DEFAULT_VECTOR_FIELD_NAME.values():
        query_result = collection.query(
            VectorQuery(field_name=vector_field, vector=probe_doc.vectors[vector_field]),
            topk=1024,
            include_vector=True,
        )
        assert len(query_result) > 0, (
            f"Expected at least 1 query result, but got {len(query_result)}"
        )

        found_doc = next((doc for doc in query_result if doc.id == probe_doc.id), None)
        assert found_doc is not None, (
            f"Inserted document {probe_doc.id} not found in query results"
        )

        # Vectors were requested with the query, so the hit is compared against
        # the written document with the same call used for fetched documents.
        assert is_doc_equal(found_doc, probe_doc, collection.schema)
        assert hasattr(found_doc, "score")
        assert isinstance(found_doc.score, (int, float))


def batchdoc_and_check_ivf(
    collection: Collection, multiple_docs, doc_num, operator="insert"
):
    """Write ``multiple_docs`` to ``collection`` and verify them via stats, fetch and query.

    Only the ``vector_fp16_field`` and ``vector_fp32_field`` entries of
    ``DEFAULT_VECTOR_FIELD_NAME`` are exercised by the vector-query step.

    Args:
        collection: Collection that receives the documents.
        multiple_docs: Documents to write.
        doc_num: ``multiple_docs[doc_num - 1]`` is used as the probe document
            for the vector queries.
        operator: One of ``"insert"``, ``"upsert"`` or ``"update"``.

    Raises:
        ValueError: If ``operator`` is not one of the supported values.
        AssertionError: If any verification step fails.
    """
    if operator == "insert":
        result = collection.insert(multiple_docs)
    elif operator == "upsert":
        result = collection.upsert(multiple_docs)
    elif operator == "update":
        result = collection.update(multiple_docs)
    else:
        # Fail loudly here; otherwise the first use of ``result`` below would
        # raise an unrelated UnboundLocalError.
        logging.error("operator value is error!")
        raise ValueError(f"Unsupported operator: {operator!r}")

    # Every document must have been written successfully.
    assert len(result) == len(multiple_docs)
    for item in result:
        assert item.ok(), (
            f"result={result},{operator} operation failed with code {item.code()}"
        )

    stats = collection.stats
    assert stats is not None, "Collection stats should not be None"
    assert stats.doc_count == len(multiple_docs), (
        f"Document count should be {len(multiple_docs)} after insert, but got {stats.doc_count}"
    )

    # Round-trip every document through fetch().
    doc_ids = [doc.id for doc in multiple_docs]
    fetched_docs = collection.fetch(doc_ids)
    assert len(fetched_docs) == len(multiple_docs), (
        f"fetched_docs={fetched_docs},Expected {len(multiple_docs)} fetched documents, but got {len(fetched_docs)}"
    )

    for original_doc in multiple_docs:
        assert original_doc.id in fetched_docs, (
            f"Expected document ID {original_doc.id} in fetched documents"
        )
        fetched_doc = fetched_docs[original_doc.id]

        assert is_doc_equal(fetched_doc, original_doc, collection.schema)

        assert hasattr(fetched_doc, "score"), "Document should have a score attribute"
        assert fetched_doc.score == 0.0, (
            "Fetch operation should return default score of 0.0"
        )

    # Query with the probe document's own vectors and expect to find it again.
    first_doc = multiple_docs[doc_num - 1]
    for field_name in DEFAULT_VECTOR_FIELD_NAME.values():
        if field_name not in ("vector_fp16_field", "vector_fp32_field"):
            continue

        query_result = collection.query(
            VectorQuery(field_name=field_name, vector=first_doc.vectors[field_name]),
            topk=1024,
            include_vector=True,
        )
        assert len(query_result) > 0, (
            f"Expected at least 1 query result, but got {len(query_result)}"
        )

        found_doc = next(
            (doc for doc in query_result if doc.id == first_doc.id), None
        )
        assert found_doc is not None, (
            f"Inserted document {first_doc.id} not found in query results"
        )

        assert is_doc_equal(found_doc, first_doc, collection.schema)
        assert hasattr(found_doc, "score")
        assert isinstance(found_doc.score, (int, float))


def single_querydoc_check(
    multiple_docs,
    query_result,
    full_collection: Collection,
    is_by_vector=0,
    query_vector=None,
    data_type=None,
    vector_name=None,
    metric_type=MetricType.IP,
    id_include_vector: bool = False,
    is_output_fields=0,
):
    """Validate the documents of ``query_result`` that also occur in ``multiple_docs``.

    Args:
        multiple_docs: Reference documents; matching is done on ``id``.
        query_result: Documents returned by a query.
        full_collection: Collection providing the schema and, for score checks,
            the stored vectors (via ``fetch``).
        is_by_vector: When truthy, scores of ``query_result`` are recomputed
            with ``distance`` and checked for non-increasing order.
        query_vector: Query vector handed to ``distance``.
        data_type: Handed to ``distance`` and used in failure messages.
        vector_name: Vector field whose stored vectors are compared.
        metric_type: Metric handed to ``distance``.
        id_include_vector: When truthy, every default vector field must be
            non-empty on a matched document; otherwise each must equal ``{}``.
        is_output_fields: When ``0``, matched documents are compared with
            ``is_doc_equal``.
    """
    for reference_doc in multiple_docs:
        for doc in query_result:
            if doc.id != reference_doc.id:
                continue

            found_doc = doc
            if is_output_fields == 0:
                assert is_doc_equal(
                    found_doc,
                    reference_doc,
                    full_collection.schema,
                    True,
                    id_include_vector,
                )
            assert hasattr(found_doc, "score")
            # assert found_doc.score >= 0.0

            # Vector payloads are expected only when the query asked for them.
            for k, v in DEFAULT_VECTOR_FIELD_NAME.items():
                if id_include_vector:
                    assert found_doc.vector(v) != {}
                else:
                    assert found_doc.vector(v) == {}

            if not is_by_vector:
                continue

            # The whole result list is re-validated once per matched document.
            highest_allowed = float("inf")
            for ranked_doc in query_result:
                fetched = full_collection.fetch(ranked_doc.id)
                stored_vector = fetched[ranked_doc.id].vector(vector_name)
                # NOTE(review): ``k`` is whatever key the loop over
                # DEFAULT_VECTOR_FIELD_NAME above finished on - confirm this is
                # the value ``distance`` expects as its last argument.
                expected_score = distance(
                    query_vector, stored_vector, metric_type, data_type, k
                )
                field_type = full_collection.schema.vector(vector_name).data_type
                # The exact-score comparison is skipped for FP16 fields.
                if field_type != DataType.VECTOR_FP16:
                    assert abs(ranked_doc.score - expected_score) < 0.001, (
                        f"{data_type} {vector_name} :Expected score {expected_score:.6f}, but got {ranked_doc.score:.6f} for document {ranked_doc.id}"
                    )
                assert ranked_doc.score <= highest_allowed, (
                    f"{data_type} {vector_name} :Scores should be in descending order. Current: {ranked_doc.score}, Previous: {highest_allowed}"
                )
                highest_allowed = ranked_doc.score


def multi_querydoc_check(multiple_docs, query_result, full_collection):
    """Validate multi-vector query hits against the reference documents.

    For every document of ``query_result`` whose ``id`` matches one of
    ``multiple_docs`` this asserts that the two documents compare equal under
    ``is_doc_equal``, that the hit has a non-negative ``score``, and that each
    default vector field of the hit equals ``{}``.
    """
    for reference_doc in multiple_docs:
        matching_hits = (hit for hit in query_result if hit.id == reference_doc.id)
        for found_doc in matching_hits:
            assert is_doc_equal(
                found_doc, reference_doc, full_collection.schema, False, False
            )
            assert hasattr(found_doc, "score"), (
                "Document should have a score attribute"
            )
            assert found_doc.score >= 0.0, (
                "Fetch operation should return default score of 0.0"
            )
            for vector_field in DEFAULT_VECTOR_FIELD_NAME.values():
                assert found_doc.vector(vector_field) == {}


# ==================== Tests ====================
class TestCollectionFetch:
    """Fetch behaviour for unknown ids, mixed known/unknown ids and an empty id list."""

    def test_fetch_non_existing(self, full_collection: Collection):
        """Fetching only unknown ids yields an empty result."""
        unknown_ids = ["non_existing_id1", "non_existing_id2"]
        fetched = full_collection.fetch(ids=unknown_ids)
        assert len(fetched) == 0

    @pytest.mark.parametrize("doc_num", [3])
    def test_fetch_partial_non_existing(self, full_collection: Collection, doc_num):
        """An unknown id mixed into a fetch of known ids is left out of the result."""
        inserted = []
        for index in range(doc_num):
            inserted.append(generate_doc(index, full_collection.schema))
        batchdoc_and_check(full_collection, inserted, doc_num, operator="insert")

        requested_ids = [doc.id for doc in inserted] + ["non_existing_id"]
        fetched = full_collection.fetch(ids=requested_ids)

        assert len(fetched) == doc_num
        assert "non_existing_id" not in fetched.keys()

    def test_fetch_empty_ids(self, full_collection: Collection):
        """Fetching an empty id list yields an empty result."""
        fetched = full_collection.fetch(ids=[])
        fetched_count = len(fetched)
        assert fetched_count == 0, (
            f"Expected 0 results for empty ID list, but got {len(fetched)}"
        )


class TestCollectionQuery:
    @pytest.mark.parametrize("doc_num", [5])
    def test_query_with_no_condition(self, full_collection: Collection, doc_num):
        """A query without arguments returns ``doc_num`` results after inserting ``doc_num`` docs."""
        inserted = []
        for index in range(doc_num):
            inserted.append(generate_doc(index, full_collection.schema))
        batchdoc_and_check(full_collection, inserted, doc_num, operator="insert")

        hits = full_collection.query()
        assert len(hits) == doc_num
        single_querydoc_check(inserted, hits, full_collection)

    @pytest.mark.parametrize("doc_num", [10])
    def test_query_with_filter_empty(self, full_collection: Collection, doc_num):
        """``filter=""`` and ``filter=None`` both return all documents, with identical ids."""
        inserted = [
            generate_doc(index, full_collection.schema) for index in range(doc_num)
        ]
        batchdoc_and_check(full_collection, inserted, doc_num, operator="insert")

        # Run the same checks for both spellings of "no filter".
        id_sets = []
        for empty_filter in ("", None):
            hits = full_collection.query(filter=empty_filter)
            assert len(hits) == doc_num
            single_querydoc_check(inserted, hits, full_collection)
            id_sets.append({doc.id for doc in hits})

        ids_for_empty_string, ids_for_none = id_sets
        assert ids_for_empty_string == ids_for_none

    @pytest.mark.parametrize("field_name", ["int32_field"])
    @pytest.mark.parametrize("doc_num", [10])
    def test_query_with_filter_single_condition(
        self, full_collection: Collection, doc_num, field_name
    ):
        """A single ``> 5`` comparison returns exactly the documents with ids 6..doc_num-1."""
        multiple_docs = [
            generate_doc(i, full_collection.schema) for i in range(doc_num)
        ]
        batchdoc_and_check(full_collection, multiple_docs, doc_num, operator="insert")

        filter_expr = field_name + " > 5"
        query_result = full_collection.query(filter=filter_expr)
        assert len(query_result) == doc_num - 6

        expected_doc_ids = {str(i) for i in range(6, doc_num)}
        for doc in query_result:
            assert doc.id in expected_doc_ids
            assert int(doc.field(field_name)) > 5

        # The returned ids were previously collected but never checked; compare
        # them as a set so a duplicated or missing id is detected.
        returned_doc_ids = {doc.id for doc in query_result}
        assert returned_doc_ids == expected_doc_ids

        single_querydoc_check(multiple_docs, query_result, full_collection)

    @pytest.mark.parametrize("field_name", ["int32_field"])
    @pytest.mark.parametrize(
        "filter",
        [
            "int32_field > 3 and int32_field < 9",
            "int32_field >= 5 and int32_field <= 7",
        ],
    )
    @pytest.mark.parametrize("doc_num", [10])
    def test_query_with_filter_and(
        self, full_collection: Collection, doc_num, field_name, filter
    ):
        """Two comparisons joined with ``and`` return only documents satisfying both."""
        multiple_docs = [
            generate_doc(i, full_collection.schema) for i in range(doc_num)
        ]
        batchdoc_and_check(full_collection, multiple_docs, doc_num, operator="insert")

        # Use the parametrized filter as-is. It used to be overwritten with a
        # hard-coded expression, so the second parametrization never exercised
        # its own filter and the ``else`` branch below was unreachable.
        query_result = full_collection.query(filter=filter)
        if filter == "int32_field > 3 and int32_field < 9":
            assert len(query_result) == doc_num - 4 - 1
            expected_doc_ids = {str(i) for i in range(4, 9)}

            for doc in query_result:
                assert doc.id in expected_doc_ids
                field_value = int(doc.field(field_name))
                assert field_value > 3 and field_value < 9
        else:
            assert len(query_result) == 3
            expected_doc_ids = {str(i) for i in range(5, 8)}

            for doc in query_result:
                assert doc.id in expected_doc_ids
                field_value = int(doc.field(field_name))
                assert field_value >= 5 and field_value <= 7

        single_querydoc_check(multiple_docs, query_result, full_collection)

    @pytest.mark.parametrize("field_name", ["int32_field"])
    @pytest.mark.parametrize(
        "filter",
        [
            "int32_field < 3 or int32_field > 8",
            "int32_field = 3 or int32_field = 7",
            "int32_field <= 3 or int32_field >= 8",
        ],
    )
    @pytest.mark.parametrize("doc_num", [10])
    def test_query_with_filter_or(
        self, full_collection: Collection, doc_num, field_name, filter
    ):
        """Two comparisons joined with ``or`` return documents satisfying either one."""
        inserted = [
            generate_doc(index, full_collection.schema) for index in range(doc_num)
        ]
        batchdoc_and_check(full_collection, inserted, doc_num, operator="insert")
        hits = full_collection.query(filter=filter)

        # Pick the expectation (count, ids, predicate) belonging to this filter.
        if filter == "int32_field < 3 or int32_field > 8":
            expected_count = 4
            expected_doc_ids = {"0", "1", "2", "9"}

            def satisfies(value):
                return value < 3 or value > 8

        elif filter == "int32_field = 3 or int32_field = 7":
            expected_count = 2
            expected_doc_ids = {"3", "7"}

            def satisfies(value):
                return value == 3 or value == 7

        else:
            expected_count = 6
            expected_doc_ids = {"0", "1", "2", "3", "8", "9"}

            def satisfies(value):
                return value <= 3 or value >= 8

        assert len(hits) == expected_count
        for hit in hits:
            assert hit.id in expected_doc_ids
            assert satisfies(int(hit.field(field_name)))

        single_querydoc_check(inserted, hits, full_collection)

    @pytest.mark.parametrize("field_names", [("int32_field", "bool_field")])
    @pytest.mark.parametrize(
        "filter",
        [
            "(int32_field < 3 or int32_field > 8) and bool_field = false",
            "(int32_field > 2 and int32_field < 5) or (int32_field > 7 and bool_field = true)",
        ],
    )
    @pytest.mark.parametrize("doc_num", [10])
    def test_query_with_filter_parentheses(
        self, full_collection: Collection, doc_num, field_names, filter
    ):
        """Parenthesised filters combining ``and``/``or`` select the expected documents."""
        multiple_docs = [
            generate_doc(i, full_collection.schema) for i in range(doc_num)
        ]
        batchdoc_and_check(full_collection, multiple_docs, doc_num, operator="insert")
        int_field, bool_field = field_names

        query_result = full_collection.query(filter=filter)
        if filter == "(int32_field < 3 or int32_field > 8) and bool_field = false":
            assert len(query_result) == 2
            expected_doc_ids = {"1", "9"}
            for doc in query_result:
                assert doc.id in expected_doc_ids
                int_value = int(doc.field(int_field))
                bool_value = doc.field(bool_field)
                assert (int_value < 3 or int_value > 8) and bool_value == False  # noqa: E712
        else:
            assert len(query_result) == 3
            expected_doc_ids = {"3", "4", "8"}
            for doc in query_result:
                assert doc.id in expected_doc_ids
                # Convert with int() like every other comparison in this class
                # (the ``> 7`` check used to compare the raw field value), and
                # group the ``and`` explicitly so it mirrors the filter text.
                int_value = int(doc.field(int_field))
                bool_value = doc.field(bool_field)
                assert (int_value > 2 and int_value < 5) or (
                    int_value > 7 and bool_value == True  # noqa: E712
                )
        single_querydoc_check(multiple_docs, query_result, full_collection)

    @pytest.mark.parametrize(
        "filter",
        [
            "int32_field >",
            "int32_field = 'string'",
            "nonexistent_field = 5",
            "int32_field > 5 and",
            "int32_field > > 5",
        ],
    )
    @pytest.mark.parametrize("doc_num", [10])
    def test_query_filter_invalid(self, full_collection: Collection, doc_num, filter):
        """Malformed or unresolvable filters make ``query`` raise with a known message."""
        inserted = []
        for index in range(doc_num):
            inserted.append(generate_doc(index, full_collection.schema))
        batchdoc_and_check(full_collection, inserted, doc_num, operator="insert")

        with pytest.raises(Exception) as exc_info:
            full_collection.query(filter=filter)

        # Two of the filters are expected to fail with the "Analyze sql info"
        # message; all others with "Invalid filter".
        analysis_failures = ("int32_field = 'string'", "nonexistent_field = 5")
        if filter in analysis_failures:
            expected_fragment = "Analyze sql info failed"
        else:
            expected_fragment = "Invalid filter"
        assert expected_fragment in str(exc_info.value)

    @pytest.mark.parametrize("field_name", ["int32_field"])
    @pytest.mark.parametrize("topk_value", [1, 5, 10, 50, 100, 500, 1000, 1024])
    def test_query_with_filter_topk_valid(
        self, full_collection: Collection, topk_value: int, field_name
    ):
        """A range filter combined with ``topk`` returns only the document with id ``topk_value - 1``."""
        inserted = []
        for index in range(topk_value):
            inserted.append(generate_doc(index, full_collection.schema))
        batchdoc_and_check(full_collection, inserted, topk_value, operator="insert")

        lower_bound = topk_value - 1
        filter = f"{field_name} >={lower_bound} and {field_name} <={topk_value}"
        print("filter:\n")
        print(filter)

        hits = full_collection.query(filter=filter, topk=topk_value)
        assert len(hits) == 1

        expected_doc_ids = [str(lower_bound)]
        for hit in hits:
            assert hit.id in expected_doc_ids
            field_value = int(hit.field(field_name))
            assert lower_bound <= field_value <= topk_value
        single_querydoc_check(inserted, hits, full_collection)

    @pytest.mark.parametrize("field_name", ["int32_field"])
    @pytest.mark.parametrize("topk_value", [1, 5, 10, 50, 100, 500, 1000, 1024])
    def test_query_without_filter_topk_valid(
        self, full_collection: Collection, topk_value: int, field_name
    ):
        """With ``topk_value`` documents inserted, ``query(topk=topk_value)`` returns all of them."""
        inserted = []
        for index in range(topk_value):
            inserted.append(generate_doc(index, full_collection.schema))
        batchdoc_and_check(full_collection, inserted, topk_value, operator="insert")

        hits = full_collection.query(topk=topk_value)
        assert len(hits) == topk_value
        single_querydoc_check(inserted, hits, full_collection)

    @pytest.mark.parametrize("doc_num", [10])
    def test_query_with_include_vector(self, full_collection: Collection, doc_num):
        """``include_vector=True`` returns documents whose vector fields are populated."""
        inserted = []
        for index in range(doc_num):
            inserted.append(generate_doc(index, full_collection.schema))
        batchdoc_and_check(full_collection, inserted, doc_num, operator="insert")

        hits = full_collection.query(include_vector=True)
        assert len(hits) > 0

        # id_include_vector=1 makes the helper require non-empty vector fields.
        check_options = {"id_include_vector": 1}
        single_querydoc_check(inserted, hits, full_collection, **check_options)

    @pytest.mark.parametrize("output_fields", [["int32_field", "int64_field"]])
    @pytest.mark.parametrize("doc_num", [10])
    def test_query_with_output_fields(
        self, full_collection: Collection, doc_num, output_fields
    ):
        """``output_fields`` restricts each returned document to exactly those fields, in order."""
        inserted = []
        for index in range(doc_num):
            inserted.append(generate_doc(index, full_collection.schema))
        batchdoc_and_check(full_collection, inserted, doc_num, operator="insert")

        hits = full_collection.query(output_fields=output_fields)
        assert len(hits) > 0
        for hit in hits:
            returned_fields = hit.field_names()
            assert returned_fields == output_fields

    @pytest.mark.parametrize(
        "filter",
        [
            "int32_field >= 10 and int32_field <= 20",
            "int32_field = 3 and int32_field = 8",
        ],
    )
    @pytest.mark.parametrize("doc_num", [10])
    def test_query_empty_result(self, full_collection: Collection, doc_num, filter):
        """Filters that match no inserted document produce an empty result."""
        inserted = []
        for index in range(doc_num):
            inserted.append(generate_doc(index, full_collection.schema))
        batchdoc_and_check(full_collection, inserted, doc_num, operator="insert")

        hits = full_collection.query(filter=filter)
        hit_count = len(hits)
        assert hit_count == 0

    @pytest.mark.parametrize(
        "full_schema_new",
        [(True, True, HnswIndexParam()), (False, True, FlatIndexParam())],
        indirect=True,
    )
    @pytest.mark.parametrize("doc_num", [10])
    def test_query_by_id(
        self, full_collection_new: Collection, doc_num, full_schema_new
    ):
        """Querying each vector field by document id ``"1"`` returns correctly scored hits."""
        inserted = []
        for index in range(doc_num):
            inserted.append(generate_doc(index, full_collection_new.schema))
        batchdoc_and_check(full_collection_new, inserted, doc_num, operator="insert")

        for data_type, vector_field in DEFAULT_VECTOR_FIELD_NAME.items():
            id_query = VectorQuery(field_name=vector_field, id="1")
            hits = full_collection_new.query(id_query)
            assert len(hits) > 0

            # The stored vector of document "1" is the reference for the
            # expected scores.
            anchor_docs = full_collection_new.fetch(ids=["1"])
            anchor_vector = anchor_docs["1"].vector(vector_field)
            single_querydoc_check(
                inserted,
                hits,
                full_collection_new,
                is_by_vector=1,
                query_vector=anchor_vector,
                data_type=data_type,
                vector_name=vector_field,
            )

    @pytest.mark.parametrize("doc_num", [10])
    def test_query_by_id_ivf(self, full_collection_ivf: Collection, doc_num):
        """Query-by-id on the IVF collection, limited to the fp16 and fp32 vector fields."""
        inserted = []
        for index in range(doc_num):
            inserted.append(generate_doc(index, full_collection_ivf.schema))
        batchdoc_and_check_ivf(
            full_collection_ivf, inserted, doc_num, operator="insert"
        )

        for data_type, vector_field in DEFAULT_VECTOR_FIELD_NAME.items():
            if vector_field not in ["vector_fp16_field", "vector_fp32_field"]:
                continue

            id_query = VectorQuery(field_name=vector_field, id="1")
            hits = full_collection_ivf.query(id_query)
            assert len(hits) > 0

            anchor_docs = full_collection_ivf.fetch(ids=["1"])
            anchor_vector = anchor_docs["1"].vector(vector_field)
            single_querydoc_check(
                inserted,
                hits,
                full_collection_ivf,
                is_by_vector=1,
                query_vector=anchor_vector,
                data_type=data_type,
                vector_name=vector_field,
            )

    @pytest.mark.parametrize(
        "full_schema_new",
        [(True, True, HnswIndexParam()), (False, True, FlatIndexParam())],
        indirect=True,
    )
    @pytest.mark.parametrize("doc_num", [10])
    @pytest.mark.parametrize("topk", [None, 1024])
    @pytest.mark.parametrize("filter", [None, "int32_field >= 3 and int32_field <= 7"])
    def test_query_by_vector(
        self, full_collection_new: Collection, doc_num, full_schema_new, topk, filter
    ):
        """Vector queries with every combination of optional ``topk`` and ``filter``."""
        inserted = []
        for index in range(doc_num):
            inserted.append(generate_doc(index, full_collection_new.schema))
        batchdoc_and_check(full_collection_new, inserted, doc_num, operator="insert")

        for data_type, vector_field in DEFAULT_VECTOR_FIELD_NAME.items():
            _, random_vectors = generate_vectordict_random(full_collection_new.schema)
            query_vector = random_vectors[vector_field]
            vector_query = VectorQuery(field_name=vector_field, vector=query_vector)

            # ``topk`` is only forwarded when it is set.
            topk_kwargs = {"topk": topk} if topk else {}
            if filter:
                hits = full_collection_new.query(
                    filter=filter, vectors=vector_query, **topk_kwargs
                )
            else:
                # Without a filter the query object is passed positionally.
                hits = full_collection_new.query(vector_query, **topk_kwargs)

            assert len(hits) > 0, (
                f"Expected at least 1 query result, but got {len(hits)}"
            )
            single_querydoc_check(
                inserted,
                hits,
                full_collection_new,
                is_by_vector=1,
                query_vector=query_vector,
                data_type=data_type,
                vector_name=vector_field,
            )

    @pytest.mark.parametrize("doc_num", [10])
    def test_query_by_vector_ivf(self, full_collection_ivf: Collection, doc_num):
        """Random-vector queries on the IVF collection, limited to the fp16 and fp32 fields."""
        inserted = []
        for index in range(doc_num):
            inserted.append(generate_doc(index, full_collection_ivf.schema))
        batchdoc_and_check_ivf(
            full_collection_ivf, inserted, doc_num, operator="insert"
        )

        for data_type, vector_field in DEFAULT_VECTOR_FIELD_NAME.items():
            if vector_field not in ["vector_fp16_field", "vector_fp32_field"]:
                continue

            _, random_vectors = generate_vectordict_random(full_collection_ivf.schema)
            query_vector = random_vectors[vector_field]
            vector_query = VectorQuery(field_name=vector_field, vector=query_vector)
            hits = full_collection_ivf.query(vector_query, topk=1024)
            assert len(hits) > 0, (
                f"Expected at least 1 query result, but got {len(hits)}"
            )
            single_querydoc_check(
                inserted,
                hits,
                full_collection_ivf,
                is_by_vector=1,
                query_vector=query_vector,
                data_type=data_type,
                vector_name=vector_field,
            )

    @pytest.mark.parametrize("doc_num", [10])
    def test_query_multivector_rrf(self, full_collection: Collection, doc_num):
        """Multi-vector query with ``RrfReRanker``: scores match the helper's and never increase."""
        inserted = []
        for index in range(doc_num):
            inserted.append(generate_doc(index, full_collection.schema))
        batchdoc_and_check(full_collection, inserted, doc_num, operator="insert")

        _, random_vectors = generate_vectordict_random(full_collection.schema)
        vector_fields = list(DEFAULT_VECTOR_FIELD_NAME.values())

        # Per-field results are the input for the expected RRF scores.
        per_field_results = {
            field: full_collection.query(
                VectorQuery(field_name=field, vector=random_vectors[field])
            )
            for field in vector_fields
        }
        expected_rrf_scores = calculate_multi_vector_rrf_scores(per_field_results)

        combined_queries = [
            VectorQuery(field_name=field, vector=random_vectors[field])
            for field in vector_fields
        ]
        reranked = full_collection.query(
            vectors=combined_queries,
            reranker=RrfReRanker(topn=3),
        )
        assert len(reranked) > 0, (
            f"Expected at least 1 result, but got {len(reranked)}"
        )

        multi_querydoc_check(inserted, reranked, full_collection)

        score_ceiling = float("inf")
        for hit in reranked:
            hit_id = hit.id
            assert hit_id in expected_rrf_scores, (
                f"Document {hit_id} should be in expected RRF scores"
            )
            wanted = expected_rrf_scores[hit_id]
            got = hit.score
            assert abs(got - wanted) < 1e-10, (
                f"RRF score mismatch for document {hit_id}: expected {wanted}, got {got}"
            )
            assert hit.score <= score_ceiling, (
                f"Scores should be in descending order. Current: {hit.score}, Previous: {score_ceiling}"
            )
            score_ceiling = hit.score

    @pytest.mark.parametrize(
        "weights",
        [
            {
                "vector_fp32_field": 0.3,
                "vector_fp16_field": 0.2,
                "vector_int8_field": 0.3,
                "sparse_vector_fp32_field": 0.1,
                "sparse_vector_fp16_field": 0.1,
            }
        ],
    )
    @pytest.mark.parametrize(
        "metric_type", [MetricType.L2, MetricType.IP, MetricType.COSINE]
    )
    @pytest.mark.parametrize("doc_num", [10])
    def test_query_multivector_weighted(
        self, full_collection: Collection, doc_num, weights, metric_type
    ):
        """Multi-vector query with ``WeightedReRanker`` for each parametrized metric.

        The reranked scores must match ``calculate_multi_vector_weighted_scores``
        and must not increase along the result list.
        """
        multiple_docs = [
            generate_doc(i, full_collection.schema) for i in range(doc_num)
        ]
        batchdoc_and_check(full_collection, multiple_docs, doc_num, operator="insert")
        _, doc_vectors = generate_vectordict_random(full_collection.schema)

        # Use the parametrized metric. It used to be hard-coded to
        # MetricType.IP here and in the expected-score computation, so all
        # three parametrizations ran the same scenario.
        # NOTE(review): assumes calculate_multi_vector_weighted_scores supports
        # every metric in the parametrization - confirm against the helper.
        weighted_reranker = WeightedReRanker(
            topn=3, weights=weights, metric=metric_type
        )

        # Per-field results are the input for the expected weighted scores.
        single_query_results = {}
        for field_name in DEFAULT_VECTOR_FIELD_NAME.values():
            single_query_results[field_name] = full_collection.query(
                VectorQuery(field_name=field_name, vector=doc_vectors[field_name])
            )
        expected_weighted_scores = calculate_multi_vector_weighted_scores(
            single_query_results, weights, metric_type
        )

        multi_query_vectors = [
            VectorQuery(field_name=field_name, vector=doc_vectors[field_name])
            for field_name in DEFAULT_VECTOR_FIELD_NAME.values()
        ]

        multi_query_result = full_collection.query(
            vectors=multi_query_vectors,
            reranker=weighted_reranker,
        )
        assert len(multi_query_result) > 0, (
            f"Expected at least 1 result, but got {len(multi_query_result)}"
        )

        multi_querydoc_check(multiple_docs, multi_query_result, full_collection)

        prev_score = float("inf")
        for doc in multi_query_result:
            doc_id = doc.id
            assert doc_id in expected_weighted_scores, (
                f"Document {doc_id} should be in expected  scores"
            )
            expected_score = expected_weighted_scores[doc_id]
            actual_score = doc.score
            assert abs(actual_score - expected_score) < 1e-10, (
                f"score mismatch for document {doc_id}: expected {expected_score}, got {actual_score}"
            )
            assert doc.score <= prev_score, (
                f"Scores should be in descending order. Current: {doc.score}, Previous: {prev_score}"
            )
            prev_score = doc.score

    @pytest.mark.parametrize("topk", [5])
    @pytest.mark.parametrize("doc_num", [10])
    @pytest.mark.parametrize("filter", ["int32_field >= 3 and int32_field <= 7"])
    def test_query_consistency(
        self, full_collection: Collection, filter, doc_num, topk
    ):
        """Five repetitions of the same filtered query agree in size, ids and id order."""
        inserted = []
        for index in range(doc_num):
            inserted.append(generate_doc(index, full_collection.schema))
        batchdoc_and_check(full_collection, inserted, doc_num, operator="insert")

        runs = []
        for _ in range(5):
            hits = full_collection.query(filter=filter, topk=topk)
            single_querydoc_check(inserted, hits, full_collection)
            runs.append(hits)
        assert len(runs) == 5

        # Every run is compared against the first one.
        baseline = runs[0]
        expected_count = len(baseline)
        for hits in runs:
            assert len(hits) == expected_count

        expected_ids = {doc.id for doc in baseline}
        for hits in runs:
            run_ids = {doc.id for doc in hits}
            assert run_ids == expected_ids

        # Ids must already be in ascending numeric order.
        for hits in runs:
            ordered_ids = [doc.id for doc in hits]
            numerically_sorted = sorted(ordered_ids, key=int)
            assert ordered_ids == numerically_sorted

    @pytest.mark.parametrize("ef", [0, 100, 1024, 2048])
    @pytest.mark.parametrize("doc_num", [10])
    @pytest.mark.parametrize("topk", [1024])
    @pytest.mark.parametrize("filter", ["int32_field >= 3 and int32_field <= 7"])
    def test_query_vector_with_HnswQueryParam_valid(
        self,
        full_collection_new: Collection,
        doc_num,
        full_schema_new,
        topk,
        filter,
        ef,
    ):
        """Filtered vector queries accept ``HnswQueryParam`` with each parametrized ``ef``."""
        inserted = []
        for index in range(doc_num):
            inserted.append(generate_doc(index, full_collection_new.schema))
        batchdoc_and_check(full_collection_new, inserted, doc_num, operator="insert")

        for data_type, vector_field in DEFAULT_VECTOR_FIELD_NAME.items():
            _, random_vectors = generate_vectordict_random(full_collection_new.schema)
            query_vector = random_vectors[vector_field]

            tuned_query = VectorQuery(
                field_name=vector_field,
                vector=query_vector,
                param=HnswQueryParam(ef=ef),
            )
            hits = full_collection_new.query(
                filter=filter, vectors=tuned_query, topk=topk
            )
            assert len(hits) > 0, (
                f"Expected at least 1 query result, but got {len(hits)}"
            )
            single_querydoc_check(
                inserted,
                hits,
                full_collection_new,
                is_by_vector=1,
                query_vector=query_vector,
                data_type=data_type,
                vector_name=vector_field,
            )

    @pytest.mark.parametrize("ef", [None, "invalid", 10.5])
    @pytest.mark.parametrize("doc_num", [10])
    @pytest.mark.parametrize("topk", [10])
    @pytest.mark.parametrize("filter", ["int32_field >= 3 and int32_field <= 7"])
    def test_query_vector_with_HnswQueryParam_invalid(
        self, full_collection: Collection, doc_num, topk, ef, filter
    ):
        """Expect an error for every vector field when ``ef`` has an invalid type.

        After inserting ``doc_num`` generated documents, each vector field is
        queried with ``HnswQueryParam(ef=ef)``; the raised exception text must
        contain ``INCOMPATIBLE_CONSTRUCTOR_ERROR_MSG``.
        """
        inserted_docs = []
        for doc_index in range(doc_num):
            inserted_docs.append(generate_doc(doc_index, full_collection.schema))
        batchdoc_and_check(full_collection, inserted_docs, doc_num, operator="insert")

        for vector_name in DEFAULT_VECTOR_FIELD_NAME.values():
            _, random_vectors = generate_vectordict_random(full_collection.schema)
            query_vector = random_vectors[vector_name]
            # The parameter object is built inside the block so that an error
            # raised by its constructor is captured as well.
            with pytest.raises(Exception) as exc_info:
                full_collection.query(
                    filter=filter,
                    vectors=VectorQuery(
                        field_name=vector_name,
                        vector=query_vector,
                        param=HnswQueryParam(ef=ef),
                    ),
                    topk=topk,
                )
            raised_text = str(exc_info.value)
            assert INCOMPATIBLE_CONSTRUCTOR_ERROR_MSG in raised_text

    @pytest.mark.parametrize("nprobe", [1, 10, 100, 2048])
    @pytest.mark.parametrize("doc_num", [10])
    @pytest.mark.parametrize("topk", [10])
    @pytest.mark.parametrize("filter", ["int32_field >= 3 and int32_field <= 7"])
    def test_query_vector_with_IVFQueryParam_valid(
        self, full_collection_ivf: Collection, nprobe, doc_num, topk, filter
    ):
        """Run a filtered IVF query on ``vector_fp32_field`` with a valid ``nprobe``.

        ``doc_num`` generated documents are inserted via
        ``batchdoc_and_check_ivf``. Random vectors are generated for every
        entry of ``DEFAULT_VECTOR_FIELD_NAME``, but only the
        ``vector_fp32_field`` entry is actually queried and checked.
        """
        inserted_docs = []
        for doc_index in range(doc_num):
            inserted_docs.append(generate_doc(doc_index, full_collection_ivf.schema))
        batchdoc_and_check_ivf(
            full_collection_ivf, inserted_docs, doc_num, operator="insert"
        )

        for data_type, vector_name in DEFAULT_VECTOR_FIELD_NAME.items():
            _, random_vectors = generate_vectordict_random(full_collection_ivf.schema)
            if vector_name != "vector_fp32_field":
                continue

            query_vector = random_vectors[vector_name]
            ivf_query = VectorQuery(
                field_name=vector_name,
                vector=query_vector,
                param=IVFQueryParam(nprobe=nprobe),
            )
            hits = full_collection_ivf.query(
                filter=filter,
                vectors=ivf_query,
                topk=topk,
            )
            assert len(hits) > 0
            single_querydoc_check(
                inserted_docs,
                hits,
                full_collection_ivf,
                is_by_vector=1,
                query_vector=query_vector,
                data_type=data_type,
                vector_name=vector_name,
            )

    @pytest.mark.parametrize("nprobe", [None, 10.5])
    @pytest.mark.parametrize("doc_num", [10])
    @pytest.mark.parametrize("topk", [10])
    @pytest.mark.parametrize("filter", ["int32_field >= 3 and int32_field <= 7"])
    def test_query_vector_with_IVFQueryParam_invalid(
        self, full_collection_ivf: Collection, nprobe, doc_num, topk, filter
    ):
        """Reject invalid ``nprobe`` values when querying with ``IVFQueryParam``.

        Inserts ``doc_num`` generated documents, then for the
        ``vector_fp32_field`` entry of ``DEFAULT_VECTOR_FIELD_NAME`` expects
        building or running the query to raise an exception whose text
        contains ``INCOMPATIBLE_CONSTRUCTOR_ERROR_MSG``.

        NOTE(review): ``filter`` is parametrized but is not passed to
        ``query`` (the argument was commented out in the original test) --
        confirm whether that is deliberate.
        """
        multiple_docs = [
            generate_doc(i, full_collection_ivf.schema) for i in range(doc_num)
        ]
        batchdoc_and_check_ivf(
            full_collection_ivf, multiple_docs, doc_num, operator="insert"
        )
        for vector_name in DEFAULT_VECTOR_FIELD_NAME.values():
            _, doc_vectors = generate_vectordict_random(full_collection_ivf.schema)
            if vector_name != "vector_fp32_field":
                continue

            query_vector = doc_vectors[vector_name]
            # IVFQueryParam is constructed inside the block so that a
            # constructor-time rejection of ``nprobe`` is captured too.
            with pytest.raises(Exception) as exc_info:
                full_collection_ivf.query(
                    vectors=VectorQuery(
                        field_name=vector_name,
                        vector=query_vector,
                        param=IVFQueryParam(nprobe=nprobe),
                    ),
                    topk=topk,
                )
            assert INCOMPATIBLE_CONSTRUCTOR_ERROR_MSG in str(exc_info.value)

    @pytest.mark.parametrize("filter", ["int32_field >= 3 and int32_field <= 7"])
    @pytest.mark.parametrize("doc_num", [10])
    def test_query_vector_with_param_invalid(
        self, full_collection: Collection, doc_num, filter
    ):
        """Passing an index param where a query param belongs must be rejected.

        Inserts ``doc_num`` generated documents, then queries
        ``vector_fp16_field`` and ``vector_fp32_field`` with
        ``param=HnswIndexParam()``. Each field is checked in its own
        ``pytest.raises`` block so that the exception raised for the first
        field cannot hide a missing exception for the second one.
        """
        multiple_docs = [
            generate_doc(i, full_collection.schema) for i in range(doc_num)
        ]
        batchdoc_and_check(full_collection, multiple_docs, doc_num, operator="insert")

        target_fields = ("vector_fp16_field", "vector_fp32_field")
        checked_fields = 0
        for vector_name in DEFAULT_VECTOR_FIELD_NAME.values():
            _, doc_vectors = generate_vectordict_random(full_collection.schema)
            query_vector = doc_vectors[vector_name]
            if vector_name not in target_fields:
                continue

            with pytest.raises(Exception) as exc_info:
                full_collection.query(
                    filter=filter,
                    vectors=VectorQuery(
                        field_name=vector_name,
                        vector=query_vector,
                        param=HnswIndexParam(),
                    ),
                )
            assert INCOMPATIBLE_FUNCTION_ERROR_MSG in str(exc_info.value)
            checked_fields += 1

        # Keep failing (as the loop-wide pytest.raises did) when no target
        # field is present, instead of passing without checking anything.
        assert checked_fields > 0, (
            f"None of {target_fields} found in DEFAULT_VECTOR_FIELD_NAME"
        )

    @pytest.mark.parametrize("doc_num", [10])
    @pytest.mark.parametrize(
        "test_case_name,vector_query,expected_error_msg",
        [
            (
                "Non-existent vector field name",
                lambda ref_dense_vector: VectorQuery(
                    field_name="nonexistent_vector", vector=ref_dense_vector
                ),
                "Expected exception for non-existent vector field name",
            ),
            (
                "Invalid vector data type for dense vector (string instead of list)",
                lambda ref_dense_vector: VectorQuery(
                    field_name="vector_fp32_field", vector="invalid_vector_data"
                ),
                "Expected exception for invalid dense vector data type",
            ),
            (
                "Invalid vector data type for sparse vector (list instead of dict)",
                lambda ref_dense_vector: VectorQuery(
                    field_name="sparse_fp32", vector=[1.0, 2.0, 3.0]
                ),
                "Expected exception for invalid sparse vector data type",
            ),
            (
                "Empty vector data for dense vector",
                lambda ref_dense_vector: VectorQuery(
                    field_name="vector_fp32_field", vector=[]
                ),
                "Expected exception for empty dense vector data",
            ),
            (
                "Invalid dimension for dense vector",
                lambda ref_dense_vector: VectorQuery(
                    field_name="vector_fp32_field", vector=[1.0, 2.0]
                ),  # Only 2 dimensions instead of 128
                "Expected exception for invalid dense vector dimension",
            ),
            (
                "Non-existent document ID for by_id query",
                lambda ref_dense_vector: VectorQuery(
                    field_name="vector_fp32_field", id="999"
                ),  # Non-existent ID
                "Expected exception for non-existent document ID",
            ),
            (
                "Both vector and id specified (invalid combination)",
                lambda ref_dense_vector: VectorQuery(
                    field_name="vector_fp32_field", vector=ref_dense_vector, id="5"
                ),
                "Expected exception for specifying both vector and id",
            ),
            (
                "Neither vector nor id specified",
                lambda ref_dense_vector: VectorQuery(
                    field_name="vector_fp32_field"
                ),  # Neither vector nor id
                "Expected exception for specifying neither vector nor id",
            ),
        ],
    )
    def test_query_vector_with_vectors_invalid(
        self,
        full_collection: Collection,
        doc_num,
        test_case_name,
        vector_query,
        expected_error_msg,
    ):
        """Each parametrized malformed ``VectorQuery`` must make the query raise.

        ``vector_query`` is a factory that receives the dense vector of the
        stored document ``"5"`` and returns the query under test. It is called
        inside the ``pytest.raises`` block, so an error raised while building
        the ``VectorQuery`` counts as well. ``test_case_name`` only labels the
        parametrized case and is not read here.
        """
        seeded_docs = []
        for doc_index in range(doc_num):
            seeded_docs.append(generate_doc(doc_index, full_collection.schema))
        batchdoc_and_check(full_collection, seeded_docs, doc_num, operator="insert")

        # Use a vector that is known to exist in the collection as reference.
        fetched = full_collection.fetch(ids=["5"])
        assert "5" in fetched
        ref_dense_vector = fetched["5"].vector("vector_fp32_field")

        with pytest.raises(Exception) as exc_info:
            invalid_query = vector_query(ref_dense_vector)
            full_collection.query(vectors=[invalid_query])
        assert exc_info.value is not None, expected_error_msg

    @pytest.mark.parametrize("filter", ["int32_field >= 3 and int32_field <= 7"])
    @pytest.mark.parametrize("doc_num", [10])
    def test_query_invalid_param_incompatible_type(
        self, full_collection: Collection, doc_num, filter
    ):
        """``query`` must reject an unexpected top-level ``param`` keyword.

        Inserts ``doc_num`` generated documents, then for every vector field
        calls ``query(..., param=HnswIndexParam())`` and expects the error text
        to mention the unexpected keyword. Each field gets its own
        ``pytest.raises`` block; wrapping the whole loop would stop at the
        first exception and leave the remaining fields untested.
        """
        multiple_docs = [
            generate_doc(i, full_collection.schema) for i in range(doc_num)
        ]
        batchdoc_and_check(full_collection, multiple_docs, doc_num, operator="insert")

        # Keep failing (as the loop-wide pytest.raises did) when there is
        # nothing to iterate over.
        assert DEFAULT_VECTOR_FIELD_NAME, "No vector fields available to query"

        for vector_name in DEFAULT_VECTOR_FIELD_NAME.values():
            _, doc_vectors = generate_vectordict_random(full_collection.schema)
            # Built outside the raises block: only the query() call itself is
            # expected to fail.
            vector_query = VectorQuery(
                field_name=vector_name, vector=doc_vectors[vector_name]
            )
            with pytest.raises(Exception) as exc_info:
                full_collection.query(
                    filter=filter,
                    vectors=vector_query,
                    param=HnswIndexParam(),
                    topk=3,
                )

            assert "query() got an unexpected keyword argument 'param'" in str(
                exc_info.value
            )


class TestRRFScoreCalculation:
    class MockDoc:
        def __init__(self, id, score=0.0):
            self._id = id
            self._score = score

        @property
        def id(self):
            return self._id

        @property
        def score(self):
            return self._score

        @score.setter
        def score(self, score):
            self._score = score

    def test_rrf_score_calculation_formula(self):
        k = 60

        assert abs(calculate_rrf_score(0, k) - 1.0 / 61) < 1e-10, (
            "RRF score for rank 0 should be 1/61"
        )
        assert abs(calculate_rrf_score(1, k) - 1.0 / 62) < 1e-10, (
            "RRF score for rank 1 should be 1/62"
        )
        assert abs(calculate_rrf_score(2, k) - 1.0 / 63) < 1e-10, (
            "RRF score for rank 2 should be 1/63"
        )
        assert abs(calculate_rrf_score(10, k) - 1.0 / 71) < 1e-10, (
            "RRF score for rank 10 should be 1/71"
        )

        k = 10
        assert abs(calculate_rrf_score(0, k) - 1.0 / 11) < 1e-10, (
            "RRF score for rank 0 with k=10 should be 1/11"
        )
        assert abs(calculate_rrf_score(1, k) - 1.0 / 12) < 1e-10, (
            "RRF score for rank 1 with k=10 should be 1/12"
        )

    def test_multi_vector_rrf_scores(self):
        query1_results = [self.MockDoc("1"), self.MockDoc("2"), self.MockDoc("3")]
        query2_results = [self.MockDoc("3"), self.MockDoc("1"), self.MockDoc("4")]
        query3_results = [self.MockDoc("2"), self.MockDoc("4"), self.MockDoc("5")]
        query_results = {
            "vector1": query1_results,
            "vector2": query2_results,
            "vector3": query3_results,
        }
        rrf_scores = calculate_multi_vector_rrf_scores(query_results, k=60)

        expected_doc1_score = 1.0 / 61 + 1.0 / 62
        assert abs(rrf_scores["1"] - expected_doc1_score) < 1e-10, (
            f"RRF score for doc1 mismatch: expected {expected_doc1_score}, got {rrf_scores['1']}"
        )
        expected_doc2_score = 1.0 / 62 + 1.0 / 61
        assert abs(rrf_scores["2"] - expected_doc2_score) < 1e-10, (
            f"RRF score for doc2 mismatch: expected {expected_doc2_score}, got {rrf_scores['2']}"
        )
        expected_doc3_score = 1.0 / 63 + 1.0 / 61
        assert abs(rrf_scores["3"] - expected_doc3_score) < 1e-10, (
            f"RRF score for doc3 mismatch: expected {expected_doc3_score}, got {rrf_scores['3']}"
        )
        expected_doc4_score = 1.0 / 63 + 1.0 / 62
        assert abs(rrf_scores["4"] - expected_doc4_score) < 1e-10, (
            f"RRF score for doc4 mismatch: expected {expected_doc4_score}, got {rrf_scores['4']}"
        )

        expected_doc5_score = 1.0 / 63
        assert abs(rrf_scores["5"] - expected_doc5_score) < 1e-10, (
            f"RRF score for doc5 mismatch: expected {expected_doc5_score}, got {rrf_scores['5']}"
        )
        sorted_scores = sorted(rrf_scores.items(), key=lambda x: x[1], reverse=True)
        expected_order = ["1", "2", "3", "4", "5"]
        actual_order = [item[0] for item in sorted_scores]
        assert actual_order == expected_order, (
            f"RRF score ranking mismatch: expected {expected_order}, got {actual_order}"
        )


class TestCollectionConcurrencyOperations:
    """Runs write, query and delete calls against one collection from threads."""

    @pytest.mark.parametrize("doc_num", [10])
    def test_concurrent_insert_update_upsert_query(
        self, full_collection: Collection, doc_num
    ):
        """Start one thread per operation kind and check that nothing fatal happens.

        Documents 1000..1009 are inserted up front. One insert, update,
        upsert, query and delete thread (each with ``thread_id`` 0) is then
        started. The test passes when at least one operation recorded a result
        and no recorded error text contains "critical" or "fatal".
        """
        import threading

        results = []
        errors = []

        seed_docs = [generate_doc(i, full_collection.schema) for i in range(1000, 1010)]
        batchdoc_and_check(full_collection, seed_docs, doc_num, operator="insert")

        def run_and_record(kind, thread_id, action):
            # Everything the operation does runs inside the try so that any
            # failure is recorded instead of escaping the worker thread.
            try:
                outcome = action()
                results.append((kind, thread_id, len(outcome)))
            except Exception as exc:
                errors.append((kind, thread_id, str(exc)))

        def insert_operation(thread_id):
            def action():
                docs = [
                    generate_doc(i, full_collection.schema)
                    for i in range(thread_id, thread_id + 5)
                ]
                return full_collection.insert(docs)

            run_and_record("insert", thread_id, action)

        def update_operation(thread_id):
            def action():
                docs = [
                    generate_doc_random(i, full_collection.schema)
                    for i in range(1000, 1001)
                ]
                return full_collection.update(docs)

            run_and_record("update", thread_id, action)

        def upsert_operation(thread_id):
            def action():
                docs = [
                    generate_doc(i, full_collection.schema)
                    for i in range(thread_id, thread_id + 5)
                ]
                return full_collection.upsert(docs)

            run_and_record("upsert", thread_id, action)

        def query_operation(thread_id):
            def action():
                variant = thread_id % 3
                if variant == 0:
                    return full_collection.query(filter="int32_field > 1", topk=5)
                if variant == 1:
                    return full_collection.query(filter="bool_field = true", topk=3)
                query_vector = [0.1] * 128
                return full_collection.query(
                    VectorQuery(field_name="vector_fp32_field", vector=query_vector),
                    topk=3,
                )

            run_and_record("query", thread_id, action)

        def delete_operation(thread_id):
            def action():
                # Delete some existing documents
                if thread_id < 5:
                    delete_ids = [f"{thread_id + 1}", f"{thread_id + 2}"]
                else:
                    delete_ids = [f"{thread_id % 5 + 1}"]
                return full_collection.delete(delete_ids)

            run_and_record("delete", thread_id, action)

        # One worker per operation kind, started in this order, all with id 0.
        operations = (
            insert_operation,
            update_operation,
            upsert_operation,
            query_operation,
            delete_operation,
        )
        threads = []
        for operation in operations:
            worker = threading.Thread(target=operation, args=(0,))
            threads.append(worker)
            worker.start()

        for worker in threads:
            worker.join()

        known_kinds = ("insert", "update", "upsert", "query", "delete")
        succeeded = sum(1 for entry in results if entry[0] in known_kinds)
        assert succeeded > 0, f"No operations succeeded. Errors: {errors}"

        critical_errors = []
        for entry in errors:
            lowered = entry[2].lower()
            if "critical" in lowered or "fatal" in lowered:
                critical_errors.append(entry)
        assert len(critical_errors) == 0, f"Critical errors occurred: {critical_errors}"


================================================
FILE: python/tests/detail/test_collection_exception.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import logging
import pytest
import numpy as np
import zvec

from zvec import (
    CollectionOption,
    InvertIndexParam,
    HnswIndexParam,
    DataType,
    Collection,
    Doc,
    FieldSchema,
    VectorSchema,
    VectorQuery,
)


class TestCollectionExceptionHandling:
    """Checks that collection APIs raise when required arguments are missing,
    plus a few smoke tests on empty and freshly populated collections."""

    @staticmethod
    def _build_schema(name, with_weight=False, with_sparse=False):
        """Build the schema shared by the tests in this class.

        The schema always has the indexed ``id`` (INT64) and ``name`` (STRING)
        fields and a 128-dimensional ``dense`` FP32 vector. ``with_weight``
        appends the nullable FLOAT ``weight`` field and ``with_sparse``
        appends the ``sparse`` vector.
        """
        fields = [
            FieldSchema(
                "id",
                DataType.INT64,
                nullable=False,
                index_param=InvertIndexParam(enable_range_optimization=True),
            ),
            FieldSchema(
                "name",
                DataType.STRING,
                nullable=False,
                index_param=InvertIndexParam(),
            ),
        ]
        if with_weight:
            fields.append(FieldSchema("weight", DataType.FLOAT, nullable=True))

        vectors = [
            VectorSchema(
                "dense",
                DataType.VECTOR_FP32,
                dimension=128,
                index_param=HnswIndexParam(),
            ),
        ]
        if with_sparse:
            vectors.append(
                VectorSchema(
                    "sparse", DataType.SPARSE_VECTOR_FP32, index_param=HnswIndexParam()
                )
            )

        return zvec.CollectionSchema(name=name, fields=fields, vectors=vectors)

    @staticmethod
    def _destroy_quietly(coll):
        """Destroy ``coll`` if possible; a failure is printed, never raised."""
        if coll is not None and hasattr(coll, "destroy"):
            try:
                coll.destroy()
            except Exception as e:
                print(f"Warning: failed to destroy collection: {e}")

    @pytest.fixture(scope="function")
    def test_collection(self, tmp_path_factory):
        """Fixture to create a test collection"""
        collection_schema = self._build_schema(
            "test_collection", with_weight=True, with_sparse=True
        )
        collection_option = CollectionOption(read_only=False, enable_mmap=True)

        temp_dir = tmp_path_factory.mktemp("zvec")
        collection_path = temp_dir / "test_collection"

        coll = zvec.create_and_open(
            path=str(collection_path),
            schema=collection_schema,
            option=collection_option,
        )

        assert coll is not None, "Failed to create and open collection"

        yield coll

        # Clean up
        self._destroy_quietly(coll)

    def test_create_and_open_missing_path(self, tmp_path_factory):
        """``create_and_open`` without ``path`` must raise."""
        collection_schema = self._build_schema("test_collection")
        collection_option = CollectionOption(read_only=False, enable_mmap=True)

        with pytest.raises(Exception) as exc_info:
            zvec.create_and_open(schema=collection_schema, option=collection_option)
        assert exc_info.value is not None, (
            "Expected exception for missing path parameter"
        )

    def test_create_and_open_missing_schema(self, tmp_path_factory):
        """``create_and_open`` without ``schema`` must raise."""
        temp_dir = tmp_path_factory.mktemp("zvec")
        collection_path = temp_dir / "test_collection"

        collection_option = CollectionOption(read_only=False, enable_mmap=True)

        with pytest.raises(Exception) as exc_info:
            zvec.create_and_open(path=str(collection_path), option=collection_option)
        assert exc_info.value is not None, (
            "Expected exception for missing schema parameter"
        )

    def test_open_missing_path(self):
        """``open`` without ``path`` must raise."""
        collection_option = CollectionOption(read_only=False, enable_mmap=True)

        with pytest.raises(Exception) as exc_info:
            zvec.open(option=collection_option)
        assert exc_info.value is not None, (
            "Expected exception for missing path parameter"
        )

    def test_insert_missing_docs(self, test_collection: Collection):
        """``insert`` without documents must raise."""
        with pytest.raises(Exception) as exc_info:
            test_collection.insert()
        assert exc_info.value is not None, (
            "Expected exception for missing docs parameter"
        )

    def test_update_missing_docs(self, test_collection: Collection):
        """``update`` without documents must raise."""
        with pytest.raises(Exception) as exc_info:
            test_collection.update()
        assert exc_info.value is not None, (
            "Expected exception for missing docs parameter"
        )

    def test_upsert_missing_docs(self, test_collection: Collection):
        """``upsert`` without documents must raise."""
        with pytest.raises(Exception) as exc_info:
            test_collection.upsert()
        assert exc_info.value is not None, (
            "Expected exception for missing docs parameter"
        )

    def test_delete_missing_ids(self, test_collection: Collection):
        """``delete`` without ids must raise."""
        with pytest.raises(Exception) as exc_info:
            test_collection.delete()
        assert exc_info.value is not None, (
            "Expected exception for missing ids parameter"
        )

    def test_fetch_missing_ids(self, test_collection: Collection):
        """``fetch`` without ids must raise."""
        with pytest.raises(Exception) as exc_info:
            test_collection.fetch()
        assert exc_info.value is not None, (
            "Expected exception for missing ids parameter"
        )

    def test_query_missing_vectorquery_field_name(self, test_collection: Collection):
        """Querying with a ``VectorQuery`` built without arguments must raise."""
        with pytest.raises(Exception) as exc_info:
            test_collection.query(vectors=[VectorQuery()])
        assert exc_info.value is not None, (
            "Expected exception for missing VectorQuery field_name parameter"
        )

    def test_add_column_missing_field_schema(self, test_collection: Collection):
        """``add_column`` without a field schema must raise."""
        with pytest.raises(Exception) as exc_info:
            test_collection.add_column()
        assert exc_info.value is not None, (
            "Expected exception for missing field_schema parameter"
        )

    def test_alter_column_missing_old_name(self, test_collection: Collection):
        """``alter_column`` with only ``new_name`` must raise."""
        with pytest.raises(Exception) as exc_info:
            test_collection.alter_column(new_name="new_name")
        assert exc_info.value is not None, (
            "Expected exception for missing old_name parameter"
        )

    def test_alter_column_missing_new_name(self, test_collection: Collection):
        """``alter_column`` with only ``old_name`` must raise."""
        with pytest.raises(Exception) as exc_info:
            test_collection.alter_column(old_name="old_name")
        assert exc_info.value is not None, (
            "Expected exception for missing new_name parameter"
        )

    def test_drop_column_missing_field_name(self, test_collection: Collection):
        """``drop_column`` without a field name must raise."""
        with pytest.raises(Exception) as exc_info:
            test_collection.drop_column()
        assert exc_info.value is not None, (
            "Expected exception for missing field_name parameter"
        )

    def test_invalid_parameter_types(self, test_collection: Collection):
        """Placeholder: no assertions are implemented yet."""
        # This test depends on specific implementation details
        # Generally, we would expect TypeErrors or similar exceptions
        pass

    def test_missing_required_parameters(self, test_collection: Collection):
        """Placeholder: no assertions are implemented yet."""
        # This test depends on specific implementation details
        # Generally, we would expect TypeErrors or similar exceptions
        pass

    def test_empty_collection_operations(self, tmp_path_factory):
        """Fetch, query and update on a freshly created, empty collection."""
        collection_schema = self._build_schema("empty_test_collection")
        collection_option = CollectionOption(read_only=False, enable_mmap=True)

        temp_dir = tmp_path_factory.mktemp("zvec")
        collection_path = temp_dir / "empty_test_collection"

        coll = zvec.create_and_open(
            path=str(collection_path),
            schema=collection_schema,
            option=collection_option,
        )

        assert coll is not None, "Failed to create and open collection"

        # The collection is destroyed in ``finally`` so that a failing
        # assertion below does not leave it behind.
        try:
            # Test fetch on empty collection
            result = coll.fetch(["1"])
            assert len(result) >= 0  # May be empty or have special handling

            # Test query on empty collection
            result = coll.query()
            assert len(result) == 0

            # Test update on empty collection
            doc = Doc(
                id="1",
                fields={"id": 1, "name": "test"},
                vectors={"dense": np.random.random(128).tolist()},
            )

            # Should handle gracefully, possibly with NOT_FOUND status
            coll.update(doc)
        finally:
            # Clean up
            self._destroy_quietly(coll)

    def test_resource_management(self, test_collection: Collection):
        """Insert, fetch, query, update and delete a single document in sequence."""
        doc = Doc(
            id="1",
            fields={"id": 1, "name": "test", "weight": 80.5},
            vectors={
                "dense": np.random.random(128).tolist(),
                "sparse": {1: 1.0, 2: 2.0},
            },
        )

        # Insert
        result = test_collection.insert(doc)
        assert result.ok()

        # Fetch
        result = test_collection.fetch(["1"])
        assert len(result) == 1

        # Query
        result = test_collection.query()
        assert len(result) >= 0

        # Update
        result = test_collection.update(doc)
        assert result.ok()

        # Delete
        result = test_collection.delete("1")
        assert result.ok()

    def test_exception_resource_cleanup(self, test_collection: Collection):
        """Placeholder: no assertions are implemented yet."""
        # This test would need to simulate exception conditions
        # which is difficult without specific failure injection points
        pass


================================================
FILE: python/tests/detail/test_collection_open.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import threading
import numpy as np

from fixture_helper import *

# Every (read_only, enable_mmap, description) combination, read-write first:
#   (False, True,  "Read-write with mmap enabled")
#   (False, False, "Read-write with mmap disabled")
#   (True,  True,  "Read-only with mmap enabled")
#   (True,  False, "Read-only with mmap disabled")
COLLECTION_OPTION_TEST_CASES_VALID = [
    (
        read_only,
        enable_mmap,
        ("Read-only" if read_only else "Read-write")
        + " with mmap "
        + ("enabled" if enable_mmap else "disabled"),
    )
    for read_only in (False, True)
    for enable_mmap in (True, False)
]

# Test data for invalid paths
INVALID_PATH_LIST = [
    # Parent directories do not exist
    "/nonexistent/directory/test_collection",
    # Contains a colon
    "invalid:path",
    # Empty path
    "",
]


@pytest.fixture(scope="session")
def collection_schema():
    """Return the ``test_collection`` schema used by the open/reopen tests.

    Three non-nullable, inverted-indexed scalar fields (``id``, ``name``,
    ``weight``) plus a 128-dimensional ``dense`` vector and a ``sparse``
    vector, both with an HNSW index.
    """
    id_field = FieldSchema(
        "id",
        DataType.INT64,
        nullable=False,
        index_param=InvertIndexParam(enable_range_optimization=True),
    )
    name_field = FieldSchema(
        "name", DataType.STRING, nullable=False, index_param=InvertIndexParam()
    )
    weight_field = FieldSchema(
        "weight", DataType.FLOAT, nullable=False, index_param=InvertIndexParam()
    )

    dense_vector = VectorSchema(
        "dense",
        DataType.VECTOR_FP32,
        dimension=128,
        index_param=HnswIndexParam(),
    )
    sparse_vector = VectorSchema(
        "sparse", DataType.SPARSE_VECTOR_FP32, index_param=HnswIndexParam()
    )

    return zvec.CollectionSchema(
        name="test_collection",
        fields=[id_field, name_field, weight_field],
        vectors=[dense_vector, sparse_vector],
    )


@pytest.fixture
def single_doc():
    """Return one ``Doc`` with id ``"0"``, ``id``/``name`` fields and a dense vector."""
    doc_number = 0
    dense_values = [doc_number + 0.1] * 128
    scalar_fields = {"id": doc_number, "name": "test"}
    return Doc(
        id=f"{doc_number}",
        fields=scalar_fields,
        vectors={"dense": dense_values},
    )


@pytest.fixture(scope="function")
def test_collection(
    tmp_path_factory, collection_schema, collection_option
) -> Generator[Collection, None, None]:
    """Create a fresh collection for one test and destroy it afterwards.

    Args:
        tmp_path_factory: pytest factory used to allocate a unique directory.
        collection_schema: schema the collection is created with.
        collection_option: open options (supplied by a fixture defined
            outside this block).

    Yields:
        The opened collection, after verifying that its path, schema and
        options match what was requested.
    """
    temp_dir = tmp_path_factory.mktemp("zvec")
    collection_path = temp_dir / "test_collection"

    coll = zvec.create_and_open(
        path=str(collection_path), schema=collection_schema, option=collection_option
    )

    # The sanity checks live inside the try block so that a failing check
    # still tears down the collection that was just created.
    try:
        assert coll is not None, "Failed to create and open collection"
        assert coll.path == str(collection_path)
        assert coll.schema.name == collection_schema.name
        assert list(coll.schema.fields) == list(collection_schema.fields)
        assert list(coll.schema.vectors) == list(collection_schema.vectors)
        assert coll.option.read_only == collection_option.read_only
        assert coll.option.enable_mmap == collection_option.enable_mmap

        yield coll
    finally:
        if coll is not None and hasattr(coll, "destroy"):
            # Teardown is best effort: report the problem without masking the
            # outcome of the test itself.
            try:
                coll.destroy()
            except Exception as e:
                print(f"Warning: failed to destroy collection: {e}")


class TestCollectionOpen:
    def test_open_basic_functionality(
        self, tmp_path_factory, collection_schema, collection_option
    ):
        """Create a collection, drop the handle, reopen it and verify the data.

        Steps:
          1. Create a collection and insert three documents (ids "0".."2").
          2. Verify the documents through ``fetch`` and ``stats``.
          3. Delete the Python reference and reopen the same path with
             ``zvec.open``.
          4. Verify the reopened collection through ``fetch``, an unfiltered
             query, a filtered query (``id >= 1``), a dense vector query and
             a sparse vector query.
          5. Destroy the reopened collection (also attempted, best effort,
             when one of the checks fails).
        """
        import os
        import sys

        # Create unique temp directory
        temp_dir = tmp_path_factory.mktemp("zvec")
        collection_path = temp_dir / "test_collection"

        # Ensure the path exists
        collection_path_str = str(collection_path)
        print(f"DEBUG: Collection path: {collection_path_str}")
        print(f"DEBUG: Temp directory exists: {temp_dir.exists()}")

        # Create and open collection first
        created_coll = zvec.create_and_open(
            path=collection_path_str, schema=collection_schema, option=collection_option
        )

        assert created_coll is not None, (
            f"Failed to create collection, returned None instead of valid Collection object. Path: {collection_path_str}"
        )
        assert created_coll.path == collection_path_str, (
            f"Collection path mismatch. Expected: {collection_path_str}, Actual: {created_coll.path}"
        )
        assert created_coll.schema.name == "test_collection", (
            f"Collection schema name mismatch. Expected: test_collection, Actual: {created_coll.schema.name}"
        )

        # Insert multiple documents to verify persistence
        docs = []
        for i in range(3):
            doc = Doc(
                id=f"{i}",
                fields={"id": i, "name": f"test_{i}", "weight": float(i * 10)},
                vectors={
                    "dense": [float(j + i) for j in range(128)],
                    "sparse": {j: float(j + i) for j in range(5)},
                },
            )
            docs.append(doc)

        result = created_coll.insert(docs)
        assert len(result) == 3, f"Expected 3 insertion results, but got {len(result)}"
        for i, res in enumerate(result):
            assert res.ok(), (
                f"Insertion result {i} is not OK. Status code: {res.code()}, Message: {res.message()}"
            )

        # Verify documents were inserted using fetch interface
        fetched_docs_after_insert = created_coll.fetch(["0", "1", "2"])
        assert len(fetched_docs_after_insert) == 3, (
            f"Expected 3 fetched documents after insertion, but got {len(fetched_docs_after_insert)}"
        )
        assert "0" in fetched_docs_after_insert, (
            "Document with ID '0' not found in fetched results after insertion"
        )
        assert "1" in fetched_docs_after_insert, (
            "Document with ID '1' not found in fetched results after insertion"
        )
        assert "2" in fetched_docs_after_insert, (
            "Document with ID '2' not found in fetched results after insertion"
        )

        # Verify fetched document content after insertion
        for i in range(3):
            doc = fetched_docs_after_insert[f"{i}"]
            assert doc is not None, (
                f"Fetched document with ID '{i}' is None after insertion"
            )
            assert doc.id == f"{i}", (
                f"Document ID mismatch for document '{i}' after insertion. Expected: {i}, Actual: {doc.id}"
            )
            assert doc.field("id") == i, (
                f"Document id field mismatch for document '{i}' after insertion. Expected: {i}, Actual: {doc.field('id')}"
            )
            assert doc.field("name") == f"test_{i}", (
                f"Document name field mismatch for document '{i}' after insertion. Expected: test_{i}, Actual: {doc.field('name')}"
            )
            assert doc.field("weight") == float(i * 10), (
                f"Document weight field mismatch for document '{i}' after insertion. Expected: {float(i * 10)}, Actual: {doc.field('weight')}"
            )

            # Verify vector access after insertion
            assert doc.vector("dense") is not None, (
                f"Document {i} should have dense vector after insertion"
            )
            assert doc.vector("sparse") is not None, (
                f"Document {i} should have sparse vector after insertion"
            )

            # Verify vector types after insertion: dense is a list, sparse a dict
            assert isinstance(doc.vector("dense"), list), (
                f"Document {i} dense vector should be list after insertion, got {type(doc.vector('dense'))}"
            )
            assert isinstance(doc.vector("sparse"), dict), (
                f"Document {i} sparse vector should be dict after insertion, got {type(doc.vector('sparse'))}"
            )

        # Verify documents were inserted using stats
        stats = created_coll.stats
        assert stats is not None, "Collection stats should not be None"
        assert stats.doc_count == 3, (
            f"Document count mismatch after insertion. Expected: 3, Actual: {stats.doc_count}"
        )

        # Store the collection path before cleanup
        collection_path = created_coll.path

        # Drop the only reference to the created collection so that the path
        # can be opened again below.
        del created_coll

        # The on-disk data must survive dropping the handle. (The debug text
        # says "destroy", but only the Python reference has been deleted.)
        print(f"DEBUG: Collection path after destroy: {collection_path}")
        print(f"DEBUG: Path exists after destroy: {os.path.exists(collection_path)}")

        # Now open the existing collection
        opened_coll = None
        try:
            print(f"DEBUG: Path exists before open: {os.path.exists(collection_path)}")

            # List contents of parent directory for debugging
            parent_dir = os.path.dirname(collection_path)
            if os.path.exists(parent_dir):
                print(f"DEBUG: Parent directory contents: {os.listdir(parent_dir)}")

            opened_coll = zvec.open(path=collection_path, option=collection_option)

            assert opened_coll is not None, (
                f"Failed to open existing collection at path: {collection_path}. Returned None instead of valid Collection object"
            )
            assert opened_coll.path == collection_path, (
                f"Opened collection path mismatch. Expected: {collection_path}, Actual: {opened_coll.path}"
            )
            assert opened_coll.schema.name == "test_collection", (
                f"Opened collection schema name mismatch. Expected: test_collection, Actual: {opened_coll.schema.name}"
            )

            # Check reference count of opened collection
            opened_ref_count = sys.getrefcount(opened_coll)
            print(f"DEBUG: Reference count of opened collection: {opened_ref_count}")

            # Verify data persistence using fetch interface
            fetched_docs = opened_coll.fetch(["0", "1", "2"])
            assert len(fetched_docs) == 3, (
                f"Expected 3 fetched documents after reopening, but got {len(fetched_docs)}"
            )
            assert "0" in fetched_docs, (
                "Document with ID '0' not found in fetched results after reopening"
            )
            assert "1" in fetched_docs, (
                "Document with ID '1' not found in fetched results after reopening"
            )
            assert "2" in fetched_docs, (
                "Document with ID '2' not found in fetched results after reopening"
            )

            # Verify fetched document content after reopening collection
            for i in range(3):
                doc = fetched_docs[f"{i}"]
                assert doc is not None, (
                    f"Fetched document with ID '{i}' is None after reopening collection"
                )
                assert doc.id == f"{i}", (
                    f"Document ID mismatch for document '{i}' after reopening. Expected: {i}, Actual: {doc.id}"
                )
                assert doc.field("id") == i, (
                    f"Document id field mismatch for document '{i}' after reopening. Expected: {i}, Actual: {doc.field('id')}"
                )
                assert doc.field("name") == f"test_{i}", (
                    f"Document name field mismatch for document '{i}' after reopening. Expected: test_{i}, Actual: {doc.field('name')}"
                )
                assert doc.field("weight") == float(i * 10), (
                    f"Document weight field mismatch for document '{i}' after reopening. Expected: {float(i * 10)}, Actual: {doc.field('weight')}"
                )

                # Verify vector access after reopening
                assert doc.vector("dense") is not None, (
                    f"Document {i} should have dense vector after reopening"
                )
                assert doc.vector("sparse") is not None, (
                    f"Document {i} should have sparse vector after reopening"
                )

                # Verify vector types after reopening: dense is a list, sparse a dict
                assert isinstance(doc.vector("dense"), list), (
                    f"Document {i} dense vector should be list after reopening, got {type(doc.vector('dense'))}"
                )
                assert isinstance(doc.vector("sparse"), dict), (
                    f"Document {i} sparse vector should be dict after reopening, got {type(doc.vector('sparse'))}"
                )

                # Verify score attribute exists
                assert hasattr(doc, "score"), (
                    f"Document {i} should have a score attribute after reopening"
                )
                assert isinstance(doc.score, (int, float)), (
                    f"Document {i} score should be numeric after reopening, got {type(doc.score)}"
                )
                # For fetch operations, score is typically 0.0
                assert doc.score == 0.0, (
                    f"Document {i} score should be 0.0 for fetch operation after reopening, but got {doc.score}"
                )

            # Test query functionality
            query_result = opened_coll.query(include_vector=True)
            assert len(query_result) == 3, (
                f"Expected 3 query results, but got {len(query_result)}"
            )

            # Verify query results have proper structure and content with detailed validation
            returned_doc_ids = set()
            for doc in query_result:
                # Verify basic document structure
                assert doc.id is not None, "Query result document should have an ID"
                assert doc.id in ["0", "1", "2"], (
                    f"Query result document ID should be one of ['0', '1', '2'], but got {doc.id}"
                )
                returned_doc_ids.add(doc.id)

                # Verify field access
                assert doc.field("id") is not None, (
                    f"Document {doc.id} should have id field"
                )
                assert doc.field("name") is not None, (
                    f"Document {doc.id} should have name field"
                )
                assert doc.field("weight") is not None, (
                    f"Document {doc.id} should have weight field"
                )

                # Verify field values
                expected_id = int(doc.id)
                assert doc.field("id") == expected_id, (
                    f"Document {doc.id} id field mismatch. Expected: {expected_id}, Actual: {doc.field('id')}"
                )
                assert doc.field("name") == f"test_{expected_id}", (
                    f"Document {doc.id} name field mismatch. Expected: test_{expected_id}, Actual: {doc.field('name')}"
                )
                assert doc.field("weight") == float(expected_id * 10), (
                    f"Document {doc.id} weight field mismatch. Expected: {float(expected_id * 10)}, Actual: {doc.field('weight')}"
                )

                # Verify vector access
                assert doc.vector("dense") is not None, (
                    f"Document {doc.id} should have dense vector"
                )
                assert doc.vector("sparse") is not None, (
                    f"Document {doc.id} should have sparse vector"
                )

                # Verify vector types
                assert isinstance(doc.vector("dense"), list), (
                    f"Document {doc.id} dense vector should be list, got {type(doc.vector('dense'))}"
                )
                assert isinstance(doc.vector("sparse"), dict), (
                    f"Document {doc.id} sparse vector should be dict, got {type(doc.vector('sparse'))}"
                )

                # Verify score attribute exists
                assert hasattr(doc, "score"), (
                    f"Document {doc.id} should have a score attribute"
                )
                assert isinstance(doc.score, (int, float)), (
                    f"Document {doc.id} score should be numeric, got {type(doc.score)}"
                )

            # Verify all expected documents are returned
            expected_doc_ids = {"0", "1", "2"}
            assert returned_doc_ids == expected_doc_ids, (
                f"Query should return all expected documents. Expected: {expected_doc_ids}, Actual: {returned_doc_ids}"
            )

            # Verify vector field names accessibility for all documents
            for doc in query_result:
                vector_names = doc.vector_names()
                expected_vector_names = {"dense", "sparse"}
                assert set(vector_names) == expected_vector_names, (
                    f"Document {doc.id} vector names mismatch. Expected: {expected_vector_names}, Actual: {set(vector_names)}"
                )

                # Verify all vector fields can be accessed
                for vector_name in expected_vector_names:
                    vector_data = doc.vector(vector_name)
                    assert vector_data is not None, (
                        f"Document {doc.id} should have accessible vector '{vector_name}'"
                    )
                    if vector_name == "dense":
                        assert isinstance(vector_data, list), (
                            f"Document {doc.id} vector '{vector_name}' should be list, got {type(vector_data)}"
                        )
                    else:
                        assert isinstance(vector_data, dict), (
                            f"Document {doc.id} vector '{vector_name}' should be dict, got {type(vector_data)}"
                        )

            # Test query with filter
            filtered_result = opened_coll.query(filter="id >= 1", include_vector=True)
            assert len(filtered_result) == 2, (
                f"Expected 2 filtered query results (id >= 1), but got {len(filtered_result)}"
            )

            # Verify filtered query results
            filtered_doc_ids = set()
            for doc in filtered_result:
                assert doc.id is not None, (
                    "Filtered query result document should have an ID"
                )
                assert doc.id in ["1", "2"], (
                    f"Filtered query result document ID should be one of ['1', '2'], but got {doc.id}"
                )
                filtered_doc_ids.add(doc.id)

                # Verify filter condition is satisfied
                doc_id = int(doc.id)
                assert doc_id >= 1, (
                    f"Document {doc.id} should satisfy filter condition id >= 1"
                )

                # Verify document structure
                assert doc.field("id") is not None, (
                    f"Document {doc.id} should have id field"
                )
                assert doc.field("name") is not None, (
                    f"Document {doc.id} should have name field"
                )
                assert doc.field("weight") is not None, (
                    f"Document {doc.id} should have weight field"
                )

                # Verify field values
                assert doc.field("id") == doc_id, (
                    f"Document {doc.id} id field mismatch. Expected: {doc_id}, Actual: {doc.field('id')}"
                )
                assert doc.field("name") == f"test_{doc_id}", (
                    f"Document {doc.id} name field mismatch. Expected: test_{doc_id}, Actual: {doc.field('name')}"
                )
                assert doc.field("weight") == float(doc_id * 10), (
                    f"Document {doc.id} weight field mismatch. Expected: {float(doc_id * 10)}, Actual: {doc.field('weight')}"
                )

                # Verify vector access
                assert doc.vector("dense") is not None, (
                    f"Document {doc.id} should have dense vector"
                )
                assert doc.vector("sparse") is not None, (
                    f"Document {doc.id} should have sparse vector"
                )

                # Verify score attribute exists
                assert hasattr(doc, "score"), (
                    f"Document {doc.id} should have a score attribute"
                )
                assert isinstance(doc.score, (int, float)), (
                    f"Document {doc.id} score should be numeric, got {type(doc.score)}"
                )

            # Verify filtered documents
            expected_filtered_ids = {"1", "2"}
            assert filtered_doc_ids == expected_filtered_ids, (
                f"Filtered query should return expected documents. Expected: {expected_filtered_ids}, Actual: {filtered_doc_ids}"
            )

            # Test vector query functionality for dense vectors
            query_vector_dense = [0.1] * 128
            vector_query_result = opened_coll.query(
                VectorQuery(field_name="dense", vector=query_vector_dense)
            )
            assert len(vector_query_result) > 0, (
                f"Expected at least 1 vector query result, but got {len(vector_query_result)}"
            )

            # Verify vector query results structure
            for doc in vector_query_result[:3]:  # Check first 3 results
                assert doc.id is not None, (
                    "Vector query result document should have an ID"
                )
                assert doc.id in ["0", "1", "2"], (
                    f"Vector query result document ID should be one of ['0', '1', '2'], but got {doc.id}"
                )

                # Verify document structure
                assert doc.field("id") is not None, (
                    f"Document {doc.id} should have id field"
                )
                assert doc.field("name") is not None, (
                    f"Document {doc.id} should have name field"
                )
                assert doc.field("weight") is not None, (
                    f"Document {doc.id} should have weight field"
                )

                # Verify vector access
                assert doc.vector("dense") is not None, (
                    f"Document {doc.id} should have dense vector"
                )
                assert doc.vector("sparse") is not None, (
                    f"Document {doc.id} should have sparse vector"
                )

                # Verify score attribute exists and is numeric
                assert hasattr(doc, "score"), (
                    f"Document {doc.id} should have a score attribute"
                )
                assert isinstance(doc.score, (int, float)), (
                    f"Document {doc.id} score should be numeric, got {type(doc.score)}"
                )

                # The sign of the score depends on the metric, so no sign is
                # required here. This comparison only fails for NaN, i.e. it
                # checks that the score is an ordinary comparable number.
                assert doc.score >= 0 or doc.score < 0, (
                    f"Document {doc.id} score should be a valid number"
                )

            # Test vector query functionality for sparse vectors
            query_vector_sparse = {1: 1.0, 2: 2.0, 3: 3.0}
            sparse_vector_query_result = opened_coll.query(
                VectorQuery(field_name="sparse", vector=query_vector_sparse)
            )
            assert len(sparse_vector_query_result) > 0, (
                f"Expected at least 1 sparse vector query result, but got {len(sparse_vector_query_result)}"
            )

            # Verify sparse vector query results structure
            for doc in sparse_vector_query_result[:3]:  # Check first 3 results
                assert doc.id is not None, (
                    "Sparse vector query result document should have an ID"
                )
                assert doc.id in ["0", "1", "2"], (
                    f"Sparse vector query result document ID should be one of ['0', '1', '2'], but got {doc.id}"
                )

                # Verify document structure
                assert doc.field("id") is not None, (
                    f"Document {doc.id} should have id field"
                )
                assert doc.field("name") is not None, (
                    f"Document {doc.id} should have name field"
                )
                assert doc.field("weight") is not None, (
                    f"Document {doc.id} should have weight field"
                )

                # Verify vector access
                assert doc.vector("dense") is not None, (
                    f"Document {doc.id} should have dense vector"
                )
                assert doc.vector("sparse") is not None, (
                    f"Document {doc.id} should have sparse vector"
                )

                # Verify score attribute exists and is numeric
                assert hasattr(doc, "score"), (
                    f"Document {doc.id} should have a score attribute"
                )
                assert isinstance(doc.score, (int, float)), (
                    f"Document {doc.id} score should be numeric, got {type(doc.score)}"
                )

            # Clean up. A failure to destroy on the success path still fails
            # the test, exactly as before.
            if hasattr(opened_coll, "destroy") and opened_coll is not None:
                opened_coll.destroy()
                # Mark as released so the finally block does not destroy twice.
                opened_coll = None
                print("DEBUG: Opened collection destroyed successfully")

        except Exception as e:
            logging.error("Exception occurred: [{}]".format(e))
            # Bare raise keeps the original traceback intact.
            raise
        finally:
            # Reached with a live handle only when a check above failed (or
            # destroy itself raised): release the collection on a best-effort
            # basis without hiding the original error.
            if opened_coll is not None and hasattr(opened_coll, "destroy"):
                try:
                    opened_coll.destroy()
                except Exception as cleanup_error:
                    print(f"Warning: failed to destroy collection: {cleanup_error}")

    @pytest.mark.parametrize(
        "read_only,enable_mmap,description", COLLECTION_OPTION_TEST_CASES_VALID
    )
    @pytest.mark.parametrize("createAndopen_enable_mmap", [True, False])
    def test_open_with_different_collection_options_valid(
        self,
        tmp_path_factory,
        createAndopen_enable_mmap,
        read_only,
        enable_mmap,
        description,
        collection_schema,
    ):
        """Reopen a collection with every valid option combination.

        The collection is first created read-write with
        ``createAndopen_enable_mmap``, the handle is dropped, and the path is
        reopened with ``(read_only, enable_mmap)``. The reopened collection is
        expected to report the requested ``read_only`` flag but the
        ``enable_mmap`` value it was *created* with. Any failure is reported
        through ``pytest.fail``.
        """
        path_str = str(tmp_path_factory.mktemp("zvec") / "test_collection")

        # Seed the directory with a writable collection.
        seed_option = CollectionOption(
            read_only=False, enable_mmap=createAndopen_enable_mmap
        )
        seed_handle = zvec.create_and_open(
            path=path_str, schema=collection_schema, option=seed_option
        )
        assert seed_handle is not None, "Failed to create collection"

        # Drop the only reference so the path can be opened again.
        del seed_handle

        reopen_option = CollectionOption(read_only=read_only, enable_mmap=enable_mmap)

        try:
            reopened = zvec.open(path=path_str, option=reopen_option)

            assert reopened is not None, (
                f"Failed to open collection with option: {description}. Returned None instead of valid Collection object. Path: {path_str}"
            )
            assert reopened.path == path_str, (
                f"Opened collection path mismatch. Expected: {path_str}, Actual: {reopened.path}"
            )
            assert reopened.schema.name == collection_schema.name, (
                f"Opened collection schema name mismatch. Expected: {collection_schema.name}, Actual: {reopened.schema.name}"
            )
            assert reopened.option.read_only == read_only, (
                f"Opened collection read_only option mismatch. Expected: {read_only}, Actual: {reopened.option.read_only}"
            )
            # Compared against the creation-time value, not the reopen request.
            assert reopened.option.enable_mmap == createAndopen_enable_mmap, (
                f"Opened collection mmap option mismatch. Expected: {createAndopen_enable_mmap}, Actual: {reopened.option.enable_mmap}"
            )

            # Destroy is only attempted on writable handles.
            if not read_only and reopened is not None and hasattr(reopened, "destroy"):
                reopened.destroy()

        except Exception as e:
            logging.error("Exception occurred: [{}]".format(e))
            pytest.fail(f"Failed to open collection with different options: {e}")

    def test_open_with_none_option(self, tmp_path_factory, collection_schema):
        """``zvec.open`` must reject ``option=None`` on an existing collection.

        The raised exception text is expected to contain
        ``incompatible function arguments``.
        """
        path_str = str(tmp_path_factory.mktemp("zvec") / "test_collection")

        # A collection has to exist at the path before the open call is tried.
        seed_handle = zvec.create_and_open(
            path=path_str,
            schema=collection_schema,
            option=CollectionOption(read_only=False, enable_mmap=True),
        )
        assert seed_handle is not None, (
            f"Failed to create collection. Returned None instead of valid Collection object. Path: {path_str}"
        )

        # Drop the only reference to the created collection.
        del seed_handle

        with pytest.raises(Exception) as raised:
            zvec.open(path=path_str, option=None)

        error_text = str(raised.value)
        assert "incompatible function arguments" in error_text, (
            f"Expected 'incompatible function arguments' error, but got: {error_text}"
        )

    def test_reopen_collection(self, tmp_path_factory):
        """Data written through one handle is visible after reopening the path.

        Inserts a single document, deletes the first handle, reopens the
        collection and checks that the document can be fetched with its
        original id and ``name`` value.
        """
        id_field = FieldSchema(
            "id",
            DataType.INT64,
            nullable=False,
            index_param=InvertIndexParam(enable_range_optimization=True),
        )
        name_field = FieldSchema(
            "name",
            DataType.STRING,
            nullable=False,
            index_param=InvertIndexParam(),
        )
        dense_vector = VectorSchema(
            "dense",
            DataType.VECTOR_FP32,
            dimension=128,
            index_param=HnswIndexParam(),
        )
        schema = zvec.CollectionSchema(
            name="test_collection",
            fields=[id_field, name_field],
            vectors=[dense_vector],
        )

        rw_option = CollectionOption(read_only=False, enable_mmap=True)
        path_str = str(tmp_path_factory.mktemp("zvec") / "test_collection")

        # First handle: create the collection and write one document.
        writer = zvec.create_and_open(path=path_str, schema=schema, option=rw_option)
        assert writer is not None, "Failed to create and open collection"

        sample_doc = Doc(
            id="1",
            fields={"id": 1, "name": "test"},
            vectors={"dense": np.random.random(128).tolist()},
        )
        insert_status = writer.insert(sample_doc)
        assert insert_status.ok()

        # Close the first collection (delete reference)
        del writer

        # Second handle: reopen the same path.
        reader = zvec.open(path=path_str, option=rw_option)
        assert reader is not None, "Failed to reopen collection"
        assert reader.path == path_str
        assert reader.schema.name == schema.name

        # The document written through the first handle must still be there.
        fetched = reader.fetch(["1"])
        assert "1" in fetched
        persisted_doc = fetched["1"]
        assert persisted_doc.id == "1"
        assert persisted_doc.field("name") == "test"

        # Best-effort cleanup: a destroy failure is only reported.
        if reader is not None and hasattr(reader, "destroy"):
            try:
                reader.destroy()
            except Exception as e:
                print(f"Warning: failed to destroy collection: {e}")

    def test_open_concurrent_same_path(self, tmp_path_factory):
        """Five threads open the same path; exactly one open may succeed.

        The collection is created, closed (when the handle exposes ``close``)
        and its reference deleted. Five threads then call ``zvec.open`` on
        the path. The test asserts one success and four failures, checks the
        surviving handle's path, and finally destroys it on a best-effort
        basis.
        """
        id_field = FieldSchema(
            "id",
            DataType.INT64,
            nullable=False,
            index_param=InvertIndexParam(enable_range_optimization=True),
        )
        name_field = FieldSchema(
            "name",
            DataType.STRING,
            nullable=False,
            index_param=InvertIndexParam(),
        )
        dense_vector = VectorSchema(
            "dense",
            DataType.VECTOR_FP32,
            dimension=128,
            index_param=HnswIndexParam(),
        )
        schema = zvec.CollectionSchema(
            name="test_collection",
            fields=[id_field, name_field],
            vectors=[dense_vector],
        )

        rw_option = CollectionOption(read_only=False, enable_mmap=True)
        path_str = str(tmp_path_factory.mktemp("zvec") / "test_collection")

        # Create the collection that the threads will compete for.
        seed_handle = zvec.create_and_open(
            path=path_str, schema=schema, option=rw_option
        )
        assert seed_handle is not None, "Failed to create collection"

        # Close the collection so we can test concurrent opening
        if seed_handle is not None and hasattr(seed_handle, "close"):
            seed_handle.close()

        # Outcome buckets shared by all workers, guarded by one lock.
        opened = []
        failures = []
        guard = threading.Lock()

        # Drop the creating handle before any worker starts.
        del seed_handle

        def open_worker(worker_id):
            """Try to open the collection and record success or failure."""
            try:
                handle = zvec.open(path=path_str, option=rw_option)
                with guard:
                    opened.append((worker_id, handle))
                # Close the collection if opened successfully
                if handle is not None and hasattr(handle, "close"):
                    handle.close()
            except Exception as e:
                with guard:
                    failures.append((worker_id, str(e)))

        # Launch each worker as soon as it is created.
        workers = []
        for worker_id in range(5):
            worker = threading.Thread(target=open_worker, args=(worker_id,))
            workers.append(worker)
            worker.start()

        # Wait for all threads to complete
        for worker in workers:
            worker.join()

        # Verify concurrency safety: only one should succeed, others should fail
        assert len(opened) == 1, (
            f"Expected exactly one successful open, but got {len(opened)}"
        )
        assert len(failures) == 4, (
            f"Expected exactly four failures, but got {len(failures)}"
        )

        # The single winner must be a usable handle bound to the right path.
        _winner_id, winner = opened[0]
        assert winner is not None, (
            "Successful open should return a valid collection"
        )
        assert winner.path == path_str, (
            "Collection path mismatch"
        )

        # Best-effort cleanup of the surviving handle.
        if winner is not None and hasattr(winner, "destroy"):
            try:
                winner.destroy()
            except Exception as e:
                print(f"Warning: failed to destroy collection: {e}")

    def test_open_with_corrupted_files(self, tmp_path_factory):
        """Opening a collection whose on-disk files are missing must raise.

        Two scenarios are exercised, each on its own freshly created
        collection:

        1. Partial corruption: roughly half of the files (at least one) are
           removed at random from the collection directory.
        2. Complete corruption: the collection directory is emptied.
        """
        import os
        import random
        import shutil

        collection_schema = zvec.CollectionSchema(
            name="test_collection",
            fields=[
                FieldSchema(
                    "id",
                    DataType.INT64,
                    nullable=False,
                    index_param=InvertIndexParam(enable_range_optimization=True),
                ),
                FieldSchema(
                    "name",
                    DataType.STRING,
                    nullable=False,
                    index_param=InvertIndexParam(),
                ),
            ],
            vectors=[
                VectorSchema(
                    "dense",
                    DataType.VECTOR_FP32,
                    dimension=128,
                    index_param=HnswIndexParam(),
                )
            ],
        )

        collection_option = CollectionOption(read_only=False, enable_mmap=True)

        temp_dir = tmp_path_factory.mktemp("zvec")
        collection_path = temp_dir / "test_collection"

        # ---- Test case 1: partial corruption -------------------------------
        created_coll = zvec.create_and_open(
            path=str(collection_path),
            schema=collection_schema,
            option=collection_option,
        )

        assert created_coll is not None, "Failed to create collection"

        # Close the collection so its files can be manipulated.
        if hasattr(created_coll, "close") and created_coll is not None:
            created_coll.close()

        collection_dir = str(collection_path)

        # Collect every regular file below the collection directory.
        files_in_dir = []
        for root, _dirs, files in os.walk(collection_dir):
            for file in files:
                files_in_dir.append(os.path.join(root, file))

        if files_in_dir:
            random.shuffle(files_in_dir)
            # Remove about half of the files, but always at least one so the
            # collection really is damaged even when it has a single file.
            delete_count = max(1, len(files_in_dir) // 2)
            for file_path in files_in_dir[:delete_count]:
                try:
                    os.remove(file_path)
                except OSError:
                    pass  # Best effort: a file that cannot be removed is left in place

        with pytest.raises(Exception):
            zvec.open(path=collection_dir, option=collection_option)

        # ---- Test case 2: complete corruption ------------------------------
        # A second collection is created next to the first one; every later
        # step of this scenario operates on this directory.
        recreated_dir = str(collection_path) + "_all"
        recreated_coll = zvec.create_and_open(
            path=recreated_dir,
            schema=collection_schema,
            option=collection_option,
        )

        assert recreated_coll is not None, "Failed to recreate collection"

        # Close the collection so its files can be manipulated.
        if hasattr(recreated_coll, "close") and recreated_coll is not None:
            recreated_coll.close()

        # Replace the collection directory with an empty one.
        try:
            shutil.rmtree(recreated_dir)
            os.makedirs(recreated_dir)
        except OSError:
            pass  # Best effort: the open below still reports what happened

        with pytest.raises(Exception):
            zvec.open(path=recreated_dir, option=collection_option)


================================================
FILE: python/tests/detail/test_collection_recall.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest

from zvec.typing import DataType, StatusCode, MetricType, QuantizeType
from zvec.model import Collection, Doc, VectorQuery
from zvec.model.param import (
    CollectionOption,
    InvertIndexParam,
    HnswIndexParam,
    FlatIndexParam,
    IVFIndexParam,
    HnswQueryParam,
    IVFQueryParam,
)

from zvec.model.schema import FieldSchema, VectorSchema
from zvec.extension import RrfReRanker, WeightedReRanker, QwenReRanker
from distance_helper import *

from zvec import StatusCode
from distance_helper import *
from fixture_helper import *
from doc_helper import *
from params_helper import *

import time


# ==================== helper ====================
def batchdoc_and_check(collection: Collection, multiple_docs, operator="insert"):
    """Write a batch of documents and verify that they can be fetched back.

    Args:
        collection: Collection the documents are written to.
        multiple_docs: Documents to write; each must expose an ``id``.
        operator: One of ``"insert"``, ``"upsert"`` or ``"update"``; selects
            the collection method that receives the batch.

    Raises:
        ValueError: If ``operator`` is not one of the supported names.
        AssertionError: If a per-document status is not OK, the collection
            has no stats, or a fetched document differs from the written one.
    """
    if operator == "insert":
        result = collection.insert(multiple_docs)
    elif operator == "upsert":
        result = collection.upsert(multiple_docs)
    elif operator == "update":
        result = collection.update(multiple_docs)
    else:
        # Fail loudly here: without a result there is nothing to verify below.
        raise ValueError(
            f"Unsupported operator {operator!r}; "
            "expected 'insert', 'upsert' or 'update'"
        )

    # One status per submitted document, and every status must be OK.
    assert len(result) == len(multiple_docs)
    for item in result:
        assert item.ok(), (
            f"result={result},{operator} operation failed with code {item.code()}"
        )

    stats = collection.stats
    assert stats is not None, "Collection stats should not be None"
    # The document count is intentionally not compared with
    # len(multiple_docs): this helper may be called several times on the
    # same collection.

    doc_ids = [doc.id for doc in multiple_docs]
    fetched_docs = collection.fetch(doc_ids)
    assert len(fetched_docs) == len(multiple_docs), (
        f"fetched_docs={fetched_docs},Expected {len(multiple_docs)} fetched documents, but got {len(fetched_docs)}"
    )

    for original_doc in multiple_docs:
        assert original_doc.id in fetched_docs, (
            f"Expected document ID {original_doc.id} in fetched documents"
        )
        fetched_doc = fetched_docs[original_doc.id]

        assert is_doc_equal(fetched_doc, original_doc, collection.schema)

        assert hasattr(fetched_doc, "score"), "Document should have a score attribute"
        assert fetched_doc.score == 0.0, (
            "Fetch operation should return default score of 0.0"
        )


def compute_exact_similarity_scores(
    vectors_a,
    vectors_b,
    metric_type=MetricType.IP,
    DataType=DataType.VECTOR_FP32,
    QuantizeType=QuantizeType.UNDEFINED,
):
    """Score every vector in ``vectors_b`` against every vector in ``vectors_a``.

    Args:
        vectors_a: Query vectors.
        vectors_b: Candidate vectors; a candidate's position in this sequence
            is used as its identifier in the result.
        metric_type: Metric passed to ``distance_recall``.
        DataType: Vector data type passed to ``distance_recall``.
        QuantizeType: Accepted for signature compatibility; not used here.

    Returns:
        A list of ``(candidate_index, score)`` tuples covering all pairs,
        ordered best match first: ascending for L2 (FP32/FP16/INT8) and
        COSINE (FP32/FP16), descending otherwise.
    """
    scored_pairs = [
        (candidate_idx, distance_recall(query_vec, candidate, metric_type, DataType))
        for query_vec in vectors_a
        for candidate_idx, candidate in enumerate(vectors_b)
    ]

    # For these metric/type combinations the score is a distance, so the
    # smallest value is the best match.
    l2_distance = metric_type in [MetricType.L2] and DataType in [
        DataType.VECTOR_FP32,
        DataType.VECTOR_FP16,
        DataType.VECTOR_INT8,
    ]
    cosine_distance = metric_type in [MetricType.COSINE] and DataType in [
        DataType.VECTOR_FP32,
        DataType.VECTOR_FP16,
    ]
    smaller_is_better = l2_distance or cosine_distance
    scored_pairs.sort(key=lambda pair: pair[1], reverse=not smaller_is_better)

    # FP16 cosine scores can drift slightly outside [0, 2]; clamp them back.
    if metric_type == MetricType.COSINE and DataType == DataType.VECTOR_FP16:
        scored_pairs = [
            (candidate_idx, max(0.0, min(2.0, value)))
            for candidate_idx, value in scored_pairs
        ]

    return scored_pairs


def get_ground_truth_for_vector_query(
    collection,
    query_vector,
    field_name,
    all_docs,
    query_idx,
    metric_type,
    k,
    use_exact_computation=False,
):
    """Return the top-``k`` ground-truth ``(id, score)`` pairs for one query.

    Args:
        collection: Collection to query when ``use_exact_computation`` is false.
        query_vector: The query vector.
        field_name: Name of the vector field being searched.
        all_docs: All documents; each exposes ``vectors[field_name]`` and ``id``.
        query_idx: Index into ``all_docs`` of the document the query came from;
            used as a fallback when the collection query returns nothing.
        metric_type: Metric used for the exact computation.
        k: Number of ground-truth entries to return.
        use_exact_computation: When true, scores are computed directly with
            ``compute_exact_similarity_scores`` and the returned identifiers
            are positions in ``all_docs``; otherwise the collection itself is
            queried and the identifiers are document ids.

    Raises:
        ValueError: In exact mode, when ``field_name`` is not one of the values
            of ``DEFAULT_VECTOR_FIELD_NAME``.
    """
    if use_exact_computation:
        all_vectors = [doc.vectors[field_name] for doc in all_docs]

        # Reverse lookup: DEFAULT_VECTOR_FIELD_NAME maps data type -> field name.
        for candidate_type, candidate_field in DEFAULT_VECTOR_FIELD_NAME.items():
            if field_name == candidate_field:
                vector_data_type = candidate_type
                break
        else:
            raise ValueError(
                f"Unknown vector field {field_name!r}: "
                "no matching entry in DEFAULT_VECTOR_FIELD_NAME"
            )

        similarities = compute_exact_similarity_scores(
            [query_vector],
            all_vectors,
            metric_type,
            DataType=vector_data_type,
            QuantizeType=QuantizeType,
        )

        if (
            metric_type == MetricType.COSINE
            and vector_data_type == DataType.VECTOR_FP16
        ):
            # Clamp FP16 cosine scores to [0, 2]. The helper above applies the
            # same clamp, so this is repeated only defensively.
            similarities = [
                (idx, max(0.0, min(2.0, score))) for idx, score in similarities
            ]

        ground_truth_ids_scores = similarities[:k]
        print("Get the most similar k document IDs k:,ground_truth_ids_scores")
        print(k, ground_truth_ids_scores)
        return ground_truth_ids_scores

    full_result = collection.query(
        VectorQuery(field_name=field_name, vector=query_vector),
        topk=min(len(all_docs), 1024),
        include_vector=True,
    )

    ground_truth_ids_scores = [(result.id, result.score) for result in full_result[:k]]

    # Nothing retrieved: fall back to the document the query was taken from.
    if not ground_truth_ids_scores:
        ground_truth_ids_scores = [(all_docs[query_idx].id, 0)]

    return ground_truth_ids_scores


def get_ground_truth_map(collection, test_docs, query_vectors_map, metric_type, k):
    """Build the ground truth for every query of every vector field.

    Args:
        collection: Forwarded to ``get_ground_truth_for_vector_query``.
        test_docs: All documents the queries are evaluated against.
        query_vectors_map: Mapping of field name to a list of query vectors.
        metric_type: Metric used for the exact score computation.
        k: Number of ground-truth entries kept per query.

    Returns:
        ``{field_name: {query_index: [(id, score), ...]}}`` computed in exact
        mode.
    """
    truth_by_field = {
        vector_field: {
            position: get_ground_truth_for_vector_query(
                collection,
                vector,
                vector_field,
                test_docs,
                position,
                metric_type,
                k,
                use_exact_computation=True,
            )
            for position, vector in enumerate(vectors)
        }
        for vector_field, vectors in query_vectors_map.items()
    }

    print("ground_truth_map:\n")
    print(truth_by_field)
    return truth_by_field


def calculate_recall_at_k(
    collection: Collection,
    test_docs,
    query_vectors_map,
    schema,
    k=1,
    expected_doc_ids_scores_map=None,
    tolerance=0.01,
):
    """Compute Recall@K per vector field against a precomputed ground truth.

    Args:
        collection: Collection to query.
        test_docs: Not used by this function; kept for signature compatibility.
        query_vectors_map: Mapping of field name to a list of query vectors.
        schema: Not used by this function; kept for signature compatibility.
        k: Only the first ``k`` results of each query are compared.
        expected_doc_ids_scores_map: ``{field_name: {query_index: [(id, score),
            ...]}}``. A missing field or query index counts as "no relevant
            documents" for that query. ``None`` is treated as an empty mapping.
        tolerance: A result whose id differs from the expected id still counts
            as a hit when its score is within this absolute distance of the
            expected score.

    Returns:
        ``{field_name: stats}`` where ``stats`` holds
        ``relevant_retrieved_count``, ``total_relevant_count``,
        ``retrieved_count`` and ``recall_at_k``.
    """
    if expected_doc_ids_scores_map is None:
        expected_doc_ids_scores_map = {}

    recall_stats = {}

    for field_name, query_vectors in query_vectors_map.items():
        field_stats = {
            "relevant_retrieved_count": 0,
            "total_relevant_count": 0,
            "retrieved_count": 0,
            "recall_at_k": 0.0,
        }
        recall_stats[field_name] = field_stats
        field_expected = expected_doc_ids_scores_map.get(field_name, {})

        for i, query_vector in enumerate(query_vectors):
            print("Starting %dth query" % i)

            query_result_list = collection.query(
                VectorQuery(field_name=field_name, vector=query_vector),
                topk=1024,
                include_vector=True,
            )
            query_result_ids_scores = [(hit.id, hit.score) for hit in query_result_list]
            field_stats["retrieved_count"] += len(query_result_list)

            print("expected_doc_ids_scores_map:\n")
            print(expected_doc_ids_scores_map)
            # Resolve the ground truth for THIS query only; never carry over
            # the previous query's expectations when an entry is missing.
            if i in field_expected:
                expected_relevant_ids_scores = field_expected[i]
            else:
                expected_relevant_ids_scores = []
            print(
                "field_name,i,expected_relevant_ids_scores, query_result_ids_scores:\n"
            )
            print(
                field_name,
                i,
                "\n",
                expected_relevant_ids_scores,
                "\n",
                len(query_result_ids_scores),
                query_result_ids_scores,
            )

            field_stats["total_relevant_count"] += len(expected_relevant_ids_scores)

            relevant_found_count = 0
            top_results = query_result_ids_scores[:k]
            for ids_scores_except in expected_relevant_ids_scores:
                for ids_scores_result in top_results:
                    if int(ids_scores_result[0]) == int(ids_scores_except[0]):
                        relevant_found_count += 1
                        break
                    if abs(ids_scores_result[1] - ids_scores_except[1]) <= tolerance:
                        # A different document with a near-identical score is
                        # accepted as an equally good answer.
                        print("IDs are not equal, but the error is small, tolerance")
                        print(
                            ids_scores_result[0],
                            ids_scores_except[0],
                            ids_scores_result[1],
                            ids_scores_except[1],
                            tolerance,
                        )
                        relevant_found_count += 1
                        break

            field_stats["relevant_retrieved_count"] += relevant_found_count

        # Recall@K stays 0.0 when the field has no expected documents at all.
        if field_stats["total_relevant_count"] > 0:
            field_stats["recall_at_k"] = (
                field_stats["relevant_retrieved_count"]
                / field_stats["total_relevant_count"]
            )

    return recall_stats


class TestRecall:
    """Recall@K checks for single-vector queries across index configurations.

    Both tests share one scenario, implemented in
    ``_check_single_vector_recall``; they differ only in the parametrized
    schemas and in the number of documents and queries.
    """

    # Number of documents sent to the collection per insert call.
    _INSERT_BATCH_SIZE = 1000

    def _check_single_vector_recall(
        self, collection, schema, doc_num, query_num, top_k
    ):
        """Insert ``doc_num`` documents and assert Recall@``top_k`` is 1.0.

        The first ``query_num`` inserted documents are used as queries for
        every vector field listed in ``DEFAULT_VECTOR_FIELD_NAME``. The metric
        used for the exact ground truth is the one configured on the
        ``vector_fp32_field`` of ``schema``.
        """
        for vector_para in schema.vectors:
            if vector_para.name == "vector_fp32_field":
                metric_type = vector_para.index_param.metric_type
                break
        else:
            pytest.fail("schema has no vector field named 'vector_fp32_field'")

        multiple_docs = [
            generate_doc_recall(i, collection.schema) for i in range(doc_num)
        ]
        print("len(multiple_docs):\n")
        print(len(multiple_docs))

        # Insert in fixed-size batches; only non-empty batches are sent and
        # no document is left out, whatever doc_num is.
        for start in range(0, len(multiple_docs), self._INSERT_BATCH_SIZE):
            batchdoc_and_check(
                collection,
                multiple_docs[start : start + self._INSERT_BATCH_SIZE],
                operator="insert",
            )

        stats = collection.stats
        assert stats.doc_count == len(multiple_docs)

        # Debug output: stored vs. generated sparse vectors of the first two docs.
        doc_ids = ["0", "1"]
        fetched_docs = collection.fetch(doc_ids)
        print("fetched_docs,multiple_docs")
        print(
            fetched_docs[doc_ids[0]].vectors["sparse_vector_fp32_field"],
            fetched_docs[doc_ids[0]].vectors["sparse_vector_fp16_field"],
            fetched_docs[doc_ids[1]].vectors["sparse_vector_fp32_field"],
            fetched_docs[doc_ids[1]].vectors["sparse_vector_fp16_field"],
            "\n",
            multiple_docs[0].vectors["sparse_vector_fp32_field"],
            multiple_docs[0].vectors["sparse_vector_fp16_field"],
            multiple_docs[1].vectors["sparse_vector_fp32_field"],
            multiple_docs[1].vectors["sparse_vector_fp16_field"],
        )

        collection.optimize(option=OptimizeOption())

        time.sleep(2)

        query_vectors_map = {}
        for field_name in DEFAULT_VECTOR_FIELD_NAME.values():
            query_vectors_map[field_name] = [
                multiple_docs[i].vectors[field_name] for i in range(query_num)
            ]

        # Get ground truth mapping
        ground_truth_map = get_ground_truth_map(
            collection, multiple_docs, query_vectors_map, metric_type, top_k
        )

        # Validate ground truth mapping structure
        for field_name in DEFAULT_VECTOR_FIELD_NAME.values():
            assert field_name in ground_truth_map
            field_gt = ground_truth_map[field_name]
            assert len(field_gt) == query_num

            for query_idx in range(query_num):
                assert query_idx in field_gt
                relevant_ids = field_gt[query_idx]
                assert isinstance(relevant_ids, list)
                assert len(relevant_ids) <= top_k

        # Print ground truth statistics
        print(f"Ground Truth for Top-{top_k} Retrieval:")
        for field_name, field_gt in ground_truth_map.items():
            print(f"  {field_name}:")
            for query_idx, relevant_ids in field_gt.items():
                print(
                    f" Query {query_idx}: {len(relevant_ids)} relevant docs - {relevant_ids[:5]}{'...' if len(relevant_ids) > 5 else ''}"
                )

        # Calculate Recall@K using ground truth
        recall_at_k_stats = calculate_recall_at_k(
            collection,
            multiple_docs,
            query_vectors_map,
            schema,
            k=top_k,
            expected_doc_ids_scores_map=ground_truth_map,
            tolerance=0.01,
        )
        print("ground_truth_map:\n")
        print(ground_truth_map)

        print("(recall_at_k_stats:\n")
        print(recall_at_k_stats)
        print("metric_type:")
        print(metric_type)
        # Print Recall@K statistics
        print(f"Recall@{top_k} using Ground Truth:")
        for field_name, field_stats in recall_at_k_stats.items():
            print(f"  {field_name}:")
            print(
                f"    Relevant Retrieved: {field_stats['relevant_retrieved_count']}/{field_stats['total_relevant_count']}"
            )
            print(f"    Recall@{top_k}: {field_stats['recall_at_k']:.4f}")
        for field_name, field_stats in recall_at_k_stats.items():
            assert field_stats["recall_at_k"] == 1.0, (
                f"Recall@{top_k} for {field_name} is {field_stats['recall_at_k']}"
            )

    @pytest.mark.parametrize(
        "full_schema_new",
        [
            (True, True, HnswIndexParam()),
            (False, True, IVFIndexParam()),
            (False, True, FlatIndexParam()),  # ——ok
            (
                True,
                True,
                HnswIndexParam(
                    metric_type=MetricType.IP,
                    m=16,
                    ef_construction=100,
                ),
            ),
            (
                True,
                True,
                HnswIndexParam(
                    metric_type=MetricType.COSINE,
                    m=24,
                    ef_construction=150,
                ),
            ),
            (
                True,
                True,
                HnswIndexParam(
                    metric_type=MetricType.L2,
                    m=32,
                    ef_construction=200,
                ),
            ),
            (
                False,
                True,
                FlatIndexParam(
                    metric_type=MetricType.IP,
                ),
            ),
            (
                True,
                True,
                FlatIndexParam(
                    metric_type=MetricType.COSINE,
                ),
            ),
            (
                True,
                True,
                FlatIndexParam(
                    metric_type=MetricType.L2,
                ),
            ),
            (
                True,
                True,
                IVFIndexParam(
                    metric_type=MetricType.IP,
                    n_list=100,
                    n_iters=10,
                    use_soar=False,
                ),
            ),
            (
                True,
                True,
                IVFIndexParam(
                    metric_type=MetricType.L2,
                    n_list=200,
                    n_iters=20,
                    use_soar=True,
                ),
            ),
            (
                True,
                True,
                IVFIndexParam(
                    metric_type=MetricType.COSINE,
                    n_list=150,
                    n_iters=15,
                    use_soar=False,
                ),
            ),
        ],
        indirect=True,
    )
    @pytest.mark.parametrize("doc_num", [500])
    @pytest.mark.parametrize("query_num", [10])
    @pytest.mark.parametrize("top_k", [1])
    def test_recall_with_single_vector_valid_500(
        self,
        full_collection_new: Collection,
        doc_num,
        query_num,
        top_k,
        full_schema_new,
        request,
    ):
        """Recall@1 must be 1.0 with 500 documents and 10 queries."""
        full_schema_params = request.getfixturevalue("full_schema_new")
        self._check_single_vector_recall(
            full_collection_new, full_schema_params, doc_num, query_num, top_k
        )

    @pytest.mark.parametrize(
        "full_schema_new",
        [
            (True, True, HnswIndexParam()),
            (False, True, IVFIndexParam()),
            (False, True, FlatIndexParam()),  # ——ok
            (
                True,
                True,
                HnswIndexParam(
                    metric_type=MetricType.IP,
                    m=16,
                    ef_construction=100,
                ),
            ),
            (
                True,
                True,
                HnswIndexParam(
                    metric_type=MetricType.COSINE,
                    m=24,
                    ef_construction=150,
                ),
            ),
            # (True, True, HnswIndexParam(metric_type=MetricType.L2, m=32, ef_construction=200, )),
            (
                False,
                True,
                FlatIndexParam(
                    metric_type=MetricType.IP,
                ),
            ),
            (
                True,
                True,
                FlatIndexParam(
                    metric_type=MetricType.COSINE,
                ),
            ),
            # (True, True, FlatIndexParam(metric_type=MetricType.L2, )),
            (
                True,
                True,
                IVFIndexParam(
                    metric_type=MetricType.IP,
                    n_list=100,
                    n_iters=10,
                    use_soar=False,
                ),
            ),
            (
                True,
                True,
                IVFIndexParam(
                    metric_type=MetricType.L2,
                    n_list=200,
                    n_iters=20,
                    use_soar=True,
                ),
            ),
            # (True, True, IVFIndexParam(metric_type=MetricType.COSINE, n_list=150, n_iters=15, use_soar=False, )),
        ],
        indirect=True,
    )
    @pytest.mark.parametrize("doc_num", [2000])
    @pytest.mark.parametrize("query_num", [2])
    @pytest.mark.parametrize("top_k", [1])
    @pytest.mark.skip(reason="known bug")
    def test_recall_with_single_vector_valid_2000(
        self,
        full_collection_new: Collection,
        doc_num,
        query_num,
        top_k,
        full_schema_new,
        request,
    ):
        """Recall@1 must be 1.0 with 2000 documents and 2 queries (skipped)."""
        full_schema_params = request.getfixturevalue("full_schema_new")
        self._check_single_vector_recall(
            full_collection_new, full_schema_params, doc_num, query_num, top_k
        )


================================================
FILE: python/tests/detail/test_db_config.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import pytest
import tempfile
import os
import sys
import subprocess

import zvec
import zvec
from zvec import LogType, LogLevel

# Error messages
# NOTE(review): these look like expected error-message fragments and exception
# names for the checks below; none of them is referenced in the part of the
# file reviewed here — confirm they are still needed.
INITIALIZATION_ERROR_MSG = "initialization failed"
RUNTIME_ERROR_MSG = "RuntimeError"
VALUE_ERROR_MSG = "ValueError"
TYPE_ERROR_MSG = "TypeError"


# ==================== helper ====================
def run_in_subprocess(func):
    """Decorator that runs a test in its own ``pytest`` subprocess.

    When the ``RUNNING_IN_SUBPROCESS`` environment variable is set, the
    wrapped function is called directly and its result returned. Otherwise a
    child ``python -m pytest`` process is started for just this test with that
    variable set, and the calling test fails (with the child's captured
    stdout/stderr) if the child exits with a non-zero status.

    The wrapper keeps the wrapped function's name, qualified name and
    docstring.
    """
    import functools
    import inspect

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        if os.getenv("RUNNING_IN_SUBPROCESS"):
            return func(*args, **kwargs)

        env = os.environ.copy()
        env["RUNNING_IN_SUBPROCESS"] = "1"
        env["PYTEST_CURRENT_TEST"] = func.__name__

        # Build the pytest node id "<file>::<Class>::<method>" for this test.
        filepath = inspect.getfile(func)
        qualname = func.__qualname__.replace(".", "::")
        test_id = f"{filepath}::{qualname}"

        # Put the parent of this file's directory first on the child's
        # PYTHONPATH, using the platform separator and without leaving an
        # empty entry when PYTHONPATH was unset.
        project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        search_path = [project_root, env.get("PYTHONPATH", "")]
        env["PYTHONPATH"] = os.pathsep.join(entry for entry in search_path if entry)

        cmd = [sys.executable, "-m", "pytest", "-v", "-s", test_id]

        result = subprocess.run(cmd, env=env, capture_output=True, text=True)
        if result.returncode != 0:
            pytest.fail(
                f"Subprocess test {func.__name__} failed with code {result.returncode}\n"
                f"STDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
            )

    return wrapper


# ==================== Fixtures ====================
@pytest.fixture(scope="function")
def temp_log_dir(tmp_path_factory):
    """Provide a new temporary directory, based on the name ``logs``, per test."""
    log_directory = tmp_path_factory.mktemp("logs")
    return log_directory


# ==================== Tests ====================
class TestDbConfigInitialization:
    """Checks that ``zvec.init`` accepts the supported configurations.

    Each test is wrapped in ``run_in_subprocess`` and so runs in its own
    pytest child process.
    """

    @run_in_subprocess
    def test_init_default(self):
        """``init()`` without arguments succeeds."""
        # default config
        # log_type: Optional[LogType] = LogType.CONSOLE,
        # log_level: Optional[LogLevel] = LogLevel.WARN,
        # log_dir: Optional[str] = "./logs",
        # log_basename: Optional[str] = "zvec.log",
        # log_file_size: Optional[int] = 2048,
        # log_overdue_days: Optional[int] = 7,
        zvec.init()

    @run_in_subprocess
    def test_init_file_logger(self):
        """File logging creates ``./logs`` and writes ``zvec.log.*`` files."""
        from pathlib import Path
        import shutil

        zvec.init(
            log_level=LogLevel.DEBUG,
            log_type=LogType.FILE,
        )
        log_dir = Path("./logs")
        try:
            # assert logdir exist
            assert log_dir.exists()

            # validate write log; the collection lives in a private temporary
            # directory so the test neither depends on nor pollutes /tmp/test.
            with tempfile.TemporaryDirectory() as work_dir:
                col = zvec.create_and_open(
                    os.path.join(work_dir, "collection"),
                    zvec.CollectionSchema(
                        name="test",
                        vectors=zvec.VectorSchema(
                            dimension=4,
                            data_type=zvec.DataType.VECTOR_FP32,
                            name="image",
                        ),
                    ),
                )
                try:
                    col.insert(
                        docs=[zvec.Doc(id="1", vectors={"image": [1.0, 2.0, 3.0, 4.0]})]
                    )
                    assert any(log_dir.glob("zvec.log.*"))
                finally:
                    # Always drop the collection, even when a check above fails.
                    col.destroy()
        finally:
            # clear
            shutil.rmtree(log_dir, ignore_errors=True)

    @run_in_subprocess
    def test_init_with_mixed_config(self):
        """``init()`` accepts memory, logging and thread options together."""
        zvec.init(
            memory_limit_mb=128,
            log_type=LogType.FILE,
            query_threads=1,
            log_level=LogLevel.WARN,
        )

    @run_in_subprocess
    def test_repeated_initialization(self):
        """Calling ``init()`` a second time succeeds."""
        # Calling init() repeatedly is allowed:
        # it succeeds but becomes a no-op after the first successful init()
        zvec.init()
        zvec.init()


class TestDbConfigMemoryLimitValidation:
    """Checks that ``zvec.init`` rejects unsupported ``memory_limit_mb`` values."""

    @run_in_subprocess
    def test_memory_limit_min_valid(self):
        """A limit of 99 MB is expected to fail with ``RuntimeError``."""
        # MIN_MEMORY_LIMIT_BYTES is 100M
        below_minimum_mb = 99
        with pytest.raises(RuntimeError):
            zvec.init(memory_limit_mb=below_minimum_mb)

    @run_in_subprocess
    def test_memory_limit_invalid_value(self):
        """Zero/negative values raise ``ValueError``; str/float raise ``TypeError``."""
        # memory_limit_mb must be a positive int (None selects the default value)
        rejected_inputs = (
            (0, ValueError),
            (-1, ValueError),
            ("512", TypeError),
            (512.5, TypeError),
        )
        for bad_limit, expected_error in rejected_inputs:
            with pytest.raises(expected_error):
                zvec.init(memory_limit_mb=bad_limit)


class TestDbConfigThreadValidation:
    """Validation of the ``query_threads`` and ``optimize_threads`` init arguments."""

    @run_in_subprocess
    def test_query_threads(self):
        """A single query thread is accepted."""
        thread_count = 1
        zvec.init(query_threads=thread_count)

    @run_in_subprocess
    def test_query_threads_invalid(self):
        """Zero/negative counts raise ``ValueError``; non-int values raise ``TypeError``."""
        # query_threads must be a positive int (None selects the default value)
        rejected_inputs = (
            (0, ValueError),
            (-1, ValueError),
            ("value", TypeError),
            (512.5, TypeError),
            ("512", TypeError),
        )
        for bad_threads, expected_error in rejected_inputs:
            with pytest.raises(expected_error):
                zvec.init(query_threads=bad_threads)

    @run_in_subprocess
    def test_optimize_threads(self):
        """A single optimize thread is accepted."""
        thread_count = 1
        zvec.init(optimize_threads=thread_count)

    @run_in_subprocess
    def test_optimize_threads_invalid(self):
        """Zero/negative counts raise ``ValueError``; non-int values raise ``TypeError``."""
        # optimize_threads must be a positive int (None selects the default value)
        rejected_inputs = (
            (0, ValueError),
            (-1, ValueError),
            ("value", TypeError),
            (512.5, TypeError),
            ("512", TypeError),
        )
        for bad_threads, expected_error in rejected_inputs:
            with pytest.raises(expected_error):
                zvec.init(optimize_threads=bad_threads)


class TestDbConfigRatioValidation:
    """Validation of the two ratio arguments accepted by ``zvec.init``."""

    @run_in_subprocess
    def test_init_invert_to_forward_scan_ratio(self):
        """A ratio of 0.8 is accepted."""
        # must be in [0,1]
        ratio = 0.8
        zvec.init(invert_to_forward_scan_ratio=ratio)

    @run_in_subprocess
    def test_init_invert_to_forward_scan_ratio_invalid(self):
        """Out-of-range ratios raise ``ValueError``; a string raises ``TypeError``."""
        rejected_inputs = (
            (1.1, ValueError),
            (-0.1, ValueError),
            ("0.8", TypeError),
        )
        for bad_ratio, expected_error in rejected_inputs:
            with pytest.raises(expected_error):
                zvec.init(invert_to_forward_scan_ratio=bad_ratio)

    @run_in_subprocess
    def test_init_brute_force_by_keys_ratio(self):
        """A ratio of 0.8 is accepted."""
        ratio = 0.8
        zvec.init(brute_force_by_keys_ratio=ratio)

    @run_in_subprocess
    def test_init_brute_force_by_keys_ratio_invalid(self):
        """Out-of-range ratios raise ``ValueError``; a string raises ``TypeError``."""
        rejected_inputs = (
            (1.1, ValueError),
            (-0.1, ValueError),
            ("0.8", TypeError),
        )
        for bad_ratio, expected_error in rejected_inputs:
            with pytest.raises(expected_error):
                zvec.init(brute_force_by_keys_ratio=bad_ratio)


class TestDbConfigLogValidation:
    """Validation of the logging-related arguments of ``zvec.init``."""

    @run_in_subprocess
    def test_log_type_valid(self):
        """A ``LogType`` enum member is accepted."""
        zvec.init(log_type=LogType.CONSOLE)

    @run_in_subprocess
    def test_log_type_invalid(self):
        """Strings and ints are rejected with ``TypeError`` for ``log_type``."""
        with pytest.raises(TypeError):
            zvec.init(log_type="FILE")
        with pytest.raises(TypeError):
            zvec.init(log_type="")
        with pytest.raises(TypeError):
            zvec.init(log_type="invalid")
        with pytest.raises(TypeError):
            zvec.init(log_type=123)

    @run_in_subprocess
    def test_log_level_valid(self):
        """A ``LogLevel`` enum member is accepted."""
        zvec.init(log_level=LogLevel.ERROR)

    @run_in_subprocess
    def test_log_level_invalid(self):
        """Strings and ints are rejected with ``TypeError`` for ``log_level``."""
        with pytest.raises(TypeError):
            zvec.init(log_level="WARN")
        with pytest.raises(TypeError):
            zvec.init(log_level="")
        with pytest.raises(TypeError):
            zvec.init(log_level="invalid")
        with pytest.raises(TypeError):
            zvec.init(log_level=123)

    @run_in_subprocess
    def test_init_file_logger(self):
        """File logging honours a custom ``log_dir`` and ``log_basename``."""
        from pathlib import Path
        import shutil

        temp_dir = tempfile.mkdtemp(prefix="log_test_")
        abs_temp_dir = os.path.abspath(temp_dir)
        log_dir = Path(abs_temp_dir)
        # A unique scratch directory for the collection avoids clashing with
        # other tests or with leftovers from an earlier failed run at a fixed path.
        collection_root = Path(tempfile.mkdtemp(prefix="log_test_col_"))
        col = None
        try:
            zvec.init(
                log_level=LogLevel.DEBUG,
                log_type=LogType.FILE,
                log_dir=abs_temp_dir,
                log_basename="test",
            )

            # assert logdir exist
            assert log_dir.exists()

            # validate write log
            col = zvec.create_and_open(
                str(collection_root / "collection"),
                zvec.CollectionSchema(
                    name="test",
                    vectors=zvec.VectorSchema(
                        dimension=4,
                        data_type=zvec.DataType.VECTOR_FP32,
                        name="image",
                    ),
                ),
            )
            col.insert(
                docs=[zvec.Doc(id="1", vectors={"image": [1.0, 2.0, 3.0, 4.0]})]
            )
            assert any(log_dir.glob("test.*"))
        finally:
            # Clean up even when init or an assertion above fails.
            try:
                if col is not None:
                    col.destroy()
            finally:
                shutil.rmtree(collection_root, ignore_errors=True)
                shutil.rmtree(log_dir, ignore_errors=True)

    @run_in_subprocess
    def test_log_file_size_invalid(self):
        """A string size raises ``TypeError``; zero and negatives raise ``ValueError``."""
        with pytest.raises(TypeError):
            zvec.init(log_type=LogType.FILE, log_file_size="df")

        with pytest.raises(ValueError):
            zvec.init(log_type=LogType.FILE, log_file_size=0)

        with pytest.raises(ValueError):
            zvec.init(log_type=LogType.FILE, log_file_size=-1)

    @run_in_subprocess
    def test_log_overdue_days_invalid(self):
        """A string value raises ``TypeError``; zero and negatives raise ``ValueError``."""
        with pytest.raises(TypeError):
            zvec.init(log_type=LogType.FILE, log_overdue_days="df")

        with pytest.raises(ValueError):
            zvec.init(log_type=LogType.FILE, log_overdue_days=0)

        with pytest.raises(ValueError):
            zvec.init(log_type=LogType.FILE, log_overdue_days=-1)


================================================
FILE: python/tests/test_collection.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations


import pytest
import zvec
from zvec import (
    Collection,
    CollectionOption,
    DataType,
    Doc,
    FieldSchema,
    HnswIndexParam,
    InvertIndexParam,
    LogLevel,
    LogType,
    VectorSchema,
    StatusCode,
    IndexOption,
    IndexType,
    VectorQuery,
    OptimizeOption,
)

# ==================== Common ====================


@pytest.fixture(scope="session")
def collection_schema():
    """Session-scoped schema with four scalar fields and a dense plus a sparse vector.

    ``id`` and ``name`` are non-nullable and carry an invert index; ``weight``
    and ``height`` are nullable and unindexed. Both vectors use an HNSW index.
    """
    scalar_fields = [
        FieldSchema(
            "id",
            DataType.INT64,
            nullable=False,
            index_param=InvertIndexParam(enable_range_optimization=True),
        ),
        FieldSchema(
            "name", DataType.STRING, nullable=False, index_param=InvertIndexParam()
        ),
        FieldSchema("weight", DataType.FLOAT, nullable=True),
        FieldSchema("height", DataType.INT32, nullable=True),
    ]
    vector_fields = [
        VectorSchema(
            "dense",
            DataType.VECTOR_FP32,
            dimension=128,
            index_param=HnswIndexParam(),
        ),
        VectorSchema(
            "sparse", DataType.SPARSE_VECTOR_FP32, index_param=HnswIndexParam()
        ),
    ]
    return zvec.CollectionSchema(
        name="test_collection",
        fields=scalar_fields,
        vectors=vector_fields,
    )


@pytest.fixture(scope="session")
def collection_option():
    """Session-scoped open options: not read-only, with mmap enabled."""
    option = CollectionOption(enable_mmap=True, read_only=False)
    return option


@pytest.fixture
def single_doc():
    """Build one document (key ``"0"``) that fills every scalar and vector field."""
    doc_no = 0
    scalar_values = {
        "id": doc_no,
        "name": "test",
        "weight": 80.0,
        "height": doc_no + 140,
    }
    vector_values = {
        "dense": [doc_no + 0.1] * 128,
        "sparse": {1: 1.0, 2: 2.0, 3: 3.0},
    }
    return Doc(id=f"{doc_no}", fields=scalar_values, vectors=vector_values)


@pytest.fixture
def multiple_docs():
    """Build 100 documents with keys ``"1"`` .. ``"100"`` and matching ``id`` fields."""
    docs = []
    for doc_no in range(1, 101):
        docs.append(
            Doc(
                id=f"{doc_no}",
                fields={"id": doc_no, "name": "test", "weight": 80.0, "height": 210},
                vectors={
                    "dense": [doc_no + 0.1] * 128,
                    "sparse": {1: 1.0, 2: 2.0, 3: 3.0},
                },
            )
        )
    return docs


@pytest.fixture(scope="function")
def test_collection(
    tmp_path_factory, collection_schema, collection_option
) -> Collection:
    """
    Function-scoped fixture: creates, opens and yields a fresh collection.

    Every test that requests this fixture gets its own collection stored under
    a new temporary directory from ``tmp_path_factory``. On teardown the
    collection is destroyed; a failure to destroy is printed as a warning
    instead of being raised.
    """
    # Create a unique temp directory for this test function
    temp_dir = tmp_path_factory.mktemp("zvec")
    collection_path = temp_dir / "test_collection"

    coll = zvec.create_and_open(
        path=str(collection_path), schema=collection_schema, option=collection_option
    )

    try:
        # The sanity checks run inside the try block so that a failing check
        # still reaches the cleanup below instead of leaking the collection.
        assert coll is not None, "Failed to create and open collection"
        assert coll.path == str(collection_path)
        assert coll.schema.name == collection_schema.name
        assert list(coll.schema.fields) == list(collection_schema.fields)
        assert list(coll.schema.vectors) == list(collection_schema.vectors)
        assert coll.option.read_only == collection_option.read_only
        assert coll.option.enable_mmap == collection_option.enable_mmap

        yield coll
    finally:
        if coll is not None and hasattr(coll, "destroy"):
            try:
                coll.destroy()
            except Exception as e:
                # Best effort: teardown must not hide the test's own outcome.
                print(f"Warning: failed to destroy collection: {e}")


@pytest.fixture
def collection_with_single_doc(test_collection: Collection, single_doc) -> Collection:
    """Yield ``test_collection`` after inserting ``single_doc``; delete it on teardown."""
    coll = test_collection

    # Setup: the collection starts empty and holds exactly one doc afterwards
    assert coll.stats.doc_count == 0
    insert_status = coll.insert(single_doc)
    assert bool(insert_status)
    assert insert_status.ok()
    assert coll.stats.doc_count == 1

    yield coll

    # Teardown: remove the doc again and check the collection is empty
    coll.delete(single_doc.id)
    assert coll.stats.doc_count == 0


@pytest.fixture
def collection_with_multiple_docs(
    test_collection: Collection, multiple_docs
) -> Collection:
    """Yield ``test_collection`` after inserting ``multiple_docs``; delete them on teardown."""
    coll = test_collection
    expected_count = len(multiple_docs)

    # Setup: batch insert into an empty collection, one status per doc
    assert coll.stats.doc_count == 0
    statuses = coll.insert(multiple_docs)
    assert len(statuses) == expected_count
    for status in statuses:
        assert status.ok()
    assert coll.stats.doc_count == expected_count

    yield coll

    # Teardown: delete every inserted doc by id
    inserted_ids = [doc.id for doc in multiple_docs]
    coll.delete(inserted_ids)


# ==================== Tests ====================


# ----------------------------
# Config Test Case
# ----------------------------
class TestConfig:
    """Initialises zvec with console logging at ERROR level."""

    def test_config(self):
        """``zvec.init`` accepts an explicit log type, level and directory."""
        zvec.init(
            log_type=LogType.CONSOLE,
            log_level=LogLevel.ERROR,
            log_dir="./log",
        )


# ----------------------------
# Collection DDL Test Case
# ----------------------------
@pytest.mark.usefixtures("test_collection")
class TestCollectionDDL:
    """Collection-level checks on a freshly created, empty collection."""

    def test_collection_stats(self, test_collection: Collection):
        """An empty collection reports zero docs and full completeness for both vectors."""
        assert test_collection.stats is not None
        snapshot = test_collection.stats
        assert snapshot.doc_count == 0
        assert len(snapshot.index_completeness) == 2
        for vector_name in ("dense", "sparse"):
            assert snapshot.index_completeness[vector_name] == 1


# ----------------------------
# Collection Index DDL Test Case
# ----------------------------
@pytest.mark.usefixtures("test_collection")
class TestCollectionIndexDDL:
    """Index DDL tests: create and drop an invert index on scalar fields."""

    def test_create_index(self, test_collection: Collection):
        """Creating an invert index on ``weight`` is reflected in the schema."""
        # before create
        field_schema = test_collection.schema.field("weight")
        assert field_schema is not None
        assert field_schema.data_type == DataType.FLOAT
        assert field_schema.name == "weight"
        index_param = field_schema.index_param
        assert index_param is None

        # create
        test_collection.create_index(
            field_name="weight", index_param=InvertIndexParam(), option=IndexOption()
        )
        assert test_collection.schema is not None
        field_schema = test_collection.schema.field("weight")
        assert field_schema is not None
        assert field_schema.data_type == DataType.FLOAT
        assert field_schema.name == "weight"

        index_param = field_schema.index_param
        assert index_param.type == IndexType.INVERT
        assert index_param.enable_range_optimization is False
        assert index_param.enable_extended_wildcard is False

    def test_drop_index(self, test_collection: Collection):
        """Dropping the index on ``name`` keeps the field but clears its index param."""
        # before drop
        field_schema = test_collection.schema.field("name")
        assert field_schema is not None
        assert field_schema.data_type == DataType.STRING
        assert field_schema.name == "name"
        index_param = field_schema.index_param
        assert index_param.type == IndexType.INVERT
        assert index_param.enable_range_optimization is False
        assert index_param.enable_extended_wildcard is False

        # drop
        test_collection.drop_index("name")
        field_schema = test_collection.schema.field("name")
        assert field_schema is not None
        assert field_schema.data_type == DataType.STRING
        assert field_schema.name == "name"

        # without index
        index_param = field_schema.index_param
        assert index_param is None

    def test_create_index_field_is_not_exist(self, test_collection: Collection):
        """Creating an index on an unknown field raises."""
        with pytest.raises(Exception):
            test_collection.create_index(
                field_name="not_exist",
                index_param=InvertIndexParam(),
            )


# ----------------------------
# Collection Column DDL Test Case
# ----------------------------
@pytest.mark.usefixtures("test_collection")
class TestCollectionColumnDDL:
    """Column DDL tests: add, drop and alter columns on a fresh collection."""

    def test_create_column(self, test_collection: Collection):
        """A nullable INT32 column can be added and appears without an index."""
        # the column does not exist before it is added
        assert test_collection.schema.field("age") is None

        # create
        new_column = FieldSchema("age", DataType.INT32, nullable=True)
        test_collection.add_column(new_column)

        added = test_collection.schema.field("age")
        assert added is not None
        assert added.data_type == DataType.INT32
        assert added.name == "age"
        assert added.index_param is None

    def test_create_column_is_nullable(self, test_collection: Collection):
        """Adding a non-nullable column is rejected with ``ValueError``."""
        with pytest.raises(ValueError):
            test_collection.add_column(
                FieldSchema("age", DataType.INT32, nullable=False)
            )

    def test_drop_column(self, test_collection: Collection):
        """Dropping ``id`` removes the field from the schema."""
        # before drop: the column exists and carries an invert index
        column = test_collection.schema.field("id")
        assert column is not None
        assert column.data_type == DataType.INT64
        assert column.name == "id"
        invert_index = column.index_param
        assert invert_index is not None
        assert invert_index.type == IndexType.INVERT

        # drop
        test_collection.drop_column("id")
        assert test_collection.schema.field("id") is None

    def test_alert_column_to_rename(self, test_collection: Collection):
        """Renaming ``id`` to ``doc_id`` keeps type, nullability and index settings."""
        # before alter: inspect the original column
        column = test_collection.schema.field("id")
        assert column is not None
        assert column.data_type == DataType.INT64
        assert column.name == "id"
        invert_index = column.index_param
        assert invert_index is not None
        assert invert_index.type == IndexType.INVERT
        assert invert_index.enable_range_optimization is True
        assert invert_index.enable_extended_wildcard is False

        # alter: rename
        test_collection.alter_column("id", "doc_id")

        # the old name is gone
        assert test_collection.schema.field("id") is None

        # the new name carries the same definition
        renamed = test_collection.schema.field("doc_id")
        assert renamed is not None
        assert renamed.data_type == DataType.INT64
        assert renamed.name == "doc_id"
        assert renamed.nullable is False
        renamed_index = renamed.index_param
        assert renamed_index is not None
        assert renamed_index.type == IndexType.INVERT
        assert renamed_index.enable_range_optimization is True
        assert renamed_index.enable_extended_wildcard is False

    def test_alert_column_to_modify_schema(self, test_collection: Collection):
        """Altering ``id`` with a new field schema changes its name and data type."""
        # before alter: inspect the original column
        column = test_collection.schema.field("id")
        assert column is not None
        assert column.data_type == DataType.INT64
        assert column.name == "id"
        invert_index = column.index_param
        assert invert_index.type == IndexType.INVERT

        replacement = FieldSchema("doc_id", DataType.UINT64, nullable=True)
        test_collection.alter_column(old_name="id", field_schema=replacement)

        altered = test_collection.schema.field("doc_id")
        assert altered is not None
        assert altered.data_type == DataType.UINT64
        assert altered.name == "doc_id"

    def test_column_with_other_dtype(self, test_collection: Collection):
        """Column DDL on STRING columns is rejected with ``ValueError``."""
        # only allow number type
        numeric_column = FieldSchema("age", DataType.INT32, nullable=True)
        test_collection.add_column(numeric_column)

        # add a STRING column
        with pytest.raises(ValueError):
            test_collection.add_column(FieldSchema("full_name", DataType.STRING))
        # drop a STRING column
        with pytest.raises(ValueError):
            test_collection.drop_column("name")
        # rename a STRING column
        with pytest.raises(ValueError):
            test_collection.alter_column(old_name="name", new_name="full_name")
        # alter a STRING column via a new field schema
        with pytest.raises(ValueError):
            test_collection.alter_column(
                old_name="name", field_schema=FieldSchema("full_name", DataType.STRING)
            )


# ----------------------------
# Collection Optimize Test Case
# ----------------------------
@pytest.mark.usefixtures("test_collection")
class TestCollectionOptimize:
    """Smoke test for ``Collection.optimize``."""

    def test_collection_optimize(self, test_collection: Collection):
        """``optimize`` runs on an empty collection with default options."""
        default_option = OptimizeOption()
        test_collection.optimize(option=default_option)


# ----------------------------
# Collection Fetch Test Case
# ----------------------------
@pytest.mark.usefixtures("test_collection")
class TestCollectionFetch:
    """Tests for fetching documents by id."""

    def test_collection_fetch(
        self, collection_with_single_doc: Collection, single_doc: Doc
    ):
        """Fetching the inserted doc returns it with matching scalar field values."""
        fetched = collection_with_single_doc.fetch(ids=[single_doc.id])
        assert bool(fetched)
        assert single_doc.id in fetched.keys()

        found = fetched[single_doc.id]
        assert found is not None
        assert found.id == single_doc.id
        assert set(found.field_names()) == set(single_doc.field_names())
        for field_name in found.field_names():
            # entries named after the vectors are not compared
            if field_name not in ["dense", "sparse"]:
                assert found.field(field_name) == single_doc.field(field_name)

    def test_collection_fetch_contains_nodata_ids(
        self, collection_with_multiple_docs: Collection, multiple_docs: list[Doc]
    ):
        """An unknown id in the request is simply absent from the result."""
        known_ids = [doc.id for doc in multiple_docs]
        missing_id = "x"
        requested_ids = [missing_id] + known_ids
        fetched = collection_with_multiple_docs.fetch(ids=requested_ids)
        assert bool(fetched)
        assert len(fetched) == len(known_ids)
        assert missing_id not in fetched


# ----------------------------
# Collection Insert Test Case
# ----------------------------
@pytest.mark.usefixtures("test_collection")
class TestCollectionInsert:
    """Tests for single, batch and duplicate inserts, including nullable handling."""

    def test_collection_insert(self, test_collection, single_doc):
        """Inserting one doc succeeds and raises the doc count to 1."""
        status = test_collection.insert(single_doc)
        assert bool(status)
        assert status.ok()
        snapshot = test_collection.stats
        assert snapshot is not None
        assert snapshot.doc_count == 1

    def test_collection_insert_with_nullable_false_field(self, test_collection):
        """A doc that sets only the non-nullable fields is accepted."""
        # id, name's nullable == False
        # weight, height's nullable == True
        required_only = {"id": 1, "name": "test"}
        doc = Doc(
            id="0",
            fields=required_only,
            vectors={"dense": [1 + 0.1] * 128, "sparse": {1: 1.0, 2: 2.0, 3: 3.0}},
        )
        status = test_collection.insert(doc)
        assert bool(status)
        assert status.ok()
        snapshot = test_collection.stats
        assert snapshot is not None
        assert snapshot.doc_count == 1

    def test_collection_insert_without_nullable_false_field(self, test_collection):
        """Omitting a non-nullable field raises ``ValueError`` naming that field."""
        # id, name's nullable == False
        # weight, height's nullable == True

        # neither id nor name is supplied
        doc = Doc(
            id="0",
            vectors={"dense": [1 + 0.1] * 128, "sparse": {1: 1.0, 2: 2.0, 3: 3.0}},
        )
        with pytest.raises(ValueError) as excinfo:
            # ValueError: doc validate failed: field[id] is configured not nullable,
            # but doc does not contain this field
            test_collection.insert(doc)
        assert "field[id] is configured not nullable" in str(excinfo.value)

        # id is supplied, name is still missing
        doc = Doc(
            id="0",
            fields={"id": 1},
            vectors={"dense": [1 + 0.1] * 128, "sparse": {1: 1.0, 2: 2.0, 3: 3.0}},
        )
        with pytest.raises(ValueError) as excinfo:
            test_collection.insert(doc)
        assert "field[name] is configured not nullable" in str(excinfo.value)

    def test_collection_insert_with_nullable_true_field(self, test_collection):
        """Nullable fields left out on insert come back as ``None`` on fetch."""
        # id, name's nullable == False
        # weight, height's nullable == True
        required_only = {"id": 1, "name": "test"}
        doc = Doc(
            id="0",
            fields=required_only,
            vectors={"dense": [1 + 0.1] * 128, "sparse": {1: 1.0, 2: 2.0, 3: 3.0}},
        )
        status = test_collection.insert(doc)
        assert bool(status)
        assert status.ok()
        snapshot = test_collection.stats
        assert snapshot is not None
        assert snapshot.doc_count == 1

        fetched = test_collection.fetch(ids=[doc.id])
        assert doc.id in fetched
        stored = fetched[doc.id]
        assert stored.field("id") == 1
        assert stored.field("name") == "test"
        assert stored.field("weight") is None
        assert stored.field("height") is None

    def test_collection_insert_batch(self, test_collection, multiple_docs):
        """A batch insert returns one OK status per doc and stores all of them."""
        statuses = test_collection.insert(multiple_docs)
        assert len(statuses) == len(multiple_docs)
        for status in statuses:
            assert status.ok()

        snapshot = test_collection.stats
        assert snapshot is not None
        assert snapshot.doc_count == len(multiple_docs)

    def test_collection_insert_duplicate(
        self, test_collection, single_doc, multiple_docs
    ):
        """Inserting the same doc twice reports ALREADY_EXISTS and stores it once."""
        test_collection.insert(single_doc)
        second_status = test_collection.insert(single_doc)
        assert bool(second_status)
        assert second_status.code() == StatusCode.ALREADY_EXISTS

        snapshot = test_collection.stats
        assert snapshot is not None
        assert snapshot.doc_count == 1


# ----------------------------
# Collection Update Test Case
# ----------------------------
@pytest.mark.usefixtures("test_collection")
class TestCollectionUpdate:
    """Tests for single and batch updates, including partial and null updates."""

    def test_empty_collection_update(
        self, test_collection: Collection, single_doc: Doc
    ):
        """Updating a doc in an empty collection reports NOT_FOUND."""
        status = test_collection.update(single_doc)
        assert bool(status)
        assert status.code() == StatusCode.NOT_FOUND

        snapshot = test_collection.stats
        assert snapshot is not None
        assert snapshot.doc_count == 0

    def test_collection_update_with_nullable_false_field(
        self, collection_with_single_doc: Collection, single_doc: Doc
    ):
        """Updating only the non-nullable ``id`` field leaves the other fields intact."""
        # id, name's nullable == False
        # weight, height's nullable == True
        coll = collection_with_single_doc

        # partial update: bump the id field only
        patch = Doc(
            id=single_doc.id,
            fields={"id": single_doc.field("id") + 1},
        )
        status = coll.update(patch)
        assert bool(status)
        assert status.ok()
        snapshot = coll.stats
        assert snapshot is not None
        assert snapshot.doc_count == 1

        # fetch and compare against the patch and the original doc
        fetched = coll.fetch(ids=[patch.id])
        assert patch.id in fetched
        stored = fetched[patch.id]
        assert stored.field("id") == patch.field("id")
        assert stored.field("name") == single_doc.field("name")
        assert stored.field("weight") == single_doc.field("weight")
        assert stored.field("height") == single_doc.field("height")

    def test_collection_update_with_nullable_false_field_is_none(
        self, collection_with_single_doc: Collection, single_doc: Doc
    ):
        """``None`` is rejected for non-nullable ``id`` but accepted for nullable ``weight``."""
        # id, name's nullable == False
        # weight, height's nullable == True
        coll = collection_with_single_doc

        # setting the non-nullable id field to None must fail
        rejected_patch = Doc(
            id=single_doc.id,
            fields={"id": None},
        )
        with pytest.raises(ValueError):
            # ValueError: doc validate failed: field[id] is configured not nullable,
            # but doc does not contain this field
            coll.update(rejected_patch)

        # a valid id together with a None weight is accepted
        patch = Doc(
            id=single_doc.id,
            fields={"id": single_doc.field("id") + 1, "weight": None},
        )

        status = coll.update(patch)
        assert bool(status)
        assert status.ok()
        snapshot = coll.stats
        assert snapshot is not None
        assert snapshot.doc_count == 1

        fetched = coll.fetch(ids=[patch.id])
        assert patch.id in fetched
        stored = fetched[patch.id]
        assert stored.field("id") == patch.field("id")
        assert stored.field("name") == single_doc.field("name")
        assert stored.field("weight") is None
        assert stored.field("height") == single_doc.field("height")

    def test_collection_update_without_nullable_false_field(
        self, collection_with_single_doc: Collection, single_doc: Doc
    ):
        """Updating only the nullable ``weight`` field leaves the other fields intact."""
        # id, name's nullable == False
        # weight, height's nullable == True
        coll = collection_with_single_doc

        # partial update: bump the weight field only
        patch = Doc(
            id=single_doc.id,
            fields={"weight": single_doc.field("weight") + 1},
        )
        status = coll.update(patch)
        assert bool(status)
        assert status.ok()
        snapshot = coll.stats
        assert snapshot is not None
        assert snapshot.doc_count == 1

        # fetch and compare against the patch and the original doc
        fetched = coll.fetch(ids=[patch.id])
        assert patch.id in fetched
        stored = fetched[patch.id]
        assert stored.field("id") == single_doc.field("id")
        assert stored.field("name") == single_doc.field("name")
        assert stored.field("weight") == patch.field("weight")
        assert stored.field("height") == single_doc.field("height")

    def test_collection_update_without_nullable_false_field_set_null(
        self, collection_with_single_doc: Collection, single_doc: Doc
    ):
        """Setting the nullable ``weight`` field to ``None`` clears it."""
        # id, name's nullable == False
        # weight, height's nullable == True
        coll = collection_with_single_doc

        # partial update: weight becomes None
        patch = Doc(
            id=single_doc.id,
            fields={"weight": None},
        )
        status = coll.update(patch)
        assert bool(status)
        assert status.ok()
        snapshot = coll.stats
        assert snapshot is not None
        assert snapshot.doc_count == 1

        # fetch and compare against the original doc
        fetched = coll.fetch(ids=[patch.id])
        assert patch.id in fetched
        stored = fetched[patch.id]
        assert stored.field("id") == single_doc.field("id")
        assert stored.field("name") == single_doc.field("name")
        assert stored.field("weight") is None
        assert stored.field("height") == single_doc.field("height")

    def test_empty_collection_update_batch(
        self, test_collection: Collection, multiple_docs
    ):
        """A batch update on an empty collection reports NOT_FOUND for every doc."""
        statuses = test_collection.update(multiple_docs)
        assert len(statuses) == len(multiple_docs)
        for status in statuses:
            assert status.code() == StatusCode.NOT_FOUND

        snapshot = test_collection.stats
        assert snapshot is not None
        assert snapshot.doc_count == 0

    def test_collection_update(
        self, collection_with_single_doc: Collection, single_doc
    ):
        """Updating an existing doc with itself succeeds and keeps the count at 1."""
        status = collection_with_single_doc.update(single_doc)
        assert bool(status) == 1
        assert status.ok()
        snapshot = collection_with_single_doc.stats
        assert snapshot is not None
        assert snapshot.doc_count == 1

    def test_collection_update_batch(
        self, collection_with_multiple_docs: Collection, multiple_docs
    ):
        """A batch update of existing docs returns OK for each and keeps the count."""
        statuses = collection_with_multiple_docs.update(multiple_docs)
        assert len(statuses) == len(multiple_docs)
        for status in statuses:
            assert status.ok()

        snapshot = collection_with_multiple_docs.stats
        assert snapshot is not None
        assert snapshot.doc_count == len(multiple_docs)


# ----------------------------
# Collection Upsert Test Case
# ----------------------------
@pytest.mark.usefixtures("test_collection")
class TestCollectionUpsert:
    """Tests for upsert on empty and pre-populated collections."""

    def test_empty_collection_upsert(self, test_collection: Collection, single_doc):
        """Upserting into an empty collection stores the doc."""
        status = test_collection.upsert(single_doc)
        assert bool(status)
        assert status.ok()

        snapshot = test_collection.stats
        assert snapshot is not None
        assert snapshot.doc_count == 1

    def test_empty_collection_upsert_batch(
        self, test_collection: Collection, multiple_docs
    ):
        """A batch upsert into an empty collection stores every doc."""
        statuses = test_collection.upsert(multiple_docs)
        assert len(statuses) == len(multiple_docs)
        for status in statuses:
            assert status.ok()

        snapshot = test_collection.stats
        assert snapshot is not None
        assert snapshot.doc_count == len(multiple_docs)

    def test_collection_upsert(
        self, collection_with_single_doc: Collection, single_doc, multiple_docs
    ):
        """Upserting an existing doc succeeds and keeps the count at 1."""
        # the doc already exists, so the upsert acts as an update
        status = collection_with_single_doc.upsert(single_doc)
        assert bool(status)
        assert status.ok()
        snapshot = collection_with_single_doc.stats
        assert snapshot is not None
        assert snapshot.doc_count == 1

    def test_collection_upsert_batch(
        self, collection_with_multiple_docs: Collection, multiple_docs
    ):
        """A batch upsert of existing docs returns OK for each and keeps the count."""
        # the docs already exist, so the upsert acts as an update
        statuses = collection_with_multiple_docs.upsert(multiple_docs)
        assert len(statuses) == len(multiple_docs)
        for status in statuses:
            assert status.ok()

        snapshot = collection_with_multiple_docs.stats
        assert snapshot is not None
        assert snapshot.doc_count == len(multiple_docs)


# ----------------------------
# Collection Delete Test Case
# ----------------------------
@pytest.mark.usefixtures("test_collection")
class TestCollectionDelete:
    """Delete by id, by id batch and by filter expression."""

    def test_empty_collection_delete(self, test_collection: Collection, single_doc):
        """Deleting an id through `test_collection` reports NOT_FOUND."""
        status = test_collection.delete(single_doc.id)
        # The returned status object is truthy even though its code is NOT_FOUND.
        assert bool(status)
        assert status.code() == StatusCode.NOT_FOUND

    def test_empty_collection_delete_batch(
        self, test_collection: Collection, multiple_docs
    ):
        """Batch delete through `test_collection`: one NOT_FOUND status per id."""
        doc_ids = [doc.id for doc in multiple_docs]
        statuses = test_collection.delete(doc_ids)
        assert len(statuses) == len(multiple_docs)
        assert all(status.code() == StatusCode.NOT_FOUND for status in statuses)

    def test_collection_delete(
        self, collection_with_single_doc: Collection, single_doc
    ):
        """Delete the stored doc, then insert it again; the count goes 0 and back to 1."""
        delete_status = collection_with_single_doc.delete(single_doc.id)
        assert bool(delete_status)
        assert delete_status.ok()
        after_delete = collection_with_single_doc.stats
        assert after_delete is not None
        assert after_delete.doc_count == 0

        # Re-inserting the same doc after deletion must succeed.
        insert_status = collection_with_single_doc.insert(single_doc)
        assert bool(insert_status)
        assert insert_status.ok()
        after_insert = collection_with_single_doc.stats
        assert after_insert is not None
        assert after_insert.doc_count == 1

    def test_collection_delete_batch(
        self, collection_with_multiple_docs: Collection, multiple_docs
    ):
        """Batch-delete every stored doc: all statuses OK and the collection ends empty."""
        doc_ids = [doc.id for doc in multiple_docs]
        statuses = collection_with_multiple_docs.delete(doc_ids)
        assert len(statuses) == len(multiple_docs)
        assert all(status.ok() for status in statuses)

        collection_stats = collection_with_multiple_docs.stats
        assert collection_stats is not None
        assert collection_stats.doc_count == 0

    def test_collection_delete_by_filter(
        self, collection_with_single_doc: Collection, single_doc
    ):
        """Delete by an equality filter on `height`; the single doc must be gone."""
        height_filter = f"height={single_doc.field('height')}"
        collection_with_single_doc.delete_by_filter(filter=height_filter)

        collection_stats = collection_with_single_doc.stats
        assert collection_stats is not None
        assert collection_stats.doc_count == 0

    def test_collection_delete_by_filter_invert_field(
        self, collection_with_single_doc: Collection, single_doc
    ):
        """Delete by an equality filter on `id`; the single doc must be gone.

        NOTE(review): the test name suggests `id` is an inverted-index field;
        confirm against the schema fixture.
        """
        id_filter = f"id={single_doc.field('id')}"
        collection_with_single_doc.delete_by_filter(filter=id_filter)

        collection_stats = collection_with_single_doc.stats
        assert collection_stats is not None
        assert collection_stats.doc_count == 0


# ----------------------------
# Collection Query Test Case
# ----------------------------
@pytest.mark.usefixtures("test_collection")
class TestCollectionQuery:
    """Query tests: plain scans, output shaping, scalar filters and VectorQuery validation."""

    def test_empty_collection_query(self, test_collection: Collection):
        """A bare query through `test_collection` returns no docs."""
        result = test_collection.query()
        assert len(result) == 0

    def test_collection_query(self, collection_with_single_doc: Collection, single_doc):
        """A bare query returns the stored doc with its scalar fields and no vector names."""
        result = collection_with_single_doc.query()
        assert len(result) == 1
        doc = result[0]
        assert doc.id == single_doc.id
        # Vector names must not show up among the scalar field names.
        assert "dense" not in doc.field_names()
        assert "sparse" not in doc.field_names()
        field_without_vector = single_doc.field_names()
        assert set(doc.field_names()) == set(field_without_vector)
        for name in field_without_vector:
            assert doc.field(name) == single_doc.field(name)

    def test_collection_query_with_include_vector(
        self, collection_with_single_doc: Collection, single_doc
    ):
        """With include_vector=True both the dense and the sparse vector are returned."""
        result = collection_with_single_doc.query(include_vector=True)
        assert len(result) == 1
        doc = result[0]
        assert doc.vector("dense") is not None
        assert doc.vector("sparse") is not None

    def test_collection_query_with_output_fields(
        self, collection_with_single_doc: Collection, single_doc
    ):
        """output_fields restricts the returned scalar fields to exactly those named."""
        result = collection_with_single_doc.query(output_fields=["id", "name"])
        assert len(result) == 1
        doc = result[0]
        assert doc.id == single_doc.id
        assert len(doc.field_names()) == 2
        assert set(doc.field_names()) == {"id", "name"}

    def test_collection_query_with_topk(
        self, collection_with_multiple_docs: Collection
    ):
        """Without topk ten docs come back; an explicit topk caps the result."""
        # NOTE(review): 10 is presumably the default topk of query() - confirm.
        result = collection_with_multiple_docs.query()
        assert len(result) == 10

        result = collection_with_multiple_docs.query(topk=5)
        assert len(result) == 5

    def test_collection_query_with_range_filter_int_field(
        self, collection_with_multiple_docs: Collection, multiple_docs
    ):
        """Comparison, range, disjunction and `in` filters on `id` return the expected counts.

        The expected counts assume doc ids grow with position in `multiple_docs`
        and that a doc with id 1 exists - TODO confirm against the fixture.
        """
        total = len(multiple_docs)

        index = 10
        idx = multiple_docs[index].id

        left, right = 10, 90
        l_id, r_id = multiple_docs[left].id, multiple_docs[right].id
        span = right - left

        # (filter expression, expected number of matching docs)
        cases = [
            (f"id>{idx}", total - index - 1),
            (f"id>={idx}", total - index),
            (f"id<{idx}", index),
            (f"id<={idx}", index + 1),
            (f"id={idx}", 1),
            (f"id!={idx}", total - 1),
            (f"id>{l_id} and id<{r_id}", span - 1),
            (f"id>={l_id} and id<{r_id}", span),
            (f"id>={l_id} and id<={r_id}", span + 1),
            (f"id<{l_id} or id>{r_id}", total - span - 1),
            (f"id<={l_id} or id>{r_id}", total - span),
            (f"id<={l_id} or id>={r_id}", total - span + 1),
            ("id in (1)", 1),
        ]
        for expression, expected in cases:
            result = collection_with_multiple_docs.query(filter=expression, topk=100)
            # The expression is the assertion message so a failure names the filter.
            assert len(result) == expected, expression

    def test_collection_query_with_vector_and_id(
        self, collection_with_single_doc: Collection, single_doc: Doc
    ):
        """A VectorQuery carrying both `id` and `vector` is rejected with ValueError."""
        with pytest.raises(ValueError):
            collection_with_single_doc.query(
                VectorQuery(
                    field_name="dense",
                    id=single_doc.id,
                    vector=single_doc.vector("dense"),
                )
            )

    def test_collection_query_with_filter_not_in(
        self, collection_with_multiple_docs: Collection, multiple_docs
    ):
        """`not in` excludes exactly the one listed id."""
        result = collection_with_multiple_docs.query(filter="id not in (1)", topk=100)
        assert len(result) == len(multiple_docs) - 1

    def test_collection_with_error_query_vector(
        self, collection_with_multiple_docs: Collection, multiple_docs
    ):
        """A VectorQuery whose `param` is a plain list makes query() raise TypeError."""
        # Built outside the `raises` block, so the TypeError must come from query().
        query = VectorQuery(
            field_name="dense", vector=multiple_docs[0].vector("dense"), param=[1, 2, 3]
        )
        with pytest.raises(TypeError):
            collection_with_multiple_docs.query(
                filter="id in (1)", topk=100, vectors=query
            )

    def test_collection_query_by_id(
        self, collection_with_multiple_docs: Collection, multiple_docs
    ):
        """Searching by a stored doc's id on the dense field returns ten docs."""
        result = collection_with_multiple_docs.query(
            VectorQuery(field_name="dense", id=multiple_docs[0].id)
        )
        assert len(result) == 10

    def test_collection_query_multi_vector_with_same_field(
        self, collection_with_multiple_docs: Collection, multiple_docs
    ):
        """Two VectorQuery objects on the same field are rejected with ValueError."""
        with pytest.raises(ValueError):
            collection_with_multiple_docs.query(
                [
                    VectorQuery(
                        field_name="dense", vector=multiple_docs[0].vector("dense")
                    ),
                    VectorQuery(
                        field_name="dense", vector=multiple_docs[0].vector("dense")
                    ),
                ]
            )

    @pytest.mark.skip(reason="TODO: This test case is pending implementation")
    def test_collection_query_by_dense_vector(
        self, collection_with_multiple_docs: Collection, multiple_docs
    ):
        """Placeholder: dense-vector query test, not implemented yet."""

    @pytest.mark.skip(reason="TODO: This test case is pending implementation")
    def test_collection_query_by_sparse_vector(
        self, collection_with_multiple_docs: Collection, multiple_docs
    ):
        """Placeholder: sparse-vector query test, not implemented yet."""

    @pytest.mark.skip(reason="TODO: This test case is pending implementation")
    def test_collection_query_by_dense_vector_with_filter(
        self, collection_with_multiple_docs: Collection, multiple_docs
    ):
        """Placeholder: dense-vector query with filter, not implemented yet."""

    @pytest.mark.skip(reason="TODO: This test case is pending implementation")
    def test_collection_query_by_sparse_vector_with_filter(
        self, collection_with_multiple_docs: Collection, multiple_docs
    ):
        """Placeholder: sparse-vector query with filter, not implemented yet."""

    @pytest.mark.skip(reason="TODO: This test case is pending implementation")
    def test_collection_query_with_rrf_reranker_by_multi_dense_vector(
        self, collection_with_multiple_docs: Collection, multiple_docs
    ):
        """Placeholder: RRF reranker over several dense vectors, not implemented yet."""

    @pytest.mark.skip(reason="TODO: This test case is pending implementation")
    def test_collection_query_with_rrf_reranker_by_multi_sparse_vector(
        self, collection_with_multiple_docs: Collection, multiple_docs
    ):
        """Placeholder: RRF reranker over several sparse vectors, not implemented yet."""

    @pytest.mark.skip(reason="TODO: This test case is pending implementation")
    def test_collection_query_with_rrf_reranker_by_hybrid_vector(
        self, collection_with_multiple_docs: Collection, multiple_docs
    ):
        """Placeholder: RRF reranker over hybrid vectors, not implemented yet."""

    @pytest.mark.skip(reason="TODO: This test case is pending implementation")
    def test_collection_query_with_weighted_reranker_by_multi_dense_vector(
        self, collection_with_multiple_docs: Collection, multiple_docs
    ):
        """Placeholder: weighted reranker over several dense vectors, not implemented yet."""

    @pytest.mark.skip(reason="TODO: This test case is pending implementation")
    def test_collection_query_with_weighted_reranker_by_multi_sparse_vector(
        self, collection_with_multiple_docs: Collection, multiple_docs
    ):
        """Placeholder: weighted reranker over several sparse vectors, not implemented yet."""

    @pytest.mark.skip(reason="TODO: This test case is pending implementation")
    def test_collection_query_with_weighted_reranker_by_hybrid_vector(
        self, collection_with_multiple_docs: Collection, multiple_docs
    ):
        """Placeholder: weighted reranker over hybrid vectors, not implemented yet."""


================================================
FILE: python/tests/test_collection_hnsw_rabitq.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import platform
import sys

import pytest
import math
import zvec

# Module-level marker: every test in this file is skipped unless sys.platform is
# "linux" and platform.machine() reports "x86_64" or "AMD64".
pytestmark = pytest.mark.skipif(
    not (sys.platform == "linux" and platform.machine() in ("x86_64", "AMD64")),
    reason="HNSW RaBitQ only supported on Linux x86_64",
)
from zvec import (
    Collection,
    CollectionOption,
    DataType,
    Doc,
    FieldSchema,
    HnswRabitqIndexParam,
    HnswRabitqQueryParam,
    MetricType,
    VectorSchema,
    VectorQuery,
)


# ==================== Fixtures ====================


@pytest.fixture(scope="session")
def hnsw_rabitq_collection_schema():
    """Session-scoped schema: two scalar fields plus one HNSW RaBitQ-indexed vector."""
    scalar_fields = [
        FieldSchema("id", DataType.INT64, nullable=False),
        FieldSchema("name", DataType.STRING, nullable=False),
    ]

    # Index settings for the single 128-dimensional FP32 "embedding" vector.
    rabitq_index = HnswRabitqIndexParam(
        metric_type=MetricType.L2,
        m=16,
        ef_construction=200,
        total_bits=7,
        num_clusters=64,
    )
    embedding_schema = VectorSchema(
        "embedding",
        DataType.VECTOR_FP32,
        dimension=128,
        index_param=rabitq_index,
    )

    return zvec.CollectionSchema(
        name="test_hnsw_rabitq_collection",
        fields=scalar_fields,
        vectors=[embedding_schema],
    )


@pytest.fixture(scope="session")
def collection_option():
    """Session-scoped CollectionOption: writable (read_only=False) with mmap enabled."""
    option = CollectionOption(read_only=False, enable_mmap=True)
    return option


@pytest.fixture
def single_doc():
    """Build one Doc (id "0") with scalar fields and a 128-element embedding."""
    embedding = [0.1 + i * 0.01 for i in range(128)]
    scalar_fields = {"id": 0, "name": "test_doc_0"}
    return Doc(id="0", fields=scalar_fields, vectors={"embedding": embedding})


@pytest.fixture
def multiple_docs():
    """Build 100 Docs with ids "1".."100", each carrying a 128-element embedding."""
    docs = []
    for i in range(1, 101):
        # Each embedding is offset by the doc number, so no two docs share a vector.
        embedding = [i * 0.1 + j * 0.01 for j in range(128)]
        docs.append(
            Doc(
                id=f"{i}",
                fields={"id": i, "name": f"test_doc_{i}"},
                vectors={"embedding": embedding},
            )
        )
    return docs


@pytest.fixture(scope="function")
def hnsw_rabitq_collection(
    tmp_path_factory, hnsw_rabitq_collection_schema, collection_option
) -> Collection:
    """
    Function-scoped fixture: creates and opens a collection with HNSW RaBitQ index.

    The collection lives in a fresh temporary directory and is destroyed on
    teardown. Destroy failures are reported with a printed warning instead of
    failing the test.
    """
    temp_dir = tmp_path_factory.mktemp("zvec_hnsw_rabitq")
    collection_path = temp_dir / "test_hnsw_rabitq_collection"

    coll = zvec.create_and_open(
        path=str(collection_path),
        schema=hnsw_rabitq_collection_schema,
        option=collection_option,
    )

    try:
        # The sanity checks sit inside the try so that a failing check still
        # reaches the cleanup below instead of leaking the created collection.
        assert coll is not None, "Failed to create and open HNSW RaBitQ collection"
        assert coll.path == str(collection_path)
        assert coll.schema.name == hnsw_rabitq_collection_schema.name

        yield coll
    finally:
        if coll is not None and hasattr(coll, "destroy"):
            try:
                coll.destroy()
            except Exception as e:
                # Best-effort cleanup: a failed destroy must not mask the test result.
                print(f"Warning: failed to destroy collection: {e}")


@pytest.fixture
def collection_with_single_doc(
    hnsw_rabitq_collection: Collection, single_doc: Doc
) -> Collection:
    """Yield the HNSW RaBitQ collection pre-populated with `single_doc`."""
    coll = hnsw_rabitq_collection

    # Setup: the collection must start empty and hold exactly one doc afterwards.
    assert coll.stats.doc_count == 0
    insert_status = coll.insert(single_doc)
    assert bool(insert_status)
    assert insert_status.ok()
    assert coll.stats.doc_count == 1

    yield coll

    # Teardown: remove the doc again and check the collection is empty.
    coll.delete(single_doc.id)
    assert coll.stats.doc_count == 0


@pytest.fixture
def collection_with_multiple_docs(
    hnsw_rabitq_collection: Collection, multiple_docs: list[Doc]
) -> Collection:
    """Yield the HNSW RaBitQ collection pre-populated with every doc in `multiple_docs`."""
    coll = hnsw_rabitq_collection

    # Setup: the collection must start empty; every insert must report OK.
    assert coll.stats.doc_count == 0
    insert_statuses = coll.insert(multiple_docs)
    assert len(insert_statuses) == len(multiple_docs)
    assert all(status.ok() for status in insert_statuses)
    assert coll.stats.doc_count == len(multiple_docs)

    yield coll

    # Teardown: delete all inserted docs (the outcome is not checked).
    coll.delete([doc.id for doc in multiple_docs])


# ==================== Tests ====================


@pytest.mark.usefixtures("hnsw_rabitq_collection")
class TestHnswRabitqCollectionCreation:
    """Checks on a freshly created HNSW RaBitQ collection: schema, index params, stats."""

    def test_collection_creation(
        self, hnsw_rabitq_collection: Collection, hnsw_rabitq_collection_schema
    ):
        """The opened collection reports the name and field/vector counts of the schema."""
        assert hnsw_rabitq_collection is not None

        expected = hnsw_rabitq_collection_schema
        actual = hnsw_rabitq_collection.schema
        assert actual.name == expected.name
        assert len(actual.fields) == len(expected.fields)
        assert len(actual.vectors) == len(expected.vectors)

    def test_vector_schema_validation(self, hnsw_rabitq_collection: Collection):
        """The "embedding" vector keeps its type, dimension and RaBitQ index settings."""
        embedding_schema = hnsw_rabitq_collection.schema.vector("embedding")
        assert embedding_schema is not None
        assert embedding_schema.name == "embedding"
        assert embedding_schema.data_type == DataType.VECTOR_FP32
        assert embedding_schema.dimension == 128

        index_param = embedding_schema.index_param
        assert index_param is not None
        # Same values the schema fixture passed to HnswRabitqIndexParam.
        expected_settings = {
            "metric_type": MetricType.L2,
            "m": 16,
            "ef_construction": 200,
            "total_bits": 7,
            "num_clusters": 64,
        }
        for attribute, expected_value in expected_settings.items():
            assert getattr(index_param, attribute) == expected_value

    def test_collection_stats(self, hnsw_rabitq_collection: Collection):
        """A new collection has zero docs and one index-completeness entry equal to 1."""
        collection_stats = hnsw_rabitq_collection.stats
        assert collection_stats is not None
        assert collection_stats.doc_count == 0

        completeness = collection_stats.index_completeness
        assert len(completeness) == 1
        assert completeness["embedding"] == 1


@pytest.mark.usefixtures("hnsw_rabitq_collection")
class TestHnswRabitqCollectionInsert:
    """Insertion of single docs and batches into an HNSW RaBitQ collection."""

    def test_insert_single_doc(
        self, hnsw_rabitq_collection: Collection, single_doc: Doc
    ):
        """One insert succeeds and the collection then reports one doc."""
        insert_status = hnsw_rabitq_collection.insert(single_doc)
        assert bool(insert_status)
        assert insert_status.ok()

        collection_stats = hnsw_rabitq_collection.stats
        assert collection_stats is not None
        assert collection_stats.doc_count == 1

    def test_insert_multiple_docs(
        self, hnsw_rabitq_collection: Collection, multiple_docs: list[Doc]
    ):
        """A batch insert returns one OK status per doc and stores all of them."""
        insert_statuses = hnsw_rabitq_collection.insert(multiple_docs)
        assert len(insert_statuses) == len(multiple_docs)
        assert all(status.ok() for status in insert_statuses)

        collection_stats = hnsw_rabitq_collection.stats
        assert collection_stats is not None
        assert collection_stats.doc_count == len(multiple_docs)


@pytest.mark.usefixtures("hnsw_rabitq_collection")
class TestHnswRabitqCollectionFetch:
    """Fetching docs by id from an HNSW RaBitQ collection."""

    def test_fetch_single_doc(
        self, collection_with_single_doc: Collection, single_doc: Doc
    ):
        """Fetching the stored id returns a doc with matching id and scalar fields."""
        wanted_id = single_doc.id
        fetched = collection_with_single_doc.fetch(ids=[wanted_id])
        assert bool(fetched)
        assert wanted_id in fetched.keys()

        stored = fetched[wanted_id]
        assert stored is not None
        assert stored.id == wanted_id
        for field_name in ("id", "name"):
            assert stored.field(field_name) == single_doc.field(field_name)

    def test_fetch_multiple_docs(
        self, collection_with_multiple_docs: Collection, multiple_docs: list[Doc]
    ):
        """Fetching the first ten ids returns exactly those ten docs."""
        wanted_ids = [doc.id for doc in multiple_docs[:10]]
        fetched = collection_with_multiple_docs.fetch(ids=wanted_ids)
        assert bool(fetched)
        assert len(fetched) == len(wanted_ids)

        for wanted_id in wanted_ids:
            assert wanted_id in fetched
            stored = fetched[wanted_id]
            assert stored is not None
            assert stored.id == wanted_id

    def test_fetch_nonexistent_doc(self, collection_with_single_doc: Collection):
        """Fetching an unknown id yields an empty result."""
        fetched = collection_with_single_doc.fetch(ids=["nonexistent_id"])
        assert len(fetched) == 0


@pytest.mark.usefixtures("hnsw_rabitq_collection")
class TestHnswRabitqCollectionQuery:
    """Vector-search queries against a collection indexed with HNSW RaBitQ."""

    @staticmethod
    def _embedding_query(ef: int = 300, **target) -> VectorQuery:
        """Build a VectorQuery on the "embedding" field with the given `ef`.

        `target` carries what the caller searches by: `vector=...` or `id=...`.
        """
        return VectorQuery(
            field_name="embedding",
            param=HnswRabitqQueryParam(ef=ef),
            **target,
        )

    def test_query_by_vector(
        self, collection_with_multiple_docs: Collection, multiple_docs: list[Doc]
    ):
        """Searching with a stored doc's own vector returns between 1 and 10 hits."""
        search = self._embedding_query(vector=multiple_docs[0].vector("embedding"))

        hits = collection_with_multiple_docs.query(vectors=search, topk=10)
        assert 0 < len(hits) <= 10

        # Only the presence of a top hit with an id is checked; the test does not
        # verify that the top hit is the query doc itself.
        top_hit = hits[0]
        assert top_hit is not None
        assert top_hit.id is not None

    def test_query_by_id(
        self, collection_with_multiple_docs: Collection, multiple_docs: list[Doc]
    ):
        """Searching by a stored doc id returns between 1 and 10 hits."""
        search = self._embedding_query(id=multiple_docs[0].id)

        hits = collection_with_multiple_docs.query(vectors=search, topk=10)
        assert 0 < len(hits) <= 10

    def test_query_with_different_ef_values(
        self, collection_with_multiple_docs: Collection, multiple_docs: list[Doc]
    ):
        """Both ef=100 and ef=500 produce a non-empty result."""
        probe = multiple_docs[0].vector("embedding")

        for ef in (100, 500):
            search = self._embedding_query(ef=ef, vector=probe)
            hits = collection_with_multiple_docs.query(vectors=search, topk=10)
            assert len(hits) > 0

    def test_query_with_topk(
        self, collection_with_multiple_docs: Collection, multiple_docs: list[Doc]
    ):
        """The number of hits never exceeds the requested topk (5, then 20)."""
        search = self._embedding_query(vector=multiple_docs[0].vector("embedding"))

        # The same query object is reused for both limits.
        for limit in (5, 20):
            hits = collection_with_multiple_docs.query(vectors=search, topk=limit)
            assert len(hits) <= limit

    def test_query_with_filter(
        self, collection_with_multiple_docs: Collection, multiple_docs: list[Doc]
    ):
        """A scalar filter combined with vector search only returns matching docs."""
        search = self._embedding_query(vector=multiple_docs[0].vector("embedding"))

        hits = collection_with_multiple_docs.query(
            vectors=search, topk=10, filter="id < 50"
        )
        assert len(hits) > 0
        assert all(hit.field("id") < 50 for hit in hits)

    def test_query_with_output_fields(
        self, collection_with_multiple_docs: Collection, multiple_docs: list[Doc]
    ):
        """Requested output fields are present on the first hit."""
        search = self._embedding_query(vector=multiple_docs[0].vector("embedding"))

        hits = collection_with_multiple_docs.query(
            vectors=search, topk=10, output_fields=["id", "name"]
        )
        assert len(hits) > 0

        top_hit = hits[0]
        for requested in ("id", "name"):
            assert requested in top_hit.field_names()

    def test_query_with_include_vector(
        self, collection_with_multiple_docs: Collection, multiple_docs: list[Doc]
    ):
        """With include_vector=True the first hit carries a 128-element embedding."""
        search = self._embedding_query(vector=multiple_docs[0].vector("embedding"))

        hits = collection_with_multiple_docs.query(
            vectors=search, topk=10, include_vector=True
        )
        assert len(hits) > 0

        top_hit = hits[0]
        assert top_hit.vector("embedding") is not None
        assert len(top_hit.vector("embedding")) == 128


@pytest.mark.usefixtures("hnsw_rabitq_collection")
class TestHnswRabitqCollectionUpdate:
    """Updating scalar fields and vectors of a doc stored in an HNSW RaBitQ collection."""

    def test_update_doc_fields(
        self, collection_with_single_doc: Collection, single_doc: Doc
    ):
        """Updating `name` is visible on a subsequent fetch."""
        doc_id = single_doc.id
        patch = Doc(
            id=doc_id,
            fields={"id": single_doc.field("id"), "name": "updated_name"},
        )

        update_status = collection_with_single_doc.update(patch)
        assert bool(update_status)
        assert update_status.ok()

        # Read the doc back and check the new field value.
        fetched = collection_with_single_doc.fetch(ids=[doc_id])
        assert doc_id in fetched
        assert fetched[doc_id].field("name") == "updated_name"

    def test_update_doc_vector(
        self, collection_with_single_doc: Collection, single_doc: Doc
    ):
        """Updating the embedding is visible on a subsequent fetch."""
        doc_id = single_doc.id
        new_vector = [0.5 + i * 0.01 for i in range(128)]
        patch = Doc(id=doc_id, vectors={"embedding": new_vector})

        update_status = collection_with_single_doc.update(patch)
        assert bool(update_status)
        assert update_status.ok()

        # Read the doc back and compare the stored vector element by element.
        fetched = collection_with_single_doc.fetch(ids=[doc_id])
        assert doc_id in fetched
        stored = fetched[doc_id]
        assert stored.vector("embedding") is not None
        stored_vector = stored.vector("embedding")
        assert len(stored_vector) == 128
        # Tolerant comparison: the values are floats and need not match bit for bit.
        for position, expected in enumerate(new_vector):
            assert math.isclose(stored_vector[position], expected, rel_tol=1e-5)


@pytest.mark.usefixtures("hnsw_rabitq_collection")
class TestHnswRabitqCollectionDelete:
    """Deleting docs from an HNSW RaBitQ collection."""

    def test_delete_single_doc(
        self, collection_with_single_doc: Collection, single_doc: Doc
    ):
        """Deleting the only doc succeeds and leaves the collection empty."""
        delete_status = collection_with_single_doc.delete(single_doc.id)
        assert bool(delete_status)
        assert delete_status.ok()

        assert collection_with_single_doc.stats.doc_count == 0

    def test_delete_multiple_docs(
        self, collection_with_multiple_docs: Collection, multiple_docs: list[Doc]
    ):
        """Deleting the first ten docs succeeds and lowers doc_count by ten."""
        doomed_ids = [doc.id for doc in multiple_docs[:10]]

        delete_statuses = collection_with_multiple_docs.delete(doomed_ids)
        assert len(delete_statuses) == len(doomed_ids)
        assert all(status.ok() for status in delete_statuses)

        remaining = collection_with_multiple_docs.stats.doc_count
        assert remaining == len(multiple_docs) - len(doomed_ids)


@pytest.mark.usefixtures("hnsw_rabitq_collection")
class TestHnswRabitqCollectionOptimizeAndReopen:
    """Test collection optimize and reopen functionality."""

    def test_optimize_close_reopen_and_query(
        self,
        tmp_path_factory,
        hnsw_rabitq_collection_schema,
        collection_option,
        multiple_docs: list[Doc],
    ):
        """Insert the docs, optimize, release the handle, reopen from disk and query.

        The test creates its own collection in a dedicated temp directory instead
        of using the shared fixture. Whichever handle is currently open is
        destroyed in a `finally` block, so a failing assertion does not leave the
        collection behind.
        """
        from zvec import OptimizeOption

        temp_dir = tmp_path_factory.mktemp("zvec_hnsw_rabitq_optimize")
        collection_path = temp_dir / "test_optimize_collection"
        collection_path_str = str(collection_path)

        query_vector = multiple_docs[0].vector("embedding")

        # `coll` is the only reference to whichever collection object is open.
        coll = zvec.create_and_open(
            path=collection_path_str,
            schema=hnsw_rabitq_collection_schema,
            option=collection_option,
        )
        try:
            assert coll is not None
            assert coll.stats.doc_count == 0

            # Insert all documents and make sure each one was accepted.
            result = coll.insert(multiple_docs)
            assert len(result) == len(multiple_docs)
            for item in result:
                assert item.ok()
            assert coll.stats.doc_count == len(multiple_docs)

            coll.optimize(option=OptimizeOption())

            # The data must still be searchable after optimize.
            query = VectorQuery(
                field_name="embedding",
                vector=query_vector,
                param=HnswRabitqQueryParam(ef=300),
            )
            result_before_close = coll.query(vectors=query, topk=10)
            assert len(result_before_close) > 0

            # Release the only reference to the first handle before reopening.
            # NOTE(review): presumably dropping the last reference closes the
            # collection (the test previously used `del` for this) - confirm.
            coll = None

            # Reopen the collection from disk; all docs must still be there.
            coll = zvec.open(path=collection_path_str, option=collection_option)
            assert coll is not None
            assert coll.stats.doc_count == len(multiple_docs)

            query_after_reopen = VectorQuery(
                field_name="embedding",
                vector=query_vector,
                param=HnswRabitqQueryParam(ef=300),
            )
            result_after_reopen = coll.query(vectors=query_after_reopen, topk=10)
            assert len(result_after_reopen) > 0
            assert len(result_after_reopen) <= 10

            # The first hit must be a complete doc with id and scalar fields.
            first_doc = result_after_reopen[0]
            assert first_doc is not None
            assert first_doc.id is not None
            assert first_doc.field("id") is not None
            assert first_doc.field("name") is not None
        finally:
            # Cleanup runs on success and on failure; a destroy error still
            # fails the test, as it did when destroy() was the last statement.
            if coll is not None:
                coll.destroy()


================================================
FILE: python/tests/test_convert.py
================================================
from __future__ import annotations

import math

import pytest
from _zvec import _Doc
from zvec.model.convert import convert_to_py_doc, convert_to_cpp_doc
from zvec import Doc, CollectionSchema, DataType, FieldSchema, VectorSchema


# ----------------------------
# Convert Cpp Doc Test Case
# ----------------------------
class TestConvertCppDoc:
    def test_default(self):
        """A doc that only carries an id converts and keeps its primary key."""
        collection_schema = CollectionSchema(
            name="test_collection",
            fields=FieldSchema("name", DataType.STRING),
        )
        source = Doc(id="1")

        converted = convert_to_cpp_doc(source, collection_schema=collection_schema)

        assert converted is not None
        assert converted.pk() == source.id

    def test_with_field_notin_schema(self):
        """A doc field that the schema does not declare is expected to raise ValueError."""
        declared = [
            ("id", DataType.UINT64),
            ("salary", DataType.UINT32),
            ("age", DataType.INT32),
            ("create_at", DataType.INT64),
            ("author", DataType.STRING),
            ("weight", DataType.FLOAT),
        ]
        schema = CollectionSchema(
            name="test_collection",
            fields=[FieldSchema(name, dtype) for name, dtype in declared],
        )
        # "name" is deliberately absent from the declared fields above.
        undeclared_doc = Doc(id="1", fields={"name": "Tom"})

        with pytest.raises(ValueError):
            convert_to_cpp_doc(undeclared_doc, collection_schema=schema)

    def test_with_scalar_fields(self):
        """Scalar field values can be read back from the converted C++ doc."""
        scalar_types = [
            ("id", DataType.UINT64),
            ("salary", DataType.UINT32),
            ("age", DataType.INT32),
            ("create_at", DataType.INT64),
            ("author", DataType.STRING),
            ("weight", DataType.FLOAT),
            ("bmi", DataType.DOUBLE),
            ("is_male", DataType.BOOL),
        ]
        schema = CollectionSchema(
            name="test_collection",
            fields=[FieldSchema(name, dtype) for name, dtype in scalar_types],
        )
        doc = Doc(
            id="1",
            fields={
                "id": 1,
                "salary": 1000,
                "age": 18,
                "create_at": 1640995200,
                "bmi": 80.0 / 200.0,
                "author": "Tom",
                "weight": 80.0,
                "is_male": True,
            },
        )

        cpp_doc = convert_to_cpp_doc(doc, collection_schema=schema)

        assert cpp_doc is not None
        assert cpp_doc.pk() == doc.id

        # Integer and string fields must match exactly.
        exact_expectations = [
            ("id", DataType.UINT64, 1),
            ("salary", DataType.UINT32, 1000),
            ("age", DataType.INT32, 18),
            ("create_at", DataType.INT64, 1640995200),
            ("author", DataType.STRING, "Tom"),
        ]
        for name, dtype, expected in exact_expectations:
            assert cpp_doc.get_any(name, dtype) == expected

        # Floating-point fields are compared with a relative tolerance.
        weight = cpp_doc.get_any("weight", DataType.FLOAT)
        assert math.isclose(weight, 80.0, rel_tol=1e-6)
        bmi = cpp_doc.get_any("bmi", DataType.DOUBLE)
        assert math.isclose(bmi, 80.0 / 200.0, rel_tol=1e-6)

        assert cpp_doc.get_any("is_male", DataType.BOOL) == True

    def test_with_array_fields(self):
        """Array-typed fields can be read back from the converted C++ doc."""
        exact_arrays = [
            ("tags", DataType.ARRAY_STRING),
            ("ids", DataType.ARRAY_UINT64),
            ("marks", DataType.ARRAY_UINT32),
            ("x", DataType.ARRAY_INT32),
            ("y", DataType.ARRAY_INT64),
        ]
        approx_arrays = [
            ("scores", DataType.ARRAY_FLOAT),
            ("ratios", DataType.ARRAY_DOUBLE),
        ]
        bool_array = ("results", DataType.ARRAY_BOOL)

        schema = CollectionSchema(
            name="test_collection",
            fields=[
                FieldSchema(name, dtype)
                for name, dtype in [*exact_arrays, *approx_arrays, bool_array]
            ],
        )
        doc = Doc(
            id="1",
            fields={
                "tags": ["tag1", "tag2", "tag3"],
                "ids": [111111111111, 222222222222, 333333333333],
                "marks": [100, 200, 300],
                "x": [1, 2, 3],
                "y": [100, 200, 300],
                "scores": [1.1, 2.2, 3.3],
                "ratios": [0.1, 0.2, 0.3],
                "results": [True, False, True],
            },
        )

        cpp_doc = convert_to_cpp_doc(doc, collection_schema=schema)

        assert cpp_doc is not None
        assert cpp_doc.pk() == doc.id

        # String and integer arrays are compared as whole lists.
        for name, dtype in exact_arrays:
            assert cpp_doc.get_any(name, dtype) == doc.field(name)

        # Float and double arrays are compared element by element with a tolerance.
        for name, dtype in approx_arrays:
            converted = cpp_doc.get_any(name, dtype)
            for idx, expected in enumerate(doc.field(name)):
                assert math.isclose(converted[idx], expected, rel_tol=1e-1)

        flags = cpp_doc.get_any(bool_array[0], bool_array[1])
        for idx, expected in enumerate(doc.field("results")):
            assert flags[idx] == expected

    def test_with_dense_vector_fields(self):
        """Dense FP16, FP32 and INT8 vectors can be read back from the C++ doc."""
        dims = {"embedding": 4, "image": 8, "text": 32}
        schema = CollectionSchema(
            name="test_collection",
            vectors=[
                VectorSchema(
                    name="embedding",
                    data_type=DataType.VECTOR_FP16,
                    dimension=dims["embedding"],
                ),
                VectorSchema(
                    name="image",
                    data_type=DataType.VECTOR_FP32,
                    dimension=dims["image"],
                ),
                VectorSchema(
                    name="text",
                    data_type=DataType.VECTOR_INT8,
                    dimension=dims["text"],
                ),
            ],
        )
        doc = Doc(
            id="1",
            vectors={
                "embedding": [1.1] * dims["embedding"],
                "image": [2.2] * dims["image"],
                "text": [4] * dims["text"],
            },
        )

        cpp_doc = convert_to_cpp_doc(doc, collection_schema=schema)
        assert cpp_doc is not None
        assert cpp_doc.pk() == doc.id

        # Floating-point vectors: check the length, then each component with a
        # loose relative tolerance.
        float_vectors = [
            ("embedding", DataType.VECTOR_FP16),
            ("image", DataType.VECTOR_FP32),
        ]
        for name, dtype in float_vectors:
            converted = cpp_doc.get_any(name, dtype)
            assert len(converted) == dims[name]
            for idx in range(dims[name]):
                assert math.isclose(
                    converted[idx], doc.vector(name)[idx], rel_tol=1e-1
                )

        # The INT8 vector is compared exactly.
        int8_vector = cpp_doc.get_any("text", DataType.VECTOR_INT8)
        assert len(int8_vector) == dims["text"]
        for idx in range(dims["text"]):
            assert int8_vector[idx] == doc.vectors["text"][idx]

    def test_with_sparse_vector_fields(self):
        """Sparse FP32 and FP16 vectors come back as dicts with matching entries."""
        sparse_types = {
            "author": DataType.SPARSE_VECTOR_FP32,
            "content": DataType.SPARSE_VECTOR_FP16,
        }
        schema = CollectionSchema(
            name="test_collection",
            vectors=[
                VectorSchema(name=name, data_type=dtype)
                for name, dtype in sparse_types.items()
            ],
        )
        doc = Doc(
            id="1",
            vectors={
                "author": {1: 1.1, 2: 2.2, 3: 3.3},
                "content": {4: 4.4, 5: 5.5, 6: 6.6},
            },
        )

        cpp_doc = convert_to_cpp_doc(doc, collection_schema=schema)
        assert cpp_doc is not None
        assert cpp_doc.pk() == doc.id

        for name, dtype in sparse_types.items():
            converted = cpp_doc.get_any(name, dtype)
            assert isinstance(converted, dict)
            # Every entry of the input must be present with a close weight.
            for index, weight in doc.vector(name).items():
                assert math.isclose(converted[index], weight, rel_tol=1e-1)

    def test_with_scalar_fields_error_datatype(self):
        """A value of the wrong Python type for a scalar field raises TypeError."""
        scalar_types = [
            ("id", DataType.UINT64),
            ("salary", DataType.UINT32),
            ("age", DataType.INT32),
            ("create_at", DataType.INT64),
            ("author", DataType.STRING),
            ("weight", DataType.FLOAT),
            ("bmi", DataType.DOUBLE),
            ("is_male", DataType.BOOL),
        ]
        schema = CollectionSchema(
            name="test_collection",
            fields=[FieldSchema(name, dtype) for name, dtype in scalar_types],
        )

        # One mistyped field per case: strings where numbers/bools are declared,
        # and an int where a string is declared.
        mistyped_cases = [
            {"id": "1"},
            {"salary": "1000"},
            {"age": "18"},
            {"create_at": "2021-01-01"},
            {"author": 1},
            {"weight": "80.5"},
            {"bmi": "25.0"},
            {"is_male": "true"},
        ]
        for bad_fields in mistyped_cases:
            # Build the doc outside the raises-block so only the conversion is
            # allowed to raise.
            doc = Doc(id="1", fields=bad_fields)
            with pytest.raises(TypeError):
                convert_to_cpp_doc(doc, collection_schema=schema)

    def test_with_array_fields_error_datatype(self):
        """Array fields whose elements have the wrong type raise TypeError."""
        array_types = [
            ("tags", DataType.ARRAY_STRING),
            ("ids", DataType.ARRAY_UINT64),
            ("marks", DataType.ARRAY_UINT32),
            ("x", DataType.ARRAY_INT32),
            ("y", DataType.ARRAY_INT64),
            ("scores", DataType.ARRAY_FLOAT),
            ("ratios", DataType.ARRAY_DOUBLE),
            ("results", DataType.ARRAY_BOOL),
        ]
        schema = CollectionSchema(
            name="test_collection",
            fields=[FieldSchema(name, dtype) for name, dtype in array_types],
        )

        # One mistyped array per case, in the same order as the schema.
        mistyped_cases = [
            {"tags": [1, 2, 3]},
            {"ids": ["1", "2", "3"]},
            {"marks": [1.1, 2.2, 3.3]},
            {"x": [1.1, 2.2, 3.3]},
            {"y": [1.1, 2.2, 3.3]},
            {"scores": ["1", "2", "3"]},
            {"ratios": ["1", "2", "3"]},
            {"results": ["1", "2", "3"]},
        ]
        for bad_fields in mistyped_cases:
            doc = Doc(id="1", fields=bad_fields)
            with pytest.raises(TypeError):
                convert_to_cpp_doc(doc, collection_schema=schema)

    def test_with_vector_fields_error_datatype(self):
        """Dense vectors made of strings raise TypeError on conversion."""
        dense_specs = [
            ("embedding", DataType.VECTOR_FP16, 4),
            ("image", DataType.VECTOR_FP32, 8),
            ("text", DataType.VECTOR_INT8, 32),
        ]
        schema = CollectionSchema(
            name="test_collection",
            vectors=[
                VectorSchema(name=name, data_type=dtype, dimension=dimension)
                for name, dtype, dimension in dense_specs
            ],
        )

        # NOTE(review): the "image" and "text" inputs have 4 elements while the
        # schema declares 8 and 32 dimensions, so these cases mix a type error
        # with a length mismatch — confirm which check is intended.
        mistyped_cases = [
            {"image": ["1.1"] * 4},
            {"text": ["1"] * 4},
            {"embedding": ["1"] * 4},
        ]
        for bad_vectors in mistyped_cases:
            doc = Doc(id="1", vectors=bad_vectors)
            with pytest.raises(TypeError):
                convert_to_cpp_doc(doc, collection_schema=schema)

    def test_with_sparse_vector_error_datatype(self):
        """Sparse vectors with string keys or string values raise TypeError."""
        sparse_types = [
            ("author", DataType.SPARSE_VECTOR_FP32),
            ("content", DataType.SPARSE_VECTOR_FP16),
        ]
        schema = CollectionSchema(
            name="test_collection",
            vectors=[
                VectorSchema(name=name, data_type=dtype)
                for name, dtype in sparse_types
            ],
        )

        mistyped_cases = [
            # String keys on the FP32 sparse vector.
            {"author": {"1": 1.1, "2": 2.2, "3": 3.3}},
            # String keys on the FP16 sparse vector.
            {"content": {"1": 1.1, "2": 2.2, "3": 3.3}},
            # Integer keys but string values.
            {"author": {1: "1", 2: "2", 3: "3"}},
        ]
        for bad_vectors in mistyped_cases:
            doc = Doc(id="1", vectors=bad_vectors)
            with pytest.raises(TypeError):
                convert_to_cpp_doc(doc, collection_schema=schema)


# ----------------------------
# Convert Py Doc Test Case
# ----------------------------
class TestConvertPyDoc:
    def test_default(self):
        """The primary key and score set on a C++ doc appear on the Python doc."""
        collection_schema = CollectionSchema(
            name="test_collection",
            fields=FieldSchema("name", DataType.STRING),
        )

        cpp_doc = _Doc()
        cpp_doc.set_pk("1")
        cpp_doc.set_score(1.0)

        converted = convert_to_py_doc(cpp_doc, collection_schema)

        assert converted.id == "1"
        assert converted.score == 1.0

    def test_with_scalar_fields(self):
        """Scalar values written to a C++ doc are returned unchanged by the Python doc."""
        scalar_types = [
            ("id", DataType.UINT64),
            ("salary", DataType.UINT32),
            ("age", DataType.INT32),
            ("create_at", DataType.INT64),
            ("author", DataType.STRING),
            ("weight", DataType.FLOAT),
            ("bmi", DataType.DOUBLE),
            ("is_male", DataType.BOOL),
        ]
        schema = CollectionSchema(
            name="test_collection",
            fields=[FieldSchema(name, dtype) for name, dtype in scalar_types],
        )

        # Insertion order drives both the writes and the later checks.
        values = {
            "id": 1111111111111111,
            "salary": 1000,
            "age": 18,
            "create_at": 1640995200,
            "author": "Tom",
            "weight": 80.0,
            "bmi": 80.0 / 200.0,
            "is_male": True,
        }

        cpp_doc = _Doc()
        cpp_doc.set_pk("1")
        for name, value in values.items():
            cpp_doc.set_any(name, schema.field(name)._get_object(), value)

        py_doc = convert_to_py_doc(cpp_doc, schema)

        assert py_doc.id == "1"
        for name, value in values.items():
            assert py_doc.field(name) == value

    def test_with_array_fields(self):
        """Array values written to a C++ doc are exposed by the converted Python doc."""
        array_types = [
            ("tags", DataType.ARRAY_STRING),
            ("ids", DataType.ARRAY_UINT64),
            ("marks", DataType.ARRAY_UINT32),
            ("x", DataType.ARRAY_INT32),
            ("y", DataType.ARRAY_INT64),
            ("scores", DataType.ARRAY_FLOAT),
            ("ratios", DataType.ARRAY_DOUBLE),
            ("results", DataType.ARRAY_BOOL),
        ]
        schema = CollectionSchema(
            name="test_collection",
            fields=[FieldSchema(name, dtype) for name, dtype in array_types],
        )

        array_values = {
            "tags": ["tag1", "tag2", "tag3"],
            "ids": [111111111111, 222222222222, 3333333333333],
            "marks": [1000, 2000, 3000],
            "x": [1, 2, 3],
            "y": [100, 200, 300],
            "scores": [0.1, 0.2, 0.3],
            "ratios": [0.1, 0.2, 0.3],
            "results": [True, False, True],
        }

        cpp_doc = _Doc()
        cpp_doc.set_pk("1")
        for name, values in array_values.items():
            cpp_doc.set_any(name, schema.field(name)._get_object(), values)

        py_doc = convert_to_py_doc(cpp_doc, schema)

        # String and integer arrays are compared as whole lists.
        for name in ("tags", "ids", "marks", "x", "y"):
            assert py_doc.field(name) == array_values[name]

        # Float and double arrays: compare what the C++ doc holds with what the
        # Python doc reports, element by element.
        for name, dtype in (
            ("scores", DataType.ARRAY_FLOAT),
            ("ratios", DataType.ARRAY_DOUBLE),
        ):
            stored = cpp_doc.get_any(name, dtype)
            for idx, value in enumerate(stored):
                assert math.isclose(value, py_doc.field(name)[idx], rel_tol=1e-1)

        stored_flags = cpp_doc.get_any("results", DataType.ARRAY_BOOL)
        for idx, flag in enumerate(stored_flags):
            assert flag == py_doc.field("results")[idx]

    def test_with_dense_vector_fields(self):
        """Dense vectors written to a C++ doc are preserved by convert_to_py_doc.

        Each converted vector is checked for length and then compared with the
        values that were written into the C++ doc.
        """
        schema = CollectionSchema(
            name="test_collection",
            vectors=[
                VectorSchema(
                    name="embedding",
                    data_type=DataType.VECTOR_FP16,
                    dimension=4,
                ),
                VectorSchema(
                    name="image",
                    data_type=DataType.VECTOR_FP32,
                    dimension=8,
                ),
                VectorSchema(
                    name="text",
                    data_type=DataType.VECTOR_INT8,
                    dimension=32,
                ),
            ],
        )

        # Reference values: the comparisons below must use these inputs, not a
        # second read of py_doc, otherwise the check compares a value with itself.
        expected_embedding = [1.1] * 4
        expected_image = [2.2] * 8
        expected_text = [4] * 32

        doc = _Doc()
        doc.set_pk("1")
        doc.set_any(
            "embedding", schema.vector("embedding")._get_object(), expected_embedding
        )
        doc.set_any("image", schema.vector("image")._get_object(), expected_image)
        doc.set_any("text", schema.vector("text")._get_object(), expected_text)

        py_doc = convert_to_py_doc(doc, schema)
        assert py_doc.id == "1"

        # Loose tolerance because the vector is stored as FP16.
        embedding_vector = py_doc.vector("embedding")
        assert len(embedding_vector) == 4
        for i in range(4):
            assert math.isclose(
                embedding_vector[i], expected_embedding[i], rel_tol=1e-1
            )

        image_vector = py_doc.vector("image")
        assert len(image_vector) == 8
        for i in range(8):
            assert math.isclose(image_vector[i], expected_image[i], rel_tol=1e-1)

        # INT8 components are compared exactly.
        text_vector = py_doc.vector("text")
        assert len(text_vector) == 32
        for i in range(32):
            assert text_vector[i] == expected_text[i]

    def test_with_sparse_vector_fields(self):
        """Sparse vectors on a C++ doc come back as dicts on the Python doc."""
        sparse_types = {
            "author": DataType.SPARSE_VECTOR_FP32,
            "content": DataType.SPARSE_VECTOR_FP16,
        }
        schema = CollectionSchema(
            name="test_collection",
            vectors=[
                VectorSchema(name=name, data_type=dtype)
                for name, dtype in sparse_types.items()
            ],
        )

        cpp_doc = _Doc()
        cpp_doc.set_pk("1")
        cpp_doc.set_any(
            "author", schema.vector("author")._get_object(), {1: 1.1, 2: 2.2, 3: 3.3}
        )
        cpp_doc.set_any(
            "content", schema.vector("content")._get_object(), {4: 4.4, 5: 5.5, 6: 6.6}
        )

        py_doc = convert_to_py_doc(cpp_doc, schema)
        assert py_doc.id == "1"

        for name, dtype in sparse_types.items():
            converted = py_doc.vector(name)
            assert isinstance(converted, dict)
            # Compare against what the C++ doc itself reports for this vector.
            for index, weight in cpp_doc.get_any(name, dtype).items():
                assert math.isclose(converted[index], weight, rel_tol=1e-1)


================================================
FILE: python/tests/test_doc.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import math
import pytest


from _zvec import _Doc
from zvec import FieldSchema, VectorSchema, Doc, DataType


# ----------------------------
# PyDoc Test Case
# ----------------------------
class TestPyDoc:
    def test_default(self):
        """A Doc built with only an id can be constructed and exposes that id."""
        doc = Doc(id="1")
        # Assert on the result so the test checks more than "did not raise".
        assert doc is not None
        assert doc.id == "1"

    def test_with_single_vector(self):
        """A Doc with one dense vector returns it through vector()."""
        single = Doc(id="1", vectors={"dense": [1, 2, 3]})

        assert single is not None
        assert single.id == "1"
        dense = single.vector("dense")
        assert dense == [1, 2, 3]

    def test_with_hybrid_vectors(self):
        """A Doc can hold a dense list vector and a sparse dict vector together."""
        hybrid = Doc(
            id="1",
            vectors={
                "dense": [1, 2, 3],
                "sparse": {1: 1.0, 2: 2.0, 3: 3.0},
            },
        )

        assert hybrid is not None
        assert hybrid.id == "1"
        dense = hybrid.vector("dense")
        assert dense == [1, 2, 3]
        sparse = hybrid.vector("sparse")
        assert sparse == {1: 1.0, 2: 2.0, 3: 3.0}

    def test_with_multi_vectors(self):
        """A Doc with several vectors and scalar fields returns each by name."""
        vectors = {
            "image": [1, 2, 3],
            "description": [4, 5, 6],
            "keys": {1: 1.0, 2: 2.0, 3: 3.0},
        }
        fields = {"author": "Tom", "age": 19, "is_male": True, "weight": 60.5}
        multi = Doc(id="1", vectors=vectors, fields=fields)

        assert multi is not None
        assert multi.id == "1"

        # Vectors are checked against fresh literals, not the input objects.
        assert multi.vector("image") == [1, 2, 3]
        assert multi.vector("description") == [4, 5, 6]
        assert multi.vector("keys") == {1: 1.0, 2: 2.0, 3: 3.0}

        # Scalar fields.
        assert multi.field("author") == "Tom"
        assert multi.field("age") == 19
        assert multi.field("is_male") == True
        assert multi.field("weight") == 60.5

    def test_with_numpy_array(self):
        """Doc._from_tuple accepts numpy arrays as vector values."""
        import numpy as np

        # NOTE(review): the tuple looks like (id, score, fields, vectors) —
        # confirm against Doc._from_tuple.
        vectors = {
            "image": np.array([1, 2, 3]),
            "description": np.random.random(512),
            "keys": {1: 1.0, 2: 2.0, 3: 3.0},
        }
        raw = ("1", 0.0, None, vectors)
        from_tuple = Doc._from_tuple(raw)

        assert from_tuple is not None
        assert from_tuple.id == "1"
        # The numpy-backed vector compares equal to a plain list.
        assert from_tuple.vector("image") == [1, 2, 3]
        assert from_tuple.vector("keys") == {1: 1.0, 2: 2.0, 3: 3.0}


# ----------------------------
# CppDoc Test Case
# ----------------------------
class TestCppDoc:
    def test_default(self):
        """A C++ doc can be constructed with no arguments."""
        empty_doc = _Doc()
        assert empty_doc is not None

    def test_doc_set_pk(self):
        """pk() returns the value passed to set_pk()."""
        primary_key = "1"
        cpp_doc = _Doc()
        cpp_doc.set_pk(primary_key)
        assert cpp_doc.pk() == "1"

    def test_doc_set_score(self):
        """score() returns (approximately) the value passed to set_score()."""
        cpp_doc = _Doc()
        cpp_doc.set_score(0.9)
        stored_score = cpp_doc.score()
        assert math.isclose(stored_score, 0.9, rel_tol=1e-6)

    def test_doc_get_null_field(self):
        """None stored in a nullable field is present and reads back as None."""
        nullable_schema = FieldSchema("author", DataType.STRING, nullable=True)
        cpp_doc = _Doc()
        cpp_doc.set_any("author", nullable_schema._get_object(), None)

        assert cpp_doc.has_field("author")
        stored = cpp_doc.get_any("author", nullable_schema.data_type)
        assert stored is None

    def test_doc_get_set_has_null_field(self):
        """Storing None in a field declared nullable=False raises ValueError."""
        required_schema = FieldSchema("author", DataType.STRING, nullable=False)
        cpp_doc = _Doc()
        with pytest.raises(ValueError):
            cpp_doc.set_any("author", required_schema._get_object(), None)

    def test_doc_get_set_has_string_field(self):
        """A STRING field can be set, detected with has_field and read back."""
        field_schema = FieldSchema("author", DataType.STRING)
        cpp_doc = _Doc()
        cpp_doc.set_any("author", field_schema._get_object(), "Tom")

        assert cpp_doc.has_field("author")
        stored = cpp_doc.get_any("author", DataType.STRING)
        assert stored == "Tom"

    def test_doc_get_set_has_bool_field(self):
        """A BOOL field can be set, detected with has_field and read back."""
        field_schema = FieldSchema("is_male", DataType.BOOL)
        cpp_doc = _Doc()
        cpp_doc.set_any("is_male", field_schema._get_object(), True)

        assert cpp_doc.has_field("is_male")
        stored = cpp_doc.get_any("is_male", DataType.BOOL)
        assert stored == True

    def test_doc_get_set_has_int32_field(self):
        """An INT32 field can be set, detected with has_field and read back."""
        field_schema = FieldSchema("age", DataType.INT32)
        cpp_doc = _Doc()
        cpp_doc.set_any("age", field_schema._get_object(), 19)

        assert cpp_doc.has_field("age")
        stored = cpp_doc.get_any("age", DataType.INT32)
        assert stored == 19

    def test_doc_get_set_has_int64_field(self):
        """An INT64 field holds a value well beyond the 32-bit range."""
        field_schema = FieldSchema("id", DataType.INT64)
        cpp_doc = _Doc()
        cpp_doc.set_any("id", field_schema._get_object(), 1111111111111111111)

        assert cpp_doc.has_field("id")
        stored = cpp_doc.get_any("id", DataType.INT64)
        assert stored == 1111111111111111111

    def test_doc_get_set_has_float_field(self):
        """A FLOAT field can be set and reads back within a relative tolerance."""
        field_schema = FieldSchema("weight", DataType.FLOAT)
        cpp_doc = _Doc()
        cpp_doc.set_any("weight", field_schema._get_object(), 60.5)

        assert cpp_doc.has_field("weight")
        stored = cpp_doc.get_any("weight", DataType.FLOAT)
        assert math.isclose(stored, 60.5, rel_tol=1e-6)

    def test_doc_get_set_has_double_field(self):
        """A DOUBLE field can be set and reads back within a tight tolerance."""
        doc = _Doc()
        schema = FieldSchema("height", DataType.DOUBLE)
        # Use one value for both the write and the expectation, so the check
        # cannot pass merely because two different literals are within tolerance.
        height = 1.77777777777
        doc.set_any("height", schema._get_object(), height)
        assert doc.has_field("height")
        assert math.isclose(
            doc.get_any("height", DataType.DOUBLE), height, rel_tol=1e-9
        )

    def test_doc_get_set_has_uint32_field(self):
        """A UINT32 field holds 4294967295 (2**32 - 1) without loss."""
        field_schema = FieldSchema("id", DataType.UINT32)
        cpp_doc = _Doc()
        cpp_doc.set_any("id", field_schema._get_object(), 4294967295)

        assert cpp_doc.has_field("id")
        stored = cpp_doc.get_any("id", DataType.UINT32)
        assert stored == 4294967295

    def test_doc_get_set_has_uint64_field(self):
        """A UINT64 field holds 18446744073709551615 (2**64 - 1) without loss."""
        field_schema = FieldSchema("id", DataType.UINT64)
        cpp_doc = _Doc()
        cpp_doc.set_any("id", field_schema._get_object(), 18446744073709551615)

        assert cpp_doc.has_field("id")
        stored = cpp_doc.get_any("id", DataType.UINT64)
        assert stored == 18446744073709551615

    def test_doc_get_set_has_array_string_field(self):
        """An ARRAY_STRING field reads back as the same list of strings."""
        field_schema = FieldSchema("tags", DataType.ARRAY_STRING)
        cpp_doc = _Doc()
        cpp_doc.set_any("tags", field_schema._get_object(), ["tag1", "tag2", "tag3"])

        assert cpp_doc.has_field("tags")
        stored = cpp_doc.get_any("tags", DataType.ARRAY_STRING)
        assert stored == ["tag1", "tag2", "tag3"]

    def test_doc_get_set_has_array_int32_field(self):
        """An ARRAY_INT32 field reads back as the same list of ints."""
        field_schema = FieldSchema("ids", DataType.ARRAY_INT32)
        cpp_doc = _Doc()
        cpp_doc.set_any("ids", field_schema._get_object(), [1, 2, 3])

        assert cpp_doc.has_field("ids")
        stored = cpp_doc.get_any("ids", DataType.ARRAY_INT32)
        assert stored == [1, 2, 3]

    def test_doc_get_set_has_array_int64_field(self):
        """An ARRAY_INT64 field reads back as the same list of ints."""
        field_schema = FieldSchema("ids", DataType.ARRAY_INT64)
        cpp_doc = _Doc()
        cpp_doc.set_any("ids", field_schema._get_object(), [1, 2, 3])

        assert cpp_doc.has_field("ids")
        stored = cpp_doc.get_any("ids", DataType.ARRAY_INT64)
        assert stored == [1, 2, 3]

    def test_doc_get_set_has_array_float_field(self):
        """An ARRAY_FLOAT field of whole-number floats reads back exactly."""
        field_schema = FieldSchema("weights", DataType.ARRAY_FLOAT)
        cpp_doc = _Doc()
        cpp_doc.set_any("weights", field_schema._get_object(), [1.0, 2.0, 3.0])

        assert cpp_doc.has_field("weights")
        stored = cpp_doc.get_any("weights", DataType.ARRAY_FLOAT)
        assert stored == [1.0, 2.0, 3.0]

    def test_doc_get_set_has_array_double_field(self):
        """An ARRAY_DOUBLE field of whole-number floats reads back exactly."""
        field_schema = FieldSchema("heights", DataType.ARRAY_DOUBLE)
        cpp_doc = _Doc()
        cpp_doc.set_any("heights", field_schema._get_object(), [1.0, 2.0, 3.0])

        assert cpp_doc.has_field("heights")
        stored = cpp_doc.get_any("heights", DataType.ARRAY_DOUBLE)
        assert stored == [1.0, 2.0, 3.0]

    def test_doc_get_set_has_array_bool_field(self):
        """An ARRAY_BOOL field reads back as the same list of booleans."""
        field_schema = FieldSchema("bools", DataType.ARRAY_BOOL)
        cpp_doc = _Doc()
        cpp_doc.set_any("bools", field_schema._get_object(), [True, False, True])

        assert cpp_doc.has_field("bools")
        stored = cpp_doc.get_any("bools", DataType.ARRAY_BOOL)
        assert stored == [True, False, True]

    def test_doc_get_set_has_vector_fp16(self):
        """A VECTOR_FP16 field can be set and its components read back."""
        vector_schema = VectorSchema("image", DataType.VECTOR_FP16)
        cpp_doc = _Doc()
        cpp_doc.set_any("image", vector_schema._get_object(), [1.0, 2.0, 3.0])

        assert cpp_doc.has_field("image")
        stored = cpp_doc.get_any("image", DataType.VECTOR_FP16)
        assert stored is not None
        # Walk the stored vector so an over-long result fails on the lookup.
        for idx, component in enumerate(stored):
            assert math.isclose(component, [1.0, 2.0, 3.0][idx], rel_tol=1e-6)

    def test_doc_get_set_has_vector_fp32(self):
        """A VECTOR_FP32 field reads back within a 1e-6 relative tolerance."""
        vector_schema = VectorSchema("image", DataType.VECTOR_FP32)
        cpp_doc = _Doc()
        cpp_doc.set_any(
            "image", vector_schema._get_object(), [1.111111, 2.222222, 3.333333]
        )

        assert cpp_doc.has_field("image")
        stored = cpp_doc.get_any("image", DataType.VECTOR_FP32)
        assert stored is not None
        for idx, component in enumerate(stored):
            expected = [1.111111, 2.222222, 3.333333][idx]
            assert math.isclose(component, expected, rel_tol=1e-6)

    def test_doc_get_set_has_vector_int8(self):
        """A VECTOR_INT8 field reads back as the same list of ints."""
        vector_schema = VectorSchema("image", DataType.VECTOR_INT8)
        cpp_doc = _Doc()
        cpp_doc.set_any("image", vector_schema._get_object(), [1, 2, 3])

        assert cpp_doc.has_field("image")
        stored = cpp_doc.get_any("image", DataType.VECTOR_INT8)
        assert stored == [1, 2, 3]

    def test_doc_get_set_has_sparse_vector_fp32(self):
        """A SPARSE_VECTOR_FP32 field reads back as a dict with close weights."""
        entries = {1: 1.111111, 2: 2.222222, 3: 3.333333}
        vector_schema = VectorSchema("key", DataType.SPARSE_VECTOR_FP32)
        cpp_doc = _Doc()
        cpp_doc.set_any("key", vector_schema._get_object(), entries)

        assert cpp_doc.has_field("key")
        stored = cpp_doc.get_any("key", DataType.SPARSE_VECTOR_FP32)
        assert stored is not None
        assert isinstance(stored, dict)
        for index, weight in entries.items():
            assert math.isclose(stored[index], weight, rel_tol=1e-6)

    def test_doc_get_set_has_sparse_vector_fp16(self):
        """A SPARSE_VECTOR_FP16 field reads back as a dict within a loose tolerance."""
        entries = {1: 1.1, 2: 2.2, 3: 3.3}
        vector_schema = VectorSchema("key", DataType.SPARSE_VECTOR_FP16)
        cpp_doc = _Doc()
        cpp_doc.set_any("key", vector_schema._get_object(), entries)

        assert cpp_doc.has_field("key")
        stored = cpp_doc.get_any("key", DataType.SPARSE_VECTOR_FP16)
        assert stored is not None
        assert isinstance(stored, dict)
        # The tolerance is wider than in the FP32 variant of this test.
        for index, weight in entries.items():
            assert math.isclose(stored[index], weight, rel_tol=1e-1)


================================================
FILE: python/tests/test_embedding.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import os
from http import HTTPStatus
from unittest.mock import MagicMock, patch, Mock

import numpy as np
import pytest
from zvec.extension import (
    BM25EmbeddingFunction,
    DefaultLocalDenseEmbedding,
    DefaultLocalSparseEmbedding,
    OpenAIDenseEmbedding,
    QwenDenseEmbedding,
    QwenSparseEmbedding,
)

# Integration tests (real API calls / real model loading) are opt-in:
# they run only when ZVEC_RUN_INTEGRATION_TESTS is set to exactly "1".
RUN_INTEGRATION_TESTS = os.getenv("ZVEC_RUN_INTEGRATION_TESTS", "0") == "1"


# ----------------------------
# QwenDenseEmbedding Test Case
# ----------------------------
class TestQwenDenseEmbedding:
    def test_init_with_api_key(self):
        """An explicitly passed API key is stored; the model uses its default name."""
        dense = QwenDenseEmbedding(dimension=128, api_key="test_key")

        assert dense.dimension == 128
        assert dense.model == "text-embedding-v4"
        assert dense._api_key == "test_key"

    @patch.dict(os.environ, {"DASHSCOPE_API_KEY": "env_key"})
    def test_init_with_env_api_key(self):
        """Without an explicit key, the key is taken from DASHSCOPE_API_KEY."""
        # The decorator patches the environment so the variable holds "env_key".
        dense = QwenDenseEmbedding(dimension=128)
        assert dense._api_key == "env_key"

    @patch.dict(os.environ, {"DASHSCOPE_API_KEY": ""})
    def test_init_with_empty_env_api_key(self):
        """An empty DASHSCOPE_API_KEY is rejected with ValueError at construction."""
        missing_key_error = pytest.raises(
            ValueError, match="DashScope API key is required"
        )
        with missing_key_error:
            QwenDenseEmbedding(dimension=128)

    def test_model_property(self):
        """``model`` reports the default name unless another one is passed in."""
        default_model = QwenDenseEmbedding(dimension=128, api_key="test_key").model
        assert default_model == "text-embedding-v4"

        custom_model = QwenDenseEmbedding(
            dimension=128, model="custom-model", api_key="test_key"
        ).model
        assert custom_model == "custom-model"

    @patch("zvec.extension.qwen_function.require_module")
    def test_embed_with_empty_text(self, mock_require_module):
        """``embed`` raises ValueError for "" and TypeError for ``None``."""
        dense = QwenDenseEmbedding(dimension=128, api_key="test_key")

        empty_text_error = pytest.raises(
            ValueError, match="Input text cannot be empty or whitespace only"
        )
        with empty_text_error:
            dense.embed("")

        with pytest.raises(TypeError):
            dense.embed(None)

    @patch("zvec.extension.qwen_function.require_module")
    def test_embed_success(self, mock_require_module):
        """An OK response carrying one embedding is returned as a plain list."""
        # Fake DashScope module whose TextEmbedding.call yields a single vector.
        api_response = MagicMock(
            status_code=HTTPStatus.OK,
            output={"embeddings": [{"embedding": [0.1, 0.2, 0.3]}]},
        )
        dashscope_stub = MagicMock()
        dashscope_stub.TextEmbedding.call.return_value = api_response
        mock_require_module.return_value = dashscope_stub

        dense = QwenDenseEmbedding(dimension=3, api_key="test_key")
        # Reset the cache on ``embed`` so earlier results cannot interfere.
        dense.embed.cache_clear()
        vector = dense.embed("test text")

        assert vector == [0.1, 0.2, 0.3]
        expected_call = {
            "model": "text-embedding-v4",
            "input": "test text",
            "dimension": 3,
            "output_type": "dense",
        }
        dashscope_stub.TextEmbedding.call.assert_called_once_with(**expected_call)

    @patch("zvec.extension.qwen_function.require_module")
    def test_embed_http_error(self, mock_require_module):
        """A BAD_REQUEST status from the mocked API makes ``embed`` raise ValueError."""
        failed_response = MagicMock(
            status_code=HTTPStatus.BAD_REQUEST, message="Bad Request"
        )
        dashscope_stub = MagicMock()
        dashscope_stub.TextEmbedding.call.return_value = failed_response
        mock_require_module.return_value = dashscope_stub

        dense = QwenDenseEmbedding(dimension=128, api_key="test_key")
        dense.embed.cache_clear()

        with pytest.raises(ValueError):
            dense.embed("test text")

    @patch("zvec.extension.qwen_function.require_module")
    def test_embed_invalid_response(self, mock_require_module):
        """An OK response with an empty ``embeddings`` list raises ValueError."""
        empty_response = MagicMock(
            status_code=HTTPStatus.OK, output={"embeddings": []}
        )
        dashscope_stub = MagicMock()
        dashscope_stub.TextEmbedding.call.return_value = empty_response
        mock_require_module.return_value = dashscope_stub

        dense = QwenDenseEmbedding(dimension=128, api_key="test_key")
        dense.embed.cache_clear()

        with pytest.raises(ValueError):
            dense.embed("test text")

    @pytest.mark.skipif(
        not RUN_INTEGRATION_TESTS,
        reason="Integration test skipped. Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.",
    )
    def test_real_embed_success(self):
        """Integration test against the real DashScope API.

        Requires these environment variables:
            export ZVEC_RUN_INTEGRATION_TESTS=1
            export DASHSCOPE_API_KEY=your-api-key
        """
        dense = QwenDenseEmbedding(dimension=128)
        # The instance is invoked directly rather than through ``embed``.
        vector = dense("test text")
        assert len(vector) == 128


# ----------------------------
# QwenSparseEmbedding Test Case
# ----------------------------
class TestQwenSparseEmbedding:
    """Test suite for QwenSparseEmbedding (Qwen sparse embedding via DashScope API)."""

    def test_init_with_api_key(self):
        """An explicit API key is stored together with the dimension and default model."""
        sparse = QwenSparseEmbedding(dimension=1024, api_key="test_key")

        assert sparse._dimension == 1024
        assert sparse.model == "text-embedding-v4"
        assert sparse._api_key == "test_key"
        # A missing "encoding_type" entry in extra_params is treated as "query".
        encoding_type = sparse.extra_params.get("encoding_type", "query")
        assert encoding_type == "query"

    def test_init_with_custom_encoding_type(self):
        """A custom ``encoding_type`` keyword ends up in ``extra_params``."""
        sparse = QwenSparseEmbedding(
            dimension=1024, encoding_type="document", api_key="test_key"
        )
        stored_type = sparse.extra_params.get("encoding_type")
        assert stored_type == "document"

    @patch.dict(os.environ, {"DASHSCOPE_API_KEY": "env_key"})
    def test_init_with_env_api_key(self):
        """Without an explicit key, the key is taken from DASHSCOPE_API_KEY."""
        # The decorator patches the environment so the variable holds "env_key".
        sparse = QwenSparseEmbedding(dimension=1024)
        assert sparse._api_key == "env_key"

    @patch.dict(os.environ, {"DASHSCOPE_API_KEY": ""})
    def test_init_without_api_key(self):
        """Construction raises ValueError when DASHSCOPE_API_KEY is empty."""
        missing_key_error = pytest.raises(
            ValueError, match="DashScope API key is required"
        )
        with missing_key_error:
            QwenSparseEmbedding(dimension=1024)

    def test_model_property(self):
        """``model`` reports the default name unless another one is passed in."""
        default_model = QwenSparseEmbedding(dimension=1024, api_key="test_key").model
        assert default_model == "text-embedding-v4"

        custom_model = QwenSparseEmbedding(
            dimension=1024, model="text-embedding-v3", api_key="test_key"
        ).model
        assert custom_model == "text-embedding-v3"

    def test_encoding_type_property(self):
        """Both "query" and "document" encoding types are kept in ``extra_params``."""
        for encoding in ("query", "document"):
            sparse = QwenSparseEmbedding(
                dimension=1024, encoding_type=encoding, api_key="test_key"
            )
            assert sparse.extra_params.get("encoding_type") == encoding

    @patch("zvec.extension.qwen_function.require_module")
    def test_embed_with_empty_text(self, mock_require_module):
        """``embed`` raises ValueError for empty and whitespace-only input."""
        sparse = QwenSparseEmbedding(dimension=1024, api_key="test_key")

        for blank_text in ("", "   "):
            with pytest.raises(
                ValueError, match="Input text cannot be empty or whitespace only"
            ):
                sparse.embed(blank_text)

    @patch("zvec.extension.qwen_function.require_module")
    def test_embed_with_non_string_input(self, mock_require_module):
        """``embed`` raises TypeError for an int and for ``None``."""
        sparse = QwenSparseEmbedding(dimension=1024, api_key="test_key")

        for bad_input in (123, None):
            with pytest.raises(TypeError, match="Expected 'input' to be str"):
                sparse.embed(bad_input)

    @patch("zvec.extension.qwen_function.require_module")
    def test_embed_success(self, mock_require_module):
        """A well-formed sparse response becomes an index-sorted ``{int: float}`` dict."""
        # The mocked payload carries a list of {index, value, token} entries.
        sparse_entries = [
            {"index": 10, "value": 0.5, "token": "机器"},
            {"index": 245, "value": 0.8, "token": "学习"},
            {"index": 1023, "value": 1.2, "token": "算法"},
        ]
        api_response = MagicMock(
            status_code=HTTPStatus.OK,
            output={"embeddings": [{"sparse_embedding": sparse_entries}]},
        )
        dashscope_stub = MagicMock()
        dashscope_stub.TextEmbedding.call.return_value = api_response
        mock_require_module.return_value = dashscope_stub

        sparse = QwenSparseEmbedding(dimension=1024, api_key="test_key")
        # Reset the cache on ``embed`` so earlier results cannot interfere.
        sparse.embed.cache_clear()
        result = sparse.embed("test text")

        # Container and element types.
        assert isinstance(result, dict)
        assert all(isinstance(index, int) for index in result.keys())
        assert all(isinstance(weight, float) for weight in result.values())
        # Every weight is strictly positive.
        assert all(weight > 0 for weight in result.values())
        # Keys come back in ascending order and match the mocked indices.
        indices = list(result.keys())
        assert indices == sorted(indices)
        assert indices == [10, 245, 1023]

        expected_call = {
            "model": "text-embedding-v4",
            "input": "test text",
            "dimension": 1024,
            "output_type": "sparse",
            "text_type": "query",
        }
        dashscope_stub.TextEmbedding.call.assert_called_once_with(**expected_call)

    @patch("zvec.extension.qwen_function.require_module")
    def test_embed_with_document_encoding_type(self, mock_require_module):
        """``encoding_type="document"`` is sent to the API as ``text_type="document"``."""
        sparse_entries = [
            {"index": 5, "value": 0.3, "token": "文档"},
            {"index": 100, "value": 0.7, "token": "内容"},
            {"index": 500, "value": 0.9, "token": "检索"},
        ]
        api_response = MagicMock(
            status_code=HTTPStatus.OK,
            output={"embeddings": [{"sparse_embedding": sparse_entries}]},
        )
        dashscope_stub = MagicMock()
        dashscope_stub.TextEmbedding.call.return_value = api_response
        mock_require_module.return_value = dashscope_stub

        sparse = QwenSparseEmbedding(
            dimension=1024, encoding_type="document", api_key="test_key"
        )
        sparse.embed.cache_clear()
        result = sparse.embed("test document")

        assert isinstance(result, dict)
        assert list(result.keys()) == [5, 100, 500]

        # Inspect the keyword arguments of the recorded API call.
        _, call_kwargs = dashscope_stub.TextEmbedding.call.call_args
        assert call_kwargs["text_type"] == "document"
        assert call_kwargs["output_type"] == "sparse"

    @patch("zvec.extension.qwen_function.require_module")
    def test_embed_output_sorted_by_indices(self, mock_require_module):
        """Entries returned out of order come back sorted by ascending index."""
        # Deliberately unsorted indices in the mocked payload.
        shuffled_entries = [
            {"index": 9999, "value": 1.5, "token": "A"},
            {"index": 5, "value": 2.0, "token": "B"},
            {"index": 1234, "value": 0.8, "token": "C"},
            {"index": 77, "value": 3.2, "token": "D"},
            {"index": 500, "value": 1.1, "token": "E"},
        ]
        api_response = MagicMock(
            status_code=HTTPStatus.OK,
            output={"embeddings": [{"sparse_embedding": shuffled_entries}]},
        )
        dashscope_stub = MagicMock()
        dashscope_stub.TextEmbedding.call.return_value = api_response
        mock_require_module.return_value = dashscope_stub

        sparse = QwenSparseEmbedding(dimension=1024, api_key="test_key")
        sparse.embed.cache_clear()
        result = sparse.embed("test sorting")

        indices = list(result.keys())
        # Sorted in general, and equal to the exact expected sequence.
        assert indices == sorted(indices)
        assert indices == [5, 77, 500, 1234, 9999]

    @patch("zvec.extension.qwen_function.require_module")
    def test_embed_filters_zero_values(self, mock_require_module):
        """Entries whose value is zero or negative are dropped from the result."""
        mixed_entries = [
            {"index": 10, "value": 0.5, "token": "正"},
            {"index": 20, "value": 0.0, "token": "零"},  # expected to be dropped
            {"index": 30, "value": -0.3, "token": "负"},  # expected to be dropped
            {"index": 40, "value": 0.8, "token": "正"},
            {"index": 50, "value": 0.0, "token": "零"},  # expected to be dropped
        ]
        api_response = MagicMock(
            status_code=HTTPStatus.OK,
            output={"embeddings": [{"sparse_embedding": mixed_entries}]},
        )
        dashscope_stub = MagicMock()
        dashscope_stub.TextEmbedding.call.return_value = api_response
        mock_require_module.return_value = dashscope_stub

        sparse = QwenSparseEmbedding(dimension=1024, api_key="test_key")
        sparse.embed.cache_clear()
        result = sparse.embed("test filtering")

        # Only the two strictly positive entries survive.
        assert list(result.keys()) == [10, 40]
        assert all(weight > 0 for weight in result.values())

    @patch("zvec.extension.qwen_function.require_module")
    def test_embed_http_error(self, mock_require_module):
        """A BAD_REQUEST status raises ValueError mentioning a DashScope API error."""
        failed_response = MagicMock(
            status_code=HTTPStatus.BAD_REQUEST, message="Bad Request"
        )
        dashscope_stub = MagicMock()
        dashscope_stub.TextEmbedding.call.return_value = failed_response
        mock_require_module.return_value = dashscope_stub

        sparse = QwenSparseEmbedding(dimension=1024, api_key="test_key")
        sparse.embed.cache_clear()

        api_error = pytest.raises(ValueError, match="DashScope API error")
        with api_error:
            sparse.embed("test text")

    @patch("zvec.extension.qwen_function.require_module")
    def test_embed_invalid_response_no_embeddings(self, mock_require_module):
        """An OK response with an empty ``embeddings`` list raises ValueError."""
        empty_response = MagicMock(
            status_code=HTTPStatus.OK, output={"embeddings": []}
        )
        dashscope_stub = MagicMock()
        dashscope_stub.TextEmbedding.call.return_value = empty_response
        mock_require_module.return_value = dashscope_stub

        sparse = QwenSparseEmbedding(dimension=1024, api_key="test_key")
        sparse.embed.cache_clear()

        count_error = pytest.raises(ValueError, match="Expected exactly 1 embedding")
        with count_error:
            sparse.embed("test text")

    @patch("zvec.extension.qwen_function.require_module")
    def test_embed_invalid_response_not_dict(self, mock_require_module):
        """A ``sparse_embedding`` field that is a dict instead of a list raises ValueError."""
        # The payload is malformed on purpose: a single dict where a list is expected.
        malformed_output = {
            "embeddings": [{"sparse_embedding": {"index": 10, "value": 0.5}}]
        }
        api_response = MagicMock(status_code=HTTPStatus.OK, output=malformed_output)
        dashscope_stub = MagicMock()
        dashscope_stub.TextEmbedding.call.return_value = api_response
        mock_require_module.return_value = dashscope_stub

        sparse = QwenSparseEmbedding(dimension=1024, api_key="test_key")
        sparse.embed.cache_clear()

        shape_error = pytest.raises(
            ValueError, match="'sparse_embedding' field is missing or not a list"
        )
        with shape_error:
            sparse.embed("test text")

    @patch("zvec.extension.qwen_function.require_module")
    def test_embed_callable_interface(self, mock_require_module):
        """Calling the instance directly yields the sparse dict for the mocked payload."""
        sparse_entries = [
            {"index": 100, "value": 1.0, "token": "测试"},
            {"index": 200, "value": 0.5, "token": "调用"},
        ]
        api_response = MagicMock(
            status_code=HTTPStatus.OK,
            output={"embeddings": [{"sparse_embedding": sparse_entries}]},
        )
        dashscope_stub = MagicMock()
        dashscope_stub.TextEmbedding.call.return_value = api_response
        mock_require_module.return_value = dashscope_stub

        sparse = QwenSparseEmbedding(dimension=1024, api_key="test_key")
        sparse.embed.cache_clear()

        # Invoke the object itself instead of its ``embed`` method.
        result = sparse("test text")
        assert isinstance(result, dict)
        assert list(result.keys()) == [100, 200]

    @patch("zvec.extension.qwen_function.require_module")
    def test_embed_api_connection_error(self, mock_require_module):
        """An exception raised by the API call surfaces as RuntimeError."""
        dashscope_stub = MagicMock()
        # Make the mocked API call itself blow up.
        dashscope_stub.TextEmbedding.call.side_effect = Exception("Connection timeout")
        mock_require_module.return_value = dashscope_stub

        sparse = QwenSparseEmbedding(dimension=1024, api_key="test_key")
        sparse.embed.cache_clear()

        call_failure = pytest.raises(RuntimeError, match="Failed to call DashScope API")
        with call_failure:
            sparse.embed("test text")

    @pytest.mark.skipif(
        not RUN_INTEGRATION_TESTS,
        reason="Integration test skipped. Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.",
    )
    def test_real_embed_success(self):
        """Integration test against the real DashScope API.

        Requires these environment variables:
            export ZVEC_RUN_INTEGRATION_TESTS=1
            export DASHSCOPE_API_KEY=your-api-key
        """
        # --- query-side embedding ---
        query_func = QwenSparseEmbedding(dimension=1024, encoding_type="query")
        query_vector = query_func.embed("machine learning")

        assert isinstance(query_vector, dict)
        assert len(query_vector) > 0
        assert all(isinstance(index, int) for index in query_vector.keys())
        assert all(
            isinstance(weight, float) and weight > 0
            for weight in query_vector.values()
        )

        # Keys must already be in ascending order.
        query_indices = list(query_vector.keys())
        assert query_indices == sorted(query_indices)

        # --- document-side embedding ---
        document_func = QwenSparseEmbedding(dimension=1024, encoding_type="document")
        document_vector = document_func.embed("Machine learning is a subset of AI")

        assert isinstance(document_vector, dict)
        assert len(document_vector) > 0

        # Keys must already be in ascending order.
        document_indices = list(document_vector.keys())
        assert document_indices == sorted(document_indices)


# ----------------------------
# OpenAIDenseEmbedding Test Case
# ----------------------------
class TestOpenAIDenseEmbedding:
    def test_init_with_api_key(self):
        """An explicit API key is stored; model and dimension use their defaults."""
        openai_func = OpenAIDenseEmbedding(api_key="sk-test-key")

        # 1536 is the default dimension for text-embedding-3-small.
        assert openai_func.dimension == 1536
        assert openai_func.model == "text-embedding-3-small"
        assert openai_func._api_key == "sk-test-key"

    @patch.dict(os.environ, {"OPENAI_API_KEY": "sk-env-key"})
    def test_init_with_env_api_key(self):
        """Without an explicit key, the key is taken from OPENAI_API_KEY."""
        # The decorator patches the environment so the variable holds "sk-env-key".
        openai_func = OpenAIDenseEmbedding()
        assert openai_func._api_key == "sk-env-key"

    @patch.dict(os.environ, {"OPENAI_API_KEY": ""})
    def test_init_without_api_key(self):
        """Construction raises ValueError when OPENAI_API_KEY is empty."""
        missing_key_error = pytest.raises(
            ValueError, match="OpenAI API key is required"
        )
        with missing_key_error:
            OpenAIDenseEmbedding()

    def test_init_with_custom_dimension(self):
        """An explicit ``dimension`` and ``model`` are both reported back unchanged."""
        settings = {"model": "text-embedding-3-large", "dimension": 1024}
        openai_func = OpenAIDenseEmbedding(api_key="sk-test", **settings)

        assert openai_func.dimension == 1024
        assert openai_func.model == "text-embedding-3-large"

    def test_init_with_base_url(self):
        """A custom ``base_url`` is stored verbatim on the instance."""
        custom_url = "https://custom.openai.com/"
        openai_func = OpenAIDenseEmbedding(api_key="sk-test", base_url=custom_url)
        assert openai_func._base_url == "https://custom.openai.com/"

    def test_model_property(self):
        """``model`` reports the default name unless another one is passed in."""
        default_model = OpenAIDenseEmbedding(api_key="sk-test").model
        assert default_model == "text-embedding-3-small"

        custom_model = OpenAIDenseEmbedding(
            model="text-embedding-ada-002", api_key="sk-test"
        ).model
        assert custom_model == "text-embedding-ada-002"

    def test_extra_params(self):
        """``extra_params`` is empty by default and reflects extra keyword arguments."""
        # No additional keyword arguments -> empty mapping.
        bare_func = OpenAIDenseEmbedding(api_key="sk-test")
        assert bare_func.extra_params == {}

        # Additional keyword arguments are exposed as-is.
        passthrough = {"encoding_format": "float", "user": "test-user"}
        configured_func = OpenAIDenseEmbedding(api_key="sk-test", **passthrough)
        assert configured_func.extra_params == {
            "encoding_format": "float",
            "user": "test-user",
        }

    @patch("zvec.extension.openai_function.require_module")
    def test_embed_with_empty_text(self, mock_require_module):
        """``embed`` raises ValueError for empty and whitespace-only input."""
        openai_func = OpenAIDenseEmbedding(api_key="sk-test")

        for blank_text in ("", "   "):
            with pytest.raises(
                ValueError, match="Input text cannot be empty or whitespace only"
            ):
                openai_func.embed(blank_text)

    @patch("zvec.extension.openai_function.require_module")
    def test_embed_with_non_string_input(self, mock_require_module):
        """``embed`` raises TypeError for an int and for ``None``."""
        openai_func = OpenAIDenseEmbedding(api_key="sk-test")

        for bad_input in (123, None):
            with pytest.raises(TypeError, match="Expected 'input' to be str"):
                openai_func.embed(bad_input)

    @patch("zvec.extension.openai_function.require_module")
    def test_embed_success(self, mock_require_module):
        """A response with one embedding object is returned as a plain list."""
        # Build the fake module -> client -> response chain.
        openai_stub = Mock()
        client_stub = Mock()
        api_response = Mock()

        # The response carries a single object exposing ``.embedding``.
        api_response.data = [Mock(embedding=[0.1, 0.2, 0.3])]

        client_stub.embeddings.create.return_value = api_response
        openai_stub.OpenAI.return_value = client_stub
        mock_require_module.return_value = openai_stub

        openai_func = OpenAIDenseEmbedding(dimension=3, api_key="sk-test")
        openai_func.embed.cache_clear()
        vector = openai_func.embed("test text")

        assert vector == [0.1, 0.2, 0.3]
        expected_call = {
            "model": "text-embedding-3-small",
            "input": "test text",
            "dimensions": 3,
        }
        client_stub.embeddings.create.assert_called_once_with(**expected_call)

    @patch("zvec.extension.openai_function.require_module")
    def test_embed_with_custom_model(self, mock_require_module):
        """With a custom model and no dimension, ``dimensions`` is not sent to the API."""
        openai_stub = Mock()
        client_stub = Mock()
        api_response = Mock()

        # A single 1536-element embedding in the mocked response.
        api_response.data = [Mock(embedding=[0.1] * 1536)]

        client_stub.embeddings.create.return_value = api_response
        openai_stub.OpenAI.return_value = client_stub
        mock_require_module.return_value = openai_stub

        openai_func = OpenAIDenseEmbedding(
            model="text-embedding-ada-002", api_key="sk-test"
        )
        openai_func.embed.cache_clear()
        vector = openai_func.embed("test text")

        assert len(vector) == 1536
        expected_call = {"model": "text-embedding-ada-002", "input": "test text"}
        client_stub.embeddings.create.assert_called_once_with(**expected_call)

    @patch("zvec.extension.openai_function.require_module")
    def test_embed_api_error(self, mock_require_module):
        """An ``APIError`` raised by the client surfaces as RuntimeError.

        The fake ``openai`` module is given real exception classes for
        ``APIError`` and ``APIConnectionError`` so that an instance can be
        raised through ``side_effect``.
        """
        mock_openai = Mock()
        mock_client = Mock()

        # Real Exception subclasses are required here: only exception classes
        # or instances are raised when assigned to ``side_effect``.
        # NOTE(review): presumably the implementation also names these two
        # attributes in an ``except`` clause - confirm against openai_function.
        mock_openai.APIError = type("APIError", (Exception,), {})
        mock_openai.APIConnectionError = type("APIConnectionError", (Exception,), {})

        # Simulate the API call failing.
        mock_client.embeddings.create.side_effect = mock_openai.APIError(
            "Rate limit exceeded"
        )
        mock_openai.OpenAI.return_value = mock_client
        mock_require_module.return_value = mock_openai

        embedding_func = OpenAIDenseEmbedding(api_key="sk-test")
        embedding_func.embed.cache_clear()

        with pytest.raises(RuntimeError, match="Failed to call OpenAI API"):
            embedding_func.embed("test text")

    @patch("zvec.extension.openai_function.require_module")
    def test_embed_invalid_response(self, mock_require_module):
        """A response whose ``data`` list is empty raises ValueError."""
        openai_stub = Mock()
        client_stub = Mock()
        api_response = Mock()

        # No embedding objects at all in the response.
        api_response.data = []

        client_stub.embeddings.create.return_value = api_response
        openai_stub.OpenAI.return_value = client_stub
        # Real exception classes on the fake module.
        openai_stub.APIError = type("APIError", (Exception,), {})
        openai_stub.APIConnectionError = type("APIConnectionError", (Exception,), {})
        mock_require_module.return_value = openai_stub

        openai_func = OpenAIDenseEmbedding(api_key="sk-test")
        openai_func.embed.cache_clear()

        no_data_error = pytest.raises(ValueError, match="no embedding data returned")
        with no_data_error:
            openai_func.embed("test text")

    @patch("zvec.extension.openai_function.require_module")
    def test_embed_dimension_mismatch(self, mock_require_module):
        """A 512-element vector for a 1536-dimension function raises ValueError."""
        openai_stub = Mock()
        client_stub = Mock()
        api_response = Mock()

        # The mocked vector is shorter than the configured dimension.
        api_response.data = [Mock(embedding=[0.1] * 512)]

        client_stub.embeddings.create.return_value = api_response
        openai_stub.OpenAI.return_value = client_stub
        # Real exception classes on the fake module.
        openai_stub.APIError = type("APIError", (Exception,), {})
        openai_stub.APIConnectionError = type("APIConnectionError", (Exception,), {})
        mock_require_module.return_value = openai_stub

        openai_func = OpenAIDenseEmbedding(dimension=1536, api_key="sk-test")
        openai_func.embed.cache_clear()

        mismatch_error = pytest.raises(ValueError, match="Dimension mismatch")
        with mismatch_error:
            openai_func.embed("test text")

    @patch("zvec.extension.openai_function.require_module")
    def test_embed_callable(self, mock_require_module):
        """Calling the instance directly returns the mocked 1536-element list."""
        openai_stub = Mock()
        client_stub = Mock()
        api_response = Mock()

        api_response.data = [Mock(embedding=[0.1] * 1536)]

        client_stub.embeddings.create.return_value = api_response
        openai_stub.OpenAI.return_value = client_stub
        # Real exception classes on the fake module.
        openai_stub.APIError = type("APIError", (Exception,), {})
        openai_stub.APIConnectionError = type("APIConnectionError", (Exception,), {})
        mock_require_module.return_value = openai_stub

        openai_func = OpenAIDenseEmbedding(api_key="sk-test")
        openai_func.embed.cache_clear()

        # Invoke the object itself instead of its ``embed`` method.
        vector = openai_func("test text")
        assert isinstance(vector, list)
        assert len(vector) == 1536

    @patch("zvec.extension.openai_function.require_module")
    def test_embed_with_base_url(self, mock_require_module):
        """The client is constructed with the configured ``api_key`` and ``base_url``."""
        openai_stub = Mock()
        client_stub = Mock()
        api_response = Mock()

        api_response.data = [Mock(embedding=[0.1] * 1536)]

        client_stub.embeddings.create.return_value = api_response
        openai_stub.OpenAI.return_value = client_stub
        # Real exception classes on the fake module.
        openai_stub.APIError = type("APIError", (Exception,), {})
        openai_stub.APIConnectionError = type("APIConnectionError", (Exception,), {})
        mock_require_module.return_value = openai_stub

        openai_func = OpenAIDenseEmbedding(
            api_key="sk-test", base_url="https://custom.openai.com/"
        )
        openai_func.embed.cache_clear()
        vector = openai_func.embed("test text")

        # Exactly one client was built, and with the custom endpoint.
        expected_client_args = {
            "api_key": "sk-test",
            "base_url": "https://custom.openai.com/",
        }
        openai_stub.OpenAI.assert_called_once_with(**expected_client_args)
        assert len(vector) == 1536

    @pytest.mark.skipif(
        not RUN_INTEGRATION_TESTS,
        reason="Integration test skipped. Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.",
    )
    def test_real_embed_success(self):
        """Integration test through a real OpenAI-compatible endpoint.

        The request goes to the DashScope compatible-mode base URL with the
        "text-embedding-v4" model, not to the default OpenAI endpoint.

        Requires these environment variables:
            export ZVEC_RUN_INTEGRATION_TESTS=1
            export OPENAI_API_KEY=sk-...

        NOTE(review): the key presumably has to be valid for the DashScope
        endpoint used below - confirm.
        """
        endpoint_settings = {
            "model": "text-embedding-v4",
            "dimension": 256,
            "base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
        }
        openai_func = OpenAIDenseEmbedding(**endpoint_settings)
        result = openai_func.embed("Hello, world!")

        assert len(result) == 256
        assert isinstance(result, list)
        assert all(isinstance(component, float) for component in result)


# ----------------------------
# DefaultLocalDenseEmbedding Test Case
# ----------------------------
class TestDefaultLocalDenseEmbedding:
    """Test cases for DefaultLocalDenseEmbedding."""

    @patch("zvec.extension.sentence_transformer_function.require_module")
    def test_init_success(self, mock_require_module):
        """Default construction reports the MiniLM/Hugging Face defaults."""
        # Stand-in for the loaded SentenceTransformer instance.
        st_model = Mock(device="cpu")
        st_model.get_sentence_embedding_dimension.return_value = 384

        # Stand-in for the ``sentence_transformers`` module handed out by require_module.
        st_module = Mock()
        st_module.SentenceTransformer.return_value = st_model
        mock_require_module.return_value = st_module

        embedder = DefaultLocalDenseEmbedding()

        assert embedder.dimension == 384
        assert embedder.model_name == "all-MiniLM-L6-v2"
        assert embedder.model_source == "huggingface"
        assert embedder.device == "cpu"
        # No device was requested, so ``device=None`` is what the constructor receives.
        st_module.SentenceTransformer.assert_called_once_with(
            "all-MiniLM-L6-v2", device=None, trust_remote_code=True
        )

    @patch("zvec.extension.sentence_transformer_function.require_module")
    def test_init_with_custom_device(self, mock_require_module):
        """An explicit ``device`` argument is passed through to SentenceTransformer."""
        target_device = "cuda"

        st_model = Mock(device=target_device)
        st_model.get_sentence_embedding_dimension.return_value = 384
        st_module = Mock()
        st_module.SentenceTransformer.return_value = st_model
        mock_require_module.return_value = st_module

        embedder = DefaultLocalDenseEmbedding(device=target_device)

        assert embedder.device == target_device
        st_module.SentenceTransformer.assert_called_once_with(
            "all-MiniLM-L6-v2", device=target_device, trust_remote_code=True
        )

    @pytest.mark.skipif(
        not RUN_INTEGRATION_TESTS,
        reason="Integration test skipped. Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.",
    )
    @patch("zvec.extension.sentence_transformer_function.require_module")
    def test_init_with_modelscope(self, mock_require_module):
        """``model_source="modelscope"`` selects the ModelScope default model."""
        st_model = Mock(device="cpu")
        st_model.get_sentence_embedding_dimension.return_value = 384
        st_module = Mock()
        st_module.SentenceTransformer.return_value = st_model
        ms_module = Mock()

        # Lookup table standing in for require_module; any other name fails
        # with ImportError.
        fake_modules = {"sentence_transformers": st_module, "modelscope": ms_module}

        def fake_require(module_name):
            if module_name in fake_modules:
                return fake_modules[module_name]
            raise ImportError(f"No module named '{module_name}'")

        mock_require_module.side_effect = fake_require

        # Replace the download helper with a stub that returns a fixed path.
        snapshot_patch = patch(
            "modelscope.hub.snapshot_download.snapshot_download",
            return_value="/path/to/cached/model",
        )
        with snapshot_patch:
            embedder = DefaultLocalDenseEmbedding(model_source="modelscope")

        assert embedder.dimension == 384
        assert embedder.model_name == "iic/nlp_gte_sentence-embedding_chinese-small"
        assert embedder.model_source == "modelscope"

    @patch("zvec.extension.sentence_transformer_function.require_module")
    def test_init_with_invalid_model_source(self, mock_require_module):
        """An unrecognized ``model_source`` is rejected with ValueError."""
        st_model = Mock()
        st_model.get_sentence_embedding_dimension.return_value = 384
        st_module = Mock()
        st_module.SentenceTransformer.return_value = st_model
        mock_require_module.return_value = st_module

        expected_failure = pytest.raises(ValueError, match="Invalid model_source")
        with expected_failure:
            DefaultLocalDenseEmbedding(model_source="invalid_source")

    @patch("zvec.extension.sentence_transformer_function.require_module")
    def test_embed_success(self, mock_require_module):
        """``embed`` returns a list of floats and calls ``encode`` with the expected options."""
        dim = 384
        text = "Hello, world!"

        # The mocked model's encode() yields a random float32 vector of length ``dim``.
        st_model = Mock()
        st_model.get_sentence_embedding_dimension.return_value = dim
        st_model.encode = Mock(return_value=np.random.rand(dim).astype(np.float32))

        st_module = Mock()
        st_module.SentenceTransformer.return_value = st_model
        mock_require_module.return_value = st_module

        embedder = DefaultLocalDenseEmbedding()
        vector = embedder.embed(text)

        assert isinstance(vector, list)
        assert len(vector) == dim
        for component in vector:
            assert isinstance(component, float)

        # Exactly one encode() call, with the text and these keyword options.
        st_model.encode.assert_called_once_with(
            text,
            convert_to_numpy=True,
            normalize_embeddings=True,
            batch_size=32,
        )

    @patch("zvec.extension.sentence_transformer_function.require_module")
    def test_embed_with_normalization(self, mock_require_module):
        """With ``normalize_embeddings=True`` the result has unit L2 norm.

        The mocked model returns a vector that is already L2-normalized, so the
        norm check alone would pass even if the flag were never forwarded. The
        keyword arguments of the ``encode`` call are therefore checked as well.
        """
        # Pre-normalized float32 vector returned by the mocked encode().
        fake_embedding = np.random.rand(384).astype(np.float32)
        fake_embedding = fake_embedding / np.linalg.norm(fake_embedding)

        mock_st = Mock()
        mock_model = Mock()
        mock_model.get_sentence_embedding_dimension.return_value = 384
        mock_model.encode = Mock(return_value=fake_embedding)

        mock_st.SentenceTransformer.return_value = mock_model
        mock_require_module.return_value = mock_st

        emb_func = DefaultLocalDenseEmbedding(normalize_embeddings=True)
        result = emb_func.embed("Test sentence")

        # The normalization flag must be forwarded to the model's encode().
        _, encode_kwargs = mock_model.encode.call_args
        assert encode_kwargs.get("normalize_embeddings") is True

        # L2 norm of the returned vector should be close to 1.0.
        norm = np.linalg.norm(np.array(result))
        assert abs(norm - 1.0) < 1e-5

    @patch("zvec.extension.sentence_transformer_function.require_module")
    def test_embed_empty_string(self, mock_require_module):
        """Empty and whitespace-only inputs are rejected with ValueError."""
        st_model = Mock()
        st_model.get_sentence_embedding_dimension.return_value = 384
        st_module = Mock()
        st_module.SentenceTransformer.return_value = st_model
        mock_require_module.return_value = st_module

        embedder = DefaultLocalDenseEmbedding()

        # First the empty string, then a whitespace-only string.
        for blank_text in ("", "   "):
            with pytest.raises(ValueError, match="Input text cannot be empty"):
                embedder.embed(blank_text)

    @patch("zvec.extension.sentence_transformer_function.require_module")
    def test_embed_non_string_input(self, mock_require_module):
        """Non-string inputs (an int, ``None``) are rejected with TypeError."""
        st_model = Mock()
        st_model.get_sentence_embedding_dimension.return_value = 384
        st_module = Mock()
        st_module.SentenceTransformer.return_value = st_model
        mock_require_module.return_value = st_module

        embedder = DefaultLocalDenseEmbedding()

        for bad_input in (123, None):
            with pytest.raises(TypeError, match="Expected 'input' to be str"):
                embedder.embed(bad_input)

    @patch("zvec.extension.sentence_transformer_function.require_module")
    def test_embed_callable(self, mock_require_module):
        """Calling the embedding object directly returns a list of the model dimension."""
        dim = 384

        st_model = Mock()
        st_model.get_sentence_embedding_dimension.return_value = dim
        # encode() hands back a random float32 vector of the advertised size.
        st_model.encode = Mock(return_value=np.random.rand(dim).astype(np.float32))

        st_module = Mock()
        st_module.SentenceTransformer.return_value = st_model
        mock_require_module.return_value = st_module

        embedder = DefaultLocalDenseEmbedding()

        # Invoke through __call__ rather than .embed().
        vector = embedder("Test text")

        assert isinstance(vector, list)
        assert len(vector) == dim

    @patch("zvec.extension.sentence_transformer_function.require_module")
    def test_semantic_similarity(self, mock_require_module):
        """Vectors for 'similar' texts score a higher dot product than a 'different' one.

        The embeddings are canned: three consecutive ``encode`` calls return
        two nearly parallel unit vectors followed by an orthogonal one.
        """

        def unit_vector(leading):
            # Pad the three leading components to 384 dims, then L2-normalize.
            raw = np.array(leading + [0.0] * 381, dtype=np.float32)
            return raw / np.linalg.norm(raw)

        near_a = unit_vector([1.0, 0.0, 0.0])
        near_b = unit_vector([0.9, 0.1, 0.0])
        far = unit_vector([0.0, 0.0, 1.0])

        st_model = Mock()
        st_model.get_sentence_embedding_dimension.return_value = 384
        # side_effect yields one canned vector per encode() call, in this order.
        st_model.encode = Mock(side_effect=[near_a, near_b, far])

        st_module = Mock()
        st_module.SentenceTransformer.return_value = st_model
        mock_require_module.return_value = st_module

        embedder = DefaultLocalDenseEmbedding()

        cat_vec = embedder.embed("The cat sits on the mat")
        feline_vec = embedder.embed("A feline rests on a rug")
        python_vec = embedder.embed("Python programming")

        score_similar = np.dot(cat_vec, feline_vec)
        score_different = np.dot(cat_vec, python_vec)

        assert score_similar > score_different

    @patch("zvec.extension.sentence_transformer_function.require_module")
    def test_model_loading_error(self, mock_require_module):
        """A failure inside SentenceTransformer construction surfaces as ValueError."""
        # NOTE(review): the cache is cleared through the *sparse* class here;
        # presumably both embedding classes share one model cache -- confirm.
        from zvec.extension.sentence_transformer_embedding_function import (
            DefaultLocalSparseEmbedding,
        )

        DefaultLocalSparseEmbedding.clear_cache()

        # Module whose SentenceTransformer factory always raises.
        st_module = Mock()
        st_module.SentenceTransformer.side_effect = Exception("Model not found")
        mock_require_module.return_value = st_module

        expected_failure = pytest.raises(
            ValueError, match="Failed to load Sentence Transformer model"
        )
        with expected_failure:
            DefaultLocalDenseEmbedding()

    @patch("zvec.extension.sentence_transformer_function.require_module")
    def test_modelscope_import_error(self, mock_require_module):
        """A missing ``modelscope`` package yields an ImportError with install guidance."""
        st_module = Mock()

        def fake_require(module_name):
            # ``modelscope`` is reported as not installed.
            if module_name == "modelscope":
                raise ImportError("No module named 'modelscope'")
            if module_name == "sentence_transformers":
                return st_module
            # Any other module name resolves to None.
            return None

        mock_require_module.side_effect = fake_require

        expected_failure = pytest.raises(
            ImportError, match="ModelScope support requires the 'modelscope' package"
        )
        with expected_failure:
            DefaultLocalDenseEmbedding(model_source="modelscope")

    @patch("zvec.extension.sentence_transformer_function.require_module")
    def test_embed_dimension_mismatch(self, mock_require_module):
        """``embed`` raises ValueError when the vector length differs from the model dimension."""
        advertised_dim = 384
        actual_dim = 256

        st_model = Mock()
        st_model.get_sentence_embedding_dimension.return_value = advertised_dim
        # encode() returns a vector shorter than the advertised dimension.
        st_model.encode = Mock(
            return_value=np.random.rand(actual_dim).astype(np.float32)
        )

        st_module = Mock()
        st_module.SentenceTransformer.return_value = st_model
        mock_require_module.return_value = st_module

        embedder = DefaultLocalDenseEmbedding()

        with pytest.raises(ValueError, match="Dimension mismatch"):
            embedder.embed("Test text")

    @pytest.mark.skipif(
        not RUN_INTEGRATION_TESTS,
        reason="Integration test skipped. Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.",
    )
    def test_real_embedding_generation(self):
        """Integration test with the real model (requires sentence-transformers).

        To run this test, set environment variable:
            export ZVEC_RUN_INTEGRATION_TESTS=1

        Note: First run will download the model (~80MB).
        """
        embedder = DefaultLocalDenseEmbedding()

        # Shape and element type of a single embedding.
        greeting_vec = embedder.embed("Hello, world!")
        assert len(greeting_vec) == 384
        assert isinstance(greeting_vec, list)
        for component in greeting_vec:
            assert isinstance(component, float)

        # The embedding is expected to have unit L2 norm.
        greeting_norm = np.linalg.norm(greeting_vec)
        assert abs(greeting_norm - 1.0) < 1e-5

        # Paraphrases should score higher than an unrelated sentence.
        cat_vec = embedder.embed("The cat sits on the mat")
        feline_vec = embedder.embed("A feline rests on a rug")
        python_vec = embedder.embed("Python programming language")

        score_similar = np.dot(cat_vec, feline_vec)
        score_different = np.dot(cat_vec, python_vec)
        assert score_similar > score_different

    @pytest.mark.skipif(
        not RUN_INTEGRATION_TESTS,
        reason="Integration test skipped. Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.",
    )
    @patch("zvec.extension.sentence_transformer_function.require_module")
    def test_model_properties(self, mock_require_module):
        """``model_name`` and ``model_source`` reflect the source chosen at construction."""
        st_model = Mock(device="cpu")
        st_model.get_sentence_embedding_dimension.return_value = 384
        st_module = Mock()
        st_module.SentenceTransformer.return_value = st_model
        mock_require_module.return_value = st_module

        # Hugging Face source.
        hf_embedder = DefaultLocalDenseEmbedding(model_source="huggingface")
        assert hf_embedder.model_name == "all-MiniLM-L6-v2"
        assert hf_embedder.model_source == "huggingface"

        # ModelScope source, with the snapshot download stubbed out.
        snapshot_patch = patch(
            "modelscope.hub.snapshot_download.snapshot_download",
            return_value="/path/to/model",
        )
        with snapshot_patch:
            ms_module = Mock()

            def fake_require(requested):
                # sentence_transformers gets its mock; everything else gets the ModelScope mock.
                if requested == "sentence_transformers":
                    return st_module
                return ms_module

            mock_require_module.side_effect = fake_require

            ms_embedder = DefaultLocalDenseEmbedding(model_source="modelscope")
            assert (
                ms_embedder.model_name == "iic/nlp_gte_sentence-embedding_chinese-small"
            )
            assert ms_embedder.model_source == "modelscope"


# -----------------------------------
# DefaultLocalSparseEmbedding Test Case
# -----------------------------------
class TestDefaultLocalSparseEmbedding:
    """Test suite for DefaultLocalSparseEmbedding (SPLADE sparse embedding).

    Note:
        DefaultLocalSparseEmbedding uses naver/splade-cocondenser-ensembledistil
        instead of naver/splade-v3 because:

        - splade-v3 is a gated model requiring Hugging Face authentication
        - cocondenser-ensembledistil is publicly accessible
        - Performance difference is minimal (~2%)
        - Avoids "Access to model is restricted" errors

        This allows all users to run tests without authentication setup.
    """

    @patch("zvec.extension.sentence_transformer_function.require_module")
    def test_init_success(self, mock_require_module):
        """Default construction uses the public SPLADE checkpoint from Hugging Face.

        Checks that ``naver/splade-cocondenser-ensembledistil`` is the model
        requested, rather than the gated ``naver/splade-v3``.
        """
        expected_model = "naver/splade-cocondenser-ensembledistil"

        st_model = Mock(device="cpu")
        st_module = Mock()
        st_module.SentenceTransformer.return_value = st_model
        mock_require_module.return_value = st_module

        embedder = DefaultLocalSparseEmbedding()

        assert embedder.model_name == expected_model
        assert embedder.model_source == "huggingface"
        assert embedder.device == "cpu"
        st_module.SentenceTransformer.assert_called_once_with(
            expected_model,
            device=None,
            trust_remote_code=True,
        )

    @patch("zvec.extension.sentence_transformer_function.require_module")
    def test_init_with_custom_device(self, mock_require_module):
        """An explicit ``device`` argument is passed through to SentenceTransformer."""
        target_device = "cuda"

        st_model = Mock(device=target_device)
        st_module = Mock()
        st_module.SentenceTransformer.return_value = st_model
        mock_require_module.return_value = st_module

        embedder = DefaultLocalSparseEmbedding(device=target_device)

        assert embedder.device == target_device
        st_module.SentenceTransformer.assert_called_once_with(
            "naver/splade-cocondenser-ensembledistil",
            device=target_device,
            trust_remote_code=True,
        )

    @pytest.mark.skipif(
        not RUN_INTEGRATION_TESTS,
        reason="Integration test skipped. Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.",
    )
    @patch("zvec.extension.sentence_transformer_function.require_module")
    def test_embed_success(self, mock_require_module):
        """``embed`` turns the model's sparse output into a sorted ``{index: weight}`` dict."""
        import numpy as np

        # Reset the model cache so this test's mock is the model in use.
        from zvec.extension.sentence_transformer_embedding_function import (
            DefaultLocalSparseEmbedding,
        )

        DefaultLocalSparseEmbedding.clear_cache()

        # Dense row of vocabulary size with four non-zero entries.
        vocab_size = 30522
        nonzero_weights = {10: 0.5, 245: 0.8, 1023: 1.2, 5678: 0.3}
        dense_row = np.zeros(vocab_size)
        for token_id, weight in nonzero_weights.items():
            dense_row[token_id] = weight

        # Fake sparse matrix supporting the chain ``matrix[0].toarray().flatten()``.
        row_proxy = Mock()
        row_proxy.toarray.return_value.flatten.return_value = dense_row
        fake_matrix = Mock()
        fake_matrix.__getitem__ = Mock(return_value=row_proxy)
        # Give the matrix itself a ``toarray`` attribute (for a hasattr-style check).
        fake_matrix.toarray = Mock()

        # Both encode entry points return the same fake matrix.
        st_model = Mock(device="cpu")
        st_model.encode_query = Mock(return_value=fake_matrix)
        st_model.encode_document = Mock(return_value=fake_matrix)

        st_module = Mock()
        st_module.SentenceTransformer.return_value = st_model
        mock_require_module.return_value = st_module

        embedder = DefaultLocalSparseEmbedding()
        sparse_vec = embedder.embed("machine learning")

        # Result type, key/value types, positivity and size.
        assert isinstance(sparse_vec, dict)
        assert all(isinstance(index, int) for index in sparse_vec.keys())
        assert all(isinstance(weight, float) for weight in sparse_vec.values())
        assert all(weight > 0 for weight in sparse_vec.values())
        assert len(sparse_vec) == 4

        # Keys must come out in ascending order and match the fixture indices.
        indices = list(sparse_vec.keys())
        assert indices == sorted(indices), (
            "Sparse vector keys must be sorted in ascending order"
        )
        assert indices == [10, 245, 1023, 5678]

        # encode_query is called once, with the text wrapped in a list.
        st_model.encode_query.assert_called_once()
        first_positional = st_model.encode_query.call_args[0][0]
        assert isinstance(first_positional, list)
        assert first_positional == ["machine learning"]

    @patch("zvec.extension.sentence_transformer_function.require_module")
    def test_embed_empty_input(self, mock_require_module):
        """Empty and whitespace-only inputs are rejected with ValueError."""
        st_module = Mock()
        st_module.SentenceTransformer.return_value = Mock()
        mock_require_module.return_value = st_module

        embedder = DefaultLocalSparseEmbedding()

        # First the empty string, then a whitespace-only string.
        for blank_text in ("", "   "):
            with pytest.raises(ValueError, match="Input text cannot be empty"):
                embedder.embed(blank_text)

    @patch("zvec.extension.sentence_transformer_function.require_module")
    def test_embed_non_string_input(self, mock_require_module):
        """Non-string inputs (an int, a list of strings) are rejected with TypeError."""
        st_module = Mock()
        st_module.SentenceTransformer.return_value = Mock()
        mock_require_module.return_value = st_module

        embedder = DefaultLocalSparseEmbedding()

        for bad_input in (123, ["text"]):
            with pytest.raises(TypeError, match="Expected 'input' to be str"):
                embedder.embed(bad_input)

    @pytest.mark.skipif(
        not RUN_INTEGRATION_TESTS,
        reason="Integration test skipped. Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.",
    )
    @patch("zvec.extension.sentence_transformer_function.require_module")
    def test_callable_interface(self, mock_require_module):
        """Test that DefaultLocalSparseEmbedding is callable and returns sorted keys."""
        import numpy as np

        # Reset the model cache before installing the mock.
        from zvec.extension.sentence_transformer_embedding_function import (
            DefaultLocalSparseEmbedding,
        )

        DefaultLocalSparseEmbedding.clear_cache()

        # Dense row of vocabulary size with three non-zero entries.
        vocab_size = 30522
        nonzero_weights = {100: 1.0, 200: 0.5, 300: 0.8}
        dense_row = np.zeros(vocab_size)
        for token_id, weight in nonzero_weights.items():
            dense_row[token_id] = weight

        # Fake sparse matrix supporting the chain ``matrix[0].toarray().flatten()``.
        row_proxy = Mock()
        row_proxy.toarray.return_value.flatten.return_value = dense_row
        fake_matrix = Mock()
        fake_matrix.__getitem__ = Mock(return_value=row_proxy)
        # Give the matrix itself a ``toarray`` attribute (for a hasattr-style check).
        fake_matrix.toarray = Mock()

        st_model = Mock(device="cpu")
        st_model.encode_query = Mock(return_value=fake_matrix)
        st_model.encode_document = Mock(return_value=fake_matrix)

        st_module = Mock()
        st_module.SentenceTransformer.return_value = st_model
        mock_require_module.return_value = st_module

        embedder = DefaultLocalSparseEmbedding()

        # Invoke through __call__ rather than .embed().
        sparse_vec = embedder("test input")
        assert isinstance(sparse_vec, dict)
        assert all(isinstance(index, int) for index in sparse_vec.keys())

        indices = list(sparse_vec.keys())
        assert indices == sorted(indices), "Callable interface must also return sorted keys"
        assert indices == [100, 200, 300]

    @patch("zvec.extension.sentence_transformer_function.require_module")
    def test_model_loading_failure(self, mock_require_module):
        """A failure inside SentenceTransformer construction surfaces as ValueError."""
        # Clear the model cache so construction really attempts a load.
        from zvec.extension.sentence_transformer_embedding_function import (
            DefaultLocalSparseEmbedding,
        )

        DefaultLocalSparseEmbedding.clear_cache()

        # Module whose SentenceTransformer factory always raises.
        st_module = Mock()
        st_module.SentenceTransformer.side_effect = Exception("Model not found")
        mock_require_module.return_value = st_module

        expected_failure = pytest.raises(
            ValueError, match="Failed to load Sentence Transformer model"
        )
        with expected_failure:
            DefaultLocalSparseEmbedding()

    @patch("zvec.extension.sentence_transformer_function.require_module")
    def test_inference_failure(self, mock_require_module):
        """An error raised by the model during encoding is reported as RuntimeError."""
        # Reset the model cache before installing the failing mock.
        from zvec.extension.sentence_transformer_embedding_function import (
            DefaultLocalSparseEmbedding,
        )

        DefaultLocalSparseEmbedding.clear_cache()

        # Both encode entry points raise their own RuntimeError instance.
        st_model = Mock(device="cpu")
        st_model.encode_query = Mock(side_effect=RuntimeError("CUDA out of memory"))
        st_model.encode_document = Mock(
            side_effect=RuntimeError("CUDA out of memory")
        )

        st_module = Mock()
        st_module.SentenceTransformer.return_value = st_model
        mock_require_module.return_value = st_module

        embedder = DefaultLocalSparseEmbedding()

        expected_failure = pytest.raises(
            RuntimeError, match="Failed to generate sparse embedding"
        )
        with expected_failure:
            embedder.embed("test input")

    @patch("zvec.extension.sentence_transformer_function.require_module")
    def test_sparse_vector_properties(self, mock_require_module):
        """Sparse output is small relative to the vocabulary, positive, and key-sorted."""
        import numpy as np

        # Reset the model cache before installing the mock.
        from zvec.extension.sentence_transformer_embedding_function import (
            DefaultLocalSparseEmbedding,
        )

        DefaultLocalSparseEmbedding.clear_cache()

        # Dense row of vocabulary size with five non-zero entries.
        vocab_size = 30522
        nonzero_weights = {50: 3.0, 100: 2.0, 200: 1.5, 400: 2.5, 500: 1.8}
        dense_row = np.zeros(vocab_size)
        for token_id, weight in nonzero_weights.items():
            dense_row[token_id] = weight

        # Fake sparse matrix supporting the chain ``matrix[0].toarray().flatten()``.
        row_proxy = Mock()
        row_proxy.toarray.return_value.flatten.return_value = dense_row
        fake_matrix = Mock()
        fake_matrix.__getitem__ = Mock(return_value=row_proxy)
        # Give the matrix itself a ``toarray`` attribute (for a hasattr-style check).
        fake_matrix.toarray = Mock()

        st_model = Mock(device="cpu")
        st_model.encode_query = Mock(return_value=fake_matrix)
        st_model.encode_document = Mock(return_value=fake_matrix)

        st_module = Mock()
        st_module.SentenceTransformer.return_value = st_model
        mock_require_module.return_value = st_module

        embedder = DefaultLocalSparseEmbedding()
        sparse_vec = embedder.embed("test")

        # Far fewer entries than vocabulary slots, and every weight positive.
        assert len(sparse_vec) < vocab_size
        assert all(weight > 0 for weight in sparse_vec.values())

        # Keys are in ascending order.
        indices = list(sparse_vec.keys())
        assert indices == sorted(indices), "Sparse vector keys must be sorted"

        # Exactly the fixture's indices, in ascending order.
        expected_keys = [50, 100, 200, 400, 500]
        assert indices == expected_keys, f"Expected {expected_keys}, got {indices}"

        # The first key yielded by iteration is the minimum key.
        if sparse_vec:
            leading_key = next(iter(sparse_vec.keys()))
            assert leading_key == min(sparse_vec.keys()), "First key must be the smallest"

    @patch("zvec.extension.sentence_transformer_function.require_module")
    def test_output_sorted_by_indices(self, mock_require_module):
        """Test that output dictionary is always sorted by indices (keys) in ascending order.

        The fixture is a dense vocabulary-sized row whose non-zero entries are
        assigned in non-ascending index order (9999, 5, 1234, 77, 500). The
        test checks that the resulting dict iterates its keys in strictly
        ascending order.
        """
        import numpy as np

        # Clear model cache so this test's mock is the model in use
        from zvec.extension.sentence_transformer_embedding_function import (
            DefaultLocalSparseEmbedding,
        )

        DefaultLocalSparseEmbedding.clear_cache()

        mock_sparse_matrix = Mock()

        # Dense array representation with vocab_size=30522
        vocab_size = 30522
        dense_array = np.zeros(vocab_size)
        # Non-zero values, written in non-ascending index order
        dense_array[9999] = 1.5
        dense_array[5] = 2.0
        dense_array[1234] = 0.8
        dense_array[77] = 3.2
        dense_array[500] = 1.1

        # Mock the method chain: sparse_matrix[0].toarray().flatten()
        mock_row = Mock()
        mock_dense = Mock()
        mock_row.toarray.return_value = mock_dense
        mock_dense.flatten.return_value = dense_array
        mock_sparse_matrix.__getitem__ = Mock(return_value=mock_row)

        # Also give the matrix a 'toarray' attribute (for a hasattr-style check)
        mock_sparse_matrix.toarray = Mock()

        mock_st = Mock()
        mock_model = Mock()
        mock_model.device = "cpu"

        # Both encode entry points return the fake sparse matrix
        mock_model.encode_query = Mock(return_value=mock_sparse_matrix)
        mock_model.encode_document = Mock(return_value=mock_sparse_matrix)

        mock_st.SentenceTransformer.return_value = mock_model
        mock_require_module.return_value = mock_st

        sparse_emb = DefaultLocalSparseEmbedding()
        result = sparse_emb.embed("test sorting")

        # Keys in the dict's own iteration order
        result_keys = list(result.keys())

        # Verify keys are sorted
        assert result_keys == sorted(result_keys), (
            f"Keys must be sorted in ascending order. "
            f"Got: {result_keys}, Expected: {sorted(result_keys)}"
        )

        # Verify expected keys are present and in correct order
        expected_sorted_keys = [5, 77, 500, 1234, 9999]
        assert result_keys == expected_sorted_keys, (
            f"All expected keys should be present in sorted order. "
            f"Expected: {expected_sorted_keys}, Got: {result_keys}"
        )

        # Verify first and last keys
        assert result_keys[0] == 5, "First key must be minimum"
        assert result_keys[-1] == 9999, "Last key must be maximum"

        # Verify strict ascending order pairwise. Comparing each key with its
        # predecessor via zip avoids rebuilding the key list on every step.
        for position, (prev_key, key) in enumerate(
            zip(result_keys, result_keys[1:]), start=1
        ):
            assert key > prev_key, (
                f"Key at position {position} must be greater than previous key"
            )

    @patch("zvec.extension.sentence_transformer_function.require_module")
    def test_device_property(self, mock_require_module):
        """The ``device`` property reports the device set on the mocked model."""
        target_device = "cuda"

        st_model = Mock(device=target_device)
        st_module = Mock()
        st_module.SentenceTransformer.return_value = st_model
        mock_require_module.return_value = st_module

        embedder = DefaultLocalSparseEmbedding(device=target_device)

        assert embedder.device == target_device

    @pytest.mark.skipif(
        not RUN_INTEGRATION_TESTS,
        reason="Integration test: requires ZVEC_RUN_INTEGRATION_TESTS=1 and model download",
    )
    @patch("zvec.extension.sentence_transformer_function.require_module")
    def test_modelscope_source(self, mock_require_module):
        """``model_source="modelscope"`` keeps the SPLADE model name and records the source."""
        st_model = Mock(device="cpu")
        st_module = Mock()
        st_module.SentenceTransformer.return_value = st_model
        ms_module = Mock()

        def fake_require(requested):
            # sentence_transformers gets its mock; everything else gets the ModelScope mock.
            if requested == "sentence_transformers":
                return st_module
            return ms_module

        # Stub out the ModelScope download helper with a fixed cache path.
        snapshot_patch = patch(
            "modelscope.hub.snapshot_download.snapshot_download",
            return_value="/cache/splade-cocondenser",
        )
        with snapshot_patch:
            mock_require_module.side_effect = fake_require

            embedder = DefaultLocalSparseEmbedding(model_source="modelscope")

            assert embedder.model_name == "naver/splade-cocondenser-ensembledistil"
            assert embedder.model_source == "modelscope"

    @pytest.mark.skipif(
        not RUN_INTEGRATION_TESTS,
        reason="Integration test: requires ZVEC_RUN_INTEGRATION_TESTS=1 and model download",
    )
    def test_integration_real_model(self):
        """Integration test with real SPLADE model (requires model download).

        This test uses naver/splade-cocondenser-ensembledistil instead of
        naver/splade-v3 because splade-v3 requires Hugging Face authentication.
        The cocondenser-ensembledistil model is publicly accessible and provides
        comparable performance.

        To run this test:
            export ZVEC_RUN_INTEGRATION_TESTS=1
            pytest tests/test_embedding.py::TestDefaultLocalSparseEmbedding::test_integration_real_model -v

        Note: First run will download ~100MB model from Hugging Face.

        Alternative models:
            If you have access to splade-v3, you can create a custom embedding
            class following the example in the DefaultLocalSparseEmbedding
            docstring.
        """
        # Clear model cache to ensure fresh load
        from zvec.extension.sentence_transformer_embedding_function import (
            DefaultLocalSparseEmbedding,
        )

        DefaultLocalSparseEmbedding.clear_cache()

        sparse_emb = DefaultLocalSparseEmbedding()

        # Test with real input
        text = "machine learning and artificial intelligence"
        result = sparse_emb.embed(text)

        # Verify result structure: non-empty dict of non-negative int keys
        # mapped to strictly positive float weights
        assert isinstance(result, dict)
        assert len(result) > 0
        assert all(isinstance(k, int) and k >= 0 for k in result.keys())
        assert all(isinstance(v, float) and v > 0 for v in result.values())

        # SPLADE typically produces 100-300 non-zero dimensions
        assert 50 < len(result) < 500

        # Verify keys are sorted in ascending order
        keys = list(result.keys())
        assert keys == sorted(keys), "Real model output must be sorted by indices"

        # The callable interface must return the same result as embed()
        result2 = sparse_emb(text)
        assert result == result2

    @pytest.mark.skipif(
        not RUN_INTEGRATION_TESTS,
        reason="Integration test: requires ZVEC_RUN_INTEGRATION_TESTS=1",
    )
    def test_integration_multiple_inputs(self):
        """Integration test: distinct texts yield distinct, key-sorted sparse vectors."""
        # Reset the model cache before loading the real model.
        from zvec.extension.sentence_transformer_embedding_function import (
            DefaultLocalSparseEmbedding,
        )

        DefaultLocalSparseEmbedding.clear_cache()

        embedder = DefaultLocalSparseEmbedding()

        sample_texts = [
            "Hello, world!",
            "Machine learning is fascinating",
            "Python programming language",
        ]

        # Embed the texts one at a time, in order.
        sparse_vectors = []
        for sample in sample_texts:
            sparse_vectors.append(embedder.embed(sample))

        # One dict per input text.
        assert len(sparse_vectors) == 3
        assert all(isinstance(vec, dict) for vec in sparse_vectors)

        # Adjacent inputs must not produce identical vectors.
        assert sparse_vectors[0] != sparse_vectors[1]
        assert sparse_vectors[1] != sparse_vectors[2]

        # Every vector's keys are in ascending order.
        for i, vec in enumerate(sparse_vectors):
            indices = list(vec.keys())
            assert indices == sorted(indices), f"Result {i} must have sorted keys"


# ----------------------------
# BM25EmbeddingFunction Test Case
# ----------------------------
class TestBM25EmbeddingFunction:
    """Test suite for BM25EmbeddingFunction (BM25-based sparse embedding using DashText SDK)."""

    # Dotted path of the ``require_module`` name replaced in every mocked test.
    _REQUIRE_MODULE = "zvec.extension.bm25_embedding_function.require_module"

    @staticmethod
    def _wire_dashtext(require_mock, builtin=True):
        """Make ``require_mock`` hand out a stand-in dashtext module.

        Args:
            require_mock: The mock that replaces ``require_module``.
            builtin: When true, the stand-in encoder is what
                ``SparseVectorEncoder.default(...)`` returns; otherwise it is
                what calling ``SparseVectorEncoder(...)`` returns.

        Returns:
            A ``(module, encoder)`` tuple of ``Mock`` objects.
        """
        fake_module = Mock()
        fake_encoder = Mock()
        if builtin:
            fake_module.SparseVectorEncoder.default.return_value = fake_encoder
        else:
            fake_module.SparseVectorEncoder.return_value = fake_encoder
        require_mock.return_value = fake_module
        return fake_module, fake_encoder

    def test_init_with_built_in_encoder(self):
        """Without a corpus the default encoder is requested for the default language."""
        with patch(self._REQUIRE_MODULE) as require_mock:
            dashtext, _ = self._wire_dashtext(require_mock)

            # No arguments: the language is expected to default to Chinese.
            embedder = BM25EmbeddingFunction()

            assert embedder.corpus_size == 0
            assert embedder.encoding_type == "query"
            assert embedder.language == "zh"
            dashtext.SparseVectorEncoder.default.assert_called_once_with(name="zh")

    def test_init_with_custom_encoder(self):
        """With a corpus a new encoder is built from ``b``/``k1`` and trained on it."""
        documents = [
            "a cat is a feline and likes to purr",
            "a dog is the human's best friend",
            "a bird is a beautiful animal that can fly",
        ]

        with patch(self._REQUIRE_MODULE) as require_mock:
            dashtext, encoder = self._wire_dashtext(require_mock, builtin=False)

            embedder = BM25EmbeddingFunction(corpus=documents, b=0.75, k1=1.2)

            assert embedder.corpus_size == 3
            assert embedder.encoding_type == "query"
            dashtext.SparseVectorEncoder.assert_called_once_with(b=0.75, k1=1.2)
            encoder.train.assert_called_once_with(documents)

    def test_init_with_empty_corpus(self):
        """An empty corpus list is rejected with ValueError."""
        with pytest.raises(ValueError, match="Corpus must be a non-empty list"):
            BM25EmbeddingFunction(corpus=[])

    def test_init_with_invalid_corpus(self):
        """A corpus containing a non-string entry is rejected with ValueError."""
        for bad_corpus in (["text", 123, "another"], [None, "text"]):
            with pytest.raises(
                ValueError, match="All corpus documents must be strings"
            ):
                BM25EmbeddingFunction(corpus=bad_corpus)

    def test_init_with_language_parameter(self):
        """An explicit language is stored and forwarded to the default encoder."""
        with patch(self._REQUIRE_MODULE) as require_mock:
            dashtext, _ = self._wire_dashtext(require_mock)

            english = BM25EmbeddingFunction(language="en")

            assert english.language == "en"
            dashtext.SparseVectorEncoder.default.assert_called_with(name="en")

    def test_init_with_encoding_type(self):
        """An explicit encoding type is exposed through the property."""
        with patch(self._REQUIRE_MODULE) as require_mock:
            self._wire_dashtext(require_mock)

            document_embedder = BM25EmbeddingFunction(encoding_type="document")

            assert document_embedder.encoding_type == "document"

    def test_init_with_missing_dashtext_library(self):
        """An ImportError raised by ``require_module`` reaches the caller."""
        failure = ImportError("dashtext package is required")
        with patch(self._REQUIRE_MODULE, side_effect=failure):
            with pytest.raises(ImportError, match="dashtext package is required"):
                BM25EmbeddingFunction()

    def test_embed_with_query_encoding(self):
        """Query encoding returns the encoder's ``encode_queries`` output."""
        expected = {5: 0.89, 12: 1.45, 23: 0.67, 45: 1.12}

        with patch(self._REQUIRE_MODULE) as require_mock:
            _, encoder = self._wire_dashtext(require_mock)
            # Give the encoder its own copy so ``expected`` stays untouched.
            encoder.encode_queries.return_value = dict(expected)

            embedder = BM25EmbeddingFunction(encoding_type="query")
            # Clear LRU cache to ensure fresh call
            embedder.embed.cache_clear()
            vector = embedder.embed("cat purr loud")

            # Shape of the result: int indices mapped to positive float weights.
            assert isinstance(vector, dict)
            assert all(isinstance(index, int) for index in vector.keys())
            assert all(isinstance(weight, float) for weight in vector.values())
            assert all(weight > 0 for weight in vector.values())

            # Indices come back in ascending order.
            indices = list(vector.keys())
            assert indices == sorted(indices), "Output must be sorted by indices"

            # Content matches what the mocked encoder produced.
            assert vector == expected

            encoder.encode_queries.assert_called_once_with("cat purr loud")

    def test_embed_with_document_encoding(self):
        """Document encoding returns the encoder's ``encode_documents`` output."""
        with patch(self._REQUIRE_MODULE) as require_mock:
            _, encoder = self._wire_dashtext(require_mock)
            encoder.encode_documents.return_value = {10: 1.5, 20: 2.3}

            embedder = BM25EmbeddingFunction(encoding_type="document")
            embedder.embed.cache_clear()
            vector = embedder.embed("document text")

            assert vector == {10: 1.5, 20: 2.3}
            encoder.encode_documents.assert_called_once_with("document text")

    def test_embed_with_empty_input(self):
        """Empty and whitespace-only strings are rejected with ValueError."""
        with patch(self._REQUIRE_MODULE) as require_mock:
            self._wire_dashtext(require_mock)
            embedder = BM25EmbeddingFunction()

            for blank in ("", "   "):
                with pytest.raises(ValueError, match="Input text cannot be empty"):
                    embedder.embed(blank)

    def test_embed_with_non_string_input(self):
        """Non-string inputs are rejected with TypeError."""
        with patch(self._REQUIRE_MODULE) as require_mock:
            self._wire_dashtext(require_mock)
            embedder = BM25EmbeddingFunction()

            # Hashable non-strings should produce the embedder's own message.
            for not_text in (123, None):
                with pytest.raises(TypeError, match="Expected 'input' to be str"):
                    embedder.embed(not_text)

            # A list is unhashable, so the lru_cache layer raises
            # TypeError("unhashable type: 'list'") before the type check runs.
            # Still a TypeError, just raised at a different layer.
            with pytest.raises(TypeError, match="unhashable type"):
                embedder.embed(["text"])

    def test_embed_callable_interface(self):
        """Calling the instance directly yields an embedding dict."""
        with patch(self._REQUIRE_MODULE) as require_mock:
            _, encoder = self._wire_dashtext(require_mock)
            encoder.encode_queries.return_value = {10: 1.5}

            embedder = BM25EmbeddingFunction()
            embedder.embed.cache_clear()

            vector = embedder("test query")

            assert isinstance(vector, dict)
            assert 10 in vector

    def test_embed_output_sorted_by_indices(self):
        """Indices returned out of order by the encoder come back ascending."""
        with patch(self._REQUIRE_MODULE) as require_mock:
            _, encoder = self._wire_dashtext(require_mock)
            # Deliberately shuffled indices.
            encoder.encode_queries.return_value = {
                9999: 1.5,
                5: 2.0,
                1234: 0.8,
                77: 3.2,
                500: 1.1,
            }

            embedder = BM25EmbeddingFunction()
            embedder.embed.cache_clear()
            vector = embedder.embed("test query")

            result_keys = list(vector.keys())
            assert result_keys == sorted(result_keys), (
                f"Keys must be sorted. Got: {result_keys}, Expected: {sorted(result_keys)}"
            )

            # Spelled out explicitly as well.
            assert result_keys == [5, 77, 500, 1234, 9999]

    def test_embed_filters_zero_values(self):
        """Entries whose weight is zero or negative are dropped from the output."""
        with patch(self._REQUIRE_MODULE) as require_mock:
            _, encoder = self._wire_dashtext(require_mock)
            encoder.encode_queries.return_value = {
                0: 1.5,  # positive -> kept
                1: 0.0,  # zero -> dropped
                2: -0.5,  # negative -> dropped
            }

            embedder = BM25EmbeddingFunction()
            embedder.embed.cache_clear()
            vector = embedder.embed("test")

            assert 0 in vector
            assert 1 not in vector  # zero weight removed
            assert 2 not in vector  # negative weight removed
            assert all(weight > 0 for weight in vector.values())

    def test_properties(self):
        """Constructor arguments are reflected by the read accessors."""
        documents = ["doc1", "doc2", "doc3"]

        with patch(self._REQUIRE_MODULE) as require_mock:
            self._wire_dashtext(require_mock, builtin=False)

            embedder = BM25EmbeddingFunction(
                corpus=documents,
                encoding_type="document",
                language="en",
                b=0.8,
                k1=1.5,
                custom_param="test",
            )

            assert embedder.corpus_size == 3
            assert embedder.encoding_type == "document"
            assert embedder.language == "en"
            # The unrecognised keyword is expected to surface in ``extra_params``.
            assert embedder.extra_params == {"custom_param": "test"}

    @pytest.mark.skipif(
        not RUN_INTEGRATION_TESTS,
        reason="Integration test skipped. Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.",
    )
    def test_real_dashtext_bm25_embedding(self):
        """Integration test against the real DashText library (no mocks).

        To run this test:
            export ZVEC_RUN_INTEGRATION_TESTS=1
            pip install dashtext

        Note: This test requires the dashtext package to be installed.
        """
        # Part 1: built-in Chinese encoder.
        zh_embedder = BM25EmbeddingFunction(language="zh", encoding_type="query")
        zh_vector = zh_embedder.embed("什么是向量检索服务")

        assert isinstance(zh_vector, dict)
        assert len(zh_vector) > 0
        assert all(isinstance(index, int) for index in zh_vector.keys())
        assert all(
            isinstance(weight, float) and weight > 0 for weight in zh_vector.values()
        )

        zh_indices = list(zh_vector.keys())
        assert zh_indices == sorted(zh_indices), (
            "Real DashText BM25 output must be sorted"
        )

        # Part 2: encoder trained on a small custom corpus.
        documents = [
            "The cat sits on the mat",
            "The dog plays in the garden",
            "Birds fly in the sky",
            "Fish swim in the water",
        ]
        custom_embedder = BM25EmbeddingFunction(
            corpus=documents, encoding_type="query"
        )

        english_query = "cat on mat"
        en_vector = custom_embedder.embed(english_query)

        assert isinstance(en_vector, dict)
        assert len(en_vector) > 0
        assert all(isinstance(index, int) for index in en_vector.keys())
        assert all(
            isinstance(weight, float) and weight > 0 for weight in en_vector.values()
        )

        # ``__call__`` and ``embed`` must agree.
        assert en_vector == custom_embedder(english_query)

        assert custom_embedder.corpus_size == 4


================================================
FILE: python/tests/test_params.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import sys
import time


import numpy as np
import pytest
from zvec import (
    AddColumnOption,
    AlterColumnOption,
    CollectionOption,
    FlatIndexParam,
    HnswIndexParam,
    IndexOption,
    InvertIndexParam,
    IVFIndexParam,
    OptimizeOption,
    HnswQueryParam,
    IVFQueryParam,
    VectorQuery,
    IndexType,
    MetricType,
    QuantizeType,
    DataType,
    VectorSchema,
)

from _zvec.param import _VectorQuery

# ----------------------------
# Invert Index Param Test Case
# ----------------------------


class TestInvertIndexParam:
    """Tests for InvertIndexParam: defaults, custom values and attribute immutability."""

    def test_default(self):
        """A param built with no arguments has both flags off and type INVERT."""
        param = InvertIndexParam()
        assert param.enable_range_optimization is False
        assert param.enable_extended_wildcard is False
        assert param.type == IndexType.INVERT

    def test_custom(self):
        """Flags passed to the constructor are reported back unchanged."""
        param = InvertIndexParam(
            enable_range_optimization=True, enable_extended_wildcard=True
        )
        assert param.enable_range_optimization is True
        assert param.enable_extended_wildcard is True

    def test_readonly(self):
        """Assigning to either flag raises AttributeError.

        Each attribute is checked inside its own ``pytest.raises`` block. The
        context manager is left as soon as the first statement raises, so a
        second assignment placed in the same block would never be executed.
        """
        param = InvertIndexParam()

        # The accepted error text depends on the interpreter version.
        if sys.version_info >= (3, 11):
            match_pattern = r"(can't set attribute|has no setter|readonly attribute)"
        else:
            match_pattern = r"can't set attribute"

        for attr in ("enable_range_optimization", "enable_extended_wildcard"):
            with pytest.raises(AttributeError, match=match_pattern):
                setattr(param, attr, False)


# ----------------------------
# Hnsw Index Param Test Case
# ----------------------------


class TestHnswIndexParam:
    """Exercises HnswIndexParam construction and attribute immutability."""

    def test_default(self):
        """Check the values reported by a param built with no arguments."""
        default_param = HnswIndexParam()
        assert default_param.metric_type == MetricType.IP
        assert default_param.m == 50
        assert default_param.ef_construction == 500
        assert default_param.quantize_type == QuantizeType.UNDEFINED
        assert default_param.type == IndexType.HNSW

    def test_custom(self):
        """Every constructor override is readable through the same-named attribute."""
        overrides = {
            "metric_type": MetricType.L2,
            "m": 10,
            "ef_construction": 1000,
            "quantize_type": QuantizeType.FP16,
        }
        custom_param = HnswIndexParam(**overrides)
        for attr_name, expected in overrides.items():
            assert getattr(custom_param, attr_name) == expected

    @pytest.mark.parametrize(
        "attr", ["metric_type", "m", "ef_construction", "quantize_type"]
    )
    def test_readonly_attributes(self, attr):
        """Writing an attribute back onto the param raises AttributeError."""
        target = HnswIndexParam()

        # The accepted error text depends on the interpreter version.
        expected_message = (
            r"(can't set attribute|has no setter|readonly attribute)"
            if sys.version_info >= (3, 11)
            else r"can't set attribute"
        )
        with pytest.raises(AttributeError, match=expected_message):
            setattr(target, attr, getattr(target, attr))


# ----------------------------
# Flat Index Param Test Case
# ----------------------------
class TestFlatIndexParam:
    """Exercises FlatIndexParam construction and attribute immutability."""

    def test_default(self):
        """Check the values reported by a param built with no arguments."""
        default_param = FlatIndexParam()
        assert default_param.type == IndexType.FLAT
        assert default_param.quantize_type == QuantizeType.UNDEFINED
        assert default_param.metric_type == MetricType.IP

    def test_custom(self):
        """Every constructor override is readable through the same-named attribute."""
        overrides = {
            "metric_type": MetricType.L2,
            "quantize_type": QuantizeType.INT8,
        }
        custom_param = FlatIndexParam(**overrides)
        for attr_name, expected in overrides.items():
            assert getattr(custom_param, attr_name) == expected

    @pytest.mark.parametrize("attr", ["metric_type", "quantize_type"])
    def test_readonly_attributes(self, attr):
        """Writing an attribute back onto the param raises AttributeError."""
        target = FlatIndexParam()

        # The accepted error text depends on the interpreter version.
        expected_message = (
            r"(can't set attribute|has no setter|readonly attribute)"
            if sys.version_info >= (3, 11)
            else r"can't set attribute"
        )
        with pytest.raises(AttributeError, match=expected_message):
            setattr(target, attr, getattr(target, attr))


# ----------------------------
# Ivf Index Param Test Case
# ----------------------------
class TestIVFIndexParam:
    """Exercises IVFIndexParam construction and attribute immutability."""

    def test_default(self):
        """Check the values reported by a param built with no arguments."""
        default_param = IVFIndexParam()
        assert default_param.metric_type == MetricType.IP
        assert default_param.n_list == 0
        assert default_param.quantize_type == QuantizeType.UNDEFINED
        assert default_param.type == IndexType.IVF

    def test_custom(self):
        """Constructor overrides are reported back and the index type stays IVF."""
        overrides = {
            "metric_type": MetricType.L2,
            "n_list": 1000,
            "quantize_type": QuantizeType.FP16,
        }
        custom_param = IVFIndexParam(**overrides)
        for attr_name, expected in overrides.items():
            assert getattr(custom_param, attr_name) == expected
        assert custom_param.type == IndexType.IVF

    @pytest.mark.parametrize("attr", ["metric_type", "n_list", "quantize_type"])
    def test_readonly_attributes(self, attr):
        """Writing an attribute back onto the param raises AttributeError."""
        target = IVFIndexParam()

        # The accepted error text depends on the interpreter version.
        expected_message = (
            r"(can't set attribute|has no setter|readonly attribute)"
            if sys.version_info >= (3, 11)
            else r"can't set attribute"
        )
        with pytest.raises(AttributeError, match=expected_message):
            setattr(target, attr, getattr(target, attr))


# ----------------------------
# CollectionOption Test Case
# ----------------------------
class TestCollectionOption:
    """Exercises CollectionOption construction and attribute immutability."""

    def test_default(self):
        """With no arguments the option is writable and has mmap enabled."""
        default_option = CollectionOption()
        assert default_option is not None
        assert default_option.read_only == False
        assert default_option.enable_mmap == True

    def test_custom(self):
        """Both flag combinations passed to the constructor are reported back."""
        for read_only, enable_mmap in ((True, False), (False, True)):
            custom_option = CollectionOption(
                read_only=read_only, enable_mmap=enable_mmap
            )
            assert custom_option.read_only == read_only
            assert custom_option.enable_mmap == enable_mmap

    @pytest.mark.parametrize("attr", ["read_only", "enable_mmap"])
    def test_readonly_attributes(self, attr):
        """Writing an attribute back onto the option raises AttributeError."""
        target = CollectionOption()

        # The accepted error text depends on the interpreter version.
        expected_message = (
            r"(can't set attribute|has no setter|readonly attribute)"
            if sys.version_info >= (3, 11)
            else r"can't set attribute"
        )
        with pytest.raises(AttributeError, match=expected_message):
            setattr(target, attr, getattr(target, attr))


# ----------------------------
# IndexOption Test Case
# ----------------------------
class TestIndexOption:
    """Exercises IndexOption construction and attribute immutability."""

    def test_default(self):
        """With no arguments the concurrency is reported as 0."""
        default_option = IndexOption()
        assert default_option is not None
        assert default_option.concurrency == 0

    def test_custom(self):
        """A concurrency passed to the constructor is reported back."""
        assert IndexOption(concurrency=10).concurrency == 10

    @pytest.mark.parametrize("attr", ["concurrency"])
    def test_readonly_attributes(self, attr):
        """Writing an attribute back onto the option raises AttributeError."""
        target = IndexOption()

        # The accepted error text depends on the interpreter version.
        expected_message = (
            r"(can't set attribute|has no setter|readonly attribute)"
            if sys.version_info >= (3, 11)
            else r"can't set attribute"
        )
        with pytest.raises(AttributeError, match=expected_message):
            setattr(target, attr, getattr(target, attr))


# ----------------------------
# AddColumnOption Test Case
# ----------------------------
class TestAddColumnOption:
    """Exercises AddColumnOption construction and attribute immutability."""

    def test_default(self):
        """With no arguments the concurrency is reported as 0."""
        default_option = AddColumnOption()
        assert default_option is not None
        assert default_option.concurrency == 0

    def test_custom(self):
        """A concurrency passed to the constructor is reported back."""
        assert AddColumnOption(concurrency=10).concurrency == 10

    @pytest.mark.parametrize("attr", ["concurrency"])
    def test_readonly_attributes(self, attr):
        """Writing an attribute back onto the option raises AttributeError."""
        target = AddColumnOption()

        # The accepted error text depends on the interpreter version.
        expected_message = (
            r"(can't set attribute|has no setter|readonly attribute)"
            if sys.version_info >= (3, 11)
            else r"can't set attribute"
        )
        with pytest.raises(AttributeError, match=expected_message):
            setattr(target, attr, getattr(target, attr))


# ----------------------------
# AlterColumnOption Test Case
# ----------------------------
class TestAlterColumnOption:
    """Exercises AlterColumnOption construction and attribute immutability."""

    def test_default(self):
        """With no arguments the concurrency is reported as 0."""
        default_option = AlterColumnOption()
        assert default_option is not None
        assert default_option.concurrency == 0

    def test_custom(self):
        """A concurrency passed to the constructor is reported back."""
        assert AlterColumnOption(concurrency=10).concurrency == 10

    @pytest.mark.parametrize("attr", ["concurrency"])
    def test_readonly_attributes(self, attr):
        """Writing an attribute back onto the option raises AttributeError."""
        target = AlterColumnOption()

        # The accepted error text depends on the interpreter version.
        expected_message = (
            r"(can't set attribute|has no setter|readonly attribute)"
            if sys.version_info >= (3, 11)
            else r"can't set attribute"
        )
        with pytest.raises(AttributeError, match=expected_message):
            setattr(target, attr, getattr(target, attr))


# ----------------------------
# OptimizeOption Test Case
# ----------------------------
class TestOptimizeOption:
    """Exercises OptimizeOption construction and attribute immutability."""

    def test_default(self):
        """With no arguments the concurrency is reported as 0."""
        default_option = OptimizeOption()
        assert default_option is not None
        assert default_option.concurrency == 0

    def test_custom(self):
        """A concurrency passed to the constructor is reported back."""
        assert OptimizeOption(concurrency=10).concurrency == 10

    @pytest.mark.parametrize("attr", ["concurrency"])
    def test_readonly_attributes(self, attr):
        """Writing an attribute back onto the option raises AttributeError."""
        target = OptimizeOption()

        # The accepted error text depends on the interpreter version.
        expected_message = (
            r"(can't set attribute|has no setter|readonly attribute)"
            if sys.version_info >= (3, 11)
            else r"can't set attribute"
        )
        with pytest.raises(AttributeError, match=expected_message):
            setattr(target, attr, getattr(target, attr))


# ----------------------------
# HnswQueryParam Test Case
# ----------------------------
class TestHnswQueryParam:
    """Tests for HnswQueryParam: defaults, custom values and attribute immutability."""

    def test_default(self):
        """Check the values reported by a param built with no arguments."""
        param = HnswQueryParam()
        assert param is not None
        assert param.ef == 300
        assert param.is_using_refiner == False
        assert param.radius == 0
        assert param.is_linear == False

    def test_custom(self):
        """Values passed to the constructor are reported back unchanged."""
        param = HnswQueryParam(ef=10, is_using_refiner=True, radius=30, is_linear=True)
        assert param.ef == 10
        assert param.is_using_refiner == True
        assert param.radius == 30
        assert param.is_linear == True

    def test_readonly_attributes(self):
        """Assigning to any of the four attributes raises AttributeError.

        The check runs on every interpreter version (it must not be nested
        under one branch of the version test), and each attribute gets its own
        ``pytest.raises`` block because the context manager is left as soon as
        the first statement raises.
        """
        param = HnswQueryParam()

        # The accepted error text depends on the interpreter version.
        if sys.version_info >= (3, 11):
            match_pattern = r"(can't set attribute|has no setter|readonly attribute)"
        else:
            match_pattern = r"can't set attribute"

        attempted_writes = (
            ("ef", 10),
            ("is_using_refiner", True),
            ("radius", 30),
            ("is_linear", True),
        )
        for attr, value in attempted_writes:
            with pytest.raises(AttributeError, match=match_pattern):
                setattr(param, attr, value)


# # ----------------------------
# # IVFQueryParam Test Case
# # ----------------------------
# class TestIVFQueryParam:
#     def test_default(self):
#         param = IVFQueryParam()
#         assert param is not None
#         assert param.nprobe == 10
#         assert param.is_using_refiner == False
#         assert param.radius == 0
#         assert param.is_linear == False
#         assert param.scale_factor == 10
#
#     def test_custom(self):
#         param = IVFQueryParam(
#             nprobe=20,
#             is_using_refiner=True,
#             radius=30,
#             is_linear=True,
#             scale_factor=40
#         )
#         assert param.nprobe == 20
#         assert param.is_using_refiner == True
#         assert param.radius == 30
#         assert param.is_linear == True
#         assert param.scale_factor == 40


class TestVectorQuery:
    """Construction, predicate and validation checks for VectorQuery."""

    def test_init_with_valid_id(self):
        """An id-only query leaves ``vector`` and ``param`` as None."""
        query = VectorQuery(field_name="embedding", id="doc123")
        assert query.field_name == "embedding"
        assert query.id == "doc123"
        assert query.vector is None
        assert query.param is None

    def test_init_with_valid_vector(self):
        """A vector query keeps the vector and the query param it was given."""
        values = [0.1, 0.2, 0.3]
        hnsw_param = HnswQueryParam(ef=300)
        query = VectorQuery(field_name="embedding", vector=values, param=hnsw_param)
        assert query.field_name == "embedding"
        assert query.vector == values
        assert query.param == hnsw_param

    def test_init_both_id_and_vector_raises_error(self):
        """Validating a query that carries both id and vector raises ValueError."""
        with pytest.raises(ValueError):
            VectorQuery(field_name="embedding", id="doc123", vector=[0.1])._validate()

    def test_init_without_field_name_raises_error(self):
        """Validating a query whose field name is None raises ValueError."""
        with pytest.raises(ValueError):
            VectorQuery(field_name=None)._validate()

    def test_has_id_returns_true_when_id_set(self):
        """``has_id`` is truthy when an id was supplied."""
        assert VectorQuery(field_name="embedding", id="doc123").has_id()

    def test_has_id_returns_false_when_no_id(self):
        """``has_id`` is falsy when only a vector was supplied."""
        assert not VectorQuery(field_name="embedding", vector=[0.1]).has_id()

    def test_has_vector_returns_true_with_non_empty_vector(self):
        """``has_vector`` is truthy for a non-empty vector."""
        assert VectorQuery(field_name="embedding", vector=[0.1]).has_vector()

    def test_validate_fails_on_both_id_and_vector(self):
        """``_validate`` raises ValueError when id and vector are both present."""
        conflicting = VectorQuery(field_name="test", id="doc123", vector=[0.1])
        with pytest.raises(ValueError):
            conflicting._validate()


================================================
FILE: python/tests/test_query_executor.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from typing import Dict, Union
from unittest.mock import MagicMock

import numpy as np
import math
from _zvec.param import _VectorQuery

import pytest
from zvec.executor.query_executor import (
    MultiVectorQueryExecutor,
    NoVectorQueryExecutor,
    QueryContext,
    QueryExecutor,
    QueryExecutorFactory,
    SingleVectorQueryExecutor,
    VectorQuery,
)
from zvec import RrfReRanker, HnswQueryParam, CollectionSchema, VectorSchema, DataType


# ----------------------------
# Mock Vector Schema
# ----------------------------
class MockVectorSchema(VectorSchema):
    """VectorSchema stand-in that only carries a name.

    The base-class initializer is not invoked; only ``_name`` is set.
    """

    def __init__(self, name="test_vector"):
        self._name = name

    def _get_object(self):
        """Return a fresh ``MagicMock`` on every call."""
        return MagicMock()

    @property
    def name(self):
        """The name given at construction time."""
        return self._name


# ----------------------------
# Mock Collection Schema
# ----------------------------
class MockCollectionSchema(CollectionSchema):
    """CollectionSchema stand-in that only exposes a list of vectors.

    The base-class initializer is not invoked; only ``_vectors`` is set.
    """

    # NOTE(review): the default *value* of ``vectors`` is the typing expression
    # itself (``=`` rather than ``:``), so a no-argument call stores that
    # expression as the single list entry. Confirm whether an annotation with a
    # different default was intended before changing it; callers may rely on it.
    def __init__(self, vectors=Union[VectorSchema, Dict[str, VectorSchema]]):
        if isinstance(vectors, Dict):
            # Mapping input: keep the values, drop the keys.
            self._vectors = list(vectors.values())
        else:
            # Anything else is wrapped as a one-element list.
            self._vectors = [vectors]

    @property
    def vectors(self):
        """The list built by the constructor."""
        return self._vectors


# ----------------------------
# VectorQuery Test Case
# ----------------------------
class TestVectorQuery:
    """VectorQuery accessors plus set/get round-trips through ``_VectorQuery``."""

    @staticmethod
    def _round_trip(data_type, vector):
        """Store ``vector`` in a new ``_VectorQuery`` and read it back.

        A ``VectorSchema`` named "test" with the given ``data_type`` supplies
        the schema object for both the ``set_vector`` and ``get_vector`` calls.
        """
        query = _VectorQuery()
        schema = VectorSchema(name="test", data_type=data_type)
        query.set_vector(schema._get_object(), vector)
        return query.get_vector(schema._get_object())

    def test_init(self):
        """Unset fields are None; supplied fields are returned unchanged."""
        bare = VectorQuery(field_name="test_field")
        assert bare.field_name == "test_field"
        assert bare.id is None
        assert bare.vector is None
        assert bare.param is None

        hnsw_param = HnswQueryParam()
        full = VectorQuery(
            field_name="test_field", id="test_id", vector=[1, 2, 3], param=hnsw_param
        )
        assert full.field_name == "test_field"
        assert full.id == "test_id"
        assert full.vector == [1, 2, 3]
        assert full.param == hnsw_param

    def test_has_id(self):
        """``has_id`` is falsy without an id and truthy with one."""
        assert not VectorQuery(field_name="test_field").has_id()
        assert VectorQuery(field_name="test_field", id="test_id").has_id()

    def test_has_vector(self):
        """``has_vector`` is falsy for a missing or empty vector, truthy otherwise."""
        assert not VectorQuery(field_name="test_field").has_vector()
        assert not VectorQuery(field_name="test_field", vector=[]).has_vector()
        assert VectorQuery(field_name="test_field", vector=[1, 2, 3]).has_vector()

    def test_validate_dense_fp16_convert(self):
        """A float16 array survives the VECTOR_FP16 round-trip unchanged."""
        original = np.array([1.1, 2.1, 3.1], dtype=np.float16)
        restored = self._round_trip(DataType.VECTOR_FP16, original)
        assert np.array_equal(original, restored)

    def test_validate_dense_fp32_convert(self):
        """A float32 array survives the VECTOR_FP32 round-trip unchanged."""
        original = np.array([1.1, 2.1, 3.1], dtype=np.float32)
        restored = self._round_trip(DataType.VECTOR_FP32, original)
        assert np.array_equal(original, restored)

    def test_validate_dense_fp64_convert(self):
        """A float64 array survives the VECTOR_FP64 round-trip unchanged."""
        original = np.array([1.1, 2.1, 3.1], dtype=np.float64)
        restored = self._round_trip(DataType.VECTOR_FP64, original)
        assert np.array_equal(original, restored)

    def test_validate_dense_int8_convert(self):
        """An int8 array survives the VECTOR_INT8 round-trip unchanged."""
        original = np.array([1, 2, 3], dtype=np.int8)
        restored = self._round_trip(DataType.VECTOR_INT8, original)
        assert np.array_equal(original, restored)

    def test_validate_sparse_fp32_convert(self):
        """Sparse values come back within 1e-6 of the input for SPARSE_VECTOR_FP32."""
        original = {1: 1.1, 2: 2.2, 3: 3.3}
        restored = self._round_trip(DataType.SPARSE_VECTOR_FP32, original)
        for index, weight in original.items():
            assert math.isclose(weight, restored[index], abs_tol=1e-6)

    def test_validate_sparse_fp16_convert(self):
        """Sparse values come back within 1e-6 of their float16 rounding."""
        original = {1: 1.1, 2: 2.2, 3: 3.3}
        restored = self._round_trip(DataType.SPARSE_VECTOR_FP16, original)
        for index, weight in original.items():
            # Compare against the float16-rounded input, not the Python float.
            assert math.isclose(np.float16(weight), restored[index], abs_tol=1e-6)


class TestQueryContext:
    """Checks the attributes exposed by ``QueryContext``."""

    def test_init(self):
        """A context built from ``topk`` alone reports empty/None/False for the rest."""
        context = QueryContext(topk=10)

        assert context.topk == 10
        assert context.queries == []
        assert context.filter is None
        assert context.reranker is None
        assert context.output_fields is None
        assert context.include_vector is False
        assert context.core_vectors == []

    def test_properties(self):
        """Every constructor argument is readable through the same-named property."""
        vector_queries = [VectorQuery(field_name="test")]
        rrf = RrfReRanker()
        fields = ["field1", "field2"]

        context = QueryContext(
            topk=5,
            filter="test_filter",
            include_vector=True,
            queries=vector_queries,
            output_fields=fields,
            reranker=rrf,
        )

        assert context.topk == 5
        assert context.queries == vector_queries
        assert context.filter == "test_filter"
        assert context.reranker == rrf
        assert context.output_fields == fields
        assert context.include_vector is True

    def test_core_vectors_setter(self):
        """Assigning ``core_vectors`` makes the same list readable afterwards."""
        context = QueryContext(topk=10)
        replacement = [MagicMock()]

        context.core_vectors = replacement

        assert context.core_vectors == replacement


class TestNoVectorQueryExecutor:
    """Tests for the executor used with a ``MockCollectionSchema()`` built without vectors."""

    def test_init(self):
        """The executor is a ``QueryExecutor``."""
        executor = NoVectorQueryExecutor(MockCollectionSchema())
        assert isinstance(executor, QueryExecutor)

    def test_do_validate_with_queries(self):
        """Validation rejects a context that carries a vector query."""
        executor = NoVectorQueryExecutor(MockCollectionSchema())
        context = QueryContext(topk=10, queries=[VectorQuery(field_name="test")])

        expected_message = "Collection does not support query with vector or id"
        with pytest.raises(ValueError, match=expected_message):
            executor._do_validate(context)

    def test_do_validate_without_queries(self):
        """Validation passes (no exception) when the context has no queries."""
        executor = NoVectorQueryExecutor(MockCollectionSchema())
        context = QueryContext(topk=10)

        executor._do_validate(context)

    def test_do_build(self):
        """Building yields exactly one query carrying the context's topk and filter."""
        executor = NoVectorQueryExecutor(MockCollectionSchema())
        context = QueryContext(topk=5, filter="test_filter")

        built = executor._do_build(context, MagicMock())

        assert len(built) == 1
        only_query = built[0]
        assert only_query.topk == 5
        assert only_query.filter == "test_filter"


class TestSingleVectorQueryExecutor:
    """Tests for ``SingleVectorQueryExecutor``."""

    def test_init(self):
        """The executor is also a ``NoVectorQueryExecutor``."""
        executor = SingleVectorQueryExecutor(MockCollectionSchema())
        assert isinstance(executor, NoVectorQueryExecutor)

    def test_do_validate_multiple_queries(self):
        """Validation rejects a context that carries two vector queries."""
        executor = SingleVectorQueryExecutor(MockCollectionSchema())
        context = QueryContext(
            topk=10,
            queries=[VectorQuery(field_name="test1"), VectorQuery(field_name="test2")],
        )

        expected_message = (
            "Collection has only one vector field, cannot query with multiple vectors"
        )
        with pytest.raises(ValueError, match=expected_message):
            executor._do_validate(context)

    def test_do_build_without_queries(self):
        """Building from a query-less context yields one query with the context's topk."""
        executor = SingleVectorQueryExecutor(MockCollectionSchema())
        context = QueryContext(topk=5)

        built = executor._do_build(context, MagicMock())

        assert len(built) == 1
        assert built[0].topk == 5


class TestMultiVectorQueryExecutor:
    """Tests for ``MultiVectorQueryExecutor``."""

    def test_init(self):
        """The executor is also a ``SingleVectorQueryExecutor``."""
        executor = MultiVectorQueryExecutor(MockCollectionSchema())
        assert isinstance(executor, SingleVectorQueryExecutor)

    def test_do_validate_multiple_queries_without_reranker(self):
        """Two vector queries without a reranker fail validation."""
        executor = MultiVectorQueryExecutor(MockCollectionSchema())
        context = QueryContext(
            topk=10,
            queries=[VectorQuery(field_name="test1"), VectorQuery(field_name="test2")],
        )

        expected_message = "Reranker is required for multi-vector query"
        with pytest.raises(ValueError, match=expected_message):
            executor._do_validate(context)

    def test_do_validate_multiple_queries_with_reranker(self):
        """Two vector queries plus a reranker pass validation (no exception)."""
        executor = MultiVectorQueryExecutor(MockCollectionSchema())
        context = QueryContext(
            topk=10,
            queries=[VectorQuery(field_name="test1"), VectorQuery(field_name="test2")],
            reranker=RrfReRanker(),
        )

        executor._do_validate(context)


class TestQueryExecutorFactory:
    """Checks which executor type ``QueryExecutorFactory.create`` returns per schema."""

    def test_create_no_vectors(self):
        """A schema built without ``vectors`` yields a ``NoVectorQueryExecutor``."""
        collection_schema = MockCollectionSchema()
        created = QueryExecutorFactory.create(collection_schema)
        assert isinstance(created, NoVectorQueryExecutor)

    def test_create_single_vector(self):
        """A schema built with one vector schema yields a ``SingleVectorQueryExecutor``."""
        collection_schema = MockCollectionSchema(vectors=MockVectorSchema())
        created = QueryExecutorFactory.create(collection_schema)
        assert isinstance(created, SingleVectorQueryExecutor)

    def test_create_multiple_vectors(self):
        """A schema built with two named vector schemas yields a ``MultiVectorQueryExecutor``."""
        two_vectors = {"test1": MockVectorSchema(), "test2": MockVectorSchema()}
        collection_schema = MockCollectionSchema(vectors=two_vectors)
        created = QueryExecutorFactory.create(collection_schema)
        assert isinstance(created, MultiVectorQueryExecutor)


================================================
FILE: python/tests/test_reranker.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from unittest.mock import patch, MagicMock
import pytest
import math
import os

from zvec import Doc, MetricType
from zvec.extension.multi_vector_reranker import (
    RrfReRanker,
    WeightedReRanker,
)
from zvec.extension.sentence_transformer_rerank_function import (
    DefaultLocalReRanker,
)
from zvec.extension.qwen_rerank_function import QwenReRanker

# Tests that call real external APIs are opt-in: they run only when the
# environment variable ZVEC_RUN_INTEGRATION_TESTS is exactly "1".
RUN_INTEGRATION_TESTS = os.getenv("ZVEC_RUN_INTEGRATION_TESTS", "0") == "1"


# ----------------------------
# RrfReRanker Test Case
# ----------------------------
class TestRrfReRanker:
    """Unit tests for ``RrfReRanker``."""

    def test_init(self):
        """Constructor arguments are readable through the same-named attributes."""
        reranker = RrfReRanker(topn=5, rerank_field="content", rank_constant=100)
        assert reranker.topn == 5
        assert reranker.rerank_field == "content"
        assert reranker.rank_constant == 100

    def test_rrf_score(self):
        """``_rrf_score(rank)`` equals 1.0 / (k + rank + 1) for k = 60."""
        reranker = RrfReRanker(rank_constant=60)
        # Expected values follow the formula 1.0 / (k + rank + 1) with k=60.
        # pytest.approx is used instead of exact float equality so the test
        # does not depend on the operation order inside the implementation.
        for rank in (0, 1, 10):
            assert reranker._rrf_score(rank) == pytest.approx(1.0 / (60 + rank + 1))

    def test_rerank(self):
        """Fused results respect ``topn`` and are ordered by descending score."""
        reranker = RrfReRanker(topn=3)

        doc1 = Doc(id="1", score=0.8)
        doc2 = Doc(id="2", score=0.7)
        doc3 = Doc(id="3", score=0.9)
        doc4 = Doc(id="4", score=0.6)

        # Two ranked lists that share doc1 and doc3.
        query_results = {"vector1": [doc1, doc2, doc3], "vector2": [doc3, doc1, doc4]}

        results = reranker.rerank(query_results)

        assert len(results) <= reranker.topn

        for doc in results:
            assert hasattr(doc, "score")

        scores = [doc.score for doc in results]
        assert scores == sorted(scores, reverse=True)


# ----------------------------
# WeightedReRanker Test Case
# ----------------------------
class TestWeightedReRanker:
    """Unit tests for ``WeightedReRanker``."""

    def test_init(self):
        """Constructor arguments are readable through the same-named attributes."""
        weights = {"vector1": 0.7, "vector2": 0.3}
        reranker = WeightedReRanker(
            topn=5,
            rerank_field="content",
            metric=MetricType.L2,
            weights=weights,
        )
        assert reranker.topn == 5
        assert reranker.rerank_field == "content"
        assert reranker.metric == MetricType.L2
        assert reranker.weights == weights

    def test_normalize_score(self):
        """``_normalize_score`` maps a raw score of 1.0 per metric and rejects unknown metrics."""
        reranker = WeightedReRanker()

        # Expected normalized value of a raw score of 1.0 for each metric.
        # Compared with pytest.approx: the values involve atan/pi arithmetic,
        # so exact float equality would depend on the operation order used
        # inside the implementation.
        expected_by_metric = {
            MetricType.L2: 1.0 - 2 * math.atan(1.0) / math.pi,
            MetricType.IP: 0.5 + math.atan(1.0) / math.pi,
            MetricType.COSINE: 1.0 - 1.0 / 2.0,
        }
        for metric, expected in expected_by_metric.items():
            score = reranker._normalize_score(1.0, metric)
            assert score == pytest.approx(expected)

        with pytest.raises(ValueError, match="Unsupported metric type"):
            reranker._normalize_score(1.0, "unsupported_metric")

    def test_rerank(self):
        """Weighted results respect ``topn`` and are ordered by descending score."""
        weights = {"vector1": 0.7, "vector2": 0.3}
        reranker = WeightedReRanker(topn=3, weights=weights, metric=MetricType.L2)

        doc1 = Doc(id="1", score=0.8)
        doc2 = Doc(id="2", score=0.7)
        doc3 = Doc(id="3", score=0.9)

        # doc2 appears in both ranked lists.
        query_results = {"vector1": [doc1, doc2], "vector2": [doc2, doc3]}

        results = reranker.rerank(query_results)

        assert len(results) <= reranker.topn

        for doc in results:
            assert hasattr(doc, "score")

        scores = [doc.score for doc in results]
        assert scores == sorted(scores, reverse=True)


# ----------------------------
# QwenReRanker Test Case
# ----------------------------
class TestQwenReRanker:
    """Tests for ``QwenReRanker``.

    The DashScope module is replaced by mocks everywhere except in
    ``test_real_qwen_rerank``, which is skipped unless integration tests are
    enabled.
    """

    @staticmethod
    def _make(**overrides):
        """Build a reranker with query/api_key/rerank_field defaults, overridable by keyword."""
        params = {"query": "test", "api_key": "test_key", "rerank_field": "content"}
        params.update(overrides)
        return QwenReRanker(**params)

    @staticmethod
    def _stub_dashscope(require_module_mock, **response_attrs):
        """Make the patched ``require_module`` return a fake dashscope module.

        ``TextReRank.call`` on the fake returns a mock response carrying
        ``response_attrs`` as attributes. Returns the fake module.
        """
        dashscope = MagicMock()
        dashscope.TextReRank.call.return_value = MagicMock(**response_attrs)
        require_module_mock.return_value = dashscope
        return dashscope

    def test_init_without_query(self):
        """Omitting ``query`` raises ``ValueError``."""
        expected_message = "Query is required for QwenReRanker"
        with pytest.raises(ValueError, match=expected_message):
            QwenReRanker(api_key="test_key")

    def test_init_without_api_key(self):
        """With an emptied environment and no ``api_key`` argument, init raises."""
        expected_message = "DashScope API key is required"
        with patch.dict(os.environ, {}, clear=True), pytest.raises(
            ValueError, match=expected_message
        ):
            QwenReRanker(query="test")

    def test_init_with_env_api_key(self):
        """The API key is taken from ``DASHSCOPE_API_KEY`` when not passed explicitly."""
        with patch.dict(os.environ, {"DASHSCOPE_API_KEY": "test_key"}):
            reranker = QwenReRanker(query="test", rerank_field="content")
            assert reranker.query == "test"
            assert reranker._api_key == "test_key"
            assert reranker.rerank_field == "content"

    def test_init_with_explicit_api_key(self):
        """An explicitly passed API key is stored on the instance."""
        reranker = self._make(api_key="explicit_key")
        assert reranker.query == "test"
        assert reranker._api_key == "explicit_key"

    def test_model_property(self):
        """``model`` is "gte-rerank-v2" by default and reflects a custom value."""
        default_reranker = self._make()
        assert default_reranker.model == "gte-rerank-v2"

        custom_reranker = self._make(model="custom-model")
        assert custom_reranker.model == "custom-model"

    def test_query_property(self):
        """``query`` returns the constructor value."""
        reranker = self._make(query="test query")
        assert reranker.query == "test query"

    def test_topn_property(self):
        """``topn`` returns the constructor value."""
        reranker = self._make(topn=5)
        assert reranker.topn == 5

    def test_rerank_field_property(self):
        """``rerank_field`` returns the constructor value."""
        reranker = self._make(rerank_field="title")
        assert reranker.rerank_field == "title"

    def test_rerank_empty_results(self):
        """Reranking an empty mapping returns an empty list."""
        reranked = self._make().rerank({})
        assert reranked == []

    def test_rerank_no_valid_documents(self):
        """A document lacking the rerank field leaves nothing to rerank."""
        reranker = self._make()
        docs_without_field = {"vector1": [Doc(id="1")]}
        with pytest.raises(ValueError, match="No documents to rerank"):
            reranker.rerank(docs_without_field)

    def test_rerank_skip_empty_content(self):
        """Empty and whitespace-only content leaves nothing to rerank."""
        reranker = self._make()
        blank_contents = ["", "   "]
        query_results = {
            "vector1": [
                Doc(id=str(position), fields={"content": text})
                for position, text in enumerate(blank_contents, start=1)
            ]
        }
        with pytest.raises(ValueError, match="No documents to rerank"):
            reranker.rerank(query_results)

    @patch("zvec.extension.qwen_function.require_module")
    def test_rerank_success(self, mock_require_module):
        """Scores from a 200 response are attached to the indexed documents."""
        api_output = {
            "results": [
                {"index": 0, "relevance_score": 0.95},
                {"index": 1, "relevance_score": 0.85},
            ]
        }
        dashscope = self._stub_dashscope(
            mock_require_module, status_code=200, output=api_output
        )

        reranker = self._make(query="test query", topn=2)
        query_results = {
            "vector1": [
                Doc(id="1", fields={"content": "Document 1"}),
                Doc(id="2", fields={"content": "Document 2"}),
            ]
        }

        reranked = reranker.rerank(query_results)

        assert len(reranked) == 2
        first, second = reranked[0], reranked[1]
        assert first.id == "1"
        assert first.score == 0.95
        assert second.id == "2"
        assert second.score == 0.85

        # The API must be called exactly once with these keyword arguments.
        dashscope.TextReRank.call.assert_called_once_with(
            model="gte-rerank-v2",
            query="test query",
            documents=["Document 1", "Document 2"],
            top_n=2,
            return_documents=False,
        )

    @patch("zvec.extension.qwen_function.require_module")
    def test_rerank_deduplicate_documents(self, mock_require_module):
        """A document present in two result lists is sent to the API only once."""
        dashscope = self._stub_dashscope(
            mock_require_module,
            status_code=200,
            output={"results": [{"index": 0, "relevance_score": 0.9}]},
        )

        reranker = self._make(topn=5)

        # The same Doc object appears under both vector names.
        shared_doc = Doc(id="1", fields={"content": "Document 1"})
        reranker.rerank({"vector1": [shared_doc], "vector2": [shared_doc]})

        # call_args unpacks as (positional args, keyword args).
        _, call_kwargs = dashscope.TextReRank.call.call_args
        assert len(call_kwargs["documents"]) == 1

    @patch("zvec.extension.qwen_function.require_module")
    def test_rerank_api_error(self, mock_require_module):
        """A response with status code 400 surfaces as ``ValueError``."""
        self._stub_dashscope(
            mock_require_module,
            status_code=400,
            message="Invalid request",
            code="InvalidParameter",
        )

        reranker = self._make()
        query_results = {"vector1": [Doc(id="1", fields={"content": "Document 1"})]}

        with pytest.raises(ValueError, match="DashScope API error"):
            reranker.rerank(query_results)

    @patch("zvec.extension.qwen_function.require_module")
    def test_rerank_runtime_error(self, mock_require_module):
        """An exception raised by the API call surfaces as ``RuntimeError``."""
        dashscope = MagicMock()
        dashscope.TextReRank.call.side_effect = Exception("Network error")
        mock_require_module.return_value = dashscope

        reranker = self._make()
        query_results = {"vector1": [Doc(id="1", fields={"content": "Document 1"})]}

        with pytest.raises(RuntimeError, match="Failed to call DashScope API"):
            reranker.rerank(query_results)

    @pytest.mark.skipif(
        not RUN_INTEGRATION_TESTS,
        reason="Integration test skipped. Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.",
    )
    def test_real_qwen_rerank(self):
        """Integration test against the real DashScope TextReRank API.

        Enable it by exporting both variables before running pytest:
            export ZVEC_RUN_INTEGRATION_TESTS=1
            export DASHSCOPE_API_KEY=your-api-key
        """
        # No api_key argument is passed here.
        reranker = QwenReRanker(
            query="What is machine learning?",
            topn=3,
            rerank_field="content",
            model="gte-rerank-v2",
        )

        # (id, initial score, content) rows grouped by vector name.
        corpus = {
            "vector1": [
                (
                    "1",
                    0.8,
                    "Machine learning is a subset of artificial intelligence that focuses on building systems that can learn from data.",
                ),
                (
                    "2",
                    0.7,
                    "The weather is nice today with clear skies and sunshine.",
                ),
                (
                    "3",
                    0.75,
                    "Deep learning is a specialized branch of machine learning using neural networks with multiple layers.",
                ),
            ],
            "vector2": [
                (
                    "4",
                    0.6,
                    "Python is a popular programming language for data science and machine learning applications.",
                ),
                (
                    "5",
                    0.65,
                    "A recipe for chocolate cake includes flour, sugar, eggs, and cocoa powder.",
                ),
            ],
        }
        query_results = {
            vector_name: [
                Doc(id=doc_id, score=initial_score, fields={"content": text})
                for doc_id, initial_score, text in rows
            ]
            for vector_name, rows in corpus.items()
        }

        reranked = reranker.rerank(query_results)

        # Result count is bounded by topn and must not be empty.
        assert len(reranked) <= 3, "Should return at most topn documents"
        assert len(reranked) > 0, "Should return at least one document"

        # Every returned document carries a positive numeric score.
        for ranked_doc in reranked:
            assert hasattr(ranked_doc, "score"), "Each document should have a score"
            assert isinstance(ranked_doc.score, (int, float)), "Score should be numeric"
            assert ranked_doc.score > 0, "Score should be positive"

        # Scores are in descending order.
        ordered_scores = [ranked_doc.score for ranked_doc in reranked]
        assert ordered_scores == sorted(ordered_scores, reverse=True), (
            "Results should be sorted by score in descending order"
        )

        # Documents 1, 3 and 4 mention machine learning; at least one of them
        # must appear among the first two results.
        ml_related_docs = {"1", "3", "4"}
        top_two_ids = [ranked_doc.id for ranked_doc in reranked][:2]
        assert not ml_related_docs.isdisjoint(top_two_ids), (
            "ML-related documents should rank higher"
        )

        # Print the ranking for manual inspection during development.
        print("\nReranking results:")
        for position, ranked_doc in enumerate(reranked, 1):
            print(f"{position}. ID={ranked_doc.id}, Score={ranked_doc.score:.4f}")
            if not ranked_doc.fields:
                continue
            content = ranked_doc.field("content")
            if content:
                print(f"   Content: {content[:80]}...")


# ----------------------------
# DefaultLocalReRanker Test Case
# ----------------------------
class TestDefaultLocalReRanker:
    """Test cases for DefaultLocalReRanker."""

    def test_init_without_query(self):
        """Constructing without a ``query`` argument raises ``ValueError``."""
        expected_message = "Query is required for DefaultLocalReRanker"
        with pytest.raises(ValueError, match=expected_message):
            DefaultLocalReRanker(rerank_field="content")

    def test_init_with_empty_query(self):
        """Constructing with an empty-string ``query`` raises ``ValueError``."""
        expected_message = "Query is required for DefaultLocalReRanker"
        with pytest.raises(ValueError, match=expected_message):
            DefaultLocalReRanker(query="", rerank_field="content")

    @patch("zvec.extension.sentence_transformer_rerank_function.require_module")
    def test_init_success(self, mock_require_module):
        """Construction with a mocked model exposes the passed and default settings."""
        # Stand-in for a cross-encoder: it has ``predict`` and a ``device``.
        cross_encoder = MagicMock(device="cpu")
        cross_encoder.predict = MagicMock()

        # Fake sentence_transformers module returned by the patched loader.
        sentence_transformers = MagicMock()
        sentence_transformers.CrossEncoder.return_value = cross_encoder
        mock_require_module.return_value = sentence_transformers

        init_kwargs = {
            "query": "test query",
            "topn": 5,
            "rerank_field": "content",
            "model_name": "cross-encoder/ms-marco-MiniLM-L6-v2",
        }
        reranker = DefaultLocalReRanker(**init_kwargs)

        assert reranker.query == "test query"
        assert reranker.topn == 5
        assert reranker.rerank_field == "content"
        assert reranker.model_name == "cross-encoder/ms-marco-MiniLM-L6-v2"
        # Not passed above, so these two are the values used by default.
        assert reranker.model_source == "huggingface"
        assert reranker.batch_size == 32

    @pytest.mark.skipif(
        not RUN_INTEGRATION_TESTS,
        reason="Integration test skipped. Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.",
    )
    @patch("zvec.extension.sentence_transformer_rerank_function.require_module")
    def test_init_with_custom_params(self, mock_require_module):
        """Every explicitly passed setting is readable back from the reranker.

        NOTE(review): this test is gated behind RUN_INTEGRATION_TESTS even
        though ``require_module`` is mocked - confirm whether the
        ``modelscope`` source path needs real dependencies.
        """
        cross_encoder = MagicMock(device="cuda")
        cross_encoder.predict = MagicMock()

        sentence_transformers = MagicMock()
        sentence_transformers.CrossEncoder.return_value = cross_encoder
        mock_require_module.return_value = sentence_transformers

        init_kwargs = {
            "query": "custom query",
            "topn": 10,
            "rerank_field": "title",
            "model_name": "cross-encoder/ms-marco-MiniLM-L12-v2",
            "model_source": "modelscope",
            "device": "cuda",
            "batch_size": 64,
        }
        reranker = DefaultLocalReRanker(**init_kwargs)

        assert reranker.query == "custom query"
        assert reranker.topn == 10
        assert reranker.rerank_field == "title"
        assert reranker.model_name == "cross-encoder/ms-marco-MiniLM-L12-v2"
        assert reranker.model_source == "modelscope"
        assert reranker.batch_size == 64

    @patch("zvec.extension.sentence_transformer_rerank_function.require_module")
    def test_init_invalid_model(self, mock_require_module):
        """A loaded model without a ``predict`` attribute is rejected."""
        # ``spec=[]`` yields a mock with no attributes, hence no ``predict``.
        model_without_predict = MagicMock(spec=[])

        sentence_transformers = MagicMock()
        sentence_transformers.CrossEncoder.return_value = model_without_predict
        mock_require_module.return_value = sentence_transformers

        expected_message = "does not appear to be a cross-encoder"
        with pytest.raises(ValueError, match=expected_message):
            DefaultLocalReRanker(query="test", rerank_field="content")

    def test_query_property(self):
        """``query`` returns the value given to the constructor."""
        cross_encoder = MagicMock()
        cross_encoder.predict = MagicMock()
        sentence_transformers = MagicMock()
        sentence_transformers.CrossEncoder.return_value = cross_encoder

        loader_path = (
            "zvec.extension.sentence_transformer_rerank_function.require_module"
        )
        with patch(loader_path) as fake_loader:
            fake_loader.return_value = sentence_transformers
            reranker = DefaultLocalReRanker(query="test query", rerank_field="content")
            assert reranker.query == "test query"

    def test_topn_property(self):
        """``topn`` returns the value given to the constructor."""
        cross_encoder = MagicMock()
        cross_encoder.predict = MagicMock()
        sentence_transformers = MagicMock()
        sentence_transformers.CrossEncoder.return_value = cross_encoder

        loader_path = (
            "zvec.extension.sentence_transformer_rerank_function.require_module"
        )
        with patch(loader_path) as fake_loader:
            fake_loader.return_value = sentence_transformers
            reranker = DefaultLocalReRanker(
                query="test", topn=15, rerank_field="content"
            )
            assert reranker.topn == 15

    def test_rerank_field_property(self):
        """``rerank_field`` returns the value given to the constructor."""
        cross_encoder = MagicMock()
        cross_encoder.predict = MagicMock()
        sentence_transformers = MagicMock()
        sentence_transformers.CrossEncoder.return_value = cross_encoder

        loader_path = (
            "zvec.extension.sentence_transformer_rerank_function.require_module"
        )
        with patch(loader_path) as fake_loader:
            fake_loader.return_value = sentence_transformers
            reranker = DefaultLocalReRanker(query="test", rerank_field="title")
            assert reranker.rerank_field == "title"

    def test_batch_size_property(self):
        """``batch_size`` returns the value given to the constructor."""
        cross_encoder = MagicMock()
        cross_encoder.predict = MagicMock()
        sentence_transformers = MagicMock()
        sentence_transformers.CrossEncoder.return_value = cross_encoder

        loader_path = (
            "zvec.extension.sentence_transformer_rerank_function.require_module"
        )
        with patch(loader_path) as fake_loader:
            fake_loader.return_value = sentence_transformers
            reranker = DefaultLocalReRanker(
                query="test", rerank_field="content", batch_size=128
            )
            assert reranker.batch_size == 128

    def test_rerank_empty_results(self):
        """Reranking an empty mapping returns an empty list."""
        cross_encoder = MagicMock()
        cross_encoder.predict = MagicMock()
        sentence_transformers = MagicMock()
        sentence_transformers.CrossEncoder.return_value = cross_encoder

        loader_path = (
            "zvec.extension.sentence_transformer_rerank_function.require_module"
        )
        with patch(loader_path) as fake_loader:
            fake_loader.return_value = sentence_transformers
            reranker = DefaultLocalReRanker(query="test", rerank_field="content")
            reranked = reranker.rerank({})
            assert reranked == []

    def test_rerank_no_valid_documents(self):
        """A document lacking the rerank field leaves nothing to rerank."""
        cross_encoder = MagicMock()
        cross_encoder.predict = MagicMock()
        sentence_transformers = MagicMock()
        sentence_transformers.CrossEncoder.return_value = cross_encoder

        loader_path = (
            "zvec.extension.sentence_transformer_rerank_function.require_module"
        )
        with patch(loader_path) as fake_loader:
            fake_loader.return_value = sentence_transformers
            reranker = DefaultLocalReRanker(query="test", rerank_field="content")

            # The only document carries no fields at all.
            docs_without_field = {"vector1": [Doc(id="1")]}
            with pytest.raises(ValueError, match="No documents to rerank"):
                reranker.rerank(docs_without_field)

    def test_rerank_skip_empty_content(self):
        """Empty and whitespace-only content leaves nothing to rerank."""
        cross_encoder = MagicMock()
        cross_encoder.predict = MagicMock()
        sentence_transformers = MagicMock()
        sentence_transformers.CrossEncoder.return_value = cross_encoder

        loader_path = (
            "zvec.extension.sentence_transformer_rerank_function.require_module"
        )
        with patch(loader_path) as fake_loader:
            fake_loader.return_value = sentence_transformers
            reranker = DefaultLocalReRanker(query="test", rerank_field="content")

            blank_contents = ["", "   "]
            query_results = {
                "vector1": [
                    Doc(id=str(position), fields={"content": text})
                    for position, text in enumerate(blank_contents, start=1)
                ]
            }
            with pytest.raises(ValueError, match="No documents to rerank"):
                reranker.rerank(query_results)

    def test_rerank_success(self):
        """Predicted scores are attached to the documents and the model call is checked."""
        import numpy as np

        # Mocked model whose ``predict`` returns one score per document.
        cross_encoder = MagicMock(device="cpu")
        cross_encoder.predict.return_value = np.array([0.95, 0.85, 0.75])

        sentence_transformers = MagicMock()
        sentence_transformers.CrossEncoder.return_value = cross_encoder

        loader_path = (
            "zvec.extension.sentence_transformer_rerank_function.require_module"
        )
        with patch(loader_path) as fake_loader:
            fake_loader.return_value = sentence_transformers
            reranker = DefaultLocalReRanker(
                query="test query", topn=3, rerank_field="content"
            )

            query_results = {
                "vector1": [
                    Doc(id="1", score=0.8, fields={"content": "Document 1"}),
                    Doc(id="2", score=0.7, fields={"content": "Document 2"}),
                    Doc(id="3", score=0.6, fields={"content": "Document 3"}),
                ]
            }

            reranked = reranker.rerank(query_results)

            # Each result carries the id and score expected at its position.
            assert len(reranked) == 3
            expected_ranking = [("1", 0.95), ("2", 0.85), ("3", 0.75)]
            for ranked_doc, (doc_id, score) in zip(reranked, expected_ranking):
                assert ranked_doc.id == doc_id
                assert ranked_doc.score == score

            # ``predict`` received one [query, content] pair per document.
            assert cross_encoder.predict.called
            positional, keyword = cross_encoder.predict.call_args
            pairs = positional[0]
            assert len(pairs) == 3
            contents = ["Document 1", "Document 2", "Document 3"]
            for position, content in enumerate(contents):
                assert pairs[position] == ["test query", content]
            assert keyword["batch_size"] == 32
            assert keyword["show_progress_bar"] is False

    def test_rerank_with_topn_limit(self):
        """Only the ``topn`` highest-scored documents are returned."""
        import numpy as np

        cross_encoder = MagicMock()
        cross_encoder.predict.return_value = np.array([0.9, 0.8, 0.7, 0.6, 0.5])

        sentence_transformers = MagicMock()
        sentence_transformers.CrossEncoder.return_value = cross_encoder

        loader_path = (
            "zvec.extension.sentence_transformer_rerank_function.require_module"
        )
        with patch(loader_path) as fake_loader:
            fake_loader.return_value = sentence_transformers
            reranker = DefaultLocalReRanker(
                query="test", topn=2, rerank_field="content"
            )

            # Five documents with ids "1".."5" and contents "Doc 1".."Doc 5".
            query_results = {
                "vector1": [
                    Doc(id=str(number), fields={"content": f"Doc {number}"})
                    for number in range(1, 6)
                ]
            }

            reranked = reranker.rerank(query_results)

            # Five candidates, topn=2: only the two best remain.
            assert len(reranked) == 2
            best, runner_up = reranked[0], reranked[1]
            assert best.id == "1"
            assert best.score == 0.9
            assert runner_up.id == "2"
            assert runner_up.score == 0.8

    def test_rerank_deduplicate_documents(self):
        """A document present in two result lists is scored only once."""
        import numpy as np

        cross_encoder = MagicMock()
        cross_encoder.predict.return_value = np.array([0.95, 0.85])

        sentence_transformers = MagicMock()
        sentence_transformers.CrossEncoder.return_value = cross_encoder

        loader_path = (
            "zvec.extension.sentence_transformer_rerank_function.require_module"
        )
        with patch(loader_path) as fake_loader:
            fake_loader.return_value = sentence_transformers
            reranker = DefaultLocalReRanker(
                query="test", topn=5, rerank_field="content"
            )

            first_doc = Doc(id="1", fields={"content": "Document 1"})
            second_doc = Doc(id="2", fields={"content": "Document 2"})

            # first_doc is listed under both vector names.
            query_results = {
                "vector1": [first_doc, second_doc],
                "vector2": [first_doc],
            }

            reranked = reranker.rerank(query_results)

            # Two unique documents in, two results out, one model call.
            assert len(reranked) == 2
            assert cross_encoder.predict.call_count == 1

            # call_args unpacks as (positional args, keyword args).
            positional, _ = cross_encoder.predict.call_args
            scored_pairs = positional[0]
            assert len(scored_pairs) == 2  # Only 2 unique documents

    def test_rerank_sorting(self):
        """Verify ``rerank`` orders its output by score, highest first.

        The mocked scores (0.6, 0.9, 0.7) are intentionally unsorted, so the
        expected output order is documents "2", "3", "1".
        """
        import numpy as np

        fake_encoder = MagicMock()
        # Scores deliberately out of order relative to the input documents.
        fake_encoder.predict.return_value = np.array([0.6, 0.9, 0.7])

        # Stand-in for the sentence_transformers module.
        fake_st_module = MagicMock()
        fake_st_module.CrossEncoder.return_value = fake_encoder

        with patch(
            "zvec.extension.sentence_transformer_rerank_function.require_module",
            return_value=fake_st_module,
        ):
            reranker = DefaultLocalReRanker(
                query="test", topn=3, rerank_field="content"
            )

            candidates = [
                Doc(id=str(n), fields={"content": f"Doc {n}"}) for n in (1, 2, 3)
            ]
            results = reranker.rerank({"vector1": candidates})

            # (id, score) pairs in the expected descending-score order.
            expected_ranking = [("2", 0.9), ("3", 0.7), ("1", 0.6)]

            assert len(results) == 3
            for ranked, (want_id, want_score) in zip(results, expected_ranking):
                assert ranked.id == want_id
                assert ranked.score == want_score

    def test_rerank_model_error(self):
        """Verify a failing ``predict`` call surfaces as ``RuntimeError``.

        The mocked cross-encoder raises a plain ``Exception``; ``rerank`` is
        expected to raise ``RuntimeError`` with a message matching
        "Failed to compute rerank scores".
        """
        # Cross-encoder double whose inference always raises.
        broken_encoder = MagicMock()
        broken_encoder.predict.side_effect = Exception("Model inference error")

        # Stand-in for the sentence_transformers module.
        fake_st_module = MagicMock()
        fake_st_module.CrossEncoder.return_value = broken_encoder

        patch_target = (
            "zvec.extension.sentence_transformer_rerank_function.require_module"
        )
        with patch(patch_target, return_value=fake_st_module):
            reranker = DefaultLocalReRanker(query="test", rerank_field="content")

            only_doc = Doc(id="1", fields={"content": "Document 1"})
            single_doc_results = {"vector1": [only_doc]}

            with pytest.raises(RuntimeError, match="Failed to compute rerank scores"):
                reranker.rerank(single_doc_results)

    def test_rerank_with_custom_batch_size(self):
        """Verify a custom ``batch_size`` is forwarded to ``predict``.

        The reranker is built with ``batch_size=64`` and the keyword arguments
        of the mocked ``predict`` call are inspected afterwards.
        """
        import numpy as np

        fake_encoder = MagicMock()
        fake_encoder.predict.return_value = np.array([0.9, 0.8])

        # Stand-in for the sentence_transformers module.
        fake_st_module = MagicMock()
        fake_st_module.CrossEncoder.return_value = fake_encoder

        with patch(
            "zvec.extension.sentence_transformer_rerank_function.require_module",
            return_value=fake_st_module,
        ):
            reranker = DefaultLocalReRanker(
                query="test", rerank_field="content", batch_size=64
            )

            candidates = [
                Doc(id=str(n), fields={"content": f"Doc {n}"}) for n in (1, 2)
            ]
            reranker.rerank({"vector1": candidates})

            # The keyword arguments of the predict() call carry batch_size.
            _, predict_kwargs = fake_encoder.predict.call_args
            assert predict_kwargs["batch_size"] == 64

    @pytest.mark.skipif(
        not RUN_INTEGRATION_TESTS,
        reason="Integration test skipped. Set ZVEC_RUN_INTEGRATION_TESTS=1 to run.",
    )
    def test_real_sentence_transformer_rerank(self):
        """End-to-end rerank check against a real cross-encoder model.

        Skipped unless ``RUN_INTEGRATION_TESTS`` is truthy. To enable::

            export ZVEC_RUN_INTEGRATION_TESTS=1

        Note: This test requires the sentence-transformers package and will
        download the MS MARCO MiniLM model (~80MB) on first run.
        """

        def build_doc(doc_id, score, content):
            # Tiny factory that keeps the fixture below compact.
            return Doc(id=doc_id, score=score, fields={"content": content})

        # Reranker backed by the real (default) model; no mocking here.
        reranker = DefaultLocalReRanker(
            query="What is machine learning?",
            topn=3,
            rerank_field="content",
        )

        # Five candidates spread over two vector keys.
        query_results = {
            "vector1": [
                build_doc(
                    "1",
                    0.8,
                    "Machine learning is a subset of artificial intelligence that focuses on building systems that can learn from data.",
                ),
                build_doc(
                    "2",
                    0.7,
                    "The weather is nice today with clear skies and sunshine.",
                ),
                build_doc(
                    "3",
                    0.75,
                    "Deep learning is a specialized branch of machine learning using neural networks with multiple layers.",
                ),
            ],
            "vector2": [
                build_doc(
                    "4",
                    0.6,
                    "Python is a popular programming language for data science and machine learning applications.",
                ),
                build_doc(
                    "5",
                    0.65,
                    "A recipe for chocolate cake includes flour, sugar, eggs, and cocoa powder.",
                ),
            ],
        }

        results = reranker.rerank(query_results)

        # Result count is bounded by topn and must be non-empty.
        result_count = len(results)
        assert result_count <= 3, "Should return at most topn documents"
        assert result_count > 0, "Should return at least one document"

        # Every returned document carries a numeric score.
        for ranked in results:
            assert hasattr(ranked, "score"), "Each document should have a score"
            assert isinstance(ranked.score, (int, float)), "Score should be numeric"

        # Scores must already be in descending order.
        scores = [ranked.score for ranked in results]
        assert scores == sorted(scores, reverse=True), (
            "Results should be sorted by score in descending order"
        )

        # Documents 1, 3 and 4 are about machine learning; at least one of
        # them has to appear among the first two results.
        ml_related_docs = {"1", "3", "4"}
        top_two_ids = {ranked.id for ranked in results[:2]}
        assert top_two_ids & ml_related_docs, (
            "ML-related documents should rank higher"
        )

        # Dump the ranking for manual inspection during development.
        print("\nSentenceTransformer Reranking results:")
        for position, ranked in enumerate(results, start=1):
            print(f"{position}. ID={ranked.id}, Score={ranked.score:.4f}")
            content = ranked.field("content") if ranked.fields else None
            if content:
                print(f"   Content: {content[:80]}...")


================================================
FILE: python/tests/test_schema.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import pytest
from zvec import (
    CollectionSchema,
    CollectionStats,
    FieldSchema,
    VectorSchema,
    HnswIndexParam,
    InvertIndexParam,
    DataType,
    IndexType,
    MetricType,
)

# ----------------------------
# FieldSchema Test Case
# ----------------------------


class TestFieldSchema:
    """Construction, attribute and read-only checks for ``FieldSchema``."""

    def test_default(self):
        """A field built from a name and data type is non-nullable, unindexed."""
        plain = FieldSchema("field", data_type=DataType.FLOAT)
        assert plain.name == "field"
        assert plain.data_type == DataType.FLOAT
        assert plain.nullable is False
        assert plain.index_param is None

    def test_custom(self):
        """Explicit ``nullable`` and ``index_param`` values are reflected back."""
        # (name, data type, index param, expected enable_range_optimization)
        scenarios = [
            ("float", DataType.FLOAT, InvertIndexParam(), False),
            (
                "str",
                DataType.STRING,
                InvertIndexParam(enable_range_optimization=True),
                True,
            ),
        ]
        for field_name, dtype, index_param, range_optimized in scenarios:
            built = FieldSchema(
                name=field_name,
                data_type=dtype,
                nullable=True,
                index_param=index_param,
            )
            assert built.name == field_name
            assert built.data_type == dtype
            assert built.nullable is True
            assert built.index_param.enable_range_optimization is range_optimized

    def test_readonly(self):
        """Assigning to ``index_param`` raises ``AttributeError``."""
        import sys

        locked = FieldSchema(
            name="float",
            data_type=DataType.FLOAT,
            nullable=True,
            index_param=InvertIndexParam(),
        )

        # The accepted error wording depends on the interpreter version.
        legacy_wording = r"can't set attribute"
        newer_wording = r"(can't set attribute|has no setter|readonly attribute)"
        match_pattern = (
            newer_wording if sys.version_info >= (3, 11) else legacy_wording
        )

        with pytest.raises(AttributeError, match=match_pattern):
            locked.index_param = InvertIndexParam(enable_range_optimization=True)


# ----------------------------
# VectorSchema Test Case
# ----------------------------
class TestVectorSchema:
    """Construction, attribute and read-only checks for ``VectorSchema``."""

    def test_default(self):
        """Without an explicit index the schema reports a FLAT / IP index."""
        vec = VectorSchema("vector", data_type=DataType.VECTOR_FP32, dimension=128)
        assert vec.name == "vector"
        assert vec.data_type == DataType.VECTOR_FP32
        assert vec.dimension == 128

        default_index = vec.index_param
        assert default_index is not None
        assert default_index.type == IndexType.FLAT
        assert default_index.metric_type == MetricType.IP

    def test_custom(self):
        """HNSW parameters passed at construction are readable afterwards."""
        hnsw = HnswIndexParam(metric_type=MetricType.COSINE, m=15, ef_construction=300)
        vec = VectorSchema(
            name="vector",
            data_type=DataType.VECTOR_INT8,
            dimension=512,
            index_param=hnsw,
        )
        assert vec.name == "vector"
        assert vec.data_type == DataType.VECTOR_INT8

        stored_index = vec.index_param
        assert stored_index.metric_type == MetricType.COSINE
        assert stored_index.m == 15
        assert stored_index.ef_construction == 300

    def test_readonly(self):
        """Assigning to ``dimension`` raises ``AttributeError``."""
        import sys

        locked = VectorSchema(
            name="vector",
            dimension=128,
            data_type=DataType.VECTOR_INT8,
        )

        # The accepted error wording depends on the interpreter version.
        legacy_wording = r"can't set attribute"
        newer_wording = r"(can't set attribute|has no setter|readonly attribute)"
        match_pattern = (
            newer_wording if sys.version_info >= (3, 11) else legacy_wording
        )

        with pytest.raises(AttributeError, match=match_pattern):
            locked.dimension = 4


# ----------------------------
# CollectionSchema Test Case
# ----------------------------
class TestCollectionSchema:
    """Checks ``CollectionSchema`` built from single and multiple members."""

    def test_collection_schema_with_single_field(self):
        """A bare ``FieldSchema`` / ``VectorSchema`` (no list) is accepted."""
        id_field = FieldSchema(
            name="id",
            data_type=DataType.INT64,
            index_param=InvertIndexParam(),
            nullable=False,
        )
        int8_vector = VectorSchema(
            name="vector",
            data_type=DataType.VECTOR_INT8,
            dimension=128,
            index_param=HnswIndexParam(),
        )
        collection_schema = CollectionSchema(
            name="test_collection",
            fields=id_field,
            vectors=int8_vector,
        )

        assert collection_schema is not None
        assert collection_schema.name == "test_collection"
        assert len(collection_schema.fields) == 1
        assert len(collection_schema.vectors) == 1

        # Scalar field looked up by name.
        looked_up_field = collection_schema.field("id")
        assert looked_up_field is not None
        assert looked_up_field.name == "id"
        assert looked_up_field.data_type == DataType.INT64
        assert not looked_up_field.nullable
        assert looked_up_field.index_param.type == IndexType.INVERT
        assert not looked_up_field.index_param.enable_range_optimization

        # Vector field looked up by name; HnswIndexParam() was built with no
        # arguments, so these are the values it reports by default.
        looked_up_vector = collection_schema.vector("vector")
        assert looked_up_vector is not None
        assert looked_up_vector.name == "vector"
        assert looked_up_vector.data_type == DataType.VECTOR_INT8
        assert looked_up_vector.dimension == 128
        assert looked_up_vector.index_param.type == IndexType.HNSW
        assert looked_up_vector.index_param.m == 50
        assert looked_up_vector.index_param.ef_construction == 500
        assert looked_up_vector.index_param.metric_type == MetricType.IP

    def test_collection_schema_with_multi_fields(self):
        """Lists of three scalar fields and two vectors are all retrievable."""
        scalar_fields = [
            FieldSchema(
                "id",
                DataType.INT64,
                nullable=False,
                index_param=InvertIndexParam(enable_range_optimization=True),
            ),
            FieldSchema(
                "name",
                DataType.STRING,
                nullable=False,
                index_param=InvertIndexParam(),
            ),
            FieldSchema(
                "weight",
                DataType.INT32,
                nullable=True,
            ),
        ]
        vector_fields = [
            VectorSchema(
                "dense",
                DataType.VECTOR_FP32,
                dimension=128,
                index_param=HnswIndexParam(),
            ),
            VectorSchema(
                "sparse", DataType.SPARSE_VECTOR_FP32, index_param=HnswIndexParam()
            ),
        ]
        collection_schema = CollectionSchema(
            name="test_collection",
            fields=scalar_fields,
            vectors=vector_fields,
        )

        assert collection_schema is not None
        assert collection_schema.name == "test_collection"
        assert len(collection_schema.fields) == 3
        assert len(collection_schema.vectors) == 2

        id_field = collection_schema.field("id")
        assert id_field is not None
        assert id_field.name == "id"
        assert id_field.data_type == DataType.INT64
        assert not id_field.nullable
        assert id_field.index_param.type == IndexType.INVERT

        dense_vector = collection_schema.vector("dense")
        assert dense_vector is not None
        assert dense_vector.name == "dense"
        assert dense_vector.data_type == DataType.VECTOR_FP32
        assert dense_vector.dimension == 128
        assert dense_vector.index_param.type == IndexType.HNSW

        # The sparse vector was declared without a dimension and reports 0.
        sparse_vector = collection_schema.vector("sparse")
        assert sparse_vector is not None
        assert sparse_vector.name == "sparse"
        assert sparse_vector.data_type == DataType.SPARSE_VECTOR_FP32
        assert sparse_vector.dimension == 0
        assert sparse_vector.index_param.type == IndexType.HNSW

        # The schema can be rendered as a string.
        rendered = str(collection_schema)
        assert rendered is not None


# ----------------------------
# CollectionStats Test Case
# ----------------------------
class TestCollectionStats:
    """
    Smoke test for ``CollectionStats``.

    NOTE(review): the previous wording here said the constructor is not
    provided and that instances only come from collection.stats(), yet the
    test below calls ``CollectionStats()`` directly -- confirm which is
    intended.
    """

    def test_collection_stats(self):
        """Calling ``CollectionStats()`` with no arguments yields an object."""
        assert CollectionStats() is not None


================================================
FILE: python/tests/test_typing.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import pytest
from zvec import (
    DataType,
    IndexType,
    MetricType,
    QuantizeType,
    Status,
    StatusCode,
)


# ----------------------------
# Enum Test Case
# ----------------------------
@pytest.mark.parametrize(
    ("member", "name"),
    [
        (DataType.FLOAT, "FLOAT"),
        (IndexType.HNSW, "HNSW"),
        (MetricType.COSINE, "COSINE"),
        (QuantizeType.INT8, "INT8"),
        (StatusCode.OK, "OK"),
    ],
)
def test_enum_names(member, name):
    """One sampled member per enum reports the expected ``name``."""
    reported_name = member.name
    assert reported_name == name


@pytest.mark.parametrize(
    ("member", "value"),
    [
        (DataType.FLOAT, 8),
        (IndexType.HNSW, 1),
        (MetricType.COSINE, 3),
        (QuantizeType.INT8, 2),
        (StatusCode.OK, 0),
    ],
)
def test_enum_values(member, value):
    """One sampled member per enum reports the expected integer ``value``."""
    reported_value = member.value
    assert reported_value == value


@pytest.mark.parametrize("member", ("L2", "IP", "COSINE"))
def test_metric_type_has_member(member):
    """``MetricType.__members__`` contains each listed metric name."""
    registered = MetricType.__members__
    assert member in registered


@pytest.mark.parametrize(
    "member",
    (
        # Scalar types
        "STRING",
        "BOOL",
        "INT32",
        "INT64",
        "FLOAT",
        "DOUBLE",
        "UINT32",
        "UINT64",
        # Dense vector types
        "VECTOR_FP16",
        "VECTOR_FP32",
        "VECTOR_FP64",
        "VECTOR_INT8",
        # Sparse vector types
        "SPARSE_VECTOR_FP32",
        "SPARSE_VECTOR_FP16",
        # Array types
        "ARRAY_STRING",
        "ARRAY_INT32",
        "ARRAY_INT64",
        "ARRAY_FLOAT",
        "ARRAY_DOUBLE",
        "ARRAY_BOOL",
        "ARRAY_UINT32",
        "ARRAY_UINT64",
    ),
)
def test_data_type_has_member(member):
    """``DataType.__members__`` contains each listed type name."""
    registered = DataType.__members__
    assert member in registered


@pytest.mark.parametrize("member", ("HNSW", "IVF", "FLAT", "INVERT"))
def test_index_type_has_member(member):
    """``IndexType.__members__`` contains each listed index name."""
    registered = IndexType.__members__
    assert member in registered


@pytest.mark.parametrize("member", ("FP16", "INT8", "INT4", "UNDEFINED"))
def test_quantize_type_has_member(member):
    """``QuantizeType.__members__`` contains each listed quantization name."""
    registered = QuantizeType.__members__
    assert member in registered


@pytest.mark.parametrize(
    "member",
    (
        "OK",
        "UNKNOWN",
        "NOT_FOUND",
        "ALREADY_EXISTS",
        "INVALID_ARGUMENT",
        "PERMISSION_DENIED",
        "FAILED_PRECONDITION",
        "RESOURCE_EXHAUSTED",
        "UNAVAILABLE",
        "INTERNAL_ERROR",
        "NOT_SUPPORTED",
    ),
)
def test_status_code_has_member(member):
    """``StatusCode.__members__`` contains each listed status name."""
    registered = StatusCode.__members__
    assert member in registered


# ----------------------------
# Status Test Case
# ----------------------------
class TestStatus:
    """Checks the ``code()``, ``message()`` and ``ok()`` accessors of ``Status``."""

    def test_status_code(self):
        """``code()`` returns the status code given at construction."""
        assert Status(StatusCode.OK).code() == StatusCode.OK

    def test_status_message(self):
        """``message()`` returns the message given at construction."""
        samples = (
            (StatusCode.OK, "OK"),
            (StatusCode.NOT_FOUND, "Not Found"),
        )
        for code, text in samples:
            status = Status(code, text)
            assert status.message() == text

    def test_status_ok(self):
        """``ok()`` is truthy for a status built with ``StatusCode.OK``."""
        assert Status(StatusCode.OK).ok()


================================================
FILE: python/tests/test_util.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from unittest.mock import MagicMock, patch

import pytest
from zvec import require_module


# ----------------------------
# require_module func Test Case
# ----------------------------
def test_require_module_success():
    """``require_module("os")`` returns an object exposing ``path``."""
    loaded = require_module("os")
    assert loaded is not None
    assert hasattr(loaded, "path")


def test_require_module_with_submodule_success():
    """A dotted name (``os.path``) returns an object exposing ``join``."""
    loaded = require_module("os.path")
    assert loaded is not None
    assert hasattr(loaded, "join")


def test_require_module_import_error():
    """An unknown module raises ``ImportError`` naming the missing package."""
    with pytest.raises(ImportError) as caught:
        require_module("nonexistent_module")

    rendered = str(caught.value)
    expected_fragment = "Required package 'nonexistent_module' is not installed."
    assert expected_fragment in rendered


def test_require_module_with_mitigation_import_error():
    """With ``mitigation`` set, the error message names that package."""
    with pytest.raises(ImportError) as caught:
        require_module("nonexistent_module.submodule", mitigation="custom_package")

    rendered = str(caught.value)
    # Every fragment below must appear in the error text.
    expected_fragments = (
        "Required package 'custom_package' is not installed.",
        "Module 'nonexistent_module.submodule' is part of 'nonexistent_module'",
        "please pip install 'custom_package'.",
    )
    for fragment in expected_fragments:
        assert fragment in rendered


def test_require_module_submodule_import_error():
    """A missing submodule of ``os`` yields a message naming the parent."""
    with pytest.raises(ImportError) as caught:
        require_module("os.nonexistent_submodule")

    rendered = str(caught.value)
    # Every fragment below must appear in the error text.
    expected_fragments = (
        "Required package 'os.nonexistent_submodule' is not installed.",
        "Module 'os.nonexistent_submodule' is part of 'os'",
        "please pip install 'os'.",
    )
    for fragment in expected_fragments:
        assert fragment in rendered


@patch("importlib.import_module")
def test_require_module_wraps_original_exception(mock_import_module):
    """The raised ``ImportError`` chains the underlying one as ``__cause__``."""
    root_cause = ImportError("Original error")
    mock_import_module.side_effect = root_cause

    with pytest.raises(ImportError) as caught:
        require_module("some_module")

    # Identity check: the very same exception object must be chained.
    assert caught.value.__cause__ is root_cause


@patch("importlib.import_module")
def test_require_module_calls_importlib(mock_import_module):
    """``require_module`` calls ``import_module`` once and returns its result."""
    sentinel_module = MagicMock()
    mock_import_module.return_value = sentinel_module

    returned = require_module("test_module")

    mock_import_module.assert_called_once_with("test_module")
    assert returned is sentinel_module


================================================
FILE: python/zvec/__init__.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import sys
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from importlib.metadata import PackageNotFoundError


# ==============================
# Public API — grouped by category
# ==============================

from . import model as model

# —— Extensions ——
from .extension import (
    BM25EmbeddingFunction,
    DefaultLocalDenseEmbedding,
    DefaultLocalReRanker,
    DefaultLocalSparseEmbedding,
    DenseEmbeddingFunction,
    OpenAIDenseEmbedding,
    OpenAIFunctionBase,
    QwenDenseEmbedding,
    QwenFunctionBase,
    QwenReRanker,
    QwenSparseEmbedding,
    ReRanker,
    RrfReRanker,
    SentenceTransformerFunctionBase,
    SparseEmbeddingFunction,
    WeightedReRanker,
)

# —— Submodules ——
from .model import param as param
from .model import schema as schema

# —— Core data structures ——
from .model.collection import Collection
from .model.doc import Doc

# —— Query & index parameters ——
from .model.param import (
    AddColumnOption,
    AlterColumnOption,
    CollectionOption,
    FlatIndexParam,
    HnswIndexParam,
    HnswQueryParam,
    HnswRabitqIndexParam,
    HnswRabitqQueryParam,
    IndexOption,
    InvertIndexParam,
    IVFIndexParam,
    IVFQueryParam,
    OptimizeOption,
)
from .model.param.vector_query import VectorQuery

# —— Schema & field definitions ——
from .model.schema import CollectionSchema, CollectionStats, FieldSchema, VectorSchema

# —— Tools ——
from .tool import require_module
from .typing import (
    DataType,
    IndexType,
    MetricType,
    QuantizeType,
    Status,
    StatusCode,
)
from .typing.enum import LogLevel, LogType

# —— Lifecycle ——
from .zvec import create_and_open, init, open

# ==============================
# Public interface declaration
# ==============================
# Names exported by ``from zvec import *``. Entries are grouped by the
# category comments inside the list. The ``model``, ``param`` and ``schema``
# submodules imported in this file are not listed here.
__all__ = [
    # Zvec functions
    "create_and_open",
    "init",
    "open",
    # Core classes
    "Collection",
    "Doc",
    # Schema
    "CollectionSchema",
    "FieldSchema",
    "VectorSchema",
    "CollectionStats",
    # Parameters
    "VectorQuery",
    "InvertIndexParam",
    "HnswIndexParam",
    "HnswRabitqIndexParam",
    "HnswRabitqQueryParam",
    "FlatIndexParam",
    "IVFIndexParam",
    "CollectionOption",
    "IndexOption",
    "OptimizeOption",
    "AddColumnOption",
    "AlterColumnOption",
    "HnswQueryParam",
    "IVFQueryParam",
    # Extensions
    "DenseEmbeddingFunction",
    "SparseEmbeddingFunction",
    "QwenFunctionBase",
    "OpenAIFunctionBase",
    "SentenceTransformerFunctionBase",
    "ReRanker",
    "DefaultLocalDenseEmbedding",
    "DefaultLocalSparseEmbedding",
    "BM25EmbeddingFunction",
    "OpenAIDenseEmbedding",
    "QwenDenseEmbedding",
    "QwenSparseEmbedding",
    "RrfReRanker",
    "WeightedReRanker",
    "DefaultLocalReRanker",
    "QwenReRanker",
    # Typing
    "DataType",
    "MetricType",
    "QuantizeType",
    "IndexType",
    "LogLevel",
    "LogType",
    "Status",
    "StatusCode",
    # Tools
    "require_module",
]

# ==============================
# Version handling
# ==============================
# Annotation only: no value is bound until one of the branches below runs.
__version__: str

# Prefer the standard-library metadata API and fall back to the
# ``importlib_metadata`` package when ``importlib.metadata`` cannot be imported.
try:
    from importlib.metadata import version
except ImportError:
    from importlib_metadata import version  # Python < 3.8

# Best-effort lookup: any exception raised while querying the "zvec"
# distribution leaves the version as the literal string "unknown".
try:
    __version__ = version("zvec")
except Exception:
    __version__ = "unknown"


================================================
FILE: python/zvec/__init__.pyi
================================================
"""
Zvec core module
"""

from __future__ import annotations

import collections

from . import typing
from .extension import ReRanker, RrfReRanker, WeightedReRanker
from .extension.embedding import DenseEmbeddingFunction
from .model import param, schema
from .model.collection import Collection
from .model.doc import Doc
from .model.param import (
    AddColumnOption,
    AlterColumnOption,
    CollectionOption,
    FlatIndexParam,
    HnswIndexParam,
    HnswQueryParam,
    IndexOption,
    InvertIndexParam,
    IVFIndexParam,
    IVFQueryParam,
    OptimizeOption,
)
from .model.param.vector_query import VectorQuery
from .model.schema import CollectionSchema, CollectionStats, FieldSchema, VectorSchema
from .tool import require_module
from .typing import (
    DataType,
    IndexType,
    MetricType,
    QuantizeType,
    Status,
    StatusCode,
)
from .typing.enum import LogLevel, LogType
from .zvec import create_and_open, init, open

# Names exported by this stub. Each public name is listed exactly once; the
# earlier listing repeated "DenseEmbeddingFunction" and "ReRanker".
__all__: list = [
    "AddColumnOption",
    "AlterColumnOption",
    "Collection",
    "CollectionOption",
    "CollectionSchema",
    "CollectionStats",
    "DataType",
    "DenseEmbeddingFunction",
    "Doc",
    "FieldSchema",
    "FlatIndexParam",
    "HnswIndexParam",
    "HnswQueryParam",
    "IVFIndexParam",
    "IVFQueryParam",
    "IndexOption",
    "IndexType",
    "InvertIndexParam",
    "LogLevel",
    "LogType",
    "MetricType",
    "OptimizeOption",
    "QuantizeType",
    "ReRanker",
    "RrfReRanker",
    "Status",
    "StatusCode",
    "VectorQuery",
    "VectorSchema",
    "WeightedReRanker",
    "create_and_open",
    "init",
    "open",
    "require_module",
]

class _Collection:
    """Type stub for the underscore-prefixed ``_Collection`` class.

    Only signatures are declared here; every body is ``...``. Parameters use
    positional placeholder names (``arg0``, ``arg1``, ...), so their meaning
    has to be read from the annotated types.

    NOTE(review): the placeholder names suggest this stub was generated from
    a native binding -- confirm before editing it by hand.
    """

    # Static factories; both return a ``_Collection``.
    @staticmethod
    def CreateAndOpen(
        arg0: str, arg1: schema._CollectionSchema, arg2: param.CollectionOption
    ) -> _Collection: ...
    @staticmethod
    def Open(arg0: str, arg1: param.CollectionOption) -> _Collection: ...
    # Column and index operations; all return ``None``.
    def AddColumn(
        self,
        arg0: schema._FieldSchema,
        arg1: str,
        arg2: param.AddColumnOption,
    ) -> None: ...
    def AlterColumn(
        self,
        arg0: str,
        arg1: str,
        arg2: schema._FieldSchema,
        arg3: param.AlterColumnOption,
    ) -> None: ...
    def CreateIndex(
        self, arg0: str, arg1: param.IndexParam, arg2: param.IndexOption
    ) -> None: ...
    # Takes a sequence of strings and returns one ``Status`` list.
    # NOTE(review): presumably the strings are document keys -- confirm.
    def Delete(self, arg0: collections.abc.Sequence[str]) -> list[typing.Status]: ...
    def DeleteByFilter(self, arg0: str) -> None: ...
    def Destroy(self) -> None: ...
    def DropColumn(self, arg0: str) -> None: ...
    def DropIndex(self, arg0: str) -> None: ...
    # Returns a mapping from string to ``_Doc``.
    def Fetch(self, arg0: collections.abc.Sequence[str]) -> dict[str, _Doc]: ...
    def Flush(self) -> None: ...
    # NOTE(review): the parameter and element types are literal ``...``
    # placeholders, i.e. left unspecified in this stub -- confirm the real types.
    def GroupByQuery(self, arg0: ...) -> list[...]: ...
    # Insert / Update / Upsert share one shape: a sequence of ``_Doc`` in,
    # a list of ``Status`` out.
    def Insert(self, arg0: collections.abc.Sequence[_Doc]) -> list[typing.Status]: ...
    def Optimize(self, arg0: param.OptimizeOption) -> None: ...
    # Read-only accessors.
    def Options(self) -> param.CollectionOption: ...
    def Path(self) -> str: ...
    def Query(self, arg0: param._VectorQuery) -> list[_Doc]: ...
    def Schema(self) -> schema._CollectionSchema: ...
    def Stats(self) -> schema.CollectionStats: ...
    def Update(self, arg0: collections.abc.Sequence[_Doc]) -> list[typing.Status]: ...
    def Upsert(self, arg0: collections.abc.Sequence[_Doc]) -> list[typing.Status]: ...
    # Pickle protocol hooks; the state is exchanged as a tuple.
    def __getstate__(self) -> tuple: ...
    def __setstate__(self, arg0: tuple) -> None: ...

class _Doc:
    """Type stub for the underscore-prefixed ``_Doc`` class.

    Only signatures are declared here; every body is ``...``.

    NOTE(review): ``typing`` in these annotations is the name bound by
    ``from . import typing`` at the top of this stub, so ``typing.Any`` and
    ``typing.SupportsFloat`` resolve against that module -- confirm they
    resolve as intended.
    """

    # Pickle protocol hooks; the state is exchanged as ``bytes``.
    def __getstate__(self) -> bytes: ...
    def __init__(self) -> None: ...
    def __setstate__(self, arg0: bytes) -> None: ...
    def field_names(self) -> list[str]: ...
    # Generic accessors taking a string and a ``DataType``.
    # NOTE(review): presumably arg0 is the field name -- confirm.
    def get_any(self, arg0: str, arg1: typing.DataType) -> typing.Any: ...
    def has_field(self, arg0: str) -> bool: ...
    def pk(self) -> str: ...
    def score(self) -> float: ...
    def set_any(self, arg0: str, arg1: typing.DataType, arg2: typing.Any) -> bool: ...
    def set_pk(self, arg0: str) -> None: ...
    def set_score(self, arg0: typing.SupportsFloat) -> None: ...

class _DocOp:
    """
    Members:

      INSERT

      UPDATE

      DELETE

      UPSERT
    """

    # Integer values per the generated annotations below:
    # INSERT=0, UPSERT=1, UPDATE=2, DELETE=3. Note that this differs from the
    # order in which the members are listed in the docstring.
    DELETE: typing.ClassVar[_DocOp]  # value = <_DocOp.DELETE: 3>
    INSERT: typing.ClassVar[_DocOp]  # value = <_DocOp.INSERT: 0>
    UPDATE: typing.ClassVar[_DocOp]  # value = <_DocOp.UPDATE: 2>
    UPSERT: typing.ClassVar[_DocOp]  # value = <_DocOp.UPSERT: 1>
    __members__: typing.ClassVar[
        dict[str, _DocOp]
    ]  # value = {'INSERT': <_DocOp.INSERT: 0>, 'UPDATE': <_DocOp.UPDATE: 2>, 'DELETE': <_DocOp.DELETE: 3>, 'UPSERT': <_DocOp.UPSERT: 1>}

    # Comparison, hashing, integer conversion and pickle hooks of the enum.
    def __eq__(self, other: typing.Any) -> bool: ...
    def __getstate__(self) -> int: ...
    def __hash__(self) -> int: ...
    def __index__(self) -> int: ...
    def __init__(self, value: typing.SupportsInt) -> None: ...
    def __int__(self) -> int: ...
    def __ne__(self, other: typing.Any) -> bool: ...
    def __repr__(self) -> str: ...
    def __setstate__(self, state: typing.SupportsInt) -> None: ...
    def __str__(self) -> str: ...
    # Member name (str) and numeric value (int), exposed as read-only properties.
    @property
    def name(self) -> str: ...
    @property
    def value(self) -> int: ...


================================================
FILE: python/zvec/common/__init__.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from .constants import DenseVectorType, SparseVectorType, VectorType

__all__ = ["DenseVectorType", "SparseVectorType", "VectorType"]


================================================
FILE: python/zvec/common/constants.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from typing import Optional, TypeVar, Union

import numpy as np

# VectorType: DenseVectorType | SparseVectorType
# A dense vector is a list of floats, a list of ints, or a numpy array.
DenseVectorType = Union[list[float], list[int], np.ndarray]
# A sparse vector maps an integer index to a float value.
SparseVectorType = dict[int, float]
# Either vector flavour; the alias is Optional, so None is also accepted.
VectorType = Optional[Union[DenseVectorType, SparseVectorType]]

# Embeddable: Text | Image | Audio
TEXT = str
IMAGE = Union[str, bytes, np.ndarray]  # file path, raw bytes, or numpy array
AUDIO = Union[str, bytes, np.ndarray]  # file path, raw bytes, or numpy array

# Any input an embedding function may receive; Optional, so None is allowed.
Embeddable = Optional[Union[TEXT, IMAGE, AUDIO]]

# Multimodal Embeddable
# Contravariant type variable bound to Embeddable, used to parameterise the
# embedding-function protocols (e.g. ``DenseEmbeddingFunction(Protocol[MD])``).
MD = TypeVar("MD", bound=Embeddable, contravariant=True)


================================================
FILE: python/zvec/executor/__init__.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from .query_executor import (
    QueryContext,
    QueryExecutor,
    QueryExecutorFactory,
)

__all__ = [
    "QueryContext",
    "QueryExecutor",
    "QueryExecutorFactory",
]


================================================
FILE: python/zvec/executor/query_executor.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import os
from abc import ABC, abstractmethod
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Optional, Union, final

import numpy as np
from _zvec import _Collection
from _zvec.param import _VectorQuery

from ..extension import ReRanker, RrfReRanker, WeightedReRanker
from ..model.convert import convert_to_py_doc
from ..model.doc import Doc
from ..model.param.vector_query import VectorQuery
from ..model.schema import CollectionSchema
from ..typing import DataType

__all__ = [
    "QueryContext",
    "QueryExecutor",
    "QueryExecutorFactory",
]

# Lookup from the ``.value`` of a dense-vector ``DataType`` member to the numpy
# dtype that query vectors are converted to before being handed to the core.
# Data types that are absent from this table are passed through unconverted
# (see the ``DTYPE_MAP.get`` call in ``_do_build_query_with_vector``).
DTYPE_MAP = {
    DataType.VECTOR_FP16.value: np.float16,
    DataType.VECTOR_FP32.value: np.float32,
    DataType.VECTOR_FP64.value: np.float64,
    DataType.VECTOR_INT8.value: np.int8,
}


def convert_to_numpy(vec: Union[list, np.ndarray], dtype: np.dtype) -> np.ndarray:
    """Coerce ``vec`` into a one-dimensional numpy array of ``dtype``.

    Args:
        vec: A numpy array or any array-like (typically a list).
        dtype: Target numpy dtype.

    Returns:
        np.ndarray: ``vec`` itself when it is already a 1-D array of the
        requested dtype; otherwise a converted (and, if needed, flattened)
        array.

    Raises:
        TypeError: If a non-ndarray input cannot be converted. Conversion
            errors for ndarray inputs propagate unchanged from numpy.
    """
    if not isinstance(vec, np.ndarray):
        try:
            converted = np.asarray(vec, dtype=dtype)
            # Multi-dimensional input is collapsed to a single axis.
            return converted if converted.ndim == 1 else converted.flatten()
        except (ValueError, TypeError) as e:
            raise TypeError(
                f"Cannot convert input to 1D numpy array with dtype={dtype}: {type(vec)}"
            ) from e

    # Fast path: nothing to do for an array that already has the right layout.
    already_suitable = vec.dtype == dtype and vec.ndim == 1
    if already_suitable:
        return vec
    return np.asarray(vec, dtype=dtype).flatten()


class QueryContext:
    """Bundle of parameters describing a single query request.

    The values passed to the constructor are exposed through read-only
    properties; ``core_vectors`` is the only attribute that can be reassigned
    after construction.

    Args:
        topk: Number of results requested.
        filter: Optional filter expression.
        include_vector: Whether vectors should be included in the results.
        queries: Vector queries to run; ``None`` (or an empty list) is stored
            as an empty list.
        output_fields: Optional list of field names to return.
        reranker: Optional reranker applied to the query results.
    """

    def __init__(
        self,
        topk: int,
        filter: Optional[str] = None,
        include_vector: bool = False,
        queries: Optional[list[VectorQuery]] = None,
        output_fields: Optional[list[str]] = None,
        reranker: Optional[ReRanker] = None,
    ):
        # Request parameters.
        self._topk = topk
        self._filter = filter
        self._include_vector = include_vector
        self._output_fields = output_fields
        self._queries = queries or []

        # Optional post-processing step.
        self._reranker = reranker

        # Core query objects; starts empty and is filled through the setter.
        self._core_vectors = []

    @property
    def core_vectors(self):
        """Core query objects associated with this context."""
        return self._core_vectors

    @core_vectors.setter
    def core_vectors(self, core_vectors: list[_VectorQuery]):
        self._core_vectors = core_vectors

    @property
    def include_vector(self):
        """Whether vectors should be included in the results."""
        return self._include_vector

    @property
    def output_fields(self):
        """Field names to return, or ``None``."""
        return self._output_fields

    @property
    def reranker(self):
        """Reranker for the results, or ``None``."""
        return self._reranker

    @property
    def filter(self):
        """Filter expression, or ``None``."""
        return self._filter

    @property
    def queries(self):
        """List of vector queries (possibly empty)."""
        return self._queries

    @property
    def topk(self):
        """Number of results requested."""
        return self._topk


class QueryExecutor(ABC):
    """Template for running a query against a collection.

    ``execute`` drives four steps in order: validate the context, build the
    core query objects, run them against the collection, and merge/rerank the
    results. Subclasses provide the validation and build steps.

    Args:
        schema: Schema of the collection the queries are executed against.
    """

    def __init__(self, schema: CollectionSchema):
        self._schema = schema
        # Number of worker threads used when several queries are executed;
        # taken from ZVEC_QUERY_CONCURRENCY (default 1) and clamped to >= 1.
        self._concurrency = max(1, int(os.getenv("ZVEC_QUERY_CONCURRENCY", "1")))

    @abstractmethod
    def _do_validate(self, ctx: QueryContext) -> None:
        """Validate ``ctx``; implementations raise on an invalid request."""

    @abstractmethod
    def _do_build(
        self, ctx: QueryContext, collection: _Collection
    ) -> list[_VectorQuery]:
        """Translate ``ctx`` into the core query objects to execute."""

    def _do_build_query_wo_vector(self, ctx: QueryContext) -> _VectorQuery:
        """Build a core query carrying only the non-vector settings of ``ctx``.

        ``filter`` and ``output_fields`` are only assigned when they are
        truthy.
        """
        core_vector = _VectorQuery()
        core_vector.topk = ctx.topk
        core_vector.include_vector = ctx.include_vector
        if ctx.filter:
            core_vector.filter = ctx.filter
        if ctx.output_fields:
            core_vector.output_fields = ctx.output_fields
        return core_vector

    def _do_build_query_with_vector(
        self, ctx: QueryContext, query: VectorQuery, collection: _Collection
    ) -> _VectorQuery:
        """Build a core query for one vector query.

        The vector is taken from ``query`` when it carries one; otherwise the
        document identified by ``query.id`` is fetched from ``collection`` and
        its stored vector is used.

        Args:
            ctx: Query context supplying topk, filter and output settings.
            query: The vector query (explicit vector or document id).
            collection: Collection used to look up the document by id.

        Returns:
            _VectorQuery: The populated core query. When the referenced
            document cannot be fetched, the query is returned without a
            vector.

        Raises:
            ValueError: If the schema has no matching vector field.
        """
        core_vector = self._do_build_query_wo_vector(ctx)
        core_vector.field_name = query.field_name
        if query.param:
            core_vector.query_params = query.param

        vector_schema = (
            self._schema.vector(query.field_name) if query else self._schema.vectors[0]
        )

        if vector_schema is None:
            raise ValueError("No vector field found")

        # set output_fields
        core_vector.output_fields = ctx.output_fields

        # set vector
        if query.has_vector():
            vec_data = query.vector
        else:
            fetched = collection.Fetch([query.id])
            # Use a default so that an empty Fetch result takes the same
            # "no document" path below instead of leaking StopIteration.
            doc = next(iter(fetched.values()), None)
            if not doc:
                return core_vector
            vec_data = doc.get_any(vector_schema.name, vector_schema.data_type)

        # Data types without a DTYPE_MAP entry are handed over unconverted.
        target_dtype = DTYPE_MAP.get(vector_schema.data_type.value)
        core_vector.set_vector(
            vector_schema._get_object(),
            convert_to_numpy(vec_data, target_dtype) if target_dtype else vec_data,
        )
        return core_vector

    def _do_execute(
        self, vectors: list[_VectorQuery], collection: _Collection
    ) -> dict[str, list[Doc]]:
        """Run every core query and collect the results per field name.

        Queries run sequentially when there is a single query or concurrency
        is 1; otherwise they are submitted to a thread pool.

        Returns:
            dict[str, list[Doc]]: Converted documents keyed by the query's
            ``field_name``.

        Raises:
            ValueError: If ``vectors`` is empty.
        """
        if len(vectors) == 0:
            raise ValueError("No query to execute")

        results = {}
        if len(vectors) == 1 or self._concurrency == 1:
            for query in vectors:
                docs = collection.Query(query)
                results[query.field_name] = [
                    convert_to_py_doc(doc, self._schema) for doc in docs
                ]
            return results

        with ThreadPoolExecutor(max_workers=self._concurrency) as executor:
            future_to_query = {
                executor.submit(collection.Query, query): query.field_name
                for query in vectors
            }

            for future in as_completed(future_to_query):
                field_name = future_to_query[future]
                # future.result() re-raises any exception from the worker.
                docs = future.result()
                results[field_name] = [
                    convert_to_py_doc(doc, self._schema) for doc in docs
                ]
        return results

    def _do_merge_rerank_results(
        self, ctx: QueryContext, docs_map: dict[str, list[Doc]]
    ) -> list[Doc]:
        """Reduce the per-field results to a single document list.

        A single result set is returned as-is when there is no reranker or the
        reranker is an ``RrfReRanker``/``WeightedReRanker``; in every other
        case the reranker is applied.

        Raises:
            ValueError: If ``docs_map`` is empty or ``None``, or if several
                result sets have to be merged but no reranker was supplied.
        """
        query_result_cnt = len(docs_map) if docs_map else 0
        if query_result_cnt == 0:
            raise ValueError("Query results is none and dost not to rerank")
        if query_result_cnt == 1 and (
            not ctx.reranker
            or isinstance(ctx.reranker, (RrfReRanker, WeightedReRanker))
        ):
            return next(iter(docs_map.values()))
        if ctx.reranker is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError("Reranker is required for multi-vector query")
        return ctx.reranker.rerank(docs_map)

    @final
    def execute(self, ctx: QueryContext, collection: _Collection) -> list[Doc]:
        """Validate, build, execute and merge/rerank a query.

        Args:
            ctx: The query request.
            collection: Collection to run the query against.

        Returns:
            list[Doc]: The final list of documents.

        Raises:
            ValueError: If the build step produces no query to execute.
        """
        # 1. validate query
        self._do_validate(ctx)
        # 2. build query vector
        query_vectors = self._do_build(ctx, collection)
        if not query_vectors:
            raise ValueError("No query to execute")
        # 3. execute query
        docs = self._do_execute(query_vectors, collection)
        # 4. merge and rerank result
        return self._do_merge_rerank_results(ctx, docs)


class NoVectorQueryExecutor(QueryExecutor):
    """Executor that only supports queries without any vector or id part."""

    def __init__(self, schema: CollectionSchema):
        super().__init__(schema)

    def _do_validate(self, ctx: QueryContext) -> None:
        """Reject contexts that carry at least one vector query."""
        if len(ctx.queries) == 0:
            return
        raise ValueError("Collection does not support query with vector or id")

    def _do_build(
        self, ctx: QueryContext, _collection: _Collection
    ) -> list[_VectorQuery]:
        """Return a single core query built from the non-vector settings."""
        plain_query = self._do_build_query_wo_vector(ctx)
        return [plain_query]


class SingleVectorQueryExecutor(NoVectorQueryExecutor):
    """Executor that accepts at most one vector query per request."""

    def __init__(self, schema: CollectionSchema) -> None:
        super().__init__(schema)

    def _do_validate(self, ctx: QueryContext) -> None:
        """Ensure there is no more than one query and that it is valid."""
        requested = ctx.queries
        if len(requested) > 1:
            raise ValueError(
                "Collection has only one vector field, cannot query with multiple vectors"
            )
        for item in requested:
            item._validate()

    def _do_build(
        self, ctx: QueryContext, collection: _Collection
    ) -> list[_VectorQuery]:
        """Build one core query per vector query, or a vector-less query
        when the context carries none."""
        if len(ctx.queries) == 0:
            return [self._do_build_query_wo_vector(ctx)]
        return [
            self._do_build_query_with_vector(ctx, item, collection)
            for item in ctx.queries
        ]


class MultiVectorQueryExecutor(SingleVectorQueryExecutor):
    """Executor that accepts several vector queries, one per field name."""

    def __init__(self, schema: CollectionSchema) -> None:
        super().__init__(schema)

    def _do_validate(self, ctx: QueryContext) -> None:
        """Require a reranker for multi-query requests, validate every query
        and reject repeated field names."""
        needs_reranker = len(ctx.queries) > 1
        if needs_reranker and ctx.reranker is None:
            raise ValueError("Reranker is required for multi-vector query")

        visited = set()
        for item in ctx.queries:
            item._validate()
            name = item.field_name
            if name in visited:
                raise ValueError(f"Query field name '{name}' appears more than once")
            visited.add(name)

    def _do_execute(
        self, vectors: list[_VectorQuery], collection: _Collection
    ) -> dict[str, list[Doc]]:
        """Delegate to the inherited execution logic unchanged."""
        return super()._do_execute(vectors, collection)


class QueryExecutorFactory:
    """Chooses the executor implementation that fits a collection schema."""

    @staticmethod
    def create(schema: CollectionSchema) -> QueryExecutor:
        """Return an executor based on how many vector fields ``schema`` has.

        Zero vector fields yields a ``NoVectorQueryExecutor``, exactly one a
        ``SingleVectorQueryExecutor``, and anything else a
        ``MultiVectorQueryExecutor``.
        """
        vector_count = len(schema.vectors)
        if vector_count == 0:
            executor_cls = NoVectorQueryExecutor
        elif vector_count == 1:
            executor_cls = SingleVectorQueryExecutor
        else:
            executor_cls = MultiVectorQueryExecutor
        return executor_cls(schema)


================================================
FILE: python/zvec/extension/__init__.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from .bm25_embedding_function import BM25EmbeddingFunction
from .embedding_function import DenseEmbeddingFunction, SparseEmbeddingFunction
from .http_embedding_function import HTTPDenseEmbedding
from .jina_embedding_function import JinaDenseEmbedding
from .jina_function import JinaFunctionBase
from .multi_vector_reranker import RrfReRanker, WeightedReRanker
from .openai_embedding_function import OpenAIDenseEmbedding
from .openai_function import OpenAIFunctionBase
from .qwen_embedding_function import QwenDenseEmbedding, QwenSparseEmbedding
from .qwen_function import QwenFunctionBase
from .qwen_rerank_function import QwenReRanker
from .rerank_function import RerankFunction as ReRanker
from .sentence_transformer_embedding_function import (
    DefaultLocalDenseEmbedding,
    DefaultLocalSparseEmbedding,
)
from .sentence_transformer_function import SentenceTransformerFunctionBase
from .sentence_transformer_rerank_function import DefaultLocalReRanker

__all__ = [
    "BM25EmbeddingFunction",
    "DefaultLocalDenseEmbedding",
    "DefaultLocalReRanker",
    "DefaultLocalSparseEmbedding",
    "DenseEmbeddingFunction",
    "HTTPDenseEmbedding",
    "JinaDenseEmbedding",
    "JinaFunctionBase",
    "OpenAIDenseEmbedding",
    "OpenAIFunctionBase",
    "QwenDenseEmbedding",
    "QwenFunctionBase",
    "QwenReRanker",
    "QwenSparseEmbedding",
    "ReRanker",
    "RrfReRanker",
    "SentenceTransformerFunctionBase",
    "SparseEmbeddingFunction",
    "WeightedReRanker",
]


================================================
FILE: python/zvec/extension/bm25_embedding_function.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from functools import lru_cache
from typing import Literal, Optional

from ..common.constants import TEXT, SparseVectorType
from ..tool import require_module
from .embedding_function import SparseEmbeddingFunction


class BM25EmbeddingFunction(SparseEmbeddingFunction[TEXT]):
    """Sparse text embedding backed by the BM25 encoder of the DashText SDK.

    BM25 (Best Matching 25) is a probabilistic ranking function built on term
    frequency and inverse document frequency. The resulting vector is sparse:
    every key is a vocabulary term index and every value is the BM25 score of
    that term. Typical uses are:

    - Keyword matching and lexical search
    - Document ranking and information retrieval
    - Hybrid search together with dense embeddings
    - Classic IR tasks where exact term matching matters

    The work is delegated to DashText's ``SparseVectorEncoder``, either the
    pre-trained built-in encoder or a custom encoder trained on a corpus that
    you supply.

    Args:
        corpus (Optional[list[str]], optional): Documents used to train a
            custom encoder, which can improve accuracy on domain-specific
            text. With ``None`` the built-in encoder is used.
            Defaults to ``None``.
        encoding_type (Literal["query", "document"], optional): How text is
            encoded. ``"query"`` is meant for search queries and
            ``"document"`` for document indexing, which matters for
            asymmetric retrieval. Defaults to ``"query"``.
        language (Literal["zh", "en"], optional): Language of the built-in
            encoder; ignored when a corpus is given. ``"zh"`` is Chinese
            (trained on Chinese Wikipedia), ``"en"`` is English.
            Defaults to ``"zh"``.
        b (float, optional): BM25 document length normalization in [0, 1],
            where 0 disables normalization and 1 applies it fully. Only used
            with a custom corpus. Defaults to ``0.75``.
        k1 (float, optional): BM25 term frequency saturation; larger values
            weight term frequency more. Only used with a custom corpus.
            Defaults to ``1.2``.
        **kwargs: Further parameters forwarded to the DashText encoder.

    Attributes:
        corpus_size (int): Size of the training corpus (0 for the built-in encoder).
        encoding_type (str): Active encoding type ("query" or "document").
        language (str): Language of the built-in encoder ("zh" or "en").

    Raises:
        ValueError: If a corpus is given but is empty or holds non-string items.
        TypeError: If the input passed to ``embed()`` is not a string.
        RuntimeError: If creating or training the DashText encoder fails.

    Note:
        - Requires Python 3.10, 3.11, or 3.12
        - Requires the ``dashtext`` package: ``pip install dashtext``
        - Two encoder flavours exist:

          1. **Built-in encoder** (no corpus): pre-trained for Chinese (zh)
             and English (en), generalizes well and works out of the box
          2. **Custom encoder** (corpus required): more accurate on
             domain-specific vocabulary, trained on your corpus with the BM25
             parameters

        - Encoding types:

          * ``encoding_type="query"``: tuned for search queries (shorter text)
          * ``encoding_type="document"``: tuned for document indexing (longer text)

        - The BM25 parameters (b, k1) only affect custom encoder training
        - Output is ordered by index (vocabulary term ID) for consistency
        - Results are cached (LRU cache, maxsize=10) to save computation
        - Everything runs locally; no API key or network access is needed

    Examples:
        >>> # Option 1: built-in Chinese encoder (no corpus needed)
        >>> from zvec.extension import BM25EmbeddingFunction
        >>>
        >>> # Query encoding; the sample text means "what is machine learning"
        >>> bm25_query_zh = BM25EmbeddingFunction(language="zh", encoding_type="query")
        >>> query_vec = bm25_query_zh.embed("什么是机器学习")
        >>> isinstance(query_vec, dict)
        True
        >>> # query_vec: {1169440797: 0.29, 2045788977: 0.70, ...}

        >>> # Document encoding (Chinese sample sentence about machine learning)
        >>> bm25_doc_zh = BM25EmbeddingFunction(language="zh", encoding_type="document")
        >>> doc_vec = bm25_doc_zh.embed("机器学习是人工智能的一个重要分支...")
        >>> isinstance(doc_vec, dict)
        True

        >>> # Built-in English encoder
        >>> bm25_query_en = BM25EmbeddingFunction(language="en", encoding_type="query")
        >>> query_vec_en = bm25_query_en.embed("what is vector search service")
        >>> isinstance(query_vec_en, dict)
        True

        >>> # Option 2: custom corpus (Chinese sample sentences on ML topics)
        >>> corpus = [
        ...     "机器学习是人工智能的一个重要分支",
        ...     "深度学习使用多层神经网络进行特征提取",
        ...     "自然语言处理技术用于理解和生成人类语言"
        ... ]
        >>> bm25_custom = BM25EmbeddingFunction(
        ...     corpus=corpus,
        ...     encoding_type="query",
        ...     b=0.75,
        ...     k1=1.2
        ... )
        >>> custom_vec = bm25_custom.embed("机器学习算法")
        >>> isinstance(custom_vec, dict)
        True

        >>> # Hybrid search together with a dense embedding
        >>> from zvec.extension import DefaultLocalDenseEmbedding
        >>> dense_emb = DefaultLocalDenseEmbedding()
        >>> bm25_emb = BM25EmbeddingFunction(language="zh", encoding_type="query")
        >>>
        >>> query = "machine learning algorithms"
        >>> dense_vec = dense_emb.embed(query)  # Semantic similarity
        >>> sparse_vec = bm25_emb.embed(query)  # Lexical matching
        >>> # Combine scores for hybrid retrieval

        >>> # The instance itself is callable
        >>> sparse_vec = bm25_query_zh("information retrieval")
        >>> isinstance(sparse_vec, dict)
        True

        >>> # Error handling
        >>> try:
        ...     bm25_query_zh.embed("")  # Empty query
        ... except ValueError as e:
        ...     print(f"Error: {e}")
        Error: Input text cannot be empty or whitespace only

    See Also:
        - ``SparseEmbeddingFunction``: Base class for sparse embeddings
        - ``DefaultLocalSparseEmbedding``: SPLADE-based sparse embedding
        - ``QwenSparseEmbedding``: API-based sparse embedding using Qwen
        - ``DefaultLocalDenseEmbedding``: Dense embedding for semantic search

    References:
        - DashText Documentation: https://help.aliyun.com/zh/document_detail/2546039.html
        - DashText PyPI: https://pypi.org/project/dashtext/
        - BM25 Algorithm: Robertson & Zaragoza (2009)
    """

    def __init__(
        self,
        corpus: Optional[list[str]] = None,
        encoding_type: Literal["query", "document"] = "query",
        language: Literal["zh", "en"] = "zh",
        b: float = 0.75,
        k1: float = 1.2,
        **kwargs,
    ):
        """Validate the arguments, load DashText and build the encoder.

        Args:
            corpus (Optional[list[str]]): Training corpus for a custom
                encoder; ``None`` selects the built-in encoder.
                Defaults to None.
            encoding_type (Literal["query", "document"]): ``"query"`` for
                search queries, ``"document"`` for indexing.
                Defaults to "query".
            language (Literal["zh", "en"]): Built-in encoder language,
                "zh" (Chinese) or "en" (English). Defaults to "zh".
            b (float): BM25 document length normalization in [0, 1]; custom
                corpus only. Defaults to 0.75.
            k1 (float): BM25 term frequency saturation; custom corpus only.
                Defaults to 1.2.
            **kwargs: Extra DashText encoder parameters.

        Raises:
            ValueError: If a corpus is given but is empty or invalid.
            ImportError: If the dashtext package is missing.
            RuntimeError: If building or training the encoder fails.
        """
        # A corpus, when given, has to be a non-empty list made of strings.
        if corpus is not None:
            if not (corpus and isinstance(corpus, list)):
                raise ValueError("Corpus must be a non-empty list of strings")

            if any(not isinstance(doc, str) for doc in corpus):
                raise ValueError("All corpus documents must be strings")

        # Resolve the optional dependency only after validation passed.
        self._dashtext = require_module("dashtext")

        self._corpus = corpus
        self._encoding_type = encoding_type
        self._language = language
        self._b = b
        self._k1 = k1
        self._extra_params = kwargs

        self._build_encoder()

    def _build_encoder(self):
        """Create the DashText sparse vector encoder.

        Without a corpus the pre-trained built-in encoder for the configured
        language is used; with a corpus a new encoder is created from the
        BM25 parameters and trained on it.

        Raises:
            RuntimeError: If creating or training the encoder fails.
            ImportError: If the dashtext package is missing.
        """
        try:
            if self._corpus is not None:
                # Custom encoder: configure BM25, then train on the corpus.
                self._encoder = self._dashtext.SparseVectorEncoder(
                    b=self._b, k1=self._k1, **self._extra_params
                )
                self._encoder.train(self._corpus)
            else:
                # Built-in encoder (pre-trained on Wikipedia);
                # 'zh' selects Chinese, 'en' selects English.
                self._encoder = self._dashtext.SparseVectorEncoder.default(
                    name=self._language
                )

        except ImportError as e:
            raise ImportError(
                "dashtext package is required for BM25EmbeddingFunction. "
                "Install it with: pip install dashtext"
            ) from e
        except (ValueError, RuntimeError):
            # These already carry a meaningful type; let them through as-is.
            raise
        except Exception as e:
            raise RuntimeError(f"Failed to build BM25 encoder: {e!s}") from e

    @property
    def corpus_size(self) -> int:
        """int: Size of the training corpus (0 for the built-in encoder)."""
        if self._corpus is None:
            return 0
        return len(self._corpus)

    @property
    def encoding_type(self) -> str:
        """str: Active encoding type ("query" or "document")."""
        return self._encoding_type

    @property
    def language(self) -> str:
        """str: Language of the built-in encoder ("zh" or "en")."""
        return self._language

    @property
    def extra_params(self) -> dict:
        """dict: Extra keyword parameters kept for the DashText encoder."""
        return self._extra_params

    def __call__(self, input: TEXT) -> SparseVectorType:
        """Embed ``input``; shorthand for :meth:`embed`.

        Args:
            input (TEXT): Text to embed.

        Returns:
            SparseVectorType: The sparse vector as a dictionary.
        """
        return self.embed(input)

    @lru_cache(maxsize=10)
    def embed(self, input: TEXT) -> SparseVectorType:
        """Compute the BM25 sparse embedding of a text.

        The DashText ``SparseVectorEncoder`` method that is called depends on
        the configured encoding type:

        - ``encoding_type="query"``: ``encode_queries()``
        - any other value (``"document"``): ``encode_documents()``

        Keys of the result are vocabulary term indices, values are BM25
        scores.

        Args:
            input (TEXT): Text to embed; must be non-empty once surrounding
                whitespace is stripped.

        Returns:
            SparseVectorType: Mapping of vocabulary term index to BM25 score.
                Only entries with a score greater than zero are kept, and the
                dictionary is ordered by ascending index.
                Example: ``{1169440797: 0.29, 2045788977: 0.70, ...}``

        Raises:
            TypeError: If ``input`` is not a string.
            ValueError: If ``input`` is empty or whitespace-only.
            RuntimeError: If the BM25 encoding fails.

        Examples:
            >>> bm25 = BM25EmbeddingFunction(language="zh", encoding_type="query")
            >>> sparse_vec = bm25.embed("query text")
            >>> isinstance(sparse_vec, dict)
            True
            >>> all(isinstance(k, int) and isinstance(v, float) for k, v in sparse_vec.items())
            True

            >>> # Keys come back in ascending order
            >>> keys = list(sparse_vec.keys())
            >>> keys == sorted(keys)
            True

            >>> # Error: empty input
            >>> bm25.embed("   ")
            ValueError: Input text cannot be empty or whitespace only

            >>> # Error: non-string input
            >>> bm25.embed(123)
            TypeError: Expected 'input' to be str, got int

        Note:
            - BM25 scores are relative to the vocabulary statistics
            - The output dictionary is always ordered by index
            - Terms outside the vocabulary score zero and are left out
            - Calls are cached (maxsize=10) for performance
            - DashText takes care of Chinese/English text segmentation
        """
        if not isinstance(input, str):
            raise TypeError(f"Expected 'input' to be str, got {type(input).__name__}")

        text = input.strip()
        if not text:
            raise ValueError("Input text cannot be empty or whitespace only")

        try:
            # Pick the encoder entry point that matches the encoding type.
            encode = (
                self._encoder.encode_queries
                if self._encoding_type == "query"
                else self._encoder.encode_documents
            )
            raw_vector = encode(text)

            # Normalize DashText's output to plain {int: float} pairs and
            # keep only strictly positive scores.
            positive_scores: dict[int, float] = {}
            for raw_index, raw_score in raw_vector.items():
                try:
                    index = int(raw_index)
                    score = float(raw_score)
                except (ValueError, TypeError):
                    # Entries that cannot be converted are ignored.
                    continue
                if score > 0:
                    positive_scores[index] = score

            # Ascending index order gives a stable, predictable result.
            return {index: positive_scores[index] for index in sorted(positive_scores)}

        except (TypeError, ValueError):
            raise
        except Exception as e:
            raise RuntimeError(f"Failed to generate BM25 embedding: {e!s}") from e


================================================
FILE: python/zvec/extension/embedding_function.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from abc import abstractmethod
from typing import Protocol, runtime_checkable

from ..common.constants import MD, DenseVectorType, SparseVectorType


@runtime_checkable
class DenseEmbeddingFunction(Protocol[MD]):
    """Structural interface for functions that produce dense embeddings.

    An object satisfies this protocol when it exposes an ``embed()`` method
    that turns one multimodal input (text, image, or audio) into a
    fixed-length vector of real values. Nothing else is prescribed:
    constructors, properties, and helper methods are left entirely to the
    implementation.

    Type Parameters:
        MD: The type of input data (bound to Embeddable: TEXT, IMAGE, or AUDIO).

    Note:
        - ``embed()`` is the single required member of the interface.
        - The protocol is decorated with ``runtime_checkable``, so
          ``isinstance()`` can be used to test for the presence of ``embed()``.
        - Implementations do not have to inherit from this class.

    Examples:
        >>> # A text embedder written from scratch
        >>> class MyTextEmbedding:
        ...     def __init__(self, dimension: int, model_name: str):
        ...         self.dimension = dimension
        ...         self.model = load_model(model_name)
        ...
        ...     def embed(self, input: str) -> list[float]:
        ...         return self.model.encode(input).tolist()

        >>> # An image embedder accepting a path, raw bytes, or an array
        >>> class MyImageEmbedding:
        ...     def __init__(self, dimension: int = 512):
        ...         self.dimension = dimension
        ...         self.model = load_image_model()
        ...
        ...     def embed(self, input: Union[str, bytes, np.ndarray]) -> list[float]:
        ...         if isinstance(input, str):
        ...             image = load_image_from_path(input)
        ...         else:
        ...             image = input
        ...         return self.model.extract_features(image).tolist()

        >>> # One of the implementations shipped with zvec
        >>> from zvec.extension import QwenDenseEmbedding
        >>> text_emb = QwenDenseEmbedding(dimension=768, api_key="sk-xxx")
        >>> vector = text_emb.embed("Hello world")
    """

    @abstractmethod
    def embed(self, input: MD) -> DenseVectorType:
        """Turn one input item into its dense embedding vector.

        Args:
            input (MD): The item to embed. Depending on the modality this is:
                - TEXT (str): Text string
                - IMAGE (str | bytes | np.ndarray): Image file path, raw bytes, or array
                - AUDIO (str | bytes | np.ndarray): Audio file path, raw bytes, or array

        Returns:
            DenseVectorType: The embedding, as list[float], list[int], or
                np.ndarray. Its length should equal the dimension advertised
                by the implementation.
        """


@runtime_checkable
class SparseEmbeddingFunction(Protocol[MD]):
    """Structural interface for functions that produce sparse embeddings.

    A sparse embedding function turns one multimodal input (text, image, or
    audio) into a ``{index: weight}`` mapping that lists only the non-zero
    dimensions. This is a Protocol class: any object with a compatible
    ``embed()`` method satisfies it, and it may also be subclassed explicitly
    as the examples below do.

    Type Parameters:
        MD: The type of input data (bound to Embeddable: TEXT, IMAGE, or AUDIO).

    Note:
        ``embed()`` is declared abstract, so explicit subclasses must
        implement it.

    Examples:
        >>> # A ready-made text sparse embedding (e.g., BM25, TF-IDF)
        >>> sparse_emb = SomeSparseEmbedding()
        >>> vector = sparse_emb.embed("Hello world")
        >>> # Returns: {0: 0.5, 42: 1.2, 100: 0.8}

        >>> # A hand-written BM25 sparse embedding function
        >>> class MyBM25Embedding(SparseEmbeddingFunction):
        ...     def __init__(self, vocab_size: int = 10000):
        ...         self.vocab_size = vocab_size
        ...         self.tokenizer = MyTokenizer()
        ...
        ...     def embed(self, input: str) -> dict[int, float]:
        ...         tokens = self.tokenizer.tokenize(input)
        ...         sparse_vector = {}
        ...         for token_id, weight in self._calculate_bm25(tokens):
        ...             if weight > 0:
        ...                 sparse_vector[token_id] = weight
        ...         return sparse_vector
        ...
        ...     def _calculate_bm25(self, tokens):
        ...         # BM25 calculation logic
        ...         pass

        >>> # A sparse feature extractor for images
        >>> class MySparseImageEmbedding(SparseEmbeddingFunction):
        ...     def embed(self, input: Union[str, bytes, np.ndarray]) -> dict[int, float]:
        ...         image = self._load_image(input)
        ...         features = self._extract_sparse_features(image)
        ...         return {idx: val for idx, val in enumerate(features) if val != 0}
    """

    @abstractmethod
    def embed(self, input: MD) -> SparseVectorType:
        """Turn one input item into its sparse embedding.

        Args:
            input (MD): The item to embed. Depending on the modality this is:
                - TEXT (str): Text string
                - IMAGE (str | bytes | np.ndarray): Image file path, raw bytes, or array
                - AUDIO (str | bytes | np.ndarray): Audio file path, raw bytes, or array

        Returns:
            SparseVectorType: A mapping from dimension index to weight that
                contains only the dimensions whose weight is non-zero.
        """


================================================
FILE: python/zvec/extension/http_embedding_function.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import json
import os
import urllib.request
from functools import lru_cache
from typing import Optional

from ..common.constants import TEXT, DenseVectorType
from .embedding_function import DenseEmbeddingFunction


class HTTPDenseEmbedding(DenseEmbeddingFunction[TEXT]):
    """Dense text embedding function using any OpenAI-compatible HTTP endpoint.

    This class calls any server that implements the ``/v1/embeddings`` API
    (LM Studio, Ollama, vLLM, LocalAI, etc.) using only the Python standard
    library — no extra dependencies are required.

    The embedding dimension is recorded from the first successful server
    response. Reading ``dimension`` before anything has been embedded sends a
    small probe request to discover it.

    Args:
        base_url (str, optional): Base URL of the embedding server.
            Defaults to ``"http://localhost:1234"`` (LM Studio).
            Common values:

            - ``"http://localhost:1234"``  — LM Studio
            - ``"http://localhost:11434"`` — Ollama
        model (str, optional): Model identifier as expected by the server.
            Defaults to ``"text-embedding-nomic-embed-text-v1.5@f16"``.
        api_key (Optional[str], optional): Bearer token for authenticated
            endpoints.  Falls back to the ``OPENAI_API_KEY`` environment
            variable.  Leave as ``None`` for local servers that do not
            require authentication.
        timeout (int, optional): HTTP request timeout in seconds.
            Defaults to 30.

    Attributes:
        dimension (int): Embedding vector dimensionality (auto-detected).

    Raises:
        TypeError: If ``embed()`` receives a non-string input.
        ValueError: If input is empty/whitespace-only or the server returns
            an unexpected response format.
        RuntimeError: If the HTTP request fails or the server is unreachable.

    Examples:
        >>> from zvec.extension import HTTPDenseEmbedding
        >>>
        >>> # LM Studio (default)
        >>> emb = HTTPDenseEmbedding()
        >>> vector = emb.embed("Hello, world!")
        >>> len(vector)
        768
        >>>
        >>> # Ollama
        >>> emb = HTTPDenseEmbedding(
        ...     base_url="http://localhost:11434",
        ...     model="nomic-embed-text",
        ... )
        >>> vector = emb.embed("Semantic search with local models")

    See Also:
        - ``DenseEmbeddingFunction``: Protocol for dense embeddings.
        - ``OpenAIDenseEmbedding``: Cloud embedding via the OpenAI API.
    """

    ENDPOINT = "/v1/embeddings"

    def __init__(
        self,
        base_url: str = "http://localhost:1234",
        model: str = "text-embedding-nomic-embed-text-v1.5@f16",
        api_key: Optional[str] = None,
        timeout: int = 30,
    ) -> None:
        """Store the connection settings; no request is sent here.

        Args:
            base_url (str): Server base URL; trailing slashes are removed.
            model (str): Model identifier sent with every request.
            api_key (Optional[str]): Bearer token, or ``None`` to fall back to
                the ``OPENAI_API_KEY`` environment variable (empty if unset).
            timeout (int): HTTP request timeout in seconds.
        """
        # Strip trailing slashes so that ENDPOINT can be appended verbatim.
        self._base_url = base_url.rstrip("/")
        self._model = model
        self._api_key = api_key or os.environ.get("OPENAI_API_KEY", "")
        self._timeout = timeout
        # Filled in by the first successful embed() call.
        self._dimension: Optional[int] = None

    @property
    def dimension(self) -> int:
        """int: Embedding vector dimensionality.

        Recorded by the first successful ``embed()`` call. If nothing has been
        embedded yet, a probe request is sent to discover it.
        """
        if self._dimension is None:
            self._dimension = len(self.embed("dimension probe"))
        return self._dimension

    def __call__(self, input: TEXT) -> DenseVectorType:
        """Make the embedding function callable."""
        return self.embed(input)

    @lru_cache(maxsize=256)
    def embed(self, input: TEXT) -> DenseVectorType:
        """Generate a dense embedding vector for the input text.

        Results are cached (LRU, up to 256 entries) so repeated strings
        do not trigger extra HTTP requests. The cache hands the same list
        object to every caller that passes the same input, so callers should
        not modify the returned vector in place.

        Args:
            input (TEXT): Input text string to embed.  Must be non-empty
                after stripping whitespace.

        Returns:
            DenseVectorType: A list of floats representing the embedding.

        Raises:
            TypeError: If *input* is not a string.
            ValueError: If *input* is empty/whitespace-only or the server
                returns an unexpected response format (including a body that
                is not valid JSON or an embedding that is not a non-empty
                list).
            RuntimeError: If the HTTP request fails.
        """
        if not isinstance(input, TEXT):
            raise TypeError(f"Expected 'input' to be str, got {type(input).__name__}")

        input = input.strip()
        if not input:
            raise ValueError("Input text cannot be empty or whitespace only")

        url = self._base_url + self.ENDPOINT
        payload = json.dumps({"model": self._model, "input": input}).encode()

        headers: dict[str, str] = {"Content-Type": "application/json"}
        if self._api_key:
            headers["Authorization"] = f"Bearer {self._api_key}"

        req = urllib.request.Request(url, data=payload, headers=headers, method="POST")
        try:
            with urllib.request.urlopen(req, timeout=self._timeout) as resp:
                raw = resp.read()
        # urllib.error is loaded as a side effect of importing urllib.request.
        # HTTPError must be handled before OSError because it is a subclass.
        except urllib.error.HTTPError as exc:
            # Decode leniently: a non-UTF-8 error body must not mask the
            # HTTP failure with a UnicodeDecodeError.
            detail = exc.read().decode("utf-8", errors="replace")
            raise RuntimeError(
                f"Embedding server returned HTTP {exc.code}: {detail}"
            ) from exc
        except OSError as exc:
            raise RuntimeError(
                f"Could not reach embedding server at {url}: {exc}"
            ) from exc

        try:
            body = json.loads(raw)
        except ValueError as exc:
            # Covers json.JSONDecodeError and UnicodeDecodeError alike.
            raise ValueError(
                f"Embedding server at {url} returned a response that is not "
                "valid JSON"
            ) from exc

        try:
            vector = body["data"][0]["embedding"]
        except (KeyError, IndexError, TypeError) as exc:
            # TypeError covers bodies such as a top-level list or
            # ``{"data": null}``, where subscripting fails outright.
            raise ValueError(
                f"Unexpected response format from embedding server: {body}"
            ) from exc

        if not isinstance(vector, list) or not vector:
            raise ValueError(
                f"Unexpected response format from embedding server: {body}"
            )

        # Remember the dimensionality so the ``dimension`` property does not
        # need a separate probe request once any text has been embedded.
        if self._dimension is None:
            self._dimension = len(vector)

        return vector


================================================
FILE: python/zvec/extension/jina_embedding_function.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from functools import lru_cache
from typing import Optional

from ..common.constants import TEXT, DenseVectorType
from .embedding_function import DenseEmbeddingFunction
from .jina_function import JinaFunctionBase


class JinaDenseEmbedding(JinaFunctionBase, DenseEmbeddingFunction[TEXT]):
    """Text-to-vector embedding backed by the Jina AI Embeddings API.

    The class combines ``JinaFunctionBase`` (API access) with the
    ``DenseEmbeddingFunction`` interface and produces dense text embeddings
    through Jina's OpenAI-compatible Embeddings API.

    Jina Embeddings v5 models accept a ``task`` parameter that tunes the
    embedding for a particular use case (retrieval, text matching,
    classification, ...). They also support Matryoshka Representation
    Learning, which allows the output dimension to be chosen.

    Args:
        model (str, optional): Jina embedding model identifier.
            Defaults to ``"jina-embeddings-v5-text-nano"``. Available models:
            - ``"jina-embeddings-v5-text-nano"``: 768 dims, 239M params, 8K context
            - ``"jina-embeddings-v5-text-small"``: 1024 dims, 677M params, 32K context
        dimension (Optional[int], optional): Requested output dimension.
            ``None`` selects the model's default. Matryoshka dimensions:
            32, 64, 128, 256, 512, 768 (nano) / 1024 (small).
            Defaults to ``None``.
        api_key (Optional[str], optional): Jina API authentication key.
            When ``None``, the ``JINA_API_KEY`` environment variable is used.
            Keys are issued at: https://jina.ai/api-dashboard
        task (Optional[str], optional): Task the embedding is optimized for.
            Defaults to ``None``. Valid values:
            - ``"retrieval.query"``: For search queries
            - ``"retrieval.passage"``: For documents/passages to be searched
            - ``"text-matching"``: For symmetric text similarity
            - ``"classification"``: For text classification
            - ``"separation"``: For clustering/separation tasks

    Attributes:
        dimension (int): The embedding vector dimension.
        data_type (DataType): Always ``DataType.VECTOR_FP32`` for this implementation.
        model (str): The Jina model name being used.
        task (Optional[str]): The task type for embedding optimization.

    Raises:
        ValueError: If no API key is given or found in the environment,
            if ``task`` is not a valid task type, or if the API returns an
            error response.
        TypeError: If input to ``embed()`` is not a string.
        RuntimeError: If a network error or Jina service error occurs.

    Note:
        - Requires Python 3.10, 3.11, or 3.12
        - Requires the ``openai`` package: ``pip install openai``
        - Jina API is OpenAI-compatible, so it uses the ``openai`` Python client
        - Embedding results are cached (LRU cache, maxsize=10) to reduce API calls
        - For retrieval tasks, use ``"retrieval.query"`` for queries and
          ``"retrieval.passage"`` for documents
        - API usage requires a Jina API key from https://jina.ai/api-dashboard

    Examples:
        >>> # Default model, key taken from the environment
        >>> from zvec.extension import JinaDenseEmbedding
        >>> import os
        >>> os.environ["JINA_API_KEY"] = "jina_..."
        >>>
        >>> emb_func = JinaDenseEmbedding()
        >>> vector = emb_func.embed("Hello, world!")
        >>> len(vector)
        768

        >>> # Retrieval: separate embedders for queries and documents
        >>> query_emb = JinaDenseEmbedding(task="retrieval.query")
        >>> doc_emb = JinaDenseEmbedding(task="retrieval.passage")
        >>>
        >>> query_vector = query_emb.embed("What is machine learning?")
        >>> doc_vector = doc_emb.embed("Machine learning is a subset of AI...")

        >>> # Larger model with a reduced (Matryoshka) dimension
        >>> emb_func = JinaDenseEmbedding(
        ...     model="jina-embeddings-v5-text-small",
        ...     dimension=256,
        ...     api_key="jina_...",
        ...     task="text-matching",
        ... )
        >>> vector = emb_func.embed("Semantic similarity comparison")
        >>> len(vector)
        256

        >>> # Sizing a zvec collection from the embedder
        >>> import zvec
        >>> emb_func = JinaDenseEmbedding(task="retrieval.passage")
        >>> schema = zvec.CollectionSchema(
        ...     name="docs",
        ...     vectors=zvec.VectorSchema(
        ...         "embedding", zvec.DataType.VECTOR_FP32, emb_func.dimension
        ...     ),
        ... )
        >>> collection = zvec.create_and_open(path="./my_docs", schema=schema)

    See Also:
        - ``DenseEmbeddingFunction``: Base class for dense embeddings
        - ``OpenAIDenseEmbedding``: Alternative using OpenAI API
        - ``QwenDenseEmbedding``: Alternative using Qwen/DashScope API
        - ``DefaultLocalDenseEmbedding``: Local model without API calls
    """

    def __init__(
        self,
        model: str = "jina-embeddings-v5-text-nano",
        dimension: Optional[int] = None,
        api_key: Optional[str] = None,
        task: Optional[str] = None,
        **kwargs,
    ):
        """Set up the Jina dense embedding function.

        Args:
            model (str): Jina model name. Defaults to "jina-embeddings-v5-text-nano".
            dimension (Optional[int]): Target embedding dimension, or None for
                the model default.
            api_key (Optional[str]): API key, or None to read the environment
                variable.
            task (Optional[str]): Task type for embedding optimization, or None.
            **kwargs: Extra parameters, stored and exposed via ``extra_params``.

        Raises:
            ValueError: If no API key is given or found in the environment,
                or if ``task`` is not a valid task type.
        """
        # The base class validates the API key and task before anything else
        # is stored on the instance.
        JinaFunctionBase.__init__(self, model=model, api_key=api_key, task=task)

        # The caller's explicit request (possibly None) is what gets forwarded
        # to the API; the effective dimension is tracked separately.
        self._custom_dimension = dimension
        self._dimension = (
            dimension
            if dimension is not None
            else self._MODEL_DIMENSIONS.get(model, 768)
        )

        self._extra_params = kwargs

    @property
    def dimension(self) -> int:
        """int: The expected dimensionality of the embedding vector."""
        return self._dimension

    @property
    def extra_params(self) -> dict:
        """dict: Extra parameters for model-specific customization."""
        return self._extra_params

    def __call__(self, input: TEXT) -> DenseVectorType:
        """Allow the instance to be called like a function; same as ``embed()``."""
        return self.embed(input)

    @lru_cache(maxsize=10)
    def embed(self, input: TEXT) -> DenseVectorType:
        """Embed a text string through the Jina Embeddings API.

        The text is sent to the API and the returned vector is checked
        against ``self.dimension``. Results are cached so that repeated
        inputs do not trigger new API calls.

        Args:
            input (TEXT): Text to embed. Must be non-empty after stripping
                whitespace. Maximum length depends on model:
                8192 tokens for v5-nano, 32768 tokens for v5-small.

        Returns:
            DenseVectorType: A list of floats representing the embedding vector.
                Length equals ``self.dimension``. Example:
                ``[0.123, -0.456, 0.789, ...]``

        Raises:
            TypeError: If ``input`` is not a string.
            ValueError: If input is empty/whitespace-only, if the API returns
                an error or malformed response, or if the returned vector's
                length differs from ``self.dimension``.
            RuntimeError: If network connectivity issues or Jina service
                errors occur.

        Examples:
            >>> emb = JinaDenseEmbedding(task="retrieval.query")
            >>> vector = emb.embed("What is deep learning?")
            >>> len(vector)
            768
            >>> isinstance(vector[0], float)
            True

            >>> # Error: empty input
            >>> emb.embed("   ")
            ValueError: Input text cannot be empty or whitespace only

            >>> # Error: non-string input
            >>> emb.embed(123)
            TypeError: Expected 'input' to be str, got int

        Note:
            - This method is cached (maxsize=10). Identical inputs return cached results.
            - The cache is based on exact string match (case-sensitive).
            - Task type affects embedding optimization but not caching behavior.
        """
        if not isinstance(input, TEXT):
            raise TypeError(f"Expected 'input' to be str, got {type(input).__name__}")

        text = input.strip()
        if not text:
            raise ValueError("Input text cannot be empty or whitespace only")

        vector = self._call_text_embedding_api(
            input=text,
            dimension=self._custom_dimension,
        )

        # Guard against the service returning a size other than the one
        # this instance advertises.
        if len(vector) != self.dimension:
            raise ValueError(
                f"Dimension mismatch: expected {self.dimension}, "
                f"got {len(vector)}"
            )

        return vector


================================================
FILE: python/zvec/extension/jina_function.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import os
from typing import ClassVar, Optional

from ..common.constants import TEXT
from ..tool import require_module


class JinaFunctionBase:
    """Shared plumbing for features built on the Jina AI API.

    The class holds the model name, API key, and task, builds an
    OpenAI-compatible client pointed at the Jina endpoint, and wraps the
    embeddings call together with its error and response handling.

    It is not intended for direct use; use a concrete implementation:
    - ``JinaDenseEmbedding`` for dense embeddings

    Args:
        model (str): Jina embedding model identifier.
        api_key (Optional[str]): Jina API authentication key.
        task (Optional[str]): Task type for the embedding model.

    Note:
        - Internal base class for code reuse across Jina features
        - Subclasses should additionally inherit from the appropriate Protocol
        - Centralizes API connection and response handling
        - Jina API is OpenAI-compatible, using the ``openai`` Python client
    """

    _BASE_URL: ClassVar[str] = "https://api.jina.ai/v1"

    # Default output dimension per known model
    _MODEL_DIMENSIONS: ClassVar[dict[str, int]] = {
        "jina-embeddings-v5-text-nano": 768,
        "jina-embeddings-v5-text-small": 1024,
    }

    # Maximum input tokens per known model
    _MODEL_MAX_TOKENS: ClassVar[dict[str, int]] = {
        "jina-embeddings-v5-text-nano": 8192,
        "jina-embeddings-v5-text-small": 32768,
    }

    # Task names accepted by the constructor
    _VALID_TASKS: ClassVar[tuple[str, ...]] = (
        "retrieval.query",
        "retrieval.passage",
        "text-matching",
        "classification",
        "separation",
    )

    def __init__(
        self,
        model: str,
        api_key: Optional[str] = None,
        task: Optional[str] = None,
    ):
        """Store the connection settings and validate them.

        Args:
            model (str): Jina model name.
            api_key (Optional[str]): API key, or None to read the
                ``JINA_API_KEY`` environment variable.
            task (Optional[str]): Task type for the embedding model.
                Valid values: "retrieval.query", "retrieval.passage",
                "text-matching", "classification", "separation".

        Raises:
            ValueError: If no API key is given or found in the environment,
                or if ``task`` is not a valid task type.
        """
        resolved_key = api_key or os.environ.get("JINA_API_KEY")

        self._model = model
        self._api_key = resolved_key
        self._task = task

        # The key is checked first, so a missing key is reported even when
        # the task is also invalid.
        if not resolved_key:
            raise ValueError(
                "Jina API key is required. Please provide 'api_key' parameter "
                "or set the 'JINA_API_KEY' environment variable. "
                "Get your key from: https://jina.ai/api-dashboard"
            )

        if not (task is None or task in self._VALID_TASKS):
            raise ValueError(
                f"Invalid task '{task}'. Valid tasks: {', '.join(self._VALID_TASKS)}"
            )

    @property
    def model(self) -> str:
        """str: The Jina model name currently in use."""
        return self._model

    @property
    def task(self) -> Optional[str]:
        """Optional[str]: The task type for the embedding model."""
        return self._task

    def _get_client(self):
        """Build an OpenAI-compatible client that targets the Jina API.

        Returns:
            OpenAI: Client configured with this instance's API key and the
                Jina base URL.

        Raises:
            ImportError: If openai package is not installed.
        """
        openai_module = require_module("openai")
        client_factory = openai_module.OpenAI
        return client_factory(api_key=self._api_key, base_url=self._BASE_URL)

    def _call_text_embedding_api(
        self,
        input: TEXT,
        dimension: Optional[int] = None,
    ) -> list:
        """Request an embedding for ``input`` from the Jina Embeddings API.

        Args:
            input (TEXT): Input text to embed.
            dimension (Optional[int]): Target dimension for Matryoshka
                embeddings; omitted from the request when None.

        Returns:
            list: Embedding vector as list of floats.

        Raises:
            RuntimeError: If API call fails.
            ValueError: If the response carries no data or cannot be parsed.
        """
        try:
            client = self._get_client()

            request_kwargs = {"model": self.model, "input": input}

            # Only sent when a specific (Matryoshka) size was requested.
            if dimension is not None:
                request_kwargs["dimensions"] = dimension

            # The task travels in the request body via ``extra_body``.
            if self._task is not None:
                request_kwargs["extra_body"] = {"task": self._task}

            response = client.embeddings.create(**request_kwargs)

        except Exception as err:
            # Every failure becomes a RuntimeError; the exception type only
            # decides which message prefix is used.
            openai_module = require_module("openai")
            api_failures = (openai_module.APIError, openai_module.APIConnectionError)
            if isinstance(err, api_failures):
                message = f"Failed to call Jina API: {err!s}"
            else:
                message = f"Unexpected error during API call: {err!s}"
            raise RuntimeError(message) from err

        # Pull the first embedding out of the response.
        try:
            items = response.data
            if not items:
                raise ValueError("Invalid API response: no embedding data returned")

            vector = items[0].embedding
            if isinstance(vector, list):
                return vector

            raise ValueError(
                "Invalid API response: embedding is not a list of numbers"
            )

        except (AttributeError, IndexError, TypeError) as err:
            raise ValueError(f"Failed to parse API response: {err!s}") from err


================================================
FILE: python/zvec/extension/multi_vector_reranker.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import heapq
import math
from collections import defaultdict
from typing import Optional

from ..model.doc import Doc
from ..typing import MetricType
from .rerank_function import RerankFunction


class RrfReRanker(RerankFunction):
    """Multi-vector re-ranker based on Reciprocal Rank Fusion (RRF).

    RRF merges several ranked result lists using positions only; the
    relevance scores of the incoming documents are not consulted. A document
    gains more weight the earlier it appears, and the more lists it appears in.

    A document at zero-based rank ``r`` in one list contributes
    ``1 / (k + r + 1)``, where ``k`` is the rank constant. Contributions from
    all lists are summed per document id.

    Note:
        Intended for multi-vector scenarios in which results from several
        vector fields have to be combined into a single list.

    Args:
        topn (int, optional): Number of top documents to return. Defaults to 10.
        rerank_field (Optional[str], optional): Ignored by RRF. Defaults to None.
        rank_constant (int, optional): Smoothing constant ``k`` in RRF formula.
            Larger values reduce the impact of early ranks. Defaults to 60.
    """

    def __init__(
        self,
        topn: int = 10,
        rerank_field: Optional[str] = None,
        rank_constant: int = 60,
    ):
        super().__init__(topn=topn, rerank_field=rerank_field)
        self._rank_constant = rank_constant

    @property
    def rank_constant(self) -> int:
        """int: Smoothing constant ``k`` used in the RRF formula."""
        return self._rank_constant

    def _rrf_score(self, rank: int) -> float:
        """Return the RRF contribution of a document at zero-based ``rank``."""
        denominator = self._rank_constant + rank + 1
        return 1.0 / denominator

    def rerank(self, query_results: dict[str, list[Doc]]) -> list[Doc]:
        """Fuse several ranked result lists into one using RRF.

        Args:
            query_results (dict[str, list[Doc]]): Results from one or more
                vector queries. The keys are not used; only the order of the
                documents inside each list matters.

        Returns:
            list[Doc]: At most ``topn`` documents, highest fused score first,
                each carrying its RRF score in the ``score`` field.
        """
        fused: dict[str, float] = defaultdict(float)
        first_seen: dict[str, Doc] = {}

        for ranked_docs in query_results.values():
            for position, candidate in enumerate(ranked_docs):
                key = candidate.id
                contribution = self._rrf_score(position)
                fused[key] += contribution
                # Keep the first Doc encountered for each id.
                first_seen.setdefault(key, candidate)

        best = heapq.nlargest(self.topn, fused.items(), key=lambda entry: entry[1])
        return [first_seen[key]._replace(score=total) for key, total in best]


class WeightedReRanker(RerankFunction):
    """Merge per-field vector search results through a weighted score sum.

    The raw score of every returned document is first normalized according to
    the configured metric and then multiplied by the weight of the vector field
    it came from. A document returned by several fields receives the sum of
    its weighted contributions.

    Note:
        Intended for multi-vector queries, where each vector field produces
        its own result list and the lists have to be combined with
        configurable weights.

    Args:
        topn (int, optional): Number of top documents to return. Defaults to 10.
        rerank_field (Optional[str], optional): Ignored. Defaults to None.
        metric (MetricType, optional): Distance metric that selects the
            normalization formula. Defaults to ``MetricType.L2``.
        weights (Optional[dict[str, float]], optional): Weight per vector
            field name. Fields missing from the mapping are weighted 1.0.
            Defaults to None.

    Note:
        Supported metrics: L2, IP, COSINE. Normalization targets the [0, 1]
        range.
    """

    def __init__(
        self,
        topn: int = 10,
        rerank_field: Optional[str] = None,
        metric: MetricType = MetricType.L2,
        weights: Optional[dict[str, float]] = None,
    ):
        """Store the metric and the per-field weights.

        Args:
            topn (int): Number of documents to keep. Defaults to 10.
            rerank_field (Optional[str]): Forwarded to the base class.
            metric (MetricType): Metric used for score normalization.
            weights (Optional[dict[str, float]]): Per-field weights; a falsy
                value (``None`` or an empty dict) becomes a fresh empty dict.
        """
        super().__init__(rerank_field=rerank_field, topn=topn)
        self._weights = weights or {}
        self._metric = metric

    @property
    def weights(self) -> dict[str, float]:
        """dict[str, float]: Weight assigned to each vector field name."""
        return self._weights

    @property
    def metric(self) -> MetricType:
        """MetricType: Metric that selects the normalization formula."""
        return self._metric

    def rerank(self, query_results: dict[str, list[Doc]]) -> list[Doc]:
        """Fuse per-field results into one list ranked by weighted score.

        Args:
            query_results (dict[str, list[Doc]]): Result list for each vector
                field, keyed by the field name used to look up its weight.

        Returns:
            list[Doc]: At most ``topn`` documents, highest combined score
                first. Each is a copy (via ``_replace``) of the first
                occurrence of the document with ``score`` set to the sum.

        Raises:
            ValueError: If the configured metric is not supported.
        """
        totals: dict[str, float] = defaultdict(float)
        first_occurrence: dict[str, Doc] = {}

        for field_name, field_docs in query_results.items():
            for candidate in field_docs:
                key = candidate.id
                normalized = self._normalize_score(candidate.score, self.metric)
                field_weight = self.weights.get(field_name, 1.0)
                totals[key] += normalized * field_weight
                # The first sighting of a document serves as the template.
                first_occurrence.setdefault(key, candidate)

        winners = heapq.nlargest(self.topn, totals.items(), key=lambda pair: pair[1])
        return [
            first_occurrence[key]._replace(score=total) for key, total in winners
        ]

    def _normalize_score(self, score: float, metric: MetricType) -> float:
        """Map a raw score to a normalized relevance value.

        Args:
            score (float): Raw score reported by the vector query.
            metric (MetricType): Metric the score was produced with.

        Returns:
            float: Normalized value. For L2 and COSINE a larger input gives a
                smaller output; for IP a larger input gives a larger output.

        Raises:
            ValueError: If ``metric`` is not L2, IP or COSINE.
        """
        if metric == MetricType.L2:
            normalized = 1.0 - 2 * math.atan(score) / math.pi
        elif metric == MetricType.IP:
            normalized = 0.5 + math.atan(score) / math.pi
        elif metric == MetricType.COSINE:
            normalized = 1.0 - score / 2.0
        else:
            raise ValueError("Unsupported metric type")
        return normalized


================================================
FILE: python/zvec/extension/openai_embedding_function.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from functools import lru_cache
from typing import Optional

from ..common.constants import TEXT, DenseVectorType
from .embedding_function import DenseEmbeddingFunction
from .openai_function import OpenAIFunctionBase


class OpenAIDenseEmbedding(OpenAIFunctionBase, DenseEmbeddingFunction[TEXT]):
    """Dense text embedding backed by the OpenAI Embeddings API.

    Combines the API plumbing of ``OpenAIFunctionBase`` with the
    ``DenseEmbeddingFunction`` interface: a text string goes in, a dense
    vector (list of floats) comes out. Recently embedded inputs are served
    from a small LRU cache instead of triggering another API call.

    Args:
        model (str, optional): OpenAI embedding model identifier.
            Defaults to ``"text-embedding-3-small"``. Common options:
            - ``"text-embedding-3-small"``: 1536 dims, cost-efficient, good performance
            - ``"text-embedding-3-large"``: 3072 dims, highest quality
            - ``"text-embedding-ada-002"``: 1536 dims, legacy model
        dimension (Optional[int], optional): Requested output dimension.
            ``None`` selects the model's default dimension (1536 when the
            model is not in the known-model table). For text-embedding-3
            models a custom value may be given (e.g., 256, 512, 1024, 1536).
            Defaults to ``None``.
        api_key (Optional[str], optional): OpenAI API authentication key.
            When ``None``, the ``OPENAI_API_KEY`` environment variable is used.
            Obtain your key from: https://platform.openai.com/api-keys
        base_url (Optional[str], optional): Base URL of an OpenAI-compatible
            service. Defaults to ``None`` (official OpenAI endpoint).

    Attributes:
        dimension (int): The embedding vector dimension.
        data_type (DataType): Always ``DataType.VECTOR_FP32`` for this implementation.
        model (str): The OpenAI model name being used.

    Raises:
        ValueError: If no API key is given or found in the environment, or if
            the API returns an error response.
        TypeError: If the input to ``embed()`` is not a string.
        RuntimeError: If a network error or OpenAI service error occurs.

    Note:
        - Requires Python 3.10, 3.11, or 3.12
        - Requires the ``openai`` package: ``pip install openai``
        - Embedding results are cached (LRU cache, maxsize=10) to reduce API calls
        - Network connectivity to OpenAI API endpoints is required
        - API usage incurs costs based on your OpenAI subscription plan
        - Rate limits apply based on your OpenAI account tier

    Examples:
        >>> # Default model, key taken from the environment
        >>> from zvec.extension import OpenAIDenseEmbedding
        >>> import os
        >>> os.environ["OPENAI_API_KEY"] = "sk-..."
        >>>
        >>> emb_func = OpenAIDenseEmbedding()
        >>> vector = emb_func.embed("Hello, world!")
        >>> len(vector)
        1536

        >>> # Explicit model with a reduced dimension
        >>> emb_func = OpenAIDenseEmbedding(
        ...     model="text-embedding-3-large",
        ...     dimension=1024,
        ...     api_key="sk-..."
        ... )
        >>> vector = emb_func.embed("Machine learning is fascinating")
        >>> len(vector)
        1024

        >>> # Custom base URL (e.g., Azure OpenAI)
        >>> emb_func = OpenAIDenseEmbedding(
        ...     model="text-embedding-ada-002",
        ...     api_key="your-azure-key",
        ...     base_url="https://your-resource.openai.azure.com/"
        ... )
        >>> vector = emb_func("Natural language processing")
        >>> isinstance(vector, list)
        True

        >>> # Repeated inputs are answered from the cache
        >>> texts = ["First text", "Second text", "First text"]
        >>> vectors = [emb_func.embed(text) for text in texts]
        >>> # The third call reuses the cached result for "First text"

        >>> # Error handling
        >>> try:
        ...     emb_func.embed("")  # Empty string
        ... except ValueError as e:
        ...     print(f"Error: {e}")
        Error: Input text cannot be empty or whitespace only

    See Also:
        - ``DenseEmbeddingFunction``: Base class for dense embeddings
        - ``QwenDenseEmbedding``: Alternative using Qwen/DashScope API
        - ``DefaultDenseEmbedding``: Local model without API calls
        - ``SparseEmbeddingFunction``: Base class for sparse embeddings
    """

    def __init__(
        self,
        model: str = "text-embedding-3-small",
        dimension: Optional[int] = None,
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
        **kwargs,
    ):
        """Create the embedding function.

        Args:
            model (str): OpenAI model name. Defaults to "text-embedding-3-small".
            dimension (Optional[int]): Target embedding dimension, or None to
                use the model default.
            api_key (Optional[str]): API key, or None to read the environment
                variable.
            base_url (Optional[str]): Custom API base URL, or None for default.
            **kwargs: Extra options (e.g. ``encoding_format``, ``user``). They
                are stored and exposed through ``extra_params``; ``embed()``
                as written here does not forward them to the API call.

        Raises:
            ValueError: If API key is not provided and not in environment.
        """
        # Connection settings (model, key, base URL) live in the OpenAI base.
        OpenAIFunctionBase.__init__(
            self, model=model, api_key=api_key, base_url=base_url
        )

        self._extra_params = kwargs

        # The caller's raw request is remembered separately: only an explicit
        # value is sent to the API, while ``_dimension`` always holds the
        # length that returned vectors are checked against.
        self._custom_dimension = dimension
        self._dimension = (
            dimension
            if dimension is not None
            else self._MODEL_DIMENSIONS.get(model, 1536)
        )

    @property
    def dimension(self) -> int:
        """int: Length every returned embedding vector is expected to have."""
        return self._dimension

    @property
    def extra_params(self) -> dict:
        """dict: Extra keyword arguments captured at construction time."""
        return self._extra_params

    def __call__(self, input: TEXT) -> DenseVectorType:
        """Embed ``input``; shorthand for ``embed(input)``."""
        return self.embed(input)

    @lru_cache(maxsize=10)
    def embed(self, input: TEXT) -> DenseVectorType:
        """Embed one text string into a dense vector.

        The text is stripped of surrounding whitespace and sent to the OpenAI
        Embeddings API. The returned vector is checked against
        ``self.dimension`` before it is handed back.

        Args:
            input (TEXT): Text to embed. Must be non-empty after stripping
                whitespace. Maximum length is 8191 tokens for most models.

        Returns:
            DenseVectorType: A list of floats whose length equals
                ``self.dimension``. Example: ``[0.123, -0.456, 0.789, ...]``

        Raises:
            TypeError: If ``input`` is not a string.
            ValueError: If the input is empty/whitespace-only, the API returns
                an error or malformed response, or the vector length differs
                from ``self.dimension``.
            RuntimeError: If network connectivity issues or OpenAI service
                errors occur.

        Examples:
            >>> emb = OpenAIDenseEmbedding()
            >>> vector = emb.embed("Natural language processing")
            >>> len(vector)
            1536
            >>> isinstance(vector[0], float)
            True

            >>> # Error: empty input
            >>> emb.embed("   ")
            ValueError: Input text cannot be empty or whitespace only

            >>> # Error: non-string input
            >>> emb.embed(123)
            TypeError: Expected 'input' to be str, got int

        Note:
            - This method is cached (maxsize=10). The cache is attached to the
              method, so its slots are shared by all instances and entries
              are keyed on the instance together with the input.
            - Keys use the exact string as passed in (case-sensitive, before
              whitespace stripping).
            - Consider pre-processing text (lowercasing, normalization) for
              better caching.
        """
        if not isinstance(input, TEXT):
            raise TypeError(f"Expected 'input' to be str, got {type(input).__name__}")

        text = input.strip()
        if not text:
            raise ValueError("Input text cannot be empty or whitespace only")

        # Only an explicitly requested dimension is forwarded to the API.
        vector = self._call_text_embedding_api(
            input=text,
            dimension=self._custom_dimension,
        )

        # Guard against the service returning a different vector size.
        if len(vector) == self.dimension:
            return vector

        raise ValueError(
            f"Dimension mismatch: expected {self.dimension}, "
            f"got {len(vector)}"
        )


================================================
FILE: python/zvec/extension/openai_function.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import os
from typing import ClassVar, Optional

from ..common.constants import TEXT
from ..tool import require_module


class OpenAIFunctionBase:
    """Base class for OpenAI functions.

    Holds the connection settings (model, API key, optional base URL) and
    provides the shared helper that calls the OpenAI Embeddings API and
    validates its response.

    This class is not meant to be used directly. Use concrete implementations:
    - ``OpenAIDenseEmbedding`` for dense embeddings

    Args:
        model (str): OpenAI model identifier.
        api_key (Optional[str]): OpenAI API authentication key.
        base_url (Optional[str]): Custom API base URL.

    Note:
        - This is an internal base class for code reuse across OpenAI features
        - Subclasses should inherit from appropriate Protocol
        - Provides unified API connection and response handling
        - The OpenAI client is created on first use and then reused by the
          instance, so repeated calls share one client
    """

    # Default output dimension of each known embedding model.
    _MODEL_DIMENSIONS: ClassVar[dict[str, int]] = {
        "text-embedding-3-small": 1536,
        "text-embedding-3-large": 3072,
        "text-embedding-ada-002": 1536,
    }

    def __init__(
        self,
        model: str,
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
    ):
        """Initialize the base OpenAI functionality.

        Args:
            model (str): OpenAI model name.
            api_key (Optional[str]): API key or None to use environment variable.
            base_url (Optional[str]): Custom API base URL or None for default.

        Raises:
            ValueError: If API key is not provided and not in environment.
        """
        self._model = model
        # An explicit key wins; otherwise fall back to the environment.
        self._api_key = api_key or os.environ.get("OPENAI_API_KEY")
        self._base_url = base_url
        # Created lazily by ``_get_client`` so that constructing the object
        # does not require the ``openai`` package to be importable.
        self._client = None

        if not self._api_key:
            raise ValueError(
                "OpenAI API key is required. Please provide 'api_key' parameter "
                "or set the 'OPENAI_API_KEY' environment variable."
            )

    @property
    def model(self) -> str:
        """str: The OpenAI model name currently in use."""
        return self._model

    def _get_client(self):
        """Return the OpenAI client for this instance, creating it on first use.

        The client is built once from the stored API key and base URL and then
        reused, instead of constructing a new client for every API call.

        Returns:
            OpenAI: Configured OpenAI client.

        Raises:
            ImportError: If openai package is not installed.
        """
        # ``getattr`` keeps this safe for subclasses that skipped ``__init__``.
        client = getattr(self, "_client", None)
        if client is None:
            openai = require_module("openai")

            if self._base_url:
                client = openai.OpenAI(
                    api_key=self._api_key, base_url=self._base_url
                )
            else:
                client = openai.OpenAI(api_key=self._api_key)
            self._client = client
        return client

    def _call_text_embedding_api(
        self,
        input: TEXT,
        dimension: Optional[int] = None,
    ) -> list:
        """Call OpenAI Embeddings API.

        Args:
            input (TEXT): Input text to embed.
            dimension (Optional[int]): Target dimension (for models that support
                it). When ``None`` the ``dimensions`` parameter is not sent.

        Returns:
            list: Embedding vector as list of floats.

        Raises:
            RuntimeError: If API call fails.
            ValueError: If API returns error response.
        """
        try:
            client = self._get_client()

            # Prepare embedding parameters
            params = {"model": self.model, "input": input}

            # Add dimension parameter for models that support it
            if dimension is not None:
                params["dimensions"] = dimension

            # Call OpenAI API
            response = client.embeddings.create(**params)

        except Exception as e:
            # Distinguish OpenAI API errors from anything else; both are
            # surfaced as RuntimeError with the original exception chained.
            openai = require_module("openai")
            if isinstance(e, (openai.APIError, openai.APIConnectionError)):
                raise RuntimeError(f"Failed to call OpenAI API: {e!s}") from e
            raise RuntimeError(f"Unexpected error during API call: {e!s}") from e

        # Extract embedding from response
        try:
            if not response.data:
                raise ValueError("Invalid API response: no embedding data returned")

            # A single input was sent, so only the first item is relevant.
            embedding_vector = response.data[0].embedding

            if not isinstance(embedding_vector, list):
                raise ValueError(
                    "Invalid API response: embedding is not a list of numbers"
                )

            return embedding_vector

        except (AttributeError, IndexError, TypeError) as e:
            # Structural problems in the response object become ValueError.
            raise ValueError(f"Failed to parse API response: {e!s}") from e


================================================
FILE: python/zvec/extension/qwen_embedding_function.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from functools import lru_cache
from typing import Optional

from ..common.constants import TEXT, DenseVectorType, SparseVectorType
from .embedding_function import DenseEmbeddingFunction, SparseEmbeddingFunction
from .qwen_function import QwenFunctionBase


class QwenDenseEmbedding(QwenFunctionBase, DenseEmbeddingFunction[TEXT]):
    """Dense text embedding backed by the Qwen (DashScope) API.

    Combines the API plumbing of ``QwenFunctionBase`` with the
    ``DenseEmbeddingFunction`` interface: a text string goes in, a dense
    vector (list of floats) of the configured dimension comes out. Recently
    embedded inputs are served from a small LRU cache instead of triggering
    another API call.

    Args:
        dimension (int): Desired output embedding dimension. Common values:
            - 512: Balanced performance and accuracy
            - 1024: Higher accuracy, larger storage
            - 1536: Maximum accuracy for supported models
        model (str, optional): DashScope embedding model identifier.
            Defaults to ``"text-embedding-v4"``. Other options include:
            - ``"text-embedding-v3"``
            - ``"text-embedding-v2"``
            - ``"text-embedding-v1"``
        api_key (Optional[str], optional): DashScope API authentication key.
            When ``None``, the ``DASHSCOPE_API_KEY`` environment variable is used.
            Obtain your key from: https://dashscope.console.aliyun.com/
        **kwargs: Additional DashScope API parameters. Supported options:
            - ``text_type`` (str): Role of the text in retrieval tasks.
              Options: ``"query"`` (search query) or ``"document"`` (indexed content).
              This parameter optimizes embeddings for asymmetric search scenarios.

            Reference: https://help.aliyun.com/zh/model-studio/text-embedding-synchronous-api

    Attributes:
        dimension (int): The embedding vector dimension.
        data_type (DataType): Always ``DataType.VECTOR_FP32`` for this implementation.
        model (str): The DashScope model name being used.

    Raises:
        ValueError: If no API key is given or found in the environment, or if
            the API returns an error response.
        TypeError: If the input to ``embed()`` is not a string.
        RuntimeError: If a network error or DashScope service error occurs.

    Note:
        - Requires Python 3.10, 3.11, or 3.12
        - Requires the ``dashscope`` package: ``pip install dashscope``
        - Embedding results are cached (LRU cache, maxsize=10) to reduce API calls
        - Network connectivity to DashScope API endpoints is required
        - API usage may incur costs based on your DashScope subscription plan

        **Parameter Guidelines:**

        - Use ``text_type="query"`` for search queries and ``text_type="document"``
          for indexed content to optimize asymmetric retrieval tasks.
        - For detailed API specifications and parameter usage, refer to:
          https://help.aliyun.com/zh/model-studio/text-embedding-synchronous-api

    Examples:
        >>> # Default model, key taken from the environment
        >>> from zvec.extension import QwenDenseEmbedding
        >>> import os
        >>> os.environ["DASHSCOPE_API_KEY"] = "your-api-key"
        >>>
        >>> emb_func = QwenDenseEmbedding(dimension=1024)
        >>> vector = emb_func.embed("Hello, world!")
        >>> len(vector)
        1024

        >>> # Explicit model and API key
        >>> emb_func = QwenDenseEmbedding(
        ...     dimension=512,
        ...     model="text-embedding-v3",
        ...     api_key="sk-xxxxx"
        ... )
        >>> vector = emb_func("Machine learning is fascinating")
        >>> isinstance(vector, list)
        True

        >>> # Custom parameters (text_type)
        >>> # Search queries - optimized for query-document matching
        >>> emb_func = QwenDenseEmbedding(
        ...     dimension=1024,
        ...     text_type="query"
        ... )
        >>> query_vector = emb_func.embed("What is machine learning?")
        >>>
        >>> # Document embeddings - optimized for being matched by queries
        >>> doc_emb_func = QwenDenseEmbedding(
        ...     dimension=1024,
        ...     text_type="document"
        ... )
        >>> doc_vector = doc_emb_func.embed(
        ...     "Machine learning is a subset of artificial intelligence..."
        ... )

        >>> # Repeated inputs are answered from the cache
        >>> texts = ["First text", "Second text", "First text"]
        >>> vectors = [emb_func.embed(text) for text in texts]
        >>> # The third call reuses the cached result for "First text"

        >>> # Error handling
        >>> try:
        ...     emb_func.embed("")  # Empty string
        ... except ValueError as e:
        ...     print(f"Error: {e}")
        Error: Input text cannot be empty or whitespace only

    See Also:
        - ``DenseEmbeddingFunction``: Base class for dense embeddings
        - ``SparseEmbeddingFunction``: Base class for sparse embeddings
    """

    def __init__(
        self,
        dimension: int,
        model: str = "text-embedding-v4",
        api_key: Optional[str] = None,
        **kwargs,
    ):
        """Create the embedding function.

        Args:
            dimension (int): Target embedding dimension.
            model (str): DashScope model name. Defaults to "text-embedding-v4".
            api_key (Optional[str]): API key, or None to read the environment
                variable.
            **kwargs: Additional DashScope API parameters. Supported options:
                - ``text_type`` (str): Text role in asymmetric retrieval.
                  * ``"query"``: Optimize for search queries (short, question-like).
                  * ``"document"``: Optimize for indexed documents (longer content).
                  Choosing the matching text_type improves retrieval accuracy
                  for query-document matching.

                For detailed API documentation, see:
                https://help.aliyun.com/zh/model-studio/text-embedding-synchronous-api

        Raises:
            ValueError: If API key is not provided and not in environment.
        """
        # Connection settings (model, key) live in the Qwen base class.
        QwenFunctionBase.__init__(self, model=model, api_key=api_key)

        # Dense-specific state; ``embed()`` reads ``text_type`` from the extras.
        self._extra_params = kwargs
        self._dimension = dimension

    @property
    def dimension(self) -> int:
        """int: Length every returned embedding vector is expected to have."""
        return self._dimension

    @property
    def extra_params(self) -> dict:
        """dict: Extra keyword arguments captured at construction time."""
        return self._extra_params

    def __call__(self, input: TEXT) -> DenseVectorType:
        """Embed ``input``; shorthand for ``embed(input)``."""
        return self.embed(input)

    @lru_cache(maxsize=10)
    def embed(self, input: TEXT) -> DenseVectorType:
        """Embed one text string into a dense vector.

        The text is stripped of surrounding whitespace and sent to the
        DashScope TextEmbedding API with the configured dimension and
        optional ``text_type``. The response structure and the vector length
        are validated before the vector is returned.

        Args:
            input (TEXT): Text to embed. Must be non-empty after stripping
                whitespace. Maximum length depends on the model used
                (typically 2048-8192 tokens).

        Returns:
            DenseVectorType: A list of floats whose length equals
                ``self.dimension``. Example: ``[0.123, -0.456, 0.789, ...]``

        Raises:
            TypeError: If ``input`` is not a string.
            ValueError: If the input is empty/whitespace-only, or if the API
                returns an error or malformed response.
            RuntimeError: If network connectivity issues or DashScope service
                errors occur.

        Examples:
            >>> emb = QwenDenseEmbedding(dimension=1024)
            >>> vector = emb.embed("Natural language processing")
            >>> len(vector)
            1024
            >>> isinstance(vector[0], float)
            True

            >>> # Error: empty input
            >>> emb.embed("   ")
            ValueError: Input text cannot be empty or whitespace only

            >>> # Error: non-string input
            >>> emb.embed(123)
            TypeError: Expected 'input' to be str, got int

        Note:
            - This method is cached (maxsize=10). Identical inputs return cached results.
            - The cache is based on exact string match (case-sensitive).
            - Consider pre-processing text (lowercasing, normalization) for better caching.
        """
        if not isinstance(input, TEXT):
            raise TypeError(f"Expected 'input' to be str, got {type(input).__name__}")

        text = input.strip()
        if not text:
            raise ValueError("Input text cannot be empty or whitespace only")

        # Request a dense embedding of the configured size.
        response = self._call_text_embedding_api(
            input=text,
            dimension=self.dimension,
            output_type="dense",
            text_type=self.extra_params.get("text_type"),
        )

        # Expected shape: {"embeddings": [{"embedding": [...]}]} with one item.
        items = response.get("embeddings")
        if not isinstance(items, list):
            raise ValueError(
                "Invalid API response: 'embeddings' field is missing or not a list"
            )

        item_count = len(items)
        if item_count != 1:
            raise ValueError(
                f"Expected exactly 1 embedding in response, got {item_count}"
            )

        entry = items[0]
        if not isinstance(entry, dict):
            raise ValueError("Invalid API response: embedding item is not a dictionary")

        vector = entry.get("embedding")
        if not isinstance(vector, list):
            raise ValueError(
                "Invalid API response: 'embedding' field is missing or not a list"
            )

        vector_length = len(vector)
        if vector_length != self.dimension:
            raise ValueError(
                f"Dimension mismatch: expected {self.dimension}, "
                f"got {vector_length}"
            )

        # Hand back a copy rather than the list owned by the response payload.
        return list(vector)


class QwenSparseEmbedding(QwenFunctionBase, SparseEmbeddingFunction[TEXT]):
    """Sparse text embedding function using Qwen (DashScope) API.

    This class provides text-to-sparse-vector embedding capabilities using
    Alibaba Cloud's DashScope service and Qwen embedding models. It generates
    sparse keyword-weighted vectors suitable for lexical matching and BM25-style
    retrieval scenarios.

    Sparse embeddings are particularly useful for:
    - Keyword-based search and exact matching
    - Hybrid retrieval (combining with dense embeddings)
    - Interpretable search results (weights show term importance)

    Args:
        dimension (int): Desired output embedding dimension. Common values:
            - 512: Balanced performance and accuracy
            - 1024: Higher accuracy, larger storage
            - 1536: Maximum accuracy for supported models
        model (str, optional): DashScope embedding model identifier.
            Defaults to ``"text-embedding-v4"``. Other options include:
            - ``"text-embedding-v3"``
            - ``"text-embedding-v2"``
        api_key (Optional[str], optional): DashScope API authentication key.
            If ``None``, reads from ``DASHSCOPE_API_KEY`` environment variable.
            Obtain your key from: https://dashscope.console.aliyun.com/
        **kwargs: Additional DashScope API parameters. Supported options:
            - ``encoding_type`` (Literal["query", "document"]): Encoding type.
              * ``"query"``: Optimize for search queries (default).
              * ``"document"``: Optimize for indexed documents.
              This distinction is important for asymmetric retrieval tasks.

    Attributes:
        model (str): The DashScope model name being used.
        encoding_type (str): The encoding type ("query" or "document").
        extra_params (dict): The keyword arguments given at construction time.

    Raises:
        ValueError: If API key is not provided and not found in environment,
            or if API returns an error response.
        TypeError: If input to ``embed()`` is not a string.
        RuntimeError: If network error or DashScope service error occurs.

    Note:
        - Requires Python 3.10, 3.11, or 3.12
        - Requires the ``dashscope`` package: ``pip install dashscope``
        - Embedding results are cached (LRU cache, maxsize=10) to reduce API calls
        - Network connectivity to DashScope API endpoints is required
        - API usage may incur costs based on your DashScope subscription plan
        - Sparse vectors have only non-zero dimensions stored as dict
        - Output is sorted by indices (keys) in ascending order

        **Parameter Guidelines:**

        - Use ``encoding_type="query"`` for search queries and
          ``encoding_type="document"`` for indexed content to optimize
          asymmetric retrieval tasks.
        - For detailed API specifications, refer to:
          https://help.aliyun.com/zh/model-studio/text-embedding-synchronous-api

    Examples:
        >>> # Basic usage for query embedding
        >>> from zvec.extension import QwenSparseEmbedding
        >>> import os
        >>> os.environ["DASHSCOPE_API_KEY"] = "your-api-key"
        >>>
        >>> query_emb = QwenSparseEmbedding(dimension=1024, encoding_type="query")
        >>> query_vec = query_emb.embed("machine learning")
        >>> type(query_vec)
        <class 'dict'>
        >>> len(query_vec)  # Only non-zero dimensions
        156

        >>> # Document embedding
        >>> doc_emb = QwenSparseEmbedding(dimension=1024, encoding_type="document")
        >>> doc_vec = doc_emb.embed("Machine learning is a subset of AI")
        >>> isinstance(doc_vec, dict)
        True

        >>> # Asymmetric retrieval example
        >>> query_vec = query_emb.embed("what causes aging fast")
        >>> doc_vec = doc_emb.embed(
        ...     "UV-A light causes tanning, skin aging, and cataracts..."
        ... )
        >>>
        >>> # Calculate similarity (dot product for sparse vectors)
        >>> similarity = sum(
        ...     query_vec.get(k, 0) * doc_vec.get(k, 0)
        ...     for k in set(query_vec) | set(doc_vec)
        ... )

        >>> # Output is sorted by indices
        >>> list(query_vec.items())[:5]  # First 5 dimensions (by index)
        [(10, 0.45), (23, 0.87), (56, 0.32), (89, 1.12), (120, 0.65)]

        >>> # Hybrid retrieval (combining dense + sparse)
        >>> from zvec.extension import QwenDenseEmbedding
        >>> dense_emb = QwenDenseEmbedding(dimension=1024)
        >>> sparse_emb = QwenSparseEmbedding(dimension=1024)
        >>>
        >>> query = "deep learning neural networks"
        >>> dense_vec = dense_emb.embed(query)   # [0.1, -0.3, 0.5, ...]
        >>> sparse_vec = sparse_emb.embed(query)  # {12: 0.8, 45: 1.2, ...}

        >>> # Error handling
        >>> try:
        ...     sparse_emb.embed("")  # Empty string
        ... except ValueError as e:
        ...     print(f"Error: {e}")
        Error: Input text cannot be empty or whitespace only

    See Also:
        - ``SparseEmbeddingFunction``: Base class for sparse embeddings
        - ``QwenDenseEmbedding``: Dense embedding using Qwen API
        - ``DefaultSparseEmbedding``: Sparse embedding with SPLADE model
    """

    def __init__(
        self,
        dimension: int,
        model: str = "text-embedding-v4",
        api_key: Optional[str] = None,
        **kwargs,
    ):
        """Initialize the Qwen sparse embedding function.

        Args:
            dimension (int): Target embedding dimension.
            model (str): DashScope model name. Defaults to "text-embedding-v4".
            api_key (Optional[str]): API key or None to use environment variable.
            **kwargs: Additional DashScope API parameters. Supported options:
                - ``encoding_type`` (Literal["query", "document"]): Encoding type.
                  * ``"query"``: Optimize for search queries (default).
                  * ``"document"``: Optimize for indexed documents.
                  This distinction is important for asymmetric retrieval tasks.

        Raises:
            ValueError: If API key is not provided and not in environment.
        """
        # Initialize base class for API connection
        QwenFunctionBase.__init__(self, model=model, api_key=api_key)

        self._dimension = dimension
        self._extra_params = kwargs

    @property
    def extra_params(self) -> dict:
        """dict: Extra parameters for model-specific customization."""
        return self._extra_params

    @property
    def encoding_type(self) -> str:
        """str: Encoding type sent to the API as ``text_type``.

        Read from the ``encoding_type`` keyword argument given at construction
        time; ``"query"`` when that argument was not supplied.
        """
        return self.extra_params.get("encoding_type", "query")

    def __call__(self, input: TEXT) -> SparseVectorType:
        """Make the embedding function callable."""
        return self.embed(input)

    @lru_cache(maxsize=10)
    def embed(self, input: TEXT) -> SparseVectorType:
        """Generate sparse embedding vector for the input text.

        This method calls the DashScope TextEmbedding API with sparse output type
        to convert input text into a sparse vector representation. The result is
        a dictionary where keys are dimension indices and values are importance
        weights (only non-zero values included).

        The embedding is optimized based on the ``encoding_type`` specified during
        initialization: "query" for search queries or "document" for indexed content.

        Args:
            input (TEXT): Input text string to embed. Must be non-empty after
                stripping whitespace. Maximum length depends on the model used
                (typically 2048-8192 tokens).

        Returns:
            SparseVectorType: A dictionary mapping dimension index to weight.
                Only entries with a strictly positive weight are included. The
                dictionary is sorted by indices (keys) in ascending order for
                consistent output.
                Example: ``{10: 0.5, 245: 0.8, 1023: 1.2, 5678: 0.5}``

        Raises:
            TypeError: If ``input`` is not a string.
            ValueError: If input is empty/whitespace-only, or if the API returns
                an error or malformed response (including items whose ``index``
                or ``value`` cannot be converted to a number).
            RuntimeError: If network connectivity issues or DashScope service
                errors occur.

        Examples:
            >>> emb = QwenSparseEmbedding(dimension=1024, encoding_type="query")
            >>> sparse_vec = emb.embed("machine learning")
            >>> isinstance(sparse_vec, dict)
            True
            >>>
            >>> # Verify sorted output
            >>> keys = list(sparse_vec.keys())
            >>> keys == sorted(keys)
            True

            >>> # Error: empty input
            >>> emb.embed("   ")
            ValueError: Input text cannot be empty or whitespace only

            >>> # Error: non-string input
            >>> emb.embed(123)
            TypeError: Expected 'input' to be str, got int

        Note:
            - This method is cached (maxsize=10). Identical inputs return cached results.
            - The cache is based on exact string match (case-sensitive) of the
              raw argument; stripping happens inside the cached body, so
              ``"text"`` and ``" text "`` occupy separate cache entries.
            - ``lru_cache`` wraps the function at class level: the 10 entries
              are shared by every instance, the instance is part of the cache
              key, and cached instances stay referenced until evicted.
            - A cache hit returns the same dict object that was returned before;
              copy it before mutating.
            - Output dictionary is always sorted by indices for consistency.
        """
        if not isinstance(input, TEXT):
            raise TypeError(f"Expected 'input' to be str, got {type(input).__name__}")

        input = input.strip()
        if not input:
            raise ValueError("Input text cannot be empty or whitespace only")

        # Call API with sparse output type
        output = self._call_text_embedding_api(
            input=input,
            dimension=self._dimension,
            output_type="sparse",
            text_type=self.encoding_type,
        )

        embeddings = output.get("embeddings")
        if not isinstance(embeddings, list):
            raise ValueError(
                "Invalid API response: 'embeddings' field is missing or not a list"
            )

        if len(embeddings) != 1:
            raise ValueError(
                f"Expected exactly 1 embedding in response, got {len(embeddings)}"
            )

        first_emb = embeddings[0]
        if not isinstance(first_emb, dict):
            raise ValueError("Invalid API response: embedding item is not a dictionary")

        sparse_embedding = first_emb.get("sparse_embedding")
        if not isinstance(sparse_embedding, list):
            raise ValueError(
                "Invalid API response: 'sparse_embedding' field is missing or not a list"
            )

        # Parse sparse embedding: convert array of {index, value, token} to dict
        sparse_dict = {}
        for item in sparse_embedding:
            if not isinstance(item, dict):
                raise ValueError(
                    "Invalid API response: sparse_embedding item is not a dictionary"
                )

            index = item.get("index")
            value = item.get("value")

            if index is None or value is None:
                raise ValueError(
                    "Invalid API response: sparse_embedding item missing 'index' or 'value'"
                )

            # Convert to int and float. A non-numeric payload must surface as the
            # documented ValueError rather than a stray TypeError/OverflowError.
            try:
                idx = int(index)
                val = float(value)
            except (TypeError, ValueError, OverflowError) as e:
                raise ValueError(
                    "Invalid API response: sparse_embedding item has non-numeric "
                    "'index' or 'value'"
                ) from e

            # Keep strictly positive weights only (NaN also fails this test).
            if val > 0:
                sparse_dict[idx] = val

        # Sort by indices (keys) to ensure consistent ordering
        return dict(sorted(sparse_dict.items()))


================================================
FILE: python/zvec/extension/qwen_function.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import os
from http import HTTPStatus
from typing import Optional

from ..common.constants import TEXT
from ..tool import require_module


class QwenFunctionBase:
    """Base class for Qwen (DashScope) functions.

    This base class provides common functionality for calling DashScope APIs
    and handling responses. It supports embeddings (dense and sparse) and
    re-ranking operations.

    This class is not meant to be used directly. Use concrete implementations:
    - ``QwenDenseEmbedding`` for dense embeddings
    - ``QwenSparseEmbedding`` for sparse embeddings
    - ``QwenReRanker`` for semantic re-ranking

    Args:
        model (str): DashScope model identifier.
        api_key (Optional[str]): DashScope API authentication key.

    Note:
        - This is an internal base class for code reuse across Qwen features
        - Subclasses should inherit from appropriate Protocol/ABC
        - Provides unified API connection and response handling
    """

    def __init__(
        self,
        model: str,
        api_key: Optional[str] = None,
    ):
        """Initialize the shared Qwen (DashScope) configuration.

        Args:
            model (str): DashScope model name.
            api_key (Optional[str]): API key or None to use environment variable.

        Raises:
            ValueError: If API key is not provided and not in environment.
        """
        self._model = model
        # An empty-string key is treated like None and falls back to the
        # DASHSCOPE_API_KEY environment variable.
        self._api_key = api_key or os.environ.get("DASHSCOPE_API_KEY")
        if not self._api_key:
            raise ValueError(
                "DashScope API key is required. Please provide 'api_key' parameter "
                "or set the 'DASHSCOPE_API_KEY' environment variable."
            )

    @property
    def model(self) -> str:
        """str: The DashScope model name currently in use."""
        return self._model

    def _get_connection(self):
        """Load the dashscope module and configure it with this object's key.

        Returns:
            module: The dashscope module with API key configured.

        Raises:
            ImportError: If dashscope package is not installed.

        Note:
            The key is assigned to the module-level ``dashscope.api_key``
            attribute, so it is visible to every user of the module in the
            process, not only to this instance.
        """
        dashscope = require_module("dashscope")
        dashscope.api_key = self._api_key
        return dashscope

    @staticmethod
    def _extract_output(resp) -> dict:
        """Validate a DashScope response object and return its ``output`` dict.

        Shared by every ``_call_*_api`` method so error reporting is identical
        across endpoints.

        Args:
            resp: Response object returned by a DashScope ``call``.

        Returns:
            dict: The response's ``output`` attribute.

        Raises:
            ValueError: If the status is not ``HTTPStatus.OK`` (a response with
                no ``status_code`` attribute is reported as ``Status=None``), or
                if ``output`` is missing or not a dict.
        """
        # getattr keeps a malformed response on the documented ValueError path
        # instead of leaking an AttributeError.
        status_code = getattr(resp, "status_code", None)
        if status_code != HTTPStatus.OK:
            error_msg = getattr(resp, "message", "Unknown error")
            error_code = getattr(resp, "code", "N/A")
            raise ValueError(
                f"DashScope API error: [Code={error_code}, "
                f"Status={status_code}] {error_msg}"
            )

        output = getattr(resp, "output", None)
        if not isinstance(output, dict):
            raise ValueError(
                "Invalid API response: missing or malformed 'output' field"
            )

        return output

    def _call_text_embedding_api(
        self,
        input: TEXT,
        dimension: int,
        output_type: str,
        text_type: Optional[str] = None,
    ) -> dict:
        """Call DashScope TextEmbedding API.

        Args:
            input (TEXT): Input text to embed.
            dimension (int): Target embedding dimension.
            output_type (str): Output type ("dense" or "sparse").
            text_type (Optional[str]): Text type ("query" or "document").
                Omitted from the request when ``None``.

        Returns:
            dict: API response output field.

        Raises:
            RuntimeError: If obtaining the connection or the API call itself
                raises any exception (the original is chained as ``__cause__``).
            ValueError: If API returns error response.
        """
        try:
            # Prepare API call parameters
            call_params = {
                "model": self.model,
                "input": input,
                "dimension": dimension,
                "output_type": output_type,
            }

            # Add optional text_type parameter if provided
            if text_type is not None:
                call_params["text_type"] = text_type

            resp = self._get_connection().TextEmbedding.call(**call_params)
        except Exception as e:
            raise RuntimeError(f"Failed to call DashScope API: {e!s}") from e

        return self._extract_output(resp)

    def _call_rerank_api(
        self,
        query: str,
        documents: list[str],
        top_n: int,
    ) -> dict:
        """Call DashScope TextReRank API.

        Args:
            query (str): Query text for semantic matching.
            documents (list[str]): List of document texts to re-rank.
            top_n (int): Maximum number of documents to return.

        Returns:
            dict: API response output field containing re-ranked results.

        Raises:
            RuntimeError: If obtaining the connection or the API call itself
                raises any exception (the original is chained as ``__cause__``).
            ValueError: If API returns error response.
        """
        try:
            resp = self._get_connection().TextReRank.call(
                model=self.model,
                query=query,
                documents=documents,
                top_n=top_n,
                # Callers map results back by index, so document text is not
                # requested in the response.
                return_documents=False,
            )
        except Exception as e:
            raise RuntimeError(f"Failed to call DashScope API: {e!s}") from e

        return self._extract_output(resp)


================================================
FILE: python/zvec/extension/qwen_rerank_function.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from typing import Optional

from ..model.doc import Doc
from .qwen_function import QwenFunctionBase
from .rerank_function import RerankFunction


class QwenReRanker(QwenFunctionBase, RerankFunction):
    """Re-ranker using Qwen (DashScope) cross-encoder API for semantic re-ranking.

    This re-ranker leverages DashScope's TextReRank service to perform
    cross-encoder style re-ranking. It sends query and document pairs to the
    API and receives relevance scores based on deep semantic understanding.

    The re-ranker is suitable for single-vector or multi-vector search scenarios
    where semantic relevance to a specific query is required.

    Args:
        query (str): Query text for semantic re-ranking. **Required**.
        topn (int, optional): Maximum number of documents to return after re-ranking.
            Defaults to 10.
        rerank_field (str): Document field name to use as re-ranking input text.
            **Required** (e.g., "content", "title", "body").
        model (str, optional): DashScope re-ranking model identifier.
            Defaults to ``"gte-rerank-v2"``.
        api_key (Optional[str], optional): DashScope API authentication key.
            If not provided, reads from ``DASHSCOPE_API_KEY`` environment variable.

    Raises:
        ValueError: If ``query`` is empty/None, ``rerank_field`` is None,
            or API key is not available.

    Note:
        - Requires ``dashscope`` Python package installed
        - Documents without valid content in ``rerank_field`` are skipped
        - API rate limits and quotas apply per DashScope subscription

    Example:
        >>> reranker = QwenReRanker(
        ...     query="machine learning algorithms",
        ...     topn=5,
        ...     rerank_field="content",
        ...     model="gte-rerank-v2",
        ...     api_key="your-api-key"
        ... )
        >>> # Use in collection.query(reranker=reranker)
    """

    def __init__(
        self,
        query: Optional[str] = None,
        topn: int = 10,
        rerank_field: Optional[str] = None,
        model: str = "gte-rerank-v2",
        api_key: Optional[str] = None,
    ):
        """Initialize QwenReRanker with query and configuration.

        Args:
            query (Optional[str]): Query text for semantic matching. Required.
            topn (int): Number of top results to return.
            rerank_field (Optional[str]): Document field for re-ranking input.
                Required.
            model (str): DashScope model name.
            api_key (Optional[str]): API key or None to use environment variable.

        Raises:
            ValueError: If the API key is unavailable, ``query`` is empty, or
                ``rerank_field`` is None (checked in that order).
        """
        QwenFunctionBase.__init__(self, model=model, api_key=api_key)
        RerankFunction.__init__(self, topn=topn, rerank_field=rerank_field)

        if not query:
            raise ValueError("Query is required for QwenReRanker")
        # Fail fast: without a field name there is no text to send to the API.
        if rerank_field is None:
            raise ValueError("rerank_field is required for QwenReRanker")
        self._query = query

    @property
    def query(self) -> str:
        """str: Query text used for semantic re-ranking."""
        return self._query

    def rerank(self, query_results: dict[str, list[Doc]]) -> list[Doc]:
        """Re-rank documents using Qwen's TextReRank API.

        Sends document texts to DashScope TextReRank service along with the query.
        Results are returned in the order the API lists them, each carrying the
        API's relevance score.

        Args:
            query_results (dict[str, list[Doc]]): Mapping from vector field names
                to lists of retrieved documents. Documents from all fields are
                deduplicated and re-ranked together.

        Returns:
            list[Doc]: Re-ranked documents with updated ``score`` fields
                containing relevance scores from the API. ``topn`` is passed to
                the API as ``top_n``. An empty list when ``query_results`` is
                empty.

        Raises:
            ValueError: If no valid documents are found, the API reports an
                error, or the API response is malformed (missing ``results``,
                missing ``index``/``relevance_score``, or an index that does
                not refer to a submitted document).
            RuntimeError: If the API call itself fails.

        Note:
            - Duplicate documents (same ID) across fields are processed once
            - Documents with empty/missing ``rerank_field`` content are skipped
            - Returned scores are relevance scores from the cross-encoder model
        """
        if not query_results:
            return []

        # Collect and deduplicate documents. doc_ids and contents are parallel
        # lists: the API identifies results by position in `contents`.
        id_to_doc: dict[str, Doc] = {}
        doc_ids: list[str] = []
        contents: list[str] = []

        for query_result in query_results.values():
            for doc in query_result:
                doc_id = doc.id
                if doc_id in id_to_doc:
                    continue

                # Extract text content from specified field
                field_value = doc.field(self.rerank_field)
                rank_content = str(field_value).strip() if field_value else ""
                if not rank_content:
                    continue

                id_to_doc[doc_id] = doc
                doc_ids.append(doc_id)
                contents.append(rank_content)

        if not contents:
            raise ValueError("No documents to rerank")

        # Call DashScope TextReRank API
        output = self._call_rerank_api(
            query=self.query,
            documents=contents,
            top_n=self.topn,
        )

        ranked_items = output.get("results")
        if not isinstance(ranked_items, list):
            raise ValueError(
                "Invalid API response: 'results' field is missing or not a list"
            )

        # Build result list with updated scores
        results: list[Doc] = []
        for item in ranked_items:
            try:
                idx = item["index"]
                score = item["relevance_score"]
            except (KeyError, TypeError, IndexError) as e:
                raise ValueError(
                    "Invalid API response: rerank result missing 'index' or "
                    "'relevance_score'"
                ) from e

            # Reject negative indices explicitly: Python would otherwise wrap
            # them around and silently score the wrong document.
            if not isinstance(idx, int) or not 0 <= idx < len(doc_ids):
                raise ValueError(
                    f"Invalid API response: rerank result index {idx!r} is out of range"
                )

            doc = id_to_doc[doc_ids[idx]]
            results.append(doc._replace(score=score))

        return results


================================================
FILE: python/zvec/extension/rerank_function.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import Optional

from ..model.doc import Doc


class RerankFunction(ABC):
    """Common interface for strategies that re-order search results.

    A re-ranker takes the output of one or more vector queries and applies a
    secondary scoring strategy to it. Instances are passed to the ``query()``
    method of ``Collection`` through its ``reranker`` parameter.

    Args:
        topn (int, optional): How many top documents to keep after re-ranking.
            Defaults to 10.
        rerank_field (Optional[str], optional): Name of the document field
            whose content feeds the re-ranking (e.g., document title or body).
            Defaults to None.

    Note:
        Concrete subclasses have to provide ``rerank()``; the class cannot be
        instantiated otherwise.
    """

    def __init__(
        self,
        topn: int = 10,
        rerank_field: Optional[str] = None,
    ):
        # Stored privately and exposed read-only through the properties below.
        self._rerank_field = rerank_field
        self._topn = topn

    @property
    def rerank_field(self) -> Optional[str]:
        """Optional[str]: Name of the field supplying the re-ranking input."""
        return self._rerank_field

    @property
    def topn(self) -> int:
        """int: How many top documents are returned after re-ranking."""
        return self._topn

    @abstractmethod
    def rerank(self, query_results: dict[str, list[Doc]]) -> list[Doc]:
        """Re-order the documents produced by one or more vector queries.

        Args:
            query_results (dict[str, list[Doc]]): Retrieved documents keyed by
                vector field name; each list is sorted by relevance.

        Returns:
            list[Doc]: The re-ranked documents (length ≤ ``topn``), each with
                an updated ``score`` field.
        """


================================================
FILE: python/zvec/extension/sentence_transformer_embedding_function.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from typing import ClassVar, Literal, Optional

import numpy as np

from ..common.constants import TEXT, DenseVectorType, SparseVectorType
from .embedding_function import DenseEmbeddingFunction, SparseEmbeddingFunction
from .sentence_transformer_function import SentenceTransformerFunctionBase


class DefaultLocalDenseEmbedding(
    SentenceTransformerFunctionBase, DenseEmbeddingFunction[TEXT]
):
    """Default local dense embedding using all-MiniLM-L6-v2 model.

    This is the default implementation for dense text embedding that uses the
    ``all-MiniLM-L6-v2`` model from Hugging Face by default. This model provides
    a good balance between speed and quality for general-purpose text embedding.

    The class provides text-to-vector dense embedding capabilities using the
    sentence-transformers library. It supports models from Hugging Face Hub and
    ModelScope, runs locally without API calls, and supports CPU/GPU acceleration.

    The model produces 384-dimensional embeddings and is optimized for semantic
    similarity tasks. It runs locally without requiring API keys.

    Args:
        model_source (Literal["huggingface", "modelscope"], optional): Model source.
            - ``"huggingface"``: Use Hugging Face Hub (default, for international users)
            - ``"modelscope"``: Use ModelScope (recommended for users in China)
            Defaults to ``"huggingface"``.
        device (Optional[str], optional): Device to run the model on.
            Options: ``"cpu"``, ``"cuda"``, ``"mps"`` (for Apple Silicon), or ``None``
            for automatic detection. Defaults to ``None``.
        normalize_embeddings (bool, optional): Whether to normalize embeddings to
            unit length (L2 normalization). Useful for cosine similarity.
            Defaults to ``True``.
        batch_size (int, optional): Batch size for encoding. Defaults to ``32``.
        **kwargs: Additional parameters for future extension.

    Attributes:
        dimension (int): Always 384 for both models.
        model_name (str): "all-MiniLM-L6-v2" (HF) or "iic/nlp_gte_sentence-embedding_chinese-small" (MS).
        model_source (str): The model source being used.
        device (str): The device the model is running on.

    Raises:
        ValueError: If the model cannot be loaded or input is invalid.
        TypeError: If input to ``embed()`` is not a string.
        RuntimeError: If model inference fails.

    Note:
        - Requires Python 3.10, 3.11, or 3.12
        - Requires the ``sentence-transformers`` package:
          ``pip install sentence-transformers``
        - For ModelScope, also requires: ``pip install modelscope``
        - First run downloads the model (~50-80MB) from chosen source
        - Hugging Face cache: ``~/.cache/torch/sentence_transformers/``
        - ModelScope cache: ``~/.cache/modelscope/hub/``
        - No API keys or network required after initial download
        - Inference speed: ~1000 sentences/sec on CPU, ~10000 on GPU

        **For users in China:**

        If you encounter Hugging Face access issues, use ModelScope instead:

        .. code-block:: python

            # Recommended for users in China
            emb = DefaultLocalDenseEmbedding(model_source="modelscope")

        Alternatively, use Hugging Face mirror:

        .. code-block:: bash

            export HF_ENDPOINT=https://hf-mirror.com
            # Then use default Hugging Face mode

    Examples:
        >>> # Basic usage with Hugging Face (default)
        >>> from zvec.extension import DefaultLocalDenseEmbedding
        >>>
        >>> emb_func = DefaultLocalDenseEmbedding()
        >>> vector = emb_func.embed("Hello, world!")
        >>> len(vector)
        384
        >>> isinstance(vector, list)
        True

        >>> # Recommended for users in China (uses ModelScope)
        >>> emb_func = DefaultLocalDenseEmbedding(model_source="modelscope")
        >>> vector = emb_func.embed("你好，世界！")  # Works well with Chinese text
        >>> len(vector)
        384

        >>> # Alternative for China users: Use Hugging Face mirror
        >>> import os
        >>> os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
        >>> emb_func = DefaultLocalDenseEmbedding()  # Uses HF mirror
        >>> vector = emb_func.embed("Hello, world!")

        >>> # Using GPU for faster inference
        >>> emb_func = DefaultLocalDenseEmbedding(device="cuda")
        >>> vector = emb_func("Machine learning is fascinating")
        >>> # Normalized vector has unit length
        >>> import numpy as np
        >>> np.linalg.norm(vector)
        1.0

        >>> # Batch processing
        >>> texts = ["First text", "Second text", "Third text"]
        >>> vectors = [emb_func.embed(text) for text in texts]
        >>> len(vectors)
        3
        >>> all(len(v) == 384 for v in vectors)
        True

        >>> # Semantic similarity
        >>> v1 = emb_func.embed("The cat sits on the mat")
        >>> v2 = emb_func.embed("A feline rests on a rug")
        >>> v3 = emb_func.embed("Python programming")
        >>> similarity_high = np.dot(v1, v2)  # Similar sentences
        >>> similarity_low = np.dot(v1, v3)   # Different topics
        >>> similarity_high > similarity_low
        True

        >>> # Error handling
        >>> try:
        ...     emb_func.embed("")  # Empty string
        ... except ValueError as e:
        ...     print(f"Error: {e}")
        Error: Input text cannot be empty or whitespace only

    See Also:
        - ``DenseEmbeddingFunction``: Base class for dense embeddings
        - ``DefaultLocalSparseEmbedding``: Sparse embedding with SPLADE
        - ``QwenDenseEmbedding``: Alternative using Qwen API
    """

    def __init__(
        self,
        model_source: Literal["huggingface", "modelscope"] = "huggingface",
        device: Optional[str] = None,
        normalize_embeddings: bool = True,
        batch_size: int = 32,
        **kwargs,
    ):
        """Set up the default dense embedding model and record its dimension.

        The model name is picked from ``model_source``: ModelScope uses
        ``iic/nlp_gte_sentence-embedding_chinese-small``; any other source
        uses ``all-MiniLM-L6-v2``. The model is loaded during construction so
        that the embedding dimension can be read from it.

        Args:
            model_source (Literal["huggingface", "modelscope"]): Where to
                obtain the model. Defaults to ``"huggingface"``.
            device (Optional[str]): Device forwarded to the base class
                (e.g. ``"cpu"``, ``"cuda"``, ``"mps"``). Defaults to ``None``.
            normalize_embeddings (bool): Passed to the model's ``encode`` call
                on every ``embed()``. Defaults to ``True``.
            batch_size (int): Passed to the model's ``encode`` call on every
                ``embed()``. Defaults to ``32``.
            **kwargs: Stored unchanged and exposed through ``extra_params``.

        Raises:
            ValueError: If the base class rejects ``model_source``.
        """
        # ModelScope gets a Chinese-oriented model; every other source uses MiniLM.
        chosen_model = (
            "iic/nlp_gte_sentence-embedding_chinese-small"
            if model_source == "modelscope"
            else "all-MiniLM-L6-v2"
        )

        # The shared base class owns model loading.
        SentenceTransformerFunctionBase.__init__(
            self, model_name=chosen_model, model_source=model_source, device=device
        )

        self._normalize_embeddings = normalize_embeddings
        self._batch_size = batch_size

        # Loading the model here lets us report the dimension without a later call.
        self._dimension = self._get_model().get_sentence_embedding_dimension()

        # Keep unrecognised keyword arguments for later inspection.
        self._extra_params = kwargs

    @property
    def dimension(self) -> int:
        """int: The expected dimensionality of the embedding vector."""
        return self._dimension

    @property
    def extra_params(self) -> dict:
        """dict: Extra parameters for model-specific customization."""
        return self._extra_params

    def __call__(self, input: str) -> DenseVectorType:
        """Make the embedding function callable."""
        return self.embed(input)

    def embed(self, input: str) -> DenseVectorType:
        """Generate dense embedding vector for the input text.

        This method uses the Sentence Transformer model to convert input text
        into a dense vector representation. The model runs locally without
        requiring API calls.

        Args:
            input (str): Input text string to embed. Must be non-empty after
                stripping whitespace. Maximum length depends on the model used
                (typically 128-512 tokens for most models).

        Returns:
            DenseVectorType: A list of floats representing the embedding vector.
                Length equals ``self.dimension``. If ``normalize_embeddings=True``,
                the vector has unit length. Example:
                ``[0.123, -0.456, 0.789, ...]``

        Raises:
            TypeError: If ``input`` is not a string.
            ValueError: If input is empty or whitespace-only.
            RuntimeError: If model inference fails.

        Examples:
            >>> emb = DefaultLocalDenseEmbedding()
            >>> vector = emb.embed("Natural language processing")
            >>> len(vector)
            384
            >>> isinstance(vector[0], float)
            True

            >>> # Normalized vectors have unit length
            >>> import numpy as np
            >>> emb = DefaultLocalDenseEmbedding(normalize_embeddings=True)
            >>> vector = emb.embed("Test sentence")
            >>> np.linalg.norm(vector)
            1.0

            >>> # Error: empty input
            >>> emb.embed("   ")
            ValueError: Input text cannot be empty or whitespace only

            >>> # Error: non-string input
            >>> emb.embed(123)
            TypeError: Expected 'input' to be str, got int

            >>> # Semantic similarity example
            >>> v1 = emb.embed("The cat sits on the mat")
            >>> v2 = emb.embed("A feline rests on a rug")
            >>> similarity = np.dot(v1, v2)  # High similarity due to semantic meaning
            >>> similarity > 0.7
            True

        Note:
            - First call may be slower due to model loading
            - Subsequent calls are much faster as the model stays in memory
            - For batch processing, consider encoding multiple texts together
              (though this method handles single texts only)
            - GPU acceleration provides 5-10x speedup over CPU
        """
        if not isinstance(input, str):
            raise TypeError(f"Expected 'input' to be str, got {type(input).__name__}")

        input = input.strip()
        if not input:
            raise ValueError("Input text cannot be empty or whitespace only")

        try:
            model = self._get_model()
            embedding = model.encode(
                input,
                convert_to_numpy=True,
                normalize_embeddings=self._normalize_embeddings,
                batch_size=self._batch_size,
            )

            # Convert numpy array to list
            if isinstance(embedding, np.ndarray):
                embedding_list = embedding.tolist()
            else:
                embedding_list = list(embedding)

            # Validate dimension
            if len(embedding_list) != self.dimension:
                raise ValueError(
                    f"Dimension mismatch: expected {self.dimension}, "
                    f"got {len(embedding_list)}"
                )

            return embedding_list

        except Exception as e:
            if isinstance(e, (TypeError, ValueError)):
                raise
            raise RuntimeError(f"Failed to generate embedding: {e!s}") from e


class DefaultLocalSparseEmbedding(
    SentenceTransformerFunctionBase, SparseEmbeddingFunction[TEXT]
):
    """Default local sparse embedding using SPLADE model.

    This class provides sparse vector embedding using the SPLADE (SParse Lexical
    AnD Expansion) model. SPLADE generates sparse, interpretable representations
    where each dimension corresponds to a vocabulary term with learned importance
    weights. It's ideal for lexical matching, BM25-style retrieval, and hybrid
    search scenarios.

    The default model is ``naver/splade-cocondenser-ensembledistil``, which is
    publicly available without authentication. It produces sparse vectors with
    thousands of dimensions but only hundreds of non-zero values, making them
    efficient for storage and retrieval while maintaining strong lexical matching.

    **Model Caching:**

    This class uses class-level caching to share the SPLADE model across all instances
    with the same configuration (model_source, device). This significantly reduces
    memory usage when creating multiple instances for different encoding types
    (query vs document).

    **Cache Management:**

    The class provides methods to manage the model cache:

    - ``clear_cache()``: Clear all cached models to free memory
    - ``get_cache_info()``: Get information about cached models
    - ``remove_from_cache(model_source, device)``: Remove a specific model from cache

    .. note::
        **Why not use splade-v3?**

        The newer ``naver/splade-v3`` model is gated (requires access approval).
        We use ``naver/splade-cocondenser-ensembledistil`` instead.

        **To use splade-v3 (if you have access):**

        1. Request access at https://huggingface.co/naver/splade-v3
        2. Get your Hugging Face token from https://huggingface.co/settings/tokens
        3. Set environment variable:

           .. code-block:: bash

               export HF_TOKEN="your_huggingface_token"

        4. Or login programmatically:

           .. code-block:: python

               from huggingface_hub import login
               login(token="your_huggingface_token")

        5. To use a custom SPLADE model, you can subclass this class and override
           the model_name in ``__init__``, or create your own implementation
           inheriting from ``SentenceTransformerFunctionBase`` and
           ``SparseEmbeddingFunction``.

    Args:
        model_source (Literal["huggingface", "modelscope"], optional): Model source.
            Defaults to ``"huggingface"``. ModelScope support may vary for SPLADE models.
        device (Optional[str], optional): Device to run the model on.
            Options: ``"cpu"``, ``"cuda"``, ``"mps"`` (for Apple Silicon), or ``None``
            for automatic detection. Defaults to ``None``.
        encoding_type (Literal["query", "document"], optional): Encoding type.
            - ``"query"``: Optimize for search queries (default)
            - ``"document"``: Optimize for indexed documents
        **kwargs: Additional parameters (currently unused, for future extension).

    Attributes:
        model_name (str): Model identifier.
        model_source (str): The model source being used.
        device (str): The device the model is running on.

    Raises:
        ValueError: If the model cannot be loaded or input is invalid.
        TypeError: If input to ``embed()`` is not a string.
        RuntimeError: If model inference fails.

    Note:
        - Requires Python 3.10, 3.11, or 3.12
        - Requires the ``sentence-transformers`` package:
          ``pip install sentence-transformers``
        - First run downloads the model (~100MB) from Hugging Face
        - Cache location: ``~/.cache/torch/sentence_transformers/``
        - No API keys or authentication required
        - Sparse vectors have ~30k dimensions but only ~100-200 non-zero values
        - Best combined with dense embeddings for hybrid retrieval

        **SPLADE vs Dense Embeddings:**

        - **Dense**: Continuous semantic vectors, good for semantic similarity
        - **Sparse**: Lexical keyword-based, interpretable, good for exact matching
        - **Hybrid**: Combine both for best retrieval performance

    Examples:
        >>> # Memory-efficient: both instances share the same model (~200MB)
        >>> from zvec.extension import DefaultLocalSparseEmbedding
        >>>
        >>> # Query embedding
        >>> query_emb = DefaultLocalSparseEmbedding(encoding_type="query")
        >>> query_vec = query_emb.embed("machine learning algorithms")
        >>> type(query_vec)
        <class 'dict'>
        >>> len(query_vec)  # Only non-zero dimensions
        156

        >>> # Document embedding (shares model with query_emb)
        >>> doc_emb = DefaultLocalSparseEmbedding(encoding_type="document")
        >>> doc_vec = doc_emb.embed("Machine learning is a subset of AI")
        >>> # Total memory: ~200MB (not 400MB) thanks to model caching

        >>> # Asymmetric retrieval example
        >>> query_vec = query_emb.embed("what causes aging fast")
        >>> doc_vec = doc_emb.embed(
        ...     "UV-A light causes tanning, skin aging, and cataracts..."
        ... )
        >>>
        >>> # Calculate similarity (dot product for sparse vectors)
        >>> similarity = sum(
        ...     query_vec.get(k, 0) * doc_vec.get(k, 0)
        ...     for k in set(query_vec) | set(doc_vec)
        ... )

        >>> # Batch processing
        >>> queries = ["query 1", "query 2", "query 3"]
        >>> query_vecs = [query_emb.embed(q) for q in queries]
        >>>
        >>> documents = ["doc 1", "doc 2", "doc 3"]
        >>> doc_vecs = [doc_emb.embed(d) for d in documents]

        >>> # Inspecting sparse dimensions (output is sorted by indices)
        >>> query_vec = query_emb.embed("machine learning")
        >>> list(query_vec.items())[:5]  # First 5 dimensions (by index)
        [(10, 0.45), (23, 0.87), (56, 0.32), (89, 1.12), (120, 0.65)]
        >>>
        >>> # Sort by weight to find most important terms
        >>> sorted_by_weight = sorted(query_vec.items(), key=lambda x: x[1], reverse=True)
        >>> top_5 = sorted_by_weight[:5]  # Top 5 most important terms
        >>> top_5
        [(1023, 1.45), (245, 1.23), (8901, 0.98), (5678, 0.87), (12034, 0.76)]

        >>> # Using GPU for faster inference
        >>> sparse_emb = DefaultLocalSparseEmbedding(device="cuda")
        >>> vector = sparse_emb.embed("natural language processing")

        >>> # Hybrid retrieval example (combining dense + sparse)
        >>> from zvec.extension import DefaultLocalDenseEmbedding
        >>> dense_emb = DefaultLocalDenseEmbedding()
        >>> sparse_emb = DefaultLocalSparseEmbedding()
        >>>
        >>> query = "deep learning neural networks"
        >>> dense_vec = dense_emb.embed(query)   # [0.1, -0.3, 0.5, ...]
        >>> sparse_vec = sparse_emb.embed(query)  # {12: 0.8, 45: 1.2, ...}

        >>> # Error handling
        >>> try:
        ...     sparse_emb.embed("")  # Empty string
        ... except ValueError as e:
        ...     print(f"Error: {e}")
        Error: Input text cannot be empty or whitespace only

        >>> # Cache management
        >>> # Check cache status
        >>> info = DefaultLocalSparseEmbedding.get_cache_info()
        >>> print(f"Cached models: {info['cached_models']}")
        Cached models: 1
        >>>
        >>> # Clear cache to free memory
        >>> DefaultLocalSparseEmbedding.clear_cache()
        >>> info = DefaultLocalSparseEmbedding.get_cache_info()
        >>> print(f"Cached models: {info['cached_models']}")
        Cached models: 0
        >>>
        >>> # Remove specific model from cache
        >>> query_emb = DefaultLocalSparseEmbedding()  # Creates CPU model
        >>> cuda_emb = DefaultLocalSparseEmbedding(device="cuda")  # Creates CUDA model
        >>> info = DefaultLocalSparseEmbedding.get_cache_info()
        >>> print(f"Cached models: {info['cached_models']}")
        Cached models: 2
        >>>
        >>> # Remove only CPU model
        >>> removed = DefaultLocalSparseEmbedding.remove_from_cache(device=None)
        >>> print(f"Removed: {removed}")
        Removed: True
        >>> info = DefaultLocalSparseEmbedding.get_cache_info()
        >>> print(f"Cached models: {info['cached_models']}")
        Cached models: 1

    See Also:
        - ``SparseEmbeddingFunction``: Base class for sparse embeddings
        - ``DefaultLocalDenseEmbedding``: Dense embedding with all-MiniLM-L6-v2
        - ``QwenDenseEmbedding``: Alternative using Qwen API

    References:
        - SPLADE Paper: https://arxiv.org/abs/2109.10086
        - Model: https://huggingface.co/naver/splade-cocondenser-ensembledistil
    """

    # Class-level model cache: {(model_name, model_source, device): model}
    # Shared across all DefaultLocalSparseEmbedding instances to save memory
    _model_cache: ClassVar[dict] = {}

    @classmethod
    def clear_cache(cls) -> None:
        """Clear all cached SPLADE models from memory.

        This is useful for:
        - Freeing memory when models are no longer needed
        - Forcing a fresh model reload
        - Testing and debugging
                Examples:
            >>> # Clear cache to free memory
            >>> DefaultLocalSparseEmbedding.clear_cache()

            >>> # Or in tests to ensure fresh model loading
            >>> def test_something():
            ...     DefaultLocalSparseEmbedding.clear_cache()
            ...     emb = DefaultLocalSparseEmbedding()
            ...     # Test with fresh model
        """
        cls._model_cache.clear()

    @classmethod
    def get_cache_info(cls) -> dict:
        """Get information about currently cached models.

        Returns:
            dict: Dictionary with cache statistics:
                - cached_models (int): Number of cached model instances
                - cache_keys (list): List of cache keys (model_name, model_source, device)

        Examples:
            >>> info = DefaultLocalSparseEmbedding.get_cache_info()
            >>> print(f"Cached models: {info['cached_models']}")
            Cached models: 2
            >>> print(f"Cache keys: {info['cache_keys']}")
            Cache keys: [('naver/splade-cocondenser-ensembledistil', 'huggingface', None),
                        ('naver/splade-cocondenser-ensembledistil', 'huggingface', 'cuda')]
        """
        return {
            "cached_models": len(cls._model_cache),
            "cache_keys": list(cls._model_cache.keys()),
        }

    @classmethod
    def remove_from_cache(
        cls, model_source: str = "huggingface", device: Optional[str] = None
    ) -> bool:
        """Remove a specific model from cache.

        Args:
            model_source (str): Model source ("huggingface" or "modelscope").
                Defaults to "huggingface".
            device (Optional[str]): Device identifier. Defaults to None.

        Returns:
            bool: True if model was found and removed, False otherwise.

        Examples:
            >>> # Remove CPU model from cache
            >>> removed = DefaultLocalSparseEmbedding.remove_from_cache()
            >>> print(f"Removed: {removed}")
            True

            >>> # Remove CUDA model from cache
            >>> removed = DefaultLocalSparseEmbedding.remove_from_cache(device="cuda")
            >>> print(f"Removed: {removed}")
            True
        """
        model_name = "naver/splade-cocondenser-ensembledistil"
        cache_key = (model_name, model_source, device)

        if cache_key in cls._model_cache:
            del cls._model_cache[cache_key]
            return True
        return False

    def __init__(
        self,
        model_source: Literal["huggingface", "modelscope"] = "huggingface",
        device: Optional[str] = None,
        encoding_type: Literal["query", "document"] = "query",
        **kwargs,
    ):
        """Set up the default SPLADE sparse embedding function.

        The model is always ``naver/splade-cocondenser-ensembledistil``. The
        in-code rationale is that ``naver/splade-v3`` requires authentication
        while this variant is publicly accessible. The model is fetched once
        during construction, so a later ``embed()`` call can reuse the
        class-level cache entry.

        Args:
            model_source (Literal["huggingface", "modelscope"]): Where to
                obtain the model. Defaults to ``"huggingface"``.
            device (Optional[str]): Device forwarded to the base class
                (e.g. ``"cpu"``, ``"cuda"``, ``"mps"``). Defaults to ``None``.
            encoding_type (Literal["query", "document"]): Selects which
                encoder method ``embed()`` prefers. ``"document"`` uses the
                model's document encoder when it has one; every other value
                uses the query encoder. Defaults to ``"query"``.
            **kwargs: Stored unchanged and exposed through ``extra_params``.

        Raises:
            ValueError: If the base class rejects ``model_source``.

        Note:
            The cache key is ``(model_name, model_source, device)``, so
            instances that differ only in ``encoding_type`` share one model.

        Examples:
            >>> query_emb = DefaultLocalSparseEmbedding(encoding_type="query")
            >>> doc_emb = DefaultLocalSparseEmbedding(encoding_type="document")
            >>> DefaultLocalSparseEmbedding.get_cache_info()["cached_models"]
            1
        """
        # Publicly accessible SPLADE variant; naver/splade-v3 needs authentication.
        splade_model = "naver/splade-cocondenser-ensembledistil"

        # The shared base class owns model loading.
        SentenceTransformerFunctionBase.__init__(
            self, model_name=splade_model, model_source=model_source, device=device
        )

        self._encoding_type = encoding_type
        self._extra_params = kwargs

        # Identifies this configuration in the class-level model cache.
        self._cache_key = (splade_model, model_source, device)

        # Fetch the model now so that loading problems surface at construction.
        self._get_model()

    @property
    def extra_params(self) -> dict:
        """dict: Extra parameters for model-specific customization."""
        return self._extra_params

    def __call__(self, input: str) -> SparseVectorType:
        """Make the embedding function callable."""
        return self.embed(input)

    def embed(self, input: str) -> SparseVectorType:
        """Generate sparse embedding vector for the input text.

        This method uses the SPLADE model to convert input text into a sparse
        vector representation. The result is a dictionary where keys are dimension
        indices and values are importance weights (only non-zero values included).

        The embedding is optimized based on the ``encoding_type`` specified during
        initialization: "query" for search queries or "document" for indexed content.

        Args:
            input (str): Input text string to embed. Must be non-empty after
                stripping whitespace.

        Returns:
            SparseVectorType: A dictionary mapping dimension index to weight.
                Only non-zero dimensions are included. The dictionary is sorted
                by indices (keys) in ascending order for consistent output.
                Example: ``{10: 0.5, 245: 0.8, 1023: 1.2, 5678: 0.5}``

        Raises:
            TypeError: If ``input`` is not a string.
            ValueError: If input is empty or whitespace-only.
            RuntimeError: If model inference fails.

        Examples:
            >>> # Query embedding
            >>> query_emb = DefaultLocalSparseEmbedding(encoding_type="query")
            >>> query_vec = query_emb.embed("machine learning")
            >>> isinstance(query_vec, dict)
            True

        Note:
            - First call may be slower due to model loading
            - Subsequent calls are much faster as the model stays in memory
            - GPU acceleration provides significant speedup
            - Sparse vectors are memory-efficient (only store non-zero values)
        """
        if not isinstance(input, str):
            raise TypeError(f"Expected 'input' to be str, got {type(input).__name__}")

        input = input.strip()
        if not input:
            raise ValueError("Input text cannot be empty or whitespace only")

        try:
            model = self._get_model()

            # Use appropriate encoding method based on type
            if self._encoding_type == "document" and hasattr(model, "encode_document"):
                # Use document encoding
                sparse_matrix = model.encode_document([input])
            elif hasattr(model, "encode_query"):
                # Use query encoding (default)
                sparse_matrix = model.encode_query([input])
            else:
                # Fallback: manual implementation for older sentence-transformers
                return self._manual_sparse_encode(input)

            # Convert sparse matrix to dictionary
            # SPLADE returns shape [1, vocab_size] for single input

            # Check if it's a sparse matrix (duck typing - has toarray method)
            if hasattr(sparse_matrix, "toarray"):
                # Sparse matrix (CSR/CSC/etc.) - convert to dense array
                sparse_array = sparse_matrix[0].toarray().flatten()
                sparse_dict = {
                    int(idx): float(val)
                    for idx, val in enumerate(sparse_array)
                    if val > 0
                }
            else:
                # Dense array format (numpy array or similar)
                if isinstance(sparse_matrix, np.ndarray):
                    sparse_array = sparse_matrix[0]
                else:
                    sparse_array = sparse_matrix

                sparse_dict = {
                    int(idx): float(val)
                    for idx, val in enumerate(sparse_array)
                    if val > 0
                }

            # Sort by indices (keys) to ensure consistent ordering
            return dict(sorted(sparse_dict.items()))

        except Exception as e:
            if isinstance(e, (TypeError, ValueError)):
                raise
            raise RuntimeError(f"Failed to generate sparse embedding: {e!s}") from e

    def _manual_sparse_encode(self, input: str) -> SparseVectorType:
        """Fallback manual SPLADE encoding for older sentence-transformers.

        Args:
            input (str): Input text to encode.

        Returns:
            SparseVectorType: Sparse vector as dictionary.
        """
        import torch

        model = self._get_model()

        # Tokenize input
        features = model.tokenize([input])

        # Move to correct device
        features = {k: v.to(model.device) for k, v in features.items()}

        # Forward pass with no gradient
        with torch.no_grad():
            embeddings = model.forward(features)

            # Get logits from model output
            # SPLADE models typically output 'token_embeddings'
            if isinstance(embeddings, dict) and "token_embeddings" in embeddings:
                logits = embeddings["token_embeddings"][0]  # First batch item
            elif hasattr(embeddings, "token_embeddings"):
                logits = embeddings.token_embeddings[0]
            # Fallback: try to get first value
            elif isinstance(embeddings, dict):
                logits = next(iter(embeddings.values()))[0]
            else:
                logits = embeddings[0]

            # Apply SPLADE activation: log(1 + relu(x))
            relu_log = torch.log(1 + torch.relu(logits))

            # Max pooling over token dimension (reduce to vocab size)
            if relu_log.dim() > 1:
                sparse_vec, _ = torch.max(relu_log, dim=0)
            else:
                sparse_vec = relu_log

            # Convert to sparse dictionary (only non-zero values)
            sparse_vec_np = sparse_vec.cpu().numpy()
            sparse_dict = {
                int(idx): float(val) for idx, val in enumerate(sparse_vec_np) if val > 0
            }

            # Sort by indices (keys) to ensure consistent ordering
            return dict(sorted(sparse_dict.items()))

    def _get_model(self):
        """Load or retrieve the SPLADE model from class-level cache.

        Returns:
            SentenceTransformer: The loaded SPLADE model instance.

        Raises:
            ImportError: If required packages are not installed.
            ValueError: If model cannot be loaded.

        Note:
            Models are cached at class level and shared across all instances
            with the same (model_name, model_source, device) configuration.
            This allows memory-efficient usage when creating multiple instances
            with different encoding_type settings.
        """
        # Check class-level cache first
        if self._cache_key in self._model_cache:
            return self._model_cache[self._cache_key]

        # Use parent class method to load model
        model = super()._get_model()

        # Cache the model at class level
        self._model_cache[self._cache_key] = model

        return model


================================================
FILE: python/zvec/extension/sentence_transformer_function.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from typing import Literal, Optional

from ..tool import require_module


class SentenceTransformerFunctionBase:
    """Shared model-handling logic for Sentence Transformer based functions.

    The class stores the model configuration (name, source, device) and lazily
    loads a ``sentence_transformers.SentenceTransformer`` instance on first
    use, either directly by name or, for ModelScope, from a directory returned
    by ``snapshot_download``.

    It is intended as an internal base class rather than something to use
    directly. Concrete implementations named by the project documentation:
    - ``SentenceTransformerEmbeddingFunction`` for dense embeddings
    - ``SentenceTransformerSparseEmbeddingFunction`` for sparse embeddings
    - ``DefaultDenseEmbedding`` for default dense embeddings
    - ``DefaultSparseEmbedding`` for default sparse embeddings

    Args:
        model_name (str): Model identifier or local path.
        model_source (Literal["huggingface", "modelscope"]): Model source.
        device (Optional[str]): Device to run the model on.

    Note:
        - Subclasses are expected to also inherit from the appropriate
          Protocol (Dense/Sparse).
        - The loaded model is kept on the instance and reused afterwards.
    """

    def __init__(
        self,
        model_name: str,
        model_source: Literal["huggingface", "modelscope"] = "huggingface",
        device: Optional[str] = None,
    ):
        """Record the model configuration; the model itself is not loaded here.

        Args:
            model_name (str): Model identifier or local path.
            model_source (Literal["huggingface", "modelscope"]): Model source.
            device (Optional[str]): Device to run the model on.

        Raises:
            ValueError: If ``model_source`` is neither ``"huggingface"`` nor
                ``"modelscope"``.
        """
        allowed_sources = ("huggingface", "modelscope")
        if model_source not in allowed_sources:
            raise ValueError(
                f"Invalid model_source: '{model_source}'. "
                "Must be 'huggingface' or 'modelscope'."
            )

        # ``_model`` stays None until the first ``_get_model()`` call.
        self._model = None
        self._device = device
        self._model_source = model_source
        self._model_name = model_name

    @property
    def model_name(self) -> str:
        """str: The model name (or local path) given at construction."""
        return self._model_name

    @property
    def model_source(self) -> str:
        """str: The configured model source ("huggingface" or "modelscope")."""
        return self._model_source

    @property
    def device(self) -> str:
        """str: The device reported by the loaded model.

        Accessing this property goes through ``_get_model()``, so it loads
        the model if that has not happened yet.
        """
        loaded = self._get_model()
        if loaded is None:
            return self._device or "cpu"
        return str(loaded.device)

    def _get_model(self):
        """Return the model, loading it on the first call.

        Returns:
            The object built by ``sentence_transformers.SentenceTransformer``
            (stored on the instance for subsequent calls).

        Raises:
            ImportError: If a required package cannot be imported. A dedicated
                message is used when the ModelScope package is the one missing.
            ValueError: If any other error occurs while loading the model.
        """
        if self._model is not None:
            return self._model

        try:
            st_module = require_module("sentence_transformers")

            if self._model_source == "modelscope":
                # ModelScope: fetch the model files first, then load them
                # from the returned directory.
                require_module("modelscope")
                from modelscope.hub.snapshot_download import snapshot_download

                location = snapshot_download(self._model_name)
            else:
                # Hugging Face (default): the name or path is passed as is.
                location = self._model_name

            self._model = st_module.SentenceTransformer(
                location, device=self._device, trust_remote_code=True
            )

        except ImportError as exc:
            if self._model_source == "modelscope" and "modelscope" in str(exc):
                raise ImportError(
                    "ModelScope support requires the 'modelscope' package. "
                    "Please install it with: pip install modelscope"
                ) from exc
            raise
        except Exception as exc:
            raise ValueError(
                f"Failed to load Sentence Transformer model '{self._model_name}' "
                f"from {self._model_source}: {exc!s}"
            ) from exc

        return self._model

    def _is_sparse_model(self) -> bool:
        """Report whether the loaded model exposes sparse-encoding methods.

        Returns:
            bool: True if the model has an ``encode_query`` or an
            ``encode_document`` attribute.
        """
        loaded = self._get_model()
        sparse_markers = ("encode_query", "encode_document")
        return any(hasattr(loaded, marker) for marker in sparse_markers)


================================================
FILE: python/zvec/extension/sentence_transformer_rerank_function.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from typing import Literal, Optional

from ..model.doc import Doc
from ..tool import require_module
from .rerank_function import RerankFunction
from .sentence_transformer_function import SentenceTransformerFunctionBase


class DefaultLocalReRanker(SentenceTransformerFunctionBase, RerankFunction):
    """Re-ranker using Sentence Transformer cross-encoder models for semantic re-ranking.

    This re-ranker leverages pre-trained cross-encoder models to perform deep semantic
    re-ranking of search results. It runs locally without API calls, supports GPU
    acceleration, and works with models from Hugging Face or ModelScope.

    Cross-encoder models evaluate query-document pairs jointly, providing more
    accurate relevance scores than bi-encoder (embedding-based) similarity.

    Args:
        query (str): Query text for semantic re-ranking. **Required**.
        topn (int, optional): Maximum number of documents to return after re-ranking.
            Defaults to 10.
        rerank_field (Optional[str], optional): Document field name to use as
            re-ranking input text. **Required** (e.g., "content", "title", "body").
        model_name (str, optional): Cross-encoder model identifier or local path.
            Defaults to ``"cross-encoder/ms-marco-MiniLM-L6-v2"`` (MS MARCO MiniLM).
            Common options:
            - ``"cross-encoder/ms-marco-MiniLM-L6-v2"``: Lightweight, fast (~80MB, recommended)
            - ``"cross-encoder/ms-marco-MiniLM-L12-v2"``: Better accuracy (~120MB)
            - ``"BAAI/bge-reranker-base"``: BGE Reranker Base (~280MB)
            - ``"BAAI/bge-reranker-large"``: BGE Reranker Large (highest quality, ~560MB)
        model_source (Literal["huggingface", "modelscope"], optional): Model source.
            Defaults to ``"huggingface"``.
            - ``"huggingface"``: Load from Hugging Face Hub
            - ``"modelscope"``: Load from ModelScope (recommended for users in China)
        device (Optional[str], optional): Device to run the model on.
            Options: ``"cpu"``, ``"cuda"``, ``"mps"`` (for Apple Silicon), or ``None``
            for automatic detection. Defaults to ``None``.
        batch_size (int, optional): Batch size for processing query-document pairs.
            Larger values speed up processing but use more memory. Defaults to ``32``.

    Attributes:
        query (str): The query text used for re-ranking.
        topn (int): Maximum number of documents to return.
        rerank_field (Optional[str]): Field name used for re-ranking input.
        model_name (str): The cross-encoder model being used.
        model_source (str): The model source ("huggingface" or "modelscope").
        device (str): The device the model is running on.

    Raises:
        ValueError: If ``query`` is empty/None, ``rerank_field`` is None,
            or model cannot be loaded.
        TypeError: If input types are invalid.
        RuntimeError: If model inference fails.

    Note:
        - Requires Python 3.10, 3.11, or 3.12
        - Requires ``sentence-transformers`` package: ``pip install sentence-transformers``
        - For ModelScope support, also requires: ``pip install modelscope``
        - First run downloads the model (~80-560MB depending on model) from chosen source
        - No API keys or network required after initial download
        - Cross-encoders are slower than bi-encoders but more accurate
        - GPU acceleration provides significant speedup (5-10x)
        - NOTE(review): the ``rerank_field is None`` check is not performed in
          this class; presumably it happens in ``RerankFunction`` — confirm.

        **MS MARCO MiniLM-L6-v2 Model (Default):**

        The default model ``cross-encoder/ms-marco-MiniLM-L6-v2`` is a lightweight and
        efficient cross-encoder trained on MS MARCO dataset. It provides:

        - Fast inference speed (suitable for real-time applications)
        - Small model size (~80MB, quick to download)
        - Good balance between speed and accuracy
        - Trained on 500K+ query-document pairs
        - Public availability without authentication

        **For users in China:**

        If you encounter Hugging Face access issues, use ModelScope instead:

        .. code-block:: python

            # Recommended for users in China
            reranker = DefaultLocalReRanker(
                query="machine learning algorithms",
                rerank_field="content",
                model_source="modelscope"
            )

        Alternatively, use Hugging Face mirror:

        .. code-block:: bash

            export HF_ENDPOINT=https://hf-mirror.com

    Examples:
        >>> # Basic usage with default MS MARCO MiniLM model
        >>> from zvec.extension import DefaultLocalReRanker
        >>>
        >>> reranker = DefaultLocalReRanker(
        ...     query="machine learning algorithms",
        ...     topn=5,
        ...     rerank_field="content"
        ... )
        >>>
        >>> # Use in collection.query()
        >>> results = collection.query(
        ...     data={"vector_field": query_vector},
        ...     reranker=reranker,
        ...     topk=20
        ... )

        >>> # Using ModelScope for users in China
        >>> reranker = DefaultLocalReRanker(
        ...     query="deep learning",
        ...     topn=10,
        ...     rerank_field="content",
        ...     model_source="modelscope"
        ... )

        >>> # Using larger model for better quality
        >>> reranker = DefaultLocalReRanker(
        ...     query="neural networks",
        ...     topn=5,
        ...     rerank_field="content",
        ...     model_name="BAAI/bge-reranker-large",
        ...     device="cuda",
        ...     batch_size=64
        ... )

        >>> # Direct rerank call (for testing)
        >>> query_results = {
        ...     "vector1": [
        ...         Doc(id="1", score=0.9, fields={"content": "Machine learning is..."}),
        ...         Doc(id="2", score=0.8, fields={"content": "Deep learning is..."}),
        ...     ]
        ... }
        >>> reranked = reranker.rerank(query_results)
        >>> for doc in reranked:
        ...     print(f"ID: {doc.id}, Score: {doc.score:.4f}")
        ID: 2, Score: 0.9234
        ID: 1, Score: 0.8567

    See Also:
        - ``RerankFunction``: Abstract base class for re-rankers
        - ``QwenReRanker``: Re-ranker using Qwen API
        - ``RrfReRanker``: Multi-vector re-ranker using RRF
        - ``WeightedReRanker``: Multi-vector re-ranker using weighted scores

    References:
        - MS MARCO Cross-Encoder: https://huggingface.co/cross-encoder/ms-marco-MiniLM-L6-v2
        - BGE Reranker: https://huggingface.co/BAAI/bge-reranker-base
        - Cross-Encoder vs Bi-Encoder: https://www.sbert.net/examples/applications/cross-encoder/README.html
    """

    def __init__(
        self,
        query: Optional[str] = None,
        topn: int = 10,
        rerank_field: Optional[str] = None,
        model_name: str = "cross-encoder/ms-marco-MiniLM-L6-v2",
        model_source: Literal["huggingface", "modelscope"] = "huggingface",
        device: Optional[str] = None,
        batch_size: int = 32,
    ):
        """Initialize DefaultLocalReRanker with query and configuration.

        The cross-encoder model is loaded eagerly here so that configuration
        errors surface at construction time rather than on the first
        ``rerank()`` call.

        Args:
            query (Optional[str]): Query text for semantic matching. Required.
            topn (int): Number of top results to return.
            rerank_field (Optional[str]): Document field for re-ranking input.
            model_name (str): Cross-encoder model identifier.
            model_source (Literal["huggingface", "modelscope"]): Model source.
            device (Optional[str]): Target device ("cpu", "cuda", "mps", or None).
            batch_size (int): Batch size for processing query-document pairs.

        Raises:
            ValueError: If query is empty, the model cannot be loaded, or the
                loaded object has no ``predict`` method.
        """
        # Initialize base class for model loading
        SentenceTransformerFunctionBase.__init__(
            self, model_name=model_name, model_source=model_source, device=device
        )

        # Initialize rerank function
        RerankFunction.__init__(self, topn=topn, rerank_field=rerank_field)

        # Validate query
        if not query:
            raise ValueError("Query is required for DefaultLocalReRanker")
        self._query = query
        self._batch_size = batch_size

        # Load and validate cross-encoder model
        model = self._get_model()
        if not hasattr(model, "predict"):
            raise ValueError(
                f"Model '{model_name}' does not appear to be a cross-encoder model. "
                "Cross-encoder models should have a 'predict' method."
            )
        # Kept so that a subclass overriding ``_get_model`` without caching
        # still ends up with the validated model stored on the instance.
        self._model = model

    def _get_model(self):
        """Load or retrieve the CrossEncoder model.

        This overrides the base class method to load CrossEncoder instead of
        SentenceTransformer, as reranking requires cross-encoder models. As in
        the base class, the loaded model is stored on the instance so repeated
        calls reuse it instead of loading again.

        Returns:
            CrossEncoder: The loaded cross-encoder model instance.

        Raises:
            ImportError: If required packages are not installed.
            ValueError: If model cannot be loaded.
        """
        # Return cached model if exists
        if self._model is not None:
            return self._model

        # Load cross-encoder model
        try:
            sentence_transformers = require_module("sentence_transformers")

            if self._model_source == "modelscope":
                # ModelScope: download to the local cache, then load from
                # the returned directory.
                require_module("modelscope")
                from modelscope.hub.snapshot_download import snapshot_download

                model_path = snapshot_download(self._model_name)
            else:
                # Hugging Face (default): pass the identifier/path through.
                model_path = self._model_name

            model = sentence_transformers.CrossEncoder(
                model_path, device=self._device
            )

        except ImportError as e:
            if "modelscope" in str(e) and self._model_source == "modelscope":
                raise ImportError(
                    "ModelScope support requires the 'modelscope' package. "
                    "Please install it with: pip install modelscope"
                ) from e
            raise
        except Exception as e:
            raise ValueError(
                f"Failed to load CrossEncoder model '{self._model_name}' "
                f"from {self._model_source}: {e!s}"
            ) from e

        # Cache on the instance so the "retrieve" half of the contract holds
        # regardless of who calls this method first.
        self._model = model
        return model

    @property
    def query(self) -> str:
        """str: Query text used for semantic re-ranking."""
        return self._query

    @property
    def batch_size(self) -> int:
        """int: Batch size for processing query-document pairs."""
        return self._batch_size

    def rerank(self, query_results: dict[str, list[Doc]]) -> list[Doc]:
        """Re-rank documents using Sentence Transformer cross-encoder model.

        Evaluates each query-document pair using the cross-encoder model to compute
        relevance scores. Documents are then sorted by these scores and the top
        ``topn`` results are returned.

        Args:
            query_results (dict[str, list[Doc]]): Mapping from vector field names
                to lists of retrieved documents. Documents from all fields are
                deduplicated and re-ranked together.

        Returns:
            list[Doc]: Re-ranked documents (up to ``topn``) with updated ``score``
                fields containing relevance scores from the cross-encoder model.
                An empty list is returned when ``query_results`` itself is empty.

        Raises:
            ValueError: If no document with usable ``rerank_field`` content is
                found.
            RuntimeError: If model inference fails.

        Note:
            - Duplicate documents (same ID) across fields are processed once
            - Documents with empty/missing ``rerank_field`` content are skipped
            - Returned scores are whatever the model's ``predict`` produces
            - Higher scores indicate higher relevance

        Examples:
            >>> reranker = DefaultLocalReRanker(
            ...     query="machine learning",
            ...     topn=3,
            ...     rerank_field="content"
            ... )
            >>> query_results = {
            ...     "vector1": [
            ...         Doc(id="1", score=0.9, fields={"content": "ML basics"}),
            ...         Doc(id="2", score=0.8, fields={"content": "DL tutorial"}),
            ...     ]
            ... }
            >>> reranked = reranker.rerank(query_results)
            >>> len(reranked) <= 3
            True
        """
        if not query_results:
            return []

        # Collect and deduplicate documents
        id_to_doc: dict[str, Doc] = {}
        doc_ids: list[str] = []
        contents: list[str] = []

        for query_result in query_results.values():
            for doc in query_result:
                doc_id = doc.id
                if doc_id in id_to_doc:
                    continue

                # Extract text content from specified field; falsy values
                # (None, "", 0, ...) count as missing content.
                field_value = doc.field(self.rerank_field)
                rank_content = str(field_value).strip() if field_value else ""
                if not rank_content:
                    continue

                id_to_doc[doc_id] = doc
                doc_ids.append(doc_id)
                contents.append(rank_content)

        if not contents:
            raise ValueError("No documents to rerank")

        try:
            # Use standard cross-encoder predict method
            pairs = [[self.query, content] for content in contents]
            scores = self._model.predict(
                pairs,
                batch_size=self.batch_size,
                show_progress_bar=False,
                convert_to_numpy=True,
            )

            # Convert to float list if needed
            if hasattr(scores, "tolist"):
                scores = scores.tolist()
            else:
                scores = [float(s) for s in scores]

        except Exception as e:
            raise RuntimeError(f"Failed to compute rerank scores: {e!s}") from e

        # Create scored documents
        scored_docs = [
            (doc_ids[i], id_to_doc[doc_ids[i]], scores[i]) for i in range(len(doc_ids))
        ]

        # Sort by score (descending) and keep the first ``topn`` entries
        scored_docs.sort(key=lambda x: x[2], reverse=True)
        top_scored_docs = scored_docs[: self.topn]

        # Build result list with updated scores
        results: list[Doc] = []
        for _, doc, score in top_scored_docs:
            new_doc = doc._replace(score=score)
            results.append(new_doc)

        return results


================================================
FILE: python/zvec/model/__init__.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from .collection import Collection
from .doc import Doc
from .param.vector_query import VectorQuery
from .schema.collection_schema import CollectionSchema
from .schema.field_schema import FieldSchema

__all__ = ["Collection", "CollectionSchema", "Doc", "FieldSchema", "VectorQuery"]


================================================
FILE: python/zvec/model/collection.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from typing import Optional, Union, overload

from _zvec import _Collection

from ..executor import QueryContext, QueryExecutorFactory
from ..extension import ReRanker
from ..typing import Status
from .convert import convert_to_cpp_doc, convert_to_py_doc
from .doc import Doc
from .param import (
    AddColumnOption,
    AlterColumnOption,
    CollectionOption,
    FlatIndexParam,
    HnswIndexParam,
    IndexOption,
    InvertIndexParam,
    IVFIndexParam,
    OptimizeOption,
)
from .param.vector_query import VectorQuery
from .schema import CollectionSchema, CollectionStats, FieldSchema

__all__ = ["Collection"]

_VECTOR_INDEX_TYPES = (HnswIndexParam, IVFIndexParam, FlatIndexParam)


class Collection:
    """Represents an opened collection in Zvec.

    A `Collection` provides methods for data definition (DDL), data manipulation (DML),
    and querying (DQL). It is obtained via `create_and_open()` or `open()`.

    This class is not meant to be instantiated directly; use factory functions instead.
    """

    def __init__(self, obj: _Collection):
        self._obj = obj
        self._schema = None
        self._querier = None

    @classmethod
    def _from_core(cls, core_collection: _Collection) -> Collection:
        """Build a fully initialised ``Collection`` around a core handle.

        ``__init__`` is bypassed: the instance is allocated with ``__new__``
        and then given the handle, a schema derived from the handle, and a
        query executor created from that schema.

        Args:
            core_collection (_Collection): The underlying core collection.

        Returns:
            Collection: The wrapping instance.

        Raises:
            ValueError: If ``core_collection`` is falsy.
        """
        if not core_collection:
            raise ValueError("Collection is None")

        wrapper = cls.__new__(cls)
        wrapper._obj = core_collection

        py_schema = CollectionSchema._from_core(core_collection.Schema())
        wrapper._schema = py_schema
        wrapper._querier = QueryExecutorFactory.create(py_schema)
        return wrapper

    @property
    def path(self) -> str:
        """str: The filesystem path of the collection."""
        return self._obj.Path()

    @property
    def option(self) -> CollectionOption:
        """CollectionOption: The options used to open the collection."""
        return self._obj.Options()

    @property
    def schema(self) -> CollectionSchema:
        """CollectionSchema: The schema defining the structure of the collection."""
        return self._schema

    @property
    def stats(self) -> CollectionStats:
        """CollectionStats: Runtime statistics about the collection (e.g., doc count, size)."""
        return self._obj.Stats()

    # ========== Collection DDL Methods ==========
    def destroy(self) -> None:
        """Permanently delete the collection from disk.

        Warning:
            This operation is irreversible. All data will be lost.
        """
        self._obj.Destroy()

    def flush(self) -> None:
        """Force all pending writes to disk.

        Ensures durability of recent inserts/updates.
        """
        self._obj.Flush()

    # ========== Index DDL Methods ==========
    def create_index(
        self,
        field_name: str,
        index_param: Union[
            HnswIndexParam, IVFIndexParam, FlatIndexParam, InvertIndexParam
        ],
        option: IndexOption = IndexOption(),
    ) -> None:
        """Create an index on a field.

        Vector index types (HNSW, IVF, FLAT) can only be applied to vector fields.
        Inverted index (`InvertIndexParam`) is for scalar fields.

        After the index is created, the cached schema is refreshed from the
        core collection.

        Args:
            field_name (str): Name of the field to index.
            index_param (Union[HnswIndexParam, IVFIndexParam, FlatIndexParam, InvertIndexParam]):
                Index configuration.
            option (Optional[IndexOption], optional): Index creation options.
                Defaults to ``IndexOption()``.

        Raises:
            ValueError: If a vector index is applied to a non-vector field.
        """
        # ``index_param`` is an instance, so its type must be checked with
        # ``isinstance``. A membership test (``in``) against the tuple of
        # classes compares the instance with the class objects themselves and
        # therefore does not detect vector index parameters.
        is_vector_index = isinstance(index_param, _VECTOR_INDEX_TYPES)
        if is_vector_index and not self.schema.vector(field_name):
            supported_types = ", ".join(cls.__name__ for cls in _VECTOR_INDEX_TYPES)
            raise ValueError(
                f"Cannot apply vector index to non-vector field '{field_name}'. "
                f"The field must be of vector type to use index types like {supported_types}."
            )
        self._obj.CreateIndex(field_name, index_param, option)
        # The index definition is part of the schema, so re-read it.
        self._schema = CollectionSchema._from_core(self._obj.Schema())

    def drop_index(self, field_name: str) -> None:
        """Drop the index on a field and refresh the cached schema.

        Args:
            field_name (str): Name of the indexed field.
        """
        core = self._obj
        core.DropIndex(field_name)
        # Re-read the schema so the wrapper reflects the change.
        refreshed = CollectionSchema._from_core(core.Schema())
        self._schema = refreshed

    def optimize(self, option: OptimizeOption = OptimizeOption()) -> None:
        """Run the core collection's optimize operation.

        The project documentation gives merging segments and rebuilding the
        index as examples of what optimization may do.

        Args:
            option (Optional[OptimizeOption], optional): Optimization options.
                Defaults to ``OptimizeOption()``.
        """
        core = self._obj
        core.Optimize(option)

    # ========== COLUMN DDL Methods ==========
    def add_column(
        self,
        field_schema: FieldSchema,
        expression: str = "",
        option: AddColumnOption = AddColumnOption(),
    ) -> None:
        """Add a column to the collection and refresh the cached schema.

        The field schema is converted to its core object before being handed,
        together with the expression and options, to the core collection.

        Args:
            field_schema (FieldSchema): Schema definition for the new column.
            expression (str): Expression used to compute values for existing
                documents (described by the project documentation as an
                SQL-like formula). Defaults to an empty string.
            option (Optional[AddColumnOption], optional): Options for the operation.
                Defaults to ``AddColumnOption()``.
        """
        core = self._obj
        core_field = field_schema._get_object()
        core.AddColumn(core_field, expression, option)
        # Re-read the schema so the wrapper reflects the new column.
        self._schema = CollectionSchema._from_core(core.Schema())

    def drop_column(self, field_name: str) -> None:
        """Drop a column from the collection and refresh the cached schema.

        Args:
            field_name (str): Name of the column to drop.
        """
        core = self._obj
        core.DropColumn(field_name)
        # Re-read the schema so the wrapper reflects the removal.
        refreshed = CollectionSchema._from_core(core.Schema())
        self._schema = refreshed

    def alter_column(
        self,
        old_name: str,
        new_name: Optional[str] = None,
        field_schema: Optional[FieldSchema] = None,
        option: AlterColumnOption = AlterColumnOption(),
    ) -> None:
        """Rename a column and/or update its schema, then refresh the cached schema.

        Supported operations:
          1. Rename only (when `field_schema` is None).
          2. Modify schema only (when `new_name` is None or empty string).

        NOTE(review): earlier documentation mentioned three operations but
        listed only the two above; confirm whether a rename and a schema
        change may be combined in a single call.

        Args:
            old_name (str): The current name of the column to be altered.
            new_name (Optional[str]): The new name for the column.
                - If provided and non-empty, it is passed on as the new name.
                - If `None` or empty string, an empty string is passed on,
                  meaning no rename is requested.
            field_schema (Optional[FieldSchema]): The new schema definition.
                - If provided, its core object is passed on so the column's
                  type, dimension, or other properties can be updated.
                - If `None`, `None` is passed on and only renaming
                  (if requested) is performed.
            option (AlterColumnOption, optional): Options controlling the alteration behavior.
                Defaults to ``AlterColumnOption()``.

        **Limitation**: This operation **only supports scalar numeric columns**, such as:
        - `DOUBLE`, `FLOAT`,
        - `INT32`, `INT64`, `UINT32`, `UINT64`

        Note:
            - Schema modification may trigger data migration or index rebuild.

        Examples:
            >>> # Rename column only
            >>> collection.alter_column(old_name="id", new_name="doc_id")

            >>> # Modify schema only
            >>> new_schema = FieldSchema(name="doc_id", dtype=DataType.INT64)
            >>> collection.alter_column("id", field_schema=new_schema)
        """
        core = self._obj
        # The core call takes an empty string for "no rename" and None for
        # "no schema change".
        target_name = new_name or ""
        core_field = field_schema._get_object() if field_schema else None
        core.AlterColumn(old_name, target_name, core_field, option)
        self._schema = CollectionSchema._from_core(core.Schema())

    # ========== Collection DML Methods ==========
    @overload
    def insert(self, docs: Doc) -> Status: ...

    @overload
    def insert(self, docs: list[Doc]) -> list[Status]: ...

    def insert(self, docs: Union[Doc, list[Doc]]) -> Union[Status, list[Status]]:
        """Insert one document or a list of documents into the collection.

        Each document is converted to its core representation using the
        current schema before being passed to the core collection. Per the
        project documentation, documents must have unique IDs and conform to
        the schema.

        Args:
            docs (Union[Doc, list[Doc]]): One or more documents to insert.

        Returns:
            Union[Status, list[Status]]: The single Status when a single Doc
            was given; the list of Status objects when a list was given.
        """
        if isinstance(docs, Doc):
            # Single document: wrap it for the batch call, unwrap the result.
            statuses = self._obj.Insert([convert_to_cpp_doc(docs, self.schema)])
            return statuses[0]

        core_docs = [convert_to_cpp_doc(item, self.schema) for item in docs]
        return self._obj.Insert(core_docs)

    @overload
    def upsert(self, docs: Doc) -> Status: ...

    @overload
    def upsert(self, docs: list[Doc]) -> list[Status]: ...

    def upsert(self, docs: Union[Doc, list[Doc]]) -> Union[Status, list[Status]]:
        """Upsert one document or a list of documents.

        Each document is converted to its core representation using the
        current schema before being passed to the core collection. Per the
        project documentation, an upsert inserts new documents and updates
        existing ones, matched by ID.

        Args:
            docs (Union[Doc, list[Doc]]): Documents to upsert.

        Returns:
            Union[Status, list[Status]]: The single Status when a single Doc
            was given; the list of Status objects when a list was given.
        """
        if isinstance(docs, Doc):
            # Single document: wrap it for the batch call, unwrap the result.
            statuses = self._obj.Upsert([convert_to_cpp_doc(docs, self.schema)])
            return statuses[0]

        core_docs = [convert_to_cpp_doc(item, self.schema) for item in docs]
        return self._obj.Upsert(core_docs)

    @overload
    def update(self, docs: Doc) -> Status: ...

    @overload
    def update(self, docs: list[Doc]) -> list[Status]: ...

    def update(self, docs: Union[Doc, list[Doc]]) -> Union[Status, list[Status]]:
        """Update one document or a list of documents.

        Each document is converted to its core representation using the
        current schema before being passed to the core collection. Per the
        project documentation, documents are matched by ID and only the
        specified fields are updated; other fields remain unchanged.

        Args:
            docs (Union[Doc, list[Doc]]): Documents containing updated fields.

        Returns:
            Union[Status, list[Status]]: The single Status when a single Doc
            was given; the list of Status objects when a list was given.
        """
        if isinstance(docs, Doc):
            # Single document: wrap it for the batch call, unwrap the result.
            statuses = self._obj.Update([convert_to_cpp_doc(docs, self.schema)])
            return statuses[0]

        core_docs = [convert_to_cpp_doc(item, self.schema) for item in docs]
        return self._obj.Update(core_docs)

    @overload
    def delete(self, ids: str) -> Status: ...

    @overload
    def delete(self, ids: list[str]) -> list[Status]: ...

    def delete(self, ids: Union[str, list[str]]) -> Union[Status, list[Status]]:
        """Remove documents identified by their IDs.

        Args:
            ids (Union[str, list[str]]): A single document ID or a list of
                document IDs to delete.

        Returns:
            Union[Status, list[Status]]: The Status for the ID when a single
            string was passed; otherwise the list of Status objects returned
            for the batch.
        """
        if isinstance(ids, str):
            # Single ID: send a one-element batch and unwrap the result.
            return self._obj.Delete([ids])[0]
        return self._obj.Delete(ids)

    def delete_by_filter(self, filter: str) -> None:
        """Remove every document that satisfies a filter expression.

        The expression is forwarded unchanged to the underlying collection
        object; whatever that call returns is discarded.

        Args:
            filter (str): Boolean filter expression, for example
                ``"age > 30"``.
        """
        core_collection = self._obj
        core_collection.DeleteByFilter(filter)

    # ========== Collection DQL-fetch Methods ==========
    def fetch(self, ids: Union[str, list[str]]) -> dict[str, Doc]:
        """Look up documents by ID.

        Args:
            ids (Union[str, list[str]]): A single document ID or a list of
                document IDs to retrieve.

        Returns:
            dict[str, Doc]: Documents keyed by ID. Entries whose conversion
            with ``convert_to_py_doc`` yields ``None`` are left out, so
            missing IDs do not appear in the result.
        """
        id_list = [ids] if isinstance(ids, str) else ids
        found = {}
        for doc_id, core_doc in self._obj.Fetch(id_list).items():
            py_doc = convert_to_py_doc(core_doc, self.schema)
            if py_doc is not None:
                found[doc_id] = py_doc
        return found

    # ========== Collection DQL-Query Methods ==========

    def query(
        self,
        vectors: Optional[Union[VectorQuery, list[VectorQuery]]] = None,
        *,
        topk: int = 10,
        filter: Optional[str] = None,
        include_vector: bool = False,
        output_fields: Optional[list[str]] = None,
        reranker: Optional[ReRanker] = None,
    ) -> list[Doc]:
        """Run a vector similarity search, optionally filtered and re-ranked.

        The arguments are bundled into a ``QueryContext`` and executed by this
        collection's querier against the underlying collection object.

        At least one `VectorQuery` must be provided.

        Args:
            vectors (Optional[Union[VectorQuery, list[VectorQuery]]], optional):
                One vector query or a list of them. A single ``VectorQuery``
                is wrapped in a list before being passed on. Defaults to None.
            topk (int, optional): Number of nearest neighbors to return.
                Defaults to 10.
            filter (Optional[str], optional): Boolean expression used to
                pre-filter candidates. Defaults to None.
            include_vector (bool, optional): Whether vector data is included
                in the results. Defaults to False.
            output_fields (Optional[list[str]], optional): Scalar fields to
                include. If None, all fields are returned. Defaults to None.
            reranker (Optional[ReRanker], optional): Re-ranker used to refine
                the results. Defaults to None.

        Returns:
            list[Doc]: Top-k matching documents, sorted by relevance score.

        Examples:
            >>> from zvec import VectorQuery
            >>> results = collection.query(
            ...     vectors=VectorQuery("embedding", vector=[0.1, 0.2]),
            ...     topk=5,
            ...     filter="category == 'tech'",
            ...     output_fields=["title", "url"]
            ... )
        """
        # Normalise the single-query form to a list; anything else passes through.
        if isinstance(vectors, VectorQuery):
            query_list = [vectors]
        else:
            query_list = vectors

        context = QueryContext(
            topk=topk,
            filter=filter,
            queries=query_list,
            include_vector=include_vector,
            output_fields=output_fields,
            reranker=reranker,
        )
        return self._querier.execute(context, self._obj)


================================================
FILE: python/zvec/model/convert.py
================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from _zvec import _Doc

from .doc import Doc
from .schema import CollectionSchema


def convert_to_cpp_doc(doc: Doc, collection_schema: CollectionSchema) -> _Doc:
    """Build a core ``_Doc`` from a Python ``Doc``.

    The primary key is set first, then every scalar field, then every vector.
    Each value is written together with the core schema object of its column.

    Args:
        doc (Doc): Source document; its ``id``, ``fields`` and ``vectors``
            are read.
        collection_schema (CollectionSchema): Schema used to resolve each
            field and vector name.

    Returns:
        _Doc: The populated core document, or ``None`` when either argument
        is falsy.

    Raises:
        ValueError: If a field or vector name is not found in the schema.
    """
    if not (doc and collection_schema):
        return None

    cpp_doc = _Doc()
    cpp_doc.set_pk(doc.id)

    def _set_column(name, column_schema, value):
        # A falsy lookup result means the schema has no such column.
        if not column_schema:
            raise ValueError(
                f"schema validate failed: {name} not found in collection schema"
            )
        cpp_doc.set_any(name, column_schema._get_object(), value)

    # Scalar fields are written before vectors.
    for name, value in doc.fields.items():
        _set_column(name, collection_schema.field(name), value)

    for name, value in doc.vectors.items():
        _set_column(name, collection_schema.vector(name), value)

    return cpp_doc


def convert_to_py_doc(doc: _Doc, collection_schema: CollectionSchema) -> Doc:
    """Build a Python ``Doc`` from a core ``_Doc``.

    Args:
        doc (_Doc): Core document to read from.
        collection_schema (CollectionSchema): Schema whose core object is
            passed to ``doc.get_all``.

    Returns:
        Doc: Document created by ``Doc._from_tuple`` from the extracted
        tuple, or ``None`` when either argument is falsy.
    """
    if doc and collection_schema:
        core_schema = collection_schema._get_object()
        return Doc._from_tuple(doc.get_all(core_schema))
    return None


================================================
FILE: python/zvec/model/doc.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import json
from typing import Any, Optional

from ..common import VectorType

__all__ = [
    "Doc",
]


class Doc:
    """Represents a document with optional score, scalar fields, and vectors.

    A ``Doc`` carries the document ID, a relevance score (when it comes from a
    search), scalar fields, and named vector embeddings. It is a lightweight
    ``__slots__`` container; its attributes can be reassigned after creation.

    Vectors passed to the constructor are stored as given. Documents built
    through ``_from_tuple`` have every vector that exposes a ``tolist()``
    method (for example ``numpy.ndarray``) converted to a plain Python list.

    Attributes:
        id (str): Unique identifier of the document.
        score (Optional[float], optional): Relevance score from search.
            Defaults to None.
        vectors (Optional[dict[str, VectorType]], optional): Named vector
            embeddings associated with the document. Stored as an empty dict
            when omitted. Defaults to None.
        fields (Optional[dict[str, Any]], optional): Scalar metadata fields
            (e.g., title, timestamp). Stored as an empty dict when omitted.
            Defaults to None.

    Examples:
        >>> import zvec
        >>> doc = zvec.Doc(
        ...     id="doc1",
        ...     score=0.95,
        ...     vectors={"emb": [0.1, 0.2, 0.3]},
        ...     fields={"title": "Hello World"}
        ... )
        >>> print(doc.vector("emb"))
        [0.1, 0.2, 0.3]
        >>> print(doc.has_field("title"))
        True
    """

    __slots__ = ("id", "score", "vectors", "fields")

    def __init__(
        self,
        id: str,
        score: Optional[float] = None,
        vectors: Optional[dict[str, VectorType]] = None,
        fields: Optional[dict[str, Any]] = None,
    ):
        self.id = id
        self.score = score
        # Normalise missing containers to empty dicts so lookups never see None.
        self.vectors = vectors or {}
        self.fields = fields or {}

    def has_field(self, name: str) -> bool:
        """Check if the document contains a scalar field with the given name.

        Args:
            name (str): Name of the field to check.

        Returns:
            bool: True if the field exists, False otherwise.
        """
        return name in self.fields

    def has_vector(self, name: str) -> bool:
        """Check if the document contains a vector with the given name.

        Args:
            name (str): Name of the vector to check.

        Returns:
            bool: True if the vector exists, False otherwise.
        """
        return name in self.vectors

    def vector(self, name: str):
        """Get a vector by name.

        Args:
            name (str): Name of the vector.

        Returns:
            Any: The stored vector if it exists, otherwise None (including
            when the document has no vectors at all).
        """
        # A bare ``self.vectors and ...`` would leak the empty dict to callers.
        return self.vectors.get(name) if self.vectors else None

    def field(self, name: str):
        """Get a scalar field by name.

        Args:
            name (str): Name of the field.

        Returns:
            Any: The field value if it exists, otherwise None (including
            when the document has no fields at all).
        """
        # A bare ``self.fields and ...`` would leak the empty dict to callers.
        return self.fields.get(name) if self.fields else None

    def vector_names(self) -> list[str]:
        """Get the list of all vector names in this document.

        Returns:
            list[str]: A list of vector field names. Empty if no vectors.
        """
        return [] if not self.vectors else list(self.vectors.keys())

    def field_names(self) -> list[str]:
        """Get the list of all scalar field names in this document.

        Returns:
            list[str]: A list of field names. Empty if no fields.
        """
        return [] if not self.fields else list(self.fields.keys())

    def __repr__(self) -> str:
        """Return the document as indented JSON.

        If serialisation raises (for example because a value is not JSON
        serialisable), a short placeholder containing the error is returned
        instead so that ``repr()`` itself never fails.
        """
        try:
            schema = {
                "id": self.id,
                "score": self.score,
                "fields": self.fields,
                "vectors": self.vectors,
            }
            return json.dumps(schema, indent=2, ensure_ascii=False)
        except Exception as e:
            return f"<Doc error during repr: {e}>"

    def _replace(self, **changes):
        """Return a new document with the given attributes replaced.

        Args:
            **changes: Any of ``id``, ``score``, ``fields``, ``vectors``.
                Attributes not named keep this document's value; unchanged
                ``fields`` / ``vectors`` dicts are shallow-copied.

        Returns:
            Doc: A new instance built through ``_from_tuple``.
        """
        new_tuple = (
            changes.get("id", self.id),
            changes.get("score", self.score),
            changes.get("fields", self.fields.copy() if self.fields else None),
            changes.get("vectors", self.vectors.copy() if self.vectors else None),
        )
        return type(self)._from_tuple(new_tuple)

    @classmethod
    def _from_tuple(
        cls, data_tuple: tuple[str, float, dict[str, Any], dict[str, VectorType]]
    ):
        """Build a document from an ``(id, score, fields, vectors)`` tuple.

        ``__init__`` is bypassed. A falsy ``fields`` entry and a ``None``
        ``vectors`` entry both become empty dicts; vectors that expose
        ``tolist()`` are converted to plain lists.
        """
        obj = object.__new__(cls)
        obj.id = data_tuple[0]
        obj.score = data_tuple[1]
        obj.fields = data_tuple[2] or {}

        vectors = data_tuple[3]
        if vectors is not None:
            obj.vectors = {
                name: (vec.tolist() if hasattr(vec, "tolist") else vec)
                for name, vec in vectors.items()
            }
        else:
            obj.vectors = {}
        return obj


================================================
FILE: python/zvec/model/param/__init__.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from _zvec.param import (
    AddColumnOption,
    AlterColumnOption,
    CollectionOption,
    FlatIndexParam,
    HnswIndexParam,
    HnswQueryParam,
    HnswRabitqIndexParam,
    HnswRabitqQueryParam,
    IndexOption,
    InvertIndexParam,
    IVFIndexParam,
    IVFQueryParam,
    OptimizeOption,
)

# Public names of this package, re-exported from the compiled ``_zvec.param``
# module imported above.
# NOTE(review): the companion stub (``__init__.pyi``) additionally lists
# ``IndexParam``, ``QueryParam``, ``SegmentOption`` and ``VectorIndexParam``,
# which are not imported here — confirm whether they should be exported too.
__all__ = [
    "AddColumnOption",
    "AlterColumnOption",
    "CollectionOption",
    "FlatIndexParam",
    "HnswIndexParam",
    "HnswQueryParam",
    "HnswRabitqIndexParam",
    "HnswRabitqQueryParam",
    "IVFIndexParam",
    "IVFQueryParam",
    "IndexOption",
    "InvertIndexParam",
    "OptimizeOption",
]


================================================
FILE: python/zvec/model/param/__init__.pyi
================================================
"""
This module contains the params of Zvec
"""

from __future__ import annotations

import collections
import typing

import _zvec.typing

# Names declared as public by this stub.
# NOTE(review): ``IndexParam``, ``QueryParam``, ``SegmentOption`` and
# ``VectorIndexParam`` appear here but not in the runtime ``__init__.py``
# export list — confirm which of the two lists is authoritative.
__all__: list[str] = [
    "AddColumnOption",
    "AlterColumnOption",
    "CollectionOption",
    "FlatIndexParam",
    "HnswIndexParam",
    "HnswQueryParam",
    "HnswRabitqIndexParam",
    "HnswRabitqQueryParam",
    "IVFIndexParam",
    "IVFQueryParam",
    "IndexOption",
    "IndexParam",
    "InvertIndexParam",
    "OptimizeOption",
    "QueryParam",
    "SegmentOption",
    "VectorIndexParam",
]

class AddColumnOption:
    """Options for adding a new column to a collection.

    Attributes:
        concurrency (int): Number of threads to use when backfilling data
            for the new column. If 0, auto-detect is used. Default is 0.

    Examples:
        >>> opt = AddColumnOption(concurrency=1)
        >>> print(opt.concurrency)
        1
    """

    # Pickle-protocol hooks: state is exported and restored as a tuple.
    def __getstate__(self) -> tuple: ...
    def __init__(self, concurrency: typing.SupportsInt = 0) -> None:
        """
        Constructs an AddColumnOption instance.

        Args:
            concurrency (int, optional): Number of threads for data backfill.
                0 means auto-detect. Defaults to 0.
        """

    def __setstate__(self, arg0: tuple) -> None: ...
    # Declared with a getter only in this stub.
    @property
    def concurrency(self) -> int:
        """
        int: Number of threads used when adding a column (0 = auto).
        """

class AlterColumnOption:
    """Options for altering an existing column (e.g., changing index settings).

    Attributes:
        concurrency (int): Number of threads to use during the alteration process.
            If 0, the system will choose an optimal value automatically.
            Default is 0.

    Examples:
        >>> opt = AlterColumnOption(concurrency=1)
        >>> print(opt.concurrency)
        1
    """

    # Pickle-protocol hooks: state is exported and restored as a tuple.
    def __getstate__(self) -> tuple: ...
    def __init__(self, concurrency: typing.SupportsInt = 0) -> None:
        """
        Constructs an AlterColumnOption instance.

        Args:
            concurrency (int, optional): Number of threads for column alteration.
                0 means auto-detect. Defaults to 0.
        """

    def __setstate__(self, arg0: tuple) -> None: ...
    # Declared with a getter only in this stub.
    @property
    def concurrency(self) -> int:
        """
        int: Number of threads used when altering a column (0 = auto).
        """

class CollectionOption:
    """Options for opening or creating a collection.

    Attributes:
        read_only (bool): Whether the collection is opened in read-only mode.
            Default is False.
        enable_mmap (bool): Whether to use memory-mapped I/O for data files.
            Default is True.

    Examples:
        >>> opt = CollectionOption(read_only=True, enable_mmap=False)
        >>> print(opt.read_only)
        True
    """

    # Pickle-protocol hooks: state is exported and restored as a tuple.
    def __getstate__(self) -> tuple: ...
    def __init__(self, read_only: bool = False, enable_mmap: bool = True) -> None:
        """
        Constructs a CollectionOption instance.

        Args:
            read_only (bool, optional): Open collection in read-only mode.
                Defaults to False.
            enable_mmap (bool, optional): Enable memory-mapped I/O.
                Defaults to True.
        """

    def __repr__(self) -> str: ...
    def __setstate__(self, arg0: tuple) -> None: ...
    # bool: Whether memory-mapped I/O is enabled for data files.
    @property
    def enable_mmap(self) -> bool: ...
    # bool: Whether the collection is opened in read-only mode.
    @property
    def read_only(self) -> bool: ...

class FlatIndexParam(VectorIndexParam):
    """Parameters for configuring a flat (brute-force) index.

    A flat index performs exact nearest neighbor search by comparing the query
    vector against all vectors in the collection. It is simple, accurate, and
    suitable for small to medium datasets or as a baseline.

    Attributes:
        metric_type (MetricType): Distance metric used for similarity computation.
            Default is ``MetricType.IP`` (inner product).
        quantize_type (QuantizeType): Optional quantization type for vector
            compression (e.g., FP16, INT8). Use ``QuantizeType.UNDEFINED`` to
            disable quantization. Default is ``QuantizeType.UNDEFINED``.

    Examples:
        >>> from zvec.typing import MetricType, QuantizeType
        >>> params = FlatIndexParam(
        ...     metric_type=MetricType.L2,
        ...     quantize_type=QuantizeType.FP16
        ... )
        >>> print(params)
        {'metric_type': 'L2', 'quantize_type': 'FP16'}
    """

    # Pickle-protocol hooks: state is exported and restored as a tuple.
    def __getstate__(self) -> tuple: ...
    # Default values are elided (``...``) in this stub; the docstring below
    # states them.
    def __init__(
        self,
        metric_type: _zvec.typing.MetricType = ...,
        quantize_type: _zvec.typing.QuantizeType = ...,
    ) -> None:
        """
        Constructs a FlatIndexParam instance.

        Args:
            metric_type (MetricType, optional): Distance metric. Defaults to MetricType.IP.
            quantize_type (QuantizeType, optional): Vector quantization type.
                Defaults to QuantizeType.UNDEFINED (no quantization).
        """

    def __repr__(self) -> str: ...
    def __setstate__(self, arg0: tuple) -> None: ...
    def to_dict(self) -> dict:
        """
        Convert to dictionary with all fields
        """

class HnswIndexParam(VectorIndexParam):
    """Parameters for configuring an HNSW (Hierarchical Navigable Small World) index.

    HNSW is a graph-based approximate nearest neighbor search index. This class
    encapsulates its construction hyperparameters.

    Attributes:
        metric_type (MetricType): Distance metric used for similarity computation.
            Default is ``MetricType.IP`` (inner product).
        m (int): Number of bi-directional links created for every new element
            during construction. Higher values improve accuracy but increase
            memory usage and construction time. Default is 50.
        ef_construction (int): Size of the dynamic candidate list for nearest
            neighbors during index construction. Larger values yield better
            graph quality at the cost of slower build time. Default is 500.
        quantize_type (QuantizeType): Optional quantization type for vector
            compression (e.g., FP16, INT8). Default is ``QuantizeType.UNDEFINED``,
            which disables quantization.

    Examples:
        >>> from zvec.typing import MetricType, QuantizeType
        >>> params = HnswIndexParam(
        ...     metric_type=MetricType.COSINE,
        ...     m=16,
        ...     ef_construction=200,
        ...     quantize_type=QuantizeType.INT8
        ... )
        >>> print(params)
        {'metric_type': 'COSINE', 'm': 16, 'ef_construction': 200, 'quantize_type': 'INT8'}
    """

    # Pickle-protocol hooks: state is exported and restored as a tuple.
    def __getstate__(self) -> tuple: ...
    # Enum defaults are elided (``...``) in this stub; the class docstring
    # lists them as MetricType.IP and QuantizeType.UNDEFINED.
    def __init__(
        self,
        metric_type: _zvec.typing.MetricType = ...,
        m: typing.SupportsInt = 50,
        ef_construction: typing.SupportsInt = 500,
        quantize_type: _zvec.typing.QuantizeType = ...,
    ) -> None: ...
    def __repr__(self) -> str: ...
    def __setstate__(self, arg0: tuple) -> None: ...
    def to_dict(self) -> dict:
        """
        Convert to dictionary with all fields
        """

    @property
    def ef_construction(self) -> int:
        """
        int: Candidate list size during index construction.
        """

    @property
    def m(self) -> int:
        """
        int: Maximum number of neighbors per node in upper layers.
        """

class HnswQueryParam(QueryParam):
    """Query parameters for HNSW (Hierarchical Navigable Small World) index.

    Controls the trade-off between search speed and accuracy via the `ef` parameter.

    Attributes:
        type (IndexType): Always ``IndexType.HNSW``.
        ef (int): Size of the dynamic candidate list during search.
            Larger values improve recall but slow down search.
            Default is 300.
        radius (float): Search radius for range queries. Default is 0.0.
        is_linear (bool): Force linear search. Default is False.
        is_using_refiner (bool): Whether to use refiner for the query. Default is False.

    Examples:
        >>> params = HnswQueryParam(ef=300)
        >>> print(params.ef)
        300
        >>> print(params.to_dict() if hasattr(params, 'to_dict') else params)
        {"type":"HNSW", "ef":300}
    """
    # NOTE(review): only ``ef`` has a property stub in this class; ``type``,
    # ``radius``, ``is_linear`` and ``is_using_refiner`` are presumably
    # inherited from QueryParam — confirm against the binding.
    # Pickle-protocol hooks: state is exported and restored as a tuple.
    def __getstate__(self) -> tuple: ...
    def __init__(
        self,
        ef: typing.SupportsInt = 300,
        radius: typing.SupportsFloat = 0.0,
        is_linear: bool = False,
        is_using_refiner: bool = False,
    ) -> None:
        """
        Constructs an HnswQueryParam instance.

        Args:
            ef (int, optional): Search-time candidate list size.
                Higher values improve accuracy. Defaults to 300.
            radius (float, optional): Search radius for range queries. Default is 0.0.
            is_linear (bool, optional): Force linear search. Default is False.
            is_using_refiner (bool, optional): Whether to use refiner for the query. Default is False.
        """
    def __repr__(self) -> str: ...
    def __setstate__(self, arg0: tuple) -> None: ...
    @property
    def ef(self) -> int:
        """
        int: Size of the dynamic candidate list during HNSW search.
        """

class HnswRabitqIndexParam(VectorIndexParam):
    """Parameters for configuring an HNSW (Hierarchical Navigable Small World) index with RabitQ quantization.

    HNSW is a graph-based approximate nearest neighbor search index. RabitQ is a
    quantization method that provides high compression with minimal accuracy loss.

    Attributes:
        metric_type (MetricType): Distance metric used for similarity computation.
            Default is ``MetricType.IP`` (inner product).
        total_bits (int): Total bits for RabitQ quantization. Default is 7.
        num_clusters (int): Number of clusters for RabitQ. Default is 16.
        m (int): Number of bi-directional links created for every new element
            during construction. Higher values improve accuracy but increase
            memory usage and construction time. Default is 50.
        ef_construction (int): Size of the dynamic candidate list for nearest
            neighbors during index construction. Larger values yield better
            graph quality at the cost of slower build time. Default is 500.
        sample_count (int): Sample count for RabitQ training. Default is 0.

    Examples:
        >>> from zvec.typing import MetricType
        >>> params = HnswRabitqIndexParam(
        ...     metric_type=MetricType.COSINE,
        ...     total_bits=8,
        ...     num_clusters=256,
        ...     m=16,
        ...     ef_construction=200,
        ...     sample_count=10000
        ... )
        >>> print(params)
        {'metric_type': 'COSINE', 'total_bits': 8, 'num_clusters': 256, 'm': 16, 'ef_construction': 200, 'sample_count': 10000}
    """

    # Pickle-protocol hooks: state is exported and restored as a tuple.
    def __getstate__(self) -> tuple: ...
    # The metric default is elided (``...``) in this stub; the class docstring
    # lists it as MetricType.IP.
    def __init__(
        self,
        metric_type: _zvec.typing.MetricType = ...,
        total_bits: typing.SupportsInt = 7,
        num_clusters: typing.SupportsInt = 16,
        m: typing.SupportsInt = 50,
        ef_construction: typing.SupportsInt = 500,
        sample_count: typing.SupportsInt = 0,
    ) -> None: ...
    def __repr__(self) -> str: ...
    def __setstate__(self, arg0: tuple) -> None: ...
    def to_dict(self) -> dict:
        """
        Convert to dictionary with all fields
        """

    @property
    def ef_construction(self) -> int:
        """
        int: Candidate list size during index construction.
        """

    @property
    def m(self) -> int:
        """
        int: Maximum number of neighbors per node.
        """

    @property
    def total_bits(self) -> int:
        """
        int: Total bits for RabitQ quantization.
        """

    @property
    def num_clusters(self) -> int:
        """
        int: Number of clusters for RabitQ.
        """

    @property
    def sample_count(self) -> int:
        """
        int: Sample count for RabitQ training.
        """

class HnswRabitqQueryParam(QueryParam):
    """Query parameters for HNSW index with RabitQ quantization.

    Controls the trade-off between search speed and accuracy via the `ef` parameter.

    Attributes:
        type (IndexType): Always ``IndexType.HNSW_RABITQ``.
        ef (int): Size of the dynamic candidate list during search.
            Larger values improve recall but slow down search.
            Default is 300.
        radius (float): Search radius for range queries. Default is 0.0.
        is_linear (bool): Force linear search. Default is False.
        is_using_refiner (bool): Whether to use refiner for the query. Default is False.

    Examples:
        >>> params = HnswRabitqQueryParam(ef=300)
        >>> print(params.ef)
        300
    """
    # NOTE(review): only ``ef`` has a property stub in this class; ``type``,
    # ``radius``, ``is_linear`` and ``is_using_refiner`` are presumably
    # inherited from QueryParam — confirm against the binding.
    # Pickle-protocol hooks: state is exported and restored as a tuple.
    def __getstate__(self) -> tuple: ...
    def __init__(
        self,
        ef: typing.SupportsInt = 300,
        radius: typing.SupportsFloat = 0.0,
        is_linear: bool = False,
        is_using_refiner: bool = False,
    ) -> None:
        """
        Constructs an HnswRabitqQueryParam instance.

        Args:
            ef (int, optional): Search-time candidate list size.
                Higher values improve accuracy. Defaults to 300.
            radius (float, optional): Search radius for range queries. Default is 0.0.
            is_linear (bool, optional): Force linear search. Default is False.
            is_using_refiner (bool, optional): Whether to use refiner for the query. Default is False.
        """
    def __repr__(self) -> str: ...
    def __setstate__(self, arg0: tuple) -> None: ...
    @property
    def ef(self) -> int:
        """
        int: Size of the dynamic candidate list during HNSW search.
        """

class IVFIndexParam(VectorIndexParam):
    """Parameters for configuring an IVF (Inverted File Index) index.

    IVF partitions the vector space into clusters (inverted lists). At query time,
    only a subset of clusters is searched, providing a trade-off between speed
    and accuracy.

    Attributes:
        metric_type (MetricType): Distance metric used for similarity computation.
            Default is ``MetricType.IP`` (inner product).
        n_list (int): Number of clusters (inverted lists) to partition the dataset into.
            If set to 0, the system will auto-select a reasonable value based on data size.
            Default is 0 (auto).
        n_iters (int): Number of iterations for k-means clustering during index training.
            Higher values yield more stable centroids. Default is 10.
        use_soar (bool): Whether to enable SOAR (Scalable Optimized Adaptive Routing)
            for improved IVF search performance. Default is False.
        quantize_type (QuantizeType): Optional quantization type for vector
            compression (e.g., FP16, INT8). Default is ``QuantizeType.UNDEFINED``.

    Examples:
        >>> from zvec.typing import MetricType, QuantizeType
        >>> params = IVFIndexParam(
        ...     metric_type=MetricType.COSINE,
        ...     n_list=100,
        ...     n_iters=15,
        ...     use_soar=True,
        ...     quantize_type=QuantizeType.INT8
        ... )
        >>> print(params.n_list)
        100
    """

    # NOTE(review): in the ANN literature "SOAR" usually abbreviates "Spilling
    # with Orthogonality-Amplified Residuals"; confirm which technique
    # ``use_soar`` enables and whether the expansion in the docstring is right.
    # Pickle-protocol hooks: state is exported and restored as a tuple.
    def __getstate__(self) -> tuple: ...
    # Enum defaults are elided (``...``) in this stub; the docstring below
    # states them.
    def __init__(
        self,
        metric_type: _zvec.typing.MetricType = ...,
        n_list: typing.SupportsInt = 0,
        n_iters: typing.SupportsInt = 10,
        use_soar: bool = False,
        quantize_type: _zvec.typing.QuantizeType = ...,
    ) -> None:
        """
        Constructs an IVFIndexParam instance.

        Args:
            metric_type (MetricType, optional): Distance metric. Defaults to MetricType.IP.
            n_list (int, optional): Number of inverted lists (clusters). Set to 0 for auto.
                Defaults to 0.
            n_iters (int, optional): Number of k-means iterations during training.
                Defaults to 10.
            use_soar (bool, optional): Enable SOAR optimization. Defaults to False.
            quantize_type (QuantizeType, optional): Vector quantization type.
                Defaults to QuantizeType.UNDEFINED.
        """

    def __repr__(self) -> str: ...
    def __setstate__(self, arg0: tuple) -> None: ...
    def to_dict(self) -> dict:
        """
        Convert to dictionary with all fields
        """

    @property
    def n_iters(self) -> int:
        """
        int: Number of k-means iterations during training.
        """

    @property
    def n_list(self) -> int:
        """
        int: Number of inverted lists (0 = auto).
        """

    @property
    def use_soar(self) -> bool:
        """
        bool: Whether SOAR optimization is enabled.
        """

class IVFQueryParam(QueryParam):
    """Query parameters for IVF (Inverted File Index) index.

    Controls how many inverted lists (`nprobe`) to visit during search.

    Attributes:
        type (IndexType): Always ``IndexType.IVF``.
        nprobe (int): Number of closest clusters (inverted lists) to search.
            Higher values improve recall but increase latency.
            Default is 10.
        radius (float): Search radius for range queries. Default is 0.0.
        is_linear (bool): Force linear search. Default is False.

    Examples:
        >>> params = IVFQueryParam(nprobe=20)
        >>> print(params.nprobe)
        20
    """
    # NOTE(review): ``radius`` and ``is_linear`` are listed as attributes but
    # the constructor below accepts only ``nprobe`` (unlike HnswQueryParam).
    # Presumably they come from QueryParam with fixed defaults — confirm.
    # Pickle-protocol hooks: state is exported and restored as a tuple.
    def __getstate__(self) -> tuple: ...
    def __init__(self, nprobe: typing.SupportsInt = 10) -> None:
        """
        Constructs an IVFQueryParam instance.

        Args:
            nprobe (int, optional): Number of inverted lists to probe during search.
                Higher values improve accuracy. Defaults to 10.
        """
    def __repr__(self) -> str: ...
    def __setstate__(self, arg0: tuple) -> None: ...
    @property
    def nprobe(self) -> int:
        """
        int: Number of inverted lists to search during IVF query.
        """

class IndexOption:
    """Options for creating an index.

    Attributes:
        concurrency (int): Number of threads to use during index creation.
            If 0, the system will choose an optimal value automatically.
            Default is 0.

    Examples:
        >>> opt = IndexOption(concurrency=4)
        >>> print(opt.concurrency)
        4
    """

    # Pickle-protocol hooks: state is exported and restored as a tuple.
    def __getstate__(self) -> tuple: ...
    def __init__(self, concurrency: typing.SupportsInt = 0) -> None:
        """
        Constructs an IndexOption instance.

        Args:
            concurrency (int, optional): Number of concurrent threads.
                0 means auto-detect. Defaults to 0.
        """

    def __setstate__(self, arg0: tuple) -> None: ...
    # Declared with a getter only in this stub.
    @property
    def concurrency(self) -> int:
        """
        int: Number of threads used for index creation (0 = auto).
        """

class IndexParam:
    """

    Base class for all index parameter configurations.

    This abstract base class defines the common interface for index types.
    It should not be instantiated directly; use derived classes instead.

    Attributes:
        type (IndexType): The type of the index (e.g., HNSW, FLAT, INVERT).
    """

    # Instances are unhashable: __eq__ is defined below and __hash__ is
    # explicitly set to None.
    __hash__: typing.ClassVar[None] = None

    def __eq__(self, arg0: typing.Any) -> bool: ...
    # Pickle protocol hooks; the state is exchanged as a tuple.
    def __getstate__(self) -> tuple: ...
    def __setstate__(self, arg0: tuple) -> None: ...
    # Returns an IndexParam; presumably a copy of this configuration —
    # NOTE(review): confirm copy semantics against the native binding.
    def clone(self) -> IndexParam: ...
    def to_dict(self) -> dict:
        """
        Convert to dictionary with all fields
        """

    @property
    def type(self) -> _zvec.typing.IndexType:
        """
        IndexType: The type of the index.
        """

class InvertIndexParam(IndexParam):
    """

    Parameters for configuring an invert index.

    This class controls whether range query
    optimization is enabled for invert index structures.

    Attributes:
        type (IndexType): Always `IndexType.INVERTED`.
        enable_range_optimization (bool): Whether range optimization is enabled.
        enable_extended_wildcard (bool): Whether extended wildcard (suffix and infix) search is enabled.

    Examples:
        >>> params = InvertIndexParam(enable_range_optimization=True, enable_extended_wildcard=False)
        >>> print(params.enable_range_optimization)
        True
        >>> print(params.enable_extended_wildcard)
        False
        >>> config = params.to_dict()
        >>> print(config)
        {'enable_range_optimization': True, 'enable_extended_wildcard': False}
    """
    # Pickle protocol hooks (__getstate__/__setstate__) exchange the state as
    # a tuple.
    def __getstate__(self) -> tuple: ...
    def __init__(
        self,
        enable_range_optimization: bool = False,
        enable_extended_wildcard: bool = False,
    ) -> None:
        """
        Constructs an InvertIndexParam instance.

        Args:
            enable_range_optimization (bool, optional): If True, enables range query
                optimization for the invert index. Defaults to False.
            enable_extended_wildcard (bool, optional): If True, enables extended wildcard
                search including suffix and infix patterns. Defaults to False.
        """
    def __repr__(self) -> str: ...
    def __setstate__(self, arg0: tuple) -> None: ...
    def to_dict(self) -> dict:
        """
        Convert to dictionary with all fields
        """
    # Both flags below are exposed as read-only properties in this stub.
    @property
    def enable_extended_wildcard(self) -> bool:
        """
        bool: Whether extended wildcard (suffix and infix) search is enabled.
        Note: Prefix search is always enabled regardless of this setting.
        """
    @property
    def enable_range_optimization(self) -> bool:
        """
        bool: Whether range optimization is enabled for this inverted index.
        """

class OptimizeOption:
    """

    Options for optimizing a collection (e.g., merging segments).

    Attributes:
        concurrency (int): Number of threads to use during optimization.
            If 0, the system will choose an optimal value automatically.
            Default is 0.

    Examples:
        >>> opt = OptimizeOption(concurrency=2)
        >>> print(opt.concurrency)
        2
    """

    # Pickle protocol hooks: __getstate__ exposes the state as a tuple and
    # __setstate__ accepts a tuple to restore it.
    def __getstate__(self) -> tuple: ...
    def __init__(self, concurrency: typing.SupportsInt = 0) -> None:
        """
        Constructs an OptimizeOption instance.

        Args:
            concurrency (int, optional): Number of concurrent threads.
                0 means auto-detect. Defaults to 0.
        """

    def __setstate__(self, arg0: tuple) -> None: ...
    # Exposed as a read-only property in this stub (no setter is declared).
    @property
    def concurrency(self) -> int:
        """
        int: Number of threads used for optimization (0 = auto).
        """

class QueryParam:
    """

    Base class for all query parameter configurations.

    This abstract base class defines common query settings such as search radius
    and whether to force linear (brute-force) search. It should not be instantiated
    directly; use derived classes like `HnswQueryParam` or `IVFQueryParam`.

    Attributes:
        type (IndexType): The index type this query is configured for.
        radius (float): Search radius for range queries. Used in combination with
            top-k to filter results. Default is 0.0 (disabled).
        is_linear (bool): If True, forces brute-force linear search instead of
            using the index. Useful for debugging or small datasets. Default is False.
        is_using_refiner (bool, optional): Whether to use refiner for the query. Default is False.
    """
    # Pickle protocol hooks; the state is exchanged as a tuple.
    def __getstate__(self) -> tuple: ...
    def __setstate__(self, arg0: tuple) -> None: ...
    @property
    def is_linear(self) -> bool:
        """
        bool: Whether to bypass the index and use brute-force linear search.
        """
    @property
    def is_using_refiner(self) -> bool:
        """
        bool: Whether to use refiner for the query.
        """
    @property
    def radius(self) -> float:
        """
        float: Search radius for range queries.
        """
    @property
    def type(self) -> _zvec.typing.IndexType:
        """
        IndexType: The type of index this query targets.
        """

class SegmentOption:
    """

    Options for segment-level operations.

    Currently, this class mirrors CollectionOption and is used internally.
    It supports read-only mode, memory mapping, and buffer configuration.

    Note:
        This class is primarily for internal use. Most users should use
        CollectionOption instead.

    Examples:
        >>> opt = SegmentOption()
        >>> print(opt.enable_mmap)
        True
    """

    # Pickle protocol hooks: __getstate__ exposes the state as a tuple and
    # __setstate__ accepts a tuple to restore it.
    def __getstate__(self) -> tuple: ...
    def __init__(self) -> None:
        """
        Constructs a SegmentOption with default settings.
        """

    def __repr__(self) -> str: ...
    def __setstate__(self, arg0: tuple) -> None: ...
    # The three settings below are read-only properties in this stub; the
    # constructor takes no arguments, so they cannot be set from Python here.
    @property
    def enable_mmap(self) -> bool:
        """
        bool: Whether memory-mapped I/O is enabled.
        """

    @property
    def max_buffer_size(self) -> int:
        """
        int: Maximum buffer size in bytes (internal use).
        """

    @property
    def read_only(self) -> bool:
        """
        bool: Whether the segment is read-only.
        """

class VectorIndexParam(IndexParam):
    """

    Base class for vector index parameter configurations.

    Encapsulates common settings for all vector index types.

    Attributes:
        type (IndexType): The specific vector index type (e.g., HNSW, FLAT).
        metric_type (MetricType): Distance metric used for similarity search.
        quantize_type (QuantizeType): Optional vector quantization type.
    """

    # Pickle protocol hooks; the state is exchanged as a tuple.
    def __getstate__(self) -> tuple: ...
    def __setstate__(self, arg0: tuple) -> None: ...
    def to_dict(self) -> dict:
        """
        Convert to dictionary with all fields
        """

    # Both settings are exposed as read-only properties in this stub.
    @property
    def metric_type(self) -> _zvec.typing.MetricType:
        """
        MetricType: Distance metric (e.g., IP, COSINE, L2).
        """

    @property
    def quantize_type(self) -> _zvec.typing.QuantizeType:
        """
        QuantizeType: Vector quantization type (e.g., FP16, INT8).
        """

# Stub for the native vector-query object. The leading underscore marks it as
# internal; NOTE(review): presumably the Python-level VectorQuery is translated
# into this type before a search is executed — confirm against the caller.
class _VectorQuery:
    # Plain read/write attributes declared by the binding.
    field_name: str
    filter: str
    include_vector: bool
    query_params: QueryParam

    # Pickle protocol hooks; the state is exchanged as a tuple.
    def __getstate__(self) -> tuple: ...
    def __init__(self) -> None: ...
    def __setstate__(self, arg0: tuple) -> None: ...
    # Argument meanings are not visible in this stub (arg0 is untyped, arg1 is
    # Any). NOTE(review): document them once the binding signature is known.
    def set_vector(self, arg0: ..., arg1: typing.Any) -> None: ...
    # output_fields may be None; the setter accepts any sequence of str or None.
    @property
    def output_fields(self) -> list[str] | None: ...
    @output_fields.setter
    def output_fields(self, arg0: collections.abc.Sequence[str] | None) -> None: ...
    @property
    def topk(self) -> int: ...
    @topk.setter
    def topk(self, arg0: typing.SupportsInt) -> None: ...


================================================
FILE: python/zvec/model/param/vector_query.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from dataclasses import dataclass
from typing import Optional, Union

from ...common import VectorType
from . import HnswQueryParam, IVFQueryParam

__all__ = ["VectorQuery"]


@dataclass(frozen=True)
class VectorQuery:
    """Represents a vector search query for a specific field in a collection.

    A `VectorQuery` can be constructed using either a document ID (to look up
    its vector) or an explicit vector. It may optionally include index-specific
    query parameters to control search behavior (e.g., `ef` for HNSW, `nprobe` for IVF).

    Exactly one of `id` or `vector` should be provided. Supplying both a
    non-empty `id` and a non-empty `vector` is rejected by `_validate()` with
    a `ValueError`.

    Attributes:
        field_name (str): Name of the vector field to query.
        id (Optional[str], optional): Document ID to fetch vector from. Default is None.
        vector (VectorType, optional): Explicit query vector. Default is None.
        param (Optional[Union[HnswQueryParam, IVFQueryParam]], optional):
            Index-specific query parameters. Default is None.

    Examples:
        >>> import zvec
        >>> # Query by ID
        >>> q1 = zvec.VectorQuery(field_name="embedding", id="doc123")
        >>> # Query by vector
        >>> q2 = zvec.VectorQuery(
        ...     field_name="embedding",
        ...     vector=[0.1, 0.2, 0.3],
        ...     param=HnswQueryParam(ef=300)
        ... )
    """

    field_name: str
    id: Optional[str] = None
    vector: VectorType = None
    param: Optional[Union[HnswQueryParam, IVFQueryParam]] = None

    def has_id(self) -> bool:
        """Check if the query is based on a document ID.

        Returns:
            bool: True if `id` is set, False otherwise.
        """
        return self.id is not None

    def has_vector(self) -> bool:
        """Check if the query contains an explicit vector.

        Returns:
            bool: True if `vector` is non-empty, False otherwise.
        """
        return self.vector is not None and len(self.vector) > 0

    def _validate(self) -> None:
        """Check the query for inconsistent arguments.

        Raises:
            ValueError: If `field_name` is None, or if both a non-empty `id`
                and a non-empty `vector` are supplied.
        """
        if self.field_name is None:
            raise ValueError("Field name cannot be empty")
        # Use the length-based has_vector() instead of truth-testing the
        # vector: array-like vectors may not support bool() (it raises or
        # depends on element values), which would hide the intended error.
        if self.id and self.has_vector():
            raise ValueError("Cannot provide both id and vector")


================================================
FILE: python/zvec/model/schema/__init__.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from _zvec.schema import CollectionStats

from .collection_schema import CollectionSchema
from .field_schema import FieldSchema, VectorSchema

__all__ = ["CollectionSchema", "CollectionStats", "FieldSchema", "VectorSchema"]


================================================
FILE: python/zvec/model/schema/__init__.pyi
================================================
"""
This module contains the schema of Zvec
"""

from __future__ import annotations

import collections.abc
import typing

import _zvec.param
import _zvec.typing

from .collection_schema import CollectionSchema
from .field_schema import FieldSchema, VectorSchema

__all__: list[str] = [
    "CollectionSchema",
    "CollectionStats",
    "FieldSchema",
    "VectorSchema",
]

# Stub for the statistics object exported by the native module. Both values
# are read-only properties in this stub.
class CollectionStats:
    def __init__(self) -> None: ...
    def __repr__(self) -> str: ...
    # Number of documents, as an int.
    @property
    def doc_count(self) -> int: ...
    # Mapping of str keys to float values. NOTE(review): presumably keyed by
    # field/index name with a completeness ratio — confirm against the binding.
    @property
    def index_completeness(self) -> dict[str, float]: ...

# Stub for the native collection-schema object wrapped by the Python-level
# CollectionSchema class.
class _CollectionSchema:
    # Instances are unhashable: __eq__ is defined and __hash__ is set to None.
    __hash__: typing.ClassVar[None] = None

    def __eq__(self, arg0: _CollectionSchema) -> bool: ...
    def __init__(
        self, name: str, fields: collections.abc.Sequence[_FieldSchema]
    ) -> None:
        """
        Construct with name and list of fields
        """

    def __ne__(self, arg0: _CollectionSchema) -> bool: ...
    def fields(self) -> list[_FieldSchema]:
        """
        Return list of all field schemas.
        """

    def forward_fields(self) -> list[_FieldSchema]:
        """
        Return list of forward-indexed fields.
        """

    # NOTE(review): the docstring says None is returned when the field is
    # missing, but the return annotation is non-optional — confirm which holds.
    def get_field(self, field_name: str) -> _FieldSchema:
        """
        Get field by name (const pointer), returns None if not found.
        """

    def get_forward_field(self, field_name: str) -> _FieldSchema:
        """
        Get forward field (used for filtering).
        """

    def get_vector_field(self, field_name: str) -> _FieldSchema:
        """
        Get vector field by name.
        """

    def has_field(self, field_name: str) -> bool:
        """
        Check if a field exists.
        """

    def vector_fields(self) -> list[_FieldSchema]:
        """
        Return list of vector fields.
        """

    @property
    def name(self) -> str: ...

# Stub for the native field-schema object. The same type is used for both
# scalar and vector fields (see is_dense_vector / is_sparse_vector below).
class _FieldSchema:
    # Instances are unhashable: __eq__ is defined and __hash__ is set to None.
    __hash__: typing.ClassVar[None] = None

    def __eq__(self, arg0: _FieldSchema) -> bool: ...
    # Only name and data_type are required; nullable defaults to False,
    # dimension to 0 and index_param to None.
    def __init__(
        self,
        name: str,
        data_type: _zvec.typing.DataType,
        nullable: bool = False,
        dimension: typing.SupportsInt = 0,
        index_param: _zvec.param.IndexParam = None,
    ) -> None: ...
    def __ne__(self, arg0: _FieldSchema) -> bool: ...
    # All attributes below are read-only properties in this stub.
    @property
    def data_type(self) -> _zvec.typing.DataType: ...
    @property
    def dimension(self) -> int: ...
    # Typed as Any in the stub; NOTE(review): presumably the concrete
    # IndexParam subclass or None — confirm against the binding.
    @property
    def index_param(self) -> typing.Any: ...
    @property
    def index_type(self) -> _zvec.typing.IndexType: ...
    @property
    def is_dense_vector(self) -> bool: ...
    @property
    def is_sparse_vector(self) -> bool: ...
    @property
    def name(self) -> str: ...
    @property
    def nullable(self) -> bool: ...


================================================
FILE: python/zvec/model/schema/collection_schema.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import json
from typing import Optional, Union

from _zvec.schema import _CollectionSchema, _FieldSchema

from .field_schema import FieldSchema, VectorSchema

__all__ = [
    "CollectionSchema",
]


class CollectionSchema:
    """Defines the structure of a collection in Zvec.

    A collection schema specifies the name of the collection and its fields,
    including both scalar fields (e.g., int, string) and vector fields.
    Field names must be unique across both scalar and vector fields.

    Args:
        name (str): Name of the collection.
        fields (Optional[Union[FieldSchema, list[FieldSchema]]], optional):
            One or more scalar field definitions. Defaults to None.
        vectors (Optional[Union[VectorSchema, list[VectorSchema]]], optional):
            One or more vector field definitions. Defaults to None.

    Raises:
        TypeError: If `fields` or `vectors` (or any item inside them) are of
            unsupported types.
        ValueError: If `name` is not a str, or if any field or vector name
            is duplicated.

    Examples:
        >>> from zvec import FieldSchema, VectorSchema, DataType, IndexType
        >>> id_field = FieldSchema("id", DataType.INT64)
        >>> emb_field = VectorSchema("embedding", data_type=DataType.VECTOR_FP32, dimension=128)
        >>> schema = CollectionSchema(
        ...     name="my_collection",
        ...     fields=id_field,
        ...     vectors=emb_field
        ... )
        >>> print(schema.name)
        my_collection
    """

    def __init__(
        self,
        name: str,
        fields: Optional[Union[FieldSchema, list[FieldSchema]]] = None,
        vectors: Optional[Union[VectorSchema, list[VectorSchema]]] = None,
    ):
        """Validate the arguments and build the underlying core schema object."""
        if name is None or not isinstance(name, str):
            raise ValueError(
                f"schema validate failed: collection name must be str, got {type(name).__name__}"
            )

        # Accumulators shared by both checks: the names seen so far (used for
        # duplicate detection across scalar AND vector fields) and the core
        # field objects in declaration order (scalars first, then vectors).
        _fields_name: list[str] = []
        _fields_list: list[_FieldSchema] = []

        self._check_fields(fields, _fields_name, _fields_list)
        self._check_vectors(vectors, _fields_name, _fields_list)

        # Build the core object from the validated, flattened field list.
        self._cpp_obj = _CollectionSchema(
            name=name,
            fields=_fields_list,
        )

    def _check_fields(
        self,
        fields: Optional[Union[FieldSchema, list[FieldSchema]]],
        _fields_name: list[str],
        _fields_list: list[_FieldSchema],
    ) -> None:
        """Validate scalar fields and append them to the shared accumulators.

        Args:
            fields: A single FieldSchema, a list of them, or None.
            _fields_name: Names collected so far; extended in place.
            _fields_list: Core field objects collected so far; extended in place.

        Raises:
            TypeError: If `fields` or one of its items has an unsupported type.
            ValueError: If a field name was already collected.
        """
        field_items = []

        # Normalize the accepted input shapes to a list.
        if isinstance(fields, FieldSchema):
            field_items = [fields]
        elif isinstance(fields, list):
            field_items = fields
        elif fields is None:
            field_items = []
        else:
            raise TypeError(
                f"schema validate failed: invalid 'fields' type, expected FieldSchema or list[FieldSchema], "
                f"got {type(fields).__name__}"
            )

        for idx, field in enumerate(field_items):
            if not isinstance(field, FieldSchema):
                raise TypeError(
                    f"schema validate failed: invalid field type in 'fields' list, expected FieldSchema, "
                    f"got {type(field).__name__} at index {idx}"
                )

            if field.name in _fields_name:
                raise ValueError(
                    f"schema validate failed: duplicate field name '{field.name}': field names must be unique"
                )
            _fields_name.append(field.name)
            _fields_list.append(field._get_object())

    def _check_vectors(
        self,
        vectors: Optional[Union[VectorSchema, list[VectorSchema]]],
        _fields_name: list[str],
        _fields_list: list[_FieldSchema],
    ) -> None:
        """Validate vector fields and append them to the shared accumulators.

        Args:
            vectors: A single VectorSchema, a list of them, or None.
            _fields_name: Names collected so far (including scalar fields);
                extended in place.
            _fields_list: Core field objects collected so far; extended in place.

        Raises:
            TypeError: If `vectors` or one of its items has an unsupported type.
            ValueError: If a vector name collides with an already collected
                field or vector name.
        """
        # Normalize the accepted input shapes to a list.
        if isinstance(vectors, VectorSchema):
            vectors_items = [vectors]
        elif isinstance(vectors, list):
            vectors_items = vectors
        elif vectors is None:
            vectors_items = []
        else:
            raise TypeError(
                f"schema validate failed: invalid 'vectors' type, expected VectorSchema or list[VectorSchema], "
                f"got {type(vectors).__name__}"
            )

        for idx, vector in enumerate(vectors_items):
            if not isinstance(vector, VectorSchema):
                raise TypeError(
                    f"schema validate failed: invalid vector type in 'vectors' list, expected VectorSchema, "
                    f"got {type(vector).__name__} at index {idx}"
                )

            if vector.name in _fields_name:
                raise ValueError(
                    f"schema validate failed: duplicate vector name '{vector.name}', vector names must be unique "
                    f"(conflicts with existing field or vector)"
                )
            _fields_name.append(vector.name)
            _fields_list.append(vector._get_object())

    @classmethod
    def _from_core(cls, core_collection_schema: _CollectionSchema):
        """Wrap an existing core schema object without running `__init__`.

        Raises:
            ValueError: If `core_collection_schema` is falsy (e.g. None).
        """
        inst = cls.__new__(cls)
        if not core_collection_schema:
            raise ValueError("schema validate failed: schema is null")
        inst._cpp_obj = core_collection_schema
        return inst

    @property
    def name(self) -> str:
        """str: The name of the collection."""
        return self._cpp_obj.name

    def field(self, name: str) -> Optional[FieldSchema]:
        """Retrieve a scalar field by name.

        Args:
            name (str): Name of the field.

        Returns:
            Optional[FieldSchema]: The field if found, otherwise None.
        """
        _field = self._cpp_obj.get_forward_field(name)
        return FieldSchema._from_core(_field) if _field else None

    def vector(self, name: str) -> Optional[VectorSchema]:
        """Retrieve a vector field by name.

        Args:
            name (str): Name of the vector field.

        Returns:
            Optional[VectorSchema]: The vector field if found, otherwise None.
        """
        _field = self._cpp_obj.get_vector_field(name)
        return VectorSchema._from_core(_field) if _field else None

    @property
    def fields(self) -> list[FieldSchema]:
        """list[FieldSchema]: All scalar (non-vector) fields in the schema."""
        _fields = self._cpp_obj.forward_fields()
        return [FieldSchema._from_core(_field) for _field in _fields]

    @property
    def vectors(self) -> list[VectorSchema]:
        """list[VectorSchema]: All vector fields in the schema."""
        _vectors = self._cpp_obj.vector_fields()
        return [VectorSchema._from_core(_vector) for _vector in _vectors]

    def _get_object(self) -> _CollectionSchema:
        """Return the wrapped core schema object."""
        return self._cpp_obj

    def __repr__(self) -> str:
        """Return the schema as indented JSON, or an error marker on failure."""
        try:
            # FieldSchema/VectorSchema define `__dict__` as a method that
            # returns a plain dict, hence the explicit call.
            schema = {
                "name": self.name,
                "fields": {field.name: field.__dict__() for field in self.fields},
                "vectors": {vector.name: vector.__dict__() for vector in self.vectors},
            }
            return json.dumps(schema, indent=2, ensure_ascii=False)
        except Exception as e:
            # repr must never raise; report the failure inline instead.
            return f"<CollectionSchema error during repr: {e}>"

    def __str__(self) -> str:
        return self.__repr__()


================================================
FILE: python/zvec/model/schema/field_schema.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import json
from typing import Any, Optional, Union

from _zvec.schema import _FieldSchema

from zvec.model.param import (
    FlatIndexParam,
    HnswIndexParam,
    InvertIndexParam,
    IVFIndexParam,
)
from zvec.typing import DataType

__all__ = [
    "FieldSchema",
    "VectorSchema",
]

# Data types accepted by VectorSchema; any other data_type is rejected with a
# ValueError in VectorSchema.__init__.
SUPPORT_VECTOR_DATA_TYPE = [
    DataType.VECTOR_FP16,
    DataType.VECTOR_FP32,
    DataType.VECTOR_FP64,
    DataType.VECTOR_INT8,
    DataType.SPARSE_VECTOR_FP16,
    DataType.SPARSE_VECTOR_FP32,
]

# Data types accepted by FieldSchema (scalar values and arrays of scalars);
# any other data_type is rejected with a ValueError in FieldSchema.__init__.
SUPPORT_SCALAR_DATA_TYPE = [
    DataType.INT32,
    DataType.INT64,
    DataType.UINT32,
    DataType.UINT64,
    DataType.FLOAT,
    DataType.DOUBLE,
    DataType.STRING,
    DataType.BOOL,
    DataType.ARRAY_INT32,
    DataType.ARRAY_INT64,
    DataType.ARRAY_UINT32,
    DataType.ARRAY_UINT64,
    DataType.ARRAY_FLOAT,
    DataType.ARRAY_DOUBLE,
    DataType.ARRAY_STRING,
    DataType.ARRAY_BOOL,
]


class FieldSchema:
    """Schema of a single scalar (non-vector) field of a collection.

    Bundles the field's name, data type and nullability together with an
    optional inverted index configuration, and wraps the corresponding core
    field object.

    Args:
        name (str): Name of the field. Must be unique within the collection.
        data_type (DataType): Data type of the field (e.g., INT64, STRING).
            Must be one of ``SUPPORT_SCALAR_DATA_TYPE``.
        nullable (bool, optional): Whether the field can contain null values.
            Defaults to False.
        index_param (Optional[InvertIndexParam], optional): Inverted index
            parameters for this field. Defaults to None.

    Raises:
        ValueError: If `name` is not a str or `data_type` is not a supported
            scalar data type.

    Examples:
        >>> from zvec.typing import DataType
        >>> from zvec.model.param import InvertIndexParam
        >>> id_field = FieldSchema(
        ...     name="id",
        ...     data_type=DataType.INT64,
        ...     nullable=False,
        ...     index_param=InvertIndexParam(enable_range_optimization=True)
        ... )
    """

    def __init__(
        self,
        name: str,
        data_type: DataType,
        nullable: bool = False,
        index_param: Optional[InvertIndexParam] = None,
    ):
        # None is not a str, so one isinstance check covers both rejections.
        if not isinstance(name, str):
            raise ValueError(
                f"schema validate failed: field name must be str, got {type(name).__name__}"
            )

        if data_type not in SUPPORT_SCALAR_DATA_TYPE:
            supported = ", ".join(str(dt) for dt in SUPPORT_SCALAR_DATA_TYPE)
            raise ValueError(
                f"schema validate failed: scalar_field's data_type must be one of "
                f"{supported}, "
                f"but field[{name}]'s data_type is {data_type}"
            )

        # Scalar fields are always created with dimension 0.
        self._cpp_obj = _FieldSchema(
            name=name,
            data_type=data_type,
            dimension=0,
            nullable=nullable,
            index_param=index_param,
        )

    @classmethod
    def _from_core(cls, core_field_schema: _FieldSchema):
        """Wrap an existing core field object, bypassing `__init__`."""
        if core_field_schema is not None:
            wrapper = cls.__new__(cls)
            wrapper._cpp_obj = core_field_schema
            return wrapper
        raise ValueError("schema validate failed: field schema is None")

    def _get_object(self) -> _FieldSchema:
        """Return the wrapped core field object."""
        return self._cpp_obj

    @property
    def name(self) -> str:
        """str: The name of the field."""
        return self._cpp_obj.name

    @property
    def data_type(self) -> DataType:
        """DataType: The data type of the field (e.g., INT64, STRING)."""
        return self._cpp_obj.data_type

    @property
    def nullable(self) -> bool:
        """bool: Whether the field allows null values."""
        return self._cpp_obj.nullable

    @property
    def index_param(self) -> Optional[InvertIndexParam]:
        """Optional[InvertIndexParam]: Inverted index configuration, if any."""
        return self._cpp_obj.index_param

    def __dict__(self) -> dict[str, Any]:
        """Return a plain-dict summary of the field (used by `__repr__`)."""
        summary = {"name": self.name}

        # Prefer the enum member name; fall back to str() for anything else.
        dtype = self.data_type
        summary["data_type"] = dtype.name if hasattr(dtype, "name") else str(dtype)

        summary["nullable"] = self.nullable

        param = self.index_param
        summary["index_param"] = None if param is None else param.to_dict()
        return summary

    def __repr__(self) -> str:
        try:
            return json.dumps(self.__dict__(), indent=2, ensure_ascii=False)
        except Exception as e:
            # repr must never raise; report the failure inline instead.
            return f"<FieldSchema error during repr: {e}>"

    def __str__(self) -> str:
        return self.__repr__()

    def __eq__(self, other: object) -> bool:
        # Equality is delegated to the wrapped core objects.
        if isinstance(other, FieldSchema):
            return self._cpp_obj == other._cpp_obj
        return False

    def __hash__(self) -> int:
        identity = (self.name, self.data_type, self.nullable)
        return hash(identity)


class VectorSchema:
    """Represents a vector field in a collection schema.

    A `VectorSchema` defines the name, data type, dimensionality, and index
    configuration for a vector field used in similarity search.

    Args:
        name (str): Name of the vector field. Must be unique within the collection.
        data_type (DataType): Vector data type (e.g., VECTOR_FP32, VECTOR_INT8).
            Must be one of ``SUPPORT_VECTOR_DATA_TYPE``.
        dimension (Optional[int], optional): Dimensionality of the vector.
            Must be a non-negative integer; dense vectors are expected to use
            a value > 0 (not enforced here). ``None`` is accepted and treated
            as 0, e.g. for sparse vectors. Defaults to 0.
        index_param (Union[HnswIndexParam, IVFIndexParam, FlatIndexParam], optional):
            Index configuration for this vector field. Defaults to
            ``FlatIndexParam()`` when omitted.

    Raises:
        ValueError: If `name` is not a str, `dimension` is not a non-negative
            int (or None), or `data_type` is not a supported vector data type.

    Examples:
        >>> from zvec.typing import DataType
        >>> from zvec.model.param import HnswIndexParam
        >>> emb_field = VectorSchema(
        ...     name="embedding",
        ...     data_type=DataType.VECTOR_FP32,
        ...     dimension=128,
        ...     index_param=HnswIndexParam(ef_construction=200, m=16)
        ... )
    """

    def __init__(
        self,
        name: str,
        data_type: DataType,
        dimension: Optional[int] = 0,
        index_param: Optional[
            Union[HnswIndexParam, FlatIndexParam, IVFIndexParam]
        ] = None,
    ):
        if name is None or not isinstance(name, str):
            raise ValueError(
                f"schema validate failed: field name must be str, got {type(name).__name__}"
            )

        # The signature (Optional[int]) allows None; treat it like the
        # default of 0 instead of rejecting it.
        if dimension is None:
            dimension = 0

        if not isinstance(dimension, int) or dimension < 0:
            raise ValueError("schema validate failed: vector's dimension must be >= 0")

        if data_type not in SUPPORT_VECTOR_DATA_TYPE:
            raise ValueError(
                f"schema validate failed: vector's data_type must be one of "
                f"{', '.join(str(dt) for dt in SUPPORT_VECTOR_DATA_TYPE)}, "
                f"but field[{name}]'s data_type is {data_type}"
            )

        # Vector fields always carry an index configuration; fall back to a
        # flat index when the caller does not provide one.
        if index_param is None:
            index_param = FlatIndexParam()

        # Vector fields are always created as non-nullable.
        self._cpp_obj = _FieldSchema(
            name=name,
            data_type=data_type,
            dimension=dimension,
            nullable=False,
            index_param=index_param,
        )

    @classmethod
    def _from_core(cls, core_field_schema: _FieldSchema):
        """Wrap an existing core field object, bypassing `__init__`.

        Raises:
            ValueError: If `core_field_schema` is None (same contract as
                `FieldSchema._from_core`).
        """
        if core_field_schema is None:
            raise ValueError("schema validate failed: field schema is None")
        inst = cls.__new__(cls)
        inst._cpp_obj = core_field_schema
        return inst

    def _get_object(self) -> _FieldSchema:
        """Return the wrapped core field object."""
        return self._cpp_obj

    @property
    def name(self) -> str:
        """str: The name of the vector field."""
        return self._cpp_obj.name

    @property
    def data_type(self) -> DataType:
        """DataType: The vector data type (e.g., VECTOR_FP32)."""
        return self._cpp_obj.data_type

    @property
    def dimension(self) -> int:
        """int: The dimensionality of the vector."""
        return self._cpp_obj.dimension

    @property
    def index_param(self) -> Union[HnswIndexParam, IVFIndexParam, FlatIndexParam]:
        """Union[HnswIndexParam, IVFIndexParam, FlatIndexParam]: Index configuration for the vector."""
        return self._cpp_obj.index_param

    def __dict__(self) -> dict[str, Any]:
        """Return a plain-dict summary of the vector field (used by `__repr__`)."""
        return {
            "name": self.name,
            # Prefer the enum member name; fall back to str() for anything else.
            "data_type": (
                self.data_type.name
                if hasattr(self.data_type, "name")
                else str(self.data_type)
            ),
            "dimension": self.dimension,
            "index_param": (
                self.index_param.to_dict() if self.index_param is not None else None
            ),
        }

    def __repr__(self) -> str:
        try:
            schema = self.__dict__()
            return json.dumps(schema, indent=2, ensure_ascii=False)
        except Exception as e:
            # repr must never raise; report the failure inline instead.
            return f"<VectorSchema error during repr: {e}>"

    def __str__(self) -> str:
        return self.__repr__()

    def __eq__(self, other: object) -> bool:
        # Equality is delegated to the wrapped core objects.
        if not isinstance(other, VectorSchema):
            return False
        return self._cpp_obj == other._cpp_obj

    def __hash__(self) -> int:
        return hash((self.name, self.data_type, self.dimension))


================================================
FILE: python/zvec/py.typed
================================================


================================================
FILE: python/zvec/tool/__init__.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from .util import require_module

# Public names re-exported by this package (``require_module`` comes from ``.util``).
__all__ = ["require_module"]


================================================
FILE: python/zvec/tool/util.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import importlib
from typing import Any, Optional


def require_module(module: str, mitigation: Optional[str] = None) -> Any:
    """Import a Python module and raise a user-friendly error if it is not available.

    This utility helps provide actionable error messages when optional dependencies
    are missing. It attempts to import the given module and, on failure, suggests
    a `pip install` command based on either the optional mitigation package name
    or the top-level package of the module.

    Args:
        module (str): The full module name to import (e.g., ``"numpy"``, ``"pandas.io.parquet"``).
        mitigation (Optional[str], optional): The package name to suggest for installation
            if the import fails. If not provided, the top-level package of `module`
            will be used (e.g., ``"pandas"`` for ``"pandas.io.parquet"``).

    Returns:
        Any: The imported module object.

    Raises:
        ImportError: If the module cannot be imported, with a clear installation hint.
            The original import error is chained as ``__cause__``.

    Examples:
        >>> import zvec
        >>> np = zvec.require_module("numpy")
        >>> pq = zvec.require_module("pyarrow.parquet", mitigation="pyarrow")

    Note:
        This function is intended for lazy-loading optional dependencies
        with helpful error messages, not for core dependencies.
    """
    try:
        return importlib.import_module(module)
    except ImportError as e:
        # The installable unit is the top-level package, never a dotted
        # submodule path, so derive it once and use it wherever no explicit
        # mitigation was supplied.
        top_level = module.split(".", maxsplit=1)[0]
        package = mitigation or top_level
        msg = f"Required package '{package}' is not installed. "
        if "." in module:
            msg += (
                f"Module '{module}' is part of '{top_level}', "
                f"please pip install '{package}'."
            )
        else:
            msg += f"Please pip install '{package}'."
        raise ImportError(msg) from e


================================================
FILE: python/zvec/typing/__init__.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from _zvec.typing import (
    DataType,
    IndexType,
    MetricType,
    QuantizeType,
    Status,
    StatusCode,
)

# Public names of this package; each one is imported above from ``_zvec.typing``.
__all__ = [
    "DataType",
    "IndexType",
    "MetricType",
    "QuantizeType",
    "Status",
    "StatusCode",
]


================================================
FILE: python/zvec/typing/__init__.pyi
================================================
"""
This module contains the basic data types of Zvec
"""

from __future__ import annotations

import typing

# Names declared by this stub; every entry has a class definition below.
__all__: list[str] = [
    "DataType",
    "IndexType",
    "MetricType",
    "QuantizeType",
    "Status",
    "StatusCode",
]

class DataType:
    """
    Enumeration of supported data types in Zvec.

    Includes scalar types, dense/sparse vector types, and array types.

    Examples:
        >>> import zvec
        >>> print(zvec.DataType.FLOAT)
        DataType.FLOAT
        >>> print(zvec.DataType.VECTOR_FP32)
        DataType.VECTOR_FP32

    Members (integer value in parentheses):

      Scalars: STRING (2), BOOL (3), INT32 (4), INT64 (5), UINT32 (6),
      UINT64 (7), FLOAT (8), DOUBLE (9)

      Dense vectors: VECTOR_FP16 (22), VECTOR_FP32 (23), VECTOR_FP64 (24),
      VECTOR_INT8 (26)

      Sparse vectors: SPARSE_VECTOR_FP16 (30), SPARSE_VECTOR_FP32 (31)

      Arrays: ARRAY_STRING (41), ARRAY_BOOL (42), ARRAY_INT32 (43),
      ARRAY_INT64 (44), ARRAY_UINT32 (45), ARRAY_UINT64 (46),
      ARRAY_FLOAT (47), ARRAY_DOUBLE (48)
    """

    # Members, listed alphabetically; the trailing comment on each line shows
    # the member's repr and integer value.
    ARRAY_BOOL: typing.ClassVar[DataType]  # value = <DataType.ARRAY_BOOL: 42>
    ARRAY_DOUBLE: typing.ClassVar[DataType]  # value = <DataType.ARRAY_DOUBLE: 48>
    ARRAY_FLOAT: typing.ClassVar[DataType]  # value = <DataType.ARRAY_FLOAT: 47>
    ARRAY_INT32: typing.ClassVar[DataType]  # value = <DataType.ARRAY_INT32: 43>
    ARRAY_INT64: typing.ClassVar[DataType]  # value = <DataType.ARRAY_INT64: 44>
    ARRAY_STRING: typing.ClassVar[DataType]  # value = <DataType.ARRAY_STRING: 41>
    ARRAY_UINT32: typing.ClassVar[DataType]  # value = <DataType.ARRAY_UINT32: 45>
    ARRAY_UINT64: typing.ClassVar[DataType]  # value = <DataType.ARRAY_UINT64: 46>
    BOOL: typing.ClassVar[DataType]  # value = <DataType.BOOL: 3>
    DOUBLE: typing.ClassVar[DataType]  # value = <DataType.DOUBLE: 9>
    FLOAT: typing.ClassVar[DataType]  # value = <DataType.FLOAT: 8>
    INT32: typing.ClassVar[DataType]  # value = <DataType.INT32: 4>
    INT64: typing.ClassVar[DataType]  # value = <DataType.INT64: 5>
    SPARSE_VECTOR_FP16: typing.ClassVar[
        DataType
    ]  # value = <DataType.SPARSE_VECTOR_FP16: 30>
    SPARSE_VECTOR_FP32: typing.ClassVar[
        DataType
    ]  # value = <DataType.SPARSE_VECTOR_FP32: 31>
    STRING: typing.ClassVar[DataType]  # value = <DataType.STRING: 2>
    UINT32: typing.ClassVar[DataType]  # value = <DataType.UINT32: 6>
    UINT64: typing.ClassVar[DataType]  # value = <DataType.UINT64: 7>
    VECTOR_FP16: typing.ClassVar[DataType]  # value = <DataType.VECTOR_FP16: 22>
    VECTOR_FP32: typing.ClassVar[DataType]  # value = <DataType.VECTOR_FP32: 23>
    VECTOR_FP64: typing.ClassVar[DataType]  # value = <DataType.VECTOR_FP64: 24>
    VECTOR_INT8: typing.ClassVar[DataType]  # value = <DataType.VECTOR_INT8: 26>
    # Mapping from member name to member.
    __members__: typing.ClassVar[
        dict[str, DataType]
    ]  # value = {'STRING': <DataType.STRING: 2>, 'BOOL': <DataType.BOOL: 3>, 'INT32': <DataType.INT32: 4>, 'INT64': <DataType.INT64: 5>, 'FLOAT': <DataType.FLOAT: 8>, 'DOUBLE': <DataType.DOUBLE: 9>, 'UINT32': <DataType.UINT32: 6>, 'UINT64': <DataType.UINT64: 7>, 'VECTOR_FP16': <DataType.VECTOR_FP16: 22>, 'VECTOR_FP32': <DataType.VECTOR_FP32: 23>, 'VECTOR_FP64': <DataType.VECTOR_FP64: 24>, 'VECTOR_INT8': <DataType.VECTOR_INT8: 26>, 'SPARSE_VECTOR_FP32': <DataType.SPARSE_VECTOR_FP32: 31>, 'SPARSE_VECTOR_FP16': <DataType.SPARSE_VECTOR_FP16: 30>, 'ARRAY_STRING': <DataType.ARRAY_STRING: 41>, 'ARRAY_INT32': <DataType.ARRAY_INT32: 43>, 'ARRAY_INT64': <DataType.ARRAY_INT64: 44>, 'ARRAY_FLOAT': <DataType.ARRAY_FLOAT: 47>, 'ARRAY_DOUBLE': <DataType.ARRAY_DOUBLE: 48>, 'ARRAY_BOOL': <DataType.ARRAY_BOOL: 42>, 'ARRAY_UINT32': <DataType.ARRAY_UINT32: 45>, 'ARRAY_UINT64': <DataType.ARRAY_UINT64: 46>}

    # Comparison, hashing, integer conversion and state (de)serialisation hooks.
    def __eq__(self, other: typing.Any) -> bool: ...
    def __getstate__(self) -> int: ...
    def __hash__(self) -> int: ...
    def __index__(self) -> int: ...
    def __init__(self, value: typing.SupportsInt) -> None: ...
    def __int__(self) -> int: ...
    def __ne__(self, other: typing.Any) -> bool: ...
    def __repr__(self) -> str: ...
    def __setstate__(self, state: typing.SupportsInt) -> None: ...
    def __str__(self) -> str: ...
    # Read-only accessors for the member's name and integer value.
    @property
    def name(self) -> str: ...
    @property
    def value(self) -> int: ...

class IndexType:
    """
    Enumeration of supported index types in Zvec.

    Examples:
        >>> import zvec
        >>> print(zvec.IndexType.HNSW)
        IndexType.HNSW

    Members (integer value in parentheses):

      UNDEFINED (0), HNSW (1), IVF (3), FLAT (4), INVERT (10)
    """

    # Members, listed alphabetically; the trailing comment on each line shows
    # the member's repr and integer value.
    FLAT: typing.ClassVar[IndexType]  # value = <IndexType.FLAT: 4>
    HNSW: typing.ClassVar[IndexType]  # value = <IndexType.HNSW: 1>
    INVERT: typing.ClassVar[IndexType]  # value = <IndexType.INVERT: 10>
    IVF: typing.ClassVar[IndexType]  # value = <IndexType.IVF: 3>
    UNDEFINED: typing.ClassVar[IndexType]  # value = <IndexType.UNDEFINED: 0>
    # Mapping from member name to member.
    __members__: typing.ClassVar[
        dict[str, IndexType]
    ]  # value = {'UNDEFINED': <IndexType.UNDEFINED: 0>, 'HNSW': <IndexType.HNSW: 1>, 'IVF': <IndexType.IVF: 3>, 'FLAT': <IndexType.FLAT: 4>, 'INVERT': <IndexType.INVERT: 10>}

    # Comparison, hashing, integer conversion and state (de)serialisation hooks.
    def __eq__(self, other: typing.Any) -> bool: ...
    def __getstate__(self) -> int: ...
    def __hash__(self) -> int: ...
    def __index__(self) -> int: ...
    def __init__(self, value: typing.SupportsInt) -> None: ...
    def __int__(self) -> int: ...
    def __ne__(self, other: typing.Any) -> bool: ...
    def __repr__(self) -> str: ...
    def __setstate__(self, state: typing.SupportsInt) -> None: ...
    def __str__(self) -> str: ...
    # Read-only accessors for the member's name and integer value.
    @property
    def name(self) -> str: ...
    @property
    def value(self) -> int: ...

class MetricType:
    """
    Enumeration of supported distance/similarity metrics.

    - COSINE: Cosine similarity.
    - IP: Inner product (dot product).
    - L2: Euclidean distance (L2 norm).

    Examples:
        >>> import zvec
        >>> print(zvec.MetricType.COSINE)
        MetricType.COSINE

    Members (integer value in parentheses):

      L2 (1), IP (2), COSINE (3)
    """

    # Members, listed alphabetically; the trailing comment on each line shows
    # the member's repr and integer value.
    COSINE: typing.ClassVar[MetricType]  # value = <MetricType.COSINE: 3>
    IP: typing.ClassVar[MetricType]  # value = <MetricType.IP: 2>
    L2: typing.ClassVar[MetricType]  # value = <MetricType.L2: 1>
    # Mapping from member name to member.
    __members__: typing.ClassVar[
        dict[str, MetricType]
    ]  # value = {'COSINE': <MetricType.COSINE: 3>, 'IP': <MetricType.IP: 2>, 'L2': <MetricType.L2: 1>}

    # Comparison, hashing, integer conversion and state (de)serialisation hooks.
    def __eq__(self, other: typing.Any) -> bool: ...
    def __getstate__(self) -> int: ...
    def __hash__(self) -> int: ...
    def __index__(self) -> int: ...
    def __init__(self, value: typing.SupportsInt) -> None: ...
    def __int__(self) -> int: ...
    def __ne__(self, other: typing.Any) -> bool: ...
    def __repr__(self) -> str: ...
    def __setstate__(self, state: typing.SupportsInt) -> None: ...
    def __str__(self) -> str: ...
    # Read-only accessors for the member's name and integer value.
    @property
    def name(self) -> str: ...
    @property
    def value(self) -> int: ...

class QuantizeType:
    """
    Enumeration of supported quantization types for vector compression.

    Examples:
        >>> import zvec
        >>> print(zvec.QuantizeType.INT8)
        QuantizeType.INT8

    Members (integer value in parentheses):

      UNDEFINED (0), FP16 (1), INT8 (2), INT4 (3)
    """

    # Members, listed alphabetically; the trailing comment on each line shows
    # the member's repr and integer value.
    FP16: typing.ClassVar[QuantizeType]  # value = <QuantizeType.FP16: 1>
    INT4: typing.ClassVar[QuantizeType]  # value = <QuantizeType.INT4: 3>
    INT8: typing.ClassVar[QuantizeType]  # value = <QuantizeType.INT8: 2>
    UNDEFINED: typing.ClassVar[QuantizeType]  # value = <QuantizeType.UNDEFINED: 0>
    # Mapping from member name to member.
    __members__: typing.ClassVar[
        dict[str, QuantizeType]
    ]  # value = {'UNDEFINED': <QuantizeType.UNDEFINED: 0>, 'FP16': <QuantizeType.FP16: 1>, 'INT8': <QuantizeType.INT8: 2>, 'INT4': <QuantizeType.INT4: 3>}

    # Comparison, hashing, integer conversion and state (de)serialisation hooks.
    def __eq__(self, other: typing.Any) -> bool: ...
    def __getstate__(self) -> int: ...
    def __hash__(self) -> int: ...
    def __index__(self) -> int: ...
    def __init__(self, value: typing.SupportsInt) -> None: ...
    def __int__(self) -> int: ...
    def __ne__(self, other: typing.Any) -> bool: ...
    def __repr__(self) -> str: ...
    def __setstate__(self, state: typing.SupportsInt) -> None: ...
    def __str__(self) -> str: ...
    # Read-only accessors for the member's name and integer value.
    @property
    def name(self) -> str: ...
    @property
    def value(self) -> int: ...

class Status:
    """
    Represents the outcome of a Zvec operation.

    A `Status` object is either OK (success) or carries an error code and message.
    Instances are not hashable (``__hash__`` is ``None``).

    Examples:
        >>> from zvec.typing import Status, StatusCode
        >>> s = Status()
        >>> print(s.ok())
        True
        >>> s = Status(StatusCode.INVALID_ARGUMENT, "Field not found")
        >>> print(s.code() == StatusCode.INVALID_ARGUMENT)
        True
        >>> print(s.message())
        Field not found
    """

    # Declared as None: Status objects cannot be used as dict keys or set members.
    __hash__: typing.ClassVar[None] = None

    # Static factories that build a Status from a message.
    # NOTE(review): each presumably sets the StatusCode matching its name
    # (e.g. NotFound -> StatusCode.NOT_FOUND); confirm against the native binding.
    @staticmethod
    def AlreadyExists(message: str) -> Status: ...
    @staticmethod
    def InternalError(message: str) -> Status: ...
    @staticmethod
    def InvalidArgument(message: str) -> Status: ...
    @staticmethod
    def NotFound(message: str) -> Status: ...
    @staticmethod
    def OK() -> Status:
        """
        Create an OK status.
        """

    @staticmethod
    def PermissionDenied(message: str) -> Status: ...
    # Equality is declared only against another Status.
    def __eq__(self, arg0: Status) -> bool: ...
    # Two constructor overloads: no arguments, or a code plus optional message.
    @typing.overload
    def __init__(self) -> None: ...
    @typing.overload
    def __init__(self, code: StatusCode, message: str = "") -> None:
        """
        Construct a status with the given code and optional message.

        Args:
            code (StatusCode): The status code.
            message (str, optional): Error message. Defaults to empty string.
        """

    def __ne__(self, arg0: Status) -> bool: ...
    def __repr__(self) -> str: ...
    def code(self) -> StatusCode:
        """
        StatusCode: Returns the status code.
        """

    def message(self) -> str:
        """
        str: Returns the error message (may be empty).
        """

    def ok(self) -> bool:
        """
        bool: Returns True if the status is OK.
        """

class StatusCode:
    """
    Enumeration of possible status codes for Zvec operations.

    Used by the `Status` class to indicate success or failure reason.

    Members (integer value in parentheses):

      OK (0), NOT_FOUND (1), ALREADY_EXISTS (2), INVALID_ARGUMENT (3),
      PERMISSION_DENIED (4), FAILED_PRECONDITION (5), RESOURCE_EXHAUSTED (6),
      UNAVAILABLE (7), INTERNAL_ERROR (8), NOT_SUPPORTED (9), UNKNOWN (10)
    """

    # Members, listed alphabetically; the trailing comment on each entry shows
    # the member's repr and integer value.
    ALREADY_EXISTS: typing.ClassVar[
        StatusCode
    ]  # value = <StatusCode.ALREADY_EXISTS: 2>
    FAILED_PRECONDITION: typing.ClassVar[
        StatusCode
    ]  # value = <StatusCode.FAILED_PRECONDITION: 5>
    INTERNAL_ERROR: typing.ClassVar[
        StatusCode
    ]  # value = <StatusCode.INTERNAL_ERROR: 8>
    INVALID_ARGUMENT: typing.ClassVar[
        StatusCode
    ]  # value = <StatusCode.INVALID_ARGUMENT: 3>
    NOT_FOUND: typing.ClassVar[StatusCode]  # value = <StatusCode.NOT_FOUND: 1>
    NOT_SUPPORTED: typing.ClassVar[StatusCode]  # value = <StatusCode.NOT_SUPPORTED: 9>
    OK: typing.ClassVar[StatusCode]  # value = <StatusCode.OK: 0>
    PERMISSION_DENIED: typing.ClassVar[
        StatusCode
    ]  # value = <StatusCode.PERMISSION_DENIED: 4>
    RESOURCE_EXHAUSTED: typing.ClassVar[
        StatusCode
    ]  # value = <StatusCode.RESOURCE_EXHAUSTED: 6>
    UNAVAILABLE: typing.ClassVar[StatusCode]  # value = <StatusCode.UNAVAILABLE: 7>
    UNKNOWN: typing.ClassVar[StatusCode]  # value = <StatusCode.UNKNOWN: 10>
    # Mapping from member name to member.
    __members__: typing.ClassVar[
        dict[str, StatusCode]
    ]  # value = {'OK': <StatusCode.OK: 0>, 'NOT_FOUND': <StatusCode.NOT_FOUND: 1>, 'ALREADY_EXISTS': <StatusCode.ALREADY_EXISTS: 2>, 'INVALID_ARGUMENT': <StatusCode.INVALID_ARGUMENT: 3>, 'PERMISSION_DENIED': <StatusCode.PERMISSION_DENIED: 4>, 'FAILED_PRECONDITION': <StatusCode.FAILED_PRECONDITION: 5>, 'RESOURCE_EXHAUSTED': <StatusCode.RESOURCE_EXHAUSTED: 6>, 'UNAVAILABLE': <StatusCode.UNAVAILABLE: 7>, 'INTERNAL_ERROR': <StatusCode.INTERNAL_ERROR: 8>, 'NOT_SUPPORTED': <StatusCode.NOT_SUPPORTED: 9>, 'UNKNOWN': <StatusCode.UNKNOWN: 10>}

    # Comparison, hashing, integer conversion and state (de)serialisation hooks.
    def __eq__(self, other: typing.Any) -> bool: ...
    def __getstate__(self) -> int: ...
    def __hash__(self) -> int: ...
    def __index__(self) -> int: ...
    def __init__(self, value: typing.SupportsInt) -> None: ...
    def __int__(self) -> int: ...
    def __ne__(self, other: typing.Any) -> bool: ...
    def __repr__(self) -> str: ...
    def __setstate__(self, state: typing.SupportsInt) -> None: ...
    def __str__(self) -> str: ...
    # Read-only accessors for the member's name and integer value.
    @property
    def name(self) -> str: ...
    @property
    def value(self) -> int: ...


================================================
FILE: python/zvec/typing/enum.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from enum import IntEnum

# Public names of this module: the two logging enums defined below.
__all__ = ["LogLevel", "LogType"]


class LogLevel(IntEnum):
    """Log severity levels, numbered from least severe (0) to most severe (4).

    The level is used to control how verbose logging is and which messages are
    filtered out; a larger number means a more serious condition.

    Note:
        ``WARNING`` and ``WARN`` are the same member. The alias exists to match
        the naming used by Python's built-in :mod:`logging` module.

    Attributes:
        DEBUG (int): Detailed diagnostics, normally only useful when
            investigating a problem.
        INFO (int): Confirmation that things are working as expected.
        WARN (int): Something unexpected happened, or a problem may be
            coming. (Alias: ``WARNING``)
        WARNING (int): Same as ``WARN``.
        ERROR (int): A more serious problem; some function could not be
            performed.
        FATAL (int): A serious error after which the program itself may be
            unable to continue running.
    """

    DEBUG = 0
    INFO = 1
    WARN = 2
    WARNING = WARN  # same value as WARN, so this becomes an alias, not a new member
    ERROR = 3
    FATAL = 4


class LogType(IntEnum):
    """Destinations that log output can be sent to.

    Attributes:
        CONSOLE (int): Send logs to standard output/error (for example a
            terminal or an IDE console).
        FILE (int): Persist logs to a file on disk.
    """

    CONSOLE = 0  # terminal / standard streams
    FILE = 1  # on-disk log file


================================================
FILE: python/zvec/zvec.py
================================================
# Copyright 2025-present the zvec project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from typing import Optional

from _zvec import Initialize, _Collection

from .model import Collection
from .model.param import CollectionOption
from .model.schema import CollectionSchema

# Public entry points of this module; all three functions are defined below.
__all__ = ["create_and_open", "init", "open"]

from .typing.enum import LogLevel, LogType


def init(
    *,
    log_type: Optional[LogType] = LogType.CONSOLE,
    log_level: Optional[LogLevel] = LogLevel.WARN,
    log_dir: Optional[str] = "./logs",
    log_basename: Optional[str] = "zvec.log",
    log_file_size: Optional[int] = 2048,
    log_overdue_days: Optional[int] = 7,
    query_threads: Optional[int] = None,
    optimize_threads: Optional[int] = None,
    invert_to_forward_scan_ratio: Optional[float] = None,
    brute_force_by_keys_ratio: Optional[float] = None,
    memory_limit_mb: Optional[int] = None,
) -> None:
    """Configure and initialize the Zvec runtime.

    Call this before any other Zvec operation. Initialization happens once;
    calling it again raises ``RuntimeError``.

    All arguments are keyword-only. An argument that is ``None`` is left out
    of the configuration passed to the core library, which then applies its
    own default. That default may depend on the runtime environment (for
    example cgroup CPU/memory limits). Any value given explicitly overrides
    the default.

    Args:
        log_type (Optional[LogType], optional): Logger destination, either
            ``LogType.CONSOLE`` (the default) or ``LogType.FILE``.
            ``None`` selects the internal default (currently ``CONSOLE``).
        log_level (Optional[LogLevel], optional): Minimum severity to log.
            One of ``DEBUG``, ``INFO``, ``WARN``, ``ERROR``, ``FATAL``.
            Default: ``LogLevel.WARN``. ``None`` selects the internal
            default (``WARN``).
        log_dir (Optional[str], optional): Directory for log files; only
            used when ``log_type=FILE``. Parent directories are **not**
            created automatically. Default: ``"./logs"``. ``None`` selects
            the internal default.
        log_basename (Optional[str], optional): Base name of rotated log
            files (e.g. ``zvec.log.1``, ``zvec.log.2``).
            Default: ``"zvec.log"``.
        log_file_size (Optional[int], optional): Maximum size of one log
            file, in **MB**, before it is rotated. Default: ``2048`` (2 GB).
        log_overdue_days (Optional[int], optional): Number of days rotated
            log files are kept before deletion. Default: ``7``.
        query_threads (Optional[int], optional): Thread count for query
            execution. Must be ≥ 1 if provided. ``None`` (default) infers it
            from the available CPU cores (via cgroup).
        optimize_threads (Optional[int], optional): Thread count for
            background tasks (e.g. compaction, indexing). ``None`` falls
            back to ``query_threads`` or the CPU count.
        invert_to_forward_scan_ratio (Optional[float], optional): Threshold
            for switching from the inverted index to a full forward scan.
            Range: [0.0, 1.0]; higher means more aggressive index skipping.
            Default when omitted: ``0.9``.
        brute_force_by_keys_ratio (Optional[float], optional): Threshold for
            using brute-force key lookup instead of the index.
            Range: [0.0, 1.0]; lower prefers the index, higher prefers
            brute force. Default: ``0.1``.
        memory_limit_mb (Optional[int], optional): Soft memory cap in MB.
            Zvec may throttle or fail operations that approach it. Must be
            > 0 if provided. ``None`` infers it as cgroup memory limit * 0.8
            (e.g. in Docker).

    Raises:
        RuntimeError: If Zvec is already initialized.
        ValueError: On invalid values (e.g., negative thread count, log level
            out of range).
        TypeError: If ``log_type`` is not a ``LogType`` or ``log_level`` is
            not a ``LogLevel``, or if another value has an incorrect type
            (e.g., string for ``query_threads``).

    Note:
        - Because ``None`` arguments are **excluded** from the configuration
          payload, the core library can apply environment-aware defaults.
        - This keeps the API container-friendly: in Kubernetes/Docker,
          omitting ``memory_limit_mb`` and the thread counts lets Zvec
          auto-adapt.

    Examples:
        Initialize with defaults (log to console, auto-detect resources):
        >>> import zvec
        >>> zvec.init()

        Customize logging to file with rotation:
        >>> zvec.init(
        ...     log_type=LogType.FILE,
        ...     log_dir="/var/log/zvec",
        ...     log_file_size=1024,
        ...     log_overdue_days=30
        ... )

        Limit resources explicitly:
        >>> zvec.init(
        ...     memory_limit_mb=2048,
        ...     query_threads=4,
        ...     optimize_threads=2
        ... )

        Fine-tune query heuristics:
        >>> zvec.init(
        ...     invert_to_forward_scan_ratio=0.95,
        ...     brute_force_by_keys_ratio=0.05
        ... )
    """
    # Validate the two enum-typed options first, in declaration order.
    if log_type is not None and not isinstance(log_type, LogType):
        raise TypeError("log_type must be LogType")
    if log_level is not None and not isinstance(log_level, LogLevel):
        raise TypeError("log_level must be LogLevel")

    # Candidate entries in the order they are inserted into the payload.
    # Enum options are passed by member name.
    candidates = (
        ("log_type", None if log_type is None else log_type.name),
        ("log_level", None if log_level is None else log_level.name),
        ("log_dir", log_dir),
        ("log_basename", log_basename),
        ("log_file_size", log_file_size),
        ("log_overdue_days", log_overdue_days),
        ("query_threads", query_threads),
        ("optimize_threads", optimize_threads),
        ("invert_to_forward_scan_ratio", invert_to_forward_scan_ratio),
        ("brute_force_by_keys_ratio", brute_force_by_keys_ratio),
        ("memory_limit_mb", memory_limit_mb),
    )
    # Drop every option that was left as None so the core picks its default.
    config_dict = {key: value for key, value in candidates if value is not None}

    Initialize(config_dict)


def create_and_open(
    path: str,
    schema: CollectionSchema,
    option: Optional[CollectionOption] = None,
) -> Collection:
    """Create a collection at ``path`` and return it opened.

    Whether an already existing collection at that path is an error is
    decided by the underlying implementation.

    Args:
        path (str): Path or name of the collection to create.
        schema (CollectionSchema): Schema describing the collection's structure.
        option (CollectionOption): Options used when opening the collection.
            A default-constructed ``CollectionOption()`` is used when this is
            omitted.

    Returns:
        Collection: The opened collection, ready for operations.

    Raises:
        TypeError: If ``path`` is not a ``str``, ``schema`` is not a
            ``CollectionSchema``, or ``option`` is not a ``CollectionOption``.

    Examples:
        >>> import zvec
        >>> schema = zvec.CollectionSchema(
        ...     name="my_collection",
        ...     fields=[zvec.FieldSchema("id", zvec.DataType.INT64, nullable=True)]
        ... )
        >>> coll = zvec.create_and_open("./my_collection", schema)
    """
    if not isinstance(path, str):
        raise TypeError("path must be a string")
    if not isinstance(schema, CollectionSchema):
        raise TypeError("schema must be a CollectionSchema")

    # Any falsy ``option`` (including None) is replaced by a fresh default.
    effective_option = option if option else CollectionOption()
    if not isinstance(effective_option, CollectionOption):
        raise TypeError("option must be a CollectionOption")

    core_collection = _Collection.CreateAndOpen(
        path, schema._get_object(), effective_option
    )
    return Collection._from_core(core_collection)


def open(path: str, option: Optional[CollectionOption] = None) -> Collection:
    """Open an existing collection from disk.

    The collection must have been previously created with ``create_and_open``.

    Args:
        path (str): Path or name of the existing collection.
        option (Optional[CollectionOption]): Configuration options for opening
            the collection. When omitted (``None``), a fresh default-constructed
            ``CollectionOption()`` is created for this call.

    Returns:
        Collection: An opened collection instance.

    Raises:
        TypeError: If ``path`` is not a ``str`` or ``option`` is not a
            ``CollectionOption``.

    Examples:
        >>> import zvec
        >>> coll = zvec.open("./my_collection")
    """
    if not isinstance(path, str):
        raise TypeError("path must be a string")

    # Build the default per call: a default-argument instance would be created
    # once at import time and then shared by every caller.
    if option is None:
        option = CollectionOption()
    if not isinstance(option, CollectionOption):
        raise TypeError("option must be a CollectionOption")

    _collection = _Collection.Open(path, option)
    return Collection._from_core(_collection)


================================================
FILE: scripts/README.md
================================================


================================================
FILE: scripts/build_android.sh
================================================
#!/bin/bash
# Cross-compile zvec for Android in two steps:
#   1. build a host `protoc` (the cross build needs a protoc that runs locally)
#   2. configure and build zvec with the Android NDK CMake toolchain
#
# Usage: build_android.sh [ABI] [API_LEVEL] [BUILD_TYPE]
#   ABI         target Android ABI        (default: arm64-v8a)
#   API_LEVEL   Android native API level  (default: 21)
#   BUILD_TYPE  CMake build type          (default: Release)
#
# Environment overrides:
#   ANDROID_SDK_ROOT     SDK location (default: $HOME/Library/Android/sdk)
#   ANDROID_NDK_VERSION  NDK directory name under $ANDROID_SDK_ROOT/ndk
#                        (default: 28.2.13676358)
#
# Run from the directory containing the top-level CMakeLists.txt; build
# directories are created relative to the current working directory.
set -e
CURRENT_DIR=$(pwd)

ABI=${1:-"arm64-v8a"}
API_LEVEL=${2:-21}
BUILD_TYPE=${3:-"Release"}

# Parallel job count: macOS reports it through sysctl, Linux through nproc.
# Fall back to 1 so the build still runs where neither is available.
CORE_COUNT=$(sysctl -n hw.ncpu 2>/dev/null || nproc 2>/dev/null || echo 1)

# step1: use host env to compile protoc
echo "step1: building protoc for host..."
HOST_BUILD_DIR="build_host"
mkdir -p "$HOST_BUILD_DIR"
cd "$HOST_BUILD_DIR"

cmake -DCMAKE_BUILD_TYPE="$BUILD_TYPE" ..
# Bound the job count; a bare `-j` lets make spawn unlimited jobs.
make -j"$CORE_COUNT" protoc
PROTOC_EXECUTABLE="$CURRENT_DIR/$HOST_BUILD_DIR/bin/protoc"
cd "$CURRENT_DIR"

echo "step1: Done!!!"

# step2: cross build zvec based on android ndk
echo "step2: building zvec for android..."

# reset thirdparty directory
git submodule foreach --recursive 'git stash --include-untracked'

# Honour a pre-set SDK root, otherwise use the default macOS install location.
export ANDROID_SDK_ROOT="${ANDROID_SDK_ROOT:-$HOME/Library/Android/sdk}"
export ANDROID_HOME="$ANDROID_SDK_ROOT"
ANDROID_NDK_VERSION="${ANDROID_NDK_VERSION:-28.2.13676358}"
export ANDROID_NDK_HOME="$ANDROID_SDK_ROOT/ndk/$ANDROID_NDK_VERSION"
export CMAKE_TOOLCHAIN_FILE="$ANDROID_NDK_HOME/build/cmake/android.toolchain.cmake"

export PATH="$PATH:$ANDROID_SDK_ROOT/cmdline-tools/latest/bin"
export PATH="$PATH:$ANDROID_SDK_ROOT/platform-tools"
export PATH="$PATH:$ANDROID_NDK_HOME"

# ANDROID_NDK_HOME is always assigned above, so an emptiness test can never
# fail. Check for the toolchain file instead: that is what the build needs.
if [ ! -f "$CMAKE_TOOLCHAIN_FILE" ]; then
    echo "error: Android NDK toolchain file not found: $CMAKE_TOOLCHAIN_FILE" >&2
    echo "please install NDK $ANDROID_NDK_VERSION or set ANDROID_SDK_ROOT / ANDROID_NDK_VERSION" >&2
    exit 1
fi

BUILD_DIR="build_android_${ABI}"
mkdir -p "$BUILD_DIR"
cd "$BUILD_DIR"

echo "configure CMake..."
cmake \
    -DANDROID_NDK="$ANDROID_NDK_HOME" \
    -DCMAKE_TOOLCHAIN_FILE="$CMAKE_TOOLCHAIN_FILE" \
    -DANDROID_ABI="$ABI" \
    -DANDROID_NATIVE_API_LEVEL="$API_LEVEL" \
    -DANDROID_STL="c++_static" \
    -DCMAKE_BUILD_TYPE="$BUILD_TYPE" \
    -DBUILD_PYTHON_BINDINGS=OFF \
    -DBUILD_TOOLS=OFF \
    -DCMAKE_INSTALL_PREFIX="./install" \
    -DGLOBAL_CC_PROTOBUF_PROTOC="$PROTOC_EXECUTABLE" \
    ../

echo "building..."
make -j"$CORE_COUNT"

echo "step2: Done!!!"

================================================
FILE: scripts/gcov.sh
================================================
#!/bin/bash
# Collect coverage data with lcov from the current directory and render an
# HTML report with genhtml.
#
# Options:
#   -t TOOL   gcov executable handed to lcov        (default: gcov)
#   -p NAME   project name; also the prefix of the
#             generated *.lcov.info files           (default: proxima-zvec)
#   -o DIR    output directory for the HTML report  (default: html)
#   -z        additionally zip the HTML directory into DIR.zip
#   -k        keep the *.lcov.info files instead of deleting them

project_name=proxima-zvec
gcov_tool=gcov
zip_html=false
output_name=html
keep_info=false

script_dir=$(cd "$(dirname "$0")"; pwd)
source_base=$(dirname "$script_dir")
# Path patterns removed from the report. Kept in an array so every pattern
# reaches lcov as a single, unexpanded argument without going through `eval`.
filter_list=(
  '*/tests/*'
  '*/thirdparty/*'
  '*/deps/*'
  '*/proto/*'
  '*/external/*'
  '*/sqlengine/antlr/gen/*'
)

while getopts t:p:o:zk option; do
  case "$option" in
  t)
    gcov_tool=$OPTARG;;
  p)
    project_name=$OPTARG;;
  o)
    output_name=$OPTARG;;
  z)
    zip_html=true;;
  k)
    keep_info=true;;
  esac
done

# Process sources
lcov -c -b "$source_base" -d . -o "$project_name.lcov.info" --gcov-tool="$gcov_tool" --no-external || exit 1
lcov -r "$project_name.lcov.info" -o "$project_name-filtered.lcov.info" "${filter_list[@]}" || exit 1

# Gather HTML files
genhtml -t "$project_name" -o "$output_name" "$project_name-filtered.lcov.info" || exit 1
if [ "$keep_info" = false ]; then
  rm -rf *.lcov.info
fi

# Zip HTML files
if [ "$zip_html" = true ]; then
  zip -r "$output_name.zip" "$output_name/"
fi


================================================
FILE: src/CMakeLists.txt
================================================
# Top-level build description for the sources under src/.
# Project CMake helpers and options. NOTE(review): cc_directory() and
# git_version() are presumably provided by these modules — confirm.
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

# Retrieve version from git repository
git_version(ZVEC_VERSION ${CMAKE_CURRENT_SOURCE_DIR})

# Add repository
# Sub-directories are listed in this fixed order; the Python binding directory
# is only added when BUILD_PYTHON_BINDINGS is enabled.
cc_directory(ailego)
cc_directory(turbo)
cc_directory(core)
cc_directory(db)
if(BUILD_PYTHON_BINDINGS)
    cc_directory(binding)
endif()


================================================
FILE: src/ailego/CMakeLists.txt
================================================
# Build description of the zvec_ailego static library.
# Project CMake helpers and options. NOTE(review): cc_library(),
# git_version() and setup_compiler_march_for_x86() are presumably provided by
# these modules — confirm.
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

# A threading library is mandatory; its link flags go into EXTRA_LIBS below.
find_package(Threads REQUIRED)

# librt is only searched for on non-Apple, non-Android UNIX platforms;
# everywhere else LIB_RT is set to an empty string.
if(UNIX AND NOT APPLE AND NOT ANDROID)
    find_library(LIB_RT NAMES rt)
else()
    set(LIB_RT "")
endif()

# Version string handed to cc_library(VERSION ...) at the end of this file.
git_version(GIT_SRCS_VER ${CMAKE_CURRENT_SOURCE_DIR})
# Every .cc/.c/.h file below this directory becomes part of the library.
file(GLOB_RECURSE ALL_SRCS *.cc *.c *.h)

set(EXTRA_LIBS ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_DL_LIBS})

# On Android LIB_RT is empty (see above), so this appends nothing there.
if(UNIX AND NOT APPLE)
    list(APPEND EXTRA_LIBS ${LIB_RT})
endif()

# Per-file instruction-set flags. Only applied when AUTO_DETECT_ARCH is set
# and the target is not Android. Files are selected purely by file-name
# suffix inside math/ and math_batch/.
if(NOT ANDROID AND AUTO_DETECT_ARCH)
    if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|i686|i386|x64")
        # Fills the four MATH_MARCH_FLAG_* variables with the flags to use.
        setup_compiler_march_for_x86(MATH_MARCH_FLAG_SSE MATH_MARCH_FLAG_AVX2 MATH_MARCH_FLAG_AVX512 MATH_MARCH_FLAG_AVX512FP16)
        message(STATUS "best compiler march, sse: " ${MATH_MARCH_FLAG_SSE} ", avx2: " ${MATH_MARCH_FLAG_AVX2} ", avx512: " ${MATH_MARCH_FLAG_AVX512} ", avx512fp16: " ${MATH_MARCH_FLAG_AVX512FP16})

        # *_sse sources
        file(GLOB_RECURSE MATH_FILES_SSE
            ${CMAKE_CURRENT_SOURCE_DIR}/math/*_sse.cc
            ${CMAKE_CURRENT_SOURCE_DIR}/math/*_sse.c
            ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_sse.cc
            ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_sse.c
            )

        # *_avx2 and *_avx sources share the AVX2 flags
        file(GLOB_RECURSE MATH_FILES_AVX2
            ${CMAKE_CURRENT_SOURCE_DIR}/math/*_avx2.cc
            ${CMAKE_CURRENT_SOURCE_DIR}/math/*_avx2.c
            ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_avx2.cc
            ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_avx2.c
            ${CMAKE_CURRENT_SOURCE_DIR}/math/*_avx.cc
            ${CMAKE_CURRENT_SOURCE_DIR}/math/*_avx.c
            ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_avx.cc
            ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_avx.c
        )

        # *_avx512 sources
        file(GLOB_RECURSE MATH_FILES_AVX512
            ${CMAKE_CURRENT_SOURCE_DIR}/math/*_avx512.cc
            ${CMAKE_CURRENT_SOURCE_DIR}/math/*_avx512.c
            ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_avx512.cc
            ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_avx512.c
        )

        # *_avx512fp16 sources; note that the *_dispatch sources are compiled
        # with the same AVX512FP16 flags.
        file(GLOB_RECURSE MATH_FILES_AVX512FP16
          ${CMAKE_CURRENT_SOURCE_DIR}/math/*_dispatch.cc
          ${CMAKE_CURRENT_SOURCE_DIR}/math/*_dispatch.c
          ${CMAKE_CURRENT_SOURCE_DIR}/math/*_avx512fp16.cc
          ${CMAKE_CURRENT_SOURCE_DIR}/math/*_avx512fp16.c
          ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_dispatch.cc
          ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_dispatch.c
          ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_avx512fp16.cc
          ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_avx512fp16.c
        )

        # Attach the matching flags to each selected source file.
        foreach(MATH_FILE ${MATH_FILES_SSE})
            set_source_files_properties(
                ${MATH_FILE}
                PROPERTIES
                COMPILE_FLAGS "${MATH_MARCH_FLAG_SSE}"
            )
        endforeach()

        foreach(MATH_FILE ${MATH_FILES_AVX2})
            set_source_files_properties(
                ${MATH_FILE}
                PROPERTIES
                COMPILE_FLAGS "${MATH_MARCH_FLAG_AVX2}"
            )
        endforeach()

        foreach(MATH_FILE ${MATH_FILES_AVX512})
            set_source_files_properties(
                ${MATH_FILE}
                PROPERTIES
                COMPILE_FLAGS "${MATH_MARCH_FLAG_AVX512}"
            )
        endforeach()

        foreach(MATH_FILE ${MATH_FILES_AVX512FP16})
        set_source_files_properties(
            ${MATH_FILE}
            PROPERTIES
            COMPILE_FLAGS "${MATH_MARCH_FLAG_AVX512FP16}"
        )
    endforeach()
    elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64|ARM64")
      # set(CMAKE_CXX_FLAGS "-march=armv8-a")
      # set(CMAKE_C_FLAGS "-march=armv8-a")
      # On ARM64 the flag is applied per file (below) rather than globally.
      set(MATH_MARCH_FLAG_NEON "-march=armv8-a")

      # *_dispatch and *_neon sources get the NEON flags.
      file(GLOB_RECURSE MATH_FILES_NEON
          ${CMAKE_CURRENT_SOURCE_DIR}/math/*_dispatch.cc
          ${CMAKE_CURRENT_SOURCE_DIR}/math/*_dispatch.c
          ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_dispatch.cc
          ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_dispatch.c
          ${CMAKE_CURRENT_SOURCE_DIR}/math/*_neon.cc
          ${CMAKE_CURRENT_SOURCE_DIR}/math/*_neon.c
          ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_neon.cc
          ${CMAKE_CURRENT_SOURCE_DIR}/math_batch/*_neon.c
      )

      foreach(MATH_FILE ${MATH_FILES_NEON})
          set_source_files_properties(
              ${MATH_FILE}
              PROPERTIES
              COMPILE_FLAGS "${MATH_MARCH_FLAG_NEON}"
          )
      endforeach()
    endif()
endif()

# The library itself: all globbed sources, linked against the thread/dl/rt
# libraries collected above plus the static Arrow and Parquet targets.
cc_library(
    NAME zvec_ailego STATIC STRICT PACKED
    SRCS    ${ALL_SRCS}
    LIBS    ${EXTRA_LIBS}
            Arrow::arrow_static
            Arrow::parquet_static
    VERSION "${GIT_SRCS_VER}"
)


================================================
FILE: src/ailego/algorithm/binary_quantizer.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "binary_quantizer.h"
#include <algorithm>
#include <cmath>
#include <cstring>
#include <iostream>
#include <numeric>
#include <ailego/math/normalizer.h>

namespace zvec {
namespace ailego {

//! Feed the training data: append the `dim` floats starting at `vec` to the
//! internal sample buffer. Always returns true.
bool BinaryQuantizer::feed(const float *vec, size_t dim) {
  data_.insert(data_.end(), vec, vec + dim);
  return true;
}

//! Train the quantizer.
//! Currently a no-op that always reports success: nothing is computed here
//! and the threshold is left untouched.
bool BinaryQuantizer::train(void) {
  return true;
}

//! Quantize data: pack the sign-like bit of every float into uint32_t words.
//!   Element k of `in` maps to bit (k % 32) of out[k / 32]; the bit is set
//!   when the element is >= the configured threshold. Unused high bits of the
//!   last word are zero. `out` must hold EncodedSizeInBinary32(dim) words.
void BinaryQuantizer::encode(const float *in, size_t dim, uint32_t *out) const {
  for (size_t base = 0; base < dim; base += 32) {
    // The final word may cover fewer than 32 input elements.
    const size_t count = std::min<size_t>(32, dim - base);
    uint32_t word = 0;

    for (size_t bit = 0; bit < count; ++bit) {
      if (in[base + bit] >= threshold_) {
        word |= (static_cast<uint32_t>(1) << bit);
      }
    }

    *out++ = word;
  }
}

//! De-quantize data: unpack uint32_t words back into floats.
//!   A set bit becomes  1.0
//!   A clear bit becomes -1.0
//! Bit (k % 32) of in[k / 32] produces out[k], for k in [0, dim).
void BinaryQuantizer::decode(const uint32_t *in, size_t dim, float *out) const {
  for (size_t k = 0; k < dim; ++k) {
    const uint32_t word = in[k / 32];
    const bool is_set = ((word >> (k % 32)) & 1u) != 0;
    out[k] = is_set ? 1.0f : -1.0f;
  }
}

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/algorithm/binary_quantizer.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <vector>
#include <zvec/ailego/internal/platform.h>

namespace zvec {
namespace ailego {

/*! Binary Quantization Algorithm
 *  Maps each float of a vector to a single bit by comparing it against a
 *  configurable threshold (0.0 by default). Instances are not copyable.
 */
class BinaryQuantizer {
 public:
  //! Constructor
  BinaryQuantizer() = default;

  //! Feed `dim` floats of training data starting at `vec`
  bool feed(const float *vec, size_t dim);

  //! Train the quantizer
  bool train(void);

  //! Quantize: pack `dim` floats from `in` into uint32_t words in `out`
  void encode(const float *in, size_t dim, uint32_t *out) const;

  //! De-quantize: unpack the bits in `in` into `dim` floats in `out`
  void decode(const uint32_t *in, size_t dim, float *out) const;

  //! Number of uint32_t words needed to hold `dim` bits (rounded up)
  static size_t EncodedSizeInBinary32(size_t dim) {
    return (dim + 31) >> 5;
  }

  //! Set quantization threshold
  void set_threshold(float threshold) {
    threshold_ = threshold;
  }

  //! Get quantization threshold
  float threshold(void) const {
    return threshold_;
  }

 private:
  //! Copying is disabled
  BinaryQuantizer(const BinaryQuantizer &) = delete;
  BinaryQuantizer &operator=(const BinaryQuantizer &) = delete;

  //! Samples collected by feed()
  std::vector<float> data_{};
  //! Values >= threshold_ encode to bit 1
  float threshold_{0.0f};
};

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/algorithm/integer_quantizer.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "integer_quantizer.h"
#include <algorithm>
#include <cmath>
#include <cstring>
#include <numeric>
#include <ailego/math/normalizer.h>
#include <zvec/ailego/internal/platform.h>

namespace zvec {
namespace ailego {

//! Smooth a distribution so that it contains no (near-)zero bins.
//! The input is first passed through Normalizer<float>::L1. Every bin whose
//! magnitude is below float epsilon is then raised by epsilon, and the added
//! mass is taken back in equal parts from the remaining bins. Nothing is
//! changed after normalization when there are no such bins, or only such bins.
static inline void MakeSmooth(std::vector<float> &dist) {
  constexpr float epsilon = std::numeric_limits<float>::epsilon();

  // L1 normalization comes first
  float norm = 1.0f;
  Normalizer<float>::L1(dist.data(), dist.size(), &norm);

  size_t zero_count = 0;
  for (const float bin : dist) {
    if (std::abs(bin) < epsilon) {
      ++zero_count;
    }
  }
  const size_t nonzero_count = dist.size() - zero_count;

  // Nothing to redistribute
  if (zero_count == 0 || nonzero_count == 0) {
    return;
  }

  // Amount removed from each populated bin to pay for the raised ones
  const float y = epsilon * zero_count / static_cast<float>(nonzero_count);
  for (float &bin : dist) {
    bin = (std::abs(bin) < epsilon) ? (bin + epsilon) : (bin - y);
  }
}

//! Compute the Kullback-Leibler divergence D(p || q) = sum p[i]*log(p[i]/q[i])
//! of two distributions, accumulated in double precision.
//!
//! Returns std::numeric_limits<double>::max() as the "not comparable"
//! sentinel when the sizes differ, when the inputs are empty, or when any bin
//! of either distribution is exactly zero. One sentinel is used on every
//! failure path so that a caller starting its search from
//! numeric_limits<double>::max() can never pick an invalid comparison as the
//! new minimum (the float maximum returned previously compared smaller).
static inline double ComputeKlDivergence(const std::vector<float> &p,
                                         const std::vector<float> &q) {
  constexpr double kInvalid = std::numeric_limits<double>::max();

  if (p.size() != q.size() || p.empty()) {
    return kInvalid;
  }

  double v = 0.0;
  for (size_t i = 0; i != p.size(); ++i) {
    // log(p/q) is undefined or infinite when either side is zero
    if (p[i] == 0 || q[i] == 0) {
      return kInvalid;
    }
    v += p[i] * std::log(static_cast<double>(p[i]) / static_cast<double>(q[i]));
  }
  return v;
}

//! Expand a quantized distribution back to the resolution of the original
//! histogram over the range [-threshold, threshold].
//!   distribution            full histogram; its centre is distribution.size()/2
//!   quantized_distribution  merged bins covering the 2*threshold centre bins
//!   threshold               half-width of the range, in histogram bins
//!   expand_distribution     output, resized to 2*threshold entries
//! The mass of every quantized bin is spread evenly over the non-empty
//! histogram bins it covers; bins covered only partially take part
//! proportionally to the covered fraction.
static inline void ExpandCandidateDistribution(
    const std::vector<uint32_t> &distribution,
    const std::vector<float> &quantized_distribution, size_t threshold,
    std::vector<float> *expand_distribution) {
  std::vector<float> &expanded = *expand_distribution;
  expanded.resize(threshold * 2, 0);

  const size_t slot_count = quantized_distribution.size();
  // Number of (possibly fractional) histogram bins merged into one slot
  const float bins_per_slot = static_cast<float>(expanded.size()) / slot_count;
  // Index in `distribution` of the first bin inside the range
  const size_t offset = distribution.size() / 2 - threshold;

  for (size_t slot = 0; slot < slot_count; ++slot) {
    const float begin = slot * bins_per_slot;
    const float finish = begin + bins_per_slot;
    const size_t first_full = static_cast<size_t>(std::ceil(begin));
    const size_t last_full = static_cast<size_t>(std::floor(finish));
    const float head_frac = static_cast<float>(first_full) - begin;
    const float tail_frac = finish - static_cast<float>(last_full);
    const bool has_head = head_frac > 0;
    const bool has_tail = tail_frac > 0;

    //! Count the populated bins covered by this slot; a bin that is only
    //! partially covered counts with its covered fraction
    float populated = 0;
    if (has_head && offset + first_full > 0 &&
        distribution[offset + first_full - 1] != 0) {
      populated += head_frac;
    }
    if (has_tail && offset + last_full < distribution.size() &&
        distribution[offset + last_full] != 0) {
      populated += tail_frac;
    }
    for (size_t j = first_full; j < last_full; ++j) {
      if (distribution[offset + j] != 0) {
        populated += 1.0f;
      }
    }
    if (populated == 0) {
      continue;
    }

    //! Spread the slot's mass over the bins it covers
    const float share = quantized_distribution[slot] / populated;
    if (has_head && first_full > 0) {
      expanded[first_full - 1] += share * head_frac;
    }
    if (has_tail && last_full < expanded.size()) {
      expanded[last_full] += share * tail_frac;
    }
    for (size_t j = first_full; j < last_full; ++j) {
      if (distribution[offset + j] != 0) {
        expanded[j] = share;
      }
    }
  }
}

/*! Compute quantization threshold bins
 *  Implement Int8 Quantization Algorithm ref:
 *  http://on-demand.gputechconf.com/gtc/2017/presentation/s7310-8-bit-inference-with-tensorrt.pdf
 *
 *  hist         histogram whose centre (zero point) is at hist.size() / 2
 *  target_bins  number of quantization levels the range is merged into
 *
 *  Returns the half-width, in histogram bins, of the zero-centred range
 *  whose quantized approximation has the smallest KL divergence from the
 *  original distribution. Candidates run from target_bins / 2 up to
 *  hist.size() / 2; the latter is returned when no candidate yields a
 *  divergence below the initial maximum.
 */
static inline size_t ComputeThreshold(const std::vector<uint32_t> &hist,
                                      const size_t target_bins) {
  std::vector<float> P_distribution(hist.size());
  size_t zero_point_index = hist.size() / 2;

  size_t start_bin = target_bins / 2;
  size_t end_bin = hist.size() / 2;
  size_t negative_outliers_count = 0;
  size_t positive_outliers_count = 0;
  double min_divergence = std::numeric_limits<double>::max();
  size_t target_threshold = end_bin;

  // Pre-sum the edge bins of every candidate range. The search loop below
  // subtracts the current edge bin again before using the counts, so at that
  // point they hold exactly the samples lying outside the current range.
  for (size_t threshold = start_bin; threshold <= end_bin; ++threshold) {
    negative_outliers_count += hist[zero_point_index - threshold];
    positive_outliers_count += hist[zero_point_index + threshold - 1];
  }

  //! for each zero-axised quantization range: [-threshold, threshold], search
  //! the best solution
  for (size_t threshold = start_bin; threshold <= end_bin; ++threshold) {
    // Reference distribution P: the histogram slice inside the range ...
    P_distribution.resize(threshold * 2);
    auto p_hist = &hist[zero_point_index - threshold];
    for (size_t i = 0; i != P_distribution.size(); ++i) {
      P_distribution[i] = static_cast<float>(p_hist[i]);
    }

    // ... with all outliers folded into its first and last bin
    negative_outliers_count -= hist[zero_point_index - threshold];
    positive_outliers_count -= hist[zero_point_index + threshold - 1];
    P_distribution[0] += negative_outliers_count;
    P_distribution[P_distribution.size() - 1] += positive_outliers_count;

    //! Quantize the bins in range [-threshold, threshold] to target_bins
    std::vector<float> Q_distribution(target_bins, 0);
    float merged_cnt = static_cast<float>(threshold * 2) / target_bins;
    size_t left_boundary = zero_point_index - threshold;
    for (size_t i = 0; i < target_bins; ++i) {
      float start = i * merged_cnt;
      float end = start + merged_cnt;
      const size_t start_ceil = static_cast<size_t>(std::ceil(start));
      const size_t end_floor = static_cast<size_t>(std::floor(end));
      // Fractional share of the histogram bin straddling the left edge
      if (left_boundary + start_ceil > 0) {
        Q_distribution[i] +=
            ((float)start_ceil - start) * hist[left_boundary + start_ceil - 1];
      }
      // Fractional share of the histogram bin straddling the right edge
      if (left_boundary + end_floor < hist.size()) {
        Q_distribution[i] +=
            (end - (float)end_floor) * hist[left_boundary + end_floor];
      }

      // Histogram bins that lie completely inside this quantized bin
      for (size_t j = start_ceil; j < end_floor; j++) {
        Q_distribution[i] += hist[left_boundary + j];
      }
    }
    // Bring Q back to the resolution of P so both can be compared bin by bin
    std::vector<float> Q_expand_distribution;
    ExpandCandidateDistribution(hist, Q_distribution, threshold,
                                &Q_expand_distribution);

    //! Compute Kullback-Leibler Divergence, normalize and smooth the data
    //! first. Ref: http://hanj.cs.illinois.edu/cs412/bk3/KL-divergence.pdf
    MakeSmooth(P_distribution);
    MakeSmooth(Q_expand_distribution);
    double divergence =
        ComputeKlDivergence(P_distribution, Q_expand_distribution);

    // Strict comparison: on equal divergence the smaller threshold is kept
    if (divergence < min_divergence) {
      min_divergence = divergence;
      target_threshold = threshold;
    }
  }
  return target_threshold;
}

// Map a value into the quantized domain: compute (val + bias) * scale and
// clamp the result to [RANGE_MIN, RANGE_MAX]. The result is not rounded.
template <int RANGE_MIN, int RANGE_MAX>
static inline float QuantizeValue(float val, float scale, float bias) {
  const float scaled = (val + bias) * scale;

  if (scaled > RANGE_MAX) {
    return static_cast<float>(RANGE_MAX);
  }
  if (scaled < RANGE_MIN) {
    return static_cast<float>(RANGE_MIN);
  }
  return scaled;
}

// Init the histogram params.
// Expands inside a quantizer member function and uses its members directly.
//  - When no bin count was configured (histogram_bins_ == 0), it defaults to
//    max(4096, range * 8), where range is max(|MIN_VALUE|, |MAX_VALUE|) in
//    non-bias mode and MAX_VALUE - MIN_VALUE otherwise.
//  - The histogram is resized to the bin count rounded up to an even number.
//  - Non-bias mode: bias_ is 0 and the histogram spans [-val, val] with
//    val = max(|max_|, |min_|).
//  - Otherwise: bias_ is the negated midpoint of [min_, max_] and the
//    histogram spans [min_, max_].
// No `//` comments inside the body: after line splicing they would swallow
// the remainder of the macro.
#define INIT_HISTOGRAM()                                                      \
  {                                                                           \
    if (histogram_bins_ == 0) {                                               \
      size_t range = non_bias_                                                \
                         ? std::max(std::abs(MIN_VALUE), std::abs(MAX_VALUE)) \
                         : (MAX_VALUE - MIN_VALUE);                           \
      histogram_bins_ = std::max<size_t>(4096u, range * 8);                   \
    }                                                                         \
    histogram_.resize((histogram_bins_ + 1) >> 1 << 1);                       \
    if (non_bias_) {                                                          \
      bias_ = 0.0f;                                                           \
      auto val = std::max(std::abs(max_), std::abs(min_));                    \
      left_boundary_ = -val;                                                  \
      hist_interval_ = (val * 2) / static_cast<float>(histogram_.size());     \
    } else {                                                                  \
      bias_ = -static_cast<float>(min_ + (max_ - min_) * 0.5);                \
      left_boundary_ = min_;                                                  \
      hist_interval_ = (max_ - min_) / static_cast<float>(histogram_.size()); \
    }                                                                         \
  }

// Feed vector and update the histogram.
// Forms the complete body of a quantizer's `bool feed(vec, dim)`; the macro
// itself contains the return statements.
//  - Returns false when max_ < min_ (the value range has not been set to a
//    valid interval).
//  - Initializes the histogram on first use via INIT_HISTOGRAM().
//  - Each of the `dim` values is mapped to bin
//    (value - left_boundary_) / hist_interval_, clamped to the first/last
//    bin, and that bin is incremented. When hist_interval_ is not positive,
//    every value lands in bin 0.
//  - Returns true afterwards.
#define UPDATE_HISTOGRAM(vec, dim)                                            \
  {                                                                           \
    if (max_ < min_) {                                                        \
      return false;                                                           \
    }                                                                         \
    if (histogram_.size() == 0) {                                             \
      INIT_HISTOGRAM()                                                        \
    }                                                                         \
    for (size_t i = 0; i < dim; ++i) {                                        \
      ssize_t index = 0;                                                      \
      if (hist_interval_ > 0.0) {                                             \
        index =                                                               \
            static_cast<ssize_t>((vec[i] - left_boundary_) / hist_interval_); \
      }                                                                       \
      if (index < 0) {                                                        \
        index = 0;                                                            \
      } else if ((size_t)index >= histogram_.size()) {                        \
        index = histogram_.size() - 1;                                        \
      }                                                                       \
      ailego_assert_with((size_t)index < histogram_.size(), "Invalid index"); \
      histogram_[index] += 1;                                                 \
    }                                                                         \
    return true;                                                              \
  }

// Train the quantizer.
// Forms the complete body of a quantizer's `bool train()`; the macro itself
// contains the return statements.
//  - Returns false when the histogram holds no samples.
//  - target_bins is MAX_VALUE - MIN_VALUE aligned to a multiple of 2.
//  - ComputeThreshold() picks the clipping half-width in histogram bins; it
//    is converted to a value-domain threshold via hist_interval_.
//  - scale_ maps that threshold onto half of target_bins. In bias mode bias_
//    is additionally shifted by the centre of [MIN_VALUE, MAX_VALUE].
//  - scale_reciprocal_ is cached for decoding; returns true.
// The sample count is accumulated in 64 bits: the bins are uint32_t, and an
// `int` accumulator (the type deduced from a literal 0) wraps once more than
// 2^31 values have been fed.
#define TRAIN_QUANTIZER()                                                 \
  {                                                                       \
    const uint64_t sum =                                                  \
        std::accumulate(histogram_.begin(), histogram_.end(),             \
                        static_cast<uint64_t>(0));                        \
    if (sum == 0) {                                                       \
      return false;                                                       \
    }                                                                     \
    size_t target_bins =                                                  \
        ailego_align(static_cast<size_t>(MAX_VALUE - MIN_VALUE), 2);      \
    auto threshold_bins = ComputeThreshold(histogram_, target_bins);      \
    auto threshold =                                                      \
        (static_cast<float>(threshold_bins) + 0.5f) * hist_interval_;     \
    scale_ = target_bins / 2 / threshold;                                 \
    if (!non_bias_) {                                                     \
      bias_ += (MAX_VALUE + MIN_VALUE) * 0.5f / scale_;                   \
    }                                                                     \
    scale_reciprocal_ = 1 / scale_;                                       \
    return true;                                                          \
  }

// Feed the INT16 quantizer: add the `dim` floats starting at `vec` to the
// training histogram. The body and its return statements come from
// UPDATE_HISTOGRAM (false when max_ < min_, true otherwise).
bool EntropyInt16Quantizer::feed(const float *vec, size_t dim) {
  UPDATE_HISTOGRAM(vec, dim)
}

// Train the INT16 quantizer: derive scale and bias from the fed histogram.
// The body and its return statements come from TRAIN_QUANTIZER (false when
// the histogram is empty, true otherwise).
bool EntropyInt16Quantizer::train(void) {
  TRAIN_QUANTIZER()
}

// Encode to INT16
void EntropyInt16Quantizer::encode(const float *in, size_t dim,
                                   int16_t *out) const {
  for (size_t i = 0; i < dim; ++i) {
    out[i] = static_cast<int16_t>(
        std::round(QuantizeValue<MIN_VALUE, MAX_VALUE>(in[i], scale_, bias_)));
  }
}

// Decode from INT16: out[i] = in[i] * scale_reciprocal - bias
void EntropyInt16Quantizer::decode(const int16_t *in, size_t dim,
                                   float *out) const {
  const float inv_scale = this->scale_reciprocal();
  const float offset = this->bias();
  std::transform(in, in + dim, out, [inv_scale, offset](int16_t code) {
    return code * inv_scale - offset;
  });
}

// Feed the UINT16 quantizer: add the `dim` floats starting at `vec` to the
// training histogram. The body and its return statements come from
// UPDATE_HISTOGRAM (false when max_ < min_, true otherwise).
bool EntropyUInt16Quantizer::feed(const float *vec, size_t dim) {
  UPDATE_HISTOGRAM(vec, dim)
}

// Train the UINT16 quantizer: derive scale and bias from the fed histogram.
// The body and its return statements come from TRAIN_QUANTIZER (false when
// the histogram is empty, true otherwise).
bool EntropyUInt16Quantizer::train(void) {
  TRAIN_QUANTIZER()
}

// Encode to UINT16
void EntropyUInt16Quantizer::encode(const float *in, size_t dim,
                                    uint16_t *out) const {
  for (size_t i = 0; i < dim; ++i) {
    out[i] = static_cast_from_float_to_uint16(
        std::round(QuantizeValue<MIN_VALUE, MAX_VALUE>(in[i], scale_, bias_)));
  }
}

// Decode from UINT16: out[i] = in[i] * scale_reciprocal - bias
void EntropyUInt16Quantizer::decode(const uint16_t *in, size_t dim,
                                    float *out) const {
  const float inv_scale = this->scale_reciprocal();
  const float offset = this->bias();
  std::transform(in, in + dim, out, [inv_scale, offset](uint16_t code) {
    return code * inv_scale - offset;
  });
}

// Feed the INT8 quantizer: add the `dim` floats starting at `vec` to the
// training histogram. The body and its return statements come from
// UPDATE_HISTOGRAM (false when max_ < min_, true otherwise).
bool EntropyInt8Quantizer::feed(const float *vec, size_t dim) {
  UPDATE_HISTOGRAM(vec, dim)
}

// Train the INT8 quantizer: derive scale and bias from the fed histogram.
// The body and its return statements come from TRAIN_QUANTIZER (false when
// the histogram is empty, true otherwise).
bool EntropyInt8Quantizer::train(void) {
  TRAIN_QUANTIZER()
}

// Encode to INT8
void EntropyInt8Quantizer::encode(const float *in, size_t dim,
                                  int8_t *out) const {
  for (size_t i = 0; i < dim; ++i) {
    out[i] = static_cast<int8_t>(
        std::round(QuantizeValue<MIN_VALUE, MAX_VALUE>(in[i], scale_, bias_)));
  }
}

// Decode from INT8: out[i] = in[i] * scale_reciprocal - bias
void EntropyInt8Quantizer::decode(const int8_t *in, size_t dim,
                                  float *out) const {
  const float inv_scale = this->scale_reciprocal();
  const float offset = this->bias();
  std::transform(in, in + dim, out, [inv_scale, offset](int8_t code) {
    return code * inv_scale - offset;
  });
}

// Feed the UINT8 quantizer: add the `dim` floats starting at `vec` to the
// training histogram. The body and its return statements come from
// UPDATE_HISTOGRAM (false when max_ < min_, true otherwise).
bool EntropyUInt8Quantizer::feed(const float *vec, size_t dim) {
  UPDATE_HISTOGRAM(vec, dim)
}

// Train the UINT8 quantizer: derive scale and bias from the fed histogram.
// The body and its return statements come from TRAIN_QUANTIZER (false when
// the histogram is empty, true otherwise).
bool EntropyUInt8Quantizer::train(void) {
  TRAIN_QUANTIZER()
}

// Encode to INT8
void EntropyUInt8Quantizer::encode(const float *in, size_t dim,
                                   uint8_t *out) const {
  for (size_t i = 0; i < dim; ++i) {
    out[i] = static_cast_from_float_to_uint8(
        std::round(QuantizeValue<MIN_VALUE, MAX_VALUE>(in[i], scale_, bias_)));
  }
}

// Decode from UINT8: out[i] = in[i] * scale_reciprocal - bias
void EntropyUInt8Quantizer::decode(const uint8_t *in, size_t dim,
                                   float *out) const {
  const float inv_scale = this->scale_reciprocal();
  const float offset = this->bias();
  std::transform(in, in + dim, out, [inv_scale, offset](uint8_t code) {
    return code * inv_scale - offset;
  });
}

// Feed the INT4 quantizer: add the `dim` floats starting at `vec` to the
// training histogram. The body and its return statements come from
// UPDATE_HISTOGRAM (false when max_ < min_, true otherwise).
bool EntropyInt4Quantizer::feed(const float *vec, size_t dim) {
  UPDATE_HISTOGRAM(vec, dim)
}

// Train the INT4 quantizer: derive scale and bias from the fed histogram.
// The body and its return statements come from TRAIN_QUANTIZER (false when
// the histogram is empty, true otherwise).
bool EntropyInt4Quantizer::train(void) {
  TRAIN_QUANTIZER()
}

// Encode to INT4: two consecutive inputs are packed into one output byte.
// The even-indexed element goes to the low nibble, the odd-indexed element
// to the high nibble. `dim` must be even.
void EntropyInt4Quantizer::encode(const float *in, size_t dim,
                                  uint8_t *out) const {
  ailego_assert_with(dim % 2 == 0, "Dimension must be aligned with 2");

  for (size_t i = 0; i < dim; i += 2) {
    const float even =
        QuantizeValue<MIN_VALUE, MAX_VALUE>(in[i], scale_, bias_);
    const float odd =
        QuantizeValue<MIN_VALUE, MAX_VALUE>(in[i + 1], scale_, bias_);
    const auto high_bits =
        static_cast_from_float_to_uint8(std::round(odd)) << 4;
    const auto low_bits =
        static_cast_from_float_to_uint8(std::round(even)) & 0xF;
    out[i >> 1] = high_bits | low_bits;
  }
}

// Decode from INT4: every input byte yields two floats, low nibble first.
// Each nibble is treated as a signed 4-bit value. `dim` must be even.
void EntropyInt4Quantizer::decode(const uint8_t *in, size_t dim,
                                  float *out) const {
  ailego_assert_with(dim % 2 == 0, "Dimension must be aligned with 2");

  const size_t byte_count = dim / 2;
  for (size_t k = 0; k < byte_count; ++k) {
    const uint8_t packed = in[k];
    // Place the nibble in the top four bits of an int8_t and shift it back
    // down so that the nibble's sign bit is extended.
    const int8_t low = (static_cast<int8_t>(packed << 4) >> 4);
    const int8_t high = (static_cast<int8_t>(packed & 0xf0) >> 4);
    float *pair = out + 2 * k;
    pair[0] = low * this->scale_reciprocal() - this->bias();
    pair[1] = high * this->scale_reciprocal() - this->bias();
  }
}

// Feed the UINT4 quantizer: add the `dim` floats starting at `vec` to the
// training histogram. The body and its return statements come from
// UPDATE_HISTOGRAM (false when max_ < min_, true otherwise).
bool EntropyUInt4Quantizer::feed(const float *vec, size_t dim) {
  UPDATE_HISTOGRAM(vec, dim)
}

// Train the UINT4 quantizer: derive scale and bias from the fed histogram.
// The body and its return statements come from TRAIN_QUANTIZER (false when
// the histogram is empty, true otherwise).
bool EntropyUInt4Quantizer::train(void) {
  TRAIN_QUANTIZER()
}

// Encode to UINT4: two consecutive inputs are packed into one output byte.
// The even-indexed element goes to the low nibble, the odd-indexed element
// to the high nibble. `dim` must be even.
void EntropyUInt4Quantizer::encode(const float *in, size_t dim,
                                   uint8_t *out) const {
  ailego_assert_with(dim % 2 == 0, "Dimension must be aligned with 2");

  for (size_t i = 0; i < dim; i += 2) {
    const float even =
        QuantizeValue<MIN_VALUE, MAX_VALUE>(in[i], scale_, bias_);
    const float odd =
        QuantizeValue<MIN_VALUE, MAX_VALUE>(in[i + 1], scale_, bias_);
    const auto high_bits =
        static_cast_from_float_to_uint8(std::round(odd)) << 4;
    const auto low_bits =
        static_cast_from_float_to_uint8(std::round(even)) & 0xF;
    out[i >> 1] = high_bits | low_bits;
  }
}

// Decode from UINT4: every input byte yields two floats, low nibble first.
// Each nibble is treated as an unsigned 4-bit value. `dim` must be even.
void EntropyUInt4Quantizer::decode(const uint8_t *in, size_t dim,
                                   float *out) const {
  ailego_assert_with(dim % 2 == 0, "Dimension must be aligned with 2");

  const size_t byte_count = dim / 2;
  for (size_t k = 0; k < byte_count; ++k) {
    const uint8_t packed = in[k];
    float *pair = out + 2 * k;
    pair[0] = (packed & 0xf) * this->scale_reciprocal() - this->bias();
    pair[1] = (packed >> 4) * this->scale_reciprocal() - this->bias();
  }
}

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/algorithm/integer_quantizer.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <limits>
#include <vector>
#include <zvec/ailego/internal/platform.h>

namespace zvec {
namespace ailego {

/*! Entropy-based Integer Quantization Algorithm
 *
 *  Common state and accessors shared by the concrete integer quantizers.
 *    T          integral type used to store the quantized data
 *    RANGE_MIN  smallest quantized value
 *    RANGE_MAX  largest quantized value (must be greater than RANGE_MIN)
 *
 *  The value range [min, max] starts out inverted (min = largest float,
 *  max = lowest float), which marks it as "not set".
 */
template <typename T, int RANGE_MIN, int RANGE_MAX>
class EntropyIntegerQuantizer {
 public:
  //! Primitive Built-in Types to store the quantized data
  using ValueType = typename std::remove_cv<T>::type;

  //! Constants
  constexpr static int MIN_VALUE = RANGE_MIN;
  constexpr static int MAX_VALUE = RANGE_MAX;

  // Check supporting type
  static_assert(std::is_integral<T>::value, "ValueType must be integral");

  // Check template values
  static_assert(RANGE_MIN < RANGE_MAX, "Invalid value range");

  //! Constructor
  EntropyIntegerQuantizer(void) {}

  //! Set histogram bins in train. Values that do not exceed the number of
  //! quantization steps (RANGE_MAX - RANGE_MIN) are ignored.
  void set_histogram_bins(size_t bins) {
    if (bins > static_cast<size_t>(RANGE_MAX - RANGE_MIN)) {
      histogram_bins_ = bins;
    }
  }

  //! Set quantization params scale. Non-positive values are ignored; the
  //! reciprocal used for decoding is updated together with the scale.
  void set_scale(float val) {
    if (val > 0.0f) {
      scale_ = val;
      scale_reciprocal_ = 1 / scale_;
    }
  }

  //! Set quantization params bias
  void set_bias(float val) {
    bias_ = val;
  }

  //! Set quantization params max
  void set_max(float val) {
    max_ = val;
  }

  //! Set quantization params min
  void set_min(float val) {
    min_ = val;
  }

  //! Set quantization params non bias
  void set_non_bias(bool val) {
    non_bias_ = val;
  }

  //! Get histogram bins in train
  size_t histogram_bins(void) const {
    return histogram_bins_;
  }

  //! Get quantization params scale
  float scale(void) const {
    return scale_;
  }

  //! Get quantization params bias
  float bias(void) const {
    return bias_;
  }

  //! Get quantization params max
  float max(void) const {
    return max_;
  }

  //! Get quantization params min
  float min(void) const {
    return min_;
  }

  //! Get quantization params non bias
  bool non_bias(void) const {
    return non_bias_;
  }

  //! Retrieve the scale reciprocal for decoding
  float scale_reciprocal(void) const {
    return scale_reciprocal_;
  }

 protected:
  //! Disable them
  EntropyIntegerQuantizer(const EntropyIntegerQuantizer &) = delete;
  EntropyIntegerQuantizer &operator=(const EntropyIntegerQuantizer &) = delete;

  //! Members
  size_t histogram_bins_{0};
  float hist_interval_{1.0f};
  // lowest(), not min(): for floating-point types numeric_limits::min() is
  // the smallest POSITIVE normal value, so it is no valid "below everything"
  // sentinel and would look like a real maximum next to negative data.
  float max_{std::numeric_limits<float>::lowest()};
  float min_{std::numeric_limits<float>::max()};
  float bias_{0.0f};
  float scale_{0.0f};
  float scale_reciprocal_{0.0f};
  float left_boundary_{0.0f};
  bool non_bias_{false};
  std::vector<uint32_t> histogram_{};
};

/*! INT16 Quantizer
 *  Stores quantized values as int16_t in the symmetric range
 *  [-32767, 32767].
 */
class EntropyInt16Quantizer
    : public EntropyIntegerQuantizer<int16_t, -32767, 32767> {
 public:
  //! Feed the training data: `dim` floats starting at `vec`
  bool feed(const float *vec, size_t dim);

  //! Train the quantizer
  bool train(void);

  //! Encode float vector to int16; `out` receives `dim` values
  void encode(const float *in, size_t dim, ValueType *out) const;

  //! Decode to float vector from int16; `out` receives `dim` floats
  void decode(const ValueType *in, size_t dim, float *out) const;
};

/*! UINT16 Quantizer
 *
 *  Instantiates EntropyIntegerQuantizer with uint16_t storage and the value
 *  range [0, 65535]. Only declarations live here; the member functions are
 *  defined elsewhere.
 */
class EntropyUInt16Quantizer
    : public EntropyIntegerQuantizer<uint16_t, 0, 65535> {
 public:
  //! Feed the training data
  //! \param vec  pointer to `dim` float values
  //! \param dim  number of values in `vec`
  bool feed(const float *vec, size_t dim);

  //! Train the quantizer
  //! NOTE(review): presumably derives scale/bias from the fed data and
  //! returns false on failure -- confirm against the implementation.
  bool train(void);

  //! Encode float vector to uint16
  //! \param in   `dim` float inputs
  //! \param out  destination for the encoded values
  void encode(const float *in, size_t dim, ValueType *out) const;

  //! Decode to float vector from uint16
  //! \param in   encoded values
  //! \param out  destination for `dim` decoded floats
  void decode(const ValueType *in, size_t dim, float *out) const;
};

/*! INT8 Quantizer
 *
 *  Instantiates EntropyIntegerQuantizer with int8_t storage and the
 *  symmetric value range [-127, 127]. Only declarations live here; the
 *  member functions are defined elsewhere.
 */
class EntropyInt8Quantizer : public EntropyIntegerQuantizer<int8_t, -127, 127> {
 public:
  //! Feed the training data
  //! \param vec  pointer to `dim` float values
  //! \param dim  number of values in `vec`
  bool feed(const float *vec, size_t dim);

  //! Train the quantizer
  //! NOTE(review): presumably derives scale/bias from the fed data and
  //! returns false on failure -- confirm against the implementation.
  bool train(void);

  //! Encode float vector to int8
  //! \param in   `dim` float inputs
  //! \param out  destination for the encoded values
  void encode(const float *in, size_t dim, ValueType *out) const;

  //! Decode to float vector from int8
  //! \param in   encoded values
  //! \param out  destination for `dim` decoded floats
  void decode(const ValueType *in, size_t dim, float *out) const;
};

/*! UINT8 Quantizer
 *
 *  Instantiates EntropyIntegerQuantizer with uint8_t storage and the value
 *  range [0, 255]. Only declarations live here; the member functions are
 *  defined elsewhere.
 */
class EntropyUInt8Quantizer : public EntropyIntegerQuantizer<uint8_t, 0, 255> {
 public:
  //! Feed the training data
  //! \param vec  pointer to `dim` float values
  //! \param dim  number of values in `vec`
  bool feed(const float *vec, size_t dim);

  //! Train the quantizer
  //! NOTE(review): presumably derives scale/bias from the fed data and
  //! returns false on failure -- confirm against the implementation.
  bool train(void);

  //! Encode float vector to uint8
  //! \param in   `dim` float inputs
  //! \param out  destination for the encoded values
  void encode(const float *in, size_t dim, ValueType *out) const;

  //! Decode to float vector from uint8
  //! \param in   encoded values
  //! \param out  destination for `dim` decoded floats
  void decode(const ValueType *in, size_t dim, float *out) const;
};

/*! INT4 Quantizer
 *
 *  Instantiates EntropyIntegerQuantizer with the value range [-8, 7] while
 *  using uint8_t as the storage type.
 *  NOTE(review): presumably two 4-bit values are packed into each uint8_t --
 *  confirm against the encode()/decode() implementations, including how many
 *  output bytes `dim` inputs produce.
 */
class EntropyInt4Quantizer : public EntropyIntegerQuantizer<uint8_t, -8, 7> {
 public:
  //! Feed the training data
  //! \param vec  pointer to `dim` float values
  //! \param dim  number of values in `vec`
  bool feed(const float *vec, size_t dim);

  //! Train the quantizer
  //! NOTE(review): presumably derives scale/bias from the fed data and
  //! returns false on failure -- confirm against the implementation.
  bool train(void);

  //! Encode float vector to int4
  //! \param in   `dim` float inputs
  //! \param out  destination for the encoded values
  void encode(const float *in, size_t dim, ValueType *out) const;

  //! Decode to float vector from int4
  //! \param in   encoded values
  //! \param out  destination for `dim` decoded floats
  void decode(const ValueType *in, size_t dim, float *out) const;
};

/*! UINT4 Quantizer
 *
 *  Instantiates EntropyIntegerQuantizer with the value range [0, 15] while
 *  using uint8_t as the storage type.
 *  NOTE(review): presumably two 4-bit values are packed into each uint8_t --
 *  confirm against the encode()/decode() implementations, including how many
 *  output bytes `dim` inputs produce.
 */
class EntropyUInt4Quantizer : public EntropyIntegerQuantizer<uint8_t, 0, 15> {
 public:
  //! Feed the training data
  //! \param vec  pointer to `dim` float values
  //! \param dim  number of values in `vec`
  bool feed(const float *vec, size_t dim);

  //! Train the quantizer
  //! NOTE(review): presumably derives scale/bias from the fed data and
  //! returns false on failure -- confirm against the implementation.
  bool train(void);

  //! Encode float vector to uint4
  //! \param in   `dim` float inputs
  //! \param out  destination for the encoded values
  void encode(const float *in, size_t dim, ValueType *out) const;

  //! Decode to float vector from uint4
  //! \param in   encoded values
  //! \param out  destination for `dim` decoded floats
  void decode(const ValueType *in, size_t dim, float *out) const;
};

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/algorithm/kmeans.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cmath>
#include <numeric>
#include <random>
#include <ailego/container/vector_array.h>
#include <ailego/math/euclidean_distance_matrix.h>
#include <ailego/math/hamming_distance_matrix.h>
#include <ailego/math/inner_product_matrix.h>
#include <ailego/math/norm2_matrix.h>
#include <ailego/math/normalizer.h>
#include <ailego/utility/matrix_helper.h>
#include <zvec/ailego/container/heap.h>
#include <zvec/ailego/internal/platform.h>
#include <zvec/ailego/utility/float_helper.h>
#include <zvec/ailego/utility/type_helper.h>
#include "lloyd_cluster.h"

namespace zvec {
namespace ailego {

/*! K-MC2 Centroids Generator
 *
 *  Seeds the centroids of a clustering owner. The strategy is chosen from
 *  the generator options each time the functor is invoked:
 *    - chain_length == 0      : plain random selection of k features;
 *    - assumption_free false  : K-MC2, candidates drawn uniformly;
 *    - assumption_free true   : AFK-MC2, candidates drawn with a probability
 *                               derived from the distance to the first
 *                               centroid.
 *
 *  Features are read from two containers of the owner: `feature_matrix()`
 *  (stored in transposed batches of BatchCount vectors) and
 *  `feature_cache()` (stored one vector after another).
 */
template <typename T, typename TPool>
class Kmc2CentroidsGenerator {
 public:
  //! Type of values
  using OwnerType = typename std::decay<T>::type;
  using ContainerType = typename OwnerType::ContainerType;
  using ContextType = typename OwnerType::ContextType;
  using ValueType = typename OwnerType::ValueType;
  using StoreType = typename OwnerType::StoreType;
  using ThreadPoolType = TPool;

  //! constexpr variables
  constexpr static size_t BatchCount = OwnerType::BatchCount;

  //! Generate centroids
  //! \param owner  clustering object whose centroids are (re)initialized
  //! \param pool   thread pool used to score candidates in parallel
  void operator()(OwnerType *owner, ThreadPoolType &pool) const {
    if (chain_length_ == 0) {
      this->init_centroids_random(owner);
    } else if (!assumption_free_) {
      this->init_centroids_kmc2(owner, pool);
    } else {
      this->init_centroids_afkmc2(owner, pool);
    }
  }

  //! Retrieve the markov chain length
  size_t chain_length(void) const {
    return chain_length_;
  }

  //! Set the mutable markov chain length (0 selects plain random seeding)
  void set_chain_length(size_t len) {
    chain_length_ = len;
  }

  //! Retrieve assumption free option
  bool assumption_free(void) const {
    return assumption_free_;
  }

  //! Set the assumption free option (true selects AFK-MC2)
  void set_assumption_free(bool val) {
    assumption_free_ = val;
  }

 protected:
  //! Initialize centroids randomly
  void init_centroids_random(OwnerType *owner) const {
    RandomSelectBenches(owner->feature_cache(), owner->feature_matrix(),
                        owner->k_value(), owner->mutable_centroids());
  }

  //! Initialize centroids with K-MC2
  //! For every centroid after the first, `chain_length_` candidates are
  //! sampled uniformly, scored by their distance to the nearest existing
  //! centroid, and one of them is picked by SelectChainWinner().
  void init_centroids_kmc2(OwnerType *owner, ThreadPoolType &pool) const {
    const auto &matrix = owner->feature_matrix();
    const auto &cache = owner->feature_cache();
    auto *centroids = owner->mutable_centroids();

    std::mt19937 mt((std::random_device())());
    std::uniform_real_distribution<float> dist(0.0, 1.0);

    ContainerType benches(cache.dimension());
    std::vector<float> scores;

    // Sample first center uniformly
    RandomSelectBenches(cache, matrix, 1, centroids);

    // Make a thread group
    auto group = pool.make_group();

    for (size_t i = 1, k = owner->k_value(); i < k; ++i) {
      RandomSelectBenches(cache, matrix, chain_length_, &benches);

      // Nothing could be sampled (no input features): stop instead of
      // reading scores[0] / benches[0] out of bounds.
      if (benches.count() == 0) {
        break;
      }

      // Update bench scores
      scores.resize(benches.count());
      for (size_t j = 0; j != scores.size(); ++j) {
        group->submit(Closure::New(&Kmc2CentroidsGenerator::UpdateBenchScores,
                                   centroids, benches[j], &scores[j]));
      }
      group->wait_finish();

      //! Select the better centroid randomly
      size_t xj = SelectChainWinner(scores, &mt, &dist);
      centroids->append(benches[xj], benches.dimension());
    }  // end of for
  }

  //! Initialize centroids with AFK-MC2
  //! Same chain as K-MC2, but candidates are drawn with a per-feature
  //! probability: half proportional to the distance to the first centroid,
  //! half uniform. Candidate scores are divided by that probability before
  //! the chain selection.
  void init_centroids_afkmc2(OwnerType *owner, ThreadPoolType &pool) const {
    const auto &matrix = owner->feature_matrix();
    const auto &cache = owner->feature_cache();

    // Probability (first holds the raw distances to the first centroid)
    std::vector<float> probs(matrix.count() + cache.count());

    // Sample first center uniformly
    RandomSelectBenches(cache, matrix, 1, owner->mutable_centroids());

    // Make a thread group
    auto group = pool.make_group();
    if (!matrix.empty()) {
      // n: number of batches; c: batches per task; m: n rounded down to a
      // multiple of c. The remaining (n - m) batches get one task each.
      size_t n = matrix.count() / BatchCount;
      size_t c = std::max<size_t>(n / pool.count() / 2u, 1u);
      size_t m = n / c * c;

      for (size_t i = 0; i != m; i += c) {
        group->submit(Closure::New(&Kmc2CentroidsGenerator::UpdateMatrixScores,
                                   owner, i, i + c, &probs[0]));
      }
      for (size_t i = m; i != n; i += 1) {
        group->submit(Closure::New(&Kmc2CentroidsGenerator::UpdateMatrixScores,
                                   owner, i, i + 1, &probs[0]));
      }
    }
    if (!cache.empty()) {
      group->submit(Closure::New(&Kmc2CentroidsGenerator::UpdateCacheScores,
                                 owner, &probs[matrix.count()]));
    }
    group->wait_finish();

    // Update probabilities
    double p_sum = std::accumulate(probs.begin(), probs.end(), 0.0);
    const double uniform = probs.empty() ? 0.0 : 1.0 / probs.size();
    for (auto it = probs.begin(); it != probs.end(); ++it) {
      // A zero sum (e.g. every feature equals the first centroid) would
      // yield 0/0 = NaN; use the uniform share for the distance term then.
      const double weighted = (p_sum != 0.0) ? (*it / p_sum) : uniform;
      *it = static_cast<float>((weighted + uniform) * 0.5);
    }

    std::mt19937 mt((std::random_device())());
    std::uniform_real_distribution<float> dist(0.0, 1.0);
    ContainerType benches(cache.dimension());
    std::vector<float> scores;
    std::vector<float> bench_probs;

    for (size_t i = 1; i < owner->k_value(); ++i) {
      RandomSelectBenches(cache, matrix, chain_length_, probs, &benches,
                          &bench_probs);

      // Nothing could be sampled (no input features): stop instead of
      // reading scores[0] / benches[0] out of bounds.
      if (benches.count() == 0) {
        break;
      }

      // Update bench scores
      scores.resize(benches.count());
      for (size_t j = 0; j != scores.size(); ++j) {
        group->submit(Closure::New(&Kmc2CentroidsGenerator::UpdateBenchScores,
                                   owner->mutable_centroids(), benches[j],
                                   &scores[j]));
      }
      group->wait_finish();

      // Update scores with probabilities
      for (size_t j = 0; j != scores.size(); ++j) {
        scores[j] /= bench_probs[j];
      }

      //! Select the better centroid randomly
      size_t xj = SelectChainWinner(scores, &mt, &dist);
      owner->mutable_centroids()->append(benches[xj], benches.dimension());
    }  // end of for
  }

  //! Walk the candidate chain and return the index of the surviving one
  //! Starts with candidate 0; candidate j replaces the current one when the
  //! current score is exactly zero or when `current * u < scores[j]` for a
  //! fresh uniform draw u in [0, 1). No random number is consumed in the
  //! zero-score case.
  //! \pre `scores` is not empty.
  static size_t SelectChainWinner(const std::vector<float> &scores,
                                  std::mt19937 *mt,
                                  std::uniform_real_distribution<float> *dist) {
    float x = scores[0];
    size_t xj = 0;
    for (size_t j = 1; j != scores.size(); ++j) {
      float y = scores[j];

      if (x == 0.0f || x * (*dist)(*mt) < y) {
        x = y;
        xj = j;
      }
    }
    return xj;
  }

  //! Update matrix score
  //! Computes the distances between the first centroid and the matrix
  //! batches [first, last); the results for the batch starting at feature i
  //! are written from `out[i]` on.
  static void UpdateMatrixScores(const OwnerType *owner, size_t first,
                                 size_t last, float *out) {
    const auto &matrix = owner->feature_matrix();
    const auto *bench = owner->centroids().data();

    for (size_t i = first * BatchCount; i != last * BatchCount;
         i += BatchCount) {
      ContextType::template BatchDistance<1>(matrix[i], bench,
                                             matrix.dimension(), &out[i]);
    }
  }

  //! Update cache score
  //! Writes the distance between the first centroid and cache feature i to
  //! `out[i]` for every cached feature.
  static void UpdateCacheScores(const OwnerType *owner, float *out) {
    const auto &cache = owner->feature_cache();
    const auto *bench = owner->centroids().data();

    for (size_t i = 0, n = cache.count(); i != n; ++i) {
      ContextType::Distance(bench, cache[i], cache.dimension(), &out[i]);
    }
  }

  //! Update bench score
  //! Stores in `*out` the smallest distance between `feat` and any vector of
  //! `benches`; the largest finite float if `benches` holds no vector.
  static void UpdateBenchScores(const ContainerType *benches,
                                const StoreType *feat, float *out) {
    float min_score = std::numeric_limits<float>::max();

    for (size_t i = 0, c = benches->count(); i != c; ++i) {
      float new_score;
      ContextType::Distance(benches->at(i), feat, benches->dimension(),
                            &new_score);

      if (new_score < min_score) {
        min_score = new_score;
      }
    }
    *out = min_score;
  }

  //! Select k benches randomly
  //! Resets `benches` and fills it with up to k features picked in a single
  //! pass over the matrix features followed by the cache features. Feature i
  //! is taken when a random draw modulo the number of features not yet
  //! visited is below the number of picks still wanted.
  static void RandomSelectBenches(const ContainerType &cache,
                                  const ContainerType &matrix, size_t k,
                                  ContainerType *benches) {
    ContainerType rows(cache.dimension());
    size_t m = matrix.count();
    size_t n = m + cache.count();
    std::mt19937 mt((std::random_device())());

    rows.resize(BatchCount);
    benches->reset(cache.dimension());
    benches->reserve(k);

    for (size_t i = 0; k > 0 && i < n; ++i) {
      if (mt() % (n - i) >= k) {
        continue;
      }
      // Selected a feature
      if (i < m) {
        // Undo the batch transposition of the batch containing feature i,
        // then take its row. The `&` mask relies on BatchCount being a
        // power of two.
        ContextType::MatrixReverseTranspose(matrix[i / BatchCount * BatchCount],
                                            matrix.dimension(), rows.data());
        benches->append(rows[i & (BatchCount - 1u)], matrix.dimension());
      } else {
        benches->append(cache[i - m], cache.dimension());
      }
      --k;
    }  // end of for
  }

  //! Select k benches randomly
  //! Weighted variant: every feature i receives the key u^(1 / probs[i])
  //! with u uniform in [0, 1); the features kept by a KeyValueHeap of
  //! capacity k are copied to `benches` and their probabilities to
  //! `bench_probs` (same order).
  static void RandomSelectBenches(const ContainerType &cache,
                                  const ContainerType &matrix, size_t k,
                                  const std::vector<float> &probs,
                                  ContainerType *benches,
                                  std::vector<float> *bench_probs) {
    std::mt19937 mt((std::random_device())());
    std::uniform_real_distribution<float> dist(0.0, 1.0);

    // Sample features
    KeyValueHeap<size_t, double, std::greater<double>> samples(k);
    for (size_t i = 0; i < probs.size(); ++i) {
      samples.emplace(i, std::pow(dist(mt), 1.0 / probs[i]));
    }

    ContainerType rows(cache.dimension());
    size_t matrix_count = matrix.count();

    rows.resize(BatchCount);
    benches->reset(cache.dimension());
    benches->reserve(k);
    bench_probs->clear();
    bench_probs->reserve(k);

    for (const auto &it : samples) {
      // Selected a feature
      if (it.first < matrix_count) {
        ContextType::MatrixReverseTranspose(
            matrix[it.first / BatchCount * BatchCount], matrix.dimension(),
            rows.data());
        benches->append(rows[it.first & (BatchCount - 1u)], matrix.dimension());
      } else {
        benches->append(cache[it.first - matrix_count], cache.dimension());
      }
      bench_probs->push_back(probs[it.first]);
    }
  }

 private:
  size_t chain_length_{32};
  bool assumption_free_{false};
};

/*! Numerical K-Means Context
 *
 *  Per-cluster accumulation state plus the static distance / layout helpers
 *  for vectors whose elements are a signed arithmetic type. Distances are
 *  delegated to SquaredEuclideanDistanceMatrix.
 */
template <typename T, size_t BATCH_COUNT = 32u>
class NumericalKmeansContext {
 public:
  //! constexpr variables
  constexpr static size_t BatchCount = BATCH_COUNT;

  //! Type of values
  using ValueType = typename std::remove_cv<T>::type;
  using StoreType = typename std::remove_cv<T>::type;

  // Check supporting type
  static_assert(IsSignedArithmetic<ValueType>::value,
                "ValueType must be signed arithmetic");

  /*! K-Means Context Cluster
   *
   *  Holds the running per-dimension sum, the number of appended vectors and
   *  the accumulated cost of one cluster. append() is guarded by a spin
   *  mutex; the read accessors take no lock.
   */
  class Cluster {
   public:
    //! Constructor
    //! Intentionally not `explicit`: reset() relies on the implicit
    //! conversion from a dimension to a Cluster.
    Cluster(size_t dim) : accum_(dim, 0.0) {}

    //! Constructor (copies the statistics; the mutex is created fresh)
    Cluster(const Cluster &rhs)
        : cost_(rhs.cost_), count_(rhs.count_), accum_(rhs.accum_) {}

    //! Constructor (moves the statistics; the mutex is created fresh)
    Cluster(Cluster &&rhs)
        : cost_(rhs.cost_), count_(rhs.count_), accum_(std::move(rhs.accum_)) {}

    //! Assignment (the mutex of `*this` is left untouched)
    Cluster &operator=(const Cluster &rhs) {
      cost_ = rhs.cost_;
      count_ = rhs.count_;
      accum_ = rhs.accum_;
      return *this;
    }

    //! Assignment (the mutex of `*this` is left untouched)
    Cluster &operator=(Cluster &&rhs) {
      cost_ = rhs.cost_;
      count_ = rhs.count_;
      accum_ = std::move(rhs.accum_);
      return *this;
    }

    //! Append a vector
    //! \param vec   `dim` elements to add to the running sum
    //! \param dim   must equal the dimension given at construction
    //! \param dist  cost contribution of this vector
    void append(const ValueType *vec, size_t dim, float dist) {
      ailego_check_with(dim == accum_.size(), "Unmatched dimension");

      mutex_.lock();
      cost_ += dist;
      count_ += 1;

      for (size_t i = 0; i != dim; ++i) {
        accum_[i] += vec[i];
      }
      mutex_.unlock();
    }

    //! Retrieve the centroid of vectors
    //! Writes the per-dimension mean to `out`. For an empty cluster the
    //! output is NaN for non-integral ValueType and 0 for integral ValueType
    //! (NaN has no integral representation).
    void centroid(ValueType *out, size_t dim) const {
      ailego_check_with(dim == accum_.size(), "Unmatched dimension");

      for (size_t i = 0; i != dim; ++i) {
        out[i] = count_ == 0 ? FloatCast<ValueType>(NAN)
                             : FloatCast<ValueType>(accum_[i] / count_);
      }
    }

    //! Retrieve squared error
    double cost(void) const {
      return cost_;
    }

    //! Retrieve feature count
    size_t count(void) const {
      return count_;
    }

   protected:
    //! Convert float type to another type (non-integral target: plain cast)
    template <typename U>
    static auto FloatCast(const double val) ->
        typename std::enable_if<!std::is_integral<U>::value, U>::type {
      return static_cast<U>(val);
    }

    //! Convert float type to another type (integral target: round to nearest)
    //! NaN is mapped to 0, because converting NaN to an integer type is
    //! undefined behaviour.
    template <typename U>
    static auto FloatCast(const double val) ->
        typename std::enable_if<std::is_integral<U>::value, U>::type {
      return std::isnan(val) ? static_cast<U>(0)
                             : static_cast<U>(std::round(val));
    }

   private:
    SpinMutex mutex_{};
    double cost_{0.0};
    size_t count_{0u};
    std::vector<double> accum_{};
  };

  //! operator []
  const Cluster &operator[](size_t i) const {
    return clusters_[i];
  }

  //! operator []
  Cluster &operator[](size_t i) {
    return clusters_[i];
  }

  //! Clear the context
  void clear(void) {
    clusters_.clear();
  }

  //! Reset the context
  //! Discards all clusters and creates `k_value` empty ones of dimension
  //! `dim`.
  void reset(size_t k_value, size_t dim) {
    clusters_.clear();
    clusters_.resize(k_value, dim);
  }

  //! Retrieve context of clusters
  const std::vector<Cluster> &clusters(void) const {
    return clusters_;
  }

  //! Compute the distance between matrix and query (batch)
  //! Delegates to SquaredEuclideanDistanceMatrix<ValueType, BatchCount, N>.
  template <size_t N>
  static void BatchDistance(const ValueType *m, const ValueType *q, size_t dim,
                            float *out) {
    SquaredEuclideanDistanceMatrix<ValueType, BatchCount, N>::Compute(m, q, dim,
                                                                      out);
  }

  //! Compute the distance between matrix and query (single)
  static void Distance(const ValueType *m, const ValueType *q, size_t dim,
                       float *out) {
    SquaredEuclideanDistanceMatrix<ValueType, 1, 1>::Compute(m, q, dim, out);
  }

  //! Transpose a matrix (element types of two bytes or more)
  template <typename U>
  static auto MatrixTranspose(const U *src, size_t dim, T *dst) ->
      typename std::enable_if<sizeof(U) >= 2>::type {
    MatrixHelper::Transpose<U, BatchCount>(src, dim, dst);
  }

  //! Transpose a matrix (one-byte element types)
  //! Elements are moved in groups of four as uint32_t units, hence
  //! `dim >> 2` units per vector.
  template <typename U>
  static auto MatrixTranspose(const U *src, size_t dim, U *dst) ->
      typename std::enable_if<sizeof(U) == 1>::type {
    MatrixHelper::Transpose<uint32_t, BatchCount>(src, dim >> 2, dst);
  }

  //! Reverse transpose a matrix (element types of two bytes or more)
  template <typename U>
  static auto MatrixReverseTranspose(const U *src, size_t dim, U *dst) ->
      typename std::enable_if<sizeof(U) >= 2>::type {
    MatrixHelper::ReverseTranspose<U, BatchCount>(src, dim, dst);
  }

  //! Reverse transpose a matrix (one-byte element types, uint32_t units)
  template <typename U>
  static auto MatrixReverseTranspose(const U *src, size_t dim, U *dst) ->
      typename std::enable_if<sizeof(U) == 1>::type {
    MatrixHelper::ReverseTranspose<uint32_t, BatchCount>(src, dim >> 2, dst);
  }

  //! Compute Norm2
  //! Delegates to Normalizer<ValueType>::L2 for floating point types.
  //! NOTE(review): for a call without explicit template arguments whose
  //! first argument is exactly the class `ValueType *`, overload resolution
  //! prefers the non-template overload below -- confirm callers reach this
  //! overload as intended.
  template <typename ValueType, typename = typename std::enable_if<
                                    IsFloatingPoint<ValueType>::value>::type>
  static void Norm2(ValueType *data, size_t dim, float *norm) {
    Normalizer<ValueType>::L2(data, dim, norm);
  }

  //! Compute Norm2, for non-float do nothing
  //! Leaves the data untouched and reports a norm of 0.
  static void Norm2(ValueType * /*data*/, size_t /*dim*/, float *norm) {
    *norm = 0.0f;
  }

 private:
  //! Members
  std::vector<Cluster> clusters_{};
};

/*! Nibble K-Means Context (INT4)
 *
 *  Per-cluster accumulation state plus the static distance / layout helpers
 *  for vectors of signed 4-bit values packed two per byte: the low nibble of
 *  a byte holds the even dimension, the high nibble the following odd one.
 *  Distances are delegated to SquaredEuclideanDistanceMatrix<uint8_t, ...>.
 */
template <typename T, size_t BATCH_COUNT = 32u>
class NibbleKmeansContext {
 public:
  //! constexpr variables
  constexpr static size_t BatchCount = BATCH_COUNT;

  //! Type of values
  using ValueType = typename std::remove_cv<T>::type;
  using StoreType = typename std::make_unsigned<ValueType>::type;

  // Check supporting type
  static_assert(std::is_same<ValueType, int32_t>::value ||
                    std::is_same<ValueType, int64_t>::value,
                "ValueType must be int32_t or int64_t");

  /*! K-Means Context Cluster
   *
   *  Holds the running per-dimension sum, the number of appended vectors and
   *  the accumulated cost of one cluster. append() is guarded by a spin
   *  mutex; the read accessors take no lock.
   */
  class Cluster {
   public:
    //! Constructor
    //! Intentionally not `explicit`: reset() relies on the implicit
    //! conversion from a dimension to a Cluster.
    Cluster(size_t dim) : accum_(dim, 0.0) {}

    //! Constructor (copies the statistics; the mutex is created fresh)
    Cluster(const Cluster &rhs)
        : cost_(rhs.cost_), count_(rhs.count_), accum_(rhs.accum_) {}

    //! Constructor (moves the statistics; the mutex is created fresh)
    Cluster(Cluster &&rhs)
        : cost_(rhs.cost_), count_(rhs.count_), accum_(std::move(rhs.accum_)) {}

    //! Assignment (the mutex of `*this` is left untouched)
    Cluster &operator=(const Cluster &rhs) {
      cost_ = rhs.cost_;
      count_ = rhs.count_;
      accum_ = rhs.accum_;
      return *this;
    }

    //! Assignment (the mutex of `*this` is left untouched)
    Cluster &operator=(Cluster &&rhs) {
      cost_ = rhs.cost_;
      count_ = rhs.count_;
      accum_ = std::move(rhs.accum_);
      return *this;
    }

    //! Append a vector
    //! \param vec   packed nibbles, read byte by byte
    //! \param dim   number of 4-bit values; must equal the dimension given
    //!              at construction. A trailing odd dimension is ignored.
    //! \param dist  cost contribution of this vector
    void append(const StoreType *vec, size_t dim, float dist) {
      ailego_check_with(dim == accum_.size(), "Unmatched dimension");

      mutex_.lock();
      cost_ += dist;
      count_ += 1;

      const uint8_t *arr = reinterpret_cast<const uint8_t *>(vec);
      dim = (dim >> 1) << 1;
      for (size_t i = 0; i != dim; i += 2) {
        uint8_t val = arr[i >> 1];
        // Move each nibble to the top of an int8_t, then shift back down to
        // sign-extend it.
        accum_[i] += ((int8_t)(val << 4) >> 4);
        accum_[i + 1] += ((int8_t)(val) >> 4);
      }
      mutex_.unlock();
    }

    //! Retrieve the centroid of vectors
    //! Writes the rounded per-dimension mean as packed nibbles; an empty
    //! cluster yields all zeros. A trailing odd dimension is not written.
    void centroid(StoreType *out, size_t dim) const {
      ailego_check_with(dim == accum_.size(), "Unmatched dimension");

      uint8_t *arr = reinterpret_cast<uint8_t *>(out);
      dim = (dim >> 1) << 1;
      for (size_t i = 0; i != dim; i += 2) {
        int lo = 0;
        int hi = 0;
        if (count_ != 0) {
          lo = static_cast<int>(std::round(accum_[i] / count_));
          hi = static_cast<int>(std::round(accum_[i + 1] / count_));
        }
        // Pack in unsigned arithmetic: left-shifting a negative int is
        // undefined before C++20. The resulting bits are unchanged.
        arr[i >> 1] =
            static_cast<uint8_t>(((static_cast<unsigned>(hi) << 4) & 0xf0u) |
                                 (static_cast<unsigned>(lo) & 0xfu));
      }
    }

    //! Retrieve squared error
    double cost(void) const {
      return cost_;
    }

    //! Retrieve feature count
    size_t count(void) const {
      return count_;
    }

   private:
    SpinMutex mutex_{};
    double cost_{0.0};
    size_t count_{0u};
    std::vector<double> accum_{};
  };

  //! operator []
  const Cluster &operator[](size_t i) const {
    return clusters_[i];
  }

  //! operator []
  Cluster &operator[](size_t i) {
    return clusters_[i];
  }

  //! Clear the context
  void clear(void) {
    clusters_.clear();
  }

  //! Reset the context
  //! Discards all clusters and creates `k_value` empty ones of dimension
  //! `dim`.
  void reset(size_t k_value, size_t dim) {
    clusters_.clear();
    clusters_.resize(k_value, dim);
  }

  //! Retrieve context of clusters
  const std::vector<Cluster> &clusters(void) const {
    return clusters_;
  }

  //! Compute the distance between matrix and query (batch)
  //! Delegates to SquaredEuclideanDistanceMatrix<uint8_t, BatchCount, N> on
  //! the byte view of both operands.
  template <size_t N>
  static void BatchDistance(const StoreType *m, const StoreType *q, size_t dim,
                            float *out) {
    SquaredEuclideanDistanceMatrix<uint8_t, BatchCount, N>::Compute(
        reinterpret_cast<const uint8_t *>(m),
        reinterpret_cast<const uint8_t *>(q), dim, out);
  }

  //! Compute the distance between matrix and query (single)
  static void Distance(const StoreType *m, const StoreType *q, size_t dim,
                       float *out) {
    SquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(
        reinterpret_cast<const uint8_t *>(m),
        reinterpret_cast<const uint8_t *>(q), dim, out);
  }

  //! Transpose a matrix
  //! Works on uint32_t units; eight nibbles fit in one unit, hence
  //! `dim >> 3` units per vector.
  static void MatrixTranspose(const StoreType *src, size_t dim,
                              StoreType *dst) {
    MatrixHelper::Transpose<uint32_t, BatchCount>(src, dim >> 3, dst);
  }

  //! Reverse transpose a matrix (uint32_t units, `dim >> 3` per vector)
  static void MatrixReverseTranspose(const StoreType *src, size_t dim,
                                     StoreType *dst) {
    MatrixHelper::ReverseTranspose<uint32_t, BatchCount>(src, dim >> 3, dst);
  }

  //! Compute and do norm2
  //! No normalization is performed for nibble data; the reported norm is 0.
  static void Norm2(StoreType * /*data*/, size_t /*dim*/, float *norm) {
    *norm = 0;
  }

 private:
  //! Members
  std::vector<Cluster> clusters_{};
};

/*! Binary K-Means Context
 */
template <typename T, size_t BATCH_COUNT = 32u>
class BinaryKmeansContext {
 public:
  //! constexpr variables
  constexpr static size_t BatchCount = BATCH_COUNT;

  //! Type of values
  using ValueType = typename std::remove_cv<T>::type;
  using StoreType = typename std::remove_cv<T>::type;

  // Check supporting type
  static_assert(std::is_same<ValueType, uint32_t>::value ||
                    std::is_same<ValueType, uint64_t>::value,
                "ValueType must be uint32_t or uint64_t");

  /*! K-Means Context Cluster
   */
  class Cluster {
   public:
    //! Constructor
    Cluster(size_t dim) : accum_(dim, 0) {}

    //! Constructor
    Cluster(const Cluster &rhs)
        : cost_(rhs.cost_), count_(rhs.count_), accum_(rhs.accum_) {}

    //! Constructor
    Cluster(Cluster &&rhs)
        : cost_(rhs.cost_), count_(rhs.count_), accum_(std::move(rhs.accum_)) {}

    //! Assignment
    Cluster &operator=(const Cluster &rhs) {
      cost_ = rhs.cost_;
      count_ = rhs.count_;
      accum_ = rhs.accum_;
      return *this;
    }

    //! Assignment
    Cluster &operator=(Cluster &&rhs) {
      cost_ = rhs.cost_;
      count_ = rhs.count_;
      accum_ = std::move(rhs.accum_);
      return *this;
    }

    //! Append a vector
    void append(const ValueType *vec, size_t dim, float dist) {
      ailego_check_with(dim == accum_.size(), "Unmatched dimension");

      mutex_.lock();
      cost_ += dist;
      count_ += 1;

      const uint8_t *arr = reinterpret_cast<const uint8_t *>(vec);
      for (size_t i = 0; i != dim; ++i) {
        if (arr[i >> 3] & (1u << (i & 7))) {
          accum_[i] += 1;
        }
      }
      mutex_.unlock();
    }

    //! Retrieve the centroid of vectors
    void centroid(ValueType *out, size_t dim) const {
      ailego_check_with(dim == accum_.size(), "Unmatched dimension");

      uint8_t *arr = reinterpret_cast<uint8_t *>(out);
      size_t half = count_ >> 1;
      for (size_t i = 0; i != dim; ++i) {
        if (accum_[i] > half) {
          arr[i >> 3] |= static_cast<uint8_t>(1 << (i & 0x7));
        } else {
          arr[i >> 3] &= ~static_cast<uint8_t>(1 << (i & 0x7));
        }
      }
    }

    //! Retrieve squared error
    double cost(void) const {
      return cost_;
    }

    //! Retrieve feature count
    size_t count(void) const {
      return count_;
    }

   private:
    SpinMutex mutex_{};
    double cost_{0.0};
    size_t count_{0u};
    std::vector<uint32_t> accum_{};
  };

  //! operator []
  const Cluster &operator[](size_t i) const {
    return clusters_[i];
  }

  //! operator []
  Cluster &operator[](size_t i) {
    return clusters_[i];
  }

  //! Clear the context
  void clear(void) {
    clusters_.clear();
  }

  //! Reset the context
  void reset(size_t k_value, size_t dim) {
    clusters_.clear();
    clusters_.resize(k_value, dim);
  }

  //! Retrieve context of clusters
  const std::vector<Cluster> &clusters(void) const {
    return clusters_;
  }

  //! Compute the distance between matrix and query (batch)
  template <size_t N>
  static void BatchDistance(const ValueType *m, const ValueType *q, size_t dim,
                            float *out) {
    HammingDistanceMatrix<ValueType, BatchCount, N>::Compute(m, q, dim, out);
  }

  //! Compute the distance between matrix and query (single)
  static void Distance(const ValueType *m, const ValueType *q, size_t dim,
                       float *out) {
    HammingDistanceMatrix<ValueType, 1, 1>::Compute(m, q, dim, out);
  }

  //! Transpose a matrix
  static void MatrixTranspose(const ValueType *src, size_t dim, T *dst) {
    MatrixHelper::Transpose<ValueType, BatchCount>(
        src, (dim >> 3) / sizeof(ValueType), dst);
  }

  //! Reverse transpose a matrix
  static void MatrixReverseTranspose(const ValueType *src, size_t dim, T *dst) {
    MatrixHelper::ReverseTranspose<ValueType, BatchCount>(
        src, (dim >> 3) / sizeof(ValueType), dst);
  }

  //! Compute Norm2
  static void Norm2(ValueType * /*data*/, size_t /*dim*/, float *norm) {
    *norm = 0;
  }

 private:
  //! Members
  std::vector<Cluster> clusters_{};
};

/*! Numerical InnerProduct K-Means Context
 *
 *  Per-cluster accumulation state plus the static distance / layout helpers
 *  for vectors whose elements are a signed arithmetic type. Distances are
 *  delegated to MinusInnerProductMatrix.
 */
template <typename T, size_t BATCH_COUNT = 32u>
class NumericalInnerProductKmeansContext {
 public:
  //! constexpr variables
  constexpr static size_t BatchCount = BATCH_COUNT;

  //! Type of values
  using ValueType = typename std::remove_cv<T>::type;
  using StoreType = typename std::remove_cv<T>::type;

  // Check supporting type
  static_assert(IsSignedArithmetic<ValueType>::value,
                "ValueType must be signed arithmetic");

  /*! K-Means Context Cluster
   *
   *  Holds the running per-dimension sum, the number of appended vectors and
   *  the accumulated cost of one cluster. append() is guarded by a spin
   *  mutex; the read accessors take no lock.
   */
  class Cluster {
   public:
    //! Constructor
    //! Intentionally not `explicit`: reset() relies on the implicit
    //! conversion from a dimension to a Cluster.
    Cluster(size_t dim) : accum_(dim, 0.0) {}

    //! Constructor (copies the statistics; the mutex is created fresh)
    Cluster(const Cluster &rhs)
        : cost_(rhs.cost_), count_(rhs.count_), accum_(rhs.accum_) {}

    //! Constructor (moves the statistics; the mutex is created fresh)
    Cluster(Cluster &&rhs)
        : cost_(rhs.cost_), count_(rhs.count_), accum_(std::move(rhs.accum_)) {}

    //! Assignment (the mutex of `*this` is left untouched)
    Cluster &operator=(const Cluster &rhs) {
      cost_ = rhs.cost_;
      count_ = rhs.count_;
      accum_ = rhs.accum_;
      return *this;
    }

    //! Assignment (the mutex of `*this` is left untouched)
    Cluster &operator=(Cluster &&rhs) {
      cost_ = rhs.cost_;
      count_ = rhs.count_;
      accum_ = std::move(rhs.accum_);
      return *this;
    }

    //! Append a vector
    //! \param vec   `dim` elements to add to the running sum
    //! \param dim   must equal the dimension given at construction
    //! \param dist  cost contribution of this vector
    void append(const ValueType *vec, size_t dim, float dist) {
      ailego_check_with(dim == accum_.size(), "Unmatched dimension");

      mutex_.lock();
      cost_ += dist;
      count_ += 1;

      for (size_t i = 0; i != dim; ++i) {
        accum_[i] += vec[i];
      }
      mutex_.unlock();
    }

    //! Retrieve the centroid of vectors
    //! Writes the per-dimension mean to `out`. For an empty cluster the
    //! output is NaN for non-integral ValueType and 0 for integral ValueType
    //! (NaN has no integral representation).
    void centroid(ValueType *out, size_t dim) const {
      ailego_check_with(dim == accum_.size(), "Unmatched dimension");

      for (size_t i = 0; i != dim; ++i) {
        out[i] = count_ == 0 ? FloatCast<ValueType>(NAN)
                             : FloatCast<ValueType>(accum_[i] / count_);
      }
    }

    //! Retrieve squared error
    double cost(void) const {
      return cost_;
    }

    //! Retrieve feature count
    size_t count(void) const {
      return count_;
    }

   protected:
    //! Convert float type to another type (non-integral target: plain cast)
    template <typename U>
    static auto FloatCast(const double val) ->
        typename std::enable_if<!std::is_integral<U>::value, U>::type {
      return static_cast<U>(val);
    }

    //! Convert float type to another type (integral target: round to nearest)
    //! NaN is mapped to 0, because converting NaN to an integer type is
    //! undefined behaviour.
    template <typename U>
    static auto FloatCast(const double val) ->
        typename std::enable_if<std::is_integral<U>::value, U>::type {
      return std::isnan(val) ? static_cast<U>(0)
                             : static_cast<U>(std::round(val));
    }

   private:
    SpinMutex mutex_{};
    double cost_{0.0};
    size_t count_{0u};
    std::vector<double> accum_{};
  };

  //! operator []
  const Cluster &operator[](size_t i) const {
    return clusters_[i];
  }

  //! operator []
  Cluster &operator[](size_t i) {
    return clusters_[i];
  }

  //! Clear the context
  void clear(void) {
    clusters_.clear();
  }

  //! Reset the context
  //! Discards all clusters and creates `k_value` empty ones of dimension
  //! `dim`.
  void reset(size_t k_value, size_t dim) {
    clusters_.clear();
    clusters_.resize(k_value, dim);
  }

  //! Retrieve context of clusters
  const std::vector<Cluster> &clusters(void) const {
    return clusters_;
  }

  //! Compute the distance between matrix and query (batch)
  //! Delegates to MinusInnerProductMatrix<ValueType, BatchCount, N>.
  template <size_t N>
  static void BatchDistance(const ValueType *m, const ValueType *q, size_t dim,
                            float *out) {
    MinusInnerProductMatrix<ValueType, BatchCount, N>::Compute(m, q, dim, out);
  }

  //! Compute the distance between matrix and query (single)
  static void Distance(const ValueType *m, const ValueType *q, size_t dim,
                       float *out) {
    MinusInnerProductMatrix<ValueType, 1, 1>::Compute(m, q, dim, out);
  }

  //! Transpose a matrix (element types of two bytes or more)
  template <typename U>
  static auto MatrixTranspose(const U *src, size_t dim, T *dst) ->
      typename std::enable_if<sizeof(U) >= 2>::type {
    MatrixHelper::Transpose<U, BatchCount>(src, dim, dst);
  }

  //! Transpose a matrix (one-byte element types)
  //! Elements are moved in groups of four as uint32_t units, hence
  //! `dim >> 2` units per vector.
  template <typename U>
  static auto MatrixTranspose(const U *src, size_t dim, U *dst) ->
      typename std::enable_if<sizeof(U) == 1>::type {
    MatrixHelper::Transpose<uint32_t, BatchCount>(src, dim >> 2, dst);
  }

  //! Reverse transpose a matrix (element types of two bytes or more)
  template <typename U>
  static auto MatrixReverseTranspose(const U *src, size_t dim, U *dst) ->
      typename std::enable_if<sizeof(U) >= 2>::type {
    MatrixHelper::ReverseTranspose<U, BatchCount>(src, dim, dst);
  }

  //! Reverse transpose a matrix (one-byte element types, uint32_t units)
  template <typename U>
  static auto MatrixReverseTranspose(const U *src, size_t dim, U *dst) ->
      typename std::enable_if<sizeof(U) == 1>::type {
    MatrixHelper::ReverseTranspose<uint32_t, BatchCount>(src, dim >> 2, dst);
  }

  //! Compute Norm2
  //! Delegates to Normalizer<ValueType>::L2 for floating point types.
  //! NOTE(review): for a call without explicit template arguments whose
  //! first argument is exactly the class `ValueType *`, overload resolution
  //! prefers the non-template overload below -- confirm callers reach this
  //! overload as intended.
  template <typename ValueType, typename = typename std::enable_if<
                                    IsFloatingPoint<ValueType>::value>::type>
  static void Norm2(ValueType *data, size_t dim, float *norm) {
    Normalizer<ValueType>::L2(data, dim, norm);
  }

  //! Compute Norm2, for non-float do nothing
  //! Leaves the data untouched and reports a norm of 0.
  static void Norm2(ValueType * /*data*/, size_t /*dim*/, float *norm) {
    *norm = 0.0f;
  }

 private:
  //! Members
  std::vector<Cluster> clusters_{};
};

/*! Nibble InnerProduct K-Means Context (INT4)
 *
 *  Vectors are handled as packed 4-bit signed values, two per byte: the low
 *  nibble of byte `j` is element `2 * j`, the high nibble is element
 *  `2 * j + 1`. Distances are delegated to MinusInnerProductMatrix over the
 *  raw bytes, and each cluster accumulates per-element sums in doubles.
 */
template <typename T, size_t BATCH_COUNT = 32u>
class NibbleInnerProductKmeansContext {
 public:
  //! constexpr variables
  constexpr static size_t BatchCount = BATCH_COUNT;

  //! Type of values
  using ValueType = typename std::remove_cv<T>::type;
  using StoreType = typename std::make_unsigned<ValueType>::type;

  // Check supporting type
  static_assert(std::is_same<ValueType, int32_t>::value ||
                    std::is_same<ValueType, int64_t>::value,
                "ValueType must be int32_t or int64_t");

  /*! K-Means Context Cluster
   *
   *  Holds the running cost, the number of appended vectors and the
   *  per-element sums of one cluster. The mutex is never copied or moved;
   *  every Cluster owns a freshly constructed one.
   */
  class Cluster {
   public:
    //! Constructor (intentionally implicit: reset() passes a bare dimension
    //! as the fill value of std::vector::resize)
    Cluster(size_t dim) : accum_(dim, 0.0) {}

    //! Copy constructor
    Cluster(const Cluster &rhs)
        : cost_(rhs.cost_), count_(rhs.count_), accum_(rhs.accum_) {}

    //! Move constructor (noexcept so that std::vector<Cluster> can move
    //! elements instead of copying them when it reallocates)
    Cluster(Cluster &&rhs) noexcept
        : cost_(rhs.cost_), count_(rhs.count_), accum_(std::move(rhs.accum_)) {}

    //! Copy assignment
    Cluster &operator=(const Cluster &rhs) {
      cost_ = rhs.cost_;
      count_ = rhs.count_;
      accum_ = rhs.accum_;
      return *this;
    }

    //! Move assignment
    Cluster &operator=(Cluster &&rhs) noexcept {
      cost_ = rhs.cost_;
      count_ = rhs.count_;
      accum_ = std::move(rhs.accum_);
      return *this;
    }

    //! Append a vector
    //! \param vec   Packed nibble vector
    //! \param dim   Number of 4-bit elements; must match the cluster dimension
    //! \param dist  Distance of the vector, added to the cluster cost
    void append(const StoreType *vec, size_t dim, float dist) {
      ailego_check_with(dim == accum_.size(), "Unmatched dimension");

      mutex_.lock();
      cost_ += dist;
      count_ += 1;

      const uint8_t *arr = reinterpret_cast<const uint8_t *>(vec);
      // Only whole bytes are consumed; an odd trailing element is ignored
      const size_t even_dim = (dim >> 1) << 1;
      for (size_t i = 0; i != even_dim; i += 2) {
        const uint8_t val = arr[i >> 1];
        // Shift the nibble into the top of an int8_t and back down again so
        // that the 4-bit value is sign-extended
        accum_[i] += (static_cast<int8_t>(val << 4) >> 4);
        accum_[i + 1] += (static_cast<int8_t>(val) >> 4);
      }
      mutex_.unlock();
    }

    //! Retrieve the centroid of vectors
    //! Writes the rounded per-element mean, packed two nibbles per byte.
    //! An empty cluster yields all-zero bytes.
    void centroid(StoreType *out, size_t dim) const {
      ailego_check_with(dim == accum_.size(), "Unmatched dimension");

      uint8_t *arr = reinterpret_cast<uint8_t *>(out);
      const size_t even_dim = (dim >> 1) << 1;
      const bool is_empty = (count_ == 0);
      for (size_t i = 0; i != even_dim; i += 2) {
        int lo = 0;
        int hi = 0;
        if (!is_empty) {
          lo = static_cast<int>(std::round(accum_[i] / count_));
          hi = static_cast<int>(std::round(accum_[i + 1] / count_));
        }
        arr[i >> 1] = static_cast<uint8_t>((hi << 4) & 0xf0) |
                      static_cast<uint8_t>(lo & 0xf);
      }
    }

    //! Retrieve the accumulated cost (sum of the appended distances)
    double cost(void) const {
      return cost_;
    }

    //! Retrieve feature count
    size_t count(void) const {
      return count_;
    }

   private:
    SpinMutex mutex_{};
    double cost_{0.0};
    size_t count_{0u};
    std::vector<double> accum_{};
  };

  //! operator []
  const Cluster &operator[](size_t i) const {
    return clusters_[i];
  }

  //! operator []
  Cluster &operator[](size_t i) {
    return clusters_[i];
  }

  //! Clear the context
  void clear(void) {
    clusters_.clear();
  }

  //! Reset the context to `k_value` empty clusters of dimension `dim`
  void reset(size_t k_value, size_t dim) {
    clusters_.clear();
    clusters_.resize(k_value, dim);
  }

  //! Retrieve context of clusters
  const std::vector<Cluster> &clusters(void) const {
    return clusters_;
  }

  //! Compute the distance between matrix and query (batch)
  template <size_t N>
  static void BatchDistance(const StoreType *m, const StoreType *q, size_t dim,
                            float *out) {
    MinusInnerProductMatrix<uint8_t, BatchCount, N>::Compute(
        reinterpret_cast<const uint8_t *>(m),
        reinterpret_cast<const uint8_t *>(q), dim, out);
  }

  //! Compute the distance between matrix and query (single)
  static void Distance(const StoreType *m, const StoreType *q, size_t dim,
                       float *out) {
    MinusInnerProductMatrix<uint8_t, 1, 1>::Compute(
        reinterpret_cast<const uint8_t *>(m),
        reinterpret_cast<const uint8_t *>(q), dim, out);
  }

  //! Transpose a matrix (eight nibbles form one 32-bit word, hence dim >> 3)
  static void MatrixTranspose(const StoreType *src, size_t dim,
                              StoreType *dst) {
    MatrixHelper::Transpose<uint32_t, BatchCount>(src, dim >> 3, dst);
  }

  //! Reverse transpose a matrix (eight nibbles form one 32-bit word)
  static void MatrixReverseTranspose(const StoreType *src, size_t dim,
                                     StoreType *dst) {
    MatrixHelper::ReverseTranspose<uint32_t, BatchCount>(src, dim >> 3, dst);
  }

  //! Compute Norm2: nibble data is left untouched, the norm is reported as 0
  static void Norm2(StoreType * /*data*/, size_t /*dim*/, float *norm) {
    *norm = 0;
  }

 private:
  //! Members
  std::vector<Cluster> clusters_{};
};

/*! Numerical K-Means cluster algorithm
 *  LloydCluster over NumericalVectorArray<T>, using NumericalKmeansContext<T>
 *  as the default context.
 */
template <typename T, typename TPool,
          typename TContext = NumericalKmeansContext<T>>
using NumericalKmeans =
    LloydCluster<T, TPool, TContext, NumericalVectorArray<T>>;

/*! Nibble K-Means cluster algorithm
 *  LloydCluster over NibbleVectorArray<T>, using NibbleKmeansContext<T> as the
 *  default context.
 */
template <typename T, typename TPool,
          typename TContext = NibbleKmeansContext<T>>
using NibbleKmeans = LloydCluster<T, TPool, TContext, NibbleVectorArray<T>>;

/*! Binary K-Means cluster algorithm
 *  LloydCluster over BinaryVectorArray<T>, using BinaryKmeansContext<T> as the
 *  default context.
 */
template <typename T, typename TPool,
          typename TContext = BinaryKmeansContext<T>>
using BinaryKmeans = LloydCluster<T, TPool, TContext, BinaryVectorArray<T>>;

/*! Numerical InnerProduct K-Means cluster algorithm
 *  LloydCluster over NumericalVectorArray<T>, using
 *  NumericalInnerProductKmeansContext<T> as the default context.
 */
template <typename T, typename TPool,
          typename TContext = NumericalInnerProductKmeansContext<T>>
using NumericalInnerProductKmeans =
    LloydCluster<T, TPool, TContext, NumericalVectorArray<T>>;

/*! Nibble InnerProduct K-Means cluster algorithm
 *  LloydCluster over NibbleVectorArray<T>, using
 *  NibbleInnerProductKmeansContext<T> as the default context.
 */
template <typename T, typename TPool,
          typename TContext = NibbleInnerProductKmeansContext<T>>
using NibbleInnerProductKmeans =
    LloydCluster<T, TPool, TContext, NibbleVectorArray<T>>;

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/algorithm/lloyd_cluster.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <algorithm>
#include <array>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <random>
#include <type_traits>
#include <ailego/parallel/lock.h>
#include <zvec/ailego/parallel/thread_pool.h>
#include <zvec/ailego/utility/type_helper.h>

namespace zvec {
namespace ailego {

/*! Random Centroids Generator
 *
 *  Picks up to `k_value()` features of the owner uniformly at random (in
 *  their original order) and stores them as the initial centroids.
 *  Features are taken first from the batched feature matrix and then from the
 *  feature cache. When fewer than `k` features exist, all of them are chosen.
 */
template <typename T, typename TPool>
struct RandomCentroidsGenerator {
  //! Type of values
  using OwnerType = typename std::decay<T>::type;
  using ContainerType = typename OwnerType::ContainerType;
  using ContextType = typename OwnerType::ContextType;
  using ThreadPoolType = TPool;

  //! constexpr variables
  constexpr static size_t BatchCount = OwnerType::BatchCount;

  //! Generate centroids
  //! \param owner  Cluster object whose centroids are (re)initialized
  //! The thread pool argument is accepted for interface parity but unused.
  void operator()(OwnerType *owner, ThreadPoolType &) const {
    const auto &matrix = owner->feature_matrix();
    const auto &cache = owner->feature_cache();
    auto *centroids = owner->mutable_centroids();

    ContainerType rows(cache.dimension());
    const size_t m = matrix.count();
    const size_t n = m + cache.count();
    size_t k = owner->k_value();
    std::mt19937 mt((std::random_device())());

    rows.resize(BatchCount);
    centroids->reset(cache.dimension());
    centroids->reserve(k);

    // Selection sampling: feature `i` is taken with probability
    // k / (n - i), where `k` is the number of centroids still missing
    for (size_t i = 0; k > 0 && i < n; ++i) {
      if (mt() % (n - i) >= k) {
        continue;
      }
      // Selected a feature
      if (i < m) {
        // Use modulo instead of a bit mask so that the row inside the block
        // is correct even when BatchCount is not a power of two
        const size_t row = i % BatchCount;
        ContextType::MatrixReverseTranspose(matrix[i - row],
                                            matrix.dimension(), rows.data());
        centroids->append(rows[row], matrix.dimension());
      } else {
        centroids->append(cache[i - m], cache.dimension());
      }
      --k;
    }
  }
};

/*! Lloyd's algorithm cluster
 *
 *  Features are appended one by one into a cache. Every time the cache
 *  holds `BatchCount` features they are transposed into the feature matrix.
 *  `cluster_once()` performs one assignment + update iteration: it assigns
 *  every feature to its best-scoring centroid (using the thread pool) and
 *  then replaces each centroid with the one computed by the context.
 */
template <typename T, typename TPool, typename TContext, typename TContainer>
class LloydCluster {
 public:
  //! constexpr variables
  constexpr static size_t BatchCount = TContext::BatchCount;

  //! Type of values
  using ThreadPoolType = TPool;
  using ContainerType = TContainer;
  using ContextType = TContext;
  using ValueType = typename TContext::ValueType;
  using StoreType = typename TContext::StoreType;

  //! Constructor
  LloydCluster(size_t k, size_t dim)
      : k_value_(k),
        feature_cache_(dim),
        feature_matrix_(dim),
        centroids_matrix_(dim),
        centroids_(dim) {}

  //! Constructor (with the spherical flag)
  LloydCluster(size_t k, size_t dim, bool spherical) : LloydCluster(k, dim) {
    spherical_ = spherical;
  }

  //! Constructor
  LloydCluster(void) {}

  //! Destructor
  ~LloydCluster(void) {}

  //! Append a feature
  void append(const StoreType *arr, size_t dim) {
    feature_cache_.append(arr, dim);

    // A full cache is moved into the matrix as one transposed block
    if (feature_cache_.count() == BatchCount) {
      const size_t pos = feature_matrix_.count();
      feature_matrix_.resize(pos + BatchCount);
      ContextType::MatrixTranspose(feature_cache_.data(), dim,
                                   feature_matrix_[pos]);
      feature_cache_.clear();
    }
  }

  //! Reset cluster (the spherical flag keeps its current value)
  void reset(size_t k, size_t dim) {
    k_value_ = k;
    feature_cache_.reset(dim);
    feature_matrix_.reset(dim);
    centroids_.reset(dim);
    centroids_matrix_.reset(dim);
    context_.clear();
  }

  //! Reset cluster (also updates the spherical flag)
  void reset(size_t k, size_t dim, bool spherical) {
    this->reset(k, dim);
    spherical_ = spherical;
  }

  //! Initialize centroids
  template <typename G = RandomCentroidsGenerator<LloydCluster, ThreadPoolType>>
  void init_centroids(ThreadPoolType &pool, const G &g = G()) {
    g(this, pool);
  }

  //! Cluster one time
  //! \param pool  Thread pool used for the assignment step
  //! \param cost  Receives the sum of the costs of all clusters
  //! \return false when the number of centroids differs from the K value
  //!         (nothing is clustered and `*cost` is left untouched)
  template <typename TWorkers>
  bool cluster_once(TWorkers &pool, double *cost) {
    if (centroids_.empty()) {
      RandomCentroidsGenerator<LloydCluster, TWorkers> g;
      this->init_centroids(pool, g);
    }
    if (centroids_.count() != k_value_) {
      return false;
    }
    context_.reset(centroids_.count(), centroids_.dimension());

    // Full batches of centroids are stored transposed, the tail is appended
    // row by row
    const size_t count = centroids_.count() / BatchCount * BatchCount;
    centroids_matrix_.resize(count);
    for (size_t i = 0; i != count; i += BatchCount) {
      ContextType::MatrixTranspose(centroids_[i], centroids_.dimension(),
                                   centroids_matrix_[i]);
    }
    const size_t remain = centroids_.count() - count;
    if (remain > 0) {
      centroids_matrix_.append(centroids_[count], centroids_.dimension(),
                               remain);
    }

    // Using thread pool
    auto group = pool.make_group();
    if (!feature_matrix_.empty()) {
      const size_t n = feature_matrix_.count() / BatchCount;
      // Guard against a pool reporting zero workers (division by zero)
      const size_t workers = std::max<size_t>(pool.count(), 1u);
      const size_t c = std::max<size_t>(n / workers / 2u, 1u);
      const size_t m = n / c * c;

      // Chunks of `c` blocks first, then the leftover blocks one at a time
      for (size_t i = 0; i != m; i += c) {
        group->submit(Closure::New(this, &LloydCluster::cluster_matrix_features,
                                   i, i + c));
      }
      for (size_t i = m; i != n; i += 1) {
        group->submit(Closure::New(this, &LloydCluster::cluster_matrix_features,
                                   i, i + 1));
      }
    }
    if (!feature_cache_.empty()) {
      group->submit(Closure::New(this, &LloydCluster::cluster_cache_features));
    }
    group->wait_finish();

    // Update step: replace every centroid and sum up the costs
    *cost = 0.0;
    for (size_t i = 0, n = centroids_.count(); i != n; ++i) {
      const auto &item = context_[i];
      item.centroid(centroids_[i], centroids_.dimension());
      *cost += item.cost();
    }

    if (spherical_) {
      for (size_t i = 0, n = centroids_.count(); i != n; ++i) {
        float norm = 0.0f;  // value reported by Norm2 is not used here
        ContextType::Norm2(centroids_[i], centroids_.dimension(), &norm);
      }
    }

    return true;
  }

  //! Retrieve the centroids
  ContainerType *mutable_centroids(void) {
    return &centroids_;
  }

  //! Retrieve the centroids
  const ContainerType &centroids(void) const {
    return centroids_;
  }

  //! Retrieve the K value
  size_t k_value(void) const {
    return k_value_;
  }

  //! Retrieve context
  const ContextType &context(void) const {
    return context_;
  }

  //! Retrieve the feature cache
  const ContainerType &feature_cache(void) const {
    return feature_cache_;
  }

  //! Retrieve the feature matrix
  const ContainerType &feature_matrix(void) const {
    return feature_matrix_;
  }

  //! Reserve the feature matrix
  void feature_matrix_reserve(size_t count) {
    feature_matrix_.reserve(count);
  }

 protected:
  //! Cluster the cache features (features not yet packed into the matrix)
  void cluster_cache_features(void) {
    std::array<float, BatchCount> scores;
    // Number of centroids stored in transposed batches
    const size_t count = centroids_matrix_.count() / BatchCount * BatchCount;

    for (size_t i = 0, n = feature_cache_.count(); i != n; ++i) {
      const StoreType *feature = feature_cache_[i];
      float nearest_score = std::numeric_limits<float>::max();
      size_t nearest_index = 0;

      for (size_t j = 0; j != count; j += BatchCount) {
        ContextType::template BatchDistance<1>(centroids_matrix_[j], feature,
                                               centroids_matrix_.dimension(),
                                               scores.data());

        for (size_t k = 0; k < BatchCount; ++k) {
          if (scores[k] < nearest_score) {
            nearest_score = scores[k];
            nearest_index = j + k;
          }
        }
      }  // end of for

      // Tail centroids are compared one at a time
      for (size_t j = count, total = centroids_matrix_.count(); j != total;
           ++j) {
        ContextType::Distance(centroids_matrix_[j], feature,
                              centroids_matrix_.dimension(), scores.data());

        if (scores[0] < nearest_score) {
          nearest_score = scores[0];
          nearest_index = j;
        }
      }
      context_[nearest_index].append(feature, feature_cache_.dimension(),
                                     nearest_score);
    }  // end of for
  }

  //! Cluster the matrix features
  //! \param first  Index of the first feature block (in units of BatchCount)
  //! \param last   Index one past the last feature block
  void cluster_matrix_features(size_t first, size_t last) {
    std::array<float, BatchCount * BatchCount> scores;
    ContainerType rows(centroids_matrix_.dimension());

    // Ordering used by min_element: a NaN score never wins against a number
    auto comp = [](float i, float j) {
      if (std::isnan(i)) return false;
      if (std::isnan(j)) return true;

      return i < j;
    };

    std::array<float, BatchCount> nearest_scores;
    std::array<size_t, BatchCount> nearest_indexes;
    // Number of centroids stored in transposed batches
    const size_t count = centroids_matrix_.count() / BatchCount * BatchCount;

    rows.resize(BatchCount);
    for (size_t i = first * BatchCount; i != last * BatchCount;
         i += BatchCount) {
      const StoreType *block = feature_matrix_[i];

      nearest_indexes.fill(0);
      nearest_scores.fill(std::numeric_limits<float>::max());

      for (size_t j = 0; j != count; j += BatchCount) {
        ContextType::template BatchDistance<BatchCount>(
            centroids_matrix_[j], block, centroids_matrix_.dimension(),
            scores.data());

        // The scores of feature `k` occupy one run of BatchCount values
        for (size_t k = 0; k < BatchCount; ++k) {
          const float *start = &scores[k * BatchCount];
          const float *result =
              std::min_element(start, start + BatchCount, comp);
          if (*result < nearest_scores[k]) {
            nearest_scores[k] = *result;
            nearest_indexes[k] = j + (result - start);
          }
        }
      }  // end of for

      // Tail centroids: the feature block acts as the matrix operand and
      // the single centroid as the query
      for (size_t j = count, total = centroids_matrix_.count(); j != total;
           ++j) {
        ContextType::template BatchDistance<1>(block, centroids_matrix_[j],
                                               centroids_matrix_.dimension(),
                                               scores.data());

        for (size_t k = 0; k < BatchCount; ++k) {
          const float score = scores[k];
          if (score < nearest_scores[k]) {
            nearest_scores[k] = score;
            nearest_indexes[k] = j;
          }
        }
      }  // end of for

      // Recover the individual rows before handing them to the clusters
      ContextType::MatrixReverseTranspose(block, feature_matrix_.dimension(),
                                          rows.data());
      for (size_t k = 0; k < BatchCount; ++k) {
        context_[nearest_indexes[k]].append(
            rows[k], feature_matrix_.dimension(), nearest_scores[k]);
      }
    }  // end of for
  }

 private:
  //! Members
  size_t k_value_{0u};
  ContainerType feature_cache_{};
  ContainerType feature_matrix_{};
  ContainerType centroids_matrix_{};
  ContainerType centroids_{};
  ContextType context_{};
  bool spherical_{false};
};

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/buffer/buffer_manager.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <atomic>
#include <mutex>
#include <ailego/pattern/defer.h>
#include <arrow/io/api.h>
#include <parquet/arrow/reader.h>
#include <zvec/ailego/buffer/buffer_manager.h>
#include <zvec/ailego/internal/platform.h>
#include <zvec/ailego/logger/logger.h>

#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-parameter"
#pragma clang diagnostic ignored "-Wshadow"
#elif defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wshadow"
#endif

#include <arrow/api.h>

#ifdef __clang__
#pragma clang diagnostic pop
#elif defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif


namespace zvec {


namespace ailego {


namespace {


// Hash functor for BufferID keys.
// Combines the buffer type and file id, then mixes in the parquet
// column/row-group or the vector offset depending on the type. Fields that
// are not hashed here (e.g. file name, mtime) only take part in IDEqual.
struct IDHash {
  size_t operator()(const BufferID &buffer_id) const {
    const size_t type_hash =
        std::hash<int>{}(static_cast<int>(buffer_id.type));
    const size_t file_hash = std::hash<uint64_t>{}(buffer_id.file_id);
    size_t seed = type_hash ^ file_hash;

    // Multiply-and-add step shared by all type specific fields
    const auto mix = [&seed](size_t part) { seed = seed * 31 + part; };

    if (buffer_id.type == BufferID::TYPE::PARQUET) {
      mix(std::hash<int>{}(buffer_id.parquet().column));
      mix(std::hash<int>{}(buffer_id.parquet().row_group));
    } else if (buffer_id.type == BufferID::TYPE::VECTOR) {
      mix(std::hash<uint32_t>{}(buffer_id.vector().offset));
    }
    return seed;
  }
};


// Equality functor for BufferID keys.
// Two ids are equal when type, file name, file id and mtime match and the
// type specific location (parquet column + row group, or vector offset) is
// the same. Ids of any other type never compare equal.
struct IDEqual {
  bool operator()(const BufferID &a, const BufferID &b) const {
    const bool header_differs = a.type != b.type ||
                                a.file_name != b.file_name ||
                                a.file_id != b.file_id || a.mtime != b.mtime;
    if (header_differs) {
      return false;
    }
    if (a.type == BufferID::TYPE::PARQUET) {
      const auto &lhs = a.parquet();
      const auto &rhs = b.parquet();
      return lhs.column == rhs.column && lhs.row_group == rhs.row_group;
    }
    if (a.type == BufferID::TYPE::VECTOR) {
      return a.vector().offset == b.vector().offset;
    }
    return false;
  }
};


}  // namespace


// Bookkeeping record for one buffer managed by a BufferPool.
// Owns the vector allocation (released in the destructor) and the shared
// pointers that keep arrow data alive. Not copyable and not movable.
struct BufferManager::BufferContext {
  using Pointer = std::unique_ptr<BufferManager::BufferContext>;

  enum State : uint32_t {
    IDLE = 0,      // Empty and not held by any users, not in LRU
    RESERVED = 1,  // Pinned by a user but no data yet, not in LRU
    IN_USE = 2,    // Pinned by a user and data is present, not in LRU
    CACHED = 3,    // Data is present but not held by any users, in LRU
    ERROR = 4      // Something went wrong, not in LRU
  };

  BufferContext(const BufferID &id, BufferPool *p) : id(id), pool(p) {}
  BufferContext(const BufferContext &) = delete;
  BufferContext(BufferContext &&) = delete;
  BufferContext &operator=(const BufferContext &) = delete;
  BufferContext &operator=(BufferContext &&) = delete;

  // Releases the vector buffer; free() accepts a null pointer
  ~BufferContext() {
    free(vector);
  }

  // Identifier for the buffer
  BufferID id;

  // Current state
  State state{IDLE};

  // The size of the buffer
  uint32_t size{0};

  // Handle of the file backing this buffer
  File file;

  // The number of external references to this buffer (via pin/unpin)
  std::atomic<uint32_t> refs_buf{0};

  // The number of external references to this context (via BufferHandle)
  std::atomic<uint32_t> refs_context{0};

  // The pool this context belongs to
  BufferPool *pool{nullptr};

  // A shared pointer to the buffers allocated for arrow parquet data
  std::shared_ptr<arrow::ChunkedArray> arrow{nullptr};

  // Guard original arrow buffers to prevent premature deletion
  std::vector<std::shared_ptr<arrow::Buffer>> arrow_refs{};

  // A pointer to the buffer allocated for vector data
  void *vector{nullptr};

  // Doubly linked LRU list
  BufferContext *next{nullptr};
  BufferContext *prev{nullptr};

  // Return a string representation of the status
  const std::string status_string() const;

  // Populate the buffer with parquet data
  arrow::Status read_arrow_parquet();

  // Populate the buffer with vector data
  bool read_vector();
};


const std::string BufferManager::BufferContext::status_string() const {
  std::string msg{id.to_string() + ": "};
  switch (state) {
    case State::IDLE: {
      msg += "Idle";
      break;
    }
    case State::RESERVED: {
      msg += "Reserved";
      break;
    }
    case State::IN_USE: {
      msg += "In use";
      break;
    }
    case State::CACHED: {
      msg += "Cached";
      break;
    }
    case State::ERROR: {
      msg += "Error";
      break;
    }
  }
  return msg;
}


// Populate the context with one parquet column chunk.
// Opens the file named by the id, reads the (row group, column) pair into
// `arrow`, records the capacity of every non-null buffer in `size` and swaps
// each buffer for an alias whose deleter notifies the pool.
// Returns the arrow status of the first failing step, OK otherwise.
arrow::Status BufferManager::BufferContext::read_arrow_parquet() {
  // Start from a clean slate: `size` is accumulated below, so a stale value
  // left over from a previous load would be counted twice
  size = 0;
  arrow_refs.clear();

  // TODO: file handler and memory pool can be optimized
  arrow::MemoryPool *mem_pool = arrow::default_memory_pool();

  // Open file
  std::shared_ptr<arrow::io::RandomAccessFile> input;
  const auto &file_name = id.file_name;
  ARROW_ASSIGN_OR_RAISE(input, arrow::io::ReadableFile::Open(file_name));

  // Open reader
  std::unique_ptr<parquet::arrow::FileReader> reader;
  ARROW_ASSIGN_OR_RAISE(reader, parquet::arrow::OpenFile(input, mem_pool));

  // Perform read
  const int row_group = id.parquet().row_group;
  const int column = id.parquet().column;
  auto s = reader->RowGroup(row_group)->Column(column)->Read(&arrow);
  if (!s.ok()) {
    LOG_ERROR("Failed to read parquet file[%s]", file_name.c_str());
    arrow = nullptr;
    return s;
  }

  // Compute the memory usage and hijack Arrow's buffers with our implementation
  for (auto &array : arrow->chunks()) {
    auto &buffers = array->data()->buffers;
    for (auto &buffer : buffers) {
      if (buffer == nullptr) {
        continue;
      }
      // Keep references to original buffers to prevent premature deletion
      arrow_refs.emplace_back(buffer);
      size += buffer->capacity();
      // Create hijacked buffer with custom deleter that notifies us when Arrow
      // is finished with the buffer
      std::shared_ptr<arrow::Buffer> hijacked_buffer(
          buffer.get(), BufferManager::ArrowBufferDeleter(this));
      buffer = hijacked_buffer;
    }
  }

  return arrow::Status::OK();
}


// Populate the context with vector data.
// Opens the backing file when it is not open yet, allocates a 64-byte
// aligned buffer of `id.vector().length` bytes and fills it from
// `id.vector().offset`. The file is closed again before returning.
// Returns true on success; on failure `vector` and `size` are left untouched.
bool BufferManager::BufferContext::read_vector() {
  const auto &file_name = id.file_name;
  if (!file.is_valid()) {
    if (!File::IsExist(file_name)) {
      LOG_ERROR("File[%s] does not exist", file_name.c_str());
      return false;
    }
    if (!File::IsRegular(file_name)) {
      LOG_ERROR("[%s] is not a regular file", file_name.c_str());
      return false;
    }
    if (!file.open(file_name.c_str(), true, false)) {
      LOG_ERROR("Failed to open file[%s]", file_name.c_str());
      return false;
    }
  }
  AILEGO_DEFER([this] { file.close(); });

  // Allocate into a local pointer first: when posix_memalign fails the
  // pointer it was given must not be trusted, and the destructor frees
  // whatever `vector` holds
  const uint32_t len = id.vector().length;
  void *buffer = nullptr;
  const int ret = posix_memalign(&buffer, 64, len);  // 64-byte alignment
  if (ret != 0 || buffer == nullptr) {
    LOG_ERROR("Failed to allocate buffer for file[%s]", file_name.c_str());
    return false;
  }

  const uint32_t offset = id.vector().offset;
  if (file.read(offset, buffer, len) != len) {
    LOG_ERROR("Failed to read file[%s]", file_name.c_str());
    free(buffer);
    return false;
  }

  // Publish the buffer only once it is completely filled
  vector = buffer;
  size = len;
  return true;
}


// Thread-safe buffer pool implementation.
//
// BufferContext states:
// 1. Must exist in the lookup (hash) table.
// 2. LRU list presence:
//    - In LRU: holds memory but not pinned by any users
//    - Not in LRU: either holds memory pinned by users, or doesn't hold memory
// 3. External references: when an external user acquires a context and pins the
//    memory, that context is removed from LRU list; when they unpin the
//    memory, that context is moved to LRU list if it was the last reference.
//
// Any operation on the hash table is protected by mutex_table_.
// Any change to context state and LRU list is protected by mutex_context_.
// When both are needed, mutex_context_ is taken first.
//
class BufferManager::BufferPool {
 public:
  // Create a pool that starts evicting once `limit` bytes are in use
  explicit BufferPool(uint64_t limit) : limit_(limit) {
    sentinel_.next = &sentinel_;
    sentinel_.prev = &sentinel_;
  }


  // Look up the context for `id`, creating it when it does not exist yet.
  // The table is guarded by mutex_table_, the same mutex that
  // try_release_context_locked() holds while erasing entries.
  BufferContext *acquire_locked(BufferID &id) {
    std::lock_guard<std::mutex> lock(mutex_table_);
    auto iter = table_.find(id);
    if (iter == table_.end()) {
      iter =
          table_.emplace(id, std::make_unique<BufferContext>(id, this)).first;
    }
    return iter->second.get();
  }


  // Remove the context from the table when nobody references it and it is
  // idle. The reference count is checked again under the table mutex.
  void try_release_context_locked(BufferContext *context) {
    if (context->refs_context.load() != 0) {
      return;
    }
    std::lock_guard<std::mutex> lock(mutex_table_);
    if (context->refs_context.load() != 0) {
      return;
    }
    if (context->state == BufferContext::State::IDLE) {
      table_.erase(context->id);
    }
  }


  // Pin the buffer of `ctx`, loading its data first when it is idle.
  // Contexts in any other state than IDLE / IN_USE / CACHED are left alone.
  void pin_locked(BufferContext *ctx) {
    std::lock_guard<std::mutex> lock(mutex_context_);
    switch (ctx->state) {
      case BufferContext::State::IDLE:
        pin_at_IDLE(ctx);
        break;
      case BufferContext::State::IN_USE:
        pin_at_IN_USE(ctx);
        break;
      case BufferContext::State::CACHED:
        pin_at_CACHED(ctx);
        break;
      default:
        break;
    }
  }


  // Drop one buffer reference. Returns true when this call moved the
  // context into the LRU list (i.e. it released the last reference).
  bool unpin_locked(BufferContext *ctx) {
    // Decrement without ever going below zero: an unbalanced unpin (for
    // example after a failed pin) must not wrap the counter around
    uint32_t observed = ctx->refs_buf.load();
    do {
      if (observed == 0) {
        return false;
      }
    } while (!ctx->refs_buf.compare_exchange_weak(observed, observed - 1));
    if (observed > 1) {
      return false;
    }

    std::lock_guard<std::mutex> lock(mutex_context_);
    // Re-check under the lock: the buffer may have been pinned again
    if (ctx->refs_buf.load() == 0 &&
        ctx->state != BufferContext::State::CACHED) {
      ctx->state = BufferContext::State::CACHED;
      LRU_insert(ctx);
      return true;
    }
    return false;
  }


  // Insert a context into the LRU list while holding mutex_context_
  void LRU_insert_locked(BufferContext *context) {
    std::lock_guard<std::mutex> lock(mutex_context_);
    LRU_insert(context);
  }


  // Remove a context from the LRU list while holding mutex_context_
  void LRU_remove_locked(BufferContext *context) {
    std::lock_guard<std::mutex> lock(mutex_context_);
    LRU_remove(context);
  }


  // Number of bytes currently accounted to loaded buffers
  uint64_t usage() const {
    return usage_.load();
  }


 private:
  // Add the references a single pin accounts for: one per guarded arrow
  // buffer for parquet data, one for vector data
  static void add_buffer_refs(BufferContext *ctx) {
    if (ctx->id.type == BufferID::TYPE::PARQUET) {
      ctx->refs_buf.fetch_add(ctx->arrow_refs.size());
    } else {
      ctx->refs_buf.fetch_add(1);
    }
  }


  // Load the data of an idle context, evicting cached buffers from the tail
  // of the LRU list while the pool is at or above its limit.
  // Must be called with mutex_context_ held.
  void pin_at_IDLE(BufferContext *ctx) {
    ctx->state = BufferContext::State::RESERVED;

    while (usage_ >= limit_) {
      // The tail of LRU list is the least recently used context
      BufferContext *victim = sentinel_.prev;
      if (victim == &sentinel_) {  // No victim could be found
        ctx->state = BufferContext::State::ERROR;
        return;
      }
      // NOTE(review): both release calls below pass `ctx`, not `victim`.
      // In this file pin_locked() is only reached through a BufferHandle,
      // which has already incremented ctx->refs_context, so the calls return
      // immediately. Releasing `victim` instead looks like the intent, but
      // would require reading victim->size beforehand and closing the window
      // between acquire_locked() and the BufferHandle constructor -- confirm
      // before changing.
      if (victim->state == BufferContext::State::ERROR) {
        LRU_remove(victim);
        try_release_context_locked(ctx);
        continue;
      }
      if (victim->id.type == BufferID::TYPE::PARQUET) {
        victim->arrow_refs.clear();
      } else {
        free(victim->vector);
        victim->vector = nullptr;
      }
      victim->state = BufferContext::State::IDLE;
      LRU_remove(victim);
      try_release_context_locked(ctx);
      usage_ -= victim->size;
    }

    const bool loaded = (ctx->id.type == BufferID::TYPE::PARQUET)
                            ? ctx->read_arrow_parquet().ok()
                            : ctx->read_vector();
    if (!loaded) {
      LOG_ERROR("Failed to read to %s", ctx->id.to_string().c_str());
      ctx->state = BufferContext::State::ERROR;
      return;
    }
    ctx->state = BufferContext::State::IN_USE;
    add_buffer_refs(ctx);
    usage_ += ctx->size;
  }


  // Data is present and already pinned: only the references are added
  void pin_at_IN_USE(BufferContext *ctx) {
    add_buffer_refs(ctx);
  }


  // Data is present but unpinned: take it out of the LRU list again
  void pin_at_CACHED(BufferContext *ctx) {
    add_buffer_refs(ctx);
    LRU_remove(ctx);
    ctx->state = BufferContext::State::IN_USE;
  }


  // Link the context at the head of the LRU list. Pinned contexts and
  // contexts that are already linked are ignored.
  void LRU_insert(BufferContext *context) {
    if (context->refs_buf > 0) {
      return;  // Already pinned, should not be evicted
    }
    if (context->next != nullptr || context->prev != nullptr) {
      return;
    }
    // Insert the context to the head of LRU list
    context->next = sentinel_.next;
    context->prev = &sentinel_;
    sentinel_.next = context;
    context->next->prev = context;
    inactive_ += context->size;
  }


  // Unlink the context from the LRU list (no-op when it is not linked)
  void LRU_remove(BufferContext *context) {
    if (context->next == nullptr) {
      return;  // Not in LRU list
    }
    context->next->prev = context->prev;
    context->prev->next = context->next;
    context->next = nullptr;
    context->prev = nullptr;
    inactive_ -= context->size;
  }

 private:
  using Table =
      std::unordered_map<BufferID, BufferContext::Pointer, IDHash, IDEqual>;

  uint64_t limit_;
  std::atomic<uint64_t> usage_{0};
  std::atomic<uint64_t> inactive_{0};

  Table table_{};
  std::mutex mutex_table_{};
  BufferContext sentinel_{BufferID{}, this};  // LRU list sentinel
  std::mutex mutex_context_{};
};


BufferManager::ArrowBufferDeleter::ArrowBufferDeleter(BufferContext *c)
    : context(c) {}


// Deleter hook: the buffer itself is not freed here, the owning pool is only
// told that one reference to the context's buffer is gone.
void BufferManager::ArrowBufferDeleter::operator()(arrow::Buffer *) {
  auto *owner = context->pool;
  owner->unpin_locked(context);
}


// Wrap a context. A non-null context gets its handle reference count
// incremented and its pool remembered; a null context yields an empty handle.
BufferHandle::BufferHandle(BufferContext *context) : context_(context) {
  if (context_ == nullptr) {
    return;
  }
  pool_ = context_->pool;
  context_->refs_context.fetch_add(1);
}


// Drop this handle's reference. When it was the last one and the context is
// idle, the pool is asked to release the context.
BufferHandle::~BufferHandle() {
  if (context_ == nullptr) {
    return;
  }
  const uint32_t before = context_->refs_context.fetch_sub(1);
  const bool was_last = !(before > 1);
  if (was_last && context_->state == BufferContext::State::IDLE) {
    pool_->try_release_context_locked(context_);
  }
}


std::shared_ptr<arrow::ChunkedArray> BufferHandle::pin_parquet_data() {
  pool_->pin_locked(context_);
  return context_->arrow;
}


// Pin the vector data of this handle and return the buffer pointer.
// An empty handle yields a null pointer without touching the pool.
void *BufferHandle::pin_vector_data() {
  if (context_ == nullptr) {
    return nullptr;
  }
  pool_->pin_locked(context_);
  void *data = context_->vector;
  return data;
}


// Release one pin on the vector data. An empty handle reports true;
// otherwise the result of the pool's unpin is returned.
bool BufferHandle::unpin_vector_data() {
  return (context_ == nullptr) ? true : pool_->unpin_locked(context_);
}


// Number of buffer references currently held on the context.
// An empty handle (null context) reports zero instead of dereferencing null.
uint32_t BufferHandle::references() const {
  if (!context_) {
    return 0;
  }
  return context_->refs_buf.load();
}


// Size recorded on the context for its buffer.
// An empty handle (null context) reports zero instead of dereferencing null.
uint32_t BufferHandle::size() const {
  if (!context_) {
    return 0;
  }
  return context_->size;
}


// (Re)creates the sharded buffer pools.
//   limit      - total memory limit in bytes, split evenly across shards and
//                passed through ailego_align(..., 4096).
//   num_shards - number of pools to create; 0 is treated as 1 so that the
//                per-shard division (and the later `hash % pools_.size()`)
//                never divides by zero.
// NOTE(review): pools_.clear() drops the raw pointers of any previously
// created pools without deleting them. They are deliberately not deleted
// here because outstanding handles may still reference them -- confirm
// whether init() is ever expected to be called more than once.
void BufferManager::init(uint64_t limit, uint32_t num_shards) {
  if (num_shards == 0) {
    num_shards = 1;
  }
  pools_.clear();
  uint64_t limit_per_shard = ailego_align(limit / num_shards, 4096);
  for (uint32_t i = 0; i < num_shards; ++i) {
    auto pool = new BufferPool(limit_per_shard);
    pools_.push_back(pool);
  }
  LOG_INFO(
      "BufferManager initialized with [%u] buffer pools, [%zu] bytes memory "
      "limit per pool, total memory limit [%zu] bytes",
      num_shards, (size_t)limit_per_shard, (size_t)limit);
}


// Looks up (or creates, as decided by the pool) the context for `buffer_id`
// in the shard selected by the ID's hash and wraps it in a handle.
// Returns an empty handle when no pools exist (init() not called yet), which
// avoids a modulo-by-zero on `pools_.size()`.
BufferHandle BufferManager::acquire(BufferID &buffer_id) {
  if (pools_.empty()) {
    return BufferHandle(nullptr);
  }
  static IDHash id_hash{};
  auto hash_val = id_hash(buffer_id);
  auto ctx = pools_[hash_val % pools_.size()]->acquire_locked(buffer_id);
  return BufferHandle(ctx);
}


// Same as acquire(), but returns the handle in a heap-allocated unique_ptr.
// The returned pointer is never null; when no pools exist (init() not called
// yet) it owns an empty handle, which avoids a modulo-by-zero on
// `pools_.size()`.
std::unique_ptr<BufferHandle> BufferManager::acquire_ptr(BufferID &buffer_id) {
  if (pools_.empty()) {
    return std::make_unique<BufferHandle>(
        static_cast<BufferContext *>(nullptr));
  }
  static IDHash id_hash{};
  auto hash_val = id_hash(buffer_id);
  auto ctx = pools_[hash_val % pools_.size()]->acquire_locked(buffer_id);
  return std::make_unique<BufferHandle>(ctx);
}


uint64_t BufferManager::total_size_in_bytes() const {
  uint64_t total_usage = 0;
  for (auto pool : pools_) {
    total_usage += pool->usage();
  }
  return total_usage;
}


// Destroys every pool created by init().
BufferManager::~BufferManager() {
  for (size_t shard = 0; shard < pools_.size(); ++shard) {
    delete pools_[shard];
  }
}


}  // namespace ailego


}  // namespace zvec

================================================
FILE: src/ailego/buffer/buffer_pool.cc
================================================
#include <zvec/ailego/buffer/buffer_pool.h>
#include <zvec/core/framework/index_logger.h>

namespace zvec {
namespace ailego {

// (Re)initialises the cache with CATCH_QUEUE_NUM queues, each constructed
// with `block_size` as its initial capacity argument. Always returns 0.
// Existing queues are discarded first so that calling init() again does not
// keep appending queues (and stale entries) in front of the new ones.
int LRUCache::init(size_t block_size) {
  block_size_ = block_size;
  queues_.clear();
  for (size_t i = 0; i < CATCH_QUEUE_NUM; i++) {
    queues_.push_back(ConcurrentQueue(block_size));
  }
  return 0;
}

// Tries each queue in index order and pops the first available entry into
// `item`. Returns false when every queue is empty.
bool LRUCache::evict_single_block(BlockType &item) {
  for (size_t q = 0; q < CATCH_QUEUE_NUM; ++q) {
    if (queues_[q].try_dequeue(item)) {
      return true;
    }
  }
  return false;
}

// Appends `block` to the queue selected by `block_type` and, once every
// `block_size_` successful insertions, runs clear_dead_node() to purge stale
// entries.
//   lp_map     - map consulted by clear_dead_node() to recognise dead blocks.
//   block      - entry to enqueue.
//   block_type - index of the target queue.
// Returns false when `block_type` is out of range or the enqueue fails.
bool LRUCache::add_single_block(const LPMap *lp_map, const BlockType &block,
                                int block_type) {
  if (block_type < 0 || static_cast<size_t>(block_type) >= queues_.size()) {
    LOG_ERROR("invalid block type.");
    return false;
  }
  bool ok = queues_[block_type].enqueue(block);
  if (!ok) {
    LOG_ERROR("enqueue failed.");
    return false;
  }
  // Use the value produced by this thread's own increment: re-reading the
  // counter afterwards lets concurrent inserters all see (or all miss) the
  // same multiple of block_size_.
  const auto inserted =
      evict_queue_insertions_.fetch_add(1, std::memory_order_relaxed) + 1;
  // block_size_ == 0 would make the modulo below divide by zero.
  if (block_size_ != 0 && inserted % block_size_ == 0) {
    this->clear_dead_node(lp_map);
  }
  return true;
}

// Purges dead entries from the front of each sufficiently long queue.
// For every queue whose approximate size is at least 8 * block_size_, up to
// 2 * block_size_ entries are popped; entries that `lp_map` does not report
// as dead are parked in a temporary queue and then appended back.
void LRUCache::clear_dead_node(const LPMap *lp_map) {
  for (size_t i = 0; i < CATCH_QUEUE_NUM; i++) {
    size_t clear_size = block_size_ * 2;
    if (queues_[i].size_approx() < clear_size * 4) {
      continue;
    }
    size_t clear_count = 0;
    ConcurrentQueue tmp(block_size_);
    BlockType item;
    // Check the budget BEFORE dequeuing: testing it after try_dequeue() would
    // pop one extra entry when the budget runs out and silently drop it.
    while (clear_count < clear_size && queues_[i].try_dequeue(item)) {
      ++clear_count;
      if (!lp_map->isDeadBlock(item)) {
        if (!tmp.enqueue(item)) {
          LOG_ERROR("enqueue failed.");
        }
      }
    }
    // Re-check liveness on the way back: an entry may have died meanwhile.
    while (tmp.try_dequeue(item)) {
      if (!lp_map->isDeadBlock(item)) {
        if (!queues_[i].enqueue(item)) {
          LOG_ERROR("enqueue failed.");
        }
      }
    }
  }
}

// (Re)allocates the entry table with `entry_num` slots and initialises the
// LRU cache with `entry_num * 4` as its block size.
// Every slot starts with ref_count == INT_MIN, load_count == 0 and no buffer.
void LPMap::init(size_t entry_num) {
  // Drop the old table and reset the bookkeeping before allocating, so a
  // throwing `new[]` cannot leave entries_ pointing at freed memory.
  delete[] entries_;
  entries_ = nullptr;
  entry_num_ = 0;

  entries_ = new Entry[entry_num];
  entry_num_ = entry_num;
  for (size_t i = 0; i < entry_num_; i++) {
    entries_[i].ref_count.store(std::numeric_limits<int>::min());
    entries_[i].load_count.store(0);
    entries_[i].buffer = nullptr;
  }
  cache_.init(entry_num * 4);
}

// Returns the buffer currently attached to `block_id`.
//   lru_mode == false: the entry's buffer pointer is returned as-is, with no
//                      reference counting.
//   lru_mode == true : the entry's ref_count is incremented with a CAS loop.
//                      A negative ref_count (the value stored by init() and
//                      evict_block()) makes the call return nullptr without
//                      touching the counter.
// `block_id` is only range-checked by the assert.
char *LPMap::acquire_block(block_id_t block_id, bool lru_mode) {
  assert(block_id < entry_num_);
  Entry &entry = entries_[block_id];
  if (!lru_mode) {
    return entry.buffer;
  }
  while (true) {
    int current_count = entry.ref_count.load(std::memory_order_acquire);
    if (current_count < 0) {
      return nullptr;
    }
    if (entry.ref_count.compare_exchange_weak(current_count, current_count + 1,
                                              std::memory_order_acq_rel,
                                              std::memory_order_acquire)) {
      // A 0 -> 1 transition bumps load_count; release_block() records
      // load_count in the queue entry it pushes when the count drops to 0.
      if (current_count == 0) {
        entry.load_count.fetch_add(1, std::memory_order_relaxed);
      }
      return entry.buffer;
    }
  }
}

// Drops one reference on `block_id`. When the count goes from 1 to 0 the
// block is pushed onto cache queue 0 as a (block_id, load_count) pair.
// `block_id` is only range-checked by the assert.
void LPMap::release_block(block_id_t block_id) {
  assert(block_id < entry_num_);
  Entry &entry = entries_[block_id];

  // fetch_sub returns the previous value, so == 1 means the count is now 0.
  if (entry.ref_count.fetch_sub(1, std::memory_order_release) == 1) {
    std::atomic_thread_fence(std::memory_order_acquire);
    LRUCache::BlockType block;
    block.first = block_id;
    block.second = entry.load_count.load();
    cache_.add_single_block(this, block, 0);
  }
}

// Detaches and returns the buffer of `block_id` if nobody holds a reference.
// The entry's ref_count is swapped from 0 to INT_MIN; when that swap fails
// (count is not 0) nothing changes and nullptr is returned.
char *LPMap::evict_block(block_id_t block_id) {
  assert(block_id < entry_num_);
  Entry &entry = entries_[block_id];
  int idle = 0;
  if (!entry.ref_count.compare_exchange_strong(
          idle, std::numeric_limits<int>::min())) {
    return nullptr;
  }
  char *detached = entry.buffer;
  entry.buffer = nullptr;
  return detached;
}

// Attaches `buffer` to `block_id` and takes the first reference on it, or,
// if the entry already has a non-negative ref_count, just takes one more
// reference. Returns the buffer stored in the entry after the update.
// `block_id` is only range-checked by the assert.
char *LPMap::set_block_acquired(block_id_t block_id, char *buffer) {
  assert(block_id < entry_num_);
  Entry &entry = entries_[block_id];
  while (true) {
    int current_count = entry.ref_count.load(std::memory_order_relaxed);
    if (current_count >= 0) {
      // Entry already populated: only bump the count.
      // NOTE(review): the caller's `buffer` is neither stored nor handed back
      // here -- confirm callers cannot reach this branch with a fresh buffer,
      // otherwise that buffer is lost.
      if (entry.ref_count.compare_exchange_weak(
              current_count, current_count + 1, std::memory_order_acq_rel,
              std::memory_order_acquire)) {
        return entry.buffer;
      }
    } else {
      // Negative count (initial / evicted state): publish count = 1.
      // NOTE(review): entry.buffer is written after the count becomes
      // non-negative; a concurrent acquire_block() could presumably observe
      // the count before the pointer is stored -- confirm.
      if (entry.ref_count.compare_exchange_weak(current_count, 1,
                                                std::memory_order_acq_rel,
                                                std::memory_order_acquire)) {
        entry.buffer = buffer;
        entry.load_count.fetch_add(1, std::memory_order_relaxed);
        return entry.buffer;
      }
    }
  }
}

// Pops cache entries until one that is not dead is found, tries to evict
// that block and, on success, hands its buffer to `free_buffers`. Returns
// without doing anything when the cache runs out of entries. A buffer that
// cannot be enqueued is freed.
void LPMap::recycle(moodycamel::ConcurrentQueue<char *> &free_buffers) {
  LRUCache::BlockType candidate;
  for (;;) {
    if (!cache_.evict_single_block(candidate)) {
      return;
    }
    if (!isDeadBlock(candidate)) {
      break;
    }
  }
  char *reclaimed = evict_block(candidate.first);
  if (reclaimed == nullptr) {
    return;
  }
  if (!free_buffers.enqueue(reclaimed)) {
    LOG_ERROR("recycle buffer enqueue failed.");
    ailego_free(reclaimed);
  }
}

// Opens `filename` read-only and records its size.
// Throws std::runtime_error when the file cannot be opened or stat'ed; the
// descriptor is closed before throwing in the latter case.
VecBufferPool::VecBufferPool(const std::string &filename) {
  fd_ = open(filename.c_str(), O_RDONLY);
  if (fd_ < 0) {
    throw std::runtime_error("Failed to open file: " + filename);
  }
  struct stat file_info;
  const bool stat_ok = (fstat(fd_, &file_info) >= 0);
  if (!stat_ok) {
    ::close(fd_);
    throw std::runtime_error("Failed to stat file: " + filename);
  }
  file_size_ = file_info.st_size;
}

// Sizes the pool and pre-allocates its buffers.
//   pool_capacity - total capacity in bytes; pool_capacity / block_size + 10
//                   buffers of block_size bytes are allocated.
//   block_size    - size of each buffer; must not be 0.
//   segment_count - segment_count + 10 map entries (and per-block mutexes)
//                   are created.
// Returns 0 on success, -1 on a zero block_size, an allocation failure or a
// failed enqueue. LRU eviction is disabled (no_lru_mode_) when there are at
// least as many buffers as map entries.
int VecBufferPool::init(size_t pool_capacity, size_t block_size,
                        size_t segment_count) {
  if (block_size == 0) {
    LOG_ERROR("block_size must not be 0");
    return -1;
  }
  pool_capacity_ = pool_capacity;
  size_t buffer_num = pool_capacity_ / block_size + 10;
  size_t block_num = segment_count + 10;
  lp_map_.init(block_num);
  mutex_vec_.reserve(block_num);
  // size_t index: an `int` counter compared against a size_t bound mixes
  // signedness and overflows for very large block counts.
  for (size_t i = 0; i < block_num; i++) {
    mutex_vec_.emplace_back(std::make_unique<std::mutex>());
  }
  for (size_t i = 0; i < buffer_num; i++) {
    char *buffer = (char *)ailego_malloc(block_size);
    if (buffer != nullptr) {
      if (!free_buffers_.enqueue(buffer)) {
        LOG_ERROR("recycle buffer enqueue failed.");
        ailego_free(buffer);
        return -1;
      }
    } else {
      LOG_ERROR("aligned_alloc %zu(size: %zu) failed", i, block_size);
      return -1;
    }
  }
  LOG_DEBUG("Buffer pool num: %zu, entry num: %zu", buffer_num,
            lp_map_.entry_num());
  // Every block can own a buffer at the same time: no eviction needed.
  no_lru_mode_ = (lp_map_.entry_num() <= buffer_num);
  return 0;
}

// Returns a handle object constructed from this pool (`*this`).
VecBufferPoolHandle VecBufferPool::get_handle() {
  return VecBufferPoolHandle(*this);
}

// Returns the in-memory buffer for `block_id`, loading it from the file on a
// miss.
//   block_id - block index; must be below the number of per-block mutexes
//              created by init().
//   offset   - file offset to read from on a miss.
//   size     - number of bytes to read on a miss.
//   retry    - how many times to try recycling an LRU block when no free
//              buffer is available (only used when LRU mode is active).
// Returns nullptr when the block id is out of range, no buffer can be
// obtained, or the read does not return exactly `size` bytes.
char *VecBufferPool::acquire_buffer(block_id_t block_id, size_t offset,
                                    size_t size, int retry) {
  // Guard the mutex_vec_ lookup below; the map itself only asserts.
  if (static_cast<size_t>(block_id) >= mutex_vec_.size()) {
    LOG_ERROR("Buffer pool got invalid block id: %zu",
              static_cast<size_t>(block_id));
    return nullptr;
  }
  // Fast path: block already resident.
  char *buffer = lp_map_.acquire_block(block_id, !no_lru_mode());
  if (buffer) {
    return buffer;
  }
  // Slow path: serialise loaders of the same block, then re-check.
  std::lock_guard<std::mutex> lock(*mutex_vec_[block_id]);
  buffer = lp_map_.acquire_block(block_id, !no_lru_mode());
  if (buffer) {
    return buffer;
  }
  {
    bool found = free_buffers_.try_dequeue(buffer);
    if (!found && !no_lru_mode_) {
      for (int i = 0; i < retry; i++) {
        lp_map_.recycle(free_buffers_);
        found = free_buffers_.try_dequeue(buffer);
        if (found) {
          break;
        }
      }
    }
    if (!found) {
      LOG_ERROR("Buffer pool failed to get free buffer");
      return nullptr;
    }
  }

  ssize_t read_bytes = pread(fd_, buffer, size, offset);
  if (read_bytes != static_cast<ssize_t>(size)) {
    LOG_ERROR("Buffer pool failed to read file at offset: %zu", offset);
    // Give the buffer back; if the free list rejects it, free it rather than
    // leak it (same policy as LPMap::recycle).
    if (!free_buffers_.enqueue(buffer)) {
      LOG_ERROR("recycle buffer enqueue failed.");
      ailego_free(buffer);
    }
    return nullptr;
  }
  return lp_map_.set_block_acquired(block_id, buffer);
}

// Reads exactly `length` bytes at `offset` from the file into `buffer`.
// Returns 0 on success, -1 when the read returns any other byte count.
int VecBufferPool::get_meta(size_t offset, size_t length, char *buffer) {
  const ssize_t got = pread(fd_, buffer, length, offset);
  if (got == static_cast<ssize_t>(length)) {
    return 0;
  }
  LOG_ERROR("Buffer pool failed to read file at offset: %zu", offset);
  return -1;
}

// Fetches the buffer for `block_id` from the pool, allowing up to five
// recycle attempts when no free buffer is available.
char *VecBufferPoolHandle::get_block(size_t offset, size_t size,
                                     size_t block_id) {
  const int recycle_attempts = 5;
  return pool_.acquire_buffer(block_id, offset, size, recycle_attempts);
}

// Forwards to the pool's get_meta() and returns its status code.
int VecBufferPoolHandle::get_meta(size_t offset, size_t length, char *buffer) {
  const int status = pool_.get_meta(offset, length, buffer);
  return status;
}

// Drops one reference on `block_id`; a no-op when the pool runs without LRU.
void VecBufferPoolHandle::release_one(block_id_t block_id) {
  if (pool_.no_lru_mode()) {
    return;
  }
  pool_.lp_map_.release_block(block_id);
}

// Takes one extra reference on `block_id`; a no-op when the pool runs
// without LRU. The buffer pointer returned by the map is not used.
void VecBufferPoolHandle::acquire_one(block_id_t block_id) {
  if (pool_.no_lru_mode()) {
    return;
  }
  pool_.lp_map_.acquire_block(block_id, true);
}

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/container/bitmap.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "bitmap.h"

namespace zvec {
namespace ailego {

// Counts the bits set in both bitsets; only the words both operands have in
// common can contribute.
size_t Bitset::BitwiseAndCardinality(const Bitset &lhs, const Bitset &rhs) {
  const auto &a = lhs.array_;
  const auto &b = rhs.array_;
  const size_t common = std::min(a.size(), b.size());
  return BitsetHelper::BitwiseAndCardinality(a.data(), b.data(), common);
}

// Counts the bits set in `lhs` but not in `rhs`. Words of `lhs` beyond the
// end of `rhs` are counted in full.
size_t Bitset::BitwiseAndnotCardinality(const Bitset &lhs, const Bitset &rhs) {
  const auto &a = lhs.array_;
  const auto &b = rhs.array_;
  const size_t common = std::min(a.size(), b.size());

  auto count =
      BitsetHelper::BitwiseAndnotCardinality(a.data(), b.data(), common);
  if (a.size() > common) {
    count += BitsetHelper::Cardinality(a.data() + common, a.size() - common);
  }
  return count;
}

// Counts the bits set in exactly one of the two bitsets. The words of the
// longer operand beyond the shared prefix are counted in full.
size_t Bitset::BitwiseXorCardinality(const Bitset &lhs, const Bitset &rhs) {
  const auto &a = lhs.array_;
  const auto &b = rhs.array_;
  const size_t common = std::min(a.size(), b.size());

  auto count = BitsetHelper::BitwiseXorCardinality(a.data(), b.data(), common);
  if (b.size() > common) {
    count += BitsetHelper::Cardinality(b.data() + common, b.size() - common);
  } else if (a.size() > common) {
    count += BitsetHelper::Cardinality(a.data() + common, a.size() - common);
  }
  return count;
}

// Counts the bits set in at least one of the two bitsets. The words of the
// longer operand beyond the shared prefix are counted in full.
size_t Bitset::BitwiseOrCardinality(const Bitset &lhs, const Bitset &rhs) {
  const auto &a = lhs.array_;
  const auto &b = rhs.array_;
  const size_t common = std::min(a.size(), b.size());

  auto count = BitsetHelper::BitwiseOrCardinality(a.data(), b.data(), common);
  if (b.size() > common) {
    count += BitsetHelper::Cardinality(b.data() + common, b.size() - common);
  } else if (a.size() > common) {
    count += BitsetHelper::Cardinality(a.data() + common, a.size() - common);
  }
  return count;
}

void Bitmap::clear(void) {
  for (std::vector<Bucket *>::iterator iter = array_.begin();
       iter != array_.end(); ++iter) {
    delete (*iter);
  }
  array_.clear();
}

void Bitmap::copy(const Bitmap &rhs) {
  this->clear();

  for (std::vector<Bucket *>::const_iterator iter = rhs.array_.begin();
       iter != rhs.array_.end(); ++iter) {
    Bucket *bucket = NULL;
    if (*iter) {
      bucket = new Bucket(*(*iter));
    }
    array_.push_back(bucket);
  }
}

void Bitmap::shrink_to_fit(void) {
  size_t shrink_count = 0;
  std::vector<Bucket *>::reverse_iterator iter;

  for (iter = array_.rbegin(); iter != array_.rend(); ++iter) {
    if (*iter) {
      if (!(*iter)->test_none()) {
        break;
      }
      delete (*iter);
      *iter = NULL;
    }
    ++shrink_count;
  }
  for (; iter != array_.rend(); ++iter) {
    if ((*iter) && (*iter)->test_none()) {
      delete (*iter);
      *iter = NULL;
    }
  }
  if (shrink_count != 0) {
    array_.resize(array_.size() - shrink_count);
  }
}

// Reports whether bit `num` is set. Bits in missing buckets read as false.
bool Bitmap::test(size_t num) const {
  // High bits select the bucket, low 16 bits select the bit inside it.
  const size_t index = num >> 16;
  if (index >= array_.size()) {
    return false;
  }
  const Bucket *bucket = array_[index];
  if (bucket == nullptr) {
    return false;
  }
  return bucket->test(static_cast<uint16_t>(num));
}

// Sets bit `num`, growing the bucket array and allocating the bucket on
// demand.
void Bitmap::set(size_t num) {
  // High bits select the bucket, low 16 bits select the bit inside it.
  const size_t index = num >> 16;
  if (array_.size() <= index) {
    array_.resize(index + 1, nullptr);
  }
  if (array_[index] == nullptr) {
    array_[index] = new Bucket;
  }
  array_[index]->set(static_cast<uint16_t>(num));
}

// Clears bit `num`. The bucket array is grown to cover `num` if needed, but
// no bucket is allocated: a missing bucket already reads as all zeros.
void Bitmap::reset(size_t num) {
  // High bits select the bucket, low 16 bits select the bit inside it.
  const size_t index = num >> 16;
  if (array_.size() <= index) {
    array_.resize(index + 1, nullptr);
  }

  Bucket *bucket = array_[index];
  if (bucket != nullptr) {
    bucket->reset(static_cast<uint16_t>(num));
  }
}

// Toggles bit `num`, growing the bucket array and allocating the bucket on
// demand.
void Bitmap::flip(size_t num) {
  // High bits select the bucket. This must stay a size_t (as in test/set/
  // reset): narrowing it to 16 bits would wrap the bucket index for
  // num >= 2^32 and toggle a bit in the wrong bucket.
  size_t offset = num >> 16;
  if (offset >= array_.size()) {
    array_.resize(offset + 1, NULL);
  }

  Bucket *&bucket = array_[offset];
  if (!bucket) {
    bucket = new Bucket;
  }
  // Low 16 bits
  bucket->flip(static_cast<uint16_t>(num));
}

// In-place AND with `rhs`. A bucket of this bitmap with no counterpart in
// `rhs` (null there, or beyond its end) is freed and becomes null.
void Bitmap::bitwise_and(const Bitmap &rhs) {
  const size_t rhs_count = rhs.array_.size();

  for (size_t i = 0; i < array_.size(); ++i) {
    Bucket *&dst = array_[i];
    if (dst == nullptr) {
      continue;
    }
    const Bucket *src = (i < rhs_count) ? rhs.array_[i] : nullptr;
    if (src != nullptr) {
      dst->bitwise_and(*src);
    } else {
      delete dst;
      dst = nullptr;
    }
  }
}

// In-place AND-NOT with `rhs`: only positions where both bitmaps have a
// bucket can change.
void Bitmap::bitwise_andnot(const Bitmap &rhs) {
  const size_t shared = std::min(array_.size(), rhs.array_.size());

  for (size_t i = 0; i < shared; ++i) {
    Bucket *dst = array_[i];
    const Bucket *src = rhs.array_[i];
    if (dst != nullptr && src != nullptr) {
      dst->bitwise_andnot(*src);
    }
  }
}

void Bitmap::bitwise_or(const Bitmap &rhs) {
  size_t overlap = std::min(array_.size(), rhs.array_.size());

  for (size_t i = 0; i < overlap; ++i) {
    const Bucket *src = rhs.array_[i];

    if (src) {
      Bucket *&dst = array_[i];

      if (dst) {
        dst->bitwise_or(*src);
      } else {
        dst = new Bucket(*src);
      }
    }
  }
  for (size_t i = overlap; i < rhs.array_.size(); ++i) {
    const Bucket *src = rhs.array_[i];
    Bucket *bucket = NULL;

    if (src) {
      bucket = new Bucket(*src);
    }
    array_.push_back(bucket);
  }
}

void Bitmap::bitwise_xor(const Bitmap &rhs) {
  size_t overlap = std::min(array_.size(), rhs.array_.size());

  for (size_t i = 0; i < overlap; ++i) {
    const Bucket *src = rhs.array_[i];

    if (src) {
      Bucket *&dst = array_[i];

      if (dst) {
        dst->bitwise_xor(*src);
      } else {
        dst = new Bucket(*src);
      }
    }
  }
  for (size_t i = overlap; i < rhs.array_.size(); ++i) {
    const Bucket *src = rhs.array_[i];
    Bucket *bucket = NULL;

    if (src) {
      bucket = new Bucket(*src);
    }
    array_.push_back(bucket);
  }
}

void Bitmap::bitwise_not(void) {
  for (std::vector<Bucket *>::iterator iter = array_.begin();
       iter != array_.end(); ++iter) {
    Bucket *&bucket = *iter;
    if (!bucket) {
      bucket = new Bucket;
    }
    bucket->bitwise_not();
  }
}

bool Bitmap::test_all(void) const {
  if (array_.empty()) {
    return false;
  }
  for (std::vector<Bucket *>::const_iterator iter = array_.begin();
       iter != array_.end(); ++iter) {
    if (!(*iter) || !(*iter)->test_all()) {
      return false;
    }
  }
  return true;
}

bool Bitmap::test_any(void) const {
  for (std::vector<Bucket *>::const_iterator iter = array_.begin();
       iter != array_.end(); ++iter) {
    if (*iter && (*iter)->test_any()) {
      return true;
    }
  }
  return false;
}

bool Bitmap::test_none(void) const {
  for (std::vector<Bucket *>::const_iterator iter = array_.begin();
       iter != array_.end(); ++iter) {
    if (*iter && !(*iter)->test_none()) {
      return false;
    }
  }
  return true;
}

size_t Bitmap::cardinality(void) const {
  size_t result = 0;
  for (std::vector<Bucket *>::const_iterator iter = array_.begin();
       iter != array_.end(); ++iter) {
    if (*iter) {
      result += (*iter)->cardinality();
    }
  }
  return result;
}

void Bitmap::extract(size_t base, std::vector<size_t> *out) const {
  for (std::vector<Bucket *>::const_iterator iter = array_.begin();
       iter != array_.end(); ++iter) {
    if (*iter) {
      (*iter)->extract(base, out);
    }
    base += Bucket::MAX_SIZE;
  }
}

// Counts the bits set in both bitmaps; only positions where both sides have
// a bucket can contribute.
size_t Bitmap::BitwiseAndCardinality(const Bitmap &lhs, const Bitmap &rhs) {
  const size_t shared = std::min(lhs.array_.size(), rhs.array_.size());
  size_t total = 0;

  for (size_t i = 0; i < shared; ++i) {
    const Bucket *l = lhs.array_[i];
    const Bucket *r = rhs.array_[i];
    if (l == nullptr || r == nullptr) {
      continue;
    }
    total += Bucket::BitwiseAndCardinality(*l, *r);
  }
  return total;
}

// Counts the bits set in `lhs` but not in `rhs`. A bucket of `lhs` with no
// counterpart in `rhs` (null there, or beyond its end) is counted in full.
size_t Bitmap::BitwiseAndnotCardinality(const Bitmap &lhs, const Bitmap &rhs) {
  const size_t rhs_count = rhs.array_.size();
  size_t total = 0;

  for (size_t i = 0; i < lhs.array_.size(); ++i) {
    const Bucket *l = lhs.array_[i];
    if (l == nullptr) {
      continue;
    }
    const Bucket *r = (i < rhs_count) ? rhs.array_[i] : nullptr;
    if (r != nullptr) {
      total += Bucket::BitwiseAndnotCardinality(*l, *r);
    } else {
      total += l->cardinality();
    }
  }
  return total;
}

// Counts the bits set in exactly one of the two bitmaps. A bucket that
// exists on one side only is counted in full.
size_t Bitmap::BitwiseXorCardinality(const Bitmap &lhs, const Bitmap &rhs) {
  const size_t lhs_count = lhs.array_.size();
  const size_t rhs_count = rhs.array_.size();
  const size_t span = std::max(lhs_count, rhs_count);
  size_t total = 0;

  for (size_t i = 0; i < span; ++i) {
    const Bucket *l = (i < lhs_count) ? lhs.array_[i] : nullptr;
    const Bucket *r = (i < rhs_count) ? rhs.array_[i] : nullptr;

    if (l != nullptr && r != nullptr) {
      total += Bucket::BitwiseXorCardinality(*l, *r);
    } else if (l != nullptr) {
      total += l->cardinality();
    } else if (r != nullptr) {
      total += r->cardinality();
    }
  }
  return total;
}

// Counts the bits set in at least one of the two bitmaps. A bucket that
// exists on one side only is counted in full.
size_t Bitmap::BitwiseOrCardinality(const Bitmap &lhs, const Bitmap &rhs) {
  const size_t lhs_count = lhs.array_.size();
  const size_t rhs_count = rhs.array_.size();
  const size_t span = std::max(lhs_count, rhs_count);
  size_t total = 0;

  for (size_t i = 0; i < span; ++i) {
    const Bucket *l = (i < lhs_count) ? lhs.array_[i] : nullptr;
    const Bucket *r = (i < rhs_count) ? rhs.array_[i] : nullptr;

    if (l != nullptr && r != nullptr) {
      total += Bucket::BitwiseOrCardinality(*l, *r);
    } else if (l != nullptr) {
      total += l->cardinality();
    } else if (r != nullptr) {
      total += r->cardinality();
    }
  }
  return total;
}

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/container/bitmap.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <algorithm>
#include <vector>
#include <ailego/utility/bitset_helper.h>
#include <zvec/ailego/internal/platform.h>

namespace zvec {
namespace ailego {

/*! Fixed Bitset
 *  A bitset of exactly N bits stored inline as an array of 32-bit words.
 *  N must be a multiple of 32.
 */
template <size_t N, typename = typename std::enable_if<N % 32 == 0>::type>
class FixedBitset {
 public:
  enum { MAX_SIZE = N };

  //! Constructor (all bits cleared)
  FixedBitset(void) {
    this->clear();
  }

  //! Constructor (copies the words of `rhs`)
  FixedBitset(const FixedBitset &rhs) {
    memcpy(array_, rhs.array_, sizeof(array_));
  }

  //! Destructor
  ~FixedBitset(void) {}

  //! Assignment (copies the words of `rhs`)
  FixedBitset &operator=(const FixedBitset &rhs) {
    memcpy(array_, rhs.array_, sizeof(array_));
    return *this;
  }

  //! Retrieve pointer to the first 32-bit word
  uint32_t *data(void) {
    return array_;
  }

  //! Retrieve pointer to the first 32-bit word
  const uint32_t *data(void) const {
    return array_;
  }

  //! Retrieve count of bits in set
  constexpr size_t size(void) const {
    return MAX_SIZE;
  }

  //! Clear the bitset
  void clear(void) {
    memset(array_, 0, sizeof(array_));
  }

  //! Test a bit in bitset
  bool test(size_t num) const {
    ailego_assert_with(N > num, "overflow argument");
    const uint32_t word = array_[num / 32];
    return (((word >> (num % 32)) & 1u) != 0);
  }

  //! Set a bit in bitset
  void set(size_t num) {
    ailego_assert_with(N > num, "overflow argument");
    array_[num / 32] |= (1u << (num % 32));
  }

  //! Clear a bit in bitset
  void reset(size_t num) {
    ailego_assert_with(N > num, "overflow argument");
    array_[num / 32] &= ~(1u << (num % 32));
  }

  //! Toggle a bit in bitset
  void flip(size_t num) {
    ailego_assert_with(N > num, "overflow argument");
    array_[num / 32] ^= (1u << (num % 32));
  }

  //! Perform binary AND
  void bitwise_and(const FixedBitset &rhs) {
    BitsetHelper::BitwiseAnd(array_, rhs.array_, WordCount());
  }

  //! Perform binary AND NOT
  void bitwise_andnot(const FixedBitset &rhs) {
    BitsetHelper::BitwiseAndnot(array_, rhs.array_, WordCount());
  }

  //! Perform binary OR
  void bitwise_or(const FixedBitset &rhs) {
    BitsetHelper::BitwiseOr(array_, rhs.array_, WordCount());
  }

  //! Perform binary XOR
  void bitwise_xor(const FixedBitset &rhs) {
    BitsetHelper::BitwiseXor(array_, rhs.array_, WordCount());
  }

  //! Perform binary NOT
  void bitwise_not(void) {
    BitsetHelper::BitwiseNot(array_, WordCount());
  }

  //! Check if all bits are set to true
  bool test_all(void) const {
    return BitsetHelper::TestAll(array_, WordCount());
  }

  //! Check if any bits are set to true
  bool test_any(void) const {
    return BitsetHelper::TestAny(array_, WordCount());
  }

  //! Check if none of the bits are set to true
  bool test_none(void) const {
    return BitsetHelper::TestNone(array_, WordCount());
  }

  //! Compute the cardinality of a bitset
  size_t cardinality(void) const {
    return BitsetHelper::Cardinality(array_, WordCount());
  }

  //! Extract the positions of the set bits (offset by `base`) to an array
  void extract(size_t base, std::vector<size_t> *out) const {
    for (size_t i = 0; i != WordCount(); ++i) {
      // Peel off set bits from the lowest one upwards.
      for (uint32_t w = array_[i]; w != 0; w &= (w - 1)) {
        out->push_back(base + ailego_ctz32(w));
      }
      base += 32u;
    }
  }

  //! Extract the positions of the set bits to an array
  void extract(std::vector<size_t> *out) const {
    this->extract(0, out);
  }

  //! Compute the AND cardinality between two bitsets
  static size_t BitwiseAndCardinality(const FixedBitset &lhs,
                                      const FixedBitset &rhs) {
    return BitsetHelper::BitwiseAndCardinality(lhs.array_, rhs.array_,
                                               WordCount());
  }

  //! Compute the ANDNOT cardinality between two bitsets
  static size_t BitwiseAndnotCardinality(const FixedBitset &lhs,
                                         const FixedBitset &rhs) {
    return BitsetHelper::BitwiseAndnotCardinality(lhs.array_, rhs.array_,
                                                  WordCount());
  }

  //! Compute the XOR cardinality between two bitsets
  static size_t BitwiseXorCardinality(const FixedBitset &lhs,
                                      const FixedBitset &rhs) {
    return BitsetHelper::BitwiseXorCardinality(lhs.array_, rhs.array_,
                                               WordCount());
  }

  //! Compute the OR cardinality between two bitsets
  static size_t BitwiseOrCardinality(const FixedBitset &lhs,
                                     const FixedBitset &rhs) {
    return BitsetHelper::BitwiseOrCardinality(lhs.array_, rhs.array_,
                                              WordCount());
  }

  //! Reinterpret a 32-bit word array as a bitset
  static FixedBitset *Cast(uint32_t *arr) {
    return reinterpret_cast<FixedBitset *>(arr);
  }

  //! Reinterpret a 32-bit word array as a bitset
  static const FixedBitset *Cast(const uint32_t *arr) {
    return reinterpret_cast<const FixedBitset *>(arr);
  }

  //! Reinterpret a 64-bit word array as a bitset
  static FixedBitset *Cast(uint64_t *arr) {
    return reinterpret_cast<FixedBitset *>(arr);
  }

  //! Reinterpret a 64-bit word array as a bitset
  static const FixedBitset *Cast(const uint64_t *arr) {
    return reinterpret_cast<const FixedBitset *>(arr);
  }

 private:
  //! Number of 32-bit words backing the bitset
  static constexpr size_t WordCount(void) {
    return (N + 0x1f) >> 5;
  }

  uint32_t array_[(N + 0x1f) >> 5];
};

/*! Fixed Bitset (Special)
 *  Zero-bit specialization: carries no storage and only reports its size.
 */
template <>
class FixedBitset<0> {
 public:
  enum { MAX_SIZE = 0 };

  //! Retrieve max size of bitset (always MAX_SIZE, i.e. zero)
  constexpr size_t size(void) const {
    return static_cast<size_t>(MAX_SIZE);
  }
};

/*! Bitset
 *  A resizable bitset stored as a vector of 32-bit words. The bit capacity
 *  is always a multiple of 32 (requested sizes are rounded up).
 */
class Bitset {
 public:
  //! Constructor (empty bitset)
  Bitset(void) : array_() {}

  //! Constructor (room for at least `bits` bits, all cleared)
  Bitset(size_t bits) : array_((bits + 0x1f) >> 5) {}

  //! Constructor
  Bitset(const Bitset &rhs) : array_(rhs.array_) {}

  //! Constructor (noexcept so containers move instead of copy on growth)
  Bitset(Bitset &&rhs) noexcept : array_(std::move(rhs.array_)) {}

  //! Destructor
  ~Bitset(void) {}

  //! Assignment
  Bitset &operator=(const Bitset &rhs) {
    array_ = rhs.array_;
    return *this;
  }

  //! Assignment (noexcept, see the move constructor)
  Bitset &operator=(Bitset &&rhs) noexcept {
    array_ = std::move(rhs.array_);
    return *this;
  }

  //! Retrieve data pointer
  uint32_t *data(void) {
    return array_.data();
  }

  //! Retrieve data pointer
  const uint32_t *data(void) const {
    return array_.data();
  }

  //! Retrieve count of bits in set
  size_t size(void) const {
    return (array_.size() << 5);
  }

  //! Resize the bitset (rounded up to a whole number of 32-bit words)
  void resize(size_t bits) {
    array_.resize((bits + 0x1f) >> 5);
  }

  //! Clear the bitset (removes all words, so size() becomes 0)
  void clear(void) {
    array_.clear();
  }

  //! Test a bit in bitset
  bool test(size_t num) const {
    ailego_assert_with(this->size() > num, "overflow argument");
    return ((array_[num >> 5] & (1u << (num & 0x1f))) != 0);
  }

  //! Set a bit in bitset
  void set(size_t num) {
    ailego_assert_with(this->size() > num, "overflow argument");
    uint32_t mask = (1u << (num & 0x1f));
    array_[num >> 5] |= mask;
  }

  //! Clear a bit in bitset
  void reset(size_t num) {
    ailego_assert_with(this->size() > num, "overflow argument");
    uint32_t mask = (1u << (num & 0x1f));
    array_[num >> 5] &= ~mask;
  }

  //! Toggle a bit in bitset
  void flip(size_t num) {
    ailego_assert_with(this->size() > num, "overflow argument");
    uint32_t mask = (1u << (num & 0x1f));
    array_[num >> 5] ^= mask;
  }

  //! Perform binary AND (over the words both bitsets have in common)
  void bitwise_and(const Bitset &rhs) {
    BitsetHelper::BitwiseAnd(array_.data(), rhs.array_.data(),
                             std::min(array_.size(), rhs.array_.size()));
  }

  //! Perform binary AND NOT (over the words both bitsets have in common)
  void bitwise_andnot(const Bitset &rhs) {
    BitsetHelper::BitwiseAndnot(array_.data(), rhs.array_.data(),
                                std::min(array_.size(), rhs.array_.size()));
  }

  //! Perform binary OR (over the words both bitsets have in common)
  void bitwise_or(const Bitset &rhs) {
    BitsetHelper::BitwiseOr(array_.data(), rhs.array_.data(),
                            std::min(array_.size(), rhs.array_.size()));
  }

  //! Perform binary XOR (over the words both bitsets have in common)
  void bitwise_xor(const Bitset &rhs) {
    BitsetHelper::BitwiseXor(array_.data(), rhs.array_.data(),
                             std::min(array_.size(), rhs.array_.size()));
  }

  //! Perform binary NOT
  void bitwise_not(void) {
    BitsetHelper::BitwiseNot(array_.data(), array_.size());
  }

  //! Check if all bits are set to true
  bool test_all(void) const {
    return BitsetHelper::TestAll(array_.data(), array_.size());
  }

  //! Check if any bits are set to true
  bool test_any(void) const {
    return BitsetHelper::TestAny(array_.data(), array_.size());
  }

  //! Check if none of the bits are set to true
  bool test_none(void) const {
    return BitsetHelper::TestNone(array_.data(), array_.size());
  }

  //! Compute the cardinality of a bitset
  size_t cardinality(void) const {
    return BitsetHelper::Cardinality(array_.data(), array_.size());
  }

  //! Extract the positions of the set bits (offset by `base`) to an array
  void extract(size_t base, std::vector<size_t> *out) const {
    const uint32_t *iter = array_.data();
    const uint32_t *last = array_.data() + array_.size();

    for (; iter != last; ++iter) {
      uint32_t w = *iter;

      // Peel off set bits from the lowest one upwards.
      while (w != 0) {
        uint32_t c = ailego_ctz32(w);
        w &= ~(1u << c);
        out->push_back(base + c);
      }
      base += 32u;
    }
  }

  //! Extract the positions of the set bits to an array
  void extract(std::vector<size_t> *out) const {
    this->extract(0, out);
  }

  //! Compute the AND cardinality between two bitsets
  static size_t BitwiseAndCardinality(const Bitset &lhs, const Bitset &rhs);

  //! Compute the ANDNOT cardinality between two bitsets
  static size_t BitwiseAndnotCardinality(const Bitset &lhs, const Bitset &rhs);

  //! Compute the XOR cardinality between two bitsets
  static size_t BitwiseXorCardinality(const Bitset &lhs, const Bitset &rhs);

  //! Compute the OR cardinality between two bitsets
  static size_t BitwiseOrCardinality(const Bitset &lhs, const Bitset &rhs);

 private:
  std::vector<uint32_t> array_;
};

/*! Bitmap
 *
 *  Bit container organised as a vector of pointers to fixed-size buckets
 *  (FixedBitset<65536>). Apart from the small inline wrappers below, the
 *  member functions are only declared here and are defined out of line.
 */
class Bitmap {
 public:
  typedef FixedBitset<65536u> Bucket;

  //! Constructor
  Bitmap(void) : array_() {}

  //! Constructor (takes over a copy of the content of `rhs` via copy())
  Bitmap(const Bitmap &rhs) {
    this->copy(rhs);
  }

  //! Destructor (releases the content via clear())
  ~Bitmap(void) {
    this->clear();
  }

  //! Assignment
  Bitmap &operator=(const Bitmap &rhs) {
    // Self-assignment is a no-op. Skipping it keeps the result independent
    // of whether copy() tolerates source and destination being the same
    // object.
    if (this != &rhs) {
      this->copy(rhs);
    }
    return *this;
  }

  //! Retrieve bucket size of bitmap (number of bucket slots in the vector)
  size_t bucket_size(void) const {
    return array_.size();
  }

  //! Clear the bitmap
  void clear(void);

  //! Remove the none buckets
  void shrink_to_fit(void);

  //! Test a bit in bitmap
  bool test(size_t num) const;

  //! Set a bit in bitmap
  void set(size_t num);

  //! Reset a bit in bitmap
  void reset(size_t num);

  //! Toggle a bit in bitmap
  void flip(size_t num);

  //! Perform binary AND
  void bitwise_and(const Bitmap &rhs);

  //! Perform binary AND NOT
  void bitwise_andnot(const Bitmap &rhs);

  //! Perform binary OR
  void bitwise_or(const Bitmap &rhs);

  //! Perform binary XOR
  void bitwise_xor(const Bitmap &rhs);

  //! Perform binary NOT (It will expand the whole map)
  void bitwise_not(void);

  //! Check if all bits are set to true
  bool test_all(void) const;

  //! Check if any bits are set to true
  bool test_any(void) const;

  //! Check if none of the bits are set to true
  bool test_none(void) const;

  //! Compute the cardinality of a bitmap
  size_t cardinality(void) const;

  //! Extract the bitmap to an array
  void extract(size_t base, std::vector<size_t> *out) const;

  //! Extract the bitmap to an array (same as extract(0, out))
  void extract(std::vector<size_t> *out) const {
    this->extract(0, out);
  }

  //! Compute the AND cardinality between two bitmaps
  static size_t BitwiseAndCardinality(const Bitmap &lhs, const Bitmap &rhs);

  //! Compute the ANDNOT cardinality between two bitmaps
  static size_t BitwiseAndnotCardinality(const Bitmap &lhs, const Bitmap &rhs);

  //! Compute the XOR cardinality between two bitmaps
  static size_t BitwiseXorCardinality(const Bitmap &lhs, const Bitmap &rhs);

  //! Compute the OR cardinality between two bitmaps
  static size_t BitwiseOrCardinality(const Bitmap &lhs, const Bitmap &rhs);

 protected:
  //! Copy the content from another bitmap
  void copy(const Bitmap &rhs);

 private:
  //! Bucket slots of the bitmap, stored as raw pointers
  std::vector<Bucket *> array_;
};

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/container/bloom_filter.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cmath>
#include <zvec/ailego/internal/platform.h>
#include <zvec/ailego/utility/type_helper.h>

namespace zvec {
namespace ailego {

/*! Bloom Filter Calculator
 *
 *  Stateless helpers relating the four bloom filter quantities: item count
 *  (n), bit count (m), hash function count (k) and false positive
 *  probability (p).
 */
struct BloomFilterCalculator {
  /**
   *  \brief          Calculate probability of false positives
   *  \param n        Number of items in the filter
   *  \param m        Number of bits in the filter
   *  \param k        Number of hash functions
   *  \return         Probability of false positives, (1 - e^(-k*n/m))^k
   */
  static double Probability(size_t n, size_t m, size_t k) {
    const double load =
        static_cast<double>(k) / static_cast<double>(m) * static_cast<double>(n);
    const double bit_set_ratio = 1.0 - std::exp(-load);
    return std::pow(bit_set_ratio, k);
  }

  /**
   *  \brief          Calculate number of items in the filter
   *  \param m        Number of bits in the filter
   *  \param k        Number of hash functions
   *  \param p        Probability of false positives
   *  \return         Number of items in the filter (rounded up)
   */
  static size_t NumberOfItems(size_t m, size_t k, double p) {
    const double hashes = static_cast<double>(k);
    const double kth_root = std::exp(std::log(p) / hashes);
    const double items =
        -(static_cast<double>(m) / hashes * std::log(1.0 - kth_root));
    return static_cast<size_t>(std::ceil(items));
  }

  /**
   *  \brief          Calculate number of bits in the filter
   *  \param n        Number of items in the filter
   *  \param p        Probability of false positives
   *  \return         Number of bits in the filter (rounded up)
   */
  static size_t NumberOfBits(size_t n, double p) {
    // log(1 / 2^ln(2)) evaluates to -(ln 2)^2
    const double divisor = std::log(1.0 / std::pow(2.0, std::log(2.0)));
    const double bits = static_cast<double>(n) * std::log(p) / divisor;
    return static_cast<size_t>(std::ceil(bits));
  }

  /**
   *  \brief          Calculate number of bits in the filter
   *  \param n        Number of items in the filter
   *  \param k        Number of hash functions
   *  \param p        Probability of false positives
   *  \return         Number of bits in the filter (rounded up)
   */
  static size_t NumberOfBits(size_t n, size_t k, double p) {
    const double hashes = static_cast<double>(k);
    const double divisor = std::log(1.0 - std::pow(p, 1.0 / hashes));
    const double bits = -(hashes * static_cast<double>(n) / divisor);
    return static_cast<size_t>(std::ceil(bits));
  }

  /**
   *  \brief          Calculate number of bytes in the filter
   *  \param n        Number of items in the filter
   *  \param p        Probability of false positives
   *  \return         Number of bytes in the filter (bits rounded up to bytes)
   */
  static size_t NumberOfBytes(size_t n, double p) {
    const size_t bits = NumberOfBits(n, p);
    return (bits + 7) / 8u;
  }

  /**
   *  \brief          Calculate number of bytes in the filter
   *  \param n        Number of items in the filter
   *  \param k        Number of hash functions
   *  \param p        Probability of false positives
   *  \return         Number of bytes in the filter (bits rounded up to bytes)
   */
  static size_t NumberOfBytes(size_t n, size_t k, double p) {
    const size_t bits = NumberOfBits(n, k, p);
    return (bits + 7) / 8u;
  }

  /**
   *  \brief          Calculate number of hash functions
   *  \param n        Number of items in the filter
   *  \param m        Number of bits in the filter
   *  \return         Number of hash functions, m / n * ln(2) rounded to
   *                  the nearest integer
   */
  static size_t NumberOfHash(size_t n, size_t m) {
    const double bits_per_item =
        static_cast<double>(m) / static_cast<double>(n);
    return static_cast<size_t>(std::round(bits_per_item * std::log(2.0)));
  }
};

/*! Bloom Filter
 *
 *  K is the number of hash values per item. The filter does not hash
 *  anything itself: callers hand K integral values to insert(),
 *  force_insert() and has(), and each value is reduced modulo the bit count
 *  to select one bit. A filter without storage (default constructed,
 *  constructed with invalid arguments, moved from, or after a failed
 *  reset()) is "invalid": is_valid() is false and it holds no items.
 */
template <size_t K>
class BloomFilter {
 public:
  //! Constructor (the filter is invalid until reset() succeeds)
  BloomFilter(void) {}

  //! Constructor: size the filter for `n` items at false positive
  //! probability `p`. Arguments outside n > 0, 0 < p < 1 leave the filter
  //! invalid.
  BloomFilter(size_t n, double p) {
    if (n > 0 && p > 0.0 && p < 1.0) {
      capacity_ = n;
      bits_count_ = BloomFilterCalculator::NumberOfBits(n, K, p);
      // Round the bit count up to a whole number of 32-bit words
      bits_count_ = ((bits_count_ + 31) >> 5) << 5;
      probability_ = BloomFilterCalculator::Probability(n, bits_count_, K);
      bitset_ = new uint32_t[bits_count_ >> 5];
      memset(bitset_, 0, (bits_count_ >> 3));
    }
  }

  //! Constructor (takes over the storage of `rhs` and leaves it invalid)
  BloomFilter(BloomFilter &&rhs) noexcept
      : bitset_(rhs.bitset_),
        bits_count_(rhs.bits_count_),
        capacity_(rhs.capacity_),
        count_(rhs.count_),
        probability_(rhs.probability_) {
    rhs.bitset_ = nullptr;
    rhs.bits_count_ = 0u;
    rhs.capacity_ = 0u;
    rhs.count_ = 0u;
    rhs.probability_ = 0.0;
  }

  //! Destructor
  ~BloomFilter(void) {
    delete[] bitset_;
  }

  //! Test if the filter is valid
  bool is_valid(void) const {
    return (bitset_ != nullptr);
  }

  //! Reset the bloom filter for `n` items at false positive probability `p`.
  //! Returns false and leaves the filter untouched when the arguments are
  //! outside n > 0, 0 < p < 1. Returns false and leaves the filter invalid
  //! (no storage, zero capacity) when the allocation fails.
  bool reset(size_t n, double p) {
    if (n == 0 || p <= 0.0 || p >= 1.0) {
      return false;
    }

    // Drop the previous content first so that the old and the new storage
    // are never alive at the same time.
    delete[] bitset_;
    bitset_ = nullptr;
    bits_count_ = 0u;
    capacity_ = 0u;
    count_ = 0u;
    probability_ = 0.0;

    size_t bits = BloomFilterCalculator::NumberOfBits(n, K, p);
    // Round the bit count up to a whole number of 32-bit words
    bits = ((bits + 31) >> 5) << 5;

    uint32_t *words = new (std::nothrow) uint32_t[bits >> 5];
    if (!words) {
      // The members were zeroed above, so insert() keeps refusing items
      // instead of writing through a null pointer.
      return false;
    }
    memset(words, 0, (bits >> 3));

    bitset_ = words;
    bits_count_ = bits;
    capacity_ = n;
    probability_ = BloomFilterCalculator::Probability(n, bits, K);
    return true;
  }

  //! Clear the bloom filter (keeps the storage, forgets all items)
  void clear(void) {
    if (bitset_) {
      memset(bitset_, 0, (bits_count_ >> 3));
      count_ = 0u;
    }
  }

  //! Insert a item into bloom filter.
  //! Returns false without touching the bits once capacity() items have
  //! been counted (always the case for an invalid filter).
  template <typename... TArgs,
            typename = typename std::enable_if<
                Conjunction<std::is_integral<TArgs>...>::value &&
                sizeof...(TArgs) == K>::type>
  bool insert(TArgs... vals) {
    if (count_ >= capacity_) {
      return false;
    }
    this->set_bits(vals...);
    ++count_;
    return true;
  }

  //! Force insert a item into bloom filter, ignoring the capacity limit.
  //! Does nothing on an invalid filter.
  template <typename... TArgs,
            typename = typename std::enable_if<
                Conjunction<std::is_integral<TArgs>...>::value &&
                sizeof...(TArgs) == K>::type>
  void force_insert(TArgs... vals) {
    // Without storage the bit count is zero and the modulo in set_bits()
    // would divide by zero.
    if (!bitset_) {
      return;
    }
    this->set_bits(vals...);
    ++count_;
  }

  //! Test whether the bits of an item are all set. A true result may be a
  //! false positive; an invalid filter contains nothing.
  template <typename... TArgs,
            typename = typename std::enable_if<
                Conjunction<std::is_integral<TArgs>...>::value &&
                sizeof...(TArgs) == K>::type>
  bool has(TArgs... vals) const {
    // Without storage the bit count is zero and the modulo in test_bits()
    // would divide by zero.
    if (!bitset_) {
      return false;
    }
    return this->test_bits(vals...);
  }

  //! Retrieve count of bits in bloom filter
  size_t bits_count(void) const {
    return bits_count_;
  }

  //! Retrieve capacity of bloom filter
  size_t capacity(void) const {
    return capacity_;
  }

  //! Retrieve count of items in bloom filter
  size_t count(void) const {
    return count_;
  }

  //! Retrieve probability of false positives
  double probability(void) const {
    return probability_;
  }

 protected:
  //! Disable them
  BloomFilter(const BloomFilter &) = delete;
  BloomFilter &operator=(const BloomFilter &) = delete;

  //! Set bits in bloom filter (requires a valid filter)
  template <typename TArg>
  void set_bits(TArg val) {
    size_t num = static_cast<size_t>(val) % bits_count_;
    // Word index is num / 32, bit inside the word is num % 32
    bitset_[num >> 5] |= (1u << (num & 0x1f));
  }

  //! Set bits in bloom filter (requires a valid filter)
  template <typename TArg, typename... TArgs>
  void set_bits(TArg val, TArgs... vals) {
    this->set_bits(val);
    this->set_bits(vals...);
  }

  //! Test bits in bloom filter (requires a valid filter)
  template <typename TArg>
  bool test_bits(TArg val) const {
    size_t num = static_cast<size_t>(val) % bits_count_;
    return ((bitset_[num >> 5] & (1u << (num & 0x1f))) != 0);
  }

  //! Test bits in bloom filter (requires a valid filter); stops at the
  //! first bit that is not set
  template <typename TArg, typename... TArgs>
  bool test_bits(TArg val, TArgs... vals) const {
    if (!this->test_bits(val)) {
      return false;
    }
    return this->test_bits(vals...);
  }

 private:
  uint32_t *bitset_{nullptr};
  size_t bits_count_{0u};
  size_t capacity_{0u};
  size_t count_{0u};
  double probability_{0.0};
};

/*! Bloom Filter (Special)
 *
 *  Explicit specialization for K == 0 (no hash values per item): an empty
 *  type that offers none of the members of the primary template.
 */
template <>
struct BloomFilter<0> {};

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/container/params.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cstring>
#include <zvec/ailego/container/params.h>
#include <zvec/ailego/encoding/json.h>
#include <zvec/ailego/logger/logger.h>

//! Global environ variable: the process environment table, walked by
//! Params::ParseFromEnvironment() as a null-terminated array of C strings
//! that are split at their first '='.
extern char **environ;

namespace zvec {
namespace ailego {

//! Copy the members of a JSON object into `params`. Booleans, integers
//! (stored as int64_t), floats and decoded strings are stored directly,
//! nested objects become nested Params; members of any other kind are
//! skipped.
static void ParseFromJsonObject(const ailego::JsonObject &obj, Params *params) {
  typedef ailego::JsonObject::const_iterator MemberIterator;

  for (MemberIterator member = obj.begin(); member != obj.end(); ++member) {
    const ailego::JsonValue &value = member->value();

    if (value.is_boolean()) {
      params->set(member->key().as_stl_string(), value.as_bool());
      continue;
    }
    if (value.is_integer()) {
      params->set(member->key().as_stl_string(),
                  static_cast<int64_t>(value.as_integer()));
      continue;
    }
    if (value.is_float()) {
      params->set(member->key().as_stl_string(), value.as_float());
      continue;
    }
    if (value.is_string()) {
      params->set(member->key().as_stl_string(),
                  value.as_string().decode().as_stl_string());
      continue;
    }
    if (value.is_object()) {
      // Build the nested parameters first, then move them into place
      Params nested;
      ParseFromJsonObject(value.as_object(), &nested);
      params->set(member->key().as_stl_string(), std::move(nested));
    }
  }
}

bool Params::ParseFromBuffer(const std::string &buf, Params *params) {
  ailego::JsonValue val;
  ailego::JsonParser parser;

  parser.set_comment(true);
  parser.set_simple(true);
  parser.set_squote(true);
  parser.set_unstrict(false);
  if (!parser.parse(buf.c_str(), &val)) {
    return false;
  }

  if (!val.is_object()) {
    return false;
  }
  ParseFromJsonObject(val.as_object(), params);
  return true;
}

//! Import the process environment into `params`: every "NAME=value" entry
//! is stored as a string value keyed by NAME (split at the first '=').
//! Entries without an '=' are skipped.
void Params::ParseFromEnvironment(Params *params) {
  // The table pointer itself can be null (glibc's clearenv() sets environ
  // to NULL), so check it before looking at the first slot.
  if (environ == nullptr) {
    return;
  }

  // Dump all environ string
  for (char **slot = environ; *slot != nullptr; ++slot) {
    const char *env = *slot;
    const char *sep = std::strchr(env, '=');
    if (sep != nullptr) {
      params->set(std::string(env, sep - env), std::string(sep + 1));
    }
  }
}

static void SerializeToJsonObject(const Params &params,
                                  ailego::JsonObject *obj) {
  for (const auto &it : params.hypercube().cubes()) {
    const ailego::Cube &cube = it.second;
    const char *key = it.first.c_str();

    if (cube.compatible<std::string>()) {
      const auto &val = cube.unsafe_cast<std::string>();
      ailego::JsonString str(val.data(), val.size());
      obj->set(key, ailego::JsonValue(str.encode()));
    } else if (cube.compatible<unsigned long long int>()) {
      obj->set(key,
               ailego::JsonValue(cube.unsafe_cast<unsigned long long int>()));
    } else if (cube.compatible<long long int>()) {
      obj->set(key, ailego::JsonValue(cube.unsafe_cast<long long int>()));
    } else if (cube.compatible<unsigned long int>()) {
      obj->set(key, ailego::JsonValue(cube.unsafe_cast<unsigned long int>()));
    } else if (cube.compatible<long int>()) {
      obj->set(key, ailego::JsonValue(cube.unsafe_cast<long int>()));
    } else if (cube.compatible<unsigned int>()) {
      obj->set(key, ailego::JsonValue(cube.unsafe_cast<unsigned int>()));
    } else if (cube.compatible<int>()) {
      obj->set(key, ailego::JsonValue(cube.unsafe_cast<int>()));
    } else if (cube.compatible<unsigned short int>()) {
      obj->set(key, ailego::JsonValue(cube.unsafe_cast<unsigned short int>()));
    } else if (cube.compatible<short int>()) {
      obj->set(key, ailego::JsonValue(cube.unsafe_cast<short int>()));
    } else if (cube.compatible<unsigned char>()) {
      obj->set(key, ailego::JsonValue(cube.unsafe_cast<unsigned char>()));
    } else if (cube.compatible<char>()) {
      obj->set(key, ailego::JsonValue(cube.unsafe_cast<char>()));
    } else if (cube.compatible<signed char>()) {
      obj->set(key, ailego::JsonValue(cube.unsafe_cast<signed char>()));
    } else if (cube.compatible<bool>()) {
      obj->set(key, ailego::JsonValue(cube.unsafe_cast<bool>()));
    } else if (cube.compatible<float>()) {
      obj->set(key, ailego::JsonValue(cube.unsafe_cast<float>()));
    } else if (cube.compatible<double>()) {
      obj->set(key, ailego::JsonValue(cube.unsafe_cast<double>()));
    } else if (cube.compatible<long double>()) {
      obj->set(key, ailego::JsonValue(cube.unsafe_cast<long double>()));
    } else if (cube.compatible<Params>()) {
      ailego::JsonObject subobj;
      SerializeToJsonObject(cube.unsafe_cast<Params>(), &subobj);
      obj->set(key, ailego::JsonValue(subobj));
    } else {
      LOG_WARN("Unsupported serializing \'%s\' <%s>.", key, cube.type().name());
    }
  }
}

void Params::SerializeToBuffer(const Params &params, std::string *buf) {
  if (buf != nullptr) {
    ailego::JsonObject obj;
    SerializeToJsonObject(params, &obj);
    buf->assign(ailego::JsonValue(obj).as_json_string().as_stl_string());
  }
}

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/container/reservoir.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <random>
#include <vector>

namespace zvec {
namespace ailego {

/*! Sampling Reservoir
 *
 *  Keeps at most `samples()` of the items passed to fill(). While the pool
 *  is not full every item is appended; afterwards an index is drawn
 *  uniformly from [0, total()] and the item replaces the pool slot with
 *  that index when the index is below samples(). Every call of fill() is
 *  counted in total(), whether or not the item was kept.
 */
template <typename T, typename Allocator = std::allocator<T>>
class Reservoir {
 public:
  //! Constructor (`cnt` is the number of samples to keep)
  Reservoir(size_t cnt)
      : samples_(cnt), total_(0), mt_(std::random_device()()), pool_() {
    pool_.reserve(samples_);
  }

  //! Constructor (copies the samples; the random engine is seeded afresh)
  Reservoir(const Reservoir &rhs)
      : samples_(rhs.samples_),
        total_(rhs.total_),
        mt_(std::random_device()()),
        pool_(rhs.pool_) {}

  //! Constructor (moves the samples; the random engine is seeded afresh)
  Reservoir(Reservoir &&rhs)
      : samples_(rhs.samples_),
        total_(rhs.total_),
        mt_(std::random_device()()),
        pool_(std::move(rhs.pool_)) {}

  //! Destructor
  ~Reservoir(void) {}

  //! Assignment (the random engine of this object is left as it is)
  Reservoir &operator=(const Reservoir &rhs) {
    samples_ = rhs.samples_;
    total_ = rhs.total_;
    pool_ = rhs.pool_;
    return *this;
  }

  //! Assignment (the random engine of this object is left as it is)
  Reservoir &operator=(Reservoir &&rhs) {
    samples_ = rhs.samples_;
    total_ = rhs.total_;
    pool_ = std::move(rhs.pool_);
    return *this;
  }

  //! Retrieve pool of reservoir
  std::vector<T, Allocator> *mutable_pool(void) {
    return &pool_;
  }

  //! Retrieve pool of reservoir
  const std::vector<T, Allocator> &pool(void) const {
    return pool_;
  }

  //! Retrieve count of samples
  size_t samples(void) const {
    return samples_;
  }

  //! Retrieve total count of filling
  size_t total(void) const {
    return total_;
  }

  //! Reset the reservoir (forget all items, keep the sample count)
  void reset(void) {
    total_ = 0;
    pool_.clear();
    pool_.reserve(samples_);
  }

  //! Fill the reservoir (the item is copied when it is kept)
  void fill(const T &item) {
    this->offer(item);
  }

  //! Fill the reservoir (the item is moved from when it is kept)
  void fill(T &&item) {
    this->offer(std::move(item));
  }

 private:
  //! Disable them
  Reservoir(void) = delete;

  //! Shared body of both fill() overloads; forwards the item so that the
  //! lvalue overload copies and the rvalue overload moves.
  template <typename U>
  void offer(U &&item) {
    if (samples_ > 0) {
      if (pool_.size() < samples_) {
        // Still room: keep every item
        pool_.push_back(std::forward<U>(item));
      } else {
        // Pool is full: the item takes a slot only when the drawn index
        // falls inside the pool
        std::uniform_int_distribution<size_t> picker(0, total_);
        const size_t slot = picker(mt_);

        if (slot < samples_) {
          pool_[slot] = std::forward<U>(item);
        }
      }
    }
    ++total_;
  }

  //! Members
  size_t samples_;
  size_t total_;
  std::mt19937 mt_;
  std::vector<T, Allocator> pool_;
};

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/container/vector_array.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <zvec/ailego/container/vector.h>
#include <zvec/ailego/internal/platform.h>

namespace zvec {
namespace ailego {

/*! Numerical Vector Array
 *
 *  Keeps vectors of one fixed dimension back to back in a single byte
 *  buffer; every vector occupies dimension() * sizeof(ValueType) bytes.
 */
template <typename T,
          typename =
              typename std::enable_if<IsTriviallyCopyable<T>::value>::type>
class NumericalVectorArray {
 public:
  //! Type of value
  using ValueType = typename NumericalVector<T>::ValueType;

  //! Constructor
  NumericalVectorArray(void) {}

  //! Constructor
  explicit NumericalVectorArray(size_t dim) : dimension_(dim) {}

  //! Constructor
  NumericalVectorArray(const NumericalVectorArray &rhs)
      : dimension_(rhs.dimension_), buffer_(rhs.buffer_) {}

  //! Constructor
  NumericalVectorArray(NumericalVectorArray &&rhs)
      : dimension_(rhs.dimension_), buffer_(std::move(rhs.buffer_)) {}

  //! Assignment
  NumericalVectorArray &operator=(const NumericalVectorArray &rhs) {
    dimension_ = rhs.dimension_;
    buffer_ = rhs.buffer_;
    return *this;
  }

  //! Assignment
  NumericalVectorArray &operator=(NumericalVectorArray &&rhs) {
    dimension_ = rhs.dimension_;
    buffer_ = std::move(rhs.buffer_);
    return *this;
  }

  //! Overloaded operator [] (no bounds check, see at())
  ValueType *operator[](size_t i) {
    return this->data() + i * dimension_;
  }

  //! Overloaded operator [] (no bounds check, see at())
  const ValueType *operator[](size_t i) const {
    return this->data() + i * dimension_;
  }

  //! Append a vector (throws std::length_error on a dimension mismatch)
  void append(const ValueType *vec, size_t dim) {
    this->require_dimension(dim);
    buffer_.append(reinterpret_cast<const char *>(vec), this->vector_bytes());
  }

  //! Append vectors (throws std::length_error on a dimension mismatch)
  void append(const ValueType *vec, size_t dim, size_t cnt) {
    this->require_dimension(dim);
    buffer_.append(reinterpret_cast<const char *>(vec),
                   cnt * this->vector_bytes());
  }

  //! Append a vector
  void append(const NumericalVector<ValueType> &vec) {
    this->append(vec.data(), vec.dimension());
  }

  //! Replace a vector (throws std::length_error on a dimension mismatch)
  void replace(size_t index, const ValueType *vec, size_t dim) {
    this->require_dimension(dim);
    const size_t stride = this->vector_bytes();
    buffer_.replace(index * stride, stride,
                    reinterpret_cast<const char *>(vec), stride);
  }

  //! Replace `cnt` vectors starting at `index` (throws std::length_error on
  //! a dimension mismatch)
  void replace(size_t index, const ValueType *vec, size_t dim, size_t cnt) {
    this->require_dimension(dim);
    const size_t stride = this->vector_bytes();
    const size_t span = stride * cnt;
    buffer_.replace(index * stride, span, reinterpret_cast<const char *>(vec),
                    span);
  }

  //! Replace a vector
  void replace(size_t index, const NumericalVector<ValueType> &vec) {
    this->replace(index, vec.data(), vec.dimension());
  }

  //! Request a change in capacity (room for `n` vectors)
  void reserve(size_t n) {
    buffer_.reserve(n * this->vector_bytes());
  }

  //! Resize the array to a length of n elements
  void resize(size_t n) {
    buffer_.resize(n * this->vector_bytes());
  }

  //! Clear the vector array
  void clear(void) {
    buffer_.clear();
  }

  //! Reset the vector array (new dimension, no content)
  void reset(size_t dim) {
    dimension_ = dim;
    buffer_.clear();
  }

  //! Requests the removal of unused capacity.
  void shrink_to_fit(void) {
    buffer_.shrink_to_fit();
  }

  //! Retrieve pointer of data
  ValueType *data(void) {
    return reinterpret_cast<ValueType *>(&buffer_[0]);
  }

  //! Retrieve pointer of data
  const ValueType *data(void) const {
    return reinterpret_cast<const ValueType *>(buffer_.data());
  }

  //! Retrieve pointer of data (throws std::out_of_range when i >= count())
  ValueType *at(size_t i) {
    if (ailego_unlikely(i >= this->count())) {
      throw std::out_of_range("Index overflow");
    }
    return this->data() + i * dimension_;
  }

  //! Retrieve pointer of data (throws std::out_of_range when i >= count())
  const ValueType *at(size_t i) const {
    if (ailego_unlikely(i >= this->count())) {
      throw std::out_of_range("Index overflow");
    }
    return this->data() + i * dimension_;
  }

  //! Test if the array is empty
  bool empty(void) const {
    return buffer_.empty();
  }

  //! Retrieve count of vectors (zero while the dimension is zero)
  size_t count(void) const {
    if (dimension_ == 0) {
      return 0u;
    }
    return buffer_.size() / this->vector_bytes();
  }

  //! Retrieve dimension of vector
  size_t dimension(void) const {
    return dimension_;
  }

  //! Retrieve size of array in bytes
  size_t bytes(void) const {
    return buffer_.size();
  }

 private:
  //! Number of bytes taken by one vector
  size_t vector_bytes(void) const {
    return dimension_ * sizeof(ValueType);
  }

  //! Reject a vector whose dimension differs from the one of the array
  void require_dimension(size_t dim) const {
    if (ailego_unlikely(dim != dimension_)) {
      throw std::length_error("Unmatched dimension");
    }
  }

  size_t dimension_{0u};
  std::string buffer_{};
};

/*! Nibble Vector Array
 *
 *  Keeps vectors of one fixed dimension back to back in a single byte
 *  buffer, two elements per byte: every vector occupies dimension() / 2
 *  bytes. The dimension is rounded up to a multiple of
 *  2 * sizeof(ValueType) when it is set.
 */
template <typename T,
          typename = typename std::enable_if<std::is_integral<T>::value>::type>
class NibbleVectorArray {
 public:
  //! Type of value
  using ValueType = typename NibbleVector<T>::ValueType;
  using StoreType = typename NibbleVector<T>::StoreType;

  //! Constructor
  NibbleVectorArray(void) {}

  //! Constructor (`dim` is rounded up, see the class description)
  explicit NibbleVectorArray(size_t dim) : dimension_(RoundUp(dim)) {}

  //! Constructor
  NibbleVectorArray(const NibbleVectorArray &rhs)
      : dimension_(rhs.dimension_), buffer_(rhs.buffer_) {}

  //! Constructor
  NibbleVectorArray(NibbleVectorArray &&rhs)
      : dimension_(rhs.dimension_), buffer_(std::move(rhs.buffer_)) {}

  //! Assignment
  NibbleVectorArray &operator=(const NibbleVectorArray &rhs) {
    dimension_ = rhs.dimension_;
    buffer_ = rhs.buffer_;
    return *this;
  }

  //! Assignment
  NibbleVectorArray &operator=(NibbleVectorArray &&rhs) {
    dimension_ = rhs.dimension_;
    buffer_ = std::move(rhs.buffer_);
    return *this;
  }

  //! Overloaded operator [] (no bounds check, see at())
  StoreType *operator[](size_t i) {
    char *first = &buffer_[0];
    return reinterpret_cast<StoreType *>(first + i * this->vector_bytes());
  }

  //! Overloaded operator [] (no bounds check, see at())
  const StoreType *operator[](size_t i) const {
    const char *first = buffer_.data();
    return reinterpret_cast<const StoreType *>(first +
                                               i * this->vector_bytes());
  }

  //! Append a vector (throws std::length_error on a dimension mismatch)
  void append(const StoreType *vec, size_t dim) {
    this->require_dimension(dim);
    buffer_.append(reinterpret_cast<const char *>(vec), this->vector_bytes());
  }

  //! Append vectors (throws std::length_error on a dimension mismatch)
  void append(const StoreType *vec, size_t dim, size_t cnt) {
    this->require_dimension(dim);
    buffer_.append(reinterpret_cast<const char *>(vec),
                   cnt * this->vector_bytes());
  }

  //! Append a vector
  void append(const NibbleVector<ValueType> &vec) {
    this->append(vec.data(), vec.dimension());
  }

  //! Replace a vector (throws std::length_error on a dimension mismatch)
  void replace(size_t index, const StoreType *vec, size_t dim) {
    this->require_dimension(dim);
    const size_t stride = this->vector_bytes();
    buffer_.replace(index * stride, stride,
                    reinterpret_cast<const char *>(vec), stride);
  }

  //! Replace `cnt` vectors starting at `index` (throws std::length_error on
  //! a dimension mismatch)
  void replace(size_t index, const StoreType *vec, size_t dim, size_t cnt) {
    this->require_dimension(dim);
    const size_t stride = this->vector_bytes();
    const size_t span = stride * cnt;
    buffer_.replace(index * stride, span, reinterpret_cast<const char *>(vec),
                    span);
  }

  //! Replace a vector
  void replace(size_t index, const NibbleVector<ValueType> &vec) {
    this->replace(index, vec.data(), vec.dimension());
  }

  //! Request a change in capacity (room for `n` vectors)
  void reserve(size_t n) {
    buffer_.reserve(n * this->vector_bytes());
  }

  //! Resize the array to a length of n elements
  void resize(size_t n) {
    buffer_.resize(n * this->vector_bytes());
  }

  //! Clear the vector array
  void clear(void) {
    buffer_.clear();
  }

  //! Reset the vector array (new rounded-up dimension, no content)
  void reset(size_t dim) {
    dimension_ = RoundUp(dim);
    buffer_.clear();
  }

  //! Requests the removal of unused capacity.
  void shrink_to_fit(void) {
    buffer_.shrink_to_fit();
  }

  //! Retrieve pointer of data
  StoreType *data(void) {
    return reinterpret_cast<StoreType *>(&buffer_[0]);
  }

  //! Retrieve pointer of data
  const StoreType *data(void) const {
    return reinterpret_cast<const StoreType *>(buffer_.data());
  }

  //! Retrieve pointer of data (throws std::out_of_range when i >= count())
  StoreType *at(size_t i) {
    if (ailego_unlikely(i >= this->count())) {
      throw std::out_of_range("Index overflow");
    }
    char *first = &buffer_[0];
    return reinterpret_cast<StoreType *>(first + i * this->vector_bytes());
  }

  //! Retrieve pointer of data (throws std::out_of_range when i >= count())
  const StoreType *at(size_t i) const {
    if (ailego_unlikely(i >= this->count())) {
      throw std::out_of_range("Index overflow");
    }
    const char *first = buffer_.data();
    return reinterpret_cast<const StoreType *>(first +
                                               i * this->vector_bytes());
  }

  //! Test if the array is empty
  bool empty(void) const {
    return buffer_.empty();
  }

  //! Retrieve count of vectors (zero while the dimension is below two)
  size_t count(void) const {
    if (dimension_ > 1) {
      return buffer_.size() / this->vector_bytes();
    }
    return 0u;
  }

  //! Retrieve dimension of vector
  size_t dimension(void) const {
    return dimension_;
  }

  //! Retrieve size of array in bytes
  size_t bytes(void) const {
    return buffer_.size();
  }

 private:
  //! Round a dimension up to the next multiple of 2 * sizeof(ValueType)
  static size_t RoundUp(size_t dim) {
    const size_t unit = sizeof(ValueType) << 1;
    return ((dim + unit - 1) / unit * sizeof(ValueType)) << 1;
  }

  //! Number of bytes taken by one vector (two elements per byte)
  size_t vector_bytes(void) const {
    return dimension_ >> 1;
  }

  //! Reject a vector whose dimension differs from the one of the array
  void require_dimension(size_t dim) const {
    if (ailego_unlikely(dim != dimension_)) {
      throw std::length_error("Unmatched dimension");
    }
  }

  size_t dimension_{0u};
  std::string buffer_{};
};

/*! Binary Vector Array
 */
template <typename T,
          typename = typename std::enable_if<std::is_integral<T>::value>::type>
class BinaryVectorArray {
  // Stores a sequence of equally sized bit vectors back to back in a single
  // byte buffer. The dimension is counted in bits and is always rounded up
  // to a whole number of ValueType words, so one vector occupies
  // (dimension_ / 8) bytes.
 public:
  //! Type of value
  using ValueType = typename BinaryVector<T>::ValueType;

  //! Constructor (zero dimension, empty buffer)
  BinaryVectorArray(void) {}

  //! Constructor: `dim` (in bits) is rounded up to a multiple of the number
  //! of bits in ValueType
  explicit BinaryVectorArray(size_t dim) : dimension_(align_dimension(dim)) {}

  //! Copy constructor
  BinaryVectorArray(const BinaryVectorArray &rhs)
      : dimension_(rhs.dimension_), buffer_(rhs.buffer_) {}

  //! Move constructor. Declared noexcept so that standard containers move
  //! (rather than copy) elements of this type when they reallocate.
  BinaryVectorArray(BinaryVectorArray &&rhs) noexcept
      : dimension_(rhs.dimension_), buffer_(std::move(rhs.buffer_)) {}

  //! Copy assignment
  BinaryVectorArray &operator=(const BinaryVectorArray &rhs) {
    dimension_ = rhs.dimension_;
    buffer_ = rhs.buffer_;
    return *this;
  }

  //! Move assignment (noexcept for the same reason as the move constructor)
  BinaryVectorArray &operator=(BinaryVectorArray &&rhs) noexcept {
    dimension_ = rhs.dimension_;
    buffer_ = std::move(rhs.buffer_);
    return *this;
  }

  //! Unchecked access to the i-th vector (no bounds check; see at())
  ValueType *operator[](size_t i) {
    return reinterpret_cast<ValueType *>(&buffer_[0] + i * (dimension_ >> 3));
  }

  //! Unchecked read-only access to the i-th vector
  const ValueType *operator[](size_t i) const {
    return reinterpret_cast<const ValueType *>(buffer_.data() +
                                               i * (dimension_ >> 3));
  }

  //! Append a vector; throws std::length_error if `dim` differs from the
  //! array dimension
  void append(const ValueType *vec, size_t dim) {
    if (ailego_unlikely(dim != dimension_)) {
      throw std::length_error("Unmatched dimension");
    }
    buffer_.append(reinterpret_cast<const char *>(vec), (dim >> 3));
  }

  //! Append `cnt` contiguous vectors; throws std::length_error if `dim`
  //! differs from the array dimension
  void append(const ValueType *vec, size_t dim, size_t cnt) {
    if (ailego_unlikely(dim != dimension_)) {
      throw std::length_error("Unmatched dimension");
    }
    buffer_.append(reinterpret_cast<const char *>(vec), cnt * (dim >> 3));
  }

  //! Append a vector
  void append(const BinaryVector<ValueType> &vec) {
    this->append(vec.data(), vec.dimension());
  }

  //! Replace the vector at `index`; throws std::length_error if `dim`
  //! differs from the array dimension
  void replace(size_t index, const ValueType *vec, size_t dim) {
    if (ailego_unlikely(dim != dimension_)) {
      throw std::length_error("Unmatched dimension");
    }
    size_t element_size = (dim >> 3);
    buffer_.replace(index * element_size, element_size,
                    reinterpret_cast<const char *>(vec), element_size);
  }

  //! Replace `cnt` vectors starting at `index`; throws std::length_error if
  //! `dim` differs from the array dimension
  void replace(size_t index, const ValueType *vec, size_t dim, size_t cnt) {
    if (ailego_unlikely(dim != dimension_)) {
      throw std::length_error("Unmatched dimension");
    }
    size_t element_size = (dim >> 3);
    size_t total = element_size * cnt;
    buffer_.replace(index * element_size, total,
                    reinterpret_cast<const char *>(vec), total);
  }

  //! Replace a vector
  void replace(size_t index, const BinaryVector<ValueType> &vec) {
    this->replace(index, vec.data(), vec.dimension());
  }

  //! Request a change in capacity (in vectors)
  void reserve(size_t n) {
    buffer_.reserve(n * (dimension_ >> 3));
  }

  //! Resize the array to a length of n elements
  void resize(size_t n) {
    buffer_.resize(n * (dimension_ >> 3));
  }

  //! Clear the vector array (the dimension is kept)
  void clear(void) {
    buffer_.clear();
  }

  //! Reset the vector array: set a new (rounded) dimension and drop all data
  void reset(size_t dim) {
    dimension_ = align_dimension(dim);
    buffer_.clear();
  }

  //! Requests the removal of unused capacity.
  void shrink_to_fit(void) {
    buffer_.shrink_to_fit();
  }

  //! Retrieve pointer of data
  ValueType *data(void) {
    return reinterpret_cast<ValueType *>(&buffer_[0]);
  }

  //! Retrieve pointer of data
  const ValueType *data(void) const {
    return reinterpret_cast<const ValueType *>(buffer_.data());
  }

  //! Checked access to the i-th vector; throws std::out_of_range
  ValueType *at(size_t i) {
    if (ailego_unlikely(i >= this->count())) {
      throw std::out_of_range("Index overflow");
    }
    return reinterpret_cast<ValueType *>(&buffer_[0] + i * (dimension_ >> 3));
  }

  //! Checked read-only access to the i-th vector; throws std::out_of_range
  const ValueType *at(size_t i) const {
    if (ailego_unlikely(i >= this->count())) {
      throw std::out_of_range("Index overflow");
    }
    return reinterpret_cast<const ValueType *>(buffer_.data() +
                                               i * (dimension_ >> 3));
  }

  //! Test if the array is empty
  bool empty(void) const {
    return buffer_.empty();
  }

  //! Retrieve count of vectors (zero while the dimension is zero)
  size_t count(void) const {
    return (dimension_ > 0 ? buffer_.size() / (dimension_ >> 3) : 0u);
  }

  //! Retrieve dimension of vector (in bits, after rounding)
  size_t dimension(void) const {
    return dimension_;
  }

  //! Retrieve size of array in bytes
  size_t bytes(void) const {
    return buffer_.size();
  }

 private:
  //! Round `dim` (in bits) up to a multiple of the bit width of ValueType.
  //! Shared by the sizing constructor and reset() so both always agree.
  static size_t align_dimension(size_t dim) {
    const size_t word_bits = sizeof(ValueType) << 3;
    return (dim + word_bits - 1) / word_bits * word_bits;
  }

  size_t dimension_{0u};
  std::string buffer_{};
};

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/encoding/json/mod_json.c
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <float.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <zvec/ailego/encoding/json/mod_json.h>

/* Build-time tunables. Each one is guarded by #ifndef, so a build can
 * override it by defining the macro before this point (for example on the
 * compiler command line). */
#ifndef MOD_JSON_TOKEN_DEFOPTS
#define MOD_JSON_TOKEN_DEFOPTS 0 /* default options of token */
#endif
#ifndef MOD_JSON_TOKEN_DEFOBJDEP
#define MOD_JSON_TOKEN_DEFOBJDEP 64 /* default objects depth of token */
#endif
#ifndef MOD_JSON_TOKEN_DEFARRDEP
#define MOD_JSON_TOKEN_DEFARRDEP 64 /* default arrays depth of token */
#endif
#ifndef MOD_JSON_STRING_DEFSIZE
#define MOD_JSON_STRING_DEFSIZE 32 /* default started size of string */
#endif
#ifndef MOD_JSON_ARRAY_DEFSIZE
#define MOD_JSON_ARRAY_DEFSIZE 32 /* default started size of array */
#endif
#ifndef MOD_JSON_OBJECT_DEFSIZE
#define MOD_JSON_OBJECT_DEFSIZE 32 /* default started size of object */
#endif

/* Allocator hooks: default to the C library allocator unless the build
 * already supplied replacements. */
#ifndef mod_json_malloc
#define mod_json_malloc malloc
#endif
#ifndef mod_json_free
#define mod_json_free free
#endif

/* Branch prediction hints; they expand to the plain expression on compilers
 * that do not define __GNUC__. */
#ifdef __GNUC__
#define mod_json_likely(x) __builtin_expect(!!(x), 1)
#define mod_json_unlikely(x) __builtin_expect(!!(x), 0)
#else
#define mod_json_likely(x) (x)
#define mod_json_unlikely(x) (x)
#endif

/* Early-return helpers. Each expands to a bare `if (...) return ...` with no
 * trailing semicolon and no do/while wrapper, so it must be used as a
 * complete statement and never directly before an `else`. */

/* Return -1 from the enclosing function when COND is non-zero. */
#define mod_json_minus_if_ne_zero(COND) \
  if (mod_json_unlikely((COND) != 0)) return (-1)

/* Return -1 from the enclosing function when COND is false. */
#define mod_json_minus_if_false(COND) \
  if (mod_json_unlikely(!(COND))) return (-1)

/* Return NULL from the enclosing function when COND is non-zero. */
#define mod_json_null_if_ne_zero(COND) \
  if (mod_json_unlikely((COND) != 0)) return (NULL)

/* Return NULL from the enclosing function when COND is false. */
#define mod_json_null_if_false(COND) \
  if (mod_json_unlikely(!(COND))) return (NULL)

/* MSVC compatibility: silence warning C4200 (zero-sized array in a struct,
 * which the token and parser structs below rely on) and map strtoull and
 * snprintf onto the MSVC-specific equivalents. */
#if defined(_MSC_VER)
#pragma warning(disable : 4200)
#define strtoull _strtoui64
#define snprintf(buf, size, format, ...) \
  _snprintf_s(buf, size, _TRUNCATE, format, ##__VA_ARGS__)
#endif
/* Aliases used by the rest of this file for C library string/number
 * routines. */
#define mod_json_utils_snprintf snprintf
#define mod_json_utils_strtoi strtoull
#define mod_json_utils_strtof strtod
#define mod_json_utils_strlen strlen

/*! JSON Token
 */
struct mod_json_token {
  mod_json_state_t state;           /* current state */
  mod_json_error_t error;           /* last error code */
  mod_json_cchar_t *context;        /* text position associated with the
                                       state/error -- TODO confirm usage */
  mod_json_size_t options;          /* option flags */
  mod_json_size_t object_max_depth; /* limit for object_depth */
  mod_json_size_t array_max_depth;  /* limit for array_depth */
  mod_json_size_t object_depth;     /* current object nesting depth */
  mod_json_size_t array_depth;      /* current array nesting depth */
  mod_json_event_t event_code;      /* event code passed to event_proc --
                                       TODO confirm */
  mod_json_event_proc event_proc;   /* event callback */
  mod_json_void_t *param;           /* opaque user pointer, presumably handed
                                       to event_proc -- verify at call site */
  mod_json_char_t tags[0];          /* zero-length trailing array (compiler
                                       extension); its storage is presumably
                                       allocated together with the token */
};

typedef struct mod_json_parser mod_json_parser_t;

/*! JSON Parser
 *
 *  Holds the state used while building a value tree. The val_* members look
 *  like cached constant values (null, true, false, 0, 0.0, empty) that can
 *  be shared between nodes -- NOTE(review): confirm against the parser
 *  implementation.
 */
struct mod_json_parser {
  mod_json_string_t *key;      /* key string currently being handled */
  mod_json_value_t *val_null;  /* presumably a shared JSON null value */
  mod_json_value_t *val_true;  /* presumably a shared boolean true value */
  mod_json_value_t *val_false; /* presumably a shared boolean false value */
  mod_json_value_t *val_zero;  /* presumably a shared integer 0 value */
  mod_json_value_t *val_zerof; /* presumably a shared float 0.0 value */
  mod_json_value_t *val_empty; /* presumably a shared empty value */
  mod_json_value_t *vals[0];   /* zero-length trailing array (compiler
                                  extension) of value pointers */
};

/* Round n up to the next power of two (n itself when it already is one).
 *
 * The classic bit-smearing trick: after subtracting one, every bit below the
 * highest set bit is filled in, so adding one yields the power of two. The
 * shift sequence is derived from the width of mod_json_size_t, so the result
 * is also correct for values above 2^32 when the type is 64 bits wide (the
 * previous fixed 1/2/4/8/16 sequence was not). As before, n == 0 wraps
 * around and yields 0.
 */
static inline mod_json_size_t mod_json_utils_clp2(mod_json_size_t n) {
  mod_json_size_t shift;

  n = n - 1;
  for (shift = 1; shift < sizeof(n) * 8; shift <<= 1) {
    n = n | (n >> shift);
  }
  return (n + 1);
}

/* Write the decimal representation of val into buf and NUL-terminate it.
 *
 * buf: destination; the caller must provide room for the sign, all digits
 *      and the terminator.
 * val: integer to format.
 * Returns the number of characters written, excluding the terminator.
 *
 * The magnitude is computed in unsigned arithmetic. Negating the most
 * negative integer in the signed type overflows (undefined behaviour) and
 * previously produced a bogus digit; unsigned negation is well defined.
 */
static inline mod_json_size_t mod_json_utils_itostr(mod_json_char_t *buf,
                                                    mod_json_integer_t val) {
  mod_json_char_t *pos, *first, *last;
  unsigned long long mag;

  pos = buf;
  if (val < 0) {
    *pos++ = '-';
    /* modular negation yields |val| even for the minimum value */
    mag = 0ULL - (unsigned long long)val;
  } else {
    mag = (unsigned long long)val;
  }

  /* save pointer to first digit */
  first = pos;

  /* digits are produced least significant first */
  do {
    /* convert to ASCII and store */
    *pos++ = (mod_json_char_t)(mag % 10 + '0');

    /* next digit */
    mag /= 10;

  } while (mag > 0);

  *pos = '\0';

  /* save pointer to last digit */
  last = pos - 1;

  /* reverse digit string */
  while (first < last) {
    mod_json_char_t temp = *first;
    *first++ = *last;
    *last-- = temp;
  }
  return (mod_json_size_t)(pos - buf);
}

/* Return 10 raised to the power n using a lookup table.
 *
 * Exponents below -308 yield 0.0. Exponents above +308 are outside the table
 * and outside the range the table covers, so they yield HUGE_VAL; previously
 * such an exponent indexed past the end of the table.
 */
static inline mod_json_float_t mod_json_utils_pow10(int n) {
  /* 1e-308...1e308: 617 * 8 bytes = 4936 bytes */
  static const mod_json_float_t etab[] = {
      1e-308, 1e-307, 1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300,
      1e-299, 1e-298, 1e-297, 1e-296, 1e-295, 1e-294, 1e-293, 1e-292, 1e-291,
      1e-290, 1e-289, 1e-288, 1e-287, 1e-286, 1e-285, 1e-284, 1e-283, 1e-282,
      1e-281, 1e-280, 1e-279, 1e-278, 1e-277, 1e-276, 1e-275, 1e-274, 1e-273,
      1e-272, 1e-271, 1e-270, 1e-269, 1e-268, 1e-267, 1e-266, 1e-265, 1e-264,
      1e-263, 1e-262, 1e-261, 1e-260, 1e-259, 1e-258, 1e-257, 1e-256, 1e-255,
      1e-254, 1e-253, 1e-252, 1e-251, 1e-250, 1e-249, 1e-248, 1e-247, 1e-246,
      1e-245, 1e-244, 1e-243, 1e-242, 1e-241, 1e-240, 1e-239, 1e-238, 1e-237,
      1e-236, 1e-235, 1e-234, 1e-233, 1e-232, 1e-231, 1e-230, 1e-229, 1e-228,
      1e-227, 1e-226, 1e-225, 1e-224, 1e-223, 1e-222, 1e-221, 1e-220, 1e-219,
      1e-218, 1e-217, 1e-216, 1e-215, 1e-214, 1e-213, 1e-212, 1e-211, 1e-210,
      1e-209, 1e-208, 1e-207, 1e-206, 1e-205, 1e-204, 1e-203, 1e-202, 1e-201,
      1e-200, 1e-199, 1e-198, 1e-197, 1e-196, 1e-195, 1e-194, 1e-193, 1e-192,
      1e-191, 1e-190, 1e-189, 1e-188, 1e-187, 1e-186, 1e-185, 1e-184, 1e-183,
      1e-182, 1e-181, 1e-180, 1e-179, 1e-178, 1e-177, 1e-176, 1e-175, 1e-174,
      1e-173, 1e-172, 1e-171, 1e-170, 1e-169, 1e-168, 1e-167, 1e-166, 1e-165,
      1e-164, 1e-163, 1e-162, 1e-161, 1e-160, 1e-159, 1e-158, 1e-157, 1e-156,
      1e-155, 1e-154, 1e-153, 1e-152, 1e-151, 1e-150, 1e-149, 1e-148, 1e-147,
      1e-146, 1e-145, 1e-144, 1e-143, 1e-142, 1e-141, 1e-140, 1e-139, 1e-138,
      1e-137, 1e-136, 1e-135, 1e-134, 1e-133, 1e-132, 1e-131, 1e-130, 1e-129,
      1e-128, 1e-127, 1e-126, 1e-125, 1e-124, 1e-123, 1e-122, 1e-121, 1e-120,
      1e-119, 1e-118, 1e-117, 1e-116, 1e-115, 1e-114, 1e-113, 1e-112, 1e-111,
      1e-110, 1e-109, 1e-108, 1e-107, 1e-106, 1e-105, 1e-104, 1e-103, 1e-102,
      1e-101, 1e-100, 1e-99,  1e-98,  1e-97,  1e-96,  1e-95,  1e-94,  1e-93,
      1e-92,  1e-91,  1e-90,  1e-89,  1e-88,  1e-87,  1e-86,  1e-85,  1e-84,
      1e-83,  1e-82,  1e-81,  1e-80,  1e-79,  1e-78,  1e-77,  1e-76,  1e-75,
      1e-74,  1e-73,  1e-72,  1e-71,  1e-70,  1e-69,  1e-68,  1e-67,  1e-66,
      1e-65,  1e-64,  1e-63,  1e-62,  1e-61,  1e-60,  1e-59,  1e-58,  1e-57,
      1e-56,  1e-55,  1e-54,  1e-53,  1e-52,  1e-51,  1e-50,  1e-49,  1e-48,
      1e-47,  1e-46,  1e-45,  1e-44,  1e-43,  1e-42,  1e-41,  1e-40,  1e-39,
      1e-38,  1e-37,  1e-36,  1e-35,  1e-34,  1e-33,  1e-32,  1e-31,  1e-30,
      1e-29,  1e-28,  1e-27,  1e-26,  1e-25,  1e-24,  1e-23,  1e-22,  1e-21,
      1e-20,  1e-19,  1e-18,  1e-17,  1e-16,  1e-15,  1e-14,  1e-13,  1e-12,
      1e-11,  1e-10,  1e-9,   1e-8,   1e-7,   1e-6,   1e-5,   1e-4,   1e-3,
      1e-2,   1e-1,   1e+0,   1e+1,   1e+2,   1e+3,   1e+4,   1e+5,   1e+6,
      1e+7,   1e+8,   1e+9,   1e+10,  1e+11,  1e+12,  1e+13,  1e+14,  1e+15,
      1e+16,  1e+17,  1e+18,  1e+19,  1e+20,  1e+21,  1e+22,  1e+23,  1e+24,
      1e+25,  1e+26,  1e+27,  1e+28,  1e+29,  1e+30,  1e+31,  1e+32,  1e+33,
      1e+34,  1e+35,  1e+36,  1e+37,  1e+38,  1e+39,  1e+40,  1e+41,  1e+42,
      1e+43,  1e+44,  1e+45,  1e+46,  1e+47,  1e+48,  1e+49,  1e+50,  1e+51,
      1e+52,  1e+53,  1e+54,  1e+55,  1e+56,  1e+57,  1e+58,  1e+59,  1e+60,
      1e+61,  1e+62,  1e+63,  1e+64,  1e+65,  1e+66,  1e+67,  1e+68,  1e+69,
      1e+70,  1e+71,  1e+72,  1e+73,  1e+74,  1e+75,  1e+76,  1e+77,  1e+78,
      1e+79,  1e+80,  1e+81,  1e+82,  1e+83,  1e+84,  1e+85,  1e+86,  1e+87,
      1e+88,  1e+89,  1e+90,  1e+91,  1e+92,  1e+93,  1e+94,  1e+95,  1e+96,
      1e+97,  1e+98,  1e+99,  1e+100, 1e+101, 1e+102, 1e+103, 1e+104, 1e+105,
      1e+106, 1e+107, 1e+108, 1e+109, 1e+110, 1e+111, 1e+112, 1e+113, 1e+114,
      1e+115, 1e+116, 1e+117, 1e+118, 1e+119, 1e+120, 1e+121, 1e+122, 1e+123,
      1e+124, 1e+125, 1e+126, 1e+127, 1e+128, 1e+129, 1e+130, 1e+131, 1e+132,
      1e+133, 1e+134, 1e+135, 1e+136, 1e+137, 1e+138, 1e+139, 1e+140, 1e+141,
      1e+142, 1e+143, 1e+144, 1e+145, 1e+146, 1e+147, 1e+148, 1e+149, 1e+150,
      1e+151, 1e+152, 1e+153, 1e+154, 1e+155, 1e+156, 1e+157, 1e+158, 1e+159,
      1e+160, 1e+161, 1e+162, 1e+163, 1e+164, 1e+165, 1e+166, 1e+167, 1e+168,
      1e+169, 1e+170, 1e+171, 1e+172, 1e+173, 1e+174, 1e+175, 1e+176, 1e+177,
      1e+178, 1e+179, 1e+180, 1e+181, 1e+182, 1e+183, 1e+184, 1e+185, 1e+186,
      1e+187, 1e+188, 1e+189, 1e+190, 1e+191, 1e+192, 1e+193, 1e+194, 1e+195,
      1e+196, 1e+197, 1e+198, 1e+199, 1e+200, 1e+201, 1e+202, 1e+203, 1e+204,
      1e+205, 1e+206, 1e+207, 1e+208, 1e+209, 1e+210, 1e+211, 1e+212, 1e+213,
      1e+214, 1e+215, 1e+216, 1e+217, 1e+218, 1e+219, 1e+220, 1e+221, 1e+222,
      1e+223, 1e+224, 1e+225, 1e+226, 1e+227, 1e+228, 1e+229, 1e+230, 1e+231,
      1e+232, 1e+233, 1e+234, 1e+235, 1e+236, 1e+237, 1e+238, 1e+239, 1e+240,
      1e+241, 1e+242, 1e+243, 1e+244, 1e+245, 1e+246, 1e+247, 1e+248, 1e+249,
      1e+250, 1e+251, 1e+252, 1e+253, 1e+254, 1e+255, 1e+256, 1e+257, 1e+258,
      1e+259, 1e+260, 1e+261, 1e+262, 1e+263, 1e+264, 1e+265, 1e+266, 1e+267,
      1e+268, 1e+269, 1e+270, 1e+271, 1e+272, 1e+273, 1e+274, 1e+275, 1e+276,
      1e+277, 1e+278, 1e+279, 1e+280, 1e+281, 1e+282, 1e+283, 1e+284, 1e+285,
      1e+286, 1e+287, 1e+288, 1e+289, 1e+290, 1e+291, 1e+292, 1e+293, 1e+294,
      1e+295, 1e+296, 1e+297, 1e+298, 1e+299, 1e+300, 1e+301, 1e+302, 1e+303,
      1e+304, 1e+305, 1e+306, 1e+307, 1e+308};

  if (n < -308) {
    return 0.0;
  }
  if (n > 308) {
    /* beyond the table: report overflow instead of reading out of bounds */
    return (mod_json_float_t)HUGE_VAL;
  }
  return etab[n + 308];
}

/* Skip leading blanks (tab, LF, VT, FF, CR, space) and return a pointer to
 * the first character that is not one of them. The terminating NUL is not a
 * blank, so the scan always stops at the end of the string.
 */
static inline mod_json_cchar_t *mod_json_utils_strskpb(mod_json_cchar_t *cstr) {
  static const mod_json_char_t blanks[256] = {
      0, 0, 0, 0, 0, 0, 0, 0, 0, '\t', '\n', '\v', '\f', '\r', 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    0,    0,    ' ',  0,    0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    0,    0,    0,    0,    0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    0,    0,    0,    0,    0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    0,    0,    0,    0,    0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    0,    0,    0,    0,    0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    0,    0,    0,    0,    0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    0,    0,    0,    0,    0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    0,    0,    0,    0,    0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    0,    0,    0,    0,    0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    0,    0,    0,    0,    0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    0,    0,    0,    0,    0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    0,    0,    0,    0,    0, 0};

  /* Index through an unsigned byte: where plain char is signed, bytes at or
   * above 0x80 (for example inside UTF-8 text) would otherwise produce a
   * negative index and read before the table. */
  while (*(blanks + (mod_json_uchar_t)*cstr)) {
    ++cstr;
  }
  return cstr;
}

/* Skip the remainder of a single-line comment: advance to the first CR or
 * LF, then skip the blanks that follow it. When the string ends first, a
 * pointer to the terminating NUL is returned.
 */
static inline mod_json_cchar_t *mod_json_utils_strskpc1(
    mod_json_cchar_t *cstr) {
  for (; *cstr != '\0'; ++cstr) {
    if (*cstr == '\r' || *cstr == '\n') {
      return mod_json_utils_strskpb(cstr + 1);
    }
  }
  return cstr;
}

/* Skip the remainder of a block comment: advance past the closing
 * asterisk-slash pair, then skip the blanks that follow it. When the string
 * ends first, a pointer to the terminating NUL is returned.
 */
static inline mod_json_cchar_t *mod_json_utils_strskpc2(
    mod_json_cchar_t *cstr) {
  for (; *cstr != '\0'; ++cstr) {
    /* asterisk, slash */
    if (cstr[0] == '*' && cstr[1] == '/') {
      return mod_json_utils_strskpb(cstr + 2);
    }
  }
  return cstr;
}

/* Skip blanks and any number of consecutive comments (two-slash line
 * comments and slash-asterisk block comments). Returns a pointer to the
 * first character that is neither; a lone slash that does not start a
 * comment is left in place.
 */
static inline mod_json_cchar_t *mod_json_utils_strskp(mod_json_cchar_t *cstr) {
  cstr = mod_json_utils_strskpb(cstr);

  for (;;) {
    if (*cstr != '/') {
      return cstr;
    }

    /* the second character decides the comment flavour */
    switch (cstr[1]) {
      case '/':
        cstr = mod_json_utils_strskpc1(cstr + 2);
        break;

      case '*':
        cstr = mod_json_utils_strskpc2(cstr + 2);
        break;

      default:
        /* not a comment */
        return cstr;
    }
  }
}

/* Map a hexadecimal digit character to its value (0..15). Any character
 * that is not a hexadecimal digit maps to 16.
 */
static inline int mod_json_utils_char2hex(mod_json_char_t ch) {
  static const mod_json_char_t char2hex[256] = {
      16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
      16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
      16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 0,  1,  2,  3,  4,  5,
      6,  7,  8,  9,  16, 16, 16, 16, 16, 16, 16, 10, 11, 12, 13, 14, 15, 16,
      16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
      16, 16, 16, 16, 16, 16, 16, 10, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16,
      16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
      16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
      16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
      16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
      16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
      16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
      16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
      16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
      16, 16, 16, 16};

  /* Index through an unsigned byte: where plain char is signed, a byte at or
   * above 0x80 would otherwise give a negative index and read before the
   * table. */
  return *(char2hex + (mod_json_uchar_t)ch);
}

/* Strict search for the closing quote `quo`.
 *
 * Scans from cstr and returns a pointer to the first unescaped `quo`.
 * Returns NULL when a control character (<= 0x1f, which includes the
 * terminating NUL) is met first, or when a backslash is followed by anything
 * other than one of the accepted escapes (quote, slash, b, f, backslash, n,
 * r, t, or u plus four hexadecimal digits).
 */
static inline mod_json_cchar_t *mod_json_utils_strfquo(mod_json_cchar_t *cstr,
                                                       mod_json_char_t quo) {
  mod_json_char_t c;
  int i;

  while ((c = *cstr) != quo) {
    if ((mod_json_uchar_t)c <= 0x1f) {
      return NULL;
    }

    if (c == '\\') {
      /* inspect the escaped character */
      ++cstr;
      switch (*cstr) {
        case '\"':
        case '/':
        case 'b':
        case 'f':
        case '\\':
        case 'n':
        case 'r':
        case 't':
          /* single-character escape */
          break;

        case 'u':
          /* exactly four hexadecimal digits must follow; stop at the first
           * one that is not, so nothing past a NUL is ever read */
          for (i = 1; i <= 4; ++i) {
            if (mod_json_utils_char2hex(*(cstr + i)) > 15) {
              return NULL;
            }
          }
          cstr += 4;
          break;

        default:
          /* invalid */
          return NULL;
      }
    }
    ++cstr;
  }
  /* found it */
  return cstr;
}

/* Lenient search for the closing quote `quo`: a backslash simply hides the
 * character after it, whatever that character is. Returns a pointer to the
 * quote, or NULL when the string ends first.
 */
static inline mod_json_cchar_t *mod_json_utils_strfquo2(mod_json_cchar_t *cstr,
                                                        mod_json_char_t quo) {
  while (*cstr != '\0') {
    if (*cstr == quo) {
      /* found it */
      return cstr;
    }

    if (*cstr == '\\') {
      /* ignore next char */
      ++cstr;
      if (*cstr == '\0') {
        return NULL;
      }
    }
    ++cstr;
  }
  return NULL;
}

/* Find the first separator (colon or any blank character) in cstr. Returns
 * a pointer to it, or to the terminating NUL when there is none.
 */
static inline mod_json_cchar_t *mod_json_utils_strfsep(mod_json_cchar_t *cstr) {
  for (;; ++cstr) {
    switch (*cstr) {
      case '\0':
      case ':':
      case ' ':
      case '\t':
      case '\r':
      case '\n':
      case '\f':
      case '\v':
        return cstr;

      default:
        break;
    }
  }
}

/* Like mod_json_utils_strfsep, but the start of a comment (a slash followed
 * by a slash or an asterisk) also counts as a separator. Returns a pointer
 * to the separator, or to the terminating NUL when there is none.
 */
static inline mod_json_cchar_t *mod_json_utils_strfsep2(
    mod_json_cchar_t *cstr) {
  for (;; ++cstr) {
    switch (*cstr) {
      case '\0':
      case ':':
      case ' ':
      case '\t':
      case '\r':
      case '\n':
      case '\f':
      case '\v':
        return cstr;

      case '/':
        /* only a slash that opens a comment terminates the token */
        if (cstr[1] == '/' || cstr[1] == '*') {
          return cstr;
        }
        break;

      default:
        break;
    }
  }
}

/* Encode the 16-bit code unit (high << 8 | low) as UTF-8 into buf.
 *
 * buf:  destination buffer.
 * size: bytes available in buf.
 * Returns the position just past the bytes written, or a null pointer when
 * `size` is too small for the encoding (nothing is written in that case).
 */
static inline mod_json_char_t *mod_json_utils_uni2utf8(mod_json_char_t *buf,
                                                       mod_json_size_t size,
                                                       mod_json_uchar_t high,
                                                       mod_json_uchar_t low) {
  mod_json_size_t need;

  /* how many bytes does the encoding take? */
  if (high >= 0x8) {
    need = 3; /* 0800 - FFFF | 1110xxxx 10xxxxxx 10xxxxxx */
  } else if (high > 0 || low >= 0x80) {
    need = 2; /* 0080 - 07FF | 110xxxxx 10xxxxxx */
  } else {
    need = 1; /* 0000 - 007F | 0xxxxxxx */
  }

  if (size < need) {
    return (mod_json_char_t *)0;
  }

  switch (need) {
    case 3:
      buf[0] = (mod_json_char_t)(0xE0 | (high >> 4));
      buf[1] = (mod_json_char_t)(0x80 | ((high & 0xF) << 2) | (low >> 6));
      buf[2] = (mod_json_char_t)(0x80 | (low & 0x3F));
      break;

    case 2:
      buf[0] = (mod_json_char_t)(0xC0 | (high << 2) | (low >> 6));
      buf[1] = (mod_json_char_t)(0x80 | (low & 0x3F));
      break;

    default:
      buf[0] = (mod_json_char_t)(low);
      break;
  }
  return buf + need;
}

/* Create a JSON null value with a reference count of one.
 * Returns NULL when the allocation fails.
 */
mod_json_value_t *mod_json_value_set_null(void) {
  mod_json_value_t *node =
      (mod_json_value_t *)mod_json_malloc(sizeof(mod_json_value_t));

  if (mod_json_unlikely(!node)) {
    return NULL;
  }

  node->type = mod_json_type_null;
  node->data.c_int = 0;
  node->refer = 1;
  return node;
}

/* Create an object value with a reference count of one. A non-NULL `obj` is
 * grabbed through mod_json_object_grab; a NULL `obj` is stored as NULL.
 * Returns NULL when the allocation fails.
 */
mod_json_value_t *mod_json_value_set_object(mod_json_object_t *obj) {
  mod_json_value_t *node =
      (mod_json_value_t *)mod_json_malloc(sizeof(mod_json_value_t));

  if (mod_json_unlikely(!node)) {
    return NULL;
  }

  node->refer = 1;
  node->type = mod_json_type_object;
  if (obj) {
    node->data.c_obj = mod_json_object_grab(obj);
  } else {
    node->data.c_obj = NULL;
  }
  return node;
}

/* Create an array value with a reference count of one. A non-NULL `arr` is
 * grabbed through mod_json_array_grab; a NULL `arr` is stored as NULL.
 * Returns NULL when the allocation fails.
 */
mod_json_value_t *mod_json_value_set_array(mod_json_array_t *arr) {
  mod_json_value_t *node =
      (mod_json_value_t *)mod_json_malloc(sizeof(mod_json_value_t));

  if (mod_json_unlikely(!node)) {
    return NULL;
  }

  node->refer = 1;
  node->type = mod_json_type_array;
  if (arr) {
    node->data.c_arr = mod_json_array_grab(arr);
  } else {
    node->data.c_arr = NULL;
  }
  return node;
}

/* Create a string value with a reference count of one. A non-NULL `str` is
 * grabbed through mod_json_string_grab; a NULL `str` is stored as NULL.
 * Returns NULL when the allocation fails.
 */
mod_json_value_t *mod_json_value_set_string(mod_json_string_t *str) {
  mod_json_value_t *node =
      (mod_json_value_t *)mod_json_malloc(sizeof(mod_json_value_t));

  if (mod_json_unlikely(!node)) {
    return NULL;
  }

  node->refer = 1;
  node->type = mod_json_type_string;
  if (str) {
    node->data.c_str = mod_json_string_grab(str);
  } else {
    node->data.c_str = NULL;
  }
  return node;
}

/* Create a string value from `len` bytes at `buf`. The string is built with
 * mod_json_string_set and owned by the new value, whose reference count is
 * one. Returns NULL when either allocation fails; nothing is leaked.
 */
mod_json_value_t *mod_json_value_set_buffer(mod_json_cchar_t *buf,
                                            mod_json_size_t len) {
  mod_json_string_t *text;
  mod_json_value_t *node =
      (mod_json_value_t *)mod_json_malloc(sizeof(mod_json_value_t));

  if (mod_json_unlikely(!node)) {
    return NULL;
  }

  /* build the string payload; release the value again on failure */
  text = mod_json_string_set(buf, len);
  if (mod_json_likely(text != NULL)) {
    node->refer = 1;
    node->type = mod_json_type_string;
    node->data.c_str = text;
    return node;
  }

  mod_json_free(node);
  return NULL;
}

/* Create an integer value holding `num` with a reference count of one.
 * Returns NULL when the allocation fails.
 */
mod_json_value_t *mod_json_value_set_integer(mod_json_integer_t num) {
  mod_json_value_t *node =
      (mod_json_value_t *)mod_json_malloc(sizeof(mod_json_value_t));

  if (mod_json_unlikely(!node)) {
    return NULL;
  }

  node->type = mod_json_type_integer;
  node->data.c_int = num;
  node->refer = 1;
  return node;
}

/* Create a floating-point value holding `dbl` with a reference count of
 * one. Returns NULL when the allocation fails.
 */
mod_json_value_t *mod_json_value_set_float(mod_json_float_t dbl) {
  mod_json_value_t *node =
      (mod_json_value_t *)mod_json_malloc(sizeof(mod_json_value_t));

  if (mod_json_unlikely(!node)) {
    return NULL;
  }

  node->type = mod_json_type_float;
  node->data.c_float = dbl;
  node->refer = 1;
  return node;
}

/* Create a boolean value with a reference count of one. Any non-zero `bol`
 * is normalised to MOD_JSON_TRUE. Returns NULL when the allocation fails.
 */
mod_json_value_t *mod_json_value_set_boolean(mod_json_boolean_t bol) {
  mod_json_value_t *node =
      (mod_json_value_t *)mod_json_malloc(sizeof(mod_json_value_t));

  if (mod_json_unlikely(!node)) {
    return NULL;
  }

  node->refer = 1;
  node->type = mod_json_type_boolean;
  if (bol) {
    node->data.c_bool = MOD_JSON_TRUE;
  } else {
    node->data.c_bool = MOD_JSON_FALSE;
  }
  return node;
}

/* Release the payload referenced by `val` (object, array or string). Other
 * types hold nothing that needs releasing. The type tag and data field are
 * left as they were; callers overwrite them afterwards.
 */
static inline void mod_json_value_clear(mod_json_value_t *val) {
  if (val->type == mod_json_type_object) {
    mod_json_object_unset(val->data.c_obj);
  } else if (val->type == mod_json_type_array) {
    mod_json_array_unset(val->data.c_arr);
  } else if (val->type == mod_json_type_string) {
    mod_json_string_unset(val->data.c_str);
  }
}

/* Turn `val` into a JSON null, releasing whatever it held. NULL is ignored. */
void mod_json_value_assign_null(mod_json_value_t *val) {
  if (!val) {
    return;
  }

  mod_json_value_clear(val);
  val->data.c_int = 0;
  val->type = mod_json_type_null;
}

/* Make `val` an object value referring to `obj` (NULL is stored as NULL),
 * releasing whatever `val` held before. A NULL `val` is ignored.
 *
 * The new object is grabbed BEFORE the old payload is released. If `obj` is
 * the object `val` already holds, or is owned by the old payload, releasing
 * first could free it and the grab would then touch freed memory.
 */
void mod_json_value_assign_object(mod_json_value_t *val,
                                  mod_json_object_t *obj) {
  if (val) {
    mod_json_object_t *grabbed = obj ? mod_json_object_grab(obj) : NULL;

    mod_json_value_clear(val);
    val->type = mod_json_type_object;
    val->data.c_obj = grabbed;
  }
}

/* Make `val` an array value referring to `arr` (NULL is stored as NULL),
 * releasing whatever `val` held before. A NULL `val` is ignored.
 *
 * The new array is grabbed BEFORE the old payload is released. If `arr` is
 * the array `val` already holds, or is owned by the old payload, releasing
 * first could free it and the grab would then touch freed memory.
 */
void mod_json_value_assign_array(mod_json_value_t *val, mod_json_array_t *arr) {
  if (val) {
    mod_json_array_t *grabbed = arr ? mod_json_array_grab(arr) : NULL;

    mod_json_value_clear(val);
    val->type = mod_json_type_array;
    val->data.c_arr = grabbed;
  }
}

/* Make `val` a string value referring to `str` (NULL is stored as NULL),
 * releasing whatever `val` held before. A NULL `val` is ignored.
 *
 * The new string is grabbed BEFORE the old payload is released. If `str` is
 * the string `val` already holds, or is owned by the old payload, releasing
 * first could free it and the grab would then touch freed memory.
 */
void mod_json_value_assign_string(mod_json_value_t *val,
                                  mod_json_string_t *str) {
  if (val) {
    mod_json_string_t *grabbed = str ? mod_json_string_grab(str) : NULL;

    mod_json_value_clear(val);
    val->type = mod_json_type_string;
    val->data.c_str = grabbed;
  }
}

/* Make `val` an integer value holding `num`, releasing whatever it held.
 * A NULL `val` is ignored.
 */
void mod_json_value_assign_integer(mod_json_value_t *val,
                                   mod_json_integer_t num) {
  if (!val) {
    return;
  }

  mod_json_value_clear(val);
  val->data.c_int = num;
  val->type = mod_json_type_integer;
}

/* Make `val` a floating-point value holding `dbl`, releasing whatever it
 * held. A NULL `val` is ignored.
 */
void mod_json_value_assign_float(mod_json_value_t *val, mod_json_float_t dbl) {
  if (!val) {
    return;
  }

  mod_json_value_clear(val);
  val->data.c_float = dbl;
  val->type = mod_json_type_float;
}

/* Make `val` a boolean value, releasing whatever it held. Any non-zero
 * `bol` is normalised to MOD_JSON_TRUE. A NULL `val` is ignored.
 */
void mod_json_value_assign_boolean(mod_json_value_t *val,
                                   mod_json_boolean_t bol) {
  if (!val) {
    return;
  }

  mod_json_value_clear(val);
  val->type = mod_json_type_boolean;
  if (bol) {
    val->data.c_bool = MOD_JSON_TRUE;
  } else {
    val->data.c_bool = MOD_JSON_FALSE;
  }
}

/* Copy the content of `src` into `dst`, dispatching on the type of `src`.
 *
 * Nothing happens when `dst` is NULL or when both arguments are the same
 * value. A NULL `src`, or a `src` of an unrecognised type, turns `dst` into
 * a JSON null. Container and string payloads are handed to the matching
 * assign function rather than copied here.
 */
void mod_json_value_assign(mod_json_value_t *dst, mod_json_value_t *src) {
  if (dst == NULL || src == dst) {
    return;
  }

  if (src == NULL) {
    /* treat as JSON null */
    mod_json_value_assign_null(dst);
    return;
  }

  if (src->type == mod_json_type_boolean) {
    mod_json_value_assign_boolean(dst, src->data.c_bool);
  } else if (src->type == mod_json_type_integer) {
    mod_json_value_assign_integer(dst, src->data.c_int);
  } else if (src->type == mod_json_type_float) {
    mod_json_value_assign_float(dst, src->data.c_float);
  } else if (src->type == mod_json_type_string) {
    mod_json_value_assign_string(dst, src->data.c_str);
  } else if (src->type == mod_json_type_array) {
    mod_json_value_assign_array(dst, src->data.c_arr);
  } else if (src->type == mod_json_type_object) {
    mod_json_value_assign_object(dst, src->data.c_obj);
  } else {
    mod_json_value_assign_null(dst);
  }
}

/* Merge `arr` into the array held by `val`.
 *
 * When `val` is not an array value (or holds a NULL array) it simply becomes
 * an array value referring to `arr`. Otherwise, if the held array is shared,
 * it is replaced by a private clone before merging so other holders are not
 * affected.
 *
 * The clone is created BEFORE the reference to the shared array is dropped:
 * if cloning fails, `val` keeps its original array and -1 is returned.
 * Previously the reference was dropped first and a failed clone left `val`
 * with a NULL array.
 *
 * Returns 0 on success, otherwise the result of mod_json_array_merge or -1.
 */
static inline int mod_json_value_merge_array(mod_json_value_t *val,
                                             mod_json_array_t *arr) {
  if (val->type != mod_json_type_array || !val->data.c_arr) {
    mod_json_value_assign_array(val, arr);
    return 0;
  }

  if (arr) {
    if (mod_json_array_is_shared(val->data.c_arr)) {
      mod_json_array_t *copy = mod_json_array_clone(val->data.c_arr);

      if (mod_json_unlikely(!copy)) {
        return -1;
      }
      mod_json_array_put(val->data.c_arr);
      val->data.c_arr = copy;
    }
    return mod_json_array_merge(val->data.c_arr, arr);
  }
  return 0;
}

/* Merge `obj` into the object held by `val`.
 *
 * When `val` is not an object value (or holds a NULL object) it simply
 * becomes an object value referring to `obj`. Otherwise, if the held object
 * is shared, it is replaced by a private clone before merging so other
 * holders are not affected.
 *
 * The clone is created BEFORE the reference to the shared object is dropped:
 * if cloning fails, `val` keeps its original object and -1 is returned.
 * Previously the reference was dropped first and a failed clone left `val`
 * with a NULL object.
 *
 * Returns 0 on success, otherwise the result of mod_json_object_merge or -1.
 */
static inline int mod_json_value_merge_object(mod_json_value_t *val,
                                              mod_json_object_t *obj) {
  if (val->type != mod_json_type_object || !val->data.c_obj) {
    mod_json_value_assign_object(val, obj);
    return 0;
  }

  if (obj) {
    if (mod_json_object_is_shared(val->data.c_obj)) {
      mod_json_object_t *copy = mod_json_object_clone(val->data.c_obj);

      if (mod_json_unlikely(!copy)) {
        return -1;
      }
      mod_json_object_put(val->data.c_obj);
      val->data.c_obj = copy;
    }
    return mod_json_object_merge(val->data.c_obj, obj);
  }
  return 0;
}

/* Merge `src` into `dst`.
 *
 * Scalars and strings overwrite `dst`; arrays and objects are merged through
 * the container-specific helpers. A NULL `src`, or a `src` of an
 * unrecognised type, turns `dst` into a JSON null.
 *
 * Returns -1 when `dst` is NULL or is the same value as `src`; otherwise 0,
 * or the result of the array/object merge helper.
 */
int mod_json_value_merge(mod_json_value_t *dst, mod_json_value_t *src) {
  if (mod_json_unlikely(!dst || dst == src)) {
    return (-1);
  }

  if (src == NULL) {
    mod_json_value_assign_null(dst);
    return 0;
  }

  /* containers are merged; the helpers report their own status */
  if (src->type == mod_json_type_array) {
    return mod_json_value_merge_array(dst, src->data.c_arr);
  }
  if (src->type == mod_json_type_object) {
    return mod_json_value_merge_object(dst, src->data.c_obj);
  }

  /* everything else replaces the destination */
  if (src->type == mod_json_type_boolean) {
    mod_json_value_assign_boolean(dst, src->data.c_bool);
  } else if (src->type == mod_json_type_integer) {
    mod_json_value_assign_integer(dst, src->data.c_int);
  } else if (src->type == mod_json_type_float) {
    mod_json_value_assign_float(dst, src->data.c_float);
  } else if (src->type == mod_json_type_string) {
    mod_json_value_assign_string(dst, src->data.c_str);
  } else {
    mod_json_value_assign_null(dst);
  }
  return 0;
}

/* Return the object held by `val`, or NULL when `val` is NULL or is not an
 * object value. No reference is taken.
 */
mod_json_object_t *mod_json_value_object(mod_json_value_t *val) {
  if (!val || val->type != mod_json_type_object) {
    return NULL;
  }
  return (val->data.c_obj);
}

/* Return the array held by `val`, or NULL when `val` is NULL or is not an
 * array value. No reference is taken.
 */
mod_json_array_t *mod_json_value_array(mod_json_value_t *val) {
  if (!val || val->type != mod_json_type_array) {
    return NULL;
  }
  return (val->data.c_arr);
}

/* Return the string held by `val`, or NULL when `val` is NULL or is not a
 * string value. No reference is taken.
 */
mod_json_string_t *mod_json_value_string(mod_json_value_t *val) {
  if (!val || val->type != mod_json_type_string) {
    return NULL;
  }
  return (val->data.c_str);
}

/* Return the C string of the string held by `val` (via
 * mod_json_string_cstr), or NULL when `val` is NULL or is not a string
 * value.
 */
mod_json_cchar_t *mod_json_value_cstring(mod_json_value_t *val) {
  if (!val || val->type != mod_json_type_string) {
    return NULL;
  }
  return mod_json_string_cstr(val->data.c_str);
}

/* Convert `val` to a floating-point number.
 *
 * Booleans map to 1.0 / 0.0, integers are cast, floats are returned as they
 * are and strings are converted with mod_json_string_float. NULL and every
 * other type yield 0.0.
 */
mod_json_float_t mod_json_value_float(mod_json_value_t *val) {
  if (!val) {
    return (0.0);
  }

  if (val->type == mod_json_type_boolean) {
    return (val->data.c_bool ? 1.0 : 0.0);
  }
  if (val->type == mod_json_type_integer) {
    return (mod_json_float_t)(val->data.c_int);
  }
  if (val->type == mod_json_type_float) {
    return (val->data.c_float);
  }
  if (val->type == mod_json_type_string) {
    return mod_json_string_float(val->data.c_str);
  }
  return (0.0);
}

/* Convert `val` to a boolean.
 *
 * Objects, arrays and strings are true when they are non-empty; integers and
 * floats are true when non-zero; booleans are returned as stored. NULL, JSON
 * null and unrecognised types are false.
 */
mod_json_boolean_t mod_json_value_boolean(mod_json_value_t *val) {
  if (!val) {
    return MOD_JSON_FALSE;
  }

  if (val->type == mod_json_type_null) {
    return MOD_JSON_FALSE;
  }
  if (val->type == mod_json_type_object) {
    return (mod_json_object_count(val->data.c_obj) != 0);
  }
  if (val->type == mod_json_type_array) {
    return (mod_json_array_count(val->data.c_arr) != 0);
  }
  if (val->type == mod_json_type_string) {
    return (mod_json_string_length(val->data.c_str) != 0);
  }
  if (val->type == mod_json_type_integer) {
    return (val->data.c_int != 0);
  }
  if (val->type == mod_json_type_float) {
    return (val->data.c_float != 0);
  }
  if (val->type == mod_json_type_boolean) {
    return (val->data.c_bool);
  }
  return MOD_JSON_FALSE;
}

/* Convert `val` to an integer.
 *
 * Booleans map to 1 / 0, integers are returned as they are, floats are cast
 * and strings are converted with mod_json_string_integer. NULL and every
 * other type yield 0.
 */
mod_json_integer_t mod_json_value_integer(mod_json_value_t *val) {
  if (!val) {
    return (0);
  }

  if (val->type == mod_json_type_boolean) {
    return (val->data.c_bool ? 1 : 0);
  }
  if (val->type == mod_json_type_integer) {
    return (val->data.c_int);
  }
  if (val->type == mod_json_type_float) {
    return (mod_json_integer_t)(val->data.c_float);
  }
  if (val->type == mod_json_type_string) {
    return mod_json_string_integer(val->data.c_str);
  }
  return (0);
}

/* Create a new value with the same type and content as `val` by calling the
 * matching mod_json_value_set_* constructor. Object, array and string
 * payloads are passed to those constructors as they are, not deep-copied
 * here. Returns NULL when `val` is NULL, has an unrecognised type, or the
 * constructor fails.
 */
mod_json_value_t *mod_json_value_clone(mod_json_value_t *val) {
  if (!val) {
    return NULL;
  }

  if (val->type == mod_json_type_null) {
    return mod_json_value_set_null();
  }
  if (val->type == mod_json_type_object) {
    return mod_json_value_set_object(val->data.c_obj);
  }
  if (val->type == mod_json_type_array) {
    return mod_json_value_set_array(val->data.c_arr);
  }
  if (val->type == mod_json_type_string) {
    return mod_json_value_set_string(val->data.c_str);
  }
  if (val->type == mod_json_type_integer) {
    return mod_json_value_set_integer(val->data.c_int);
  }
  if (val->type == mod_json_type_float) {
    return mod_json_value_set_float(val->data.c_float);
  }
  if (val->type == mod_json_type_boolean) {
    return mod_json_value_set_boolean(val->data.c_bool);
  }
  return NULL;
}

/*
 * Approximate float equality: true when the difference of the two operands
 * lies strictly inside (-DBL_EPSILON, DBL_EPSILON).
 */
static inline mod_json_boolean_t mod_json_value_is_equal_float(
    mod_json_float_t lhs, mod_json_float_t rhs) {
  const mod_json_float_t delta = lhs - rhs;

  return ((delta < DBL_EPSILON) && (-DBL_EPSILON < delta));
}

/*
 * Compare two JSON values for equality.
 *
 * Identical pointers (including two NULLs) are equal. Otherwise both values
 * must be non-NULL and of the same type; the payloads are then compared with
 * the type-specific comparison. Unknown types compare unequal.
 */
mod_json_boolean_t mod_json_value_is_equal(mod_json_value_t *lhs,
                                           mod_json_value_t *rhs) {
  if (lhs == rhs) {
    /* same instance, nothing to inspect */
    return MOD_JSON_TRUE;
  }
  if (!lhs || !rhs || lhs->type != rhs->type) {
    return MOD_JSON_FALSE;
  }

  switch (lhs->type) {
    case mod_json_type_boolean:
      /* normalise both flags so that any non-zero value counts as true */
      return ((!lhs->data.c_bool) == (!rhs->data.c_bool));

    case mod_json_type_integer:
      return (lhs->data.c_int == rhs->data.c_int);

    case mod_json_type_float:
      return mod_json_value_is_equal_float(lhs->data.c_float,
                                           rhs->data.c_float);

    case mod_json_type_string:
      return (mod_json_string_compare(lhs->data.c_str, rhs->data.c_str) == 0);

    case mod_json_type_array:
      return mod_json_array_is_equal(lhs->data.c_arr, rhs->data.c_arr);

    case mod_json_type_object:
      return mod_json_object_is_equal(lhs->data.c_obj, rhs->data.c_obj);

    case mod_json_type_null:
      return MOD_JSON_TRUE;

    default:
      break;
  }
  return MOD_JSON_FALSE;
}

/*
 * Release one hold on a value. When mod_json_value_put() reports a result of
 * zero or less, the value's payload is cleared and the value itself is freed.
 * A NULL pointer is ignored.
 */
void mod_json_value_unset(mod_json_value_t *val) {
  if (!val) {
    return;
  }
  if (mod_json_value_put(val) > 0) {
    /* still held elsewhere */
    return;
  }
  mod_json_value_clear(val);
  mod_json_free(val);
}

/*
 * Move the string into a larger buffer.
 *
 * The requested size is passed through mod_json_utils_clp2() and raised to at
 * least MOD_JSON_STRING_DEFSIZE. The call fails (through the
 * mod_json_minus_if_false guard) when the resulting capacity is not larger
 * than the current one or when allocation fails. On success the old contents,
 * including the terminator, are copied over and the old buffer is freed.
 * Returns 0 on success.
 */
static inline int mod_json_string_expand(mod_json_string_t *str,
                                         mod_json_size_t size) {
  mod_json_size_t capacity = mod_json_utils_clp2(size);
  mod_json_size_t used;
  mod_json_char_t *fresh;

  if (capacity < MOD_JSON_STRING_DEFSIZE) {
    capacity = MOD_JSON_STRING_DEFSIZE;
  }
  mod_json_minus_if_false(capacity > str->size);

  fresh =
      (mod_json_char_t *)mod_json_malloc(capacity * sizeof(mod_json_char_t));
  mod_json_minus_if_false(fresh);

  used = (mod_json_size_t)(str->last - str->first);
  if (used == 0) {
    /* nothing to carry over, just terminate the new buffer */
    fresh[0] = '\0';
  } else {
    /* the extra byte is the terminator */
    memcpy(fresh, str->first, used + 1);
  }

  mod_json_free(str->first);
  str->size = capacity;
  str->first = fresh;
  str->last = fresh + used;
  return 0;
}

/*
 * Make sure the string can hold `n` characters plus the terminator.
 * Returns 0 when the buffer is already large enough or was grown, and fails
 * when `str` is NULL or mod_json_string_expand() fails.
 */
int mod_json_string_reserve(mod_json_string_t *str, mod_json_size_t n) {
  mod_json_minus_if_false(str);

  if (str->size < n + 1) {
    return mod_json_string_expand(str, n + 1);
  }
  /* capacity already sufficient */
  return 0;
}

/*
 * Allocate an empty string with a character buffer of `size` elements.
 * The buffer is allocated first, then the string header; if the header
 * allocation fails the buffer is freed again. The new string has a reference
 * count of 1 and an empty, terminated buffer. Returns NULL on allocation
 * failure.
 */
static inline mod_json_string_t *mod_json_string_malloc(mod_json_size_t size) {
  mod_json_char_t *chars;
  mod_json_string_t *self;

  chars = (mod_json_char_t *)mod_json_malloc(size * sizeof(mod_json_char_t));
  mod_json_null_if_false(chars);

  self = (mod_json_string_t *)mod_json_malloc(sizeof(mod_json_string_t));
  if (mod_json_unlikely(!self)) {
    mod_json_free(chars);
    return NULL;
  }

  chars[0] = '\0';
  self->first = chars;
  self->last = chars;
  self->size = size;
  self->refer = 1;
  return self;
}

/*
 * Replace the contents of `str` with the first `len` characters of `cstr`.
 *
 * The string is reset, capacity for `len` characters is reserved, and the
 * data is copied when `cstr` is non-NULL and `len` is non-zero. The length is
 * set to `len` and the buffer is terminated in every case.
 * Returns 0 on success; fails when reserving capacity fails.
 */
int mod_json_string_assign(mod_json_string_t *str, mod_json_cchar_t *cstr,
                           mod_json_size_t len) {
  mod_json_string_reset(str);
  mod_json_minus_if_ne_zero(mod_json_string_reserve(str, len));

  if (len && cstr) {
    memcpy(str->first, cstr, len);
  }
  str->last = str->first + len;
  str->last[0] = '\0';
  return 0;
}

/* Create an empty string whose buffer has the default size. */
static inline mod_json_string_t *mod_json_string_set_empty(void) {
  mod_json_string_t *empty = mod_json_string_malloc(MOD_JSON_STRING_DEFSIZE);

  return empty;
}

/*
 * Create a string holding a copy of the first `len` characters of `cstr`.
 * The buffer size is mod_json_utils_clp2(len + 1), leaving room for the
 * terminator. Returns NULL when allocation fails.
 */
static inline mod_json_string_t *mod_json_string_set_cstr(
    mod_json_cchar_t *cstr, mod_json_size_t len) {
  mod_json_string_t *made = mod_json_string_malloc(mod_json_utils_clp2(len + 1));

  mod_json_null_if_false(made);

  memcpy(made->first, cstr, len);
  made->first[len] = '\0';
  made->last = made->first + len;
  return made;
}

/*
 * Create a string from `len` characters of `cstr`. A NULL pointer or a zero
 * length produces an empty string instead.
 */
mod_json_string_t *mod_json_string_set(mod_json_cchar_t *cstr,
                                       mod_json_size_t len) {
  if (cstr && len) {
    return mod_json_string_set_cstr(cstr, len);
  }
  return mod_json_string_set_empty();
}

/*
 * Release one hold on a string. When mod_json_string_put() reports a result
 * of zero or less, the character buffer and the string header are freed.
 * A NULL pointer is ignored.
 */
void mod_json_string_unset(mod_json_string_t *str) {
  if (!str) {
    return;
  }
  if (mod_json_string_put(str) > 0) {
    /* still held elsewhere */
    return;
  }
  mod_json_free(str->first);
  mod_json_free(str);
}

/* Truncate the string to zero length, keeping its buffer. NULL is ignored. */
void mod_json_string_reset(mod_json_string_t *str) {
  if (!str) {
    return;
  }
  str->first[0] = '\0';
  str->last = str->first;
}

/*
 * Append a single character to the string, growing the buffer when the
 * character plus the terminator would not fit. Returns 0 on success and
 * fails when the buffer cannot be grown.
 */
static inline int mod_json_string_add_char(mod_json_string_t *str,
                                           mod_json_char_t ch) {
  /* current length + new character + terminator */
  mod_json_size_t required = (mod_json_size_t)(str->last - str->first) + 2;

  if (str->size < required) {
    mod_json_minus_if_ne_zero(mod_json_string_expand(str, required));
  }

  str->last[0] = ch;
  str->last[1] = '\0';
  str->last += 1;
  return 0;
}

/*
 * Append `len` characters of `cstr` to the string, growing the buffer when
 * needed. A NULL `cstr` or a zero `len` is a successful no-op.
 * Returns 0 on success and fails when the buffer cannot be grown.
 */
static inline int mod_json_string_add_cstr(mod_json_string_t *str,
                                           mod_json_cchar_t *cstr,
                                           mod_json_size_t len) {
  mod_json_size_t required;

  if (!cstr || !len) {
    /* nothing to append */
    return 0;
  }

  /* current length + appended length + terminator */
  required = (mod_json_size_t)(str->last - str->first) + len + 1;
  if (str->size < required) {
    mod_json_minus_if_ne_zero(mod_json_string_expand(str, required));
  }

  memcpy(str->last, cstr, len);
  str->last[len] = '\0';
  str->last += len;
  return 0;
}

/* Append the contents of the string `val` to `str`. */
static inline int mod_json_string_add_jstr(mod_json_string_t *str,
                                           mod_json_string_t *val) {
  mod_json_size_t extra = (mod_json_size_t)(val->last - val->first);

  return mod_json_string_add_cstr(str, val->first, extra);
}

/*
 * Public entry point for appending one JSON string to another; forwards to
 * mod_json_string_add_jstr() and returns its result.
 */
int mod_json_string_add(mod_json_string_t *str, mod_json_string_t *val) {
  int status = mod_json_string_add_jstr(str, val);

  return status;
}

/*
 * Public entry point for appending `len` characters of a C string; forwards
 * to mod_json_string_add_cstr() and returns its result.
 */
int mod_json_string_append(mod_json_string_t *str, mod_json_cchar_t *cstr,
                           mod_json_size_t len) {
  int status = mod_json_string_add_cstr(str, cstr, len);

  return status;
}

/*
 * Compute a multiplicative hash of the string contents: starting from 1,
 * each character updates the hash as hash * 131 + character.
 * A NULL string hashes to 1.
 */
mod_json_size_t mod_json_string_hash(mod_json_string_t *str) {
  mod_json_size_t digest = 1;
  mod_json_cchar_t *cur;
  mod_json_cchar_t *end;

  if (!str) {
    return digest;
  }

  end = str->last;
  for (cur = str->first; cur != end; ++cur) {
    digest = digest * 131 + (mod_json_size_t)(*cur);
  }
  return digest;
}

/*
 * Three-way comparison of two JSON strings.
 *
 * Strings are ordered by length first; strings of equal length are ordered by
 * memcmp() of their contents. A NULL string is treated as having length 0.
 *
 * Returns 0 when equal, a negative value when `str1` sorts first and a
 * positive value when `str2` sorts first. Only the sign is meaningful.
 */
int mod_json_string_compare(mod_json_string_t *str1, mod_json_string_t *str2) {
  mod_json_size_t len1, len2;

  if (str1 == str2) {
    /* The same pointer */
    return 0;
  }

  len1 = str1 ? (mod_json_size_t)(str1->last - str1->first) : 0;
  len2 = str2 ? (mod_json_size_t)(str2->last - str2->first) : 0;

  if (len1 != len2) {
    /*
     * Compare explicitly instead of returning (int)(len1 - len2): narrowing
     * the unsigned difference to int can yield 0 or the wrong sign when the
     * lengths are far apart.
     */
    return (len1 < len2) ? -1 : 1;
  }

  if (!str1 || !str2) {
    /* One side is NULL and the other is an empty string. */
    return 0;
  }
  return memcmp(str1->first, str2->first, len1);
}

/*
 * Parse the string as an integer with mod_json_utils_strtoi() using base 0.
 * A NULL string gives 0.
 */
mod_json_integer_t mod_json_string_integer(mod_json_string_t *str) {
  if (!str) {
    return 0;
  }
  return (mod_json_integer_t)mod_json_utils_strtoi(str->first, NULL, 0);
}

/*
 * Parse the string as a floating-point number with mod_json_utils_strtof().
 * A NULL string gives 0.0.
 */
mod_json_float_t mod_json_string_float(mod_json_string_t *str) {
  if (!str) {
    return 0.0;
  }
  return mod_json_utils_strtof(str->first, NULL);
}

/*
 * Append an escaped copy of `src` to `dst`.
 *
 * Characters 0x00-0x1f are replaced by the matching entry of the escape
 * table; a double quote or backslash is preceded by a backslash. Every other
 * character is copied unchanged, in runs, to limit the number of append
 * calls. Returns 0 on success and fails as soon as an append fails.
 */
static inline int mod_json_string_flat(mod_json_string_t *dst,
                                       mod_json_string_t *src) {
  /* escape sequence for each control character 0x00..0x1f */
  static mod_json_cchar_t *flattab[32] = {
      "\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005",
      "\\u0006", "\\u0007", "\\b",     "\\t",     "\\n",     "\\u000b",
      "\\f",     "\\r",     "\\u000e", "\\u000f", "\\u0010", "\\u0011",
      "\\u0012", "\\u0013", "\\u0014", "\\u0015", "\\u0016", "\\u0017",
      "\\u0018", "\\u0019", "\\u001a", "\\u001b", "\\u001c", "\\u001d",
      "\\u001e", "\\u001f"};

  /* length of each escape sequence above */
  static const mod_json_uchar_t flatlen[32] = {6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2,
                                               6, 2, 2, 6, 6, 6, 6, 6, 6, 6, 6,
                                               6, 6, 6, 6, 6, 6, 6, 6, 6, 6};

  mod_json_cchar_t *run = src->first; /* start of the pending literal run */
  mod_json_cchar_t *cur = src->first;
  mod_json_cchar_t *end = src->last;

  for (; cur != end; ++cur) {
    int ch = *cur;
    int is_ctrl = ((mod_json_uchar_t)ch <= 0x1f);

    if (!is_ctrl && ch != '\"' && ch != '\\') {
      /* ordinary character, stays in the pending run */
      continue;
    }

    /* flush the literal run collected so far */
    if (cur > run) {
      mod_json_minus_if_ne_zero(
          mod_json_string_add_cstr(dst, run, (mod_json_size_t)(cur - run)));
    }

    if (is_ctrl) {
      mod_json_minus_if_ne_zero(
          mod_json_string_add_cstr(dst, flattab[ch], flatlen[ch]));

      /* the control character itself is dropped */
      run = cur + 1;
    } else {
      mod_json_minus_if_ne_zero(mod_json_string_add_char(dst, '\\'));

      /* the quote/backslash stays and opens the next run */
      run = cur;
    }
  }

  /* flush whatever is left after the last escape */
  if (cur > run) {
    mod_json_minus_if_ne_zero(
        mod_json_string_add_cstr(dst, run, (mod_json_size_t)(cur - run)));
  }
  return 0;
}

/*
 * Decode the escaped text in `src` into the existing buffer of `dst`.
 *
 * The input is walked by a small state machine: plain characters are copied,
 * a backslash introduces either a single-character escape or a `\uXXXX`
 * sequence whose four hex digits are collected into a high and a low byte and
 * then written out through mod_json_utils_uni2utf8().
 *
 * The destination buffer is never grown here; writing is bounded by
 * dst->first + dst->size and one slot must remain for the terminator.
 * Returns 0 on success. Fails on an unknown escape, an invalid hex digit,
 * an input that ends inside an escape sequence, or lack of buffer space.
 */
static inline int mod_json_string_unflat(mod_json_string_t *dst,
                                         mod_json_string_t *src) {
  enum {
    state_normal,    /* copying plain characters */
    state_rev_slash, /* a backslash has just been read */
    state_digit_1,   /* expecting 1st hex digit of \uXXXX */
    state_digit_2,   /* expecting 2nd hex digit */
    state_digit_3,   /* expecting 3rd hex digit */
    state_digit_4    /* expecting 4th hex digit */
  } state;

  mod_json_char_t *pbuf = dst->first;             /* write cursor */
  mod_json_char_t *pend = dst->first + dst->size; /* end of dst buffer */
  mod_json_cchar_t *iter = src->first;
  mod_json_cchar_t *last = src->last;
  mod_json_uchar_t high = 0; /* upper byte of the \uXXXX code unit */
  mod_json_uchar_t low = 0;  /* lower byte of the \uXXXX code unit */

  /* the whole string */
  for (state = state_normal; iter != last; ++iter) {
    int c = *iter;

    switch (state) {
      case state_normal:
        if (c != '\\') {
          /* plain character: copy it if there is room */
          mod_json_minus_if_false(pbuf < pend);
          *pbuf++ = (mod_json_char_t)c;
        } else {
          /* '\\' in process */
          state = state_rev_slash;
        }
        break;

      case state_rev_slash:
        /* every single-character escape below writes exactly one byte */
        mod_json_minus_if_false(pbuf < pend);

        switch (c) {
          case '\"':
            state = state_normal;
            *pbuf++ = '\"';
            break;
          case '/':
            state = state_normal;
            *pbuf++ = '/';
            break;
          case 'b':
            state = state_normal;
            *pbuf++ = '\b';
            break;
          case 'f':
            state = state_normal;
            *pbuf++ = '\f';
            break;
          case '\\':
            state = state_normal;
            *pbuf++ = '\\';
            break;
          case 'n':
            state = state_normal;
            *pbuf++ = '\n';
            break;
          case 'r':
            state = state_normal;
            *pbuf++ = '\r';
            break;
          case 't':
            state = state_normal;
            *pbuf++ = '\t';
            break;
          case 'u':
            /* four hex digits follow */
            state = state_digit_1;
            break;
          default:
            /* unknown escape character */
            return -1;
        }
        break;

      case state_digit_1:
        if ((c = mod_json_utils_char2hex((mod_json_char_t)c)) > 15) {
          /* invalid character */
          return -1;
        }
        /* upper nibble of the high byte */
        high = (mod_json_uchar_t)(c << 4);
        state = state_digit_2;
        break;

      case state_digit_2:
        if ((c = mod_json_utils_char2hex((mod_json_char_t)c)) > 15) {
          /* invalid character */
          return -1;
        }
        /* lower nibble of the high byte */
        high |= (mod_json_uchar_t)c;
        state = state_digit_3;
        break;

      case state_digit_3:
        if ((c = mod_json_utils_char2hex((mod_json_char_t)c)) > 15) {
          /* invalid character */
          return -1;
        }
        /* upper nibble of the low byte */
        low = (mod_json_uchar_t)(c << 4);
        state = state_digit_4;
        break;

      case state_digit_4:
        if ((c = mod_json_utils_char2hex((mod_json_char_t)c)) > 15) {
          /* invalid character */
          return -1;
        }
        /* lower nibble of the low byte */
        low |= (mod_json_uchar_t)c;

        /* decode as a UTF-8 string */
        pbuf = mod_json_utils_uni2utf8(pbuf, (mod_json_size_t)(pend - pbuf),
                                       high, low);
        if (!pbuf) {
          /* lack of buffer */
          return -1;
        }
        state = state_normal;
        break;
    }
  }

  if (state != state_normal) {
    /* uncompleted state */
    return -1;
  }
  /* one more slot is needed for the terminator */
  mod_json_minus_if_false(pbuf < pend);

  /* update the last pointer */
  *(dst->last = pbuf) = '\0';

  /* success */
  return 0;
}

/*
 * Return a new string holding the escaped form of `src`.
 * The result starts with room for the unescaped length and grows as escapes
 * are appended. Returns NULL when `src` is NULL, when allocation fails or
 * when escaping fails; a partially built result is released.
 */
mod_json_string_t *mod_json_string_encode(mod_json_string_t *src) {
  mod_json_string_t *out;
  mod_json_size_t capacity;

  mod_json_null_if_false(src);

  capacity = mod_json_utils_clp2((mod_json_size_t)(src->last - src->first) + 1);
  out = mod_json_string_malloc(capacity);
  mod_json_null_if_false(out);

  if (mod_json_unlikely(mod_json_string_flat(out, src) != 0)) {
    mod_json_string_unset(out);
    out = NULL;
  }
  return out;
}

/*
 * Return a new string holding the unescaped form of `src`.
 * The result buffer is sized from the escaped length plus the terminator.
 * Returns NULL when `src` is NULL, when allocation fails or when `src` is not
 * validly escaped; a partially built result is released.
 */
mod_json_string_t *mod_json_string_decode(mod_json_string_t *src) {
  mod_json_string_t *out;
  mod_json_size_t capacity;

  mod_json_null_if_false(src);

  capacity = mod_json_utils_clp2((mod_json_size_t)(src->last - src->first) + 1);
  out = mod_json_string_malloc(capacity);
  mod_json_null_if_false(out);

  if (mod_json_unlikely(mod_json_string_unflat(out, src) != 0)) {
    mod_json_string_unset(out);
    out = NULL;
  }
  return out;
}

/*
 * Create an empty array with room for `size` items.
 * A non-zero size is passed through mod_json_utils_clp2(); zero selects
 * MOD_JSON_ARRAY_DEFSIZE. The new array has a reference count of 1.
 * Returns NULL on allocation failure.
 */
mod_json_array_t *mod_json_array_set(mod_json_size_t size) {
  mod_json_value_t **slots;
  mod_json_array_t *self;

  if (size) {
    size = mod_json_utils_clp2(size);
  } else {
    size = MOD_JSON_ARRAY_DEFSIZE;
  }

  slots =
      (mod_json_value_t **)mod_json_malloc(size * sizeof(mod_json_value_t *));
  mod_json_null_if_false(slots);

  /* the header comes second so a failure only has the slots to undo */
  self = (mod_json_array_t *)mod_json_malloc(sizeof(mod_json_array_t));
  if (mod_json_unlikely(!self)) {
    mod_json_free(slots);
    return NULL;
  }

  self->first = slots;
  self->last = slots;
  self->size = size;
  self->refer = 1;
  return self;
}

/*
 * Create a new array with the same number of items as `arr`.
 * Each non-NULL item is obtained through mod_json_value_grab(); NULL items
 * stay NULL. Returns NULL when `arr` is NULL or allocation fails.
 */
mod_json_array_t *mod_json_array_clone(mod_json_array_t *arr) {
  mod_json_array_t *copy;
  mod_json_value_t **item;

  if (!arr) {
    return NULL;
  }

  copy = mod_json_array_set((mod_json_size_t)(arr->last - arr->first));
  if (!copy) {
    return NULL;
  }

  for (item = arr->first; item != arr->last; ++item) {
    *copy->last = *item ? mod_json_value_grab(*item) : NULL;
    ++copy->last;
  }
  return copy;
}

/*
 * Compare two arrays for equality.
 * Identical pointers are equal; otherwise both must be non-NULL, hold the
 * same number of items, and every pair of items at the same position must
 * satisfy mod_json_value_is_equal().
 */
mod_json_boolean_t mod_json_array_is_equal(mod_json_array_t *lhs,
                                           mod_json_array_t *rhs) {
  mod_json_value_t **left, **right;

  if (lhs == rhs) {
    return MOD_JSON_TRUE;
  }
  if (!lhs || !rhs) {
    return MOD_JSON_FALSE;
  }
  if ((lhs->last - lhs->first) != (rhs->last - rhs->first)) {
    return MOD_JSON_FALSE;
  }

  left = lhs->first;
  right = rhs->first;
  while (left != lhs->last) {
    if (!mod_json_value_is_equal(*left, *right)) {
      return MOD_JSON_FALSE;
    }
    ++left;
    ++right;
  }
  return MOD_JSON_TRUE;
}

/*
 * Release one hold on an array. When mod_json_array_put() reports a result
 * of zero or less, every item is released and the slot buffer and the array
 * header are freed. A NULL pointer is ignored.
 */
void mod_json_array_unset(mod_json_array_t *arr) {
  mod_json_value_t **item;

  if (!arr) {
    return;
  }
  if (mod_json_array_put(arr) > 0) {
    /* still held elsewhere */
    return;
  }

  for (item = arr->first; item != arr->last; ++item) {
    mod_json_value_unset(*item);
  }
  mod_json_free(arr->first);
  mod_json_free(arr);
}

/*
 * Remove every item from the array, releasing each one, while keeping the
 * slot buffer. A NULL pointer is ignored.
 */
void mod_json_array_reset(mod_json_array_t *arr) {
  mod_json_value_t **item;

  if (!arr) {
    return;
  }

  for (item = arr->first; item != arr->last; ++item) {
    mod_json_value_unset(*item);
  }
  arr->last = arr->first;
}

/*
 * Move the array's items into the caller-provided buffer `buf` of capacity
 * `size`, free the old buffer and make the array use the new one.
 */
static inline void mod_json_array_migrate(mod_json_array_t *arr,
                                          mod_json_value_t **buf,
                                          mod_json_size_t size) {
  mod_json_value_t **old = arr->first;
  mod_json_size_t used = (mod_json_size_t)(arr->last - old);

  if (used > 0) {
    memcpy(buf, old, used * sizeof(mod_json_value_t *));
  }
  mod_json_free(old);

  arr->size = size;
  arr->first = buf;
  arr->last = buf + used;
}

/*
 * Grow the array's slot buffer so that it can hold at least `n` items.
 * The new capacity is mod_json_utils_clp2(n), raised to at least
 * MOD_JSON_ARRAY_DEFSIZE. Fails when that capacity is not larger than the
 * current one or when allocation fails. Returns 0 on success.
 */
static inline int mod_json_array_expand(mod_json_array_t *arr,
                                        mod_json_size_t n) {
  mod_json_size_t capacity = mod_json_utils_clp2(n);
  mod_json_value_t **slots;

  if (capacity < MOD_JSON_ARRAY_DEFSIZE) {
    capacity = MOD_JSON_ARRAY_DEFSIZE;
  }
  mod_json_minus_if_false(capacity > arr->size);

  slots = (mod_json_value_t **)mod_json_malloc(capacity *
                                               sizeof(mod_json_value_t *));
  mod_json_minus_if_false(slots);

  /* hand the items over to the new buffer */
  mod_json_array_migrate(arr, slots, capacity);
  return 0;
}

/*
 * Make sure the array has capacity for at least `n` items.
 * Returns 0 when the capacity is already sufficient or was grown; fails when
 * `arr` is NULL or mod_json_array_expand() fails.
 */
int mod_json_array_reserve(mod_json_array_t *arr, mod_json_size_t n) {
  mod_json_minus_if_false(arr);

  if (arr->size < n) {
    return mod_json_array_expand(arr, n);
  }
  /* capacity already sufficient */
  return 0;
}

/* Reverse the order of the array's items in place. NULL is ignored. */
void mod_json_array_reverse(mod_json_array_t *arr) {
  mod_json_size_t lo, hi;

  if (!arr) {
    return;
  }

  /* lo walks up, hi walks down; stop when they meet or cross */
  lo = 0;
  hi = (mod_json_size_t)(arr->last - arr->first);
  while (lo + 1 < hi) {
    mod_json_value_t *held;

    --hi;
    held = arr->first[lo];
    arr->first[lo] = arr->first[hi];
    arr->first[hi] = held;
    ++lo;
  }
}

/*
 * Append an item to the end of the array, growing the slot buffer when it is
 * full. A non-NULL `val` is stored through mod_json_value_grab(); a NULL
 * `val` is stored as an empty slot.
 * Returns 0 on success; fails when `arr` is NULL or growing fails.
 */
int mod_json_array_push(mod_json_array_t *arr, mod_json_value_t *val) {
  mod_json_size_t used;

  mod_json_minus_if_false(arr);

  used = (mod_json_size_t)(arr->last - arr->first);
  if (arr->size <= used) {
    mod_json_minus_if_ne_zero(mod_json_array_expand(arr, used + 1));
  }

  *arr->last = val ? mod_json_value_grab(val) : NULL;
  ++arr->last;
  return 0;
}

/* Remove and release the last item. NULL or empty arrays are left alone. */
void mod_json_array_pop(mod_json_array_t *arr) {
  if (!arr || arr->first == arr->last) {
    return;
  }
  arr->last -= 1;
  mod_json_value_unset(*arr->last);
}

/*
 * Remove and release the first item, moving every later item one slot
 * towards the front. NULL or empty arrays are left alone.
 */
void mod_json_array_shift(mod_json_array_t *arr) {
  mod_json_value_t **slot;

  if (!arr || arr->first == arr->last) {
    return;
  }

  --arr->last;
  slot = arr->first;
  mod_json_value_unset(*slot);

  /* close the gap left by the removed head */
  while (slot != arr->last) {
    slot[0] = slot[1];
    ++slot;
  }
}

/*
 * Return the item stored at index `id`, or NULL when `arr` is NULL or `id` is
 * out of range. The returned pointer is the stored slot content and may
 * itself be NULL for an empty slot.
 */
mod_json_value_t *mod_json_array_at(mod_json_array_t *arr, mod_json_size_t id) {
  /*
   * Compare the index against the item count instead of forming
   * `arr->first + id`: computing a pointer beyond the buffer is undefined
   * and may wrap for a large `id`, defeating the bounds check.
   */
  if (arr && id < (mod_json_size_t)(arr->last - arr->first)) {
    return (arr->first[id]);
  }
  return NULL;
}

/*
 * Merge the items of `src` into `dst`, position by position.
 *
 * `dst` is first padded with empty slots up to the length of `src`. Then, for
 * every position present in `src`:
 *   - an empty `dst` slot takes the `src` item (through mod_json_value_grab);
 *   - otherwise the `dst` item is merged with the `src` item through
 *     mod_json_value_merge(); a shared `dst` item is replaced by a private
 *     clone first.
 *
 * Returns 0 on success. Fails when either argument is NULL, when both are
 * the same array, or when padding or cloning runs out of memory; in the
 * latter cases `dst` may already be partially merged.
 */
int mod_json_array_merge(mod_json_array_t *dst, mod_json_array_t *src) {
  long count, len1, len2;

  mod_json_minus_if_false(dst && src && dst != src);

  /* update length of array */
  len1 = (mod_json_size_t)(src->last - src->first);
  len2 = (mod_json_size_t)(dst->last - dst->first);
  mod_json_minus_if_false(len1 >= 0 && len2 >= 0);

  /* append empty values; a failed push must not be ignored */
  for (count = len1 - len2; count > 0; --count) {
    mod_json_minus_if_ne_zero(mod_json_array_push(dst, NULL));
  }

  /* It must be assigned again: padding may have changed the length. */
  len2 = (mod_json_size_t)(dst->last - dst->first);
  count = (len1 < len2 ? len1 : len2);

  while ((count--) > 0) {
    mod_json_value_t **iter1 = src->first + count;
    mod_json_value_t **iter2 = dst->first + count;

    if (!(*iter2)) {
      *iter2 = *iter1 ? mod_json_value_grab(*iter1) : NULL;
      continue;
    }

    if (mod_json_value_is_shared(*iter2)) {
      /*
       * Clone before giving up the shared value, so that a failed clone
       * leaves the slot untouched instead of replacing it with NULL.
       */
      mod_json_value_t *copy = mod_json_value_clone(*iter2);

      mod_json_minus_if_false(copy);
      mod_json_value_put(*iter2);
      *iter2 = copy;
    }
    mod_json_value_merge(*iter2, *iter1);
  }

  /* success */
  return 0;
}

/*
 * Change the number of items in the array to `n`.
 *
 * Growing: the buffer is enlarged when needed and the new slots are filled
 * with `val` (or left NULL when `val` is NULL). The first new slot uses
 * mod_json_value_grab(); the remaining ones use mod_json_value_get() on the
 * grabbed value.
 * Shrinking: the surplus items are released and their slots set to NULL.
 *
 * Returns 0 on success; fails when `arr` is NULL or growing fails.
 */
int mod_json_array_resize(mod_json_array_t *arr, mod_json_size_t n,
                          mod_json_value_t *val) {
  mod_json_size_t current;

  mod_json_minus_if_false(arr);

  current = (mod_json_size_t)(arr->last - arr->first);

  if (current < n) {
    mod_json_value_t **slot;

    if (arr->size < n) {
      mod_json_minus_if_ne_zero(mod_json_array_expand(arr, n));
    }

    slot = arr->last;
    arr->last = arr->first + n;

    /* grab once for the first slot, then share that value with the rest */
    val = val ? mod_json_value_grab(val) : NULL;
    *slot = val;
    for (++slot; slot != arr->last; ++slot) {
      *slot = val ? mod_json_value_get(val) : NULL;
    }
  } else if (current > n) {
    mod_json_value_t **keep_end = arr->first + n;
    mod_json_value_t **slot;

    for (slot = keep_end; slot != arr->last; ++slot) {
      mod_json_value_unset(*slot);
      *slot = NULL;
    }
    arr->last = keep_end;
  }
  return 0;
}

/*
 * Fill a key/value pair: the key is obtained through mod_json_string_grab()
 * and a non-NULL value through mod_json_value_grab(); a NULL value is stored
 * as NULL.
 */
static inline void mod_json_pair_init(mod_json_pair_t *pair,
                                      mod_json_string_t *key,
                                      mod_json_value_t *val) {
  pair->key = mod_json_string_grab(key);
  if (val) {
    pair->val = mod_json_value_grab(val);
  } else {
    pair->val = NULL;
  }
}

/* Release the key and value held by a pair and leave both fields NULL. */
static inline void mod_json_pair_cleanup(mod_json_pair_t *pair) {
  mod_json_string_t *old_key = pair->key;
  mod_json_value_t *old_val = pair->val;

  mod_json_string_unset(old_key);
  mod_json_value_unset(old_val);
  pair->key = NULL;
  pair->val = NULL;
}

/*
 * Create an empty object with room for `size` pairs.
 * A non-zero size is passed through mod_json_utils_clp2(); zero selects
 * MOD_JSON_OBJECT_DEFSIZE. The new object has a reference count of 1.
 * Returns NULL on allocation failure.
 */
mod_json_object_t *mod_json_object_set(mod_json_size_t size) {
  mod_json_pair_t *pairs;
  mod_json_object_t *self;

  if (size) {
    size = mod_json_utils_clp2(size);
  } else {
    size = MOD_JSON_OBJECT_DEFSIZE;
  }

  pairs = (mod_json_pair_t *)mod_json_malloc(size * sizeof(mod_json_pair_t));
  mod_json_null_if_false(pairs);

  /* the header comes second so a failure only has the pairs to undo */
  self = (mod_json_object_t *)mod_json_malloc(sizeof(mod_json_object_t));
  if (mod_json_unlikely(!self)) {
    mod_json_free(pairs);
    return NULL;
  }

  self->first = pairs;
  self->last = pairs;
  self->size = size;
  self->refer = 1;
  return self;
}

/*
 * Release one hold on an object. When mod_json_object_put() reports a result
 * of zero or less, every pair is cleaned up and the pair buffer and the
 * object header are freed. A NULL pointer is ignored.
 */
void mod_json_object_unset(mod_json_object_t *obj) {
  mod_json_pair_t *pair;

  if (!obj) {
    return;
  }
  if (mod_json_object_put(obj) > 0) {
    /* still held elsewhere */
    return;
  }

  for (pair = obj->first; pair != obj->last; ++pair) {
    mod_json_pair_cleanup(pair);
  }
  mod_json_free(obj->first);
  mod_json_free(obj);
}

/*
 * Remove every pair from the object, releasing keys and values, while
 * keeping the pair buffer. A NULL pointer is ignored.
 */
void mod_json_object_reset(mod_json_object_t *obj) {
  mod_json_pair_t *pair;

  if (!obj) {
    return;
  }

  for (pair = obj->first; pair != obj->last; ++pair) {
    mod_json_pair_cleanup(pair);
  }
  obj->last = obj->first;
}

/*
 * Move the object's pairs into the caller-provided buffer `buf` of capacity
 * `size`, free the old buffer and make the object use the new one.
 */
static inline void mod_json_object_migrate(mod_json_object_t *obj,
                                           mod_json_pair_t *buf,
                                           mod_json_size_t size) {
  mod_json_pair_t *old = obj->first;
  mod_json_size_t used = (mod_json_size_t)(obj->last - old);

  if (used > 0) {
    memcpy(buf, old, used * sizeof(mod_json_pair_t));
  }
  mod_json_free(old);

  obj->size = size;
  obj->first = buf;
  obj->last = buf + used;
}

/*
 * Grow the object's pair buffer so that it can hold at least `n` pairs.
 * The new capacity is mod_json_utils_clp2(n), raised to at least
 * MOD_JSON_OBJECT_DEFSIZE. Fails when that capacity is not larger than the
 * current one or when allocation fails. Returns 0 on success.
 */
static inline int mod_json_object_expand(mod_json_object_t *obj,
                                         mod_json_size_t n) {
  mod_json_size_t capacity = mod_json_utils_clp2(n);
  mod_json_pair_t *pairs;

  if (capacity < MOD_JSON_OBJECT_DEFSIZE) {
    capacity = MOD_JSON_OBJECT_DEFSIZE;
  }
  mod_json_minus_if_false(capacity > obj->size);

  pairs =
      (mod_json_pair_t *)mod_json_malloc(capacity * sizeof(mod_json_pair_t));
  mod_json_minus_if_false(pairs);

  /* hand the pairs over to the new buffer */
  mod_json_object_migrate(obj, pairs, capacity);
  return 0;
}

/*
 * Binary-search the object's pairs for `key`.
 *
 * The pairs are expected to be ordered by mod_json_string_compare() on their
 * keys (insertions in this file place new pairs at the position reported
 * here).
 *
 * On a hit, returns the matching pair and stores its index in `*out`.
 * On a miss, returns NULL and stores in `*out` the index at which a pair
 * with this key would have to be inserted to keep the order.
 */
static inline mod_json_pair_t *mod_json_object_find_pair(mod_json_object_t *obj,
                                                         mod_json_string_t *key,
                                                         mod_json_size_t *out) {
  mod_json_pair_t *first = obj->first;
  mod_json_pair_t *last = obj->last;

  while (first < last) {
    /* probe the midpoint so that every step halves the search range */
    mod_json_pair_t *middle = first + ((last - first) >> 1);
    int diff = mod_json_string_compare(middle->key, key);

    if (diff < 0) {
      first = middle + 1;
    } else if (diff > 0) {
      last = middle;
    } else /*if (diff == 0)*/
    {
      *out = (mod_json_size_t)(middle - obj->first);
      return middle;
    }
  }
  *out = (mod_json_size_t)(first - obj->first);
  return NULL;
}

/*
 * Insert a new pair at index `npos` without checking for an existing key.
 *
 * The pair buffer is grown when full, the pairs from `npos` onwards are moved
 * one slot towards the end, and the freed slot is initialised with `key` and
 * `val`. Returns the new pair, or NULL when the buffer cannot be grown.
 * NOTE(review): `npos` is not validated here; callers in this file pass the
 * index reported by mod_json_object_find_pair().
 */
mod_json_pair_t *mod_json_object_insert_force(mod_json_object_t *obj,
                                              mod_json_size_t npos,
                                              mod_json_string_t *key,
                                              mod_json_value_t *val) {
  mod_json_pair_t *slot, *cur;
  mod_json_size_t used;

  used = (mod_json_size_t)(obj->last - obj->first);
  if (obj->size <= used) {
    mod_json_null_if_ne_zero(mod_json_object_expand(obj, used + 1));
  }

  /* open a gap at the insertion slot, working backwards from the end */
  slot = obj->first + npos;
  for (cur = obj->last; cur != slot; --cur) {
    cur->key = cur[-1].key;
    cur->val = cur[-1].val;
  }
  ++obj->last;

  mod_json_pair_init(slot, key, val);
  return slot;
}

/*
 * Insert a new key/value pair. Returns the new pair, or NULL when `obj` or
 * `key` is NULL, when the key already exists, or when insertion fails.
 */
mod_json_pair_t *mod_json_object_insert(mod_json_object_t *obj,
                                        mod_json_string_t *key,
                                        mod_json_value_t *val) {
  mod_json_size_t where;

  mod_json_null_if_false(obj && key);

  if (!mod_json_object_find_pair(obj, key, &where)) {
    return mod_json_object_insert_force(obj, where, key, val);
  }
  /* the key is already present */
  return NULL;
}

/*
 * Set the value stored under `key`, inserting the pair when it is missing.
 *
 * For an existing pair with an empty value slot, `val` is stored through
 * mod_json_value_grab(); for an existing non-empty value, the old value is
 * overwritten through mod_json_value_assign().
 * Returns the affected pair, or NULL when `obj` or `key` is NULL or the
 * insertion fails.
 */
mod_json_pair_t *mod_json_object_assign(mod_json_object_t *obj,
                                        mod_json_string_t *key,
                                        mod_json_value_t *val) {
  mod_json_pair_t *pair;
  mod_json_size_t where;

  if (!obj || !key) {
    return NULL;
  }

  pair = mod_json_object_find_pair(obj, key, &where);
  if (!pair) {
    /* unknown key: add a new pair */
    return mod_json_object_insert_force(obj, where, key, val);
  }

  if (pair->val) {
    /* replace the content of the existing value */
    mod_json_value_assign(pair->val, val);
  } else {
    pair->val = val ? mod_json_value_grab(val) : NULL;
  }
  return pair;
}

/*
 * Return the pair stored under the C-string `key`, creating it with an empty
 * (NULL) value when it does not exist yet.
 *
 * Returns NULL when `obj` or `key` is NULL, or when the key string or the
 * pair slot cannot be allocated.
 */
mod_json_pair_t *mod_json_object_touch(mod_json_object_t *obj,
                                       mod_json_cchar_t *key) {
  mod_json_pair_t *elem = NULL;

  if (obj && key) {
    mod_json_string_t str;
    mod_json_size_t npos;
    mod_json_size_t len = (mod_json_size_t)mod_json_utils_strlen(key);

    /* temporary view of the key; only first/last are read by the lookup */
    str.first = (mod_json_char_t *)key;
    str.last = str.first + len;

    elem = mod_json_object_find_pair(obj, &str, &npos);
    if (!elem) {
      mod_json_string_t *jkey;

      /* insert a new one */
      jkey = mod_json_string_set(key, len);
      if (jkey) {
        elem = mod_json_object_insert_force(obj, npos, jkey, NULL);

        /* the pair took its own hold on the key during insertion */
        mod_json_string_unset(jkey);
      }
      /* else: key allocation failed, report it by returning NULL */
    }
  }
  return elem;
}

/*
 * Create a new object with the same pairs as `obj`, in the same order.
 * Each pair is initialised through mod_json_pair_init() from the source key
 * and value. Returns NULL when `obj` is NULL or allocation fails.
 */
mod_json_object_t *mod_json_object_clone(mod_json_object_t *obj) {
  mod_json_object_t *copy;
  mod_json_pair_t *pair;

  if (!obj) {
    return NULL;
  }

  copy = mod_json_object_set((mod_json_size_t)(obj->last - obj->first));
  if (!copy) {
    return NULL;
  }

  for (pair = obj->first; pair != obj->last; ++pair) {
    mod_json_pair_init(copy->last, pair->key, pair->val);
    ++copy->last;
  }
  return copy;
}

/*
 * Compare two objects for equality.
 * Identical pointers are equal; otherwise both must be non-NULL, hold the
 * same number of pairs, and the pairs at each position must have equal keys
 * and equal values.
 */
mod_json_boolean_t mod_json_object_is_equal(mod_json_object_t *lhs,
                                            mod_json_object_t *rhs) {
  mod_json_pair_t *left, *right;

  if (lhs == rhs) {
    /* same instance */
    return MOD_JSON_TRUE;
  }
  if (!lhs || !rhs) {
    return MOD_JSON_FALSE;
  }
  if ((lhs->last - lhs->first) != (rhs->last - rhs->first)) {
    return MOD_JSON_FALSE;
  }

  left = lhs->first;
  right = rhs->first;
  while (left != lhs->last) {
    if (mod_json_string_compare(left->key, right->key) != 0) {
      return MOD_JSON_FALSE;
    }
    if (!mod_json_value_is_equal(left->val, right->val)) {
      return MOD_JSON_FALSE;
    }
    ++left;
    ++right;
  }
  return MOD_JSON_TRUE;
}

/*
 * Remove the pair stored under the C-string `key`, releasing its key and
 * value and closing the gap. Nothing happens when `obj` or `key` is NULL or
 * the key is not present.
 */
void mod_json_object_erase(mod_json_object_t *obj, mod_json_cchar_t *key) {
  mod_json_string_t probe;
  mod_json_pair_t *hit;
  mod_json_pair_t *next;
  mod_json_size_t npos;

  if (!obj || !key) {
    return;
  }

  /* temporary view of the key; only first/last are read by the lookup */
  probe.first = (mod_json_char_t *)key;
  probe.last = probe.first + mod_json_utils_strlen(key);

  hit = mod_json_object_find_pair(obj, &probe, &npos);
  if (!hit) {
    return;
  }

  mod_json_pair_cleanup(hit);

  /* move every later pair one slot towards the front */
  for (next = hit + 1; next != obj->last; ++next) {
    next[-1].key = next->key;
    next[-1].val = next->val;
  }
  --obj->last;
}

/*
 * Return the value stored under the C-string `key`, or NULL when `obj` or
 * `key` is NULL or the key is not present. The stored value itself may also
 * be NULL.
 */
mod_json_value_t *mod_json_object_at(mod_json_object_t *obj,
                                     mod_json_cchar_t *key) {
  mod_json_string_t probe;
  mod_json_pair_t *hit;
  mod_json_size_t npos;

  if (!obj || !key) {
    return NULL;
  }

  /* temporary view of the key; only first/last are read by the lookup */
  probe.first = (mod_json_char_t *)key;
  probe.last = probe.first + mod_json_utils_strlen(key);

  hit = mod_json_object_find_pair(obj, &probe, &npos);
  return hit ? hit->val : NULL;
}

/*
 * Return the pair stored under the C-string `key`, or NULL when `obj` or
 * `key` is NULL or the key is not present.
 */
mod_json_pair_t *mod_json_object_find(mod_json_object_t *obj,
                                      mod_json_cchar_t *key) {
  mod_json_string_t probe;
  mod_json_size_t npos;

  if (!obj || !key) {
    return NULL;
  }

  /* temporary view of the key; only first/last are read by the lookup */
  probe.first = (mod_json_char_t *)key;
  probe.last = probe.first + mod_json_utils_strlen(key);

  return mod_json_object_find_pair(obj, &probe, &npos);
}

/*
 * Merge the pairs of `src` into `dst`.
 *
 * For every pair of `src`:
 *   - a key missing from `dst` is inserted together with its value;
 *   - a key present in `dst` with an empty value takes the `src` value
 *     (through mod_json_value_grab);
 *   - otherwise the `dst` value is merged with the `src` value through
 *     mod_json_value_merge(); a shared `dst` value is replaced by a private
 *     clone first.
 *
 * Returns 0 on success. Fails when either argument is NULL, when both are
 * the same object, or when inserting or cloning runs out of memory; in the
 * latter cases `dst` may already be partially merged.
 */
int mod_json_object_merge(mod_json_object_t *dst, mod_json_object_t *src) {
  mod_json_pair_t *iter;

  mod_json_minus_if_false(dst && src && dst != src);

  for (iter = src->first; iter != src->last; ++iter) {
    mod_json_pair_t *elem;
    mod_json_size_t npos;

    elem = mod_json_object_find_pair(dst, iter->key, &npos);
    if (!elem) {
      /* insert a new one; a failed insertion must not be ignored */
      mod_json_minus_if_false(
          mod_json_object_insert_force(dst, npos, iter->key, iter->val));
      continue;
    }

    if (!elem->val) {
      elem->val = iter->val ? mod_json_value_grab(iter->val) : NULL;
      continue;
    }

    if (mod_json_value_is_shared(elem->val)) {
      /*
       * Clone before giving up the shared value, so that a failed clone
       * leaves the pair untouched instead of replacing its value with NULL.
       */
      mod_json_value_t *copy = mod_json_value_clone(elem->val);

      mod_json_minus_if_false(copy);
      mod_json_value_put(elem->val);
      elem->val = copy;
    }
    mod_json_value_merge(elem->val, iter->val);
  }
  return 0;
}

/*
 * Skip helper selected by the token's options: mod_json_utils_strskp() when
 * MOD_JSON_COMMENT is set, mod_json_utils_strskpb() otherwise.
 */
static inline mod_json_cchar_t *mod_json_token_strskp(mod_json_token_t *tok,
                                                      mod_json_cchar_t *cstr) {
  if (tok->options & MOD_JSON_COMMENT) {
    return mod_json_utils_strskp(cstr);
  }
  return mod_json_utils_strskpb(cstr);
}

/*
 * Quote-search helper selected by the token's options:
 * mod_json_utils_strfquo2() when MOD_JSON_UNSTRICT is set,
 * mod_json_utils_strfquo() otherwise.
 */
static inline mod_json_cchar_t *mod_json_token_strfquo(mod_json_token_t *tok,
                                                       mod_json_cchar_t *cstr,
                                                       mod_json_char_t quo) {
  if (tok->options & MOD_JSON_UNSTRICT) {
    return mod_json_utils_strfquo2(cstr, quo);
  }
  return mod_json_utils_strfquo(cstr, quo);
}

/*
 * Separator-search helper selected by the token's options:
 * mod_json_utils_strfsep2() when MOD_JSON_COMMENT is set,
 * mod_json_utils_strfsep() otherwise.
 */
static inline mod_json_cchar_t *mod_json_token_strfsep(mod_json_token_t *tok,
                                                       mod_json_cchar_t *cstr) {
  if (tok->options & MOD_JSON_COMMENT) {
    return mod_json_utils_strfsep2(cstr);
  }
  return mod_json_utils_strfsep(cstr);
}

/*
 * Create a parser token.
 *
 * Without `opt` the default options and default object/array depth limits
 * are used. With `opt`, its options are taken as given and each depth limit
 * overrides the default only when it is greater than zero.
 * The allocation holds the token header followed by one tag character per
 * allowed nesting level; only the header is zero-initialised.
 * Returns NULL on allocation failure.
 */
mod_json_token_t *mod_json_token_create(mod_json_option_t *opt) {
  mod_json_size_t flags = MOD_JSON_TOKEN_DEFOPTS;
  mod_json_size_t obj_limit = MOD_JSON_TOKEN_DEFOBJDEP;
  mod_json_size_t arr_limit = MOD_JSON_TOKEN_DEFARRDEP;
  mod_json_token_t *made;

  if (opt) {
    flags = opt->options;

    if (opt->object_depth > 0) {
      obj_limit = opt->object_depth;
    }
    if (opt->array_depth > 0) {
      arr_limit = opt->array_depth;
    }
  }

  made = (mod_json_token_t *)mod_json_malloc(
      (obj_limit + arr_limit) * sizeof(mod_json_char_t) +
      sizeof(mod_json_token_t));
  mod_json_null_if_false(made);

  memset(made, 0, sizeof(mod_json_token_t));
  made->options = flags;
  made->object_max_depth = obj_limit;
  made->array_max_depth = arr_limit;
  made->state = mod_json_state_null;
  made->error = mod_json_error_null;
  return made;
}

/* Free a token; the pointer is passed to mod_json_free() as is. */
void mod_json_token_destroy(mod_json_token_t *tok) {
  mod_json_free(tok);
  return;
}

/*
 * Record `tag` for the current nesting level (object depth + array depth).
 * Nothing is stored when the depth is zero.
 */
static inline void mod_json_token_set_tag(mod_json_token_t *tok,
                                          mod_json_char_t tag) {
  mod_json_size_t level = tok->object_depth + tok->array_depth;

  if (level == 0) {
    return;
  }
  tok->tags[level - 1] = tag;
}

/*
 * Return the tag recorded for the current nesting level, or
 * (mod_json_char_t)-1 when the depth is zero.
 */
static inline mod_json_char_t mod_json_token_tag(mod_json_token_t *tok) {
  mod_json_size_t level = tok->object_depth + tok->array_depth;

  if (level == 0) {
    return (mod_json_char_t)-1;
  }
  return tok->tags[level - 1];
}

/* Read the token's `error` field. */
mod_json_error_t mod_json_token_error(mod_json_token_t *tok) {
  mod_json_error_t code = tok->error;

  return code;
}

/* Read the token's `context` pointer. */
mod_json_cchar_t *mod_json_token_context(mod_json_token_t *tok) {
  mod_json_cchar_t *where = tok->context;

  return where;
}

/* Read the token's `state` field. */
mod_json_state_t mod_json_token_state(mod_json_token_t *tok) {
  mod_json_state_t current = tok->state;

  return current;
}

/* Read the token's current object nesting depth. */
mod_json_size_t mod_json_token_object_depth(mod_json_token_t *tok) {
  mod_json_size_t depth = tok->object_depth;

  return depth;
}

/* Read the token's current array nesting depth. */
mod_json_size_t mod_json_token_array_depth(mod_json_token_t *tok) {
  mod_json_size_t depth = tok->array_depth;

  return depth;
}

/* Total current nesting depth: object depth plus array depth. */
mod_json_size_t mod_json_token_depth(mod_json_token_t *tok) {
  mod_json_size_t total = tok->object_depth + tok->array_depth;

  return total;
}

/* Read the token's object depth limit. */
mod_json_size_t mod_json_token_max_object_depth(mod_json_token_t *tok) {
  mod_json_size_t limit = tok->object_max_depth;

  return limit;
}

/* Read the token's array depth limit. */
mod_json_size_t mod_json_token_max_array_depth(mod_json_token_t *tok) {
  mod_json_size_t limit = tok->array_max_depth;

  return limit;
}

/* Total depth limit: object depth limit plus array depth limit. */
mod_json_size_t mod_json_token_max_depth(mod_json_token_t *tok) {
  mod_json_size_t limit = tok->object_max_depth + tok->array_max_depth;

  return limit;
}

/* Read the user parameter pointer stored in the token. */
mod_json_void_t *mod_json_token_param(mod_json_token_t *tok) {
  mod_json_void_t *user = tok->param;

  return user;
}

/* Attaches an opaque user pointer to the tokenizer; it can be read back with
 * mod_json_token_param(). */
void mod_json_token_set_param(mod_json_token_t *tok, mod_json_void_t *param) {
  tok->param = param;
}

/* Installs the event callback. The invoke helpers call it only when it is
 * non-NULL, so passing NULL disables event delivery. */
void mod_json_token_set_event(mod_json_token_t *tok, mod_json_event_proc proc) {
  tok->event_proc = proc;
}

/* Returns the code of the event most recently dispatched to the callback. */
mod_json_event_t mod_json_token_event(mod_json_token_t *tok) {
  return tok->event_code;
}

/* Dispatches a "field" event carrying the key text `val` of length `len`.
 * Returns the callback's result, or 0 when no callback is installed. */
static inline int mod_json_token_invoke_field(mod_json_token_t *tok,
                                              mod_json_cchar_t *val,
                                              mod_json_size_t len) {
  mod_json_event_proc handler = tok->event_proc;

  if (!handler) {
    return 0;
  }
  tok->event_code = mod_json_event_field;
  return handler(tok, (mod_json_void_t *)val, len);
}

/* Dispatches an "object" event with no payload.
 * Returns the callback's result, or 0 when no callback is installed. */
static inline int mod_json_token_invoke_object(mod_json_token_t *tok) {
  mod_json_event_proc handler = tok->event_proc;

  if (!handler) {
    return 0;
  }
  tok->event_code = mod_json_event_object;
  return handler(tok, NULL, 0);
}

/* Dispatches an "array" event with no payload.
 * Returns the callback's result, or 0 when no callback is installed. */
static inline int mod_json_token_invoke_array(mod_json_token_t *tok) {
  mod_json_event_proc handler = tok->event_proc;

  if (!handler) {
    return 0;
  }
  tok->event_code = mod_json_event_array;
  return handler(tok, NULL, 0);
}

/* Dispatches a "null" event with no payload.
 * Returns the callback's result, or 0 when no callback is installed. */
static inline int mod_json_token_invoke_null(mod_json_token_t *tok) {
  mod_json_event_proc handler = tok->event_proc;

  if (!handler) {
    return 0;
  }
  tok->event_code = mod_json_event_null;
  return handler(tok, NULL, 0);
}

/* Dispatches a "boolean" event. The callback receives a pointer to a local
 * copy of `val` together with its size, so the pointer is only valid for the
 * duration of the call.
 * Returns the callback's result, or 0 when no callback is installed. */
static inline int mod_json_token_invoke_boolean(mod_json_token_t *tok,
                                                mod_json_boolean_t val) {
  mod_json_event_proc handler = tok->event_proc;

  if (!handler) {
    return 0;
  }
  tok->event_code = mod_json_event_boolean;
  return handler(tok, &val, sizeof(val));
}

/* Dispatches an "integer" event. The callback receives a pointer to a local
 * copy of `val` together with its size, so the pointer is only valid for the
 * duration of the call.
 * Returns the callback's result, or 0 when no callback is installed. */
static inline int mod_json_token_invoke_integer(mod_json_token_t *tok,
                                                mod_json_integer_t val) {
  mod_json_event_proc handler = tok->event_proc;

  if (!handler) {
    return 0;
  }
  tok->event_code = mod_json_event_integer;
  return handler(tok, &val, sizeof(val));
}

/* Dispatches a "float" event. The callback receives a pointer to a local
 * copy of `val` together with its size, so the pointer is only valid for the
 * duration of the call.
 * Returns the callback's result, or 0 when no callback is installed. */
static inline int mod_json_token_invoke_float(mod_json_token_t *tok,
                                              mod_json_float_t val) {
  mod_json_event_proc handler = tok->event_proc;

  if (!handler) {
    return 0;
  }
  tok->event_code = mod_json_event_float;
  return handler(tok, &val, sizeof(val));
}

/* Dispatches a "string" event carrying the text `val` of length `len`.
 * Returns the callback's result, or 0 when no callback is installed. */
static inline int mod_json_token_invoke_string(mod_json_token_t *tok,
                                               mod_json_cchar_t *val,
                                               mod_json_size_t len) {
  mod_json_event_proc handler = tok->event_proc;

  if (!handler) {
    return 0;
  }
  tok->event_code = mod_json_event_string;
  return handler(tok, (mod_json_void_t *)val, len);
}

/* Handles the start state: after mod_json_token_strskp() (presumably skipping
 * insignificant characters) the document must open with '{' or '['.
 * Returns the position after the opening bracket, or NULL with the error set
 * to "empty" (input exhausted) or "start" (any other character). */
static inline mod_json_cchar_t *mod_json_token_start(mod_json_token_t *tok,
                                                     mod_json_cchar_t *cstr) {
  mod_json_cchar_t *pos = mod_json_token_strskp(tok, cstr);

  if (*pos == '{') {
    tok->state = mod_json_state_object_start;
    return pos + 1;
  }
  if (*pos == '[') {
    tok->state = mod_json_state_array_start;
    return pos + 1;
  }

  if (*pos == '\0') {
    tok->error = mod_json_error_empty;
  } else {
    tok->error = mod_json_error_start;
  }
  tok->context = pos;
  return NULL;
}

/* Parses the literal "null" (case-insensitive). `cstr` points at the leading
 * 'n'/'N', which the caller has already matched.
 * Returns the position after the literal, or NULL with tok->error set to
 * mod_json_error_value (bad literal) or mod_json_error_break (the event
 * callback returned non-zero). */
static inline mod_json_cchar_t *mod_json_token_value_null(
    mod_json_token_t *tok, mod_json_cchar_t *cstr) {
  /* Characters are examined strictly left to right and the `||` chain stops
   * at the first mismatch. A truncated literal such as "n" therefore stops at
   * its terminating NUL instead of reading bytes beyond the end of the
   * input buffer. */
  if ((cstr[1] != 'u' && cstr[1] != 'U') ||
      (cstr[2] != 'l' && cstr[2] != 'L') ||
      (cstr[3] != 'l' && cstr[3] != 'L')) {
    tok->error = mod_json_error_value;
    tok->context = cstr;
    return NULL;
  }

  if (mod_json_token_invoke_null(tok) != 0) {
    tok->error = mod_json_error_break;
    tok->context = cstr;
    return NULL;
  }
  return (cstr + 4);
}

/* Parses the literal "true" (case-insensitive). `cstr` points at the leading
 * 't'/'T', which the caller has already matched.
 * Returns the position after the literal, or NULL with tok->error set to
 * mod_json_error_value (bad literal) or mod_json_error_break (the event
 * callback returned non-zero). */
static inline mod_json_cchar_t *mod_json_token_value_true(
    mod_json_token_t *tok, mod_json_cchar_t *cstr) {
  /* Characters are examined strictly left to right and the `||` chain stops
   * at the first mismatch. A truncated literal such as "t" therefore stops at
   * its terminating NUL instead of reading bytes beyond the end of the
   * input buffer. */
  if ((cstr[1] != 'r' && cstr[1] != 'R') ||
      (cstr[2] != 'u' && cstr[2] != 'U') ||
      (cstr[3] != 'e' && cstr[3] != 'E')) {
    tok->error = mod_json_error_value;
    tok->context = cstr;
    return NULL;
  }

  if (mod_json_token_invoke_boolean(tok, MOD_JSON_TRUE) != 0) {
    tok->error = mod_json_error_break;
    tok->context = cstr;
    return NULL;
  }
  return (cstr + 4);
}

/* Parses the literal "false" (case-insensitive). `cstr` points at the leading
 * 'f'/'F', which the caller has already matched.
 * Returns the position after the literal, or NULL with tok->error set to
 * mod_json_error_value (bad literal) or mod_json_error_break (the event
 * callback returned non-zero). */
static inline mod_json_cchar_t *mod_json_token_value_false(
    mod_json_token_t *tok, mod_json_cchar_t *cstr) {
  /* Characters are examined strictly left to right and the `||` chain stops
   * at the first mismatch. A truncated literal such as "f" therefore stops at
   * its terminating NUL instead of reading bytes beyond the end of the
   * input buffer. */
  if ((cstr[1] != 'a' && cstr[1] != 'A') ||
      (cstr[2] != 'l' && cstr[2] != 'L') ||
      (cstr[3] != 's' && cstr[3] != 'S') ||
      (cstr[4] != 'e' && cstr[4] != 'E')) {
    tok->error = mod_json_error_value;
    tok->context = cstr;
    return NULL;
  }

  if (mod_json_token_invoke_boolean(tok, MOD_JSON_FALSE) != 0) {
    tok->error = mod_json_error_break;
    tok->context = cstr;
    return NULL;
  }
  return (cstr + 5);
}

/* Parses the three-letter literal "inf" (case-insensitive) and reports it as
 * the float MOD_JSON_INFINITY. `cstr` points at the leading 'i'/'I', which
 * the caller has already matched.
 * Returns the position after the literal, or NULL with tok->error set to
 * mod_json_error_value (bad literal) or mod_json_error_break (the event
 * callback returned non-zero). */
static inline mod_json_cchar_t *mod_json_token_value_infinity(
    mod_json_token_t *tok, mod_json_cchar_t *cstr) {
  /* Characters are examined strictly left to right and the `||` chain stops
   * at the first mismatch. A truncated literal such as "i" therefore stops at
   * its terminating NUL instead of reading bytes beyond the end of the
   * input buffer. */
  if ((cstr[1] != 'n' && cstr[1] != 'N') ||
      (cstr[2] != 'f' && cstr[2] != 'F')) {
    tok->error = mod_json_error_value;
    tok->context = cstr;
    return NULL;
  }

  if (mod_json_token_invoke_float(tok, MOD_JSON_INFINITY) != 0) {
    tok->error = mod_json_error_break;
    tok->context = cstr;
    return NULL;
  }
  return (cstr + 3);
}

/* Parses a quoted string value. `cstr` points at the opening quote and `quo`
 * is the quote character in use. The closing quote is located with
 * mod_json_token_strfquo(); the text between the quotes is reported through
 * a string event.
 * Returns the position after the closing quote, or NULL with the error set
 * to "quote" (no closing quote found) or "break" (callback refused). */
static inline mod_json_cchar_t *mod_json_token_value_string(
    mod_json_token_t *tok, mod_json_cchar_t *cstr, mod_json_char_t quo) {
  mod_json_cchar_t *text = cstr + 1;
  mod_json_cchar_t *end_quote = mod_json_token_strfquo(tok, text, quo);

  if (end_quote == NULL) {
    tok->error = mod_json_error_quote;
    tok->context = text;
    return NULL;
  }

  if (mod_json_token_invoke_string(tok, text,
                                   (mod_json_size_t)(end_quote - text)) != 0) {
    tok->error = mod_json_error_break;
    tok->context = text;
    return NULL;
  }
  return end_quote + 1;
}

/* Parses a number: [sign] digits [ '.' digits ] [ ('e'|'E') [sign] digits ].
 *
 * Integers that fit are reported through an integer event (unsigned 64-bit
 * range for non-negative input, down to -2^63 for negative input). Anything
 * larger, or anything with a fraction or exponent, is reported through a
 * float event.
 *
 * Returns the position of the first character after the number. On failure
 * returns NULL and ALWAYS records an error: mod_json_error_value for
 * malformed or out-of-range numbers, mod_json_error_break when the event
 * callback returns non-zero. Recording the error matters because
 * mod_json_token_parse() derives its return code solely from tok->error; a
 * silent NULL would make a document with a malformed number look like a
 * successful parse. */
static inline mod_json_cchar_t *mod_json_token_value_number(
    mod_json_token_t *tok, mod_json_cchar_t *cstr) {
  enum { number_integer, number_float } num_type = number_integer;

  mod_json_float_t dbl = 0.0;
  uint32_t dig = 0;
  uint64_t u64 = 0;
  int32_t minus = 0;
  int32_t exp_frac = 0, exp = 0;

  /* Parse minus */
  minus = *cstr;
  if (minus == '-' || minus == '+') {
    ++cstr;
  }

  /* The first digit (a non-digit wraps to a value above 9 once unsigned) */
  if ((dig = (uint32_t)(*cstr - '0')) > 9) {
    tok->error = mod_json_error_value;
    tok->context = cstr;
    return NULL;
  }

  /* Save the first digit */
  u64 = dig;

  /* Parse as 64bit integer */
  if (minus != '-') {
    while ((dig = (uint32_t)(*(++cstr) - '0')) <= 9) {
      if (u64 >= 1844674407370955161uLL) {
        /* 2^64 - 1 = 18446744073709551615 */
        if (u64 != 1844674407370955161uLL || dig > 5) {
          /* Appending this digit would overflow: switch to double. */
          dbl = (mod_json_float_t)u64 * 10 + dig;
          num_type = number_float;
          break;
        }
      }
      u64 = u64 * 10 + dig;
    }
  } else {
    while ((dig = (uint32_t)(*(++cstr) - '0')) <= 9) {
      /* 2^63 = 9223372036854775808 */
      if (u64 >= 922337203685477580uLL) {
        if (u64 != 922337203685477580uLL || dig > 8) {
          /* Magnitude would exceed 2^63: switch to double. */
          dbl = (mod_json_float_t)u64 * 10 + dig;
          num_type = number_float;
          break;
        }
      }
      u64 = u64 * 10 + dig;
    }
  }

  /* Force double for big integer */
  if (num_type == number_float) {
    while ((dig = (uint32_t)(*(++cstr) - '0')) <= 9) {
      if (dbl >= 1E307) {
        /* Number too big to store in double */
        tok->error = mod_json_error_value;
        tok->context = cstr;
        return NULL;
      }
      dbl = dbl * 10 + dig;
    }
  }

  /* Parse frac = decimal-point 1*DIGIT */
  if (*cstr == '.') {
    if (num_type != number_float) {
      dbl = (mod_json_float_t)u64;
      num_type = number_float;
    }

    if ((dig = (uint32_t)(*(++cstr) - '0')) > 9) {
      /* At least one digit in fraction part */
      tok->error = mod_json_error_value;
      tok->context = cstr;
      return NULL;
    }

    dbl = dbl * 10 + dig;
    --exp_frac;

    /* Further fraction digits are consumed, but only the first 16 in total
     * contribute to the value. */
    while ((dig = (uint32_t)(*(++cstr) - '0')) <= 9) {
      if (exp_frac > -16) {
        dbl = dbl * 10 + dig;
        --exp_frac;
      }
    }
  }

  /* Parse exp = e [ minus / plus ] 1*DIGIT */
  if (*cstr == 'e' || *cstr == 'E') {
    int32_t exp_minus = 0;

    if (num_type != number_float) {
      dbl = (mod_json_float_t)u64;
      num_type = number_float;
    }

    exp_minus = *(++cstr);
    if (exp_minus == '-' || exp_minus == '+') {
      ++cstr;
    }

    /* The first number char after 'e/E' */
    if ((dig = (uint32_t)(*cstr - '0')) > 9) {
      tok->error = mod_json_error_value;
      tok->context = cstr;
      return NULL;
    }
    exp = (int32_t)dig;

    while ((dig = (uint32_t)(*(++cstr) - '0')) <= 9) {
      exp = exp * 10 + (int32_t)dig;
      if (exp > 308) {
        /* Exponent magnitude out of the supported range */
        tok->error = mod_json_error_value;
        tok->context = cstr;
        return NULL;
      }
    }

    if (exp_minus == '-') {
      exp = -exp;
    }
  }

  /* Finish parsing, call event according to the type of number. */
  if (num_type == number_float) {
    dbl *= mod_json_utils_pow10(exp + exp_frac);
    if (minus == '-') {
      dbl = -dbl;
    }
    if (mod_json_token_invoke_float(tok, dbl) != 0) {
      tok->error = mod_json_error_break;
      tok->context = cstr;
      return NULL;
    }
  } else {
    if (minus == '-') {
      /* Negate in unsigned arithmetic: the magnitude may be exactly 2^63,
       * for which negating a signed 64-bit value would overflow. */
      u64 = (uint64_t)0 - u64;
    }
    if (mod_json_token_invoke_integer(tok, (mod_json_integer_t)u64) != 0) {
      tok->error = mod_json_error_break;
      tok->context = cstr;
      return NULL;
    }
  }
  return cstr;
}

/* Handles the state entered right after a '[' was consumed. Enforces the
 * array depth limit, fires the array event (before the depth is increased),
 * pushes the '[' tag and selects the next state from the following
 * character.
 * Returns the next read position, or NULL with the error set to "depth",
 * "break" or "trunc". */
static inline mod_json_cchar_t *mod_json_token_array_start(
    mod_json_token_t *tok, mod_json_cchar_t *cstr) {
  /* refuse to nest deeper than configured */
  if (!(tok->array_depth < tok->array_max_depth)) {
    tok->error = mod_json_error_depth;
    tok->context = cstr;
    return NULL;
  }

  /* notify the listener while the depth still refers to the parent */
  if (mod_json_token_invoke_array(tok) != 0) {
    tok->error = mod_json_error_break;
    tok->context = cstr;
    return NULL;
  }

  /* enter the array and remember its kind */
  ++tok->array_depth;
  mod_json_token_set_tag(tok, '[');

  cstr = mod_json_token_strskp(tok, cstr);
  if (*cstr == '[') {
    tok->state = mod_json_state_array_start;
    return cstr + 1;
  }
  if (*cstr == ']') {
    tok->state = mod_json_state_array_finish;
    return cstr + 1;
  }
  if (*cstr == '\0') {
    tok->error = mod_json_error_trunc;
    tok->context = cstr;
    return NULL;
  }

  /* anything else is examined by the element state, without consuming it */
  tok->state = mod_json_state_array_half;
  return cstr;
}

/* Handles the position inside an array where an element, a separator or the
 * closing bracket is expected.
 *
 * Structural characters (',', '[', ']', '{') only switch state. Scalar
 * elements (true/false/null/inf, numbers, strings) are parsed in place and
 * must be followed by ',' or ']'. The "inf" literal is accepted here exactly
 * as it is for object member values, so both containers take the same set of
 * scalars.
 *
 * Returns the next read position, or NULL on error or when a value parser
 * fails. */
static inline mod_json_cchar_t *mod_json_token_array_half(
    mod_json_token_t *tok, mod_json_cchar_t *cstr) {
  cstr = mod_json_token_strskp(tok, cstr);
  switch (*cstr) {
    case ',':
      tok->state = mod_json_state_array_half;
      return (cstr + 1);

    case '[':
      tok->state = mod_json_state_array_start;
      return (cstr + 1);

    case ']':
      tok->state = mod_json_state_array_finish;
      return (cstr + 1);

    case '{':
      tok->state = mod_json_state_object_start;
      return (cstr + 1);

    case '\0':
      tok->error = mod_json_error_trunc;
      tok->context = cstr;
      return NULL;

    /* value in array */
    case 't':
    case 'T':
      cstr = mod_json_token_value_true(tok, cstr);
      break;

    case 'f':
    case 'F':
      cstr = mod_json_token_value_false(tok, cstr);
      break;

    case 'i':
    case 'I':
      cstr = mod_json_token_value_infinity(tok, cstr);
      break;

    case 'n':
    case 'N':
      cstr = mod_json_token_value_null(tok, cstr);
      break;

    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
    case '+':
    case '-':
      cstr = mod_json_token_value_number(tok, cstr);
      break;

    case '\"':
      cstr = mod_json_token_value_string(tok, cstr, '\"');
      break;

    case '\'':
      /* single-quoted strings are only legal when the option is enabled */
      if (tok->options & MOD_JSON_SQUOTE) {
        cstr = mod_json_token_value_string(tok, cstr, '\'');
        break;
      }
      /* FALLTHRU */

    default:
      tok->error = mod_json_error_value;
      tok->context = cstr;
      return NULL;
  }

  /* a value parser failed */
  if (!cstr) {
    return NULL;
  }

  /* after a scalar element only a separator or the closing bracket may come */
  cstr = mod_json_token_strskp(tok, cstr);
  switch (*cstr) {
    case ',':
      tok->state = mod_json_state_array_half;
      return (cstr + 1);

    case ']':
      tok->state = mod_json_state_array_finish;
      return (cstr + 1);

    case '\0':
      tok->error = mod_json_error_trunc;
      tok->context = cstr;
      break;

    default:
      tok->error = mod_json_error_value;
      tok->context = cstr;
      break;
  }
  return NULL;
}

/* Handles the state entered right after a ']' was consumed.
 *
 * The bracket is accepted only if an array is open AND the innermost open
 * container is an array (top tag is '['). Without the second check a ']'
 * could close an outer array while an inner object is still open, so
 * mismatched input such as `[{"a":[1]]}` would be accepted.
 *
 * On success the array depth is decreased, the array event is fired and the
 * next state is chosen from the following character.
 *
 * Returns the next read position; NULL when the document is complete (state
 * becomes "finish") or on error ("depth", "array", "break", "trunc"). */
static inline mod_json_cchar_t *mod_json_token_array_finish(
    mod_json_token_t *tok, mod_json_cchar_t *cstr) {
  if (!tok->array_depth) {
    tok->error = mod_json_error_depth;
    tok->context = cstr;
    return NULL;
  }

  /* the container being closed must be the innermost one */
  if (mod_json_token_tag(tok) != '[') {
    tok->error = mod_json_error_array;
    tok->context = cstr;
    return NULL;
  }

  /* decrease depth */
  --tok->array_depth;

  /* callback */
  if (mod_json_token_invoke_array(tok) != 0) {
    tok->error = mod_json_error_break;
    tok->context = cstr;
    return NULL;
  }

  cstr = mod_json_token_strskp(tok, cstr);
  switch (*cstr) {
    case ']':
      tok->state = mod_json_state_array_finish;
      return (cstr + 1);

    case '}':
      tok->state = mod_json_state_object_finish;
      return (cstr + 1);

    case '\0':
      if (tok->object_depth || tok->array_depth) {
        /* input ended while containers are still open */
        tok->error = mod_json_error_trunc;
        tok->context = cstr;
      } else {
        tok->state = mod_json_state_finish;
      }
      break;

    case ',':
      if (tok->object_depth || tok->array_depth) {
        /* continue inside whichever container is now innermost */
        mod_json_char_t tag = mod_json_token_tag(tok);

        if (tag == '{') {
          tok->state = mod_json_state_object_half1;
          return (cstr + 1);
        } else if (tag == '[') {
          tok->state = mod_json_state_array_half;
          return (cstr + 1);
        }
      }
      /* FALLTHRU */

    default:
      tok->error = mod_json_error_array;
      tok->context = cstr;
      break;
  }
  return NULL;
}

/* Handles the state entered right after a '{' was consumed. Enforces the
 * object depth limit, fires the object event (before the depth is
 * increased), pushes the '{' tag and selects the next state from the
 * following character.
 * Returns the next read position, or NULL with the error set to "depth",
 * "break" or "trunc". */
static inline mod_json_cchar_t *mod_json_token_object_start(
    mod_json_token_t *tok, mod_json_cchar_t *cstr) {
  /* refuse to nest deeper than configured */
  if (!(tok->object_depth < tok->object_max_depth)) {
    tok->error = mod_json_error_depth;
    tok->context = cstr;
    return NULL;
  }

  /* notify the listener while the depth still refers to the parent */
  if (mod_json_token_invoke_object(tok) != 0) {
    tok->error = mod_json_error_break;
    tok->context = cstr;
    return NULL;
  }

  /* enter the object and remember its kind */
  ++tok->object_depth;
  mod_json_token_set_tag(tok, '{');

  cstr = mod_json_token_strskp(tok, cstr);
  if (*cstr == '}') {
    tok->state = mod_json_state_object_finish;
    return cstr + 1;
  }
  if (*cstr == '\0') {
    tok->error = mod_json_error_trunc;
    tok->context = cstr;
    return NULL;
  }

  /* anything else is examined by the key state, without consuming it */
  tok->state = mod_json_state_object_half1;
  return cstr;
}

/* Parses a quoted object key. `cstr` points at the opening quote and `quo`
 * is the quote character in use. The key text is reported through a field
 * event and must be followed by ':'.
 * Returns the position after the ':', or NULL with the error set to "quote"
 * (no closing quote found), "break", "trunc" or "key". */
static inline mod_json_cchar_t *mod_json_token_object_quotekey(
    mod_json_token_t *tok, mod_json_cchar_t *cstr, mod_json_char_t quo) {
  mod_json_cchar_t *name = cstr + 1;
  mod_json_cchar_t *cursor = mod_json_token_strfquo(tok, name, quo);

  if (!cursor) {
    tok->error = mod_json_error_quote;
    tok->context = name;
    return NULL;
  }

  /* hand the key text to the listener */
  if (mod_json_token_invoke_field(tok, name,
                                  (mod_json_size_t)(cursor - name)) != 0) {
    tok->error = mod_json_error_break;
    tok->context = name;
    return NULL;
  }

  /* step over the closing quote and look for the separator */
  cursor = mod_json_token_strskp(tok, cursor + 1);
  if (*cursor == ':') {
    tok->state = mod_json_state_object_half2;
    return cursor + 1;
  }

  if (*cursor == '\0') {
    tok->error = mod_json_error_trunc;
    tok->context = name;
  } else {
    tok->error = mod_json_error_key;
    tok->context = cursor;
  }
  return NULL;
}

/* Parses an unquoted object key. The key's end is located with
 * mod_json_token_strfsep(); an empty key (end equals start) is an error.
 * The key text is reported through a field event and must be followed
 * by ':'.
 * Returns the position after the ':', or NULL with the error set to "key",
 * "break" or "trunc". */
static inline mod_json_cchar_t *mod_json_token_object_simplekey(
    mod_json_token_t *tok, mod_json_cchar_t *cstr) {
  mod_json_cchar_t *cursor = mod_json_token_strfsep(tok, cstr);

  if (cursor == cstr) {
    tok->error = mod_json_error_key;
    tok->context = cstr;
    return NULL;
  }

  /* hand the key text to the listener */
  if (mod_json_token_invoke_field(tok, cstr,
                                  (mod_json_size_t)(cursor - cstr)) != 0) {
    tok->error = mod_json_error_break;
    tok->context = cstr;
    return NULL;
  }

  cursor = mod_json_token_strskp(tok, cursor);
  if (*cursor == ':') {
    tok->state = mod_json_state_object_half2;
    return cursor + 1;
  }

  if (*cursor == '\0') {
    tok->error = mod_json_error_trunc;
    tok->context = cstr;
  } else {
    tok->error = mod_json_error_key;
    tok->context = cursor;
  }
  return NULL;
}

/* Handles the position inside an object where a key, a separator or the
 * closing brace is expected. Double-quoted keys are always accepted;
 * single-quoted keys require MOD_JSON_SQUOTE; any other start is treated as
 * an unquoted key when MOD_JSON_SIMPLE is set and is a "quote" error
 * otherwise.
 * Returns the next read position, or NULL on error. */
static inline mod_json_cchar_t *mod_json_token_object_half1(
    mod_json_token_t *tok, mod_json_cchar_t *cstr) {
  cstr = mod_json_token_strskp(tok, cstr);
  switch (*cstr) {
    case ',':
      tok->state = mod_json_state_object_half1;
      return cstr + 1;

    case '}':
      tok->state = mod_json_state_object_finish;
      return cstr + 1;

    case '\0':
      tok->error = mod_json_error_trunc;
      tok->context = cstr;
      return NULL;

    case '\"':
      /* key wrapped in double quotes */
      return mod_json_token_object_quotekey(tok, cstr, '\"');

    case '\'':
      if (tok->options & MOD_JSON_SQUOTE) {
        /* key wrapped in single quotes */
        return mod_json_token_object_quotekey(tok, cstr, '\'');
      }
      /* FALLTHRU */

    default:
      break;
  }

  /* not a quoted key: only acceptable in simple format */
  if (tok->options & MOD_JSON_SIMPLE) {
    return mod_json_token_object_simplekey(tok, cstr);
  }
  tok->error = mod_json_error_quote;
  tok->context = cstr;
  return NULL;
}

/* Handles the position after a key's ':' where the member value is expected.
 * Structural characters ('{', '[', ',', '}') only switch state. Scalar
 * values (true/false/inf/null, numbers, strings) are parsed in place and
 * must be followed by ',' or '}'.
 * Returns the next read position, or NULL on error or when a value parser
 * fails. */
static inline mod_json_cchar_t *mod_json_token_object_half2(
    mod_json_token_t *tok, mod_json_cchar_t *cstr) {
  cstr = mod_json_token_strskp(tok, cstr);
  switch (*cstr) {
    case '{':
      tok->state = mod_json_state_object_start;
      return cstr + 1;

    case '[':
      tok->state = mod_json_state_array_start;
      return cstr + 1;

    case ',':
      tok->state = mod_json_state_object_half1;
      return cstr + 1;

    case '}':
      tok->state = mod_json_state_object_finish;
      return cstr + 1;

    case '\0':
      tok->error = mod_json_error_trunc;
      tok->context = cstr;
      return NULL;

    /* scalar member values */
    case 't':
    case 'T':
      cstr = mod_json_token_value_true(tok, cstr);
      break;

    case 'f':
    case 'F':
      cstr = mod_json_token_value_false(tok, cstr);
      break;

    case 'i':
    case 'I':
      cstr = mod_json_token_value_infinity(tok, cstr);
      break;

    case 'n':
    case 'N':
      cstr = mod_json_token_value_null(tok, cstr);
      break;

    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
    case '+':
    case '-':
      cstr = mod_json_token_value_number(tok, cstr);
      break;

    case '\"':
      cstr = mod_json_token_value_string(tok, cstr, '\"');
      break;

    case '\'':
      /* single-quoted strings are only legal when the option is enabled */
      if (tok->options & MOD_JSON_SQUOTE) {
        cstr = mod_json_token_value_string(tok, cstr, '\'');
        break;
      }
      /* FALLTHRU */

    default:
      tok->error = mod_json_error_value;
      tok->context = cstr;
      return NULL;
  }

  /* a value parser failed */
  if (!cstr) {
    return NULL;
  }

  /* after a scalar value only a separator or the closing brace may come */
  cstr = mod_json_token_strskp(tok, cstr);
  if (*cstr == ',') {
    tok->state = mod_json_state_object_half1;
    return cstr + 1;
  }
  if (*cstr == '}') {
    tok->state = mod_json_state_object_finish;
    return cstr + 1;
  }

  if (*cstr == '\0') {
    tok->error = mod_json_error_trunc;
  } else {
    tok->error = mod_json_error_value;
  }
  tok->context = cstr;
  return NULL;
}

/* Handles the state entered right after a '}' was consumed.
 *
 * The brace is accepted only if an object is open AND the innermost open
 * container is an object (top tag is '{'). Without the second check a '}'
 * could close an outer object while an inner array is still open, so
 * mismatched input such as `{"a":[[1]}]` would be accepted.
 *
 * On success the object depth is decreased, the object event is fired and
 * the next state is chosen from the following character.
 *
 * Returns the next read position; NULL when the document is complete (state
 * becomes "finish") or on error ("depth", "object", "break", "trunc"). */
static inline mod_json_cchar_t *mod_json_token_object_finish(
    mod_json_token_t *tok, mod_json_cchar_t *cstr) {
  if (!tok->object_depth) {
    tok->error = mod_json_error_depth;
    tok->context = cstr;
    return NULL;
  }

  /* the container being closed must be the innermost one */
  if (mod_json_token_tag(tok) != '{') {
    tok->error = mod_json_error_object;
    tok->context = cstr;
    return NULL;
  }

  /* decrease depth */
  --tok->object_depth;

  /* callback */
  if (mod_json_token_invoke_object(tok) != 0) {
    tok->error = mod_json_error_break;
    tok->context = cstr;
    return NULL;
  }

  cstr = mod_json_token_strskp(tok, cstr);
  switch (*cstr) {
    case '}':
      tok->state = mod_json_state_object_finish;
      return (cstr + 1);

    case ']':
      tok->state = mod_json_state_array_finish;
      return (cstr + 1);

    case '\0':
      if (tok->object_depth || tok->array_depth) {
        /* input ended while containers are still open */
        tok->error = mod_json_error_trunc;
        tok->context = cstr;
      } else {
        tok->state = mod_json_state_finish;
      }
      break;

    case ',':
      if (tok->object_depth || tok->array_depth) {
        /* continue inside whichever container is now innermost */
        mod_json_char_t tag = mod_json_token_tag(tok);

        if (tag == '{') {
          tok->state = mod_json_state_object_half1;
          return (cstr + 1);
        } else if (tag == '[') {
          tok->state = mod_json_state_array_half;
          return (cstr + 1);
        }
      }
      /* FALLTHRU */

    default:
      tok->error = mod_json_error_object;
      tok->context = cstr;
      break;
  }
  return NULL;
}

/* Handles the initial (null) state: non-empty input moves the machine to
 * the start state without consuming anything; a NULL or empty string is an
 * "invalid" error. */
static inline mod_json_cchar_t *mod_json_token_null(mod_json_token_t *tok,
                                                    mod_json_cchar_t *cstr) {
  if (cstr && *cstr != '\0') {
    tok->state = mod_json_state_start;
    return cstr;
  }

  tok->error = mod_json_error_invalid;
  tok->context = cstr;
  return NULL;
}

/* Handles the finish state: clears the error code and returns NULL, which
 * ends the parse loop. The input position is not used. */
static inline mod_json_cchar_t *mod_json_token_finish(mod_json_token_t *tok,
                                                      mod_json_cchar_t *cstr) {
  (void)cstr;
  tok->error = mod_json_error_null;
  return NULL;
}

/* Fallback for a state value the parse loop does not recognise: records a
 * "state" error at the current position and stops parsing. */
static inline mod_json_cchar_t *mod_json_token_default(mod_json_token_t *tok,
                                                       mod_json_cchar_t *cstr) {
  tok->context = cstr;
  tok->error = mod_json_error_state;
  return NULL;
}

/* Drives the tokenizer state machine over `cstr`. Each iteration dispatches
 * to the handler of the current state; a handler returns the next read
 * position, or NULL to stop (on error or on completion).
 * Returns 0 when the stored error code is mod_json_error_null once the loop
 * ends, -1 otherwise. */
int mod_json_token_parse(mod_json_token_t *tok, mod_json_cchar_t *cstr) {
  while (cstr != NULL) {
    switch (tok->state) {
      case mod_json_state_null:
        cstr = mod_json_token_null(tok, cstr);
        break;

      case mod_json_state_start:
        cstr = mod_json_token_start(tok, cstr);
        break;

      case mod_json_state_object_start:
        cstr = mod_json_token_object_start(tok, cstr);
        break;

      case mod_json_state_object_half1:
        cstr = mod_json_token_object_half1(tok, cstr);
        break;

      case mod_json_state_object_half2:
        cstr = mod_json_token_object_half2(tok, cstr);
        break;

      case mod_json_state_object_finish:
        cstr = mod_json_token_object_finish(tok, cstr);
        break;

      case mod_json_state_array_start:
        cstr = mod_json_token_array_start(tok, cstr);
        break;

      case mod_json_state_array_half:
        cstr = mod_json_token_array_half(tok, cstr);
        break;

      case mod_json_state_array_finish:
        cstr = mod_json_token_array_finish(tok, cstr);
        break;

      case mod_json_state_finish:
        cstr = mod_json_token_finish(tok, cstr);
        break;

      default:
        cstr = mod_json_token_default(tok, cstr);
        break;
    }
  }

  if (tok->error == mod_json_error_null) {
    return 0;
  }
  return -1;
}

/* Adds `val` to the container stored at nesting level `depth - 1`: into an
 * object under the parser's current key, or appended to an array.
 * Returns 0 on success; -1 when depth is zero, the container is neither an
 * object nor an array, or the object insertion fails. For arrays the result
 * of mod_json_array_push() is returned unchanged. */
static inline int mod_json_parser_insert(mod_json_parser_t *par,
                                         mod_json_size_t depth,
                                         mod_json_value_t *val) {
  if (depth > 0) {
    mod_json_value_t *parent = par->vals[depth - 1];

    if (parent->type == mod_json_type_object) {
      if (mod_json_object_insert(parent->data.c_obj, par->key, val)) {
        return 0;
      }
      return -1;
    }
    if (parent->type == mod_json_type_array) {
      return mod_json_array_push(parent->data.c_arr, val);
    }
  }
  return -1;
}

/* Creates an empty object value for nesting level `depth`. Below the root it
 * is inserted into its parent and, on success, recorded in par->vals[depth];
 * the local reference is then dropped. At depth zero it becomes the root in
 * par->vals[0] and the reference is kept.
 * Returns 0 on success, otherwise a non-zero failure code. */
static inline int mod_json_parser_insert_object(mod_json_parser_t *par,
                                                mod_json_size_t depth) {
  mod_json_object_t *fresh;
  mod_json_value_t *wrapped;
  int status;

  fresh = mod_json_object_set_default();
  mod_json_minus_if_false(fresh);

  /* the local object reference is dropped once the value has been built */
  wrapped = mod_json_value_set_object(fresh);
  mod_json_object_unset(fresh);
  mod_json_minus_if_false(wrapped);

  if (depth > 0) {
    status = mod_json_parser_insert(par, depth, wrapped);
    if (status == 0) {
      par->vals[depth] = wrapped;
    }
    mod_json_value_unset(wrapped);
    return status;
  }

  /* It's the root, save the pointer. Don't unset it. */
  par->vals[0] = wrapped;
  return 0;
}

/* Creates an empty array value for nesting level `depth`. Below the root it
 * is inserted into its parent and, on success, recorded in par->vals[depth];
 * the local reference is then dropped. At depth zero it becomes the root in
 * par->vals[0] and the reference is kept.
 * Returns 0 on success, otherwise a non-zero failure code. */
static inline int mod_json_parser_insert_array(mod_json_parser_t *par,
                                               mod_json_size_t depth) {
  mod_json_array_t *fresh;
  mod_json_value_t *wrapped;
  int status;

  fresh = mod_json_array_set_default();
  mod_json_minus_if_false(fresh);

  /* the local array reference is dropped once the value has been built */
  wrapped = mod_json_value_set_array(fresh);
  mod_json_array_unset(fresh);
  mod_json_minus_if_false(wrapped);

  if (depth > 0) {
    status = mod_json_parser_insert(par, depth, wrapped);
    if (status == 0) {
      par->vals[depth] = wrapped;
    }
    mod_json_value_unset(wrapped);
    return status;
  }

  /* It's the root, save the pointer. Don't unset it. */
  par->vals[0] = wrapped;
  return 0;
}

/* Attaches the DOM parser to the tokenizer by storing it in the tokenizer's
 * user-parameter slot. */
static inline void mod_json_token_set_parser(mod_json_token_t *tok,
                                             mod_json_parser_t *par) {
  tok->param = par;
}

/* Returns the tokenizer's user-parameter slot cast to the DOM parser type. */
static inline mod_json_parser_t *mod_json_token_parser(mod_json_token_t *tok) {
  return (mod_json_parser_t *)tok->param;
}

/* Field event: replaces the parser's pending key with a new string built
 * from `val`/`len`. Returns 0 on success, -1 if the string could not be
 * created. */
static inline int mod_json_parser_event_field(mod_json_token_t *tok,
                                              mod_json_cchar_t *val,
                                              mod_json_size_t len) {
  mod_json_parser_t *par = mod_json_token_parser(tok);

  /* drop the key left over from the previous member */
  mod_json_string_unset(par->key);
  par->key = mod_json_string_set(val, len);

  if (!par->key) {
    return -1;
  }
  return 0;
}

/* Array event: in the array-start state a new array value is created at the
 * current depth; in the array-finish state nothing needs doing. Any other
 * state yields -1. */
static inline int mod_json_parser_event_array(mod_json_token_t *tok) {
  mod_json_state_t state = mod_json_token_state(tok);

  if (state == mod_json_state_array_finish) {
    /* continue */
    return 0;
  }
  if (state == mod_json_state_array_start) {
    return mod_json_parser_insert_array(mod_json_token_parser(tok),
                                        mod_json_token_depth(tok));
  }
  return -1;
}

/* Object event: in the object-start state a new object value is created at
 * the current depth; in the object-finish state nothing needs doing. Any
 * other state yields -1. */
static inline int mod_json_parser_event_object(mod_json_token_t *tok) {
  mod_json_state_t state = mod_json_token_state(tok);

  if (state == mod_json_state_object_finish) {
    /* continue */
    return 0;
  }
  if (state == mod_json_state_object_start) {
    return mod_json_parser_insert_object(mod_json_token_parser(tok),
                                         mod_json_token_depth(tok));
  }
  return -1;
}

/* Null event: inserts the parser's cached null value into the current
 * container, creating the cached value on first use. */
static inline int mod_json_parser_event_null(mod_json_token_t *tok) {
  mod_json_parser_t *par = mod_json_token_parser(tok);

  /* created once, then reused for every null in the document */
  if (par->val_null == NULL) {
    par->val_null = mod_json_value_set_null();
    mod_json_minus_if_false(par->val_null);
  }
  return mod_json_parser_insert(par, mod_json_token_depth(tok), par->val_null);
}

/* Inserts the parser's cached boolean "true" value into the current
 * container, creating the cached value on first use. */
static inline int mod_json_parser_event_true(mod_json_token_t *tok) {
  mod_json_parser_t *par = mod_json_token_parser(tok);

  /* created once, then reused for every true in the document */
  if (par->val_true == NULL) {
    par->val_true = mod_json_value_set_boolean(MOD_JSON_TRUE);
    mod_json_minus_if_false(par->val_true);
  }
  return mod_json_parser_insert(par, mod_json_token_depth(tok), par->val_true);
}

/* Inserts the parser's cached boolean "false" value into the current
 * container, creating the cached value on first use. */
static inline int mod_json_parser_event_false(mod_json_token_t *tok) {
  mod_json_parser_t *par = mod_json_token_parser(tok);

  /* created once, then reused for every false in the document */
  if (par->val_false == NULL) {
    par->val_false = mod_json_value_set_boolean(MOD_JSON_FALSE);
    mod_json_minus_if_false(par->val_false);
  }
  return mod_json_parser_insert(par, mod_json_token_depth(tok),
                                par->val_false);
}

/* Boolean event: routes to the "true" or "false" handler depending on
 * whether `val` is non-zero. */
static inline int mod_json_parser_event_boolean(mod_json_token_t *tok,
                                                mod_json_boolean_t val) {
  return val ? mod_json_parser_event_true(tok)
             : mod_json_parser_event_false(tok);
}

/* Inserts the parser's cached integer-zero value into the current
 * container, creating the cached value on first use. */
static inline int mod_json_parser_event_zero(mod_json_token_t *tok) {
  mod_json_parser_t *par = mod_json_token_parser(tok);

  /* created once, then reused for every integer 0 in the document */
  if (par->val_zero == NULL) {
    par->val_zero = mod_json_value_set_integer(0);
    mod_json_minus_if_false(par->val_zero);
  }
  return mod_json_parser_insert(par, mod_json_token_depth(tok), par->val_zero);
}

/* Integer event: zero is served from the cached zero value; any other
 * integer gets a fresh value that is inserted into the current container,
 * after which the local reference is dropped.
 * Returns the insertion result, or -1 if the value could not be created. */
static inline int mod_json_parser_event_integer(mod_json_token_t *tok,
                                                mod_json_integer_t val) {
  mod_json_value_t *num;
  int status;

  if (val == 0) {
    /* zero event */
    return mod_json_parser_event_zero(tok);
  }

  num = mod_json_value_set_integer(val);
  if (!num) {
    return -1;
  }

  status = mod_json_parser_insert(mod_json_token_parser(tok),
                                  mod_json_token_depth(tok), num);
  mod_json_value_unset(num);
  return status;
}

/* Inserts the parser's cached float-zero value into the current container,
 * creating the cached value on first use. */
static inline int mod_json_parser_event_zerof(mod_json_token_t *tok) {
  mod_json_parser_t *par = mod_json_token_parser(tok);

  /* created once, then reused for every float 0.0 in the document */
  if (par->val_zerof == NULL) {
    par->val_zerof = mod_json_value_set_float(0.0);
    mod_json_minus_if_false(par->val_zerof);
  }
  return mod_json_parser_insert(par, mod_json_token_depth(tok),
                                par->val_zerof);
}

/* Float event: a value comparing equal to 0.0 is served from the cached
 * float-zero value; any other float gets a fresh value that is inserted into
 * the current container, after which the local reference is dropped.
 * Returns the insertion result, or -1 if the value could not be created. */
static inline int mod_json_parser_event_float(mod_json_token_t *tok,
                                              mod_json_float_t val) {
  mod_json_value_t *num;
  int status;

  if (val == 0.0) {
    /* zero event */
    return mod_json_parser_event_zerof(tok);
  }

  num = mod_json_value_set_float(val);
  if (!num) {
    return -1;
  }

  status = mod_json_parser_insert(mod_json_token_parser(tok),
                                  mod_json_token_depth(tok), num);
  mod_json_value_unset(num);
  return status;
}

/* Inserts the parser's cached empty-string value into the current
 * container, creating the cached value on first use. */
static inline int mod_json_parser_event_empty(mod_json_token_t *tok) {
  mod_json_parser_t *par = mod_json_token_parser(tok);

  if (par->val_empty == NULL) {
    mod_json_string_t *blank;

    blank = mod_json_string_set("", 0);
    mod_json_minus_if_false(blank);

    /* the local string reference is dropped once the value has been built */
    par->val_empty = mod_json_value_set_string(blank);
    mod_json_string_unset(blank);
    mod_json_minus_if_false(par->val_empty);
  }
  return mod_json_parser_insert(par, mod_json_token_depth(tok),
                                par->val_empty);
}

/* String event: an empty string is served from the cached empty-string
 * value; otherwise a new string value is built from `val`/`len` and inserted
 * into the current container, after which the local references are dropped.
 * Returns the insertion result, or -1 if an allocation failed. */
static inline int mod_json_parser_event_string(mod_json_token_t *tok,
                                               mod_json_cchar_t *val,
                                               mod_json_size_t len) {
  if (len > 0) {
    mod_json_string_t *text = mod_json_string_set(val, len);
    mod_json_value_t *wrapped = text ? mod_json_value_set_string(text) : NULL;
    int status = -1;

    /* the local string reference is no longer needed */
    mod_json_string_unset(text);

    if (wrapped) {
      status = mod_json_parser_insert(mod_json_token_parser(tok),
                                      mod_json_token_depth(tok), wrapped);
      mod_json_value_unset(wrapped);
    }
    return status;
  }

  /* empty event */
  return mod_json_parser_event_empty(tok);
}

/*
 * Event callback installed on the token by mod_json_parse().
 *
 * Dispatches on `tok->event_code` to the matching handler. How `val` is
 * interpreted depends on the event:
 *   - field / string:  character data, with `len` forwarded to the handler;
 *   - boolean / integer / float: pointer to a value of the matching type;
 *   - object / array / null: not used.
 *
 * Returns the handler's result, or -1 for an event code without a handler.
 */
static int mod_json_parser_event(mod_json_token_t *tok, mod_json_void_t *val,
                                 mod_json_size_t len) {
  int rc = -1;

  switch (tok->event_code) {
    case mod_json_event_field:
      rc = mod_json_parser_event_field(tok, (mod_json_cchar_t *)val, len);
      break;

    case mod_json_event_object:
      rc = mod_json_parser_event_object(tok);
      break;

    case mod_json_event_array:
      rc = mod_json_parser_event_array(tok);
      break;

    case mod_json_event_null:
      rc = mod_json_parser_event_null(tok);
      break;

    case mod_json_event_boolean:
      rc = mod_json_parser_event_boolean(tok, *(mod_json_boolean_t *)val);
      break;

    case mod_json_event_integer:
      rc = mod_json_parser_event_integer(tok, *(mod_json_integer_t *)val);
      break;

    case mod_json_event_float:
      rc = mod_json_parser_event_float(tok, *(mod_json_float_t *)val);
      break;

    case mod_json_event_string:
      rc = mod_json_parser_event_string(tok, (mod_json_cchar_t *)val, len);
      break;

    default:
      /* No handler: keep the failure code. */
      break;
  }
  return rc;
}

/*
 * Allocate a parser able to track `depth` nesting levels.
 *
 * The allocation is sizeof(mod_json_parser_t) plus `depth` value-pointer
 * slots. Only the parser header and vals[0] are cleared; the remaining
 * slots are left uninitialised.
 *
 * Returns the new parser, or NULL when `depth` is zero or the allocation
 * fails.
 */
static inline mod_json_parser_t *mod_json_parser_create(mod_json_size_t depth) {
  mod_json_parser_t *created;

  mod_json_null_if_false(depth > 0);

  created = (mod_json_parser_t *)mod_json_malloc(
      sizeof(mod_json_parser_t) + depth * sizeof(mod_json_value_t *));
  mod_json_null_if_false(created);

  memset(created, 0, sizeof(*created));
  created->vals[0] = NULL;
  return created;
}

/*
 * Release a parser obtained from mod_json_parser_create().
 *
 * Unsets the values the parser holds in val_null, val_true, val_false,
 * val_zero, val_zerof and val_empty, unsets its `key` string, and finally
 * frees the parser itself. `par` is dereferenced unconditionally, so it must
 * not be NULL.
 */
static inline void mod_json_parser_destroy(mod_json_parser_t *par) {
  mod_json_value_t *held[] = {par->val_null,  par->val_true,
                              par->val_false, par->val_zero,
                              par->val_zerof, par->val_empty};
  mod_json_size_t i;

  for (i = 0; i < sizeof(held) / sizeof(held[0]); ++i) {
    mod_json_value_unset(held[i]);
  }
  mod_json_string_unset(par->key);
  mod_json_free(par);
}

/*
 * Parse the JSON text `cstr` with the tokenizer `tok` and build a value tree.
 *
 * A temporary parser sized by mod_json_token_max_depth(tok) is attached to
 * the token together with mod_json_parser_event() as its event callback.
 * When tokenizing succeeds, the value stored in the parser's first slot is
 * returned; it is not unset here, so the caller presumably has to release
 * it (TODO confirm against the public header). On failure that value is
 * unset and NULL is returned. The temporary parser is destroyed either way.
 *
 * Returns NULL as well when `tok` or `cstr` is NULL, when `cstr` is an empty
 * string, or when the parser cannot be allocated.
 */
mod_json_value_t *mod_json_parse(mod_json_token_t *tok,
                                 mod_json_cchar_t *cstr) {
  mod_json_parser_t *state;
  mod_json_value_t *result;
  int failed;

  mod_json_null_if_false(tok);
  mod_json_null_if_false(cstr && *cstr);

  state = mod_json_parser_create(mod_json_token_max_depth(tok));
  mod_json_null_if_false(state);

  mod_json_token_set_parser(tok, state);
  mod_json_token_set_event(tok, mod_json_parser_event);

  failed = (mod_json_token_parse(tok, cstr) != 0);

  /* Read the root slot only after tokenizing has finished. */
  result = state->vals[0];
  if (failed) {
    /* Discard whatever was built before the error. */
    mod_json_value_unset(result);
    result = NULL;
  }

  mod_json_parser_destroy(state);
  return result;
}

/*
 * Convenience wrapper around mod_json_parse() that manages its own token.
 *
 * A token is created with `opts` as its option flags and with both depth
 * limits set to 0 (which requests the default depths), used for a single
 * parse of `cstr`, and destroyed again.
 *
 * Returns whatever mod_json_parse() returns, or NULL when the token cannot
 * be created.
 */
mod_json_value_t *mod_json_parse_simply(mod_json_cchar_t *cstr,
                                        mod_json_size_t opts) {
  mod_json_option_t settings;
  mod_json_token_t *lexer;
  mod_json_value_t *root;

  settings.array_depth = 0;  /* 0: use the default array depth */
  settings.object_depth = 0; /* 0: use the default object depth */
  settings.options = opts;

  lexer = mod_json_token_create(&settings);
  mod_json_null_if_false(lexer);

  root = mod_json_parse(lexer, cstr);
  mod_json_token_destroy(lexer);
  return root;
}

/*
 * Append the literal `null` to `str`.
 *
 * Returns the result of mod_json_string_add_cstr().
 */
static inline int mod_json_dump_null(mod_json_string_t *str) {
  /* sizeof counts the terminating NUL, hence the -1 (length 4). */
  return mod_json_string_add_cstr(str, "null", sizeof("null") - 1);
}

/*
 * Append the literal `true` or `false` to `str`, depending on `bol`.
 *
 * Returns the result of mod_json_string_add_cstr().
 */
static inline int mod_json_dump_boolean(mod_json_string_t *str,
                                        mod_json_boolean_t bol) {
  if (bol) {
    return mod_json_string_add_cstr(str, "true", 4);
  }
  return mod_json_string_add_cstr(str, "false", 5);
}

/*
 * Append the decimal text of the integer `num` to `str`.
 *
 * The digits are produced by mod_json_utils_itostr() into a local 32-byte
 * buffer; the value it returns is used as the number of characters to
 * append.
 *
 * Returns the result of mod_json_string_add_cstr().
 */
static inline int mod_json_dump_integer(mod_json_string_t *str,
                                        mod_json_integer_t num) {
  mod_json_char_t digits[32];
  mod_json_size_t count;

  count = mod_json_utils_itostr(digits, num);
  return mod_json_string_add_cstr(str, digits, count);
}

/*
 * Append the text of the floating-point number `dbl` to `str`.
 *
 * The number is formatted with "%g" into a local 32-byte buffer. The length
 * reported by the formatter is validated before it is used: a negative
 * result (formatting error, or truncation on implementations that report it
 * that way) or a result that does not fit the buffer is treated as a
 * failure. Previously the result was cast straight to an unsigned size, so
 * a negative value would have become a huge append length.
 *
 * NOTE(review): assuming mod_json_utils_snprintf() follows standard printf
 * semantics, "%g" keeps only six significant digits and prints non-finite
 * numbers as text such as "inf" or "nan". Neither is changed here because
 * doing so would alter the dumped output.
 *
 * Returns the result of mod_json_string_add_cstr(), or -1 when formatting
 * failed.
 */
static inline int mod_json_dump_float(mod_json_string_t *str,
                                      mod_json_float_t dbl) {
  mod_json_char_t buf[32];
  int len;

  len = (int)mod_json_utils_snprintf(buf, sizeof(buf), "%g", dbl);
  if (len < 0 || (mod_json_size_t)len >= sizeof(buf)) {
    /* Error or truncated output: never append past the buffer. */
    return -1;
  }
  return mod_json_string_add_cstr(str, buf, (mod_json_size_t)len);
}

/*
 * Append `val` to `str` as a double-quoted JSON string.
 *
 * The content of `val` is appended as-is between the quotes; no escaping is
 * applied here. A NULL `val` produces an empty pair of quotes.
 *
 * Returns 0 on success. Any failing append leaves early through the
 * mod_json_minus_if_ne_zero() guard.
 */
static inline int mod_json_dump_string(mod_json_string_t *str,
                                       mod_json_string_t *val) {
  /* opening quote */
  mod_json_minus_if_ne_zero(mod_json_string_add_char(str, '"'));

  if (val) {
    mod_json_minus_if_ne_zero(mod_json_string_add_jstr(str, val));
  }

  /* closing quote */
  mod_json_minus_if_ne_zero(mod_json_string_add_char(str, '"'));
  return 0;
}

/*
 * Forward declaration: the array and object dumpers below call
 * mod_json_dump_value() for their elements before it is defined.
 */
static inline int mod_json_dump_value(mod_json_string_t *str,
                                      mod_json_value_t *val);

/*
 * Append `arr` to `str` as a JSON array.
 *
 * Elements in [arr->first, arr->last) are dumped in order with
 * mod_json_dump_value() and separated by commas. A NULL `arr` produces
 * "[]".
 *
 * Returns 0 on success. Any failing append or element dump leaves early
 * through the mod_json_minus_if_ne_zero() guard.
 */
static inline int mod_json_dump_array(mod_json_string_t *str,
                                      mod_json_array_t *arr) {
  mod_json_minus_if_ne_zero(mod_json_string_add_char(str, '['));

  if (arr) {
    mod_json_value_t **cur;

    for (cur = arr->first; cur != arr->last; ++cur) {
      /* A separator goes in front of every element except the first. */
      if (cur != arr->first) {
        mod_json_minus_if_ne_zero(mod_json_string_add_char(str, ','));
      }
      mod_json_minus_if_ne_zero(mod_json_dump_value(str, *cur));
    }
  }

  mod_json_minus_if_ne_zero(mod_json_string_add_char(str, ']'));
  return 0;
}

/*
 * Append an object member name to `str`: the key in double quotes followed
 * by a colon.
 *
 * Returns 0 on success. Any failing append leaves early through the
 * mod_json_minus_if_ne_zero() guard.
 */
static inline int mod_json_dump_key(mod_json_string_t *str,
                                    mod_json_string_t *key) {
  mod_json_minus_if_ne_zero(mod_json_string_add_char(str, '"'));
  mod_json_minus_if_ne_zero(mod_json_string_add_jstr(str, key));
  /* Closing quote and the name separator go out in one append. */
  mod_json_minus_if_ne_zero(
      mod_json_string_add_cstr(str, "\":", sizeof("\":") - 1));
  return 0;
}

/*
 * Append `obj` to `str` as a JSON object.
 *
 * Pairs in [obj->first, obj->last) are dumped in order as `"key":value`
 * and separated by commas. A NULL `obj` produces "{}".
 *
 * Returns 0 on success. Any failing append or member dump leaves early
 * through the mod_json_minus_if_ne_zero() guard.
 */
static inline int mod_json_dump_object(mod_json_string_t *str,
                                       mod_json_object_t *obj) {
  mod_json_minus_if_ne_zero(mod_json_string_add_char(str, '{'));

  if (obj) {
    mod_json_pair_t *member;

    for (member = obj->first; member != obj->last; ++member) {
      /* A separator goes in front of every member except the first. */
      if (member != obj->first) {
        mod_json_minus_if_ne_zero(mod_json_string_add_char(str, ','));
      }
      mod_json_minus_if_ne_zero(mod_json_dump_key(str, member->key));
      mod_json_minus_if_ne_zero(mod_json_dump_value(str, member->val));
    }
  }

  mod_json_minus_if_ne_zero(mod_json_string_add_char(str, '}'));
  return 0;
}

/*
 * Append the JSON text of `val` to `str`, dispatching on the value's type.
 *
 * A NULL `val` is written as `null`.
 *
 * Returns the result of the type-specific dumper, or -1 for a type without
 * a dumper.
 */
static inline int mod_json_dump_value(mod_json_string_t *str,
                                      mod_json_value_t *val) {
  if (!val) {
    return mod_json_dump_null(str);
  }

  switch (val->type) {
    case mod_json_type_null:
      return mod_json_dump_null(str);

    case mod_json_type_boolean:
      return mod_json_dump_boolean(str, val->data.c_bool);

    case mod_json_type_integer:
      return mod_json_dump_integer(str, val->data.c_int);

    case mod_json_type_float:
      return mod_json_dump_float(str, val->data.c_float);

    case mod_json_type_string:
      return mod_json_dump_string(str, val->data.c_str);

    case mod_json_type_array:
      return mod_json_dump_array(str, val->data.c_arr);

    case mod_json_type_object:
      return mod_json_dump_object(str, val->data.c_obj);

    default:
      break;
  }

  /* Type without a dumper. */
  return -1;
}

/*
 * Serialise `val` into a newly created string.
 *
 * Returns the new string on success. Returns NULL when the string cannot be
 * created or when dumping fails; in the latter case the partially filled
 * string is unset before returning.
 */
mod_json_string_t *mod_json_dump(mod_json_value_t *val) {
  mod_json_string_t *out = mod_json_string_set("", 0);

  mod_json_null_if_false(out);

  if (mod_json_unlikely(mod_json_dump_value(out, val) != 0)) {
    /* Dumping failed: discard the partial output. */
    mod_json_string_unset(out);
    out = NULL;
  }
  return out;
}


================================================
FILE: src/ailego/hash/crc32c.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <ailego/internal/cpu_features.h>
#include <zvec/ailego/hash/crc32c.h>
#include <zvec/ailego/internal/platform.h>

#if !defined(__SSE4_2__) && !defined(__ARM_FEATURE_CRC32)
/**
 *  The following CRC lookup table was generated automagically
 *  using the following model parameters:
 *
 *  Generator Polynomial = ................. 0x1EDC6F41
 *  Generator Polynomial Length = .......... 32 bits
 *  Reflected Bits = ....................... TRUE
 *  Table Generation Offset = .............. 32 bits
 *  Number of Slices = ..................... 8 slices
 *  Slice Lengths = ........................ 8 8 8 8 8 8 8 8
 */
static uint32_t crc_tableil8_o32[256] = {
    0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C,
    0x26A1E7E8, 0xD4CA64EB, 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B,
    0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24, 0x105EC76F, 0xE235446C,
    0xF165B798, 0x030E349B, 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
    0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54, 0x5D1D08BF, 0xAF768BBC,
    0xBC267848, 0x4E4DFB4B, 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A,
    0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35, 0xAA64D611, 0x580F5512,
    0x4B5FA6E6, 0xB93425E5, 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
    0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45, 0xF779DEAE, 0x05125DAD,
    0x1642AE59, 0xE4292D5A, 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A,
    0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595, 0x417B1DBC, 0xB3109EBF,
    0xA0406D4B, 0x522BEE48, 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
    0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687, 0x0C38D26C, 0xFE53516F,
    0xED03A29B, 0x1F682198, 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927,
    0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38, 0xDBFC821C, 0x2997011F,
    0x3AC7F2EB, 0xC8AC71E8, 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
    0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096, 0xA65C047D, 0x5437877E,
    0x4767748A, 0xB50CF789, 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859,
    0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46, 0x7198540D, 0x83F3D70E,
    0x90A324FA, 0x62C8A7F9, 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
    0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36, 0x3CDB9BDD, 0xCEB018DE,
    0xDDE0EB2A, 0x2F8B6829, 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C,
    0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93, 0x082F63B7, 0xFA44E0B4,
    0xE9141340, 0x1B7F9043, 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
    0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3, 0x55326B08, 0xA759E80B,
    0xB4091BFF, 0x466298FC, 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C,
    0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033, 0xA24BB5A6, 0x502036A5,
    0x4370C551, 0xB11B4652, 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
    0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D, 0xEF087A76, 0x1D63F975,
    0x0E330A81, 0xFC588982, 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D,
    0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622, 0x38CC2A06, 0xCAA7A905,
    0xD9F75AF1, 0x2B9CD9F2, 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
    0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530, 0x0417B1DB, 0xF67C32D8,
    0xE52CC12C, 0x1747422F, 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF,
    0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0, 0xD3D3E1AB, 0x21B862A8,
    0x32E8915C, 0xC083125F, 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
    0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90, 0x9E902E7B, 0x6CFBAD78,
    0x7FAB5E8C, 0x8DC0DD8F, 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE,
    0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1, 0x69E9F0D5, 0x9B8273D6,
    0x88D28022, 0x7AB90321, 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
    0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81, 0x34F4F86A, 0xC69F7B69,
    0xD5CF889D, 0x27A40B9E, 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E,
    0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351};

/**
 *  The following CRC lookup table was generated automagically
 *  using the following model parameters:
 *
 *  Generator Polynomial = ................. 0x1EDC6F41
 *  Generator Polynomial Length = .......... 32 bits
 *  Reflected Bits = ....................... TRUE
 *  Table Generation Offset = .............. 32 bits
 *  Number of Slices = ..................... 8 slices
 *  Slice Lengths = ........................ 8 8 8 8 8 8 8 8
 */
static uint32_t crc_tableil8_o40[256] = {
    0x00000000, 0x13A29877, 0x274530EE, 0x34E7A899, 0x4E8A61DC, 0x5D28F9AB,
    0x69CF5132, 0x7A6DC945, 0x9D14C3B8, 0x8EB65BCF, 0xBA51F356, 0xA9F36B21,
    0xD39EA264, 0xC03C3A13, 0xF4DB928A, 0xE7790AFD, 0x3FC5F181, 0x2C6769F6,
    0x1880C16F, 0x0B225918, 0x714F905D, 0x62ED082A, 0x560AA0B3, 0x45A838C4,
    0xA2D13239, 0xB173AA4E, 0x859402D7, 0x96369AA0, 0xEC5B53E5, 0xFFF9CB92,
    0xCB1E630B, 0xD8BCFB7C, 0x7F8BE302, 0x6C297B75, 0x58CED3EC, 0x4B6C4B9B,
    0x310182DE, 0x22A31AA9, 0x1644B230, 0x05E62A47, 0xE29F20BA, 0xF13DB8CD,
    0xC5DA1054, 0xD6788823, 0xAC154166, 0xBFB7D911, 0x8B507188, 0x98F2E9FF,
    0x404E1283, 0x53EC8AF4, 0x670B226D, 0x74A9BA1A, 0x0EC4735F, 0x1D66EB28,
    0x298143B1, 0x3A23DBC6, 0xDD5AD13B, 0xCEF8494C, 0xFA1FE1D5, 0xE9BD79A2,
    0x93D0B0E7, 0x80722890, 0xB4958009, 0xA737187E, 0xFF17C604, 0xECB55E73,
    0xD852F6EA, 0xCBF06E9D, 0xB19DA7D8, 0xA23F3FAF, 0x96D89736, 0x857A0F41,
    0x620305BC, 0x71A19DCB, 0x45463552, 0x56E4AD25, 0x2C896460, 0x3F2BFC17,
    0x0BCC548E, 0x186ECCF9, 0xC0D23785, 0xD370AFF2, 0xE797076B, 0xF4359F1C,
    0x8E585659, 0x9DFACE2E, 0xA91D66B7, 0xBABFFEC0, 0x5DC6F43D, 0x4E646C4A,
    0x7A83C4D3, 0x69215CA4, 0x134C95E1, 0x00EE0D96, 0x3409A50F, 0x27AB3D78,
    0x809C2506, 0x933EBD71, 0xA7D915E8, 0xB47B8D9F, 0xCE1644DA, 0xDDB4DCAD,
    0xE9537434, 0xFAF1EC43, 0x1D88E6BE, 0x0E2A7EC9, 0x3ACDD650, 0x296F4E27,
    0x53028762, 0x40A01F15, 0x7447B78C, 0x67E52FFB, 0xBF59D487, 0xACFB4CF0,
    0x981CE469, 0x8BBE7C1E, 0xF1D3B55B, 0xE2712D2C, 0xD69685B5, 0xC5341DC2,
    0x224D173F, 0x31EF8F48, 0x050827D1, 0x16AABFA6, 0x6CC776E3, 0x7F65EE94,
    0x4B82460D, 0x5820DE7A, 0xFBC3FAF9, 0xE861628E, 0xDC86CA17, 0xCF245260,
    0xB5499B25, 0xA6EB0352, 0x920CABCB, 0x81AE33BC, 0x66D73941, 0x7575A136,
    0x419209AF, 0x523091D8, 0x285D589D, 0x3BFFC0EA, 0x0F186873, 0x1CBAF004,
    0xC4060B78, 0xD7A4930F, 0xE3433B96, 0xF0E1A3E1, 0x8A8C6AA4, 0x992EF2D3,
    0xADC95A4A, 0xBE6BC23D, 0x5912C8C0, 0x4AB050B7, 0x7E57F82E, 0x6DF56059,
    0x1798A91C, 0x043A316B, 0x30DD99F2, 0x237F0185, 0x844819FB, 0x97EA818C,
    0xA30D2915, 0xB0AFB162, 0xCAC27827, 0xD960E050, 0xED8748C9, 0xFE25D0BE,
    0x195CDA43, 0x0AFE4234, 0x3E19EAAD, 0x2DBB72DA, 0x57D6BB9F, 0x447423E8,
    0x70938B71, 0x63311306, 0xBB8DE87A, 0xA82F700D, 0x9CC8D894, 0x8F6A40E3,
    0xF50789A6, 0xE6A511D1, 0xD242B948, 0xC1E0213F, 0x26992BC2, 0x353BB3B5,
    0x01DC1B2C, 0x127E835B, 0x68134A1E, 0x7BB1D269, 0x4F567AF0, 0x5CF4E287,
    0x04D43CFD, 0x1776A48A, 0x23910C13, 0x30339464, 0x4A5E5D21, 0x59FCC556,
    0x6D1B6DCF, 0x7EB9F5B8, 0x99C0FF45, 0x8A626732, 0xBE85CFAB, 0xAD2757DC,
    0xD74A9E99, 0xC4E806EE, 0xF00FAE77, 0xE3AD3600, 0x3B11CD7C, 0x28B3550B,
    0x1C54FD92, 0x0FF665E5, 0x759BACA0, 0x663934D7, 0x52DE9C4E, 0x417C0439,
    0xA6050EC4, 0xB5A796B3, 0x81403E2A, 0x92E2A65D, 0xE88F6F18, 0xFB2DF76F,
    0xCFCA5FF6, 0xDC68C781, 0x7B5FDFFF, 0x68FD4788, 0x5C1AEF11, 0x4FB87766,
    0x35D5BE23, 0x26772654, 0x12908ECD, 0x013216BA, 0xE64B1C47, 0xF5E98430,
    0xC10E2CA9, 0xD2ACB4DE, 0xA8C17D9B, 0xBB63E5EC, 0x8F844D75, 0x9C26D502,
    0x449A2E7E, 0x5738B609, 0x63DF1E90, 0x707D86E7, 0x0A104FA2, 0x19B2D7D5,
    0x2D557F4C, 0x3EF7E73B, 0xD98EEDC6, 0xCA2C75B1, 0xFECBDD28, 0xED69455F,
    0x97048C1A, 0x84A6146D, 0xB041BCF4, 0xA3E32483};

/**
 *  The following CRC lookup table was generated automagically
 *  using the following model parameters:
 *
 *  Generator Polynomial = ................. 0x1EDC6F41
 *  Generator Polynomial Length = .......... 32 bits
 *  Reflected Bits = ....................... TRUE
 *  Table Generation Offset = .............. 32 bits
 *  Number of Slices = ..................... 8 slices
 *  Slice Lengths = ........................ 8 8 8 8 8 8 8 8
 */
static uint32_t crc_tableil8_o48[256] = {
    0x00000000, 0xA541927E, 0x4F6F520D, 0xEA2EC073, 0x9EDEA41A, 0x3B9F3664,
    0xD1B1F617, 0x74F06469, 0x38513EC5, 0x9D10ACBB, 0x773E6CC8, 0xD27FFEB6,
    0xA68F9ADF, 0x03CE08A1, 0xE9E0C8D2, 0x4CA15AAC, 0x70A27D8A, 0xD5E3EFF4,
    0x3FCD2F87, 0x9A8CBDF9, 0xEE7CD990, 0x4B3D4BEE, 0xA1138B9D, 0x045219E3,
    0x48F3434F, 0xEDB2D131, 0x079C1142, 0xA2DD833C, 0xD62DE755, 0x736C752B,
    0x9942B558, 0x3C032726, 0xE144FB14, 0x4405696A, 0xAE2BA919, 0x0B6A3B67,
    0x7F9A5F0E, 0xDADBCD70, 0x30F50D03, 0x95B49F7D, 0xD915C5D1, 0x7C5457AF,
    0x967A97DC, 0x333B05A2, 0x47CB61CB, 0xE28AF3B5, 0x08A433C6, 0xADE5A1B8,
    0x91E6869E, 0x34A714E0, 0xDE89D493, 0x7BC846ED, 0x0F382284, 0xAA79B0FA,
    0x40577089, 0xE516E2F7, 0xA9B7B85B, 0x0CF62A25, 0xE6D8EA56, 0x43997828,
    0x37691C41, 0x92288E3F, 0x78064E4C, 0xDD47DC32, 0xC76580D9, 0x622412A7,
    0x880AD2D4, 0x2D4B40AA, 0x59BB24C3, 0xFCFAB6BD, 0x16D476CE, 0xB395E4B0,
    0xFF34BE1C, 0x5A752C62, 0xB05BEC11, 0x151A7E6F, 0x61EA1A06, 0xC4AB8878,
    0x2E85480B, 0x8BC4DA75, 0xB7C7FD53, 0x12866F2D, 0xF8A8AF5E, 0x5DE93D20,
    0x29195949, 0x8C58CB37, 0x66760B44, 0xC337993A, 0x8F96C396, 0x2AD751E8,
    0xC0F9919B, 0x65B803E5, 0x1148678C, 0xB409F5F2, 0x5E273581, 0xFB66A7FF,
    0x26217BCD, 0x8360E9B3, 0x694E29C0, 0xCC0FBBBE, 0xB8FFDFD7, 0x1DBE4DA9,
    0xF7908DDA, 0x52D11FA4, 0x1E704508, 0xBB31D776, 0x511F1705, 0xF45E857B,
    0x80AEE112, 0x25EF736C, 0xCFC1B31F, 0x6A802161, 0x56830647, 0xF3C29439,
    0x19EC544A, 0xBCADC634, 0xC85DA25D, 0x6D1C3023, 0x8732F050, 0x2273622E,
    0x6ED23882, 0xCB93AAFC, 0x21BD6A8F, 0x84FCF8F1, 0xF00C9C98, 0x554D0EE6,
    0xBF63CE95, 0x1A225CEB, 0x8B277743, 0x2E66E53D, 0xC448254E, 0x6109B730,
    0x15F9D359, 0xB0B84127, 0x5A968154, 0xFFD7132A, 0xB3764986, 0x1637DBF8,
    0xFC191B8B, 0x595889F5, 0x2DA8ED9C, 0x88E97FE2, 0x62C7BF91, 0xC7862DEF,
    0xFB850AC9, 0x5EC498B7, 0xB4EA58C4, 0x11ABCABA, 0x655BAED3, 0xC01A3CAD,
    0x2A34FCDE, 0x8F756EA0, 0xC3D4340C, 0x6695A672, 0x8CBB6601, 0x29FAF47F,
    0x5D0A9016, 0xF84B0268, 0x1265C21B, 0xB7245065, 0x6A638C57, 0xCF221E29,
    0x250CDE5A, 0x804D4C24, 0xF4BD284D, 0x51FCBA33, 0xBBD27A40, 0x1E93E83E,
    0x5232B292, 0xF77320EC, 0x1D5DE09F, 0xB81C72E1, 0xCCEC1688, 0x69AD84F6,
    0x83834485, 0x26C2D6FB, 0x1AC1F1DD, 0xBF8063A3, 0x55AEA3D0, 0xF0EF31AE,
    0x841F55C7, 0x215EC7B9, 0xCB7007CA, 0x6E3195B4, 0x2290CF18, 0x87D15D66,
    0x6DFF9D15, 0xC8BE0F6B, 0xBC4E6B02, 0x190FF97C, 0xF321390F, 0x5660AB71,
    0x4C42F79A, 0xE90365E4, 0x032DA597, 0xA66C37E9, 0xD29C5380, 0x77DDC1FE,
    0x9DF3018D, 0x38B293F3, 0x7413C95F, 0xD1525B21, 0x3B7C9B52, 0x9E3D092C,
    0xEACD6D45, 0x4F8CFF3B, 0xA5A23F48, 0x00E3AD36, 0x3CE08A10, 0x99A1186E,
    0x738FD81D, 0xD6CE4A63, 0xA23E2E0A, 0x077FBC74, 0xED517C07, 0x4810EE79,
    0x04B1B4D5, 0xA1F026AB, 0x4BDEE6D8, 0xEE9F74A6, 0x9A6F10CF, 0x3F2E82B1,
    0xD50042C2, 0x7041D0BC, 0xAD060C8E, 0x08479EF0, 0xE2695E83, 0x4728CCFD,
    0x33D8A894, 0x96993AEA, 0x7CB7FA99, 0xD9F668E7, 0x9557324B, 0x3016A035,
    0xDA386046, 0x7F79F238, 0x0B899651, 0xAEC8042F, 0x44E6C45C, 0xE1A75622,
    0xDDA47104, 0x78E5E37A, 0x92CB2309, 0x378AB177, 0x437AD51E, 0xE63B4760,
    0x0C158713, 0xA954156D, 0xE5F54FC1, 0x40B4DDBF, 0xAA9A1DCC, 0x0FDB8FB2,
    0x7B2BEBDB, 0xDE6A79A5, 0x3444B9D6, 0x91052BA8};

/**
 *  The following CRC lookup table was generated automagically
 *  using the following model parameters:
 *
 *  Generator Polynomial = ................. 0x1EDC6F41
 *  Generator Polynomial Length = .......... 32 bits
 *  Reflected Bits = ....................... TRUE
 *  Table Generation Offset = .............. 32 bits
 *  Number of Slices = ..................... 8 slices
 *  Slice Lengths = ........................ 8 8 8 8 8 8 8 8
 */
static uint32_t crc_tableil8_o56[256] = {
    0x00000000, 0xDD45AAB8, 0xBF672381, 0x62228939, 0x7B2231F3, 0xA6679B4B,
    0xC4451272, 0x1900B8CA, 0xF64463E6, 0x2B01C95E, 0x49234067, 0x9466EADF,
    0x8D665215, 0x5023F8AD, 0x32017194, 0xEF44DB2C, 0xE964B13D, 0x34211B85,
    0x560392BC, 0x8B463804, 0x924680CE, 0x4F032A76, 0x2D21A34F, 0xF06409F7,
    0x1F20D2DB, 0xC2657863, 0xA047F15A, 0x7D025BE2, 0x6402E328, 0xB9474990,
    0xDB65C0A9, 0x06206A11, 0xD725148B, 0x0A60BE33, 0x6842370A, 0xB5079DB2,
    0xAC072578, 0x71428FC0, 0x136006F9, 0xCE25AC41, 0x2161776D, 0xFC24DDD5,
    0x9E0654EC, 0x4343FE54, 0x5A43469E, 0x8706EC26, 0xE524651F, 0x3861CFA7,
    0x3E41A5B6, 0xE3040F0E, 0x81268637, 0x5C632C8F, 0x45639445, 0x98263EFD,
    0xFA04B7C4, 0x27411D7C, 0xC805C650, 0x15406CE8, 0x7762E5D1, 0xAA274F69,
    0xB327F7A3, 0x6E625D1B, 0x0C40D422, 0xD1057E9A, 0xABA65FE7, 0x76E3F55F,
    0x14C17C66, 0xC984D6DE, 0xD0846E14, 0x0DC1C4AC, 0x6FE34D95, 0xB2A6E72D,
    0x5DE23C01, 0x80A796B9, 0xE2851F80, 0x3FC0B538, 0x26C00DF2, 0xFB85A74A,
    0x99A72E73, 0x44E284CB, 0x42C2EEDA, 0x9F874462, 0xFDA5CD5B, 0x20E067E3,
    0x39E0DF29, 0xE4A57591, 0x8687FCA8, 0x5BC25610, 0xB4868D3C, 0x69C32784,
    0x0BE1AEBD, 0xD6A40405, 0xCFA4BCCF, 0x12E11677, 0x70C39F4E, 0xAD8635F6,
    0x7C834B6C, 0xA1C6E1D4, 0xC3E468ED, 0x1EA1C255, 0x07A17A9F, 0xDAE4D027,
    0xB8C6591E, 0x6583F3A6, 0x8AC7288A, 0x57828232, 0x35A00B0B, 0xE8E5A1B3,
    0xF1E51979, 0x2CA0B3C1, 0x4E823AF8, 0x93C79040, 0x95E7FA51, 0x48A250E9,
    0x2A80D9D0, 0xF7C57368, 0xEEC5CBA2, 0x3380611A, 0x51A2E823, 0x8CE7429B,
    0x63A399B7, 0xBEE6330F, 0xDCC4BA36, 0x0181108E, 0x1881A844, 0xC5C402FC,
    0xA7E68BC5, 0x7AA3217D, 0x52A0C93F, 0x8FE56387, 0xEDC7EABE, 0x30824006,
    0x2982F8CC, 0xF4C75274, 0x96E5DB4D, 0x4BA071F5, 0xA4E4AAD9, 0x79A10061,
    0x1B838958, 0xC6C623E0, 0xDFC69B2A, 0x02833192, 0x60A1B8AB, 0xBDE41213,
    0xBBC47802, 0x6681D2BA, 0x04A35B83, 0xD9E6F13B, 0xC0E649F1, 0x1DA3E349,
    0x7F816A70, 0xA2C4C0C8, 0x4D801BE4, 0x90C5B15C, 0xF2E73865, 0x2FA292DD,
    0x36A22A17, 0xEBE780AF, 0x89C50996, 0x5480A32E, 0x8585DDB4, 0x58C0770C,
    0x3AE2FE35, 0xE7A7548D, 0xFEA7EC47, 0x23E246FF, 0x41C0CFC6, 0x9C85657E,
    0x73C1BE52, 0xAE8414EA, 0xCCA69DD3, 0x11E3376B, 0x08E38FA1, 0xD5A62519,
    0xB784AC20, 0x6AC10698, 0x6CE16C89, 0xB1A4C631, 0xD3864F08, 0x0EC3E5B0,
    0x17C35D7A, 0xCA86F7C2, 0xA8A47EFB, 0x75E1D443, 0x9AA50F6F, 0x47E0A5D7,
    0x25C22CEE, 0xF8878656, 0xE1873E9C, 0x3CC29424, 0x5EE01D1D, 0x83A5B7A5,
    0xF90696D8, 0x24433C60, 0x4661B559, 0x9B241FE1, 0x8224A72B, 0x5F610D93,
    0x3D4384AA, 0xE0062E12, 0x0F42F53E, 0xD2075F86, 0xB025D6BF, 0x6D607C07,
    0x7460C4CD, 0xA9256E75, 0xCB07E74C, 0x16424DF4, 0x106227E5, 0xCD278D5D,
    0xAF050464, 0x7240AEDC, 0x6B401616, 0xB605BCAE, 0xD4273597, 0x09629F2F,
    0xE6264403, 0x3B63EEBB, 0x59416782, 0x8404CD3A, 0x9D0475F0, 0x4041DF48,
    0x22635671, 0xFF26FCC9, 0x2E238253, 0xF36628EB, 0x9144A1D2, 0x4C010B6A,
    0x5501B3A0, 0x88441918, 0xEA669021, 0x37233A99, 0xD867E1B5, 0x05224B0D,
    0x6700C234, 0xBA45688C, 0xA345D046, 0x7E007AFE, 0x1C22F3C7, 0xC167597F,
    0xC747336E, 0x1A0299D6, 0x782010EF, 0xA565BA57, 0xBC65029D, 0x6120A825,
    0x0302211C, 0xDE478BA4, 0x31035088, 0xEC46FA30, 0x8E647309, 0x5321D9B1,
    0x4A21617B, 0x9764CBC3, 0xF54642FA, 0x2803E842};

/**
 *  The following CRC lookup table was generated automagically
 *  using the following model parameters:
 *
 *  Generator Polynomial = ................. 0x1EDC6F41
 *  Generator Polynomial Length = .......... 32 bits
 *  Reflected Bits = ....................... TRUE
 *  Table Generation Offset = .............. 32 bits
 *  Number of Slices = ..................... 8 slices
 *  Slice Lengths = ........................ 8 8 8 8 8 8 8 8
 */
static uint32_t crc_tableil8_o64[256] = {
    0x00000000, 0x38116FAC, 0x7022DF58, 0x4833B0F4, 0xE045BEB0, 0xD854D11C,
    0x906761E8, 0xA8760E44, 0xC5670B91, 0xFD76643D, 0xB545D4C9, 0x8D54BB65,
    0x2522B521, 0x1D33DA8D, 0x55006A79, 0x6D1105D5, 0x8F2261D3, 0xB7330E7F,
    0xFF00BE8B, 0xC711D127, 0x6F67DF63, 0x5776B0CF, 0x1F45003B, 0x27546F97,
    0x4A456A42, 0x725405EE, 0x3A67B51A, 0x0276DAB6, 0xAA00D4F2, 0x9211BB5E,
    0xDA220BAA, 0xE2336406, 0x1BA8B557, 0x23B9DAFB, 0x6B8A6A0F, 0x539B05A3,
    0xFBED0BE7, 0xC3FC644B, 0x8BCFD4BF, 0xB3DEBB13, 0xDECFBEC6, 0xE6DED16A,
    0xAEED619E, 0x96FC0E32, 0x3E8A0076, 0x069B6FDA, 0x4EA8DF2E, 0x76B9B082,
    0x948AD484, 0xAC9BBB28, 0xE4A80BDC, 0xDCB96470, 0x74CF6A34, 0x4CDE0598,
    0x04EDB56C, 0x3CFCDAC0, 0x51EDDF15, 0x69FCB0B9, 0x21CF004D, 0x19DE6FE1,
    0xB1A861A5, 0x89B90E09, 0xC18ABEFD, 0xF99BD151, 0x37516AAE, 0x0F400502,
    0x4773B5F6, 0x7F62DA5A, 0xD714D41E, 0xEF05BBB2, 0xA7360B46, 0x9F2764EA,
    0xF236613F, 0xCA270E93, 0x8214BE67, 0xBA05D1CB, 0x1273DF8F, 0x2A62B023,
    0x625100D7, 0x5A406F7B, 0xB8730B7D, 0x806264D1, 0xC851D425, 0xF040BB89,
    0x5836B5CD, 0x6027DA61, 0x28146A95, 0x10050539, 0x7D1400EC, 0x45056F40,
    0x0D36DFB4, 0x3527B018, 0x9D51BE5C, 0xA540D1F0, 0xED736104, 0xD5620EA8,
    0x2CF9DFF9, 0x14E8B055, 0x5CDB00A1, 0x64CA6F0D, 0xCCBC6149, 0xF4AD0EE5,
    0xBC9EBE11, 0x848FD1BD, 0xE99ED468, 0xD18FBBC4, 0x99BC0B30, 0xA1AD649C,
    0x09DB6AD8, 0x31CA0574, 0x79F9B580, 0x41E8DA2C, 0xA3DBBE2A, 0x9BCAD186,
    0xD3F96172, 0xEBE80EDE, 0x439E009A, 0x7B8F6F36, 0x33BCDFC2, 0x0BADB06E,
    0x66BCB5BB, 0x5EADDA17, 0x169E6AE3, 0x2E8F054F, 0x86F90B0B, 0xBEE864A7,
    0xF6DBD453, 0xCECABBFF, 0x6EA2D55C, 0x56B3BAF0, 0x1E800A04, 0x269165A8,
    0x8EE76BEC, 0xB6F60440, 0xFEC5B4B4, 0xC6D4DB18, 0xABC5DECD, 0x93D4B161,
    0xDBE70195, 0xE3F66E39, 0x4B80607D, 0x73910FD1, 0x3BA2BF25, 0x03B3D089,
    0xE180B48F, 0xD991DB23, 0x91A26BD7, 0xA9B3047B, 0x01C50A3F, 0x39D46593,
    0x71E7D567, 0x49F6BACB, 0x24E7BF1E, 0x1CF6D0B2, 0x54C56046, 0x6CD40FEA,
    0xC4A201AE, 0xFCB36E02, 0xB480DEF6, 0x8C91B15A, 0x750A600B, 0x4D1B0FA7,
    0x0528BF53, 0x3D39D0FF, 0x954FDEBB, 0xAD5EB117, 0xE56D01E3, 0xDD7C6E4F,
    0xB06D6B9A, 0x887C0436, 0xC04FB4C2, 0xF85EDB6E, 0x5028D52A, 0x6839BA86,
    0x200A0A72, 0x181B65DE, 0xFA2801D8, 0xC2396E74, 0x8A0ADE80, 0xB21BB12C,
    0x1A6DBF68, 0x227CD0C4, 0x6A4F6030, 0x525E0F9C, 0x3F4F0A49, 0x075E65E5,
    0x4F6DD511, 0x777CBABD, 0xDF0AB4F9, 0xE71BDB55, 0xAF286BA1, 0x9739040D,
    0x59F3BFF2, 0x61E2D05E, 0x29D160AA, 0x11C00F06, 0xB9B60142, 0x81A76EEE,
    0xC994DE1A, 0xF185B1B6, 0x9C94B463, 0xA485DBCF, 0xECB66B3B, 0xD4A70497,
    0x7CD10AD3, 0x44C0657F, 0x0CF3D58B, 0x34E2BA27, 0xD6D1DE21, 0xEEC0B18D,
    0xA6F30179, 0x9EE26ED5, 0x36946091, 0x0E850F3D, 0x46B6BFC9, 0x7EA7D065,
    0x13B6D5B0, 0x2BA7BA1C, 0x63940AE8, 0x5B856544, 0xF3F36B00, 0xCBE204AC,
    0x83D1B458, 0xBBC0DBF4, 0x425B0AA5, 0x7A4A6509, 0x3279D5FD, 0x0A68BA51,
    0xA21EB415, 0x9A0FDBB9, 0xD23C6B4D, 0xEA2D04E1, 0x873C0134, 0xBF2D6E98,
    0xF71EDE6C, 0xCF0FB1C0, 0x6779BF84, 0x5F68D028, 0x175B60DC, 0x2F4A0F70,
    0xCD796B76, 0xF56804DA, 0xBD5BB42E, 0x854ADB82, 0x2D3CD5C6, 0x152DBA6A,
    0x5D1E0A9E, 0x650F6532, 0x081E60E7, 0x300F0F4B, 0x783CBFBF, 0x402DD013,
    0xE85BDE57, 0xD04AB1FB, 0x9879010F, 0xA0686EA3};

/**
 *  The following CRC lookup table was generated automagically
 *  using the following model parameters:
 *
 *  Generator Polynomial = ................. 0x1EDC6F41
 *  Generator Polynomial Length = .......... 32 bits
 *  Reflected Bits = ....................... TRUE
 *  Table Generation Offset = .............. 32 bits
 *  Number of Slices = ..................... 8 slices
 *  Slice Lengths = ........................ 8 8 8 8 8 8 8 8
 */
static uint32_t crc_tableil8_o72[256] = {
    0x00000000, 0xEF306B19, 0xDB8CA0C3, 0x34BCCBDA, 0xB2F53777, 0x5DC55C6E,
    0x697997B4, 0x8649FCAD, 0x6006181F, 0x8F367306, 0xBB8AB8DC, 0x54BAD3C5,
    0xD2F32F68, 0x3DC34471, 0x097F8FAB, 0xE64FE4B2, 0xC00C303E, 0x2F3C5B27,
    0x1B8090FD, 0xF4B0FBE4, 0x72F90749, 0x9DC96C50, 0xA975A78A, 0x4645CC93,
    0xA00A2821, 0x4F3A4338, 0x7B8688E2, 0x94B6E3FB, 0x12FF1F56, 0xFDCF744F,
    0xC973BF95, 0x2643D48C, 0x85F4168D, 0x6AC47D94, 0x5E78B64E, 0xB148DD57,
    0x370121FA, 0xD8314AE3, 0xEC8D8139, 0x03BDEA20, 0xE5F20E92, 0x0AC2658B,
    0x3E7EAE51, 0xD14EC548, 0x570739E5, 0xB83752FC, 0x8C8B9926, 0x63BBF23F,
    0x45F826B3, 0xAAC84DAA, 0x9E748670, 0x7144ED69, 0xF70D11C4, 0x183D7ADD,
    0x2C81B107, 0xC3B1DA1E, 0x25FE3EAC, 0xCACE55B5, 0xFE729E6F, 0x1142F576,
    0x970B09DB, 0x783B62C2, 0x4C87A918, 0xA3B7C201, 0x0E045BEB, 0xE13430F2,
    0xD588FB28, 0x3AB89031, 0xBCF16C9C, 0x53C10785, 0x677DCC5F, 0x884DA746,
    0x6E0243F4, 0x813228ED, 0xB58EE337, 0x5ABE882E, 0xDCF77483, 0x33C71F9A,
    0x077BD440, 0xE84BBF59, 0xCE086BD5, 0x213800CC, 0x1584CB16, 0xFAB4A00F,
    0x7CFD5CA2, 0x93CD37BB, 0xA771FC61, 0x48419778, 0xAE0E73CA, 0x413E18D3,
    0x7582D309, 0x9AB2B810, 0x1CFB44BD, 0xF3CB2FA4, 0xC777E47E, 0x28478F67,
    0x8BF04D66, 0x64C0267F, 0x507CEDA5, 0xBF4C86BC, 0x39057A11, 0xD6351108,
    0xE289DAD2, 0x0DB9B1CB, 0xEBF65579, 0x04C63E60, 0x307AF5BA, 0xDF4A9EA3,
    0x5903620E, 0xB6330917, 0x828FC2CD, 0x6DBFA9D4, 0x4BFC7D58, 0xA4CC1641,
    0x9070DD9B, 0x7F40B682, 0xF9094A2F, 0x16392136, 0x2285EAEC, 0xCDB581F5,
    0x2BFA6547, 0xC4CA0E5E, 0xF076C584, 0x1F46AE9D, 0x990F5230, 0x763F3929,
    0x4283F2F3, 0xADB399EA, 0x1C08B7D6, 0xF338DCCF, 0xC7841715, 0x28B47C0C,
    0xAEFD80A1, 0x41CDEBB8, 0x75712062, 0x9A414B7B, 0x7C0EAFC9, 0x933EC4D0,
    0xA7820F0A, 0x48B26413, 0xCEFB98BE, 0x21CBF3A7, 0x1577387D, 0xFA475364,
    0xDC0487E8, 0x3334ECF1, 0x0788272B, 0xE8B84C32, 0x6EF1B09F, 0x81C1DB86,
    0xB57D105C, 0x5A4D7B45, 0xBC029FF7, 0x5332F4EE, 0x678E3F34, 0x88BE542D,
    0x0EF7A880, 0xE1C7C399, 0xD57B0843, 0x3A4B635A, 0x99FCA15B, 0x76CCCA42,
    0x42700198, 0xAD406A81, 0x2B09962C, 0xC439FD35, 0xF08536EF, 0x1FB55DF6,
    0xF9FAB944, 0x16CAD25D, 0x22761987, 0xCD46729E, 0x4B0F8E33, 0xA43FE52A,
    0x90832EF0, 0x7FB345E9, 0x59F09165, 0xB6C0FA7C, 0x827C31A6, 0x6D4C5ABF,
    0xEB05A612, 0x0435CD0B, 0x308906D1, 0xDFB96DC8, 0x39F6897A, 0xD6C6E263,
    0xE27A29B9, 0x0D4A42A0, 0x8B03BE0D, 0x6433D514, 0x508F1ECE, 0xBFBF75D7,
    0x120CEC3D, 0xFD3C8724, 0xC9804CFE, 0x26B027E7, 0xA0F9DB4A, 0x4FC9B053,
    0x7B757B89, 0x94451090, 0x720AF422, 0x9D3A9F3B, 0xA98654E1, 0x46B63FF8,
    0xC0FFC355, 0x2FCFA84C, 0x1B736396, 0xF443088F, 0xD200DC03, 0x3D30B71A,
    0x098C7CC0, 0xE6BC17D9, 0x60F5EB74, 0x8FC5806D, 0xBB794BB7, 0x544920AE,
    0xB206C41C, 0x5D36AF05, 0x698A64DF, 0x86BA0FC6, 0x00F3F36B, 0xEFC39872,
    0xDB7F53A8, 0x344F38B1, 0x97F8FAB0, 0x78C891A9, 0x4C745A73, 0xA344316A,
    0x250DCDC7, 0xCA3DA6DE, 0xFE816D04, 0x11B1061D, 0xF7FEE2AF, 0x18CE89B6,
    0x2C72426C, 0xC3422975, 0x450BD5D8, 0xAA3BBEC1, 0x9E87751B, 0x71B71E02,
    0x57F4CA8E, 0xB8C4A197, 0x8C786A4D, 0x63480154, 0xE501FDF9, 0x0A3196E0,
    0x3E8D5D3A, 0xD1BD3623, 0x37F2D291, 0xD8C2B988, 0xEC7E7252, 0x034E194B,
    0x8507E5E6, 0x6A378EFF, 0x5E8B4525, 0xB1BB2E3C};

/**
 *  The following CRC lookup table was generated automagically
 *  using the following model parameters:
 *
 *  Generator Polynomial = ................. 0x1EDC6F41
 *  Generator Polynomial Length = .......... 32 bits
 *  Reflected Bits = ....................... TRUE
 *  Table Generation Offset = .............. 32 bits
 *  Number of Slices = ..................... 8 slices
 *  Slice Lengths = ........................ 8 8 8 8 8 8 8 8
 *
 *  Usage: crc32c_slicing8() indexes this table with bits 8..15 of the
 *  running CRC after that CRC has been XORed with the first 32-bit word
 *  of each 8-byte group.
 */
static uint32_t crc_tableil8_o80[256] = {
    0x00000000, 0x68032CC8, 0xD0065990, 0xB8057558, 0xA5E0C5D1, 0xCDE3E919,
    0x75E69C41, 0x1DE5B089, 0x4E2DFD53, 0x262ED19B, 0x9E2BA4C3, 0xF628880B,
    0xEBCD3882, 0x83CE144A, 0x3BCB6112, 0x53C84DDA, 0x9C5BFAA6, 0xF458D66E,
    0x4C5DA336, 0x245E8FFE, 0x39BB3F77, 0x51B813BF, 0xE9BD66E7, 0x81BE4A2F,
    0xD27607F5, 0xBA752B3D, 0x02705E65, 0x6A7372AD, 0x7796C224, 0x1F95EEEC,
    0xA7909BB4, 0xCF93B77C, 0x3D5B83BD, 0x5558AF75, 0xED5DDA2D, 0x855EF6E5,
    0x98BB466C, 0xF0B86AA4, 0x48BD1FFC, 0x20BE3334, 0x73767EEE, 0x1B755226,
    0xA370277E, 0xCB730BB6, 0xD696BB3F, 0xBE9597F7, 0x0690E2AF, 0x6E93CE67,
    0xA100791B, 0xC90355D3, 0x7106208B, 0x19050C43, 0x04E0BCCA, 0x6CE39002,
    0xD4E6E55A, 0xBCE5C992, 0xEF2D8448, 0x872EA880, 0x3F2BDDD8, 0x5728F110,
    0x4ACD4199, 0x22CE6D51, 0x9ACB1809, 0xF2C834C1, 0x7AB7077A, 0x12B42BB2,
    0xAAB15EEA, 0xC2B27222, 0xDF57C2AB, 0xB754EE63, 0x0F519B3B, 0x6752B7F3,
    0x349AFA29, 0x5C99D6E1, 0xE49CA3B9, 0x8C9F8F71, 0x917A3FF8, 0xF9791330,
    0x417C6668, 0x297F4AA0, 0xE6ECFDDC, 0x8EEFD114, 0x36EAA44C, 0x5EE98884,
    0x430C380D, 0x2B0F14C5, 0x930A619D, 0xFB094D55, 0xA8C1008F, 0xC0C22C47,
    0x78C7591F, 0x10C475D7, 0x0D21C55E, 0x6522E996, 0xDD279CCE, 0xB524B006,
    0x47EC84C7, 0x2FEFA80F, 0x97EADD57, 0xFFE9F19F, 0xE20C4116, 0x8A0F6DDE,
    0x320A1886, 0x5A09344E, 0x09C17994, 0x61C2555C, 0xD9C72004, 0xB1C40CCC,
    0xAC21BC45, 0xC422908D, 0x7C27E5D5, 0x1424C91D, 0xDBB77E61, 0xB3B452A9,
    0x0BB127F1, 0x63B20B39, 0x7E57BBB0, 0x16549778, 0xAE51E220, 0xC652CEE8,
    0x959A8332, 0xFD99AFFA, 0x459CDAA2, 0x2D9FF66A, 0x307A46E3, 0x58796A2B,
    0xE07C1F73, 0x887F33BB, 0xF56E0EF4, 0x9D6D223C, 0x25685764, 0x4D6B7BAC,
    0x508ECB25, 0x388DE7ED, 0x808892B5, 0xE88BBE7D, 0xBB43F3A7, 0xD340DF6F,
    0x6B45AA37, 0x034686FF, 0x1EA33676, 0x76A01ABE, 0xCEA56FE6, 0xA6A6432E,
    0x6935F452, 0x0136D89A, 0xB933ADC2, 0xD130810A, 0xCCD53183, 0xA4D61D4B,
    0x1CD36813, 0x74D044DB, 0x27180901, 0x4F1B25C9, 0xF71E5091, 0x9F1D7C59,
    0x82F8CCD0, 0xEAFBE018, 0x52FE9540, 0x3AFDB988, 0xC8358D49, 0xA036A181,
    0x1833D4D9, 0x7030F811, 0x6DD54898, 0x05D66450, 0xBDD31108, 0xD5D03DC0,
    0x8618701A, 0xEE1B5CD2, 0x561E298A, 0x3E1D0542, 0x23F8B5CB, 0x4BFB9903,
    0xF3FEEC5B, 0x9BFDC093, 0x546E77EF, 0x3C6D5B27, 0x84682E7F, 0xEC6B02B7,
    0xF18EB23E, 0x998D9EF6, 0x2188EBAE, 0x498BC766, 0x1A438ABC, 0x7240A674,
    0xCA45D32C, 0xA246FFE4, 0xBFA34F6D, 0xD7A063A5, 0x6FA516FD, 0x07A63A35,
    0x8FD9098E, 0xE7DA2546, 0x5FDF501E, 0x37DC7CD6, 0x2A39CC5F, 0x423AE097,
    0xFA3F95CF, 0x923CB907, 0xC1F4F4DD, 0xA9F7D815, 0x11F2AD4D, 0x79F18185,
    0x6414310C, 0x0C171DC4, 0xB412689C, 0xDC114454, 0x1382F328, 0x7B81DFE0,
    0xC384AAB8, 0xAB878670, 0xB66236F9, 0xDE611A31, 0x66646F69, 0x0E6743A1,
    0x5DAF0E7B, 0x35AC22B3, 0x8DA957EB, 0xE5AA7B23, 0xF84FCBAA, 0x904CE762,
    0x2849923A, 0x404ABEF2, 0xB2828A33, 0xDA81A6FB, 0x6284D3A3, 0x0A87FF6B,
    0x17624FE2, 0x7F61632A, 0xC7641672, 0xAF673ABA, 0xFCAF7760, 0x94AC5BA8,
    0x2CA92EF0, 0x44AA0238, 0x594FB2B1, 0x314C9E79, 0x8949EB21, 0xE14AC7E9,
    0x2ED97095, 0x46DA5C5D, 0xFEDF2905, 0x96DC05CD, 0x8B39B544, 0xE33A998C,
    0x5B3FECD4, 0x333CC01C, 0x60F48DC6, 0x08F7A10E, 0xB0F2D456, 0xD8F1F89E,
    0xC5144817, 0xAD1764DF, 0x15121187, 0x7D113D4F};

/**
 *  The following CRC lookup table was generated automagically
 *  using the following model parameters:
 *
 *  Generator Polynomial = ................. 0x1EDC6F41
 *  Generator Polynomial Length = .......... 32 bits
 *  Reflected Bits = ....................... TRUE
 *  Table Generation Offset = .............. 32 bits
 *  Number of Slices = ..................... 8 slices
 *  Slice Lengths = ........................ 8 8 8 8 8 8 8 8
 *
 *  Usage: crc32c_slicing8() indexes this table with the low 8 bits of the
 *  running CRC after that CRC has been XORed with the first 32-bit word
 *  of each 8-byte group.
 */
static uint32_t crc_tableil8_o88[256] = {
    0x00000000, 0x493C7D27, 0x9278FA4E, 0xDB448769, 0x211D826D, 0x6821FF4A,
    0xB3657823, 0xFA590504, 0x423B04DA, 0x0B0779FD, 0xD043FE94, 0x997F83B3,
    0x632686B7, 0x2A1AFB90, 0xF15E7CF9, 0xB86201DE, 0x847609B4, 0xCD4A7493,
    0x160EF3FA, 0x5F328EDD, 0xA56B8BD9, 0xEC57F6FE, 0x37137197, 0x7E2F0CB0,
    0xC64D0D6E, 0x8F717049, 0x5435F720, 0x1D098A07, 0xE7508F03, 0xAE6CF224,
    0x7528754D, 0x3C14086A, 0x0D006599, 0x443C18BE, 0x9F789FD7, 0xD644E2F0,
    0x2C1DE7F4, 0x65219AD3, 0xBE651DBA, 0xF759609D, 0x4F3B6143, 0x06071C64,
    0xDD439B0D, 0x947FE62A, 0x6E26E32E, 0x271A9E09, 0xFC5E1960, 0xB5626447,
    0x89766C2D, 0xC04A110A, 0x1B0E9663, 0x5232EB44, 0xA86BEE40, 0xE1579367,
    0x3A13140E, 0x732F6929, 0xCB4D68F7, 0x827115D0, 0x593592B9, 0x1009EF9E,
    0xEA50EA9A, 0xA36C97BD, 0x782810D4, 0x31146DF3, 0x1A00CB32, 0x533CB615,
    0x8878317C, 0xC1444C5B, 0x3B1D495F, 0x72213478, 0xA965B311, 0xE059CE36,
    0x583BCFE8, 0x1107B2CF, 0xCA4335A6, 0x837F4881, 0x79264D85, 0x301A30A2,
    0xEB5EB7CB, 0xA262CAEC, 0x9E76C286, 0xD74ABFA1, 0x0C0E38C8, 0x453245EF,
    0xBF6B40EB, 0xF6573DCC, 0x2D13BAA5, 0x642FC782, 0xDC4DC65C, 0x9571BB7B,
    0x4E353C12, 0x07094135, 0xFD504431, 0xB46C3916, 0x6F28BE7F, 0x2614C358,
    0x1700AEAB, 0x5E3CD38C, 0x857854E5, 0xCC4429C2, 0x361D2CC6, 0x7F2151E1,
    0xA465D688, 0xED59ABAF, 0x553BAA71, 0x1C07D756, 0xC743503F, 0x8E7F2D18,
    0x7426281C, 0x3D1A553B, 0xE65ED252, 0xAF62AF75, 0x9376A71F, 0xDA4ADA38,
    0x010E5D51, 0x48322076, 0xB26B2572, 0xFB575855, 0x2013DF3C, 0x692FA21B,
    0xD14DA3C5, 0x9871DEE2, 0x4335598B, 0x0A0924AC, 0xF05021A8, 0xB96C5C8F,
    0x6228DBE6, 0x2B14A6C1, 0x34019664, 0x7D3DEB43, 0xA6796C2A, 0xEF45110D,
    0x151C1409, 0x5C20692E, 0x8764EE47, 0xCE589360, 0x763A92BE, 0x3F06EF99,
    0xE44268F0, 0xAD7E15D7, 0x572710D3, 0x1E1B6DF4, 0xC55FEA9D, 0x8C6397BA,
    0xB0779FD0, 0xF94BE2F7, 0x220F659E, 0x6B3318B9, 0x916A1DBD, 0xD856609A,
    0x0312E7F3, 0x4A2E9AD4, 0xF24C9B0A, 0xBB70E62D, 0x60346144, 0x29081C63,
    0xD3511967, 0x9A6D6440, 0x4129E329, 0x08159E0E, 0x3901F3FD, 0x703D8EDA,
    0xAB7909B3, 0xE2457494, 0x181C7190, 0x51200CB7, 0x8A648BDE, 0xC358F6F9,
    0x7B3AF727, 0x32068A00, 0xE9420D69, 0xA07E704E, 0x5A27754A, 0x131B086D,
    0xC85F8F04, 0x8163F223, 0xBD77FA49, 0xF44B876E, 0x2F0F0007, 0x66337D20,
    0x9C6A7824, 0xD5560503, 0x0E12826A, 0x472EFF4D, 0xFF4CFE93, 0xB67083B4,
    0x6D3404DD, 0x240879FA, 0xDE517CFE, 0x976D01D9, 0x4C2986B0, 0x0515FB97,
    0x2E015D56, 0x673D2071, 0xBC79A718, 0xF545DA3F, 0x0F1CDF3B, 0x4620A21C,
    0x9D642575, 0xD4585852, 0x6C3A598C, 0x250624AB, 0xFE42A3C2, 0xB77EDEE5,
    0x4D27DBE1, 0x041BA6C6, 0xDF5F21AF, 0x96635C88, 0xAA7754E2, 0xE34B29C5,
    0x380FAEAC, 0x7133D38B, 0x8B6AD68F, 0xC256ABA8, 0x19122CC1, 0x502E51E6,
    0xE84C5038, 0xA1702D1F, 0x7A34AA76, 0x3308D751, 0xC951D255, 0x806DAF72,
    0x5B29281B, 0x1215553C, 0x230138CF, 0x6A3D45E8, 0xB179C281, 0xF845BFA6,
    0x021CBAA2, 0x4B20C785, 0x906440EC, 0xD9583DCB, 0x613A3C15, 0x28064132,
    0xF342C65B, 0xBA7EBB7C, 0x4027BE78, 0x091BC35F, 0xD25F4436, 0x9B633911,
    0xA777317B, 0xEE4B4C5C, 0x350FCB35, 0x7C33B612, 0x866AB316, 0xCF56CE31,
    0x14124958, 0x5D2E347F, 0xE54C35A1, 0xAC704886, 0x7734CFEF, 0x3E08B2C8,
    0xC451B7CC, 0x8D6DCAEB, 0x56294D82, 0x1F1530A5};

/**
 *  Table-driven CRC-32C update that consumes the input eight bytes per
 *  iteration ("slicing by 8").
 *
 *  Implementations adapted from Intel's Slicing By 8 Sourceforge Project
 *  http://sourceforge.net/projects/slicing-by-8/
 *  http://www.evanjones.ca/crc32c.html
 *
 *  \param data  Start of the input bytes.
 *  \param len   Number of input bytes.
 *  \param crc   Running CRC state; it is used as given (this function applies
 *               no initial or final inversion).
 *  \return The CRC state after folding in all `len` bytes.
 *
 *  Input words are assembled byte by byte in little-endian order instead of
 *  being read through a casted `uint32_t *`. That keeps the result
 *  independent of host endianness and avoids casting away const and
 *  aliasing the buffer through an unrelated pointer type.
 */
static inline uint32_t crc32c_slicing8(const void *data, size_t len,
                                       uint32_t crc) {
  const uint8_t *p_buf = (const uint8_t *)data;

  /* Handle leading misaligned bytes (up to the next 4-byte boundary) */
  size_t init_bytes =
      (sizeof(uint32_t) - (uintptr_t)p_buf) & (sizeof(uint32_t) - 1);
  if (len < init_bytes) {
    init_bytes = len;
  }
  for (size_t li = 0; li < init_bytes; li++) {
    crc = crc_tableil8_o32[(crc ^ *p_buf++) & 0x000000FF] ^ (crc >> 8);
  }

  len -= init_bytes;
  /* Largest multiple of 8 that fits; the remainder is handled bytewise */
  size_t running_length = len & ~(sizeof(uint64_t) - 1);
  size_t end_bytes = len - running_length;

  for (size_t li = 0; li < running_length / 8; li++) {
    /* First four bytes are folded into the running CRC ... */
    const uint32_t low =
        crc ^ ((uint32_t)p_buf[0] | ((uint32_t)p_buf[1] << 8) |
               ((uint32_t)p_buf[2] << 16) | ((uint32_t)p_buf[3] << 24));

    /* ... the next four bytes index their tables directly. */
    crc = crc_tableil8_o88[low & 0x000000FF] ^
          crc_tableil8_o80[(low >> 8) & 0x000000FF] ^
          crc_tableil8_o72[(low >> 16) & 0x000000FF] ^
          crc_tableil8_o64[(low >> 24) & 0x000000FF] ^
          crc_tableil8_o56[p_buf[4]] ^ crc_tableil8_o48[p_buf[5]] ^
          crc_tableil8_o40[p_buf[6]] ^ crc_tableil8_o32[p_buf[7]];
    p_buf += 8;
  }

  /* Trailing bytes that do not fill a whole 8-byte group */
  for (size_t li = 0; li < end_bytes; li++) {
    crc = crc_tableil8_o32[(crc ^ *p_buf++) & 0x000000FF] ^ (crc >> 8);
  }
  return crc;
}
#endif  // !__SSE4_2__

#if defined(__SSE4_2__)
#if defined(AILEGO_M64)
//! CRC-32C update using the SSE4.2 CRC32 instructions (64-bit build).
//! Whole 8-byte groups go through _mm_crc32_u64; the remaining 0..7 bytes
//! are consumed as at most one 4-byte, one 2-byte and one 1-byte step,
//! in that order.
static inline uint32_t crc32c_sse42(const void *data, size_t len,
                                    uint32_t crc) {
  const uint8_t *cursor = (const uint8_t *)data;
  const size_t tail = len & 7;

  for (size_t groups = len >> 3; groups != 0; --groups) {
    crc = (uint32_t)_mm_crc32_u64(crc, *(uint64_t *)cursor);
    cursor += 8;
  }

  // Each set bit of the tail length selects one fixed-width step.
  if (tail & 4) {
    crc = _mm_crc32_u32(crc, *(uint32_t *)cursor);
    cursor += 4;
  }
  if (tail & 2) {
    crc = _mm_crc32_u16(crc, *(uint16_t *)cursor);
    cursor += 2;
  }
  if (tail & 1) {
    crc = _mm_crc32_u8(crc, *cursor);
  }
  return crc;
}
#else
//! CRC-32C update using the SSE4.2 CRC32 instructions (non-64-bit build).
//! Whole 4-byte groups go through _mm_crc32_u32; the remaining 0..3 bytes
//! are consumed as at most one 2-byte step followed by one 1-byte step.
static inline uint32_t crc32c_sse42(const void *data, size_t len,
                                    uint32_t crc) {
  const uint8_t *cursor = (const uint8_t *)data;
  const size_t tail = len & 3;

  for (size_t groups = len >> 2; groups != 0; --groups) {
    crc = _mm_crc32_u32(crc, *(uint32_t *)cursor);
    cursor += 4;
  }

  // Each set bit of the tail length selects one fixed-width step.
  if (tail & 2) {
    crc = _mm_crc32_u16(crc, *(uint16_t *)cursor);
    cursor += 2;
  }
  if (tail & 1) {
    crc = _mm_crc32_u8(crc, *cursor);
  }
  return crc;
}
#endif  // AILEGO_M64
#endif  // __SSE4_2__

#if defined(__ARM_FEATURE_CRC32)
//! CRC-32C update using the ARM CRC32 extension intrinsics.
//! Whole 8-byte groups go through __crc32cd; the remaining 0..7 bytes are
//! consumed as at most one 4-byte, one 2-byte and one 1-byte step, in that
//! order.
static inline uint32_t crc32c_neon(const void *data, size_t len, uint32_t crc) {
  const uint8_t *cursor = (const uint8_t *)data;
  const size_t tail = len & 7;

  for (size_t groups = len >> 3; groups != 0; --groups) {
    crc = __crc32cd(crc, *(uint64_t *)cursor);
    cursor += 8;
  }

  // Each set bit of the tail length selects one fixed-width step.
  if (tail & 4) {
    crc = __crc32cw(crc, *(uint32_t *)cursor);
    cursor += 4;
  }
  if (tail & 2) {
    crc = __crc32ch(crc, *(uint16_t *)cursor);
    cursor += 2;
  }
  if (tail & 1) {
    crc = __crc32cb(crc, *cursor);
  }
  return crc;
}
#endif  // __ARM_FEATURE_CRC32

namespace zvec {
namespace ailego {

//! Fold `len` bytes at `data` into the running CRC-32C state `crc`.
//! The backend is chosen at compile time: SSE4.2 when available, otherwise
//! the ARM CRC32 extension, otherwise the table-driven implementation.
uint32_t Crc32c::Hash(const void *data, size_t len, uint32_t crc) {
#if defined(__SSE4_2__)
  const uint32_t updated = crc32c_sse42(data, len, crc);
#elif defined(__ARM_FEATURE_CRC32)
  const uint32_t updated = crc32c_neon(data, len, crc);
#else
  const uint32_t updated = crc32c_slicing8(data, len, crc);
#endif
  return updated;
}

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/internal/cpu_features.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "cpu_features.h"
#include <cstddef>

#if defined(_MSC_VER)
#include <intrin.h>
#elif !defined(__ARM_ARCH)
#include <cpuid.h>
#endif

namespace zvec {
namespace ailego {
namespace internal {

//
// REFER: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/
//        tree/arch/x86/include/asm/cpufeatures.h
//        https://software.intel.com/sites/default/files/managed/c5/15/
//        architecture-instruction-set-extensions-programming-reference.pdf
//

CpuFeatures::CpuFlags CpuFeatures::flags_;

#if defined(_MSC_VER)
//! Capture the CPUID leaf 1 (ECX/EDX) and leaf 7 sub-leaf 0 (EBX/ECX/EDX)
//! feature words. A leaf is only queried when CPUID leaf 0 reports it as
//! supported; otherwise its words stay zero, matching the non-MSVC path.
CpuFeatures::CpuFlags::CpuFlags(void)
    : L1_ECX(0), L1_EDX(0), L7_EBX(0), L7_ECX(0), L7_EDX(0) {
  int regs[4] = {0, 0, 0, 0};

  // Leaf 0 returns the highest supported basic leaf in EAX.
  __cpuid(regs, 0);
  const uint32_t max_leaf = static_cast<uint32_t>(regs[0]);

  if (max_leaf >= 1) {
    __cpuidex(regs, 1, 0);
    L1_ECX = static_cast<uint32_t>(regs[2]);
    L1_EDX = static_cast<uint32_t>(regs[3]);
  }
  if (max_leaf >= 7) {
    __cpuidex(regs, 7, 0);
    L7_EBX = static_cast<uint32_t>(regs[1]);
    L7_ECX = static_cast<uint32_t>(regs[2]);
    L7_EDX = static_cast<uint32_t>(regs[3]);
  }
}
#elif !defined(__ARM_ARCH)
//! Capture the CPUID leaf 1 (ECX/EDX) and leaf 7 sub-leaf 0 (EBX/ECX/EDX)
//! feature words. Words of a leaf that is not available stay zero.
CpuFeatures::CpuFlags::CpuFlags(void)
    : L1_ECX(0), L1_EDX(0), L7_EBX(0), L7_ECX(0), L7_EDX(0) {
  uint32_t eax, ebx, ecx, edx;

  if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
    L1_ECX = ecx;
    L1_EDX = edx;
  }
  // Only query leaf 7 when the CPU reports it as a supported basic leaf.
  if (__get_cpuid_max(0, nullptr) >= 7) {
    __cpuid_count(7, 0, eax, ebx, ecx, edx);
    L7_EBX = ebx;
    L7_ECX = ecx;
    L7_EDX = edx;
  }
}
#else
//! ARM builds: no CPUID query is made, every feature word stays zero.
CpuFeatures::CpuFlags::CpuFlags(void)
    : L1_ECX(0), L1_EDX(0), L7_EBX(0), L7_ECX(0), L7_EDX(0) {}
#endif

//! 16-bit FP conversions
bool CpuFeatures::F16C(void) {
  return !!(flags_.L1_ECX & (1u << 29));
}

//! Multimedia Extensions
bool CpuFeatures::MMX(void) {
  return !!(flags_.L1_EDX & (1u << 23));
}

//! Streaming SIMD Extensions
bool CpuFeatures::SSE(void) {
  return !!(flags_.L1_EDX & (1u << 25));
}

//! Streaming SIMD Extensions 2
bool CpuFeatures::SSE2(void) {
  return !!(flags_.L1_EDX & (1u << 26));
}

//! Streaming SIMD Extensions 3
bool CpuFeatures::SSE3(void) {
  return !!(flags_.L1_ECX & (1u << 0));
}

//! Supplemental Streaming SIMD Extensions 3
bool CpuFeatures::SSSE3(void) {
  return !!(flags_.L1_ECX & (1u << 9));
}

//! Streaming SIMD Extensions 4.1
bool CpuFeatures::SSE4_1(void) {
  return !!(flags_.L1_ECX & (1u << 19));
}

//! Streaming SIMD Extensions 4.2
bool CpuFeatures::SSE4_2(void) {
  return !!(flags_.L1_ECX & (1u << 20));
}

//! Advanced Vector Extensions
bool CpuFeatures::AVX(void) {
  return !!(flags_.L1_ECX & (1u << 28));
}

//! Advanced Vector Extensions 2
bool CpuFeatures::AVX2(void) {
  return !!(flags_.L7_EBX & (1u << 5));
}

//! AVX-512 Foundation
bool CpuFeatures::AVX512F(void) {
  return !!(flags_.L7_EBX & (1u << 16));
}

//! AVX-512 DQ (Double/Quad granular) Instructions
bool CpuFeatures::AVX512DQ(void) {
  return !!(flags_.L7_EBX & (1u << 17));
}

//! AVX-512 Prefetch
bool CpuFeatures::AVX512PF(void) {
  return !!(flags_.L7_EBX & (1u << 26));
}

//! AVX-512 Exponential and Reciprocal
bool CpuFeatures::AVX512ER(void) {
  return !!(flags_.L7_EBX & (1u << 27));
}

//! AVX-512 Conflict Detection
bool CpuFeatures::AVX512CD(void) {
  return !!(flags_.L7_EBX & (1u << 28));
}

//! AVX-512 BW (Byte/Word granular) Instructions
bool CpuFeatures::AVX512BW(void) {
  return !!(flags_.L7_EBX & (1u << 30));
}

//! AVX-512 VL (128/256 Vector Length) Extensions
bool CpuFeatures::AVX512VL(void) {
  return !!(flags_.L7_EBX & (1u << 31));
}

//! AVX-512 Integer Fused Multiply-Add instructions
bool CpuFeatures::AVX512_IFMA(void) {
  return !!(flags_.L7_EBX & (1u << 21));
}

//! AVX512 Vector Bit Manipulation instructions
bool CpuFeatures::AVX512_VBMI(void) {
  return !!(flags_.L7_ECX & (1u << 1));
}

//! Additional AVX512 Vector Bit Manipulation Instructions
bool CpuFeatures::AVX512_VBMI2(void) {
  return !!(flags_.L7_ECX & (1u << 6));
}

//! Vector Neural Network Instructions
bool CpuFeatures::AVX512_VNNI(void) {
  return !!(flags_.L7_ECX & (1u << 11));
}

//! Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions
bool CpuFeatures::AVX512_BITALG(void) {
  return !!(flags_.L7_ECX & (1u << 12));
}

//! POPCNT for vectors of DW/QW
bool CpuFeatures::AVX512_VPOPCNTDQ(void) {
  return !!(flags_.L7_ECX & (1u << 14));
}

//! AVX-512 Neural Network Instructions
bool CpuFeatures::AVX512_4VNNIW(void) {
  return !!(flags_.L7_EDX & (1u << 2));
}

//! AVX-512 Multiply Accumulation Single precision
bool CpuFeatures::AVX512_4FMAPS(void) {
  return !!(flags_.L7_EDX & (1u << 3));
}

//! AVX-512 FP16 instructions
bool CpuFeatures::AVX512_FP16(void) {
  return !!(flags_.L7_EDX & (1u << 23));
}

//! CMPXCHG8 instruction
bool CpuFeatures::CX8(void) {
  return !!(flags_.L1_EDX & (1u << 8));
}

//! CMPXCHG16B instruction
bool CpuFeatures::CX16(void) {
  return !!(flags_.L1_ECX & (1u << 13));
}

//! PCLMULQDQ instruction
bool CpuFeatures::PCLMULQDQ(void) {
  return !!(flags_.L1_ECX & (1u << 1));
}

//! Carry-Less Multiplication Double Quadword
bool CpuFeatures::VPCLMULQDQ(void) {
  return !!(flags_.L7_ECX & (1u << 10));
}

//! CMOV instructions (plus FCMOVcc, FCOMI with FPU)
bool CpuFeatures::CMOV(void) {
  return !!(flags_.L1_EDX & (1u << 15));
}

//! MOVBE instruction
bool CpuFeatures::MOVBE(void) {
  return !!(flags_.L1_ECX & (1u << 22));
}

//! Enhanced REP MOVSB/STOSB instructions
bool CpuFeatures::ERMS(void) {
  return !!(flags_.L7_EBX & (1u << 9));
}

//! POPCNT instruction
bool CpuFeatures::POPCNT(void) {
  return !!(flags_.L1_ECX & (1u << 23));
}

//! XSAVE/XRSTOR/XSETBV/XGETBV instructions
bool CpuFeatures::XSAVE(void) {
  return !!(flags_.L1_ECX & (1u << 26));
}

//! Fused multiply-add
bool CpuFeatures::FMA(void) {
  return !!(flags_.L1_ECX & (1u << 12));
}

//! ADCX and ADOX instructions
bool CpuFeatures::ADX(void) {
  return !!(flags_.L7_EBX & (1u << 19));
}

//! Galois Field New Instructions
bool CpuFeatures::GFNI(void) {
  return !!(flags_.L7_ECX & (1u << 8));
}

//! AES instructions
bool CpuFeatures::AES(void) {
  return !!(flags_.L1_ECX & (1u << 25));
}

//! Vector AES
bool CpuFeatures::VAES(void) {
  return !!(flags_.L7_ECX & (1u << 9));
}

//! RDSEED instruction
bool CpuFeatures::RDSEED(void) {
  return !!(flags_.L7_EBX & (1u << 18));
}

//! RDRAND instruction
bool CpuFeatures::RDRAND(void) {
  return !!(flags_.L1_ECX & (1u << 30));
}

//! SHA1/SHA256 Instruction Extensions
bool CpuFeatures::SHA(void) {
  return !!(flags_.L7_EBX & (1u << 29));
}

//! 1st group bit manipulation extensions
bool CpuFeatures::BMI1(void) {
  return !!(flags_.L7_EBX & (1u << 3));
}

//! 2nd group bit manipulation extensions
bool CpuFeatures::BMI2(void) {
  return !!(flags_.L7_EBX & (1u << 8));
}

//! CLFLUSH instruction
bool CpuFeatures::CLFLUSH(void) {
  return !!(flags_.L1_EDX & (1u << 19));
}

//! CLFLUSHOPT instruction
bool CpuFeatures::CLFLUSHOPT(void) {
  return !!(flags_.L7_EBX & (1u << 23));
}

//! CLWB instruction
bool CpuFeatures::CLWB(void) {
  return !!(flags_.L7_EBX & (1u << 24));
}

//! RDPID instruction
bool CpuFeatures::RDPID(void) {
  return !!(flags_.L7_ECX & (1u << 22));
}

//! Onboard FPU
bool CpuFeatures::FPU(void) {
  return !!(flags_.L1_EDX & (1u << 0));
}

//! Hyper-Threading
bool CpuFeatures::HT(void) {
  return !!(flags_.L1_EDX & (1u << 28));
}

//! Hardware virtualization
bool CpuFeatures::VMX(void) {
  return !!(flags_.L1_ECX & (1u << 5));
}

//! Running on a hypervisor
bool CpuFeatures::HYPERVISOR(void) {
  return !!(flags_.L1_ECX & (1u << 31));
}

//! Describe the instruction-set macros this translation unit was compiled
//! with, as one string literal concatenated by the preprocessor. This
//! reflects compiler flags only; it does not consult flags_.
const char *CpuFeatures::Intrinsics(void) {
  return ""
#if defined(__ARM_NEON)
         "Neon"
#if defined(__ARM_FEATURE_CRC32)
         "+CRC"
#endif
#if defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) || \
    defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
         "+FP16"
#endif
#elif defined(__AVX512F__)
         "AVX512F"
#if defined(__AVX512VL__)
         "+AVX512VL"
#endif
#if defined(__AVX512BW__)
         "+AVX512BW"
#endif
#if defined(__AVX512DQ__)
         "+AVX512DQ"
#endif
#if defined(__AVX512CD__)
         "+AVX512CD"
#endif
#if defined(__AVX512ER__)
         "+AVX512ER"
#endif
#if defined(__AVX512PF__)
         "+AVX512PF"
#endif
#if defined(__AVX512IFMA__)
         "+AVX512IFMA"
#endif
#if defined(__AVX512VBMI__)
         "+AVX512VBMI"
#endif
#if defined(__AVX512VBMI2__)
         "+AVX512VBMI2"
#endif
#if defined(__AVX512VNNI__)
         "+AVX512VNNI"
#endif
#if defined(__AVX512BITALG__)
         "+AVX512BITALG"
#endif
#if defined(__AVX512VPOPCNTDQ__)
         "+AVX512VPOPCNTDQ"
#endif
#if defined(__AVX512FP16__)
         "+AVX512FP16"
#endif
#elif defined(__AVX2__)
         "AVX2"
#elif defined(__AVX__)
         "AVX"
#elif defined(__SSE4_2__)
         "SSE4.2"
#elif defined(__SSE4_1__)
         "SSE4.1"
#elif defined(__SSSE3__)
         "SSSE3"
#elif defined(__SSE3__)
         "SSE3"
#elif defined(__SSE2__)
         "SSE2"
#elif defined(__SSE__)
         "SSE"
#elif defined(__MMX__)
         "MMX"
#endif
#if defined(__FMA__)
         "+FMA"
#endif
#if defined(__BMI2__)
         "+BMI2"
#elif defined(__BMI__)
         "+BMI"
#endif
#if defined(__F16C__)
         "+F16C"
#endif
      ;
}

// Must stay below the definition of flags_: objects in one translation unit
// are initialized in definition order, and the StaticFlags member
// initializers read flags_ through the predicates above.
CpuFeatures::StaticFlags CpuFeatures::static_flags_;
}  // namespace internal
}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/internal/cpu_features.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstdint>
namespace zvec {
namespace ailego {
namespace internal {

/*! CPU feature queries.
 *
 *  Exposes one static predicate per instruction-set feature, plus a
 *  StaticFlags aggregate that stores the result of every predicate in a
 *  plain bool member.
 */
class CpuFeatures {
 public:
  //! F16C: 16-bit floating-point conversions
  static bool F16C();

  //! MMX: Multimedia Extensions
  static bool MMX();

  //! SSE: Streaming SIMD Extensions
  static bool SSE();

  //! SSE2: Streaming SIMD Extensions 2
  static bool SSE2();

  //! SSE3: Streaming SIMD Extensions 3
  static bool SSE3();

  //! SSSE3: Supplemental Streaming SIMD Extensions 3
  static bool SSSE3();

  //! SSE4.1: Streaming SIMD Extensions 4.1
  static bool SSE4_1();

  //! SSE4.2: Streaming SIMD Extensions 4.2
  static bool SSE4_2();

  //! AVX: Advanced Vector Extensions
  static bool AVX();

  //! AVX2: Advanced Vector Extensions 2
  static bool AVX2();

  //! AVX-512 Foundation
  static bool AVX512F();

  //! AVX-512 DQ (Double/Quad granular) instructions
  static bool AVX512DQ();

  //! AVX-512 Prefetch
  static bool AVX512PF();

  //! AVX-512 Exponential and Reciprocal
  static bool AVX512ER();

  //! AVX-512 Conflict Detection
  static bool AVX512CD();

  //! AVX-512 BW (Byte/Word granular) instructions
  static bool AVX512BW();

  //! AVX-512 VL (128/256 Vector Length) extensions
  static bool AVX512VL();

  //! AVX-512 Integer Fused Multiply-Add instructions
  static bool AVX512_IFMA();

  //! AVX-512 Vector Bit Manipulation instructions
  static bool AVX512_VBMI();

  //! Additional AVX-512 Vector Bit Manipulation instructions
  static bool AVX512_VBMI2();

  //! AVX-512 Vector Neural Network Instructions
  static bool AVX512_VNNI();

  //! VPOPCNT[B,W] and VPSHUF-BITQMB instructions
  static bool AVX512_BITALG();

  //! POPCNT for vectors of DW/QW
  static bool AVX512_VPOPCNTDQ();

  //! AVX-512 Neural Network Instructions
  static bool AVX512_4VNNIW();

  //! AVX-512 Multiply Accumulation Single precision
  static bool AVX512_4FMAPS();

  //! AVX-512 FP16 instructions
  static bool AVX512_FP16();

  //! CMPXCHG8 instruction
  static bool CX8();

  //! CMPXCHG16B instruction
  static bool CX16();

  //! PCLMULQDQ instruction
  static bool PCLMULQDQ();

  //! Carry-Less Multiplication Double Quadword
  static bool VPCLMULQDQ();

  //! CMOV instructions (plus FCMOVcc, FCOMI with FPU)
  static bool CMOV();

  //! MOVBE instruction
  static bool MOVBE();

  //! Enhanced REP MOVSB/STOSB instructions
  static bool ERMS();

  //! POPCNT instruction
  static bool POPCNT();

  //! XSAVE/XRSTOR/XSETBV/XGETBV instructions
  static bool XSAVE();

  //! Fused multiply-add
  static bool FMA();

  //! ADCX and ADOX instructions
  static bool ADX();

  //! Galois Field New Instructions
  static bool GFNI();

  //! AES instructions
  static bool AES();

  //! Vector AES
  static bool VAES();

  //! RDSEED instruction
  static bool RDSEED();

  //! RDRAND instruction
  static bool RDRAND();

  //! SHA1/SHA256 instruction extensions
  static bool SHA();

  //! First group of bit manipulation extensions
  static bool BMI1();

  //! Second group of bit manipulation extensions
  static bool BMI2();

  //! CLFLUSH instruction
  static bool CLFLUSH();

  //! CLFLUSHOPT instruction
  static bool CLFLUSHOPT();

  //! CLWB instruction
  static bool CLWB();

  //! RDPID instruction
  static bool RDPID();

  //! Onboard FPU
  static bool FPU();

  //! Hyper-Threading
  static bool HT();

  //! Hardware virtualization
  static bool VMX();

  //! Running on a hypervisor
  static bool HYPERVISOR();

  //! Intrinsics of compiling
  static const char *Intrinsics();

 private:
  //! Raw feature words backing the predicates above.
  struct CpuFlags {
    //! Constructor
    CpuFlags();

    //! Feature words, named <leaf>_<register>
    uint32_t L1_ECX;
    uint32_t L1_EDX;
    uint32_t L7_EBX;
    uint32_t L7_ECX;
    uint32_t L7_EDX;
  };

  //! Shared feature words
  static CpuFlags flags_;

 public:
  //! One bool per predicate of CpuFeatures, each initialized by calling the
  //! predicate of the same name when a StaticFlags object is constructed.
  //! Member order mirrors the declaration order of the predicates.
  struct StaticFlags {
    //! Floating-point conversion and legacy SIMD
    bool F16C{CpuFeatures::F16C()};
    bool MMX{CpuFeatures::MMX()};
    bool SSE{CpuFeatures::SSE()};
    bool SSE2{CpuFeatures::SSE2()};
    bool SSE3{CpuFeatures::SSE3()};
    bool SSSE3{CpuFeatures::SSSE3()};
    bool SSE4_1{CpuFeatures::SSE4_1()};
    bool SSE4_2{CpuFeatures::SSE4_2()};

    //! AVX family
    bool AVX{CpuFeatures::AVX()};
    bool AVX2{CpuFeatures::AVX2()};

    //! AVX-512 family
    bool AVX512F{CpuFeatures::AVX512F()};
    bool AVX512DQ{CpuFeatures::AVX512DQ()};
    bool AVX512PF{CpuFeatures::AVX512PF()};
    bool AVX512ER{CpuFeatures::AVX512ER()};
    bool AVX512CD{CpuFeatures::AVX512CD()};
    bool AVX512BW{CpuFeatures::AVX512BW()};
    bool AVX512VL{CpuFeatures::AVX512VL()};
    bool AVX512_IFMA{CpuFeatures::AVX512_IFMA()};
    bool AVX512_VBMI{CpuFeatures::AVX512_VBMI()};
    bool AVX512_VBMI2{CpuFeatures::AVX512_VBMI2()};
    bool AVX512_VNNI{CpuFeatures::AVX512_VNNI()};
    bool AVX512_BITALG{CpuFeatures::AVX512_BITALG()};
    bool AVX512_VPOPCNTDQ{CpuFeatures::AVX512_VPOPCNTDQ()};
    bool AVX512_4VNNIW{CpuFeatures::AVX512_4VNNIW()};
    bool AVX512_4FMAPS{CpuFeatures::AVX512_4FMAPS()};
    bool AVX512_FP16{CpuFeatures::AVX512_FP16()};

    //! Individual instructions and instruction groups
    bool CX8{CpuFeatures::CX8()};
    bool CX16{CpuFeatures::CX16()};
    bool PCLMULQDQ{CpuFeatures::PCLMULQDQ()};
    bool VPCLMULQDQ{CpuFeatures::VPCLMULQDQ()};
    bool CMOV{CpuFeatures::CMOV()};
    bool MOVBE{CpuFeatures::MOVBE()};
    bool ERMS{CpuFeatures::ERMS()};
    bool POPCNT{CpuFeatures::POPCNT()};
    bool XSAVE{CpuFeatures::XSAVE()};
    bool FMA{CpuFeatures::FMA()};
    bool ADX{CpuFeatures::ADX()};
    bool GFNI{CpuFeatures::GFNI()};
    bool AES{CpuFeatures::AES()};
    bool VAES{CpuFeatures::VAES()};
    bool RDSEED{CpuFeatures::RDSEED()};
    bool RDRAND{CpuFeatures::RDRAND()};
    bool SHA{CpuFeatures::SHA()};
    bool BMI1{CpuFeatures::BMI1()};
    bool BMI2{CpuFeatures::BMI2()};
    bool CLFLUSH{CpuFeatures::CLFLUSH()};
    bool CLFLUSHOPT{CpuFeatures::CLFLUSHOPT()};
    bool CLWB{CpuFeatures::CLWB()};
    bool RDPID{CpuFeatures::RDPID()};

    //! Platform properties
    bool FPU{CpuFeatures::FPU()};
    bool HT{CpuFeatures::HT()};
    bool VMX{CpuFeatures::VMX()};
    bool HYPERVISOR{CpuFeatures::HYPERVISOR()};
  };

  //! Shared StaticFlags instance
  static StaticFlags static_flags_;
};

}  // namespace internal
}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/io/file.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <zvec/ailego/internal/platform.h>
#include <zvec/ailego/io/file.h>
#if !defined(_WIN64) && !defined(_WIN32)
#include <sys/mman.h>
#include <sys/stat.h>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#else
#include <Windows.h>
#endif

namespace zvec {
namespace ailego {

#if !defined(_WIN64) && !defined(_WIN32)

//! Open `path` with `flags`, restarting the call whenever it is interrupted
//! by a signal (EINTR). New files are created with mode 0644.
//! Returns the descriptor, or -1 with errno set by open().
static inline int OpenSafely(const char *path, int flags) {
  const mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
  int fd;
  do {
    fd = open(path, flags, mode);
  } while (fd == -1 && errno == EINTR);
  return fd;
}

//! Close a descriptor exactly once.
//! close() is deliberately NOT retried on EINTR: on Linux (and the other
//! mainstream POSIX systems) the descriptor is already released when the
//! call is interrupted, so a retry could close an unrelated descriptor that
//! another thread obtained in the meantime.
static inline void CloseSafely(int fd) {
  (void)close(fd);
}

//! read() wrapper that restarts the call when it fails with EINTR.
//! Returns whatever the final read() returned (bytes read, 0 or -1).
static inline ssize_t ReadSafely(int fd, void *buf, size_t count) {
  ssize_t nread;
  do {
    nread = read(fd, buf, count);
  } while (nread == -1 && errno == EINTR);
  return nread;
}

//! pread() wrapper that restarts the call when it fails with EINTR.
//! Returns whatever the final pread() returned (bytes read, 0 or -1).
static inline ssize_t PreadSafely(int fd, void *buf, size_t count,
                                  ssize_t offset) {
  ssize_t nread;
  do {
    nread = pread(fd, buf, count, offset);
  } while (nread == -1 && errno == EINTR);
  return nread;
}

//! write() wrapper that restarts the call when it fails with EINTR.
//! Returns whatever the final write() returned (bytes written or -1).
static inline ssize_t WriteSafely(int fd, const void *buf, size_t count) {
  ssize_t nwritten;
  do {
    nwritten = write(fd, buf, count);
  } while (nwritten == -1 && errno == EINTR);
  return nwritten;
}

//! pwrite() wrapper that restarts the call when it fails with EINTR.
//! Returns whatever the final pwrite() returned (bytes written or -1).
static inline ssize_t PwriteSafely(int fd, const void *buf, size_t count,
                                   ssize_t offset) {
  ssize_t nwritten;
  do {
    nwritten = pwrite(fd, buf, count, offset);
  } while (nwritten == -1 && errno == EINTR);
  return nwritten;
}

//! Keep reading until `count` bytes have been stored in `buf`, or until
//! ReadSafely() reports end-of-file or an error.
//! Returns the number of bytes actually read.
static inline size_t ReadAll(int fd, void *buf, size_t count) {
  char *cursor = static_cast<char *>(buf);
  size_t remaining = count;

  while (remaining > 0) {
    const ssize_t got = ReadSafely(fd, cursor, remaining);
    if (got <= 0) {
      break;
    }
    cursor += got;
    remaining -= static_cast<size_t>(got);
  }
  return count - remaining;
}

//! Positional variant of ReadAll(): keep reading from `offset` onwards until
//! `count` bytes have been stored, or PreadSafely() reports end-of-file or
//! an error. Returns the number of bytes actually read.
static inline size_t PreadAll(int fd, void *buf, size_t count, ssize_t offset) {
  char *const dst = static_cast<char *>(buf);
  size_t done = 0;

  while (done < count) {
    const ssize_t got = PreadSafely(fd, dst + done, count - done, offset + done);
    if (got <= 0) {
      break;
    }
    done += got;
  }
  return done;
}

//! Keep writing until all `count` bytes of `buf` have been handed to the
//! descriptor, or until WriteSafely() returns zero or an error.
//! Returns the number of bytes actually written.
static inline size_t WriteAll(int fd, const void *buf, size_t count) {
  const char *cursor = static_cast<const char *>(buf);
  size_t remaining = count;

  while (remaining > 0) {
    const ssize_t put = WriteSafely(fd, cursor, remaining);
    if (put <= 0) {
      break;
    }
    cursor += put;
    remaining -= static_cast<size_t>(put);
  }
  return count - remaining;
}

//! Positional variant of WriteAll(): keep writing at `offset` onwards until
//! all `count` bytes are written, or PwriteSafely() returns zero or an error.
//! Returns the number of bytes actually written.
static inline size_t PwriteAll(int fd, const void *buf, size_t count,
                               ssize_t offset) {
  const char *const src = static_cast<const char *>(buf);
  size_t done = 0;

  while (done < count) {
    const ssize_t put =
        PwriteSafely(fd, src + done, count - done, offset + done);
    if (put <= 0) {
      break;
    }
    done += put;
  }
  return done;
}

bool File::create(const char *path, size_t len, bool direct) {
  ailego_false_if_false(native_handle_ == File::InvalidHandle && path);

  // Try opening or creating a file
  int flags = O_RDWR | O_CREAT;
#ifdef O_DIRECT
  if (direct) {
    flags |= O_DIRECT;
  }
#else
  (void)direct;
#endif

  int fd = OpenSafely(path, flags);
  ailego_false_if_lt_zero(fd);

#ifdef F_NOCACHE
  // Direct IO canonical solution for Mac OSX
  if (direct) {
    ailego_false_if_ne_zero(fcntl(fd, F_NOCACHE, 1));
  }
#endif

  // Truncate the file to the specified size
  ailego_do_if_ne_zero(ftruncate(fd, len)) {
    CloseSafely(fd);
    return false;
  }

  read_only_ = false;
  native_handle_ = fd;
  return true;
}

//! Open an existing file.
//! @param path    File path; must not be null
//! @param rdonly  Open read-only when true, read-write otherwise
//! @param direct  Request unbuffered IO (O_DIRECT, or F_NOCACHE where that
//!                is what the platform provides)
//! @return true on success. On any failure the descriptor opened here is
//!         closed again and the object is left unchanged.
bool File::open(const char *path, bool rdonly, bool direct) {
  ailego_false_if_false(native_handle_ == File::InvalidHandle && path);

  // Try opening the file
  int flags = rdonly ? O_RDONLY : O_RDWR;
#ifdef O_DIRECT
  if (direct) {
    flags |= O_DIRECT;
  }
#else
  (void)direct;
#endif

  int fd = OpenSafely(path, flags);
  ailego_false_if_lt_zero(fd);

#ifdef F_NOCACHE
  // Direct IO canonical solution for Mac OSX
  if (direct && fcntl(fd, F_NOCACHE, 1) != 0) {
    // Do not leak the descriptor when direct IO cannot be enabled
    CloseSafely(fd);
    return false;
  }
#endif

  read_only_ = rdonly;
  native_handle_ = fd;
  return true;
}

//! Close the file if one is open and mark the handle invalid.
void File::close(void) {
  if (native_handle_ != File::InvalidHandle) {
    CloseSafely(native_handle_);
    native_handle_ = File::InvalidHandle;
  }
}

//! Move the file position back to the beginning; no-op without an open file.
void File::reset(void) {
  if (native_handle_ != File::InvalidHandle) {
    lseek(native_handle_, 0, SEEK_SET);
  }
}

//! Write `len` bytes at the current file position.
//! The data is handed to WriteAll() in chunks of at most 0x40000000 bytes
//! (1 GiB). Returns the number of bytes written, which is less than `len`
//! when a chunk could not be written completely.
size_t File::write(const void *data, size_t len) {
  constexpr size_t kChunk = 0x40000000u;
  const uint8_t *const src = static_cast<const uint8_t *>(data);
  size_t written = 0u;

  // Full-sized chunks first
  while (len >= kChunk) {
    const size_t n = WriteAll(native_handle_, src + written, kChunk);
    if (n != kChunk) {
      return written + n;
    }
    written += kChunk;
    len -= kChunk;
  }
  // Remaining tail, shorter than one chunk
  if (len != 0) {
    written += WriteAll(native_handle_, src + written, len);
  }
  return written;
}

//! Write `len` bytes starting at absolute file offset `off`.
//! The data is handed to PwriteAll() in chunks of at most 0x40000000 bytes
//! (1 GiB). Returns the number of bytes written, which is less than `len`
//! when a chunk could not be written completely.
size_t File::write(ssize_t off, const void *data, size_t len) {
  constexpr size_t kChunk = 0x40000000u;
  const uint8_t *const src = static_cast<const uint8_t *>(data);
  size_t written = 0u;

  // Full-sized chunks first
  while (len >= kChunk) {
    const size_t n =
        PwriteAll(native_handle_, src + written, kChunk, off + written);
    if (n != kChunk) {
      return written + n;
    }
    written += kChunk;
    len -= kChunk;
  }
  // Remaining tail, shorter than one chunk
  if (len != 0) {
    written += PwriteAll(native_handle_, src + written, len, off + written);
  }
  return written;
}

//! Read up to `len` bytes from the current file position into `buf`.
//! The request is served through ReadAll() in chunks of at most 0x40000000
//! bytes (1 GiB). Returns the number of bytes read, which is less than
//! `len` when a chunk came back short.
size_t File::read(void *buf, size_t len) {
  constexpr size_t kChunk = 0x40000000u;
  uint8_t *const dst = static_cast<uint8_t *>(buf);
  size_t fetched = 0u;

  // Full-sized chunks first
  while (len >= kChunk) {
    const size_t n = ReadAll(native_handle_, dst + fetched, kChunk);
    if (n != kChunk) {
      return fetched + n;
    }
    fetched += kChunk;
    len -= kChunk;
  }
  // Remaining tail, shorter than one chunk
  if (len != 0) {
    fetched += ReadAll(native_handle_, dst + fetched, len);
  }
  return fetched;
}

//! Read up to `len` bytes starting at absolute file offset `off`.
//! The request is served through PreadAll() in chunks of at most 0x40000000
//! bytes (1 GiB). Returns the number of bytes read, which is less than
//! `len` when a chunk came back short.
size_t File::read(ssize_t off, void *buf, size_t len) {
  constexpr size_t kChunk = 0x40000000u;
  uint8_t *const dst = static_cast<uint8_t *>(buf);
  size_t fetched = 0u;

  // Full-sized chunks first
  while (len >= kChunk) {
    const size_t n =
        PreadAll(native_handle_, dst + fetched, kChunk, off + fetched);
    if (n != kChunk) {
      return fetched + n;
    }
    fetched += kChunk;
    len -= kChunk;
  }
  // Remaining tail, shorter than one chunk
  if (len != 0) {
    fetched += PreadAll(native_handle_, dst + fetched, len, off + fetched);
  }
  return fetched;
}

//! Flush the file with fsync(); false when no file is open or fsync() fails.
bool File::flush(void) {
  if (native_handle_ == File::InvalidHandle) {
    return false;
  }
  const int rc = fsync(native_handle_);
  return rc == 0;
}

//! Reposition the file offset via lseek(); `origin` is passed through as the
//! `whence` argument. Returns false when no file is open or lseek() fails.
bool File::seek(ssize_t off, Origin origin) {
  if (native_handle_ == File::InvalidHandle) {
    return false;
  }
  const off_t pos = lseek(native_handle_, off, static_cast<int>(origin));
  return pos != static_cast<off_t>(-1);
}

//! Resize the open file to `len` bytes with ftruncate().
//! Returns false when no file is open or ftruncate() fails.
bool File::truncate(size_t len) {
  if (native_handle_ == File::InvalidHandle) {
    return false;
  }
  return ftruncate(native_handle_, static_cast<off_t>(len)) == 0;
}

//! Size of the open file in bytes as reported by fstat();
//! 0 when no file is open or fstat() fails.
size_t File::size(void) const {
  if (native_handle_ == File::InvalidHandle) {
    return 0;
  }
  struct stat info;
  if (fstat(native_handle_, &info) != 0) {
    return 0;
  }
  return info.st_size;
}

//! Current file position, queried with lseek(fd, 0, SEEK_CUR);
//! 0 when no file is open or lseek() fails.
ssize_t File::offset(void) const {
  if (native_handle_ == File::InvalidHandle) {
    return 0;
  }
  const off_t pos = lseek(native_handle_, 0, SEEK_CUR);
  if (pos == -1) {
    return 0;
  }
  return pos;
}

//! Map `len` bytes of the file `handle`, starting at offset `off`.
//! @param opts  Bitwise OR of File::MMAP_* options:
//!              MMAP_READONLY  map PROT_READ only (default: read + write)
//!              MMAP_SHARED    MAP_SHARED instead of MAP_PRIVATE
//!              MMAP_POPULATE  add MAP_POPULATE where the platform has it
//!              MMAP_HUGE_PAGE add MAP_HUGETLB where the platform has it
//!              MMAP_LOCKED    mlock() the mapping (best effort)
//!              MMAP_WARMUP    touch every page after mapping
//! @return the mapped address, or nullptr when mmap() fails.
void *File::MemoryMap(NativeHandle handle, ssize_t off, size_t len, int opts) {
  int prot =
      ((opts & File::MMAP_READONLY) ? PROT_READ : PROT_READ | PROT_WRITE);
  int flags = (opts & File::MMAP_SHARED) ? MAP_SHARED : MAP_PRIVATE;

#if defined(MAP_POPULATE)
  // MAP_POPULATE is a mapping flag, not a protection bit
  if (opts & File::MMAP_POPULATE) {
    flags |= MAP_POPULATE;
  }
#endif
#if defined(MAP_HUGETLB)
  if (opts & File::MMAP_HUGE_PAGE) {
    flags |= MAP_HUGETLB;
  }
#endif
  void *addr = mmap(nullptr, len, prot, flags, handle, off);
  ailego_null_if_false(addr != MAP_FAILED);

  if (opts & File::MMAP_LOCKED) {
    // Best effort: a failed mlock() does not invalidate the mapping
    mlock(addr, len);
  }
  if (opts & File::MMAP_WARMUP) {
    File::MemoryWarmup(addr, len);
  }
  return addr;
}

#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
#define MAP_ANONYMOUS MAP_ANON
#endif

//! Create an anonymous (not file-backed) mapping of `len` bytes.
//! @param opts  Bitwise OR of File::MMAP_* options; MMAP_READONLY,
//!              MMAP_SHARED, MMAP_POPULATE and MMAP_HUGE_PAGE are honoured.
//! @return the mapped address, or nullptr when mmap() fails or the platform
//!         has no MAP_ANONYMOUS.
void *File::MemoryMap(size_t len, int opts) {
#if defined(MAP_ANONYMOUS)
  int prot =
      ((opts & File::MMAP_READONLY) ? PROT_READ : PROT_READ | PROT_WRITE);
  int flags = (opts & File::MMAP_SHARED) ? MAP_SHARED | MAP_ANONYMOUS
                                         : MAP_PRIVATE | MAP_ANONYMOUS;

#if defined(MAP_POPULATE)
  // MAP_POPULATE is a mapping flag, not a protection bit
  if (opts & File::MMAP_POPULATE) {
    flags |= MAP_POPULATE;
  }
#endif
#if defined(MAP_HUGETLB)
  if (opts & File::MMAP_HUGE_PAGE) {
    flags |= MAP_HUGETLB;
  }
#endif
  void *addr = mmap(nullptr, len, prot, flags, -1, 0);
  ailego_null_if_false(addr != MAP_FAILED);
  return addr;
#else
  (void)len;
  (void)opts;
  return nullptr;
#endif  // MAP_ANONYMOUS
}

//! Resize and/or move an existing mapping.
//! @param oldptr   Start of the current mapping
//! @param oldsize  Size of the current mapping
//! @param newptr   Desired new address, or nullptr to let the kernel choose
//! @param newsize  Requested new size
//! @return the result of mremap() on Linux and NetBSD; on other platforms
//!         nullptr with errno set to ENOTSUP.
//! NOTE(review): on failure mremap() returns MAP_FAILED rather than nullptr,
//! and that value is passed through unchanged here - confirm callers check
//! for it.
void *File::MemoryRemap(void *oldptr, size_t oldsize, void *newptr,
                        size_t newsize) {
#if defined(__linux) || defined(__linux__)
  // MREMAP_FIXED is only valid together with MREMAP_MAYMOVE; on its own
  // the kernel rejects the call with EINVAL.
  return newptr ? mremap(oldptr, oldsize, newsize,
                         MREMAP_MAYMOVE | MREMAP_FIXED, newptr)
                : mremap(oldptr, oldsize, newsize, MREMAP_MAYMOVE);
#elif defined(__NetBSD__)
  return newptr ? mremap(oldptr, oldsize, newptr, newsize, MAP_FIXED)
                : mremap(oldptr, oldsize, nullptr, newsize, 0);
#else
  (void)oldptr;
  (void)oldsize;
  (void)newptr;
  (void)newsize;
  errno = ENOTSUP;
  return nullptr;
#endif
}

//! Release a mapping with munmap(); a null `addr` is ignored.
void File::MemoryUnmap(void *addr, size_t len) {
  if (addr != nullptr) {
    munmap(addr, len);
  }
}

//! Schedule a write-back of the mapped range via msync(MS_ASYNC).
//! Returns false for a null `addr` or when msync() fails.
bool File::MemoryFlush(void *addr, size_t len) {
  if (addr == nullptr) {
    return false;
  }
  const int rc = msync(addr, len, MS_ASYNC);
  return rc == 0;
}

//! Lock the given range into memory with mlock().
//! Returns false for a null `addr`, a zero `len`, or when mlock() fails.
bool File::MemoryLock(void *addr, size_t len) {
  if (addr == nullptr || len == 0) {
    return false;
  }
  const int rc = mlock(addr, len);
  return rc == 0;
}

//! Undo MemoryLock() for the given range with munlock().
//! Returns false for a null `addr`, a zero `len`, or when munlock() fails.
bool File::MemoryUnlock(void *addr, size_t len) {
  if (addr == nullptr || len == 0) {
    return false;
  }
  const int rc = munlock(addr, len);
  return rc == 0;
}

#else

//! Create a local file
bool File::create(const char *path, size_t len, bool direct) {
  ailego_false_if_false(native_handle_ == File::InvalidHandle && path);

  // Try opening or creating the file
  HANDLE file_handle =
      CreateFileA(path, GENERIC_WRITE | GENERIC_READ, FILE_SHARE_READ, nullptr,
                  CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, nullptr);
  ailego_false_if_false(file_handle != INVALID_HANDLE_VALUE);

  // Truncate the file to the specified size
  LARGE_INTEGER file_size;
  file_size.QuadPart = len;
  ailego_do_if_false(
      SetFilePointerEx(file_handle, file_size, nullptr, FILE_BEGIN) &&
      SetEndOfFile(file_handle)) {
    CloseHandle(file_handle);
    return false;
  }

  if (!direct) {
    // Reset the file pointer
    SetFilePointer(file_handle, 0, nullptr, FILE_BEGIN);
  } else {
    // Close and reopen file
    CloseHandle(file_handle);
    file_handle = CreateFileA(
        path, GENERIC_WRITE | GENERIC_READ, FILE_SHARE_READ, nullptr,
        OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL | FILE_FLAG_NO_BUFFERING, nullptr);
    ailego_false_if_false(file_handle != INVALID_HANDLE_VALUE);
  }

  read_only_ = false;
  native_handle_ = file_handle;
  return true;
}

//! Open a local file
bool File::open(const char *path, bool rdonly, bool direct) {
  ailego_false_if_false(native_handle_ == File::InvalidHandle && path);

  // Try opening the file
  DWORD flags = FILE_ATTRIBUTE_NORMAL;
  if (direct) {
    flags |= FILE_FLAG_NO_BUFFERING;
  }
  HANDLE file_handle =
      CreateFileA(path, (rdonly ? GENERIC_READ : GENERIC_READ | GENERIC_WRITE),
                  FILE_SHARE_READ, nullptr, OPEN_EXISTING, flags, nullptr);
  ailego_false_if_false(file_handle != INVALID_HANDLE_VALUE);

  read_only_ = rdonly;
  native_handle_ = file_handle;
  return true;
}

//! Close the file handle if one is open and mark it invalid.
void File::close(void) {
  if (native_handle_ != File::InvalidHandle) {
    CloseHandle(native_handle_);
    native_handle_ = File::InvalidHandle;
  }
}

//! Move the file pointer back to the beginning; no-op without an open file.
void File::reset(void) {
  if (native_handle_ != File::InvalidHandle) {
    SetFilePointer(native_handle_, 0, nullptr, FILE_BEGIN);
  }
}

//! Write `len` bytes at the current file pointer (Windows).
//! WriteFile() is called in chunks of at most 0x40000000 bytes (1 GiB).
//! Returns the number of bytes written; the count stops growing at the
//! first failed or short WriteFile() call.
size_t File::write(const void *data, size_t len) {
  const DWORD kChunk = 0x40000000u;
  const uint8_t *const src = static_cast<const uint8_t *>(data);
  DWORD put = 0u;
  size_t written = 0u;

  // Full-sized chunks first
  while (len >= kChunk) {
    if (!WriteFile(native_handle_, src + written, kChunk, &put, nullptr)) {
      return written;
    }
    if (put != kChunk) {
      return written + put;
    }
    written += kChunk;
    len -= kChunk;
  }
  // Remaining tail, shorter than one chunk
  if (len > 0) {
    if (WriteFile(native_handle_, src + written, (DWORD)len, &put, nullptr)) {
      written += put;
    }
  }
  return written;
}

//! Write `len` bytes starting at absolute file offset `off` (Windows).
//! The target position is passed to WriteFile() through an OVERLAPPED
//! structure; data goes out in chunks of at most 0x40000000 bytes (1 GiB).
//! Returns the number of bytes written; the count stops growing at the
//! first failed or short WriteFile() call.
size_t File::write(ssize_t off, const void *data, size_t len) {
  const DWORD kChunk = 0x40000000u;
  const uint8_t *const src = static_cast<const uint8_t *>(data);
  DWORD put = 0u;
  size_t written = 0u;

  OVERLAPPED position;
  memset(&position, 0, sizeof(OVERLAPPED));

  // Full-sized chunks first
  while (len >= kChunk) {
    // Split the 64-bit target offset into the two OVERLAPPED halves
    const uint64_t target = off + written;
    position.OffsetHigh = (DWORD)(target >> 32);
    position.Offset = (DWORD)(target & 0xffffffffu);

    if (!WriteFile(native_handle_, src + written, kChunk, &put, &position)) {
      return written;
    }
    if (put != kChunk) {
      return written + put;
    }
    written += kChunk;
    len -= kChunk;
  }

  // Remaining tail, shorter than one chunk
  if (len > 0) {
    const uint64_t target = off + written;
    position.OffsetHigh = (DWORD)(target >> 32);
    position.Offset = (DWORD)(target & 0xffffffffu);

    if (WriteFile(native_handle_, src + written, (DWORD)len, &put,
                  &position)) {
      written += put;
    }
  }
  return written;
}

//! Read up to `len` bytes from the current file pointer (Windows).
//! ReadFile() is called in chunks of at most 0x40000000 bytes (1 GiB).
//! Returns the number of bytes read; the count stops growing at the first
//! failed or short ReadFile() call.
size_t File::read(void *buf, size_t len) {
  const DWORD kChunk = 0x40000000u;
  uint8_t *const dst = static_cast<uint8_t *>(buf);
  DWORD got = 0u;
  size_t fetched = 0u;

  // Full-sized chunks first
  while (len >= kChunk) {
    if (!ReadFile(native_handle_, dst + fetched, kChunk, &got, nullptr)) {
      return fetched;
    }
    if (got != kChunk) {
      return fetched + got;
    }
    fetched += kChunk;
    len -= kChunk;
  }
  // Remaining tail, shorter than one chunk
  if (len > 0) {
    if (ReadFile(native_handle_, dst + fetched, (DWORD)len, &got, nullptr)) {
      fetched += got;
    }
  }
  return fetched;
}

//! Read up to `len` bytes starting at absolute file offset `off` (Windows).
//! The source position is passed to ReadFile() through an OVERLAPPED
//! structure; data comes in chunks of at most 0x40000000 bytes (1 GiB).
//! Returns the number of bytes read; the count stops growing at the first
//! failed or short ReadFile() call.
size_t File::read(ssize_t off, void *buf, size_t len) {
  const DWORD kChunk = 0x40000000u;
  uint8_t *const dst = static_cast<uint8_t *>(buf);
  DWORD got = 0u;
  size_t fetched = 0u;

  OVERLAPPED position;
  memset(&position, 0, sizeof(OVERLAPPED));

  // Full-sized chunks first
  while (len >= kChunk) {
    // Split the 64-bit source offset into the two OVERLAPPED halves
    const uint64_t source = off + fetched;
    position.OffsetHigh = (DWORD)(source >> 32);
    position.Offset = (DWORD)(source & 0xffffffffu);

    if (!ReadFile(native_handle_, dst + fetched, kChunk, &got, &position)) {
      return fetched;
    }
    if (got != kChunk) {
      return fetched + got;
    }
    fetched += kChunk;
    len -= kChunk;
  }

  // Remaining tail, shorter than one chunk
  if (len > 0) {
    const uint64_t source = off + fetched;
    position.OffsetHigh = (DWORD)(source >> 32);
    position.Offset = (DWORD)(source & 0xffffffffu);

    if (ReadFile(native_handle_, dst + fetched, (DWORD)len, &got,
                 &position)) {
      fetched += got;
    }
  }
  return fetched;
}

bool File::flush(void) {
  ailego_false_if_false(native_handle_ != File::InvalidHandle);
  return (!!FlushFileBuffers(native_handle_));
}

//! Move the file pointer via SetFilePointerEx(); `origin` is passed through
//! as the move method. Returns false when no file is open or the call fails.
bool File::seek(ssize_t off, Origin origin) {
  if (native_handle_ == File::InvalidHandle) {
    return false;
  }

  LARGE_INTEGER distance;
  distance.QuadPart = off;
  const BOOL moved =
      SetFilePointerEx(native_handle_, distance, nullptr, (DWORD)origin);
  return moved != 0;
}

//! Resize the open file to `len` bytes (Windows).
//! The current file pointer is remembered, moved to `len` for
//! SetEndOfFile(), and then restored. Returns false when no file is open or
//! when querying the pointer or resizing fails.
bool File::truncate(size_t len) {
  if (native_handle_ == File::InvalidHandle) {
    return false;
  }

  // Remember where the file pointer currently is
  LARGE_INTEGER zero_move;
  LARGE_INTEGER saved_pos;
  zero_move.QuadPart = 0;
  saved_pos.QuadPart = 0;
  if (!SetFilePointerEx(native_handle_, zero_move, &saved_pos,
                        FILE_CURRENT)) {
    return false;
  }

  // Truncate the file to the specified size
  LARGE_INTEGER new_size;
  new_size.QuadPart = len;
  if (!(SetFilePointerEx(native_handle_, new_size, nullptr, FILE_BEGIN) &&
        SetEndOfFile(native_handle_))) {
    return false;
  }

  // Put the file pointer back (result intentionally not checked)
  SetFilePointerEx(native_handle_, saved_pos, nullptr, FILE_BEGIN);
  return true;
}

//! Size of the open file in bytes as reported by GetFileSizeEx();
//! 0 when no file is open or the query fails.
size_t File::size(void) const {
  if (native_handle_ == File::InvalidHandle) {
    return 0;
  }
  LARGE_INTEGER bytes;
  if (!GetFileSizeEx(native_handle_, &bytes)) {
    return 0;
  }
  return static_cast<size_t>(bytes.QuadPart);
}

//! Current file pointer, obtained by a zero-distance SetFilePointerEx()
//! move relative to FILE_CURRENT; 0 when no file is open or the call fails.
ssize_t File::offset(void) const {
  if (native_handle_ == File::InvalidHandle) {
    return 0;
  }

  LARGE_INTEGER zero_move;
  LARGE_INTEGER current;
  zero_move.QuadPart = 0;
  if (!SetFilePointerEx(native_handle_, zero_move, &current, FILE_CURRENT)) {
    return 0;
  }
  return static_cast<size_t>(current.QuadPart);
}

//! Map `len` bytes of the file `handle`, starting at offset `off` (Windows).
//! @param opts  Bitwise OR of File::MMAP_* options:
//!              MMAP_READONLY  PAGE_READONLY / FILE_MAP_READ only
//!              MMAP_SHARED    when absent, FILE_MAP_COPY is requested
//!              MMAP_LOCKED    VirtualLock() the view (best effort)
//!              MMAP_WARMUP    touch every page after mapping
//! @return the address of the view, or nullptr on failure.
void *File::MemoryMap(NativeHandle handle, ssize_t off, size_t len, int opts) {
  // The mapping object has to span [0, off + len) so that a view of `len`
  // bytes starting at `off` fits into it. A zero `len` keeps the original
  // meaning: size 0 makes the mapping as large as the file.
  LARGE_INTEGER mapping_size;
  mapping_size.QuadPart =
      (len != 0) ? static_cast<LONGLONG>(off) + static_cast<LONGLONG>(len) : 0;

  // Create map object
  HANDLE file_mapping = CreateFileMapping(
      handle, nullptr,
      ((opts & File::MMAP_READONLY) ? PAGE_READONLY : PAGE_READWRITE),
      mapping_size.HighPart, mapping_size.LowPart, nullptr);
  ailego_null_if_false(file_mapping != nullptr);

  DWORD desired_access = FILE_MAP_READ;
  if (!(opts & File::MMAP_READONLY)) {
    desired_access |= FILE_MAP_WRITE;
  }
  if (!(opts & File::MMAP_SHARED)) {
    desired_access |= FILE_MAP_COPY;
  }

  LARGE_INTEGER view_offset;
  view_offset.QuadPart = off;

  // Map exactly `len` bytes at `off` (0 = up to the end of the mapping);
  // the mapping handle is no longer needed once the view exists.
  void *addr = MapViewOfFile(file_mapping, desired_access,
                             view_offset.HighPart, view_offset.LowPart, len);
  CloseHandle(file_mapping);

  ailego_null_if_false(addr);
  if (opts & File::MMAP_LOCKED) {
    // Best effort: a failed VirtualLock() does not invalidate the view
    VirtualLock(addr, len);
  }
  if (opts & File::MMAP_WARMUP) {
    File::MemoryWarmup(addr, len);
  }
  return addr;
}

//! Anonymous mappings are not implemented in the Windows build; always
//! returns nullptr.
void *File::MemoryMap(size_t /*len*/, int /*opts*/) {
  return nullptr;
}

//! Remapping is not implemented in the Windows build; always returns nullptr.
void *File::MemoryRemap(void * /*oldptr*/, size_t /*oldsize*/,
                        void * /*newptr*/, size_t /*newsize*/) {
  return nullptr;
}

//! Release a view with UnmapViewOfFile(); a null `addr` is ignored and the
//! length is not needed on Windows.
void File::MemoryUnmap(void *addr, size_t /*len*/) {
  if (addr != nullptr) {
    UnmapViewOfFile(addr);
  }
}

bool File::MemoryFlush(void *addr, size_t /*len*/) {
  ailego_false_if_false(addr);
  return (!!FlushViewOfFile(addr, 0));
}

bool File::MemoryLock(void *addr, size_t len) {
  ailego_false_if_false(addr && len);
  return (!!VirtualLock(addr, len));
}

bool File::MemoryUnlock(void *addr, size_t len) {
  ailego_false_if_false(addr && len);
  return (!!VirtualUnlock(addr, len));
}

static inline int getpagesize(void) {
  SYSTEM_INFO info;
  GetSystemInfo(&info);
  return info.dwPageSize;
}
#endif

//! Touch the range [addr, addr + len) by reading one byte every page-size
//! step. Does nothing for a null `addr` or a zero `len`.
void File::MemoryWarmup(void *addr, size_t len) {
  static int page_size = getpagesize();

  if (!addr || !len) {
    return;
  }

  // The volatile accumulator keeps the reads from being optimized away
  volatile uint8_t sink = 0;
  uint8_t *const stop = reinterpret_cast<uint8_t *>(addr) + len;
  for (uint8_t *cur = reinterpret_cast<uint8_t *>(addr); cur < stop;
       cur += page_size) {
    sink ^= *cur;
  }
}

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/io/file_lock.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "file_lock.h"

#if !defined(_WIN64) && !defined(_WIN32)
#include <sys/file.h>
#else
#include <Windows.h>
#endif

namespace zvec {
namespace ailego {

#if !defined(_WIN64) && !defined(_WIN32)
//! Take an exclusive flock() lock on `fd`; true when flock() succeeds.
bool FileLock::Lock(int fd) {
  const int rc = flock(fd, LOCK_EX);
  return rc == 0;
}

//! Try to take an exclusive flock() lock on `fd` without blocking
//! (LOCK_NB); true when flock() succeeds.
bool FileLock::TryLock(int fd) {
  const int rc = flock(fd, LOCK_EX | LOCK_NB);
  return rc == 0;
}

//! Take a shared flock() lock on `fd`; true when flock() succeeds.
bool FileLock::LockShared(int fd) {
  const int rc = flock(fd, LOCK_SH);
  return rc == 0;
}

//! Try to take a shared flock() lock on `fd` without blocking (LOCK_NB);
//! true when flock() succeeds.
bool FileLock::TryLockShared(int fd) {
  const int rc = flock(fd, LOCK_SH | LOCK_NB);
  return rc == 0;
}

//! Release the flock() lock held on `fd`; true when flock() succeeds.
bool FileLock::Unlock(int fd) {
  const int rc = flock(fd, LOCK_UN);
  return rc == 0;
}

#else
//! Take an exclusive LockFileEx() lock covering MAXDWORD:MAXDWORD bytes from
//! offset 0; true when the call succeeds.
bool FileLock::Lock(HANDLE handle) {
  OVERLAPPED region = {};
  const BOOL locked = LockFileEx(handle, LOCKFILE_EXCLUSIVE_LOCK, 0, MAXDWORD,
                                 MAXDWORD, &region);
  return locked != 0;
}

//! Like Lock(), but with LOCKFILE_FAIL_IMMEDIATELY so the call does not
//! wait; true when the lock was obtained.
bool FileLock::TryLock(HANDLE handle) {
  OVERLAPPED region = {};
  const DWORD mode = LOCKFILE_EXCLUSIVE_LOCK | LOCKFILE_FAIL_IMMEDIATELY;
  const BOOL locked =
      LockFileEx(handle, mode, 0, MAXDWORD, MAXDWORD, &region);
  return locked != 0;
}

//! Take a shared LockFileEx() lock (no flags) covering MAXDWORD:MAXDWORD
//! bytes from offset 0; true when the call succeeds.
bool FileLock::LockShared(HANDLE handle) {
  OVERLAPPED region = {};
  const BOOL locked =
      LockFileEx(handle, 0, 0, MAXDWORD, MAXDWORD, &region);
  return locked != 0;
}

//! Like LockShared(), but with LOCKFILE_FAIL_IMMEDIATELY so the call does
//! not wait; true when the lock was obtained.
bool FileLock::TryLockShared(HANDLE handle) {
  OVERLAPPED region = {};
  const BOOL locked = LockFileEx(handle, LOCKFILE_FAIL_IMMEDIATELY, 0,
                                 MAXDWORD, MAXDWORD, &region);
  return locked != 0;
}

//! Release the MAXDWORD:MAXDWORD byte range from offset 0 with
//! UnlockFileEx(); true when the call succeeds.
bool FileLock::Unlock(HANDLE handle) {
  OVERLAPPED region = {};
  const BOOL unlocked =
      UnlockFileEx(handle, 0, MAXDWORD, MAXDWORD, &region);
  return unlocked != 0;
}

#endif  // !_WIN64 && !_WIN32

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/io/file_lock.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <zvec/ailego/io/file.h>

namespace zvec {
namespace ailego {

/*! File Lock
 *
 *  Non-copyable, non-movable helper that remembers a native file handle and
 *  forwards to the static locking primitives. The handle is only borrowed,
 *  and no destructor is declared, so a held lock is not released
 *  automatically when the object goes away.
 */
class FileLock {
 public:
  //! Bind to the native handle of an existing File
  FileLock(const File &file) : native_handle_(file.native_handle()) {}

  //! Bind to a raw native handle
  FileLock(File::NativeHandle handle) : native_handle_(handle) {}

  //! Acquire an exclusive lock on the bound handle
  bool lock() const {
    return Lock(native_handle_);
  }

  //! Try to acquire an exclusive lock on the bound handle
  bool try_lock() const {
    return TryLock(native_handle_);
  }

  //! Acquire a shared lock on the bound handle
  bool lock_shared() const {
    return LockShared(native_handle_);
  }

  //! Try to acquire a shared lock on the bound handle
  bool try_lock_shared() const {
    return TryLockShared(native_handle_);
  }

  //! Release the lock on the bound handle
  bool unlock() const {
    return Unlock(native_handle_);
  }

  //! Acquire an exclusive lock on `handle`
  static bool Lock(File::NativeHandle handle);

  //! Try to acquire an exclusive lock on `handle`
  static bool TryLock(File::NativeHandle handle);

  //! Acquire a shared lock on `handle`
  static bool LockShared(File::NativeHandle handle);

  //! Try to acquire a shared lock on `handle`
  static bool TryLockShared(File::NativeHandle handle);

  //! Release the lock on `handle`
  static bool Unlock(File::NativeHandle handle);

 private:
  //! Copying and moving are disabled
  FileLock(const FileLock &) = delete;
  FileLock(FileLock &&) = delete;
  FileLock &operator=(const FileLock &) = delete;
  FileLock &operator=(FileLock &&) = delete;

  //! Native handle the lock operations act on (not owned)
  File::NativeHandle native_handle_;
};

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/io/file_writer.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstdarg>
#include <ios>
#include "file.h"

namespace zvec {
namespace ailego {

/*! File Stream Writer
 *
 *  Move-only convenience wrapper around File for sequential text output.
 *  The streaming operators and print() throw std::ios_base::failure when
 *  the underlying write is short; write() reports the byte count instead.
 */
class FileWriter {
 public:
  //! Constructor
  FileWriter(void) {}

  //! Move constructor: takes over the file held by `rhs`
  FileWriter(FileWriter &&rhs) : file_(std::move(rhs.file_)) {}

  //! Destructor
  ~FileWriter(void) {}

  //! Move assignment: takes over the file held by `rhs`
  FileWriter &operator=(FileWriter &&rhs) {
    file_ = std::move(rhs.file_);
    return *this;
  }

  //! Write a NUL-terminated string (without the terminator).
  //! Throws std::ios_base::failure on a short write.
  FileWriter &operator<<(const char *str) {
    size_t len = std::strlen(str);
    if (file_.write(str, len) != len) {
      throw std::ios_base::failure("Write error");
    }
    return *this;
  }

  //! Write the contents of a std::string.
  //! Throws std::ios_base::failure on a short write.
  FileWriter &operator<<(const std::string &str) {
    if (file_.write(str.data(), str.size()) != str.size()) {
      throw std::ios_base::failure("Write error");
    }
    return *this;
  }

  //! Write a single character.
  //! Throws std::ios_base::failure on a short write.
  FileWriter &operator<<(char c) {
    if (file_.write(&c, 1) != 1) {
      throw std::ios_base::failure("Write error");
    }
    return *this;
  }

  //! Test if the file is valid
  bool is_valid(void) const {
    return file_.is_valid();
  }

  //! Create a local file (zero length, buffered IO)
  bool create(const char *path) {
    return file_.create(path, 0, false);
  }

  //! Open a local file (read-write, buffered IO)
  bool open(const char *path) {
    return file_.open(path, false, false);
  }

  //! Close the local file
  void close(void) {
    file_.close();
  }

  //! Write data into the file; returns the number of bytes written
  size_t write(const void *data, size_t len) {
    return file_.write(data, len);
  }

  //! Synchronize memory with physical storage
  bool flush(void) {
    return file_.flush();
  }

  //! Output with format.
  //! Results that fit into the 8 KiB stack buffer are written straight from
  //! it; longer results are formatted a second time into a heap buffer
  //! instead of being silently truncated.
  //! Throws std::ios_base::failure when formatting fails or on a short
  //! write.
  void print(const char *format, va_list args) {
    char buf[8192];

    // `args` is consumed by the first vsnprintf(); keep a copy in case the
    // output turns out not to fit and has to be formatted again.
    va_list retry_args;
    va_copy(retry_args, args);
    const int needed = std::vsnprintf(buf, sizeof(buf), format, args);

    if (needed < 0) {
      // Encoding error: the buffer content is not usable
      va_end(retry_args);
      throw std::ios_base::failure("Format error");
    }
    if (static_cast<size_t>(needed) < sizeof(buf)) {
      va_end(retry_args);
      (*this) << buf;
      return;
    }

    // Output was truncated: redo it with a buffer of the reported size
    std::string large(static_cast<size_t>(needed) + 1, '\0');
    std::vsnprintf(&large[0], large.size(), format, retry_args);
    va_end(retry_args);
    (*this) << large.c_str();
  }

  //! Output with format
#if defined(__GNUC__)
  void print(const char *format, ...) __attribute__((format(printf, 2, 3))) {
#else
  void print(const char *format, ...) {
#endif
    va_list args;
    va_start(args, format);
    this->print(format, args);
    va_end(args);
  }

 private:
  //! Disable them
  FileWriter(const FileWriter &) = delete;
  FileWriter &operator=(const FileWriter &) = delete;

  //! Members
  File file_;
};

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/logger/logger.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <iostream>
#include <sstream>
#include <thread>
#include <zvec/ailego/io/file.h>
#include <zvec/ailego/logger/logger.h>
#include <zvec/ailego/utility/time_helper.h>

namespace zvec {
namespace ailego {

// Out-of-class definitions of the Logger level constants; the numeric value
// grows with severity (DEBUG is lowest, FATAL is highest).
const int Logger::LEVEL_DEBUG = 0;
const int Logger::LEVEL_INFO = 1;
const int Logger::LEVEL_WARN = 2;
const int Logger::LEVEL_ERROR = 3;
const int Logger::LEVEL_FATAL = 4;

/*! Console Logger
 *
 *  Logger implementation that prints to the process console: messages up to
 *  LEVEL_INFO go to std::cout, everything above goes to std::cerr.
 */
struct ConsoleLogger : public Logger {
  //! Initialize Logger (nothing to set up)
  int init(const Params &) override {
    return 0;
  }

  //! Cleanup Logger (nothing to release)
  int cleanup(void) override {
    return 0;
  }

  //! Log Message.
  //! Output line: "[<level> <local time> <thread id> <file>:<line>] <text>".
  //! The formatted text is limited by the 8192-byte scratch buffer.
  void log(int level, const char *file, int line, const char *format,
           va_list args) override {
    char scratch[8192];
    std::ostringstream out;

    // Prefix: the scratch buffer first holds the timestamp ...
    ailego::Realtime::Localtime(scratch, sizeof(scratch));
    out << '[' << LevelString(level) << ' ' << scratch << ' '
        << std::this_thread::get_id() << ' ' << ailego::File::BaseName(file)
        << ':' << line << "] ";

    // ... and is then reused for the formatted message body
    vsnprintf(scratch, sizeof(scratch), format, args);
    out << scratch << '\n';

    std::ostream &sink = (level <= LEVEL_INFO) ? std::cout : std::cerr;
    sink << out.str() << std::flush;
  }
};

//! Initial logger level of the broker: LEVEL_WARN
int LoggerBroker::logger_level_ = Logger::LEVEL_WARN;

//! Initial logger of the broker: a ConsoleLogger instance
Logger::Pointer LoggerBroker::logger_(new ConsoleLogger);

//! Register Console Logger in Factory
FACTORY_REGISTER_LOGGER(ConsoleLogger);

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/math/cosine_distance_matrix.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <zvec/ailego/internal/platform.h>
#include <zvec/ailego/utility/type_helper.h>
#include "inner_product_matrix.h"

namespace zvec {
namespace ailego {

/*! Cosine Distance Matrix
 *
 *  Primary template, declared only; the usable implementations are the
 *  partial specializations selected through the trailing SFINAE parameter.
 *  T is the element type, M and N are the compile-time matrix/query counts.
 */
template <typename T, size_t M, size_t N, typename = void>
struct CosineDistanceMatrix;

/*! Cosine Distance Matrix (M=1, N=1)
 *
 *  Single vector against single query, for signed arithmetic element types.
 */
template <typename T>
struct CosineDistanceMatrix<
    T, 1, 1, typename std::enable_if<IsSignedArithmetic<T>::value>::type> {
  //! Type of value
  using ValueType = typename std::remove_cv<T>::type;

  //! Compute the distance between matrix and query.
  //! The result is 1 minus the inner product over the leading elements;
  //! the last sizeof(float) / sizeof(ValueType) elements of each vector are
  //! excluded (presumably they hold extra per-vector data - confirm with
  //! the code that builds these vectors).
  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,
                             float *out) {
    ailego_assert(m && q && dim && out);

    // Number of trailing elements that are not part of the inner product
    constexpr size_t kExtraDim = sizeof(float) / sizeof(ValueType);
    const size_t effective_dim = dim - kExtraDim;

    float inner_product;
    InnerProductMatrix<T, 1, 1>::Compute(m, q, effective_dim, &inner_product);

    *out = 1 - inner_product;
  }
};

/*! Cosine Distance Matrix (M>=2, N>=2)
 *
 *  Placeholder specialization: no distance is computed.
 */
template <typename T, size_t M, size_t N>
struct CosineDistanceMatrix<
    T, M, N,
    typename std::enable_if<IsSignedArithmetic<T>::value && M >= 2 &&
                            N >= 2>::type> {
  //! Type of value
  using ValueType = typename std::remove_cv<T>::type;

  //! Compute the distance between matrix and query.
  //! Inputs are ignored; only the first output element is written (0.0f).
  static inline void Compute(const ValueType * /*m*/, const ValueType * /*q*/,
                             size_t /*dim*/, float *out) {
    out[0] = 0.0f;
  }
};

/*! Cosine Distance Matrix (M>=2, N=1)
 *
 *  Placeholder specialization: no distance is computed.
 */
template <typename T, size_t M>
struct CosineDistanceMatrix<
    T, M, 1,
    typename std::enable_if<IsSignedArithmetic<T>::value && M >= 2>::type> {
  //! Type of value
  using ValueType = typename std::remove_cv<T>::type;

  //! Compute the distance between matrix and query.
  //! Inputs are ignored; only the first output element is written (0.0f).
  static inline void Compute(const ValueType * /*m*/, const ValueType * /*q*/,
                             size_t /*dim*/, float *out) {
    out[0] = 0.0f;
  }
};

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/distance.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "distance_matrix.h"

namespace zvec {
namespace ailego {

/*! Distance module
 */
struct Distance {
  //! Compute the hamming distance between two vectors (BINARY)
  static float Hamming(const uint32_t *lhs, const uint32_t *rhs, size_t dim) {
    float result;
    HammingDistanceMatrix<uint32_t, 1, 1>::Compute(lhs, rhs, dim, &result);
    return result;
  }

#if defined(AILEGO_M64)
  //! Compute the hamming distance between two vectors (BINARY)
  static float Hamming(const uint64_t *lhs, const uint64_t *rhs, size_t dim) {
    float result;
    HammingDistanceMatrix<uint64_t, 1, 1>::Compute(lhs, rhs, dim, &result);
    return result;
  }

#else
  //! Compute the hamming distance between two vectors (BINARY)
  static float Hamming(const uint64_t *lhs, const uint64_t *rhs, size_t dim) {
    float result;
    HammingDistanceMatrix<uint32_t, 1, 1>::Compute(
        reinterpret_cast<const uint32_t *>(lhs),
        reinterpret_cast<const uint32_t *>(rhs), dim, &result);
    return result;
  }
#endif

  //! Compute the squared euclidean distance between two vectors (FP32)
  static float SquaredEuclidean(const float *lhs, const float *rhs,
                                size_t dim) {
    float result;
    SquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(lhs, rhs, dim,
                                                         &result);
    return result;
  }

  //! Compute the squared euclidean distance between two vectors (FP16)
  static float SquaredEuclidean(const Float16 *lhs, const Float16 *rhs,
                                size_t dim) {
    float result;
    SquaredEuclideanDistanceMatrix<Float16, 1, 1>::Compute(lhs, rhs, dim,
                                                           &result);
    return result;
  }

  //! Compute the squared euclidean distance between two vectors (INT8)
  static float SquaredEuclidean(const int8_t *lhs, const int8_t *rhs,
                                size_t dim) {
    float result;
    SquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute(lhs, rhs, dim,
                                                          &result);
    return result;
  }

  //! Compute the squared euclidean distance between two vectors (INT4)
  static float SquaredEuclidean(const uint8_t *lhs, const uint8_t *rhs,
                                size_t dim) {
    float result;
    SquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(lhs, rhs, dim,
                                                           &result);
    return result;
  }

  //! Compute the euclidean distance between two vectors (FP32)
  static float Euclidean(const float *lhs, const float *rhs, size_t dim) {
    float result;
    EuclideanDistanceMatrix<float, 1, 1>::Compute(lhs, rhs, dim, &result);
    return result;
  }

  //! Compute the euclidean distance between two vectors (FP16)
  static float Euclidean(const Float16 *lhs, const Float16 *rhs, size_t dim) {
    float result;
    EuclideanDistanceMatrix<Float16, 1, 1>::Compute(lhs, rhs, dim, &result);
    return result;
  }

  //! Compute the euclidean distance between two vectors (INT8)
  static float Euclidean(const int8_t *lhs, const int8_t *rhs, size_t dim) {
    float result;
    EuclideanDistanceMatrix<int8_t, 1, 1>::Compute(lhs, rhs, dim, &result);
    return result;
  }

  //! Compute the euclidean distance between two vectors (INT4)
  static float Euclidean(const uint8_t *lhs, const uint8_t *rhs, size_t dim) {
    float result;
    EuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(lhs, rhs, dim, &result);
    return result;
  }

  //! Compute the inner product between two vectors (FP32)
  static float InnerProduct(const float *lhs, const float *rhs, size_t dim) {
    float result;
    InnerProductMatrix<float, 1, 1>::Compute(lhs, rhs, dim, &result);
    return result;
  }

  //! Compute the inner product between two vectors (FP16)
  static float InnerProduct(const Float16 *lhs, const Float16 *rhs,
                            size_t dim) {
    float result;
    InnerProductMatrix<Float16, 1, 1>::Compute(lhs, rhs, dim, &result);
    return result;
  }

  //! Compute the inner product between two vectors (INT8)
  static float InnerProduct(const int8_t *lhs, const int8_t *rhs, size_t dim) {
    float result;
    InnerProductMatrix<int8_t, 1, 1>::Compute(lhs, rhs, dim, &result);
    return result;
  }

  //! Compute the minus inner product between two vectors (INT4)
  static float InnerProduct(const uint8_t *lhs, const uint8_t *rhs,
                            size_t dim) {
    float result;
    InnerProductMatrix<uint8_t, 1, 1>::Compute(lhs, rhs, dim, &result);
    return result;
  }

  //! Compute the minus inner product between two vectors (FP32)
  static float MinusInnerProduct(const float *lhs, const float *rhs,
                                 size_t dim) {
    float result;
    MinusInnerProductMatrix<float, 1, 1>::Compute(lhs, rhs, dim, &result);
    return result;
  }

  //! Compute the minus inner product between two vectors (FP16)
  static float MinusInnerProduct(const Float16 *lhs, const Float16 *rhs,
                                 size_t dim) {
    float result;
    MinusInnerProductMatrix<Float16, 1, 1>::Compute(lhs, rhs, dim, &result);
    return result;
  }

  //! Compute the minus inner product between two vectors (INT8)
  static float MinusInnerProduct(const int8_t *lhs, const int8_t *rhs,
                                 size_t dim) {
    float result;
    MinusInnerProductMatrix<int8_t, 1, 1>::Compute(lhs, rhs, dim, &result);
    return result;
  }

  //! Compute the minus inner product between two vectors (INT4)
  static float MinusInnerProduct(const uint8_t *lhs, const uint8_t *rhs,
                                 size_t dim) {
    float result;
    MinusInnerProductMatrix<uint8_t, 1, 1>::Compute(lhs, rhs, dim, &result);
    return result;
  }

  //! Compute the mips squared L2 distance between two vectors
  //! (FP32, RepeatedQuadraticInjection)
  static float MipsSquaredEuclidean(const float *lhs, const float *rhs,
                                    size_t dim, size_t m, float eta) {
    float result;
    MipsSquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(lhs, rhs, dim, m,
                                                             eta, &result);
    return result;
  }

  //! Compute the mips squared L2 distance between two vectors
  //! (FP16, RepeatedQuadraticInjection)
  static float MipsSquaredEuclidean(const Float16 *lhs, const Float16 *rhs,
                                    size_t dim, size_t m, float eta) {
    float result;
    MipsSquaredEuclideanDistanceMatrix<Float16, 1, 1>::Compute(lhs, rhs, dim, m,
                                                               eta, &result);
    return result;
  }

  //! Compute the mips squared L2 distance between two vectors
  //! (INT8, RepeatedQuadraticInjection)
  static float MipsSquaredEuclidean(const int8_t *lhs, const int8_t *rhs,
                                    size_t dim, size_t m, float eta) {
    float result;
    MipsSquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute(lhs, rhs, dim, m,
                                                              eta, &result);
    return result;
  }

  //! Compute the mips squared L2 distance between two vectors
  //! (INT4, RepeatedQuadraticInjection)
  static float MipsSquaredEuclidean(const uint8_t *lhs, const uint8_t *rhs,
                                    size_t dim, size_t m, float eta) {
    float result;
    MipsSquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(lhs, rhs, dim, m,
                                                               eta, &result);
    return result;
  }

  //! Compute the mips squared L2 distance between two vectors
  //! (FP32, SphericalInjection)
  static float MipsSquaredEuclidean(const float *lhs, const float *rhs,
                                    size_t dim, float eta) {
    float result;
    MipsSquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(lhs, rhs, dim, eta,
                                                             &result);
    return result;
  }

  //! Compute the mips squared L2 distance between two vectors
  //! (FP16, SphericalInjection)
  static float MipsSquaredEuclidean(const Float16 *lhs, const Float16 *rhs,
                                    size_t dim, float eta) {
    float result;
    MipsSquaredEuclideanDistanceMatrix<Float16, 1, 1>::Compute(lhs, rhs, dim,
                                                               eta, &result);
    return result;
  }

  //! Compute the mips squared L2 distance between two vectors
  //! (INT8, SphericalInjection)
  static float MipsSquaredEuclidean(const int8_t *lhs, const int8_t *rhs,
                                    size_t dim, float eta) {
    float result;
    MipsSquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute(lhs, rhs, dim,
                                                              eta, &result);
    return result;
  }

  //! Compute the mips squared L2 distance between two vectors
  //! (INT4, SphericalInjection)
  static float MipsSquaredEuclidean(const uint8_t *lhs, const uint8_t *rhs,
                                    size_t dim, float eta) {
    float result;
    MipsSquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(lhs, rhs, dim,
                                                               eta, &result);
    return result;
  }

  //! Compute the cosine distance between two vectors (FP32)
  static float Cosine(const float *lhs, const float *rhs, size_t dim) {
    float result;
    CosineDistanceMatrix<float, 1, 1>::Compute(lhs, rhs, dim, &result);
    return result;
  }

  //! Compute the cosine distance between two vectors (FP16)
  static float Cosine(const Float16 *lhs, const Float16 *rhs, size_t dim) {
    float result;
    CosineDistanceMatrix<Float16, 1, 1>::Compute(lhs, rhs, dim, &result);
    return result;
  }

  //! Compute the cosine distance between two vectors (FP16)
  static float Cosine(const int8_t *lhs, const int8_t *rhs, size_t dim) {
    float result;
    CosineDistanceMatrix<int8_t, 1, 1>::Compute(lhs, rhs, dim, &result);
    return result;
  }
};

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/distance_matrix.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "cosine_distance_matrix.h"
#include "euclidean_distance_matrix.h"
#include "hamming_distance_matrix.h"
#include "inner_product_matrix.h"
#include "mips_euclidean_distance_matrix.h"


================================================
FILE: src/ailego/math/distance_matrix_accum_fp16.i
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_fp16.i"
#include "matrix_utility.i"

// Fallback for builds where the compiler does not define __FMA__: the
// multiply-add intrinsics used by the accumulation steps are emulated with a
// separate multiply followed by an add (a * b + c).
#if !defined(__FMA__)
#define _mm_fmadd_ps(a, b, c) _mm_add_ps(_mm_mul_ps((a), (b)), (c))
#define _mm256_fmadd_ps(a, b, c) _mm256_add_ps(_mm256_mul_ps((a), (b)), (c))
#endif  // !__FMA__

// Fallback for AVX512F targets without AVX512DQ: provide the packed-float
// bitwise AND (plain and write-masked) by bit-casting the operands to 512-bit
// integers, applying the 32-bit integer AND, and casting the result back.
#if defined(__AVX512F__) && !defined(__AVX512DQ__)
#define _mm512_and_ps(a, b) \
  _mm512_castsi512_ps(      \
      _mm512_and_epi32(_mm512_castps_si512(a), _mm512_castps_si512(b)))
#define _mm512_mask_and_ps(src, k, a, b)                                   \
  _mm512_castsi512_ps(_mm512_mask_and_epi32(_mm512_castps_si512(src), (k), \
                                            _mm512_castps_si512(a),        \
                                            _mm512_castps_si512(b)))
#endif  // __AVX512F__ && !__AVX512DQ__

//! Compute the distance between matrix and query (FP16, M=1, N=1)
//!
//! Statement macro: it declares locals (`qe`, `qe_aligned` and the
//! accumulator `ymm_sum_0_0`, presumably introduced by MATRIX_VAR_INIT) and
//! advances `m` and `q`, so expand it at most once per scope and pass
//! pointers that may be modified.
//!
//!   m, q   Float16 pointers to the two vectors
//!   dim    number of elements in each vector
//!   out    receives one float: _NORM(horizontal sum of the accumulator)
//!   _MASK  forwarded to MATRIX_FP16_MASK_AVX for the final partial step
//!   _NORM  applied to the horizontally added accumulator
//!
//! Flow: the main loop consumes 16 elements per iteration (aligned 256-bit
//! loads only when both pointers are 32-byte aligned, unaligned loads
//! otherwise); one 8-element step runs if at least 8 elements remain; the
//! remaining `qe - q` elements go through MATRIX_FP16_MASK_AVX.
//! ACCUM_FP32_STEP_AVX is not defined in this file section -- it is
//! presumably supplied by the including translation unit.
#define ACCUM_FP16_1X1_AVX(m, q, dim, out, _MASK, _NORM)                    \
  MATRIX_VAR_INIT(1, 1, __m256, ymm_sum, _mm256_setzero_ps())               \
  const Float16 *qe = q + dim;                                              \
  const Float16 *qe_aligned = q + ((dim >> 4) << 4);                        \
  if (((uintptr_t)m & 0x1f) == 0 && ((uintptr_t)q & 0x1f) == 0) {           \
    for (; q != qe_aligned; m += 16, q += 16) {                             \
      MATRIX_FP16_ITER_1X1_AVX(m, q, ymm_sum, _mm256_load_si256,            \
                               ACCUM_FP32_STEP_AVX)                         \
    }                                                                       \
    if (qe >= qe_aligned + 8) {                                             \
      __m256 ymm_m = _mm256_cvtph_ps(_mm_load_si128((const __m128i *)m));   \
      __m256 ymm_q = _mm256_cvtph_ps(_mm_load_si128((const __m128i *)q));   \
      ACCUM_FP32_STEP_AVX(ymm_m, ymm_q, ymm_sum_0_0)                        \
      m += 8;                                                               \
      q += 8;                                                               \
    }                                                                       \
  } else {                                                                  \
    for (; q != qe_aligned; m += 16, q += 16) {                             \
      MATRIX_FP16_ITER_1X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,           \
                               ACCUM_FP32_STEP_AVX)                         \
    }                                                                       \
    if (qe >= qe_aligned + 8) {                                             \
      __m256 ymm_m = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)m));  \
      __m256 ymm_q = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)q));  \
      ACCUM_FP32_STEP_AVX(ymm_m, ymm_q, ymm_sum_0_0)                        \
      m += 8;                                                               \
      q += 8;                                                               \
    }                                                                       \
  }                                                                         \
  MATRIX_FP16_MASK_AVX(m, q, (qe - q), _MASK, ymm_sum, ACCUM_FP32_STEP_AVX) \
  *out = _NORM(HorizontalAdd_FP32_V256(ymm_sum_0_0));

//! Compute the distance between matrix and query (FP16, M=2, N=1)
//!
//! Statement macro: declares locals (`qe`, `qe_aligned`, `xmm_sum_0_0` and
//! the accumulator `ymm_sum_0_0`, presumably from MATRIX_VAR_INIT) and
//! advances `m` and `q`.
//!
//!   m      Float16 pointer to the matrix block; 2 values are consumed per
//!          query element (presumably two interleaved rows -- confirm)
//!   q      Float16 pointer to the query, `dim` elements
//!   out    receives 2 floats (low half of the final register, _mm_storel_pi)
//!   _NORM  applied to the 128-bit sum before it is stored
//!
//! Flow: the main loop handles 4 query elements (8 matrix values) per
//! iteration, with aligned loads only when `m` is 16-byte aligned; the
//! 256-bit accumulator is folded to 128 bits; a 2-element step and then a
//! 1-element step cover the remainder, with the upper and lower 64-bit
//! halves folded together between them.
#define ACCUM_FP16_2X1_AVX(m, q, dim, out, _NORM)                             \
  MATRIX_VAR_INIT(1, 1, __m256, ymm_sum, _mm256_setzero_ps())                 \
  const Float16 *qe_aligned = q + ((dim >> 2) << 2);                          \
  const Float16 *qe = q + dim;                                                \
  if (((uintptr_t)m & 0xf) == 0) {                                            \
    for (; q != qe_aligned; m += 8, q += 4) {                                 \
      MATRIX_FP16_ITER_2X1_AVX(m, q, ymm_sum, _mm_load_si128,                 \
                               ACCUM_FP32_STEP_AVX)                           \
    }                                                                         \
  } else {                                                                    \
    for (; q != qe_aligned; m += 8, q += 4) {                                 \
      MATRIX_FP16_ITER_2X1_AVX(m, q, ymm_sum, _mm_loadu_si128,                \
                               ACCUM_FP32_STEP_AVX)                           \
    }                                                                         \
  }                                                                           \
  __m128 xmm_sum_0_0 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_0),        \
                                  _mm256_extractf128_ps(ymm_sum_0_0, 1));     \
  if (qe >= qe_aligned + 2) {                                                 \
    __m128 xmm_m = _mm_cvtph_ps(_mm_set1_epi64(*(const __m64 *)(m)));         \
    __m128 xmm_q = _mm_cvtph_ps(                                              \
        _mm_shufflelo_epi16(_mm_broadcast_si32(q), _MM_SHUFFLE(1, 1, 0, 0))); \
    ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_0)                            \
    m += 4;                                                                   \
    q += 2;                                                                   \
  }                                                                           \
  xmm_sum_0_0 =                                                               \
      _mm_add_ps(xmm_sum_0_0, _mm_movehl_ps(xmm_sum_0_0, xmm_sum_0_0));       \
  if (q != qe) {                                                              \
    __m128 xmm_m = _mm_cvtph_ps(                                              \
        _mm_shufflelo_epi16(_mm_broadcast_si32(m), _MM_SHUFFLE(0, 0, 1, 0))); \
    __m128 xmm_q = _mm_cvtph_ps(_mm_set1_epi16(*(const short *)(q)));         \
    ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_0)                            \
  }                                                                           \
  _mm_storel_pi((__m64 *)out, _NORM(xmm_sum_0_0));

//! Compute the distance between matrix and query (FP16, M=2, N=2)
//!
//! Statement macro: declares locals (`qe`, `qe_aligned`, `xmm_sum_0_0`,
//! `xmm_sum_0_1` and the accumulators `ymm_sum_0_0` / `ymm_sum_0_1`,
//! presumably from MATRIX_VAR_INIT) and advances `m` and `q`.
//!
//!   m      Float16 pointer to the matrix block, 2 values per dimension
//!   q      Float16 pointer to the query block, 2 values per dimension
//!          (the end pointer is `q + (dim << 1)`)
//!   out    receives 4 floats through MATRIX_VAR_STORE; an aligned store is
//!          used only when `out` is 16-byte aligned
//!   _NORM  forwarded to MATRIX_VAR_STORE
//!
//! Flow: the main loop handles 4 dimensions (8 values of each input) per
//! iteration, with aligned loads only when both `m` and `q` are 16-byte
//! aligned; both 256-bit accumulators are folded to 128 bits; a 2-dimension
//! step and then a 1-dimension step cover the remainder, with the two
//! partial sums merged into `xmm_sum_0_0` between them.
#define ACCUM_FP16_2X2_AVX(m, q, dim, out, _NORM)                             \
  MATRIX_VAR_INIT(1, 2, __m256, ymm_sum, _mm256_setzero_ps())                 \
  const Float16 *qe_aligned = q + ((dim >> 2) << 3);                          \
  const Float16 *qe = q + (dim << 1);                                         \
  if (((uintptr_t)m & 0xf) == 0 && ((uintptr_t)q & 0xf) == 0) {               \
    for (; q != qe_aligned; m += 8, q += 8) {                                 \
      MATRIX_FP16_ITER_2X2_AVX(m, q, ymm_sum, _mm_load_si128,                 \
                               ACCUM_FP32_STEP_AVX)                           \
    }                                                                         \
  } else {                                                                    \
    for (; q != qe_aligned; m += 8, q += 8) {                                 \
      MATRIX_FP16_ITER_2X2_AVX(m, q, ymm_sum, _mm_loadu_si128,                \
                               ACCUM_FP32_STEP_AVX)                           \
    }                                                                         \
  }                                                                           \
  __m128 xmm_sum_0_0 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_0),        \
                                  _mm256_extractf128_ps(ymm_sum_0_0, 1));     \
  __m128 xmm_sum_0_1 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_1),        \
                                  _mm256_extractf128_ps(ymm_sum_0_1, 1));     \
  if (qe >= qe_aligned + 4) {                                                 \
    __m128 xmm_m = _mm_cvtph_ps(_mm_set1_epi64(*(const __m64 *)(m)));         \
    __m128 xmm_q = _mm_cvtph_ps(_mm_set1_epi64(*(const __m64 *)(q)));         \
    __m128 xmm_p = _mm_permute_ps(xmm_q, _MM_SHUFFLE(2, 2, 0, 0));            \
    ACCUM_FP32_STEP_SSE(xmm_m, xmm_p, xmm_sum_0_0)                            \
    xmm_p = _mm_permute_ps(xmm_q, _MM_SHUFFLE(3, 3, 1, 1));                   \
    ACCUM_FP32_STEP_SSE(xmm_m, xmm_p, xmm_sum_0_1)                            \
    m += 4;                                                                   \
    q += 4;                                                                   \
  }                                                                           \
  xmm_sum_0_0 = _mm_add_ps(_mm_movelh_ps(xmm_sum_0_0, xmm_sum_0_1),           \
                           _mm_movehl_ps(xmm_sum_0_1, xmm_sum_0_0));          \
  if (q != qe) {                                                              \
    __m128 xmm_m = _mm_cvtph_ps(                                              \
        _mm_shufflelo_epi16(_mm_broadcast_si32(m), _MM_SHUFFLE(1, 0, 1, 0))); \
    __m128 xmm_q = _mm_cvtph_ps(                                              \
        _mm_shufflelo_epi16(_mm_broadcast_si32(q), _MM_SHUFFLE(1, 1, 0, 0))); \
    ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_0)                            \
  }                                                                           \
  if (((uintptr_t)out & 0xf) == 0) {                                          \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)              \
  } else {                                                                    \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)             \
  }

//! Compute the distance between matrix and query (FP16, M=4, N=1)
//!
//! Statement macro: declares locals (`qe`, `xmm_sum_0_0` and the
//! accumulator `ymm_sum_0_0`, presumably from MATRIX_VAR_INIT) and advances
//! `m` and `q`.
//!
//!   m      Float16 pointer to the matrix block, 4 values per dimension
//!   q      Float16 pointer to the query, `dim` elements
//!   out    receives 4 floats through MATRIX_VAR_STORE; an aligned store is
//!          used only when `out` is 16-byte aligned
//!   _NORM  forwarded to MATRIX_VAR_STORE
//!
//! Flow: the main loop handles 2 dimensions (8 matrix values, 2 query
//! values) per iteration, with aligned loads only when `m` is 16-byte
//! aligned; the 256-bit accumulator is folded to 128 bits; if `dim` is odd,
//! one more dimension is accumulated with 128-bit operations.
#define ACCUM_FP16_4X1_AVX(m, q, dim, out, _NORM)                            \
  MATRIX_VAR_INIT(1, 1, __m256, ymm_sum, _mm256_setzero_ps())                \
  const Float16 *qe = q + dim;                                               \
  if (((uintptr_t)m & 0xf) == 0) {                                           \
    for (const Float16 *qe_aligned = q + ((dim >> 1) << 1); q != qe_aligned; \
         m += 8, q += 2) {                                                   \
      MATRIX_FP16_ITER_4X1_AVX(m, q, ymm_sum, _mm_load_si128,                \
                               ACCUM_FP32_STEP_AVX)                          \
    }                                                                        \
  } else {                                                                   \
    for (const Float16 *qe_aligned = q + ((dim >> 1) << 1); q != qe_aligned; \
         m += 8, q += 2) {                                                   \
      MATRIX_FP16_ITER_4X1_AVX(m, q, ymm_sum, _mm_loadu_si128,               \
                               ACCUM_FP32_STEP_AVX)                          \
    }                                                                        \
  }                                                                          \
  __m128 xmm_sum_0_0 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_0),       \
                                  _mm256_extractf128_ps(ymm_sum_0_0, 1));    \
  if (q != qe) {                                                             \
    __m128 xmm_m = _mm_cvtph_ps(_mm_set1_epi64(*(const __m64 *)(m)));        \
    __m128 xmm_q = _mm_cvtph_ps(_mm_set1_epi16(*(const short *)(q)));        \
    ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_0)                           \
  }                                                                          \
  if (((uintptr_t)out & 0xf) == 0) {                                         \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)             \
  } else {                                                                   \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)            \
  }

//! Compute the distance between matrix and query (FP16, M=4, N=2)
//!
//! Statement macro: declares locals (`qe`, `xmm_sum_0_0`, `xmm_sum_0_1` and
//! the accumulators `ymm_sum_0_0` / `ymm_sum_0_1`, presumably from
//! MATRIX_VAR_INIT) and advances `m` and `q`.
//!
//!   m      Float16 pointer to the matrix block, 4 values per dimension
//!   q      Float16 pointer to the query block, 2 values per dimension
//!          (the end pointer is `q + (dim << 1)`)
//!   out    receives the results through MATRIX_VAR_STORE(1, 2, 4, ...); an
//!          aligned store is used only when `out` is 16-byte aligned
//!   _NORM  forwarded to MATRIX_VAR_STORE
//!
//! Flow: the main loop handles 2 dimensions (8 matrix values, 4 query
//! values) per iteration, with aligned loads only when `m` is 16-byte
//! aligned; both 256-bit accumulators are folded to 128 bits; if `dim` is
//! odd, one more dimension is accumulated. The tail names `xmm_q_0` and
//! `xmm_q_1` are presumably picked up by MATRIX_VAR_PROC through the
//! `xmm_q` prefix -- confirm against matrix_utility.i.
#define ACCUM_FP16_4X2_AVX(m, q, dim, out, _NORM)                            \
  MATRIX_VAR_INIT(1, 2, __m256, ymm_sum, _mm256_setzero_ps())                \
  const Float16 *qe = q + (dim << 1);                                        \
  if (((uintptr_t)m & 0xf) == 0) {                                           \
    for (const Float16 *qe_aligned = q + ((dim >> 1) << 2); q != qe_aligned; \
         m += 8, q += 4) {                                                   \
      MATRIX_FP16_ITER_4X2_AVX(m, q, ymm_sum, _mm_load_si128,                \
                               ACCUM_FP32_STEP_AVX)                          \
    }                                                                        \
  } else {                                                                   \
    for (const Float16 *qe_aligned = q + ((dim >> 1) << 2); q != qe_aligned; \
         m += 8, q += 4) {                                                   \
      MATRIX_FP16_ITER_4X2_AVX(m, q, ymm_sum, _mm_loadu_si128,               \
                               ACCUM_FP32_STEP_AVX)                          \
    }                                                                        \
  }                                                                          \
  __m128 xmm_sum_0_0 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_0),       \
                                  _mm256_extractf128_ps(ymm_sum_0_0, 1));    \
  __m128 xmm_sum_0_1 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_1),       \
                                  _mm256_extractf128_ps(ymm_sum_0_1, 1));    \
  if (q != qe) {                                                             \
    __m128 xmm_q_0 = _mm_cvtph_ps(_mm_set1_epi16(*(const short *)(q + 0)));  \
    __m128 xmm_q_1 = _mm_cvtph_ps(_mm_set1_epi16(*(const short *)(q + 1)));  \
    __m128 xmm_m = _mm_cvtph_ps(_mm_set1_epi64(*(const __m64 *)(m)));        \
    MATRIX_VAR_PROC(1, 2, 0, xmm_m, xmm_q, xmm_sum, ACCUM_FP32_STEP_SSE)     \
  }                                                                          \
  if (((uintptr_t)out & 0xf) == 0) {                                         \
    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)             \
  } else {                                                                   \
    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM)            \
  }

//! Compute the distance between matrix and query (FP16, M=4, N=4)
//!
//! Statement macro: declares locals (`qe`, `xmm_sum_0_0` .. `xmm_sum_0_3`
//! and the accumulators `ymm_sum_0_0` .. `ymm_sum_0_3`, presumably from
//! MATRIX_VAR_INIT) and advances `m` and `q`.
//!
//!   m      Float16 pointer to the matrix block, 4 values per dimension
//!   q      Float16 pointer to the query block, 4 values per dimension
//!          (the end pointer is `q + (dim << 2)`)
//!   out    receives the results through MATRIX_VAR_STORE(1, 4, 4, ...); an
//!          aligned store is used only when `out` is 16-byte aligned
//!   _NORM  forwarded to MATRIX_VAR_STORE
//!
//! Flow: the main loop handles 2 dimensions (8 values of each input) per
//! iteration, with aligned loads only when both `m` and `q` are 16-byte
//! aligned; the four 256-bit accumulators are folded to 128 bits; if `dim`
//! is odd, one more dimension is accumulated by broadcasting each of the 4
//! query values in turn against the 4 matrix values.
#define ACCUM_FP16_4X4_AVX(m, q, dim, out, _NORM)                            \
  MATRIX_VAR_INIT(1, 4, __m256, ymm_sum, _mm256_setzero_ps())                \
  const Float16 *qe = q + (dim << 2);                                        \
  if (((uintptr_t)m & 0xf) == 0 && ((uintptr_t)q & 0xf) == 0) {              \
    for (const Float16 *qe_aligned = q + ((dim >> 1) << 3); q != qe_aligned; \
         m += 8, q += 8) {                                                   \
      MATRIX_FP16_ITER_4X4_AVX(m, q, ymm_sum, _mm_load_si128,                \
                               ACCUM_FP32_STEP_AVX)                          \
    }                                                                        \
  } else {                                                                   \
    for (const Float16 *qe_aligned = q + ((dim >> 1) << 3); q != qe_aligned; \
         m += 8, q += 8) {                                                   \
      MATRIX_FP16_ITER_4X4_AVX(m, q, ymm_sum, _mm_loadu_si128,               \
                               ACCUM_FP32_STEP_AVX)                          \
    }                                                                        \
  }                                                                          \
  __m128 xmm_sum_0_0 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_0),       \
                                  _mm256_extractf128_ps(ymm_sum_0_0, 1));    \
  __m128 xmm_sum_0_1 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_1),       \
                                  _mm256_extractf128_ps(ymm_sum_0_1, 1));    \
  __m128 xmm_sum_0_2 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_2),       \
                                  _mm256_extractf128_ps(ymm_sum_0_2, 1));    \
  __m128 xmm_sum_0_3 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_3),       \
                                  _mm256_extractf128_ps(ymm_sum_0_3, 1));    \
  if (q != qe) {                                                             \
    __m128 xmm_m = _mm_cvtph_ps(_mm_set1_epi64(*(const __m64 *)(m)));        \
    __m128 xmm_q = _mm_cvtph_ps(_mm_set1_epi64(*(const __m64 *)(q)));        \
    __m128 xmm_p = _mm_permute_ps(xmm_q, _MM_SHUFFLE(0, 0, 0, 0));           \
    ACCUM_FP32_STEP_SSE(xmm_m, xmm_p, xmm_sum_0_0)                           \
    xmm_p = _mm_permute_ps(xmm_q, _MM_SHUFFLE(1, 1, 1, 1));                  \
    ACCUM_FP32_STEP_SSE(xmm_m, xmm_p, xmm_sum_0_1)                           \
    xmm_p = _mm_permute_ps(xmm_q, _MM_SHUFFLE(2, 2, 2, 2));                  \
    ACCUM_FP32_STEP_SSE(xmm_m, xmm_p, xmm_sum_0_2)                           \
    xmm_p = _mm_permute_ps(xmm_q, _MM_SHUFFLE(3, 3, 3, 3));                  \
    ACCUM_FP32_STEP_SSE(xmm_m, xmm_p, xmm_sum_0_3)                           \
  }                                                                          \
  if (((uintptr_t)out & 0xf) == 0) {                                         \
    MATRIX_VAR_STORE(1, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)             \
  } else {                                                                   \
    MATRIX_VAR_STORE(1, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM)            \
  }

//! Compute the distance between matrix and query (FP16, M=8, N=1)
//!
//! Statement macro: declares the accumulator (presumably `ymm_sum_0_0`, via
//! MATRIX_VAR_INIT) and advances `m` and `q`.
//!
//!   m      Float16 pointer to the matrix block, 8 values per dimension
//!   q      Float16 pointer to the query, `dim` elements
//!   out    receives the results through MATRIX_VAR_STORE(1, 1, 8, ...); an
//!          aligned store is used only when `out` is 32-byte aligned
//!   _NORM  forwarded to MATRIX_VAR_STORE
//!
//! Flow: one loop iteration per dimension (8 matrix values, 1 query value),
//! so no remainder handling is needed; aligned 128-bit loads are used only
//! when `m` is 16-byte aligned.
#define ACCUM_FP16_8X1_AVX(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(1, 1, __m256, ymm_sum, _mm256_setzero_ps())        \
  if (((uintptr_t)m & 0xf) == 0) {                                   \
    for (const Float16 *qe = q + dim; q != qe; m += 8, ++q) {        \
      MATRIX_FP16_ITER_8X1_AVX(m, q, ymm_sum, _mm_load_si128,        \
                               ACCUM_FP32_STEP_AVX)                  \
    }                                                                \
  } else {                                                           \
    for (const Float16 *qe = q + dim; q != qe; m += 8, ++q) {        \
      MATRIX_FP16_ITER_8X1_AVX(m, q, ymm_sum, _mm_loadu_si128,       \
                               ACCUM_FP32_STEP_AVX)                  \
    }                                                                \
  }                                                                  \
  if (((uintptr_t)out & 0x1f) == 0) {                                \
    MATRIX_VAR_STORE(1, 1, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \
  } else {                                                           \
    MATRIX_VAR_STORE(1, 1, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \
  }

//! Compute the distance between matrix and query (FP16, M=8, N=2)
//!
//! Statement macro: declares the accumulators (presumably `ymm_sum_0_0` and
//! `ymm_sum_0_1`, via MATRIX_VAR_INIT) and advances `m` and `q`.
//!
//!   m      Float16 pointer to the matrix block, 8 values per dimension
//!   q      Float16 pointer to the query block, 2 values per dimension
//!          (the end pointer is `q + (dim << 1)`)
//!   out    receives the results through MATRIX_VAR_STORE(1, 2, 8, ...); an
//!          aligned store is used only when `out` is 32-byte aligned
//!   _NORM  forwarded to MATRIX_VAR_STORE
//!
//! Flow: one loop iteration per dimension (8 matrix values, 2 query
//! values), so no remainder handling is needed; aligned 128-bit loads are
//! used only when `m` is 16-byte aligned.
#define ACCUM_FP16_8X2_AVX(m, q, dim, out, _NORM)                       \
  MATRIX_VAR_INIT(1, 2, __m256, ymm_sum, _mm256_setzero_ps())           \
  if (((uintptr_t)m & 0xf) == 0) {                                      \
    for (const Float16 *qe = q + (dim << 1); q != qe; m += 8, q += 2) { \
      MATRIX_FP16_ITER_8X2_AVX(m, q, ymm_sum, _mm_load_si128,           \
                               ACCUM_FP32_STEP_AVX)                     \
    }                                                                   \
  } else {                                                              \
    for (const Float16 *qe = q + (dim << 1); q != qe; m += 8, q += 2) { \
      MATRIX_FP16_ITER_8X2_AVX(m, q, ymm_sum, _mm_loadu_si128,          \
                               ACCUM_FP32_STEP_AVX)                     \
    }                                                                   \
  }                                                                     \
  if (((uintptr_t)out & 0x1f) == 0) {                                   \
    MATRIX_VAR_STORE(1, 2, 8, ymm_sum, out, _mm256_store_ps, _NORM)     \
  } else {                                                              \
    MATRIX_VAR_STORE(1, 2, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)    \
  }

//! Compute the distance between matrix and query (FP16, M=8, N=4)
//!
//! Statement macro: declares the accumulators (presumably `ymm_sum_0_0` ..
//! `ymm_sum_0_3`, via MATRIX_VAR_INIT) and advances `m` and `q`.
//!
//!   m      Float16 pointer to the matrix block, 8 values per dimension
//!   q      Float16 pointer to the query block, 4 values per dimension
//!          (the end pointer is `q + (dim << 2)`)
//!   out    receives the results through MATRIX_VAR_STORE(1, 4, 8, ...); an
//!          aligned store is used only when `out` is 32-byte aligned
//!   _NORM  forwarded to MATRIX_VAR_STORE
//!
//! Flow: one loop iteration per dimension (8 matrix values, 4 query
//! values), so no remainder handling is needed; aligned 128-bit loads are
//! used only when `m` is 16-byte aligned.
#define ACCUM_FP16_8X4_AVX(m, q, dim, out, _NORM)                       \
  MATRIX_VAR_INIT(1, 4, __m256, ymm_sum, _mm256_setzero_ps())           \
  if (((uintptr_t)m & 0xf) == 0) {                                      \
    for (const Float16 *qe = q + (dim << 2); q != qe; m += 8, q += 4) { \
      MATRIX_FP16_ITER_8X4_AVX(m, q, ymm_sum, _mm_load_si128,           \
                               ACCUM_FP32_STEP_AVX)                     \
    }                                                                   \
  } else {                                                              \
    for (const Float16 *qe = q + (dim << 2); q != qe; m += 8, q += 4) { \
      MATRIX_FP16_ITER_8X4_AVX(m, q, ymm_sum, _mm_loadu_si128,          \
                               ACCUM_FP32_STEP_AVX)                     \
    }                                                                   \
  }                                                                     \
  if (((uintptr_t)out & 0x1f) == 0) {                                   \
    MATRIX_VAR_STORE(1, 4, 8, ymm_sum, out, _mm256_store_ps, _NORM)     \
  } else {                                                              \
    MATRIX_VAR_STORE(1, 4, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)    \
  }

//! Accumulate the distance between matrix and query (FP16, M=8, N=8).
//! Performs `dim` iterations, stepping `m` by 8 and `q` by 8 elements each
//! time, then passes the accumulators and `_NORM` to MATRIX_VAR_STORE.
//! The aligned loader is chosen only when both `m` and `q` are 16-byte
//! aligned.
#define ACCUM_FP16_8X8_AVX(m, q, dim, out, _NORM)                         \
  MATRIX_VAR_INIT(1, 8, __m256, ymm_sum, _mm256_setzero_ps())             \
  if (((uintptr_t)m & 0xf) != 0 || ((uintptr_t)q & 0xf) != 0) {           \
    /* m or q is not 16-byte aligned: unaligned loads */                  \
    for (const Float16 *qe = q + (dim << 3); qe != q; q += 8, m += 8) {   \
      MATRIX_FP16_ITER_8X8_AVX(m, q, ymm_sum, _mm_loadu_si128,            \
                               ACCUM_FP32_STEP_AVX)                       \
    }                                                                     \
  } else {                                                                \
    for (const Float16 *qe = q + (dim << 3); qe != q; q += 8, m += 8) {   \
      MATRIX_FP16_ITER_8X8_AVX(m, q, ymm_sum, _mm_load_si128,             \
                               ACCUM_FP32_STEP_AVX)                       \
    }                                                                     \
  }                                                                       \
  if (((uintptr_t)out & 0x1f) != 0) {                                     \
    /* out is not 32-byte aligned: unaligned store */                     \
    MATRIX_VAR_STORE(1, 8, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)      \
  } else {                                                                \
    MATRIX_VAR_STORE(1, 8, 8, ymm_sum, out, _mm256_store_ps, _NORM)       \
  }

//! Accumulate the distance between matrix and query (FP16, M=16, N=1).
//! Performs `dim` iterations, stepping `m` by 16 and `q` by 1 element each
//! time, then passes the accumulators and `_NORM` to MATRIX_VAR_STORE.
#define ACCUM_FP16_16X1_AVX(m, q, dim, out, _NORM)                        \
  MATRIX_VAR_INIT(2, 1, __m256, ymm_sum, _mm256_setzero_ps())             \
  if (((uintptr_t)m & 0x1f) != 0) {                                       \
    /* m is not 32-byte aligned: unaligned loads */                       \
    for (const Float16 *qe = q + dim; qe != q; ++q, m += 16) {            \
      MATRIX_FP16_ITER_16X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,        \
                                ACCUM_FP32_STEP_AVX)                      \
    }                                                                     \
  } else {                                                                \
    for (const Float16 *qe = q + dim; qe != q; ++q, m += 16) {            \
      MATRIX_FP16_ITER_16X1_AVX(m, q, ymm_sum, _mm256_load_si256,         \
                                ACCUM_FP32_STEP_AVX)                      \
    }                                                                     \
  }                                                                       \
  if (((uintptr_t)out & 0x1f) != 0) {                                     \
    /* out is not 32-byte aligned: unaligned store */                     \
    MATRIX_VAR_STORE(2, 1, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)      \
  } else {                                                                \
    MATRIX_VAR_STORE(2, 1, 8, ymm_sum, out, _mm256_store_ps, _NORM)       \
  }

//! Accumulate the distance between matrix and query (FP16, M=16, N=2).
//! Performs `dim` iterations, stepping `m` by 16 and `q` by 2 elements each
//! time, then passes the accumulators and `_NORM` to MATRIX_VAR_STORE.
#define ACCUM_FP16_16X2_AVX(m, q, dim, out, _NORM)                        \
  MATRIX_VAR_INIT(2, 2, __m256, ymm_sum, _mm256_setzero_ps())             \
  if (((uintptr_t)m & 0x1f) != 0) {                                       \
    /* m is not 32-byte aligned: unaligned loads */                       \
    for (const Float16 *qe = q + (dim << 1); qe != q; q += 2, m += 16) {  \
      MATRIX_FP16_ITER_16X2_AVX(m, q, ymm_sum, _mm256_loadu_si256,        \
                                ACCUM_FP32_STEP_AVX)                      \
    }                                                                     \
  } else {                                                                \
    for (const Float16 *qe = q + (dim << 1); qe != q; q += 2, m += 16) {  \
      MATRIX_FP16_ITER_16X2_AVX(m, q, ymm_sum, _mm256_load_si256,         \
                                ACCUM_FP32_STEP_AVX)                      \
    }                                                                     \
  }                                                                       \
  if (((uintptr_t)out & 0x1f) != 0) {                                     \
    /* out is not 32-byte aligned: unaligned store */                     \
    MATRIX_VAR_STORE(2, 2, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)      \
  } else {                                                                \
    MATRIX_VAR_STORE(2, 2, 8, ymm_sum, out, _mm256_store_ps, _NORM)       \
  }

//! Accumulate the distance between matrix and query (FP16, M=16, N=4).
//! Performs `dim` iterations, stepping `m` by 16 and `q` by 4 elements each
//! time, then passes the accumulators and `_NORM` to MATRIX_VAR_STORE.
#define ACCUM_FP16_16X4_AVX(m, q, dim, out, _NORM)                        \
  MATRIX_VAR_INIT(2, 4, __m256, ymm_sum, _mm256_setzero_ps())             \
  if (((uintptr_t)m & 0x1f) != 0) {                                       \
    /* m is not 32-byte aligned: unaligned loads */                       \
    for (const Float16 *qe = q + (dim << 2); qe != q; q += 4, m += 16) {  \
      MATRIX_FP16_ITER_16X4_AVX(m, q, ymm_sum, _mm256_loadu_si256,        \
                                ACCUM_FP32_STEP_AVX)                      \
    }                                                                     \
  } else {                                                                \
    for (const Float16 *qe = q + (dim << 2); qe != q; q += 4, m += 16) {  \
      MATRIX_FP16_ITER_16X4_AVX(m, q, ymm_sum, _mm256_load_si256,         \
                                ACCUM_FP32_STEP_AVX)                      \
    }                                                                     \
  }                                                                       \
  if (((uintptr_t)out & 0x1f) != 0) {                                     \
    /* out is not 32-byte aligned: unaligned store */                     \
    MATRIX_VAR_STORE(2, 4, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)      \
  } else {                                                                \
    MATRIX_VAR_STORE(2, 4, 8, ymm_sum, out, _mm256_store_ps, _NORM)       \
  }

//! Accumulate the distance between matrix and query (FP16, M=16, N=8).
//! Performs `dim` iterations, stepping `m` by 16 and `q` by 8 elements each
//! time, then passes the accumulators and `_NORM` to MATRIX_VAR_STORE.
#define ACCUM_FP16_16X8_AVX(m, q, dim, out, _NORM)                        \
  MATRIX_VAR_INIT(2, 8, __m256, ymm_sum, _mm256_setzero_ps())             \
  if (((uintptr_t)m & 0x1f) != 0) {                                       \
    /* m is not 32-byte aligned: unaligned loads */                       \
    for (const Float16 *qe = q + (dim << 3); qe != q; q += 8, m += 16) {  \
      MATRIX_FP16_ITER_16X8_AVX(m, q, ymm_sum, _mm256_loadu_si256,        \
                                ACCUM_FP32_STEP_AVX)                      \
    }                                                                     \
  } else {                                                                \
    for (const Float16 *qe = q + (dim << 3); qe != q; q += 8, m += 16) {  \
      MATRIX_FP16_ITER_16X8_AVX(m, q, ymm_sum, _mm256_load_si256,         \
                                ACCUM_FP32_STEP_AVX)                      \
    }                                                                     \
  }                                                                       \
  if (((uintptr_t)out & 0x1f) != 0) {                                     \
    /* out is not 32-byte aligned: unaligned store */                     \
    MATRIX_VAR_STORE(2, 8, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)      \
  } else {                                                                \
    MATRIX_VAR_STORE(2, 8, 8, ymm_sum, out, _mm256_store_ps, _NORM)       \
  }

//! Accumulate the distance between matrix and query (FP16, M=16, N=16).
//! Performs `dim` iterations, stepping `m` by 16 and `q` by 16 elements each
//! time, then passes the accumulators and `_NORM` to MATRIX_VAR_STORE.
//! NOTE(review): only `m` is alignment-checked here, whereas
//! ACCUM_FP16_16X16_AVX512 also checks `q`; confirm that
//! MATRIX_FP16_ITER_16X16_AVX never applies the aligned loader to `q`.
#define ACCUM_FP16_16X16_AVX(m, q, dim, out, _NORM)                       \
  MATRIX_VAR_INIT(2, 16, __m256, ymm_sum, _mm256_setzero_ps())            \
  if (((uintptr_t)m & 0x1f) != 0) {                                       \
    /* m is not 32-byte aligned: unaligned loads */                       \
    for (const Float16 *qe = q + (dim << 4); qe != q; q += 16, m += 16) { \
      MATRIX_FP16_ITER_16X16_AVX(m, q, ymm_sum, _mm256_loadu_si256,       \
                                 ACCUM_FP32_STEP_AVX)                     \
    }                                                                     \
  } else {                                                                \
    for (const Float16 *qe = q + (dim << 4); qe != q; q += 16, m += 16) { \
      MATRIX_FP16_ITER_16X16_AVX(m, q, ymm_sum, _mm256_load_si256,        \
                                 ACCUM_FP32_STEP_AVX)                     \
    }                                                                     \
  }                                                                       \
  if (((uintptr_t)out & 0x1f) != 0) {                                     \
    /* out is not 32-byte aligned: unaligned store */                     \
    MATRIX_VAR_STORE(2, 16, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)     \
  } else {                                                                \
    MATRIX_VAR_STORE(2, 16, 8, ymm_sum, out, _mm256_store_ps, _NORM)      \
  }

//! Accumulate the distance between matrix and query (FP16, M=32, N=1).
//! Performs `dim` iterations, stepping `m` by 32 and `q` by 1 element each
//! time, then passes the accumulators and `_NORM` to MATRIX_VAR_STORE.
#define ACCUM_FP16_32X1_AVX(m, q, dim, out, _NORM)                        \
  MATRIX_VAR_INIT(4, 1, __m256, ymm_sum, _mm256_setzero_ps())             \
  if (((uintptr_t)m & 0x1f) != 0) {                                       \
    /* m is not 32-byte aligned: unaligned loads */                       \
    for (const Float16 *qe = q + dim; qe != q; ++q, m += 32) {            \
      MATRIX_FP16_ITER_32X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,        \
                                ACCUM_FP32_STEP_AVX)                      \
    }                                                                     \
  } else {                                                                \
    for (const Float16 *qe = q + dim; qe != q; ++q, m += 32) {            \
      MATRIX_FP16_ITER_32X1_AVX(m, q, ymm_sum, _mm256_load_si256,         \
                                ACCUM_FP32_STEP_AVX)                      \
    }                                                                     \
  }                                                                       \
  if (((uintptr_t)out & 0x1f) != 0) {                                     \
    /* out is not 32-byte aligned: unaligned store */                     \
    MATRIX_VAR_STORE(4, 1, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)      \
  } else {                                                                \
    MATRIX_VAR_STORE(4, 1, 8, ymm_sum, out, _mm256_store_ps, _NORM)       \
  }

//! Accumulate the distance between matrix and query (FP16, M=32, N=2).
//! Performs `dim` iterations, stepping `m` by 32 and `q` by 2 elements each
//! time, then passes the accumulators and `_NORM` to MATRIX_VAR_STORE.
#define ACCUM_FP16_32X2_AVX(m, q, dim, out, _NORM)                        \
  MATRIX_VAR_INIT(4, 2, __m256, ymm_sum, _mm256_setzero_ps())             \
  if (((uintptr_t)m & 0x1f) != 0) {                                       \
    /* m is not 32-byte aligned: unaligned loads */                       \
    for (const Float16 *qe = q + (dim << 1); qe != q; q += 2, m += 32) {  \
      MATRIX_FP16_ITER_32X2_AVX(m, q, ymm_sum, _mm256_loadu_si256,        \
                                ACCUM_FP32_STEP_AVX)                      \
    }                                                                     \
  } else {                                                                \
    for (const Float16 *qe = q + (dim << 1); qe != q; q += 2, m += 32) {  \
      MATRIX_FP16_ITER_32X2_AVX(m, q, ymm_sum, _mm256_load_si256,         \
                                ACCUM_FP32_STEP_AVX)                      \
    }                                                                     \
  }                                                                       \
  if (((uintptr_t)out & 0x1f) != 0) {                                     \
    /* out is not 32-byte aligned: unaligned store */                     \
    MATRIX_VAR_STORE(4, 2, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)      \
  } else {                                                                \
    MATRIX_VAR_STORE(4, 2, 8, ymm_sum, out, _mm256_store_ps, _NORM)       \
  }

//! Accumulate the distance between matrix and query (FP16, M=32, N=4).
//! Performs `dim` iterations, stepping `m` by 32 and `q` by 4 elements each
//! time, then passes the accumulators and `_NORM` to MATRIX_VAR_STORE.
#define ACCUM_FP16_32X4_AVX(m, q, dim, out, _NORM)                        \
  MATRIX_VAR_INIT(4, 4, __m256, ymm_sum, _mm256_setzero_ps())             \
  if (((uintptr_t)m & 0x1f) != 0) {                                       \
    /* m is not 32-byte aligned: unaligned loads */                       \
    for (const Float16 *qe = q + (dim << 2); qe != q; q += 4, m += 32) {  \
      MATRIX_FP16_ITER_32X4_AVX(m, q, ymm_sum, _mm256_loadu_si256,        \
                                ACCUM_FP32_STEP_AVX)                      \
    }                                                                     \
  } else {                                                                \
    for (const Float16 *qe = q + (dim << 2); qe != q; q += 4, m += 32) {  \
      MATRIX_FP16_ITER_32X4_AVX(m, q, ymm_sum, _mm256_load_si256,         \
                                ACCUM_FP32_STEP_AVX)                      \
    }                                                                     \
  }                                                                       \
  if (((uintptr_t)out & 0x1f) != 0) {                                     \
    /* out is not 32-byte aligned: unaligned store */                     \
    MATRIX_VAR_STORE(4, 4, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)      \
  } else {                                                                \
    MATRIX_VAR_STORE(4, 4, 8, ymm_sum, out, _mm256_store_ps, _NORM)       \
  }

//! Accumulate the distance between matrix and query (FP16, M=32, N=8).
//! Performs `dim` iterations, stepping `m` by 32 and `q` by 8 elements each
//! time, then passes the accumulators and `_NORM` to MATRIX_VAR_STORE.
#define ACCUM_FP16_32X8_AVX(m, q, dim, out, _NORM)                        \
  MATRIX_VAR_INIT(4, 8, __m256, ymm_sum, _mm256_setzero_ps())             \
  if (((uintptr_t)m & 0x1f) != 0) {                                       \
    /* m is not 32-byte aligned: unaligned loads */                       \
    for (const Float16 *qe = q + (dim << 3); qe != q; q += 8, m += 32) {  \
      MATRIX_FP16_ITER_32X8_AVX(m, q, ymm_sum, _mm256_loadu_si256,        \
                                ACCUM_FP32_STEP_AVX)                      \
    }                                                                     \
  } else {                                                                \
    for (const Float16 *qe = q + (dim << 3); qe != q; q += 8, m += 32) {  \
      MATRIX_FP16_ITER_32X8_AVX(m, q, ymm_sum, _mm256_load_si256,         \
                                ACCUM_FP32_STEP_AVX)                      \
    }                                                                     \
  }                                                                       \
  if (((uintptr_t)out & 0x1f) != 0) {                                     \
    /* out is not 32-byte aligned: unaligned store */                     \
    MATRIX_VAR_STORE(4, 8, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)      \
  } else {                                                                \
    MATRIX_VAR_STORE(4, 8, 8, ymm_sum, out, _mm256_store_ps, _NORM)       \
  }

//! Accumulate the distance between matrix and query (FP16, M=32, N=16).
//! Performs `dim` iterations, stepping `m` by 32 and `q` by 16 elements each
//! time, then passes the accumulators and `_NORM` to MATRIX_VAR_STORE.
#define ACCUM_FP16_32X16_AVX(m, q, dim, out, _NORM)                       \
  MATRIX_VAR_INIT(4, 16, __m256, ymm_sum, _mm256_setzero_ps())            \
  if (((uintptr_t)m & 0x1f) != 0) {                                       \
    /* m is not 32-byte aligned: unaligned loads */                       \
    for (const Float16 *qe = q + (dim << 4); qe != q; q += 16, m += 32) { \
      MATRIX_FP16_ITER_32X16_AVX(m, q, ymm_sum, _mm256_loadu_si256,       \
                                 ACCUM_FP32_STEP_AVX)                     \
    }                                                                     \
  } else {                                                                \
    for (const Float16 *qe = q + (dim << 4); qe != q; q += 16, m += 32) { \
      MATRIX_FP16_ITER_32X16_AVX(m, q, ymm_sum, _mm256_load_si256,        \
                                 ACCUM_FP32_STEP_AVX)                     \
    }                                                                     \
  }                                                                       \
  if (((uintptr_t)out & 0x1f) != 0) {                                     \
    /* out is not 32-byte aligned: unaligned store */                     \
    MATRIX_VAR_STORE(4, 16, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)     \
  } else {                                                                \
    MATRIX_VAR_STORE(4, 16, 8, ymm_sum, out, _mm256_store_ps, _NORM)      \
  }

//! Accumulate the distance between matrix and query (FP16, M=32, N=32).
//! Performs `dim` iterations, stepping `m` by 32 and `q` by 32 elements each
//! time, then passes the accumulators and `_NORM` to MATRIX_VAR_STORE.
#define ACCUM_FP16_32X32_AVX(m, q, dim, out, _NORM)                       \
  MATRIX_VAR_INIT(4, 32, __m256, ymm_sum, _mm256_setzero_ps())            \
  if (((uintptr_t)m & 0x1f) != 0) {                                       \
    /* m is not 32-byte aligned: unaligned loads */                       \
    for (const Float16 *qe = q + (dim << 5); qe != q; q += 32, m += 32) { \
      MATRIX_FP16_ITER_32X32_AVX(m, q, ymm_sum, _mm256_loadu_si256,       \
                                 ACCUM_FP32_STEP_AVX)                     \
    }                                                                     \
  } else {                                                                \
    for (const Float16 *qe = q + (dim << 5); qe != q; q += 32, m += 32) { \
      MATRIX_FP16_ITER_32X32_AVX(m, q, ymm_sum, _mm256_load_si256,        \
                                 ACCUM_FP32_STEP_AVX)                     \
    }                                                                     \
  }                                                                       \
  if (((uintptr_t)out & 0x1f) != 0) {                                     \
    /* out is not 32-byte aligned: unaligned store */                     \
    MATRIX_VAR_STORE(4, 32, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)     \
  } else {                                                                \
    MATRIX_VAR_STORE(4, 32, 8, ymm_sum, out, _mm256_store_ps, _NORM)      \
  }

//! Compute the distance between matrix and query (FP16, M=1, N=1)
//!
//! Processing stages, in the order the macro executes them:
//!  1. Main loop over the first `(dim >> 5) << 5` elements, 32 elements of
//!     `m` and `q` per iteration. The aligned 512-bit loader is passed to the
//!     iteration macro only when both `m` and `q` are 64-byte aligned.
//!  2. If at least 16 elements remain, one 16-element step: 256-bit loads
//!     widened by _mm512_cvtph_ps and accumulated into `zmm_sum_0_0`
//!     (presumably the accumulator declared by MATRIX_VAR_INIT).
//!  3. The 512-bit accumulator is folded into the 256-bit `ymm_sum_0_0` by
//!     adding its lower and upper halves.
//!  4. If at least 8 elements remain, one 8-element step using unaligned
//!     128-bit loads regardless of pointer alignment.
//!  5. The remaining `qe - q` elements are handed to MATRIX_FP16_MASK_AVX
//!     together with `_MASK`.
//!  6. `*out` receives `_NORM` applied to HorizontalAdd_FP32_V256 of the
//!     256-bit accumulator.
//!
//! The macro advances `m` and `q` in place and declares `qe`, `qe_aligned`
//! and `ymm_sum_0_0` directly in the scope where it is expanded.
#define ACCUM_FP16_1X1_AVX512(m, q, dim, out, _MASK, _NORM)                   \
  MATRIX_VAR_INIT(1, 1, __m512, zmm_sum, _mm512_setzero_ps())                 \
  const Float16 *qe = q + dim;                                                \
  const Float16 *qe_aligned = q + ((dim >> 5) << 5);                          \
  if (((uintptr_t)m & 0x3f) == 0 && ((uintptr_t)q & 0x3f) == 0) {             \
    for (; q != qe_aligned; m += 32, q += 32) {                               \
      MATRIX_FP16_ITER_1X1_AVX512(m, q, zmm_sum, _mm512_load_si512,           \
                                  ACCUM_FP32_STEP_AVX512)                     \
    }                                                                         \
    if (qe >= qe_aligned + 16) {                                              \
      __m512 zmm_m = _mm512_cvtph_ps(_mm256_load_si256((const __m256i *)m));  \
      __m512 zmm_q = _mm512_cvtph_ps(_mm256_load_si256((const __m256i *)q));  \
      ACCUM_FP32_STEP_AVX512(zmm_m, zmm_q, zmm_sum_0_0)                       \
      m += 16;                                                                \
      q += 16;                                                                \
    }                                                                         \
  } else {                                                                    \
    for (; q != qe_aligned; m += 32, q += 32) {                               \
      MATRIX_FP16_ITER_1X1_AVX512(m, q, zmm_sum, _mm512_loadu_si512,          \
                                  ACCUM_FP32_STEP_AVX512)                     \
    }                                                                         \
    if (qe >= qe_aligned + 16) {                                              \
      __m512 zmm_m = _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)m)); \
      __m512 zmm_q = _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)q)); \
      ACCUM_FP32_STEP_AVX512(zmm_m, zmm_q, zmm_sum_0_0)                       \
      m += 16;                                                                \
      q += 16;                                                                \
    }                                                                         \
  }                                                                           \
  __m256 ymm_sum_0_0 = _mm256_add_ps(_mm512_castps512_ps256(zmm_sum_0_0),     \
                                     _mm256_castpd_ps(_mm512_extractf64x4_pd( \
                                         _mm512_castps_pd(zmm_sum_0_0), 1))); \
  if (qe >= q + 8) {                                                          \
    __m256 ymm_m = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)m));      \
    __m256 ymm_q = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)q));      \
    ACCUM_FP32_STEP_AVX(ymm_m, ymm_q, ymm_sum_0_0)                            \
    m += 8;                                                                   \
    q += 8;                                                                   \
  }                                                                           \
  MATRIX_FP16_MASK_AVX(m, q, (qe - q), _MASK, ymm_sum, ACCUM_FP32_STEP_AVX)   \
  *out = _NORM(HorizontalAdd_FP32_V256(ymm_sum_0_0));

//! Accumulate the distance between matrix and query (FP16, M=16, N=1).
//! Performs `dim` iterations, stepping `m` by 16 and `q` by 1 element each
//! time, then passes the accumulators and `_NORM` to MATRIX_VAR_STORE.
#define ACCUM_FP16_16X1_AVX512(m, q, dim, out, _NORM)                     \
  MATRIX_VAR_INIT(1, 1, __m512, zmm_sum, _mm512_setzero_ps())             \
  if (((uintptr_t)m & 0x1f) != 0) {                                       \
    /* m is not 32-byte aligned: unaligned loads */                       \
    for (const Float16 *qe = q + dim; qe != q; ++q, m += 16) {            \
      MATRIX_FP16_ITER_16X1_AVX512(m, q, zmm_sum, _mm256_loadu_si256,     \
                                   ACCUM_FP32_STEP_AVX512)                \
    }                                                                     \
  } else {                                                                \
    for (const Float16 *qe = q + dim; qe != q; ++q, m += 16) {            \
      MATRIX_FP16_ITER_16X1_AVX512(m, q, zmm_sum, _mm256_load_si256,      \
                                   ACCUM_FP32_STEP_AVX512)                \
    }                                                                     \
  }                                                                       \
  if (((uintptr_t)out & 0x3f) != 0) {                                     \
    /* out is not 64-byte aligned: unaligned store */                     \
    MATRIX_VAR_STORE(1, 1, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)     \
  } else {                                                                \
    MATRIX_VAR_STORE(1, 1, 16, zmm_sum, out, _mm512_store_ps, _NORM)      \
  }

//! Accumulate the distance between matrix and query (FP16, M=16, N=2).
//! Performs `dim` iterations, stepping `m` by 16 and `q` by 2 elements each
//! time, then passes the accumulators and `_NORM` to MATRIX_VAR_STORE.
#define ACCUM_FP16_16X2_AVX512(m, q, dim, out, _NORM)                     \
  MATRIX_VAR_INIT(1, 2, __m512, zmm_sum, _mm512_setzero_ps())             \
  if (((uintptr_t)m & 0x1f) != 0) {                                       \
    /* m is not 32-byte aligned: unaligned loads */                       \
    for (const Float16 *qe = q + (dim << 1); qe != q; q += 2, m += 16) {  \
      MATRIX_FP16_ITER_16X2_AVX512(m, q, zmm_sum, _mm256_loadu_si256,     \
                                   ACCUM_FP32_STEP_AVX512)                \
    }                                                                     \
  } else {                                                                \
    for (const Float16 *qe = q + (dim << 1); qe != q; q += 2, m += 16) {  \
      MATRIX_FP16_ITER_16X2_AVX512(m, q, zmm_sum, _mm256_load_si256,      \
                                   ACCUM_FP32_STEP_AVX512)                \
    }                                                                     \
  }                                                                       \
  if (((uintptr_t)out & 0x3f) != 0) {                                     \
    /* out is not 64-byte aligned: unaligned store */                     \
    MATRIX_VAR_STORE(1, 2, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)     \
  } else {                                                                \
    MATRIX_VAR_STORE(1, 2, 16, zmm_sum, out, _mm512_store_ps, _NORM)      \
  }

//! Accumulate the distance between matrix and query (FP16, M=16, N=4).
//! Performs `dim` iterations, stepping `m` by 16 and `q` by 4 elements each
//! time, then passes the accumulators and `_NORM` to MATRIX_VAR_STORE.
#define ACCUM_FP16_16X4_AVX512(m, q, dim, out, _NORM)                     \
  MATRIX_VAR_INIT(1, 4, __m512, zmm_sum, _mm512_setzero_ps())             \
  if (((uintptr_t)m & 0x1f) != 0) {                                       \
    /* m is not 32-byte aligned: unaligned loads */                       \
    for (const Float16 *qe = q + (dim << 2); qe != q; q += 4, m += 16) {  \
      MATRIX_FP16_ITER_16X4_AVX512(m, q, zmm_sum, _mm256_loadu_si256,     \
                                   ACCUM_FP32_STEP_AVX512)                \
    }                                                                     \
  } else {                                                                \
    for (const Float16 *qe = q + (dim << 2); qe != q; q += 4, m += 16) {  \
      MATRIX_FP16_ITER_16X4_AVX512(m, q, zmm_sum, _mm256_load_si256,      \
                                   ACCUM_FP32_STEP_AVX512)                \
    }                                                                     \
  }                                                                       \
  if (((uintptr_t)out & 0x3f) != 0) {                                     \
    /* out is not 64-byte aligned: unaligned store */                     \
    MATRIX_VAR_STORE(1, 4, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)     \
  } else {                                                                \
    MATRIX_VAR_STORE(1, 4, 16, zmm_sum, out, _mm512_store_ps, _NORM)      \
  }

//! Accumulate the distance between matrix and query (FP16, M=16, N=8).
//! Performs `dim` iterations, stepping `m` by 16 and `q` by 8 elements each
//! time, then passes the accumulators and `_NORM` to MATRIX_VAR_STORE.
#define ACCUM_FP16_16X8_AVX512(m, q, dim, out, _NORM)                     \
  MATRIX_VAR_INIT(1, 8, __m512, zmm_sum, _mm512_setzero_ps())             \
  if (((uintptr_t)m & 0x1f) != 0) {                                       \
    /* m is not 32-byte aligned: unaligned loads */                       \
    for (const Float16 *qe = q + (dim << 3); qe != q; q += 8, m += 16) {  \
      MATRIX_FP16_ITER_16X8_AVX512(m, q, zmm_sum, _mm256_loadu_si256,     \
                                   ACCUM_FP32_STEP_AVX512)                \
    }                                                                     \
  } else {                                                                \
    for (const Float16 *qe = q + (dim << 3); qe != q; q += 8, m += 16) {  \
      MATRIX_FP16_ITER_16X8_AVX512(m, q, zmm_sum, _mm256_load_si256,      \
                                   ACCUM_FP32_STEP_AVX512)                \
    }                                                                     \
  }                                                                       \
  if (((uintptr_t)out & 0x3f) != 0) {                                     \
    /* out is not 64-byte aligned: unaligned store */                     \
    MATRIX_VAR_STORE(1, 8, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)     \
  } else {                                                                \
    MATRIX_VAR_STORE(1, 8, 16, zmm_sum, out, _mm512_store_ps, _NORM)      \
  }

//! Accumulate the distance between matrix and query (FP16, M=16, N=16).
//! Performs `dim` iterations, stepping `m` by 16 and `q` by 16 elements each
//! time, then passes the accumulators and `_NORM` to MATRIX_VAR_STORE.
//! The aligned loader is chosen only when both `m` and `q` are 32-byte
//! aligned.
#define ACCUM_FP16_16X16_AVX512(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(1, 16, __m512, zmm_sum, _mm512_setzero_ps())            \
  if (((uintptr_t)m & 0x1f) != 0 || ((uintptr_t)q & 0x1f) != 0) {         \
    /* m or q is not 32-byte aligned: unaligned loads */                  \
    for (const Float16 *qe = q + (dim << 4); qe != q; q += 16, m += 16) { \
      MATRIX_FP16_ITER_16X16_AVX512(m, q, zmm_sum, _mm256_loadu_si256,    \
                                    ACCUM_FP32_STEP_AVX512)               \
    }                                                                     \
  } else {                                                                \
    for (const Float16 *qe = q + (dim << 4); qe != q; q += 16, m += 16) { \
      MATRIX_FP16_ITER_16X16_AVX512(m, q, zmm_sum, _mm256_load_si256,     \
                                    ACCUM_FP32_STEP_AVX512)               \
    }                                                                     \
  }                                                                       \
  if (((uintptr_t)out & 0x3f) != 0) {                                     \
    /* out is not 64-byte aligned: unaligned store */                     \
    MATRIX_VAR_STORE(1, 16, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)    \
  } else {                                                                \
    MATRIX_VAR_STORE(1, 16, 16, zmm_sum, out, _mm512_store_ps, _NORM)     \
  }

//! Accumulate the distance between matrix and query (FP16, M=32, N=1).
//! Performs `dim` iterations, stepping `m` by 32 and `q` by 1 element each
//! time, then passes the accumulators and `_NORM` to MATRIX_VAR_STORE.
#define ACCUM_FP16_32X1_AVX512(m, q, dim, out, _NORM)                     \
  MATRIX_VAR_INIT(2, 1, __m512, zmm_sum, _mm512_setzero_ps())             \
  if (((uintptr_t)m & 0x3f) != 0) {                                       \
    /* m is not 64-byte aligned: unaligned loads */                       \
    for (const Float16 *qe = q + dim; qe != q; ++q, m += 32) {            \
      MATRIX_FP16_ITER_32X1_AVX512(m, q, zmm_sum, _mm512_loadu_si512,     \
                                   ACCUM_FP32_STEP_AVX512)                \
    }                                                                     \
  } else {                                                                \
    for (const Float16 *qe = q + dim; qe != q; ++q, m += 32) {            \
      MATRIX_FP16_ITER_32X1_AVX512(m, q, zmm_sum, _mm512_load_si512,      \
                                   ACCUM_FP32_STEP_AVX512)                \
    }                                                                     \
  }                                                                       \
  if (((uintptr_t)out & 0x3f) != 0) {                                     \
    /* out is not 64-byte aligned: unaligned store */                     \
    MATRIX_VAR_STORE(2, 1, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)     \
  } else {                                                                \
    MATRIX_VAR_STORE(2, 1, 16, zmm_sum, out, _mm512_store_ps, _NORM)      \
  }

//! Accumulate the distance between matrix and query (FP16, M=32, N=2).
//! Performs `dim` iterations, stepping `m` by 32 and `q` by 2 elements each
//! time, then passes the accumulators and `_NORM` to MATRIX_VAR_STORE.
#define ACCUM_FP16_32X2_AVX512(m, q, dim, out, _NORM)                     \
  MATRIX_VAR_INIT(2, 2, __m512, zmm_sum, _mm512_setzero_ps())             \
  if (((uintptr_t)m & 0x3f) != 0) {                                       \
    /* m is not 64-byte aligned: unaligned loads */                       \
    for (const Float16 *qe = q + (dim << 1); qe != q; q += 2, m += 32) {  \
      MATRIX_FP16_ITER_32X2_AVX512(m, q, zmm_sum, _mm512_loadu_si512,     \
                                   ACCUM_FP32_STEP_AVX512)                \
    }                                                                     \
  } else {                                                                \
    for (const Float16 *qe = q + (dim << 1); qe != q; q += 2, m += 32) {  \
      MATRIX_FP16_ITER_32X2_AVX512(m, q, zmm_sum, _mm512_load_si512,      \
                                   ACCUM_FP32_STEP_AVX512)                \
    }                                                                     \
  }                                                                       \
  if (((uintptr_t)out & 0x3f) != 0) {                                     \
    /* out is not 64-byte aligned: unaligned store */                     \
    MATRIX_VAR_STORE(2, 2, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)     \
  } else {                                                                \
    MATRIX_VAR_STORE(2, 2, 16, zmm_sum, out, _mm512_store_ps, _NORM)      \
  }

//! Accumulate the distance between matrix and query (FP16, M=32, N=4).
//! Performs `dim` iterations, stepping `m` by 32 and `q` by 4 elements each
//! time, then passes the accumulators and `_NORM` to MATRIX_VAR_STORE.
#define ACCUM_FP16_32X4_AVX512(m, q, dim, out, _NORM)                     \
  MATRIX_VAR_INIT(2, 4, __m512, zmm_sum, _mm512_setzero_ps())             \
  if (((uintptr_t)m & 0x3f) != 0) {                                       \
    /* m is not 64-byte aligned: unaligned loads */                       \
    for (const Float16 *qe = q + (dim << 2); qe != q; q += 4, m += 32) {  \
      MATRIX_FP16_ITER_32X4_AVX512(m, q, zmm_sum, _mm512_loadu_si512,     \
                                   ACCUM_FP32_STEP_AVX512)                \
    }                                                                     \
  } else {                                                                \
    for (const Float16 *qe = q + (dim << 2); qe != q; q += 4, m += 32) {  \
      MATRIX_FP16_ITER_32X4_AVX512(m, q, zmm_sum, _mm512_load_si512,      \
                                   ACCUM_FP32_STEP_AVX512)                \
    }                                                                     \
  }                                                                       \
  if (((uintptr_t)out & 0x3f) != 0) {                                     \
    /* out is not 64-byte aligned: unaligned store */                     \
    MATRIX_VAR_STORE(2, 4, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)     \
  } else {                                                                \
    MATRIX_VAR_STORE(2, 4, 16, zmm_sum, out, _mm512_store_ps, _NORM)      \
  }

//! Accumulate the distance between matrix and query (FP16, M=32, N=8).
//! Performs `dim` iterations, stepping `m` by 32 and `q` by 8 elements each
//! time, then passes the accumulators and `_NORM` to MATRIX_VAR_STORE.
#define ACCUM_FP16_32X8_AVX512(m, q, dim, out, _NORM)                     \
  MATRIX_VAR_INIT(2, 8, __m512, zmm_sum, _mm512_setzero_ps())             \
  if (((uintptr_t)m & 0x3f) != 0) {                                       \
    /* m is not 64-byte aligned: unaligned loads */                       \
    for (const Float16 *qe = q + (dim << 3); qe != q; q += 8, m += 32) {  \
      MATRIX_FP16_ITER_32X8_AVX512(m, q, zmm_sum, _mm512_loadu_si512,     \
                                   ACCUM_FP32_STEP_AVX512)                \
    }                                                                     \
  } else {                                                                \
    for (const Float16 *qe = q + (dim << 3); qe != q; q += 8, m += 32) {  \
      MATRIX_FP16_ITER_32X8_AVX512(m, q, zmm_sum, _mm512_load_si512,      \
                                   ACCUM_FP32_STEP_AVX512)                \
    }                                                                     \
  }                                                                       \
  if (((uintptr_t)out & 0x3f) != 0) {                                     \
    /* out is not 64-byte aligned: unaligned store */                     \
    MATRIX_VAR_STORE(2, 8, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)     \
  } else {                                                                \
    MATRIX_VAR_STORE(2, 8, 16, zmm_sum, out, _mm512_store_ps, _NORM)      \
  }

//! Accumulate the distance between a matrix block and queries
//! (FP16, M=32, N=16) with AVX512.
//!
//! Statement macro: `m` and `q` are advanced in place, 32 and 16 elements per
//! step, until `q` has moved by `dim << 4` elements. A 64-byte aligned `m`
//! selects `_mm512_load_si512`, otherwise `_mm512_loadu_si512`; the store to
//! `out` is picked the same way from the alignment of `out`. The `zmm_sum`
//! accumulators are handled by MATRIX_VAR_INIT / MATRIX_VAR_STORE (defined
//! elsewhere); `_NORM` is forwarded to MATRIX_VAR_STORE untouched.
#define ACCUM_FP16_32X16_AVX512(m, q, dim, out, _NORM)                 \
  MATRIX_VAR_INIT(2, 16, __m512, zmm_sum, _mm512_setzero_ps())         \
  if (((uintptr_t)m & 0x3f) != 0) {                                    \
    for (const Float16 *qe = q + (dim << 4); q != qe;) {               \
      MATRIX_FP16_ITER_32X16_AVX512(m, q, zmm_sum, _mm512_loadu_si512, \
                                    ACCUM_FP32_STEP_AVX512)            \
      m += 32;                                                         \
      q += 16;                                                         \
    }                                                                  \
  } else {                                                             \
    for (const Float16 *qe = q + (dim << 4); q != qe;) {               \
      MATRIX_FP16_ITER_32X16_AVX512(m, q, zmm_sum, _mm512_load_si512,  \
                                    ACCUM_FP32_STEP_AVX512)            \
      m += 32;                                                         \
      q += 16;                                                         \
    }                                                                  \
  }                                                                    \
  if (((uintptr_t)out & 0x3f) != 0) {                                  \
    MATRIX_VAR_STORE(2, 16, 16, zmm_sum, out, _mm512_storeu_ps, _NORM) \
  } else {                                                             \
    MATRIX_VAR_STORE(2, 16, 16, zmm_sum, out, _mm512_store_ps, _NORM)  \
  }

//! Accumulate the distance between a matrix block and queries
//! (FP16, M=32, N=32) with AVX512.
//!
//! Statement macro: `m` and `q` are both advanced in place by 32 elements per
//! step until `q` has moved by `dim << 5` elements. A 64-byte aligned `m`
//! selects `_mm512_load_si512`, otherwise `_mm512_loadu_si512`; the store to
//! `out` is picked the same way from the alignment of `out`. The `zmm_sum`
//! accumulators are handled by MATRIX_VAR_INIT / MATRIX_VAR_STORE (defined
//! elsewhere); `_NORM` is forwarded to MATRIX_VAR_STORE untouched.
#define ACCUM_FP16_32X32_AVX512(m, q, dim, out, _NORM)                 \
  MATRIX_VAR_INIT(2, 32, __m512, zmm_sum, _mm512_setzero_ps())         \
  if (((uintptr_t)m & 0x3f) != 0) {                                    \
    for (const Float16 *qe = q + (dim << 5); q != qe;) {               \
      MATRIX_FP16_ITER_32X32_AVX512(m, q, zmm_sum, _mm512_loadu_si512, \
                                    ACCUM_FP32_STEP_AVX512)            \
      m += 32;                                                         \
      q += 32;                                                         \
    }                                                                  \
  } else {                                                             \
    for (const Float16 *qe = q + (dim << 5); q != qe;) {               \
      MATRIX_FP16_ITER_32X32_AVX512(m, q, zmm_sum, _mm512_load_si512,  \
                                    ACCUM_FP32_STEP_AVX512)            \
      m += 32;                                                         \
      q += 32;                                                         \
    }                                                                  \
  }                                                                    \
  if (((uintptr_t)out & 0x3f) != 0) {                                  \
    MATRIX_VAR_STORE(2, 32, 16, zmm_sum, out, _mm512_storeu_ps, _NORM) \
  } else {                                                             \
    MATRIX_VAR_STORE(2, 32, 16, zmm_sum, out, _mm512_store_ps, _NORM)  \
  }

#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
//! Compute the distance between matrix and query (FP16, M=1, N=1)
//!
//! Variant used when __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined: the
//! vector accumulator is a float16x8_t and is stepped with
//! ACCUM_FP16_STEP_NEON.
//!
//! Statement macro. It advances `m` and `q` in place and declares `qe`,
//! `qe_aligned` and `result` (plus the accumulator from MATRIX_VAR_INIT,
//! referenced below as `v_sum_0_0`) in the expanding scope.
//!  - Main loop: 8 elements per step up to `qe_aligned`, i.e. `dim` rounded
//!    down to a multiple of 8.
//!  - If at least 4 elements remain, 4 elements of `m` and of `q` are loaded
//!    into the low half of a vector; the high half of both vectors is filled
//!    with the 64-bit pattern `_MASK` (presumably chosen by the caller so that
//!    those lanes contribute nothing -- confirm at the call sites).
//!  - The accumulator is widened to FP32 (low and high halves), added and
//!    horizontally summed into the scalar `result`.
//!  - The last 1..3 elements are folded in one by one through
//!    ACCUM_FP16_STEP_GENERAL, using intentional switch fall-through.
//!  - `_NORM(result)` is written to `*out`.
#define ACCUM_FP16_1X1_NEON(m, q, dim, out, _MASK, _NORM)                    \
  MATRIX_VAR_INIT(1, 1, float16x8_t, v_sum, vdupq_n_f16(0))                  \
  const Float16 *qe = q + dim;                                               \
  const Float16 *qe_aligned = q + ((dim >> 3) << 3);                         \
  for (; q != qe_aligned; m += 8, q += 8) {                                  \
    MATRIX_FP16_ITER_1X1_NEON(m, q, v_sum, ACCUM_FP16_STEP_NEON)             \
  }                                                                          \
  if (qe >= qe_aligned + 4) {                                                \
    float16x8_t v_m =                                                        \
        vcombine_f16(vld1_f16((const float16_t *)m),                         \
                     vreinterpret_f16_u64(vdup_n_u64((uint64_t)(_MASK))));   \
    float16x8_t v_q =                                                        \
        vcombine_f16(vld1_f16((const float16_t *)q),                         \
                     vreinterpret_f16_u64(vdup_n_u64((uint64_t)(_MASK))));   \
    ACCUM_FP16_STEP_NEON(v_m, v_q, v_sum_0_0)                                \
    m += 4;                                                                  \
    q += 4;                                                                  \
  }                                                                          \
  float result = vaddvq_f32(vaddq_f32(vcvt_f32_f16(vget_low_f16(v_sum_0_0)), \
                                      vcvt_high_f32_f16(v_sum_0_0)));        \
  switch (qe - q) {                                                          \
    case 3:                                                                  \
      ACCUM_FP16_STEP_GENERAL(m[2], q[2], result)                            \
      /* FALLTHRU */                                                         \
    case 2:                                                                  \
      ACCUM_FP16_STEP_GENERAL(m[1], q[1], result)                            \
      /* FALLTHRU */                                                         \
    case 1:                                                                  \
      ACCUM_FP16_STEP_GENERAL(m[0], q[0], result)                            \
  }                                                                          \
  *out = _NORM(result);

#else
//! Compute the distance between matrix and query (FP16, M=1, N=1)
//!
//! Variant used when __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is not defined: the
//! vector accumulator is a float32x4_t and is stepped with
//! ACCUM_FP32_STEP_NEON.
//!
//! Statement macro. It advances `m` and `q` in place and declares `qe`,
//! `qe_aligned` and `result` (plus the accumulator from MATRIX_VAR_INIT,
//! referenced below as `v_sum_0_0`) in the expanding scope.
//!  - Main loop: 8 elements per step up to `qe_aligned`, i.e. `dim` rounded
//!    down to a multiple of 8.
//!  - If at least 4 elements remain, 4 FP16 values of `m` and of `q` are
//!    loaded, widened to FP32 and accumulated. `_MASK` is accepted for
//!    signature compatibility but is not used by this variant.
//!  - The accumulator is horizontally summed into the scalar `result`.
//!  - The last 1..3 elements are folded in one by one through
//!    ACCUM_FP16_STEP_GENERAL, using intentional switch fall-through.
//!  - `_NORM(result)` is written to `*out`.
#define ACCUM_FP16_1X1_NEON(m, q, dim, out, _MASK, _NORM)           \
  MATRIX_VAR_INIT(1, 1, float32x4_t, v_sum, vdupq_n_f32(0))         \
  const Float16 *qe = q + dim;                                      \
  const Float16 *qe_aligned = q + ((dim >> 3) << 3);                \
  for (; q != qe_aligned; m += 8, q += 8) {                         \
    MATRIX_FP16_ITER_1X1_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)    \
  }                                                                 \
  if (qe >= qe_aligned + 4) {                                       \
    float32x4_t v_m = vcvt_f32_f16(vld1_f16((const float16_t *)m)); \
    float32x4_t v_q = vcvt_f32_f16(vld1_f16((const float16_t *)q)); \
    ACCUM_FP32_STEP_NEON(v_m, v_q, v_sum_0_0)                       \
    m += 4;                                                         \
    q += 4;                                                         \
  }                                                                 \
  float result = vaddvq_f32(v_sum_0_0);                             \
  switch (qe - q) {                                                 \
    case 3:                                                         \
      ACCUM_FP16_STEP_GENERAL(m[2], q[2], result)                   \
      /* FALLTHRU */                                                \
    case 2:                                                         \
      ACCUM_FP16_STEP_GENERAL(m[1], q[1], result)                   \
      /* FALLTHRU */                                                \
    case 1:                                                         \
      ACCUM_FP16_STEP_GENERAL(m[0], q[0], result)                   \
  }                                                                 \
  *out = _NORM(result);

#endif  // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC


================================================
FILE: src/ailego/math/distance_matrix_accum_fp32.i
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_fp32.i"
#include "matrix_utility.i"

//! Fallback for builds without FMA support (__FMA__ not defined): provide the
//! 128-bit and 256-bit fmadd names as a separate multiply followed by an add,
//! i.e. (a * b) + c, so the rest of the file can use them unconditionally.
#if !defined(__FMA__)
#define _mm_fmadd_ps(a, b, c) _mm_add_ps(_mm_mul_ps((a), (b)), (c))
#define _mm256_fmadd_ps(a, b, c) _mm256_add_ps(_mm256_mul_ps((a), (b)), (c))
#endif  // !__FMA__

//! Fallback for AVX512F builds without AVX512DQ: emulate the floating-point
//! bitwise AND (plain and write-masked) by casting the operands to integer
//! vectors, using the 32-bit integer AND, and casting the result back.
#if defined(__AVX512F__) && !defined(__AVX512DQ__)
#define _mm512_and_ps(a, b) \
  _mm512_castsi512_ps(      \
      _mm512_and_epi32(_mm512_castps_si512(a), _mm512_castps_si512(b)))
#define _mm512_mask_and_ps(src, k, a, b)                                   \
  _mm512_castsi512_ps(_mm512_mask_and_epi32(_mm512_castps_si512(src), (k), \
                                            _mm512_castps_si512(a),        \
                                            _mm512_castps_si512(b)))
#endif  // __AVX512F__ && !__AVX512DQ__

#if defined(__ARM_NEON) && !defined(__aarch64__)
//! Stand-in for `vaddvq_f32` on NEON targets that are not AArch64: returns
//! the sum of the four float lanes, computed as (v0 + v2) + (v1 + v3).
static inline float32_t vaddvq_f32(float32x4_t v) {
  const float32x2_t lower = vget_low_f32(v);
  const float32x2_t upper = vget_high_f32(v);
  const float32x2_t halves = vadd_f32(lower, upper);
  const float32x2_t total = vpadd_f32(halves, halves);
  return vget_lane_f32(total, 0);
}

//! Stand-in for `vaddvq_s32` on NEON targets that are not AArch64: returns
//! the sum of the four 32-bit integer lanes.
static inline int32_t vaddvq_s32(int32x4_t v) {
  const int32x2_t lower = vget_low_s32(v);
  const int32x2_t upper = vget_high_s32(v);
  const int32x2_t halves = vadd_s32(lower, upper);
  const int32x2_t total = vpadd_s32(halves, halves);
  return vget_lane_s32(total, 0);
}
#endif  // __ARM_NEON && !__aarch64__

//! Map ACCUM_FP32_2X1_NEON onto the architecture-specific implementation:
//! the _A64 macro on AArch64, the _A32 macro otherwise (both defined
//! elsewhere).
#if defined(__aarch64__)
#define ACCUM_FP32_2X1_NEON ACCUM_FP32_2X1_NEON_A64
#else
#define ACCUM_FP32_2X1_NEON ACCUM_FP32_2X1_NEON_A32
#endif  // __aarch64__

//! Compute the distance between matrix and query (FP32, M=2, N=1)
//!
//! Statement macro. It advances `m` and `q` in place and declares `qe`,
//! `qe_aligned` and the `xmm_sum_0_0` / `xmm_sum_0_1` accumulators in the
//! expanding scope.
//!  - Main loop: 4 query values and 8 matrix values per step, up to
//!    `qe_aligned` (`dim` rounded down to a multiple of 4). The aligned-load
//!    path is taken only when both `m` and `q` are 16-byte aligned.
//!  - If at least 2 dimensions remain, one extra step loads 4 matrix values
//!    and pairs them with the lanes {q[0], q[0], q[1], q[1]}.
//!  - The two accumulators are added and the upper half is folded onto the
//!    lower half, so lanes 0 and 1 hold the partial results (presumably one
//!    per matrix row).
//!  - A final odd dimension is accumulated from {m[0], m[1], 0, 0} against a
//!    broadcast of `*q`; only lanes 0 and 1 are stored afterwards.
//!  - `_NORM` is applied to the vector and its low two floats are written to
//!    `out` with `_mm_storel_pi` (no alignment requirement on `out` is
//!    checked here).
#define ACCUM_FP32_2X1_SSE(m, q, dim, out, _NORM)                       \
  MATRIX_VAR_INIT(1, 2, __m128, xmm_sum, _mm_setzero_ps())              \
  const float *qe_aligned = q + ((dim >> 2) << 2);                      \
  const float *qe = q + dim;                                            \
  if (((uintptr_t)m & 0xf) == 0 && ((uintptr_t)q & 0xf) == 0) {         \
    for (; q != qe_aligned; m += 8, q += 4) {                           \
      MATRIX_FP32_ITER_2X1_SSE(m, q, xmm_sum, _mm_load_ps,              \
                               ACCUM_FP32_STEP_SSE)                     \
    }                                                                   \
    if (qe >= qe_aligned + 2) {                                         \
      __m128 xmm_m = _mm_load_ps(m);                                    \
      __m128 xmm_q = _mm_set_ps(q[1], q[1], q[0], q[0]);                \
      ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_0)                    \
      m += 4;                                                           \
      q += 2;                                                           \
    }                                                                   \
  } else {                                                              \
    for (; q != qe_aligned; m += 8, q += 4) {                           \
      MATRIX_FP32_ITER_2X1_SSE(m, q, xmm_sum, _mm_loadu_ps,             \
                               ACCUM_FP32_STEP_SSE)                     \
    }                                                                   \
    if (qe >= qe_aligned + 2) {                                         \
      __m128 xmm_m = _mm_loadu_ps(m);                                   \
      __m128 xmm_q = _mm_set_ps(q[1], q[1], q[0], q[0]);                \
      ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_0)                    \
      m += 4;                                                           \
      q += 2;                                                           \
    }                                                                   \
  }                                                                     \
  xmm_sum_0_0 = _mm_add_ps(xmm_sum_0_0, xmm_sum_0_1);                   \
  xmm_sum_0_0 =                                                         \
      _mm_add_ps(xmm_sum_0_0, _mm_movehl_ps(xmm_sum_0_0, xmm_sum_0_0)); \
  if (q != qe) {                                                        \
    __m128 xmm_m = _mm_set_ps(0.0f, 0.0f, m[1], m[0]);                  \
    __m128 xmm_q = _mm_broadcast_ss(q);                                 \
    ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_0)                      \
  }                                                                     \
  _mm_storel_pi((__m64 *)out, _NORM(xmm_sum_0_0));

//! Compute the distance between matrix and query (FP32, M=2, N=2)
//!
//! Statement macro. It advances `m` and `q` in place and declares `qe` and
//! the `xmm_sum_0_0` / `xmm_sum_0_1` accumulators in the expanding scope.
//!  - `qe` marks the end of the query data, `dim << 1` elements past `q`.
//!  - Main loop: two dimensions per step (4 matrix and 4 query values), up to
//!    `q + ((dim >> 1) << 2)`. The aligned-load path is taken only when both
//!    `m` and `q` are 16-byte aligned.
//!  - The two accumulators are then merged into `xmm_sum_0_0` by adding the
//!    low halves {sum0[0..1], sum1[0..1]} to the high halves
//!    {sum0[2..3], sum1[2..3]}.
//!  - When `dim` is odd, the last dimension is accumulated from
//!    {m[0], m[1], m[0], m[1]} against {q[0], q[0], q[1], q[1]}.
//!  - The single result vector (4 floats) is written through
//!    MATRIX_VAR_STORE, with an aligned store when `out` is 16-byte aligned
//!    and an unaligned store otherwise; `_NORM` is forwarded to it.
#define ACCUM_FP32_2X2_SSE(m, q, dim, out, _NORM)                          \
  MATRIX_VAR_INIT(1, 2, __m128, xmm_sum, _mm_setzero_ps())                 \
  const float *qe = q + (dim << 1);                                        \
  if (((uintptr_t)m & 0xf) == 0 && ((uintptr_t)q & 0xf) == 0) {            \
    for (const float *qe_aligned = q + ((dim >> 1) << 2); q != qe_aligned; \
         m += 4, q += 4) {                                                 \
      MATRIX_FP32_ITER_2X2_SSE(m, q, xmm_sum, _mm_load_ps,                 \
                               ACCUM_FP32_STEP_SSE)                        \
    }                                                                      \
  } else {                                                                 \
    for (const float *qe_aligned = q + ((dim >> 1) << 2); q != qe_aligned; \
         m += 4, q += 4) {                                                 \
      MATRIX_FP32_ITER_2X2_SSE(m, q, xmm_sum, _mm_loadu_ps,                \
                               ACCUM_FP32_STEP_SSE)                        \
    }                                                                      \
  }                                                                        \
  xmm_sum_0_0 = _mm_add_ps(_mm_movelh_ps(xmm_sum_0_0, xmm_sum_0_1),        \
                           _mm_movehl_ps(xmm_sum_0_1, xmm_sum_0_0));       \
  if (q != qe) {                                                           \
    __m128 xmm_m = _mm_set_ps(m[1], m[0], m[1], m[0]);                     \
    __m128 xmm_q = _mm_set_ps(q[1], q[1], q[0], q[0]);                     \
    ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_0)                         \
  }                                                                        \
  if (((uintptr_t)out & 0xf) == 0) {                                       \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)           \
  } else {                                                                 \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)          \
  }

//! Compute the distance between matrix and query (FP32, M=4, N=1)
//!
//! Statement macro. It advances `m` and `q` in place and declares `qe` and
//! the `xmm_sum_0_0` / `xmm_sum_0_1` accumulators in the expanding scope.
//!  - Main loop: two dimensions per step (8 matrix values, 2 query values),
//!    up to `dim` rounded down to an even count. Only the alignment of `m`
//!    decides between aligned and unaligned matrix loads.
//!  - When `dim` is odd, the last dimension loads 4 matrix values and
//!    accumulates them against a broadcast of `*q` into `xmm_sum_0_0`.
//!  - The two accumulators are added into `xmm_sum_0_0`, which is written
//!    (4 floats) through MATRIX_VAR_STORE, with an aligned store when `out`
//!    is 16-byte aligned and an unaligned store otherwise; `_NORM` is
//!    forwarded to it.
#define ACCUM_FP32_4X1_SSE(m, q, dim, out, _NORM)                          \
  MATRIX_VAR_INIT(1, 2, __m128, xmm_sum, _mm_setzero_ps())                 \
  const float *qe = q + dim;                                               \
  if (((uintptr_t)m & 0xf) == 0) {                                         \
    for (const float *qe_aligned = q + ((dim >> 1) << 1); q != qe_aligned; \
         m += 8, q += 2) {                                                 \
      MATRIX_FP32_ITER_4X1_SSE(m, q, xmm_sum, _mm_load_ps,                 \
                               ACCUM_FP32_STEP_SSE)                        \
    }                                                                      \
    if (q != qe) {                                                         \
      __m128 xmm_m = _mm_load_ps(m);                                       \
      __m128 xmm_q = _mm_broadcast_ss(q);                                  \
      ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_0)                       \
    }                                                                      \
  } else {                                                                 \
    for (const float *qe_aligned = q + ((dim >> 1) << 1); q != qe_aligned; \
         m += 8, q += 2) {                                                 \
      MATRIX_FP32_ITER_4X1_SSE(m, q, xmm_sum, _mm_loadu_ps,                \
                               ACCUM_FP32_STEP_SSE)                        \
    }                                                                      \
    if (q != qe) {                                                         \
      __m128 xmm_m = _mm_loadu_ps(m);                                      \
      __m128 xmm_q = _mm_broadcast_ss(q);                                  \
      ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_0)                       \
    }                                                                      \
  }                                                                        \
  xmm_sum_0_0 = _mm_add_ps(xmm_sum_0_0, xmm_sum_0_1);                      \
  if (((uintptr_t)out & 0xf) == 0) {                                       \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)           \
  } else {                                                                 \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)          \
  }

//! Accumulate the distance between a matrix block and queries
//! (FP32, M=4, N=2) with SSE.
//!
//! Statement macro: `m` and `q` are advanced in place, 4 and 2 elements per
//! step, until `q` has moved by `dim << 1` elements. A 16-byte aligned `m`
//! selects `_mm_load_ps`, otherwise `_mm_loadu_ps`; the store to `out` is
//! picked the same way from the alignment of `out`. The `xmm_sum`
//! accumulators are handled by MATRIX_VAR_INIT / MATRIX_VAR_STORE (defined
//! elsewhere); `_NORM` is forwarded to MATRIX_VAR_STORE untouched.
#define ACCUM_FP32_4X2_SSE(m, q, dim, out, _NORM)                 \
  MATRIX_VAR_INIT(1, 2, __m128, xmm_sum, _mm_setzero_ps())        \
  if (((uintptr_t)m & 0xf) != 0) {                                \
    for (const float *qe = q + (dim << 1); q != qe;) {            \
      MATRIX_FP32_ITER_4X2_SSE(m, q, xmm_sum, _mm_loadu_ps,       \
                               ACCUM_FP32_STEP_SSE)               \
      m += 4;                                                     \
      q += 2;                                                     \
    }                                                             \
  } else {                                                        \
    for (const float *qe = q + (dim << 1); q != qe;) {            \
      MATRIX_FP32_ITER_4X2_SSE(m, q, xmm_sum, _mm_load_ps,        \
                               ACCUM_FP32_STEP_SSE)               \
      m += 4;                                                     \
      q += 2;                                                     \
    }                                                             \
  }                                                               \
  if (((uintptr_t)out & 0xf) != 0) {                              \
    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \
  } else {                                                        \
    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)  \
  }

//! Accumulate the distance between a matrix block and queries
//! (FP32, M=4, N=4) with SSE.
//!
//! Statement macro: `m` and `q` are both advanced in place by 4 elements per
//! step until `q` has moved by `dim << 2` elements. A 16-byte aligned `m`
//! selects `_mm_load_ps`, otherwise `_mm_loadu_ps`; the store to `out` is
//! picked the same way from the alignment of `out`. The `xmm_sum`
//! accumulators are handled by MATRIX_VAR_INIT / MATRIX_VAR_STORE (defined
//! elsewhere); `_NORM` is forwarded to MATRIX_VAR_STORE untouched.
#define ACCUM_FP32_4X4_SSE(m, q, dim, out, _NORM)                 \
  MATRIX_VAR_INIT(1, 4, __m128, xmm_sum, _mm_setzero_ps())        \
  if (((uintptr_t)m & 0xf) != 0) {                                \
    for (const float *qe = q + (dim << 2); q != qe;) {            \
      MATRIX_FP32_ITER_4X4_SSE(m, q, xmm_sum, _mm_loadu_ps,       \
                               ACCUM_FP32_STEP_SSE)               \
      m += 4;                                                     \
      q += 4;                                                     \
    }                                                             \
  } else {                                                        \
    for (const float *qe = q + (dim << 2); q != qe;) {            \
      MATRIX_FP32_ITER_4X4_SSE(m, q, xmm_sum, _mm_load_ps,        \
                               ACCUM_FP32_STEP_SSE)               \
      m += 4;                                                     \
      q += 4;                                                     \
    }                                                             \
  }                                                               \
  if (((uintptr_t)out & 0xf) != 0) {                              \
    MATRIX_VAR_STORE(1, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \
  } else {                                                        \
    MATRIX_VAR_STORE(1, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)  \
  }

//! Accumulate the distance between a matrix block and a query
//! (FP32, M=8, N=1) with SSE.
//!
//! Statement macro: `m` and `q` are advanced in place, 8 and 1 elements per
//! step, until `q` has moved by `dim` elements. A 16-byte aligned `m`
//! selects `_mm_load_ps`, otherwise `_mm_loadu_ps`; the store to `out` is
//! picked the same way from the alignment of `out`. The `xmm_sum`
//! accumulators are handled by MATRIX_VAR_INIT / MATRIX_VAR_STORE (defined
//! elsewhere); `_NORM` is forwarded to MATRIX_VAR_STORE untouched.
#define ACCUM_FP32_8X1_SSE(m, q, dim, out, _NORM)                 \
  MATRIX_VAR_INIT(2, 1, __m128, xmm_sum, _mm_setzero_ps())        \
  if (((uintptr_t)m & 0xf) != 0) {                                \
    for (const float *qe = q + dim; q != qe;) {                   \
      MATRIX_FP32_ITER_8X1_SSE(m, q, xmm_sum, _mm_loadu_ps,       \
                               ACCUM_FP32_STEP_SSE)               \
      m += 8;                                                     \
      ++q;                                                        \
    }                                                             \
  } else {                                                        \
    for (const float *qe = q + dim; q != qe;) {                   \
      MATRIX_FP32_ITER_8X1_SSE(m, q, xmm_sum, _mm_load_ps,        \
                               ACCUM_FP32_STEP_SSE)               \
      m += 8;                                                     \
      ++q;                                                        \
    }                                                             \
  }                                                               \
  if (((uintptr_t)out & 0xf) != 0) {                              \
    MATRIX_VAR_STORE(2, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \
  } else {                                                        \
    MATRIX_VAR_STORE(2, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)  \
  }

//! Accumulate the distance between a matrix block and queries
//! (FP32, M=8, N=2) with SSE.
//!
//! Statement macro: `m` and `q` are advanced in place, 8 and 2 elements per
//! step, until `q` has moved by `dim << 1` elements. A 16-byte aligned `m`
//! selects `_mm_load_ps`, otherwise `_mm_loadu_ps`; the store to `out` is
//! picked the same way from the alignment of `out`. The `xmm_sum`
//! accumulators are handled by MATRIX_VAR_INIT / MATRIX_VAR_STORE (defined
//! elsewhere); `_NORM` is forwarded to MATRIX_VAR_STORE untouched.
#define ACCUM_FP32_8X2_SSE(m, q, dim, out, _NORM)                 \
  MATRIX_VAR_INIT(2, 2, __m128, xmm_sum, _mm_setzero_ps())        \
  if (((uintptr_t)m & 0xf) != 0) {                                \
    for (const float *qe = q + (dim << 1); q != qe;) {            \
      MATRIX_FP32_ITER_8X2_SSE(m, q, xmm_sum, _mm_loadu_ps,       \
                               ACCUM_FP32_STEP_SSE)               \
      m += 8;                                                     \
      q += 2;                                                     \
    }                                                             \
  } else {                                                        \
    for (const float *qe = q + (dim << 1); q != qe;) {            \
      MATRIX_FP32_ITER_8X2_SSE(m, q, xmm_sum, _mm_load_ps,        \
                               ACCUM_FP32_STEP_SSE)               \
      m += 8;                                                     \
      q += 2;                                                     \
    }                                                             \
  }                                                               \
  if (((uintptr_t)out & 0xf) != 0) {                              \
    MATRIX_VAR_STORE(2, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \
  } else {                                                        \
    MATRIX_VAR_STORE(2, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)  \
  }

//! Accumulate the distance between a matrix block and queries
//! (FP32, M=8, N=4) with SSE.
//!
//! Statement macro: `m` and `q` are advanced in place, 8 and 4 elements per
//! step, until `q` has moved by `dim << 2` elements. A 16-byte aligned `m`
//! selects `_mm_load_ps`, otherwise `_mm_loadu_ps`; the store to `out` is
//! picked the same way from the alignment of `out`. The `xmm_sum`
//! accumulators are handled by MATRIX_VAR_INIT / MATRIX_VAR_STORE (defined
//! elsewhere); `_NORM` is forwarded to MATRIX_VAR_STORE untouched.
#define ACCUM_FP32_8X4_SSE(m, q, dim, out, _NORM)                 \
  MATRIX_VAR_INIT(2, 4, __m128, xmm_sum, _mm_setzero_ps())        \
  if (((uintptr_t)m & 0xf) != 0) {                                \
    for (const float *qe = q + (dim << 2); q != qe;) {            \
      MATRIX_FP32_ITER_8X4_SSE(m, q, xmm_sum, _mm_loadu_ps,       \
                               ACCUM_FP32_STEP_SSE)               \
      m += 8;                                                     \
      q += 4;                                                     \
    }                                                             \
  } else {                                                        \
    for (const float *qe = q + (dim << 2); q != qe;) {            \
      MATRIX_FP32_ITER_8X4_SSE(m, q, xmm_sum, _mm_load_ps,        \
                               ACCUM_FP32_STEP_SSE)               \
      m += 8;                                                     \
      q += 4;                                                     \
    }                                                             \
  }                                                               \
  if (((uintptr_t)out & 0xf) != 0) {                              \
    MATRIX_VAR_STORE(2, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \
  } else {                                                        \
    MATRIX_VAR_STORE(2, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)  \
  }

//! Accumulate the distance between a matrix block and queries
//! (FP32, M=8, N=8) with SSE.
//!
//! Statement macro: `m` and `q` are both advanced in place by 8 elements per
//! step until `q` has moved by `dim << 3` elements. A 16-byte aligned `m`
//! selects `_mm_load_ps`, otherwise `_mm_loadu_ps`; the store to `out` is
//! picked the same way from the alignment of `out`. The `xmm_sum`
//! accumulators are handled by MATRIX_VAR_INIT / MATRIX_VAR_STORE (defined
//! elsewhere); `_NORM` is forwarded to MATRIX_VAR_STORE untouched.
#define ACCUM_FP32_8X8_SSE(m, q, dim, out, _NORM)                 \
  MATRIX_VAR_INIT(2, 8, __m128, xmm_sum, _mm_setzero_ps())        \
  if (((uintptr_t)m & 0xf) != 0) {                                \
    for (const float *qe = q + (dim << 3); q != qe;) {            \
      MATRIX_FP32_ITER_8X8_SSE(m, q, xmm_sum, _mm_loadu_ps,       \
                               ACCUM_FP32_STEP_SSE)               \
      m += 8;                                                     \
      q += 8;                                                     \
    }                                                             \
  } else {                                                        \
    for (const float *qe = q + (dim << 3); q != qe;) {            \
      MATRIX_FP32_ITER_8X8_SSE(m, q, xmm_sum, _mm_load_ps,        \
                               ACCUM_FP32_STEP_SSE)               \
      m += 8;                                                     \
      q += 8;                                                     \
    }                                                             \
  }                                                               \
  if (((uintptr_t)out & 0xf) != 0) {                              \
    MATRIX_VAR_STORE(2, 8, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \
  } else {                                                        \
    MATRIX_VAR_STORE(2, 8, 4, xmm_sum, out, _mm_store_ps, _NORM)  \
  }

//! Accumulate the distance between a matrix block and a query
//! (FP32, M=16, N=1) with SSE.
//!
//! Statement macro: `m` and `q` are advanced in place, 16 and 1 elements per
//! step, until `q` has moved by `dim` elements. A 16-byte aligned `m`
//! selects `_mm_load_ps`, otherwise `_mm_loadu_ps`; the store to `out` is
//! picked the same way from the alignment of `out`. The `xmm_sum`
//! accumulators are handled by MATRIX_VAR_INIT / MATRIX_VAR_STORE (defined
//! elsewhere); `_NORM` is forwarded to MATRIX_VAR_STORE untouched.
#define ACCUM_FP32_16X1_SSE(m, q, dim, out, _NORM)                \
  MATRIX_VAR_INIT(4, 1, __m128, xmm_sum, _mm_setzero_ps())        \
  if (((uintptr_t)m & 0xf) != 0) {                                \
    for (const float *qe = q + dim; q != qe;) {                   \
      MATRIX_FP32_ITER_16X1_SSE(m, q, xmm_sum, _mm_loadu_ps,      \
                                ACCUM_FP32_STEP_SSE)              \
      m += 16;                                                    \
      ++q;                                                        \
    }                                                             \
  } else {                                                        \
    for (const float *qe = q + dim; q != qe;) {                   \
      MATRIX_FP32_ITER_16X1_SSE(m, q, xmm_sum, _mm_load_ps,       \
                                ACCUM_FP32_STEP_SSE)              \
      m += 16;                                                    \
      ++q;                                                        \
    }                                                             \
  }                                                               \
  if (((uintptr_t)out & 0xf) != 0) {                              \
    MATRIX_VAR_STORE(4, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \
  } else {                                                        \
    MATRIX_VAR_STORE(4, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)  \
  }

//! Accumulate the distance between a matrix block and queries
//! (FP32, M=16, N=2) with SSE.
//!
//! Statement macro: `m` and `q` are advanced in place, 16 and 2 elements per
//! step, until `q` has moved by `dim << 1` elements. A 16-byte aligned `m`
//! selects `_mm_load_ps`, otherwise `_mm_loadu_ps`; the store to `out` is
//! picked the same way from the alignment of `out`. The `xmm_sum`
//! accumulators are handled by MATRIX_VAR_INIT / MATRIX_VAR_STORE (defined
//! elsewhere); `_NORM` is forwarded to MATRIX_VAR_STORE untouched.
#define ACCUM_FP32_16X2_SSE(m, q, dim, out, _NORM)                \
  MATRIX_VAR_INIT(4, 2, __m128, xmm_sum, _mm_setzero_ps())        \
  if (((uintptr_t)m & 0xf) != 0) {                                \
    for (const float *qe = q + (dim << 1); q != qe;) {            \
      MATRIX_FP32_ITER_16X2_SSE(m, q, xmm_sum, _mm_loadu_ps,      \
                                ACCUM_FP32_STEP_SSE)              \
      m += 16;                                                    \
      q += 2;                                                     \
    }                                                             \
  } else {                                                        \
    for (const float *qe = q + (dim << 1); q != qe;) {            \
      MATRIX_FP32_ITER_16X2_SSE(m, q, xmm_sum, _mm_load_ps,       \
                                ACCUM_FP32_STEP_SSE)              \
      m += 16;                                                    \
      q += 2;                                                     \
    }                                                             \
  }                                                               \
  if (((uintptr_t)out & 0xf) != 0) {                              \
    MATRIX_VAR_STORE(4, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \
  } else {                                                        \
    MATRIX_VAR_STORE(4, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)  \
  }

//! Accumulate the distance between a matrix block and queries
//! (FP32, M=16, N=4) with SSE.
//!
//! Statement macro: `m` and `q` are advanced in place, 16 and 4 elements per
//! step, until `q` has moved by `dim << 2` elements. A 16-byte aligned `m`
//! selects `_mm_load_ps`, otherwise `_mm_loadu_ps`; the store to `out` is
//! picked the same way from the alignment of `out`. The `xmm_sum`
//! accumulators are handled by MATRIX_VAR_INIT / MATRIX_VAR_STORE (defined
//! elsewhere); `_NORM` is forwarded to MATRIX_VAR_STORE untouched.
#define ACCUM_FP32_16X4_SSE(m, q, dim, out, _NORM)                \
  MATRIX_VAR_INIT(4, 4, __m128, xmm_sum, _mm_setzero_ps())        \
  if (((uintptr_t)m & 0xf) != 0) {                                \
    for (const float *qe = q + (dim << 2); q != qe;) {            \
      MATRIX_FP32_ITER_16X4_SSE(m, q, xmm_sum, _mm_loadu_ps,      \
                                ACCUM_FP32_STEP_SSE)              \
      m += 16;                                                    \
      q += 4;                                                     \
    }                                                             \
  } else {                                                        \
    for (const float *qe = q + (dim << 2); q != qe;) {            \
      MATRIX_FP32_ITER_16X4_SSE(m, q, xmm_sum, _mm_load_ps,       \
                                ACCUM_FP32_STEP_SSE)              \
      m += 16;                                                    \
      q += 4;                                                     \
    }                                                             \
  }                                                               \
  if (((uintptr_t)out & 0xf) != 0) {                              \
    MATRIX_VAR_STORE(4, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \
  } else {                                                        \
    MATRIX_VAR_STORE(4, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (FP32, M=16, N=8)
//!
//! Statement macro: `m` and `q` are advanced in place, 16 and 8 elements per
//! step, until `q` has moved by `dim << 3` elements. A 16-byte aligned `m`
//! selects `_mm_load_ps`, otherwise `_mm_loadu_ps`. The `xmm_sum`
//! accumulators are handled by MATRIX_VAR_INIT / MATRIX_VAR_STORE (defined
//! elsewhere); `_NORM` is forwarded to MATRIX_VAR_STORE untouched.
//!
//! The result store must use `_mm_storeu_ps` whenever `out` is not 16-byte
//! aligned: `_mm_store_ps` requires an aligned address and may fault
//! otherwise.
#define ACCUM_FP32_16X8_SSE(m, q, dim, out, _NORM)                     \
  MATRIX_VAR_INIT(4, 8, __m128, xmm_sum, _mm_setzero_ps())             \
  if (((uintptr_t)m & 0xf) == 0) {                                     \
    for (const float *qe = q + (dim << 3); q != qe; m += 16, q += 8) { \
      MATRIX_FP32_ITER_16X8_SSE(m, q, xmm_sum, _mm_load_ps,            \
                                ACCUM_FP32_STEP_SSE)                   \
    }                                                                  \
  } else {                                                             \
    for (const float *qe = q + (dim << 3); q != qe; m += 16, q += 8) { \
      MATRIX_FP32_ITER_16X8_SSE(m, q, xmm_sum, _mm_loadu_ps,           \
                                ACCUM_FP32_STEP_SSE)                   \
    }                                                                  \
  }                                                                    \
  if (((uintptr_t)out & 0xf) == 0) {                                   \
    MATRIX_VAR_STORE(4, 8, 4, xmm_sum, out, _mm_store_ps, _NORM)       \
  } else {                                                             \
    MATRIX_VAR_STORE(4, 8, 4, xmm_sum, out, _mm_storeu_ps, _NORM)      \
  }

//! Accumulate the distance between a matrix block and queries
//! (FP32, M=16, N=16) with SSE.
//!
//! Statement macro: `m` and `q` are both advanced in place by 16 elements per
//! step until `q` has moved by `dim << 4` elements. A 16-byte aligned `m`
//! selects `_mm_load_ps`, otherwise `_mm_loadu_ps`; the store to `out` is
//! picked the same way from the alignment of `out`. The `xmm_sum`
//! accumulators are handled by MATRIX_VAR_INIT / MATRIX_VAR_STORE (defined
//! elsewhere); `_NORM` is forwarded to MATRIX_VAR_STORE untouched.
#define ACCUM_FP32_16X16_SSE(m, q, dim, out, _NORM)                \
  MATRIX_VAR_INIT(4, 16, __m128, xmm_sum, _mm_setzero_ps())        \
  if (((uintptr_t)m & 0xf) != 0) {                                 \
    for (const float *qe = q + (dim << 4); q != qe;) {             \
      MATRIX_FP32_ITER_16X16_SSE(m, q, xmm_sum, _mm_loadu_ps,      \
                                 ACCUM_FP32_STEP_SSE)              \
      m += 16;                                                     \
      q += 16;                                                     \
    }                                                              \
  } else {                                                         \
    for (const float *qe = q + (dim << 4); q != qe;) {             \
      MATRIX_FP32_ITER_16X16_SSE(m, q, xmm_sum, _mm_load_ps,       \
                                 ACCUM_FP32_STEP_SSE)              \
      m += 16;                                                     \
      q += 16;                                                     \
    }                                                              \
  }                                                                \
  if (((uintptr_t)out & 0xf) != 0) {                               \
    MATRIX_VAR_STORE(4, 16, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \
  } else {                                                         \
    MATRIX_VAR_STORE(4, 16, 4, xmm_sum, out, _mm_store_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (FP32, M=32, N=1).
//! Runs `dim` steps; after each step `m` is advanced by 32 floats and `q` by
//! one float (both caller pointers are modified in place). `_mm_load_ps` is
//! selected only when `m` is 16-byte aligned, `_mm_store_ps` only when `out`
//! is; `_NORM` is passed through to MATRIX_VAR_STORE untouched.
#define ACCUM_FP32_32X1_SSE(m, q, dim, out, _NORM)                \
  MATRIX_VAR_INIT(8, 1, __m128, xmm_sum, _mm_setzero_ps())        \
  if (((uintptr_t)m & 0xf) != 0) {                                \
    const float *q_stop = q + dim;                                \
    while (q != q_stop) {                                         \
      MATRIX_FP32_ITER_32X1_SSE(m, q, xmm_sum, _mm_loadu_ps,      \
                                ACCUM_FP32_STEP_SSE)              \
      m += 32;                                                    \
      ++q;                                                        \
    }                                                             \
  } else {                                                        \
    const float *q_stop = q + dim;                                \
    while (q != q_stop) {                                         \
      MATRIX_FP32_ITER_32X1_SSE(m, q, xmm_sum, _mm_load_ps,       \
                                ACCUM_FP32_STEP_SSE)              \
      m += 32;                                                    \
      ++q;                                                        \
    }                                                             \
  }                                                               \
  if (((uintptr_t)out & 0xf) != 0) {                              \
    MATRIX_VAR_STORE(8, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \
  } else {                                                        \
    MATRIX_VAR_STORE(8, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (FP32, M=32, N=2).
//! Runs `dim` steps; after each step `m` is advanced by 32 floats and `q` by
//! 2 floats (both caller pointers are modified in place). `_mm_load_ps` is
//! selected only when `m` is 16-byte aligned, `_mm_store_ps` only when `out`
//! is; `_NORM` is passed through to MATRIX_VAR_STORE untouched.
#define ACCUM_FP32_32X2_SSE(m, q, dim, out, _NORM)                \
  MATRIX_VAR_INIT(8, 2, __m128, xmm_sum, _mm_setzero_ps())        \
  if (((uintptr_t)m & 0xf) != 0) {                                \
    const float *q_stop = q + (dim << 1);                         \
    while (q != q_stop) {                                         \
      MATRIX_FP32_ITER_32X2_SSE(m, q, xmm_sum, _mm_loadu_ps,      \
                                ACCUM_FP32_STEP_SSE)              \
      m += 32;                                                    \
      q += 2;                                                     \
    }                                                             \
  } else {                                                        \
    const float *q_stop = q + (dim << 1);                         \
    while (q != q_stop) {                                         \
      MATRIX_FP32_ITER_32X2_SSE(m, q, xmm_sum, _mm_load_ps,       \
                                ACCUM_FP32_STEP_SSE)              \
      m += 32;                                                    \
      q += 2;                                                     \
    }                                                             \
  }                                                               \
  if (((uintptr_t)out & 0xf) != 0) {                              \
    MATRIX_VAR_STORE(8, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \
  } else {                                                        \
    MATRIX_VAR_STORE(8, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (FP32, M=32, N=4).
//! Runs `dim` steps; after each step `m` is advanced by 32 floats and `q` by
//! 4 floats (both caller pointers are modified in place). `_mm_load_ps` is
//! selected only when `m` is 16-byte aligned, `_mm_store_ps` only when `out`
//! is; `_NORM` is passed through to MATRIX_VAR_STORE untouched.
#define ACCUM_FP32_32X4_SSE(m, q, dim, out, _NORM)                \
  MATRIX_VAR_INIT(8, 4, __m128, xmm_sum, _mm_setzero_ps())        \
  if (((uintptr_t)m & 0xf) != 0) {                                \
    const float *q_stop = q + (dim << 2);                         \
    while (q != q_stop) {                                         \
      MATRIX_FP32_ITER_32X4_SSE(m, q, xmm_sum, _mm_loadu_ps,      \
                                ACCUM_FP32_STEP_SSE)              \
      m += 32;                                                    \
      q += 4;                                                     \
    }                                                             \
  } else {                                                        \
    const float *q_stop = q + (dim << 2);                         \
    while (q != q_stop) {                                         \
      MATRIX_FP32_ITER_32X4_SSE(m, q, xmm_sum, _mm_load_ps,       \
                                ACCUM_FP32_STEP_SSE)              \
      m += 32;                                                    \
      q += 4;                                                     \
    }                                                             \
  }                                                               \
  if (((uintptr_t)out & 0xf) != 0) {                              \
    MATRIX_VAR_STORE(8, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \
  } else {                                                        \
    MATRIX_VAR_STORE(8, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (FP32, M=32, N=8).
//! Runs `dim` steps; after each step `m` is advanced by 32 floats and `q` by
//! 8 floats (both caller pointers are modified in place). `_mm_load_ps` is
//! selected only when `m` is 16-byte aligned, `_mm_store_ps` only when `out`
//! is; `_NORM` is passed through to MATRIX_VAR_STORE untouched.
#define ACCUM_FP32_32X8_SSE(m, q, dim, out, _NORM)                \
  MATRIX_VAR_INIT(8, 8, __m128, xmm_sum, _mm_setzero_ps())        \
  if (((uintptr_t)m & 0xf) != 0) {                                \
    const float *q_stop = q + (dim << 3);                         \
    while (q != q_stop) {                                         \
      MATRIX_FP32_ITER_32X8_SSE(m, q, xmm_sum, _mm_loadu_ps,      \
                                ACCUM_FP32_STEP_SSE)              \
      m += 32;                                                    \
      q += 8;                                                     \
    }                                                             \
  } else {                                                        \
    const float *q_stop = q + (dim << 3);                         \
    while (q != q_stop) {                                         \
      MATRIX_FP32_ITER_32X8_SSE(m, q, xmm_sum, _mm_load_ps,       \
                                ACCUM_FP32_STEP_SSE)              \
      m += 32;                                                    \
      q += 8;                                                     \
    }                                                             \
  }                                                               \
  if (((uintptr_t)out & 0xf) != 0) {                              \
    MATRIX_VAR_STORE(8, 8, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \
  } else {                                                        \
    MATRIX_VAR_STORE(8, 8, 4, xmm_sum, out, _mm_store_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (FP32, M=32, N=16).
//! Runs `dim` steps; after each step `m` is advanced by 32 floats and `q` by
//! 16 floats (both caller pointers are modified in place). `_mm_load_ps` is
//! selected only when `m` is 16-byte aligned, `_mm_store_ps` only when `out`
//! is; `_NORM` is passed through to MATRIX_VAR_STORE untouched.
#define ACCUM_FP32_32X16_SSE(m, q, dim, out, _NORM)                \
  MATRIX_VAR_INIT(8, 16, __m128, xmm_sum, _mm_setzero_ps())        \
  if (((uintptr_t)m & 0xf) != 0) {                                 \
    const float *q_stop = q + (dim << 4);                          \
    while (q != q_stop) {                                          \
      MATRIX_FP32_ITER_32X16_SSE(m, q, xmm_sum, _mm_loadu_ps,      \
                                 ACCUM_FP32_STEP_SSE)              \
      m += 32;                                                     \
      q += 16;                                                     \
    }                                                              \
  } else {                                                         \
    const float *q_stop = q + (dim << 4);                          \
    while (q != q_stop) {                                          \
      MATRIX_FP32_ITER_32X16_SSE(m, q, xmm_sum, _mm_load_ps,       \
                                 ACCUM_FP32_STEP_SSE)              \
      m += 32;                                                     \
      q += 16;                                                     \
    }                                                              \
  }                                                                \
  if (((uintptr_t)out & 0xf) != 0) {                               \
    MATRIX_VAR_STORE(8, 16, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \
  } else {                                                         \
    MATRIX_VAR_STORE(8, 16, 4, xmm_sum, out, _mm_store_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (FP32, M=32, N=32).
//! Runs `dim` steps; after each step `m` is advanced by 32 floats and `q` by
//! 32 floats (both caller pointers are modified in place). `_mm_load_ps` is
//! selected only when `m` is 16-byte aligned, `_mm_store_ps` only when `out`
//! is; `_NORM` is passed through to MATRIX_VAR_STORE untouched.
#define ACCUM_FP32_32X32_SSE(m, q, dim, out, _NORM)                \
  MATRIX_VAR_INIT(8, 32, __m128, xmm_sum, _mm_setzero_ps())        \
  if (((uintptr_t)m & 0xf) != 0) {                                 \
    const float *q_stop = q + (dim << 5);                          \
    while (q != q_stop) {                                          \
      MATRIX_FP32_ITER_32X32_SSE(m, q, xmm_sum, _mm_loadu_ps,      \
                                 ACCUM_FP32_STEP_SSE)              \
      m += 32;                                                     \
      q += 32;                                                     \
    }                                                              \
  } else {                                                         \
    const float *q_stop = q + (dim << 5);                          \
    while (q != q_stop) {                                          \
      MATRIX_FP32_ITER_32X32_SSE(m, q, xmm_sum, _mm_load_ps,       \
                                 ACCUM_FP32_STEP_SSE)              \
      m += 32;                                                     \
      q += 32;                                                     \
    }                                                              \
  }                                                                \
  if (((uintptr_t)out & 0xf) != 0) {                               \
    MATRIX_VAR_STORE(8, 32, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \
  } else {                                                         \
    MATRIX_VAR_STORE(8, 32, 4, xmm_sum, out, _mm_store_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (FP32, M=2, N=1).
//! The main loop advances `m` by 8 floats and `q` by 4 floats per pass and
//! stops at `qe_aligned`, i.e. after the largest multiple of four query
//! elements. The two 128-bit halves of the 256-bit accumulator are then added
//! together. If at least two query elements remain, one SSE step consumes
//! 4 floats of `m` and 2 of `q`; the upper 64 bits of the sum are folded onto
//! the lower 64 bits; a final single query element, if any, is applied with
//! `m[0]`/`m[1]` in the low lanes. The low two floats of `_NORM(sum)` are
//! written to `out` with `_mm_storel_pi`.
//! Only `m` is tested (32-byte boundary) when choosing between
//! `_mm256_load_ps` and `_mm256_loadu_ps`. `m` and `q` are modified in place,
//! and `qe`, `qe_aligned` and `xmm_sum_0_0` are declared in the expanding scope.
#define ACCUM_FP32_2X1_AVX(m, q, dim, out, _NORM)                         \
  MATRIX_VAR_INIT(1, 1, __m256, ymm_sum, _mm256_setzero_ps())             \
  const float *qe = q + dim;                                              \
  const float *qe_aligned = q + ((dim >> 2) << 2);                        \
  if (((uintptr_t)m & 0x1f) != 0) {                                       \
    while (q != qe_aligned) {                                             \
      MATRIX_FP32_ITER_2X1_AVX(m, q, ymm_sum, _mm256_loadu_ps,            \
                               ACCUM_FP32_STEP_AVX)                       \
      m += 8;                                                             \
      q += 4;                                                             \
    }                                                                     \
  } else {                                                                \
    while (q != qe_aligned) {                                             \
      MATRIX_FP32_ITER_2X1_AVX(m, q, ymm_sum, _mm256_load_ps,             \
                               ACCUM_FP32_STEP_AVX)                       \
      m += 8;                                                             \
      q += 4;                                                             \
    }                                                                     \
  }                                                                       \
  __m128 xmm_sum_0_0 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_0),    \
                                  _mm256_extractf128_ps(ymm_sum_0_0, 1)); \
  if (qe_aligned + 2 <= qe) {                                             \
    __m128 xmm_m = _mm_loadu_ps(m);                                       \
    __m128 xmm_q = _mm_set_ps(q[1], q[1], q[0], q[0]);                    \
    ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_0)                        \
    m += 4;                                                               \
    q += 2;                                                               \
  }                                                                       \
  xmm_sum_0_0 =                                                           \
      _mm_add_ps(xmm_sum_0_0, _mm_movehl_ps(xmm_sum_0_0, xmm_sum_0_0));   \
  if (q != qe) {                                                          \
    __m128 xmm_m = _mm_set_ps(0.0f, 0.0f, m[1], m[0]);                    \
    __m128 xmm_q = _mm_broadcast_ss(q);                                   \
    ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_0)                        \
  }                                                                       \
  _mm_storel_pi((__m64 *)out, _NORM(xmm_sum_0_0));

//! Compute the distance between matrix and query (FP32, M=2, N=2).
//! The main loop advances both `m` and `q` by 8 floats per pass and stops at
//! `qe_aligned`. Each of the two 256-bit accumulators is then reduced to
//! 128 bits by adding its halves. If at least four query floats remain, one
//! SSE step consumes 4 floats of `m` and 4 of `q`, pairing `m` with the
//! even-indexed and odd-indexed query floats for the two accumulators. Both
//! accumulators are then merged into `xmm_sum_0_0`, a final step handles the
//! last two query floats if any, and the four results are stored to `out`.
//! The aligned `_mm256_load_ps` path is taken only when BOTH `m` and `q` are
//! 32-byte aligned; the aligned store only when `out` is 16-byte aligned.
//! `m` and `q` are modified in place.
#define ACCUM_FP32_2X2_AVX(m, q, dim, out, _NORM)                         \
  MATRIX_VAR_INIT(1, 2, __m256, ymm_sum, _mm256_setzero_ps())             \
  const float *qe = q + (dim << 1);                                       \
  const float *qe_aligned = q + ((dim >> 2) << 3);                        \
  if (((uintptr_t)m & 0x1f) != 0 || ((uintptr_t)q & 0x1f) != 0) {         \
    while (q != qe_aligned) {                                             \
      MATRIX_FP32_ITER_2X2_AVX(m, q, ymm_sum, _mm256_loadu_ps,            \
                               ACCUM_FP32_STEP_AVX)                       \
      m += 8;                                                             \
      q += 8;                                                             \
    }                                                                     \
  } else {                                                                \
    while (q != qe_aligned) {                                             \
      MATRIX_FP32_ITER_2X2_AVX(m, q, ymm_sum, _mm256_load_ps,             \
                               ACCUM_FP32_STEP_AVX)                       \
      m += 8;                                                             \
      q += 8;                                                             \
    }                                                                     \
  }                                                                       \
  __m128 xmm_sum_0_0 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_0),    \
                                  _mm256_extractf128_ps(ymm_sum_0_0, 1)); \
  __m128 xmm_sum_0_1 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_1),    \
                                  _mm256_extractf128_ps(ymm_sum_0_1, 1)); \
  if (qe_aligned + 4 <= qe) {                                             \
    __m128 xmm_q = _mm_loadu_ps(q);                                       \
    __m128 xmm_m = _mm_loadu_ps(m);                                       \
    __m128 xmm_p = _mm_permute_ps(xmm_q, _MM_SHUFFLE(2, 2, 0, 0));        \
    ACCUM_FP32_STEP_SSE(xmm_m, xmm_p, xmm_sum_0_0)                        \
    xmm_p = _mm_permute_ps(xmm_q, _MM_SHUFFLE(3, 3, 1, 1));               \
    ACCUM_FP32_STEP_SSE(xmm_m, xmm_p, xmm_sum_0_1)                        \
    m += 4;                                                               \
    q += 4;                                                               \
  }                                                                       \
  xmm_sum_0_0 = _mm_add_ps(_mm_movelh_ps(xmm_sum_0_0, xmm_sum_0_1),       \
                           _mm_movehl_ps(xmm_sum_0_1, xmm_sum_0_0));      \
  if (q != qe) {                                                          \
    __m128 xmm_m = _mm_set_ps(m[1], m[0], m[1], m[0]);                    \
    __m128 xmm_q = _mm_set_ps(q[1], q[1], q[0], q[0]);                    \
    ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_0)                        \
  }                                                                       \
  if (((uintptr_t)out & 0xf) != 0) {                                      \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)         \
  } else {                                                                \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)          \
  }

//! Compute the distance between matrix and query (FP32, M=4, N=1).
//! The main loop advances `m` by 8 floats and `q` by 2 floats per pass and
//! stops after the largest even number of query elements. The 256-bit
//! accumulator is then reduced to 128 bits by adding its halves, and a single
//! leftover query element (if `q != qe`) is applied with one SSE step on
//! 4 floats of `m`. The result is stored to `out` through MATRIX_VAR_STORE.
//! Only `m` is tested (32-byte boundary) when choosing between
//! `_mm256_load_ps` and `_mm256_loadu_ps`; the aligned store is used only when
//! `out` is 16-byte aligned. `m` and `q` are modified in place.
#define ACCUM_FP32_4X1_AVX(m, q, dim, out, _NORM)                         \
  MATRIX_VAR_INIT(1, 1, __m256, ymm_sum, _mm256_setzero_ps())             \
  const float *qe = q + dim;                                              \
  if (((uintptr_t)m & 0x1f) != 0) {                                       \
    const float *qe_aligned = q + ((dim >> 1) << 1);                      \
    while (q != qe_aligned) {                                             \
      MATRIX_FP32_ITER_4X1_AVX(m, q, ymm_sum, _mm256_loadu_ps,            \
                               ACCUM_FP32_STEP_AVX)                       \
      m += 8;                                                             \
      q += 2;                                                             \
    }                                                                     \
  } else {                                                                \
    const float *qe_aligned = q + ((dim >> 1) << 1);                      \
    while (q != qe_aligned) {                                             \
      MATRIX_FP32_ITER_4X1_AVX(m, q, ymm_sum, _mm256_load_ps,             \
                               ACCUM_FP32_STEP_AVX)                       \
      m += 8;                                                             \
      q += 2;                                                             \
    }                                                                     \
  }                                                                       \
  __m128 xmm_sum_0_0 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_0),    \
                                  _mm256_extractf128_ps(ymm_sum_0_0, 1)); \
  if (q != qe) {                                                          \
    __m128 xmm_m = _mm_loadu_ps(m);                                       \
    __m128 xmm_q = _mm_broadcast_ss(q);                                   \
    ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_0)                        \
  }                                                                       \
  if (((uintptr_t)out & 0xf) != 0) {                                      \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)         \
  } else {                                                                \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)          \
  }

//! Compute the distance between matrix and query (FP32, M=4, N=2).
//! The main loop advances `m` by 8 floats and `q` by 4 floats per pass. Each
//! of the two 256-bit accumulators is then reduced to 128 bits by adding its
//! halves. If query data is left over (`q != qe`), one SSE step loads 4 floats
//! of `m` and applies them against `q[0]` and `q[1]` (each broadcast) for the
//! two accumulators. Results are stored to `out` through MATRIX_VAR_STORE.
//! Only `m` is tested (32-byte boundary) when choosing between
//! `_mm256_load_ps` and `_mm256_loadu_ps`; the aligned store is used only when
//! `out` is 16-byte aligned. `m` and `q` are modified in place.
#define ACCUM_FP32_4X2_AVX(m, q, dim, out, _NORM)                         \
  MATRIX_VAR_INIT(1, 2, __m256, ymm_sum, _mm256_setzero_ps())             \
  const float *qe = q + (dim << 1);                                       \
  if (((uintptr_t)m & 0x1f) != 0) {                                       \
    const float *qe_aligned = q + ((dim >> 1) << 2);                      \
    while (q != qe_aligned) {                                             \
      MATRIX_FP32_ITER_4X2_AVX(m, q, ymm_sum, _mm256_loadu_ps,            \
                               ACCUM_FP32_STEP_AVX)                       \
      m += 8;                                                             \
      q += 4;                                                             \
    }                                                                     \
  } else {                                                                \
    const float *qe_aligned = q + ((dim >> 1) << 2);                      \
    while (q != qe_aligned) {                                             \
      MATRIX_FP32_ITER_4X2_AVX(m, q, ymm_sum, _mm256_load_ps,             \
                               ACCUM_FP32_STEP_AVX)                       \
      m += 8;                                                             \
      q += 4;                                                             \
    }                                                                     \
  }                                                                       \
  __m128 xmm_sum_0_0 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_0),    \
                                  _mm256_extractf128_ps(ymm_sum_0_0, 1)); \
  __m128 xmm_sum_0_1 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_1),    \
                                  _mm256_extractf128_ps(ymm_sum_0_1, 1)); \
  if (q != qe) {                                                          \
    __m128 xmm_m = _mm_loadu_ps(m);                                       \
    __m128 xmm_q = _mm_broadcast_ss(q);                                   \
    ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_0)                        \
    xmm_q = _mm_broadcast_ss(q + 1);                                      \
    ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_1)                        \
  }                                                                       \
  if (((uintptr_t)out & 0xf) != 0) {                                      \
    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM)         \
  } else {                                                                \
    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)          \
  }

//! Compute the distance between matrix and query (FP32, M=4, N=4).
//! The main loop advances both `m` and `q` by 8 floats per pass. Each of the
//! four 256-bit accumulators is then reduced to 128 bits by adding its
//! halves. If query data is left over (`q != qe`), one SSE step loads 4 floats
//! of `m` and applies them against `q[0]`..`q[3]` (each broadcast), one per
//! accumulator. Results are stored to `out` through MATRIX_VAR_STORE.
//! The aligned `_mm256_load_ps` path is taken only when BOTH `m` and `q` are
//! 32-byte aligned; the aligned store only when `out` is 16-byte aligned.
//! `m` and `q` are modified in place.
#define ACCUM_FP32_4X4_AVX(m, q, dim, out, _NORM)                         \
  MATRIX_VAR_INIT(1, 4, __m256, ymm_sum, _mm256_setzero_ps())             \
  const float *qe = q + (dim << 2);                                       \
  if (((uintptr_t)m & 0x1f) != 0 || ((uintptr_t)q & 0x1f) != 0) {         \
    const float *qe_aligned = q + ((dim >> 1) << 3);                      \
    while (q != qe_aligned) {                                             \
      MATRIX_FP32_ITER_4X4_AVX(m, q, ymm_sum, _mm256_loadu_ps,            \
                               ACCUM_FP32_STEP_AVX)                       \
      m += 8;                                                             \
      q += 8;                                                             \
    }                                                                     \
  } else {                                                                \
    const float *qe_aligned = q + ((dim >> 1) << 3);                      \
    while (q != qe_aligned) {                                             \
      MATRIX_FP32_ITER_4X4_AVX(m, q, ymm_sum, _mm256_load_ps,             \
                               ACCUM_FP32_STEP_AVX)                       \
      m += 8;                                                             \
      q += 8;                                                             \
    }                                                                     \
  }                                                                       \
  __m128 xmm_sum_0_0 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_0),    \
                                  _mm256_extractf128_ps(ymm_sum_0_0, 1)); \
  __m128 xmm_sum_0_1 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_1),    \
                                  _mm256_extractf128_ps(ymm_sum_0_1, 1)); \
  __m128 xmm_sum_0_2 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_2),    \
                                  _mm256_extractf128_ps(ymm_sum_0_2, 1)); \
  __m128 xmm_sum_0_3 = _mm_add_ps(_mm256_castps256_ps128(ymm_sum_0_3),    \
                                  _mm256_extractf128_ps(ymm_sum_0_3, 1)); \
  if (q != qe) {                                                          \
    __m128 xmm_m = _mm_loadu_ps(m);                                       \
    __m128 xmm_q = _mm_broadcast_ss(q);                                   \
    ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_0)                        \
    xmm_q = _mm_broadcast_ss(q + 1);                                      \
    ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_1)                        \
    xmm_q = _mm_broadcast_ss(q + 2);                                      \
    ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_2)                        \
    xmm_q = _mm_broadcast_ss(q + 3);                                      \
    ACCUM_FP32_STEP_SSE(xmm_m, xmm_q, xmm_sum_0_3)                        \
  }                                                                       \
  if (((uintptr_t)out & 0xf) != 0) {                                      \
    MATRIX_VAR_STORE(1, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM)         \
  } else {                                                                \
    MATRIX_VAR_STORE(1, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)          \
  }

//! Compute the distance between matrix and query (FP32, M=8, N=1).
//! Runs `dim` steps; after each step `m` is advanced by 8 floats and `q` by
//! one float (both caller pointers are modified in place). `_mm256_load_ps`
//! is selected only when `m` is 32-byte aligned, `_mm256_store_ps` only when
//! `out` is; `_NORM` is passed through to MATRIX_VAR_STORE untouched.
#define ACCUM_FP32_8X1_AVX(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(1, 1, __m256, ymm_sum, _mm256_setzero_ps())        \
  if (((uintptr_t)m & 0x1f) != 0) {                                  \
    const float *q_stop = q + dim;                                   \
    while (q != q_stop) {                                            \
      MATRIX_FP32_ITER_8X1_AVX(m, q, ymm_sum, _mm256_loadu_ps,       \
                               ACCUM_FP32_STEP_AVX)                  \
      m += 8;                                                        \
      ++q;                                                           \
    }                                                                \
  } else {                                                           \
    const float *q_stop = q + dim;                                   \
    while (q != q_stop) {                                            \
      MATRIX_FP32_ITER_8X1_AVX(m, q, ymm_sum, _mm256_load_ps,        \
                               ACCUM_FP32_STEP_AVX)                  \
      m += 8;                                                        \
      ++q;                                                           \
    }                                                                \
  }                                                                  \
  if (((uintptr_t)out & 0x1f) != 0) {                                \
    MATRIX_VAR_STORE(1, 1, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \
  } else {                                                           \
    MATRIX_VAR_STORE(1, 1, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (FP32, M=8, N=2).
//! Runs `dim` steps; after each step `m` is advanced by 8 floats and `q` by
//! 2 floats (both caller pointers are modified in place). `_mm256_load_ps`
//! is selected only when `m` is 32-byte aligned, `_mm256_store_ps` only when
//! `out` is; `_NORM` is passed through to MATRIX_VAR_STORE untouched.
#define ACCUM_FP32_8X2_AVX(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(1, 2, __m256, ymm_sum, _mm256_setzero_ps())        \
  if (((uintptr_t)m & 0x1f) != 0) {                                  \
    const float *q_stop = q + (dim << 1);                            \
    while (q != q_stop) {                                            \
      MATRIX_FP32_ITER_8X2_AVX(m, q, ymm_sum, _mm256_loadu_ps,       \
                               ACCUM_FP32_STEP_AVX)                  \
      m += 8;                                                        \
      q += 2;                                                        \
    }                                                                \
  } else {                                                           \
    const float *q_stop = q + (dim << 1);                            \
    while (q != q_stop) {                                            \
      MATRIX_FP32_ITER_8X2_AVX(m, q, ymm_sum, _mm256_load_ps,        \
                               ACCUM_FP32_STEP_AVX)                  \
      m += 8;                                                        \
      q += 2;                                                        \
    }                                                                \
  }                                                                  \
  if (((uintptr_t)out & 0x1f) != 0) {                                \
    MATRIX_VAR_STORE(1, 2, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \
  } else {                                                           \
    MATRIX_VAR_STORE(1, 2, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (FP32, M=8, N=4).
//! Runs `dim` steps; after each step `m` is advanced by 8 floats and `q` by
//! 4 floats (both caller pointers are modified in place). `_mm256_load_ps`
//! is selected only when `m` is 32-byte aligned, `_mm256_store_ps` only when
//! `out` is; `_NORM` is passed through to MATRIX_VAR_STORE untouched.
#define ACCUM_FP32_8X4_AVX(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(1, 4, __m256, ymm_sum, _mm256_setzero_ps())        \
  if (((uintptr_t)m & 0x1f) != 0) {                                  \
    const float *q_stop = q + (dim << 2);                            \
    while (q != q_stop) {                                            \
      MATRIX_FP32_ITER_8X4_AVX(m, q, ymm_sum, _mm256_loadu_ps,       \
                               ACCUM_FP32_STEP_AVX)                  \
      m += 8;                                                        \
      q += 4;                                                        \
    }                                                                \
  } else {                                                           \
    const float *q_stop = q + (dim << 2);                            \
    while (q != q_stop) {                                            \
      MATRIX_FP32_ITER_8X4_AVX(m, q, ymm_sum, _mm256_load_ps,        \
                               ACCUM_FP32_STEP_AVX)                  \
      m += 8;                                                        \
      q += 4;                                                        \
    }                                                                \
  }                                                                  \
  if (((uintptr_t)out & 0x1f) != 0) {                                \
    MATRIX_VAR_STORE(1, 4, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \
  } else {                                                           \
    MATRIX_VAR_STORE(1, 4, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (FP32, M=8, N=8).
//! Runs `dim` steps; after each step `m` is advanced by 8 floats and `q` by
//! 8 floats (both caller pointers are modified in place). `_mm256_load_ps`
//! is selected only when `m` is 32-byte aligned, `_mm256_store_ps` only when
//! `out` is; `_NORM` is passed through to MATRIX_VAR_STORE untouched.
#define ACCUM_FP32_8X8_AVX(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(1, 8, __m256, ymm_sum, _mm256_setzero_ps())        \
  if (((uintptr_t)m & 0x1f) != 0) {                                  \
    const float *q_stop = q + (dim << 3);                            \
    while (q != q_stop) {                                            \
      MATRIX_FP32_ITER_8X8_AVX(m, q, ymm_sum, _mm256_loadu_ps,       \
                               ACCUM_FP32_STEP_AVX)                  \
      m += 8;                                                        \
      q += 8;                                                        \
    }                                                                \
  } else {                                                           \
    const float *q_stop = q + (dim << 3);                            \
    while (q != q_stop) {                                            \
      MATRIX_FP32_ITER_8X8_AVX(m, q, ymm_sum, _mm256_load_ps,        \
                               ACCUM_FP32_STEP_AVX)                  \
      m += 8;                                                        \
      q += 8;                                                        \
    }                                                                \
  }                                                                  \
  if (((uintptr_t)out & 0x1f) != 0) {                                \
    MATRIX_VAR_STORE(1, 8, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \
  } else {                                                           \
    MATRIX_VAR_STORE(1, 8, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (FP32, M=16, N=1).
//! Runs `dim` steps; after each step `m` is advanced by 16 floats and `q` by
//! one float (both caller pointers are modified in place). `_mm256_load_ps`
//! is selected only when `m` is 32-byte aligned, `_mm256_store_ps` only when
//! `out` is; `_NORM` is passed through to MATRIX_VAR_STORE untouched.
#define ACCUM_FP32_16X1_AVX(m, q, dim, out, _NORM)                   \
  MATRIX_VAR_INIT(2, 1, __m256, ymm_sum, _mm256_setzero_ps())        \
  if (((uintptr_t)m & 0x1f) != 0) {                                  \
    const float *q_stop = q + dim;                                   \
    while (q != q_stop) {                                            \
      MATRIX_FP32_ITER_16X1_AVX(m, q, ymm_sum, _mm256_loadu_ps,      \
                                ACCUM_FP32_STEP_AVX)                 \
      m += 16;                                                       \
      ++q;                                                           \
    }                                                                \
  } else {                                                           \
    const float *q_stop = q + dim;                                   \
    while (q != q_stop) {                                            \
      MATRIX_FP32_ITER_16X1_AVX(m, q, ymm_sum, _mm256_load_ps,       \
                                ACCUM_FP32_STEP_AVX)                 \
      m += 16;                                                       \
      ++q;                                                           \
    }                                                                \
  }                                                                  \
  if (((uintptr_t)out & 0x1f) != 0) {                                \
    MATRIX_VAR_STORE(2, 1, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \
  } else {                                                           \
    MATRIX_VAR_STORE(2, 1, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (FP32, M=16, N=2).
//! Runs `dim` steps; after each step `m` is advanced by 16 floats and `q` by
//! 2 floats (both caller pointers are modified in place). `_mm256_load_ps`
//! is selected only when `m` is 32-byte aligned, `_mm256_store_ps` only when
//! `out` is; `_NORM` is passed through to MATRIX_VAR_STORE untouched.
#define ACCUM_FP32_16X2_AVX(m, q, dim, out, _NORM)                   \
  MATRIX_VAR_INIT(2, 2, __m256, ymm_sum, _mm256_setzero_ps())        \
  if (((uintptr_t)m & 0x1f) != 0) {                                  \
    const float *q_stop = q + (dim << 1);                            \
    while (q != q_stop) {                                            \
      MATRIX_FP32_ITER_16X2_AVX(m, q, ymm_sum, _mm256_loadu_ps,      \
                                ACCUM_FP32_STEP_AVX)                 \
      m += 16;                                                       \
      q += 2;                                                        \
    }                                                                \
  } else {                                                           \
    const float *q_stop = q + (dim << 1);                            \
    while (q != q_stop) {                                            \
      MATRIX_FP32_ITER_16X2_AVX(m, q, ymm_sum, _mm256_load_ps,       \
                                ACCUM_FP32_STEP_AVX)                 \
      m += 16;                                                       \
      q += 2;                                                        \
    }                                                                \
  }                                                                  \
  if (((uintptr_t)out & 0x1f) != 0) {                                \
    MATRIX_VAR_STORE(2, 2, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \
  } else {                                                           \
    MATRIX_VAR_STORE(2, 2, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (FP32, M=16, N=4).
//! Runs `dim` steps; after each step `m` is advanced by 16 floats and `q` by
//! 4 floats (both caller pointers are modified in place). `_mm256_load_ps`
//! is selected only when `m` is 32-byte aligned, `_mm256_store_ps` only when
//! `out` is; `_NORM` is passed through to MATRIX_VAR_STORE untouched.
#define ACCUM_FP32_16X4_AVX(m, q, dim, out, _NORM)                   \
  MATRIX_VAR_INIT(2, 4, __m256, ymm_sum, _mm256_setzero_ps())        \
  if (((uintptr_t)m & 0x1f) != 0) {                                  \
    const float *q_stop = q + (dim << 2);                            \
    while (q != q_stop) {                                            \
      MATRIX_FP32_ITER_16X4_AVX(m, q, ymm_sum, _mm256_loadu_ps,      \
                                ACCUM_FP32_STEP_AVX)                 \
      m += 16;                                                       \
      q += 4;                                                        \
    }                                                                \
  } else {                                                           \
    const float *q_stop = q + (dim << 2);                            \
    while (q != q_stop) {                                            \
      MATRIX_FP32_ITER_16X4_AVX(m, q, ymm_sum, _mm256_load_ps,       \
                                ACCUM_FP32_STEP_AVX)                 \
      m += 16;                                                       \
      q += 4;                                                        \
    }                                                                \
  }                                                                  \
  if (((uintptr_t)out & 0x1f) != 0) {                                \
    MATRIX_VAR_STORE(2, 4, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \
  } else {                                                           \
    MATRIX_VAR_STORE(2, 4, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (FP32, M=16, N=8).
//! Runs `dim` steps; after each step `m` is advanced by 16 floats and `q` by
//! 8 floats (both caller pointers are modified in place). `_mm256_load_ps`
//! is selected only when `m` is 32-byte aligned, `_mm256_store_ps` only when
//! `out` is; `_NORM` is passed through to MATRIX_VAR_STORE untouched.
#define ACCUM_FP32_16X8_AVX(m, q, dim, out, _NORM)                   \
  MATRIX_VAR_INIT(2, 8, __m256, ymm_sum, _mm256_setzero_ps())        \
  if (((uintptr_t)m & 0x1f) != 0) {                                  \
    const float *q_stop = q + (dim << 3);                            \
    while (q != q_stop) {                                            \
      MATRIX_FP32_ITER_16X8_AVX(m, q, ymm_sum, _mm256_loadu_ps,      \
                                ACCUM_FP32_STEP_AVX)                 \
      m += 16;                                                       \
      q += 8;                                                        \
    }                                                                \
  } else {                                                           \
    const float *q_stop = q + (dim << 3);                            \
    while (q != q_stop) {                                            \
      MATRIX_FP32_ITER_16X8_AVX(m, q, ymm_sum, _mm256_load_ps,       \
                                ACCUM_FP32_STEP_AVX)                 \
      m += 16;                                                       \
      q += 8;                                                        \
    }                                                                \
  }                                                                  \
  if (((uintptr_t)out & 0x1f) != 0) {                                \
    MATRIX_VAR_STORE(2, 8, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \
  } else {                                                           \
    MATRIX_VAR_STORE(2, 8, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (FP32, M=16, N=16).
//! Runs `dim` steps; after each step `m` is advanced by 16 floats and `q` by
//! 16 floats (both caller pointers are modified in place). `_mm256_load_ps`
//! is selected only when `m` is 32-byte aligned, `_mm256_store_ps` only when
//! `out` is; `_NORM` is passed through to MATRIX_VAR_STORE untouched.
#define ACCUM_FP32_16X16_AVX(m, q, dim, out, _NORM)                   \
  MATRIX_VAR_INIT(2, 16, __m256, ymm_sum, _mm256_setzero_ps())        \
  if (((uintptr_t)m & 0x1f) != 0) {                                   \
    const float *q_stop = q + (dim << 4);                             \
    while (q != q_stop) {                                             \
      MATRIX_FP32_ITER_16X16_AVX(m, q, ymm_sum, _mm256_loadu_ps,      \
                                 ACCUM_FP32_STEP_AVX)                 \
      m += 16;                                                        \
      q += 16;                                                        \
    }                                                                 \
  } else {                                                            \
    const float *q_stop = q + (dim << 4);                             \
    while (q != q_stop) {                                             \
      MATRIX_FP32_ITER_16X16_AVX(m, q, ymm_sum, _mm256_load_ps,       \
                                 ACCUM_FP32_STEP_AVX)                 \
      m += 16;                                                        \
      q += 16;                                                        \
    }                                                                 \
  }                                                                   \
  if (((uintptr_t)out & 0x1f) != 0) {                                 \
    MATRIX_VAR_STORE(2, 16, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \
  } else {                                                            \
    MATRIX_VAR_STORE(2, 16, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (FP32, M=32, N=1)
//!
//! Statement macro (declarations plus if/for blocks); expand it at statement
//! level inside a function body, never inside an expression.
//! @param m     matrix pointer lvalue, advanced by 32 elements per step; when
//!              it is 32-byte aligned the loop is given _mm256_load_ps,
//!              otherwise _mm256_loadu_ps.
//! @param q     query pointer lvalue (assigned to a `const float *`), advanced
//!              by one element per step.
//! @param dim   number of steps: the loop runs until `q` has moved `dim`
//!              elements (tested with `!=`).
//! @param out   result pointer; 32-byte alignment picks _mm256_store_ps over
//!              _mm256_storeu_ps for MATRIX_VAR_STORE.
//! @param _NORM forwarded unchanged to MATRIX_VAR_STORE.
//! @note `m` and `q` are left advanced. `m`, `dim` and `out` are parenthesized
//!       where a cast, addition or mask is applied to them so that expression
//!       arguments keep their grouping.
#define ACCUM_FP32_32X1_AVX(m, q, dim, out, _NORM)                   \
  MATRIX_VAR_INIT(4, 1, __m256, ymm_sum, _mm256_setzero_ps())        \
  if (((uintptr_t)(m) & 0x1f) == 0) {                                \
    for (const float *qe = q + (dim); q != qe; m += 32, ++q) {       \
      MATRIX_FP32_ITER_32X1_AVX(m, q, ymm_sum, _mm256_load_ps,       \
                                ACCUM_FP32_STEP_AVX)                 \
    }                                                                \
  } else {                                                           \
    for (const float *qe = q + (dim); q != qe; m += 32, ++q) {       \
      MATRIX_FP32_ITER_32X1_AVX(m, q, ymm_sum, _mm256_loadu_ps,      \
                                ACCUM_FP32_STEP_AVX)                 \
    }                                                                \
  }                                                                  \
  if (((uintptr_t)(out) & 0x1f) == 0) {                              \
    MATRIX_VAR_STORE(4, 1, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \
  } else {                                                           \
    MATRIX_VAR_STORE(4, 1, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \
  }

//! Compute the distance between matrix and query (FP32, M=32, N=2)
//!
//! Statement macro (declarations plus if/for blocks); expand it at statement
//! level inside a function body, never inside an expression.
//! @param m     matrix pointer lvalue, advanced by 32 elements per step; when
//!              it is 32-byte aligned the loop is given _mm256_load_ps,
//!              otherwise _mm256_loadu_ps.
//! @param q     query pointer lvalue (assigned to a `const float *`), advanced
//!              by 2 elements per step.
//! @param dim   number of steps: the loop runs until `q` has moved `dim * 2`
//!              elements (tested with `!=`).
//! @param out   result pointer; 32-byte alignment picks _mm256_store_ps over
//!              _mm256_storeu_ps for MATRIX_VAR_STORE.
//! @param _NORM forwarded unchanged to MATRIX_VAR_STORE.
//! @note `m` and `q` are left advanced. `m`, `dim` and `out` are parenthesized
//!       where a cast, shift or mask is applied to them so that expression
//!       arguments keep their grouping.
#define ACCUM_FP32_32X2_AVX(m, q, dim, out, _NORM)                       \
  MATRIX_VAR_INIT(4, 2, __m256, ymm_sum, _mm256_setzero_ps())            \
  if (((uintptr_t)(m) & 0x1f) == 0) {                                    \
    for (const float *qe = q + ((dim) << 1); q != qe; m += 32, q += 2) { \
      MATRIX_FP32_ITER_32X2_AVX(m, q, ymm_sum, _mm256_load_ps,           \
                                ACCUM_FP32_STEP_AVX)                     \
    }                                                                    \
  } else {                                                               \
    for (const float *qe = q + ((dim) << 1); q != qe; m += 32, q += 2) { \
      MATRIX_FP32_ITER_32X2_AVX(m, q, ymm_sum, _mm256_loadu_ps,          \
                                ACCUM_FP32_STEP_AVX)                     \
    }                                                                    \
  }                                                                      \
  if (((uintptr_t)(out) & 0x1f) == 0) {                                  \
    MATRIX_VAR_STORE(4, 2, 8, ymm_sum, out, _mm256_store_ps, _NORM)      \
  } else {                                                               \
    MATRIX_VAR_STORE(4, 2, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)     \
  }

//! Compute the distance between matrix and query (FP32, M=32, N=4)
//!
//! Statement macro (declarations plus if/for blocks); expand it at statement
//! level inside a function body, never inside an expression.
//! @param m     matrix pointer lvalue, advanced by 32 elements per step; when
//!              it is 32-byte aligned the loop is given _mm256_load_ps,
//!              otherwise _mm256_loadu_ps.
//! @param q     query pointer lvalue (assigned to a `const float *`), advanced
//!              by 4 elements per step.
//! @param dim   number of steps: the loop runs until `q` has moved `dim * 4`
//!              elements (tested with `!=`).
//! @param out   result pointer; 32-byte alignment picks _mm256_store_ps over
//!              _mm256_storeu_ps for MATRIX_VAR_STORE.
//! @param _NORM forwarded unchanged to MATRIX_VAR_STORE.
//! @note `m` and `q` are left advanced. `m`, `dim` and `out` are parenthesized
//!       where a cast, shift or mask is applied to them so that expression
//!       arguments keep their grouping.
#define ACCUM_FP32_32X4_AVX(m, q, dim, out, _NORM)                       \
  MATRIX_VAR_INIT(4, 4, __m256, ymm_sum, _mm256_setzero_ps())            \
  if (((uintptr_t)(m) & 0x1f) == 0) {                                    \
    for (const float *qe = q + ((dim) << 2); q != qe; m += 32, q += 4) { \
      MATRIX_FP32_ITER_32X4_AVX(m, q, ymm_sum, _mm256_load_ps,           \
                                ACCUM_FP32_STEP_AVX)                     \
    }                                                                    \
  } else {                                                               \
    for (const float *qe = q + ((dim) << 2); q != qe; m += 32, q += 4) { \
      MATRIX_FP32_ITER_32X4_AVX(m, q, ymm_sum, _mm256_loadu_ps,          \
                                ACCUM_FP32_STEP_AVX)                     \
    }                                                                    \
  }                                                                      \
  if (((uintptr_t)(out) & 0x1f) == 0) {                                  \
    MATRIX_VAR_STORE(4, 4, 8, ymm_sum, out, _mm256_store_ps, _NORM)      \
  } else {                                                               \
    MATRIX_VAR_STORE(4, 4, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)     \
  }

//! Compute the distance between matrix and query (FP32, M=32, N=8)
//!
//! Statement macro (declarations plus if/for blocks); expand it at statement
//! level inside a function body, never inside an expression.
//! @param m     matrix pointer lvalue, advanced by 32 elements per step; when
//!              it is 32-byte aligned the loop is given _mm256_load_ps,
//!              otherwise _mm256_loadu_ps.
//! @param q     query pointer lvalue (assigned to a `const float *`), advanced
//!              by 8 elements per step.
//! @param dim   number of steps: the loop runs until `q` has moved `dim * 8`
//!              elements (tested with `!=`).
//! @param out   result pointer; 32-byte alignment picks _mm256_store_ps over
//!              _mm256_storeu_ps for MATRIX_VAR_STORE.
//! @param _NORM forwarded unchanged to MATRIX_VAR_STORE.
//! @note `m` and `q` are left advanced. `m`, `dim` and `out` are parenthesized
//!       where a cast, shift or mask is applied to them so that expression
//!       arguments keep their grouping.
#define ACCUM_FP32_32X8_AVX(m, q, dim, out, _NORM)                       \
  MATRIX_VAR_INIT(4, 8, __m256, ymm_sum, _mm256_setzero_ps())            \
  if (((uintptr_t)(m) & 0x1f) == 0) {                                    \
    for (const float *qe = q + ((dim) << 3); q != qe; m += 32, q += 8) { \
      MATRIX_FP32_ITER_32X8_AVX(m, q, ymm_sum, _mm256_load_ps,           \
                                ACCUM_FP32_STEP_AVX)                     \
    }                                                                    \
  } else {                                                               \
    for (const float *qe = q + ((dim) << 3); q != qe; m += 32, q += 8) { \
      MATRIX_FP32_ITER_32X8_AVX(m, q, ymm_sum, _mm256_loadu_ps,          \
                                ACCUM_FP32_STEP_AVX)                     \
    }                                                                    \
  }                                                                      \
  if (((uintptr_t)(out) & 0x1f) == 0) {                                  \
    MATRIX_VAR_STORE(4, 8, 8, ymm_sum, out, _mm256_store_ps, _NORM)      \
  } else {                                                               \
    MATRIX_VAR_STORE(4, 8, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)     \
  }

//! Compute the distance between matrix and query (FP32, M=32, N=16)
//!
//! Statement macro (declarations plus if/for blocks); expand it at statement
//! level inside a function body, never inside an expression.
//! @param m     matrix pointer lvalue, advanced by 32 elements per step; when
//!              it is 32-byte aligned the loop is given _mm256_load_ps,
//!              otherwise _mm256_loadu_ps.
//! @param q     query pointer lvalue (assigned to a `const float *`), advanced
//!              by 16 elements per step.
//! @param dim   number of steps: the loop runs until `q` has moved `dim * 16`
//!              elements (tested with `!=`).
//! @param out   result pointer; 32-byte alignment picks _mm256_store_ps over
//!              _mm256_storeu_ps for MATRIX_VAR_STORE.
//! @param _NORM forwarded unchanged to MATRIX_VAR_STORE.
//! @note `m` and `q` are left advanced. `m`, `dim` and `out` are parenthesized
//!       where a cast, shift or mask is applied to them so that expression
//!       arguments keep their grouping.
#define ACCUM_FP32_32X16_AVX(m, q, dim, out, _NORM)                       \
  MATRIX_VAR_INIT(4, 16, __m256, ymm_sum, _mm256_setzero_ps())            \
  if (((uintptr_t)(m) & 0x1f) == 0) {                                     \
    for (const float *qe = q + ((dim) << 4); q != qe; m += 32, q += 16) { \
      MATRIX_FP32_ITER_32X16_AVX(m, q, ymm_sum, _mm256_load_ps,           \
                                 ACCUM_FP32_STEP_AVX)                     \
    }                                                                     \
  } else {                                                                \
    for (const float *qe = q + ((dim) << 4); q != qe; m += 32, q += 16) { \
      MATRIX_FP32_ITER_32X16_AVX(m, q, ymm_sum, _mm256_loadu_ps,          \
                                 ACCUM_FP32_STEP_AVX)                     \
    }                                                                     \
  }                                                                       \
  if (((uintptr_t)(out) & 0x1f) == 0) {                                   \
    MATRIX_VAR_STORE(4, 16, 8, ymm_sum, out, _mm256_store_ps, _NORM)      \
  } else {                                                                \
    MATRIX_VAR_STORE(4, 16, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)     \
  }

//! Compute the distance between matrix and query (FP32, M=32, N=32)
//!
//! Statement macro (declarations plus if/for blocks); expand it at statement
//! level inside a function body, never inside an expression.
//! @param m     matrix pointer lvalue, advanced by 32 elements per step; when
//!              it is 32-byte aligned the loop is given _mm256_load_ps,
//!              otherwise _mm256_loadu_ps.
//! @param q     query pointer lvalue (assigned to a `const float *`), advanced
//!              by 32 elements per step.
//! @param dim   number of steps: the loop runs until `q` has moved `dim * 32`
//!              elements (tested with `!=`).
//! @param out   result pointer; 32-byte alignment picks _mm256_store_ps over
//!              _mm256_storeu_ps for MATRIX_VAR_STORE.
//! @param _NORM forwarded unchanged to MATRIX_VAR_STORE.
//! @note `m` and `q` are left advanced. `m`, `dim` and `out` are parenthesized
//!       where a cast, shift or mask is applied to them so that expression
//!       arguments keep their grouping.
#define ACCUM_FP32_32X32_AVX(m, q, dim, out, _NORM)                       \
  MATRIX_VAR_INIT(4, 32, __m256, ymm_sum, _mm256_setzero_ps())            \
  if (((uintptr_t)(m) & 0x1f) == 0) {                                     \
    for (const float *qe = q + ((dim) << 5); q != qe; m += 32, q += 32) { \
      MATRIX_FP32_ITER_32X32_AVX(m, q, ymm_sum, _mm256_load_ps,           \
                                 ACCUM_FP32_STEP_AVX)                     \
    }                                                                     \
  } else {                                                                \
    for (const float *qe = q + ((dim) << 5); q != qe; m += 32, q += 32) { \
      MATRIX_FP32_ITER_32X32_AVX(m, q, ymm_sum, _mm256_loadu_ps,          \
                                 ACCUM_FP32_STEP_AVX)                     \
    }                                                                     \
  }                                                                       \
  if (((uintptr_t)(out) & 0x1f) == 0) {                                   \
    MATRIX_VAR_STORE(4, 32, 8, ymm_sum, out, _mm256_store_ps, _NORM)      \
  } else {                                                                \
    MATRIX_VAR_STORE(4, 32, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)     \
  }

//! Compute the distance between matrix and query (FP32, M=16, N=1)
//!
//! Statement macro (declarations plus if/for blocks); expand it at statement
//! level inside a function body, never inside an expression.
//! @param m     matrix pointer lvalue, advanced by 16 elements per step; when
//!              it is 64-byte aligned the loop is given _mm512_load_ps,
//!              otherwise _mm512_loadu_ps.
//! @param q     query pointer lvalue (assigned to a `const float *`), advanced
//!              by one element per step.
//! @param dim   number of steps: the loop runs until `q` has moved `dim`
//!              elements (tested with `!=`).
//! @param out   result pointer; 64-byte alignment picks _mm512_store_ps over
//!              _mm512_storeu_ps for MATRIX_VAR_STORE.
//! @param _NORM forwarded unchanged to MATRIX_VAR_STORE.
//! @note `m` and `q` are left advanced. `m`, `dim` and `out` are parenthesized
//!       where a cast, addition or mask is applied to them so that expression
//!       arguments keep their grouping.
#define ACCUM_FP32_16X1_AVX512(m, q, dim, out, _NORM)                 \
  MATRIX_VAR_INIT(1, 1, __m512, zmm_sum, _mm512_setzero_ps())         \
  if (((uintptr_t)(m) & 0x3f) == 0) {                                 \
    for (const float *qe = q + (dim); q != qe; m += 16, ++q) {        \
      MATRIX_FP32_ITER_16X1_AVX512(m, q, zmm_sum, _mm512_load_ps,     \
                                   ACCUM_FP32_STEP_AVX512)            \
    }                                                                 \
  } else {                                                            \
    for (const float *qe = q + (dim); q != qe; m += 16, ++q) {        \
      MATRIX_FP32_ITER_16X1_AVX512(m, q, zmm_sum, _mm512_loadu_ps,    \
                                   ACCUM_FP32_STEP_AVX512)            \
    }                                                                 \
  }                                                                   \
  if (((uintptr_t)(out) & 0x3f) == 0) {                               \
    MATRIX_VAR_STORE(1, 1, 16, zmm_sum, out, _mm512_store_ps, _NORM)  \
  } else {                                                            \
    MATRIX_VAR_STORE(1, 1, 16, zmm_sum, out, _mm512_storeu_ps, _NORM) \
  }

//! Compute the distance between matrix and query (FP32, M=16, N=2)
//!
//! Statement macro (declarations plus if/for blocks); expand it at statement
//! level inside a function body, never inside an expression.
//! @param m     matrix pointer lvalue, advanced by 16 elements per step; when
//!              it is 64-byte aligned the loop is given _mm512_load_ps,
//!              otherwise _mm512_loadu_ps.
//! @param q     query pointer lvalue (assigned to a `const float *`), advanced
//!              by 2 elements per step.
//! @param dim   number of steps: the loop runs until `q` has moved `dim * 2`
//!              elements (tested with `!=`).
//! @param out   result pointer; 64-byte alignment picks _mm512_store_ps over
//!              _mm512_storeu_ps for MATRIX_VAR_STORE.
//! @param _NORM forwarded unchanged to MATRIX_VAR_STORE.
//! @note `m` and `q` are left advanced. `m`, `dim` and `out` are parenthesized
//!       where a cast, shift or mask is applied to them so that expression
//!       arguments keep their grouping.
#define ACCUM_FP32_16X2_AVX512(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(1, 2, __m512, zmm_sum, _mm512_setzero_ps())            \
  if (((uintptr_t)(m) & 0x3f) == 0) {                                    \
    for (const float *qe = q + ((dim) << 1); q != qe; m += 16, q += 2) { \
      MATRIX_FP32_ITER_16X2_AVX512(m, q, zmm_sum, _mm512_load_ps,        \
                                   ACCUM_FP32_STEP_AVX512)               \
    }                                                                    \
  } else {                                                               \
    for (const float *qe = q + ((dim) << 1); q != qe; m += 16, q += 2) { \
      MATRIX_FP32_ITER_16X2_AVX512(m, q, zmm_sum, _mm512_loadu_ps,       \
                                   ACCUM_FP32_STEP_AVX512)               \
    }                                                                    \
  }                                                                      \
  if (((uintptr_t)(out) & 0x3f) == 0) {                                  \
    MATRIX_VAR_STORE(1, 2, 16, zmm_sum, out, _mm512_store_ps, _NORM)     \
  } else {                                                               \
    MATRIX_VAR_STORE(1, 2, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)    \
  }

//! Compute the distance between matrix and query (FP32, M=16, N=4)
//!
//! Statement macro (declarations plus if/for blocks); expand it at statement
//! level inside a function body, never inside an expression.
//! @param m     matrix pointer lvalue, advanced by 16 elements per step; when
//!              it is 64-byte aligned the loop is given _mm512_load_ps,
//!              otherwise _mm512_loadu_ps.
//! @param q     query pointer lvalue (assigned to a `const float *`), advanced
//!              by 4 elements per step.
//! @param dim   number of steps: the loop runs until `q` has moved `dim * 4`
//!              elements (tested with `!=`).
//! @param out   result pointer; 64-byte alignment picks _mm512_store_ps over
//!              _mm512_storeu_ps for MATRIX_VAR_STORE.
//! @param _NORM forwarded unchanged to MATRIX_VAR_STORE.
//! @note `m` and `q` are left advanced. `m`, `dim` and `out` are parenthesized
//!       where a cast, shift or mask is applied to them so that expression
//!       arguments keep their grouping.
#define ACCUM_FP32_16X4_AVX512(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(1, 4, __m512, zmm_sum, _mm512_setzero_ps())            \
  if (((uintptr_t)(m) & 0x3f) == 0) {                                    \
    for (const float *qe = q + ((dim) << 2); q != qe; m += 16, q += 4) { \
      MATRIX_FP32_ITER_16X4_AVX512(m, q, zmm_sum, _mm512_load_ps,        \
                                   ACCUM_FP32_STEP_AVX512)               \
    }                                                                    \
  } else {                                                               \
    for (const float *qe = q + ((dim) << 2); q != qe; m += 16, q += 4) { \
      MATRIX_FP32_ITER_16X4_AVX512(m, q, zmm_sum, _mm512_loadu_ps,       \
                                   ACCUM_FP32_STEP_AVX512)               \
    }                                                                    \
  }                                                                      \
  if (((uintptr_t)(out) & 0x3f) == 0) {                                  \
    MATRIX_VAR_STORE(1, 4, 16, zmm_sum, out, _mm512_store_ps, _NORM)     \
  } else {                                                               \
    MATRIX_VAR_STORE(1, 4, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)    \
  }

//! Compute the distance between matrix and query (FP32, M=16, N=8)
//!
//! Statement macro (declarations plus if/for blocks); expand it at statement
//! level inside a function body, never inside an expression.
//! @param m     matrix pointer lvalue, advanced by 16 elements per step; when
//!              it is 64-byte aligned the loop is given _mm512_load_ps,
//!              otherwise _mm512_loadu_ps.
//! @param q     query pointer lvalue (assigned to a `const float *`), advanced
//!              by 8 elements per step.
//! @param dim   number of steps: the loop runs until `q` has moved `dim * 8`
//!              elements (tested with `!=`).
//! @param out   result pointer; 64-byte alignment picks _mm512_store_ps over
//!              _mm512_storeu_ps for MATRIX_VAR_STORE.
//! @param _NORM forwarded unchanged to MATRIX_VAR_STORE.
//! @note `m` and `q` are left advanced. `m`, `dim` and `out` are parenthesized
//!       where a cast, shift or mask is applied to them so that expression
//!       arguments keep their grouping.
#define ACCUM_FP32_16X8_AVX512(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(1, 8, __m512, zmm_sum, _mm512_setzero_ps())            \
  if (((uintptr_t)(m) & 0x3f) == 0) {                                    \
    for (const float *qe = q + ((dim) << 3); q != qe; m += 16, q += 8) { \
      MATRIX_FP32_ITER_16X8_AVX512(m, q, zmm_sum, _mm512_load_ps,        \
                                   ACCUM_FP32_STEP_AVX512)               \
    }                                                                    \
  } else {                                                               \
    for (const float *qe = q + ((dim) << 3); q != qe; m += 16, q += 8) { \
      MATRIX_FP32_ITER_16X8_AVX512(m, q, zmm_sum, _mm512_loadu_ps,       \
                                   ACCUM_FP32_STEP_AVX512)               \
    }                                                                    \
  }                                                                      \
  if (((uintptr_t)(out) & 0x3f) == 0) {                                  \
    MATRIX_VAR_STORE(1, 8, 16, zmm_sum, out, _mm512_store_ps, _NORM)     \
  } else {                                                               \
    MATRIX_VAR_STORE(1, 8, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)    \
  }

//! Compute the distance between matrix and query (FP32, M=16, N=16)
//!
//! Statement macro (declarations plus if/for blocks); expand it at statement
//! level inside a function body, never inside an expression.
//! @param m     matrix pointer lvalue, advanced by 16 elements per step; when
//!              it is 64-byte aligned the loop is given _mm512_load_ps,
//!              otherwise _mm512_loadu_ps.
//! @param q     query pointer lvalue (assigned to a `const float *`), advanced
//!              by 16 elements per step.
//! @param dim   number of steps: the loop runs until `q` has moved `dim * 16`
//!              elements (tested with `!=`).
//! @param out   result pointer; 64-byte alignment picks _mm512_store_ps over
//!              _mm512_storeu_ps for MATRIX_VAR_STORE.
//! @param _NORM forwarded unchanged to MATRIX_VAR_STORE.
//! @note `m` and `q` are left advanced. `m`, `dim` and `out` are parenthesized
//!       where a cast, shift or mask is applied to them so that expression
//!       arguments keep their grouping.
#define ACCUM_FP32_16X16_AVX512(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(1, 16, __m512, zmm_sum, _mm512_setzero_ps())            \
  if (((uintptr_t)(m) & 0x3f) == 0) {                                     \
    for (const float *qe = q + ((dim) << 4); q != qe; m += 16, q += 16) { \
      MATRIX_FP32_ITER_16X16_AVX512(m, q, zmm_sum, _mm512_load_ps,        \
                                    ACCUM_FP32_STEP_AVX512)               \
    }                                                                     \
  } else {                                                                \
    for (const float *qe = q + ((dim) << 4); q != qe; m += 16, q += 16) { \
      MATRIX_FP32_ITER_16X16_AVX512(m, q, zmm_sum, _mm512_loadu_ps,       \
                                    ACCUM_FP32_STEP_AVX512)               \
    }                                                                     \
  }                                                                       \
  if (((uintptr_t)(out) & 0x3f) == 0) {                                   \
    MATRIX_VAR_STORE(1, 16, 16, zmm_sum, out, _mm512_store_ps, _NORM)     \
  } else {                                                                \
    MATRIX_VAR_STORE(1, 16, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)    \
  }

//! Compute the distance between matrix and query (FP32, M=32, N=1)
//!
//! Statement macro (declarations plus if/for blocks); expand it at statement
//! level inside a function body, never inside an expression.
//! @param m     matrix pointer lvalue, advanced by 32 elements per step; when
//!              it is 64-byte aligned the loop is given _mm512_load_ps,
//!              otherwise _mm512_loadu_ps.
//! @param q     query pointer lvalue (assigned to a `const float *`), advanced
//!              by one element per step.
//! @param dim   number of steps: the loop runs until `q` has moved `dim`
//!              elements (tested with `!=`).
//! @param out   result pointer; 64-byte alignment picks _mm512_store_ps over
//!              _mm512_storeu_ps for MATRIX_VAR_STORE.
//! @param _NORM forwarded unchanged to MATRIX_VAR_STORE.
//! @note `m` and `q` are left advanced. `m`, `dim` and `out` are parenthesized
//!       where a cast, addition or mask is applied to them so that expression
//!       arguments keep their grouping.
#define ACCUM_FP32_32X1_AVX512(m, q, dim, out, _NORM)                 \
  MATRIX_VAR_INIT(2, 1, __m512, zmm_sum, _mm512_setzero_ps())         \
  if (((uintptr_t)(m) & 0x3f) == 0) {                                 \
    for (const float *qe = q + (dim); q != qe; m += 32, ++q) {        \
      MATRIX_FP32_ITER_32X1_AVX512(m, q, zmm_sum, _mm512_load_ps,     \
                                   ACCUM_FP32_STEP_AVX512)            \
    }                                                                 \
  } else {                                                            \
    for (const float *qe = q + (dim); q != qe; m += 32, ++q) {        \
      MATRIX_FP32_ITER_32X1_AVX512(m, q, zmm_sum, _mm512_loadu_ps,    \
                                   ACCUM_FP32_STEP_AVX512)            \
    }                                                                 \
  }                                                                   \
  if (((uintptr_t)(out) & 0x3f) == 0) {                               \
    MATRIX_VAR_STORE(2, 1, 16, zmm_sum, out, _mm512_store_ps, _NORM)  \
  } else {                                                            \
    MATRIX_VAR_STORE(2, 1, 16, zmm_sum, out, _mm512_storeu_ps, _NORM) \
  }

//! Compute the distance between matrix and query (FP32, M=32, N=2)
//!
//! Statement macro (declarations plus if/for blocks); expand it at statement
//! level inside a function body, never inside an expression.
//! @param m     matrix pointer lvalue, advanced by 32 elements per step; when
//!              it is 64-byte aligned the loop is given _mm512_load_ps,
//!              otherwise _mm512_loadu_ps.
//! @param q     query pointer lvalue (assigned to a `const float *`), advanced
//!              by 2 elements per step.
//! @param dim   number of steps: the loop runs until `q` has moved `dim * 2`
//!              elements (tested with `!=`).
//! @param out   result pointer; 64-byte alignment picks _mm512_store_ps over
//!              _mm512_storeu_ps for MATRIX_VAR_STORE.
//! @param _NORM forwarded unchanged to MATRIX_VAR_STORE.
//! @note `m` and `q` are left advanced. `m`, `dim` and `out` are parenthesized
//!       where a cast, shift or mask is applied to them so that expression
//!       arguments keep their grouping.
#define ACCUM_FP32_32X2_AVX512(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(2, 2, __m512, zmm_sum, _mm512_setzero_ps())            \
  if (((uintptr_t)(m) & 0x3f) == 0) {                                    \
    for (const float *qe = q + ((dim) << 1); q != qe; m += 32, q += 2) { \
      MATRIX_FP32_ITER_32X2_AVX512(m, q, zmm_sum, _mm512_load_ps,        \
                                   ACCUM_FP32_STEP_AVX512)               \
    }                                                                    \
  } else {                                                               \
    for (const float *qe = q + ((dim) << 1); q != qe; m += 32, q += 2) { \
      MATRIX_FP32_ITER_32X2_AVX512(m, q, zmm_sum, _mm512_loadu_ps,       \
                                   ACCUM_FP32_STEP_AVX512)               \
    }                                                                    \
  }                                                                      \
  if (((uintptr_t)(out) & 0x3f) == 0) {                                  \
    MATRIX_VAR_STORE(2, 2, 16, zmm_sum, out, _mm512_store_ps, _NORM)     \
  } else {                                                               \
    MATRIX_VAR_STORE(2, 2, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)    \
  }

//! Compute the distance between matrix and query (FP32, M=32, N=4)
//!
//! Statement macro (declarations plus if/for blocks); expand it at statement
//! level inside a function body, never inside an expression.
//! @param m     matrix pointer lvalue, advanced by 32 elements per step; when
//!              it is 64-byte aligned the loop is given _mm512_load_ps,
//!              otherwise _mm512_loadu_ps.
//! @param q     query pointer lvalue (assigned to a `const float *`), advanced
//!              by 4 elements per step.
//! @param dim   number of steps: the loop runs until `q` has moved `dim * 4`
//!              elements (tested with `!=`).
//! @param out   result pointer; 64-byte alignment picks _mm512_store_ps over
//!              _mm512_storeu_ps for MATRIX_VAR_STORE.
//! @param _NORM forwarded unchanged to MATRIX_VAR_STORE.
//! @note `m` and `q` are left advanced. `m`, `dim` and `out` are parenthesized
//!       where a cast, shift or mask is applied to them so that expression
//!       arguments keep their grouping.
#define ACCUM_FP32_32X4_AVX512(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(2, 4, __m512, zmm_sum, _mm512_setzero_ps())            \
  if (((uintptr_t)(m) & 0x3f) == 0) {                                    \
    for (const float *qe = q + ((dim) << 2); q != qe; m += 32, q += 4) { \
      MATRIX_FP32_ITER_32X4_AVX512(m, q, zmm_sum, _mm512_load_ps,        \
                                   ACCUM_FP32_STEP_AVX512)               \
    }                                                                    \
  } else {                                                               \
    for (const float *qe = q + ((dim) << 2); q != qe; m += 32, q += 4) { \
      MATRIX_FP32_ITER_32X4_AVX512(m, q, zmm_sum, _mm512_loadu_ps,       \
                                   ACCUM_FP32_STEP_AVX512)               \
    }                                                                    \
  }                                                                      \
  if (((uintptr_t)(out) & 0x3f) == 0) {                                  \
    MATRIX_VAR_STORE(2, 4, 16, zmm_sum, out, _mm512_store_ps, _NORM)     \
  } else {                                                               \
    MATRIX_VAR_STORE(2, 4, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)    \
  }

//! Compute the distance between matrix and query (FP32, M=32, N=8)
//!
//! Statement macro (declarations plus if/for blocks); expand it at statement
//! level inside a function body, never inside an expression.
//! @param m     matrix pointer lvalue, advanced by 32 elements per step; when
//!              it is 64-byte aligned the loop is given _mm512_load_ps,
//!              otherwise _mm512_loadu_ps.
//! @param q     query pointer lvalue (assigned to a `const float *`), advanced
//!              by 8 elements per step.
//! @param dim   number of steps: the loop runs until `q` has moved `dim * 8`
//!              elements (tested with `!=`).
//! @param out   result pointer; 64-byte alignment picks _mm512_store_ps over
//!              _mm512_storeu_ps for MATRIX_VAR_STORE.
//! @param _NORM forwarded unchanged to MATRIX_VAR_STORE.
//! @note `m` and `q` are left advanced. `m`, `dim` and `out` are parenthesized
//!       where a cast, shift or mask is applied to them so that expression
//!       arguments keep their grouping.
#define ACCUM_FP32_32X8_AVX512(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(2, 8, __m512, zmm_sum, _mm512_setzero_ps())            \
  if (((uintptr_t)(m) & 0x3f) == 0) {                                    \
    for (const float *qe = q + ((dim) << 3); q != qe; m += 32, q += 8) { \
      MATRIX_FP32_ITER_32X8_AVX512(m, q, zmm_sum, _mm512_load_ps,        \
                                   ACCUM_FP32_STEP_AVX512)               \
    }                                                                    \
  } else {                                                               \
    for (const float *qe = q + ((dim) << 3); q != qe; m += 32, q += 8) { \
      MATRIX_FP32_ITER_32X8_AVX512(m, q, zmm_sum, _mm512_loadu_ps,       \
                                   ACCUM_FP32_STEP_AVX512)               \
    }                                                                    \
  }                                                                      \
  if (((uintptr_t)(out) & 0x3f) == 0) {                                  \
    MATRIX_VAR_STORE(2, 8, 16, zmm_sum, out, _mm512_store_ps, _NORM)     \
  } else {                                                               \
    MATRIX_VAR_STORE(2, 8, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)    \
  }

//! Compute the distance between matrix and query (FP32, M=32, N=16)
//!
//! Statement macro (declarations plus if/for blocks); expand it at statement
//! level inside a function body, never inside an expression.
//! @param m     matrix pointer lvalue, advanced by 32 elements per step; when
//!              it is 64-byte aligned the loop is given _mm512_load_ps,
//!              otherwise _mm512_loadu_ps.
//! @param q     query pointer lvalue (assigned to a `const float *`), advanced
//!              by 16 elements per step.
//! @param dim   number of steps: the loop runs until `q` has moved `dim * 16`
//!              elements (tested with `!=`).
//! @param out   result pointer; 64-byte alignment picks _mm512_store_ps over
//!              _mm512_storeu_ps for MATRIX_VAR_STORE.
//! @param _NORM forwarded unchanged to MATRIX_VAR_STORE.
//! @note `m` and `q` are left advanced. `m`, `dim` and `out` are parenthesized
//!       where a cast, shift or mask is applied to them so that expression
//!       arguments keep their grouping.
#define ACCUM_FP32_32X16_AVX512(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(2, 16, __m512, zmm_sum, _mm512_setzero_ps())            \
  if (((uintptr_t)(m) & 0x3f) == 0) {                                     \
    for (const float *qe = q + ((dim) << 4); q != qe; m += 32, q += 16) { \
      MATRIX_FP32_ITER_32X16_AVX512(m, q, zmm_sum, _mm512_load_ps,        \
                                    ACCUM_FP32_STEP_AVX512)               \
    }                                                                     \
  } else {                                                                \
    for (const float *qe = q + ((dim) << 4); q != qe; m += 32, q += 16) { \
      MATRIX_FP32_ITER_32X16_AVX512(m, q, zmm_sum, _mm512_loadu_ps,       \
                                    ACCUM_FP32_STEP_AVX512)               \
    }                                                                     \
  }                                                                       \
  if (((uintptr_t)(out) & 0x3f) == 0) {                                   \
    MATRIX_VAR_STORE(2, 16, 16, zmm_sum, out, _mm512_store_ps, _NORM)     \
  } else {                                                                \
    MATRIX_VAR_STORE(2, 16, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)    \
  }

//! Compute the distance between matrix and query (FP32, M=32, N=32)
//!
//! Statement macro (declarations plus if/for blocks); expand it at statement
//! level inside a function body, never inside an expression.
//! @param m     matrix pointer lvalue, advanced by 32 elements per step; when
//!              it is 64-byte aligned the loop is given _mm512_load_ps,
//!              otherwise _mm512_loadu_ps.
//! @param q     query pointer lvalue (assigned to a `const float *`), advanced
//!              by 32 elements per step.
//! @param dim   number of steps: the loop runs until `q` has moved `dim * 32`
//!              elements (tested with `!=`).
//! @param out   result pointer; 64-byte alignment picks _mm512_store_ps over
//!              _mm512_storeu_ps for MATRIX_VAR_STORE.
//! @param _NORM forwarded unchanged to MATRIX_VAR_STORE.
//! @note `m` and `q` are left advanced. `m`, `dim` and `out` are parenthesized
//!       where a cast, shift or mask is applied to them so that expression
//!       arguments keep their grouping.
#define ACCUM_FP32_32X32_AVX512(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(2, 32, __m512, zmm_sum, _mm512_setzero_ps())            \
  if (((uintptr_t)(m) & 0x3f) == 0) {                                     \
    for (const float *qe = q + ((dim) << 5); q != qe; m += 32, q += 32) { \
      MATRIX_FP32_ITER_32X32_AVX512(m, q, zmm_sum, _mm512_load_ps,        \
                                    ACCUM_FP32_STEP_AVX512)               \
    }                                                                     \
  } else {                                                                \
    for (const float *qe = q + ((dim) << 5); q != qe; m += 32, q += 32) { \
      MATRIX_FP32_ITER_32X32_AVX512(m, q, zmm_sum, _mm512_loadu_ps,       \
                                    ACCUM_FP32_STEP_AVX512)               \
    }                                                                     \
  }                                                                       \
  if (((uintptr_t)(out) & 0x3f) == 0) {                                   \
    MATRIX_VAR_STORE(2, 32, 16, zmm_sum, out, _mm512_store_ps, _NORM)     \
  } else {                                                                \
    MATRIX_VAR_STORE(2, 32, 16, zmm_sum, out, _mm512_storeu_ps, _NORM)    \
  }

//! Compute the distance between matrix and query (FP32, M=2, N=1) on A64
//!
//! Statement macro: declares `v_sum`, `qe_aligned` and `qe` in the expanding
//! scope, so it can be expanded only once per scope.
//! The main loop consumes two query elements (and four matrix elements) per
//! iteration; afterwards the upper 64-bit half of `v_sum` is added onto the
//! lower half. When `dim` is odd, the remaining step loads two matrix
//! elements, repeats them in both halves and pairs them with a broadcast of
//! `*q`. The lower half of `v_sum` is passed through `_NORM` and written with
//! vst1_f32.
//! @param m     matrix pointer lvalue (`const float *` compatible); advanced
//!              by 4 per loop iteration, not advanced by the tail step.
//! @param q     query pointer lvalue; advanced by 2 per loop iteration.
//! @param dim   number of query elements; expanded twice, so avoid side
//!              effects. Parenthesized so expression arguments group correctly.
//! @param out   destination of the two-lane store.
//! @param _NORM callable/macro applied to the `float32x2_t` result.
//! @note The tail loads the matrix pair with vld1_f32 + vcombine_f32 rather
//!       than reading the float data through a `uint64_t` pointer; the lanes
//!       produced are the same.
#define ACCUM_FP32_2X1_NEON_A64(m, q, dim, out, _NORM)                         \
  float32x4_t v_sum = vdupq_n_f32(0);                                          \
  const float *qe_aligned = q + (((dim) >> 1) << 1);                           \
  const float *qe = q + (dim);                                                 \
  for (; q != qe_aligned; m += 4, q += 2) {                                    \
    MATRIX_FP32_ITER_2X1_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)               \
  }                                                                            \
  v_sum = vaddq_f32(                                                           \
      vreinterpretq_f32_u64(vdupq_laneq_u64(vreinterpretq_u64_f32(v_sum), 1)), \
      v_sum);                                                                  \
  if (q != qe) {                                                               \
    float32x2_t v_m_0 = vld1_f32(m);                                           \
    float32x4_t v_m = vcombine_f32(v_m_0, v_m_0);                              \
    float32x4_t v_q = vld1q_dup_f32(q);                                        \
    ACCUM_FP32_STEP_NEON(v_m, v_q, v_sum)                                      \
  }                                                                            \
  vst1_f32(out, _NORM(vget_low_f32(v_sum)));

//! Compute the distance between matrix and query (FP32, M=2, N=1) on A32
//!
//! Statement macro: declares `v_sum`, `qe_aligned`, `qe` and `sum` in the
//! expanding scope, so it can be expanded only once per scope.
//! The main loop consumes two query elements (and four matrix elements) per
//! iteration; afterwards the two halves of `v_sum` are added and the result
//! is replicated into both halves. When `dim` is odd, the remaining step loads
//! two matrix elements, repeats them in both halves and pairs them with a
//! broadcast of `*q`. The lower half of `v_sum` is passed through `_NORM` and
//! written with vst1_f32.
//! @param m     matrix pointer lvalue (`const float *` compatible); advanced
//!              by 4 per loop iteration, not advanced by the tail step.
//! @param q     query pointer lvalue; advanced by 2 per loop iteration.
//! @param dim   number of query elements; expanded twice, so avoid side
//!              effects. Parenthesized so expression arguments group correctly.
//! @param out   destination of the two-lane store.
//! @param _NORM callable/macro applied to the `float32x2_t` result.
//! @note The tail loads the matrix pair with vld1_f32 + vcombine_f32 rather
//!       than reading the float data through a `uint64_t` pointer; the lanes
//!       produced are the same.
#define ACCUM_FP32_2X1_NEON_A32(m, q, dim, out, _NORM)                   \
  float32x4_t v_sum = vdupq_n_f32(0);                                    \
  const float *qe_aligned = q + (((dim) >> 1) << 1);                     \
  const float *qe = q + (dim);                                           \
  for (; q != qe_aligned; m += 4, q += 2) {                              \
    MATRIX_FP32_ITER_2X1_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)         \
  }                                                                      \
  float32x2_t sum = vadd_f32(vget_low_f32(v_sum), vget_high_f32(v_sum)); \
  v_sum = vcombine_f32(sum, sum);                                        \
  if (q != qe) {                                                         \
    float32x2_t v_m_0 = vld1_f32(m);                                     \
    float32x4_t v_m = vcombine_f32(v_m_0, v_m_0);                        \
    float32x4_t v_q = vld1q_dup_f32(q);                                  \
    ACCUM_FP32_STEP_NEON(v_m, v_q, v_sum)                                \
  }                                                                      \
  vst1_f32(out, _NORM(vget_low_f32(v_sum)));

//! Compute the distance between matrix and query (FP32, M=2, N=2)
//!
//! Statement macro: declares the accumulators `v_sum_0_0` / `v_sum_0_1` (via
//! MATRIX_VAR_INIT) plus `qe_aligned` and `qe` in the expanding scope.
//! The main loop consumes four query and four matrix elements per iteration.
//! Afterwards the low halves of both accumulators are added to their high
//! halves and the result is kept in `v_sum_0_0`. When `dim` is odd, one more
//! step loads two matrix and two query elements, repeats the matrix pair in
//! both halves and duplicates each query element across one half. Only
//! `v_sum_0_0` is stored (MATRIX_VAR_STORE is invoked with a 1x1 shape).
//! @param m     matrix pointer lvalue (`const float *` compatible); advanced
//!              by 4 per loop iteration, not advanced by the tail step.
//! @param q     query pointer lvalue; advanced by 4 per loop iteration.
//! @param dim   step count (`dim * 2` query elements in total); expanded
//!              twice, so avoid side effects. Parenthesized so expression
//!              arguments are shifted as a whole.
//! @param out   destination handed to MATRIX_VAR_STORE with vst1q_f32.
//! @param _NORM forwarded unchanged to MATRIX_VAR_STORE.
#define ACCUM_FP32_2X2_NEON(m, q, dim, out, _NORM)                       \
  MATRIX_VAR_INIT(1, 2, float32x4_t, v_sum, vdupq_n_f32(0))              \
  const float *qe_aligned = q + (((dim) >> 1) << 2);                     \
  const float *qe = q + ((dim) << 1);                                    \
  for (; q != qe_aligned; m += 4, q += 4) {                              \
    MATRIX_FP32_ITER_2X2_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)         \
  }                                                                      \
  v_sum_0_0 = vaddq_f32(                                                 \
      vcombine_f32(vget_low_f32(v_sum_0_0), vget_low_f32(v_sum_0_1)),    \
      vcombine_f32(vget_high_f32(v_sum_0_0), vget_high_f32(v_sum_0_1))); \
  if (q != qe) {                                                         \
    float32x2_t v_m_0 = vld1_f32(m);                                     \
    float32x2_t v_q_0 = vld1_f32(q);                                     \
    float32x4_t v_m = vcombine_f32(v_m_0, v_m_0);                        \
    float32x4_t v_q =                                                    \
        vcombine_f32(vdup_lane_f32(v_q_0, 0), vdup_lane_f32(v_q_0, 1));  \
    ACCUM_FP32_STEP_NEON(v_m, v_q, v_sum_0_0)                            \
  }                                                                      \
  MATRIX_VAR_STORE(1, 1, 4, v_sum, out, vst1q_f32, _NORM)

//! Compute the distance between matrix and query (FP32, M=4, N=1)
//!
//! Statement macro: declares the accumulators `v_sum_0_0` / `v_sum_0_1` (via
//! MATRIX_VAR_INIT) plus `qe_aligned` and `qe` in the expanding scope.
//! The main loop consumes two query and eight matrix elements per iteration.
//! When `dim` is odd, one more step loads four matrix elements and a
//! broadcast of `*q` and accumulates them into `v_sum_0_0`. Both accumulators
//! are then added into `v_sum_0_0`, which is the only vector stored
//! (MATRIX_VAR_STORE is invoked with a 1x1 shape).
//! @param m     matrix pointer lvalue (`const float *` compatible); advanced
//!              by 8 per loop iteration, not advanced by the tail step.
//! @param q     query pointer lvalue; advanced by 2 per loop iteration.
//! @param dim   number of query elements; expanded twice, so avoid side
//!              effects. Parenthesized so expression arguments group correctly.
//! @param out   destination handed to MATRIX_VAR_STORE with vst1q_f32.
//! @param _NORM forwarded unchanged to MATRIX_VAR_STORE.
#define ACCUM_FP32_4X1_NEON(m, q, dim, out, _NORM)               \
  MATRIX_VAR_INIT(1, 2, float32x4_t, v_sum, vdupq_n_f32(0))      \
  const float *qe_aligned = q + (((dim) >> 1) << 1);             \
  const float *qe = q + (dim);                                   \
  for (; q != qe_aligned; m += 8, q += 2) {                      \
    MATRIX_FP32_ITER_4X1_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON) \
  }                                                              \
  if (q != qe) {                                                 \
    float32x4_t v_m = vld1q_f32(m);                              \
    float32x4_t v_q = vld1q_dup_f32(q);                          \
    ACCUM_FP32_STEP_NEON(v_m, v_q, v_sum_0_0)                    \
  }                                                              \
  v_sum_0_0 = vaddq_f32(v_sum_0_0, v_sum_0_1);                   \
  MATRIX_VAR_STORE(1, 1, 4, v_sum, out, vst1q_f32, _NORM)

//! Compute the distance between matrix and query (FP32, M=4, N=2)
//!
//! Statement macro: sets up the `v_sum` accumulators through MATRIX_VAR_INIT
//! and uses a loop-local `qe`; expand it at statement level only.
//! @param m     matrix pointer lvalue; advanced by 4 elements per step.
//! @param q     query pointer lvalue (assigned to a `const float *`);
//!              advanced by 2 elements per step.
//! @param dim   step count; the loop ends once `q` has moved `dim * 2`
//!              elements (tested with `!=`). Parenthesized so an expression
//!              argument is shifted as a whole.
//! @param out   destination handed to MATRIX_VAR_STORE with vst1q_f32.
//! @param _NORM forwarded unchanged to MATRIX_VAR_STORE.
//! @note `m` and `q` are left advanced past the consumed data.
#define ACCUM_FP32_4X2_NEON(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(1, 2, float32x4_t, v_sum, vdupq_n_f32(0))           \
  for (const float *qe = q + ((dim) << 1); q != qe; m += 4, q += 2) { \
    MATRIX_FP32_ITER_4X2_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)      \
  }                                                                   \
  MATRIX_VAR_STORE(1, 2, 4, v_sum, out, vst1q_f32, _NORM)

//! Compute the distance between matrix and query (FP32, M=4, N=4)
//!
//! Statement macro: sets up the `v_sum` accumulators through MATRIX_VAR_INIT
//! and uses a loop-local `qe`; expand it at statement level only.
//! @param m     matrix pointer lvalue; advanced by 4 elements per step.
//! @param q     query pointer lvalue (assigned to a `const float *`);
//!              advanced by 4 elements per step.
//! @param dim   step count; the loop ends once `q` has moved `dim * 4`
//!              elements (tested with `!=`). Parenthesized so an expression
//!              argument is shifted as a whole.
//! @param out   destination handed to MATRIX_VAR_STORE with vst1q_f32.
//! @param _NORM forwarded unchanged to MATRIX_VAR_STORE.
//! @note `m` and `q` are left advanced past the consumed data.
#define ACCUM_FP32_4X4_NEON(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(1, 4, float32x4_t, v_sum, vdupq_n_f32(0))           \
  for (const float *qe = q + ((dim) << 2); q != qe; m += 4, q += 4) { \
    MATRIX_FP32_ITER_4X4_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)      \
  }                                                                   \
  MATRIX_VAR_STORE(1, 4, 4, v_sum, out, vst1q_f32, _NORM)

//! Compute the distance between matrix and query (FP32, M=8, N=1)
//!
//! Statement macro: sets up the `v_sum` accumulators through MATRIX_VAR_INIT
//! and uses a loop-local `qe`; expand it at statement level only.
//! @param m     matrix pointer lvalue; advanced by 8 elements per step.
//! @param q     query pointer lvalue (assigned to a `const float *`);
//!              advanced by one element per step.
//! @param dim   step count; the loop ends once `q` has moved `dim` elements
//!              (tested with `!=`). Parenthesized so an expression argument
//!              is added as a whole.
//! @param out   destination handed to MATRIX_VAR_STORE with vst1q_f32.
//! @param _NORM forwarded unchanged to MATRIX_VAR_STORE.
//! @note `m` and `q` are left advanced past the consumed data.
#define ACCUM_FP32_8X1_NEON(m, q, dim, out, _NORM)               \
  MATRIX_VAR_INIT(2, 1, float32x4_t, v_sum, vdupq_n_f32(0))      \
  for (const float *qe = q + (dim); q != qe; m += 8, ++q) {      \
    MATRIX_FP32_ITER_8X1_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON) \
  }                                                              \
  MATRIX_VAR_STORE(2, 1, 4, v_sum, out, vst1q_f32, _NORM)

//! Compute the distance between matrix and query (FP32, M=8, N=2)
//!
//! Statement macro: sets up the `v_sum` accumulators through MATRIX_VAR_INIT
//! and uses a loop-local `qe`; expand it at statement level only.
//! @param m     matrix pointer lvalue; advanced by 8 elements per step.
//! @param q     query pointer lvalue (assigned to a `const float *`);
//!              advanced by 2 elements per step.
//! @param dim   step count; the loop ends once `q` has moved `dim * 2`
//!              elements (tested with `!=`). Parenthesized so an expression
//!              argument is shifted as a whole.
//! @param out   destination handed to MATRIX_VAR_STORE with vst1q_f32.
//! @param _NORM forwarded unchanged to MATRIX_VAR_STORE.
//! @note `m` and `q` are left advanced past the consumed data.
#define ACCUM_FP32_8X2_NEON(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(2, 2, float32x4_t, v_sum, vdupq_n_f32(0))           \
  for (const float *qe = q + ((dim) << 1); q != qe; m += 8, q += 2) { \
    MATRIX_FP32_ITER_8X2_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)      \
  }                                                                   \
  MATRIX_VAR_STORE(2, 2, 4, v_sum, out, vst1q_f32, _NORM)

//! Compute the distance between matrix and query (FP32, M=8, N=4)
//!
//! Statement macro: sets up the `v_sum` accumulators through MATRIX_VAR_INIT
//! and uses a loop-local `qe`; expand it at statement level only.
//! @param m     matrix pointer lvalue; advanced by 8 elements per step.
//! @param q     query pointer lvalue (assigned to a `const float *`);
//!              advanced by 4 elements per step.
//! @param dim   step count; the loop ends once `q` has moved `dim * 4`
//!              elements (tested with `!=`). Parenthesized so an expression
//!              argument is shifted as a whole.
//! @param out   destination handed to MATRIX_VAR_STORE with vst1q_f32.
//! @param _NORM forwarded unchanged to MATRIX_VAR_STORE.
//! @note `m` and `q` are left advanced past the consumed data.
#define ACCUM_FP32_8X4_NEON(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(2, 4, float32x4_t, v_sum, vdupq_n_f32(0))           \
  for (const float *qe = q + ((dim) << 2); q != qe; m += 8, q += 4) { \
    MATRIX_FP32_ITER_8X4_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)      \
  }                                                                   \
  MATRIX_VAR_STORE(2, 4, 4, v_sum, out, vst1q_f32, _NORM)

//! Compute the distance between matrix and query (FP32, M=8, N=8)
//!
//! Statement macro: sets up the `v_sum` accumulators through MATRIX_VAR_INIT
//! and uses a loop-local `qe`; expand it at statement level only.
//! @param m     matrix pointer lvalue; advanced by 8 elements per step.
//! @param q     query pointer lvalue (assigned to a `const float *`);
//!              advanced by 8 elements per step.
//! @param dim   step count; the loop ends once `q` has moved `dim * 8`
//!              elements (tested with `!=`). Parenthesized so an expression
//!              argument is shifted as a whole.
//! @param out   destination handed to MATRIX_VAR_STORE with vst1q_f32.
//! @param _NORM forwarded unchanged to MATRIX_VAR_STORE.
//! @note `m` and `q` are left advanced past the consumed data.
#define ACCUM_FP32_8X8_NEON(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(2, 8, float32x4_t, v_sum, vdupq_n_f32(0))           \
  for (const float *qe = q + ((dim) << 3); q != qe; m += 8, q += 8) { \
    MATRIX_FP32_ITER_8X8_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)      \
  }                                                                   \
  MATRIX_VAR_STORE(2, 8, 4, v_sum, out, vst1q_f32, _NORM)

//! Compute the distance between matrix and query (FP32, M=16, N=1)
//!
//! Statement macro: sets up the `v_sum` accumulators through MATRIX_VAR_INIT
//! and uses a loop-local `qe`; expand it at statement level only.
//! @param m     matrix pointer lvalue; advanced by 16 elements per step.
//! @param q     query pointer lvalue (assigned to a `const float *`);
//!              advanced by one element per step.
//! @param dim   step count; the loop ends once `q` has moved `dim` elements
//!              (tested with `!=`). Parenthesized so an expression argument
//!              is added as a whole.
//! @param out   destination handed to MATRIX_VAR_STORE with vst1q_f32.
//! @param _NORM forwarded unchanged to MATRIX_VAR_STORE.
//! @note `m` and `q` are left advanced past the consumed data.
#define ACCUM_FP32_16X1_NEON(m, q, dim, out, _NORM)               \
  MATRIX_VAR_INIT(4, 1, float32x4_t, v_sum, vdupq_n_f32(0))       \
  for (const float *qe = q + (dim); q != qe; m += 16, ++q) {      \
    MATRIX_FP32_ITER_16X1_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON) \
  }                                                               \
  MATRIX_VAR_STORE(4, 1, 4, v_sum, out, vst1q_f32, _NORM)

//! Accumulate the distance between matrix and query (FP32, M=16, N=2).
//! Declares the `v_sum` accumulators, then invokes the per-step iterator
//! until `q` has advanced by `dim << 1` floats (`m` moves 16 floats and `q`
//! moves 2 floats per step). The accumulators are finally handed to
//! MATRIX_VAR_STORE together with `vst1q_f32` and `_NORM` to fill `out`.
#define ACCUM_FP32_16X2_NEON(m, q, dim, out, _NORM)                   \
  MATRIX_VAR_INIT(4, 2, float32x4_t, v_sum, vdupq_n_f32(0))           \
  {                                                                   \
    const float *const q_stop = q + (dim << 1);                       \
    while (q != q_stop) {                                             \
      MATRIX_FP32_ITER_16X2_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)   \
      m += 16;                                                        \
      q += 2;                                                         \
    }                                                                 \
  }                                                                   \
  MATRIX_VAR_STORE(4, 2, 4, v_sum, out, vst1q_f32, _NORM)

//! Accumulate the distance between matrix and query (FP32, M=16, N=4).
//! Declares the `v_sum` accumulators, then invokes the per-step iterator
//! until `q` has advanced by `dim << 2` floats (`m` moves 16 floats and `q`
//! moves 4 floats per step). The accumulators are finally handed to
//! MATRIX_VAR_STORE together with `vst1q_f32` and `_NORM` to fill `out`.
#define ACCUM_FP32_16X4_NEON(m, q, dim, out, _NORM)                   \
  MATRIX_VAR_INIT(4, 4, float32x4_t, v_sum, vdupq_n_f32(0))           \
  {                                                                   \
    const float *const q_stop = q + (dim << 2);                       \
    while (q != q_stop) {                                             \
      MATRIX_FP32_ITER_16X4_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)   \
      m += 16;                                                        \
      q += 4;                                                         \
    }                                                                 \
  }                                                                   \
  MATRIX_VAR_STORE(4, 4, 4, v_sum, out, vst1q_f32, _NORM)

//! Accumulate the distance between matrix and query (FP32, M=16, N=8).
//! Declares the `v_sum` accumulators, then invokes the per-step iterator
//! until `q` has advanced by `dim << 3` floats (`m` moves 16 floats and `q`
//! moves 8 floats per step). The accumulators are finally handed to
//! MATRIX_VAR_STORE together with `vst1q_f32` and `_NORM` to fill `out`.
#define ACCUM_FP32_16X8_NEON(m, q, dim, out, _NORM)                   \
  MATRIX_VAR_INIT(4, 8, float32x4_t, v_sum, vdupq_n_f32(0))           \
  {                                                                   \
    const float *const q_stop = q + (dim << 3);                       \
    while (q != q_stop) {                                             \
      MATRIX_FP32_ITER_16X8_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)   \
      m += 16;                                                        \
      q += 8;                                                         \
    }                                                                 \
  }                                                                   \
  MATRIX_VAR_STORE(4, 8, 4, v_sum, out, vst1q_f32, _NORM)

//! Accumulate the distance between matrix and query (FP32, M=16, N=16).
//! Declares the `v_sum` accumulators, then invokes the per-step iterator
//! until `q` has advanced by `dim << 4` floats (`m` and `q` both move
//! 16 floats per step). The accumulators are finally handed to
//! MATRIX_VAR_STORE together with `vst1q_f32` and `_NORM` to fill `out`.
#define ACCUM_FP32_16X16_NEON(m, q, dim, out, _NORM)                  \
  MATRIX_VAR_INIT(4, 16, float32x4_t, v_sum, vdupq_n_f32(0))          \
  {                                                                   \
    const float *const q_stop = q + (dim << 4);                       \
    while (q != q_stop) {                                             \
      MATRIX_FP32_ITER_16X16_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)  \
      m += 16;                                                        \
      q += 16;                                                        \
    }                                                                 \
  }                                                                   \
  MATRIX_VAR_STORE(4, 16, 4, v_sum, out, vst1q_f32, _NORM)

//! Accumulate the distance between matrix and query (FP32, M=32, N=1).
//! Declares the `v_sum` accumulators, then invokes the per-step iterator
//! until `q` has advanced by `dim` floats (`m` moves 32 floats and `q`
//! moves 1 float per step). The accumulators are finally handed to
//! MATRIX_VAR_STORE together with `vst1q_f32` and `_NORM` to fill `out`.
#define ACCUM_FP32_32X1_NEON(m, q, dim, out, _NORM)                   \
  MATRIX_VAR_INIT(8, 1, float32x4_t, v_sum, vdupq_n_f32(0))           \
  {                                                                   \
    const float *const q_stop = q + dim;                              \
    while (q != q_stop) {                                             \
      MATRIX_FP32_ITER_32X1_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)   \
      m += 32;                                                        \
      ++q;                                                            \
    }                                                                 \
  }                                                                   \
  MATRIX_VAR_STORE(8, 1, 4, v_sum, out, vst1q_f32, _NORM)

//! Accumulate the distance between matrix and query (FP32, M=32, N=2).
//! Declares the `v_sum` accumulators, then invokes the per-step iterator
//! until `q` has advanced by `dim << 1` floats (`m` moves 32 floats and `q`
//! moves 2 floats per step). The accumulators are finally handed to
//! MATRIX_VAR_STORE together with `vst1q_f32` and `_NORM` to fill `out`.
#define ACCUM_FP32_32X2_NEON(m, q, dim, out, _NORM)                   \
  MATRIX_VAR_INIT(8, 2, float32x4_t, v_sum, vdupq_n_f32(0))           \
  {                                                                   \
    const float *const q_stop = q + (dim << 1);                       \
    while (q != q_stop) {                                             \
      MATRIX_FP32_ITER_32X2_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)   \
      m += 32;                                                        \
      q += 2;                                                         \
    }                                                                 \
  }                                                                   \
  MATRIX_VAR_STORE(8, 2, 4, v_sum, out, vst1q_f32, _NORM)

//! Accumulate the distance between matrix and query (FP32, M=32, N=4).
//! Declares the `v_sum` accumulators, then invokes the per-step iterator
//! until `q` has advanced by `dim << 2` floats (`m` moves 32 floats and `q`
//! moves 4 floats per step). The accumulators are finally handed to
//! MATRIX_VAR_STORE together with `vst1q_f32` and `_NORM` to fill `out`.
#define ACCUM_FP32_32X4_NEON(m, q, dim, out, _NORM)                   \
  MATRIX_VAR_INIT(8, 4, float32x4_t, v_sum, vdupq_n_f32(0))           \
  {                                                                   \
    const float *const q_stop = q + (dim << 2);                       \
    while (q != q_stop) {                                             \
      MATRIX_FP32_ITER_32X4_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)   \
      m += 32;                                                        \
      q += 4;                                                         \
    }                                                                 \
  }                                                                   \
  MATRIX_VAR_STORE(8, 4, 4, v_sum, out, vst1q_f32, _NORM)

//! Accumulate the distance between matrix and query (FP32, M=32, N=8).
//! Declares the `v_sum` accumulators, then invokes the per-step iterator
//! until `q` has advanced by `dim << 3` floats (`m` moves 32 floats and `q`
//! moves 8 floats per step). The accumulators are finally handed to
//! MATRIX_VAR_STORE together with `vst1q_f32` and `_NORM` to fill `out`.
#define ACCUM_FP32_32X8_NEON(m, q, dim, out, _NORM)                   \
  MATRIX_VAR_INIT(8, 8, float32x4_t, v_sum, vdupq_n_f32(0))           \
  {                                                                   \
    const float *const q_stop = q + (dim << 3);                       \
    while (q != q_stop) {                                             \
      MATRIX_FP32_ITER_32X8_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)   \
      m += 32;                                                        \
      q += 8;                                                         \
    }                                                                 \
  }                                                                   \
  MATRIX_VAR_STORE(8, 8, 4, v_sum, out, vst1q_f32, _NORM)

//! Accumulate the distance between matrix and query (FP32, M=32, N=16).
//! Declares the `v_sum` accumulators, then invokes the per-step iterator
//! until `q` has advanced by `dim << 4` floats (`m` moves 32 floats and `q`
//! moves 16 floats per step). The accumulators are finally handed to
//! MATRIX_VAR_STORE together with `vst1q_f32` and `_NORM` to fill `out`.
#define ACCUM_FP32_32X16_NEON(m, q, dim, out, _NORM)                  \
  MATRIX_VAR_INIT(8, 16, float32x4_t, v_sum, vdupq_n_f32(0))          \
  {                                                                   \
    const float *const q_stop = q + (dim << 4);                       \
    while (q != q_stop) {                                             \
      MATRIX_FP32_ITER_32X16_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)  \
      m += 32;                                                        \
      q += 16;                                                        \
    }                                                                 \
  }                                                                   \
  MATRIX_VAR_STORE(8, 16, 4, v_sum, out, vst1q_f32, _NORM)

//! Accumulate the distance between matrix and query (FP32, M=32, N=32).
//! Declares the `v_sum` accumulators, then invokes the per-step iterator
//! until `q` has advanced by `dim << 5` floats (`m` and `q` both move
//! 32 floats per step). The accumulators are finally handed to
//! MATRIX_VAR_STORE together with `vst1q_f32` and `_NORM` to fill `out`.
#define ACCUM_FP32_32X32_NEON(m, q, dim, out, _NORM)                  \
  MATRIX_VAR_INIT(8, 32, float32x4_t, v_sum, vdupq_n_f32(0))          \
  {                                                                   \
    const float *const q_stop = q + (dim << 5);                       \
    while (q != q_stop) {                                             \
      MATRIX_FP32_ITER_32X32_NEON(m, q, v_sum, ACCUM_FP32_STEP_NEON)  \
      m += 32;                                                        \
      q += 32;                                                        \
    }                                                                 \
  }                                                                   \
  MATRIX_VAR_STORE(8, 32, 4, v_sum, out, vst1q_f32, _NORM)


================================================
FILE: src/ailego/math/distance_matrix_accum_int4.i
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_int32.i"
#include "matrix_utility.i"

//! Compute the distance between matrix and query (INT4, M=2, N=1)
//!
//! `q` and `m` are reinterpreted as arrays of 32-bit words and `dim >> 3`
//! query words are consumed in total (presumably eight packed 4-bit values
//! per word -- confirm against the packing code). Work is done in stages:
//!   1. Main loop: 4 query words and 8 matrix words per step, spread over
//!      the two accumulators `xmm_sum_0_0` / `xmm_sum_0_1`. Aligned loads
//!      are selected only when both `m` and `q` are 16-byte aligned.
//!   2. If at least 2 query words remain, one extra step over 2 query words
//!      (each duplicated into adjacent lanes) and 4 matrix words is
//!      accumulated into `xmm_sum_0_0`.
//!   3. `xmm_sum_0_1` and the upper two lanes of `xmm_sum_0_0` are folded
//!      into the lower two lanes of `xmm_sum_0_0`.
//!   4. A last single query word, if any, is combined with 2 matrix words
//!      placed in the lower two lanes.
//! Only the low 64 bits of `_NORM(xmm_sum_0_0)` are written to `out`.
//! NOTE: the macro declares `qi`, `mi`, `qe_aligned` and `qe` in the
//! enclosing scope and never advances the caller's `m` / `q`.
#define ACCUM_INT4_2X1_SSE(m, q, dim, out, _NORM)                            \
  MATRIX_VAR_INIT(1, 2, __m128i, xmm_sum, _mm_setzero_si128())               \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);                \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);                \
  const uint32_t *qe_aligned = qi + ((dim >> 5) << 2);                       \
  const uint32_t *qe = qi + (dim >> 3);                                      \
  if (((uintptr_t)mi & 0xf) == 0 && ((uintptr_t)qi & 0xf) == 0) {            \
    for (; qi != qe_aligned; mi += 8, qi += 4) {                             \
      MATRIX_INT32_ITER_2X1_SSE(mi, qi, xmm_sum, _mm_load_si128,             \
                                ACCUM_INT4_STEP_SSE)                         \
    }                                                                        \
    if (qe >= qe_aligned + 2) {                                              \
      __m128i xmm_mi = _mm_load_si128((const __m128i *)(mi));                \
      __m128i xmm_qi = _mm_set_epi32(qi[1], qi[1], qi[0], qi[0]);            \
      ACCUM_INT4_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)                       \
      mi += 4;                                                               \
      qi += 2;                                                               \
    }                                                                        \
  } else {                                                                   \
    for (; qi != qe_aligned; mi += 8, qi += 4) {                             \
      MATRIX_INT32_ITER_2X1_SSE(mi, qi, xmm_sum, _mm_loadu_si128,            \
                                ACCUM_INT4_STEP_SSE)                         \
    }                                                                        \
    if (qe >= qe_aligned + 2) {                                              \
      __m128i xmm_mi = _mm_loadu_si128((const __m128i *)(mi));               \
      __m128i xmm_qi = _mm_set_epi32(qi[1], qi[1], qi[0], qi[0]);            \
      ACCUM_INT4_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)                       \
      mi += 4;                                                               \
      qi += 2;                                                               \
    }                                                                        \
  }                                                                          \
  xmm_sum_0_0 = _mm_add_epi32(xmm_sum_0_0, xmm_sum_0_1);                     \
  xmm_sum_0_0 = _mm_add_epi32(                                               \
      xmm_sum_0_0, _mm_shuffle_epi32(xmm_sum_0_0, _MM_SHUFFLE(0, 0, 3, 2))); \
  if (qi != qe) {                                                            \
    __m128i xmm_mi = _mm_set_epi32(0, 0, mi[1], mi[0]);                      \
    __m128i xmm_qi = _mm_broadcast_si32(qi);                                 \
    ACCUM_INT4_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)                         \
  }                                                                          \
  _mm_storel_pi((__m64 *)out, _NORM(xmm_sum_0_0));

//! Compute the distance between matrix and query (INT4, M=2, N=2)
//!
//! `q` and `m` are reinterpreted as arrays of 32-bit words; in total
//! `(dim >> 3) << 1` query words are consumed. Work is done in stages:
//!   1. Main loop: 4 query words and 4 matrix words per step, spread over
//!      `xmm_sum_0_0` / `xmm_sum_0_1`. Aligned loads are selected only when
//!      both `m` and `q` are 16-byte aligned.
//!   2. The two accumulators are merged into `xmm_sum_0_0` by adding their
//!      interleaved low and high 64-bit halves.
//!   3. If query words remain, one step combines 2 matrix words (repeated
//!      twice) with 2 query words (each duplicated into adjacent lanes).
//! `xmm_sum_0_0` is then stored to `out` through MATRIX_VAR_STORE, with an
//! aligned store when `out` is 16-byte aligned and an unaligned one otherwise.
//! NOTE: the macro declares `qi`, `mi` and `qe` in the enclosing scope and
//! never advances the caller's `m` / `q`.
#define ACCUM_INT4_2X2_SSE(m, q, dim, out, _NORM)                            \
  MATRIX_VAR_INIT(1, 2, __m128i, xmm_sum, _mm_setzero_si128())               \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);                \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);                \
  const uint32_t *qe = qi + ((dim >> 3) << 1);                               \
  if (((uintptr_t)mi & 0xf) == 0 && ((uintptr_t)qi & 0xf) == 0) {            \
    for (const uint32_t *qe_aligned = qi + ((dim >> 4) << 2);                \
         qi != qe_aligned; mi += 4, qi += 4) {                               \
      MATRIX_INT32_ITER_2X2_SSE(mi, qi, xmm_sum, _mm_load_si128,             \
                                ACCUM_INT4_STEP_SSE)                         \
    }                                                                        \
  } else {                                                                   \
    for (const uint32_t *qe_aligned = qi + ((dim >> 4) << 2);                \
         qi != qe_aligned; mi += 4, qi += 4) {                               \
      MATRIX_INT32_ITER_2X2_SSE(mi, qi, xmm_sum, _mm_loadu_si128,            \
                                ACCUM_INT4_STEP_SSE)                         \
    }                                                                        \
  }                                                                          \
  xmm_sum_0_0 = _mm_add_epi32(_mm_unpacklo_epi64(xmm_sum_0_0, xmm_sum_0_1),  \
                              _mm_unpackhi_epi64(xmm_sum_0_0, xmm_sum_0_1)); \
  if (qi != qe) {                                                            \
    __m128i xmm_mi = _mm_set_epi32(mi[1], mi[0], mi[1], mi[0]);              \
    __m128i xmm_qi = _mm_set_epi32(qi[1], qi[1], qi[0], qi[0]);              \
    ACCUM_INT4_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)                         \
  }                                                                          \
  if (((uintptr_t)out & 0xf) == 0) {                                         \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)             \
  } else {                                                                   \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)            \
  }

//! Compute the distance between matrix and query (INT4, M=4, N=1)
//!
//! `q` and `m` are reinterpreted as arrays of 32-bit words and `dim >> 3`
//! query words are consumed in total. Work is done in stages:
//!   1. Main loop: 2 query words and 8 matrix words per step, spread over
//!      `xmm_sum_0_0` / `xmm_sum_1_0`. Only the alignment of `m` decides
//!      between aligned and unaligned matrix loads.
//!   2. If one query word remains, it is broadcast with
//!      `_mm_broadcast_si32` and combined with one 128-bit load of matrix
//!      data, accumulating into `xmm_sum_0_0`.
//!   3. `xmm_sum_1_0` is added into `xmm_sum_0_0`.
//! `xmm_sum_0_0` is then stored to `out` through MATRIX_VAR_STORE, with an
//! aligned store when `out` is 16-byte aligned and an unaligned one otherwise.
//! NOTE: the macro declares `qi`, `mi` and `qe` in the enclosing scope and
//! never advances the caller's `m` / `q`.
#define ACCUM_INT4_4X1_SSE(m, q, dim, out, _NORM)                 \
  MATRIX_VAR_INIT(2, 1, __m128i, xmm_sum, _mm_setzero_si128())    \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);     \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);     \
  const uint32_t *qe = qi + (dim >> 3);                           \
  if (((uintptr_t)mi & 0xf) == 0) {                               \
    for (const uint32_t *qe_aligned = qi + ((dim >> 4) << 1);     \
         qi != qe_aligned; mi += 8, qi += 2) {                    \
      MATRIX_INT32_ITER_4X1_SSE(mi, qi, xmm_sum, _mm_load_si128,  \
                                ACCUM_INT4_STEP_SSE)              \
    }                                                             \
    if (qi != qe) {                                               \
      __m128i xmm_mi = _mm_load_si128((const __m128i *)(mi));     \
      __m128i xmm_qi = _mm_broadcast_si32(qi);                    \
      ACCUM_INT4_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)            \
    }                                                             \
  } else {                                                        \
    for (const uint32_t *qe_aligned = qi + ((dim >> 4) << 1);     \
         qi != qe_aligned; mi += 8, qi += 2) {                    \
      MATRIX_INT32_ITER_4X1_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \
                                ACCUM_INT4_STEP_SSE)              \
    }                                                             \
    if (qi != qe) {                                               \
      __m128i xmm_mi = _mm_loadu_si128((const __m128i *)(mi));    \
      __m128i xmm_qi = _mm_broadcast_si32(qi);                    \
      ACCUM_INT4_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)            \
    }                                                             \
  }                                                               \
  xmm_sum_0_0 = _mm_add_epi32(xmm_sum_0_0, xmm_sum_1_0);          \
  if (((uintptr_t)out & 0xf) == 0) {                              \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)  \
  } else {                                                        \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \
  }

//! Accumulate the distance between matrix and query (INT4, M=4, N=2).
//! `m` and `q` are viewed as 32-bit words (`mi` / `qi`, declared in the
//! enclosing scope). Each step hands 4 matrix words and 2 query words to the
//! iterator until `(dim >> 3) << 1` query words are consumed. The unaligned
//! load / store variants are picked whenever `m` / `out` is not 16-byte
//! aligned.
#define ACCUM_INT4_4X2_SSE(m, q, dim, out, _NORM)                     \
  MATRIX_VAR_INIT(1, 2, __m128i, xmm_sum, _mm_setzero_si128())        \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \
  if (((uintptr_t)mi & 0xf) != 0) {                                   \
    const uint32_t *const q_stop = qi + ((dim >> 3) << 1);            \
    while (qi != q_stop) {                                            \
      MATRIX_INT32_ITER_4X2_SSE(mi, qi, xmm_sum, _mm_loadu_si128,     \
                                ACCUM_INT4_STEP_SSE)                  \
      mi += 4;                                                        \
      qi += 2;                                                        \
    }                                                                 \
  } else {                                                            \
    const uint32_t *const q_stop = qi + ((dim >> 3) << 1);            \
    while (qi != q_stop) {                                            \
      MATRIX_INT32_ITER_4X2_SSE(mi, qi, xmm_sum, _mm_load_si128,      \
                                ACCUM_INT4_STEP_SSE)                  \
      mi += 4;                                                        \
      qi += 2;                                                        \
    }                                                                 \
  }                                                                   \
  if (((uintptr_t)out & 0xf) != 0) {                                  \
    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM)     \
  } else {                                                            \
    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)      \
  }

//! Accumulate the distance between matrix and query (INT4, M=4, N=4).
//! `m` and `q` are viewed as 32-bit words (`mi` / `qi`, declared in the
//! enclosing scope). Each step hands 4 matrix words and 4 query words to the
//! iterator until `(dim >> 3) << 2` query words are consumed. The unaligned
//! load / store variants are picked whenever `m` / `out` is not 16-byte
//! aligned.
#define ACCUM_INT4_4X4_SSE(m, q, dim, out, _NORM)                     \
  MATRIX_VAR_INIT(1, 4, __m128i, xmm_sum, _mm_setzero_si128())        \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \
  if (((uintptr_t)mi & 0xf) != 0) {                                   \
    const uint32_t *const q_stop = qi + ((dim >> 3) << 2);            \
    while (qi != q_stop) {                                            \
      MATRIX_INT32_ITER_4X4_SSE(mi, qi, xmm_sum, _mm_loadu_si128,     \
                                ACCUM_INT4_STEP_SSE)                  \
      mi += 4;                                                        \
      qi += 4;                                                        \
    }                                                                 \
  } else {                                                            \
    const uint32_t *const q_stop = qi + ((dim >> 3) << 2);            \
    while (qi != q_stop) {                                            \
      MATRIX_INT32_ITER_4X4_SSE(mi, qi, xmm_sum, _mm_load_si128,      \
                                ACCUM_INT4_STEP_SSE)                  \
      mi += 4;                                                        \
      qi += 4;                                                        \
    }                                                                 \
  }                                                                   \
  if (((uintptr_t)out & 0xf) != 0) {                                  \
    MATRIX_VAR_STORE(1, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM)     \
  } else {                                                            \
    MATRIX_VAR_STORE(1, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)      \
  }

//! Accumulate the distance between matrix and query (INT4, M=8, N=1).
//! `m` and `q` are viewed as 32-bit words (`mi` / `qi`, declared in the
//! enclosing scope). Each step hands 8 matrix words and 1 query word to the
//! iterator until `dim >> 3` query words are consumed. The unaligned
//! load / store variants are picked whenever `m` / `out` is not 16-byte
//! aligned.
#define ACCUM_INT4_8X1_SSE(m, q, dim, out, _NORM)                     \
  MATRIX_VAR_INIT(2, 1, __m128i, xmm_sum, _mm_setzero_si128())        \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \
  if (((uintptr_t)mi & 0xf) != 0) {                                   \
    const uint32_t *const q_stop = qi + (dim >> 3);                   \
    while (qi != q_stop) {                                            \
      MATRIX_INT32_ITER_8X1_SSE(mi, qi, xmm_sum, _mm_loadu_si128,     \
                                ACCUM_INT4_STEP_SSE)                  \
      mi += 8;                                                        \
      ++qi;                                                           \
    }                                                                 \
  } else {                                                            \
    const uint32_t *const q_stop = qi + (dim >> 3);                   \
    while (qi != q_stop) {                                            \
      MATRIX_INT32_ITER_8X1_SSE(mi, qi, xmm_sum, _mm_load_si128,      \
                                ACCUM_INT4_STEP_SSE)                  \
      mi += 8;                                                        \
      ++qi;                                                           \
    }                                                                 \
  }                                                                   \
  if (((uintptr_t)out & 0xf) != 0) {                                  \
    MATRIX_VAR_STORE(2, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)     \
  } else {                                                            \
    MATRIX_VAR_STORE(2, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)      \
  }

//! Accumulate the distance between matrix and query (INT4, M=8, N=2).
//! `m` and `q` are viewed as 32-bit words (`mi` / `qi`, declared in the
//! enclosing scope). Each step hands 8 matrix words and 2 query words to the
//! iterator until `(dim >> 3) << 1` query words are consumed. The unaligned
//! load / store variants are picked whenever `m` / `out` is not 16-byte
//! aligned.
#define ACCUM_INT4_8X2_SSE(m, q, dim, out, _NORM)                     \
  MATRIX_VAR_INIT(2, 2, __m128i, xmm_sum, _mm_setzero_si128())        \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \
  if (((uintptr_t)mi & 0xf) != 0) {                                   \
    const uint32_t *const q_stop = qi + ((dim >> 3) << 1);            \
    while (qi != q_stop) {                                            \
      MATRIX_INT32_ITER_8X2_SSE(mi, qi, xmm_sum, _mm_loadu_si128,     \
                                ACCUM_INT4_STEP_SSE)                  \
      mi += 8;                                                        \
      qi += 2;                                                        \
    }                                                                 \
  } else {                                                            \
    const uint32_t *const q_stop = qi + ((dim >> 3) << 1);            \
    while (qi != q_stop) {                                            \
      MATRIX_INT32_ITER_8X2_SSE(mi, qi, xmm_sum, _mm_load_si128,      \
                                ACCUM_INT4_STEP_SSE)                  \
      mi += 8;                                                        \
      qi += 2;                                                        \
    }                                                                 \
  }                                                                   \
  if (((uintptr_t)out & 0xf) != 0) {                                  \
    MATRIX_VAR_STORE(2, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM)     \
  } else {                                                            \
    MATRIX_VAR_STORE(2, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)      \
  }

//! Accumulate the distance between matrix and query (INT4, M=8, N=4).
//! `m` and `q` are viewed as 32-bit words (`mi` / `qi`, declared in the
//! enclosing scope). Each step hands 8 matrix words and 4 query words to the
//! iterator until `(dim >> 3) << 2` query words are consumed. The unaligned
//! load / store variants are picked whenever `m` / `out` is not 16-byte
//! aligned.
#define ACCUM_INT4_8X4_SSE(m, q, dim, out, _NORM)                     \
  MATRIX_VAR_INIT(2, 4, __m128i, xmm_sum, _mm_setzero_si128())        \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \
  if (((uintptr_t)mi & 0xf) != 0) {                                   \
    const uint32_t *const q_stop = qi + ((dim >> 3) << 2);            \
    while (qi != q_stop) {                                            \
      MATRIX_INT32_ITER_8X4_SSE(mi, qi, xmm_sum, _mm_loadu_si128,     \
                                ACCUM_INT4_STEP_SSE)                  \
      mi += 8;                                                        \
      qi += 4;                                                        \
    }                                                                 \
  } else {                                                            \
    const uint32_t *const q_stop = qi + ((dim >> 3) << 2);            \
    while (qi != q_stop) {                                            \
      MATRIX_INT32_ITER_8X4_SSE(mi, qi, xmm_sum, _mm_load_si128,      \
                                ACCUM_INT4_STEP_SSE)                  \
      mi += 8;                                                        \
      qi += 4;                                                        \
    }                                                                 \
  }                                                                   \
  if (((uintptr_t)out & 0xf) != 0) {                                  \
    MATRIX_VAR_STORE(2, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM)     \
  } else {                                                            \
    MATRIX_VAR_STORE(2, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)      \
  }

//! Accumulate the distance between matrix and query (INT4, M=8, N=8).
//! `m` and `q` are viewed as 32-bit words (`mi` / `qi`, declared in the
//! enclosing scope). Each step hands 8 matrix words and 8 query words to the
//! iterator until `(dim >> 3) << 3` query words are consumed. The unaligned
//! load / store variants are picked whenever `m` / `out` is not 16-byte
//! aligned.
#define ACCUM_INT4_8X8_SSE(m, q, dim, out, _NORM)                     \
  MATRIX_VAR_INIT(2, 8, __m128i, xmm_sum, _mm_setzero_si128())        \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \
  if (((uintptr_t)mi & 0xf) != 0) {                                   \
    const uint32_t *const q_stop = qi + ((dim >> 3) << 3);            \
    while (qi != q_stop) {                                            \
      MATRIX_INT32_ITER_8X8_SSE(mi, qi, xmm_sum, _mm_loadu_si128,     \
                                ACCUM_INT4_STEP_SSE)                  \
      mi += 8;                                                        \
      qi += 8;                                                        \
    }                                                                 \
  } else {                                                            \
    const uint32_t *const q_stop = qi + ((dim >> 3) << 3);            \
    while (qi != q_stop) {                                            \
      MATRIX_INT32_ITER_8X8_SSE(mi, qi, xmm_sum, _mm_load_si128,      \
                                ACCUM_INT4_STEP_SSE)                  \
      mi += 8;                                                        \
      qi += 8;                                                        \
    }                                                                 \
  }                                                                   \
  if (((uintptr_t)out & 0xf) != 0) {                                  \
    MATRIX_VAR_STORE(2, 8, 4, xmm_sum, out, _mm_storeu_ps, _NORM)     \
  } else {                                                            \
    MATRIX_VAR_STORE(2, 8, 4, xmm_sum, out, _mm_store_ps, _NORM)      \
  }

//! Accumulate the distance between matrix and query (INT4, M=16, N=1).
//! `m` and `q` are viewed as 32-bit words (`mi` / `qi`, declared in the
//! enclosing scope). Each step hands 16 matrix words and 1 query word to the
//! iterator until `dim >> 3` query words are consumed. The unaligned
//! load / store variants are picked whenever `m` / `out` is not 16-byte
//! aligned.
#define ACCUM_INT4_16X1_SSE(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(4, 1, __m128i, xmm_sum, _mm_setzero_si128())        \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \
  if (((uintptr_t)mi & 0xf) != 0) {                                   \
    const uint32_t *const q_stop = qi + (dim >> 3);                   \
    while (qi != q_stop) {                                            \
      MATRIX_INT32_ITER_16X1_SSE(mi, qi, xmm_sum, _mm_loadu_si128,    \
                                 ACCUM_INT4_STEP_SSE)                 \
      mi += 16;                                                       \
      ++qi;                                                           \
    }                                                                 \
  } else {                                                            \
    const uint32_t *const q_stop = qi + (dim >> 3);                   \
    while (qi != q_stop) {                                            \
      MATRIX_INT32_ITER_16X1_SSE(mi, qi, xmm_sum, _mm_load_si128,     \
                                 ACCUM_INT4_STEP_SSE)                 \
      mi += 16;                                                       \
      ++qi;                                                           \
    }                                                                 \
  }                                                                   \
  if (((uintptr_t)out & 0xf) != 0) {                                  \
    MATRIX_VAR_STORE(4, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)     \
  } else {                                                            \
    MATRIX_VAR_STORE(4, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)      \
  }

//! Accumulate the distance between matrix and query (INT4, M=16, N=2).
//! `m` and `q` are viewed as 32-bit words (`mi` / `qi`, declared in the
//! enclosing scope). Each step hands 16 matrix words and 2 query words to
//! the iterator until `(dim >> 3) << 1` query words are consumed. The
//! unaligned load / store variants are picked whenever `m` / `out` is not
//! 16-byte aligned.
#define ACCUM_INT4_16X2_SSE(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(4, 2, __m128i, xmm_sum, _mm_setzero_si128())        \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \
  if (((uintptr_t)mi & 0xf) != 0) {                                   \
    const uint32_t *const q_stop = qi + ((dim >> 3) << 1);            \
    while (qi != q_stop) {                                            \
      MATRIX_INT32_ITER_16X2_SSE(mi, qi, xmm_sum, _mm_loadu_si128,    \
                                 ACCUM_INT4_STEP_SSE)                 \
      mi += 16;                                                       \
      qi += 2;                                                        \
    }                                                                 \
  } else {                                                            \
    const uint32_t *const q_stop = qi + ((dim >> 3) << 1);            \
    while (qi != q_stop) {                                            \
      MATRIX_INT32_ITER_16X2_SSE(mi, qi, xmm_sum, _mm_load_si128,     \
                                 ACCUM_INT4_STEP_SSE)                 \
      mi += 16;                                                       \
      qi += 2;                                                        \
    }                                                                 \
  }                                                                   \
  if (((uintptr_t)out & 0xf) != 0) {                                  \
    MATRIX_VAR_STORE(4, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM)     \
  } else {                                                            \
    MATRIX_VAR_STORE(4, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)      \
  }

//! Accumulate the distance between matrix and query (INT4, M=16, N=4).
//! `m` and `q` are viewed as 32-bit words (`mi` / `qi`, declared in the
//! enclosing scope). Each step hands 16 matrix words and 4 query words to
//! the iterator until `(dim >> 3) << 2` query words are consumed. The
//! unaligned load / store variants are picked whenever `m` / `out` is not
//! 16-byte aligned.
#define ACCUM_INT4_16X4_SSE(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(4, 4, __m128i, xmm_sum, _mm_setzero_si128())        \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \
  if (((uintptr_t)mi & 0xf) != 0) {                                   \
    const uint32_t *const q_stop = qi + ((dim >> 3) << 2);            \
    while (qi != q_stop) {                                            \
      MATRIX_INT32_ITER_16X4_SSE(mi, qi, xmm_sum, _mm_loadu_si128,    \
                                 ACCUM_INT4_STEP_SSE)                 \
      mi += 16;                                                       \
      qi += 4;                                                        \
    }                                                                 \
  } else {                                                            \
    const uint32_t *const q_stop = qi + ((dim >> 3) << 2);            \
    while (qi != q_stop) {                                            \
      MATRIX_INT32_ITER_16X4_SSE(mi, qi, xmm_sum, _mm_load_si128,     \
                                 ACCUM_INT4_STEP_SSE)                 \
      mi += 16;                                                       \
      qi += 4;                                                        \
    }                                                                 \
  }                                                                   \
  if (((uintptr_t)out & 0xf) != 0) {                                  \
    MATRIX_VAR_STORE(4, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM)     \
  } else {                                                            \
    MATRIX_VAR_STORE(4, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)      \
  }

//! Accumulate the distance between matrix and query (INT4, M=16, N=8).
//! `m` and `q` are viewed as 32-bit words (`mi` / `qi`, declared in the
//! enclosing scope). Each step hands 16 matrix words and 8 query words to
//! the iterator until `(dim >> 3) << 3` query words are consumed. The
//! unaligned load / store variants are picked whenever `m` / `out` is not
//! 16-byte aligned.
#define ACCUM_INT4_16X8_SSE(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(4, 8, __m128i, xmm_sum, _mm_setzero_si128())        \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \
  if (((uintptr_t)mi & 0xf) != 0) {                                   \
    const uint32_t *const q_stop = qi + ((dim >> 3) << 3);            \
    while (qi != q_stop) {                                            \
      MATRIX_INT32_ITER_16X8_SSE(mi, qi, xmm_sum, _mm_loadu_si128,    \
                                 ACCUM_INT4_STEP_SSE)                 \
      mi += 16;                                                       \
      qi += 8;                                                        \
    }                                                                 \
  } else {                                                            \
    const uint32_t *const q_stop = qi + ((dim >> 3) << 3);            \
    while (qi != q_stop) {                                            \
      MATRIX_INT32_ITER_16X8_SSE(mi, qi, xmm_sum, _mm_load_si128,     \
                                 ACCUM_INT4_STEP_SSE)                 \
      mi += 16;                                                       \
      qi += 8;                                                        \
    }                                                                 \
  }                                                                   \
  if (((uintptr_t)out & 0xf) != 0) {                                  \
    MATRIX_VAR_STORE(4, 8, 4, xmm_sum, out, _mm_storeu_ps, _NORM)     \
  } else {                                                            \
    MATRIX_VAR_STORE(4, 8, 4, xmm_sum, out, _mm_store_ps, _NORM)      \
  }

//! Accumulate the distance between matrix and query (INT4, M=16, N=16).
//! `m` and `q` are viewed as 32-bit words (`mi` / `qi`, declared in the
//! enclosing scope). Each step hands 16 matrix words and 16 query words to
//! the iterator until `(dim >> 3) << 4` query words are consumed. The
//! unaligned load / store variants are picked whenever `m` / `out` is not
//! 16-byte aligned.
#define ACCUM_INT4_16X16_SSE(m, q, dim, out, _NORM)                   \
  MATRIX_VAR_INIT(4, 16, __m128i, xmm_sum, _mm_setzero_si128())       \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \
  if (((uintptr_t)mi & 0xf) != 0) {                                   \
    const uint32_t *const q_stop = qi + ((dim >> 3) << 4);            \
    while (qi != q_stop) {                                            \
      MATRIX_INT32_ITER_16X16_SSE(mi, qi, xmm_sum, _mm_loadu_si128,   \
                                  ACCUM_INT4_STEP_SSE)                \
      mi += 16;                                                       \
      qi += 16;                                                       \
    }                                                                 \
  } else {                                                            \
    const uint32_t *const q_stop = qi + ((dim >> 3) << 4);            \
    while (qi != q_stop) {                                            \
      MATRIX_INT32_ITER_16X16_SSE(mi, qi, xmm_sum, _mm_load_si128,    \
                                  ACCUM_INT4_STEP_SSE)                \
      mi += 16;                                                       \
      qi += 16;                                                       \
    }                                                                 \
  }                                                                   \
  if (((uintptr_t)out & 0xf) != 0) {                                  \
    MATRIX_VAR_STORE(4, 16, 4, xmm_sum, out, _mm_storeu_ps, _NORM)    \
  } else {                                                            \
    MATRIX_VAR_STORE(4, 16, 4, xmm_sum, out, _mm_store_ps, _NORM)     \
  }

//! Accumulate the distance between matrix and query (INT4, M=32, N=1).
//! `m` and `q` are viewed as 32-bit words (`mi` / `qi`, declared in the
//! enclosing scope). Each step hands 32 matrix words and 1 query word to the
//! iterator until `dim >> 3` query words are consumed. The unaligned
//! load / store variants are picked whenever `m` / `out` is not 16-byte
//! aligned.
#define ACCUM_INT4_32X1_SSE(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(8, 1, __m128i, xmm_sum, _mm_setzero_si128())        \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \
  if (((uintptr_t)mi & 0xf) != 0) {                                   \
    const uint32_t *const q_stop = qi + (dim >> 3);                   \
    while (qi != q_stop) {                                            \
      MATRIX_INT32_ITER_32X1_SSE(mi, qi, xmm_sum, _mm_loadu_si128,    \
                                 ACCUM_INT4_STEP_SSE)                 \
      mi += 32;                                                       \
      ++qi;                                                           \
    }                                                                 \
  } else {                                                            \
    const uint32_t *const q_stop = qi + (dim >> 3);                   \
    while (qi != q_stop) {                                            \
      MATRIX_INT32_ITER_32X1_SSE(mi, qi, xmm_sum, _mm_load_si128,     \
                                 ACCUM_INT4_STEP_SSE)                 \
      mi += 32;                                                       \
      ++qi;                                                           \
    }                                                                 \
  }                                                                   \
  if (((uintptr_t)out & 0xf) != 0) {                                  \
    MATRIX_VAR_STORE(8, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)     \
  } else {                                                            \
    MATRIX_VAR_STORE(8, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)      \
  }

//! Accumulate the distance between matrix and query (INT4, M=32, N=2).
//! `m` and `q` are viewed as 32-bit words (`mi` / `qi`, declared in the
//! enclosing scope). Each step hands 32 matrix words and 2 query words to
//! the iterator until `(dim >> 3) << 1` query words are consumed. The
//! unaligned load / store variants are picked whenever `m` / `out` is not
//! 16-byte aligned.
#define ACCUM_INT4_32X2_SSE(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(8, 2, __m128i, xmm_sum, _mm_setzero_si128())        \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \
  if (((uintptr_t)mi & 0xf) != 0) {                                   \
    const uint32_t *const q_stop = qi + ((dim >> 3) << 1);            \
    while (qi != q_stop) {                                            \
      MATRIX_INT32_ITER_32X2_SSE(mi, qi, xmm_sum, _mm_loadu_si128,    \
                                 ACCUM_INT4_STEP_SSE)                 \
      mi += 32;                                                       \
      qi += 2;                                                        \
    }                                                                 \
  } else {                                                            \
    const uint32_t *const q_stop = qi + ((dim >> 3) << 1);            \
    while (qi != q_stop) {                                            \
      MATRIX_INT32_ITER_32X2_SSE(mi, qi, xmm_sum, _mm_load_si128,     \
                                 ACCUM_INT4_STEP_SSE)                 \
      mi += 32;                                                       \
      qi += 2;                                                        \
    }                                                                 \
  }                                                                   \
  if (((uintptr_t)out & 0xf) != 0) {                                  \
    MATRIX_VAR_STORE(8, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM)     \
  } else {                                                            \
    MATRIX_VAR_STORE(8, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)      \
  }

//! Compute the distance between matrix and query (INT4, M=32, N=4)
//!
//! Statement macro expanded in the caller's scope: it declares the locals
//! `qi` and `mi` (plus whatever MATRIX_VAR_INIT declares for `xmm_sum`), so it
//! can be expanded at most once per scope.
//!   m     matrix data; every step consumes 32 uint32_t words
//!   q     query data; every step consumes 4 uint32_t words
//!   dim   dimension; (dim >> 3) steps are executed
//!   out   destination forwarded to MATRIX_VAR_STORE
//!   _NORM forwarded untouched to MATRIX_VAR_STORE
//! `dim` and `out` are parenthesized where this macro uses them directly, so
//! expression arguments (e.g. `a | b`) keep their intended precedence.
#define ACCUM_INT4_32X4_SSE(m, q, dim, out, _NORM)                 \
  MATRIX_VAR_INIT(8, 4, __m128i, xmm_sum, _mm_setzero_si128())     \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);      \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);      \
  /* Aligned loads only when m is 16-byte aligned. */              \
  if (((uintptr_t)mi & 0xf) == 0) {                                \
    for (const uint32_t *qe = qi + (((dim) >> 3) << 2); qi != qe;  \
         mi += 32, qi += 4) {                                      \
      MATRIX_INT32_ITER_32X4_SSE(mi, qi, xmm_sum, _mm_load_si128,  \
                                 ACCUM_INT4_STEP_SSE)              \
    }                                                              \
  } else {                                                         \
    for (const uint32_t *qe = qi + (((dim) >> 3) << 2); qi != qe;  \
         mi += 32, qi += 4) {                                      \
      MATRIX_INT32_ITER_32X4_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \
                                 ACCUM_INT4_STEP_SSE)              \
    }                                                              \
  }                                                                \
  /* Aligned stores only when out is 16-byte aligned. */           \
  if (((uintptr_t)(out) & 0xf) == 0) {                             \
    MATRIX_VAR_STORE(8, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)   \
  } else {                                                         \
    MATRIX_VAR_STORE(8, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (INT4, M=32, N=8)
//!
//! Statement macro expanded in the caller's scope: it declares the locals
//! `qi` and `mi` (plus whatever MATRIX_VAR_INIT declares for `xmm_sum`), so it
//! can be expanded at most once per scope.
//!   m     matrix data; every step consumes 32 uint32_t words
//!   q     query data; every step consumes 8 uint32_t words
//!   dim   dimension; (dim >> 3) steps are executed
//!   out   destination forwarded to MATRIX_VAR_STORE
//!   _NORM forwarded untouched to MATRIX_VAR_STORE
//! `dim` and `out` are parenthesized where this macro uses them directly, so
//! expression arguments (e.g. `a | b`) keep their intended precedence.
#define ACCUM_INT4_32X8_SSE(m, q, dim, out, _NORM)                 \
  MATRIX_VAR_INIT(8, 8, __m128i, xmm_sum, _mm_setzero_si128())     \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);      \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);      \
  /* Aligned loads only when m is 16-byte aligned. */              \
  if (((uintptr_t)mi & 0xf) == 0) {                                \
    for (const uint32_t *qe = qi + (((dim) >> 3) << 3); qi != qe;  \
         mi += 32, qi += 8) {                                      \
      MATRIX_INT32_ITER_32X8_SSE(mi, qi, xmm_sum, _mm_load_si128,  \
                                 ACCUM_INT4_STEP_SSE)              \
    }                                                              \
  } else {                                                         \
    for (const uint32_t *qe = qi + (((dim) >> 3) << 3); qi != qe;  \
         mi += 32, qi += 8) {                                      \
      MATRIX_INT32_ITER_32X8_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \
                                 ACCUM_INT4_STEP_SSE)              \
    }                                                              \
  }                                                                \
  /* Aligned stores only when out is 16-byte aligned. */           \
  if (((uintptr_t)(out) & 0xf) == 0) {                             \
    MATRIX_VAR_STORE(8, 8, 4, xmm_sum, out, _mm_store_ps, _NORM)   \
  } else {                                                         \
    MATRIX_VAR_STORE(8, 8, 4, xmm_sum, out, _mm_storeu_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (INT4, M=32, N=16)
//!
//! Statement macro expanded in the caller's scope: it declares the locals
//! `qi` and `mi` (plus whatever MATRIX_VAR_INIT declares for `xmm_sum`), so it
//! can be expanded at most once per scope.
//!   m     matrix data; every step consumes 32 uint32_t words
//!   q     query data; every step consumes 16 uint32_t words
//!   dim   dimension; (dim >> 3) steps are executed
//!   out   destination forwarded to MATRIX_VAR_STORE
//!   _NORM forwarded untouched to MATRIX_VAR_STORE
//! `dim` and `out` are parenthesized where this macro uses them directly, so
//! expression arguments (e.g. `a | b`) keep their intended precedence.
#define ACCUM_INT4_32X16_SSE(m, q, dim, out, _NORM)                 \
  MATRIX_VAR_INIT(8, 16, __m128i, xmm_sum, _mm_setzero_si128())     \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);       \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);       \
  /* Aligned loads only when m is 16-byte aligned. */               \
  if (((uintptr_t)mi & 0xf) == 0) {                                 \
    for (const uint32_t *qe = qi + (((dim) >> 3) << 4); qi != qe;   \
         mi += 32, qi += 16) {                                      \
      MATRIX_INT32_ITER_32X16_SSE(mi, qi, xmm_sum, _mm_load_si128,  \
                                  ACCUM_INT4_STEP_SSE)              \
    }                                                               \
  } else {                                                          \
    for (const uint32_t *qe = qi + (((dim) >> 3) << 4); qi != qe;   \
         mi += 32, qi += 16) {                                      \
      MATRIX_INT32_ITER_32X16_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \
                                  ACCUM_INT4_STEP_SSE)              \
    }                                                               \
  }                                                                 \
  /* Aligned stores only when out is 16-byte aligned. */            \
  if (((uintptr_t)(out) & 0xf) == 0) {                              \
    MATRIX_VAR_STORE(8, 16, 4, xmm_sum, out, _mm_store_ps, _NORM)   \
  } else {                                                          \
    MATRIX_VAR_STORE(8, 16, 4, xmm_sum, out, _mm_storeu_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (INT4, M=32, N=32)
//!
//! Statement macro expanded in the caller's scope: it declares the locals
//! `qi` and `mi` (plus whatever MATRIX_VAR_INIT declares for `xmm_sum`), so it
//! can be expanded at most once per scope.
//!   m     matrix data; every step consumes 32 uint32_t words
//!   q     query data; every step consumes 32 uint32_t words
//!   dim   dimension; (dim >> 3) steps are executed
//!   out   destination forwarded to MATRIX_VAR_STORE
//!   _NORM forwarded untouched to MATRIX_VAR_STORE
//! `dim` and `out` are parenthesized where this macro uses them directly, so
//! expression arguments (e.g. `a | b`) keep their intended precedence.
#define ACCUM_INT4_32X32_SSE(m, q, dim, out, _NORM)                 \
  MATRIX_VAR_INIT(8, 32, __m128i, xmm_sum, _mm_setzero_si128())     \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);       \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);       \
  /* Aligned loads only when m is 16-byte aligned. */               \
  if (((uintptr_t)mi & 0xf) == 0) {                                 \
    for (const uint32_t *qe = qi + (((dim) >> 3) << 5); qi != qe;   \
         mi += 32, qi += 32) {                                      \
      MATRIX_INT32_ITER_32X32_SSE(mi, qi, xmm_sum, _mm_load_si128,  \
                                  ACCUM_INT4_STEP_SSE)              \
    }                                                               \
  } else {                                                          \
    for (const uint32_t *qe = qi + (((dim) >> 3) << 5); qi != qe;   \
         mi += 32, qi += 32) {                                      \
      MATRIX_INT32_ITER_32X32_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \
                                  ACCUM_INT4_STEP_SSE)              \
    }                                                               \
  }                                                                 \
  /* Aligned stores only when out is 16-byte aligned. */            \
  if (((uintptr_t)(out) & 0xf) == 0) {                              \
    MATRIX_VAR_STORE(8, 32, 4, xmm_sum, out, _mm_store_ps, _NORM)   \
  } else {                                                          \
    MATRIX_VAR_STORE(8, 32, 4, xmm_sum, out, _mm_storeu_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (INT4, M=2, N=1)
//!
//! Statement macro expanded in the caller's scope: it declares the locals
//! `qi`, `mi`, `qe_aligned`, `qe` and `xmm_sum_0` (plus whatever
//! MATRIX_VAR_INIT declares for `ymm_sum`), so it can be expanded at most once
//! per scope.
//!   m     matrix data; 8 uint32_t words per main-loop step
//!   q     query data; 4 uint32_t words per main-loop step
//!   dim   dimension; (dim >> 5) main-loop steps, then an optional 2-word and
//!         an optional 1-word query tail until (dim >> 3) words are consumed
//!   out   destination; the two low float lanes are written via _mm_storel_pi
//!   _NORM applied to the integer sum to produce the value that is stored
//! `dim` and `out` are parenthesized so expression arguments keep their
//! intended precedence, and the tail test uses a pointer difference instead of
//! forming `qe_aligned + 2`, which may lie past the end of the query buffer.
#define ACCUM_INT4_2X1_AVX(m, q, dim, out, _NORM)                              \
  MATRIX_VAR_INIT(1, 1, __m256i, ymm_sum, _mm256_setzero_si256())              \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);                  \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);                  \
  const uint32_t *qe_aligned = qi + (((dim) >> 5) << 2);                       \
  const uint32_t *qe = qi + ((dim) >> 3);                                      \
  /* Aligned loads only when m is 32-byte aligned. */                          \
  if (((uintptr_t)mi & 0x1f) == 0) {                                           \
    for (; qi != qe_aligned; mi += 8, qi += 4) {                               \
      MATRIX_INT32_ITER_2X1_AVX(mi, qi, ymm_sum, _mm256_load_si256,            \
                                ACCUM_INT4_STEP_AVX)                           \
    }                                                                          \
  } else {                                                                     \
    for (; qi != qe_aligned; mi += 8, qi += 4) {                               \
      MATRIX_INT32_ITER_2X1_AVX(mi, qi, ymm_sum, _mm256_loadu_si256,           \
                                ACCUM_INT4_STEP_AVX)                           \
    }                                                                          \
  }                                                                            \
  /* Fold the upper 128-bit half of the accumulator onto the lower half. */    \
  __m128i xmm_sum_0 = _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_0),       \
                                    _mm256_extracti128_si256(ymm_sum_0_0, 1)); \
  /* Tail 1: at least two query words (four matrix words) remain. */           \
  if (qe - qe_aligned >= 2) {                                                  \
    __m128i xmm_mi = _mm_loadu_si128((const __m128i *)(mi));                   \
    __m128i xmm_qi = _mm_set_epi32(qi[1], qi[1], qi[0], qi[0]);                \
    ACCUM_INT4_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0)                             \
    mi += 4;                                                                   \
    qi += 2;                                                                   \
  }                                                                            \
  /* Add lanes 2,3 onto lanes 0,1; only the two low lanes are used below. */   \
  xmm_sum_0 = _mm_add_epi32(                                                   \
      xmm_sum_0, _mm_shuffle_epi32(xmm_sum_0, _MM_SHUFFLE(0, 0, 3, 2)));       \
  /* Tail 2: one last query word (two matrix words) remains. */                \
  if (qi != qe) {                                                              \
    __m128i xmm_mi = _mm_set_epi32(0, 0, mi[1], mi[0]);                        \
    __m128i xmm_qi = _mm_broadcast_si32(qi);                                   \
    ACCUM_INT4_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0)                             \
  }                                                                            \
  _mm_storel_pi((__m64 *)(out), _NORM(xmm_sum_0));

//! Compute the distance between matrix and query (INT4, M=2, N=2)
//!
//! Statement macro expanded in the caller's scope: it declares the locals
//! `qi`, `mi`, `qe_aligned`, `qe`, `xmm_sum_0_0` and `xmm_sum_0_1` (plus
//! whatever MATRIX_VAR_INIT declares for `ymm_sum`), so it can be expanded at
//! most once per scope.
//!   m     matrix data; 8 uint32_t words per main-loop step
//!   q     query data; 8 uint32_t words per main-loop step
//!   dim   dimension; (dim >> 5) main-loop steps, then an optional 4-word and
//!         an optional 2-word query tail until ((dim >> 3) << 1) words are
//!         consumed
//!   out   destination forwarded to MATRIX_VAR_STORE
//!   _NORM forwarded untouched to MATRIX_VAR_STORE
//! Aligned loads are used only when both `m` and `q` are 32-byte aligned.
//! `dim` and `out` are parenthesized so expression arguments keep their
//! intended precedence, and the tail test uses a pointer difference instead of
//! forming `qe_aligned + 4`, which may lie past the end of the query buffer.
#define ACCUM_INT4_2X2_AVX(m, q, dim, out, _NORM)                            \
  MATRIX_VAR_INIT(1, 2, __m256i, ymm_sum, _mm256_setzero_si256())            \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);                \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);                \
  const uint32_t *qe_aligned = qi + (((dim) >> 5) << 3);                     \
  const uint32_t *qe = qi + (((dim) >> 3) << 1);                             \
  if (((uintptr_t)mi & 0x1f) == 0 && ((uintptr_t)qi & 0x1f) == 0) {          \
    for (; qi != qe_aligned; mi += 8, qi += 8) {                             \
      MATRIX_INT32_ITER_2X2_AVX(mi, qi, ymm_sum, _mm256_load_si256,          \
                                ACCUM_INT4_STEP_AVX)                         \
    }                                                                        \
  } else {                                                                   \
    for (; qi != qe_aligned; mi += 8, qi += 8) {                             \
      MATRIX_INT32_ITER_2X2_AVX(mi, qi, ymm_sum, _mm256_loadu_si256,         \
                                ACCUM_INT4_STEP_AVX)                         \
    }                                                                        \
  }                                                                          \
  /* Fold the upper 128-bit half of each accumulator onto its lower half. */ \
  __m128i xmm_sum_0_0 =                                                      \
      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_0),                     \
                    _mm256_extracti128_si256(ymm_sum_0_0, 1));               \
  __m128i xmm_sum_0_1 =                                                      \
      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_1),                     \
                    _mm256_extracti128_si256(ymm_sum_0_1, 1));               \
  /* Tail 1: at least four query words (four matrix words) remain. */        \
  if (qe - qe_aligned >= 4) {                                                \
    __m128i xmm_qi = _mm_loadu_si128((const __m128i *)(qi));                 \
    __m128i xmm_mi = _mm_loadu_si128((const __m128i *)(mi));                 \
    /* Query words 0 and 2, each duplicated, feed the first accumulator. */  \
    __m128i xmm_pi = _mm_shuffle_epi32(xmm_qi, _MM_SHUFFLE(2, 2, 0, 0));     \
    ACCUM_INT4_STEP_SSE(xmm_mi, xmm_pi, xmm_sum_0_0)                         \
    /* Query words 1 and 3, each duplicated, feed the second one. */         \
    xmm_pi = _mm_shuffle_epi32(xmm_qi, _MM_SHUFFLE(3, 3, 1, 1));             \
    ACCUM_INT4_STEP_SSE(xmm_mi, xmm_pi, xmm_sum_0_1)                         \
    mi += 4;                                                                 \
    qi += 4;                                                                 \
  }                                                                          \
  /* Merge both accumulators into xmm_sum_0_0: low 64 bits from the first */ \
  /* accumulator's halves, high 64 bits from the second accumulator's. */    \
  xmm_sum_0_0 = _mm_add_epi32(_mm_unpacklo_epi64(xmm_sum_0_0, xmm_sum_0_1),  \
                              _mm_unpackhi_epi64(xmm_sum_0_0, xmm_sum_0_1)); \
  /* Tail 2: one last pair of query words (two matrix words) remains. */     \
  if (qi != qe) {                                                            \
    __m128i xmm_mi = _mm_set_epi32(mi[1], mi[0], mi[1], mi[0]);              \
    __m128i xmm_qi = _mm_set_epi32(qi[1], qi[1], qi[0], qi[0]);              \
    ACCUM_INT4_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)                         \
  }                                                                          \
  /* Aligned stores only when out is 16-byte aligned. */                     \
  if (((uintptr_t)(out) & 0xf) == 0) {                                       \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)             \
  } else {                                                                   \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)            \
  }

//! Compute the distance between matrix and query (INT4, M=4, N=1)
//!
//! Statement macro expanded in the caller's scope: it declares the locals
//! `qi`, `mi`, `qe` and `xmm_sum_0_0` (plus whatever MATRIX_VAR_INIT declares
//! for `ymm_sum`), so it can be expanded at most once per scope.
//!   m     matrix data; 8 uint32_t words per main-loop step
//!   q     query data; 2 uint32_t words per main-loop step
//!   dim   dimension; (dim >> 4) main-loop steps, then one 128-bit tail step
//!         if a query word is left before (dim >> 3) words are consumed
//!   out   destination forwarded to MATRIX_VAR_STORE
//!   _NORM forwarded untouched to MATRIX_VAR_STORE
//! `dim` and `out` are parenthesized where this macro uses them directly, so
//! expression arguments (e.g. `a | b`) keep their intended precedence.
#define ACCUM_INT4_4X1_AVX(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(1, 1, __m256i, ymm_sum, _mm256_setzero_si256())    \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);        \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);        \
  const uint32_t *qe = qi + ((dim) >> 3);                            \
  /* Aligned loads only when m is 32-byte aligned. */                \
  if (((uintptr_t)mi & 0x1f) == 0) {                                 \
    for (const uint32_t *qe_aligned = qi + (((dim) >> 4) << 1);      \
         qi != qe_aligned; mi += 8, qi += 2) {                       \
      MATRIX_INT32_ITER_4X1_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \
                                ACCUM_INT4_STEP_AVX)                 \
    }                                                                \
  } else {                                                           \
    for (const uint32_t *qe_aligned = qi + (((dim) >> 4) << 1);      \
         qi != qe_aligned; mi += 8, qi += 2) {                       \
      MATRIX_INT32_ITER_4X1_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \
                                ACCUM_INT4_STEP_AVX)                 \
    }                                                                \
  }                                                                  \
  /* Fold the upper 128-bit half of the accumulator onto the lower. */ \
  __m128i xmm_sum_0_0 =                                              \
      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_0),             \
                    _mm256_extracti128_si256(ymm_sum_0_0, 1));       \
  /* Tail: one query word (four matrix words) is left over. */       \
  if (qi != qe) {                                                    \
    __m128i xmm_mi = _mm_loadu_si128((const __m128i *)(mi));         \
    __m128i xmm_qi = _mm_broadcast_si32(qi);                         \
    ACCUM_INT4_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)                 \
  }                                                                  \
  /* Aligned stores only when out is 16-byte aligned. */             \
  if (((uintptr_t)(out) & 0xf) == 0) {                               \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)     \
  } else {                                                           \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)    \
  }

//! Compute the distance between matrix and query (INT4, M=4, N=2)
//!
//! Statement macro expanded in the caller's scope: it declares the locals
//! `qi`, `mi`, `qe`, `xmm_sum_0_0` and `xmm_sum_0_1` (plus whatever
//! MATRIX_VAR_INIT declares for `ymm_sum`), so it can be expanded at most once
//! per scope.
//!   m     matrix data; 8 uint32_t words per main-loop step
//!   q     query data; 4 uint32_t words per main-loop step
//!   dim   dimension; (dim >> 4) main-loop steps, then one 128-bit tail step
//!         if query words are left before ((dim >> 3) << 1) are consumed
//!   out   destination forwarded to MATRIX_VAR_STORE
//!   _NORM forwarded untouched to MATRIX_VAR_STORE
//! `dim` and `out` are parenthesized where this macro uses them directly, so
//! expression arguments (e.g. `a | b`) keep their intended precedence.
#define ACCUM_INT4_4X2_AVX(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(1, 2, __m256i, ymm_sum, _mm256_setzero_si256())    \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);        \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);        \
  const uint32_t *qe = qi + (((dim) >> 3) << 1);                     \
  /* Aligned loads only when m is 32-byte aligned. */                \
  if (((uintptr_t)mi & 0x1f) == 0) {                                 \
    for (const uint32_t *qe_aligned = qi + (((dim) >> 4) << 2);      \
         qi != qe_aligned; mi += 8, qi += 4) {                       \
      MATRIX_INT32_ITER_4X2_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \
                                ACCUM_INT4_STEP_AVX)                 \
    }                                                                \
  } else {                                                           \
    for (const uint32_t *qe_aligned = qi + (((dim) >> 4) << 2);      \
         qi != qe_aligned; mi += 8, qi += 4) {                       \
      MATRIX_INT32_ITER_4X2_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \
                                ACCUM_INT4_STEP_AVX)                 \
    }                                                                \
  }                                                                  \
  /* Fold the upper 128-bit half of each accumulator onto the lower. */ \
  __m128i xmm_sum_0_0 =                                              \
      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_0),             \
                    _mm256_extracti128_si256(ymm_sum_0_0, 1));       \
  __m128i xmm_sum_0_1 =                                              \
      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_1),             \
                    _mm256_extracti128_si256(ymm_sum_0_1, 1));       \
  /* Tail: two query words (four matrix words) are left over; */     \
  /* query word k feeds accumulator k. */                            \
  if (qi != qe) {                                                    \
    __m128i xmm_mi = _mm_loadu_si128((const __m128i *)(mi));         \
    __m128i xmm_qi = _mm_broadcast_si32(qi);                         \
    ACCUM_INT4_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)                 \
    xmm_qi = _mm_broadcast_si32(qi + 1);                             \
    ACCUM_INT4_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_1)                 \
  }                                                                  \
  /* Aligned stores only when out is 16-byte aligned. */             \
  if (((uintptr_t)(out) & 0xf) == 0) {                               \
    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)     \
  } else {                                                           \
    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM)    \
  }

//! Compute the distance between matrix and query (INT4, M=4, N=4)
//!
//! Statement macro expanded in the caller's scope: it declares the locals
//! `qi`, `mi`, `qe` and `xmm_sum_0_0` .. `xmm_sum_0_3` (plus whatever
//! MATRIX_VAR_INIT declares for `ymm_sum`), so it can be expanded at most once
//! per scope.
//!   m     matrix data; 8 uint32_t words per main-loop step
//!   q     query data; 8 uint32_t words per main-loop step
//!   dim   dimension; (dim >> 4) main-loop steps, then one 128-bit tail step
//!         if query words are left before ((dim >> 3) << 2) are consumed
//!   out   destination forwarded to MATRIX_VAR_STORE
//!   _NORM forwarded untouched to MATRIX_VAR_STORE
//! Aligned loads are used only when both `m` and `q` are 32-byte aligned.
//! `dim` and `out` are parenthesized where this macro uses them directly, so
//! expression arguments (e.g. `a | b`) keep their intended precedence.
#define ACCUM_INT4_4X4_AVX(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(1, 4, __m256i, ymm_sum, _mm256_setzero_si256())    \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);        \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);        \
  const uint32_t *qe = qi + (((dim) >> 3) << 2);                     \
  if (((uintptr_t)mi & 0x1f) == 0 && ((uintptr_t)qi & 0x1f) == 0) {  \
    for (const uint32_t *qe_aligned = qi + (((dim) >> 4) << 3);      \
         qi != qe_aligned; mi += 8, qi += 8) {                       \
      MATRIX_INT32_ITER_4X4_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \
                                ACCUM_INT4_STEP_AVX)                 \
    }                                                                \
  } else {                                                           \
    for (const uint32_t *qe_aligned = qi + (((dim) >> 4) << 3);      \
         qi != qe_aligned; mi += 8, qi += 8) {                       \
      MATRIX_INT32_ITER_4X4_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \
                                ACCUM_INT4_STEP_AVX)                 \
    }                                                                \
  }                                                                  \
  /* Fold the upper 128-bit half of each accumulator onto the lower. */ \
  __m128i xmm_sum_0_0 =                                              \
      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_0),             \
                    _mm256_extracti128_si256(ymm_sum_0_0, 1));       \
  __m128i xmm_sum_0_1 =                                              \
      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_1),             \
                    _mm256_extracti128_si256(ymm_sum_0_1, 1));       \
  __m128i xmm_sum_0_2 =                                              \
      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_2),             \
                    _mm256_extracti128_si256(ymm_sum_0_2, 1));       \
  __m128i xmm_sum_0_3 =                                              \
      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_3),             \
                    _mm256_extracti128_si256(ymm_sum_0_3, 1));       \
  /* Tail: four query words (four matrix words) are left over; */    \
  /* query word k feeds accumulator k. */                            \
  if (qi != qe) {                                                    \
    __m128i xmm_mi = _mm_loadu_si128((const __m128i *)(mi));         \
    __m128i xmm_qi = _mm_broadcast_si32(qi);                         \
    ACCUM_INT4_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)                 \
    xmm_qi = _mm_broadcast_si32(qi + 1);                             \
    ACCUM_INT4_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_1)                 \
    xmm_qi = _mm_broadcast_si32(qi + 2);                             \
    ACCUM_INT4_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_2)                 \
    xmm_qi = _mm_broadcast_si32(qi + 3);                             \
    ACCUM_INT4_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_3)                 \
  }                                                                  \
  /* Aligned stores only when out is 16-byte aligned. */             \
  if (((uintptr_t)(out) & 0xf) == 0) {                               \
    MATRIX_VAR_STORE(1, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)     \
  } else {                                                           \
    MATRIX_VAR_STORE(1, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM)    \
  }

//! Compute the distance between matrix and query (INT4, M=8, N=1)
//!
//! Statement macro expanded in the caller's scope: it declares the locals
//! `qi` and `mi` (plus whatever MATRIX_VAR_INIT declares for `ymm_sum`), so it
//! can be expanded at most once per scope.
//!   m     matrix data; every step consumes 8 uint32_t words
//!   q     query data; every step consumes 1 uint32_t word
//!   dim   dimension; (dim >> 3) steps are executed
//!   out   destination forwarded to MATRIX_VAR_STORE
//!   _NORM forwarded untouched to MATRIX_VAR_STORE
//! `dim` and `out` are parenthesized where this macro uses them directly, so
//! expression arguments (e.g. `a | b`) keep their intended precedence.
#define ACCUM_INT4_8X1_AVX(m, q, dim, out, _NORM)                         \
  MATRIX_VAR_INIT(1, 1, __m256i, ymm_sum, _mm256_setzero_si256())         \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);             \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);             \
  /* Aligned loads only when m is 32-byte aligned. */                     \
  if (((uintptr_t)mi & 0x1f) == 0) {                                      \
    for (const uint32_t *qe = qi + ((dim) >> 3); qi != qe;                \
         mi += 8, ++qi) {                                                 \
      MATRIX_INT32_ITER_8X1_AVX(mi, qi, ymm_sum, _mm256_load_si256,       \
                                ACCUM_INT4_STEP_AVX)                      \
    }                                                                     \
  } else {                                                                \
    for (const uint32_t *qe = qi + ((dim) >> 3); qi != qe;                \
         mi += 8, ++qi) {                                                 \
      MATRIX_INT32_ITER_8X1_AVX(mi, qi, ymm_sum, _mm256_loadu_si256,      \
                                ACCUM_INT4_STEP_AVX)                      \
    }                                                                     \
  }                                                                       \
  /* Aligned stores only when out is 32-byte aligned. */                  \
  if (((uintptr_t)(out) & 0x1f) == 0) {                                   \
    MATRIX_VAR_STORE(1, 1, 8, ymm_sum, out, _mm256_store_ps, _NORM)       \
  } else {                                                                \
    MATRIX_VAR_STORE(1, 1, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)      \
  }

//! Compute the distance between matrix and query (INT4, M=8, N=2)
//!
//! Statement macro expanded in the caller's scope: it declares the locals
//! `qi` and `mi` (plus whatever MATRIX_VAR_INIT declares for `ymm_sum`), so it
//! can be expanded at most once per scope.
//!   m     matrix data; every step consumes 8 uint32_t words
//!   q     query data; every step consumes 2 uint32_t words
//!   dim   dimension; (dim >> 3) steps are executed
//!   out   destination forwarded to MATRIX_VAR_STORE
//!   _NORM forwarded untouched to MATRIX_VAR_STORE
//! `dim` and `out` are parenthesized where this macro uses them directly, so
//! expression arguments (e.g. `a | b`) keep their intended precedence.
#define ACCUM_INT4_8X2_AVX(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(1, 2, __m256i, ymm_sum, _mm256_setzero_si256())    \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);        \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);        \
  /* Aligned loads only when m is 32-byte aligned. */                \
  if (((uintptr_t)mi & 0x1f) == 0) {                                 \
    for (const uint32_t *qe = qi + (((dim) >> 3) << 1); qi != qe;    \
         mi += 8, qi += 2) {                                         \
      MATRIX_INT32_ITER_8X2_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \
                                ACCUM_INT4_STEP_AVX)                 \
    }                                                                \
  } else {                                                           \
    for (const uint32_t *qe = qi + (((dim) >> 3) << 1); qi != qe;    \
         mi += 8, qi += 2) {                                         \
      MATRIX_INT32_ITER_8X2_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \
                                ACCUM_INT4_STEP_AVX)                 \
    }                                                                \
  }                                                                  \
  /* Aligned stores only when out is 32-byte aligned. */             \
  if (((uintptr_t)(out) & 0x1f) == 0) {                              \
    MATRIX_VAR_STORE(1, 2, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \
  } else {                                                           \
    MATRIX_VAR_STORE(1, 2, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \
  }

//! Compute the distance between matrix and query (INT4, M=8, N=4)
//!
//! Statement macro expanded in the caller's scope: it declares the locals
//! `qi` and `mi` (plus whatever MATRIX_VAR_INIT declares for `ymm_sum`), so it
//! can be expanded at most once per scope.
//!   m     matrix data; every step consumes 8 uint32_t words
//!   q     query data; every step consumes 4 uint32_t words
//!   dim   dimension; (dim >> 3) steps are executed
//!   out   destination forwarded to MATRIX_VAR_STORE
//!   _NORM forwarded untouched to MATRIX_VAR_STORE
//! `dim` and `out` are parenthesized where this macro uses them directly, so
//! expression arguments (e.g. `a | b`) keep their intended precedence.
#define ACCUM_INT4_8X4_AVX(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(1, 4, __m256i, ymm_sum, _mm256_setzero_si256())    \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);        \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);        \
  /* Aligned loads only when m is 32-byte aligned. */                \
  if (((uintptr_t)mi & 0x1f) == 0) {                                 \
    for (const uint32_t *qe = qi + (((dim) >> 3) << 2); qi != qe;    \
         mi += 8, qi += 4) {                                         \
      MATRIX_INT32_ITER_8X4_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \
                                ACCUM_INT4_STEP_AVX)                 \
    }                                                                \
  } else {                                                           \
    for (const uint32_t *qe = qi + (((dim) >> 3) << 2); qi != qe;    \
         mi += 8, qi += 4) {                                         \
      MATRIX_INT32_ITER_8X4_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \
                                ACCUM_INT4_STEP_AVX)                 \
    }                                                                \
  }                                                                  \
  /* Aligned stores only when out is 32-byte aligned. */             \
  if (((uintptr_t)(out) & 0x1f) == 0) {                              \
    MATRIX_VAR_STORE(1, 4, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \
  } else {                                                           \
    MATRIX_VAR_STORE(1, 4, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \
  }

//! Compute the distance between matrix and query (INT4, M=8, N=8)
//!
//! Statement macro expanded in the caller's scope: it declares the locals
//! `qi` and `mi` (plus whatever MATRIX_VAR_INIT declares for `ymm_sum`), so it
//! can be expanded at most once per scope.
//!   m     matrix data; every step consumes 8 uint32_t words
//!   q     query data; every step consumes 8 uint32_t words
//!   dim   dimension; (dim >> 3) steps are executed
//!   out   destination forwarded to MATRIX_VAR_STORE
//!   _NORM forwarded untouched to MATRIX_VAR_STORE
//! `dim` and `out` are parenthesized where this macro uses them directly, so
//! expression arguments (e.g. `a | b`) keep their intended precedence.
#define ACCUM_INT4_8X8_AVX(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(1, 8, __m256i, ymm_sum, _mm256_setzero_si256())    \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);        \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);        \
  /* Aligned loads only when m is 32-byte aligned. */                \
  if (((uintptr_t)mi & 0x1f) == 0) {                                 \
    for (const uint32_t *qe = qi + (((dim) >> 3) << 3); qi != qe;    \
         mi += 8, qi += 8) {                                         \
      MATRIX_INT32_ITER_8X8_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \
                                ACCUM_INT4_STEP_AVX)                 \
    }                                                                \
  } else {                                                           \
    for (const uint32_t *qe = qi + (((dim) >> 3) << 3); qi != qe;    \
         mi += 8, qi += 8) {                                         \
      MATRIX_INT32_ITER_8X8_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \
                                ACCUM_INT4_STEP_AVX)                 \
    }                                                                \
  }                                                                  \
  /* Aligned stores only when out is 32-byte aligned. */             \
  if (((uintptr_t)(out) & 0x1f) == 0) {                              \
    MATRIX_VAR_STORE(1, 8, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \
  } else {                                                           \
    MATRIX_VAR_STORE(1, 8, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \
  }

//! Compute the distance between matrix and query (INT4, M=16, N=1)
//!
//! Statement macro expanded in the caller's scope: it declares the locals
//! `qi` and `mi` (plus whatever MATRIX_VAR_INIT declares for `ymm_sum`), so it
//! can be expanded at most once per scope.
//!   m     matrix data; every step consumes 16 uint32_t words
//!   q     query data; every step consumes 1 uint32_t word
//!   dim   dimension; (dim >> 3) steps are executed
//!   out   destination forwarded to MATRIX_VAR_STORE
//!   _NORM forwarded untouched to MATRIX_VAR_STORE
//! `dim` and `out` are parenthesized where this macro uses them directly, so
//! expression arguments (e.g. `a | b`) keep their intended precedence.
#define ACCUM_INT4_16X1_AVX(m, q, dim, out, _NORM)                         \
  MATRIX_VAR_INIT(2, 1, __m256i, ymm_sum, _mm256_setzero_si256())          \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);              \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);              \
  /* Aligned loads only when m is 32-byte aligned. */                      \
  if (((uintptr_t)mi & 0x1f) == 0) {                                       \
    for (const uint32_t *qe = qi + ((dim) >> 3); qi != qe;                 \
         mi += 16, ++qi) {                                                 \
      MATRIX_INT32_ITER_16X1_AVX(mi, qi, ymm_sum, _mm256_load_si256,       \
                                 ACCUM_INT4_STEP_AVX)                      \
    }                                                                      \
  } else {                                                                 \
    for (const uint32_t *qe = qi + ((dim) >> 3); qi != qe;                 \
         mi += 16, ++qi) {                                                 \
      MATRIX_INT32_ITER_16X1_AVX(mi, qi, ymm_sum, _mm256_loadu_si256,      \
                                 ACCUM_INT4_STEP_AVX)                      \
    }                                                                      \
  }                                                                        \
  /* Aligned stores only when out is 32-byte aligned. */                   \
  if (((uintptr_t)(out) & 0x1f) == 0) {                                    \
    MATRIX_VAR_STORE(2, 1, 8, ymm_sum, out, _mm256_store_ps, _NORM)        \
  } else {                                                                 \
    MATRIX_VAR_STORE(2, 1, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)       \
  }

//! Compute the distance between matrix and query (INT4, M=16, N=2)
//!
//! Statement macro expanded in the caller's scope: it declares the locals
//! `qi` and `mi` (plus whatever MATRIX_VAR_INIT declares for `ymm_sum`), so it
//! can be expanded at most once per scope.
//!   m     matrix data; every step consumes 16 uint32_t words
//!   q     query data; every step consumes 2 uint32_t words
//!   dim   dimension; (dim >> 3) steps are executed
//!   out   destination forwarded to MATRIX_VAR_STORE
//!   _NORM forwarded untouched to MATRIX_VAR_STORE
//! `dim` and `out` are parenthesized where this macro uses them directly, so
//! expression arguments (e.g. `a | b`) keep their intended precedence.
#define ACCUM_INT4_16X2_AVX(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(2, 2, __m256i, ymm_sum, _mm256_setzero_si256())     \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \
  /* Aligned loads only when m is 32-byte aligned. */                 \
  if (((uintptr_t)mi & 0x1f) == 0) {                                  \
    for (const uint32_t *qe = qi + (((dim) >> 3) << 1); qi != qe;     \
         mi += 16, qi += 2) {                                         \
      MATRIX_INT32_ITER_16X2_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \
                                 ACCUM_INT4_STEP_AVX)                 \
    }                                                                 \
  } else {                                                            \
    for (const uint32_t *qe = qi + (((dim) >> 3) << 1); qi != qe;     \
         mi += 16, qi += 2) {                                         \
      MATRIX_INT32_ITER_16X2_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \
                                 ACCUM_INT4_STEP_AVX)                 \
    }                                                                 \
  }                                                                   \
  /* Aligned stores only when out is 32-byte aligned. */              \
  if (((uintptr_t)(out) & 0x1f) == 0) {                               \
    MATRIX_VAR_STORE(2, 2, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \
  } else {                                                            \
    MATRIX_VAR_STORE(2, 2, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (INT4, M=16, N=4)
//!
//! Statement macro expanded in the caller's scope: it declares the locals
//! `qi` and `mi` (plus whatever MATRIX_VAR_INIT declares for `ymm_sum`), so it
//! can be expanded at most once per scope.
//!   m     matrix data; every step consumes 16 uint32_t words
//!   q     query data; every step consumes 4 uint32_t words
//!   dim   dimension; (dim >> 3) steps are executed
//!   out   destination forwarded to MATRIX_VAR_STORE
//!   _NORM forwarded untouched to MATRIX_VAR_STORE
//! `dim` and `out` are parenthesized where this macro uses them directly, so
//! expression arguments (e.g. `a | b`) keep their intended precedence.
#define ACCUM_INT4_16X4_AVX(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(2, 4, __m256i, ymm_sum, _mm256_setzero_si256())     \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \
  /* Aligned loads only when m is 32-byte aligned. */                 \
  if (((uintptr_t)mi & 0x1f) == 0) {                                  \
    for (const uint32_t *qe = qi + (((dim) >> 3) << 2); qi != qe;     \
         mi += 16, qi += 4) {                                         \
      MATRIX_INT32_ITER_16X4_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \
                                 ACCUM_INT4_STEP_AVX)                 \
    }                                                                 \
  } else {                                                            \
    for (const uint32_t *qe = qi + (((dim) >> 3) << 2); qi != qe;     \
         mi += 16, qi += 4) {                                         \
      MATRIX_INT32_ITER_16X4_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \
                                 ACCUM_INT4_STEP_AVX)                 \
    }                                                                 \
  }                                                                   \
  /* Aligned stores only when out is 32-byte aligned. */              \
  if (((uintptr_t)(out) & 0x1f) == 0) {                               \
    MATRIX_VAR_STORE(2, 4, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \
  } else {                                                            \
    MATRIX_VAR_STORE(2, 4, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (INT4, M=16, N=8)
//!
//! Statement macro: it declares `qi`, `mi` and the accumulators created by
//! MATRIX_VAR_INIT in the caller's scope, so expand it at most once per scope.
//! The loop runs `dim >> 3` steps, each consuming 16 matrix words and 8 query
//! words (uint32_t); a `dim % 8` tail is therefore not processed here.
//! Aligned loads are used when `m` is 32-byte aligned and aligned stores when
//! `out` is; otherwise the unaligned variants are passed to the helpers.
//! `dim` and `out` are parenthesized at every use, so expression arguments
//! such as `a | b` or `base + k` are evaluated as a whole.
//! NOTE(review): presumably one uint32_t packs eight INT4 values - confirm.
#define ACCUM_INT4_16X8_AVX(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(2, 8, __m256i, ymm_sum, _mm256_setzero_si256())     \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \
  if (((uintptr_t)mi & 0x1f) == 0) {                                  \
    for (const uint32_t *qe = qi + (((dim) >> 3) << 3); qi != qe;     \
         mi += 16, qi += 8) {                                         \
      MATRIX_INT32_ITER_16X8_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \
                                 ACCUM_INT4_STEP_AVX)                 \
    }                                                                 \
  } else {                                                            \
    for (const uint32_t *qe = qi + (((dim) >> 3) << 3); qi != qe;     \
         mi += 16, qi += 8) {                                         \
      MATRIX_INT32_ITER_16X8_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \
                                 ACCUM_INT4_STEP_AVX)                 \
    }                                                                 \
  }                                                                   \
  if (((uintptr_t)(out) & 0x1f) == 0) {                               \
    MATRIX_VAR_STORE(2, 8, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \
  } else {                                                            \
    MATRIX_VAR_STORE(2, 8, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (INT4, M=16, N=16)
//!
//! Statement macro: it declares `qi`, `mi` and the accumulators created by
//! MATRIX_VAR_INIT in the caller's scope, so expand it at most once per scope.
//! The loop runs `dim >> 3` steps, each consuming 16 matrix words and 16
//! query words (uint32_t); a `dim % 8` tail is therefore not processed here.
//! Aligned loads are used when `m` is 32-byte aligned and aligned stores when
//! `out` is; otherwise the unaligned variants are passed to the helpers.
//! `dim` and `out` are parenthesized at every use, so expression arguments
//! such as `a | b` or `base + k` are evaluated as a whole.
//! NOTE(review): presumably one uint32_t packs eight INT4 values - confirm.
#define ACCUM_INT4_16X16_AVX(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(2, 16, __m256i, ymm_sum, _mm256_setzero_si256())     \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);          \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);          \
  if (((uintptr_t)mi & 0x1f) == 0) {                                   \
    for (const uint32_t *qe = qi + (((dim) >> 3) << 4); qi != qe;      \
         mi += 16, qi += 16) {                                         \
      MATRIX_INT32_ITER_16X16_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \
                                  ACCUM_INT4_STEP_AVX)                 \
    }                                                                  \
  } else {                                                             \
    for (const uint32_t *qe = qi + (((dim) >> 3) << 4); qi != qe;      \
         mi += 16, qi += 16) {                                         \
      MATRIX_INT32_ITER_16X16_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \
                                  ACCUM_INT4_STEP_AVX)                 \
    }                                                                  \
  }                                                                    \
  if (((uintptr_t)(out) & 0x1f) == 0) {                                \
    MATRIX_VAR_STORE(2, 16, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \
  } else {                                                             \
    MATRIX_VAR_STORE(2, 16, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (INT4, M=32, N=1)
//!
//! Statement macro: it declares `qi`, `mi` and the accumulators created by
//! MATRIX_VAR_INIT in the caller's scope, so expand it at most once per scope.
//! The loop runs `dim >> 3` steps, each consuming 32 matrix words and 1 query
//! word (uint32_t); a `dim % 8` tail is therefore not processed here.
//! Aligned loads are used when `m` is 32-byte aligned and aligned stores when
//! `out` is; otherwise the unaligned variants are passed to the helpers.
//! `dim` and `out` are parenthesized at every use, so expression arguments
//! such as `a | b` or `base + k` are evaluated as a whole.
//! NOTE(review): presumably one uint32_t packs eight INT4 values - confirm.
#define ACCUM_INT4_32X1_AVX(m, q, dim, out, _NORM)                         \
  MATRIX_VAR_INIT(4, 1, __m256i, ymm_sum, _mm256_setzero_si256())          \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);              \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);              \
  if (((uintptr_t)mi & 0x1f) == 0) {                                       \
    for (const uint32_t *qe = qi + ((dim) >> 3); qi != qe;                 \
         mi += 32, ++qi) {                                                 \
      MATRIX_INT32_ITER_32X1_AVX(mi, qi, ymm_sum, _mm256_load_si256,       \
                                 ACCUM_INT4_STEP_AVX)                      \
    }                                                                      \
  } else {                                                                 \
    for (const uint32_t *qe = qi + ((dim) >> 3); qi != qe;                 \
         mi += 32, ++qi) {                                                 \
      MATRIX_INT32_ITER_32X1_AVX(mi, qi, ymm_sum, _mm256_loadu_si256,      \
                                 ACCUM_INT4_STEP_AVX)                      \
    }                                                                      \
  }                                                                        \
  if (((uintptr_t)(out) & 0x1f) == 0) {                                    \
    MATRIX_VAR_STORE(4, 1, 8, ymm_sum, out, _mm256_store_ps, _NORM)        \
  } else {                                                                 \
    MATRIX_VAR_STORE(4, 1, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)       \
  }

//! Compute the distance between matrix and query (INT4, M=32, N=2)
//!
//! Statement macro: it declares `qi`, `mi` and the accumulators created by
//! MATRIX_VAR_INIT in the caller's scope, so expand it at most once per scope.
//! The loop runs `dim >> 3` steps, each consuming 32 matrix words and 2 query
//! words (uint32_t); a `dim % 8` tail is therefore not processed here.
//! Aligned loads are used when `m` is 32-byte aligned and aligned stores when
//! `out` is; otherwise the unaligned variants are passed to the helpers.
//! `dim` and `out` are parenthesized at every use, so expression arguments
//! such as `a | b` or `base + k` are evaluated as a whole.
//! NOTE(review): presumably one uint32_t packs eight INT4 values - confirm.
#define ACCUM_INT4_32X2_AVX(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(4, 2, __m256i, ymm_sum, _mm256_setzero_si256())     \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \
  if (((uintptr_t)mi & 0x1f) == 0) {                                  \
    for (const uint32_t *qe = qi + (((dim) >> 3) << 1); qi != qe;     \
         mi += 32, qi += 2) {                                         \
      MATRIX_INT32_ITER_32X2_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \
                                 ACCUM_INT4_STEP_AVX)                 \
    }                                                                 \
  } else {                                                            \
    for (const uint32_t *qe = qi + (((dim) >> 3) << 1); qi != qe;     \
         mi += 32, qi += 2) {                                         \
      MATRIX_INT32_ITER_32X2_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \
                                 ACCUM_INT4_STEP_AVX)                 \
    }                                                                 \
  }                                                                   \
  if (((uintptr_t)(out) & 0x1f) == 0) {                               \
    MATRIX_VAR_STORE(4, 2, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \
  } else {                                                            \
    MATRIX_VAR_STORE(4, 2, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (INT4, M=32, N=4)
//!
//! Statement macro: it declares `qi`, `mi` and the accumulators created by
//! MATRIX_VAR_INIT in the caller's scope, so expand it at most once per scope.
//! The loop runs `dim >> 3` steps, each consuming 32 matrix words and 4 query
//! words (uint32_t); a `dim % 8` tail is therefore not processed here.
//! Aligned loads are used when `m` is 32-byte aligned and aligned stores when
//! `out` is; otherwise the unaligned variants are passed to the helpers.
//! `dim` and `out` are parenthesized at every use, so expression arguments
//! such as `a | b` or `base + k` are evaluated as a whole.
//! NOTE(review): presumably one uint32_t packs eight INT4 values - confirm.
#define ACCUM_INT4_32X4_AVX(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(4, 4, __m256i, ymm_sum, _mm256_setzero_si256())     \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \
  if (((uintptr_t)mi & 0x1f) == 0) {                                  \
    for (const uint32_t *qe = qi + (((dim) >> 3) << 2); qi != qe;     \
         mi += 32, qi += 4) {                                         \
      MATRIX_INT32_ITER_32X4_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \
                                 ACCUM_INT4_STEP_AVX)                 \
    }                                                                 \
  } else {                                                            \
    for (const uint32_t *qe = qi + (((dim) >> 3) << 2); qi != qe;     \
         mi += 32, qi += 4) {                                         \
      MATRIX_INT32_ITER_32X4_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \
                                 ACCUM_INT4_STEP_AVX)                 \
    }                                                                 \
  }                                                                   \
  if (((uintptr_t)(out) & 0x1f) == 0) {                               \
    MATRIX_VAR_STORE(4, 4, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \
  } else {                                                            \
    MATRIX_VAR_STORE(4, 4, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (INT4, M=32, N=8)
//!
//! Statement macro: it declares `qi`, `mi` and the accumulators created by
//! MATRIX_VAR_INIT in the caller's scope, so expand it at most once per scope.
//! The loop runs `dim >> 3` steps, each consuming 32 matrix words and 8 query
//! words (uint32_t); a `dim % 8` tail is therefore not processed here.
//! Aligned loads are used when `m` is 32-byte aligned and aligned stores when
//! `out` is; otherwise the unaligned variants are passed to the helpers.
//! `dim` and `out` are parenthesized at every use, so expression arguments
//! such as `a | b` or `base + k` are evaluated as a whole.
//! NOTE(review): presumably one uint32_t packs eight INT4 values - confirm.
#define ACCUM_INT4_32X8_AVX(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(4, 8, __m256i, ymm_sum, _mm256_setzero_si256())     \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \
  if (((uintptr_t)mi & 0x1f) == 0) {                                  \
    for (const uint32_t *qe = qi + (((dim) >> 3) << 3); qi != qe;     \
         mi += 32, qi += 8) {                                         \
      MATRIX_INT32_ITER_32X8_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \
                                 ACCUM_INT4_STEP_AVX)                 \
    }                                                                 \
  } else {                                                            \
    for (const uint32_t *qe = qi + (((dim) >> 3) << 3); qi != qe;     \
         mi += 32, qi += 8) {                                         \
      MATRIX_INT32_ITER_32X8_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \
                                 ACCUM_INT4_STEP_AVX)                 \
    }                                                                 \
  }                                                                   \
  if (((uintptr_t)(out) & 0x1f) == 0) {                               \
    MATRIX_VAR_STORE(4, 8, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \
  } else {                                                            \
    MATRIX_VAR_STORE(4, 8, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (INT4, M=32, N=16)
//!
//! Statement macro: it declares `qi`, `mi` and the accumulators created by
//! MATRIX_VAR_INIT in the caller's scope, so expand it at most once per scope.
//! The loop runs `dim >> 3` steps, each consuming 32 matrix words and 16
//! query words (uint32_t); a `dim % 8` tail is therefore not processed here.
//! Aligned loads are used when `m` is 32-byte aligned and aligned stores when
//! `out` is; otherwise the unaligned variants are passed to the helpers.
//! `dim` and `out` are parenthesized at every use, so expression arguments
//! such as `a | b` or `base + k` are evaluated as a whole.
//! NOTE(review): presumably one uint32_t packs eight INT4 values - confirm.
#define ACCUM_INT4_32X16_AVX(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(4, 16, __m256i, ymm_sum, _mm256_setzero_si256())     \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);          \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);          \
  if (((uintptr_t)mi & 0x1f) == 0) {                                   \
    for (const uint32_t *qe = qi + (((dim) >> 3) << 4); qi != qe;      \
         mi += 32, qi += 16) {                                         \
      MATRIX_INT32_ITER_32X16_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \
                                  ACCUM_INT4_STEP_AVX)                 \
    }                                                                  \
  } else {                                                             \
    for (const uint32_t *qe = qi + (((dim) >> 3) << 4); qi != qe;      \
         mi += 32, qi += 16) {                                         \
      MATRIX_INT32_ITER_32X16_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \
                                  ACCUM_INT4_STEP_AVX)                 \
    }                                                                  \
  }                                                                    \
  if (((uintptr_t)(out) & 0x1f) == 0) {                                \
    MATRIX_VAR_STORE(4, 16, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \
  } else {                                                             \
    MATRIX_VAR_STORE(4, 16, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (INT4, M=32, N=32)
//!
//! Statement macro: it declares `qi`, `mi` and the accumulators created by
//! MATRIX_VAR_INIT in the caller's scope, so expand it at most once per scope.
//! The loop runs `dim >> 3` steps, each consuming 32 matrix words and 32
//! query words (uint32_t); a `dim % 8` tail is therefore not processed here.
//! Aligned loads are used when `m` is 32-byte aligned and aligned stores when
//! `out` is; otherwise the unaligned variants are passed to the helpers.
//! `dim` and `out` are parenthesized at every use, so expression arguments
//! such as `a | b` or `base + k` are evaluated as a whole.
//! NOTE(review): presumably one uint32_t packs eight INT4 values - confirm.
#define ACCUM_INT4_32X32_AVX(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(4, 32, __m256i, ymm_sum, _mm256_setzero_si256())     \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);          \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);          \
  if (((uintptr_t)mi & 0x1f) == 0) {                                   \
    for (const uint32_t *qe = qi + (((dim) >> 3) << 5); qi != qe;      \
         mi += 32, qi += 32) {                                         \
      MATRIX_INT32_ITER_32X32_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \
                                  ACCUM_INT4_STEP_AVX)                 \
    }                                                                  \
  } else {                                                             \
    for (const uint32_t *qe = qi + (((dim) >> 3) << 5); qi != qe;      \
         mi += 32, qi += 32) {                                         \
      MATRIX_INT32_ITER_32X32_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \
                                  ACCUM_INT4_STEP_AVX)                 \
    }                                                                  \
  }                                                                    \
  if (((uintptr_t)(out) & 0x1f) == 0) {                                \
    MATRIX_VAR_STORE(4, 32, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \
  } else {                                                             \
    MATRIX_VAR_STORE(4, 32, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \
  }


================================================
FILE: src/ailego/math/distance_matrix_accum_int8.i
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_int32.i"
#include "matrix_utility.i"

//! Compute the distance between matrix and query (INT8, M=2, N=1)
//!
//! Statement macro: it declares `qi`, `mi`, `qe_aligned`, `qe` and the
//! `xmm_sum_0_0` / `xmm_sum_0_1` accumulators in the caller's scope, so expand
//! it at most once per scope.
//! Phases, in order:
//!   1. main loop, `dim >> 4` steps of 8 matrix words and 4 query words;
//!   2. if at least 2 query words remain, one step on 4 matrix words and
//!      2 query words (each query word duplicated), into `xmm_sum_0_0`;
//!   3. fold: `xmm_sum_0_1` is added in, then the upper 64 bits are added
//!      onto the lower 64 bits;
//!   4. if one query word remains, one step on `mi[0]`, `mi[1]` (upper lanes
//!      zero) against the broadcast query word;
//!   5. the low two lanes of `_NORM(xmm_sum_0_0)` are stored to `out`.
//! Aligned loads require both `m` and `q` to be 16-byte aligned.
//! `dim` and `out` are parenthesized at every use, so expression arguments
//! such as `a | b` or `base + k` are evaluated as a whole.
//! NOTE(review): presumably one uint32_t packs four INT8 values - confirm.
#define ACCUM_INT8_2X1_SSE(m, q, dim, out, _NORM)                            \
  MATRIX_VAR_INIT(1, 2, __m128i, xmm_sum, _mm_setzero_si128())               \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);                \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);                \
  const uint32_t *qe_aligned = qi + (((dim) >> 4) << 2);                     \
  const uint32_t *qe = qi + ((dim) >> 2);                                    \
  if (((uintptr_t)mi & 0xf) == 0 && ((uintptr_t)qi & 0xf) == 0) {            \
    for (; qi != qe_aligned; mi += 8, qi += 4) {                             \
      MATRIX_INT32_ITER_2X1_SSE(mi, qi, xmm_sum, _mm_load_si128,             \
                                ACCUM_INT8_STEP_SSE)                         \
    }                                                                        \
    if (qe >= qe_aligned + 2) {                                              \
      __m128i xmm_mi = _mm_load_si128((const __m128i *)(mi));                \
      __m128i xmm_qi = _mm_set_epi32(qi[1], qi[1], qi[0], qi[0]);            \
      ACCUM_INT8_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)                       \
      mi += 4;                                                               \
      qi += 2;                                                               \
    }                                                                        \
  } else {                                                                   \
    for (; qi != qe_aligned; mi += 8, qi += 4) {                             \
      MATRIX_INT32_ITER_2X1_SSE(mi, qi, xmm_sum, _mm_loadu_si128,            \
                                ACCUM_INT8_STEP_SSE)                         \
    }                                                                        \
    if (qe >= qe_aligned + 2) {                                              \
      __m128i xmm_mi = _mm_loadu_si128((const __m128i *)(mi));               \
      __m128i xmm_qi = _mm_set_epi32(qi[1], qi[1], qi[0], qi[0]);            \
      ACCUM_INT8_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)                       \
      mi += 4;                                                               \
      qi += 2;                                                               \
    }                                                                        \
  }                                                                          \
  xmm_sum_0_0 = _mm_add_epi32(xmm_sum_0_0, xmm_sum_0_1);                     \
  xmm_sum_0_0 = _mm_add_epi32(                                               \
      xmm_sum_0_0, _mm_shuffle_epi32(xmm_sum_0_0, _MM_SHUFFLE(0, 0, 3, 2))); \
  if (qi != qe) {                                                            \
    __m128i xmm_mi = _mm_set_epi32(0, 0, mi[1], mi[0]);                      \
    __m128i xmm_qi = _mm_broadcast_si32(qi);                                 \
    ACCUM_INT8_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)                         \
  }                                                                          \
  _mm_storel_pi((__m64 *)(out), _NORM(xmm_sum_0_0));

//! Compute the distance between matrix and query (INT8, M=2, N=2)
//!
//! Statement macro: it declares `qi`, `mi`, `qe` and the `xmm_sum_0_0` /
//! `xmm_sum_0_1` accumulators in the caller's scope, so expand it at most
//! once per scope.
//! Phases, in order:
//!   1. main loop, `dim >> 3` steps of 4 matrix words and 4 query words;
//!   2. fold: the low and high 64-bit halves of both accumulators are
//!      interleaved and summed into `xmm_sum_0_0`;
//!   3. if query words remain (`qi != qe`), one step on `mi[0]`, `mi[1]`
//!      (repeated in both halves) against `qi[0]`, `qi[1]` (each duplicated);
//!   4. `xmm_sum_0_0` is written through MATRIX_VAR_STORE with `_NORM`.
//! Aligned loads require both `m` and `q` to be 16-byte aligned; the aligned
//! store is chosen when `out` is 16-byte aligned.
//! `dim` and `out` are parenthesized at every use, so expression arguments
//! such as `a | b` or `base + k` are evaluated as a whole.
//! NOTE(review): presumably one uint32_t packs four INT8 values - confirm.
#define ACCUM_INT8_2X2_SSE(m, q, dim, out, _NORM)                            \
  MATRIX_VAR_INIT(1, 2, __m128i, xmm_sum, _mm_setzero_si128())               \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);                \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);                \
  const uint32_t *qe = qi + (((dim) >> 2) << 1);                             \
  if (((uintptr_t)mi & 0xf) == 0 && ((uintptr_t)qi & 0xf) == 0) {            \
    for (const uint32_t *qe_aligned = qi + (((dim) >> 3) << 2);              \
         qi != qe_aligned; mi += 4, qi += 4) {                               \
      MATRIX_INT32_ITER_2X2_SSE(mi, qi, xmm_sum, _mm_load_si128,             \
                                ACCUM_INT8_STEP_SSE)                         \
    }                                                                        \
  } else {                                                                   \
    for (const uint32_t *qe_aligned = qi + (((dim) >> 3) << 2);              \
         qi != qe_aligned; mi += 4, qi += 4) {                               \
      MATRIX_INT32_ITER_2X2_SSE(mi, qi, xmm_sum, _mm_loadu_si128,            \
                                ACCUM_INT8_STEP_SSE)                         \
    }                                                                        \
  }                                                                          \
  xmm_sum_0_0 = _mm_add_epi32(_mm_unpacklo_epi64(xmm_sum_0_0, xmm_sum_0_1),  \
                              _mm_unpackhi_epi64(xmm_sum_0_0, xmm_sum_0_1)); \
  if (qi != qe) {                                                            \
    __m128i xmm_mi = _mm_set_epi32(mi[1], mi[0], mi[1], mi[0]);              \
    __m128i xmm_qi = _mm_set_epi32(qi[1], qi[1], qi[0], qi[0]);              \
    ACCUM_INT8_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)                         \
  }                                                                          \
  if (((uintptr_t)(out) & 0xf) == 0) {                                       \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)             \
  } else {                                                                   \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)            \
  }

//! Compute the distance between matrix and query (INT8, M=4, N=1)
//!
//! Statement macro: it declares `qi`, `mi`, `qe` and the `xmm_sum_0_0` /
//! `xmm_sum_1_0` accumulators in the caller's scope, so expand it at most
//! once per scope.
//! Phases, in order:
//!   1. main loop, `dim >> 3` steps of 8 matrix words and 2 query words;
//!   2. if a query word remains (`qi != qe`), one step on 4 matrix words
//!      against the broadcast query word, into `xmm_sum_0_0`;
//!   3. `xmm_sum_1_0` is added into `xmm_sum_0_0`;
//!   4. `xmm_sum_0_0` is written through MATRIX_VAR_STORE with `_NORM`.
//! Aligned loads are used when `m` is 16-byte aligned and the aligned store
//! when `out` is.
//! `dim` and `out` are parenthesized at every use, so expression arguments
//! such as `a | b` or `base + k` are evaluated as a whole.
//! NOTE(review): presumably one uint32_t packs four INT8 values - confirm.
#define ACCUM_INT8_4X1_SSE(m, q, dim, out, _NORM)                 \
  MATRIX_VAR_INIT(2, 1, __m128i, xmm_sum, _mm_setzero_si128())    \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);     \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);     \
  const uint32_t *qe = qi + ((dim) >> 2);                         \
  if (((uintptr_t)mi & 0xf) == 0) {                               \
    for (const uint32_t *qe_aligned = qi + (((dim) >> 3) << 1);   \
         qi != qe_aligned; mi += 8, qi += 2) {                    \
      MATRIX_INT32_ITER_4X1_SSE(mi, qi, xmm_sum, _mm_load_si128,  \
                                ACCUM_INT8_STEP_SSE)              \
    }                                                             \
    if (qi != qe) {                                               \
      __m128i xmm_mi = _mm_load_si128((const __m128i *)(mi));     \
      __m128i xmm_qi = _mm_broadcast_si32(qi);                    \
      ACCUM_INT8_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)            \
    }                                                             \
  } else {                                                        \
    for (const uint32_t *qe_aligned = qi + (((dim) >> 3) << 1);   \
         qi != qe_aligned; mi += 8, qi += 2) {                    \
      MATRIX_INT32_ITER_4X1_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \
                                ACCUM_INT8_STEP_SSE)              \
    }                                                             \
    if (qi != qe) {                                               \
      __m128i xmm_mi = _mm_loadu_si128((const __m128i *)(mi));    \
      __m128i xmm_qi = _mm_broadcast_si32(qi);                    \
      ACCUM_INT8_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)            \
    }                                                             \
  }                                                               \
  xmm_sum_0_0 = _mm_add_epi32(xmm_sum_0_0, xmm_sum_1_0);          \
  if (((uintptr_t)(out) & 0xf) == 0) {                            \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)  \
  } else {                                                        \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \
  }

//! Compute the distance between matrix and query (INT8, M=4, N=2)
//!
//! Statement macro: it declares `qi`, `mi` and the accumulators created by
//! MATRIX_VAR_INIT in the caller's scope, so expand it at most once per scope.
//! The loop runs `dim >> 2` steps, each consuming 4 matrix words and 2 query
//! words (uint32_t); a `dim % 4` tail is therefore not processed here.
//! Aligned loads are used when `m` is 16-byte aligned and aligned stores when
//! `out` is; otherwise the unaligned variants are passed to the helpers.
//! `dim` and `out` are parenthesized at every use, so expression arguments
//! such as `a | b` or `base + k` are evaluated as a whole.
//! NOTE(review): presumably one uint32_t packs four INT8 values - confirm.
#define ACCUM_INT8_4X2_SSE(m, q, dim, out, _NORM)                 \
  MATRIX_VAR_INIT(1, 2, __m128i, xmm_sum, _mm_setzero_si128())    \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);     \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);     \
  if (((uintptr_t)mi & 0xf) == 0) {                               \
    for (const uint32_t *qe = qi + (((dim) >> 2) << 1); qi != qe; \
         mi += 4, qi += 2) {                                      \
      MATRIX_INT32_ITER_4X2_SSE(mi, qi, xmm_sum, _mm_load_si128,  \
                                ACCUM_INT8_STEP_SSE)              \
    }                                                             \
  } else {                                                        \
    for (const uint32_t *qe = qi + (((dim) >> 2) << 1); qi != qe; \
         mi += 4, qi += 2) {                                      \
      MATRIX_INT32_ITER_4X2_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \
                                ACCUM_INT8_STEP_SSE)              \
    }                                                             \
  }                                                               \
  if (((uintptr_t)(out) & 0xf) == 0) {                            \
    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)  \
  } else {                                                        \
    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \
  }

//! Compute the distance between matrix and query (INT8, M=4, N=4)
//!
//! Statement macro: it declares `qi`, `mi` and the accumulators created by
//! MATRIX_VAR_INIT in the caller's scope, so expand it at most once per scope.
//! The loop runs `dim >> 2` steps, each consuming 4 matrix words and 4 query
//! words (uint32_t); a `dim % 4` tail is therefore not processed here.
//! Aligned loads are used when `m` is 16-byte aligned and aligned stores when
//! `out` is; otherwise the unaligned variants are passed to the helpers.
//! `dim` and `out` are parenthesized at every use, so expression arguments
//! such as `a | b` or `base + k` are evaluated as a whole.
//! NOTE(review): presumably one uint32_t packs four INT8 values - confirm.
#define ACCUM_INT8_4X4_SSE(m, q, dim, out, _NORM)                 \
  MATRIX_VAR_INIT(1, 4, __m128i, xmm_sum, _mm_setzero_si128())    \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);     \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);     \
  if (((uintptr_t)mi & 0xf) == 0) {                               \
    for (const uint32_t *qe = qi + (((dim) >> 2) << 2); qi != qe; \
         mi += 4, qi += 4) {                                      \
      MATRIX_INT32_ITER_4X4_SSE(mi, qi, xmm_sum, _mm_load_si128,  \
                                ACCUM_INT8_STEP_SSE)              \
    }                                                             \
  } else {                                                        \
    for (const uint32_t *qe = qi + (((dim) >> 2) << 2); qi != qe; \
         mi += 4, qi += 4) {                                      \
      MATRIX_INT32_ITER_4X4_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \
                                ACCUM_INT8_STEP_SSE)              \
    }                                                             \
  }                                                               \
  if (((uintptr_t)(out) & 0xf) == 0) {                            \
    MATRIX_VAR_STORE(1, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)  \
  } else {                                                        \
    MATRIX_VAR_STORE(1, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \
  }

//! Compute the distance between matrix and query (INT8, M=8, N=1)
//!
//! Statement macro: it declares `qi`, `mi` and the accumulators created by
//! MATRIX_VAR_INIT in the caller's scope, so expand it at most once per scope.
//! The loop runs `dim >> 2` steps, each consuming 8 matrix words and 1 query
//! word (uint32_t); a `dim % 4` tail is therefore not processed here.
//! Aligned loads are used when `m` is 16-byte aligned and aligned stores when
//! `out` is; otherwise the unaligned variants are passed to the helpers.
//! `dim` and `out` are parenthesized at every use, so expression arguments
//! such as `a | b` or `base + k` are evaluated as a whole.
//! NOTE(review): presumably one uint32_t packs four INT8 values - confirm.
#define ACCUM_INT8_8X1_SSE(m, q, dim, out, _NORM)                         \
  MATRIX_VAR_INIT(2, 1, __m128i, xmm_sum, _mm_setzero_si128())            \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);             \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);             \
  if (((uintptr_t)mi & 0xf) == 0) {                                       \
    for (const uint32_t *qe = qi + ((dim) >> 2); qi != qe;                \
         mi += 8, ++qi) {                                                 \
      MATRIX_INT32_ITER_8X1_SSE(mi, qi, xmm_sum, _mm_load_si128,          \
                                ACCUM_INT8_STEP_SSE)                      \
    }                                                                     \
  } else {                                                                \
    for (const uint32_t *qe = qi + ((dim) >> 2); qi != qe;                \
         mi += 8, ++qi) {                                                 \
      MATRIX_INT32_ITER_8X1_SSE(mi, qi, xmm_sum, _mm_loadu_si128,         \
                                ACCUM_INT8_STEP_SSE)                      \
    }                                                                     \
  }                                                                       \
  if (((uintptr_t)(out) & 0xf) == 0) {                                    \
    MATRIX_VAR_STORE(2, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)          \
  } else {                                                                \
    MATRIX_VAR_STORE(2, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)         \
  }

//! Compute the distance between matrix and query (INT8, M=8, N=2)
//!
//! Statement macro: it declares `qi`, `mi` and the accumulators created by
//! MATRIX_VAR_INIT in the caller's scope, so expand it at most once per scope.
//! The loop runs `dim >> 2` steps, each consuming 8 matrix words and 2 query
//! words (uint32_t); a `dim % 4` tail is therefore not processed here.
//! Aligned loads are used when `m` is 16-byte aligned and aligned stores when
//! `out` is; otherwise the unaligned variants are passed to the helpers.
//! `dim` and `out` are parenthesized at every use, so expression arguments
//! such as `a | b` or `base + k` are evaluated as a whole.
//! NOTE(review): presumably one uint32_t packs four INT8 values - confirm.
#define ACCUM_INT8_8X2_SSE(m, q, dim, out, _NORM)                 \
  MATRIX_VAR_INIT(2, 2, __m128i, xmm_sum, _mm_setzero_si128())    \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);     \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);     \
  if (((uintptr_t)mi & 0xf) == 0) {                               \
    for (const uint32_t *qe = qi + (((dim) >> 2) << 1); qi != qe; \
         mi += 8, qi += 2) {                                      \
      MATRIX_INT32_ITER_8X2_SSE(mi, qi, xmm_sum, _mm_load_si128,  \
                                ACCUM_INT8_STEP_SSE)              \
    }                                                             \
  } else {                                                        \
    for (const uint32_t *qe = qi + (((dim) >> 2) << 1); qi != qe; \
         mi += 8, qi += 2) {                                      \
      MATRIX_INT32_ITER_8X2_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \
                                ACCUM_INT8_STEP_SSE)              \
    }                                                             \
  }                                                               \
  if (((uintptr_t)(out) & 0xf) == 0) {                            \
    MATRIX_VAR_STORE(2, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)  \
  } else {                                                        \
    MATRIX_VAR_STORE(2, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \
  }

//! Compute the distance between matrix and query (INT8, M=8, N=4)
//!
//! Statement macro: it declares `qi`, `mi` and the accumulators created by
//! MATRIX_VAR_INIT in the caller's scope, so expand it at most once per scope.
//! The loop runs `dim >> 2` steps, each consuming 8 matrix words and 4 query
//! words (uint32_t); a `dim % 4` tail is therefore not processed here.
//! Aligned loads are used when `m` is 16-byte aligned and aligned stores when
//! `out` is; otherwise the unaligned variants are passed to the helpers.
//! `dim` and `out` are parenthesized at every use, so expression arguments
//! such as `a | b` or `base + k` are evaluated as a whole.
//! NOTE(review): presumably one uint32_t packs four INT8 values - confirm.
#define ACCUM_INT8_8X4_SSE(m, q, dim, out, _NORM)                 \
  MATRIX_VAR_INIT(2, 4, __m128i, xmm_sum, _mm_setzero_si128())    \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);     \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);     \
  if (((uintptr_t)mi & 0xf) == 0) {                               \
    for (const uint32_t *qe = qi + (((dim) >> 2) << 2); qi != qe; \
         mi += 8, qi += 4) {                                      \
      MATRIX_INT32_ITER_8X4_SSE(mi, qi, xmm_sum, _mm_load_si128,  \
                                ACCUM_INT8_STEP_SSE)              \
    }                                                             \
  } else {                                                        \
    for (const uint32_t *qe = qi + (((dim) >> 2) << 2); qi != qe; \
         mi += 8, qi += 4) {                                      \
      MATRIX_INT32_ITER_8X4_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \
                                ACCUM_INT8_STEP_SSE)              \
    }                                                             \
  }                                                               \
  if (((uintptr_t)(out) & 0xf) == 0) {                            \
    MATRIX_VAR_STORE(2, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)  \
  } else {                                                        \
    MATRIX_VAR_STORE(2, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \
  }

//! Compute the distance between matrix and query (INT8, M=8, N=8)
//!
//! Statement macro: it declares `qi`, `mi` and the accumulators created by
//! MATRIX_VAR_INIT in the caller's scope, so expand it at most once per scope.
//! The loop runs `dim >> 2` steps, each consuming 8 matrix words and 8 query
//! words (uint32_t); a `dim % 4` tail is therefore not processed here.
//! Aligned loads are used when `m` is 16-byte aligned and aligned stores when
//! `out` is; otherwise the unaligned variants are passed to the helpers.
//! `dim` and `out` are parenthesized at every use, so expression arguments
//! such as `a | b` or `base + k` are evaluated as a whole.
//! NOTE(review): presumably one uint32_t packs four INT8 values - confirm.
#define ACCUM_INT8_8X8_SSE(m, q, dim, out, _NORM)                 \
  MATRIX_VAR_INIT(2, 8, __m128i, xmm_sum, _mm_setzero_si128())    \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);     \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);     \
  if (((uintptr_t)mi & 0xf) == 0) {                               \
    for (const uint32_t *qe = qi + (((dim) >> 2) << 3); qi != qe; \
         mi += 8, qi += 8) {                                      \
      MATRIX_INT32_ITER_8X8_SSE(mi, qi, xmm_sum, _mm_load_si128,  \
                                ACCUM_INT8_STEP_SSE)              \
    }                                                             \
  } else {                                                        \
    for (const uint32_t *qe = qi + (((dim) >> 2) << 3); qi != qe; \
         mi += 8, qi += 8) {                                      \
      MATRIX_INT32_ITER_8X8_SSE(mi, qi, xmm_sum, _mm_loadu_si128, \
                                ACCUM_INT8_STEP_SSE)              \
    }                                                             \
  }                                                               \
  if (((uintptr_t)(out) & 0xf) == 0) {                            \
    MATRIX_VAR_STORE(2, 8, 4, xmm_sum, out, _mm_store_ps, _NORM)  \
  } else {                                                        \
    MATRIX_VAR_STORE(2, 8, 4, xmm_sum, out, _mm_storeu_ps, _NORM) \
  }

//! Compute the distance between matrix and query (INT8, M=16, N=1)
//!
//! Statement macro: it declares `qi`, `mi` and the accumulators created by
//! MATRIX_VAR_INIT in the caller's scope, so expand it at most once per scope.
//! The loop runs `dim >> 2` steps, each consuming 16 matrix words and 1 query
//! word (uint32_t); a `dim % 4` tail is therefore not processed here.
//! Aligned loads are used when `m` is 16-byte aligned and aligned stores when
//! `out` is; otherwise the unaligned variants are passed to the helpers.
//! `dim` and `out` are parenthesized at every use, so expression arguments
//! such as `a | b` or `base + k` are evaluated as a whole.
//! NOTE(review): presumably one uint32_t packs four INT8 values - confirm.
#define ACCUM_INT8_16X1_SSE(m, q, dim, out, _NORM)                         \
  MATRIX_VAR_INIT(4, 1, __m128i, xmm_sum, _mm_setzero_si128())             \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);              \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);              \
  if (((uintptr_t)mi & 0xf) == 0) {                                        \
    for (const uint32_t *qe = qi + ((dim) >> 2); qi != qe;                 \
         mi += 16, ++qi) {                                                 \
      MATRIX_INT32_ITER_16X1_SSE(mi, qi, xmm_sum, _mm_load_si128,          \
                                 ACCUM_INT8_STEP_SSE)                      \
    }                                                                      \
  } else {                                                                 \
    for (const uint32_t *qe = qi + ((dim) >> 2); qi != qe;                 \
         mi += 16, ++qi) {                                                 \
      MATRIX_INT32_ITER_16X1_SSE(mi, qi, xmm_sum, _mm_loadu_si128,         \
                                 ACCUM_INT8_STEP_SSE)                      \
    }                                                                      \
  }                                                                        \
  if (((uintptr_t)(out) & 0xf) == 0) {                                     \
    MATRIX_VAR_STORE(4, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)           \
  } else {                                                                 \
    MATRIX_VAR_STORE(4, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)          \
  }

//! Compute the distance between matrix and query (INT8, M=16, N=2)
//!
//! Statement macro: it declares `qi`, `mi` and the accumulators created by
//! MATRIX_VAR_INIT (prefix `xmm_sum`) directly in the enclosing scope.
//! `m` and `q` are walked as 32-bit words (presumably four packed INT8 values
//! per word -- confirm against the callers). The loop runs `dim >> 2` times,
//! so the low two bits of `dim` are ignored; each step advances `mi` by 16
//! and `qi` by 2 words. Aligned loads are used only when `mi` is 16-byte
//! aligned and aligned stores only when `out` is 16-byte aligned.
//! `dim` and `out` are wrapped as a whole before use, so expression
//! arguments such as `n & ~3` or `base + 4` are evaluated correctly.
#define ACCUM_INT8_16X2_SSE(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(4, 2, __m128i, xmm_sum, _mm_setzero_si128())        \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \
  if ((reinterpret_cast<uintptr_t>(mi) & 0xf) == 0) {                 \
    for (const uint32_t *qe = qi + (((dim) >> 2) << 1); qi != qe;     \
         mi += 16, qi += 2) {                                         \
      MATRIX_INT32_ITER_16X2_SSE(mi, qi, xmm_sum, _mm_load_si128,     \
                                 ACCUM_INT8_STEP_SSE)                 \
    }                                                                 \
  } else {                                                            \
    for (const uint32_t *qe = qi + (((dim) >> 2) << 1); qi != qe;     \
         mi += 16, qi += 2) {                                         \
      MATRIX_INT32_ITER_16X2_SSE(mi, qi, xmm_sum, _mm_loadu_si128,    \
                                 ACCUM_INT8_STEP_SSE)                 \
    }                                                                 \
  }                                                                   \
  if ((reinterpret_cast<uintptr_t>(out) & 0xf) == 0) {                \
    MATRIX_VAR_STORE(4, 2, 4, xmm_sum, (out), _mm_store_ps, _NORM)    \
  } else {                                                            \
    MATRIX_VAR_STORE(4, 2, 4, xmm_sum, (out), _mm_storeu_ps, _NORM)   \
  }

//! Compute the distance between matrix and query (INT8, M=16, N=4)
//!
//! Statement macro: it declares `qi`, `mi` and the accumulators created by
//! MATRIX_VAR_INIT (prefix `xmm_sum`) directly in the enclosing scope.
//! `m` and `q` are walked as 32-bit words (presumably four packed INT8 values
//! per word -- confirm against the callers). The loop runs `dim >> 2` times,
//! so the low two bits of `dim` are ignored; each step advances `mi` by 16
//! and `qi` by 4 words. Aligned loads are used only when `mi` is 16-byte
//! aligned and aligned stores only when `out` is 16-byte aligned.
//! `dim` and `out` are wrapped as a whole before use, so expression
//! arguments such as `n & ~3` or `base + 4` are evaluated correctly.
#define ACCUM_INT8_16X4_SSE(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(4, 4, __m128i, xmm_sum, _mm_setzero_si128())        \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \
  if ((reinterpret_cast<uintptr_t>(mi) & 0xf) == 0) {                 \
    for (const uint32_t *qe = qi + (((dim) >> 2) << 2); qi != qe;     \
         mi += 16, qi += 4) {                                         \
      MATRIX_INT32_ITER_16X4_SSE(mi, qi, xmm_sum, _mm_load_si128,     \
                                 ACCUM_INT8_STEP_SSE)                 \
    }                                                                 \
  } else {                                                            \
    for (const uint32_t *qe = qi + (((dim) >> 2) << 2); qi != qe;     \
         mi += 16, qi += 4) {                                         \
      MATRIX_INT32_ITER_16X4_SSE(mi, qi, xmm_sum, _mm_loadu_si128,    \
                                 ACCUM_INT8_STEP_SSE)                 \
    }                                                                 \
  }                                                                   \
  if ((reinterpret_cast<uintptr_t>(out) & 0xf) == 0) {                \
    MATRIX_VAR_STORE(4, 4, 4, xmm_sum, (out), _mm_store_ps, _NORM)    \
  } else {                                                            \
    MATRIX_VAR_STORE(4, 4, 4, xmm_sum, (out), _mm_storeu_ps, _NORM)   \
  }

//! Compute the distance between matrix and query (INT8, M=16, N=8)
//!
//! Statement macro: it declares `qi`, `mi` and the accumulators created by
//! MATRIX_VAR_INIT (prefix `xmm_sum`) directly in the enclosing scope.
//! `m` and `q` are walked as 32-bit words (presumably four packed INT8 values
//! per word -- confirm against the callers). The loop runs `dim >> 2` times,
//! so the low two bits of `dim` are ignored; each step advances `mi` by 16
//! and `qi` by 8 words. Aligned loads are used only when `mi` is 16-byte
//! aligned and aligned stores only when `out` is 16-byte aligned.
//! `dim` and `out` are wrapped as a whole before use, so expression
//! arguments such as `n & ~3` or `base + 4` are evaluated correctly.
#define ACCUM_INT8_16X8_SSE(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(4, 8, __m128i, xmm_sum, _mm_setzero_si128())        \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \
  if ((reinterpret_cast<uintptr_t>(mi) & 0xf) == 0) {                 \
    for (const uint32_t *qe = qi + (((dim) >> 2) << 3); qi != qe;     \
         mi += 16, qi += 8) {                                         \
      MATRIX_INT32_ITER_16X8_SSE(mi, qi, xmm_sum, _mm_load_si128,     \
                                 ACCUM_INT8_STEP_SSE)                 \
    }                                                                 \
  } else {                                                            \
    for (const uint32_t *qe = qi + (((dim) >> 2) << 3); qi != qe;     \
         mi += 16, qi += 8) {                                         \
      MATRIX_INT32_ITER_16X8_SSE(mi, qi, xmm_sum, _mm_loadu_si128,    \
                                 ACCUM_INT8_STEP_SSE)                 \
    }                                                                 \
  }                                                                   \
  if ((reinterpret_cast<uintptr_t>(out) & 0xf) == 0) {                \
    MATRIX_VAR_STORE(4, 8, 4, xmm_sum, (out), _mm_store_ps, _NORM)    \
  } else {                                                            \
    MATRIX_VAR_STORE(4, 8, 4, xmm_sum, (out), _mm_storeu_ps, _NORM)   \
  }

//! Compute the distance between matrix and query (INT8, M=16, N=16)
//!
//! Statement macro: it declares `qi`, `mi` and the accumulators created by
//! MATRIX_VAR_INIT (prefix `xmm_sum`) directly in the enclosing scope.
//! `m` and `q` are walked as 32-bit words (presumably four packed INT8 values
//! per word -- confirm against the callers). The loop runs `dim >> 2` times,
//! so the low two bits of `dim` are ignored; each step advances `mi` by 16
//! and `qi` by 16 words. Aligned loads are used only when `mi` is 16-byte
//! aligned and aligned stores only when `out` is 16-byte aligned.
//! `dim` and `out` are wrapped as a whole before use, so expression
//! arguments such as `n & ~3` or `base + 4` are evaluated correctly.
#define ACCUM_INT8_16X16_SSE(m, q, dim, out, _NORM)                   \
  MATRIX_VAR_INIT(4, 16, __m128i, xmm_sum, _mm_setzero_si128())       \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \
  if ((reinterpret_cast<uintptr_t>(mi) & 0xf) == 0) {                 \
    for (const uint32_t *qe = qi + (((dim) >> 2) << 4); qi != qe;     \
         mi += 16, qi += 16) {                                        \
      MATRIX_INT32_ITER_16X16_SSE(mi, qi, xmm_sum, _mm_load_si128,    \
                                  ACCUM_INT8_STEP_SSE)                \
    }                                                                 \
  } else {                                                            \
    for (const uint32_t *qe = qi + (((dim) >> 2) << 4); qi != qe;     \
         mi += 16, qi += 16) {                                        \
      MATRIX_INT32_ITER_16X16_SSE(mi, qi, xmm_sum, _mm_loadu_si128,   \
                                  ACCUM_INT8_STEP_SSE)                \
    }                                                                 \
  }                                                                   \
  if ((reinterpret_cast<uintptr_t>(out) & 0xf) == 0) {                \
    MATRIX_VAR_STORE(4, 16, 4, xmm_sum, (out), _mm_store_ps, _NORM)   \
  } else {                                                            \
    MATRIX_VAR_STORE(4, 16, 4, xmm_sum, (out), _mm_storeu_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (INT8, M=32, N=1)
//!
//! Statement macro: it declares `qi`, `mi` and the accumulators created by
//! MATRIX_VAR_INIT (prefix `xmm_sum`) directly in the enclosing scope.
//! `m` and `q` are walked as 32-bit words (presumably four packed INT8 values
//! per word -- confirm against the callers). The loop runs `dim >> 2` times,
//! so the low two bits of `dim` are ignored; each step advances `mi` by 32
//! words and `qi` by one word. Aligned loads are used only when `mi` is
//! 16-byte aligned and aligned stores only when `out` is 16-byte aligned.
//! `dim` and `out` are wrapped as a whole before use, so expression
//! arguments such as `n & ~3` or `base + 4` are evaluated correctly.
#define ACCUM_INT8_32X1_SSE(m, q, dim, out, _NORM)                            \
  MATRIX_VAR_INIT(8, 1, __m128i, xmm_sum, _mm_setzero_si128())                \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);                 \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);                 \
  if ((reinterpret_cast<uintptr_t>(mi) & 0xf) == 0) {                         \
    for (const uint32_t *qe = qi + ((dim) >> 2); qi != qe; mi += 32, ++qi) {  \
      MATRIX_INT32_ITER_32X1_SSE(mi, qi, xmm_sum, _mm_load_si128,             \
                                 ACCUM_INT8_STEP_SSE)                         \
    }                                                                         \
  } else {                                                                    \
    for (const uint32_t *qe = qi + ((dim) >> 2); qi != qe; mi += 32, ++qi) {  \
      MATRIX_INT32_ITER_32X1_SSE(mi, qi, xmm_sum, _mm_loadu_si128,            \
                                 ACCUM_INT8_STEP_SSE)                         \
    }                                                                         \
  }                                                                           \
  if ((reinterpret_cast<uintptr_t>(out) & 0xf) == 0) {                        \
    MATRIX_VAR_STORE(8, 1, 4, xmm_sum, (out), _mm_store_ps, _NORM)            \
  } else {                                                                    \
    MATRIX_VAR_STORE(8, 1, 4, xmm_sum, (out), _mm_storeu_ps, _NORM)           \
  }

//! Compute the distance between matrix and query (INT8, M=32, N=2)
//!
//! Statement macro: it declares `qi`, `mi` and the accumulators created by
//! MATRIX_VAR_INIT (prefix `xmm_sum`) directly in the enclosing scope.
//! `m` and `q` are walked as 32-bit words (presumably four packed INT8 values
//! per word -- confirm against the callers). The loop runs `dim >> 2` times,
//! so the low two bits of `dim` are ignored; each step advances `mi` by 32
//! and `qi` by 2 words. Aligned loads are used only when `mi` is 16-byte
//! aligned and aligned stores only when `out` is 16-byte aligned.
//! `dim` and `out` are wrapped as a whole before use, so expression
//! arguments such as `n & ~3` or `base + 4` are evaluated correctly.
#define ACCUM_INT8_32X2_SSE(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(8, 2, __m128i, xmm_sum, _mm_setzero_si128())        \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \
  if ((reinterpret_cast<uintptr_t>(mi) & 0xf) == 0) {                 \
    for (const uint32_t *qe = qi + (((dim) >> 2) << 1); qi != qe;     \
         mi += 32, qi += 2) {                                         \
      MATRIX_INT32_ITER_32X2_SSE(mi, qi, xmm_sum, _mm_load_si128,     \
                                 ACCUM_INT8_STEP_SSE)                 \
    }                                                                 \
  } else {                                                            \
    for (const uint32_t *qe = qi + (((dim) >> 2) << 1); qi != qe;     \
         mi += 32, qi += 2) {                                         \
      MATRIX_INT32_ITER_32X2_SSE(mi, qi, xmm_sum, _mm_loadu_si128,    \
                                 ACCUM_INT8_STEP_SSE)                 \
    }                                                                 \
  }                                                                   \
  if ((reinterpret_cast<uintptr_t>(out) & 0xf) == 0) {                \
    MATRIX_VAR_STORE(8, 2, 4, xmm_sum, (out), _mm_store_ps, _NORM)    \
  } else {                                                            \
    MATRIX_VAR_STORE(8, 2, 4, xmm_sum, (out), _mm_storeu_ps, _NORM)   \
  }

//! Compute the distance between matrix and query (INT8, M=32, N=4)
//!
//! Statement macro: it declares `qi`, `mi` and the accumulators created by
//! MATRIX_VAR_INIT (prefix `xmm_sum`) directly in the enclosing scope.
//! `m` and `q` are walked as 32-bit words (presumably four packed INT8 values
//! per word -- confirm against the callers). The loop runs `dim >> 2` times,
//! so the low two bits of `dim` are ignored; each step advances `mi` by 32
//! and `qi` by 4 words. Aligned loads are used only when `mi` is 16-byte
//! aligned and aligned stores only when `out` is 16-byte aligned.
//! `dim` and `out` are wrapped as a whole before use, so expression
//! arguments such as `n & ~3` or `base + 4` are evaluated correctly.
#define ACCUM_INT8_32X4_SSE(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(8, 4, __m128i, xmm_sum, _mm_setzero_si128())        \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \
  if ((reinterpret_cast<uintptr_t>(mi) & 0xf) == 0) {                 \
    for (const uint32_t *qe = qi + (((dim) >> 2) << 2); qi != qe;     \
         mi += 32, qi += 4) {                                         \
      MATRIX_INT32_ITER_32X4_SSE(mi, qi, xmm_sum, _mm_load_si128,     \
                                 ACCUM_INT8_STEP_SSE)                 \
    }                                                                 \
  } else {                                                            \
    for (const uint32_t *qe = qi + (((dim) >> 2) << 2); qi != qe;     \
         mi += 32, qi += 4) {                                         \
      MATRIX_INT32_ITER_32X4_SSE(mi, qi, xmm_sum, _mm_loadu_si128,    \
                                 ACCUM_INT8_STEP_SSE)                 \
    }                                                                 \
  }                                                                   \
  if ((reinterpret_cast<uintptr_t>(out) & 0xf) == 0) {                \
    MATRIX_VAR_STORE(8, 4, 4, xmm_sum, (out), _mm_store_ps, _NORM)    \
  } else {                                                            \
    MATRIX_VAR_STORE(8, 4, 4, xmm_sum, (out), _mm_storeu_ps, _NORM)   \
  }

//! Compute the distance between matrix and query (INT8, M=32, N=8)
//!
//! Statement macro: it declares `qi`, `mi` and the accumulators created by
//! MATRIX_VAR_INIT (prefix `xmm_sum`) directly in the enclosing scope.
//! `m` and `q` are walked as 32-bit words (presumably four packed INT8 values
//! per word -- confirm against the callers). The loop runs `dim >> 2` times,
//! so the low two bits of `dim` are ignored; each step advances `mi` by 32
//! and `qi` by 8 words. Aligned loads are used only when `mi` is 16-byte
//! aligned and aligned stores only when `out` is 16-byte aligned.
//! `dim` and `out` are wrapped as a whole before use, so expression
//! arguments such as `n & ~3` or `base + 4` are evaluated correctly.
#define ACCUM_INT8_32X8_SSE(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(8, 8, __m128i, xmm_sum, _mm_setzero_si128())        \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \
  if ((reinterpret_cast<uintptr_t>(mi) & 0xf) == 0) {                 \
    for (const uint32_t *qe = qi + (((dim) >> 2) << 3); qi != qe;     \
         mi += 32, qi += 8) {                                         \
      MATRIX_INT32_ITER_32X8_SSE(mi, qi, xmm_sum, _mm_load_si128,     \
                                 ACCUM_INT8_STEP_SSE)                 \
    }                                                                 \
  } else {                                                            \
    for (const uint32_t *qe = qi + (((dim) >> 2) << 3); qi != qe;     \
         mi += 32, qi += 8) {                                         \
      MATRIX_INT32_ITER_32X8_SSE(mi, qi, xmm_sum, _mm_loadu_si128,    \
                                 ACCUM_INT8_STEP_SSE)                 \
    }                                                                 \
  }                                                                   \
  if ((reinterpret_cast<uintptr_t>(out) & 0xf) == 0) {                \
    MATRIX_VAR_STORE(8, 8, 4, xmm_sum, (out), _mm_store_ps, _NORM)    \
  } else {                                                            \
    MATRIX_VAR_STORE(8, 8, 4, xmm_sum, (out), _mm_storeu_ps, _NORM)   \
  }

//! Compute the distance between matrix and query (INT8, M=32, N=16)
//!
//! Statement macro: it declares `qi`, `mi` and the accumulators created by
//! MATRIX_VAR_INIT (prefix `xmm_sum`) directly in the enclosing scope.
//! `m` and `q` are walked as 32-bit words (presumably four packed INT8 values
//! per word -- confirm against the callers). The loop runs `dim >> 2` times,
//! so the low two bits of `dim` are ignored; each step advances `mi` by 32
//! and `qi` by 16 words. Aligned loads are used only when `mi` is 16-byte
//! aligned and aligned stores only when `out` is 16-byte aligned.
//! `dim` and `out` are wrapped as a whole before use, so expression
//! arguments such as `n & ~3` or `base + 4` are evaluated correctly.
#define ACCUM_INT8_32X16_SSE(m, q, dim, out, _NORM)                   \
  MATRIX_VAR_INIT(8, 16, __m128i, xmm_sum, _mm_setzero_si128())       \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \
  if ((reinterpret_cast<uintptr_t>(mi) & 0xf) == 0) {                 \
    for (const uint32_t *qe = qi + (((dim) >> 2) << 4); qi != qe;     \
         mi += 32, qi += 16) {                                        \
      MATRIX_INT32_ITER_32X16_SSE(mi, qi, xmm_sum, _mm_load_si128,    \
                                  ACCUM_INT8_STEP_SSE)                \
    }                                                                 \
  } else {                                                            \
    for (const uint32_t *qe = qi + (((dim) >> 2) << 4); qi != qe;     \
         mi += 32, qi += 16) {                                        \
      MATRIX_INT32_ITER_32X16_SSE(mi, qi, xmm_sum, _mm_loadu_si128,   \
                                  ACCUM_INT8_STEP_SSE)                \
    }                                                                 \
  }                                                                   \
  if ((reinterpret_cast<uintptr_t>(out) & 0xf) == 0) {                \
    MATRIX_VAR_STORE(8, 16, 4, xmm_sum, (out), _mm_store_ps, _NORM)   \
  } else {                                                            \
    MATRIX_VAR_STORE(8, 16, 4, xmm_sum, (out), _mm_storeu_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (INT8, M=32, N=32)
//!
//! Statement macro: it declares `qi`, `mi` and the accumulators created by
//! MATRIX_VAR_INIT (prefix `xmm_sum`) directly in the enclosing scope.
//! `m` and `q` are walked as 32-bit words (presumably four packed INT8 values
//! per word -- confirm against the callers). The loop runs `dim >> 2` times,
//! so the low two bits of `dim` are ignored; each step advances `mi` by 32
//! and `qi` by 32 words. Aligned loads are used only when `mi` is 16-byte
//! aligned and aligned stores only when `out` is 16-byte aligned.
//! `dim` and `out` are wrapped as a whole before use, so expression
//! arguments such as `n & ~3` or `base + 4` are evaluated correctly.
#define ACCUM_INT8_32X32_SSE(m, q, dim, out, _NORM)                   \
  MATRIX_VAR_INIT(8, 32, __m128i, xmm_sum, _mm_setzero_si128())       \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \
  if ((reinterpret_cast<uintptr_t>(mi) & 0xf) == 0) {                 \
    for (const uint32_t *qe = qi + (((dim) >> 2) << 5); qi != qe;     \
         mi += 32, qi += 32) {                                        \
      MATRIX_INT32_ITER_32X32_SSE(mi, qi, xmm_sum, _mm_load_si128,    \
                                  ACCUM_INT8_STEP_SSE)                \
    }                                                                 \
  } else {                                                            \
    for (const uint32_t *qe = qi + (((dim) >> 2) << 5); qi != qe;     \
         mi += 32, qi += 32) {                                        \
      MATRIX_INT32_ITER_32X32_SSE(mi, qi, xmm_sum, _mm_loadu_si128,   \
                                  ACCUM_INT8_STEP_SSE)                \
    }                                                                 \
  }                                                                   \
  if ((reinterpret_cast<uintptr_t>(out) & 0xf) == 0) {                \
    MATRIX_VAR_STORE(8, 32, 4, xmm_sum, (out), _mm_store_ps, _NORM)   \
  } else {                                                            \
    MATRIX_VAR_STORE(8, 32, 4, xmm_sum, (out), _mm_storeu_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (INT8, M=2, N=1)
//!
//! Statement macro: it declares `qi`, `mi`, `qe_aligned`, `qe`, `xmm_sum_0`
//! and the accumulator created by MATRIX_VAR_INIT (prefix `ymm_sum`) directly
//! in the enclosing scope. `m` and `q` are walked as 32-bit words (presumably
//! four packed INT8 values per word -- confirm against the callers); the low
//! two bits of `dim` are ignored.
//!  1. The 256-bit loop covers the first `(dim >> 4) << 2` query words,
//!     taking 4 query words and 8 matrix words per step; aligned loads are
//!     used only when `mi` is 32-byte aligned.
//!  2. The two 128-bit halves of the accumulator are added together.
//!  3. If at least two query words remain they are handled by one 128-bit
//!     step (4 matrix words).
//!  4. The upper 64 bits of the sum are added onto the lower 64 bits.
//!  5. A final single query word, if any, is combined with the two matrix
//!     words placed in the low half of a register.
//! The low two lanes of `_NORM(xmm_sum_0)` are written to `out` with
//! `_mm_storel_pi`. `_mm_broadcast_si32` is a helper defined elsewhere.
//! `dim` and `out` are wrapped as a whole before use, so expression
//! arguments such as `n & ~3` or `base + 2` are evaluated correctly.
#define ACCUM_INT8_2X1_AVX(m, q, dim, out, _NORM)                              \
  MATRIX_VAR_INIT(1, 1, __m256i, ymm_sum, _mm256_setzero_si256())              \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);                  \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);                  \
  const uint32_t *qe_aligned = qi + (((dim) >> 4) << 2);                       \
  const uint32_t *qe = qi + ((dim) >> 2);                                      \
  if ((reinterpret_cast<uintptr_t>(mi) & 0x1f) == 0) {                         \
    for (; qi != qe_aligned; mi += 8, qi += 4) {                               \
      MATRIX_INT32_ITER_2X1_AVX(mi, qi, ymm_sum, _mm256_load_si256,            \
                                ACCUM_INT8_STEP_AVX)                           \
    }                                                                          \
  } else {                                                                     \
    for (; qi != qe_aligned; mi += 8, qi += 4) {                               \
      MATRIX_INT32_ITER_2X1_AVX(mi, qi, ymm_sum, _mm256_loadu_si256,           \
                                ACCUM_INT8_STEP_AVX)                           \
    }                                                                          \
  }                                                                            \
  __m128i xmm_sum_0 = _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_0),       \
                                    _mm256_extracti128_si256(ymm_sum_0_0, 1)); \
  if (qe >= qe_aligned + 2) {                                                  \
    __m128i xmm_mi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(mi));   \
    __m128i xmm_qi = _mm_set_epi32(qi[1], qi[1], qi[0], qi[0]);                \
    ACCUM_INT8_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0)                             \
    mi += 4;                                                                   \
    qi += 2;                                                                   \
  }                                                                            \
  xmm_sum_0 = _mm_add_epi32(                                                   \
      xmm_sum_0, _mm_shuffle_epi32(xmm_sum_0, _MM_SHUFFLE(0, 0, 3, 2)));       \
  if (qi != qe) {                                                              \
    __m128i xmm_mi = _mm_set_epi32(0, 0, mi[1], mi[0]);                        \
    __m128i xmm_qi = _mm_broadcast_si32(qi);                                   \
    ACCUM_INT8_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0)                             \
  }                                                                            \
  _mm_storel_pi(reinterpret_cast<__m64 *>(out), _NORM(xmm_sum_0));

//! Compute the distance between matrix and query (INT8, M=2, N=2)
//!
//! Statement macro: it declares `qi`, `mi`, `qe_aligned`, `qe`, `xmm_sum_0_0`,
//! `xmm_sum_0_1` and the accumulators created by MATRIX_VAR_INIT (prefix
//! `ymm_sum`) directly in the enclosing scope. `m` and `q` are walked as
//! 32-bit words (presumably four packed INT8 values per word -- confirm
//! against the callers); the low two bits of `dim` are ignored.
//!  1. The 256-bit loop covers the first `(dim >> 4) << 3` query words,
//!     taking 8 query words and 8 matrix words per step; aligned loads are
//!     used only when both `mi` and `qi` are 32-byte aligned.
//!  2. Each accumulator's two 128-bit halves are added together.
//!  3. If at least four query words remain they are handled by 128-bit
//!     steps (4 matrix words), one per accumulator.
//!  4. The two accumulators are merged into `xmm_sum_0_0` by adding their
//!     low and high 64-bit halves.
//!  5. A final pair of query words, if any, is handled by one more step.
//! The result is stored through MATRIX_VAR_STORE, aligned only when `out` is
//! 16-byte aligned. `dim` and `out` are wrapped as a whole before use, so
//! expression arguments such as `n & ~3` or `base + 4` are evaluated
//! correctly.
#define ACCUM_INT8_2X2_AVX(m, q, dim, out, _NORM)                            \
  MATRIX_VAR_INIT(1, 2, __m256i, ymm_sum, _mm256_setzero_si256())            \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);                \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);                \
  const uint32_t *qe_aligned = qi + (((dim) >> 4) << 3);                     \
  const uint32_t *qe = qi + (((dim) >> 2) << 1);                             \
  if ((reinterpret_cast<uintptr_t>(mi) & 0x1f) == 0 &&                       \
      (reinterpret_cast<uintptr_t>(qi) & 0x1f) == 0) {                       \
    for (; qi != qe_aligned; mi += 8, qi += 8) {                             \
      MATRIX_INT32_ITER_2X2_AVX(mi, qi, ymm_sum, _mm256_load_si256,          \
                                ACCUM_INT8_STEP_AVX)                         \
    }                                                                        \
  } else {                                                                   \
    for (; qi != qe_aligned; mi += 8, qi += 8) {                             \
      MATRIX_INT32_ITER_2X2_AVX(mi, qi, ymm_sum, _mm256_loadu_si256,         \
                                ACCUM_INT8_STEP_AVX)                         \
    }                                                                        \
  }                                                                          \
  __m128i xmm_sum_0_0 =                                                      \
      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_0),                     \
                    _mm256_extracti128_si256(ymm_sum_0_0, 1));               \
  __m128i xmm_sum_0_1 =                                                      \
      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_1),                     \
                    _mm256_extracti128_si256(ymm_sum_0_1, 1));               \
  if (qe >= qe_aligned + 4) {                                                \
    __m128i xmm_qi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(qi)); \
    __m128i xmm_mi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(mi)); \
    __m128i xmm_pi = _mm_shuffle_epi32(xmm_qi, _MM_SHUFFLE(2, 2, 0, 0));     \
    ACCUM_INT8_STEP_SSE(xmm_mi, xmm_pi, xmm_sum_0_0)                         \
    xmm_pi = _mm_shuffle_epi32(xmm_qi, _MM_SHUFFLE(3, 3, 1, 1));             \
    ACCUM_INT8_STEP_SSE(xmm_mi, xmm_pi, xmm_sum_0_1)                         \
    mi += 4;                                                                 \
    qi += 4;                                                                 \
  }                                                                          \
  xmm_sum_0_0 = _mm_add_epi32(_mm_unpacklo_epi64(xmm_sum_0_0, xmm_sum_0_1),  \
                              _mm_unpackhi_epi64(xmm_sum_0_0, xmm_sum_0_1)); \
  if (qi != qe) {                                                            \
    __m128i xmm_mi = _mm_set_epi32(mi[1], mi[0], mi[1], mi[0]);              \
    __m128i xmm_qi = _mm_set_epi32(qi[1], qi[1], qi[0], qi[0]);              \
    ACCUM_INT8_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)                         \
  }                                                                          \
  if ((reinterpret_cast<uintptr_t>(out) & 0xf) == 0) {                       \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, (out), _mm_store_ps, _NORM)           \
  } else {                                                                   \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, (out), _mm_storeu_ps, _NORM)          \
  }

//! Compute the distance between matrix and query (INT8, M=4, N=1)
//!
//! Statement macro: it declares `qi`, `mi`, `qe`, `xmm_sum_0_0` and the
//! accumulator created by MATRIX_VAR_INIT (prefix `ymm_sum`) directly in the
//! enclosing scope. `m` and `q` are walked as 32-bit words (presumably four
//! packed INT8 values per word -- confirm against the callers); the low two
//! bits of `dim` are ignored.
//! The 256-bit loop covers the first `(dim >> 3) << 1` query words, taking
//! 2 query words and 8 matrix words per step (aligned loads only when `mi`
//! is 32-byte aligned). The accumulator's two 128-bit halves are then added
//! together, and one leftover query word, if any, is handled by a 128-bit
//! step over 4 matrix words. `_mm_broadcast_si32` is a helper defined
//! elsewhere. The result is stored through MATRIX_VAR_STORE, aligned only
//! when `out` is 16-byte aligned.
//! `dim` and `out` are wrapped as a whole before use, so expression
//! arguments such as `n & ~3` or `base + 4` are evaluated correctly.
#define ACCUM_INT8_4X1_AVX(m, q, dim, out, _NORM)                            \
  MATRIX_VAR_INIT(1, 1, __m256i, ymm_sum, _mm256_setzero_si256())            \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);                \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);                \
  const uint32_t *qe = qi + ((dim) >> 2);                                    \
  if ((reinterpret_cast<uintptr_t>(mi) & 0x1f) == 0) {                       \
    for (const uint32_t *qe_aligned = qi + (((dim) >> 3) << 1);              \
         qi != qe_aligned; mi += 8, qi += 2) {                               \
      MATRIX_INT32_ITER_4X1_AVX(mi, qi, ymm_sum, _mm256_load_si256,          \
                                ACCUM_INT8_STEP_AVX)                         \
    }                                                                        \
  } else {                                                                   \
    for (const uint32_t *qe_aligned = qi + (((dim) >> 3) << 1);              \
         qi != qe_aligned; mi += 8, qi += 2) {                               \
      MATRIX_INT32_ITER_4X1_AVX(mi, qi, ymm_sum, _mm256_loadu_si256,         \
                                ACCUM_INT8_STEP_AVX)                         \
    }                                                                        \
  }                                                                          \
  __m128i xmm_sum_0_0 =                                                      \
      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_0),                     \
                    _mm256_extracti128_si256(ymm_sum_0_0, 1));               \
  if (qi != qe) {                                                            \
    __m128i xmm_mi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(mi)); \
    __m128i xmm_qi = _mm_broadcast_si32(qi);                                 \
    ACCUM_INT8_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)                         \
  }                                                                          \
  if ((reinterpret_cast<uintptr_t>(out) & 0xf) == 0) {                       \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, (out), _mm_store_ps, _NORM)           \
  } else {                                                                   \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, (out), _mm_storeu_ps, _NORM)          \
  }

//! Compute the distance between matrix and query (INT8, M=4, N=2)
//!
//! Statement macro: it declares `qi`, `mi`, `qe`, `xmm_sum_0_0`,
//! `xmm_sum_0_1` and the accumulators created by MATRIX_VAR_INIT (prefix
//! `ymm_sum`) directly in the enclosing scope. `m` and `q` are walked as
//! 32-bit words (presumably four packed INT8 values per word -- confirm
//! against the callers); the low two bits of `dim` are ignored.
//! The 256-bit loop covers the first `(dim >> 3) << 2` query words, taking
//! 4 query words and 8 matrix words per step (aligned loads only when `mi`
//! is 32-byte aligned). Each accumulator's two 128-bit halves are then added
//! together, and a leftover pair of query words, if any, is handled by
//! 128-bit steps over 4 matrix words, one per accumulator.
//! `_mm_broadcast_si32` is a helper defined elsewhere. The result is stored
//! through MATRIX_VAR_STORE, aligned only when `out` is 16-byte aligned.
//! `dim` and `out` are wrapped as a whole before use, so expression
//! arguments such as `n & ~3` or `base + 4` are evaluated correctly.
#define ACCUM_INT8_4X2_AVX(m, q, dim, out, _NORM)                            \
  MATRIX_VAR_INIT(1, 2, __m256i, ymm_sum, _mm256_setzero_si256())            \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);                \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);                \
  const uint32_t *qe = qi + (((dim) >> 2) << 1);                             \
  if ((reinterpret_cast<uintptr_t>(mi) & 0x1f) == 0) {                       \
    for (const uint32_t *qe_aligned = qi + (((dim) >> 3) << 2);              \
         qi != qe_aligned; mi += 8, qi += 4) {                               \
      MATRIX_INT32_ITER_4X2_AVX(mi, qi, ymm_sum, _mm256_load_si256,          \
                                ACCUM_INT8_STEP_AVX)                         \
    }                                                                        \
  } else {                                                                   \
    for (const uint32_t *qe_aligned = qi + (((dim) >> 3) << 2);              \
         qi != qe_aligned; mi += 8, qi += 4) {                               \
      MATRIX_INT32_ITER_4X2_AVX(mi, qi, ymm_sum, _mm256_loadu_si256,         \
                                ACCUM_INT8_STEP_AVX)                         \
    }                                                                        \
  }                                                                          \
  __m128i xmm_sum_0_0 =                                                      \
      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_0),                     \
                    _mm256_extracti128_si256(ymm_sum_0_0, 1));               \
  __m128i xmm_sum_0_1 =                                                      \
      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_1),                     \
                    _mm256_extracti128_si256(ymm_sum_0_1, 1));               \
  if (qi != qe) {                                                            \
    __m128i xmm_mi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(mi)); \
    __m128i xmm_qi = _mm_broadcast_si32(qi);                                 \
    ACCUM_INT8_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)                         \
    xmm_qi = _mm_broadcast_si32(qi + 1);                                     \
    ACCUM_INT8_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_1)                         \
  }                                                                          \
  if ((reinterpret_cast<uintptr_t>(out) & 0xf) == 0) {                       \
    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, (out), _mm_store_ps, _NORM)           \
  } else {                                                                   \
    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, (out), _mm_storeu_ps, _NORM)          \
  }

//! Compute the distance between matrix and query (INT8, M=4, N=4)
//!
//! Statement macro: it declares `qi`, `mi`, `qe`, `xmm_sum_0_0` ..
//! `xmm_sum_0_3` and the accumulators created by MATRIX_VAR_INIT (prefix
//! `ymm_sum`) directly in the enclosing scope. `m` and `q` are walked as
//! 32-bit words (presumably four packed INT8 values per word -- confirm
//! against the callers); the low two bits of `dim` are ignored.
//! The 256-bit loop covers the first `(dim >> 3) << 3` query words, taking
//! 8 query words and 8 matrix words per step (aligned loads only when both
//! `mi` and `qi` are 32-byte aligned). Each accumulator's two 128-bit halves
//! are then added together, and a leftover group of four query words, if
//! any, is handled by 128-bit steps over 4 matrix words, one per
//! accumulator. `_mm_broadcast_si32` is a helper defined elsewhere. The
//! result is stored through MATRIX_VAR_STORE, aligned only when `out` is
//! 16-byte aligned.
//! `dim` and `out` are wrapped as a whole before use, so expression
//! arguments such as `n & ~3` or `base + 4` are evaluated correctly.
#define ACCUM_INT8_4X4_AVX(m, q, dim, out, _NORM)                            \
  MATRIX_VAR_INIT(1, 4, __m256i, ymm_sum, _mm256_setzero_si256())            \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);                \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);                \
  const uint32_t *qe = qi + (((dim) >> 2) << 2);                             \
  if ((reinterpret_cast<uintptr_t>(mi) & 0x1f) == 0 &&                       \
      (reinterpret_cast<uintptr_t>(qi) & 0x1f) == 0) {                       \
    for (const uint32_t *qe_aligned = qi + (((dim) >> 3) << 3);              \
         qi != qe_aligned; mi += 8, qi += 8) {                               \
      MATRIX_INT32_ITER_4X4_AVX(mi, qi, ymm_sum, _mm256_load_si256,          \
                                ACCUM_INT8_STEP_AVX)                         \
    }                                                                        \
  } else {                                                                   \
    for (const uint32_t *qe_aligned = qi + (((dim) >> 3) << 3);              \
         qi != qe_aligned; mi += 8, qi += 8) {                               \
      MATRIX_INT32_ITER_4X4_AVX(mi, qi, ymm_sum, _mm256_loadu_si256,         \
                                ACCUM_INT8_STEP_AVX)                         \
    }                                                                        \
  }                                                                          \
  __m128i xmm_sum_0_0 =                                                      \
      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_0),                     \
                    _mm256_extracti128_si256(ymm_sum_0_0, 1));               \
  __m128i xmm_sum_0_1 =                                                      \
      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_1),                     \
                    _mm256_extracti128_si256(ymm_sum_0_1, 1));               \
  __m128i xmm_sum_0_2 =                                                      \
      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_2),                     \
                    _mm256_extracti128_si256(ymm_sum_0_2, 1));               \
  __m128i xmm_sum_0_3 =                                                      \
      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_3),                     \
                    _mm256_extracti128_si256(ymm_sum_0_3, 1));               \
  if (qi != qe) {                                                            \
    __m128i xmm_mi = _mm_loadu_si128(reinterpret_cast<const __m128i *>(mi)); \
    __m128i xmm_qi = _mm_broadcast_si32(qi);                                 \
    ACCUM_INT8_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_0)                         \
    xmm_qi = _mm_broadcast_si32(qi + 1);                                     \
    ACCUM_INT8_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_1)                         \
    xmm_qi = _mm_broadcast_si32(qi + 2);                                     \
    ACCUM_INT8_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_2)                         \
    xmm_qi = _mm_broadcast_si32(qi + 3);                                     \
    ACCUM_INT8_STEP_SSE(xmm_mi, xmm_qi, xmm_sum_0_3)                         \
  }                                                                          \
  if ((reinterpret_cast<uintptr_t>(out) & 0xf) == 0) {                       \
    MATRIX_VAR_STORE(1, 4, 4, xmm_sum, (out), _mm_store_ps, _NORM)           \
  } else {                                                                   \
    MATRIX_VAR_STORE(1, 4, 4, xmm_sum, (out), _mm_storeu_ps, _NORM)          \
  }

//! Compute the distance between matrix and query (INT8, M=8, N=1)
//!
//! Statement macro: it declares `qi`, `mi` and the accumulator created by
//! MATRIX_VAR_INIT (prefix `ymm_sum`) directly in the enclosing scope.
//! `m` and `q` are walked as 32-bit words (presumably four packed INT8 values
//! per word -- confirm against the callers). The loop runs `dim >> 2` times,
//! so the low two bits of `dim` are ignored; each step advances `mi` by 8
//! words and `qi` by one word. Aligned loads are used only when `mi` is
//! 32-byte aligned and aligned stores only when `out` is 32-byte aligned.
//! `dim` and `out` are wrapped as a whole before use, so expression
//! arguments such as `n & ~3` or `base + 8` are evaluated correctly.
#define ACCUM_INT8_8X1_AVX(m, q, dim, out, _NORM)                             \
  MATRIX_VAR_INIT(1, 1, __m256i, ymm_sum, _mm256_setzero_si256())             \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);                 \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);                 \
  if ((reinterpret_cast<uintptr_t>(mi) & 0x1f) == 0) {                        \
    for (const uint32_t *qe = qi + ((dim) >> 2); qi != qe; mi += 8, ++qi) {   \
      MATRIX_INT32_ITER_8X1_AVX(mi, qi, ymm_sum, _mm256_load_si256,           \
                                ACCUM_INT8_STEP_AVX)                          \
    }                                                                         \
  } else {                                                                    \
    for (const uint32_t *qe = qi + ((dim) >> 2); qi != qe; mi += 8, ++qi) {   \
      MATRIX_INT32_ITER_8X1_AVX(mi, qi, ymm_sum, _mm256_loadu_si256,          \
                                ACCUM_INT8_STEP_AVX)                          \
    }                                                                         \
  }                                                                           \
  if ((reinterpret_cast<uintptr_t>(out) & 0x1f) == 0) {                       \
    MATRIX_VAR_STORE(1, 1, 8, ymm_sum, (out), _mm256_store_ps, _NORM)         \
  } else {                                                                    \
    MATRIX_VAR_STORE(1, 1, 8, ymm_sum, (out), _mm256_storeu_ps, _NORM)        \
  }

//! Compute the distance between matrix and query (INT8, M=8, N=2)
//!
//! Expands to bare statements (no enclosing braces), so `qi`, `mi` and
//! whatever MATRIX_VAR_INIT declares land in the scope of the caller.
//! @param m      matrix data, walked as uint32_t words, 8 words per step
//! @param q      query data, walked as uint32_t words, 2 words per step
//! @param dim    dimension; the loop takes (dim >> 2) steps, so a remainder
//!               of dim modulo 4 is not visited here
//! @param out    destination given to MATRIX_VAR_STORE; the aligned store is
//!               chosen only when `out` is 32-byte aligned
//! @param _NORM  forwarded as-is to MATRIX_VAR_STORE
//! Aligned vs. unaligned 256-bit loads are selected once, outside the loop,
//! from the 32-byte alignment of `m`.
//! NOTE(review): MATRIX_VAR_INIT, MATRIX_INT32_ITER_8X2_AVX, MATRIX_VAR_STORE
//! and ACCUM_INT8_STEP_AVX are defined outside this section - confirm their
//! contracts there.
#define ACCUM_INT8_8X2_AVX(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(1, 2, __m256i, ymm_sum, _mm256_setzero_si256())    \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);        \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);        \
  if (((uintptr_t)mi & 0x1f) == 0) {                                 \
    for (const uint32_t *qe = qi + ((dim >> 2) << 1); qi != qe;      \
         mi += 8, qi += 2) {                                         \
      MATRIX_INT32_ITER_8X2_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \
                                ACCUM_INT8_STEP_AVX)                 \
    }                                                                \
  } else {                                                           \
    for (const uint32_t *qe = qi + ((dim >> 2) << 1); qi != qe;      \
         mi += 8, qi += 2) {                                         \
      MATRIX_INT32_ITER_8X2_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \
                                ACCUM_INT8_STEP_AVX)                 \
    }                                                                \
  }                                                                  \
  if (((uintptr_t)out & 0x1f) == 0) {                                \
    MATRIX_VAR_STORE(1, 2, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \
  } else {                                                           \
    MATRIX_VAR_STORE(1, 2, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \
  }

//! Compute the distance between matrix and query (INT8, M=8, N=4)
//!
//! Expands to bare statements (no enclosing braces), so `qi`, `mi` and
//! whatever MATRIX_VAR_INIT declares land in the scope of the caller.
//! @param m      matrix data, walked as uint32_t words, 8 words per step
//! @param q      query data, walked as uint32_t words, 4 words per step
//! @param dim    dimension; the loop takes (dim >> 2) steps, so a remainder
//!               of dim modulo 4 is not visited here
//! @param out    destination given to MATRIX_VAR_STORE; the aligned store is
//!               chosen only when `out` is 32-byte aligned
//! @param _NORM  forwarded as-is to MATRIX_VAR_STORE
//! Aligned vs. unaligned 256-bit loads are selected once, outside the loop,
//! from the 32-byte alignment of `m`.
//! NOTE(review): MATRIX_VAR_INIT, MATRIX_INT32_ITER_8X4_AVX, MATRIX_VAR_STORE
//! and ACCUM_INT8_STEP_AVX are defined outside this section - confirm their
//! contracts there.
#define ACCUM_INT8_8X4_AVX(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(1, 4, __m256i, ymm_sum, _mm256_setzero_si256())    \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);        \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);        \
  if (((uintptr_t)mi & 0x1f) == 0) {                                 \
    for (const uint32_t *qe = qi + ((dim >> 2) << 2); qi != qe;      \
         mi += 8, qi += 4) {                                         \
      MATRIX_INT32_ITER_8X4_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \
                                ACCUM_INT8_STEP_AVX)                 \
    }                                                                \
  } else {                                                           \
    for (const uint32_t *qe = qi + ((dim >> 2) << 2); qi != qe;      \
         mi += 8, qi += 4) {                                         \
      MATRIX_INT32_ITER_8X4_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \
                                ACCUM_INT8_STEP_AVX)                 \
    }                                                                \
  }                                                                  \
  if (((uintptr_t)out & 0x1f) == 0) {                                \
    MATRIX_VAR_STORE(1, 4, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \
  } else {                                                           \
    MATRIX_VAR_STORE(1, 4, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \
  }

//! Compute the distance between matrix and query (INT8, M=8, N=8)
//!
//! Expands to bare statements (no enclosing braces), so `qi`, `mi` and
//! whatever MATRIX_VAR_INIT declares land in the scope of the caller.
//! @param m      matrix data, walked as uint32_t words, 8 words per step
//! @param q      query data, walked as uint32_t words, 8 words per step
//! @param dim    dimension; the loop takes (dim >> 2) steps, so a remainder
//!               of dim modulo 4 is not visited here
//! @param out    destination given to MATRIX_VAR_STORE; the aligned store is
//!               chosen only when `out` is 32-byte aligned
//! @param _NORM  forwarded as-is to MATRIX_VAR_STORE
//! Aligned vs. unaligned 256-bit loads are selected once, outside the loop,
//! from the 32-byte alignment of `m`.
//! NOTE(review): MATRIX_VAR_INIT, MATRIX_INT32_ITER_8X8_AVX, MATRIX_VAR_STORE
//! and ACCUM_INT8_STEP_AVX are defined outside this section - confirm their
//! contracts there.
#define ACCUM_INT8_8X8_AVX(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(1, 8, __m256i, ymm_sum, _mm256_setzero_si256())    \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);        \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);        \
  if (((uintptr_t)mi & 0x1f) == 0) {                                 \
    for (const uint32_t *qe = qi + ((dim >> 2) << 3); qi != qe;      \
         mi += 8, qi += 8) {                                         \
      MATRIX_INT32_ITER_8X8_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \
                                ACCUM_INT8_STEP_AVX)                 \
    }                                                                \
  } else {                                                           \
    for (const uint32_t *qe = qi + ((dim >> 2) << 3); qi != qe;      \
         mi += 8, qi += 8) {                                         \
      MATRIX_INT32_ITER_8X8_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \
                                ACCUM_INT8_STEP_AVX)                 \
    }                                                                \
  }                                                                  \
  if (((uintptr_t)out & 0x1f) == 0) {                                \
    MATRIX_VAR_STORE(1, 8, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \
  } else {                                                           \
    MATRIX_VAR_STORE(1, 8, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \
  }

//! Compute the distance between matrix and query (INT8, M=16, N=1)
//!
//! Expands to bare statements (no enclosing braces), so `qi`, `mi` and
//! whatever MATRIX_VAR_INIT declares land in the scope of the caller.
//! @param m      matrix data, walked as uint32_t words, 16 words per step
//! @param q      query data, walked as uint32_t words, 1 word per step
//! @param dim    dimension; the loop takes (dim >> 2) steps, so a remainder
//!               of dim modulo 4 is not visited here
//! @param out    destination given to MATRIX_VAR_STORE; the aligned store is
//!               chosen only when `out` is 32-byte aligned
//! @param _NORM  forwarded as-is to MATRIX_VAR_STORE
//! Aligned vs. unaligned 256-bit loads are selected once, outside the loop,
//! from the 32-byte alignment of `m`.
//! NOTE(review): MATRIX_VAR_INIT, MATRIX_INT32_ITER_16X1_AVX,
//! MATRIX_VAR_STORE and ACCUM_INT8_STEP_AVX are defined outside this section
//! - confirm their contracts there.
#define ACCUM_INT8_16X1_AVX(m, q, dim, out, _NORM)                         \
  MATRIX_VAR_INIT(2, 1, __m256i, ymm_sum, _mm256_setzero_si256())          \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);              \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);              \
  if (((uintptr_t)mi & 0x1f) == 0) {                                       \
    for (const uint32_t *qe = qi + (dim >> 2); qi != qe; mi += 16, ++qi) { \
      MATRIX_INT32_ITER_16X1_AVX(mi, qi, ymm_sum, _mm256_load_si256,       \
                                 ACCUM_INT8_STEP_AVX)                      \
    }                                                                      \
  } else {                                                                 \
    for (const uint32_t *qe = qi + (dim >> 2); qi != qe; mi += 16, ++qi) { \
      MATRIX_INT32_ITER_16X1_AVX(mi, qi, ymm_sum, _mm256_loadu_si256,      \
                                 ACCUM_INT8_STEP_AVX)                      \
    }                                                                      \
  }                                                                        \
  if (((uintptr_t)out & 0x1f) == 0) {                                      \
    MATRIX_VAR_STORE(2, 1, 8, ymm_sum, out, _mm256_store_ps, _NORM)        \
  } else {                                                                 \
    MATRIX_VAR_STORE(2, 1, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)       \
  }

//! Compute the distance between matrix and query (INT8, M=16, N=2)
//!
//! Expands to bare statements (no enclosing braces), so `qi`, `mi` and
//! whatever MATRIX_VAR_INIT declares land in the scope of the caller.
//! @param m      matrix data, walked as uint32_t words, 16 words per step
//! @param q      query data, walked as uint32_t words, 2 words per step
//! @param dim    dimension; the loop takes (dim >> 2) steps, so a remainder
//!               of dim modulo 4 is not visited here
//! @param out    destination given to MATRIX_VAR_STORE; the aligned store is
//!               chosen only when `out` is 32-byte aligned
//! @param _NORM  forwarded as-is to MATRIX_VAR_STORE
//! Aligned vs. unaligned 256-bit loads are selected once, outside the loop,
//! from the 32-byte alignment of `m`.
//! NOTE(review): MATRIX_VAR_INIT, MATRIX_INT32_ITER_16X2_AVX,
//! MATRIX_VAR_STORE and ACCUM_INT8_STEP_AVX are defined outside this section
//! - confirm their contracts there.
#define ACCUM_INT8_16X2_AVX(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(2, 2, __m256i, ymm_sum, _mm256_setzero_si256())     \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \
  if (((uintptr_t)mi & 0x1f) == 0) {                                  \
    for (const uint32_t *qe = qi + ((dim >> 2) << 1); qi != qe;       \
         mi += 16, qi += 2) {                                         \
      MATRIX_INT32_ITER_16X2_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \
                                 ACCUM_INT8_STEP_AVX)                 \
    }                                                                 \
  } else {                                                            \
    for (const uint32_t *qe = qi + ((dim >> 2) << 1); qi != qe;       \
         mi += 16, qi += 2) {                                         \
      MATRIX_INT32_ITER_16X2_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \
                                 ACCUM_INT8_STEP_AVX)                 \
    }                                                                 \
  }                                                                   \
  if (((uintptr_t)out & 0x1f) == 0) {                                 \
    MATRIX_VAR_STORE(2, 2, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \
  } else {                                                            \
    MATRIX_VAR_STORE(2, 2, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (INT8, M=16, N=4)
//!
//! Expands to bare statements (no enclosing braces), so `qi`, `mi` and
//! whatever MATRIX_VAR_INIT declares land in the scope of the caller.
//! @param m      matrix data, walked as uint32_t words, 16 words per step
//! @param q      query data, walked as uint32_t words, 4 words per step
//! @param dim    dimension; the loop takes (dim >> 2) steps, so a remainder
//!               of dim modulo 4 is not visited here
//! @param out    destination given to MATRIX_VAR_STORE; the aligned store is
//!               chosen only when `out` is 32-byte aligned
//! @param _NORM  forwarded as-is to MATRIX_VAR_STORE
//! Aligned vs. unaligned 256-bit loads are selected once, outside the loop,
//! from the 32-byte alignment of `m`.
//! NOTE(review): MATRIX_VAR_INIT, MATRIX_INT32_ITER_16X4_AVX,
//! MATRIX_VAR_STORE and ACCUM_INT8_STEP_AVX are defined outside this section
//! - confirm their contracts there.
#define ACCUM_INT8_16X4_AVX(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(2, 4, __m256i, ymm_sum, _mm256_setzero_si256())     \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \
  if (((uintptr_t)mi & 0x1f) == 0) {                                  \
    for (const uint32_t *qe = qi + ((dim >> 2) << 2); qi != qe;       \
         mi += 16, qi += 4) {                                         \
      MATRIX_INT32_ITER_16X4_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \
                                 ACCUM_INT8_STEP_AVX)                 \
    }                                                                 \
  } else {                                                            \
    for (const uint32_t *qe = qi + ((dim >> 2) << 2); qi != qe;       \
         mi += 16, qi += 4) {                                         \
      MATRIX_INT32_ITER_16X4_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \
                                 ACCUM_INT8_STEP_AVX)                 \
    }                                                                 \
  }                                                                   \
  if (((uintptr_t)out & 0x1f) == 0) {                                 \
    MATRIX_VAR_STORE(2, 4, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \
  } else {                                                            \
    MATRIX_VAR_STORE(2, 4, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (INT8, M=16, N=8)
//!
//! Expands to bare statements (no enclosing braces), so `qi`, `mi` and
//! whatever MATRIX_VAR_INIT declares land in the scope of the caller.
//! @param m      matrix data, walked as uint32_t words, 16 words per step
//! @param q      query data, walked as uint32_t words, 8 words per step
//! @param dim    dimension; the loop takes (dim >> 2) steps, so a remainder
//!               of dim modulo 4 is not visited here
//! @param out    destination given to MATRIX_VAR_STORE; the aligned store is
//!               chosen only when `out` is 32-byte aligned
//! @param _NORM  forwarded as-is to MATRIX_VAR_STORE
//! Aligned vs. unaligned 256-bit loads are selected once, outside the loop,
//! from the 32-byte alignment of `m`.
//! NOTE(review): MATRIX_VAR_INIT, MATRIX_INT32_ITER_16X8_AVX,
//! MATRIX_VAR_STORE and ACCUM_INT8_STEP_AVX are defined outside this section
//! - confirm their contracts there.
#define ACCUM_INT8_16X8_AVX(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(2, 8, __m256i, ymm_sum, _mm256_setzero_si256())     \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \
  if (((uintptr_t)mi & 0x1f) == 0) {                                  \
    for (const uint32_t *qe = qi + ((dim >> 2) << 3); qi != qe;       \
         mi += 16, qi += 8) {                                         \
      MATRIX_INT32_ITER_16X8_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \
                                 ACCUM_INT8_STEP_AVX)                 \
    }                                                                 \
  } else {                                                            \
    for (const uint32_t *qe = qi + ((dim >> 2) << 3); qi != qe;       \
         mi += 16, qi += 8) {                                         \
      MATRIX_INT32_ITER_16X8_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \
                                 ACCUM_INT8_STEP_AVX)                 \
    }                                                                 \
  }                                                                   \
  if (((uintptr_t)out & 0x1f) == 0) {                                 \
    MATRIX_VAR_STORE(2, 8, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \
  } else {                                                            \
    MATRIX_VAR_STORE(2, 8, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (INT8, M=16, N=16)
//!
//! Expands to bare statements (no enclosing braces), so `qi`, `mi` and
//! whatever MATRIX_VAR_INIT declares land in the scope of the caller.
//! @param m      matrix data, walked as uint32_t words, 16 words per step
//! @param q      query data, walked as uint32_t words, 16 words per step
//! @param dim    dimension; the loop takes (dim >> 2) steps, so a remainder
//!               of dim modulo 4 is not visited here
//! @param out    destination given to MATRIX_VAR_STORE; the aligned store is
//!               chosen only when `out` is 32-byte aligned
//! @param _NORM  forwarded as-is to MATRIX_VAR_STORE
//! Aligned vs. unaligned 256-bit loads are selected once, outside the loop,
//! from the 32-byte alignment of `m`.
//! NOTE(review): MATRIX_VAR_INIT, MATRIX_INT32_ITER_16X16_AVX,
//! MATRIX_VAR_STORE and ACCUM_INT8_STEP_AVX are defined outside this section
//! - confirm their contracts there.
#define ACCUM_INT8_16X16_AVX(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(2, 16, __m256i, ymm_sum, _mm256_setzero_si256())     \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);          \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);          \
  if (((uintptr_t)mi & 0x1f) == 0) {                                   \
    for (const uint32_t *qe = qi + ((dim >> 2) << 4); qi != qe;        \
         mi += 16, qi += 16) {                                         \
      MATRIX_INT32_ITER_16X16_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \
                                  ACCUM_INT8_STEP_AVX)                 \
    }                                                                  \
  } else {                                                             \
    for (const uint32_t *qe = qi + ((dim >> 2) << 4); qi != qe;        \
         mi += 16, qi += 16) {                                         \
      MATRIX_INT32_ITER_16X16_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \
                                  ACCUM_INT8_STEP_AVX)                 \
    }                                                                  \
  }                                                                    \
  if (((uintptr_t)out & 0x1f) == 0) {                                  \
    MATRIX_VAR_STORE(2, 16, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \
  } else {                                                             \
    MATRIX_VAR_STORE(2, 16, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (INT8, M=32, N=1)
//!
//! Expands to bare statements (no enclosing braces), so `qi`, `mi` and
//! whatever MATRIX_VAR_INIT declares land in the scope of the caller.
//! @param m      matrix data, walked as uint32_t words, 32 words per step
//! @param q      query data, walked as uint32_t words, 1 word per step
//! @param dim    dimension; the loop takes (dim >> 2) steps, so a remainder
//!               of dim modulo 4 is not visited here
//! @param out    destination given to MATRIX_VAR_STORE; the aligned store is
//!               chosen only when `out` is 32-byte aligned
//! @param _NORM  forwarded as-is to MATRIX_VAR_STORE
//! Aligned vs. unaligned 256-bit loads are selected once, outside the loop,
//! from the 32-byte alignment of `m`.
//! NOTE(review): MATRIX_VAR_INIT, MATRIX_INT32_ITER_32X1_AVX,
//! MATRIX_VAR_STORE and ACCUM_INT8_STEP_AVX are defined outside this section
//! - confirm their contracts there.
#define ACCUM_INT8_32X1_AVX(m, q, dim, out, _NORM)                         \
  MATRIX_VAR_INIT(4, 1, __m256i, ymm_sum, _mm256_setzero_si256())          \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);              \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);              \
  if (((uintptr_t)mi & 0x1f) == 0) {                                       \
    for (const uint32_t *qe = qi + (dim >> 2); qi != qe; mi += 32, ++qi) { \
      MATRIX_INT32_ITER_32X1_AVX(mi, qi, ymm_sum, _mm256_load_si256,       \
                                 ACCUM_INT8_STEP_AVX)                      \
    }                                                                      \
  } else {                                                                 \
    for (const uint32_t *qe = qi + (dim >> 2); qi != qe; mi += 32, ++qi) { \
      MATRIX_INT32_ITER_32X1_AVX(mi, qi, ymm_sum, _mm256_loadu_si256,      \
                                 ACCUM_INT8_STEP_AVX)                      \
    }                                                                      \
  }                                                                        \
  if (((uintptr_t)out & 0x1f) == 0) {                                      \
    MATRIX_VAR_STORE(4, 1, 8, ymm_sum, out, _mm256_store_ps, _NORM)        \
  } else {                                                                 \
    MATRIX_VAR_STORE(4, 1, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)       \
  }

//! Compute the distance between matrix and query (INT8, M=32, N=2)
//!
//! Expands to bare statements (no enclosing braces), so `qi`, `mi` and
//! whatever MATRIX_VAR_INIT declares land in the scope of the caller.
//! @param m      matrix data, walked as uint32_t words, 32 words per step
//! @param q      query data, walked as uint32_t words, 2 words per step
//! @param dim    dimension; the loop takes (dim >> 2) steps, so a remainder
//!               of dim modulo 4 is not visited here
//! @param out    destination given to MATRIX_VAR_STORE; the aligned store is
//!               chosen only when `out` is 32-byte aligned
//! @param _NORM  forwarded as-is to MATRIX_VAR_STORE
//! Aligned vs. unaligned 256-bit loads are selected once, outside the loop,
//! from the 32-byte alignment of `m`.
//! NOTE(review): MATRIX_VAR_INIT, MATRIX_INT32_ITER_32X2_AVX,
//! MATRIX_VAR_STORE and ACCUM_INT8_STEP_AVX are defined outside this section
//! - confirm their contracts there.
#define ACCUM_INT8_32X2_AVX(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(4, 2, __m256i, ymm_sum, _mm256_setzero_si256())     \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \
  if (((uintptr_t)mi & 0x1f) == 0) {                                  \
    for (const uint32_t *qe = qi + ((dim >> 2) << 1); qi != qe;       \
         mi += 32, qi += 2) {                                         \
      MATRIX_INT32_ITER_32X2_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \
                                 ACCUM_INT8_STEP_AVX)                 \
    }                                                                 \
  } else {                                                            \
    for (const uint32_t *qe = qi + ((dim >> 2) << 1); qi != qe;       \
         mi += 32, qi += 2) {                                         \
      MATRIX_INT32_ITER_32X2_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \
                                 ACCUM_INT8_STEP_AVX)                 \
    }                                                                 \
  }                                                                   \
  if (((uintptr_t)out & 0x1f) == 0) {                                 \
    MATRIX_VAR_STORE(4, 2, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \
  } else {                                                            \
    MATRIX_VAR_STORE(4, 2, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (INT8, M=32, N=4)
//!
//! Expands to bare statements (no enclosing braces), so `qi`, `mi` and
//! whatever MATRIX_VAR_INIT declares land in the scope of the caller.
//! @param m      matrix data, walked as uint32_t words, 32 words per step
//! @param q      query data, walked as uint32_t words, 4 words per step
//! @param dim    dimension; the loop takes (dim >> 2) steps, so a remainder
//!               of dim modulo 4 is not visited here
//! @param out    destination given to MATRIX_VAR_STORE; the aligned store is
//!               chosen only when `out` is 32-byte aligned
//! @param _NORM  forwarded as-is to MATRIX_VAR_STORE
//! Aligned vs. unaligned 256-bit loads are selected once, outside the loop,
//! from the 32-byte alignment of `m`.
//! NOTE(review): MATRIX_VAR_INIT, MATRIX_INT32_ITER_32X4_AVX,
//! MATRIX_VAR_STORE and ACCUM_INT8_STEP_AVX are defined outside this section
//! - confirm their contracts there.
#define ACCUM_INT8_32X4_AVX(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(4, 4, __m256i, ymm_sum, _mm256_setzero_si256())     \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \
  if (((uintptr_t)mi & 0x1f) == 0) {                                  \
    for (const uint32_t *qe = qi + ((dim >> 2) << 2); qi != qe;       \
         mi += 32, qi += 4) {                                         \
      MATRIX_INT32_ITER_32X4_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \
                                 ACCUM_INT8_STEP_AVX)                 \
    }                                                                 \
  } else {                                                            \
    for (const uint32_t *qe = qi + ((dim >> 2) << 2); qi != qe;       \
         mi += 32, qi += 4) {                                         \
      MATRIX_INT32_ITER_32X4_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \
                                 ACCUM_INT8_STEP_AVX)                 \
    }                                                                 \
  }                                                                   \
  if (((uintptr_t)out & 0x1f) == 0) {                                 \
    MATRIX_VAR_STORE(4, 4, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \
  } else {                                                            \
    MATRIX_VAR_STORE(4, 4, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (INT8, M=32, N=8)
//!
//! Expands to bare statements (no enclosing braces), so `qi`, `mi` and
//! whatever MATRIX_VAR_INIT declares land in the scope of the caller.
//! @param m      matrix data, walked as uint32_t words, 32 words per step
//! @param q      query data, walked as uint32_t words, 8 words per step
//! @param dim    dimension; the loop takes (dim >> 2) steps, so a remainder
//!               of dim modulo 4 is not visited here
//! @param out    destination given to MATRIX_VAR_STORE; the aligned store is
//!               chosen only when `out` is 32-byte aligned
//! @param _NORM  forwarded as-is to MATRIX_VAR_STORE
//! Aligned vs. unaligned 256-bit loads are selected once, outside the loop,
//! from the 32-byte alignment of `m`.
//! NOTE(review): MATRIX_VAR_INIT, MATRIX_INT32_ITER_32X8_AVX,
//! MATRIX_VAR_STORE and ACCUM_INT8_STEP_AVX are defined outside this section
//! - confirm their contracts there.
#define ACCUM_INT8_32X8_AVX(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(4, 8, __m256i, ymm_sum, _mm256_setzero_si256())     \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);         \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);         \
  if (((uintptr_t)mi & 0x1f) == 0) {                                  \
    for (const uint32_t *qe = qi + ((dim >> 2) << 3); qi != qe;       \
         mi += 32, qi += 8) {                                         \
      MATRIX_INT32_ITER_32X8_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \
                                 ACCUM_INT8_STEP_AVX)                 \
    }                                                                 \
  } else {                                                            \
    for (const uint32_t *qe = qi + ((dim >> 2) << 3); qi != qe;       \
         mi += 32, qi += 8) {                                         \
      MATRIX_INT32_ITER_32X8_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \
                                 ACCUM_INT8_STEP_AVX)                 \
    }                                                                 \
  }                                                                   \
  if (((uintptr_t)out & 0x1f) == 0) {                                 \
    MATRIX_VAR_STORE(4, 8, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \
  } else {                                                            \
    MATRIX_VAR_STORE(4, 8, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (INT8, M=32, N=16)
//!
//! Expands to bare statements (no enclosing braces), so `qi`, `mi` and
//! whatever MATRIX_VAR_INIT declares land in the scope of the caller.
//! @param m      matrix data, walked as uint32_t words, 32 words per step
//! @param q      query data, walked as uint32_t words, 16 words per step
//! @param dim    dimension; the loop takes (dim >> 2) steps, so a remainder
//!               of dim modulo 4 is not visited here
//! @param out    destination given to MATRIX_VAR_STORE; the aligned store is
//!               chosen only when `out` is 32-byte aligned
//! @param _NORM  forwarded as-is to MATRIX_VAR_STORE
//! Aligned vs. unaligned 256-bit loads are selected once, outside the loop,
//! from the 32-byte alignment of `m`.
//! NOTE(review): MATRIX_VAR_INIT, MATRIX_INT32_ITER_32X16_AVX,
//! MATRIX_VAR_STORE and ACCUM_INT8_STEP_AVX are defined outside this section
//! - confirm their contracts there.
#define ACCUM_INT8_32X16_AVX(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(4, 16, __m256i, ymm_sum, _mm256_setzero_si256())     \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);          \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);          \
  if (((uintptr_t)mi & 0x1f) == 0) {                                   \
    for (const uint32_t *qe = qi + ((dim >> 2) << 4); qi != qe;        \
         mi += 32, qi += 16) {                                         \
      MATRIX_INT32_ITER_32X16_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \
                                  ACCUM_INT8_STEP_AVX)                 \
    }                                                                  \
  } else {                                                             \
    for (const uint32_t *qe = qi + ((dim >> 2) << 4); qi != qe;        \
         mi += 32, qi += 16) {                                         \
      MATRIX_INT32_ITER_32X16_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \
                                  ACCUM_INT8_STEP_AVX)                 \
    }                                                                  \
  }                                                                    \
  if (((uintptr_t)out & 0x1f) == 0) {                                  \
    MATRIX_VAR_STORE(4, 16, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \
  } else {                                                             \
    MATRIX_VAR_STORE(4, 16, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (INT8, M=32, N=32)
//!
//! Expands to bare statements (no enclosing braces), so `qi`, `mi` and
//! whatever MATRIX_VAR_INIT declares land in the scope of the caller.
//! @param m      matrix data, walked as uint32_t words, 32 words per step
//! @param q      query data, walked as uint32_t words, 32 words per step
//! @param dim    dimension; the loop takes (dim >> 2) steps, so a remainder
//!               of dim modulo 4 is not visited here
//! @param out    destination given to MATRIX_VAR_STORE; the aligned store is
//!               chosen only when `out` is 32-byte aligned
//! @param _NORM  forwarded as-is to MATRIX_VAR_STORE
//! Aligned vs. unaligned 256-bit loads are selected once, outside the loop,
//! from the 32-byte alignment of `m`.
//! NOTE(review): MATRIX_VAR_INIT, MATRIX_INT32_ITER_32X32_AVX,
//! MATRIX_VAR_STORE and ACCUM_INT8_STEP_AVX are defined outside this section
//! - confirm their contracts there.
#define ACCUM_INT8_32X32_AVX(m, q, dim, out, _NORM)                    \
  MATRIX_VAR_INIT(4, 32, __m256i, ymm_sum, _mm256_setzero_si256())     \
  const uint32_t *qi = reinterpret_cast<const uint32_t *>(q);          \
  const uint32_t *mi = reinterpret_cast<const uint32_t *>(m);          \
  if (((uintptr_t)mi & 0x1f) == 0) {                                   \
    for (const uint32_t *qe = qi + ((dim >> 2) << 5); qi != qe;        \
         mi += 32, qi += 32) {                                         \
      MATRIX_INT32_ITER_32X32_AVX(mi, qi, ymm_sum, _mm256_load_si256,  \
                                  ACCUM_INT8_STEP_AVX)                 \
    }                                                                  \
  } else {                                                             \
    for (const uint32_t *qe = qi + ((dim >> 2) << 5); qi != qe;        \
         mi += 32, qi += 32) {                                         \
      MATRIX_INT32_ITER_32X32_AVX(mi, qi, ymm_sum, _mm256_loadu_si256, \
                                  ACCUM_INT8_STEP_AVX)                 \
    }                                                                  \
  }                                                                    \
  if (((uintptr_t)out & 0x1f) == 0) {                                  \
    MATRIX_VAR_STORE(4, 32, 8, ymm_sum, out, _mm256_store_ps, _NORM)   \
  } else {                                                             \
    MATRIX_VAR_STORE(4, 32, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)  \
  }


================================================
FILE: src/ailego/math/distance_matrix_euclidean_utility.i
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Calculate sum of squared difference (GENERAL)
//!
//! Adds (m - q)^2, computed in float, to `sum`.
//! @param m    left operand; may be any expression (it is parenthesised)
//! @param q    right operand; may be any expression (it is parenthesised)
//! @param sum  modifiable lvalue that receives the accumulated value
//! The temporary carries a macro-specific name so that arguments spelled
//! `x` by the caller are not captured by the expansion.
#define SSD_FP32_GENERAL(m, q, sum)                     \
  {                                                     \
    const float ssd_fp32_general_d_ = (m) - (q);        \
    (sum) += (ssd_fp32_general_d_ * ssd_fp32_general_d_); \
  }

//! Accumulate squared differences of packed FP32 values (SSE)
//!
//! For every lane: xmm_sum += (xmm_m - xmm_q)^2, where the multiply-add is a
//! single fused operation (_mm_fmadd_ps).
#define SSD_FP32_SSE(xmm_m, xmm_q, xmm_sum)                         \
  {                                                                 \
    const __m128 xmm_ssd_fp32_d_ = _mm_sub_ps((xmm_m), (xmm_q));    \
    xmm_sum =                                                       \
        _mm_fmadd_ps(xmm_ssd_fp32_d_, xmm_ssd_fp32_d_, (xmm_sum));  \
  }

//! Accumulate squared differences of packed FP32 values (AVX)
//!
//! For every lane: ymm_sum += (ymm_m - ymm_q)^2, where the multiply-add is a
//! single fused operation (_mm256_fmadd_ps).
#define SSD_FP32_AVX(ymm_m, ymm_q, ymm_sum)                            \
  {                                                                    \
    const __m256 ymm_ssd_fp32_d_ = _mm256_sub_ps((ymm_m), (ymm_q));    \
    ymm_sum =                                                          \
        _mm256_fmadd_ps(ymm_ssd_fp32_d_, ymm_ssd_fp32_d_, (ymm_sum));  \
  }

//! Accumulate squared differences of packed FP32 values (NEON)
//!
//! For every lane: v_sum += (v_m - v_q)^2 through vfmaq_f32.
#define SSD_FP32_NEON(v_m, v_q, v_sum)                               \
  {                                                                  \
    const float32x4_t v_ssd_fp32_d_ = vsubq_f32((v_m), (v_q));       \
    v_sum = vfmaq_f32((v_sum), v_ssd_fp32_d_, v_ssd_fp32_d_);        \
  }

//! Calculate sum of squared difference (GENERAL)
//!
//! Adds (m - q)^2 to `sum`; the difference is converted to float before it
//! is squared.
//! @param m    left operand; may be any expression (it is parenthesised)
//! @param q    right operand; may be any expression (it is parenthesised)
//! @param sum  modifiable lvalue that receives the accumulated value
//! The temporary carries a macro-specific name so that arguments spelled
//! `x` by the caller are not captured by the expansion.
#define SSD_FP16_GENERAL(m, q, sum)                     \
  {                                                     \
    const float ssd_fp16_general_d_ = (m) - (q);        \
    (sum) += (ssd_fp16_general_d_ * ssd_fp16_general_d_); \
  }

//! Accumulate squared differences of packed FP16 values (NEON)
//!
//! For every lane: v_sum += (v_m - v_q)^2 through vfmaq_f16; the running sum
//! stays in half precision.
#define SSD_FP16_NEON(v_m, v_q, v_sum)                               \
  {                                                                  \
    const float16x8_t v_ssd_fp16_d_ = vsubq_f16((v_m), (v_q));       \
    v_sum = vfmaq_f16((v_sum), v_ssd_fp16_d_, v_ssd_fp16_d_);        \
  }

//! Accumulate squared differences of packed FP32 values (AVX512)
//!
//! For every lane: zmm_sum += (zmm_m - zmm_q)^2, where the multiply-add is a
//! single fused operation (_mm512_fmadd_ps).
#define SSD_FP32_AVX512(zmm_m, zmm_q, zmm_sum)                         \
  {                                                                    \
    const __m512 zmm_ssd_fp32_d_ = _mm512_sub_ps((zmm_m), (zmm_q));    \
    zmm_sum =                                                          \
        _mm512_fmadd_ps(zmm_ssd_fp32_d_, zmm_ssd_fp32_d_, (zmm_sum));  \
  }

//! Accumulate squared differences of two packed INT4 pairs (GENERAL)
//!
//! `m` and `q` each carry two 4-bit values. Two lookups are made in
//! Int4SquaredDiffTable, one indexed by (low nibble of m, low nibble of q)
//! and one by (high nibble of m, high nibble of q); in both cases the nibble
//! of `m` forms the upper four index bits. The expansion ends with its own
//! semicolon.
#define SSD_INT4_GENERAL(m, q, sum)                                  \
  sum += Int4SquaredDiffTable[(((m) & 0xf) << 4) | ((q) & 0xf)] +    \
         Int4SquaredDiffTable[((m) & 0xf0) | (((q) >> 4) & 0xf)];


#if defined(__SSE4_1__)
//! Every byte is 0xf0: keeps the high nibble of each byte
static const __m128i MASK_INT4_SSE = _mm_set1_epi8(static_cast<char>(0xf0));
//! Every 16-bit lane is 1: multiplier that makes _mm_madd_epi16 add
//! neighbouring 16-bit lanes
static const __m128i ONES_INT16_SSE = _mm_set1_epi16(1);
#endif  // __SSE4_1__

//! Square root of packed 32-bit integers, returned as packed FP32 (SSE)
//!
//! The integers are converted to float first; any further arguments are
//! accepted and ignored.
#define SQRT_FP32_SSE(xmm_v, ...) _mm_sqrt_ps(_mm_cvtepi32_ps((xmm_v)))

#if defined(__AVX2__)
//! Every byte is 0xf0: keeps the high nibble of each byte
static const __m256i MASK_INT4_AVX =
    _mm256_set1_epi8(static_cast<char>(0xf0));
//! Every 16-bit lane is 1: multiplier that makes _mm256_madd_epi16 add
//! neighbouring 16-bit lanes
static const __m256i ONES_INT16_AVX = _mm256_set1_epi16(1);
#endif  // __AVX2__

//! Calculate sum of squared difference (SSE)
//!
//! Treats every byte of `xmm_m` / `xmm_q` as two signed 4-bit values and
//! adds, to each 32-bit lane of `xmm_sum`, the squared differences of the
//! eight value pairs stored in that lane.
//! @param xmm_m    packed INT4 values (__m128i); evaluated once
//! @param xmm_q    packed INT4 values (__m128i); evaluated once
//! @param xmm_sum  __m128i lvalue holding four 32-bit running sums
//! Temporaries use macro-specific names so that caller variables called
//! `xmm_lhs` / `xmm_rhs` can be passed without being captured.
//! Relies on MASK_INT4_SSE and ONES_INT16_SSE.
#define SSD_INT4_SSE(xmm_m, xmm_q, xmm_sum)                                  \
  {                                                                          \
    const __m128i xmm_ssd4_m_ = (xmm_m);                                     \
    const __m128i xmm_ssd4_q_ = (xmm_q);                                     \
    /* Low nibbles: lift them into the high half of each byte so that the */ \
    /* signed byte compare sees the nibble's sign bit. */                    \
    __m128i xmm_ssd4_a_ =                                                    \
        _mm_and_si128(_mm_slli_epi32(xmm_ssd4_m_, 4), MASK_INT4_SSE);        \
    __m128i xmm_ssd4_b_ =                                                    \
        _mm_and_si128(_mm_slli_epi32(xmm_ssd4_q_, 4), MASK_INT4_SSE);        \
    /* |a - b| as max - min, then back down to the low half of the byte */   \
    xmm_ssd4_a_ =                                                            \
        _mm_srli_epi32(_mm_sub_epi8(_mm_max_epi8(xmm_ssd4_a_, xmm_ssd4_b_),  \
                                    _mm_min_epi8(xmm_ssd4_a_, xmm_ssd4_b_)), \
                       4);                                                   \
    /* square and widen 8 -> 16 -> 32 bit, adding neighbours on the way */   \
    xmm_sum = _mm_add_epi32(                                                 \
        _mm_madd_epi16(_mm_maddubs_epi16(xmm_ssd4_a_, xmm_ssd4_a_),          \
                       ONES_INT16_SSE),                                      \
        xmm_sum);                                                            \
    /* High nibbles: already in place, only mask them out. */                \
    xmm_ssd4_a_ = _mm_and_si128(xmm_ssd4_m_, MASK_INT4_SSE);                 \
    xmm_ssd4_b_ = _mm_and_si128(xmm_ssd4_q_, MASK_INT4_SSE);                 \
    xmm_ssd4_a_ =                                                            \
        _mm_srli_epi32(_mm_sub_epi8(_mm_max_epi8(xmm_ssd4_a_, xmm_ssd4_b_),  \
                                    _mm_min_epi8(xmm_ssd4_a_, xmm_ssd4_b_)), \
                       4);                                                   \
    xmm_sum = _mm_add_epi32(                                                 \
        _mm_madd_epi16(_mm_maddubs_epi16(xmm_ssd4_a_, xmm_ssd4_a_),          \
                       ONES_INT16_SSE),                                      \
        xmm_sum);                                                            \
  }

//! Accumulate squared differences of packed INT4 values (SSE)
//!
//! Every byte of `xmm_lhs` / `xmm_rhs` holds two signed 4-bit values. The
//! squared differences of the eight value pairs that share a 32-bit lane are
//! added to the matching lane of `xmm_sum`.
//! Relies on MASK_INT4_SSE and ONES_INT16_SSE.
#define SSD_INT4_ITER_SSE(xmm_lhs, xmm_rhs, xmm_sum)                          \
  {                                                                           \
    /* low nibbles, lifted into the high half of each byte */                 \
    const __m128i xmm_i4_lo_l_ =                                              \
        _mm_and_si128(_mm_slli_epi32((xmm_lhs), 4), MASK_INT4_SSE);           \
    const __m128i xmm_i4_lo_r_ =                                              \
        _mm_and_si128(_mm_slli_epi32((xmm_rhs), 4), MASK_INT4_SSE);           \
    /* high nibbles, already in place */                                      \
    const __m128i xmm_i4_hi_l_ = _mm_and_si128((xmm_lhs), MASK_INT4_SSE);     \
    const __m128i xmm_i4_hi_r_ = _mm_and_si128((xmm_rhs), MASK_INT4_SSE);     \
    /* |l - r| as max - min, moved down to the low half of each byte */       \
    const __m128i xmm_i4_lo_d_ = _mm_srli_epi32(                              \
        _mm_sub_epi8(_mm_max_epi8(xmm_i4_lo_l_, xmm_i4_lo_r_),                \
                     _mm_min_epi8(xmm_i4_lo_l_, xmm_i4_lo_r_)),               \
        4);                                                                   \
    const __m128i xmm_i4_hi_d_ = _mm_srli_epi32(                              \
        _mm_sub_epi8(_mm_max_epi8(xmm_i4_hi_l_, xmm_i4_hi_r_),                \
                     _mm_min_epi8(xmm_i4_hi_l_, xmm_i4_hi_r_)),               \
        4);                                                                   \
    /* square and widen 8 -> 16 -> 32 bit, adding neighbours on the way */    \
    const __m128i xmm_i4_lo_sq_ = _mm_madd_epi16(                             \
        _mm_maddubs_epi16(xmm_i4_lo_d_, xmm_i4_lo_d_), ONES_INT16_SSE);       \
    const __m128i xmm_i4_hi_sq_ = _mm_madd_epi16(                             \
        _mm_maddubs_epi16(xmm_i4_hi_d_, xmm_i4_hi_d_), ONES_INT16_SSE);       \
    xmm_sum =                                                                 \
        _mm_add_epi32(xmm_sum, _mm_add_epi32(xmm_i4_lo_sq_, xmm_i4_hi_sq_));  \
  }

//! Calculate sum of squared difference (AVX)
//!
//! Treats every byte of `ymm_m` / `ymm_q` as two signed 4-bit values and
//! adds, to each 32-bit lane of `ymm_sum`, the squared differences of the
//! eight value pairs stored in that lane.
//! @param ymm_m    packed INT4 values (__m256i); evaluated once
//! @param ymm_q    packed INT4 values (__m256i); evaluated once
//! @param ymm_sum  __m256i lvalue holding eight 32-bit running sums
//! Temporaries use macro-specific names so that caller variables called
//! `ymm_lhs` / `ymm_rhs` can be passed without being captured.
//! Relies on MASK_INT4_AVX and ONES_INT16_AVX.
#define SSD_INT4_AVX(ymm_m, ymm_q, ymm_sum)                                   \
  {                                                                           \
    const __m256i ymm_ssd4_m_ = (ymm_m);                                      \
    const __m256i ymm_ssd4_q_ = (ymm_q);                                      \
    /* Low nibbles: lift them into the high half of each byte so that the */  \
    /* signed byte compare sees the nibble's sign bit. */                     \
    __m256i ymm_ssd4_a_ =                                                     \
        _mm256_and_si256(_mm256_slli_epi32(ymm_ssd4_m_, 4), MASK_INT4_AVX);   \
    __m256i ymm_ssd4_b_ =                                                     \
        _mm256_and_si256(_mm256_slli_epi32(ymm_ssd4_q_, 4), MASK_INT4_AVX);   \
    /* |a - b| as max - min, then back down to the low half of the byte */    \
    ymm_ssd4_a_ = _mm256_srli_epi32(                                          \
        _mm256_sub_epi8(_mm256_max_epi8(ymm_ssd4_a_, ymm_ssd4_b_),            \
                        _mm256_min_epi8(ymm_ssd4_a_, ymm_ssd4_b_)),           \
        4);                                                                   \
    /* square and widen 8 -> 16 -> 32 bit, adding neighbours on the way */    \
    ymm_sum = _mm256_add_epi32(                                               \
        _mm256_madd_epi16(_mm256_maddubs_epi16(ymm_ssd4_a_, ymm_ssd4_a_),     \
                          ONES_INT16_AVX),                                    \
        ymm_sum);                                                             \
    /* High nibbles: already in place, only mask them out. */                 \
    ymm_ssd4_a_ = _mm256_and_si256(ymm_ssd4_m_, MASK_INT4_AVX);               \
    ymm_ssd4_b_ = _mm256_and_si256(ymm_ssd4_q_, MASK_INT4_AVX);               \
    ymm_ssd4_a_ = _mm256_srli_epi32(                                          \
        _mm256_sub_epi8(_mm256_max_epi8(ymm_ssd4_a_, ymm_ssd4_b_),            \
                        _mm256_min_epi8(ymm_ssd4_a_, ymm_ssd4_b_)),           \
        4);                                                                   \
    ymm_sum = _mm256_add_epi32(                                               \
        _mm256_madd_epi16(_mm256_maddubs_epi16(ymm_ssd4_a_, ymm_ssd4_a_),     \
                          ONES_INT16_AVX),                                    \
        ymm_sum);                                                             \
  }

//! Accumulate squared differences of packed INT4 values (AVX)
//!
//! Every byte of `ymm_lhs` / `ymm_rhs` holds two signed 4-bit values. The
//! squared differences of the eight value pairs that share a 32-bit lane are
//! added to the matching lane of `ymm_sum`.
//! Relies on MASK_INT4_AVX and ONES_INT16_AVX.
#define SSD_INT4_ITER_AVX(ymm_lhs, ymm_rhs, ymm_sum)                          \
  {                                                                           \
    /* low nibbles, lifted into the high half of each byte */                 \
    const __m256i ymm_i4_lo_l_ =                                              \
        _mm256_and_si256(_mm256_slli_epi32((ymm_lhs), 4), MASK_INT4_AVX);     \
    const __m256i ymm_i4_lo_r_ =                                              \
        _mm256_and_si256(_mm256_slli_epi32((ymm_rhs), 4), MASK_INT4_AVX);     \
    /* high nibbles, already in place */                                      \
    const __m256i ymm_i4_hi_l_ = _mm256_and_si256((ymm_lhs), MASK_INT4_AVX);  \
    const __m256i ymm_i4_hi_r_ = _mm256_and_si256((ymm_rhs), MASK_INT4_AVX);  \
    /* |l - r| as max - min, moved down to the low half of each byte */       \
    const __m256i ymm_i4_lo_d_ = _mm256_srli_epi32(                           \
        _mm256_sub_epi8(_mm256_max_epi8(ymm_i4_lo_l_, ymm_i4_lo_r_),          \
                        _mm256_min_epi8(ymm_i4_lo_l_, ymm_i4_lo_r_)),         \
        4);                                                                   \
    const __m256i ymm_i4_hi_d_ = _mm256_srli_epi32(                           \
        _mm256_sub_epi8(_mm256_max_epi8(ymm_i4_hi_l_, ymm_i4_hi_r_),          \
                        _mm256_min_epi8(ymm_i4_hi_l_, ymm_i4_hi_r_)),         \
        4);                                                                   \
    /* square and widen 8 -> 16 -> 32 bit, adding neighbours on the way */    \
    const __m256i ymm_i4_lo_sq_ = _mm256_madd_epi16(                          \
        _mm256_maddubs_epi16(ymm_i4_lo_d_, ymm_i4_lo_d_), ONES_INT16_AVX);    \
    const __m256i ymm_i4_hi_sq_ = _mm256_madd_epi16(                          \
        _mm256_maddubs_epi16(ymm_i4_hi_d_, ymm_i4_hi_d_), ONES_INT16_AVX);    \
    ymm_sum = _mm256_add_epi32(                                               \
        ymm_sum, _mm256_add_epi32(ymm_i4_lo_sq_, ymm_i4_hi_sq_));             \
  }

//! Calculate sum of squared difference (GENERAL)
//!
//! Adds `(m - q)^2` to `sum`. The difference is formed in 32-bit integer
//! arithmetic and the square is converted to float before accumulation.
//! Every argument is parenthesized so that expression arguments (for example
//! `a + b`) keep their intended grouping, and the temporary carries a
//! macro-specific name so it cannot capture an identifier used by the caller.
#define SSD_INT8_GENERAL(m, q, sum)                                 \
  {                                                                 \
    int32_t ssd_int8_diff = (m) - (q);                              \
    (sum) += static_cast<float>(ssd_int8_diff * ssd_int8_diff);     \
  }

//! Calculate sum of squared difference (SSE)
//!
//! Updates `xmm_sum` (32-bit lanes) with the squared differences of the
//! signed bytes in `xmm_m` and `xmm_q`, using the expansion
//! (m - q)^2 = m*m + q*q - 2*m*q:
//!  1. `abs(m)` times `sign(m, m)` (m with its own sign applied) gives m*m;
//!  2. the same for q gives q*q;
//!  3. `abs(q)` times `sign(m, q)` (m negated where q is negative) gives
//!     m*q, which is doubled with a left shift by 1 and subtracted.
//! In every step `_mm_maddubs_epi16` multiplies unsigned by signed bytes and
//! adds adjacent pairs into 16 bits, and `_mm_madd_epi16` with ONES_INT16_SSE
//! adds adjacent 16-bit values into 32 bits.
//! NOTE(review): for an input byte of -128 `_mm_sign_epi8` cannot produce
//! +128, so the product terms look wrong for that value -- confirm that
//! callers keep inputs within [-127, 127].
#define SSD_INT8_SSE(xmm_m, xmm_q, xmm_sum)                                \
  {                                                                        \
    xmm_sum = _mm_add_epi32(                                               \
        _mm_madd_epi16(_mm_maddubs_epi16(_mm_abs_epi8(xmm_m),              \
                                         _mm_sign_epi8(xmm_m, xmm_m)),     \
                       ONES_INT16_SSE),                                    \
        xmm_sum);                                                          \
    xmm_sum = _mm_add_epi32(                                               \
        _mm_madd_epi16(_mm_maddubs_epi16(_mm_abs_epi8(xmm_q),              \
                                         _mm_sign_epi8(xmm_q, xmm_q)),     \
                       ONES_INT16_SSE),                                    \
        xmm_sum);                                                          \
    xmm_sum = _mm_sub_epi32(                                               \
        xmm_sum,                                                           \
        _mm_slli_epi32(                                                    \
            _mm_madd_epi16(_mm_maddubs_epi16(_mm_abs_epi8(xmm_q),          \
                                             _mm_sign_epi8(xmm_m, xmm_q)), \
                           ONES_INT16_SSE),                                \
            1));                                                           \
  }

//! Calculate sum of squared difference (AVX)
//!
//! 256-bit counterpart of the SSE variant: updates `ymm_sum` (32-bit lanes)
//! with the squared differences of the signed bytes in `ymm_m` and `ymm_q`
//! via (m - q)^2 = m*m + q*q - 2*m*q:
//!  1. `abs(m)` times `sign(m, m)` gives m*m;
//!  2. `abs(q)` times `sign(q, q)` gives q*q;
//!  3. `abs(q)` times `sign(m, q)` gives m*q, doubled by the shift left by 1
//!     and subtracted from the running sum.
//! `_mm256_maddubs_epi16` adds adjacent byte products into 16 bits and
//! `_mm256_madd_epi16` with ONES_INT16_AVX adds adjacent 16-bit values into
//! 32 bits.
//! NOTE(review): an input byte of -128 cannot be negated by
//! `_mm256_sign_epi8`, so the product terms look wrong for that value --
//! confirm that callers keep inputs within [-127, 127].
#define SSD_INT8_AVX(ymm_m, ymm_q, ymm_sum)                                    \
  {                                                                            \
    ymm_sum = _mm256_add_epi32(                                                \
        _mm256_madd_epi16(                                                     \
            _mm256_maddubs_epi16(_mm256_abs_epi8(ymm_m),                       \
                                 _mm256_sign_epi8(ymm_m, ymm_m)),              \
            ONES_INT16_AVX),                                                   \
        ymm_sum);                                                              \
    ymm_sum = _mm256_add_epi32(                                                \
        _mm256_madd_epi16(                                                     \
            _mm256_maddubs_epi16(_mm256_abs_epi8(ymm_q),                       \
                                 _mm256_sign_epi8(ymm_q, ymm_q)),              \
            ONES_INT16_AVX),                                                   \
        ymm_sum);                                                              \
    ymm_sum = _mm256_sub_epi32(                                                \
        ymm_sum, _mm256_slli_epi32(                                            \
                     _mm256_madd_epi16(                                        \
                         _mm256_maddubs_epi16(_mm256_abs_epi8(ymm_q),          \
                                              _mm256_sign_epi8(ymm_m, ymm_q)), \
                         ONES_INT16_AVX),                                      \
                     1));                                                      \
  }

//! Compute the square root of value (AVX)
//! Converts the packed 32-bit integers in `v` to single precision and returns
//! their element-wise square root. Any additional arguments are accepted and
//! ignored.
#define SQRT_FP32_AVX(v, ...) _mm256_sqrt_ps(_mm256_cvtepi32_ps(v))

//! Compute the square root of value (AVX512)
//! Converts the packed 32-bit integers in `v` to single precision and returns
//! their element-wise square root. Any additional arguments are accepted and
//! ignored.
#define SQRT_FP32_AVX512(v, ...) _mm512_sqrt_ps(_mm512_cvtepi32_ps(v))

//! Accumulation-step aliases: each ACCUM_<type>_STEP_<isa> name resolves to
//! the sum-of-squared-difference macro of the same element type and
//! instruction set.

// FP32 steps
#define ACCUM_FP32_STEP_SSE SSD_FP32_SSE
#define ACCUM_FP32_STEP_AVX SSD_FP32_AVX
#define ACCUM_FP32_STEP_AVX512 SSD_FP32_AVX512
#define ACCUM_FP32_STEP_NEON SSD_FP32_NEON

// FP16 steps
#define ACCUM_FP16_STEP_GENERAL SSD_FP16_GENERAL
#define ACCUM_FP16_STEP_NEON SSD_FP16_NEON

// INT8 steps
#define ACCUM_INT8_STEP_SSE SSD_INT8_SSE
#define ACCUM_INT8_STEP_AVX SSD_INT8_AVX

// INT4 steps
#define ACCUM_INT4_STEP_SSE SSD_INT4_SSE
#define ACCUM_INT4_STEP_AVX SSD_INT4_AVX

================================================
FILE: src/ailego/math/distance_matrix_fp16.i
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <zvec/ailego/internal/platform.h>
#include "matrix_define.i"
#include <iostream> 
//! Broadcast helpers: read the 32 bits stored at `a` (through a float
//! pointer) and replicate them across every 32-bit lane of the result.
//! With AVX the broadcast instruction is used and a 256-bit flavour is also
//! provided; otherwise only the 128-bit flavour exists, built on
//! `_mm_load1_ps`.
#if defined(__AVX__)
#define _mm_broadcast_si32(a) \
  _mm_castps_si128(_mm_broadcast_ss((const float *)(a)))
#define _mm256_broadcast_si32(a) \
  _mm256_castps_si256(_mm256_broadcast_ss((const float *)(a)))
#else
#define _mm_broadcast_si32(a) _mm_castps_si128(_mm_load1_ps((const float *)(a)))
#endif  // __AVX__

//! Mask process of computing distance (FP16)
//!
//! Handles a tail of `cnt` FP16 elements (1..7) that does not fill a whole
//! 8-lane vector. For the matching case, `cnt` half-precision values are read
//! from `lhs` and from `rhs`, the unused lanes of the 128-bit vector are
//! filled with `_MASK`, both vectors are converted to eight floats, and
//! `_PROC(ymm_lhs, ymm_rhs, _RES##_0_0)` is invoked once.
//!
//!  - lhs, rhs : pointers to the remaining FP16 data (cast internally).
//!  - cnt      : number of valid elements; there is no `default` label, so
//!               any value outside 1..7 does nothing.
//!  - _MASK    : filler for the unused lanes, cast to short / int / __m64
//!               depending on the case.
//!  - _RES     : accumulator name prefix; only `_RES##_0_0` is touched.
//!  - _PROC    : step macro applied to the converted vectors.
//!
//! Cases 6, 4 and 2 read the data as 32-bit or 64-bit units instead of
//! individual shorts.
//! NOTE(review): case 1 passes the loaded element as the FIRST argument of
//! `_mm_set_epi16`, i.e. the highest lane, whereas the other cases fill from
//! the lowest lane. Both operands are built the same way, so the lanes still
//! pair up; confirm `_PROC` does not depend on lane position.
#define MATRIX_FP16_MASK_AVX(lhs, rhs, cnt, _MASK, _RES, _PROC)              \
  switch (cnt) {                                                             \
    case 7: {                                                                \
      __m256 ymm_lhs = _mm256_cvtph_ps(_mm_set_epi16(                        \
          (short)(_MASK), *((const short *)(lhs) + 6),                       \
          *((const short *)(lhs) + 5), *((const short *)(lhs) + 4),          \
          *((const short *)(lhs) + 3), *((const short *)(lhs) + 2),          \
          *((const short *)(lhs) + 1), *((const short *)(lhs))));            \
      __m256 ymm_rhs = _mm256_cvtph_ps(_mm_set_epi16(                        \
          (short)(_MASK), *((const short *)(rhs) + 6),                       \
          *((const short *)(rhs) + 5), *((const short *)(rhs) + 4),          \
          *((const short *)(rhs) + 3), *((const short *)(rhs) + 2),          \
          *((const short *)(rhs) + 1), *((const short *)(rhs))));            \
      _PROC(ymm_lhs, ymm_rhs, _RES##_0_0)                                    \
      break;                                                                 \
    }                                                                        \
    case 6: {                                                                \
      __m256 ymm_lhs = _mm256_cvtph_ps(                                      \
          _mm_set_epi32((int)(_MASK), *((const int *)(lhs) + 2),             \
                        *((const int *)(lhs) + 1), *((const int *)(lhs))));  \
      __m256 ymm_rhs = _mm256_cvtph_ps(                                      \
          _mm_set_epi32((int)(_MASK), *((const int *)(rhs) + 2),             \
                        *((const int *)(rhs) + 1), *((const int *)(rhs))));  \
      _PROC(ymm_lhs, ymm_rhs, _RES##_0_0)                                    \
      break;                                                                 \
    }                                                                        \
    case 5: {                                                                \
      __m256 ymm_lhs = _mm256_cvtph_ps(_mm_set_epi16(                        \
          (short)(_MASK), (short)(_MASK), (short)(_MASK),                    \
          *((const short *)(lhs) + 4), *((const short *)(lhs) + 3),          \
          *((const short *)(lhs) + 2), *((const short *)(lhs) + 1),          \
          *((const short *)(lhs))));                                         \
      __m256 ymm_rhs = _mm256_cvtph_ps(_mm_set_epi16(                        \
          (short)(_MASK), (short)(_MASK), (short)(_MASK),                    \
          *((const short *)(rhs) + 4), *((const short *)(rhs) + 3),          \
          *((const short *)(rhs) + 2), *((const short *)(rhs) + 1),          \
          *((const short *)(rhs))));                                         \
      _PROC(ymm_lhs, ymm_rhs, _RES##_0_0)                                    \
      break;                                                                 \
    }                                                                        \
    case 4: {                                                                \
      __m256 ymm_lhs = _mm256_cvtph_ps(                                      \
          _mm_set_epi64((__m64)(_MASK), *((const __m64 *)(lhs))));           \
      __m256 ymm_rhs = _mm256_cvtph_ps(                                      \
          _mm_set_epi64((__m64)(_MASK), *((const __m64 *)(rhs))));           \
      _PROC(ymm_lhs, ymm_rhs, _RES##_0_0)                                    \
      break;                                                                 \
    }                                                                        \
    case 3: {                                                                \
      __m256 ymm_lhs = _mm256_cvtph_ps(_mm_set_epi16(                        \
          (short)(_MASK), (short)(_MASK), (short)(_MASK), (short)(_MASK),    \
          (short)(_MASK), *((const short *)(lhs) + 2),                       \
          *((const short *)(lhs) + 1), *((const short *)(lhs))));            \
      __m256 ymm_rhs = _mm256_cvtph_ps(_mm_set_epi16(                        \
          (short)(_MASK), (short)(_MASK), (short)(_MASK), (short)(_MASK),    \
          (short)(_MASK), *((const short *)(rhs) + 2),                       \
          *((const short *)(rhs) + 1), *((const short *)(rhs))));            \
      _PROC(ymm_lhs, ymm_rhs, _RES##_0_0)                                    \
      break;                                                                 \
    }                                                                        \
    case 2: {                                                                \
      __m256 ymm_lhs = _mm256_cvtph_ps(_mm_set_epi32(                        \
          (int)(_MASK), (int)(_MASK), (int)(_MASK), *((const int *)(lhs)))); \
      __m256 ymm_rhs = _mm256_cvtph_ps(_mm_set_epi32(                        \
          (int)(_MASK), (int)(_MASK), (int)(_MASK), *((const int *)(rhs)))); \
      _PROC(ymm_lhs, ymm_rhs, _RES##_0_0)                                    \
      break;                                                                 \
    }                                                                        \
    case 1: {                                                                \
      __m256 ymm_lhs = _mm256_cvtph_ps(                                      \
          _mm_set_epi16(*((const short *)(lhs)), (short)(_MASK),             \
                        (short)(_MASK), (short)(_MASK), (short)(_MASK),      \
                        (short)(_MASK), (short)(_MASK), (short)(_MASK)));    \
      __m256 ymm_rhs = _mm256_cvtph_ps(                                      \
          _mm_set_epi16(*((const short *)(rhs)), (short)(_MASK),             \
                        (short)(_MASK), (short)(_MASK), (short)(_MASK),      \
                        (short)(_MASK), (short)(_MASK), (short)(_MASK)));    \
      _PROC(ymm_lhs, ymm_rhs, _RES##_0_0)                                    \
      break;                                                                 \
    }                                                                        \
  }

//! Iterative process of computing distance (FP16, M=1, N=1)
//!
//! Loads 16 FP16 values from `m` and 16 from `q` with `_LOAD` (256 bits
//! each), converts the low and then the high 128-bit halves to eight floats,
//! and applies `_PROC` to each pair of halves, accumulating both times into
//! `_RES##_0_0`.
//! `m` and `q` are parenthesized before the pointer cast so that expression
//! arguments such as `base + offset` are cast as a whole, matching the other
//! iteration macros in this file.
#define MATRIX_FP16_ITER_1X1_AVX(m, q, _RES, _LOAD, _PROC)          \
  {                                                                 \
    __m256i ymm_mi = _LOAD((const __m256i *)(m));                   \
    __m256i ymm_qi = _LOAD((const __m256i *)(q));                   \
    __m256 ymm_m = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi)); \
    __m256 ymm_q = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_qi)); \
    _PROC(ymm_m, ymm_q, _RES##_0_0);                                \
    ymm_m = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1));   \
    ymm_q = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_qi, 1));   \
    _PROC(ymm_m, ymm_q, _RES##_0_0);                                \
  }

//! Iterative process of computing distance (FP16, M=2, N=1)
//!
//! Loads 8 FP16 values from `m` with `_LOAD`. Reads 4 FP16 values from `q`
//! as one 64-bit unit and shuffles them so that every value occupies two
//! adjacent lanes (q0 q0 q1 q1 q2 q2 q3 q3). Both vectors are converted to
//! eight floats and passed to `_PROC` once, accumulating into `_RES##_0_0`.
#define MATRIX_FP16_ITER_2X1_AVX(m, q, _RES, _LOAD, _PROC)       \
  {                                                              \
    __m256 ymm_m = _mm256_cvtph_ps(_LOAD((const __m128i *)(m))); \
    __m256 ymm_q = _mm256_cvtph_ps(_mm_shufflehi_epi16(          \
        _mm_shufflelo_epi16(_mm_set1_epi64(*(const __m64 *)(q)), \
                            _MM_SHUFFLE(1, 1, 0, 0)),            \
        _MM_SHUFFLE(3, 3, 2, 2)));                               \
    _PROC(ymm_m, ymm_q, _RES##_0_0)                              \
  }

//! Iterative process of computing distance (FP16, M=2, N=2)
//!
//! Loads 8 FP16 values from `q` and 8 from `m` with `_LOAD` and converts
//! both to floats. `_mm256_moveldup_ps` duplicates the even-indexed `q`
//! lanes for the `_RES##_0_0` step, and `_mm256_movehdup_ps` duplicates the
//! odd-indexed lanes for the `_RES##_0_1` step.
#define MATRIX_FP16_ITER_2X2_AVX(m, q, _RES, _LOAD, _PROC)       \
  {                                                              \
    __m256 ymm_q = _mm256_cvtph_ps(_LOAD((const __m128i *)(q))); \
    __m256 ymm_m = _mm256_cvtph_ps(_LOAD((const __m128i *)(m))); \
    __m256 ymm_p = _mm256_moveldup_ps(ymm_q);                    \
    _PROC(ymm_m, ymm_p, _RES##_0_0)                              \
    ymm_p = _mm256_movehdup_ps(ymm_q);                           \
    _PROC(ymm_m, ymm_p, _RES##_0_1)                              \
  }

//! Iterative process of computing distance (FP16, M=4, N=1)
//!
//! Loads 8 FP16 values from `m` with `_LOAD`. Reads 2 FP16 values from `q`
//! (one 32-bit broadcast) and shuffles them so that the first value fills the
//! low four lanes and the second value fills the high four lanes. One `_PROC`
//! step accumulates into `_RES##_0_0`.
#define MATRIX_FP16_ITER_4X1_AVX(m, q, _RES, _LOAD, _PROC)                 \
  {                                                                        \
    __m256 ymm_m = _mm256_cvtph_ps(_LOAD((const __m128i *)(m)));           \
    __m256 ymm_q = _mm256_cvtph_ps(                                        \
        _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_broadcast_si32(q), 0), \
                            _MM_SHUFFLE(1, 1, 1, 1)));                     \
    _PROC(ymm_m, ymm_q, _RES##_0_0)                                        \
  }

//! Iterative process of computing distance (FP16, M=4, N=2)
//!
//! Loads 8 FP16 values from `m` with `_LOAD` and reads 4 FP16 values
//! (q0..q3) from `q` as one 64-bit unit replicated into both vector halves.
//! `ymm_q_0` holds q0 in the low four lanes and q2 in the high four;
//! `ymm_q_1` holds q1 in the low four lanes and q3 in the high four.
//! The steps themselves are emitted by MATRIX_VAR_PROC, which is defined
//! outside this file section; it is given the `ymm_q` prefix, so presumably
//! it pastes the `_0` / `_1` suffixes itself -- TODO confirm.
#define MATRIX_FP16_ITER_4X2_AVX(m, q, _RES, _LOAD, _PROC)       \
  {                                                              \
    __m128i xmm_qi = _mm_set1_epi64(*(const __m64 *)(q));        \
    __m256 ymm_m = _mm256_cvtph_ps(_LOAD((const __m128i *)(m))); \
    __m256 ymm_q_0 = _mm256_cvtph_ps(_mm_shufflehi_epi16(        \
        _mm_shufflelo_epi16(xmm_qi, _MM_SHUFFLE(0, 0, 0, 0)),    \
        _MM_SHUFFLE(2, 2, 2, 2)));                               \
    __m256 ymm_q_1 = _mm256_cvtph_ps(_mm_shufflehi_epi16(        \
        _mm_shufflelo_epi16(xmm_qi, _MM_SHUFFLE(1, 1, 1, 1)),    \
        _MM_SHUFFLE(3, 3, 3, 3)));                               \
    MATRIX_VAR_PROC(1, 2, 0, ymm_m, ymm_q, _RES, _PROC)          \
  }

//! Iterative process of computing distance (FP16, M=4, N=4)
//!
//! Loads 8 FP16 values from `m` and 8 from `q` with `_LOAD` and converts
//! both to floats. For k = 0..3, `_mm256_permute_ps` replicates element k of
//! each 128-bit half of `ymm_q` across that half, and `_PROC` accumulates the
//! result against `ymm_m` into `_RES##_0_k`.
#define MATRIX_FP16_ITER_4X4_AVX(m, q, _RES, _LOAD, _PROC)            \
  {                                                                   \
    __m256 ymm_m = _mm256_cvtph_ps(_LOAD((const __m128i *)(m)));      \
    __m256 ymm_q = _mm256_cvtph_ps(_LOAD((const __m128i *)(q)));      \
    __m256 ymm_p = _mm256_permute_ps(ymm_q, _MM_SHUFFLE(0, 0, 0, 0)); \
    _PROC(ymm_m, ymm_p, _RES##_0_0)                                   \
    ymm_p = _mm256_permute_ps(ymm_q, _MM_SHUFFLE(1, 1, 1, 1));        \
    _PROC(ymm_m, ymm_p, _RES##_0_1)                                   \
    ymm_p = _mm256_permute_ps(ymm_q, _MM_SHUFFLE(2, 2, 2, 2));        \
    _PROC(ymm_m, ymm_p, _RES##_0_2)                                   \
    ymm_p = _mm256_permute_ps(ymm_q, _MM_SHUFFLE(3, 3, 3, 3));        \
    _PROC(ymm_m, ymm_p, _RES##_0_3)                                   \
  }

//! Iterative process of computing distance (FP16, M=8, N=1)
//!
//! Loads 8 FP16 values from `m` with `_LOAD`, broadcasts the single FP16
//! value at `q` to all eight lanes, converts both to floats and applies
//! `_PROC` once, accumulating into `_RES##_0_0`.
#define MATRIX_FP16_ITER_8X1_AVX(m, q, _RES, _LOAD, _PROC)               \
  {                                                                      \
    __m256 ymm_m = _mm256_cvtph_ps(_LOAD((const __m128i *)(m)));         \
    __m256 ymm_q = _mm256_cvtph_ps(_mm_set1_epi16(*(const short *)(q))); \
    _PROC(ymm_m, ymm_q, _RES##_0_0)                                      \
  }

//! Iterative process of computing distance (FP16, M=8, N=2)
//!
//! Loads 8 FP16 values from `m` with `_LOAD`. Reads 2 FP16 values from `q`
//! (one 32-bit broadcast), converts them to floats and broadcasts each into
//! its own vector, `ymm_q_0` and `ymm_q_1`. Elements of `xmm_p` are read with
//! the vector subscript syntax, a compiler extension.
//! The steps are emitted by MATRIX_VAR_PROC (defined outside this file
//! section), which receives the `ymm_q` prefix and presumably pastes the
//! `_0` / `_1` suffixes -- TODO confirm.
#define MATRIX_FP16_ITER_8X2_AVX(m, q, _RES, _LOAD, _PROC)       \
  {                                                              \
    __m256 ymm_m = _mm256_cvtph_ps(_LOAD((const __m128i *)(m))); \
    __m128 xmm_p = _mm_cvtph_ps(_mm_broadcast_si32(q));          \
    __m256 ymm_q_0 = _mm256_set1_ps(xmm_p[0]);                   \
    __m256 ymm_q_1 = _mm256_set1_ps(xmm_p[1]);                   \
    MATRIX_VAR_PROC(1, 2, 0, ymm_m, ymm_q, _RES, _PROC)          \
  }

//! Iterative process of computing distance (FP16, M=8, N=4)
//!
//! Loads 8 FP16 values from `m` with `_LOAD`. Reads 4 FP16 values from `q`
//! with a 64-bit load and converts them to floats in `xmm_p`. For k = 0..3,
//! element k is broadcast to all eight lanes and `_PROC` accumulates it
//! against `ymm_m` into `_RES##_0_k`.
#define MATRIX_FP16_ITER_8X4_AVX(m, q, _RES, _LOAD, _PROC)              \
  {                                                                     \
    __m256 ymm_m = _mm256_cvtph_ps(_LOAD((const __m128i *)(m)));        \
    __m128 xmm_p = _mm_cvtph_ps(_mm_loadl_epi64((const __m128i *)(q))); \
    __m256 ymm_q = _mm256_set1_ps(xmm_p[0]);                            \
    _PROC(ymm_m, ymm_q, _RES##_0_0)                                     \
    ymm_q = _mm256_set1_ps(xmm_p[1]);                                   \
    _PROC(ymm_m, ymm_q, _RES##_0_1)                                     \
    ymm_q = _mm256_set1_ps(xmm_p[2]);                                   \
    _PROC(ymm_m, ymm_q, _RES##_0_2)                                     \
    ymm_q = _mm256_set1_ps(xmm_p[3]);                                   \
    _PROC(ymm_m, ymm_q, _RES##_0_3)                                     \
  }

//! Iterative process of computing distance (FP16, M=8, N=8)
//!
//! Loads 8 FP16 values from `m` and 8 from `q` with `_LOAD` and converts
//! both to floats. For k = 0..7, element k of the converted `q` vector
//! (`ymm_p`) is broadcast to all eight lanes and `_PROC` accumulates it
//! against `ymm_m` into `_RES##_0_k`.
#define MATRIX_FP16_ITER_8X8_AVX(m, q, _RES, _LOAD, _PROC)       \
  {                                                              \
    __m256 ymm_m = _mm256_cvtph_ps(_LOAD((const __m128i *)(m))); \
    __m256 ymm_p = _mm256_cvtph_ps(_LOAD((const __m128i *)(q))); \
    __m256 ymm_q = _mm256_set1_ps(ymm_p[0]);                     \
    _PROC(ymm_m, ymm_q, _RES##_0_0)                              \
    ymm_q = _mm256_set1_ps(ymm_p[1]);                            \
    _PROC(ymm_m, ymm_q, _RES##_0_1)                              \
    ymm_q = _mm256_set1_ps(ymm_p[2]);                            \
    _PROC(ymm_m, ymm_q, _RES##_0_2)                              \
    ymm_q = _mm256_set1_ps(ymm_p[3]);                            \
    _PROC(ymm_m, ymm_q, _RES##_0_3)                              \
    ymm_q = _mm256_set1_ps(ymm_p[4]);                            \
    _PROC(ymm_m, ymm_q, _RES##_0_4)                              \
    ymm_q = _mm256_set1_ps(ymm_p[5]);                            \
    _PROC(ymm_m, ymm_q, _RES##_0_5)                              \
    ymm_q = _mm256_set1_ps(ymm_p[6]);                            \
    _PROC(ymm_m, ymm_q, _RES##_0_6)                              \
    ymm_q = _mm256_set1_ps(ymm_p[7]);                            \
    _PROC(ymm_m, ymm_q, _RES##_0_7)                              \
  }

//! Iterative process of computing distance (FP16, M=16, N=1)
//!
//! Loads 16 FP16 values from `m` with `_LOAD` (256 bits) and converts the low
//! and high 128-bit halves to floats as `ymm_m_0` and `ymm_m_1`. The single
//! FP16 value at `q` is broadcast to all lanes of `ymm_q`.
//! The steps are emitted by MATRIX_VAR_PROC (defined outside this file
//! section), which receives the `ymm_m` prefix and presumably pastes the
//! `_0` / `_1` suffixes -- TODO confirm.
//! `q` is parenthesized before the pointer cast so that an expression
//! argument such as `base + offset` is cast as a whole, matching the
//! 8X1 variant.
#define MATRIX_FP16_ITER_16X1_AVX(m, q, _RES, _LOAD, _PROC)                \
  {                                                                        \
    __m256i ymm_mi = _LOAD((const __m256i *)(m));                          \
    __m256 ymm_q = _mm256_cvtph_ps(_mm_set1_epi16(*(const short *)(q)));   \
    __m256 ymm_m_0 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi));      \
    __m256 ymm_m_1 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1)); \
    MATRIX_VAR_PROC(2, 1, 0, ymm_m, ymm_q, _RES, _PROC)                    \
  }

//! Iterative process of computing distance (FP16, M=16, N=2)
//!
//! Loads 16 FP16 values from `m` with `_LOAD` (256 bits). Reads 2 FP16
//! values from `q` (one 32-bit broadcast), converts them to floats and
//! broadcasts each into `ymm_q_0` / `ymm_q_1`. The low half of the `m` data
//! is converted and processed first (MATRIX_VAR_PROC index 0), then the high
//! half (index 1), reusing the same `ymm_m` variable.
//! MATRIX_VAR_PROC is defined outside this file section; presumably it pastes
//! the `_0` / `_1` suffixes onto the `ymm_q` prefix -- TODO confirm.
#define MATRIX_FP16_ITER_16X2_AVX(m, q, _RES, _LOAD, _PROC)         \
  {                                                                 \
    __m256i ymm_mi = _LOAD((const __m256i *)(m));                   \
    __m128 xmm_p = _mm_cvtph_ps(_mm_broadcast_si32(q));             \
    __m256 ymm_q_0 = _mm256_set1_ps(xmm_p[0]);                      \
    __m256 ymm_q_1 = _mm256_set1_ps(xmm_p[1]);                      \
    __m256 ymm_m = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi)); \
    MATRIX_VAR_PROC(1, 2, 0, ymm_m, ymm_q, _RES, _PROC)             \
    ymm_m = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1));   \
    MATRIX_VAR_PROC(1, 2, 1, ymm_m, ymm_q, _RES, _PROC)             \
  }

//! Iterative process of computing distance (FP16, M=16, N=4)
//!
//! Loads 16 FP16 values from `m` with `_LOAD` (256 bits) and converts the low
//! and high halves to floats as `ymm_m_0` / `ymm_m_1`. Reads 4 FP16 values
//! from `q` with a 64-bit load and converts them to floats in `xmm_p`. For
//! k = 0..3, element k is broadcast into `ymm_q` and MATRIX_VAR_PROC is
//! invoked with index k.
//! MATRIX_VAR_PROC is defined outside this file section; presumably it pastes
//! the `_0` / `_1` suffixes onto the `ymm_m` prefix -- TODO confirm.
#define MATRIX_FP16_ITER_16X4_AVX(m, q, _RES, _LOAD, _PROC)                \
  {                                                                        \
    __m256i ymm_mi = _LOAD((const __m256i *)(m));                          \
    __m256 ymm_m_0 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi));      \
    __m256 ymm_m_1 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1)); \
    __m128 xmm_p = _mm_cvtph_ps(_mm_loadl_epi64((const __m128i *)(q)));    \
    __m256 ymm_q = _mm256_set1_ps(xmm_p[0]);                               \
    MATRIX_VAR_PROC(2, 1, 0, ymm_m, ymm_q, _RES, _PROC)                    \
    ymm_q = _mm256_set1_ps(xmm_p[1]);                                      \
    MATRIX_VAR_PROC(2, 1, 1, ymm_m, ymm_q, _RES, _PROC)                    \
    ymm_q = _mm256_set1_ps(xmm_p[2]);                                      \
    MATRIX_VAR_PROC(2, 1, 2, ymm_m, ymm_q, _RES, _PROC)                    \
    ymm_q = _mm256_set1_ps(xmm_p[3]);                                      \
    MATRIX_VAR_PROC(2, 1, 3, ymm_m, ymm_q, _RES, _PROC)                    \
  }

//! Iterative process of computing distance (FP16, M=16, N=8)
//!
//! Loads 16 FP16 values from `m` with `_LOAD` (256 bits) and converts the low
//! and high halves to floats as `ymm_m_0` / `ymm_m_1`. Loads 8 FP16 values
//! from `q` and converts them to floats in `ymm_p`; this load always uses the
//! unaligned `_mm_loadu_si128`, independent of `_LOAD`. For k = 0..7,
//! element k is broadcast into `ymm_q` and MATRIX_VAR_PROC is invoked with
//! index k.
//! MATRIX_VAR_PROC is defined outside this file section; presumably it pastes
//! the `_0` / `_1` suffixes onto the `ymm_m` prefix -- TODO confirm.
#define MATRIX_FP16_ITER_16X8_AVX(m, q, _RES, _LOAD, _PROC)                \
  {                                                                        \
    __m256i ymm_mi = _LOAD((const __m256i *)(m));                          \
    __m256 ymm_m_0 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi));      \
    __m256 ymm_m_1 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1)); \
    __m256 ymm_p = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)(q))); \
    __m256 ymm_q = _mm256_set1_ps(ymm_p[0]);                               \
    MATRIX_VAR_PROC(2, 1, 0, ymm_m, ymm_q, _RES, _PROC)                    \
    ymm_q = _mm256_set1_ps(ymm_p[1]);                                      \
    MATRIX_VAR_PROC(2, 1, 1, ymm_m, ymm_q, _RES, _PROC)                    \
    ymm_q = _mm256_set1_ps(ymm_p[2]);                                      \
    MATRIX_VAR_PROC(2, 1, 2, ymm_m, ymm_q, _RES, _PROC)                    \
    ymm_q = _mm256_set1_ps(ymm_p[3]);                                      \
    MATRIX_VAR_PROC(2, 1, 3, ymm_m, ymm_q, _RES, _PROC)                    \
    ymm_q = _mm256_set1_ps(ymm_p[4]);                                      \
    MATRIX_VAR_PROC(2, 1, 4, ymm_m, ymm_q, _RES, _PROC)                    \
    ymm_q = _mm256_set1_ps(ymm_p[5]);                                      \
    MATRIX_VAR_PROC(2, 1, 5, ymm_m, ymm_q, _RES, _PROC)                    \
    ymm_q = _mm256_set1_ps(ymm_p[6]);                                      \
    MATRIX_VAR_PROC(2, 1, 6, ymm_m, ymm_q, _RES, _PROC)                    \
    ymm_q = _mm256_set1_ps(ymm_p[7]);                                      \
    MATRIX_VAR_PROC(2, 1, 7, ymm_m, ymm_q, _RES, _PROC)                    \
  }

//! Iterative process of computing distance (FP16, M=16, N=16)
//!
//! Loads 16 FP16 values from `m` with `_LOAD` (256 bits) and converts the low
//! and high halves to floats as `ymm_m_0` / `ymm_m_1`. The 16 `q` values are
//! handled in two groups of eight: the first group is loaded from `q`, the
//! second from `q + 8`, both with the unaligned `_mm_loadu_si128`
//! (independent of `_LOAD`). Each element is broadcast into `ymm_q` and
//! MATRIX_VAR_PROC is invoked with indices 0..7 for the first group and
//! 8..15 for the second.
//! `q + 8` is plain pointer arithmetic on the argument, so `q` is assumed to
//! be a pointer to 2-byte elements -- TODO confirm against callers.
//! MATRIX_VAR_PROC is defined outside this file section; presumably it pastes
//! the `_0` / `_1` suffixes onto the `ymm_m` prefix -- TODO confirm.
#define MATRIX_FP16_ITER_16X16_AVX(m, q, _RES, _LOAD, _PROC)               \
  {                                                                        \
    __m256i ymm_mi = _LOAD((const __m256i *)(m));                          \
    __m256 ymm_m_0 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi));      \
    __m256 ymm_m_1 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1)); \
    __m256 ymm_p = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)(q))); \
    __m256 ymm_q = _mm256_set1_ps(ymm_p[0]);                               \
    MATRIX_VAR_PROC(2, 1, 0, ymm_m, ymm_q, _RES, _PROC)                    \
    ymm_q = _mm256_set1_ps(ymm_p[1]);                                      \
    MATRIX_VAR_PROC(2, 1, 1, ymm_m, ymm_q, _RES, _PROC)                    \
    ymm_q = _mm256_set1_ps(ymm_p[2]);                                      \
    MATRIX_VAR_PROC(2, 1, 2, ymm_m, ymm_q, _RES, _PROC)                    \
    ymm_q = _mm256_set1_ps(ymm_p[3]);                                      \
    MATRIX_VAR_PROC(2, 1, 3, ymm_m, ymm_q, _RES, _PROC)                    \
    ymm_q = _mm256_set1_ps(ymm_p[4]);                                      \
    MATRIX_VAR_PROC(2, 1, 4, ymm_m, ymm_q, _RES, _PROC)                    \
    ymm_q = _mm256_set1_ps(ymm_p[5]);                                      \
    MATRIX_VAR_PROC(2, 1, 5, ymm_m, ymm_q, _RES, _PROC)                    \
    ymm_q = _mm256_set1_ps(ymm_p[6]);                                      \
    MATRIX_VAR_PROC(2, 1, 6, ymm_m, ymm_q, _RES, _PROC)                    \
    ymm_q = _mm256_set1_ps(ymm_p[7]);                                      \
    MATRIX_VAR_PROC(2, 1, 7, ymm_m, ymm_q, _RES, _PROC)                    \
    ymm_p = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)(q + 8)));    \
    ymm_q = _mm256_set1_ps(ymm_p[0]);                                      \
    MATRIX_VAR_PROC(2, 1, 8, ymm_m, ymm_q, _RES, _PROC)                    \
    ymm_q = _mm256_set1_ps(ymm_p[1]);                                      \
    MATRIX_VAR_PROC(2, 1, 9, ymm_m, ymm_q, _RES, _PROC)                    \
    ymm_q = _mm256_set1_ps(ymm_p[2]);                                      \
    MATRIX_VAR_PROC(2, 1, 10, ymm_m, ymm_q, _RES, _PROC)                   \
    ymm_q = _mm256_set1_ps(ymm_p[3]);                                      \
    MATRIX_VAR_PROC(2, 1, 11, ymm_m, ymm_q, _RES, _PROC)                   \
    ymm_q = _mm256_set1_ps(ymm_p[4]);                                      \
    MATRIX_VAR_PROC(2, 1, 12, ymm_m, ymm_q, _RES, _PROC)                   \
    ymm_q = _mm256_set1_ps(ymm_p[5]);                                      \
    MATRIX_VAR_PROC(2, 1, 13, ymm_m, ymm_q, _RES, _PROC)                   \
    ymm_q = _mm256_set1_ps(ymm_p[6]);                                      \
    MATRIX_VAR_PROC(2, 1, 14, ymm_m, ymm_q, _RES, _PROC)                   \
    ymm_q = _mm256_set1_ps(ymm_p[7]);                                      \
    MATRIX_VAR_PROC(2, 1, 15, ymm_m, ymm_q, _RES, _PROC)                   \
  }

//! Iterative process of computing distance (FP16, M=32, N=1)
//!
//! Loads 32 FP16 values from `m` in two 256-bit `_LOAD`s (at `m` and at
//! `m + 16`) and converts the four 128-bit halves to floats as
//! `ymm_m_0` .. `ymm_m_3`. The single FP16 value at `q` is broadcast to all
//! lanes of `ymm_q`.
//! The `+ 16` offset is plain pointer arithmetic on the argument, so `m` is
//! assumed to be a pointer to 2-byte elements -- TODO confirm against callers.
//! The steps are emitted by MATRIX_VAR_PROC (defined outside this file
//! section), which presumably pastes the `_0` .. `_3` suffixes onto the
//! `ymm_m` prefix -- TODO confirm.
//! `m` and `q` are parenthesized wherever they are used so that expression
//! arguments keep their intended grouping.
#define MATRIX_FP16_ITER_32X1_AVX(m, q, _RES, _LOAD, _PROC)                \
  {                                                                        \
    __m256i ymm_mi = _LOAD((const __m256i *)(m));                          \
    __m256 ymm_m_0 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi));      \
    __m256 ymm_m_1 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1)); \
    ymm_mi = _LOAD((const __m256i *)((m) + 16));                           \
    __m256 ymm_m_2 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi));      \
    __m256 ymm_m_3 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1)); \
    __m256 ymm_q = _mm256_cvtph_ps(_mm_set1_epi16(*(const short *)(q)));   \
    MATRIX_VAR_PROC(4, 1, 0, ymm_m, ymm_q, _RES, _PROC)                    \
  }

//! Iterative process of computing distance (FP16, M=32, N=2)
//!
//! Reads 2 FP16 values from `q` (one 32-bit broadcast), converts them to
//! floats and broadcasts each into `ymm_q_0` / `ymm_q_1`. The 32 FP16 values
//! of `m` are loaded in two 256-bit `_LOAD`s (at `m` and at `m + 16`); each
//! 128-bit half is converted to floats in turn, reusing `ymm_m`, and handed
//! to MATRIX_VAR_PROC with indices 0, 1, 2, 3.
//! `m + 16` is plain pointer arithmetic on the argument, so `m` is assumed to
//! be a pointer to 2-byte elements -- TODO confirm against callers.
//! MATRIX_VAR_PROC is defined outside this file section; presumably it pastes
//! the `_0` / `_1` suffixes onto the `ymm_q` prefix -- TODO confirm.
#define MATRIX_FP16_ITER_32X2_AVX(m, q, _RES, _LOAD, _PROC)         \
  {                                                                 \
    __m128 xmm_p = _mm_cvtph_ps(_mm_broadcast_si32(q));             \
    __m256 ymm_q_0 = _mm256_set1_ps(xmm_p[0]);                      \
    __m256 ymm_q_1 = _mm256_set1_ps(xmm_p[1]);                      \
    __m256i ymm_mi = _LOAD((const __m256i *)(m));                   \
    __m256 ymm_m = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi)); \
    MATRIX_VAR_PROC(1, 2, 0, ymm_m, ymm_q, _RES, _PROC)             \
    ymm_m = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1));   \
    MATRIX_VAR_PROC(1, 2, 1, ymm_m, ymm_q, _RES, _PROC)             \
    ymm_mi = _LOAD((const __m256i *)(m + 16));                      \
    ymm_m = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi));        \
    MATRIX_VAR_PROC(1, 2, 2, ymm_m, ymm_q, _RES, _PROC)             \
    ymm_m = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1));   \
    MATRIX_VAR_PROC(1, 2, 3, ymm_m, ymm_q, _RES, _PROC)             \
  }

//! Iterative process of computing distance (FP16, M=32, N=4)
//!
//! Reads 4 FP16 values from `q` with a 64-bit load, converts them to floats
//! and broadcasts each into `ymm_q_0` .. `ymm_q_3`. The 32 FP16 values of
//! `m` are loaded in two 256-bit `_LOAD`s (at `m` and at `m + 16`); each
//! 128-bit half is converted to floats in turn, reusing `ymm_m`, and handed
//! to MATRIX_VAR_PROC with indices 0, 1, 2, 3.
//! `m + 16` is plain pointer arithmetic on the argument, so `m` is assumed to
//! be a pointer to 2-byte elements -- TODO confirm against callers.
//! MATRIX_VAR_PROC is defined outside this file section; presumably it pastes
//! the `_0` .. `_3` suffixes onto the `ymm_q` prefix -- TODO confirm.
#define MATRIX_FP16_ITER_32X4_AVX(m, q, _RES, _LOAD, _PROC)             \
  {                                                                     \
    __m128 xmm_p = _mm_cvtph_ps(_mm_loadl_epi64((const __m128i *)(q))); \
    __m256 ymm_q_0 = _mm256_set1_ps(xmm_p[0]);                          \
    __m256 ymm_q_1 = _mm256_set1_ps(xmm_p[1]);                          \
    __m256 ymm_q_2 = _mm256_set1_ps(xmm_p[2]);                          \
    __m256 ymm_q_3 = _mm256_set1_ps(xmm_p[3]);                          \
    __m256i ymm_mi = _LOAD((const __m256i *)(m));                       \
    __m256 ymm_m = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi));     \
    MATRIX_VAR_PROC(1, 4, 0, ymm_m, ymm_q, _RES, _PROC)                 \
    ymm_m = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1));       \
    MATRIX_VAR_PROC(1, 4, 1, ymm_m, ymm_q, _RES, _PROC)                 \
    ymm_mi = _LOAD((const __m256i *)(m + 16));                          \
    ymm_m = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi));            \
    MATRIX_VAR_PROC(1, 4, 2, ymm_m, ymm_q, _RES, _PROC)                 \
    ymm_m = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1));       \
    MATRIX_VAR_PROC(1, 4, 3, ymm_m, ymm_q, _RES, _PROC)                 \
  }

//! Iterative process of computing distance (FP16, M=32, N=8)
//!
//! Loads 32 FP16 values from `m` in two 256-bit `_LOAD`s (at `m` and at
//! `m + 16`) and converts the four 128-bit halves to floats as
//! `ymm_m_0` .. `ymm_m_3`. Loads 8 FP16 values from `q` with the unaligned
//! `_mm_loadu_si128` (independent of `_LOAD`) and converts them to floats in
//! `ymm_p`. For k = 0..7, element k is broadcast into `ymm_q` and
//! MATRIX_VAR_PROC is invoked with index k.
//! `m + 16` is plain pointer arithmetic on the argument, so `m` is assumed to
//! be a pointer to 2-byte elements -- TODO confirm against callers.
//! MATRIX_VAR_PROC is defined outside this file section; presumably it pastes
//! the `_0` .. `_3` suffixes onto the `ymm_m` prefix -- TODO confirm.
#define MATRIX_FP16_ITER_32X8_AVX(m, q, _RES, _LOAD, _PROC)                \
  {                                                                        \
    __m256i ymm_mi = _LOAD((const __m256i *)(m));                          \
    __m256 ymm_m_0 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi));      \
    __m256 ymm_m_1 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1)); \
    ymm_mi = _LOAD((const __m256i *)(m + 16));                             \
    __m256 ymm_m_2 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi));      \
    __m256 ymm_m_3 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1)); \
    __m256 ymm_p = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)(q))); \
    __m256 ymm_q = _mm256_set1_ps(ymm_p[0]);                               \
    MATRIX_VAR_PROC(4, 1, 0, ymm_m, ymm_q, _RES, _PROC)                    \
    ymm_q = _mm256_set1_ps(ymm_p[1]);                                      \
    MATRIX_VAR_PROC(4, 1, 1, ymm_m, ymm_q, _RES, _PROC)                    \
    ymm_q = _mm256_set1_ps(ymm_p[2]);                                      \
    MATRIX_VAR_PROC(4, 1, 2, ymm_m, ymm_q, _RES, _PROC)                    \
    ymm_q = _mm256_set1_ps(ymm_p[3]);                                      \
    MATRIX_VAR_PROC(4, 1, 3, ymm_m, ymm_q, _RES, _PROC)                    \
    ymm_q = _mm256_set1_ps(ymm_p[4]);                                      \
    MATRIX_VAR_PROC(4, 1, 4, ymm_m, ymm_q, _RES, _PROC)                    \
    ymm_q = _mm256_set1_ps(ymm_p[5]);                                      \
    MATRIX_VAR_PROC(4, 1, 5, ymm_m, ymm_q, _RES, _PROC)                    \
    ymm_q = _mm256_set1_ps(ymm_p[6]);                                      \
    MATRIX_VAR_PROC(4, 1, 6, ymm_m, ymm_q, _RES, _PROC)                    \
    ymm_q = _mm256_set1_ps(ymm_p[7]);                                      \
    MATRIX_VAR_PROC(4, 1, 7, ymm_m, ymm_q, _RES, _PROC)                    \
  }

//! Iterative process of computing distance (FP16, M=32, N=16)
//!
//! Fetches 32 half-precision values from `m` with two 256-bit `_LOAD`s and
//! widens them into four 8-lane float vectors. The 16 half-precision values
//! at `q` are consumed in two groups of eight (unaligned 128-bit loads at
//! `q` and `q + 8`); each widened lane is broadcast and passed with the four
//! `m` vectors to MATRIX_VAR_PROC(4, 1, k, ...) for k = 0..15.
//! NOTE(review): MATRIX_VAR_PROC lives in matrix_define.i; presumably it
//! applies `_PROC` to every `m` vector and updates `_RES##_<i>_<k>` - confirm.
#define MATRIX_FP16_ITER_32X16_AVX(m, q, _RES, _LOAD, _PROC)                  \
  {                                                                           \
    __m256i ymm_raw = _LOAD((const __m256i *)(m));                            \
    __m256 ymm_lhs_0 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_raw));      \
    __m256 ymm_lhs_1 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_raw, 1)); \
    ymm_raw = _LOAD((const __m256i *)(m + 16));                               \
    __m256 ymm_lhs_2 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_raw));      \
    __m256 ymm_lhs_3 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_raw, 1)); \
    __m256 ymm_qf = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)(q)));   \
    __m256 ymm_rhs = _mm256_set1_ps(ymm_qf[0]);                               \
    MATRIX_VAR_PROC(4, 1, 0, ymm_lhs, ymm_rhs, _RES, _PROC)                   \
    ymm_rhs = _mm256_set1_ps(ymm_qf[1]);                                      \
    MATRIX_VAR_PROC(4, 1, 1, ymm_lhs, ymm_rhs, _RES, _PROC)                   \
    ymm_rhs = _mm256_set1_ps(ymm_qf[2]);                                      \
    MATRIX_VAR_PROC(4, 1, 2, ymm_lhs, ymm_rhs, _RES, _PROC)                   \
    ymm_rhs = _mm256_set1_ps(ymm_qf[3]);                                      \
    MATRIX_VAR_PROC(4, 1, 3, ymm_lhs, ymm_rhs, _RES, _PROC)                   \
    ymm_rhs = _mm256_set1_ps(ymm_qf[4]);                                      \
    MATRIX_VAR_PROC(4, 1, 4, ymm_lhs, ymm_rhs, _RES, _PROC)                   \
    ymm_rhs = _mm256_set1_ps(ymm_qf[5]);                                      \
    MATRIX_VAR_PROC(4, 1, 5, ymm_lhs, ymm_rhs, _RES, _PROC)                   \
    ymm_rhs = _mm256_set1_ps(ymm_qf[6]);                                      \
    MATRIX_VAR_PROC(4, 1, 6, ymm_lhs, ymm_rhs, _RES, _PROC)                   \
    ymm_rhs = _mm256_set1_ps(ymm_qf[7]);                                      \
    MATRIX_VAR_PROC(4, 1, 7, ymm_lhs, ymm_rhs, _RES, _PROC)                   \
    ymm_qf = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)(q + 8)));      \
    ymm_rhs = _mm256_set1_ps(ymm_qf[0]);                                      \
    MATRIX_VAR_PROC(4, 1, 8, ymm_lhs, ymm_rhs, _RES, _PROC)                   \
    ymm_rhs = _mm256_set1_ps(ymm_qf[1]);                                      \
    MATRIX_VAR_PROC(4, 1, 9, ymm_lhs, ymm_rhs, _RES, _PROC)                   \
    ymm_rhs = _mm256_set1_ps(ymm_qf[2]);                                      \
    MATRIX_VAR_PROC(4, 1, 10, ymm_lhs, ymm_rhs, _RES, _PROC)                  \
    ymm_rhs = _mm256_set1_ps(ymm_qf[3]);                                      \
    MATRIX_VAR_PROC(4, 1, 11, ymm_lhs, ymm_rhs, _RES, _PROC)                  \
    ymm_rhs = _mm256_set1_ps(ymm_qf[4]);                                      \
    MATRIX_VAR_PROC(4, 1, 12, ymm_lhs, ymm_rhs, _RES, _PROC)                  \
    ymm_rhs = _mm256_set1_ps(ymm_qf[5]);                                      \
    MATRIX_VAR_PROC(4, 1, 13, ymm_lhs, ymm_rhs, _RES, _PROC)                  \
    ymm_rhs = _mm256_set1_ps(ymm_qf[6]);                                      \
    MATRIX_VAR_PROC(4, 1, 14, ymm_lhs, ymm_rhs, _RES, _PROC)                  \
    ymm_rhs = _mm256_set1_ps(ymm_qf[7]);                                      \
    MATRIX_VAR_PROC(4, 1, 15, ymm_lhs, ymm_rhs, _RES, _PROC)                  \
  }

//! Iterative process of computing distance (FP16, M=32, N=32)
//!
//! Fetches 32 half-precision values from `m` with two 256-bit `_LOAD`s and
//! widens them into four 8-lane float vectors. The 32 half-precision values
//! at `q` are consumed in four groups of eight (unaligned 128-bit loads at
//! `q`, `q + 8`, `q + 16` and `q + 24`); each widened lane is broadcast and
//! passed with the four `m` vectors to MATRIX_VAR_PROC(4, 1, k, ...) for
//! k = 0..31.
//! NOTE(review): MATRIX_VAR_PROC lives in matrix_define.i; presumably it
//! applies `_PROC` to every `m` vector and updates `_RES##_<i>_<k>` - confirm.
#define MATRIX_FP16_ITER_32X32_AVX(m, q, _RES, _LOAD, _PROC)                  \
  {                                                                           \
    __m256i ymm_raw = _LOAD((const __m256i *)(m));                            \
    __m256 ymm_lhs_0 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_raw));      \
    __m256 ymm_lhs_1 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_raw, 1)); \
    ymm_raw = _LOAD((const __m256i *)(m + 16));                               \
    __m256 ymm_lhs_2 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_raw));      \
    __m256 ymm_lhs_3 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_raw, 1)); \
    __m256 ymm_qf = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)(q)));   \
    __m256 ymm_rhs = _mm256_set1_ps(ymm_qf[0]);                               \
    MATRIX_VAR_PROC(4, 1, 0, ymm_lhs, ymm_rhs, _RES, _PROC)                   \
    ymm_rhs = _mm256_set1_ps(ymm_qf[1]);                                      \
    MATRIX_VAR_PROC(4, 1, 1, ymm_lhs, ymm_rhs, _RES, _PROC)                   \
    ymm_rhs = _mm256_set1_ps(ymm_qf[2]);                                      \
    MATRIX_VAR_PROC(4, 1, 2, ymm_lhs, ymm_rhs, _RES, _PROC)                   \
    ymm_rhs = _mm256_set1_ps(ymm_qf[3]);                                      \
    MATRIX_VAR_PROC(4, 1, 3, ymm_lhs, ymm_rhs, _RES, _PROC)                   \
    ymm_rhs = _mm256_set1_ps(ymm_qf[4]);                                      \
    MATRIX_VAR_PROC(4, 1, 4, ymm_lhs, ymm_rhs, _RES, _PROC)                   \
    ymm_rhs = _mm256_set1_ps(ymm_qf[5]);                                      \
    MATRIX_VAR_PROC(4, 1, 5, ymm_lhs, ymm_rhs, _RES, _PROC)                   \
    ymm_rhs = _mm256_set1_ps(ymm_qf[6]);                                      \
    MATRIX_VAR_PROC(4, 1, 6, ymm_lhs, ymm_rhs, _RES, _PROC)                   \
    ymm_rhs = _mm256_set1_ps(ymm_qf[7]);                                      \
    MATRIX_VAR_PROC(4, 1, 7, ymm_lhs, ymm_rhs, _RES, _PROC)                   \
    ymm_qf = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)(q + 8)));      \
    ymm_rhs = _mm256_set1_ps(ymm_qf[0]);                                      \
    MATRIX_VAR_PROC(4, 1, 8, ymm_lhs, ymm_rhs, _RES, _PROC)                   \
    ymm_rhs = _mm256_set1_ps(ymm_qf[1]);                                      \
    MATRIX_VAR_PROC(4, 1, 9, ymm_lhs, ymm_rhs, _RES, _PROC)                   \
    ymm_rhs = _mm256_set1_ps(ymm_qf[2]);                                      \
    MATRIX_VAR_PROC(4, 1, 10, ymm_lhs, ymm_rhs, _RES, _PROC)                  \
    ymm_rhs = _mm256_set1_ps(ymm_qf[3]);                                      \
    MATRIX_VAR_PROC(4, 1, 11, ymm_lhs, ymm_rhs, _RES, _PROC)                  \
    ymm_rhs = _mm256_set1_ps(ymm_qf[4]);                                      \
    MATRIX_VAR_PROC(4, 1, 12, ymm_lhs, ymm_rhs, _RES, _PROC)                  \
    ymm_rhs = _mm256_set1_ps(ymm_qf[5]);                                      \
    MATRIX_VAR_PROC(4, 1, 13, ymm_lhs, ymm_rhs, _RES, _PROC)                  \
    ymm_rhs = _mm256_set1_ps(ymm_qf[6]);                                      \
    MATRIX_VAR_PROC(4, 1, 14, ymm_lhs, ymm_rhs, _RES, _PROC)                  \
    ymm_rhs = _mm256_set1_ps(ymm_qf[7]);                                      \
    MATRIX_VAR_PROC(4, 1, 15, ymm_lhs, ymm_rhs, _RES, _PROC)                  \
    ymm_qf = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)(q + 16)));     \
    ymm_rhs = _mm256_set1_ps(ymm_qf[0]);                                      \
    MATRIX_VAR_PROC(4, 1, 16, ymm_lhs, ymm_rhs, _RES, _PROC)                  \
    ymm_rhs = _mm256_set1_ps(ymm_qf[1]);                                      \
    MATRIX_VAR_PROC(4, 1, 17, ymm_lhs, ymm_rhs, _RES, _PROC)                  \
    ymm_rhs = _mm256_set1_ps(ymm_qf[2]);                                      \
    MATRIX_VAR_PROC(4, 1, 18, ymm_lhs, ymm_rhs, _RES, _PROC)                  \
    ymm_rhs = _mm256_set1_ps(ymm_qf[3]);                                      \
    MATRIX_VAR_PROC(4, 1, 19, ymm_lhs, ymm_rhs, _RES, _PROC)                  \
    ymm_rhs = _mm256_set1_ps(ymm_qf[4]);                                      \
    MATRIX_VAR_PROC(4, 1, 20, ymm_lhs, ymm_rhs, _RES, _PROC)                  \
    ymm_rhs = _mm256_set1_ps(ymm_qf[5]);                                      \
    MATRIX_VAR_PROC(4, 1, 21, ymm_lhs, ymm_rhs, _RES, _PROC)                  \
    ymm_rhs = _mm256_set1_ps(ymm_qf[6]);                                      \
    MATRIX_VAR_PROC(4, 1, 22, ymm_lhs, ymm_rhs, _RES, _PROC)                  \
    ymm_rhs = _mm256_set1_ps(ymm_qf[7]);                                      \
    MATRIX_VAR_PROC(4, 1, 23, ymm_lhs, ymm_rhs, _RES, _PROC)                  \
    ymm_qf = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)(q + 24)));     \
    ymm_rhs = _mm256_set1_ps(ymm_qf[0]);                                      \
    MATRIX_VAR_PROC(4, 1, 24, ymm_lhs, ymm_rhs, _RES, _PROC)                  \
    ymm_rhs = _mm256_set1_ps(ymm_qf[1]);                                      \
    MATRIX_VAR_PROC(4, 1, 25, ymm_lhs, ymm_rhs, _RES, _PROC)                  \
    ymm_rhs = _mm256_set1_ps(ymm_qf[2]);                                      \
    MATRIX_VAR_PROC(4, 1, 26, ymm_lhs, ymm_rhs, _RES, _PROC)                  \
    ymm_rhs = _mm256_set1_ps(ymm_qf[3]);                                      \
    MATRIX_VAR_PROC(4, 1, 27, ymm_lhs, ymm_rhs, _RES, _PROC)                  \
    ymm_rhs = _mm256_set1_ps(ymm_qf[4]);                                      \
    MATRIX_VAR_PROC(4, 1, 28, ymm_lhs, ymm_rhs, _RES, _PROC)                  \
    ymm_rhs = _mm256_set1_ps(ymm_qf[5]);                                      \
    MATRIX_VAR_PROC(4, 1, 29, ymm_lhs, ymm_rhs, _RES, _PROC)                  \
    ymm_rhs = _mm256_set1_ps(ymm_qf[6]);                                      \
    MATRIX_VAR_PROC(4, 1, 30, ymm_lhs, ymm_rhs, _RES, _PROC)                  \
    ymm_rhs = _mm256_set1_ps(ymm_qf[7]);                                      \
    MATRIX_VAR_PROC(4, 1, 31, ymm_lhs, ymm_rhs, _RES, _PROC)                  \
  }

//! Iterative process of computing distance (FP16, M=1, N=1)
//!
//! Reads 512 bits (32 half-precision values) from each of `m` and `q` with
//! `_LOAD`, widens the lower 16 values of both to floats and feeds them to
//! `_PROC`, then does the same for the upper 16 values. Both `_PROC` calls
//! target the same result variable `_RES##_0_0`.
//! `m` and `q` are parenthesized before the pointer cast so that expression
//! arguments such as `base + offset` are offset before, not after, the cast.
#define MATRIX_FP16_ITER_1X1_AVX512(m, q, _RES, _LOAD, _PROC)       \
  {                                                                 \
    __m512i zmm_mi = _LOAD((const __m512i *)(m));                   \
    __m512i zmm_qi = _LOAD((const __m512i *)(q));                   \
    __m512 zmm_m = _mm512_cvtph_ps(_mm512_castsi512_si256(zmm_mi)); \
    __m512 zmm_q = _mm512_cvtph_ps(_mm512_castsi512_si256(zmm_qi)); \
    _PROC(zmm_m, zmm_q, _RES##_0_0);                                \
    zmm_m = _mm512_cvtph_ps(_mm512_extracti64x4_epi64(zmm_mi, 1));  \
    zmm_q = _mm512_cvtph_ps(_mm512_extracti64x4_epi64(zmm_qi, 1));  \
    _PROC(zmm_m, zmm_q, _RES##_0_0);                                \
  }

//! Iterative process of computing distance (FP16, M=16, N=1)
//!
//! Reads 16 half-precision values from `m` with a 256-bit `_LOAD` and widens
//! them to floats. The single 16-bit value at `q` is replicated across all
//! 16 lanes before widening, and `_PROC` is applied once with `_RES##_0_0`.
//! `q` is parenthesized before the cast so that expression arguments such as
//! `base + offset` are evaluated before, not after, the pointer cast.
#define MATRIX_FP16_ITER_16X1_AVX512(m, q, _RES, _LOAD, _PROC)              \
  {                                                                         \
    __m512 zmm_m = _mm512_cvtph_ps(_LOAD((const __m256i *)(m)));            \
    __m512 zmm_q = _mm512_cvtph_ps(_mm256_set1_epi16(*(const short *)(q))); \
    _PROC(zmm_m, zmm_q, _RES##_0_0)                                         \
  }

//! Iterative process of computing distance (FP16, M=16, N=2)
//!
//! Reads 16 half-precision values from `m` with a 256-bit `_LOAD` and widens
//! them to floats. The value returned by `_mm_broadcast_si32(q)` is widened
//! and its lanes 0 and 1 are each broadcast into their own 512-bit vector;
//! everything is then handed to MATRIX_VAR_PROC(1, 2, 0, ...).
//! NOTE(review): `_mm_broadcast_si32` and MATRIX_VAR_PROC are defined
//! outside this block; presumably the former replicates the 32 bits at `q`
//! (two half values) and the latter updates `_RES` per query - confirm.
#define MATRIX_FP16_ITER_16X2_AVX512(m, q, _RES, _LOAD, _PROC)     \
  {                                                                \
    __m512 zmm_lhs = _mm512_cvtph_ps(_LOAD((const __m256i *)(m))); \
    __m128 xmm_qf = _mm_cvtph_ps(_mm_broadcast_si32(q));           \
    __m512 zmm_rhs_0 = _mm512_set1_ps(xmm_qf[0]);                  \
    __m512 zmm_rhs_1 = _mm512_set1_ps(xmm_qf[1]);                  \
    MATRIX_VAR_PROC(1, 2, 0, zmm_lhs, zmm_rhs, _RES, _PROC)        \
  }

//! Iterative process of computing distance (FP16, M=16, N=4)
//!
//! Reads 16 half-precision values from `m` with a 256-bit `_LOAD` and widens
//! them to floats. Four half-precision values are read from `q` with a
//! 64-bit load and widened; lane k (k = 0..3) is broadcast to all 16 lanes
//! and `_PROC` is applied with result variable `_RES##_0_<k>`.
#define MATRIX_FP16_ITER_16X4_AVX512(m, q, _RES, _LOAD, _PROC)           \
  {                                                                      \
    __m512 zmm_lhs = _mm512_cvtph_ps(_LOAD((const __m256i *)(m)));       \
    __m128 xmm_qf = _mm_cvtph_ps(_mm_loadl_epi64((const __m128i *)(q))); \
    __m512 zmm_rhs = _mm512_set1_ps(xmm_qf[0]);                          \
    _PROC(zmm_lhs, zmm_rhs, _RES##_0_0)                                  \
    zmm_rhs = _mm512_set1_ps(xmm_qf[1]);                                 \
    _PROC(zmm_lhs, zmm_rhs, _RES##_0_1)                                  \
    zmm_rhs = _mm512_set1_ps(xmm_qf[2]);                                 \
    _PROC(zmm_lhs, zmm_rhs, _RES##_0_2)                                  \
    zmm_rhs = _mm512_set1_ps(xmm_qf[3]);                                 \
    _PROC(zmm_lhs, zmm_rhs, _RES##_0_3)                                  \
  }

//! Iterative process of computing distance (FP16, M=16, N=8)
//!
//! Reads 16 half-precision values from `m` with a 256-bit `_LOAD` and widens
//! them to floats. Eight half-precision values are read from `q` with an
//! unaligned 128-bit load and widened; lane k (k = 0..7) is broadcast to all
//! 16 lanes and `_PROC` is applied with result variable `_RES##_0_<k>`.
#define MATRIX_FP16_ITER_16X8_AVX512(m, q, _RES, _LOAD, _PROC)              \
  {                                                                         \
    __m512 zmm_lhs = _mm512_cvtph_ps(_LOAD((const __m256i *)(m)));          \
    __m256 ymm_qf = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)(q))); \
    __m512 zmm_rhs = _mm512_set1_ps(ymm_qf[0]);                             \
    _PROC(zmm_lhs, zmm_rhs, _RES##_0_0)                                     \
    zmm_rhs = _mm512_set1_ps(ymm_qf[1]);                                    \
    _PROC(zmm_lhs, zmm_rhs, _RES##_0_1)                                     \
    zmm_rhs = _mm512_set1_ps(ymm_qf[2]);                                    \
    _PROC(zmm_lhs, zmm_rhs, _RES##_0_2)                                     \
    zmm_rhs = _mm512_set1_ps(ymm_qf[3]);                                    \
    _PROC(zmm_lhs, zmm_rhs, _RES##_0_3)                                     \
    zmm_rhs = _mm512_set1_ps(ymm_qf[4]);                                    \
    _PROC(zmm_lhs, zmm_rhs, _RES##_0_4)                                     \
    zmm_rhs = _mm512_set1_ps(ymm_qf[5]);                                    \
    _PROC(zmm_lhs, zmm_rhs, _RES##_0_5)                                     \
    zmm_rhs = _mm512_set1_ps(ymm_qf[6]);                                    \
    _PROC(zmm_lhs, zmm_rhs, _RES##_0_6)                                     \
    zmm_rhs = _mm512_set1_ps(ymm_qf[7]);                                    \
    _PROC(zmm_lhs, zmm_rhs, _RES##_0_7)                                     \
  }

//! Iterative process of computing distance (FP16, M=16, N=16)
//!
//! Reads 16 half-precision values from each of `m` and `q` with 256-bit
//! `_LOAD`s and widens both to floats. Lane k (k = 0..15) of the widened
//! query is broadcast to all 16 lanes and `_PROC` is applied with result
//! variable `_RES##_0_<k>`.
//! NOTE(review): `q` is read through `_LOAD` here, whereas the 32X16 AVX512
//! variant always uses an unaligned load for `q`; if `_LOAD` can be an
//! aligned load this places an alignment requirement on `q` - confirm.
#define MATRIX_FP16_ITER_16X16_AVX512(m, q, _RES, _LOAD, _PROC)    \
  {                                                                \
    __m512 zmm_lhs = _mm512_cvtph_ps(_LOAD((const __m256i *)(m))); \
    __m512 zmm_qf = _mm512_cvtph_ps(_LOAD((const __m256i *)(q)));  \
    __m512 zmm_rhs = _mm512_set1_ps(zmm_qf[0]);                    \
    _PROC(zmm_lhs, zmm_rhs, _RES##_0_0)                            \
    zmm_rhs = _mm512_set1_ps(zmm_qf[1]);                           \
    _PROC(zmm_lhs, zmm_rhs, _RES##_0_1)                            \
    zmm_rhs = _mm512_set1_ps(zmm_qf[2]);                           \
    _PROC(zmm_lhs, zmm_rhs, _RES##_0_2)                            \
    zmm_rhs = _mm512_set1_ps(zmm_qf[3]);                           \
    _PROC(zmm_lhs, zmm_rhs, _RES##_0_3)                            \
    zmm_rhs = _mm512_set1_ps(zmm_qf[4]);                           \
    _PROC(zmm_lhs, zmm_rhs, _RES##_0_4)                            \
    zmm_rhs = _mm512_set1_ps(zmm_qf[5]);                           \
    _PROC(zmm_lhs, zmm_rhs, _RES##_0_5)                            \
    zmm_rhs = _mm512_set1_ps(zmm_qf[6]);                           \
    _PROC(zmm_lhs, zmm_rhs, _RES##_0_6)                            \
    zmm_rhs = _mm512_set1_ps(zmm_qf[7]);                           \
    _PROC(zmm_lhs, zmm_rhs, _RES##_0_7)                            \
    zmm_rhs = _mm512_set1_ps(zmm_qf[8]);                           \
    _PROC(zmm_lhs, zmm_rhs, _RES##_0_8)                            \
    zmm_rhs = _mm512_set1_ps(zmm_qf[9]);                           \
    _PROC(zmm_lhs, zmm_rhs, _RES##_0_9)                            \
    zmm_rhs = _mm512_set1_ps(zmm_qf[10]);                          \
    _PROC(zmm_lhs, zmm_rhs, _RES##_0_10)                           \
    zmm_rhs = _mm512_set1_ps(zmm_qf[11]);                          \
    _PROC(zmm_lhs, zmm_rhs, _RES##_0_11)                           \
    zmm_rhs = _mm512_set1_ps(zmm_qf[12]);                          \
    _PROC(zmm_lhs, zmm_rhs, _RES##_0_12)                           \
    zmm_rhs = _mm512_set1_ps(zmm_qf[13]);                          \
    _PROC(zmm_lhs, zmm_rhs, _RES##_0_13)                           \
    zmm_rhs = _mm512_set1_ps(zmm_qf[14]);                          \
    _PROC(zmm_lhs, zmm_rhs, _RES##_0_14)                           \
    zmm_rhs = _mm512_set1_ps(zmm_qf[15]);                          \
    _PROC(zmm_lhs, zmm_rhs, _RES##_0_15)                           \
  }

//! Iterative process of computing distance (FP16, M=32, N=1)
//!
//! Reads 32 half-precision values from `m` with a 512-bit `_LOAD` and widens
//! the lower and upper 16 values into two float vectors. The single 16-bit
//! value at `q` is replicated across 16 lanes before widening, and both `m`
//! vectors are handed with it to MATRIX_VAR_PROC(2, 1, 0, ...).
//! NOTE(review): MATRIX_VAR_PROC lives in matrix_define.i; presumably it
//! applies `_PROC` to each `m` vector and updates `_RES##_<i>_0` - confirm.
//! `q` is parenthesized before the cast so that expression arguments such as
//! `base + offset` are evaluated before, not after, the pointer cast.
#define MATRIX_FP16_ITER_32X1_AVX512(m, q, _RES, _LOAD, _PROC)              \
  {                                                                         \
    __m512i zmm_mi = _LOAD((const __m512i *)(m));                           \
    __m512 zmm_m_0 = _mm512_cvtph_ps(_mm512_castsi512_si256(zmm_mi));       \
    __m512 zmm_m_1 = _mm512_cvtph_ps(_mm512_extracti64x4_epi64(zmm_mi, 1)); \
    __m512 zmm_q = _mm512_cvtph_ps(_mm256_set1_epi16(*(const short *)(q))); \
    MATRIX_VAR_PROC(2, 1, 0, zmm_m, zmm_q, _RES, _PROC)                     \
  }

//! Iterative process of computing distance (FP16, M=32, N=2)
//!
//! Reads 32 half-precision values from `m` with a 512-bit `_LOAD` and widens
//! the lower and upper 16 values into two float vectors. The value returned
//! by `_mm_broadcast_si32(q)` is widened; lanes 0 and 1 are broadcast in turn
//! and passed with both `m` vectors to MATRIX_VAR_PROC(2, 1, k, ...).
//! NOTE(review): `_mm_broadcast_si32` and MATRIX_VAR_PROC are defined
//! outside this block; presumably the former replicates the 32 bits at `q`
//! and the latter updates `_RES##_<i>_<k>` through `_PROC` - confirm.
#define MATRIX_FP16_ITER_32X2_AVX512(m, q, _RES, _LOAD, _PROC)                 \
  {                                                                            \
    __m512i zmm_raw = _LOAD((const __m512i *)(m));                             \
    __m512 zmm_lhs_0 = _mm512_cvtph_ps(_mm512_castsi512_si256(zmm_raw));       \
    __m512 zmm_lhs_1 = _mm512_cvtph_ps(_mm512_extracti64x4_epi64(zmm_raw, 1)); \
    __m128 xmm_qf = _mm_cvtph_ps(_mm_broadcast_si32(q));                       \
    __m512 zmm_rhs = _mm512_set1_ps(xmm_qf[0]);                                \
    MATRIX_VAR_PROC(2, 1, 0, zmm_lhs, zmm_rhs, _RES, _PROC)                    \
    zmm_rhs = _mm512_set1_ps(xmm_qf[1]);                                       \
    MATRIX_VAR_PROC(2, 1, 1, zmm_lhs, zmm_rhs, _RES, _PROC)                    \
  }

//! Iterative process of computing distance (FP16, M=32, N=4)
//!
//! Reads 32 half-precision values from `m` with a 512-bit `_LOAD` and widens
//! the lower and upper 16 values into two float vectors. Four half-precision
//! values are read from `q` with a 64-bit load and widened; lane k
//! (k = 0..3) is broadcast and passed with both `m` vectors to
//! MATRIX_VAR_PROC(2, 1, k, ...).
//! NOTE(review): MATRIX_VAR_PROC lives in matrix_define.i; presumably it
//! applies `_PROC` to each `m` vector and updates `_RES##_<i>_<k>` - confirm.
#define MATRIX_FP16_ITER_32X4_AVX512(m, q, _RES, _LOAD, _PROC)                 \
  {                                                                            \
    __m512i zmm_raw = _LOAD((const __m512i *)(m));                             \
    __m512 zmm_lhs_0 = _mm512_cvtph_ps(_mm512_castsi512_si256(zmm_raw));       \
    __m512 zmm_lhs_1 = _mm512_cvtph_ps(_mm512_extracti64x4_epi64(zmm_raw, 1)); \
    __m128 xmm_qf = _mm_cvtph_ps(_mm_loadl_epi64((const __m128i *)(q)));       \
    __m512 zmm_rhs = _mm512_set1_ps(xmm_qf[0]);                                \
    MATRIX_VAR_PROC(2, 1, 0, zmm_lhs, zmm_rhs, _RES, _PROC)                    \
    zmm_rhs = _mm512_set1_ps(xmm_qf[1]);                                       \
    MATRIX_VAR_PROC(2, 1, 1, zmm_lhs, zmm_rhs, _RES, _PROC)                    \
    zmm_rhs = _mm512_set1_ps(xmm_qf[2]);                                       \
    MATRIX_VAR_PROC(2, 1, 2, zmm_lhs, zmm_rhs, _RES, _PROC)                    \
    zmm_rhs = _mm512_set1_ps(xmm_qf[3]);                                       \
    MATRIX_VAR_PROC(2, 1, 3, zmm_lhs, zmm_rhs, _RES, _PROC)                    \
  }

//! Iterative process of computing distance (FP16, M=32, N=8)
//!
//! Reads 32 half-precision values from `m` with a 512-bit `_LOAD` and widens
//! the lower and upper 16 values into two float vectors. Eight
//! half-precision values are read from `q` with an unaligned 128-bit load and
//! widened; lane k (k = 0..7) is broadcast and passed with both `m` vectors
//! to MATRIX_VAR_PROC(2, 1, k, ...).
//! NOTE(review): MATRIX_VAR_PROC lives in matrix_define.i; presumably it
//! applies `_PROC` to each `m` vector and updates `_RES##_<i>_<k>` - confirm.
#define MATRIX_FP16_ITER_32X8_AVX512(m, q, _RES, _LOAD, _PROC)                 \
  {                                                                            \
    __m512i zmm_raw = _LOAD((const __m512i *)(m));                             \
    __m512 zmm_lhs_0 = _mm512_cvtph_ps(_mm512_castsi512_si256(zmm_raw));       \
    __m512 zmm_lhs_1 = _mm512_cvtph_ps(_mm512_extracti64x4_epi64(zmm_raw, 1)); \
    __m256 ymm_qf = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)(q)));    \
    __m512 zmm_rhs = _mm512_set1_ps(ymm_qf[0]);                                \
    MATRIX_VAR_PROC(2, 1, 0, zmm_lhs, zmm_rhs, _RES, _PROC)                    \
    zmm_rhs = _mm512_set1_ps(ymm_qf[1]);                                       \
    MATRIX_VAR_PROC(2, 1, 1, zmm_lhs, zmm_rhs, _RES, _PROC)                    \
    zmm_rhs = _mm512_set1_ps(ymm_qf[2]);                                       \
    MATRIX_VAR_PROC(2, 1, 2, zmm_lhs, zmm_rhs, _RES, _PROC)                    \
    zmm_rhs = _mm512_set1_ps(ymm_qf[3]);                                       \
    MATRIX_VAR_PROC(2, 1, 3, zmm_lhs, zmm_rhs, _RES, _PROC)                    \
    zmm_rhs = _mm512_set1_ps(ymm_qf[4]);                                       \
    MATRIX_VAR_PROC(2, 1, 4, zmm_lhs, zmm_rhs, _RES, _PROC)                    \
    zmm_rhs = _mm512_set1_ps(ymm_qf[5]);                                       \
    MATRIX_VAR_PROC(2, 1, 5, zmm_lhs, zmm_rhs, _RES, _PROC)                    \
    zmm_rhs = _mm512_set1_ps(ymm_qf[6]);                                       \
    MATRIX_VAR_PROC(2, 1, 6, zmm_lhs, zmm_rhs, _RES, _PROC)                    \
    zmm_rhs = _mm512_set1_ps(ymm_qf[7]);                                       \
    MATRIX_VAR_PROC(2, 1, 7, zmm_lhs, zmm_rhs, _RES, _PROC)                    \
  }

//! Iterative process of computing distance (FP16, M=32, N=16)
//!
//! Reads 32 half-precision values from `m` with a 512-bit `_LOAD` and widens
//! the lower and upper 16 values into two float vectors. Sixteen
//! half-precision values are read from `q` with an unaligned 256-bit load and
//! widened; lane k (k = 0..15) is broadcast and passed with both `m` vectors
//! to MATRIX_VAR_PROC(2, 1, k, ...).
//! NOTE(review): MATRIX_VAR_PROC lives in matrix_define.i; presumably it
//! applies `_PROC` to each `m` vector and updates `_RES##_<i>_<k>` - confirm.
#define MATRIX_FP16_ITER_32X16_AVX512(m, q, _RES, _LOAD, _PROC)                \
  {                                                                            \
    __m512i zmm_raw = _LOAD((const __m512i *)(m));                             \
    __m512 zmm_lhs_0 = _mm512_cvtph_ps(_mm512_castsi512_si256(zmm_raw));       \
    __m512 zmm_lhs_1 = _mm512_cvtph_ps(_mm512_extracti64x4_epi64(zmm_raw, 1)); \
    __m512 zmm_qf = _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)(q))); \
    __m512 zmm_rhs = _mm512_set1_ps(zmm_qf[0]);                                \
    MATRIX_VAR_PROC(2, 1, 0, zmm_lhs, zmm_rhs, _RES, _PROC)                    \
    zmm_rhs = _mm512_set1_ps(zmm_qf[1]);                                       \
    MATRIX_VAR_PROC(2, 1, 1, zmm_lhs, zmm_rhs, _RES, _PROC)                    \
    zmm_rhs = _mm512_set1_ps(zmm_qf[2]);                                       \
    MATRIX_VAR_PROC(2, 1, 2, zmm_lhs, zmm_rhs, _RES, _PROC)                    \
    zmm_rhs = _mm512_set1_ps(zmm_qf[3]);                                       \
    MATRIX_VAR_PROC(2, 1, 3, zmm_lhs, zmm_rhs, _RES, _PROC)                    \
    zmm_rhs = _mm512_set1_ps(zmm_qf[4]);                                       \
    MATRIX_VAR_PROC(2, 1, 4, zmm_lhs, zmm_rhs, _RES, _PROC)                    \
    zmm_rhs = _mm512_set1_ps(zmm_qf[5]);                                       \
    MATRIX_VAR_PROC(2, 1, 5, zmm_lhs, zmm_rhs, _RES, _PROC)                    \
    zmm_rhs = _mm512_set1_ps(zmm_qf[6]);                                       \
    MATRIX_VAR_PROC(2, 1, 6, zmm_lhs, zmm_rhs, _RES, _PROC)                    \
    zmm_rhs = _mm512_set1_ps(zmm_qf[7]);                                       \
    MATRIX_VAR_PROC(2, 1, 7, zmm_lhs, zmm_rhs, _RES, _PROC)                    \
    zmm_rhs = _mm512_set1_ps(zmm_qf[8]);                                       \
    MATRIX_VAR_PROC(2, 1, 8, zmm_lhs, zmm_rhs, _RES, _PROC)                    \
    zmm_rhs = _mm512_set1_ps(zmm_qf[9]);                                       \
    MATRIX_VAR_PROC(2, 1, 9, zmm_lhs, zmm_rhs, _RES, _PROC)                    \
    zmm_rhs = _mm512_set1_ps(zmm_qf[10]);                                      \
    MATRIX_VAR_PROC(2, 1, 10, zmm_lhs, zmm_rhs, _RES, _PROC)                   \
    zmm_rhs = _mm512_set1_ps(zmm_qf[11]);                                      \
    MATRIX_VAR_PROC(2, 1, 11, zmm_lhs, zmm_rhs, _RES, _PROC)                   \
    zmm_rhs = _mm512_set1_ps(zmm_qf[12]);                                      \
    MATRIX_VAR_PROC(2, 1, 12, zmm_lhs, zmm_rhs, _RES, _PROC)                   \
    zmm_rhs = _mm512_set1_ps(zmm_qf[13]);                                      \
    MATRIX_VAR_PROC(2, 1, 13, zmm_lhs, zmm_rhs, _RES, _PROC)                   \
    zmm_rhs = _mm512_set1_ps(zmm_qf[14]);                                      \
    MATRIX_VAR_PROC(2, 1, 14, zmm_lhs, zmm_rhs, _RES, _PROC)                   \
    zmm_rhs = _mm512_set1_ps(zmm_qf[15]);                                      \
    MATRIX_VAR_PROC(2, 1, 15, zmm_lhs, zmm_rhs, _RES, _PROC)                   \
  }

//! Iterative process of computing distance (FP16, M=32, N=32)
//!
//! Reads 32 half-precision values from `m` with a 512-bit `_LOAD` and widens
//! the lower and upper 16 values into two float vectors. The 32
//! half-precision values at `q` are consumed in two groups of 16 (unaligned
//! 256-bit loads at `q` and `q + 16`); each widened lane is broadcast and
//! passed with both `m` vectors to MATRIX_VAR_PROC(2, 1, k, ...) for
//! k = 0..31.
//! NOTE(review): MATRIX_VAR_PROC lives in matrix_define.i; presumably it
//! applies `_PROC` to each `m` vector and updates `_RES##_<i>_<k>` - confirm.
#define MATRIX_FP16_ITER_32X32_AVX512(m, q, _RES, _LOAD, _PROC)                \
  {                                                                            \
    __m512i zmm_raw = _LOAD((const __m512i *)(m));                             \
    __m512 zmm_lhs_0 = _mm512_cvtph_ps(_mm512_castsi512_si256(zmm_raw));       \
    __m512 zmm_lhs_1 = _mm512_cvtph_ps(_mm512_extracti64x4_epi64(zmm_raw, 1)); \
    __m512 zmm_qf = _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)(q))); \
    __m512 zmm_rhs = _mm512_set1_ps(zmm_qf[0]);                                \
    MATRIX_VAR_PROC(2, 1, 0, zmm_lhs, zmm_rhs, _RES, _PROC)                    \
    zmm_rhs = _mm512_set1_ps(zmm_qf[1]);                                       \
    MATRIX_VAR_PROC(2, 1, 1, zmm_lhs, zmm_rhs, _RES, _PROC)                    \
    zmm_rhs = _mm512_set1_ps(zmm_qf[2]);                                       \
    MATRIX_VAR_PROC(2, 1, 2, zmm_lhs, zmm_rhs, _RES, _PROC)                    \
    zmm_rhs = _mm512_set1_ps(zmm_qf[3]);                                       \
    MATRIX_VAR_PROC(2, 1, 3, zmm_lhs, zmm_rhs, _RES, _PROC)                    \
    zmm_rhs = _mm512_set1_ps(zmm_qf[4]);                                       \
    MATRIX_VAR_PROC(2, 1, 4, zmm_lhs, zmm_rhs, _RES, _PROC)                    \
    zmm_rhs = _mm512_set1_ps(zmm_qf[5]);                                       \
    MATRIX_VAR_PROC(2, 1, 5, zmm_lhs, zmm_rhs, _RES, _PROC)                    \
    zmm_rhs = _mm512_set1_ps(zmm_qf[6]);                                       \
    MATRIX_VAR_PROC(2, 1, 6, zmm_lhs, zmm_rhs, _RES, _PROC)                    \
    zmm_rhs = _mm512_set1_ps(zmm_qf[7]);                                       \
    MATRIX_VAR_PROC(2, 1, 7, zmm_lhs, zmm_rhs, _RES, _PROC)                    \
    zmm_rhs = _mm512_set1_ps(zmm_qf[8]);                                       \
    MATRIX_VAR_PROC(2, 1, 8, zmm_lhs, zmm_rhs, _RES, _PROC)                    \
    zmm_rhs = _mm512_set1_ps(zmm_qf[9]);                                       \
    MATRIX_VAR_PROC(2, 1, 9, zmm_lhs, zmm_rhs, _RES, _PROC)                    \
    zmm_rhs = _mm512_set1_ps(zmm_qf[10]);                                      \
    MATRIX_VAR_PROC(2, 1, 10, zmm_lhs, zmm_rhs, _RES, _PROC)                   \
    zmm_rhs = _mm512_set1_ps(zmm_qf[11]);                                      \
    MATRIX_VAR_PROC(2, 1, 11, zmm_lhs, zmm_rhs, _RES, _PROC)                   \
    zmm_rhs = _mm512_set1_ps(zmm_qf[12]);                                      \
    MATRIX_VAR_PROC(2, 1, 12, zmm_lhs, zmm_rhs, _RES, _PROC)                   \
    zmm_rhs = _mm512_set1_ps(zmm_qf[13]);                                      \
    MATRIX_VAR_PROC(2, 1, 13, zmm_lhs, zmm_rhs, _RES, _PROC)                   \
    zmm_rhs = _mm512_set1_ps(zmm_qf[14]);                                      \
    MATRIX_VAR_PROC(2, 1, 14, zmm_lhs, zmm_rhs, _RES, _PROC)                   \
    zmm_rhs = _mm512_set1_ps(zmm_qf[15]);                                      \
    MATRIX_VAR_PROC(2, 1, 15, zmm_lhs, zmm_rhs, _RES, _PROC)                   \
    zmm_qf = _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)(q + 16)));   \
    zmm_rhs = _mm512_set1_ps(zmm_qf[0]);                                       \
    MATRIX_VAR_PROC(2, 1, 16, zmm_lhs, zmm_rhs, _RES, _PROC)                   \
    zmm_rhs = _mm512_set1_ps(zmm_qf[1]);                                       \
    MATRIX_VAR_PROC(2, 1, 17, zmm_lhs, zmm_rhs, _RES, _PROC)                   \
    zmm_rhs = _mm512_set1_ps(zmm_qf[2]);                                       \
    MATRIX_VAR_PROC(2, 1, 18, zmm_lhs, zmm_rhs, _RES, _PROC)                   \
    zmm_rhs = _mm512_set1_ps(zmm_qf[3]);                                       \
    MATRIX_VAR_PROC(2, 1, 19, zmm_lhs, zmm_rhs, _RES, _PROC)                   \
    zmm_rhs = _mm512_set1_ps(zmm_qf[4]);                                       \
    MATRIX_VAR_PROC(2, 1, 20, zmm_lhs, zmm_rhs, _RES, _PROC)                   \
    zmm_rhs = _mm512_set1_ps(zmm_qf[5]);                                       \
    MATRIX_VAR_PROC(2, 1, 21, zmm_lhs, zmm_rhs, _RES, _PROC)                   \
    zmm_rhs = _mm512_set1_ps(zmm_qf[6]);                                       \
    MATRIX_VAR_PROC(2, 1, 22, zmm_lhs, zmm_rhs, _RES, _PROC)                   \
    zmm_rhs = _mm512_set1_ps(zmm_qf[7]);                                       \
    MATRIX_VAR_PROC(2, 1, 23, zmm_lhs, zmm_rhs, _RES, _PROC)                   \
    zmm_rhs = _mm512_set1_ps(zmm_qf[8]);                                       \
    MATRIX_VAR_PROC(2, 1, 24, zmm_lhs, zmm_rhs, _RES, _PROC)                   \
    zmm_rhs = _mm512_set1_ps(zmm_qf[9]);                                       \
    MATRIX_VAR_PROC(2, 1, 25, zmm_lhs, zmm_rhs, _RES, _PROC)                   \
    zmm_rhs = _mm512_set1_ps(zmm_qf[10]);                                      \
    MATRIX_VAR_PROC(2, 1, 26, zmm_lhs, zmm_rhs, _RES, _PROC)                   \
    zmm_rhs = _mm512_set1_ps(zmm_qf[11]);                                      \
    MATRIX_VAR_PROC(2, 1, 27, zmm_lhs, zmm_rhs, _RES, _PROC)                   \
    zmm_rhs = _mm512_set1_ps(zmm_qf[12]);                                      \
    MATRIX_VAR_PROC(2, 1, 28, zmm_lhs, zmm_rhs, _RES, _PROC)                   \
    zmm_rhs = _mm512_set1_ps(zmm_qf[13]);                                      \
    MATRIX_VAR_PROC(2, 1, 29, zmm_lhs, zmm_rhs, _RES, _PROC)                   \
    zmm_rhs = _mm512_set1_ps(zmm_qf[14]);                                      \
    MATRIX_VAR_PROC(2, 1, 30, zmm_lhs, zmm_rhs, _RES, _PROC)                   \
    zmm_rhs = _mm512_set1_ps(zmm_qf[15]);                                      \
    MATRIX_VAR_PROC(2, 1, 31, zmm_lhs, zmm_rhs, _RES, _PROC)                   \
  }

#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
//! Iterative process of computing distance (FP16, M=1, N=1)
//!
//! Variant used when __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined: loads
//! eight half-precision values from each of `m` and `q` and passes the
//! half-precision vectors directly to `_PROC` with result `_RES##_0_0`.
//! `m` and `q` are parenthesized before the pointer cast so that expression
//! arguments such as `base + offset` are offset before, not after, the cast.
#define MATRIX_FP16_ITER_1X1_NEON(m, q, _RES, _PROC)     \
  {                                                      \
    float16x8_t v_m = vld1q_f16((const float16_t *)(m)); \
    float16x8_t v_q = vld1q_f16((const float16_t *)(q)); \
    _PROC(v_m, v_q, _RES##_0_0)                          \
  }

#else
//! Iterative process of computing distance (FP16, M=1, N=1)
//!
//! Fallback variant: loads eight half-precision values from each of `m` and
//! `q`, widens the low four and then the high four to single precision, and
//! calls `_PROC` once per half. Both calls target `_RES##_0_0`.
//! `m` and `q` are parenthesized before the pointer cast so that expression
//! arguments such as `base + offset` are offset before, not after, the cast.
#define MATRIX_FP16_ITER_1X1_NEON(m, q, _RES, _PROC)     \
  {                                                      \
    float16x8_t v_m = vld1q_f16((const float16_t *)(m)); \
    float16x8_t v_q = vld1q_f16((const float16_t *)(q)); \
    float32x4_t v_m_0 = vcvt_f32_f16(vget_low_f16(v_m)); \
    float32x4_t v_q_0 = vcvt_f32_f16(vget_low_f16(v_q)); \
    _PROC(v_m_0, v_q_0, _RES##_0_0)                      \
    v_m_0 = vcvt_high_f32_f16(v_m);                      \
    v_q_0 = vcvt_high_f32_f16(v_q);                      \
    _PROC(v_m_0, v_q_0, _RES##_0_0)                      \
  }

#endif  // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC

================================================
FILE: src/ailego/math/distance_matrix_fp32.i
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <zvec/ailego/internal/platform.h>
#include "matrix_define.i"

// When AVX is not enabled, map the two AVX-style helpers used by the SSE
// macros below onto SSE instructions: a single-source permute becomes a
// shuffle of the vector with itself, and a scalar broadcast becomes a
// load-and-replicate.
#if !defined(__AVX__)
#undef _mm_permute_ps
#define _mm_permute_ps(a, b) _mm_shuffle_ps((a), (a), (b))
#define _mm_broadcast_ss(a) _mm_load1_ps(a)
#endif  // !__AVX__

// Build a 256-bit float vector from two 128-bit halves: `b` becomes the low
// half and `a` is inserted as the high half.
// NOTE(review): presumably supplied because some GNU-compatible toolchains
// lack this intrinsic - confirm.
#if defined(__AVX__) && defined(__GNUC__)
#define _mm256_set_m128(a, b) \
  _mm256_insertf128_ps(_mm256_castps128_ps256(b), (a), 1)
#endif  // __AVX__ && __GNUC__

// On NEON targets that are not AArch64, define vdupq_laneq_f32 as "extract
// lane `b` of the 128-bit vector `a`, then duplicate it to every lane".
#if defined(__ARM_NEON) && !defined(__aarch64__)
#define vdupq_laneq_f32(a, b) vdupq_n_f32(vgetq_lane_f32(a, b))
#endif  // __ARM_NEON && !__aarch64__

//! Iterative process of computing distance (FP32, M=2, N=1)
//!
//! Loads two 4-float vectors from `m` (offsets 0 and 4) and one from `q`
//! using `_LOAD`. The first `m` vector is combined through `_PROC` with
//! query lanes {0, 0, 1, 1} into `_RES##_0_0`; the second is combined with
//! query lanes {2, 2, 3, 3} into `_RES##_0_1`.
#define MATRIX_FP32_ITER_2X1_SSE(m, q, _RES, _LOAD, _PROC)            \
  {                                                                   \
    __m128 xmm_lhs_0 = _LOAD(m + 0);                                  \
    __m128 xmm_lhs_1 = _LOAD(m + 4);                                  \
    __m128 xmm_qv = _LOAD(q);                                         \
    __m128 xmm_rhs = _mm_permute_ps(xmm_qv, _MM_SHUFFLE(1, 1, 0, 0)); \
    _PROC(xmm_lhs_0, xmm_rhs, _RES##_0_0)                             \
    xmm_rhs = _mm_permute_ps(xmm_qv, _MM_SHUFFLE(3, 3, 2, 2));        \
    _PROC(xmm_lhs_1, xmm_rhs, _RES##_0_1)                             \
  }

//! One iteration step of the distance computation (FP32, M=2, N=2, SSE).
//! Loads one vector from `q` and one from `m`. The even-lane expansion of `q`
//! ({q0, q0, q2, q2}) is processed into `_RES##_0_0`, the odd-lane expansion
//! ({q1, q1, q3, q3}) into `_RES##_0_1`; both use the same `m` vector.
#define MATRIX_FP32_ITER_2X2_SSE(m, q, _RES, _LOAD, _PROC)               \
  {                                                                      \
    __m128 xmm_rhs = _LOAD(q);                                           \
    __m128 xmm_lhs = _LOAD(m);                                           \
    /* lanes {q0, q0, q2, q2} */                                         \
    __m128 xmm_even = _mm_permute_ps(xmm_rhs, _MM_SHUFFLE(2, 2, 0, 0));  \
    _PROC(xmm_lhs, xmm_even, _RES##_0_0)                                 \
    /* lanes {q1, q1, q3, q3} */                                         \
    __m128 xmm_odd = _mm_permute_ps(xmm_rhs, _MM_SHUFFLE(3, 3, 1, 1));   \
    _PROC(xmm_lhs, xmm_odd, _RES##_0_1)                                  \
  }

//! One iteration step of the distance computation (FP32, M=4, N=1, SSE).
//! Loads eight floats from `m` as two vectors and pairs the first with a
//! broadcast of q[0] (accumulator `_RES##_0_0`) and the second with a
//! broadcast of q[1] (accumulator `_RES##_0_1`).
#define MATRIX_FP32_ITER_4X1_SSE(m, q, _RES, _LOAD, _PROC) \
  {                                                        \
    __m128 xmm_lhs_0 = _LOAD(m + 0);                       \
    __m128 xmm_lhs_1 = _LOAD(m + 4);                       \
    __m128 xmm_rhs_0 = _mm_broadcast_ss(q + 0);            \
    _PROC(xmm_lhs_0, xmm_rhs_0, _RES##_0_0)                \
    __m128 xmm_rhs_1 = _mm_broadcast_ss(q + 1);            \
    _PROC(xmm_lhs_1, xmm_rhs_1, _RES##_0_1)                \
  }

//! One iteration step of the distance computation (FP32, M=4, N=2, SSE).
//! Loads a single vector from `m` and processes it against a broadcast of
//! q[0] (accumulator `_RES##_0_0`) and of q[1] (accumulator `_RES##_0_1`).
#define MATRIX_FP32_ITER_4X2_SSE(m, q, _RES, _LOAD, _PROC) \
  {                                                        \
    __m128 xmm_lhs = _LOAD(m);                             \
    __m128 xmm_rhs_0 = _mm_broadcast_ss(q + 0);            \
    _PROC(xmm_lhs, xmm_rhs_0, _RES##_0_0)                  \
    __m128 xmm_rhs_1 = _mm_broadcast_ss(q + 1);            \
    _PROC(xmm_lhs, xmm_rhs_1, _RES##_0_1)                  \
  }

//! One iteration step of the distance computation (FP32, M=4, N=4, SSE).
//! Loads a single vector from `m` and processes it against a broadcast of
//! each of q[0]..q[3]; the k-th broadcast uses accumulator `_RES##_0_<k>`.
#define MATRIX_FP32_ITER_4X4_SSE(m, q, _RES, _LOAD, _PROC) \
  {                                                        \
    __m128 xmm_lhs = _LOAD(m);                             \
    __m128 xmm_rhs_0 = _mm_broadcast_ss(q + 0);            \
    _PROC(xmm_lhs, xmm_rhs_0, _RES##_0_0)                  \
    __m128 xmm_rhs_1 = _mm_broadcast_ss(q + 1);            \
    _PROC(xmm_lhs, xmm_rhs_1, _RES##_0_1)                  \
    __m128 xmm_rhs_2 = _mm_broadcast_ss(q + 2);            \
    _PROC(xmm_lhs, xmm_rhs_2, _RES##_0_2)                  \
    __m128 xmm_rhs_3 = _mm_broadcast_ss(q + 3);            \
    _PROC(xmm_lhs, xmm_rhs_3, _RES##_0_3)                  \
  }

//! One iteration step of the distance computation (FP32, M=8, N=1, SSE).
//! Loads eight floats from `m` as two vectors and processes both against a
//! single broadcast of q[0]; the first vector uses accumulator `_RES##_0_0`,
//! the second `_RES##_1_0`.
#define MATRIX_FP32_ITER_8X1_SSE(m, q, _RES, _LOAD, _PROC) \
  {                                                        \
    __m128 xmm_lhs_0 = _LOAD(m + 0);                       \
    __m128 xmm_lhs_1 = _LOAD(m + 4);                       \
    __m128 xmm_rhs = _mm_broadcast_ss(q);                  \
    _PROC(xmm_lhs_0, xmm_rhs, _RES##_0_0)                  \
    _PROC(xmm_lhs_1, xmm_rhs, _RES##_1_0)                  \
  }

//! Iterative process of computing distance (FP32, M=8, N=2)
//!
//! Loads eight floats from `m` into xmm_m_0 / xmm_m_1, then for k = 0, 1
//! broadcasts q[k] into xmm_q and invokes MATRIX_VAR_PROC(2, 1, k, ...).
//! NOTE(review): MATRIX_VAR_PROC is defined outside this file (presumably in
//! matrix_define.i); it appears to paste the `xmm_m` prefix into
//! xmm_m_0..xmm_m_1 and apply `_PROC` to each with the matching `_RES`
//! accumulator -- confirm against its definition.
#define MATRIX_FP32_ITER_8X2_SSE(m, q, _RES, _LOAD, _PROC) \
  {                                                        \
    __m128 xmm_m_0 = _LOAD(m + 0);                         \
    __m128 xmm_m_1 = _LOAD(m + 4);                         \
    __m128 xmm_q = _mm_broadcast_ss(q + 0);                \
    MATRIX_VAR_PROC(2, 1, 0, xmm_m, xmm_q, _RES, _PROC)    \
    xmm_q = _mm_broadcast_ss(q + 1);                       \
    MATRIX_VAR_PROC(2, 1, 1, xmm_m, xmm_q, _RES, _PROC)    \
  }

//! Iterative process of computing distance (FP32, M=8, N=4)
//!
//! Loads eight floats from `m` into xmm_m_0 / xmm_m_1, then for k = 0..3
//! broadcasts q[k] into xmm_q and invokes MATRIX_VAR_PROC(2, 1, k, ...).
//! NOTE(review): MATRIX_VAR_PROC is defined outside this file (presumably in
//! matrix_define.i); it appears to paste the `xmm_m` prefix into
//! xmm_m_0..xmm_m_1 and apply `_PROC` to each with the matching `_RES`
//! accumulator -- confirm against its definition.
#define MATRIX_FP32_ITER_8X4_SSE(m, q, _RES, _LOAD, _PROC) \
  {                                                        \
    __m128 xmm_m_0 = _LOAD(m + 0);                         \
    __m128 xmm_m_1 = _LOAD(m + 4);                         \
    __m128 xmm_q = _mm_broadcast_ss(q + 0);                \
    MATRIX_VAR_PROC(2, 1, 0, xmm_m, xmm_q, _RES, _PROC)    \
    xmm_q = _mm_broadcast_ss(q + 1);                       \
    MATRIX_VAR_PROC(2, 1, 1, xmm_m, xmm_q, _RES, _PROC)    \
    xmm_q = _mm_broadcast_ss(q + 2);                       \
    MATRIX_VAR_PROC(2, 1, 2, xmm_m, xmm_q, _RES, _PROC)    \
    xmm_q = _mm_broadcast_ss(q + 3);                       \
    MATRIX_VAR_PROC(2, 1, 3, xmm_m, xmm_q, _RES, _PROC)    \
  }

//! Iterative process of computing distance (FP32, M=8, N=8)
//!
//! Loads eight floats from `m` into xmm_m_0 / xmm_m_1, then for k = 0..7
//! broadcasts q[k] into xmm_q and invokes MATRIX_VAR_PROC(2, 1, k, ...).
//! NOTE(review): MATRIX_VAR_PROC is defined outside this file (presumably in
//! matrix_define.i); it appears to paste the `xmm_m` prefix into
//! xmm_m_0..xmm_m_1 and apply `_PROC` to each with the matching `_RES`
//! accumulator -- confirm against its definition.
#define MATRIX_FP32_ITER_8X8_SSE(m, q, _RES, _LOAD, _PROC) \
  {                                                        \
    __m128 xmm_m_0 = _LOAD(m + 0);                         \
    __m128 xmm_m_1 = _LOAD(m + 4);                         \
    __m128 xmm_q = _mm_broadcast_ss(q);                    \
    MATRIX_VAR_PROC(2, 1, 0, xmm_m, xmm_q, _RES, _PROC)    \
    xmm_q = _mm_broadcast_ss(q + 1);                       \
    MATRIX_VAR_PROC(2, 1, 1, xmm_m, xmm_q, _RES, _PROC)    \
    xmm_q = _mm_broadcast_ss(q + 2);                       \
    MATRIX_VAR_PROC(2, 1, 2, xmm_m, xmm_q, _RES, _PROC)    \
    xmm_q = _mm_broadcast_ss(q + 3);                       \
    MATRIX_VAR_PROC(2, 1, 3, xmm_m, xmm_q, _RES, _PROC)    \
    xmm_q = _mm_broadcast_ss(q + 4);                       \
    MATRIX_VAR_PROC(2, 1, 4, xmm_m, xmm_q, _RES, _PROC)    \
    xmm_q = _mm_broadcast_ss(q + 5);                       \
    MATRIX_VAR_PROC(2, 1, 5, xmm_m, xmm_q, _RES, _PROC)    \
    xmm_q = _mm_broadcast_ss(q + 6);                       \
    MATRIX_VAR_PROC(2, 1, 6, xmm_m, xmm_q, _RES, _PROC)    \
    xmm_q = _mm_broadcast_ss(q + 7);                       \
    MATRIX_VAR_PROC(2, 1, 7, xmm_m, xmm_q, _RES, _PROC)    \
  }

//! Iterative process of computing distance (FP32, M=16, N=1)
//!
//! Loads sixteen floats from `m` into xmm_m_0..xmm_m_3, broadcasts q[0] into
//! xmm_q and invokes MATRIX_VAR_PROC(4, 1, 0, ...) once.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this file (presumably in
//! matrix_define.i); it appears to paste the `xmm_m` prefix into
//! xmm_m_0..xmm_m_3 and apply `_PROC` to each with the matching `_RES`
//! accumulator -- confirm against its definition.
#define MATRIX_FP32_ITER_16X1_SSE(m, q, _RES, _LOAD, _PROC) \
  {                                                         \
    __m128 xmm_m_0 = _LOAD(m + 0);                          \
    __m128 xmm_m_1 = _LOAD(m + 4);                          \
    __m128 xmm_m_2 = _LOAD(m + 8);                          \
    __m128 xmm_m_3 = _LOAD(m + 12);                         \
    __m128 xmm_q = _mm_broadcast_ss(q);                     \
    MATRIX_VAR_PROC(4, 1, 0, xmm_m, xmm_q, _RES, _PROC)     \
  }

//! Iterative process of computing distance (FP32, M=16, N=2)
//!
//! Loads sixteen floats from `m` into xmm_m_0..xmm_m_3, then for k = 0, 1
//! broadcasts q[k] into xmm_q and invokes MATRIX_VAR_PROC(4, 1, k, ...).
//! NOTE(review): MATRIX_VAR_PROC is defined outside this file (presumably in
//! matrix_define.i); it appears to paste the `xmm_m` prefix into
//! xmm_m_0..xmm_m_3 and apply `_PROC` to each with the matching `_RES`
//! accumulator -- confirm against its definition.
#define MATRIX_FP32_ITER_16X2_SSE(m, q, _RES, _LOAD, _PROC) \
  {                                                         \
    __m128 xmm_m_0 = _LOAD(m + 0);                          \
    __m128 xmm_m_1 = _LOAD(m + 4);                          \
    __m128 xmm_m_2 = _LOAD(m + 8);                          \
    __m128 xmm_m_3 = _LOAD(m + 12);                         \
    __m128 xmm_q = _mm_broadcast_ss(q + 0);                 \
    MATRIX_VAR_PROC(4, 1, 0, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 1);                        \
    MATRIX_VAR_PROC(4, 1, 1, xmm_m, xmm_q, _RES, _PROC)     \
  }

//! Iterative process of computing distance (FP32, M=16, N=4)
//!
//! Loads sixteen floats from `m` into xmm_m_0..xmm_m_3, then for k = 0..3
//! broadcasts q[k] into xmm_q and invokes MATRIX_VAR_PROC(4, 1, k, ...).
//! NOTE(review): MATRIX_VAR_PROC is defined outside this file (presumably in
//! matrix_define.i); it appears to paste the `xmm_m` prefix into
//! xmm_m_0..xmm_m_3 and apply `_PROC` to each with the matching `_RES`
//! accumulator -- confirm against its definition.
#define MATRIX_FP32_ITER_16X4_SSE(m, q, _RES, _LOAD, _PROC) \
  {                                                         \
    __m128 xmm_m_0 = _LOAD(m + 0);                          \
    __m128 xmm_m_1 = _LOAD(m + 4);                          \
    __m128 xmm_m_2 = _LOAD(m + 8);                          \
    __m128 xmm_m_3 = _LOAD(m + 12);                         \
    __m128 xmm_q = _mm_broadcast_ss(q + 0);                 \
    MATRIX_VAR_PROC(4, 1, 0, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 1);                        \
    MATRIX_VAR_PROC(4, 1, 1, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 2);                        \
    MATRIX_VAR_PROC(4, 1, 2, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 3);                        \
    MATRIX_VAR_PROC(4, 1, 3, xmm_m, xmm_q, _RES, _PROC)     \
  }

//! Iterative process of computing distance (FP32, M=16, N=8)
//!
//! Loads sixteen floats from `m` into xmm_m_0..xmm_m_3, then for k = 0..7
//! broadcasts q[k] into xmm_q and invokes MATRIX_VAR_PROC(4, 1, k, ...).
//! NOTE(review): MATRIX_VAR_PROC is defined outside this file (presumably in
//! matrix_define.i); it appears to paste the `xmm_m` prefix into
//! xmm_m_0..xmm_m_3 and apply `_PROC` to each with the matching `_RES`
//! accumulator -- confirm against its definition.
#define MATRIX_FP32_ITER_16X8_SSE(m, q, _RES, _LOAD, _PROC) \
  {                                                         \
    __m128 xmm_m_0 = _LOAD(m + 0);                          \
    __m128 xmm_m_1 = _LOAD(m + 4);                          \
    __m128 xmm_m_2 = _LOAD(m + 8);                          \
    __m128 xmm_m_3 = _LOAD(m + 12);                         \
    __m128 xmm_q = _mm_broadcast_ss(q);                     \
    MATRIX_VAR_PROC(4, 1, 0, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 1);                        \
    MATRIX_VAR_PROC(4, 1, 1, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 2);                        \
    MATRIX_VAR_PROC(4, 1, 2, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 3);                        \
    MATRIX_VAR_PROC(4, 1, 3, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 4);                        \
    MATRIX_VAR_PROC(4, 1, 4, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 5);                        \
    MATRIX_VAR_PROC(4, 1, 5, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 6);                        \
    MATRIX_VAR_PROC(4, 1, 6, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 7);                        \
    MATRIX_VAR_PROC(4, 1, 7, xmm_m, xmm_q, _RES, _PROC)     \
  }

//! Iterative process of computing distance (FP32, M=16, N=16)
//!
//! Loads sixteen floats from `m` into xmm_m_0..xmm_m_3, then for k = 0..15
//! broadcasts q[k] into xmm_q and invokes MATRIX_VAR_PROC(4, 1, k, ...).
//! NOTE(review): MATRIX_VAR_PROC is defined outside this file (presumably in
//! matrix_define.i); it appears to paste the `xmm_m` prefix into
//! xmm_m_0..xmm_m_3 and apply `_PROC` to each with the matching `_RES`
//! accumulator -- confirm against its definition.
#define MATRIX_FP32_ITER_16X16_SSE(m, q, _RES, _LOAD, _PROC) \
  {                                                          \
    __m128 xmm_m_0 = _LOAD(m + 0);                           \
    __m128 xmm_m_1 = _LOAD(m + 4);                           \
    __m128 xmm_m_2 = _LOAD(m + 8);                           \
    __m128 xmm_m_3 = _LOAD(m + 12);                          \
    __m128 xmm_q = _mm_broadcast_ss(q);                      \
    MATRIX_VAR_PROC(4, 1, 0, xmm_m, xmm_q, _RES, _PROC)      \
    xmm_q = _mm_broadcast_ss(q + 1);                         \
    MATRIX_VAR_PROC(4, 1, 1, xmm_m, xmm_q, _RES, _PROC)      \
    xmm_q = _mm_broadcast_ss(q + 2);                         \
    MATRIX_VAR_PROC(4, 1, 2, xmm_m, xmm_q, _RES, _PROC)      \
    xmm_q = _mm_broadcast_ss(q + 3);                         \
    MATRIX_VAR_PROC(4, 1, 3, xmm_m, xmm_q, _RES, _PROC)      \
    xmm_q = _mm_broadcast_ss(q + 4);                         \
    MATRIX_VAR_PROC(4, 1, 4, xmm_m, xmm_q, _RES, _PROC)      \
    xmm_q = _mm_broadcast_ss(q + 5);                         \
    MATRIX_VAR_PROC(4, 1, 5, xmm_m, xmm_q, _RES, _PROC)      \
    xmm_q = _mm_broadcast_ss(q + 6);                         \
    MATRIX_VAR_PROC(4, 1, 6, xmm_m, xmm_q, _RES, _PROC)      \
    xmm_q = _mm_broadcast_ss(q + 7);                         \
    MATRIX_VAR_PROC(4, 1, 7, xmm_m, xmm_q, _RES, _PROC)      \
    xmm_q = _mm_broadcast_ss(q + 8);                         \
    MATRIX_VAR_PROC(4, 1, 8, xmm_m, xmm_q, _RES, _PROC)      \
    xmm_q = _mm_broadcast_ss(q + 9);                         \
    MATRIX_VAR_PROC(4, 1, 9, xmm_m, xmm_q, _RES, _PROC)      \
    xmm_q = _mm_broadcast_ss(q + 10);                        \
    MATRIX_VAR_PROC(4, 1, 10, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 11);                        \
    MATRIX_VAR_PROC(4, 1, 11, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 12);                        \
    MATRIX_VAR_PROC(4, 1, 12, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 13);                        \
    MATRIX_VAR_PROC(4, 1, 13, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 14);                        \
    MATRIX_VAR_PROC(4, 1, 14, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 15);                        \
    MATRIX_VAR_PROC(4, 1, 15, xmm_m, xmm_q, _RES, _PROC)     \
  }

//! One iteration step of the distance computation (FP32, M=32, N=1, SSE).
//! Broadcasts q[0] once, then walks the 32 floats at `m` in two batches of
//! four vectors. The i-th vector (i = 0..7, i.e. m + 4 * i) is passed to
//! `_PROC` together with the broadcast and accumulator `_RES##_<i>_0`.
#define MATRIX_FP32_ITER_32X1_SSE(m, q, _RES, _LOAD, _PROC) \
  {                                                         \
    __m128 xmm_rhs = _mm_broadcast_ss(q);                   \
    /* first batch: m[0..15] */                             \
    __m128 xmm_lhs_0 = _LOAD(m + 0);                        \
    __m128 xmm_lhs_1 = _LOAD(m + 4);                        \
    __m128 xmm_lhs_2 = _LOAD(m + 8);                        \
    __m128 xmm_lhs_3 = _LOAD(m + 12);                       \
    _PROC(xmm_lhs_0, xmm_rhs, _RES##_0_0)                   \
    _PROC(xmm_lhs_1, xmm_rhs, _RES##_1_0)                   \
    _PROC(xmm_lhs_2, xmm_rhs, _RES##_2_0)                   \
    _PROC(xmm_lhs_3, xmm_rhs, _RES##_3_0)                   \
    /* second batch: m[16..31] */                           \
    __m128 xmm_lhs_4 = _LOAD(m + 16);                       \
    __m128 xmm_lhs_5 = _LOAD(m + 20);                       \
    __m128 xmm_lhs_6 = _LOAD(m + 24);                       \
    __m128 xmm_lhs_7 = _LOAD(m + 28);                       \
    _PROC(xmm_lhs_4, xmm_rhs, _RES##_4_0)                   \
    _PROC(xmm_lhs_5, xmm_rhs, _RES##_5_0)                   \
    _PROC(xmm_lhs_6, xmm_rhs, _RES##_6_0)                   \
    _PROC(xmm_lhs_7, xmm_rhs, _RES##_7_0)                   \
  }

//! Iterative process of computing distance (FP32, M=32, N=2)
//!
//! Broadcasts q[0] and q[1] into xmm_q_0 / xmm_q_1, then walks the 32 floats
//! at `m` in two batches of four vectors (xmm_m_0..xmm_m_3 are reloaded for
//! the second batch). For the i-th vector (i = 0..7) it invokes
//! MATRIX_VAR_PROC(1, 2, i, <vector>, xmm_q, ...).
//! NOTE(review): MATRIX_VAR_PROC is defined outside this file (presumably in
//! matrix_define.i); it appears to paste the `xmm_q` prefix into
//! xmm_q_0..xmm_q_1 and apply `_PROC` to the given `m` vector against each --
//! confirm against its definition.
#define MATRIX_FP32_ITER_32X2_SSE(m, q, _RES, _LOAD, _PROC) \
  {                                                         \
    __m128 xmm_q_0 = _mm_broadcast_ss(q + 0);               \
    __m128 xmm_q_1 = _mm_broadcast_ss(q + 1);               \
    __m128 xmm_m_0 = _LOAD(m + 0);                          \
    __m128 xmm_m_1 = _LOAD(m + 4);                          \
    __m128 xmm_m_2 = _LOAD(m + 8);                          \
    __m128 xmm_m_3 = _LOAD(m + 12);                         \
    MATRIX_VAR_PROC(1, 2, 0, xmm_m_0, xmm_q, _RES, _PROC)   \
    MATRIX_VAR_PROC(1, 2, 1, xmm_m_1, xmm_q, _RES, _PROC)   \
    MATRIX_VAR_PROC(1, 2, 2, xmm_m_2, xmm_q, _RES, _PROC)   \
    MATRIX_VAR_PROC(1, 2, 3, xmm_m_3, xmm_q, _RES, _PROC)   \
    xmm_m_0 = _LOAD(m + 16);                                \
    xmm_m_1 = _LOAD(m + 20);                                \
    xmm_m_2 = _LOAD(m + 24);                                \
    xmm_m_3 = _LOAD(m + 28);                                \
    MATRIX_VAR_PROC(1, 2, 4, xmm_m_0, xmm_q, _RES, _PROC)   \
    MATRIX_VAR_PROC(1, 2, 5, xmm_m_1, xmm_q, _RES, _PROC)   \
    MATRIX_VAR_PROC(1, 2, 6, xmm_m_2, xmm_q, _RES, _PROC)   \
    MATRIX_VAR_PROC(1, 2, 7, xmm_m_3, xmm_q, _RES, _PROC)   \
  }

//! Iterative process of computing distance (FP32, M=32, N=4)
//!
//! Broadcasts q[0]..q[3] into xmm_q_0..xmm_q_3, then walks the 32 floats at
//! `m` in two batches of four vectors (xmm_m_0..xmm_m_3 are reloaded for the
//! second batch). For the i-th vector (i = 0..7) it invokes
//! MATRIX_VAR_PROC(1, 4, i, <vector>, xmm_q, ...).
//! NOTE(review): MATRIX_VAR_PROC is defined outside this file (presumably in
//! matrix_define.i); it appears to paste the `xmm_q` prefix into
//! xmm_q_0..xmm_q_3 and apply `_PROC` to the given `m` vector against each --
//! confirm against its definition.
#define MATRIX_FP32_ITER_32X4_SSE(m, q, _RES, _LOAD, _PROC) \
  {                                                         \
    __m128 xmm_q_0 = _mm_broadcast_ss(q + 0);               \
    __m128 xmm_q_1 = _mm_broadcast_ss(q + 1);               \
    __m128 xmm_q_2 = _mm_broadcast_ss(q + 2);               \
    __m128 xmm_q_3 = _mm_broadcast_ss(q + 3);               \
    __m128 xmm_m_0 = _LOAD(m + 0);                          \
    __m128 xmm_m_1 = _LOAD(m + 4);                          \
    __m128 xmm_m_2 = _LOAD(m + 8);                          \
    __m128 xmm_m_3 = _LOAD(m + 12);                         \
    MATRIX_VAR_PROC(1, 4, 0, xmm_m_0, xmm_q, _RES, _PROC)   \
    MATRIX_VAR_PROC(1, 4, 1, xmm_m_1, xmm_q, _RES, _PROC)   \
    MATRIX_VAR_PROC(1, 4, 2, xmm_m_2, xmm_q, _RES, _PROC)   \
    MATRIX_VAR_PROC(1, 4, 3, xmm_m_3, xmm_q, _RES, _PROC)   \
    xmm_m_0 = _LOAD(m + 16);                                \
    xmm_m_1 = _LOAD(m + 20);                                \
    xmm_m_2 = _LOAD(m + 24);                                \
    xmm_m_3 = _LOAD(m + 28);                                \
    MATRIX_VAR_PROC(1, 4, 4, xmm_m_0, xmm_q, _RES, _PROC)   \
    MATRIX_VAR_PROC(1, 4, 5, xmm_m_1, xmm_q, _RES, _PROC)   \
    MATRIX_VAR_PROC(1, 4, 6, xmm_m_2, xmm_q, _RES, _PROC)   \
    MATRIX_VAR_PROC(1, 4, 7, xmm_m_3, xmm_q, _RES, _PROC)   \
  }

//! Iterative process of computing distance (FP32, M=32, N=8)
//!
//! Broadcasts q[0]..q[7] into xmm_q_0..xmm_q_7, then walks the 32 floats at
//! `m` in two batches of four vectors (xmm_m_0..xmm_m_3 are reloaded for the
//! second batch). For the i-th vector (i = 0..7) it invokes
//! MATRIX_VAR_PROC(1, 8, i, <vector>, xmm_q, ...).
//! NOTE(review): MATRIX_VAR_PROC is defined outside this file (presumably in
//! matrix_define.i); it appears to paste the `xmm_q` prefix into
//! xmm_q_0..xmm_q_7 and apply `_PROC` to the given `m` vector against each --
//! confirm against its definition.
#define MATRIX_FP32_ITER_32X8_SSE(m, q, _RES, _LOAD, _PROC) \
  {                                                         \
    __m128 xmm_q_0 = _mm_broadcast_ss(q + 0);               \
    __m128 xmm_q_1 = _mm_broadcast_ss(q + 1);               \
    __m128 xmm_q_2 = _mm_broadcast_ss(q + 2);               \
    __m128 xmm_q_3 = _mm_broadcast_ss(q + 3);               \
    __m128 xmm_q_4 = _mm_broadcast_ss(q + 4);               \
    __m128 xmm_q_5 = _mm_broadcast_ss(q + 5);               \
    __m128 xmm_q_6 = _mm_broadcast_ss(q + 6);               \
    __m128 xmm_q_7 = _mm_broadcast_ss(q + 7);               \
    __m128 xmm_m_0 = _LOAD(m + 0);                          \
    __m128 xmm_m_1 = _LOAD(m + 4);                          \
    __m128 xmm_m_2 = _LOAD(m + 8);                          \
    __m128 xmm_m_3 = _LOAD(m + 12);                         \
    MATRIX_VAR_PROC(1, 8, 0, xmm_m_0, xmm_q, _RES, _PROC)   \
    MATRIX_VAR_PROC(1, 8, 1, xmm_m_1, xmm_q, _RES, _PROC)   \
    MATRIX_VAR_PROC(1, 8, 2, xmm_m_2, xmm_q, _RES, _PROC)   \
    MATRIX_VAR_PROC(1, 8, 3, xmm_m_3, xmm_q, _RES, _PROC)   \
    xmm_m_0 = _LOAD(m + 16);                                \
    xmm_m_1 = _LOAD(m + 20);                                \
    xmm_m_2 = _LOAD(m + 24);                                \
    xmm_m_3 = _LOAD(m + 28);                                \
    MATRIX_VAR_PROC(1, 8, 4, xmm_m_0, xmm_q, _RES, _PROC)   \
    MATRIX_VAR_PROC(1, 8, 5, xmm_m_1, xmm_q, _RES, _PROC)   \
    MATRIX_VAR_PROC(1, 8, 6, xmm_m_2, xmm_q, _RES, _PROC)   \
    MATRIX_VAR_PROC(1, 8, 7, xmm_m_3, xmm_q, _RES, _PROC)   \
  }

//! Iterative process of computing distance (FP32, M=32, N=16)
//!
//! Loads all 32 floats from `m` into xmm_m_0..xmm_m_7, then for k = 0..15
//! broadcasts q[k] into xmm_q and invokes MATRIX_VAR_PROC(8, 1, k, ...).
//! NOTE(review): MATRIX_VAR_PROC is defined outside this file (presumably in
//! matrix_define.i); it appears to paste the `xmm_m` prefix into
//! xmm_m_0..xmm_m_7 and apply `_PROC` to each with the matching `_RES`
//! accumulator -- confirm against its definition.
#define MATRIX_FP32_ITER_32X16_SSE(m, q, _RES, _LOAD, _PROC) \
  {                                                          \
    __m128 xmm_m_0 = _LOAD(m + 0);                           \
    __m128 xmm_m_1 = _LOAD(m + 4);                           \
    __m128 xmm_m_2 = _LOAD(m + 8);                           \
    __m128 xmm_m_3 = _LOAD(m + 12);                          \
    __m128 xmm_m_4 = _LOAD(m + 16);                          \
    __m128 xmm_m_5 = _LOAD(m + 20);                          \
    __m128 xmm_m_6 = _LOAD(m + 24);                          \
    __m128 xmm_m_7 = _LOAD(m + 28);                          \
    __m128 xmm_q = _mm_broadcast_ss(q);                      \
    MATRIX_VAR_PROC(8, 1, 0, xmm_m, xmm_q, _RES, _PROC)      \
    xmm_q = _mm_broadcast_ss(q + 1);                         \
    MATRIX_VAR_PROC(8, 1, 1, xmm_m, xmm_q, _RES, _PROC)      \
    xmm_q = _mm_broadcast_ss(q + 2);                         \
    MATRIX_VAR_PROC(8, 1, 2, xmm_m, xmm_q, _RES, _PROC)      \
    xmm_q = _mm_broadcast_ss(q + 3);                         \
    MATRIX_VAR_PROC(8, 1, 3, xmm_m, xmm_q, _RES, _PROC)      \
    xmm_q = _mm_broadcast_ss(q + 4);                         \
    MATRIX_VAR_PROC(8, 1, 4, xmm_m, xmm_q, _RES, _PROC)      \
    xmm_q = _mm_broadcast_ss(q + 5);                         \
    MATRIX_VAR_PROC(8, 1, 5, xmm_m, xmm_q, _RES, _PROC)      \
    xmm_q = _mm_broadcast_ss(q + 6);                         \
    MATRIX_VAR_PROC(8, 1, 6, xmm_m, xmm_q, _RES, _PROC)      \
    xmm_q = _mm_broadcast_ss(q + 7);                         \
    MATRIX_VAR_PROC(8, 1, 7, xmm_m, xmm_q, _RES, _PROC)      \
    xmm_q = _mm_broadcast_ss(q + 8);                         \
    MATRIX_VAR_PROC(8, 1, 8, xmm_m, xmm_q, _RES, _PROC)      \
    xmm_q = _mm_broadcast_ss(q + 9);                         \
    MATRIX_VAR_PROC(8, 1, 9, xmm_m, xmm_q, _RES, _PROC)      \
    xmm_q = _mm_broadcast_ss(q + 10);                        \
    MATRIX_VAR_PROC(8, 1, 10, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 11);                        \
    MATRIX_VAR_PROC(8, 1, 11, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 12);                        \
    MATRIX_VAR_PROC(8, 1, 12, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 13);                        \
    MATRIX_VAR_PROC(8, 1, 13, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 14);                        \
    MATRIX_VAR_PROC(8, 1, 14, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 15);                        \
    MATRIX_VAR_PROC(8, 1, 15, xmm_m, xmm_q, _RES, _PROC)     \
  }

//! Iterative process of computing distance (FP32, M=32, N=32)
//!
//! Loads all 32 floats from `m` into xmm_m_0..xmm_m_7, then for k = 0..31
//! broadcasts q[k] into xmm_q and invokes MATRIX_VAR_PROC(8, 1, k, ...).
//! NOTE(review): MATRIX_VAR_PROC is defined outside this file (presumably in
//! matrix_define.i); it appears to paste the `xmm_m` prefix into
//! xmm_m_0..xmm_m_7 and apply `_PROC` to each with the matching `_RES`
//! accumulator -- confirm against its definition.
#define MATRIX_FP32_ITER_32X32_SSE(m, q, _RES, _LOAD, _PROC) \
  {                                                          \
    __m128 xmm_m_0 = _LOAD(m + 0);                           \
    __m128 xmm_m_1 = _LOAD(m + 4);                           \
    __m128 xmm_m_2 = _LOAD(m + 8);                           \
    __m128 xmm_m_3 = _LOAD(m + 12);                          \
    __m128 xmm_m_4 = _LOAD(m + 16);                          \
    __m128 xmm_m_5 = _LOAD(m + 20);                          \
    __m128 xmm_m_6 = _LOAD(m + 24);                          \
    __m128 xmm_m_7 = _LOAD(m + 28);                          \
    __m128 xmm_q = _mm_broadcast_ss(q);                      \
    MATRIX_VAR_PROC(8, 1, 0, xmm_m, xmm_q, _RES, _PROC)      \
    xmm_q = _mm_broadcast_ss(q + 1);                         \
    MATRIX_VAR_PROC(8, 1, 1, xmm_m, xmm_q, _RES, _PROC)      \
    xmm_q = _mm_broadcast_ss(q + 2);                         \
    MATRIX_VAR_PROC(8, 1, 2, xmm_m, xmm_q, _RES, _PROC)      \
    xmm_q = _mm_broadcast_ss(q + 3);                         \
    MATRIX_VAR_PROC(8, 1, 3, xmm_m, xmm_q, _RES, _PROC)      \
    xmm_q = _mm_broadcast_ss(q + 4);                         \
    MATRIX_VAR_PROC(8, 1, 4, xmm_m, xmm_q, _RES, _PROC)      \
    xmm_q = _mm_broadcast_ss(q + 5);                         \
    MATRIX_VAR_PROC(8, 1, 5, xmm_m, xmm_q, _RES, _PROC)      \
    xmm_q = _mm_broadcast_ss(q + 6);                         \
    MATRIX_VAR_PROC(8, 1, 6, xmm_m, xmm_q, _RES, _PROC)      \
    xmm_q = _mm_broadcast_ss(q + 7);                         \
    MATRIX_VAR_PROC(8, 1, 7, xmm_m, xmm_q, _RES, _PROC)      \
    xmm_q = _mm_broadcast_ss(q + 8);                         \
    MATRIX_VAR_PROC(8, 1, 8, xmm_m, xmm_q, _RES, _PROC)      \
    xmm_q = _mm_broadcast_ss(q + 9);                         \
    MATRIX_VAR_PROC(8, 1, 9, xmm_m, xmm_q, _RES, _PROC)      \
    xmm_q = _mm_broadcast_ss(q + 10);                        \
    MATRIX_VAR_PROC(8, 1, 10, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 11);                        \
    MATRIX_VAR_PROC(8, 1, 11, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 12);                        \
    MATRIX_VAR_PROC(8, 1, 12, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 13);                        \
    MATRIX_VAR_PROC(8, 1, 13, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 14);                        \
    MATRIX_VAR_PROC(8, 1, 14, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 15);                        \
    MATRIX_VAR_PROC(8, 1, 15, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 16);                        \
    MATRIX_VAR_PROC(8, 1, 16, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 17);                        \
    MATRIX_VAR_PROC(8, 1, 17, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 18);                        \
    MATRIX_VAR_PROC(8, 1, 18, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 19);                        \
    MATRIX_VAR_PROC(8, 1, 19, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 20);                        \
    MATRIX_VAR_PROC(8, 1, 20, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 21);                        \
    MATRIX_VAR_PROC(8, 1, 21, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 22);                        \
    MATRIX_VAR_PROC(8, 1, 22, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 23);                        \
    MATRIX_VAR_PROC(8, 1, 23, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 24);                        \
    MATRIX_VAR_PROC(8, 1, 24, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 25);                        \
    MATRIX_VAR_PROC(8, 1, 25, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 26);                        \
    MATRIX_VAR_PROC(8, 1, 26, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 27);                        \
    MATRIX_VAR_PROC(8, 1, 27, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 28);                        \
    MATRIX_VAR_PROC(8, 1, 28, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 29);                        \
    MATRIX_VAR_PROC(8, 1, 29, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 30);                        \
    MATRIX_VAR_PROC(8, 1, 30, xmm_m, xmm_q, _RES, _PROC)     \
    xmm_q = _mm_broadcast_ss(q + 31);                        \
    MATRIX_VAR_PROC(8, 1, 31, xmm_m, xmm_q, _RES, _PROC)     \
  }

//! One iteration step of the distance computation (FP32, M=2, N=1, AVX).
//! Loads eight floats from `m` and builds the matching `q` operand by
//! repeating each of q[0]..q[3] twice: {q0, q0, q1, q1, q2, q2, q3, q3}.
//! The pair is passed to `_PROC` with accumulator `_RES##_0_0`.
#define MATRIX_FP32_ITER_2X1_AVX(m, q, _RES, _LOAD, _PROC)             \
  {                                                                    \
    __m256 ymm_lhs = _LOAD(m);                                         \
    /* _mm256_set_ps lists lanes from highest to lowest */             \
    __m256 ymm_rhs =                                                   \
        _mm256_set_ps(q[3], q[3], q[2], q[2], q[1], q[1], q[0], q[0]); \
    _PROC(ymm_lhs, ymm_rhs, _RES##_0_0)                                \
  }

//! One iteration step of the distance computation (FP32, M=2, N=2, AVX).
//! Loads eight floats from each of `q` and `m`. The even-indexed `q` lanes
//! duplicated into their odd neighbours are processed into `_RES##_0_0`; the
//! odd-indexed lanes duplicated into their even neighbours into `_RES##_0_1`.
#define MATRIX_FP32_ITER_2X2_AVX(m, q, _RES, _LOAD, _PROC) \
  {                                                        \
    __m256 ymm_rhs = _LOAD(q);                             \
    __m256 ymm_lhs = _LOAD(m);                             \
    /* lanes {q0, q0, q2, q2, q4, q4, q6, q6} */           \
    __m256 ymm_even = _mm256_moveldup_ps(ymm_rhs);         \
    _PROC(ymm_lhs, ymm_even, _RES##_0_0)                   \
    /* lanes {q1, q1, q3, q3, q5, q5, q7, q7} */           \
    __m256 ymm_odd = _mm256_movehdup_ps(ymm_rhs);          \
    _PROC(ymm_lhs, ymm_odd, _RES##_0_1)                    \
  }

//! One iteration step of the distance computation (FP32, M=4, N=1, AVX).
//! Loads eight floats from `m` and pairs them with a vector whose low 128
//! bits hold a broadcast of q[0] and whose high 128 bits hold a broadcast of
//! q[1]. The pair is passed to `_PROC` with accumulator `_RES##_0_0`.
#define MATRIX_FP32_ITER_4X1_AVX(m, q, _RES, _LOAD, _PROC) \
  {                                                        \
    __m256 ymm_lhs = _LOAD(m);                             \
    __m128 xmm_hi = _mm_broadcast_ss(q + 1);               \
    __m128 xmm_lo = _mm_broadcast_ss(q);                   \
    __m256 ymm_rhs = _mm256_set_m128(xmm_hi, xmm_lo);      \
    _PROC(ymm_lhs, ymm_rhs, _RES##_0_0)                    \
  }

//! One iteration step of the distance computation (FP32, M=4, N=2, AVX).
//! Loads eight floats from `m` and processes them twice: first against
//! {broadcast q[0] | broadcast q[2]} (low | high 128 bits) into `_RES##_0_0`,
//! then against {broadcast q[1] | broadcast q[3]} into `_RES##_0_1`.
#define MATRIX_FP32_ITER_4X2_AVX(m, q, _RES, _LOAD, _PROC)                 \
  {                                                                        \
    __m256 ymm_lhs = _LOAD(m);                                             \
    /* low half: q[0], high half: q[2] */                                  \
    __m256 ymm_rhs_0 =                                                     \
        _mm256_set_m128(_mm_broadcast_ss(q + 2), _mm_broadcast_ss(q + 0)); \
    _PROC(ymm_lhs, ymm_rhs_0, _RES##_0_0)                                  \
    /* low half: q[1], high half: q[3] */                                  \
    __m256 ymm_rhs_1 =                                                     \
        _mm256_set_m128(_mm_broadcast_ss(q + 3), _mm_broadcast_ss(q + 1)); \
    _PROC(ymm_lhs, ymm_rhs_1, _RES##_0_1)                                  \
  }

//! One iteration step of the distance computation (FP32, M=4, N=4, AVX).
//! Loads eight floats from each of `q` and `m`. For k = 0..3, lane k of each
//! 128-bit half of the `q` vector is replicated across that half and the
//! result is processed against the `m` vector into `_RES##_0_<k>`.
#define MATRIX_FP32_ITER_4X4_AVX(m, q, _RES, _LOAD, _PROC)                  \
  {                                                                         \
    __m256 ymm_rhs = _LOAD(q);                                              \
    __m256 ymm_lhs = _LOAD(m);                                              \
    __m256 ymm_sel_0 = _mm256_permute_ps(ymm_rhs, _MM_SHUFFLE(0, 0, 0, 0)); \
    _PROC(ymm_lhs, ymm_sel_0, _RES##_0_0)                                   \
    __m256 ymm_sel_1 = _mm256_permute_ps(ymm_rhs, _MM_SHUFFLE(1, 1, 1, 1)); \
    _PROC(ymm_lhs, ymm_sel_1, _RES##_0_1)                                   \
    __m256 ymm_sel_2 = _mm256_permute_ps(ymm_rhs, _MM_SHUFFLE(2, 2, 2, 2)); \
    _PROC(ymm_lhs, ymm_sel_2, _RES##_0_2)                                   \
    __m256 ymm_sel_3 = _mm256_permute_ps(ymm_rhs, _MM_SHUFFLE(3, 3, 3, 3)); \
    _PROC(ymm_lhs, ymm_sel_3, _RES##_0_3)                                   \
  }

//! One iteration step of the distance computation (FP32, M=8, N=1, AVX).
//! Loads eight floats from `m`, broadcasts q[0] to all eight lanes and passes
//! both to `_PROC` with accumulator `_RES##_0_0`.
#define MATRIX_FP32_ITER_8X1_AVX(m, q, _RES, _LOAD, _PROC) \
  {                                                        \
    __m256 ymm_lhs = _LOAD(m);                             \
    __m256 ymm_rhs = _mm256_broadcast_ss(q);               \
    _PROC(ymm_lhs, ymm_rhs, _RES##_0_0)                    \
  }

//! One iteration step of the distance computation (FP32, M=8, N=2, AVX).
//! Loads eight floats from `m` and processes them against an eight-lane
//! broadcast of q[0] (accumulator `_RES##_0_0`) and of q[1] (`_RES##_0_1`).
#define MATRIX_FP32_ITER_8X2_AVX(m, q, _RES, _LOAD, _PROC) \
  {                                                        \
    __m256 ymm_lhs = _LOAD(m);                             \
    __m256 ymm_rhs_0 = _mm256_broadcast_ss(q);             \
    _PROC(ymm_lhs, ymm_rhs_0, _RES##_0_0)                  \
    __m256 ymm_rhs_1 = _mm256_broadcast_ss(q + 1);         \
    _PROC(ymm_lhs, ymm_rhs_1, _RES##_0_1)                  \
  }

//! One iteration step of the distance computation (FP32, M=8, N=4, AVX).
//! Loads eight floats from `m` and, for k = 0..3, processes them against an
//! eight-lane broadcast of q[k] into accumulator `_RES##_0_<k>`.
#define MATRIX_FP32_ITER_8X4_AVX(m, q, _RES, _LOAD, _PROC) \
  {                                                        \
    __m256 ymm_lhs = _LOAD(m);                             \
    __m256 ymm_rhs_0 = _mm256_broadcast_ss(q);             \
    _PROC(ymm_lhs, ymm_rhs_0, _RES##_0_0)                  \
    __m256 ymm_rhs_1 = _mm256_broadcast_ss(q + 1);         \
    _PROC(ymm_lhs, ymm_rhs_1, _RES##_0_1)                  \
    __m256 ymm_rhs_2 = _mm256_broadcast_ss(q + 2);         \
    _PROC(ymm_lhs, ymm_rhs_2, _RES##_0_2)                  \
    __m256 ymm_rhs_3 = _mm256_broadcast_ss(q + 3);         \
    _PROC(ymm_lhs, ymm_rhs_3, _RES##_0_3)                  \
  }

//! One iteration step of the distance computation (FP32, M=8, N=8, AVX).
//! Loads eight floats from `m` and, for k = 0..7, processes them against an
//! eight-lane broadcast of q[k] into accumulator `_RES##_0_<k>`.
#define MATRIX_FP32_ITER_8X8_AVX(m, q, _RES, _LOAD, _PROC) \
  {                                                        \
    __m256 ymm_lhs = _LOAD(m);                             \
    __m256 ymm_rhs_0 = _mm256_broadcast_ss(q);             \
    _PROC(ymm_lhs, ymm_rhs_0, _RES##_0_0)                  \
    __m256 ymm_rhs_1 = _mm256_broadcast_ss(q + 1);         \
    _PROC(ymm_lhs, ymm_rhs_1, _RES##_0_1)                  \
    __m256 ymm_rhs_2 = _mm256_broadcast_ss(q + 2);         \
    _PROC(ymm_lhs, ymm_rhs_2, _RES##_0_2)                  \
    __m256 ymm_rhs_3 = _mm256_broadcast_ss(q + 3);         \
    _PROC(ymm_lhs, ymm_rhs_3, _RES##_0_3)                  \
    __m256 ymm_rhs_4 = _mm256_broadcast_ss(q + 4);         \
    _PROC(ymm_lhs, ymm_rhs_4, _RES##_0_4)                  \
    __m256 ymm_rhs_5 = _mm256_broadcast_ss(q + 5);         \
    _PROC(ymm_lhs, ymm_rhs_5, _RES##_0_5)                  \
    __m256 ymm_rhs_6 = _mm256_broadcast_ss(q + 6);         \
    _PROC(ymm_lhs, ymm_rhs_6, _RES##_0_6)                  \
    __m256 ymm_rhs_7 = _mm256_broadcast_ss(q + 7);         \
    _PROC(ymm_lhs, ymm_rhs_7, _RES##_0_7)                  \
  }

//! Iterative process of computing distance (FP32, M=16, N=1)
//!
//! Loads sixteen floats from `m` into ymm_m_0 / ymm_m_1, broadcasts q[0] into
//! ymm_q and invokes MATRIX_VAR_PROC(2, 1, 0, ...) once.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this file (presumably in
//! matrix_define.i); it appears to paste the `ymm_m` prefix into
//! ymm_m_0..ymm_m_1 and apply `_PROC` to each with the matching `_RES`
//! accumulator -- confirm against its definition.
#define MATRIX_FP32_ITER_16X1_AVX(m, q, _RES, _LOAD, _PROC) \
  {                                                         \
    __m256 ymm_m_0 = _LOAD(m + 0);                          \
    __m256 ymm_m_1 = _LOAD(m + 8);                          \
    __m256 ymm_q = _mm256_broadcast_ss(q);                  \
    MATRIX_VAR_PROC(2, 1, 0, ymm_m, ymm_q, _RES, _PROC)     \
  }

//! Iterative process of computing distance (FP32, M=16, N=2)
//!
//! Loads sixteen floats from `m` into ymm_m_0 / ymm_m_1, then for k = 0, 1
//! broadcasts q[k] into ymm_q and invokes MATRIX_VAR_PROC(2, 1, k, ...).
//! NOTE(review): MATRIX_VAR_PROC is defined outside this file (presumably in
//! matrix_define.i); it appears to paste the `ymm_m` prefix into
//! ymm_m_0..ymm_m_1 and apply `_PROC` to each with the matching `_RES`
//! accumulator -- confirm against its definition.
#define MATRIX_FP32_ITER_16X2_AVX(m, q, _RES, _LOAD, _PROC) \
  {                                                         \
    __m256 ymm_m_0 = _LOAD(m + 0);                          \
    __m256 ymm_m_1 = _LOAD(m + 8);                          \
    __m256 ymm_q = _mm256_broadcast_ss(q);                  \
    MATRIX_VAR_PROC(2, 1, 0, ymm_m, ymm_q, _RES, _PROC)     \
    ymm_q = _mm256_broadcast_ss(q + 1);                     \
    MATRIX_VAR_PROC(2, 1, 1, ymm_m, ymm_q, _RES, _PROC)     \
  }

//! Single accumulation step of the FP32 distance kernel (M=16, N=4, AVX).
//! Two 256-bit vectors are fetched from `m` (offsets 0 and 8) through
//! `_LOAD`; q[0] .. q[3] are broadcast one after the other and each is
//! forwarded to MATRIX_VAR_PROC (defined outside this macro) together with
//! its column index, the `ymm_lhs` variable prefix, `_RES` and `_PROC`.
#define MATRIX_FP32_ITER_16X4_AVX(m, q, _RES, _LOAD, _PROC) \
  {                                                         \
    __m256 ymm_rhs = _mm256_broadcast_ss(q + 0);            \
    __m256 ymm_lhs_0 = _LOAD(m + 0);                        \
    __m256 ymm_lhs_1 = _LOAD(m + 8);                        \
    MATRIX_VAR_PROC(2, 1, 0, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 1);                   \
    MATRIX_VAR_PROC(2, 1, 1, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 2);                   \
    MATRIX_VAR_PROC(2, 1, 2, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 3);                   \
    MATRIX_VAR_PROC(2, 1, 3, ymm_lhs, ymm_rhs, _RES, _PROC) \
  }

//! Single accumulation step of the FP32 distance kernel (M=16, N=8, AVX).
//! Two 256-bit vectors are fetched from `m` (offsets 0 and 8) through
//! `_LOAD`; q[0] .. q[7] are broadcast one after the other and each is
//! forwarded to MATRIX_VAR_PROC (defined outside this macro) together with
//! its column index, the `ymm_lhs` variable prefix, `_RES` and `_PROC`.
#define MATRIX_FP32_ITER_16X8_AVX(m, q, _RES, _LOAD, _PROC) \
  {                                                         \
    __m256 ymm_rhs = _mm256_broadcast_ss(q + 0);            \
    __m256 ymm_lhs_0 = _LOAD(m + 0);                        \
    __m256 ymm_lhs_1 = _LOAD(m + 8);                        \
    MATRIX_VAR_PROC(2, 1, 0, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 1);                   \
    MATRIX_VAR_PROC(2, 1, 1, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 2);                   \
    MATRIX_VAR_PROC(2, 1, 2, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 3);                   \
    MATRIX_VAR_PROC(2, 1, 3, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 4);                   \
    MATRIX_VAR_PROC(2, 1, 4, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 5);                   \
    MATRIX_VAR_PROC(2, 1, 5, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 6);                   \
    MATRIX_VAR_PROC(2, 1, 6, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 7);                   \
    MATRIX_VAR_PROC(2, 1, 7, ymm_lhs, ymm_rhs, _RES, _PROC) \
  }

//! Single accumulation step of the FP32 distance kernel (M=16, N=16, AVX).
//! Two 256-bit vectors are fetched from `m` (offsets 0 and 8) through
//! `_LOAD`; q[0] .. q[15] are broadcast one after the other and each is
//! forwarded to MATRIX_VAR_PROC (defined outside this macro) together with
//! its column index, the `ymm_lhs` variable prefix, `_RES` and `_PROC`.
#define MATRIX_FP32_ITER_16X16_AVX(m, q, _RES, _LOAD, _PROC) \
  {                                                          \
    __m256 ymm_rhs = _mm256_broadcast_ss(q + 0);             \
    __m256 ymm_lhs_0 = _LOAD(m + 0);                         \
    __m256 ymm_lhs_1 = _LOAD(m + 8);                         \
    MATRIX_VAR_PROC(2, 1, 0, ymm_lhs, ymm_rhs, _RES, _PROC)  \
    ymm_rhs = _mm256_broadcast_ss(q + 1);                    \
    MATRIX_VAR_PROC(2, 1, 1, ymm_lhs, ymm_rhs, _RES, _PROC)  \
    ymm_rhs = _mm256_broadcast_ss(q + 2);                    \
    MATRIX_VAR_PROC(2, 1, 2, ymm_lhs, ymm_rhs, _RES, _PROC)  \
    ymm_rhs = _mm256_broadcast_ss(q + 3);                    \
    MATRIX_VAR_PROC(2, 1, 3, ymm_lhs, ymm_rhs, _RES, _PROC)  \
    ymm_rhs = _mm256_broadcast_ss(q + 4);                    \
    MATRIX_VAR_PROC(2, 1, 4, ymm_lhs, ymm_rhs, _RES, _PROC)  \
    ymm_rhs = _mm256_broadcast_ss(q + 5);                    \
    MATRIX_VAR_PROC(2, 1, 5, ymm_lhs, ymm_rhs, _RES, _PROC)  \
    ymm_rhs = _mm256_broadcast_ss(q + 6);                    \
    MATRIX_VAR_PROC(2, 1, 6, ymm_lhs, ymm_rhs, _RES, _PROC)  \
    ymm_rhs = _mm256_broadcast_ss(q + 7);                    \
    MATRIX_VAR_PROC(2, 1, 7, ymm_lhs, ymm_rhs, _RES, _PROC)  \
    ymm_rhs = _mm256_broadcast_ss(q + 8);                    \
    MATRIX_VAR_PROC(2, 1, 8, ymm_lhs, ymm_rhs, _RES, _PROC)  \
    ymm_rhs = _mm256_broadcast_ss(q + 9);                    \
    MATRIX_VAR_PROC(2, 1, 9, ymm_lhs, ymm_rhs, _RES, _PROC)  \
    ymm_rhs = _mm256_broadcast_ss(q + 10);                   \
    MATRIX_VAR_PROC(2, 1, 10, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 11);                   \
    MATRIX_VAR_PROC(2, 1, 11, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 12);                   \
    MATRIX_VAR_PROC(2, 1, 12, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 13);                   \
    MATRIX_VAR_PROC(2, 1, 13, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 14);                   \
    MATRIX_VAR_PROC(2, 1, 14, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 15);                   \
    MATRIX_VAR_PROC(2, 1, 15, ymm_lhs, ymm_rhs, _RES, _PROC) \
  }

//! Single accumulation step of the FP32 distance kernel (M=32, N=1, AVX).
//! Four 256-bit vectors are fetched from `m` (offsets 0, 8, 16 and 24)
//! through `_LOAD` and q[0] is broadcast over a fifth register. The work is
//! then delegated to MATRIX_VAR_PROC (defined outside this macro) with
//! column index 0, the `ymm_lhs` variable prefix, `_RES` and `_PROC`.
#define MATRIX_FP32_ITER_32X1_AVX(m, q, _RES, _LOAD, _PROC) \
  {                                                         \
    __m256 ymm_rhs = _mm256_broadcast_ss(q + 0);            \
    __m256 ymm_lhs_0 = _LOAD(m + 0);                        \
    __m256 ymm_lhs_1 = _LOAD(m + 8);                        \
    __m256 ymm_lhs_2 = _LOAD(m + 16);                       \
    __m256 ymm_lhs_3 = _LOAD(m + 24);                       \
    MATRIX_VAR_PROC(4, 1, 0, ymm_lhs, ymm_rhs, _RES, _PROC) \
  }

//! Single accumulation step of the FP32 distance kernel (M=32, N=2, AVX).
//! Four 256-bit vectors are fetched from `m` (offsets 0, 8, 16 and 24)
//! through `_LOAD`; q[0] and q[1] are broadcast one after the other and each
//! is forwarded to MATRIX_VAR_PROC (defined outside this macro) together
//! with its column index, the `ymm_lhs` variable prefix, `_RES` and `_PROC`.
#define MATRIX_FP32_ITER_32X2_AVX(m, q, _RES, _LOAD, _PROC) \
  {                                                         \
    __m256 ymm_rhs = _mm256_broadcast_ss(q + 0);            \
    __m256 ymm_lhs_0 = _LOAD(m + 0);                        \
    __m256 ymm_lhs_1 = _LOAD(m + 8);                        \
    __m256 ymm_lhs_2 = _LOAD(m + 16);                       \
    __m256 ymm_lhs_3 = _LOAD(m + 24);                       \
    MATRIX_VAR_PROC(4, 1, 0, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 1);                   \
    MATRIX_VAR_PROC(4, 1, 1, ymm_lhs, ymm_rhs, _RES, _PROC) \
  }

//! Single accumulation step of the FP32 distance kernel (M=32, N=4, AVX).
//! Four 256-bit vectors are fetched from `m` (offsets 0, 8, 16 and 24)
//! through `_LOAD`; q[0] .. q[3] are broadcast one after the other and each
//! is forwarded to MATRIX_VAR_PROC (defined outside this macro) together
//! with its column index, the `ymm_lhs` variable prefix, `_RES` and `_PROC`.
#define MATRIX_FP32_ITER_32X4_AVX(m, q, _RES, _LOAD, _PROC) \
  {                                                         \
    __m256 ymm_rhs = _mm256_broadcast_ss(q + 0);            \
    __m256 ymm_lhs_0 = _LOAD(m + 0);                        \
    __m256 ymm_lhs_1 = _LOAD(m + 8);                        \
    __m256 ymm_lhs_2 = _LOAD(m + 16);                       \
    __m256 ymm_lhs_3 = _LOAD(m + 24);                       \
    MATRIX_VAR_PROC(4, 1, 0, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 1);                   \
    MATRIX_VAR_PROC(4, 1, 1, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 2);                   \
    MATRIX_VAR_PROC(4, 1, 2, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 3);                   \
    MATRIX_VAR_PROC(4, 1, 3, ymm_lhs, ymm_rhs, _RES, _PROC) \
  }

//! Single accumulation step of the FP32 distance kernel (M=32, N=8, AVX).
//! Four 256-bit vectors are fetched from `m` (offsets 0, 8, 16 and 24)
//! through `_LOAD`; q[0] .. q[7] are broadcast one after the other and each
//! is forwarded to MATRIX_VAR_PROC (defined outside this macro) together
//! with its column index, the `ymm_lhs` variable prefix, `_RES` and `_PROC`.
#define MATRIX_FP32_ITER_32X8_AVX(m, q, _RES, _LOAD, _PROC) \
  {                                                         \
    __m256 ymm_rhs = _mm256_broadcast_ss(q + 0);            \
    __m256 ymm_lhs_0 = _LOAD(m + 0);                        \
    __m256 ymm_lhs_1 = _LOAD(m + 8);                        \
    __m256 ymm_lhs_2 = _LOAD(m + 16);                       \
    __m256 ymm_lhs_3 = _LOAD(m + 24);                       \
    MATRIX_VAR_PROC(4, 1, 0, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 1);                   \
    MATRIX_VAR_PROC(4, 1, 1, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 2);                   \
    MATRIX_VAR_PROC(4, 1, 2, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 3);                   \
    MATRIX_VAR_PROC(4, 1, 3, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 4);                   \
    MATRIX_VAR_PROC(4, 1, 4, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 5);                   \
    MATRIX_VAR_PROC(4, 1, 5, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 6);                   \
    MATRIX_VAR_PROC(4, 1, 6, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 7);                   \
    MATRIX_VAR_PROC(4, 1, 7, ymm_lhs, ymm_rhs, _RES, _PROC) \
  }

//! Single accumulation step of the FP32 distance kernel (M=32, N=16, AVX).
//! Four 256-bit vectors are fetched from `m` (offsets 0, 8, 16 and 24)
//! through `_LOAD`; q[0] .. q[15] are broadcast one after the other and each
//! is forwarded to MATRIX_VAR_PROC (defined outside this macro) together
//! with its column index, the `ymm_lhs` variable prefix, `_RES` and `_PROC`.
#define MATRIX_FP32_ITER_32X16_AVX(m, q, _RES, _LOAD, _PROC) \
  {                                                          \
    __m256 ymm_rhs = _mm256_broadcast_ss(q + 0);             \
    __m256 ymm_lhs_0 = _LOAD(m + 0);                         \
    __m256 ymm_lhs_1 = _LOAD(m + 8);                         \
    __m256 ymm_lhs_2 = _LOAD(m + 16);                        \
    __m256 ymm_lhs_3 = _LOAD(m + 24);                        \
    MATRIX_VAR_PROC(4, 1, 0, ymm_lhs, ymm_rhs, _RES, _PROC)  \
    ymm_rhs = _mm256_broadcast_ss(q + 1);                    \
    MATRIX_VAR_PROC(4, 1, 1, ymm_lhs, ymm_rhs, _RES, _PROC)  \
    ymm_rhs = _mm256_broadcast_ss(q + 2);                    \
    MATRIX_VAR_PROC(4, 1, 2, ymm_lhs, ymm_rhs, _RES, _PROC)  \
    ymm_rhs = _mm256_broadcast_ss(q + 3);                    \
    MATRIX_VAR_PROC(4, 1, 3, ymm_lhs, ymm_rhs, _RES, _PROC)  \
    ymm_rhs = _mm256_broadcast_ss(q + 4);                    \
    MATRIX_VAR_PROC(4, 1, 4, ymm_lhs, ymm_rhs, _RES, _PROC)  \
    ymm_rhs = _mm256_broadcast_ss(q + 5);                    \
    MATRIX_VAR_PROC(4, 1, 5, ymm_lhs, ymm_rhs, _RES, _PROC)  \
    ymm_rhs = _mm256_broadcast_ss(q + 6);                    \
    MATRIX_VAR_PROC(4, 1, 6, ymm_lhs, ymm_rhs, _RES, _PROC)  \
    ymm_rhs = _mm256_broadcast_ss(q + 7);                    \
    MATRIX_VAR_PROC(4, 1, 7, ymm_lhs, ymm_rhs, _RES, _PROC)  \
    ymm_rhs = _mm256_broadcast_ss(q + 8);                    \
    MATRIX_VAR_PROC(4, 1, 8, ymm_lhs, ymm_rhs, _RES, _PROC)  \
    ymm_rhs = _mm256_broadcast_ss(q + 9);                    \
    MATRIX_VAR_PROC(4, 1, 9, ymm_lhs, ymm_rhs, _RES, _PROC)  \
    ymm_rhs = _mm256_broadcast_ss(q + 10);                   \
    MATRIX_VAR_PROC(4, 1, 10, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 11);                   \
    MATRIX_VAR_PROC(4, 1, 11, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 12);                   \
    MATRIX_VAR_PROC(4, 1, 12, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 13);                   \
    MATRIX_VAR_PROC(4, 1, 13, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 14);                   \
    MATRIX_VAR_PROC(4, 1, 14, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 15);                   \
    MATRIX_VAR_PROC(4, 1, 15, ymm_lhs, ymm_rhs, _RES, _PROC) \
  }

//! Single accumulation step of the FP32 distance kernel (M=32, N=32, AVX).
//! Four 256-bit vectors are fetched from `m` (offsets 0, 8, 16 and 24)
//! through `_LOAD`; q[0] .. q[31] are broadcast one after the other and each
//! is forwarded to MATRIX_VAR_PROC (defined outside this macro) together
//! with its column index, the `ymm_lhs` variable prefix, `_RES` and `_PROC`.
#define MATRIX_FP32_ITER_32X32_AVX(m, q, _RES, _LOAD, _PROC) \
  {                                                          \
    __m256 ymm_rhs = _mm256_broadcast_ss(q + 0);             \
    __m256 ymm_lhs_0 = _LOAD(m + 0);                         \
    __m256 ymm_lhs_1 = _LOAD(m + 8);                         \
    __m256 ymm_lhs_2 = _LOAD(m + 16);                        \
    __m256 ymm_lhs_3 = _LOAD(m + 24);                        \
    MATRIX_VAR_PROC(4, 1, 0, ymm_lhs, ymm_rhs, _RES, _PROC)  \
    ymm_rhs = _mm256_broadcast_ss(q + 1);                    \
    MATRIX_VAR_PROC(4, 1, 1, ymm_lhs, ymm_rhs, _RES, _PROC)  \
    ymm_rhs = _mm256_broadcast_ss(q + 2);                    \
    MATRIX_VAR_PROC(4, 1, 2, ymm_lhs, ymm_rhs, _RES, _PROC)  \
    ymm_rhs = _mm256_broadcast_ss(q + 3);                    \
    MATRIX_VAR_PROC(4, 1, 3, ymm_lhs, ymm_rhs, _RES, _PROC)  \
    ymm_rhs = _mm256_broadcast_ss(q + 4);                    \
    MATRIX_VAR_PROC(4, 1, 4, ymm_lhs, ymm_rhs, _RES, _PROC)  \
    ymm_rhs = _mm256_broadcast_ss(q + 5);                    \
    MATRIX_VAR_PROC(4, 1, 5, ymm_lhs, ymm_rhs, _RES, _PROC)  \
    ymm_rhs = _mm256_broadcast_ss(q + 6);                    \
    MATRIX_VAR_PROC(4, 1, 6, ymm_lhs, ymm_rhs, _RES, _PROC)  \
    ymm_rhs = _mm256_broadcast_ss(q + 7);                    \
    MATRIX_VAR_PROC(4, 1, 7, ymm_lhs, ymm_rhs, _RES, _PROC)  \
    ymm_rhs = _mm256_broadcast_ss(q + 8);                    \
    MATRIX_VAR_PROC(4, 1, 8, ymm_lhs, ymm_rhs, _RES, _PROC)  \
    ymm_rhs = _mm256_broadcast_ss(q + 9);                    \
    MATRIX_VAR_PROC(4, 1, 9, ymm_lhs, ymm_rhs, _RES, _PROC)  \
    ymm_rhs = _mm256_broadcast_ss(q + 10);                   \
    MATRIX_VAR_PROC(4, 1, 10, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 11);                   \
    MATRIX_VAR_PROC(4, 1, 11, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 12);                   \
    MATRIX_VAR_PROC(4, 1, 12, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 13);                   \
    MATRIX_VAR_PROC(4, 1, 13, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 14);                   \
    MATRIX_VAR_PROC(4, 1, 14, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 15);                   \
    MATRIX_VAR_PROC(4, 1, 15, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 16);                   \
    MATRIX_VAR_PROC(4, 1, 16, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 17);                   \
    MATRIX_VAR_PROC(4, 1, 17, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 18);                   \
    MATRIX_VAR_PROC(4, 1, 18, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 19);                   \
    MATRIX_VAR_PROC(4, 1, 19, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 20);                   \
    MATRIX_VAR_PROC(4, 1, 20, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 21);                   \
    MATRIX_VAR_PROC(4, 1, 21, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 22);                   \
    MATRIX_VAR_PROC(4, 1, 22, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 23);                   \
    MATRIX_VAR_PROC(4, 1, 23, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 24);                   \
    MATRIX_VAR_PROC(4, 1, 24, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 25);                   \
    MATRIX_VAR_PROC(4, 1, 25, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 26);                   \
    MATRIX_VAR_PROC(4, 1, 26, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 27);                   \
    MATRIX_VAR_PROC(4, 1, 27, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 28);                   \
    MATRIX_VAR_PROC(4, 1, 28, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 29);                   \
    MATRIX_VAR_PROC(4, 1, 29, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 30);                   \
    MATRIX_VAR_PROC(4, 1, 30, ymm_lhs, ymm_rhs, _RES, _PROC) \
    ymm_rhs = _mm256_broadcast_ss(q + 31);                   \
    MATRIX_VAR_PROC(4, 1, 31, ymm_lhs, ymm_rhs, _RES, _PROC) \
  }

//! Single accumulation step of the FP32 distance kernel (M=16, N=1, AVX512).
//!   m      pointer handed to `_LOAD`, which must yield one __m512
//!   q      pointer to at least 1 float; q[0] is replicated to every lane
//!   _RES   accumulator name prefix; the step updates `<_RES>_0_0`
//!   _LOAD  load primitive supplied by the caller
//!   _PROC  step macro used as _PROC(lhs, rhs, acc) with no trailing
//!          semicolon, so it has to expand to complete statement(s)
//! The `q` argument is parenthesised before it is indexed so that pointer
//! expressions such as `base + 1` select the intended element instead of
//! expanding to `*base + 1`.
#define MATRIX_FP32_ITER_16X1_AVX512(m, q, _RES, _LOAD, _PROC) \
  {                                                            \
    __m512 zmm_m = _LOAD(m);                                   \
    __m512 zmm_q = _mm512_set1_ps((q)[0]);                     \
    _PROC(zmm_m, zmm_q, _RES##_0_0)                            \
  }

//! Single accumulation step of the FP32 distance kernel (M=16, N=2, AVX512).
//!   m      pointer handed to `_LOAD`, which must yield one __m512
//!   q      pointer to at least 2 floats; q[k] is replicated to every lane
//!   _RES   accumulator name prefix; column k updates `<_RES>_0_<k>`
//!   _LOAD  load primitive supplied by the caller
//!   _PROC  step macro used as _PROC(lhs, rhs, acc) with no trailing
//!          semicolon, so it has to expand to complete statement(s)
//! The `q` argument is parenthesised before it is indexed so that pointer
//! expressions such as `base + 1` expand correctly.
#define MATRIX_FP32_ITER_16X2_AVX512(m, q, _RES, _LOAD, _PROC) \
  {                                                            \
    __m512 zmm_m = _LOAD(m);                                   \
    __m512 zmm_q = _mm512_set1_ps((q)[0]);                     \
    _PROC(zmm_m, zmm_q, _RES##_0_0)                            \
    zmm_q = _mm512_set1_ps((q)[1]);                            \
    _PROC(zmm_m, zmm_q, _RES##_0_1)                            \
  }

//! Single accumulation step of the FP32 distance kernel (M=16, N=4, AVX512).
//!   m      pointer handed to `_LOAD`, which must yield one __m512
//!   q      pointer to at least 4 floats; q[k] is replicated to every lane
//!   _RES   accumulator name prefix; column k updates `<_RES>_0_<k>`
//!   _LOAD  load primitive supplied by the caller
//!   _PROC  step macro used as _PROC(lhs, rhs, acc) with no trailing
//!          semicolon, so it has to expand to complete statement(s)
//! The `q` argument is parenthesised before it is indexed so that pointer
//! expressions such as `base + 1` expand correctly.
#define MATRIX_FP32_ITER_16X4_AVX512(m, q, _RES, _LOAD, _PROC) \
  {                                                            \
    __m512 zmm_m = _LOAD(m);                                   \
    __m512 zmm_q = _mm512_set1_ps((q)[0]);                     \
    _PROC(zmm_m, zmm_q, _RES##_0_0)                            \
    zmm_q = _mm512_set1_ps((q)[1]);                            \
    _PROC(zmm_m, zmm_q, _RES##_0_1)                            \
    zmm_q = _mm512_set1_ps((q)[2]);                            \
    _PROC(zmm_m, zmm_q, _RES##_0_2)                            \
    zmm_q = _mm512_set1_ps((q)[3]);                            \
    _PROC(zmm_m, zmm_q, _RES##_0_3)                            \
  }

//! Single accumulation step of the FP32 distance kernel (M=16, N=8, AVX512).
//!   m      pointer handed to `_LOAD`, which must yield one __m512
//!   q      pointer to at least 8 floats; q[k] is replicated to every lane
//!   _RES   accumulator name prefix; column k updates `<_RES>_0_<k>`
//!   _LOAD  load primitive supplied by the caller
//!   _PROC  step macro used as _PROC(lhs, rhs, acc) with no trailing
//!          semicolon, so it has to expand to complete statement(s)
//! The `q` argument is parenthesised before it is indexed so that pointer
//! expressions such as `base + 1` expand correctly.
#define MATRIX_FP32_ITER_16X8_AVX512(m, q, _RES, _LOAD, _PROC) \
  {                                                            \
    __m512 zmm_m = _LOAD(m);                                   \
    __m512 zmm_q = _mm512_set1_ps((q)[0]);                     \
    _PROC(zmm_m, zmm_q, _RES##_0_0)                            \
    zmm_q = _mm512_set1_ps((q)[1]);                            \
    _PROC(zmm_m, zmm_q, _RES##_0_1)                            \
    zmm_q = _mm512_set1_ps((q)[2]);                            \
    _PROC(zmm_m, zmm_q, _RES##_0_2)                            \
    zmm_q = _mm512_set1_ps((q)[3]);                            \
    _PROC(zmm_m, zmm_q, _RES##_0_3)                            \
    zmm_q = _mm512_set1_ps((q)[4]);                            \
    _PROC(zmm_m, zmm_q, _RES##_0_4)                            \
    zmm_q = _mm512_set1_ps((q)[5]);                            \
    _PROC(zmm_m, zmm_q, _RES##_0_5)                            \
    zmm_q = _mm512_set1_ps((q)[6]);                            \
    _PROC(zmm_m, zmm_q, _RES##_0_6)                            \
    zmm_q = _mm512_set1_ps((q)[7]);                            \
    _PROC(zmm_m, zmm_q, _RES##_0_7)                            \
  }

//! Single accumulation step of the FP32 distance kernel (M=16, N=16, AVX512).
//!   m      pointer handed to `_LOAD`, which must yield one __m512
//!   q      pointer to at least 16 floats; q[k] is replicated to every lane
//!   _RES   accumulator name prefix; column k updates `<_RES>_0_<k>`
//!   _LOAD  load primitive supplied by the caller
//!   _PROC  step macro used as _PROC(lhs, rhs, acc) with no trailing
//!          semicolon, so it has to expand to complete statement(s)
//! The `q` argument is parenthesised before it is indexed so that pointer
//! expressions such as `base + 1` expand correctly.
#define MATRIX_FP32_ITER_16X16_AVX512(m, q, _RES, _LOAD, _PROC) \
  {                                                             \
    __m512 zmm_m = _LOAD(m);                                    \
    __m512 zmm_q = _mm512_set1_ps((q)[0]);                      \
    _PROC(zmm_m, zmm_q, _RES##_0_0)                             \
    zmm_q = _mm512_set1_ps((q)[1]);                             \
    _PROC(zmm_m, zmm_q, _RES##_0_1)                             \
    zmm_q = _mm512_set1_ps((q)[2]);                             \
    _PROC(zmm_m, zmm_q, _RES##_0_2)                             \
    zmm_q = _mm512_set1_ps((q)[3]);                             \
    _PROC(zmm_m, zmm_q, _RES##_0_3)                             \
    zmm_q = _mm512_set1_ps((q)[4]);                             \
    _PROC(zmm_m, zmm_q, _RES##_0_4)                             \
    zmm_q = _mm512_set1_ps((q)[5]);                             \
    _PROC(zmm_m, zmm_q, _RES##_0_5)                             \
    zmm_q = _mm512_set1_ps((q)[6]);                             \
    _PROC(zmm_m, zmm_q, _RES##_0_6)                             \
    zmm_q = _mm512_set1_ps((q)[7]);                             \
    _PROC(zmm_m, zmm_q, _RES##_0_7)                             \
    zmm_q = _mm512_set1_ps((q)[8]);                             \
    _PROC(zmm_m, zmm_q, _RES##_0_8)                             \
    zmm_q = _mm512_set1_ps((q)[9]);                             \
    _PROC(zmm_m, zmm_q, _RES##_0_9)                             \
    zmm_q = _mm512_set1_ps((q)[10]);                            \
    _PROC(zmm_m, zmm_q, _RES##_0_10)                            \
    zmm_q = _mm512_set1_ps((q)[11]);                            \
    _PROC(zmm_m, zmm_q, _RES##_0_11)                            \
    zmm_q = _mm512_set1_ps((q)[12]);                            \
    _PROC(zmm_m, zmm_q, _RES##_0_12)                            \
    zmm_q = _mm512_set1_ps((q)[13]);                            \
    _PROC(zmm_m, zmm_q, _RES##_0_13)                            \
    zmm_q = _mm512_set1_ps((q)[14]);                            \
    _PROC(zmm_m, zmm_q, _RES##_0_14)                            \
    zmm_q = _mm512_set1_ps((q)[15]);                            \
    _PROC(zmm_m, zmm_q, _RES##_0_15)                            \
  }

//! Single accumulation step of the FP32 distance kernel (M=32, N=1, AVX512).
//!   m      pointer handed to `_LOAD` twice (offsets 0 and 16); each call
//!          must yield one __m512
//!   q      pointer to at least 1 float; q[0] is replicated to every lane
//!   _RES   accumulator name prefix; the first load updates `<_RES>_0_0`,
//!          the second one `<_RES>_1_0`
//!   _LOAD  load primitive supplied by the caller
//!   _PROC  step macro used as _PROC(lhs, rhs, acc) with no trailing
//!          semicolon, so it has to expand to complete statement(s)
//! Both pointer arguments are parenthesised before use so that expressions
//! such as `base + 1` select the intended element instead of expanding to
//! `*base + 1`.
#define MATRIX_FP32_ITER_32X1_AVX512(m, q, _RES, _LOAD, _PROC) \
  {                                                            \
    __m512 zmm_q = _mm512_set1_ps((q)[0]);                     \
    __m512 zmm_m = _LOAD(m);                                   \
    _PROC(zmm_m, zmm_q, _RES##_0_0)                            \
    zmm_m = _LOAD((m) + 16);                                   \
    _PROC(zmm_m, zmm_q, _RES##_1_0)                            \
  }

//! Single accumulation step of the FP32 distance kernel (M=32, N=2, AVX512).
//! Two 512-bit vectors are fetched from `m` (offsets 0 and 16) through
//! `_LOAD`; q[0] and q[1] are replicated to every lane one after the other
//! and each is forwarded to MATRIX_VAR_PROC (defined outside this macro)
//! together with its column index, the `zmm_m` variable prefix, `_RES` and
//! `_PROC`.
//! The `m` and `q` arguments are parenthesised before they are offset or
//! indexed so that pointer expressions such as `base + 1` expand correctly.
#define MATRIX_FP32_ITER_32X2_AVX512(m, q, _RES, _LOAD, _PROC) \
  {                                                            \
    __m512 zmm_m_0 = _LOAD((m) + 0);                           \
    __m512 zmm_m_1 = _LOAD((m) + 16);                          \
    __m512 zmm_q = _mm512_set1_ps((q)[0]);                     \
    MATRIX_VAR_PROC(2, 1, 0, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[1]);                            \
    MATRIX_VAR_PROC(2, 1, 1, zmm_m, zmm_q, _RES, _PROC)        \
  }

//! Single accumulation step of the FP32 distance kernel (M=32, N=4, AVX512).
//! Two 512-bit vectors are fetched from `m` (offsets 0 and 16) through
//! `_LOAD`; q[0] .. q[3] are replicated to every lane one after the other
//! and each is forwarded to MATRIX_VAR_PROC (defined outside this macro)
//! together with its column index, the `zmm_m` variable prefix, `_RES` and
//! `_PROC`.
//! The `m` and `q` arguments are parenthesised before they are offset or
//! indexed so that pointer expressions such as `base + 1` expand correctly.
#define MATRIX_FP32_ITER_32X4_AVX512(m, q, _RES, _LOAD, _PROC) \
  {                                                            \
    __m512 zmm_m_0 = _LOAD((m) + 0);                           \
    __m512 zmm_m_1 = _LOAD((m) + 16);                          \
    __m512 zmm_q = _mm512_set1_ps((q)[0]);                     \
    MATRIX_VAR_PROC(2, 1, 0, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[1]);                            \
    MATRIX_VAR_PROC(2, 1, 1, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[2]);                            \
    MATRIX_VAR_PROC(2, 1, 2, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[3]);                            \
    MATRIX_VAR_PROC(2, 1, 3, zmm_m, zmm_q, _RES, _PROC)        \
  }

//! Single accumulation step of the FP32 distance kernel (M=32, N=8, AVX512).
//! Two 512-bit vectors are fetched from `m` (offsets 0 and 16) through
//! `_LOAD`; q[0] .. q[7] are replicated to every lane one after the other
//! and each is forwarded to MATRIX_VAR_PROC (defined outside this macro)
//! together with its column index, the `zmm_m` variable prefix, `_RES` and
//! `_PROC`.
//! The `m` and `q` arguments are parenthesised before they are offset or
//! indexed so that pointer expressions such as `base + 1` expand correctly.
#define MATRIX_FP32_ITER_32X8_AVX512(m, q, _RES, _LOAD, _PROC) \
  {                                                            \
    __m512 zmm_m_0 = _LOAD((m) + 0);                           \
    __m512 zmm_m_1 = _LOAD((m) + 16);                          \
    __m512 zmm_q = _mm512_set1_ps((q)[0]);                     \
    MATRIX_VAR_PROC(2, 1, 0, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[1]);                            \
    MATRIX_VAR_PROC(2, 1, 1, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[2]);                            \
    MATRIX_VAR_PROC(2, 1, 2, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[3]);                            \
    MATRIX_VAR_PROC(2, 1, 3, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[4]);                            \
    MATRIX_VAR_PROC(2, 1, 4, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[5]);                            \
    MATRIX_VAR_PROC(2, 1, 5, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[6]);                            \
    MATRIX_VAR_PROC(2, 1, 6, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[7]);                            \
    MATRIX_VAR_PROC(2, 1, 7, zmm_m, zmm_q, _RES, _PROC)        \
  }

//! Single accumulation step of the FP32 distance kernel (M=32, N=16, AVX512).
//! Two 512-bit vectors are fetched from `m` (offsets 0 and 16) through
//! `_LOAD`; q[0] .. q[15] are replicated to every lane one after the other
//! and each is forwarded to MATRIX_VAR_PROC (defined outside this macro)
//! together with its column index, the `zmm_m` variable prefix, `_RES` and
//! `_PROC`.
//! The `m` and `q` arguments are parenthesised before they are offset or
//! indexed so that pointer expressions such as `base + 1` expand correctly.
#define MATRIX_FP32_ITER_32X16_AVX512(m, q, _RES, _LOAD, _PROC) \
  {                                                             \
    __m512 zmm_m_0 = _LOAD((m) + 0);                            \
    __m512 zmm_m_1 = _LOAD((m) + 16);                           \
    __m512 zmm_q = _mm512_set1_ps((q)[0]);                      \
    MATRIX_VAR_PROC(2, 1, 0, zmm_m, zmm_q, _RES, _PROC)         \
    zmm_q = _mm512_set1_ps((q)[1]);                             \
    MATRIX_VAR_PROC(2, 1, 1, zmm_m, zmm_q, _RES, _PROC)         \
    zmm_q = _mm512_set1_ps((q)[2]);                             \
    MATRIX_VAR_PROC(2, 1, 2, zmm_m, zmm_q, _RES, _PROC)         \
    zmm_q = _mm512_set1_ps((q)[3]);                             \
    MATRIX_VAR_PROC(2, 1, 3, zmm_m, zmm_q, _RES, _PROC)         \
    zmm_q = _mm512_set1_ps((q)[4]);                             \
    MATRIX_VAR_PROC(2, 1, 4, zmm_m, zmm_q, _RES, _PROC)         \
    zmm_q = _mm512_set1_ps((q)[5]);                             \
    MATRIX_VAR_PROC(2, 1, 5, zmm_m, zmm_q, _RES, _PROC)         \
    zmm_q = _mm512_set1_ps((q)[6]);                             \
    MATRIX_VAR_PROC(2, 1, 6, zmm_m, zmm_q, _RES, _PROC)         \
    zmm_q = _mm512_set1_ps((q)[7]);                             \
    MATRIX_VAR_PROC(2, 1, 7, zmm_m, zmm_q, _RES, _PROC)         \
    zmm_q = _mm512_set1_ps((q)[8]);                             \
    MATRIX_VAR_PROC(2, 1, 8, zmm_m, zmm_q, _RES, _PROC)         \
    zmm_q = _mm512_set1_ps((q)[9]);                             \
    MATRIX_VAR_PROC(2, 1, 9, zmm_m, zmm_q, _RES, _PROC)         \
    zmm_q = _mm512_set1_ps((q)[10]);                            \
    MATRIX_VAR_PROC(2, 1, 10, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[11]);                            \
    MATRIX_VAR_PROC(2, 1, 11, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[12]);                            \
    MATRIX_VAR_PROC(2, 1, 12, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[13]);                            \
    MATRIX_VAR_PROC(2, 1, 13, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[14]);                            \
    MATRIX_VAR_PROC(2, 1, 14, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[15]);                            \
    MATRIX_VAR_PROC(2, 1, 15, zmm_m, zmm_q, _RES, _PROC)        \
  }

//! Single accumulation step of the FP32 distance kernel (M=32, N=32, AVX512).
//! Two 512-bit vectors are fetched from `m` (offsets 0 and 16) through
//! `_LOAD`; q[0] .. q[31] are replicated to every lane one after the other
//! and each is forwarded to MATRIX_VAR_PROC (defined outside this macro)
//! together with its column index, the `zmm_m` variable prefix, `_RES` and
//! `_PROC`.
//! The `m` and `q` arguments are parenthesised before they are offset or
//! indexed so that pointer expressions such as `base + 1` expand correctly.
#define MATRIX_FP32_ITER_32X32_AVX512(m, q, _RES, _LOAD, _PROC) \
  {                                                             \
    __m512 zmm_m_0 = _LOAD((m) + 0);                            \
    __m512 zmm_m_1 = _LOAD((m) + 16);                           \
    __m512 zmm_q = _mm512_set1_ps((q)[0]);                      \
    MATRIX_VAR_PROC(2, 1, 0, zmm_m, zmm_q, _RES, _PROC)         \
    zmm_q = _mm512_set1_ps((q)[1]);                             \
    MATRIX_VAR_PROC(2, 1, 1, zmm_m, zmm_q, _RES, _PROC)         \
    zmm_q = _mm512_set1_ps((q)[2]);                             \
    MATRIX_VAR_PROC(2, 1, 2, zmm_m, zmm_q, _RES, _PROC)         \
    zmm_q = _mm512_set1_ps((q)[3]);                             \
    MATRIX_VAR_PROC(2, 1, 3, zmm_m, zmm_q, _RES, _PROC)         \
    zmm_q = _mm512_set1_ps((q)[4]);                             \
    MATRIX_VAR_PROC(2, 1, 4, zmm_m, zmm_q, _RES, _PROC)         \
    zmm_q = _mm512_set1_ps((q)[5]);                             \
    MATRIX_VAR_PROC(2, 1, 5, zmm_m, zmm_q, _RES, _PROC)         \
    zmm_q = _mm512_set1_ps((q)[6]);                             \
    MATRIX_VAR_PROC(2, 1, 6, zmm_m, zmm_q, _RES, _PROC)         \
    zmm_q = _mm512_set1_ps((q)[7]);                             \
    MATRIX_VAR_PROC(2, 1, 7, zmm_m, zmm_q, _RES, _PROC)         \
    zmm_q = _mm512_set1_ps((q)[8]);                             \
    MATRIX_VAR_PROC(2, 1, 8, zmm_m, zmm_q, _RES, _PROC)         \
    zmm_q = _mm512_set1_ps((q)[9]);                             \
    MATRIX_VAR_PROC(2, 1, 9, zmm_m, zmm_q, _RES, _PROC)         \
    zmm_q = _mm512_set1_ps((q)[10]);                            \
    MATRIX_VAR_PROC(2, 1, 10, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[11]);                            \
    MATRIX_VAR_PROC(2, 1, 11, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[12]);                            \
    MATRIX_VAR_PROC(2, 1, 12, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[13]);                            \
    MATRIX_VAR_PROC(2, 1, 13, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[14]);                            \
    MATRIX_VAR_PROC(2, 1, 14, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[15]);                            \
    MATRIX_VAR_PROC(2, 1, 15, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[16]);                            \
    MATRIX_VAR_PROC(2, 1, 16, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[17]);                            \
    MATRIX_VAR_PROC(2, 1, 17, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[18]);                            \
    MATRIX_VAR_PROC(2, 1, 18, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[19]);                            \
    MATRIX_VAR_PROC(2, 1, 19, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[20]);                            \
    MATRIX_VAR_PROC(2, 1, 20, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[21]);                            \
    MATRIX_VAR_PROC(2, 1, 21, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[22]);                            \
    MATRIX_VAR_PROC(2, 1, 22, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[23]);                            \
    MATRIX_VAR_PROC(2, 1, 23, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[24]);                            \
    MATRIX_VAR_PROC(2, 1, 24, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[25]);                            \
    MATRIX_VAR_PROC(2, 1, 25, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[26]);                            \
    MATRIX_VAR_PROC(2, 1, 26, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[27]);                            \
    MATRIX_VAR_PROC(2, 1, 27, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[28]);                            \
    MATRIX_VAR_PROC(2, 1, 28, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[29]);                            \
    MATRIX_VAR_PROC(2, 1, 29, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[30]);                            \
    MATRIX_VAR_PROC(2, 1, 30, zmm_m, zmm_q, _RES, _PROC)        \
    zmm_q = _mm512_set1_ps((q)[31]);                            \
    MATRIX_VAR_PROC(2, 1, 31, zmm_m, zmm_q, _RES, _PROC)        \
  }

//! Single accumulation step of the FP32 distance kernel (M=2, N=1, NEON).
//! Reads four floats from `m` and two floats from `q`. The two query
//! scalars are spread into the lane pattern {q0, q0, q1, q1} and passed,
//! together with the loaded `m` vector, to `_PROC` with `_RES` itself as the
//! accumulator (no suffix is appended here).
//! `_PROC` is used without a trailing semicolon, so it has to expand to
//! complete statement(s).
#define MATRIX_FP32_ITER_2X1_NEON(m, q, _RES, _PROC)           \
  {                                                            \
    float32x4_t v_lhs = vld1q_f32(m);                          \
    float32x2_t v_pair = vld1_f32(q);                          \
    float32x2_t v_lo = vdup_lane_f32(v_pair, 0);               \
    float32x2_t v_hi = vdup_lane_f32(v_pair, 1);               \
    float32x4_t v_rhs = vcombine_f32(v_lo, v_hi);              \
    _PROC(v_lhs, v_rhs, _RES)                                  \
  }

//! Single accumulation step of the FP32 distance kernel (M=2, N=2, NEON).
//! Reads four floats from `q` and four floats from `m`. The query vector is
//! split into its low pair {q0, q1} and high pair {q2, q3}; the pattern
//! {q0, q0, q2, q2} feeds accumulator `<_RES>_0_0` and {q1, q1, q3, q3}
//! feeds `<_RES>_0_1`, each combined with the same `m` vector by `_PROC`.
//! `_PROC` is used without a trailing semicolon, so it has to expand to
//! complete statement(s).
#define MATRIX_FP32_ITER_2X2_NEON(m, q, _RES, _PROC)                  \
  {                                                                   \
    float32x4_t v_rhs = vld1q_f32(q);                                 \
    float32x4_t v_lhs = vld1q_f32(m);                                 \
    float32x2_t v_lo = vget_low_f32(v_rhs);                           \
    float32x2_t v_hi = vget_high_f32(v_rhs);                          \
    v_rhs = vcombine_f32(vdup_lane_f32(v_lo, 0),                      \
                         vdup_lane_f32(v_hi, 0));                     \
    _PROC(v_lhs, v_rhs, _RES##_0_0)                                   \
    v_rhs = vcombine_f32(vdup_lane_f32(v_lo, 1),                      \
                         vdup_lane_f32(v_hi, 1));                     \
    _PROC(v_lhs, v_rhs, _RES##_0_1)                                   \
  }

//! Single iteration step of the distance computation (FP32, M=4, N=1).
//! Reads eight floats from `m` (two vectors) and two floats from `q`.
//! The first vector is processed against a broadcast of q[0] into
//! `_RES##_0_0`, the second against a broadcast of q[1] into `_RES##_0_1`.
#define MATRIX_FP32_ITER_4X1_NEON(m, q, _RES, _PROC)  \
  {                                                   \
    float32x4_t v_m_0 = vld1q_f32(m + 0);             \
    float32x4_t v_m_1 = vld1q_f32(m + 4);             \
    float32x2_t v_q_pair = vld1_f32(q);               \
    float32x4_t v_q = vdupq_lane_f32(v_q_pair, 0);    \
    _PROC(v_m_0, v_q, _RES##_0_0)                     \
    v_q = vdupq_lane_f32(v_q_pair, 1);                \
    _PROC(v_m_1, v_q, _RES##_0_1)                     \
  }

//! Single iteration step of the distance computation (FP32, M=4, N=2).
//! Reads four floats from `m` and two floats from `q`; the same matrix
//! vector is processed against a broadcast of q[0] (into `_RES##_0_0`) and
//! then against a broadcast of q[1] (into `_RES##_0_1`).
#define MATRIX_FP32_ITER_4X2_NEON(m, q, _RES, _PROC)  \
  {                                                   \
    float32x4_t v_m = vld1q_f32(m);                   \
    float32x2_t v_q_pair = vld1_f32(q);               \
    float32x4_t v_q = vdupq_lane_f32(v_q_pair, 0);    \
    _PROC(v_m, v_q, _RES##_0_0)                       \
    v_q = vdupq_lane_f32(v_q_pair, 1);                \
    _PROC(v_m, v_q, _RES##_0_1)                       \
  }

//! Single iteration step of the distance computation (FP32, M=4, N=4).
//! Reads four floats from `m` and four from `q`; for each query lane k the
//! lane is broadcast into v_q and `_PROC` accumulates into `_RES##_0_k`.
#define MATRIX_FP32_ITER_4X4_NEON(m, q, _RES, _PROC)  \
  {                                                   \
    float32x4_t v_m = vld1q_f32(m);                   \
    float32x4_t v_q_quad = vld1q_f32(q);              \
    float32x4_t v_q = vdupq_laneq_f32(v_q_quad, 0);   \
    _PROC(v_m, v_q, _RES##_0_0)                       \
    v_q = vdupq_laneq_f32(v_q_quad, 1);               \
    _PROC(v_m, v_q, _RES##_0_1)                       \
    v_q = vdupq_laneq_f32(v_q_quad, 2);               \
    _PROC(v_m, v_q, _RES##_0_2)                       \
    v_q = vdupq_laneq_f32(v_q_quad, 3);               \
    _PROC(v_m, v_q, _RES##_0_3)                       \
  }

//! Single iteration step of the distance computation (FP32, M=8, N=1).
//! Broadcasts q[0] to every lane of v_q, then processes the two matrix
//! vectors read from `m + 0` and `m + 4` into `_RES##_0_0` and `_RES##_1_0`.
#define MATRIX_FP32_ITER_8X1_NEON(m, q, _RES, _PROC) \
  {                                                  \
    float32x4_t v_q = vld1q_dup_f32(q);              \
    float32x4_t v_m_0 = vld1q_f32(m + 0);            \
    _PROC(v_m_0, v_q, _RES##_0_0)                    \
    float32x4_t v_m_1 = vld1q_f32(m + 4);            \
    _PROC(v_m_1, v_q, _RES##_1_0)                    \
  }

//! Iterative process of computing distance (FP32, M=8, N=2)
//! Loads eight floats from `m` into v_m_0/v_m_1 and two floats from `q`
//! into v_p. Lane 0 and then lane 1 of v_p are broadcast into v_q, and after
//! each broadcast MATRIX_VAR_PROC(2, 1, k, v_m, v_q, _RES, _PROC) is invoked
//! with k equal to the lane index.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this chunk; presumably it
//! token-pastes row suffixes onto `v_m` and `_RES` -- confirm before renaming
//! any local.
#define MATRIX_FP32_ITER_8X2_NEON(m, q, _RES, _PROC) \
  {                                                  \
    float32x4_t v_m_0 = vld1q_f32(m + 0);            \
    float32x4_t v_m_1 = vld1q_f32(m + 4);            \
    float32x2_t v_p = vld1_f32(q);                   \
    float32x4_t v_q = vdupq_lane_f32(v_p, 0);        \
    MATRIX_VAR_PROC(2, 1, 0, v_m, v_q, _RES, _PROC)  \
    v_q = vdupq_lane_f32(v_p, 1);                    \
    MATRIX_VAR_PROC(2, 1, 1, v_m, v_q, _RES, _PROC)  \
  }

//! Iterative process of computing distance (FP32, M=8, N=4)
//! Loads eight floats from `m` into v_m_0/v_m_1 and four floats from `q`
//! into v_p. Lanes 0..3 of v_p are broadcast into v_q one after another, and
//! after each broadcast MATRIX_VAR_PROC(2, 1, k, v_m, v_q, _RES, _PROC) is
//! invoked with k equal to the lane index.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this chunk; presumably it
//! token-pastes row suffixes onto `v_m` and `_RES` -- confirm before renaming
//! any local.
#define MATRIX_FP32_ITER_8X4_NEON(m, q, _RES, _PROC) \
  {                                                  \
    float32x4_t v_m_0 = vld1q_f32(m + 0);            \
    float32x4_t v_m_1 = vld1q_f32(m + 4);            \
    float32x4_t v_p = vld1q_f32(q);                  \
    float32x4_t v_q = vdupq_laneq_f32(v_p, 0);       \
    MATRIX_VAR_PROC(2, 1, 0, v_m, v_q, _RES, _PROC)  \
    v_q = vdupq_laneq_f32(v_p, 1);                   \
    MATRIX_VAR_PROC(2, 1, 1, v_m, v_q, _RES, _PROC)  \
    v_q = vdupq_laneq_f32(v_p, 2);                   \
    MATRIX_VAR_PROC(2, 1, 2, v_m, v_q, _RES, _PROC)  \
    v_q = vdupq_laneq_f32(v_p, 3);                   \
    MATRIX_VAR_PROC(2, 1, 3, v_m, v_q, _RES, _PROC)  \
  }

//! Iterative process of computing distance (FP32, M=8, N=8)
//! Loads eight floats from `m` into v_m_0/v_m_1. The eight query floats are
//! read in two groups of four (`q + 0`, then `q + 4`) into v_p; each lane is
//! broadcast into v_q and followed by
//! MATRIX_VAR_PROC(2, 1, k, v_m, v_q, _RES, _PROC) with k running 0..7.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this chunk; presumably it
//! token-pastes row suffixes onto `v_m` and `_RES` -- confirm before renaming
//! any local.
#define MATRIX_FP32_ITER_8X8_NEON(m, q, _RES, _PROC) \
  {                                                  \
    float32x4_t v_m_0 = vld1q_f32(m + 0);            \
    float32x4_t v_m_1 = vld1q_f32(m + 4);            \
    float32x4_t v_p = vld1q_f32(q + 0);              \
    float32x4_t v_q = vdupq_laneq_f32(v_p, 0);       \
    MATRIX_VAR_PROC(2, 1, 0, v_m, v_q, _RES, _PROC)  \
    v_q = vdupq_laneq_f32(v_p, 1);                   \
    MATRIX_VAR_PROC(2, 1, 1, v_m, v_q, _RES, _PROC)  \
    v_q = vdupq_laneq_f32(v_p, 2);                   \
    MATRIX_VAR_PROC(2, 1, 2, v_m, v_q, _RES, _PROC)  \
    v_q = vdupq_laneq_f32(v_p, 3);                   \
    MATRIX_VAR_PROC(2, 1, 3, v_m, v_q, _RES, _PROC)  \
    v_p = vld1q_f32(q + 4);                          \
    v_q = vdupq_laneq_f32(v_p, 0);                   \
    MATRIX_VAR_PROC(2, 1, 4, v_m, v_q, _RES, _PROC)  \
    v_q = vdupq_laneq_f32(v_p, 1);                   \
    MATRIX_VAR_PROC(2, 1, 5, v_m, v_q, _RES, _PROC)  \
    v_q = vdupq_laneq_f32(v_p, 2);                   \
    MATRIX_VAR_PROC(2, 1, 6, v_m, v_q, _RES, _PROC)  \
    v_q = vdupq_laneq_f32(v_p, 3);                   \
    MATRIX_VAR_PROC(2, 1, 7, v_m, v_q, _RES, _PROC)  \
  }

//! Iterative process of computing distance (FP32, M=16, N=1)
//! Loads sixteen floats from `m` into v_m_0..v_m_3, broadcasts q[0] to every
//! lane of v_q, and makes a single
//! MATRIX_VAR_PROC(4, 1, 0, v_m, v_q, _RES, _PROC) call.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this chunk; presumably it
//! token-pastes row suffixes onto `v_m` and `_RES` -- confirm before renaming
//! any local.
#define MATRIX_FP32_ITER_16X1_NEON(m, q, _RES, _PROC) \
  {                                                   \
    float32x4_t v_m_0 = vld1q_f32(m + 0);             \
    float32x4_t v_m_1 = vld1q_f32(m + 4);             \
    float32x4_t v_m_2 = vld1q_f32(m + 8);             \
    float32x4_t v_m_3 = vld1q_f32(m + 12);            \
    float32x4_t v_q = vld1q_dup_f32(q);               \
    MATRIX_VAR_PROC(4, 1, 0, v_m, v_q, _RES, _PROC)   \
  }

//! Iterative process of computing distance (FP32, M=16, N=2)
//! Loads sixteen floats from `m` into v_m_0..v_m_3 and two floats from `q`
//! into v_p. Lane 0 and then lane 1 of v_p are broadcast into v_q, each
//! followed by MATRIX_VAR_PROC(4, 1, k, v_m, v_q, _RES, _PROC) with k equal
//! to the lane index.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this chunk; presumably it
//! token-pastes row suffixes onto `v_m` and `_RES` -- confirm before renaming
//! any local.
#define MATRIX_FP32_ITER_16X2_NEON(m, q, _RES, _PROC) \
  {                                                   \
    float32x4_t v_m_0 = vld1q_f32(m + 0);             \
    float32x4_t v_m_1 = vld1q_f32(m + 4);             \
    float32x4_t v_m_2 = vld1q_f32(m + 8);             \
    float32x4_t v_m_3 = vld1q_f32(m + 12);            \
    float32x2_t v_p = vld1_f32(q);                    \
    float32x4_t v_q = vdupq_lane_f32(v_p, 0);         \
    MATRIX_VAR_PROC(4, 1, 0, v_m, v_q, _RES, _PROC)   \
    v_q = vdupq_lane_f32(v_p, 1);                     \
    MATRIX_VAR_PROC(4, 1, 1, v_m, v_q, _RES, _PROC)   \
  }

//! Iterative process of computing distance (FP32, M=16, N=4)
//! Loads sixteen floats from `m` into v_m_0..v_m_3 and four floats from `q`
//! into v_q. Note the roles of v_q and v_p here: v_q holds the raw query
//! values and v_p receives the per-lane broadcast that is handed to
//! MATRIX_VAR_PROC(4, 1, k, v_m, v_p, _RES, _PROC), k = 0..3.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this chunk; presumably it
//! token-pastes row suffixes onto `v_m` and `_RES` -- confirm before renaming
//! any local.
#define MATRIX_FP32_ITER_16X4_NEON(m, q, _RES, _PROC) \
  {                                                   \
    float32x4_t v_m_0 = vld1q_f32(m + 0);             \
    float32x4_t v_m_1 = vld1q_f32(m + 4);             \
    float32x4_t v_m_2 = vld1q_f32(m + 8);             \
    float32x4_t v_m_3 = vld1q_f32(m + 12);            \
    float32x4_t v_q = vld1q_f32(q);                   \
    float32x4_t v_p = vdupq_laneq_f32(v_q, 0);        \
    MATRIX_VAR_PROC(4, 1, 0, v_m, v_p, _RES, _PROC)   \
    v_p = vdupq_laneq_f32(v_q, 1);                    \
    MATRIX_VAR_PROC(4, 1, 1, v_m, v_p, _RES, _PROC)   \
    v_p = vdupq_laneq_f32(v_q, 2);                    \
    MATRIX_VAR_PROC(4, 1, 2, v_m, v_p, _RES, _PROC)   \
    v_p = vdupq_laneq_f32(v_q, 3);                    \
    MATRIX_VAR_PROC(4, 1, 3, v_m, v_p, _RES, _PROC)   \
  }

//! Iterative process of computing distance (FP32, M=16, N=8)
//! Loads sixteen floats from `m` into v_m_0..v_m_3. The eight query floats
//! are read in two groups of four (`q + 0`, then `q + 4`) into v_q; each lane
//! is broadcast into v_p and followed by
//! MATRIX_VAR_PROC(4, 1, k, v_m, v_p, _RES, _PROC) with k running 0..7.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this chunk; presumably it
//! token-pastes row suffixes onto `v_m` and `_RES` -- confirm before renaming
//! any local.
#define MATRIX_FP32_ITER_16X8_NEON(m, q, _RES, _PROC) \
  {                                                   \
    float32x4_t v_m_0 = vld1q_f32(m + 0);             \
    float32x4_t v_m_1 = vld1q_f32(m + 4);             \
    float32x4_t v_m_2 = vld1q_f32(m + 8);             \
    float32x4_t v_m_3 = vld1q_f32(m + 12);            \
    float32x4_t v_q = vld1q_f32(q + 0);               \
    float32x4_t v_p = vdupq_laneq_f32(v_q, 0);        \
    MATRIX_VAR_PROC(4, 1, 0, v_m, v_p, _RES, _PROC)   \
    v_p = vdupq_laneq_f32(v_q, 1);                    \
    MATRIX_VAR_PROC(4, 1, 1, v_m, v_p, _RES, _PROC)   \
    v_p = vdupq_laneq_f32(v_q, 2);                    \
    MATRIX_VAR_PROC(4, 1, 2, v_m, v_p, _RES, _PROC)   \
    v_p = vdupq_laneq_f32(v_q, 3);                    \
    MATRIX_VAR_PROC(4, 1, 3, v_m, v_p, _RES, _PROC)   \
    v_q = vld1q_f32(q + 4);                           \
    v_p = vdupq_laneq_f32(v_q, 0);                    \
    MATRIX_VAR_PROC(4, 1, 4, v_m, v_p, _RES, _PROC)   \
    v_p = vdupq_laneq_f32(v_q, 1);                    \
    MATRIX_VAR_PROC(4, 1, 5, v_m, v_p, _RES, _PROC)   \
    v_p = vdupq_laneq_f32(v_q, 2);                    \
    MATRIX_VAR_PROC(4, 1, 6, v_m, v_p, _RES, _PROC)   \
    v_p = vdupq_laneq_f32(v_q, 3);                    \
    MATRIX_VAR_PROC(4, 1, 7, v_m, v_p, _RES, _PROC)   \
  }

//! Iterative process of computing distance (FP32, M=16, N=16)
//! Loads sixteen floats from `m` into v_m_0..v_m_3. The sixteen query floats
//! are read in four groups of four (`q + 0`, `q + 4`, `q + 8`, `q + 12`) into
//! v_q; each lane is broadcast into v_p and followed by
//! MATRIX_VAR_PROC(4, 1, k, v_m, v_p, _RES, _PROC) with k running 0..15.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this chunk; presumably it
//! token-pastes row suffixes onto `v_m` and `_RES` -- confirm before renaming
//! any local.
#define MATRIX_FP32_ITER_16X16_NEON(m, q, _RES, _PROC) \
  {                                                    \
    float32x4_t v_m_0 = vld1q_f32(m + 0);              \
    float32x4_t v_m_1 = vld1q_f32(m + 4);              \
    float32x4_t v_m_2 = vld1q_f32(m + 8);              \
    float32x4_t v_m_3 = vld1q_f32(m + 12);             \
    float32x4_t v_q = vld1q_f32(q + 0);                \
    float32x4_t v_p = vdupq_laneq_f32(v_q, 0);         \
    MATRIX_VAR_PROC(4, 1, 0, v_m, v_p, _RES, _PROC)    \
    v_p = vdupq_laneq_f32(v_q, 1);                     \
    MATRIX_VAR_PROC(4, 1, 1, v_m, v_p, _RES, _PROC)    \
    v_p = vdupq_laneq_f32(v_q, 2);                     \
    MATRIX_VAR_PROC(4, 1, 2, v_m, v_p, _RES, _PROC)    \
    v_p = vdupq_laneq_f32(v_q, 3);                     \
    MATRIX_VAR_PROC(4, 1, 3, v_m, v_p, _RES, _PROC)    \
    v_q = vld1q_f32(q + 4);                            \
    v_p = vdupq_laneq_f32(v_q, 0);                     \
    MATRIX_VAR_PROC(4, 1, 4, v_m, v_p, _RES, _PROC)    \
    v_p = vdupq_laneq_f32(v_q, 1);                     \
    MATRIX_VAR_PROC(4, 1, 5, v_m, v_p, _RES, _PROC)    \
    v_p = vdupq_laneq_f32(v_q, 2);                     \
    MATRIX_VAR_PROC(4, 1, 6, v_m, v_p, _RES, _PROC)    \
    v_p = vdupq_laneq_f32(v_q, 3);                     \
    MATRIX_VAR_PROC(4, 1, 7, v_m, v_p, _RES, _PROC)    \
    v_q = vld1q_f32(q + 8);                            \
    v_p = vdupq_laneq_f32(v_q, 0);                     \
    MATRIX_VAR_PROC(4, 1, 8, v_m, v_p, _RES, _PROC)    \
    v_p = vdupq_laneq_f32(v_q, 1);                     \
    MATRIX_VAR_PROC(4, 1, 9, v_m, v_p, _RES, _PROC)    \
    v_p = vdupq_laneq_f32(v_q, 2);                     \
    MATRIX_VAR_PROC(4, 1, 10, v_m, v_p, _RES, _PROC)   \
    v_p = vdupq_laneq_f32(v_q, 3);                     \
    MATRIX_VAR_PROC(4, 1, 11, v_m, v_p, _RES, _PROC)   \
    v_q = vld1q_f32(q + 12);                           \
    v_p = vdupq_laneq_f32(v_q, 0);                     \
    MATRIX_VAR_PROC(4, 1, 12, v_m, v_p, _RES, _PROC)   \
    v_p = vdupq_laneq_f32(v_q, 1);                     \
    MATRIX_VAR_PROC(4, 1, 13, v_m, v_p, _RES, _PROC)   \
    v_p = vdupq_laneq_f32(v_q, 2);                     \
    MATRIX_VAR_PROC(4, 1, 14, v_m, v_p, _RES, _PROC)   \
    v_p = vdupq_laneq_f32(v_q, 3);                     \
    MATRIX_VAR_PROC(4, 1, 15, v_m, v_p, _RES, _PROC)   \
  }

//! Iterative process of computing distance (FP32, M=32, N=1)
//! Broadcasts q[0] to every lane of v_q, then handles the 32 matrix floats
//! in two batches of four vectors: `m + 0..12` feed `_RES##_0_0` ..
//! `_RES##_3_0`, after which v_m_0..v_m_3 are reloaded from `m + 16..28` and
//! feed `_RES##_4_0` .. `_RES##_7_0`. Only four matrix vectors are live at
//! any time.
#define MATRIX_FP32_ITER_32X1_NEON(m, q, _RES, _PROC) \
  {                                                   \
    float32x4_t v_q = vld1q_dup_f32(q);               \
    float32x4_t v_m_0 = vld1q_f32(m + 0);             \
    float32x4_t v_m_1 = vld1q_f32(m + 4);             \
    float32x4_t v_m_2 = vld1q_f32(m + 8);             \
    float32x4_t v_m_3 = vld1q_f32(m + 12);            \
    _PROC(v_m_0, v_q, _RES##_0_0)                     \
    _PROC(v_m_1, v_q, _RES##_1_0)                     \
    _PROC(v_m_2, v_q, _RES##_2_0)                     \
    _PROC(v_m_3, v_q, _RES##_3_0)                     \
    v_m_0 = vld1q_f32(m + 16);                        \
    v_m_1 = vld1q_f32(m + 20);                        \
    v_m_2 = vld1q_f32(m + 24);                        \
    v_m_3 = vld1q_f32(m + 28);                        \
    _PROC(v_m_0, v_q, _RES##_4_0)                     \
    _PROC(v_m_1, v_q, _RES##_5_0)                     \
    _PROC(v_m_2, v_q, _RES##_6_0)                     \
    _PROC(v_m_3, v_q, _RES##_7_0)                     \
  }

//! Iterative process of computing distance (FP32, M=32, N=2)
//! Broadcasts q[0] and q[1] once into v_q_0 and v_q_1. The 32 matrix floats
//! are handled in two batches of four vectors (`m + 0..12`, then
//! `m + 16..28`, reusing v_m_0..v_m_3); each vector is passed together with
//! the `v_q` prefix to MATRIX_VAR_PROC(1, 2, k, ...), k = 0..7.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this chunk; presumably it
//! token-pastes column suffixes onto `v_q` (reaching v_q_0/v_q_1) and onto
//! `_RES` -- confirm before renaming any local.
#define MATRIX_FP32_ITER_32X2_NEON(m, q, _RES, _PROC) \
  {                                                   \
    float32x2_t v_p = vld1_f32(q);                    \
    float32x4_t v_q_0 = vdupq_lane_f32(v_p, 0);       \
    float32x4_t v_q_1 = vdupq_lane_f32(v_p, 1);       \
    float32x4_t v_m_0 = vld1q_f32(m + 0);             \
    float32x4_t v_m_1 = vld1q_f32(m + 4);             \
    float32x4_t v_m_2 = vld1q_f32(m + 8);             \
    float32x4_t v_m_3 = vld1q_f32(m + 12);            \
    MATRIX_VAR_PROC(1, 2, 0, v_m_0, v_q, _RES, _PROC) \
    MATRIX_VAR_PROC(1, 2, 1, v_m_1, v_q, _RES, _PROC) \
    MATRIX_VAR_PROC(1, 2, 2, v_m_2, v_q, _RES, _PROC) \
    MATRIX_VAR_PROC(1, 2, 3, v_m_3, v_q, _RES, _PROC) \
    v_m_0 = vld1q_f32(m + 16);                        \
    v_m_1 = vld1q_f32(m + 20);                        \
    v_m_2 = vld1q_f32(m + 24);                        \
    v_m_3 = vld1q_f32(m + 28);                        \
    MATRIX_VAR_PROC(1, 2, 4, v_m_0, v_q, _RES, _PROC) \
    MATRIX_VAR_PROC(1, 2, 5, v_m_1, v_q, _RES, _PROC) \
    MATRIX_VAR_PROC(1, 2, 6, v_m_2, v_q, _RES, _PROC) \
    MATRIX_VAR_PROC(1, 2, 7, v_m_3, v_q, _RES, _PROC) \
  }

//! Iterative process of computing distance (FP32, M=32, N=4)
//! Broadcasts the four query floats once into v_q_0..v_q_3. The 32 matrix
//! floats are handled in two batches of four vectors (`m + 0..12`, then
//! `m + 16..28`, reusing v_m_0..v_m_3); each vector is passed together with
//! the `v_q` prefix to MATRIX_VAR_PROC(1, 4, k, ...), k = 0..7.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this chunk; presumably it
//! token-pastes column suffixes onto `v_q` (reaching v_q_0..v_q_3) and onto
//! `_RES` -- confirm before renaming any local.
#define MATRIX_FP32_ITER_32X4_NEON(m, q, _RES, _PROC) \
  {                                                   \
    float32x4_t v_p = vld1q_f32(q);                   \
    float32x4_t v_q_0 = vdupq_laneq_f32(v_p, 0);      \
    float32x4_t v_q_1 = vdupq_laneq_f32(v_p, 1);      \
    float32x4_t v_q_2 = vdupq_laneq_f32(v_p, 2);      \
    float32x4_t v_q_3 = vdupq_laneq_f32(v_p, 3);      \
    float32x4_t v_m_0 = vld1q_f32(m + 0);             \
    float32x4_t v_m_1 = vld1q_f32(m + 4);             \
    float32x4_t v_m_2 = vld1q_f32(m + 8);             \
    float32x4_t v_m_3 = vld1q_f32(m + 12);            \
    MATRIX_VAR_PROC(1, 4, 0, v_m_0, v_q, _RES, _PROC) \
    MATRIX_VAR_PROC(1, 4, 1, v_m_1, v_q, _RES, _PROC) \
    MATRIX_VAR_PROC(1, 4, 2, v_m_2, v_q, _RES, _PROC) \
    MATRIX_VAR_PROC(1, 4, 3, v_m_3, v_q, _RES, _PROC) \
    v_m_0 = vld1q_f32(m + 16);                        \
    v_m_1 = vld1q_f32(m + 20);                        \
    v_m_2 = vld1q_f32(m + 24);                        \
    v_m_3 = vld1q_f32(m + 28);                        \
    MATRIX_VAR_PROC(1, 4, 4, v_m_0, v_q, _RES, _PROC) \
    MATRIX_VAR_PROC(1, 4, 5, v_m_1, v_q, _RES, _PROC) \
    MATRIX_VAR_PROC(1, 4, 6, v_m_2, v_q, _RES, _PROC) \
    MATRIX_VAR_PROC(1, 4, 7, v_m_3, v_q, _RES, _PROC) \
  }

//! Iterative process of computing distance (FP32, M=32, N=8)
//! Reads the eight query floats as v_p_0 (`q + 0`) and v_p_1 (`q + 4`) and
//! broadcasts every lane once into v_q_0..v_q_7. The 32 matrix floats are
//! handled in two batches of four vectors (`m + 0..12`, then `m + 16..28`,
//! reusing v_m_0..v_m_3); each vector is passed together with the `v_q`
//! prefix to MATRIX_VAR_PROC(1, 8, k, ...), k = 0..7.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this chunk; presumably it
//! token-pastes column suffixes onto `v_q` (reaching v_q_0..v_q_7) and onto
//! `_RES` -- confirm before renaming any local.
#define MATRIX_FP32_ITER_32X8_NEON(m, q, _RES, _PROC) \
  {                                                   \
    float32x4_t v_p_0 = vld1q_f32(q + 0);             \
    float32x4_t v_p_1 = vld1q_f32(q + 4);             \
    float32x4_t v_q_0 = vdupq_laneq_f32(v_p_0, 0);    \
    float32x4_t v_q_1 = vdupq_laneq_f32(v_p_0, 1);    \
    float32x4_t v_q_2 = vdupq_laneq_f32(v_p_0, 2);    \
    float32x4_t v_q_3 = vdupq_laneq_f32(v_p_0, 3);    \
    float32x4_t v_q_4 = vdupq_laneq_f32(v_p_1, 0);    \
    float32x4_t v_q_5 = vdupq_laneq_f32(v_p_1, 1);    \
    float32x4_t v_q_6 = vdupq_laneq_f32(v_p_1, 2);    \
    float32x4_t v_q_7 = vdupq_laneq_f32(v_p_1, 3);    \
    float32x4_t v_m_0 = vld1q_f32(m + 0);             \
    float32x4_t v_m_1 = vld1q_f32(m + 4);             \
    float32x4_t v_m_2 = vld1q_f32(m + 8);             \
    float32x4_t v_m_3 = vld1q_f32(m + 12);            \
    MATRIX_VAR_PROC(1, 8, 0, v_m_0, v_q, _RES, _PROC) \
    MATRIX_VAR_PROC(1, 8, 1, v_m_1, v_q, _RES, _PROC) \
    MATRIX_VAR_PROC(1, 8, 2, v_m_2, v_q, _RES, _PROC) \
    MATRIX_VAR_PROC(1, 8, 3, v_m_3, v_q, _RES, _PROC) \
    v_m_0 = vld1q_f32(m + 16);                        \
    v_m_1 = vld1q_f32(m + 20);                        \
    v_m_2 = vld1q_f32(m + 24);                        \
    v_m_3 = vld1q_f32(m + 28);                        \
    MATRIX_VAR_PROC(1, 8, 4, v_m_0, v_q, _RES, _PROC) \
    MATRIX_VAR_PROC(1, 8, 5, v_m_1, v_q, _RES, _PROC) \
    MATRIX_VAR_PROC(1, 8, 6, v_m_2, v_q, _RES, _PROC) \
    MATRIX_VAR_PROC(1, 8, 7, v_m_3, v_q, _RES, _PROC) \
  }

//! Iterative process of computing distance (FP32, M=32, N=16)
//! Loads all 32 matrix floats up front into v_m_0..v_m_7. The sixteen query
//! floats are read in four groups of four (`q + 0`, `q + 4`, `q + 8`,
//! `q + 12`) into v_q; each lane is broadcast into v_p and followed by
//! MATRIX_VAR_PROC(8, 1, k, v_m, v_p, _RES, _PROC) with k running 0..15.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this chunk; presumably it
//! token-pastes row suffixes onto `v_m` and `_RES` -- confirm before renaming
//! any local.
#define MATRIX_FP32_ITER_32X16_NEON(m, q, _RES, _PROC) \
  {                                                    \
    float32x4_t v_m_0 = vld1q_f32(m + 0);              \
    float32x4_t v_m_1 = vld1q_f32(m + 4);              \
    float32x4_t v_m_2 = vld1q_f32(m + 8);              \
    float32x4_t v_m_3 = vld1q_f32(m + 12);             \
    float32x4_t v_m_4 = vld1q_f32(m + 16);             \
    float32x4_t v_m_5 = vld1q_f32(m + 20);             \
    float32x4_t v_m_6 = vld1q_f32(m + 24);             \
    float32x4_t v_m_7 = vld1q_f32(m + 28);             \
    float32x4_t v_q = vld1q_f32(q + 0);                \
    float32x4_t v_p = vdupq_laneq_f32(v_q, 0);         \
    MATRIX_VAR_PROC(8, 1, 0, v_m, v_p, _RES, _PROC)    \
    v_p = vdupq_laneq_f32(v_q, 1);                     \
    MATRIX_VAR_PROC(8, 1, 1, v_m, v_p, _RES, _PROC)    \
    v_p = vdupq_laneq_f32(v_q, 2);                     \
    MATRIX_VAR_PROC(8, 1, 2, v_m, v_p, _RES, _PROC)    \
    v_p = vdupq_laneq_f32(v_q, 3);                     \
    MATRIX_VAR_PROC(8, 1, 3, v_m, v_p, _RES, _PROC)    \
    v_q = vld1q_f32(q + 4);                            \
    v_p = vdupq_laneq_f32(v_q, 0);                     \
    MATRIX_VAR_PROC(8, 1, 4, v_m, v_p, _RES, _PROC)    \
    v_p = vdupq_laneq_f32(v_q, 1);                     \
    MATRIX_VAR_PROC(8, 1, 5, v_m, v_p, _RES, _PROC)    \
    v_p = vdupq_laneq_f32(v_q, 2);                     \
    MATRIX_VAR_PROC(8, 1, 6, v_m, v_p, _RES, _PROC)    \
    v_p = vdupq_laneq_f32(v_q, 3);                     \
    MATRIX_VAR_PROC(8, 1, 7, v_m, v_p, _RES, _PROC)    \
    v_q = vld1q_f32(q + 8);                            \
    v_p = vdupq_laneq_f32(v_q, 0);                     \
    MATRIX_VAR_PROC(8, 1, 8, v_m, v_p, _RES, _PROC)    \
    v_p = vdupq_laneq_f32(v_q, 1);                     \
    MATRIX_VAR_PROC(8, 1, 9, v_m, v_p, _RES, _PROC)    \
    v_p = vdupq_laneq_f32(v_q, 2);                     \
    MATRIX_VAR_PROC(8, 1, 10, v_m, v_p, _RES, _PROC)   \
    v_p = vdupq_laneq_f32(v_q, 3);                     \
    MATRIX_VAR_PROC(8, 1, 11, v_m, v_p, _RES, _PROC)   \
    v_q = vld1q_f32(q + 12);                           \
    v_p = vdupq_laneq_f32(v_q, 0);                     \
    MATRIX_VAR_PROC(8, 1, 12, v_m, v_p, _RES, _PROC)   \
    v_p = vdupq_laneq_f32(v_q, 1);                     \
    MATRIX_VAR_PROC(8, 1, 13, v_m, v_p, _RES, _PROC)   \
    v_p = vdupq_laneq_f32(v_q, 2);                     \
    MATRIX_VAR_PROC(8, 1, 14, v_m, v_p, _RES, _PROC)   \
    v_p = vdupq_laneq_f32(v_q, 3);                     \
    MATRIX_VAR_PROC(8, 1, 15, v_m, v_p, _RES, _PROC)   \
  }

//! Iterative process of computing distance (FP32, M=32, N=32)
//! Loads all 32 matrix floats up front into v_m_0..v_m_7. The 32 query
//! floats are read in eight groups of four (`q + 0`, `q + 4`, ... `q + 28`)
//! into v_q; each lane is broadcast into v_p and followed by
//! MATRIX_VAR_PROC(8, 1, k, v_m, v_p, _RES, _PROC) with k running 0..31.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this chunk; presumably it
//! token-pastes row suffixes onto `v_m` and `_RES` -- confirm before renaming
//! any local.
#define MATRIX_FP32_ITER_32X32_NEON(m, q, _RES, _PROC) \
  {                                                    \
    float32x4_t v_m_0 = vld1q_f32(m + 0);              \
    float32x4_t v_m_1 = vld1q_f32(m + 4);              \
    float32x4_t v_m_2 = vld1q_f32(m + 8);              \
    float32x4_t v_m_3 = vld1q_f32(m + 12);             \
    float32x4_t v_m_4 = vld1q_f32(m + 16);             \
    float32x4_t v_m_5 = vld1q_f32(m + 20);             \
    float32x4_t v_m_6 = vld1q_f32(m + 24);             \
    float32x4_t v_m_7 = vld1q_f32(m + 28);             \
    float32x4_t v_q = vld1q_f32(q + 0);                \
    float32x4_t v_p = vdupq_laneq_f32(v_q, 0);         \
    MATRIX_VAR_PROC(8, 1, 0, v_m, v_p, _RES, _PROC)    \
    v_p = vdupq_laneq_f32(v_q, 1);                     \
    MATRIX_VAR_PROC(8, 1, 1, v_m, v_p, _RES, _PROC)    \
    v_p = vdupq_laneq_f32(v_q, 2);                     \
    MATRIX_VAR_PROC(8, 1, 2, v_m, v_p, _RES, _PROC)    \
    v_p = vdupq_laneq_f32(v_q, 3);                     \
    MATRIX_VAR_PROC(8, 1, 3, v_m, v_p, _RES, _PROC)    \
    v_q = vld1q_f32(q + 4);                            \
    v_p = vdupq_laneq_f32(v_q, 0);                     \
    MATRIX_VAR_PROC(8, 1, 4, v_m, v_p, _RES, _PROC)    \
    v_p = vdupq_laneq_f32(v_q, 1);                     \
    MATRIX_VAR_PROC(8, 1, 5, v_m, v_p, _RES, _PROC)    \
    v_p = vdupq_laneq_f32(v_q, 2);                     \
    MATRIX_VAR_PROC(8, 1, 6, v_m, v_p, _RES, _PROC)    \
    v_p = vdupq_laneq_f32(v_q, 3);                     \
    MATRIX_VAR_PROC(8, 1, 7, v_m, v_p, _RES, _PROC)    \
    v_q = vld1q_f32(q + 8);                            \
    v_p = vdupq_laneq_f32(v_q, 0);                     \
    MATRIX_VAR_PROC(8, 1, 8, v_m, v_p, _RES, _PROC)    \
    v_p = vdupq_laneq_f32(v_q, 1);                     \
    MATRIX_VAR_PROC(8, 1, 9, v_m, v_p, _RES, _PROC)    \
    v_p = vdupq_laneq_f32(v_q, 2);                     \
    MATRIX_VAR_PROC(8, 1, 10, v_m, v_p, _RES, _PROC)   \
    v_p = vdupq_laneq_f32(v_q, 3);                     \
    MATRIX_VAR_PROC(8, 1, 11, v_m, v_p, _RES, _PROC)   \
    v_q = vld1q_f32(q + 12);                           \
    v_p = vdupq_laneq_f32(v_q, 0);                     \
    MATRIX_VAR_PROC(8, 1, 12, v_m, v_p, _RES, _PROC)   \
    v_p = vdupq_laneq_f32(v_q, 1);                     \
    MATRIX_VAR_PROC(8, 1, 13, v_m, v_p, _RES, _PROC)   \
    v_p = vdupq_laneq_f32(v_q, 2);                     \
    MATRIX_VAR_PROC(8, 1, 14, v_m, v_p, _RES, _PROC)   \
    v_p = vdupq_laneq_f32(v_q, 3);                     \
    MATRIX_VAR_PROC(8, 1, 15, v_m, v_p, _RES, _PROC)   \
    v_q = vld1q_f32(q + 16);                           \
    v_p = vdupq_laneq_f32(v_q, 0);                     \
    MATRIX_VAR_PROC(8, 1, 16, v_m, v_p, _RES, _PROC)   \
    v_p = vdupq_laneq_f32(v_q, 1);                     \
    MATRIX_VAR_PROC(8, 1, 17, v_m, v_p, _RES, _PROC)   \
    v_p = vdupq_laneq_f32(v_q, 2);                     \
    MATRIX_VAR_PROC(8, 1, 18, v_m, v_p, _RES, _PROC)   \
    v_p = vdupq_laneq_f32(v_q, 3);                     \
    MATRIX_VAR_PROC(8, 1, 19, v_m, v_p, _RES, _PROC)   \
    v_q = vld1q_f32(q + 20);                           \
    v_p = vdupq_laneq_f32(v_q, 0);                     \
    MATRIX_VAR_PROC(8, 1, 20, v_m, v_p, _RES, _PROC)   \
    v_p = vdupq_laneq_f32(v_q, 1);                     \
    MATRIX_VAR_PROC(8, 1, 21, v_m, v_p, _RES, _PROC)   \
    v_p = vdupq_laneq_f32(v_q, 2);                     \
    MATRIX_VAR_PROC(8, 1, 22, v_m, v_p, _RES, _PROC)   \
    v_p = vdupq_laneq_f32(v_q, 3);                     \
    MATRIX_VAR_PROC(8, 1, 23, v_m, v_p, _RES, _PROC)   \
    v_q = vld1q_f32(q + 24);                           \
    v_p = vdupq_laneq_f32(v_q, 0);                     \
    MATRIX_VAR_PROC(8, 1, 24, v_m, v_p, _RES, _PROC)   \
    v_p = vdupq_laneq_f32(v_q, 1);                     \
    MATRIX_VAR_PROC(8, 1, 25, v_m, v_p, _RES, _PROC)   \
    v_p = vdupq_laneq_f32(v_q, 2);                     \
    MATRIX_VAR_PROC(8, 1, 26, v_m, v_p, _RES, _PROC)   \
    v_p = vdupq_laneq_f32(v_q, 3);                     \
    MATRIX_VAR_PROC(8, 1, 27, v_m, v_p, _RES, _PROC)   \
    v_q = vld1q_f32(q + 28);                           \
    v_p = vdupq_laneq_f32(v_q, 0);                     \
    MATRIX_VAR_PROC(8, 1, 28, v_m, v_p, _RES, _PROC)   \
    v_p = vdupq_laneq_f32(v_q, 1);                     \
    MATRIX_VAR_PROC(8, 1, 29, v_m, v_p, _RES, _PROC)   \
    v_p = vdupq_laneq_f32(v_q, 2);                     \
    MATRIX_VAR_PROC(8, 1, 30, v_m, v_p, _RES, _PROC)   \
    v_p = vdupq_laneq_f32(v_q, 3);                     \
    MATRIX_VAR_PROC(8, 1, 31, v_m, v_p, _RES, _PROC)   \
  }


================================================
FILE: src/ailego/math/distance_matrix_inner_product_utility.i
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#if defined(__SSE4_1__)
//! Lookup table from a 4-bit pattern to its signed value: indices 0..7 map
//! to 0..7 and indices 8..15 map to -8..-1. The sixteen entries are stored
//! twice, giving a 32-byte table.
static const AILEGO_ALIGNED(32) int8_t Int4ConvertTable[32] = {
    0,  1,  2,  3,  4,  5,  6,  7,   // 0x0 .. 0x7
    -8, -7, -6, -5, -4, -3, -2, -1,  // 0x8 .. 0xf
    0,  1,  2,  3,  4,  5,  6,  7,   // second copy, 0x0 .. 0x7
    -8, -7, -6, -5, -4, -3, -2, -1}; // second copy, 0x8 .. 0xf

//! -0.0f in every FP32 lane
#define NEGZEROS_FP32_SSE _mm_set1_ps(-0.0f)
//! 0x0f in every byte
#define MASK_INT4_SSE _mm_set1_epi32(0x0f0f0f0f)
//! 0x0001 in every 16-bit lane
#define ONES_INT16_SSE _mm_set1_epi32(0x00010001)
//! The first 16 bytes of Int4ConvertTable, loaded with an aligned load
#define INT4_LOOKUP_SSE \
  _mm_load_si128(reinterpret_cast<const __m128i *>(Int4ConvertTable))
#endif  // __SSE4_1__

#if defined(__AVX__)
// #define NEGZEROS_FP32_AVX _mm256_set1_ps(-0.0f)
//! 0x0f in every byte
#define MASK_INT4_AVX _mm256_set1_epi32(0x0f0f0f0f)
//! 0x0001 in every 16-bit lane
#define ONES_INT16_AVX _mm256_set1_epi32(0x00010001)
//! All 32 bytes of Int4ConvertTable, loaded with an aligned load
#define INT4_LOOKUP_AVX \
  _mm256_load_si256(reinterpret_cast<const __m256i *>(Int4ConvertTable))
#endif  // __AVX__

#if defined(__AVX512F__) && !defined(__AVX512DQ__)
//! Stand-in for _mm512_xor_ps when AVX512F is available but AVX512DQ is not:
//! both operands are reinterpreted as integer vectors, XOR-ed with
//! _mm512_xor_epi32, and the result is reinterpreted back to FP32.
#define _mm512_xor_ps(lhs, rhs)                                  \
  _mm512_castsi512_ps(_mm512_xor_epi32(_mm512_castps_si512(lhs), \
                                       _mm512_castps_si512(rhs)))
#endif  // __AVX512F__ && !__AVX512DQ__

//! Flip the sign of a scalar value (GENERAL); expands to an expression
#define NEGATE_FP32_GENERAL(v) (-(v))

//! Fused multiply-add on packed FP32 (SSE):
//! xmm_acc = xmm_lhs * xmm_rhs + xmm_acc. The expansion ends with ';'.
#define FMA_FP32_SSE(xmm_lhs, xmm_rhs, xmm_acc) \
  xmm_acc = _mm_fmadd_ps(xmm_lhs, xmm_rhs, xmm_acc);

//! Fused multiply-add on packed FP32 (AVX):
//! ymm_acc = ymm_lhs * ymm_rhs + ymm_acc. The expansion ends with ';'.
#define FMA_FP32_AVX(ymm_lhs, ymm_rhs, ymm_acc) \
  ymm_acc = _mm256_fmadd_ps(ymm_lhs, ymm_rhs, ymm_acc);

//! Fused multiply-add on packed FP32 (AVX512):
//! zmm_acc = zmm_lhs * zmm_rhs + zmm_acc. The expansion ends with ';'.
#define FMA_FP32_AVX512(zmm_lhs, zmm_rhs, zmm_acc) \
  zmm_acc = _mm512_fmadd_ps(zmm_lhs, zmm_rhs, zmm_acc);

//! Fused multiply-add on packed FP16 (AVX512FP16):
//! zmm_acc = zmm_lhs * zmm_rhs + zmm_acc. The expansion ends with ';'.
#define FMA_FP16_AVX512FP16(zmm_lhs, zmm_rhs, zmm_acc) \
  zmm_acc = _mm512_fmadd_ph(zmm_lhs, zmm_rhs, zmm_acc);

//! Scalar multiply-accumulate (GENERAL, FP16): sum += m * q.
//! `m` and `q` are parenthesized so that expression arguments such as
//! `a + b` are multiplied as a whole. The expansion ends with ';'.
#define FMA_FP16_GENERAL(m, q, sum) sum += ((m) * (q));

//! Scalar multiply-accumulate (GENERAL, FP32): sum += m * q.
//! `m` and `q` are parenthesized so that expression arguments such as
//! `a + b` are multiplied as a whole. The expansion ends with ';'.
#define FMA_FP32_GENERAL(m, q, sum) sum += ((m) * (q));

//! Fused multiply-add on packed FP16 (NEON):
//! v_acc = v_acc + v_lhs * v_rhs. The expansion ends with ';'.
#define FMA_FP16_NEON(v_lhs, v_rhs, v_acc) \
  v_acc = vfmaq_f16(v_acc, v_lhs, v_rhs);

//! Fused multiply-add on packed FP32 (NEON):
//! v_acc = v_acc + v_lhs * v_rhs. The expansion ends with ';'.
#define FMA_FP32_NEON(v_lhs, v_rhs, v_acc) \
  v_acc = vfmaq_f32(v_acc, v_lhs, v_rhs);

//! Table-driven multiply-accumulate for one byte of packed 4-bit values
//! (GENERAL). Two Int4MulTable entries are added to `sum`: one indexed by
//! (low nibble of m, low nibble of q) and one by (high nibble of m, high
//! nibble of q), with the m nibble in the upper four index bits.
//! NOTE(review): Int4MulTable is declared outside this chunk; presumably it
//! holds the signed 4-bit products -- confirm.
#define FMA_INT4_GENERAL(m, q, sum)                         \
  sum += Int4MulTable[(((m) << 4) & 0xf0) | ((q) & 0xf)] +  \
         Int4MulTable[((m) & 0xf0) | (((q) >> 4) & 0xf)];

//! Scalar multiply-accumulate (GENERAL, INT8): the product of `m` and `q`
//! is converted to float and added to `sum`.
//! `m` and `q` are parenthesized so that expression arguments such as
//! `a + b` are multiplied as a whole. The expansion ends with ';'.
#define FMA_INT8_GENERAL(m, q, sum) sum += static_cast<float>((m) * (q));

//! Multiply-accumulate on packed INT8 (SSE), accumulating into 32-bit lanes.
//! _mm_maddubs_epi16 takes its first operand as unsigned, so |rhs| is used
//! there and the sign of rhs is transferred onto lhs with _mm_sign_epi8.
//! _mm_madd_epi16 against ONES_INT16_SSE then adds adjacent 16-bit results
//! into 32-bit lanes before the final add into xmm_acc.
#define FMA_INT8_SSE(xmm_lhs, xmm_rhs, xmm_acc)                              \
  xmm_acc = _mm_add_epi32(                                                   \
      _mm_madd_epi16(_mm_maddubs_epi16(_mm_abs_epi8(xmm_rhs),                \
                                       _mm_sign_epi8(xmm_lhs, xmm_rhs)),     \
                     ONES_INT16_SSE),                                        \
      xmm_acc);

//! Multiply-accumulate on packed INT8 (AVX), accumulating into 32-bit lanes.
//! _mm256_maddubs_epi16 takes its first operand as unsigned, so |rhs| is
//! used there and the sign of rhs is transferred onto lhs with
//! _mm256_sign_epi8. _mm256_madd_epi16 against ONES_INT16_AVX then adds
//! adjacent 16-bit results into 32-bit lanes before the final add into
//! ymm_acc.
#define FMA_INT8_AVX(ymm_lhs, ymm_rhs, ymm_acc)                                \
  ymm_acc = _mm256_add_epi32(                                                  \
      _mm256_madd_epi16(                                                       \
          _mm256_maddubs_epi16(_mm256_abs_epi8(ymm_rhs),                       \
                               _mm256_sign_epi8(ymm_lhs, ymm_rhs)),            \
          ONES_INT16_AVX),                                                     \
      ymm_acc);

//! Multiply-accumulate on packed 4-bit values (SSE), accumulating into
//! 32-bit lanes. Each byte carries two nibbles: the low nibbles are handled
//! first, then the high nibbles (after a 4-bit right shift). A nibble is
//! turned into a signed byte through INT4_LOOKUP_SSE, and the products are
//! formed with the same sign/abs + maddubs + madd sequence as FMA_INT8_SSE.
#define FMA_INT4_SSE(xmm_m, xmm_q, xmm_sum)                                 \
  {                                                                         \
    /* low nibbles */                                                       \
    __m128i xmm_lhs = _mm_and_si128((xmm_m), MASK_INT4_SSE);                \
    __m128i xmm_rhs = _mm_and_si128((xmm_q), MASK_INT4_SSE);                \
    xmm_lhs = _mm_shuffle_epi8(INT4_LOOKUP_SSE, xmm_lhs);                   \
    xmm_rhs = _mm_shuffle_epi8(INT4_LOOKUP_SSE, xmm_rhs);                   \
    xmm_lhs = _mm_sign_epi8(xmm_lhs, xmm_rhs);                              \
    xmm_rhs = _mm_abs_epi8(xmm_rhs);                                        \
    xmm_lhs = _mm_madd_epi16(_mm_maddubs_epi16(xmm_rhs, xmm_lhs),           \
                             ONES_INT16_SSE);                               \
    xmm_sum = _mm_add_epi32(xmm_lhs, xmm_sum);                              \
    /* high nibbles */                                                      \
    xmm_lhs = _mm_and_si128(_mm_srli_epi32((xmm_m), 4), MASK_INT4_SSE);     \
    xmm_rhs = _mm_and_si128(_mm_srli_epi32((xmm_q), 4), MASK_INT4_SSE);     \
    xmm_lhs = _mm_shuffle_epi8(INT4_LOOKUP_SSE, xmm_lhs);                   \
    xmm_rhs = _mm_shuffle_epi8(INT4_LOOKUP_SSE, xmm_rhs);                   \
    xmm_lhs = _mm_sign_epi8(xmm_lhs, xmm_rhs);                              \
    xmm_rhs = _mm_abs_epi8(xmm_rhs);                                        \
    xmm_lhs = _mm_madd_epi16(_mm_maddubs_epi16(xmm_rhs, xmm_lhs),           \
                             ONES_INT16_SSE);                               \
    xmm_sum = _mm_add_epi32(xmm_lhs, xmm_sum);                              \
  }

//! Multiply-accumulate on packed 4-bit values (AVX), accumulating into
//! 32-bit lanes. Each byte carries two nibbles: the low nibbles are handled
//! first, then the high nibbles (after a 4-bit right shift). A nibble is
//! turned into a signed byte through INT4_LOOKUP_AVX, and the products are
//! formed with the same sign/abs + maddubs + madd sequence as FMA_INT8_AVX.
#define FMA_INT4_AVX(ymm_m, ymm_q, ymm_sum)                                    \
  {                                                                            \
    /* low nibbles */                                                          \
    __m256i ymm_lhs = _mm256_and_si256((ymm_m), MASK_INT4_AVX);                \
    __m256i ymm_rhs = _mm256_and_si256((ymm_q), MASK_INT4_AVX);                \
    ymm_lhs = _mm256_shuffle_epi8(INT4_LOOKUP_AVX, ymm_lhs);                   \
    ymm_rhs = _mm256_shuffle_epi8(INT4_LOOKUP_AVX, ymm_rhs);                   \
    ymm_lhs = _mm256_sign_epi8(ymm_lhs, ymm_rhs);                              \
    ymm_rhs = _mm256_abs_epi8(ymm_rhs);                                        \
    ymm_lhs = _mm256_madd_epi16(_mm256_maddubs_epi16(ymm_rhs, ymm_lhs),        \
                                ONES_INT16_AVX);                               \
    ymm_sum = _mm256_add_epi32(ymm_lhs, ymm_sum);                              \
    /* high nibbles */                                                         \
    ymm_lhs = _mm256_and_si256(_mm256_srli_epi32((ymm_m), 4), MASK_INT4_AVX);  \
    ymm_rhs = _mm256_and_si256(_mm256_srli_epi32((ymm_q), 4), MASK_INT4_AVX);  \
    ymm_lhs = _mm256_shuffle_epi8(INT4_LOOKUP_AVX, ymm_lhs);                   \
    ymm_rhs = _mm256_shuffle_epi8(INT4_LOOKUP_AVX, ymm_rhs);                   \
    ymm_lhs = _mm256_sign_epi8(ymm_lhs, ymm_rhs);                              \
    ymm_rhs = _mm256_abs_epi8(ymm_rhs);                                        \
    ymm_lhs = _mm256_madd_epi16(_mm256_maddubs_epi16(ymm_rhs, ymm_lhs),        \
                                ONES_INT16_AVX);                               \
    ymm_sum = _mm256_add_epi32(ymm_lhs, ymm_sum);                              \
  }

//! Calculate Fused-Multiply-Add over packed INT4 values (SSE), both nibbles
//! of every byte in one step.
//!
//! `_0` temporaries hold the low nibbles, `_1` temporaries the nibbles
//! obtained after shifting right by 4 bits. Each nibble is isolated with
//! `MASK_INT4_SSE` and translated through `INT4_LOOKUP_SSE` (NOTE(review):
//! presumably a 4-bit -> signed 8-bit table -- confirm). Because
//! `_mm_maddubs_epi16` takes an unsigned first operand, the sign of the right
//! operand is moved onto the left one and the right operand is made
//! non-negative. The two 32-bit partial sums are added together and then
//! into `xmm_sum`.
//!   xmm_lhs  left-hand register (expanded twice)
//!   xmm_rhs  right-hand register (expanded twice)
//!   xmm_sum  accumulator lvalue, updated in place
#define FMA_INT4_ITER_SSE(xmm_lhs, xmm_rhs, xmm_sum)              \
  {                                                               \
    __m128i xmm_lhs_0 = _mm_and_si128((xmm_lhs), MASK_INT4_SSE);  \
    __m128i xmm_rhs_0 = _mm_and_si128((xmm_rhs), MASK_INT4_SSE);  \
    __m128i xmm_lhs_1 = _mm_srli_epi32((xmm_lhs), 4);             \
    __m128i xmm_rhs_1 = _mm_srli_epi32((xmm_rhs), 4);             \
    xmm_lhs_1 = _mm_and_si128(xmm_lhs_1, MASK_INT4_SSE);          \
    xmm_rhs_1 = _mm_and_si128(xmm_rhs_1, MASK_INT4_SSE);          \
    xmm_lhs_0 = _mm_shuffle_epi8(INT4_LOOKUP_SSE, xmm_lhs_0);     \
    xmm_rhs_0 = _mm_shuffle_epi8(INT4_LOOKUP_SSE, xmm_rhs_0);     \
    xmm_lhs_1 = _mm_shuffle_epi8(INT4_LOOKUP_SSE, xmm_lhs_1);     \
    xmm_rhs_1 = _mm_shuffle_epi8(INT4_LOOKUP_SSE, xmm_rhs_1);     \
    xmm_lhs_0 = _mm_sign_epi8(xmm_lhs_0, xmm_rhs_0);              \
    xmm_rhs_0 = _mm_abs_epi8(xmm_rhs_0);                          \
    xmm_lhs_1 = _mm_sign_epi8(xmm_lhs_1, xmm_rhs_1);              \
    xmm_rhs_1 = _mm_abs_epi8(xmm_rhs_1);                          \
    xmm_lhs_0 = _mm_maddubs_epi16(xmm_rhs_0, xmm_lhs_0);          \
    xmm_lhs_1 = _mm_maddubs_epi16(xmm_rhs_1, xmm_lhs_1);          \
    xmm_lhs_0 = _mm_madd_epi16(xmm_lhs_0, ONES_INT16_SSE);        \
    xmm_lhs_1 = _mm_madd_epi16(xmm_lhs_1, ONES_INT16_SSE);        \
    xmm_lhs_0 = _mm_add_epi32(xmm_lhs_0, xmm_lhs_1);              \
    xmm_sum = _mm_add_epi32(xmm_lhs_0, xmm_sum);                  \
  }

//! Calculate Fused-Multiply-Add over packed INT4 values (AVX), both nibbles
//! of every byte in one step.
//!
//! 256-bit counterpart of the SSE iteration macro: `_0` temporaries hold the
//! low nibbles, `_1` temporaries the nibbles obtained after shifting right by
//! 4 bits. Nibbles are isolated with `MASK_INT4_AVX` and translated through
//! `INT4_LOOKUP_AVX` (NOTE(review): presumably a 4-bit -> signed 8-bit table
//! -- confirm). The sign of the right operand is moved onto the left one so
//! that `_mm256_maddubs_epi16` can take the (now non-negative) right operand
//! as its unsigned input. The two 32-bit partial sums are added together and
//! then into `ymm_sum`.
//!   ymm_lhs  left-hand register (expanded twice)
//!   ymm_rhs  right-hand register (expanded twice)
//!   ymm_sum  accumulator lvalue, updated in place
#define FMA_INT4_ITER_AVX(ymm_lhs, ymm_rhs, ymm_sum)                \
  {                                                                 \
    __m256i ymm_lhs_0 = _mm256_and_si256((ymm_lhs), MASK_INT4_AVX); \
    __m256i ymm_rhs_0 = _mm256_and_si256((ymm_rhs), MASK_INT4_AVX); \
    __m256i ymm_lhs_1 = _mm256_srli_epi32((ymm_lhs), 4);            \
    __m256i ymm_rhs_1 = _mm256_srli_epi32((ymm_rhs), 4);            \
    ymm_lhs_1 = _mm256_and_si256(ymm_lhs_1, MASK_INT4_AVX);         \
    ymm_rhs_1 = _mm256_and_si256(ymm_rhs_1, MASK_INT4_AVX);         \
    ymm_lhs_0 = _mm256_shuffle_epi8(INT4_LOOKUP_AVX, ymm_lhs_0);    \
    ymm_rhs_0 = _mm256_shuffle_epi8(INT4_LOOKUP_AVX, ymm_rhs_0);    \
    ymm_lhs_1 = _mm256_shuffle_epi8(INT4_LOOKUP_AVX, ymm_lhs_1);    \
    ymm_rhs_1 = _mm256_shuffle_epi8(INT4_LOOKUP_AVX, ymm_rhs_1);    \
    ymm_lhs_0 = _mm256_sign_epi8(ymm_lhs_0, ymm_rhs_0);             \
    ymm_rhs_0 = _mm256_abs_epi8(ymm_rhs_0);                         \
    ymm_lhs_1 = _mm256_sign_epi8(ymm_lhs_1, ymm_rhs_1);             \
    ymm_rhs_1 = _mm256_abs_epi8(ymm_rhs_1);                         \
    ymm_lhs_0 = _mm256_maddubs_epi16(ymm_rhs_0, ymm_lhs_0);         \
    ymm_lhs_1 = _mm256_maddubs_epi16(ymm_rhs_1, ymm_lhs_1);         \
    ymm_lhs_0 = _mm256_madd_epi16(ymm_lhs_0, ONES_INT16_AVX);       \
    ymm_lhs_1 = _mm256_madd_epi16(ymm_lhs_1, ONES_INT16_AVX);       \
    ymm_lhs_0 = _mm256_add_epi32(ymm_lhs_0, ymm_lhs_1);             \
    ymm_sum = _mm256_add_epi32(ymm_lhs_0, ymm_sum);                 \
  }

//! Accumulation-step aliases. Every ACCUM_<TYPE>_STEP_<ISA> name below
//! expands to the FMA_<TYPE>_<ISA> macro of the same element type and
//! instruction set.

//! FP16 accumulation steps
#define ACCUM_FP16_STEP_GENERAL FMA_FP16_GENERAL
#define ACCUM_FP16_STEP_NEON FMA_FP16_NEON

//! FP32 accumulation steps
#define ACCUM_FP32_STEP_SSE FMA_FP32_SSE
#define ACCUM_FP32_STEP_AVX FMA_FP32_AVX
#define ACCUM_FP32_STEP_AVX512 FMA_FP32_AVX512
#define ACCUM_FP32_STEP_NEON FMA_FP32_NEON

//! INT4 accumulation steps
#define ACCUM_INT4_STEP_SSE FMA_INT4_SSE
#define ACCUM_INT4_STEP_AVX FMA_INT4_AVX

//! INT8 accumulation steps
#define ACCUM_INT8_STEP_SSE FMA_INT8_SSE
#define ACCUM_INT8_STEP_AVX FMA_INT8_AVX


================================================
FILE: src/ailego/math/distance_matrix_int32.i
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <zvec/ailego/internal/platform.h>
#include "matrix_define.i"

//! Shim for `_mm256_set_m128i(hi, lo)` on GNU-compatible compilers: builds a
//! 256-bit integer vector whose low 128 bits are `b` and whose high 128 bits
//! are `a`.
//!
//! The block is enabled by `__AVX__`, so it is built from the AVX intrinsic
//! `_mm256_insertf128_si256` rather than the AVX2-only
//! `_mm256_inserti128_si256`; both place `a` into the upper half of the
//! widened `b`, but only the former is available when AVX2 is not enabled.
#if defined(__AVX__) && defined(__GNUC__)
#define _mm256_set_m128i(a, b) \
  _mm256_insertf128_si256(_mm256_castsi128_si256((b)), (a), 1)
#endif  // __AVX__ && __GNUC__

//! Broadcast helpers: load the 32-bit value at address `a` and replicate it
//! into every 32-bit lane of an integer vector.
//!
//! The load goes through the single-precision broadcast/load intrinsics and
//! the result is bit-cast back to an integer vector, so the bits are copied
//! unchanged. With AVX enabled `_mm_broadcast_ss` / `_mm256_broadcast_ss` are
//! used and a 256-bit variant is provided; otherwise only the 128-bit helper
//! exists and is built on `_mm_load1_ps`.
#if defined(__AVX__)
#define _mm_broadcast_si32(a) \
  _mm_castps_si128(_mm_broadcast_ss((const float *)(a)))
#define _mm256_broadcast_si32(a) \
  _mm256_castps_si256(_mm256_broadcast_ss((const float *)(a)))
#else
#define _mm_broadcast_si32(a) _mm_castps_si128(_mm_load1_ps((const float *)(a)))
#endif  // __AVX__

//! Iterative process of computing distance (INT32, M=2, N=1)
//!
//! Loads one 128-bit register from `qi` and two from `mi` (`mi`, `mi + 4`).
//! Each query lane is duplicated into two adjacent lanes before `_PROC` is
//! applied: lanes (q0,q0,q1,q1) are combined with the first matrix register
//! into `_RES##_0_0`, lanes (q2,q2,q3,q3) with the second matrix register
//! into `_RES##_0_1`.
//!   mi, qi  matrix / query pointers (NOTE(review): assumed to address
//!           32-bit elements, so `mi + 4` advances one register -- confirm)
//!   _RES    accumulator name prefix
//!   _LOAD   load operation taking a `const __m128i *`
//!   _PROC   statement macro invoked as `_PROC(matrix, query, accumulator)`
#define MATRIX_INT32_ITER_2X1_SSE(mi, qi, _RES, _LOAD, _PROC)            \
  {                                                                      \
    __m128i xmm_mi = _LOAD((const __m128i *)(mi));                       \
    __m128i xmm_qi = _LOAD((const __m128i *)(qi));                       \
    __m128i xmm_pi = _mm_shuffle_epi32(xmm_qi, _MM_SHUFFLE(1, 1, 0, 0)); \
    _PROC(xmm_mi, xmm_pi, _RES##_0_0)                                    \
    xmm_pi = _mm_shuffle_epi32(xmm_qi, _MM_SHUFFLE(3, 3, 2, 2));         \
    xmm_mi = _LOAD((const __m128i *)(mi + 4));                           \
    _PROC(xmm_mi, xmm_pi, _RES##_0_1)                                    \
  }

//! Iterative process of computing distance (INT32, M=2, N=2)
//!
//! Loads one 128-bit register from `mi` and one from `qi`. The even query
//! lanes, duplicated as (q0,q0,q2,q2), are combined with the matrix register
//! into `_RES##_0_0`; the odd query lanes, duplicated as (q1,q1,q3,q3), are
//! combined with the same matrix register into `_RES##_0_1`.
//!   mi, qi  matrix / query pointers
//!   _RES    accumulator name prefix
//!   _LOAD   load operation taking a `const __m128i *`
//!   _PROC   statement macro invoked as `_PROC(matrix, query, accumulator)`
#define MATRIX_INT32_ITER_2X2_SSE(mi, qi, _RES, _LOAD, _PROC)            \
  {                                                                      \
    __m128i xmm_mi = _LOAD((const __m128i *)(mi));                       \
    __m128i xmm_qi = _LOAD((const __m128i *)(qi));                       \
    __m128i xmm_pi = _mm_shuffle_epi32(xmm_qi, _MM_SHUFFLE(2, 2, 0, 0)); \
    _PROC(xmm_mi, xmm_pi, _RES##_0_0)                                    \
    xmm_pi = _mm_shuffle_epi32(xmm_qi, _MM_SHUFFLE(3, 3, 1, 1));         \
    _PROC(xmm_mi, xmm_pi, _RES##_0_1)                                    \
  }

//! Iterative process of computing distance (INT32, M=4, N=1)
//!
//! Loads two 128-bit matrix registers (`mi`, `mi + 4`) and pairs each with a
//! broadcast query scalar: the first register with `qi[0]` into
//! `_RES##_0_0`, the second with `qi[1]` into `_RES##_1_0`.
//!   mi, qi  matrix / query pointers
//!   _RES    accumulator name prefix
//!   _LOAD   load operation taking a `const __m128i *`
//!   _PROC   statement macro invoked as `_PROC(matrix, query, accumulator)`
#define MATRIX_INT32_ITER_4X1_SSE(mi, qi, _RES, _LOAD, _PROC) \
  {                                                           \
    __m128i xmm_qi = _mm_broadcast_si32(qi);                  \
    __m128i xmm_mi_0 = _LOAD((const __m128i *)(mi));          \
    __m128i xmm_mi_1 = _LOAD((const __m128i *)(mi + 4));      \
    _PROC(xmm_mi_0, xmm_qi, _RES##_0_0)                       \
    xmm_qi = _mm_broadcast_si32(qi + 1);                      \
    _PROC(xmm_mi_1, xmm_qi, _RES##_1_0)                       \
  }

//! Iterative process of computing distance (INT32, M=4, N=2)
//!
//! Loads one 128-bit matrix register from `mi`, broadcasts `qi[0]` and
//! `qi[1]` into `xmm_qi_0` / `xmm_qi_1`, and passes the `xmm_mi` register and
//! the `xmm_qi` name prefix to `MATRIX_VAR_PROC(1, 2, 0, ...)`.
//! `MATRIX_VAR_PROC` is defined outside this file (NOTE(review): presumably
//! it applies `_PROC` to the matrix register and each suffixed query
//! register -- confirm against its definition).
//!   mi, qi  matrix / query pointers
//!   _RES    accumulator name prefix
//!   _LOAD   load operation taking a `const __m128i *`
//!   _PROC   statement macro forwarded to `MATRIX_VAR_PROC`
#define MATRIX_INT32_ITER_4X2_SSE(mi, qi, _RES, _LOAD, _PROC) \
  {                                                           \
    __m128i xmm_mi = _LOAD((const __m128i *)(mi));            \
    __m128i xmm_qi_0 = _mm_broadcast_si32(qi);                \
    __m128i xmm_qi_1 = _mm_broadcast_si32(qi + 1);            \
    MATRIX_VAR_PROC(1, 2, 0, xmm_mi, xmm_qi, _RES, _PROC)     \
  }

//! Iterative process of computing distance (INT32, M=4, N=4)
//!
//! Loads one 128-bit matrix register from `mi` and applies `_PROC` to it four
//! times, once per broadcast query scalar `qi[0]` .. `qi[3]`, accumulating
//! into `_RES##_0_0` .. `_RES##_0_3` respectively.
//!   mi, qi  matrix / query pointers
//!   _RES    accumulator name prefix
//!   _LOAD   load operation taking a `const __m128i *`
//!   _PROC   statement macro invoked as `_PROC(matrix, query, accumulator)`
#define MATRIX_INT32_ITER_4X4_SSE(mi, qi, _RES, _LOAD, _PROC) \
  {                                                           \
    __m128i xmm_qi = _mm_broadcast_si32(qi);                  \
    __m128i xmm_mi = _LOAD((const __m128i *)(mi));            \
    _PROC(xmm_mi, xmm_qi, _RES##_0_0)                         \
    xmm_qi = _mm_broadcast_si32(qi + 1);                      \
    _PROC(xmm_mi, xmm_qi, _RES##_0_1)                         \
    xmm_qi = _mm_broadcast_si32(qi + 2);                      \
    _PROC(xmm_mi, xmm_qi, _RES##_0_2)                         \
    xmm_qi = _mm_broadcast_si32(qi + 3);                      \
    _PROC(xmm_mi, xmm_qi, _RES##_0_3)                         \
  }

//! Iterative process of computing distance (INT32, M=8, N=1)
//!
//! Broadcasts the query scalar at `qi`, loads two 128-bit matrix registers
//! (`mi`, `mi + 4`) into `xmm_mi_0` / `xmm_mi_1`, and passes the `xmm_mi`
//! name prefix and the query register to `MATRIX_VAR_PROC(2, 1, 0, ...)`.
//! `MATRIX_VAR_PROC` is defined outside this file (NOTE(review): presumably
//! it applies `_PROC` to each suffixed matrix register and the query
//! register -- confirm against its definition).
//!   mi, qi  matrix / query pointers
//!   _RES    accumulator name prefix
//!   _LOAD   load operation taking a `const __m128i *`
//!   _PROC   statement macro forwarded to `MATRIX_VAR_PROC`
#define MATRIX_INT32_ITER_8X1_SSE(mi, qi, _RES, _LOAD, _PROC) \
  {                                                           \
    __m128i xmm_qi = _mm_broadcast_si32(qi);                  \
    __m128i xmm_mi_0 = _LOAD((const __m128i *)(mi));          \
    __m128i xmm_mi_1 = _LOAD((const __m128i *)(mi + 4));      \
    MATRIX_VAR_PROC(2, 1, 0, xmm_mi, xmm_qi, _RES, _PROC)     \
  }

//! Iterative process of computing distance (INT32, M=8, N=2)
//!
//! Broadcasts `qi[0]` and `qi[1]` into `xmm_qi_0` / `xmm_qi_1`, then handles
//! the two 128-bit matrix registers one after the other: the register loaded
//! from `mi` is passed to `MATRIX_VAR_PROC(1, 2, 0, ...)`, after which
//! `xmm_mi` is reloaded from `mi + 4` and passed to
//! `MATRIX_VAR_PROC(1, 2, 1, ...)`.
//! `MATRIX_VAR_PROC` is defined outside this file (NOTE(review): presumably
//! it pastes index suffixes onto the `xmm_qi` / `_RES` prefixes and applies
//! `_PROC` to each pair -- confirm against its definition).
//!   mi, qi  matrix / query pointers
//!   _RES    accumulator name prefix
//!   _LOAD   load operation taking a `const __m128i *`
//!   _PROC   statement macro forwarded to `MATRIX_VAR_PROC`
#define MATRIX_INT32_ITER_8X2_SSE(mi, qi, _RES, _LOAD, _PROC) \
  {                                                           \
    __m128i xmm_qi_0 = _mm_broadcast_si32(qi + 0);            \
    __m128i xmm_qi_1 = _mm_broadcast_si32(qi + 1);            \
    __m128i xmm_mi = _LOAD((const __m128i *)(mi + 0));        \
    MATRIX_VAR_PROC(1, 2, 0, xmm_mi, xmm_qi, _RES, _PROC)     \
    xmm_mi = _LOAD((const __m128i *)(mi + 4));                \
    MATRIX_VAR_PROC(1, 2, 1, xmm_mi, xmm_qi, _RES, _PROC)     \
  }

//! Iterative process of computing distance (INT32, M=8, N=4)
//!
//! Loads two 128-bit matrix registers (`mi + 0`, `mi + 4`) once, then for
//! each query index k = 0..3 broadcasts `qi[k]` into `xmm_qi` and invokes
//! `MATRIX_VAR_PROC(2, 1, k, ...)`. The single `xmm_qi` register is
//! overwritten between invocations, so the statement order matters.
//! `MATRIX_VAR_PROC` is defined outside this file (NOTE(review): presumably
//! it pastes index suffixes onto the `xmm_mi` / `_RES` prefixes and applies
//! `_PROC` to each pair -- confirm against its definition).
//!   mi, qi  matrix / query pointers
//!   _RES    accumulator name prefix
//!   _LOAD   load operation taking a `const __m128i *`
//!   _PROC   statement macro forwarded to `MATRIX_VAR_PROC`
#define MATRIX_INT32_ITER_8X4_SSE(mi, qi, _RES, _LOAD, _PROC) \
  {                                                           \
    __m128i xmm_mi_0 = _LOAD((const __m128i *)(mi + 0));      \
    __m128i xmm_mi_1 = _LOAD((const __m128i *)(mi + 4));      \
    __m128i xmm_qi = _mm_broadcast_si32(qi + 0);              \
    MATRIX_VAR_PROC(2, 1, 0, xmm_mi, xmm_qi, _RES, _PROC)     \
    xmm_qi = _mm_broadcast_si32(qi + 1);                      \
    MATRIX_VAR_PROC(2, 1, 1, xmm_mi, xmm_qi, _RES, _PROC)     \
    xmm_qi = _mm_broadcast_si32(qi + 2);                      \
    MATRIX_VAR_PROC(2, 1, 2, xmm_mi, xmm_qi, _RES, _PROC)     \
    xmm_qi = _mm_broadcast_si32(qi + 3);                      \
    MATRIX_VAR_PROC(2, 1, 3, xmm_mi, xmm_qi, _RES, _PROC)     \
  }

//! Iterative process of computing distance (INT32, M=8, N=8)
//!
//! Loads two 128-bit matrix registers (`mi + 0`, `mi + 4`) once, then for
//! each query index k = 0..7 broadcasts `qi[k]` into `xmm_qi` and invokes
//! `MATRIX_VAR_PROC(2, 1, k, ...)`. The single `xmm_qi` register is
//! overwritten between invocations, so the statement order matters.
//! `MATRIX_VAR_PROC` is defined outside this file (NOTE(review): presumably
//! it pastes index suffixes onto the `xmm_mi` / `_RES` prefixes and applies
//! `_PROC` to each pair -- confirm against its definition).
//!   mi, qi  matrix / query pointers
//!   _RES    accumulator name prefix
//!   _LOAD   load operation taking a `const __m128i *`
//!   _PROC   statement macro forwarded to `MATRIX_VAR_PROC`
#define MATRIX_INT32_ITER_8X8_SSE(mi, qi, _RES, _LOAD, _PROC) \
  {                                                           \
    __m128i xmm_mi_0 = _LOAD((const __m128i *)(mi + 0));      \
    __m128i xmm_mi_1 = _LOAD((const __m128i *)(mi + 4));      \
    __m128i xmm_qi = _mm_broadcast_si32(qi + 0);              \
    MATRIX_VAR_PROC(2, 1, 0, xmm_mi, xmm_qi, _RES, _PROC)     \
    xmm_qi = _mm_broadcast_si32(qi + 1);                      \
    MATRIX_VAR_PROC(2, 1, 1, xmm_mi, xmm_qi, _RES, _PROC)     \
    xmm_qi = _mm_broadcast_si32(qi + 2);                      \
    MATRIX_VAR_PROC(2, 1, 2, xmm_mi, xmm_qi, _RES, _PROC)     \
    xmm_qi = _mm_broadcast_si32(qi + 3);                      \
    MATRIX_VAR_PROC(2, 1, 3, xmm_mi, xmm_qi, _RES, _PROC)     \
    xmm_qi = _mm_broadcast_si32(qi + 4);                      \
    MATRIX_VAR_PROC(2, 1, 4, xmm_mi, xmm_qi, _RES, _PROC)     \
    xmm_qi = _mm_broadcast_si32(qi + 5);                      \
    MATRIX_VAR_PROC(2, 1, 5, xmm_mi, xmm_qi, _RES, _PROC)     \
    xmm_qi = _mm_broadcast_si32(qi + 6);                      \
    MATRIX_VAR_PROC(2, 1, 6, xmm_mi, xmm_qi, _RES, _PROC)     \
    xmm_qi = _mm_broadcast_si32(qi + 7);                      \
    MATRIX_VAR_PROC(2, 1, 7, xmm_mi, xmm_qi, _RES, _PROC)     \
  }

//! Iterative process of computing distance (INT32, M=16, N=1)
//!
//! Broadcasts the query scalar at `qi`, loads four 128-bit matrix registers
//! (`mi`, `mi + 4`, `mi + 8`, `mi + 12`) into `xmm_mi_0` .. `xmm_mi_3`, and
//! passes the `xmm_mi` name prefix and the query register to
//! `MATRIX_VAR_PROC(4, 1, 0, ...)`.
//! `MATRIX_VAR_PROC` is defined outside this file (NOTE(review): presumably
//! it applies `_PROC` to each suffixed matrix register and the query
//! register -- confirm against its definition).
//!   mi, qi  matrix / query pointers
//!   _RES    accumulator name prefix
//!   _LOAD   load operation taking a `const __m128i *`
//!   _PROC   statement macro forwarded to `MATRIX_VAR_PROC`
#define MATRIX_INT32_ITER_16X1_SSE(mi, qi, _RES, _LOAD, _PROC) \
  {                                                            \
    __m128i xmm_qi = _mm_broadcast_si32(qi);                   \
    __m128i xmm_mi_0 = _LOAD((const __m128i *)(mi));           \
    __m128i xmm_mi_1 = _LOAD((const __m128i *)(mi + 4));       \
    __m128i xmm_mi_2 = _LOAD((const __m128i *)(mi + 8));       \
    __m128i xmm_mi_3 = _LOAD((const __m128i *)(mi + 12));      \
    MATRIX_VAR_PROC(4, 1, 0, xmm_mi, xmm_qi, _RES, _PROC)      \
  }

//! Iterative process of computing distance (INT32, M=16, N=2)
//!
//! Loads four 128-bit matrix registers (`mi + 0` .. `mi + 12`) once, then for
//! each query index k = 0..1 broadcasts `qi[k]` into `xmm_qi` and invokes
//! `MATRIX_VAR_PROC(4, 1, k, ...)`. The single `xmm_qi` register is
//! overwritten between invocations, so the statement order matters.
//! `MATRIX_VAR_PROC` is defined outside this file (NOTE(review): presumably
//! it pastes index suffixes onto the `xmm_mi` / `_RES` prefixes and applies
//! `_PROC` to each pair -- confirm against its definition).
//!   mi, qi  matrix / query pointers
//!   _RES    accumulator name prefix
//!   _LOAD   load operation taking a `const __m128i *`
//!   _PROC   statement macro forwarded to `MATRIX_VAR_PROC`
#define MATRIX_INT32_ITER_16X2_SSE(mi, qi, _RES, _LOAD, _PROC) \
  {                                                            \
    __m128i xmm_mi_0 = _LOAD((const __m128i *)(mi + 0));       \
    __m128i xmm_mi_1 = _LOAD((const __m128i *)(mi + 4));       \
    __m128i xmm_mi_2 = _LOAD((const __m128i *)(mi + 8));       \
    __m128i xmm_mi_3 = _LOAD((const __m128i *)(mi + 12));      \
    __m128i xmm_qi = _mm_broadcast_si32(qi + 0);               \
    MATRIX_VAR_PROC(4, 1, 0, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 1);                       \
    MATRIX_VAR_PROC(4, 1, 1, xmm_mi, xmm_qi, _RES, _PROC)      \
  }

//! Iterative process of computing distance (INT32, M=16, N=4)
//!
//! Loads four 128-bit matrix registers (`mi + 0` .. `mi + 12`) once, then for
//! each query index k = 0..3 broadcasts `qi[k]` into `xmm_qi` and invokes
//! `MATRIX_VAR_PROC(4, 1, k, ...)`. The single `xmm_qi` register is
//! overwritten between invocations, so the statement order matters.
//! `MATRIX_VAR_PROC` is defined outside this file (NOTE(review): presumably
//! it pastes index suffixes onto the `xmm_mi` / `_RES` prefixes and applies
//! `_PROC` to each pair -- confirm against its definition).
//!   mi, qi  matrix / query pointers
//!   _RES    accumulator name prefix
//!   _LOAD   load operation taking a `const __m128i *`
//!   _PROC   statement macro forwarded to `MATRIX_VAR_PROC`
#define MATRIX_INT32_ITER_16X4_SSE(mi, qi, _RES, _LOAD, _PROC) \
  {                                                            \
    __m128i xmm_mi_0 = _LOAD((const __m128i *)(mi + 0));       \
    __m128i xmm_mi_1 = _LOAD((const __m128i *)(mi + 4));       \
    __m128i xmm_mi_2 = _LOAD((const __m128i *)(mi + 8));       \
    __m128i xmm_mi_3 = _LOAD((const __m128i *)(mi + 12));      \
    __m128i xmm_qi = _mm_broadcast_si32(qi + 0);               \
    MATRIX_VAR_PROC(4, 1, 0, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 1);                       \
    MATRIX_VAR_PROC(4, 1, 1, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 2);                       \
    MATRIX_VAR_PROC(4, 1, 2, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 3);                       \
    MATRIX_VAR_PROC(4, 1, 3, xmm_mi, xmm_qi, _RES, _PROC)      \
  }

//! Iterative process of computing distance (INT32, M=16, N=8)
//!
//! Loads four 128-bit matrix registers (`mi + 0` .. `mi + 12`) once, then for
//! each query index k = 0..7 broadcasts `qi[k]` into `xmm_qi` and invokes
//! `MATRIX_VAR_PROC(4, 1, k, ...)`. The single `xmm_qi` register is
//! overwritten between invocations, so the statement order matters.
//! `MATRIX_VAR_PROC` is defined outside this file (NOTE(review): presumably
//! it pastes index suffixes onto the `xmm_mi` / `_RES` prefixes and applies
//! `_PROC` to each pair -- confirm against its definition).
//!   mi, qi  matrix / query pointers
//!   _RES    accumulator name prefix
//!   _LOAD   load operation taking a `const __m128i *`
//!   _PROC   statement macro forwarded to `MATRIX_VAR_PROC`
#define MATRIX_INT32_ITER_16X8_SSE(mi, qi, _RES, _LOAD, _PROC) \
  {                                                            \
    __m128i xmm_mi_0 = _LOAD((const __m128i *)(mi + 0));       \
    __m128i xmm_mi_1 = _LOAD((const __m128i *)(mi + 4));       \
    __m128i xmm_mi_2 = _LOAD((const __m128i *)(mi + 8));       \
    __m128i xmm_mi_3 = _LOAD((const __m128i *)(mi + 12));      \
    __m128i xmm_qi = _mm_broadcast_si32(qi + 0);               \
    MATRIX_VAR_PROC(4, 1, 0, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 1);                       \
    MATRIX_VAR_PROC(4, 1, 1, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 2);                       \
    MATRIX_VAR_PROC(4, 1, 2, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 3);                       \
    MATRIX_VAR_PROC(4, 1, 3, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 4);                       \
    MATRIX_VAR_PROC(4, 1, 4, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 5);                       \
    MATRIX_VAR_PROC(4, 1, 5, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 6);                       \
    MATRIX_VAR_PROC(4, 1, 6, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 7);                       \
    MATRIX_VAR_PROC(4, 1, 7, xmm_mi, xmm_qi, _RES, _PROC)      \
  }

//! Iterative process of computing distance (INT32, M=16, N=16)
//!
//! Loads four 128-bit matrix registers (`mi + 0` .. `mi + 12`) once, then for
//! each query index k = 0..15 broadcasts `qi[k]` into `xmm_qi` and invokes
//! `MATRIX_VAR_PROC(4, 1, k, ...)`. The single `xmm_qi` register is
//! overwritten between invocations, so the statement order matters.
//! `MATRIX_VAR_PROC` is defined outside this file (NOTE(review): presumably
//! it pastes index suffixes onto the `xmm_mi` / `_RES` prefixes and applies
//! `_PROC` to each pair -- confirm against its definition).
//!   mi, qi  matrix / query pointers
//!   _RES    accumulator name prefix
//!   _LOAD   load operation taking a `const __m128i *`
//!   _PROC   statement macro forwarded to `MATRIX_VAR_PROC`
#define MATRIX_INT32_ITER_16X16_SSE(mi, qi, _RES, _LOAD, _PROC) \
  {                                                             \
    __m128i xmm_mi_0 = _LOAD((const __m128i *)(mi + 0));        \
    __m128i xmm_mi_1 = _LOAD((const __m128i *)(mi + 4));        \
    __m128i xmm_mi_2 = _LOAD((const __m128i *)(mi + 8));        \
    __m128i xmm_mi_3 = _LOAD((const __m128i *)(mi + 12));       \
    __m128i xmm_qi = _mm_broadcast_si32(qi + 0);                \
    MATRIX_VAR_PROC(4, 1, 0, xmm_mi, xmm_qi, _RES, _PROC)       \
    xmm_qi = _mm_broadcast_si32(qi + 1);                        \
    MATRIX_VAR_PROC(4, 1, 1, xmm_mi, xmm_qi, _RES, _PROC)       \
    xmm_qi = _mm_broadcast_si32(qi + 2);                        \
    MATRIX_VAR_PROC(4, 1, 2, xmm_mi, xmm_qi, _RES, _PROC)       \
    xmm_qi = _mm_broadcast_si32(qi + 3);                        \
    MATRIX_VAR_PROC(4, 1, 3, xmm_mi, xmm_qi, _RES, _PROC)       \
    xmm_qi = _mm_broadcast_si32(qi + 4);                        \
    MATRIX_VAR_PROC(4, 1, 4, xmm_mi, xmm_qi, _RES, _PROC)       \
    xmm_qi = _mm_broadcast_si32(qi + 5);                        \
    MATRIX_VAR_PROC(4, 1, 5, xmm_mi, xmm_qi, _RES, _PROC)       \
    xmm_qi = _mm_broadcast_si32(qi + 6);                        \
    MATRIX_VAR_PROC(4, 1, 6, xmm_mi, xmm_qi, _RES, _PROC)       \
    xmm_qi = _mm_broadcast_si32(qi + 7);                        \
    MATRIX_VAR_PROC(4, 1, 7, xmm_mi, xmm_qi, _RES, _PROC)       \
    xmm_qi = _mm_broadcast_si32(qi + 8);                        \
    MATRIX_VAR_PROC(4, 1, 8, xmm_mi, xmm_qi, _RES, _PROC)       \
    xmm_qi = _mm_broadcast_si32(qi + 9);                        \
    MATRIX_VAR_PROC(4, 1, 9, xmm_mi, xmm_qi, _RES, _PROC)       \
    xmm_qi = _mm_broadcast_si32(qi + 10);                       \
    MATRIX_VAR_PROC(4, 1, 10, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 11);                       \
    MATRIX_VAR_PROC(4, 1, 11, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 12);                       \
    MATRIX_VAR_PROC(4, 1, 12, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 13);                       \
    MATRIX_VAR_PROC(4, 1, 13, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 14);                       \
    MATRIX_VAR_PROC(4, 1, 14, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 15);                       \
    MATRIX_VAR_PROC(4, 1, 15, xmm_mi, xmm_qi, _RES, _PROC)      \
  }

//! Iterative process of computing distance (INT32, M=32, N=1)
//!
//! Processes eight 128-bit matrix registers against one broadcast query
//! scalar in two batches of four. The first batch (`mi` .. `mi + 12`)
//! accumulates into `_RES##_0_0` .. `_RES##_3_0`; the same four temporaries
//! are then reloaded from `mi + 16` .. `mi + 28` and accumulate into
//! `_RES##_4_0` .. `_RES##_7_0`.
//!   mi, qi  matrix / query pointers
//!   _RES    accumulator name prefix
//!   _LOAD   load operation taking a `const __m128i *`
//!   _PROC   statement macro invoked as `_PROC(matrix, query, accumulator)`
#define MATRIX_INT32_ITER_32X1_SSE(mi, qi, _RES, _LOAD, _PROC) \
  {                                                            \
    __m128i xmm_mi_0 = _LOAD((const __m128i *)(mi));           \
    __m128i xmm_mi_1 = _LOAD((const __m128i *)(mi + 4));       \
    __m128i xmm_mi_2 = _LOAD((const __m128i *)(mi + 8));       \
    __m128i xmm_mi_3 = _LOAD((const __m128i *)(mi + 12));      \
    __m128i xmm_qi = _mm_broadcast_si32(qi);                   \
    _PROC(xmm_mi_0, xmm_qi, _RES##_0_0)                        \
    _PROC(xmm_mi_1, xmm_qi, _RES##_1_0)                        \
    _PROC(xmm_mi_2, xmm_qi, _RES##_2_0)                        \
    _PROC(xmm_mi_3, xmm_qi, _RES##_3_0)                        \
    xmm_mi_0 = _LOAD((const __m128i *)(mi + 16));              \
    xmm_mi_1 = _LOAD((const __m128i *)(mi + 20));              \
    xmm_mi_2 = _LOAD((const __m128i *)(mi + 24));              \
    xmm_mi_3 = _LOAD((const __m128i *)(mi + 28));              \
    _PROC(xmm_mi_0, xmm_qi, _RES##_4_0)                        \
    _PROC(xmm_mi_1, xmm_qi, _RES##_5_0)                        \
    _PROC(xmm_mi_2, xmm_qi, _RES##_6_0)                        \
    _PROC(xmm_mi_3, xmm_qi, _RES##_7_0)                        \
  }

//! Iterative process of computing distance (INT32, M=32, N=2)
//!
//! Broadcasts `qi[0]` and `qi[1]` into `xmm_qi_0` / `xmm_qi_1`, then handles
//! eight 128-bit matrix registers in two batches of four. Registers loaded
//! from `mi + 0` .. `mi + 12` are passed to `MATRIX_VAR_PROC(1, 2, k, ...)`
//! with k = 0..3; the same temporaries are then reloaded from `mi + 16` ..
//! `mi + 28` and passed with k = 4..7.
//! `MATRIX_VAR_PROC` is defined outside this file (NOTE(review): presumably
//! it pastes index suffixes onto the `xmm_qi` / `_RES` prefixes and applies
//! `_PROC` to each pair -- confirm against its definition).
//!   mi, qi  matrix / query pointers
//!   _RES    accumulator name prefix
//!   _LOAD   load operation taking a `const __m128i *`
//!   _PROC   statement macro forwarded to `MATRIX_VAR_PROC`
#define MATRIX_INT32_ITER_32X2_SSE(mi, qi, _RES, _LOAD, _PROC) \
  {                                                            \
    __m128i xmm_qi_0 = _mm_broadcast_si32(qi + 0);             \
    __m128i xmm_qi_1 = _mm_broadcast_si32(qi + 1);             \
    __m128i xmm_mi_0 = _LOAD((const __m128i *)(mi + 0));       \
    __m128i xmm_mi_1 = _LOAD((const __m128i *)(mi + 4));       \
    __m128i xmm_mi_2 = _LOAD((const __m128i *)(mi + 8));       \
    __m128i xmm_mi_3 = _LOAD((const __m128i *)(mi + 12));      \
    MATRIX_VAR_PROC(1, 2, 0, xmm_mi_0, xmm_qi, _RES, _PROC)    \
    MATRIX_VAR_PROC(1, 2, 1, xmm_mi_1, xmm_qi, _RES, _PROC)    \
    MATRIX_VAR_PROC(1, 2, 2, xmm_mi_2, xmm_qi, _RES, _PROC)    \
    MATRIX_VAR_PROC(1, 2, 3, xmm_mi_3, xmm_qi, _RES, _PROC)    \
    xmm_mi_0 = _LOAD((const __m128i *)(mi + 16));              \
    xmm_mi_1 = _LOAD((const __m128i *)(mi + 20));              \
    xmm_mi_2 = _LOAD((const __m128i *)(mi + 24));              \
    xmm_mi_3 = _LOAD((const __m128i *)(mi + 28));              \
    MATRIX_VAR_PROC(1, 2, 4, xmm_mi_0, xmm_qi, _RES, _PROC)    \
    MATRIX_VAR_PROC(1, 2, 5, xmm_mi_1, xmm_qi, _RES, _PROC)    \
    MATRIX_VAR_PROC(1, 2, 6, xmm_mi_2, xmm_qi, _RES, _PROC)    \
    MATRIX_VAR_PROC(1, 2, 7, xmm_mi_3, xmm_qi, _RES, _PROC)    \
  }

//! Iterative process of computing distance (INT32, M=32, N=4)
//!
//! Broadcasts `qi[0]` .. `qi[3]` into `xmm_qi_0` .. `xmm_qi_3`, then handles
//! eight 128-bit matrix registers in two batches of four. Registers loaded
//! from `mi + 0` .. `mi + 12` are passed to `MATRIX_VAR_PROC(1, 4, k, ...)`
//! with k = 0..3; the same temporaries are then reloaded from `mi + 16` ..
//! `mi + 28` and passed with k = 4..7.
//! `MATRIX_VAR_PROC` is defined outside this file (NOTE(review): presumably
//! it pastes index suffixes onto the `xmm_qi` / `_RES` prefixes and applies
//! `_PROC` to each pair -- confirm against its definition).
//!   mi, qi  matrix / query pointers
//!   _RES    accumulator name prefix
//!   _LOAD   load operation taking a `const __m128i *`
//!   _PROC   statement macro forwarded to `MATRIX_VAR_PROC`
#define MATRIX_INT32_ITER_32X4_SSE(mi, qi, _RES, _LOAD, _PROC) \
  {                                                            \
    __m128i xmm_qi_0 = _mm_broadcast_si32(qi + 0);             \
    __m128i xmm_qi_1 = _mm_broadcast_si32(qi + 1);             \
    __m128i xmm_qi_2 = _mm_broadcast_si32(qi + 2);             \
    __m128i xmm_qi_3 = _mm_broadcast_si32(qi + 3);             \
    __m128i xmm_mi_0 = _LOAD((const __m128i *)(mi + 0));       \
    __m128i xmm_mi_1 = _LOAD((const __m128i *)(mi + 4));       \
    __m128i xmm_mi_2 = _LOAD((const __m128i *)(mi + 8));       \
    __m128i xmm_mi_3 = _LOAD((const __m128i *)(mi + 12));      \
    MATRIX_VAR_PROC(1, 4, 0, xmm_mi_0, xmm_qi, _RES, _PROC)    \
    MATRIX_VAR_PROC(1, 4, 1, xmm_mi_1, xmm_qi, _RES, _PROC)    \
    MATRIX_VAR_PROC(1, 4, 2, xmm_mi_2, xmm_qi, _RES, _PROC)    \
    MATRIX_VAR_PROC(1, 4, 3, xmm_mi_3, xmm_qi, _RES, _PROC)    \
    xmm_mi_0 = _LOAD((const __m128i *)(mi + 16));              \
    xmm_mi_1 = _LOAD((const __m128i *)(mi + 20));              \
    xmm_mi_2 = _LOAD((const __m128i *)(mi + 24));              \
    xmm_mi_3 = _LOAD((const __m128i *)(mi + 28));              \
    MATRIX_VAR_PROC(1, 4, 4, xmm_mi_0, xmm_qi, _RES, _PROC)    \
    MATRIX_VAR_PROC(1, 4, 5, xmm_mi_1, xmm_qi, _RES, _PROC)    \
    MATRIX_VAR_PROC(1, 4, 6, xmm_mi_2, xmm_qi, _RES, _PROC)    \
    MATRIX_VAR_PROC(1, 4, 7, xmm_mi_3, xmm_qi, _RES, _PROC)    \
  }

//! Iterative process of computing distance (INT32, M=32, N=8)
//!
//! Broadcasts `qi[0]` .. `qi[7]` into `xmm_qi_0` .. `xmm_qi_7`, then handles
//! eight 128-bit matrix registers in two batches of four. Registers loaded
//! from `mi + 0` .. `mi + 12` are passed to `MATRIX_VAR_PROC(1, 8, k, ...)`
//! with k = 0..3; the same temporaries are then reloaded from `mi + 16` ..
//! `mi + 28` and passed with k = 4..7.
//! `MATRIX_VAR_PROC` is defined outside this file (NOTE(review): presumably
//! it pastes index suffixes onto the `xmm_qi` / `_RES` prefixes and applies
//! `_PROC` to each pair -- confirm against its definition).
//!   mi, qi  matrix / query pointers
//!   _RES    accumulator name prefix
//!   _LOAD   load operation taking a `const __m128i *`
//!   _PROC   statement macro forwarded to `MATRIX_VAR_PROC`
#define MATRIX_INT32_ITER_32X8_SSE(mi, qi, _RES, _LOAD, _PROC) \
  {                                                            \
    __m128i xmm_qi_0 = _mm_broadcast_si32(qi + 0);             \
    __m128i xmm_qi_1 = _mm_broadcast_si32(qi + 1);             \
    __m128i xmm_qi_2 = _mm_broadcast_si32(qi + 2);             \
    __m128i xmm_qi_3 = _mm_broadcast_si32(qi + 3);             \
    __m128i xmm_qi_4 = _mm_broadcast_si32(qi + 4);             \
    __m128i xmm_qi_5 = _mm_broadcast_si32(qi + 5);             \
    __m128i xmm_qi_6 = _mm_broadcast_si32(qi + 6);             \
    __m128i xmm_qi_7 = _mm_broadcast_si32(qi + 7);             \
    __m128i xmm_mi_0 = _LOAD((const __m128i *)(mi + 0));       \
    __m128i xmm_mi_1 = _LOAD((const __m128i *)(mi + 4));       \
    __m128i xmm_mi_2 = _LOAD((const __m128i *)(mi + 8));       \
    __m128i xmm_mi_3 = _LOAD((const __m128i *)(mi + 12));      \
    MATRIX_VAR_PROC(1, 8, 0, xmm_mi_0, xmm_qi, _RES, _PROC)    \
    MATRIX_VAR_PROC(1, 8, 1, xmm_mi_1, xmm_qi, _RES, _PROC)    \
    MATRIX_VAR_PROC(1, 8, 2, xmm_mi_2, xmm_qi, _RES, _PROC)    \
    MATRIX_VAR_PROC(1, 8, 3, xmm_mi_3, xmm_qi, _RES, _PROC)    \
    xmm_mi_0 = _LOAD((const __m128i *)(mi + 16));              \
    xmm_mi_1 = _LOAD((const __m128i *)(mi + 20));              \
    xmm_mi_2 = _LOAD((const __m128i *)(mi + 24));              \
    xmm_mi_3 = _LOAD((const __m128i *)(mi + 28));              \
    MATRIX_VAR_PROC(1, 8, 4, xmm_mi_0, xmm_qi, _RES, _PROC)    \
    MATRIX_VAR_PROC(1, 8, 5, xmm_mi_1, xmm_qi, _RES, _PROC)    \
    MATRIX_VAR_PROC(1, 8, 6, xmm_mi_2, xmm_qi, _RES, _PROC)    \
    MATRIX_VAR_PROC(1, 8, 7, xmm_mi_3, xmm_qi, _RES, _PROC)    \
  }

//! Iterative process of computing distance (INT32, M=32, N=16)
//!
//! Loads eight 128-bit matrix registers (`mi + 0` .. `mi + 28`) once, then
//! for each query index k = 0..15 broadcasts `qi[k]` into `xmm_qi` and
//! invokes `MATRIX_VAR_PROC(8, 1, k, ...)`. The single `xmm_qi` register is
//! overwritten between invocations, so the statement order matters.
//! `MATRIX_VAR_PROC` is defined outside this file (NOTE(review): presumably
//! it pastes index suffixes onto the `xmm_mi` / `_RES` prefixes and applies
//! `_PROC` to each pair -- confirm against its definition).
//!   mi, qi  matrix / query pointers
//!   _RES    accumulator name prefix
//!   _LOAD   load operation taking a `const __m128i *`
//!   _PROC   statement macro forwarded to `MATRIX_VAR_PROC`
#define MATRIX_INT32_ITER_32X16_SSE(mi, qi, _RES, _LOAD, _PROC) \
  {                                                             \
    __m128i xmm_mi_0 = _LOAD((const __m128i *)(mi + 0));        \
    __m128i xmm_mi_1 = _LOAD((const __m128i *)(mi + 4));        \
    __m128i xmm_mi_2 = _LOAD((const __m128i *)(mi + 8));        \
    __m128i xmm_mi_3 = _LOAD((const __m128i *)(mi + 12));       \
    __m128i xmm_mi_4 = _LOAD((const __m128i *)(mi + 16));       \
    __m128i xmm_mi_5 = _LOAD((const __m128i *)(mi + 20));       \
    __m128i xmm_mi_6 = _LOAD((const __m128i *)(mi + 24));       \
    __m128i xmm_mi_7 = _LOAD((const __m128i *)(mi + 28));       \
    __m128i xmm_qi = _mm_broadcast_si32(qi + 0);                \
    MATRIX_VAR_PROC(8, 1, 0, xmm_mi, xmm_qi, _RES, _PROC)       \
    xmm_qi = _mm_broadcast_si32(qi + 1);                        \
    MATRIX_VAR_PROC(8, 1, 1, xmm_mi, xmm_qi, _RES, _PROC)       \
    xmm_qi = _mm_broadcast_si32(qi + 2);                        \
    MATRIX_VAR_PROC(8, 1, 2, xmm_mi, xmm_qi, _RES, _PROC)       \
    xmm_qi = _mm_broadcast_si32(qi + 3);                        \
    MATRIX_VAR_PROC(8, 1, 3, xmm_mi, xmm_qi, _RES, _PROC)       \
    xmm_qi = _mm_broadcast_si32(qi + 4);                        \
    MATRIX_VAR_PROC(8, 1, 4, xmm_mi, xmm_qi, _RES, _PROC)       \
    xmm_qi = _mm_broadcast_si32(qi + 5);                        \
    MATRIX_VAR_PROC(8, 1, 5, xmm_mi, xmm_qi, _RES, _PROC)       \
    xmm_qi = _mm_broadcast_si32(qi + 6);                        \
    MATRIX_VAR_PROC(8, 1, 6, xmm_mi, xmm_qi, _RES, _PROC)       \
    xmm_qi = _mm_broadcast_si32(qi + 7);                        \
    MATRIX_VAR_PROC(8, 1, 7, xmm_mi, xmm_qi, _RES, _PROC)       \
    xmm_qi = _mm_broadcast_si32(qi + 8);                        \
    MATRIX_VAR_PROC(8, 1, 8, xmm_mi, xmm_qi, _RES, _PROC)       \
    xmm_qi = _mm_broadcast_si32(qi + 9);                        \
    MATRIX_VAR_PROC(8, 1, 9, xmm_mi, xmm_qi, _RES, _PROC)       \
    xmm_qi = _mm_broadcast_si32(qi + 10);                       \
    MATRIX_VAR_PROC(8, 1, 10, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 11);                       \
    MATRIX_VAR_PROC(8, 1, 11, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 12);                       \
    MATRIX_VAR_PROC(8, 1, 12, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 13);                       \
    MATRIX_VAR_PROC(8, 1, 13, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 14);                       \
    MATRIX_VAR_PROC(8, 1, 14, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 15);                       \
    MATRIX_VAR_PROC(8, 1, 15, xmm_mi, xmm_qi, _RES, _PROC)      \
  }

//! Iterative process of computing distance (INT32, M=32, N=32)
//!
//! Loads eight 128-bit matrix registers (`mi + 0` .. `mi + 28`) once, then
//! for each query index k = 0..31 broadcasts `qi[k]` into `xmm_qi` and
//! invokes `MATRIX_VAR_PROC(8, 1, k, ...)`. The single `xmm_qi` register is
//! overwritten between invocations, so the statement order matters.
//! `MATRIX_VAR_PROC` is defined outside this file (NOTE(review): presumably
//! it pastes index suffixes onto the `xmm_mi` / `_RES` prefixes and applies
//! `_PROC` to each pair -- confirm against its definition).
//!   mi, qi  matrix / query pointers
//!   _RES    accumulator name prefix
//!   _LOAD   load operation taking a `const __m128i *`
//!   _PROC   statement macro forwarded to `MATRIX_VAR_PROC`
#define MATRIX_INT32_ITER_32X32_SSE(mi, qi, _RES, _LOAD, _PROC) \
  {                                                             \
    __m128i xmm_mi_0 = _LOAD((const __m128i *)(mi + 0));        \
    __m128i xmm_mi_1 = _LOAD((const __m128i *)(mi + 4));        \
    __m128i xmm_mi_2 = _LOAD((const __m128i *)(mi + 8));        \
    __m128i xmm_mi_3 = _LOAD((const __m128i *)(mi + 12));       \
    __m128i xmm_mi_4 = _LOAD((const __m128i *)(mi + 16));       \
    __m128i xmm_mi_5 = _LOAD((const __m128i *)(mi + 20));       \
    __m128i xmm_mi_6 = _LOAD((const __m128i *)(mi + 24));       \
    __m128i xmm_mi_7 = _LOAD((const __m128i *)(mi + 28));       \
    __m128i xmm_qi = _mm_broadcast_si32(qi + 0);                \
    MATRIX_VAR_PROC(8, 1, 0, xmm_mi, xmm_qi, _RES, _PROC)       \
    xmm_qi = _mm_broadcast_si32(qi + 1);                        \
    MATRIX_VAR_PROC(8, 1, 1, xmm_mi, xmm_qi, _RES, _PROC)       \
    xmm_qi = _mm_broadcast_si32(qi + 2);                        \
    MATRIX_VAR_PROC(8, 1, 2, xmm_mi, xmm_qi, _RES, _PROC)       \
    xmm_qi = _mm_broadcast_si32(qi + 3);                        \
    MATRIX_VAR_PROC(8, 1, 3, xmm_mi, xmm_qi, _RES, _PROC)       \
    xmm_qi = _mm_broadcast_si32(qi + 4);                        \
    MATRIX_VAR_PROC(8, 1, 4, xmm_mi, xmm_qi, _RES, _PROC)       \
    xmm_qi = _mm_broadcast_si32(qi + 5);                        \
    MATRIX_VAR_PROC(8, 1, 5, xmm_mi, xmm_qi, _RES, _PROC)       \
    xmm_qi = _mm_broadcast_si32(qi + 6);                        \
    MATRIX_VAR_PROC(8, 1, 6, xmm_mi, xmm_qi, _RES, _PROC)       \
    xmm_qi = _mm_broadcast_si32(qi + 7);                        \
    MATRIX_VAR_PROC(8, 1, 7, xmm_mi, xmm_qi, _RES, _PROC)       \
    xmm_qi = _mm_broadcast_si32(qi + 8);                        \
    MATRIX_VAR_PROC(8, 1, 8, xmm_mi, xmm_qi, _RES, _PROC)       \
    xmm_qi = _mm_broadcast_si32(qi + 9);                        \
    MATRIX_VAR_PROC(8, 1, 9, xmm_mi, xmm_qi, _RES, _PROC)       \
    xmm_qi = _mm_broadcast_si32(qi + 10);                       \
    MATRIX_VAR_PROC(8, 1, 10, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 11);                       \
    MATRIX_VAR_PROC(8, 1, 11, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 12);                       \
    MATRIX_VAR_PROC(8, 1, 12, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 13);                       \
    MATRIX_VAR_PROC(8, 1, 13, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 14);                       \
    MATRIX_VAR_PROC(8, 1, 14, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 15);                       \
    MATRIX_VAR_PROC(8, 1, 15, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 16);                       \
    MATRIX_VAR_PROC(8, 1, 16, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 17);                       \
    MATRIX_VAR_PROC(8, 1, 17, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 18);                       \
    MATRIX_VAR_PROC(8, 1, 18, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 19);                       \
    MATRIX_VAR_PROC(8, 1, 19, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 20);                       \
    MATRIX_VAR_PROC(8, 1, 20, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 21);                       \
    MATRIX_VAR_PROC(8, 1, 21, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 22);                       \
    MATRIX_VAR_PROC(8, 1, 22, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 23);                       \
    MATRIX_VAR_PROC(8, 1, 23, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 24);                       \
    MATRIX_VAR_PROC(8, 1, 24, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 25);                       \
    MATRIX_VAR_PROC(8, 1, 25, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 26);                       \
    MATRIX_VAR_PROC(8, 1, 26, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 27);                       \
    MATRIX_VAR_PROC(8, 1, 27, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 28);                       \
    MATRIX_VAR_PROC(8, 1, 28, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 29);                       \
    MATRIX_VAR_PROC(8, 1, 29, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 30);                       \
    MATRIX_VAR_PROC(8, 1, 30, xmm_mi, xmm_qi, _RES, _PROC)      \
    xmm_qi = _mm_broadcast_si32(qi + 31);                       \
    MATRIX_VAR_PROC(8, 1, 31, xmm_mi, xmm_qi, _RES, _PROC)      \
  }

//! Iterative process of computing distance (INT32, M=2, N=1)
//!
//! Loads one 256-bit matrix register from `mi` and builds the query register
//! from four scalars, each duplicated into two adjacent lanes:
//! (q0,q0,q1,q1,q2,q2,q3,q3). `_PROC` combines both into `_RES##_0_0`.
//!
//! `qi` is parenthesized before it is subscripted so that an expression
//! argument such as `q + 4` indexes the intended element instead of expanding
//! to `q + 4[3]`.
//!   mi, qi  matrix / query pointers (`qi` is expanded eight times)
//!   _RES    accumulator name prefix
//!   _LOAD   load operation taking a `const __m256i *`
//!   _PROC   statement macro invoked as `_PROC(matrix, query, accumulator)`
#define MATRIX_INT32_ITER_2X1_AVX(mi, qi, _RES, _LOAD, _PROC)         \
  {                                                                   \
    __m256i ymm_mi = _LOAD((const __m256i *)(mi));                    \
    __m256i ymm_qi =                                                  \
        _mm256_set_epi32((qi)[3], (qi)[3], (qi)[2], (qi)[2], (qi)[1], \
                         (qi)[1], (qi)[0], (qi)[0]);                  \
    _PROC(ymm_mi, ymm_qi, _RES##_0_0)                                 \
  }

//! Single AVX step of the INT32 distance kernel for the M=2, N=2 tile.
//! One vector is loaded from `mi` and one from `qi`. Within each 128-bit
//! lane the query elements 0 and 2 are duplicated and handed to _PROC with
//! _RES##_0_0, then elements 1 and 3 are duplicated for _RES##_0_1.
#define MATRIX_INT32_ITER_2X2_AVX(mi, qi, _RES, _LOAD, _PROC)                 \
  {                                                                           \
    __m256i ymm_mat = _LOAD((const __m256i *)(mi));                           \
    __m256i ymm_qry = _LOAD((const __m256i *)(qi));                           \
    __m256i ymm_dup = _mm256_shuffle_epi32(ymm_qry, _MM_SHUFFLE(2, 2, 0, 0)); \
    _PROC(ymm_mat, ymm_dup, _RES##_0_0)                                       \
    ymm_dup = _mm256_shuffle_epi32(ymm_qry, _MM_SHUFFLE(3, 3, 1, 1));         \
    _PROC(ymm_mat, ymm_dup, _RES##_0_1)                                       \
  }

//! Single AVX step of the INT32 distance kernel for the M=4, N=1 tile.
//! The query vector carries qi[0] broadcast across its low 128 bits and
//! qi[1] broadcast across its high 128 bits; it is passed to _PROC with the
//! eight values loaded from `mi` and with _RES##_0_0.
#define MATRIX_INT32_ITER_4X1_AVX(mi, qi, _RES, _LOAD, _PROC) \
  {                                                           \
    __m128i xmm_lo = _mm_broadcast_si32(qi);                  \
    __m128i xmm_hi = _mm_broadcast_si32(qi + 1);              \
    __m256i ymm_qry = _mm256_set_m128i(xmm_hi, xmm_lo);       \
    __m256i ymm_mat = _LOAD((const __m256i *)(mi));           \
    _PROC(ymm_mat, ymm_qry, _RES##_0_0)                       \
  }

//! Single AVX step of the INT32 distance kernel for the M=4, N=2 tile.
//! Two query vectors are assembled from 128-bit broadcasts: the first holds
//! qi[0] (low half) and qi[2] (high half), the second qi[1] and qi[3].
//! Each is passed to _PROC with the vector loaded from `mi`, targeting
//! _RES##_0_0 and _RES##_0_1 respectively.
#define MATRIX_INT32_ITER_4X2_AVX(mi, qi, _RES, _LOAD, _PROC)         \
  {                                                                   \
    __m256i ymm_mat = _LOAD((const __m256i *)(mi));                   \
    __m256i ymm_qry_0 = _mm256_set_m128i(_mm_broadcast_si32(qi + 2),  \
                                         _mm_broadcast_si32(qi + 0)); \
    __m256i ymm_qry_1 = _mm256_set_m128i(_mm_broadcast_si32(qi + 3),  \
                                         _mm_broadcast_si32(qi + 1)); \
    _PROC(ymm_mat, ymm_qry_0, _RES##_0_0)                             \
    _PROC(ymm_mat, ymm_qry_1, _RES##_0_1)                             \
  }

//! Single AVX step of the INT32 distance kernel for the M=4, N=4 tile.
//! One vector is loaded from `mi` and one from `qi`. For k = 0..3 the k-th
//! element of every 128-bit lane of the query vector is replicated across
//! that lane and passed to _PROC together with _RES##_0_k.
#define MATRIX_INT32_ITER_4X4_AVX(mi, qi, _RES, _LOAD, _PROC)                 \
  {                                                                           \
    __m256i ymm_mat = _LOAD((const __m256i *)(mi));                           \
    __m256i ymm_qry = _LOAD((const __m256i *)(qi));                           \
    __m256i ymm_dup = _mm256_shuffle_epi32(ymm_qry, _MM_SHUFFLE(0, 0, 0, 0)); \
    _PROC(ymm_mat, ymm_dup, _RES##_0_0)                                       \
    ymm_dup = _mm256_shuffle_epi32(ymm_qry, _MM_SHUFFLE(1, 1, 1, 1));         \
    _PROC(ymm_mat, ymm_dup, _RES##_0_1)                                       \
    ymm_dup = _mm256_shuffle_epi32(ymm_qry, _MM_SHUFFLE(2, 2, 2, 2));         \
    _PROC(ymm_mat, ymm_dup, _RES##_0_2)                                       \
    ymm_dup = _mm256_shuffle_epi32(ymm_qry, _MM_SHUFFLE(3, 3, 3, 3));         \
    _PROC(ymm_mat, ymm_dup, _RES##_0_3)                                       \
  }

//! Single AVX step of the INT32 distance kernel for the M=8, N=1 tile.
//! The value at `qi` is broadcast into a full vector and passed to _PROC
//! with the vector loaded from `mi` and with _RES##_0_0.
#define MATRIX_INT32_ITER_8X1_AVX(mi, qi, _RES, _LOAD, _PROC) \
  {                                                           \
    __m256i ymm_mat = _LOAD((const __m256i *)(mi));           \
    __m256i ymm_qry = _mm256_broadcast_si32(qi);              \
    _PROC(ymm_mat, ymm_qry, _RES##_0_0)                       \
  }

//! Single AVX step of the INT32 distance kernel for the M=8, N=2 tile.
//! qi[0] and qi[1] are broadcast into `ymm_qry_0` / `ymm_qry_1`, one vector
//! is loaded from `mi`, and MATRIX_VAR_PROC(1, 2, 0, ...) is given the
//! `ymm_mat` / `ymm_qry` name stems together with _RES and _PROC.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this excerpt; it
//! presumably pastes the _0/_1 suffixes onto `ymm_qry` -- confirm there.
#define MATRIX_INT32_ITER_8X2_AVX(mi, qi, _RES, _LOAD, _PROC) \
  {                                                           \
    __m256i ymm_mat = _LOAD((const __m256i *)(mi));           \
    __m256i ymm_qry_0 = _mm256_broadcast_si32(qi + 0);        \
    __m256i ymm_qry_1 = _mm256_broadcast_si32(qi + 1);        \
    MATRIX_VAR_PROC(1, 2, 0, ymm_mat, ymm_qry, _RES, _PROC)   \
  }

//! Single AVX step of the INT32 distance kernel for the M=8, N=4 tile.
//! One vector is loaded from `mi`; for k = 0..3 the value qi[k] is broadcast
//! and passed to _PROC with that vector and _RES##_0_k.
#define MATRIX_INT32_ITER_8X4_AVX(mi, qi, _RES, _LOAD, _PROC) \
  {                                                           \
    __m256i ymm_mat = _LOAD((const __m256i *)(mi));           \
    __m256i ymm_qry = _mm256_broadcast_si32(qi + 0);          \
    _PROC(ymm_mat, ymm_qry, _RES##_0_0)                       \
    ymm_qry = _mm256_broadcast_si32(qi + 1);                  \
    _PROC(ymm_mat, ymm_qry, _RES##_0_1)                       \
    ymm_qry = _mm256_broadcast_si32(qi + 2);                  \
    _PROC(ymm_mat, ymm_qry, _RES##_0_2)                       \
    ymm_qry = _mm256_broadcast_si32(qi + 3);                  \
    _PROC(ymm_mat, ymm_qry, _RES##_0_3)                       \
  }

//! Single AVX step of the INT32 distance kernel for the M=8, N=8 tile.
//! One vector is loaded from `mi`; for k = 0..7 the value qi[k] is broadcast
//! and passed to _PROC with that vector and _RES##_0_k.
#define MATRIX_INT32_ITER_8X8_AVX(mi, qi, _RES, _LOAD, _PROC) \
  {                                                           \
    __m256i ymm_mat = _LOAD((const __m256i *)(mi));           \
    __m256i ymm_qry = _mm256_broadcast_si32(qi + 0);          \
    _PROC(ymm_mat, ymm_qry, _RES##_0_0)                       \
    ymm_qry = _mm256_broadcast_si32(qi + 1);                  \
    _PROC(ymm_mat, ymm_qry, _RES##_0_1)                       \
    ymm_qry = _mm256_broadcast_si32(qi + 2);                  \
    _PROC(ymm_mat, ymm_qry, _RES##_0_2)                       \
    ymm_qry = _mm256_broadcast_si32(qi + 3);                  \
    _PROC(ymm_mat, ymm_qry, _RES##_0_3)                       \
    ymm_qry = _mm256_broadcast_si32(qi + 4);                  \
    _PROC(ymm_mat, ymm_qry, _RES##_0_4)                       \
    ymm_qry = _mm256_broadcast_si32(qi + 5);                  \
    _PROC(ymm_mat, ymm_qry, _RES##_0_5)                       \
    ymm_qry = _mm256_broadcast_si32(qi + 6);                  \
    _PROC(ymm_mat, ymm_qry, _RES##_0_6)                       \
    ymm_qry = _mm256_broadcast_si32(qi + 7);                  \
    _PROC(ymm_mat, ymm_qry, _RES##_0_7)                       \
  }

//! Single AVX step of the INT32 distance kernel for the M=16, N=1 tile.
//! Two vectors are loaded from `mi` (element offsets 0 and 8) and the value
//! at `qi` is broadcast; MATRIX_VAR_PROC(2, 1, 0, ...) is then given the
//! `ymm_mat` / `ymm_qry` name stems together with _RES and _PROC.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this excerpt; it
//! presumably pastes the _0/_1 suffixes onto `ymm_mat` -- confirm there.
#define MATRIX_INT32_ITER_16X1_AVX(mi, qi, _RES, _LOAD, _PROC) \
  {                                                            \
    __m256i ymm_qry = _mm256_broadcast_si32(qi + 0);           \
    __m256i ymm_mat_0 = _LOAD((const __m256i *)(mi + 0));      \
    __m256i ymm_mat_1 = _LOAD((const __m256i *)(mi + 8));      \
    MATRIX_VAR_PROC(2, 1, 0, ymm_mat, ymm_qry, _RES, _PROC)    \
  }

//! Single AVX step of the INT32 distance kernel for the M=16, N=2 tile.
//! Two vectors are loaded from `mi` (element offsets 0 and 8). For k = 0..1
//! the value qi[k] is broadcast and MATRIX_VAR_PROC(2, 1, k, ...) is invoked
//! with the `ymm_mat` / `ymm_qry` name stems, _RES and _PROC.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this excerpt; it
//! presumably pastes the _0/_1 suffixes onto `ymm_mat` -- confirm there.
#define MATRIX_INT32_ITER_16X2_AVX(mi, qi, _RES, _LOAD, _PROC) \
  {                                                            \
    __m256i ymm_mat_0 = _LOAD((const __m256i *)(mi + 0));      \
    __m256i ymm_mat_1 = _LOAD((const __m256i *)(mi + 8));      \
    __m256i ymm_qry = _mm256_broadcast_si32(qi + 0);           \
    MATRIX_VAR_PROC(2, 1, 0, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 1);                   \
    MATRIX_VAR_PROC(2, 1, 1, ymm_mat, ymm_qry, _RES, _PROC)    \
  }

//! Single AVX step of the INT32 distance kernel for the M=16, N=4 tile.
//! Two vectors are loaded from `mi` (element offsets 0 and 8). For k = 0..3
//! the value qi[k] is broadcast and MATRIX_VAR_PROC(2, 1, k, ...) is invoked
//! with the `ymm_mat` / `ymm_qry` name stems, _RES and _PROC.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this excerpt; it
//! presumably pastes the _0/_1 suffixes onto `ymm_mat` -- confirm there.
#define MATRIX_INT32_ITER_16X4_AVX(mi, qi, _RES, _LOAD, _PROC) \
  {                                                            \
    __m256i ymm_mat_0 = _LOAD((const __m256i *)(mi + 0));      \
    __m256i ymm_mat_1 = _LOAD((const __m256i *)(mi + 8));      \
    __m256i ymm_qry = _mm256_broadcast_si32(qi + 0);           \
    MATRIX_VAR_PROC(2, 1, 0, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 1);                   \
    MATRIX_VAR_PROC(2, 1, 1, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 2);                   \
    MATRIX_VAR_PROC(2, 1, 2, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 3);                   \
    MATRIX_VAR_PROC(2, 1, 3, ymm_mat, ymm_qry, _RES, _PROC)    \
  }

//! Single AVX step of the INT32 distance kernel for the M=16, N=8 tile.
//! Two vectors are loaded from `mi` (element offsets 0 and 8). For k = 0..7
//! the value qi[k] is broadcast and MATRIX_VAR_PROC(2, 1, k, ...) is invoked
//! with the `ymm_mat` / `ymm_qry` name stems, _RES and _PROC.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this excerpt; it
//! presumably pastes the _0/_1 suffixes onto `ymm_mat` -- confirm there.
#define MATRIX_INT32_ITER_16X8_AVX(mi, qi, _RES, _LOAD, _PROC) \
  {                                                            \
    __m256i ymm_mat_0 = _LOAD((const __m256i *)(mi + 0));      \
    __m256i ymm_mat_1 = _LOAD((const __m256i *)(mi + 8));      \
    __m256i ymm_qry = _mm256_broadcast_si32(qi + 0);           \
    MATRIX_VAR_PROC(2, 1, 0, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 1);                   \
    MATRIX_VAR_PROC(2, 1, 1, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 2);                   \
    MATRIX_VAR_PROC(2, 1, 2, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 3);                   \
    MATRIX_VAR_PROC(2, 1, 3, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 4);                   \
    MATRIX_VAR_PROC(2, 1, 4, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 5);                   \
    MATRIX_VAR_PROC(2, 1, 5, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 6);                   \
    MATRIX_VAR_PROC(2, 1, 6, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 7);                   \
    MATRIX_VAR_PROC(2, 1, 7, ymm_mat, ymm_qry, _RES, _PROC)    \
  }

//! Single AVX step of the INT32 distance kernel for the M=16, N=16 tile.
//! Two vectors are loaded from `mi` (element offsets 0 and 8). For
//! k = 0..15 the value qi[k] is broadcast and MATRIX_VAR_PROC(2, 1, k, ...)
//! is invoked with the `ymm_mat` / `ymm_qry` name stems, _RES and _PROC.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this excerpt; it
//! presumably pastes the _0/_1 suffixes onto `ymm_mat` -- confirm there.
#define MATRIX_INT32_ITER_16X16_AVX(mi, qi, _RES, _LOAD, _PROC) \
  {                                                             \
    __m256i ymm_mat_0 = _LOAD((const __m256i *)(mi + 0));       \
    __m256i ymm_mat_1 = _LOAD((const __m256i *)(mi + 8));       \
    __m256i ymm_qry = _mm256_broadcast_si32(qi + 0);            \
    MATRIX_VAR_PROC(2, 1, 0, ymm_mat, ymm_qry, _RES, _PROC)     \
    ymm_qry = _mm256_broadcast_si32(qi + 1);                    \
    MATRIX_VAR_PROC(2, 1, 1, ymm_mat, ymm_qry, _RES, _PROC)     \
    ymm_qry = _mm256_broadcast_si32(qi + 2);                    \
    MATRIX_VAR_PROC(2, 1, 2, ymm_mat, ymm_qry, _RES, _PROC)     \
    ymm_qry = _mm256_broadcast_si32(qi + 3);                    \
    MATRIX_VAR_PROC(2, 1, 3, ymm_mat, ymm_qry, _RES, _PROC)     \
    ymm_qry = _mm256_broadcast_si32(qi + 4);                    \
    MATRIX_VAR_PROC(2, 1, 4, ymm_mat, ymm_qry, _RES, _PROC)     \
    ymm_qry = _mm256_broadcast_si32(qi + 5);                    \
    MATRIX_VAR_PROC(2, 1, 5, ymm_mat, ymm_qry, _RES, _PROC)     \
    ymm_qry = _mm256_broadcast_si32(qi + 6);                    \
    MATRIX_VAR_PROC(2, 1, 6, ymm_mat, ymm_qry, _RES, _PROC)     \
    ymm_qry = _mm256_broadcast_si32(qi + 7);                    \
    MATRIX_VAR_PROC(2, 1, 7, ymm_mat, ymm_qry, _RES, _PROC)     \
    ymm_qry = _mm256_broadcast_si32(qi + 8);                    \
    MATRIX_VAR_PROC(2, 1, 8, ymm_mat, ymm_qry, _RES, _PROC)     \
    ymm_qry = _mm256_broadcast_si32(qi + 9);                    \
    MATRIX_VAR_PROC(2, 1, 9, ymm_mat, ymm_qry, _RES, _PROC)     \
    ymm_qry = _mm256_broadcast_si32(qi + 10);                   \
    MATRIX_VAR_PROC(2, 1, 10, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 11);                   \
    MATRIX_VAR_PROC(2, 1, 11, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 12);                   \
    MATRIX_VAR_PROC(2, 1, 12, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 13);                   \
    MATRIX_VAR_PROC(2, 1, 13, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 14);                   \
    MATRIX_VAR_PROC(2, 1, 14, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 15);                   \
    MATRIX_VAR_PROC(2, 1, 15, ymm_mat, ymm_qry, _RES, _PROC)    \
  }

//! Single AVX step of the INT32 distance kernel for the M=32, N=1 tile.
//! Four vectors are loaded from `mi` (element offsets 0, 8, 16, 24) and the
//! value at `qi` is broadcast; MATRIX_VAR_PROC(4, 1, 0, ...) is then given
//! the `ymm_mat` / `ymm_qry` name stems together with _RES and _PROC.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this excerpt; it
//! presumably pastes the _0.._3 suffixes onto `ymm_mat` -- confirm there.
#define MATRIX_INT32_ITER_32X1_AVX(mi, qi, _RES, _LOAD, _PROC) \
  {                                                            \
    __m256i ymm_qry = _mm256_broadcast_si32(qi);               \
    __m256i ymm_mat_0 = _LOAD((const __m256i *)(mi + 0));      \
    __m256i ymm_mat_1 = _LOAD((const __m256i *)(mi + 8));      \
    __m256i ymm_mat_2 = _LOAD((const __m256i *)(mi + 16));     \
    __m256i ymm_mat_3 = _LOAD((const __m256i *)(mi + 24));     \
    MATRIX_VAR_PROC(4, 1, 0, ymm_mat, ymm_qry, _RES, _PROC)    \
  }

//! Single AVX step of the INT32 distance kernel for the M=32, N=2 tile.
//! Four vectors are loaded from `mi` (element offsets 0, 8, 16, 24). For
//! k = 0..1 the value qi[k] is broadcast and MATRIX_VAR_PROC(4, 1, k, ...)
//! is invoked with the `ymm_mat` / `ymm_qry` name stems, _RES and _PROC.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this excerpt; it
//! presumably pastes the _0.._3 suffixes onto `ymm_mat` -- confirm there.
#define MATRIX_INT32_ITER_32X2_AVX(mi, qi, _RES, _LOAD, _PROC) \
  {                                                            \
    __m256i ymm_mat_0 = _LOAD((const __m256i *)(mi + 0));      \
    __m256i ymm_mat_1 = _LOAD((const __m256i *)(mi + 8));      \
    __m256i ymm_mat_2 = _LOAD((const __m256i *)(mi + 16));     \
    __m256i ymm_mat_3 = _LOAD((const __m256i *)(mi + 24));     \
    __m256i ymm_qry = _mm256_broadcast_si32(qi + 0);           \
    MATRIX_VAR_PROC(4, 1, 0, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 1);                   \
    MATRIX_VAR_PROC(4, 1, 1, ymm_mat, ymm_qry, _RES, _PROC)    \
  }

//! Single AVX step of the INT32 distance kernel for the M=32, N=4 tile.
//! Four vectors are loaded from `mi` (element offsets 0, 8, 16, 24). For
//! k = 0..3 the value qi[k] is broadcast and MATRIX_VAR_PROC(4, 1, k, ...)
//! is invoked with the `ymm_mat` / `ymm_qry` name stems, _RES and _PROC.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this excerpt; it
//! presumably pastes the _0.._3 suffixes onto `ymm_mat` -- confirm there.
#define MATRIX_INT32_ITER_32X4_AVX(mi, qi, _RES, _LOAD, _PROC) \
  {                                                            \
    __m256i ymm_mat_0 = _LOAD((const __m256i *)(mi + 0));      \
    __m256i ymm_mat_1 = _LOAD((const __m256i *)(mi + 8));      \
    __m256i ymm_mat_2 = _LOAD((const __m256i *)(mi + 16));     \
    __m256i ymm_mat_3 = _LOAD((const __m256i *)(mi + 24));     \
    __m256i ymm_qry = _mm256_broadcast_si32(qi + 0);           \
    MATRIX_VAR_PROC(4, 1, 0, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 1);                   \
    MATRIX_VAR_PROC(4, 1, 1, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 2);                   \
    MATRIX_VAR_PROC(4, 1, 2, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 3);                   \
    MATRIX_VAR_PROC(4, 1, 3, ymm_mat, ymm_qry, _RES, _PROC)    \
  }

//! Single AVX step of the INT32 distance kernel for the M=32, N=8 tile.
//! Four vectors are loaded from `mi` (element offsets 0, 8, 16, 24). For
//! k = 0..7 the value qi[k] is broadcast and MATRIX_VAR_PROC(4, 1, k, ...)
//! is invoked with the `ymm_mat` / `ymm_qry` name stems, _RES and _PROC.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this excerpt; it
//! presumably pastes the _0.._3 suffixes onto `ymm_mat` -- confirm there.
#define MATRIX_INT32_ITER_32X8_AVX(mi, qi, _RES, _LOAD, _PROC) \
  {                                                            \
    __m256i ymm_mat_0 = _LOAD((const __m256i *)(mi + 0));      \
    __m256i ymm_mat_1 = _LOAD((const __m256i *)(mi + 8));      \
    __m256i ymm_mat_2 = _LOAD((const __m256i *)(mi + 16));     \
    __m256i ymm_mat_3 = _LOAD((const __m256i *)(mi + 24));     \
    __m256i ymm_qry = _mm256_broadcast_si32(qi + 0);           \
    MATRIX_VAR_PROC(4, 1, 0, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 1);                   \
    MATRIX_VAR_PROC(4, 1, 1, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 2);                   \
    MATRIX_VAR_PROC(4, 1, 2, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 3);                   \
    MATRIX_VAR_PROC(4, 1, 3, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 4);                   \
    MATRIX_VAR_PROC(4, 1, 4, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 5);                   \
    MATRIX_VAR_PROC(4, 1, 5, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 6);                   \
    MATRIX_VAR_PROC(4, 1, 6, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 7);                   \
    MATRIX_VAR_PROC(4, 1, 7, ymm_mat, ymm_qry, _RES, _PROC)    \
  }

//! Single AVX step of the INT32 distance kernel for the M=32, N=16 tile.
//! Four vectors are loaded from `mi` (element offsets 0, 8, 16, 24). For
//! k = 0..15 the value qi[k] is broadcast and MATRIX_VAR_PROC(4, 1, k, ...)
//! is invoked with the `ymm_mat` / `ymm_qry` name stems, _RES and _PROC.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this excerpt; it
//! presumably pastes the _0.._3 suffixes onto `ymm_mat` -- confirm there.
#define MATRIX_INT32_ITER_32X16_AVX(mi, qi, _RES, _LOAD, _PROC) \
  {                                                             \
    __m256i ymm_mat_0 = _LOAD((const __m256i *)(mi + 0));       \
    __m256i ymm_mat_1 = _LOAD((const __m256i *)(mi + 8));       \
    __m256i ymm_mat_2 = _LOAD((const __m256i *)(mi + 16));      \
    __m256i ymm_mat_3 = _LOAD((const __m256i *)(mi + 24));      \
    __m256i ymm_qry = _mm256_broadcast_si32(qi + 0);            \
    MATRIX_VAR_PROC(4, 1, 0, ymm_mat, ymm_qry, _RES, _PROC)     \
    ymm_qry = _mm256_broadcast_si32(qi + 1);                    \
    MATRIX_VAR_PROC(4, 1, 1, ymm_mat, ymm_qry, _RES, _PROC)     \
    ymm_qry = _mm256_broadcast_si32(qi + 2);                    \
    MATRIX_VAR_PROC(4, 1, 2, ymm_mat, ymm_qry, _RES, _PROC)     \
    ymm_qry = _mm256_broadcast_si32(qi + 3);                    \
    MATRIX_VAR_PROC(4, 1, 3, ymm_mat, ymm_qry, _RES, _PROC)     \
    ymm_qry = _mm256_broadcast_si32(qi + 4);                    \
    MATRIX_VAR_PROC(4, 1, 4, ymm_mat, ymm_qry, _RES, _PROC)     \
    ymm_qry = _mm256_broadcast_si32(qi + 5);                    \
    MATRIX_VAR_PROC(4, 1, 5, ymm_mat, ymm_qry, _RES, _PROC)     \
    ymm_qry = _mm256_broadcast_si32(qi + 6);                    \
    MATRIX_VAR_PROC(4, 1, 6, ymm_mat, ymm_qry, _RES, _PROC)     \
    ymm_qry = _mm256_broadcast_si32(qi + 7);                    \
    MATRIX_VAR_PROC(4, 1, 7, ymm_mat, ymm_qry, _RES, _PROC)     \
    ymm_qry = _mm256_broadcast_si32(qi + 8);                    \
    MATRIX_VAR_PROC(4, 1, 8, ymm_mat, ymm_qry, _RES, _PROC)     \
    ymm_qry = _mm256_broadcast_si32(qi + 9);                    \
    MATRIX_VAR_PROC(4, 1, 9, ymm_mat, ymm_qry, _RES, _PROC)     \
    ymm_qry = _mm256_broadcast_si32(qi + 10);                   \
    MATRIX_VAR_PROC(4, 1, 10, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 11);                   \
    MATRIX_VAR_PROC(4, 1, 11, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 12);                   \
    MATRIX_VAR_PROC(4, 1, 12, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 13);                   \
    MATRIX_VAR_PROC(4, 1, 13, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 14);                   \
    MATRIX_VAR_PROC(4, 1, 14, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 15);                   \
    MATRIX_VAR_PROC(4, 1, 15, ymm_mat, ymm_qry, _RES, _PROC)    \
  }

//! Single AVX step of the INT32 distance kernel for the M=32, N=32 tile.
//! Four vectors are loaded from `mi` (element offsets 0, 8, 16, 24). For
//! k = 0..31 the value qi[k] is broadcast and MATRIX_VAR_PROC(4, 1, k, ...)
//! is invoked with the `ymm_mat` / `ymm_qry` name stems, _RES and _PROC.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this excerpt; it
//! presumably pastes the _0.._3 suffixes onto `ymm_mat` -- confirm there.
#define MATRIX_INT32_ITER_32X32_AVX(mi, qi, _RES, _LOAD, _PROC) \
  {                                                             \
    __m256i ymm_mat_0 = _LOAD((const __m256i *)(mi + 0));       \
    __m256i ymm_mat_1 = _LOAD((const __m256i *)(mi + 8));       \
    __m256i ymm_mat_2 = _LOAD((const __m256i *)(mi + 16));      \
    __m256i ymm_mat_3 = _LOAD((const __m256i *)(mi + 24));      \
    __m256i ymm_qry = _mm256_broadcast_si32(qi + 0);            \
    MATRIX_VAR_PROC(4, 1, 0, ymm_mat, ymm_qry, _RES, _PROC)     \
    ymm_qry = _mm256_broadcast_si32(qi + 1);                    \
    MATRIX_VAR_PROC(4, 1, 1, ymm_mat, ymm_qry, _RES, _PROC)     \
    ymm_qry = _mm256_broadcast_si32(qi + 2);                    \
    MATRIX_VAR_PROC(4, 1, 2, ymm_mat, ymm_qry, _RES, _PROC)     \
    ymm_qry = _mm256_broadcast_si32(qi + 3);                    \
    MATRIX_VAR_PROC(4, 1, 3, ymm_mat, ymm_qry, _RES, _PROC)     \
    ymm_qry = _mm256_broadcast_si32(qi + 4);                    \
    MATRIX_VAR_PROC(4, 1, 4, ymm_mat, ymm_qry, _RES, _PROC)     \
    ymm_qry = _mm256_broadcast_si32(qi + 5);                    \
    MATRIX_VAR_PROC(4, 1, 5, ymm_mat, ymm_qry, _RES, _PROC)     \
    ymm_qry = _mm256_broadcast_si32(qi + 6);                    \
    MATRIX_VAR_PROC(4, 1, 6, ymm_mat, ymm_qry, _RES, _PROC)     \
    ymm_qry = _mm256_broadcast_si32(qi + 7);                    \
    MATRIX_VAR_PROC(4, 1, 7, ymm_mat, ymm_qry, _RES, _PROC)     \
    ymm_qry = _mm256_broadcast_si32(qi + 8);                    \
    MATRIX_VAR_PROC(4, 1, 8, ymm_mat, ymm_qry, _RES, _PROC)     \
    ymm_qry = _mm256_broadcast_si32(qi + 9);                    \
    MATRIX_VAR_PROC(4, 1, 9, ymm_mat, ymm_qry, _RES, _PROC)     \
    ymm_qry = _mm256_broadcast_si32(qi + 10);                   \
    MATRIX_VAR_PROC(4, 1, 10, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 11);                   \
    MATRIX_VAR_PROC(4, 1, 11, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 12);                   \
    MATRIX_VAR_PROC(4, 1, 12, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 13);                   \
    MATRIX_VAR_PROC(4, 1, 13, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 14);                   \
    MATRIX_VAR_PROC(4, 1, 14, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 15);                   \
    MATRIX_VAR_PROC(4, 1, 15, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 16);                   \
    MATRIX_VAR_PROC(4, 1, 16, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 17);                   \
    MATRIX_VAR_PROC(4, 1, 17, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 18);                   \
    MATRIX_VAR_PROC(4, 1, 18, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 19);                   \
    MATRIX_VAR_PROC(4, 1, 19, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 20);                   \
    MATRIX_VAR_PROC(4, 1, 20, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 21);                   \
    MATRIX_VAR_PROC(4, 1, 21, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 22);                   \
    MATRIX_VAR_PROC(4, 1, 22, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 23);                   \
    MATRIX_VAR_PROC(4, 1, 23, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 24);                   \
    MATRIX_VAR_PROC(4, 1, 24, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 25);                   \
    MATRIX_VAR_PROC(4, 1, 25, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 26);                   \
    MATRIX_VAR_PROC(4, 1, 26, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 27);                   \
    MATRIX_VAR_PROC(4, 1, 27, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 28);                   \
    MATRIX_VAR_PROC(4, 1, 28, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 29);                   \
    MATRIX_VAR_PROC(4, 1, 29, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 30);                   \
    MATRIX_VAR_PROC(4, 1, 30, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_qry = _mm256_broadcast_si32(qi + 31);                   \
    MATRIX_VAR_PROC(4, 1, 31, ymm_mat, ymm_qry, _RES, _PROC)    \
  }


================================================
FILE: src/ailego/math/distance_matrix_int64.i
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <zvec/ailego/internal/platform.h>
#include "matrix_define.i"

#ifdef __AVX__
//! Broadcast the 64-bit value stored at `ptr` into every 64-bit element of
//! a __m256i. The pointer is reinterpreted as `const double *` so that the
//! double-precision broadcast can be used, and the result is cast back to
//! an integer vector.
#define _mm256_broadcast_si64(ptr) \
  _mm256_castpd_si256(_mm256_broadcast_sd((const double *)(ptr)))
#endif  // __AVX__

//! Single AVX step of the INT64 distance kernel for the M=2, N=1 tile.
//! One vector is loaded from `qi`. Its elements 0 and 1 are each duplicated
//! and passed to _PROC with the vector at `mi` and _RES##_0_0; elements 2
//! and 3 are then duplicated and passed with the vector at `mi + 4` and
//! _RES##_0_1.
#define MATRIX_INT64_ITER_2X1_AVX(mi, qi, _RES, _LOAD, _PROC)             \
  {                                                                       \
    __m256i ymm_mat = _LOAD((const __m256i *)(mi));                       \
    __m256i ymm_qry = _LOAD((const __m256i *)(qi));                       \
    __m256i ymm_dup =                                                     \
        _mm256_permute4x64_epi64(ymm_qry, _MM_SHUFFLE(1, 1, 0, 0));       \
    _PROC(ymm_mat, ymm_dup, _RES##_0_0)                                   \
    ymm_mat = _LOAD((const __m256i *)(mi + 4));                           \
    ymm_dup = _mm256_permute4x64_epi64(ymm_qry, _MM_SHUFFLE(3, 3, 2, 2)); \
    _PROC(ymm_mat, ymm_dup, _RES##_0_1)                                   \
  }

//! Single AVX step of the INT64 distance kernel for the M=2, N=2 tile.
//! One vector is loaded from `mi` and one from `qi`. Query elements 0 and 2
//! are each duplicated and passed to _PROC with _RES##_0_0, then elements 1
//! and 3 are duplicated and passed with _RES##_0_1.
#define MATRIX_INT64_ITER_2X2_AVX(mi, qi, _RES, _LOAD, _PROC)             \
  {                                                                       \
    __m256i ymm_mat = _LOAD((const __m256i *)(mi));                       \
    __m256i ymm_qry = _LOAD((const __m256i *)(qi));                       \
    __m256i ymm_dup =                                                     \
        _mm256_permute4x64_epi64(ymm_qry, _MM_SHUFFLE(2, 2, 0, 0));       \
    _PROC(ymm_mat, ymm_dup, _RES##_0_0)                                   \
    ymm_dup = _mm256_permute4x64_epi64(ymm_qry, _MM_SHUFFLE(3, 3, 1, 1)); \
    _PROC(ymm_mat, ymm_dup, _RES##_0_1)                                   \
  }

//! Single AVX step of the INT64 distance kernel for the M=4, N=1 tile.
//! The vector at `mi` is paired with a broadcast of qi[0] and passed to
//! _PROC with _RES##_0_0; the vector at `mi + 4` is then paired with a
//! broadcast of qi[1] and passed with _RES##_1_0.
#define MATRIX_INT64_ITER_4X1_AVX(mi, qi, _RES, _LOAD, _PROC) \
  {                                                           \
    __m256i ymm_qry = _mm256_broadcast_si64(qi + 0);          \
    __m256i ymm_mat = _LOAD((const __m256i *)(mi + 0));       \
    _PROC(ymm_mat, ymm_qry, _RES##_0_0)                       \
    ymm_qry = _mm256_broadcast_si64(qi + 1);                  \
    ymm_mat = _LOAD((const __m256i *)(mi + 4));               \
    _PROC(ymm_mat, ymm_qry, _RES##_1_0)                       \
  }

//! Single AVX step of the INT64 distance kernel for the M=4, N=2 tile.
//! qi[0] and qi[1] are broadcast into `ymm_qry_0` / `ymm_qry_1`, one vector
//! is loaded from `mi`, and MATRIX_VAR_PROC(1, 2, 0, ...) is given the
//! `ymm_mat` / `ymm_qry` name stems together with _RES and _PROC.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this excerpt; it
//! presumably pastes the _0/_1 suffixes onto `ymm_qry` -- confirm there.
#define MATRIX_INT64_ITER_4X2_AVX(mi, qi, _RES, _LOAD, _PROC) \
  {                                                           \
    __m256i ymm_mat = _LOAD((const __m256i *)(mi));           \
    __m256i ymm_qry_0 = _mm256_broadcast_si64(qi + 0);        \
    __m256i ymm_qry_1 = _mm256_broadcast_si64(qi + 1);        \
    MATRIX_VAR_PROC(1, 2, 0, ymm_mat, ymm_qry, _RES, _PROC)   \
  }

//! Single AVX step of the INT64 distance kernel for the M=4, N=4 tile.
//! One vector is loaded from `mi`; for k = 0..3 the value qi[k] is broadcast
//! and passed to _PROC with that vector and _RES##_0_k.
#define MATRIX_INT64_ITER_4X4_AVX(mi, qi, _RES, _LOAD, _PROC) \
  {                                                           \
    __m256i ymm_mat = _LOAD((const __m256i *)(mi));           \
    __m256i ymm_qry = _mm256_broadcast_si64(qi + 0);          \
    _PROC(ymm_mat, ymm_qry, _RES##_0_0)                       \
    ymm_qry = _mm256_broadcast_si64(qi + 1);                  \
    _PROC(ymm_mat, ymm_qry, _RES##_0_1)                       \
    ymm_qry = _mm256_broadcast_si64(qi + 2);                  \
    _PROC(ymm_mat, ymm_qry, _RES##_0_2)                       \
    ymm_qry = _mm256_broadcast_si64(qi + 3);                  \
    _PROC(ymm_mat, ymm_qry, _RES##_0_3)                       \
  }

//! Single AVX step of the INT64 distance kernel for the M=8, N=1 tile.
//! The value at `qi` is broadcast once; it is passed to _PROC with the
//! vector at `mi` and _RES##_0_0, then with the vector at `mi + 4` and
//! _RES##_1_0.
#define MATRIX_INT64_ITER_8X1_AVX(mi, qi, _RES, _LOAD, _PROC) \
  {                                                           \
    __m256i ymm_mat = _LOAD((const __m256i *)(mi + 0));       \
    __m256i ymm_qry = _mm256_broadcast_si64(qi);              \
    _PROC(ymm_mat, ymm_qry, _RES##_0_0)                       \
    ymm_mat = _LOAD((const __m256i *)(mi + 4));               \
    _PROC(ymm_mat, ymm_qry, _RES##_1_0)                       \
  }

//! Single AVX step of the INT64 distance kernel for the M=8, N=2 tile.
//! qi[0] and qi[1] are broadcast into `ymm_qry_0` / `ymm_qry_1`. The vector
//! at `mi` is processed via MATRIX_VAR_PROC(1, 2, 0, ...) and the vector at
//! `mi + 4` via MATRIX_VAR_PROC(1, 2, 1, ...), each with the `ymm_mat` /
//! `ymm_qry` name stems, _RES and _PROC.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this excerpt; it
//! presumably pastes the _0/_1 suffixes onto `ymm_qry` -- confirm there.
#define MATRIX_INT64_ITER_8X2_AVX(mi, qi, _RES, _LOAD, _PROC) \
  {                                                           \
    __m256i ymm_mat = _LOAD((const __m256i *)(mi + 0));       \
    __m256i ymm_qry_0 = _mm256_broadcast_si64(qi + 0);        \
    __m256i ymm_qry_1 = _mm256_broadcast_si64(qi + 1);        \
    MATRIX_VAR_PROC(1, 2, 0, ymm_mat, ymm_qry, _RES, _PROC)   \
    ymm_mat = _LOAD((const __m256i *)(mi + 4));               \
    MATRIX_VAR_PROC(1, 2, 1, ymm_mat, ymm_qry, _RES, _PROC)   \
  }

//! Single AVX step of the INT64 distance kernel for the M=8, N=4 tile.
//! Two vectors are loaded from `mi` (element offsets 0 and 4). For k = 0..3
//! the value qi[k] is broadcast and MATRIX_VAR_PROC(2, 1, k, ...) is invoked
//! with the `ymm_mat` / `ymm_qry` name stems, _RES and _PROC.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this excerpt; it
//! presumably pastes the _0/_1 suffixes onto `ymm_mat` -- confirm there.
#define MATRIX_INT64_ITER_8X4_AVX(mi, qi, _RES, _LOAD, _PROC) \
  {                                                           \
    __m256i ymm_mat_0 = _LOAD((const __m256i *)(mi + 0));     \
    __m256i ymm_mat_1 = _LOAD((const __m256i *)(mi + 4));     \
    __m256i ymm_qry = _mm256_broadcast_si64(qi + 0);          \
    MATRIX_VAR_PROC(2, 1, 0, ymm_mat, ymm_qry, _RES, _PROC)   \
    ymm_qry = _mm256_broadcast_si64(qi + 1);                  \
    MATRIX_VAR_PROC(2, 1, 1, ymm_mat, ymm_qry, _RES, _PROC)   \
    ymm_qry = _mm256_broadcast_si64(qi + 2);                  \
    MATRIX_VAR_PROC(2, 1, 2, ymm_mat, ymm_qry, _RES, _PROC)   \
    ymm_qry = _mm256_broadcast_si64(qi + 3);                  \
    MATRIX_VAR_PROC(2, 1, 3, ymm_mat, ymm_qry, _RES, _PROC)   \
  }

//! Single AVX step of the INT64 distance kernel for the M=8, N=8 tile.
//! Two vectors are loaded from `mi` (element offsets 0 and 4). For k = 0..7
//! the value qi[k] is broadcast and MATRIX_VAR_PROC(2, 1, k, ...) is invoked
//! with the `ymm_mat` / `ymm_qry` name stems, _RES and _PROC.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this excerpt; it
//! presumably pastes the _0/_1 suffixes onto `ymm_mat` -- confirm there.
#define MATRIX_INT64_ITER_8X8_AVX(mi, qi, _RES, _LOAD, _PROC) \
  {                                                           \
    __m256i ymm_mat_0 = _LOAD((const __m256i *)(mi + 0));     \
    __m256i ymm_mat_1 = _LOAD((const __m256i *)(mi + 4));     \
    __m256i ymm_qry = _mm256_broadcast_si64(qi + 0);          \
    MATRIX_VAR_PROC(2, 1, 0, ymm_mat, ymm_qry, _RES, _PROC)   \
    ymm_qry = _mm256_broadcast_si64(qi + 1);                  \
    MATRIX_VAR_PROC(2, 1, 1, ymm_mat, ymm_qry, _RES, _PROC)   \
    ymm_qry = _mm256_broadcast_si64(qi + 2);                  \
    MATRIX_VAR_PROC(2, 1, 2, ymm_mat, ymm_qry, _RES, _PROC)   \
    ymm_qry = _mm256_broadcast_si64(qi + 3);                  \
    MATRIX_VAR_PROC(2, 1, 3, ymm_mat, ymm_qry, _RES, _PROC)   \
    ymm_qry = _mm256_broadcast_si64(qi + 4);                  \
    MATRIX_VAR_PROC(2, 1, 4, ymm_mat, ymm_qry, _RES, _PROC)   \
    ymm_qry = _mm256_broadcast_si64(qi + 5);                  \
    MATRIX_VAR_PROC(2, 1, 5, ymm_mat, ymm_qry, _RES, _PROC)   \
    ymm_qry = _mm256_broadcast_si64(qi + 6);                  \
    MATRIX_VAR_PROC(2, 1, 6, ymm_mat, ymm_qry, _RES, _PROC)   \
    ymm_qry = _mm256_broadcast_si64(qi + 7);                  \
    MATRIX_VAR_PROC(2, 1, 7, ymm_mat, ymm_qry, _RES, _PROC)   \
  }

//! Single AVX step of the INT64 distance kernel for the M=16, N=1 tile.
//! The value at `qi` is broadcast once. For r = 0..3 the vector at
//! `mi + 4 * r` is loaded and passed to _PROC with that broadcast and
//! _RES##_r_0.
#define MATRIX_INT64_ITER_16X1_AVX(mi, qi, _RES, _LOAD, _PROC) \
  {                                                            \
    __m256i ymm_mat = _LOAD((const __m256i *)(mi + 0));        \
    __m256i ymm_qry = _mm256_broadcast_si64(qi);               \
    _PROC(ymm_mat, ymm_qry, _RES##_0_0)                        \
    ymm_mat = _LOAD((const __m256i *)(mi + 4));                \
    _PROC(ymm_mat, ymm_qry, _RES##_1_0)                        \
    ymm_mat = _LOAD((const __m256i *)(mi + 8));                \
    _PROC(ymm_mat, ymm_qry, _RES##_2_0)                        \
    ymm_mat = _LOAD((const __m256i *)(mi + 12));               \
    _PROC(ymm_mat, ymm_qry, _RES##_3_0)                        \
  }

//! Single AVX step of the INT64 distance kernel for the M=16, N=2 tile.
//! qi[0] and qi[1] are broadcast into `ymm_qry_0` / `ymm_qry_1`. For
//! r = 0..3 the vector at `mi + 4 * r` is loaded and processed via
//! MATRIX_VAR_PROC(1, 2, r, ...) with the `ymm_mat` / `ymm_qry` name stems,
//! _RES and _PROC.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this excerpt; it
//! presumably pastes the _0/_1 suffixes onto `ymm_qry` -- confirm there.
#define MATRIX_INT64_ITER_16X2_AVX(mi, qi, _RES, _LOAD, _PROC) \
  {                                                            \
    __m256i ymm_mat = _LOAD((const __m256i *)(mi + 0));        \
    __m256i ymm_qry_0 = _mm256_broadcast_si64(qi + 0);         \
    __m256i ymm_qry_1 = _mm256_broadcast_si64(qi + 1);         \
    MATRIX_VAR_PROC(1, 2, 0, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_mat = _LOAD((const __m256i *)(mi + 4));                \
    MATRIX_VAR_PROC(1, 2, 1, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_mat = _LOAD((const __m256i *)(mi + 8));                \
    MATRIX_VAR_PROC(1, 2, 2, ymm_mat, ymm_qry, _RES, _PROC)    \
    ymm_mat = _LOAD((const __m256i *)(mi + 12));               \
    MATRIX_VAR_PROC(1, 2, 3, ymm_mat, ymm_qry, _RES, _PROC)    \
  }

//! Iterative process of computing distance (INT64, M=16, N=4)
//!
//! Loads the 16 matrix elements at `mi` up front as four 256-bit blocks
//! (`ymm_mi_0` .. `ymm_mi_3`, element offsets 0, 4, 8, 12) through `_LOAD`.
//! Then, for each of the four query elements `qi[0]` .. `qi[3]`, broadcasts
//! the element with _mm256_broadcast_si64 and invokes
//! MATRIX_VAR_PROC(4, 1, n, ...) with query index n.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this section; judging by
//! the variables declared here it presumably pastes `_0` .. `_3` onto
//! `ymm_mi` and applies `_PROC` to the accumulators derived from `_RES` -
//! confirm.
#define MATRIX_INT64_ITER_16X4_AVX(mi, qi, _RES, _LOAD, _PROC) \
  {                                                            \
    __m256i ymm_mi_0 = _LOAD((const __m256i *)(mi + 0));       \
    __m256i ymm_mi_1 = _LOAD((const __m256i *)(mi + 4));       \
    __m256i ymm_mi_2 = _LOAD((const __m256i *)(mi + 8));       \
    __m256i ymm_mi_3 = _LOAD((const __m256i *)(mi + 12));      \
    __m256i ymm_qi = _mm256_broadcast_si64(qi + 0);            \
    MATRIX_VAR_PROC(4, 1, 0, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 1);                    \
    MATRIX_VAR_PROC(4, 1, 1, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 2);                    \
    MATRIX_VAR_PROC(4, 1, 2, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 3);                    \
    MATRIX_VAR_PROC(4, 1, 3, ymm_mi, ymm_qi, _RES, _PROC)      \
  }

//! Iterative process of computing distance (INT64, M=16, N=8)
//!
//! Loads the 16 matrix elements at `mi` up front as four 256-bit blocks
//! (`ymm_mi_0` .. `ymm_mi_3`, element offsets 0, 4, 8, 12) through `_LOAD`.
//! Then, for each of the eight query elements `qi[0]` .. `qi[7]`, broadcasts
//! the element with _mm256_broadcast_si64 and invokes
//! MATRIX_VAR_PROC(4, 1, n, ...) with query index n.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this section; judging by
//! the variables declared here it presumably pastes `_0` .. `_3` onto
//! `ymm_mi` and applies `_PROC` to the accumulators derived from `_RES` -
//! confirm.
#define MATRIX_INT64_ITER_16X8_AVX(mi, qi, _RES, _LOAD, _PROC) \
  {                                                            \
    __m256i ymm_mi_0 = _LOAD((const __m256i *)(mi + 0));       \
    __m256i ymm_mi_1 = _LOAD((const __m256i *)(mi + 4));       \
    __m256i ymm_mi_2 = _LOAD((const __m256i *)(mi + 8));       \
    __m256i ymm_mi_3 = _LOAD((const __m256i *)(mi + 12));      \
    __m256i ymm_qi = _mm256_broadcast_si64(qi + 0);            \
    MATRIX_VAR_PROC(4, 1, 0, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 1);                    \
    MATRIX_VAR_PROC(4, 1, 1, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 2);                    \
    MATRIX_VAR_PROC(4, 1, 2, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 3);                    \
    MATRIX_VAR_PROC(4, 1, 3, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 4);                    \
    MATRIX_VAR_PROC(4, 1, 4, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 5);                    \
    MATRIX_VAR_PROC(4, 1, 5, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 6);                    \
    MATRIX_VAR_PROC(4, 1, 6, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 7);                    \
    MATRIX_VAR_PROC(4, 1, 7, ymm_mi, ymm_qi, _RES, _PROC)      \
  }

//! Iterative process of computing distance (INT64, M=16, N=16)
//!
//! Loads the 16 matrix elements at `mi` up front as four 256-bit blocks
//! (`ymm_mi_0` .. `ymm_mi_3`, element offsets 0, 4, 8, 12) through `_LOAD`.
//! Then, for each of the sixteen query elements `qi[0]` .. `qi[15]`,
//! broadcasts the element with _mm256_broadcast_si64 and invokes
//! MATRIX_VAR_PROC(4, 1, n, ...) with query index n.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this section; judging by
//! the variables declared here it presumably pastes `_0` .. `_3` onto
//! `ymm_mi` and applies `_PROC` to the accumulators derived from `_RES` -
//! confirm.
#define MATRIX_INT64_ITER_16X16_AVX(mi, qi, _RES, _LOAD, _PROC) \
  {                                                             \
    __m256i ymm_mi_0 = _LOAD((const __m256i *)(mi + 0));        \
    __m256i ymm_mi_1 = _LOAD((const __m256i *)(mi + 4));        \
    __m256i ymm_mi_2 = _LOAD((const __m256i *)(mi + 8));        \
    __m256i ymm_mi_3 = _LOAD((const __m256i *)(mi + 12));       \
    __m256i ymm_qi = _mm256_broadcast_si64(qi + 0);             \
    MATRIX_VAR_PROC(4, 1, 0, ymm_mi, ymm_qi, _RES, _PROC)       \
    ymm_qi = _mm256_broadcast_si64(qi + 1);                     \
    MATRIX_VAR_PROC(4, 1, 1, ymm_mi, ymm_qi, _RES, _PROC)       \
    ymm_qi = _mm256_broadcast_si64(qi + 2);                     \
    MATRIX_VAR_PROC(4, 1, 2, ymm_mi, ymm_qi, _RES, _PROC)       \
    ymm_qi = _mm256_broadcast_si64(qi + 3);                     \
    MATRIX_VAR_PROC(4, 1, 3, ymm_mi, ymm_qi, _RES, _PROC)       \
    ymm_qi = _mm256_broadcast_si64(qi + 4);                     \
    MATRIX_VAR_PROC(4, 1, 4, ymm_mi, ymm_qi, _RES, _PROC)       \
    ymm_qi = _mm256_broadcast_si64(qi + 5);                     \
    MATRIX_VAR_PROC(4, 1, 5, ymm_mi, ymm_qi, _RES, _PROC)       \
    ymm_qi = _mm256_broadcast_si64(qi + 6);                     \
    MATRIX_VAR_PROC(4, 1, 6, ymm_mi, ymm_qi, _RES, _PROC)       \
    ymm_qi = _mm256_broadcast_si64(qi + 7);                     \
    MATRIX_VAR_PROC(4, 1, 7, ymm_mi, ymm_qi, _RES, _PROC)       \
    ymm_qi = _mm256_broadcast_si64(qi + 8);                     \
    MATRIX_VAR_PROC(4, 1, 8, ymm_mi, ymm_qi, _RES, _PROC)       \
    ymm_qi = _mm256_broadcast_si64(qi + 9);                     \
    MATRIX_VAR_PROC(4, 1, 9, ymm_mi, ymm_qi, _RES, _PROC)       \
    ymm_qi = _mm256_broadcast_si64(qi + 10);                    \
    MATRIX_VAR_PROC(4, 1, 10, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 11);                    \
    MATRIX_VAR_PROC(4, 1, 11, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 12);                    \
    MATRIX_VAR_PROC(4, 1, 12, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 13);                    \
    MATRIX_VAR_PROC(4, 1, 13, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 14);                    \
    MATRIX_VAR_PROC(4, 1, 14, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 15);                    \
    MATRIX_VAR_PROC(4, 1, 15, ymm_mi, ymm_qi, _RES, _PROC)      \
  }

//! Iterative process of computing distance (INT64, M=32, N=1)
//!
//! Broadcasts the single query element at `qi` with _mm256_broadcast_si64
//! (a helper defined outside this section), then walks 32 matrix elements
//! starting at `mi` as eight 256-bit blocks at element offsets 0, 4, ..., 28.
//! Every block is fetched with `_LOAD` and handed to
//! `_PROC(block, query, acc)`, where `acc` is `_RES##_0_0` .. `_RES##_7_0`.
//! `mi` is substituted unparenthesized into `mi + k`, so pass a plain pointer.
#define MATRIX_INT64_ITER_32X1_AVX(mi, qi, _RES, _LOAD, _PROC) \
  {                                                            \
    __m256i ymm_qi = _mm256_broadcast_si64(qi);                \
    __m256i ymm_mi = _LOAD((const __m256i *)(mi + 0));         \
    _PROC(ymm_mi, ymm_qi, _RES##_0_0)                          \
    ymm_mi = _LOAD((const __m256i *)(mi + 4));                 \
    _PROC(ymm_mi, ymm_qi, _RES##_1_0)                          \
    ymm_mi = _LOAD((const __m256i *)(mi + 8));                 \
    _PROC(ymm_mi, ymm_qi, _RES##_2_0)                          \
    ymm_mi = _LOAD((const __m256i *)(mi + 12));                \
    _PROC(ymm_mi, ymm_qi, _RES##_3_0)                          \
    ymm_mi = _LOAD((const __m256i *)(mi + 16));                \
    _PROC(ymm_mi, ymm_qi, _RES##_4_0)                          \
    ymm_mi = _LOAD((const __m256i *)(mi + 20));                \
    _PROC(ymm_mi, ymm_qi, _RES##_5_0)                          \
    ymm_mi = _LOAD((const __m256i *)(mi + 24));                \
    _PROC(ymm_mi, ymm_qi, _RES##_6_0)                          \
    ymm_mi = _LOAD((const __m256i *)(mi + 28));                \
    _PROC(ymm_mi, ymm_qi, _RES##_7_0)                          \
  }

//! Iterative process of computing distance (INT64, M=32, N=2)
//!
//! Broadcasts the two query elements `qi[0]` and `qi[1]` into `ymm_qi_0` and
//! `ymm_qi_1`, then loads the 32 matrix elements at `mi` one 256-bit block at
//! a time (element offsets 0, 4, ..., 28) through `_LOAD`. Each block is
//! passed to MATRIX_VAR_PROC(1, 2, k, ...) with block index k = 0..7.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this section; judging by
//! the variables declared here it presumably pastes `_0`/`_1` onto `ymm_qi`
//! and applies `_PROC` to the accumulators derived from `_RES` - confirm.
#define MATRIX_INT64_ITER_32X2_AVX(mi, qi, _RES, _LOAD, _PROC) \
  {                                                            \
    __m256i ymm_qi_0 = _mm256_broadcast_si64(qi + 0);          \
    __m256i ymm_qi_1 = _mm256_broadcast_si64(qi + 1);          \
    __m256i ymm_mi = _LOAD((const __m256i *)(mi + 0));         \
    MATRIX_VAR_PROC(1, 2, 0, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_mi = _LOAD((const __m256i *)(mi + 4));                 \
    MATRIX_VAR_PROC(1, 2, 1, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_mi = _LOAD((const __m256i *)(mi + 8));                 \
    MATRIX_VAR_PROC(1, 2, 2, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_mi = _LOAD((const __m256i *)(mi + 12));                \
    MATRIX_VAR_PROC(1, 2, 3, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_mi = _LOAD((const __m256i *)(mi + 16));                \
    MATRIX_VAR_PROC(1, 2, 4, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_mi = _LOAD((const __m256i *)(mi + 20));                \
    MATRIX_VAR_PROC(1, 2, 5, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_mi = _LOAD((const __m256i *)(mi + 24));                \
    MATRIX_VAR_PROC(1, 2, 6, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_mi = _LOAD((const __m256i *)(mi + 28));                \
    MATRIX_VAR_PROC(1, 2, 7, ymm_mi, ymm_qi, _RES, _PROC)      \
  }

//! Iterative process of computing distance (INT64, M=32, N=4)
//!
//! Broadcasts the four query elements `qi[0]` .. `qi[3]` into `ymm_qi_0` ..
//! `ymm_qi_3`, then loads the 32 matrix elements at `mi` one 256-bit block at
//! a time (element offsets 0, 4, ..., 28) through `_LOAD`. Each block is
//! passed to MATRIX_VAR_PROC(1, 4, k, ...) with block index k = 0..7.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this section; judging by
//! the variables declared here it presumably pastes `_0` .. `_3` onto
//! `ymm_qi` and applies `_PROC` to the accumulators derived from `_RES` -
//! confirm.
#define MATRIX_INT64_ITER_32X4_AVX(mi, qi, _RES, _LOAD, _PROC) \
  {                                                            \
    __m256i ymm_qi_0 = _mm256_broadcast_si64(qi + 0);          \
    __m256i ymm_qi_1 = _mm256_broadcast_si64(qi + 1);          \
    __m256i ymm_qi_2 = _mm256_broadcast_si64(qi + 2);          \
    __m256i ymm_qi_3 = _mm256_broadcast_si64(qi + 3);          \
    __m256i ymm_mi = _LOAD((const __m256i *)(mi + 0));         \
    MATRIX_VAR_PROC(1, 4, 0, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_mi = _LOAD((const __m256i *)(mi + 4));                 \
    MATRIX_VAR_PROC(1, 4, 1, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_mi = _LOAD((const __m256i *)(mi + 8));                 \
    MATRIX_VAR_PROC(1, 4, 2, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_mi = _LOAD((const __m256i *)(mi + 12));                \
    MATRIX_VAR_PROC(1, 4, 3, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_mi = _LOAD((const __m256i *)(mi + 16));                \
    MATRIX_VAR_PROC(1, 4, 4, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_mi = _LOAD((const __m256i *)(mi + 20));                \
    MATRIX_VAR_PROC(1, 4, 5, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_mi = _LOAD((const __m256i *)(mi + 24));                \
    MATRIX_VAR_PROC(1, 4, 6, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_mi = _LOAD((const __m256i *)(mi + 28));                \
    MATRIX_VAR_PROC(1, 4, 7, ymm_mi, ymm_qi, _RES, _PROC)      \
  }

//! Iterative process of computing distance (INT64, M=32, N=8)
//!
//! Broadcasts the eight query elements `qi[0]` .. `qi[7]` into `ymm_qi_0` ..
//! `ymm_qi_7`, then loads the 32 matrix elements at `mi` one 256-bit block at
//! a time (element offsets 0, 4, ..., 28) through `_LOAD`. Each block is
//! passed to MATRIX_VAR_PROC(1, 8, k, ...) with block index k = 0..7.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this section; judging by
//! the variables declared here it presumably pastes `_0` .. `_7` onto
//! `ymm_qi` and applies `_PROC` to the accumulators derived from `_RES` -
//! confirm.
#define MATRIX_INT64_ITER_32X8_AVX(mi, qi, _RES, _LOAD, _PROC) \
  {                                                            \
    __m256i ymm_qi_0 = _mm256_broadcast_si64(qi + 0);          \
    __m256i ymm_qi_1 = _mm256_broadcast_si64(qi + 1);          \
    __m256i ymm_qi_2 = _mm256_broadcast_si64(qi + 2);          \
    __m256i ymm_qi_3 = _mm256_broadcast_si64(qi + 3);          \
    __m256i ymm_qi_4 = _mm256_broadcast_si64(qi + 4);          \
    __m256i ymm_qi_5 = _mm256_broadcast_si64(qi + 5);          \
    __m256i ymm_qi_6 = _mm256_broadcast_si64(qi + 6);          \
    __m256i ymm_qi_7 = _mm256_broadcast_si64(qi + 7);          \
    __m256i ymm_mi = _LOAD((const __m256i *)(mi + 0));         \
    MATRIX_VAR_PROC(1, 8, 0, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_mi = _LOAD((const __m256i *)(mi + 4));                 \
    MATRIX_VAR_PROC(1, 8, 1, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_mi = _LOAD((const __m256i *)(mi + 8));                 \
    MATRIX_VAR_PROC(1, 8, 2, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_mi = _LOAD((const __m256i *)(mi + 12));                \
    MATRIX_VAR_PROC(1, 8, 3, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_mi = _LOAD((const __m256i *)(mi + 16));                \
    MATRIX_VAR_PROC(1, 8, 4, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_mi = _LOAD((const __m256i *)(mi + 20));                \
    MATRIX_VAR_PROC(1, 8, 5, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_mi = _LOAD((const __m256i *)(mi + 24));                \
    MATRIX_VAR_PROC(1, 8, 6, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_mi = _LOAD((const __m256i *)(mi + 28));                \
    MATRIX_VAR_PROC(1, 8, 7, ymm_mi, ymm_qi, _RES, _PROC)      \
  }

//! Iterative process of computing distance (INT64, M=32, N=16)
//!
//! Loads the 32 matrix elements at `mi` up front as eight 256-bit blocks
//! (`ymm_mi_0` .. `ymm_mi_7`, element offsets 0, 4, ..., 28) through `_LOAD`.
//! Then, for each of the sixteen query elements `qi[0]` .. `qi[15]`,
//! broadcasts the element with _mm256_broadcast_si64 and invokes
//! MATRIX_VAR_PROC(8, 1, n, ...) with query index n.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this section; judging by
//! the variables declared here it presumably pastes `_0` .. `_7` onto
//! `ymm_mi` and applies `_PROC` to the accumulators derived from `_RES` -
//! confirm.
#define MATRIX_INT64_ITER_32X16_AVX(mi, qi, _RES, _LOAD, _PROC) \
  {                                                             \
    __m256i ymm_mi_0 = _LOAD((const __m256i *)(mi + 0));        \
    __m256i ymm_mi_1 = _LOAD((const __m256i *)(mi + 4));        \
    __m256i ymm_mi_2 = _LOAD((const __m256i *)(mi + 8));        \
    __m256i ymm_mi_3 = _LOAD((const __m256i *)(mi + 12));       \
    __m256i ymm_mi_4 = _LOAD((const __m256i *)(mi + 16));       \
    __m256i ymm_mi_5 = _LOAD((const __m256i *)(mi + 20));       \
    __m256i ymm_mi_6 = _LOAD((const __m256i *)(mi + 24));       \
    __m256i ymm_mi_7 = _LOAD((const __m256i *)(mi + 28));       \
    __m256i ymm_qi = _mm256_broadcast_si64(qi + 0);             \
    MATRIX_VAR_PROC(8, 1, 0, ymm_mi, ymm_qi, _RES, _PROC)       \
    ymm_qi = _mm256_broadcast_si64(qi + 1);                     \
    MATRIX_VAR_PROC(8, 1, 1, ymm_mi, ymm_qi, _RES, _PROC)       \
    ymm_qi = _mm256_broadcast_si64(qi + 2);                     \
    MATRIX_VAR_PROC(8, 1, 2, ymm_mi, ymm_qi, _RES, _PROC)       \
    ymm_qi = _mm256_broadcast_si64(qi + 3);                     \
    MATRIX_VAR_PROC(8, 1, 3, ymm_mi, ymm_qi, _RES, _PROC)       \
    ymm_qi = _mm256_broadcast_si64(qi + 4);                     \
    MATRIX_VAR_PROC(8, 1, 4, ymm_mi, ymm_qi, _RES, _PROC)       \
    ymm_qi = _mm256_broadcast_si64(qi + 5);                     \
    MATRIX_VAR_PROC(8, 1, 5, ymm_mi, ymm_qi, _RES, _PROC)       \
    ymm_qi = _mm256_broadcast_si64(qi + 6);                     \
    MATRIX_VAR_PROC(8, 1, 6, ymm_mi, ymm_qi, _RES, _PROC)       \
    ymm_qi = _mm256_broadcast_si64(qi + 7);                     \
    MATRIX_VAR_PROC(8, 1, 7, ymm_mi, ymm_qi, _RES, _PROC)       \
    ymm_qi = _mm256_broadcast_si64(qi + 8);                     \
    MATRIX_VAR_PROC(8, 1, 8, ymm_mi, ymm_qi, _RES, _PROC)       \
    ymm_qi = _mm256_broadcast_si64(qi + 9);                     \
    MATRIX_VAR_PROC(8, 1, 9, ymm_mi, ymm_qi, _RES, _PROC)       \
    ymm_qi = _mm256_broadcast_si64(qi + 10);                    \
    MATRIX_VAR_PROC(8, 1, 10, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 11);                    \
    MATRIX_VAR_PROC(8, 1, 11, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 12);                    \
    MATRIX_VAR_PROC(8, 1, 12, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 13);                    \
    MATRIX_VAR_PROC(8, 1, 13, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 14);                    \
    MATRIX_VAR_PROC(8, 1, 14, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 15);                    \
    MATRIX_VAR_PROC(8, 1, 15, ymm_mi, ymm_qi, _RES, _PROC)      \
  }

//! Iterative process of computing distance (INT64, M=32, N=32)
//!
//! Loads the 32 matrix elements at `mi` up front as eight 256-bit blocks
//! (`ymm_mi_0` .. `ymm_mi_7`, element offsets 0, 4, ..., 28) through `_LOAD`.
//! Then, for each of the thirty-two query elements `qi[0]` .. `qi[31]`,
//! broadcasts the element with _mm256_broadcast_si64 and invokes
//! MATRIX_VAR_PROC(8, 1, n, ...) with query index n.
//! NOTE(review): MATRIX_VAR_PROC is defined outside this section; judging by
//! the variables declared here it presumably pastes `_0` .. `_7` onto
//! `ymm_mi` and applies `_PROC` to the accumulators derived from `_RES` -
//! confirm.
#define MATRIX_INT64_ITER_32X32_AVX(mi, qi, _RES, _LOAD, _PROC) \
  {                                                             \
    __m256i ymm_mi_0 = _LOAD((const __m256i *)(mi + 0));        \
    __m256i ymm_mi_1 = _LOAD((const __m256i *)(mi + 4));        \
    __m256i ymm_mi_2 = _LOAD((const __m256i *)(mi + 8));        \
    __m256i ymm_mi_3 = _LOAD((const __m256i *)(mi + 12));       \
    __m256i ymm_mi_4 = _LOAD((const __m256i *)(mi + 16));       \
    __m256i ymm_mi_5 = _LOAD((const __m256i *)(mi + 20));       \
    __m256i ymm_mi_6 = _LOAD((const __m256i *)(mi + 24));       \
    __m256i ymm_mi_7 = _LOAD((const __m256i *)(mi + 28));       \
    __m256i ymm_qi = _mm256_broadcast_si64(qi + 0);             \
    MATRIX_VAR_PROC(8, 1, 0, ymm_mi, ymm_qi, _RES, _PROC)       \
    ymm_qi = _mm256_broadcast_si64(qi + 1);                     \
    MATRIX_VAR_PROC(8, 1, 1, ymm_mi, ymm_qi, _RES, _PROC)       \
    ymm_qi = _mm256_broadcast_si64(qi + 2);                     \
    MATRIX_VAR_PROC(8, 1, 2, ymm_mi, ymm_qi, _RES, _PROC)       \
    ymm_qi = _mm256_broadcast_si64(qi + 3);                     \
    MATRIX_VAR_PROC(8, 1, 3, ymm_mi, ymm_qi, _RES, _PROC)       \
    ymm_qi = _mm256_broadcast_si64(qi + 4);                     \
    MATRIX_VAR_PROC(8, 1, 4, ymm_mi, ymm_qi, _RES, _PROC)       \
    ymm_qi = _mm256_broadcast_si64(qi + 5);                     \
    MATRIX_VAR_PROC(8, 1, 5, ymm_mi, ymm_qi, _RES, _PROC)       \
    ymm_qi = _mm256_broadcast_si64(qi + 6);                     \
    MATRIX_VAR_PROC(8, 1, 6, ymm_mi, ymm_qi, _RES, _PROC)       \
    ymm_qi = _mm256_broadcast_si64(qi + 7);                     \
    MATRIX_VAR_PROC(8, 1, 7, ymm_mi, ymm_qi, _RES, _PROC)       \
    ymm_qi = _mm256_broadcast_si64(qi + 8);                     \
    MATRIX_VAR_PROC(8, 1, 8, ymm_mi, ymm_qi, _RES, _PROC)       \
    ymm_qi = _mm256_broadcast_si64(qi + 9);                     \
    MATRIX_VAR_PROC(8, 1, 9, ymm_mi, ymm_qi, _RES, _PROC)       \
    ymm_qi = _mm256_broadcast_si64(qi + 10);                    \
    MATRIX_VAR_PROC(8, 1, 10, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 11);                    \
    MATRIX_VAR_PROC(8, 1, 11, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 12);                    \
    MATRIX_VAR_PROC(8, 1, 12, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 13);                    \
    MATRIX_VAR_PROC(8, 1, 13, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 14);                    \
    MATRIX_VAR_PROC(8, 1, 14, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 15);                    \
    MATRIX_VAR_PROC(8, 1, 15, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 16);                    \
    MATRIX_VAR_PROC(8, 1, 16, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 17);                    \
    MATRIX_VAR_PROC(8, 1, 17, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 18);                    \
    MATRIX_VAR_PROC(8, 1, 18, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 19);                    \
    MATRIX_VAR_PROC(8, 1, 19, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 20);                    \
    MATRIX_VAR_PROC(8, 1, 20, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 21);                    \
    MATRIX_VAR_PROC(8, 1, 21, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 22);                    \
    MATRIX_VAR_PROC(8, 1, 22, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 23);                    \
    MATRIX_VAR_PROC(8, 1, 23, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 24);                    \
    MATRIX_VAR_PROC(8, 1, 24, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 25);                    \
    MATRIX_VAR_PROC(8, 1, 25, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 26);                    \
    MATRIX_VAR_PROC(8, 1, 26, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 27);                    \
    MATRIX_VAR_PROC(8, 1, 27, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 28);                    \
    MATRIX_VAR_PROC(8, 1, 28, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 29);                    \
    MATRIX_VAR_PROC(8, 1, 29, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 30);                    \
    MATRIX_VAR_PROC(8, 1, 30, ymm_mi, ymm_qi, _RES, _PROC)      \
    ymm_qi = _mm256_broadcast_si64(qi + 31);                    \
    MATRIX_VAR_PROC(8, 1, 31, ymm_mi, ymm_qi, _RES, _PROC)      \
  }


================================================
FILE: src/ailego/math/distance_matrix_mips_utility.i
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Calculate Fused-Multiply-Add (AVX512)
//!
//! Updates `zmm_sum` in place with `zmm_m * zmm_q + zmm_sum` through
//! _mm512_fmadd_ps. The expansion already ends with `;`, so it forms a
//! complete statement on its own.
#define FMA_FP32_AVX512(zmm_m, zmm_q, zmm_sum) \
  zmm_sum = _mm512_fmadd_ps(zmm_m, zmm_q, zmm_sum);

//! Calculate masked Fused-Multiply-Add (AVX512)
//!
//! Updates `zmm_sum` in place through _mm512_mask3_fmadd_ps with write mask
//! `mask`. Per the Intel intrinsic documentation, lanes whose mask bit is set
//! receive `zmm_m * zmm_q + zmm_sum`; the other lanes keep their `zmm_sum`
//! value. The expansion already ends with `;`.
#define FMA_MASK_FP32_AVX512(zmm_m, zmm_q, zmm_sum, mask) \
  zmm_sum = _mm512_mask3_fmadd_ps(zmm_m, zmm_q, zmm_sum, mask);

//! Horizontal add of a half-precision vector, accumulated in FP32 (NEON)
//!
//! Converts the low half (vget_low_f16 + vcvt_f32_f16) and the high half
//! (vcvt_high_f32_f16) of `v` to single precision, adds the two converted
//! vectors lane-wise and reduces the result across lanes with vaddvq_f32.
//! `v` appears twice in the expansion, so pass an expression without side
//! effects. NOTE(review): `v` is presumably a float16x8_t - confirm at the
//! call sites.
#define HorizontalAdd_FP16_NEON(v) \
  vaddvq_f32(vaddq_f32(vcvt_f32_f16(vget_low_f16(v)), vcvt_high_f32_f16(v)))

//! Fold a 512-bit FP32 vector into 256 bits (AVX512)
//!
//! Adds the lower 256 bits of `zmm` (_mm512_castps512_ps256) to its upper 256
//! bits (extracted with _mm512_extractf64x4_pd, index 1, and reinterpreted as
//! packed floats) and yields the lane-wise sum as a 256-bit float vector.
//! `zmm` appears twice in the expansion, so pass an expression without side
//! effects.
#define HorizontalAdd_FP32_V512_TO_V256(zmm) \
  _mm256_add_ps(                             \
      _mm512_castps512_ps256(zmm),           \
      _mm256_castpd_ps(_mm512_extractf64x4_pd(_mm512_castps_pd(zmm), 1)))

//! Calculate Fused-Multiply-Add (GENERAL, FP16)
//!
//! Converts `lhs` and `rhs` to float once each, then accumulates
//!   sum   += lhs * rhs
//!   norm1 += lhs * lhs
//!   norm2 += rhs * rhs
//! The temporaries carry a macro-specific name so that they cannot capture
//! identifiers appearing in the arguments: with generic names such as `v1` /
//! `v2`, a call like FMA_FP16_GENERAL(v2, v1, ...) would silently read the
//! freshly declared temporary instead of the caller's variable.
//! The expansion is a brace-enclosed block and needs no trailing `;`.
#define FMA_FP16_GENERAL(lhs, rhs, sum, norm1, norm2)   \
  {                                                     \
    const float fma_fp16_lhs_f32_ = (lhs);              \
    const float fma_fp16_rhs_f32_ = (rhs);              \
    sum += fma_fp16_lhs_f32_ * fma_fp16_rhs_f32_;       \
    norm1 += fma_fp16_lhs_f32_ * fma_fp16_lhs_f32_;     \
    norm2 += fma_fp16_rhs_f32_ * fma_fp16_rhs_f32_;     \
  }

//! Scalar multiply-accumulate step for one FP32 element pair (GENERAL)
//!
//! Adds `lhs * rhs` to `sum`, `lhs * lhs` to `norm1` and `rhs * rhs` to
//! `norm2`. Each of `lhs` and `rhs` is expanded three times, so pass
//! expressions without side effects. The expansion is a brace-enclosed block
//! and needs no trailing `;`.
#define FMA_FP32_GENERAL(lhs, rhs, sum, norm1, norm2) \
  {                                                   \
    (sum) += (lhs) * (rhs);                           \
    (norm1) += (lhs) * (lhs);                         \
    (norm2) += (rhs) * (rhs);                         \
  }

#if defined(__SSE4_1__)
//! Four-bits Convert Table
//!
//! Entry i (0..15) holds the signed two's-complement value of the 4-bit
//! pattern i: 0..7 map to themselves, 8..15 map to -8..-1. The 16 entries are
//! stored twice so the table can be loaded either as a 128-bit or as a
//! 256-bit byte-shuffle lookup vector.
static const AILEGO_ALIGNED(32) int8_t Int4ConvertTable[32] = {
    0, 1, 2, 3, 4, 5, 6, 7, -8, -7, -6, -5, -4, -3, -2, -1,
    0, 1, 2, 3, 4, 5, 6, 7, -8, -7, -6, -5, -4, -3, -2, -1};
#endif  // __SSE4_1__

#if defined(__SSE4_1__)
//! 0x0f in every byte: isolates the low nibble of each byte
static const __m128i MASK_INT4_SSE = _mm_set1_epi32(0x0f0f0f0f);
//! 1 in every 16-bit lane: multiplier that turns _mm_madd_epi16 into a
//! pairwise 16-bit -> 32-bit add
static const __m128i ONES_INT16_SSE = _mm_set1_epi32(0x00010001);
//! First 16 bytes of Int4ConvertTable, used as the _mm_shuffle_epi8 table
//! that converts a nibble index into its signed value (aligned load)
static const __m128i INT4_LOOKUP_SSE =
    _mm_load_si128((const __m128i *)Int4ConvertTable);
#endif  // __SSE4_1__

#if defined(__AVX2__)
//! 0x0f in every byte: isolates the low nibble of each byte
static const __m256i MASK_INT4_AVX = _mm256_set1_epi32(0x0f0f0f0f);
//! 1 in every 16-bit lane: multiplier that turns _mm256_madd_epi16 into a
//! pairwise 16-bit -> 32-bit add
static const __m256i ONES_INT16_AVX = _mm256_set1_epi32(0x00010001);
//! All 32 bytes of Int4ConvertTable (the 16-entry table repeated), used as
//! the _mm256_shuffle_epi8 lookup table (aligned load).
//! NOTE(review): Int4ConvertTable is only defined under __SSE4_1__, so this
//! relies on __AVX2__ builds also defining __SSE4_1__ - confirm.
static const __m256i INT4_LOOKUP_AVX =
    _mm256_load_si256((const __m256i *)Int4ConvertTable);
#endif  // __AVX2__

//! Calculate Fused-Multiply-Add (GENERAL)
//!
//! `lhs` and `rhs` are bytes that each pack two 4-bit values (low nibble and
//! high nibble).
//!  - `sum` receives two Int4MulTable lookups: the first index places the low
//!    nibble of `lhs` in bits 4..7 and the low nibble of `rhs` in bits 0..3,
//!    the second does the same with the two high nibbles.
//!    NOTE(review): Int4MulTable is defined outside this section; presumably
//!    it holds the product of the two signed nibbles for each index - confirm.
//!  - `norm1` / `norm2` receive the sum of squares of both sign-extended
//!    nibbles of `lhs` / `rhs`. Sign extension is done by moving the nibble
//!    into the top four bits of an int8_t and shifting right by 4, which
//!    relies on the right shift of a negative value being arithmetic.
//! The arguments are expanded several times, so pass expressions without
//! side effects. The expansion is a brace-enclosed block.
#define FMA_INT4_GENERAL(lhs, rhs, sum, norm1, norm2)                   \
  {                                                                     \
    sum += Int4MulTable[(((lhs) << 4) & 0xf0) | (((rhs) >> 0) & 0xf)] + \
           Int4MulTable[(((lhs) >> 0) & 0xf0) | (((rhs) >> 4) & 0xf)];  \
    norm1 += static_cast<float>(                                        \
        ((int8_t)((lhs) << 4) >> 4) * ((int8_t)((lhs) << 4) >> 4) +     \
        ((int8_t)((lhs) & 0xf0) >> 4) * ((int8_t)((lhs) & 0xf0) >> 4)); \
    norm2 += static_cast<float>(                                        \
        ((int8_t)((rhs) << 4) >> 4) * ((int8_t)((rhs) << 4) >> 4) +     \
        ((int8_t)((rhs) & 0xf0) >> 4) * ((int8_t)((rhs) & 0xf0) >> 4)); \
  }


//! One SSE step over packed INT4 data: dot product plus both squared norms
//!
//! Every byte of `xmm_lhs` / `xmm_rhs` packs two 4-bit values. The low and
//! the high nibbles are isolated (mask, respectively shift-by-4 then mask) and
//! translated to signed bytes through the INT4_LOOKUP_SSE shuffle table. The
//! resulting byte vectors are then fed to FMA_INT8_SSE so that
//!   xmm_sum_0     accumulates lhs * rhs,
//!   xmm_sum_norm1 accumulates lhs * lhs,
//!   xmm_sum_norm2 accumulates rhs * rhs,
//! each over the low-nibble and the high-nibble vectors.
#define FMA_INT4_ITER_SSE(xmm_lhs, xmm_rhs, xmm_sum_0, xmm_sum_norm1,  \
                          xmm_sum_norm2)                               \
  {                                                                    \
    __m128i xmm_lhs_nib_lo = _mm_shuffle_epi8(                         \
        INT4_LOOKUP_SSE, _mm_and_si128((xmm_lhs), MASK_INT4_SSE));     \
    __m128i xmm_lhs_nib_hi = _mm_shuffle_epi8(                         \
        INT4_LOOKUP_SSE,                                               \
        _mm_and_si128(_mm_srli_epi32((xmm_lhs), 4), MASK_INT4_SSE));   \
    __m128i xmm_rhs_nib_lo = _mm_shuffle_epi8(                         \
        INT4_LOOKUP_SSE, _mm_and_si128((xmm_rhs), MASK_INT4_SSE));     \
    __m128i xmm_rhs_nib_hi = _mm_shuffle_epi8(                         \
        INT4_LOOKUP_SSE,                                               \
        _mm_and_si128(_mm_srli_epi32((xmm_rhs), 4), MASK_INT4_SSE));   \
    /* dot product */                                                  \
    FMA_INT8_SSE(xmm_lhs_nib_lo, xmm_rhs_nib_lo, xmm_sum_0);           \
    FMA_INT8_SSE(xmm_lhs_nib_hi, xmm_rhs_nib_hi, xmm_sum_0);           \
    /* squared norm of lhs */                                          \
    FMA_INT8_SSE(xmm_lhs_nib_lo, xmm_lhs_nib_lo, xmm_sum_norm1);       \
    FMA_INT8_SSE(xmm_lhs_nib_hi, xmm_lhs_nib_hi, xmm_sum_norm1);       \
    /* squared norm of rhs */                                          \
    FMA_INT8_SSE(xmm_rhs_nib_lo, xmm_rhs_nib_lo, xmm_sum_norm2);       \
    FMA_INT8_SSE(xmm_rhs_nib_hi, xmm_rhs_nib_hi, xmm_sum_norm2);       \
  }

//! Calculate Fused-Multiply-Add (GENERAL)
//!
//! Accumulates, as floats,
//!   sum   += lhs * rhs
//!   norm1 += lhs * lhs
//!   norm2 += rhs * rhs
//! The products are formed in integer arithmetic before the conversion to
//! float. `lhs` and `rhs` are parenthesized so that compound arguments such
//! as `a + 1` multiply as a whole instead of being split by operator
//! precedence. Each argument is expanded several times, so pass expressions
//! without side effects. The expansion is a brace-enclosed block.
#define FMA_INT8_GENERAL(lhs, rhs, sum, norm1, norm2) \
  {                                                   \
    sum += static_cast<float>((lhs) * (rhs));         \
    norm1 += static_cast<float>((lhs) * (lhs));       \
    norm2 += static_cast<float>((rhs) * (rhs));       \
  }

//! Signed INT8 multiply-accumulate into 32-bit lanes (SSE)
//!
//! _mm_maddubs_epi16 multiplies unsigned bytes by signed bytes, so the
//! magnitude of `xmm_rhs` (_mm_abs_epi8) is used as the unsigned operand and
//! the sign of `xmm_rhs` is transferred onto `xmm_lhs` (_mm_sign_epi8). The
//! 16-bit pair sums are widened to 32 bits by a multiply-add with
//! ONES_INT16_SSE and added onto `xmm_sum`, which is updated in place.
//! The expansion is a single assignment expression without trailing `;`;
//! `xmm_rhs` and `xmm_sum` are expanded twice.
#define FMA_INT8_SSE(xmm_lhs, xmm_rhs, xmm_sum)                             \
  xmm_sum = _mm_add_epi32(                                                  \
      xmm_sum,                                                              \
      _mm_madd_epi16(ONES_INT16_SSE,                                        \
                     _mm_maddubs_epi16(_mm_abs_epi8(xmm_rhs),               \
                                       _mm_sign_epi8(xmm_lhs, xmm_rhs))))

//! Signed INT8 multiply-accumulate into 32-bit lanes (AVX)
//!
//! _mm256_maddubs_epi16 multiplies unsigned bytes by signed bytes, so the
//! magnitude of `ymm_rhs` (_mm256_abs_epi8) is used as the unsigned operand
//! and the sign of `ymm_rhs` is transferred onto `ymm_lhs`
//! (_mm256_sign_epi8). The 16-bit pair sums are widened to 32 bits by a
//! multiply-add with ONES_INT16_AVX and added onto `ymm_sum`, which is
//! updated in place. The expansion is a single assignment expression without
//! trailing `;`; `ymm_rhs` and `ymm_sum` are expanded twice.
#define FMA_INT8_AVX(ymm_lhs, ymm_rhs, ymm_sum)                          \
  ymm_sum = _mm256_add_epi32(                                            \
      ymm_sum,                                                           \
      _mm256_madd_epi16(                                                 \
          ONES_INT16_AVX,                                                \
          _mm256_maddubs_epi16(_mm256_abs_epi8(ymm_rhs),                 \
                               _mm256_sign_epi8(ymm_lhs, ymm_rhs))))

//! Signed INT8 multiply-accumulate of 128-bit inputs into a 256-bit sum
//!
//! Computes the same 32-bit partial sums as the SSE variant from `xmm_lhs`
//! and `xmm_rhs` (abs/sign trick, _mm_maddubs_epi16, then _mm_madd_epi16 with
//! ONES_INT16_SSE), places them in the low 128 bits of a 256-bit vector whose
//! high 128 bits are zero, and adds that vector onto `ymm_sum` in place.
//! The expansion is a single assignment expression without trailing `;`.
#define FMA_INT8_AVX_SSE_HYBRID(xmm_lhs, xmm_rhs, ymm_sum)                  \
  ymm_sum = _mm256_add_epi32(                                               \
      ymm_sum,                                                              \
      _mm256_set_m128i(                                                     \
          _mm_setzero_si128(),                                              \
          _mm_madd_epi16(ONES_INT16_SSE,                                    \
                         _mm_maddubs_epi16(                                 \
                             _mm_abs_epi8(xmm_rhs),                         \
                             _mm_sign_epi8(xmm_lhs, xmm_rhs)))))

//! Compute the distance between matrix and query (AVX)
//!
//! Every byte of `ymm_lhs` / `ymm_rhs` packs two 4-bit values. The low and
//! the high nibbles are isolated (mask, respectively shift-by-4 then mask) and
//! translated to signed bytes through the INT4_LOOKUP_AVX shuffle table.
//! The resulting byte vectors are fed to FMA_INT8_AVX so that
//!   ymm_sum_0     accumulates the low-nibble  lhs * rhs products,
//!   ymm_sum_1     accumulates the high-nibble lhs * rhs products,
//!   ymm_sum_norm1 accumulates lhs * lhs over both nibbles,
//!   ymm_sum_norm2 accumulates rhs * rhs over both nibbles.
//! The fourth parameter is named `ymm_sum_1` to match its use in the body;
//! with a differently spelled parameter the argument would be ignored and
//! the macro would bind to whatever `ymm_sum_1` exists in the caller's scope.
#define FMA_INT4_ITER_AVX(ymm_lhs, ymm_rhs, ymm_sum_0, ymm_sum_1,          \
                          ymm_sum_norm1, ymm_sum_norm2)                    \
  {                                                                        \
    __m256i ymm_lhs_0 = _mm256_shuffle_epi8(                               \
        INT4_LOOKUP_AVX, _mm256_and_si256((ymm_lhs), MASK_INT4_AVX));      \
    __m256i ymm_rhs_0 = _mm256_shuffle_epi8(                               \
        INT4_LOOKUP_AVX, _mm256_and_si256((ymm_rhs), MASK_INT4_AVX));      \
    __m256i ymm_lhs_1 = _mm256_shuffle_epi8(                               \
        INT4_LOOKUP_AVX,                                                   \
        _mm256_and_si256(_mm256_srli_epi32((ymm_lhs), 4), MASK_INT4_AVX)); \
    __m256i ymm_rhs_1 = _mm256_shuffle_epi8(                               \
        INT4_LOOKUP_AVX,                                                   \
        _mm256_and_si256(_mm256_srli_epi32((ymm_rhs), 4), MASK_INT4_AVX)); \
    FMA_INT8_AVX(ymm_lhs_0, ymm_rhs_0, ymm_sum_0);                         \
    FMA_INT8_AVX(ymm_lhs_1, ymm_rhs_1, ymm_sum_1);                         \
    FMA_INT8_AVX(ymm_lhs_0, ymm_lhs_0, ymm_sum_norm1);                     \
    FMA_INT8_AVX(ymm_lhs_1, ymm_lhs_1, ymm_sum_norm1);                     \
    FMA_INT8_AVX(ymm_rhs_0, ymm_rhs_0, ymm_sum_norm2);                     \
    FMA_INT8_AVX(ymm_rhs_1, ymm_rhs_1, ymm_sum_norm2);                     \
  }


================================================
FILE: src/ailego/math/distance_matrix_popcnt.i
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_int32.i"
#include "distance_matrix_int64.i"
#include "matrix_utility.i"

//! Widen 8-bit partial sums to 16 bits (UINT32 Permute 1 SSE)
//!
//! For every 16-bit lane of `v`, adds its low byte to its high byte and
//! yields the result as a 16-bit value. Extra arguments are accepted and
//! ignored; `v` is expanded twice.
#define POPCNT_UINT32_PERMUTE1_SSE(v, ...)                  \
  _mm_add_epi16(_mm_and_si128((v), _mm_set1_epi16(0x00ff)), \
                _mm_srli_epi16((v), 8))

//! Widen 16-bit partial sums to 32 bits (UINT32 Permute 2 SSE)
//!
//! For every 32-bit lane of `v`, adds its low 16 bits to its high 16 bits and
//! yields the result as a 32-bit value. Extra arguments are accepted and
//! ignored; `v` is expanded twice.
#define POPCNT_UINT32_PERMUTE2_SSE(v, ...)                      \
  _mm_add_epi32(_mm_and_si128((v), _mm_set1_epi32(0x0000ffff)), \
                _mm_srli_epi32((v), 16))

//! Widen 8-bit partial sums to 16 bits (UINT32 Permute 1 AVX)
//!
//! For every 16-bit lane of `v`, adds its low byte to its high byte and
//! yields the result as a 16-bit value. Extra arguments are accepted and
//! ignored; `v` is expanded twice.
#define POPCNT_UINT32_PERMUTE1_AVX(v, ...)                           \
  _mm256_add_epi16(_mm256_and_si256((v), _mm256_set1_epi16(0x00ff)), \
                   _mm256_srli_epi16((v), 8))

//! Widen 16-bit partial sums to 32 bits (UINT32 Permute 2 AVX)
//!
//! For every 32-bit lane of `v`, adds its low 16 bits to its high 16 bits and
//! yields the result as a 32-bit value. Extra arguments are accepted and
//! ignored; `v` is expanded twice.
#define POPCNT_UINT32_PERMUTE2_AVX(v, ...)                               \
  _mm256_add_epi32(_mm256_and_si256((v), _mm256_set1_epi32(0x0000ffff)), \
                   _mm256_srli_epi32((v), 16))

//! Collapse byte-wise partial sums into 64-bit lanes (UINT64 Permute AVX)
//!
//! Uses the sum-of-absolute-differences against an all-zero vector, which
//! adds up the eight unsigned bytes of every 64-bit lane of `v`. Extra
//! arguments are accepted and ignored.
#define POPCNT_UINT64_PERMUTE_AVX(v, ...) \
  _mm256_sad_epu8(_mm256_setzero_si256(), (v))

//! Compute the distance between matrix and query (UINT32, M=2, N=1)
//!
//! Parameters
//!   m     pointer to the matrix words; the macro advances it while running
//!   q     pointer to the query words; the macro advances it while running
//!   cnt   number of query words to process
//!   out   destination; the low 64 bits of `_NORM(sum)` are stored here
//!   _NORM macro/function applied to the final __m128i accumulator before the
//!         store (its result is passed to _mm_storel_pi)
//!
//! Outline
//!   1. Two accumulators (`xmm_sum_0_0`, `xmm_sum_0_1`) are set up through
//!      MATRIX_VAR_INIT and four end pointers are derived from `cnt`:
//!      `qe_0` (end of the whole groups of four query words), `qe_1`
//!      (28 words when cnt > 31, else `qe_0`), `qe_2` (4092 words when
//!      cnt > 4095, else `qe_0`) and `qe_3` (end of the query).
//!   2. Depending on whether both `m` and `q` are 16-byte aligned, the
//!      aligned or the unaligned load is used; both branches are otherwise
//!      identical. Each loop iteration consumes 4 query words and 8 matrix
//!      words. The main loop runs in three phases (STEP1 up to `qe_1`, STEP2
//!      up to `qe_2`, STEP3 up to `qe_0`); between the phases the
//!      accumulators are widened with POPCNT_UINT32_PERMUTE1_SSE and
//!      POPCNT_UINT32_PERMUTE2_SSE.
//!      NOTE(review): the STEP macros are defined outside this section;
//!      presumably they accumulate in 8-, 16- and 32-bit lanes respectively
//!      and the 31 / 4095 caps keep the narrow lanes from overflowing -
//!      confirm.
//!   3. If at least two query words remain, one extra step handles them:
//!      four matrix words are loaded and the query vector is built as
//!      (q[0], q[0], q[1], q[1]).
//!   4. The two accumulators are added and the upper 64 bits are folded onto
//!      the lower 64 bits.
//!   5. A final single leftover query word is processed against m[0], m[1]
//!      with the upper two lanes of the matrix vector set to zero.
//!
//! The expansion is not wrapped in braces: it declares `qe_0` .. `qe_3` and
//! the accumulators in the caller's scope.
#define POPCNT_UINT32_2X1_SSE(m, q, cnt, out, _NORM)                         \
  MATRIX_VAR_INIT(1, 2, __m128i, xmm_sum, _mm_setzero_si128())               \
  const uint32_t *qe_0 = q + ((cnt >> 2) << 2);                              \
  const uint32_t *qe_1 = (cnt > 31 ? q + ((31 >> 2) << 2) : qe_0);           \
  const uint32_t *qe_2 = (cnt > 4095 ? q + ((4095 >> 2) << 2) : qe_0);       \
  const uint32_t *qe_3 = q + cnt;                                            \
  if (((uintptr_t)m & 0xf) == 0 && ((uintptr_t)q & 0xf) == 0) {              \
    for (; q != qe_1; m += 8, q += 4) {                                      \
      MATRIX_INT32_ITER_2X1_SSE(m, q, xmm_sum, _mm_load_si128,               \
                                POPCNT_UINT32_STEP1_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(1, 2, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)            \
    for (; q != qe_2; m += 8, q += 4) {                                      \
      MATRIX_INT32_ITER_2X1_SSE(m, q, xmm_sum, _mm_load_si128,               \
                                POPCNT_UINT32_STEP2_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(1, 2, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)            \
    for (; q != qe_0; m += 8, q += 4) {                                      \
      MATRIX_INT32_ITER_2X1_SSE(m, q, xmm_sum, _mm_load_si128,               \
                                POPCNT_UINT32_STEP3_SSE)                     \
    }                                                                        \
    if (qe_3 >= qe_0 + 2) {                                                  \
      __m128i xmm_m = _mm_load_si128((const __m128i *)(m));                  \
      __m128i xmm_q = _mm_set_epi32(q[1], q[1], q[0], q[0]);                 \
      POPCNT_UINT32_STEP3_SSE(xmm_m, xmm_q, xmm_sum_0_0)                     \
      m += 4;                                                                \
      q += 2;                                                                \
    }                                                                        \
  } else {                                                                   \
    for (; q != qe_1; m += 8, q += 4) {                                      \
      MATRIX_INT32_ITER_2X1_SSE(m, q, xmm_sum, _mm_loadu_si128,              \
                                POPCNT_UINT32_STEP1_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(1, 2, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)            \
    for (; q != qe_2; m += 8, q += 4) {                                      \
      MATRIX_INT32_ITER_2X1_SSE(m, q, xmm_sum, _mm_loadu_si128,              \
                                POPCNT_UINT32_STEP2_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(1, 2, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)            \
    for (; q != qe_0; m += 8, q += 4) {                                      \
      MATRIX_INT32_ITER_2X1_SSE(m, q, xmm_sum, _mm_loadu_si128,              \
                                POPCNT_UINT32_STEP3_SSE)                     \
    }                                                                        \
    if (qe_3 >= qe_0 + 2) {                                                  \
      __m128i xmm_m = _mm_loadu_si128((const __m128i *)(m));                 \
      __m128i xmm_q = _mm_set_epi32(q[1], q[1], q[0], q[0]);                 \
      POPCNT_UINT32_STEP3_SSE(xmm_m, xmm_q, xmm_sum_0_0)                     \
      m += 4;                                                                \
      q += 2;                                                                \
    }                                                                        \
  }                                                                          \
  xmm_sum_0_0 = _mm_add_epi32(xmm_sum_0_0, xmm_sum_0_1);                     \
  xmm_sum_0_0 = _mm_add_epi32(                                               \
      xmm_sum_0_0, _mm_shuffle_epi32(xmm_sum_0_0, _MM_SHUFFLE(0, 0, 3, 2))); \
  if (q != qe_3) {                                                           \
    __m128i xmm_m = _mm_set_epi32(0, 0, m[1], m[0]);                         \
    __m128i xmm_q = _mm_broadcast_si32(q);                                   \
    POPCNT_UINT32_STEP3_SSE(xmm_m, xmm_q, xmm_sum_0_0)                       \
  }                                                                          \
  _mm_storel_pi((__m64 *)out, _NORM(xmm_sum_0_0));

//! Compute the distance between matrix and query (UINT32, M=2, N=2)
//!
//! The main loops consume two elements per iteration: `m` and `q` are handed
//! to MATRIX_INT32_ITER_2X2_SSE and then both advanced by 4 words. The
//! iterations are split into three phases using POPCNT_UINT32_STEP1_SSE,
//! POPCNT_UINT32_STEP2_SSE and POPCNT_UINT32_STEP3_SSE, with a
//! MATRIX_VAR_PERMUTE pass on the accumulators after each of the first two
//! phases. When `cnt` is odd, the last element is processed separately with
//! POPCNT_UINT32_STEP3_SSE after the two accumulators have been folded.
//! NOTE(review): the 31/4095 limits presumably stop the narrower STEP1/STEP2
//! accumulator lanes from overflowing; confirm against those macros.
//!
//! @param m      Matrix pointer; must be a modifiable lvalue, it is advanced.
//! @param q      Query pointer (const uint32_t * compatible); also advanced.
//! @param cnt    Element count; expanded several times, so keep it free of
//!               side effects.
//! @param out    Destination forwarded to MATRIX_VAR_STORE.
//! @param _NORM  Forwarded unchanged to MATRIX_VAR_STORE.
//!
//! Expands to statements, not an expression, and declares xmm_sum_0_0,
//! xmm_sum_0_1 (via MATRIX_VAR_INIT) and qe_0..qe_3 in the enclosing scope.
#define POPCNT_UINT32_2X2_SSE(m, q, cnt, out, _NORM)                         \
  MATRIX_VAR_INIT(1, 2, __m128i, xmm_sum, _mm_setzero_si128())               \
  /* qe_0 ends the paired part; qe_3 ends the query (odd cnt: tail). */      \
  const uint32_t *qe_0 = (q) + (((cnt) >> 1) << 2);                          \
  const uint32_t *qe_1 = ((cnt) > 31 ? (q) + ((31 >> 1) << 2) : qe_0);       \
  const uint32_t *qe_2 = ((cnt) > 4095 ? (q) + ((4095 >> 1) << 2) : qe_0);   \
  const uint32_t *qe_3 = (q) + ((cnt) << 1);                                 \
  /* Aligned loads need both the matrix and the query 16-byte aligned. */    \
  if (((uintptr_t)(m) & 0xf) == 0 && ((uintptr_t)(q) & 0xf) == 0) {          \
    for (; (q) != qe_1; (m) += 4, (q) += 4) {                                \
      MATRIX_INT32_ITER_2X2_SSE(m, q, xmm_sum, _mm_load_si128,               \
                                POPCNT_UINT32_STEP1_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(1, 2, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)            \
    for (; (q) != qe_2; (m) += 4, (q) += 4) {                                \
      MATRIX_INT32_ITER_2X2_SSE(m, q, xmm_sum, _mm_load_si128,               \
                                POPCNT_UINT32_STEP2_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(1, 2, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)            \
    for (; (q) != qe_0; (m) += 4, (q) += 4) {                                \
      MATRIX_INT32_ITER_2X2_SSE(m, q, xmm_sum, _mm_load_si128,               \
                                POPCNT_UINT32_STEP3_SSE)                     \
    }                                                                        \
  } else {                                                                   \
    for (; (q) != qe_1; (m) += 4, (q) += 4) {                                \
      MATRIX_INT32_ITER_2X2_SSE(m, q, xmm_sum, _mm_loadu_si128,              \
                                POPCNT_UINT32_STEP1_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(1, 2, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)            \
    for (; (q) != qe_2; (m) += 4, (q) += 4) {                                \
      MATRIX_INT32_ITER_2X2_SSE(m, q, xmm_sum, _mm_loadu_si128,              \
                                POPCNT_UINT32_STEP2_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(1, 2, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)            \
    for (; (q) != qe_0; (m) += 4, (q) += 4) {                                \
      MATRIX_INT32_ITER_2X2_SSE(m, q, xmm_sum, _mm_loadu_si128,              \
                                POPCNT_UINT32_STEP3_SSE)                     \
    }                                                                        \
  }                                                                          \
  /* Fold both accumulators into one vector before the odd-cnt tail. */      \
  xmm_sum_0_0 = _mm_add_epi32(_mm_unpacklo_epi64(xmm_sum_0_0, xmm_sum_0_1),  \
                              _mm_unpackhi_epi64(xmm_sum_0_0, xmm_sum_0_1)); \
  if ((q) != qe_3) {                                                         \
    __m128i xmm_m = _mm_set_epi32((m)[1], (m)[0], (m)[1], (m)[0]);           \
    __m128i xmm_q = _mm_set_epi32((q)[1], (q)[1], (q)[0], (q)[0]);           \
    POPCNT_UINT32_STEP3_SSE(xmm_m, xmm_q, xmm_sum_0_0)                       \
  }                                                                          \
  /* Pick the aligned or unaligned store from the alignment of out. */       \
  if (((uintptr_t)(out) & 0xf) == 0) {                                       \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)             \
  } else {                                                                   \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)            \
  }

//! Compute the distance between matrix and query (UINT32, M=4, N=1)
//!
//! The main loops consume two elements per iteration: `m` and `q` are handed
//! to MATRIX_INT32_ITER_4X1_SSE, then `m` is advanced by 8 and `q` by 2
//! words. The iterations are split into three phases using
//! POPCNT_UINT32_STEP1_SSE, POPCNT_UINT32_STEP2_SSE and
//! POPCNT_UINT32_STEP3_SSE, with a MATRIX_VAR_PERMUTE pass on the
//! accumulators after each of the first two phases. When `cnt` is odd, the
//! last element is loaded from `m`, paired with `_mm_broadcast_si32(q)` and
//! added to xmm_sum_0_0 with POPCNT_UINT32_STEP3_SSE. Finally xmm_sum_1_0 is
//! added into xmm_sum_0_0 and the result is handed to MATRIX_VAR_STORE.
//! NOTE(review): the 31/4095 limits presumably stop the narrower STEP1/STEP2
//! accumulator lanes from overflowing; confirm against those macros.
//! NOTE(review): `_mm_broadcast_si32` is not a standard SSE intrinsic;
//! presumably a project helper -- confirm where it is defined.
//!
//! @param m      Matrix pointer; must be a modifiable lvalue, it is advanced.
//! @param q      Query pointer (const uint32_t * compatible); also advanced.
//! @param cnt    Element count; expanded several times, so keep it free of
//!               side effects.
//! @param out    Destination forwarded to MATRIX_VAR_STORE.
//! @param _NORM  Forwarded unchanged to MATRIX_VAR_STORE.
//!
//! Expands to statements, not an expression, and declares xmm_sum_0_0,
//! xmm_sum_1_0 (via MATRIX_VAR_INIT) and qe_0..qe_3 in the enclosing scope.
#define POPCNT_UINT32_4X1_SSE(m, q, cnt, out, _NORM)                         \
  MATRIX_VAR_INIT(2, 1, __m128i, xmm_sum, _mm_setzero_si128())               \
  /* qe_0 ends the paired part; qe_3 ends the query (odd cnt: tail). */      \
  const uint32_t *qe_0 = (q) + (((cnt) >> 1) << 1);                          \
  const uint32_t *qe_1 = ((cnt) > 31 ? (q) + ((31 >> 1) << 1) : qe_0);       \
  const uint32_t *qe_2 = ((cnt) > 4095 ? (q) + ((4095 >> 1) << 1) : qe_0);   \
  const uint32_t *qe_3 = (q) + (cnt);                                        \
  /* Use aligned loads only when the matrix pointer is 16-byte aligned. */   \
  if (((uintptr_t)(m) & 0xf) == 0) {                                         \
    for (; (q) != qe_1; (m) += 8, (q) += 2) {                                \
      MATRIX_INT32_ITER_4X1_SSE(m, q, xmm_sum, _mm_load_si128,               \
                                POPCNT_UINT32_STEP1_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(2, 1, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)            \
    for (; (q) != qe_2; (m) += 8, (q) += 2) {                                \
      MATRIX_INT32_ITER_4X1_SSE(m, q, xmm_sum, _mm_load_si128,               \
                                POPCNT_UINT32_STEP2_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(2, 1, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)            \
    for (; (q) != qe_0; (m) += 8, (q) += 2) {                                \
      MATRIX_INT32_ITER_4X1_SSE(m, q, xmm_sum, _mm_load_si128,               \
                                POPCNT_UINT32_STEP3_SSE)                     \
    }                                                                        \
    /* Odd cnt: one element is left; STEP3 adds it to xmm_sum_0_0. */        \
    if ((q) != qe_3) {                                                       \
      __m128i xmm_m = _mm_load_si128((const __m128i *)(m));                  \
      __m128i xmm_q = _mm_broadcast_si32(q);                                 \
      POPCNT_UINT32_STEP3_SSE(xmm_m, xmm_q, xmm_sum_0_0)                     \
    }                                                                        \
  } else {                                                                   \
    for (; (q) != qe_1; (m) += 8, (q) += 2) {                                \
      MATRIX_INT32_ITER_4X1_SSE(m, q, xmm_sum, _mm_loadu_si128,              \
                                POPCNT_UINT32_STEP1_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(2, 1, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)            \
    for (; (q) != qe_2; (m) += 8, (q) += 2) {                                \
      MATRIX_INT32_ITER_4X1_SSE(m, q, xmm_sum, _mm_loadu_si128,              \
                                POPCNT_UINT32_STEP2_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(2, 1, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)            \
    for (; (q) != qe_0; (m) += 8, (q) += 2) {                                \
      MATRIX_INT32_ITER_4X1_SSE(m, q, xmm_sum, _mm_loadu_si128,              \
                                POPCNT_UINT32_STEP3_SSE)                     \
    }                                                                        \
    /* Odd cnt: one element is left; STEP3 adds it to xmm_sum_0_0. */        \
    if ((q) != qe_3) {                                                       \
      __m128i xmm_m = _mm_loadu_si128((const __m128i *)(m));                 \
      __m128i xmm_q = _mm_broadcast_si32(q);                                 \
      POPCNT_UINT32_STEP3_SSE(xmm_m, xmm_q, xmm_sum_0_0)                     \
    }                                                                        \
  }                                                                          \
  xmm_sum_0_0 = _mm_add_epi32(xmm_sum_0_0, xmm_sum_1_0);                     \
  /* Pick the aligned or unaligned store from the alignment of out. */       \
  if (((uintptr_t)(out) & 0xf) == 0) {                                       \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)             \
  } else {                                                                   \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)            \
  }

//! Compute the distance between matrix and query (UINT32, M=4, N=2)
//!
//! Runs `cnt` steps. Each step passes `m` and `q` to
//! MATRIX_INT32_ITER_4X2_SSE, then advances `m` by 4 and `q` by 2 words.
//! Steps are split into three phases: POPCNT_UINT32_STEP1_SSE for the first
//! 31 steps, POPCNT_UINT32_STEP2_SSE up to step 4095 and
//! POPCNT_UINT32_STEP3_SSE for the rest, with a MATRIX_VAR_PERMUTE pass on
//! the accumulators after each of the first two phases.
//! NOTE(review): the 31/4095 limits presumably stop the narrower STEP1/STEP2
//! accumulator lanes from overflowing; confirm against those macros.
//!
//! @param m      Matrix pointer; must be a modifiable lvalue, it is advanced.
//! @param q      Query pointer (const uint32_t * compatible); also advanced.
//! @param cnt    Step count; expanded several times, so keep it free of side
//!               effects.
//! @param out    Destination forwarded to MATRIX_VAR_STORE.
//! @param _NORM  Forwarded unchanged to MATRIX_VAR_STORE.
//!
//! Expands to statements, not an expression, and declares the xmm_sum
//! accumulators (via MATRIX_VAR_INIT) plus qe_0..qe_2 in the enclosing scope.
#define POPCNT_UINT32_4X2_SSE(m, q, cnt, out, _NORM)                         \
  MATRIX_VAR_INIT(1, 2, __m128i, xmm_sum, _mm_setzero_si128())               \
  /* qe_0 ends the query; qe_1/qe_2 cap the STEP1/STEP2 phases. */           \
  const uint32_t *qe_0 = (q) + ((cnt) << 1);                                 \
  const uint32_t *qe_1 = ((cnt) > 31 ? (q) + (31 << 1) : qe_0);              \
  const uint32_t *qe_2 = ((cnt) > 4095 ? (q) + (4095 << 1) : qe_0);          \
  /* Use aligned loads only when the matrix pointer is 16-byte aligned. */   \
  if (((uintptr_t)(m) & 0xf) == 0) {                                         \
    for (; (q) != qe_1; (m) += 4, (q) += 2) {                                \
      MATRIX_INT32_ITER_4X2_SSE(m, q, xmm_sum, _mm_load_si128,               \
                                POPCNT_UINT32_STEP1_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(1, 2, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)            \
    for (; (q) != qe_2; (m) += 4, (q) += 2) {                                \
      MATRIX_INT32_ITER_4X2_SSE(m, q, xmm_sum, _mm_load_si128,               \
                                POPCNT_UINT32_STEP2_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(1, 2, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)            \
    for (; (q) != qe_0; (m) += 4, (q) += 2) {                                \
      MATRIX_INT32_ITER_4X2_SSE(m, q, xmm_sum, _mm_load_si128,               \
                                POPCNT_UINT32_STEP3_SSE)                     \
    }                                                                        \
  } else {                                                                   \
    for (; (q) != qe_1; (m) += 4, (q) += 2) {                                \
      MATRIX_INT32_ITER_4X2_SSE(m, q, xmm_sum, _mm_loadu_si128,              \
                                POPCNT_UINT32_STEP1_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(1, 2, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)            \
    for (; (q) != qe_2; (m) += 4, (q) += 2) {                                \
      MATRIX_INT32_ITER_4X2_SSE(m, q, xmm_sum, _mm_loadu_si128,              \
                                POPCNT_UINT32_STEP2_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(1, 2, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)            \
    for (; (q) != qe_0; (m) += 4, (q) += 2) {                                \
      MATRIX_INT32_ITER_4X2_SSE(m, q, xmm_sum, _mm_loadu_si128,              \
                                POPCNT_UINT32_STEP3_SSE)                     \
    }                                                                        \
  }                                                                          \
  /* Pick the aligned or unaligned store from the alignment of out. */       \
  if (((uintptr_t)(out) & 0xf) == 0) {                                       \
    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)             \
  } else {                                                                   \
    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM)            \
  }

//! Compute the distance between matrix and query (UINT32, M=4, N=4)
//!
//! Runs `cnt` steps. Each step passes `m` and `q` to
//! MATRIX_INT32_ITER_4X4_SSE, then advances `m` by 4 and `q` by 4 words.
//! Steps are split into three phases: POPCNT_UINT32_STEP1_SSE for the first
//! 31 steps, POPCNT_UINT32_STEP2_SSE up to step 4095 and
//! POPCNT_UINT32_STEP3_SSE for the rest, with a MATRIX_VAR_PERMUTE pass on
//! the accumulators after each of the first two phases.
//! NOTE(review): the 31/4095 limits presumably stop the narrower STEP1/STEP2
//! accumulator lanes from overflowing; confirm against those macros.
//!
//! @param m      Matrix pointer; must be a modifiable lvalue, it is advanced.
//! @param q      Query pointer (const uint32_t * compatible); also advanced.
//! @param cnt    Step count; expanded several times, so keep it free of side
//!               effects.
//! @param out    Destination forwarded to MATRIX_VAR_STORE.
//! @param _NORM  Forwarded unchanged to MATRIX_VAR_STORE.
//!
//! Expands to statements, not an expression, and declares the xmm_sum
//! accumulators (via MATRIX_VAR_INIT) plus qe_0..qe_2 in the enclosing scope.
#define POPCNT_UINT32_4X4_SSE(m, q, cnt, out, _NORM)                         \
  MATRIX_VAR_INIT(1, 4, __m128i, xmm_sum, _mm_setzero_si128())               \
  /* qe_0 ends the query; qe_1/qe_2 cap the STEP1/STEP2 phases. */           \
  const uint32_t *qe_0 = (q) + ((cnt) << 2);                                 \
  const uint32_t *qe_1 = ((cnt) > 31 ? (q) + (31 << 2) : qe_0);              \
  const uint32_t *qe_2 = ((cnt) > 4095 ? (q) + (4095 << 2) : qe_0);          \
  /* Use aligned loads only when the matrix pointer is 16-byte aligned. */   \
  if (((uintptr_t)(m) & 0xf) == 0) {                                         \
    for (; (q) != qe_1; (m) += 4, (q) += 4) {                                \
      MATRIX_INT32_ITER_4X4_SSE(m, q, xmm_sum, _mm_load_si128,               \
                                POPCNT_UINT32_STEP1_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(1, 4, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)            \
    for (; (q) != qe_2; (m) += 4, (q) += 4) {                                \
      MATRIX_INT32_ITER_4X4_SSE(m, q, xmm_sum, _mm_load_si128,               \
                                POPCNT_UINT32_STEP2_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(1, 4, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)            \
    for (; (q) != qe_0; (m) += 4, (q) += 4) {                                \
      MATRIX_INT32_ITER_4X4_SSE(m, q, xmm_sum, _mm_load_si128,               \
                                POPCNT_UINT32_STEP3_SSE)                     \
    }                                                                        \
  } else {                                                                   \
    for (; (q) != qe_1; (m) += 4, (q) += 4) {                                \
      MATRIX_INT32_ITER_4X4_SSE(m, q, xmm_sum, _mm_loadu_si128,              \
                                POPCNT_UINT32_STEP1_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(1, 4, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)            \
    for (; (q) != qe_2; (m) += 4, (q) += 4) {                                \
      MATRIX_INT32_ITER_4X4_SSE(m, q, xmm_sum, _mm_loadu_si128,              \
                                POPCNT_UINT32_STEP2_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(1, 4, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)            \
    for (; (q) != qe_0; (m) += 4, (q) += 4) {                                \
      MATRIX_INT32_ITER_4X4_SSE(m, q, xmm_sum, _mm_loadu_si128,              \
                                POPCNT_UINT32_STEP3_SSE)                     \
    }                                                                        \
  }                                                                          \
  /* Pick the aligned or unaligned store from the alignment of out. */       \
  if (((uintptr_t)(out) & 0xf) == 0) {                                       \
    MATRIX_VAR_STORE(1, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)             \
  } else {                                                                   \
    MATRIX_VAR_STORE(1, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM)            \
  }

//! Compute the distance between matrix and query (UINT32, M=8, N=1)
//!
//! Runs `cnt` steps. Each step passes `m` and `q` to
//! MATRIX_INT32_ITER_8X1_SSE, then advances `m` by 8 words and `q` by 1 word.
//! Steps are split into three phases: POPCNT_UINT32_STEP1_SSE for the first
//! 31 steps, POPCNT_UINT32_STEP2_SSE up to step 4095 and
//! POPCNT_UINT32_STEP3_SSE for the rest, with a MATRIX_VAR_PERMUTE pass on
//! the accumulators after each of the first two phases.
//! NOTE(review): the 31/4095 limits presumably stop the narrower STEP1/STEP2
//! accumulator lanes from overflowing; confirm against those macros.
//!
//! @param m      Matrix pointer; must be a modifiable lvalue, it is advanced.
//! @param q      Query pointer (const uint32_t * compatible); also advanced.
//! @param cnt    Step count; expanded several times, so keep it free of side
//!               effects.
//! @param out    Destination forwarded to MATRIX_VAR_STORE.
//! @param _NORM  Forwarded unchanged to MATRIX_VAR_STORE.
//!
//! Expands to statements, not an expression, and declares the xmm_sum
//! accumulators (via MATRIX_VAR_INIT) plus qe_0..qe_2 in the enclosing scope.
#define POPCNT_UINT32_8X1_SSE(m, q, cnt, out, _NORM)                         \
  MATRIX_VAR_INIT(2, 1, __m128i, xmm_sum, _mm_setzero_si128())               \
  /* qe_0 ends the query; qe_1/qe_2 cap the STEP1/STEP2 phases. */           \
  const uint32_t *qe_0 = (q) + (cnt);                                        \
  const uint32_t *qe_1 = ((cnt) > 31 ? (q) + 31 : qe_0);                     \
  const uint32_t *qe_2 = ((cnt) > 4095 ? (q) + 4095 : qe_0);                 \
  /* Use aligned loads only when the matrix pointer is 16-byte aligned. */   \
  if (((uintptr_t)(m) & 0xf) == 0) {                                         \
    for (; (q) != qe_1; (m) += 8, ++(q)) {                                   \
      MATRIX_INT32_ITER_8X1_SSE(m, q, xmm_sum, _mm_load_si128,               \
                                POPCNT_UINT32_STEP1_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(2, 1, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)            \
    for (; (q) != qe_2; (m) += 8, ++(q)) {                                   \
      MATRIX_INT32_ITER_8X1_SSE(m, q, xmm_sum, _mm_load_si128,               \
                                POPCNT_UINT32_STEP2_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(2, 1, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)            \
    for (; (q) != qe_0; (m) += 8, ++(q)) {                                   \
      MATRIX_INT32_ITER_8X1_SSE(m, q, xmm_sum, _mm_load_si128,               \
                                POPCNT_UINT32_STEP3_SSE)                     \
    }                                                                        \
  } else {                                                                   \
    for (; (q) != qe_1; (m) += 8, ++(q)) {                                   \
      MATRIX_INT32_ITER_8X1_SSE(m, q, xmm_sum, _mm_loadu_si128,              \
                                POPCNT_UINT32_STEP1_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(2, 1, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)            \
    for (; (q) != qe_2; (m) += 8, ++(q)) {                                   \
      MATRIX_INT32_ITER_8X1_SSE(m, q, xmm_sum, _mm_loadu_si128,              \
                                POPCNT_UINT32_STEP2_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(2, 1, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)            \
    for (; (q) != qe_0; (m) += 8, ++(q)) {                                   \
      MATRIX_INT32_ITER_8X1_SSE(m, q, xmm_sum, _mm_loadu_si128,              \
                                POPCNT_UINT32_STEP3_SSE)                     \
    }                                                                        \
  }                                                                          \
  /* Pick the aligned or unaligned store from the alignment of out. */       \
  if (((uintptr_t)(out) & 0xf) == 0) {                                       \
    MATRIX_VAR_STORE(2, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)             \
  } else {                                                                   \
    MATRIX_VAR_STORE(2, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)            \
  }

//! Compute the distance between matrix and query (UINT32, M=8, N=2)
//!
//! Runs `cnt` steps. Each step passes `m` and `q` to
//! MATRIX_INT32_ITER_8X2_SSE, then advances `m` by 8 and `q` by 2 words.
//! Steps are split into three phases: POPCNT_UINT32_STEP1_SSE for the first
//! 31 steps, POPCNT_UINT32_STEP2_SSE up to step 4095 and
//! POPCNT_UINT32_STEP3_SSE for the rest, with a MATRIX_VAR_PERMUTE pass on
//! the accumulators after each of the first two phases.
//! NOTE(review): the 31/4095 limits presumably stop the narrower STEP1/STEP2
//! accumulator lanes from overflowing; confirm against those macros.
//!
//! @param m      Matrix pointer; must be a modifiable lvalue, it is advanced.
//! @param q      Query pointer (const uint32_t * compatible); also advanced.
//! @param cnt    Step count; expanded several times, so keep it free of side
//!               effects.
//! @param out    Destination forwarded to MATRIX_VAR_STORE.
//! @param _NORM  Forwarded unchanged to MATRIX_VAR_STORE.
//!
//! Expands to statements, not an expression, and declares the xmm_sum
//! accumulators (via MATRIX_VAR_INIT) plus qe_0..qe_2 in the enclosing scope.
#define POPCNT_UINT32_8X2_SSE(m, q, cnt, out, _NORM)                         \
  MATRIX_VAR_INIT(2, 2, __m128i, xmm_sum, _mm_setzero_si128())               \
  /* qe_0 ends the query; qe_1/qe_2 cap the STEP1/STEP2 phases. */           \
  const uint32_t *qe_0 = (q) + ((cnt) << 1);                                 \
  const uint32_t *qe_1 = ((cnt) > 31 ? (q) + (31 << 1) : qe_0);              \
  const uint32_t *qe_2 = ((cnt) > 4095 ? (q) + (4095 << 1) : qe_0);          \
  /* Use aligned loads only when the matrix pointer is 16-byte aligned. */   \
  if (((uintptr_t)(m) & 0xf) == 0) {                                         \
    for (; (q) != qe_1; (m) += 8, (q) += 2) {                                \
      MATRIX_INT32_ITER_8X2_SSE(m, q, xmm_sum, _mm_load_si128,               \
                                POPCNT_UINT32_STEP1_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(2, 2, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)            \
    for (; (q) != qe_2; (m) += 8, (q) += 2) {                                \
      MATRIX_INT32_ITER_8X2_SSE(m, q, xmm_sum, _mm_load_si128,               \
                                POPCNT_UINT32_STEP2_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(2, 2, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)            \
    for (; (q) != qe_0; (m) += 8, (q) += 2) {                                \
      MATRIX_INT32_ITER_8X2_SSE(m, q, xmm_sum, _mm_load_si128,               \
                                POPCNT_UINT32_STEP3_SSE)                     \
    }                                                                        \
  } else {                                                                   \
    for (; (q) != qe_1; (m) += 8, (q) += 2) {                                \
      MATRIX_INT32_ITER_8X2_SSE(m, q, xmm_sum, _mm_loadu_si128,              \
                                POPCNT_UINT32_STEP1_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(2, 2, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)            \
    for (; (q) != qe_2; (m) += 8, (q) += 2) {                                \
      MATRIX_INT32_ITER_8X2_SSE(m, q, xmm_sum, _mm_loadu_si128,              \
                                POPCNT_UINT32_STEP2_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(2, 2, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)            \
    for (; (q) != qe_0; (m) += 8, (q) += 2) {                                \
      MATRIX_INT32_ITER_8X2_SSE(m, q, xmm_sum, _mm_loadu_si128,              \
                                POPCNT_UINT32_STEP3_SSE)                     \
    }                                                                        \
  }                                                                          \
  /* Pick the aligned or unaligned store from the alignment of out. */       \
  if (((uintptr_t)(out) & 0xf) == 0) {                                       \
    MATRIX_VAR_STORE(2, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)             \
  } else {                                                                   \
    MATRIX_VAR_STORE(2, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM)            \
  }

//! Compute the distance between matrix and query (UINT32, M=8, N=4)
//!
//! Runs `cnt` steps. Each step passes `m` and `q` to
//! MATRIX_INT32_ITER_8X4_SSE, then advances `m` by 8 and `q` by 4 words.
//! Steps are split into three phases: POPCNT_UINT32_STEP1_SSE for the first
//! 31 steps, POPCNT_UINT32_STEP2_SSE up to step 4095 and
//! POPCNT_UINT32_STEP3_SSE for the rest, with a MATRIX_VAR_PERMUTE pass on
//! the accumulators after each of the first two phases.
//! NOTE(review): the 31/4095 limits presumably stop the narrower STEP1/STEP2
//! accumulator lanes from overflowing; confirm against those macros.
//!
//! @param m      Matrix pointer; must be a modifiable lvalue, it is advanced.
//! @param q      Query pointer (const uint32_t * compatible); also advanced.
//! @param cnt    Step count; expanded several times, so keep it free of side
//!               effects.
//! @param out    Destination forwarded to MATRIX_VAR_STORE.
//! @param _NORM  Forwarded unchanged to MATRIX_VAR_STORE.
//!
//! Expands to statements, not an expression, and declares the xmm_sum
//! accumulators (via MATRIX_VAR_INIT) plus qe_0..qe_2 in the enclosing scope.
#define POPCNT_UINT32_8X4_SSE(m, q, cnt, out, _NORM)                         \
  MATRIX_VAR_INIT(2, 4, __m128i, xmm_sum, _mm_setzero_si128())               \
  /* qe_0 ends the query; qe_1/qe_2 cap the STEP1/STEP2 phases. */           \
  const uint32_t *qe_0 = (q) + ((cnt) << 2);                                 \
  const uint32_t *qe_1 = ((cnt) > 31 ? (q) + (31 << 2) : qe_0);              \
  const uint32_t *qe_2 = ((cnt) > 4095 ? (q) + (4095 << 2) : qe_0);          \
  /* Use aligned loads only when the matrix pointer is 16-byte aligned. */   \
  if (((uintptr_t)(m) & 0xf) == 0) {                                         \
    for (; (q) != qe_1; (m) += 8, (q) += 4) {                                \
      MATRIX_INT32_ITER_8X4_SSE(m, q, xmm_sum, _mm_load_si128,               \
                                POPCNT_UINT32_STEP1_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(2, 4, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)            \
    for (; (q) != qe_2; (m) += 8, (q) += 4) {                                \
      MATRIX_INT32_ITER_8X4_SSE(m, q, xmm_sum, _mm_load_si128,               \
                                POPCNT_UINT32_STEP2_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(2, 4, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)            \
    for (; (q) != qe_0; (m) += 8, (q) += 4) {                                \
      MATRIX_INT32_ITER_8X4_SSE(m, q, xmm_sum, _mm_load_si128,               \
                                POPCNT_UINT32_STEP3_SSE)                     \
    }                                                                        \
  } else {                                                                   \
    for (; (q) != qe_1; (m) += 8, (q) += 4) {                                \
      MATRIX_INT32_ITER_8X4_SSE(m, q, xmm_sum, _mm_loadu_si128,              \
                                POPCNT_UINT32_STEP1_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(2, 4, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)            \
    for (; (q) != qe_2; (m) += 8, (q) += 4) {                                \
      MATRIX_INT32_ITER_8X4_SSE(m, q, xmm_sum, _mm_loadu_si128,              \
                                POPCNT_UINT32_STEP2_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(2, 4, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)            \
    for (; (q) != qe_0; (m) += 8, (q) += 4) {                                \
      MATRIX_INT32_ITER_8X4_SSE(m, q, xmm_sum, _mm_loadu_si128,              \
                                POPCNT_UINT32_STEP3_SSE)                     \
    }                                                                        \
  }                                                                          \
  /* Pick the aligned or unaligned store from the alignment of out. */       \
  if (((uintptr_t)(out) & 0xf) == 0) {                                       \
    MATRIX_VAR_STORE(2, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)             \
  } else {                                                                   \
    MATRIX_VAR_STORE(2, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM)            \
  }

//! Compute the distance between matrix and query (UINT32, M=8, N=8)
//!
//! Runs `cnt` steps. Each step passes `m` and `q` to
//! MATRIX_INT32_ITER_8X8_SSE, then advances `m` by 8 and `q` by 8 words.
//! Steps are split into three phases: POPCNT_UINT32_STEP1_SSE for the first
//! 31 steps, POPCNT_UINT32_STEP2_SSE up to step 4095 and
//! POPCNT_UINT32_STEP3_SSE for the rest, with a MATRIX_VAR_PERMUTE pass on
//! the accumulators after each of the first two phases.
//! NOTE(review): the 31/4095 limits presumably stop the narrower STEP1/STEP2
//! accumulator lanes from overflowing; confirm against those macros.
//!
//! @param m      Matrix pointer; must be a modifiable lvalue, it is advanced.
//! @param q      Query pointer (const uint32_t * compatible); also advanced.
//! @param cnt    Step count; expanded several times, so keep it free of side
//!               effects.
//! @param out    Destination forwarded to MATRIX_VAR_STORE.
//! @param _NORM  Forwarded unchanged to MATRIX_VAR_STORE.
//!
//! Expands to statements, not an expression, and declares the xmm_sum
//! accumulators (via MATRIX_VAR_INIT) plus qe_0..qe_2 in the enclosing scope.
#define POPCNT_UINT32_8X8_SSE(m, q, cnt, out, _NORM)                         \
  MATRIX_VAR_INIT(2, 8, __m128i, xmm_sum, _mm_setzero_si128())               \
  /* qe_0 ends the query; qe_1/qe_2 cap the STEP1/STEP2 phases. */           \
  const uint32_t *qe_0 = (q) + ((cnt) << 3);                                 \
  const uint32_t *qe_1 = ((cnt) > 31 ? (q) + (31 << 3) : qe_0);              \
  const uint32_t *qe_2 = ((cnt) > 4095 ? (q) + (4095 << 3) : qe_0);          \
  /* Use aligned loads only when the matrix pointer is 16-byte aligned. */   \
  if (((uintptr_t)(m) & 0xf) == 0) {                                         \
    for (; (q) != qe_1; (m) += 8, (q) += 8) {                                \
      MATRIX_INT32_ITER_8X8_SSE(m, q, xmm_sum, _mm_load_si128,               \
                                POPCNT_UINT32_STEP1_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(2, 8, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)            \
    for (; (q) != qe_2; (m) += 8, (q) += 8) {                                \
      MATRIX_INT32_ITER_8X8_SSE(m, q, xmm_sum, _mm_load_si128,               \
                                POPCNT_UINT32_STEP2_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(2, 8, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)            \
    for (; (q) != qe_0; (m) += 8, (q) += 8) {                                \
      MATRIX_INT32_ITER_8X8_SSE(m, q, xmm_sum, _mm_load_si128,               \
                                POPCNT_UINT32_STEP3_SSE)                     \
    }                                                                        \
  } else {                                                                   \
    for (; (q) != qe_1; (m) += 8, (q) += 8) {                                \
      MATRIX_INT32_ITER_8X8_SSE(m, q, xmm_sum, _mm_loadu_si128,              \
                                POPCNT_UINT32_STEP1_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(2, 8, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)            \
    for (; (q) != qe_2; (m) += 8, (q) += 8) {                                \
      MATRIX_INT32_ITER_8X8_SSE(m, q, xmm_sum, _mm_loadu_si128,              \
                                POPCNT_UINT32_STEP2_SSE)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(2, 8, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)            \
    for (; (q) != qe_0; (m) += 8, (q) += 8) {                                \
      MATRIX_INT32_ITER_8X8_SSE(m, q, xmm_sum, _mm_loadu_si128,              \
                                POPCNT_UINT32_STEP3_SSE)                     \
    }                                                                        \
  }                                                                          \
  /* Pick the aligned or unaligned store from the alignment of out. */       \
  if (((uintptr_t)(out) & 0xf) == 0) {                                       \
    MATRIX_VAR_STORE(2, 8, 4, xmm_sum, out, _mm_store_ps, _NORM)             \
  } else {                                                                   \
    MATRIX_VAR_STORE(2, 8, 4, xmm_sum, out, _mm_storeu_ps, _NORM)            \
  }

//! Compute the distance between matrix and query (UINT32, M=16, N=1)
//!
//! Runs `cnt` steps. Each step passes `m` and `q` to
//! MATRIX_INT32_ITER_16X1_SSE, then advances `m` by 16 words and `q` by 1
//! word. Steps are split into three phases: POPCNT_UINT32_STEP1_SSE for the
//! first 31 steps, POPCNT_UINT32_STEP2_SSE up to step 4095 and
//! POPCNT_UINT32_STEP3_SSE for the rest, with a MATRIX_VAR_PERMUTE pass on
//! the accumulators after each of the first two phases.
//! NOTE(review): the 31/4095 limits presumably stop the narrower STEP1/STEP2
//! accumulator lanes from overflowing; confirm against those macros.
//!
//! @param m      Matrix pointer; must be a modifiable lvalue, it is advanced.
//! @param q      Query pointer (const uint32_t * compatible); also advanced.
//! @param cnt    Step count; expanded several times, so keep it free of side
//!               effects.
//! @param out    Destination forwarded to MATRIX_VAR_STORE.
//! @param _NORM  Forwarded unchanged to MATRIX_VAR_STORE.
//!
//! Expands to statements, not an expression, and declares the xmm_sum
//! accumulators (via MATRIX_VAR_INIT) plus qe_0..qe_2 in the enclosing scope.
#define POPCNT_UINT32_16X1_SSE(m, q, cnt, out, _NORM)                        \
  MATRIX_VAR_INIT(4, 1, __m128i, xmm_sum, _mm_setzero_si128())               \
  /* qe_0 ends the query; qe_1/qe_2 cap the STEP1/STEP2 phases. */           \
  const uint32_t *qe_0 = (q) + (cnt);                                        \
  const uint32_t *qe_1 = ((cnt) > 31 ? (q) + 31 : qe_0);                     \
  const uint32_t *qe_2 = ((cnt) > 4095 ? (q) + 4095 : qe_0);                 \
  /* Use aligned loads only when the matrix pointer is 16-byte aligned. */   \
  if (((uintptr_t)(m) & 0xf) == 0) {                                         \
    for (; (q) != qe_1; (m) += 16, ++(q)) {                                  \
      MATRIX_INT32_ITER_16X1_SSE(m, q, xmm_sum, _mm_load_si128,              \
                                 POPCNT_UINT32_STEP1_SSE)                    \
    }                                                                        \
    MATRIX_VAR_PERMUTE(4, 1, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)            \
    for (; (q) != qe_2; (m) += 16, ++(q)) {                                  \
      MATRIX_INT32_ITER_16X1_SSE(m, q, xmm_sum, _mm_load_si128,              \
                                 POPCNT_UINT32_STEP2_SSE)                    \
    }                                                                        \
    MATRIX_VAR_PERMUTE(4, 1, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)            \
    for (; (q) != qe_0; (m) += 16, ++(q)) {                                  \
      MATRIX_INT32_ITER_16X1_SSE(m, q, xmm_sum, _mm_load_si128,              \
                                 POPCNT_UINT32_STEP3_SSE)                    \
    }                                                                        \
  } else {                                                                   \
    for (; (q) != qe_1; (m) += 16, ++(q)) {                                  \
      MATRIX_INT32_ITER_16X1_SSE(m, q, xmm_sum, _mm_loadu_si128,             \
                                 POPCNT_UINT32_STEP1_SSE)                    \
    }                                                                        \
    MATRIX_VAR_PERMUTE(4, 1, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)            \
    for (; (q) != qe_2; (m) += 16, ++(q)) {                                  \
      MATRIX_INT32_ITER_16X1_SSE(m, q, xmm_sum, _mm_loadu_si128,             \
                                 POPCNT_UINT32_STEP2_SSE)                    \
    }                                                                        \
    MATRIX_VAR_PERMUTE(4, 1, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)            \
    for (; (q) != qe_0; (m) += 16, ++(q)) {                                  \
      MATRIX_INT32_ITER_16X1_SSE(m, q, xmm_sum, _mm_loadu_si128,             \
                                 POPCNT_UINT32_STEP3_SSE)                    \
    }                                                                        \
  }                                                                          \
  /* Pick the aligned or unaligned store from the alignment of out. */       \
  if (((uintptr_t)(out) & 0xf) == 0) {                                       \
    MATRIX_VAR_STORE(4, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)             \
  } else {                                                                   \
    MATRIX_VAR_STORE(4, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)            \
  }

//! Compute the distance between matrix and query (UINT32, M=16, N=2)
//!
//! Runs `cnt` steps. Each step passes `m` and `q` to
//! MATRIX_INT32_ITER_16X2_SSE, then advances `m` by 16 and `q` by 2 words.
//! Steps are split into three phases: POPCNT_UINT32_STEP1_SSE for the first
//! 31 steps, POPCNT_UINT32_STEP2_SSE up to step 4095 and
//! POPCNT_UINT32_STEP3_SSE for the rest, with a MATRIX_VAR_PERMUTE pass on
//! the accumulators after each of the first two phases.
//! NOTE(review): the 31/4095 limits presumably stop the narrower STEP1/STEP2
//! accumulator lanes from overflowing; confirm against those macros.
//!
//! @param m      Matrix pointer; must be a modifiable lvalue, it is advanced.
//! @param q      Query pointer (const uint32_t * compatible); also advanced.
//! @param cnt    Step count; expanded several times, so keep it free of side
//!               effects.
//! @param out    Destination forwarded to MATRIX_VAR_STORE.
//! @param _NORM  Forwarded unchanged to MATRIX_VAR_STORE.
//!
//! Expands to statements, not an expression, and declares the xmm_sum
//! accumulators (via MATRIX_VAR_INIT) plus qe_0..qe_2 in the enclosing scope.
#define POPCNT_UINT32_16X2_SSE(m, q, cnt, out, _NORM)                        \
  MATRIX_VAR_INIT(4, 2, __m128i, xmm_sum, _mm_setzero_si128())               \
  /* qe_0 ends the query; qe_1/qe_2 cap the STEP1/STEP2 phases. */           \
  const uint32_t *qe_0 = (q) + ((cnt) << 1);                                 \
  const uint32_t *qe_1 = ((cnt) > 31 ? (q) + (31 << 1) : qe_0);              \
  const uint32_t *qe_2 = ((cnt) > 4095 ? (q) + (4095 << 1) : qe_0);          \
  /* Use aligned loads only when the matrix pointer is 16-byte aligned. */   \
  if (((uintptr_t)(m) & 0xf) == 0) {                                         \
    for (; (q) != qe_1; (m) += 16, (q) += 2) {                               \
      MATRIX_INT32_ITER_16X2_SSE(m, q, xmm_sum, _mm_load_si128,              \
                                 POPCNT_UINT32_STEP1_SSE)                    \
    }                                                                        \
    MATRIX_VAR_PERMUTE(4, 2, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)            \
    for (; (q) != qe_2; (m) += 16, (q) += 2) {                               \
      MATRIX_INT32_ITER_16X2_SSE(m, q, xmm_sum, _mm_load_si128,              \
                                 POPCNT_UINT32_STEP2_SSE)                    \
    }                                                                        \
    MATRIX_VAR_PERMUTE(4, 2, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)            \
    for (; (q) != qe_0; (m) += 16, (q) += 2) {                               \
      MATRIX_INT32_ITER_16X2_SSE(m, q, xmm_sum, _mm_load_si128,              \
                                 POPCNT_UINT32_STEP3_SSE)                    \
    }                                                                        \
  } else {                                                                   \
    for (; (q) != qe_1; (m) += 16, (q) += 2) {                               \
      MATRIX_INT32_ITER_16X2_SSE(m, q, xmm_sum, _mm_loadu_si128,             \
                                 POPCNT_UINT32_STEP1_SSE)                    \
    }                                                                        \
    MATRIX_VAR_PERMUTE(4, 2, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)            \
    for (; (q) != qe_2; (m) += 16, (q) += 2) {                               \
      MATRIX_INT32_ITER_16X2_SSE(m, q, xmm_sum, _mm_loadu_si128,             \
                                 POPCNT_UINT32_STEP2_SSE)                    \
    }                                                                        \
    MATRIX_VAR_PERMUTE(4, 2, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)            \
    for (; (q) != qe_0; (m) += 16, (q) += 2) {                               \
      MATRIX_INT32_ITER_16X2_SSE(m, q, xmm_sum, _mm_loadu_si128,             \
                                 POPCNT_UINT32_STEP3_SSE)                    \
    }                                                                        \
  }                                                                          \
  /* Pick the aligned or unaligned store from the alignment of out. */       \
  if (((uintptr_t)(out) & 0xf) == 0) {                                       \
    MATRIX_VAR_STORE(4, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)             \
  } else {                                                                   \
    MATRIX_VAR_STORE(4, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM)            \
  }

//! Compute the distance between matrix and query (UINT32, M=16, N=4)
//!
//! Runs `cnt` steps. Each step passes `m` and `q` to
//! MATRIX_INT32_ITER_16X4_SSE, then advances `m` by 16 and `q` by 4 words.
//! Steps are split into three phases: POPCNT_UINT32_STEP1_SSE for the first
//! 31 steps, POPCNT_UINT32_STEP2_SSE up to step 4095 and
//! POPCNT_UINT32_STEP3_SSE for the rest, with a MATRIX_VAR_PERMUTE pass on
//! the accumulators after each of the first two phases.
//! NOTE(review): the 31/4095 limits presumably stop the narrower STEP1/STEP2
//! accumulator lanes from overflowing; confirm against those macros.
//!
//! @param m      Matrix pointer; must be a modifiable lvalue, it is advanced.
//! @param q      Query pointer (const uint32_t * compatible); also advanced.
//! @param cnt    Step count; expanded several times, so keep it free of side
//!               effects.
//! @param out    Destination forwarded to MATRIX_VAR_STORE.
//! @param _NORM  Forwarded unchanged to MATRIX_VAR_STORE.
//!
//! Expands to statements, not an expression, and declares the xmm_sum
//! accumulators (via MATRIX_VAR_INIT) plus qe_0..qe_2 in the enclosing scope.
#define POPCNT_UINT32_16X4_SSE(m, q, cnt, out, _NORM)                        \
  MATRIX_VAR_INIT(4, 4, __m128i, xmm_sum, _mm_setzero_si128())               \
  /* qe_0 ends the query; qe_1/qe_2 cap the STEP1/STEP2 phases. */           \
  const uint32_t *qe_0 = (q) + ((cnt) << 2);                                 \
  const uint32_t *qe_1 = ((cnt) > 31 ? (q) + (31 << 2) : qe_0);              \
  const uint32_t *qe_2 = ((cnt) > 4095 ? (q) + (4095 << 2) : qe_0);          \
  /* Use aligned loads only when the matrix pointer is 16-byte aligned. */   \
  if (((uintptr_t)(m) & 0xf) == 0) {                                         \
    for (; (q) != qe_1; (m) += 16, (q) += 4) {                               \
      MATRIX_INT32_ITER_16X4_SSE(m, q, xmm_sum, _mm_load_si128,              \
                                 POPCNT_UINT32_STEP1_SSE)                    \
    }                                                                        \
    MATRIX_VAR_PERMUTE(4, 4, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)            \
    for (; (q) != qe_2; (m) += 16, (q) += 4) {                               \
      MATRIX_INT32_ITER_16X4_SSE(m, q, xmm_sum, _mm_load_si128,              \
                                 POPCNT_UINT32_STEP2_SSE)                    \
    }                                                                        \
    MATRIX_VAR_PERMUTE(4, 4, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)            \
    for (; (q) != qe_0; (m) += 16, (q) += 4) {                               \
      MATRIX_INT32_ITER_16X4_SSE(m, q, xmm_sum, _mm_load_si128,              \
                                 POPCNT_UINT32_STEP3_SSE)                    \
    }                                                                        \
  } else {                                                                   \
    for (; (q) != qe_1; (m) += 16, (q) += 4) {                               \
      MATRIX_INT32_ITER_16X4_SSE(m, q, xmm_sum, _mm_loadu_si128,             \
                                 POPCNT_UINT32_STEP1_SSE)                    \
    }                                                                        \
    MATRIX_VAR_PERMUTE(4, 4, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)            \
    for (; (q) != qe_2; (m) += 16, (q) += 4) {                               \
      MATRIX_INT32_ITER_16X4_SSE(m, q, xmm_sum, _mm_loadu_si128,             \
                                 POPCNT_UINT32_STEP2_SSE)                    \
    }                                                                        \
    MATRIX_VAR_PERMUTE(4, 4, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)            \
    for (; (q) != qe_0; (m) += 16, (q) += 4) {                               \
      MATRIX_INT32_ITER_16X4_SSE(m, q, xmm_sum, _mm_loadu_si128,             \
                                 POPCNT_UINT32_STEP3_SSE)                    \
    }                                                                        \
  }                                                                          \
  /* Pick the aligned or unaligned store from the alignment of out. */       \
  if (((uintptr_t)(out) & 0xf) == 0) {                                       \
    MATRIX_VAR_STORE(4, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)             \
  } else {                                                                   \
    MATRIX_VAR_STORE(4, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM)            \
  }

//! Accumulation passes shared by both alignment paths of
//! POPCNT_UINT32_16X8_SSE. `_LOAD` names the 128-bit load intrinsic handed to
//! the iteration macro. Relies on `xmm_sum`, `qe_0`, `qe_1` and `qe_2` being
//! declared by the enclosing expansion; advances `m` by 16 and `q` by 8 per
//! iteration.
#define POPCNT_UINT32_16X8_SSE_PASSES(m, q, _LOAD)                   \
  for (; q != qe_1; m += 16, q += 8) {                               \
    MATRIX_INT32_ITER_16X8_SSE(m, q, xmm_sum, _LOAD,                 \
                               POPCNT_UINT32_STEP1_SSE)              \
  }                                                                  \
  MATRIX_VAR_PERMUTE(4, 8, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)      \
  for (; q != qe_2; m += 16, q += 8) {                               \
    MATRIX_INT32_ITER_16X8_SSE(m, q, xmm_sum, _LOAD,                 \
                               POPCNT_UINT32_STEP2_SSE)              \
  }                                                                  \
  MATRIX_VAR_PERMUTE(4, 8, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)      \
  for (; q != qe_0; m += 16, q += 8) {                               \
    MATRIX_INT32_ITER_16X8_SSE(m, q, xmm_sum, _LOAD,                 \
                               POPCNT_UINT32_STEP3_SSE)              \
  }

//! Compute the distance between matrix and query (UINT32, M=16, N=8)
//!
//! Expands to a statement sequence (not wrapped in a block): it declares the
//! `xmm_sum` accumulators plus `qe_0`/`qe_1`/`qe_2` in the caller's scope and
//! advances the `m` and `q` pointers it is given.
//!   - `qe_0` marks the end of the query data (`cnt` iterations of 8 values).
//!   - `qe_1` / `qe_2` cap the first / second pass at 31 / 4095 iterations.
//!     NOTE(review): presumably these caps keep the narrower counters used by
//!     STEP1 / STEP2 from overflowing before each PERMUTE widens them;
//!     confirm against the STEP/PERMUTE definitions.
//! Aligned loads are used when `m` is 16-byte aligned and aligned stores when
//! `out` is 16-byte aligned; `_NORM` is forwarded to MATRIX_VAR_STORE.
#define POPCNT_UINT32_16X8_SSE(m, q, cnt, out, _NORM)                \
  MATRIX_VAR_INIT(4, 8, __m128i, xmm_sum, _mm_setzero_si128())       \
  const uint32_t *qe_0 = q + (cnt << 3);                             \
  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 3) : qe_0);          \
  const uint32_t *qe_2 = (cnt > 4095 ? q + (4095 << 3) : qe_0);      \
  if (((uintptr_t)m & 0xf) == 0) {                                   \
    POPCNT_UINT32_16X8_SSE_PASSES(m, q, _mm_load_si128)              \
  } else {                                                           \
    POPCNT_UINT32_16X8_SSE_PASSES(m, q, _mm_loadu_si128)             \
  }                                                                  \
  if (((uintptr_t)out & 0xf) == 0) {                                 \
    MATRIX_VAR_STORE(4, 8, 4, xmm_sum, out, _mm_store_ps, _NORM)     \
  } else {                                                           \
    MATRIX_VAR_STORE(4, 8, 4, xmm_sum, out, _mm_storeu_ps, _NORM)    \
  }

//! Accumulation passes shared by both alignment paths of
//! POPCNT_UINT32_16X16_SSE. `_LOAD` names the 128-bit load intrinsic handed
//! to the iteration macro. Relies on `xmm_sum`, `qe_0`, `qe_1` and `qe_2`
//! being declared by the enclosing expansion; advances `m` by 16 and `q` by
//! 16 per iteration.
#define POPCNT_UINT32_16X16_SSE_PASSES(m, q, _LOAD)                  \
  for (; q != qe_1; m += 16, q += 16) {                              \
    MATRIX_INT32_ITER_16X16_SSE(m, q, xmm_sum, _LOAD,                \
                                POPCNT_UINT32_STEP1_SSE)             \
  }                                                                  \
  MATRIX_VAR_PERMUTE(4, 16, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)     \
  for (; q != qe_2; m += 16, q += 16) {                              \
    MATRIX_INT32_ITER_16X16_SSE(m, q, xmm_sum, _LOAD,                \
                                POPCNT_UINT32_STEP2_SSE)             \
  }                                                                  \
  MATRIX_VAR_PERMUTE(4, 16, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)     \
  for (; q != qe_0; m += 16, q += 16) {                              \
    MATRIX_INT32_ITER_16X16_SSE(m, q, xmm_sum, _LOAD,                \
                                POPCNT_UINT32_STEP3_SSE)             \
  }

//! Compute the distance between matrix and query (UINT32, M=16, N=16)
//!
//! Expands to a statement sequence (not wrapped in a block): it declares the
//! `xmm_sum` accumulators plus `qe_0`/`qe_1`/`qe_2` in the caller's scope and
//! advances the `m` and `q` pointers it is given.
//!   - `qe_0` marks the end of the query data (`cnt` iterations of 16 values).
//!   - `qe_1` / `qe_2` cap the first / second pass at 31 / 4095 iterations.
//!     NOTE(review): presumably these caps keep the narrower counters used by
//!     STEP1 / STEP2 from overflowing before each PERMUTE widens them;
//!     confirm against the STEP/PERMUTE definitions.
//! Aligned loads are used when `m` is 16-byte aligned and aligned stores when
//! `out` is 16-byte aligned; `_NORM` is forwarded to MATRIX_VAR_STORE.
#define POPCNT_UINT32_16X16_SSE(m, q, cnt, out, _NORM)               \
  MATRIX_VAR_INIT(4, 16, __m128i, xmm_sum, _mm_setzero_si128())      \
  const uint32_t *qe_0 = q + (cnt << 4);                             \
  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 4) : qe_0);          \
  const uint32_t *qe_2 = (cnt > 4095 ? q + (4095 << 4) : qe_0);      \
  if (((uintptr_t)m & 0xf) == 0) {                                   \
    POPCNT_UINT32_16X16_SSE_PASSES(m, q, _mm_load_si128)             \
  } else {                                                           \
    POPCNT_UINT32_16X16_SSE_PASSES(m, q, _mm_loadu_si128)            \
  }                                                                  \
  if (((uintptr_t)out & 0xf) == 0) {                                 \
    MATRIX_VAR_STORE(4, 16, 4, xmm_sum, out, _mm_store_ps, _NORM)    \
  } else {                                                           \
    MATRIX_VAR_STORE(4, 16, 4, xmm_sum, out, _mm_storeu_ps, _NORM)   \
  }

//! Accumulation passes shared by both alignment paths of
//! POPCNT_UINT32_32X1_SSE. `_LOAD` names the 128-bit load intrinsic handed to
//! the iteration macro. Relies on `xmm_sum`, `qe_0`, `qe_1` and `qe_2` being
//! declared by the enclosing expansion; advances `m` by 32 and `q` by 1 per
//! iteration.
#define POPCNT_UINT32_32X1_SSE_PASSES(m, q, _LOAD)                   \
  for (; q != qe_1; m += 32, ++q) {                                  \
    MATRIX_INT32_ITER_32X1_SSE(m, q, xmm_sum, _LOAD,                 \
                               POPCNT_UINT32_STEP1_SSE)              \
  }                                                                  \
  MATRIX_VAR_PERMUTE(8, 1, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)      \
  for (; q != qe_2; m += 32, ++q) {                                  \
    MATRIX_INT32_ITER_32X1_SSE(m, q, xmm_sum, _LOAD,                 \
                               POPCNT_UINT32_STEP2_SSE)              \
  }                                                                  \
  MATRIX_VAR_PERMUTE(8, 1, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)      \
  for (; q != qe_0; m += 32, ++q) {                                  \
    MATRIX_INT32_ITER_32X1_SSE(m, q, xmm_sum, _LOAD,                 \
                               POPCNT_UINT32_STEP3_SSE)              \
  }

//! Compute the distance between matrix and query (UINT32, M=32, N=1)
//!
//! Expands to a statement sequence (not wrapped in a block): it declares the
//! `xmm_sum` accumulators plus `qe_0`/`qe_1`/`qe_2` in the caller's scope and
//! advances the `m` and `q` pointers it is given.
//!   - `qe_0` marks the end of the query data (`cnt` values).
//!   - `qe_1` / `qe_2` cap the first / second pass at 31 / 4095 iterations.
//!     NOTE(review): presumably these caps keep the narrower counters used by
//!     STEP1 / STEP2 from overflowing before each PERMUTE widens them;
//!     confirm against the STEP/PERMUTE definitions.
//! Aligned loads are used when `m` is 16-byte aligned and aligned stores when
//! `out` is 16-byte aligned; `_NORM` is forwarded to MATRIX_VAR_STORE.
#define POPCNT_UINT32_32X1_SSE(m, q, cnt, out, _NORM)                \
  MATRIX_VAR_INIT(8, 1, __m128i, xmm_sum, _mm_setzero_si128())       \
  const uint32_t *qe_0 = q + cnt;                                    \
  const uint32_t *qe_1 = (cnt > 31 ? q + 31 : qe_0);                 \
  const uint32_t *qe_2 = (cnt > 4095 ? q + 4095 : qe_0);             \
  if (((uintptr_t)m & 0xf) == 0) {                                   \
    POPCNT_UINT32_32X1_SSE_PASSES(m, q, _mm_load_si128)              \
  } else {                                                           \
    POPCNT_UINT32_32X1_SSE_PASSES(m, q, _mm_loadu_si128)             \
  }                                                                  \
  if (((uintptr_t)out & 0xf) == 0) {                                 \
    MATRIX_VAR_STORE(8, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)     \
  } else {                                                           \
    MATRIX_VAR_STORE(8, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)    \
  }

//! Accumulation passes shared by both alignment paths of
//! POPCNT_UINT32_32X2_SSE. `_LOAD` names the 128-bit load intrinsic handed to
//! the iteration macro. Relies on `xmm_sum`, `qe_0`, `qe_1` and `qe_2` being
//! declared by the enclosing expansion; advances `m` by 32 and `q` by 2 per
//! iteration.
#define POPCNT_UINT32_32X2_SSE_PASSES(m, q, _LOAD)                   \
  for (; q != qe_1; m += 32, q += 2) {                               \
    MATRIX_INT32_ITER_32X2_SSE(m, q, xmm_sum, _LOAD,                 \
                               POPCNT_UINT32_STEP1_SSE)              \
  }                                                                  \
  MATRIX_VAR_PERMUTE(8, 2, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)      \
  for (; q != qe_2; m += 32, q += 2) {                               \
    MATRIX_INT32_ITER_32X2_SSE(m, q, xmm_sum, _LOAD,                 \
                               POPCNT_UINT32_STEP2_SSE)              \
  }                                                                  \
  MATRIX_VAR_PERMUTE(8, 2, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)      \
  for (; q != qe_0; m += 32, q += 2) {                               \
    MATRIX_INT32_ITER_32X2_SSE(m, q, xmm_sum, _LOAD,                 \
                               POPCNT_UINT32_STEP3_SSE)              \
  }

//! Compute the distance between matrix and query (UINT32, M=32, N=2)
//!
//! Expands to a statement sequence (not wrapped in a block): it declares the
//! `xmm_sum` accumulators plus `qe_0`/`qe_1`/`qe_2` in the caller's scope and
//! advances the `m` and `q` pointers it is given.
//!   - `qe_0` marks the end of the query data (`cnt` iterations of 2 values).
//!   - `qe_1` / `qe_2` cap the first / second pass at 31 / 4095 iterations.
//!     NOTE(review): presumably these caps keep the narrower counters used by
//!     STEP1 / STEP2 from overflowing before each PERMUTE widens them;
//!     confirm against the STEP/PERMUTE definitions.
//! Aligned loads are used when `m` is 16-byte aligned and aligned stores when
//! `out` is 16-byte aligned; `_NORM` is forwarded to MATRIX_VAR_STORE.
#define POPCNT_UINT32_32X2_SSE(m, q, cnt, out, _NORM)                \
  MATRIX_VAR_INIT(8, 2, __m128i, xmm_sum, _mm_setzero_si128())       \
  const uint32_t *qe_0 = q + (cnt << 1);                             \
  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 1) : qe_0);          \
  const uint32_t *qe_2 = (cnt > 4095 ? q + (4095 << 1) : qe_0);      \
  if (((uintptr_t)m & 0xf) == 0) {                                   \
    POPCNT_UINT32_32X2_SSE_PASSES(m, q, _mm_load_si128)              \
  } else {                                                           \
    POPCNT_UINT32_32X2_SSE_PASSES(m, q, _mm_loadu_si128)             \
  }                                                                  \
  if (((uintptr_t)out & 0xf) == 0) {                                 \
    MATRIX_VAR_STORE(8, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)     \
  } else {                                                           \
    MATRIX_VAR_STORE(8, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM)    \
  }

//! Accumulation passes shared by both alignment paths of
//! POPCNT_UINT32_32X4_SSE. `_LOAD` names the 128-bit load intrinsic handed to
//! the iteration macro. Relies on `xmm_sum`, `qe_0`, `qe_1` and `qe_2` being
//! declared by the enclosing expansion; advances `m` by 32 and `q` by 4 per
//! iteration.
#define POPCNT_UINT32_32X4_SSE_PASSES(m, q, _LOAD)                   \
  for (; q != qe_1; m += 32, q += 4) {                               \
    MATRIX_INT32_ITER_32X4_SSE(m, q, xmm_sum, _LOAD,                 \
                               POPCNT_UINT32_STEP1_SSE)              \
  }                                                                  \
  MATRIX_VAR_PERMUTE(8, 4, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)      \
  for (; q != qe_2; m += 32, q += 4) {                               \
    MATRIX_INT32_ITER_32X4_SSE(m, q, xmm_sum, _LOAD,                 \
                               POPCNT_UINT32_STEP2_SSE)              \
  }                                                                  \
  MATRIX_VAR_PERMUTE(8, 4, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)      \
  for (; q != qe_0; m += 32, q += 4) {                               \
    MATRIX_INT32_ITER_32X4_SSE(m, q, xmm_sum, _LOAD,                 \
                               POPCNT_UINT32_STEP3_SSE)              \
  }

//! Compute the distance between matrix and query (UINT32, M=32, N=4)
//!
//! Expands to a statement sequence (not wrapped in a block): it declares the
//! `xmm_sum` accumulators plus `qe_0`/`qe_1`/`qe_2` in the caller's scope and
//! advances the `m` and `q` pointers it is given.
//!   - `qe_0` marks the end of the query data (`cnt` iterations of 4 values).
//!   - `qe_1` / `qe_2` cap the first / second pass at 31 / 4095 iterations.
//!     NOTE(review): presumably these caps keep the narrower counters used by
//!     STEP1 / STEP2 from overflowing before each PERMUTE widens them;
//!     confirm against the STEP/PERMUTE definitions.
//! Aligned loads are used when `m` is 16-byte aligned and aligned stores when
//! `out` is 16-byte aligned; `_NORM` is forwarded to MATRIX_VAR_STORE.
#define POPCNT_UINT32_32X4_SSE(m, q, cnt, out, _NORM)                \
  MATRIX_VAR_INIT(8, 4, __m128i, xmm_sum, _mm_setzero_si128())       \
  const uint32_t *qe_0 = q + (cnt << 2);                             \
  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 2) : qe_0);          \
  const uint32_t *qe_2 = (cnt > 4095 ? q + (4095 << 2) : qe_0);      \
  if (((uintptr_t)m & 0xf) == 0) {                                   \
    POPCNT_UINT32_32X4_SSE_PASSES(m, q, _mm_load_si128)              \
  } else {                                                           \
    POPCNT_UINT32_32X4_SSE_PASSES(m, q, _mm_loadu_si128)             \
  }                                                                  \
  if (((uintptr_t)out & 0xf) == 0) {                                 \
    MATRIX_VAR_STORE(8, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)     \
  } else {                                                           \
    MATRIX_VAR_STORE(8, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM)    \
  }

//! Accumulation passes shared by both alignment paths of
//! POPCNT_UINT32_32X8_SSE. `_LOAD` names the 128-bit load intrinsic handed to
//! the iteration macro. Relies on `xmm_sum`, `qe_0`, `qe_1` and `qe_2` being
//! declared by the enclosing expansion; advances `m` by 32 and `q` by 8 per
//! iteration.
#define POPCNT_UINT32_32X8_SSE_PASSES(m, q, _LOAD)                   \
  for (; q != qe_1; m += 32, q += 8) {                               \
    MATRIX_INT32_ITER_32X8_SSE(m, q, xmm_sum, _LOAD,                 \
                               POPCNT_UINT32_STEP1_SSE)              \
  }                                                                  \
  MATRIX_VAR_PERMUTE(8, 8, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)      \
  for (; q != qe_2; m += 32, q += 8) {                               \
    MATRIX_INT32_ITER_32X8_SSE(m, q, xmm_sum, _LOAD,                 \
                               POPCNT_UINT32_STEP2_SSE)              \
  }                                                                  \
  MATRIX_VAR_PERMUTE(8, 8, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)      \
  for (; q != qe_0; m += 32, q += 8) {                               \
    MATRIX_INT32_ITER_32X8_SSE(m, q, xmm_sum, _LOAD,                 \
                               POPCNT_UINT32_STEP3_SSE)              \
  }

//! Compute the distance between matrix and query (UINT32, M=32, N=8)
//!
//! Expands to a statement sequence (not wrapped in a block): it declares the
//! `xmm_sum` accumulators plus `qe_0`/`qe_1`/`qe_2` in the caller's scope and
//! advances the `m` and `q` pointers it is given.
//!   - `qe_0` marks the end of the query data (`cnt` iterations of 8 values).
//!   - `qe_1` / `qe_2` cap the first / second pass at 31 / 4095 iterations.
//!     NOTE(review): presumably these caps keep the narrower counters used by
//!     STEP1 / STEP2 from overflowing before each PERMUTE widens them;
//!     confirm against the STEP/PERMUTE definitions.
//! Aligned loads are used when `m` is 16-byte aligned and aligned stores when
//! `out` is 16-byte aligned; `_NORM` is forwarded to MATRIX_VAR_STORE.
#define POPCNT_UINT32_32X8_SSE(m, q, cnt, out, _NORM)                \
  MATRIX_VAR_INIT(8, 8, __m128i, xmm_sum, _mm_setzero_si128())       \
  const uint32_t *qe_0 = q + (cnt << 3);                             \
  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 3) : qe_0);          \
  const uint32_t *qe_2 = (cnt > 4095 ? q + (4095 << 3) : qe_0);      \
  if (((uintptr_t)m & 0xf) == 0) {                                   \
    POPCNT_UINT32_32X8_SSE_PASSES(m, q, _mm_load_si128)              \
  } else {                                                           \
    POPCNT_UINT32_32X8_SSE_PASSES(m, q, _mm_loadu_si128)             \
  }                                                                  \
  if (((uintptr_t)out & 0xf) == 0) {                                 \
    MATRIX_VAR_STORE(8, 8, 4, xmm_sum, out, _mm_store_ps, _NORM)     \
  } else {                                                           \
    MATRIX_VAR_STORE(8, 8, 4, xmm_sum, out, _mm_storeu_ps, _NORM)    \
  }

//! Accumulation passes shared by both alignment paths of
//! POPCNT_UINT32_32X16_SSE. `_LOAD` names the 128-bit load intrinsic handed
//! to the iteration macro. Relies on `xmm_sum`, `qe_0`, `qe_1` and `qe_2`
//! being declared by the enclosing expansion; advances `m` by 32 and `q` by
//! 16 per iteration.
#define POPCNT_UINT32_32X16_SSE_PASSES(m, q, _LOAD)                  \
  for (; q != qe_1; m += 32, q += 16) {                              \
    MATRIX_INT32_ITER_32X16_SSE(m, q, xmm_sum, _LOAD,                \
                                POPCNT_UINT32_STEP1_SSE)             \
  }                                                                  \
  MATRIX_VAR_PERMUTE(8, 16, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)     \
  for (; q != qe_2; m += 32, q += 16) {                              \
    MATRIX_INT32_ITER_32X16_SSE(m, q, xmm_sum, _LOAD,                \
                                POPCNT_UINT32_STEP2_SSE)             \
  }                                                                  \
  MATRIX_VAR_PERMUTE(8, 16, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)     \
  for (; q != qe_0; m += 32, q += 16) {                              \
    MATRIX_INT32_ITER_32X16_SSE(m, q, xmm_sum, _LOAD,                \
                                POPCNT_UINT32_STEP3_SSE)             \
  }

//! Compute the distance between matrix and query (UINT32, M=32, N=16)
//!
//! Expands to a statement sequence (not wrapped in a block): it declares the
//! `xmm_sum` accumulators plus `qe_0`/`qe_1`/`qe_2` in the caller's scope and
//! advances the `m` and `q` pointers it is given.
//!   - `qe_0` marks the end of the query data (`cnt` iterations of 16 values).
//!   - `qe_1` / `qe_2` cap the first / second pass at 31 / 4095 iterations.
//!     NOTE(review): presumably these caps keep the narrower counters used by
//!     STEP1 / STEP2 from overflowing before each PERMUTE widens them;
//!     confirm against the STEP/PERMUTE definitions.
//! Aligned loads are used when `m` is 16-byte aligned and aligned stores when
//! `out` is 16-byte aligned; `_NORM` is forwarded to MATRIX_VAR_STORE.
#define POPCNT_UINT32_32X16_SSE(m, q, cnt, out, _NORM)               \
  MATRIX_VAR_INIT(8, 16, __m128i, xmm_sum, _mm_setzero_si128())      \
  const uint32_t *qe_0 = q + (cnt << 4);                             \
  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 4) : qe_0);          \
  const uint32_t *qe_2 = (cnt > 4095 ? q + (4095 << 4) : qe_0);      \
  if (((uintptr_t)m & 0xf) == 0) {                                   \
    POPCNT_UINT32_32X16_SSE_PASSES(m, q, _mm_load_si128)             \
  } else {                                                           \
    POPCNT_UINT32_32X16_SSE_PASSES(m, q, _mm_loadu_si128)            \
  }                                                                  \
  if (((uintptr_t)out & 0xf) == 0) {                                 \
    MATRIX_VAR_STORE(8, 16, 4, xmm_sum, out, _mm_store_ps, _NORM)    \
  } else {                                                           \
    MATRIX_VAR_STORE(8, 16, 4, xmm_sum, out, _mm_storeu_ps, _NORM)   \
  }

//! Accumulation passes shared by both alignment paths of
//! POPCNT_UINT32_32X32_SSE. `_LOAD` names the 128-bit load intrinsic handed
//! to the iteration macro. Relies on `xmm_sum`, `qe_0`, `qe_1` and `qe_2`
//! being declared by the enclosing expansion; advances `m` by 32 and `q` by
//! 32 per iteration.
#define POPCNT_UINT32_32X32_SSE_PASSES(m, q, _LOAD)                  \
  for (; q != qe_1; m += 32, q += 32) {                              \
    MATRIX_INT32_ITER_32X32_SSE(m, q, xmm_sum, _LOAD,                \
                                POPCNT_UINT32_STEP1_SSE)             \
  }                                                                  \
  MATRIX_VAR_PERMUTE(8, 32, xmm_sum, POPCNT_UINT32_PERMUTE1_SSE)     \
  for (; q != qe_2; m += 32, q += 32) {                              \
    MATRIX_INT32_ITER_32X32_SSE(m, q, xmm_sum, _LOAD,                \
                                POPCNT_UINT32_STEP2_SSE)             \
  }                                                                  \
  MATRIX_VAR_PERMUTE(8, 32, xmm_sum, POPCNT_UINT32_PERMUTE2_SSE)     \
  for (; q != qe_0; m += 32, q += 32) {                              \
    MATRIX_INT32_ITER_32X32_SSE(m, q, xmm_sum, _LOAD,                \
                                POPCNT_UINT32_STEP3_SSE)             \
  }

//! Compute the distance between matrix and query (UINT32, M=32, N=32)
//!
//! Expands to a statement sequence (not wrapped in a block): it declares the
//! `xmm_sum` accumulators plus `qe_0`/`qe_1`/`qe_2` in the caller's scope and
//! advances the `m` and `q` pointers it is given.
//!   - `qe_0` marks the end of the query data (`cnt` iterations of 32 values).
//!   - `qe_1` / `qe_2` cap the first / second pass at 31 / 4095 iterations.
//!     NOTE(review): presumably these caps keep the narrower counters used by
//!     STEP1 / STEP2 from overflowing before each PERMUTE widens them;
//!     confirm against the STEP/PERMUTE definitions.
//! Aligned loads are used when `m` is 16-byte aligned and aligned stores when
//! `out` is 16-byte aligned; `_NORM` is forwarded to MATRIX_VAR_STORE.
#define POPCNT_UINT32_32X32_SSE(m, q, cnt, out, _NORM)               \
  MATRIX_VAR_INIT(8, 32, __m128i, xmm_sum, _mm_setzero_si128())      \
  const uint32_t *qe_0 = q + (cnt << 5);                             \
  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 5) : qe_0);          \
  const uint32_t *qe_2 = (cnt > 4095 ? q + (4095 << 5) : qe_0);      \
  if (((uintptr_t)m & 0xf) == 0) {                                   \
    POPCNT_UINT32_32X32_SSE_PASSES(m, q, _mm_load_si128)             \
  } else {                                                           \
    POPCNT_UINT32_32X32_SSE_PASSES(m, q, _mm_loadu_si128)            \
  }                                                                  \
  if (((uintptr_t)out & 0xf) == 0) {                                 \
    MATRIX_VAR_STORE(8, 32, 4, xmm_sum, out, _mm_store_ps, _NORM)    \
  } else {                                                           \
    MATRIX_VAR_STORE(8, 32, 4, xmm_sum, out, _mm_storeu_ps, _NORM)   \
  }

//! Accumulation passes shared by both alignment paths of
//! POPCNT_UINT32_2X1_AVX. `_LOAD` names the 256-bit load intrinsic handed to
//! the iteration macro. Relies on `ymm_sum`, `qe_0`, `qe_1` and `qe_2` being
//! declared by the enclosing expansion; advances `m` by 8 and `q` by 4 per
//! iteration.
#define POPCNT_UINT32_2X1_AVX_PASSES(m, q, _LOAD)                              \
  for (; q != qe_1; m += 8, q += 4) {                                          \
    MATRIX_INT32_ITER_2X1_AVX(m, q, ymm_sum, _LOAD, POPCNT_UINT32_STEP1_AVX)   \
  }                                                                            \
  MATRIX_VAR_PERMUTE(1, 1, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)                \
  for (; q != qe_2; m += 8, q += 4) {                                          \
    MATRIX_INT32_ITER_2X1_AVX(m, q, ymm_sum, _LOAD, POPCNT_UINT32_STEP2_AVX)   \
  }                                                                            \
  MATRIX_VAR_PERMUTE(1, 1, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)                \
  for (; q != qe_0; m += 8, q += 4) {                                          \
    MATRIX_INT32_ITER_2X1_AVX(m, q, ymm_sum, _LOAD, POPCNT_UINT32_STEP3_AVX)   \
  }

//! Compute the distance between matrix and query (UINT32, M=2, N=1)
//!
//! Expands to a statement sequence (not wrapped in a block): it declares
//! `ymm_sum*`, `xmm_sum_0` and `qe_0`..`qe_3` in the caller's scope and
//! advances the `m` and `q` pointers it is given.
//!   - `qe_0` is `cnt` rounded down to a multiple of 4 query values (the part
//!     handled by the 256-bit loops); `qe_3` is the true end (`q + cnt`).
//!   - `qe_1` / `qe_2` end the first / second pass after 28 / 4092 query
//!     values when `cnt` exceeds 31 / 4095.
//!     NOTE(review): presumably these caps keep the narrower counters used by
//!     STEP1 / STEP2 from overflowing before each PERMUTE widens them;
//!     confirm against the STEP/PERMUTE definitions.
//! After the loops the two 128-bit halves of the accumulator are added. If at
//! least two query values remain they are processed with a 128-bit step, the
//! upper lanes are folded onto the lower ones, and a last single value (if
//! any) is processed. The low 64 bits of `_NORM(xmm_sum_0)` are then written
//! to `out` with `_mm_storel_pi`.
//! Aligned 256-bit loads are used only when `m` is 32-byte aligned.
#define POPCNT_UINT32_2X1_AVX(m, q, cnt, out, _NORM)                           \
  MATRIX_VAR_INIT(1, 1, __m256i, ymm_sum, _mm256_setzero_si256())              \
  const uint32_t *qe_0 = q + ((cnt >> 2) << 2);                                \
  const uint32_t *qe_1 = (cnt > 31 ? q + ((31 >> 2) << 2) : qe_0);             \
  const uint32_t *qe_2 = (cnt > 4095 ? q + ((4095 >> 2) << 2) : qe_0);         \
  const uint32_t *qe_3 = q + cnt;                                              \
  if (((uintptr_t)m & 0x1f) == 0) {                                            \
    POPCNT_UINT32_2X1_AVX_PASSES(m, q, _mm256_load_si256)                      \
  } else {                                                                     \
    POPCNT_UINT32_2X1_AVX_PASSES(m, q, _mm256_loadu_si256)                     \
  }                                                                            \
  __m128i xmm_sum_0 = _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_0),       \
                                    _mm256_extracti128_si256(ymm_sum_0_0, 1)); \
  if (qe_3 >= qe_0 + 2) {                                                      \
    __m128i xmm_m = _mm_loadu_si128((const __m128i *)(m));                     \
    __m128i xmm_q = _mm_set_epi32(q[1], q[1], q[0], q[0]);                     \
    POPCNT_UINT32_STEP3_SSE(xmm_m, xmm_q, xmm_sum_0)                           \
    m += 4;                                                                    \
    q += 2;                                                                    \
  }                                                                            \
  xmm_sum_0 = _mm_add_epi32(                                                   \
      xmm_sum_0, _mm_shuffle_epi32(xmm_sum_0, _MM_SHUFFLE(0, 0, 3, 2)));       \
  if (q != qe_3) {                                                             \
    __m128i xmm_m = _mm_set_epi32(0, 0, m[1], m[0]);                           \
    __m128i xmm_q = _mm_broadcast_si32(q);                                     \
    POPCNT_UINT32_STEP3_SSE(xmm_m, xmm_q, xmm_sum_0)                           \
  }                                                                            \
  _mm_storel_pi((__m64 *)out, _NORM(xmm_sum_0));

//! Accumulation passes shared by both alignment paths of
//! POPCNT_UINT32_2X2_AVX. `_LOAD` names the 256-bit load intrinsic handed to
//! the iteration macro. Relies on `ymm_sum`, `qe_0`, `qe_1` and `qe_2` being
//! declared by the enclosing expansion; advances `m` by 8 and `q` by 8 per
//! iteration.
#define POPCNT_UINT32_2X2_AVX_PASSES(m, q, _LOAD)                            \
  for (; q != qe_1; m += 8, q += 8) {                                        \
    MATRIX_INT32_ITER_2X2_AVX(m, q, ymm_sum, _LOAD, POPCNT_UINT32_STEP1_AVX) \
  }                                                                          \
  MATRIX_VAR_PERMUTE(1, 2, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)              \
  for (; q != qe_2; m += 8, q += 8) {                                        \
    MATRIX_INT32_ITER_2X2_AVX(m, q, ymm_sum, _LOAD, POPCNT_UINT32_STEP2_AVX) \
  }                                                                          \
  MATRIX_VAR_PERMUTE(1, 2, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)              \
  for (; q != qe_0; m += 8, q += 8) {                                        \
    MATRIX_INT32_ITER_2X2_AVX(m, q, ymm_sum, _LOAD, POPCNT_UINT32_STEP3_AVX) \
  }

//! Compute the distance between matrix and query (UINT32, M=2, N=2)
//!
//! Expands to a statement sequence (not wrapped in a block): it declares
//! `ymm_sum*`, `xmm_sum_0_0`, `xmm_sum_0_1` and `qe_0`..`qe_3` in the caller's
//! scope and advances the `m` and `q` pointers it is given.
//!   - `qe_0` ends the part handled by the 256-bit loops (`cnt >> 2`
//!     iterations of 8 query values); `qe_3` is the true end
//!     (`q + (cnt << 1)`).
//!   - `qe_1` / `qe_2` end the first / second pass after 7 / 1023 iterations
//!     when `cnt` exceeds 31 / 4095.
//!     NOTE(review): presumably these caps keep the narrower counters used by
//!     STEP1 / STEP2 from overflowing before each PERMUTE widens them;
//!     confirm against the STEP/PERMUTE definitions.
//! After the loops each 256-bit accumulator is reduced to 128 bits. If at
//! least four query values remain they are processed with 128-bit steps, the
//! two accumulators are merged into `xmm_sum_0_0`, and any remaining values
//! are processed in a final step. The result is written through
//! MATRIX_VAR_STORE, aligned when `out` is 16-byte aligned.
//! Aligned 256-bit loads are used only when both `m` and `q` are 32-byte
//! aligned.
#define POPCNT_UINT32_2X2_AVX(m, q, cnt, out, _NORM)                         \
  MATRIX_VAR_INIT(1, 2, __m256i, ymm_sum, _mm256_setzero_si256())            \
  const uint32_t *qe_0 = q + ((cnt >> 2) << 3);                              \
  const uint32_t *qe_1 = (cnt > 31 ? q + ((31 >> 2) << 3) : qe_0);           \
  const uint32_t *qe_2 = (cnt > 4095 ? q + ((4095 >> 2) << 3) : qe_0);       \
  const uint32_t *qe_3 = q + (cnt << 1);                                     \
  if (((uintptr_t)m & 0x1f) == 0 && ((uintptr_t)q & 0x1f) == 0) {            \
    POPCNT_UINT32_2X2_AVX_PASSES(m, q, _mm256_load_si256)                    \
  } else {                                                                   \
    POPCNT_UINT32_2X2_AVX_PASSES(m, q, _mm256_loadu_si256)                   \
  }                                                                          \
  __m128i xmm_sum_0_0 =                                                      \
      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_0),                     \
                    _mm256_extracti128_si256(ymm_sum_0_0, 1));               \
  __m128i xmm_sum_0_1 =                                                      \
      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_1),                     \
                    _mm256_extracti128_si256(ymm_sum_0_1, 1));               \
  if (qe_3 >= qe_0 + 4) {                                                    \
    __m128i xmm_q = _mm_loadu_si128((const __m128i *)(q));                   \
    __m128i xmm_m = _mm_loadu_si128((const __m128i *)(m));                   \
    __m128i xmm_p = _mm_shuffle_epi32(xmm_q, _MM_SHUFFLE(2, 2, 0, 0));       \
    POPCNT_UINT32_STEP3_SSE(xmm_m, xmm_p, xmm_sum_0_0)                       \
    xmm_p = _mm_shuffle_epi32(xmm_q, _MM_SHUFFLE(3, 3, 1, 1));               \
    POPCNT_UINT32_STEP3_SSE(xmm_m, xmm_p, xmm_sum_0_1)                       \
    m += 4;                                                                  \
    q += 4;                                                                  \
  }                                                                          \
  xmm_sum_0_0 = _mm_add_epi32(_mm_unpacklo_epi64(xmm_sum_0_0, xmm_sum_0_1),  \
                              _mm_unpackhi_epi64(xmm_sum_0_0, xmm_sum_0_1)); \
  if (q != qe_3) {                                                           \
    __m128i xmm_m = _mm_set_epi32(m[1], m[0], m[1], m[0]);                   \
    __m128i xmm_q = _mm_set_epi32(q[1], q[1], q[0], q[0]);                   \
    POPCNT_UINT32_STEP3_SSE(xmm_m, xmm_q, xmm_sum_0_0)                       \
  }                                                                          \
  if (((uintptr_t)out & 0xf) == 0) {                                         \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)             \
  } else {                                                                   \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)            \
  }

//! Compute the distance between matrix and query (UINT32, M=4, N=1)
//!
//! Expands in the caller's scope: `m` and `q` must be modifiable pointers and
//! are advanced as data is consumed; the locals declared here stay visible
//! after the macro. `_NORM` is only forwarded to MATRIX_VAR_STORE.
//!
//! The main loop consumes 8 matrix and 2 query elements per iteration and is
//! split into three phases (STEP1, STEP2, STEP3) separated by the PERMUTE1 /
//! PERMUTE2 passes. Phase 1 stops after 30 query elements once cnt > 31 and
//! phase 2 after 4094 once cnt > 4095. NOTE(review): presumably this keeps
//! narrow per-lane counters from overflowing -- confirm against the
//! POPCNT_UINT32_STEP*/PERMUTE* definitions. An odd `cnt` leaves one query
//! element, which the SSE tail handles before the result is stored to `out`.
#define POPCNT_UINT32_4X1_AVX(m, q, cnt, out, _NORM)                           \
  MATRIX_VAR_INIT(1, 1, __m256i, ymm_sum, _mm256_setzero_si256())              \
  const uint32_t *q_tail_end = q + cnt;                                        \
  const uint32_t *q_phase3_end = q + ((cnt >> 1) << 1);                        \
  const uint32_t *q_phase1_end = (cnt > 31 ? q + (15 * 2) : q_phase3_end);     \
  const uint32_t *q_phase2_end = (cnt > 4095 ? q + (2047 * 2) : q_phase3_end); \
  if (((uintptr_t)m & 0x1f) != 0) {                                            \
    /* m is not 32-byte aligned: unaligned loads */                            \
    while (q != q_phase1_end) {                                                \
      MATRIX_INT32_ITER_4X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,             \
                                POPCNT_UINT32_STEP1_AVX)                       \
      m += 8;                                                                  \
      q += 2;                                                                  \
    }                                                                          \
    MATRIX_VAR_PERMUTE(1, 1, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)              \
    while (q != q_phase2_end) {                                                \
      MATRIX_INT32_ITER_4X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,             \
                                POPCNT_UINT32_STEP2_AVX)                       \
      m += 8;                                                                  \
      q += 2;                                                                  \
    }                                                                          \
    MATRIX_VAR_PERMUTE(1, 1, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)              \
    while (q != q_phase3_end) {                                                \
      MATRIX_INT32_ITER_4X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,             \
                                POPCNT_UINT32_STEP3_AVX)                       \
      m += 8;                                                                  \
      q += 2;                                                                  \
    }                                                                          \
  } else {                                                                     \
    /* m is 32-byte aligned: aligned loads */                                  \
    while (q != q_phase1_end) {                                                \
      MATRIX_INT32_ITER_4X1_AVX(m, q, ymm_sum, _mm256_load_si256,              \
                                POPCNT_UINT32_STEP1_AVX)                       \
      m += 8;                                                                  \
      q += 2;                                                                  \
    }                                                                          \
    MATRIX_VAR_PERMUTE(1, 1, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)              \
    while (q != q_phase2_end) {                                                \
      MATRIX_INT32_ITER_4X1_AVX(m, q, ymm_sum, _mm256_load_si256,              \
                                POPCNT_UINT32_STEP2_AVX)                       \
      m += 8;                                                                  \
      q += 2;                                                                  \
    }                                                                          \
    MATRIX_VAR_PERMUTE(1, 1, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)              \
    while (q != q_phase3_end) {                                                \
      MATRIX_INT32_ITER_4X1_AVX(m, q, ymm_sum, _mm256_load_si256,              \
                                POPCNT_UINT32_STEP3_AVX)                       \
      m += 8;                                                                  \
      q += 2;                                                                  \
    }                                                                          \
  }                                                                            \
  /* fold the upper 128-bit half into the lower one */                         \
  __m128i xmm_sum_0_0 =                                                        \
      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_0),                       \
                    _mm256_extracti128_si256(ymm_sum_0_0, 1));                 \
  /* leftover step when cnt is odd */                                          \
  if (q != q_tail_end) {                                                       \
    __m128i xmm_rows = _mm_loadu_si128((const __m128i *)(m));                  \
    __m128i xmm_q0 = _mm_broadcast_si32(q);                                    \
    POPCNT_UINT32_STEP3_SSE(xmm_rows, xmm_q0, xmm_sum_0_0)                     \
  }                                                                            \
  if (((uintptr_t)out & 0xf) != 0) {                                           \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_storeu_ps, _NORM)              \
  } else {                                                                     \
    MATRIX_VAR_STORE(1, 1, 4, xmm_sum, out, _mm_store_ps, _NORM)               \
  }

//! Compute the distance between matrix and query (UINT32, M=4, N=2)
//!
//! Expands in the caller's scope: `m` and `q` must be modifiable pointers and
//! are advanced as data is consumed; the locals declared here stay visible
//! after the macro. `_NORM` is only forwarded to MATRIX_VAR_STORE.
//!
//! The main loop consumes 8 matrix and 4 query elements per iteration (the
//! query spans cnt * 2 elements) and is split into three phases (STEP1,
//! STEP2, STEP3) separated by the PERMUTE1 / PERMUTE2 passes. Phase 1 stops
//! after 15 iterations once cnt > 31 and phase 2 after 2047 once cnt > 4095.
//! NOTE(review): presumably this keeps narrow per-lane counters from
//! overflowing -- confirm against the POPCNT_UINT32_STEP*/PERMUTE*
//! definitions. An odd `cnt` leaves one step, which the SSE tail handles
//! before the two results are stored to `out`.
#define POPCNT_UINT32_4X2_AVX(m, q, cnt, out, _NORM)                           \
  MATRIX_VAR_INIT(1, 2, __m256i, ymm_sum, _mm256_setzero_si256())              \
  const uint32_t *q_tail_end = q + (cnt << 1);                                 \
  const uint32_t *q_phase3_end = q + ((cnt >> 1) << 2);                        \
  const uint32_t *q_phase1_end = (cnt > 31 ? q + (15 * 4) : q_phase3_end);     \
  const uint32_t *q_phase2_end = (cnt > 4095 ? q + (2047 * 4) : q_phase3_end); \
  if (((uintptr_t)m & 0x1f) != 0) {                                            \
    /* m is not 32-byte aligned: unaligned loads */                            \
    while (q != q_phase1_end) {                                                \
      MATRIX_INT32_ITER_4X2_AVX(m, q, ymm_sum, _mm256_loadu_si256,             \
                                POPCNT_UINT32_STEP1_AVX)                       \
      m += 8;                                                                  \
      q += 4;                                                                  \
    }                                                                          \
    MATRIX_VAR_PERMUTE(1, 2, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)              \
    while (q != q_phase2_end) {                                                \
      MATRIX_INT32_ITER_4X2_AVX(m, q, ymm_sum, _mm256_loadu_si256,             \
                                POPCNT_UINT32_STEP2_AVX)                       \
      m += 8;                                                                  \
      q += 4;                                                                  \
    }                                                                          \
    MATRIX_VAR_PERMUTE(1, 2, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)              \
    while (q != q_phase3_end) {                                                \
      MATRIX_INT32_ITER_4X2_AVX(m, q, ymm_sum, _mm256_loadu_si256,             \
                                POPCNT_UINT32_STEP3_AVX)                       \
      m += 8;                                                                  \
      q += 4;                                                                  \
    }                                                                          \
  } else {                                                                     \
    /* m is 32-byte aligned: aligned loads */                                  \
    while (q != q_phase1_end) {                                                \
      MATRIX_INT32_ITER_4X2_AVX(m, q, ymm_sum, _mm256_load_si256,              \
                                POPCNT_UINT32_STEP1_AVX)                       \
      m += 8;                                                                  \
      q += 4;                                                                  \
    }                                                                          \
    MATRIX_VAR_PERMUTE(1, 2, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)              \
    while (q != q_phase2_end) {                                                \
      MATRIX_INT32_ITER_4X2_AVX(m, q, ymm_sum, _mm256_load_si256,              \
                                POPCNT_UINT32_STEP2_AVX)                       \
      m += 8;                                                                  \
      q += 4;                                                                  \
    }                                                                          \
    MATRIX_VAR_PERMUTE(1, 2, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)              \
    while (q != q_phase3_end) {                                                \
      MATRIX_INT32_ITER_4X2_AVX(m, q, ymm_sum, _mm256_load_si256,              \
                                POPCNT_UINT32_STEP3_AVX)                       \
      m += 8;                                                                  \
      q += 4;                                                                  \
    }                                                                          \
  }                                                                            \
  /* fold the upper 128-bit half into the lower one */                         \
  __m128i xmm_sum_0_0 =                                                        \
      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_0),                       \
                    _mm256_extracti128_si256(ymm_sum_0_0, 1));                 \
  __m128i xmm_sum_0_1 =                                                        \
      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_1),                       \
                    _mm256_extracti128_si256(ymm_sum_0_1, 1));                 \
  /* leftover step when cnt is odd */                                          \
  if (q != q_tail_end) {                                                       \
    __m128i xmm_rows = _mm_loadu_si128((const __m128i *)(m));                  \
    __m128i xmm_q0 = _mm_broadcast_si32(q);                                    \
    POPCNT_UINT32_STEP3_SSE(xmm_rows, xmm_q0, xmm_sum_0_0)                     \
    __m128i xmm_q1 = _mm_broadcast_si32(q + 1);                                \
    POPCNT_UINT32_STEP3_SSE(xmm_rows, xmm_q1, xmm_sum_0_1)                     \
  }                                                                            \
  if (((uintptr_t)out & 0xf) != 0) {                                           \
    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, out, _mm_storeu_ps, _NORM)              \
  } else {                                                                     \
    MATRIX_VAR_STORE(1, 2, 4, xmm_sum, out, _mm_store_ps, _NORM)               \
  }

//! Compute the distance between matrix and query (UINT32, M=4, N=4)
//!
//! Expands in the caller's scope: `m` and `q` must be modifiable pointers and
//! are advanced as data is consumed; the locals declared here stay visible
//! after the macro. `_NORM` is only forwarded to MATRIX_VAR_STORE.
//!
//! Aligned loads are selected only when both `m` and `q` are 32-byte aligned.
//! The main loop consumes 8 matrix and 8 query elements per iteration (the
//! query spans cnt * 4 elements) and is split into three phases (STEP1,
//! STEP2, STEP3) separated by the PERMUTE1 / PERMUTE2 passes. Phase 1 stops
//! after 15 iterations once cnt > 31 and phase 2 after 2047 once cnt > 4095.
//! NOTE(review): presumably this keeps narrow per-lane counters from
//! overflowing -- confirm against the POPCNT_UINT32_STEP*/PERMUTE*
//! definitions. An odd `cnt` leaves one step, which the SSE tail handles
//! before the four results are stored to `out`.
#define POPCNT_UINT32_4X4_AVX(m, q, cnt, out, _NORM)                           \
  MATRIX_VAR_INIT(1, 4, __m256i, ymm_sum, _mm256_setzero_si256())              \
  const uint32_t *q_tail_end = q + (cnt << 2);                                 \
  const uint32_t *q_phase3_end = q + ((cnt >> 1) << 3);                        \
  const uint32_t *q_phase1_end = (cnt > 31 ? q + (15 * 8) : q_phase3_end);     \
  const uint32_t *q_phase2_end = (cnt > 4095 ? q + (2047 * 8) : q_phase3_end); \
  if (((uintptr_t)m & 0x1f) != 0 || ((uintptr_t)q & 0x1f) != 0) {              \
    /* m or q is not 32-byte aligned: unaligned loads */                       \
    while (q != q_phase1_end) {                                                \
      MATRIX_INT32_ITER_4X4_AVX(m, q, ymm_sum, _mm256_loadu_si256,             \
                                POPCNT_UINT32_STEP1_AVX)                       \
      m += 8;                                                                  \
      q += 8;                                                                  \
    }                                                                          \
    MATRIX_VAR_PERMUTE(1, 4, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)              \
    while (q != q_phase2_end) {                                                \
      MATRIX_INT32_ITER_4X4_AVX(m, q, ymm_sum, _mm256_loadu_si256,             \
                                POPCNT_UINT32_STEP2_AVX)                       \
      m += 8;                                                                  \
      q += 8;                                                                  \
    }                                                                          \
    MATRIX_VAR_PERMUTE(1, 4, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)              \
    while (q != q_phase3_end) {                                                \
      MATRIX_INT32_ITER_4X4_AVX(m, q, ymm_sum, _mm256_loadu_si256,             \
                                POPCNT_UINT32_STEP3_AVX)                       \
      m += 8;                                                                  \
      q += 8;                                                                  \
    }                                                                          \
  } else {                                                                     \
    /* m and q are 32-byte aligned: aligned loads */                           \
    while (q != q_phase1_end) {                                                \
      MATRIX_INT32_ITER_4X4_AVX(m, q, ymm_sum, _mm256_load_si256,              \
                                POPCNT_UINT32_STEP1_AVX)                       \
      m += 8;                                                                  \
      q += 8;                                                                  \
    }                                                                          \
    MATRIX_VAR_PERMUTE(1, 4, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)              \
    while (q != q_phase2_end) {                                                \
      MATRIX_INT32_ITER_4X4_AVX(m, q, ymm_sum, _mm256_load_si256,              \
                                POPCNT_UINT32_STEP2_AVX)                       \
      m += 8;                                                                  \
      q += 8;                                                                  \
    }                                                                          \
    MATRIX_VAR_PERMUTE(1, 4, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)              \
    while (q != q_phase3_end) {                                                \
      MATRIX_INT32_ITER_4X4_AVX(m, q, ymm_sum, _mm256_load_si256,              \
                                POPCNT_UINT32_STEP3_AVX)                       \
      m += 8;                                                                  \
      q += 8;                                                                  \
    }                                                                          \
  }                                                                            \
  /* fold the upper 128-bit half into the lower one */                         \
  __m128i xmm_sum_0_0 =                                                        \
      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_0),                       \
                    _mm256_extracti128_si256(ymm_sum_0_0, 1));                 \
  __m128i xmm_sum_0_1 =                                                        \
      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_1),                       \
                    _mm256_extracti128_si256(ymm_sum_0_1, 1));                 \
  __m128i xmm_sum_0_2 =                                                        \
      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_2),                       \
                    _mm256_extracti128_si256(ymm_sum_0_2, 1));                 \
  __m128i xmm_sum_0_3 =                                                        \
      _mm_add_epi32(_mm256_castsi256_si128(ymm_sum_0_3),                       \
                    _mm256_extracti128_si256(ymm_sum_0_3, 1));                 \
  /* leftover step when cnt is odd */                                          \
  if (q != q_tail_end) {                                                       \
    __m128i xmm_rows = _mm_loadu_si128((const __m128i *)(m));                  \
    __m128i xmm_q0 = _mm_broadcast_si32(q);                                    \
    POPCNT_UINT32_STEP3_SSE(xmm_rows, xmm_q0, xmm_sum_0_0)                     \
    __m128i xmm_q1 = _mm_broadcast_si32(q + 1);                                \
    POPCNT_UINT32_STEP3_SSE(xmm_rows, xmm_q1, xmm_sum_0_1)                     \
    __m128i xmm_q2 = _mm_broadcast_si32(q + 2);                                \
    POPCNT_UINT32_STEP3_SSE(xmm_rows, xmm_q2, xmm_sum_0_2)                     \
    __m128i xmm_q3 = _mm_broadcast_si32(q + 3);                                \
    POPCNT_UINT32_STEP3_SSE(xmm_rows, xmm_q3, xmm_sum_0_3)                     \
  }                                                                            \
  if (((uintptr_t)out & 0xf) != 0) {                                           \
    MATRIX_VAR_STORE(1, 4, 4, xmm_sum, out, _mm_storeu_ps, _NORM)              \
  } else {                                                                     \
    MATRIX_VAR_STORE(1, 4, 4, xmm_sum, out, _mm_store_ps, _NORM)               \
  }

//! Compute the distance between matrix and query (UINT32, M=8, N=1)
//!
//! Expands in the caller's scope: `m` and `q` must be modifiable pointers and
//! are advanced as data is consumed; the locals declared here stay visible
//! after the macro. `_NORM` is only forwarded to MATRIX_VAR_STORE.
//!
//! Each iteration consumes 8 matrix elements and 1 query element. The loop is
//! split into three phases (STEP1, STEP2, STEP3) separated by the PERMUTE1 /
//! PERMUTE2 passes. Phase 1 stops after 31 iterations once cnt > 31 and
//! phase 2 after 4095 once cnt > 4095. NOTE(review): presumably this keeps
//! narrow per-lane counters from overflowing -- confirm against the
//! POPCNT_UINT32_STEP*/PERMUTE* definitions.
#define POPCNT_UINT32_8X1_AVX(m, q, cnt, out, _NORM)                           \
  MATRIX_VAR_INIT(1, 1, __m256i, ymm_sum, _mm256_setzero_si256())              \
  const uint32_t *q_phase3_end = q + cnt;                                      \
  const uint32_t *q_phase1_end = (cnt > 31 ? q + 31 : q_phase3_end);           \
  const uint32_t *q_phase2_end = (cnt > 4095 ? q + 4095 : q_phase3_end);       \
  if (((uintptr_t)m & 0x1f) != 0) {                                            \
    /* m is not 32-byte aligned: unaligned loads */                            \
    while (q != q_phase1_end) {                                                \
      MATRIX_INT32_ITER_8X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,             \
                                POPCNT_UINT32_STEP1_AVX)                       \
      m += 8;                                                                  \
      ++q;                                                                     \
    }                                                                          \
    MATRIX_VAR_PERMUTE(1, 1, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)              \
    while (q != q_phase2_end) {                                                \
      MATRIX_INT32_ITER_8X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,             \
                                POPCNT_UINT32_STEP2_AVX)                       \
      m += 8;                                                                  \
      ++q;                                                                     \
    }                                                                          \
    MATRIX_VAR_PERMUTE(1, 1, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)              \
    while (q != q_phase3_end) {                                                \
      MATRIX_INT32_ITER_8X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,             \
                                POPCNT_UINT32_STEP3_AVX)                       \
      m += 8;                                                                  \
      ++q;                                                                     \
    }                                                                          \
  } else {                                                                     \
    /* m is 32-byte aligned: aligned loads */                                  \
    while (q != q_phase1_end) {                                                \
      MATRIX_INT32_ITER_8X1_AVX(m, q, ymm_sum, _mm256_load_si256,              \
                                POPCNT_UINT32_STEP1_AVX)                       \
      m += 8;                                                                  \
      ++q;                                                                     \
    }                                                                          \
    MATRIX_VAR_PERMUTE(1, 1, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)              \
    while (q != q_phase2_end) {                                                \
      MATRIX_INT32_ITER_8X1_AVX(m, q, ymm_sum, _mm256_load_si256,              \
                                POPCNT_UINT32_STEP2_AVX)                       \
      m += 8;                                                                  \
      ++q;                                                                     \
    }                                                                          \
    MATRIX_VAR_PERMUTE(1, 1, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)              \
    while (q != q_phase3_end) {                                                \
      MATRIX_INT32_ITER_8X1_AVX(m, q, ymm_sum, _mm256_load_si256,              \
                                POPCNT_UINT32_STEP3_AVX)                       \
      m += 8;                                                                  \
      ++q;                                                                     \
    }                                                                          \
  }                                                                            \
  if (((uintptr_t)out & 0x1f) != 0) {                                          \
    MATRIX_VAR_STORE(1, 1, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)           \
  } else {                                                                     \
    MATRIX_VAR_STORE(1, 1, 8, ymm_sum, out, _mm256_store_ps, _NORM)            \
  }

//! Compute the distance between matrix and query (UINT32, M=8, N=2)
//!
//! Expands in the caller's scope: `m` and `q` must be modifiable pointers and
//! are advanced as data is consumed; the locals declared here stay visible
//! after the macro. `_NORM` is only forwarded to MATRIX_VAR_STORE.
//!
//! Each iteration consumes 8 matrix and 2 query elements (the query spans
//! cnt * 2 elements). The loop is split into three phases (STEP1, STEP2,
//! STEP3) separated by the PERMUTE1 / PERMUTE2 passes. Phase 1 stops after
//! 31 iterations once cnt > 31 and phase 2 after 4095 once cnt > 4095.
//! NOTE(review): presumably this keeps narrow per-lane counters from
//! overflowing -- confirm against the POPCNT_UINT32_STEP*/PERMUTE*
//! definitions.
#define POPCNT_UINT32_8X2_AVX(m, q, cnt, out, _NORM)                           \
  MATRIX_VAR_INIT(1, 2, __m256i, ymm_sum, _mm256_setzero_si256())              \
  const uint32_t *q_phase3_end = q + (cnt << 1);                               \
  const uint32_t *q_phase1_end = (cnt > 31 ? q + (31 * 2) : q_phase3_end);     \
  const uint32_t *q_phase2_end = (cnt > 4095 ? q + (4095 * 2) : q_phase3_end); \
  if (((uintptr_t)m & 0x1f) != 0) {                                            \
    /* m is not 32-byte aligned: unaligned loads */                            \
    while (q != q_phase1_end) {                                                \
      MATRIX_INT32_ITER_8X2_AVX(m, q, ymm_sum, _mm256_loadu_si256,             \
                                POPCNT_UINT32_STEP1_AVX)                       \
      m += 8;                                                                  \
      q += 2;                                                                  \
    }                                                                          \
    MATRIX_VAR_PERMUTE(1, 2, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)              \
    while (q != q_phase2_end) {                                                \
      MATRIX_INT32_ITER_8X2_AVX(m, q, ymm_sum, _mm256_loadu_si256,             \
                                POPCNT_UINT32_STEP2_AVX)                       \
      m += 8;                                                                  \
      q += 2;                                                                  \
    }                                                                          \
    MATRIX_VAR_PERMUTE(1, 2, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)              \
    while (q != q_phase3_end) {                                                \
      MATRIX_INT32_ITER_8X2_AVX(m, q, ymm_sum, _mm256_loadu_si256,             \
                                POPCNT_UINT32_STEP3_AVX)                       \
      m += 8;                                                                  \
      q += 2;                                                                  \
    }                                                                          \
  } else {                                                                     \
    /* m is 32-byte aligned: aligned loads */                                  \
    while (q != q_phase1_end) {                                                \
      MATRIX_INT32_ITER_8X2_AVX(m, q, ymm_sum, _mm256_load_si256,              \
                                POPCNT_UINT32_STEP1_AVX)                       \
      m += 8;                                                                  \
      q += 2;                                                                  \
    }                                                                          \
    MATRIX_VAR_PERMUTE(1, 2, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)              \
    while (q != q_phase2_end) {                                                \
      MATRIX_INT32_ITER_8X2_AVX(m, q, ymm_sum, _mm256_load_si256,              \
                                POPCNT_UINT32_STEP2_AVX)                       \
      m += 8;                                                                  \
      q += 2;                                                                  \
    }                                                                          \
    MATRIX_VAR_PERMUTE(1, 2, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)              \
    while (q != q_phase3_end) {                                                \
      MATRIX_INT32_ITER_8X2_AVX(m, q, ymm_sum, _mm256_load_si256,              \
                                POPCNT_UINT32_STEP3_AVX)                       \
      m += 8;                                                                  \
      q += 2;                                                                  \
    }                                                                          \
  }                                                                            \
  if (((uintptr_t)out & 0x1f) != 0) {                                          \
    MATRIX_VAR_STORE(1, 2, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)           \
  } else {                                                                     \
    MATRIX_VAR_STORE(1, 2, 8, ymm_sum, out, _mm256_store_ps, _NORM)            \
  }

//! Compute the distance between matrix and query (UINT32, M=8, N=4)
//!
//! Expands in the caller's scope: `m` and `q` must be modifiable pointers and
//! are advanced as data is consumed; the locals declared here stay visible
//! after the macro. `_NORM` is only forwarded to MATRIX_VAR_STORE.
//!
//! Each iteration consumes 8 matrix and 4 query elements (the query spans
//! cnt * 4 elements). The loop is split into three phases (STEP1, STEP2,
//! STEP3) separated by the PERMUTE1 / PERMUTE2 passes. Phase 1 stops after
//! 31 iterations once cnt > 31 and phase 2 after 4095 once cnt > 4095.
//! NOTE(review): presumably this keeps narrow per-lane counters from
//! overflowing -- confirm against the POPCNT_UINT32_STEP*/PERMUTE*
//! definitions.
#define POPCNT_UINT32_8X4_AVX(m, q, cnt, out, _NORM)                           \
  MATRIX_VAR_INIT(1, 4, __m256i, ymm_sum, _mm256_setzero_si256())              \
  const uint32_t *q_phase3_end = q + (cnt << 2);                               \
  const uint32_t *q_phase1_end = (cnt > 31 ? q + (31 * 4) : q_phase3_end);     \
  const uint32_t *q_phase2_end = (cnt > 4095 ? q + (4095 * 4) : q_phase3_end); \
  if (((uintptr_t)m & 0x1f) != 0) {                                            \
    /* m is not 32-byte aligned: unaligned loads */                            \
    while (q != q_phase1_end) {                                                \
      MATRIX_INT32_ITER_8X4_AVX(m, q, ymm_sum, _mm256_loadu_si256,             \
                                POPCNT_UINT32_STEP1_AVX)                       \
      m += 8;                                                                  \
      q += 4;                                                                  \
    }                                                                          \
    MATRIX_VAR_PERMUTE(1, 4, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)              \
    while (q != q_phase2_end) {                                                \
      MATRIX_INT32_ITER_8X4_AVX(m, q, ymm_sum, _mm256_loadu_si256,             \
                                POPCNT_UINT32_STEP2_AVX)                       \
      m += 8;                                                                  \
      q += 4;                                                                  \
    }                                                                          \
    MATRIX_VAR_PERMUTE(1, 4, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)              \
    while (q != q_phase3_end) {                                                \
      MATRIX_INT32_ITER_8X4_AVX(m, q, ymm_sum, _mm256_loadu_si256,             \
                                POPCNT_UINT32_STEP3_AVX)                       \
      m += 8;                                                                  \
      q += 4;                                                                  \
    }                                                                          \
  } else {                                                                     \
    /* m is 32-byte aligned: aligned loads */                                  \
    while (q != q_phase1_end) {                                                \
      MATRIX_INT32_ITER_8X4_AVX(m, q, ymm_sum, _mm256_load_si256,              \
                                POPCNT_UINT32_STEP1_AVX)                       \
      m += 8;                                                                  \
      q += 4;                                                                  \
    }                                                                          \
    MATRIX_VAR_PERMUTE(1, 4, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)              \
    while (q != q_phase2_end) {                                                \
      MATRIX_INT32_ITER_8X4_AVX(m, q, ymm_sum, _mm256_load_si256,              \
                                POPCNT_UINT32_STEP2_AVX)                       \
      m += 8;                                                                  \
      q += 4;                                                                  \
    }                                                                          \
    MATRIX_VAR_PERMUTE(1, 4, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)              \
    while (q != q_phase3_end) {                                                \
      MATRIX_INT32_ITER_8X4_AVX(m, q, ymm_sum, _mm256_load_si256,              \
                                POPCNT_UINT32_STEP3_AVX)                       \
      m += 8;                                                                  \
      q += 4;                                                                  \
    }                                                                          \
  }                                                                            \
  if (((uintptr_t)out & 0x1f) != 0) {                                          \
    MATRIX_VAR_STORE(1, 4, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)           \
  } else {                                                                     \
    MATRIX_VAR_STORE(1, 4, 8, ymm_sum, out, _mm256_store_ps, _NORM)            \
  }

//! Compute the distance between matrix and query (UINT32, M=8, N=8)
//!
//! Expands in the caller's scope: `m` and `q` must be modifiable pointers and
//! are advanced as data is consumed; the locals declared here stay visible
//! after the macro. `_NORM` is only forwarded to MATRIX_VAR_STORE.
//!
//! Each iteration consumes 8 matrix and 8 query elements (the query spans
//! cnt * 8 elements). The loop is split into three phases (STEP1, STEP2,
//! STEP3) separated by the PERMUTE1 / PERMUTE2 passes. Phase 1 stops after
//! 31 iterations once cnt > 31 and phase 2 after 4095 once cnt > 4095.
//! NOTE(review): presumably this keeps narrow per-lane counters from
//! overflowing -- confirm against the POPCNT_UINT32_STEP*/PERMUTE*
//! definitions.
#define POPCNT_UINT32_8X8_AVX(m, q, cnt, out, _NORM)                           \
  MATRIX_VAR_INIT(1, 8, __m256i, ymm_sum, _mm256_setzero_si256())              \
  const uint32_t *q_phase3_end = q + (cnt << 3);                               \
  const uint32_t *q_phase1_end = (cnt > 31 ? q + (31 * 8) : q_phase3_end);     \
  const uint32_t *q_phase2_end = (cnt > 4095 ? q + (4095 * 8) : q_phase3_end); \
  if (((uintptr_t)m & 0x1f) != 0) {                                            \
    /* m is not 32-byte aligned: unaligned loads */                            \
    while (q != q_phase1_end) {                                                \
      MATRIX_INT32_ITER_8X8_AVX(m, q, ymm_sum, _mm256_loadu_si256,             \
                                POPCNT_UINT32_STEP1_AVX)                       \
      m += 8;                                                                  \
      q += 8;                                                                  \
    }                                                                          \
    MATRIX_VAR_PERMUTE(1, 8, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)              \
    while (q != q_phase2_end) {                                                \
      MATRIX_INT32_ITER_8X8_AVX(m, q, ymm_sum, _mm256_loadu_si256,             \
                                POPCNT_UINT32_STEP2_AVX)                       \
      m += 8;                                                                  \
      q += 8;                                                                  \
    }                                                                          \
    MATRIX_VAR_PERMUTE(1, 8, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)              \
    while (q != q_phase3_end) {                                                \
      MATRIX_INT32_ITER_8X8_AVX(m, q, ymm_sum, _mm256_loadu_si256,             \
                                POPCNT_UINT32_STEP3_AVX)                       \
      m += 8;                                                                  \
      q += 8;                                                                  \
    }                                                                          \
  } else {                                                                     \
    /* m is 32-byte aligned: aligned loads */                                  \
    while (q != q_phase1_end) {                                                \
      MATRIX_INT32_ITER_8X8_AVX(m, q, ymm_sum, _mm256_load_si256,              \
                                POPCNT_UINT32_STEP1_AVX)                       \
      m += 8;                                                                  \
      q += 8;                                                                  \
    }                                                                          \
    MATRIX_VAR_PERMUTE(1, 8, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)              \
    while (q != q_phase2_end) {                                                \
      MATRIX_INT32_ITER_8X8_AVX(m, q, ymm_sum, _mm256_load_si256,              \
                                POPCNT_UINT32_STEP2_AVX)                       \
      m += 8;                                                                  \
      q += 8;                                                                  \
    }                                                                          \
    MATRIX_VAR_PERMUTE(1, 8, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)              \
    while (q != q_phase3_end) {                                                \
      MATRIX_INT32_ITER_8X8_AVX(m, q, ymm_sum, _mm256_load_si256,              \
                                POPCNT_UINT32_STEP3_AVX)                       \
      m += 8;                                                                  \
      q += 8;                                                                  \
    }                                                                          \
  }                                                                            \
  if (((uintptr_t)out & 0x1f) != 0) {                                          \
    MATRIX_VAR_STORE(1, 8, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)           \
  } else {                                                                     \
    MATRIX_VAR_STORE(1, 8, 8, ymm_sum, out, _mm256_store_ps, _NORM)            \
  }

//! Compute the distance between matrix and query (UINT32, M=16, N=1)
//!
//! Expands in the caller's scope: `m` and `q` must be modifiable pointers and
//! are advanced as data is consumed; the locals declared here stay visible
//! after the macro. `_NORM` is only forwarded to MATRIX_VAR_STORE.
//!
//! Each iteration consumes 16 matrix elements and 1 query element. The loop
//! is split into three phases (STEP1, STEP2, STEP3) separated by the
//! PERMUTE1 / PERMUTE2 passes. Phase 1 stops after 31 iterations once
//! cnt > 31 and phase 2 after 4095 once cnt > 4095. NOTE(review): presumably
//! this keeps narrow per-lane counters from overflowing -- confirm against
//! the POPCNT_UINT32_STEP*/PERMUTE* definitions.
#define POPCNT_UINT32_16X1_AVX(m, q, cnt, out, _NORM)                          \
  MATRIX_VAR_INIT(2, 1, __m256i, ymm_sum, _mm256_setzero_si256())              \
  const uint32_t *q_phase3_end = q + cnt;                                      \
  const uint32_t *q_phase1_end = (cnt > 31 ? q + 31 : q_phase3_end);           \
  const uint32_t *q_phase2_end = (cnt > 4095 ? q + 4095 : q_phase3_end);       \
  if (((uintptr_t)m & 0x1f) != 0) {                                            \
    /* m is not 32-byte aligned: unaligned loads */                            \
    while (q != q_phase1_end) {                                                \
      MATRIX_INT32_ITER_16X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,            \
                                 POPCNT_UINT32_STEP1_AVX)                      \
      m += 16;                                                                 \
      ++q;                                                                     \
    }                                                                          \
    MATRIX_VAR_PERMUTE(2, 1, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)              \
    while (q != q_phase2_end) {                                                \
      MATRIX_INT32_ITER_16X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,            \
                                 POPCNT_UINT32_STEP2_AVX)                      \
      m += 16;                                                                 \
      ++q;                                                                     \
    }                                                                          \
    MATRIX_VAR_PERMUTE(2, 1, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)              \
    while (q != q_phase3_end) {                                                \
      MATRIX_INT32_ITER_16X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,            \
                                 POPCNT_UINT32_STEP3_AVX)                      \
      m += 16;                                                                 \
      ++q;                                                                     \
    }                                                                          \
  } else {                                                                     \
    /* m is 32-byte aligned: aligned loads */                                  \
    while (q != q_phase1_end) {                                                \
      MATRIX_INT32_ITER_16X1_AVX(m, q, ymm_sum, _mm256_load_si256,             \
                                 POPCNT_UINT32_STEP1_AVX)                      \
      m += 16;                                                                 \
      ++q;                                                                     \
    }                                                                          \
    MATRIX_VAR_PERMUTE(2, 1, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)              \
    while (q != q_phase2_end) {                                                \
      MATRIX_INT32_ITER_16X1_AVX(m, q, ymm_sum, _mm256_load_si256,             \
                                 POPCNT_UINT32_STEP2_AVX)                      \
      m += 16;                                                                 \
      ++q;                                                                     \
    }                                                                          \
    MATRIX_VAR_PERMUTE(2, 1, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)              \
    while (q != q_phase3_end) {                                                \
      MATRIX_INT32_ITER_16X1_AVX(m, q, ymm_sum, _mm256_load_si256,             \
                                 POPCNT_UINT32_STEP3_AVX)                      \
      m += 16;                                                                 \
      ++q;                                                                     \
    }                                                                          \
  }                                                                            \
  if (((uintptr_t)out & 0x1f) != 0) {                                          \
    MATRIX_VAR_STORE(2, 1, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)           \
  } else {                                                                     \
    MATRIX_VAR_STORE(2, 1, 8, ymm_sum, out, _mm256_store_ps, _NORM)            \
  }

//! Compute the distance between matrix and query (UINT32, M=16, N=2)
//!
//! Expands in the caller's scope: `m` and `q` must be modifiable pointers and
//! are advanced as data is consumed; the locals declared here stay visible
//! after the macro. `_NORM` is only forwarded to MATRIX_VAR_STORE.
//!
//! Each iteration consumes 16 matrix and 2 query elements (the query spans
//! cnt * 2 elements). The loop is split into three phases (STEP1, STEP2,
//! STEP3) separated by the PERMUTE1 / PERMUTE2 passes. Phase 1 stops after
//! 31 iterations once cnt > 31 and phase 2 after 4095 once cnt > 4095.
//! NOTE(review): presumably this keeps narrow per-lane counters from
//! overflowing -- confirm against the POPCNT_UINT32_STEP*/PERMUTE*
//! definitions.
#define POPCNT_UINT32_16X2_AVX(m, q, cnt, out, _NORM)                          \
  MATRIX_VAR_INIT(2, 2, __m256i, ymm_sum, _mm256_setzero_si256())              \
  const uint32_t *q_phase3_end = q + (cnt << 1);                               \
  const uint32_t *q_phase1_end = (cnt > 31 ? q + (31 * 2) : q_phase3_end);     \
  const uint32_t *q_phase2_end = (cnt > 4095 ? q + (4095 * 2) : q_phase3_end); \
  if (((uintptr_t)m & 0x1f) != 0) {                                            \
    /* m is not 32-byte aligned: unaligned loads */                            \
    while (q != q_phase1_end) {                                                \
      MATRIX_INT32_ITER_16X2_AVX(m, q, ymm_sum, _mm256_loadu_si256,            \
                                 POPCNT_UINT32_STEP1_AVX)                      \
      m += 16;                                                                 \
      q += 2;                                                                  \
    }                                                                          \
    MATRIX_VAR_PERMUTE(2, 2, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)              \
    while (q != q_phase2_end) {                                                \
      MATRIX_INT32_ITER_16X2_AVX(m, q, ymm_sum, _mm256_loadu_si256,            \
                                 POPCNT_UINT32_STEP2_AVX)                      \
      m += 16;                                                                 \
      q += 2;                                                                  \
    }                                                                          \
    MATRIX_VAR_PERMUTE(2, 2, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)              \
    while (q != q_phase3_end) {                                                \
      MATRIX_INT32_ITER_16X2_AVX(m, q, ymm_sum, _mm256_loadu_si256,            \
                                 POPCNT_UINT32_STEP3_AVX)                      \
      m += 16;                                                                 \
      q += 2;                                                                  \
    }                                                                          \
  } else {                                                                     \
    /* m is 32-byte aligned: aligned loads */                                  \
    while (q != q_phase1_end) {                                                \
      MATRIX_INT32_ITER_16X2_AVX(m, q, ymm_sum, _mm256_load_si256,             \
                                 POPCNT_UINT32_STEP1_AVX)                      \
      m += 16;                                                                 \
      q += 2;                                                                  \
    }                                                                          \
    MATRIX_VAR_PERMUTE(2, 2, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)              \
    while (q != q_phase2_end) {                                                \
      MATRIX_INT32_ITER_16X2_AVX(m, q, ymm_sum, _mm256_load_si256,             \
                                 POPCNT_UINT32_STEP2_AVX)                      \
      m += 16;                                                                 \
      q += 2;                                                                  \
    }                                                                          \
    MATRIX_VAR_PERMUTE(2, 2, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)              \
    while (q != q_phase3_end) {                                                \
      MATRIX_INT32_ITER_16X2_AVX(m, q, ymm_sum, _mm256_load_si256,             \
                                 POPCNT_UINT32_STEP3_AVX)                      \
      m += 16;                                                                 \
      q += 2;                                                                  \
    }                                                                          \
  }                                                                            \
  if (((uintptr_t)out & 0x1f) != 0) {                                          \
    MATRIX_VAR_STORE(2, 2, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)           \
  } else {                                                                     \
    MATRIX_VAR_STORE(2, 2, 8, ymm_sum, out, _mm256_store_ps, _NORM)            \
  }

//! Compute the distance between matrix and query (UINT32, M=16, N=4)
//!
//! Expands in the caller's scope: `m` and `q` must be modifiable pointers and
//! are advanced as data is consumed; the locals declared here stay visible
//! after the macro. `_NORM` is only forwarded to MATRIX_VAR_STORE.
//!
//! Each iteration consumes 16 matrix and 4 query elements (the query spans
//! cnt * 4 elements). The loop is split into three phases (STEP1, STEP2,
//! STEP3) separated by the PERMUTE1 / PERMUTE2 passes. Phase 1 stops after
//! 31 iterations once cnt > 31 and phase 2 after 4095 once cnt > 4095.
//! NOTE(review): presumably this keeps narrow per-lane counters from
//! overflowing -- confirm against the POPCNT_UINT32_STEP*/PERMUTE*
//! definitions.
#define POPCNT_UINT32_16X4_AVX(m, q, cnt, out, _NORM)                          \
  MATRIX_VAR_INIT(2, 4, __m256i, ymm_sum, _mm256_setzero_si256())              \
  const uint32_t *q_phase3_end = q + (cnt << 2);                               \
  const uint32_t *q_phase1_end = (cnt > 31 ? q + (31 * 4) : q_phase3_end);     \
  const uint32_t *q_phase2_end = (cnt > 4095 ? q + (4095 * 4) : q_phase3_end); \
  if (((uintptr_t)m & 0x1f) != 0) {                                            \
    /* m is not 32-byte aligned: unaligned loads */                            \
    while (q != q_phase1_end) {                                                \
      MATRIX_INT32_ITER_16X4_AVX(m, q, ymm_sum, _mm256_loadu_si256,            \
                                 POPCNT_UINT32_STEP1_AVX)                      \
      m += 16;                                                                 \
      q += 4;                                                                  \
    }                                                                          \
    MATRIX_VAR_PERMUTE(2, 4, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)              \
    while (q != q_phase2_end) {                                                \
      MATRIX_INT32_ITER_16X4_AVX(m, q, ymm_sum, _mm256_loadu_si256,            \
                                 POPCNT_UINT32_STEP2_AVX)                      \
      m += 16;                                                                 \
      q += 4;                                                                  \
    }                                                                          \
    MATRIX_VAR_PERMUTE(2, 4, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)              \
    while (q != q_phase3_end) {                                                \
      MATRIX_INT32_ITER_16X4_AVX(m, q, ymm_sum, _mm256_loadu_si256,            \
                                 POPCNT_UINT32_STEP3_AVX)                      \
      m += 16;                                                                 \
      q += 4;                                                                  \
    }                                                                          \
  } else {                                                                     \
    /* m is 32-byte aligned: aligned loads */                                  \
    while (q != q_phase1_end) {                                                \
      MATRIX_INT32_ITER_16X4_AVX(m, q, ymm_sum, _mm256_load_si256,             \
                                 POPCNT_UINT32_STEP1_AVX)                      \
      m += 16;                                                                 \
      q += 4;                                                                  \
    }                                                                          \
    MATRIX_VAR_PERMUTE(2, 4, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)              \
    while (q != q_phase2_end) {                                                \
      MATRIX_INT32_ITER_16X4_AVX(m, q, ymm_sum, _mm256_load_si256,             \
                                 POPCNT_UINT32_STEP2_AVX)                      \
      m += 16;                                                                 \
      q += 4;                                                                  \
    }                                                                          \
    MATRIX_VAR_PERMUTE(2, 4, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)              \
    while (q != q_phase3_end) {                                                \
      MATRIX_INT32_ITER_16X4_AVX(m, q, ymm_sum, _mm256_load_si256,             \
                                 POPCNT_UINT32_STEP3_AVX)                      \
      m += 16;                                                                 \
      q += 4;                                                                  \
    }                                                                          \
  }                                                                            \
  if (((uintptr_t)out & 0x1f) != 0) {                                          \
    MATRIX_VAR_STORE(2, 4, 8, ymm_sum, out, _mm256_storeu_ps, _NORM)           \
  } else {                                                                     \
    MATRIX_VAR_STORE(2, 4, 8, ymm_sum, out, _mm256_store_ps, _NORM)            \
  }

//! Compute the distance between matrix and query (UINT32, M=16, N=8)
//!
//! Statement-style macro: it expands to declarations (`qe_0`, `qe_1`, `qe_2`
//! plus whatever MATRIX_VAR_INIT declares for the `ymm_sum` prefix) followed
//! by statements, so it must be placed where declarations are legal and those
//! names are still free.
//!
//! @param m     Matrix cursor (modifiable pointer); advanced by 16 per step.
//!              Its 32-byte alignment selects `_mm256_load_si256` versus
//!              `_mm256_loadu_si256` for the iteration macro.
//! @param q     Query cursor (modifiable pointer comparable with
//!              `const uint32_t *`); advanced by 8 per step.
//! @param cnt   Number of steps; the query end is `q + (cnt << 3)`.
//! @param out   Destination handed to MATRIX_VAR_STORE; its 32-byte alignment
//!              selects `_mm256_store_ps` versus `_mm256_storeu_ps`.
//! @param _NORM Forwarded untouched to MATRIX_VAR_STORE.
//!
//! The steps are consumed in three consecutive ranges, each with its own step
//! macro: [0, min(cnt, 31)) uses POPCNT_UINT32_STEP1_AVX,
//! [min(cnt, 31), min(cnt, 4095)) uses POPCNT_UINT32_STEP2_AVX and the
//! remainder uses POPCNT_UINT32_STEP3_AVX. MATRIX_VAR_PERMUTE is invoked with
//! POPCNT_UINT32_PERMUTE1_AVX and POPCNT_UINT32_PERMUTE2_AVX between the
//! ranges unconditionally, i.e. also when a later range is empty.
//! NOTE(review): the 31 / 4095 limits presumably bound how long narrow
//! per-lane counters may accumulate before being widened - confirm against
//! the STEP/PERMUTE definitions, which are outside this block.
#define POPCNT_UINT32_16X8_AVX(m, q, cnt, out, _NORM)                \
  MATRIX_VAR_INIT(2, 8, __m256i, ymm_sum, _mm256_setzero_si256())    \
  const uint32_t *qe_0 = q + (cnt << 3);                             \
  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 3) : qe_0);          \
  const uint32_t *qe_2 = (cnt > 4095 ? q + (4095 << 3) : qe_0);      \
  if (((uintptr_t)m & 0x1f) == 0) {                                  \
    for (; q != qe_1; m += 16, q += 8) {                             \
      MATRIX_INT32_ITER_16X8_AVX(m, q, ymm_sum, _mm256_load_si256,   \
                                 POPCNT_UINT32_STEP1_AVX)            \
    }                                                                \
    MATRIX_VAR_PERMUTE(2, 8, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \
    for (; q != qe_2; m += 16, q += 8) {                             \
      MATRIX_INT32_ITER_16X8_AVX(m, q, ymm_sum, _mm256_load_si256,   \
                                 POPCNT_UINT32_STEP2_AVX)            \
    }                                                                \
    MATRIX_VAR_PERMUTE(2, 8, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \
    for (; q != qe_0; m += 16, q += 8) {                             \
      MATRIX_INT32_ITER_16X8_AVX(m, q, ymm_sum, _mm256_load_si256,   \
                                 POPCNT_UINT32_STEP3_AVX)            \
    }                                                                \
  } else {                                                           \
    for (; q != qe_1; m += 16, q += 8) {                             \
      MATRIX_INT32_ITER_16X8_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \
                                 POPCNT_UINT32_STEP1_AVX)            \
    }                                                                \
    MATRIX_VAR_PERMUTE(2, 8, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \
    for (; q != qe_2; m += 16, q += 8) {                             \
      MATRIX_INT32_ITER_16X8_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \
                                 POPCNT_UINT32_STEP2_AVX)            \
    }                                                                \
    MATRIX_VAR_PERMUTE(2, 8, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \
    for (; q != qe_0; m += 16, q += 8) {                             \
      MATRIX_INT32_ITER_16X8_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \
                                 POPCNT_UINT32_STEP3_AVX)            \
    }                                                                \
  }                                                                  \
  if (((uintptr_t)out & 0x1f) == 0) {                                \
    MATRIX_VAR_STORE(2, 8, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \
  } else {                                                           \
    MATRIX_VAR_STORE(2, 8, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \
  }

//! Compute the distance between matrix and query (UINT32, M=16, N=16)
//!
//! Statement-style macro: it expands to declarations (`qe_0`, `qe_1`, `qe_2`
//! plus whatever MATRIX_VAR_INIT declares for the `ymm_sum` prefix) followed
//! by statements, so it must be placed where declarations are legal and those
//! names are still free.
//!
//! @param m     Matrix cursor (modifiable pointer); advanced by 16 per step.
//!              Its 32-byte alignment selects `_mm256_load_si256` versus
//!              `_mm256_loadu_si256` for the iteration macro.
//! @param q     Query cursor (modifiable pointer comparable with
//!              `const uint32_t *`); advanced by 16 per step.
//! @param cnt   Number of steps; the query end is `q + (cnt << 4)`.
//! @param out   Destination handed to MATRIX_VAR_STORE; its 32-byte alignment
//!              selects `_mm256_store_ps` versus `_mm256_storeu_ps`.
//! @param _NORM Forwarded untouched to MATRIX_VAR_STORE.
//!
//! Steps are split into three consecutive ranges: [0, min(cnt, 31)) runs with
//! POPCNT_UINT32_STEP1_AVX, [min(cnt, 31), min(cnt, 4095)) with
//! POPCNT_UINT32_STEP2_AVX, and everything after that with
//! POPCNT_UINT32_STEP3_AVX. The two MATRIX_VAR_PERMUTE calls
//! (POPCNT_UINT32_PERMUTE1_AVX, then POPCNT_UINT32_PERMUTE2_AVX) sit between
//! the ranges and always execute, even for short inputs.
//! NOTE(review): the 31 / 4095 limits presumably bound how long narrow
//! per-lane counters may accumulate before being widened - confirm against
//! the STEP/PERMUTE definitions, which are outside this block.
#define POPCNT_UINT32_16X16_AVX(m, q, cnt, out, _NORM)                \
  MATRIX_VAR_INIT(2, 16, __m256i, ymm_sum, _mm256_setzero_si256())    \
  const uint32_t *qe_0 = q + (cnt << 4);                              \
  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 4) : qe_0);           \
  const uint32_t *qe_2 = (cnt > 4095 ? q + (4095 << 4) : qe_0);       \
  if (((uintptr_t)m & 0x1f) == 0) {                                   \
    for (; q != qe_1; m += 16, q += 16) {                             \
      MATRIX_INT32_ITER_16X16_AVX(m, q, ymm_sum, _mm256_load_si256,   \
                                  POPCNT_UINT32_STEP1_AVX)            \
    }                                                                 \
    MATRIX_VAR_PERMUTE(2, 16, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \
    for (; q != qe_2; m += 16, q += 16) {                             \
      MATRIX_INT32_ITER_16X16_AVX(m, q, ymm_sum, _mm256_load_si256,   \
                                  POPCNT_UINT32_STEP2_AVX)            \
    }                                                                 \
    MATRIX_VAR_PERMUTE(2, 16, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \
    for (; q != qe_0; m += 16, q += 16) {                             \
      MATRIX_INT32_ITER_16X16_AVX(m, q, ymm_sum, _mm256_load_si256,   \
                                  POPCNT_UINT32_STEP3_AVX)            \
    }                                                                 \
  } else {                                                            \
    for (; q != qe_1; m += 16, q += 16) {                             \
      MATRIX_INT32_ITER_16X16_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \
                                  POPCNT_UINT32_STEP1_AVX)            \
    }                                                                 \
    MATRIX_VAR_PERMUTE(2, 16, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \
    for (; q != qe_2; m += 16, q += 16) {                             \
      MATRIX_INT32_ITER_16X16_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \
                                  POPCNT_UINT32_STEP2_AVX)            \
    }                                                                 \
    MATRIX_VAR_PERMUTE(2, 16, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \
    for (; q != qe_0; m += 16, q += 16) {                             \
      MATRIX_INT32_ITER_16X16_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \
                                  POPCNT_UINT32_STEP3_AVX)            \
    }                                                                 \
  }                                                                   \
  if (((uintptr_t)out & 0x1f) == 0) {                                 \
    MATRIX_VAR_STORE(2, 16, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \
  } else {                                                            \
    MATRIX_VAR_STORE(2, 16, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \
  }

//! Compute the distance between matrix and query (UINT32, M=32, N=1)
//!
//! Statement-style macro: it expands to declarations (`qe_0`, `qe_1`, `qe_2`
//! plus whatever MATRIX_VAR_INIT declares for the `ymm_sum` prefix) followed
//! by statements, so it must be placed where declarations are legal and those
//! names are still free.
//!
//! @param m     Matrix cursor (modifiable pointer); advanced by 32 per step.
//!              Its 32-byte alignment selects `_mm256_load_si256` versus
//!              `_mm256_loadu_si256` for the iteration macro.
//! @param q     Query cursor (modifiable pointer comparable with
//!              `const uint32_t *`); advanced by one element per step.
//! @param cnt   Number of steps; the query end is `q + cnt`.
//! @param out   Destination handed to MATRIX_VAR_STORE; its 32-byte alignment
//!              selects `_mm256_store_ps` versus `_mm256_storeu_ps`.
//! @param _NORM Forwarded untouched to MATRIX_VAR_STORE.
//!
//! Steps are split into three consecutive ranges: [0, min(cnt, 31)) runs with
//! POPCNT_UINT32_STEP1_AVX, [min(cnt, 31), min(cnt, 4095)) with
//! POPCNT_UINT32_STEP2_AVX, and everything after that with
//! POPCNT_UINT32_STEP3_AVX. The two MATRIX_VAR_PERMUTE calls
//! (POPCNT_UINT32_PERMUTE1_AVX, then POPCNT_UINT32_PERMUTE2_AVX) sit between
//! the ranges and always execute, even for short inputs.
//! NOTE(review): the 31 / 4095 limits presumably bound how long narrow
//! per-lane counters may accumulate before being widened - confirm against
//! the STEP/PERMUTE definitions, which are outside this block.
#define POPCNT_UINT32_32X1_AVX(m, q, cnt, out, _NORM)                \
  MATRIX_VAR_INIT(4, 1, __m256i, ymm_sum, _mm256_setzero_si256())    \
  const uint32_t *qe_0 = q + cnt;                                    \
  const uint32_t *qe_1 = (cnt > 31 ? q + 31 : qe_0);                 \
  const uint32_t *qe_2 = (cnt > 4095 ? q + 4095 : qe_0);             \
  if (((uintptr_t)m & 0x1f) == 0) {                                  \
    for (; q != qe_1; m += 32, ++q) {                                \
      MATRIX_INT32_ITER_32X1_AVX(m, q, ymm_sum, _mm256_load_si256,   \
                                 POPCNT_UINT32_STEP1_AVX)            \
    }                                                                \
    MATRIX_VAR_PERMUTE(4, 1, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \
    for (; q != qe_2; m += 32, ++q) {                                \
      MATRIX_INT32_ITER_32X1_AVX(m, q, ymm_sum, _mm256_load_si256,   \
                                 POPCNT_UINT32_STEP2_AVX)            \
    }                                                                \
    MATRIX_VAR_PERMUTE(4, 1, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \
    for (; q != qe_0; m += 32, ++q) {                                \
      MATRIX_INT32_ITER_32X1_AVX(m, q, ymm_sum, _mm256_load_si256,   \
                                 POPCNT_UINT32_STEP3_AVX)            \
    }                                                                \
  } else {                                                           \
    for (; q != qe_1; m += 32, ++q) {                                \
      MATRIX_INT32_ITER_32X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \
                                 POPCNT_UINT32_STEP1_AVX)            \
    }                                                                \
    MATRIX_VAR_PERMUTE(4, 1, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \
    for (; q != qe_2; m += 32, ++q) {                                \
      MATRIX_INT32_ITER_32X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \
                                 POPCNT_UINT32_STEP2_AVX)            \
    }                                                                \
    MATRIX_VAR_PERMUTE(4, 1, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \
    for (; q != qe_0; m += 32, ++q) {                                \
      MATRIX_INT32_ITER_32X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \
                                 POPCNT_UINT32_STEP3_AVX)            \
    }                                                                \
  }                                                                  \
  if (((uintptr_t)out & 0x1f) == 0) {                                \
    MATRIX_VAR_STORE(4, 1, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \
  } else {                                                           \
    MATRIX_VAR_STORE(4, 1, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \
  }

//! Compute the distance between matrix and query (UINT32, M=32, N=2)
//!
//! Statement-style macro: it expands to declarations (`qe_0`, `qe_1`, `qe_2`
//! plus whatever MATRIX_VAR_INIT declares for the `ymm_sum` prefix) followed
//! by statements, so it must be placed where declarations are legal and those
//! names are still free.
//!
//! @param m     Matrix cursor (modifiable pointer); advanced by 32 per step.
//!              Its 32-byte alignment selects `_mm256_load_si256` versus
//!              `_mm256_loadu_si256` for the iteration macro.
//! @param q     Query cursor (modifiable pointer comparable with
//!              `const uint32_t *`); advanced by 2 per step.
//! @param cnt   Number of steps; the query end is `q + (cnt << 1)`.
//! @param out   Destination handed to MATRIX_VAR_STORE; its 32-byte alignment
//!              selects `_mm256_store_ps` versus `_mm256_storeu_ps`.
//! @param _NORM Forwarded untouched to MATRIX_VAR_STORE.
//!
//! Steps are split into three consecutive ranges: [0, min(cnt, 31)) runs with
//! POPCNT_UINT32_STEP1_AVX, [min(cnt, 31), min(cnt, 4095)) with
//! POPCNT_UINT32_STEP2_AVX, and everything after that with
//! POPCNT_UINT32_STEP3_AVX. The two MATRIX_VAR_PERMUTE calls
//! (POPCNT_UINT32_PERMUTE1_AVX, then POPCNT_UINT32_PERMUTE2_AVX) sit between
//! the ranges and always execute, even for short inputs.
//! NOTE(review): the 31 / 4095 limits presumably bound how long narrow
//! per-lane counters may accumulate before being widened - confirm against
//! the STEP/PERMUTE definitions, which are outside this block.
#define POPCNT_UINT32_32X2_AVX(m, q, cnt, out, _NORM)                \
  MATRIX_VAR_INIT(4, 2, __m256i, ymm_sum, _mm256_setzero_si256())    \
  const uint32_t *qe_0 = q + (cnt << 1);                             \
  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 1) : qe_0);          \
  const uint32_t *qe_2 = (cnt > 4095 ? q + (4095 << 1) : qe_0);      \
  if (((uintptr_t)m & 0x1f) == 0) {                                  \
    for (; q != qe_1; m += 32, q += 2) {                             \
      MATRIX_INT32_ITER_32X2_AVX(m, q, ymm_sum, _mm256_load_si256,   \
                                 POPCNT_UINT32_STEP1_AVX)            \
    }                                                                \
    MATRIX_VAR_PERMUTE(4, 2, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \
    for (; q != qe_2; m += 32, q += 2) {                             \
      MATRIX_INT32_ITER_32X2_AVX(m, q, ymm_sum, _mm256_load_si256,   \
                                 POPCNT_UINT32_STEP2_AVX)            \
    }                                                                \
    MATRIX_VAR_PERMUTE(4, 2, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \
    for (; q != qe_0; m += 32, q += 2) {                             \
      MATRIX_INT32_ITER_32X2_AVX(m, q, ymm_sum, _mm256_load_si256,   \
                                 POPCNT_UINT32_STEP3_AVX)            \
    }                                                                \
  } else {                                                           \
    for (; q != qe_1; m += 32, q += 2) {                             \
      MATRIX_INT32_ITER_32X2_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \
                                 POPCNT_UINT32_STEP1_AVX)            \
    }                                                                \
    MATRIX_VAR_PERMUTE(4, 2, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \
    for (; q != qe_2; m += 32, q += 2) {                             \
      MATRIX_INT32_ITER_32X2_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \
                                 POPCNT_UINT32_STEP2_AVX)            \
    }                                                                \
    MATRIX_VAR_PERMUTE(4, 2, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \
    for (; q != qe_0; m += 32, q += 2) {                             \
      MATRIX_INT32_ITER_32X2_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \
                                 POPCNT_UINT32_STEP3_AVX)            \
    }                                                                \
  }                                                                  \
  if (((uintptr_t)out & 0x1f) == 0) {                                \
    MATRIX_VAR_STORE(4, 2, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \
  } else {                                                           \
    MATRIX_VAR_STORE(4, 2, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \
  }

//! Compute the distance between matrix and query (UINT32, M=32, N=4)
//!
//! Statement-style macro: it expands to declarations (`qe_0`, `qe_1`, `qe_2`
//! plus whatever MATRIX_VAR_INIT declares for the `ymm_sum` prefix) followed
//! by statements, so it must be placed where declarations are legal and those
//! names are still free.
//!
//! @param m     Matrix cursor (modifiable pointer); advanced by 32 per step.
//!              Its 32-byte alignment selects `_mm256_load_si256` versus
//!              `_mm256_loadu_si256` for the iteration macro.
//! @param q     Query cursor (modifiable pointer comparable with
//!              `const uint32_t *`); advanced by 4 per step.
//! @param cnt   Number of steps; the query end is `q + (cnt << 2)`.
//! @param out   Destination handed to MATRIX_VAR_STORE; its 32-byte alignment
//!              selects `_mm256_store_ps` versus `_mm256_storeu_ps`.
//! @param _NORM Forwarded untouched to MATRIX_VAR_STORE.
//!
//! Steps are split into three consecutive ranges: [0, min(cnt, 31)) runs with
//! POPCNT_UINT32_STEP1_AVX, [min(cnt, 31), min(cnt, 4095)) with
//! POPCNT_UINT32_STEP2_AVX, and everything after that with
//! POPCNT_UINT32_STEP3_AVX. The two MATRIX_VAR_PERMUTE calls
//! (POPCNT_UINT32_PERMUTE1_AVX, then POPCNT_UINT32_PERMUTE2_AVX) sit between
//! the ranges and always execute, even for short inputs.
//! NOTE(review): the 31 / 4095 limits presumably bound how long narrow
//! per-lane counters may accumulate before being widened - confirm against
//! the STEP/PERMUTE definitions, which are outside this block.
#define POPCNT_UINT32_32X4_AVX(m, q, cnt, out, _NORM)                \
  MATRIX_VAR_INIT(4, 4, __m256i, ymm_sum, _mm256_setzero_si256())    \
  const uint32_t *qe_0 = q + (cnt << 2);                             \
  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 2) : qe_0);          \
  const uint32_t *qe_2 = (cnt > 4095 ? q + (4095 << 2) : qe_0);      \
  if (((uintptr_t)m & 0x1f) == 0) {                                  \
    for (; q != qe_1; m += 32, q += 4) {                             \
      MATRIX_INT32_ITER_32X4_AVX(m, q, ymm_sum, _mm256_load_si256,   \
                                 POPCNT_UINT32_STEP1_AVX)            \
    }                                                                \
    MATRIX_VAR_PERMUTE(4, 4, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \
    for (; q != qe_2; m += 32, q += 4) {                             \
      MATRIX_INT32_ITER_32X4_AVX(m, q, ymm_sum, _mm256_load_si256,   \
                                 POPCNT_UINT32_STEP2_AVX)            \
    }                                                                \
    MATRIX_VAR_PERMUTE(4, 4, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \
    for (; q != qe_0; m += 32, q += 4) {                             \
      MATRIX_INT32_ITER_32X4_AVX(m, q, ymm_sum, _mm256_load_si256,   \
                                 POPCNT_UINT32_STEP3_AVX)            \
    }                                                                \
  } else {                                                           \
    for (; q != qe_1; m += 32, q += 4) {                             \
      MATRIX_INT32_ITER_32X4_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \
                                 POPCNT_UINT32_STEP1_AVX)            \
    }                                                                \
    MATRIX_VAR_PERMUTE(4, 4, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \
    for (; q != qe_2; m += 32, q += 4) {                             \
      MATRIX_INT32_ITER_32X4_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \
                                 POPCNT_UINT32_STEP2_AVX)            \
    }                                                                \
    MATRIX_VAR_PERMUTE(4, 4, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \
    for (; q != qe_0; m += 32, q += 4) {                             \
      MATRIX_INT32_ITER_32X4_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \
                                 POPCNT_UINT32_STEP3_AVX)            \
    }                                                                \
  }                                                                  \
  if (((uintptr_t)out & 0x1f) == 0) {                                \
    MATRIX_VAR_STORE(4, 4, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \
  } else {                                                           \
    MATRIX_VAR_STORE(4, 4, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \
  }

//! Compute the distance between matrix and query (UINT32, M=32, N=8)
//!
//! Statement-style macro: it expands to declarations (`qe_0`, `qe_1`, `qe_2`
//! plus whatever MATRIX_VAR_INIT declares for the `ymm_sum` prefix) followed
//! by statements, so it must be placed where declarations are legal and those
//! names are still free.
//!
//! @param m     Matrix cursor (modifiable pointer); advanced by 32 per step.
//!              Its 32-byte alignment selects `_mm256_load_si256` versus
//!              `_mm256_loadu_si256` for the iteration macro.
//! @param q     Query cursor (modifiable pointer comparable with
//!              `const uint32_t *`); advanced by 8 per step.
//! @param cnt   Number of steps; the query end is `q + (cnt << 3)`.
//! @param out   Destination handed to MATRIX_VAR_STORE; its 32-byte alignment
//!              selects `_mm256_store_ps` versus `_mm256_storeu_ps`.
//! @param _NORM Forwarded untouched to MATRIX_VAR_STORE.
//!
//! Steps are split into three consecutive ranges: [0, min(cnt, 31)) runs with
//! POPCNT_UINT32_STEP1_AVX, [min(cnt, 31), min(cnt, 4095)) with
//! POPCNT_UINT32_STEP2_AVX, and everything after that with
//! POPCNT_UINT32_STEP3_AVX. The two MATRIX_VAR_PERMUTE calls
//! (POPCNT_UINT32_PERMUTE1_AVX, then POPCNT_UINT32_PERMUTE2_AVX) sit between
//! the ranges and always execute, even for short inputs.
//! NOTE(review): the 31 / 4095 limits presumably bound how long narrow
//! per-lane counters may accumulate before being widened - confirm against
//! the STEP/PERMUTE definitions, which are outside this block.
#define POPCNT_UINT32_32X8_AVX(m, q, cnt, out, _NORM)                \
  MATRIX_VAR_INIT(4, 8, __m256i, ymm_sum, _mm256_setzero_si256())    \
  const uint32_t *qe_0 = q + (cnt << 3);                             \
  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 3) : qe_0);          \
  const uint32_t *qe_2 = (cnt > 4095 ? q + (4095 << 3) : qe_0);      \
  if (((uintptr_t)m & 0x1f) == 0) {                                  \
    for (; q != qe_1; m += 32, q += 8) {                             \
      MATRIX_INT32_ITER_32X8_AVX(m, q, ymm_sum, _mm256_load_si256,   \
                                 POPCNT_UINT32_STEP1_AVX)            \
    }                                                                \
    MATRIX_VAR_PERMUTE(4, 8, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \
    for (; q != qe_2; m += 32, q += 8) {                             \
      MATRIX_INT32_ITER_32X8_AVX(m, q, ymm_sum, _mm256_load_si256,   \
                                 POPCNT_UINT32_STEP2_AVX)            \
    }                                                                \
    MATRIX_VAR_PERMUTE(4, 8, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \
    for (; q != qe_0; m += 32, q += 8) {                             \
      MATRIX_INT32_ITER_32X8_AVX(m, q, ymm_sum, _mm256_load_si256,   \
                                 POPCNT_UINT32_STEP3_AVX)            \
    }                                                                \
  } else {                                                           \
    for (; q != qe_1; m += 32, q += 8) {                             \
      MATRIX_INT32_ITER_32X8_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \
                                 POPCNT_UINT32_STEP1_AVX)            \
    }                                                                \
    MATRIX_VAR_PERMUTE(4, 8, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \
    for (; q != qe_2; m += 32, q += 8) {                             \
      MATRIX_INT32_ITER_32X8_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \
                                 POPCNT_UINT32_STEP2_AVX)            \
    }                                                                \
    MATRIX_VAR_PERMUTE(4, 8, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \
    for (; q != qe_0; m += 32, q += 8) {                             \
      MATRIX_INT32_ITER_32X8_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \
                                 POPCNT_UINT32_STEP3_AVX)            \
    }                                                                \
  }                                                                  \
  if (((uintptr_t)out & 0x1f) == 0) {                                \
    MATRIX_VAR_STORE(4, 8, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \
  } else {                                                           \
    MATRIX_VAR_STORE(4, 8, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \
  }

//! Compute the distance between matrix and query (UINT32, M=32, N=16)
//!
//! Statement-style macro: it expands to declarations (`qe_0`, `qe_1`, `qe_2`
//! plus whatever MATRIX_VAR_INIT declares for the `ymm_sum` prefix) followed
//! by statements, so it must be placed where declarations are legal and those
//! names are still free.
//!
//! @param m     Matrix cursor (modifiable pointer); advanced by 32 per step.
//!              Its 32-byte alignment selects `_mm256_load_si256` versus
//!              `_mm256_loadu_si256` for the iteration macro.
//! @param q     Query cursor (modifiable pointer comparable with
//!              `const uint32_t *`); advanced by 16 per step.
//! @param cnt   Number of steps; the query end is `q + (cnt << 4)`.
//! @param out   Destination handed to MATRIX_VAR_STORE; its 32-byte alignment
//!              selects `_mm256_store_ps` versus `_mm256_storeu_ps`.
//! @param _NORM Forwarded untouched to MATRIX_VAR_STORE.
//!
//! Steps are split into three consecutive ranges: [0, min(cnt, 31)) runs with
//! POPCNT_UINT32_STEP1_AVX, [min(cnt, 31), min(cnt, 4095)) with
//! POPCNT_UINT32_STEP2_AVX, and everything after that with
//! POPCNT_UINT32_STEP3_AVX. The two MATRIX_VAR_PERMUTE calls
//! (POPCNT_UINT32_PERMUTE1_AVX, then POPCNT_UINT32_PERMUTE2_AVX) sit between
//! the ranges and always execute, even for short inputs.
//! NOTE(review): the 31 / 4095 limits presumably bound how long narrow
//! per-lane counters may accumulate before being widened - confirm against
//! the STEP/PERMUTE definitions, which are outside this block.
#define POPCNT_UINT32_32X16_AVX(m, q, cnt, out, _NORM)                \
  MATRIX_VAR_INIT(4, 16, __m256i, ymm_sum, _mm256_setzero_si256())    \
  const uint32_t *qe_0 = q + (cnt << 4);                              \
  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 4) : qe_0);           \
  const uint32_t *qe_2 = (cnt > 4095 ? q + (4095 << 4) : qe_0);       \
  if (((uintptr_t)m & 0x1f) == 0) {                                   \
    for (; q != qe_1; m += 32, q += 16) {                             \
      MATRIX_INT32_ITER_32X16_AVX(m, q, ymm_sum, _mm256_load_si256,   \
                                  POPCNT_UINT32_STEP1_AVX)            \
    }                                                                 \
    MATRIX_VAR_PERMUTE(4, 16, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \
    for (; q != qe_2; m += 32, q += 16) {                             \
      MATRIX_INT32_ITER_32X16_AVX(m, q, ymm_sum, _mm256_load_si256,   \
                                  POPCNT_UINT32_STEP2_AVX)            \
    }                                                                 \
    MATRIX_VAR_PERMUTE(4, 16, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \
    for (; q != qe_0; m += 32, q += 16) {                             \
      MATRIX_INT32_ITER_32X16_AVX(m, q, ymm_sum, _mm256_load_si256,   \
                                  POPCNT_UINT32_STEP3_AVX)            \
    }                                                                 \
  } else {                                                            \
    for (; q != qe_1; m += 32, q += 16) {                             \
      MATRIX_INT32_ITER_32X16_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \
                                  POPCNT_UINT32_STEP1_AVX)            \
    }                                                                 \
    MATRIX_VAR_PERMUTE(4, 16, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \
    for (; q != qe_2; m += 32, q += 16) {                             \
      MATRIX_INT32_ITER_32X16_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \
                                  POPCNT_UINT32_STEP2_AVX)            \
    }                                                                 \
    MATRIX_VAR_PERMUTE(4, 16, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \
    for (; q != qe_0; m += 32, q += 16) {                             \
      MATRIX_INT32_ITER_32X16_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \
                                  POPCNT_UINT32_STEP3_AVX)            \
    }                                                                 \
  }                                                                   \
  if (((uintptr_t)out & 0x1f) == 0) {                                 \
    MATRIX_VAR_STORE(4, 16, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \
  } else {                                                            \
    MATRIX_VAR_STORE(4, 16, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \
  }

//! Compute the distance between matrix and query (UINT32, M=32, N=32)
//!
//! Statement-style macro: it expands to declarations (`qe_0`, `qe_1`, `qe_2`
//! plus whatever MATRIX_VAR_INIT declares for the `ymm_sum` prefix) followed
//! by statements, so it must be placed where declarations are legal and those
//! names are still free.
//!
//! @param m     Matrix cursor (modifiable pointer); advanced by 32 per step.
//!              Its 32-byte alignment selects `_mm256_load_si256` versus
//!              `_mm256_loadu_si256` for the iteration macro.
//! @param q     Query cursor (modifiable pointer comparable with
//!              `const uint32_t *`); advanced by 32 per step.
//! @param cnt   Number of steps; the query end is `q + (cnt << 5)`.
//! @param out   Destination handed to MATRIX_VAR_STORE; its 32-byte alignment
//!              selects `_mm256_store_ps` versus `_mm256_storeu_ps`.
//! @param _NORM Forwarded untouched to MATRIX_VAR_STORE.
//!
//! Steps are split into three consecutive ranges: [0, min(cnt, 31)) runs with
//! POPCNT_UINT32_STEP1_AVX, [min(cnt, 31), min(cnt, 4095)) with
//! POPCNT_UINT32_STEP2_AVX, and everything after that with
//! POPCNT_UINT32_STEP3_AVX. The two MATRIX_VAR_PERMUTE calls
//! (POPCNT_UINT32_PERMUTE1_AVX, then POPCNT_UINT32_PERMUTE2_AVX) sit between
//! the ranges and always execute, even for short inputs.
//! NOTE(review): the 31 / 4095 limits presumably bound how long narrow
//! per-lane counters may accumulate before being widened - confirm against
//! the STEP/PERMUTE definitions, which are outside this block.
#define POPCNT_UINT32_32X32_AVX(m, q, cnt, out, _NORM)                \
  MATRIX_VAR_INIT(4, 32, __m256i, ymm_sum, _mm256_setzero_si256())    \
  const uint32_t *qe_0 = q + (cnt << 5);                              \
  const uint32_t *qe_1 = (cnt > 31 ? q + (31 << 5) : qe_0);           \
  const uint32_t *qe_2 = (cnt > 4095 ? q + (4095 << 5) : qe_0);       \
  if (((uintptr_t)m & 0x1f) == 0) {                                   \
    for (; q != qe_1; m += 32, q += 32) {                             \
      MATRIX_INT32_ITER_32X32_AVX(m, q, ymm_sum, _mm256_load_si256,   \
                                  POPCNT_UINT32_STEP1_AVX)            \
    }                                                                 \
    MATRIX_VAR_PERMUTE(4, 32, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \
    for (; q != qe_2; m += 32, q += 32) {                             \
      MATRIX_INT32_ITER_32X32_AVX(m, q, ymm_sum, _mm256_load_si256,   \
                                  POPCNT_UINT32_STEP2_AVX)            \
    }                                                                 \
    MATRIX_VAR_PERMUTE(4, 32, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \
    for (; q != qe_0; m += 32, q += 32) {                             \
      MATRIX_INT32_ITER_32X32_AVX(m, q, ymm_sum, _mm256_load_si256,   \
                                  POPCNT_UINT32_STEP3_AVX)            \
    }                                                                 \
  } else {                                                            \
    for (; q != qe_1; m += 32, q += 32) {                             \
      MATRIX_INT32_ITER_32X32_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \
                                  POPCNT_UINT32_STEP1_AVX)            \
    }                                                                 \
    MATRIX_VAR_PERMUTE(4, 32, ymm_sum, POPCNT_UINT32_PERMUTE1_AVX)    \
    for (; q != qe_2; m += 32, q += 32) {                             \
      MATRIX_INT32_ITER_32X32_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \
                                  POPCNT_UINT32_STEP2_AVX)            \
    }                                                                 \
    MATRIX_VAR_PERMUTE(4, 32, ymm_sum, POPCNT_UINT32_PERMUTE2_AVX)    \
    for (; q != qe_0; m += 32, q += 32) {                             \
      MATRIX_INT32_ITER_32X32_AVX(m, q, ymm_sum, _mm256_loadu_si256,  \
                                  POPCNT_UINT32_STEP3_AVX)            \
    }                                                                 \
  }                                                                   \
  if (((uintptr_t)out & 0x1f) == 0) {                                 \
    MATRIX_VAR_STORE(4, 32, 8, ymm_sum, out, _mm256_store_ps, _NORM)  \
  } else {                                                            \
    MATRIX_VAR_STORE(4, 32, 8, ymm_sum, out, _mm256_storeu_ps, _NORM) \
  }

//! Compute the distance between matrix and query (UINT64, M=2, N=1)
//!
//! Statement-style macro: it expands to declarations (`qe_0`, `qe_1`, `qe_2`
//! and the accumulators `ymm_sum_0_0` / `ymm_sum_0_1`, which are referenced
//! directly below and are expected to come from MATRIX_VAR_INIT) followed by
//! statements, so those names must be free at the expansion site.
//!
//! @param m     Matrix cursor (modifiable pointer, indexable as 64-bit
//!              values); two elements belong to each query element.
//! @param q     Query cursor (modifiable pointer comparable with
//!              `const uint64_t *`).
//! @param cnt   Number of query elements; the true end is `qe_2 = q + cnt`.
//! @param out   Destination; exactly two floats are written with
//!              `_mm_storel_pi`.
//! @param _NORM Applied to the final accumulator; its result is passed to
//!              `_mm_storel_pi`, so it has to be a `__m128`.
//!
//! Processing order:
//!  1. Main loops take four query elements (and eight matrix elements) per
//!     iteration up to `qe_0` (cnt rounded down to a multiple of 4). When
//!     cnt > 31 the first 28 query elements use POPCNT_UINT64_STEP1_AVX and
//!     the rest POPCNT_UINT64_STEP2_AVX; otherwise everything up to `qe_0`
//!     uses STEP1. POPCNT_UINT64_PERMUTE_AVX is applied in between in either
//!     case. The aligned-load variant is chosen only when BOTH `m` and `q`
//!     are 32-byte aligned.
//!  2. If at least two query elements remain, one 256-bit load of `m` is
//!     combined with lanes (q[0], q[0], q[1], q[1]) using STEP2 into
//!     `ymm_sum_0_0`, and the cursors move by 4 / 2.
//!  3. The two accumulators are added and 64-bit lanes 2..3 are folded onto
//!     lanes 0..1 (`_MM_SHUFFLE(0, 0, 3, 2)`).
//!  4. If one query element is still left, m[0] and m[1] (upper lanes zero)
//!     are combined with `*q` replicated by `_mm256_broadcast_si64` using
//!     STEP2. Only the low two results are stored afterwards, so the upper
//!     lanes are don't-care.
//! NOTE(review): `_mm256_broadcast_si64` is not defined in this block and is
//! presumably a project helper - confirm. The 31-element cap presumably
//! protects narrow STEP1 counters from overflow - confirm against the STEP
//! definitions.
#define POPCNT_UINT64_2X1_AVX(m, q, cnt, out, _NORM)                   \
  MATRIX_VAR_INIT(1, 2, __m256i, ymm_sum, _mm256_setzero_si256())      \
  const uint64_t *qe_0 = q + ((cnt >> 2) << 2);                        \
  const uint64_t *qe_1 = (cnt > 31 ? q + ((31 >> 2) << 2) : qe_0);     \
  const uint64_t *qe_2 = q + cnt;                                      \
  if (((uintptr_t)m & 0x1f) == 0 && ((uintptr_t)q & 0x1f) == 0) {      \
    for (; q != qe_1; m += 8, q += 4) {                                \
      MATRIX_INT64_ITER_2X1_AVX(m, q, ymm_sum, _mm256_load_si256,      \
                                POPCNT_UINT64_STEP1_AVX)               \
    }                                                                  \
    MATRIX_VAR_PERMUTE(1, 2, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)       \
    for (; q != qe_0; m += 8, q += 4) {                                \
      MATRIX_INT64_ITER_2X1_AVX(m, q, ymm_sum, _mm256_load_si256,      \
                                POPCNT_UINT64_STEP2_AVX)               \
    }                                                                  \
    if (qe_2 >= qe_0 + 2) {                                            \
      __m256i ymm_m = _mm256_load_si256((const __m256i *)(m));         \
      __m256i ymm_q = _mm256_set_epi64x(q[1], q[1], q[0], q[0]);       \
      POPCNT_UINT64_STEP2_AVX(ymm_m, ymm_q, ymm_sum_0_0)               \
      m += 4;                                                          \
      q += 2;                                                          \
    }                                                                  \
  } else {                                                             \
    for (; q != qe_1; m += 8, q += 4) {                                \
      MATRIX_INT64_ITER_2X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,     \
                                POPCNT_UINT64_STEP1_AVX)               \
    }                                                                  \
    MATRIX_VAR_PERMUTE(1, 2, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)       \
    for (; q != qe_0; m += 8, q += 4) {                                \
      MATRIX_INT64_ITER_2X1_AVX(m, q, ymm_sum, _mm256_loadu_si256,     \
                                POPCNT_UINT64_STEP2_AVX)               \
    }                                                                  \
    if (qe_2 >= qe_0 + 2) {                                            \
      __m256i ymm_m = _mm256_loadu_si256((const __m256i *)(m));        \
      __m256i ymm_q = _mm256_set_epi64x(q[1], q[1], q[0], q[0]);       \
      POPCNT_UINT64_STEP2_AVX(ymm_m, ymm_q, ymm_sum_0_0)               \
      m += 4;                                                          \
      q += 2;                                                          \
    }                                                                  \
  }                                                                    \
  ymm_sum_0_0 = _mm256_add_epi64(ymm_sum_0_0, ymm_sum_0_1);            \
  ymm_sum_0_0 = _mm256_add_epi64(                                      \
      ymm_sum_0_0,                                                     \
      _mm256_permute4x64_epi64(ymm_sum_0_0, _MM_SHUFFLE(0, 0, 3, 2))); \
  if (q != qe_2) {                                                     \
    __m256i ymm_m = _mm256_set_epi64x(0, 0, m[1], m[0]);               \
    __m256i ymm_q = _mm256_broadcast_si64(q);                          \
    POPCNT_UINT64_STEP2_AVX(ymm_m, ymm_q, ymm_sum_0_0)                 \
  }                                                                    \
  _mm_storel_pi((__m64 *)out, _NORM(ymm_sum_0_0));

//! Compute the distance between matrix and query (UINT64, M=2, N=2)
//!
//! Statement-style macro: it expands to declarations (`qe_0`, `qe_1`, `qe_2`
//! and the accumulators `ymm_sum_0_0` / `ymm_sum_0_1`, which are referenced
//! directly below and are expected to come from MATRIX_VAR_INIT) followed by
//! statements, so those names must be free at the expansion site.
//!
//! @param m     Matrix cursor (modifiable pointer, indexable as 64-bit
//!              values); two elements per step.
//! @param q     Query cursor (modifiable pointer comparable with
//!              `const uint64_t *`); two elements per step.
//! @param cnt   Number of steps; the true query end is
//!              `qe_2 = q + (cnt << 1)`.
//! @param out   Destination handed to MATRIX_VAR_STORE; its 16-byte alignment
//!              selects `_mm_store_ps` versus `_mm_storeu_ps`.
//! @param _NORM Forwarded untouched to MATRIX_VAR_STORE.
//!
//! Processing order:
//!  1. Main loops consume two steps per iteration (`m += 4, q += 4`) up to
//!     `qe_0` (cnt rounded down to an even number of steps). When cnt > 31
//!     the first 30 steps use POPCNT_UINT64_STEP1_AVX and the remainder
//!     POPCNT_UINT64_STEP2_AVX; otherwise everything up to `qe_0` uses STEP1.
//!     POPCNT_UINT64_PERMUTE_AVX is applied in between in either case. The
//!     aligned-load variant is chosen only when BOTH `m` and `q` are 32-byte
//!     aligned.
//!  2. The accumulators are merged into `ymm_sum_0_0`: its low 128 bits
//!     become low + high half of the old `ymm_sum_0_0`, its high 128 bits
//!     become low + high half of `ymm_sum_0_1`.
//!  3. For an odd `cnt`, the last step pairs lanes (m[0], m[1], m[0], m[1])
//!     with (q[0], q[0], q[1], q[1]) using STEP2 into `ymm_sum_0_0`.
//!  4. MATRIX_VAR_STORE(1, 1, 4, ...) writes the result.
//! NOTE(review): the 31-step cap presumably protects narrow STEP1 counters
//! from overflow - confirm against the STEP definitions outside this block.
#define POPCNT_UINT64_2X2_AVX(m, q, cnt, out, _NORM)                         \
  MATRIX_VAR_INIT(1, 2, __m256i, ymm_sum, _mm256_setzero_si256())            \
  const uint64_t *qe_0 = q + ((cnt >> 1) << 2);                              \
  const uint64_t *qe_1 = (cnt > 31 ? q + ((31 >> 1) << 2) : qe_0);           \
  const uint64_t *qe_2 = q + (cnt << 1);                                     \
  if (((uintptr_t)m & 0x1f) == 0 && ((uintptr_t)q & 0x1f) == 0) {            \
    for (; q != qe_1; m += 4, q += 4) {                                      \
      MATRIX_INT64_ITER_2X2_AVX(m, q, ymm_sum, _mm256_load_si256,            \
                                POPCNT_UINT64_STEP1_AVX)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(1, 2, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)             \
    for (; q != qe_0; m += 4, q += 4) {                                      \
      MATRIX_INT64_ITER_2X2_AVX(m, q, ymm_sum, _mm256_load_si256,            \
                                POPCNT_UINT64_STEP2_AVX)                     \
    }                                                                        \
  } else {                                                                   \
    for (; q != qe_1; m += 4, q += 4) {                                      \
      MATRIX_INT64_ITER_2X2_AVX(m, q, ymm_sum, _mm256_loadu_si256,           \
                                POPCNT_UINT64_STEP1_AVX)                     \
    }                                                                        \
    MATRIX_VAR_PERMUTE(1, 2, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)             \
    for (; q != qe_0; m += 4, q += 4) {                                      \
      MATRIX_INT64_ITER_2X2_AVX(m, q, ymm_sum, _mm256_loadu_si256,           \
                                POPCNT_UINT64_STEP2_AVX)                     \
    }                                                                        \
  }                                                                          \
  ymm_sum_0_0 = _mm256_add_epi64(                                            \
      _mm256_inserti128_si256(ymm_sum_0_0,                                   \
                              _mm256_castsi256_si128(ymm_sum_0_1), 1),       \
      _mm256_inserti128_si256(ymm_sum_0_1,                                   \
                              _mm256_extractf128_si256(ymm_sum_0_0, 1), 0)); \
  if (q != qe_2) {                                                           \
    __m256i ymm_m = _mm256_set_epi64x(m[1], m[0], m[1], m[0]);               \
    __m256i ymm_q = _mm256_set_epi64x(q[1], q[1], q[0], q[0]);               \
    POPCNT_UINT64_STEP2_AVX(ymm_m, ymm_q, ymm_sum_0_0)                       \
  }                                                                          \
  if (((uintptr_t)out & 0xf) == 0) {                                         \
    MATRIX_VAR_STORE(1, 1, 4, ymm_sum, out, _mm_store_ps, _NORM)             \
  } else {                                                                   \
    MATRIX_VAR_STORE(1, 1, 4, ymm_sum, out, _mm_storeu_ps, _NORM)            \
  }

//! Compute the distance between matrix and query (UINT64, M=4, N=1)
//!
//! Statement-style macro: it expands to declarations (`qe_0`, `qe_1`, `qe_2`
//! and the accumulators `ymm_sum_0_0` / `ymm_sum_1_0`, which are referenced
//! directly below and are expected to come from MATRIX_VAR_INIT) followed by
//! statements, so those names must be free at the expansion site.
//!
//! @param m     Matrix cursor (modifiable pointer); four elements belong to
//!              each query element. Only its 32-byte alignment decides between
//!              `_mm256_load_si256` and `_mm256_loadu_si256`.
//! @param q     Query cursor (modifiable pointer comparable with
//!              `const uint64_t *`).
//! @param cnt   Number of query elements; the true end is `qe_2 = q + cnt`.
//! @param out   Destination handed to MATRIX_VAR_STORE; its 16-byte alignment
//!              selects `_mm_store_ps` versus `_mm_storeu_ps`.
//! @param _NORM Forwarded untouched to MATRIX_VAR_STORE.
//!
//! Processing order:
//!  1. Main loops consume two query elements per iteration
//!     (`m += 8, q += 2`) up to `qe_0` (cnt rounded down to an even value).
//!     When cnt > 31 the first 30 elements use POPCNT_UINT64_STEP1_AVX and
//!     the remainder POPCNT_UINT64_STEP2_AVX; otherwise everything up to
//!     `qe_0` uses STEP1. POPCNT_UINT64_PERMUTE_AVX is applied in between in
//!     either case.
//!  2. For an odd `cnt`, one more 256-bit load of `m` is combined with `*q`
//!     replicated by `_mm256_broadcast_si64` using STEP2 into `ymm_sum_0_0`.
//!  3. `ymm_sum_1_0` is added into `ymm_sum_0_0` and
//!     MATRIX_VAR_STORE(1, 1, 4, ...) writes the result.
//! NOTE(review): `_mm256_broadcast_si64` is not defined in this block and is
//! presumably a project helper - confirm. The 31-element cap presumably
//! protects narrow STEP1 counters from overflow - confirm against the STEP
//! definitions.
#define POPCNT_UINT64_4X1_AVX(m, q, cnt, out, _NORM)               \
  MATRIX_VAR_INIT(2, 1, __m256i, ymm_sum, _mm256_setzero_si256())  \
  const uint64_t *qe_0 = q + ((cnt >> 1) << 1);                    \
  const uint64_t *qe_1 = (cnt > 31 ? q + ((31 >> 1) << 1) : qe_0); \
  const uint64_t *qe_2 = q + cnt;                                  \
  if (((uintptr_t)m & 0x1f) == 0) {                                \
    for (; q != qe_1; m += 8, q += 2) {                            \
      MATRIX_INT64_ITER_4X1_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                POPCNT_UINT64_STEP1_AVX)           \
    }                                                              \
    MATRIX_VAR_PERMUTE(2, 1, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)   \
    for (; q != qe_0; m += 8, q += 2) {                            \
      MATRIX_INT64_ITER_4X1_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                POPCNT_UINT64_STEP2_AVX)           \
    }                                                              \
    if (q != qe_2) {                                               \
      __m256i ymm_m = _mm256_load_si256((const __m256i *)(m));     \
      __m256i ymm_q = _mm256_broadcast_si64(q);                    \
      POPCNT_UINT64_STEP2_AVX(ymm_m, ymm_q, ymm_sum_0_0)           \
    }                                                              \
  } else {                                                         \
    for (; q != qe_1; m += 8, q += 2) {                            \
      MATRIX_INT64_ITER_4X1_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                POPCNT_UINT64_STEP1_AVX)           \
    }                                                              \
    MATRIX_VAR_PERMUTE(2, 1, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)   \
    for (; q != qe_0; m += 8, q += 2) {                            \
      MATRIX_INT64_ITER_4X1_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                POPCNT_UINT64_STEP2_AVX)           \
    }                                                              \
    if (q != qe_2) {                                               \
      __m256i ymm_m = _mm256_loadu_si256((const __m256i *)(m));    \
      __m256i ymm_q = _mm256_broadcast_si64(q);                    \
      POPCNT_UINT64_STEP2_AVX(ymm_m, ymm_q, ymm_sum_0_0)           \
    }                                                              \
  }                                                                \
  ymm_sum_0_0 = _mm256_add_epi64(ymm_sum_0_0, ymm_sum_1_0);        \
  if (((uintptr_t)out & 0xf) == 0) {                               \
    MATRIX_VAR_STORE(1, 1, 4, ymm_sum, out, _mm_store_ps, _NORM)   \
  } else {                                                         \
    MATRIX_VAR_STORE(1, 1, 4, ymm_sum, out, _mm_storeu_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (UINT64, M=4, N=2)
//!
//! Statement macro: it expands in place and declares `qe_0` / `qe_1` (plus
//! whatever MATRIX_VAR_INIT declares for `ymm_sum`) in the enclosing scope.
//!   m     matrix pointer lvalue, advanced by 4 per step; its 32-byte
//!         alignment selects _mm256_load_si256 vs _mm256_loadu_si256
//!   q     query pointer lvalue (usable as const uint64_t *), advanced by
//!         2 per step
//!   cnt   number of steps; evaluated more than once, so it must be free
//!         of side effects
//!   out   result pointer; its 16-byte alignment selects _mm_store_ps vs
//!         _mm_storeu_ps
//!   _NORM forwarded untouched to MATRIX_VAR_STORE
//! The first min(cnt, 31) steps run with POPCNT_UINT64_STEP1_AVX, then
//! POPCNT_UINT64_PERMUTE_AVX is applied through MATRIX_VAR_PERMUTE, and the
//! remaining steps run with POPCNT_UINT64_STEP2_AVX.
//! NOTE(review): the 31-step limit presumably keeps the STEP1 partial
//! counters from overflowing before they are widened - confirm against the
//! STEP1/PERMUTE definitions.
//! `cnt`, `q`, `m` and `out` are parenthesised wherever they are combined
//! with other operators, so compound arguments such as `a & b` or `ptr + 4`
//! keep their intended meaning.
#define POPCNT_UINT64_4X2_AVX(m, q, cnt, out, _NORM)               \
  MATRIX_VAR_INIT(1, 2, __m256i, ymm_sum, _mm256_setzero_si256())  \
  const uint64_t *qe_0 = (q) + ((cnt) << 1);                       \
  const uint64_t *qe_1 = ((cnt) > 31 ? (q) + (31 << 1) : qe_0);    \
  if (((uintptr_t)(m) & 0x1f) == 0) {                              \
    for (; q != qe_1; m += 4, q += 2) {                            \
      MATRIX_INT64_ITER_4X2_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                POPCNT_UINT64_STEP1_AVX)           \
    }                                                              \
    MATRIX_VAR_PERMUTE(1, 2, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)   \
    for (; q != qe_0; m += 4, q += 2) {                            \
      MATRIX_INT64_ITER_4X2_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                POPCNT_UINT64_STEP2_AVX)           \
    }                                                              \
  } else {                                                         \
    for (; q != qe_1; m += 4, q += 2) {                            \
      MATRIX_INT64_ITER_4X2_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                POPCNT_UINT64_STEP1_AVX)           \
    }                                                              \
    MATRIX_VAR_PERMUTE(1, 2, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)   \
    for (; q != qe_0; m += 4, q += 2) {                            \
      MATRIX_INT64_ITER_4X2_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                POPCNT_UINT64_STEP2_AVX)           \
    }                                                              \
  }                                                                \
  if (((uintptr_t)(out) & 0xf) == 0) {                             \
    MATRIX_VAR_STORE(1, 2, 4, ymm_sum, out, _mm_store_ps, _NORM)   \
  } else {                                                         \
    MATRIX_VAR_STORE(1, 2, 4, ymm_sum, out, _mm_storeu_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (UINT64, M=4, N=4)
//!
//! Statement macro: it expands in place and declares `qe_0` / `qe_1` (plus
//! whatever MATRIX_VAR_INIT declares for `ymm_sum`) in the enclosing scope.
//!   m     matrix pointer lvalue, advanced by 4 per step; its 32-byte
//!         alignment selects _mm256_load_si256 vs _mm256_loadu_si256
//!   q     query pointer lvalue (usable as const uint64_t *), advanced by
//!         4 per step
//!   cnt   number of steps; evaluated more than once, so it must be free
//!         of side effects
//!   out   result pointer; its 16-byte alignment selects _mm_store_ps vs
//!         _mm_storeu_ps
//!   _NORM forwarded untouched to MATRIX_VAR_STORE
//! The first min(cnt, 31) steps run with POPCNT_UINT64_STEP1_AVX, then
//! POPCNT_UINT64_PERMUTE_AVX is applied through MATRIX_VAR_PERMUTE, and the
//! remaining steps run with POPCNT_UINT64_STEP2_AVX.
//! NOTE(review): the 31-step limit presumably keeps the STEP1 partial
//! counters from overflowing before they are widened - confirm against the
//! STEP1/PERMUTE definitions.
//! `cnt`, `q`, `m` and `out` are parenthesised wherever they are combined
//! with other operators, so compound arguments such as `a & b` or `ptr + 4`
//! keep their intended meaning.
#define POPCNT_UINT64_4X4_AVX(m, q, cnt, out, _NORM)               \
  MATRIX_VAR_INIT(1, 4, __m256i, ymm_sum, _mm256_setzero_si256())  \
  const uint64_t *qe_0 = (q) + ((cnt) << 2);                       \
  const uint64_t *qe_1 = ((cnt) > 31 ? (q) + (31 << 2) : qe_0);    \
  if (((uintptr_t)(m) & 0x1f) == 0) {                              \
    for (; q != qe_1; m += 4, q += 4) {                            \
      MATRIX_INT64_ITER_4X4_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                POPCNT_UINT64_STEP1_AVX)           \
    }                                                              \
    MATRIX_VAR_PERMUTE(1, 4, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)   \
    for (; q != qe_0; m += 4, q += 4) {                            \
      MATRIX_INT64_ITER_4X4_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                POPCNT_UINT64_STEP2_AVX)           \
    }                                                              \
  } else {                                                         \
    for (; q != qe_1; m += 4, q += 4) {                            \
      MATRIX_INT64_ITER_4X4_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                POPCNT_UINT64_STEP1_AVX)           \
    }                                                              \
    MATRIX_VAR_PERMUTE(1, 4, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)   \
    for (; q != qe_0; m += 4, q += 4) {                            \
      MATRIX_INT64_ITER_4X4_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                POPCNT_UINT64_STEP2_AVX)           \
    }                                                              \
  }                                                                \
  if (((uintptr_t)(out) & 0xf) == 0) {                             \
    MATRIX_VAR_STORE(1, 4, 4, ymm_sum, out, _mm_store_ps, _NORM)   \
  } else {                                                         \
    MATRIX_VAR_STORE(1, 4, 4, ymm_sum, out, _mm_storeu_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (UINT64, M=8, N=1)
//!
//! Statement macro: it expands in place and declares `qe_0` / `qe_1` (plus
//! whatever MATRIX_VAR_INIT declares for `ymm_sum`) in the enclosing scope.
//!   m     matrix pointer lvalue, advanced by 8 per step; its 32-byte
//!         alignment selects _mm256_load_si256 vs _mm256_loadu_si256
//!   q     query pointer lvalue (usable as const uint64_t *), advanced by
//!         1 per step
//!   cnt   number of steps; evaluated more than once, so it must be free
//!         of side effects
//!   out   result pointer; its 16-byte alignment selects _mm_store_ps vs
//!         _mm_storeu_ps
//!   _NORM forwarded untouched to MATRIX_VAR_STORE
//! The first min(cnt, 31) steps run with POPCNT_UINT64_STEP1_AVX, then
//! POPCNT_UINT64_PERMUTE_AVX is applied through MATRIX_VAR_PERMUTE, and the
//! remaining steps run with POPCNT_UINT64_STEP2_AVX.
//! NOTE(review): the 31-step limit presumably keeps the STEP1 partial
//! counters from overflowing before they are widened - confirm against the
//! STEP1/PERMUTE definitions.
//! `cnt`, `q`, `m` and `out` are parenthesised wherever they are combined
//! with other operators, so compound arguments such as `a & b` or `ptr + 4`
//! keep their intended meaning.
#define POPCNT_UINT64_8X1_AVX(m, q, cnt, out, _NORM)               \
  MATRIX_VAR_INIT(2, 1, __m256i, ymm_sum, _mm256_setzero_si256())  \
  const uint64_t *qe_0 = (q) + (cnt);                              \
  const uint64_t *qe_1 = ((cnt) > 31 ? (q) + 31 : qe_0);           \
  if (((uintptr_t)(m) & 0x1f) == 0) {                              \
    for (; q != qe_1; m += 8, ++q) {                               \
      MATRIX_INT64_ITER_8X1_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                POPCNT_UINT64_STEP1_AVX)           \
    }                                                              \
    MATRIX_VAR_PERMUTE(2, 1, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)   \
    for (; q != qe_0; m += 8, ++q) {                               \
      MATRIX_INT64_ITER_8X1_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                POPCNT_UINT64_STEP2_AVX)           \
    }                                                              \
  } else {                                                         \
    for (; q != qe_1; m += 8, ++q) {                               \
      MATRIX_INT64_ITER_8X1_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                POPCNT_UINT64_STEP1_AVX)           \
    }                                                              \
    MATRIX_VAR_PERMUTE(2, 1, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)   \
    for (; q != qe_0; m += 8, ++q) {                               \
      MATRIX_INT64_ITER_8X1_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                POPCNT_UINT64_STEP2_AVX)           \
    }                                                              \
  }                                                                \
  if (((uintptr_t)(out) & 0xf) == 0) {                             \
    MATRIX_VAR_STORE(2, 1, 4, ymm_sum, out, _mm_store_ps, _NORM)   \
  } else {                                                         \
    MATRIX_VAR_STORE(2, 1, 4, ymm_sum, out, _mm_storeu_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (UINT64, M=8, N=2)
//!
//! Statement macro: it expands in place and declares `qe_0` / `qe_1` (plus
//! whatever MATRIX_VAR_INIT declares for `ymm_sum`) in the enclosing scope.
//!   m     matrix pointer lvalue, advanced by 8 per step; its 32-byte
//!         alignment selects _mm256_load_si256 vs _mm256_loadu_si256
//!   q     query pointer lvalue (usable as const uint64_t *), advanced by
//!         2 per step
//!   cnt   number of steps; evaluated more than once, so it must be free
//!         of side effects
//!   out   result pointer; its 16-byte alignment selects _mm_store_ps vs
//!         _mm_storeu_ps
//!   _NORM forwarded untouched to MATRIX_VAR_STORE
//! The first min(cnt, 31) steps run with POPCNT_UINT64_STEP1_AVX, then
//! POPCNT_UINT64_PERMUTE_AVX is applied through MATRIX_VAR_PERMUTE, and the
//! remaining steps run with POPCNT_UINT64_STEP2_AVX.
//! NOTE(review): the 31-step limit presumably keeps the STEP1 partial
//! counters from overflowing before they are widened - confirm against the
//! STEP1/PERMUTE definitions.
//! `cnt`, `q`, `m` and `out` are parenthesised wherever they are combined
//! with other operators, so compound arguments such as `a & b` or `ptr + 4`
//! keep their intended meaning.
#define POPCNT_UINT64_8X2_AVX(m, q, cnt, out, _NORM)               \
  MATRIX_VAR_INIT(2, 2, __m256i, ymm_sum, _mm256_setzero_si256())  \
  const uint64_t *qe_0 = (q) + ((cnt) << 1);                       \
  const uint64_t *qe_1 = ((cnt) > 31 ? (q) + (31 << 1) : qe_0);    \
  if (((uintptr_t)(m) & 0x1f) == 0) {                              \
    for (; q != qe_1; m += 8, q += 2) {                            \
      MATRIX_INT64_ITER_8X2_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                POPCNT_UINT64_STEP1_AVX)           \
    }                                                              \
    MATRIX_VAR_PERMUTE(2, 2, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)   \
    for (; q != qe_0; m += 8, q += 2) {                            \
      MATRIX_INT64_ITER_8X2_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                POPCNT_UINT64_STEP2_AVX)           \
    }                                                              \
  } else {                                                         \
    for (; q != qe_1; m += 8, q += 2) {                            \
      MATRIX_INT64_ITER_8X2_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                POPCNT_UINT64_STEP1_AVX)           \
    }                                                              \
    MATRIX_VAR_PERMUTE(2, 2, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)   \
    for (; q != qe_0; m += 8, q += 2) {                            \
      MATRIX_INT64_ITER_8X2_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                POPCNT_UINT64_STEP2_AVX)           \
    }                                                              \
  }                                                                \
  if (((uintptr_t)(out) & 0xf) == 0) {                             \
    MATRIX_VAR_STORE(2, 2, 4, ymm_sum, out, _mm_store_ps, _NORM)   \
  } else {                                                         \
    MATRIX_VAR_STORE(2, 2, 4, ymm_sum, out, _mm_storeu_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (UINT64, M=8, N=4)
//!
//! Statement macro: it expands in place and declares `qe_0` / `qe_1` (plus
//! whatever MATRIX_VAR_INIT declares for `ymm_sum`) in the enclosing scope.
//!   m     matrix pointer lvalue, advanced by 8 per step; its 32-byte
//!         alignment selects _mm256_load_si256 vs _mm256_loadu_si256
//!   q     query pointer lvalue (usable as const uint64_t *), advanced by
//!         4 per step
//!   cnt   number of steps; evaluated more than once, so it must be free
//!         of side effects
//!   out   result pointer; its 16-byte alignment selects _mm_store_ps vs
//!         _mm_storeu_ps
//!   _NORM forwarded untouched to MATRIX_VAR_STORE
//! The first min(cnt, 31) steps run with POPCNT_UINT64_STEP1_AVX, then
//! POPCNT_UINT64_PERMUTE_AVX is applied through MATRIX_VAR_PERMUTE, and the
//! remaining steps run with POPCNT_UINT64_STEP2_AVX.
//! NOTE(review): the 31-step limit presumably keeps the STEP1 partial
//! counters from overflowing before they are widened - confirm against the
//! STEP1/PERMUTE definitions.
//! `cnt`, `q`, `m` and `out` are parenthesised wherever they are combined
//! with other operators, so compound arguments such as `a & b` or `ptr + 4`
//! keep their intended meaning.
#define POPCNT_UINT64_8X4_AVX(m, q, cnt, out, _NORM)               \
  MATRIX_VAR_INIT(2, 4, __m256i, ymm_sum, _mm256_setzero_si256())  \
  const uint64_t *qe_0 = (q) + ((cnt) << 2);                       \
  const uint64_t *qe_1 = ((cnt) > 31 ? (q) + (31 << 2) : qe_0);    \
  if (((uintptr_t)(m) & 0x1f) == 0) {                              \
    for (; q != qe_1; m += 8, q += 4) {                            \
      MATRIX_INT64_ITER_8X4_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                POPCNT_UINT64_STEP1_AVX)           \
    }                                                              \
    MATRIX_VAR_PERMUTE(2, 4, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)   \
    for (; q != qe_0; m += 8, q += 4) {                            \
      MATRIX_INT64_ITER_8X4_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                POPCNT_UINT64_STEP2_AVX)           \
    }                                                              \
  } else {                                                         \
    for (; q != qe_1; m += 8, q += 4) {                            \
      MATRIX_INT64_ITER_8X4_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                POPCNT_UINT64_STEP1_AVX)           \
    }                                                              \
    MATRIX_VAR_PERMUTE(2, 4, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)   \
    for (; q != qe_0; m += 8, q += 4) {                            \
      MATRIX_INT64_ITER_8X4_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                POPCNT_UINT64_STEP2_AVX)           \
    }                                                              \
  }                                                                \
  if (((uintptr_t)(out) & 0xf) == 0) {                             \
    MATRIX_VAR_STORE(2, 4, 4, ymm_sum, out, _mm_store_ps, _NORM)   \
  } else {                                                         \
    MATRIX_VAR_STORE(2, 4, 4, ymm_sum, out, _mm_storeu_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (UINT64, M=8, N=8)
//!
//! Statement macro: it expands in place and declares `qe_0` / `qe_1` (plus
//! whatever MATRIX_VAR_INIT declares for `ymm_sum`) in the enclosing scope.
//!   m     matrix pointer lvalue, advanced by 8 per step; its 32-byte
//!         alignment selects _mm256_load_si256 vs _mm256_loadu_si256
//!   q     query pointer lvalue (usable as const uint64_t *), advanced by
//!         8 per step
//!   cnt   number of steps; evaluated more than once, so it must be free
//!         of side effects
//!   out   result pointer; its 16-byte alignment selects _mm_store_ps vs
//!         _mm_storeu_ps
//!   _NORM forwarded untouched to MATRIX_VAR_STORE
//! The first min(cnt, 31) steps run with POPCNT_UINT64_STEP1_AVX, then
//! POPCNT_UINT64_PERMUTE_AVX is applied through MATRIX_VAR_PERMUTE, and the
//! remaining steps run with POPCNT_UINT64_STEP2_AVX.
//! NOTE(review): the 31-step limit presumably keeps the STEP1 partial
//! counters from overflowing before they are widened - confirm against the
//! STEP1/PERMUTE definitions.
//! `cnt`, `q`, `m` and `out` are parenthesised wherever they are combined
//! with other operators, so compound arguments such as `a & b` or `ptr + 4`
//! keep their intended meaning.
#define POPCNT_UINT64_8X8_AVX(m, q, cnt, out, _NORM)               \
  MATRIX_VAR_INIT(2, 8, __m256i, ymm_sum, _mm256_setzero_si256())  \
  const uint64_t *qe_0 = (q) + ((cnt) << 3);                       \
  const uint64_t *qe_1 = ((cnt) > 31 ? (q) + (31 << 3) : qe_0);    \
  if (((uintptr_t)(m) & 0x1f) == 0) {                              \
    for (; q != qe_1; m += 8, q += 8) {                            \
      MATRIX_INT64_ITER_8X8_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                POPCNT_UINT64_STEP1_AVX)           \
    }                                                              \
    MATRIX_VAR_PERMUTE(2, 8, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)   \
    for (; q != qe_0; m += 8, q += 8) {                            \
      MATRIX_INT64_ITER_8X8_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                POPCNT_UINT64_STEP2_AVX)           \
    }                                                              \
  } else {                                                         \
    for (; q != qe_1; m += 8, q += 8) {                            \
      MATRIX_INT64_ITER_8X8_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                POPCNT_UINT64_STEP1_AVX)           \
    }                                                              \
    MATRIX_VAR_PERMUTE(2, 8, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)   \
    for (; q != qe_0; m += 8, q += 8) {                            \
      MATRIX_INT64_ITER_8X8_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                POPCNT_UINT64_STEP2_AVX)           \
    }                                                              \
  }                                                                \
  if (((uintptr_t)(out) & 0xf) == 0) {                             \
    MATRIX_VAR_STORE(2, 8, 4, ymm_sum, out, _mm_store_ps, _NORM)   \
  } else {                                                         \
    MATRIX_VAR_STORE(2, 8, 4, ymm_sum, out, _mm_storeu_ps, _NORM)  \
  }

//! Compute the distance between matrix and query (UINT64, M=16, N=1)
//!
//! Statement macro: it expands in place and declares `qe_0` / `qe_1` (plus
//! whatever MATRIX_VAR_INIT declares for `ymm_sum`) in the enclosing scope.
//!   m     matrix pointer lvalue, advanced by 16 per step; its 32-byte
//!         alignment selects _mm256_load_si256 vs _mm256_loadu_si256
//!   q     query pointer lvalue (usable as const uint64_t *), advanced by
//!         1 per step
//!   cnt   number of steps; evaluated more than once, so it must be free
//!         of side effects
//!   out   result pointer; its 16-byte alignment selects _mm_store_ps vs
//!         _mm_storeu_ps
//!   _NORM forwarded untouched to MATRIX_VAR_STORE
//! The first min(cnt, 31) steps run with POPCNT_UINT64_STEP1_AVX, then
//! POPCNT_UINT64_PERMUTE_AVX is applied through MATRIX_VAR_PERMUTE, and the
//! remaining steps run with POPCNT_UINT64_STEP2_AVX.
//! NOTE(review): the 31-step limit presumably keeps the STEP1 partial
//! counters from overflowing before they are widened - confirm against the
//! STEP1/PERMUTE definitions.
//! `cnt`, `q`, `m` and `out` are parenthesised wherever they are combined
//! with other operators, so compound arguments such as `a & b` or `ptr + 4`
//! keep their intended meaning.
#define POPCNT_UINT64_16X1_AVX(m, q, cnt, out, _NORM)               \
  MATRIX_VAR_INIT(4, 1, __m256i, ymm_sum, _mm256_setzero_si256())   \
  const uint64_t *qe_0 = (q) + (cnt);                               \
  const uint64_t *qe_1 = ((cnt) > 31 ? (q) + 31 : qe_0);            \
  if (((uintptr_t)(m) & 0x1f) == 0) {                               \
    for (; q != qe_1; m += 16, ++q) {                               \
      MATRIX_INT64_ITER_16X1_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                 POPCNT_UINT64_STEP1_AVX)           \
    }                                                               \
    MATRIX_VAR_PERMUTE(4, 1, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \
    for (; q != qe_0; m += 16, ++q) {                               \
      MATRIX_INT64_ITER_16X1_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                 POPCNT_UINT64_STEP2_AVX)           \
    }                                                               \
  } else {                                                          \
    for (; q != qe_1; m += 16, ++q) {                               \
      MATRIX_INT64_ITER_16X1_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                 POPCNT_UINT64_STEP1_AVX)           \
    }                                                               \
    MATRIX_VAR_PERMUTE(4, 1, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \
    for (; q != qe_0; m += 16, ++q) {                               \
      MATRIX_INT64_ITER_16X1_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                 POPCNT_UINT64_STEP2_AVX)           \
    }                                                               \
  }                                                                 \
  if (((uintptr_t)(out) & 0xf) == 0) {                              \
    MATRIX_VAR_STORE(4, 1, 4, ymm_sum, out, _mm_store_ps, _NORM)    \
  } else {                                                          \
    MATRIX_VAR_STORE(4, 1, 4, ymm_sum, out, _mm_storeu_ps, _NORM)   \
  }

//! Compute the distance between matrix and query (UINT64, M=16, N=2)
//!
//! Statement macro: it expands in place and declares `qe_0` / `qe_1` (plus
//! whatever MATRIX_VAR_INIT declares for `ymm_sum`) in the enclosing scope.
//!   m     matrix pointer lvalue, advanced by 16 per step; its 32-byte
//!         alignment selects _mm256_load_si256 vs _mm256_loadu_si256
//!   q     query pointer lvalue (usable as const uint64_t *), advanced by
//!         2 per step
//!   cnt   number of steps; evaluated more than once, so it must be free
//!         of side effects
//!   out   result pointer; its 16-byte alignment selects _mm_store_ps vs
//!         _mm_storeu_ps
//!   _NORM forwarded untouched to MATRIX_VAR_STORE
//! The first min(cnt, 31) steps run with POPCNT_UINT64_STEP1_AVX, then
//! POPCNT_UINT64_PERMUTE_AVX is applied through MATRIX_VAR_PERMUTE, and the
//! remaining steps run with POPCNT_UINT64_STEP2_AVX.
//! NOTE(review): the 31-step limit presumably keeps the STEP1 partial
//! counters from overflowing before they are widened - confirm against the
//! STEP1/PERMUTE definitions.
//! `cnt`, `q`, `m` and `out` are parenthesised wherever they are combined
//! with other operators, so compound arguments such as `a & b` or `ptr + 4`
//! keep their intended meaning.
#define POPCNT_UINT64_16X2_AVX(m, q, cnt, out, _NORM)               \
  MATRIX_VAR_INIT(4, 2, __m256i, ymm_sum, _mm256_setzero_si256())   \
  const uint64_t *qe_0 = (q) + ((cnt) << 1);                        \
  const uint64_t *qe_1 = ((cnt) > 31 ? (q) + (31 << 1) : qe_0);     \
  if (((uintptr_t)(m) & 0x1f) == 0) {                               \
    for (; q != qe_1; m += 16, q += 2) {                            \
      MATRIX_INT64_ITER_16X2_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                 POPCNT_UINT64_STEP1_AVX)           \
    }                                                               \
    MATRIX_VAR_PERMUTE(4, 2, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \
    for (; q != qe_0; m += 16, q += 2) {                            \
      MATRIX_INT64_ITER_16X2_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                 POPCNT_UINT64_STEP2_AVX)           \
    }                                                               \
  } else {                                                          \
    for (; q != qe_1; m += 16, q += 2) {                            \
      MATRIX_INT64_ITER_16X2_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                 POPCNT_UINT64_STEP1_AVX)           \
    }                                                               \
    MATRIX_VAR_PERMUTE(4, 2, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \
    for (; q != qe_0; m += 16, q += 2) {                            \
      MATRIX_INT64_ITER_16X2_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                 POPCNT_UINT64_STEP2_AVX)           \
    }                                                               \
  }                                                                 \
  if (((uintptr_t)(out) & 0xf) == 0) {                              \
    MATRIX_VAR_STORE(4, 2, 4, ymm_sum, out, _mm_store_ps, _NORM)    \
  } else {                                                          \
    MATRIX_VAR_STORE(4, 2, 4, ymm_sum, out, _mm_storeu_ps, _NORM)   \
  }

//! Compute the distance between matrix and query (UINT64, M=16, N=4)
//!
//! Statement macro: it expands in place and declares `qe_0` / `qe_1` (plus
//! whatever MATRIX_VAR_INIT declares for `ymm_sum`) in the enclosing scope.
//!   m     matrix pointer lvalue, advanced by 16 per step; its 32-byte
//!         alignment selects _mm256_load_si256 vs _mm256_loadu_si256
//!   q     query pointer lvalue (usable as const uint64_t *), advanced by
//!         4 per step
//!   cnt   number of steps; evaluated more than once, so it must be free
//!         of side effects
//!   out   result pointer; its 16-byte alignment selects _mm_store_ps vs
//!         _mm_storeu_ps
//!   _NORM forwarded untouched to MATRIX_VAR_STORE
//! The first min(cnt, 31) steps run with POPCNT_UINT64_STEP1_AVX, then
//! POPCNT_UINT64_PERMUTE_AVX is applied through MATRIX_VAR_PERMUTE, and the
//! remaining steps run with POPCNT_UINT64_STEP2_AVX.
//! NOTE(review): the 31-step limit presumably keeps the STEP1 partial
//! counters from overflowing before they are widened - confirm against the
//! STEP1/PERMUTE definitions.
//! `cnt`, `q`, `m` and `out` are parenthesised wherever they are combined
//! with other operators, so compound arguments such as `a & b` or `ptr + 4`
//! keep their intended meaning.
#define POPCNT_UINT64_16X4_AVX(m, q, cnt, out, _NORM)               \
  MATRIX_VAR_INIT(4, 4, __m256i, ymm_sum, _mm256_setzero_si256())   \
  const uint64_t *qe_0 = (q) + ((cnt) << 2);                        \
  const uint64_t *qe_1 = ((cnt) > 31 ? (q) + (31 << 2) : qe_0);     \
  if (((uintptr_t)(m) & 0x1f) == 0) {                               \
    for (; q != qe_1; m += 16, q += 4) {                            \
      MATRIX_INT64_ITER_16X4_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                 POPCNT_UINT64_STEP1_AVX)           \
    }                                                               \
    MATRIX_VAR_PERMUTE(4, 4, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \
    for (; q != qe_0; m += 16, q += 4) {                            \
      MATRIX_INT64_ITER_16X4_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                 POPCNT_UINT64_STEP2_AVX)           \
    }                                                               \
  } else {                                                          \
    for (; q != qe_1; m += 16, q += 4) {                            \
      MATRIX_INT64_ITER_16X4_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                 POPCNT_UINT64_STEP1_AVX)           \
    }                                                               \
    MATRIX_VAR_PERMUTE(4, 4, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \
    for (; q != qe_0; m += 16, q += 4) {                            \
      MATRIX_INT64_ITER_16X4_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                 POPCNT_UINT64_STEP2_AVX)           \
    }                                                               \
  }                                                                 \
  if (((uintptr_t)(out) & 0xf) == 0) {                              \
    MATRIX_VAR_STORE(4, 4, 4, ymm_sum, out, _mm_store_ps, _NORM)    \
  } else {                                                          \
    MATRIX_VAR_STORE(4, 4, 4, ymm_sum, out, _mm_storeu_ps, _NORM)   \
  }

//! Compute the distance between matrix and query (UINT64, M=16, N=8)
//!
//! Statement macro: it expands in place and declares `qe_0` / `qe_1` (plus
//! whatever MATRIX_VAR_INIT declares for `ymm_sum`) in the enclosing scope.
//!   m     matrix pointer lvalue, advanced by 16 per step; its 32-byte
//!         alignment selects _mm256_load_si256 vs _mm256_loadu_si256
//!   q     query pointer lvalue (usable as const uint64_t *), advanced by
//!         8 per step
//!   cnt   number of steps; evaluated more than once, so it must be free
//!         of side effects
//!   out   result pointer; its 16-byte alignment selects _mm_store_ps vs
//!         _mm_storeu_ps
//!   _NORM forwarded untouched to MATRIX_VAR_STORE
//! The first min(cnt, 31) steps run with POPCNT_UINT64_STEP1_AVX, then
//! POPCNT_UINT64_PERMUTE_AVX is applied through MATRIX_VAR_PERMUTE, and the
//! remaining steps run with POPCNT_UINT64_STEP2_AVX.
//! NOTE(review): the 31-step limit presumably keeps the STEP1 partial
//! counters from overflowing before they are widened - confirm against the
//! STEP1/PERMUTE definitions.
//! `cnt`, `q`, `m` and `out` are parenthesised wherever they are combined
//! with other operators, so compound arguments such as `a & b` or `ptr + 4`
//! keep their intended meaning.
#define POPCNT_UINT64_16X8_AVX(m, q, cnt, out, _NORM)               \
  MATRIX_VAR_INIT(4, 8, __m256i, ymm_sum, _mm256_setzero_si256())   \
  const uint64_t *qe_0 = (q) + ((cnt) << 3);                        \
  const uint64_t *qe_1 = ((cnt) > 31 ? (q) + (31 << 3) : qe_0);     \
  if (((uintptr_t)(m) & 0x1f) == 0) {                               \
    for (; q != qe_1; m += 16, q += 8) {                            \
      MATRIX_INT64_ITER_16X8_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                 POPCNT_UINT64_STEP1_AVX)           \
    }                                                               \
    MATRIX_VAR_PERMUTE(4, 8, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \
    for (; q != qe_0; m += 16, q += 8) {                            \
      MATRIX_INT64_ITER_16X8_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                 POPCNT_UINT64_STEP2_AVX)           \
    }                                                               \
  } else {                                                          \
    for (; q != qe_1; m += 16, q += 8) {                            \
      MATRIX_INT64_ITER_16X8_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                 POPCNT_UINT64_STEP1_AVX)           \
    }                                                               \
    MATRIX_VAR_PERMUTE(4, 8, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \
    for (; q != qe_0; m += 16, q += 8) {                            \
      MATRIX_INT64_ITER_16X8_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                 POPCNT_UINT64_STEP2_AVX)           \
    }                                                               \
  }                                                                 \
  if (((uintptr_t)(out) & 0xf) == 0) {                              \
    MATRIX_VAR_STORE(4, 8, 4, ymm_sum, out, _mm_store_ps, _NORM)    \
  } else {                                                          \
    MATRIX_VAR_STORE(4, 8, 4, ymm_sum, out, _mm_storeu_ps, _NORM)   \
  }

//! Compute the distance between matrix and query (UINT64, M=16, N=16)
//!
//! Statement macro: it expands in place and declares `qe_0` / `qe_1` (plus
//! whatever MATRIX_VAR_INIT declares for `ymm_sum`) in the enclosing scope.
//!   m     matrix pointer lvalue, advanced by 16 per step; its 32-byte
//!         alignment selects _mm256_load_si256 vs _mm256_loadu_si256
//!   q     query pointer lvalue (usable as const uint64_t *), advanced by
//!         16 per step
//!   cnt   number of steps; evaluated more than once, so it must be free
//!         of side effects
//!   out   result pointer; its 16-byte alignment selects _mm_store_ps vs
//!         _mm_storeu_ps
//!   _NORM forwarded untouched to MATRIX_VAR_STORE
//! The first min(cnt, 31) steps run with POPCNT_UINT64_STEP1_AVX, then
//! POPCNT_UINT64_PERMUTE_AVX is applied through MATRIX_VAR_PERMUTE, and the
//! remaining steps run with POPCNT_UINT64_STEP2_AVX.
//! NOTE(review): the 31-step limit presumably keeps the STEP1 partial
//! counters from overflowing before they are widened - confirm against the
//! STEP1/PERMUTE definitions.
//! `cnt`, `q`, `m` and `out` are parenthesised wherever they are combined
//! with other operators, so compound arguments such as `a & b` or `ptr + 4`
//! keep their intended meaning.
#define POPCNT_UINT64_16X16_AVX(m, q, cnt, out, _NORM)               \
  MATRIX_VAR_INIT(4, 16, __m256i, ymm_sum, _mm256_setzero_si256())   \
  const uint64_t *qe_0 = (q) + ((cnt) << 4);                         \
  const uint64_t *qe_1 = ((cnt) > 31 ? (q) + (31 << 4) : qe_0);      \
  if (((uintptr_t)(m) & 0x1f) == 0) {                                \
    for (; q != qe_1; m += 16, q += 16) {                            \
      MATRIX_INT64_ITER_16X16_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                  POPCNT_UINT64_STEP1_AVX)           \
    }                                                                \
    MATRIX_VAR_PERMUTE(4, 16, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \
    for (; q != qe_0; m += 16, q += 16) {                            \
      MATRIX_INT64_ITER_16X16_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                  POPCNT_UINT64_STEP2_AVX)           \
    }                                                                \
  } else {                                                           \
    for (; q != qe_1; m += 16, q += 16) {                            \
      MATRIX_INT64_ITER_16X16_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                  POPCNT_UINT64_STEP1_AVX)           \
    }                                                                \
    MATRIX_VAR_PERMUTE(4, 16, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \
    for (; q != qe_0; m += 16, q += 16) {                            \
      MATRIX_INT64_ITER_16X16_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                  POPCNT_UINT64_STEP2_AVX)           \
    }                                                                \
  }                                                                  \
  if (((uintptr_t)(out) & 0xf) == 0) {                               \
    MATRIX_VAR_STORE(4, 16, 4, ymm_sum, out, _mm_store_ps, _NORM)    \
  } else {                                                           \
    MATRIX_VAR_STORE(4, 16, 4, ymm_sum, out, _mm_storeu_ps, _NORM)   \
  }

//! Compute the distance between matrix and query (UINT64, M=32, N=1)
//!
//! Statement macro: it expands in place and declares `qe_0` / `qe_1` (plus
//! whatever MATRIX_VAR_INIT declares for `ymm_sum`) in the enclosing scope.
//!   m     matrix pointer lvalue, advanced by 32 per step; its 32-byte
//!         alignment selects _mm256_load_si256 vs _mm256_loadu_si256
//!   q     query pointer lvalue (usable as const uint64_t *), advanced by
//!         1 per step
//!   cnt   number of steps; evaluated more than once, so it must be free
//!         of side effects
//!   out   result pointer; its 16-byte alignment selects _mm_store_ps vs
//!         _mm_storeu_ps
//!   _NORM forwarded untouched to MATRIX_VAR_STORE
//! The first min(cnt, 31) steps run with POPCNT_UINT64_STEP1_AVX, then
//! POPCNT_UINT64_PERMUTE_AVX is applied through MATRIX_VAR_PERMUTE, and the
//! remaining steps run with POPCNT_UINT64_STEP2_AVX.
//! NOTE(review): the 31-step limit presumably keeps the STEP1 partial
//! counters from overflowing before they are widened - confirm against the
//! STEP1/PERMUTE definitions.
//! `cnt`, `q`, `m` and `out` are parenthesised wherever they are combined
//! with other operators, so compound arguments such as `a & b` or `ptr + 4`
//! keep their intended meaning.
#define POPCNT_UINT64_32X1_AVX(m, q, cnt, out, _NORM)               \
  MATRIX_VAR_INIT(8, 1, __m256i, ymm_sum, _mm256_setzero_si256())   \
  const uint64_t *qe_0 = (q) + (cnt);                               \
  const uint64_t *qe_1 = ((cnt) > 31 ? (q) + 31 : qe_0);            \
  if (((uintptr_t)(m) & 0x1f) == 0) {                               \
    for (; q != qe_1; m += 32, ++q) {                               \
      MATRIX_INT64_ITER_32X1_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                 POPCNT_UINT64_STEP1_AVX)           \
    }                                                               \
    MATRIX_VAR_PERMUTE(8, 1, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \
    for (; q != qe_0; m += 32, ++q) {                               \
      MATRIX_INT64_ITER_32X1_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                 POPCNT_UINT64_STEP2_AVX)           \
    }                                                               \
  } else {                                                          \
    for (; q != qe_1; m += 32, ++q) {                               \
      MATRIX_INT64_ITER_32X1_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                 POPCNT_UINT64_STEP1_AVX)           \
    }                                                               \
    MATRIX_VAR_PERMUTE(8, 1, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \
    for (; q != qe_0; m += 32, ++q) {                               \
      MATRIX_INT64_ITER_32X1_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                 POPCNT_UINT64_STEP2_AVX)           \
    }                                                               \
  }                                                                 \
  if (((uintptr_t)(out) & 0xf) == 0) {                              \
    MATRIX_VAR_STORE(8, 1, 4, ymm_sum, out, _mm_store_ps, _NORM)    \
  } else {                                                          \
    MATRIX_VAR_STORE(8, 1, 4, ymm_sum, out, _mm_storeu_ps, _NORM)   \
  }

//! Compute the distance between matrix and query (UINT64, M=32, N=2)
//!
//! Statement macro: it expands in place and declares `qe_0` / `qe_1` (plus
//! whatever MATRIX_VAR_INIT declares for `ymm_sum`) in the enclosing scope.
//!   m     matrix pointer lvalue, advanced by 32 per step; its 32-byte
//!         alignment selects _mm256_load_si256 vs _mm256_loadu_si256
//!   q     query pointer lvalue (usable as const uint64_t *), advanced by
//!         2 per step
//!   cnt   number of steps; evaluated more than once, so it must be free
//!         of side effects
//!   out   result pointer; its 16-byte alignment selects _mm_store_ps vs
//!         _mm_storeu_ps
//!   _NORM forwarded untouched to MATRIX_VAR_STORE
//! The first min(cnt, 31) steps run with POPCNT_UINT64_STEP1_AVX, then
//! POPCNT_UINT64_PERMUTE_AVX is applied through MATRIX_VAR_PERMUTE, and the
//! remaining steps run with POPCNT_UINT64_STEP2_AVX.
//! NOTE(review): the 31-step limit presumably keeps the STEP1 partial
//! counters from overflowing before they are widened - confirm against the
//! STEP1/PERMUTE definitions.
//! `cnt`, `q`, `m` and `out` are parenthesised wherever they are combined
//! with other operators, so compound arguments such as `a & b` or `ptr + 4`
//! keep their intended meaning.
#define POPCNT_UINT64_32X2_AVX(m, q, cnt, out, _NORM)               \
  MATRIX_VAR_INIT(8, 2, __m256i, ymm_sum, _mm256_setzero_si256())   \
  const uint64_t *qe_0 = (q) + ((cnt) << 1);                        \
  const uint64_t *qe_1 = ((cnt) > 31 ? (q) + (31 << 1) : qe_0);     \
  if (((uintptr_t)(m) & 0x1f) == 0) {                               \
    for (; q != qe_1; m += 32, q += 2) {                            \
      MATRIX_INT64_ITER_32X2_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                 POPCNT_UINT64_STEP1_AVX)           \
    }                                                               \
    MATRIX_VAR_PERMUTE(8, 2, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \
    for (; q != qe_0; m += 32, q += 2) {                            \
      MATRIX_INT64_ITER_32X2_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                 POPCNT_UINT64_STEP2_AVX)           \
    }                                                               \
  } else {                                                          \
    for (; q != qe_1; m += 32, q += 2) {                            \
      MATRIX_INT64_ITER_32X2_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                 POPCNT_UINT64_STEP1_AVX)           \
    }                                                               \
    MATRIX_VAR_PERMUTE(8, 2, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \
    for (; q != qe_0; m += 32, q += 2) {                            \
      MATRIX_INT64_ITER_32X2_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                 POPCNT_UINT64_STEP2_AVX)           \
    }                                                               \
  }                                                                 \
  if (((uintptr_t)(out) & 0xf) == 0) {                              \
    MATRIX_VAR_STORE(8, 2, 4, ymm_sum, out, _mm_store_ps, _NORM)    \
  } else {                                                          \
    MATRIX_VAR_STORE(8, 2, 4, ymm_sum, out, _mm_storeu_ps, _NORM)   \
  }

//! Compute the distance between matrix and query (UINT64, M=32, N=4)
//!
//! Statement macro: it expands in place and declares `qe_0` / `qe_1` (plus
//! whatever MATRIX_VAR_INIT declares for `ymm_sum`) in the enclosing scope.
//!   m     matrix pointer lvalue, advanced by 32 per step; its 32-byte
//!         alignment selects _mm256_load_si256 vs _mm256_loadu_si256
//!   q     query pointer lvalue (usable as const uint64_t *), advanced by
//!         4 per step
//!   cnt   number of steps; evaluated more than once, so it must be free
//!         of side effects
//!   out   result pointer; its 16-byte alignment selects _mm_store_ps vs
//!         _mm_storeu_ps
//!   _NORM forwarded untouched to MATRIX_VAR_STORE
//! The first min(cnt, 31) steps run with POPCNT_UINT64_STEP1_AVX, then
//! POPCNT_UINT64_PERMUTE_AVX is applied through MATRIX_VAR_PERMUTE, and the
//! remaining steps run with POPCNT_UINT64_STEP2_AVX.
//! NOTE(review): the 31-step limit presumably keeps the STEP1 partial
//! counters from overflowing before they are widened - confirm against the
//! STEP1/PERMUTE definitions.
//! `cnt`, `q`, `m` and `out` are parenthesised wherever they are combined
//! with other operators, so compound arguments such as `a & b` or `ptr + 4`
//! keep their intended meaning.
#define POPCNT_UINT64_32X4_AVX(m, q, cnt, out, _NORM)               \
  MATRIX_VAR_INIT(8, 4, __m256i, ymm_sum, _mm256_setzero_si256())   \
  const uint64_t *qe_0 = (q) + ((cnt) << 2);                        \
  const uint64_t *qe_1 = ((cnt) > 31 ? (q) + (31 << 2) : qe_0);     \
  if (((uintptr_t)(m) & 0x1f) == 0) {                               \
    for (; q != qe_1; m += 32, q += 4) {                            \
      MATRIX_INT64_ITER_32X4_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                 POPCNT_UINT64_STEP1_AVX)           \
    }                                                               \
    MATRIX_VAR_PERMUTE(8, 4, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \
    for (; q != qe_0; m += 32, q += 4) {                            \
      MATRIX_INT64_ITER_32X4_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                 POPCNT_UINT64_STEP2_AVX)           \
    }                                                               \
  } else {                                                          \
    for (; q != qe_1; m += 32, q += 4) {                            \
      MATRIX_INT64_ITER_32X4_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                 POPCNT_UINT64_STEP1_AVX)           \
    }                                                               \
    MATRIX_VAR_PERMUTE(8, 4, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \
    for (; q != qe_0; m += 32, q += 4) {                            \
      MATRIX_INT64_ITER_32X4_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                 POPCNT_UINT64_STEP2_AVX)           \
    }                                                               \
  }                                                                 \
  if (((uintptr_t)(out) & 0xf) == 0) {                              \
    MATRIX_VAR_STORE(8, 4, 4, ymm_sum, out, _mm_store_ps, _NORM)    \
  } else {                                                          \
    MATRIX_VAR_STORE(8, 4, 4, ymm_sum, out, _mm_storeu_ps, _NORM)   \
  }

//! Compute the distance between matrix and query (UINT64, M=32, N=8)
//!
//! Statement macro: it expands in place and declares `qe_0` / `qe_1` (plus
//! whatever MATRIX_VAR_INIT declares for `ymm_sum`) in the enclosing scope.
//!   m     matrix pointer lvalue, advanced by 32 per step; its 32-byte
//!         alignment selects _mm256_load_si256 vs _mm256_loadu_si256
//!   q     query pointer lvalue (usable as const uint64_t *), advanced by
//!         8 per step
//!   cnt   number of steps; evaluated more than once, so it must be free
//!         of side effects
//!   out   result pointer; its 16-byte alignment selects _mm_store_ps vs
//!         _mm_storeu_ps
//!   _NORM forwarded untouched to MATRIX_VAR_STORE
//! The first min(cnt, 31) steps run with POPCNT_UINT64_STEP1_AVX, then
//! POPCNT_UINT64_PERMUTE_AVX is applied through MATRIX_VAR_PERMUTE, and the
//! remaining steps run with POPCNT_UINT64_STEP2_AVX.
//! NOTE(review): the 31-step limit presumably keeps the STEP1 partial
//! counters from overflowing before they are widened - confirm against the
//! STEP1/PERMUTE definitions.
//! `cnt`, `q`, `m` and `out` are parenthesised wherever they are combined
//! with other operators, so compound arguments such as `a & b` or `ptr + 4`
//! keep their intended meaning.
#define POPCNT_UINT64_32X8_AVX(m, q, cnt, out, _NORM)               \
  MATRIX_VAR_INIT(8, 8, __m256i, ymm_sum, _mm256_setzero_si256())   \
  const uint64_t *qe_0 = (q) + ((cnt) << 3);                        \
  const uint64_t *qe_1 = ((cnt) > 31 ? (q) + (31 << 3) : qe_0);     \
  if (((uintptr_t)(m) & 0x1f) == 0) {                               \
    for (; q != qe_1; m += 32, q += 8) {                            \
      MATRIX_INT64_ITER_32X8_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                 POPCNT_UINT64_STEP1_AVX)           \
    }                                                               \
    MATRIX_VAR_PERMUTE(8, 8, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \
    for (; q != qe_0; m += 32, q += 8) {                            \
      MATRIX_INT64_ITER_32X8_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                 POPCNT_UINT64_STEP2_AVX)           \
    }                                                               \
  } else {                                                          \
    for (; q != qe_1; m += 32, q += 8) {                            \
      MATRIX_INT64_ITER_32X8_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                 POPCNT_UINT64_STEP1_AVX)           \
    }                                                               \
    MATRIX_VAR_PERMUTE(8, 8, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \
    for (; q != qe_0; m += 32, q += 8) {                            \
      MATRIX_INT64_ITER_32X8_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                 POPCNT_UINT64_STEP2_AVX)           \
    }                                                               \
  }                                                                 \
  if (((uintptr_t)(out) & 0xf) == 0) {                              \
    MATRIX_VAR_STORE(8, 8, 4, ymm_sum, out, _mm_store_ps, _NORM)    \
  } else {                                                          \
    MATRIX_VAR_STORE(8, 8, 4, ymm_sum, out, _mm_storeu_ps, _NORM)   \
  }

//! Compute the distance between matrix and query (UINT64, M=32, N=16)
//!
//! Expands to a sequence of statements (not an expression) that:
//!   1. declares the `ymm_sum` accumulators through MATRIX_VAR_INIT(8, 16, ...),
//!      seeded with _mm256_setzero_si256();
//!   2. runs `cnt` steps, each step doing `m += 32` and `q += 16`. The first
//!      min(cnt, 31) steps use POPCNT_UINT64_STEP1_AVX, then
//!      MATRIX_VAR_PERMUTE(..., POPCNT_UINT64_PERMUTE_AVX) runs exactly once
//!      (also when cnt <= 31), and the remaining steps, if any, use
//!      POPCNT_UINT64_STEP2_AVX;
//!   3. writes the accumulators to `out` through MATRIX_VAR_STORE, forwarding
//!      `_NORM` unchanged.
//! Aligned loads (_mm256_load_si256) are selected only when `m` is 32-byte
//! aligned and aligned stores (_mm_store_ps) only when `out` is 16-byte
//! aligned; otherwise the unaligned variants are used.
//! Side effects: `m` and `q` are advanced in place (they must be modifiable
//! pointers), and `qe_0` / `qe_1` are declared in the scope of the expansion,
//! so the macro cannot be expanded twice in the same scope.
//! NOTE(review): the 31-step limit of the first phase presumably bounds the
//! growth of the STEP1 accumulators before the permute -- confirm against
//! POPCNT_UINT64_STEP1_AVX.
#define POPCNT_UINT64_32X16_AVX(m, q, cnt, out, _NORM)               \
  MATRIX_VAR_INIT(8, 16, __m256i, ymm_sum, _mm256_setzero_si256())   \
  const uint64_t *qe_0 = q + (cnt << 4);                             \
  const uint64_t *qe_1 = (cnt > 31 ? q + (31 << 4) : qe_0);          \
  if (((uintptr_t)m & 0x1f) == 0) {                                  \
    for (; q != qe_1; m += 32, q += 16) {                            \
      MATRIX_INT64_ITER_32X16_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                  POPCNT_UINT64_STEP1_AVX)           \
    }                                                                \
    MATRIX_VAR_PERMUTE(8, 16, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \
    for (; q != qe_0; m += 32, q += 16) {                            \
      MATRIX_INT64_ITER_32X16_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                  POPCNT_UINT64_STEP2_AVX)           \
    }                                                                \
  } else {                                                           \
    for (; q != qe_1; m += 32, q += 16) {                            \
      MATRIX_INT64_ITER_32X16_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                  POPCNT_UINT64_STEP1_AVX)           \
    }                                                                \
    MATRIX_VAR_PERMUTE(8, 16, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \
    for (; q != qe_0; m += 32, q += 16) {                            \
      MATRIX_INT64_ITER_32X16_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                  POPCNT_UINT64_STEP2_AVX)           \
    }                                                                \
  }                                                                  \
  if (((uintptr_t)out & 0xf) == 0) {                                 \
    MATRIX_VAR_STORE(8, 16, 4, ymm_sum, out, _mm_store_ps, _NORM)    \
  } else {                                                           \
    MATRIX_VAR_STORE(8, 16, 4, ymm_sum, out, _mm_storeu_ps, _NORM)   \
  }

//! Compute the distance between matrix and query (UINT64, M=32, N=32)
//!
//! Expands to a sequence of statements (not an expression) that:
//!   1. declares the `ymm_sum` accumulators through MATRIX_VAR_INIT(8, 32, ...),
//!      seeded with _mm256_setzero_si256();
//!   2. runs `cnt` steps, each step doing `m += 32` and `q += 32`. The first
//!      min(cnt, 31) steps use POPCNT_UINT64_STEP1_AVX, then
//!      MATRIX_VAR_PERMUTE(..., POPCNT_UINT64_PERMUTE_AVX) runs exactly once
//!      (also when cnt <= 31), and the remaining steps, if any, use
//!      POPCNT_UINT64_STEP2_AVX;
//!   3. writes the accumulators to `out` through MATRIX_VAR_STORE, forwarding
//!      `_NORM` unchanged.
//! Aligned loads (_mm256_load_si256) are selected only when `m` is 32-byte
//! aligned and aligned stores (_mm_store_ps) only when `out` is 16-byte
//! aligned; otherwise the unaligned variants are used.
//! Side effects: `m` and `q` are advanced in place (they must be modifiable
//! pointers), and `qe_0` / `qe_1` are declared in the scope of the expansion,
//! so the macro cannot be expanded twice in the same scope.
//! NOTE(review): the 31-step limit of the first phase presumably bounds the
//! growth of the STEP1 accumulators before the permute -- confirm against
//! POPCNT_UINT64_STEP1_AVX.
#define POPCNT_UINT64_32X32_AVX(m, q, cnt, out, _NORM)               \
  MATRIX_VAR_INIT(8, 32, __m256i, ymm_sum, _mm256_setzero_si256())   \
  const uint64_t *qe_0 = q + (cnt << 5);                             \
  const uint64_t *qe_1 = (cnt > 31 ? q + (31 << 5) : qe_0);          \
  if (((uintptr_t)m & 0x1f) == 0) {                                  \
    for (; q != qe_1; m += 32, q += 32) {                            \
      MATRIX_INT64_ITER_32X32_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                  POPCNT_UINT64_STEP1_AVX)           \
    }                                                                \
    MATRIX_VAR_PERMUTE(8, 32, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \
    for (; q != qe_0; m += 32, q += 32) {                            \
      MATRIX_INT64_ITER_32X32_AVX(m, q, ymm_sum, _mm256_load_si256,  \
                                  POPCNT_UINT64_STEP2_AVX)           \
    }                                                                \
  } else {                                                           \
    for (; q != qe_1; m += 32, q += 32) {                            \
      MATRIX_INT64_ITER_32X32_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                  POPCNT_UINT64_STEP1_AVX)           \
    }                                                                \
    MATRIX_VAR_PERMUTE(8, 32, ymm_sum, POPCNT_UINT64_PERMUTE_AVX)    \
    for (; q != qe_0; m += 32, q += 32) {                            \
      MATRIX_INT64_ITER_32X32_AVX(m, q, ymm_sum, _mm256_loadu_si256, \
                                  POPCNT_UINT64_STEP2_AVX)           \
    }                                                                \
  }                                                                  \
  if (((uintptr_t)out & 0xf) == 0) {                                 \
    MATRIX_VAR_STORE(8, 32, 4, ymm_sum, out, _mm_store_ps, _NORM)    \
  } else {                                                           \
    MATRIX_VAR_STORE(8, 32, 4, ymm_sum, out, _mm_storeu_ps, _NORM)   \
  }


================================================
FILE: src/ailego/math/distance_utility.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <zvec/ailego/internal/platform.h>

namespace zvec {
namespace ailego {

/*! Four-bits Squared Difference Table
 *
 *  Lookup table indexed by a byte made of two 4-bit operands:
 *  `Int4SquaredDiffTable[(a << 4) | b]` holds `(sa - sb) * (sa - sb)`, where
 *  `sa` / `sb` are `a` / `b` read as signed 4-bit two's-complement values
 *  (0..7 stay 0..7, 8..15 mean -8..-1). Each row below is one value of `a`,
 *  each column one value of `b`. The largest entry is 225 (7 vs. -8), so
 *  every entry fits in a uint8_t.
 */
static const AILEGO_ALIGNED(64) uint8_t Int4SquaredDiffTable[256] = {
    0,  1,  4,   9,   16,  25,  36,  49,  64,  49,  36,  25,  16,  9,   4,  1,
    1,  0,  1,   4,   9,   16,  25,  36,  81,  64,  49,  36,  25,  16,  9,  4,
    4,  1,  0,   1,   4,   9,   16,  25,  100, 81,  64,  49,  36,  25,  16, 9,
    9,  4,  1,   0,   1,   4,   9,   16,  121, 100, 81,  64,  49,  36,  25, 16,
    16, 9,  4,   1,   0,   1,   4,   9,   144, 121, 100, 81,  64,  49,  36, 25,
    25, 16, 9,   4,   1,   0,   1,   4,   169, 144, 121, 100, 81,  64,  49, 36,
    36, 25, 16,  9,   4,   1,   0,   1,   196, 169, 144, 121, 100, 81,  64, 49,
    49, 36, 25,  16,  9,   4,   1,   0,   225, 196, 169, 144, 121, 100, 81, 64,
    64, 81, 100, 121, 144, 169, 196, 225, 0,   1,   4,   9,   16,  25,  36, 49,
    49, 64, 81,  100, 121, 144, 169, 196, 1,   0,   1,   4,   9,   16,  25, 36,
    36, 49, 64,  81,  100, 121, 144, 169, 4,   1,   0,   1,   4,   9,   16, 25,
    25, 36, 49,  64,  81,  100, 121, 144, 9,   4,   1,   0,   1,   4,   9,  16,
    16, 25, 36,  49,  64,  81,  100, 121, 16,  9,   4,   1,   0,   1,   4,  9,
    9,  16, 25,  36,  49,  64,  81,  100, 25,  16,  9,   4,   1,   0,   1,  4,
    4,  9,  16,  25,  36,  49,  64,  81,  36,  25,  16,  9,   4,   1,   0,  1,
    1,  4,  9,   16,  25,  36,  49,  64,  49,  36,  25,  16,  9,   4,   1,  0,
};

/*! Four-bits Integer Multiplication Table
 *
 *  Lookup table indexed by a byte made of two 4-bit operands:
 *  `Int4MulTable[(a << 4) | b]` holds `sa * sb`, where `sa` / `sb` are
 *  `a` / `b` read as signed 4-bit two's-complement values (0..7 stay 0..7,
 *  8..15 mean -8..-1). Each row below is one value of `a`, each column one
 *  value of `b`. Entries range from -56 (7 * -8) to 64 (-8 * -8), so they
 *  fit in an int8_t.
 */
static const AILEGO_ALIGNED(64) int8_t Int4MulTable[256] = {
    0, 0,  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
    0, 1,  2,   3,   4,   5,   6,   7,   -8,  -7,  -6,  -5,  -4,  -3,  -2,  -1,
    0, 2,  4,   6,   8,   10,  12,  14,  -16, -14, -12, -10, -8,  -6,  -4,  -2,
    0, 3,  6,   9,   12,  15,  18,  21,  -24, -21, -18, -15, -12, -9,  -6,  -3,
    0, 4,  8,   12,  16,  20,  24,  28,  -32, -28, -24, -20, -16, -12, -8,  -4,
    0, 5,  10,  15,  20,  25,  30,  35,  -40, -35, -30, -25, -20, -15, -10, -5,
    0, 6,  12,  18,  24,  30,  36,  42,  -48, -42, -36, -30, -24, -18, -12, -6,
    0, 7,  14,  21,  28,  35,  42,  49,  -56, -49, -42, -35, -28, -21, -14, -7,
    0, -8, -16, -24, -32, -40, -48, -56, 64,  56,  48,  40,  32,  24,  16,  8,
    0, -7, -14, -21, -28, -35, -42, -49, 56,  49,  42,  35,  28,  21,  14,  7,
    0, -6, -12, -18, -24, -30, -36, -42, 48,  42,  36,  30,  24,  18,  12,  6,
    0, -5, -10, -15, -20, -25, -30, -35, 40,  35,  30,  25,  20,  15,  10,  5,
    0, -4, -8,  -12, -16, -20, -24, -28, 32,  28,  24,  20,  16,  12,  8,   4,
    0, -3, -6,  -9,  -12, -15, -18, -21, 24,  21,  18,  15,  12,  9,   6,   3,
    0, -2, -4,  -6,  -8,  -10, -12, -14, 16,  14,  12,  10,  8,   6,   4,   2,
    0, -1, -2,  -3,  -4,  -5,  -6,  -7,  8,   7,   6,   5,   4,   3,   2,   1,
};

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/euclidean_distance_matrix.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <ailego/utility/math_helper.h>
#include <zvec/ailego/internal/platform.h>
#include <zvec/ailego/utility/type_helper.h>
#include "distance_utility.h"

namespace zvec {
namespace ailego {

//--------------------------------------------------
// Dense
//--------------------------------------------------
/*! Squared Euclidean Distance Matrix
 *
 *  Primary template, declared only; every usable combination is provided by
 *  a partial or explicit specialization.
 *  \tparam T  Element type of the matrix and the query
 *  \tparam M  Number of matrix values consumed per step (rows of the result)
 *  \tparam N  Number of query values consumed per step (columns of the result)
 *  The unnamed fourth parameter is the SFINAE slot used by the partial
 *  specializations (std::enable_if).
 */
template <typename T, size_t M, size_t N, typename = void>
struct SquaredEuclideanDistanceMatrix;

/*! Squared Euclidean Distance Matrix (M=1, N=1)
 *
 *  Generic one-vector-against-one-vector kernel for signed arithmetic types.
 */
template <typename T>
struct SquaredEuclideanDistanceMatrix<
    T, 1, 1, typename std::enable_if<IsSignedArithmetic<T>::value>::type> {
  //! Type of value
  using ValueType = typename std::remove_cv<T>::type;

  /*! Compute the distance between matrix and query
   *  \param m    First vector, `dim` elements
   *  \param q    Second vector, `dim` elements
   *  \param dim  Number of elements in each vector
   *  \param out  Receives the sum of the element-wise squared differences
   */
  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,
                             float *out) {
    ailego_assert(m && q && dim && out);

    // Walk both vectors in lock step and accumulate in single precision.
    float acc = 0.0f;
    const ValueType *const m_end = m + dim;
    while (m != m_end) {
      acc += MathHelper::SquaredDifference(*m, *q);
      ++m;
      ++q;
    }
    *out = acc;
  }
};

/*! Squared Euclidean Distance Matrix (uint8_t, M=1, N=1)
 *
 *  Explicit specialization. Compute() is only declared here; its definition
 *  is not part of this header.
 *  NOTE(review): the M >= 2 uint8_t specializations in this header treat the
 *  data as packed INT4 -- presumably this one does too; confirm against the
 *  definition.
 */
template <>
struct SquaredEuclideanDistanceMatrix<uint8_t, 1, 1> {
  //! Type of value
  using ValueType = uint8_t;

  //! Compute the distance between matrix and query
  //! (same signature as the generic kernels: two inputs, `dim`, one output)
  static void Compute(const ValueType *m, const ValueType *q, size_t dim,
                      float *out);
};

/*! Squared Euclidean Distance Matrix (int8_t, M=1, N=1)
 *
 *  Explicit specialization. Compute() is only declared here; its definition
 *  is not part of this header.
 */
template <>
struct SquaredEuclideanDistanceMatrix<int8_t, 1, 1> {
  //! Type of value
  using ValueType = int8_t;

  //! Compute the distance between matrix and query
  //! (same signature as the generic kernels: two inputs, `dim`, one output)
  static void Compute(const ValueType *m, const ValueType *q, size_t dim,
                      float *out);
};

/*! Squared Euclidean Distance Matrix (Float16, M=1, N=1)
 *
 *  Explicit specialization. Compute() is only declared here; its definition
 *  is not part of this header.
 */
template <>
struct SquaredEuclideanDistanceMatrix<Float16, 1, 1> {
  //! Type of value
  using ValueType = Float16;

  //! Compute the distance between matrix and query
  //! (same signature as the generic kernels: two inputs, `dim`, one output)
  static void Compute(const ValueType *m, const ValueType *q, size_t dim,
                      float *out);
};

/*! Squared Euclidean Distance Matrix (float, M=1, N=1)
 *
 *  Explicit specialization. Compute() is only declared here; its definition
 *  is not part of this header.
 */
template <>
struct SquaredEuclideanDistanceMatrix<float, 1, 1> {
  //! Type of value
  using ValueType = float;

  //! Compute the distance between matrix and query
  //! (same signature as the generic kernels: two inputs, `dim`, one output)
  static void Compute(const ValueType *m, const ValueType *q, size_t dim,
                      float *out);
};

/*! Squared Euclidean Distance Matrix (M>=2, N>=2)
 *
 *  Generic kernel for signed arithmetic types of at least two bytes.
 *  Both inputs are consumed one step at a time: each step reads M
 *  consecutive values from `m` and N consecutive values from `q`.
 */
template <typename T, size_t M, size_t N>
struct SquaredEuclideanDistanceMatrix<
    T, M, N,
    typename std::enable_if<IsSignedArithmetic<T>::value && sizeof(T) >= 2 &&
                            M >= 2 && N >= 2>::type> {
  //! Type of value
  using ValueType = typename std::remove_cv<T>::type;

  /*! Compute the distance between matrix and query
   *  \param m    Matrix data, `dim` steps of M values each
   *  \param q    Query data, `dim` steps of N values each
   *  \param dim  Number of steps
   *  \param out  M * N results; the pair (row i, query j) is at `out[j * M + i]`
   */
  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,
                             float *out) {
    ailego_assert(m && q && dim && out);

    // With no step to process the output is left untouched.
    if (dim == 0) {
      return;
    }

    // The first step overwrites the output, so it needs no prior zeroing.
    for (size_t row = 0; row != M; ++row) {
      ValueType lhs = m[row];
      for (size_t col = 0; col != N; ++col) {
        out[col * M + row] = MathHelper::SquaredDifference(lhs, q[col]);
      }
    }

    // Every further step accumulates on top of the previous ones.
    for (size_t step = 1; step != dim; ++step) {
      m += M;
      q += N;
      for (size_t row = 0; row != M; ++row) {
        ValueType lhs = m[row];
        for (size_t col = 0; col != N; ++col) {
          out[col * M + row] += MathHelper::SquaredDifference(lhs, q[col]);
        }
      }
    }
  }
};

/*! Squared Euclidean Distance Matrix (N=1)
 *
 *  Generic kernel for signed arithmetic types of at least two bytes, with a
 *  single query. Each step reads M consecutive values from `m` and one
 *  value from `q`.
 */
template <typename T, size_t M>
struct SquaredEuclideanDistanceMatrix<
    T, M, 1,
    typename std::enable_if<IsSignedArithmetic<T>::value && sizeof(T) >= 2 &&
                            M >= 2>::type> {
  //! Type of value
  using ValueType = typename std::remove_cv<T>::type;

  /*! Compute the distance between matrix and query
   *  \param m    Matrix data, `dim` steps of M values each
   *  \param q    Query vector, `dim` values
   *  \param dim  Number of steps
   *  \param out  M results, one per matrix row
   */
  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,
                             float *out) {
    ailego_assert(m && q && dim && out);

    // With no step to process the output is left untouched.
    if (dim == 0) {
      return;
    }

    // The first step overwrites the output, so it needs no prior zeroing.
    {
      ValueType rhs = q[0];
      for (size_t row = 0; row != M; ++row) {
        out[row] = MathHelper::SquaredDifference(m[row], rhs);
      }
    }

    // Every further step accumulates on top of the previous ones.
    for (size_t step = 1; step != dim; ++step) {
      m += M;
      ValueType rhs = q[step];
      for (size_t row = 0; row != M; ++row) {
        out[row] += MathHelper::SquaredDifference(m[row], rhs);
      }
    }
  }
};

/*! Squared Euclidean Distance Matrix (INT8)
 */
template <size_t M, size_t N>
struct SquaredEuclideanDistanceMatrix<
    int8_t, M, N, typename std::enable_if<M >= 2 && N >= 2>::type> {
  //! Type of value
  using ValueType = int8_t;

  //! Compute the distance between matrix and query
  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,
                             float *out) {
    ailego_assert(m && q && dim && !(dim & 3) && out);

    const uint32_t *m_it = reinterpret_cast<const uint32_t *>(m);
    const uint32_t *q_it = reinterpret_cast<const uint32_t *>(q);

    dim >>= 2;
    if (dim > 0) {
      for (size_t i = 0; i < M; ++i) {
        uint32_t m_val = m_it[i];
        float *r = out + i;

        for (size_t j = 0; j < N; ++j) {
          *r = SquaredDifference(m_val, q_it[j]);
          r += M;
        }
      }
      m_it += M;
      q_it += N;
    }

    for (size_t k = 1; k < dim; ++k) {
      for (size_t i = 0; i < M; ++i) {
        uint32_t m_val = m_it[i];
        float *r = out + i;

        for (size_t j = 0; j < N; ++j) {
          *r += SquaredDifference(m_val, q_it[j]);
          r += M;
        }
      }
      m_it += M;
      q_it += N;
    }
  }

 protected:
  //! Calculate the squared difference
  static inline float SquaredDifference(uint32_t lhs, uint32_t rhs) {
    volatile int32_t sum = MathHelper::SquaredDifference<int8_t, int32_t>(
                               (int8_t)(lhs >> 0), (int8_t)(rhs >> 0)) +
                           MathHelper::SquaredDifference<int8_t, int32_t>(
                               (int8_t)(lhs >> 8), (int8_t)(rhs >> 8)) +
                           MathHelper::SquaredDifference<int8_t, int32_t>(
                               (int8_t)(lhs >> 16), (int8_t)(rhs >> 16)) +
                           MathHelper::SquaredDifference<int8_t, int32_t>(
                               (int8_t)(lhs >> 24), (int8_t)(rhs >> 24));
    return static_cast<float>(sum);
  }
};

/*! Squared Euclidean Distance Matrix (INT8, N=1)
 */
template <size_t M>
struct SquaredEuclideanDistanceMatrix<int8_t, M, 1,
                                      typename std::enable_if<M >= 2>::type> {
  //! Type of value
  using ValueType = int8_t;

  //! Compute the distance between matrix and query
  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,
                             float *out) {
    ailego_assert(m && q && dim && !(dim & 3) && out);

    const uint32_t *m_it = reinterpret_cast<const uint32_t *>(m);
    const uint32_t *q_it = reinterpret_cast<const uint32_t *>(q);
    const uint32_t *q_end = q_it + (dim >> 2);

    if (q_it != q_end) {
      uint32_t q_val = *q_it++;

      for (size_t i = 0; i < M; ++i) {
        *(out + i) = SquaredDifference(m_it[i], q_val);
      }
      m_it += M;
    }

    while (q_it != q_end) {
      uint32_t q_val = *q_it++;

      for (size_t i = 0; i < M; ++i) {
        *(out + i) += SquaredDifference(m_it[i], q_val);
      }
      m_it += M;
    }
  }

 protected:
  //! Calculate the squared difference
  static inline float SquaredDifference(uint32_t lhs, uint32_t rhs) {
    volatile int32_t sum = MathHelper::SquaredDifference<int8_t, int32_t>(
                               (int8_t)(lhs >> 0), (int8_t)(rhs >> 0)) +
                           MathHelper::SquaredDifference<int8_t, int32_t>(
                               (int8_t)(lhs >> 8), (int8_t)(rhs >> 8)) +
                           MathHelper::SquaredDifference<int8_t, int32_t>(
                               (int8_t)(lhs >> 16), (int8_t)(rhs >> 16)) +
                           MathHelper::SquaredDifference<int8_t, int32_t>(
                               (int8_t)(lhs >> 24), (int8_t)(rhs >> 24));
    return static_cast<float>(sum);
  }
};

/*! Squared Euclidean Distance Matrix (INT4)
 *
 *  INT4 kernel for M>=2, N>=2. Values are packed two per byte; the inputs are
 *  read as 32-bit words, i.e. eight 4-bit values at a time: each step reads
 *  M words from `m` and N words from `q`.
 */
template <size_t M, size_t N>
struct SquaredEuclideanDistanceMatrix<
    uint8_t, M, N, typename std::enable_if<M >= 2 && N >= 2>::type> {
  //! Type of value
  using ValueType = uint8_t;

  /*! Compute the distance between matrix and query
   *  \param m    Matrix data, `dim / 8` steps of M 32-bit words each
   *  \param q    Query data, `dim / 8` steps of N 32-bit words each
   *  \param dim  Number of 4-bit elements per vector (multiple of 8)
   *  \param out  M * N results; the pair (row i, query j) is at `out[j * M + i]`
   */
  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,
                             float *out) {
    ailego_assert(m && q && dim && !(dim & 7) && out);

    // One step covers eight 4-bit elements of every vector.
    const size_t steps = dim >> 3;
    if (steps == 0) {
      return;
    }

    const uint32_t *lhs_words = reinterpret_cast<const uint32_t *>(m);
    const uint32_t *rhs_words = reinterpret_cast<const uint32_t *>(q);

    // The first step overwrites the output, so it needs no prior zeroing.
    for (size_t row = 0; row != M; ++row) {
      const uint32_t lhs = lhs_words[row];
      for (size_t col = 0; col != N; ++col) {
        out[col * M + row] = SquaredDifference(lhs, rhs_words[col]);
      }
    }

    // Every further step accumulates on top of the previous ones.
    for (size_t step = 1; step != steps; ++step) {
      lhs_words += M;
      rhs_words += N;
      for (size_t row = 0; row != M; ++row) {
        const uint32_t lhs = lhs_words[row];
        for (size_t col = 0; col != N; ++col) {
          out[col * M + row] += SquaredDifference(lhs, rhs_words[col]);
        }
      }
    }
  }

 protected:
  //! Calculate the squared difference of eight 4-bit lanes packed in a word
  static inline float SquaredDifference(uint32_t lhs, uint32_t rhs) {
    int total = 0;
    for (unsigned shift = 0; shift != 32; shift += 4) {
      // Table index: lhs nibble in the high half, rhs nibble in the low half.
      const uint32_t lhs_nibble = (lhs >> shift) & 0xf;
      const uint32_t rhs_nibble = (rhs >> shift) & 0xf;
      total += Int4SquaredDiffTable[(lhs_nibble << 4) | rhs_nibble];
    }
    return static_cast<float>(total);
  }
};

/*! Squared Euclidean Distance Matrix (INT4, N=1)
 *
 *  INT4 kernel for M>=2 and a single query. Values are packed two per byte;
 *  the inputs are read as 32-bit words, i.e. eight 4-bit values at a time:
 *  each step reads M words from `m` and one word from `q`.
 */
template <size_t M>
struct SquaredEuclideanDistanceMatrix<uint8_t, M, 1,
                                      typename std::enable_if<M >= 2>::type> {
  //! Type of value
  using ValueType = uint8_t;

  /*! Compute the distance between matrix and query
   *  \param m    Matrix data, `dim / 8` steps of M 32-bit words each
   *  \param q    Query vector, `dim` 4-bit values
   *  \param dim  Number of 4-bit elements per vector (multiple of 8)
   *  \param out  M results, one per matrix row
   */
  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,
                             float *out) {
    ailego_assert(m && q && dim && !(dim & 7) && out);

    // One step covers eight 4-bit elements of every vector.
    const size_t steps = dim >> 3;
    if (steps == 0) {
      return;
    }

    const uint32_t *lhs_words = reinterpret_cast<const uint32_t *>(m);
    const uint32_t *rhs_words = reinterpret_cast<const uint32_t *>(q);

    // The first step overwrites the output, so it needs no prior zeroing.
    {
      const uint32_t rhs = rhs_words[0];
      for (size_t row = 0; row != M; ++row) {
        out[row] = SquaredDifference(lhs_words[row], rhs);
      }
    }

    // Every further step accumulates on top of the previous ones.
    for (size_t step = 1; step != steps; ++step) {
      lhs_words += M;
      const uint32_t rhs = rhs_words[step];
      for (size_t row = 0; row != M; ++row) {
        out[row] += SquaredDifference(lhs_words[row], rhs);
      }
    }
  }

 protected:
  //! Calculate the squared difference of eight 4-bit lanes packed in a word
  static inline float SquaredDifference(uint32_t lhs, uint32_t rhs) {
    int total = 0;
    for (unsigned shift = 0; shift != 32; shift += 4) {
      // Table index: lhs nibble in the high half, rhs nibble in the low half.
      const uint32_t lhs_nibble = (lhs >> shift) & 0xf;
      const uint32_t rhs_nibble = (rhs >> shift) & 0xf;
      total += Int4SquaredDiffTable[(lhs_nibble << 4) | rhs_nibble];
    }
    return static_cast<float>(total);
  }
};

/*! Euclidean Distance Matrix
 *
 *  Runs the matching SquaredEuclideanDistanceMatrix kernel and then replaces
 *  each of the M * N results with its square root.
 */
template <typename T, size_t M, size_t N,
          typename =
              typename std::enable_if<(IsSignedArithmetic<T>::value ||
                                       std::is_same<T, uint8_t>::value) &&
                                      M >= 1 && N >= 1>::type>
struct EuclideanDistanceMatrix {
  //! Type of value
  using ValueType = typename std::remove_cv<T>::type;

  /*! Compute the distance between matrix and query
   *  \param m    Matrix data, forwarded to the squared-distance kernel
   *  \param q    Query data, forwarded to the squared-distance kernel
   *  \param dim  Dimension, forwarded to the squared-distance kernel
   *  \param out  M * N results, overwritten in place with the distances
   */
  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,
                             float *out) {
    ailego_assert(m && q && dim && out);

    SquaredEuclideanDistanceMatrix<T, M, N>::Compute(m, q, dim, out);

    // Turn the squared distances into distances, cell by cell.
    float *const out_end = out + N * M;
    for (float *cell = out; cell != out_end; ++cell) {
      *cell = std::sqrt(*cell);
    }
  }
};

/*! Euclidean Distance Matrix (M=1, N=1)
 *
 *  Generic one-vector-against-one-vector kernel for signed arithmetic types.
 */
template <typename T>
struct EuclideanDistanceMatrix<
    T, 1, 1, typename std::enable_if<IsSignedArithmetic<T>::value>::type> {
  //! Type of value
  using ValueType = typename std::remove_cv<T>::type;

  /*! Compute the distance between matrix and query
   *  \param m    First vector, `dim` elements
   *  \param q    Second vector, `dim` elements
   *  \param dim  Number of elements in each vector
   *  \param out  Receives the square root of the summed squared differences
   */
  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,
                             float *out) {
    ailego_assert(m && q && dim && out);

    // Walk both vectors in lock step and accumulate in single precision.
    float acc = 0.0f;
    const ValueType *const m_end = m + dim;
    while (m != m_end) {
      acc += MathHelper::SquaredDifference(*m, *q);
      ++m;
      ++q;
    }
    *out = std::sqrt(acc);
  }
};

/*! Euclidean Distance Matrix (uint8_t, M=1, N=1)
 *
 *  Explicit specialization. Compute() is only declared here; its definition
 *  is not part of this header.
 *  NOTE(review): the M >= 2 uint8_t squared-distance specializations in this
 *  header treat the data as packed INT4 -- presumably this one does too;
 *  confirm against the definition.
 */
template <>
struct EuclideanDistanceMatrix<uint8_t, 1, 1> {
  //! Type of value
  using ValueType = uint8_t;

  //! Compute the distance between matrix and query
  //! (same signature as the generic kernels: two inputs, `dim`, one output)
  static void Compute(const ValueType *m, const ValueType *q, size_t dim,
                      float *out);
};

/*! Euclidean Distance Matrix (int8_t, M=1, N=1)
 *
 *  Explicit specialization. Compute() is only declared here; its definition
 *  is not part of this header.
 */
template <>
struct EuclideanDistanceMatrix<int8_t, 1, 1> {
  //! Type of value
  using ValueType = int8_t;

  //! Compute the distance between matrix and query
  //! (same signature as the generic kernels: two inputs, `dim`, one output)
  static void Compute(const ValueType *m, const ValueType *q, size_t dim,
                      float *out);
};

/*! Euclidean Distance Matrix (Float16, M=1, N=1)
 *
 *  Explicit specialization. Compute() is only declared here; its definition
 *  is not part of this header.
 */
template <>
struct EuclideanDistanceMatrix<Float16, 1, 1> {
  //! Type of value
  using ValueType = Float16;

  //! Compute the distance between matrix and query
  //! (same signature as the generic kernels: two inputs, `dim`, one output)
  static void Compute(const ValueType *m, const ValueType *q, size_t dim,
                      float *out);
};

/*! Euclidean Distance Matrix (float, M=1, N=1)
 *
 *  Explicit specialization. Compute() is only declared here; its definition
 *  is not part of this header.
 */
template <>
struct EuclideanDistanceMatrix<float, 1, 1> {
  //! Type of value
  using ValueType = float;

  //! Compute the distance between matrix and query
  //! (same signature as the generic kernels: two inputs, `dim`, one output)
  static void Compute(const ValueType *m, const ValueType *q, size_t dim,
                      float *out);
};


//--------------------------------------------------
// Sparse
//--------------------------------------------------
/*! Squared Euclidean Distance Sparse Matrix
 *
 *  Both operands are packed buffers that Compute() reads in this order:
 *    uint32_t   total number of stored entries
 *    uint32_t   number of segments
 *    uint32_t   segment id,        one per segment
 *    uint32_t   entries in segment, one per segment
 *    uint16_t   entry index,       one per stored entry
 *    ValueType  entry value,       one per stored entry
 *  Segments are merged by id; entries inside a shared segment are merged by
 *  index. Anything present on one side only contributes its own square.
 */
template <typename T>
struct SquaredEuclideanSparseDistanceMatrix {
  //! Type of value
  using ValueType = typename std::remove_cv<T>::type;

  //! Squared distance between the entries of one segment shared by both sides
  static float ComputeSquaredEuclideanSparseDistanceInSegment(
      uint32_t m_sparse_count, const uint16_t *m_sparse_index,
      const ValueType *m_sparse_value, uint32_t q_sparse_count,
      const uint16_t *q_sparse_index, const ValueType *q_sparse_value);

  /*! Compute the distance between matrix and query
   *  \param m_sparse_data_in  Packed sparse vector (layout above)
   *  \param q_sparse_data_in  Packed sparse vector (layout above)
   *  \param out               Receives the squared Euclidean distance
   */
  static inline void Compute(const void *m_sparse_data_in,
                             const void *q_sparse_data_in, float *out) {
    ailego_assert(out);

    const uint8_t *m_bytes = static_cast<const uint8_t *>(m_sparse_data_in);
    const uint8_t *q_bytes = static_cast<const uint8_t *>(q_sparse_data_in);

    // Fixed header: entry count followed by segment count.
    const uint32_t m_total = *reinterpret_cast<const uint32_t *>(m_bytes);
    const uint32_t q_total = *reinterpret_cast<const uint32_t *>(q_bytes);
    const uint32_t m_segs =
        *reinterpret_cast<const uint32_t *>(m_bytes + sizeof(uint32_t));
    const uint32_t q_segs =
        *reinterpret_cast<const uint32_t *>(q_bytes + sizeof(uint32_t));

    // Per-segment tables: ids first, then the entry count of each segment.
    const uint8_t *m_seg_area = m_bytes + 2 * sizeof(uint32_t);
    const uint8_t *q_seg_area = q_bytes + 2 * sizeof(uint32_t);
    const uint32_t *m_ids = reinterpret_cast<const uint32_t *>(m_seg_area);
    const uint32_t *q_ids = reinterpret_cast<const uint32_t *>(q_seg_area);
    const uint32_t *m_lens = reinterpret_cast<const uint32_t *>(
        m_seg_area + m_segs * sizeof(uint32_t));
    const uint32_t *q_lens = reinterpret_cast<const uint32_t *>(
        q_seg_area + q_segs * sizeof(uint32_t));

    // Per-entry tables: all indices first, then all values.
    const uint8_t *m_entry_area = m_seg_area + m_segs * 2 * sizeof(uint32_t);
    const uint8_t *q_entry_area = q_seg_area + q_segs * 2 * sizeof(uint32_t);
    const uint16_t *m_indices =
        reinterpret_cast<const uint16_t *>(m_entry_area);
    const uint16_t *q_indices =
        reinterpret_cast<const uint16_t *>(q_entry_area);
    const ValueType *m_values = reinterpret_cast<const ValueType *>(
        m_entry_area + m_total * sizeof(uint16_t));
    const ValueType *q_values = reinterpret_cast<const ValueType *>(
        q_entry_area + q_total * sizeof(uint16_t));

    float sum = 0.0f;

    // Adds the square of every value of a one-sided segment to `sum`,
    // one element at a time so the accumulation order is kept.
    auto add_squares = [&sum](const ValueType *values, uint32_t count) {
      for (size_t i = 0; i < count; i++) {
        float value = values[i];
        sum += value * value;
      }
    };

    size_t m_seg = 0;  // current segment on each side
    size_t q_seg = 0;
    size_t m_pos = 0;  // entries consumed so far on each side
    size_t q_pos = 0;

    // Merge the two segment lists by id.
    while (m_seg < m_segs && q_seg < q_segs) {
      const uint32_t m_id = m_ids[m_seg];
      const uint32_t q_id = q_ids[q_seg];

      if (m_id < q_id) {
        add_squares(m_values + m_pos, m_lens[m_seg]);
        m_pos += m_lens[m_seg];
        ++m_seg;
      } else if (q_id < m_id) {
        add_squares(q_values + q_pos, q_lens[q_seg]);
        q_pos += q_lens[q_seg];
        ++q_seg;
      } else {
        sum += ComputeSquaredEuclideanSparseDistanceInSegment(
            m_lens[m_seg], m_indices + m_pos, m_values + m_pos, q_lens[q_seg],
            q_indices + q_pos, q_values + q_pos);
        m_pos += m_lens[m_seg];
        q_pos += q_lens[q_seg];
        ++m_seg;
        ++q_seg;
      }
    }

    // Whatever is left on either side has no counterpart.
    while (m_seg < m_segs) {
      add_squares(m_values + m_pos, m_lens[m_seg]);
      m_pos += m_lens[m_seg];
      ++m_seg;
    }
    while (q_seg < q_segs) {
      add_squares(q_values + q_pos, q_lens[q_seg]);
      q_pos += q_lens[q_seg];
      ++q_seg;
    }

    *out = sum;
  }
};

//! Squared distance between two index-sorted entry lists of one segment.
//! Entries with the same index contribute the square of their difference;
//! entries present on one side only contribute their own square.
template <typename T>
float SquaredEuclideanSparseDistanceMatrix<T>::
    ComputeSquaredEuclideanSparseDistanceInSegment(
        uint32_t m_sparse_count, const uint16_t *m_sparse_index,
        const ValueType *m_sparse_value, uint32_t q_sparse_count,
        const uint16_t *q_sparse_index, const ValueType *q_sparse_value) {
  float total = 0.0f;
  size_t m_pos = 0;
  size_t q_pos = 0;

  // Merge both lists while each still has entries.
  while (m_pos < m_sparse_count && q_pos < q_sparse_count) {
    const uint16_t m_idx = m_sparse_index[m_pos];
    const uint16_t q_idx = q_sparse_index[q_pos];

    if (m_idx < q_idx) {
      // Only the matrix side has this index.
      float diff = m_sparse_value[m_pos];
      total += diff * diff;
      ++m_pos;
    } else if (q_idx < m_idx) {
      // Only the query side has this index.
      float diff = q_sparse_value[q_pos];
      total += diff * diff;
      ++q_pos;
    } else {
      float diff = m_sparse_value[m_pos] - q_sparse_value[q_pos];
      total += diff * diff;
      ++m_pos;
      ++q_pos;
    }
  }

  // Drain whichever list still has entries.
  while (m_pos < m_sparse_count) {
    float diff = m_sparse_value[m_pos];
    total += diff * diff;
    ++m_pos;
  }
  while (q_pos < q_sparse_count) {
    float diff = q_sparse_value[q_pos];
    total += diff * diff;
    ++q_pos;
  }

  return total;
}

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/euclidean_distance_matrix_fp16_avx.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_accum_fp16.i"
#include "distance_matrix_euclidean_utility.i"
#include "euclidean_distance_matrix.h"

namespace zvec {
namespace ailego {

#if defined(__AVX__)

//! Squared Euclidean distance of two FP16 vectors (compiled only with __AVX__).
//!
//! \param lhs   First vector
//! \param rhs   Second vector
//! \param size  Element count handed to the accumulation macro
//! \return The value the ACCUM_FP16_1X1_AVX macro leaves in `score`
//! NOTE(review): the macro body is not visible here (presumably it comes from
//! distance_matrix_accum_fp16.i); confirm how it treats sizes that are not a
//! multiple of the vector width.
float SquaredEuclideanDistanceFp16AVX(const Float16 *lhs, const Float16 *rhs,
                                      size_t size) {
  float score{0.0f};

  // The macro receives the address of `score` as its output; its last
  // argument is deliberately left empty.
  ACCUM_FP16_1X1_AVX(lhs, rhs, size, &score, 0ull, )

  return score;
}

#endif  // __AVX__

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/math/euclidean_distance_matrix_fp16_avx512.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_accum_fp16.i"
#include "distance_matrix_euclidean_utility.i"
#include "euclidean_distance_matrix.h"

namespace zvec {
namespace ailego {

#if defined(__AVX512F__)
//! Squared Euclidean distance of two FP16 vectors (compiled only with
//! __AVX512F__).
//!
//! \param lhs   First vector
//! \param rhs   Second vector
//! \param size  Element count handed to the accumulation macro
//! \return The value the ACCUM_FP16_1X1_AVX512 macro leaves in `score`
//! NOTE(review): the macro body is not visible here (presumably it comes from
//! distance_matrix_accum_fp16.i); confirm how it treats sizes that are not a
//! multiple of the vector width.
float SquaredEuclideanDistanceFp16AVX512(const Float16 *lhs, const Float16 *rhs,
                                         size_t size) {
  float score{0.0f};

  // The macro receives the address of `score` as its output; its last
  // argument is deliberately left empty.
  ACCUM_FP16_1X1_AVX512(lhs, rhs, size, &score, 0ull, )

  return score;
}
#endif
}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/math/euclidean_distance_matrix_fp16_avx512fp16.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_accum_fp16.i"
#include "distance_matrix_euclidean_utility.i"
#include "euclidean_distance_matrix.h"

namespace zvec {
namespace ailego {

#if defined(__AVX512FP16__)
//! Squared Euclidean Distance (FP16 inputs, AVX512-FP16 arithmetic)
//!
//! Element-wise differences are squared and accumulated in packed half
//! precision (`__m512h`). The final reduction of the accumulator is delegated
//! to HorizontalAdd_FP16_V512 (defined elsewhere).
//!
//! @param lhs   first vector
//! @param rhs   second vector
//! @param size  number of FP16 elements in each vector
//! @return value produced by HorizontalAdd_FP16_V512 for the accumulated
//!         squared differences
float SquaredEuclideanDistanceFp16AVX512FP16(const Float16 *lhs,
                                             const Float16 *rhs, size_t size) {
  const Float16 *last = lhs + size;
  // End of the region that can be consumed 64 elements (two ZMM) at a time.
  const Float16 *last_aligned = lhs + ((size >> 6) << 6);

  // Two independent accumulators for the 2x unrolled main loop.
  __m512h zmm_sum_0 = _mm512_setzero_ph();
  __m512h zmm_sum_1 = _mm512_setzero_ph();

  // Aligned loads are only used when both inputs sit on a 64-byte boundary.
  if (((uintptr_t)lhs & 0x3f) == 0 && ((uintptr_t)rhs & 0x3f) == 0) {
    for (; lhs != last_aligned; lhs += 64, rhs += 64) {
      __m512h zmm_d_0 =
          _mm512_sub_ph(_mm512_load_ph(lhs + 0), _mm512_load_ph(rhs + 0));
      __m512h zmm_d_1 =
          _mm512_sub_ph(_mm512_load_ph(lhs + 32), _mm512_load_ph(rhs + 32));
      zmm_sum_0 = _mm512_fmadd_ph(zmm_d_0, zmm_d_0, zmm_sum_0);
      zmm_sum_1 = _mm512_fmadd_ph(zmm_d_1, zmm_d_1, zmm_sum_1);
    }

    // One more full 32-element vector, if available.
    if (last >= last_aligned + 32) {
      __m512h zmm_d = _mm512_sub_ph(_mm512_load_ph(lhs), _mm512_load_ph(rhs));
      zmm_sum_0 = _mm512_fmadd_ph(zmm_d, zmm_d, zmm_sum_0);
      lhs += 32;
      rhs += 32;
    }
  } else {
    for (; lhs != last_aligned; lhs += 64, rhs += 64) {
      __m512h zmm_d_0 =
          _mm512_sub_ph(_mm512_loadu_ph(lhs + 0), _mm512_loadu_ph(rhs + 0));
      __m512h zmm_d_1 =
          _mm512_sub_ph(_mm512_loadu_ph(lhs + 32), _mm512_loadu_ph(rhs + 32));
      zmm_sum_0 = _mm512_fmadd_ph(zmm_d_0, zmm_d_0, zmm_sum_0);
      zmm_sum_1 = _mm512_fmadd_ph(zmm_d_1, zmm_d_1, zmm_sum_1);
    }

    // One more full 32-element vector, if available.
    if (last >= last_aligned + 32) {
      __m512h zmm_d = _mm512_sub_ph(_mm512_loadu_ph(lhs), _mm512_loadu_ph(rhs));
      zmm_sum_0 = _mm512_fmadd_ph(zmm_d, zmm_d, zmm_sum_0);
      lhs += 32;
      rhs += 32;
    }
  }

  zmm_sum_0 = _mm512_add_ph(zmm_sum_0, zmm_sum_1);
  if (lhs != last) {
    // Between 1 and 31 elements remain. The lane mask must be built with
    // unsigned arithmetic: for 31 leftovers a signed `(1 << 31) - 1`
    // overflows `int`, which is undefined behaviour.
    __mmask32 mask = (__mmask32)((1u << (last - lhs)) - 1u);
    __m512i zmm_undefined = _mm512_undefined_epi32();
    __m512h zmm_undefined_ph = _mm512_undefined_ph();
    // Only the masked lanes are loaded, subtracted and accumulated; the
    // remaining lanes of zmm_sum_0 are left untouched by the mask3 FMA.
    __m512h zmm_d = _mm512_mask_sub_ph(
        zmm_undefined_ph, mask,
        _mm512_castsi512_ph(_mm512_mask_loadu_epi16(zmm_undefined, mask, lhs)),
        _mm512_castsi512_ph(_mm512_mask_loadu_epi16(zmm_undefined, mask, rhs)));
    zmm_sum_0 = _mm512_mask3_fmadd_ph(zmm_d, zmm_d, zmm_sum_0, mask);
  }

  return HorizontalAdd_FP16_V512(zmm_sum_0);
}
#endif
}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/euclidean_distance_matrix_fp16_dispatch.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <ailego/internal/cpu_features.h>
#include "euclidean_distance_matrix.h"

namespace zvec {
namespace ailego {

// Forward declarations of the per-ISA FP16 kernels used by the dispatcher
// below. They are defined in other translation units; each one is only
// declared when the matching compiler feature macro is defined.

#if defined(__ARM_NEON)
//! FP16 squared Euclidean distance, NEON kernel.
float SquaredEuclideanDistanceFp16NEON(const Float16 *lhs, const Float16 *rhs,
                                       size_t size);
#endif

#if defined(__AVX512FP16__)
//! FP16 squared Euclidean distance, AVX512-FP16 kernel.
float SquaredEuclideanDistanceFp16AVX512FP16(const Float16 *lhs,
                                             const Float16 *rhs, size_t size);
#endif

#if defined(__AVX512F__)
//! FP16 squared Euclidean distance, AVX-512F kernel.
float SquaredEuclideanDistanceFp16AVX512(const Float16 *lhs, const Float16 *rhs,
                                         size_t size);
#endif

#if defined(__AVX__)
//! FP16 squared Euclidean distance, AVX kernel.
float SquaredEuclideanDistanceFp16AVX(const Float16 *lhs, const Float16 *rhs,
                                      size_t size);
#endif

//! FP16 squared Euclidean distance, scalar kernel (always declared).
float SquaredEuclideanDistanceFp16Scalar(const Float16 *lhs, const Float16 *rhs,
                                         size_t size);

//! Squared Euclidean distance for one FP16 vector pair (M=1, N=1).
//!
//! NEON builds always use the NEON kernel. Otherwise the kernels that were
//! compiled in are tried from widest to narrowest, each one gated by its
//! CpuFeatures::static_flags_ field, with the scalar kernel as the fallback.
void SquaredEuclideanDistanceMatrix<Float16, 1, 1>::Compute(const ValueType *m,
                                                            const ValueType *q,
                                                            size_t dim,
                                                            float *out) {
#if defined(__ARM_NEON)
  *out = SquaredEuclideanDistanceFp16NEON(m, q, dim);
#else
#if defined(__AVX512FP16__) || defined(__AVX512F__) || defined(__AVX__)
  // Short alias for the feature flags; only declared when something reads it.
  const auto &cpu = zvec::ailego::internal::CpuFeatures::static_flags_;
#endif

#if defined(__AVX512FP16__)
  if (cpu.AVX512_FP16) {
    *out = SquaredEuclideanDistanceFp16AVX512FP16(m, q, dim);
    return;
  }
#endif  // __AVX512FP16__

#if defined(__AVX512F__)
  if (cpu.AVX512F) {
    *out = SquaredEuclideanDistanceFp16AVX512(m, q, dim);
    return;
  }
#endif  // __AVX512F__

#if defined(__AVX__)
  if (cpu.AVX) {
    *out = SquaredEuclideanDistanceFp16AVX(m, q, dim);
    return;
  }
#endif  // __AVX__

  // No SIMD kernel was selected.
  *out = SquaredEuclideanDistanceFp16Scalar(m, q, dim);
#endif  // __ARM_NEON
}

//! Euclidean distance for one FP16 vector pair (M=1, N=1): the square root
//! of the squared distance.
void EuclideanDistanceMatrix<Float16, 1, 1>::Compute(const ValueType *m,
                                                     const ValueType *q,
                                                     size_t dim, float *out) {
  float squared = 0.0f;
  SquaredEuclideanDistanceMatrix<Float16, 1, 1>::Compute(m, q, dim, &squared);
  *out = std::sqrt(squared);
}

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/math/euclidean_distance_matrix_fp16_neon.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_accum_fp16.i"
#include "distance_matrix_euclidean_utility.i"
#include "euclidean_distance_matrix.h"

namespace zvec {
namespace ailego {

#if defined(__ARM_NEON)
//! Squared Euclidean distance between two FP16 vectors (NEON build).
//!
//! The whole computation is delegated to the ACCUM_FP16_1X1_NEON macro, which
//! is defined outside this file (presumably in distance_matrix_accum_fp16.i
//! -- confirm there for the exact kernel).
//!
//! @param lhs   first FP16 vector
//! @param rhs   second FP16 vector
//! @param size  forwarded to the macro (presumably the element count --
//!              confirm against the macro definition)
//! @return the value left in `score` after the macro has run
float SquaredEuclideanDistanceFp16NEON(const Float16 *lhs, const Float16 *rhs,
                                       size_t size) {
  float score{0.0f};

  // `&score` is handed to the macro as its output argument. The trailing
  // comma is intentional: the last macro argument is deliberately empty.
  ACCUM_FP16_1X1_NEON(lhs, rhs, size, &score, 0ull, )

  return score;
}
#endif

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/math/euclidean_distance_matrix_fp32_avx.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_accum_fp32.i"
#include "distance_matrix_euclidean_utility.i"
#include "euclidean_distance_matrix.h"

namespace zvec {
namespace ailego {

#if defined(__AVX__)
//! 128-bit FP32 kernel, defined in another translation unit. The AVX entry
//! point in this file forwards short inputs (size <= 7) to it.
float SquaredEuclideanDistanceFp32SSEInternal(const float *lhs,
                                              const float *rhs, size_t size);

float SquaredEuclideanDistanceFp32AVXInternal(const float *lhs,
                                              const float *rhs, size_t size) {
  const float *last = lhs + size;
  const float *last_aligned = lhs + ((size >> 4) << 4);

  __m256 ymm_sum_0 = _mm256_setzero_ps();
  __m256 ymm_sum_1 = _mm256_setzero_ps();

  if (((uintptr_t)lhs & 0x1f) == 0 && ((uintptr_t)rhs & 0x1f) == 0) {
    for (; lhs != last_aligned; lhs += 16, rhs += 16) {
      __m256 ymm_d_0 =
          _mm256_sub_ps(_mm256_load_ps(lhs + 0), _mm256_load_ps(rhs + 0));
      __m256 ymm_d_1 =
          _mm256_sub_ps(_mm256_load_ps(lhs + 8), _mm256_load_ps(rhs + 8));
      ymm_sum_0 = _mm256_fmadd_ps(ymm_d_0, ymm_d_0, ymm_sum_0);
      ymm_sum_1 = _mm256_fmadd_ps(ymm_d_1, ymm_d_1, ymm_sum_1);
    }

    if (last >= last_aligned + 8) {
      __m256 ymm_d = _mm256_sub_ps(_mm256_load_ps(lhs), _mm256_load_ps(rhs));
      ymm_sum_0 = _mm256_fmadd_ps(ymm_d, ymm_d, ymm_sum_0);
      lhs += 8;
      rhs += 8;
    }
  } else {
    for (; lhs != last_aligned; lhs += 16, rhs += 16) {
      __m256 ymm_d_0 =
          _mm256_sub_ps(_mm256_loadu_ps(lhs + 0), _mm256_loadu_ps(rhs + 0));
      __m256 ymm_d_1 =
          _mm256_sub_ps(_mm256_loadu_ps(lhs + 8), _mm256_loadu_ps(rhs + 8));
      ymm_sum_0 = _mm256_fmadd_ps(ymm_d_0, ymm_d_0, ymm_sum_0);
      ymm_sum_1 = _mm256_fmadd_ps(ymm_d_1, ymm_d_1, ymm_sum_1);
    }

    if (last >= last_aligned + 8) {
      __m256 ymm_d = _mm256_sub_ps(_mm256_loadu_ps(lhs), _mm256_loadu_ps(rhs));
      ymm_sum_0 = _mm256_fmadd_ps(ymm_d, ymm_d, ymm_sum_0);
      lhs += 8;
      rhs += 8;
    }
  }
  float result = HorizontalAdd_FP32_V256(_mm256_add_ps(ymm_sum_0, ymm_sum_1));

  switch (last - lhs) {
    case 7:
      SSD_FP32_GENERAL(lhs[6], rhs[6], result)
      /* FALLTHRU */
    case 6:
      SSD_FP32_GENERAL(lhs[5], rhs[5], result)
      /* FALLTHRU */
    case 5:
      SSD_FP32_GENERAL(lhs[4], rhs[4], result)
      /* FALLTHRU */
    case 4:
      SSD_FP32_GENERAL(lhs[3], rhs[3], result)
      /* FALLTHRU */
    case 3:
      SSD_FP32_GENERAL(lhs[2], rhs[2], result)
      /* FALLTHRU */
    case 2:
      SSD_FP32_GENERAL(lhs[1], rhs[1], result)
      /* FALLTHRU */
    case 1:
      SSD_FP32_GENERAL(lhs[0], rhs[0], result)
  }
  return result;
}

//! FP32 squared Euclidean distance entry point for AVX builds.
//! Inputs with at least 8 elements go to the 256-bit kernel, shorter ones to
//! the 128-bit kernel.
float SquaredEuclideanDistanceFp32AVX(const float *lhs, const float *rhs,
                                      size_t size) {
  return size >= 8 ? SquaredEuclideanDistanceFp32AVXInternal(lhs, rhs, size)
                   : SquaredEuclideanDistanceFp32SSEInternal(lhs, rhs, size);
}

#endif  // __AVX__

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/math/euclidean_distance_matrix_fp32_avx512.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_accum_fp32.i"
#include "distance_matrix_euclidean_utility.i"
#include "euclidean_distance_matrix.h"

namespace zvec {
namespace ailego {

#if defined(__AVX512F__)
//! 128-bit FP32 kernel, defined in another translation unit. The AVX-512
//! entry point in this file forwards inputs with size <= 7 to it.
float SquaredEuclideanDistanceFp32SSEInternal(const float *lhs,
                                              const float *rhs, size_t size);

//! 256-bit FP32 kernel, defined in another translation unit. The AVX-512
//! entry point in this file forwards inputs with 8 <= size <= 15 to it.
float SquaredEuclideanDistanceFp32AVXInternal(const float *lhs,
                                              const float *rhs, size_t size);

float SquaredEuclideanDistanceFp32AVX512Internal(const float *lhs,
                                                 const float *rhs,
                                                 size_t size) {
  const float *last = lhs + size;
  const float *last_aligned = lhs + ((size >> 5) << 5);

  __m512 zmm_sum_0 = _mm512_setzero_ps();
  __m512 zmm_sum_1 = _mm512_setzero_ps();

  if (((uintptr_t)lhs & 0x3f) == 0 && ((uintptr_t)rhs & 0x3f) == 0) {
    for (; lhs != last_aligned; lhs += 32, rhs += 32) {
      __m512 zmm_d_0 =
          _mm512_sub_ps(_mm512_load_ps(lhs + 0), _mm512_load_ps(rhs + 0));
      __m512 zmm_d_1 =
          _mm512_sub_ps(_mm512_load_ps(lhs + 16), _mm512_load_ps(rhs + 16));
      zmm_sum_0 = _mm512_fmadd_ps(zmm_d_0, zmm_d_0, zmm_sum_0);
      zmm_sum_1 = _mm512_fmadd_ps(zmm_d_1, zmm_d_1, zmm_sum_1);
    }

    if (last >= last_aligned + 16) {
      __m512 zmm_d = _mm512_sub_ps(_mm512_load_ps(lhs), _mm512_load_ps(rhs));
      zmm_sum_0 = _mm512_fmadd_ps(zmm_d, zmm_d, zmm_sum_0);
      lhs += 16;
      rhs += 16;
    }
  } else {
    for (; lhs != last_aligned; lhs += 32, rhs += 32) {
      __m512 zmm_d_0 =
          _mm512_sub_ps(_mm512_loadu_ps(lhs + 0), _mm512_loadu_ps(rhs + 0));
      __m512 zmm_d_1 =
          _mm512_sub_ps(_mm512_loadu_ps(lhs + 16), _mm512_loadu_ps(rhs + 16));
      zmm_sum_0 = _mm512_fmadd_ps(zmm_d_0, zmm_d_0, zmm_sum_0);
      zmm_sum_1 = _mm512_fmadd_ps(zmm_d_1, zmm_d_1, zmm_sum_1);
    }

    if (last >= last_aligned + 16) {
      __m512 zmm_d = _mm512_sub_ps(_mm512_loadu_ps(lhs), _mm512_loadu_ps(rhs));
      zmm_sum_0 = _mm512_fmadd_ps(zmm_d, zmm_d, zmm_sum_0);
      lhs += 16;
      rhs += 16;
    }
  }

  zmm_sum_0 = _mm512_add_ps(zmm_sum_0, zmm_sum_1);
  if (lhs != last) {
    __mmask16 mask = (__mmask16)((1 << (last - lhs)) - 1);
    __m512 zmm_undefined = _mm512_undefined_ps();
    __m512 zmm_d = _mm512_mask_sub_ps(
        zmm_undefined, mask, _mm512_mask_loadu_ps(zmm_undefined, mask, lhs),
        _mm512_mask_loadu_ps(zmm_undefined, mask, rhs));
    zmm_sum_0 = _mm512_mask3_fmadd_ps(zmm_d, zmm_d, zmm_sum_0, mask);
  }
  return HorizontalAdd_FP32_V512(zmm_sum_0);
}

//! FP32 squared Euclidean distance entry point for AVX-512 builds.
//! Picks the kernel by input length: 16 or more elements use the 512-bit
//! kernel, 8..15 the 256-bit kernel, and fewer than 8 the 128-bit kernel.
float SquaredEuclideanDistanceFp32AVX512(const float *lhs, const float *rhs,
                                         size_t size) {
  if (size >= 16) {
    return SquaredEuclideanDistanceFp32AVX512Internal(lhs, rhs, size);
  } else if (size >= 8) {
    return SquaredEuclideanDistanceFp32AVXInternal(lhs, rhs, size);
  } else {
    return SquaredEuclideanDistanceFp32SSEInternal(lhs, rhs, size);
  }
}

#endif

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/math/euclidean_distance_matrix_fp32_dispatch.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <ailego/internal/cpu_features.h>
#include "euclidean_distance_matrix.h"

namespace zvec {
namespace ailego {

// Forward declarations of the per-ISA FP32 kernels used by the dispatcher
// below. They are defined in other translation units; each one is only
// declared when the matching compiler feature macro is defined.

#if defined(__ARM_NEON)
//! FP32 squared Euclidean distance, NEON kernel. Unlike the x86 kernels it
//! writes its result through `out` instead of returning it.
void SquaredEuclideanDistanceFp32NEON(const float *lhs, const float *rhs,
                                      size_t size, float *out);
#endif

#if defined(__AVX512F__)
//! FP32 squared Euclidean distance, AVX-512F entry point.
float SquaredEuclideanDistanceFp32AVX512(const float *lhs, const float *rhs,
                                         size_t size);
#endif

#if defined(__AVX__)
//! FP32 squared Euclidean distance, AVX entry point.
float SquaredEuclideanDistanceFp32AVX(const float *lhs, const float *rhs,
                                      size_t size);
#endif

#if defined(__SSE__)
//! FP32 squared Euclidean distance, SSE entry point.
float SquaredEuclideanDistanceFp32SSE(const float *lhs, const float *rhs,
                                      size_t size);
#endif

//! FP32 squared Euclidean distance, scalar kernel (always declared).
float SquaredEuclideanDistanceFp32Scalar(const float *lhs, const float *rhs,
                                         size_t size);

//-----------------------------------------------------------
//  SquaredEuclideanDistance
//-----------------------------------------------------------
//! Squared Euclidean distance for one FP32 vector pair (M=1, N=1).
//!
//! NEON builds always use the NEON kernel. Otherwise the kernels that were
//! compiled in are tried from widest to narrowest, each one gated by its
//! CpuFeatures::static_flags_ field, with the scalar kernel as the fallback.
void SquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(const ValueType *m,
                                                          const ValueType *q,
                                                          size_t dim,
                                                          float *out) {
#if defined(__ARM_NEON)
  // The NEON kernel stores its result through `out` itself.
  SquaredEuclideanDistanceFp32NEON(m, q, dim, out);
#else
#if defined(__AVX512F__) || defined(__AVX__) || defined(__SSE__)
  // Short alias for the feature flags; only declared when something reads it.
  const auto &cpu = zvec::ailego::internal::CpuFeatures::static_flags_;
#endif

#if defined(__AVX512F__)
  if (cpu.AVX512F) {
    *out = SquaredEuclideanDistanceFp32AVX512(m, q, dim);
    return;
  }
#endif  // __AVX512F__

#if defined(__AVX__)
  if (cpu.AVX) {
    *out = SquaredEuclideanDistanceFp32AVX(m, q, dim);
    return;
  }
#endif  // __AVX__

#if defined(__SSE__)
  if (cpu.SSE) {
    *out = SquaredEuclideanDistanceFp32SSE(m, q, dim);
    return;
  }
#endif  // __SSE__

  // No SIMD kernel was selected.
  *out = SquaredEuclideanDistanceFp32Scalar(m, q, dim);
#endif  // __ARM_NEON
}

//-----------------------------------------------------------
//  EuclideanDistance
//-----------------------------------------------------------
//! Euclidean distance for one FP32 vector pair (M=1, N=1): the square root
//! of the squared distance.
void EuclideanDistanceMatrix<float, 1, 1>::Compute(const ValueType *m,
                                                   const ValueType *q,
                                                   size_t dim, float *out) {
  float squared = 0.0f;
  SquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(m, q, dim, &squared);
  *out = std::sqrt(squared);
}

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/math/euclidean_distance_matrix_fp32_neon.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_accum_fp32.i"
#include "distance_matrix_euclidean_utility.i"
#include "euclidean_distance_matrix.h"

namespace zvec {
namespace ailego {

#if defined(__ARM_NEON)
//! Squared Euclidean Distance
void SquaredEuclideanDistanceFp32NEON(const float *lhs, const float *rhs,
                                      size_t size, float *out) {
  const float *last = lhs + size;
  const float *last_aligned = lhs + ((size >> 3) << 3);

  float32x4_t v_sum_0 = vdupq_n_f32(0);
  float32x4_t v_sum_1 = vdupq_n_f32(0);

  for (; lhs != last_aligned; lhs += 8, rhs += 8) {
    float32x4_t v_d_0 = vsubq_f32(vld1q_f32(lhs + 0), vld1q_f32(rhs + 0));
    float32x4_t v_d_1 = vsubq_f32(vld1q_f32(lhs + 4), vld1q_f32(rhs + 4));
    v_sum_0 = vfmaq_f32(v_sum_0, v_d_0, v_d_0);
    v_sum_1 = vfmaq_f32(v_sum_1, v_d_1, v_d_1);
  }
  if (last >= last_aligned + 4) {
    float32x4_t v_d = vsubq_f32(vld1q_f32(lhs), vld1q_f32(rhs));
    v_sum_0 = vfmaq_f32(v_sum_0, v_d, v_d);
    lhs += 4;
    rhs += 4;
  }

  float result = vaddvq_f32(vaddq_f32(v_sum_0, v_sum_1));
  switch (last - lhs) {
    case 3:
      SSD_FP32_GENERAL(lhs[2], rhs[2], result)
      /* FALLTHRU */
    case 2:
      SSD_FP32_GENERAL(lhs[1], rhs[1], result)
      /* FALLTHRU */
    case 1:
      SSD_FP32_GENERAL(lhs[0], rhs[0], result)
  }
  *out = result;
}

#endif  // __ARM_NEON

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/math/euclidean_distance_matrix_fp32_sse.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_accum_fp32.i"
#include "distance_matrix_euclidean_utility.i"
#include "euclidean_distance_matrix.h"

namespace zvec {
namespace ailego {

#if defined(__SSE__)
float SquaredEuclideanDistanceFp32SSEInternal(const float *lhs,
                                              const float *rhs, size_t size) {
  const float *last = lhs + size;
  const float *last_aligned = lhs + ((size >> 3) << 3);

  __m128 xmm_sum_0 = _mm_setzero_ps();
  __m128 xmm_sum_1 = _mm_setzero_ps();

  if (((uintptr_t)lhs & 0xf) == 0 && ((uintptr_t)rhs & 0xf) == 0) {
    for (; lhs != last_aligned; lhs += 8, rhs += 8) {
      __m128 xmm_d_0 = _mm_sub_ps(_mm_load_ps(lhs + 0), _mm_load_ps(rhs + 0));
      __m128 xmm_d_1 = _mm_sub_ps(_mm_load_ps(lhs + 4), _mm_load_ps(rhs + 4));
      xmm_sum_0 = _mm_fmadd_ps(xmm_d_0, xmm_d_0, xmm_sum_0);
      xmm_sum_1 = _mm_fmadd_ps(xmm_d_1, xmm_d_1, xmm_sum_1);
    }

    if (last >= last_aligned + 4) {
      __m128 xmm_d = _mm_sub_ps(_mm_load_ps(lhs), _mm_load_ps(rhs));
      xmm_sum_0 = _mm_fmadd_ps(xmm_d, xmm_d, xmm_sum_0);
      lhs += 4;
      rhs += 4;
    }
  } else {
    for (; lhs != last_aligned; lhs += 8, rhs += 8) {
      __m128 xmm_d_0 = _mm_sub_ps(_mm_loadu_ps(lhs + 0), _mm_loadu_ps(rhs + 0));
      __m128 xmm_d_1 = _mm_sub_ps(_mm_loadu_ps(lhs + 4), _mm_loadu_ps(rhs + 4));
      xmm_sum_0 = _mm_fmadd_ps(xmm_d_0, xmm_d_0, xmm_sum_0);
      xmm_sum_1 = _mm_fmadd_ps(xmm_d_1, xmm_d_1, xmm_sum_1);
    }

    if (last >= last_aligned + 4) {
      __m128 xmm_d = _mm_sub_ps(_mm_loadu_ps(lhs), _mm_loadu_ps(rhs));
      xmm_sum_0 = _mm_fmadd_ps(xmm_d, xmm_d, xmm_sum_0);
      lhs += 4;
      rhs += 4;
    }
  }
  float result = HorizontalAdd_FP32_V128(_mm_add_ps(xmm_sum_0, xmm_sum_1));

  switch (last - lhs) {
    case 3:
      SSD_FP32_GENERAL(lhs[2], rhs[2], result)
      /* FALLTHRU */
    case 2:
      SSD_FP32_GENERAL(lhs[1], rhs[1], result)
      /* FALLTHRU */
    case 1:
      SSD_FP32_GENERAL(lhs[0], rhs[0], result)
  }
  return result;
}

//! FP32 squared Euclidean distance entry point for SSE builds; forwards
//! every input length to the 128-bit kernel.
float SquaredEuclideanDistanceFp32SSE(const float *lhs, const float *rhs,
                                      size_t size) {
  const float distance =
      SquaredEuclideanDistanceFp32SSEInternal(lhs, rhs, size);
  return distance;
}

#endif  // __SSE__

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/math/euclidean_distance_matrix_int4_avx2.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_accum_int4.i"
#include "distance_matrix_euclidean_utility.i"
#include "euclidean_distance_matrix.h"

namespace zvec {
namespace ailego {

#if defined(__AVX2__)
//! 128-bit INT4 kernel, defined in another translation unit. The AVX2 entry
//! point in this file forwards short inputs (size <= 63) to it.
float SquaredEuclideanDistanceInt4SSEInternal(const uint8_t *lhs,
                                              const uint8_t *rhs, size_t size);

//! Squared Euclidean distance over packed INT4 data, 256-bit kernel.
//!
//! The main loop consumes 32 bytes per iteration through SSD_INT4_ITER_AVX,
//! followed by at most one 16-byte step through SSD_INT4_ITER_SSE; the last
//! (< 16) bytes go through SSD_INT4_GENERAL one at a time. Aligned loads are
//! used only when both pointers are 32-byte aligned.
//!
//! @param lhs   first buffer
//! @param rhs   second buffer
//! @param size  number of bytes in each buffer
//! @return accumulated result of the SSD_INT4_* macros, as a float
inline float SquaredEuclideanDistanceInt4AVX2Internal(const uint8_t *lhs,
                                                      const uint8_t *rhs,
                                                      size_t size) {
  const uint8_t *const end = lhs + size;
  const uint8_t *const unrolled_end =
      lhs + (size & ~static_cast<size_t>(0x1f));

  __m256i ymm_sum = _mm256_setzero_si256();

  const bool aligned = ((reinterpret_cast<uintptr_t>(lhs) |
                         reinterpret_cast<uintptr_t>(rhs)) &
                        0x1f) == 0;
  if (aligned) {
    while (lhs != unrolled_end) {
      __m256i ymm_lhs =
          _mm256_load_si256(reinterpret_cast<const __m256i *>(lhs));
      __m256i ymm_rhs =
          _mm256_load_si256(reinterpret_cast<const __m256i *>(rhs));
      SSD_INT4_ITER_AVX(ymm_lhs, ymm_rhs, ymm_sum)
      lhs += 32;
      rhs += 32;
    }
    if (end - lhs >= 16) {
      __m128i xmm_lhs = _mm_load_si128(reinterpret_cast<const __m128i *>(lhs));
      __m128i xmm_rhs = _mm_load_si128(reinterpret_cast<const __m128i *>(rhs));
      __m128i xmm_sum = _mm_setzero_si128();
      SSD_INT4_ITER_SSE(xmm_lhs, xmm_rhs, xmm_sum)
      // Fold the 128-bit partial sum into the low half of the accumulator.
      ymm_sum = _mm256_add_epi32(_mm256_set_m128i(_mm_setzero_si128(), xmm_sum),
                                 ymm_sum);
      lhs += 16;
      rhs += 16;
    }
  } else {
    while (lhs != unrolled_end) {
      __m256i ymm_lhs =
          _mm256_loadu_si256(reinterpret_cast<const __m256i *>(lhs));
      __m256i ymm_rhs =
          _mm256_loadu_si256(reinterpret_cast<const __m256i *>(rhs));
      SSD_INT4_ITER_AVX(ymm_lhs, ymm_rhs, ymm_sum)
      lhs += 32;
      rhs += 32;
    }
    if (end - lhs >= 16) {
      __m128i xmm_lhs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(lhs));
      __m128i xmm_rhs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(rhs));
      __m128i xmm_sum = _mm_setzero_si128();
      SSD_INT4_ITER_SSE(xmm_lhs, xmm_rhs, xmm_sum)
      // Fold the 128-bit partial sum into the low half of the accumulator.
      ymm_sum = _mm256_add_epi32(_mm256_set_m128i(_mm_setzero_si128(), xmm_sum),
                                 ymm_sum);
      lhs += 16;
      rhs += 16;
    }
  }
  float result = static_cast<float>(HorizontalAdd_INT32_V256(ymm_sum));

  // Fewer than 16 bytes are left; add them from the highest index down.
  for (auto tail = end - lhs; tail > 0; --tail) {
    SSD_INT4_GENERAL(lhs[tail - 1], rhs[tail - 1], result)
  }
  return result;
}

//! INT4 squared Euclidean distance entry point for AVX2 builds.
//! `size` is halved before being handed to the byte-oriented kernels
//! (presumably two 4-bit values per byte -- confirm against callers).
//! Inputs with size > 63 use the 256-bit kernel, the rest the 128-bit one.
float SquaredEuclideanDistanceInt4AVX2(const uint8_t *lhs, const uint8_t *rhs,
                                       size_t size) {
  const size_t bytes = size >> 1;
  return size >= 64
             ? SquaredEuclideanDistanceInt4AVX2Internal(lhs, rhs, bytes)
             : SquaredEuclideanDistanceInt4SSEInternal(lhs, rhs, bytes);
}

#endif  // __AVX2__

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/math/euclidean_distance_matrix_int4_dispatch.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <ailego/internal/cpu_features.h>
#include "euclidean_distance_matrix.h"

namespace zvec {
namespace ailego {

// Forward declarations of the per-ISA INT4 kernels used by the dispatcher
// below. They are defined in other translation units; each one is only
// declared when the matching compiler feature macro is defined.

#if defined(__AVX2__)
//! INT4 squared Euclidean distance, AVX2 entry point.
float SquaredEuclideanDistanceInt4AVX2(const uint8_t *lhs, const uint8_t *rhs,
                                       size_t size);
#endif

#if defined(__SSE4_1__)
//! INT4 squared Euclidean distance, SSE4.1 entry point.
float SquaredEuclideanDistanceInt4SSE(const uint8_t *lhs, const uint8_t *rhs,
                                      size_t size);
#endif

//! INT4 squared Euclidean distance, scalar kernel (always declared).
float SquaredEuclideanDistanceInt4Scalar(const uint8_t *lhs, const uint8_t *rhs,
                                         size_t size);

//! Squared Euclidean distance for one INT4 vector pair (M=1, N=1).
//!
//! The kernels that were compiled in are tried from widest to narrowest,
//! each one gated by its CpuFeatures::static_flags_ field, with the scalar
//! kernel as the fallback.
void SquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(const ValueType *m,
                                                            const ValueType *q,
                                                            size_t dim,
                                                            float *out) {
#if defined(__AVX2__) || defined(__SSE4_1__)
  // Short alias for the feature flags; only declared when something reads it.
  const auto &cpu = zvec::ailego::internal::CpuFeatures::static_flags_;
#endif

#if defined(__AVX2__)
  if (cpu.AVX2) {
    *out = SquaredEuclideanDistanceInt4AVX2(m, q, dim);
    return;
  }
#endif  // __AVX2__

#if defined(__SSE4_1__)
  if (cpu.SSE4_1) {
    *out = SquaredEuclideanDistanceInt4SSE(m, q, dim);
    return;
  }
#endif  // __SSE4_1__

  // No SIMD kernel was selected.
  *out = SquaredEuclideanDistanceInt4Scalar(m, q, dim);
}

//! Euclidean distance for one INT4 vector pair (M=1, N=1): the square root
//! of the squared distance.
void EuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(const ValueType *m,
                                                     const ValueType *q,
                                                     size_t dim, float *out) {
  float squared = 0.0f;
  SquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(m, q, dim, &squared);
  *out = std::sqrt(squared);
}

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/math/euclidean_distance_matrix_int4_sse.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_accum_int4.i"
#include "distance_matrix_euclidean_utility.i"
#include "euclidean_distance_matrix.h"

namespace zvec {
namespace ailego {

#if defined(__SSE4_1__)
//! Squared Euclidean distance over packed INT4 buffers (SSE4.1 kernel).
//!
//! @param lhs   first packed buffer
//! @param rhs   second packed buffer
//! @param size  number of BYTES to process from each buffer (the public
//!              wrapper in this file passes the element count shifted right
//!              by one)
//! @return the accumulated sum, converted to float
//!
//! NOTE(review): the per-byte arithmetic lives in the SSD_INT4_ITER_SSE and
//! SSD_INT4_GENERAL macros, which are defined outside this file; presumably
//! each byte carries two 4-bit values -- confirm against the included .i
//! headers.
float SquaredEuclideanDistanceInt4SSEInternal(const uint8_t *lhs,
                                              const uint8_t *rhs, size_t size) {
  // `last` is one past the final byte; `last_aligned` is the end of the
  // region that can be consumed in whole 16-byte vectors.
  const uint8_t *last = lhs + size;
  const uint8_t *last_aligned = lhs + ((size >> 4) << 4);

  __m128i xmm_sum = _mm_setzero_si128();

  // Use aligned loads only when BOTH pointers are 16-byte aligned; otherwise
  // run the same loop with unaligned loads.
  if (((uintptr_t)lhs & 0xf) == 0 && ((uintptr_t)rhs & 0xf) == 0) {
    for (; lhs != last_aligned; lhs += 16, rhs += 16) {
      __m128i xmm_lhs = _mm_load_si128((const __m128i *)(lhs));
      __m128i xmm_rhs = _mm_load_si128((const __m128i *)(rhs));
      SSD_INT4_ITER_SSE(xmm_lhs, xmm_rhs, xmm_sum)
    }
  } else {
    for (; lhs != last_aligned; lhs += 16, rhs += 16) {
      __m128i xmm_lhs = _mm_loadu_si128((const __m128i *)(lhs));
      __m128i xmm_rhs = _mm_loadu_si128((const __m128i *)(rhs));
      SSD_INT4_ITER_SSE(xmm_lhs, xmm_rhs, xmm_sum)
    }
  }
  // Collapse the vector accumulator into a single scalar.
  float result = static_cast<float>(HorizontalAdd_INT32_V128(xmm_sum));

  // Tail: between 0 and 15 bytes remain. Each case deliberately falls
  // through so that a remainder of k processes bytes k-1 down to 0.
  switch (last - lhs) {
    case 15:
      SSD_INT4_GENERAL(lhs[14], rhs[14], result)
      /* FALLTHRU */
    case 14:
      SSD_INT4_GENERAL(lhs[13], rhs[13], result)
      /* FALLTHRU */
    case 13:
      SSD_INT4_GENERAL(lhs[12], rhs[12], result)
      /* FALLTHRU */
    case 12:
      SSD_INT4_GENERAL(lhs[11], rhs[11], result)
      /* FALLTHRU */
    case 11:
      SSD_INT4_GENERAL(lhs[10], rhs[10], result)
      /* FALLTHRU */
    case 10:
      SSD_INT4_GENERAL(lhs[9], rhs[9], result)
      /* FALLTHRU */
    case 9:
      SSD_INT4_GENERAL(lhs[8], rhs[8], result)
      /* FALLTHRU */
    case 8:
      SSD_INT4_GENERAL(lhs[7], rhs[7], result)
      /* FALLTHRU */
    case 7:
      SSD_INT4_GENERAL(lhs[6], rhs[6], result)
      /* FALLTHRU */
    case 6:
      SSD_INT4_GENERAL(lhs[5], rhs[5], result)
      /* FALLTHRU */
    case 5:
      SSD_INT4_GENERAL(lhs[4], rhs[4], result)
      /* FALLTHRU */
    case 4:
      SSD_INT4_GENERAL(lhs[3], rhs[3], result)
      /* FALLTHRU */
    case 3:
      SSD_INT4_GENERAL(lhs[2], rhs[2], result)
      /* FALLTHRU */
    case 2:
      SSD_INT4_GENERAL(lhs[1], rhs[1], result)
      /* FALLTHRU */
    case 1:
      SSD_INT4_GENERAL(lhs[0], rhs[0], result)
  }
  return result;
}

//! Squared Euclidean distance for INT4 vectors (SSE4.1 entry point).
//! `size` is halved before being handed to the byte-oriented kernel.
float SquaredEuclideanDistanceInt4SSE(const uint8_t *lhs, const uint8_t *rhs,
                                      size_t size) {
  const size_t byte_count = size / 2;
  return SquaredEuclideanDistanceInt4SSEInternal(lhs, rhs, byte_count);
}

#endif  // __SSE4_1__

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/math/euclidean_distance_matrix_int8_avx2.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_accum_int8.i"
#include "distance_matrix_euclidean_utility.i"
#include "euclidean_distance_matrix.h"

namespace zvec {
namespace ailego {

#if defined(__AVX2__)
//! Forward declaration of the 128-bit INT8 kernel. Its definition lives in
//! euclidean_distance_matrix_int8_sse.cc; this file calls it for inputs that
//! are too short for the 256-bit path.
float SquaredEuclideanDistanceInt8SSEInternal(const int8_t *lhs,
                                              const int8_t *rhs, size_t size);

//! Squared Euclidean distance between two INT8 vectors (AVX2 kernel).
//!
//! @param lhs   first vector
//! @param rhs   second vector
//! @param size  number of int8 elements in each vector
//! @return sum of squared element-wise differences, as a float
//!
//! Layout of the computation: 64-element main loop, then one optional
//! 32-element step, then one optional 16-element step using 128-bit
//! registers, then a scalar tail of at most 15 elements.
float SquaredEuclideanDistanceInt8AVX2Internal(const int8_t *lhs,
                                               const int8_t *rhs, size_t size) {
  // `last` is one past the final element; `last_aligned` marks the end of the
  // part that is a whole number of 64-element chunks.
  const int8_t *last = lhs + size;
  const int8_t *last_aligned = lhs + ((size >> 6) << 6);
  float result = 0.0;

  // Two independent 32-bit accumulators (low and high halves of each
  // difference vector); they are merged after the vector loops.
  __m256i ymm_sum_0 = _mm256_setzero_si256();
  __m256i ymm_sum_1 = _mm256_setzero_si256();

  // Aligned loads are used only when BOTH pointers are 32-byte aligned; the
  // else branch repeats the same arithmetic with unaligned loads.
  if (((uintptr_t)lhs & 0x1f) == 0 && ((uintptr_t)rhs & 0x1f) == 0) {
    for (; lhs != last_aligned; lhs += 64, rhs += 64) {
      __m256i ymm_lhs_0 = _mm256_load_si256((const __m256i *)(lhs + 0));
      __m256i ymm_lhs_1 = _mm256_load_si256((const __m256i *)(lhs + 32));
      __m256i ymm_rhs_0 = _mm256_load_si256((const __m256i *)(rhs + 0));
      __m256i ymm_rhs_1 = _mm256_load_si256((const __m256i *)(rhs + 32));

      // |a - b| computed as max(a, b) - min(a, b) on signed bytes; the 8-bit
      // result is then read back as UNSIGNED when widened to 16 bits.
      __m256i ymm_d = _mm256_sub_epi8(_mm256_max_epi8(ymm_lhs_0, ymm_rhs_0),
                                      _mm256_min_epi8(ymm_lhs_0, ymm_rhs_0));
      // The input registers are reused to hold the widened low / high halves.
      ymm_lhs_0 = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(ymm_d));
      ymm_rhs_0 = _mm256_cvtepu8_epi16(_mm256_extractf128_si256(ymm_d, 1));
      // madd(x, x) squares each 16-bit lane and adds adjacent pairs into
      // 32-bit lanes.
      ymm_sum_0 =
          _mm256_add_epi32(_mm256_madd_epi16(ymm_lhs_0, ymm_lhs_0), ymm_sum_0);
      ymm_sum_1 =
          _mm256_add_epi32(_mm256_madd_epi16(ymm_rhs_0, ymm_rhs_0), ymm_sum_1);

      // Same steps for the second 32 elements of the chunk.
      ymm_d = _mm256_sub_epi8(_mm256_max_epi8(ymm_lhs_1, ymm_rhs_1),
                              _mm256_min_epi8(ymm_lhs_1, ymm_rhs_1));
      ymm_lhs_1 = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(ymm_d));
      ymm_rhs_1 = _mm256_cvtepu8_epi16(_mm256_extractf128_si256(ymm_d, 1));
      ymm_sum_0 =
          _mm256_add_epi32(_mm256_madd_epi16(ymm_lhs_1, ymm_lhs_1), ymm_sum_0);
      ymm_sum_1 =
          _mm256_add_epi32(_mm256_madd_epi16(ymm_rhs_1, ymm_rhs_1), ymm_sum_1);
    }

    // At least 32 elements remain: consume one more 256-bit vector.
    if (last >= last_aligned + 32) {
      __m256i ymm_lhs = _mm256_load_si256((const __m256i *)lhs);
      __m256i ymm_rhs = _mm256_load_si256((const __m256i *)rhs);
      __m256i ymm_d = _mm256_sub_epi8(_mm256_max_epi8(ymm_lhs, ymm_rhs),
                                      _mm256_min_epi8(ymm_lhs, ymm_rhs));
      ymm_lhs = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(ymm_d));
      ymm_rhs = _mm256_cvtepu8_epi16(_mm256_extractf128_si256(ymm_d, 1));
      ymm_sum_0 =
          _mm256_add_epi32(_mm256_madd_epi16(ymm_lhs, ymm_lhs), ymm_sum_0);
      ymm_sum_1 =
          _mm256_add_epi32(_mm256_madd_epi16(ymm_rhs, ymm_rhs), ymm_sum_1);
      lhs += 32;
      rhs += 32;
    }
  } else {
    // Unaligned variant of the loop above.
    for (; lhs != last_aligned; lhs += 64, rhs += 64) {
      __m256i ymm_lhs_0 = _mm256_loadu_si256((const __m256i *)(lhs + 0));
      __m256i ymm_lhs_1 = _mm256_loadu_si256((const __m256i *)(lhs + 32));
      __m256i ymm_rhs_0 = _mm256_loadu_si256((const __m256i *)(rhs + 0));
      __m256i ymm_rhs_1 = _mm256_loadu_si256((const __m256i *)(rhs + 32));

      __m256i ymm_d = _mm256_sub_epi8(_mm256_max_epi8(ymm_lhs_0, ymm_rhs_0),
                                      _mm256_min_epi8(ymm_lhs_0, ymm_rhs_0));
      ymm_lhs_0 = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(ymm_d));
      ymm_rhs_0 = _mm256_cvtepu8_epi16(_mm256_extractf128_si256(ymm_d, 1));
      ymm_sum_0 =
          _mm256_add_epi32(_mm256_madd_epi16(ymm_lhs_0, ymm_lhs_0), ymm_sum_0);
      ymm_sum_1 =
          _mm256_add_epi32(_mm256_madd_epi16(ymm_rhs_0, ymm_rhs_0), ymm_sum_1);

      ymm_d = _mm256_sub_epi8(_mm256_max_epi8(ymm_lhs_1, ymm_rhs_1),
                              _mm256_min_epi8(ymm_lhs_1, ymm_rhs_1));
      ymm_lhs_1 = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(ymm_d));
      ymm_rhs_1 = _mm256_cvtepu8_epi16(_mm256_extractf128_si256(ymm_d, 1));
      ymm_sum_0 =
          _mm256_add_epi32(_mm256_madd_epi16(ymm_lhs_1, ymm_lhs_1), ymm_sum_0);
      ymm_sum_1 =
          _mm256_add_epi32(_mm256_madd_epi16(ymm_rhs_1, ymm_rhs_1), ymm_sum_1);
    }

    // Unaligned variant of the single 32-element step.
    if (last >= last_aligned + 32) {
      __m256i ymm_lhs = _mm256_loadu_si256((const __m256i *)lhs);
      __m256i ymm_rhs = _mm256_loadu_si256((const __m256i *)rhs);
      __m256i ymm_d = _mm256_sub_epi8(_mm256_max_epi8(ymm_lhs, ymm_rhs),
                                      _mm256_min_epi8(ymm_lhs, ymm_rhs));
      ymm_lhs = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(ymm_d));
      ymm_rhs = _mm256_cvtepu8_epi16(_mm256_extractf128_si256(ymm_d, 1));
      ymm_sum_0 =
          _mm256_add_epi32(_mm256_madd_epi16(ymm_lhs, ymm_lhs), ymm_sum_0);
      ymm_sum_1 =
          _mm256_add_epi32(_mm256_madd_epi16(ymm_rhs, ymm_rhs), ymm_sum_1);
      lhs += 32;
      rhs += 32;
    }
  }
  // Merge both accumulators and reduce them to one scalar.
  result = static_cast<float>(
      HorizontalAdd_INT32_V256(_mm256_add_epi32(ymm_sum_0, ymm_sum_1)));

  // At least 16 elements remain: one 128-bit step. Unaligned loads are used
  // unconditionally here.
  if (last >= lhs + 16) {
    __m128i xmm_lhs = _mm_loadu_si128((const __m128i *)lhs);
    __m128i xmm_rhs = _mm_loadu_si128((const __m128i *)rhs);
    // xmm_sum first holds the byte-wise absolute difference, and is then
    // reused for the 32-bit partial sums.
    __m128i xmm_sum = _mm_sub_epi8(_mm_max_epi8(xmm_lhs, xmm_rhs),
                                   _mm_min_epi8(xmm_lhs, xmm_rhs));
    xmm_lhs = _mm_cvtepu8_epi16(xmm_sum);
    xmm_rhs = _mm_cvtepu8_epi16(_mm_unpackhi_epi64(xmm_sum, xmm_sum));
    xmm_sum = _mm_add_epi32(_mm_madd_epi16(xmm_lhs, xmm_lhs),
                            _mm_madd_epi16(xmm_rhs, xmm_rhs));
    result += static_cast<float>(HorizontalAdd_INT32_V128(xmm_sum));
    lhs += 16;
    rhs += 16;
  }
  // Scalar tail: 0..15 elements remain. Cases fall through on purpose so a
  // remainder of k handles elements k-1 down to 0. SSD_INT8_GENERAL is
  // defined outside this file (presumably adds the squared difference to
  // `result` -- confirm in the included .i header).
  switch (last - lhs) {
    case 15:
      SSD_INT8_GENERAL(lhs[14], rhs[14], result)
      /* FALLTHRU */
    case 14:
      SSD_INT8_GENERAL(lhs[13], rhs[13], result)
      /* FALLTHRU */
    case 13:
      SSD_INT8_GENERAL(lhs[12], rhs[12], result)
      /* FALLTHRU */
    case 12:
      SSD_INT8_GENERAL(lhs[11], rhs[11], result)
      /* FALLTHRU */
    case 11:
      SSD_INT8_GENERAL(lhs[10], rhs[10], result)
      /* FALLTHRU */
    case 10:
      SSD_INT8_GENERAL(lhs[9], rhs[9], result)
      /* FALLTHRU */
    case 9:
      SSD_INT8_GENERAL(lhs[8], rhs[8], result)
      /* FALLTHRU */
    case 8:
      SSD_INT8_GENERAL(lhs[7], rhs[7], result)
      /* FALLTHRU */
    case 7:
      SSD_INT8_GENERAL(lhs[6], rhs[6], result)
      /* FALLTHRU */
    case 6:
      SSD_INT8_GENERAL(lhs[5], rhs[5], result)
      /* FALLTHRU */
    case 5:
      SSD_INT8_GENERAL(lhs[4], rhs[4], result)
      /* FALLTHRU */
    case 4:
      SSD_INT8_GENERAL(lhs[3], rhs[3], result)
      /* FALLTHRU */
    case 3:
      SSD_INT8_GENERAL(lhs[2], rhs[2], result)
      /* FALLTHRU */
    case 2:
      SSD_INT8_GENERAL(lhs[1], rhs[1], result)
      /* FALLTHRU */
    case 1:
      SSD_INT8_GENERAL(lhs[0], rhs[0], result)
  }
  return result;
}

//! Squared Euclidean distance for INT8 vectors (AVX2 entry point).
//! Vectors of 32 or more elements go through the 256-bit kernel; shorter
//! ones are handed to the 128-bit kernel.
float SquaredEuclideanDistanceInt8AVX2(const int8_t *lhs, const int8_t *rhs,
                                       size_t size) {
  return size >= 32 ? SquaredEuclideanDistanceInt8AVX2Internal(lhs, rhs, size)
                    : SquaredEuclideanDistanceInt8SSEInternal(lhs, rhs, size);
}
#endif  // __AVX2__

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/math/euclidean_distance_matrix_int8_dispatch.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <ailego/internal/cpu_features.h>
#include "euclidean_distance_matrix.h"

namespace zvec {
namespace ailego {

// Forward declarations of the per-instruction-set INT8 kernels used by the
// dispatcher below. Each one is only declared when the corresponding
// instruction set is enabled at compile time.

// Defined in euclidean_distance_matrix_int8_avx2.cc.
#if defined(__AVX2__)
float SquaredEuclideanDistanceInt8AVX2(const int8_t *lhs, const int8_t *rhs,
                                       size_t size);
#endif

// Defined in euclidean_distance_matrix_int8_sse.cc.
#if defined(__SSE4_1__)
float SquaredEuclideanDistanceInt8SSE(const int8_t *lhs, const int8_t *rhs,
                                      size_t size);
#endif

// Portable fallback, always available; defined in
// euclidean_distance_matrix_scalar.cc.
float SquaredEuclideanDistanceInt8Scalar(const int8_t *lhs, const int8_t *rhs,
                                         size_t size);

//! Squared Euclidean distance between one INT8 vector and one query
//! (M=1, N=1). Picks the widest kernel that was both compiled in and is
//! reported as available by the CPU feature flags, in the order
//! AVX2 -> SSE4.1 -> scalar, and stores its result in `out`.
void SquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute(const ValueType *m,
                                                           const ValueType *q,
                                                           size_t dim,
                                                           float *out) {
  *out = [&]() -> float {
#if defined(__AVX2__)
    if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX2) {
      return SquaredEuclideanDistanceInt8AVX2(m, q, dim);
    }
#endif  // __AVX2__

#if defined(__SSE4_1__)
    if (zvec::ailego::internal::CpuFeatures::static_flags_.SSE4_1) {
      return SquaredEuclideanDistanceInt8SSE(m, q, dim);
    }
#endif  // __SSE4_1__

    return SquaredEuclideanDistanceInt8Scalar(m, q, dim);
  }();
}

//! Euclidean distance between one INT8 vector and one query (M=1, N=1).
//! Delegates to the squared-distance specialization and takes the square
//! root of its result before storing it in `out`.
void EuclideanDistanceMatrix<int8_t, 1, 1>::Compute(const ValueType *m,
                                                    const ValueType *q,
                                                    size_t dim, float *out) {
  float squared = 0.0f;
  SquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute(m, q, dim, &squared);
  *out = std::sqrt(squared);
}

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/math/euclidean_distance_matrix_int8_sse.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_accum_int8.i"
#include "distance_matrix_euclidean_utility.i"
#include "euclidean_distance_matrix.h"

namespace zvec {
namespace ailego {

#if defined(__SSE4_1__)
//! Squared Euclidean distance between two INT8 vectors (SSE4.1 kernel).
//!
//! @param lhs   first vector
//! @param rhs   second vector
//! @param size  number of int8 elements in each vector
//! @return sum of squared element-wise differences, as a float
//!
//! This function is also declared and called from
//! euclidean_distance_matrix_int8_avx2.cc, so it must be an ordinary
//! external-linkage function: it is intentionally NOT `inline`, because an
//! inline function has to be defined in every translation unit that uses it
//! and its out-of-line symbol is not guaranteed to be emitted here.
float SquaredEuclideanDistanceInt8SSEInternal(const int8_t *lhs,
                                              const int8_t *rhs, size_t size) {
  // `last` is one past the final element; `last_aligned` marks the end of the
  // part that is a whole number of 32-element chunks.
  const int8_t *last = lhs + size;
  const int8_t *last_aligned = lhs + ((size >> 5) << 5);

  // Two independent 32-bit accumulators (low and high halves of each
  // difference vector); they are merged after the vector loops.
  __m128i xmm_sum_0 = _mm_setzero_si128();
  __m128i xmm_sum_1 = _mm_setzero_si128();

  // Aligned loads are used only when BOTH pointers are 16-byte aligned; the
  // else branch repeats the same arithmetic with unaligned loads.
  if (((uintptr_t)lhs & 0xf) == 0 && ((uintptr_t)rhs & 0xf) == 0) {
    for (; lhs != last_aligned; lhs += 32, rhs += 32) {
      __m128i xmm_lhs_0 = _mm_load_si128((const __m128i *)(lhs + 0));
      __m128i xmm_lhs_1 = _mm_load_si128((const __m128i *)(lhs + 16));
      __m128i xmm_rhs_0 = _mm_load_si128((const __m128i *)(rhs + 0));
      __m128i xmm_rhs_1 = _mm_load_si128((const __m128i *)(rhs + 16));

      // |a - b| computed as max(a, b) - min(a, b) on signed bytes; the 8-bit
      // result is then read back as UNSIGNED when widened to 16 bits.
      __m128i xmm_d = _mm_sub_epi8(_mm_max_epi8(xmm_lhs_0, xmm_rhs_0),
                                   _mm_min_epi8(xmm_lhs_0, xmm_rhs_0));
      // The input registers are reused to hold the widened low / high halves.
      xmm_lhs_0 = _mm_cvtepu8_epi16(xmm_d);
      xmm_rhs_0 = _mm_cvtepu8_epi16(_mm_unpackhi_epi64(xmm_d, xmm_d));
      xmm_d = _mm_sub_epi8(_mm_max_epi8(xmm_lhs_1, xmm_rhs_1),
                           _mm_min_epi8(xmm_lhs_1, xmm_rhs_1));
      xmm_lhs_1 = _mm_cvtepu8_epi16(xmm_d);
      xmm_rhs_1 = _mm_cvtepu8_epi16(_mm_unpackhi_epi64(xmm_d, xmm_d));

      // madd(x, x) squares each 16-bit lane and adds adjacent pairs into
      // 32-bit lanes.
      xmm_sum_0 =
          _mm_add_epi32(_mm_madd_epi16(xmm_lhs_0, xmm_lhs_0), xmm_sum_0);
      xmm_sum_1 =
          _mm_add_epi32(_mm_madd_epi16(xmm_rhs_0, xmm_rhs_0), xmm_sum_1);
      xmm_sum_0 =
          _mm_add_epi32(_mm_madd_epi16(xmm_lhs_1, xmm_lhs_1), xmm_sum_0);
      xmm_sum_1 =
          _mm_add_epi32(_mm_madd_epi16(xmm_rhs_1, xmm_rhs_1), xmm_sum_1);
    }

    // At least 16 elements remain: consume one more 128-bit vector.
    if (last >= last_aligned + 16) {
      __m128i xmm_lhs = _mm_load_si128((const __m128i *)lhs);
      __m128i xmm_rhs = _mm_load_si128((const __m128i *)rhs);
      __m128i xmm_d = _mm_sub_epi8(_mm_max_epi8(xmm_lhs, xmm_rhs),
                                   _mm_min_epi8(xmm_lhs, xmm_rhs));
      xmm_lhs = _mm_cvtepu8_epi16(xmm_d);
      xmm_rhs = _mm_cvtepu8_epi16(_mm_unpackhi_epi64(xmm_d, xmm_d));
      xmm_sum_0 = _mm_add_epi32(_mm_madd_epi16(xmm_lhs, xmm_lhs), xmm_sum_0);
      xmm_sum_1 = _mm_add_epi32(_mm_madd_epi16(xmm_rhs, xmm_rhs), xmm_sum_1);
      lhs += 16;
      rhs += 16;
    }
  } else {
    // Unaligned variant of the loop above.
    for (; lhs != last_aligned; lhs += 32, rhs += 32) {
      __m128i xmm_lhs_0 = _mm_loadu_si128((const __m128i *)(lhs + 0));
      __m128i xmm_lhs_1 = _mm_loadu_si128((const __m128i *)(lhs + 16));
      __m128i xmm_rhs_0 = _mm_loadu_si128((const __m128i *)(rhs + 0));
      __m128i xmm_rhs_1 = _mm_loadu_si128((const __m128i *)(rhs + 16));

      __m128i xmm_d = _mm_sub_epi8(_mm_max_epi8(xmm_lhs_0, xmm_rhs_0),
                                   _mm_min_epi8(xmm_lhs_0, xmm_rhs_0));
      xmm_lhs_0 = _mm_cvtepu8_epi16(xmm_d);
      xmm_rhs_0 = _mm_cvtepu8_epi16(_mm_unpackhi_epi64(xmm_d, xmm_d));
      xmm_d = _mm_sub_epi8(_mm_max_epi8(xmm_lhs_1, xmm_rhs_1),
                           _mm_min_epi8(xmm_lhs_1, xmm_rhs_1));
      xmm_lhs_1 = _mm_cvtepu8_epi16(xmm_d);
      xmm_rhs_1 = _mm_cvtepu8_epi16(_mm_unpackhi_epi64(xmm_d, xmm_d));

      xmm_sum_0 =
          _mm_add_epi32(_mm_madd_epi16(xmm_lhs_0, xmm_lhs_0), xmm_sum_0);
      xmm_sum_1 =
          _mm_add_epi32(_mm_madd_epi16(xmm_rhs_0, xmm_rhs_0), xmm_sum_1);
      xmm_sum_0 =
          _mm_add_epi32(_mm_madd_epi16(xmm_lhs_1, xmm_lhs_1), xmm_sum_0);
      xmm_sum_1 =
          _mm_add_epi32(_mm_madd_epi16(xmm_rhs_1, xmm_rhs_1), xmm_sum_1);
    }

    // Unaligned variant of the single 16-element step.
    if (last >= last_aligned + 16) {
      __m128i xmm_lhs = _mm_loadu_si128((const __m128i *)lhs);
      __m128i xmm_rhs = _mm_loadu_si128((const __m128i *)rhs);
      __m128i xmm_d = _mm_sub_epi8(_mm_max_epi8(xmm_lhs, xmm_rhs),
                                   _mm_min_epi8(xmm_lhs, xmm_rhs));
      xmm_lhs = _mm_cvtepu8_epi16(xmm_d);
      xmm_rhs = _mm_cvtepu8_epi16(_mm_unpackhi_epi64(xmm_d, xmm_d));
      xmm_sum_0 = _mm_add_epi32(_mm_madd_epi16(xmm_lhs, xmm_lhs), xmm_sum_0);
      xmm_sum_1 = _mm_add_epi32(_mm_madd_epi16(xmm_rhs, xmm_rhs), xmm_sum_1);
      lhs += 16;
      rhs += 16;
    }
  }
  // Merge both accumulators and reduce them to one scalar.
  float result = static_cast<float>(
      HorizontalAdd_INT32_V128(_mm_add_epi32(xmm_sum_0, xmm_sum_1)));

  // Scalar tail: 0..15 elements remain. Cases fall through on purpose so a
  // remainder of k handles elements k-1 down to 0. SSD_INT8_GENERAL is
  // defined outside this file (presumably adds the squared difference to
  // `result` -- confirm in the included .i header).
  switch (last - lhs) {
    case 15:
      SSD_INT8_GENERAL(lhs[14], rhs[14], result)
      /* FALLTHRU */
    case 14:
      SSD_INT8_GENERAL(lhs[13], rhs[13], result)
      /* FALLTHRU */
    case 13:
      SSD_INT8_GENERAL(lhs[12], rhs[12], result)
      /* FALLTHRU */
    case 12:
      SSD_INT8_GENERAL(lhs[11], rhs[11], result)
      /* FALLTHRU */
    case 11:
      SSD_INT8_GENERAL(lhs[10], rhs[10], result)
      /* FALLTHRU */
    case 10:
      SSD_INT8_GENERAL(lhs[9], rhs[9], result)
      /* FALLTHRU */
    case 9:
      SSD_INT8_GENERAL(lhs[8], rhs[8], result)
      /* FALLTHRU */
    case 8:
      SSD_INT8_GENERAL(lhs[7], rhs[7], result)
      /* FALLTHRU */
    case 7:
      SSD_INT8_GENERAL(lhs[6], rhs[6], result)
      /* FALLTHRU */
    case 6:
      SSD_INT8_GENERAL(lhs[5], rhs[5], result)
      /* FALLTHRU */
    case 5:
      SSD_INT8_GENERAL(lhs[4], rhs[4], result)
      /* FALLTHRU */
    case 4:
      SSD_INT8_GENERAL(lhs[3], rhs[3], result)
      /* FALLTHRU */
    case 3:
      SSD_INT8_GENERAL(lhs[2], rhs[2], result)
      /* FALLTHRU */
    case 2:
      SSD_INT8_GENERAL(lhs[1], rhs[1], result)
      /* FALLTHRU */
    case 1:
      SSD_INT8_GENERAL(lhs[0], rhs[0], result)
  }
  return result;
}

//! Squared Euclidean distance for INT8 vectors (SSE4.1 entry point).
//! Forwards all arguments unchanged to the 128-bit kernel.
float SquaredEuclideanDistanceInt8SSE(const int8_t *lhs, const int8_t *rhs,
                                      size_t size) {
  const float squared = SquaredEuclideanDistanceInt8SSEInternal(lhs, rhs, size);
  return squared;
}

#endif  // __SSE4_1__

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/math/euclidean_distance_matrix_scalar.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <ailego/utility/math_helper.h>
#include <zvec/ailego/internal/platform.h>
#include <zvec/ailego/utility/type_helper.h>
#include "distance_utility.h"

namespace zvec {
namespace ailego {

//--------------------------------------------------
// Dense
//--------------------------------------------------
//! Portable reference kernel: sum of MathHelper::SquaredDifference over the
//! `dim` element pairs of `m` and `q`, accumulated in a float in index order.
//! Both pointers must be non-null and `dim` non-zero (asserted).
template <typename T>
inline float SquaredEuclideanDistanceScalar(const T *m, const T *q,
                                            size_t dim) {
  ailego_assert(m && q && dim);

  float total = 0.0f;
  const T *const m_end = m + dim;
  for (const T *lhs = m, *rhs = q; lhs != m_end; ++lhs, ++rhs) {
    total += MathHelper::SquaredDifference(*lhs, *rhs);
  }
  return total;
}

template <typename T>
inline float EuclideanDistanceScalar(const T *m, const T *q, size_t dim) {
  ailego_assert(m && q && dim);

  float sum = 0.0;
  for (size_t i = 0; i < dim; ++i) {
    sum += MathHelper::SquaredDifference(m[i], q[i]);
  }

  return std::sqrt(sum);
}

//! Portable squared distance over packed INT4 data.
//!
//! `dim` must be non-zero and even (asserted); `dim / 2` bytes are read from
//! each buffer. For every byte pair, Int4SquaredDiffTable is looked up twice:
//! once with the two low nibbles and once with the two high nibbles, the
//! nibble taken from `m` forming the upper four bits of the index.
float SquaredEuclideanDistanceInt4Scalar(const uint8_t *m, const uint8_t *q,
                                         size_t dim) {
  ailego_assert(m && q && dim && !(dim & 1));

  const size_t byte_count = dim >> 1;
  float total = 0.0f;
  for (size_t pos = 0; pos != byte_count; ++pos) {
    const uint8_t lhs = m[pos];
    const uint8_t rhs = q[pos];
    const int low_index = ((lhs & 0x0f) << 4) | (rhs & 0x0f);
    const int high_index = (lhs & 0xf0) | (rhs >> 4);
    total += Int4SquaredDiffTable[low_index] + Int4SquaredDiffTable[high_index];
  }
  return total;
}


float EuclideanDistanceInt4Scalar(const uint8_t *m, const uint8_t *q,
                                  size_t dim) {
  ailego_assert(m && q && dim && !(dim & 1));

  float sum = 0.0;
  for (size_t i = 0; i < (dim >> 1); ++i) {
    uint8_t m_val = m[i];
    uint8_t q_val = q[i];
    sum += Int4SquaredDiffTable[((m_val << 4) & 0xf0) | ((q_val >> 0) & 0xf)] +
           Int4SquaredDiffTable[((m_val >> 0) & 0xf0) | ((q_val >> 4) & 0xf)];
  }

  return std::sqrt(sum);
}


//! Scalar squared Euclidean distance for INT8 vectors; a non-template entry
//! point over the generic scalar kernel.
float SquaredEuclideanDistanceInt8Scalar(const int8_t *m, const int8_t *q,
                                         size_t dim) {
  // The element type is deduced from the pointer arguments.
  return SquaredEuclideanDistanceScalar(m, q, dim);
}

//! Scalar Euclidean distance for INT8 vectors; a non-template entry point
//! over the generic scalar kernel.
float EuclideanDistanceInt8Scalar(const int8_t *m, const int8_t *q,
                                  size_t dim) {
  // The element type is deduced from the pointer arguments.
  return EuclideanDistanceScalar(m, q, dim);
}

//! Scalar squared Euclidean distance for Float16 vectors; a non-template
//! entry point over the generic scalar kernel.
float SquaredEuclideanDistanceFp16Scalar(const ailego::Float16 *m,
                                         const ailego::Float16 *q, size_t dim) {
  // The element type is deduced from the pointer arguments.
  return SquaredEuclideanDistanceScalar(m, q, dim);
}

//! Scalar Euclidean distance for Float16 vectors; a non-template entry point
//! over the generic scalar kernel.
float EuclideanDistanceFp16Scalar(const ailego::Float16 *m,
                                  const ailego::Float16 *q, size_t dim) {
  // The element type is deduced from the pointer arguments.
  return EuclideanDistanceScalar(m, q, dim);
}

//! Scalar squared Euclidean distance for FP32 vectors; a non-template entry
//! point over the generic scalar kernel.
float SquaredEuclideanDistanceFp32Scalar(const float *m, const float *q,
                                         size_t dim) {
  // The element type is deduced from the pointer arguments.
  return SquaredEuclideanDistanceScalar(m, q, dim);
}

//! Scalar Euclidean distance for FP32 vectors; a non-template entry point
//! over the generic scalar kernel.
float EuclideanDistanceFp32Scalar(const float *m, const float *q, size_t dim) {
  // The element type is deduced from the pointer arguments.
  return EuclideanDistanceScalar(m, q, dim);
}


}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/hamming_distance_matrix.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "hamming_distance_matrix.h"
#include <cstring>
#include <arrow/util/future.h>
#include <zvec/ailego/internal/platform.h>
#include "distance_matrix_popcnt.i"

namespace zvec {
namespace ailego {

// Map the POPCNT_* step names onto the HAMMING_* step macros defined further
// down in this file. Macro bodies are expanded at the point of use, so it is
// fine that the HAMMING_* definitions come later.
// NOTE(review): presumably the POPCNT_* names are the ones consumed by
// distance_matrix_popcnt.i -- confirm against that header.
#define POPCNT_UINT32_STEP1_SSE HAMMING_UINT32_STEP1_SSE
#define POPCNT_UINT32_STEP2_SSE HAMMING_UINT32_STEP2_SSE
#define POPCNT_UINT32_STEP3_SSE HAMMING_UINT32_STEP3_SSE
#define POPCNT_UINT32_STEP1_AVX HAMMING_UINT32_STEP1_AVX
#define POPCNT_UINT32_STEP2_AVX HAMMING_UINT32_STEP2_AVX
#define POPCNT_UINT32_STEP3_AVX HAMMING_UINT32_STEP3_AVX
#define POPCNT_UINT64_STEP1_AVX HAMMING_UINT64_STEP1_AVX
#define POPCNT_UINT64_STEP2_AVX HAMMING_UINT64_STEP2_AVX

// Every macro below has the same shape: XOR the two input registers, run the
// result through a VerticalPopCount_* helper, and add that to the running
// sum register. They differ only in register width and in the lane width of
// the helper and of the add. Each expansion already ends with a semicolon.

//! Hamming step 1 (SSE): accumulate the popcount of m ^ q in 8-bit lanes
#define HAMMING_UINT32_STEP1_SSE(xmm_m, xmm_q, xmm_sum) \
  xmm_sum = _mm_add_epi8(                               \
      VerticalPopCount_INT8_V128(_mm_xor_si128(xmm_m, xmm_q)), xmm_sum);

//! Hamming step 2 (SSE): accumulate the popcount of m ^ q in 16-bit lanes
#define HAMMING_UINT32_STEP2_SSE(xmm_m, xmm_q, xmm_sum) \
  xmm_sum = _mm_add_epi16(                              \
      VerticalPopCount_INT16_V128(_mm_xor_si128(xmm_m, xmm_q)), xmm_sum);

//! Hamming step 3 (SSE): accumulate the popcount of m ^ q in 32-bit lanes
#define HAMMING_UINT32_STEP3_SSE(xmm_m, xmm_q, xmm_sum) \
  xmm_sum = _mm_add_epi32(                              \
      VerticalPopCount_INT32_V128(_mm_xor_si128(xmm_m, xmm_q)), xmm_sum);

//! Hamming step 1 (AVX): accumulate the popcount of m ^ q in 8-bit lanes
#define HAMMING_UINT32_STEP1_AVX(ymm_m, ymm_q, ymm_sum) \
  ymm_sum = _mm256_add_epi8(                            \
      VerticalPopCount_INT8_V256(_mm256_xor_si256(ymm_m, ymm_q)), ymm_sum);

//! Hamming step 2 (AVX): accumulate the popcount of m ^ q in 16-bit lanes
#define HAMMING_UINT32_STEP2_AVX(ymm_m, ymm_q, ymm_sum) \
  ymm_sum = _mm256_add_epi16(                           \
      VerticalPopCount_INT16_V256(_mm256_xor_si256(ymm_m, ymm_q)), ymm_sum);

//! Hamming step 3 (AVX): accumulate the popcount of m ^ q in 32-bit lanes
#define HAMMING_UINT32_STEP3_AVX(ymm_m, ymm_q, ymm_sum) \
  ymm_sum = _mm256_add_epi32(                           \
      VerticalPopCount_INT32_V256(_mm256_xor_si256(ymm_m, ymm_q)), ymm_sum);

//! Hamming step 1 for 64-bit elements (AVX): same body as the 32-bit step 1,
//! accumulating in 8-bit lanes
#define HAMMING_UINT64_STEP1_AVX(ymm_m, ymm_q, ymm_sum) \
  ymm_sum = _mm256_add_epi8(                            \
      VerticalPopCount_INT8_V256(_mm256_xor_si256(ymm_m, ymm_q)), ymm_sum);

//! Hamming step 2 for 64-bit elements (AVX): accumulate in 64-bit lanes
#define HAMMING_UINT64_STEP2_AVX(ymm_m, ymm_q, ymm_sum) \
  ymm_sum = _mm256_add_epi64(                           \
      VerticalPopCount_INT64_V256(_mm256_xor_si256(ymm_m, ymm_q)), ymm_sum);

// CONVERT_UINT64_TO_FP32(v): turn a 256-bit register of 64-bit counts into
// packed floats. Two implementations are selected at compile time; when
// neither AVX512VL+DQ nor AVX2 is enabled the macro is left undefined.
#if defined(__AVX512VL__) && defined(__AVX512DQ__)
// Direct unsigned 64-bit -> float conversion instruction.
#define CONVERT_UINT64_TO_FP32(v, ...) _mm256_cvtepu64_ps(v)
#elif defined(__AVX2__)
// Permutation indices that gather 32-bit lanes 0, 2, 4 and 6 (the low half
// of each 64-bit lane) into the lower 128 bits.
static const __m256i CONVERT_UINT32_MASK_AVX =
    _mm256_set_epi32(0, 0, 0, 0, 6, 4, 2, 0);

// Keeps only the low 32 bits of each 64-bit lane and converts them as SIGNED
// 32-bit integers -- assumes every count fits in 31 bits (TODO confirm).
#define CONVERT_UINT64_TO_FP32(v, ...)    \
  _mm_cvtepi32_ps(_mm256_castsi256_si128( \
      _mm256_permutevar8x32_epi32(v, CONVERT_UINT32_MASK_AVX)))
#endif  // __AVX512VL__ && __AVX512DQ__

// Square roots of integer count registers, returned as packed floats. The
// 32-bit variants convert with a signed integer -> float conversion.
#define SQRT_UINT64_TO_FP32(v, ...) _mm_sqrt_ps(CONVERT_UINT64_TO_FP32(v))
#define SQRT_UINT32_TO_FP32_SSE(v, ...) _mm_sqrt_ps(_mm_cvtepi32_ps(v))
#define SQRT_UINT32_TO_FP32_AVX(v, ...) _mm256_sqrt_ps(_mm256_cvtepi32_ps(v))

#if defined(__AVX2__)
//! Hamming distance (number of differing bits) between two uint32 arrays,
//! AVX2 implementation.
//!
//! @param lhs   first array
//! @param rhs   second array
//! @param size  number of uint32 elements in each array
//! @return total count accumulated over all `size` element pairs
//!
//! The vector part runs in two phases that share the same accumulators:
//! phase 1 adds per-lane results in 8-bit lanes, phase 2 in 64-bit lanes.
static inline size_t HammingDistanceAVX(const uint32_t *lhs,
                                        const uint32_t *rhs, size_t size) {
  __m256i ymm_sum_0 = _mm256_setzero_si256();
  __m256i ymm_sum_1 = _mm256_setzero_si256();

  // lhs_0: end of the part that is a whole number of 16-element chunks.
  // lhs_1: end of phase 1, capped at 496 elements (31 chunks).
  // lhs_2: one past the final element.
  // NOTE(review): the 496 cap presumably keeps the 8-bit lane counters from
  // overflowing (31 iterations x at most 8 set bits per byte = 248), which
  // assumes VerticalPopCount_INT8_V256 yields per-byte counts -- confirm.
  const uint32_t *lhs_0 = lhs + ((size >> 4) << 4);
  const uint32_t *lhs_1 = (size > 496 ? lhs + 496 : lhs_0);
  const uint32_t *lhs_2 = lhs + size;

  // Aligned loads are used only when BOTH pointers are 32-byte aligned; the
  // else branch repeats the same steps with unaligned loads.
  if (((uintptr_t)lhs & 0x1f) == 0 && ((uintptr_t)rhs & 0x1f) == 0) {
    // Phase 1: accumulate in 8-bit lanes.
    for (; lhs != lhs_1; lhs += 16, rhs += 16) {
      __m256i ymm_lhs_0 = _mm256_load_si256((__m256i *)(lhs + 0));
      __m256i ymm_lhs_1 = _mm256_load_si256((__m256i *)(lhs + 8));
      __m256i ymm_rhs_0 = _mm256_load_si256((__m256i *)(rhs + 0));
      __m256i ymm_rhs_1 = _mm256_load_si256((__m256i *)(rhs + 8));

      ymm_sum_0 = _mm256_add_epi8(
          VerticalPopCount_INT8_V256(_mm256_xor_si256(ymm_lhs_0, ymm_rhs_0)),
          ymm_sum_0);
      ymm_sum_1 = _mm256_add_epi8(
          VerticalPopCount_INT8_V256(_mm256_xor_si256(ymm_lhs_1, ymm_rhs_1)),
          ymm_sum_1);
    }
    // Fold the 8-bit lane counters into 64-bit lanes so that phase 2 can
    // keep adding to the same registers (POPCNT_ZERO_AVX is defined outside
    // this file; presumably an all-zero vector).
    ymm_sum_0 = _mm256_sad_epu8(ymm_sum_0, POPCNT_ZERO_AVX);
    ymm_sum_1 = _mm256_sad_epu8(ymm_sum_1, POPCNT_ZERO_AVX);

    // Phase 2: remaining whole chunks, accumulated in 64-bit lanes.
    for (; lhs != lhs_0; lhs += 16, rhs += 16) {
      __m256i ymm_lhs_0 = _mm256_load_si256((__m256i *)(lhs + 0));
      __m256i ymm_lhs_1 = _mm256_load_si256((__m256i *)(lhs + 8));
      __m256i ymm_rhs_0 = _mm256_load_si256((__m256i *)(rhs + 0));
      __m256i ymm_rhs_1 = _mm256_load_si256((__m256i *)(rhs + 8));

      ymm_sum_0 = _mm256_add_epi64(
          VerticalPopCount_INT64_V256(_mm256_xor_si256(ymm_lhs_0, ymm_rhs_0)),
          ymm_sum_0);
      ymm_sum_1 = _mm256_add_epi64(
          VerticalPopCount_INT64_V256(_mm256_xor_si256(ymm_lhs_1, ymm_rhs_1)),
          ymm_sum_1);
    }

    // At least 8 elements remain: consume one more 256-bit vector.
    if (lhs_2 >= lhs + 8) {
      __m256i ymm_lhs = _mm256_load_si256((__m256i *)(lhs));
      __m256i ymm_rhs = _mm256_load_si256((__m256i *)(rhs));
      ymm_sum_0 = _mm256_add_epi64(
          VerticalPopCount_INT64_V256(_mm256_xor_si256(ymm_lhs, ymm_rhs)),
          ymm_sum_0);
      lhs += 8;
      rhs += 8;
    }
  } else {
    // Unaligned variant of phase 1.
    for (; lhs != lhs_1; lhs += 16, rhs += 16) {
      __m256i ymm_lhs_0 = _mm256_loadu_si256((__m256i *)(lhs + 0));
      __m256i ymm_lhs_1 = _mm256_loadu_si256((__m256i *)(lhs + 8));
      __m256i ymm_rhs_0 = _mm256_loadu_si256((__m256i *)(rhs + 0));
      __m256i ymm_rhs_1 = _mm256_loadu_si256((__m256i *)(rhs + 8));

      ymm_sum_0 = _mm256_add_epi8(
          VerticalPopCount_INT8_V256(_mm256_xor_si256(ymm_lhs_0, ymm_rhs_0)),
          ymm_sum_0);
      ymm_sum_1 = _mm256_add_epi8(
          VerticalPopCount_INT8_V256(_mm256_xor_si256(ymm_lhs_1, ymm_rhs_1)),
          ymm_sum_1);
    }
    ymm_sum_0 = _mm256_sad_epu8(ymm_sum_0, POPCNT_ZERO_AVX);
    ymm_sum_1 = _mm256_sad_epu8(ymm_sum_1, POPCNT_ZERO_AVX);

    // Unaligned variant of phase 2.
    for (; lhs != lhs_0; lhs += 16, rhs += 16) {
      __m256i ymm_lhs_0 = _mm256_loadu_si256((__m256i *)(lhs + 0));
      __m256i ymm_lhs_1 = _mm256_loadu_si256((__m256i *)(lhs + 8));
      __m256i ymm_rhs_0 = _mm256_loadu_si256((__m256i *)(rhs + 0));
      __m256i ymm_rhs_1 = _mm256_loadu_si256((__m256i *)(rhs + 8));

      ymm_sum_0 = _mm256_add_epi64(
          VerticalPopCount_INT64_V256(_mm256_xor_si256(ymm_lhs_0, ymm_rhs_0)),
          ymm_sum_0);
      ymm_sum_1 = _mm256_add_epi64(
          VerticalPopCount_INT64_V256(_mm256_xor_si256(ymm_lhs_1, ymm_rhs_1)),
          ymm_sum_1);
    }

    // Unaligned variant of the single 8-element step.
    if (lhs_2 >= lhs + 8) {
      __m256i ymm_lhs = _mm256_loadu_si256((__m256i *)(lhs));
      __m256i ymm_rhs = _mm256_loadu_si256((__m256i *)(rhs));
      ymm_sum_0 = _mm256_add_epi64(
          VerticalPopCount_INT64_V256(_mm256_xor_si256(ymm_lhs, ymm_rhs)),
          ymm_sum_0);
      lhs += 8;
      rhs += 8;
    }
  }

  // Merge both accumulators and reduce the 64-bit lanes to one scalar.
  size_t count =
      (size_t)HorizontalAdd_INT64_V256(_mm256_add_epi64(ymm_sum_0, ymm_sum_1));
  // Scalar tail: 0..7 elements remain. Cases fall through on purpose so a
  // remainder of k handles elements k-1 down to 0.
  switch (lhs_2 - lhs) {
    case 7:
      count += ailego_popcount32(lhs[6] ^ rhs[6]);
      /* FALLTHRU */
    case 6:
      count += ailego_popcount32(lhs[5] ^ rhs[5]);
      /* FALLTHRU */
    case 5:
      count += ailego_popcount32(lhs[4] ^ rhs[4]);
      /* FALLTHRU */
    case 4:
      count += ailego_popcount32(lhs[3] ^ rhs[3]);
      /* FALLTHRU */
    case 3:
      count += ailego_popcount32(lhs[2] ^ rhs[2]);
      /* FALLTHRU */
    case 2:
      count += ailego_popcount32(lhs[1] ^ rhs[1]);
      /* FALLTHRU */
    case 1:
      count += ailego_popcount32(lhs[0] ^ rhs[0]);
  }
  return count;
}

//! Hamming distance over 64-bit words (AVX2 build only).
//! Reinterprets both buffers as 32-bit words and forwards to the uint32_t
//! overload with twice the element count, so the same number of bytes is
//! examined.
static inline size_t HammingDistanceAVX(const uint64_t *lhs,
                                        const uint64_t *rhs, size_t size) {
  const uint32_t *lhs_words = reinterpret_cast<const uint32_t *>(lhs);
  const uint32_t *rhs_words = reinterpret_cast<const uint32_t *>(rhs);
  const size_t word_count = size * 2;  // two 32-bit words per 64-bit element
  return HammingDistanceAVX(lhs_words, rhs_words, word_count);
}
#endif  // __AVX2__

#if defined(AILEGO_M64)
//! Assemble one 64-bit value from two adjacent 32-bit words.
//! Only 32-bit reads are performed, so the source needs nothing stricter than
//! uint32_t alignment and no pointer type punning is involved. Which word ends
//! up in the upper half is irrelevant to the caller: it only counts set bits.
static inline uint64_t HammingLoadPairU32(const uint32_t *p) {
  return static_cast<uint64_t>(p[0]) | (static_cast<uint64_t>(p[1]) << 32);
}

//! Count the differing bits between two arrays of `size` 32-bit words
//! (64-bit build). The bulk of the input is consumed eight words at a time,
//! paired into four 64-bit population counts; the remaining (size % 8) words
//! are counted individually.
//! \param lhs   first array, `size` words
//! \param rhs   second array, `size` words
//! \param size  number of 32-bit words in each array (0 yields 0)
//! \return total number of bit positions where lhs and rhs differ
static inline size_t HammingDistance(const uint32_t *lhs, const uint32_t *rhs,
                                     size_t size) {
  const size_t tail = size & 7;
  const uint32_t *const body_end = lhs + (size - tail);
  size_t count = 0;

  for (; lhs != body_end; lhs += 8, rhs += 8) {
    count += ailego_popcount64(HammingLoadPairU32(lhs + 6) ^
                               HammingLoadPairU32(rhs + 6));
    count += ailego_popcount64(HammingLoadPairU32(lhs + 4) ^
                               HammingLoadPairU32(rhs + 4));
    count += ailego_popcount64(HammingLoadPairU32(lhs + 2) ^
                               HammingLoadPairU32(rhs + 2));
    count += ailego_popcount64(HammingLoadPairU32(lhs + 0) ^
                               HammingLoadPairU32(rhs + 0));
  }

  // Fewer than eight words left: handle them one by one.
  for (size_t i = 0; i < tail; ++i) {
    count += ailego_popcount32(lhs[i] ^ rhs[i]);
  }
  return count;
}

//! Count the differing bits between two arrays of `size` 64-bit words.
//! The main loop handles four words per iteration; the last (size % 4) words
//! are counted afterwards.
//! \return total number of bit positions where lhs and rhs differ
static inline size_t HammingDistance(const uint64_t *lhs, const uint64_t *rhs,
                                     size_t size) {
  const size_t tail = size & 3;
  const uint64_t *const body_end = lhs + (size - tail);
  size_t bits = 0;

  while (lhs != body_end) {
    bits += ailego_popcount64(lhs[3] ^ rhs[3]);
    bits += ailego_popcount64(lhs[2] ^ rhs[2]);
    bits += ailego_popcount64(lhs[1] ^ rhs[1]);
    bits += ailego_popcount64(lhs[0] ^ rhs[0]);
    lhs += 4;
    rhs += 4;
  }

  // Leftover words that did not fill a group of four.
  for (size_t i = 0; i < tail; ++i) {
    bits += ailego_popcount64(lhs[i] ^ rhs[i]);
  }
  return bits;
}
#else
//! Count the differing bits between two arrays of `size` 32-bit words
//! (build without AILEGO_M64). The main loop handles four words per
//! iteration; the last (size % 4) words are counted afterwards.
//! \return total number of bit positions where lhs and rhs differ
static inline size_t HammingDistance(const uint32_t *lhs, const uint32_t *rhs,
                                     size_t size) {
  const size_t tail = size & 3;
  const uint32_t *const body_end = lhs + (size - tail);
  size_t bits = 0;

  while (lhs != body_end) {
    bits += ailego_popcount32(lhs[3] ^ rhs[3]);
    bits += ailego_popcount32(lhs[2] ^ rhs[2]);
    bits += ailego_popcount32(lhs[1] ^ rhs[1]);
    bits += ailego_popcount32(lhs[0] ^ rhs[0]);
    lhs += 4;
    rhs += 4;
  }

  // Leftover words that did not fill a group of four.
  for (size_t i = 0; i < tail; ++i) {
    bits += ailego_popcount32(lhs[i] ^ rhs[i]);
  }
  return bits;
}
#endif  // AILEGO_M64

//! Compute the Hamming distance between one vector and one query
//! (UINT32, M=1, N=1).
//! `dim` is a bit count; it is converted to a count of 32-bit words with
//! `dim >> 5`. On AVX2 builds, inputs of 64 words or more use the AVX2 kernel;
//! everything else uses the scalar routine.
void HammingDistanceMatrix<uint32_t, 1, 1>::Compute(const ValueType *m,
                                                    const ValueType *q,
                                                    size_t dim, float *out) {
  const size_t words = dim >> 5;
#if defined(__AVX2__)
  const size_t bits = (words > 63) ? HammingDistanceAVX(m, q, words)
                                   : HammingDistance(m, q, words);
#else
  const size_t bits = HammingDistance(m, q, words);
#endif
  *out = static_cast<float>(bits);
}

#if defined(AILEGO_M64)
//! Compute the Hamming distance between one vector and one query
//! (UINT64, M=1, N=1).
//! `dim` is a bit count; it is converted to a count of 64-bit words with
//! `dim >> 6`. On AVX2 builds, inputs of 32 words or more use the AVX2 kernel;
//! everything else uses the scalar routine.
void HammingDistanceMatrix<uint64_t, 1, 1>::Compute(const ValueType *m,
                                                    const ValueType *q,
                                                    size_t dim, float *out) {
  const size_t words = dim >> 6;
#if defined(__AVX2__)
  const size_t bits = (words > 31) ? HammingDistanceAVX(m, q, words)
                                   : HammingDistance(m, q, words);
#else
  const size_t bits = HammingDistance(m, q, words);
#endif
  *out = static_cast<float>(bits);
}

#endif  // AILEGO_M64

//! Compute the square root of the Hamming distance between one vector and one
//! query (UINT32, M=1, N=1).
//! `dim` is a bit count, converted to 32-bit words with `dim >> 5`. On AVX2
//! builds, inputs of 64 words or more use the AVX2 kernel.
void HammingSquareRootDistanceMatrix<uint32_t, 1, 1>::Compute(
    const ValueType *m, const ValueType *q, size_t dim, float *out) {
  const size_t words = dim >> 5;
#if defined(__AVX2__)
  const size_t bits = (words > 63) ? HammingDistanceAVX(m, q, words)
                                   : HammingDistance(m, q, words);
#else
  const size_t bits = HammingDistance(m, q, words);
#endif
  *out = std::sqrt(static_cast<float>(bits));
}


#if defined(AILEGO_M64)
//! Compute the square root of the Hamming distance between one vector and one
//! query (UINT64, M=1, N=1).
//! `dim` is a bit count, converted to 64-bit words with `dim >> 6`. On AVX2
//! builds, inputs of 32 words or more use the AVX2 kernel.
void HammingSquareRootDistanceMatrix<uint64_t, 1, 1>::Compute(
    const ValueType *m, const ValueType *q, size_t dim, float *out) {
  const size_t words = dim >> 6;
#if defined(__AVX2__)
  const size_t bits = (words > 31) ? HammingDistanceAVX(m, q, words)
                                   : HammingDistance(m, q, words);
#else
  const size_t bits = HammingDistance(m, q, words);
#endif
  *out = std::sqrt(static_cast<float>(bits));
}

#endif  // AILEGO_M64

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/math/hamming_distance_matrix.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cmath>
#include <zvec/ailego/internal/platform.h>
#include <zvec/ailego/utility/type_helper.h>

namespace zvec {
namespace ailego {

/*! Hamming Distance Matrix
 *
 *  Primary template, declared but never defined here: only the
 *  specializations below provide a Compute() implementation.
 *  T is the packed word type; M and N are the number of matrix vectors and
 *  query vectors handled per call.
 */
template <typename T, size_t M, size_t N,
          typename = void>  // NOTE: useless 'typename=void' to avoid clang
                            // compile error
struct HammingDistanceMatrix;

/*! Hamming Distance Matrix (UINT32)
 *
 *  Computes M x N Hamming distances over bit vectors packed into 32-bit
 *  words. Input is word-interleaved: step k reads M consecutive words from
 *  `m` and N consecutive words from `q`. The result for matrix vector i and
 *  query j is stored at out[j * M + i].
 */
template <size_t M, size_t N>
struct HammingDistanceMatrix<uint32_t, M, N> {
  //! Type of value
  using ValueType = uint32_t;

  //! Compute the distance between matrix and query
  //! \param dim  number of bits per vector; must be a multiple of 32
  //! \param out  M * N floats; left untouched when dim is 0
  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,
                             float *out) {
    ailego_assert(m && q && !(dim & 31) && out);

    const size_t words = dim >> 5;
    if (words == 0) {
      return;
    }

    // Word 0 overwrites every cell, so `out` needs no prior initialization.
    for (size_t i = 0; i < M; ++i) {
      const ValueType lhs = m[i];
      for (size_t j = 0; j < N; ++j) {
        out[j * M + i] = static_cast<float>(ailego_popcount32(lhs ^ q[j]));
      }
    }

    // Words 1..words-1 accumulate on top of word 0.
    for (size_t k = 1; k < words; ++k) {
      const ValueType *m_row = m + k * M;
      const ValueType *q_row = q + k * N;
      for (size_t i = 0; i < M; ++i) {
        const ValueType lhs = m_row[i];
        for (size_t j = 0; j < N; ++j) {
          out[j * M + i] +=
              static_cast<float>(ailego_popcount32(lhs ^ q_row[j]));
        }
      }
    }
  }
};

/*! Hamming Distance Matrix (UINT32, M=1, N=1)
 *
 *  Full specialization for a single vector against a single query.
 *  Compute() is only declared here; its definition lives out of line.
 */
template <>
struct HammingDistanceMatrix<uint32_t, 1, 1> {
  //! Type of value
  using ValueType = uint32_t;

  //! Compute the distance between matrix and query
  //! \param m    packed bits of the matrix vector
  //! \param q    packed bits of the query vector
  //! \param dim  vector dimension, in bits
  //! \param out  receives the single distance value
  static void Compute(const ValueType *m, const ValueType *q, size_t dim,
                      float *out);
};

#if defined(AILEGO_M64)
/*! Hamming Distance Matrix (UINT64)
 *
 *  Computes M x N Hamming distances over bit vectors packed into 64-bit
 *  words. Input is word-interleaved: step k reads M consecutive words from
 *  `m` and N consecutive words from `q`. The result for matrix vector i and
 *  query j is stored at out[j * M + i].
 */
template <size_t M, size_t N>
struct HammingDistanceMatrix<uint64_t, M, N> {
  //! Type of value
  using ValueType = uint64_t;

  //! Compute the distance between matrix and query
  //! \param dim  number of bits per vector; must be a multiple of 64
  //! \param out  M * N floats; left untouched when dim is 0
  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,
                             float *out) {
    ailego_assert(m && q && !(dim & 63) && out);

    const size_t words = dim >> 6;
    if (words == 0) {
      return;
    }

    // Word 0 overwrites every cell, so `out` needs no prior initialization.
    for (size_t i = 0; i < M; ++i) {
      const ValueType lhs = m[i];
      for (size_t j = 0; j < N; ++j) {
        out[j * M + i] = static_cast<float>(ailego_popcount64(lhs ^ q[j]));
      }
    }

    // Words 1..words-1 accumulate on top of word 0.
    for (size_t k = 1; k < words; ++k) {
      const ValueType *m_row = m + k * M;
      const ValueType *q_row = q + k * N;
      for (size_t i = 0; i < M; ++i) {
        const ValueType lhs = m_row[i];
        for (size_t j = 0; j < N; ++j) {
          out[j * M + i] +=
              static_cast<float>(ailego_popcount64(lhs ^ q_row[j]));
        }
      }
    }
  }
};

/*! Hamming Distance Matrix (UINT64, M=1, N=1)
 *
 *  Full specialization for a single vector against a single query.
 *  Compute() is only declared here; its definition lives out of line.
 */
template <>
struct HammingDistanceMatrix<uint64_t, 1, 1> {
  //! Type of value
  using ValueType = uint64_t;

  //! Compute the distance between matrix and query
  //! \param m    packed bits of the matrix vector
  //! \param q    packed bits of the query vector
  //! \param dim  vector dimension, in bits
  //! \param out  receives the single distance value
  static void Compute(const ValueType *m, const ValueType *q, size_t dim,
                      float *out);
};

#endif  // AILEGO_M64

/*! Hamming Square Root Distance Matrix
 *
 *  Generic implementation: delegates to HammingDistanceMatrix<T, M, N> and
 *  then replaces each of the M * N results with its square root.
 */
template <typename T, size_t M, size_t N>
struct HammingSquareRootDistanceMatrix {
  //! Type of value
  using ValueType = typename std::remove_cv<T>::type;

  //! Compute the distance between matrix and query
  //! \param dim  vector dimension, in bits (asserted non-zero)
  //! \param out  M * N floats, rewritten in place
  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,
                             float *out) {
    ailego_assert(m && q && dim && out);

    // Plain Hamming distances first ...
    HammingDistanceMatrix<T, M, N>::Compute(m, q, dim, out);

    // ... then take the square root of every cell.
    float *const out_end = out + N * M;
    for (float *cell = out; cell != out_end; ++cell) {
      *cell = std::sqrt(*cell);
    }
  }
};

/*! Hamming Square Root Distance Matrix (UINT32, M=1, N=1)
 *
 *  Full specialization for a single vector against a single query.
 *  Compute() is only declared here; its definition lives out of line.
 */
template <>
struct HammingSquareRootDistanceMatrix<uint32_t, 1, 1> {
  //! Type of value
  using ValueType = uint32_t;

  //! Compute the distance between matrix and query
  //! \param m    packed bits of the matrix vector
  //! \param q    packed bits of the query vector
  //! \param dim  vector dimension, in bits
  //! \param out  receives the single distance value
  static void Compute(const ValueType *m, const ValueType *q, size_t dim,
                      float *out);
};


#if defined(AILEGO_M64)
/*! Hamming Square Root Distance Matrix (UINT64, M=1, N=1)
 *
 *  Full specialization for a single vector against a single query.
 *  Compute() is only declared here; its definition lives out of line.
 */
template <>
struct HammingSquareRootDistanceMatrix<uint64_t, 1, 1> {
  //! Type of value
  using ValueType = uint64_t;

  //! Compute the distance between matrix and query
  //! \param m    packed bits of the matrix vector
  //! \param q    packed bits of the query vector
  //! \param dim  vector dimension, in bits
  //! \param out  receives the single distance value
  static void Compute(const ValueType *m, const ValueType *q, size_t dim,
                      float *out);
};

#endif  // AILEGO_M64

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/inner_product_matrix.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cmath>
#include <string>
#include <vector>
#include <ailego/utility/math_helper.h>
#include <zvec/ailego/internal/platform.h>
#include <zvec/ailego/utility/type_helper.h>
#include "distance_utility.h"

namespace zvec {
namespace ailego {

//--------------------------------------------------
// Dense
//--------------------------------------------------
/*! Inner Product Matrix
 *
 *  Primary template, declared only. The implementations are the partial and
 *  full specializations below; the trailing defaulted type parameter is the
 *  hook they use for std::enable_if selection.
 */
template <typename T, size_t M, size_t N, typename = void>
struct InnerProductMatrix;

/*! Minus Inner Product Matrix
 *
 *  Primary template, declared only. The implementations are the partial and
 *  full specializations below, which produce the negated inner product; the
 *  trailing defaulted type parameter is the hook they use for std::enable_if
 *  selection.
 */
template <typename T, size_t M, size_t N, typename = void>
struct MinusInnerProductMatrix;

/*! Inner Product Matrix (M=1, N=1)
 *
 *  Generic scalar fallback for signed arithmetic element types: the dot
 *  product of one vector with one query, accumulated in a float.
 */
template <typename T>
struct InnerProductMatrix<
    T, 1, 1, typename std::enable_if<IsSignedArithmetic<T>::value>::type> {
  //! Type of value
  using ValueType = typename std::remove_cv<T>::type;

  //! Compute the distance between matrix and query
  //! \param dim  number of elements in each vector (asserted non-zero)
  //! \param out  receives the sum of m[i] * q[i]
  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,
                             float *out) {
    ailego_assert(m && q && dim && out);

    float acc = 0.0f;
    const ValueType *const m_end = m + dim;
    while (m != m_end) {
      // Each product is converted to float before being added.
      acc += static_cast<float>((*m) * (*q));
      ++m;
      ++q;
    }
    *out = acc;
  }
};

/*! Minus Inner Product Matrix (M=1, N=1)
 *
 *  Generic scalar fallback for signed arithmetic element types: the negated
 *  dot product of one vector with one query, accumulated in a float.
 */
template <typename T>
struct MinusInnerProductMatrix<
    T, 1, 1, typename std::enable_if<IsSignedArithmetic<T>::value>::type> {
  //! Type of value
  using ValueType = typename std::remove_cv<T>::type;

  //! Compute the distance between matrix and query
  //! \param dim  number of elements in each vector (asserted non-zero)
  //! \param out  receives minus the sum of m[i] * q[i]
  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,
                             float *out) {
    ailego_assert(m && q && dim && out);

    float acc = 0.0f;
    const ValueType *const m_end = m + dim;
    while (m != m_end) {
      // Each product is converted to float before being added.
      acc += static_cast<float>((*m) * (*q));
      ++m;
      ++q;
    }
    *out = -acc;
  }
};

/*! Inner Product Matrix (UINT8 storage, M=1, N=1)
 *
 *  Full specialization; Compute() is declared here and defined out of line.
 *  NOTE(review): the M>=2 uint8_t specializations in this header treat each
 *  byte as two 4-bit values; presumably this one does too - confirm against
 *  the definition.
 */
template <>
struct InnerProductMatrix<uint8_t, 1, 1> {
  //! Type of value
  using ValueType = uint8_t;

  //! Compute the distance between matrix and query
  static void Compute(const ValueType *m, const ValueType *q, size_t dim,
                      float *out);
};

/*! Inner Product Matrix (INT8, M=1, N=1)
 *
 *  Full specialization; Compute() is declared here and defined out of line.
 */
template <>
struct InnerProductMatrix<int8_t, 1, 1> {
  //! Type of value
  using ValueType = int8_t;

  //! Compute the distance between matrix and query
  static void Compute(const ValueType *m, const ValueType *q, size_t dim,
                      float *out);
};

/*! Inner Product Matrix (FP16, M=1, N=1)
 *
 *  Full specialization; Compute() is declared here and defined out of line.
 */
template <>
struct InnerProductMatrix<Float16, 1, 1> {
  //! Type of value
  using ValueType = Float16;

  //! Compute the distance between matrix and query
  static void Compute(const ValueType *m, const ValueType *q, size_t dim,
                      float *out);
};

/*! Inner Product Matrix (FP32, M=1, N=1)
 *
 *  Full specialization; Compute() is declared here and defined out of line.
 */
template <>
struct InnerProductMatrix<float, 1, 1> {
  //! Type of value
  using ValueType = float;

  //! Compute the distance between matrix and query
  static void Compute(const ValueType *m, const ValueType *q, size_t dim,
                      float *out);
};

/*! Minus Inner Product Matrix (UINT8 storage, M=1, N=1)
 *
 *  Full specialization; Compute() is declared here and defined out of line.
 *  NOTE(review): the M>=2 uint8_t specializations in this header treat each
 *  byte as two 4-bit values; presumably this one does too - confirm against
 *  the definition.
 */
template <>
struct MinusInnerProductMatrix<uint8_t, 1, 1> {
  //! Type of value
  using ValueType = uint8_t;

  //! Compute the distance between matrix and query
  static void Compute(const ValueType *m, const ValueType *q, size_t dim,
                      float *out);
};

/*! Minus Inner Product Matrix (INT8, M=1, N=1)
 *
 *  Full specialization; Compute() is declared here and defined out of line.
 */
template <>
struct MinusInnerProductMatrix<int8_t, 1, 1> {
  //! Type of value
  using ValueType = int8_t;

  //! Compute the distance between matrix and query
  static void Compute(const ValueType *m, const ValueType *q, size_t dim,
                      float *out);
};

/*! Minus Inner Product Matrix (FP16, M=1, N=1)
 *
 *  Full specialization; Compute() is declared here and defined out of line.
 */
template <>
struct MinusInnerProductMatrix<Float16, 1, 1> {
  //! Type of value
  using ValueType = Float16;

  //! Compute the distance between matrix and query
  static void Compute(const ValueType *m, const ValueType *q, size_t dim,
                      float *out);
};

/*! Minus Inner Product Matrix (FP32, M=1, N=1)
 *
 *  Full specialization; Compute() is declared here and defined out of line.
 */
template <>
struct MinusInnerProductMatrix<float, 1, 1> {
  //! Type of value
  using ValueType = float;

  //! Compute the distance between matrix and query
  static void Compute(const ValueType *m, const ValueType *q, size_t dim,
                      float *out);
};

/*! Inner Product Matrix
 *
 *  M x N kernel for signed arithmetic element types of at least two bytes.
 *  Input is element-interleaved: step k reads M consecutive values from `m`
 *  and N consecutive values from `q`. The result for matrix vector i and
 *  query j is stored at out[j * M + i].
 */
template <typename T, size_t M, size_t N>
struct InnerProductMatrix<
    T, M, N,
    typename std::enable_if<IsSignedArithmetic<T>::value && sizeof(T) >= 2 &&
                            M >= 2 && N >= 2>::type> {
  //! Type of value
  using ValueType = typename std::remove_cv<T>::type;

  //! Compute the distance between matrix and query
  //! \param dim  number of elements per vector (asserted non-zero)
  //! \param out  M * N floats
  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,
                             float *out) {
    ailego_assert(m && q && dim && out);

    if (dim == 0) {
      return;
    }

    // Step 0 overwrites every cell, so `out` needs no prior initialization.
    for (size_t i = 0; i < M; ++i) {
      ValueType lhs = m[i];
      for (size_t j = 0; j < N; ++j) {
        out[j * M + i] = static_cast<float>(lhs * q[j]);
      }
    }

    // Steps 1..dim-1 accumulate on top of step 0.
    for (size_t k = 1; k < dim; ++k) {
      const ValueType *m_row = m + k * M;
      const ValueType *q_row = q + k * N;
      for (size_t i = 0; i < M; ++i) {
        ValueType lhs = m_row[i];
        for (size_t j = 0; j < N; ++j) {
          out[j * M + i] += lhs * q_row[j];
        }
      }
    }
  }
};

/*! Inner Product Matrix (N=1)
 *
 *  M x 1 kernel for signed arithmetic element types of at least two bytes.
 *  `m` is element-interleaved (M consecutive values per step), `q` is a plain
 *  vector of `dim` values, and out[i] receives the product for matrix
 *  vector i.
 */
template <typename T, size_t M>
struct InnerProductMatrix<
    T, M, 1,
    typename std::enable_if<IsSignedArithmetic<T>::value && sizeof(T) >= 2 &&
                            M >= 2>::type> {
  //! Type of value
  using ValueType = typename std::remove_cv<T>::type;

  //! Compute the distance between matrix and query
  //! \param dim  number of elements per vector (asserted non-zero)
  //! \param out  M floats
  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,
                             float *out) {
    ailego_assert(m && q && dim && out);

    if (dim == 0) {
      return;
    }

    // Step 0 overwrites every cell, so `out` needs no prior initialization.
    {
      ValueType q_val = q[0];
      for (size_t i = 0; i < M; ++i) {
        out[i] = static_cast<float>(m[i] * q_val);
      }
    }

    // Steps 1..dim-1 accumulate on top of step 0.
    for (size_t k = 1; k < dim; ++k) {
      const ValueType *m_row = m + k * M;
      ValueType q_val = q[k];
      for (size_t i = 0; i < M; ++i) {
        out[i] += m_row[i] * q_val;
      }
    }
  }
};

/*! Inner Product Matrix (INT8)
 *
 *  M x N kernel for int8 vectors. The inputs are read as 32-bit words, each
 *  carrying four int8 lanes; step k reads M consecutive words from `m` and N
 *  consecutive words from `q`. The result for matrix vector i and query j is
 *  stored at out[j * M + i].
 */
template <size_t M, size_t N>
struct InnerProductMatrix<int8_t, M, N,
                          typename std::enable_if<M >= 2 && N >= 2>::type> {
  //! Type of value
  using ValueType = int8_t;

  //! Compute the distance between matrix and query
  //! \param dim  number of int8 elements per vector; non-zero multiple of 4
  //! \param out  M * N floats
  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,
                             float *out) {
    ailego_assert(m && q && dim && !(dim & 3) && out);

    const uint32_t *m_words = reinterpret_cast<const uint32_t *>(m);
    const uint32_t *q_words = reinterpret_cast<const uint32_t *>(q);
    const size_t steps = dim >> 2;  // four int8 lanes per word

    if (steps == 0) {
      return;
    }

    // Step 0 overwrites every cell, so `out` needs no prior initialization.
    for (size_t i = 0; i < M; ++i) {
      const uint32_t lhs = m_words[i];
      for (size_t j = 0; j < N; ++j) {
        out[j * M + i] = FusedMultiplyAdd(lhs, q_words[j]);
      }
    }

    // Steps 1..steps-1 accumulate on top of step 0.
    for (size_t k = 1; k < steps; ++k) {
      const uint32_t *m_row = m_words + k * M;
      const uint32_t *q_row = q_words + k * N;
      for (size_t i = 0; i < M; ++i) {
        const uint32_t lhs = m_row[i];
        for (size_t j = 0; j < N; ++j) {
          out[j * M + i] += FusedMultiplyAdd(lhs, q_row[j]);
        }
      }
    }
  }

 protected:
  //! Calculate Fused-Multiply-Add
  //! Multiplies the four int8 lanes of `lhs` with the matching lanes of `rhs`
  //! and returns the sum of the products as a float.
  static inline float FusedMultiplyAdd(uint32_t lhs, uint32_t rhs) {
    const int32_t lane0 = static_cast<int8_t>(lhs) * static_cast<int8_t>(rhs);
    const int32_t lane1 =
        static_cast<int8_t>(lhs >> 8) * static_cast<int8_t>(rhs >> 8);
    const int32_t lane2 =
        static_cast<int8_t>(lhs >> 16) * static_cast<int8_t>(rhs >> 16);
    const int32_t lane3 =
        static_cast<int8_t>(lhs >> 24) * static_cast<int8_t>(rhs >> 24);

    // The volatile qualifier is carried over from the original code.
    volatile int32_t sum = lane0 + lane1 + lane2 + lane3;
    return static_cast<float>(sum);
  }
};

/*! Inner Product Matrix (INT8, N=1)
 *
 *  M x 1 kernel for int8 vectors. The inputs are read as 32-bit words, each
 *  carrying four int8 lanes; step k reads M consecutive words from `m` and
 *  one word from `q`. out[i] receives the product for matrix vector i.
 */
template <size_t M>
struct InnerProductMatrix<int8_t, M, 1, typename std::enable_if<M >= 2>::type> {
  //! Type of value
  using ValueType = int8_t;

  //! Compute the distance between matrix and query
  //! \param dim  number of int8 elements per vector; non-zero multiple of 4
  //! \param out  M floats
  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,
                             float *out) {
    ailego_assert(m && q && dim && !(dim & 3) && out);

    const uint32_t *m_words = reinterpret_cast<const uint32_t *>(m);
    const uint32_t *q_words = reinterpret_cast<const uint32_t *>(q);
    const size_t steps = dim >> 2;  // four int8 lanes per word

    if (steps == 0) {
      return;
    }

    // Step 0 overwrites every cell, so `out` needs no prior initialization.
    {
      const uint32_t q_val = q_words[0];
      for (size_t i = 0; i < M; ++i) {
        out[i] = FusedMultiplyAdd(m_words[i], q_val);
      }
    }

    // Steps 1..steps-1 accumulate on top of step 0.
    for (size_t k = 1; k < steps; ++k) {
      const uint32_t *m_row = m_words + k * M;
      const uint32_t q_val = q_words[k];
      for (size_t i = 0; i < M; ++i) {
        out[i] += FusedMultiplyAdd(m_row[i], q_val);
      }
    }
  }

 protected:
  //! Calculate Fused-Multiply-Add
  //! Multiplies the four int8 lanes of `lhs` with the matching lanes of `rhs`
  //! and returns the sum of the products as a float.
  static inline float FusedMultiplyAdd(uint32_t lhs, uint32_t rhs) {
    const int32_t lane0 = static_cast<int8_t>(lhs) * static_cast<int8_t>(rhs);
    const int32_t lane1 =
        static_cast<int8_t>(lhs >> 8) * static_cast<int8_t>(rhs >> 8);
    const int32_t lane2 =
        static_cast<int8_t>(lhs >> 16) * static_cast<int8_t>(rhs >> 16);
    const int32_t lane3 =
        static_cast<int8_t>(lhs >> 24) * static_cast<int8_t>(rhs >> 24);

    // The volatile qualifier is carried over from the original code.
    volatile int32_t sum = lane0 + lane1 + lane2 + lane3;
    return static_cast<float>(sum);
  }
};

/*! Inner Product Matrix (INT4)
 *
 *  M x N kernel for 4-bit values stored two per byte. The inputs are read as
 *  32-bit words, each carrying eight 4-bit lanes; step k reads M consecutive
 *  words from `m` and N consecutive words from `q`. The result for matrix
 *  vector i and query j is stored at out[j * M + i].
 */
template <size_t M, size_t N>
struct InnerProductMatrix<uint8_t, M, N,
                          typename std::enable_if<M >= 2 && N >= 2>::type> {
  //! Type of value
  using ValueType = uint8_t;

  //! Compute the distance between matrix and query
  //! \param dim  number of 4-bit elements per vector; non-zero multiple of 8
  //! \param out  M * N floats
  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,
                             float *out) {
    ailego_assert(m && q && dim && !(dim & 7) && out);

    const uint32_t *m_words = reinterpret_cast<const uint32_t *>(m);
    const uint32_t *q_words = reinterpret_cast<const uint32_t *>(q);
    const size_t steps = dim >> 3;  // eight 4-bit lanes per word

    if (steps == 0) {
      return;
    }

    // Step 0 overwrites every cell, so `out` needs no prior initialization.
    for (size_t i = 0; i < M; ++i) {
      const uint32_t lhs = m_words[i];
      for (size_t j = 0; j < N; ++j) {
        out[j * M + i] = FusedMultiplyAdd(lhs, q_words[j]);
      }
    }

    // Steps 1..steps-1 accumulate on top of step 0.
    for (size_t k = 1; k < steps; ++k) {
      const uint32_t *m_row = m_words + k * M;
      const uint32_t *q_row = q_words + k * N;
      for (size_t i = 0; i < M; ++i) {
        const uint32_t lhs = m_row[i];
        for (size_t j = 0; j < N; ++j) {
          out[j * M + i] += FusedMultiplyAdd(lhs, q_row[j]);
        }
      }
    }
  }

 protected:
  //! Calculate Fused-Multiply-Add
  //! For each of the eight 4-bit lanes, looks up Int4MulTable at index
  //! (lhs lane << 4) | rhs lane, and returns the sum of the eight entries.
  static inline float FusedMultiplyAdd(uint32_t lhs, uint32_t rhs) {
    // Table entry for the lane that starts at bit `shift` in both words.
    const auto lane = [lhs, rhs](unsigned shift) {
      return Int4MulTable[(((lhs >> shift) & 0xf) << 4) |
                          ((rhs >> shift) & 0xf)];
    };
    return static_cast<float>(lane(0) + lane(4) + lane(8) + lane(12) +
                              lane(16) + lane(20) + lane(24) + lane(28));
  }
};

/*! Inner Product Matrix (INT4, N=1)
 *
 *  M x 1 kernel for 4-bit values stored two per byte. The inputs are read as
 *  32-bit words, each carrying eight 4-bit lanes; step k reads M consecutive
 *  words from `m` and one word from `q`. out[i] receives the product for
 *  matrix vector i.
 */
template <size_t M>
struct InnerProductMatrix<uint8_t, M, 1,
                          typename std::enable_if<M >= 2>::type> {
  //! Type of value
  using ValueType = uint8_t;

  //! Compute the distance between matrix and query
  //! \param dim  number of 4-bit elements per vector; non-zero multiple of 8
  //! \param out  M floats
  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,
                             float *out) {
    ailego_assert(m && q && dim && !(dim & 7) && out);

    const uint32_t *m_words = reinterpret_cast<const uint32_t *>(m);
    const uint32_t *q_words = reinterpret_cast<const uint32_t *>(q);
    const size_t steps = dim >> 3;  // eight 4-bit lanes per word

    if (steps == 0) {
      return;
    }

    // Step 0 overwrites every cell, so `out` needs no prior initialization.
    {
      const uint32_t q_val = q_words[0];
      for (size_t i = 0; i < M; ++i) {
        out[i] = FusedMultiplyAdd(m_words[i], q_val);
      }
    }

    // Steps 1..steps-1 accumulate on top of step 0.
    for (size_t k = 1; k < steps; ++k) {
      const uint32_t *m_row = m_words + k * M;
      const uint32_t q_val = q_words[k];
      for (size_t i = 0; i < M; ++i) {
        out[i] += FusedMultiplyAdd(m_row[i], q_val);
      }
    }
  }

 protected:
  //! Calculate Fused-Multiply-Add
  //! For each of the eight 4-bit lanes, looks up Int4MulTable at index
  //! (lhs lane << 4) | rhs lane, and returns the sum of the eight entries.
  static inline float FusedMultiplyAdd(uint32_t lhs, uint32_t rhs) {
    // Table entry for the lane that starts at bit `shift` in both words.
    const auto lane = [lhs, rhs](unsigned shift) {
      return Int4MulTable[(((lhs >> shift) & 0xf) << 4) |
                          ((rhs >> shift) & 0xf)];
    };
    return static_cast<float>(lane(0) + lane(4) + lane(8) + lane(12) +
                              lane(16) + lane(20) + lane(24) + lane(28));
  }
};


/*! Minus Inner Product Matrix
 *
 *  M x N kernel for signed arithmetic element types of at least two bytes,
 *  producing negated inner products. Input is element-interleaved: step k
 *  reads M consecutive values from `m` and N consecutive values from `q`.
 *  The result for matrix vector i and query j is stored at out[j * M + i].
 */
template <typename T, size_t M, size_t N>
struct MinusInnerProductMatrix<
    T, M, N,
    typename std::enable_if<IsSignedArithmetic<T>::value && sizeof(T) >= 2 &&
                            M >= 2 && N >= 2>::type> {
  //! Type of value
  using ValueType = typename std::remove_cv<T>::type;

  //! Compute the distance between matrix and query
  //! \param dim  number of elements per vector (asserted non-zero)
  //! \param out  M * N floats
  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,
                             float *out) {
    ailego_assert(m && q && dim && out);

    if (dim == 0) {
      return;
    }

    // Step 0 overwrites every cell, so `out` needs no prior initialization.
    for (size_t i = 0; i < M; ++i) {
      ValueType lhs = m[i];
      for (size_t j = 0; j < N; ++j) {
        out[j * M + i] = -static_cast<float>(lhs * q[j]);
      }
    }

    // Steps 1..dim-1 subtract further products from step 0's value.
    for (size_t k = 1; k < dim; ++k) {
      const ValueType *m_row = m + k * M;
      const ValueType *q_row = q + k * N;
      for (size_t i = 0; i < M; ++i) {
        ValueType lhs = m_row[i];
        for (size_t j = 0; j < N; ++j) {
          out[j * M + i] -= lhs * q_row[j];
        }
      }
    }
  }
};

/*! Minus Inner Product Matrix (N=1)
 *
 *  M x 1 kernel for signed arithmetic element types of at least two bytes,
 *  producing negated inner products. `m` is element-interleaved (M
 *  consecutive values per step), `q` is a plain vector of `dim` values, and
 *  out[i] receives the result for matrix vector i.
 */
template <typename T, size_t M>
struct MinusInnerProductMatrix<
    T, M, 1,
    typename std::enable_if<IsSignedArithmetic<T>::value && sizeof(T) >= 2 &&
                            M >= 2>::type> {
  //! Type of value
  using ValueType = typename std::remove_cv<T>::type;

  //! Compute the distance between matrix and query
  //! \param dim  number of elements per vector (asserted non-zero)
  //! \param out  M floats
  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,
                             float *out) {
    ailego_assert(m && q && dim && out);

    if (dim == 0) {
      return;
    }

    // Step 0 overwrites every cell, so `out` needs no prior initialization.
    {
      ValueType q_val = q[0];
      for (size_t i = 0; i < M; ++i) {
        out[i] = -static_cast<float>(m[i] * q_val);
      }
    }

    // Steps 1..dim-1 subtract further products from step 0's value.
    for (size_t k = 1; k < dim; ++k) {
      const ValueType *m_row = m + k * M;
      ValueType q_val = q[k];
      for (size_t i = 0; i < M; ++i) {
        out[i] -= m_row[i] * q_val;
      }
    }
  }
};

/*! Minus Inner Product Matrix (INT8)
 *
 *  M x N kernel for int8 vectors, producing negated inner products. The
 *  inputs are read as 32-bit words, each carrying four int8 lanes; step k
 *  reads M consecutive words from `m` and N consecutive words from `q`. The
 *  result for matrix vector i and query j is stored at out[j * M + i].
 */
template <size_t M, size_t N>
struct MinusInnerProductMatrix<
    int8_t, M, N, typename std::enable_if<M >= 2 && N >= 2>::type> {
  //! Type of value
  using ValueType = int8_t;

  //! Compute the distance between matrix and query
  //! \param dim  number of int8 elements per vector; non-zero multiple of 4
  //! \param out  M * N floats
  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,
                             float *out) {
    ailego_assert(m && q && dim && !(dim & 3) && out);

    const uint32_t *m_words = reinterpret_cast<const uint32_t *>(m);
    const uint32_t *q_words = reinterpret_cast<const uint32_t *>(q);
    const size_t steps = dim >> 2;  // four int8 lanes per word

    if (steps == 0) {
      return;
    }

    // Step 0 overwrites every cell, so `out` needs no prior initialization.
    for (size_t i = 0; i < M; ++i) {
      const uint32_t lhs = m_words[i];
      for (size_t j = 0; j < N; ++j) {
        out[j * M + i] = -FusedMultiplyAdd(lhs, q_words[j]);
      }
    }

    // Steps 1..steps-1 subtract further products from step 0's value.
    for (size_t k = 1; k < steps; ++k) {
      const uint32_t *m_row = m_words + k * M;
      const uint32_t *q_row = q_words + k * N;
      for (size_t i = 0; i < M; ++i) {
        const uint32_t lhs = m_row[i];
        for (size_t j = 0; j < N; ++j) {
          out[j * M + i] -= FusedMultiplyAdd(lhs, q_row[j]);
        }
      }
    }
  }

 protected:
  //! Calculate Fused-Multiply-Add
  //! Multiplies the four int8 lanes of `lhs` with the matching lanes of `rhs`
  //! and returns the (non-negated) sum of the products as a float.
  static inline float FusedMultiplyAdd(uint32_t lhs, uint32_t rhs) {
    const int32_t lane0 = static_cast<int8_t>(lhs) * static_cast<int8_t>(rhs);
    const int32_t lane1 =
        static_cast<int8_t>(lhs >> 8) * static_cast<int8_t>(rhs >> 8);
    const int32_t lane2 =
        static_cast<int8_t>(lhs >> 16) * static_cast<int8_t>(rhs >> 16);
    const int32_t lane3 =
        static_cast<int8_t>(lhs >> 24) * static_cast<int8_t>(rhs >> 24);

    // The volatile qualifier is carried over from the original code.
    volatile int32_t sum = lane0 + lane1 + lane2 + lane3;
    return static_cast<float>(sum);
  }
};

/*! Minus Inner Product Matrix (INT8, N=1)
 *
 *  M x 1 kernel for int8 vectors, producing negated inner products. The
 *  inputs are read as 32-bit words, each carrying four int8 lanes; step k
 *  reads M consecutive words from `m` and one word from `q`. out[i] receives
 *  the result for matrix vector i.
 */
template <size_t M>
struct MinusInnerProductMatrix<int8_t, M, 1,
                               typename std::enable_if<M >= 2>::type> {
  //! Type of value
  using ValueType = int8_t;

  //! Compute the distance between matrix and query
  //! \param dim  number of int8 elements per vector; non-zero multiple of 4
  //! \param out  M floats
  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,
                             float *out) {
    ailego_assert(m && q && dim && !(dim & 3) && out);

    const uint32_t *m_words = reinterpret_cast<const uint32_t *>(m);
    const uint32_t *q_words = reinterpret_cast<const uint32_t *>(q);
    const size_t steps = dim >> 2;  // four int8 lanes per word

    if (steps == 0) {
      return;
    }

    // Step 0 overwrites every cell, so `out` needs no prior initialization.
    {
      const uint32_t q_val = q_words[0];
      for (size_t i = 0; i < M; ++i) {
        out[i] = -FusedMultiplyAdd(m_words[i], q_val);
      }
    }

    // Steps 1..steps-1 subtract further products from step 0's value.
    for (size_t k = 1; k < steps; ++k) {
      const uint32_t *m_row = m_words + k * M;
      const uint32_t q_val = q_words[k];
      for (size_t i = 0; i < M; ++i) {
        out[i] -= FusedMultiplyAdd(m_row[i], q_val);
      }
    }
  }

 protected:
  //! Calculate Fused-Multiply-Add
  //! Multiplies the four int8 lanes of `lhs` with the matching lanes of `rhs`
  //! and returns the (non-negated) sum of the products as a float.
  static inline float FusedMultiplyAdd(uint32_t lhs, uint32_t rhs) {
    const int32_t lane0 = static_cast<int8_t>(lhs) * static_cast<int8_t>(rhs);
    const int32_t lane1 =
        static_cast<int8_t>(lhs >> 8) * static_cast<int8_t>(rhs >> 8);
    const int32_t lane2 =
        static_cast<int8_t>(lhs >> 16) * static_cast<int8_t>(rhs >> 16);
    const int32_t lane3 =
        static_cast<int8_t>(lhs >> 24) * static_cast<int8_t>(rhs >> 24);

    // The volatile qualifier is carried over from the original code.
    volatile int32_t sum = lane0 + lane1 + lane2 + lane3;
    return static_cast<float>(sum);
  }
};

/*! Minus Inner Product Matrix (INT4)
 *
 *  M x N kernel for 4-bit values stored two per byte, producing negated inner
 *  products. The inputs are read as 32-bit words, each carrying eight 4-bit
 *  lanes; step k reads M consecutive words from `m` and N consecutive words
 *  from `q`. The result for matrix vector i and query j is stored at
 *  out[j * M + i].
 */
template <size_t M, size_t N>
struct MinusInnerProductMatrix<
    uint8_t, M, N, typename std::enable_if<M >= 2 && N >= 2>::type> {
  //! Type of value
  using ValueType = uint8_t;

  //! Compute the distance between matrix and query
  //! \param dim  number of 4-bit elements per vector; non-zero multiple of 8
  //! \param out  M * N floats
  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,
                             float *out) {
    ailego_assert(m && q && dim && !(dim & 7) && out);

    const uint32_t *m_words = reinterpret_cast<const uint32_t *>(m);
    const uint32_t *q_words = reinterpret_cast<const uint32_t *>(q);
    const size_t steps = dim >> 3;  // eight 4-bit lanes per word

    if (steps == 0) {
      return;
    }

    // Step 0 overwrites every cell, so `out` needs no prior initialization.
    for (size_t i = 0; i < M; ++i) {
      const uint32_t lhs = m_words[i];
      for (size_t j = 0; j < N; ++j) {
        out[j * M + i] = -FusedMultiplyAdd(lhs, q_words[j]);
      }
    }

    // Steps 1..steps-1 subtract further products from step 0's value.
    for (size_t k = 1; k < steps; ++k) {
      const uint32_t *m_row = m_words + k * M;
      const uint32_t *q_row = q_words + k * N;
      for (size_t i = 0; i < M; ++i) {
        const uint32_t lhs = m_row[i];
        for (size_t j = 0; j < N; ++j) {
          out[j * M + i] -= FusedMultiplyAdd(lhs, q_row[j]);
        }
      }
    }
  }

 protected:
  //! Calculate Fused-Multiply-Add
  //! For each of the eight 4-bit lanes, looks up Int4MulTable at index
  //! (lhs lane << 4) | rhs lane, and returns the (non-negated) sum of the
  //! eight entries.
  static inline float FusedMultiplyAdd(uint32_t lhs, uint32_t rhs) {
    // Table entry for the lane that starts at bit `shift` in both words.
    const auto lane = [lhs, rhs](unsigned shift) {
      return Int4MulTable[(((lhs >> shift) & 0xf) << 4) |
                          ((rhs >> shift) & 0xf)];
    };
    return static_cast<float>(lane(0) + lane(4) + lane(8) + lane(12) +
                              lane(16) + lane(20) + lane(24) + lane(28));
  }
};

/*! Minus Inner Product Matrix (INT4, N=1)
 *
 *  M x 1 kernel for 4-bit values stored two per byte, producing negated inner
 *  products. The inputs are read as 32-bit words, each carrying eight 4-bit
 *  lanes; step k reads M consecutive words from `m` and one word from `q`.
 *  out[i] receives the result for matrix vector i.
 */
template <size_t M>
struct MinusInnerProductMatrix<uint8_t, M, 1,
                               typename std::enable_if<M >= 2>::type> {
  //! Type of value
  using ValueType = uint8_t;

  //! Compute the distance between matrix and query
  //! \param dim  number of 4-bit elements per vector; non-zero multiple of 8
  //! \param out  M floats
  static inline void Compute(const ValueType *m, const ValueType *q, size_t dim,
                             float *out) {
    ailego_assert(m && q && dim && !(dim & 7) && out);

    const uint32_t *m_words = reinterpret_cast<const uint32_t *>(m);
    const uint32_t *q_words = reinterpret_cast<const uint32_t *>(q);
    const size_t steps = dim >> 3;  // eight 4-bit lanes per word

    if (steps == 0) {
      return;
    }

    // Step 0 overwrites every cell, so `out` needs no prior initialization.
    {
      const uint32_t q_val = q_words[0];
      for (size_t i = 0; i < M; ++i) {
        out[i] = -FusedMultiplyAdd(m_words[i], q_val);
      }
    }

    // Steps 1..steps-1 subtract further products from step 0's value.
    for (size_t k = 1; k < steps; ++k) {
      const uint32_t *m_row = m_words + k * M;
      const uint32_t q_val = q_words[k];
      for (size_t i = 0; i < M; ++i) {
        out[i] -= FusedMultiplyAdd(m_row[i], q_val);
      }
    }
  }

 protected:
  //! Calculate Fused-Multiply-Add
  //! For each of the eight 4-bit lanes, looks up Int4MulTable at index
  //! (lhs lane << 4) | rhs lane, and returns the (non-negated) sum of the
  //! eight entries.
  static inline float FusedMultiplyAdd(uint32_t lhs, uint32_t rhs) {
    // Table entry for the lane that starts at bit `shift` in both words.
    const auto lane = [lhs, rhs](unsigned shift) {
      return Int4MulTable[(((lhs >> shift) & 0xf) << 4) |
                          ((rhs >> shift) & 0xf)];
    };
    return static_cast<float>(lane(0) + lane(4) + lane(8) + lane(12) +
                              lane(16) + lane(20) + lane(24) + lane(28));
  }
};

//--------------------------------------------------
// Sparse
//--------------------------------------------------
//! Pairs a segment id with the number of sparse elements counted for it.
struct SparseSegmentInfo {
  //! Default entry: id is -1U (all bits set) and the element count is zero
  SparseSegmentInfo() = default;

  //! Entry for segment `seg_id` holding `vec_cnt` elements
  SparseSegmentInfo(uint32_t seg_id, uint32_t vec_cnt)
      : seg_id_(seg_id), vec_cnt_(vec_cnt) {}

  //! Segment id; defaults to -1U
  uint32_t seg_id_{-1U};
  //! Number of elements recorded for the segment; defaults to 0
  uint32_t vec_cnt_{0};
};

//! Shift applied to a 32-bit sparse index to obtain its segment id.
static constexpr uint32_t SEGMENT_ID_BITS = 16;
//! Mask selecting the low SEGMENT_ID_BITS bits of a 32-bit sparse index.
static constexpr uint32_t SEGMENT_ID_MASK = (1u << SEGMENT_ID_BITS) - 1u;

//! Sparse minus-inner-product interface, parameterised on the element type.
//! The primary template only declares its members; the Float16 and float
//! specialisations in this header redeclare the same three functions.
template <typename T>
struct MinusInnerProductSparseMatrix {
  //! Type of value
  using ValueType = typename std::remove_cv<T>::type;

  //! Takes two sparse runs, each given as an element count, an array of
  //! 16-bit indices and an array of values, and returns a float.
  //! NOTE(review): judging by the name this is the inner product restricted
  //! to one segment -- confirm against the definition.
  static inline float ComputeInnerProductSparseInSegment(
      uint32_t m_sparse_count, const uint16_t *m_sparse_index,
      const ValueType *m_sparse_value, uint32_t q_sparse_count,
      const uint16_t *q_sparse_index, const ValueType *q_sparse_value);

  //! Compute the distance between matrix and query
  //! NOTE(review): the inputs are opaque byte buffers; presumably in the
  //! layout written by transform_sparse_format -- confirm.
  static inline void Compute(const void *m_sparse_data_in,
                             const void *q_sparse_data_in, float *out);

  //! Takes `sparse_count` 32-bit indices with their values and a string
  //! buffer to write to. Declared only here; the specialisations define it.
  static inline void transform_sparse_format(uint32_t sparse_count,
                                             const uint32_t *sparse_index,
                                             const void *sparse_value,
                                             std::string &buffer);
};

template <>
struct MinusInnerProductSparseMatrix<Float16> {
  //! Type of value
  using ValueType = Float16;

  static float ComputeInnerProductSparseInSegment(
      uint32_t m_sparse_count, const uint16_t *m_sparse_index,
      const Float16 *m_sparse_value, uint32_t q_sparse_count,
      const uint16_t *q_sparse_index, const Float16 *q_sparse_value);

  //! Compute the distance between matrix and query
  static void Compute(const void *m_sparse_data_in,
                      const void *q_sparse_data_in, float *out);

  static void transform_sparse_format(uint32_t sparse_count,
                                      const uint32_t *sparse_index,
                                      const void *sparse_value,
                                      std::string &buffer) {
    uint32_t unit_size = sizeof(ValueType);

    uint32_t seg_count = 0;
    if (sparse_count == 0) {
      buffer.reserve(sizeof(uint32_t) + sizeof(uint32_t));

      buffer.append(reinterpret_cast<const char *>(&sparse_count),
                    sizeof(uint32_t));

      buffer.append(reinterpret_cast<const char *>(&seg_count),
                    sizeof(uint32_t));

      return;
    }

    std::vector<SparseSegmentInfo> seg_infos;

    uint32_t cur_seg_id = -1U;
    uint32_t cur_vec_cnt = 0;

    for (size_t i = 0; i < sparse_count; ++i) {
      uint32_t seg_id = sparse_index[i] >> SEGMENT_ID_BITS;
      if (cur_seg_id == -1U) {
        cur_seg_id = seg_id;
        cur_vec_cnt++;
      } else {
        if (seg_id == cur_seg_id) {
          cur_vec_cnt++;
        } else if (seg_id > cur_seg_id) {
          seg_infos.emplace_back(cur_seg_id, cur_vec_cnt);

          cur_seg_id = seg_id;
          cur_vec_cnt = 1;
        } else {
          // std::abort();
        }
      }
    }

    if (cur_vec_cnt > 0) {
      seg_infos.emplace_back(cur_seg_id, cur_vec_cnt);
    }

    uint32_t buffer_len = 2 * sizeof(uint32_t) +
                          seg_infos.size() * 2 * sizeof(uint32_t) +
                          sparse_count * (sizeof(uint16_t) + sizeof(ValueType));

    buffer.reserve(buffer_len);

    buffer.append(reinterpret_cast<const char *>(&sparse_count),
                  sizeof(uint32_t));

    seg_count = seg_infos.size();
    buffer.append(reinterpret_cast<const char *>(&seg_count), sizeof(uint32_t));

    for (size_t i = 0; i < seg_count; ++i) {
      uint32_t seg_id = seg_infos[i].seg_id_;
      buffer.append(reinterpret_cast<const char *>(&seg_id), sizeof(uint32_t));
    }

    for (size_t i = 0; i < seg_count; ++i) {
      uint32_t vec_cnt = seg_infos[i].vec_cnt_;
      buffer.append(reinterpret_cast<const char *>(&vec_cnt), sizeof(uint32_t));
    }

    for (size_t i = 0; i < sparse_count; ++i) {
      uint16_t temp_dim = sparse_index[i] & SEGMENT_ID_MASK;
      buffer.append(reinterpret_cast<const char *>(&temp_dim),
                    sizeof(uint16_t));
    }

    const char *sparse_value_ptr = reinterpret_cast<const char *>(sparse_value);
    for (size_t i = 0; i < sparse_count; ++i) {
      buffer.append(sparse_value_ptr, unit_size);
      sparse_value_ptr += unit_size;
    }
  }
};

template <>
struct MinusInnerProductSparseMatrix<float> {
  //! Type of value
  using ValueType = float;

  static float ComputeInnerProductSparseInSegment(
      uint32_t m_sparse_count, const uint16_t *m_sparse_index,
      const float *m_sparse_value, uint32_t q_sparse_count,
      const uint16_t *q_sparse_index, const float *q_sparse_value);

  //! Compute the distance between matrix and query
  static void Compute(const void *m_sparse_data_in,
                      const void *q_sparse_data_in, float *out);

  static void transform_sparse_format(uint32_t sparse_count,
                                      const uint32_t *sparse_index,
                                      const void *sparse_value,
                                      std::string &buffer) {
    uint32_t unit_size = sizeof(ValueType);

    uint32_t seg_count = 0;
    if (sparse_count == 0) {
      buffer.reserve(sizeof(uint32_t) + sizeof(uint32_t));

      buffer.append(reinterpret_cast<const char *>(&sparse_count),
                    sizeof(uint32_t));

      buffer.append(reinterpret_cast<const char *>(&seg_count),
                    sizeof(uint32_t));

      return;
    }

    std::vector<SparseSegmentInfo> seg_infos;

    uint32_t cur_seg_id = -1U;
    uint32_t cur_vec_cnt = 0;

    for (size_t i = 0; i < sparse_count; ++i) {
      uint32_t seg_id = sparse_index[i] >> SEGMENT_ID_BITS;
      if (cur_seg_id == -1U) {
        cur_seg_id = seg_id;
        cur_vec_cnt++;
      } else {
        if (seg_id == cur_seg_id) {
          cur_vec_cnt++;
        } else if (seg_id > cur_seg_id) {
          seg_infos.emplace_back(cur_seg_id, cur_vec_cnt);

          cur_seg_id = seg_id;
          cur_vec_cnt = 1;
        } else {
          // std::abort();
        }
      }
    }

    if (cur_vec_cnt > 0) {
      seg_infos.emplace_back(cur_seg_id, cur_vec_cnt);
    }

    uint32_t buffer_len = 2 * sizeof(uint32_t) +
                          seg_infos.size() * 2 * sizeof(uint32_t) +
                          sparse_count * (sizeof(uint16_t) + sizeof(ValueType));

    buffer.reserve(buffer_len);

    buffer.append(reinterpret_cast<const char *>(&sparse_count),
                  sizeof(uint32_t));

    seg_count = seg_infos.size();
    buffer.append(reinterpret_cast<const char *>(&seg_count), sizeof(uint32_t));

    for (size_t i = 0; i < seg_count; ++i) {
      uint32_t seg_id = seg_infos[i].seg_id_;
      buffer.append(reinterpret_cast<const char *>(&seg_id), sizeof(uint32_t));
    }

    for (size_t i = 0; i < seg_count; ++i) {
      uint32_t vec_cnt = seg_infos[i].vec_cnt_;
      buffer.append(reinterpret_cast<const char *>(&vec_cnt), sizeof(uint32_t));
    }

    for (size_t i = 0; i < sparse_count; ++i) {
      uint16_t temp_dim = sparse_index[i] & SEGMENT_ID_MASK;
      buffer.append(reinterpret_cast<const char *>(&temp_dim),
                    sizeof(uint16_t));
    }

    const char *sparse_value_ptr = reinterpret_cast<const char *>(sparse_value);
    for (size_t i = 0; i < sparse_count; ++i) {
      buffer.append(sparse_value_ptr, unit_size);
      sparse_value_ptr += unit_size;
    }
  }
};


}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/inner_product_matrix_fp16_avx.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_accum_fp16.i"
#include "distance_matrix_inner_product_utility.i"
#include "inner_product_matrix.h"

namespace zvec {
namespace ailego {

//--------------------------------------------------
// Dense
//--------------------------------------------------
#if defined(__AVX__)
//! Runs the ACCUM_FP16_1X1_AVX macro over `size` FP16 elements of `lhs` and
//! `rhs`, accumulating into a zero-initialised float, and returns that float.
//! NOTE(review): the macro is defined outside this file; presumably it yields
//! the inner product of the two vectors -- confirm against its definition.
float InnerProductFp16AVX(const Float16 *lhs, const Float16 *rhs, size_t size) {
  float score{0.0f};

  // The last macro argument is intentionally left empty here.
  ACCUM_FP16_1X1_AVX(lhs, rhs, size, &score, 0ull, )

  return score;
}

//! Same macro invocation as InnerProductFp16AVX, except that
//! NEGATE_FP32_GENERAL is passed as the final macro argument.
//! NOTE(review): presumably this negates the accumulated result, giving the
//! minus inner product -- confirm against the macro definitions.
float MinusInnerProductFp16AVX(const Float16 *lhs, const Float16 *rhs,
                               size_t size) {
  float score{0.0f};

  ACCUM_FP16_1X1_AVX(lhs, rhs, size, &score, 0ull, NEGATE_FP32_GENERAL)

  return score;
}
#endif

//--------------------------------------------------
// Sparse
//--------------------------------------------------
#if defined(__AVX__)
const static __m128i SHUFFLE_MASK256[256] = {
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, -127, -127, -127, -127),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, -127, -127, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, -127, -127, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, -127, -127, 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 5, 4, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 5, 4, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 5,
                 4, 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, -127, -127, 7, 6),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 7, 6, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 7, 6, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 7,
                 6, 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 7, 6, 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 7,
                 6, 5, 4, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 7,
                 6, 5, 4, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 7, 6, 5, 4, 3,
                 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, -127, -127, 9, 8),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 9, 8, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 9, 8, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 9,
                 8, 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 9, 8, 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 9,
                 8, 5, 4, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 9,
                 8, 5, 4, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 9, 8, 5, 4, 3,
                 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 9, 8, 7, 6),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 9,
                 8, 7, 6, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 9,
                 8, 7, 6, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 9, 8, 7, 6, 3,
                 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 9,
                 8, 7, 6, 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 9, 8, 7, 6, 5,
                 4, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 9, 8, 7, 6, 5,
                 4, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 9, 8, 7, 6, 5, 4, 3, 2, 1,
                 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, -127, -127, 11, 10),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 11, 10, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 11, 10, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 11,
                 10, 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 11, 10, 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 11,
                 10, 5, 4, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 11,
                 10, 5, 4, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 5, 4,
                 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 11, 10, 7, 6),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 11,
                 10, 7, 6, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 11,
                 10, 7, 6, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 7, 6,
                 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 11,
                 10, 7, 6, 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 7, 6,
                 5, 4, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 7, 6,
                 5, 4, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 11, 10, 7, 6, 5, 4, 3, 2,
                 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 11, 10, 9, 8),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 11,
                 10, 9, 8, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 11,
                 10, 9, 8, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 9, 8,
                 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 11,
                 10, 9, 8, 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 9, 8,
                 5, 4, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 9, 8,
                 5, 4, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 11, 10, 9, 8, 5, 4, 3, 2,
                 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 11,
                 10, 9, 8, 7, 6),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 9, 8,
                 7, 6, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 9, 8,
                 7, 6, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 11, 10, 9, 8, 7, 6, 3, 2,
                 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 9, 8,
                 7, 6, 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 11, 10, 9, 8, 7, 6, 5, 4,
                 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 11, 10, 9, 8, 7, 6, 5, 4,
                 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, -127, -127, 13, 12),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 13, 12, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 13, 12, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 13,
                 12, 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 13, 12, 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 13,
                 12, 5, 4, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 13,
                 12, 5, 4, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 5, 4,
                 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 13, 12, 7, 6),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 13,
                 12, 7, 6, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 13,
                 12, 7, 6, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 7, 6,
                 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 13,
                 12, 7, 6, 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 7, 6,
                 5, 4, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 7, 6,
                 5, 4, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 7, 6, 5, 4, 3, 2,
                 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 13, 12, 9, 8),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 13,
                 12, 9, 8, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 13,
                 12, 9, 8, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 9, 8,
                 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 13,
                 12, 9, 8, 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 9, 8,
                 5, 4, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 9, 8,
                 5, 4, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 9, 8, 5, 4, 3, 2,
                 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 13,
                 12, 9, 8, 7, 6),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 9, 8,
                 7, 6, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 9, 8,
                 7, 6, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 9, 8, 7, 6, 3, 2,
                 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 9, 8,
                 7, 6, 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 9, 8, 7, 6, 5, 4,
                 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 9, 8, 7, 6, 5, 4,
                 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, 13, 12, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 13, 12, 11, 10),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 13,
                 12, 11, 10, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 13,
                 12, 11, 10, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11, 10,
                 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 13,
                 12, 11, 10, 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11, 10,
                 5, 4, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11, 10,
                 5, 4, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 5, 4, 3, 2,
                 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 13,
                 12, 11, 10, 7, 6),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11, 10,
                 7, 6, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11, 10,
                 7, 6, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 7, 6, 3, 2,
                 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11, 10,
                 7, 6, 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 7, 6, 5, 4,
                 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 7, 6, 5, 4,
                 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, 13, 12, 11, 10, 7, 6, 5, 4, 3, 2, 1,
                 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 13,
                 12, 11, 10, 9, 8),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11, 10,
                 9, 8, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11, 10,
                 9, 8, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 3, 2,
                 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11, 10,
                 9, 8, 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 5, 4,
                 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 5, 4,
                 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 5, 4, 3, 2, 1,
                 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11, 10,
                 9, 8, 7, 6),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 7, 6,
                 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 7, 6,
                 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 7, 6, 3, 2, 1,
                 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 7, 6,
                 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 1,
                 0),
    _mm_set_epi8(-127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3,
                 2),
    _mm_set_epi8(-127, -127, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, -127, -127, 15, 14),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 15, 14, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 15, 14, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,
                 14, 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 15, 14, 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,
                 14, 5, 4, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,
                 14, 5, 4, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 5, 4,
                 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 15, 14, 7, 6),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,
                 14, 7, 6, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,
                 14, 7, 6, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 7, 6,
                 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,
                 14, 7, 6, 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 7, 6,
                 5, 4, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 7, 6,
                 5, 4, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 7, 6, 5, 4, 3, 2,
                 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 15, 14, 9, 8),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,
                 14, 9, 8, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,
                 14, 9, 8, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 9, 8,
                 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,
                 14, 9, 8, 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 9, 8,
                 5, 4, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 9, 8,
                 5, 4, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 9, 8, 5, 4, 3, 2,
                 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,
                 14, 9, 8, 7, 6),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 9, 8,
                 7, 6, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 9, 8,
                 7, 6, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 9, 8, 7, 6, 3, 2,
                 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 9, 8,
                 7, 6, 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 9, 8, 7, 6, 5, 4,
                 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 9, 8, 7, 6, 5, 4,
                 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 15, 14, 11, 10),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,
                 14, 11, 10, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,
                 14, 11, 10, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11, 10,
                 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,
                 14, 11, 10, 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11, 10,
                 5, 4, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11, 10,
                 5, 4, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 5, 4, 3, 2,
                 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,
                 14, 11, 10, 7, 6),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11, 10,
                 7, 6, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11, 10,
                 7, 6, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 7, 6, 3, 2,
                 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11, 10,
                 7, 6, 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 7, 6, 5, 4,
                 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 7, 6, 5, 4,
                 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 11, 10, 7, 6, 5, 4, 3, 2, 1,
                 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,
                 14, 11, 10, 9, 8),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11, 10,
                 9, 8, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11, 10,
                 9, 8, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 3, 2,
                 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11, 10,
                 9, 8, 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 5, 4,
                 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 5, 4,
                 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 5, 4, 3, 2, 1,
                 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11, 10,
                 9, 8, 7, 6),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 7, 6,
                 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 7, 6,
                 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 7, 6, 3, 2, 1,
                 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 7, 6,
                 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 7, 6, 5, 4, 1,
                 0),
    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 7, 6, 5, 4, 3,
                 2),
    _mm_set_epi8(-127, -127, 15, 14, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 15, 14, 13, 12),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,
                 14, 13, 12, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,
                 14, 13, 12, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,
                 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,
                 14, 13, 12, 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,
                 5, 4, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,
                 5, 4, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 5, 4, 3, 2,
                 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,
                 14, 13, 12, 7, 6),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,
                 7, 6, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,
                 7, 6, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 7, 6, 3, 2,
                 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,
                 7, 6, 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 7, 6, 5, 4,
                 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 7, 6, 5, 4,
                 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 7, 6, 5, 4, 3, 2, 1,
                 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,
                 14, 13, 12, 9, 8),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,
                 9, 8, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,
                 9, 8, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 3, 2,
                 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,
                 9, 8, 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 5, 4,
                 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 5, 4,
                 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 5, 4, 3, 2, 1,
                 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,
                 9, 8, 7, 6),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 7, 6,
                 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 7, 6,
                 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 7, 6, 3, 2, 1,
                 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 7, 6,
                 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 7, 6, 5, 4, 1,
                 0),
    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 7, 6, 5, 4, 3,
                 2),
    _mm_set_epi8(-127, -127, 15, 14, 13, 12, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127, 15,
                 14, 13, 12, 11, 10),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,
                 11, 10, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,
                 11, 10, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 3,
                 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,
                 11, 10, 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 5,
                 4, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 5,
                 4, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 5, 4, 3, 2, 1,
                 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,
                 11, 10, 7, 6),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 7,
                 6, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 7,
                 6, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 7, 6, 3, 2, 1,
                 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 7,
                 6, 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 7, 6, 5, 4, 1,
                 0),
    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 7, 6, 5, 4, 3,
                 2),
    _mm_set_epi8(-127, -127, 15, 14, 13, 12, 11, 10, 7, 6, 5, 4, 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,
                 11, 10, 9, 8),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9,
                 8, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9,
                 8, 3, 2),
    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 3, 2, 1,
                 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9,
                 8, 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 5, 4, 1,
                 0),
    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 5, 4, 3,
                 2),
    _mm_set_epi8(-127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 5, 4, 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9,
                 8, 7, 6),
    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 1,
                 0),
    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 3,
                 2),
    _mm_set_epi8(-127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5,
                 4),
    _mm_set_epi8(-127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 1, 0),
    _mm_set_epi8(-127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2),
    _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
};

// Capacity, in 16-bit elements, of each on-stack scratch buffer that
// InnerProductSparseInSegmentFp16AVX uses to collect the values of matching
// sparse entries.
constexpr uint32_t MAX_SPARSE_BUFFER_LENGTH = 65536;

//! Sparse inner product of two FP16 vectors given as (index, value) arrays.
//!
//! Returns the float sum of m_sparse_value[i] * q_sparse_value[j] over every
//! pair (i, j) with m_sparse_index[i] == q_sparse_index[j].
//!
//! The function works in two phases: it first intersects the two index arrays
//! and copies the values of matching entries into two parallel scratch
//! buffers, then computes a dense dot product over those buffers.
//!
//! NOTE(review): the merge logic assumes both index arrays are sorted in
//! ascending order without duplicates -- confirm against callers.
//!
//! @param m_sparse_count  number of entries in m_sparse_index/m_sparse_value
//! @param m_sparse_index  16-bit indices of the first vector
//! @param m_sparse_value  FP16 values of the first vector
//! @param q_sparse_count  number of entries in q_sparse_index/q_sparse_value
//! @param q_sparse_index  16-bit indices of the second vector
//! @param q_sparse_value  FP16 values of the second vector
float InnerProductSparseInSegmentFp16AVX(uint32_t m_sparse_count,
                                         const uint16_t *m_sparse_index,
                                         const Float16 *m_sparse_value,
                                         uint32_t q_sparse_count,
                                         const uint16_t *q_sparse_index,
                                         const Float16 *q_sparse_value) {
  float sum = 0.0f;

  // Peel off a leading entry whose index is 0 on either side and handle it
  // separately below.
  // NOTE(review): presumably required because _mm_cmpistrm works on
  // implicit-length (zero-terminated) operands, so an index of 0 inside a
  // block would cut the comparison short -- confirm.
  bool m_zero = false;
  Float16 m_zero_value{0.0f};
  if (m_sparse_count > 0 && m_sparse_index[0] == 0) {
    m_sparse_count--;
    m_sparse_index++;
    m_zero_value = *m_sparse_value++;
    m_zero = true;
  }

  bool q_zero = false;
  Float16 q_zero_value{0.0f};
  if (q_sparse_count > 0 && q_sparse_index[0] == 0) {
    q_sparse_count--;
    q_sparse_index++;
    q_zero_value = *q_sparse_value++;
    q_zero = true;
  }

  // Both vectors have an entry at index 0: that pair contributes directly.
  if (m_zero && q_zero) {
    sum = m_zero_value * q_zero_value;
  }

  // i1/i2 are the cursors into the (possibly shifted) m/q arrays; end1/end2
  // are the counts rounded down to a multiple of 8, i.e. the portion that can
  // be processed in whole 8-element blocks.
  size_t i1 = 0, i2 = 0;
  size_t end1 = m_sparse_count / 8 * 8;
  size_t end2 = q_sparse_count / 8 * 8;

  // Scratch storage on the stack (2 x MAX_SPARSE_BUFFER_LENGTH 16-bit slots),
  // reinterpreted as Float16 to hold the values of matching entries.
  uint16_t fixed_buffer_1[MAX_SPARSE_BUFFER_LENGTH];
  uint16_t fixed_buffer_2[MAX_SPARSE_BUFFER_LENGTH];

  Float16 *val_start_1 = reinterpret_cast<Float16 *>(fixed_buffer_1);
  Float16 *val_start_2 = reinterpret_cast<Float16 *>(fixed_buffer_2);

  // Write cursors: val_1 receives matched m values, val_2 matched q values.
  Float16 *val_1 = val_start_1;
  Float16 *val_2 = val_start_2;

  if (i1 < end1 && i2 < end2) {
    // Skip leading m blocks whose largest index is below the first q index of
    // the current q block; they cannot contain a match.
    while (m_sparse_index[i1 + 7] < q_sparse_index[i2]) {
      i1 += 8;
      if (i1 >= end1) goto do_scalar;
    }

    // Likewise skip q blocks that lie entirely below the current m block.
    while (q_sparse_index[i2 + 7] < m_sparse_index[i1]) {
      i2 += 8;
      if (i2 >= end2) goto do_scalar;
    }

    // Load the current block of eight 16-bit indices from each side.
    __m128i mm_index_m =
        _mm_loadu_si128(reinterpret_cast<const __m128i *>(&m_sparse_index[i1]));
    __m128i mm_index_q =
        _mm_loadu_si128(reinterpret_cast<const __m128i *>(&q_sparse_index[i2]));

    while (true) {
#ifdef DEBUG_PRINT
      std::cout << "index 1: " << std::endl;
      print_data16(&mm_index_m);

      std::cout << "index 2: " << std::endl;
      print_data16(&mm_index_q);
#endif

      // Bit mask over the m block: the result is used below to select lanes
      // of m_sparse_value, so a set bit marks an m index that also occurs in
      // the q block.
      __m128i mm_cmp_res =
          _mm_cmpistrm(mm_index_q, mm_index_m,
                       _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);

#ifdef DEBUG_PRINT
      std::cout << "cmp res: " << std::endl;
      print_data16(&mm_cmp_res);
#endif

      int r = _mm_extract_epi32(mm_cmp_res, 0);

      if (r) {
        int r1 = r;

        // Pack the selected m values to the low lanes using the shuffle
        // pattern looked up by the match mask.
        __m128i v = _mm_loadu_si128(
            reinterpret_cast<const __m128i *>(&m_sparse_value[i1]));
        __m128i vs = _mm_shuffle_epi8(v, SHUFFLE_MASK256[r1]);

        // A full 16 bytes are stored, but the cursor only advances by the
        // number of matches, so the surplus lanes are overwritten by the next
        // store (or ignored if this was the last one).
        _mm_storeu_si128(reinterpret_cast<__m128i *>(val_1), vs);
        val_1 += _mm_popcnt_u32(r1);

        // Same comparison with the operands swapped, giving the mask over the
        // q block; the matching q values are packed the same way.
        mm_cmp_res = _mm_cmpistrm(
            mm_index_m, mm_index_q,
            _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
        r = _mm_extract_epi32(mm_cmp_res, 0);

        r1 = r;

        v = _mm_loadu_si128(
            reinterpret_cast<const __m128i *>(&q_sparse_value[i2]));
        vs = _mm_shuffle_epi8(v, SHUFFLE_MASK256[r1]);

        _mm_storeu_si128(reinterpret_cast<__m128i *>(val_2), vs);
        val_2 += _mm_popcnt_u32(r1);
      }

      // Captured before i1 may change so that both tests below compare the
      // maximum of the m block that was just processed.
      const uint16_t id1_max = m_sparse_index[i1 + 7];

      // Advance the block whose largest index is smaller; when the maxima are
      // equal both conditions hold and both sides advance. Running out of
      // whole blocks on either side hands over to the scalar merge.
      if (id1_max <= q_sparse_index[i2 + 7]) {
        i1 += 8;
        if (i1 >= end1) goto do_scalar;
        mm_index_m = _mm_loadu_si128(
            reinterpret_cast<const __m128i *>(&m_sparse_index[i1]));
      }

      if (id1_max >= q_sparse_index[i2 + 7]) {
        i2 += 8;
        if (i2 >= end2) goto do_scalar;
        mm_index_q = _mm_loadu_si128(
            reinterpret_cast<const __m128i *>(&q_sparse_index[i2]));
      }
    }
  }

do_scalar:
  // Scalar two-pointer merge over whatever the block loop left unprocessed,
  // continuing from the current cursors up to the true element counts.
  while (i1 < m_sparse_count && i2 < q_sparse_count) {
    if (m_sparse_index[i1] == q_sparse_index[i2]) {
      *val_1++ = m_sparse_value[i1];
      *val_2++ = q_sparse_value[i2];

      ++i1;
      ++i2;
    } else if (m_sparse_index[i1] < q_sparse_index[i2]) {
      ++i1;
    } else {
      ++i2;
    }
  }

  // Number of matched pairs collected (derived from the m-side cursor).
  size_t res_num = val_1 - val_start_1;

  // Portion of the matches that can be processed eight at a time.
  size_t res_num8 = res_num / 8 * 8;

  if (res_num8) {
    __m256 sum256 = _mm256_setzero_ps();

    for (size_t k = 0; k < res_num8; k += 8) {
      // Widen eight FP16 values from each buffer to FP32.
      __m256 ymm_1 =
          _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)(val_start_1 + k)));
      __m256 ymm_2 =
          _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)(val_start_2 + k)));
      // NOTE(review): macro defined outside this view; presumably accumulates
      // ymm_1 * ymm_2 into sum256 -- confirm.
      ACCUM_FP32_STEP_AVX(ymm_1, ymm_2, sum256);
    }

    sum += HorizontalAdd_FP32_V256(sum256);
  }

  // Remaining (fewer than eight) matched pairs are multiplied one by one.
  for (size_t k = res_num8; k < res_num; ++k)
    sum += val_start_1[k] * val_start_2[k];

  return sum;
}

#endif  // __AVX__

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/math/inner_product_matrix_fp16_avx512.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_accum_fp16.i"
#include "distance_matrix_inner_product_utility.i"
#include "inner_product_matrix.h"

namespace zvec {
namespace ailego {

#if defined(__AVX512F__)
//! Inner product of two FP16 vectors of `size` elements (AVX-512F build).
//!
//! NOTE(review): the computation is carried out entirely by the
//! ACCUM_FP16_1X1_AVX512 macro, which is defined outside this file; it
//! presumably writes the accumulated value through its fourth argument and
//! applies its last argument (empty here) as a post-processing step -- confirm.
float InnerProductFp16AVX512(const Float16 *lhs, const Float16 *rhs,
                             size_t size) {
  float dot_product = 0.0f;

  ACCUM_FP16_1X1_AVX512(lhs, rhs, size, &dot_product, 0ull, )

  return dot_product;
}

//! Variant of InnerProductFp16AVX512 that passes NEGATE_FP32_GENERAL as the
//! final macro argument.
//!
//! NOTE(review): ACCUM_FP16_1X1_AVX512 and NEGATE_FP32_GENERAL are defined
//! outside this file; judging by the names the result is the negated inner
//! product of `lhs` and `rhs` over `size` elements -- confirm.
float MinusInnerProductFp16AVX512(const Float16 *lhs, const Float16 *rhs,
                                  size_t size) {
  float neg_dot_product = 0.0f;

  ACCUM_FP16_1X1_AVX512(lhs, rhs, size, &neg_dot_product, 0ull,
                        NEGATE_FP32_GENERAL)

  return neg_dot_product;
}
#endif  //__AVX512F__

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/inner_product_matrix_fp16_avx512fp16.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_accum_fp16.i"
#include "distance_matrix_inner_product_utility.i"
#include "inner_product_matrix.h"

namespace zvec {
namespace ailego {

#if defined(__AVX512FP16__)
//! Inner Product
//!
//! Computes the inner product of two FP16 vectors of `size` elements using
//! AVX512-FP16 instructions. Partial sums are kept in two FP16 accumulators
//! (__m512h) and reduced once at the end by HorizontalAdd_FP16_V512.
//!
//! Layout of the work:
//!   1. main loop: 64 elements per iteration (two 32-lane registers);
//!   2. at most one extra 32-element step;
//!   3. a masked step for the final 1..31 elements.
//!
//! NOTE(review): FMA_FP16_AVX512FP16 is a macro defined outside this file; it
//! presumably performs `acc = a * b + acc` on its three arguments -- confirm.
//!
//! @param lhs   first vector
//! @param rhs   second vector
//! @param size  number of FP16 elements in each vector
//! @return value produced by HorizontalAdd_FP16_V512 on the final accumulator
float InnerProductFp16AVX512FP16(const Float16 *lhs, const Float16 *rhs,
                                 size_t size) {
  const Float16 *last = lhs + size;
  // End of the region that is a whole multiple of 64 elements.
  const Float16 *last_aligned = lhs + ((size >> 6) << 6);

  __m512h zmm_sum_0 = _mm512_setzero_ph();
  __m512h zmm_sum_1 = _mm512_setzero_ph();

  // Use aligned loads only when both inputs start on a 64-byte boundary.
  if (((uintptr_t)lhs & 0x3f) == 0 && ((uintptr_t)rhs & 0x3f) == 0) {
    for (; lhs != last_aligned; lhs += 64, rhs += 64) {
      FMA_FP16_AVX512FP16(_mm512_load_ph(lhs + 0), _mm512_load_ph(rhs + 0),
                          zmm_sum_0)

      FMA_FP16_AVX512FP16(_mm512_load_ph(lhs + 32), _mm512_load_ph(rhs + 32),
                          zmm_sum_1)
    }

    // 32..63 elements left: consume one more full register.
    if (last >= last_aligned + 32) {
      FMA_FP16_AVX512FP16(_mm512_load_ph(lhs), _mm512_load_ph(rhs), zmm_sum_0)
      lhs += 32;
      rhs += 32;
    }
  } else {
    for (; lhs != last_aligned; lhs += 64, rhs += 64) {
      FMA_FP16_AVX512FP16(_mm512_loadu_ph(lhs + 0), _mm512_loadu_ph(rhs + 0),
                          zmm_sum_0)

      FMA_FP16_AVX512FP16(_mm512_loadu_ph(lhs + 32), _mm512_loadu_ph(rhs + 32),
                          zmm_sum_1)
    }

    // 32..63 elements left: consume one more full register.
    if (last >= last_aligned + 32) {
      FMA_FP16_AVX512FP16(_mm512_loadu_ph(lhs), _mm512_loadu_ph(rhs), zmm_sum_0)
      lhs += 32;
      rhs += 32;
    }
  }

  zmm_sum_0 = _mm512_add_ph(zmm_sum_0, zmm_sum_1);

  if (lhs != last) {
    // 1..31 elements remain. Build the lane mask in unsigned arithmetic: with
    // 31 elements left, a signed `1 << 31` reaches the sign bit and the
    // following `- 1` would overflow a signed int (undefined behaviour).
    __mmask32 mask = (__mmask32)((1u << (last - lhs)) - 1u);
    __m512i zmm_undefined = _mm512_undefined_epi32();
    // Lanes outside `mask` are neither loaded from memory nor updated: the
    // mask3 form keeps the accumulator value for them.
    zmm_sum_0 = _mm512_mask3_fmadd_ph(
        _mm512_castsi512_ph(_mm512_mask_loadu_epi16(zmm_undefined, mask, lhs)),
        _mm512_castsi512_ph(_mm512_mask_loadu_epi16(zmm_undefined, mask, rhs)),
        zmm_sum_0, mask);
  }

  return HorizontalAdd_FP16_V512(zmm_sum_0);
}

//! Negated inner product: evaluates InnerProductFp16AVX512FP16 on the same
//! arguments and multiplies the result by -1.
float MinusInnerProductFp16AVX512FP16(const Float16 *lhs, const Float16 *rhs,
                                      size_t size) {
  const float inner_product = InnerProductFp16AVX512FP16(lhs, rhs, size);
  return -1.0f * inner_product;
}
#endif

// sparse
#if defined(__AVX512FP16__)
// NOTE(review): presumably the element capacity of the scratch buffers used by
// the sparse inner-product kernel that follows -- confirm against its body.
constexpr uint32_t MAX_SPARSE_BUFFER_LENGTH = 65536;

float InnerProductSparseInSegmentFp16AVX512FP16(uint32_t m_sparse_count,
                                                const uint16_t *m_sparse_index,
                                                const Float16 *m_sparse_value,
                                                uint32_t q_sparse_count,
                                                const uint16_t *q_sparse_index,
                                                const Float16 *q_sparse_value) {
  const static __m128i SHUFFLE_MASK256[256] = {
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, -127, -127, -127, -127),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, -127, -127, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, -127, -127, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, 3, 2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, -127, -127, 5, 4),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, 5, 4, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, 5, 4, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   5, 4, 3, 2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, -127, -127, 7, 6),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, 7, 6, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, 7, 6, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   7, 6, 3, 2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, 7, 6, 5, 4),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   7, 6, 5, 4, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   7, 6, 5, 4, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 7, 6, 5, 4,
                   3, 2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, -127, -127, 9, 8),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, 9, 8, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, 9, 8, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   9, 8, 3, 2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, 9, 8, 5, 4),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   9, 8, 5, 4, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   9, 8, 5, 4, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 9, 8, 5, 4,
                   3, 2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, 9, 8, 7, 6),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   9, 8, 7, 6, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   9, 8, 7, 6, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 9, 8, 7, 6,
                   3, 2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   9, 8, 7, 6, 5, 4),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 9, 8, 7, 6,
                   5, 4, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 9, 8, 7, 6,
                   5, 4, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 9, 8, 7, 6, 5, 4, 3, 2,
                   1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, -127, -127, 11, 10),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, 11, 10, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, 11, 10, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   11, 10, 3, 2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, 11, 10, 5, 4),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   11, 10, 5, 4, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   11, 10, 5, 4, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 5, 4,
                   3, 2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, 11, 10, 7, 6),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   11, 10, 7, 6, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   11, 10, 7, 6, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 7, 6,
                   3, 2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   11, 10, 7, 6, 5, 4),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 7, 6,
                   5, 4, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 7, 6,
                   5, 4, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 11, 10, 7, 6, 5, 4, 3, 2,
                   1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, 11, 10, 9, 8),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   11, 10, 9, 8, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   11, 10, 9, 8, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 9, 8,
                   3, 2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   11, 10, 9, 8, 5, 4),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 9, 8,
                   5, 4, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 9, 8,
                   5, 4, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 11, 10, 9, 8, 5, 4, 3, 2,
                   1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   11, 10, 9, 8, 7, 6),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 9, 8,
                   7, 6, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 9, 8,
                   7, 6, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 11, 10, 9, 8, 7, 6, 3, 2,
                   1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 9, 8,
                   7, 6, 5, 4),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 11, 10, 9, 8, 7, 6, 5, 4,
                   1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 11, 10, 9, 8, 7, 6, 5, 4,
                   3, 2),
      _mm_set_epi8(-127, -127, -127, -127, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
                   0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, -127, -127, 13, 12),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, 13, 12, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, 13, 12, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   13, 12, 3, 2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, 13, 12, 5, 4),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   13, 12, 5, 4, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   13, 12, 5, 4, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 5, 4,
                   3, 2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, 13, 12, 7, 6),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   13, 12, 7, 6, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   13, 12, 7, 6, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 7, 6,
                   3, 2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   13, 12, 7, 6, 5, 4),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 7, 6,
                   5, 4, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 7, 6,
                   5, 4, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 7, 6, 5, 4, 3, 2,
                   1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, 13, 12, 9, 8),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   13, 12, 9, 8, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   13, 12, 9, 8, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 9, 8,
                   3, 2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   13, 12, 9, 8, 5, 4),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 9, 8,
                   5, 4, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 9, 8,
                   5, 4, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 9, 8, 5, 4, 3, 2,
                   1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   13, 12, 9, 8, 7, 6),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 9, 8,
                   7, 6, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 9, 8,
                   7, 6, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 9, 8, 7, 6, 3, 2,
                   1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 9, 8,
                   7, 6, 5, 4),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 9, 8, 7, 6, 5, 4,
                   1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 9, 8, 7, 6, 5, 4,
                   3, 2),
      _mm_set_epi8(-127, -127, -127, -127, 13, 12, 9, 8, 7, 6, 5, 4, 3, 2, 1,
                   0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, 13, 12, 11, 10),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   13, 12, 11, 10, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   13, 12, 11, 10, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11,
                   10, 3, 2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   13, 12, 11, 10, 5, 4),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11,
                   10, 5, 4, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11,
                   10, 5, 4, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 5, 4, 3,
                   2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   13, 12, 11, 10, 7, 6),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11,
                   10, 7, 6, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11,
                   10, 7, 6, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 7, 6, 3,
                   2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11,
                   10, 7, 6, 5, 4),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 7, 6, 5,
                   4, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 7, 6, 5,
                   4, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, 13, 12, 11, 10, 7, 6, 5, 4, 3, 2, 1,
                   0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   13, 12, 11, 10, 9, 8),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11,
                   10, 9, 8, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11,
                   10, 9, 8, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 3,
                   2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11,
                   10, 9, 8, 5, 4),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 5,
                   4, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 5,
                   4, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 5, 4, 3, 2, 1,
                   0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 13, 12, 11,
                   10, 9, 8, 7, 6),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 7,
                   6, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 7,
                   6, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 7, 6, 3, 2, 1,
                   0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 7,
                   6, 5, 4),
      _mm_set_epi8(-127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 1,
                   0),
      _mm_set_epi8(-127, -127, -127, -127, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3,
                   2),
      _mm_set_epi8(-127, -127, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, -127, -127, 15, 14),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, 15, 14, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, 15, 14, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   15, 14, 3, 2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, 15, 14, 5, 4),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   15, 14, 5, 4, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   15, 14, 5, 4, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 5, 4,
                   3, 2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, 15, 14, 7, 6),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   15, 14, 7, 6, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   15, 14, 7, 6, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 7, 6,
                   3, 2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   15, 14, 7, 6, 5, 4),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 7, 6,
                   5, 4, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 7, 6,
                   5, 4, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 7, 6, 5, 4, 3, 2,
                   1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, 15, 14, 9, 8),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   15, 14, 9, 8, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   15, 14, 9, 8, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 9, 8,
                   3, 2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   15, 14, 9, 8, 5, 4),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 9, 8,
                   5, 4, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 9, 8,
                   5, 4, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 9, 8, 5, 4, 3, 2,
                   1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   15, 14, 9, 8, 7, 6),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 9, 8,
                   7, 6, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 9, 8,
                   7, 6, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 9, 8, 7, 6, 3, 2,
                   1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 9, 8,
                   7, 6, 5, 4),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 9, 8, 7, 6, 5, 4,
                   1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 9, 8, 7, 6, 5, 4,
                   3, 2),
      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 9, 8, 7, 6, 5, 4, 3, 2, 1,
                   0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, 15, 14, 11, 10),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   15, 14, 11, 10, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   15, 14, 11, 10, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11,
                   10, 3, 2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   15, 14, 11, 10, 5, 4),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11,
                   10, 5, 4, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11,
                   10, 5, 4, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 5, 4, 3,
                   2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   15, 14, 11, 10, 7, 6),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11,
                   10, 7, 6, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11,
                   10, 7, 6, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 7, 6, 3,
                   2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11,
                   10, 7, 6, 5, 4),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 7, 6, 5,
                   4, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 7, 6, 5,
                   4, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 11, 10, 7, 6, 5, 4, 3, 2, 1,
                   0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   15, 14, 11, 10, 9, 8),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11,
                   10, 9, 8, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11,
                   10, 9, 8, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 3,
                   2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11,
                   10, 9, 8, 5, 4),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 5,
                   4, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 5,
                   4, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 5, 4, 3, 2, 1,
                   0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 11,
                   10, 9, 8, 7, 6),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 7,
                   6, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 7,
                   6, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 7, 6, 3, 2, 1,
                   0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 7,
                   6, 5, 4),
      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 7, 6, 5, 4, 1,
                   0),
      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 11, 10, 9, 8, 7, 6, 5, 4, 3,
                   2),
      _mm_set_epi8(-127, -127, 15, 14, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   -127, -127, 15, 14, 13, 12),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   15, 14, 13, 12, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   15, 14, 13, 12, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13,
                   12, 3, 2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   15, 14, 13, 12, 5, 4),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13,
                   12, 5, 4, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13,
                   12, 5, 4, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 5, 4, 3,
                   2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   15, 14, 13, 12, 7, 6),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13,
                   12, 7, 6, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13,
                   12, 7, 6, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 7, 6, 3,
                   2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13,
                   12, 7, 6, 5, 4),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 7, 6, 5,
                   4, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 7, 6, 5,
                   4, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 7, 6, 5, 4, 3, 2, 1,
                   0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   15, 14, 13, 12, 9, 8),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13,
                   12, 9, 8, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13,
                   12, 9, 8, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 3,
                   2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13,
                   12, 9, 8, 5, 4),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 5,
                   4, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 5,
                   4, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 5, 4, 3, 2, 1,
                   0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13,
                   12, 9, 8, 7, 6),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 7,
                   6, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 7,
                   6, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 7, 6, 3, 2, 1,
                   0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 7,
                   6, 5, 4),
      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 7, 6, 5, 4, 1,
                   0),
      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 9, 8, 7, 6, 5, 4, 3,
                   2),
      _mm_set_epi8(-127, -127, 15, 14, 13, 12, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                   15, 14, 13, 12, 11, 10),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13,
                   12, 11, 10, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13,
                   12, 11, 10, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10,
                   3, 2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13,
                   12, 11, 10, 5, 4),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10,
                   5, 4, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10,
                   5, 4, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 5, 4, 3, 2,
                   1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13,
                   12, 11, 10, 7, 6),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10,
                   7, 6, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10,
                   7, 6, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 7, 6, 3, 2,
                   1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10,
                   7, 6, 5, 4),
      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 7, 6, 5, 4,
                   1, 0),
      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 7, 6, 5, 4,
                   3, 2),
      _mm_set_epi8(-127, -127, 15, 14, 13, 12, 11, 10, 7, 6, 5, 4, 3, 2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13,
                   12, 11, 10, 9, 8),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10,
                   9, 8, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10,
                   9, 8, 3, 2),
      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 3, 2,
                   1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10,
                   9, 8, 5, 4),
      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 5, 4,
                   1, 0),
      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 5, 4,
                   3, 2),
      _mm_set_epi8(-127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 5, 4, 3, 2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, -127, -127, 15, 14, 13, 12, 11, 10,
                   9, 8, 7, 6),
      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6,
                   1, 0),
      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6,
                   3, 2),
      _mm_set_epi8(-127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 3, 2, 1, 0),
      _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6,
                   5, 4),
      _mm_set_epi8(-127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 1, 0),
      _mm_set_epi8(-127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2),
      _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
  };

  float sum = 0.0f;

  // handle if the first dim is zero
  bool m_zero = false;
  Float16 m_zero_value{0.0f};
  if (m_sparse_count > 0 && m_sparse_index[0] == 0) {
    m_sparse_count--;
    m_sparse_index++;
    m_zero_value = *m_sparse_value++;
    m_zero = true;
  }

  bool q_zero = false;
  Float16 q_zero_value{0.0f};
  if (q_sparse_count > 0 && q_sparse_index[0] == 0) {
    q_sparse_count--;
    q_sparse_index++;
    q_zero_value = *q_sparse_value++;
    q_zero = true;
  }

  if (m_zero && q_zero) {
    sum = m_zero_value * q_zero_value;
  }

  size_t i1 = 0, i2 = 0;
  size_t end1 = m_sparse_count / 8 * 8;
  size_t end2 = q_sparse_count / 8 * 8;

  uint16_t fixed_buffer_1[MAX_SPARSE_BUFFER_LENGTH];
  uint16_t fixed_buffer_2[MAX_SPARSE_BUFFER_LENGTH];

  Float16 *val_start_1 = reinterpret_cast<Float16 *>(fixed_buffer_1);
  Float16 *val_start_2 = reinterpret_cast<Float16 *>(fixed_buffer_2);

  Float16 *val_1 = val_start_1;
  Float16 *val_2 = val_start_2;

  if (i1 < end1 && i2 < end2) {
    while (m_sparse_index[i1 + 7] < q_sparse_index[i2]) {
      i1 += 8;
      if (i1 >= end1) goto do_scalar;
    }

    while (q_sparse_index[i2 + 7] < m_sparse_index[i1]) {
      i2 += 8;
      if (i2 >= end2) goto do_scalar;
    }

    __m128i mm_index_m =
        _mm_loadu_si128(reinterpret_cast<const __m128i *>(&m_sparse_index[i1]));
    __m128i mm_index_q =
        _mm_loadu_si128(reinterpret_cast<const __m128i *>(&q_sparse_index[i2]));

    while (true) {
#ifdef DEBUG_PRINT
      std::cout << "index 1: " << std::endl;
      print_data16(&mm_index_m);

      std::cout << "index 2: " << std::endl;
      print_data16(&mm_index_q);
#endif

      __m128i mm_cmp_res =
          _mm_cmpistrm(mm_index_q, mm_index_m,
                       _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);

#ifdef DEBUG_PRINT
      std::cout << "cmp res: " << std::endl;
      print_data16(&mm_cmp_res);
#endif

      int r = _mm_extract_epi32(mm_cmp_res, 0);

      if (r) {
        int r1 = r;

        __m128i v = _mm_loadu_si128(
            reinterpret_cast<const __m128i *>(&m_sparse_value[i1]));
        __m128h vs = _mm_castsi128_ph(_mm_shuffle_epi8(v, SHUFFLE_MASK256[r1]));

        _mm_storeu_ph(val_1, vs);
        val_1 += _mm_popcnt_u32(r1);

        mm_cmp_res = _mm_cmpistrm(
            mm_index_m, mm_index_q,
            _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
        r = _mm_extract_epi32(mm_cmp_res, 0);

        r1 = r;

        v = _mm_loadu_si128(
            reinterpret_cast<const __m128i *>(&q_sparse_value[i2]));
        vs = _mm_castsi128_ph(_mm_shuffle_epi8(v, SHUFFLE_MASK256[r1]));

        _mm_storeu_ph(val_2, vs);
        val_2 += _mm_popcnt_u32(r1);
      }

      const uint16_t id1_max = m_sparse_index[i1 + 7];

      if (id1_max <= q_sparse_index[i2 + 7]) {
        i1 += 8;
        if (i1 >= end1) goto do_scalar;
        mm_index_m = _mm_loadu_si128(
            reinterpret_cast<const __m128i *>(&m_sparse_index[i1]));
      }

      if (id1_max >= q_sparse_index[i2 + 7]) {
        i2 += 8;
        if (i2 >= end2) goto do_scalar;
        mm_index_q = _mm_loadu_si128(
            reinterpret_cast<const __m128i *>(&q_sparse_index[i2]));
      }
    }
  }

do_scalar:
  while (i1 < m_sparse_count && i2 < q_sparse_count) {
    if (m_sparse_index[i1] == q_sparse_index[i2]) {
      *val_1++ = m_sparse_value[i1];
      *val_2++ = q_sparse_value[i2];

      ++i1;
      ++i2;
    } else if (m_sparse_index[i1] < q_sparse_index[i2]) {
      ++i1;
    } else {
      ++i2;
    }
  }

  size_t res_num = val_1 - val_start_1;

  size_t res_num8 = res_num / 8 * 8;

  if (res_num8) {
    __m128h sum128 = _mm_set1_ph(0);

    for (size_t k = 0; k < res_num8; k += 8) {
      sum128 = _mm_add_ph(sum128, _mm_mul_ph(_mm_loadu_ph(val_start_1 + k),
                                             _mm_loadu_ph(val_start_2 + k)));
    }

    Float16 __attribute__((aligned(16))) tmp_res[8];
    _mm_store_ph(tmp_res, sum128);
    sum += (tmp_res[0] + tmp_res[1] + tmp_res[2] + tmp_res[3] + tmp_res[4] +
            tmp_res[5] + tmp_res[6] + tmp_res[7]);
  }

  for (size_t k = res_num8; k < res_num; ++k)
    sum += val_start_1[k] * val_start_2[k];

  return sum;
}

#endif  // __AVX512FP16__

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/inner_product_matrix_fp16_dispatch.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <ailego/internal/cpu_features.h>
#include "inner_product_matrix.h"

namespace zvec {
namespace ailego {

//--------------------------------------------------
// Dense
//--------------------------------------------------
// Forward declarations of the dense FP16 kernels used by the dispatchers in
// this file. None of them is defined here; each pair is only declared when
// the matching compiler macro is defined, so every call site must be guarded
// by the same macro.
// NOTE(review): judging by the names, each Minus* function yields the negated
// inner product -- confirm against the kernel sources.

// ARM NEON kernels.
#if defined(__ARM_NEON)
float InnerProductFp16NEON(const Float16 *lhs, const Float16 *rhs, size_t size);
float MinusInnerProductFp16NEON(const Float16 *lhs, const Float16 *rhs,
                                size_t size);
#endif

// x86 AVX kernels.
#if defined(__AVX__)
float InnerProductFp16AVX(const Float16 *lhs, const Float16 *rhs, size_t size);
float MinusInnerProductFp16AVX(const Float16 *lhs, const Float16 *rhs,
                               size_t size);
#endif

// x86 AVX-512F kernels.
#if defined(__AVX512F__)
float InnerProductFp16AVX512(const Float16 *lhs, const Float16 *rhs,
                             size_t size);
float MinusInnerProductFp16AVX512(const Float16 *lhs, const Float16 *rhs,
                                  size_t size);
#endif

// x86 AVX-512 FP16 kernels.
#if defined(__AVX512FP16__)
float InnerProductFp16AVX512FP16(const Float16 *lhs, const Float16 *rhs,
                                 size_t size);
float MinusInnerProductFp16AVX512FP16(const Float16 *lhs, const Float16 *rhs,
                                      size_t size);
#endif

// Scalar kernels: declared unconditionally and used as the final fallback of
// the non-NEON dispatch paths below.
float InnerProductFp16Scalar(const Float16 *lhs, const Float16 *rhs,
                             size_t size);
float MinusInnerProductFp16Scalar(const Float16 *lhs, const Float16 *rhs,
                                  size_t size);

//! Compute the distance between matrix and query (FP16, M=1, N=1)
//! Exactly one kernel is evaluated and its score is stored through `out`.
void InnerProductMatrix<Float16, 1, 1>::Compute(const ValueType *m,
                                                const ValueType *q, size_t dim,
                                                float *out) {
  // The selection is expressed as a value-returning closure so that every
  // branch ends in a plain `return` and `*out` is written in a single place.
  const auto evaluate = [&]() -> float {
#if defined(__ARM_NEON)
    // NEON builds call the NEON kernel unconditionally (no runtime probing).
    return InnerProductFp16NEON(m, q, dim);
#else
    // Candidates are probed from the widest instruction set to the narrowest;
    // a kernel is eligible only when it was compiled in and the matching
    // runtime CPU flag is set.
#if defined(__AVX512FP16__)
    if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512_FP16) {
      return InnerProductFp16AVX512FP16(m, q, dim);
    }
#endif  // __AVX512FP16__
#if defined(__AVX512F__)
    if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) {
      return InnerProductFp16AVX512(m, q, dim);
    }
#endif  // __AVX512F__
#if defined(__AVX__)
    if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX) {
      return InnerProductFp16AVX(m, q, dim);
    }
#endif  // __AVX__
    // No SIMD kernel is usable: fall back to the portable implementation.
    return InnerProductFp16Scalar(m, q, dim);
#endif  // __ARM_NEON
  };

  *out = evaluate();
}

//! Compute the distance between matrix and query (FP16, M=1, N=1)
//! Exactly one "minus inner product" kernel is evaluated and its score is
//! stored through `out`.
void MinusInnerProductMatrix<Float16, 1, 1>::Compute(const ValueType *m,
                                                     const ValueType *q,
                                                     size_t dim, float *out) {
  // The selection is expressed as a value-returning closure so that every
  // branch ends in a plain `return` and `*out` is written in a single place.
  const auto evaluate = [&]() -> float {
#if defined(__ARM_NEON)
    // NEON builds call the NEON kernel unconditionally (no runtime probing).
    return MinusInnerProductFp16NEON(m, q, dim);
#else
    // Candidates are probed from the widest instruction set to the narrowest;
    // a kernel is eligible only when it was compiled in and the matching
    // runtime CPU flag is set.
#if defined(__AVX512FP16__)
    if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512_FP16) {
      return MinusInnerProductFp16AVX512FP16(m, q, dim);
    }
#endif  // __AVX512FP16__
#if defined(__AVX512F__)
    if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) {
      return MinusInnerProductFp16AVX512(m, q, dim);
    }
#endif  // __AVX512F__
#if defined(__AVX__)
    if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX) {
      return MinusInnerProductFp16AVX(m, q, dim);
    }
#endif  // __AVX__
    // No SIMD kernel is usable: fall back to the portable implementation.
    return MinusInnerProductFp16Scalar(m, q, dim);
#endif  // __ARM_NEON
  };

  *out = evaluate();
}

//--------------------------------------------------
// Sparse
//--------------------------------------------------
// Forward declarations of the sparse FP16 kernels used below. None of them is
// defined in this file. The "InSegment" kernels take, for each of the two
// operands (m and q), a count, an array of 16-bit indices and an array of
// FP16 values.
// NOTE(review): the index arrays are presumably sorted in ascending order and
// hold `count` entries each -- confirm against the kernel implementations.

// AVX-512 FP16 variant; only declared when the compiler targets it.
#if defined(__AVX512FP16__)
float InnerProductSparseInSegmentFp16AVX512FP16(uint32_t m_sparse_count,
                                                const uint16_t *m_sparse_index,
                                                const Float16 *m_sparse_value,
                                                uint32_t q_sparse_count,
                                                const uint16_t *q_sparse_index,
                                                const Float16 *q_sparse_value);
#endif  //__AVX512FP16__

// AVX variant; only declared when the compiler targets AVX.
#if defined(__AVX__)
float InnerProductSparseInSegmentFp16AVX(uint32_t m_sparse_count,
                                         const uint16_t *m_sparse_index,
                                         const Float16 *m_sparse_value,
                                         uint32_t q_sparse_count,
                                         const uint16_t *q_sparse_index,
                                         const Float16 *q_sparse_value);
#endif  //__AVX__

// Scalar variant; always declared and used as the final fallback.
float InnerProductSparseInSegmentFp16Scalar(uint32_t m_sparse_count,
                                            const uint16_t *m_sparse_index,
                                            const Float16 *m_sparse_value,
                                            uint32_t q_sparse_count,
                                            const uint16_t *q_sparse_index,
                                            const Float16 *q_sparse_value);

// Scalar kernel operating on two opaque sparse-vector blobs.
// NOTE(review): the blob layout is not visible from this file.
float MinusInnerProductSparseFp16Scalar(const void *m_sparse_data_in,
                                        const void *q_sparse_data_in);

//! Compute the distance between matrix and query
//! Always delegates to the scalar sparse kernel; the score is stored in `out`.
void MinusInnerProductSparseMatrix<Float16>::Compute(
    const void *m_sparse_data_in, const void *q_sparse_data_in, float *out) {
  const float score =
      MinusInnerProductSparseFp16Scalar(m_sparse_data_in, q_sparse_data_in);
  *out = score;
}

//! Dispatch the sparse in-segment FP16 inner product to the best available
//! kernel (AVX-512 FP16, then AVX, then scalar) and return its result.
float ComputeInnerProductSparseInSegmentFp16(uint32_t m_sparse_count,
                                             const uint16_t *m_sparse_index,
                                             const Float16 *m_sparse_value,
                                             uint32_t q_sparse_count,
                                             const uint16_t *q_sparse_index,
                                             const Float16 *q_sparse_value) {
  using SegmentKernel = float (*)(uint32_t, const uint16_t *, const Float16 *,
                                  uint32_t, const uint16_t *, const Float16 *);

  // Start from the portable kernel and upgrade it when a SIMD kernel was
  // compiled in and the running CPU reports the matching feature.
  SegmentKernel kernel = InnerProductSparseInSegmentFp16Scalar;
#if defined(__AVX__)
  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX) {
    kernel = InnerProductSparseInSegmentFp16AVX;
  }
#endif
#if defined(__AVX512FP16__)
  // Checked last so that it takes precedence over the AVX kernel.
  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512_FP16) {
    kernel = InnerProductSparseInSegmentFp16AVX512FP16;
  }
#endif

  return kernel(m_sparse_count, m_sparse_index, m_sparse_value, q_sparse_count,
                q_sparse_index, q_sparse_value);
}

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/inner_product_matrix_fp16_neon.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_accum_fp16.i"
#include "distance_matrix_inner_product_utility.i"
#include "inner_product_matrix.h"

namespace zvec {
namespace ailego {

#if defined(__ARM_NEON)
//! FP16 inner-product kernel for NEON builds.
//! @param lhs   first FP16 vector
//! @param rhs   second FP16 vector
//! @param size  value forwarded to the accumulation macro (presumably the
//!              number of elements in each vector -- confirm in the macro)
//! @return the value the accumulation macro leaves in `score`
float InnerProductFp16NEON(const Float16 *lhs, const Float16 *rhs,
                           size_t size) {
  // Deliberately uninitialized: its address is handed to the macro below,
  // which is expected to write the result through it.
  float score;

  // All arithmetic happens inside ACCUM_FP16_1X1_NEON, which is not defined
  // in this file (presumably in distance_matrix_accum_fp16.i).
  // NOTE(review): the meaning of the `0ull` argument and of the empty trailing
  // argument (presumably "no post-processing of the result") should be
  // confirmed against the macro definition.
  ACCUM_FP16_1X1_NEON(lhs, rhs, size, &score, 0ull, )

  return score;
}

//! FP16 "minus inner product" kernel for NEON builds.
//! @param lhs   first FP16 vector
//! @param rhs   second FP16 vector
//! @param size  value forwarded to the accumulation macro (presumably the
//!              number of elements in each vector -- confirm in the macro)
//! @return the value the accumulation macro leaves in `score`
float MinusInnerProductFp16NEON(const Float16 *lhs, const Float16 *rhs,
                                size_t size) {
  // Deliberately uninitialized: its address is handed to the macro below,
  // which is expected to write the result through it.
  float score;

  // Same macro invocation as InnerProductFp16NEON except for the trailing
  // NEGATE_FP32_GENERAL argument.
  // NOTE(review): NEGATE_FP32_GENERAL presumably negates the accumulated
  // result before it is stored -- confirm against the macro definitions.
  ACCUM_FP16_1X1_NEON(lhs, rhs, size, &score, 0ull, NEGATE_FP32_GENERAL)

  return score;
}
#endif

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/math/inner_product_matrix_fp32_avx.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_accum_fp32.i"
#include "distance_matrix_inner_product_utility.i"
#include "inner_product_matrix.h"

namespace zvec {
namespace ailego {

//--------------------------------------------------
// Dense
//--------------------------------------------------
#if defined(__AVX__)
// SSE kernel, not defined in this file; InnerProductFp32AVX falls back to it
// for inputs of at most 7 elements.
float InnerProductFp32SSEInternal(const float *lhs, const float *rhs,
                                  size_t size);

//! Inner Product
float InnerProductFp32AVXInternal(const float *lhs, const float *rhs,
                                  size_t size) {
  const float *last = lhs + size;
  const float *last_aligned = lhs + ((size >> 4) << 4);

  __m256 ymm_sum_0 = _mm256_setzero_ps();
  __m256 ymm_sum_1 = _mm256_setzero_ps();

  if (((uintptr_t)lhs & 0x1f) == 0 && ((uintptr_t)rhs & 0x1f) == 0) {
    for (; lhs != last_aligned; lhs += 16, rhs += 16) {
      __m256 ymm_lhs_0 = _mm256_load_ps(lhs + 0);
      __m256 ymm_lhs_1 = _mm256_load_ps(lhs + 8);
      __m256 ymm_rhs_0 = _mm256_load_ps(rhs + 0);
      __m256 ymm_rhs_1 = _mm256_load_ps(rhs + 8);
      ymm_sum_0 = _mm256_fmadd_ps(ymm_lhs_0, ymm_rhs_0, ymm_sum_0);
      ymm_sum_1 = _mm256_fmadd_ps(ymm_lhs_1, ymm_rhs_1, ymm_sum_1);
    }

    if (last >= last_aligned + 8) {
      ymm_sum_0 =
          _mm256_fmadd_ps(_mm256_load_ps(lhs), _mm256_load_ps(rhs), ymm_sum_0);
      lhs += 8;
      rhs += 8;
    }
  } else {
    for (; lhs != last_aligned; lhs += 16, rhs += 16) {
      __m256 ymm_lhs_0 = _mm256_loadu_ps(lhs + 0);
      __m256 ymm_lhs_1 = _mm256_loadu_ps(lhs + 8);
      __m256 ymm_rhs_0 = _mm256_loadu_ps(rhs + 0);
      __m256 ymm_rhs_1 = _mm256_loadu_ps(rhs + 8);
      ymm_sum_0 = _mm256_fmadd_ps(ymm_lhs_0, ymm_rhs_0, ymm_sum_0);
      ymm_sum_1 = _mm256_fmadd_ps(ymm_lhs_1, ymm_rhs_1, ymm_sum_1);
    }

    if (last >= last_aligned + 8) {
      ymm_sum_0 = _mm256_fmadd_ps(_mm256_loadu_ps(lhs), _mm256_loadu_ps(rhs),
                                  ymm_sum_0);
      lhs += 8;
      rhs += 8;
    }
  }
  float result = HorizontalAdd_FP32_V256(_mm256_add_ps(ymm_sum_0, ymm_sum_1));

  switch (last - lhs) {
    case 7:
      FMA_FP32_GENERAL(lhs[6], rhs[6], result)
      /* FALLTHRU */
    case 6:
      FMA_FP32_GENERAL(lhs[5], rhs[5], result)
      /* FALLTHRU */
    case 5:
      FMA_FP32_GENERAL(lhs[4], rhs[4], result)
      /* FALLTHRU */
    case 4:
      FMA_FP32_GENERAL(lhs[3], rhs[3], result)
      /* FALLTHRU */
    case 3:
      FMA_FP32_GENERAL(lhs[2], rhs[2], result)
      /* FALLTHRU */
    case 2:
      FMA_FP32_GENERAL(lhs[1], rhs[1], result)
      /* FALLTHRU */
    case 1:
      FMA_FP32_GENERAL(lhs[0], rhs[0], result)
  }
  return result;
}

//! FP32 inner product entry point for AVX builds: inputs of 8 or more
//! elements use the AVX kernel, shorter inputs use the SSE kernel.
float InnerProductFp32AVX(const float *lhs, const float *rhs, size_t size) {
  return size >= 8 ? InnerProductFp32AVXInternal(lhs, rhs, size)
                   : InnerProductFp32SSEInternal(lhs, rhs, size);
}

//! Negated FP32 inner product (AVX build): InnerProductFp32AVX times -1.
float MinusInnerProductFp32AVX(const float *lhs, const float *rhs,
                               size_t size) {
  const float inner_product = InnerProductFp32AVX(lhs, rhs, size);
  return -1 * inner_product;
}

#endif  // __AVX__

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/inner_product_matrix_fp32_avx512.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_accum_fp32.i"
#include "distance_matrix_inner_product_utility.i"
#include "inner_product_matrix.h"

namespace zvec {
namespace ailego {

//--------------------------------------------------
// Dense
//--------------------------------------------------
#if defined(__AVX512F__)
// Narrower kernels, not defined in this file; InnerProductFp32AVX512 falls
// back to them for short inputs.

// Used for inputs of 8 to 15 elements.
float InnerProductFp32AVXInternal(const float *lhs, const float *rhs,
                                  size_t size);

// Used for inputs of at most 7 elements.
float InnerProductFp32SSEInternal(const float *lhs, const float *rhs,
                                  size_t size);

//! Inner Product
float InnerProductFp32AVX512Internal(const float *lhs, const float *rhs,
                                     size_t size) {
  const float *last = lhs + size;
  const float *last_aligned = lhs + ((size >> 5) << 5);

  __m512 zmm_sum_0 = _mm512_setzero_ps();
  __m512 zmm_sum_1 = _mm512_setzero_ps();

  if (((uintptr_t)lhs & 0x3f) == 0 && ((uintptr_t)rhs & 0x3f) == 0) {
    for (; lhs != last_aligned; lhs += 32, rhs += 32) {
      FMA_FP32_AVX512(_mm512_load_ps(lhs + 0), _mm512_load_ps(rhs + 0),
                      zmm_sum_0)

      FMA_FP32_AVX512(_mm512_load_ps(lhs + 16), _mm512_load_ps(rhs + 16),
                      zmm_sum_1)
    }

    if (last >= last_aligned + 16) {
      FMA_FP32_AVX512(_mm512_load_ps(lhs), _mm512_load_ps(rhs), zmm_sum_0)
      lhs += 16;
      rhs += 16;
    }
  } else {
    for (; lhs != last_aligned; lhs += 32, rhs += 32) {
      FMA_FP32_AVX512(_mm512_loadu_ps(lhs + 0), _mm512_loadu_ps(rhs + 0),
                      zmm_sum_0)

      FMA_FP32_AVX512(_mm512_loadu_ps(lhs + 16), _mm512_loadu_ps(rhs + 16),
                      zmm_sum_1)
    }

    if (last >= last_aligned + 16) {
      FMA_FP32_AVX512(_mm512_loadu_ps(lhs), _mm512_loadu_ps(rhs), zmm_sum_0)
      lhs += 16;
      rhs += 16;
    }
  }

  zmm_sum_0 = _mm512_add_ps(zmm_sum_0, zmm_sum_1);
  if (lhs != last) {
    __mmask16 mask = (__mmask16)((1 << (last - lhs)) - 1);
    __m512 zmm_undefined = _mm512_undefined_ps();
    zmm_sum_0 = _mm512_mask3_fmadd_ps(
        _mm512_mask_loadu_ps(zmm_undefined, mask, lhs),
        _mm512_mask_loadu_ps(zmm_undefined, mask, rhs), zmm_sum_0, mask);
  }
  return HorizontalAdd_FP32_V512(zmm_sum_0);
}

//! Length-based entry point for the AVX-512 build of the FP32 inner product.
//!   size <= 7   -> InnerProductFp32SSEInternal
//!   size 8..15  -> InnerProductFp32AVXInternal
//!   size >= 16  -> InnerProductFp32AVX512Internal
float InnerProductFp32AVX512(const float *lhs, const float *rhs, size_t size) {
  if (size <= 7) {
    return InnerProductFp32SSEInternal(lhs, rhs, size);
  }
  if (size <= 15) {
    return InnerProductFp32AVXInternal(lhs, rhs, size);
  }
  return InnerProductFp32AVX512Internal(lhs, rhs, size);
}

//! Negated FP32 inner product: InnerProductFp32AVX512() multiplied by -1.
float MinusInnerProductFp32AVX512(const float *lhs, const float *rhs,
                                  size_t size) {
  const float inner_product = InnerProductFp32AVX512(lhs, rhs, size);
  return -1 * inner_product;
}

#endif

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/inner_product_matrix_fp32_dispatch.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <ailego/internal/cpu_features.h>
#include "inner_product_matrix.h"

namespace zvec {
namespace ailego {
//--------------------------------------------------
// Dense
//--------------------------------------------------
// FP32 kernel entry points used by the dispatchers below; none of them is
// defined in this file. Each SIMD pair is declared only when the matching
// compiler macro is set, so every call site must sit behind the same guard.
#if defined(__ARM_NEON)
float InnerProductFp32NEON(const float *lhs, const float *rhs, size_t size);
float MinusInnerProductFp32NEON(const float *lhs, const float *rhs,
                                size_t size);
#endif

#if defined(__AVX512F__)
float InnerProductFp32AVX512(const float *lhs, const float *rhs, size_t size);
float MinusInnerProductFp32AVX512(const float *lhs, const float *rhs,
                                  size_t size);
#endif

#if defined(__AVX__)
float InnerProductFp32AVX(const float *lhs, const float *rhs, size_t size);
float MinusInnerProductFp32AVX(const float *lhs, const float *rhs, size_t size);
#endif

#if defined(__SSE__)
float InnerProductFp32SSE(const float *lhs, const float *rhs, size_t size);
float MinusInnerProductFp32SSE(const float *lhs, const float *rhs, size_t size);
#endif

// Scalar fallbacks: declared unconditionally.
float InnerProductFp32Scalar(const float *lhs, const float *rhs, size_t size);
float MinusInnerProductFp32Scalar(const float *lhs, const float *rhs,
                                  size_t size);

//! Compute the distance between matrix and query (FP32, M=1, N=1)
//! Stores the inner product of `m` and `q` (`dim` floats each) in `*out`.
//! NEON builds call the NEON kernel directly. Otherwise the first tier whose
//! compile-time macro is defined and whose CpuFeatures::static_flags_ entry is
//! set is used, in the order AVX-512F, AVX, SSE, with the scalar kernel last.
void InnerProductMatrix<float, 1, 1>::Compute(const float *m, const float *q,
                                              size_t dim, float *out) {
#if defined(__ARM_NEON)
  *out = InnerProductFp32NEON(m, q, dim);
#else
  using zvec::ailego::internal::CpuFeatures;

#if defined(__AVX512F__)
  if (CpuFeatures::static_flags_.AVX512F) {
    *out = InnerProductFp32AVX512(m, q, dim);
    return;
  }
#endif  // __AVX512F__

#if defined(__AVX__)
  if (CpuFeatures::static_flags_.AVX) {
    *out = InnerProductFp32AVX(m, q, dim);
    return;
  }
#endif  // __AVX__

#if defined(__SSE__)
  if (CpuFeatures::static_flags_.SSE) {
    *out = InnerProductFp32SSE(m, q, dim);
    return;
  }
#endif  // __SSE__

  // No SIMD tier applied.
  *out = InnerProductFp32Scalar(m, q, dim);
#endif  // __ARM_NEON
}

//! Compute the distance between matrix and query (FP32, M=1, N=1)
//! Stores the negated inner product of `m` and `q` (`dim` floats each) in
//! `*out`. Kernel selection mirrors the non-negated variant: NEON directly,
//! otherwise AVX-512F, AVX, SSE (macro defined and static flag set), then
//! scalar.
void MinusInnerProductMatrix<float, 1, 1>::Compute(const float *m,
                                                   const float *q, size_t dim,
                                                   float *out) {
#if defined(__ARM_NEON)
  *out = MinusInnerProductFp32NEON(m, q, dim);
#else
  using zvec::ailego::internal::CpuFeatures;

#if defined(__AVX512F__)
  if (CpuFeatures::static_flags_.AVX512F) {
    *out = MinusInnerProductFp32AVX512(m, q, dim);
    return;
  }
#endif  // __AVX512F__

#if defined(__AVX__)
  if (CpuFeatures::static_flags_.AVX) {
    *out = MinusInnerProductFp32AVX(m, q, dim);
    return;
  }
#endif  // __AVX__

#if defined(__SSE__)
  if (CpuFeatures::static_flags_.SSE) {
    *out = MinusInnerProductFp32SSE(m, q, dim);
    return;
  }
#endif  // __SSE__

  // No SIMD tier applied.
  *out = MinusInnerProductFp32Scalar(m, q, dim);
#endif  // __ARM_NEON
}

//--------------------------------------------------
// Sparse
//--------------------------------------------------
// Sparse kernels used below; none of them is defined in this file. The
// in-segment kernels take each operand as a count plus parallel arrays of
// 16-bit indices and FP32 values.
#if defined(__SSE4_1__)
float InnerProductSparseInSegmentFp32SSE(uint32_t m_sparse_count,
                                         const uint16_t *m_sparse_index,
                                         const float *m_sparse_value,
                                         uint32_t q_sparse_count,
                                         const uint16_t *q_sparse_index,
                                         const float *q_sparse_value);
#endif
float InnerProductSparseInSegmentFp32Scalar(uint32_t m_sparse_count,
                                            const uint16_t *m_sparse_index,
                                            const float *m_sparse_value,
                                            uint32_t q_sparse_count,
                                            const uint16_t *q_sparse_index,
                                            const float *q_sparse_value);

// Whole-vector sparse distance over two opaque buffers; their layout is not
// visible from this file.
float MinusInnerProductSparseFp32Scalar(const void *m_sparse_data_in,
                                        const void *q_sparse_data_in);

//! Sparse negated inner product between two opaque sparse buffers, written to
//! `*out`. Always delegates to the scalar implementation; no SIMD dispatch
//! happens here.
void MinusInnerProductSparseMatrix<float>::Compute(const void *m_sparse_data_in,
                                                   const void *q_sparse_data_in,
                                                   float *out) {
  const float distance =
      MinusInnerProductSparseFp32Scalar(m_sparse_data_in, q_sparse_data_in);
  *out = distance;
}

//! Sparse inner product within one segment. Uses the SSE kernel when it is
//! compiled in (__SSE4_1__) and CpuFeatures::static_flags_.SSE4_1 is set;
//! otherwise uses the scalar kernel. All arguments are forwarded unchanged.
float ComputeInnerProductSparseInSegmentFp32(uint32_t m_sparse_count,
                                             const uint16_t *m_sparse_index,
                                             const float *m_sparse_value,
                                             uint32_t q_sparse_count,
                                             const uint16_t *q_sparse_index,
                                             const float *q_sparse_value) {
#if defined(__SSE4_1__)
  using zvec::ailego::internal::CpuFeatures;
  if (CpuFeatures::static_flags_.SSE4_1) {
    return InnerProductSparseInSegmentFp32SSE(m_sparse_count, m_sparse_index,
                                              m_sparse_value, q_sparse_count,
                                              q_sparse_index, q_sparse_value);
  }
#endif

  // Portable path.
  return InnerProductSparseInSegmentFp32Scalar(m_sparse_count, m_sparse_index,
                                               m_sparse_value, q_sparse_count,
                                               q_sparse_index, q_sparse_value);
}
}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/inner_product_matrix_fp32_neon.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_accum_fp32.i"
#include "distance_matrix_inner_product_utility.i"
#include "inner_product_matrix.h"

namespace zvec {
namespace ailego {

//--------------------------------------------------
// Dense
//--------------------------------------------------
#if defined(__ARM_NEON)
//! Inner product of two FP32 vectors of `size` elements with NEON.
//! Eight floats per iteration feed two accumulators, one optional 4-float
//! step follows, and the last 0..3 floats are added one by one.
float InnerProductFp32NEON(const float *lhs, const float *rhs, size_t size) {
  const float *const end = lhs + size;
  const float *const unrolled_end = lhs + (size & ~static_cast<size_t>(7));

  float32x4_t v_sum_0 = vdupq_n_f32(0);
  float32x4_t v_sum_1 = vdupq_n_f32(0);

  while (lhs != unrolled_end) {
    v_sum_0 = vfmaq_f32(v_sum_0, vld1q_f32(lhs), vld1q_f32(rhs));
    v_sum_1 = vfmaq_f32(v_sum_1, vld1q_f32(lhs + 4), vld1q_f32(rhs + 4));
    lhs += 8;
    rhs += 8;
  }
  if (end - lhs >= 4) {
    v_sum_0 = vfmaq_f32(v_sum_0, vld1q_f32(lhs), vld1q_f32(rhs));
    lhs += 4;
    rhs += 4;
  }

  float result = vaddvq_f32(vaddq_f32(v_sum_0, v_sum_1));

  // Scalar tail, highest index first.
  for (auto remaining = end - lhs; remaining-- > 0;) {
    FMA_FP32_GENERAL(lhs[remaining], rhs[remaining], result)
  }
  return result;
}

//! Negated FP32 inner product: InnerProductFp32NEON() multiplied by -1.
float MinusInnerProductFp32NEON(const float *lhs, const float *rhs,
                                size_t size) {
  const float inner_product = InnerProductFp32NEON(lhs, rhs, size);
  return -1 * inner_product;
}

#endif  // __ARM_NEON

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/inner_product_matrix_fp32_sse.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_accum_fp32.i"
#include "distance_matrix_inner_product_utility.i"
#include "inner_product_matrix.h"

namespace zvec {
namespace ailego {

//--------------------------------------------------
// Dense
//--------------------------------------------------
#if defined(__SSE__)
//! Inner product of two FP32 vectors of `size` elements with 128-bit
//! registers. Eight floats per iteration feed two accumulators, one optional
//! 4-float step follows, and the last 0..3 floats are added one by one.
//! Aligned loads are used only when both pointers are 16-byte aligned.
float InnerProductFp32SSEInternal(const float *lhs, const float *rhs,
                                  size_t size) {
  const float *const end = lhs + size;
  const float *const unrolled_end = lhs + (size & ~static_cast<size_t>(7));
  const bool both_aligned = ((reinterpret_cast<uintptr_t>(lhs) |
                              reinterpret_cast<uintptr_t>(rhs)) &
                             0xf) == 0;

  __m128 xmm_sum_0 = _mm_setzero_ps();
  __m128 xmm_sum_1 = _mm_setzero_ps();

  if (both_aligned) {
    while (lhs != unrolled_end) {
      xmm_sum_0 = _mm_fmadd_ps(_mm_load_ps(lhs), _mm_load_ps(rhs), xmm_sum_0);
      xmm_sum_1 =
          _mm_fmadd_ps(_mm_load_ps(lhs + 4), _mm_load_ps(rhs + 4), xmm_sum_1);
      lhs += 8;
      rhs += 8;
    }

    if (end - lhs >= 4) {
      xmm_sum_0 = _mm_fmadd_ps(_mm_load_ps(lhs), _mm_load_ps(rhs), xmm_sum_0);
      lhs += 4;
      rhs += 4;
    }
  } else {
    while (lhs != unrolled_end) {
      xmm_sum_0 = _mm_fmadd_ps(_mm_loadu_ps(lhs), _mm_loadu_ps(rhs), xmm_sum_0);
      xmm_sum_1 =
          _mm_fmadd_ps(_mm_loadu_ps(lhs + 4), _mm_loadu_ps(rhs + 4), xmm_sum_1);
      lhs += 8;
      rhs += 8;
    }

    if (end - lhs >= 4) {
      xmm_sum_0 = _mm_fmadd_ps(_mm_loadu_ps(lhs), _mm_loadu_ps(rhs), xmm_sum_0);
      lhs += 4;
      rhs += 4;
    }
  }

  float result = HorizontalAdd_FP32_V128(_mm_add_ps(xmm_sum_0, xmm_sum_1));

  // Scalar tail, highest index first.
  for (auto remaining = end - lhs; remaining-- > 0;) {
    FMA_FP32_GENERAL(lhs[remaining], rhs[remaining], result)
  }
  return result;
}

//! Public SSE entry point; forwards every length to the internal kernel.
float InnerProductFp32SSE(const float *lhs, const float *rhs, size_t size) {
  const float inner_product = InnerProductFp32SSEInternal(lhs, rhs, size);
  return inner_product;
}

//! Negated FP32 inner product: InnerProductFp32SSE() multiplied by -1.
float MinusInnerProductFp32SSE(const float *lhs, const float *rhs,
                               size_t size) {
  const float inner_product = InnerProductFp32SSE(lhs, rhs, size);
  return -1 * inner_product;
}

#endif  // __SSE__

//--------------------------------------------------
// Sparse
//--------------------------------------------------
#if defined(__SSE4_1__)
// Byte-shuffle controls for _mm_shuffle_epi8, indexed by a 4-bit lane mask.
// Entry b gathers the 32-bit lanes whose bit is set in b into the lowest
// lanes of the result, keeping their order (e.g. b = 0b0101 yields lane 0
// followed by lane 2). Unused byte positions hold -127, whose set top bit
// makes _mm_shuffle_epi8 write zero there. _mm_set_epi8 lists bytes from most
// to least significant, so the selected lanes appear at the end of each row.
const static __m128i SHUFFLE_MASK16[16] = {
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, -127, -127, -127, -127),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 7, 6, 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 7, 6, 5, 4, 3,
                 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 11, 10, 9, 8),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 9, 8,
                 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 9, 8,
                 7, 6, 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 15, 14, 13, 12),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,
                 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,
                 7, 6, 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 7, 6, 5, 4, 3, 2, 1,
                 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,
                 11, 10, 9, 8),
    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 3, 2, 1,
                 0),
    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5,
                 4),
    _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
};

// Capacity, in floats, of each scratch buffer that
// InnerProductSparseInSegmentFp32SSE places on its stack.
constexpr uint32_t MAX_SPARSE_BUFFER_LENGTH = 65536;

//! Sparse inner product over one segment, vectorised with SSE string compare.
//!
//! Each operand is `*_sparse_count` (index, value) pairs stored as parallel
//! arrays. Returns the sum of m_value * q_value over indices present in both
//! operands.
//!
//! The block-skipping comparisons below only make sense if each index array
//! is sorted ascending without duplicates — assumption, TODO confirm against
//! callers.
//!
//! NOTE(review): _mm_cmpistrm is an SSE4.2 intrinsic and _mm_popcnt_u32 needs
//! POPCNT, while this function is compiled under __SSE4_1__ — confirm the
//! build flags and runtime dispatch cover that.
float InnerProductSparseInSegmentFp32SSE(uint32_t m_sparse_count,
                                         const uint16_t *m_sparse_index,
                                         const float *m_sparse_value,
                                         uint32_t q_sparse_count,
                                         const uint16_t *q_sparse_index,
                                         const float *q_sparse_value) {
  float sum = 0.0f;

  // handle if the first dim is zero
  // A leading index 0 is peeled off each operand and, when both have one,
  // multiplied here instead of in the vector loop. Presumably needed because
  // _mm_cmpistrm takes implicit-length operands in which a zero element ends
  // the string — TODO confirm.
  bool m_zero = false;
  float m_zero_value = 0.0f;
  if (m_sparse_count > 0 && m_sparse_index[0] == 0) {
    m_sparse_count--;
    m_sparse_index++;
    m_zero_value = *m_sparse_value++;
    m_zero = true;
  }

  bool q_zero = false;
  float q_zero_value = 0.0f;
  if (q_sparse_count > 0 && q_sparse_index[0] == 0) {
    q_sparse_count--;
    q_sparse_index++;
    q_zero_value = *q_sparse_value++;
    q_zero = true;
  }

  if (m_zero && q_zero) {
    sum = m_zero_value * q_zero_value;
  }

  // i1 / i2 walk the m / q arrays; end1 / end2 are the counts rounded down to
  // a multiple of 8, i.e. the part the 8-wide vector loop may touch.
  size_t i1 = 0, i2 = 0;
  size_t end1 = m_sparse_count / 8 * 8;
  size_t end2 = q_sparse_count / 8 * 8;

  // std::vector<float> mem1;
  // std::vector<float> mem2;

  // Scratch space that receives the values of matching entries, compacted:
  // buffer 1 for m, buffer 2 for q.
  // NOTE(review): these are two arrays of MAX_SPARSE_BUFFER_LENGTH floats on
  // the stack per call, and nothing in this function checks the counts
  // against that capacity — confirm caller limits and thread stack sizes.
  float fixed_buffer_1[MAX_SPARSE_BUFFER_LENGTH];
  float fixed_buffer_2[MAX_SPARSE_BUFFER_LENGTH];

  float *val_start_1 = fixed_buffer_1;
  float *val_start_2 = fixed_buffer_2;

  // uint32_t max_count = std::max(m_sparse_count, q_sparse_count);

  // if (MAX_SPARSE_BUFFER_LENGTH < max_count) {
  //   mem1.reserve(max_count);
  //   mem2.reserve(max_count);

  //   val_start_1 = mem1.data();
  //   val_start_2 = mem2.data();
  // }

  // Write cursors into the two scratch buffers.
  float *val_1 = val_start_1;
  float *val_2 = val_start_2;

  if (i1 < end1 && i2 < end2) {
    // Skip whole 8-entry blocks of m that end before the current q block
    // starts, then do the same for q against m.
    while (m_sparse_index[i1 + 7] < q_sparse_index[i2]) {
      i1 += 8;
      if (i1 >= end1) goto do_scalar;
    }

    while (q_sparse_index[i2 + 7] < m_sparse_index[i1]) {
      i2 += 8;
      if (i2 >= end2) goto do_scalar;
    }

    // Current 8 x uint16 index blocks of each operand.
    __m128i mm_index_m =
        _mm_loadu_si128(reinterpret_cast<const __m128i *>(&m_sparse_index[i1]));
    __m128i mm_index_q =
        _mm_loadu_si128(reinterpret_cast<const __m128i *>(&q_sparse_index[i2]));

    while (true) {
#ifdef DEBUG_PRINT
      std::cout << "index 1: " << std::endl;
      print_data16(&mm_index_m);

      std::cout << "index 2: " << std::endl;
      print_data16(&mm_index_q);
#endif

      // Bit mask over the m block: bit k is set when m index k equals any
      // index in the q block.
      __m128i mm_cmp_res =
          _mm_cmpistrm(mm_index_q, mm_index_m,
                       _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);

#ifdef DEBUG_PRINT
      std::cout << "cmp res: " << std::endl;
      print_data16(&mm_cmp_res);
#endif

      int r = _mm_extract_epi32(mm_cmp_res, 0);

      if (r) {
        // Compact the matching m values. The 8 values are handled as two
        // groups of 4 floats; each group is shuffled so matches come first.
        // _mm_storeu_ps always writes 4 floats, but the cursor only advances
        // by the number of matches, so the next store overwrites the rest.
        int r1 = r & 15;

        __m128i v = _mm_loadu_si128(
            reinterpret_cast<const __m128i *>(&m_sparse_value[i1]));
        __m128 vs = _mm_castsi128_ps(_mm_shuffle_epi8(v, SHUFFLE_MASK16[r1]));

        _mm_storeu_ps(val_1, vs);
        val_1 += _mm_popcnt_u32(r1);

        int r2 = (r >> 4) & 15;
        v = _mm_loadu_si128(
            reinterpret_cast<const __m128i *>(&m_sparse_value[i1 + 4]));
        vs = _mm_castsi128_ps(_mm_shuffle_epi8(v, SHUFFLE_MASK16[r2]));
        _mm_storeu_ps(val_1, vs);
        val_1 += _mm_popcnt_u32(r2);

        // Same comparison with the operands swapped gives the mask over the
        // q block; compact the matching q values the same way.
        mm_cmp_res = _mm_cmpistrm(
            mm_index_m, mm_index_q,
            _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
        r = _mm_extract_epi32(mm_cmp_res, 0);

        r1 = r & 15;

        v = _mm_loadu_si128(
            reinterpret_cast<const __m128i *>(&q_sparse_value[i2]));
        vs = _mm_castsi128_ps(_mm_shuffle_epi8(v, SHUFFLE_MASK16[r1]));
        _mm_storeu_ps(val_2, vs);
        val_2 += _mm_popcnt_u32(r1);

        r2 = (r >> 4) & 15;
        v = _mm_loadu_si128(
            reinterpret_cast<const __m128i *>(&q_sparse_value[i2 + 4]));
        vs = _mm_castsi128_ps(_mm_shuffle_epi8(v, SHUFFLE_MASK16[r2]));
        _mm_storeu_ps(val_2, vs);
        val_2 += _mm_popcnt_u32(r2);
      }

      // Advance the block whose largest index is smaller (both when equal).
      // id1_max is captured first because i1 may change before the second
      // comparison.
      const uint16_t id1_max = m_sparse_index[i1 + 7];

      if (id1_max <= q_sparse_index[i2 + 7]) {
        i1 += 8;
        if (i1 >= end1) goto do_scalar;
        mm_index_m = _mm_loadu_si128(
            reinterpret_cast<const __m128i *>(&m_sparse_index[i1]));
      }

      if (id1_max >= q_sparse_index[i2 + 7]) {
        i2 += 8;
        if (i2 >= end2) goto do_scalar;
        mm_index_q = _mm_loadu_si128(
            reinterpret_cast<const __m128i *>(&q_sparse_index[i2]));
      }
    }
  }

  // Element-wise merge of whatever the vector loop did not consume, appending
  // matches to the same scratch buffers.
do_scalar:
  while (i1 < m_sparse_count && i2 < q_sparse_count) {
    if (m_sparse_index[i1] == q_sparse_index[i2]) {
      *val_1++ = m_sparse_value[i1];
      *val_2++ = q_sparse_value[i2];

      ++i1;
      ++i2;
    } else if (m_sparse_index[i1] < q_sparse_index[i2]) {
      ++i1;
    } else {
      ++i2;
    }
  }

  // Number of matched pairs, taken from the m-side cursor only.
  size_t res_num = val_1 - val_start_1;

  //  if (res_num != val_2 - val_start_2) {
  //   std::cerr << "size mismatch!" << std::endl;
  //  }

  // Dot product of the two compacted buffers: 4 floats at a time, then the
  // remaining 0..3 pairs one by one.
  size_t res_num4 = res_num / 4 * 4;

  if (res_num4) {
    __m128 sum128 = _mm_set1_ps(0);

    for (size_t k = 0; k < res_num4; k += 4) {
      sum128 = _mm_add_ps(sum128, _mm_mul_ps(_mm_loadu_ps(val_start_1 + k),
                                             _mm_loadu_ps(val_start_2 + k)));
    }

    float __attribute__((aligned(16))) tmp_res[4];
    _mm_store_ps(tmp_res, sum128);
    sum += (tmp_res[0] + tmp_res[1] + tmp_res[2] + tmp_res[3]);
  }

  for (size_t k = res_num4; k < res_num; ++k)
    sum += val_start_1[k] * val_start_2[k];

  return sum;
}
#endif  // __SSE4_1__

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/inner_product_matrix_int4_avx2.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_accum_int4.i"
#include "distance_matrix_inner_product_utility.i"
#include "inner_product_matrix.h"

namespace zvec {
namespace ailego {
//--------------------------------------------------
// Dense
//--------------------------------------------------
#if defined(__AVX2__)
//! Forward declaration of the 128-bit kernel that InnerProductInt4AVX2()
//! falls back to for short inputs.
float InnerProductInt4SSEInternal(const uint8_t *lhs, const uint8_t *rhs,
                                  size_t size);

//! Inner Product
//! INT4 inner product over `size` bytes of each operand. Stages: 32 bytes per
//! iteration with 256-bit registers, one optional 16-byte step with 128-bit
//! registers, then up to 15 trailing bytes one at a time. Aligned loads are
//! used only when both pointers are 32-byte aligned.
float InnerProductInt4AVX2Internal(const uint8_t *lhs, const uint8_t *rhs,
                                   size_t size) {
  const uint8_t *const end = lhs + size;
  const uint8_t *const wide_end = lhs + (size & ~static_cast<size_t>(31));
  const bool both_aligned = ((reinterpret_cast<uintptr_t>(lhs) |
                              reinterpret_cast<uintptr_t>(rhs)) &
                             0x1f) == 0;
  __m256i ymm_sum = _mm256_setzero_si256();

  if (both_aligned) {
    while (lhs != wide_end) {
      __m256i ymm_lhs =
          _mm256_load_si256(reinterpret_cast<const __m256i *>(lhs));
      __m256i ymm_rhs =
          _mm256_load_si256(reinterpret_cast<const __m256i *>(rhs));
      FMA_INT4_ITER_AVX(ymm_lhs, ymm_rhs, ymm_sum)
      lhs += 32;
      rhs += 32;
    }

    if (end - lhs >= 16) {
      __m128i xmm_lhs = _mm_load_si128(reinterpret_cast<const __m128i *>(lhs));
      __m128i xmm_rhs = _mm_load_si128(reinterpret_cast<const __m128i *>(rhs));
      __m128i xmm_sum = _mm_setzero_si128();
      FMA_INT4_ITER_SSE(xmm_lhs, xmm_rhs, xmm_sum)
      // Fold the 128-bit partial sum into the low half of the accumulator.
      ymm_sum = _mm256_add_epi32(_mm256_set_m128i(_mm_setzero_si128(), xmm_sum),
                                 ymm_sum);
      lhs += 16;
      rhs += 16;
    }
  } else {
    while (lhs != wide_end) {
      __m256i ymm_lhs =
          _mm256_loadu_si256(reinterpret_cast<const __m256i *>(lhs));
      __m256i ymm_rhs =
          _mm256_loadu_si256(reinterpret_cast<const __m256i *>(rhs));
      FMA_INT4_ITER_AVX(ymm_lhs, ymm_rhs, ymm_sum)
      lhs += 32;
      rhs += 32;
    }

    if (end - lhs >= 16) {
      __m128i xmm_lhs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(lhs));
      __m128i xmm_rhs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(rhs));
      __m128i xmm_sum = _mm_setzero_si128();
      FMA_INT4_ITER_SSE(xmm_lhs, xmm_rhs, xmm_sum)
      // Fold the 128-bit partial sum into the low half of the accumulator.
      ymm_sum = _mm256_add_epi32(_mm256_set_m128i(_mm_setzero_si128(), xmm_sum),
                                 ymm_sum);
      lhs += 16;
      rhs += 16;
    }
  }

  float result = static_cast<float>(HorizontalAdd_INT32_V256(ymm_sum));

  // Byte-wise tail (at most 15 bytes remain), highest index first.
  for (auto remaining = end - lhs; remaining-- > 0;) {
    FMA_INT4_GENERAL(lhs[remaining], rhs[remaining], result)
  }
  return result;
}

//! Entry point for the AVX2 build of the INT4 inner product. `size` is halved
//! before being handed to the kernels (presumably two 4-bit elements per
//! byte). Inputs with size > 63 use the 256-bit kernel, shorter ones the
//! 128-bit kernel.
float InnerProductInt4AVX2(const uint8_t *lhs, const uint8_t *rhs,
                           size_t size) {
  const size_t byte_count = size >> 1;
  if (size <= 63) {
    return InnerProductInt4SSEInternal(lhs, rhs, byte_count);
  }
  return InnerProductInt4AVX2Internal(lhs, rhs, byte_count);
}

//! Negated INT4 inner product: the sign-flipped result of
//! InnerProductInt4AVX2().
float MinusInnerProductInt4AVX2(const uint8_t *lhs, const uint8_t *rhs,
                                size_t size) {
  const float inner_product = InnerProductInt4AVX2(lhs, rhs, size);
  return -inner_product;
}

#endif  // __AVX2__

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/math/inner_product_matrix_int4_dispatch.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <ailego/internal/cpu_features.h>
#include "inner_product_matrix.h"

namespace zvec {
namespace ailego {
//--------------------------------------------------
// Dense
//--------------------------------------------------
// INT4 kernel entry points used by the dispatchers below; none of them is
// defined in this file. The SIMD pairs are declared only when the matching
// compiler macro is set, so every call site must sit behind the same guard.
#if defined(__AVX2__)
float InnerProductInt4AVX2(const uint8_t *lhs, const uint8_t *rhs, size_t size);
float MinusInnerProductInt4AVX2(const uint8_t *lhs, const uint8_t *rhs,
                                size_t size);
#endif

#if defined(__SSE4_1__)
float InnerProductInt4SSE(const uint8_t *lhs, const uint8_t *rhs, size_t size);
float MinusInnerProductInt4SSE(const uint8_t *lhs, const uint8_t *rhs,
                               size_t size);
#endif

// Scalar fallbacks: declared unconditionally.
float InnerProductInt4Scalar(const uint8_t *m, const uint8_t *q, size_t dim);
float MinusInnerProductInt4Scalar(const uint8_t *m, const uint8_t *q,
                                  size_t dim);

//! Compute the distance between matrix and query (INT4, M=1, N=1)
//! Stores the inner product of `m` and `q` in `*out`. The first tier whose
//! compile-time macro is defined and whose CpuFeatures::static_flags_ entry is
//! set is used, in the order AVX2, SSE4.1, with the scalar kernel last.
void InnerProductMatrix<uint8_t, 1, 1>::Compute(const uint8_t *m,
                                                const uint8_t *q, size_t dim,
                                                float *out) {
  using zvec::ailego::internal::CpuFeatures;

#if defined(__AVX2__)
  if (CpuFeatures::static_flags_.AVX2) {
    *out = InnerProductInt4AVX2(m, q, dim);
    return;
  }
#endif  // __AVX2__

#if defined(__SSE4_1__)
  if (CpuFeatures::static_flags_.SSE4_1) {
    *out = InnerProductInt4SSE(m, q, dim);
    return;
  }
#endif  // __SSE4_1__

  // No SIMD tier applied.
  *out = InnerProductInt4Scalar(m, q, dim);
}

//! Compute the distance between matrix and query (INT4, M=1, N=1)
//! Stores the negated inner product of `m` and `q` in `*out`. Kernel
//! selection mirrors the non-negated variant: AVX2, then SSE4.1 (macro
//! defined and static flag set), then scalar.
void MinusInnerProductMatrix<uint8_t, 1, 1>::Compute(const uint8_t *m,
                                                     const uint8_t *q,
                                                     size_t dim, float *out) {
  using zvec::ailego::internal::CpuFeatures;

#if defined(__AVX2__)
  if (CpuFeatures::static_flags_.AVX2) {
    *out = MinusInnerProductInt4AVX2(m, q, dim);
    return;
  }
#endif  // __AVX2__

#if defined(__SSE4_1__)
  if (CpuFeatures::static_flags_.SSE4_1) {
    *out = MinusInnerProductInt4SSE(m, q, dim);
    return;
  }
#endif  // __SSE4_1__

  // No SIMD tier applied.
  *out = MinusInnerProductInt4Scalar(m, q, dim);
}

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/math/inner_product_matrix_int4_sse.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_accum_int4.i"
#include "distance_matrix_inner_product_utility.i"
#include "inner_product_matrix.h"

namespace zvec {
namespace ailego {
//--------------------------------------------------
// Dense
//--------------------------------------------------
#if defined(__SSE4_1__)
//! INT4 inner product over `size` bytes of each operand using 128-bit
//! registers: 16 bytes per iteration, then up to 15 trailing bytes one at a
//! time. Aligned loads are used only when both pointers are 16-byte aligned.
float InnerProductInt4SSEInternal(const uint8_t *lhs, const uint8_t *rhs,
                                  size_t size) {
  const uint8_t *const end = lhs + size;
  const uint8_t *const block_end = lhs + (size & ~static_cast<size_t>(15));
  const bool both_aligned = ((reinterpret_cast<uintptr_t>(lhs) |
                              reinterpret_cast<uintptr_t>(rhs)) &
                             0xf) == 0;
  __m128i xmm_sum = _mm_setzero_si128();

  if (both_aligned) {
    while (lhs != block_end) {
      __m128i xmm_lhs = _mm_load_si128(reinterpret_cast<const __m128i *>(lhs));
      __m128i xmm_rhs = _mm_load_si128(reinterpret_cast<const __m128i *>(rhs));
      FMA_INT4_ITER_SSE(xmm_lhs, xmm_rhs, xmm_sum)
      lhs += 16;
      rhs += 16;
    }
  } else {
    while (lhs != block_end) {
      __m128i xmm_lhs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(lhs));
      __m128i xmm_rhs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(rhs));
      FMA_INT4_ITER_SSE(xmm_lhs, xmm_rhs, xmm_sum)
      lhs += 16;
      rhs += 16;
    }
  }

  float result = static_cast<float>(HorizontalAdd_INT32_V128(xmm_sum));

  // Byte-wise tail (at most 15 bytes remain), highest index first.
  for (auto remaining = end - lhs; remaining-- > 0;) {
    FMA_INT4_GENERAL(lhs[remaining], rhs[remaining], result)
  }
  return result;
}

//! Public SSE entry point for the INT4 inner product. `size` is halved before
//! being handed to the kernel (presumably two 4-bit elements per byte).
float InnerProductInt4SSE(const uint8_t *lhs, const uint8_t *rhs, size_t size) {
  const size_t byte_count = size >> 1;
  return InnerProductInt4SSEInternal(lhs, rhs, byte_count);
}

//! Negated INT4 inner product: the sign-flipped result of
//! InnerProductInt4SSE().
float MinusInnerProductInt4SSE(const uint8_t *lhs, const uint8_t *rhs,
                               size_t size) {
  const float inner_product = InnerProductInt4SSE(lhs, rhs, size);
  return -inner_product;
}

#endif  // __SSE4_1__

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/math/inner_product_matrix_int8_avx2.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_accum_int8.i"
#include "distance_matrix_inner_product_utility.i"
#include "inner_product_matrix.h"

namespace zvec {
namespace ailego {

//--------------------------------------------------
// Dense
//--------------------------------------------------
#if defined(__AVX2__)
float InnerProductInt8SSEInternal(const int8_t *lhs, const int8_t *rhs,
                                  size_t size);

//! Inner product of two int8 arrays of `size` elements using 256-bit AVX2
//! integer arithmetic; the integer sum is converted to float at the end.
//!
//! The work is split into: a main loop over 64-byte blocks, at most one
//! 32-byte block, at most one 16-byte block, and a scalar tail of up to 15
//! elements handled by the fall-through switch.
//!
//! NOTE(review): _mm256_sign_epi8 cannot negate -128, so an lhs element of
//! -128 paired with a negative rhs element appears to contribute with the
//! wrong sign — presumably inputs are quantized to [-127, 127]; confirm.
inline float InnerProductInt8AVX2Internal(const int8_t *lhs, const int8_t *rhs,
                                          size_t size) {
  const int8_t *last = lhs + size;
  // End of the region that is a whole number of 64-byte blocks.
  const int8_t *last_aligned = lhs + ((size >> 6) << 6);
  float result = 0.0;

  // Two independent 32-bit accumulators, one per 32-byte half of a block.
  __m256i ymm_sum_0 = _mm256_setzero_si256();
  __m256i ymm_sum_1 = _mm256_setzero_si256();

  // Aligned loads are used only when both pointers are 32-byte aligned.
  if (((uintptr_t)lhs & 0x1f) == 0 && ((uintptr_t)rhs & 0x1f) == 0) {
    for (; lhs != last_aligned; lhs += 64, rhs += 64) {
      __m256i ymm_lhs_0 = _mm256_load_si256((const __m256i *)(lhs + 0));
      __m256i ymm_lhs_1 = _mm256_load_si256((const __m256i *)(lhs + 32));
      __m256i ymm_rhs_0 = _mm256_load_si256((const __m256i *)(rhs + 0));
      __m256i ymm_rhs_1 = _mm256_load_si256((const __m256i *)(rhs + 32));

      // maddubs multiplies unsigned bytes by signed bytes, so move the sign
      // of rhs onto lhs and feed |rhs| as the unsigned operand.
      ymm_lhs_0 = _mm256_sign_epi8(ymm_lhs_0, ymm_rhs_0);
      ymm_lhs_1 = _mm256_sign_epi8(ymm_lhs_1, ymm_rhs_1);
      ymm_rhs_0 = _mm256_abs_epi8(ymm_rhs_0);
      ymm_rhs_1 = _mm256_abs_epi8(ymm_rhs_1);

      // maddubs yields 16-bit pair sums; madd against ONES_INT16_AVX
      // (presumably a vector of 16-bit ones) widens them to 32-bit sums.
      ymm_sum_0 = _mm256_add_epi32(
          _mm256_madd_epi16(_mm256_maddubs_epi16(ymm_rhs_0, ymm_lhs_0),
                            ONES_INT16_AVX),
          ymm_sum_0);
      ymm_sum_1 = _mm256_add_epi32(
          _mm256_madd_epi16(_mm256_maddubs_epi16(ymm_rhs_1, ymm_lhs_1),
                            ONES_INT16_AVX),
          ymm_sum_1);
    }

    // One remaining 32-byte block, if at least 32 elements are left.
    if (last >= last_aligned + 32) {
      __m256i ymm_lhs = _mm256_load_si256((const __m256i *)lhs);
      __m256i ymm_rhs = _mm256_load_si256((const __m256i *)rhs);
      ymm_lhs = _mm256_sign_epi8(ymm_lhs, ymm_rhs);
      ymm_rhs = _mm256_abs_epi8(ymm_rhs);
      ymm_sum_0 = _mm256_add_epi32(
          _mm256_madd_epi16(_mm256_maddubs_epi16(ymm_rhs, ymm_lhs),
                            ONES_INT16_AVX),
          ymm_sum_0);
      lhs += 32;
      rhs += 32;
    }

    // One remaining 16-byte block, computed with 128-bit ops and placed in
    // the low half of a 256-bit vector (upper half zero) before accumulating.
    if (last >= lhs + 16) {
      __m128i xmm_lhs = _mm_load_si128((const __m128i *)lhs);
      __m128i xmm_rhs = _mm_load_si128((const __m128i *)rhs);
      xmm_lhs = _mm_sign_epi8(xmm_lhs, xmm_rhs);
      xmm_rhs = _mm_abs_epi8(xmm_rhs);
      ymm_sum_0 = _mm256_add_epi32(
          _mm256_set_m128i(_mm_setzero_si128(),
                           _mm_madd_epi16(_mm_maddubs_epi16(xmm_rhs, xmm_lhs),
                                          ONES_INT16_SSE)),
          ymm_sum_0);
      lhs += 16;
      rhs += 16;
    }
  } else {
    // Same sequence as above, using unaligned loads.
    for (; lhs != last_aligned; lhs += 64, rhs += 64) {
      __m256i ymm_lhs_0 = _mm256_loadu_si256((const __m256i *)(lhs + 0));
      __m256i ymm_lhs_1 = _mm256_loadu_si256((const __m256i *)(lhs + 32));
      __m256i ymm_rhs_0 = _mm256_loadu_si256((const __m256i *)(rhs + 0));
      __m256i ymm_rhs_1 = _mm256_loadu_si256((const __m256i *)(rhs + 32));

      ymm_lhs_0 = _mm256_sign_epi8(ymm_lhs_0, ymm_rhs_0);
      ymm_lhs_1 = _mm256_sign_epi8(ymm_lhs_1, ymm_rhs_1);
      ymm_rhs_0 = _mm256_abs_epi8(ymm_rhs_0);
      ymm_rhs_1 = _mm256_abs_epi8(ymm_rhs_1);

      ymm_sum_0 = _mm256_add_epi32(
          _mm256_madd_epi16(_mm256_maddubs_epi16(ymm_rhs_0, ymm_lhs_0),
                            ONES_INT16_AVX),
          ymm_sum_0);
      ymm_sum_1 = _mm256_add_epi32(
          _mm256_madd_epi16(_mm256_maddubs_epi16(ymm_rhs_1, ymm_lhs_1),
                            ONES_INT16_AVX),
          ymm_sum_1);
    }

    if (last >= last_aligned + 32) {
      __m256i ymm_lhs = _mm256_loadu_si256((const __m256i *)lhs);
      __m256i ymm_rhs = _mm256_loadu_si256((const __m256i *)rhs);
      ymm_lhs = _mm256_sign_epi8(ymm_lhs, ymm_rhs);
      ymm_rhs = _mm256_abs_epi8(ymm_rhs);
      ymm_sum_0 = _mm256_add_epi32(
          _mm256_madd_epi16(_mm256_maddubs_epi16(ymm_rhs, ymm_lhs),
                            ONES_INT16_AVX),
          ymm_sum_0);
      lhs += 32;
      rhs += 32;
    }

    if (last >= lhs + 16) {
      __m128i xmm_lhs = _mm_loadu_si128((const __m128i *)lhs);
      __m128i xmm_rhs = _mm_loadu_si128((const __m128i *)rhs);
      xmm_lhs = _mm_sign_epi8(xmm_lhs, xmm_rhs);
      xmm_rhs = _mm_abs_epi8(xmm_rhs);
      ymm_sum_0 = _mm256_add_epi32(
          _mm256_set_m128i(_mm_setzero_si128(),
                           _mm_madd_epi16(_mm_maddubs_epi16(xmm_rhs, xmm_lhs),
                                          ONES_INT16_SSE)),
          ymm_sum_0);
      lhs += 16;
      rhs += 16;
    }
  }
  // Merge both accumulators and reduce to a single value
  // (HorizontalAdd_INT32_V256 presumably sums the 32-bit lanes).
  result = static_cast<float>(
      HorizontalAdd_INT32_V256(_mm256_add_epi32(ymm_sum_0, ymm_sum_1)));

  // Scalar tail: 0..15 elements remain. Each case falls through to the next,
  // so entering at case k handles elements k-1 down to 0.
  // FMA_INT8_GENERAL is a project macro — presumably result += lhs * rhs.
  switch (last - lhs) {
    case 15:
      FMA_INT8_GENERAL(lhs[14], rhs[14], result)
      /* FALLTHRU */
    case 14:
      FMA_INT8_GENERAL(lhs[13], rhs[13], result)
      /* FALLTHRU */
    case 13:
      FMA_INT8_GENERAL(lhs[12], rhs[12], result)
      /* FALLTHRU */
    case 12:
      FMA_INT8_GENERAL(lhs[11], rhs[11], result)
      /* FALLTHRU */
    case 11:
      FMA_INT8_GENERAL(lhs[10], rhs[10], result)
      /* FALLTHRU */
    case 10:
      FMA_INT8_GENERAL(lhs[9], rhs[9], result)
      /* FALLTHRU */
    case 9:
      FMA_INT8_GENERAL(lhs[8], rhs[8], result)
      /* FALLTHRU */
    case 8:
      FMA_INT8_GENERAL(lhs[7], rhs[7], result)
      /* FALLTHRU */
    case 7:
      FMA_INT8_GENERAL(lhs[6], rhs[6], result)
      /* FALLTHRU */
    case 6:
      FMA_INT8_GENERAL(lhs[5], rhs[5], result)
      /* FALLTHRU */
    case 5:
      FMA_INT8_GENERAL(lhs[4], rhs[4], result)
      /* FALLTHRU */
    case 4:
      FMA_INT8_GENERAL(lhs[3], rhs[3], result)
      /* FALLTHRU */
    case 3:
      FMA_INT8_GENERAL(lhs[2], rhs[2], result)
      /* FALLTHRU */
    case 2:
      FMA_INT8_GENERAL(lhs[1], rhs[1], result)
      /* FALLTHRU */
    case 1:
      FMA_INT8_GENERAL(lhs[0], rhs[0], result)
  }
  return result;
}

//! INT8 inner product entry point for AVX2 builds.
//! Inputs shorter than 32 elements are handed to the 128-bit SSE kernel;
//! everything else goes through the 256-bit AVX2 kernel.
float InnerProductInt8AVX2(const int8_t *lhs, const int8_t *rhs, size_t size) {
  return (size < 32) ? InnerProductInt8SSEInternal(lhs, rhs, size)
                     : InnerProductInt8AVX2Internal(lhs, rhs, size);
}

//! Negated INT8 inner product (AVX2 path): returns -InnerProductInt8AVX2(...).
float MinusInnerProductInt8AVX2(const int8_t *lhs, const int8_t *rhs,
                                size_t size) {
  const float ip = InnerProductInt8AVX2(lhs, rhs, size);
  return -ip;
}

#endif  // __AVX2__


}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/math/inner_product_matrix_int8_dispatch.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <ailego/internal/cpu_features.h>
#include "inner_product_matrix.h"

namespace zvec {
namespace ailego {

//--------------------------------------------------
// Dense
//--------------------------------------------------
#if defined(__AVX2__)
float InnerProductInt8AVX2(const int8_t *lhs, const int8_t *rhs, size_t size);
float MinusInnerProductInt8AVX2(const int8_t *lhs, const int8_t *rhs,
                                size_t size);
#endif

#if defined(__SSE4_1__)
float InnerProductInt8SSE(const int8_t *lhs, const int8_t *rhs, size_t size);
float MinusInnerProductInt8SSE(const int8_t *lhs, const int8_t *rhs,
                               size_t size);
#endif

float InnerProductInt8Scalar(const int8_t *m, const int8_t *q, size_t dim);
float MinusInnerProductInt8Scalar(const int8_t *m, const int8_t *q, size_t dim);

//! Compute the inner product between one INT8 vector `m` and one INT8 query
//! `q` of `dim` elements (M=1, N=1) and store it in `*out`.
//! Kernel priority: AVX2, then SSE4.1, then scalar. A SIMD kernel is used only
//! when it was compiled in and the matching CPU feature flag is set.
void InnerProductMatrix<int8_t, 1, 1>::Compute(const int8_t *m, const int8_t *q,
                                               size_t dim, float *out) {
  float distance;
#if defined(__AVX2__)
  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX2) {
    distance = InnerProductInt8AVX2(m, q, dim);
  } else
#endif  // __AVX2__
#if defined(__SSE4_1__)
  if (zvec::ailego::internal::CpuFeatures::static_flags_.SSE4_1) {
    distance = InnerProductInt8SSE(m, q, dim);
  } else
#endif  // __SSE4_1__
  {
    distance = InnerProductInt8Scalar(m, q, dim);
  }
  *out = distance;
}

//! Compute the negated inner product between one INT8 vector `m` and one INT8
//! query `q` of `dim` elements (M=1, N=1) and store it in `*out`.
//! Kernel priority: AVX2, then SSE4.1, then scalar. A SIMD kernel is used only
//! when it was compiled in and the matching CPU feature flag is set.
void MinusInnerProductMatrix<int8_t, 1, 1>::Compute(const int8_t *m,
                                                    const int8_t *q, size_t dim,
                                                    float *out) {
  float distance;
#if defined(__AVX2__)
  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX2) {
    distance = MinusInnerProductInt8AVX2(m, q, dim);
  } else
#endif  // __AVX2__
#if defined(__SSE4_1__)
  if (zvec::ailego::internal::CpuFeatures::static_flags_.SSE4_1) {
    distance = MinusInnerProductInt8SSE(m, q, dim);
  } else
#endif  // __SSE4_1__
  {
    distance = MinusInnerProductInt8Scalar(m, q, dim);
  }
  *out = distance;
}

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/inner_product_matrix_int8_sse.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_accum_int8.i"
#include "distance_matrix_inner_product_utility.i"
#include "inner_product_matrix.h"

namespace zvec {
namespace ailego {

//--------------------------------------------------
// Dense
//--------------------------------------------------
#if defined(__SSE4_1__)
//! Inner product of two int8 arrays of `size` elements using 128-bit SSE
//! integer arithmetic; the integer sum is converted to float at the end.
//!
//! The work is split into: a main loop over 32-byte blocks, at most one
//! 16-byte block, and a scalar tail of up to 15 elements handled by the
//! fall-through switch.
//!
//! NOTE(review): _mm_sign_epi8 cannot negate -128, so an lhs element of -128
//! paired with a negative rhs element appears to contribute with the wrong
//! sign — presumably inputs are quantized to [-127, 127]; confirm.
float InnerProductInt8SSEInternal(const int8_t *lhs, const int8_t *rhs,
                                  size_t size) {
  const int8_t *last = lhs + size;
  // End of the region that is a whole number of 32-byte blocks.
  const int8_t *last_aligned = lhs + ((size >> 5) << 5);

  // Two independent 32-bit accumulators, one per 16-byte half of a block.
  __m128i xmm_sum_0 = _mm_setzero_si128();
  __m128i xmm_sum_1 = _mm_setzero_si128();

  // Aligned loads are used only when both pointers are 16-byte aligned.
  if (((uintptr_t)lhs & 0xf) == 0 && ((uintptr_t)rhs & 0xf) == 0) {
    for (; lhs != last_aligned; lhs += 32, rhs += 32) {
      __m128i xmm_lhs_0 = _mm_load_si128((const __m128i *)(lhs + 0));
      __m128i xmm_lhs_1 = _mm_load_si128((const __m128i *)(lhs + 16));
      __m128i xmm_rhs_0 = _mm_load_si128((const __m128i *)(rhs + 0));
      __m128i xmm_rhs_1 = _mm_load_si128((const __m128i *)(rhs + 16));

      // maddubs multiplies unsigned bytes by signed bytes, so move the sign
      // of rhs onto lhs and feed |rhs| as the unsigned operand.
      xmm_lhs_0 = _mm_sign_epi8(xmm_lhs_0, xmm_rhs_0);
      xmm_lhs_1 = _mm_sign_epi8(xmm_lhs_1, xmm_rhs_1);
      xmm_rhs_0 = _mm_abs_epi8(xmm_rhs_0);
      xmm_rhs_1 = _mm_abs_epi8(xmm_rhs_1);
      // maddubs yields 16-bit pair sums; madd against ONES_INT16_SSE
      // (presumably a vector of 16-bit ones) widens them to 32-bit sums.
      xmm_sum_0 =
          _mm_add_epi32(_mm_madd_epi16(_mm_maddubs_epi16(xmm_rhs_0, xmm_lhs_0),
                                       ONES_INT16_SSE),
                        xmm_sum_0);
      xmm_sum_1 =
          _mm_add_epi32(_mm_madd_epi16(_mm_maddubs_epi16(xmm_rhs_1, xmm_lhs_1),
                                       ONES_INT16_SSE),
                        xmm_sum_1);
    }

    // One remaining 16-byte block, if at least 16 elements are left.
    if (last >= last_aligned + 16) {
      __m128i xmm_lhs = _mm_load_si128((const __m128i *)lhs);
      __m128i xmm_rhs = _mm_load_si128((const __m128i *)rhs);

      xmm_lhs = _mm_sign_epi8(xmm_lhs, xmm_rhs);
      xmm_rhs = _mm_abs_epi8(xmm_rhs);
      xmm_sum_0 = _mm_add_epi32(
          _mm_madd_epi16(_mm_maddubs_epi16(xmm_rhs, xmm_lhs), ONES_INT16_SSE),
          xmm_sum_0);
      lhs += 16;
      rhs += 16;
    }
  } else {
    // Same sequence as above, using unaligned loads.
    for (; lhs != last_aligned; lhs += 32, rhs += 32) {
      __m128i xmm_lhs_0 = _mm_loadu_si128((const __m128i *)(lhs + 0));
      __m128i xmm_lhs_1 = _mm_loadu_si128((const __m128i *)(lhs + 16));
      __m128i xmm_rhs_0 = _mm_loadu_si128((const __m128i *)(rhs + 0));
      __m128i xmm_rhs_1 = _mm_loadu_si128((const __m128i *)(rhs + 16));

      xmm_lhs_0 = _mm_sign_epi8(xmm_lhs_0, xmm_rhs_0);
      xmm_lhs_1 = _mm_sign_epi8(xmm_lhs_1, xmm_rhs_1);
      xmm_rhs_0 = _mm_abs_epi8(xmm_rhs_0);
      xmm_rhs_1 = _mm_abs_epi8(xmm_rhs_1);
      xmm_sum_0 =
          _mm_add_epi32(_mm_madd_epi16(_mm_maddubs_epi16(xmm_rhs_0, xmm_lhs_0),
                                       ONES_INT16_SSE),
                        xmm_sum_0);
      xmm_sum_1 =
          _mm_add_epi32(_mm_madd_epi16(_mm_maddubs_epi16(xmm_rhs_1, xmm_lhs_1),
                                       ONES_INT16_SSE),
                        xmm_sum_1);
    }

    if (last >= last_aligned + 16) {
      __m128i xmm_lhs = _mm_loadu_si128((const __m128i *)lhs);
      __m128i xmm_rhs = _mm_loadu_si128((const __m128i *)rhs);

      xmm_lhs = _mm_sign_epi8(xmm_lhs, xmm_rhs);
      xmm_rhs = _mm_abs_epi8(xmm_rhs);
      xmm_sum_0 = _mm_add_epi32(
          _mm_madd_epi16(_mm_maddubs_epi16(xmm_rhs, xmm_lhs), ONES_INT16_SSE),
          xmm_sum_0);
      lhs += 16;
      rhs += 16;
    }
  }
  // Merge both accumulators and reduce to a single value
  // (HorizontalAdd_INT32_V128 presumably sums the 32-bit lanes).
  float result = static_cast<float>(
      HorizontalAdd_INT32_V128(_mm_add_epi32(xmm_sum_0, xmm_sum_1)));

  // Scalar tail: 0..15 elements remain. Each case falls through to the next,
  // so entering at case k handles elements k-1 down to 0.
  // FMA_INT8_GENERAL is a project macro — presumably result += lhs * rhs.
  switch (last - lhs) {
    case 15:
      FMA_INT8_GENERAL(lhs[14], rhs[14], result)
      /* FALLTHRU */
    case 14:
      FMA_INT8_GENERAL(lhs[13], rhs[13], result)
      /* FALLTHRU */
    case 13:
      FMA_INT8_GENERAL(lhs[12], rhs[12], result)
      /* FALLTHRU */
    case 12:
      FMA_INT8_GENERAL(lhs[11], rhs[11], result)
      /* FALLTHRU */
    case 11:
      FMA_INT8_GENERAL(lhs[10], rhs[10], result)
      /* FALLTHRU */
    case 10:
      FMA_INT8_GENERAL(lhs[9], rhs[9], result)
      /* FALLTHRU */
    case 9:
      FMA_INT8_GENERAL(lhs[8], rhs[8], result)
      /* FALLTHRU */
    case 8:
      FMA_INT8_GENERAL(lhs[7], rhs[7], result)
      /* FALLTHRU */
    case 7:
      FMA_INT8_GENERAL(lhs[6], rhs[6], result)
      /* FALLTHRU */
    case 6:
      FMA_INT8_GENERAL(lhs[5], rhs[5], result)
      /* FALLTHRU */
    case 5:
      FMA_INT8_GENERAL(lhs[4], rhs[4], result)
      /* FALLTHRU */
    case 4:
      FMA_INT8_GENERAL(lhs[3], rhs[3], result)
      /* FALLTHRU */
    case 3:
      FMA_INT8_GENERAL(lhs[2], rhs[2], result)
      /* FALLTHRU */
    case 2:
      FMA_INT8_GENERAL(lhs[1], rhs[1], result)
      /* FALLTHRU */
    case 1:
      FMA_INT8_GENERAL(lhs[0], rhs[0], result)
  }
  return result;
}

//! INT8 inner product entry point for the SSE path; forwards unchanged to
//! InnerProductInt8SSEInternal.
float InnerProductInt8SSE(const int8_t *lhs, const int8_t *rhs, size_t size) {
  const float ip = InnerProductInt8SSEInternal(lhs, rhs, size);
  return ip;
}

//! Negated INT8 inner product (SSE path): returns the negation of
//! InnerProductInt8SSEInternal(lhs, rhs, size).
float MinusInnerProductInt8SSE(const int8_t *lhs, const int8_t *rhs,
                               size_t size) {
  const float ip = InnerProductInt8SSEInternal(lhs, rhs, size);
  return -ip;
}

#endif  // __SSE4_1__

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/math/inner_product_matrix_scalar.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cmath>
#include <string>
#include <vector>
#include <ailego/utility/math_helper.h>
#include <zvec/ailego/internal/platform.h>
#include <zvec/ailego/utility/type_helper.h>
#include "distance_utility.h"
#include "inner_product_matrix.h"

namespace zvec {
namespace ailego {

//--------------------------------------------------
// Dense
//--------------------------------------------------
//! Generic scalar inner product: sums m[i] * q[i] for i in [0, dim), with each
//! product converted to float before it is added to the running sum.
//! Asserts that both pointers are non-null and that dim is non-zero.
template <typename T>
inline float InnerProductScalar(const T *m, const T *q, size_t dim) {
  ailego_assert(m && q && dim);

  float acc = 0.0f;
  const T *const m_end = m + dim;
  // Walk both arrays front to back in lock-step.
  while (m != m_end) {
    acc += static_cast<float>((*m) * (*q));
    ++m;
    ++q;
  }
  return acc;
}

//! Generic scalar negated inner product: accumulates m[i] * q[i] for
//! i in [0, dim) as float and returns the negated total.
//! Asserts that both pointers are non-null and that dim is non-zero.
template <typename T>
inline float MinusInnerProductScalar(const T *m, const T *q, size_t dim) {
  ailego_assert(m && q && dim);

  float acc = 0.0f;
  // Count down the remaining elements while advancing both cursors forward.
  for (size_t left = dim; left != 0; --left, ++m, ++q) {
    acc += static_cast<float>((*m) * (*q));
  }
  return -acc;
}

//! Scalar inner product of two packed INT4 vectors.
//! `dim` is the element count (asserted even and non-zero); dim / 2 bytes are
//! read from each input, two 4-bit values per byte. Each nibble pair is
//! resolved through Int4MulTable, indexed by (m nibble << 4) | q nibble.
float InnerProductInt4Scalar(const uint8_t *m, const uint8_t *q, size_t dim) {
  ailego_assert(m && q && dim && !(dim & 1));

  const size_t byte_count = dim >> 1;
  float acc = 0.0f;
  for (size_t i = 0; i != byte_count; ++i) {
    const uint8_t m_byte = m[i];
    const uint8_t q_byte = q[i];
    // Low nibbles of both bytes, then high nibbles of both bytes.
    const int low_idx = ((m_byte & 0x0f) << 4) | (q_byte & 0x0f);
    const int high_idx = (m_byte & 0xf0) | (q_byte >> 4);
    acc += Int4MulTable[low_idx] + Int4MulTable[high_idx];
  }

  return acc;
}

//! Scalar negated inner product of two packed INT4 vectors.
//! Same traversal and Int4MulTable lookups as the non-negated variant, but each
//! per-byte contribution is subtracted from the running sum.
float MinusInnerProductInt4Scalar(const uint8_t *m, const uint8_t *q,
                                  size_t dim) {
  ailego_assert(m && q && dim && !(dim & 1));

  const size_t byte_count = dim >> 1;
  float acc = 0.0f;
  for (size_t i = 0; i != byte_count; ++i) {
    const uint8_t m_byte = m[i];
    const uint8_t q_byte = q[i];
    // Low nibbles of both bytes, then high nibbles of both bytes.
    const int low_idx = ((m_byte & 0x0f) << 4) | (q_byte & 0x0f);
    const int high_idx = (m_byte & 0xf0) | (q_byte >> 4);
    acc -= Int4MulTable[low_idx] + Int4MulTable[high_idx];
  }
  return acc;
}

//! Scalar INT8 inner product; forwards to InnerProductScalar<int8_t>.
float InnerProductInt8Scalar(const int8_t *m, const int8_t *q, size_t dim) {
  const float ip = InnerProductScalar<int8_t>(m, q, dim);
  return ip;
}

//! Scalar INT8 negated inner product; forwards to
//! MinusInnerProductScalar<int8_t>.
float MinusInnerProductInt8Scalar(const int8_t *m, const int8_t *q,
                                  size_t dim) {
  const float negated = MinusInnerProductScalar<int8_t>(m, q, dim);
  return negated;
}

//! Scalar FP16 inner product; forwards to InnerProductScalar<ailego::Float16>.
float InnerProductFp16Scalar(const ailego::Float16 *m, const ailego::Float16 *q,
                             size_t dim) {
  const float ip = InnerProductScalar<ailego::Float16>(m, q, dim);
  return ip;
}

//! Scalar FP16 negated inner product; forwards to
//! MinusInnerProductScalar<ailego::Float16>.
float MinusInnerProductFp16Scalar(const ailego::Float16 *m,
                                  const ailego::Float16 *q, size_t dim) {
  const float negated = MinusInnerProductScalar<ailego::Float16>(m, q, dim);
  return negated;
}

//! Scalar FP32 inner product; forwards to InnerProductScalar<float>.
float InnerProductFp32Scalar(const float *m, const float *q, size_t dim) {
  const float ip = InnerProductScalar<float>(m, q, dim);
  return ip;
}

//! Scalar FP32 negated inner product; forwards to
//! MinusInnerProductScalar<float>.
float MinusInnerProductFp32Scalar(const float *m, const float *q, size_t dim) {
  const float negated = MinusInnerProductScalar<float>(m, q, dim);
  return negated;
}

//--------------------------------------------------
// Sparse
//--------------------------------------------------
float ComputeInnerProductSparseInSegmentFp32(uint32_t m_sparse_count,
                                             const uint16_t *m_sparse_index,
                                             const float *m_sparse_value,
                                             uint32_t q_sparse_count,
                                             const uint16_t *q_sparse_index,
                                             const float *q_sparse_value);

float ComputeInnerProductSparseInSegmentFp16(uint32_t m_sparse_count,
                                             const uint16_t *m_sparse_index,
                                             const Float16 *m_sparse_value,
                                             uint32_t q_sparse_count,
                                             const uint16_t *q_sparse_index,
                                             const Float16 *q_sparse_value);

template <typename T>
float ComputeInnerProductSparseInSegment(uint32_t m_sparse_count,
                                         const uint16_t *m_sparse_index,
                                         const T *m_sparse_value,
                                         uint32_t q_sparse_count,
                                         const uint16_t *q_sparse_index,
                                         const T *q_sparse_value);

//! float specialization: delegates to ComputeInnerProductSparseInSegmentFp32
//! with all arguments passed through unchanged.
template <>
float ComputeInnerProductSparseInSegment<float>(uint32_t m_sparse_count,
                                                const uint16_t *m_sparse_index,
                                                const float *m_sparse_value,
                                                uint32_t q_sparse_count,
                                                const uint16_t *q_sparse_index,
                                                const float *q_sparse_value) {
  const float partial = ComputeInnerProductSparseInSegmentFp32(
      m_sparse_count, m_sparse_index, m_sparse_value, q_sparse_count,
      q_sparse_index, q_sparse_value);
  return partial;
}

//! Float16 specialization: delegates to ComputeInnerProductSparseInSegmentFp16
//! with all arguments passed through unchanged.
template <>
float ComputeInnerProductSparseInSegment<Float16>(
    uint32_t m_sparse_count, const uint16_t *m_sparse_index,
    const Float16 *m_sparse_value, uint32_t q_sparse_count,
    const uint16_t *q_sparse_index, const Float16 *q_sparse_value) {
  const float partial = ComputeInnerProductSparseInSegmentFp16(
      m_sparse_count, m_sparse_index, m_sparse_value, q_sparse_count,
      q_sparse_index, q_sparse_value);
  return partial;
}

//! Negated inner product of two segmented sparse vectors.
//!
//! Both inputs are read with the same packed byte layout:
//!   uint32_t  total number of sparse entries
//!   uint32_t  number of segments (S)
//!   uint32_t  segment id            x S
//!   uint32_t  entries per segment   x S
//!   uint16_t  in-segment index      x total entries
//!   T         value                 x total entries
//! Segments whose ids match on both sides are multiplied through
//! ComputeInnerProductSparseInSegment; the accumulated sum is returned negated.
//! Returns 0.0f when either side has no entries.
//! NOTE(review): the merge walk only pairs segments correctly if segment ids
//! are ascending on both sides — presumably guaranteed by the encoder; confirm.
template <typename T>
float ComputeSegments(const void *m_sparse_data_in,
                      const void *q_sparse_data_in) {
  ailego_assert(m_sparse_data_in && q_sparse_data_in);

  const size_t kWord = sizeof(uint32_t);
  const size_t kHeader = 2 * kWord;

  const uint8_t *const m_bytes =
      reinterpret_cast<const uint8_t *>(m_sparse_data_in);
  const uint8_t *const q_bytes =
      reinterpret_cast<const uint8_t *>(q_sparse_data_in);

  // Header word 0: total entry count. Nothing to do if either side is empty.
  const uint32_t m_total = *reinterpret_cast<const uint32_t *>(m_bytes);
  const uint32_t q_total = *reinterpret_cast<const uint32_t *>(q_bytes);
  if (m_total == 0 || q_total == 0) {
    return 0.0f;
  }

  // Header word 1: segment count.
  const uint32_t m_segments =
      *reinterpret_cast<const uint32_t *>(m_bytes + kWord);
  const uint32_t q_segments =
      *reinterpret_cast<const uint32_t *>(q_bytes + kWord);

  // Byte offset of the index array: header plus two uint32 arrays of length S.
  const size_t m_index_off = kHeader + m_segments * 2 * kWord;
  const size_t q_index_off = kHeader + q_segments * 2 * kWord;

  const uint32_t *const m_ids =
      reinterpret_cast<const uint32_t *>(m_bytes + kHeader);
  const uint32_t *const q_ids =
      reinterpret_cast<const uint32_t *>(q_bytes + kHeader);

  const uint32_t *const m_lens = reinterpret_cast<const uint32_t *>(
      m_bytes + kHeader + m_segments * kWord);
  const uint32_t *const q_lens = reinterpret_cast<const uint32_t *>(
      q_bytes + kHeader + q_segments * kWord);

  const uint16_t *const m_indices =
      reinterpret_cast<const uint16_t *>(m_bytes + m_index_off);
  const uint16_t *const q_indices =
      reinterpret_cast<const uint16_t *>(q_bytes + q_index_off);

  const T *const m_values = reinterpret_cast<const T *>(
      m_bytes + m_index_off + m_total * sizeof(uint16_t));
  const T *const q_values = reinterpret_cast<const T *>(
      q_bytes + q_index_off + q_total * sizeof(uint16_t));

  float acc{0.0f};

  // Segment cursors and the running entry offset of each cursor's segment.
  size_t m_seg = 0;
  size_t q_seg = 0;
  size_t m_off = 0;
  size_t q_off = 0;

  while (m_seg < m_segments && q_seg < q_segments) {
    const uint32_t m_id = m_ids[m_seg];
    const uint32_t q_id = q_ids[q_seg];

    if (m_id < q_id) {
      // Segment only present on the m side: skip its entries.
      m_off += m_lens[m_seg];
      ++m_seg;
      continue;
    }
    if (q_id < m_id) {
      // Segment only present on the q side: skip its entries.
      q_off += q_lens[q_seg];
      ++q_seg;
      continue;
    }

    // Matching segment ids: accumulate the in-segment inner product.
    acc += ComputeInnerProductSparseInSegment(
        m_lens[m_seg], m_indices + m_off, m_values + m_off, q_lens[q_seg],
        q_indices + q_off, q_values + q_off);

    m_off += m_lens[m_seg];
    q_off += q_lens[q_seg];
    ++m_seg;
    ++q_seg;
  }

  return -acc;
}

//! Negated sparse inner product for Float16 values; forwards to
//! ComputeSegments<Float16>, which already returns the negated sum.
float MinusInnerProductSparseFp16Scalar(const void *m_sparse_data_in,
                                        const void *q_sparse_data_in) {
  const float negated =
      ComputeSegments<Float16>(m_sparse_data_in, q_sparse_data_in);
  return negated;
}

//! Negated sparse inner product for float values; forwards to
//! ComputeSegments<float>, which already returns the negated sum.
float MinusInnerProductSparseFp32Scalar(const void *m_sparse_data_in,
                                        const void *q_sparse_data_in) {
  const float negated =
      ComputeSegments<float>(m_sparse_data_in, q_sparse_data_in);
  return negated;
}

//! Inner product of two sparse Float16 vectors inside one segment.
//! Walks both index arrays with two cursors and accumulates the product of the
//! values wherever the indices are equal; on a mismatch the cursor with the
//! smaller index advances.
//! NOTE(review): this pairing is only complete if both index arrays are
//! ascending — presumably guaranteed by the caller; confirm.
float InnerProductSparseInSegmentFp16Scalar(uint32_t m_sparse_count,
                                            const uint16_t *m_sparse_index,
                                            const Float16 *m_sparse_value,
                                            uint32_t q_sparse_count,
                                            const uint16_t *q_sparse_index,
                                            const Float16 *q_sparse_value) {
  float acc = 0.0f;

  for (size_t mi = 0, qi = 0; mi < m_sparse_count && qi < q_sparse_count;) {
    const uint16_t m_idx = m_sparse_index[mi];
    const uint16_t q_idx = q_sparse_index[qi];

    if (m_idx < q_idx) {
      ++mi;
      continue;
    }
    if (q_idx < m_idx) {
      ++qi;
      continue;
    }

    // Indices match: this dimension contributes to the product.
    acc += m_sparse_value[mi] * q_sparse_value[qi];
    ++mi;
    ++qi;
  }

  return acc;
}

//! Inner product of two sparse float vectors inside one segment.
//! Walks both index arrays with two cursors and accumulates the product of the
//! values wherever the indices are equal; on a mismatch the cursor with the
//! smaller index advances.
//! NOTE(review): this pairing is only complete if both index arrays are
//! ascending — presumably guaranteed by the caller; confirm.
float InnerProductSparseInSegmentFp32Scalar(uint32_t m_sparse_count,
                                            const uint16_t *m_sparse_index,
                                            const float *m_sparse_value,
                                            uint32_t q_sparse_count,
                                            const uint16_t *q_sparse_index,
                                            const float *q_sparse_value) {
  float acc = 0.0f;

  for (size_t mi = 0, qi = 0; mi < m_sparse_count && qi < q_sparse_count;) {
    const uint16_t m_idx = m_sparse_index[mi];
    const uint16_t q_idx = q_sparse_index[qi];

    if (m_idx < q_idx) {
      ++mi;
      continue;
    }
    if (q_idx < m_idx) {
      ++qi;
      continue;
    }

    // Indices match: this dimension contributes to the product.
    acc += m_sparse_value[mi] * q_sparse_value[qi];
    ++mi;
    ++qi;
  }

  return acc;
}

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/matrix_define.i
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Declares one variable of type _VAR_TYPE named <_VAR_NAME>_0_0, initialized
//! with (_VAR_INIT). In this macro family the suffix is _<i>_<j>, with i below
//! the first size in the macro name and j below the second.
#define MATRIX_VAR_INIT_1X1(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \
  _VAR_TYPE _VAR_NAME##_0_0 = (_VAR_INIT);

//! Declares <_VAR_NAME>_0_0 and <_VAR_NAME>_0_1 (1x2), each initialized with
//! (_VAR_INIT). Builds on the 1X1 macro.
#define MATRIX_VAR_INIT_1X2(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \
  MATRIX_VAR_INIT_1X1(_VAR_TYPE, _VAR_NAME, _VAR_INIT)       \
  _VAR_TYPE _VAR_NAME##_0_1 = (_VAR_INIT);

//! Declares <_VAR_NAME>_0_0 .. <_VAR_NAME>_0_3 (1x4), each initialized with
//! (_VAR_INIT). Builds on the 1X2 macro.
#define MATRIX_VAR_INIT_1X4(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \
  MATRIX_VAR_INIT_1X2(_VAR_TYPE, _VAR_NAME, _VAR_INIT)       \
  _VAR_TYPE _VAR_NAME##_0_2 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_3 = (_VAR_INIT);

//! Declares <_VAR_NAME>_0_0 .. <_VAR_NAME>_0_7 (1x8), each initialized with
//! (_VAR_INIT). Builds on the 1X4 macro.
#define MATRIX_VAR_INIT_1X8(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \
  MATRIX_VAR_INIT_1X4(_VAR_TYPE, _VAR_NAME, _VAR_INIT)       \
  _VAR_TYPE _VAR_NAME##_0_4 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_5 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_6 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_7 = (_VAR_INIT);

//! Declares <_VAR_NAME>_0_0 .. <_VAR_NAME>_0_15 (1x16), each initialized with
//! (_VAR_INIT). Builds on the 1X8 macro.
#define MATRIX_VAR_INIT_1X16(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \
  MATRIX_VAR_INIT_1X8(_VAR_TYPE, _VAR_NAME, _VAR_INIT)        \
  _VAR_TYPE _VAR_NAME##_0_8 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_0_9 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_0_10 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_11 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_12 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_13 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_14 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_15 = (_VAR_INIT);

//! Declares <_VAR_NAME>_0_0 and <_VAR_NAME>_1_0 (2x1), each initialized with
//! (_VAR_INIT). Builds on the 1X1 macro.
#define MATRIX_VAR_INIT_2X1(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \
  MATRIX_VAR_INIT_1X1(_VAR_TYPE, _VAR_NAME, _VAR_INIT)       \
  _VAR_TYPE _VAR_NAME##_1_0 = (_VAR_INIT);

//! Declares <_VAR_NAME>_<i>_<j> for i in 0..1 and j in 0..1 (2x2), each
//! initialized with (_VAR_INIT). Builds on the 2X1 macro.
#define MATRIX_VAR_INIT_2X2(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \
  MATRIX_VAR_INIT_2X1(_VAR_TYPE, _VAR_NAME, _VAR_INIT)       \
  _VAR_TYPE _VAR_NAME##_0_1 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_1 = (_VAR_INIT);

//! Declares <_VAR_NAME>_<i>_<j> for i in 0..1 and j in 0..3 (2x4), each
//! initialized with (_VAR_INIT). Builds on the 2X2 macro.
#define MATRIX_VAR_INIT_2X4(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \
  MATRIX_VAR_INIT_2X2(_VAR_TYPE, _VAR_NAME, _VAR_INIT)       \
  _VAR_TYPE _VAR_NAME##_0_2 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_2 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_3 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_3 = (_VAR_INIT);

//! Declares <_VAR_NAME>_<i>_<j> for i in 0..1 and j in 0..7 (2x8), each
//! initialized with (_VAR_INIT). Builds on the 2X4 macro.
#define MATRIX_VAR_INIT_2X8(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \
  MATRIX_VAR_INIT_2X4(_VAR_TYPE, _VAR_NAME, _VAR_INIT)       \
  _VAR_TYPE _VAR_NAME##_0_4 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_4 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_5 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_5 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_6 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_6 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_7 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_7 = (_VAR_INIT);

//! Declares <_VAR_NAME>_<i>_<j> for i in 0..1 and j in 0..15 (2x16), each
//! initialized with (_VAR_INIT). Builds on the 2X8 macro.
#define MATRIX_VAR_INIT_2X16(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \
  MATRIX_VAR_INIT_2X8(_VAR_TYPE, _VAR_NAME, _VAR_INIT)        \
  _VAR_TYPE _VAR_NAME##_0_8 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_1_8 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_0_9 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_1_9 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_0_10 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_10 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_11 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_11 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_12 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_12 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_13 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_13 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_14 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_14 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_15 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_15 = (_VAR_INIT);

//! Declares <_VAR_NAME>_<i>_<j> for i in 0..1 and j in 0..31 (2x32), each
//! initialized with (_VAR_INIT). Builds on the 2X16 macro.
#define MATRIX_VAR_INIT_2X32(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \
  MATRIX_VAR_INIT_2X16(_VAR_TYPE, _VAR_NAME, _VAR_INIT)       \
  _VAR_TYPE _VAR_NAME##_0_16 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_16 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_17 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_17 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_18 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_18 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_19 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_19 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_20 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_20 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_21 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_21 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_22 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_22 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_23 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_23 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_24 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_24 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_25 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_25 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_26 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_26 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_27 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_27 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_28 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_28 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_29 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_29 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_30 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_30 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_31 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_31 = (_VAR_INIT);

// Expands MATRIX_VAR_INIT_2X1 with the same three arguments, then declares
// <CELL_NAME>_2_0 and <CELL_NAME>_3_0 of type CELL_TYPE.
// (MATRIX_VAR_INIT_2X1 is defined outside this block; presumably it supplies
// <CELL_NAME>_0_0 and <CELL_NAME>_1_0.)
// CELL_INIT is pasted into every declaration, so the expression is evaluated
// once per declared variable, in declaration order. The expansion already
// ends with ';'.
#define MATRIX_VAR_INIT_4X1(CELL_TYPE, CELL_NAME, CELL_INIT) \
  MATRIX_VAR_INIT_2X1(CELL_TYPE, CELL_NAME, CELL_INIT)       \
  CELL_TYPE CELL_NAME##_2_0 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_3_0 = (CELL_INIT);

// Expands MATRIX_VAR_INIT_4X1 with the same arguments, then declares
// <CELL_NAME>_<i>_1 for i = 0..3, each of type CELL_TYPE.
// CELL_INIT is substituted textually into every declaration, so it is
// evaluated once per variable. The expansion already ends with ';'.
#define MATRIX_VAR_INIT_4X2(CELL_TYPE, CELL_NAME, CELL_INIT) \
  MATRIX_VAR_INIT_4X1(CELL_TYPE, CELL_NAME, CELL_INIT)       \
  CELL_TYPE CELL_NAME##_0_1 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_1_1 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_2_1 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_3_1 = (CELL_INIT);

// Expands MATRIX_VAR_INIT_4X2 with the same arguments, then declares
// <CELL_NAME>_<i>_<j> for i = 0..3 and j = 2..3, each of type CELL_TYPE.
// CELL_INIT is substituted textually into every declaration, so it is
// evaluated once per variable. The expansion already ends with ';'.
#define MATRIX_VAR_INIT_4X4(CELL_TYPE, CELL_NAME, CELL_INIT) \
  MATRIX_VAR_INIT_4X2(CELL_TYPE, CELL_NAME, CELL_INIT)       \
  CELL_TYPE CELL_NAME##_0_2 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_1_2 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_2_2 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_3_2 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_0_3 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_1_3 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_2_3 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_3_3 = (CELL_INIT);

// Expands MATRIX_VAR_INIT_4X4 with the same arguments, then declares
// <CELL_NAME>_<i>_<j> for i = 0..3 and j = 4..7, each of type CELL_TYPE.
// CELL_INIT is substituted textually into every declaration, so it is
// evaluated once per variable. The expansion already ends with ';'.
#define MATRIX_VAR_INIT_4X8(CELL_TYPE, CELL_NAME, CELL_INIT) \
  MATRIX_VAR_INIT_4X4(CELL_TYPE, CELL_NAME, CELL_INIT)       \
  CELL_TYPE CELL_NAME##_0_4 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_1_4 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_2_4 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_3_4 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_0_5 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_1_5 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_2_5 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_3_5 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_0_6 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_1_6 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_2_6 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_3_6 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_0_7 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_1_7 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_2_7 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_3_7 = (CELL_INIT);

// Expands MATRIX_VAR_INIT_4X8 with the same arguments, then declares the
// variables <_VAR_NAME>_<i>_<j> for i = 0..3 and j = 8..15 (32 declarations).
//   _VAR_TYPE  type given to every declared variable
//   _VAR_NAME  identifier prefix; "_<i>_<j>" is token-pasted onto it
//   _VAR_INIT  initializer expression; it is substituted into each
//              declaration, so it is evaluated once per variable
// The expansion already ends with ';'.
#define MATRIX_VAR_INIT_4X16(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \
  MATRIX_VAR_INIT_4X8(_VAR_TYPE, _VAR_NAME, _VAR_INIT)        \
  _VAR_TYPE _VAR_NAME##_0_8 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_1_8 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_2_8 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_3_8 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_0_9 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_1_9 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_2_9 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_3_9 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_0_10 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_10 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_10 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_10 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_11 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_11 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_11 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_11 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_12 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_12 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_12 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_12 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_13 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_13 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_13 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_13 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_14 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_14 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_14 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_14 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_15 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_15 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_15 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_15 = (_VAR_INIT);

// Expands MATRIX_VAR_INIT_4X16 with the same arguments, then declares the
// variables <_VAR_NAME>_<i>_<j> for i = 0..3 and j = 16..31 (64 declarations).
//   _VAR_TYPE  type given to every declared variable
//   _VAR_NAME  identifier prefix; "_<i>_<j>" is token-pasted onto it
//   _VAR_INIT  initializer expression; it is substituted into each
//              declaration, so it is evaluated once per variable
// The expansion already ends with ';'.
#define MATRIX_VAR_INIT_4X32(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \
  MATRIX_VAR_INIT_4X16(_VAR_TYPE, _VAR_NAME, _VAR_INIT)       \
  _VAR_TYPE _VAR_NAME##_0_16 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_16 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_16 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_16 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_17 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_17 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_17 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_17 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_18 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_18 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_18 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_18 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_19 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_19 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_19 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_19 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_20 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_20 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_20 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_20 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_21 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_21 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_21 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_21 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_22 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_22 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_22 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_22 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_23 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_23 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_23 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_23 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_24 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_24 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_24 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_24 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_25 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_25 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_25 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_25 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_26 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_26 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_26 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_26 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_27 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_27 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_27 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_27 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_28 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_28 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_28 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_28 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_29 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_29 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_29 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_29 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_30 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_30 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_30 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_30 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_31 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_31 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_31 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_31 = (_VAR_INIT);

// Expands MATRIX_VAR_INIT_4X1 with the same arguments, then declares
// <CELL_NAME>_<i>_0 for i = 4..7, each of type CELL_TYPE.
// CELL_INIT is substituted textually into every declaration, so it is
// evaluated once per variable. The expansion already ends with ';'.
#define MATRIX_VAR_INIT_8X1(CELL_TYPE, CELL_NAME, CELL_INIT) \
  MATRIX_VAR_INIT_4X1(CELL_TYPE, CELL_NAME, CELL_INIT)       \
  CELL_TYPE CELL_NAME##_4_0 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_5_0 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_6_0 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_7_0 = (CELL_INIT);

// Expands MATRIX_VAR_INIT_8X1 with the same arguments, then declares
// <CELL_NAME>_<i>_1 for i = 0..7, each of type CELL_TYPE.
// CELL_INIT is substituted textually into every declaration, so it is
// evaluated once per variable. The expansion already ends with ';'.
#define MATRIX_VAR_INIT_8X2(CELL_TYPE, CELL_NAME, CELL_INIT) \
  MATRIX_VAR_INIT_8X1(CELL_TYPE, CELL_NAME, CELL_INIT)       \
  CELL_TYPE CELL_NAME##_0_1 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_1_1 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_2_1 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_3_1 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_4_1 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_5_1 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_6_1 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_7_1 = (CELL_INIT);

// Expands MATRIX_VAR_INIT_8X2 with the same arguments, then declares
// <CELL_NAME>_<i>_<j> for i = 0..7 and j = 2..3, each of type CELL_TYPE.
// CELL_INIT is substituted textually into every declaration, so it is
// evaluated once per variable. The expansion already ends with ';'.
#define MATRIX_VAR_INIT_8X4(CELL_TYPE, CELL_NAME, CELL_INIT) \
  MATRIX_VAR_INIT_8X2(CELL_TYPE, CELL_NAME, CELL_INIT)       \
  CELL_TYPE CELL_NAME##_0_2 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_1_2 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_2_2 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_3_2 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_4_2 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_5_2 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_6_2 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_7_2 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_0_3 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_1_3 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_2_3 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_3_3 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_4_3 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_5_3 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_6_3 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_7_3 = (CELL_INIT);

// Expands MATRIX_VAR_INIT_8X4 with the same arguments, then declares the
// variables <_VAR_NAME>_<i>_<j> for i = 0..7 and j = 4..7 (32 declarations).
//   _VAR_TYPE  type given to every declared variable
//   _VAR_NAME  identifier prefix; "_<i>_<j>" is token-pasted onto it
//   _VAR_INIT  initializer expression; it is substituted into each
//              declaration, so it is evaluated once per variable
// The expansion already ends with ';'.
#define MATRIX_VAR_INIT_8X8(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \
  MATRIX_VAR_INIT_8X4(_VAR_TYPE, _VAR_NAME, _VAR_INIT)       \
  _VAR_TYPE _VAR_NAME##_0_4 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_4 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_4 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_4 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_4_4 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_5_4 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_6_4 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_7_4 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_5 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_5 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_5 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_5 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_4_5 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_5_5 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_6_5 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_7_5 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_6 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_6 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_6 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_6 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_4_6 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_5_6 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_6_6 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_7_6 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_7 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_7 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_7 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_7 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_4_7 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_5_7 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_6_7 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_7_7 = (_VAR_INIT);

// Expands MATRIX_VAR_INIT_8X8 with the same arguments, then declares the
// variables <_VAR_NAME>_<i>_<j> for i = 0..7 and j = 8..15 (64 declarations).
//   _VAR_TYPE  type given to every declared variable
//   _VAR_NAME  identifier prefix; "_<i>_<j>" is token-pasted onto it
//   _VAR_INIT  initializer expression; it is substituted into each
//              declaration, so it is evaluated once per variable
// The expansion already ends with ';'.
#define MATRIX_VAR_INIT_8X16(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \
  MATRIX_VAR_INIT_8X8(_VAR_TYPE, _VAR_NAME, _VAR_INIT)        \
  _VAR_TYPE _VAR_NAME##_0_8 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_1_8 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_2_8 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_3_8 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_4_8 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_5_8 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_6_8 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_7_8 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_0_9 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_1_9 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_2_9 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_3_9 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_4_9 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_5_9 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_6_9 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_7_9 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_0_10 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_10 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_10 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_10 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_4_10 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_5_10 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_6_10 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_7_10 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_11 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_11 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_11 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_11 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_4_11 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_5_11 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_6_11 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_7_11 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_12 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_12 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_12 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_12 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_4_12 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_5_12 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_6_12 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_7_12 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_13 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_13 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_13 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_13 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_4_13 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_5_13 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_6_13 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_7_13 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_14 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_14 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_14 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_14 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_4_14 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_5_14 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_6_14 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_7_14 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_15 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_15 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_15 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_15 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_4_15 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_5_15 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_6_15 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_7_15 = (_VAR_INIT);

// Expands MATRIX_VAR_INIT_8X16 with the same arguments, then declares the
// variables <_VAR_NAME>_<i>_<j> for i = 0..7 and j = 16..31
// (128 declarations).
//   _VAR_TYPE  type given to every declared variable
//   _VAR_NAME  identifier prefix; "_<i>_<j>" is token-pasted onto it
//   _VAR_INIT  initializer expression; it is substituted into each
//              declaration, so it is evaluated once per variable
// The expansion already ends with ';'.
#define MATRIX_VAR_INIT_8X32(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \
  MATRIX_VAR_INIT_8X16(_VAR_TYPE, _VAR_NAME, _VAR_INIT)       \
  _VAR_TYPE _VAR_NAME##_0_16 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_16 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_16 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_16 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_4_16 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_5_16 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_6_16 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_7_16 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_17 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_17 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_17 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_17 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_4_17 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_5_17 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_6_17 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_7_17 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_18 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_18 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_18 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_18 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_4_18 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_5_18 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_6_18 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_7_18 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_19 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_19 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_19 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_19 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_4_19 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_5_19 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_6_19 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_7_19 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_20 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_20 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_20 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_20 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_4_20 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_5_20 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_6_20 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_7_20 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_21 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_21 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_21 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_21 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_4_21 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_5_21 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_6_21 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_7_21 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_22 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_22 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_22 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_22 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_4_22 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_5_22 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_6_22 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_7_22 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_23 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_23 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_23 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_23 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_4_23 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_5_23 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_6_23 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_7_23 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_24 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_24 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_24 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_24 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_4_24 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_5_24 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_6_24 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_7_24 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_25 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_25 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_25 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_25 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_4_25 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_5_25 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_6_25 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_7_25 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_26 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_26 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_26 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_26 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_4_26 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_5_26 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_6_26 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_7_26 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_27 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_27 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_27 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_27 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_4_27 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_5_27 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_6_27 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_7_27 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_28 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_28 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_28 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_28 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_4_28 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_5_28 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_6_28 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_7_28 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_29 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_29 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_29 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_29 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_4_29 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_5_29 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_6_29 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_7_29 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_30 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_30 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_30 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_30 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_4_30 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_5_30 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_6_30 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_7_30 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_31 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_1_31 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_2_31 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_3_31 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_4_31 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_5_31 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_6_31 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_7_31 = (_VAR_INIT);

// Expands MATRIX_VAR_INIT_8X1 with the same arguments, then declares
// <CELL_NAME>_<i>_0 for i = 8..15, each of type CELL_TYPE.
// CELL_INIT is substituted textually into every declaration, so it is
// evaluated once per variable. The expansion already ends with ';'.
#define MATRIX_VAR_INIT_16X1(CELL_TYPE, CELL_NAME, CELL_INIT) \
  MATRIX_VAR_INIT_8X1(CELL_TYPE, CELL_NAME, CELL_INIT)        \
  CELL_TYPE CELL_NAME##_8_0 = (CELL_INIT);                    \
  CELL_TYPE CELL_NAME##_9_0 = (CELL_INIT);                    \
  CELL_TYPE CELL_NAME##_10_0 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_11_0 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_12_0 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_13_0 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_14_0 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_15_0 = (CELL_INIT);

// Expands MATRIX_VAR_INIT_16X1 with the same arguments, then declares
// <CELL_NAME>_<i>_1 for i = 0..15, each of type CELL_TYPE.
// CELL_INIT is substituted textually into every declaration, so it is
// evaluated once per variable. The expansion already ends with ';'.
#define MATRIX_VAR_INIT_16X2(CELL_TYPE, CELL_NAME, CELL_INIT) \
  MATRIX_VAR_INIT_16X1(CELL_TYPE, CELL_NAME, CELL_INIT)       \
  CELL_TYPE CELL_NAME##_0_1 = (CELL_INIT);                    \
  CELL_TYPE CELL_NAME##_1_1 = (CELL_INIT);                    \
  CELL_TYPE CELL_NAME##_2_1 = (CELL_INIT);                    \
  CELL_TYPE CELL_NAME##_3_1 = (CELL_INIT);                    \
  CELL_TYPE CELL_NAME##_4_1 = (CELL_INIT);                    \
  CELL_TYPE CELL_NAME##_5_1 = (CELL_INIT);                    \
  CELL_TYPE CELL_NAME##_6_1 = (CELL_INIT);                    \
  CELL_TYPE CELL_NAME##_7_1 = (CELL_INIT);                    \
  CELL_TYPE CELL_NAME##_8_1 = (CELL_INIT);                    \
  CELL_TYPE CELL_NAME##_9_1 = (CELL_INIT);                    \
  CELL_TYPE CELL_NAME##_10_1 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_11_1 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_12_1 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_13_1 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_14_1 = (CELL_INIT);                   \
  CELL_TYPE CELL_NAME##_15_1 = (CELL_INIT);

// Expands MATRIX_VAR_INIT_16X2 with the same arguments, then declares the
// variables <_VAR_NAME>_<i>_<j> for i = 0..15 and j = 2..3 (32 declarations).
//   _VAR_TYPE  type given to every declared variable
//   _VAR_NAME  identifier prefix; "_<i>_<j>" is token-pasted onto it
//   _VAR_INIT  initializer expression; it is substituted into each
//              declaration, so it is evaluated once per variable
// The expansion already ends with ';'.
#define MATRIX_VAR_INIT_16X4(_VAR_TYPE, _VAR_NAME, _VAR_INIT) \
  MATRIX_VAR_INIT_16X2(_VAR_TYPE, _VAR_NAME, _VAR_INIT)       \
  _VAR_TYPE _VAR_NAME##_0_2 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_1_2 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_2_2 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_3_2 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_4_2 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_5_2 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_6_2 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_7_2 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_8_2 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_9_2 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_10_2 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_11_2 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_12_2 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_13_2 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_14_2 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_15_2 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_0_3 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_1_3 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_2_3 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_3_3 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_4_3 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_5_3 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_6_3 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_7_3 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_8_3 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_9_3 = (_VAR_INIT);                    \
  _VAR_TYPE _VAR_NAME##_10_3 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_11_3 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_12_3 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_13_3 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_14_3 = (_VAR_INIT);                   \
  _VAR_TYPE _VAR_NAME##_15_3 = (_VAR_INIT);

// Emits one store statement: WRITER(OUTBUF + PITCH * 0, XFORM(<CELL>_0_0, ...)).
//   PITCH   multiplier applied to the slot index to form the destination
//   CELL    identifier prefix; "_0_0" is token-pasted onto it
//   OUTBUF  base destination expression
//   WRITER  callable/macro invoked as WRITER(destination, value)
//   XFORM   callable/macro applied to the variable before it is stored
//   ...     optional extra arguments appended to every XFORM call
// `, ##__VA_ARGS__` relies on the GNU comma-swallowing extension so XFORM can
// be invoked with no extra arguments. The expansion already ends with ';'.
#define MATRIX_VAR_STORE_1X1(PITCH, CELL, OUTBUF, WRITER, XFORM, ...) \
  WRITER((OUTBUF) + (PITCH) * (0), XFORM((CELL##_0_0), ##__VA_ARGS__));

// Expands MATRIX_VAR_STORE_1X1 with the same arguments, then stores
// XFORM(<CELL>_0_1, ...) to OUTBUF + PITCH * 1 through WRITER.
// Extra variadic arguments are forwarded to every XFORM call; the
// `, ##__VA_ARGS__` form relies on the GNU comma-swallowing extension.
#define MATRIX_VAR_STORE_1X2(PITCH, CELL, OUTBUF, WRITER, XFORM, ...)     \
  MATRIX_VAR_STORE_1X1(PITCH, CELL, OUTBUF, WRITER, XFORM, ##__VA_ARGS__) \
  WRITER((OUTBUF) + (PITCH) * (1), XFORM((CELL##_0_1), ##__VA_ARGS__));

// Expands MATRIX_VAR_STORE_1X2 with the same arguments, then stores
// XFORM(<CELL>_0_<j>, ...) to OUTBUF + PITCH * j through WRITER for j = 2..3.
// Extra variadic arguments are forwarded to every XFORM call; the
// `, ##__VA_ARGS__` form relies on the GNU comma-swallowing extension.
#define MATRIX_VAR_STORE_1X4(PITCH, CELL, OUTBUF, WRITER, XFORM, ...)     \
  MATRIX_VAR_STORE_1X2(PITCH, CELL, OUTBUF, WRITER, XFORM, ##__VA_ARGS__) \
  WRITER((OUTBUF) + (PITCH) * (2), XFORM((CELL##_0_2), ##__VA_ARGS__));   \
  WRITER((OUTBUF) + (PITCH) * (3), XFORM((CELL##_0_3), ##__VA_ARGS__));

// Expands MATRIX_VAR_STORE_1X4 with the same arguments, then stores
// XFORM(<CELL>_0_<j>, ...) to OUTBUF + PITCH * j through WRITER for j = 4..7.
// Extra variadic arguments are forwarded to every XFORM call; the
// `, ##__VA_ARGS__` form relies on the GNU comma-swallowing extension.
#define MATRIX_VAR_STORE_1X8(PITCH, CELL, OUTBUF, WRITER, XFORM, ...)     \
  MATRIX_VAR_STORE_1X4(PITCH, CELL, OUTBUF, WRITER, XFORM, ##__VA_ARGS__) \
  WRITER((OUTBUF) + (PITCH) * (4), XFORM((CELL##_0_4), ##__VA_ARGS__));   \
  WRITER((OUTBUF) + (PITCH) * (5), XFORM((CELL##_0_5), ##__VA_ARGS__));   \
  WRITER((OUTBUF) + (PITCH) * (6), XFORM((CELL##_0_6), ##__VA_ARGS__));   \
  WRITER((OUTBUF) + (PITCH) * (7), XFORM((CELL##_0_7), ##__VA_ARGS__));

// Expands MATRIX_VAR_STORE_1X8 with the same arguments, then stores
// XFORM(<CELL>_0_<j>, ...) to OUTBUF + PITCH * j through WRITER for j = 8..15.
// Extra variadic arguments are forwarded to every XFORM call; the
// `, ##__VA_ARGS__` form relies on the GNU comma-swallowing extension.
#define MATRIX_VAR_STORE_1X16(PITCH, CELL, OUTBUF, WRITER, XFORM, ...)    \
  MATRIX_VAR_STORE_1X8(PITCH, CELL, OUTBUF, WRITER, XFORM, ##__VA_ARGS__) \
  WRITER((OUTBUF) + (PITCH) * (8), XFORM((CELL##_0_8), ##__VA_ARGS__));   \
  WRITER((OUTBUF) + (PITCH) * (9), XFORM((CELL##_0_9), ##__VA_ARGS__));   \
  WRITER((OUTBUF) + (PITCH) * (10), XFORM((CELL##_0_10), ##__VA_ARGS__)); \
  WRITER((OUTBUF) + (PITCH) * (11), XFORM((CELL##_0_11), ##__VA_ARGS__)); \
  WRITER((OUTBUF) + (PITCH) * (12), XFORM((CELL##_0_12), ##__VA_ARGS__)); \
  WRITER((OUTBUF) + (PITCH) * (13), XFORM((CELL##_0_13), ##__VA_ARGS__)); \
  WRITER((OUTBUF) + (PITCH) * (14), XFORM((CELL##_0_14), ##__VA_ARGS__)); \
  WRITER((OUTBUF) + (PITCH) * (15), XFORM((CELL##_0_15), ##__VA_ARGS__));

// Expands MATRIX_VAR_STORE_1X1 with the same arguments, then stores
// XFORM(<CELL>_1_0, ...) to OUTBUF + PITCH * 1 through WRITER.
// Extra variadic arguments are forwarded to every XFORM call; the
// `, ##__VA_ARGS__` form relies on the GNU comma-swallowing extension.
#define MATRIX_VAR_STORE_2X1(PITCH, CELL, OUTBUF, WRITER, XFORM, ...)     \
  MATRIX_VAR_STORE_1X1(PITCH, CELL, OUTBUF, WRITER, XFORM, ##__VA_ARGS__) \
  WRITER((OUTBUF) + (PITCH) * (1), XFORM((CELL##_1_0), ##__VA_ARGS__));

// Expands MATRIX_VAR_STORE_2X1 with the same arguments, then stores through
// WRITER:
//   slot 2 <- XFORM(<CELL>_0_1, ...)
//   slot 3 <- XFORM(<CELL>_1_1, ...)
// where slot k means the destination OUTBUF + PITCH * k.
// Extra variadic arguments are forwarded to every XFORM call; the
// `, ##__VA_ARGS__` form relies on the GNU comma-swallowing extension.
#define MATRIX_VAR_STORE_2X2(PITCH, CELL, OUTBUF, WRITER, XFORM, ...)     \
  MATRIX_VAR_STORE_2X1(PITCH, CELL, OUTBUF, WRITER, XFORM, ##__VA_ARGS__) \
  WRITER((OUTBUF) + (PITCH) * (2), XFORM((CELL##_0_1), ##__VA_ARGS__));   \
  WRITER((OUTBUF) + (PITCH) * (3), XFORM((CELL##_1_1), ##__VA_ARGS__));

// Expands MATRIX_VAR_STORE_2X2 with the same arguments, then stores through
// WRITER, with slot k meaning the destination OUTBUF + PITCH * k:
//   slots 4, 5 <- XFORM(<CELL>_0_2, ...), XFORM(<CELL>_1_2, ...)
//   slots 6, 7 <- XFORM(<CELL>_0_3, ...), XFORM(<CELL>_1_3, ...)
// Extra variadic arguments are forwarded to every XFORM call; the
// `, ##__VA_ARGS__` form relies on the GNU comma-swallowing extension.
#define MATRIX_VAR_STORE_2X4(PITCH, CELL, OUTBUF, WRITER, XFORM, ...)     \
  MATRIX_VAR_STORE_2X2(PITCH, CELL, OUTBUF, WRITER, XFORM, ##__VA_ARGS__) \
  WRITER((OUTBUF) + (PITCH) * (4), XFORM((CELL##_0_2), ##__VA_ARGS__));   \
  WRITER((OUTBUF) + (PITCH) * (5), XFORM((CELL##_1_2), ##__VA_ARGS__));   \
  WRITER((OUTBUF) + (PITCH) * (6), XFORM((CELL##_0_3), ##__VA_ARGS__));   \
  WRITER((OUTBUF) + (PITCH) * (7), XFORM((CELL##_1_3), ##__VA_ARGS__));

//! Store macro for a 2x8 group of variables named `_VAR##_<i>_<j>`.
//!
//! Expands MATRIX_VAR_STORE_2X4 with the same arguments (expected to cover
//! slots 0..7) and then adds j = 4..7 as slots 8..15. Slot k = j * 2 + i
//! expands to
//!   _STORE((_ARRAY) + (_STEP) * (k), _NORM((_VAR##_<i>_<j>), <extra args>));
//!
//!   _STEP   factor the slot index is multiplied by
//!   _VAR    identifier prefix, token-pasted with `_<i>_<j>`
//!   _ARRAY  base expression the scaled slot index is added to
//!   _STORE  caller-supplied function/macro called as _STORE(dest, value)
//!   _NORM   caller-supplied function/macro applied to each variable
//!   ...     optional extra arguments, forwarded to every _NORM call (the
//!           `, ##__VA_ARGS__` form drops the comma when none are given)
//!
//! The expansion is a run of `;`-terminated statements, not an expression;
//! _ARRAY and _STEP are substituted into (and evaluated by) every store.
#define MATRIX_VAR_STORE_2X8(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     \
  MATRIX_VAR_STORE_2X4(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \
  _STORE((_ARRAY) + (_STEP) * (8), _NORM((_VAR##_0_4), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (9), _NORM((_VAR##_1_4), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (10), _NORM((_VAR##_0_5), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (11), _NORM((_VAR##_1_5), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (12), _NORM((_VAR##_0_6), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (13), _NORM((_VAR##_1_6), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (14), _NORM((_VAR##_0_7), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (15), _NORM((_VAR##_1_7), ##__VA_ARGS__));

//! Store macro for a 2x16 group of variables named `_VAR##_<i>_<j>`.
//!
//! Expands MATRIX_VAR_STORE_2X8 with the same arguments (expected to cover
//! slots 0..15) and then adds j = 8..15 as slots 16..31. Slot k = j * 2 + i
//! expands to
//!   _STORE((_ARRAY) + (_STEP) * (k), _NORM((_VAR##_<i>_<j>), <extra args>));
//!
//!   _STEP   factor the slot index is multiplied by
//!   _VAR    identifier prefix, token-pasted with `_<i>_<j>`
//!   _ARRAY  base expression the scaled slot index is added to
//!   _STORE  caller-supplied function/macro called as _STORE(dest, value)
//!   _NORM   caller-supplied function/macro applied to each variable
//!   ...     optional extra arguments, forwarded to every _NORM call (the
//!           `, ##__VA_ARGS__` form drops the comma when none are given)
//!
//! The expansion is a run of `;`-terminated statements, not an expression;
//! _ARRAY and _STEP are substituted into (and evaluated by) every store.
#define MATRIX_VAR_STORE_2X16(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)    \
  MATRIX_VAR_STORE_2X8(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \
  _STORE((_ARRAY) + (_STEP) * (16), _NORM((_VAR##_0_8), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (17), _NORM((_VAR##_1_8), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (18), _NORM((_VAR##_0_9), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (19), _NORM((_VAR##_1_9), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (20), _NORM((_VAR##_0_10), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (21), _NORM((_VAR##_1_10), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (22), _NORM((_VAR##_0_11), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (23), _NORM((_VAR##_1_11), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (24), _NORM((_VAR##_0_12), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (25), _NORM((_VAR##_1_12), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (26), _NORM((_VAR##_0_13), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (27), _NORM((_VAR##_1_13), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (28), _NORM((_VAR##_0_14), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (29), _NORM((_VAR##_1_14), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (30), _NORM((_VAR##_0_15), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (31), _NORM((_VAR##_1_15), ##__VA_ARGS__));

//! Store macro for a 2x32 group of variables named `_VAR##_<i>_<j>`.
//!
//! Expands MATRIX_VAR_STORE_2X16 with the same arguments (expected to cover
//! slots 0..31) and then adds j = 16..31 as slots 32..63. Slot k = j * 2 + i
//! expands to
//!   _STORE((_ARRAY) + (_STEP) * (k), _NORM((_VAR##_<i>_<j>), <extra args>));
//!
//!   _STEP   factor the slot index is multiplied by
//!   _VAR    identifier prefix, token-pasted with `_<i>_<j>`
//!   _ARRAY  base expression the scaled slot index is added to
//!   _STORE  caller-supplied function/macro called as _STORE(dest, value)
//!   _NORM   caller-supplied function/macro applied to each variable
//!   ...     optional extra arguments, forwarded to every _NORM call (the
//!           `, ##__VA_ARGS__` form drops the comma when none are given)
//!
//! The expansion is a run of `;`-terminated statements, not an expression;
//! _ARRAY and _STEP are substituted into (and evaluated by) every store.
#define MATRIX_VAR_STORE_2X32(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     \
  MATRIX_VAR_STORE_2X16(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \
  _STORE((_ARRAY) + (_STEP) * (32), _NORM((_VAR##_0_16), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (33), _NORM((_VAR##_1_16), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (34), _NORM((_VAR##_0_17), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (35), _NORM((_VAR##_1_17), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (36), _NORM((_VAR##_0_18), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (37), _NORM((_VAR##_1_18), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (38), _NORM((_VAR##_0_19), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (39), _NORM((_VAR##_1_19), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (40), _NORM((_VAR##_0_20), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (41), _NORM((_VAR##_1_20), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (42), _NORM((_VAR##_0_21), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (43), _NORM((_VAR##_1_21), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (44), _NORM((_VAR##_0_22), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (45), _NORM((_VAR##_1_22), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (46), _NORM((_VAR##_0_23), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (47), _NORM((_VAR##_1_23), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (48), _NORM((_VAR##_0_24), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (49), _NORM((_VAR##_1_24), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (50), _NORM((_VAR##_0_25), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (51), _NORM((_VAR##_1_25), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (52), _NORM((_VAR##_0_26), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (53), _NORM((_VAR##_1_26), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (54), _NORM((_VAR##_0_27), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (55), _NORM((_VAR##_1_27), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (56), _NORM((_VAR##_0_28), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (57), _NORM((_VAR##_1_28), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (58), _NORM((_VAR##_0_29), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (59), _NORM((_VAR##_1_29), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (60), _NORM((_VAR##_0_30), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (61), _NORM((_VAR##_1_30), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (62), _NORM((_VAR##_0_31), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (63), _NORM((_VAR##_1_31), ##__VA_ARGS__));

//! Store macro for a 4x1 group of variables named `_VAR##_<i>_0`.
//!
//! Expands MATRIX_VAR_STORE_2X1 with the same arguments (expected to cover
//! slots 0..1) and then adds i = 2..3 as slots 2..3. Slot k = i expands to
//!   _STORE((_ARRAY) + (_STEP) * (k), _NORM((_VAR##_<i>_0), <extra args>));
//!
//!   _STEP   factor the slot index is multiplied by
//!   _VAR    identifier prefix, token-pasted with `_<i>_0`
//!   _ARRAY  base expression the scaled slot index is added to
//!   _STORE  caller-supplied function/macro called as _STORE(dest, value)
//!   _NORM   caller-supplied function/macro applied to each variable
//!   ...     optional extra arguments, forwarded to every _NORM call (the
//!           `, ##__VA_ARGS__` form drops the comma when none are given)
//!
//! The expansion is a run of `;`-terminated statements, not an expression;
//! _ARRAY and _STEP are substituted into (and evaluated by) every store.
#define MATRIX_VAR_STORE_4X1(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     \
  MATRIX_VAR_STORE_2X1(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \
  _STORE((_ARRAY) + (_STEP) * (2), _NORM((_VAR##_2_0), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (3), _NORM((_VAR##_3_0), ##__VA_ARGS__));

//! Store macro for a 4x2 group of variables named `_VAR##_<i>_<j>`.
//!
//! Expands MATRIX_VAR_STORE_4X1 with the same arguments (expected to cover
//! slots 0..3) and then adds j = 1 as slots 4..7. Slot k = j * 4 + i
//! expands to
//!   _STORE((_ARRAY) + (_STEP) * (k), _NORM((_VAR##_<i>_<j>), <extra args>));
//!
//!   _STEP   factor the slot index is multiplied by
//!   _VAR    identifier prefix, token-pasted with `_<i>_<j>`
//!   _ARRAY  base expression the scaled slot index is added to
//!   _STORE  caller-supplied function/macro called as _STORE(dest, value)
//!   _NORM   caller-supplied function/macro applied to each variable
//!   ...     optional extra arguments, forwarded to every _NORM call (the
//!           `, ##__VA_ARGS__` form drops the comma when none are given)
//!
//! The expansion is a run of `;`-terminated statements, not an expression;
//! _ARRAY and _STEP are substituted into (and evaluated by) every store.
#define MATRIX_VAR_STORE_4X2(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     \
  MATRIX_VAR_STORE_4X1(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \
  _STORE((_ARRAY) + (_STEP) * (4), _NORM((_VAR##_0_1), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (5), _NORM((_VAR##_1_1), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (6), _NORM((_VAR##_2_1), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (7), _NORM((_VAR##_3_1), ##__VA_ARGS__));

//! Store macro for a 4x4 group of variables named `_VAR##_<i>_<j>`.
//!
//! Expands MATRIX_VAR_STORE_4X2 with the same arguments (expected to cover
//! slots 0..7) and then adds j = 2..3 as slots 8..15. Slot k = j * 4 + i
//! expands to
//!   _STORE((_ARRAY) + (_STEP) * (k), _NORM((_VAR##_<i>_<j>), <extra args>));
//!
//!   _STEP   factor the slot index is multiplied by
//!   _VAR    identifier prefix, token-pasted with `_<i>_<j>`
//!   _ARRAY  base expression the scaled slot index is added to
//!   _STORE  caller-supplied function/macro called as _STORE(dest, value)
//!   _NORM   caller-supplied function/macro applied to each variable
//!   ...     optional extra arguments, forwarded to every _NORM call (the
//!           `, ##__VA_ARGS__` form drops the comma when none are given)
//!
//! The expansion is a run of `;`-terminated statements, not an expression;
//! _ARRAY and _STEP are substituted into (and evaluated by) every store.
#define MATRIX_VAR_STORE_4X4(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     \
  MATRIX_VAR_STORE_4X2(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \
  _STORE((_ARRAY) + (_STEP) * (8), _NORM((_VAR##_0_2), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (9), _NORM((_VAR##_1_2), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (10), _NORM((_VAR##_2_2), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (11), _NORM((_VAR##_3_2), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (12), _NORM((_VAR##_0_3), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (13), _NORM((_VAR##_1_3), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (14), _NORM((_VAR##_2_3), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (15), _NORM((_VAR##_3_3), ##__VA_ARGS__));

//! Store macro for a 4x8 group of variables named `_VAR##_<i>_<j>`.
//!
//! Expands MATRIX_VAR_STORE_4X4 with the same arguments (expected to cover
//! slots 0..15) and then adds j = 4..7 as slots 16..31. Slot k = j * 4 + i
//! expands to
//!   _STORE((_ARRAY) + (_STEP) * (k), _NORM((_VAR##_<i>_<j>), <extra args>));
//!
//!   _STEP   factor the slot index is multiplied by
//!   _VAR    identifier prefix, token-pasted with `_<i>_<j>`
//!   _ARRAY  base expression the scaled slot index is added to
//!   _STORE  caller-supplied function/macro called as _STORE(dest, value)
//!   _NORM   caller-supplied function/macro applied to each variable
//!   ...     optional extra arguments, forwarded to every _NORM call (the
//!           `, ##__VA_ARGS__` form drops the comma when none are given)
//!
//! The expansion is a run of `;`-terminated statements, not an expression;
//! _ARRAY and _STEP are substituted into (and evaluated by) every store.
#define MATRIX_VAR_STORE_4X8(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     \
  MATRIX_VAR_STORE_4X4(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \
  _STORE((_ARRAY) + (_STEP) * (16), _NORM((_VAR##_0_4), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (17), _NORM((_VAR##_1_4), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (18), _NORM((_VAR##_2_4), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (19), _NORM((_VAR##_3_4), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (20), _NORM((_VAR##_0_5), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (21), _NORM((_VAR##_1_5), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (22), _NORM((_VAR##_2_5), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (23), _NORM((_VAR##_3_5), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (24), _NORM((_VAR##_0_6), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (25), _NORM((_VAR##_1_6), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (26), _NORM((_VAR##_2_6), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (27), _NORM((_VAR##_3_6), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (28), _NORM((_VAR##_0_7), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (29), _NORM((_VAR##_1_7), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (30), _NORM((_VAR##_2_7), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (31), _NORM((_VAR##_3_7), ##__VA_ARGS__));

//! Store macro for a 4x16 group of variables named `_VAR##_<i>_<j>`.
//!
//! Expands MATRIX_VAR_STORE_4X8 with the same arguments (expected to cover
//! slots 0..31) and then adds j = 8..15 as slots 32..63. Slot k = j * 4 + i
//! expands to
//!   _STORE((_ARRAY) + (_STEP) * (k), _NORM((_VAR##_<i>_<j>), <extra args>));
//!
//!   _STEP   factor the slot index is multiplied by
//!   _VAR    identifier prefix, token-pasted with `_<i>_<j>`
//!   _ARRAY  base expression the scaled slot index is added to
//!   _STORE  caller-supplied function/macro called as _STORE(dest, value)
//!   _NORM   caller-supplied function/macro applied to each variable
//!   ...     optional extra arguments, forwarded to every _NORM call (the
//!           `, ##__VA_ARGS__` form drops the comma when none are given)
//!
//! The expansion is a run of `;`-terminated statements, not an expression;
//! _ARRAY and _STEP are substituted into (and evaluated by) every store.
#define MATRIX_VAR_STORE_4X16(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)    \
  MATRIX_VAR_STORE_4X8(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \
  _STORE((_ARRAY) + (_STEP) * (32), _NORM((_VAR##_0_8), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (33), _NORM((_VAR##_1_8), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (34), _NORM((_VAR##_2_8), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (35), _NORM((_VAR##_3_8), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (36), _NORM((_VAR##_0_9), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (37), _NORM((_VAR##_1_9), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (38), _NORM((_VAR##_2_9), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (39), _NORM((_VAR##_3_9), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (40), _NORM((_VAR##_0_10), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (41), _NORM((_VAR##_1_10), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (42), _NORM((_VAR##_2_10), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (43), _NORM((_VAR##_3_10), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (44), _NORM((_VAR##_0_11), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (45), _NORM((_VAR##_1_11), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (46), _NORM((_VAR##_2_11), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (47), _NORM((_VAR##_3_11), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (48), _NORM((_VAR##_0_12), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (49), _NORM((_VAR##_1_12), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (50), _NORM((_VAR##_2_12), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (51), _NORM((_VAR##_3_12), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (52), _NORM((_VAR##_0_13), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (53), _NORM((_VAR##_1_13), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (54), _NORM((_VAR##_2_13), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (55), _NORM((_VAR##_3_13), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (56), _NORM((_VAR##_0_14), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (57), _NORM((_VAR##_1_14), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (58), _NORM((_VAR##_2_14), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (59), _NORM((_VAR##_3_14), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (60), _NORM((_VAR##_0_15), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (61), _NORM((_VAR##_1_15), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (62), _NORM((_VAR##_2_15), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (63), _NORM((_VAR##_3_15), ##__VA_ARGS__));

//! Store macro for a 4x32 group of variables named `_VAR##_<i>_<j>`.
//!
//! Expands MATRIX_VAR_STORE_4X16 with the same arguments (expected to cover
//! slots 0..63) and then adds j = 16..31 as slots 64..127. Slot k = j * 4 + i
//! expands to
//!   _STORE((_ARRAY) + (_STEP) * (k), _NORM((_VAR##_<i>_<j>), <extra args>));
//!
//!   _STEP   factor the slot index is multiplied by
//!   _VAR    identifier prefix, token-pasted with `_<i>_<j>`
//!   _ARRAY  base expression the scaled slot index is added to
//!   _STORE  caller-supplied function/macro called as _STORE(dest, value)
//!   _NORM   caller-supplied function/macro applied to each variable
//!   ...     optional extra arguments, forwarded to every _NORM call (the
//!           `, ##__VA_ARGS__` form drops the comma when none are given)
//!
//! The expansion is a run of `;`-terminated statements, not an expression;
//! _ARRAY and _STEP are substituted into (and evaluated by) every store.
#define MATRIX_VAR_STORE_4X32(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     \
  MATRIX_VAR_STORE_4X16(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \
  _STORE((_ARRAY) + (_STEP) * (64), _NORM((_VAR##_0_16), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (65), _NORM((_VAR##_1_16), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (66), _NORM((_VAR##_2_16), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (67), _NORM((_VAR##_3_16), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (68), _NORM((_VAR##_0_17), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (69), _NORM((_VAR##_1_17), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (70), _NORM((_VAR##_2_17), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (71), _NORM((_VAR##_3_17), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (72), _NORM((_VAR##_0_18), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (73), _NORM((_VAR##_1_18), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (74), _NORM((_VAR##_2_18), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (75), _NORM((_VAR##_3_18), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (76), _NORM((_VAR##_0_19), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (77), _NORM((_VAR##_1_19), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (78), _NORM((_VAR##_2_19), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (79), _NORM((_VAR##_3_19), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (80), _NORM((_VAR##_0_20), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (81), _NORM((_VAR##_1_20), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (82), _NORM((_VAR##_2_20), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (83), _NORM((_VAR##_3_20), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (84), _NORM((_VAR##_0_21), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (85), _NORM((_VAR##_1_21), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (86), _NORM((_VAR##_2_21), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (87), _NORM((_VAR##_3_21), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (88), _NORM((_VAR##_0_22), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (89), _NORM((_VAR##_1_22), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (90), _NORM((_VAR##_2_22), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (91), _NORM((_VAR##_3_22), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (92), _NORM((_VAR##_0_23), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (93), _NORM((_VAR##_1_23), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (94), _NORM((_VAR##_2_23), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (95), _NORM((_VAR##_3_23), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (96), _NORM((_VAR##_0_24), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (97), _NORM((_VAR##_1_24), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (98), _NORM((_VAR##_2_24), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (99), _NORM((_VAR##_3_24), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (100), _NORM((_VAR##_0_25), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (101), _NORM((_VAR##_1_25), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (102), _NORM((_VAR##_2_25), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (103), _NORM((_VAR##_3_25), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (104), _NORM((_VAR##_0_26), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (105), _NORM((_VAR##_1_26), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (106), _NORM((_VAR##_2_26), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (107), _NORM((_VAR##_3_26), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (108), _NORM((_VAR##_0_27), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (109), _NORM((_VAR##_1_27), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (110), _NORM((_VAR##_2_27), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (111), _NORM((_VAR##_3_27), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (112), _NORM((_VAR##_0_28), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (113), _NORM((_VAR##_1_28), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (114), _NORM((_VAR##_2_28), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (115), _NORM((_VAR##_3_28), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (116), _NORM((_VAR##_0_29), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (117), _NORM((_VAR##_1_29), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (118), _NORM((_VAR##_2_29), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (119), _NORM((_VAR##_3_29), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (120), _NORM((_VAR##_0_30), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (121), _NORM((_VAR##_1_30), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (122), _NORM((_VAR##_2_30), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (123), _NORM((_VAR##_3_30), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (124), _NORM((_VAR##_0_31), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (125), _NORM((_VAR##_1_31), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (126), _NORM((_VAR##_2_31), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (127), _NORM((_VAR##_3_31), ##__VA_ARGS__));

//! Store macro for an 8x1 group of variables named `_VAR##_<i>_0`.
//!
//! Expands MATRIX_VAR_STORE_4X1 with the same arguments (expected to cover
//! slots 0..3) and then adds i = 4..7 as slots 4..7. Slot k = i expands to
//!   _STORE((_ARRAY) + (_STEP) * (k), _NORM((_VAR##_<i>_0), <extra args>));
//!
//!   _STEP   factor the slot index is multiplied by
//!   _VAR    identifier prefix, token-pasted with `_<i>_0`
//!   _ARRAY  base expression the scaled slot index is added to
//!   _STORE  caller-supplied function/macro called as _STORE(dest, value)
//!   _NORM   caller-supplied function/macro applied to each variable
//!   ...     optional extra arguments, forwarded to every _NORM call (the
//!           `, ##__VA_ARGS__` form drops the comma when none are given)
//!
//! The expansion is a run of `;`-terminated statements, not an expression;
//! _ARRAY and _STEP are substituted into (and evaluated by) every store.
#define MATRIX_VAR_STORE_8X1(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     \
  MATRIX_VAR_STORE_4X1(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \
  _STORE((_ARRAY) + (_STEP) * (4), _NORM((_VAR##_4_0), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (5), _NORM((_VAR##_5_0), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (6), _NORM((_VAR##_6_0), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (7), _NORM((_VAR##_7_0), ##__VA_ARGS__));

//! Store macro for an 8x2 group of variables named `_VAR##_<i>_<j>`.
//!
//! Expands MATRIX_VAR_STORE_8X1 with the same arguments (expected to cover
//! slots 0..7) and then adds j = 1 as slots 8..15. Slot k = j * 8 + i
//! expands to
//!   _STORE((_ARRAY) + (_STEP) * (k), _NORM((_VAR##_<i>_<j>), <extra args>));
//!
//!   _STEP   factor the slot index is multiplied by
//!   _VAR    identifier prefix, token-pasted with `_<i>_<j>`
//!   _ARRAY  base expression the scaled slot index is added to
//!   _STORE  caller-supplied function/macro called as _STORE(dest, value)
//!   _NORM   caller-supplied function/macro applied to each variable
//!   ...     optional extra arguments, forwarded to every _NORM call (the
//!           `, ##__VA_ARGS__` form drops the comma when none are given)
//!
//! The expansion is a run of `;`-terminated statements, not an expression;
//! _ARRAY and _STEP are substituted into (and evaluated by) every store.
#define MATRIX_VAR_STORE_8X2(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     \
  MATRIX_VAR_STORE_8X1(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \
  _STORE((_ARRAY) + (_STEP) * (8), _NORM((_VAR##_0_1), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (9), _NORM((_VAR##_1_1), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (10), _NORM((_VAR##_2_1), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (11), _NORM((_VAR##_3_1), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (12), _NORM((_VAR##_4_1), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (13), _NORM((_VAR##_5_1), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (14), _NORM((_VAR##_6_1), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (15), _NORM((_VAR##_7_1), ##__VA_ARGS__));

//! Store macro for an 8x4 group of variables named `_VAR##_<i>_<j>`.
//!
//! Expands MATRIX_VAR_STORE_8X2 with the same arguments (expected to cover
//! slots 0..15) and then adds j = 2..3 as slots 16..31. Slot k = j * 8 + i
//! expands to
//!   _STORE((_ARRAY) + (_STEP) * (k), _NORM((_VAR##_<i>_<j>), <extra args>));
//!
//!   _STEP   factor the slot index is multiplied by
//!   _VAR    identifier prefix, token-pasted with `_<i>_<j>`
//!   _ARRAY  base expression the scaled slot index is added to
//!   _STORE  caller-supplied function/macro called as _STORE(dest, value)
//!   _NORM   caller-supplied function/macro applied to each variable
//!   ...     optional extra arguments, forwarded to every _NORM call (the
//!           `, ##__VA_ARGS__` form drops the comma when none are given)
//!
//! The expansion is a run of `;`-terminated statements, not an expression;
//! _ARRAY and _STEP are substituted into (and evaluated by) every store.
#define MATRIX_VAR_STORE_8X4(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     \
  MATRIX_VAR_STORE_8X2(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \
  _STORE((_ARRAY) + (_STEP) * (16), _NORM((_VAR##_0_2), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (17), _NORM((_VAR##_1_2), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (18), _NORM((_VAR##_2_2), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (19), _NORM((_VAR##_3_2), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (20), _NORM((_VAR##_4_2), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (21), _NORM((_VAR##_5_2), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (22), _NORM((_VAR##_6_2), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (23), _NORM((_VAR##_7_2), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (24), _NORM((_VAR##_0_3), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (25), _NORM((_VAR##_1_3), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (26), _NORM((_VAR##_2_3), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (27), _NORM((_VAR##_3_3), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (28), _NORM((_VAR##_4_3), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (29), _NORM((_VAR##_5_3), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (30), _NORM((_VAR##_6_3), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (31), _NORM((_VAR##_7_3), ##__VA_ARGS__));

//! Store macro for an 8x8 group of variables named `_VAR##_<i>_<j>`.
//!
//! Expands MATRIX_VAR_STORE_8X4 with the same arguments (expected to cover
//! slots 0..31) and then adds j = 4..7 as slots 32..63. Slot k = j * 8 + i
//! expands to
//!   _STORE((_ARRAY) + (_STEP) * (k), _NORM((_VAR##_<i>_<j>), <extra args>));
//!
//!   _STEP   factor the slot index is multiplied by
//!   _VAR    identifier prefix, token-pasted with `_<i>_<j>`
//!   _ARRAY  base expression the scaled slot index is added to
//!   _STORE  caller-supplied function/macro called as _STORE(dest, value)
//!   _NORM   caller-supplied function/macro applied to each variable
//!   ...     optional extra arguments, forwarded to every _NORM call (the
//!           `, ##__VA_ARGS__` form drops the comma when none are given)
//!
//! The expansion is a run of `;`-terminated statements, not an expression;
//! _ARRAY and _STEP are substituted into (and evaluated by) every store.
#define MATRIX_VAR_STORE_8X8(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     \
  MATRIX_VAR_STORE_8X4(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \
  _STORE((_ARRAY) + (_STEP) * (32), _NORM((_VAR##_0_4), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (33), _NORM((_VAR##_1_4), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (34), _NORM((_VAR##_2_4), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (35), _NORM((_VAR##_3_4), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (36), _NORM((_VAR##_4_4), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (37), _NORM((_VAR##_5_4), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (38), _NORM((_VAR##_6_4), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (39), _NORM((_VAR##_7_4), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (40), _NORM((_VAR##_0_5), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (41), _NORM((_VAR##_1_5), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (42), _NORM((_VAR##_2_5), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (43), _NORM((_VAR##_3_5), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (44), _NORM((_VAR##_4_5), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (45), _NORM((_VAR##_5_5), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (46), _NORM((_VAR##_6_5), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (47), _NORM((_VAR##_7_5), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (48), _NORM((_VAR##_0_6), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (49), _NORM((_VAR##_1_6), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (50), _NORM((_VAR##_2_6), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (51), _NORM((_VAR##_3_6), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (52), _NORM((_VAR##_4_6), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (53), _NORM((_VAR##_5_6), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (54), _NORM((_VAR##_6_6), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (55), _NORM((_VAR##_7_6), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (56), _NORM((_VAR##_0_7), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (57), _NORM((_VAR##_1_7), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (58), _NORM((_VAR##_2_7), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (59), _NORM((_VAR##_3_7), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (60), _NORM((_VAR##_4_7), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (61), _NORM((_VAR##_5_7), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (62), _NORM((_VAR##_6_7), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (63), _NORM((_VAR##_7_7), ##__VA_ARGS__));

//! Store macro for an 8x16 group of variables named `_VAR##_<i>_<j>`.
//!
//! Expands MATRIX_VAR_STORE_8X8 with the same arguments (expected to cover
//! slots 0..63) and then adds j = 8..15 as slots 64..127. Slot k = j * 8 + i
//! expands to
//!   _STORE((_ARRAY) + (_STEP) * (k), _NORM((_VAR##_<i>_<j>), <extra args>));
//!
//!   _STEP   factor the slot index is multiplied by
//!   _VAR    identifier prefix, token-pasted with `_<i>_<j>`
//!   _ARRAY  base expression the scaled slot index is added to
//!   _STORE  caller-supplied function/macro called as _STORE(dest, value)
//!   _NORM   caller-supplied function/macro applied to each variable
//!   ...     optional extra arguments, forwarded to every _NORM call (the
//!           `, ##__VA_ARGS__` form drops the comma when none are given)
//!
//! The expansion is a run of `;`-terminated statements, not an expression;
//! _ARRAY and _STEP are substituted into (and evaluated by) every store.
#define MATRIX_VAR_STORE_8X16(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     \
  MATRIX_VAR_STORE_8X8(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__)  \
  _STORE((_ARRAY) + (_STEP) * (64), _NORM((_VAR##_0_8), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (65), _NORM((_VAR##_1_8), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (66), _NORM((_VAR##_2_8), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (67), _NORM((_VAR##_3_8), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (68), _NORM((_VAR##_4_8), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (69), _NORM((_VAR##_5_8), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (70), _NORM((_VAR##_6_8), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (71), _NORM((_VAR##_7_8), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (72), _NORM((_VAR##_0_9), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (73), _NORM((_VAR##_1_9), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (74), _NORM((_VAR##_2_9), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (75), _NORM((_VAR##_3_9), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (76), _NORM((_VAR##_4_9), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (77), _NORM((_VAR##_5_9), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (78), _NORM((_VAR##_6_9), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (79), _NORM((_VAR##_7_9), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (80), _NORM((_VAR##_0_10), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (81), _NORM((_VAR##_1_10), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (82), _NORM((_VAR##_2_10), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (83), _NORM((_VAR##_3_10), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (84), _NORM((_VAR##_4_10), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (85), _NORM((_VAR##_5_10), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (86), _NORM((_VAR##_6_10), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (87), _NORM((_VAR##_7_10), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (88), _NORM((_VAR##_0_11), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (89), _NORM((_VAR##_1_11), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (90), _NORM((_VAR##_2_11), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (91), _NORM((_VAR##_3_11), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (92), _NORM((_VAR##_4_11), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (93), _NORM((_VAR##_5_11), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (94), _NORM((_VAR##_6_11), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (95), _NORM((_VAR##_7_11), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (96), _NORM((_VAR##_0_12), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (97), _NORM((_VAR##_1_12), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (98), _NORM((_VAR##_2_12), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (99), _NORM((_VAR##_3_12), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (100), _NORM((_VAR##_4_12), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (101), _NORM((_VAR##_5_12), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (102), _NORM((_VAR##_6_12), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (103), _NORM((_VAR##_7_12), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (104), _NORM((_VAR##_0_13), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (105), _NORM((_VAR##_1_13), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (106), _NORM((_VAR##_2_13), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (107), _NORM((_VAR##_3_13), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (108), _NORM((_VAR##_4_13), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (109), _NORM((_VAR##_5_13), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (110), _NORM((_VAR##_6_13), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (111), _NORM((_VAR##_7_13), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (112), _NORM((_VAR##_0_14), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (113), _NORM((_VAR##_1_14), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (114), _NORM((_VAR##_2_14), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (115), _NORM((_VAR##_3_14), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (116), _NORM((_VAR##_4_14), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (117), _NORM((_VAR##_5_14), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (118), _NORM((_VAR##_6_14), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (119), _NORM((_VAR##_7_14), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (120), _NORM((_VAR##_0_15), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (121), _NORM((_VAR##_1_15), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (122), _NORM((_VAR##_2_15), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (123), _NORM((_VAR##_3_15), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (124), _NORM((_VAR##_4_15), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (125), _NORM((_VAR##_5_15), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (126), _NORM((_VAR##_6_15), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (127), _NORM((_VAR##_7_15), ##__VA_ARGS__));

//! Store macro for an 8x32 group of variables named `_VAR##_<i>_<j>`.
//!
//! Expands MATRIX_VAR_STORE_8X16 with the same arguments (expected to cover
//! slots 0..127) and then adds j = 16..31 as slots 128..255. Slot
//! k = j * 8 + i expands to
//!   _STORE((_ARRAY) + (_STEP) * (k), _NORM((_VAR##_<i>_<j>), <extra args>));
//!
//!   _STEP   factor the slot index is multiplied by
//!   _VAR    identifier prefix, token-pasted with `_<i>_<j>`
//!   _ARRAY  base expression the scaled slot index is added to
//!   _STORE  caller-supplied function/macro called as _STORE(dest, value)
//!   _NORM   caller-supplied function/macro applied to each variable
//!   ...     optional extra arguments, forwarded to every _NORM call (the
//!           `, ##__VA_ARGS__` form drops the comma when none are given)
//!
//! The expansion is a run of `;`-terminated statements, not an expression;
//! _ARRAY and _STEP are substituted into (and evaluated by) every store.
#define MATRIX_VAR_STORE_8X32(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     \
  MATRIX_VAR_STORE_8X16(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \
  _STORE((_ARRAY) + (_STEP) * (128), _NORM((_VAR##_0_16), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (129), _NORM((_VAR##_1_16), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (130), _NORM((_VAR##_2_16), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (131), _NORM((_VAR##_3_16), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (132), _NORM((_VAR##_4_16), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (133), _NORM((_VAR##_5_16), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (134), _NORM((_VAR##_6_16), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (135), _NORM((_VAR##_7_16), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (136), _NORM((_VAR##_0_17), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (137), _NORM((_VAR##_1_17), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (138), _NORM((_VAR##_2_17), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (139), _NORM((_VAR##_3_17), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (140), _NORM((_VAR##_4_17), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (141), _NORM((_VAR##_5_17), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (142), _NORM((_VAR##_6_17), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (143), _NORM((_VAR##_7_17), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (144), _NORM((_VAR##_0_18), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (145), _NORM((_VAR##_1_18), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (146), _NORM((_VAR##_2_18), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (147), _NORM((_VAR##_3_18), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (148), _NORM((_VAR##_4_18), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (149), _NORM((_VAR##_5_18), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (150), _NORM((_VAR##_6_18), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (151), _NORM((_VAR##_7_18), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (152), _NORM((_VAR##_0_19), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (153), _NORM((_VAR##_1_19), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (154), _NORM((_VAR##_2_19), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (155), _NORM((_VAR##_3_19), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (156), _NORM((_VAR##_4_19), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (157), _NORM((_VAR##_5_19), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (158), _NORM((_VAR##_6_19), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (159), _NORM((_VAR##_7_19), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (160), _NORM((_VAR##_0_20), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (161), _NORM((_VAR##_1_20), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (162), _NORM((_VAR##_2_20), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (163), _NORM((_VAR##_3_20), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (164), _NORM((_VAR##_4_20), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (165), _NORM((_VAR##_5_20), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (166), _NORM((_VAR##_6_20), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (167), _NORM((_VAR##_7_20), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (168), _NORM((_VAR##_0_21), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (169), _NORM((_VAR##_1_21), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (170), _NORM((_VAR##_2_21), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (171), _NORM((_VAR##_3_21), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (172), _NORM((_VAR##_4_21), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (173), _NORM((_VAR##_5_21), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (174), _NORM((_VAR##_6_21), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (175), _NORM((_VAR##_7_21), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (176), _NORM((_VAR##_0_22), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (177), _NORM((_VAR##_1_22), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (178), _NORM((_VAR##_2_22), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (179), _NORM((_VAR##_3_22), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (180), _NORM((_VAR##_4_22), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (181), _NORM((_VAR##_5_22), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (182), _NORM((_VAR##_6_22), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (183), _NORM((_VAR##_7_22), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (184), _NORM((_VAR##_0_23), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (185), _NORM((_VAR##_1_23), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (186), _NORM((_VAR##_2_23), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (187), _NORM((_VAR##_3_23), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (188), _NORM((_VAR##_4_23), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (189), _NORM((_VAR##_5_23), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (190), _NORM((_VAR##_6_23), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (191), _NORM((_VAR##_7_23), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (192), _NORM((_VAR##_0_24), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (193), _NORM((_VAR##_1_24), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (194), _NORM((_VAR##_2_24), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (195), _NORM((_VAR##_3_24), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (196), _NORM((_VAR##_4_24), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (197), _NORM((_VAR##_5_24), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (198), _NORM((_VAR##_6_24), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (199), _NORM((_VAR##_7_24), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (200), _NORM((_VAR##_0_25), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (201), _NORM((_VAR##_1_25), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (202), _NORM((_VAR##_2_25), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (203), _NORM((_VAR##_3_25), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (204), _NORM((_VAR##_4_25), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (205), _NORM((_VAR##_5_25), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (206), _NORM((_VAR##_6_25), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (207), _NORM((_VAR##_7_25), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (208), _NORM((_VAR##_0_26), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (209), _NORM((_VAR##_1_26), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (210), _NORM((_VAR##_2_26), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (211), _NORM((_VAR##_3_26), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (212), _NORM((_VAR##_4_26), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (213), _NORM((_VAR##_5_26), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (214), _NORM((_VAR##_6_26), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (215), _NORM((_VAR##_7_26), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (216), _NORM((_VAR##_0_27), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (217), _NORM((_VAR##_1_27), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (218), _NORM((_VAR##_2_27), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (219), _NORM((_VAR##_3_27), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (220), _NORM((_VAR##_4_27), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (221), _NORM((_VAR##_5_27), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (222), _NORM((_VAR##_6_27), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (223), _NORM((_VAR##_7_27), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (224), _NORM((_VAR##_0_28), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (225), _NORM((_VAR##_1_28), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (226), _NORM((_VAR##_2_28), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (227), _NORM((_VAR##_3_28), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (228), _NORM((_VAR##_4_28), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (229), _NORM((_VAR##_5_28), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (230), _NORM((_VAR##_6_28), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (231), _NORM((_VAR##_7_28), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (232), _NORM((_VAR##_0_29), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (233), _NORM((_VAR##_1_29), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (234), _NORM((_VAR##_2_29), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (235), _NORM((_VAR##_3_29), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (236), _NORM((_VAR##_4_29), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (237), _NORM((_VAR##_5_29), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (238), _NORM((_VAR##_6_29), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (239), _NORM((_VAR##_7_29), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (240), _NORM((_VAR##_0_30), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (241), _NORM((_VAR##_1_30), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (242), _NORM((_VAR##_2_30), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (243), _NORM((_VAR##_3_30), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (244), _NORM((_VAR##_4_30), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (245), _NORM((_VAR##_5_30), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (246), _NORM((_VAR##_6_30), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (247), _NORM((_VAR##_7_30), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (248), _NORM((_VAR##_0_31), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (249), _NORM((_VAR##_1_31), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (250), _NORM((_VAR##_2_31), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (251), _NORM((_VAR##_3_31), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (252), _NORM((_VAR##_4_31), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (253), _NORM((_VAR##_5_31), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (254), _NORM((_VAR##_6_31), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (255), _NORM((_VAR##_7_31), ##__VA_ARGS__));

//! Store the 16x1 group of variables `_VAR_<i>_0` through `_STORE`.
//!   _STEP   multiplier applied to the slot number to form the offset.
//!   _VAR    identifier prefix; token-pasted with `_<i>_<j>`.
//!   _ARRAY  base expression the offset is added to.
//!   _STORE  function or macro invoked as `_STORE(destination, value)`.
//!   _NORM   function or macro applied to each variable before it is stored.
//!   ...     optional extra arguments forwarded to every `_NORM` call
//!           (`##__VA_ARGS__` drops the comma when none are given).
//! First expands MATRIX_VAR_STORE_8X1 with the same arguments (defined outside
//! this section; presumably it handles slots 0..7 -- confirm there), then
//! writes slot k = 8..15 at `(_ARRAY) + (_STEP) * (k)` from `_VAR_<k>_0`.
//! NOTE: expands to a flat list of `;`-terminated statements with no enclosing
//! braces, so use it only where several statements are valid.
#define MATRIX_VAR_STORE_16X1(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)    \
  MATRIX_VAR_STORE_8X1(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \
  _STORE((_ARRAY) + (_STEP) * (8), _NORM((_VAR##_8_0), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (9), _NORM((_VAR##_9_0), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (10), _NORM((_VAR##_10_0), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (11), _NORM((_VAR##_11_0), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (12), _NORM((_VAR##_12_0), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (13), _NORM((_VAR##_13_0), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (14), _NORM((_VAR##_14_0), ##__VA_ARGS__)); \
  _STORE((_ARRAY) + (_STEP) * (15), _NORM((_VAR##_15_0), ##__VA_ARGS__));

//! Store the 16x2 group of variables `_VAR_<i>_<j>` through `_STORE`.
//!   _STEP   multiplier applied to the slot number to form the offset.
//!   _VAR    identifier prefix; token-pasted with `_<i>_<j>`.
//!   _ARRAY  base expression the offset is added to.
//!   _STORE  function or macro invoked as `_STORE(destination, value)`.
//!   _NORM   function or macro applied to each variable before it is stored.
//!   ...     optional extra arguments forwarded to every `_NORM` call.
//! First expands MATRIX_VAR_STORE_16X1 with the same arguments, then writes
//! slot 16 + i at `(_ARRAY) + (_STEP) * (16 + i)` from `_VAR_<i>_1` for
//! i = 0..15 (i.e. slot = i + 16 * j for the statements emitted here).
//! NOTE: expands to a flat list of `;`-terminated statements with no enclosing
//! braces, so use it only where several statements are valid.
#define MATRIX_VAR_STORE_16X2(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     \
  MATRIX_VAR_STORE_16X1(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \
  _STORE((_ARRAY) + (_STEP) * (16), _NORM((_VAR##_0_1), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (17), _NORM((_VAR##_1_1), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (18), _NORM((_VAR##_2_1), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (19), _NORM((_VAR##_3_1), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (20), _NORM((_VAR##_4_1), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (21), _NORM((_VAR##_5_1), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (22), _NORM((_VAR##_6_1), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (23), _NORM((_VAR##_7_1), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (24), _NORM((_VAR##_8_1), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (25), _NORM((_VAR##_9_1), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (26), _NORM((_VAR##_10_1), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (27), _NORM((_VAR##_11_1), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (28), _NORM((_VAR##_12_1), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (29), _NORM((_VAR##_13_1), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (30), _NORM((_VAR##_14_1), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (31), _NORM((_VAR##_15_1), ##__VA_ARGS__));

//! Store the 16x4 group of variables `_VAR_<i>_<j>` through `_STORE`.
//!   _STEP   multiplier applied to the slot number to form the offset.
//!   _VAR    identifier prefix; token-pasted with `_<i>_<j>`.
//!   _ARRAY  base expression the offset is added to.
//!   _STORE  function or macro invoked as `_STORE(destination, value)`.
//!   _NORM   function or macro applied to each variable before it is stored.
//!   ...     optional extra arguments forwarded to every `_NORM` call.
//! First expands MATRIX_VAR_STORE_16X2 with the same arguments, then writes
//! slots 32..47 from `_VAR_<i>_2` and slots 48..63 from `_VAR_<i>_3`
//! (i = 0..15), each at `(_ARRAY) + (_STEP) * (slot)`; slot = i + 16 * j.
//! NOTE: expands to a flat list of `;`-terminated statements with no enclosing
//! braces, so use it only where several statements are valid.
#define MATRIX_VAR_STORE_16X4(_STEP, _VAR, _ARRAY, _STORE, _NORM, ...)     \
  MATRIX_VAR_STORE_16X2(_STEP, _VAR, _ARRAY, _STORE, _NORM, ##__VA_ARGS__) \
  _STORE((_ARRAY) + (_STEP) * (32), _NORM((_VAR##_0_2), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (33), _NORM((_VAR##_1_2), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (34), _NORM((_VAR##_2_2), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (35), _NORM((_VAR##_3_2), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (36), _NORM((_VAR##_4_2), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (37), _NORM((_VAR##_5_2), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (38), _NORM((_VAR##_6_2), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (39), _NORM((_VAR##_7_2), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (40), _NORM((_VAR##_8_2), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (41), _NORM((_VAR##_9_2), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (42), _NORM((_VAR##_10_2), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (43), _NORM((_VAR##_11_2), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (44), _NORM((_VAR##_12_2), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (45), _NORM((_VAR##_13_2), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (46), _NORM((_VAR##_14_2), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (47), _NORM((_VAR##_15_2), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (48), _NORM((_VAR##_0_3), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (49), _NORM((_VAR##_1_3), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (50), _NORM((_VAR##_2_3), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (51), _NORM((_VAR##_3_3), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (52), _NORM((_VAR##_4_3), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (53), _NORM((_VAR##_5_3), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (54), _NORM((_VAR##_6_3), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (55), _NORM((_VAR##_7_3), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (56), _NORM((_VAR##_8_3), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (57), _NORM((_VAR##_9_3), ##__VA_ARGS__));   \
  _STORE((_ARRAY) + (_STEP) * (58), _NORM((_VAR##_10_3), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (59), _NORM((_VAR##_11_3), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (60), _NORM((_VAR##_12_3), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (61), _NORM((_VAR##_13_3), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (62), _NORM((_VAR##_14_3), ##__VA_ARGS__));  \
  _STORE((_ARRAY) + (_STEP) * (63), _NORM((_VAR##_15_3), ##__VA_ARGS__));

//! Reassign the single variable `_VAR_0_0` to `_PERMUTE(_VAR_0_0, <extra>)`.
//!   _VAR      identifier prefix; token-pasted with `_0_0`.
//!   _PERMUTE  function or macro invoked as `_PERMUTE(value, ...)`.
//!   ...       optional extra arguments forwarded to `_PERMUTE`
//!             (`##__VA_ARGS__` drops the comma when none are given).
//! This is the base case that the larger MATRIX_VAR_PERMUTE_* macros chain to.
//! NOTE: expands to a bare `;`-terminated statement, not an expression.
#define MATRIX_VAR_PERMUTE_1X1(_VAR, _PERMUTE, ...) \
  (_VAR##_0_0) = _PERMUTE((_VAR##_0_0), ##__VA_ARGS__);

//! Reassign each of the 1x2 variables `_VAR_0_<j>` (j = 0..1) to
//! `_PERMUTE(<variable>, <extra>)`.
//!   _VAR      identifier prefix; token-pasted with `_<i>_<j>`.
//!   _PERMUTE  function or macro invoked as `_PERMUTE(value, ...)`.
//!   ...       optional extra arguments forwarded to every `_PERMUTE` call.
//! Expands MATRIX_VAR_PERMUTE_1X1 first, then handles `_VAR_0_1`.
//! NOTE: expands to bare `;`-terminated statements with no enclosing braces.
#define MATRIX_VAR_PERMUTE_1X2(_VAR, _PERMUTE, ...)     \
  MATRIX_VAR_PERMUTE_1X1(_VAR, _PERMUTE, ##__VA_ARGS__) \
  (_VAR##_0_1) = _PERMUTE((_VAR##_0_1), ##__VA_ARGS__);

//! Reassign each of the 1x4 variables `_VAR_0_<j>` (j = 0..3) to
//! `_PERMUTE(<variable>, <extra>)`.
//!   _VAR      identifier prefix; token-pasted with `_<i>_<j>`.
//!   _PERMUTE  function or macro invoked as `_PERMUTE(value, ...)`.
//!   ...       optional extra arguments forwarded to every `_PERMUTE` call.
//! Expands MATRIX_VAR_PERMUTE_1X2 first, then handles j = 2..3.
//! NOTE: expands to bare `;`-terminated statements with no enclosing braces.
#define MATRIX_VAR_PERMUTE_1X4(_VAR, _PERMUTE, ...)     \
  MATRIX_VAR_PERMUTE_1X2(_VAR, _PERMUTE, ##__VA_ARGS__) \
  (_VAR##_0_2) = _PERMUTE((_VAR##_0_2), ##__VA_ARGS__); \
  (_VAR##_0_3) = _PERMUTE((_VAR##_0_3), ##__VA_ARGS__);

//! Reassign each of the 1x8 variables `_VAR_0_<j>` (j = 0..7) to
//! `_PERMUTE(<variable>, <extra>)`.
//!   _VAR      identifier prefix; token-pasted with `_<i>_<j>`.
//!   _PERMUTE  function or macro invoked as `_PERMUTE(value, ...)`.
//!   ...       optional extra arguments forwarded to every `_PERMUTE` call.
//! Expands MATRIX_VAR_PERMUTE_1X4 first, then handles j = 4..7.
//! NOTE: expands to bare `;`-terminated statements with no enclosing braces.
#define MATRIX_VAR_PERMUTE_1X8(_VAR, _PERMUTE, ...)     \
  MATRIX_VAR_PERMUTE_1X4(_VAR, _PERMUTE, ##__VA_ARGS__) \
  (_VAR##_0_4) = _PERMUTE((_VAR##_0_4), ##__VA_ARGS__); \
  (_VAR##_0_5) = _PERMUTE((_VAR##_0_5), ##__VA_ARGS__); \
  (_VAR##_0_6) = _PERMUTE((_VAR##_0_6), ##__VA_ARGS__); \
  (_VAR##_0_7) = _PERMUTE((_VAR##_0_7), ##__VA_ARGS__);

//! Reassign each of the 1x16 variables `_VAR_0_<j>` (j = 0..15) to
//! `_PERMUTE(<variable>, <extra>)`.
//!   _VAR      identifier prefix; token-pasted with `_<i>_<j>`.
//!   _PERMUTE  function or macro invoked as `_PERMUTE(value, ...)`.
//!   ...       optional extra arguments forwarded to every `_PERMUTE` call.
//! Expands MATRIX_VAR_PERMUTE_1X8 first, then handles j = 8..15.
//! NOTE: expands to bare `;`-terminated statements with no enclosing braces.
#define MATRIX_VAR_PERMUTE_1X16(_VAR, _PERMUTE, ...)      \
  MATRIX_VAR_PERMUTE_1X8(_VAR, _PERMUTE, ##__VA_ARGS__)   \
  (_VAR##_0_8) = _PERMUTE((_VAR##_0_8), ##__VA_ARGS__);   \
  (_VAR##_0_9) = _PERMUTE((_VAR##_0_9), ##__VA_ARGS__);   \
  (_VAR##_0_10) = _PERMUTE((_VAR##_0_10), ##__VA_ARGS__); \
  (_VAR##_0_11) = _PERMUTE((_VAR##_0_11), ##__VA_ARGS__); \
  (_VAR##_0_12) = _PERMUTE((_VAR##_0_12), ##__VA_ARGS__); \
  (_VAR##_0_13) = _PERMUTE((_VAR##_0_13), ##__VA_ARGS__); \
  (_VAR##_0_14) = _PERMUTE((_VAR##_0_14), ##__VA_ARGS__); \
  (_VAR##_0_15) = _PERMUTE((_VAR##_0_15), ##__VA_ARGS__);

//! Reassign each of the 2x1 variables `_VAR_<i>_0` (i = 0..1) to
//! `_PERMUTE(<variable>, <extra>)`.
//!   _VAR      identifier prefix; token-pasted with `_<i>_<j>`.
//!   _PERMUTE  function or macro invoked as `_PERMUTE(value, ...)`.
//!   ...       optional extra arguments forwarded to every `_PERMUTE` call.
//! Expands MATRIX_VAR_PERMUTE_1X1 first, then handles `_VAR_1_0`.
//! NOTE: expands to bare `;`-terminated statements with no enclosing braces.
#define MATRIX_VAR_PERMUTE_2X1(_VAR, _PERMUTE, ...)     \
  MATRIX_VAR_PERMUTE_1X1(_VAR, _PERMUTE, ##__VA_ARGS__) \
  (_VAR##_1_0) = _PERMUTE((_VAR##_1_0), ##__VA_ARGS__);

//! Reassign each of the 2x2 variables `_VAR_<i>_<j>` (i = 0..1, j = 0..1) to
//! `_PERMUTE(<variable>, <extra>)`.
//!   _VAR      identifier prefix; token-pasted with `_<i>_<j>`.
//!   _PERMUTE  function or macro invoked as `_PERMUTE(value, ...)`.
//!   ...       optional extra arguments forwarded to every `_PERMUTE` call.
//! Expands MATRIX_VAR_PERMUTE_2X1 first (j = 0), then handles j = 1.
//! NOTE: expands to bare `;`-terminated statements with no enclosing braces.
#define MATRIX_VAR_PERMUTE_2X2(_VAR, _PERMUTE, ...)     \
  MATRIX_VAR_PERMUTE_2X1(_VAR, _PERMUTE, ##__VA_ARGS__) \
  (_VAR##_0_1) = _PERMUTE((_VAR##_0_1), ##__VA_ARGS__); \
  (_VAR##_1_1) = _PERMUTE((_VAR##_1_1), ##__VA_ARGS__);

//! Reassign each of the 2x4 variables `_VAR_<i>_<j>` (i = 0..1, j = 0..3) to
//! `_PERMUTE(<variable>, <extra>)`.
//!   _VAR      identifier prefix; token-pasted with `_<i>_<j>`.
//!   _PERMUTE  function or macro invoked as `_PERMUTE(value, ...)`.
//!   ...       optional extra arguments forwarded to every `_PERMUTE` call.
//! Expands MATRIX_VAR_PERMUTE_2X2 first (j = 0..1), then handles j = 2..3.
//! NOTE: expands to bare `;`-terminated statements with no enclosing braces.
#define MATRIX_VAR_PERMUTE_2X4(_VAR, _PERMUTE, ...)     \
  MATRIX_VAR_PERMUTE_2X2(_VAR, _PERMUTE, ##__VA_ARGS__) \
  (_VAR##_0_2) = _PERMUTE((_VAR##_0_2), ##__VA_ARGS__); \
  (_VAR##_1_2) = _PERMUTE((_VAR##_1_2), ##__VA_ARGS__); \
  (_VAR##_0_3) = _PERMUTE((_VAR##_0_3), ##__VA_ARGS__); \
  (_VAR##_1_3) = _PERMUTE((_VAR##_1_3), ##__VA_ARGS__);

//! Reassign each of the 2x8 variables `_VAR_<i>_<j>` (i = 0..1, j = 0..7) to
//! `_PERMUTE(<variable>, <extra>)`.
//!   _VAR      identifier prefix; token-pasted with `_<i>_<j>`.
//!   _PERMUTE  function or macro invoked as `_PERMUTE(value, ...)`.
//!   ...       optional extra arguments forwarded to every `_PERMUTE` call.
//! Expands MATRIX_VAR_PERMUTE_2X4 first (j = 0..3), then handles j = 4..7.
//! NOTE: expands to bare `;`-terminated statements with no enclosing braces.
#define MATRIX_VAR_PERMUTE_2X8(_VAR, _PERMUTE, ...)     \
  MATRIX_VAR_PERMUTE_2X4(_VAR, _PERMUTE, ##__VA_ARGS__) \
  (_VAR##_0_4) = _PERMUTE((_VAR##_0_4), ##__VA_ARGS__); \
  (_VAR##_1_4) = _PERMUTE((_VAR##_1_4), ##__VA_ARGS__); \
  (_VAR##_0_5) = _PERMUTE((_VAR##_0_5), ##__VA_ARGS__); \
  (_VAR##_1_5) = _PERMUTE((_VAR##_1_5), ##__VA_ARGS__); \
  (_VAR##_0_6) = _PERMUTE((_VAR##_0_6), ##__VA_ARGS__); \
  (_VAR##_1_6) = _PERMUTE((_VAR##_1_6), ##__VA_ARGS__); \
  (_VAR##_0_7) = _PERMUTE((_VAR##_0_7), ##__VA_ARGS__); \
  (_VAR##_1_7) = _PERMUTE((_VAR##_1_7), ##__VA_ARGS__);

//! Reassign each of the 2x16 variables `_VAR_<i>_<j>` (i = 0..1, j = 0..15) to
//! `_PERMUTE(<variable>, <extra>)`.
//!   _VAR      identifier prefix; token-pasted with `_<i>_<j>`.
//!   _PERMUTE  function or macro invoked as `_PERMUTE(value, ...)`.
//!   ...       optional extra arguments forwarded to every `_PERMUTE` call.
//! Expands MATRIX_VAR_PERMUTE_2X8 first (j = 0..7), then handles j = 8..15.
//! NOTE: expands to bare `;`-terminated statements with no enclosing braces.
#define MATRIX_VAR_PERMUTE_2X16(_VAR, _PERMUTE, ...)      \
  MATRIX_VAR_PERMUTE_2X8(_VAR, _PERMUTE, ##__VA_ARGS__)   \
  (_VAR##_0_8) = _PERMUTE((_VAR##_0_8), ##__VA_ARGS__);   \
  (_VAR##_1_8) = _PERMUTE((_VAR##_1_8), ##__VA_ARGS__);   \
  (_VAR##_0_9) = _PERMUTE((_VAR##_0_9), ##__VA_ARGS__);   \
  (_VAR##_1_9) = _PERMUTE((_VAR##_1_9), ##__VA_ARGS__);   \
  (_VAR##_0_10) = _PERMUTE((_VAR##_0_10), ##__VA_ARGS__); \
  (_VAR##_1_10) = _PERMUTE((_VAR##_1_10), ##__VA_ARGS__); \
  (_VAR##_0_11) = _PERMUTE((_VAR##_0_11), ##__VA_ARGS__); \
  (_VAR##_1_11) = _PERMUTE((_VAR##_1_11), ##__VA_ARGS__); \
  (_VAR##_0_12) = _PERMUTE((_VAR##_0_12), ##__VA_ARGS__); \
  (_VAR##_1_12) = _PERMUTE((_VAR##_1_12), ##__VA_ARGS__); \
  (_VAR##_0_13) = _PERMUTE((_VAR##_0_13), ##__VA_ARGS__); \
  (_VAR##_1_13) = _PERMUTE((_VAR##_1_13), ##__VA_ARGS__); \
  (_VAR##_0_14) = _PERMUTE((_VAR##_0_14), ##__VA_ARGS__); \
  (_VAR##_1_14) = _PERMUTE((_VAR##_1_14), ##__VA_ARGS__); \
  (_VAR##_0_15) = _PERMUTE((_VAR##_0_15), ##__VA_ARGS__); \
  (_VAR##_1_15) = _PERMUTE((_VAR##_1_15), ##__VA_ARGS__);

//! Reassign each of the 2x32 variables `_VAR_<i>_<j>` (i = 0..1, j = 0..31) to
//! `_PERMUTE(<variable>, <extra>)`.
//!   _VAR      identifier prefix; token-pasted with `_<i>_<j>`.
//!   _PERMUTE  function or macro invoked as `_PERMUTE(value, ...)`.
//!   ...       optional extra arguments forwarded to every `_PERMUTE` call.
//! Expands MATRIX_VAR_PERMUTE_2X16 first (j = 0..15), then handles j = 16..31.
//! NOTE: expands to bare `;`-terminated statements with no enclosing braces.
#define MATRIX_VAR_PERMUTE_2X32(_VAR, _PERMUTE, ...)      \
  MATRIX_VAR_PERMUTE_2X16(_VAR, _PERMUTE, ##__VA_ARGS__)  \
  (_VAR##_0_16) = _PERMUTE((_VAR##_0_16), ##__VA_ARGS__); \
  (_VAR##_1_16) = _PERMUTE((_VAR##_1_16), ##__VA_ARGS__); \
  (_VAR##_0_17) = _PERMUTE((_VAR##_0_17), ##__VA_ARGS__); \
  (_VAR##_1_17) = _PERMUTE((_VAR##_1_17), ##__VA_ARGS__); \
  (_VAR##_0_18) = _PERMUTE((_VAR##_0_18), ##__VA_ARGS__); \
  (_VAR##_1_18) = _PERMUTE((_VAR##_1_18), ##__VA_ARGS__); \
  (_VAR##_0_19) = _PERMUTE((_VAR##_0_19), ##__VA_ARGS__); \
  (_VAR##_1_19) = _PERMUTE((_VAR##_1_19), ##__VA_ARGS__); \
  (_VAR##_0_20) = _PERMUTE((_VAR##_0_20), ##__VA_ARGS__); \
  (_VAR##_1_20) = _PERMUTE((_VAR##_1_20), ##__VA_ARGS__); \
  (_VAR##_0_21) = _PERMUTE((_VAR##_0_21), ##__VA_ARGS__); \
  (_VAR##_1_21) = _PERMUTE((_VAR##_1_21), ##__VA_ARGS__); \
  (_VAR##_0_22) = _PERMUTE((_VAR##_0_22), ##__VA_ARGS__); \
  (_VAR##_1_22) = _PERMUTE((_VAR##_1_22), ##__VA_ARGS__); \
  (_VAR##_0_23) = _PERMUTE((_VAR##_0_23), ##__VA_ARGS__); \
  (_VAR##_1_23) = _PERMUTE((_VAR##_1_23), ##__VA_ARGS__); \
  (_VAR##_0_24) = _PERMUTE((_VAR##_0_24), ##__VA_ARGS__); \
  (_VAR##_1_24) = _PERMUTE((_VAR##_1_24), ##__VA_ARGS__); \
  (_VAR##_0_25) = _PERMUTE((_VAR##_0_25), ##__VA_ARGS__); \
  (_VAR##_1_25) = _PERMUTE((_VAR##_1_25), ##__VA_ARGS__); \
  (_VAR##_0_26) = _PERMUTE((_VAR##_0_26), ##__VA_ARGS__); \
  (_VAR##_1_26) = _PERMUTE((_VAR##_1_26), ##__VA_ARGS__); \
  (_VAR##_0_27) = _PERMUTE((_VAR##_0_27), ##__VA_ARGS__); \
  (_VAR##_1_27) = _PERMUTE((_VAR##_1_27), ##__VA_ARGS__); \
  (_VAR##_0_28) = _PERMUTE((_VAR##_0_28), ##__VA_ARGS__); \
  (_VAR##_1_28) = _PERMUTE((_VAR##_1_28), ##__VA_ARGS__); \
  (_VAR##_0_29) = _PERMUTE((_VAR##_0_29), ##__VA_ARGS__); \
  (_VAR##_1_29) = _PERMUTE((_VAR##_1_29), ##__VA_ARGS__); \
  (_VAR##_0_30) = _PERMUTE((_VAR##_0_30), ##__VA_ARGS__); \
  (_VAR##_1_30) = _PERMUTE((_VAR##_1_30), ##__VA_ARGS__); \
  (_VAR##_0_31) = _PERMUTE((_VAR##_0_31), ##__VA_ARGS__); \
  (_VAR##_1_31) = _PERMUTE((_VAR##_1_31), ##__VA_ARGS__);

//! Reassign each of the 4x1 variables `_VAR_<i>_0` (i = 0..3) to
//! `_PERMUTE(<variable>, <extra>)`.
//!   _VAR      identifier prefix; token-pasted with `_<i>_<j>`.
//!   _PERMUTE  function or macro invoked as `_PERMUTE(value, ...)`.
//!   ...       optional extra arguments forwarded to every `_PERMUTE` call.
//! Expands MATRIX_VAR_PERMUTE_2X1 first (i = 0..1), then handles i = 2..3.
//! NOTE: expands to bare `;`-terminated statements with no enclosing braces.
#define MATRIX_VAR_PERMUTE_4X1(_VAR, _PERMUTE, ...)     \
  MATRIX_VAR_PERMUTE_2X1(_VAR, _PERMUTE, ##__VA_ARGS__) \
  (_VAR##_2_0) = _PERMUTE((_VAR##_2_0), ##__VA_ARGS__); \
  (_VAR##_3_0) = _PERMUTE((_VAR##_3_0), ##__VA_ARGS__);

//! Reassign each of the 4x2 variables `_VAR_<i>_<j>` (i = 0..3, j = 0..1) to
//! `_PERMUTE(<variable>, <extra>)`.
//!   _VAR      identifier prefix; token-pasted with `_<i>_<j>`.
//!   _PERMUTE  function or macro invoked as `_PERMUTE(value, ...)`.
//!   ...       optional extra arguments forwarded to every `_PERMUTE` call.
//! Expands MATRIX_VAR_PERMUTE_4X1 first (j = 0), then handles j = 1.
//! NOTE: expands to bare `;`-terminated statements with no enclosing braces.
#define MATRIX_VAR_PERMUTE_4X2(_VAR, _PERMUTE, ...)     \
  MATRIX_VAR_PERMUTE_4X1(_VAR, _PERMUTE, ##__VA_ARGS__) \
  (_VAR##_0_1) = _PERMUTE((_VAR##_0_1), ##__VA_ARGS__); \
  (_VAR##_1_1) = _PERMUTE((_VAR##_1_1), ##__VA_ARGS__); \
  (_VAR##_2_1) = _PERMUTE((_VAR##_2_1), ##__VA_ARGS__); \
  (_VAR##_3_1) = _PERMUTE((_VAR##_3_1), ##__VA_ARGS__);

//! Reassign each of the 4x4 variables `_VAR_<i>_<j>` (i = 0..3, j = 0..3) to
//! `_PERMUTE(<variable>, <extra>)`.
//!   _VAR      identifier prefix; token-pasted with `_<i>_<j>`.
//!   _PERMUTE  function or macro invoked as `_PERMUTE(value, ...)`.
//!   ...       optional extra arguments forwarded to every `_PERMUTE` call.
//! Expands MATRIX_VAR_PERMUTE_4X2 first (j = 0..1), then handles j = 2..3.
//! NOTE: expands to bare `;`-terminated statements with no enclosing braces.
#define MATRIX_VAR_PERMUTE_4X4(_VAR, _PERMUTE, ...)     \
  MATRIX_VAR_PERMUTE_4X2(_VAR, _PERMUTE, ##__VA_ARGS__) \
  (_VAR##_0_2) = _PERMUTE((_VAR##_0_2), ##__VA_ARGS__); \
  (_VAR##_1_2) = _PERMUTE((_VAR##_1_2), ##__VA_ARGS__); \
  (_VAR##_2_2) = _PERMUTE((_VAR##_2_2), ##__VA_ARGS__); \
  (_VAR##_3_2) = _PERMUTE((_VAR##_3_2), ##__VA_ARGS__); \
  (_VAR##_0_3) = _PERMUTE((_VAR##_0_3), ##__VA_ARGS__); \
  (_VAR##_1_3) = _PERMUTE((_VAR##_1_3), ##__VA_ARGS__); \
  (_VAR##_2_3) = _PERMUTE((_VAR##_2_3), ##__VA_ARGS__); \
  (_VAR##_3_3) = _PERMUTE((_VAR##_3_3), ##__VA_ARGS__);

//! Reassign each of the 4x8 variables `_VAR_<i>_<j>` (i = 0..3, j = 0..7) to
//! `_PERMUTE(<variable>, <extra>)`.
//!   _VAR      identifier prefix; token-pasted with `_<i>_<j>`.
//!   _PERMUTE  function or macro invoked as `_PERMUTE(value, ...)`.
//!   ...       optional extra arguments forwarded to every `_PERMUTE` call.
//! Expands MATRIX_VAR_PERMUTE_4X4 first (j = 0..3), then handles j = 4..7.
//! NOTE: expands to bare `;`-terminated statements with no enclosing braces.
#define MATRIX_VAR_PERMUTE_4X8(_VAR, _PERMUTE, ...)     \
  MATRIX_VAR_PERMUTE_4X4(_VAR, _PERMUTE, ##__VA_ARGS__) \
  (_VAR##_0_4) = _PERMUTE((_VAR##_0_4), ##__VA_ARGS__); \
  (_VAR##_1_4) = _PERMUTE((_VAR##_1_4), ##__VA_ARGS__); \
  (_VAR##_2_4) = _PERMUTE((_VAR##_2_4), ##__VA_ARGS__); \
  (_VAR##_3_4) = _PERMUTE((_VAR##_3_4), ##__VA_ARGS__); \
  (_VAR##_0_5) = _PERMUTE((_VAR##_0_5), ##__VA_ARGS__); \
  (_VAR##_1_5) = _PERMUTE((_VAR##_1_5), ##__VA_ARGS__); \
  (_VAR##_2_5) = _PERMUTE((_VAR##_2_5), ##__VA_ARGS__); \
  (_VAR##_3_5) = _PERMUTE((_VAR##_3_5), ##__VA_ARGS__); \
  (_VAR##_0_6) = _PERMUTE((_VAR##_0_6), ##__VA_ARGS__); \
  (_VAR##_1_6) = _PERMUTE((_VAR##_1_6), ##__VA_ARGS__); \
  (_VAR##_2_6) = _PERMUTE((_VAR##_2_6), ##__VA_ARGS__); \
  (_VAR##_3_6) = _PERMUTE((_VAR##_3_6), ##__VA_ARGS__); \
  (_VAR##_0_7) = _PERMUTE((_VAR##_0_7), ##__VA_ARGS__); \
  (_VAR##_1_7) = _PERMUTE((_VAR##_1_7), ##__VA_ARGS__); \
  (_VAR##_2_7) = _PERMUTE((_VAR##_2_7), ##__VA_ARGS__); \
  (_VAR##_3_7) = _PERMUTE((_VAR##_3_7), ##__VA_ARGS__);

//! Reassign each of the 4x16 variables `_VAR_<i>_<j>` (i = 0..3, j = 0..15) to
//! `_PERMUTE(<variable>, <extra>)`.
//!   _VAR      identifier prefix; token-pasted with `_<i>_<j>`.
//!   _PERMUTE  function or macro invoked as `_PERMUTE(value, ...)`.
//!   ...       optional extra arguments forwarded to every `_PERMUTE` call.
//! Expands MATRIX_VAR_PERMUTE_4X8 first (j = 0..7), then handles j = 8..15.
//! NOTE: expands to bare `;`-terminated statements with no enclosing braces.
#define MATRIX_VAR_PERMUTE_4X16(_VAR, _PERMUTE, ...)      \
  MATRIX_VAR_PERMUTE_4X8(_VAR, _PERMUTE, ##__VA_ARGS__)   \
  (_VAR##_0_8) = _PERMUTE((_VAR##_0_8), ##__VA_ARGS__);   \
  (_VAR##_1_8) = _PERMUTE((_VAR##_1_8), ##__VA_ARGS__);   \
  (_VAR##_2_8) = _PERMUTE((_VAR##_2_8), ##__VA_ARGS__);   \
  (_VAR##_3_8) = _PERMUTE((_VAR##_3_8), ##__VA_ARGS__);   \
  (_VAR##_0_9) = _PERMUTE((_VAR##_0_9), ##__VA_ARGS__);   \
  (_VAR##_1_9) = _PERMUTE((_VAR##_1_9), ##__VA_ARGS__);   \
  (_VAR##_2_9) = _PERMUTE((_VAR##_2_9), ##__VA_ARGS__);   \
  (_VAR##_3_9) = _PERMUTE((_VAR##_3_9), ##__VA_ARGS__);   \
  (_VAR##_0_10) = _PERMUTE((_VAR##_0_10), ##__VA_ARGS__); \
  (_VAR##_1_10) = _PERMUTE((_VAR##_1_10), ##__VA_ARGS__); \
  (_VAR##_2_10) = _PERMUTE((_VAR##_2_10), ##__VA_ARGS__); \
  (_VAR##_3_10) = _PERMUTE((_VAR##_3_10), ##__VA_ARGS__); \
  (_VAR##_0_11) = _PERMUTE((_VAR##_0_11), ##__VA_ARGS__); \
  (_VAR##_1_11) = _PERMUTE((_VAR##_1_11), ##__VA_ARGS__); \
  (_VAR##_2_11) = _PERMUTE((_VAR##_2_11), ##__VA_ARGS__); \
  (_VAR##_3_11) = _PERMUTE((_VAR##_3_11), ##__VA_ARGS__); \
  (_VAR##_0_12) = _PERMUTE((_VAR##_0_12), ##__VA_ARGS__); \
  (_VAR##_1_12) = _PERMUTE((_VAR##_1_12), ##__VA_ARGS__); \
  (_VAR##_2_12) = _PERMUTE((_VAR##_2_12), ##__VA_ARGS__); \
  (_VAR##_3_12) = _PERMUTE((_VAR##_3_12), ##__VA_ARGS__); \
  (_VAR##_0_13) = _PERMUTE((_VAR##_0_13), ##__VA_ARGS__); \
  (_VAR##_1_13) = _PERMUTE((_VAR##_1_13), ##__VA_ARGS__); \
  (_VAR##_2_13) = _PERMUTE((_VAR##_2_13), ##__VA_ARGS__); \
  (_VAR##_3_13) = _PERMUTE((_VAR##_3_13), ##__VA_ARGS__); \
  (_VAR##_0_14) = _PERMUTE((_VAR##_0_14), ##__VA_ARGS__); \
  (_VAR##_1_14) = _PERMUTE((_VAR##_1_14), ##__VA_ARGS__); \
  (_VAR##_2_14) = _PERMUTE((_VAR##_2_14), ##__VA_ARGS__); \
  (_VAR##_3_14) = _PERMUTE((_VAR##_3_14), ##__VA_ARGS__); \
  (_VAR##_0_15) = _PERMUTE((_VAR##_0_15), ##__VA_ARGS__); \
  (_VAR##_1_15) = _PERMUTE((_VAR##_1_15), ##__VA_ARGS__); \
  (_VAR##_2_15) = _PERMUTE((_VAR##_2_15), ##__VA_ARGS__); \
  (_VAR##_3_15) = _PERMUTE((_VAR##_3_15), ##__VA_ARGS__);

//! Reassign each of the 4x32 variables `_VAR_<i>_<j>` (i = 0..3, j = 0..31) to
//! `_PERMUTE(<variable>, <extra>)`.
//!   _VAR      identifier prefix; token-pasted with `_<i>_<j>`.
//!   _PERMUTE  function or macro invoked as `_PERMUTE(value, ...)`.
//!   ...       optional extra arguments forwarded to every `_PERMUTE` call.
//! Expands MATRIX_VAR_PERMUTE_4X16 first (j = 0..15), then handles j = 16..31.
//! NOTE: expands to bare `;`-terminated statements with no enclosing braces.
#define MATRIX_VAR_PERMUTE_4X32(_VAR, _PERMUTE, ...)      \
  MATRIX_VAR_PERMUTE_4X16(_VAR, _PERMUTE, ##__VA_ARGS__)  \
  (_VAR##_0_16) = _PERMUTE((_VAR##_0_16), ##__VA_ARGS__); \
  (_VAR##_1_16) = _PERMUTE((_VAR##_1_16), ##__VA_ARGS__); \
  (_VAR##_2_16) = _PERMUTE((_VAR##_2_16), ##__VA_ARGS__); \
  (_VAR##_3_16) = _PERMUTE((_VAR##_3_16), ##__VA_ARGS__); \
  (_VAR##_0_17) = _PERMUTE((_VAR##_0_17), ##__VA_ARGS__); \
  (_VAR##_1_17) = _PERMUTE((_VAR##_1_17), ##__VA_ARGS__); \
  (_VAR##_2_17) = _PERMUTE((_VAR##_2_17), ##__VA_ARGS__); \
  (_VAR##_3_17) = _PERMUTE((_VAR##_3_17), ##__VA_ARGS__); \
  (_VAR##_0_18) = _PERMUTE((_VAR##_0_18), ##__VA_ARGS__); \
  (_VAR##_1_18) = _PERMUTE((_VAR##_1_18), ##__VA_ARGS__); \
  (_VAR##_2_18) = _PERMUTE((_VAR##_2_18), ##__VA_ARGS__); \
  (_VAR##_3_18) = _PERMUTE((_VAR##_3_18), ##__VA_ARGS__); \
  (_VAR##_0_19) = _PERMUTE((_VAR##_0_19), ##__VA_ARGS__); \
  (_VAR##_1_19) = _PERMUTE((_VAR##_1_19), ##__VA_ARGS__); \
  (_VAR##_2_19) = _PERMUTE((_VAR##_2_19), ##__VA_ARGS__); \
  (_VAR##_3_19) = _PERMUTE((_VAR##_3_19), ##__VA_ARGS__); \
  (_VAR##_0_20) = _PERMUTE((_VAR##_0_20), ##__VA_ARGS__); \
  (_VAR##_1_20) = _PERMUTE((_VAR##_1_20), ##__VA_ARGS__); \
  (_VAR##_2_20) = _PERMUTE((_VAR##_2_20), ##__VA_ARGS__); \
  (_VAR##_3_20) = _PERMUTE((_VAR##_3_20), ##__VA_ARGS__); \
  (_VAR##_0_21) = _PERMUTE((_VAR##_0_21), ##__VA_ARGS__); \
  (_VAR##_1_21) = _PERMUTE((_VAR##_1_21), ##__VA_ARGS__); \
  (_VAR##_2_21) = _PERMUTE((_VAR##_2_21), ##__VA_ARGS__); \
  (_VAR##_3_21) = _PERMUTE((_VAR##_3_21), ##__VA_ARGS__); \
  (_VAR##_0_22) = _PERMUTE((_VAR##_0_22), ##__VA_ARGS__); \
  (_VAR##_1_22) = _PERMUTE((_VAR##_1_22), ##__VA_ARGS__); \
  (_VAR##_2_22) = _PERMUTE((_VAR##_2_22), ##__VA_ARGS__); \
  (_VAR##_3_22) = _PERMUTE((_VAR##_3_22), ##__VA_ARGS__); \
  (_VAR##_0_23) = _PERMUTE((_VAR##_0_23), ##__VA_ARGS__); \
  (_VAR##_1_23) = _PERMUTE((_VAR##_1_23), ##__VA_ARGS__); \
  (_VAR##_2_23) = _PERMUTE((_VAR##_2_23), ##__VA_ARGS__); \
  (_VAR##_3_23) = _PERMUTE((_VAR##_3_23), ##__VA_ARGS__); \
  (_VAR##_0_24) = _PERMUTE((_VAR##_0_24), ##__VA_ARGS__); \
  (_VAR##_1_24) = _PERMUTE((_VAR##_1_24), ##__VA_ARGS__); \
  (_VAR##_2_24) = _PERMUTE((_VAR##_2_24), ##__VA_ARGS__); \
  (_VAR##_3_24) = _PERMUTE((_VAR##_3_24), ##__VA_ARGS__); \
  (_VAR##_0_25) = _PERMUTE((_VAR##_0_25), ##__VA_ARGS__); \
  (_VAR##_1_25) = _PERMUTE((_VAR##_1_25), ##__VA_ARGS__); \
  (_VAR##_2_25) = _PERMUTE((_VAR##_2_25), ##__VA_ARGS__); \
  (_VAR##_3_25) = _PERMUTE((_VAR##_3_25), ##__VA_ARGS__); \
  (_VAR##_0_26) = _PERMUTE((_VAR##_0_26), ##__VA_ARGS__); \
  (_VAR##_1_26) = _PERMUTE((_VAR##_1_26), ##__VA_ARGS__); \
  (_VAR##_2_26) = _PERMUTE((_VAR##_2_26), ##__VA_ARGS__); \
  (_VAR##_3_26) = _PERMUTE((_VAR##_3_26), ##__VA_ARGS__); \
  (_VAR##_0_27) = _PERMUTE((_VAR##_0_27), ##__VA_ARGS__); \
  (_VAR##_1_27) = _PERMUTE((_VAR##_1_27), ##__VA_ARGS__); \
  (_VAR##_2_27) = _PERMUTE((_VAR##_2_27), ##__VA_ARGS__); \
  (_VAR##_3_27) = _PERMUTE((_VAR##_3_27), ##__VA_ARGS__); \
  (_VAR##_0_28) = _PERMUTE((_VAR##_0_28), ##__VA_ARGS__); \
  (_VAR##_1_28) = _PERMUTE((_VAR##_1_28), ##__VA_ARGS__); \
  (_VAR##_2_28) = _PERMUTE((_VAR##_2_28), ##__VA_ARGS__); \
  (_VAR##_3_28) = _PERMUTE((_VAR##_3_28), ##__VA_ARGS__); \
  (_VAR##_0_29) = _PERMUTE((_VAR##_0_29), ##__VA_ARGS__); \
  (_VAR##_1_29) = _PERMUTE((_VAR##_1_29), ##__VA_ARGS__); \
  (_VAR##_2_29) = _PERMUTE((_VAR##_2_29), ##__VA_ARGS__); \
  (_VAR##_3_29) = _PERMUTE((_VAR##_3_29), ##__VA_ARGS__); \
  (_VAR##_0_30) = _PERMUTE((_VAR##_0_30), ##__VA_ARGS__); \
  (_VAR##_1_30) = _PERMUTE((_VAR##_1_30), ##__VA_ARGS__); \
  (_VAR##_2_30) = _PERMUTE((_VAR##_2_30), ##__VA_ARGS__); \
  (_VAR##_3_30) = _PERMUTE((_VAR##_3_30), ##__VA_ARGS__); \
  (_VAR##_0_31) = _PERMUTE((_VAR##_0_31), ##__VA_ARGS__); \
  (_VAR##_1_31) = _PERMUTE((_VAR##_1_31), ##__VA_ARGS__); \
  (_VAR##_2_31) = _PERMUTE((_VAR##_2_31), ##__VA_ARGS__); \
  (_VAR##_3_31) = _PERMUTE((_VAR##_3_31), ##__VA_ARGS__);

//! Reassign each of the 8x1 variables `_VAR_<i>_0` (i = 0..7) to
//! `_PERMUTE(<variable>, <extra>)`.
//!   _VAR      identifier prefix; token-pasted with `_<i>_<j>`.
//!   _PERMUTE  function or macro invoked as `_PERMUTE(value, ...)`.
//!   ...       optional extra arguments forwarded to every `_PERMUTE` call.
//! Expands MATRIX_VAR_PERMUTE_4X1 first (i = 0..3), then handles i = 4..7.
//! NOTE: expands to bare `;`-terminated statements with no enclosing braces.
#define MATRIX_VAR_PERMUTE_8X1(_VAR, _PERMUTE, ...)     \
  MATRIX_VAR_PERMUTE_4X1(_VAR, _PERMUTE, ##__VA_ARGS__) \
  (_VAR##_4_0) = _PERMUTE((_VAR##_4_0), ##__VA_ARGS__); \
  (_VAR##_5_0) = _PERMUTE((_VAR##_5_0), ##__VA_ARGS__); \
  (_VAR##_6_0) = _PERMUTE((_VAR##_6_0), ##__VA_ARGS__); \
  (_VAR##_7_0) = _PERMUTE((_VAR##_7_0), ##__VA_ARGS__);

//! Reassign each of the 8x2 variables `_VAR_<i>_<j>` (i = 0..7, j = 0..1) to
//! `_PERMUTE(<variable>, <extra>)`.
//!   _VAR      identifier prefix; token-pasted with `_<i>_<j>`.
//!   _PERMUTE  function or macro invoked as `_PERMUTE(value, ...)`.
//!   ...       optional extra arguments forwarded to every `_PERMUTE` call.
//! Expands MATRIX_VAR_PERMUTE_8X1 first (j = 0), then handles j = 1.
//! NOTE: expands to bare `;`-terminated statements with no enclosing braces.
#define MATRIX_VAR_PERMUTE_8X2(_VAR, _PERMUTE, ...)     \
  MATRIX_VAR_PERMUTE_8X1(_VAR, _PERMUTE, ##__VA_ARGS__) \
  (_VAR##_0_1) = _PERMUTE((_VAR##_0_1), ##__VA_ARGS__); \
  (_VAR##_1_1) = _PERMUTE((_VAR##_1_1), ##__VA_ARGS__); \
  (_VAR##_2_1) = _PERMUTE((_VAR##_2_1), ##__VA_ARGS__); \
  (_VAR##_3_1) = _PERMUTE((_VAR##_3_1), ##__VA_ARGS__); \
  (_VAR##_4_1) = _PERMUTE((_VAR##_4_1), ##__VA_ARGS__); \
  (_VAR##_5_1) = _PERMUTE((_VAR##_5_1), ##__VA_ARGS__); \
  (_VAR##_6_1) = _PERMUTE((_VAR##_6_1), ##__VA_ARGS__); \
  (_VAR##_7_1) = _PERMUTE((_VAR##_7_1), ##__VA_ARGS__);

//! Reassign each of the 8x4 variables `_VAR_<i>_<j>` (i = 0..7, j = 0..3) to
//! `_PERMUTE(<variable>, <extra>)`.
//!   _VAR      identifier prefix; token-pasted with `_<i>_<j>`.
//!   _PERMUTE  function or macro invoked as `_PERMUTE(value, ...)`.
//!   ...       optional extra arguments forwarded to every `_PERMUTE` call.
//! Expands MATRIX_VAR_PERMUTE_8X2 first (j = 0..1), then handles j = 2..3.
//! NOTE: expands to bare `;`-terminated statements with no enclosing braces.
#define MATRIX_VAR_PERMUTE_8X4(_VAR, _PERMUTE, ...)     \
  MATRIX_VAR_PERMUTE_8X2(_VAR, _PERMUTE, ##__VA_ARGS__) \
  (_VAR##_0_2) = _PERMUTE((_VAR##_0_2), ##__VA_ARGS__); \
  (_VAR##_1_2) = _PERMUTE((_VAR##_1_2), ##__VA_ARGS__); \
  (_VAR##_2_2) = _PERMUTE((_VAR##_2_2), ##__VA_ARGS__); \
  (_VAR##_3_2) = _PERMUTE((_VAR##_3_2), ##__VA_ARGS__); \
  (_VAR##_4_2) = _PERMUTE((_VAR##_4_2), ##__VA_ARGS__); \
  (_VAR##_5_2) = _PERMUTE((_VAR##_5_2), ##__VA_ARGS__); \
  (_VAR##_6_2) = _PERMUTE((_VAR##_6_2), ##__VA_ARGS__); \
  (_VAR##_7_2) = _PERMUTE((_VAR##_7_2), ##__VA_ARGS__); \
  (_VAR##_0_3) = _PERMUTE((_VAR##_0_3), ##__VA_ARGS__); \
  (_VAR##_1_3) = _PERMUTE((_VAR##_1_3), ##__VA_ARGS__); \
  (_VAR##_2_3) = _PERMUTE((_VAR##_2_3), ##__VA_ARGS__); \
  (_VAR##_3_3) = _PERMUTE((_VAR##_3_3), ##__VA_ARGS__); \
  (_VAR##_4_3) = _PERMUTE((_VAR##_4_3), ##__VA_ARGS__); \
  (_VAR##_5_3) = _PERMUTE((_VAR##_5_3), ##__VA_ARGS__); \
  (_VAR##_6_3) = _PERMUTE((_VAR##_6_3), ##__VA_ARGS__); \
  (_VAR##_7_3) = _PERMUTE((_VAR##_7_3), ##__VA_ARGS__);

//! Reassign each of the 8x8 variables `_VAR_<i>_<j>` (i = 0..7, j = 0..7) to
//! `_PERMUTE(<variable>, <extra>)`.
//!   _VAR      identifier prefix; token-pasted with `_<i>_<j>`.
//!   _PERMUTE  function or macro invoked as `_PERMUTE(value, ...)`.
//!   ...       optional extra arguments forwarded to every `_PERMUTE` call.
//! Expands MATRIX_VAR_PERMUTE_8X4 first (j = 0..3), then handles j = 4..7.
//! NOTE: expands to bare `;`-terminated statements with no enclosing braces.
#define MATRIX_VAR_PERMUTE_8X8(_VAR, _PERMUTE, ...)     \
  MATRIX_VAR_PERMUTE_8X4(_VAR, _PERMUTE, ##__VA_ARGS__) \
  (_VAR##_0_4) = _PERMUTE((_VAR##_0_4), ##__VA_ARGS__); \
  (_VAR##_1_4) = _PERMUTE((_VAR##_1_4), ##__VA_ARGS__); \
  (_VAR##_2_4) = _PERMUTE((_VAR##_2_4), ##__VA_ARGS__); \
  (_VAR##_3_4) = _PERMUTE((_VAR##_3_4), ##__VA_ARGS__); \
  (_VAR##_4_4) = _PERMUTE((_VAR##_4_4), ##__VA_ARGS__); \
  (_VAR##_5_4) = _PERMUTE((_VAR##_5_4), ##__VA_ARGS__); \
  (_VAR##_6_4) = _PERMUTE((_VAR##_6_4), ##__VA_ARGS__); \
  (_VAR##_7_4) = _PERMUTE((_VAR##_7_4), ##__VA_ARGS__); \
  (_VAR##_0_5) = _PERMUTE((_VAR##_0_5), ##__VA_ARGS__); \
  (_VAR##_1_5) = _PERMUTE((_VAR##_1_5), ##__VA_ARGS__); \
  (_VAR##_2_5) = _PERMUTE((_VAR##_2_5), ##__VA_ARGS__); \
  (_VAR##_3_5) = _PERMUTE((_VAR##_3_5), ##__VA_ARGS__); \
  (_VAR##_4_5) = _PERMUTE((_VAR##_4_5), ##__VA_ARGS__); \
  (_VAR##_5_5) = _PERMUTE((_VAR##_5_5), ##__VA_ARGS__); \
  (_VAR##_6_5) = _PERMUTE((_VAR##_6_5), ##__VA_ARGS__); \
  (_VAR##_7_5) = _PERMUTE((_VAR##_7_5), ##__VA_ARGS__); \
  (_VAR##_0_6) = _PERMUTE((_VAR##_0_6), ##__VA_ARGS__); \
  (_VAR##_1_6) = _PERMUTE((_VAR##_1_6), ##__VA_ARGS__); \
  (_VAR##_2_6) = _PERMUTE((_VAR##_2_6), ##__VA_ARGS__); \
  (_VAR##_3_6) = _PERMUTE((_VAR##_3_6), ##__VA_ARGS__); \
  (_VAR##_4_6) = _PERMUTE((_VAR##_4_6), ##__VA_ARGS__); \
  (_VAR##_5_6) = _PERMUTE((_VAR##_5_6), ##__VA_ARGS__); \
  (_VAR##_6_6) = _PERMUTE((_VAR##_6_6), ##__VA_ARGS__); \
  (_VAR##_7_6) = _PERMUTE((_VAR##_7_6), ##__VA_ARGS__); \
  (_VAR##_0_7) = _PERMUTE((_VAR##_0_7), ##__VA_ARGS__); \
  (_VAR##_1_7) = _PERMUTE((_VAR##_1_7), ##__VA_ARGS__); \
  (_VAR##_2_7) = _PERMUTE((_VAR##_2_7), ##__VA_ARGS__); \
  (_VAR##_3_7) = _PERMUTE((_VAR##_3_7), ##__VA_ARGS__); \
  (_VAR##_4_7) = _PERMUTE((_VAR##_4_7), ##__VA_ARGS__); \
  (_VAR##_5_7) = _PERMUTE((_VAR##_5_7), ##__VA_ARGS__); \
  (_VAR##_6_7) = _PERMUTE((_VAR##_6_7), ##__VA_ARGS__); \
  (_VAR##_7_7) = _PERMUTE((_VAR##_7_7), ##__VA_ARGS__);

//! Reassign each of the 8x16 variables `_VAR_<i>_<j>` (i = 0..7, j = 0..15) to
//! `_PERMUTE(<variable>, <extra>)`.
//!   _VAR      identifier prefix; token-pasted with `_<i>_<j>`.
//!   _PERMUTE  function or macro invoked as `_PERMUTE(value, ...)`.
//!   ...       optional extra arguments forwarded to every `_PERMUTE` call.
//! Expands MATRIX_VAR_PERMUTE_8X8 first (j = 0..7), then handles j = 8..15.
//! NOTE: expands to bare `;`-terminated statements with no enclosing braces.
#define MATRIX_VAR_PERMUTE_8X16(_VAR, _PERMUTE, ...)      \
  MATRIX_VAR_PERMUTE_8X8(_VAR, _PERMUTE, ##__VA_ARGS__)   \
  (_VAR##_0_8) = _PERMUTE((_VAR##_0_8), ##__VA_ARGS__);   \
  (_VAR##_1_8) = _PERMUTE((_VAR##_1_8), ##__VA_ARGS__);   \
  (_VAR##_2_8) = _PERMUTE((_VAR##_2_8), ##__VA_ARGS__);   \
  (_VAR##_3_8) = _PERMUTE((_VAR##_3_8), ##__VA_ARGS__);   \
  (_VAR##_4_8) = _PERMUTE((_VAR##_4_8), ##__VA_ARGS__);   \
  (_VAR##_5_8) = _PERMUTE((_VAR##_5_8), ##__VA_ARGS__);   \
  (_VAR##_6_8) = _PERMUTE((_VAR##_6_8), ##__VA_ARGS__);   \
  (_VAR##_7_8) = _PERMUTE((_VAR##_7_8), ##__VA_ARGS__);   \
  (_VAR##_0_9) = _PERMUTE((_VAR##_0_9), ##__VA_ARGS__);   \
  (_VAR##_1_9) = _PERMUTE((_VAR##_1_9), ##__VA_ARGS__);   \
  (_VAR##_2_9) = _PERMUTE((_VAR##_2_9), ##__VA_ARGS__);   \
  (_VAR##_3_9) = _PERMUTE((_VAR##_3_9), ##__VA_ARGS__);   \
  (_VAR##_4_9) = _PERMUTE((_VAR##_4_9), ##__VA_ARGS__);   \
  (_VAR##_5_9) = _PERMUTE((_VAR##_5_9), ##__VA_ARGS__);   \
  (_VAR##_6_9) = _PERMUTE((_VAR##_6_9), ##__VA_ARGS__);   \
  (_VAR##_7_9) = _PERMUTE((_VAR##_7_9), ##__VA_ARGS__);   \
  (_VAR##_0_10) = _PERMUTE((_VAR##_0_10), ##__VA_ARGS__); \
  (_VAR##_1_10) = _PERMUTE((_VAR##_1_10), ##__VA_ARGS__); \
  (_VAR##_2_10) = _PERMUTE((_VAR##_2_10), ##__VA_ARGS__); \
  (_VAR##_3_10) = _PERMUTE((_VAR##_3_10), ##__VA_ARGS__); \
  (_VAR##_4_10) = _PERMUTE((_VAR##_4_10), ##__VA_ARGS__); \
  (_VAR##_5_10) = _PERMUTE((_VAR##_5_10), ##__VA_ARGS__); \
  (_VAR##_6_10) = _PERMUTE((_VAR##_6_10), ##__VA_ARGS__); \
  (_VAR##_7_10) = _PERMUTE((_VAR##_7_10), ##__VA_ARGS__); \
  (_VAR##_0_11) = _PERMUTE((_VAR##_0_11), ##__VA_ARGS__); \
  (_VAR##_1_11) = _PERMUTE((_VAR##_1_11), ##__VA_ARGS__); \
  (_VAR##_2_11) = _PERMUTE((_VAR##_2_11), ##__VA_ARGS__); \
  (_VAR##_3_11) = _PERMUTE((_VAR##_3_11), ##__VA_ARGS__); \
  (_VAR##_4_11) = _PERMUTE((_VAR##_4_11), ##__VA_ARGS__); \
  (_VAR##_5_11) = _PERMUTE((_VAR##_5_11), ##__VA_ARGS__); \
  (_VAR##_6_11) = _PERMUTE((_VAR##_6_11), ##__VA_ARGS__); \
  (_VAR##_7_11) = _PERMUTE((_VAR##_7_11), ##__VA_ARGS__); \
  (_VAR##_0_12) = _PERMUTE((_VAR##_0_12), ##__VA_ARGS__); \
  (_VAR##_1_12) = _PERMUTE((_VAR##_1_12), ##__VA_ARGS__); \
  (_VAR##_2_12) = _PERMUTE((_VAR##_2_12), ##__VA_ARGS__); \
  (_VAR##_3_12) = _PERMUTE((_VAR##_3_12), ##__VA_ARGS__); \
  (_VAR##_4_12) = _PERMUTE((_VAR##_4_12), ##__VA_ARGS__); \
  (_VAR##_5_12) = _PERMUTE((_VAR##_5_12), ##__VA_ARGS__); \
  (_VAR##_6_12) = _PERMUTE((_VAR##_6_12), ##__VA_ARGS__); \
  (_VAR##_7_12) = _PERMUTE((_VAR##_7_12), ##__VA_ARGS__); \
  (_VAR##_0_13) = _PERMUTE((_VAR##_0_13), ##__VA_ARGS__); \
  (_VAR##_1_13) = _PERMUTE((_VAR##_1_13), ##__VA_ARGS__); \
  (_VAR##_2_13) = _PERMUTE((_VAR##_2_13), ##__VA_ARGS__); \
  (_VAR##_3_13) = _PERMUTE((_VAR##_3_13), ##__VA_ARGS__); \
  (_VAR##_4_13) = _PERMUTE((_VAR##_4_13), ##__VA_ARGS__); \
  (_VAR##_5_13) = _PERMUTE((_VAR##_5_13), ##__VA_ARGS__); \
  (_VAR##_6_13) = _PERMUTE((_VAR##_6_13), ##__VA_ARGS__); \
  (_VAR##_7_13) = _PERMUTE((_VAR##_7_13), ##__VA_ARGS__); \
  (_VAR##_0_14) = _PERMUTE((_VAR##_0_14), ##__VA_ARGS__); \
  (_VAR##_1_14) = _PERMUTE((_VAR##_1_14), ##__VA_ARGS__); \
  (_VAR##_2_14) = _PERMUTE((_VAR##_2_14), ##__VA_ARGS__); \
  (_VAR##_3_14) = _PERMUTE((_VAR##_3_14), ##__VA_ARGS__); \
  (_VAR##_4_14) = _PERMUTE((_VAR##_4_14), ##__VA_ARGS__); \
  (_VAR##_5_14) = _PERMUTE((_VAR##_5_14), ##__VA_ARGS__); \
  (_VAR##_6_14) = _PERMUTE((_VAR##_6_14), ##__VA_ARGS__); \
  (_VAR##_7_14) = _PERMUTE((_VAR##_7_14), ##__VA_ARGS__); \
  (_VAR##_0_15) = _PERMUTE((_VAR##_0_15), ##__VA_ARGS__); \
  (_VAR##_1_15) = _PERMUTE((_VAR##_1_15), ##__VA_ARGS__); \
  (_VAR##_2_15) = _PERMUTE((_VAR##_2_15), ##__VA_ARGS__); \
  (_VAR##_3_15) = _PERMUTE((_VAR##_3_15), ##__VA_ARGS__); \
  (_VAR##_4_15) = _PERMUTE((_VAR##_4_15), ##__VA_ARGS__); \
  (_VAR##_5_15) = _PERMUTE((_VAR##_5_15), ##__VA_ARGS__); \
  (_VAR##_6_15) = _PERMUTE((_VAR##_6_15), ##__VA_ARGS__); \
  (_VAR##_7_15) = _PERMUTE((_VAR##_7_15), ##__VA_ARGS__);

//! Reassign each of the 8x32 variables `_VAR_<i>_<j>` (i = 0..7, j = 0..31) to
//! `_PERMUTE(<variable>, <extra>)`.
//!   _VAR      identifier prefix; token-pasted with `_<i>_<j>`.
//!   _PERMUTE  function or macro invoked as `_PERMUTE(value, ...)`.
//!   ...       optional extra arguments forwarded to every `_PERMUTE` call.
//! Expands MATRIX_VAR_PERMUTE_8X16 first (j = 0..15), then handles j = 16..31.
//! NOTE: expands to bare `;`-terminated statements with no enclosing braces.
#define MATRIX_VAR_PERMUTE_8X32(_VAR, _PERMUTE, ...)      \
  MATRIX_VAR_PERMUTE_8X16(_VAR, _PERMUTE, ##__VA_ARGS__)  \
  (_VAR##_0_16) = _PERMUTE((_VAR##_0_16), ##__VA_ARGS__); \
  (_VAR##_1_16) = _PERMUTE((_VAR##_1_16), ##__VA_ARGS__); \
  (_VAR##_2_16) = _PERMUTE((_VAR##_2_16), ##__VA_ARGS__); \
  (_VAR##_3_16) = _PERMUTE((_VAR##_3_16), ##__VA_ARGS__); \
  (_VAR##_4_16) = _PERMUTE((_VAR##_4_16), ##__VA_ARGS__); \
  (_VAR##_5_16) = _PERMUTE((_VAR##_5_16), ##__VA_ARGS__); \
  (_VAR##_6_16) = _PERMUTE((_VAR##_6_16), ##__VA_ARGS__); \
  (_VAR##_7_16) = _PERMUTE((_VAR##_7_16), ##__VA_ARGS__); \
  (_VAR##_0_17) = _PERMUTE((_VAR##_0_17), ##__VA_ARGS__); \
  (_VAR##_1_17) = _PERMUTE((_VAR##_1_17), ##__VA_ARGS__); \
  (_VAR##_2_17) = _PERMUTE((_VAR##_2_17), ##__VA_ARGS__); \
  (_VAR##_3_17) = _PERMUTE((_VAR##_3_17), ##__VA_ARGS__); \
  (_VAR##_4_17) = _PERMUTE((_VAR##_4_17), ##__VA_ARGS__); \
  (_VAR##_5_17) = _PERMUTE((_VAR##_5_17), ##__VA_ARGS__); \
  (_VAR##_6_17) = _PERMUTE((_VAR##_6_17), ##__VA_ARGS__); \
  (_VAR##_7_17) = _PERMUTE((_VAR##_7_17), ##__VA_ARGS__); \
  (_VAR##_0_18) = _PERMUTE((_VAR##_0_18), ##__VA_ARGS__); \
  (_VAR##_1_18) = _PERMUTE((_VAR##_1_18), ##__VA_ARGS__); \
  (_VAR##_2_18) = _PERMUTE((_VAR##_2_18), ##__VA_ARGS__); \
  (_VAR##_3_18) = _PERMUTE((_VAR##_3_18), ##__VA_ARGS__); \
  (_VAR##_4_18) = _PERMUTE((_VAR##_4_18), ##__VA_ARGS__); \
  (_VAR##_5_18) = _PERMUTE((_VAR##_5_18), ##__VA_ARGS__); \
  (_VAR##_6_18) = _PERMUTE((_VAR##_6_18), ##__VA_ARGS__); \
  (_VAR##_7_18) = _PERMUTE((_VAR##_7_18), ##__VA_ARGS__); \
  (_VAR##_0_19) = _PERMUTE((_VAR##_0_19), ##__VA_ARGS__); \
  (_VAR##_1_19) = _PERMUTE((_VAR##_1_19), ##__VA_ARGS__); \
  (_VAR##_2_19) = _PERMUTE((_VAR##_2_19), ##__VA_ARGS__); \
  (_VAR##_3_19) = _PERMUTE((_VAR##_3_19), ##__VA_ARGS__); \
  (_VAR##_4_19) = _PERMUTE((_VAR##_4_19), ##__VA_ARGS__); \
  (_VAR##_5_19) = _PERMUTE((_VAR##_5_19), ##__VA_ARGS__); \
  (_VAR##_6_19) = _PERMUTE((_VAR##_6_19), ##__VA_ARGS__); \
  (_VAR##_7_19) = _PERMUTE((_VAR##_7_19), ##__VA_ARGS__); \
  (_VAR##_0_20) = _PERMUTE((_VAR##_0_20), ##__VA_ARGS__); \
  (_VAR##_1_20) = _PERMUTE((_VAR##_1_20), ##__VA_ARGS__); \
  (_VAR##_2_20) = _PERMUTE((_VAR##_2_20), ##__VA_ARGS__); \
  (_VAR##_3_20) = _PERMUTE((_VAR##_3_20), ##__VA_ARGS__); \
  (_VAR##_4_20) = _PERMUTE((_VAR##_4_20), ##__VA_ARGS__); \
  (_VAR##_5_20) = _PERMUTE((_VAR##_5_20), ##__VA_ARGS__); \
  (_VAR##_6_20) = _PERMUTE((_VAR##_6_20), ##__VA_ARGS__); \
  (_VAR##_7_20) = _PERMUTE((_VAR##_7_20), ##__VA_ARGS__); \
  (_VAR##_0_21) = _PERMUTE((_VAR##_0_21), ##__VA_ARGS__); \
  (_VAR##_1_21) = _PERMUTE((_VAR##_1_21), ##__VA_ARGS__); \
  (_VAR##_2_21) = _PERMUTE((_VAR##_2_21), ##__VA_ARGS__); \
  (_VAR##_3_21) = _PERMUTE((_VAR##_3_21), ##__VA_ARGS__); \
  (_VAR##_4_21) = _PERMUTE((_VAR##_4_21), ##__VA_ARGS__); \
  (_VAR##_5_21) = _PERMUTE((_VAR##_5_21), ##__VA_ARGS__); \
  (_VAR##_6_21) = _PERMUTE((_VAR##_6_21), ##__VA_ARGS__); \
  (_VAR##_7_21) = _PERMUTE((_VAR##_7_21), ##__VA_ARGS__); \
  (_VAR##_0_22) = _PERMUTE((_VAR##_0_22), ##__VA_ARGS__); \
  (_VAR##_1_22) = _PERMUTE((_VAR##_1_22), ##__VA_ARGS__); \
  (_VAR##_2_22) = _PERMUTE((_VAR##_2_22), ##__VA_ARGS__); \
  (_VAR##_3_22) = _PERMUTE((_VAR##_3_22), ##__VA_ARGS__); \
  (_VAR##_4_22) = _PERMUTE((_VAR##_4_22), ##__VA_ARGS__); \
  (_VAR##_5_22) = _PERMUTE((_VAR##_5_22), ##__VA_ARGS__); \
  (_VAR##_6_22) = _PERMUTE((_VAR##_6_22), ##__VA_ARGS__); \
  (_VAR##_7_22) = _PERMUTE((_VAR##_7_22), ##__VA_ARGS__); \
  (_VAR##_0_23) = _PERMUTE((_VAR##_0_23), ##__VA_ARGS__); \
  (_VAR##_1_23) = _PERMUTE((_VAR##_1_23), ##__VA_ARGS__); \
  (_VAR##_2_23) = _PERMUTE((_VAR##_2_23), ##__VA_ARGS__); \
  (_VAR##_3_23) = _PERMUTE((_VAR##_3_23), ##__VA_ARGS__); \
  (_VAR##_4_23) = _PERMUTE((_VAR##_4_23), ##__VA_ARGS__); \
  (_VAR##_5_23) = _PERMUTE((_VAR##_5_23), ##__VA_ARGS__); \
  (_VAR##_6_23) = _PERMUTE((_VAR##_6_23), ##__VA_ARGS__); \
  (_VAR##_7_23) = _PERMUTE((_VAR##_7_23), ##__VA_ARGS__); \
  (_VAR##_0_24) = _PERMUTE((_VAR##_0_24), ##__VA_ARGS__); \
  (_VAR##_1_24) = _PERMUTE((_VAR##_1_24), ##__VA_ARGS__); \
  (_VAR##_2_24) = _PERMUTE((_VAR##_2_24), ##__VA_ARGS__); \
  (_VAR##_3_24) = _PERMUTE((_VAR##_3_24), ##__VA_ARGS__); \
  (_VAR##_4_24) = _PERMUTE((_VAR##_4_24), ##__VA_ARGS__); \
  (_VAR##_5_24) = _PERMUTE((_VAR##_5_24), ##__VA_ARGS__); \
  (_VAR##_6_24) = _PERMUTE((_VAR##_6_24), ##__VA_ARGS__); \
  (_VAR##_7_24) = _PERMUTE((_VAR##_7_24), ##__VA_ARGS__); \
  (_VAR##_0_25) = _PERMUTE((_VAR##_0_25), ##__VA_ARGS__); \
  (_VAR##_1_25) = _PERMUTE((_VAR##_1_25), ##__VA_ARGS__); \
  (_VAR##_2_25) = _PERMUTE((_VAR##_2_25), ##__VA_ARGS__); \
  (_VAR##_3_25) = _PERMUTE((_VAR##_3_25), ##__VA_ARGS__); \
  (_VAR##_4_25) = _PERMUTE((_VAR##_4_25), ##__VA_ARGS__); \
  (_VAR##_5_25) = _PERMUTE((_VAR##_5_25), ##__VA_ARGS__); \
  (_VAR##_6_25) = _PERMUTE((_VAR##_6_25), ##__VA_ARGS__); \
  (_VAR##_7_25) = _PERMUTE((_VAR##_7_25), ##__VA_ARGS__); \
  (_VAR##_0_26) = _PERMUTE((_VAR##_0_26), ##__VA_ARGS__); \
  (_VAR##_1_26) = _PERMUTE((_VAR##_1_26), ##__VA_ARGS__); \
  (_VAR##_2_26) = _PERMUTE((_VAR##_2_26), ##__VA_ARGS__); \
  (_VAR##_3_26) = _PERMUTE((_VAR##_3_26), ##__VA_ARGS__); \
  (_VAR##_4_26) = _PERMUTE((_VAR##_4_26), ##__VA_ARGS__); \
  (_VAR##_5_26) = _PERMUTE((_VAR##_5_26), ##__VA_ARGS__); \
  (_VAR##_6_26) = _PERMUTE((_VAR##_6_26), ##__VA_ARGS__); \
  (_VAR##_7_26) = _PERMUTE((_VAR##_7_26), ##__VA_ARGS__); \
  (_VAR##_0_27) = _PERMUTE((_VAR##_0_27), ##__VA_ARGS__); \
  (_VAR##_1_27) = _PERMUTE((_VAR##_1_27), ##__VA_ARGS__); \
  (_VAR##_2_27) = _PERMUTE((_VAR##_2_27), ##__VA_ARGS__); \
  (_VAR##_3_27) = _PERMUTE((_VAR##_3_27), ##__VA_ARGS__); \
  (_VAR##_4_27) = _PERMUTE((_VAR##_4_27), ##__VA_ARGS__); \
  (_VAR##_5_27) = _PERMUTE((_VAR##_5_27), ##__VA_ARGS__); \
  (_VAR##_6_27) = _PERMUTE((_VAR##_6_27), ##__VA_ARGS__); \
  (_VAR##_7_27) = _PERMUTE((_VAR##_7_27), ##__VA_ARGS__); \
  (_VAR##_0_28) = _PERMUTE((_VAR##_0_28), ##__VA_ARGS__); \
  (_VAR##_1_28) = _PERMUTE((_VAR##_1_28), ##__VA_ARGS__); \
  (_VAR##_2_28) = _PERMUTE((_VAR##_2_28), ##__VA_ARGS__); \
  (_VAR##_3_28) = _PERMUTE((_VAR##_3_28), ##__VA_ARGS__); \
  (_VAR##_4_28) = _PERMUTE((_VAR##_4_28), ##__VA_ARGS__); \
  (_VAR##_5_28) = _PERMUTE((_VAR##_5_28), ##__VA_ARGS__); \
  (_VAR##_6_28) = _PERMUTE((_VAR##_6_28), ##__VA_ARGS__); \
  (_VAR##_7_28) = _PERMUTE((_VAR##_7_28), ##__VA_ARGS__); \
  (_VAR##_0_29) = _PERMUTE((_VAR##_0_29), ##__VA_ARGS__); \
  (_VAR##_1_29) = _PERMUTE((_VAR##_1_29), ##__VA_ARGS__); \
  (_VAR##_2_29) = _PERMUTE((_VAR##_2_29), ##__VA_ARGS__); \
  (_VAR##_3_29) = _PERMUTE((_VAR##_3_29), ##__VA_ARGS__); \
  (_VAR##_4_29) = _PERMUTE((_VAR##_4_29), ##__VA_ARGS__); \
  (_VAR##_5_29) = _PERMUTE((_VAR##_5_29), ##__VA_ARGS__); \
  (_VAR##_6_29) = _PERMUTE((_VAR##_6_29), ##__VA_ARGS__); \
  (_VAR##_7_29) = _PERMUTE((_VAR##_7_29), ##__VA_ARGS__); \
  (_VAR##_0_30) = _PERMUTE((_VAR##_0_30), ##__VA_ARGS__); \
  (_VAR##_1_30) = _PERMUTE((_VAR##_1_30), ##__VA_ARGS__); \
  (_VAR##_2_30) = _PERMUTE((_VAR##_2_30), ##__VA_ARGS__); \
  (_VAR##_3_30) = _PERMUTE((_VAR##_3_30), ##__VA_ARGS__); \
  (_VAR##_4_30) = _PERMUTE((_VAR##_4_30), ##__VA_ARGS__); \
  (_VAR##_5_30) = _PERMUTE((_VAR##_5_30), ##__VA_ARGS__); \
  (_VAR##_6_30) = _PERMUTE((_VAR##_6_30), ##__VA_ARGS__); \
  (_VAR##_7_30) = _PERMUTE((_VAR##_7_30), ##__VA_ARGS__); \
  (_VAR##_0_31) = _PERMUTE((_VAR##_0_31), ##__VA_ARGS__); \
  (_VAR##_1_31) = _PERMUTE((_VAR##_1_31), ##__VA_ARGS__); \
  (_VAR##_2_31) = _PERMUTE((_VAR##_2_31), ##__VA_ARGS__); \
  (_VAR##_3_31) = _PERMUTE((_VAR##_3_31), ##__VA_ARGS__); \
  (_VAR##_4_31) = _PERMUTE((_VAR##_4_31), ##__VA_ARGS__); \
  (_VAR##_5_31) = _PERMUTE((_VAR##_5_31), ##__VA_ARGS__); \
  (_VAR##_6_31) = _PERMUTE((_VAR##_6_31), ##__VA_ARGS__); \
  (_VAR##_7_31) = _PERMUTE((_VAR##_7_31), ##__VA_ARGS__);

//! Permute a 16x1 tile of variables in place.
//! Expands MATRIX_VAR_PERMUTE_8X1 (defined earlier; presumably rows 0..7 of
//! column 0) and then reassigns `_VAR_<row>_0` for rows 8..15 with
//! `_PERMUTE(_VAR_<row>_0, extra args...)`.
//! `, ##__VA_ARGS__` is the GNU comma-swallowing form, so `_PERMUTE` may be
//! invoked with or without extra arguments.
#define MATRIX_VAR_PERMUTE_16X1(_VAR, _PERMUTE, ...)      \
  MATRIX_VAR_PERMUTE_8X1(_VAR, _PERMUTE, ##__VA_ARGS__)   \
  (_VAR##_8_0) = _PERMUTE((_VAR##_8_0), ##__VA_ARGS__);   \
  (_VAR##_9_0) = _PERMUTE((_VAR##_9_0), ##__VA_ARGS__);   \
  (_VAR##_10_0) = _PERMUTE((_VAR##_10_0), ##__VA_ARGS__); \
  (_VAR##_11_0) = _PERMUTE((_VAR##_11_0), ##__VA_ARGS__); \
  (_VAR##_12_0) = _PERMUTE((_VAR##_12_0), ##__VA_ARGS__); \
  (_VAR##_13_0) = _PERMUTE((_VAR##_13_0), ##__VA_ARGS__); \
  (_VAR##_14_0) = _PERMUTE((_VAR##_14_0), ##__VA_ARGS__); \
  (_VAR##_15_0) = _PERMUTE((_VAR##_15_0), ##__VA_ARGS__);

//! Permute a 16x2 tile of variables in place.
//! Column 0 is handled by MATRIX_VAR_PERMUTE_16X1; this macro then reassigns
//! `_VAR_<row>_1` for rows 0..15 with `_PERMUTE(_VAR_<row>_1, extra args...)`.
#define MATRIX_VAR_PERMUTE_16X2(_VAR, _PERMUTE, ...)      \
  MATRIX_VAR_PERMUTE_16X1(_VAR, _PERMUTE, ##__VA_ARGS__)  \
  (_VAR##_0_1) = _PERMUTE((_VAR##_0_1), ##__VA_ARGS__);   \
  (_VAR##_1_1) = _PERMUTE((_VAR##_1_1), ##__VA_ARGS__);   \
  (_VAR##_2_1) = _PERMUTE((_VAR##_2_1), ##__VA_ARGS__);   \
  (_VAR##_3_1) = _PERMUTE((_VAR##_3_1), ##__VA_ARGS__);   \
  (_VAR##_4_1) = _PERMUTE((_VAR##_4_1), ##__VA_ARGS__);   \
  (_VAR##_5_1) = _PERMUTE((_VAR##_5_1), ##__VA_ARGS__);   \
  (_VAR##_6_1) = _PERMUTE((_VAR##_6_1), ##__VA_ARGS__);   \
  (_VAR##_7_1) = _PERMUTE((_VAR##_7_1), ##__VA_ARGS__);   \
  (_VAR##_8_1) = _PERMUTE((_VAR##_8_1), ##__VA_ARGS__);   \
  (_VAR##_9_1) = _PERMUTE((_VAR##_9_1), ##__VA_ARGS__);   \
  (_VAR##_10_1) = _PERMUTE((_VAR##_10_1), ##__VA_ARGS__); \
  (_VAR##_11_1) = _PERMUTE((_VAR##_11_1), ##__VA_ARGS__); \
  (_VAR##_12_1) = _PERMUTE((_VAR##_12_1), ##__VA_ARGS__); \
  (_VAR##_13_1) = _PERMUTE((_VAR##_13_1), ##__VA_ARGS__); \
  (_VAR##_14_1) = _PERMUTE((_VAR##_14_1), ##__VA_ARGS__); \
  (_VAR##_15_1) = _PERMUTE((_VAR##_15_1), ##__VA_ARGS__);

//! Permute a 16x4 tile of variables in place.
//! Columns 0 and 1 are handled by MATRIX_VAR_PERMUTE_16X2; this macro then
//! reassigns `_VAR_<row>_2` and `_VAR_<row>_3` for rows 0..15 with
//! `_PERMUTE(<variable>, extra args...)`.
#define MATRIX_VAR_PERMUTE_16X4(_VAR, _PERMUTE, ...)      \
  MATRIX_VAR_PERMUTE_16X2(_VAR, _PERMUTE, ##__VA_ARGS__)  \
  (_VAR##_0_2) = _PERMUTE((_VAR##_0_2), ##__VA_ARGS__);   \
  (_VAR##_1_2) = _PERMUTE((_VAR##_1_2), ##__VA_ARGS__);   \
  (_VAR##_2_2) = _PERMUTE((_VAR##_2_2), ##__VA_ARGS__);   \
  (_VAR##_3_2) = _PERMUTE((_VAR##_3_2), ##__VA_ARGS__);   \
  (_VAR##_4_2) = _PERMUTE((_VAR##_4_2), ##__VA_ARGS__);   \
  (_VAR##_5_2) = _PERMUTE((_VAR##_5_2), ##__VA_ARGS__);   \
  (_VAR##_6_2) = _PERMUTE((_VAR##_6_2), ##__VA_ARGS__);   \
  (_VAR##_7_2) = _PERMUTE((_VAR##_7_2), ##__VA_ARGS__);   \
  (_VAR##_8_2) = _PERMUTE((_VAR##_8_2), ##__VA_ARGS__);   \
  (_VAR##_9_2) = _PERMUTE((_VAR##_9_2), ##__VA_ARGS__);   \
  (_VAR##_10_2) = _PERMUTE((_VAR##_10_2), ##__VA_ARGS__); \
  (_VAR##_11_2) = _PERMUTE((_VAR##_11_2), ##__VA_ARGS__); \
  (_VAR##_12_2) = _PERMUTE((_VAR##_12_2), ##__VA_ARGS__); \
  (_VAR##_13_2) = _PERMUTE((_VAR##_13_2), ##__VA_ARGS__); \
  (_VAR##_14_2) = _PERMUTE((_VAR##_14_2), ##__VA_ARGS__); \
  (_VAR##_15_2) = _PERMUTE((_VAR##_15_2), ##__VA_ARGS__); \
  (_VAR##_0_3) = _PERMUTE((_VAR##_0_3), ##__VA_ARGS__);   \
  (_VAR##_1_3) = _PERMUTE((_VAR##_1_3), ##__VA_ARGS__);   \
  (_VAR##_2_3) = _PERMUTE((_VAR##_2_3), ##__VA_ARGS__);   \
  (_VAR##_3_3) = _PERMUTE((_VAR##_3_3), ##__VA_ARGS__);   \
  (_VAR##_4_3) = _PERMUTE((_VAR##_4_3), ##__VA_ARGS__);   \
  (_VAR##_5_3) = _PERMUTE((_VAR##_5_3), ##__VA_ARGS__);   \
  (_VAR##_6_3) = _PERMUTE((_VAR##_6_3), ##__VA_ARGS__);   \
  (_VAR##_7_3) = _PERMUTE((_VAR##_7_3), ##__VA_ARGS__);   \
  (_VAR##_8_3) = _PERMUTE((_VAR##_8_3), ##__VA_ARGS__);   \
  (_VAR##_9_3) = _PERMUTE((_VAR##_9_3), ##__VA_ARGS__);   \
  (_VAR##_10_3) = _PERMUTE((_VAR##_10_3), ##__VA_ARGS__); \
  (_VAR##_11_3) = _PERMUTE((_VAR##_11_3), ##__VA_ARGS__); \
  (_VAR##_12_3) = _PERMUTE((_VAR##_12_3), ##__VA_ARGS__); \
  (_VAR##_13_3) = _PERMUTE((_VAR##_13_3), ##__VA_ARGS__); \
  (_VAR##_14_3) = _PERMUTE((_VAR##_14_3), ##__VA_ARGS__); \
  (_VAR##_15_3) = _PERMUTE((_VAR##_15_3), ##__VA_ARGS__);

//! Invoke `_PROCESS(lhs, rhs, res)` for rows 0..1 of result column `_K`:
//! `_PROCESS((_LHS_<row>), (_RHS), (_RES_<row>_<_K>))`.
//! No separator is emitted between the invocations, so `_PROCESS` must expand
//! to complete statements.
#define MATRIX_VAR_PROC_2X1(_K, _LHS, _RHS, _RES, _PROCESS) \
  _PROCESS((_LHS##_0), (_RHS), (_RES##_0_##_K))             \
  _PROCESS((_LHS##_1), (_RHS), (_RES##_1_##_K))

//! Same as MATRIX_VAR_PROC_2X1 extended to rows 0..3: rows 0..1 come from
//! MATRIX_VAR_PROC_2X1, rows 2..3 are added here.
#define MATRIX_VAR_PROC_4X1(_K, _LHS, _RHS, _RES, _PROCESS) \
  MATRIX_VAR_PROC_2X1(_K, _LHS, _RHS, _RES, _PROCESS)       \
  _PROCESS((_LHS##_2), (_RHS), (_RES##_2_##_K))             \
  _PROCESS((_LHS##_3), (_RHS), (_RES##_3_##_K))

//! Same as MATRIX_VAR_PROC_4X1 extended to rows 0..7: rows 0..3 come from
//! MATRIX_VAR_PROC_4X1, rows 4..7 are added here.
#define MATRIX_VAR_PROC_8X1(_K, _LHS, _RHS, _RES, _PROCESS) \
  MATRIX_VAR_PROC_4X1(_K, _LHS, _RHS, _RES, _PROCESS)       \
  _PROCESS((_LHS##_4), (_RHS), (_RES##_4_##_K))             \
  _PROCESS((_LHS##_5), (_RHS), (_RES##_5_##_K))             \
  _PROCESS((_LHS##_6), (_RHS), (_RES##_6_##_K))             \
  _PROCESS((_LHS##_7), (_RHS), (_RES##_7_##_K))

//! Same as MATRIX_VAR_PROC_8X1 extended to rows 0..15: rows 0..7 come from
//! MATRIX_VAR_PROC_8X1, rows 8..15 are added here.
#define MATRIX_VAR_PROC_16X1(_K, _LHS, _RHS, _RES, _PROCESS) \
  MATRIX_VAR_PROC_8X1(_K, _LHS, _RHS, _RES, _PROCESS)        \
  _PROCESS((_LHS##_8), (_RHS), (_RES##_8_##_K))              \
  _PROCESS((_LHS##_9), (_RHS), (_RES##_9_##_K))              \
  _PROCESS((_LHS##_10), (_RHS), (_RES##_10_##_K))            \
  _PROCESS((_LHS##_11), (_RHS), (_RES##_11_##_K))            \
  _PROCESS((_LHS##_12), (_RHS), (_RES##_12_##_K))            \
  _PROCESS((_LHS##_13), (_RHS), (_RES##_13_##_K))            \
  _PROCESS((_LHS##_14), (_RHS), (_RES##_14_##_K))            \
  _PROCESS((_LHS##_15), (_RHS), (_RES##_15_##_K))

//! Invoke `_PROCESS(lhs, rhs, res)` for columns 0..1 of result row `_K`:
//! `_PROCESS((_LHS), (_RHS_<col>), (_RES_<_K>_<col>))`.
//! No separator is emitted between the invocations, so `_PROCESS` must expand
//! to complete statements.
#define MATRIX_VAR_PROC_1X2(_K, _LHS, _RHS, _RES, _PROCESS) \
  _PROCESS((_LHS), (_RHS##_0), (_RES##_##_K##_0))           \
  _PROCESS((_LHS), (_RHS##_1), (_RES##_##_K##_1))

//! Same as MATRIX_VAR_PROC_1X2 extended to columns 0..3: columns 0..1 come
//! from MATRIX_VAR_PROC_1X2, columns 2..3 are added here.
#define MATRIX_VAR_PROC_1X4(_K, _LHS, _RHS, _RES, _PROCESS) \
  MATRIX_VAR_PROC_1X2(_K, _LHS, _RHS, _RES, _PROCESS)       \
  _PROCESS((_LHS), (_RHS##_2), (_RES##_##_K##_2))           \
  _PROCESS((_LHS), (_RHS##_3), (_RES##_##_K##_3))

//! Same as MATRIX_VAR_PROC_1X4 extended to columns 0..7: columns 0..3 come
//! from MATRIX_VAR_PROC_1X4, columns 4..7 are added here.
#define MATRIX_VAR_PROC_1X8(_K, _LHS, _RHS, _RES, _PROCESS) \
  MATRIX_VAR_PROC_1X4(_K, _LHS, _RHS, _RES, _PROCESS)       \
  _PROCESS((_LHS), (_RHS##_4), (_RES##_##_K##_4))           \
  _PROCESS((_LHS), (_RHS##_5), (_RES##_##_K##_5))           \
  _PROCESS((_LHS), (_RHS##_6), (_RES##_##_K##_6))           \
  _PROCESS((_LHS), (_RHS##_7), (_RES##_##_K##_7))

//! Same as MATRIX_VAR_PROC_1X8 extended to columns 0..15: columns 0..7 come
//! from MATRIX_VAR_PROC_1X8, columns 8..15 are added here.
#define MATRIX_VAR_PROC_1X16(_K, _LHS, _RHS, _RES, _PROCESS) \
  MATRIX_VAR_PROC_1X8(_K, _LHS, _RHS, _RES, _PROCESS)        \
  _PROCESS((_LHS), (_RHS##_8), (_RES##_##_K##_8))            \
  _PROCESS((_LHS), (_RHS##_9), (_RES##_##_K##_9))            \
  _PROCESS((_LHS), (_RHS##_10), (_RES##_##_K##_10))          \
  _PROCESS((_LHS), (_RHS##_11), (_RES##_##_K##_11))          \
  _PROCESS((_LHS), (_RHS##_12), (_RES##_##_K##_12))          \
  _PROCESS((_LHS), (_RHS##_13), (_RES##_##_K##_13))          \
  _PROCESS((_LHS), (_RHS##_14), (_RES##_##_K##_14))          \
  _PROCESS((_LHS), (_RHS##_15), (_RES##_##_K##_15))

//! Dispatch to MATRIX_VAR_INIT_<_M>X<_N>.
//! `_M` and `_N` are token-pasted, so they are used exactly as spelled at the
//! call site (macro arguments next to `##` are not expanded first).
#define MATRIX_VAR_INIT(_M, _N, _VAR_TYPE, _VAR_NAME, _VAR_INIT) \
  MATRIX_VAR_INIT_##_M##X##_N(_VAR_TYPE, _VAR_NAME, _VAR_INIT)

//! Dispatch to MATRIX_VAR_STORE_<_M>X<_N>, forwarding all remaining
//! arguments (including any optional trailing ones) unchanged.
//! `_M` and `_N` are token-pasted, so they are used exactly as spelled at the
//! call site.
#define MATRIX_VAR_STORE(_M, _N, _STEP, _VAR, _ARRAY, _STORE, _NORM, ...) \
  MATRIX_VAR_STORE_##_M##X##_N(_STEP, _VAR, _ARRAY, _STORE, _NORM,        \
                               ##__VA_ARGS__)

//! Dispatch to MATRIX_VAR_PERMUTE_<_M>X<_N>, forwarding the permute operation
//! and its optional extra arguments.
//! `_M` and `_N` are token-pasted, so they are used exactly as spelled at the
//! call site.
#define MATRIX_VAR_PERMUTE(_M, _N, _VAR, _PERMUTE, ...) \
  MATRIX_VAR_PERMUTE_##_M##X##_N(_VAR, _PERMUTE, ##__VA_ARGS__)

//! Dispatch to MATRIX_VAR_PROC_<_M>X<_N> (e.g. the 2X1..16X1 and 1X2..1X16
//! variants defined above).
//! `_M` and `_N` are token-pasted, so they are used exactly as spelled at the
//! call site.
#define MATRIX_VAR_PROC(_M, _N, _K, _LHS, _RHS, _RES, _PROCESS) \
  MATRIX_VAR_PROC_##_M##X##_N(_K, _LHS, _RHS, _RES, _PROCESS)


================================================
FILE: src/ailego/math/matrix_utility.i
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cstring>
#include <zvec/ailego/internal/platform.h>

namespace zvec {
namespace ailego {

//! Absolute value of a float
/*! Clears the sign bit (bit 31) of the 32-bit representation of `x` and
 *  returns the result; no comparison or branch is involved.
 *  The bits are moved with memcpy rather than through a reinterpret_cast'ed
 *  pointer, because reading a float object through a uint32_t lvalue violates
 *  the strict-aliasing rule (undefined behaviour).
 *  \param x  Input value
 *  \return   `x` with its sign bit cleared
 */
static inline float FastAbs(float x) {
  static_assert(sizeof(uint32_t) == sizeof(float),
                "FastAbs requires a 32-bit float");
  uint32_t bits;
  std::memcpy(&bits, &x, sizeof(bits));
  bits &= 0x7fffffffu;  // drop the sign bit, keep exponent and mantissa
  std::memcpy(&x, &bits, sizeof(x));
  return x;
}

#if defined(__SSE__)
//! Horizontal maximum of the four floats in a 128-bit vector
static inline float HorizontalMax_FP32_V128(__m128 v) {
  // Bring elements 2,3 down to positions 0,1 and fold them onto v.
  const __m128 pair_max =
      _mm_max_ps(v, _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 3, 2)));
  // Bring element 1 down to position 0 and fold again; element 0 is the max.
  const __m128 all_max = _mm_max_ps(
      pair_max, _mm_shuffle_ps(pair_max, pair_max, _MM_SHUFFLE(0, 0, 0, 1)));
  return _mm_cvtss_f32(all_max);
}

//! Horizontal sum of the four floats in a 128-bit vector
static inline float HorizontalAdd_FP32_V128(__m128 v) {
#ifdef __SSE3__
  // Two rounds of pairwise horizontal adds leave the total in element 0.
  const __m128 pair_sums = _mm_hadd_ps(v, v);
  return _mm_cvtss_f32(_mm_hadd_ps(pair_sums, pair_sums));
#else
  // Add the upper half onto the lower half, then add element 1 onto
  // element 0 with a scalar add.
  const __m128 half_sums = _mm_add_ps(v, _mm_movehl_ps(v, v));
  const __m128 second = _mm_shuffle_ps(half_sums, half_sums, 1);
  return _mm_cvtss_f32(_mm_add_ss(half_sums, second));
#endif
}
#endif // __SSE__

#if defined(__SSE2__)
//! Horizontal sum of the four 32-bit integers in a 128-bit vector
/*! \param v  Vector holding four int32 elements
 *  \return   Sum of the four elements (wraps on overflow, as the vector adds
 *            do)
 */
static inline int32_t HorizontalAdd_INT32_V128(__m128i v) {
  // _mm_hadd_epi32 belongs to SSSE3 (not SSE3), so the fast path has to be
  // guarded by __SSSE3__; an SSE3-only build must take the SSE2 fallback.
#ifdef __SSSE3__
  __m128i x1 = _mm_hadd_epi32(v, v);
  __m128i x2 = _mm_hadd_epi32(x1, x1);
  return _mm_cvtsi128_si32(x2);
#else
  // Fold elements 2,3 onto 0,1, then element 1 onto element 0.
  __m128i x1 = _mm_shuffle_epi32(v, _MM_SHUFFLE(0, 0, 3, 2));
  __m128i x2 = _mm_add_epi32(v, x1);
  __m128i x3 = _mm_shuffle_epi32(x2, _MM_SHUFFLE(0, 0, 0, 1));
  __m128i x4 = _mm_add_epi32(x2, x3);
  return _mm_cvtsi128_si32(x4);
#endif
}

//! Horizontal sum of the two 64-bit integers in a 128-bit vector
static inline int64_t HorizontalAdd_INT64_V128(__m128i v) {
#ifdef __SSE4_1__
  // Extract both halves as scalars and add them.
  const int64_t low = _mm_extract_epi64(v, 0);
  const int64_t high = _mm_extract_epi64(v, 1);
  return low + high;
#else
  // Move the upper 64 bits down to the lower half, add, read the low half.
  const __m128i upper_as_lower = _mm_shuffle_epi32(v, _MM_SHUFFLE(0, 0, 3, 2));
  return _mm_cvtsi128_si64(_mm_add_epi64(upper_as_lower, v));
#endif
}
#endif // __SSE2__

#if defined(__SSSE3__)
//! Byte lookup table: entry i (0..15) is the number of set bits in the
//! 4-bit value i. Used as the table operand of _mm_shuffle_epi8 below.
static const __m128i POPCNT_LOOKUP_SSE =
    _mm_setr_epi8(0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4);

//! Per-byte population count of a 128-bit vector
static inline __m128i VerticalPopCount_INT8_V128(__m128i v) {
#if defined(__AVX512VL__) && defined(__AVX512BITALG__)
  return _mm_popcnt_epi8(v);
#else
  // Split every byte into its two nibbles, look each nibble's bit count up
  // in POPCNT_LOOKUP_SSE and add the two counts.
  const __m128i nibble_mask = _mm_set1_epi8(0x0f);
  const __m128i low_nibbles = _mm_and_si128(v, nibble_mask);
  const __m128i high_nibbles =
      _mm_and_si128(_mm_srli_epi32(v, 4), nibble_mask);
  return _mm_add_epi8(_mm_shuffle_epi8(POPCNT_LOOKUP_SSE, low_nibbles),
                      _mm_shuffle_epi8(POPCNT_LOOKUP_SSE, high_nibbles));
#endif  // __AVX512VL__ && __AVX512BITALG__
}

//! Per-16-bit-element population count of a 128-bit vector
static inline __m128i VerticalPopCount_INT16_V128(__m128i v) {
#if defined(__AVX512VL__) && defined(__AVX512BITALG__)
  return _mm_popcnt_epi16(v);
#else
  // Start from per-byte counts, then add the high byte's count of every
  // 16-bit element onto its low byte's count.
  const __m128i byte_counts = VerticalPopCount_INT8_V128(v);
  const __m128i high_bytes = _mm_srli_epi16(byte_counts, 8);
  const __m128i low_bytes = _mm_and_si128(byte_counts, _mm_set1_epi16(0xff));
  return _mm_add_epi16(high_bytes, low_bytes);
#endif  // __AVX512VL__ && __AVX512BITALG__
}

//! Per-32-bit-element population count of a 128-bit vector
static inline __m128i VerticalPopCount_INT32_V128(__m128i v) {
#if defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__)
  return _mm_popcnt_epi32(v);
#else
  // madd with 1 adds the two 16-bit halves of every 32-bit element, which
  // leaves the two partial byte-count sums in bits 0..7 and 8..15.
  const __m128i byte_counts = VerticalPopCount_INT8_V128(v);
  const __m128i pair_sums = _mm_madd_epi16(byte_counts, _mm_set1_epi16(1));
  const __m128i upper_part = _mm_srli_epi32(pair_sums, 8);
  const __m128i lower_part = _mm_and_si128(pair_sums, _mm_set1_epi32(0xff));
  return _mm_add_epi32(upper_part, lower_part);
#endif  // __AVX512VL__ && __AVX512VPOPCNTDQ__
}

//! Per-64-bit-element population count of a 128-bit vector
static inline __m128i VerticalPopCount_INT64_V128(__m128i v) {
#if defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__)
  return _mm_popcnt_epi64(v);
#else
  // Sum of absolute differences against zero adds up the eight per-byte
  // counts of every 64-bit element.
  const __m128i byte_counts = VerticalPopCount_INT8_V128(v);
  return _mm_sad_epu8(byte_counts, _mm_setzero_si128());
#endif  // __AVX512VL__ && __AVX512VPOPCNTDQ__
}
#endif // __SSSE3__

#if defined(__SSE4_1__)
//! Horizontal maximum of the sixteen unsigned bytes in a 128-bit vector
static inline int16_t HorizontalMax_UINT8_V128(__m128i v) {
  // Halve the candidate set at every step: 128 -> 64 -> 32 -> 16 -> 8 bits.
  const __m128i max64 =
      _mm_max_epu8(v, _mm_shuffle_epi32(v, _MM_SHUFFLE(3, 2, 3, 2)));
  const __m128i max32 =
      _mm_max_epu8(max64, _mm_shuffle_epi32(max64, _MM_SHUFFLE(1, 1, 1, 1)));
  const __m128i max16 = _mm_max_epu8(
      max32, _mm_shufflelo_epi16(max32, _MM_SHUFFLE(1, 1, 1, 1)));
  const __m128i max8 = _mm_max_epu8(max16, _mm_srli_epi16(max16, 8));
  // The lowest byte now holds the overall maximum.
  return static_cast<uint8_t>(_mm_cvtsi128_si32(max8));
}
#endif // __SSE4_1__

#if defined(__AVX__)
//! Horizontal maximum of the eight floats in a 256-bit vector
static inline float HorizontalMax_FP32_V256(__m256 v) {
  // Within each 128-bit lane, fold elements 2,3 onto elements 0,1.
  __m256 x1 = _mm256_permute_ps(v, _MM_SHUFFLE(0, 0, 3, 2));
  __m256 x2 = _mm256_max_ps(v, x1);
  // Fold element 1 onto element 0: element 0 of each lane is that lane's max.
  __m256 x3 = _mm256_permute_ps(x2, _MM_SHUFFLE(0, 0, 0, 1));
  __m256 x4 = _mm256_max_ps(x2, x3);
  // Combine the two lanes' maxima with a scalar (lowest element) max.
  __m128 x5 = _mm256_extractf128_ps(x4, 1);
  __m128 x6 = _mm_max_ss(_mm256_castps256_ps128(x4), x5);
  return _mm_cvtss_f32(x6);
}

//! Horizontal sum of the eight floats in a 256-bit vector
static inline float HorizontalAdd_FP32_V256(__m256 v) {
  // Two rounds of in-lane pairwise adds: element 0 of each 128-bit lane ends
  // up holding the sum of that lane's four floats.
  __m256 x1 = _mm256_hadd_ps(v, v);
  __m256 x2 = _mm256_hadd_ps(x1, x1);
  // Add the upper lane's sum onto the lower lane's sum (scalar add).
  __m128 x3 = _mm256_extractf128_ps(x2, 1);
  __m128 x4 = _mm_add_ss(_mm256_castps256_ps128(x2), x3);
  return _mm_cvtss_f32(x4);
}
#endif // __AVX__

#if defined(__AVX2__)
// Constant vectors used by the 256-bit VerticalPopCount_* fallbacks below.
// Each macro expands to an intrinsic call, so the vector is rebuilt at every
// use site rather than stored in a global.
//   POPCNT_MASK1_INT8_AVX   selects the low nibble of every byte
//   POPCNT_MASK1_INT16_AVX  multiplier of 1 for _mm256_madd_epi16
//   POPCNT_MASK2_INT16_AVX  selects the low byte of every 16-bit element
//   POPCNT_MASK1_INT32_AVX  selects the low byte of every 32-bit element
//   POPCNT_ZERO_AVX         all-zero vector
//   POPCNT_LOOKUP_AVX       per-nibble bit counts, repeated for both lanes
#define POPCNT_MASK1_INT8_AVX _mm256_set1_epi8(0x0f)
#define POPCNT_MASK1_INT16_AVX  _mm256_set1_epi16(1)
#define POPCNT_MASK2_INT16_AVX _mm256_set1_epi16(0xff)
#define POPCNT_MASK1_INT32_AVX _mm256_set1_epi32(0xff)
#define POPCNT_ZERO_AVX _mm256_setzero_si256()
#define POPCNT_LOOKUP_AVX _mm256_setr_epi8(0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4)

//! Per-byte population count of a 256-bit vector
static inline __m256i VerticalPopCount_INT8_V256(__m256i v) {
#if defined(__AVX512VL__) && defined(__AVX512BITALG__)
  return _mm256_popcnt_epi8(v);
#else
  // Split every byte into its two nibbles, look each nibble's bit count up
  // in the table and add the two counts.
  const __m256i low_nibbles = _mm256_and_si256(v, POPCNT_MASK1_INT8_AVX);
  const __m256i low_counts =
      _mm256_shuffle_epi8(POPCNT_LOOKUP_AVX, low_nibbles);
  const __m256i high_nibbles =
      _mm256_and_si256(_mm256_srli_epi32(v, 4), POPCNT_MASK1_INT8_AVX);
  const __m256i high_counts =
      _mm256_shuffle_epi8(POPCNT_LOOKUP_AVX, high_nibbles);
  return _mm256_add_epi8(low_counts, high_counts);
#endif  // __AVX512VL__ && __AVX512BITALG__
}

//! Per-16-bit-element population count of a 256-bit vector
static inline __m256i VerticalPopCount_INT16_V256(__m256i v) {
#if defined(__AVX512VL__) && defined(__AVX512BITALG__)
  return _mm256_popcnt_epi16(v);
#else
  // Start from per-byte counts, then add the high byte's count of every
  // 16-bit element onto its low byte's count.
  const __m256i byte_counts = VerticalPopCount_INT8_V256(v);
  const __m256i high_bytes = _mm256_srli_epi16(byte_counts, 8);
  const __m256i low_bytes =
      _mm256_and_si256(byte_counts, POPCNT_MASK2_INT16_AVX);
  return _mm256_add_epi16(high_bytes, low_bytes);
#endif  // __AVX512VL__ && __AVX512BITALG__
}

//! Per-32-bit-element population count of a 256-bit vector
static inline __m256i VerticalPopCount_INT32_V256(__m256i v) {
#if defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__)
  return _mm256_popcnt_epi32(v);
#else
  // madd with 1 adds the two 16-bit halves of every 32-bit element, which
  // leaves the two partial byte-count sums in bits 0..7 and 8..15.
  const __m256i byte_counts = VerticalPopCount_INT8_V256(v);
  const __m256i pair_sums =
      _mm256_madd_epi16(byte_counts, POPCNT_MASK1_INT16_AVX);
  const __m256i upper_part = _mm256_srli_epi32(pair_sums, 8);
  const __m256i lower_part =
      _mm256_and_si256(pair_sums, POPCNT_MASK1_INT32_AVX);
  return _mm256_add_epi32(upper_part, lower_part);
#endif  // __AVX512VL__ && __AVX512VPOPCNTDQ__
}

//! Per-64-bit-element population count of a 256-bit vector
static inline __m256i VerticalPopCount_INT64_V256(__m256i v) {
#if defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__)
  return _mm256_popcnt_epi64(v);
#else
  // Sum of absolute differences against zero adds up the eight per-byte
  // counts of every 64-bit element.
  const __m256i byte_counts = VerticalPopCount_INT8_V256(v);
  return _mm256_sad_epu8(byte_counts, POPCNT_ZERO_AVX);
#endif  // __AVX512VL__ && __AVX512VPOPCNTDQ__
}

//! Horizontal maximum of the thirty-two unsigned bytes in a 256-bit vector
static inline int16_t HorizontalMax_UINT8_V256(__m256i v) {
  // In each 128-bit lane: fold the upper 64 bits onto the lower 64 bits,
  // then the second 32-bit element onto the first, then the second 16-bit
  // word onto the first.
  v = _mm256_max_epu8(v, _mm256_shuffle_epi32(v, _MM_SHUFFLE(3, 2, 3, 2)));
  v = _mm256_max_epu8(v, _mm256_shuffle_epi32(v, _MM_SHUFFLE(1, 1, 1, 1)));
  v = _mm256_max_epu8(v, _mm256_shufflelo_epi16(v, _MM_SHUFFLE(1, 1, 1, 1)));
  // Merge the two lanes.
  __m128i x =
      _mm_max_epu8(_mm256_castsi256_si128(v), _mm256_extractf128_si256(v, 1));
  // Fold the second byte onto the first; the lowest byte is the overall max.
  x = _mm_max_epu8(x, _mm_srli_epi16(x, 8));
  return static_cast<uint8_t>(_mm_cvtsi128_si32(x));
}

//! Horizontal sum of the eight 32-bit integers in a 256-bit vector
static inline int32_t HorizontalAdd_INT32_V256(__m256i v) {
  // Two rounds of in-lane pairwise adds: element 0 of each 128-bit lane ends
  // up holding the sum of that lane's four integers.
  __m256i x1 = _mm256_hadd_epi32(v, v);
  __m256i x2 = _mm256_hadd_epi32(x1, x1);
  // Add the upper lane onto the lower lane and read element 0.
  __m128i x3 = _mm256_extractf128_si256(x2, 1);
  __m128i x4 = _mm_add_epi32(_mm256_castsi256_si128(x2), x3);
  return _mm_cvtsi128_si32(x4);
}

//! Horizontal sum of the four 64-bit integers in a 256-bit vector
static inline int64_t HorizontalAdd_INT64_V256(__m256i v) {
  // Swap the two 64-bit halves inside each 128-bit lane and add, so both
  // elements of a lane hold that lane's sum.
  __m256i x1 = _mm256_shuffle_epi32(v, _MM_SHUFFLE(1, 0, 3, 2));
  __m256i x2 = _mm256_add_epi64(v, x1);
  // Add the upper lane onto the lower lane and read the low 64 bits.
  __m128i x3 = _mm256_extractf128_si256(x2, 1);
  __m128i x4 = _mm_add_epi64(_mm256_extractf128_si256(x2, 0), x3);
  return _mm_cvtsi128_si64(x4);
}
#endif // __AVX2__

#if defined(__AVX512F__)
//! Horizontal maximum of the sixteen floats in a 512-bit vector
static inline float HorizontalMax_FP32_V512(__m512 v) {
  // Split into the upper and lower 256-bit halves, take their element-wise
  // maximum and finish with the 256-bit reduction.
  const __m256 upper_half =
      _mm256_castpd_ps(_mm512_extractf64x4_pd(_mm512_castps_pd(v), 1));
  const __m256 lower_half = _mm512_castps512_ps256(v);
  const __m256 folded = _mm256_max_ps(lower_half, upper_half);
  return HorizontalMax_FP32_V256(folded);
}

//! Horizontal sum of the sixteen floats in a 512-bit vector
static inline float HorizontalAdd_FP32_V512(__m512 v) {
  // Split into the upper and lower 256-bit halves, add them element-wise
  // and finish with the 256-bit reduction.
  const __m256 upper_half =
      _mm256_castpd_ps(_mm512_extractf64x4_pd(_mm512_castps_pd(v), 1));
  const __m256 lower_half = _mm512_castps512_ps256(v);
  const __m256 folded = _mm256_add_ps(lower_half, upper_half);
  return HorizontalAdd_FP32_V256(folded);
}
#endif // __AVX512F__

#if defined(__AVX512FP16__)
//! Horizontal maximum of the half-precision values in a 512-bit vector
static inline float HorizontalMax_FP16_V512(__m512h v) {
  // Widen each 256-bit half to single precision, take the element-wise
  // maximum and finish with the FP32 512-bit reduction.
  const __m256h lower_half = _mm512_castph512_ph256(v);
  const __m256h upper_half =
      _mm256_castpd_ph(_mm512_extractf64x4_pd(_mm512_castph_pd(v), 1));
  const __m512 lower_fp32 = _mm512_cvtxph_ps(lower_half);
  const __m512 upper_fp32 = _mm512_cvtxph_ps(upper_half);
  return HorizontalMax_FP32_V512(_mm512_max_ps(lower_fp32, upper_fp32));
}

//! Horizontal sum of the half-precision values in a 512-bit vector
static inline float HorizontalAdd_FP16_V512(__m512h v) {
  // Widen each 256-bit half to single precision, add element-wise and
  // finish with the FP32 512-bit reduction.
  const __m256h lower_half = _mm512_castph512_ph256(v);
  const __m256h upper_half =
      _mm256_castpd_ph(_mm512_extractf64x4_pd(_mm512_castph_pd(v), 1));
  const __m512 lower_fp32 = _mm512_cvtxph_ps(lower_half);
  const __m512 upper_fp32 = _mm512_cvtxph_ps(upper_half);
  return HorizontalAdd_FP32_V512(_mm512_add_ps(lower_fp32, upper_fp32));
}
#endif // __AVX512FP16__

} // namespace ailego
} // namespace zvec


================================================
FILE: src/ailego/math/mips_euclidean_distance_matrix.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <array>
#include <ailego/math/norm2_matrix.h>
#include <ailego/utility/math_helper.h>
#include <zvec/ailego/internal/platform.h>
#include <zvec/ailego/utility/type_helper.h>
#include "distance_utility.h"

namespace zvec {
namespace ailego {

//--------------------------------------------------
// Dense
//--------------------------------------------------
/*! Compute the Mips SphericalInjection Squared Euclidean Distance from the
 *  two vectors' inner product and their squared L2 norms.
 *  \param ip  Inner product of the two vectors
 *  \param u2  Squared L2 norm of the first vector
 *  \param v2  Squared L2 norm of the second vector
 *  \param e2  1.0 / max_squared_l2_norm; 0.0 selects the localized variant,
 *             which normalizes by max(u2, v2) instead
 *  \return    The injected squared distance, narrowed to float
 */
static float inline ComputeSphericalInjection(double ip, double u2, double v2,
                                              double e2) {
  if (e2 != 0.0) {
    const double radicand = (1.0 - e2 * u2) * (1.0 - e2 * v2);
    double score = 1.0 - e2 * ip;
    // The square-root term is only subtracted when the radicand is
    // strictly positive.
    if (radicand > 0.0) {
      score -= std::sqrt(radicand);
    }
    return static_cast<float>(score * 2.0);
  }
  // Implies *localized* spherical injection.
  return static_cast<float>(2.0 - 2.0 * ip / std::max(u2, v2));
}

/*! Mips Squared Euclidean Distance Matrix
 *
 *  Primary template, declared only; every usable form is a specialization.
 *  \tparam T  Element type of the vectors
 *  \tparam M  Size of the first dimension of the tile (a compile-time
 *             constant the specializations dispatch on)
 *  \tparam N  Size of the second dimension of the tile
 *  The unnamed fourth parameter is the std::enable_if hook used by the
 *  partial specializations.
 */
template <typename T, size_t M, size_t N, typename = void>
struct MipsSquaredEuclideanDistanceMatrix;

/*! Mips Squared Euclidean Distance Matrix (M=1, N=1)
 *
 *  Generic scalar implementation for one vector against one query.
 */
template <typename T>
struct MipsSquaredEuclideanDistanceMatrix<T, 1, 1> {
  //! Type of value
  using ValueType = typename std::remove_cv<T>::type;

  //! SphericalInjection distance between `p` and `q` (both of length `dim`);
  //! the single result is written to `*out`.
  static inline void Compute(const ValueType *p, const ValueType *q, size_t dim,
                             float e2, float *out) {
    ailego_assert(p && q && dim && out);

    float dot = 0.0;
    float p_norm2 = 0.0;
    float q_norm2 = 0.0;
    for (size_t d = 0; d != dim; ++d) {
      p_norm2 += p[d] * p[d];
      q_norm2 += q[d] * q[d];
      dot += static_cast<float>(p[d] * q[d]);
    }
    *out = ComputeSphericalInjection(dot, p_norm2, q_norm2, e2);
  }

  //! RepeatedQuadraticInjection distance between `p` and `q` with `m`
  //! injection rounds; the single result is written to `*out`.
  static inline void Compute(const ValueType *p, const ValueType *q, size_t dim,
                             size_t m, float e2, float *out) {
    ailego_assert(p && q && dim && out);

    float acc = 0.0;
    float p_norm2 = 0.0;
    float q_norm2 = 0.0;
    for (size_t d = 0; d != dim; ++d) {
      p_norm2 += p[d] * p[d];
      q_norm2 += q[d] * q[d];
      acc += MathHelper::SquaredDifference(p[d], q[d]);
    }

    // Scale everything by e2, then add one squared norm gap per round while
    // squaring both norms for the next round.
    acc *= e2;
    p_norm2 *= e2;
    q_norm2 *= e2;
    for (size_t round = 0; round < m; ++round) {
      acc += (p_norm2 - q_norm2) * (p_norm2 - q_norm2);
      p_norm2 = p_norm2 * p_norm2;
      q_norm2 = q_norm2 * q_norm2;
    }
    *out = acc;
  }
};

/*! Mips Squared Euclidean Distance Matrix (UINT8, M=1, N=1)
 *
 *  Full specialization. Both Compute overloads are only declared here; their
 *  definitions live outside this header.
 */
template <>
struct MipsSquaredEuclideanDistanceMatrix<uint8_t, 1, 1> {
  //! Type of value
  using ValueType = uint8_t;

  // Compute the distance between matrix and query by SphericalInjection
  static void Compute(const ValueType *p, const ValueType *q, size_t dim,
                      float e2, float *out);

  // Compute the distance between matrix and query by RepeatedQuadraticInjection
  // (`m` is the number of injection rounds in the generic implementation)
  static void Compute(const ValueType *p, const ValueType *q, size_t dim,
                      size_t m, float e2, float *out);
};

/*! Mips Squared Euclidean Distance Matrix (INT8, M=1, N=1)
 *
 *  Full specialization. Both Compute overloads are only declared here; their
 *  definitions live outside this header.
 */
template <>
struct MipsSquaredEuclideanDistanceMatrix<int8_t, 1, 1> {
  //! Type of value
  using ValueType = int8_t;

  // Compute the distance between matrix and query by SphericalInjection
  static void Compute(const ValueType *p, const ValueType *q, size_t dim,
                      float e2, float *out);

  // Compute the distance between matrix and query by RepeatedQuadraticInjection
  // (`m` is the number of injection rounds in the generic implementation)
  static void Compute(const ValueType *p, const ValueType *q, size_t dim,
                      size_t m, float e2, float *out);
};

/*! Mips Squared Euclidean Distance Matrix (FP16, M=1, N=1)
 *
 *  Full specialization. Both Compute overloads are only declared here; their
 *  definitions live outside this header.
 */
template <>
struct MipsSquaredEuclideanDistanceMatrix<Float16, 1, 1> {
  //! Type of value
  using ValueType = Float16;

  // Compute the distance between matrix and query by SphericalInjection
  static void Compute(const ValueType *p, const ValueType *q, size_t dim,
                      float e2, float *out);

  // Compute the distance between matrix and query by RepeatedQuadraticInjection
  // (`m` is the number of injection rounds in the generic implementation)
  static void Compute(const ValueType *p, const ValueType *q, size_t dim,
                      size_t m, float e2, float *out);
};

/*! Mips Squared Euclidean Distance Matrix (FP32, M=1, N=1)
 *
 *  Full specialization. Both Compute overloads are only declared here; their
 *  definitions live outside this header.
 */
template <>
struct MipsSquaredEuclideanDistanceMatrix<float, 1, 1> {
  //! Type of value
  using ValueType = float;

  // Compute the distance between matrix and query by SphericalInjection
  static void Compute(const ValueType *p, const ValueType *q, size_t dim,
                      float e2, float *out);

  // Compute the distance between matrix and query by RepeatedQuadraticInjection
  // (`m` is the number of injection rounds in the generic implementation)
  static void Compute(const ValueType *p, const ValueType *q, size_t dim,
                      size_t m, float e2, float *out);
};

/*! Mips Squared Euclidean Distance Matrix (M >= 2, N >= 2)
 *
 *  Layout as read by the code: for dimension k, the M values of the first
 *  operand are p[k * M + i] and the N values of the second operand are
 *  q[k * N + j]; the result for pair (i, j) is stored at out[j * M + i].
 */
template <typename T, size_t M, size_t N>
struct MipsSquaredEuclideanDistanceMatrix<
    T, M, N, typename std::enable_if<M >= 2 && N >= 2>::type> {
  //! Type of value
  using ValueType = typename std::remove_cv<T>::type;

  //! SphericalInjection distances for all M x N pairs
  static inline void Compute(const ValueType *p, const ValueType *q, size_t dim,
                             float e2, float *out) {
    ailego_assert(p && q && dim && out);
    if (dim == 0) {
      return;
    }

    std::array<float, M> u2;
    std::array<float, N> v2;

    // Dimension 0 initializes the accumulators (no prior zero-fill needed).
    for (size_t row = 0; row < M; ++row) {
      const ValueType lhs = p[row];
      u2[row] = static_cast<float>(lhs * lhs);
      for (size_t col = 0; col < N; ++col) {
        out[col * M + row] = static_cast<float>(lhs * q[col]);
      }
    }
    for (size_t col = 0; col < N; ++col) {
      v2[col] = static_cast<float>(q[col] * q[col]);
    }

    // Remaining dimensions accumulate inner products and squared norms.
    for (size_t k = 1; k < dim; ++k) {
      const ValueType *p_row = p + k * M;
      const ValueType *q_row = q + k * N;
      for (size_t row = 0; row < M; ++row) {
        const ValueType lhs = p_row[row];
        u2[row] += static_cast<float>(lhs * lhs);
        for (size_t col = 0; col < N; ++col) {
          out[col * M + row] += static_cast<float>(lhs * q_row[col]);
        }
      }
      for (size_t col = 0; col < N; ++col) {
        v2[col] += static_cast<float>(q_row[col] * q_row[col]);
      }
    }

    // Compute the injection
    for (size_t row = 0; row < M; ++row) {
      const float u2_val = u2[row];
      for (size_t col = 0; col < N; ++col) {
        float &cell = out[col * M + row];
        cell = ComputeSphericalInjection(cell, u2_val, v2[col], e2);
      }
    }
  }

  //! RepeatedQuadraticInjection distances for all M x N pairs, using `m`
  //! injection rounds
  static inline void Compute(const ValueType *p, const ValueType *q, size_t dim,
                             size_t m, float e2, float *out) {
    ailego_assert(p && q && dim && out);
    if (dim == 0) {
      return;
    }

    std::array<float, M> u2;
    std::array<float, N> v2;

    // Dimension 0 initializes the accumulators (no prior zero-fill needed).
    for (size_t row = 0; row < M; ++row) {
      const ValueType lhs = p[row];
      u2[row] = static_cast<float>(lhs * lhs);
      for (size_t col = 0; col < N; ++col) {
        out[col * M + row] = MathHelper::SquaredDifference(lhs, q[col]);
      }
    }
    for (size_t col = 0; col < N; ++col) {
      v2[col] = static_cast<float>(q[col] * q[col]);
    }

    // Remaining dimensions accumulate squared differences and squared norms.
    for (size_t k = 1; k < dim; ++k) {
      const ValueType *p_row = p + k * M;
      const ValueType *q_row = q + k * N;
      for (size_t row = 0; row < M; ++row) {
        const ValueType lhs = p_row[row];
        u2[row] += static_cast<float>(lhs * lhs);
        for (size_t col = 0; col < N; ++col) {
          out[col * M + row] += MathHelper::SquaredDifference(lhs, q_row[col]);
        }
      }
      for (size_t col = 0; col < N; ++col) {
        v2[col] += static_cast<float>(q_row[col] * q_row[col]);
      }
    }

    // Compute the injections: scale distances and norms by e2 first.
    for (size_t idx = 0; idx < M * N; ++idx) {
      out[idx] *= e2;
    }
    for (size_t row = 0; row < M; ++row) {
      u2[row] *= e2;
    }
    for (size_t col = 0; col < N; ++col) {
      v2[col] *= e2;
    }
    // Each round adds the squared norm gap and then squares both norms.
    for (size_t round = 0; round < m; ++round) {
      for (size_t row = 0; row < M; ++row) {
        const float u2_val = u2[row];
        u2[row] = u2_val * u2_val;
        for (size_t col = 0; col < N; ++col) {
          out[col * M + row] += (u2_val - v2[col]) * (u2_val - v2[col]);
        }
      }
      for (size_t col = 0; col < N; ++col) {
        v2[col] = v2[col] * v2[col];
      }
    }
  }
};

/*! Mips Squared Euclidean Distance Matrix (N=1)
 *
 *  Layout as read by the code: for dimension k, the M values of the first
 *  operand are p[k * M + i] and the query value is q[k]; the result for
 *  row i is stored at out[i].
 */
template <typename T, size_t M>
struct MipsSquaredEuclideanDistanceMatrix<
    T, M, 1, typename std::enable_if<M >= 2>::type> {
  //! Type of value
  using ValueType = typename std::remove_cv<T>::type;

  //! SphericalInjection distances of M vectors against one query
  static inline void Compute(const ValueType *p, const ValueType *q, size_t dim,
                             float e2, float *out) {
    ailego_assert(p && q && dim && out);
    if (dim == 0) {
      return;
    }

    std::array<float, M> u2;

    // Dimension 0 initializes the accumulators (no prior zero-fill needed).
    const ValueType q_first = q[0];
    float v2 = static_cast<float>(q_first * q_first);
    for (size_t row = 0; row < M; ++row) {
      u2[row] = static_cast<float>(p[row] * p[row]);
      out[row] = static_cast<float>(p[row] * q_first);
    }

    // Remaining dimensions accumulate inner products and squared norms.
    for (size_t k = 1; k < dim; ++k) {
      const ValueType q_val = q[k];
      const ValueType *p_row = p + k * M;
      v2 += static_cast<float>(q_val * q_val);
      for (size_t row = 0; row < M; ++row) {
        u2[row] += static_cast<float>(p_row[row] * p_row[row]);
        out[row] += static_cast<float>(p_row[row] * q_val);
      }
    }

    // Compute the injection
    for (size_t row = 0; row < M; ++row) {
      out[row] = ComputeSphericalInjection(out[row], u2[row], v2, e2);
    }
  }

  //! RepeatedQuadraticInjection distances of M vectors against one query,
  //! using `m` injection rounds
  static inline void Compute(const ValueType *p, const ValueType *q, size_t dim,
                             size_t m, float e2, float *out) {
    ailego_assert(p && q && dim && out);
    if (dim == 0) {
      return;
    }

    std::array<float, M> u2;

    // Dimension 0 initializes the accumulators (no prior zero-fill needed).
    const ValueType q_first = q[0];
    float v2 = static_cast<float>(q_first * q_first);
    for (size_t row = 0; row < M; ++row) {
      u2[row] = static_cast<float>(p[row] * p[row]);
      out[row] = MathHelper::SquaredDifference(p[row], q_first);
    }

    // Remaining dimensions accumulate squared differences and squared norms.
    for (size_t k = 1; k < dim; ++k) {
      const ValueType q_val = q[k];
      const ValueType *p_row = p + k * M;
      v2 += static_cast<float>(q_val * q_val);
      for (size_t row = 0; row < M; ++row) {
        u2[row] += static_cast<float>(p_row[row] * p_row[row]);
        out[row] += MathHelper::SquaredDifference(p_row[row], q_val);
      }
    }

    // Compute the injections: scale distances and norms by e2 first.
    for (size_t row = 0; row < M; ++row) {
      out[row] *= e2;
      u2[row] *= e2;
    }
    v2 *= e2;
    // Each round adds the squared norm gap and then squares both norms.
    for (size_t round = 0; round < m; ++round) {
      for (size_t row = 0; row < M; ++row) {
        const float u_val = u2[row];
        u2[row] = u_val * u_val;
        out[row] += (u_val - v2) * (u_val - v2);
      }
      v2 = v2 * v2;
    }
  }
};

/*! Mips Squared Euclidean Distance Matrix (INT8, M >= 2, N >= 2)
 *
 *  The inputs are read as 32-bit words, each carrying four int8 values, so
 *  `dim` must be a multiple of 4. For word index k the M words of the first
 *  operand are at word offset k * M + i and the N words of the second at
 *  k * N + j; the result for pair (i, j) is stored at out[j * M + i].
 */
template <size_t M, size_t N>
struct MipsSquaredEuclideanDistanceMatrix<
    int8_t, M, N, typename std::enable_if<M >= 2 && N >= 2>::type> {
  //! Type of value
  using ValueType = int8_t;

  //! SphericalInjection distances for all M x N pairs
  static inline void Compute(const ValueType *p, const ValueType *q, size_t dim,
                             float e2, float *out) {
    ailego_assert(p && q && dim && !(dim & 3) && out);
    const size_t blocks = dim >> 2;  // number of 4-value words per vector
    if (blocks == 0) {
      return;
    }

    std::array<float, M> u2;
    std::array<float, N> v2;
    const uint32_t *p_words = reinterpret_cast<const uint32_t *>(p);
    const uint32_t *q_words = reinterpret_cast<const uint32_t *>(q);

    // Word 0 initializes the accumulators (no prior zero-fill needed).
    for (size_t row = 0; row < M; ++row) {
      const uint32_t lhs = p_words[row];
      u2[row] = Squared(lhs);
      for (size_t col = 0; col < N; ++col) {
        out[col * M + row] = FusedMultiplyAdd(lhs, q_words[col]);
      }
    }
    for (size_t col = 0; col < N; ++col) {
      v2[col] = Squared(q_words[col]);
    }

    // Remaining words accumulate inner products and squared norms.
    for (size_t k = 1; k < blocks; ++k) {
      const uint32_t *p_row = p_words + k * M;
      const uint32_t *q_row = q_words + k * N;
      for (size_t row = 0; row < M; ++row) {
        const uint32_t lhs = p_row[row];
        u2[row] += Squared(lhs);
        for (size_t col = 0; col < N; ++col) {
          out[col * M + row] += FusedMultiplyAdd(lhs, q_row[col]);
        }
      }
      for (size_t col = 0; col < N; ++col) {
        v2[col] += Squared(q_row[col]);
      }
    }

    // Compute the injection
    for (size_t row = 0; row < M; ++row) {
      const float u2_val = u2[row];
      for (size_t col = 0; col < N; ++col) {
        float &cell = out[col * M + row];
        cell = ComputeSphericalInjection(cell, u2_val, v2[col], e2);
      }
    }
  }

  //! RepeatedQuadraticInjection distances for all M x N pairs, using `m`
  //! injection rounds
  static inline void Compute(const ValueType *p, const ValueType *q, size_t dim,
                             size_t m, float e2, float *out) {
    ailego_assert(p && q && dim && !(dim & 3) && out);
    const size_t blocks = dim >> 2;  // number of 4-value words per vector
    if (blocks == 0) {
      return;
    }

    std::array<float, M> u2;
    std::array<float, N> v2;
    const uint32_t *p_words = reinterpret_cast<const uint32_t *>(p);
    const uint32_t *q_words = reinterpret_cast<const uint32_t *>(q);

    // Word 0 initializes the accumulators (no prior zero-fill needed).
    for (size_t row = 0; row < M; ++row) {
      const uint32_t lhs = p_words[row];
      u2[row] = Squared(lhs);
      for (size_t col = 0; col < N; ++col) {
        out[col * M + row] = SquaredDifference(lhs, q_words[col]);
      }
    }
    for (size_t col = 0; col < N; ++col) {
      v2[col] = Squared(q_words[col]);
    }

    // Remaining words accumulate squared differences and squared norms.
    for (size_t k = 1; k < blocks; ++k) {
      const uint32_t *p_row = p_words + k * M;
      const uint32_t *q_row = q_words + k * N;
      for (size_t row = 0; row < M; ++row) {
        const uint32_t lhs = p_row[row];
        u2[row] += Squared(lhs);
        for (size_t col = 0; col < N; ++col) {
          out[col * M + row] += SquaredDifference(lhs, q_row[col]);
        }
      }
      for (size_t col = 0; col < N; ++col) {
        v2[col] += Squared(q_row[col]);
      }
    }

    // Compute the injections: scale distances and norms by e2 first.
    for (size_t idx = 0; idx < M * N; ++idx) {
      out[idx] *= e2;
    }
    for (size_t row = 0; row < M; ++row) {
      u2[row] *= e2;
    }
    for (size_t col = 0; col < N; ++col) {
      v2[col] *= e2;
    }
    // Each round adds the squared norm gap and then squares both norms.
    for (size_t round = 0; round < m; ++round) {
      for (size_t row = 0; row < M; ++row) {
        const float u2_val = u2[row];
        u2[row] = u2_val * u2_val;
        for (size_t col = 0; col < N; ++col) {
          out[col * M + row] += (u2_val - v2[col]) * (u2_val - v2[col]);
        }
      }
      for (size_t col = 0; col < N; ++col) {
        v2[col] = v2[col] * v2[col];
      }
    }
  }

 protected:
  //! Inner product of the four int8 values packed in `lhs` and `rhs`
  static inline float FusedMultiplyAdd(uint32_t lhs, uint32_t rhs) {
    const int prod0 =
        static_cast<int8_t>(lhs >> 0) * static_cast<int8_t>(rhs >> 0);
    const int prod1 =
        static_cast<int8_t>(lhs >> 8) * static_cast<int8_t>(rhs >> 8);
    const int prod2 =
        static_cast<int8_t>(lhs >> 16) * static_cast<int8_t>(rhs >> 16);
    const int prod3 =
        static_cast<int8_t>(lhs >> 24) * static_cast<int8_t>(rhs >> 24);
    return static_cast<float>(prod0 + prod1 + prod2 + prod3);
  }

  //! Sum of squared differences of the four int8 values packed in `lhs`
  //! and `rhs`
  static inline float SquaredDifference(uint32_t lhs, uint32_t rhs) {
    const auto diff0 = MathHelper::SquaredDifference<int8_t, int32_t>(
        static_cast<int8_t>(lhs >> 0), static_cast<int8_t>(rhs >> 0));
    const auto diff1 = MathHelper::SquaredDifference<int8_t, int32_t>(
        static_cast<int8_t>(lhs >> 8), static_cast<int8_t>(rhs >> 8));
    const auto diff2 = MathHelper::SquaredDifference<int8_t, int32_t>(
        static_cast<int8_t>(lhs >> 16), static_cast<int8_t>(rhs >> 16));
    const auto diff3 = MathHelper::SquaredDifference<int8_t, int32_t>(
        static_cast<int8_t>(lhs >> 24), static_cast<int8_t>(rhs >> 24));
    return static_cast<float>(diff0 + diff1 + diff2 + diff3);
  }

  //! Sum of squares of the four int8 values packed in `v`
  static inline float Squared(uint32_t v) {
    const int sq0 = static_cast<int8_t>(v >> 0) * static_cast<int8_t>(v >> 0);
    const int sq1 = static_cast<int8_t>(v >> 8) * static_cast<int8_t>(v >> 8);
    const int sq2 =
        static_cast<int8_t>(v >> 16) * static_cast<int8_t>(v >> 16);
    const int sq3 =
        static_cast<int8_t>(v >> 24) * static_cast<int8_t>(v >> 24);
    return static_cast<float>(sq0 + sq1 + sq2 + sq3);
  }
};

/*! Mips Squared Euclidean Distance Matrix (INT8, N=1)
 *
 *  Scores one INT8 query against M packed INT8 vectors. Both buffers are
 *  walked as 32-bit words (four int8 lanes each); for every query word, M
 *  consecutive words are consumed from the matrix buffer, one per output slot.
 */
template <size_t M>
struct MipsSquaredEuclideanDistanceMatrix<
    int8_t, M, 1, typename std::enable_if<M >= 2>::type> {
  //! Type of value
  using ValueType = int8_t;

  //! Spherical-injection variant. Accumulates the inner product and both
  //! squared norms, then hands them with `e2` to ComputeSphericalInjection.
  //! Writes M floats to `out`; `dim` is asserted to be a non-zero multiple
  //! of 4. Nothing is written when `dim >> 2` is zero.
  static inline void Compute(const ValueType *p, const ValueType *q, size_t dim,
                             float e2, float *out) {
    ailego_assert(p && q && dim && !(dim & 3) && out);
    // Four int8 lanes per 32-bit word.
    const size_t word_count = dim >> 2;
    if (word_count == 0) {
      return;
    }
    const uint32_t *p_words = reinterpret_cast<const uint32_t *>(p);
    const uint32_t *q_words = reinterpret_cast<const uint32_t *>(q);

    // The first query word seeds every accumulator.
    std::array<float, M> p_norms;
    uint32_t q_word = q_words[0];
    float q_norm = Squared(q_word);
    for (size_t slot = 0; slot != M; ++slot) {
      const uint32_t p_word = p_words[slot];
      p_norms[slot] = Squared(p_word);
      out[slot] = FusedMultiplyAdd(p_word, q_word);
    }

    // Remaining words are accumulated on top.
    for (size_t w = 1; w != word_count; ++w) {
      p_words += M;
      q_word = q_words[w];
      q_norm += Squared(q_word);
      for (size_t slot = 0; slot != M; ++slot) {
        const uint32_t p_word = p_words[slot];
        p_norms[slot] += Squared(p_word);
        out[slot] += FusedMultiplyAdd(p_word, q_word);
      }
    }

    // Turn each (inner product, norms) triple into the final score.
    for (size_t slot = 0; slot != M; ++slot) {
      out[slot] =
          ComputeSphericalInjection(out[slot], p_norms[slot], q_norm, e2);
    }
  }

  //! Repeated-quadratic-injection variant. Accumulates the squared
  //! differences and both squared norms, scales all of them by `e2`, and then
  //! for `m` rounds adds the squared gap between the two norm terms while
  //! squaring those terms. Writes M floats to `out`.
  static inline void Compute(const ValueType *p, const ValueType *q, size_t dim,
                             size_t m, float e2, float *out) {
    ailego_assert(p && q && dim && !(dim & 3) && out);
    // Four int8 lanes per 32-bit word.
    const size_t word_count = dim >> 2;
    if (word_count == 0) {
      return;
    }
    const uint32_t *p_words = reinterpret_cast<const uint32_t *>(p);
    const uint32_t *q_words = reinterpret_cast<const uint32_t *>(q);

    // The first query word seeds every accumulator.
    std::array<float, M> p_norms;
    uint32_t q_word = q_words[0];
    float q_norm = Squared(q_word);
    for (size_t slot = 0; slot != M; ++slot) {
      const uint32_t p_word = p_words[slot];
      p_norms[slot] = Squared(p_word);
      out[slot] = SquaredDifference(p_word, q_word);
    }

    // Remaining words are accumulated on top.
    for (size_t w = 1; w != word_count; ++w) {
      p_words += M;
      q_word = q_words[w];
      q_norm += Squared(q_word);
      for (size_t slot = 0; slot != M; ++slot) {
        const uint32_t p_word = p_words[slot];
        p_norms[slot] += Squared(p_word);
        out[slot] += SquaredDifference(p_word, q_word);
      }
    }

    // Scale, then apply the `m` injection rounds.
    for (size_t slot = 0; slot != M; ++slot) {
      out[slot] *= e2;
      p_norms[slot] *= e2;
    }
    q_norm *= e2;
    for (size_t round = 0; round != m; ++round) {
      for (size_t slot = 0; slot != M; ++slot) {
        const float u_val = p_norms[slot];
        p_norms[slot] = u_val * u_val;
        out[slot] += (u_val - q_norm) * (u_val - q_norm);
      }
      q_norm = q_norm * q_norm;
    }
  }

 protected:
  //! Sum of lane-wise products of two words holding four int8 lanes each
  static inline float FusedMultiplyAdd(uint32_t lhs, uint32_t rhs) {
    int dot = 0;
    for (unsigned shift = 0; shift < 32; shift += 8) {
      const int8_t a = static_cast<int8_t>(lhs >> shift);
      const int8_t b = static_cast<int8_t>(rhs >> shift);
      dot += a * b;
    }
    return static_cast<float>(dot);
  }

  //! Sum of lane-wise squared differences of two words of four int8 lanes
  static inline float SquaredDifference(uint32_t lhs, uint32_t rhs) {
    const auto lane0 = MathHelper::SquaredDifference<int8_t, int32_t>(
        static_cast<int8_t>(lhs), static_cast<int8_t>(rhs));
    const auto lane1 = MathHelper::SquaredDifference<int8_t, int32_t>(
        static_cast<int8_t>(lhs >> 8), static_cast<int8_t>(rhs >> 8));
    const auto lane2 = MathHelper::SquaredDifference<int8_t, int32_t>(
        static_cast<int8_t>(lhs >> 16), static_cast<int8_t>(rhs >> 16));
    const auto lane3 = MathHelper::SquaredDifference<int8_t, int32_t>(
        static_cast<int8_t>(lhs >> 24), static_cast<int8_t>(rhs >> 24));
    return static_cast<float>(lane0 + lane1 + lane2 + lane3);
  }

  //! Sum of squares of the four int8 lanes of a word
  static inline float Squared(uint32_t v) {
    int total = 0;
    for (unsigned shift = 0; shift < 32; shift += 8) {
      const int8_t lane = static_cast<int8_t>(v >> shift);
      total += lane * lane;
    }
    return static_cast<float>(total);
  }
};

/*! Mips Squared Euclidean Distance Matrix (INT4, M >=2, N >= 2)
 *
 *  Scores N packed INT4 queries against M packed INT4 vectors. Both buffers
 *  are walked as 32-bit words (eight 4-bit lanes each): every step consumes M
 *  consecutive words from `p` and N consecutive words from `q`. The score for
 *  p-slot i and q-slot j is stored at `out[j * M + i]`.
 */
template <size_t M, size_t N>
struct MipsSquaredEuclideanDistanceMatrix<
    uint8_t, M, N, typename std::enable_if<M >= 2 && N >= 2>::type> {
  //! Type of value
  using ValueType = uint8_t;

  //! Spherical-injection variant. Accumulates inner products and squared
  //! norms, then passes them with `e2` to ComputeSphericalInjection.
  //! `dim` is asserted to be a non-zero multiple of 8. Nothing is written
  //! when `dim >> 3` is zero.
  static inline void Compute(const ValueType *p, const ValueType *q, size_t dim,
                             float e2, float *out) {
    ailego_assert(p && q && dim && !(dim & 7) && out);
    // Eight 4-bit lanes per 32-bit word.
    const size_t steps = dim >> 3;
    if (steps == 0) {
      return;
    }

    std::array<float, M> p_norms;
    std::array<float, N> q_norms;
    const uint32_t *p_words = reinterpret_cast<const uint32_t *>(p);
    const uint32_t *q_words = reinterpret_cast<const uint32_t *>(q);

    // Step 0 seeds every accumulator.
    for (size_t pi = 0; pi != M; ++pi) {
      const uint32_t p_word = p_words[pi];
      p_norms[pi] = Squared(p_word);
      for (size_t qj = 0; qj != N; ++qj) {
        out[qj * M + pi] = FusedMultiplyAdd(p_word, q_words[qj]);
      }
    }
    for (size_t qj = 0; qj != N; ++qj) {
      q_norms[qj] = Squared(q_words[qj]);
    }

    // Later steps accumulate on top.
    for (size_t step = 1; step != steps; ++step) {
      p_words += M;
      q_words += N;
      for (size_t pi = 0; pi != M; ++pi) {
        const uint32_t p_word = p_words[pi];
        p_norms[pi] += Squared(p_word);
        for (size_t qj = 0; qj != N; ++qj) {
          out[qj * M + pi] += FusedMultiplyAdd(p_word, q_words[qj]);
        }
      }
      for (size_t qj = 0; qj != N; ++qj) {
        q_norms[qj] += Squared(q_words[qj]);
      }
    }

    // Turn each (inner product, norms) triple into the final score.
    for (size_t pi = 0; pi != M; ++pi) {
      const float p_norm = p_norms[pi];
      for (size_t qj = 0; qj != N; ++qj) {
        float &cell = out[qj * M + pi];
        cell = ComputeSphericalInjection(cell, p_norm, q_norms[qj], e2);
      }
    }
  }

  //! Repeated-quadratic-injection variant. Accumulates squared differences
  //! and squared norms, scales all of them by `e2`, and then for `m` rounds
  //! adds the squared gap between the norm terms while squaring those terms.
  static inline void Compute(const ValueType *p, const ValueType *q, size_t dim,
                             size_t m, float e2, float *out) {
    ailego_assert(p && q && dim && !(dim & 7) && out);
    // Eight 4-bit lanes per 32-bit word.
    const size_t steps = dim >> 3;
    if (steps == 0) {
      return;
    }

    std::array<float, M> p_norms;
    std::array<float, N> q_norms;
    const uint32_t *p_words = reinterpret_cast<const uint32_t *>(p);
    const uint32_t *q_words = reinterpret_cast<const uint32_t *>(q);

    // Step 0 seeds every accumulator.
    for (size_t pi = 0; pi != M; ++pi) {
      const uint32_t p_word = p_words[pi];
      p_norms[pi] = Squared(p_word);
      for (size_t qj = 0; qj != N; ++qj) {
        out[qj * M + pi] = SquaredDifference(p_word, q_words[qj]);
      }
    }
    for (size_t qj = 0; qj != N; ++qj) {
      q_norms[qj] = Squared(q_words[qj]);
    }

    // Later steps accumulate on top.
    for (size_t step = 1; step != steps; ++step) {
      p_words += M;
      q_words += N;
      for (size_t pi = 0; pi != M; ++pi) {
        const uint32_t p_word = p_words[pi];
        p_norms[pi] += Squared(p_word);
        for (size_t qj = 0; qj != N; ++qj) {
          out[qj * M + pi] += SquaredDifference(p_word, q_words[qj]);
        }
      }
      for (size_t qj = 0; qj != N; ++qj) {
        q_norms[qj] += Squared(q_words[qj]);
      }
    }

    // Scale every accumulator by e2.
    for (size_t cell = 0; cell != M * N; ++cell) {
      out[cell] *= e2;
    }
    for (size_t pi = 0; pi != M; ++pi) {
      p_norms[pi] *= e2;
    }
    for (size_t qj = 0; qj != N; ++qj) {
      q_norms[qj] *= e2;
    }

    // Apply the `m` injection rounds.
    for (size_t round = 0; round != m; ++round) {
      for (size_t pi = 0; pi != M; ++pi) {
        const float u2_val = p_norms[pi];
        p_norms[pi] = u2_val * u2_val;
        for (size_t qj = 0; qj != N; ++qj) {
          out[qj * M + pi] += (u2_val - q_norms[qj]) * (u2_val - q_norms[qj]);
        }
      }
      for (size_t qj = 0; qj != N; ++qj) {
        q_norms[qj] = q_norms[qj] * q_norms[qj];
      }
    }
  }

 protected:
  //! Sum of Int4MulTable lookups over the eight nibble pairs of two words.
  //! Each table index has the lhs nibble in its high four bits and the
  //! matching rhs nibble in its low four bits.
  static inline float FusedMultiplyAdd(uint32_t lhs, uint32_t rhs) {
    const uint32_t i0 = ((lhs << 4) & 0xf0) | (rhs & 0xf);
    const uint32_t i1 = (lhs & 0xf0) | ((rhs >> 4) & 0xf);
    const uint32_t i2 = ((lhs >> 4) & 0xf0) | ((rhs >> 8) & 0xf);
    const uint32_t i3 = ((lhs >> 8) & 0xf0) | ((rhs >> 12) & 0xf);
    const uint32_t i4 = ((lhs >> 12) & 0xf0) | ((rhs >> 16) & 0xf);
    const uint32_t i5 = ((lhs >> 16) & 0xf0) | ((rhs >> 20) & 0xf);
    const uint32_t i6 = ((lhs >> 20) & 0xf0) | ((rhs >> 24) & 0xf);
    const uint32_t i7 = ((lhs >> 24) & 0xf0) | ((rhs >> 28) & 0xf);
    return static_cast<float>(Int4MulTable[i0] + Int4MulTable[i1] +
                              Int4MulTable[i2] + Int4MulTable[i3] +
                              Int4MulTable[i4] + Int4MulTable[i5] +
                              Int4MulTable[i6] + Int4MulTable[i7]);
  }

  //! Sum of Int4SquaredDiffTable lookups over the eight nibble pairs of two
  //! words, indexed the same way as FusedMultiplyAdd.
  static inline float SquaredDifference(uint32_t lhs, uint32_t rhs) {
    const uint32_t i0 = ((lhs << 4) & 0xf0) | (rhs & 0xf);
    const uint32_t i1 = (lhs & 0xf0) | ((rhs >> 4) & 0xf);
    const uint32_t i2 = ((lhs >> 4) & 0xf0) | ((rhs >> 8) & 0xf);
    const uint32_t i3 = ((lhs >> 8) & 0xf0) | ((rhs >> 12) & 0xf);
    const uint32_t i4 = ((lhs >> 12) & 0xf0) | ((rhs >> 16) & 0xf);
    const uint32_t i5 = ((lhs >> 16) & 0xf0) | ((rhs >> 20) & 0xf);
    const uint32_t i6 = ((lhs >> 20) & 0xf0) | ((rhs >> 24) & 0xf);
    const uint32_t i7 = ((lhs >> 24) & 0xf0) | ((rhs >> 28) & 0xf);
    return static_cast<float>(
        Int4SquaredDiffTable[i0] + Int4SquaredDiffTable[i1] +
        Int4SquaredDiffTable[i2] + Int4SquaredDiffTable[i3] +
        Int4SquaredDiffTable[i4] + Int4SquaredDiffTable[i5] +
        Int4SquaredDiffTable[i6] + Int4SquaredDiffTable[i7]);
  }

  //! Sum of squares of the eight sign-extended 4-bit lanes of a word
  static inline float Squared(uint32_t u) {
    float total = 0.0f;
    for (unsigned byte_idx = 0; byte_idx != 4; ++byte_idx) {
      const uint8_t packed = static_cast<uint8_t>(u >> (byte_idx * 8));
      // Move each nibble into the top of a signed byte, then shift back down
      // arithmetically so the nibble's sign bit is extended.
      const int8_t lo =
          static_cast<int8_t>(static_cast<int8_t>(packed << 4) >> 4);
      const int8_t hi =
          static_cast<int8_t>(static_cast<int8_t>(packed & 0xf0) >> 4);
      total += hi * hi + lo * lo;
    }
    return total;
  }
};

/*! Mips Squared Euclidean Distance Matrix (INT4, N=1)
 *
 *  Scores one packed INT4 query against M packed INT4 vectors. Both buffers
 *  are walked as 32-bit words (eight 4-bit lanes each); for every query word,
 *  M consecutive words are consumed from the matrix buffer, one per output
 *  slot.
 */
template <size_t M>
struct MipsSquaredEuclideanDistanceMatrix<
    uint8_t, M, 1, typename std::enable_if<M >= 2>::type> {
  //! Type of value
  using ValueType = uint8_t;

  //! Spherical-injection variant. Accumulates the inner product and both
  //! squared norms, then hands them with `e2` to ComputeSphericalInjection.
  //! Writes M floats to `out`; `dim` is asserted to be a non-zero multiple
  //! of 8. Nothing is written when `dim >> 3` is zero.
  static inline void Compute(const ValueType *p, const ValueType *q, size_t dim,
                             float e2, float *out) {
    ailego_assert(p && q && dim && !(dim & 7) && out);
    // Eight 4-bit lanes per 32-bit word.
    const size_t word_count = dim >> 3;
    if (word_count == 0) {
      return;
    }
    const uint32_t *p_words = reinterpret_cast<const uint32_t *>(p);
    const uint32_t *q_words = reinterpret_cast<const uint32_t *>(q);

    // The first query word seeds every accumulator.
    std::array<float, M> p_norms;
    uint32_t q_word = q_words[0];
    float q_norm = Squared(q_word);
    for (size_t slot = 0; slot != M; ++slot) {
      const uint32_t p_word = p_words[slot];
      p_norms[slot] = Squared(p_word);
      out[slot] = FusedMultiplyAdd(p_word, q_word);
    }

    // Remaining words are accumulated on top.
    for (size_t w = 1; w != word_count; ++w) {
      p_words += M;
      q_word = q_words[w];
      q_norm += Squared(q_word);
      for (size_t slot = 0; slot != M; ++slot) {
        const uint32_t p_word = p_words[slot];
        p_norms[slot] += Squared(p_word);
        out[slot] += FusedMultiplyAdd(p_word, q_word);
      }
    }

    // Turn each (inner product, norms) triple into the final score.
    for (size_t slot = 0; slot != M; ++slot) {
      out[slot] =
          ComputeSphericalInjection(out[slot], p_norms[slot], q_norm, e2);
    }
  }

  //! Repeated-quadratic-injection variant. Accumulates the squared
  //! differences and both squared norms, scales all of them by `e2`, and then
  //! for `m` rounds adds the squared gap between the two norm terms while
  //! squaring those terms. Writes M floats to `out`.
  static inline void Compute(const ValueType *p, const ValueType *q, size_t dim,
                             size_t m, float e2, float *out) {
    ailego_assert(p && q && dim && !(dim & 7) && out);
    // Eight 4-bit lanes per 32-bit word.
    const size_t word_count = dim >> 3;
    if (word_count == 0) {
      return;
    }
    const uint32_t *p_words = reinterpret_cast<const uint32_t *>(p);
    const uint32_t *q_words = reinterpret_cast<const uint32_t *>(q);

    // The first query word seeds every accumulator.
    std::array<float, M> p_norms;
    uint32_t q_word = q_words[0];
    float q_norm = Squared(q_word);
    for (size_t slot = 0; slot != M; ++slot) {
      const uint32_t p_word = p_words[slot];
      p_norms[slot] = Squared(p_word);
      out[slot] = SquaredDifference(p_word, q_word);
    }

    // Remaining words are accumulated on top.
    for (size_t w = 1; w != word_count; ++w) {
      p_words += M;
      q_word = q_words[w];
      q_norm += Squared(q_word);
      for (size_t slot = 0; slot != M; ++slot) {
        const uint32_t p_word = p_words[slot];
        p_norms[slot] += Squared(p_word);
        out[slot] += SquaredDifference(p_word, q_word);
      }
    }

    // Scale, then apply the `m` injection rounds.
    for (size_t slot = 0; slot != M; ++slot) {
      out[slot] *= e2;
      p_norms[slot] *= e2;
    }
    q_norm *= e2;
    for (size_t round = 0; round != m; ++round) {
      for (size_t slot = 0; slot != M; ++slot) {
        const float u_val = p_norms[slot];
        p_norms[slot] = u_val * u_val;
        out[slot] += (u_val - q_norm) * (u_val - q_norm);
      }
      q_norm = q_norm * q_norm;
    }
  }

 protected:
  //! Sum of Int4MulTable lookups over the eight nibble pairs of two words.
  //! Each table index has the lhs nibble in its high four bits and the
  //! matching rhs nibble in its low four bits.
  static inline float FusedMultiplyAdd(uint32_t lhs, uint32_t rhs) {
    const uint32_t i0 = ((lhs << 4) & 0xf0) | (rhs & 0xf);
    const uint32_t i1 = (lhs & 0xf0) | ((rhs >> 4) & 0xf);
    const uint32_t i2 = ((lhs >> 4) & 0xf0) | ((rhs >> 8) & 0xf);
    const uint32_t i3 = ((lhs >> 8) & 0xf0) | ((rhs >> 12) & 0xf);
    const uint32_t i4 = ((lhs >> 12) & 0xf0) | ((rhs >> 16) & 0xf);
    const uint32_t i5 = ((lhs >> 16) & 0xf0) | ((rhs >> 20) & 0xf);
    const uint32_t i6 = ((lhs >> 20) & 0xf0) | ((rhs >> 24) & 0xf);
    const uint32_t i7 = ((lhs >> 24) & 0xf0) | ((rhs >> 28) & 0xf);
    return static_cast<float>(Int4MulTable[i0] + Int4MulTable[i1] +
                              Int4MulTable[i2] + Int4MulTable[i3] +
                              Int4MulTable[i4] + Int4MulTable[i5] +
                              Int4MulTable[i6] + Int4MulTable[i7]);
  }

  //! Sum of Int4SquaredDiffTable lookups over the eight nibble pairs of two
  //! words, indexed the same way as FusedMultiplyAdd.
  static inline float SquaredDifference(uint32_t lhs, uint32_t rhs) {
    const uint32_t i0 = ((lhs << 4) & 0xf0) | (rhs & 0xf);
    const uint32_t i1 = (lhs & 0xf0) | ((rhs >> 4) & 0xf);
    const uint32_t i2 = ((lhs >> 4) & 0xf0) | ((rhs >> 8) & 0xf);
    const uint32_t i3 = ((lhs >> 8) & 0xf0) | ((rhs >> 12) & 0xf);
    const uint32_t i4 = ((lhs >> 12) & 0xf0) | ((rhs >> 16) & 0xf);
    const uint32_t i5 = ((lhs >> 16) & 0xf0) | ((rhs >> 20) & 0xf);
    const uint32_t i6 = ((lhs >> 20) & 0xf0) | ((rhs >> 24) & 0xf);
    const uint32_t i7 = ((lhs >> 24) & 0xf0) | ((rhs >> 28) & 0xf);
    return static_cast<float>(
        Int4SquaredDiffTable[i0] + Int4SquaredDiffTable[i1] +
        Int4SquaredDiffTable[i2] + Int4SquaredDiffTable[i3] +
        Int4SquaredDiffTable[i4] + Int4SquaredDiffTable[i5] +
        Int4SquaredDiffTable[i6] + Int4SquaredDiffTable[i7]);
  }

  //! Sum of squares of the eight sign-extended 4-bit lanes of a word
  static inline float Squared(uint32_t u) {
    float total = 0.0f;
    for (unsigned byte_idx = 0; byte_idx != 4; ++byte_idx) {
      const uint8_t packed = static_cast<uint8_t>(u >> (byte_idx * 8));
      // Move each nibble into the top of a signed byte, then shift back down
      // arithmetically so the nibble's sign bit is extended.
      const int8_t lo =
          static_cast<int8_t>(static_cast<int8_t>(packed << 4) >> 4);
      const int8_t hi =
          static_cast<int8_t>(static_cast<int8_t>(packed & 0xf0) >> 4);
      total += hi * hi + lo * lo;
    }
    return total;
  }
};

//--------------------------------------------------
// Sparse
//--------------------------------------------------
/*! Mips Squared Euclidean Sparse Distance Matrix
 *
 *  Works on two serialized sparse vectors. As read by Compute(), each buffer
 *  is laid out as:
 *    uint32  sparse_count            total number of stored entries
 *    uint32  seg_count               number of segments
 *    uint32  seg_id[seg_count]       segment identifiers
 *    uint32  seg_vec_cnt[seg_count]  entries per segment
 *    uint16  index[sparse_count]     per-entry index within its segment
 *    T       value[sparse_count]     per-entry value
 */
template <typename T>
struct MipsSquaredEuclideanSparseDistanceMatrix {
  //! Type of value
  using ValueType = typename std::remove_cv<T>::type;

  //! Inner product of two segments given as parallel index/value arrays
  static float ComputeInnerProductSparseInSegment(
      uint32_t m_sparse_count, const uint16_t *m_sparse_index,
      const ValueType *m_sparse_value, uint32_t q_sparse_count,
      const uint16_t *q_sparse_index, const ValueType *q_sparse_value);

  //! Spherical-injection distance between two serialized sparse vectors.
  //! Writes 0 when both are empty and 2 when exactly one is empty; otherwise
  //! sums the per-segment inner products of segments with equal ids and
  //! passes that sum, both squared norms and a zero last argument to
  //! ComputeSphericalInjection.
  static inline void Compute(const void *m_sparse_data_in,
                             const void *q_sparse_data_in, float *out) {
    ailego_assert(m_sparse_data_in && q_sparse_data_in && out);

    const uint8_t *m_bytes =
        reinterpret_cast<const uint8_t *>(m_sparse_data_in);
    const uint8_t *q_bytes =
        reinterpret_cast<const uint8_t *>(q_sparse_data_in);

    const uint32_t m_sparse_count =
        *reinterpret_cast<const uint32_t *>(m_bytes);
    const uint32_t q_sparse_count =
        *reinterpret_cast<const uint32_t *>(q_bytes);

    // Empty inputs short-circuit before any further header field is read.
    if (m_sparse_count == 0 || q_sparse_count == 0) {
      *out = (m_sparse_count == q_sparse_count) ? 0.0f : 2.0f;
      return;
    }

    const uint32_t m_seg_count =
        *reinterpret_cast<const uint32_t *>(m_bytes + sizeof(uint32_t));
    const uint32_t q_seg_count =
        *reinterpret_cast<const uint32_t *>(q_bytes + sizeof(uint32_t));

    // Byte offsets of each section, counted from the start of the buffer.
    const size_t ids_offset = 2 * sizeof(uint32_t);
    const size_t m_counts_offset =
        2 * sizeof(uint32_t) + m_seg_count * sizeof(uint32_t);
    const size_t q_counts_offset =
        2 * sizeof(uint32_t) + q_seg_count * sizeof(uint32_t);
    const size_t m_index_offset =
        2 * sizeof(uint32_t) + m_seg_count * 2 * sizeof(uint32_t);
    const size_t q_index_offset =
        2 * sizeof(uint32_t) + q_seg_count * 2 * sizeof(uint32_t);
    const size_t m_value_offset =
        m_index_offset + m_sparse_count * sizeof(uint16_t);
    const size_t q_value_offset =
        q_index_offset + q_sparse_count * sizeof(uint16_t);

    const uint32_t *m_seg_id =
        reinterpret_cast<const uint32_t *>(m_bytes + ids_offset);
    const uint32_t *q_seg_id =
        reinterpret_cast<const uint32_t *>(q_bytes + ids_offset);
    const uint32_t *m_seg_vec_cnt =
        reinterpret_cast<const uint32_t *>(m_bytes + m_counts_offset);
    const uint32_t *q_seg_vec_cnt =
        reinterpret_cast<const uint32_t *>(q_bytes + q_counts_offset);
    const uint16_t *m_sparse_index =
        reinterpret_cast<const uint16_t *>(m_bytes + m_index_offset);
    const uint16_t *q_sparse_index =
        reinterpret_cast<const uint16_t *>(q_bytes + q_index_offset);
    const ValueType *m_sparse_value =
        reinterpret_cast<const ValueType *>(m_bytes + m_value_offset);
    const ValueType *q_sparse_value =
        reinterpret_cast<const ValueType *>(q_bytes + q_value_offset);

    // Two-cursor walk over the segment lists: the side with the smaller id
    // advances, and segments with equal ids contribute their inner product.
    // presumably segment ids are stored in ascending order - verify with the
    // serializer.
    float ip = 0.0f;
    size_t m_seg = 0;
    size_t q_seg = 0;
    size_t m_consumed = 0;  // entries of m that precede segment m_seg
    size_t q_consumed = 0;  // entries of q that precede segment q_seg
    while (m_seg < m_seg_count && q_seg < q_seg_count) {
      const uint32_t m_id = m_seg_id[m_seg];
      const uint32_t q_id = q_seg_id[q_seg];
      if (m_id < q_id) {
        m_consumed += m_seg_vec_cnt[m_seg];
        ++m_seg;
        continue;
      }
      if (m_id != q_id) {
        q_consumed += q_seg_vec_cnt[q_seg];
        ++q_seg;
        continue;
      }

      ip += ComputeInnerProductSparseInSegment(
          m_seg_vec_cnt[m_seg], m_sparse_index + m_consumed,
          m_sparse_value + m_consumed, q_seg_vec_cnt[q_seg],
          q_sparse_index + q_consumed, q_sparse_value + q_consumed);
      m_consumed += m_seg_vec_cnt[m_seg];
      q_consumed += q_seg_vec_cnt[q_seg];
      ++m_seg;
      ++q_seg;
    }

    // Squared norms are taken over every stored value of each vector.
    float m_norm = 0.0f;
    SquaredNorm2Matrix<ValueType, 1>::Compute(m_sparse_value, m_sparse_count,
                                              &m_norm);
    float q_norm = 0.0f;
    SquaredNorm2Matrix<ValueType, 1>::Compute(q_sparse_value, q_sparse_count,
                                              &q_norm);

    *out = ComputeSphericalInjection(ip, m_norm, q_norm, 0.0f);
  }
};

template <typename T>
float MipsSquaredEuclideanSparseDistanceMatrix<
    T>::ComputeInnerProductSparseInSegment(uint32_t m_sparse_count,
                                           const uint16_t *m_sparse_index,
                                           const ValueType *m_sparse_value,
                                           uint32_t q_sparse_count,
                                           const uint16_t *q_sparse_index,
                                           const ValueType *q_sparse_value) {
  // Generic inner product of two sparse segments. Two cursors walk the index
  // arrays: the cursor on the smaller index advances, and entries with equal
  // indices contribute the product of their values.
  // presumably both index arrays are in ascending order - verify with caller.
  float sum = 0.0f;
  size_t m_i = 0;
  size_t q_i = 0;
  for (;;) {
    if (!(m_i < m_sparse_count && q_i < q_sparse_count)) {
      break;
    }
    const uint16_t m_idx = m_sparse_index[m_i];
    const uint16_t q_idx = q_sparse_index[q_i];
    if (m_idx < q_idx) {
      ++m_i;
    } else if (m_idx != q_idx) {
      ++q_i;
    } else {
      sum += m_sparse_value[m_i] * q_sparse_value[q_i];
      ++m_i;
      ++q_i;
    }
  }
  return sum;
}

// Declaration of the explicit specialization for float. No body is given
// here, so the generic definition above is not used for
// MipsSquaredEuclideanSparseDistanceMatrix<float>; the float version must be
// defined in some source file that is linked in.
template <>
float MipsSquaredEuclideanSparseDistanceMatrix<
    float>::ComputeInnerProductSparseInSegment(uint32_t m_sparse_count,
                                               const uint16_t *m_sparse_index,
                                               const ValueType *m_sparse_value,
                                               uint32_t q_sparse_count,
                                               const uint16_t *q_sparse_index,
                                               const ValueType *q_sparse_value);

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/mips_euclidean_distance_matrix_fp16_avx.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_accum_fp16.i"
#include "distance_matrix_mips_utility.i"
#include "mips_euclidean_distance_matrix.h"

namespace zvec {
namespace ailego {

#if defined(__AVX__) && defined(__F16C__)
//! Compute the Inner Product between p and q, and each Squared L2-Norm value
//!
//! Single pass over `size` half-precision elements of `lhs` and `rhs`.
//! Returns the inner product; stores the squared norm of `lhs` in `*sql` and
//! the squared norm of `rhs` in `*sqr`. Inputs are widened to fp32 before
//! accumulation.
float InnerProductAndSquaredNormFp16AVX(const Float16 *lhs, const Float16 *rhs,
                                        size_t size, float *sql, float *sqr) {
  // Two accumulators for the inner product (low/high half of each 16-element
  // block) and one accumulator per squared norm.
  __m256 ymm_sum_0 = _mm256_setzero_ps();
  __m256 ymm_sum_1 = _mm256_setzero_ps();
  __m256 ymm_sum_norm1 = _mm256_setzero_ps();
  __m256 ymm_sum_norm2 = _mm256_setzero_ps();

  const Float16 *last = lhs + size;
  // End of the part that can be consumed in whole 16-element blocks.
  const Float16 *last_aligned = lhs + ((size >> 4) << 4);
  // Aligned loads are used only when both pointers are 32-byte aligned.
  if (((uintptr_t)lhs & 0x1f) == 0 && ((uintptr_t)rhs & 0x1f) == 0) {
    for (; lhs != last_aligned; lhs += 16, rhs += 16) {
      __m256i ymm_lhs = _mm256_load_si256((const __m256i *)lhs);
      __m256i ymm_rhs = _mm256_load_si256((const __m256i *)rhs);
      // Widen each 128-bit half (8 halves) to 8 floats.
      __m256 ymm_lhs_0 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_lhs));
      __m256 ymm_lhs_1 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_lhs, 1));
      __m256 ymm_rhs_0 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_rhs));
      __m256 ymm_rhs_1 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_rhs, 1));
      ymm_sum_0 = _mm256_fmadd_ps(ymm_lhs_0, ymm_rhs_0, ymm_sum_0);
      ymm_sum_1 = _mm256_fmadd_ps(ymm_lhs_1, ymm_rhs_1, ymm_sum_1);
      ymm_sum_norm1 = _mm256_fmadd_ps(ymm_lhs_0, ymm_lhs_0, ymm_sum_norm1);
      ymm_sum_norm1 = _mm256_fmadd_ps(ymm_lhs_1, ymm_lhs_1, ymm_sum_norm1);
      ymm_sum_norm2 = _mm256_fmadd_ps(ymm_rhs_0, ymm_rhs_0, ymm_sum_norm2);
      ymm_sum_norm2 = _mm256_fmadd_ps(ymm_rhs_1, ymm_rhs_1, ymm_sum_norm2);
    }
    // At most one more 8-element chunk can remain after the 16-wide loop.
    if (last >= last_aligned + 8) {
      __m256 ymm_lhs_0 = _mm256_cvtph_ps(_mm_load_si128((const __m128i *)lhs));
      __m256 ymm_rhs_0 = _mm256_cvtph_ps(_mm_load_si128((const __m128i *)rhs));
      ymm_sum_0 = _mm256_fmadd_ps(ymm_lhs_0, ymm_rhs_0, ymm_sum_0);
      ymm_sum_norm1 = _mm256_fmadd_ps(ymm_lhs_0, ymm_lhs_0, ymm_sum_norm1);
      ymm_sum_norm2 = _mm256_fmadd_ps(ymm_rhs_0, ymm_rhs_0, ymm_sum_norm2);
      lhs += 8;
      rhs += 8;
    }
  } else {
    // Same computation as above, using unaligned loads.
    for (; lhs != last_aligned; lhs += 16, rhs += 16) {
      __m256i ymm_lhs = _mm256_loadu_si256((const __m256i *)lhs);
      __m256i ymm_rhs = _mm256_loadu_si256((const __m256i *)rhs);
      __m256 ymm_lhs_0 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_lhs));
      __m256 ymm_lhs_1 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_lhs, 1));
      __m256 ymm_rhs_0 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_rhs));
      __m256 ymm_rhs_1 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_rhs, 1));
      ymm_sum_0 = _mm256_fmadd_ps(ymm_lhs_0, ymm_rhs_0, ymm_sum_0);
      ymm_sum_1 = _mm256_fmadd_ps(ymm_lhs_1, ymm_rhs_1, ymm_sum_1);
      ymm_sum_norm1 = _mm256_fmadd_ps(ymm_lhs_0, ymm_lhs_0, ymm_sum_norm1);
      ymm_sum_norm1 = _mm256_fmadd_ps(ymm_lhs_1, ymm_lhs_1, ymm_sum_norm1);
      ymm_sum_norm2 = _mm256_fmadd_ps(ymm_rhs_0, ymm_rhs_0, ymm_sum_norm2);
      ymm_sum_norm2 = _mm256_fmadd_ps(ymm_rhs_1, ymm_rhs_1, ymm_sum_norm2);
    }
    if (last >= last_aligned + 8) {
      __m256 ymm_lhs_0 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)lhs));
      __m256 ymm_rhs_0 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)rhs));
      ymm_sum_0 = _mm256_fmadd_ps(ymm_lhs_0, ymm_rhs_0, ymm_sum_0);
      ymm_sum_norm1 = _mm256_fmadd_ps(ymm_lhs_0, ymm_lhs_0, ymm_sum_norm1);
      ymm_sum_norm2 = _mm256_fmadd_ps(ymm_rhs_0, ymm_rhs_0, ymm_sum_norm2);
      lhs += 8;
      rhs += 8;
    }
  }

  // Collapse the vector accumulators into scalars.
  float result = HorizontalAdd_FP32_V256(_mm256_add_ps(ymm_sum_0, ymm_sum_1));
  float norm1 = HorizontalAdd_FP32_V256(ymm_sum_norm1);
  float norm2 = HorizontalAdd_FP32_V256(ymm_sum_norm2);
  // Scalar tail: 0..7 elements remain. Each case falls through to the next so
  // every leftover element is visited once.
  // NOTE(review): FMA_FP16_GENERAL is defined outside this file; presumably it
  // adds a*b to `result`, a*a to `norm1` and b*b to `norm2` - confirm.
  switch (last - lhs) {
    case 7:
      FMA_FP16_GENERAL(lhs[6], rhs[6], result, norm1, norm2);
      /* FALLTHRU */
    case 6:
      FMA_FP16_GENERAL(lhs[5], rhs[5], result, norm1, norm2);
      /* FALLTHRU */
    case 5:
      FMA_FP16_GENERAL(lhs[4], rhs[4], result, norm1, norm2);
      /* FALLTHRU */
    case 4:
      FMA_FP16_GENERAL(lhs[3], rhs[3], result, norm1, norm2);
      /* FALLTHRU */
    case 3:
      FMA_FP16_GENERAL(lhs[2], rhs[2], result, norm1, norm2);
      /* FALLTHRU */
    case 2:
      FMA_FP16_GENERAL(lhs[1], rhs[1], result, norm1, norm2);
      /* FALLTHRU */
    case 1:
      FMA_FP16_GENERAL(lhs[0], rhs[0], result, norm1, norm2);
  }

  *sql = norm1;
  *sqr = norm2;
  return result;
}

// Spherical-injection MIPS distance for two fp16 vectors of `size` elements
// (AVX path). Gathers the inner product and both squared norms in one pass and
// forwards them with `e2` to ComputeSphericalInjection.
float MipsEuclideanDistanceSphericalInjectionFp16AVX(const Float16 *lhs,
                                                     const Float16 *rhs,
                                                     size_t size, float e2) {
  float lhs_sq_norm = 0.0f;
  float rhs_sq_norm = 0.0f;
  const float inner_product = InnerProductAndSquaredNormFp16AVX(
      lhs, rhs, size, &lhs_sq_norm, &rhs_sq_norm);
  return ComputeSphericalInjection(inner_product, lhs_sq_norm, rhs_sq_norm,
                                   e2);
}

// Repeated-quadratic-injection MIPS distance for two fp16 vectors of `size`
// elements (AVX path). Starts from e2 * (|lhs|^2 + |rhs|^2 - 2 * <lhs, rhs>),
// then for `m` rounds adds the squared gap between the two e2-scaled norm
// terms, squaring those terms after every round.
float MipsEuclideanDistanceRepeatedQuadraticInjectionFp16AVX(
    const Float16 *lhs, const Float16 *rhs, size_t size, size_t m, float e2) {
  float u2 = 0.0f;
  float v2 = 0.0f;
  float sum = InnerProductAndSquaredNormFp16AVX(lhs, rhs, size, &u2, &v2);

  sum = e2 * (u2 + v2 - 2 * sum);
  u2 *= e2;
  v2 *= e2;
  for (size_t remaining = m; remaining != 0; --remaining) {
    sum += (u2 - v2) * (u2 - v2);
    u2 = u2 * u2;
    v2 = v2 * v2;
  }
  return sum;
}

#endif  // __AVX__ && __F16C__

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/mips_euclidean_distance_matrix_fp16_avx512.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_accum_fp16.i"
#include "distance_matrix_mips_utility.i"
#include "mips_euclidean_distance_matrix.h"

namespace zvec {
namespace ailego {

#if defined(__AVX512F__)
//! Compute the Inner Product between p and q, and each Squared L2-Norm value
//!
//! Single pass over `size` half-precision elements of `lhs` and `rhs`.
//! Returns the inner product; stores the squared norm of `lhs` in `*sql` and
//! the squared norm of `rhs` in `*sqr`. Inputs are widened to fp32 before
//! accumulation.
float InnerProductAndSquaredNormFp16AVX512(const Float16 *lhs,
                                           const Float16 *rhs, size_t size,
                                           float *sql, float *sqr) {
  // Two accumulators for the inner product (low/high half of each 32-element
  // block) and one accumulator per squared norm.
  __m512 zmm_sum_0 = _mm512_setzero_ps();
  __m512 zmm_sum_1 = _mm512_setzero_ps();
  __m512 zmm_sum_norm1 = _mm512_setzero_ps();
  __m512 zmm_sum_norm2 = _mm512_setzero_ps();

  const Float16 *last = lhs + size;
  // End of the part that can be consumed in whole 32-element blocks.
  const Float16 *last_aligned = lhs + ((size >> 5) << 5);
  // Aligned loads are used only when both pointers are 64-byte aligned.
  // NOTE(review): FMA_FP32_AVX512 is defined outside this file and is used
  // without a trailing semicolon; presumably it expands to a full statement
  // that adds a*b into the third argument - confirm.
  if (((uintptr_t)lhs & 0x3f) == 0 && ((uintptr_t)rhs & 0x3f) == 0) {
    for (; lhs != last_aligned; lhs += 32, rhs += 32) {
      __m512i zmm_lhs = _mm512_load_si512((const __m512i *)lhs);
      __m512i zmm_rhs = _mm512_load_si512((const __m512i *)rhs);
      // Widen each 256-bit half (16 halves) to 16 floats.
      __m512 zmm_lhs_0 = _mm512_cvtph_ps(_mm512_castsi512_si256(zmm_lhs));
      __m512 zmm_lhs_1 = _mm512_cvtph_ps(_mm512_extracti64x4_epi64(zmm_lhs, 1));
      __m512 zmm_rhs_0 = _mm512_cvtph_ps(_mm512_castsi512_si256(zmm_rhs));
      __m512 zmm_rhs_1 = _mm512_cvtph_ps(_mm512_extracti64x4_epi64(zmm_rhs, 1));
      FMA_FP32_AVX512(zmm_lhs_0, zmm_rhs_0, zmm_sum_0)
      FMA_FP32_AVX512(zmm_lhs_1, zmm_rhs_1, zmm_sum_1)
      FMA_FP32_AVX512(zmm_lhs_0, zmm_lhs_0, zmm_sum_norm1)
      FMA_FP32_AVX512(zmm_lhs_1, zmm_lhs_1, zmm_sum_norm1)
      FMA_FP32_AVX512(zmm_rhs_0, zmm_rhs_0, zmm_sum_norm2)
      FMA_FP32_AVX512(zmm_rhs_1, zmm_rhs_1, zmm_sum_norm2)
    }
    // At most one more 16-element chunk can remain after the 32-wide loop.
    if (last >= last_aligned + 16) {
      __m512 zmm_lhs_0 =
          _mm512_cvtph_ps(_mm256_load_si256((const __m256i *)lhs));
      __m512 zmm_rhs_0 =
          _mm512_cvtph_ps(_mm256_load_si256((const __m256i *)rhs));
      FMA_FP32_AVX512(zmm_lhs_0, zmm_rhs_0, zmm_sum_0)
      FMA_FP32_AVX512(zmm_lhs_0, zmm_lhs_0, zmm_sum_norm1)
      FMA_FP32_AVX512(zmm_rhs_0, zmm_rhs_0, zmm_sum_norm2)
      lhs += 16;
      rhs += 16;
    }
  } else {
    // Same computation as above, using unaligned loads.
    for (; lhs != last_aligned; lhs += 32, rhs += 32) {
      __m512i zmm_lhs = _mm512_loadu_si512((const __m512i *)lhs);
      __m512i zmm_rhs = _mm512_loadu_si512((const __m512i *)rhs);
      __m512 zmm_lhs_0 = _mm512_cvtph_ps(_mm512_castsi512_si256(zmm_lhs));
      __m512 zmm_lhs_1 = _mm512_cvtph_ps(_mm512_extracti64x4_epi64(zmm_lhs, 1));
      __m512 zmm_rhs_0 = _mm512_cvtph_ps(_mm512_castsi512_si256(zmm_rhs));
      __m512 zmm_rhs_1 = _mm512_cvtph_ps(_mm512_extracti64x4_epi64(zmm_rhs, 1));
      FMA_FP32_AVX512(zmm_lhs_0, zmm_rhs_0, zmm_sum_0)
      FMA_FP32_AVX512(zmm_lhs_1, zmm_rhs_1, zmm_sum_1)
      FMA_FP32_AVX512(zmm_lhs_0, zmm_lhs_0, zmm_sum_norm1)
      FMA_FP32_AVX512(zmm_lhs_1, zmm_lhs_1, zmm_sum_norm1)
      FMA_FP32_AVX512(zmm_rhs_0, zmm_rhs_0, zmm_sum_norm2)
      FMA_FP32_AVX512(zmm_rhs_1, zmm_rhs_1, zmm_sum_norm2)
    }
    if (last >= last_aligned + 16) {
      __m512 zmm_lhs_0 =
          _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)lhs));
      __m512 zmm_rhs_0 =
          _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)rhs));
      FMA_FP32_AVX512(zmm_lhs_0, zmm_rhs_0, zmm_sum_0)
      FMA_FP32_AVX512(zmm_lhs_0, zmm_lhs_0, zmm_sum_norm1)
      FMA_FP32_AVX512(zmm_rhs_0, zmm_rhs_0, zmm_sum_norm2)
      lhs += 16;
      rhs += 16;
    }
  }

  // Narrow the 512-bit accumulators to 256 bits so a remaining 8-element
  // chunk can be folded in with 256-bit operations.
  __m256 ymm_sum_0 =
      HorizontalAdd_FP32_V512_TO_V256(_mm512_add_ps(zmm_sum_0, zmm_sum_1));
  __m256 ymm_sum_norm1 = HorizontalAdd_FP32_V512_TO_V256(zmm_sum_norm1);
  __m256 ymm_sum_norm2 = HorizontalAdd_FP32_V512_TO_V256(zmm_sum_norm2);
  // This chunk always uses unaligned loads, whichever branch ran above.
  if (last >= lhs + 8) {
    __m256 ymm_lhs_0 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)lhs));
    __m256 ymm_rhs_0 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)rhs));
    ymm_sum_0 = _mm256_fmadd_ps(ymm_lhs_0, ymm_rhs_0, ymm_sum_0);
    ymm_sum_norm1 = _mm256_fmadd_ps(ymm_lhs_0, ymm_lhs_0, ymm_sum_norm1);
    ymm_sum_norm2 = _mm256_fmadd_ps(ymm_rhs_0, ymm_rhs_0, ymm_sum_norm2);
    lhs += 8;
    rhs += 8;
  }

  // Collapse the vector accumulators into scalars.
  float result = HorizontalAdd_FP32_V256(ymm_sum_0);
  float norm1 = HorizontalAdd_FP32_V256(ymm_sum_norm1);
  float norm2 = HorizontalAdd_FP32_V256(ymm_sum_norm2);
  // Scalar tail: 0..7 elements remain. Each case falls through to the next so
  // every leftover element is visited once.
  // NOTE(review): FMA_FP16_GENERAL is defined outside this file; presumably it
  // adds a*b to `result`, a*a to `norm1` and b*b to `norm2` - confirm.
  switch (last - lhs) {
    case 7:
      FMA_FP16_GENERAL(lhs[6], rhs[6], result, norm1, norm2);
      /* FALLTHRU */
    case 6:
      FMA_FP16_GENERAL(lhs[5], rhs[5], result, norm1, norm2);
      /* FALLTHRU */
    case 5:
      FMA_FP16_GENERAL(lhs[4], rhs[4], result, norm1, norm2);
      /* FALLTHRU */
    case 4:
      FMA_FP16_GENERAL(lhs[3], rhs[3], result, norm1, norm2);
      /* FALLTHRU */
    case 3:
      FMA_FP16_GENERAL(lhs[2], rhs[2], result, norm1, norm2);
      /* FALLTHRU */
    case 2:
      FMA_FP16_GENERAL(lhs[1], rhs[1], result, norm1, norm2);
      /* FALLTHRU */
    case 1:
      FMA_FP16_GENERAL(lhs[0], rhs[0], result, norm1, norm2);
  }

  *sql = norm1;
  *sqr = norm2;
  return result;
}

// Spherical-injection MIPS distance for two fp16 vectors of `size` elements
// (AVX-512 path). Gathers the inner product and both squared norms in one pass
// and forwards them with `e2` to ComputeSphericalInjection.
float MipsEuclideanDistanceSphericalInjectionFp16AVX512(const Float16 *lhs,
                                                        const Float16 *rhs,
                                                        size_t size, float e2) {
  float lhs_sq_norm = 0.0f;
  float rhs_sq_norm = 0.0f;
  const float inner_product = InnerProductAndSquaredNormFp16AVX512(
      lhs, rhs, size, &lhs_sq_norm, &rhs_sq_norm);
  return ComputeSphericalInjection(inner_product, lhs_sq_norm, rhs_sq_norm,
                                   e2);
}

//! Repeated-quadratic-injection distance of two FP16 vectors, built on the
//! AVX512 inner-product / squared-norm kernel.
//! `m` is the number of injection rounds, `e2` the scale applied beforehand.
float MipsEuclideanDistanceRepeatedQuadraticInjectionFp16AVX512(
    const Float16 *lhs, const Float16 *rhs, size_t size, size_t m, float e2) {
  float lhs_sq = 0.0f;
  float rhs_sq = 0.0f;
  const float dot =
      InnerProductAndSquaredNormFp16AVX512(lhs, rhs, size, &lhs_sq, &rhs_sq);

  // Base term: e2 * (|lhs|^2 + |rhs|^2 - 2 * <lhs, rhs>).
  float distance = e2 * (lhs_sq + rhs_sq - 2 * dot);
  lhs_sq *= e2;
  rhs_sq *= e2;

  // Each round adds the squared gap of the running norms, then squares them.
  size_t round = 0;
  while (round < m) {
    const float gap = lhs_sq - rhs_sq;
    distance += gap * gap;
    lhs_sq = lhs_sq * lhs_sq;
    rhs_sq = rhs_sq * rhs_sq;
    ++round;
  }

  return distance;
}

#endif  // __AVX512F__

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/mips_euclidean_distance_matrix_fp16_dispatch.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <ailego/internal/cpu_features.h>
#include "mips_euclidean_distance_matrix.h"

namespace zvec {
namespace ailego {

// Forward declarations of the per-instruction-set FP16 kernels that the
// Compute() dispatchers below choose between. Each SIMD group is declared
// only when its compiler macro is defined; the scalar pair is always declared.

// NOTE(review): the NEON source file guards the matching definitions with
// `__ARM_NEON && __aarch64__`, which is narrower than the guard used here.
#if defined(__ARM_NEON)
float MipsEuclideanDistanceRepeatedQuadraticInjectionFp16NEON(
    const Float16 *lhs, const Float16 *rhs, size_t size, size_t m, float e2);
float MipsEuclideanDistanceSphericalInjectionFp16NEON(const Float16 *lhs,
                                                      const Float16 *rhs,
                                                      size_t size, float e2);
#endif

// AVX512 FP16 kernels.
#if defined(__AVX512F__)
float MipsEuclideanDistanceRepeatedQuadraticInjectionFp16AVX512(
    const Float16 *lhs, const Float16 *rhs, size_t size, size_t m, float e2);
float MipsEuclideanDistanceSphericalInjectionFp16AVX512(const Float16 *lhs,
                                                        const Float16 *rhs,
                                                        size_t size, float e2);
#endif

// AVX FP16 kernels.
#if defined(__AVX__)
float MipsEuclideanDistanceRepeatedQuadraticInjectionFp16AVX(
    const Float16 *lhs, const Float16 *rhs, size_t size, size_t m, float e2);
float MipsEuclideanDistanceSphericalInjectionFp16AVX(const Float16 *lhs,
                                                     const Float16 *rhs,
                                                     size_t size, float e2);
#endif

// Scalar kernels: the last resort of both dispatchers on the non-NEON path.
float MipsEuclideanDistanceRepeatedQuadraticInjectionFp16Scalar(
    const Float16 *lhs, const Float16 *rhs, size_t size, size_t m, float e2);
float MipsEuclideanDistanceSphericalInjectionFp16Scalar(
    const ailego::Float16 *p, const ailego::Float16 *q, size_t dim, float e2);


//! Compute the distance between matrix and query by SphericalInjection
//!
//! Writes the result to `*out`. AArch64 NEON builds always use the NEON
//! kernel. Other builds pick the first x86 kernel (AVX512F, then AVX) that is
//! both compiled in and reported by CpuFeatures, and fall back to the scalar
//! kernel when none applies.
void MipsSquaredEuclideanDistanceMatrix<Float16, 1, 1>::Compute(
    const ValueType *p, const ValueType *q, size_t dim, float e2, float *out) {
  // The NEON FP16 kernels are only defined when __aarch64__ is set as well,
  // so the guard must require both macros; a 32-bit ARM NEON build would
  // otherwise reference an undefined symbol. Such builds use the scalar path.
#if defined(__ARM_NEON) && defined(__aarch64__)
  *out = MipsEuclideanDistanceSphericalInjectionFp16NEON(p, q, dim, e2);
#else
#if defined(__AVX512F__)
  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) {
    *out = MipsEuclideanDistanceSphericalInjectionFp16AVX512(p, q, dim, e2);
    return;
  }
#endif  // __AVX512F__
#if defined(__AVX__)
  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX) {
    *out = MipsEuclideanDistanceSphericalInjectionFp16AVX(p, q, dim, e2);
    return;
  }
#endif  // __AVX__
  *out = MipsEuclideanDistanceSphericalInjectionFp16Scalar(p, q, dim, e2);
  return;
#endif  // __ARM_NEON && __aarch64__
}

//! Compute the distance between matrix and query by RepeatedQuadraticInjection
//!
//! Writes the result to `*out`; `m` is forwarded to the kernel as the number
//! of injection rounds. AArch64 NEON builds always use the NEON kernel. Other
//! builds pick the first x86 kernel (AVX512F, then AVX) that is both compiled
//! in and reported by CpuFeatures, and fall back to the scalar kernel.
void MipsSquaredEuclideanDistanceMatrix<Float16, 1, 1>::Compute(
    const ValueType *p, const ValueType *q, size_t dim, size_t m, float e2,
    float *out) {
  // The NEON FP16 kernels are only defined when __aarch64__ is set as well,
  // so the guard must require both macros; a 32-bit ARM NEON build would
  // otherwise reference an undefined symbol. Such builds use the scalar path.
#if defined(__ARM_NEON) && defined(__aarch64__)
  *out =
      MipsEuclideanDistanceRepeatedQuadraticInjectionFp16NEON(p, q, dim, m, e2);
#else
#if defined(__AVX512F__)
  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) {
    *out = MipsEuclideanDistanceRepeatedQuadraticInjectionFp16AVX512(p, q, dim,
                                                                     m, e2);
    return;
  }
#endif  // __AVX512F__
#if defined(__AVX__)
  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX) {
    *out = MipsEuclideanDistanceRepeatedQuadraticInjectionFp16AVX(p, q, dim, m,
                                                                  e2);
    return;
  }
#endif  // __AVX__
  *out = MipsEuclideanDistanceRepeatedQuadraticInjectionFp16Scalar(p, q, dim, m,
                                                                   e2);
  return;
#endif  // __ARM_NEON && __aarch64__
}

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/mips_euclidean_distance_matrix_fp16_neon.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_accum_fp16.i"
#include "distance_matrix_mips_utility.i"
#include "mips_euclidean_distance_matrix.h"

namespace zvec {
namespace ailego {

#if defined(__ARM_NEON) && defined(__aarch64__)
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
//! Compute the Inner Product between lhs and rhs, and each Squared L2-Norm
//! value (variant using native FP16 vector arithmetic).
//!
//! @param lhs   first FP16 vector, `size` elements
//! @param rhs   second FP16 vector, `size` elements
//! @param size  number of elements in each vector
//! @param sql   receives the sum of lhs[i] * lhs[i]
//! @param sqr   receives the sum of rhs[i] * rhs[i]
//! @return      the sum of lhs[i] * rhs[i]
//!
//! The three vector accumulators are float16x8_t, so the partial sums of the
//! vectorised part are kept in half-precision lanes until the reduction.
float InnerProductAndSquaredNormFp16NEON(const Float16 *lhs, const Float16 *rhs,
                                         size_t size, float *sql, float *sqr) {
  const Float16 *last = lhs + size;
  // End of the part that can be consumed eight elements at a time.
  const Float16 *last_aligned = lhs + ((size >> 3) << 3);
  float16x8_t v_sum = vdupq_n_f16(0);
  float16x8_t v_sum_norm1 = vdupq_n_f16(0);
  float16x8_t v_sum_norm2 = vdupq_n_f16(0);

  // Main loop: one 8-lane FP16 load per operand per iteration.
  for (; lhs != last_aligned; lhs += 8, rhs += 8) {
    float16x8_t v_lhs = vld1q_f16((const float16_t *)lhs);
    float16x8_t v_rhs = vld1q_f16((const float16_t *)rhs);
    v_sum = vfmaq_f16(v_sum, v_lhs, v_rhs);
    v_sum_norm1 = vfmaq_f16(v_sum_norm1, v_lhs, v_lhs);
    v_sum_norm2 = vfmaq_f16(v_sum_norm2, v_rhs, v_rhs);
  }
  // At least four elements left: load them into the low half and zero the
  // high half, so the upper four lanes add nothing to the accumulators.
  if (last >= last_aligned + 4) {
    float16x8_t v_lhs = vcombine_f16(vld1_f16((const float16_t *)lhs),
                                     vreinterpret_f16_u64(vdup_n_u64(0ul)));
    float16x8_t v_rhs = vcombine_f16(vld1_f16((const float16_t *)rhs),
                                     vreinterpret_f16_u64(vdup_n_u64(0ul)));
    v_sum = vfmaq_f16(v_sum, v_lhs, v_rhs);
    v_sum_norm1 = vfmaq_f16(v_sum_norm1, v_lhs, v_lhs);
    v_sum_norm2 = vfmaq_f16(v_sum_norm2, v_rhs, v_rhs);
    lhs += 4;
    rhs += 4;
  }

  // HorizontalAdd_FP16_NEON is defined outside this file; presumably it
  // reduces the eight lanes to one float -- TODO confirm.
  float result = HorizontalAdd_FP16_NEON(v_sum);
  float norm1 = HorizontalAdd_FP16_NEON(v_sum_norm1);
  float norm2 = HorizontalAdd_FP16_NEON(v_sum_norm2);

  // Up to three trailing elements remain; the cases fall through on purpose
  // so that every remaining index is visited, highest index first.
  switch (last - lhs) {
    case 3:
      FMA_FP16_GENERAL(lhs[2], rhs[2], result, norm1, norm2);
      /* FALLTHRU */
    case 2:
      FMA_FP16_GENERAL(lhs[1], rhs[1], result, norm1, norm2);
      /* FALLTHRU */
    case 1:
      FMA_FP16_GENERAL(lhs[0], rhs[0], result, norm1, norm2);
  }
  *sql = norm1;
  *sqr = norm2;
  return result;
}
#else
//! Compute the Inner Product between p and q, and each Squared L2-Norm value
float InnerProductAndSquaredNormFp16NEON(const Float16 *lhs, const Float16 *rhs,
                                         size_t size, float *sql, float *sqr) {
  const Float16 *last = lhs + size;
  const Float16 *last_aligned = lhs + ((size >> 3) << 3);
  float32x4_t v_sum_0 = vdupq_n_f32(0);
  float32x4_t v_sum_1 = vdupq_n_f32(0);
  float32x4_t v_sum_norm1 = vdupq_n_f32(0);
  float32x4_t v_sum_norm2 = vdupq_n_f32(0);

  for (; lhs != last_aligned; lhs += 8, rhs += 8) {
    float16x8_t v_lhs = vld1q_f16((const float16_t *)lhs);
    float16x8_t v_rhs = vld1q_f16((const float16_t *)rhs);
    float32x4_t v_lhs_0 = vcvt_f32_f16(vget_low_f16(v_lhs));
    float32x4_t v_rhs_0 = vcvt_f32_f16(vget_low_f16(v_rhs));
    float32x4_t v_lhs_1 = vcvt_high_f32_f16(v_lhs);
    float32x4_t v_rhs_1 = vcvt_high_f32_f16(v_rhs);
    v_sum_0 = vfmaq_f32(v_sum_0, v_lhs_0, v_rhs_0);
    v_sum_1 = vfmaq_f32(v_sum_1, v_lhs_1, v_rhs_1);
    v_sum_norm1 = vfmaq_f32(v_sum_norm1, v_lhs_0, v_lhs_0);
    v_sum_norm1 = vfmaq_f32(v_sum_norm1, v_lhs_1, v_lhs_1);
    v_sum_norm2 = vfmaq_f32(v_sum_norm2, v_rhs_0, v_rhs_0);
    v_sum_norm2 = vfmaq_f32(v_sum_norm2, v_rhs_1, v_rhs_1);
  }
  if (last >= last_aligned + 4) {
    float32x4_t v_lhs_0 = vcvt_f32_f16(vld1_f16((const float16_t *)lhs));
    float32x4_t v_rhs_0 = vcvt_f32_f16(vld1_f16((const float16_t *)rhs));
    v_sum_0 = vfmaq_f32(v_sum_0, v_lhs_0, v_rhs_0);
    v_sum_norm1 = vfmaq_f32(v_sum_norm1, v_lhs_0, v_lhs_0);
    v_sum_norm2 = vfmaq_f32(v_sum_norm2, v_rhs_0, v_rhs_0);
    lhs += 4;
    rhs += 4;
  }

  float result = vaddvq_f32(vaddq_f32(v_sum_0, v_sum_1));
  float norm1 = vaddvq_f32(v_sum_norm1);
  float norm2 = vaddvq_f32(v_sum_norm2);
  switch (last - lhs) {
    case 3:
      FMA_FP16_GENERAL(lhs[2], rhs[2], result, norm1, norm2);
      /* FALLTHRU */
    case 2:
      FMA_FP16_GENERAL(lhs[1], rhs[1], result, norm1, norm2);
      /* FALLTHRU */
    case 1:
      FMA_FP16_GENERAL(lhs[0], rhs[0], result, norm1, norm2);
  }
  *sql = norm1;
  *sqr = norm2;
  return result;
}

#endif  // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC

//! Spherical-injection distance of two FP16 vectors, built on the NEON
//! inner-product / squared-norm kernel.
float MipsEuclideanDistanceSphericalInjectionFp16NEON(const Float16 *lhs,
                                                      const Float16 *rhs,
                                                      size_t size, float e2) {
  float lhs_sq_norm = 0.0f;
  float rhs_sq_norm = 0.0f;
  // The kernel returns the inner product and fills in both squared norms.
  const float inner_product = InnerProductAndSquaredNormFp16NEON(
      lhs, rhs, size, &lhs_sq_norm, &rhs_sq_norm);

  return ComputeSphericalInjection(inner_product, lhs_sq_norm, rhs_sq_norm,
                                   e2);
}

//! Repeated-quadratic-injection distance of two FP16 vectors, built on the
//! NEON inner-product / squared-norm kernel. `m` is the number of rounds.
float MipsEuclideanDistanceRepeatedQuadraticInjectionFp16NEON(
    const Float16 *lhs, const Float16 *rhs, size_t size, size_t m, float e2) {
  float lhs_sq = 0.0f;
  float rhs_sq = 0.0f;
  const float dot =
      InnerProductAndSquaredNormFp16NEON(lhs, rhs, size, &lhs_sq, &rhs_sq);

  // Scaled squared distance, then scale the norms for the injection rounds.
  float total = e2 * (lhs_sq + rhs_sq - 2 * dot);
  lhs_sq *= e2;
  rhs_sq *= e2;

  for (size_t remaining = m; remaining != 0; --remaining) {
    const float gap = lhs_sq - rhs_sq;
    total += gap * gap;
    lhs_sq = lhs_sq * lhs_sq;
    rhs_sq = rhs_sq * rhs_sq;
  }

  return total;
}
#endif  // __ARM_NEON && __aarch64__

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/mips_euclidean_distance_matrix_fp32_avx.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_accum_fp32.i"
#include "distance_matrix_mips_utility.i"
#include "mips_euclidean_distance_matrix.h"

namespace zvec {
namespace ailego {

// SSE kernel, declared here because the AVX wrappers in this file hand inputs
// with fewer than eight elements to it.
#if defined(__SSE__)
float InnerProductAndSquaredNormFp32SSE(const float *lhs, const float *rhs,
                                        size_t size, float *sql, float *sqr);
#endif

#if defined(__AVX__)
//! Compute the Inner Product between p and q, and each Squared L2-Norm value
float InnerProductAndSquaredNormFp32AVX(const float *lhs, const float *rhs,
                                        size_t size, float *sql, float *sqr) {
  const float *last = lhs + size;
  const float *last_aligned = lhs + ((size >> 4) << 4);

  __m256 ymm_sum_0 = _mm256_setzero_ps();
  __m256 ymm_sum_1 = _mm256_setzero_ps();
  __m256 ymm_sum_norm1 = _mm256_setzero_ps();
  __m256 ymm_sum_norm2 = _mm256_setzero_ps();

  if (((uintptr_t)lhs & 0x1f) == 0 && ((uintptr_t)rhs & 0x1f) == 0) {
    for (; lhs != last_aligned; lhs += 16, rhs += 16) {
      __m256 ymm_lhs_0 = _mm256_load_ps(lhs + 0);
      __m256 ymm_lhs_1 = _mm256_load_ps(lhs + 8);
      __m256 ymm_rhs_0 = _mm256_load_ps(rhs + 0);
      __m256 ymm_rhs_1 = _mm256_load_ps(rhs + 8);
      ymm_sum_0 = _mm256_fmadd_ps(ymm_lhs_0, ymm_rhs_0, ymm_sum_0);
      ymm_sum_1 = _mm256_fmadd_ps(ymm_lhs_1, ymm_rhs_1, ymm_sum_1);
      ymm_sum_norm1 = _mm256_fmadd_ps(ymm_lhs_0, ymm_lhs_0, ymm_sum_norm1);
      ymm_sum_norm1 = _mm256_fmadd_ps(ymm_lhs_1, ymm_lhs_1, ymm_sum_norm1);
      ymm_sum_norm2 = _mm256_fmadd_ps(ymm_rhs_0, ymm_rhs_0, ymm_sum_norm2);
      ymm_sum_norm2 = _mm256_fmadd_ps(ymm_rhs_1, ymm_rhs_1, ymm_sum_norm2);
    }

    if (last >= last_aligned + 8) {
      __m256 ymm_lhs_0 = _mm256_load_ps(lhs);
      __m256 ymm_rhs_0 = _mm256_load_ps(rhs);
      ymm_sum_0 = _mm256_fmadd_ps(ymm_lhs_0, ymm_rhs_0, ymm_sum_0);
      ymm_sum_norm1 = _mm256_fmadd_ps(ymm_lhs_0, ymm_lhs_0, ymm_sum_norm1);
      ymm_sum_norm2 = _mm256_fmadd_ps(ymm_rhs_0, ymm_rhs_0, ymm_sum_norm2);
      lhs += 8;
      rhs += 8;
    }
  } else {
    for (; lhs != last_aligned; lhs += 16, rhs += 16) {
      __m256 ymm_lhs_0 = _mm256_loadu_ps(lhs + 0);
      __m256 ymm_lhs_1 = _mm256_loadu_ps(lhs + 8);
      __m256 ymm_rhs_0 = _mm256_loadu_ps(rhs + 0);
      __m256 ymm_rhs_1 = _mm256_loadu_ps(rhs + 8);
      ymm_sum_0 = _mm256_fmadd_ps(ymm_lhs_0, ymm_rhs_0, ymm_sum_0);
      ymm_sum_1 = _mm256_fmadd_ps(ymm_lhs_1, ymm_rhs_1, ymm_sum_1);
      ymm_sum_norm1 = _mm256_fmadd_ps(ymm_lhs_0, ymm_lhs_0, ymm_sum_norm1);
      ymm_sum_norm1 = _mm256_fmadd_ps(ymm_lhs_1, ymm_lhs_1, ymm_sum_norm1);
      ymm_sum_norm2 = _mm256_fmadd_ps(ymm_rhs_0, ymm_rhs_0, ymm_sum_norm2);
      ymm_sum_norm2 = _mm256_fmadd_ps(ymm_rhs_1, ymm_rhs_1, ymm_sum_norm2);
    }

    if (last >= last_aligned + 8) {
      __m256 ymm_lhs_0 = _mm256_loadu_ps(lhs);
      __m256 ymm_rhs_0 = _mm256_loadu_ps(rhs);
      ymm_sum_0 = _mm256_fmadd_ps(ymm_lhs_0, ymm_rhs_0, ymm_sum_0);
      ymm_sum_norm1 = _mm256_fmadd_ps(ymm_lhs_0, ymm_lhs_0, ymm_sum_norm1);
      ymm_sum_norm2 = _mm256_fmadd_ps(ymm_rhs_0, ymm_rhs_0, ymm_sum_norm2);
      lhs += 8;
      rhs += 8;
    }
  }
  float result = HorizontalAdd_FP32_V256(_mm256_add_ps(ymm_sum_0, ymm_sum_1));
  float norm1 = HorizontalAdd_FP32_V256(ymm_sum_norm1);
  float norm2 = HorizontalAdd_FP32_V256(ymm_sum_norm2);

  switch (last - lhs) {
    case 7:
      FMA_FP32_GENERAL(lhs[6], rhs[6], result, norm1, norm2)
      /* FALLTHRU */
    case 6:
      FMA_FP32_GENERAL(lhs[5], rhs[5], result, norm1, norm2)
      /* FALLTHRU */
    case 5:
      FMA_FP32_GENERAL(lhs[4], rhs[4], result, norm1, norm2)
      /* FALLTHRU */
    case 4:
      FMA_FP32_GENERAL(lhs[3], rhs[3], result, norm1, norm2)
      /* FALLTHRU */
    case 3:
      FMA_FP32_GENERAL(lhs[2], rhs[2], result, norm1, norm2)
      /* FALLTHRU */
    case 2:
      FMA_FP32_GENERAL(lhs[1], rhs[1], result, norm1, norm2)
      /* FALLTHRU */
    case 1:
      FMA_FP32_GENERAL(lhs[0], rhs[0], result, norm1, norm2)
  }
  *sql = norm1;
  *sqr = norm2;
  return result;
}

//! Spherical-injection distance of two FP32 vectors (AVX entry point).
float MipsEuclideanDistanceSphericalInjectionFp32AVX(const float *lhs,
                                                     const float *rhs,
                                                     size_t size, float e2) {
  float lhs_sq_norm = 0.0f;
  float rhs_sq_norm = 0.0f;

  // Inputs with fewer than eight elements go to the SSE kernel.
  const float inner_product =
      (size > 7) ? InnerProductAndSquaredNormFp32AVX(lhs, rhs, size,
                                                     &lhs_sq_norm, &rhs_sq_norm)
                 : InnerProductAndSquaredNormFp32SSE(
                       lhs, rhs, size, &lhs_sq_norm, &rhs_sq_norm);

  return ComputeSphericalInjection(inner_product, lhs_sq_norm, rhs_sq_norm,
                                   e2);
}

//! Repeated-quadratic-injection distance of two FP32 vectors (AVX entry
//! point). `m` is the number of injection rounds.
float MipsEuclideanDistanceRepeatedQuadraticInjectionFp32AVX(
    const float *lhs, const float *rhs, size_t size, size_t m, float e2) {
  float lhs_sq = 0.0f;
  float rhs_sq = 0.0f;

  // Inputs with fewer than eight elements go to the SSE kernel.
  const float dot =
      (size > 7)
          ? InnerProductAndSquaredNormFp32AVX(lhs, rhs, size, &lhs_sq, &rhs_sq)
          : InnerProductAndSquaredNormFp32SSE(lhs, rhs, size, &lhs_sq, &rhs_sq);

  float distance = e2 * (lhs_sq + rhs_sq - 2 * dot);
  lhs_sq *= e2;
  rhs_sq *= e2;

  // Each round adds the squared gap of the running norms, then squares them.
  size_t round = 0;
  while (round < m) {
    const float gap = lhs_sq - rhs_sq;
    distance += gap * gap;
    lhs_sq = lhs_sq * lhs_sq;
    rhs_sq = rhs_sq * rhs_sq;
    ++round;
  }

  return distance;
}
#endif  // __AVX__

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/mips_euclidean_distance_matrix_fp32_avx512.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_accum_fp32.i"
#include "distance_matrix_mips_utility.i"
#include "mips_euclidean_distance_matrix.h"

namespace zvec {
namespace ailego {

// Narrower kernels, declared here because the AVX512 wrappers in this file
// hand short inputs to them: the SSE kernel for fewer than eight elements,
// the AVX kernel for eight to fifteen.
#if defined(__SSE__)
float InnerProductAndSquaredNormFp32SSE(const float *lhs, const float *rhs,
                                        size_t size, float *sql, float *sqr);
#endif

#if defined(__AVX__)
float InnerProductAndSquaredNormFp32AVX(const float *lhs, const float *rhs,
                                        size_t size, float *sql, float *sqr);
#endif

#if defined(__AVX512F__)
//! Compute the Inner Product between lhs and rhs, and each Squared L2-Norm
//! value, for FP32 input processed in 512-bit registers.
//!
//! @param lhs   first vector, `size` floats
//! @param rhs   second vector, `size` floats
//! @param size  number of elements in each vector
//! @param sql   receives the reduced squared-norm accumulator of lhs
//! @param sqr   receives the reduced squared-norm accumulator of rhs
//! @return      the reduced inner-product accumulator
//!
//! NOTE(review): FMA_FP32_AVX512 / FMA_MASK_FP32_AVX512 are defined outside
//! this file; presumably `acc += a * b`, with the masked form only updating
//! the lanes selected by the mask -- confirm against the macro definitions.
float InnerProductAndSquaredNormFp32AVX512(const float *lhs, const float *rhs,
                                           size_t size, float *sql,
                                           float *sqr) {
  const float *last = lhs + size;
  // End of the part that can be consumed 32 floats at a time.
  const float *last_aligned = lhs + ((size >> 5) << 5);

  __m512 zmm_sum_0 = _mm512_setzero_ps();
  __m512 zmm_sum_1 = _mm512_setzero_ps();
  __m512 zmm_sum_norm1 = _mm512_setzero_ps();
  __m512 zmm_sum_norm2 = _mm512_setzero_ps();

  // Aligned loads are only used when both pointers are 64-byte aligned.
  if (((uintptr_t)lhs & 0x3f) == 0 && ((uintptr_t)rhs & 0x3f) == 0) {
    for (; lhs != last_aligned; lhs += 32, rhs += 32) {
      __m512 zmm_lhs_0 = _mm512_load_ps(lhs + 0);
      __m512 zmm_lhs_1 = _mm512_load_ps(lhs + 16);
      __m512 zmm_rhs_0 = _mm512_load_ps(rhs + 0);
      __m512 zmm_rhs_1 = _mm512_load_ps(rhs + 16);
      FMA_FP32_AVX512(zmm_lhs_0, zmm_rhs_0, zmm_sum_0)
      FMA_FP32_AVX512(zmm_lhs_1, zmm_rhs_1, zmm_sum_1)
      FMA_FP32_AVX512(zmm_lhs_0, zmm_lhs_0, zmm_sum_norm1)
      FMA_FP32_AVX512(zmm_lhs_1, zmm_lhs_1, zmm_sum_norm1)
      FMA_FP32_AVX512(zmm_rhs_0, zmm_rhs_0, zmm_sum_norm2)
      FMA_FP32_AVX512(zmm_rhs_1, zmm_rhs_1, zmm_sum_norm2)
    }

    // One more full register if at least 16 floats are left.
    if (last >= last_aligned + 16) {
      __m512 zmm_lhs_0 = _mm512_load_ps(lhs);
      __m512 zmm_rhs_0 = _mm512_load_ps(rhs);
      FMA_FP32_AVX512(zmm_lhs_0, zmm_rhs_0, zmm_sum_0)
      FMA_FP32_AVX512(zmm_lhs_0, zmm_lhs_0, zmm_sum_norm1)
      FMA_FP32_AVX512(zmm_rhs_0, zmm_rhs_0, zmm_sum_norm2)
      lhs += 16;
      rhs += 16;
    }
  } else {
    // Same computation as above, using unaligned loads.
    for (; lhs != last_aligned; lhs += 32, rhs += 32) {
      __m512 zmm_lhs_0 = _mm512_loadu_ps(lhs + 0);
      __m512 zmm_lhs_1 = _mm512_loadu_ps(lhs + 16);
      __m512 zmm_rhs_0 = _mm512_loadu_ps(rhs + 0);
      __m512 zmm_rhs_1 = _mm512_loadu_ps(rhs + 16);
      FMA_FP32_AVX512(zmm_lhs_0, zmm_rhs_0, zmm_sum_0)
      FMA_FP32_AVX512(zmm_lhs_1, zmm_rhs_1, zmm_sum_1)
      FMA_FP32_AVX512(zmm_lhs_0, zmm_lhs_0, zmm_sum_norm1)
      FMA_FP32_AVX512(zmm_lhs_1, zmm_lhs_1, zmm_sum_norm1)
      FMA_FP32_AVX512(zmm_rhs_0, zmm_rhs_0, zmm_sum_norm2)
      FMA_FP32_AVX512(zmm_rhs_1, zmm_rhs_1, zmm_sum_norm2)
    }

    if (last >= last_aligned + 16) {
      __m512 zmm_lhs_0 = _mm512_loadu_ps(lhs);
      __m512 zmm_rhs_0 = _mm512_loadu_ps(rhs);
      FMA_FP32_AVX512(zmm_lhs_0, zmm_rhs_0, zmm_sum_0)
      FMA_FP32_AVX512(zmm_lhs_0, zmm_lhs_0, zmm_sum_norm1)
      FMA_FP32_AVX512(zmm_rhs_0, zmm_rhs_0, zmm_sum_norm2)
      lhs += 16;
      rhs += 16;
    }
  }

  // Fold the two inner-product accumulators before handling the tail.
  zmm_sum_0 = _mm512_add_ps(zmm_sum_0, zmm_sum_1);
  if (lhs != last) {
    // Between 1 and 15 floats remain here; the mask has exactly that many
    // low bits set, so the masked loads only touch the remaining elements.
    __mmask16 mask = (__mmask16)((1 << (last - lhs)) - 1);
    __m512 zmm_undefined = _mm512_undefined_ps();
    __m512 zmm_lhs_0 = _mm512_mask_loadu_ps(zmm_undefined, mask, lhs);
    __m512 zmm_rhs_0 = _mm512_mask_loadu_ps(zmm_undefined, mask, rhs);
    FMA_MASK_FP32_AVX512(zmm_lhs_0, zmm_rhs_0, zmm_sum_0, mask);
    FMA_MASK_FP32_AVX512(zmm_lhs_0, zmm_lhs_0, zmm_sum_norm1, mask);
    FMA_MASK_FP32_AVX512(zmm_rhs_0, zmm_rhs_0, zmm_sum_norm2, mask);
  }

  *sql = HorizontalAdd_FP32_V512(zmm_sum_norm1);
  *sqr = HorizontalAdd_FP32_V512(zmm_sum_norm2);
  return HorizontalAdd_FP32_V512(zmm_sum_0);
}

//! Spherical-injection distance of two FP32 vectors (AVX512 entry point).
float MipsEuclideanDistanceSphericalInjectionFp32AVX512(const float *lhs,
                                                        const float *rhs,
                                                        size_t size, float e2) {
  float lhs_sq_norm = 0.0f;
  float rhs_sq_norm = 0.0f;
  float inner_product = 0.0f;

  // Choose the kernel by length: SSE below 8 elements, AVX below 16,
  // AVX512 from 16 upwards.
  if (size <= 7) {
    inner_product = InnerProductAndSquaredNormFp32SSE(
        lhs, rhs, size, &lhs_sq_norm, &rhs_sq_norm);
  } else if (size <= 15) {
    inner_product = InnerProductAndSquaredNormFp32AVX(
        lhs, rhs, size, &lhs_sq_norm, &rhs_sq_norm);
  } else {
    inner_product = InnerProductAndSquaredNormFp32AVX512(
        lhs, rhs, size, &lhs_sq_norm, &rhs_sq_norm);
  }

  return ComputeSphericalInjection(inner_product, lhs_sq_norm, rhs_sq_norm,
                                   e2);
}

//! Repeated-quadratic-injection distance of two FP32 vectors (AVX512 entry
//! point). `m` is the number of injection rounds.
float MipsEuclideanDistanceRepeatedQuadraticInjectionFp32AVX512(
    const float *lhs, const float *rhs, size_t size, size_t m, float e2) {
  float lhs_sq = 0.0f;
  float rhs_sq = 0.0f;
  float dot = 0.0f;

  // Choose the kernel by length: SSE below 8 elements, AVX below 16,
  // AVX512 from 16 upwards.
  if (size <= 7) {
    dot = InnerProductAndSquaredNormFp32SSE(lhs, rhs, size, &lhs_sq, &rhs_sq);
  } else if (size <= 15) {
    dot = InnerProductAndSquaredNormFp32AVX(lhs, rhs, size, &lhs_sq, &rhs_sq);
  } else {
    dot = InnerProductAndSquaredNormFp32AVX512(lhs, rhs, size, &lhs_sq,
                                               &rhs_sq);
  }

  float total = e2 * (lhs_sq + rhs_sq - 2 * dot);
  lhs_sq *= e2;
  rhs_sq *= e2;

  for (size_t remaining = m; remaining != 0; --remaining) {
    const float gap = lhs_sq - rhs_sq;
    total += gap * gap;
    lhs_sq = lhs_sq * lhs_sq;
    rhs_sq = rhs_sq * rhs_sq;
  }

  return total;
}
#endif  // __AVX512F__

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/mips_euclidean_distance_matrix_fp32_dispatch.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <ailego/internal/cpu_features.h>
#include "mips_euclidean_distance_matrix.h"

namespace zvec {
namespace ailego {

// Forward declarations of the kernels that the Compute() dispatchers and the
// sparse helper below choose between. Each SIMD group is declared only when
// its compiler macro is defined.

// NEON exposes only the raw inner-product / squared-norm kernel; the
// dispatchers below apply the injection formulas themselves on that path.
#if defined(__ARM_NEON)
float InnerProductAndSquaredNormFp32NEON(const float *lhs, const float *rhs,
                                         size_t size, float *sql, float *sqr);
#endif

// AVX512 FP32 kernels.
#if defined(__AVX512F__)
float MipsEuclideanDistanceRepeatedQuadraticInjectionFp32AVX512(
    const float *lhs, const float *rhs, size_t size, size_t m, float e2);
float MipsEuclideanDistanceSphericalInjectionFp32AVX512(const float *lhs,
                                                        const float *rhs,
                                                        size_t size, float e2);
#endif

// AVX FP32 kernels.
#if defined(__AVX__)
float MipsEuclideanDistanceRepeatedQuadraticInjectionFp32AVX(
    const float *lhs, const float *rhs, size_t size, size_t m, float e2);
float MipsEuclideanDistanceSphericalInjectionFp32AVX(const float *lhs,
                                                     const float *rhs,
                                                     size_t size, float e2);
#endif

// SSE FP32 kernels.
#if defined(__SSE__)
float MipsEuclideanDistanceRepeatedQuadraticInjectionFp32SSE(
    const float *lhs, const float *rhs, size_t size, size_t m, float e2);
float MipsEuclideanDistanceSphericalInjectionFp32SSE(const float *lhs,
                                                     const float *rhs,
                                                     size_t size, float e2);
#endif

// Scalar kernels: the last resort of both dispatchers on the non-NEON path.
float MipsEuclideanDistanceRepeatedQuadraticInjectionFp32Scalar(
    const float *p, const float *q, size_t dim, size_t m, float e2);
float MipsEuclideanDistanceSphericalInjectionFp32Scalar(const float *p,
                                                        const float *q,
                                                        size_t dim, float e2);

// Sparse inner product used when the SSE4.1 variant is not compiled in.
float MipsInnerProductSparseInSegment(uint32_t m_sparse_count,
                                      const uint16_t *m_sparse_index,
                                      const float *m_sparse_value,
                                      uint32_t q_sparse_count,
                                      const uint16_t *q_sparse_index,
                                      const float *q_sparse_value);

//! Compute the distance between matrix and query by SphericalInjection
//!
//! Writes the result to `*out`. NEON builds run the NEON inner-product kernel
//! and apply ComputeSphericalInjection directly. Other builds pick the first
//! x86 kernel (AVX512F, AVX, SSE) that is both compiled in and reported by
//! CpuFeatures, and fall back to the scalar kernel when none applies.
void MipsSquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(
    const ValueType *p, const ValueType *q, size_t dim, float e2, float *out) {
  // Test the macro with defined(), as the declarations and the sibling
  // Compute overload in this file do; a bare `#if __ARM_NEON` relies on an
  // undefined macro evaluating to 0 and warns under -Wundef.
#if defined(__ARM_NEON)
  float u2{0.0f};
  float v2{0.0f};
  float sum = InnerProductAndSquaredNormFp32NEON(p, q, dim, &u2, &v2);

  *out = ComputeSphericalInjection(sum, u2, v2, e2);
  return;
#else
#if defined(__AVX512F__)
  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) {
    *out = MipsEuclideanDistanceSphericalInjectionFp32AVX512(p, q, dim, e2);
    return;
  }
#endif  // __AVX512F__
#if defined(__AVX__)
  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX) {
    *out = MipsEuclideanDistanceSphericalInjectionFp32AVX(p, q, dim, e2);
    return;
  }
#endif  // __AVX__
#if defined(__SSE__)
  if (zvec::ailego::internal::CpuFeatures::static_flags_.SSE) {
    *out = MipsEuclideanDistanceSphericalInjectionFp32SSE(p, q, dim, e2);
    return;
  }
#endif  // __SSE__
  *out = MipsEuclideanDistanceSphericalInjectionFp32Scalar(p, q, dim, e2);
  return;
#endif  // __ARM_NEON
}

//! Compute the distance between matrix and query by RepeatedQuadraticInjection
//!
//! Writes the result to `*out`; `m` is the number of injection rounds.
void MipsSquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(
    const ValueType *p, const ValueType *q, size_t dim, size_t m, float e2,
    float *out) {
#if defined(__ARM_NEON)
  // NEON path: run the kernel, then apply the injection formula inline.
  float p_sq = 0.0f;
  float q_sq = 0.0f;
  const float dot = InnerProductAndSquaredNormFp32NEON(p, q, dim, &p_sq, &q_sq);

  float total = e2 * (p_sq + q_sq - 2 * dot);
  p_sq *= e2;
  q_sq *= e2;
  for (size_t remaining = m; remaining != 0; --remaining) {
    const float gap = p_sq - q_sq;
    total += gap * gap;
    p_sq = p_sq * p_sq;
    q_sq = q_sq * q_sq;
  }
  *out = total;
#else
  // x86 / generic path: first kernel that is compiled in and reported by
  // CpuFeatures wins; the scalar kernel is the fallback.
#if defined(__AVX512F__)
  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) {
    *out = MipsEuclideanDistanceRepeatedQuadraticInjectionFp32AVX512(p, q, dim,
                                                                     m, e2);
    return;
  }
#endif  // __AVX512F__
#if defined(__AVX__)
  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX) {
    *out = MipsEuclideanDistanceRepeatedQuadraticInjectionFp32AVX(p, q, dim, m,
                                                                  e2);
    return;
  }
#endif  // __AVX__
#if defined(__SSE__)
  if (zvec::ailego::internal::CpuFeatures::static_flags_.SSE) {
    *out = MipsEuclideanDistanceRepeatedQuadraticInjectionFp32SSE(p, q, dim, m,
                                                                  e2);
    return;
  }
#endif  // __SSE__
  *out = MipsEuclideanDistanceRepeatedQuadraticInjectionFp32Scalar(p, q, dim, m,
                                                                   e2);
#endif  // __ARM_NEON
}

// Sparse
// SSE4.1 variant of the sparse inner product; declared only when the build
// targets SSE4.1 and used by ComputeInnerProductSparseInSegment below.
#if defined(__SSE4_1__)
float MipsInnerProductSparseInSegmentSSE(uint32_t m_sparse_count,
                                         const uint16_t *m_sparse_index,
                                         const float *m_sparse_value,
                                         uint32_t q_sparse_count,
                                         const uint16_t *q_sparse_index,
                                         const float *q_sparse_value);
#endif

//! Sparse inner product of one segment for float values.
//! The implementation is fixed at compile time: the SSE4.1 variant when
//! __SSE4_1__ is defined, the generic one otherwise.
template <>
float MipsSquaredEuclideanSparseDistanceMatrix<float>::
    ComputeInnerProductSparseInSegment(uint32_t m_sparse_count,
                                       const uint16_t *m_sparse_index,
                                       const ValueType *m_sparse_value,
                                       uint32_t q_sparse_count,
                                       const uint16_t *q_sparse_index,
                                       const ValueType *q_sparse_value) {
#if !defined(__SSE4_1__)
  return MipsInnerProductSparseInSegment(m_sparse_count, m_sparse_index,
                                         m_sparse_value, q_sparse_count,
                                         q_sparse_index, q_sparse_value);
#else
  return MipsInnerProductSparseInSegmentSSE(m_sparse_count, m_sparse_index,
                                            m_sparse_value, q_sparse_count,
                                            q_sparse_index, q_sparse_value);
#endif
}

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/mips_euclidean_distance_matrix_fp32_neon.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_accum_fp32.i"
#include "distance_matrix_mips_utility.i"
#include "mips_euclidean_distance_matrix.h"

namespace zvec {
namespace ailego {

#if defined(__ARM_NEON)
//! Compute the Inner Product between p and q, and each Squared L2-Norm value
float InnerProductAndSquaredNormFp32NEON(const float *lhs, const float *rhs,
                                         size_t size, float *sql, float *sqr) {
  const float *last = lhs + size;
  const float *last_aligned = lhs + ((size >> 3) << 3);

  float32x4_t v_sum_0 = vdupq_n_f32(0);
  float32x4_t v_sum_1 = vdupq_n_f32(0);
  float32x4_t v_sum_norm1 = vdupq_n_f32(0);
  float32x4_t v_sum_norm2 = vdupq_n_f32(0);

  for (; lhs != last_aligned; lhs += 8, rhs += 8) {
    float32x4_t v_lhs_0 = vld1q_f32(lhs + 0);
    float32x4_t v_lhs_1 = vld1q_f32(lhs + 4);
    float32x4_t v_rhs_0 = vld1q_f32(rhs + 0);
    float32x4_t v_rhs_1 = vld1q_f32(rhs + 4);
    v_sum_0 = vfmaq_f32(v_sum_0, v_lhs_0, v_rhs_0);
    v_sum_1 = vfmaq_f32(v_sum_1, v_lhs_1, v_rhs_1);
    v_sum_norm1 = vfmaq_f32(v_sum_norm1, v_lhs_0, v_lhs_0);
    v_sum_norm1 = vfmaq_f32(v_sum_norm1, v_lhs_1, v_lhs_1);
    v_sum_norm2 = vfmaq_f32(v_sum_norm2, v_rhs_0, v_rhs_0);
    v_sum_norm2 = vfmaq_f32(v_sum_norm2, v_rhs_1, v_rhs_1);
  }
  if (last >= last_aligned + 4) {
    float32x4_t v_lhs_0 = vld1q_f32(lhs);
    float32x4_t v_rhs_0 = vld1q_f32(rhs);
    v_sum_0 = vfmaq_f32(v_sum_0, v_lhs_0, v_rhs_0);
    v_sum_norm1 = vfmaq_f32(v_sum_norm1, v_lhs_0, v_lhs_0);
    v_sum_norm2 = vfmaq_f32(v_sum_norm2, v_rhs_0, v_rhs_0);
    lhs += 4;
    rhs += 4;
  }

  float result = vaddvq_f32(vaddq_f32(v_sum_0, v_sum_1));
  float norm1 = vaddvq_f32(v_sum_norm1);
  float norm2 = vaddvq_f32(v_sum_norm2);
  switch (last - lhs) {
    case 3:
      FMA_FP32_GENERAL(lhs[2], rhs[2], result, norm1, norm2)
      /* FALLTHRU */
    case 2:
      FMA_FP32_GENERAL(lhs[1], rhs[1], result, norm1, norm2)
      /* FALLTHRU */
    case 1:
      FMA_FP32_GENERAL(lhs[0], rhs[0], result, norm1, norm2)
  }
  *sql = norm1;
  *sqr = norm2;
  return result;
}

#endif  //__ARM_NEON

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/mips_euclidean_distance_matrix_fp32_sse.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_accum_fp32.i"
#include "distance_matrix_mips_utility.i"
#include "mips_euclidean_distance_matrix.h"

namespace zvec {
namespace ailego {

#if defined(__SSE__)
//! Compute, in a single pass, the inner product of two FP32 vectors and the
//! squared L2-norm of each of them (128-bit SSE implementation).
//!
//! @param lhs   first vector, `size` floats
//! @param rhs   second vector, `size` floats
//! @param size  number of elements in each vector
//! @param sql   out: sum of lhs[i] * lhs[i]
//! @param sqr   out: sum of rhs[i] * rhs[i]
//! @return sum of lhs[i] * rhs[i]
float InnerProductAndSquaredNormFp32SSE(const float *lhs, const float *rhs,
                                        size_t size, float *sql, float *sqr) {
  const float *const end = lhs + size;
  const float *const end_by_8 = lhs + (size & ~static_cast<size_t>(7));
  // Aligned loads are only used when both inputs sit on a 16-byte boundary.
  const bool both_aligned = (((uintptr_t)lhs | (uintptr_t)rhs) & 0xf) == 0;

  __m128 acc_dot = _mm_setzero_ps();
  __m128 acc_sq_lhs = _mm_setzero_ps();
  __m128 acc_sq_rhs = _mm_setzero_ps();

  if (both_aligned) {
    // Eight floats per iteration, as two 4-lane halves.
    while (lhs != end_by_8) {
      const __m128 l0 = _mm_load_ps(lhs);
      const __m128 l1 = _mm_load_ps(lhs + 4);
      const __m128 r0 = _mm_load_ps(rhs);
      const __m128 r1 = _mm_load_ps(rhs + 4);
      acc_dot = _mm_fmadd_ps(l1, r1, _mm_fmadd_ps(l0, r0, acc_dot));
      acc_sq_lhs = _mm_fmadd_ps(l1, l1, _mm_fmadd_ps(l0, l0, acc_sq_lhs));
      acc_sq_rhs = _mm_fmadd_ps(r1, r1, _mm_fmadd_ps(r0, r0, acc_sq_rhs));
      lhs += 8;
      rhs += 8;
    }

    // One extra 4-lane step when at least four elements are left.
    if (end - lhs >= 4) {
      const __m128 l0 = _mm_load_ps(lhs);
      const __m128 r0 = _mm_load_ps(rhs);
      acc_dot = _mm_fmadd_ps(l0, r0, acc_dot);
      acc_sq_lhs = _mm_fmadd_ps(l0, l0, acc_sq_lhs);
      acc_sq_rhs = _mm_fmadd_ps(r0, r0, acc_sq_rhs);
      lhs += 4;
      rhs += 4;
    }
  } else {
    // Same computation with unaligned loads.
    while (lhs != end_by_8) {
      const __m128 l0 = _mm_loadu_ps(lhs);
      const __m128 l1 = _mm_loadu_ps(lhs + 4);
      const __m128 r0 = _mm_loadu_ps(rhs);
      const __m128 r1 = _mm_loadu_ps(rhs + 4);
      acc_dot = _mm_fmadd_ps(l1, r1, _mm_fmadd_ps(l0, r0, acc_dot));
      acc_sq_lhs = _mm_fmadd_ps(l1, l1, _mm_fmadd_ps(l0, l0, acc_sq_lhs));
      acc_sq_rhs = _mm_fmadd_ps(r1, r1, _mm_fmadd_ps(r0, r0, acc_sq_rhs));
      lhs += 8;
      rhs += 8;
    }

    if (end - lhs >= 4) {
      const __m128 l0 = _mm_loadu_ps(lhs);
      const __m128 r0 = _mm_loadu_ps(rhs);
      acc_dot = _mm_fmadd_ps(l0, r0, acc_dot);
      acc_sq_lhs = _mm_fmadd_ps(l0, l0, acc_sq_lhs);
      acc_sq_rhs = _mm_fmadd_ps(r0, r0, acc_sq_rhs);
      lhs += 4;
      rhs += 4;
    }
  }

  // Reduce the vector accumulators to scalars.
  float result = HorizontalAdd_FP32_V128(acc_dot);
  float norm1 = HorizontalAdd_FP32_V128(acc_sq_lhs);
  float norm2 = HorizontalAdd_FP32_V128(acc_sq_rhs);

  // Scalar tail (at most three elements), highest index first.
  for (auto i = (end - lhs) - 1; i >= 0; --i) {
    FMA_FP32_GENERAL(lhs[i], rhs[i], result, norm1, norm2)
  }

  *sql = norm1;
  *sqr = norm2;
  return result;
}

//! Spherical-injection MIPS distance between two FP32 vectors (SSE).
//! Gathers the inner product and both squared norms in one pass and hands
//! them, together with `e2`, to ComputeSphericalInjection().
float MipsEuclideanDistanceSphericalInjectionFp32SSE(const float *lhs,
                                                     const float *rhs,
                                                     size_t size, float e2) {
  float sq_lhs = 0.0f;
  float sq_rhs = 0.0f;
  const float dot =
      InnerProductAndSquaredNormFp32SSE(lhs, rhs, size, &sq_lhs, &sq_rhs);
  return ComputeSphericalInjection(dot, sq_lhs, sq_rhs, e2);
}

//! Repeated-quadratic-injection MIPS distance between two FP32 vectors (SSE).
//!
//! Starts from e2 * (|lhs|^2 + |rhs|^2 - 2 * <lhs, rhs>) and then adds `m`
//! correction terms: each round adds the squared difference of the two
//! (scaled) norms and squares both norms for the next round.
//!
//! @param size  number of elements in each vector
//! @param m     number of correction rounds
//! @param e2    scale applied to the squared distance and to both norms
float MipsEuclideanDistanceRepeatedQuadraticInjectionFp32SSE(
    const float *lhs, const float *rhs, size_t size, size_t m, float e2) {
  float sq_lhs = 0.0f;
  float sq_rhs = 0.0f;
  const float dot =
      InnerProductAndSquaredNormFp32SSE(lhs, rhs, size, &sq_lhs, &sq_rhs);

  float distance = e2 * (sq_lhs + sq_rhs - 2 * dot);
  sq_lhs *= e2;
  sq_rhs *= e2;

  for (size_t rounds_left = m; rounds_left != 0; --rounds_left) {
    const float gap = sq_lhs - sq_rhs;
    distance += gap * gap;
    sq_lhs *= sq_lhs;
    sq_rhs *= sq_rhs;
  }

  return distance;
}

#endif  // __SSE__

// #if 1
#if defined(__SSE4_1__)
// Byte-shuffle control vectors for _mm_shuffle_epi8, indexed by a 4-bit lane
// mask. For mask value r, the 32-bit lanes whose bit is set in r are packed,
// in ascending lane order, into the low lanes of the result. Every remaining
// byte uses the selector -127 (0x81): its high bit is set, so the shuffle
// writes zero there.
// Example: index 5 (0b0101) moves lane 0 to lane 0 and lane 2 to lane 1.
const static __m128i SHUFFLE_MASK16[16] = {
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, -127, -127, -127, -127),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 7, 6, 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 7, 6, 5, 4, 3,
                 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 11, 10, 9, 8),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 9, 8,
                 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 11, 10, 9, 8,
                 7, 6, 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, -127, -127,
                 -127, -127, 15, 14, 13, 12),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,
                 3, 2, 1, 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,
                 7, 6, 5, 4),
    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 7, 6, 5, 4, 3, 2, 1,
                 0),
    _mm_set_epi8(-127, -127, -127, -127, -127, -127, -127, -127, 15, 14, 13, 12,
                 11, 10, 9, 8),
    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 3, 2, 1,
                 0),
    _mm_set_epi8(-127, -127, -127, -127, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5,
                 4),
    _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
};

// Capacity, in floats, of each scratch buffer that
// MipsInnerProductSparseInSegmentSSE uses to collect matched values.
constexpr uint32_t MAX_SPARSE_BUFFER_LENGTH = 65536;

//! Inner product of two sparse vectors given as (16-bit index, FP32 value)
//! pairs, using a SIMD set intersection on the index arrays.
//!
//! Only coordinates whose index occurs in both vectors contribute. The
//! function first gathers the matching values of both operands into two
//! scratch buffers (same order on both sides) and then multiplies and sums
//! the buffers.
//!
//! Both index arrays are expected to be sorted in strictly ascending order;
//! the block-skipping and merge logic below relies on it and does not
//! validate it.
//!
//! @param m_sparse_count  number of entries of the first vector
//! @param m_sparse_index  indices of the first vector
//! @param m_sparse_value  values of the first vector
//! @param q_sparse_count  number of entries of the second vector
//! @param q_sparse_index  indices of the second vector
//! @param q_sparse_value  values of the second vector
//! @return sum of m_value * q_value over all indices present in both vectors
//!
//! NOTE(review): the string-compare intrinsics used here belong to SSE4.2,
//! while the surrounding guard tests __SSE4_1__ — confirm the build flags.
float MipsInnerProductSparseInSegmentSSE(uint32_t m_sparse_count,
                                         const uint16_t *m_sparse_index,
                                         const float *m_sparse_value,
                                         uint32_t q_sparse_count,
                                         const uint16_t *q_sparse_index,
                                         const float *q_sparse_value) {
  float sum = 0.0f;

  size_t i1 = 0, i2 = 0;
  // Only whole blocks of eight indices go through the SIMD intersection; the
  // rest is handled by the scalar merge below.
  size_t end1 = m_sparse_count / 8 * 8;
  size_t end2 = q_sparse_count / 8 * 8;

  // NOTE(review): two scratch arrays of MAX_SPARSE_BUFFER_LENGTH floats each
  // (512 KiB in total with the current constant) are placed on the stack and
  // there is no fallback when more matches than that would be produced —
  // verify this against the stack size of the calling threads.
  float fixed_buffer_1[MAX_SPARSE_BUFFER_LENGTH];
  float fixed_buffer_2[MAX_SPARSE_BUFFER_LENGTH];

  float *val_start_1 = fixed_buffer_1;
  float *val_start_2 = fixed_buffer_2;

  // Write cursors into the two scratch buffers.
  float *val_1 = val_start_1;
  float *val_2 = val_start_2;

  if (i1 < end1 && i2 < end2) {
    // Skip leading blocks that lie entirely below the other side's first
    // index.
    while (m_sparse_index[i1 + 7] < q_sparse_index[i2]) {
      i1 += 8;
      if (i1 >= end1) goto do_scalar;
    }

    while (q_sparse_index[i2 + 7] < m_sparse_index[i1]) {
      i2 += 8;
      if (i2 >= end2) goto do_scalar;
    }

    __m128i mm_index_m =
        _mm_loadu_si128(reinterpret_cast<const __m128i *>(&m_sparse_index[i1]));
    __m128i mm_index_q =
        _mm_loadu_si128(reinterpret_cast<const __m128i *>(&q_sparse_index[i2]));

    while (true) {
#ifdef DEBUG_PRINT
      std::cout << "index 1: " << std::endl;
      print_data16(&mm_index_m);

      std::cout << "index 2: " << std::endl;
      print_data16(&mm_index_q);
#endif

      // Bit k of the result is set when m index k equals any q index.
      // The explicit-length form is required: the implicit-length variant
      // (_mm_cmpistrm) treats a zero word as a string terminator, so a block
      // containing index 0 would be cut short and its matches lost.
      __m128i mm_cmp_res = _mm_cmpestrm(
          mm_index_q, 8, mm_index_m, 8,
          _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);

#ifdef DEBUG_PRINT
      std::cout << "cmp res: " << std::endl;
      print_data16(&mm_cmp_res);
#endif

      int r = _mm_extract_epi32(mm_cmp_res, 0);

      if (r) {
        // Low four mask bits cover m values i1..i1+3: pack the matched lanes
        // to the front, store all four lanes, advance by the match count.
        int r1 = r & 15;

        __m128i v = _mm_loadu_si128(
            reinterpret_cast<const __m128i *>(&m_sparse_value[i1]));
        __m128 vs = _mm_castsi128_ps(_mm_shuffle_epi8(v, SHUFFLE_MASK16[r1]));

        _mm_storeu_ps(val_1, vs);
        val_1 += _mm_popcnt_u32(r1);

        // High four mask bits cover m values i1+4..i1+7.
        int r2 = (r >> 4) & 15;
        v = _mm_loadu_si128(
            reinterpret_cast<const __m128i *>(&m_sparse_value[i1 + 4]));
        vs = _mm_castsi128_ps(_mm_shuffle_epi8(v, SHUFFLE_MASK16[r2]));
        _mm_storeu_ps(val_1, vs);
        val_1 += _mm_popcnt_u32(r2);

        // Same again with the roles swapped to gather the matching q values.
        mm_cmp_res = _mm_cmpestrm(
            mm_index_m, 8, mm_index_q, 8,
            _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
        r = _mm_extract_epi32(mm_cmp_res, 0);

        r1 = r & 15;

        v = _mm_loadu_si128(
            reinterpret_cast<const __m128i *>(&q_sparse_value[i2]));
        vs = _mm_castsi128_ps(_mm_shuffle_epi8(v, SHUFFLE_MASK16[r1]));
        _mm_storeu_ps(val_2, vs);
        val_2 += _mm_popcnt_u32(r1);

        r2 = (r >> 4) & 15;
        v = _mm_loadu_si128(
            reinterpret_cast<const __m128i *>(&q_sparse_value[i2 + 4]));
        vs = _mm_castsi128_ps(_mm_shuffle_epi8(v, SHUFFLE_MASK16[r2]));
        _mm_storeu_ps(val_2, vs);
        val_2 += _mm_popcnt_u32(r2);
      }

      // Advance the block with the smaller maximum index (both on a tie).
      // id1_max is captured first because i1 may change before the second
      // comparison.
      const uint16_t id1_max = m_sparse_index[i1 + 7];

      if (id1_max <= q_sparse_index[i2 + 7]) {
        i1 += 8;
        if (i1 >= end1) goto do_scalar;
        mm_index_m = _mm_loadu_si128(
            reinterpret_cast<const __m128i *>(&m_sparse_index[i1]));
      }

      if (id1_max >= q_sparse_index[i2 + 7]) {
        i2 += 8;
        if (i2 >= end2) goto do_scalar;
        mm_index_q = _mm_loadu_si128(
            reinterpret_cast<const __m128i *>(&q_sparse_index[i2]));
      }
    }
  }

do_scalar:
  // Scalar merge of whatever the SIMD phase did not consume.
  while (i1 < m_sparse_count && i2 < q_sparse_count) {
    if (m_sparse_index[i1] == q_sparse_index[i2]) {
      *val_1++ = m_sparse_value[i1];
      *val_2++ = q_sparse_value[i2];

      ++i1;
      ++i2;
    } else if (m_sparse_index[i1] < q_sparse_index[i2]) {
      ++i1;
    } else {
      ++i2;
    }
  }

  // Number of matched pairs collected in the scratch buffers.
  size_t res_num = val_1 - val_start_1;

  size_t res_num4 = res_num / 4 * 4;

  if (res_num4) {
    __m128 sum128 = _mm_set1_ps(0);

    // Multiply-accumulate four matched pairs at a time.
    for (size_t k = 0; k < res_num4; k += 4) {
      sum128 = _mm_add_ps(sum128, _mm_mul_ps(_mm_loadu_ps(val_start_1 + k),
                                             _mm_loadu_ps(val_start_2 + k)));
    }

    float __attribute__((aligned(16))) tmp_res[4];
    _mm_store_ps(tmp_res, sum128);
    sum += (tmp_res[0] + tmp_res[1] + tmp_res[2] + tmp_res[3]);
  }

  // Remaining (fewer than four) matched pairs.
  for (size_t k = res_num4; k < res_num; ++k)
    sum += val_start_1[k] * val_start_2[k];

  return sum;
}
#else
//! Scalar inner product of two sparse vectors given as (16-bit index, FP32
//! value) pairs. Walks both index arrays in lock-step, always advancing the
//! side with the smaller index, and accumulates the product of the values
//! whenever the two indices are equal.
float MipsInnerProductSparseInSegment(uint32_t m_sparse_count,
                                      const uint16_t *m_sparse_index,
                                      const float *m_sparse_value,
                                      uint32_t q_sparse_count,
                                      const uint16_t *q_sparse_index,
                                      const float *q_sparse_value) {
  float dot = 0.0f;
  size_t m_pos = 0;
  size_t q_pos = 0;

  while (m_pos < m_sparse_count && q_pos < q_sparse_count) {
    const uint16_t m_idx = m_sparse_index[m_pos];
    const uint16_t q_idx = q_sparse_index[q_pos];

    if (m_idx < q_idx) {
      ++m_pos;
      continue;
    }
    if (q_idx < m_idx) {
      ++q_pos;
      continue;
    }

    // Same coordinate on both sides.
    dot += m_sparse_value[m_pos] * q_sparse_value[q_pos];
    ++m_pos;
    ++q_pos;
  }

  return dot;
}
#endif  // __SSE4_1__

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/mips_euclidean_distance_matrix_int4_avx2.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_accum_int8.i"
#include "distance_matrix_mips_utility.i"
#include "inner_product_matrix.h"
#include "mips_euclidean_distance_matrix.h"
#include "norm_matrix.h"

namespace zvec {
namespace ailego {

#if defined(__AVX2__)
//! Compute, in a single pass, the inner product of two INT4 vectors and the
//! squared L2-norm of each of them (AVX2 implementation).
//!
//! @param lhs   first operand, `size` bytes
//! @param rhs   second operand, `size` bytes
//! @param size  number of BYTES to process (the wrappers in this file pass
//!              `dim >> 1`; presumably two 4-bit values per byte — confirm
//!              against the FMA_INT4_* macros)
//! @param sql   out: squared norm of `lhs`
//! @param sqr   out: squared norm of `rhs`
//! @return inner product of `lhs` and `rhs`
float InnerProductAndSquaredNormInt4AVX2(const uint8_t *lhs, const uint8_t *rhs,
                                         size_t size, float *sql, float *sqr) {
  const uint8_t *last = lhs + size;
  const uint8_t *last_aligned = lhs + ((size >> 5) << 5);
  __m256i ymm_sum_0 = _mm256_setzero_si256();
  __m256i ymm_sum_1 = _mm256_setzero_si256();
  __m256i ymm_sum_norm1 = _mm256_setzero_si256();
  __m256i ymm_sum_norm2 = _mm256_setzero_si256();

  // Aligned loads are only used when both inputs sit on a 32-byte boundary.
  if (((uintptr_t)lhs & 0x1f) == 0 && ((uintptr_t)rhs & 0x1f) == 0) {
    // 32 bytes per iteration. The fourth macro argument is the second
    // inner-product accumulator declared above (it used to be spelled
    // `ymm_sum1`, which names no variable).
    for (; lhs != last_aligned; lhs += 32, rhs += 32) {
      __m256i ymm_lhs = _mm256_load_si256((const __m256i *)(lhs));
      __m256i ymm_rhs = _mm256_load_si256((const __m256i *)(rhs));
      FMA_INT4_ITER_AVX(ymm_lhs, ymm_rhs, ymm_sum_0, ymm_sum_1, ymm_sum_norm1,
                        ymm_sum_norm2)
    }
    // One extra 16-byte step; its partial sums are widened into the low
    // half of the 256-bit accumulators.
    if (last >= lhs + 16) {
      __m128i xmm_lhs = _mm_load_si128((const __m128i *)lhs);
      __m128i xmm_rhs = _mm_load_si128((const __m128i *)rhs);
      __m128i xmm_sum = _mm_setzero_si128();
      __m128i xmm_sum_norm1 = _mm_setzero_si128();
      __m128i xmm_sum_norm2 = _mm_setzero_si128();
      FMA_INT4_ITER_SSE(xmm_lhs, xmm_rhs, xmm_sum, xmm_sum_norm1, xmm_sum_norm2)
      ymm_sum_0 = _mm256_add_epi32(
          _mm256_set_m128i(_mm_setzero_si128(), xmm_sum), ymm_sum_0);
      ymm_sum_norm1 = _mm256_add_epi32(
          _mm256_set_m128i(_mm_setzero_si128(), xmm_sum_norm1), ymm_sum_norm1);
      ymm_sum_norm2 = _mm256_add_epi32(
          _mm256_set_m128i(_mm_setzero_si128(), xmm_sum_norm2), ymm_sum_norm2);
      lhs += 16;
      rhs += 16;
    }
  } else {
    // Same computation with unaligned loads.
    for (; lhs != last_aligned; lhs += 32, rhs += 32) {
      __m256i ymm_lhs = _mm256_loadu_si256((const __m256i *)(lhs));
      __m256i ymm_rhs = _mm256_loadu_si256((const __m256i *)(rhs));
      FMA_INT4_ITER_AVX(ymm_lhs, ymm_rhs, ymm_sum_0, ymm_sum_1, ymm_sum_norm1,
                        ymm_sum_norm2)
    }
    if (last >= lhs + 16) {
      __m128i xmm_lhs = _mm_loadu_si128((const __m128i *)lhs);
      __m128i xmm_rhs = _mm_loadu_si128((const __m128i *)rhs);
      __m128i xmm_sum = _mm_setzero_si128();
      __m128i xmm_sum_norm1 = _mm_setzero_si128();
      __m128i xmm_sum_norm2 = _mm_setzero_si128();
      FMA_INT4_ITER_SSE(xmm_lhs, xmm_rhs, xmm_sum, xmm_sum_norm1, xmm_sum_norm2)
      ymm_sum_0 = _mm256_add_epi32(
          _mm256_set_m128i(_mm_setzero_si128(), xmm_sum), ymm_sum_0);
      ymm_sum_norm1 = _mm256_add_epi32(
          _mm256_set_m128i(_mm_setzero_si128(), xmm_sum_norm1), ymm_sum_norm1);
      ymm_sum_norm2 = _mm256_add_epi32(
          _mm256_set_m128i(_mm_setzero_si128(), xmm_sum_norm2), ymm_sum_norm2);
      lhs += 16;
      rhs += 16;
    }
  }

  // Reduce the integer accumulators and continue in floating point.
  float result = static_cast<float>(
      HorizontalAdd_INT32_V256(_mm256_add_epi32(ymm_sum_0, ymm_sum_1)));
  float norm1 = static_cast<float>(HorizontalAdd_INT32_V256(ymm_sum_norm1));
  float norm2 = static_cast<float>(HorizontalAdd_INT32_V256(ymm_sum_norm2));

  // Scalar tail: at most 15 bytes remain; every case falls through so that
  // all remaining bytes are processed, highest index first.
  switch (last - lhs) {
    case 15:
      FMA_INT4_GENERAL(lhs[14], rhs[14], result, norm1, norm2)
      /* FALLTHRU */
    case 14:
      FMA_INT4_GENERAL(lhs[13], rhs[13], result, norm1, norm2)
      /* FALLTHRU */
    case 13:
      FMA_INT4_GENERAL(lhs[12], rhs[12], result, norm1, norm2)
      /* FALLTHRU */
    case 12:
      FMA_INT4_GENERAL(lhs[11], rhs[11], result, norm1, norm2)
      /* FALLTHRU */
    case 11:
      FMA_INT4_GENERAL(lhs[10], rhs[10], result, norm1, norm2)
      /* FALLTHRU */
    case 10:
      FMA_INT4_GENERAL(lhs[9], rhs[9], result, norm1, norm2)
      /* FALLTHRU */
    case 9:
      FMA_INT4_GENERAL(lhs[8], rhs[8], result, norm1, norm2)
      /* FALLTHRU */
    case 8:
      FMA_INT4_GENERAL(lhs[7], rhs[7], result, norm1, norm2)
      /* FALLTHRU */
    case 7:
      FMA_INT4_GENERAL(lhs[6], rhs[6], result, norm1, norm2)
      /* FALLTHRU */
    case 6:
      FMA_INT4_GENERAL(lhs[5], rhs[5], result, norm1, norm2)
      /* FALLTHRU */
    case 5:
      FMA_INT4_GENERAL(lhs[4], rhs[4], result, norm1, norm2)
      /* FALLTHRU */
    case 4:
      FMA_INT4_GENERAL(lhs[3], rhs[3], result, norm1, norm2)
      /* FALLTHRU */
    case 3:
      FMA_INT4_GENERAL(lhs[2], rhs[2], result, norm1, norm2)
      /* FALLTHRU */
    case 2:
      FMA_INT4_GENERAL(lhs[1], rhs[1], result, norm1, norm2)
      /* FALLTHRU */
    case 1:
      FMA_INT4_GENERAL(lhs[0], rhs[0], result, norm1, norm2)
  }
  *sql = norm1;
  *sqr = norm2;
  return result;
}

//! Spherical-injection MIPS distance between two INT4 vectors (AVX2).
//! `size` is halved before it is handed to the kernel, which counts bytes.
float MipsEuclideanDistanceSphericalInjectionInt4AVX2(const uint8_t *lhs,
                                                      const uint8_t *rhs,
                                                      size_t size, float e2) {
  float sq_lhs = 0.0f;
  float sq_rhs = 0.0f;
  const float dot = InnerProductAndSquaredNormInt4AVX2(lhs, rhs, size >> 1,
                                                       &sq_lhs, &sq_rhs);
  return ComputeSphericalInjection(dot, sq_lhs, sq_rhs, e2);
}

//! Repeated-quadratic-injection MIPS distance between two INT4 vectors
//! (AVX2).
//!
//! Starts from e2 * (|lhs|^2 + |rhs|^2 - 2 * <lhs, rhs>) and then adds `m`
//! correction terms: each round adds the squared difference of the two
//! (scaled) norms and squares both norms for the next round.
//! `size` is halved before it is handed to the kernel, which counts bytes.
float MipsEuclideanDistanceRepeatedQuadraticInjectionInt4AVX2(
    const uint8_t *lhs, const uint8_t *rhs, size_t size, size_t m, float e2) {
  float sq_lhs = 0.0f;
  float sq_rhs = 0.0f;
  const float dot = InnerProductAndSquaredNormInt4AVX2(lhs, rhs, size >> 1,
                                                       &sq_lhs, &sq_rhs);

  float distance = e2 * (sq_lhs + sq_rhs - 2 * dot);
  sq_lhs *= e2;
  sq_rhs *= e2;

  for (size_t rounds_left = m; rounds_left != 0; --rounds_left) {
    const float gap = sq_lhs - sq_rhs;
    distance += gap * gap;
    sq_lhs *= sq_lhs;
    sq_rhs *= sq_rhs;
  }

  return distance;
}
#endif  // __AVX2__

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/mips_euclidean_distance_matrix_int4_dispatch.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <ailego/internal/cpu_features.h>
#include "inner_product_matrix.h"
#include "mips_euclidean_distance_matrix.h"
#include "norm_matrix.h"

namespace zvec {
namespace ailego {

// Forward declarations of the INT4 kernels that the Compute() overloads
// below dispatch to. Each ISA-specific pair is only declared when the
// matching compiler feature macro is defined; the scalar pair is always
// declared.

// AVX2 kernels.
#if defined(__AVX2__)
float MipsEuclideanDistanceRepeatedQuadraticInjectionInt4AVX2(
    const uint8_t *lhs, const uint8_t *rhs, size_t size, size_t m, float e2);
float MipsEuclideanDistanceSphericalInjectionInt4AVX2(const uint8_t *lhs,
                                                      const uint8_t *rhs,
                                                      size_t size, float e2);
#endif

// SSE4.1 kernels.
#if defined(__SSE4_1__)
float MipsEuclideanDistanceRepeatedQuadraticInjectionInt4SSE(
    const uint8_t *lhs, const uint8_t *rhs, size_t size, size_t m, float e2);
float MipsEuclideanDistanceSphericalInjectionInt4SSE(const uint8_t *lhs,
                                                     const uint8_t *rhs,
                                                     size_t size, float e2);
#endif

// Scalar kernels, used when no SIMD path is selected.
float MipsEuclideanDistanceRepeatedQuadraticInjectionInt4Scalar(
    const uint8_t *lhs, const uint8_t *rhs, size_t size, size_t m, float e2);
float MipsEuclideanDistanceSphericalInjectionInt4Scalar(const uint8_t *lhs,
                                                        const uint8_t *rhs,
                                                        size_t size, float e2);

//! Spherical-injection distance between one INT4 vector and one query.
//! Picks the widest kernel that is both compiled in and reported by the CPU
//! feature flags (AVX2, then SSE4.1), falling back to the scalar kernel, and
//! stores the result in `*out`.
void MipsSquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(
    const ValueType *p, const ValueType *q, size_t dim, float e2, float *out) {
  float distance = 0.0f;

#if defined(__AVX2__)
  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX2) {
    distance = MipsEuclideanDistanceSphericalInjectionInt4AVX2(p, q, dim, e2);
  } else
#endif
#if defined(__SSE4_1__)
      if (zvec::ailego::internal::CpuFeatures::static_flags_.SSE4_1) {
    distance = MipsEuclideanDistanceSphericalInjectionInt4SSE(p, q, dim, e2);
  } else
#endif
  {
    distance = MipsEuclideanDistanceSphericalInjectionInt4Scalar(p, q, dim, e2);
  }

  *out = distance;
}

//! Repeated-quadratic-injection distance between one INT4 vector and one
//! query. Picks the widest kernel that is both compiled in and reported by
//! the CPU feature flags (AVX2, then SSE4.1), falling back to the scalar
//! kernel, and stores the result in `*out`.
void MipsSquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(
    const ValueType *p, const ValueType *q, size_t dim, size_t m, float e2,
    float *out) {
  float distance = 0.0f;

#if defined(__AVX2__)
  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX2) {
    distance = MipsEuclideanDistanceRepeatedQuadraticInjectionInt4AVX2(
        p, q, dim, m, e2);
  } else
#endif
#if defined(__SSE4_1__)
      if (zvec::ailego::internal::CpuFeatures::static_flags_.SSE4_1) {
    distance = MipsEuclideanDistanceRepeatedQuadraticInjectionInt4SSE(
        p, q, dim, m, e2);
  } else
#endif
  {
    distance = MipsEuclideanDistanceRepeatedQuadraticInjectionInt4Scalar(
        p, q, dim, m, e2);
  }

  *out = distance;
}

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/mips_euclidean_distance_matrix_int4_sse.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_accum_int8.i"
#include "distance_matrix_mips_utility.i"
#include "inner_product_matrix.h"
#include "mips_euclidean_distance_matrix.h"
#include "norm_matrix.h"

namespace zvec {
namespace ailego {

#if defined(__SSE4_1__)
//! Compute, in a single pass, the inner product of two INT4 vectors and the
//! squared L2-norm of each of them (128-bit SSE implementation).
//!
//! @param lhs   first operand, `size` bytes
//! @param rhs   second operand, `size` bytes
//! @param size  number of BYTES to process (the wrappers in this file pass
//!              `dim >> 1`)
//! @param sql   out: squared norm of `lhs`
//! @param sqr   out: squared norm of `rhs`
//! @return inner product of `lhs` and `rhs`
float InnerProductAndSquaredNormInt4SSE(const uint8_t *lhs, const uint8_t *rhs,
                                        size_t size, float *sql, float *sqr) {
  const uint8_t *const end = lhs + size;
  const uint8_t *const end_by_16 = lhs + (size & ~static_cast<size_t>(15));
  // Aligned loads are only used when both inputs sit on a 16-byte boundary.
  const bool both_aligned = (((uintptr_t)lhs | (uintptr_t)rhs) & 0xf) == 0;

  __m128i xmm_sum = _mm_setzero_si128();
  __m128i xmm_sum_norm1 = _mm_setzero_si128();
  __m128i xmm_sum_norm2 = _mm_setzero_si128();

  if (both_aligned) {
    // 16 bytes per iteration.
    while (lhs != end_by_16) {
      __m128i xmm_lhs = _mm_load_si128(reinterpret_cast<const __m128i *>(lhs));
      __m128i xmm_rhs = _mm_load_si128(reinterpret_cast<const __m128i *>(rhs));
      FMA_INT4_ITER_SSE(xmm_lhs, xmm_rhs, xmm_sum, xmm_sum_norm1, xmm_sum_norm2)
      lhs += 16;
      rhs += 16;
    }
  } else {
    // Same computation with unaligned loads.
    while (lhs != end_by_16) {
      __m128i xmm_lhs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(lhs));
      __m128i xmm_rhs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(rhs));
      FMA_INT4_ITER_SSE(xmm_lhs, xmm_rhs, xmm_sum, xmm_sum_norm1, xmm_sum_norm2)
      lhs += 16;
      rhs += 16;
    }
  }

  // Reduce the integer accumulators and continue in floating point.
  float result = static_cast<float>(HorizontalAdd_INT32_V128(xmm_sum));
  float norm1 = static_cast<float>(HorizontalAdd_INT32_V128(xmm_sum_norm1));
  float norm2 = static_cast<float>(HorizontalAdd_INT32_V128(xmm_sum_norm2));

  // Scalar tail (at most 15 bytes), highest index first.
  for (auto i = (end - lhs) - 1; i >= 0; --i) {
    FMA_INT4_GENERAL(lhs[i], rhs[i], result, norm1, norm2)
  }

  *sql = norm1;
  *sqr = norm2;
  return result;
}

//! Spherical-injection MIPS distance between two INT4 vectors (SSE).
//! `size` is halved before it is handed to the kernel, which counts bytes.
float MipsEuclideanDistanceSphericalInjectionInt4SSE(const uint8_t *lhs,
                                                     const uint8_t *rhs,
                                                     size_t size, float e2) {
  float sq_lhs = 0.0f;
  float sq_rhs = 0.0f;
  const float dot =
      InnerProductAndSquaredNormInt4SSE(lhs, rhs, size >> 1, &sq_lhs, &sq_rhs);
  return ComputeSphericalInjection(dot, sq_lhs, sq_rhs, e2);
}

//! Repeated-quadratic-injection MIPS distance between two INT4 vectors (SSE).
//!
//! Starts from e2 * (|lhs|^2 + |rhs|^2 - 2 * <lhs, rhs>) and then adds `m`
//! correction terms: each round adds the squared difference of the two
//! (scaled) norms and squares both norms for the next round.
//! `size` is halved before it is handed to the kernel, which counts bytes.
float MipsEuclideanDistanceRepeatedQuadraticInjectionInt4SSE(
    const uint8_t *lhs, const uint8_t *rhs, size_t size, size_t m, float e2) {
  float sq_lhs = 0.0f;
  float sq_rhs = 0.0f;
  const float dot =
      InnerProductAndSquaredNormInt4SSE(lhs, rhs, size >> 1, &sq_lhs, &sq_rhs);

  float distance = e2 * (sq_lhs + sq_rhs - 2 * dot);
  sq_lhs *= e2;
  sq_rhs *= e2;

  for (size_t rounds_left = m; rounds_left != 0; --rounds_left) {
    const float gap = sq_lhs - sq_rhs;
    distance += gap * gap;
    sq_lhs *= sq_lhs;
    sq_rhs *= sq_rhs;
  }

  return distance;
}

#endif  // __SSE4_1__

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/mips_euclidean_distance_matrix_int8_avx2.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_accum_int8.i"
#include "distance_matrix_mips_utility.i"
#include "mips_euclidean_distance_matrix.h"

namespace zvec {
namespace ailego {

#if defined(__AVX2__)
//! Compute, in a single pass, the inner product of two INT8 vectors and the
//! squared L2-norm of each of them (AVX2 implementation).
//!
//! @param lhs   first vector, `size` elements
//! @param rhs   second vector, `size` elements
//! @param size  number of elements in each vector
//! @param sql   out: squared norm of `lhs`
//! @param sqr   out: squared norm of `rhs`
//! @return inner product of `lhs` and `rhs`
float InnerProductAndSquaredNormInt8AVX2(const int8_t *lhs, const int8_t *rhs,
                                         size_t size, float *sql, float *sqr) {
  const int8_t *const end = lhs + size;
  const int8_t *const end_by_64 = lhs + (size & ~static_cast<size_t>(63));
  // Aligned loads are only used when both inputs sit on a 32-byte boundary.
  const bool both_aligned = (((uintptr_t)lhs | (uintptr_t)rhs) & 0x1f) == 0;

  __m256i acc_ip_0 = _mm256_setzero_si256();
  __m256i acc_ip_1 = _mm256_setzero_si256();
  __m256i acc_sq_lhs = _mm256_setzero_si256();
  __m256i acc_sq_rhs = _mm256_setzero_si256();

  if (both_aligned) {
    // 64 elements per iteration, as two 32-byte halves.
    while (lhs != end_by_64) {
      __m256i vec_l0 =
          _mm256_load_si256(reinterpret_cast<const __m256i *>(lhs));
      __m256i vec_l1 =
          _mm256_load_si256(reinterpret_cast<const __m256i *>(lhs + 32));
      __m256i vec_r0 =
          _mm256_load_si256(reinterpret_cast<const __m256i *>(rhs));
      __m256i vec_r1 =
          _mm256_load_si256(reinterpret_cast<const __m256i *>(rhs + 32));
      FMA_INT8_AVX(vec_l0, vec_r0, acc_ip_0);
      FMA_INT8_AVX(vec_l1, vec_r1, acc_ip_1);
      FMA_INT8_AVX(vec_l0, vec_l0, acc_sq_lhs);
      FMA_INT8_AVX(vec_l1, vec_l1, acc_sq_lhs);
      FMA_INT8_AVX(vec_r0, vec_r0, acc_sq_rhs);
      FMA_INT8_AVX(vec_r1, vec_r1, acc_sq_rhs);
      lhs += 64;
      rhs += 64;
    }

    // One extra 32-element step when enough data is left.
    if (end - lhs >= 32) {
      __m256i vec_l = _mm256_load_si256(reinterpret_cast<const __m256i *>(lhs));
      __m256i vec_r = _mm256_load_si256(reinterpret_cast<const __m256i *>(rhs));
      FMA_INT8_AVX(vec_l, vec_r, acc_ip_0);
      FMA_INT8_AVX(vec_l, vec_l, acc_sq_lhs);
      FMA_INT8_AVX(vec_r, vec_r, acc_sq_rhs);
      lhs += 32;
      rhs += 32;
    }

    // One extra 16-element step using 128-bit loads.
    if (end - lhs >= 16) {
      __m128i half_l = _mm_load_si128(reinterpret_cast<const __m128i *>(lhs));
      __m128i half_r = _mm_load_si128(reinterpret_cast<const __m128i *>(rhs));
      FMA_INT8_AVX_SSE_HYBRID(half_l, half_r, acc_ip_0);
      FMA_INT8_AVX_SSE_HYBRID(half_l, half_l, acc_sq_lhs);
      FMA_INT8_AVX_SSE_HYBRID(half_r, half_r, acc_sq_rhs);
      lhs += 16;
      rhs += 16;
    }
  } else {
    // Same computation with unaligned loads.
    while (lhs != end_by_64) {
      __m256i vec_l0 =
          _mm256_loadu_si256(reinterpret_cast<const __m256i *>(lhs));
      __m256i vec_l1 =
          _mm256_loadu_si256(reinterpret_cast<const __m256i *>(lhs + 32));
      __m256i vec_r0 =
          _mm256_loadu_si256(reinterpret_cast<const __m256i *>(rhs));
      __m256i vec_r1 =
          _mm256_loadu_si256(reinterpret_cast<const __m256i *>(rhs + 32));
      FMA_INT8_AVX(vec_l0, vec_r0, acc_ip_0);
      FMA_INT8_AVX(vec_l1, vec_r1, acc_ip_1);
      FMA_INT8_AVX(vec_l0, vec_l0, acc_sq_lhs);
      FMA_INT8_AVX(vec_l1, vec_l1, acc_sq_lhs);
      FMA_INT8_AVX(vec_r0, vec_r0, acc_sq_rhs);
      FMA_INT8_AVX(vec_r1, vec_r1, acc_sq_rhs);
      lhs += 64;
      rhs += 64;
    }

    if (end - lhs >= 32) {
      __m256i vec_l =
          _mm256_loadu_si256(reinterpret_cast<const __m256i *>(lhs));
      __m256i vec_r =
          _mm256_loadu_si256(reinterpret_cast<const __m256i *>(rhs));
      FMA_INT8_AVX(vec_l, vec_r, acc_ip_0);
      FMA_INT8_AVX(vec_l, vec_l, acc_sq_lhs);
      FMA_INT8_AVX(vec_r, vec_r, acc_sq_rhs);
      lhs += 32;
      rhs += 32;
    }

    if (end - lhs >= 16) {
      __m128i half_l = _mm_loadu_si128(reinterpret_cast<const __m128i *>(lhs));
      __m128i half_r = _mm_loadu_si128(reinterpret_cast<const __m128i *>(rhs));
      FMA_INT8_AVX_SSE_HYBRID(half_l, half_r, acc_ip_0);
      FMA_INT8_AVX_SSE_HYBRID(half_l, half_l, acc_sq_lhs);
      FMA_INT8_AVX_SSE_HYBRID(half_r, half_r, acc_sq_rhs);
      lhs += 16;
      rhs += 16;
    }
  }

  // Reduce the integer accumulators and continue in floating point.
  float result = static_cast<float>(
      HorizontalAdd_INT32_V256(_mm256_add_epi32(acc_ip_0, acc_ip_1)));
  float norm1 = static_cast<float>(HorizontalAdd_INT32_V256(acc_sq_lhs));
  float norm2 = static_cast<float>(HorizontalAdd_INT32_V256(acc_sq_rhs));

  // Scalar tail (at most 15 elements), highest index first.
  for (auto i = (end - lhs) - 1; i >= 0; --i) {
    FMA_INT8_GENERAL(lhs[i], rhs[i], result, norm1, norm2)
  }

  *sql = norm1;
  *sqr = norm2;
  return result;
}

//! MIPS distance by SphericalInjection for INT8 vectors (AVX2 kernel).
//! The inner product and both squared norms come from
//! InnerProductAndSquaredNormInt8AVX2; they are handed, together with e2, to
//! ComputeSphericalInjection, whose result is returned unchanged.
float MipsEuclideanDistanceSphericalInjectionInt8AVX2(const int8_t *lhs,
                                                      const int8_t *rhs,
                                                      size_t size, float e2) {
  float lhs_sq_norm = 0.0f;
  float rhs_sq_norm = 0.0f;
  float dot = InnerProductAndSquaredNormInt8AVX2(lhs, rhs, size, &lhs_sq_norm,
                                                 &rhs_sq_norm);

  return ComputeSphericalInjection(dot, lhs_sq_norm, rhs_sq_norm, e2);
}

//! MIPS distance by RepeatedQuadraticInjection for INT8 vectors (AVX2 kernel).
//! Starts from e2 * (|lhs|^2 + |rhs|^2 - 2 * <lhs, rhs>) and then, m times,
//! adds the squared difference of the two (scaled) squared norms while
//! squaring each of them for the next round.
float MipsEuclideanDistanceRepeatedQuadraticInjectionInt8AVX2(
    const int8_t *lhs, const int8_t *rhs, size_t size, size_t m, float e2) {
  float lhs_sq = 0.0f;
  float rhs_sq = 0.0f;
  const float dot =
      InnerProductAndSquaredNormInt8AVX2(lhs, rhs, size, &lhs_sq, &rhs_sq);

  float dist = e2 * (lhs_sq + rhs_sq - 2 * dot);
  lhs_sq *= e2;
  rhs_sq *= e2;
  for (size_t round = 0; round != m; ++round) {
    const float gap = lhs_sq - rhs_sq;
    dist += gap * gap;
    lhs_sq *= lhs_sq;
    rhs_sq *= rhs_sq;
  }

  return dist;
}
#endif  // __AVX2__

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/mips_euclidean_distance_matrix_int8_dispatch.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <ailego/internal/cpu_features.h>
#include "mips_euclidean_distance_matrix.h"

namespace zvec {
namespace ailego {

// Forward declarations of the per-ISA INT8 kernels that the dispatchers in
// this file choose between. Each kernel is defined in its own source file.

// AVX2 kernels: only declared when this file is compiled with AVX2 enabled.
#if defined(__AVX2__)
float MipsEuclideanDistanceRepeatedQuadraticInjectionInt8AVX2(
    const int8_t *lhs, const int8_t *rhs, size_t size, size_t m, float e2);
float MipsEuclideanDistanceSphericalInjectionInt8AVX2(const int8_t *lhs,
                                                      const int8_t *rhs,
                                                      size_t size, float e2);
#endif

// SSE kernels: only declared when this file is compiled with SSE4.1 enabled.
#if defined(__SSE4_1__)
float MipsEuclideanDistanceRepeatedQuadraticInjectionInt8SSE(
    const int8_t *lhs, const int8_t *rhs, size_t size, size_t m, float e2);
float MipsEuclideanDistanceSphericalInjectionInt8SSE(const int8_t *lhs,
                                                     const int8_t *rhs,
                                                     size_t size, float e2);
#endif

// Scalar kernels: always declared; used as the final fallback.
float MipsEuclideanDistanceRepeatedQuadraticInjectionInt8Scalar(
    const int8_t *lhs, const int8_t *rhs, size_t size, size_t m, float e2);
float MipsEuclideanDistanceSphericalInjectionInt8Scalar(const int8_t *lhs,
                                                        const int8_t *rhs,
                                                        size_t size, float e2);

//! Compute the distance between matrix and query by SphericalInjection.
//! The first kernel that is both compiled in and reported by the CPU feature
//! flags is used (AVX2, then SSE4.1); otherwise the scalar kernel runs.
void MipsSquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute(
    const ValueType *p, const ValueType *q, size_t dim, float e2, float *out) {
#if defined(__AVX2__)
  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX2) {
    *out = MipsEuclideanDistanceSphericalInjectionInt8AVX2(p, q, dim, e2);
  } else
#endif  // __AVX2__
#if defined(__SSE4_1__)
  if (zvec::ailego::internal::CpuFeatures::static_flags_.SSE4_1) {
    *out = MipsEuclideanDistanceSphericalInjectionInt8SSE(p, q, dim, e2);
  } else
#endif  // __SSE4_1__
  {
    *out = MipsEuclideanDistanceSphericalInjectionInt8Scalar(p, q, dim, e2);
  }
}

//! Compute the distance between matrix and query by RepeatedQuadraticInjection.
//! The first kernel that is both compiled in and reported by the CPU feature
//! flags is used (AVX2, then SSE4.1); otherwise the scalar kernel runs.
void MipsSquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute(
    const ValueType *p, const ValueType *q, size_t dim, size_t m, float e2,
    float *out) {
#if defined(__AVX2__)
  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX2) {
    *out = MipsEuclideanDistanceRepeatedQuadraticInjectionInt8AVX2(p, q, dim, m,
                                                                   e2);
  } else
#endif  // __AVX2__
#if defined(__SSE4_1__)
  if (zvec::ailego::internal::CpuFeatures::static_flags_.SSE4_1) {
    *out = MipsEuclideanDistanceRepeatedQuadraticInjectionInt8SSE(p, q, dim, m,
                                                                  e2);
  } else
#endif  // __SSE4_1__
  {
    *out = MipsEuclideanDistanceRepeatedQuadraticInjectionInt8Scalar(p, q, dim,
                                                                     m, e2);
  }
}

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/mips_euclidean_distance_matrix_int8_sse.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "distance_matrix_accum_int8.i"
#include "distance_matrix_mips_utility.i"
#include "mips_euclidean_distance_matrix.h"

namespace zvec {
namespace ailego {

#if defined(__SSE4_1__)
//! Compute the Inner Product between lhs and rhs, and each Squared L2-Norm
//! value, in a single pass over the data.
//!
//! @param lhs   first INT8 vector
//! @param rhs   second INT8 vector
//! @param size  number of elements in each vector
//! @param sql   receives the accumulated lhs*lhs sum (squared norm of lhs)
//! @param sqr   receives the accumulated rhs*rhs sum (squared norm of rhs)
//! @return the accumulated lhs*rhs sum, as float
//!
//! Layout of the work: 32 elements per main-loop iteration (two 16-byte loads
//! per operand), then at most one extra 16-element step, then the last 0..15
//! elements one at a time through the fall-through switch.
//! NOTE(review): FMA_INT8_SSE / FMA_INT8_GENERAL / HorizontalAdd_INT32_V128 are
//! defined outside this file; they presumably multiply-accumulate their
//! operands and reduce the four 32-bit lanes respectively - confirm against
//! distance_matrix_accum_int8.i.
float InnerProductAndSquaredNormInt8SSE(const int8_t *lhs, const int8_t *rhs,
                                        size_t size, float *sql, float *sqr) {
  // One past the final element, and the end of the largest prefix whose
  // length is a multiple of 32.
  const int8_t *last = lhs + size;
  const int8_t *last_aligned = lhs + ((size >> 5) << 5);

  // Vector accumulators: inner product, lhs squared norm, rhs squared norm.
  __m128i xmm_sum = _mm_setzero_si128();
  __m128i xmm_sum_norm1 = _mm_setzero_si128();
  __m128i xmm_sum_norm2 = _mm_setzero_si128();

  // Aligned loads are only used when BOTH pointers are 16-byte aligned;
  // otherwise the identical sequence runs with unaligned loads.
  if (((uintptr_t)lhs & 0xf) == 0 && ((uintptr_t)rhs & 0xf) == 0) {
    for (; lhs != last_aligned; lhs += 32, rhs += 32) {
      __m128i xmm_lhs_0 = _mm_load_si128((const __m128i *)(lhs + 0));
      __m128i xmm_lhs_1 = _mm_load_si128((const __m128i *)(lhs + 16));
      __m128i xmm_rhs_0 = _mm_load_si128((const __m128i *)(rhs + 0));
      __m128i xmm_rhs_1 = _mm_load_si128((const __m128i *)(rhs + 16));
      FMA_INT8_SSE(xmm_lhs_0, xmm_rhs_0, xmm_sum);
      FMA_INT8_SSE(xmm_lhs_0, xmm_lhs_0, xmm_sum_norm1);
      FMA_INT8_SSE(xmm_rhs_0, xmm_rhs_0, xmm_sum_norm2);
      FMA_INT8_SSE(xmm_lhs_1, xmm_rhs_1, xmm_sum);
      FMA_INT8_SSE(xmm_lhs_1, xmm_lhs_1, xmm_sum_norm1);
      FMA_INT8_SSE(xmm_rhs_1, xmm_rhs_1, xmm_sum_norm2);
    }

    // At least 16 elements remain after the 32-wide loop: take one more
    // 16-element step.
    if (last >= last_aligned + 16) {
      __m128i xmm_lhs = _mm_load_si128((const __m128i *)lhs);
      __m128i xmm_rhs = _mm_load_si128((const __m128i *)rhs);
      FMA_INT8_SSE(xmm_lhs, xmm_rhs, xmm_sum);
      FMA_INT8_SSE(xmm_lhs, xmm_lhs, xmm_sum_norm1);
      FMA_INT8_SSE(xmm_rhs, xmm_rhs, xmm_sum_norm2);
      lhs += 16;
      rhs += 16;
    }
  } else {
    for (; lhs != last_aligned; lhs += 32, rhs += 32) {
      __m128i xmm_lhs_0 = _mm_loadu_si128((const __m128i *)(lhs + 0));
      __m128i xmm_lhs_1 = _mm_loadu_si128((const __m128i *)(lhs + 16));
      __m128i xmm_rhs_0 = _mm_loadu_si128((const __m128i *)(rhs + 0));
      __m128i xmm_rhs_1 = _mm_loadu_si128((const __m128i *)(rhs + 16));
      FMA_INT8_SSE(xmm_lhs_0, xmm_rhs_0, xmm_sum);
      FMA_INT8_SSE(xmm_lhs_0, xmm_lhs_0, xmm_sum_norm1);
      FMA_INT8_SSE(xmm_rhs_0, xmm_rhs_0, xmm_sum_norm2);
      FMA_INT8_SSE(xmm_lhs_1, xmm_rhs_1, xmm_sum);
      FMA_INT8_SSE(xmm_lhs_1, xmm_lhs_1, xmm_sum_norm1);
      FMA_INT8_SSE(xmm_rhs_1, xmm_rhs_1, xmm_sum_norm2);
    }

    // Same 16-element step as above, with unaligned loads.
    if (last >= last_aligned + 16) {
      __m128i xmm_lhs = _mm_loadu_si128((const __m128i *)lhs);
      __m128i xmm_rhs = _mm_loadu_si128((const __m128i *)rhs);
      FMA_INT8_SSE(xmm_lhs, xmm_rhs, xmm_sum);
      FMA_INT8_SSE(xmm_lhs, xmm_lhs, xmm_sum_norm1);
      FMA_INT8_SSE(xmm_rhs, xmm_rhs, xmm_sum_norm2);
      lhs += 16;
      rhs += 16;
    }
  }
  // Collapse each vector accumulator to a scalar and continue in float.
  float result = static_cast<float>(HorizontalAdd_INT32_V128(xmm_sum));
  float norm1 = static_cast<float>(HorizontalAdd_INT32_V128(xmm_sum_norm1));
  float norm2 = static_cast<float>(HorizontalAdd_INT32_V128(xmm_sum_norm2));

  // Tail: `last - lhs` is in [0, 15] here. Entering at case k and falling
  // through handles elements k-1 down to 0; with 0 left no case matches.
  switch (last - lhs) {
    case 15:
      FMA_INT8_GENERAL(lhs[14], rhs[14], result, norm1, norm2)
      /* FALLTHRU */
    case 14:
      FMA_INT8_GENERAL(lhs[13], rhs[13], result, norm1, norm2)
      /* FALLTHRU */
    case 13:
      FMA_INT8_GENERAL(lhs[12], rhs[12], result, norm1, norm2)
      /* FALLTHRU */
    case 12:
      FMA_INT8_GENERAL(lhs[11], rhs[11], result, norm1, norm2)
      /* FALLTHRU */
    case 11:
      FMA_INT8_GENERAL(lhs[10], rhs[10], result, norm1, norm2)
      /* FALLTHRU */
    case 10:
      FMA_INT8_GENERAL(lhs[9], rhs[9], result, norm1, norm2)
      /* FALLTHRU */
    case 9:
      FMA_INT8_GENERAL(lhs[8], rhs[8], result, norm1, norm2)
      /* FALLTHRU */
    case 8:
      FMA_INT8_GENERAL(lhs[7], rhs[7], result, norm1, norm2)
      /* FALLTHRU */
    case 7:
      FMA_INT8_GENERAL(lhs[6], rhs[6], result, norm1, norm2)
      /* FALLTHRU */
    case 6:
      FMA_INT8_GENERAL(lhs[5], rhs[5], result, norm1, norm2)
      /* FALLTHRU */
    case 5:
      FMA_INT8_GENERAL(lhs[4], rhs[4], result, norm1, norm2)
      /* FALLTHRU */
    case 4:
      FMA_INT8_GENERAL(lhs[3], rhs[3], result, norm1, norm2)
      /* FALLTHRU */
    case 3:
      FMA_INT8_GENERAL(lhs[2], rhs[2], result, norm1, norm2)
      /* FALLTHRU */
    case 2:
      FMA_INT8_GENERAL(lhs[1], rhs[1], result, norm1, norm2)
      /* FALLTHRU */
    case 1:
      FMA_INT8_GENERAL(lhs[0], rhs[0], result, norm1, norm2)
  }
  // Publish the two squared norms through the out-parameters.
  *sql = norm1;
  *sqr = norm2;
  return result;
}

//! MIPS distance by SphericalInjection for INT8 vectors (SSE kernel).
//! The inner product and both squared norms come from
//! InnerProductAndSquaredNormInt8SSE; they are handed, together with e2, to
//! ComputeSphericalInjection, whose result is returned unchanged.
float MipsEuclideanDistanceSphericalInjectionInt8SSE(const int8_t *lhs,
                                                     const int8_t *rhs,
                                                     size_t size, float e2) {
  float lhs_sq_norm = 0.0f;
  float rhs_sq_norm = 0.0f;
  float dot = InnerProductAndSquaredNormInt8SSE(lhs, rhs, size, &lhs_sq_norm,
                                                &rhs_sq_norm);

  return ComputeSphericalInjection(dot, lhs_sq_norm, rhs_sq_norm, e2);
}

//! MIPS distance by RepeatedQuadraticInjection for INT8 vectors (SSE kernel).
//! Starts from e2 * (|lhs|^2 + |rhs|^2 - 2 * <lhs, rhs>) and then, m times,
//! adds the squared difference of the two (scaled) squared norms while
//! squaring each of them for the next round.
float MipsEuclideanDistanceRepeatedQuadraticInjectionInt8SSE(
    const int8_t *lhs, const int8_t *rhs, size_t size, size_t m, float e2) {
  float lhs_sq = 0.0f;
  float rhs_sq = 0.0f;
  const float dot =
      InnerProductAndSquaredNormInt8SSE(lhs, rhs, size, &lhs_sq, &rhs_sq);

  float dist = e2 * (lhs_sq + rhs_sq - 2 * dot);
  lhs_sq *= e2;
  rhs_sq *= e2;
  for (size_t round = 0; round != m; ++round) {
    const float gap = lhs_sq - rhs_sq;
    dist += gap * gap;
    lhs_sq *= lhs_sq;
    rhs_sq *= rhs_sq;
  }

  return dist;
}

#endif  // __SSE4_1__

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/mips_euclidean_distance_matrix_scalar.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <array>
#include <ailego/math/norm2_matrix.h>
#include <ailego/utility/math_helper.h>
#include <zvec/ailego/internal/platform.h>
#include <zvec/ailego/utility/type_helper.h>
#include "distance_utility.h"
#include "mips_euclidean_distance_matrix.h"

namespace zvec {
namespace ailego {
//--------------------------------------------------
// Dense
//--------------------------------------------------
// SphericalInjection distance between p and q (portable scalar kernel).
// A single pass gathers the inner product and the squared norm of each
// vector; ComputeSphericalInjection turns the three sums and e2 into the
// returned distance.
template <typename T>
inline float MipsEuclideanDistanceSphericalInjectionScalar(const T *p,
                                                           const T *q,
                                                           size_t dim,
                                                           float e2) {
  ailego_assert(p && q && dim);

  float dot = 0.0f;
  float p_sq = 0.0f;
  float q_sq = 0.0f;
  for (size_t idx = 0; idx != dim; ++idx) {
    const T &a = p[idx];
    const T &b = q[idx];
    p_sq += a * a;
    q_sq += b * b;
    dot += static_cast<float>(a * b);
  }

  return ComputeSphericalInjection(dot, p_sq, q_sq, e2);
}

// RepeatedQuadraticInjection distance between p and q (portable scalar
// kernel). A single pass gathers the squared Euclidean distance and both
// squared norms; all three are scaled by e2, then m rounds add the squared
// difference of the two norms while squaring each norm for the next round.
template <typename T>
inline float MipsEuclideanDistanceRepeatedQuadraticInjectionScalar(
    const T *p, const T *q, size_t dim, size_t m, float e2) {
  ailego_assert(p && q && dim);

  float dist = 0.0f;
  float p_sq = 0.0f;
  float q_sq = 0.0f;
  for (size_t idx = 0; idx != dim; ++idx) {
    const T &a = p[idx];
    const T &b = q[idx];
    p_sq += a * a;
    q_sq += b * b;
    dist += MathHelper::SquaredDifference(a, b);
  }

  dist *= e2;
  p_sq *= e2;
  q_sq *= e2;
  for (size_t round = 0; round != m; ++round) {
    const float gap = p_sq - q_sq;
    dist += gap * gap;
    p_sq *= p_sq;
    q_sq *= q_sq;
  }

  return dist;
}

/*! Mips Squared Euclidean Distance Matrix (INT4, M=1, N=1)
 */
//! Sum of the squares of the two signed 4-bit values packed in one byte.
//! Each nibble is moved to the top of an int8_t and shifted back down so
//! that its sign bit is extended (range -8..7).
static inline float Squared(uint8_t v) {
  const int lo = static_cast<int8_t>(v << 4) >> 4;
  const int hi = static_cast<int8_t>(v & 0xf0) >> 4;
  return static_cast<float>(lo * lo + hi * hi);
}

// SphericalInjection distance for packed INT4 vectors (two values per byte,
// so dim must be even and dim / 2 bytes are read from each input).
// Products are looked up in Int4MulTable with the index built as
// (nibble of p << 4) | (nibble of q), once for the low and once for the high
// nibble pair.
float MipsEuclideanDistanceSphericalInjectionInt4Scalar(const uint8_t *p,
                                                        const uint8_t *q,
                                                        size_t dim, float e2) {
  ailego_assert(p && q && dim && !(dim & 1));

  float dot = 0.0f;
  float p_sq = 0.0f;
  float q_sq = 0.0f;
  const size_t byte_count = dim >> 1;
  for (size_t idx = 0; idx != byte_count; ++idx) {
    const uint8_t a = p[idx];
    const uint8_t b = q[idx];
    p_sq += Squared(a);
    q_sq += Squared(b);

    const int lo_pair = ((a & 0xf) << 4) | (b & 0xf);
    const int hi_pair = (a & 0xf0) | (b >> 4);
    dot += Int4MulTable[lo_pair] + Int4MulTable[hi_pair];
  }

  return ComputeSphericalInjection(dot, p_sq, q_sq, e2);
}

// RepeatedQuadraticInjection distance for packed INT4 vectors (two values per
// byte, so dim must be even and dim / 2 bytes are read from each input).
// Squared differences are looked up in Int4SquaredDiffTable with the index
// built as (nibble of p << 4) | (nibble of q); the m injection rounds then
// run exactly as in the generic scalar kernel.
float MipsEuclideanDistanceRepeatedQuadraticInjectionInt4Scalar(
    const uint8_t *p, const uint8_t *q, size_t dim, size_t m, float e2) {
  ailego_assert(p && q && dim && !(dim & 1));

  float dist = 0.0f;
  float p_sq = 0.0f;
  float q_sq = 0.0f;
  const size_t byte_count = dim >> 1;
  for (size_t idx = 0; idx != byte_count; ++idx) {
    const uint8_t a = p[idx];
    const uint8_t b = q[idx];
    p_sq += Squared(a);
    q_sq += Squared(b);

    const int lo_pair = ((a & 0xf) << 4) | (b & 0xf);
    const int hi_pair = (a & 0xf0) | (b >> 4);
    dist += Int4SquaredDiffTable[lo_pair] + Int4SquaredDiffTable[hi_pair];
  }

  dist *= e2;
  p_sq *= e2;
  q_sq *= e2;
  for (size_t round = 0; round != m; ++round) {
    const float gap = p_sq - q_sq;
    dist += gap * gap;
    p_sq *= p_sq;
    q_sq *= q_sq;
  }

  return dist;
}

// INT8 entry point with external linkage: forwards to the generic scalar
// SphericalInjection kernel (element type deduced from the pointers).
float MipsEuclideanDistanceSphericalInjectionInt8Scalar(const int8_t *p,
                                                        const int8_t *q,
                                                        size_t dim, float e2) {
  const float dist =
      MipsEuclideanDistanceSphericalInjectionScalar(p, q, dim, e2);
  return dist;
}

// INT8 entry point with external linkage: forwards to the generic scalar
// RepeatedQuadraticInjection kernel (element type deduced from the pointers).
float MipsEuclideanDistanceRepeatedQuadraticInjectionInt8Scalar(
    const int8_t *p, const int8_t *q, size_t dim, size_t m, float e2) {
  const float dist =
      MipsEuclideanDistanceRepeatedQuadraticInjectionScalar(p, q, dim, m, e2);
  return dist;
}

// FP16 entry point with external linkage: forwards to the generic scalar
// SphericalInjection kernel (element type deduced from the pointers).
float MipsEuclideanDistanceSphericalInjectionFp16Scalar(
    const ailego::Float16 *p, const ailego::Float16 *q, size_t dim, float e2) {
  const float dist =
      MipsEuclideanDistanceSphericalInjectionScalar(p, q, dim, e2);
  return dist;
}

// FP16 entry point with external linkage: forwards to the generic scalar
// RepeatedQuadraticInjection kernel (element type deduced from the pointers).
float MipsEuclideanDistanceRepeatedQuadraticInjectionFp16Scalar(
    const ailego::Float16 *p, const ailego::Float16 *q, size_t dim, size_t m,
    float e2) {
  const float dist =
      MipsEuclideanDistanceRepeatedQuadraticInjectionScalar(p, q, dim, m, e2);
  return dist;
}

// FP32 entry point with external linkage: forwards to the generic scalar
// SphericalInjection kernel (element type deduced from the pointers).
float MipsEuclideanDistanceSphericalInjectionFp32Scalar(const float *p,
                                                        const float *q,
                                                        size_t dim, float e2) {
  const float dist =
      MipsEuclideanDistanceSphericalInjectionScalar(p, q, dim, e2);
  return dist;
}

// FP32 entry point with external linkage: forwards to the generic scalar
// RepeatedQuadraticInjection kernel (element type deduced from the pointers).
float MipsEuclideanDistanceRepeatedQuadraticInjectionFp32Scalar(
    const float *p, const float *q, size_t dim, size_t m, float e2) {
  const float dist =
      MipsEuclideanDistanceRepeatedQuadraticInjectionScalar(p, q, dim, m, e2);
  return dist;
}


}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/norm1_matrix.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <ailego/utility/math_helper.h>
#include <zvec/ailego/internal/platform.h>
#include <zvec/ailego/utility/type_helper.h>

namespace zvec {
namespace ailego {

/*! L1-Norm Matrix
 *
 *  Primary template, declared but never defined: only the specializations
 *  provide Compute(). T is the element type and M the number of vectors
 *  handled per call; the defaulted third parameter is the slot the partial
 *  specializations use for their std::enable_if conditions.
 */
template <typename T, size_t M, typename = void>
struct Norm1Matrix;

/*! L1-Norm Matrix
 *
 *  Generic specialization: signed arithmetic element types of at least two
 *  bytes, M (>= 2) vectors per call.
 */
template <typename T, size_t M>
struct Norm1Matrix<T, M,
                   typename std::enable_if<IsSignedArithmetic<T>::value &&
                                           sizeof(T) >= 2 && M >= 2>::type> {
  //! Type of value
  using ValueType = typename std::remove_cv<T>::type;

  //! Compute the norm of vectors: the input is read as `dim` rows of M
  //! elements, and out[i] receives the sum of absolute values of the i-th
  //! element of every row.
  static inline void Compute(const ValueType *m, size_t dim, float *out) {
    ailego_assert(m && dim && out);

    const ValueType *const stop = m + dim * M;
    if (m == stop) {
      return;
    }

    // The first row overwrites the outputs ...
    for (size_t col = 0; col != M; ++col) {
      out[col] = MathHelper::Absolute(m[col]);
    }
    // ... and every following row is accumulated on top.
    for (m += M; m != stop; m += M) {
      for (size_t col = 0; col != M; ++col) {
        out[col] += MathHelper::Absolute(m[col]);
      }
    }
  }
};

/*! L1-Norm Matrix (INT8)
 *
 *  M (>= 2) vectors per call. The input is walked in 32-bit words, each word
 *  holding four consecutive INT8 values that belong to one output.
 */
template <size_t M>
struct Norm1Matrix<int8_t, M, typename std::enable_if<M >= 2>::type> {
  //! Type of value
  using ValueType = int8_t;

  //! Compute the norm of vectors; dim must be a multiple of 4.
  static inline void Compute(const ValueType *m, size_t dim, float *out) {
    ailego_assert(m && dim && !(dim & 3) && out);

    const uint32_t *row = reinterpret_cast<const uint32_t *>(m);
    const uint32_t *const stop = row + (dim >> 2) * M;
    if (row == stop) {
      return;
    }

    // The first row of words overwrites the outputs ...
    for (size_t col = 0; col != M; ++col) {
      out[col] = Absolute(row[col]);
    }
    // ... and every following row is accumulated on top.
    for (row += M; row != stop; row += M) {
      for (size_t col = 0; col != M; ++col) {
        out[col] += Absolute(row[col]);
      }
    }
  }

 protected:
  //! Calculate sum of absolute values of the four INT8 lanes packed in `v`
  static inline float Absolute(uint32_t v) {
    const auto lane0 =
        MathHelper::Absolute<int8_t, int32_t>(static_cast<int8_t>(v));
    const auto lane1 =
        MathHelper::Absolute<int8_t, int32_t>(static_cast<int8_t>(v >> 8));
    const auto lane2 =
        MathHelper::Absolute<int8_t, int32_t>(static_cast<int8_t>(v >> 16));
    const auto lane3 =
        MathHelper::Absolute<int8_t, int32_t>(static_cast<int8_t>(v >> 24));
    return static_cast<float>(lane0 + lane1 + lane2 + lane3);
  }
};

/*! L1-Norm Matrix (M=1)
 *
 *  Single-vector specialization for every signed arithmetic element type.
 */
template <typename T>
struct Norm1Matrix<
    T, 1, typename std::enable_if<IsSignedArithmetic<T>::value>::type> {
  //! Type of value
  using ValueType = typename std::remove_cv<T>::type;

  //! Compute the norm of vectors: *out receives the sum of absolute values
  //! of the `dim` elements starting at m.
  static inline void Compute(const ValueType *m, size_t dim, float *out) {
    ailego_assert(m && dim && out);

    const ValueType *const stop = m + dim;
    if (m == stop) {
      return;
    }

    // The first element overwrites *out, the rest are accumulated into it.
    *out = MathHelper::Absolute(*m);
    for (++m; m != stop; ++m) {
      *out += MathHelper::Absolute(*m);
    }
  }
};

#if defined(__SSE__) || (defined(__ARM_NEON) && defined(__aarch64__))
/*! L1-Norm Matrix (FP32, M=1)
 *
 *  Full specialization that exists only when SSE or AArch64 NEON is enabled
 *  at compile time. Compute() is only declared here; its definition lives
 *  out of line.
 */
template <>
struct Norm1Matrix<float, 1> {
  //! Type of value
  using ValueType = float;

  //! Compute the L1-norm of vectors
  static void Compute(const ValueType *m, size_t dim, float *out);
};
#endif  // __SSE__ || (__ARM_NEON && __aarch64__)

#if (defined(__F16C__) && defined(__AVX__)) || \
    (defined(__ARM_NEON) && defined(__aarch64__))
/*! L1-Norm Matrix (FP16, M=1)
 *
 *  Full specialization that exists only when F16C together with AVX, or
 *  AArch64 NEON, is enabled at compile time. Compute() is only declared
 *  here; its definition lives out of line.
 */
template <>
struct Norm1Matrix<Float16, 1> {
  //! Type of value
  using ValueType = Float16;

  //! Compute the L1-norm of vectors
  static void Compute(const ValueType *m, size_t dim, float *out);
};
#endif  // (__F16C__ && __AVX__) || (__ARM_NEON && __aarch64__)

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/norm1_matrix_fp16.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <zvec/ailego/internal/platform.h>
#include "ailego/internal/cpu_features.h"
#include "norm1_matrix.h"
#include "norm_matrix_fp16.i"

namespace zvec {
namespace ailego {

// Bind the per-element "step" hooks to the sum-of-absolute-values macros
// defined further down in this file.
// NOTE(review): the NORM_*_STEP_* names are presumably expanded by the
// NORM_FP16_1_* macros from norm_matrix_fp16.i - confirm there.
#define NORM_FP32_STEP_GENERAL SA_FP32_GENERAL
#define NORM_FP32_STEP_SSE SA_FP32_SSE
#define NORM_FP32_STEP_AVX SA_FP32_AVX
#define NORM_FP32_STEP_AVX512 SA_FP32_AVX512
#define NORM_FP32_STEP_NEON SA_FP32_NEON
#define NORM_FP16_STEP_GENERAL SA_FP16_GENERAL
#define NORM_FP16_STEP_NEON SA_FP16_NEON

// Sign-bit-clearing masks (0x7fffffff in every 32-bit lane) used by the
// SA_FP32_* macros to take the absolute value of packed floats.
//
// They are written as macros, exactly as in norm1_matrix_fp32.cc, instead of
// namespace-scope `static const` vector objects. This file selects its kernel
// from the CPU feature flags at run time, so it must not own vector-typed
// globals: if the compiler emits a dynamic initializer for one of them, AVX or
// AVX-512 instructions would run while the library is being loaded, before any
// feature check, and fault on CPUs that lack them.
#if defined(__SSE__)
#define ABS_MASK_FP32_SSE _mm_castsi128_ps(_mm_set1_epi32(0x7fffffffu))
#endif  // __SSE__

#if defined(__AVX__)
#define ABS_MASK_FP32_AVX _mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffffu))
#endif  // __AVX__

#if defined(__AVX512F__)
#define ABS_MASK_FP32_AVX512 _mm512_castsi512_ps(_mm512_set1_epi32(0x7fffffffu))
#endif  // __AVX512F__

// Each SA_* macro adds the absolute value of its first argument to the
// accumulator named by its second argument. Every expansion already ends in
// a semicolon.

//! Calculate sum of absolute (GENERAL): one scalar value via FastAbs
#define SA_FP32_GENERAL(m, sum) sum += FastAbs(m);

//! Calculate sum of absolute (SSE): clears the sign bits with the SSE mask
#define SA_FP32_SSE(xmm_m, xmm_sum) \
  xmm_sum = _mm_add_ps(_mm_and_ps(xmm_m, ABS_MASK_FP32_SSE), xmm_sum);

//! Calculate sum of absolute (AVX): clears the sign bits with the AVX mask
#define SA_FP32_AVX(ymm_m, ymm_sum) \
  ymm_sum = _mm256_add_ps(_mm256_and_ps(ymm_m, ABS_MASK_FP32_AVX), ymm_sum);

//! Calculate sum of absolute (AVX512): clears the sign bits with the AVX512
//! mask
#define SA_FP32_AVX512(zmm_m, zmm_sum) \
  zmm_sum = _mm512_add_ps(_mm512_and_ps(zmm_m, ABS_MASK_FP32_AVX512), zmm_sum);

//! Calculate sum of absolute (NEON): FP32 lanes via vabsq_f32
#define SA_FP32_NEON(v_m, v_sum) v_sum = vaddq_f32(vabsq_f32(v_m), v_sum);

//! Calculate sum of absolute (GENERAL): one FP16 value via Float16::Absolute
#define SA_FP16_GENERAL(m, sum) sum += Float16::Absolute(m);

//! Calculate sum of absolute (NEON): FP16 lanes via vabsq_f16
#define SA_FP16_NEON(v_m, v_sum) v_sum = vaddq_f16(vabsq_f16(v_m), v_sum);

#if (defined(__F16C__) && defined(__AVX__)) || \
    (defined(__ARM_NEON) && defined(__aarch64__))
//! Compute the L1-norm of vectors (FP16, M=1)
//!
//! The body is produced entirely by the NORM_FP16_1_* macros. On NEON builds
//! the NEON variant is used unconditionally; otherwise the AVX512 variant is
//! taken when it is compiled in and the CPU reports AVX512F, and the AVX
//! variant is the fallback.
//! NOTE(review): the empty trailing macro argument is presumably a
//! post-processing hook that the L1 norm leaves blank - confirm against
//! norm_matrix_fp16.i.
void Norm1Matrix<Float16, 1>::Compute(const ValueType *m, size_t dim,
                                      float *out) {
#if defined(__ARM_NEON)
  NORM_FP16_1_NEON(m, dim, out, )
#else
#if defined(__AVX512F__)
  // Run-time check: the binary may contain AVX512 code the CPU cannot run.
  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) {
    NORM_FP16_1_AVX512(m, dim, out, )
    return;
  }
#endif
  NORM_FP16_1_AVX(m, dim, out, )
#endif
}
#endif  // (__F16C__ && __AVX__) || (__ARM_NEON && __aarch64__)

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/norm1_matrix_fp32.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <zvec/ailego/internal/platform.h>
#include "ailego/internal/cpu_features.h"
#include "norm1_matrix.h"
#include "norm_matrix_fp32.i"

namespace zvec {
namespace ailego {

// Bind the per-element "step" hooks to the sum-of-absolute-values macros
// defined further down in this file.
// NOTE(review): the NORM_FP32_STEP_* names are presumably expanded by the
// NORM_FP32_1_* macros from norm_matrix_fp32.i - confirm there.
#define NORM_FP32_STEP_GENERAL SA_FP32_GENERAL
#define NORM_FP32_STEP_SSE SA_FP32_SSE
#define NORM_FP32_STEP_AVX SA_FP32_AVX
#define NORM_FP32_STEP_AVX512 SA_FP32_AVX512
#define NORM_FP32_STEP_NEON SA_FP32_NEON

// Sign-bit-clearing masks (0x7fffffff in every 32-bit lane). They are macros,
// so the mask expression is re-created at each point of use and no
// vector-typed global object is defined in this file.
#if defined(__SSE__)
#define ABS_MASK_FP32_SSE _mm_castsi128_ps(_mm_set1_epi32(0x7fffffffu))
#endif  // __SSE__

#if defined(__AVX__)
#define ABS_MASK_FP32_AVX _mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffffu))
#endif  // __AVX__

#if defined(__AVX512F__)
#define ABS_MASK_FP32_AVX512 _mm512_castsi512_ps(_mm512_set1_epi32(0x7fffffffu))
#endif  // __AVX512F__

// Each SA_* macro adds the absolute value of its first argument to the
// accumulator named by its second argument. Every expansion already ends in
// a semicolon.

//! Calculate sum of absolute (GENERAL): one scalar value via FastAbs
#define SA_FP32_GENERAL(m, sum) sum += FastAbs(m);

//! Calculate sum of absolute (SSE): clears the sign bits with the SSE mask
#define SA_FP32_SSE(xmm_m, xmm_sum) \
  xmm_sum = _mm_add_ps(_mm_and_ps(xmm_m, ABS_MASK_FP32_SSE), xmm_sum);

//! Calculate sum of absolute (AVX): clears the sign bits with the AVX mask
#define SA_FP32_AVX(ymm_m, ymm_sum) \
  ymm_sum = _mm256_add_ps(_mm256_and_ps(ymm_m, ABS_MASK_FP32_AVX), ymm_sum);

//! Calculate sum of absolute (AVX512): clears the sign bits with the AVX512
//! mask
#define SA_FP32_AVX512(zmm_m, zmm_sum) \
  zmm_sum = _mm512_add_ps(_mm512_and_ps(zmm_m, ABS_MASK_FP32_AVX512), zmm_sum);

//! Calculate sum of absolute (NEON): FP32 lanes via vabsq_f32
#define SA_FP32_NEON(v_m, v_sum) v_sum = vaddq_f32(vabsq_f32(v_m), v_sum);

#if defined(__SSE__) || (defined(__ARM_NEON) && defined(__aarch64__))
//! Compute the L1-norm of vectors (FP32, M=1)
//!
//! The body is produced entirely by the NORM_FP32_1_* macros. On NEON builds
//! the NEON variant is used unconditionally; otherwise the widest variant
//! that is both compiled in and reported by the CPU feature flags is taken
//! (AVX512, then AVX), with the SSE variant as the fallback.
//! NOTE(review): the empty trailing macro argument is presumably a
//! post-processing hook that the L1 norm leaves blank - confirm against
//! norm_matrix_fp32.i.
void Norm1Matrix<float, 1>::Compute(const ValueType *m, size_t dim,
                                    float *out) {
#if defined(__ARM_NEON)
  NORM_FP32_1_NEON(m, dim, out, )
#else
#if defined(__AVX512F__)
  // Run-time check: the binary may contain AVX512 code the CPU cannot run.
  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) {
    NORM_FP32_1_AVX512(m, dim, out, )
    return;
  }
#endif
#if defined(__AVX__)
  // Same run-time guard for the AVX variant.
  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX) {
    NORM_FP32_1_AVX(m, dim, out, )
    return;
  }
#endif
  NORM_FP32_1_SSE(m, dim, out, )
#endif
}
#endif  // __SSE__ || (__ARM_NEON && __aarch64__)

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/norm2_matrix.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cmath>
#include <ailego/utility/math_helper.h>
#include <zvec/ailego/internal/platform.h>
#include <zvec/ailego/utility/type_helper.h>

namespace zvec {
namespace ailego {

/*! L2-Norm Matrix
 *
 *  Primary template, declared but never defined: only the specializations
 *  provide Compute(). T is the element type and M the number of vectors
 *  handled per call; the defaulted third parameter is the slot the partial
 *  specializations use for their std::enable_if conditions.
 */
template <typename T, size_t M, typename = void>
struct Norm2Matrix;

/*! L2-Norm Matrix
 *
 *  Generic specialization: signed arithmetic element types of at least two
 *  bytes, M (>= 2) vectors per call.
 */
template <typename T, size_t M>
struct Norm2Matrix<T, M,
                   typename std::enable_if<IsSignedArithmetic<T>::value &&
                                           sizeof(T) >= 2 && M >= 2>::type> {
  //! Type of value
  using ValueType = typename std::remove_cv<T>::type;

  //! Compute the norm of vectors: the input is read as `dim` rows of M
  //! elements, and out[i] receives the square root of the sum of squares of
  //! the i-th element of every row.
  static inline void Compute(const ValueType *m, size_t dim, float *out) {
    ailego_assert(m && dim && out);

    const ValueType *const stop = m + dim * M;
    if (m != stop) {
      // The first row overwrites the outputs ...
      for (size_t col = 0; col != M; ++col) {
        ValueType x = m[col];
        out[col] = static_cast<float>(x * x);
      }
      // ... and every following row is accumulated on top.
      for (m += M; m != stop; m += M) {
        for (size_t col = 0; col != M; ++col) {
          ValueType x = m[col];
          out[col] += static_cast<float>(x * x);
        }
      }
    }

    // Turn each sum of squares into the norm itself.
    for (size_t col = 0; col != M; ++col) {
      out[col] = std::sqrt(out[col]);
    }
  }
};

/*! L2-Norm Matrix (INT8)
 *
 *  M (>= 2) vectors per call. The input is walked in 32-bit words, each word
 *  holding four consecutive INT8 values that belong to one output.
 */
template <size_t M>
struct Norm2Matrix<int8_t, M, typename std::enable_if<M >= 2>::type> {
  //! Type of value
  using ValueType = int8_t;

  //! Compute the norm of vectors; dim must be a multiple of 4.
  static inline void Compute(const ValueType *m, size_t dim, float *out) {
    ailego_assert(m && dim && !(dim & 3) && out);

    const uint32_t *row = reinterpret_cast<const uint32_t *>(m);
    const uint32_t *const stop = row + (dim >> 2) * M;
    if (row != stop) {
      // The first row of words overwrites the outputs ...
      for (size_t col = 0; col != M; ++col) {
        out[col] = Squared(row[col]);
      }
      // ... and every following row is accumulated on top.
      for (row += M; row != stop; row += M) {
        for (size_t col = 0; col != M; ++col) {
          out[col] += Squared(row[col]);
        }
      }
    }

    // Turn each sum of squares into the norm itself.
    for (size_t col = 0; col != M; ++col) {
      out[col] = std::sqrt(out[col]);
    }
  }

 protected:
  //! Calculate sum of squared values of the four INT8 lanes packed in `v`
  static inline float Squared(uint32_t v) {
    const int lane0 = static_cast<int8_t>(v);
    const int lane1 = static_cast<int8_t>(v >> 8);
    const int lane2 = static_cast<int8_t>(v >> 16);
    const int lane3 = static_cast<int8_t>(v >> 24);
    return static_cast<float>(lane0 * lane0 + lane1 * lane1 + lane2 * lane2 +
                              lane3 * lane3);
  }
};

/*! L2-Norm Matrix (M=1)
 *
 *  Single-vector specialization for every signed arithmetic element type.
 */
template <typename T>
struct Norm2Matrix<
    T, 1, typename std::enable_if<IsSignedArithmetic<T>::value>::type> {
  //! Type of value
  using ValueType = typename std::remove_cv<T>::type;

  //! Compute the norm of vectors: *out receives the square root of the sum
  //! of squares of the `dim` elements starting at m.
  static inline void Compute(const ValueType *m, size_t dim, float *out) {
    ailego_assert(m && dim && out);

    const ValueType *const stop = m + dim;
    // The first element overwrites *out ...
    if (m != stop) {
      ValueType head = *m;
      *out = static_cast<float>(head * head);
      ++m;
    }
    // ... the remaining ones are accumulated into it.
    for (; m != stop; ++m) {
      ValueType x = *m;
      *out += static_cast<float>(x * x);
    }
    *out = std::sqrt(*out);
  }
};

/*! L2-Norm Matrix (M=1, INT4)
 *
 *  Single vector of signed 4-bit values packed two per byte.
 */
template <>
struct Norm2Matrix<uint8_t, 1> {
  //! Type of value
  using ValueType = uint8_t;

  //! Compute the norm of vectors; dim counts 4-bit values and must be even,
  //! so dim / 2 bytes are read.
  static inline void Compute(const ValueType *m, size_t dim, float *out) {
    ailego_assert(m && !(dim & 1) && dim && out);

    const size_t byte_count = dim >> 1;
    float acc = 0.0f;
    for (size_t idx = 0; idx != byte_count; ++idx) {
      acc += Squared(m[idx]);
    }
    *out = std::sqrt(acc);
  }

 protected:
  //! Calculate sum of squared values of the two sign-extended nibbles in `v`
  static inline float Squared(uint8_t v) {
    const int lo = static_cast<int8_t>(v << 4) >> 4;
    const int hi = static_cast<int8_t>(v & 0xf0) >> 4;
    return static_cast<float>(lo * lo + hi * hi);
  }
};

/*! L2-Norm Matrix (INT4)
 *
 *  M (>= 2) vectors per call. The input is walked in 32-bit words, each word
 *  holding eight consecutive signed 4-bit values that belong to one output.
 */
template <size_t M>
struct Norm2Matrix<uint8_t, M, typename std::enable_if<M >= 2>::type> {
  //! Type of value
  using ValueType = uint8_t;

  //! Compute the norm of vectors; dim counts 4-bit values and must be a
  //! multiple of 8.
  static inline void Compute(const ValueType *m, size_t dim, float *out) {
    ailego_assert(m && dim && !(dim & 7) && out);

    const uint32_t *row = reinterpret_cast<const uint32_t *>(m);
    const uint32_t *const stop = row + (dim >> 3) * M;
    if (row != stop) {
      // The first row of words overwrites the outputs ...
      for (size_t col = 0; col != M; ++col) {
        out[col] = Squared(row[col]);
      }
      // ... and every following row is accumulated on top.
      for (row += M; row != stop; row += M) {
        for (size_t col = 0; col != M; ++col) {
          out[col] += Squared(row[col]);
        }
      }
    }

    // Turn each sum of squares into the norm itself.
    for (size_t col = 0; col != M; ++col) {
      out[col] = std::sqrt(out[col]);
    }
  }

 protected:
  //! Calculate sum of squared values of the eight sign-extended nibbles
  //! packed in `u`, one byte (two nibbles) at a time
  static inline float Squared(uint32_t u) {
    float acc = 0.0f;
    for (size_t shift = 0; shift != 32; shift += 8) {
      const uint8_t byte = static_cast<uint8_t>(u >> shift);
      const int lo = static_cast<int8_t>(byte << 4) >> 4;
      const int hi = static_cast<int8_t>(byte & 0xf0) >> 4;
      acc += hi * hi + lo * lo;
    }
    return acc;
  }
};

/*! Squared L2-Norm Matrix
 *
 *  Primary template, declared but never defined: only the specializations
 *  that follow provide `Compute()`.
 *  T is the element type and M the number of vectors handled per call; the
 *  unnamed third parameter exists solely so partial specializations can be
 *  selected with std::enable_if.
 */
template <typename T, size_t M, typename = void>
struct SquaredNorm2Matrix;

/*! Squared L2-Norm Matrix
 *
 *  Batched specialization (M >= 2) for signed arithmetic element types of at
 *  least two bytes. Element `r * M + c` of the input belongs to vector `c`.
 */
template <typename T, size_t M>
struct SquaredNorm2Matrix<
    T, M,
    typename std::enable_if<IsSignedArithmetic<T>::value && sizeof(T) >= 2 &&
                            M >= 2>::type> {
  //! Type of value
  using ValueType = typename std::remove_cv<T>::type;

  //! Compute the squared L2-norm of M vectors
  //! @param m    pointer to `dim * M` elements
  //! @param dim  elements per vector (asserted non-zero)
  //! @param out  receives M sums of squares; left untouched when there is
  //!             no input row
  static inline void Compute(const ValueType *m, size_t dim, float *out) {
    ailego_assert(m && dim && out);

    const ValueType *const stop = m + dim * M;
    if (m == stop) {
      return;
    }

    // The first row initialises the outputs ...
    for (size_t col = 0; col < M; ++col) {
      const ValueType x = m[col];
      out[col] = static_cast<float>(x * x);
    }
    // ... and every following row is accumulated on top of it.
    for (const ValueType *row = m + M; row != stop; row += M) {
      for (size_t col = 0; col < M; ++col) {
        const ValueType x = row[col];
        out[col] += static_cast<float>(x * x);
      }
    }
  }
};

/*! Squared L2-Norm Matrix (INT8)
 *
 *  Batched specialization (M >= 2) for signed 8-bit elements. The input is
 *  consumed as 32-bit words holding four elements each; the word at index
 *  `k * M + i` contributes to `out[i]`.
 */
template <size_t M>
struct SquaredNorm2Matrix<int8_t, M, typename std::enable_if<M >= 2>::type> {
  //! Type of value
  using ValueType = int8_t;

  //! Compute the squared L2-norm of M vectors
  //! @param m    INT8 data, read through a `const uint32_t *`
  //! @param dim  elements per vector (asserted non-zero, multiple of 4)
  //! @param out  receives M sums of squares; left untouched when there is
  //!             no full word to read
  static inline void Compute(const ValueType *m, size_t dim, float *out) {
    ailego_assert(m && dim && !(dim & 3) && out);

    const uint32_t *word = reinterpret_cast<const uint32_t *>(m);
    const uint32_t *const stop = word + (dim >> 2) * M;
    if (word == stop) {
      return;
    }

    // Seed the outputs from the first group of M words.
    for (size_t col = 0; col < M; ++col) {
      out[col] = Squared(word[col]);
    }
    // Fold in the remaining groups.
    for (word += M; word != stop; word += M) {
      for (size_t col = 0; col < M; ++col) {
        out[col] += Squared(word[col]);
      }
    }
  }

 protected:
  //! Sum of the squares of the four signed bytes packed in `v`
  static inline float Squared(uint32_t v) {
    int acc = 0;
    for (unsigned shift = 0; shift < 32; shift += 8) {
      const int lane = static_cast<int8_t>(v >> shift);
      acc += lane * lane;
    }
    return static_cast<float>(acc);
  }
};

/*! Squared L2-Norm Matrix (M=1)
 *
 *  Single-vector specialization for any element type that satisfies
 *  IsSignedArithmetic.
 */
template <typename T>
struct SquaredNorm2Matrix<
    T, 1, typename std::enable_if<IsSignedArithmetic<T>::value>::type> {
  //! Type of value
  using ValueType = typename std::remove_cv<T>::type;

  //! Compute the squared L2-norm of one vector
  //! @param m    pointer to `dim` contiguous elements
  //! @param dim  number of elements (asserted non-zero)
  //! @param out  receives the sum of m[i] * m[i]; left untouched when the
  //!             input is empty
  static inline void Compute(const ValueType *m, size_t dim, float *out) {
    ailego_assert(m && dim && out);

    const ValueType *const stop = m + dim;
    if (m == stop) {
      return;
    }

    // The first element initialises the result; the rest are added to it.
    const ValueType head = *m;
    *out = static_cast<float>(head * head);
    for (const ValueType *p = m + 1; p != stop; ++p) {
      const ValueType x = *p;
      *out += static_cast<float>(x * x);
    }
  }
};

/*! Squared L2-Norm Matrix (M=1, INT4)
 *
 *  Single-vector specialization for packed 4-bit data: every input byte
 *  carries two signed 4-bit elements (low nibble and high nibble).
 */
template <>
struct SquaredNorm2Matrix<uint8_t, 1> {
  //! Type of value
  using ValueType = uint8_t;

  //! Compute the squared L2-norm of one packed INT4 vector
  //! @param m    pointer to `dim / 2` bytes of packed elements
  //! @param dim  number of 4-bit elements (asserted even; a zero `dim` is
  //!             not rejected here and produces 0.0f)
  //! @param out  receives the sum of squared elements
  static inline void Compute(const ValueType *m, size_t dim, float *out) {
    ailego_assert(m && !(dim & 1) && out);

    *out = 0.0f;
    const size_t bytes = dim >> 1;  // two elements per byte
    for (size_t k = 0; k < bytes; ++k) {
      *out += Squared(m[k]);
    }
  }

 protected:
  //! Sum of the squares of the two signed nibbles packed in `v`
  static inline float Squared(uint8_t v) {
    // Move each nibble into the top four bits of an int8_t, then shift right
    // by four so the nibble's sign bit is propagated.
    const int lo = static_cast<int8_t>(v << 4) >> 4;
    const int hi = static_cast<int8_t>(v & 0xf0) >> 4;
    return static_cast<float>(lo * lo + hi * hi);
  }
};

/*! Squared L2-Norm Matrix (INT4)
 *
 *  Batched specialization (M >= 2) for packed signed 4-bit elements. The
 *  input is consumed as 32-bit words holding eight elements each; the word at
 *  index `k * M + i` contributes to `out[i]`.
 */
template <size_t M>
struct SquaredNorm2Matrix<uint8_t, M, typename std::enable_if<M >= 2>::type> {
  //! Type of value
  using ValueType = uint8_t;

  //! Compute the squared L2-norm of M vectors
  //! @param m    packed INT4 data, read through a `const uint32_t *`
  //! @param dim  elements per vector (asserted non-zero, multiple of 8)
  //! @param out  receives M sums of squares; left untouched when there is
  //!             no full word to read
  static inline void Compute(const ValueType *m, size_t dim, float *out) {
    ailego_assert(m && dim && !(dim & 7) && out);

    const uint32_t *word = reinterpret_cast<const uint32_t *>(m);
    const uint32_t *const stop = word + (dim >> 3) * M;
    if (word == stop) {
      return;
    }

    // Seed the outputs from the first group of M words.
    for (size_t col = 0; col < M; ++col) {
      out[col] = Squared(word[col]);
    }
    // Fold in the remaining groups.
    for (word += M; word != stop; word += M) {
      for (size_t col = 0; col < M; ++col) {
        out[col] += Squared(word[col]);
      }
    }
  }

 protected:
  //! Sum of the squares of the eight signed nibbles packed in `u`
  static inline float Squared(uint32_t u) {
    float total = 0.0f;
    // Consume one byte (two nibbles) per step, lowest byte first.
    for (int n = 0; n < 4; ++n, u >>= 8) {
      const uint8_t byte = static_cast<uint8_t>(u);
      const int lo = static_cast<int8_t>(byte << 4) >> 4;
      const int hi = static_cast<int8_t>(byte & 0xf0) >> 4;
      total += static_cast<float>(hi * hi + lo * lo);
    }
    return total;
  }
};

#if defined(__SSE__) || (defined(__ARM_NEON) && defined(__aarch64__))
/*! L2-Norm Matrix (FP32, M=1)
 *
 *  Declaration only: Compute() is defined out of line in
 *  norm2_matrix_fp32.cc, where it expands one of the NORM_FP32_1_* kernels
 *  with std::sqrt applied to the accumulated sum.
 */
template <>
struct Norm2Matrix<float, 1> {
  //! Type of value
  using ValueType = float;

  //! Compute the L2-norm of vectors
  //! @param m    pointer to `dim` floats
  //! @param dim  number of elements
  //! @param out  receives the norm
  static void Compute(const ValueType *m, size_t dim, float *out);
};

/*! Squared L2-Norm Matrix (FP32, M=1)
 *
 *  Declaration only: Compute() is defined out of line in
 *  norm2_matrix_fp32.cc, where it expands one of the NORM_FP32_1_* kernels
 *  without a final square root.
 */
template <>
struct SquaredNorm2Matrix<float, 1> {
  //! Type of value
  using ValueType = float;

  //! Compute the squared L2-norm of vectors
  //! @param m    pointer to `dim` floats
  //! @param dim  number of elements
  //! @param out  receives the sum of squares
  static void Compute(const ValueType *m, size_t dim, float *out);
};
#endif  // __SSE__ || (__ARM_NEON && __aarch64__)

#if (defined(__F16C__) && defined(__AVX__)) || \
    (defined(__ARM_NEON) && defined(__aarch64__))
/*! L2-Norm Matrix (FP16, M=1)
 *
 *  Declaration only: Compute() is defined out of line in
 *  norm2_matrix_fp16.cc, where it expands one of the NORM_FP16_1_* kernels
 *  with std::sqrt applied to the accumulated sum.
 */
template <>
struct Norm2Matrix<Float16, 1> {
  //! Type of value
  using ValueType = Float16;

  //! Compute the L2-norm of vectors
  //! @param m    pointer to `dim` half-precision values
  //! @param dim  number of elements
  //! @param out  receives the norm as a float
  static void Compute(const ValueType *m, size_t dim, float *out);
};

/*! Squared L2-Norm Matrix (FP16, M=1)
 *
 *  Declaration only: Compute() is defined out of line in
 *  norm2_matrix_fp16.cc, where it expands one of the NORM_FP16_1_* kernels
 *  without a final square root.
 */
template <>
struct SquaredNorm2Matrix<Float16, 1> {
  //! Type of value
  using ValueType = Float16;

  //! Compute the squared L2-norm of vectors
  //! @param m    pointer to `dim` half-precision values
  //! @param dim  number of elements
  //! @param out  receives the sum of squares as a float
  static void Compute(const ValueType *m, size_t dim, float *out);
};
#endif  // (__F16C__ && __AVX__) || (__ARM_NEON && __aarch64__)

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/norm2_matrix_fp16.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <zvec/ailego/internal/platform.h>
#include "ailego/internal/cpu_features.h"
#include "norm2_matrix.h"
#include "norm_matrix_fp16.i"

namespace zvec {
namespace ailego {

// The kernels in norm_matrix_fp16.i call NORM_*_STEP_* hooks for each loaded
// chunk. Bind those hooks to the sum-of-squares operations defined below so
// the kernels accumulate m * m.
#define NORM_FP32_STEP_GENERAL SS_FP32_GENERAL
#define NORM_FP32_STEP_SSE SS_FP32_SSE
#define NORM_FP32_STEP_AVX SS_FP32_AVX
#define NORM_FP32_STEP_AVX512 SS_FP32_AVX512
#define NORM_FP32_STEP_NEON SS_FP32_NEON
#define NORM_FP16_STEP_GENERAL SS_FP16_GENERAL
#define NORM_FP16_STEP_NEON SS_FP16_NEON

//! Accumulate the square of a scalar: sum += m * m (GENERAL)
#define SS_FP32_GENERAL(m, sum) sum += (m) * (m);

//! Accumulate squares lane-wise into a 128-bit register (SSE).
//! _mm_fmadd_ps is replaced by multiply + add in norm_matrix_fp16.i when
//! __FMA__ is not defined.
#define SS_FP32_SSE(xmm_m, xmm_sum) \
  xmm_sum = _mm_fmadd_ps(xmm_m, xmm_m, xmm_sum);

//! Accumulate squares lane-wise into a 256-bit register (AVX).
//! _mm256_fmadd_ps is replaced by multiply + add in norm_matrix_fp16.i when
//! __FMA__ is not defined.
#define SS_FP32_AVX(ymm_m, ymm_sum) \
  ymm_sum = _mm256_fmadd_ps(ymm_m, ymm_m, ymm_sum);

//! Accumulate squares lane-wise into a 512-bit register (AVX512)
#define SS_FP32_AVX512(zmm_m, zmm_sum) \
  zmm_sum = _mm512_fmadd_ps(zmm_m, zmm_m, zmm_sum);

//! Accumulate squares lane-wise into a float32x4_t (NEON)
#define SS_FP32_NEON(v_m, v_sum) v_sum = vfmaq_f32(v_sum, v_m, v_m);

//! Accumulate the square of a scalar: sum += m * m (GENERAL)
#define SS_FP16_GENERAL(m, sum) sum += (m) * (m);

//! Accumulate squares lane-wise into a float16x8_t (NEON)
#define SS_FP16_NEON(v_m, v_sum) v_sum = vfmaq_f16(v_sum, v_m, v_m);

#if (defined(__F16C__) && defined(__AVX__)) || \
    (defined(__ARM_NEON) && defined(__aarch64__))
//! Compute the L2-norm of vectors (FP16, M=1)
//! The kernel is chosen by preprocessor: NEON when __ARM_NEON is defined,
//! otherwise AVX, with the AVX-512 kernel preferred when it was compiled in
//! and the CPU feature flag is set. Each kernel macro expands to statements
//! that advance `m` and store std::sqrt of the accumulated sum into `*out`.
void Norm2Matrix<Float16, 1>::Compute(const ValueType *m, size_t dim,
                                      float *out) {
#if defined(__ARM_NEON)
  NORM_FP16_1_NEON(m, dim, out, std::sqrt)
#else
#if defined(__AVX512F__)
  // The braces keep the AVX-512 kernel's locals from clashing with those the
  // AVX kernel declares below.
  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) {
    NORM_FP16_1_AVX512(m, dim, out, std::sqrt)
    return;
  }
#endif
  NORM_FP16_1_AVX(m, dim, out, std::sqrt)
#endif
}

//! Compute the squared L2-norm of vectors (FP16, M=1)
//! Same kernel selection as the L2-norm variant; the empty last macro
//! argument means the accumulated sum is stored into `*out` as is, with no
//! square root.
void SquaredNorm2Matrix<Float16, 1>::Compute(const ValueType *m, size_t dim,
                                             float *out) {
#if defined(__ARM_NEON)
  NORM_FP16_1_NEON(m, dim, out, )
#else
#if defined(__AVX512F__)
  // The braces keep the AVX-512 kernel's locals from clashing with those the
  // AVX kernel declares below.
  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) {
    NORM_FP16_1_AVX512(m, dim, out, )
    return;
  }
#endif
  NORM_FP16_1_AVX(m, dim, out, )
#endif
}
#endif  // (__F16C__ && __AVX__) || (__ARM_NEON && __aarch64__)

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/math/norm2_matrix_fp32.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <ailego/internal/cpu_features.h>
#include "norm2_matrix.h"
#include "norm_matrix_fp32.i"

namespace zvec {
namespace ailego {

// The kernels in norm_matrix_fp32.i call NORM_FP32_STEP_* hooks for each
// loaded chunk. Bind those hooks to the sum-of-squares operations defined
// below so the kernels accumulate m * m.
#define NORM_FP32_STEP_GENERAL SS_FP32_GENERAL
#define NORM_FP32_STEP_SSE SS_FP32_SSE
#define NORM_FP32_STEP_AVX SS_FP32_AVX
#define NORM_FP32_STEP_AVX512 SS_FP32_AVX512
#define NORM_FP32_STEP_NEON SS_FP32_NEON

//! Accumulate the square of a scalar: sum += m * m (GENERAL)
#define SS_FP32_GENERAL(m, sum) sum += (m) * (m);

//! Accumulate squares lane-wise into a 128-bit register (SSE).
//! _mm_fmadd_ps is replaced by multiply + add in norm_matrix_fp32.i when
//! __FMA__ is not defined.
#define SS_FP32_SSE(xmm_m, xmm_sum) \
  xmm_sum = _mm_fmadd_ps(xmm_m, xmm_m, xmm_sum);

//! Accumulate squares lane-wise into a 256-bit register (AVX).
//! _mm256_fmadd_ps is replaced by multiply + add in norm_matrix_fp32.i when
//! __FMA__ is not defined.
#define SS_FP32_AVX(ymm_m, ymm_sum) \
  ymm_sum = _mm256_fmadd_ps(ymm_m, ymm_m, ymm_sum);

//! Accumulate squares lane-wise into a 512-bit register (AVX512)
#define SS_FP32_AVX512(zmm_m, zmm_sum) \
  zmm_sum = _mm512_fmadd_ps(zmm_m, zmm_m, zmm_sum);

//! Accumulate squares lane-wise into a float32x4_t (NEON)
#define SS_FP32_NEON(v_m, v_sum) v_sum = vfmaq_f32(v_sum, v_m, v_m);

#if defined(__SSE__) || (defined(__ARM_NEON) && defined(__aarch64__))
//! Compute the L2-norm of vectors (FP32, M=1)
//! The kernel is chosen by preprocessor: NEON when __ARM_NEON is defined,
//! otherwise SSE, with the AVX-512 and AVX kernels preferred (in that order)
//! when they were compiled in and the matching CPU feature flag is set. Each
//! kernel macro expands to statements that advance `m` and store std::sqrt
//! of the accumulated sum into `*out`.
void Norm2Matrix<float, 1>::Compute(const ValueType *m, size_t dim,
                                    float *out) {
#if defined(__ARM_NEON)
  NORM_FP32_1_NEON(m, dim, out, std::sqrt)
#else
#if defined(__AVX512F__)
  // Widest kernel first; the braces keep its locals out of the scopes below.
  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) {
    NORM_FP32_1_AVX512(m, dim, out, std::sqrt)
    return;
  }
#endif
#if defined(__AVX__)
  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX) {
    NORM_FP32_1_AVX(m, dim, out, std::sqrt)
    return;
  }
#endif
  // Fallback when neither wider kernel was taken.
  NORM_FP32_1_SSE(m, dim, out, std::sqrt)
#endif
}

//! Compute the squared L2-norm of vectors (FP32, M=1)
//! Same kernel selection as the L2-norm variant; the empty last macro
//! argument means the accumulated sum is stored into `*out` as is, with no
//! square root.
void SquaredNorm2Matrix<float, 1>::Compute(const ValueType *m, size_t dim,
                                           float *out) {
#if defined(__ARM_NEON)
  NORM_FP32_1_NEON(m, dim, out, )
#else
#if defined(__AVX512F__)
  // Widest kernel first; the braces keep its locals out of the scopes below.
  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512F) {
    NORM_FP32_1_AVX512(m, dim, out, )
    return;
  }
#endif
#if defined(__AVX__)
  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX) {
    NORM_FP32_1_AVX(m, dim, out, )
    return;
  }
#endif
  // Fallback when neither wider kernel was taken.
  NORM_FP32_1_SSE(m, dim, out, )
#endif
}
#endif  // __SSE__ || (__ARM_NEON && __aarch64__)

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math/norm_matrix.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "norm1_matrix.h"
#include "norm2_matrix.h"


================================================
FILE: src/ailego/math/norm_matrix_fp16.i
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "matrix_define.i"
#include "matrix_utility.i"

// When the compiler does not define __FMA__, provide stand-ins for the 128-
// and 256-bit fused multiply-add intrinsics built from a separate multiply
// and add, so the step macros can use one spelling in both configurations.
#if !defined(__FMA__)
#define _mm_fmadd_ps(a, b, c) _mm_add_ps(_mm_mul_ps((a), (b)), (c))
#define _mm256_fmadd_ps(a, b, c) _mm256_add_ps(_mm256_mul_ps((a), (b)), (c))
#endif  // !__FMA__

//! Tail handling for the FP16 norm kernels: process the last 1..7 elements.
//! The `cnt` half-precision values at `m` are packed into the low lanes of a
//! 128-bit integer register with the remaining lanes set to zero, converted
//! to eight floats with _mm256_cvtph_ps, and passed to NORM_FP32_STEP_AVX
//! together with the accumulator `_RES##_0_0`.
//! Any other `cnt` (including 0) matches no case and does nothing.
//! The includer must define NORM_FP32_STEP_AVX before expanding this macro.
#define NORM_FP16_MASK_AVX(m, cnt, _RES)                                       \
  switch (cnt) {                                                               \
    case 7: {                                                                  \
      __m256 ymm_m = _mm256_cvtph_ps(                                          \
          _mm_set_epi16(0, *((const short *)(m) + 6),                          \
                        *((const short *)(m) + 5), *((const short *)(m) + 4),  \
                        *((const short *)(m) + 3), *((const short *)(m) + 2),  \
                        *((const short *)(m) + 1), *((const short *)(m))));    \
      NORM_FP32_STEP_AVX(ymm_m, _RES##_0_0)                                    \
      break;                                                                   \
    }                                                                          \
    case 6: {                                                                  \
      __m256 ymm_m = _mm256_cvtph_ps(_mm_set_epi32(0, *((const int *)(m) + 2), \
                                                   *((const int *)(m) + 1),    \
                                                   *((const int *)(m))));      \
      NORM_FP32_STEP_AVX(ymm_m, _RES##_0_0)                                    \
      break;                                                                   \
    }                                                                          \
    case 5: {                                                                  \
      __m256 ymm_m = _mm256_cvtph_ps(                                          \
          _mm_set_epi16(0, 0, 0, *((const short *)(m) + 4),                    \
                        *((const short *)(m) + 3), *((const short *)(m) + 2),  \
                        *((const short *)(m) + 1), *((const short *)(m))));    \
      NORM_FP32_STEP_AVX(ymm_m, _RES##_0_0)                                    \
      break;                                                                   \
    }                                                                          \
    case 4: {                                                                  \
      __m256 ymm_m = _mm256_cvtph_ps(                                          \
          _mm_set_epi64((__m64)(0ull), *((const __m64 *)(m))));                \
      NORM_FP32_STEP_AVX(ymm_m, _RES##_0_0)                                    \
      break;                                                                   \
    }                                                                          \
    case 3: {                                                                  \
      __m256 ymm_m = _mm256_cvtph_ps(                                          \
          _mm_set_epi16(0, 0, 0, 0, 0, *((const short *)(m) + 2),              \
                        *((const short *)(m) + 1), *((const short *)(m))));    \
      NORM_FP32_STEP_AVX(ymm_m, _RES##_0_0)                                    \
      break;                                                                   \
    }                                                                          \
    case 2: {                                                                  \
      __m256 ymm_m =                                                           \
          _mm256_cvtph_ps(_mm_set_epi32(0, 0, 0, *((const int *)(m))));        \
      NORM_FP32_STEP_AVX(ymm_m, _RES##_0_0)                                    \
      break;                                                                   \
    }                                                                          \
    case 1: {                                                                  \
      __m256 ymm_m = _mm256_cvtph_ps(                                          \
          _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, *((const short *)(m))));          \
      NORM_FP32_STEP_AVX(ymm_m, _RES##_0_0)                                    \
      break;                                                                   \
    }                                                                          \
  }

//! Compute the norm of vectors (FP16, M=1) with 256-bit AVX + F16C.
//! Expands to a sequence of statements, not an expression: it declares
//! `last`, `last_aligned` and the accumulator(s) set up by MATRIX_VAR_INIT
//! (presumably `ymm_sum_0_0`; see matrix_define.i) in the enclosing scope and
//! advances the caller's `m`, so it can be expanded only once per scope.
//! Steps:
//!   1. main loop: 16 halves per iteration, converted to two groups of eight
//!      floats and fed to NORM_FP32_STEP_AVX; aligned loads are used when
//!      `m` is 32-byte aligned, unaligned loads otherwise;
//!   2. one further group of 8 halves if at least 8 remain;
//!   3. the final 0..7 halves via NORM_FP16_MASK_AVX;
//!   4. `*out = _NORM(horizontal sum)`; `_NORM` may be empty, in which case
//!      the sum is stored unchanged.
#define NORM_FP16_1_AVX(m, dim, out, _NORM)                                  \
  MATRIX_VAR_INIT(1, 1, __m256, ymm_sum, _mm256_setzero_ps())                \
  const Float16 *last = m + dim;                                             \
  const Float16 *last_aligned = m + ((dim >> 4) << 4);                       \
  if (((uintptr_t)m & 0x1f) == 0) {                                          \
    for (; m != last_aligned; m += 16) {                                     \
      __m256i ymm_mi = _mm256_load_si256((const __m256i *)m);                \
      __m256 ymm_m_0 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi));      \
      __m256 ymm_m_1 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1)); \
      NORM_FP32_STEP_AVX(ymm_m_0, ymm_sum_0_0)                               \
      NORM_FP32_STEP_AVX(ymm_m_1, ymm_sum_0_0)                               \
    }                                                                        \
    if (last >= last_aligned + 8) {                                          \
      __m256 ymm_m = _mm256_cvtph_ps(_mm_load_si128((const __m128i *)m));    \
      NORM_FP32_STEP_AVX(ymm_m, ymm_sum_0_0)                                 \
      m += 8;                                                                \
    }                                                                        \
  } else {                                                                   \
    for (; m != last_aligned; m += 16) {                                     \
      __m256i ymm_mi = _mm256_loadu_si256((const __m256i *)m);               \
      __m256 ymm_m_0 = _mm256_cvtph_ps(_mm256_castsi256_si128(ymm_mi));      \
      __m256 ymm_m_1 = _mm256_cvtph_ps(_mm256_extractf128_si256(ymm_mi, 1)); \
      NORM_FP32_STEP_AVX(ymm_m_0, ymm_sum_0_0)                               \
      NORM_FP32_STEP_AVX(ymm_m_1, ymm_sum_0_0)                               \
    }                                                                        \
    if (last >= last_aligned + 8) {                                          \
      __m256 ymm_m = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)m));   \
      NORM_FP32_STEP_AVX(ymm_m, ymm_sum_0_0)                                 \
      m += 8;                                                                \
    }                                                                        \
  }                                                                          \
  NORM_FP16_MASK_AVX(m, (last - m), ymm_sum)                                 \
  *out = _NORM(HorizontalAdd_FP32_V256(ymm_sum_0_0));

//! Compute the norm of vectors (FP16, M=1) with 512-bit AVX-512.
//! Expands to a sequence of statements, not an expression: it declares
//! `last`, `last_aligned`, `result` and the accumulators set up by
//! MATRIX_VAR_INIT (presumably `zmm_sum_0_0` / `zmm_sum_0_1`; see
//! matrix_define.i) in the enclosing scope and advances the caller's `m`.
//! Steps:
//!   1. main loop: 32 halves per iteration, converted to two groups of 16
//!      floats that feed two separate accumulators; aligned loads are used
//!      when `m` is 64-byte aligned, unaligned loads otherwise;
//!   2. one further group of 16 halves if at least 16 remain;
//!   3. both accumulators are added and reduced into the scalar `result`;
//!   4. if elements are still left (fewer than 16), they go through a 256-bit
//!      path: an optional group of 8 plus NORM_FP16_MASK_AVX for the last
//!      0..7, whose horizontal sum is added to `result`;
//!   5. `*out = _NORM(result)`; `_NORM` may be empty, in which case the sum
//!      is stored unchanged.
#define NORM_FP16_1_AVX512(m, dim, out, _NORM)                                \
  MATRIX_VAR_INIT(1, 2, __m512, zmm_sum, _mm512_setzero_ps())                 \
  const Float16 *last = m + dim;                                              \
  const Float16 *last_aligned = m + ((dim >> 5) << 5);                        \
  if (((uintptr_t)m & 0x3f) == 0) {                                           \
    for (; m != last_aligned; m += 32) {                                      \
      __m512i zmm_mi = _mm512_load_si512((const __m512i *)m);                 \
      __m512 zmm_m_0 = _mm512_cvtph_ps(_mm512_castsi512_si256(zmm_mi));       \
      __m512 zmm_m_1 = _mm512_cvtph_ps(_mm512_extracti64x4_epi64(zmm_mi, 1)); \
      NORM_FP32_STEP_AVX512(zmm_m_0, zmm_sum_0_0)                             \
      NORM_FP32_STEP_AVX512(zmm_m_1, zmm_sum_0_1)                             \
    }                                                                         \
    if (last >= last_aligned + 16) {                                          \
      __m512 zmm_m = _mm512_cvtph_ps(_mm256_load_si256((const __m256i *)m));  \
      NORM_FP32_STEP_AVX512(zmm_m, zmm_sum_0_0)                               \
      m += 16;                                                                \
    }                                                                         \
  } else {                                                                    \
    for (; m != last_aligned; m += 32) {                                      \
      __m512i zmm_mi = _mm512_loadu_si512((const __m512i *)m);                \
      __m512 zmm_m_0 = _mm512_cvtph_ps(_mm512_castsi512_si256(zmm_mi));       \
      __m512 zmm_m_1 = _mm512_cvtph_ps(_mm512_extracti64x4_epi64(zmm_mi, 1)); \
      NORM_FP32_STEP_AVX512(zmm_m_0, zmm_sum_0_0)                             \
      NORM_FP32_STEP_AVX512(zmm_m_1, zmm_sum_0_1)                             \
    }                                                                         \
    if (last >= last_aligned + 16) {                                          \
      __m512 zmm_m = _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)m)); \
      NORM_FP32_STEP_AVX512(zmm_m, zmm_sum_0_0)                               \
      m += 16;                                                                \
    }                                                                         \
  }                                                                           \
  float result =                                                              \
      HorizontalAdd_FP32_V512(_mm512_add_ps(zmm_sum_0_0, zmm_sum_0_1));       \
  if (m != last) {                                                            \
    MATRIX_VAR_INIT(1, 1, __m256, ymm_sum, _mm256_setzero_ps())               \
    if (last >= m + 8) {                                                      \
      __m256 ymm_m = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)m));    \
      NORM_FP32_STEP_AVX(ymm_m, ymm_sum_0_0)                                  \
      m += 8;                                                                 \
    }                                                                         \
    NORM_FP16_MASK_AVX(m, (last - m), ymm_sum)                                \
    result += HorizontalAdd_FP32_V256(ymm_sum_0_0);                           \
  }                                                                           \
  *out = _NORM(result);

#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
//! Compute the norm of vectors (FP16, M=1) with NEON half-precision
//! arithmetic (used when __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is defined).
//! Expands to a sequence of statements, not an expression: it declares
//! `last`, `last_aligned`, `result` and the accumulator set up by
//! MATRIX_VAR_INIT (presumably `v_sum_0_0`; see matrix_define.i) in the
//! enclosing scope and advances the caller's `m`.
//! Steps:
//!   1. main loop: 8 halves per iteration through NORM_FP16_STEP_NEON;
//!   2. if at least 4 remain, 64 bits are loaded into the low half of a
//!      zeroed vector and processed the same way;
//!   3. the FP16 accumulator is widened to FP32 and reduced into `result`;
//!   4. the last 0..3 elements are added one by one via
//!      NORM_FP16_STEP_GENERAL (deliberate case fall-through);
//!   5. `*out = _NORM(result)`; `_NORM` may be empty.
//! NOTE(review): the vector partial sums are kept in FP16 lanes until step 3
//! -- confirm the reduced range and precision are acceptable for the inputs
//! this path sees.
#define NORM_FP16_1_NEON(m, dim, out, _NORM)                                 \
  MATRIX_VAR_INIT(1, 1, float16x8_t, v_sum, vdupq_n_f16(0))                  \
  const Float16 *last = m + dim;                                             \
  const Float16 *last_aligned = m + ((dim >> 3) << 3);                       \
  for (; m != last_aligned; m += 8) {                                        \
    float16x8_t v_m = vld1q_f16((const float16_t *)m);                       \
    NORM_FP16_STEP_NEON(v_m, v_sum_0_0)                                      \
  }                                                                          \
  if (last >= m + 4) {                                                       \
    float16x8_t v_m = vreinterpretq_f16_u64(                                 \
        vld1q_lane_u64((const uint64_t *)m, vdupq_n_u64(0), 0));             \
    NORM_FP16_STEP_NEON(v_m, v_sum_0_0)                                      \
    m += 4;                                                                  \
  }                                                                          \
  float result = vaddvq_f32(vaddq_f32(vcvt_f32_f16(vget_low_f16(v_sum_0_0)), \
                                      vcvt_high_f32_f16(v_sum_0_0)));        \
  switch (last - m) {                                                        \
    case 3:                                                                  \
      NORM_FP16_STEP_GENERAL(m[2], result)                                   \
      /* FALLTHRU */                                                         \
    case 2:                                                                  \
      NORM_FP16_STEP_GENERAL(m[1], result)                                   \
      /* FALLTHRU */                                                         \
    case 1:                                                                  \
      NORM_FP16_STEP_GENERAL(m[0], result)                                   \
  }                                                                          \
  *out = _NORM(result);

#else
//! Compute the norm of vectors (FP16, M=1) with NEON, accumulating in FP32
//! (used when __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is not defined).
//! Expands to a sequence of statements, not an expression: it declares
//! `last`, `last_aligned`, `result` and the accumulators set up by
//! MATRIX_VAR_INIT (presumably `v_sum_0_0` / `v_sum_0_1`; see
//! matrix_define.i) in the enclosing scope and advances the caller's `m`.
//! Steps:
//!   1. main loop: 8 halves per iteration, widened to two float32x4_t
//!      vectors that feed two separate accumulators via NORM_FP32_STEP_NEON;
//!   2. if at least 4 remain, one more widened group of 4 is processed;
//!   3. both accumulators are added and reduced into `result`;
//!   4. the last 0..3 elements are added one by one via
//!      NORM_FP16_STEP_GENERAL (deliberate case fall-through);
//!   5. `*out = _NORM(result)`; `_NORM` may be empty.
#define NORM_FP16_1_NEON(m, dim, out, _NORM)                        \
  MATRIX_VAR_INIT(1, 2, float32x4_t, v_sum, vdupq_n_f32(0))         \
  const Float16 *last = m + dim;                                    \
  const Float16 *last_aligned = m + ((dim >> 3) << 3);              \
  for (; m != last_aligned; m += 8) {                               \
    float16x8_t v_m = vld1q_f16((const float16_t *)m);              \
    float32x4_t v_n_0 = vcvt_f32_f16(vget_low_f16(v_m));            \
    float32x4_t v_n_1 = vcvt_high_f32_f16(v_m);                     \
    NORM_FP32_STEP_NEON(v_n_0, v_sum_0_0)                           \
    NORM_FP32_STEP_NEON(v_n_1, v_sum_0_1)                           \
  }                                                                 \
  if (last >= m + 4) {                                              \
    float32x4_t v_m = vcvt_f32_f16(vld1_f16((const float16_t *)m)); \
    NORM_FP32_STEP_NEON(v_m, v_sum_0_0)                             \
    m += 4;                                                         \
  }                                                                 \
  float result = vaddvq_f32(vaddq_f32(v_sum_0_0, v_sum_0_1));       \
  switch (last - m) {                                               \
    case 3:                                                         \
      NORM_FP16_STEP_GENERAL(m[2], result)                          \
      /* FALLTHRU */                                                \
    case 2:                                                         \
      NORM_FP16_STEP_GENERAL(m[1], result)                          \
      /* FALLTHRU */                                                \
    case 1:                                                         \
      NORM_FP16_STEP_GENERAL(m[0], result)                          \
  }                                                                 \
  *out = _NORM(result);

#endif  // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC

================================================
FILE: src/ailego/math/norm_matrix_fp32.i
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "matrix_define.i"
#include "matrix_utility.i"

// When the compiler does not define __FMA__, provide stand-ins for the 128-
// and 256-bit fused multiply-add intrinsics built from a separate multiply
// and add, so the step macros can use one spelling in both configurations.
#if !defined(__FMA__)
#define _mm_fmadd_ps(a, b, c) _mm_add_ps(_mm_mul_ps((a), (b)), (c))
#define _mm256_fmadd_ps(a, b, c) _mm256_add_ps(_mm256_mul_ps((a), (b)), (c))
#endif  // !__FMA__

//! Tail handling for the FP32 SSE norm kernel: process the last 1..3 floats.
//! The `cnt` values at `m` are placed in the low lanes of a 128-bit register
//! with the remaining lanes set to 0.0f and passed to NORM_FP32_STEP_SSE
//! together with the accumulator `_RES##_0_0`.
//! Any other `cnt` (including 0) matches no case and does nothing.
//! The includer must define NORM_FP32_STEP_SSE before expanding this macro.
#define NORM_FP32_MASK_SSE(m, cnt, _RES)                 \
  switch (cnt) {                                         \
    case 3: {                                            \
      __m128 xmm_m = _mm_set_ps(0.0f, m[2], m[1], m[0]); \
      NORM_FP32_STEP_SSE(xmm_m, _RES##_0_0)              \
      break;                                             \
    }                                                    \
    case 2: {                                            \
      __m128 xmm_m = _mm_set_ps(0.0f, 0.0f, m[1], m[0]); \
      NORM_FP32_STEP_SSE(xmm_m, _RES##_0_0)              \
      break;                                             \
    }                                                    \
    case 1: {                                            \
      __m128 xmm_m = _mm_set_ps(0.0f, 0.0f, 0.0f, m[0]); \
      NORM_FP32_STEP_SSE(xmm_m, _RES##_0_0)              \
      break;                                             \
    }                                                    \
  }

//! Compute the norm of vectors (FP32, M=1) with 128-bit SSE.
//! Expands to a sequence of statements, not an expression: it declares
//! `last`, `last_aligned` and the accumulator set up by MATRIX_VAR_INIT
//! (presumably `xmm_sum_0_0`; see matrix_define.i) in the enclosing scope and
//! advances the caller's `m`, so it can be expanded only once per scope.
//! Steps:
//!   1. main loop: 8 floats per iteration as two 128-bit loads, both fed to
//!      NORM_FP32_STEP_SSE; aligned loads are used when `m` is 16-byte
//!      aligned, unaligned loads otherwise;
//!   2. one further group of 4 floats if at least 4 remain;
//!   3. the final 0..3 floats via NORM_FP32_MASK_SSE;
//!   4. `*out = _NORM(horizontal sum)`; `_NORM` may be empty, in which case
//!      the sum is stored unchanged.
#define NORM_FP32_1_SSE(m, dim, out, _NORM)                \
  MATRIX_VAR_INIT(1, 1, __m128, xmm_sum, _mm_setzero_ps()) \
  const float *last = m + dim;                             \
  const float *last_aligned = m + ((dim >> 3) << 3);       \
  if (((uintptr_t)m & 0xf) == 0) {                         \
    for (; m != last_aligned; m += 8) {                    \
      __m128 xmm_m_0 = _mm_load_ps(m + 0);                 \
      __m128 xmm_m_1 = _mm_load_ps(m + 4);                 \
      NORM_FP32_STEP_SSE(xmm_m_0, xmm_sum_0_0)             \
      NORM_FP32_STEP_SSE(xmm_m_1, xmm_sum_0_0)             \
    }                                                      \
    if (last >= last_aligned + 4) {                        \
      __m128 xmm_m = _mm_load_ps(m);                       \
      NORM_FP32_STEP_SSE(xmm_m, xmm_sum_0_0)               \
      m += 4;                                              \
    }                                                      \
  } else {                                                 \
    for (; m != last_aligned; m += 8) {                    \
      __m128 xmm_m_0 = _mm_loadu_ps(m + 0);                \
      __m128 xmm_m_1 = _mm_loadu_ps(m + 4);                \
      NORM_FP32_STEP_SSE(xmm_m_0, xmm_sum_0_0)             \
      NORM_FP32_STEP_SSE(xmm_m_1, xmm_sum_0_0)             \
    }                                                      \
    if (last >= last_aligned + 4) {                        \
      __m128 xmm_m = _mm_loadu_ps(m);                      \
      NORM_FP32_STEP_SSE(xmm_m, xmm_sum_0_0)               \
      m += 4;                                              \
    }                                                      \
  }                                                        \
  NORM_FP32_MASK_SSE(m, (last - m), xmm_sum)               \
  *out = _NORM(HorizontalAdd_FP32_V128(xmm_sum_0_0));

//! Compute the norm of vectors (FP32, M=1)
//! AVX variant: feeds the `dim` floats at `m` to NORM_FP32_STEP_AVX with the
//! accumulator `ymm_sum_0_0`, 16 floats per loop iteration plus one optional
//! block of 8. Aligned loads are used when `m` is 32-byte aligned, unaligned
//! loads otherwise. The 256-bit accumulator is reduced to the scalar `result`;
//! any remaining 1-7 floats are then processed with a local __m128
//! accumulator (one optional unaligned block of 4 followed by
//! NORM_FP32_MASK_SSE) whose horizontal sum is added to `result`.
//! Finally `_NORM(result)` is stored to `*out`.
//! Note: the expansion advances `m` and introduces the locals `last`,
//! `last_aligned` and `result` into the enclosing scope.
#define NORM_FP32_1_AVX(m, dim, out, _NORM)                   \
  MATRIX_VAR_INIT(1, 1, __m256, ymm_sum, _mm256_setzero_ps()) \
  const float *last = m + dim;                                \
  const float *last_aligned = m + ((dim >> 4) << 4);          \
  if (((uintptr_t)m & 0x1f) == 0) {                           \
    for (; m != last_aligned; m += 16) {                      \
      __m256 ymm_m_0 = _mm256_load_ps(m + 0);                 \
      __m256 ymm_m_1 = _mm256_load_ps(m + 8);                 \
      NORM_FP32_STEP_AVX(ymm_m_0, ymm_sum_0_0)                \
      NORM_FP32_STEP_AVX(ymm_m_1, ymm_sum_0_0)                \
    }                                                         \
    if (last >= last_aligned + 8) {                           \
      __m256 ymm_m = _mm256_load_ps(m);                       \
      NORM_FP32_STEP_AVX(ymm_m, ymm_sum_0_0)                  \
      m += 8;                                                 \
    }                                                         \
  } else {                                                    \
    for (; m != last_aligned; m += 16) {                      \
      __m256 ymm_m_0 = _mm256_loadu_ps(m + 0);                \
      __m256 ymm_m_1 = _mm256_loadu_ps(m + 8);                \
      NORM_FP32_STEP_AVX(ymm_m_0, ymm_sum_0_0)                \
      NORM_FP32_STEP_AVX(ymm_m_1, ymm_sum_0_0)                \
    }                                                         \
    if (last >= last_aligned + 8) {                           \
      __m256 ymm_m = _mm256_loadu_ps(m);                      \
      NORM_FP32_STEP_AVX(ymm_m, ymm_sum_0_0)                  \
      m += 8;                                                 \
    }                                                         \
  }                                                           \
  float result = HorizontalAdd_FP32_V256(ymm_sum_0_0);        \
  if (m != last) {                                            \
    __m128 xmm_sum_0_0 = _mm_setzero_ps();                    \
    if (last >= m + 4) {                                      \
      __m128 xmm_m = _mm_loadu_ps(m);                         \
      NORM_FP32_STEP_SSE(xmm_m, xmm_sum_0_0)                  \
      m += 4;                                                 \
    }                                                         \
    NORM_FP32_MASK_SSE(m, (last - m), xmm_sum)                \
    result += HorizontalAdd_FP32_V128(xmm_sum_0_0);           \
  }                                                           \
  *out = _NORM(result);

//! Compute the norm of vectors (FP32, M=1)
//! AVX-512 variant: uses two accumulators (`zmm_sum_0_0`, `zmm_sum_0_1`) and
//! feeds 32 floats per loop iteration to NORM_FP32_STEP_AVX512, followed by
//! one optional block of 16. Aligned loads are used when `m` is 64-byte
//! aligned, unaligned loads otherwise. The remaining 1-15 floats are read
//! with a masked load whose mask has the low (last - m) bits set; masked-off
//! lanes are taken from a zero vector. The two accumulators are added,
//! reduced horizontally, and `_NORM(result)` is stored to `*out`.
//! Note: the expansion advances `m` and introduces the locals `last`,
//! `last_aligned` and `result` into the enclosing scope.
#define NORM_FP32_1_AVX512(m, dim, out, _NORM)                          \
  MATRIX_VAR_INIT(1, 2, __m512, zmm_sum, _mm512_setzero_ps())           \
  const float *last = m + dim;                                          \
  const float *last_aligned = m + ((dim >> 5) << 5);                    \
  if (((uintptr_t)m & 0x3f) == 0) {                                     \
    for (; m != last_aligned; m += 32) {                                \
      __m512 zmm_m_0 = _mm512_load_ps(m + 0);                           \
      NORM_FP32_STEP_AVX512(zmm_m_0, zmm_sum_0_0)                       \
      __m512 zmm_m_1 = _mm512_load_ps(m + 16);                          \
      NORM_FP32_STEP_AVX512(zmm_m_1, zmm_sum_0_1)                       \
    }                                                                   \
    if (last >= last_aligned + 16) {                                    \
      __m512 zmm_m = _mm512_load_ps(m);                                 \
      NORM_FP32_STEP_AVX512(zmm_m, zmm_sum_0_0)                         \
      m += 16;                                                          \
    }                                                                   \
  } else {                                                              \
    for (; m != last_aligned; m += 32) {                                \
      __m512 zmm_m_0 = _mm512_loadu_ps(m + 0);                          \
      NORM_FP32_STEP_AVX512(zmm_m_0, zmm_sum_0_0)                       \
      __m512 zmm_m_1 = _mm512_loadu_ps(m + 16);                         \
      NORM_FP32_STEP_AVX512(zmm_m_1, zmm_sum_0_1)                       \
    }                                                                   \
    if (last >= last_aligned + 16) {                                    \
      __m512 zmm_m = _mm512_loadu_ps(m);                                \
      NORM_FP32_STEP_AVX512(zmm_m, zmm_sum_0_0)                         \
      m += 16;                                                          \
    }                                                                   \
  }                                                                     \
  if (m != last) {                                                      \
    __mmask16 mask = (__mmask16)((1 << (last - m)) - 1);                \
    __m512 zmm_m = _mm512_mask_loadu_ps(_mm512_setzero_ps(), mask, m);  \
    NORM_FP32_STEP_AVX512(zmm_m, zmm_sum_0_0)                           \
  }                                                                     \
  float result =                                                        \
      HorizontalAdd_FP32_V512(_mm512_add_ps(zmm_sum_0_0, zmm_sum_0_1)); \
  *out = _NORM(result);

//! Compute the norm of vectors (FP32, M=1)
//! NEON variant: uses two float32x4_t accumulators (`v_sum_0_0`, `v_sum_0_1`)
//! and feeds 8 floats per loop iteration to NORM_FP32_STEP_NEON, followed by
//! one optional block of 4. The accumulators are added and reduced with
//! vaddvq_f32 into the scalar `result`; the last 1-3 floats are folded into
//! `result` one at a time through NORM_FP32_STEP_GENERAL (the switch cases
//! fall through on purpose). Finally `_NORM(result)` is stored to `*out`.
//! Note: the expansion advances `m` and introduces the locals `last`,
//! `last_aligned` and `result` into the enclosing scope.
#define NORM_FP32_1_NEON(m, dim, out, _NORM)                  \
  MATRIX_VAR_INIT(1, 2, float32x4_t, v_sum, vdupq_n_f32(0))   \
  const float *last = m + dim;                                \
  const float *last_aligned = m + ((dim >> 3) << 3);          \
  for (; m != last_aligned; m += 8) {                         \
    float32x4_t v_m_0 = vld1q_f32(m + 0);                     \
    float32x4_t v_m_1 = vld1q_f32(m + 4);                     \
    NORM_FP32_STEP_NEON(v_m_0, v_sum_0_0)                     \
    NORM_FP32_STEP_NEON(v_m_1, v_sum_0_1)                     \
  }                                                           \
  if (last >= last_aligned + 4) {                             \
    float32x4_t v_m = vld1q_f32(m);                           \
    NORM_FP32_STEP_NEON(v_m, v_sum_0_0)                       \
    m += 4;                                                   \
  }                                                           \
  float result = vaddvq_f32(vaddq_f32(v_sum_0_0, v_sum_0_1)); \
  switch (last - m) {                                         \
    case 3:                                                   \
      NORM_FP32_STEP_GENERAL(m[2], result)                    \
      /* FALLTHRU */                                          \
    case 2:                                                   \
      NORM_FP32_STEP_GENERAL(m[1], result)                    \
      /* FALLTHRU */                                          \
    case 1:                                                   \
      NORM_FP32_STEP_GENERAL(m[0], result)                    \
  }                                                           \
  *out = _NORM(result);


================================================
FILE: src/ailego/math/normalizer.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "normalizer.h"

namespace zvec {
namespace ailego {

#if (defined(__ARM_NEON) && defined(__aarch64__))
//! Divide the `dim` floats at `arr` by `norm`, in place (NEON)
static inline void NormalizeNEON(float *arr, size_t dim, float norm) {
  const float32x4_t divisor = vdupq_n_f32(norm);
  size_t remaining = dim;

  // Main loop: two 128-bit vectors (8 floats) per iteration.
  while (remaining >= 8) {
    const float32x4_t lo = vld1q_f32(arr);
    const float32x4_t hi = vld1q_f32(arr + 4);
    vst1q_f32(arr, vdivq_f32(lo, divisor));
    vst1q_f32(arr + 4, vdivq_f32(hi, divisor));
    arr += 8;
    remaining -= 8;
  }

  // At most one more full vector of 4 floats.
  if (remaining >= 4) {
    vst1q_f32(arr, vdivq_f32(vld1q_f32(arr), divisor));
    arr += 4;
    remaining -= 4;
  }

  // Scalar tail: 0 to 3 elements.
  for (size_t i = 0; i < remaining; ++i) {
    arr[i] /= norm;
  }
}

#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
//! Divide the `dim` FP16 values at `arr` by `norm`, in place
//! (NEON with native FP16 vector arithmetic)
static inline void NormalizeNEON(float16_t *arr, size_t dim, float norm) {
  // The vector paths divide by `norm` narrowed to FP16.
  const float16x8_t divisor = vdupq_n_f16(static_cast<float16_t>(norm));
  size_t remaining = dim;

  // Main loop: two 128-bit vectors (16 halves) per iteration.
  while (remaining >= 16) {
    const float16x8_t lo = vld1q_f16(arr);
    const float16x8_t hi = vld1q_f16(arr + 8);
    vst1q_f16(arr, vdivq_f16(lo, divisor));
    vst1q_f16(arr + 8, vdivq_f16(hi, divisor));
    arr += 16;
    remaining -= 16;
  }

  // One optional full vector of 8 halves.
  if (remaining >= 8) {
    vst1q_f16(arr, vdivq_f16(vld1q_f16(arr), divisor));
    arr += 8;
    remaining -= 8;
  }

  // One optional half-width vector of 4 halves.
  if (remaining >= 4) {
    vst1_f16(arr, vdiv_f16(vld1_f16(arr), vget_low_f16(divisor)));
    arr += 4;
    remaining -= 4;
  }

  // Scalar tail: 0 to 3 elements, divided by the original float `norm`.
  for (size_t i = 0; i < remaining; ++i) {
    arr[i] /= norm;
  }
}
#else
//! Divide the `dim` FP16 values at `arr` by `norm`, in place
//! (NEON without FP16 arithmetic: widen to FP32, divide, narrow back)
static inline void NormalizeNEON(float16_t *arr, size_t dim, float norm) {
  const float32x4_t divisor = vdupq_n_f32(norm);

  // Divide four halves through an FP32 round trip.
  const auto divide4 = [&divisor](float16x4_t v) -> float16x4_t {
    return vcvt_f16_f32(vdivq_f32(vcvt_f32_f16(v), divisor));
  };
  // Divide eight halves: low and high quads are converted separately.
  const auto divide8 = [&divisor](float16x8_t v) -> float16x8_t {
    const float16x4_t lo =
        vcvt_f16_f32(vdivq_f32(vcvt_f32_f16(vget_low_f16(v)), divisor));
    const float16x4_t hi =
        vcvt_f16_f32(vdivq_f32(vcvt_high_f32_f16(v), divisor));
    return vcombine_f16(lo, hi);
  };

  size_t remaining = dim;

  // Main loop: 16 halves per iteration.
  while (remaining >= 16) {
    const float16x8_t first = divide8(vld1q_f16(arr));
    const float16x8_t second = divide8(vld1q_f16(arr + 8));
    vst1q_f16(arr, first);
    vst1q_f16(arr + 8, second);
    arr += 16;
    remaining -= 16;
  }

  // One optional block of 8 halves.
  if (remaining >= 8) {
    vst1q_f16(arr, divide8(vld1q_f16(arr)));
    arr += 8;
    remaining -= 8;
  }

  // One optional block of 4 halves.
  if (remaining >= 4) {
    vst1_f16(arr, divide4(vld1_f16(arr)));
    arr += 4;
    remaining -= 4;
  }

  // Scalar tail: 0 to 3 elements.
  for (size_t i = 0; i < remaining; ++i) {
    arr[i] /= norm;
  }
}
#endif  // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
#endif  // __ARM_NEON && __aarch64__

#if defined(__AVX__)
#if defined(__AVX512F__)
//! Divide the `dim` floats at `arr` by `norm`, in place (AVX-512F)
static inline void NormalizeAVX512(float *arr, size_t dim, float norm) {
  const __m512 div512 = _mm512_set1_ps(norm);
  const bool is_aligned = (reinterpret_cast<uintptr_t>(arr) & 0x3f) == 0;
  size_t remaining = dim;

  if (is_aligned) {
    // 64-byte aligned start: the narrower accesses below stay aligned too.
    while (remaining >= 16) {
      _mm512_store_ps(arr, _mm512_div_ps(_mm512_load_ps(arr), div512));
      arr += 16;
      remaining -= 16;
    }
    if (remaining >= 8) {
      const __m256 div256 = _mm256_set1_ps(norm);
      _mm256_store_ps(arr, _mm256_div_ps(_mm256_load_ps(arr), div256));
      arr += 8;
      remaining -= 8;
    }
    if (remaining >= 4) {
      const __m128 div128 = _mm_set1_ps(norm);
      _mm_store_ps(arr, _mm_div_ps(_mm_load_ps(arr), div128));
      arr += 4;
      remaining -= 4;
    }
  } else {
    // Same sequence with unaligned loads and stores.
    while (remaining >= 16) {
      _mm512_storeu_ps(arr, _mm512_div_ps(_mm512_loadu_ps(arr), div512));
      arr += 16;
      remaining -= 16;
    }
    if (remaining >= 8) {
      const __m256 div256 = _mm256_set1_ps(norm);
      _mm256_storeu_ps(arr, _mm256_div_ps(_mm256_loadu_ps(arr), div256));
      arr += 8;
      remaining -= 8;
    }
    if (remaining >= 4) {
      const __m128 div128 = _mm_set1_ps(norm);
      _mm_storeu_ps(arr, _mm_div_ps(_mm_loadu_ps(arr), div128));
      arr += 4;
      remaining -= 4;
    }
  }

  // Scalar tail: 0 to 3 elements.
  for (size_t i = 0; i < remaining; ++i) {
    arr[i] /= norm;
  }
}
#endif  // __AVX512F__

//! Divide the `dim` floats at `arr` by `norm`, in place (AVX)
static inline void NormalizeAVX(float *arr, size_t dim, float norm) {
  const __m256 div256 = _mm256_set1_ps(norm);
  const bool is_aligned = (reinterpret_cast<uintptr_t>(arr) & 0x1f) == 0;
  size_t remaining = dim;

  if (is_aligned) {
    // 32-byte aligned start: two 256-bit vectors (16 floats) per iteration.
    while (remaining >= 16) {
      const __m256 lo = _mm256_load_ps(arr);
      const __m256 hi = _mm256_load_ps(arr + 8);
      _mm256_store_ps(arr, _mm256_div_ps(lo, div256));
      _mm256_store_ps(arr + 8, _mm256_div_ps(hi, div256));
      arr += 16;
      remaining -= 16;
    }
    if (remaining >= 8) {
      _mm256_store_ps(arr, _mm256_div_ps(_mm256_load_ps(arr), div256));
      arr += 8;
      remaining -= 8;
    }
    if (remaining >= 4) {
      const __m128 div128 = _mm_set1_ps(norm);
      _mm_store_ps(arr, _mm_div_ps(_mm_load_ps(arr), div128));
      arr += 4;
      remaining -= 4;
    }
  } else {
    // Same sequence with unaligned loads and stores.
    while (remaining >= 16) {
      const __m256 lo = _mm256_loadu_ps(arr);
      const __m256 hi = _mm256_loadu_ps(arr + 8);
      _mm256_storeu_ps(arr, _mm256_div_ps(lo, div256));
      _mm256_storeu_ps(arr + 8, _mm256_div_ps(hi, div256));
      arr += 16;
      remaining -= 16;
    }
    if (remaining >= 8) {
      _mm256_storeu_ps(arr, _mm256_div_ps(_mm256_loadu_ps(arr), div256));
      arr += 8;
      remaining -= 8;
    }
    if (remaining >= 4) {
      const __m128 div128 = _mm_set1_ps(norm);
      _mm_storeu_ps(arr, _mm_div_ps(_mm_loadu_ps(arr), div128));
      arr += 4;
      remaining -= 4;
    }
  }

  // Scalar tail: 0 to 3 elements.
  for (size_t i = 0; i < remaining; ++i) {
    arr[i] /= norm;
  }
}
#endif  // __AVX__

#if defined(__AVX__) && defined(__F16C__)
#if defined(__AVX512F__)
//! Divide the `dim` FP16 values at `arr` by `norm`, in place (AVX-512F + F16C)
//! Each uint16_t holds the raw bits of a half-precision value; elements are
//! widened to FP32, divided, and narrowed back with _MM_FROUND_NO_EXC.
//! Processing order: blocks of 16, one optional block of 8, one optional
//! block of 4, then a scalar tail of up to 3 elements.
static inline void NormalizeAVX512(uint16_t *arr, size_t dim, float norm) {
  uint16_t *last = arr + dim;
  uint16_t *last_aligned = arr + ((dim >> 4) << 4);

  __m512 zmm_norm = _mm512_set1_ps(norm);
  // 16 halves occupy 32 bytes, so 32-byte alignment is enough for the
  // aligned 256-bit (and subsequent 128-bit) integer loads and stores.
  if (((uintptr_t)arr & 0x1f) == 0) {
    for (; arr != last_aligned; arr += 16) {
      _mm256_store_si256(
          (__m256i *)arr,
          _mm512_cvtps_ph(_mm512_div_ps(_mm512_cvtph_ps(_mm256_load_si256(
                                            (const __m256i *)arr)),
                                        zmm_norm),
                          _MM_FROUND_NO_EXC));
    }
    if (last - arr >= 8) {
      __m256 ymm_norm = _mm256_set1_ps(norm);
      _mm_store_si128(
          (__m128i *)arr,
          _mm256_cvtps_ph(_mm256_div_ps(_mm256_cvtph_ps(_mm_load_si128(
                                            (const __m128i *)arr)),
                                        ymm_norm),
                          _MM_FROUND_NO_EXC));
      arr += 8;
    }
  } else {
    for (; arr != last_aligned; arr += 16) {
      _mm256_storeu_si256(
          (__m256i *)arr,
          _mm512_cvtps_ph(_mm512_div_ps(_mm512_cvtph_ps(_mm256_loadu_si256(
                                            (const __m256i *)arr)),
                                        zmm_norm),
                          _MM_FROUND_NO_EXC));
    }
    if (last - arr >= 8) {
      __m256 ymm_norm = _mm256_set1_ps(norm);
      _mm_storeu_si128(
          (__m128i *)arr,
          _mm256_cvtps_ph(_mm256_div_ps(_mm256_cvtph_ps(_mm_loadu_si128(
                                            (const __m128i *)arr)),
                                        ymm_norm),
                          _MM_FROUND_NO_EXC));
      arr += 8;
    }
  }
  if (last - arr >= 4) {
    // The 64-bit load/store pair touches exactly 4 halves.
    __m128 xmm_norm = _mm_set1_ps(norm);
    _mm_storel_epi64(
        (__m128i *)arr,
        _mm_cvtps_ph(
            _mm_div_ps(_mm_cvtph_ps(_mm_loadl_epi64((const __m128i *)arr)),
                       xmm_norm),
            _MM_FROUND_NO_EXC));
    // Advance by the 4 elements just processed; advancing by 8 would step
    // past `last` and leave the remaining 1-3 elements unnormalized.
    arr += 4;
  }
  switch (last - arr) {
    case 3:
      arr[2] = _cvtss_sh(_cvtsh_ss(arr[2]) / norm, _MM_FROUND_NO_EXC);
      /* FALLTHRU */
    case 2:
      arr[1] = _cvtss_sh(_cvtsh_ss(arr[1]) / norm, _MM_FROUND_NO_EXC);
      /* FALLTHRU */
    case 1:
      arr[0] = _cvtss_sh(_cvtsh_ss(arr[0]) / norm, _MM_FROUND_NO_EXC);
  }
}
#endif  // __AVX512F__

//! Divide the `dim` FP16 values at `arr` by `norm`, in place (AVX + F16C)
//! Each uint16_t holds the raw bits of a half-precision value; elements are
//! widened to FP32, divided, and narrowed back with _MM_FROUND_NO_EXC.
//! Processing order: blocks of 16, one optional block of 8, one optional
//! block of 4, then a scalar tail of up to 3 elements.
static inline void NormalizeAVX(uint16_t *arr, size_t dim, float norm) {
  uint16_t *last = arr + dim;
  uint16_t *last_aligned = arr + ((dim >> 4) << 4);

  __m256 ymm_norm = _mm256_set1_ps(norm);
  // 8 halves occupy 16 bytes, so 16-byte alignment is enough for the
  // aligned 128-bit integer loads and stores.
  if (((uintptr_t)arr & 0xf) == 0) {
    for (; arr != last_aligned; arr += 16) {
      __m128i xmm_0 = _mm_load_si128((const __m128i *)(arr + 0));
      __m128i xmm_1 = _mm_load_si128((const __m128i *)(arr + 8));
      __m256 ymm_0 = _mm256_div_ps(_mm256_cvtph_ps(xmm_0), ymm_norm);
      __m256 ymm_1 = _mm256_div_ps(_mm256_cvtph_ps(xmm_1), ymm_norm);
      _mm_store_si128((__m128i *)(arr + 0),
                      _mm256_cvtps_ph(ymm_0, _MM_FROUND_NO_EXC));
      _mm_store_si128((__m128i *)(arr + 8),
                      _mm256_cvtps_ph(ymm_1, _MM_FROUND_NO_EXC));
    }
    if (last - arr >= 8) {
      _mm_store_si128(
          (__m128i *)arr,
          _mm256_cvtps_ph(_mm256_div_ps(_mm256_cvtph_ps(_mm_load_si128(
                                            (const __m128i *)arr)),
                                        ymm_norm),
                          _MM_FROUND_NO_EXC));
      arr += 8;
    }
  } else {
    for (; arr != last_aligned; arr += 16) {
      __m128i xmm_0 = _mm_loadu_si128((const __m128i *)(arr + 0));
      __m128i xmm_1 = _mm_loadu_si128((const __m128i *)(arr + 8));
      __m256 ymm_0 = _mm256_div_ps(_mm256_cvtph_ps(xmm_0), ymm_norm);
      __m256 ymm_1 = _mm256_div_ps(_mm256_cvtph_ps(xmm_1), ymm_norm);
      _mm_storeu_si128((__m128i *)(arr + 0),
                       _mm256_cvtps_ph(ymm_0, _MM_FROUND_NO_EXC));
      _mm_storeu_si128((__m128i *)(arr + 8),
                       _mm256_cvtps_ph(ymm_1, _MM_FROUND_NO_EXC));
    }
    if (last - arr >= 8) {
      _mm_storeu_si128(
          (__m128i *)arr,
          _mm256_cvtps_ph(_mm256_div_ps(_mm256_cvtph_ps(_mm_loadu_si128(
                                            (const __m128i *)arr)),
                                        ymm_norm),
                          _MM_FROUND_NO_EXC));
      arr += 8;
    }
  }
  if (last - arr >= 4) {
    // The 64-bit load/store pair touches exactly 4 halves.
    __m128 xmm_norm = _mm_set1_ps(norm);
    _mm_storel_epi64(
        (__m128i *)arr,
        _mm_cvtps_ph(
            _mm_div_ps(_mm_cvtph_ps(_mm_loadl_epi64((const __m128i *)arr)),
                       xmm_norm),
            _MM_FROUND_NO_EXC));
    // Advance by the 4 elements just processed; advancing by 8 would step
    // past `last` and leave the remaining 1-3 elements unnormalized.
    arr += 4;
  }
  switch (last - arr) {
    case 3:
      arr[2] = _cvtss_sh(_cvtsh_ss(arr[2]) / norm, _MM_FROUND_NO_EXC);
      /* FALLTHRU */
    case 2:
      arr[1] = _cvtss_sh(_cvtsh_ss(arr[1]) / norm, _MM_FROUND_NO_EXC);
      /* FALLTHRU */
    case 1:
      arr[0] = _cvtss_sh(_cvtsh_ss(arr[0]) / norm, _MM_FROUND_NO_EXC);
  }
}
#endif  // __AVX__ && __F16C__

#if defined(__SSE__)
//! Divide the `dim` floats at `arr` by `norm`, in place (SSE)
static inline void NormalizeSSE(float *arr, size_t dim, float norm) {
  const __m128 divisor = _mm_set1_ps(norm);
  const bool is_aligned = (reinterpret_cast<uintptr_t>(arr) & 0xf) == 0;
  size_t remaining = dim;

  if (is_aligned) {
    // 16-byte aligned start: two 128-bit vectors (8 floats) per iteration.
    while (remaining >= 8) {
      const __m128 lo = _mm_load_ps(arr);
      const __m128 hi = _mm_load_ps(arr + 4);
      _mm_store_ps(arr, _mm_div_ps(lo, divisor));
      _mm_store_ps(arr + 4, _mm_div_ps(hi, divisor));
      arr += 8;
      remaining -= 8;
    }
    if (remaining >= 4) {
      _mm_store_ps(arr, _mm_div_ps(_mm_load_ps(arr), divisor));
      arr += 4;
      remaining -= 4;
    }
  } else {
    // Same sequence with unaligned loads and stores.
    while (remaining >= 8) {
      const __m128 lo = _mm_loadu_ps(arr);
      const __m128 hi = _mm_loadu_ps(arr + 4);
      _mm_storeu_ps(arr, _mm_div_ps(lo, divisor));
      _mm_storeu_ps(arr + 4, _mm_div_ps(hi, divisor));
      arr += 8;
      remaining -= 8;
    }
    if (remaining >= 4) {
      _mm_storeu_ps(arr, _mm_div_ps(_mm_loadu_ps(arr), divisor));
      arr += 4;
      remaining -= 4;
    }
  }

  // Scalar tail: 0 to 3 elements.
  for (size_t i = 0; i < remaining; ++i) {
    arr[i] /= norm;
  }
}
#endif  // __SSE__

#if defined(__SSE__) || (defined(__ARM_NEON) && defined(__aarch64__))
//! Divide every element of the vector by `norm`, in place (FP32).
//! Dispatches to the widest kernel compiled in that has at least one full
//! vector of work for the given `dim`.
void Normalizer<float>::Compute(ValueType *arr, size_t dim, float norm) {
#if defined(__ARM_NEON)
  NormalizeNEON(arr, dim, norm);
#else
#if defined(__AVX512F__)
  // 16 floats fill one 512-bit register.
  if (dim >= 16) {
    return NormalizeAVX512(arr, dim, norm);
  }
#endif  // __AVX512F__
#if defined(__AVX__)
  // 8 floats fill one 256-bit register.
  if (dim >= 8) {
    return NormalizeAVX(arr, dim, norm);
  }
#endif  // __AVX__
  NormalizeSSE(arr, dim, norm);
#endif  // __ARM_NEON
}
#endif  // __SSE__ || (__ARM_NEON && __aarch64__)

#if (defined(__F16C__) && defined(__AVX__)) || \
    (defined(__ARM_NEON) && defined(__aarch64__))
//! Divide every element of the vector by `norm`, in place (FP16).
//! The Float16 buffer is reinterpreted as the element type the selected
//! kernel expects (float16_t on NEON, raw uint16_t bits on x86).
void Normalizer<Float16>::Compute(ValueType *arr, size_t dim, float norm) {
#if defined(__ARM_NEON)
  NormalizeNEON(reinterpret_cast<float16_t *>(arr), dim, norm);
#else
#if defined(__AVX512F__)
  // The AVX-512 kernel is only chosen for vectors of 32 or more elements.
  if (dim >= 32) {
    return NormalizeAVX512(reinterpret_cast<uint16_t *>(arr), dim, norm);
  }
#endif  // __AVX512F__
  NormalizeAVX(reinterpret_cast<uint16_t *>(arr), dim, norm);
#endif  // __ARM_NEON
}
#endif  // (__F16C__ && __AVX__) || (__ARM_NEON && __aarch64__)

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/math/normalizer.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "norm_matrix.h"

namespace zvec {
namespace ailego {

/*! Normalizer
 *  Generic in-place vector normalizer, enabled for element types accepted
 *  by IsFloatingPoint.
 */
template <typename T,
          typename = typename std::enable_if<IsFloatingPoint<T>::value>::type>
struct Normalizer {
  //! Type of value
  using ValueType = typename std::remove_cv<T>::type;

  //! Divide each of the `dim` elements of `arr` by `norm`, in place
  static inline void Compute(ValueType *arr, size_t dim, float norm) {
    ValueType *const end = arr + dim;
    for (ValueType *it = arr; it != end; ++it) {
      *it /= norm;
    }
  }

  //! Normalize a vector (L1): `*norm` receives the value produced by
  //! Norm1Matrix; the vector is only rescaled when that value is positive.
  static inline void L1(ValueType *arr, size_t dim, float *norm) {
    Norm1Matrix<ValueType, 1>::Compute(arr, dim, norm);
    const float scale = *norm;
    if (scale > 0.0f) {
      Compute(arr, dim, scale);
    }
  }

  //! Normalize a vector (L2): `*norm` receives the value produced by
  //! Norm2Matrix; the vector is only rescaled when that value is positive.
  static inline void L2(ValueType *arr, size_t dim, float *norm) {
    Norm2Matrix<ValueType, 1>::Compute(arr, dim, norm);
    const float scale = *norm;
    if (scale > 0.0f) {
      Compute(arr, dim, scale);
    }
  }
};

#if defined(__SSE__) || (defined(__ARM_NEON) && defined(__aarch64__))
/*! Normalizer (FP32)
 *  Specialization whose Compute() is defined out of line.
 */
template <>
struct Normalizer<float> {
  //! Type of value
  using ValueType = float;

  //! Divide each of the `dim` elements of `arr` by `norm`, in place
  static void Compute(ValueType *arr, size_t dim, float norm);

  //! Normalize a vector (L1): `*norm` receives the value produced by
  //! Norm1Matrix; the vector is only rescaled when that value is positive.
  static inline void L1(ValueType *arr, size_t dim, float *norm) {
    Norm1Matrix<ValueType, 1>::Compute(arr, dim, norm);
    const float scale = *norm;
    if (scale > 0.0f) {
      Compute(arr, dim, scale);
    }
  }

  //! Normalize a vector (L2): `*norm` receives the value produced by
  //! Norm2Matrix; the vector is only rescaled when that value is positive.
  static inline void L2(ValueType *arr, size_t dim, float *norm) {
    Norm2Matrix<ValueType, 1>::Compute(arr, dim, norm);
    const float scale = *norm;
    if (scale > 0.0f) {
      Compute(arr, dim, scale);
    }
  }
};
#endif  // __SSE__ || (__ARM_NEON && __aarch64__)

#if (defined(__F16C__) && defined(__AVX__)) || \
    (defined(__ARM_NEON) && defined(__aarch64__))
/*! Normalizer (FP16)
 *  Specialization whose Compute() is defined out of line.
 */
template <>
struct Normalizer<Float16> {
  //! Type of value
  using ValueType = Float16;

  //! Divide each of the `dim` elements of `arr` by `norm`, in place
  static void Compute(ValueType *arr, size_t dim, float norm);

  //! Normalize a vector (L1): `*norm` receives the value produced by
  //! Norm1Matrix; the vector is only rescaled when that value is positive.
  static inline void L1(ValueType *arr, size_t dim, float *norm) {
    Norm1Matrix<ValueType, 1>::Compute(arr, dim, norm);
    const float scale = *norm;
    if (scale > 0.0f) {
      Compute(arr, dim, scale);
    }
  }

  //! Normalize a vector (L2): `*norm` receives the value produced by
  //! Norm2Matrix; the vector is only rescaled when that value is positive.
  static inline void L2(ValueType *arr, size_t dim, float *norm) {
    Norm2Matrix<ValueType, 1>::Compute(arr, dim, norm);
    const float scale = *norm;
    if (scale > 0.0f) {
      Compute(arr, dim, scale);
    }
  }
};
#endif  // (__F16C__ && __AVX__) || (__ARM_NEON && __aarch64__)

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/math_batch/cosine_distance_batch.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <vector>
#include <ailego/internal/cpu_features.h>
#include <ailego/utility/math_helper.h>
#include <zvec/ailego/internal/platform.h>
#include <zvec/ailego/utility/type_helper.h>
#include "inner_product_distance_batch.h"

namespace zvec::ailego::DistanceBatch {


template <typename T, size_t BatchSize, size_t PrefetchStep, typename = void>
struct CosineDistanceBatch;

/*! Batched cosine distance built on top of the batched inner product.
 *  The last sizeof(float) bytes of every vector are left out of the inner
 *  product (presumably they carry extra per-vector data such as a norm;
 *  confirm against the code that builds the vectors).
 */
template <typename T, size_t BatchSize, size_t PrefetchStep, typename>
struct CosineDistanceBatch {
  using ValueType = typename std::remove_cv<T>::type;

  using IPImplType =
      InnerProductDistanceBatch<ValueType, BatchSize, PrefetchStep>;

  //! Write `1 - inner_product(vecs[i], query)` into `results[i]` for each of
  //! the `num_vecs` vectors, using the first `dim - extra_dim` elements.
  static inline void ComputeBatch(const ValueType **vecs,
                                  const ValueType *query, size_t num_vecs,
                                  size_t dim, float *results) {
    // Number of trailing elements that together occupy sizeof(float) bytes.
    constexpr size_t extra_dim = sizeof(float) / sizeof(ValueType);
    const size_t payload_dim = dim - extra_dim;

    IPImplType::ComputeBatch(vecs, query, num_vecs, payload_dim, results);

    // Turn each inner product into a distance.
    float *const end = results + num_vecs;
    for (float *r = results; r != end; ++r) {
      *r = 1 - *r;
    }
  }

  //! Forward query preprocessing to the inner product implementation with
  //! the trailing extra elements excluded from `dim`.
  static void QueryPreprocess(void *query, size_t dim) {
    constexpr size_t extra_dim = sizeof(float) / sizeof(ValueType);
    return IPImplType::QueryPreprocess(query, dim - extra_dim);
  }
};


}  // namespace zvec::ailego::DistanceBatch

================================================
FILE: src/ailego/math_batch/distance_batch.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <zvec/ailego/math_batch/utils.h>
#include "ailego/math/distance_matrix.h"
#include "cosine_distance_batch.h"
#include "inner_product_distance_batch.h"

namespace zvec::ailego {

template <
    template <typename, size_t, size_t, typename = void> class DistanceType,
    typename ValueType, size_t BatchSize, size_t PrefetchStep, typename = void>
struct BaseDistance {
  static inline void _ComputeBatch(const ValueType **m, const ValueType *q,
                                   size_t num, size_t dim, float *out) {
    for (size_t i = 0; i < num; ++i) {
      DistanceType<ValueType, 1, 1>::Compute(m[i], q, dim, out + i);
    }
  }

  // If Distance has ComputeBatch, use it; otherwise fall back to _ComputeBatch.
  static inline void ComputeBatch(const ValueType **m, const ValueType *q,
                                  size_t num, size_t dim, float *out) {
    if constexpr (std::is_same_v<DistanceType<ValueType, 1, 1>,
                                 CosineDistanceMatrix<ValueType, 1, 1>>) {
      return DistanceBatch::CosineDistanceBatch<
          ValueType, BatchSize, PrefetchStep>::ComputeBatch(m, q, num, dim,
                                                            out);
    }

    _ComputeBatch(m, q, num, dim, out);
  }
};

}  // namespace zvec::ailego

================================================
FILE: src/ailego/math_batch/inner_product_distance_batch.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <vector>
#include <ailego/utility/math_helper.h>
#include <zvec/ailego/internal/platform.h>
#include <zvec/ailego/math_batch/utils.h>
#include <zvec/ailego/utility/type_helper.h>

namespace zvec::ailego::DistanceBatch {

template <typename T, size_t BatchSize, size_t PrefetchStep, typename = void>
struct InnerProductDistanceBatch;

//! Fallback one-to-many inner product: computes the inner product of `query`
//! with each of the `BatchSize` vectors in `ptrs` through the scalar
//! InnerProductMatrix<ValueType, 1, 1> kernel and stores it in `sums[j]`.
//! `prefetch_ptrs[j]` is the vector to prefetch after finishing vector j;
//! a null entry means there is nothing to prefetch.
template <typename ValueType, size_t BatchSize>
static void compute_one_to_many_inner_product_fallback(
    const ValueType *query, const ValueType **ptrs,
    std::array<const ValueType *, BatchSize> &prefetch_ptrs, size_t dim,
    float *sums) {
  for (size_t j = 0; j < BatchSize; ++j) {
    sums[j] = 0.0f;
    InnerProductMatrix<ValueType, 1, 1>::Compute(ptrs[j], query, dim, sums + j);
    // Prefetch the upcoming vector's data. Prefetching the address of the
    // array slot itself would only touch the caller's local pointer table.
    if (prefetch_ptrs[j] != nullptr) {
      ailego_prefetch(prefetch_ptrs[j]);
    }
  }
}

// Function template partial specialization is not allowed,
// therefore the wrapper struct is required.
template <typename T, size_t BatchSize>
struct InnerProductDistanceBatchImpl {
  using ValueType = typename std::remove_cv<T>::type;
  static void compute_one_to_many(
      const ValueType *query, const ValueType **ptrs,
      std::array<const ValueType *, BatchSize> &prefetch_ptrs, size_t dim,
      float *sums) {
    return compute_one_to_many_inner_product_fallback(query, ptrs,
                                                      prefetch_ptrs, dim, sums);
  }
  static DistanceBatchQueryPreprocessFunc GetQueryPreprocessFunc() {
    return nullptr;
  }
};

// Batched one-to-many inner-product driver.
//
// ComputeBatch walks `vecs` in groups of BatchSize, handing each full group to
// InnerProductDistanceBatchImpl<ValueType, BatchSize>, and finishes the
// remainder one vector at a time with the batch-size-1 implementation.
// PrefetchStep is the look-ahead distance, measured in batches, of the vectors
// passed to the kernel as prefetch hints.
template <typename T, size_t BatchSize, size_t PrefetchStep, typename>
struct InnerProductDistanceBatch {
  using ValueType = typename std::remove_cv<T>::type;

  // Computes results[k] = <vecs[k], query> for k in [0, num_vecs).
  //   vecs     - num_vecs pointers to vectors of `dim` elements
  //   query    - query vector of `dim` elements
  //   results  - output array of num_vecs floats
  static inline void ComputeBatch(const ValueType **vecs,
                                  const ValueType *query, size_t num_vecs,
                                  size_t dim, float *results) {
    size_t i = 0;
    for (; i + BatchSize <= num_vecs; i += BatchSize) {
      std::array<const ValueType *, BatchSize> prefetch_ptrs;
      const size_t ahead = i + BatchSize * PrefetchStep;
      if (ahead < num_vecs) {
        // The SIMD kernels only test prefetch_ptrs[0] before offsetting and
        // prefetching every entry, so a non-null first entry must imply that
        // all entries are valid. Look-ahead slots that run past the end are
        // pointed at the last vector instead of being left null.
        for (size_t j = 0; j < BatchSize; ++j) {
          const size_t idx = ahead + j;
          prefetch_ptrs[j] = vecs[idx < num_vecs ? idx : num_vecs - 1];
        }
      } else {
        // Nothing left to look ahead to: disable prefetching for this batch.
        prefetch_ptrs.fill(nullptr);
      }
      InnerProductDistanceBatchImpl<ValueType, BatchSize>::compute_one_to_many(
          query, &vecs[i], prefetch_ptrs, dim, &results[i]);
    }
    for (; i < num_vecs; ++i) {  // TODO: unroll by 1, 2, 4, 8, etc.
      std::array<const ValueType *, 1> prefetch_ptrs{nullptr};
      InnerProductDistanceBatchImpl<ValueType, 1>::compute_one_to_many(
          query, &vecs[i], prefetch_ptrs, dim, &results[i]);
    }
  }

  // Returns the query preprocessing hook of the batch-size-1 implementation
  // for ValueType.
  static DistanceBatchQueryPreprocessFunc GetQueryPreprocessFunc() {
    return InnerProductDistanceBatchImpl<ValueType,
                                         1>::GetQueryPreprocessFunc();
  }
};

template <>
struct InnerProductDistanceBatchImpl<ailego::Float16, 1> {
  using ValueType = ailego::Float16;
  static void compute_one_to_many(
      const ailego::Float16 *query, const ailego::Float16 **ptrs,
      std::array<const ailego::Float16 *, 1> &prefetch_ptrs, size_t dim,
      float *sums);
};

template <>
struct InnerProductDistanceBatchImpl<float, 1> {
  using ValueType = float;
  static void compute_one_to_many(const float *query, const float **ptrs,
                                  std::array<const float *, 1> &prefetch_ptrs,
                                  size_t dim, float *sums);
};

// Full specialization for int8_t with a batch of one vector. Both members are
// only declared here; they are defined out of line in
// inner_product_distance_batch_dispatch.cc.
template <>
struct InnerProductDistanceBatchImpl<int8_t, 1> {
  using ValueType = int8_t;
  // Inner product of `query` with the single vector ptrs[0], written to sums[0].
  static void compute_one_to_many(const int8_t *query, const int8_t **ptrs,
                                  std::array<const int8_t *, 1> &prefetch_ptrs,
                                  size_t dim, float *sums);

  // Query preprocessing hook for int8 queries; may be nullptr.
  static DistanceBatchQueryPreprocessFunc GetQueryPreprocessFunc();
};

// Full specialization for Float16 with a batch of twelve vectors.
// compute_one_to_many is only declared here; it is defined out of line in
// inner_product_distance_batch_dispatch.cc.
template <>
struct InnerProductDistanceBatchImpl<ailego::Float16, 12> {
  using ValueType = ailego::Float16;
  // Inner products of `query` with ptrs[0..12), written to sums[0..12).
  static void compute_one_to_many(
      const ailego::Float16 *query, const ailego::Float16 **ptrs,
      std::array<const ailego::Float16 *, 12> &prefetch_ptrs, size_t dim,
      float *sums);
};

// Full specialization for float with a batch of twelve vectors.
// compute_one_to_many is only declared here; it is defined out of line in
// inner_product_distance_batch_dispatch.cc.
template <>
struct InnerProductDistanceBatchImpl<float, 12> {
  using ValueType = float;
  // Inner products of `query` with ptrs[0..12), written to sums[0..12).
  static void compute_one_to_many(const float *query, const float **ptrs,
                                  std::array<const float *, 12> &prefetch_ptrs,
                                  size_t dim, float *sums);
};

// Full specialization for int8_t with a batch of twelve vectors.
// compute_one_to_many is only declared here; it is defined out of line in
// inner_product_distance_batch_dispatch.cc.
template <>
struct InnerProductDistanceBatchImpl<int8_t, 12> {
  using ValueType = int8_t;
  // Inner products of `query` with ptrs[0..12), written to sums[0..12).
  static void compute_one_to_many(const int8_t *query, const int8_t **ptrs,
                                  std::array<const int8_t *, 12> &prefetch_ptrs,
                                  size_t dim, float *sums);
};

}  // namespace zvec::ailego::DistanceBatch


================================================
FILE: src/ailego/math_batch/inner_product_distance_batch_dispatch.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <ailego/internal/cpu_features.h>
#include <ailego/math/inner_product_matrix.h>
#include <ailego/utility/math_helper.h>
#include <zvec/ailego/internal/platform.h>
#include <zvec/ailego/math_batch/utils.h>
#include <zvec/ailego/utility/type_helper.h>
#include "inner_product_distance_batch.h"

namespace zvec::ailego::DistanceBatch {

// Forward declarations of the int8 AVX512-VNNI entry points (query
// preprocessing hook plus the batch-of-1 and batch-of-12 kernels). They are
// only visible when the compiler targets __AVX512VNNI__.
#if defined(__AVX512VNNI__)
void compute_one_to_many_inner_product_avx512_vnni_int8_query_preprocess(
    void *query, size_t dim);

void compute_one_to_many_inner_product_avx512_vnni_int8_1(
    const int8_t *query, const int8_t **ptrs,
    std::array<const int8_t *, 1> &prefetch_ptrs, size_t dimensionality,
    float *results);

void compute_one_to_many_inner_product_avx512_vnni_int8_12(
    const int8_t *query, const int8_t **ptrs,
    std::array<const int8_t *, 12> &prefetch_ptrs, size_t dimensionality,
    float *results);
#endif

// Forward declarations of the Float16 kernels that use AVX512-FP16
// (batch-of-1 and batch-of-12). Only visible when the compiler targets
// __AVX512FP16__.
#if defined(__AVX512FP16__)
void compute_one_to_many_inner_product_avx512fp16_fp16_1(
    const ailego::Float16 *query, const ailego::Float16 **ptrs,
    std::array<const ailego::Float16 *, 1> &prefetch_ptrs,
    size_t dimensionality, float *results);

void compute_one_to_many_inner_product_avx512fp16_fp16_12(
    const ailego::Float16 *query, const ailego::Float16 **ptrs,
    std::array<const ailego::Float16 *, 12> &prefetch_ptrs,
    size_t dimensionality, float *results);
#endif  //__AVX512FP16__

// Forward declarations of the Float16 kernels that use AVX512F
// (batch-of-1 and batch-of-12). Only visible when the compiler targets
// __AVX512F__.
#if defined(__AVX512F__)
void compute_one_to_many_inner_product_avx512f_fp16_1(
    const ailego::Float16 *query, const ailego::Float16 **ptrs,
    std::array<const ailego::Float16 *, 1> &prefetch_ptrs,
    size_t dimensionality, float *results);

void compute_one_to_many_inner_product_avx512f_fp16_12(
    const ailego::Float16 *query, const ailego::Float16 **ptrs,
    std::array<const ailego::Float16 *, 12> &prefetch_ptrs,
    size_t dimensionality, float *results);
#endif  //__AVX512F__

// Forward declarations of the AVX2 kernels for float, Float16 and int8, each
// in a batch-of-1 and a batch-of-12 flavour. Only visible when the compiler
// targets __AVX2__.
#if defined(__AVX2__)
// Batch of 1.
void compute_one_to_many_inner_product_avx2_fp32_1(
    const float *query, const float **ptrs,
    std::array<const float *, 1> &prefetch_ptrs, size_t dimensionality,
    float *results);

void compute_one_to_many_inner_product_avx2_fp16_1(
    const ailego::Float16 *query, const ailego::Float16 **ptrs,
    std::array<const ailego::Float16 *, 1> &prefetch_ptrs,
    size_t dimensionality, float *results);

void compute_one_to_many_inner_product_avx2_int8_1(
    const int8_t *query, const int8_t **ptrs,
    std::array<const int8_t *, 1> &prefetch_ptrs, size_t dimensionality,
    float *results);

// Batch of 12.
void compute_one_to_many_inner_product_avx2_fp32_12(
    const float *query, const float **ptrs,
    std::array<const float *, 12> &prefetch_ptrs, size_t dimensionality,
    float *results);

void compute_one_to_many_inner_product_avx2_fp16_12(
    const ailego::Float16 *query, const ailego::Float16 **ptrs,
    std::array<const ailego::Float16 *, 12> &prefetch_ptrs,
    size_t dimensionality, float *results);

void compute_one_to_many_inner_product_avx2_int8_12(
    const int8_t *query, const int8_t **ptrs,
    std::array<const int8_t *, 12> &prefetch_ptrs, size_t dimensionality,
    float *results);
#endif

// Dispatch for a single float vector: uses the AVX2 kernel when it was
// compiled in and CpuFeatures::static_flags_.AVX2 is set, otherwise the
// portable fallback.
void InnerProductDistanceBatchImpl<float, 1>::compute_one_to_many(
    const ValueType *query, const ValueType **ptrs,
    std::array<const ValueType *, 1> &prefetch_ptrs, size_t dim, float *sums) {
#if defined(__AVX2__)
  const auto &cpu = zvec::ailego::internal::CpuFeatures::static_flags_;
  if (cpu.AVX2) {
    compute_one_to_many_inner_product_avx2_fp32_1(query, ptrs, prefetch_ptrs,
                                                  dim, sums);
    return;
  }
#endif
  compute_one_to_many_inner_product_fallback(query, ptrs, prefetch_ptrs, dim,
                                             sums);
}

// Dispatch for a single Float16 vector. Kernels are tried in the order
// AVX512-FP16, AVX512F, AVX2; each one is used only if it was compiled in and
// its CpuFeatures::static_flags_ bit is set. Otherwise the portable fallback
// runs.
void InnerProductDistanceBatchImpl<ailego::Float16, 1>::compute_one_to_many(
    const ailego::Float16 *query, const ailego::Float16 **ptrs,
    std::array<const ailego::Float16 *, 1> &prefetch_ptrs, size_t dim,
    float *sums) {
#if defined(__AVX512FP16__) || defined(__AVX512F__) || defined(__AVX2__)
  const auto &cpu = zvec::ailego::internal::CpuFeatures::static_flags_;
#endif
#if defined(__AVX512FP16__)
  if (cpu.AVX512_FP16) {
    compute_one_to_many_inner_product_avx512fp16_fp16_1(
        query, ptrs, prefetch_ptrs, dim, sums);
    return;
  }
#endif
#if defined(__AVX512F__)
  if (cpu.AVX512F) {
    compute_one_to_many_inner_product_avx512f_fp16_1(query, ptrs,
                                                     prefetch_ptrs, dim, sums);
    return;
  }
#endif
#if defined(__AVX2__)
  if (cpu.AVX2) {
    compute_one_to_many_inner_product_avx2_fp16_1(query, ptrs, prefetch_ptrs,
                                                  dim, sums);
    return;
  }
#endif
  compute_one_to_many_inner_product_fallback(query, ptrs, prefetch_ptrs, dim,
                                             sums);
}

// Dispatch for a single int8 vector. Kernels are tried in the order
// AVX512-VNNI, AVX2; each one is used only if it was compiled in and its
// CpuFeatures::static_flags_ bit is set. Otherwise the portable fallback runs.
void InnerProductDistanceBatchImpl<int8_t, 1>::compute_one_to_many(
    const int8_t *query, const int8_t **ptrs,
    std::array<const int8_t *, 1> &prefetch_ptrs, size_t dim, float *sums) {
  // TODO: an AVX512BW variant (compute_one_to_many_avx512_int8) exists but is
  // problematic, so it is intentionally not dispatched to here.
#if defined(__AVX512VNNI__) || defined(__AVX2__)
  const auto &cpu = zvec::ailego::internal::CpuFeatures::static_flags_;
#endif
#if defined(__AVX512VNNI__)
  if (cpu.AVX512_VNNI) {
    compute_one_to_many_inner_product_avx512_vnni_int8_1(
        query, ptrs, prefetch_ptrs, dim, sums);
    return;
  }
#endif
#if defined(__AVX2__)
  if (cpu.AVX2) {
    compute_one_to_many_inner_product_avx2_int8_1(query, ptrs, prefetch_ptrs,
                                                  dim, sums);
    return;
  }
#endif
  compute_one_to_many_inner_product_fallback(query, ptrs, prefetch_ptrs, dim,
                                             sums);
}

// Returns the int8 query preprocessing hook: the AVX512-VNNI preprocessing
// routine when it was compiled in and CpuFeatures::static_flags_.AVX512_VNNI
// is set, nullptr otherwise.
DistanceBatchQueryPreprocessFunc
InnerProductDistanceBatchImpl<int8_t, 1>::GetQueryPreprocessFunc() {
  DistanceBatchQueryPreprocessFunc preprocess = nullptr;
#if defined(__AVX512VNNI__)
  if (zvec::ailego::internal::CpuFeatures::static_flags_.AVX512_VNNI) {
    preprocess =
        compute_one_to_many_inner_product_avx512_vnni_int8_query_preprocess;
  }
#endif
  return preprocess;
}

// Dispatch for a batch of twelve float vectors: uses the AVX2 kernel when it
// was compiled in and CpuFeatures::static_flags_.AVX2 is set, otherwise the
// portable fallback.
void InnerProductDistanceBatchImpl<float, 12>::compute_one_to_many(
    const ValueType *query, const ValueType **ptrs,
    std::array<const ValueType *, 12> &prefetch_ptrs, size_t dim, float *sums) {
#if defined(__AVX2__)
  const auto &cpu = zvec::ailego::internal::CpuFeatures::static_flags_;
  if (cpu.AVX2) {
    compute_one_to_many_inner_product_avx2_fp32_12(query, ptrs, prefetch_ptrs,
                                                   dim, sums);
    return;
  }
#endif
  compute_one_to_many_inner_product_fallback(query, ptrs, prefetch_ptrs, dim,
                                             sums);
}

// Dispatch for a batch of twelve Float16 vectors. Kernels are tried in the
// order AVX512-FP16, AVX512F, AVX2; each one is used only if it was compiled
// in and its CpuFeatures::static_flags_ bit is set. Otherwise the portable
// fallback runs.
void InnerProductDistanceBatchImpl<ailego::Float16, 12>::compute_one_to_many(
    const ailego::Float16 *query, const ailego::Float16 **ptrs,
    std::array<const ailego::Float16 *, 12> &prefetch_ptrs, size_t dim,
    float *sums) {
#if defined(__AVX512FP16__) || defined(__AVX512F__) || defined(__AVX2__)
  const auto &cpu = zvec::ailego::internal::CpuFeatures::static_flags_;
#endif
#if defined(__AVX512FP16__)
  if (cpu.AVX512_FP16) {
    compute_one_to_many_inner_product_avx512fp16_fp16_12(
        query, ptrs, prefetch_ptrs, dim, sums);
    return;
  }
#endif
#if defined(__AVX512F__)
  if (cpu.AVX512F) {
    compute_one_to_many_inner_product_avx512f_fp16_12(
        query, ptrs, prefetch_ptrs, dim, sums);
    return;
  }
#endif
#if defined(__AVX2__)
  if (cpu.AVX2) {
    compute_one_to_many_inner_product_avx2_fp16_12(query, ptrs, prefetch_ptrs,
                                                   dim, sums);
    return;
  }
#endif
  compute_one_to_many_inner_product_fallback(query, ptrs, prefetch_ptrs, dim,
                                             sums);
}

// Dispatch for a batch of twelve int8 vectors. Kernels are tried in the order
// AVX512-VNNI, AVX2; each one is used only if it was compiled in and its
// CpuFeatures::static_flags_ bit is set. Otherwise the portable fallback runs.
void InnerProductDistanceBatchImpl<int8_t, 12>::compute_one_to_many(
    const int8_t *query, const int8_t **ptrs,
    std::array<const int8_t *, 12> &prefetch_ptrs, size_t dim, float *sums) {
  // TODO: an AVX512BW variant (compute_one_to_many_avx512_int8) exists but is
  // problematic, so it is intentionally not dispatched to here.
#if defined(__AVX512VNNI__) || defined(__AVX2__)
  const auto &cpu = zvec::ailego::internal::CpuFeatures::static_flags_;
#endif
#if defined(__AVX512VNNI__)
  if (cpu.AVX512_VNNI) {
    compute_one_to_many_inner_product_avx512_vnni_int8_12(
        query, ptrs, prefetch_ptrs, dim, sums);
    return;
  }
#endif
#if defined(__AVX2__)
  if (cpu.AVX2) {
    compute_one_to_many_inner_product_avx2_int8_12(query, ptrs, prefetch_ptrs,
                                                   dim, sums);
    return;
  }
#endif
  compute_one_to_many_inner_product_fallback(query, ptrs, prefetch_ptrs, dim,
                                             sums);
}

}  // namespace zvec::ailego::DistanceBatch


================================================
FILE: src/ailego/math_batch/inner_product_distance_batch_impl_fp16_avx2.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <array>
#include <ailego/math/matrix_utility.i>
#include <ailego/utility/math_helper.h>
#include <zvec/ailego/internal/platform.h>
#include <zvec/ailego/utility/type_helper.h>

namespace zvec::ailego::DistanceBatch {

#if defined(__AVX2__)

// AVX2 kernel: inner products of one Float16 query against dp_batch Float16
// vectors. Half-precision inputs are widened to float with _mm256_cvtph_ps and
// accumulated in float.
//
//   query          - query vector, `dimensionality` elements
//   ptrs           - dp_batch vector pointers, `dimensionality` elements each
//   prefetch_ptrs  - vectors to prefetch during the main loop; a null entry 0
//                    disables prefetching
//   dimensionality - number of elements per vector
//   results        - output, dp_batch floats
template <typename ValueType, size_t dp_batch>
static std::enable_if_t<std::is_same_v<ValueType, ailego::Float16>, void>
compute_one_to_many_inner_product_avx2_fp16(
    const ailego::Float16 *query, const ailego::Float16 **ptrs,
    std::array<const ailego::Float16 *, dp_batch> &prefetch_ptrs,
    size_t dimensionality, float *results) {
  // One 8-lane float accumulator per vector in the batch.
  __m256 accs[dp_batch];
  for (size_t i = 0; i < dp_batch; ++i) {
    accs[i] = _mm256_setzero_ps();
  }

  // Main loop: 16 half-precision elements per iteration, split into two
  // 8-lane float halves.
  size_t dim = 0;
  for (; dim + 16 <= dimensionality; dim += 16) {
    __m256i q =
        _mm256_loadu_si256(reinterpret_cast<const __m256i *>(query + dim));

    __m256 q1 = _mm256_cvtph_ps(_mm256_castsi256_si128(q));
    __m256 q2 = _mm256_cvtph_ps(_mm256_extractf128_si256(q, 1));

    __m256 data_regs_1[dp_batch];
    __m256 data_regs_2[dp_batch];
    for (size_t i = 0; i < dp_batch; ++i) {
      __m256i m =
          _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptrs[i] + dim));

      data_regs_1[i] = _mm256_cvtph_ps(_mm256_castsi256_si128(m));
      data_regs_2[i] = _mm256_cvtph_ps(_mm256_extractf128_si256(m, 1));
    }

    // NOTE(review): only entry 0 is null-checked before every entry is offset
    // and prefetched; confirm callers never pass a mix of null and non-null
    // entries.
    if (prefetch_ptrs[0]) {
      for (size_t i = 0; i < dp_batch; ++i) {
        ailego_prefetch(prefetch_ptrs[i] + dim);
      }
    }

    for (size_t i = 0; i < dp_batch; ++i) {
      accs[i] = _mm256_fmadd_ps(q1, data_regs_1[i], accs[i]);
      accs[i] = _mm256_fmadd_ps(q2, data_regs_2[i], accs[i]);
    }
  }

  // Optional single step of 8 elements.
  if (dim + 8 <= dimensionality) {
    __m256 q = _mm256_cvtph_ps(
        _mm_loadu_si128(reinterpret_cast<const __m128i *>(query + dim)));

    __m256 data_regs[dp_batch];
    for (size_t i = 0; i < dp_batch; ++i) {
      data_regs[i] = _mm256_cvtph_ps(
          _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptrs[i] + dim)));
      accs[i] = _mm256_fmadd_ps(q, data_regs[i], accs[i]);
    }

    dim += 8;
  }

  // Collapse each accumulator to a scalar.
  for (size_t i = 0; i < dp_batch; ++i) {
    results[i] = HorizontalAdd_FP32_V256(accs[i]);
  }

  // Scalar tail for the remaining (fewer than 8) elements.
  for (; dim < dimensionality; ++dim) {
    for (size_t i = 0; i < dp_batch; ++i) {
      results[i] += (*(query + dim)) * (*(ptrs[i] + dim));
    }
  }
}

// Non-template entry point: AVX2 Float16 kernel instantiated for a batch of
// one vector.
void compute_one_to_many_inner_product_avx2_fp16_1(
    const ailego::Float16 *query, const ailego::Float16 **ptrs,
    std::array<const ailego::Float16 *, 1> &prefetch_ptrs, size_t dim,
    float *sums) {
  constexpr size_t kBatch = 1;
  compute_one_to_many_inner_product_avx2_fp16<ailego::Float16, kBatch>(
      query, ptrs, prefetch_ptrs, dim, sums);
}

// Non-template entry point: AVX2 Float16 kernel instantiated for a batch of
// twelve vectors.
void compute_one_to_many_inner_product_avx2_fp16_12(
    const ailego::Float16 *query, const ailego::Float16 **ptrs,
    std::array<const ailego::Float16 *, 12> &prefetch_ptrs, size_t dim,
    float *sums) {
  constexpr size_t kBatch = 12;
  compute_one_to_many_inner_product_avx2_fp16<ailego::Float16, kBatch>(
      query, ptrs, prefetch_ptrs, dim, sums);
}

#endif

}  // namespace zvec::ailego::DistanceBatch

================================================
FILE: src/ailego/math_batch/inner_product_distance_batch_impl_fp16_avx512.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <array>
#include <ailego/math/matrix_utility.i>
#include <ailego/utility/math_helper.h>
#include <zvec/ailego/internal/platform.h>
#include <zvec/ailego/utility/type_helper.h>

namespace zvec::ailego::DistanceBatch {

#if defined(__AVX512F__)

// AVX512F kernel: inner products of one Float16 query against dp_batch Float16
// vectors. Half-precision inputs are widened to float with _mm512_cvtph_ps /
// _mm256_cvtph_ps and accumulated in float.
//
//   query          - query vector, `dimensionality` elements
//   ptrs           - dp_batch vector pointers, `dimensionality` elements each
//   prefetch_ptrs  - vectors to prefetch during the main loop; a null entry 0
//                    disables prefetching
//   dimensionality - number of elements per vector
//   results        - output, dp_batch floats
template <typename ValueType, size_t dp_batch>
static std::enable_if_t<std::is_same_v<ValueType, ailego::Float16>, void>
compute_one_to_many_inner_product_avx512f_fp16(
    const ailego::Float16 *query, const ailego::Float16 **ptrs,
    std::array<const ailego::Float16 *, dp_batch> &prefetch_ptrs,
    size_t dimensionality, float *results) {
  // One 16-lane float accumulator per vector in the batch.
  __m512 accs[dp_batch];
  for (size_t i = 0; i < dp_batch; ++i) {
    accs[i] = _mm512_setzero_ps();
  }

  // Main loop: 32 half-precision elements per iteration, split into two
  // 16-lane float halves.
  size_t dim = 0;
  for (; dim + 32 <= dimensionality; dim += 32) {
    __m512i q =
        _mm512_loadu_si512(reinterpret_cast<const __m512i *>(query + dim));

    __m512 q1 = _mm512_cvtph_ps(_mm512_castsi512_si256(q));
    __m512 q2 = _mm512_cvtph_ps(_mm512_extracti64x4_epi64(q, 1));

    __m512 data_regs_1[dp_batch];
    __m512 data_regs_2[dp_batch];
    for (size_t i = 0; i < dp_batch; ++i) {
      __m512i m =
          _mm512_loadu_si512(reinterpret_cast<const __m512i *>(ptrs[i] + dim));

      data_regs_1[i] = _mm512_cvtph_ps(_mm512_castsi512_si256(m));
      data_regs_2[i] = _mm512_cvtph_ps(_mm512_extracti64x4_epi64(m, 1));
    }

    // NOTE(review): only entry 0 is null-checked before every entry is offset
    // and prefetched; confirm callers never pass a mix of null and non-null
    // entries.
    if (prefetch_ptrs[0]) {
      for (size_t i = 0; i < dp_batch; ++i) {
        ailego_prefetch(prefetch_ptrs[i] + dim);
      }
    }

    for (size_t i = 0; i < dp_batch; ++i) {
      accs[i] = _mm512_fmadd_ps(q1, data_regs_1[i], accs[i]);
      accs[i] = _mm512_fmadd_ps(q2, data_regs_2[i], accs[i]);
    }
  }

  // Optional single step of 16 elements.
  if (dim + 16 <= dimensionality) {
    __m512 q = _mm512_cvtph_ps(
        _mm256_loadu_si256(reinterpret_cast<const __m256i *>(query + dim)));

    __m512 data_regs[dp_batch];
    for (size_t i = 0; i < dp_batch; ++i) {
      data_regs[i] = _mm512_cvtph_ps(
          _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptrs[i] + dim)));
      accs[i] = _mm512_fmadd_ps(q, data_regs[i], accs[i]);
    }

    dim += 16;
  }

  // Fold each 512-bit accumulator into 256 bits (low half + high half) so the
  // remaining 8-element step can use 256-bit arithmetic.
  __m256 acc_new[dp_batch];
  for (size_t i = 0; i < dp_batch; ++i) {
    acc_new[i] = _mm256_add_ps(
        _mm512_castps512_ps256(accs[i]),
        _mm256_castpd_ps(_mm512_extractf64x4_pd(_mm512_castps_pd(accs[i]), 1)));
  }

  // Optional single step of 8 elements.
  if (dim + 8 <= dimensionality) {
    __m256 q = _mm256_cvtph_ps(
        _mm_loadu_si128(reinterpret_cast<const __m128i *>(query + dim)));

    for (size_t i = 0; i < dp_batch; ++i) {
      __m256 m = _mm256_cvtph_ps(
          _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptrs[i] + dim)));
      acc_new[i] = _mm256_fmadd_ps(m, q, acc_new[i]);
    }

    dim += 8;
  }

  // Collapse each accumulator to a scalar.
  for (size_t i = 0; i < dp_batch; ++i) {
    results[i] = HorizontalAdd_FP32_V256(acc_new[i]);
  }

  // Scalar tail for the remaining (fewer than 8) elements.
  for (; dim < dimensionality; ++dim) {
    for (size_t i = 0; i < dp_batch; ++i) {
      results[i] += (*(query + dim)) * (*(ptrs[i] + dim));
    }
  }
}

// Non-template entry point: AVX512F Float16 kernel instantiated for a batch of
// one vector.
void compute_one_to_many_inner_product_avx512f_fp16_1(
    const ailego::Float16 *query, const ailego::Float16 **ptrs,
    std::array<const ailego::Float16 *, 1> &prefetch_ptrs, size_t dim,
    float *sums) {
  constexpr size_t kBatch = 1;
  compute_one_to_many_inner_product_avx512f_fp16<ailego::Float16, kBatch>(
      query, ptrs, prefetch_ptrs, dim, sums);
}

// Non-template entry point: AVX512F Float16 kernel instantiated for a batch of
// twelve vectors.
void compute_one_to_many_inner_product_avx512f_fp16_12(
    const ailego::Float16 *query, const ailego::Float16 **ptrs,
    std::array<const ailego::Float16 *, 12> &prefetch_ptrs, size_t dim,
    float *sums) {
  constexpr size_t kBatch = 12;
  compute_one_to_many_inner_product_avx512f_fp16<ailego::Float16, kBatch>(
      query, ptrs, prefetch_ptrs, dim, sums);
}
#endif

}  // namespace zvec::ailego::DistanceBatch


================================================
FILE: src/ailego/math_batch/inner_product_distance_batch_impl_fp16_avx512fp16.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <array>
#include <ailego/math/matrix_utility.i>
#include <ailego/utility/math_helper.h>
#include <zvec/ailego/internal/platform.h>
#include <zvec/ailego/utility/type_helper.h>

namespace zvec::ailego::DistanceBatch {

#if defined(__AVX512FP16__)
// AVX512-FP16 kernel: inner products of one Float16 query against dp_batch
// Float16 vectors, multiplied and accumulated in half precision (__m512h) and
// reduced with HorizontalAdd_FP16_V512.
//
//   query          - query vector, `dimensionality` elements
//   ptrs           - dp_batch vector pointers, `dimensionality` elements each
//   prefetch_ptrs  - vectors to prefetch during the main loop; a null entry 0
//                    disables prefetching
//   dimensionality - number of elements per vector
//   results        - output, dp_batch floats
template <typename ValueType, size_t dp_batch>
static std::enable_if_t<std::is_same_v<ValueType, ailego::Float16>, void>
compute_one_to_many_inner_product_avx512fp16_fp16(
    const ailego::Float16 *query, const ailego::Float16 **ptrs,
    std::array<const ailego::Float16 *, dp_batch> &prefetch_ptrs,
    size_t dimensionality, float *results) {
  // One 32-lane half-precision accumulator per vector in the batch.
  __m512h accs[dp_batch];
  for (size_t i = 0; i < dp_batch; ++i) {
    accs[i] = _mm512_setzero_ph();
  }

  // Main loop: 32 half-precision elements per iteration.
  size_t dim = 0;
  for (; dim + 32 <= dimensionality; dim += 32) {
    __m512h q = _mm512_loadu_ph(query + dim);

    __m512h data_regs[dp_batch];
    for (size_t i = 0; i < dp_batch; ++i) {
      data_regs[i] = _mm512_loadu_ph(ptrs[i] + dim);
    }

    // NOTE(review): only entry 0 is null-checked before every entry is offset
    // and prefetched; confirm callers never pass a mix of null and non-null
    // entries.
    if (prefetch_ptrs[0]) {
      for (size_t i = 0; i < dp_batch; ++i) {
        ailego_prefetch(prefetch_ptrs[i] + dim);
      }
    }

    for (size_t i = 0; i < dp_batch; ++i) {
      accs[i] = _mm512_fmadd_ph(data_regs[i], q, accs[i]);
    }
  }

  // Masked tail for the remaining 1..31 elements.
  if (dim < dimensionality) {
    // Build the lane mask in unsigned arithmetic: with 31 remaining elements
    // the signed form (1 << 31) - 1 overflows int.
    const unsigned int remaining =
        static_cast<unsigned int>(dimensionality - dim);
    const __mmask32 mask = static_cast<__mmask32>((1u << remaining) - 1u);

    // Lanes outside the mask are never consumed: the masked FMA below keeps
    // the accumulator value for them.
    const __m512i zmm_undefined = _mm512_undefined_epi32();

    // The query tail is the same for every vector, so load it once.
    const __m512h q_tail = _mm512_castsi512_ph(
        _mm512_mask_loadu_epi16(zmm_undefined, mask, query + dim));

    for (size_t i = 0; i < dp_batch; ++i) {
      const __m512h m_tail = _mm512_castsi512_ph(
          _mm512_mask_loadu_epi16(zmm_undefined, mask, ptrs[i] + dim));
      accs[i] = _mm512_mask3_fmadd_ph(q_tail, m_tail, accs[i], mask);
    }
  }

  // Collapse each accumulator to a scalar.
  for (size_t i = 0; i < dp_batch; ++i) {
    results[i] = HorizontalAdd_FP16_V512(accs[i]);
  }
}

// Non-template entry point: AVX512-FP16 Float16 kernel instantiated for a
// batch of one vector.
void compute_one_to_many_inner_product_avx512fp16_fp16_1(
    const ailego::Float16 *query, const ailego::Float16 **ptrs,
    std::array<const ailego::Float16 *, 1> &prefetch_ptrs, size_t dim,
    float *sums) {
  constexpr size_t kBatch = 1;
  compute_one_to_many_inner_product_avx512fp16_fp16<ailego::Float16, kBatch>(
      query, ptrs, prefetch_ptrs, dim, sums);
}

// Non-template entry point: AVX512-FP16 Float16 kernel instantiated for a
// batch of twelve vectors.
void compute_one_to_many_inner_product_avx512fp16_fp16_12(
    const ailego::Float16 *query, const ailego::Float16 **ptrs,
    std::array<const ailego::Float16 *, 12> &prefetch_ptrs, size_t dim,
    float *sums) {
  constexpr size_t kBatch = 12;
  compute_one_to_many_inner_product_avx512fp16_fp16<ailego::Float16, kBatch>(
      query, ptrs, prefetch_ptrs, dim, sums);
}
#endif

}  // namespace zvec::ailego::DistanceBatch


================================================
FILE: src/ailego/math_batch/inner_product_distance_batch_impl_fp32_avx2.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <array>
#include <ailego/math/inner_product_matrix.h>
#include <ailego/utility/math_helper.h>
#include <zvec/ailego/internal/platform.h>
#include <zvec/ailego/utility/type_helper.h>

namespace zvec::ailego::DistanceBatch {

#if defined(__AVX2__)

// Horizontal sum of the four float lanes of `v`.
// Declared static: this is a file-local helper with a generic name, so it is
// given internal linkage to avoid clashing with a same-named inline helper in
// another translation unit.
static inline float sum4(__m128 v) {
  // Fold the upper two lanes onto the lower two: lane 0 = v0 + v2,
  // lane 1 = v1 + v3.
  v = _mm_add_ps(v, _mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v), 8)));
  // Add lane 1 to lane 0 with intrinsics rather than the GNU-specific vector
  // subscript (v[0] + v[1]).
  const __m128 lane1 = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1));
  return _mm_cvtss_f32(_mm_add_ss(v, lane1));
}

inline __m128 sum_top_bottom_avx(__m256 v) {
  const __m128 high = _mm256_extractf128_ps(v, 1);
  const __m128 low = _mm256_castps256_ps128(v);
  return _mm_add_ps(high, low);
}

// AVX2 kernel: inner products of one float query against dp_batch float
// vectors.
//
// The accumulators hold the *negated* running sum (every step is a fused
// negate-multiply-add), and the sign is flipped back when the results are
// stored.
//
//   query          - query vector, `dimensionality` elements
//   ptrs           - dp_batch vector pointers, `dimensionality` elements each
//   prefetch_ptrs  - vectors to prefetch while computing; a null entry 0
//                    disables prefetching
//   dimensionality - number of elements per vector
//   results        - output, dp_batch floats
template <typename ValueType, size_t dp_batch>
static std::enable_if_t<std::is_same_v<ValueType, float>, void>
compute_one_to_many_inner_product_avx2_fp32(
    const ValueType *query, const ValueType **ptrs,
    std::array<const ValueType *, dp_batch> &prefetch_ptrs,
    size_t dimensionality, float *results) {
  // One 8-lane accumulator per vector in the batch.
  __m256 accs[dp_batch];
  for (size_t i = 0; i < dp_batch; ++i) {
    accs[i] = _mm256_setzero_ps();
  }
  // Main loop: 8 floats per iteration.
  size_t dim = 0;
  for (; dim + 8 <= dimensionality; dim += 8) {
    __m256 q = _mm256_loadu_ps(query + dim);

    __m256 data_regs[dp_batch];
    for (size_t i = 0; i < dp_batch; ++i) {
      data_regs[i] = _mm256_loadu_ps(ptrs[i] + dim);
    }
    // NOTE(review): only entry 0 is null-checked before every entry is offset
    // and prefetched; confirm callers never pass a mix of null and non-null
    // entries.
    if (prefetch_ptrs[0]) {
      for (size_t i = 0; i < dp_batch; ++i) {
        ailego_prefetch(prefetch_ptrs[i] + dim);
      }
    }
    for (size_t i = 0; i < dp_batch; ++i) {
      accs[i] = _mm256_fnmadd_ps(q, data_regs[i], accs[i]);
    }
  }

  // Fold each 256-bit accumulator into 128 bits so the 4- and 2-element steps
  // can use 128-bit arithmetic.
  __m128 sum128_regs[dp_batch];
  for (size_t i = 0; i < dp_batch; ++i) {
    sum128_regs[i] = sum_top_bottom_avx(accs[i]);
  }
  // Optional single step of 4 floats.
  if (dim + 4 <= dimensionality) {
    __m128 q = _mm_loadu_ps(query + dim);

    __m128 data_regs[dp_batch];
    for (size_t i = 0; i < dp_batch; ++i) {
      data_regs[i] = _mm_loadu_ps(ptrs[i] + dim);
    }
    if (prefetch_ptrs[0]) {
      for (size_t i = 0; i < dp_batch; ++i) {
        ailego_prefetch(prefetch_ptrs[i] + dim);
      }
    }
    for (size_t i = 0; i < dp_batch; ++i) {
      sum128_regs[i] = _mm_fnmadd_ps(q, data_regs[i], sum128_regs[i]);
    }
    dim += 4;
  }
  // Optional single step of 2 floats: they are loaded into the upper two lanes
  // of zeroed registers, so the lower two lanes contribute nothing.
  if (dim + 2 <= dimensionality) {
    __m128 q = _mm_setzero_ps();

    __m128 data_regs[dp_batch];
    for (size_t i = 0; i < dp_batch; ++i) {
      data_regs[i] = _mm_setzero_ps();
    }

    q = _mm_loadh_pi(q, (const __m64 *)(query + dim));
    for (size_t i = 0; i < dp_batch; ++i) {
      data_regs[i] = _mm_loadh_pi(data_regs[i], (const __m64 *)(ptrs[i] + dim));
    }
    for (size_t i = 0; i < dp_batch; ++i) {
      sum128_regs[i] = _mm_fnmadd_ps(q, data_regs[i], sum128_regs[i]);
    }
    dim += 2;
  }

  // Collapse each accumulator to a scalar (still negated).
  float res[dp_batch];
  for (size_t i = 0; i < dp_batch; ++i) {
    res[i] = sum4(sum128_regs[i]);
  }
  // At most one element is left after the 8/4/2 steps.
  if (dim < dimensionality) {
    float q = query[dim];
    for (size_t i = 0; i < dp_batch; ++i) {
      res[i] -= q * ptrs[i][dim];
    }
  }
  // Undo the negation to obtain the inner product.
  for (size_t i = 0; i < dp_batch; ++i) {
    results[i] = -res[i];
  }
}

// Non-template entry point: AVX2 float kernel instantiated for a batch of one
// vector.
void compute_one_to_many_inner_product_avx2_fp32_1(
    const float *query, const float **ptrs,
    std::array<const float *, 1> &prefetch_ptrs, size_t dim, float *sums) {
  constexpr size_t kBatch = 1;
  compute_one_to_many_inner_product_avx2_fp32<float, kBatch>(
      query, ptrs, prefetch_ptrs, dim, sums);
}

// Non-template entry point: AVX2 float kernel instantiated for a batch of
// twelve vectors.
void compute_one_to_many_inner_product_avx2_fp32_12(
    const float *query, const float **ptrs,
    std::array<const float *, 12> &prefetch_ptrs, size_t dim, float *sums) {
  constexpr size_t kBatch = 12;
  compute_one_to_many_inner_product_avx2_fp32<float, kBatch>(
      query, ptrs, prefetch_ptrs, dim, sums);
}

#endif

}  // namespace zvec::ailego::DistanceBatch

================================================
FILE: src/ailego/math_batch/inner_product_distance_batch_impl_int8_avx2.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <array>
#include <ailego/utility/math_helper.h>
#include <zvec/ailego/internal/platform.h>
#include <zvec/ailego/utility/type_helper.h>

namespace zvec::ailego::DistanceBatch {

#if defined(__AVX2__)

template <typename ValueType, size_t dp_batch>
static std::enable_if_t<std::is_same_v<ValueType, int8_t>, void>
compute_one_to_many_inner_product_avx2_int8(
    const int8_t *query, const int8_t **ptrs,
    std::array<const int8_t *, dp_batch> &prefetch_ptrs, size_t dimensionality,
    float *results) {
  __m256i accs[dp_batch];
  for (size_t i = 0; i < dp_batch; ++i) {
    accs[i] = _mm256_setzero_si256();
  }
  size_t dim = 0;
  for (; dim + 32 <= dimensionality; dim += 32) {
    __m256i q = _mm256_loadu_si256((const __m256i *)(query + dim));

    __m256i data_regs[dp_batch];
    for (size_t i = 0; i < dp_batch; ++i) {
      data_regs[i] = _mm256_loadu_si256((const __m256i *)(ptrs[i] + dim));
    }
    if (prefetch_ptrs[0]) {
      for (size_t i = 0; i < dp_batch; ++i) {
        ailego_prefetch(prefetch_ptrs[i] + dim);
      }
    }
    __m256i q_lo = _mm256_cvtepi8_epi16(_mm256_castsi256_si128(q));
    __m256i q_hi = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(q, 1));
    __m256i data_lo[dp_batch];
    __m256i data_hi[dp_batch];
    for (size_t i = 0; i < dp_batch; ++i) {
      data_lo[i] = _mm256_cvtepi8_epi16(_mm256_castsi256_si128(data_regs[i]));
      data_hi[i] =
          _mm256_cvtepi8_epi16(_mm256_extracti128_si256(data_regs[i], 1));
    }
    __m256i prod_lo[dp_batch];
    __m256i prod_hi[dp_batch];
    for (size_t i = 0; i < dp_batch; ++i) {
      prod_lo[i] = _mm256_madd_epi16(q_lo, data_lo[i]);
      prod_hi[i] = _mm256_madd_epi16(q_hi, data_hi[i]);
    }
    for (size_t i = 0; i < dp_batch; ++i) {
      accs[i] =
          _mm256_add_epi32(accs[i], _mm256_add_epi32(prod_lo[i], prod_hi[i]));
    }
  }

  int temp_results[dp_batch];
  for (size_t i = 0; i < dp_batch; ++i) {
    __m128i lo = _mm256_castsi256_si128(accs[i]);
    __m128i hi = _mm256_extracti128_si256(accs[i], 1);
    __m128i sum128 = _mm_add_epi32(lo, hi);
    sum128 = _mm_hadd_epi32(sum128, sum128);
    sum128 = _mm_hadd_epi32(sum128, sum128);
    temp_results[i] = _mm_cvtsi128_si32(sum128);
  }
  for (; dim < dimensionality; ++dim) {
    int8_t q = query[dim];
    for (size_t i = 0; i < dp_batch; ++i) {
      temp_results[i] += q * static_cast<int>(ptrs[i][dim]);
    }
  }
  for (size_t i = 0; i < dp_batch; ++i) {
    results[i] = static_cast<float>(temp_results[i]);
  }
}

// Non-template entry point for a batch of one candidate vector: forwards its
// arguments unchanged to the AVX2 int8 kernel instantiated as <int8_t, 1>.
void compute_one_to_many_inner_product_avx2_int8_1(
    const int8_t *query, const int8_t **ptrs,
    std::array<const int8_t *, 1> &prefetch_ptrs, size_t dim, float *sums) {
  compute_one_to_many_inner_product_avx2_int8<int8_t, 1>(query, ptrs,
                                                         prefetch_ptrs, dim,
                                                         sums);
}

// Non-template entry point for a batch of twelve candidate vectors: forwards
// its arguments unchanged to the AVX2 int8 kernel instantiated as
// <int8_t, 12>.
void compute_one_to_many_inner_product_avx2_int8_12(
    const int8_t *query, const int8_t **ptrs,
    std::array<const int8_t *, 12> &prefetch_ptrs, size_t dim, float *sums) {
  compute_one_to_many_inner_product_avx2_int8<int8_t, 12>(query, ptrs,
                                                          prefetch_ptrs, dim,
                                                          sums);
}

#endif

}  // namespace zvec::ailego::DistanceBatch

================================================
FILE: src/ailego/math_batch/inner_product_distance_batch_impl_int8_avx512fp16.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <array>
#include <ailego/utility/math_helper.h>
#include <zvec/ailego/internal/platform.h>
#include <zvec/ailego/utility/type_helper.h>

namespace zvec::ailego::DistanceBatch {

#if defined(__AVX512VNNI__)

// Shifts every signed 8-bit element of `query` by +128, in place, so that the
// same bytes can afterwards be read as unsigned 8-bit values.
//
// query: buffer holding `dim` int8_t elements; it is overwritten with the
//        shifted uint8_t values.
// dim:   number of elements in `query`.
void compute_one_to_many_inner_product_avx512_vnni_int8_query_preprocess(
    void *query, size_t dim) {
  const int8_t *input = reinterpret_cast<const int8_t *>(query);
  uint8_t *output = reinterpret_cast<uint8_t *>(query);

  // AVX512 constant: 128 in each byte (cast to int8_t, which becomes -128
  // in signed representation, but addition works correctly due to two's
  // complement arithmetic)
  const __m512i offset = _mm512_set1_epi8(static_cast<int8_t>(128));

  size_t i = 0;
  // Process 64 bytes at a time using AVX512
  for (; i + 64 <= dim; i += 64) {
    __m512i data =
        _mm512_loadu_si512(reinterpret_cast<const __m512i *>(input + i));
    __m512i result = _mm512_add_epi8(data, offset);
    _mm512_storeu_si512(reinterpret_cast<__m512i *>(output + i), result);
  }

  // Handle remaining elements with scalar loop
  for (; i < dim; ++i) {
    output[i] = static_cast<uint8_t>(static_cast<int>(input[i]) + 128);
  }
}

// Batched dot product of one query against `dp_batch` int8 vectors using
// AVX512-VNNI.
//
// The query bytes are interpreted as UNSIGNED 8-bit values even though the
// pointer type is `const int8_t *`: _mm512_dpbusd_epi32 multiplies unsigned
// bytes of its first source by signed bytes of its second, and the scalar tail
// reads the query the same way.
// NOTE(review): presumably the caller has run
// compute_one_to_many_inner_product_avx512_vnni_int8_query_preprocess on the
// query beforehand - confirm against the call sites.
//
// query:          `dimensionality` bytes, read as uint8_t.
// ptrs:           `dp_batch` pointers to signed int8 vectors of the same
//                 length.
// prefetch_ptrs:  optional prefetch targets; when the first entry is null no
//                 prefetch is issued for the whole batch.
// dimensionality: number of elements per vector.
// results:        receives `dp_batch` sums, converted to float.
template <size_t dp_batch>
static void compute_one_to_many_inner_product_avx512_vnni_int8(
    const int8_t *query, const int8_t **ptrs,
    std::array<const int8_t *, dp_batch> &prefetch_ptrs, size_t dimensionality,
    float *results) {
  __m512i accs[dp_batch];
  for (size_t i = 0; i < dp_batch; ++i) {
    accs[i] = _mm512_setzero_si512();
  }
  size_t dim = 0;
  // Main loop: 64 bytes of every vector per iteration.
  for (; dim + 64 <= dimensionality; dim += 64) {
    __m512i q =
        _mm512_loadu_si512(reinterpret_cast<const __m512i *>(query + dim));

    __m512i data_regs[dp_batch];
    for (size_t i = 0; i < dp_batch; ++i) {
      data_regs[i] =
          _mm512_loadu_si512(reinterpret_cast<const __m512i *>(ptrs[i] + dim));
    }
    if (prefetch_ptrs[0]) {
      for (size_t i = 0; i < dp_batch; ++i) {
        ailego_prefetch(prefetch_ptrs[i] + dim);
      }
    }
    for (size_t i = 0; i < dp_batch; ++i) {
      accs[i] = _mm512_dpbusd_epi32(accs[i], q, data_regs[i]);
    }
  }

  // Horizontal reduction of the 16 partial sums held by each accumulator.
  int temp_results[dp_batch]{};
  for (size_t i = 0; i < dp_batch; ++i) {
    temp_results[i] = _mm512_reduce_add_epi32(accs[i]);
  }
  // Scalar tail for the last (dimensionality % 64) elements. The query byte is
  // widened from uint8_t to int so the product is computed in signed
  // arithmetic instead of relying on unsigned wraparound.
  for (; dim < dimensionality; ++dim) {
    const int q =
        static_cast<int>(reinterpret_cast<const uint8_t *>(query)[dim]);
    for (size_t i = 0; i < dp_batch; ++i) {
      temp_results[i] += q * static_cast<int>(ptrs[i][dim]);
    }
  }
  for (size_t i = 0; i < dp_batch; ++i) {
    results[i] = static_cast<float>(temp_results[i]);
  }
}

//
// #elif defined(__AVX512BW__)
// // TODO: this version is problematic
// template <typename ValueType, size_t dp_batch>
// static std::enable_if_t<std::is_same_v<ValueType, int8_t>, void>
// compute_one_to_many_avx512_int8(
//     const int8_t *query, const int8_t **ptrs,
//     std::array<const int8_t *, dp_batch> &prefetch_ptrs, size_t
//     dimensionality, float *results) {
//   std::array<__m512i, dp_batch> accs;
//   size_t dim = 0;
//   for (; dim + 64 <= dimensionality; dim += 64) {
//     __m512i q =
//         _mm512_loadu_si512(reinterpret_cast<const __m512i *>(query + dim));
//     std::array<__m512i, dp_batch> data_regs;
//     for (size_t i = 0; i < dp_batch; ++i) {
//       data_regs[i] =
//           _mm512_loadu_si512(reinterpret_cast<const __m512i *>(ptrs[i] +
//           dim));
//     }
//     if (prefetch_ptrs[0]) {
//       for (size_t i = 0; i < dp_batch; ++i) {
//         ailego_prefetch(prefetch_ptrs[i] + dim);
//       }
//     }
//     __m512i q_lo = _mm512_cvtepi8_epi16(_mm512_extracti64x4_epi64(q, 0));
//     __m512i q_hi = _mm512_cvtepi8_epi16(_mm512_extracti64x4_epi64(q, 1));
//     std::array<__m512i, dp_batch> data_lo;
//     std::array<__m512i, dp_batch> data_hi;
//     for (size_t i = 0; i < dp_batch; ++i) {
//       data_lo[i] =
//           _mm512_cvtepi8_epi16(_mm512_extracti64x4_epi64(data_regs[i], 0));
//       data_hi[i] =
//           _mm512_cvtepi8_epi16(_mm512_extracti64x4_epi64(data_regs[i], 1));
//     }
//     std::array<__m512i, dp_batch> prod_lo;
//     std::array<__m512i, dp_batch> prod_hi;
//     for (size_t i = 0; i < dp_batch; ++i) {
//       prod_lo[i] = _mm512_madd_epi16(q_lo, data_lo[i]);
//       prod_hi[i] = _mm512_madd_epi16(q_hi, data_hi[i]);
//     }
//     for (size_t i = 0; i < dp_batch; ++i) {
//       accs[i] = _mm512_add_epi32(
//           accs[i], _mm512_add_epi32(
//                        _mm512_madd_epi16(prod_lo[i], _mm512_set1_epi16(1)),
//                        _mm512_madd_epi16(prod_hi[i], _mm512_set1_epi16(1))));
//     }
//   }
//   std::array<int, dp_batch> temp_results;
//   for (size_t i = 0; i < dp_batch; ++i) {
//     temp_results[i] = _mm512_reduce_add_epi32(accs[i]);
//   }
//   for (; dim < dimensionality; ++dim) {
//     int8_t q = query[dim];
//     for (size_t i = 0; i < dp_batch; ++i) {
//       temp_results[i] += q * static_cast<int>(ptrs[i][dim]);
//     }
//   }
//   for (size_t i = 0; i < dp_batch; ++i) {
//     results[i] = static_cast<float>(temp_results[i]);
//   }
// }

// Non-template entry point for a batch of one candidate vector: forwards its
// arguments unchanged to the AVX512-VNNI kernel instantiated with dp_batch 1.
void compute_one_to_many_inner_product_avx512_vnni_int8_1(
    const int8_t *query, const int8_t **ptrs,
    std::array<const int8_t *, 1> &prefetch_ptrs, size_t dim, float *sums) {
  compute_one_to_many_inner_product_avx512_vnni_int8<1>(query, ptrs,
                                                        prefetch_ptrs, dim,
                                                        sums);
}

// Non-template entry point for a batch of twelve candidate vectors: forwards
// its arguments unchanged to the AVX512-VNNI kernel instantiated with
// dp_batch 12.
void compute_one_to_many_inner_product_avx512_vnni_int8_12(
    const int8_t *query, const int8_t **ptrs,
    std::array<const int8_t *, 12> &prefetch_ptrs, size_t dim, float *sums) {
  compute_one_to_many_inner_product_avx512_vnni_int8<12>(query, ptrs,
                                                         prefetch_ptrs, dim,
                                                         sums);
}


#endif

}  // namespace zvec::ailego::DistanceBatch

================================================
FILE: src/ailego/parallel/lock.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <atomic>
#include <condition_variable>
#include <mutex>
#if __cplusplus >= 201703L
#include <shared_mutex>
#endif
#include <ailego/pattern/defer.h>
#include <zvec/ailego/internal/platform.h>

namespace zvec {
namespace ailego {

// Test if atomic_bool is always lock free.
// Arm may be always lock free using some compiler flags,
// see https://stackoverflow.com/a/64253858/486350.
#if ATOMIC_BOOL_LOCK_FREE == 2

/*! Spin Mutex (The atomic type is always lock-free)
 */
class SpinMutex {
 public:
  //! Constructor
  SpinMutex(void) {}

  //! Acquire the mutex, spinning until the flag can be flipped to true
  void lock(void) {
    for (;;) {
      // The CAS overwrites its first argument on failure, so start every
      // attempt from a fresh `false`.
      bool unlocked = false;
      if (flag_.compare_exchange_weak(unlocked, true,
                                      std::memory_order_acquire,
                                      std::memory_order_relaxed)) {
        return;
      }
      // Provide a hint to the processor that the code sequence is a spin-wait
      // loop. This can help improve the performance and power consumption of
      // spin-wait loops.
      ailego_yield();
    }
  }

  //! Single attempt to acquire the mutex; returns true on success
  bool try_lock(void) {
    bool unlocked = false;
    const bool acquired = flag_.compare_exchange_strong(
        unlocked, true, std::memory_order_acquire, std::memory_order_relaxed);
    return acquired;
  }

  //! Release the mutex
  void unlock(void) {
    flag_.store(false, std::memory_order_release);
  }

 private:
  //! Members
  std::atomic_bool flag_{false};

  //! Neither copyable nor movable
  SpinMutex(const SpinMutex &) = delete;
  SpinMutex(SpinMutex &&) = delete;
  SpinMutex &operator=(const SpinMutex &) = delete;
  SpinMutex &operator=(SpinMutex &&) = delete;
};
#else

/*! Spin Mutex (General)
 */
class SpinMutex {
 public:
  //! Constructor
  SpinMutex(void) {}

  //! Acquire the mutex, busy-waiting until the flag was previously clear
  void lock(void) {
    for (;;) {
      // test_and_set returns the previous state: false means we now own it.
      if (!flag_.test_and_set(std::memory_order_acquire)) {
        return;
      }
    }
  }

  //! Single attempt to acquire the mutex; returns true on success
  bool try_lock(void) {
    const bool was_held = flag_.test_and_set(std::memory_order_acquire);
    return !was_held;
  }

  //! Release the mutex
  void unlock(void) {
    flag_.clear(std::memory_order_release);
  }

 private:
  //! Members
  std::atomic_flag flag_{};

  //! Neither copyable nor movable
  SpinMutex(const SpinMutex &) = delete;
  SpinMutex(SpinMutex &&) = delete;
  SpinMutex &operator=(const SpinMutex &) = delete;
  SpinMutex &operator=(SpinMutex &&) = delete;
};
#endif  // ATOMIC_BOOL_LOCK_FREE == 2

#if __cplusplus >= 201703L

using SharedMutex = std::shared_mutex;

#else

/*! Shared Mutex
 */
class SharedMutex {
 public:
  //! Constructor
  SharedMutex(void) {}

  //! Acquire exclusive ownership, blocking until nobody holds the mutex
  void lock(void) {
    std::unique_lock<std::mutex> guard(mutex_);
    // Announce a waiting writer; lock_shared() refuses new readers meanwhile.
    ++write_count_;
    while (pending_count_ != 0) {
      write_cond_.wait(guard);
    }
    --write_count_;
    // 0 -> -1 marks exclusive ownership.
    --pending_count_;
  }

  //! Try to acquire exclusive ownership without blocking
  bool try_lock(void) {
    std::unique_lock<std::mutex> guard(mutex_, std::try_to_lock);
    if (!guard.owns_lock() || pending_count_ != 0) {
      return false;
    }
    --pending_count_;
    return true;
  }

  //! Release exclusive ownership
  void unlock(void) {
    std::lock_guard<std::mutex> guard(mutex_);
    ++pending_count_;

    // Waiting writers are served before readers.
    if (write_count_ == 0) {
      read_cond_.notify_all();
    } else {
      write_cond_.notify_one();
    }
  }

  //! Acquire shared ownership, blocking while a writer holds or awaits it
  void lock_shared(void) {
    std::unique_lock<std::mutex> guard(mutex_);
    ++read_count_;
    while (write_count_ != 0 || pending_count_ < 0) {
      read_cond_.wait(guard);
    }
    --read_count_;
    ++pending_count_;
  }

  //! Try to acquire shared ownership (does not wait for a writer to finish)
  bool try_lock_shared(void) {
    std::lock_guard<std::mutex> guard(mutex_);
    const bool available = (write_count_ == 0 && pending_count_ >= 0);
    if (available) {
      ++pending_count_;
    }
    return available;
  }

  //! Release shared ownership
  void unlock_shared(void) {
    std::lock_guard<std::mutex> guard(mutex_);
    --pending_count_;

    // The last reader leaving hands over to a waiting writer, if any.
    const bool wake_writer = (write_count_ != 0 && pending_count_ == 0);
    if (wake_writer) {
      write_cond_.notify_one();
    } else {
      read_cond_.notify_all();
    }
  }

 private:
  //! Neither copyable nor movable
  SharedMutex(const SharedMutex &) = delete;
  SharedMutex(SharedMutex &&) = delete;
  SharedMutex &operator=(const SharedMutex &) = delete;
  SharedMutex &operator=(SharedMutex &&) = delete;

  //! Members
  //! pending_count_: > 0 number of shared holders, -1 exclusive holder, 0 free
  int32_t pending_count_{0};
  //! read_count_ / write_count_: threads currently inside lock_shared()/lock()
  int32_t read_count_{0};
  int32_t write_count_{0};
  std::mutex mutex_{};
  std::condition_variable read_cond_{};
  std::condition_variable write_cond_{};
};

#endif  // __cplusplus >= 201703L

/*! Write Lock
 */
class WriteLock {
 public:
  //! Wrap an existing shared mutex; only a reference is stored
  WriteLock(SharedMutex &mutex) : mutex_(mutex) {}

  //! Acquire the wrapped mutex exclusively
  void lock() {
    mutex_.lock();
  }

  //! Try to acquire the wrapped mutex exclusively
  bool try_lock() {
    return mutex_.try_lock();
  }

  //! Release the exclusive ownership
  void unlock() {
    mutex_.unlock();
  }

 private:
  //! Members
  SharedMutex &mutex_;

  //! Not default-constructible, copyable or movable
  WriteLock() = delete;
  WriteLock(const WriteLock &) = delete;
  WriteLock(WriteLock &&) = delete;
  WriteLock &operator=(const WriteLock &) = delete;
  WriteLock &operator=(WriteLock &&) = delete;
};

/*! Read Lock
 */
class ReadLock {
 public:
  //! Wrap an existing shared mutex; only a reference is stored
  ReadLock(SharedMutex &mutex) : mutex_(mutex) {}

  //! Acquire the wrapped mutex in shared mode
  void lock() {
    mutex_.lock_shared();
  }

  //! Try to acquire the wrapped mutex in shared mode
  bool try_lock() {
    return mutex_.try_lock_shared();
  }

  //! Release the shared ownership
  void unlock() {
    mutex_.unlock_shared();
  }

 private:
  //! Members
  SharedMutex &mutex_;

  //! Not default-constructible, copyable or movable
  ReadLock() = delete;
  ReadLock(const ReadLock &) = delete;
  ReadLock(ReadLock &&) = delete;
  ReadLock &operator=(const ReadLock &) = delete;
  ReadLock &operator=(ReadLock &&) = delete;
};

/*
  Atomic Close Lock

  Both macros expand to plain statements that refer to `counter_`, `opened_`
  and `this`, so they can only be used inside member functions of a class that
  provides members with those names (used via fetch_add/fetch_sub/load/store).
 */

// Place at the start of an accessor: increments counter_ for the rest of the
// enclosing scope (the decrement is deferred through AILEGO_DEFER) and returns
// CLOSE_ERR from the enclosing function when opened_ is false.
#define AILEGO_SAFE_ACCESS(CLOSE_ERR)              \
  counter_.fetch_add(1);                           \
  AILEGO_DEFER([this] { counter_.fetch_sub(1); }); \
  if (!opened_.load()) {                           \
    return CLOSE_ERR;                              \
  }

// Place at the start of close(): clears opened_, then polls in 1 ms sleeps
// until counter_ has dropped to zero. The expansion uses std::this_thread and
// std::chrono, so <thread> and <chrono> must be visible at the point of use.
#define AILEGO_SAFE_CLOSE                                      \
  opened_.store(false);                                        \
  while (counter_.load() > 0) {                                \
    std::this_thread::sleep_for(std::chrono::milliseconds(1)); \
  }

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/parallel/multi_thread_list.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <atomic>
#include <condition_variable>
#include <deque>
#include <mutex>

namespace zvec {
namespace ailego {

/*! Multi-Thread list
 *
 *  A bounded FIFO queue shared between producer and consumer threads. One
 *  mutex guards the container; `not_full_` parks producers while the list
 *  holds `size_limit` items, `not_empty_` parks consumers while it is empty.
 */
template <typename T>
class MultiThreadList {
 public:
  //! Constructor; `size_limit` is the item count at which producers block
  MultiThreadList(size_t size_limit = 1000) : size_limit_(size_limit) {}

  //! Append a copy of `item`, blocking while the list is full.
  //! Returns false (without appending) once done() has been called.
  bool produce(const T &item) {
    std::unique_lock<std::mutex> lk(lock_);
    not_full_.wait(
        lk, [&]() { return (list_.size() < size_limit_) || done_.load(); });
    if (done_.load()) {
      return false;
    }
    list_.emplace_back(item);
    not_empty_.notify_one();
    return true;
  }

  //! Append `item` by move, blocking while the list is full.
  //! Returns false (without appending) once done() has been called.
  bool produce(T &&item) {
    std::unique_lock<std::mutex> lk(lock_);
    not_full_.wait(
        lk, [&]() { return (list_.size() < size_limit_) || done_.load(); });
    if (done_.load()) {
      return false;
    }
    list_.emplace_back(std::move(item));
    not_empty_.notify_one();
    return true;
  }

  //! Move the oldest item into `*item`, blocking while the list is empty.
  //! Returns false when consumption is stopped, or when the list is empty and
  //! done() has been called; items queued before done() are still delivered.
  bool consume(T *item) {
    std::unique_lock<std::mutex> lk(lock_);
    not_empty_.wait(lk, [&]() {
      return !list_.empty() || done_.load() || consume_stopped_.load();
    });
    if ((list_.empty() && done_.load()) || consume_stopped_.load()) {
      return false;
    }
    *item = std::move(list_.front());
    list_.pop_front();
    not_full_.notify_one();
    return true;
  }

  //! Mark the list as finished and wake every blocked producer and consumer
  void done() {
    std::unique_lock<std::mutex> lk(lock_);
    done_.store(true);
    not_empty_.notify_all();
    not_full_.notify_all();
  }

  //! Clear the finished flag and drop all queued items.
  //! The container is modified under the mutex so that a concurrent
  //! produce()/consume() never observes it mid-clear, and producers that were
  //! parked on a full list are woken because space has just been freed.
  void reset() {
    std::unique_lock<std::mutex> lk(lock_);
    done_.store(false);
    list_.clear();
    not_full_.notify_all();
  }

  //! Make consume() return false (even with items queued) and wake consumers
  void stop_consume() {
    std::unique_lock<std::mutex> lk(lock_);
    consume_stopped_.store(true);
    not_empty_.notify_all();
  }

  //! Allow consume() to deliver items again
  void resume_consume() {
    consume_stopped_.store(false);
  }

 private:
  std::deque<T> list_;
  size_t size_limit_{0};
  std::mutex lock_;
  std::condition_variable not_empty_, not_full_;

  std::atomic<bool> done_{false};
  std::atomic<bool> consume_stopped_{false};
};

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/parallel/semaphore.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <atomic>
#include <condition_variable>
#include <mutex>
#include <type_traits>
#include <zvec/ailego/internal/platform.h>

namespace zvec {
namespace ailego {

/*! Semaphore
 */
class Semaphore {
 public:
  //! Constructor (a single permit)
  Semaphore(void) : Semaphore{1} {}

  //! Constructor with `count` initial permits
  Semaphore(uint32_t count) : count_(count) {}

  //! Acquire a permit from this semaphore, suspending until one is available
  void lock(void) {
    while (!this->try_lock()) {
      std::unique_lock<std::mutex> latch(mutex_);
      cond_.wait(latch, [this]() { return (count_ > 0); });
    }
  }

  //! Try to acquire a permit from this semaphore without suspension.
  //! Returns false only when no permit is left: a compare-exchange that loses
  //! a race against another thread is retried with the refreshed count.
  bool try_lock(void) {
    uint32_t count = count_.load(std::memory_order_acquire);
    while (count > 0) {
      // Taking a permit must synchronize with the unlock() that returned it,
      // hence acquire semantics on success (release is kept as well, so the
      // ordering is never weaker than before).
      if (count_.compare_exchange_weak(count, count - 1,
                                       std::memory_order_acq_rel,
                                       std::memory_order_relaxed)) {
        return true;
      }
      // `count` now holds the value observed by the failed exchange.
    }
    return false;
  }

  //! Release a permit, returning it into this semaphore
  void unlock(void) {
    ++count_;
    // Notify under the mutex so the wake-up cannot slip in between a waiter's
    // predicate check and its wait.
    std::lock_guard<std::mutex> latch(mutex_);
    cond_.notify_one();
  }

 private:
  //! Disable them
  Semaphore(const Semaphore &) = delete;
  Semaphore(Semaphore &&) = delete;
  Semaphore &operator=(const Semaphore &) = delete;
  Semaphore &operator=(Semaphore &&) = delete;

  //! Members
  std::atomic<uint32_t> count_{0};
  std::mutex mutex_{};
  std::condition_variable cond_{};
};

/*! Binary Semaphores
 */
template <size_t N, typename = typename std::enable_if<N <= 64u>::type>
class BinarySemaphores {
 public:
  //! Smallest unsigned integer type that can hold N flag bits
  using BitwiseType = typename std::conditional<
      N <= 32u,
      typename std::conditional<
          N <= 16u, typename std::conditional<N <= 8u, uint8_t, uint16_t>::type,
          uint32_t>::type,
      uint64_t>::type;

  //! Constructor (a single permit)
  BinarySemaphores(void) : BinarySemaphores{1} {}

  //! Constructor with `count` permits; 0 or any value above N selects N.
  //! Bit i of the flag word is set while permit i is available.
  BinarySemaphores(uint32_t count) {
    if (count == 0 || count > N) {
      count = N;
    }
    count_ = count;
    // Build a mask with the low `count` bits set without ever shifting by the
    // full width of the type.
    mask_ = static_cast<BitwiseType>(BitwiseType(1) << (count - 1));
    mask_ |= static_cast<BitwiseType>(mask_ - 1);
    flags_.store(mask_);
  }

  //! Acquire a permit from this semaphore, suspending until one is available
  int acquire(void) {
    int index = -1;
    while ((index = this->try_acquire()) < 0) {
      std::unique_lock<std::mutex> latch(mutex_);
      cond_.wait(latch, [this]() { return (flags_ > 0); });
    }
    return index;
  }

  //! Try to acquire a permit from this semaphore without suspension.
  //! Returns the index of the lowest available permit, or -1 if none is left.
  int try_acquire(void) {
    BitwiseType flags = flags_.load(std::memory_order_relaxed);
    while (flags > 0) {
      int index = CountTrailingZeros<BitwiseType>(flags);
      // Taking a permit must synchronize with the release() that returned it,
      // hence acquire semantics on success (release is kept as well, so the
      // ordering is never weaker than before).
      if (flags_.compare_exchange_weak(
              flags,
              static_cast<BitwiseType>(flags & (~(BitwiseType(1) << index))),
              std::memory_order_acq_rel, std::memory_order_relaxed)) {
        return index;
      }
      // A failed exchange has already refreshed `flags` with the current
      // value, so the loop simply re-evaluates it.
    }
    return -1;
  }

  //! Acquire a specified permit from this semaphore. Returns -1 immediately
  //! for an index outside [0, count); otherwise busy-waits (no suspension)
  //! until that permit is available and returns `index`.
  int acquire(int index) {
    if (index < 0 || static_cast<uint32_t>(index) >= count_) {
      return -1;
    }
    BitwiseType flags = flags_.load(std::memory_order_relaxed);
    const BitwiseType mask = static_cast<BitwiseType>(BitwiseType(1) << index);
    while (true) {
      if ((flags & mask) &&
          flags_.compare_exchange_weak(
              flags, static_cast<BitwiseType>(flags & (~mask)),
              std::memory_order_acq_rel, std::memory_order_relaxed)) {
        return index;
      }
      flags = flags_.load(std::memory_order_relaxed);
    }
  }

  //! Release a permit, returning it into this semaphore.
  //! An index outside [0, count) is ignored; it is rejected before the shift
  //! because shifting by a negative or oversized amount is undefined.
  void release(int index) {
    if (index < 0 || static_cast<uint32_t>(index) >= count_) {
      return;
    }
    flags_.fetch_or(
        static_cast<BitwiseType>((BitwiseType(1) << index) & mask_));
    // Notify under the mutex so the wake-up cannot slip in between a waiter's
    // predicate check and its wait.
    std::lock_guard<std::mutex> latch(mutex_);
    cond_.notify_one();
  }

 protected:
  //! Count the trailing zeros (32 bits)
  template <typename T>
  static inline auto CountTrailingZeros(T val) ->
      typename std::enable_if<sizeof(T) <= 4, int>::type {
    return ailego_ctz32(val);
  }

  //! Count the trailing zeros (64 bits)
  template <typename T>
  static inline auto CountTrailingZeros(T val) ->
      typename std::enable_if<sizeof(T) <= 8 && 4 < sizeof(T), int>::type {
    return ailego_ctz64(val);
  }

 private:
  //! Disable them
  BinarySemaphores(const BinarySemaphores &) = delete;
  BinarySemaphores(BinarySemaphores &&) = delete;
  BinarySemaphores &operator=(const BinarySemaphores &) = delete;
  BinarySemaphores &operator=(BinarySemaphores &&) = delete;

  //! Members
  uint32_t count_{0};
  BitwiseType mask_{0};
  std::atomic<BitwiseType> flags_{0};
  std::mutex mutex_{};
  std::condition_variable cond_{};
};

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/parallel/thread_pool.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <zvec/ailego/parallel/thread_pool.h>

#if (defined(__linux) || defined(__linux__)) && !defined(__ANDROID__)
#include <pthread.h>

// Pins each thread of `pool` to one CPU, assigning CPUs round-robin by the
// thread's position in the vector. Nothing is done when at most one hardware
// thread is reported. The result of pthread_setaffinity_np is not checked.
static inline void BindThreads(std::vector<std::thread> &pool) {
  const uint32_t cpu_count = std::thread::hardware_concurrency();
  if (cpu_count <= 1) {
    return;
  }

  size_t slot = 0u;
  for (std::thread &worker : pool) {
    cpu_set_t mask;
    CPU_ZERO(&mask);
    CPU_SET(slot % cpu_count, &mask);
    pthread_setaffinity_np(worker.native_handle(), sizeof(mask), &mask);
    ++slot;
  }
}

// Removes any pinning from the threads of `pool` by giving each of them an
// affinity mask with every one of the CPU_SETSIZE bits set. The result of
// pthread_setaffinity_np is not checked.
static inline void UnbindThreads(std::vector<std::thread> &pool) {
  cpu_set_t all_cpus;
  CPU_ZERO(&all_cpus);
  for (size_t cpu = 0u; cpu < CPU_SETSIZE; ++cpu) {
    CPU_SET(cpu, &all_cpus);
  }

  for (std::thread &worker : pool) {
    pthread_setaffinity_np(worker.native_handle(), sizeof(all_cpus),
                           &all_cpus);
  }
}
#else
// Fallbacks for every platform other than non-Android Linux: thread affinity
// is not implemented there, so both operations do nothing.
static inline void BindThreads(std::vector<std::thread> &) {}
static inline void UnbindThreads(std::vector<std::thread> &) {}
#endif

namespace zvec {
namespace ailego {

// Starts `size` threads, each running ThreadPool::worker on this pool, and
// optionally pins them to CPUs once all of them have been launched.
ThreadPool::ThreadPool(uint32_t size, bool binding) {
  uint32_t launched = 0u;
  while (launched < size) {
    pool_.emplace_back(&ThreadPool::worker, this);
    ++launched;
  }

  if (!binding) {
    return;
  }
  this->bind();
}

// Pins the pool's threads to CPUs by delegating to the file-local BindThreads
// helper (a no-op on platforms without the affinity implementation).
void ThreadPool::bind(void) {
  BindThreads(pool_);
}

// Removes CPU pinning from the pool's threads by delegating to the file-local
// UnbindThreads helper (a no-op on platforms without the affinity
// implementation).
void ThreadPool::unbind(void) {
  UnbindThreads(pool_);
}

// Body executed by every pool thread: repeatedly picks a task, runs it and
// signals its completion, until picking() returns false. On exit the thread
// deregisters itself and, if it was the last worker, wakes the waiters on
// stopped_cond_.
void ThreadPool::worker(void) {
  // Counter of workers
  ++worker_count_;

  ThreadPool::Task task;
  while (this->picking(&task)) {
    // Run the task
    task.handle->run();
    // Drop the handle right after running it instead of keeping it until the
    // next task overwrites it
    task.handle = nullptr;

    // Notify task finished
    if (task.control) {
      task.control->notify();
    }

    // Notify task group
    if (task.group) {
      task.group->notify();
      task.group = nullptr;
    }

    // Decrease count of active works
    // (the guard lives until the end of this loop iteration; waiters on
    // finished_cond_ are woken once nothing is running and nothing is queued)
    std::lock_guard<std::mutex> lock(wait_mutex_);
    if (--active_count_ == 0 && pending_count_ == 0) {
      finished_cond_.notify_all();
    }
  }

  // Decrease count of workers
  std::lock_guard<std::mutex> lock(wait_mutex_);
  if (--worker_count_ == 0) {
    stopped_cond_.notify_all();
  }
}

// Blocks until a task is pending or the pool is stopping, then hands the head
// of the queue over to the caller.
//
// task: receives the control pointer (copied) and the group and handle
//       (moved) of the dequeued entry.
// Returns false when stopping_ is set, in which case nothing is dequeued even
// if tasks are still pending; returns true after filling `*task`.
bool ThreadPool::picking(ThreadPool::Task *task) {
  std::unique_lock<std::mutex> latch(queue_mutex_);
  work_cond_.wait(latch,
                  [this]() { return (pending_count_ > 0 || stopping_); });
  if (stopping_) {
    return false;
  }

  // Pop a task
  auto &head = queue_.front();
  task->control = head.control;
  task->group = std::move(head.group);
  task->handle = std::move(head.handle);
  queue_.pop();

  // Update group control
  if (task->group) {
    task->group->mark_task_actived();
  }

  // Counter of active tasks
  // (wait_mutex_ is acquired while queue_mutex_ is still held, so the task
  // moves from "pending" to "active" in one step)
  std::unique_lock<std::mutex> lock(wait_mutex_);
  ++active_count_;
  --pending_count_;

  return true;
}

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/pattern/defer.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "scope_guard.h"

// Name helpers: the extra level of expansion lets __LINE__ be replaced by its
// number before it is pasted onto the `__ailegoDefer_` prefix, so every
// AILEGO_DEFER gets a guard variable named after the line it appears on.
// Two AILEGO_DEFER uses on the same source line therefore collide.
#define AILEGO_DEFER_NAME_(x, y) x##y
#define AILEGO_DEFER_NAME(x) AILEGO_DEFER_NAME_(__ailegoDefer_, x)

//! Defer operator
//! Declares a local scope guard built from the given arguments by
//! ailego::ScopeGuard::Make; the guard lives until the end of the enclosing
//! scope. `ailego::` is looked up at the point of use, so the macro must be
//! expanded where that name resolves (e.g. inside namespace zvec).
#define AILEGO_DEFER(...) \
  auto AILEGO_DEFER_NAME(__LINE__) = ailego::ScopeGuard::Make(__VA_ARGS__)


================================================
FILE: src/ailego/pattern/scope_guard.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <zvec/ailego/pattern/closure.h>

namespace zvec {
namespace ailego {

/*! Scope Guard Implementation
 */
// Scope guard bound to an object: stores an object pointer, a callable and an
// argument tuple, and hands all three to Functor::Run from the destructor,
// unless the guard has been moved from.
template <typename T, typename TFunc>
class ScopeGuardImpl {
 public:
  using Object = CallbackObject<T>;
  using Functor = CallbackFunctor<TFunc>;

  //! Constructor
  //! Move: takes over the callable and arguments of `rhs` and clears its
  //! object pointer so that only the new guard runs the callback.
  ScopeGuardImpl(ScopeGuardImpl &&rhs)
      : obj_(rhs.obj_),
        impl_(std::move(rhs.impl_)),
        tuple_(std::move(rhs.tuple_)) {
    rhs.obj_ = nullptr;
  }

  //! Constructor
  //! Copies the callable; `args` are forwarded into the stored tuple.
  template <typename... TArgs>
  ScopeGuardImpl(typename Object::Type *obj, const typename Functor::Type &impl,
                 TArgs &&...args)
      : obj_(obj), impl_(impl), tuple_(std::forward<TArgs>(args)...) {}

  //! Constructor
  //! Moves the callable; `args` are forwarded into the stored tuple.
  template <typename... TArgs>
  ScopeGuardImpl(typename Object::Type *obj, typename Functor::Type &&impl,
                 TArgs &&...args)
      : obj_(obj),
        impl_(std::move(impl)),
        tuple_(std::forward<TArgs>(args)...) {}

  // Destructor
  // Runs the callback only while an object is attached; a null obj_ (a
  // moved-from guard, or a guard constructed with a null pointer) does
  // nothing.
  ~ScopeGuardImpl(void) {
    if (obj_) {
      Functor::Run(obj_, impl_, tuple_);
    }
  }

 protected:
  //! Disable them
  ScopeGuardImpl(void) = delete;
  ScopeGuardImpl(const ScopeGuardImpl &) = delete;
  ScopeGuardImpl &operator=(const ScopeGuardImpl &) = delete;

 private:
  //! Members
  typename Object::Type *obj_;
  typename Functor::Type impl_;
  typename Functor::TupleType tuple_;
};

/*! Scope Guard Implementation (void, TFunc)
 */
// Scope guard without a bound object: stores a callable and an argument tuple
// and hands both to Functor::Run from the destructor. Since there is no object
// pointer to null out, an explicit `valid_` flag records whether this guard
// still owns the callback.
template <typename TFunc>
class ScopeGuardImpl<void, TFunc> {
 public:
  //! Callback Functor Type
  using Functor = CallbackFunctor<TFunc>;

  //! Constructor
  //! Move: takes over the callable, arguments and validity of `rhs`, then
  //! invalidates `rhs` so that only the new guard runs the callback.
  ScopeGuardImpl(ScopeGuardImpl &&rhs)
      : impl_(std::move(rhs.impl_)),
        tuple_(std::move(rhs.tuple_)),
        valid_(rhs.valid_) {
    rhs.valid_ = false;
  }

  //! Constructor
  //! Copies the callable; `args` are forwarded into the stored tuple.
  template <typename... TArgs>
  ScopeGuardImpl(const typename Functor::Type &impl, TArgs &&...args)
      : impl_(impl), tuple_(std::forward<TArgs>(args)...), valid_(true) {}

  //! Constructor
  //! Moves the callable; `args` are forwarded into the stored tuple.
  template <typename... TArgs>
  ScopeGuardImpl(typename Functor::Type &&impl, TArgs &&...args)
      : impl_(std::move(impl)),
        tuple_(std::forward<TArgs>(args)...),
        valid_(true) {}

  // Destructor
  // Runs the callback unless this guard has been moved from.
  ~ScopeGuardImpl(void) {
    if (valid_) {
      Functor::Run(impl_, tuple_);
    }
  }

 protected:
  //! Disable them
  ScopeGuardImpl(void) = delete;
  ScopeGuardImpl(const ScopeGuardImpl &) = delete;
  ScopeGuardImpl &operator=(const ScopeGuardImpl &) = delete;

 private:
  //! Members
  typename Functor::Type impl_;
  typename Functor::TupleType tuple_;
  bool valid_;
};

/*! Scope Guard
 */
// Factory for scope guards. Each Make overload returns a ScopeGuardImpl by
// value; the first four bind an object pointer to a member function of the
// matching cv-qualification, the last one wraps any callable accepted by
// CallbackValidator. In every overload `args` are forwarded to the guard's
// constructor.
struct ScopeGuard {
  //! Make a scope guard object (member function pointer)
  template <typename T, typename R, typename... TParams, typename... TArgs>
  static inline auto Make(T *obj, R (T::*impl)(TParams...), TArgs &&...args)
      -> ScopeGuardImpl<T, typename CallbackTraits<decltype(impl)>::Type> {
    return ScopeGuardImpl<T, typename CallbackTraits<decltype(impl)>::Type>(
        obj, impl, std::forward<TArgs>(args)...);
  }

  //! Make a scope guard object (constable member function pointer)
  template <typename T, typename R, typename... TParams, typename... TArgs>
  static inline auto Make(const T *obj, R (T::*impl)(TParams...) const,
                          TArgs &&...args)
      -> ScopeGuardImpl<const T,
                        typename CallbackTraits<decltype(impl)>::Type> {
    return ScopeGuardImpl<const T,
                          typename CallbackTraits<decltype(impl)>::Type>(
        obj, impl, std::forward<TArgs>(args)...);
  }

  //! Make a scope guard object (volatile member function pointer)
  template <typename T, typename R, typename... TParams, typename... TArgs>
  static inline auto Make(volatile T *obj, R (T::*impl)(TParams...) volatile,
                          TArgs &&...args)
      -> ScopeGuardImpl<volatile T,
                        typename CallbackTraits<decltype(impl)>::Type> {
    return ScopeGuardImpl<volatile T,
                          typename CallbackTraits<decltype(impl)>::Type>(
        obj, impl, std::forward<TArgs>(args)...);
  }

  //! Make a scope guard object (constable volatile member function pointer)
  template <typename T, typename R, typename... TParams, typename... TArgs>
  static inline auto Make(const volatile T *obj,
                          R (T::*impl)(TParams...) const volatile,
                          TArgs &&...args)
      -> ScopeGuardImpl<const volatile T,
                        typename CallbackTraits<decltype(impl)>::Type> {
    return ScopeGuardImpl<const volatile T,
                          typename CallbackTraits<decltype(impl)>::Type>(
        obj, impl, std::forward<TArgs>(args)...);
  }

  //! Make a scope guard object (function)
  //! Participates in overload resolution only when
  //! CallbackValidator<TFunc>::Value is true.
  template <
      typename TFunc, typename... TArgs,
      typename = typename std::enable_if<CallbackValidator<TFunc>::Value>::type>
  static inline auto Make(TFunc &&impl, TArgs &&...args)
      -> ScopeGuardImpl<void, typename CallbackTraits<TFunc>::Type> {
    return ScopeGuardImpl<void, typename CallbackTraits<TFunc>::Type>(
        std::forward<TFunc>(impl), std::forward<TArgs>(args)...);
  }
};

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/utility/bit_string_helper.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <vector>
#include <zvec/ailego/internal/platform.h>

namespace zvec {

namespace ailego {

//! Sequential bit-level writer over a caller-owned byte buffer.
//!
//! Bits are packed least-significant-bit first: the first bit written lands
//! in bit 0 of buffer[0]. The buffer is zeroed on construction and every
//! write ORs its bits in at the current bit offset.
class BitStringWriter {
 public:
  //! Attach to `buffer` (`buffer_size` bytes) and clear it.
  BitStringWriter(uint8_t *buffer, size_t buffer_size)
      : buffer_(buffer), buffer_size_(buffer_size), offset_(0) {
    // Avoid handing a null pointer / zero length to memset.
    if (buffer_ != nullptr && buffer_size_ != 0) {
      ::memset(buffer_, 0, buffer_size_);
    }
  }

  //! Append the low `nbit` bits of `data`.
  //!
  //! Bits of `data` above `nbit` are ignored. For `nbit` > 64 the value is
  //! zero-extended. Returns false (and writes nothing) when `nbit` is
  //! negative or the bits do not fit in the remaining buffer space.
  bool write(uint64_t data, int nbit) {
    if (nbit < 0) {
      return false;
    }
    const size_t count = static_cast<size_t>(nbit);
    const size_t capacity = buffer_size_ * 8;
    // Written as a subtraction so the comparison cannot wrap around.
    if (offset_ > capacity || count > capacity - offset_) {
      return false;
    }
    if (count == 0) {
      // Nothing to store; also avoids touching buffer_[size] at the end.
      return true;
    }

    // Drop stray high bits so they cannot leak into neighbouring fields or
    // make the carry loop below run past the reserved bit range.
    if (count < 64) {
      data &= (static_cast<uint64_t>(1) << count) - 1;
    }

    const unsigned shift = static_cast<unsigned>(offset_ & 7);
    const size_t bits_remain = 8 - shift;  // free bits in the current byte
    size_t j = offset_ >> 3;

    buffer_[j++] |= static_cast<uint8_t>(data << shift);
    offset_ += count;

    if (count > bits_remain) {
      // Spill whatever did not fit into the first byte, one byte at a time.
      data >>= bits_remain;
      while (data != 0) {
        buffer_[j++] |= static_cast<uint8_t>(data);
        data >>= 8;
      }
    }

    return true;
  }

  //! Number of bits written so far.
  size_t offset() const {
    return offset_;
  }

 private:
  uint8_t *buffer_;
  size_t buffer_size_;
  size_t offset_;
};

//! Sequential bit-level reader over a caller-owned byte buffer.
//!
//! Bits are consumed least-significant-bit first: the first bit read is
//! bit 0 of buffer[0].
class BitStringReader {
 public:
  //! Attach to `buffer` (`buffer_size` bytes); the data is not copied.
  BitStringReader(const uint8_t *buffer, size_t buffer_size)
      : buffer_(buffer), buffer_size_(buffer_size), offset_(0) {}

  //! Read the next `nbit` bits into `data` (zero-extended).
  //!
  //! Returns false, leaving `data` and the read offset untouched, when
  //! `nbit` is outside [0, 64] or fewer than `nbit` bits remain.
  bool read(uint64_t &data, int nbit) {
    // More than 64 bits cannot be represented and would shift out of range.
    if (nbit < 0 || nbit > 64) {
      return false;
    }
    const size_t count = static_cast<size_t>(nbit);
    const size_t capacity = buffer_size_ * 8;
    // Written as a subtraction so the comparison cannot wrap around.
    if (offset_ > capacity || count > capacity - offset_) {
      return false;
    }
    if (count == 0) {
      // Nothing to fetch; also avoids reading buffer_[size] at the end.
      data = 0;
      return true;
    }

    const unsigned shift = static_cast<unsigned>(offset_ & 7);
    const size_t bits_remain = 8 - shift;  // unread bits in the current byte
    size_t i = offset_ >> 3;

    uint64_t result = static_cast<uint64_t>(buffer_[i++]) >> shift;
    offset_ += count;

    if (count <= bits_remain) {
      // The request is satisfied by the current byte alone.
      result &= (static_cast<uint64_t>(1) << count) - 1;
    } else {
      size_t filled = bits_remain;           // bits already in `result`
      size_t pending = count - bits_remain;  // bits still to fetch

      // Whole bytes in the middle of the field.
      while (pending > 8) {
        result |= static_cast<uint64_t>(buffer_[i++]) << filled;
        filled += 8;
        pending -= 8;
      }

      // Final byte: only its low `pending` (1..8) bits belong to the field.
      uint64_t last_byte = buffer_[i];
      last_byte &= (static_cast<uint64_t>(1) << pending) - 1;
      result |= last_byte << filled;
    }

    data = result;
    return true;
  }

  //! Number of bits consumed so far.
  size_t offset() const {
    return offset_;
  }

 private:
  const uint8_t *buffer_;
  size_t buffer_size_;
  size_t offset_;
};

}  // namespace ailego

}  // namespace zvec


================================================
FILE: src/ailego/utility/bitset_helper.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "bitset_helper.h"
#include <zvec/ailego/internal/platform.h>

// Map bitset_popcount32/64 to a population-count primitive: the
// ailego_popcount32/64 names when SSE4.2 is not enabled, otherwise the
// _mm_popcnt_u32/_mm_popcnt_u64 intrinsics.
#ifndef __SSE4_2__
#define bitset_popcount32 ailego_popcount32
#define bitset_popcount64 ailego_popcount64
#else
#define bitset_popcount32 _mm_popcnt_u32
#define bitset_popcount64 _mm_popcnt_u64
#endif  // !__SSE4_2__

#if defined(__ARM_NEON)
//! In-place AND (NEON): lhs[i] &= rhs[i] for every i in [0, size)
static inline void bitset_and(uint32_t *lhs, const uint32_t *rhs, size_t size) {
  // Four 32-bit words per 128-bit register.
  for (size_t quads = size >> 2; quads != 0; --quads) {
    vst1q_u32(lhs, vandq_u32(vld1q_u32(lhs), vld1q_u32(rhs)));
    lhs += 4;
    rhs += 4;
  }
  // Up to three leftover words, highest index first.
  for (size_t idx = size & 3u; idx != 0; --idx) {
    lhs[idx - 1] &= rhs[idx - 1];
  }
}

//! In-place AND-NOT (NEON): lhs[i] &= ~rhs[i] for every i in [0, size)
static inline void bitset_andnot(uint32_t *lhs, const uint32_t *rhs,
                                 size_t size) {
  // Four 32-bit words per 128-bit register.
  for (size_t quads = size >> 2; quads != 0; --quads) {
    vst1q_u32(lhs, vbicq_u32(vld1q_u32(lhs), vld1q_u32(rhs)));
    lhs += 4;
    rhs += 4;
  }
  // Up to three leftover words, highest index first.
  for (size_t idx = size & 3u; idx != 0; --idx) {
    lhs[idx - 1] &= ~rhs[idx - 1];
  }
}

//! In-place OR (NEON): lhs[i] |= rhs[i] for every i in [0, size)
static inline void bitset_or(uint32_t *lhs, const uint32_t *rhs, size_t size) {
  // Four 32-bit words per 128-bit register.
  for (size_t quads = size >> 2; quads != 0; --quads) {
    vst1q_u32(lhs, vorrq_u32(vld1q_u32(lhs), vld1q_u32(rhs)));
    lhs += 4;
    rhs += 4;
  }
  // Up to three leftover words, highest index first.
  for (size_t idx = size & 3u; idx != 0; --idx) {
    lhs[idx - 1] |= rhs[idx - 1];
  }
}

//! In-place XOR (NEON): lhs[i] ^= rhs[i] for every i in [0, size)
static inline void bitset_xor(uint32_t *lhs, const uint32_t *rhs, size_t size) {
  // Four 32-bit words per 128-bit register.
  for (size_t quads = size >> 2; quads != 0; --quads) {
    vst1q_u32(lhs, veorq_u32(vld1q_u32(lhs), vld1q_u32(rhs)));
    lhs += 4;
    rhs += 4;
  }
  // Up to three leftover words, highest index first.
  for (size_t idx = size & 3u; idx != 0; --idx) {
    lhs[idx - 1] ^= rhs[idx - 1];
  }
}

static inline void bitset_not(uint32_t *lhs, size_t size) {
  uint32_t *last = lhs + size;
  uint32_t *last_aligned = lhs + ((size >> 2) << 2);
  static const uint32x4_t v_zero = vdupq_n_u32(0);

  for (; lhs != last_aligned; lhs += 4) {
    vst1q_u32(lhs, vornq_u32(v_zero, vld1q_u32(lhs)));
  }
  switch (last - last_aligned) {
    case 3:
      lhs[2] = ~lhs[2];
      /* FALLTHRU */
    case 2:
      lhs[1] = ~lhs[1];
      /* FALLTHRU */
    case 1:
      lhs[0] = ~lhs[0];
  }
}

static inline bool bitset_test_all(const uint32_t *lhs, size_t size) {
  const uint32_t *last = lhs + size;
  const uint32_t *last_aligned = lhs + ((size >> 2) << 2);

  for (; lhs != last_aligned; lhs += 4) {
    uint64x2_t vu64 = vld1q_u64((const uint64_t *)lhs);
    if ((vgetq_lane_u64(vu64, 0) & vgetq_lane_u64(vu64, 1)) != (uint64_t)-1) {
      return false;
    }
  }
  switch (last - last_aligned) {
    case 3:
      if (lhs[2] != 0xffffffffu) {
        return false;
      }
      /* FALLTHRU */
    case 2:
      if (lhs[1] != 0xffffffffu) {
        return false;
      }
      /* FALLTHRU */
    case 1:
      if (lhs[0] != 0xffffffffu) {
        return false;
      }
  }
  return true;
}

static inline bool bitset_test_any(const uint32_t *lhs, size_t size) {
  const uint32_t *last = lhs + size;
  const uint32_t *last_aligned = lhs + ((size >> 2) << 2);

  for (; lhs != last_aligned; lhs += 4) {
    uint64x2_t vu64 = vld1q_u64((const uint64_t *)lhs);
    if (vgetq_lane_u64(vu64, 0) | vgetq_lane_u64(vu64, 1)) {
      return true;
    }
  }
  switch (last - last_aligned) {
    case 3:
      if (lhs[2] != 0u) {
        return true;
      }
      /* FALLTHRU */
    case 2:
      if (lhs[1] != 0u) {
        return true;
      }
      /* FALLTHRU */
    case 1:
      if (lhs[0] != 0u) {
        return true;
      }
  }
  return false;
}

static inline bool bitset_test_none(const uint32_t *lhs, size_t size) {
  const uint32_t *last = lhs + size;
  const uint32_t *last_aligned = lhs + ((size >> 2) << 2);

  for (; lhs != last_aligned; lhs += 4) {
    uint64x2_t vu64 = vld1q_u64((const uint64_t *)lhs);
    if (vgetq_lane_u64(vu64, 0) | vgetq_lane_u64(vu64, 1)) {
      return false;
    }
  }
  switch (last - last_aligned) {
    case 3:
      if (lhs[2] != 0u) {
        return false;
      }
      /* FALLTHRU */
    case 2:
      if (lhs[1] != 0u) {
        return false;
      }
      /* FALLTHRU */
    case 1:
      if (lhs[0] != 0u) {
        return false;
      }
  }
  return true;
}

#elif defined(__AVX2__)
static inline void bitset_and(uint32_t *lhs, const uint32_t *rhs, size_t size) {
  uint32_t *last = lhs + size;
  uint32_t *last_aligned = lhs + ((size >> 3) << 3);

  if (((uintptr_t)lhs & 0x1f) == 0 && ((uintptr_t)rhs & 0x1f) == 0) {
    for (; lhs != last_aligned; lhs += 8, rhs += 8) {
      __m256i ymm0 = _mm256_load_si256((__m256i *)lhs);
      __m256i ymm1 = _mm256_load_si256((__m256i *)rhs);
      _mm256_store_si256((__m256i *)lhs, _mm256_and_si256(ymm1, ymm0));
    }
    if (last >= last_aligned + 4) {
      __m128i xmm0 = _mm_load_si128((__m128i *)lhs);
      __m128i xmm1 = _mm_load_si128((__m128i *)rhs);
      _mm_store_si128((__m128i *)lhs, _mm_and_si128(xmm1, xmm0));
      lhs += 4;
      rhs += 4;
    }
  } else {
    for (; lhs != last_aligned; lhs += 8, rhs += 8) {
      __m256i ymm0 = _mm256_loadu_si256((__m256i *)lhs);
      __m256i ymm1 = _mm256_loadu_si256((__m256i *)rhs);
      _mm256_storeu_si256((__m256i *)lhs, _mm256_and_si256(ymm1, ymm0));
    }
    if (last >= last_aligned + 4) {
      __m128i xmm0 = _mm_lddqu_si128((__m128i *)lhs);
      __m128i xmm1 = _mm_lddqu_si128((__m128i *)rhs);
      _mm_storeu_si128((__m128i *)lhs, _mm_and_si128(xmm1, xmm0));
      lhs += 4;
      rhs += 4;
    }
  }
  switch (last - lhs) {
    case 3:
      lhs[2] &= rhs[2];
      /* FALLTHRU */
    case 2:
      lhs[1] &= rhs[1];
      /* FALLTHRU */
    case 1:
      lhs[0] &= rhs[0];
  }
}

static inline void bitset_andnot(uint32_t *lhs, const uint32_t *rhs,
                                 size_t size) {
  uint32_t *last = lhs + size;
  uint32_t *last_aligned = lhs + ((size >> 3) << 3);

  if (((uintptr_t)lhs & 0x1f) == 0 && ((uintptr_t)rhs & 0x1f) == 0) {
    for (; lhs != last_aligned; lhs += 8, rhs += 8) {
      __m256i ymm0 = _mm256_load_si256((__m256i *)lhs);
      __m256i ymm1 = _mm256_load_si256((__m256i *)rhs);
      _mm256_store_si256((__m256i *)lhs, _mm256_andnot_si256(ymm1, ymm0));
    }
    if (last >= last_aligned + 4) {
      __m128i xmm0 = _mm_load_si128((__m128i *)lhs);
      __m128i xmm1 = _mm_load_si128((__m128i *)rhs);
      _mm_store_si128((__m128i *)lhs, _mm_andnot_si128(xmm1, xmm0));
      lhs += 4;
      rhs += 4;
    }
  } else {
    for (; lhs != last_aligned; lhs += 8, rhs += 8) {
      __m256i ymm0 = _mm256_loadu_si256((__m256i *)lhs);
      __m256i ymm1 = _mm256_loadu_si256((__m256i *)rhs);
      _mm256_storeu_si256((__m256i *)lhs, _mm256_andnot_si256(ymm1, ymm0));
    }
    if (last >= last_aligned + 4) {
      __m128i xmm0 = _mm_lddqu_si128((__m128i *)lhs);
      __m128i xmm1 = _mm_lddqu_si128((__m128i *)rhs);
      _mm_storeu_si128((__m128i *)lhs, _mm_andnot_si128(xmm1, xmm0));
      lhs += 4;
      rhs += 4;
    }
  }
  switch (last - lhs) {
    case 3:
      lhs[2] &= ~rhs[2];
      /* FALLTHRU */
    case 2:
      lhs[1] &= ~rhs[1];
      /* FALLTHRU */
    case 1:
      lhs[0] &= ~rhs[0];
  }
}

static inline void bitset_or(uint32_t *lhs, const uint32_t *rhs, size_t size) {
  uint32_t *last = lhs + size;
  uint32_t *last_aligned = lhs + ((size >> 3) << 3);

  if (((uintptr_t)lhs & 0x1f) == 0 && ((uintptr_t)rhs & 0x1f) == 0) {
    for (; lhs != last_aligned; lhs += 8, rhs += 8) {
      __m256i ymm0 = _mm256_load_si256((__m256i *)lhs);
      __m256i ymm1 = _mm256_load_si256((__m256i *)rhs);
      _mm256_store_si256((__m256i *)lhs, _mm256_or_si256(ymm1, ymm0));
    }
    if (last >= last_aligned + 4) {
      __m128i xmm0 = _mm_load_si128((__m128i *)lhs);
      __m128i xmm1 = _mm_load_si128((__m128i *)rhs);
      _mm_store_si128((__m128i *)lhs, _mm_or_si128(xmm1, xmm0));
      lhs += 4;
      rhs += 4;
    }
  } else {
    for (; lhs != last_aligned; lhs += 8, rhs += 8) {
      __m256i ymm0 = _mm256_loadu_si256((__m256i *)lhs);
      __m256i ymm1 = _mm256_loadu_si256((__m256i *)rhs);
      _mm256_storeu_si256((__m256i *)lhs, _mm256_or_si256(ymm1, ymm0));
    }
    if (last >= last_aligned + 4) {
      __m128i xmm0 = _mm_lddqu_si128((__m128i *)lhs);
      __m128i xmm1 = _mm_lddqu_si128((__m128i *)rhs);
      _mm_storeu_si128((__m128i *)lhs, _mm_or_si128(xmm1, xmm0));
      lhs += 4;
      rhs += 4;
    }
  }
  switch (last - lhs) {
    case 3:
      lhs[2] |= rhs[2];
      /* FALLTHRU */
    case 2:
      lhs[1] |= rhs[1];
      /* FALLTHRU */
    case 1:
      lhs[0] |= rhs[0];
  }
}

static inline void bitset_xor(uint32_t *lhs, const uint32_t *rhs, size_t size) {
  uint32_t *last = lhs + size;
  uint32_t *last_aligned = lhs + ((size >> 3) << 3);

  if (((uintptr_t)lhs & 0x1f) == 0 && ((uintptr_t)rhs & 0x1f) == 0) {
    for (; lhs != last_aligned; lhs += 8, rhs += 8) {
      __m256i ymm0 = _mm256_load_si256((__m256i *)lhs);
      __m256i ymm1 = _mm256_load_si256((__m256i *)rhs);
      _mm256_store_si256((__m256i *)lhs, _mm256_xor_si256(ymm1, ymm0));
    }
    if (last >= last_aligned + 4) {
      __m128i xmm0 = _mm_load_si128((__m128i *)lhs);
      __m128i xmm1 = _mm_load_si128((__m128i *)rhs);
      _mm_store_si128((__m128i *)lhs, _mm_xor_si128(xmm1, xmm0));
      lhs += 4;
      rhs += 4;
    }
  } else {
    for (; lhs != last_aligned; lhs += 8, rhs += 8) {
      __m256i ymm0 = _mm256_loadu_si256((__m256i *)lhs);
      __m256i ymm1 = _mm256_loadu_si256((__m256i *)rhs);
      _mm256_storeu_si256((__m256i *)lhs, _mm256_xor_si256(ymm1, ymm0));
    }
    if (last >= last_aligned + 4) {
      __m128i xmm0 = _mm_lddqu_si128((__m128i *)lhs);
      __m128i xmm1 = _mm_lddqu_si128((__m128i *)rhs);
      _mm_storeu_si128((__m128i *)lhs, _mm_xor_si128(xmm1, xmm0));
      lhs += 4;
      rhs += 4;
    }
  }
  switch (last - lhs) {
    case 3:
      lhs[2] ^= rhs[2];
      /* FALLTHRU */
    case 2:
      lhs[1] ^= rhs[1];
      /* FALLTHRU */
    case 1:
      lhs[0] ^= rhs[0];
  }
}

static inline void bitset_not(uint32_t *lhs, size_t size) {
  uint32_t *last = lhs + size;
  uint32_t *last_aligned = lhs + ((size >> 3) << 3);
  static const __m256i mask_256 = _mm256_set1_epi32(0xffffffffu);
  static const __m128i mask_128 = _mm_set1_epi32(0xffffffffu);

  if (((uintptr_t)lhs & 0x1f) == 0) {
    for (; lhs != last_aligned; lhs += 8) {
      _mm256_store_si256(
          (__m256i *)lhs,
          _mm256_andnot_si256(_mm256_load_si256((__m256i *)lhs), mask_256));
    }
    if (last >= last_aligned + 4) {
      _mm_store_si128(
          (__m128i *)lhs,
          _mm_andnot_si128(_mm_load_si128((__m128i *)lhs), mask_128));
      lhs += 4;
    }
  } else {
    for (; lhs != last_aligned; lhs += 8) {
      _mm256_storeu_si256(
          (__m256i *)lhs,
          _mm256_andnot_si256(_mm256_loadu_si256((__m256i *)lhs), mask_256));
    }
    if (last >= last_aligned + 4) {
      _mm_storeu_si128(
          (__m128i *)lhs,
          _mm_andnot_si128(_mm_lddqu_si128((__m128i *)lhs), mask_128));
      lhs += 4;
    }
  }
  switch (last - lhs) {
    case 3:
      lhs[2] = ~lhs[2];
      /* FALLTHRU */
    case 2:
      lhs[1] = ~lhs[1];
      /* FALLTHRU */
    case 1:
      lhs[0] = ~lhs[0];
  }
}

static inline bool bitset_test_all(const uint32_t *lhs, size_t size) {
  const uint32_t *last = lhs + size;
  const uint32_t *last_aligned = lhs + ((size >> 3) << 3);
  static const __m256i mask_256 = _mm256_set1_epi32(0xffffffffu);
  static const __m128i mask_128 = _mm_set1_epi32(0xffffffffu);

  if (((uintptr_t)lhs & 0x1f) == 0) {
    for (; lhs != last_aligned; lhs += 8) {
      __m256i neq =
          _mm256_xor_si256(_mm256_load_si256((__m256i *)lhs), mask_256);
      if (!_mm256_testz_si256(neq, neq)) {
        return false;
      }
    }
    if (last >= last_aligned + 4) {
      __m128i neq = _mm_xor_si128(_mm_load_si128((__m128i *)lhs), mask_128);
      if (!_mm_testz_si128(neq, neq)) {
        return false;
      }
      lhs += 4;
    }
  } else {
    for (; lhs != last_aligned; lhs += 8) {
      __m256i neq =
          _mm256_xor_si256(_mm256_loadu_si256((__m256i *)lhs), mask_256);
      if (!_mm256_testz_si256(neq, neq)) {
        return false;
      }
    }
    if (last >= last_aligned + 4) {
      __m128i neq = _mm_xor_si128(_mm_lddqu_si128((__m128i *)lhs), mask_128);
      if (!_mm_testz_si128(neq, neq)) {
        return false;
      }
      lhs += 4;
    }
  }
  switch (last - lhs) {
    case 3:
      if (lhs[2] != 0xffffffffu) {
        return false;
      }
      /* FALLTHRU */
    case 2:
      if (lhs[1] != 0xffffffffu) {
        return false;
      }
      /* FALLTHRU */
    case 1:
      if (lhs[0] != 0xffffffffu) {
        return false;
      }
  }
  return true;
}

static inline bool bitset_test_any(const uint32_t *lhs, size_t size) {
  const uint32_t *last = lhs + size;
  const uint32_t *last_aligned = lhs + ((size >> 3) << 3);

  if (((uintptr_t)lhs & 0x1f) == 0) {
    for (; lhs != last_aligned; lhs += 8) {
      __m256i ymm0 = _mm256_load_si256((__m256i *)lhs);
      if (!_mm256_testz_si256(ymm0, ymm0)) {
        return true;
      }
    }
    if (last >= last_aligned + 4) {
      __m128i xmm0 = _mm_load_si128((__m128i *)lhs);
      if (!_mm_testz_si128(xmm0, xmm0)) {
        return true;
      }
      lhs += 4;
    }
  } else {
    for (; lhs != last_aligned; lhs += 8) {
      __m256i ymm0 = _mm256_loadu_si256((__m256i *)lhs);
      if (!_mm256_testz_si256(ymm0, ymm0)) {
        return true;
      }
    }
    if (last >= last_aligned + 4) {
      __m128i xmm0 = _mm_lddqu_si128((__m128i *)lhs);
      if (!_mm_testz_si128(xmm0, xmm0)) {
        return true;
      }
      lhs += 4;
    }
  }
  switch (last - lhs) {
    case 3:
      if (lhs[2] != 0u) {
        return true;
      }
      /* FALLTHRU */
    case 2:
      if (lhs[1] != 0u) {
        return true;
      }
      /* FALLTHRU */
    case 1:
      if (lhs[0] != 0u) {
        return true;
      }
  }
  return false;
}

static inline bool bitset_test_none(const uint32_t *lhs, size_t size) {
  const uint32_t *last = lhs + size;
  const uint32_t *last_aligned = lhs + ((size >> 3) << 3);

  if (((uintptr_t)lhs & 0x1f) == 0) {
    for (; lhs != last_aligned; lhs += 8) {
      __m256i ymm0 = _mm256_load_si256((__m256i *)lhs);
      if (!_mm256_testz_si256(ymm0, ymm0)) {
        return false;
      }
    }
    if (last >= last_aligned + 4) {
      __m128i xmm0 = _mm_load_si128((__m128i *)lhs);
      if (!_mm_testz_si128(xmm0, xmm0)) {
        return false;
      }
      lhs += 4;
    }
  } else {
    for (; lhs != last_aligned; lhs += 8) {
      __m256i ymm0 = _mm256_loadu_si256((__m256i *)lhs);
      if (!_mm256_testz_si256(ymm0, ymm0)) {
        return false;
      }
    }
    if (last >= last_aligned + 4) {
      __m128i xmm0 = _mm_lddqu_si128((__m128i *)lhs);
      if (!_mm_testz_si128(xmm0, xmm0)) {
        return false;
      }
      lhs += 4;
    }
  }
  switch (last - lhs) {
    case 3:
      if (lhs[2] != 0u) {
        return false;
      }
      /* FALLTHRU */
    case 2:
      if (lhs[1] != 0u) {
        return false;
      }
      /* FALLTHRU */
    case 1:
      if (lhs[0] != 0u) {
        return false;
      }
  }
  return true;
}

#elif defined(__SSE2__)
// Without SSE3, alias _mm_lddqu_si128 to _mm_loadu_si128 so the
// unaligned-load code paths below still compile.
#ifndef __SSE3__
#define _mm_lddqu_si128 _mm_loadu_si128
#endif  // !__SSE3__

static inline void bitset_and(uint32_t *lhs, const uint32_t *rhs, size_t size) {
  uint32_t *last = lhs + size;
  uint32_t *last_aligned = lhs + ((size >> 2) << 2);

  if (((uintptr_t)lhs & 0xf) == 0 && ((uintptr_t)rhs & 0xf) == 0) {
    for (; lhs != last_aligned; lhs += 4, rhs += 4) {
      __m128i xmm0 = _mm_load_si128((__m128i *)lhs);
      __m128i xmm1 = _mm_load_si128((__m128i *)rhs);
      _mm_store_si128((__m128i *)lhs, _mm_and_si128(xmm1, xmm0));
    }
  } else {
    for (; lhs != last_aligned; lhs += 4, rhs += 4) {
      __m128i xmm0 = _mm_lddqu_si128((__m128i *)lhs);
      __m128i xmm1 = _mm_lddqu_si128((__m128i *)rhs);
      _mm_storeu_si128((__m128i *)lhs, _mm_and_si128(xmm1, xmm0));
    }
  }
  switch (last - last_aligned) {
    case 3:
      lhs[2] &= rhs[2];
      /* FALLTHRU */
    case 2:
      lhs[1] &= rhs[1];
      /* FALLTHRU */
    case 1:
      lhs[0] &= rhs[0];
  }
}

static inline void bitset_andnot(uint32_t *lhs, const uint32_t *rhs,
                                 size_t size) {
  uint32_t *last = lhs + size;
  uint32_t *last_aligned = lhs + ((size >> 2) << 2);

  if (((uintptr_t)lhs & 0xf) == 0 && ((uintptr_t)rhs & 0xf) == 0) {
    for (; lhs != last_aligned; lhs += 4, rhs += 4) {
      __m128i xmm0 = _mm_load_si128((__m128i *)lhs);
      __m128i xmm1 = _mm_load_si128((__m128i *)rhs);
      _mm_store_si128((__m128i *)lhs, _mm_andnot_si128(xmm1, xmm0));
    }
  } else {
    for (; lhs != last_aligned; lhs += 4, rhs += 4) {
      __m128i xmm0 = _mm_lddqu_si128((__m128i *)lhs);
      __m128i xmm1 = _mm_lddqu_si128((__m128i *)rhs);
      _mm_storeu_si128((__m128i *)lhs, _mm_andnot_si128(xmm1, xmm0));
    }
  }
  switch (last - last_aligned) {
    case 3:
      lhs[2] &= ~rhs[2];
      /* FALLTHRU */
    case 2:
      lhs[1] &= ~rhs[1];
      /* FALLTHRU */
    case 1:
      lhs[0] &= ~rhs[0];
  }
}

static inline void bitset_or(uint32_t *lhs, const uint32_t *rhs, size_t size) {
  uint32_t *last = lhs + size;
  uint32_t *last_aligned = lhs + ((size >> 2) << 2);

  if (((uintptr_t)lhs & 0xf) == 0 && ((uintptr_t)rhs & 0xf) == 0) {
    for (; lhs != last_aligned; lhs += 4, rhs += 4) {
      __m128i xmm0 = _mm_load_si128((__m128i *)lhs);
      __m128i xmm1 = _mm_load_si128((__m128i *)rhs);
      _mm_store_si128((__m128i *)lhs, _mm_or_si128(xmm1, xmm0));
    }
  } else {
    for (; lhs != last_aligned; lhs += 4, rhs += 4) {
      __m128i xmm0 = _mm_lddqu_si128((__m128i *)lhs);
      __m128i xmm1 = _mm_lddqu_si128((__m128i *)rhs);
      _mm_storeu_si128((__m128i *)lhs, _mm_or_si128(xmm1, xmm0));
    }
  }
  switch (last - last_aligned) {
    case 3:
      lhs[2] |= rhs[2];
      /* FALLTHRU */
    case 2:
      lhs[1] |= rhs[1];
      /* FALLTHRU */
    case 1:
      lhs[0] |= rhs[0];
  }
}

static inline void bitset_xor(uint32_t *lhs, const uint32_t *rhs, size_t size) {
  uint32_t *last = lhs + size;
  uint32_t *last_aligned = lhs + ((size >> 2) << 2);

  if (((uintptr_t)lhs & 0xf) == 0 && ((uintptr_t)rhs & 0xf) == 0) {
    for (; lhs != last_aligned; lhs += 4, rhs += 4) {
      __m128i xmm0 = _mm_load_si128((__m128i *)lhs);
      __m128i xmm1 = _mm_load_si128((__m128i *)rhs);
      _mm_store_si128((__m128i *)lhs, _mm_xor_si128(xmm1, xmm0));
    }
  } else {
    for (; lhs != last_aligned; lhs += 4, rhs += 4) {
      __m128i xmm0 = _mm_lddqu_si128((__m128i *)lhs);
      __m128i xmm1 = _mm_lddqu_si128((__m128i *)rhs);
      _mm_storeu_si128((__m128i *)lhs, _mm_xor_si128(xmm1, xmm0));
    }
  }
  switch (last - last_aligned) {
    case 3:
      lhs[2] ^= rhs[2];
      /* FALLTHRU */
    case 2:
      lhs[1] ^= rhs[1];
      /* FALLTHRU */
    case 1:
      lhs[0] ^= rhs[0];
  }
}

static inline void bitset_not(uint32_t *lhs, size_t size) {
  uint32_t *last = lhs + size;
  uint32_t *last_aligned = lhs + ((size >> 2) << 2);
  static const __m128i mask = _mm_set1_epi32(0xffffffffu);

  if (((uintptr_t)lhs & 0xf) == 0) {
    for (; lhs != last_aligned; lhs += 4) {
      _mm_store_si128((__m128i *)lhs,
                      _mm_andnot_si128(_mm_load_si128((__m128i *)lhs), mask));
    }
  } else {
    for (; lhs != last_aligned; lhs += 4) {
      _mm_storeu_si128((__m128i *)lhs,
                       _mm_andnot_si128(_mm_lddqu_si128((__m128i *)lhs), mask));
    }
  }
  switch (last - last_aligned) {
    case 3:
      lhs[2] = ~lhs[2];
      /* FALLTHRU */
    case 2:
      lhs[1] = ~lhs[1];
      /* FALLTHRU */
    case 1:
      lhs[0] = ~lhs[0];
  }
}

static inline bool bitset_test_all(const uint32_t *lhs, size_t size) {
  const uint32_t *last = lhs + size;
  const uint32_t *last_aligned = lhs + ((size >> 2) << 2);
  static const __m128i mask = _mm_set1_epi32(0xffffffffu);

#ifndef __SSE4_1__
  if (((uintptr_t)lhs & 0xf) == 0) {
    for (; lhs != last_aligned; lhs += 4) {
      __m128i eq = _mm_cmpeq_epi32(_mm_load_si128((__m128i *)lhs), mask);
      if (_mm_movemask_epi8(eq) != 0xffffu) {
        return false;
      }
    }
  } else {
    for (; lhs != last_aligned; lhs += 4) {
      __m128i eq = _mm_cmpeq_epi32(_mm_lddqu_si128((__m128i *)lhs), mask);
      if (_mm_movemask_epi8(eq) != 0xffffu) {
        return false;
      }
    }
  }
#else
  if (((uintptr_t)lhs & 0xf) == 0) {
    for (; lhs != last_aligned; lhs += 4) {
      __m128i neq = _mm_xor_si128(_mm_load_si128((__m128i *)lhs), mask);
      if (!_mm_testz_si128(neq, neq)) {
        return false;
      }
    }
  } else {
    for (; lhs != last_aligned; lhs += 4) {
      __m128i neq = _mm_xor_si128(_mm_lddqu_si128((__m128i *)lhs), mask);
      if (!_mm_testz_si128(neq, neq)) {
        return false;
      }
    }
  }
#endif  // !__SSE4_1__

  switch (last - last_aligned) {
    case 3:
      if (lhs[2] != 0xffffffffu) {
        return false;
      }
      /* FALLTHRU */
    case 2:
      if (lhs[1] != 0xffffffffu) {
        return false;
      }
      /* FALLTHRU */
    case 1:
      if (lhs[0] != 0xffffffffu) {
        return false;
      }
  }
  return true;
}

static inline bool bitset_test_any(const uint32_t *lhs, size_t size) {
  const uint32_t *last = lhs + size;
  const uint32_t *last_aligned = lhs + ((size >> 2) << 2);

#ifndef __SSE4_1__
  static const __m128i zero = _mm_setzero_si128();

  if (((uintptr_t)lhs & 0xf) == 0) {
    for (; lhs != last_aligned; lhs += 4) {
      __m128i eq = _mm_cmpeq_epi32(_mm_load_si128((__m128i *)lhs), zero);
      if (_mm_movemask_epi8(eq) != 0xffffu) {
        return true;
      }
    }
  } else {
    for (; lhs != last_aligned; lhs += 4) {
      __m128i eq = _mm_cmpeq_epi32(_mm_lddqu_si128((__m128i *)lhs), zero);
      if (_mm_movemask_epi8(eq) != 0xffffu) {
        return true;
      }
    }
  }
#else
  if (((uintptr_t)lhs & 0xf) == 0) {
    for (; lhs != last_aligned; lhs += 4) {
      __m128i xmm0 = _mm_load_si128((__m128i *)lhs);
      if (!_mm_testz_si128(xmm0, xmm0)) {
        return true;
      }
    }
  } else {
    for (; lhs != last_aligned; lhs += 4) {
      __m128i xmm0 = _mm_lddqu_si128((__m128i *)lhs);
      if (!_mm_testz_si128(xmm0, xmm0)) {
        return true;
      }
    }
  }
#endif  // !__SSE4_1__

  switch (last - last_aligned) {
    case 3:
      if (lhs[2] != 0u) {
        return true;
      }
      /* FALLTHRU */
    case 2:
      if (lhs[1] != 0u) {
        return true;
      }
      /* FALLTHRU */
    case 1:
      if (lhs[0] != 0u) {
        return true;
      }
  }
  return false;
}

static inline bool bitset_test_none(const uint32_t *lhs, size_t size) {
  const uint32_t *last = lhs + size;
  const uint32_t *last_aligned = lhs + ((size >> 2) << 2);

#ifndef __SSE4_1__
  static __m128i zero = _mm_setzero_si128();

  if (((uintptr_t)lhs & 0xf) == 0) {
    for (; lhs != last_aligned; lhs += 4) {
      __m128i eq = _mm_cmpeq_epi32(_mm_load_si128((__m128i *)lhs), zero);
      if (_mm_movemask_epi8(eq) != 0xffffu) {
        return false;
      }
    }
  } else {
    for (; lhs != last_aligned; lhs += 4) {
      __m128i eq = _mm_cmpeq_epi32(_mm_lddqu_si128((__m128i *)lhs), zero);
      if (_mm_movemask_epi8(eq) != 0xffffu) {
        return false;
      }
    }
  }
#else
  if (((uintptr_t)lhs & 0xf) == 0) {
    for (; lhs != last_aligned; lhs += 4) {
      __m128i xmm0 = _mm_load_si128((__m128i *)lhs);
      if (!_mm_testz_si128(xmm0, xmm0)) {
        return false;
      }
    }
  } else {
    for (; lhs != last_aligned; lhs += 4) {
      __m128i xmm0 = _mm_lddqu_si128((__m128i *)lhs);
      if (!_mm_testz_si128(xmm0, xmm0)) {
        return false;
      }
    }
  }
#endif  // !__SSE4_1__

  switch (last - last_aligned) {
    case 3:
      if (lhs[2] != 0u) {
        return false;
      }
      /* FALLTHRU */
    case 2:
      if (lhs[1] != 0u) {
        return false;
      }
      /* FALLTHRU */
    case 1:
      if (lhs[0] != 0u) {
        return false;
      }
  }
  return true;
}

#else
#if defined(AILEGO_M64)
//! In-place AND (portable 64-bit): lhs[i] &= rhs[i] for i in [0, size)
static inline void bitset_and(uint32_t *lhs, const uint32_t *rhs, size_t size) {
  // Eight words per iteration, handled as four 64-bit lanes (high to low).
  for (size_t octets = size >> 3; octets != 0; --octets, lhs += 8, rhs += 8) {
    for (int lane = 6; lane >= 0; lane -= 2) {
      *(uint64_t *)(lhs + lane) &= *(uint64_t *)(rhs + lane);
    }
  }
  // Up to seven leftover words, highest index first.
  for (size_t idx = size & 7u; idx != 0; --idx) {
    lhs[idx - 1] &= rhs[idx - 1];
  }
}

//! In-place AND-NOT (portable 64-bit): lhs[i] &= ~rhs[i] for i in [0, size)
static inline void bitset_andnot(uint32_t *lhs, const uint32_t *rhs,
                                 size_t size) {
  // Eight words per iteration, handled as four 64-bit lanes (high to low).
  for (size_t octets = size >> 3; octets != 0; --octets, lhs += 8, rhs += 8) {
    for (int lane = 6; lane >= 0; lane -= 2) {
      *(uint64_t *)(lhs + lane) &= ~(*(uint64_t *)(rhs + lane));
    }
  }
  // Up to seven leftover words, highest index first.
  for (size_t idx = size & 7u; idx != 0; --idx) {
    lhs[idx - 1] &= ~rhs[idx - 1];
  }
}

//! In-place OR (portable 64-bit): lhs[i] |= rhs[i] for i in [0, size)
static inline void bitset_or(uint32_t *lhs, const uint32_t *rhs, size_t size) {
  // Eight words per iteration, handled as four 64-bit lanes (high to low).
  for (size_t octets = size >> 3; octets != 0; --octets, lhs += 8, rhs += 8) {
    for (int lane = 6; lane >= 0; lane -= 2) {
      *(uint64_t *)(lhs + lane) |= *(uint64_t *)(rhs + lane);
    }
  }
  // Up to seven leftover words, highest index first.
  for (size_t idx = size & 7u; idx != 0; --idx) {
    lhs[idx - 1] |= rhs[idx - 1];
  }
}

//! In-place XOR (portable 64-bit): lhs[i] ^= rhs[i] for i in [0, size)
static inline void bitset_xor(uint32_t *lhs, const uint32_t *rhs, size_t size) {
  // Eight words per iteration, handled as four 64-bit lanes (high to low).
  for (size_t octets = size >> 3; octets != 0; --octets, lhs += 8, rhs += 8) {
    for (int lane = 6; lane >= 0; lane -= 2) {
      *(uint64_t *)(lhs + lane) ^= *(uint64_t *)(rhs + lane);
    }
  }
  // Up to seven leftover words, highest index first.
  for (size_t idx = size & 7u; idx != 0; --idx) {
    lhs[idx - 1] ^= rhs[idx - 1];
  }
}

//! In-place bitwise NOT of each of the `size` 32-bit words at `lhs`
static inline void bitset_not(uint32_t *lhs, size_t size) {
  // End of the part that is a whole number of eight-word groups
  uint32_t *const bulk_end = lhs + (size & ~(size_t)7);

  while (lhs != bulk_end) {
    // Invert eight words as four 64-bit lanes, top lane first
    uint64_t *lanes = (uint64_t *)lhs;
    lanes[3] = ~lanes[3];
    lanes[2] = ~lanes[2];
    lanes[1] = ~lanes[1];
    lanes[0] = ~lanes[0];
    lhs += 8;
  }

  // Invert the leftover words (at most seven), highest index first
  for (size_t idx = size & 7u; idx != 0; --idx) {
    lhs[idx - 1] = ~lhs[idx - 1];
  }
}

//! Check whether every bit in the `size` 32-bit words at `lhs` is set
static inline bool bitset_test_all(const uint32_t *lhs, size_t size) {
  const uint64_t full64 = (uint64_t)-1;
  const uint32_t full32 = (uint32_t)-1;

  // Whole groups of eight words, inspected as four 64-bit lanes (top first)
  for (size_t groups = size >> 3; groups != 0; --groups, lhs += 8) {
    const uint64_t *lanes = (const uint64_t *)lhs;
    if (lanes[3] != full64 || lanes[2] != full64 || lanes[1] != full64 ||
        lanes[0] != full64) {
      return false;
    }
  }

  // Trailing words (at most seven), highest index first
  for (size_t idx = size & 7u; idx != 0; --idx) {
    if (lhs[idx - 1] != full32) {
      return false;
    }
  }
  return true;
}

//! Check whether at least one bit in the `size` 32-bit words at `lhs` is set
static inline bool bitset_test_any(const uint32_t *lhs, size_t size) {
  // Whole groups of eight words, inspected as four 64-bit lanes (top first)
  for (size_t groups = size >> 3; groups != 0; --groups, lhs += 8) {
    const uint64_t *lanes = (const uint64_t *)lhs;
    if (lanes[3] != 0u || lanes[2] != 0u || lanes[1] != 0u ||
        lanes[0] != 0u) {
      return true;
    }
  }

  // Trailing words (at most seven), highest index first
  for (size_t idx = size & 7u; idx != 0; --idx) {
    if (lhs[idx - 1] != 0u) {
      return true;
    }
  }
  return false;
}

//! Check whether no bit in the `size` 32-bit words at `lhs` is set
static inline bool bitset_test_none(const uint32_t *lhs, size_t size) {
  // Whole groups of eight words, inspected as four 64-bit lanes (top first)
  for (size_t groups = size >> 3; groups != 0; --groups, lhs += 8) {
    const uint64_t *lanes = (const uint64_t *)lhs;
    if (lanes[3] != 0u || lanes[2] != 0u || lanes[1] != 0u ||
        lanes[0] != 0u) {
      return false;
    }
  }

  // Trailing words (at most seven), highest index first
  for (size_t idx = size & 7u; idx != 0; --idx) {
    if (lhs[idx - 1] != 0u) {
      return false;
    }
  }
  return true;
}

#else   // AILEGO_M64
//! In-place bitwise AND: lhs[i] &= rhs[i] for each of the `size` 32-bit words
static inline void bitset_and(uint32_t *lhs, const uint32_t *rhs, size_t size) {
  // Four words per pass, highest index first
  for (size_t quads = size >> 2; quads != 0; --quads) {
    lhs[3] &= rhs[3];
    lhs[2] &= rhs[2];
    lhs[1] &= rhs[1];
    lhs[0] &= rhs[0];
    lhs += 4;
    rhs += 4;
  }
  // Up to three leftover words, highest index first
  for (size_t idx = size & 3u; idx != 0; --idx) {
    lhs[idx - 1] &= rhs[idx - 1];
  }
}

//! In-place AND-NOT: lhs[i] &= ~rhs[i] for each of the `size` 32-bit words
static inline void bitset_andnot(uint32_t *lhs, const uint32_t *rhs,
                                 size_t size) {
  size_t remaining = size;

  // Four words per pass, highest index first
  while (remaining >= 4u) {
    lhs[3] &= ~rhs[3];
    lhs[2] &= ~rhs[2];
    lhs[1] &= ~rhs[1];
    lhs[0] &= ~rhs[0];
    lhs += 4;
    rhs += 4;
    remaining -= 4u;
  }

  // Fewer than four words are left; walk them from the last one backwards
  while (remaining != 0) {
    --remaining;
    lhs[remaining] &= ~rhs[remaining];
  }
}

//! In-place bitwise OR: lhs[i] |= rhs[i] for each of the `size` 32-bit words
static inline void bitset_or(uint32_t *lhs, const uint32_t *rhs, size_t size) {
  // Four words per pass, highest index first
  for (size_t quads = size >> 2; quads != 0; --quads) {
    lhs[3] |= rhs[3];
    lhs[2] |= rhs[2];
    lhs[1] |= rhs[1];
    lhs[0] |= rhs[0];
    lhs += 4;
    rhs += 4;
  }
  // Up to three leftover words, highest index first
  for (size_t idx = size & 3u; idx != 0; --idx) {
    lhs[idx - 1] |= rhs[idx - 1];
  }
}

//! In-place bitwise XOR: lhs[i] ^= rhs[i] for each of the `size` 32-bit words
static inline void bitset_xor(uint32_t *lhs, const uint32_t *rhs, size_t size) {
  size_t remaining = size;

  // Four words per pass, highest index first
  while (remaining >= 4u) {
    lhs[3] ^= rhs[3];
    lhs[2] ^= rhs[2];
    lhs[1] ^= rhs[1];
    lhs[0] ^= rhs[0];
    lhs += 4;
    rhs += 4;
    remaining -= 4u;
  }

  // Fewer than four words are left; walk them from the last one backwards
  while (remaining != 0) {
    --remaining;
    lhs[remaining] ^= rhs[remaining];
  }
}

//! In-place bitwise NOT of each of the `size` 32-bit words at `lhs`
static inline void bitset_not(uint32_t *lhs, size_t size) {
  // End of the part that is a whole number of four-word groups
  uint32_t *const bulk_end = lhs + (size & ~(size_t)3);

  while (lhs != bulk_end) {
    lhs[3] = ~lhs[3];
    lhs[2] = ~lhs[2];
    lhs[1] = ~lhs[1];
    lhs[0] = ~lhs[0];
    lhs += 4;
  }

  // Invert the leftover words (at most three), highest index first
  for (size_t idx = size & 3u; idx != 0; --idx) {
    lhs[idx - 1] = ~lhs[idx - 1];
  }
}

//! Check whether every bit in the `size` 32-bit words at `lhs` is set
static inline bool bitset_test_all(const uint32_t *lhs, size_t size) {
  const uint32_t full = (uint32_t)-1;

  // Whole groups of four words, highest index first
  for (size_t quads = size >> 2; quads != 0; --quads, lhs += 4) {
    if (lhs[3] != full || lhs[2] != full || lhs[1] != full ||
        lhs[0] != full) {
      return false;
    }
  }

  // Trailing words (at most three), highest index first
  for (size_t idx = size & 3u; idx != 0; --idx) {
    if (lhs[idx - 1] != full) {
      return false;
    }
  }
  return true;
}

//! Check whether at least one bit in the `size` 32-bit words at `lhs` is set
static inline bool bitset_test_any(const uint32_t *lhs, size_t size) {
  // Whole groups of four words, highest index first
  for (size_t quads = size >> 2; quads != 0; --quads, lhs += 4) {
    if (lhs[3] != 0u || lhs[2] != 0u || lhs[1] != 0u || lhs[0] != 0u) {
      return true;
    }
  }

  // Trailing words (at most three), highest index first
  for (size_t idx = size & 3u; idx != 0; --idx) {
    if (lhs[idx - 1] != 0u) {
      return true;
    }
  }
  return false;
}

//! Check whether no bit in the `size` 32-bit words at `lhs` is set
static inline bool bitset_test_none(const uint32_t *lhs, size_t size) {
  // Whole groups of four words, highest index first
  for (size_t quads = size >> 2; quads != 0; --quads, lhs += 4) {
    if (lhs[3] != 0u || lhs[2] != 0u || lhs[1] != 0u || lhs[0] != 0u) {
      return false;
    }
  }

  // Trailing words (at most three), highest index first
  for (size_t idx = size & 3u; idx != 0; --idx) {
    if (lhs[idx - 1] != 0u) {
      return false;
    }
  }
  return true;
}
#endif  // AILEGO_M64
#endif  // __AVX2__

#if (defined(__ARM_NEON) && defined(__aarch64__))
static inline size_t bitset_cardinality(const uint32_t *lhs, size_t size) {
  const uint32_t *last = lhs + size;
  const uint32_t *last_aligned = lhs + ((size >> 2) << 2);
  size_t count = 0;

  while (lhs != last_aligned) {
    const uint32_t *last_stage =
        (last_aligned <= lhs + 124u) ? last_aligned : lhs + 124u;

    uint8x16_t v_count = vdupq_n_u8(0);
    for (; lhs != last_stage; lhs += 4) {
      v_count = vaddq_u8(vcntq_u8(vld1q_u8((const uint8_t *)lhs)), v_count);
    }

    v_count = vreinterpretq_u8_u16(vpaddlq_u8(v_count));
    count += vaddvq_u16(vreinterpretq_u16_u8(v_count));
  }

  switch (last - last_aligned) {
    case 3:
      count += bitset_popcount32(lhs[2]);
      /* FALLTHRU */
    case 2:
      count += bitset_popcount32(lhs[1]);
      /* FALLTHRU */
    case 1:
      count += bitset_popcount32(lhs[0]);
  }
  return count;
}

static inline size_t bitset_xor_cardinality(const uint32_t *lhs,
                                            const uint32_t *rhs, size_t size) {
  const uint32_t *last = lhs + size;
  const uint32_t *last_aligned = lhs + ((size >> 2) << 2);
  size_t count = 0;

  while (lhs != last_aligned) {
    const uint32_t *last_stage =
        (last_aligned <= lhs + 124u) ? last_aligned : lhs + 124u;

    uint8x16_t v_count = vdupq_n_u8(0);
    for (; lhs != last_stage; lhs += 4, rhs += 4) {
      v_count = vaddq_u8(vcntq_u8(veorq_u8(vld1q_u8((const uint8_t *)lhs),
                                           vld1q_u8((const uint8_t *)rhs))),
                         v_count);
    }

    v_count = vreinterpretq_u8_u16(vpaddlq_u8(v_count));
    count += vaddvq_u16(vreinterpretq_u16_u8(v_count));
  }

  switch (last - last_aligned) {
    case 3:
      count += bitset_popcount32(lhs[2] ^ rhs[2]);
      /* FALLTHRU */
    case 2:
      count += bitset_popcount32(lhs[1] ^ rhs[1]);
      /* FALLTHRU */
    case 1:
      count += bitset_popcount32(lhs[0] ^ rhs[0]);
  }
  return count;
}

static inline size_t bitset_and_cardinality(const uint32_t *lhs,
                                            const uint32_t *rhs, size_t size) {
  const uint32_t *last = lhs + size;
  const uint32_t *last_aligned = lhs + ((size >> 2) << 2);
  size_t count = 0;

  while (lhs != last_aligned) {
    const uint32_t *last_stage =
        (last_aligned <= lhs + 124u) ? last_aligned : lhs + 124u;

    uint8x16_t v_count = vdupq_n_u8(0);
    for (; lhs != last_stage; lhs += 4, rhs += 4) {
      v_count = vaddq_u8(vcntq_u8(vandq_u8(vld1q_u8((const uint8_t *)lhs),
                                           vld1q_u8((const uint8_t *)rhs))),
                         v_count);
    }

    v_count = vreinterpretq_u8_u16(vpaddlq_u8(v_count));
    count += vaddvq_u16(vreinterpretq_u16_u8(v_count));
  }

  switch (last - last_aligned) {
    case 3:
      count += bitset_popcount32(lhs[2] & rhs[2]);
      /* FALLTHRU */
    case 2:
      count += bitset_popcount32(lhs[1] & rhs[1]);
      /* FALLTHRU */
    case 1:
      count += bitset_popcount32(lhs[0] & rhs[0]);
  }
  return count;
}

static inline size_t bitset_andnot_cardinality(const uint32_t *lhs,
                                               const uint32_t *rhs,
                                               size_t size) {
  const uint32_t *last = lhs + size;
  const uint32_t *last_aligned = lhs + ((size >> 2) << 2);
  size_t count = 0;

  while (lhs != last_aligned) {
    const uint32_t *last_stage =
        (last_aligned <= lhs + 124u) ? last_aligned : lhs + 124u;

    uint8x16_t v_count = vdupq_n_u8(0);
    for (; lhs != last_stage; lhs += 4, rhs += 4) {
      v_count = vaddq_u8(vcntq_u8(vbicq_u8(vld1q_u8((const uint8_t *)lhs),
                                           vld1q_u8((const uint8_t *)rhs))),
                         v_count);
    }

    v_count = vreinterpretq_u8_u16(vpaddlq_u8(v_count));
    count += vaddvq_u16(vreinterpretq_u16_u8(v_count));
  }

  switch (last - last_aligned) {
    case 3:
      count += bitset_popcount32(lhs[2] & ~rhs[2]);
      /* FALLTHRU */
    case 2:
      count += bitset_popcount32(lhs[1] & ~rhs[1]);
      /* FALLTHRU */
    case 1:
      count += bitset_popcount32(lhs[0] & ~rhs[0]);
  }
  return count;
}

static inline size_t bitset_or_cardinality(const uint32_t *lhs,
                                           const uint32_t *rhs, size_t size) {
  const uint32_t *last = lhs + size;
  const uint32_t *last_aligned = lhs + ((size >> 2) << 2);
  size_t count = 0;

  while (lhs != last_aligned) {
    const uint32_t *last_stage =
        (last_aligned <= lhs + 124u) ? last_aligned : lhs + 124u;

    uint8x16_t v_count = vdupq_n_u8(0);
    for (; lhs != last_stage; lhs += 4, rhs += 4) {
      v_count = vaddq_u8(vcntq_u8(vorrq_u8(vld1q_u8((const uint8_t *)lhs),
                                           vld1q_u8((const uint8_t *)rhs))),
                         v_count);
    }

    v_count = vreinterpretq_u8_u16(vpaddlq_u8(v_count));
    count += vaddvq_u16(vreinterpretq_u16_u8(v_count));
  }

  switch (last - last_aligned) {
    case 3:
      count += bitset_popcount32(lhs[2] | rhs[2]);
      /* FALLTHRU */
    case 2:
      count += bitset_popcount32(lhs[1] | rhs[1]);
      /* FALLTHRU */
    case 1:
      count += bitset_popcount32(lhs[0] | rhs[0]);
  }
  return count;
}

#elif defined(AILEGO_M64)
//! Count the set bits in the `size` 32-bit words at `lhs`
static inline size_t bitset_cardinality(const uint32_t *lhs, size_t size) {
  size_t total = 0;

  // Whole groups of eight words, counted as four 64-bit lanes (top first)
  for (size_t groups = size >> 3; groups != 0; --groups, lhs += 8) {
    const uint64_t *lanes = (const uint64_t *)lhs;
    total += bitset_popcount64(lanes[3]);
    total += bitset_popcount64(lanes[2]);
    total += bitset_popcount64(lanes[1]);
    total += bitset_popcount64(lanes[0]);
  }

  // Trailing words (at most seven), highest index first
  for (size_t idx = size & 7u; idx != 0; --idx) {
    total += bitset_popcount32(lhs[idx - 1]);
  }
  return total;
}

//! Count the set bits of (lhs XOR rhs) over `size` 32-bit words
static inline size_t bitset_xor_cardinality(const uint32_t *lhs,
                                            const uint32_t *rhs, size_t size) {
  size_t total = 0;

  // Whole groups of eight words, combined as four 64-bit lanes (top first)
  for (size_t groups = size >> 3; groups != 0; --groups) {
    const uint64_t *left = (const uint64_t *)lhs;
    const uint64_t *right = (const uint64_t *)rhs;
    total += bitset_popcount64(left[3] ^ right[3]);
    total += bitset_popcount64(left[2] ^ right[2]);
    total += bitset_popcount64(left[1] ^ right[1]);
    total += bitset_popcount64(left[0] ^ right[0]);
    lhs += 8;
    rhs += 8;
  }

  // Trailing words (at most seven), highest index first
  for (size_t idx = size & 7u; idx != 0; --idx) {
    total += bitset_popcount32(lhs[idx - 1] ^ rhs[idx - 1]);
  }
  return total;
}

//! Count the set bits of (lhs AND rhs) over `size` 32-bit words
static inline size_t bitset_and_cardinality(const uint32_t *lhs,
                                            const uint32_t *rhs, size_t size) {
  size_t total = 0;

  // Whole groups of eight words, combined as four 64-bit lanes (top first)
  for (size_t groups = size >> 3; groups != 0; --groups) {
    const uint64_t *left = (const uint64_t *)lhs;
    const uint64_t *right = (const uint64_t *)rhs;
    total += bitset_popcount64(left[3] & right[3]);
    total += bitset_popcount64(left[2] & right[2]);
    total += bitset_popcount64(left[1] & right[1]);
    total += bitset_popcount64(left[0] & right[0]);
    lhs += 8;
    rhs += 8;
  }

  // Trailing words (at most seven), highest index first
  for (size_t idx = size & 7u; idx != 0; --idx) {
    total += bitset_popcount32(lhs[idx - 1] & rhs[idx - 1]);
  }
  return total;
}

//! Count the set bits of (lhs AND NOT rhs) over `size` 32-bit words
static inline size_t bitset_andnot_cardinality(const uint32_t *lhs,
                                               const uint32_t *rhs,
                                               size_t size) {
  size_t total = 0;

  // Whole groups of eight words, combined as four 64-bit lanes (top first)
  for (size_t groups = size >> 3; groups != 0; --groups) {
    const uint64_t *left = (const uint64_t *)lhs;
    const uint64_t *right = (const uint64_t *)rhs;
    total += bitset_popcount64(left[3] & ~(right[3]));
    total += bitset_popcount64(left[2] & ~(right[2]));
    total += bitset_popcount64(left[1] & ~(right[1]));
    total += bitset_popcount64(left[0] & ~(right[0]));
    lhs += 8;
    rhs += 8;
  }

  // Trailing words (at most seven), highest index first
  for (size_t idx = size & 7u; idx != 0; --idx) {
    total += bitset_popcount32(lhs[idx - 1] & ~rhs[idx - 1]);
  }
  return total;
}

//! Count the set bits of (lhs OR rhs) over `size` 32-bit words
static inline size_t bitset_or_cardinality(const uint32_t *lhs,
                                           const uint32_t *rhs, size_t size) {
  size_t total = 0;

  // Whole groups of eight words, combined as four 64-bit lanes (top first)
  for (size_t groups = size >> 3; groups != 0; --groups) {
    const uint64_t *left = (const uint64_t *)lhs;
    const uint64_t *right = (const uint64_t *)rhs;
    total += bitset_popcount64(left[3] | right[3]);
    total += bitset_popcount64(left[2] | right[2]);
    total += bitset_popcount64(left[1] | right[1]);
    total += bitset_popcount64(left[0] | right[0]);
    lhs += 8;
    rhs += 8;
  }

  // Trailing words (at most seven), highest index first
  for (size_t idx = size & 7u; idx != 0; --idx) {
    total += bitset_popcount32(lhs[idx - 1] | rhs[idx - 1]);
  }
  return total;
}

#else   // !__ARM_NEON && !AILEGO_M64
//! Count the set bits in the `size` 32-bit words at `lhs`
static inline size_t bitset_cardinality(const uint32_t *lhs, size_t size) {
  size_t total = 0;

  // Whole groups of four words, highest index first
  for (size_t quads = size >> 2; quads != 0; --quads, lhs += 4) {
    total += bitset_popcount32(lhs[3]);
    total += bitset_popcount32(lhs[2]);
    total += bitset_popcount32(lhs[1]);
    total += bitset_popcount32(lhs[0]);
  }

  // Trailing words (at most three), highest index first
  for (size_t idx = size & 3u; idx != 0; --idx) {
    total += bitset_popcount32(lhs[idx - 1]);
  }
  return total;
}

//! Count the set bits of (lhs XOR rhs) over `size` 32-bit words
static inline size_t bitset_xor_cardinality(const uint32_t *lhs,
                                            const uint32_t *rhs, size_t size) {
  size_t total = 0;

  // Whole groups of four words, highest index first
  for (size_t quads = size >> 2; quads != 0; --quads) {
    total += bitset_popcount32(lhs[3] ^ rhs[3]);
    total += bitset_popcount32(lhs[2] ^ rhs[2]);
    total += bitset_popcount32(lhs[1] ^ rhs[1]);
    total += bitset_popcount32(lhs[0] ^ rhs[0]);
    lhs += 4;
    rhs += 4;
  }

  // Trailing words (at most three), highest index first
  for (size_t idx = size & 3u; idx != 0; --idx) {
    total += bitset_popcount32(lhs[idx - 1] ^ rhs[idx - 1]);
  }
  return total;
}

//! Count the set bits of (lhs AND rhs) over `size` 32-bit words
static inline size_t bitset_and_cardinality(const uint32_t *lhs,
                                            const uint32_t *rhs, size_t size) {
  size_t total = 0;

  // Whole groups of four words, highest index first
  for (size_t quads = size >> 2; quads != 0; --quads) {
    total += bitset_popcount32(lhs[3] & rhs[3]);
    total += bitset_popcount32(lhs[2] & rhs[2]);
    total += bitset_popcount32(lhs[1] & rhs[1]);
    total += bitset_popcount32(lhs[0] & rhs[0]);
    lhs += 4;
    rhs += 4;
  }

  // Trailing words (at most three), highest index first
  for (size_t idx = size & 3u; idx != 0; --idx) {
    total += bitset_popcount32(lhs[idx - 1] & rhs[idx - 1]);
  }
  return total;
}

//! Count the set bits of (lhs AND NOT rhs) over `size` 32-bit words
static inline size_t bitset_andnot_cardinality(const uint32_t *lhs,
                                               const uint32_t *rhs,
                                               size_t size) {
  size_t total = 0;

  // Whole groups of four words, highest index first
  for (size_t quads = size >> 2; quads != 0; --quads) {
    total += bitset_popcount32(lhs[3] & ~rhs[3]);
    total += bitset_popcount32(lhs[2] & ~rhs[2]);
    total += bitset_popcount32(lhs[1] & ~rhs[1]);
    total += bitset_popcount32(lhs[0] & ~rhs[0]);
    lhs += 4;
    rhs += 4;
  }

  // Trailing words (at most three), highest index first
  for (size_t idx = size & 3u; idx != 0; --idx) {
    total += bitset_popcount32(lhs[idx - 1] & ~rhs[idx - 1]);
  }
  return total;
}

//! Count the set bits of (lhs OR rhs) over `size` 32-bit words
static inline size_t bitset_or_cardinality(const uint32_t *lhs,
                                           const uint32_t *rhs, size_t size) {
  size_t total = 0;

  // Whole groups of four words, highest index first
  for (size_t quads = size >> 2; quads != 0; --quads) {
    total += bitset_popcount32(lhs[3] | rhs[3]);
    total += bitset_popcount32(lhs[2] | rhs[2]);
    total += bitset_popcount32(lhs[1] | rhs[1]);
    total += bitset_popcount32(lhs[0] | rhs[0]);
    lhs += 4;
    rhs += 4;
  }

  // Trailing words (at most three), highest index first
  for (size_t idx = size & 3u; idx != 0; --idx) {
    total += bitset_popcount32(lhs[idx - 1] | rhs[idx - 1]);
  }
  return total;
}
#endif  // __ARM_NEON && __aarch64__

namespace zvec {

namespace ailego {

void BitsetHelper::BitwiseAnd(uint32_t *lhs, const uint32_t *rhs, size_t size) {
  bitset_and(lhs, rhs, size);
}

void BitsetHelper::BitwiseAndnot(uint32_t *lhs, const uint32_t *rhs,
                                 size_t size) {
  bitset_andnot(lhs, rhs, size);
}

void BitsetHelper::BitwiseOr(uint32_t *lhs, const uint32_t *rhs, size_t size) {
  bitset_or(lhs, rhs, size);
}

void BitsetHelper::BitwiseXor(uint32_t *lhs, const uint32_t *rhs, size_t size) {
  bitset_xor(lhs, rhs, size);
}

void BitsetHelper::BitwiseNot(uint32_t *arr, size_t size) {
  bitset_not(arr, size);
}

bool BitsetHelper::TestAll(const uint32_t *arr, size_t size) {
  return bitset_test_all(arr, size);
}

bool BitsetHelper::TestAny(const uint32_t *arr, size_t size) {
  return bitset_test_any(arr, size);
}

bool BitsetHelper::TestNone(const uint32_t *arr, size_t size) {
  return bitset_test_none(arr, size);
}

size_t BitsetHelper::BitwiseAndCardinality(const uint32_t *lhs,
                                           const uint32_t *rhs, size_t size) {
  return bitset_and_cardinality(lhs, rhs, size);
}

size_t BitsetHelper::BitwiseOrCardinality(const uint32_t *lhs,
                                          const uint32_t *rhs, size_t size) {
  return bitset_or_cardinality(lhs, rhs, size);
}

size_t BitsetHelper::BitwiseAndnotCardinality(const uint32_t *lhs,
                                              const uint32_t *rhs,
                                              size_t size) {
  return bitset_andnot_cardinality(lhs, rhs, size);
}

size_t BitsetHelper::BitwiseXorCardinality(const uint32_t *lhs,
                                           const uint32_t *rhs, size_t size) {
  return bitset_xor_cardinality(lhs, rhs, size);
}

size_t BitsetHelper::Cardinality(const uint32_t *arr, size_t size) {
  return bitset_cardinality(arr, size);
}

bool BitsetHelper::test_all(void) const {
  return bitset_test_all(array_, size_);
}

bool BitsetHelper::test_any(void) const {
  return bitset_test_any(array_, size_);
}

bool BitsetHelper::test_none(void) const {
  return bitset_test_none(array_, size_);
}

size_t BitsetHelper::cardinality(void) const {
  return bitset_cardinality(array_, size_);
}

}  // namespace ailego

}  // namespace zvec

================================================
FILE: src/ailego/utility/bitset_helper.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <vector>
#include <zvec/ailego/internal/platform.h>

namespace zvec {

namespace ailego {

/*! Bitset Helper
 *
 *  Non-owning view that treats a caller-supplied buffer as an array of
 *  32-bit words and provides bit-level access on top of it. The helper never
 *  allocates or frees the buffer; bit `num` lives in word `num >> 5` at bit
 *  position `num & 0x1f`.
 */
class BitsetHelper {
 public:
  //! Constructor (no buffer mounted)
  BitsetHelper(void) {}

  //! Constructor: mount `buf` of `len` bytes; trailing bytes that do not
  //! make up a whole 32-bit word are ignored
  BitsetHelper(void *buf, size_t len)
      : array_(reinterpret_cast<uint32_t *>(buf)),
        size_(len / sizeof(uint32_t)) {}

  //! Mount a buffer as bitset (`len` is in bytes, rounded down to whole
  //! 32-bit words)
  void mount(void *buf, size_t len) {
    array_ = reinterpret_cast<uint32_t *>(buf);
    size_ = len / sizeof(uint32_t);
  }

  //! Umount the buffer
  void umount(void) {
    array_ = nullptr;
    size_ = 0u;
  }

  //! Clear the bitset (set every bit to false)
  void clear(void) {
    // Nothing to clear when no buffer is mounted; passing a null pointer to
    // memset is undefined even for a zero length.
    if (array_ == nullptr) {
      return;
    }
    memset(array_, 0, sizeof(uint32_t) * size_);
  }

  //! Test a bit in bitset
  bool test(size_t num) const {
    ailego_assert_with((size_ << 5) > num, "overflow argument");
    return ((array_[num >> 5] & (1u << (num & 0x1f))) != 0);
  }

  //! Set a bit in bitset
  void set(size_t num) {
    ailego_assert_with((size_ << 5) > num, "overflow argument");
    uint32_t mask = (1u << (num & 0x1f));
    array_[num >> 5] |= mask;
  }

  //! Reset a bit in bitset
  void reset(size_t num) {
    ailego_assert_with((size_ << 5) > num, "overflow argument");
    uint32_t mask = (1u << (num & 0x1f));
    array_[num >> 5] &= ~mask;
  }

  //! Toggle a bit in bitset
  void flip(size_t num) {
    ailego_assert_with((size_ << 5) > num, "overflow argument");
    uint32_t mask = (1u << (num & 0x1f));
    array_[num >> 5] ^= mask;
  }

  //! Extract the bitset to an array: append `base + index` to `out` for
  //! every set bit, in ascending index order
  void extract(size_t base, std::vector<size_t> *out) const {
    const uint32_t *iter = array_;
    const uint32_t *last = array_ + size_;

    for (; iter != last; ++iter) {
      uint32_t w = *iter;

      // Peel off the lowest set bit until the word is empty
      while (w != 0) {
        uint32_t c = ailego_ctz32(w);
        w &= ~(1u << c);
        out->push_back(base + c);
      }
      base += 32u;
    }
  }

  //! Extract the bitset to an array (indices start at zero)
  void extract(std::vector<size_t> *out) const {
    this->extract(0, out);
  }

  //! Check if all bits are set to true
  bool test_all(void) const;

  //! Check if any bits are set to true
  bool test_any(void) const;

  //! Check if none of the bits are set to true
  bool test_none(void) const;

  //! Compute the cardinality of a bitset
  size_t cardinality(void) const;

  //! Calculate the size of buffer (in bytes) if it contains N bits
  static size_t BufferSize(size_t N) {
    return (((N + 0x1f) >> 5) << 2);
  }

  //! Calculate the count of bits can be contained in a buffer of `len` bytes
  static size_t BitsCount(size_t len) {
    // Whole 32-bit words in the buffer, times 32 bits per word. This matches
    // the `size_ << 5` capacity used by the bounds checks above.
    return ((len >> 2) << 5);
  }

  //! Check if all bits are set to true
  static bool TestAll(const uint32_t *arr, size_t size);

  //! Check if any bits are set to true
  static bool TestAny(const uint32_t *arr, size_t size);

  //! Check if none of the bits are set to true
  static bool TestNone(const uint32_t *arr, size_t size);

  //! Compute the AND cardinality between two bitsets
  static size_t BitwiseAndCardinality(const uint32_t *lhs, const uint32_t *rhs,
                                      size_t size);

  //! Compute the OR cardinality between two bitsets
  static size_t BitwiseOrCardinality(const uint32_t *lhs, const uint32_t *rhs,
                                     size_t size);

  //! Compute the ANDNOT cardinality between two bitsets
  static size_t BitwiseAndnotCardinality(const uint32_t *lhs,
                                         const uint32_t *rhs, size_t size);

  //! Compute the XOR cardinality between two bitsets
  static size_t BitwiseXorCardinality(const uint32_t *lhs, const uint32_t *rhs,
                                      size_t size);

  //! Compute the cardinality of a bitset
  static size_t Cardinality(const uint32_t *arr, size_t size);

  //! Perform binary AND
  static void BitwiseAnd(uint32_t *lhs, const uint32_t *rhs, size_t size);

  //! Perform binary AND_NOT
  static void BitwiseAndnot(uint32_t *lhs, const uint32_t *rhs, size_t size);

  //! Perform binary OR
  static void BitwiseOr(uint32_t *lhs, const uint32_t *rhs, size_t size);

  //! Perform binary XOR
  static void BitwiseXor(uint32_t *lhs, const uint32_t *rhs, size_t size);

  //! Perform binary NOT
  static void BitwiseNot(uint32_t *arr, size_t size);

 private:
  uint32_t *array_{nullptr};  //!< Mounted buffer viewed as 32-bit words
  size_t size_{0u};           //!< Number of 32-bit words in `array_`
};

}  // namespace ailego

}  // namespace zvec


================================================
FILE: src/ailego/utility/concurrency_helper.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "concurrency_helper.h"
#include <fstream>
#include <iostream>
#include <thread>
#include <zvec/ailego/utility/file_helper.h>
#include <zvec/ailego/utility/string_helper.h>

namespace zvec {
namespace ailego {

// Refer to:
// https://stackoverflow.com/questions/65551215/get-docker-cpu-memory-limit-inside-container
//
// Determine the usable CPU count. Starts from the hardware concurrency and,
// when a cgroup CPU bandwidth limit is configured, replaces it with
// ceil(quota / period).
//   - cgroup v1: cpu.cfs_quota_us ("-1" means unlimited) and cpu.cfs_period_us
//   - cgroup v2: cpu.max, formatted as "<quota|max> <period>"
ConcurrencyHelper::ConcurrencyHelper() {
  const std::string cfs_quota_us = "/sys/fs/cgroup/cpu/cpu.cfs_quota_us";
  const std::string cfs_period_us = "/sys/fs/cgroup/cpu/cpu.cfs_period_us";
  const std::string cpu_max = "/sys/fs/cgroup/cpu.max";

  concurrency_ = std::thread::hardware_concurrency();

  std::string quota_str;
  std::string period_str;
  if (FileHelper::IsExist(cfs_quota_us.c_str()) &&
      FileHelper::IsExist(cfs_period_us.c_str())) {
    // cgroup v1: quota and period live in two separate files
    std::ifstream quota_ifs(cfs_quota_us, std::ios::in);
    if (quota_ifs.is_open()) {
      quota_ifs >> quota_str;
    }
    std::ifstream period_ifs(cfs_period_us, std::ios::in);
    if (period_ifs.is_open()) {
      period_ifs >> period_str;
    }
  } else if (FileHelper::IsExist(cpu_max.c_str())) {
    // cgroup v2: a single file holds both values
    std::ifstream max_ifs(cpu_max, std::ios::in);
    if (max_ifs.is_open()) {
      max_ifs >> quota_str >> period_str;
    }
  }

  // No limit configured: keep the hardware concurrency
  if (quota_str.empty() || quota_str == "-1" || quota_str == "max") {
    return;
  }

  uint32_t quota_val = 0;
  StringHelper::ToUint32(quota_str, &quota_val);

  uint32_t period_val = 0;
  if (!period_str.empty()) {
    StringHelper::ToUint32(period_str, &period_val);
  }

  if (quota_val > 0 && period_val > 0) {
    // Round up so that a fractional CPU still yields one thread
    concurrency_ = (quota_val + period_val - 1) / period_val;
  }
}

uint32_t ConcurrencyHelper::container_aware_concurrency() {
  static ConcurrencyHelper concurrency_helper;
  return concurrency_helper.concurrency_;
}

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/utility/concurrency_helper.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstdint>

namespace zvec {
namespace ailego {

//! Computes a CPU concurrency value that takes a cgroup CPU quota into
//! account when one is configured.
class ConcurrencyHelper {
 public:
  //! Initialise the concurrency from std::thread::hardware_concurrency(),
  //! then replace it with ceil(quota / period) when the cgroup CPU quota and
  //! period can be read and are both positive.
  ConcurrencyHelper();

  //! get hardware concurrency from either vm or container
  //! (served from a function-local static ConcurrencyHelper instance)
  static uint32_t container_aware_concurrency();

 private:
  //! Concurrency value computed by the constructor
  uint32_t concurrency_{0};
};

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/utility/dl_helper.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "dl_helper.h"
#if !defined(_WIN64) && !defined(_WIN32)
#include <dlfcn.h>
#else
#include <Windows.h>
#endif

namespace zvec {
namespace ailego {

#if !defined(_WIN64) && !defined(_WIN32)
//! Open a shared library with dlopen(RTLD_NOW). Returns the handle, or
//! nullptr on failure; in the failure case the dlerror() text is stored in
//! `err` when `err` is not null.
void *DLHelper::Load(const char *path, std::string *err) {
  void *const library = dlopen(path, RTLD_NOW);
  if (library) {
    return library;
  }
  if (err) {
    *err = dlerror();
  }
  return nullptr;
}

//! Close a library handle with dlclose(); the result of dlclose() is ignored.
void DLHelper::Unload(void *handle) {
  // NOTE(review): macro is defined outside this file; presumably it returns
  // early when `handle` is null -- confirm against platform.h
  ailego_return_if_false(handle);
  dlclose(handle);
}

//! Look up `symbol` in the library `handle` via dlsym() and return its
//! address (whatever dlsym() returns).
void *DLHelper::Symbol(void *handle, const char *symbol) {
  // NOTE(review): macro is defined outside this file; presumably it returns
  // nullptr when either argument is null -- confirm against platform.h
  ailego_null_if_false(handle && symbol);
  return dlsym(handle, symbol);
}

#else
void *DLHelper::Load(const char *path, std::string *err) {
  HMODULE handle = LoadLibraryA(path);
  if (!handle && err) {
    DWORD error_code = GetLastError();
    LPSTR error_msg = nullptr;

    DWORD len = FormatMessageA(
        FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM |
            FORMAT_MESSAGE_IGNORE_INSERTS,
        nullptr, error_code, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
        (LPSTR)&error_msg, 0, nullptr);
    err->assign(error_msg, len);
    LocalFree(error_msg);
  }
  return handle;
}

//! Release a library handle with FreeLibrary(); the result is ignored.
void DLHelper::Unload(void *handle) {
  // NOTE(review): macro is defined outside this file; presumably it returns
  // early when `handle` is null -- confirm against platform.h
  ailego_return_if_false(handle);
  FreeLibrary((HMODULE)handle);
}

void *DLHelper::Symbol(void *handle, const char *symbol) {
  ailego_null_if_false(handle && symbol);
  return GetProcAddress((HMODULE)handle, symbol);
}
#endif  // !_WIN64 && !_WIN32

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/utility/dl_helper.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <string>
#include <zvec/ailego/internal/platform.h>

namespace zvec {
namespace ailego {

/*! Dynamic Library Helper
 *
 *  Static wrappers around the platform loader: dlopen/dlclose/dlsym on
 *  non-Windows builds, LoadLibraryA/FreeLibrary/GetProcAddress on Windows.
 */
struct DLHelper {
  //! Load library from path.
  //! Returns the library handle, or a null pointer on failure. On failure
  //! the platform error text is written to `err` when `err` is not null.
  static void *Load(const char *path, std::string *err);

  //! Unload a library previously returned by Load()
  static void Unload(void *handle);

  //! Retrieve a symbol from a library handle
  //! (returns the address reported by the platform lookup function)
  static void *Symbol(void *handle, const char *symbol);

  //! Load library from path (std::string convenience overload)
  static void *Load(const std::string &path, std::string *err) {
    return DLHelper::Load(path.c_str(), err);
  }

  //! Retrieve a symbol from a library handle (std::string convenience
  //! overload)
  static void *Symbol(void *handle, const std::string &symbol) {
    return DLHelper::Symbol(handle, symbol.c_str());
  }
};

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/utility/file_helper.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <zvec/ailego/utility/file_helper.h>

#if defined(_WIN32) || defined(_WIN64)
#include <Windows.h>
#else
#if defined(__APPLE__) || defined(__MACH__)
#include <mach-o/dyld.h>
#endif
#include <sys/stat.h>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#endif

namespace zvec {
namespace ailego {

//! Retrieve the path of the running executable.
//! Returns false (leaving `path` untouched) when the platform query fails.
bool FileHelper::GetSelfPath(std::string *path) {
#if defined(_WIN32) || defined(_WIN64)
  char buf[MAX_PATH];
  DWORD len = GetModuleFileNameA(NULL, buf, MAX_PATH);
#elif defined(__APPLE__) || defined(__MACH__)
  char buf[PATH_MAX];
  size_t len = 0;

  char dirty_buf[PATH_MAX];
  uint32_t size = sizeof(dirty_buf);
  // realpath() leaves `buf` unspecified when it fails, so only measure the
  // result after both calls have succeeded
  if (_NSGetExecutablePath(dirty_buf, &size) == 0 &&
      realpath(dirty_buf, buf) != nullptr) {
    len = strlen(buf);
  }
#elif defined(__FreeBSD__)
  char buf[PATH_MAX];
  size_t len = PATH_MAX;
  int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
  if (sysctl(mib, 4, &buf, &len, NULL, 0) != 0) {
    len = 0;
  }
#else
  char buf[PATH_MAX];
  // readlink() does not append a terminator; `len` is the byte count
  ssize_t len = readlink("/proc/self/exe", buf, PATH_MAX);
#endif

  if (len <= 0) {
    return false;
  }
  path->assign(buf, len);
  return true;
}

//! Retrieve the path associated with an open file handle.
//! Returns false (leaving `path` untouched) when the platform query fails.
bool FileHelper::GetFilePath(NativeHandle handle, std::string *path) {
#if defined(_WIN32) || defined(_WIN64)
  char buf[MAX_PATH];
  DWORD len =
      GetFinalPathNameByHandleA(handle, buf, MAX_PATH, FILE_NAME_OPENED);
  // When the buffer is too small the API returns the required size instead
  // of filling it; treat that as a failure rather than reading past `buf`
  if (len >= MAX_PATH) {
    len = 0;
  }
#elif defined(__linux) || defined(__linux__)
  char buf[PATH_MAX];
  char src[32];
  snprintf(src, sizeof(src), "/proc/self/fd/%d", handle);
  // readlink() does not append a terminator; `len` is the byte count
  ssize_t len = readlink(src, buf, PATH_MAX);
#else
  char buf[PATH_MAX];
  size_t len = 0;
  if (fcntl(handle, F_GETPATH, buf) != -1) {
    len = strlen(buf);
  }
#endif

  if (len <= 0) {
    return false;
  }
  path->assign(buf, len);
  return true;
}

#if !defined(_WIN32) && !defined(_WIN64)

//! Build "<prefix>/<suffix>" in a freshly malloc'ed buffer.
//! Returns nullptr when the allocation fails; the caller must free() the
//! result.
static inline char *JoinFilePath(const char *prefix, const char *suffix) {
  const size_t head = strlen(prefix);
  const size_t tail = strlen(suffix);

  // prefix + separator + suffix + terminator
  char *joined = static_cast<char *>(malloc(head + 1 + tail + 1));
  if (!joined) {
    return nullptr;
  }

  char *cursor = joined;
  memcpy(cursor, prefix, head);
  cursor += head;
  *cursor++ = '/';
  memcpy(cursor, suffix, tail);
  cursor += tail;
  *cursor = '\0';
  return joined;
}

//! Store the current working directory in `path`.
//! Returns false when getcwd() fails or yields an empty string.
bool FileHelper::GetWorkingDirectory(std::string *path) {
  char cwd[PATH_MAX];

  if (getcwd(cwd, sizeof(cwd)) == nullptr) {
    return false;
  }
  path->assign(cwd);
  return !path->empty();
}

//! Query the size of the file at `path` via stat().
//! On success writes the size to `*psz` and returns true; otherwise returns
//! false and leaves `*psz` untouched.
bool FileHelper::GetFileSize(const char *path, size_t *psz) {
  struct stat info;
  const bool found = (stat(path, &info) == 0);
  if (found) {
    *psz = info.st_size;
  }
  return found;
}

//! Delete the file at `path` with unlink(); true on success.
bool FileHelper::DeleteFile(const char *path) {
  const int rc = unlink(path);
  return rc == 0;
}

//! Rename `oldpath` to `newpath` with rename(); true on success.
bool FileHelper::RenameFile(const char *oldpath, const char *newpath) {
  const int rc = rename(oldpath, newpath);
  return rc == 0;
}

//! Create the directory `path` together with any missing parent directories
//! (mode 0755). Components that already exist are not an error.
//! Returns false when a mkdir() call fails for another reason, or when
//! `path` does not fit into PATH_MAX (errno is set to ENAMETOOLONG).
bool FileHelper::MakePath(const char *path) {
  char pathbuf[PATH_MAX];

  // Refuse over-long input instead of silently creating a truncated path
  const size_t path_len = strlen(path);
  if (path_len >= sizeof(pathbuf)) {
    errno = ENAMETOOLONG;
    return false;
  }
  memcpy(pathbuf, path, path_len + 1);

  char *pp = pathbuf;
  char *sp = nullptr;
  while ((sp = strchr(pp, '/')) != nullptr) {
    // Neither root nor double slash in path
    if (sp != pp) {
      // Temporarily cut the string at this separator to create the prefix
      *sp = '\0';
      if (mkdir(pathbuf, 0755) == -1 && errno != EEXIST) {
        return false;
      }
      *sp = '/';
    }
    pp = sp + 1;
  }

  // Trailing component (absent when the path ends with a slash)
  if (*pp != '\0' && mkdir(pathbuf, 0755) == -1 && errno != EEXIST) {
    return false;
  }
  return true;
}

//! Recursively delete the directory `path` and everything below it.
//! Failures on individual entries are ignored; the return value reflects
//! only the final rmdir() of `path` itself. Symbolic links found inside the
//! tree are unlinked, never followed.
bool FileHelper::RemoveDirectory(const char *path) {
  DIR *dir = opendir(path);
  if (!dir) {
    return false;
  }

  struct dirent *dent;
  while ((dent = readdir(dir)) != nullptr) {
    if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, "..")) {
      continue;
    }
    char *fullpath = JoinFilePath(path, dent->d_name);
    if (!fullpath) {
      continue;
    }

    // lstat() rather than stat(): a symlink pointing at a directory must be
    // removed as a link, otherwise the link target would be emptied
    struct stat st;
    if (lstat(fullpath, &st) == 0 && S_ISDIR(st.st_mode)) {
      FileHelper::RemoveDirectory(fullpath);
    } else {
      FileHelper::DeleteFile(fullpath);
    }
    free(fullpath);
  }
  closedir(dir);
  return (rmdir(path) == 0);
}

//! True when access(path, F_OK) succeeds.
bool FileHelper::IsExist(const char *path) {
  const int rc = access(path, F_OK);
  return rc == 0;
}

//! True when `path` (after following symlinks) is a regular file.
bool FileHelper::IsRegular(const char *path) {
  struct stat buf;
  if (stat(path, &buf) != 0) {
    return false;
  }
  // The file type is an enumerated field, not a bit flag: `mode & S_IFREG`
  // would also match sockets, so compare the masked type
  return S_ISREG(buf.st_mode);
}

//! True when `path` (after following symlinks) is a directory.
bool FileHelper::IsDirectory(const char *path) {
  struct stat buf;
  if (stat(path, &buf) != 0) {
    return false;
  }
  // The file type is an enumerated field, not a bit flag: `mode & S_IFDIR`
  // would also match block devices, so compare the masked type
  return S_ISDIR(buf.st_mode);
}

//! True when `path` itself is a symbolic link.
bool FileHelper::IsSymbolicLink(const char *path) {
  struct stat buf;
  // lstat() inspects the link itself; stat() would follow it and therefore
  // never report a link
  if (lstat(path, &buf) != 0) {
    return false;
  }
  // `mode & S_IFLNK` is non-zero for regular files too; compare the masked
  // file type instead
  return S_ISLNK(buf.st_mode);
}

//! True when both paths resolve, via realpath(), to the same canonical path.
//! Returns false if either path cannot be resolved.
bool FileHelper::IsSame(const char *path1, const char *path2) {
  char resolved_lhs[PATH_MAX];
  char resolved_rhs[PATH_MAX];

  const bool resolved = realpath(path1, resolved_lhs) != nullptr &&
                        realpath(path2, resolved_rhs) != nullptr;
  return resolved && strcmp(resolved_lhs, resolved_rhs) == 0;
}

#else
#undef RemoveDirectory
#undef DeleteFile
#undef GetFileSize

//! Build "<prefix>\<suffix>" in a freshly malloc'ed buffer.
//! Returns nullptr when the allocation fails; the caller must free() the
//! result.
static inline char *JoinFilePath(const char *prefix, const char *suffix) {
  const size_t head = strlen(prefix);
  const size_t tail = strlen(suffix);

  // prefix + separator + suffix + terminator
  char *joined = static_cast<char *>(malloc(head + 1 + tail + 1));
  if (!joined) {
    return nullptr;
  }

  char *cursor = joined;
  memcpy(cursor, prefix, head);
  cursor += head;
  *cursor++ = '\\';
  memcpy(cursor, suffix, tail);
  cursor += tail;
  *cursor = '\0';
  return joined;
}

bool FileHelper::GetWorkingDirectory(std::string *path) {
  char buf[MAX_PATH];
  DWORD len = GetCurrentDirectoryA(MAX_PATH, buf);

  if (len <= 0) {
    return false;
  }
  path->assign(buf, len);
  return true;
}

bool FileHelper::GetFileSize(const char *path, size_t *psz) {
  HANDLE handle = CreateFileA(path, GENERIC_READ, FILE_SHARE_READ, nullptr,
                              OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr);

  LARGE_INTEGER file_size;
  if (!GetFileSizeEx(handle, &file_size)) {
    return false;
  }
  *psz = (size_t)file_size.QuadPart;
  return true;
}

bool FileHelper::DeleteFile(const char *path) {
  // Delete a file by the path
  return (DeleteFileA(path));
}

bool FileHelper::RenameFile(const char *oldpath, const char *newpath) {
  return (MoveFileA(oldpath, newpath));
}

//! Create the directory `path` together with any missing parent directories.
//! Both '/' and '\\' are accepted as separators. Components that already
//! exist are not an error.
//! Returns false when a CreateDirectoryA() call fails for another reason, or
//! when `path` does not fit into MAX_PATH characters.
bool FileHelper::MakePath(const char *path) {
  char pathbuf[MAX_PATH];

  // Refuse over-long input instead of silently creating a truncated path
  const size_t path_len = strlen(path);
  if (path_len >= sizeof(pathbuf)) {
    return false;
  }
  memcpy(pathbuf, path, path_len + 1);

  char *pp = pathbuf;
  char *sp = nullptr;
  while ((sp = strpbrk(pp, "/\\")) != nullptr) {
    // Neither root nor double slash in path
    if (sp != pp) {
      // Temporarily cut the string at this separator to create the prefix
      *sp = '\0';
      if (!CreateDirectoryA(pathbuf, nullptr) &&
          GetLastError() != ERROR_ALREADY_EXISTS) {
        return false;
      }
      // Separators are normalised to backslashes as the walk proceeds
      *sp = '\\';
    }
    pp = sp + 1;
  }

  // Trailing component (absent when the path ends with a separator)
  if (*pp != '\0' && !CreateDirectoryA(pathbuf, nullptr) &&
      GetLastError() != ERROR_ALREADY_EXISTS) {
    return false;
  }
  return true;
}

bool FileHelper::RemoveDirectory(const char *path) {
  char *pathbuf = JoinFilePath(path, "*.*");
  ailego_false_if_false(pathbuf);

  WIN32_FIND_DATAA file_info;
  HANDLE file = FindFirstFileA(pathbuf, &file_info);

  ailego_do_if_false(file != INVALID_HANDLE_VALUE) {
    free(pathbuf);
    FindClose(file);
    return false;
  }

  do {
    if (!strcmp(file_info.cFileName, ".") ||
        !strcmp(file_info.cFileName, "..")) {
      continue;
    }

    char *fullpath = JoinFilePath(path, file_info.cFileName);
    if (!fullpath) {
      continue;
    }

    if (file_info.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
      FileHelper::RemoveDirectory(fullpath);
    } else {
      FileHelper::DeleteFile(fullpath);
    }
    free(fullpath);
  } while (FindNextFileA(file, &file_info));

  free(pathbuf);
  FindClose(file);
  return (!!RemoveDirectoryA(path));
}

bool FileHelper::IsExist(const char *path) {
  DWORD attr = GetFileAttributesA(path);
  return (attr != INVALID_FILE_ATTRIBUTES);
}

bool FileHelper::IsRegular(const char *path) {
  DWORD attr = GetFileAttributesA(path);
  return (attr != INVALID_FILE_ATTRIBUTES &&
          !(attr & FILE_ATTRIBUTE_DIRECTORY));
}

bool FileHelper::IsDirectory(const char *path) {
  DWORD attr = GetFileAttributesA(path);
  return (attr != INVALID_FILE_ATTRIBUTES && (attr & FILE_ATTRIBUTE_DIRECTORY));
}

bool FileHelper::IsSymbolicLink(const char *path) {
  DWORD attr = GetFileAttributesA(path);
  return (attr != INVALID_FILE_ATTRIBUTES &&
          (attr & FILE_ATTRIBUTE_REPARSE_POINT));
}

bool FileHelper::IsSame(const char *path1, const char *path2) {
  char real_path1[MAX_PATH];
  char real_path2[MAX_PATH];
  char **part_path1 = nullptr;
  char **part_path2 = nullptr;
  DWORD path1_size =
      GetFullPathNameA(path1, sizeof(real_path1), real_path1, part_path1);
  DWORD path2_size =
      GetFullPathNameA(path2, sizeof(real_path2), real_path2, part_path2);

  if ((part_path1 && *part_path1 != 0) || (part_path2 && *part_path2 != 0) ||
      (path1_size != path2_size)) {
    return false;
  }
  return (!strcmp(real_path1, real_path2));
}

#endif  // !_WIN32 && !_WIN64

//! Remove `path`: recursively when it is a directory, otherwise as a file.
bool FileHelper::RemovePath(const char *path) {
  return FileHelper::IsDirectory(path) ? FileHelper::RemoveDirectory(path)
                                       : FileHelper::DeleteFile(path);
}

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/utility/float_helper.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <ailego/internal/cpu_features.h>
#include <zvec/ailego/internal/platform.h>
#include <zvec/ailego/utility/float_helper.h>

// #if defined(__F16C__) && defined(__AVX__)
// #define float16(x) _cvtss_sh((x), _MM_FROUND_NO_EXC)
// #define float32(x) _cvtsh_ss(x)
// #endif  // __F16C__ && __AVX__

#if defined(__aarch64__)
static inline float float32(uint16_t val) {
  __fp16 *p = reinterpret_cast<__fp16 *>(&val);
  return *p;
}

static inline uint16_t float16(float val) {
  __fp16 f = static_cast<__fp16>(val);
  uint16_t *fp = reinterpret_cast<uint16_t *>(&f);
  return *fp;
}

//! Widen `size` half-precision values from `arr` into `out`.
static inline void convert_fp16_to_fp32(const uint16_t *arr, size_t size,
                                        float *out) {
  const uint16_t *const last = arr + size;
  while (arr != last) {
    *out++ = float32(*arr++);
  }
}

//! Widen `size` half-precision values from `arr` into `out`, dividing each
//! widened value by `norm`.
static inline void convert_fp16_to_fp32(const uint16_t *arr, size_t size,
                                        float norm, float *out) {
  const uint16_t *const last = arr + size;
  while (arr != last) {
    *out++ = float32(*arr++) / norm;
  }
}

//! Narrow `size` single-precision values from `arr` into `out`.
static inline void convert_fp32_to_fp16(const float *arr, size_t size,
                                        uint16_t *out) {
  const float *const last = arr + size;
  while (arr != last) {
    *out++ = float16(*arr++);
  }
}

//! Narrow `size` single-precision values from `arr` into `out`, dividing
//! each value by `norm` before narrowing.
static inline void convert_fp32_to_fp16(const float *arr, size_t size,
                                        float norm, uint16_t *out) {
  const float *const last = arr + size;
  while (arr != last) {
    *out++ = float16(*arr++ / norm);
  }
}
#else
// Converts a half-precision value, given as its raw 16-bit pattern, to float
// using lookup tables only (no hardware half-precision support required).
// Refer: https://github.com/Maratyszcza/FP16/blob/master/third-party/half.hpp
//
// The upper six bits of `val` (sign and exponent) index `exponent_table` and
// `offset_table`; the lower ten bits (mantissa), shifted by the selected
// offset, index `mantissa_table`. The sum of the two 32-bit table entries is
// the bit pattern of the resulting float.
static inline float float32(uint16_t val) {
  static const uint32_t mantissa_table[2048] = {
      0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34A00000,
      0x34C00000, 0x34E00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000,
      0x35400000, 0x35500000, 0x35600000, 0x35700000, 0x35800000, 0x35880000,
      0x35900000, 0x35980000, 0x35A00000, 0x35A80000, 0x35B00000, 0x35B80000,
      0x35C00000, 0x35C80000, 0x35D00000, 0x35D80000, 0x35E00000, 0x35E80000,
      0x35F00000, 0x35F80000, 0x36000000, 0x36040000, 0x36080000, 0x360C0000,
      0x36100000, 0x36140000, 0x36180000, 0x361C0000, 0x36200000, 0x36240000,
      0x36280000, 0x362C0000, 0x36300000, 0x36340000, 0x36380000, 0x363C0000,
      0x36400000, 0x36440000, 0x36480000, 0x364C0000, 0x36500000, 0x36540000,
      0x36580000, 0x365C0000, 0x36600000, 0x36640000, 0x36680000, 0x366C0000,
      0x36700000, 0x36740000, 0x36780000, 0x367C0000, 0x36800000, 0x36820000,
      0x36840000, 0x36860000, 0x36880000, 0x368A0000, 0x368C0000, 0x368E0000,
      0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369A0000,
      0x369C0000, 0x369E0000, 0x36A00000, 0x36A20000, 0x36A40000, 0x36A60000,
      0x36A80000, 0x36AA0000, 0x36AC0000, 0x36AE0000, 0x36B00000, 0x36B20000,
      0x36B40000, 0x36B60000, 0x36B80000, 0x36BA0000, 0x36BC0000, 0x36BE0000,
      0x36C00000, 0x36C20000, 0x36C40000, 0x36C60000, 0x36C80000, 0x36CA0000,
      0x36CC0000, 0x36CE0000, 0x36D00000, 0x36D20000, 0x36D40000, 0x36D60000,
      0x36D80000, 0x36DA0000, 0x36DC0000, 0x36DE0000, 0x36E00000, 0x36E20000,
      0x36E40000, 0x36E60000, 0x36E80000, 0x36EA0000, 0x36EC0000, 0x36EE0000,
      0x36F00000, 0x36F20000, 0x36F40000, 0x36F60000, 0x36F80000, 0x36FA0000,
      0x36FC0000, 0x36FE0000, 0x37000000, 0x37010000, 0x37020000, 0x37030000,
      0x37040000, 0x37050000, 0x37060000, 0x37070000, 0x37080000, 0x37090000,
      0x370A0000, 0x370B0000, 0x370C0000, 0x370D0000, 0x370E0000, 0x370F0000,
      0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000,
      0x37160000, 0x37170000, 0x37180000, 0x37190000, 0x371A0000, 0x371B0000,
      0x371C0000, 0x371D0000, 0x371E0000, 0x371F0000, 0x37200000, 0x37210000,
      0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000,
      0x37280000, 0x37290000, 0x372A0000, 0x372B0000, 0x372C0000, 0x372D0000,
      0x372E0000, 0x372F0000, 0x37300000, 0x37310000, 0x37320000, 0x37330000,
      0x37340000, 0x37350000, 0x37360000, 0x37370000, 0x37380000, 0x37390000,
      0x373A0000, 0x373B0000, 0x373C0000, 0x373D0000, 0x373E0000, 0x373F0000,
      0x37400000, 0x37410000, 0x37420000, 0x37430000, 0x37440000, 0x37450000,
      0x37460000, 0x37470000, 0x37480000, 0x37490000, 0x374A0000, 0x374B0000,
      0x374C0000, 0x374D0000, 0x374E0000, 0x374F0000, 0x37500000, 0x37510000,
      0x37520000, 0x37530000, 0x37540000, 0x37550000, 0x37560000, 0x37570000,
      0x37580000, 0x37590000, 0x375A0000, 0x375B0000, 0x375C0000, 0x375D0000,
      0x375E0000, 0x375F0000, 0x37600000, 0x37610000, 0x37620000, 0x37630000,
      0x37640000, 0x37650000, 0x37660000, 0x37670000, 0x37680000, 0x37690000,
      0x376A0000, 0x376B0000, 0x376C0000, 0x376D0000, 0x376E0000, 0x376F0000,
      0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000,
      0x37760000, 0x37770000, 0x37780000, 0x37790000, 0x377A0000, 0x377B0000,
      0x377C0000, 0x377D0000, 0x377E0000, 0x377F0000, 0x37800000, 0x37808000,
      0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000,
      0x37840000, 0x37848000, 0x37850000, 0x37858000, 0x37860000, 0x37868000,
      0x37870000, 0x37878000, 0x37880000, 0x37888000, 0x37890000, 0x37898000,
      0x378A0000, 0x378A8000, 0x378B0000, 0x378B8000, 0x378C0000, 0x378C8000,
      0x378D0000, 0x378D8000, 0x378E0000, 0x378E8000, 0x378F0000, 0x378F8000,
      0x37900000, 0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000,
      0x37930000, 0x37938000, 0x37940000, 0x37948000, 0x37950000, 0x37958000,
      0x37960000, 0x37968000, 0x37970000, 0x37978000, 0x37980000, 0x37988000,
      0x37990000, 0x37998000, 0x379A0000, 0x379A8000, 0x379B0000, 0x379B8000,
      0x379C0000, 0x379C8000, 0x379D0000, 0x379D8000, 0x379E0000, 0x379E8000,
      0x379F0000, 0x379F8000, 0x37A00000, 0x37A08000, 0x37A10000, 0x37A18000,
      0x37A20000, 0x37A28000, 0x37A30000, 0x37A38000, 0x37A40000, 0x37A48000,
      0x37A50000, 0x37A58000, 0x37A60000, 0x37A68000, 0x37A70000, 0x37A78000,
      0x37A80000, 0x37A88000, 0x37A90000, 0x37A98000, 0x37AA0000, 0x37AA8000,
      0x37AB0000, 0x37AB8000, 0x37AC0000, 0x37AC8000, 0x37AD0000, 0x37AD8000,
      0x37AE0000, 0x37AE8000, 0x37AF0000, 0x37AF8000, 0x37B00000, 0x37B08000,
      0x37B10000, 0x37B18000, 0x37B20000, 0x37B28000, 0x37B30000, 0x37B38000,
      0x37B40000, 0x37B48000, 0x37B50000, 0x37B58000, 0x37B60000, 0x37B68000,
      0x37B70000, 0x37B78000, 0x37B80000, 0x37B88000, 0x37B90000, 0x37B98000,
      0x37BA0000, 0x37BA8000, 0x37BB0000, 0x37BB8000, 0x37BC0000, 0x37BC8000,
      0x37BD0000, 0x37BD8000, 0x37BE0000, 0x37BE8000, 0x37BF0000, 0x37BF8000,
      0x37C00000, 0x37C08000, 0x37C10000, 0x37C18000, 0x37C20000, 0x37C28000,
      0x37C30000, 0x37C38000, 0x37C40000, 0x37C48000, 0x37C50000, 0x37C58000,
      0x37C60000, 0x37C68000, 0x37C70000, 0x37C78000, 0x37C80000, 0x37C88000,
      0x37C90000, 0x37C98000, 0x37CA0000, 0x37CA8000, 0x37CB0000, 0x37CB8000,
      0x37CC0000, 0x37CC8000, 0x37CD0000, 0x37CD8000, 0x37CE0000, 0x37CE8000,
      0x37CF0000, 0x37CF8000, 0x37D00000, 0x37D08000, 0x37D10000, 0x37D18000,
      0x37D20000, 0x37D28000, 0x37D30000, 0x37D38000, 0x37D40000, 0x37D48000,
      0x37D50000, 0x37D58000, 0x37D60000, 0x37D68000, 0x37D70000, 0x37D78000,
      0x37D80000, 0x37D88000, 0x37D90000, 0x37D98000, 0x37DA0000, 0x37DA8000,
      0x37DB0000, 0x37DB8000, 0x37DC0000, 0x37DC8000, 0x37DD0000, 0x37DD8000,
      0x37DE0000, 0x37DE8000, 0x37DF0000, 0x37DF8000, 0x37E00000, 0x37E08000,
      0x37E10000, 0x37E18000, 0x37E20000, 0x37E28000, 0x37E30000, 0x37E38000,
      0x37E40000, 0x37E48000, 0x37E50000, 0x37E58000, 0x37E60000, 0x37E68000,
      0x37E70000, 0x37E78000, 0x37E80000, 0x37E88000, 0x37E90000, 0x37E98000,
      0x37EA0000, 0x37EA8000, 0x37EB0000, 0x37EB8000, 0x37EC0000, 0x37EC8000,
      0x37ED0000, 0x37ED8000, 0x37EE0000, 0x37EE8000, 0x37EF0000, 0x37EF8000,
      0x37F00000, 0x37F08000, 0x37F10000, 0x37F18000, 0x37F20000, 0x37F28000,
      0x37F30000, 0x37F38000, 0x37F40000, 0x37F48000, 0x37F50000, 0x37F58000,
      0x37F60000, 0x37F68000, 0x37F70000, 0x37F78000, 0x37F80000, 0x37F88000,
      0x37F90000, 0x37F98000, 0x37FA0000, 0x37FA8000, 0x37FB0000, 0x37FB8000,
      0x37FC0000, 0x37FC8000, 0x37FD0000, 0x37FD8000, 0x37FE0000, 0x37FE8000,
      0x37FF0000, 0x37FF8000, 0x38000000, 0x38004000, 0x38008000, 0x3800C000,
      0x38010000, 0x38014000, 0x38018000, 0x3801C000, 0x38020000, 0x38024000,
      0x38028000, 0x3802C000, 0x38030000, 0x38034000, 0x38038000, 0x3803C000,
      0x38040000, 0x38044000, 0x38048000, 0x3804C000, 0x38050000, 0x38054000,
      0x38058000, 0x3805C000, 0x38060000, 0x38064000, 0x38068000, 0x3806C000,
      0x38070000, 0x38074000, 0x38078000, 0x3807C000, 0x38080000, 0x38084000,
      0x38088000, 0x3808C000, 0x38090000, 0x38094000, 0x38098000, 0x3809C000,
      0x380A0000, 0x380A4000, 0x380A8000, 0x380AC000, 0x380B0000, 0x380B4000,
      0x380B8000, 0x380BC000, 0x380C0000, 0x380C4000, 0x380C8000, 0x380CC000,
      0x380D0000, 0x380D4000, 0x380D8000, 0x380DC000, 0x380E0000, 0x380E4000,
      0x380E8000, 0x380EC000, 0x380F0000, 0x380F4000, 0x380F8000, 0x380FC000,
      0x38100000, 0x38104000, 0x38108000, 0x3810C000, 0x38110000, 0x38114000,
      0x38118000, 0x3811C000, 0x38120000, 0x38124000, 0x38128000, 0x3812C000,
      0x38130000, 0x38134000, 0x38138000, 0x3813C000, 0x38140000, 0x38144000,
      0x38148000, 0x3814C000, 0x38150000, 0x38154000, 0x38158000, 0x3815C000,
      0x38160000, 0x38164000, 0x38168000, 0x3816C000, 0x38170000, 0x38174000,
      0x38178000, 0x3817C000, 0x38180000, 0x38184000, 0x38188000, 0x3818C000,
      0x38190000, 0x38194000, 0x38198000, 0x3819C000, 0x381A0000, 0x381A4000,
      0x381A8000, 0x381AC000, 0x381B0000, 0x381B4000, 0x381B8000, 0x381BC000,
      0x381C0000, 0x381C4000, 0x381C8000, 0x381CC000, 0x381D0000, 0x381D4000,
      0x381D8000, 0x381DC000, 0x381E0000, 0x381E4000, 0x381E8000, 0x381EC000,
      0x381F0000, 0x381F4000, 0x381F8000, 0x381FC000, 0x38200000, 0x38204000,
      0x38208000, 0x3820C000, 0x38210000, 0x38214000, 0x38218000, 0x3821C000,
      0x38220000, 0x38224000, 0x38228000, 0x3822C000, 0x38230000, 0x38234000,
      0x38238000, 0x3823C000, 0x38240000, 0x38244000, 0x38248000, 0x3824C000,
      0x38250000, 0x38254000, 0x38258000, 0x3825C000, 0x38260000, 0x38264000,
      0x38268000, 0x3826C000, 0x38270000, 0x38274000, 0x38278000, 0x3827C000,
      0x38280000, 0x38284000, 0x38288000, 0x3828C000, 0x38290000, 0x38294000,
      0x38298000, 0x3829C000, 0x382A0000, 0x382A4000, 0x382A8000, 0x382AC000,
      0x382B0000, 0x382B4000, 0x382B8000, 0x382BC000, 0x382C0000, 0x382C4000,
      0x382C8000, 0x382CC000, 0x382D0000, 0x382D4000, 0x382D8000, 0x382DC000,
      0x382E0000, 0x382E4000, 0x382E8000, 0x382EC000, 0x382F0000, 0x382F4000,
      0x382F8000, 0x382FC000, 0x38300000, 0x38304000, 0x38308000, 0x3830C000,
      0x38310000, 0x38314000, 0x38318000, 0x3831C000, 0x38320000, 0x38324000,
      0x38328000, 0x3832C000, 0x38330000, 0x38334000, 0x38338000, 0x3833C000,
      0x38340000, 0x38344000, 0x38348000, 0x3834C000, 0x38350000, 0x38354000,
      0x38358000, 0x3835C000, 0x38360000, 0x38364000, 0x38368000, 0x3836C000,
      0x38370000, 0x38374000, 0x38378000, 0x3837C000, 0x38380000, 0x38384000,
      0x38388000, 0x3838C000, 0x38390000, 0x38394000, 0x38398000, 0x3839C000,
      0x383A0000, 0x383A4000, 0x383A8000, 0x383AC000, 0x383B0000, 0x383B4000,
      0x383B8000, 0x383BC000, 0x383C0000, 0x383C4000, 0x383C8000, 0x383CC000,
      0x383D0000, 0x383D4000, 0x383D8000, 0x383DC000, 0x383E0000, 0x383E4000,
      0x383E8000, 0x383EC000, 0x383F0000, 0x383F4000, 0x383F8000, 0x383FC000,
      0x38400000, 0x38404000, 0x38408000, 0x3840C000, 0x38410000, 0x38414000,
      0x38418000, 0x3841C000, 0x38420000, 0x38424000, 0x38428000, 0x3842C000,
      0x38430000, 0x38434000, 0x38438000, 0x3843C000, 0x38440000, 0x38444000,
      0x38448000, 0x3844C000, 0x38450000, 0x38454000, 0x38458000, 0x3845C000,
      0x38460000, 0x38464000, 0x38468000, 0x3846C000, 0x38470000, 0x38474000,
      0x38478000, 0x3847C000, 0x38480000, 0x38484000, 0x38488000, 0x3848C000,
      0x38490000, 0x38494000, 0x38498000, 0x3849C000, 0x384A0000, 0x384A4000,
      0x384A8000, 0x384AC000, 0x384B0000, 0x384B4000, 0x384B8000, 0x384BC000,
      0x384C0000, 0x384C4000, 0x384C8000, 0x384CC000, 0x384D0000, 0x384D4000,
      0x384D8000, 0x384DC000, 0x384E0000, 0x384E4000, 0x384E8000, 0x384EC000,
      0x384F0000, 0x384F4000, 0x384F8000, 0x384FC000, 0x38500000, 0x38504000,
      0x38508000, 0x3850C000, 0x38510000, 0x38514000, 0x38518000, 0x3851C000,
      0x38520000, 0x38524000, 0x38528000, 0x3852C000, 0x38530000, 0x38534000,
      0x38538000, 0x3853C000, 0x38540000, 0x38544000, 0x38548000, 0x3854C000,
      0x38550000, 0x38554000, 0x38558000, 0x3855C000, 0x38560000, 0x38564000,
      0x38568000, 0x3856C000, 0x38570000, 0x38574000, 0x38578000, 0x3857C000,
      0x38580000, 0x38584000, 0x38588000, 0x3858C000, 0x38590000, 0x38594000,
      0x38598000, 0x3859C000, 0x385A0000, 0x385A4000, 0x385A8000, 0x385AC000,
      0x385B0000, 0x385B4000, 0x385B8000, 0x385BC000, 0x385C0000, 0x385C4000,
      0x385C8000, 0x385CC000, 0x385D0000, 0x385D4000, 0x385D8000, 0x385DC000,
      0x385E0000, 0x385E4000, 0x385E8000, 0x385EC000, 0x385F0000, 0x385F4000,
      0x385F8000, 0x385FC000, 0x38600000, 0x38604000, 0x38608000, 0x3860C000,
      0x38610000, 0x38614000, 0x38618000, 0x3861C000, 0x38620000, 0x38624000,
      0x38628000, 0x3862C000, 0x38630000, 0x38634000, 0x38638000, 0x3863C000,
      0x38640000, 0x38644000, 0x38648000, 0x3864C000, 0x38650000, 0x38654000,
      0x38658000, 0x3865C000, 0x38660000, 0x38664000, 0x38668000, 0x3866C000,
      0x38670000, 0x38674000, 0x38678000, 0x3867C000, 0x38680000, 0x38684000,
      0x38688000, 0x3868C000, 0x38690000, 0x38694000, 0x38698000, 0x3869C000,
      0x386A0000, 0x386A4000, 0x386A8000, 0x386AC000, 0x386B0000, 0x386B4000,
      0x386B8000, 0x386BC000, 0x386C0000, 0x386C4000, 0x386C8000, 0x386CC000,
      0x386D0000, 0x386D4000, 0x386D8000, 0x386DC000, 0x386E0000, 0x386E4000,
      0x386E8000, 0x386EC000, 0x386F0000, 0x386F4000, 0x386F8000, 0x386FC000,
      0x38700000, 0x38704000, 0x38708000, 0x3870C000, 0x38710000, 0x38714000,
      0x38718000, 0x3871C000, 0x38720000, 0x38724000, 0x38728000, 0x3872C000,
      0x38730000, 0x38734000, 0x38738000, 0x3873C000, 0x38740000, 0x38744000,
      0x38748000, 0x3874C000, 0x38750000, 0x38754000, 0x38758000, 0x3875C000,
      0x38760000, 0x38764000, 0x38768000, 0x3876C000, 0x38770000, 0x38774000,
      0x38778000, 0x3877C000, 0x38780000, 0x38784000, 0x38788000, 0x3878C000,
      0x38790000, 0x38794000, 0x38798000, 0x3879C000, 0x387A0000, 0x387A4000,
      0x387A8000, 0x387AC000, 0x387B0000, 0x387B4000, 0x387B8000, 0x387BC000,
      0x387C0000, 0x387C4000, 0x387C8000, 0x387CC000, 0x387D0000, 0x387D4000,
      0x387D8000, 0x387DC000, 0x387E0000, 0x387E4000, 0x387E8000, 0x387EC000,
      0x387F0000, 0x387F4000, 0x387F8000, 0x387FC000, 0x38000000, 0x38002000,
      0x38004000, 0x38006000, 0x38008000, 0x3800A000, 0x3800C000, 0x3800E000,
      0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 0x3801A000,
      0x3801C000, 0x3801E000, 0x38020000, 0x38022000, 0x38024000, 0x38026000,
      0x38028000, 0x3802A000, 0x3802C000, 0x3802E000, 0x38030000, 0x38032000,
      0x38034000, 0x38036000, 0x38038000, 0x3803A000, 0x3803C000, 0x3803E000,
      0x38040000, 0x38042000, 0x38044000, 0x38046000, 0x38048000, 0x3804A000,
      0x3804C000, 0x3804E000, 0x38050000, 0x38052000, 0x38054000, 0x38056000,
      0x38058000, 0x3805A000, 0x3805C000, 0x3805E000, 0x38060000, 0x38062000,
      0x38064000, 0x38066000, 0x38068000, 0x3806A000, 0x3806C000, 0x3806E000,
      0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807A000,
      0x3807C000, 0x3807E000, 0x38080000, 0x38082000, 0x38084000, 0x38086000,
      0x38088000, 0x3808A000, 0x3808C000, 0x3808E000, 0x38090000, 0x38092000,
      0x38094000, 0x38096000, 0x38098000, 0x3809A000, 0x3809C000, 0x3809E000,
      0x380A0000, 0x380A2000, 0x380A4000, 0x380A6000, 0x380A8000, 0x380AA000,
      0x380AC000, 0x380AE000, 0x380B0000, 0x380B2000, 0x380B4000, 0x380B6000,
      0x380B8000, 0x380BA000, 0x380BC000, 0x380BE000, 0x380C0000, 0x380C2000,
      0x380C4000, 0x380C6000, 0x380C8000, 0x380CA000, 0x380CC000, 0x380CE000,
      0x380D0000, 0x380D2000, 0x380D4000, 0x380D6000, 0x380D8000, 0x380DA000,
      0x380DC000, 0x380DE000, 0x380E0000, 0x380E2000, 0x380E4000, 0x380E6000,
      0x380E8000, 0x380EA000, 0x380EC000, 0x380EE000, 0x380F0000, 0x380F2000,
      0x380F4000, 0x380F6000, 0x380F8000, 0x380FA000, 0x380FC000, 0x380FE000,
      0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810A000,
      0x3810C000, 0x3810E000, 0x38110000, 0x38112000, 0x38114000, 0x38116000,
      0x38118000, 0x3811A000, 0x3811C000, 0x3811E000, 0x38120000, 0x38122000,
      0x38124000, 0x38126000, 0x38128000, 0x3812A000, 0x3812C000, 0x3812E000,
      0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813A000,
      0x3813C000, 0x3813E000, 0x38140000, 0x38142000, 0x38144000, 0x38146000,
      0x38148000, 0x3814A000, 0x3814C000, 0x3814E000, 0x38150000, 0x38152000,
      0x38154000, 0x38156000, 0x38158000, 0x3815A000, 0x3815C000, 0x3815E000,
      0x38160000, 0x38162000, 0x38164000, 0x38166000, 0x38168000, 0x3816A000,
      0x3816C000, 0x3816E000, 0x38170000, 0x38172000, 0x38174000, 0x38176000,
      0x38178000, 0x3817A000, 0x3817C000, 0x3817E000, 0x38180000, 0x38182000,
      0x38184000, 0x38186000, 0x38188000, 0x3818A000, 0x3818C000, 0x3818E000,
      0x38190000, 0x38192000, 0x38194000, 0x38196000, 0x38198000, 0x3819A000,
      0x3819C000, 0x3819E000, 0x381A0000, 0x381A2000, 0x381A4000, 0x381A6000,
      0x381A8000, 0x381AA000, 0x381AC000, 0x381AE000, 0x381B0000, 0x381B2000,
      0x381B4000, 0x381B6000, 0x381B8000, 0x381BA000, 0x381BC000, 0x381BE000,
      0x381C0000, 0x381C2000, 0x381C4000, 0x381C6000, 0x381C8000, 0x381CA000,
      0x381CC000, 0x381CE000, 0x381D0000, 0x381D2000, 0x381D4000, 0x381D6000,
      0x381D8000, 0x381DA000, 0x381DC000, 0x381DE000, 0x381E0000, 0x381E2000,
      0x381E4000, 0x381E6000, 0x381E8000, 0x381EA000, 0x381EC000, 0x381EE000,
      0x381F0000, 0x381F2000, 0x381F4000, 0x381F6000, 0x381F8000, 0x381FA000,
      0x381FC000, 0x381FE000, 0x38200000, 0x38202000, 0x38204000, 0x38206000,
      0x38208000, 0x3820A000, 0x3820C000, 0x3820E000, 0x38210000, 0x38212000,
      0x38214000, 0x38216000, 0x38218000, 0x3821A000, 0x3821C000, 0x3821E000,
      0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 0x3822A000,
      0x3822C000, 0x3822E000, 0x38230000, 0x38232000, 0x38234000, 0x38236000,
      0x38238000, 0x3823A000, 0x3823C000, 0x3823E000, 0x38240000, 0x38242000,
      0x38244000, 0x38246000, 0x38248000, 0x3824A000, 0x3824C000, 0x3824E000,
      0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825A000,
      0x3825C000, 0x3825E000, 0x38260000, 0x38262000, 0x38264000, 0x38266000,
      0x38268000, 0x3826A000, 0x3826C000, 0x3826E000, 0x38270000, 0x38272000,
      0x38274000, 0x38276000, 0x38278000, 0x3827A000, 0x3827C000, 0x3827E000,
      0x38280000, 0x38282000, 0x38284000, 0x38286000, 0x38288000, 0x3828A000,
      0x3828C000, 0x3828E000, 0x38290000, 0x38292000, 0x38294000, 0x38296000,
      0x38298000, 0x3829A000, 0x3829C000, 0x3829E000, 0x382A0000, 0x382A2000,
      0x382A4000, 0x382A6000, 0x382A8000, 0x382AA000, 0x382AC000, 0x382AE000,
      0x382B0000, 0x382B2000, 0x382B4000, 0x382B6000, 0x382B8000, 0x382BA000,
      0x382BC000, 0x382BE000, 0x382C0000, 0x382C2000, 0x382C4000, 0x382C6000,
      0x382C8000, 0x382CA000, 0x382CC000, 0x382CE000, 0x382D0000, 0x382D2000,
      0x382D4000, 0x382D6000, 0x382D8000, 0x382DA000, 0x382DC000, 0x382DE000,
      0x382E0000, 0x382E2000, 0x382E4000, 0x382E6000, 0x382E8000, 0x382EA000,
      0x382EC000, 0x382EE000, 0x382F0000, 0x382F2000, 0x382F4000, 0x382F6000,
      0x382F8000, 0x382FA000, 0x382FC000, 0x382FE000, 0x38300000, 0x38302000,
      0x38304000, 0x38306000, 0x38308000, 0x3830A000, 0x3830C000, 0x3830E000,
      0x38310000, 0x38312000, 0x38314000, 0x38316000, 0x38318000, 0x3831A000,
      0x3831C000, 0x3831E000, 0x38320000, 0x38322000, 0x38324000, 0x38326000,
      0x38328000, 0x3832A000, 0x3832C000, 0x3832E000, 0x38330000, 0x38332000,
      0x38334000, 0x38336000, 0x38338000, 0x3833A000, 0x3833C000, 0x3833E000,
      0x38340000, 0x38342000, 0x38344000, 0x38346000, 0x38348000, 0x3834A000,
      0x3834C000, 0x3834E000, 0x38350000, 0x38352000, 0x38354000, 0x38356000,
      0x38358000, 0x3835A000, 0x3835C000, 0x3835E000, 0x38360000, 0x38362000,
      0x38364000, 0x38366000, 0x38368000, 0x3836A000, 0x3836C000, 0x3836E000,
      0x38370000, 0x38372000, 0x38374000, 0x38376000, 0x38378000, 0x3837A000,
      0x3837C000, 0x3837E000, 0x38380000, 0x38382000, 0x38384000, 0x38386000,
      0x38388000, 0x3838A000, 0x3838C000, 0x3838E000, 0x38390000, 0x38392000,
      0x38394000, 0x38396000, 0x38398000, 0x3839A000, 0x3839C000, 0x3839E000,
      0x383A0000, 0x383A2000, 0x383A4000, 0x383A6000, 0x383A8000, 0x383AA000,
      0x383AC000, 0x383AE000, 0x383B0000, 0x383B2000, 0x383B4000, 0x383B6000,
      0x383B8000, 0x383BA000, 0x383BC000, 0x383BE000, 0x383C0000, 0x383C2000,
      0x383C4000, 0x383C6000, 0x383C8000, 0x383CA000, 0x383CC000, 0x383CE000,
      0x383D0000, 0x383D2000, 0x383D4000, 0x383D6000, 0x383D8000, 0x383DA000,
      0x383DC000, 0x383DE000, 0x383E0000, 0x383E2000, 0x383E4000, 0x383E6000,
      0x383E8000, 0x383EA000, 0x383EC000, 0x383EE000, 0x383F0000, 0x383F2000,
      0x383F4000, 0x383F6000, 0x383F8000, 0x383FA000, 0x383FC000, 0x383FE000,
      0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840A000,
      0x3840C000, 0x3840E000, 0x38410000, 0x38412000, 0x38414000, 0x38416000,
      0x38418000, 0x3841A000, 0x3841C000, 0x3841E000, 0x38420000, 0x38422000,
      0x38424000, 0x38426000, 0x38428000, 0x3842A000, 0x3842C000, 0x3842E000,
      0x38430000, 0x38432000, 0x38434000, 0x38436000, 0x38438000, 0x3843A000,
      0x3843C000, 0x3843E000, 0x38440000, 0x38442000, 0x38444000, 0x38446000,
      0x38448000, 0x3844A000, 0x3844C000, 0x3844E000, 0x38450000, 0x38452000,
      0x38454000, 0x38456000, 0x38458000, 0x3845A000, 0x3845C000, 0x3845E000,
      0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 0x3846A000,
      0x3846C000, 0x3846E000, 0x38470000, 0x38472000, 0x38474000, 0x38476000,
      0x38478000, 0x3847A000, 0x3847C000, 0x3847E000, 0x38480000, 0x38482000,
      0x38484000, 0x38486000, 0x38488000, 0x3848A000, 0x3848C000, 0x3848E000,
      0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849A000,
      0x3849C000, 0x3849E000, 0x384A0000, 0x384A2000, 0x384A4000, 0x384A6000,
      0x384A8000, 0x384AA000, 0x384AC000, 0x384AE000, 0x384B0000, 0x384B2000,
      0x384B4000, 0x384B6000, 0x384B8000, 0x384BA000, 0x384BC000, 0x384BE000,
      0x384C0000, 0x384C2000, 0x384C4000, 0x384C6000, 0x384C8000, 0x384CA000,
      0x384CC000, 0x384CE000, 0x384D0000, 0x384D2000, 0x384D4000, 0x384D6000,
      0x384D8000, 0x384DA000, 0x384DC000, 0x384DE000, 0x384E0000, 0x384E2000,
      0x384E4000, 0x384E6000, 0x384E8000, 0x384EA000, 0x384EC000, 0x384EE000,
      0x384F0000, 0x384F2000, 0x384F4000, 0x384F6000, 0x384F8000, 0x384FA000,
      0x384FC000, 0x384FE000, 0x38500000, 0x38502000, 0x38504000, 0x38506000,
      0x38508000, 0x3850A000, 0x3850C000, 0x3850E000, 0x38510000, 0x38512000,
      0x38514000, 0x38516000, 0x38518000, 0x3851A000, 0x3851C000, 0x3851E000,
      0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852A000,
      0x3852C000, 0x3852E000, 0x38530000, 0x38532000, 0x38534000, 0x38536000,
      0x38538000, 0x3853A000, 0x3853C000, 0x3853E000, 0x38540000, 0x38542000,
      0x38544000, 0x38546000, 0x38548000, 0x3854A000, 0x3854C000, 0x3854E000,
      0x38550000, 0x38552000, 0x38554000, 0x38556000, 0x38558000, 0x3855A000,
      0x3855C000, 0x3855E000, 0x38560000, 0x38562000, 0x38564000, 0x38566000,
      0x38568000, 0x3856A000, 0x3856C000, 0x3856E000, 0x38570000, 0x38572000,
      0x38574000, 0x38576000, 0x38578000, 0x3857A000, 0x3857C000, 0x3857E000,
      0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858A000,
      0x3858C000, 0x3858E000, 0x38590000, 0x38592000, 0x38594000, 0x38596000,
      0x38598000, 0x3859A000, 0x3859C000, 0x3859E000, 0x385A0000, 0x385A2000,
      0x385A4000, 0x385A6000, 0x385A8000, 0x385AA000, 0x385AC000, 0x385AE000,
      0x385B0000, 0x385B2000, 0x385B4000, 0x385B6000, 0x385B8000, 0x385BA000,
      0x385BC000, 0x385BE000, 0x385C0000, 0x385C2000, 0x385C4000, 0x385C6000,
      0x385C8000, 0x385CA000, 0x385CC000, 0x385CE000, 0x385D0000, 0x385D2000,
      0x385D4000, 0x385D6000, 0x385D8000, 0x385DA000, 0x385DC000, 0x385DE000,
      0x385E0000, 0x385E2000, 0x385E4000, 0x385E6000, 0x385E8000, 0x385EA000,
      0x385EC000, 0x385EE000, 0x385F0000, 0x385F2000, 0x385F4000, 0x385F6000,
      0x385F8000, 0x385FA000, 0x385FC000, 0x385FE000, 0x38600000, 0x38602000,
      0x38604000, 0x38606000, 0x38608000, 0x3860A000, 0x3860C000, 0x3860E000,
      0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861A000,
      0x3861C000, 0x3861E000, 0x38620000, 0x38622000, 0x38624000, 0x38626000,
      0x38628000, 0x3862A000, 0x3862C000, 0x3862E000, 0x38630000, 0x38632000,
      0x38634000, 0x38636000, 0x38638000, 0x3863A000, 0x3863C000, 0x3863E000,
      0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864A000,
      0x3864C000, 0x3864E000, 0x38650000, 0x38652000, 0x38654000, 0x38656000,
      0x38658000, 0x3865A000, 0x3865C000, 0x3865E000, 0x38660000, 0x38662000,
      0x38664000, 0x38666000, 0x38668000, 0x3866A000, 0x3866C000, 0x3866E000,
      0x38670000, 0x38672000, 0x38674000, 0x38676000, 0x38678000, 0x3867A000,
      0x3867C000, 0x3867E000, 0x38680000, 0x38682000, 0x38684000, 0x38686000,
      0x38688000, 0x3868A000, 0x3868C000, 0x3868E000, 0x38690000, 0x38692000,
      0x38694000, 0x38696000, 0x38698000, 0x3869A000, 0x3869C000, 0x3869E000,
      0x386A0000, 0x386A2000, 0x386A4000, 0x386A6000, 0x386A8000, 0x386AA000,
      0x386AC000, 0x386AE000, 0x386B0000, 0x386B2000, 0x386B4000, 0x386B6000,
      0x386B8000, 0x386BA000, 0x386BC000, 0x386BE000, 0x386C0000, 0x386C2000,
      0x386C4000, 0x386C6000, 0x386C8000, 0x386CA000, 0x386CC000, 0x386CE000,
      0x386D0000, 0x386D2000, 0x386D4000, 0x386D6000, 0x386D8000, 0x386DA000,
      0x386DC000, 0x386DE000, 0x386E0000, 0x386E2000, 0x386E4000, 0x386E6000,
      0x386E8000, 0x386EA000, 0x386EC000, 0x386EE000, 0x386F0000, 0x386F2000,
      0x386F4000, 0x386F6000, 0x386F8000, 0x386FA000, 0x386FC000, 0x386FE000,
      0x38700000, 0x38702000, 0x38704000, 0x38706000, 0x38708000, 0x3870A000,
      0x3870C000, 0x3870E000, 0x38710000, 0x38712000, 0x38714000, 0x38716000,
      0x38718000, 0x3871A000, 0x3871C000, 0x3871E000, 0x38720000, 0x38722000,
      0x38724000, 0x38726000, 0x38728000, 0x3872A000, 0x3872C000, 0x3872E000,
      0x38730000, 0x38732000, 0x38734000, 0x38736000, 0x38738000, 0x3873A000,
      0x3873C000, 0x3873E000, 0x38740000, 0x38742000, 0x38744000, 0x38746000,
      0x38748000, 0x3874A000, 0x3874C000, 0x3874E000, 0x38750000, 0x38752000,
      0x38754000, 0x38756000, 0x38758000, 0x3875A000, 0x3875C000, 0x3875E000,
      0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876A000,
      0x3876C000, 0x3876E000, 0x38770000, 0x38772000, 0x38774000, 0x38776000,
      0x38778000, 0x3877A000, 0x3877C000, 0x3877E000, 0x38780000, 0x38782000,
      0x38784000, 0x38786000, 0x38788000, 0x3878A000, 0x3878C000, 0x3878E000,
      0x38790000, 0x38792000, 0x38794000, 0x38796000, 0x38798000, 0x3879A000,
      0x3879C000, 0x3879E000, 0x387A0000, 0x387A2000, 0x387A4000, 0x387A6000,
      0x387A8000, 0x387AA000, 0x387AC000, 0x387AE000, 0x387B0000, 0x387B2000,
      0x387B4000, 0x387B6000, 0x387B8000, 0x387BA000, 0x387BC000, 0x387BE000,
      0x387C0000, 0x387C2000, 0x387C4000, 0x387C6000, 0x387C8000, 0x387CA000,
      0x387CC000, 0x387CE000, 0x387D0000, 0x387D2000, 0x387D4000, 0x387D6000,
      0x387D8000, 0x387DA000, 0x387DC000, 0x387DE000, 0x387E0000, 0x387E2000,
      0x387E4000, 0x387E6000, 0x387E8000, 0x387EA000, 0x387EC000, 0x387EE000,
      0x387F0000, 0x387F2000, 0x387F4000, 0x387F6000, 0x387F8000, 0x387FA000,
      0x387FC000, 0x387FE000};
  static const uint32_t exponent_table[64] = {
      0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000,
      0x03000000, 0x03800000, 0x04000000, 0x04800000, 0x05000000, 0x05800000,
      0x06000000, 0x06800000, 0x07000000, 0x07800000, 0x08000000, 0x08800000,
      0x09000000, 0x09800000, 0x0A000000, 0x0A800000, 0x0B000000, 0x0B800000,
      0x0C000000, 0x0C800000, 0x0D000000, 0x0D800000, 0x0E000000, 0x0E800000,
      0x0F000000, 0x47800000, 0x80000000, 0x80800000, 0x81000000, 0x81800000,
      0x82000000, 0x82800000, 0x83000000, 0x83800000, 0x84000000, 0x84800000,
      0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000,
      0x88000000, 0x88800000, 0x89000000, 0x89800000, 0x8A000000, 0x8A800000,
      0x8B000000, 0x8B800000, 0x8C000000, 0x8C800000, 0x8D000000, 0x8D800000,
      0x8E000000, 0x8E800000, 0x8F000000, 0xC7800000};
  static const uint16_t offset_table[64] = {
      0,    1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024,
      1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024,
      1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 0,
      1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024,
      1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024,
      1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024};
  // Sign bit plus 5-bit exponent: a value in [0, 63].
  const uint16_t hval = static_cast<uint16_t>(val >> 10);
  const uint32_t bits =
      mantissa_table[offset_table[hval] + (val & 0x3FF)] + exponent_table[hval];
  // Copy the bits rather than dereferencing a reinterpret_cast'ed pointer:
  // reading a uint32_t object through a float lvalue breaks strict aliasing.
  static_assert(sizeof(float) == sizeof(uint32_t),
                "float must be exactly 32 bits wide");
  float result;
  std::memcpy(&result, &bits, sizeof(result));
  return result;
}

// Convert a 32-bit float to a 16-bit half-precision bit pattern using two
// 512-entry lookup tables.
// Refer: https://github.com/Maratyszcza/FP16/blob/master/third-party/half.hpp
//
// Both tables are indexed by the upper nine bits of the float's bit pattern
// (`bits >> 23`, i.e. the sign bit followed by the 8-bit exponent field), so
// entries 0..255 serve inputs with a clear sign bit and entries 256..511 serve
// inputs with the sign bit set.
static inline uint16_t float16(float val) {
  // Starting half-precision bit pattern for each sign/exponent combination;
  // the entries of the second half carry the 0x8000 sign bit.
  static const uint16_t base_table[512] = {
      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
      0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
      0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010,
      0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x0C00, 0x1000,
      0x1400, 0x1800, 0x1C00, 0x2000, 0x2400, 0x2800, 0x2C00, 0x3000, 0x3400,
      0x3800, 0x3C00, 0x4000, 0x4400, 0x4800, 0x4C00, 0x5000, 0x5400, 0x5800,
      0x5C00, 0x6000, 0x6400, 0x6800, 0x6C00, 0x7000, 0x7400, 0x7800, 0x7C00,
      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
      0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
      0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
      0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
      0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
      0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
      0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
      0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
      0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
      0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
      0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
      0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
      0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001,
      0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200,
      0x8400, 0x8800, 0x8C00, 0x9000, 0x9400, 0x9800, 0x9C00, 0xA000, 0xA400,
      0xA800, 0xAC00, 0xB000, 0xB400, 0xB800, 0xBC00, 0xC000, 0xC400, 0xC800,
      0xCC00, 0xD000, 0xD400, 0xD800, 0xDC00, 0xE000, 0xE400, 0xE800, 0xEC00,
      0xF000, 0xF400, 0xF800, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
      0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00};
  // Right-shift applied to the 23-bit float mantissa for each sign/exponent
  // combination before it is added to the base pattern.
  static const uint8_t shift_table[512] = {
      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 23, 22, 21, 20, 19,
      18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24,
      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
      24, 24, 24, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 23,
      22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13,
      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
      13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
      24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
      24, 24, 24, 24, 24, 24, 24, 13};
  // View the float's storage as a 32-bit integer so its bit fields can be
  // extracted with shifts and masks.
  uint32_t *p = reinterpret_cast<uint32_t *>(&val);
  // Truncated result: the base pattern for this sign/exponent plus the
  // mantissa (low 23 bits) shifted right by the per-entry amount.
  uint32_t hbits =
      base_table[*p >> 23] +
      static_cast<uint16_t>((*p & 0x7FFFFF) >> shift_table[*p >> 23]);
  // Rounding step. The increment is bit 0 of (mantissa shifted by one less
  // than above), i.e. the highest mantissa bit that was dropped, or 1 when
  // the exponent field equals 102. The trailing comparison is 0 or 1 and
  // suppresses the increment when the truncated result already has all five
  // half-precision exponent bits (0x7C00) set.
  hbits += (((*p & 0x7FFFFF) >> (shift_table[*p >> 23] - 1)) |
            (((*p >> 23) & 0xFF) == 102)) &
           ((hbits & 0x7C00) != 0x7C00);
  return static_cast<uint16_t>(hbits);
}
#if defined(__F16C__) && defined(__AVX512F__)
//! Widen `size` FP16 bit patterns from `arr` into FP32 values in `out`,
//! converting 32, then 16, then 8 elements at a time with F16C/AVX-512F.
static inline void convert_fp16_to_fp32_avx512f(const uint16_t *arr,
                                                size_t size, float *out) {
  const uint16_t *const end = arr + size;
  // End of the leading part whose length is a whole multiple of 32.
  const uint16_t *const bulk_end = arr + (size & ~static_cast<size_t>(31));
  const bool aligned = (reinterpret_cast<uintptr_t>(arr) & 0x1f) == 0 &&
                       (reinterpret_cast<uintptr_t>(out) & 0x3f) == 0;

  if (aligned) {
    // Input is 32-byte aligned and output 64-byte aligned: aligned accesses.
    while (arr != bulk_end) {
      const __m256i half_lo =
          _mm256_load_si256(reinterpret_cast<const __m256i *>(arr));
      _mm512_store_ps(out, _mm512_cvtph_ps(half_lo));
      const __m256i half_hi =
          _mm256_load_si256(reinterpret_cast<const __m256i *>(arr + 16));
      _mm512_store_ps(out + 16, _mm512_cvtph_ps(half_hi));
      arr += 32;
      out += 32;
    }
    if (end - arr >= 16) {
      const __m256i half =
          _mm256_load_si256(reinterpret_cast<const __m256i *>(arr));
      _mm512_store_ps(out, _mm512_cvtph_ps(half));
      arr += 16;
      out += 16;
    }
    if (end - arr >= 8) {
      const __m128i half =
          _mm_load_si128(reinterpret_cast<const __m128i *>(arr));
      _mm256_store_ps(out, _mm256_cvtph_ps(half));
      arr += 8;
      out += 8;
    }
  } else {
    // At least one buffer is not suitably aligned: unaligned accesses.
    while (arr != bulk_end) {
      const __m256i half_lo =
          _mm256_loadu_si256(reinterpret_cast<const __m256i *>(arr));
      _mm512_storeu_ps(out, _mm512_cvtph_ps(half_lo));
      const __m256i half_hi =
          _mm256_loadu_si256(reinterpret_cast<const __m256i *>(arr + 16));
      _mm512_storeu_ps(out + 16, _mm512_cvtph_ps(half_hi));
      arr += 32;
      out += 32;
    }
    if (end - arr >= 16) {
      const __m256i half =
          _mm256_loadu_si256(reinterpret_cast<const __m256i *>(arr));
      _mm512_storeu_ps(out, _mm512_cvtph_ps(half));
      arr += 16;
      out += 16;
    }
    if (end - arr >= 8) {
      const __m128i half =
          _mm_loadu_si128(reinterpret_cast<const __m128i *>(arr));
      _mm256_storeu_ps(out, _mm256_cvtph_ps(half));
      arr += 8;
      out += 8;
    }
  }

  // Fewer than eight elements are left; convert them one by one, last first.
  for (auto idx = end - arr; idx-- > 0;) {
    out[idx] = float32(arr[idx]);
  }
}

//! Widen `size` FP16 bit patterns from `arr` into FP32, divide each value by
//! `norm`, and store the quotients in `out` (F16C/AVX-512F kernel).
static inline void convert_fp16_to_fp32_avx512f(const uint16_t *arr,
                                                size_t size, float norm,
                                                float *out) {
  const uint16_t *const end = arr + size;
  // End of the leading part whose length is a whole multiple of 32.
  const uint16_t *const bulk_end = arr + (size & ~static_cast<size_t>(31));
  const __m512 divisor = _mm512_set1_ps(norm);
  const bool aligned = (reinterpret_cast<uintptr_t>(arr) & 0x1f) == 0 &&
                       (reinterpret_cast<uintptr_t>(out) & 0x3f) == 0;

  if (aligned) {
    // Input is 32-byte aligned and output 64-byte aligned: aligned accesses.
    while (arr != bulk_end) {
      const __m512 quot_lo = _mm512_div_ps(
          _mm512_cvtph_ps(
              _mm256_load_si256(reinterpret_cast<const __m256i *>(arr))),
          divisor);
      const __m512 quot_hi = _mm512_div_ps(
          _mm512_cvtph_ps(
              _mm256_load_si256(reinterpret_cast<const __m256i *>(arr + 16))),
          divisor);
      _mm512_store_ps(out, quot_lo);
      _mm512_store_ps(out + 16, quot_hi);
      arr += 32;
      out += 32;
    }
    if (end - arr >= 16) {
      const __m512 wide = _mm512_cvtph_ps(
          _mm256_load_si256(reinterpret_cast<const __m256i *>(arr)));
      _mm512_store_ps(out, _mm512_div_ps(wide, divisor));
      arr += 16;
      out += 16;
    }
    if (end - arr >= 8) {
      const __m256 wide = _mm256_cvtph_ps(
          _mm_load_si128(reinterpret_cast<const __m128i *>(arr)));
      _mm256_store_ps(out, _mm256_div_ps(wide, _mm256_set1_ps(norm)));
      arr += 8;
      out += 8;
    }
  } else {
    // At least one buffer is not suitably aligned: unaligned accesses.
    while (arr != bulk_end) {
      const __m512 quot_lo = _mm512_div_ps(
          _mm512_cvtph_ps(
              _mm256_loadu_si256(reinterpret_cast<const __m256i *>(arr))),
          divisor);
      const __m512 quot_hi = _mm512_div_ps(
          _mm512_cvtph_ps(
              _mm256_loadu_si256(reinterpret_cast<const __m256i *>(arr + 16))),
          divisor);
      _mm512_storeu_ps(out, quot_lo);
      _mm512_storeu_ps(out + 16, quot_hi);
      arr += 32;
      out += 32;
    }
    if (end - arr >= 16) {
      const __m512 wide = _mm512_cvtph_ps(
          _mm256_loadu_si256(reinterpret_cast<const __m256i *>(arr)));
      _mm512_storeu_ps(out, _mm512_div_ps(wide, divisor));
      arr += 16;
      out += 16;
    }
    if (end - arr >= 8) {
      const __m256 wide = _mm256_cvtph_ps(
          _mm_loadu_si128(reinterpret_cast<const __m128i *>(arr)));
      _mm256_storeu_ps(out, _mm256_div_ps(wide, _mm256_set1_ps(norm)));
      arr += 8;
      out += 8;
    }
  }

  // Fewer than eight elements are left; convert them one by one, last first.
  for (auto idx = end - arr; idx-- > 0;) {
    out[idx] = float32(arr[idx]) / norm;
  }
}

//! Narrow `size` FP32 values from `arr` into FP16 bit patterns in `out`,
//! converting 32, then 16, then 8 elements at a time with F16C/AVX-512F.
//!
//! The scalar tail uses the F16C `_cvtss_sh` conversion (as the AVX kernel
//! does) so that every element of the array goes through the same hardware
//! conversion with the same rounding-control immediate, instead of mixing in
//! the table-based `float16()` whose tie and NaN handling differ.
static inline void convert_fp32_to_fp16_avx512f(const float *arr, size_t size,
                                                uint16_t *out) {
  const float *const end = arr + size;
  // End of the leading part whose length is a whole multiple of 32.
  const float *const bulk_end = arr + (size & ~static_cast<size_t>(31));
  const bool aligned = (reinterpret_cast<uintptr_t>(arr) & 0x3f) == 0 &&
                       (reinterpret_cast<uintptr_t>(out) & 0x1f) == 0;

  if (aligned) {
    // Input is 64-byte aligned and output 32-byte aligned: aligned accesses.
    while (arr != bulk_end) {
      _mm256_store_si256(
          reinterpret_cast<__m256i *>(out),
          _mm512_cvtps_ph(_mm512_load_ps(arr), _MM_FROUND_NO_EXC));
      _mm256_store_si256(
          reinterpret_cast<__m256i *>(out + 16),
          _mm512_cvtps_ph(_mm512_load_ps(arr + 16), _MM_FROUND_NO_EXC));
      arr += 32;
      out += 32;
    }
    if (end - arr >= 16) {
      _mm256_store_si256(
          reinterpret_cast<__m256i *>(out),
          _mm512_cvtps_ph(_mm512_load_ps(arr), _MM_FROUND_NO_EXC));
      arr += 16;
      out += 16;
    }
    if (end - arr >= 8) {
      _mm_store_si128(
          reinterpret_cast<__m128i *>(out),
          _mm256_cvtps_ph(_mm256_load_ps(arr), _MM_FROUND_NO_EXC));
      arr += 8;
      out += 8;
    }
  } else {
    // At least one buffer is not suitably aligned: unaligned accesses.
    while (arr != bulk_end) {
      _mm256_storeu_si256(
          reinterpret_cast<__m256i *>(out),
          _mm512_cvtps_ph(_mm512_loadu_ps(arr), _MM_FROUND_NO_EXC));
      _mm256_storeu_si256(
          reinterpret_cast<__m256i *>(out + 16),
          _mm512_cvtps_ph(_mm512_loadu_ps(arr + 16), _MM_FROUND_NO_EXC));
      arr += 32;
      out += 32;
    }
    if (end - arr >= 16) {
      _mm256_storeu_si256(
          reinterpret_cast<__m256i *>(out),
          _mm512_cvtps_ph(_mm512_loadu_ps(arr), _MM_FROUND_NO_EXC));
      arr += 16;
      out += 16;
    }
    if (end - arr >= 8) {
      _mm_storeu_si128(
          reinterpret_cast<__m128i *>(out),
          _mm256_cvtps_ph(_mm256_loadu_ps(arr), _MM_FROUND_NO_EXC));
      arr += 8;
      out += 8;
    }
  }

  // Fewer than eight elements are left; convert them one by one, last first,
  // with the same hardware conversion as the vector path.
  for (auto idx = end - arr; idx-- > 0;) {
    out[idx] = _cvtss_sh(arr[idx], _MM_FROUND_NO_EXC);
  }
}

//! Divide `size` FP32 values from `arr` by `norm` and narrow the quotients
//! into FP16 bit patterns in `out` (F16C/AVX-512F kernel).
//!
//! The scalar tail uses the F16C `_cvtss_sh` conversion (as the AVX kernel
//! does) so that every element of the array goes through the same hardware
//! conversion with the same rounding-control immediate, instead of mixing in
//! the table-based `float16()` whose tie and NaN handling differ.
static inline void convert_fp32_to_fp16_avx512f(const float *arr, size_t size,
                                                float norm, uint16_t *out) {
  const float *const end = arr + size;
  // End of the leading part whose length is a whole multiple of 32.
  const float *const bulk_end = arr + (size & ~static_cast<size_t>(31));
  const __m512 divisor = _mm512_set1_ps(norm);
  const bool aligned = (reinterpret_cast<uintptr_t>(arr) & 0x3f) == 0 &&
                       (reinterpret_cast<uintptr_t>(out) & 0x1f) == 0;

  if (aligned) {
    // Input is 64-byte aligned and output 32-byte aligned: aligned accesses.
    while (arr != bulk_end) {
      const __m512 quot_lo = _mm512_div_ps(_mm512_load_ps(arr), divisor);
      const __m512 quot_hi = _mm512_div_ps(_mm512_load_ps(arr + 16), divisor);
      _mm256_store_si256(reinterpret_cast<__m256i *>(out),
                         _mm512_cvtps_ph(quot_lo, _MM_FROUND_NO_EXC));
      _mm256_store_si256(reinterpret_cast<__m256i *>(out + 16),
                         _mm512_cvtps_ph(quot_hi, _MM_FROUND_NO_EXC));
      arr += 32;
      out += 32;
    }
    if (end - arr >= 16) {
      const __m512 quot = _mm512_div_ps(_mm512_load_ps(arr), divisor);
      _mm256_store_si256(reinterpret_cast<__m256i *>(out),
                         _mm512_cvtps_ph(quot, _MM_FROUND_NO_EXC));
      arr += 16;
      out += 16;
    }
    if (end - arr >= 8) {
      const __m256 quot =
          _mm256_div_ps(_mm256_load_ps(arr), _mm256_set1_ps(norm));
      _mm_store_si128(reinterpret_cast<__m128i *>(out),
                      _mm256_cvtps_ph(quot, _MM_FROUND_NO_EXC));
      arr += 8;
      out += 8;
    }
  } else {
    // At least one buffer is not suitably aligned: unaligned accesses.
    while (arr != bulk_end) {
      const __m512 quot_lo = _mm512_div_ps(_mm512_loadu_ps(arr), divisor);
      const __m512 quot_hi = _mm512_div_ps(_mm512_loadu_ps(arr + 16), divisor);
      _mm256_storeu_si256(reinterpret_cast<__m256i *>(out),
                          _mm512_cvtps_ph(quot_lo, _MM_FROUND_NO_EXC));
      _mm256_storeu_si256(reinterpret_cast<__m256i *>(out + 16),
                          _mm512_cvtps_ph(quot_hi, _MM_FROUND_NO_EXC));
      arr += 32;
      out += 32;
    }
    if (end - arr >= 16) {
      const __m512 quot = _mm512_div_ps(_mm512_loadu_ps(arr), divisor);
      _mm256_storeu_si256(reinterpret_cast<__m256i *>(out),
                          _mm512_cvtps_ph(quot, _MM_FROUND_NO_EXC));
      arr += 16;
      out += 16;
    }
    if (end - arr >= 8) {
      const __m256 quot =
          _mm256_div_ps(_mm256_loadu_ps(arr), _mm256_set1_ps(norm));
      _mm_storeu_si128(reinterpret_cast<__m128i *>(out),
                       _mm256_cvtps_ph(quot, _MM_FROUND_NO_EXC));
      arr += 8;
      out += 8;
    }
  }

  // Fewer than eight elements are left; convert them one by one, last first,
  // with the same hardware conversion as the vector path.
  for (auto idx = end - arr; idx-- > 0;) {
    out[idx] = _cvtss_sh(arr[idx] / norm, _MM_FROUND_NO_EXC);
  }
}
#endif  //__F16C__ && __AVX512F__

#if defined(__F16C__) && defined(__AVX__)
//! Widen `size` FP16 bit patterns from `arr` into FP32 values in `out`,
//! converting 16, then 8 elements at a time with F16C/AVX.
static inline void convert_fp16_to_fp32_avx(const uint16_t *arr, size_t size,
                                            float *out) {
  const uint16_t *const end = arr + size;
  // End of the leading part whose length is a whole multiple of 16.
  const uint16_t *const bulk_end = arr + (size & ~static_cast<size_t>(15));
  const bool aligned = (reinterpret_cast<uintptr_t>(arr) & 0xf) == 0 &&
                       (reinterpret_cast<uintptr_t>(out) & 0x1f) == 0;

  if (aligned) {
    // Input is 16-byte aligned and output 32-byte aligned: aligned accesses.
    while (arr != bulk_end) {
      const __m128i half_lo =
          _mm_load_si128(reinterpret_cast<const __m128i *>(arr));
      _mm256_store_ps(out, _mm256_cvtph_ps(half_lo));
      const __m128i half_hi =
          _mm_load_si128(reinterpret_cast<const __m128i *>(arr + 8));
      _mm256_store_ps(out + 8, _mm256_cvtph_ps(half_hi));
      arr += 16;
      out += 16;
    }
    if (end - arr >= 8) {
      const __m128i half =
          _mm_load_si128(reinterpret_cast<const __m128i *>(arr));
      _mm256_store_ps(out, _mm256_cvtph_ps(half));
      arr += 8;
      out += 8;
    }
  } else {
    // At least one buffer is not suitably aligned: unaligned accesses.
    while (arr != bulk_end) {
      const __m128i half_lo =
          _mm_loadu_si128(reinterpret_cast<const __m128i *>(arr));
      _mm256_storeu_ps(out, _mm256_cvtph_ps(half_lo));
      const __m128i half_hi =
          _mm_loadu_si128(reinterpret_cast<const __m128i *>(arr + 8));
      _mm256_storeu_ps(out + 8, _mm256_cvtph_ps(half_hi));
      arr += 16;
      out += 16;
    }
    if (end - arr >= 8) {
      const __m128i half =
          _mm_loadu_si128(reinterpret_cast<const __m128i *>(arr));
      _mm256_storeu_ps(out, _mm256_cvtph_ps(half));
      arr += 8;
      out += 8;
    }
  }

  // Fewer than eight elements are left; convert them one by one, last first.
  for (auto idx = end - arr; idx-- > 0;) {
    out[idx] = _cvtsh_ss(arr[idx]);
  }
}

//! Widen `size` FP16 bit patterns from `arr` into FP32, divide each value by
//! `norm`, and store the quotients in `out` (F16C/AVX kernel).
static inline void convert_fp16_to_fp32_avx(const uint16_t *arr, size_t size,
                                            float norm, float *out) {
  const uint16_t *const end = arr + size;
  // End of the leading part whose length is a whole multiple of 16.
  const uint16_t *const bulk_end = arr + (size & ~static_cast<size_t>(15));
  const __m256 divisor = _mm256_set1_ps(norm);
  const bool aligned = (reinterpret_cast<uintptr_t>(arr) & 0xf) == 0 &&
                       (reinterpret_cast<uintptr_t>(out) & 0x1f) == 0;

  if (aligned) {
    // Input is 16-byte aligned and output 32-byte aligned: aligned accesses.
    while (arr != bulk_end) {
      const __m256 wide_lo = _mm256_cvtph_ps(
          _mm_load_si128(reinterpret_cast<const __m128i *>(arr)));
      const __m256 wide_hi = _mm256_cvtph_ps(
          _mm_load_si128(reinterpret_cast<const __m128i *>(arr + 8)));
      const __m256 quot_lo = _mm256_div_ps(wide_lo, divisor);
      const __m256 quot_hi = _mm256_div_ps(wide_hi, divisor);
      _mm256_store_ps(out, quot_lo);
      _mm256_store_ps(out + 8, quot_hi);
      arr += 16;
      out += 16;
    }
    if (end - arr >= 8) {
      const __m256 wide = _mm256_cvtph_ps(
          _mm_load_si128(reinterpret_cast<const __m128i *>(arr)));
      _mm256_store_ps(out, _mm256_div_ps(wide, divisor));
      arr += 8;
      out += 8;
    }
  } else {
    // At least one buffer is not suitably aligned: unaligned accesses.
    while (arr != bulk_end) {
      const __m256 wide_lo = _mm256_cvtph_ps(
          _mm_loadu_si128(reinterpret_cast<const __m128i *>(arr)));
      const __m256 wide_hi = _mm256_cvtph_ps(
          _mm_loadu_si128(reinterpret_cast<const __m128i *>(arr + 8)));
      const __m256 quot_lo = _mm256_div_ps(wide_lo, divisor);
      const __m256 quot_hi = _mm256_div_ps(wide_hi, divisor);
      _mm256_storeu_ps(out, quot_lo);
      _mm256_storeu_ps(out + 8, quot_hi);
      arr += 16;
      out += 16;
    }
    if (end - arr >= 8) {
      const __m256 wide = _mm256_cvtph_ps(
          _mm_loadu_si128(reinterpret_cast<const __m128i *>(arr)));
      _mm256_storeu_ps(out, _mm256_div_ps(wide, divisor));
      arr += 8;
      out += 8;
    }
  }

  // Fewer than eight elements are left; convert them one by one, last first.
  for (auto idx = end - arr; idx-- > 0;) {
    out[idx] = _cvtsh_ss(arr[idx]) / norm;
  }
}

//! Narrow `size` FP32 values from `arr` into FP16 bit patterns in `out`,
//! converting 16, then 8 elements at a time with F16C/AVX.
static inline void convert_fp32_to_fp16_avx(const float *arr, size_t size,
                                            uint16_t *out) {
  const float *const end = arr + size;
  // End of the leading part whose length is a whole multiple of 16.
  const float *const bulk_end = arr + (size & ~static_cast<size_t>(15));
  const bool aligned = (reinterpret_cast<uintptr_t>(arr) & 0x1f) == 0 &&
                       (reinterpret_cast<uintptr_t>(out) & 0xf) == 0;

  if (aligned) {
    // Input is 32-byte aligned and output 16-byte aligned: aligned accesses.
    while (arr != bulk_end) {
      const __m128i half_lo =
          _mm256_cvtps_ph(_mm256_load_ps(arr), _MM_FROUND_NO_EXC);
      _mm_store_si128(reinterpret_cast<__m128i *>(out), half_lo);
      const __m128i half_hi =
          _mm256_cvtps_ph(_mm256_load_ps(arr + 8), _MM_FROUND_NO_EXC);
      _mm_store_si128(reinterpret_cast<__m128i *>(out + 8), half_hi);
      arr += 16;
      out += 16;
    }
    if (end - arr >= 8) {
      const __m128i half =
          _mm256_cvtps_ph(_mm256_load_ps(arr), _MM_FROUND_NO_EXC);
      _mm_store_si128(reinterpret_cast<__m128i *>(out), half);
      arr += 8;
      out += 8;
    }
  } else {
    // At least one buffer is not suitably aligned: unaligned accesses.
    while (arr != bulk_end) {
      const __m128i half_lo =
          _mm256_cvtps_ph(_mm256_loadu_ps(arr), _MM_FROUND_NO_EXC);
      _mm_storeu_si128(reinterpret_cast<__m128i *>(out), half_lo);
      const __m128i half_hi =
          _mm256_cvtps_ph(_mm256_loadu_ps(arr + 8), _MM_FROUND_NO_EXC);
      _mm_storeu_si128(reinterpret_cast<__m128i *>(out + 8), half_hi);
      arr += 16;
      out += 16;
    }
    if (end - arr >= 8) {
      const __m128i half =
          _mm256_cvtps_ph(_mm256_loadu_ps(arr), _MM_FROUND_NO_EXC);
      _mm_storeu_si128(reinterpret_cast<__m128i *>(out), half);
      arr += 8;
      out += 8;
    }
  }

  // Fewer than eight elements are left; convert them one by one, last first.
  for (auto idx = end - arr; idx-- > 0;) {
    out[idx] = _cvtss_sh(arr[idx], _MM_FROUND_NO_EXC);
  }
}

//! Divide `size` FP32 values from `arr` by `norm` and narrow the quotients
//! into FP16 bit patterns in `out` (F16C/AVX kernel).
static inline void convert_fp32_to_fp16_avx(const float *arr, size_t size,
                                            float norm, uint16_t *out) {
  const float *const end = arr + size;
  // End of the leading part whose length is a whole multiple of 16.
  const float *const bulk_end = arr + (size & ~static_cast<size_t>(15));
  const __m256 divisor = _mm256_set1_ps(norm);
  const bool aligned = (reinterpret_cast<uintptr_t>(arr) & 0x1f) == 0 &&
                       (reinterpret_cast<uintptr_t>(out) & 0xf) == 0;

  if (aligned) {
    // Input is 32-byte aligned and output 16-byte aligned: aligned accesses.
    while (arr != bulk_end) {
      const __m256 raw_lo = _mm256_load_ps(arr);
      const __m256 raw_hi = _mm256_load_ps(arr + 8);
      const __m256 quot_lo = _mm256_div_ps(raw_lo, divisor);
      const __m256 quot_hi = _mm256_div_ps(raw_hi, divisor);
      _mm_store_si128(reinterpret_cast<__m128i *>(out),
                      _mm256_cvtps_ph(quot_lo, _MM_FROUND_NO_EXC));
      _mm_store_si128(reinterpret_cast<__m128i *>(out + 8),
                      _mm256_cvtps_ph(quot_hi, _MM_FROUND_NO_EXC));
      arr += 16;
      out += 16;
    }
    if (end - arr >= 8) {
      const __m256 quot = _mm256_div_ps(_mm256_load_ps(arr), divisor);
      _mm_store_si128(reinterpret_cast<__m128i *>(out),
                      _mm256_cvtps_ph(quot, _MM_FROUND_NO_EXC));
      arr += 8;
      out += 8;
    }
  } else {
    // At least one buffer is not suitably aligned: unaligned accesses.
    while (arr != bulk_end) {
      const __m256 raw_lo = _mm256_loadu_ps(arr);
      const __m256 raw_hi = _mm256_loadu_ps(arr + 8);
      const __m256 quot_lo = _mm256_div_ps(raw_lo, divisor);
      const __m256 quot_hi = _mm256_div_ps(raw_hi, divisor);
      _mm_storeu_si128(reinterpret_cast<__m128i *>(out),
                       _mm256_cvtps_ph(quot_lo, _MM_FROUND_NO_EXC));
      _mm_storeu_si128(reinterpret_cast<__m128i *>(out + 8),
                       _mm256_cvtps_ph(quot_hi, _MM_FROUND_NO_EXC));
      arr += 16;
      out += 16;
    }
    if (end - arr >= 8) {
      const __m256 quot = _mm256_div_ps(_mm256_loadu_ps(arr), divisor);
      _mm_storeu_si128(reinterpret_cast<__m128i *>(out),
                       _mm256_cvtps_ph(quot, _MM_FROUND_NO_EXC));
      arr += 8;
      out += 8;
    }
  }

  // Fewer than eight elements are left; convert them one by one, last first.
  for (auto idx = end - arr; idx-- > 0;) {
    out[idx] = _cvtss_sh(arr[idx] / norm, _MM_FROUND_NO_EXC);
  }
}
#endif  // __F16C__ && __AVX__

//! Scalar conversion of `size` FP16 bit patterns to FP32, one at a time.
static inline void convert_fp16_to_fp32_fallback(const uint16_t *arr,
                                                 size_t size, float *out) {
  const uint16_t *const end = arr + size;
  while (arr != end) {
    *out = float32(*arr);
    ++arr;
    ++out;
  }
}

//! Scalar conversion of `size` FP16 bit patterns to FP32, each result
//! divided by `norm`.
static inline void convert_fp16_to_fp32_fallback(const uint16_t *arr,
                                                 size_t size, float norm,
                                                 float *out) {
  const uint16_t *const end = arr + size;
  while (arr != end) {
    *out = float32(*arr) / norm;
    ++arr;
    ++out;
  }
}

//! Scalar conversion of `size` FP32 values to FP16 bit patterns, one at a
//! time.
static inline void convert_fp32_to_fp16_fallback(const float *arr, size_t size,
                                                 uint16_t *out) {
  const float *const end = arr + size;
  while (arr != end) {
    *out = float16(*arr);
    ++arr;
    ++out;
  }
}

//! Scalar conversion of `size` FP32 values, each divided by `norm`, to FP16
//! bit patterns.
static inline void convert_fp32_to_fp16_fallback(const float *arr, size_t size,
                                                 float norm, uint16_t *out) {
  const float *const end = arr + size;
  while (arr != end) {
    *out = float16(*arr / norm);
    ++arr;
    ++out;
  }
}

//! Convert `size` FP16 bit patterns to FP32. The kernel is picked from the
//! ones compiled in (by the `__F16C__`/`__AVX512F__`/`__AVX__` macros) using
//! the `CpuFeatures::static_flags_` values; otherwise the scalar fallback
//! runs.
static inline void convert_fp16_to_fp32(const uint16_t *arr, size_t size,
                                        float *out) {
#if defined(__F16C__) && defined(__AVX512F__)
  {
    const auto &cpu = zvec::ailego::internal::CpuFeatures::static_flags_;
    if (cpu.F16C && cpu.AVX512F) {
      convert_fp16_to_fp32_avx512f(arr, size, out);
      return;
    }
  }
#endif

#if defined(__F16C__) && defined(__AVX__)
  {
    const auto &cpu = zvec::ailego::internal::CpuFeatures::static_flags_;
    if (cpu.F16C && cpu.AVX) {
      convert_fp16_to_fp32_avx(arr, size, out);
      return;
    }
  }
#endif

  convert_fp16_to_fp32_fallback(arr, size, out);
}

//! Convert `size` FP16 bit patterns to FP32 and divide each by `norm`. The
//! kernel is picked from the ones compiled in (by the
//! `__F16C__`/`__AVX512F__`/`__AVX__` macros) using the
//! `CpuFeatures::static_flags_` values; otherwise the scalar fallback runs.
static inline void convert_fp16_to_fp32(const uint16_t *arr, size_t size,
                                        float norm, float *out) {
#if defined(__F16C__) && defined(__AVX512F__)
  {
    const auto &cpu = zvec::ailego::internal::CpuFeatures::static_flags_;
    if (cpu.F16C && cpu.AVX512F) {
      convert_fp16_to_fp32_avx512f(arr, size, norm, out);
      return;
    }
  }
#endif

#if defined(__F16C__) && defined(__AVX__)
  {
    const auto &cpu = zvec::ailego::internal::CpuFeatures::static_flags_;
    if (cpu.F16C && cpu.AVX) {
      convert_fp16_to_fp32_avx(arr, size, norm, out);
      return;
    }
  }
#endif

  convert_fp16_to_fp32_fallback(arr, size, norm, out);
}

//! Convert `size` FP32 values to FP16 bit patterns. The kernel is picked from
//! the ones compiled in (by the `__F16C__`/`__AVX512F__`/`__AVX__` macros)
//! using the `CpuFeatures::static_flags_` values; otherwise the scalar
//! fallback runs.
static inline void convert_fp32_to_fp16(const float *arr, size_t size,
                                        uint16_t *out) {
#if defined(__F16C__) && defined(__AVX512F__)
  {
    const auto &cpu = zvec::ailego::internal::CpuFeatures::static_flags_;
    if (cpu.F16C && cpu.AVX512F) {
      convert_fp32_to_fp16_avx512f(arr, size, out);
      return;
    }
  }
#endif

#if defined(__F16C__) && defined(__AVX__)
  {
    const auto &cpu = zvec::ailego::internal::CpuFeatures::static_flags_;
    if (cpu.F16C && cpu.AVX) {
      convert_fp32_to_fp16_avx(arr, size, out);
      return;
    }
  }
#endif

  convert_fp32_to_fp16_fallback(arr, size, out);
}

//! Divide `size` FP32 values by `norm` and convert them to FP16 bit patterns.
//! The kernel is picked from the ones compiled in (by the
//! `__F16C__`/`__AVX512F__`/`__AVX__` macros) using the
//! `CpuFeatures::static_flags_` values; otherwise the scalar fallback runs.
static inline void convert_fp32_to_fp16(const float *arr, size_t size,
                                        float norm, uint16_t *out) {
#if defined(__F16C__) && defined(__AVX512F__)
  {
    const auto &cpu = zvec::ailego::internal::CpuFeatures::static_flags_;
    if (cpu.F16C && cpu.AVX512F) {
      convert_fp32_to_fp16_avx512f(arr, size, norm, out);
      return;
    }
  }
#endif

#if defined(__F16C__) && defined(__AVX__)
  {
    const auto &cpu = zvec::ailego::internal::CpuFeatures::static_flags_;
    if (cpu.F16C && cpu.AVX) {
      convert_fp32_to_fp16_avx(arr, size, norm, out);
      return;
    }
  }
#endif

  convert_fp32_to_fp16_fallback(arr, size, norm, out);
}

#endif  //

namespace zvec {
namespace ailego {

//! Convert a single FP16 bit pattern to FP32
float FloatHelper::ToFP32(uint16_t val) {
  const float widened = float32(val);
  return widened;
}

//! Convert `size` FP16 bit patterns from `arr` to FP32 values in `out`
void FloatHelper::ToFP32(const uint16_t *arr, size_t size, float *out) {
  convert_fp16_to_fp32(arr, size, out);
}

//! Convert `size` FP16 bit patterns to FP32, dividing each value by `norm`
void FloatHelper::ToFP32(const uint16_t *arr, size_t size, float norm,
                         float *out) {
  convert_fp16_to_fp32(arr, size, norm, out);
}

//! Convert a single FP32 value to an FP16 bit pattern
uint16_t FloatHelper::ToFP16(float val) {
  const uint16_t narrowed = float16(val);
  return narrowed;
}

//! Convert `size` FP32 values from `arr` to FP16 bit patterns in `out`
void FloatHelper::ToFP16(const float *arr, size_t size, uint16_t *out) {
  convert_fp32_to_fp16(arr, size, out);
}

//! Divide `size` FP32 values by `norm` and convert them to FP16 bit patterns
void FloatHelper::ToFP16(const float *arr, size_t size, float norm,
                         uint16_t *out) {
  convert_fp32_to_fp16(arr, size, norm, out);
}

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/utility/math_helper.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <algorithm>
#include <cmath>
#include <limits>
#include <type_traits>
#include <zvec/ailego/utility/float_helper.h>
namespace zvec {
namespace ailego {

/*! Math Helper
 */
struct MathHelper {
  //! Calculate the absolute value of an arithmetic `x`, converted to `R`
  template <typename T, typename R = float>
  static inline auto Absolute(const T &x) ->
      typename std::enable_if<std::is_arithmetic<T>::value, R>::type {
    return static_cast<R>(std::abs(x));
  }

  //! Calculate the absolute value of a half-precision `x`, converted to `R`
  template <typename R = float>
  static inline R Absolute(const Float16 &x) {
    return static_cast<R>(Float16::Absolute(x));
  }

  //! Calculate the absolute difference |x - y| of two integers as `R`
  template <typename T, typename R = float>
  static inline auto AbsoluteDifference(const T &x, const T &y) ->
      typename std::enable_if<std::is_integral<T>::value, R>::type {
    using U = typename std::make_unsigned<T>::type;
    // Subtract in the unsigned domain: modular arithmetic yields the exact
    // magnitude even when the signed subtraction would overflow (which is
    // undefined behavior), e.g. for INT_MIN and INT_MAX.
    const U ux = static_cast<U>(x);
    const U uy = static_cast<U>(y);
    const U d = (x < y) ? static_cast<U>(uy - ux) : static_cast<U>(ux - uy);
    return static_cast<R>(d);
  }

  //! Calculate the absolute difference |x - y| of two floating point numbers
  template <typename T, typename R = float>
  static inline auto AbsoluteDifference(const T &x, const T &y) ->
      typename std::enable_if<std::is_floating_point<T>::value, R>::type {
    return static_cast<R>(std::abs(x - y));
  }

  //! Calculate the absolute difference |x - y| of two half-precision numbers
  template <typename R = float>
  static inline R AbsoluteDifference(const Float16 &x, const Float16 &y) {
    return static_cast<R>(std::abs(x - y));
  }

  //! Calculate the squared difference (x - y)^2 of two integers as `R`
  template <typename T, typename R = float>
  static inline auto SquaredDifference(const T &x, const T &y) ->
      typename std::enable_if<std::is_integral<T>::value, R>::type {
    using U = typename std::make_unsigned<T>::type;
    // Magnitude computed in the unsigned domain to avoid signed overflow.
    const U ux = static_cast<U>(x);
    const U uy = static_cast<U>(y);
    const U m = (x < y) ? static_cast<U>(uy - ux) : static_cast<U>(ux - uy);
    // Square in the result type: squaring in the integer type overflows for
    // 16-bit differences (after promotion to int) and wraps for 32-bit ones.
    const R d = static_cast<R>(m);
    return static_cast<R>(d * d);
  }

  //! Calculate the squared difference (x - y)^2 of two floating point numbers
  template <typename T, typename R = float>
  static inline auto SquaredDifference(const T &x, const T &y) ->
      typename std::enable_if<std::is_floating_point<T>::value, R>::type {
    auto d = x - y;
    return static_cast<R>(d * d);
  }

  //! Calculate the squared difference (x - y)^2 of two half-precision numbers
  template <typename R = float>
  static inline R SquaredDifference(const Float16 &x, const Float16 &y) {
    auto d = x - y;
    return static_cast<R>(d * d);
  }

  //! Test whether two integral numbers are equal (the ULP argument is unused)
  template <class T>
  static inline auto IsAlmostEqual(const T &x, const T &y, int) ->
      typename std::enable_if<std::is_integral<T>::value, bool>::type {
    return (x == y);
  }

  //! Test whether two floating point numbers are equal within `ulp` units in
  //! the last place
  template <class T>
  static inline auto IsAlmostEqual(const T &x, const T &y, int ulp) ->
      typename std::enable_if<std::is_floating_point<T>::value, bool>::type {
    // the machine epsilon has to be scaled to the magnitude of the values used
    // and multiplied by the desired precision in ULPs (units in the last place)
    return ((std::fabs(x - y) <=
             std::numeric_limits<T>::epsilon() * std::fabs(x + y) * ulp) ||
            // unless the difference is smaller than the smallest normal value
            (std::fabs(x - y) < std::numeric_limits<T>::min()));
  }
};

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/utility/matrix_helper.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <zvec/ailego/internal/platform.h>

namespace zvec {
namespace ailego {

struct MatrixHelper {
  //! Transpose a row-major matrix with M rows (compile-time) and N columns:
  //! element (i, j) of `src` is written to position (j * M + i) of `dst`
  template <typename T, size_t M>
  static inline void Transpose(const void *src, size_t N, void *dst) {
    const T *in = reinterpret_cast<const T *>(src);
    T *out = reinterpret_cast<T *>(dst);
    for (size_t row = 0; row != M; ++row) {
      const T *in_row = in + row * N;
      for (size_t col = 0; col != N; ++col) {
        out[col * M + row] = in_row[col];
      }
    }
  }

  //! Reverse transpose: read `src` as N rows of M (compile-time) columns and
  //! write element (i, j) to position (j * N + i) of `dst`
  template <typename T, size_t M>
  static inline void ReverseTranspose(const void *src, size_t N, void *dst) {
    const T *in = reinterpret_cast<const T *>(src);
    T *out = reinterpret_cast<T *>(dst);
    for (size_t row = 0; row != N; ++row) {
      const T *in_row = in + row * M;
      for (size_t col = 0; col != M; ++col) {
        out[col * N + row] = in_row[col];
      }
    }
  }

  //! Transpose a row-major matrix with M rows and N columns (both runtime):
  //! element (i, j) of `src` is written to position (j * M + i) of `dst`
  template <typename T>
  static inline void Transpose(const void *src, size_t M, size_t N, void *dst) {
    const T *in = reinterpret_cast<const T *>(src);
    T *out = reinterpret_cast<T *>(dst);
    for (size_t row = 0; row != M; ++row) {
      const T *in_row = in + row * N;
      for (size_t col = 0; col != N; ++col) {
        out[col * M + row] = in_row[col];
      }
    }
  }

  //! Reverse transpose: read `src` as N rows of M columns (both runtime) and
  //! write element (i, j) to position (j * N + i) of `dst`
  template <typename T>
  static inline void ReverseTranspose(const void *src, size_t M, size_t N,
                                      void *dst) {
    const T *in = reinterpret_cast<const T *>(src);
    T *out = reinterpret_cast<T *>(dst);
    for (size_t row = 0; row != N; ++row) {
      const T *in_row = in + row * M;
      for (size_t col = 0; col != M; ++col) {
        out[col * N + row] = in_row[col];
      }
    }
  }
};

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/utility/memory_helper.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "memory_helper.h"
#include <cstdio>
#include <cstring>
#include <fstream>
#include <zvec/ailego/utility/file_helper.h>
#include <zvec/ailego/utility/string_helper.h>

#if defined(_WIN64) || defined(_WIN32)
#include <Windows.h>
#include <psapi.h>
#else
#if defined(__linux__) || defined(__linux)
#include <sys/resource.h>
#elif defined(__APPLE__) && defined(__MACH__)
#include <mach/mach.h>
#include <sys/sysctl.h>
#endif
#include <unistd.h>
#endif

namespace zvec {
namespace ailego {

#if defined(__linux__) || defined(__linux)
//! Retrieve the virtual size and resident set size of the current process in
//! bytes. Both values are read (in pages) from /proc/self/statm and scaled by
//! the page size. Returns false, leaving *vsz and *rss untouched, when the
//! file cannot be opened or parsed or the page size is unavailable.
bool MemoryHelper::SelfUsage(size_t *vsz, size_t *rss) {
  FILE *fp = fopen("/proc/self/statm", "r");
  if (!fp) {
    return false;
  }

  size_t vsz_pages = 0;
  size_t rss_pages = 0;
  // %zu is the conversion for size_t. Both fields must be converted: fscanf
  // returns EOF only on input failure before any conversion, so a partial
  // match would otherwise slip through.
  const int matched = fscanf(fp, "%zu %zu", &vsz_pages, &rss_pages);
  fclose(fp);
  if (matched != 2) {
    return false;
  }

  const long pagesz = sysconf(_SC_PAGESIZE);
  if (pagesz <= 0) {
    return false;
  }
  *vsz = vsz_pages * (size_t)pagesz;
  *rss = rss_pages * (size_t)pagesz;
  return true;
}

//! Retrieve the resident set size of the current process in bytes, read from
//! the second field of /proc/self/statm (in pages). Returns 0 on failure.
size_t MemoryHelper::SelfRSS(void) {
  FILE *fp = fopen("/proc/self/statm", "r");
  if (!fp) {
    return 0;
  }

  size_t rss_pages = 0;
  // Skip the first field, then read the second as size_t (%zu). Suppressed
  // conversions are not counted, so success means exactly one assignment.
  const int matched = fscanf(fp, "%*d %zu", &rss_pages);
  fclose(fp);
  if (matched != 1) {
    return 0;
  }

  const long pagesz = sysconf(_SC_PAGESIZE);
  if (pagesz <= 0) {
    return 0;
  }
  return rss_pages * (size_t)pagesz;
}

//! Retrieve the peak resident set size of the current process in bytes.
//! Returns 0 when getrusage() fails.
size_t MemoryHelper::SelfPeakRSS(void) {
  struct rusage usage;
  if (getrusage(RUSAGE_SELF, &usage) != 0) {
    // `usage` is not filled in on failure; do not read it.
    return 0;
  }
  // ru_maxrss is scaled by 1024 to obtain bytes; widen to size_t first so the
  // multiplication is not carried out in `long`.
  return (size_t)usage.ru_maxrss * 1024u;
}

//! Retrieve the total physical memory in bytes as the product of the number of
//! physical pages and the page size, both obtained from sysconf()
size_t MemoryHelper::TotalRamSize(void) {
  const long phys_pages = sysconf(_SC_PHYS_PAGES);
  const long page_bytes = sysconf(_SC_PAGESIZE);
  return (phys_pages * page_bytes);
}

//! Retrieve the available physical memory in bytes from /proc/meminfo.
//! Uses the 'MemAvailable' entry when it is present and non-zero; otherwise
//! sums 'MemFree', 'Buffers' and 'Cached'. Values in the file are treated as
//! kilobytes. Returns 0 when the file cannot be opened.
size_t MemoryHelper::AvailableRamSize(void) {
  FILE *meminfo = fopen("/proc/meminfo", "r");
  if (meminfo == NULL) {
    return 0;
  }

  // Parse the number that follows the ':' of a "Key:  value" line.
  auto value_of = [](const char *text) -> size_t {
    return (size_t)strtoull(strchr(text, ':') + 1, NULL, 10);
  };

  char line[128];
  size_t kilobytes = 0;

  // First pass: look for the 'MemAvailable' entry.
  while (fgets(line, sizeof(line), meminfo)) {
    if (strncmp(line, "MemAvailable:", 13) != 0) {
      continue;
    }
    kilobytes = value_of(line);
    break;
  }

  // 'MemAvailable' not found (or zero): rescan and add up the three fallback
  // entries, stopping as soon as all of them have been seen.
  if (kilobytes == 0) {
    fseek(meminfo, 0L, SEEK_SET);

    size_t seen = 0;
    while (seen != 3 && fgets(line, sizeof(line), meminfo)) {
      const bool wanted = (strncmp(line, "MemFree:", 8) == 0) ||
                          (strncmp(line, "Buffers:", 8) == 0) ||
                          (strncmp(line, "Cached:", 7) == 0);
      if (wanted) {
        kilobytes += value_of(line);
        ++seen;
      }
    }
  }
  fclose(meminfo);
  return (kilobytes * 1024);
}

//! Retrieve the used physical memory in bytes from /proc/meminfo, computed as
//! 'MemTotal' minus the sum of 'MemFree', 'Buffers', 'Cached' and 'Slab'.
//! Values in the file are treated as kilobytes. When 'MemTotal' is missing the
//! total is derived from sysconf(). Returns 0 when the file cannot be opened.
size_t MemoryHelper::UsedRamSize(void) {
  FILE *meminfo = fopen("/proc/meminfo", "r");
  if (meminfo == NULL) {
    return 0;
  }

  // Parse the number that follows the ':' of a "Key:  value" line.
  auto value_of = [](const char *text) -> size_t {
    return (size_t)strtoull(strchr(text, ':') + 1, NULL, 10);
  };

  char line[128];
  size_t total_kb = 0;
  size_t unused_kb = 0;
  size_t seen = 0;

  // Stop reading once all five entries of interest have been seen.
  while (seen != 5 && fgets(line, sizeof(line), meminfo)) {
    if (strncmp(line, "MemTotal:", 9) == 0) {
      total_kb = value_of(line);
      ++seen;
    } else if ((strncmp(line, "MemFree:", 8) == 0) ||
               (strncmp(line, "Buffers:", 8) == 0) ||
               (strncmp(line, "Cached:", 7) == 0) ||
               (strncmp(line, "Slab:", 5) == 0)) {
      unused_kb += value_of(line);
      ++seen;
    }
  }
  fclose(meminfo);

  if (total_kb == 0) {
    total_kb = (sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE)) / 1024;
  }
  return ((total_kb - unused_kb) * 1024);
}

//! Retrieve the total memory in bytes, honoring a cgroup (v1) memory limit
//! when one is set. Falls back to TotalRamSize() when the limit file is
//! missing, unreadable, empty, unparsable, marked unlimited, or not smaller
//! than the physical memory.
size_t MemoryHelper::ContainerAwareTotalRamSize(void) {
  const size_t total_ram_size = TotalRamSize();
  const std::string limit_in_bytes =
      "/sys/fs/cgroup/memory/memory.limit_in_bytes";
  if (!FileHelper::IsExist(limit_in_bytes.c_str())) {
    return total_ram_size;
  }

  std::ifstream memory_limit_ifs(limit_in_bytes, std::ios::in);
  if (!memory_limit_ifs.is_open()) {
    return total_ram_size;
  }
  std::string memory_limit_str;
  memory_limit_ifs >> memory_limit_str;
  memory_limit_ifs.close();

  // Refer to:
  // https://access.redhat.com/documentation/zh-cn/red_hat_enterprise_linux/7/html/resource_management_guide/sec-memory
  if (memory_limit_str.empty() || memory_limit_str == "-1") {
    return total_ram_size;
  }

  uint64_t limit = 0;
  StringHelper::ToUint64(memory_limit_str, &limit);
  // A zero limit means the text could not be parsed; reporting 0 bytes of
  // memory would be wrong, so keep the physical size.
  // Refer to:
  // https://stackoverflow.com/questions/70332396/why-cgroups-file-memory-limit-in-bytes-use-9223372036854771712-as-a-default-valu
  if (limit == 0 || limit == 0x7FFFFFFFFFFFF000) {
    return total_ram_size;
  }
  // The "unlimited" value depends on the kernel page size, so also treat any
  // limit at or above the physical memory as no limit.
  if (total_ram_size != 0 && limit >= total_ram_size) {
    return total_ram_size;
  }
  return static_cast<size_t>(limit);
}

#elif defined(__APPLE__) && defined(__MACH__)
bool MemoryHelper::SelfUsage(size_t *vsz, size_t *rss) {
  struct mach_task_basic_info info;
  mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT;

  if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info,
                &count) != KERN_SUCCESS) {
    return false;
  }
  *vsz = info.virtual_size;
  *rss = info.resident_size;
  return true;
}

size_t MemoryHelper::SelfRSS(void) {
  struct mach_task_basic_info info;
  mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT;

  if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info,
                &count) != KERN_SUCCESS) {
    return 0;
  }
  return info.resident_size;
}

size_t MemoryHelper::SelfPeakRSS(void) {
  struct mach_task_basic_info info;
  mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT;

  if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info,
                &count) != KERN_SUCCESS) {
    return 0;
  }
  return info.resident_size_max;
}

//! Retrieve the total physical memory in bytes through the
//! {CTL_HW, HW_MEMSIZE} sysctl. Returns 0 when the sysctl call fails.
size_t MemoryHelper::TotalRamSize(void) {
  uint64_t mem_bytes = 0;
  size_t mem_bytes_len = sizeof(mem_bytes);
  int name[2] = {CTL_HW, HW_MEMSIZE};
  const bool ok = (sysctl(name, 2, &mem_bytes, &mem_bytes_len, 0, 0) == 0);
  return ok ? (size_t)mem_bytes : 0;
}

size_t MemoryHelper::AvailableRamSize(void) {
  struct vm_statistics stat;
  mach_msg_type_number_t count = HOST_VM_INFO_COUNT;
  vm_size_t pagesize = 0;

  if (host_page_size(mach_host_self(), &pagesize) != KERN_SUCCESS) {
    return 0;
  }
  if (host_statistics(mach_host_self(), HOST_VM_INFO, (host_info_t)&stat,
                      &count) != KERN_SUCCESS) {
    return 0;
  }
  return ((stat.free_count + stat.inactive_count) * pagesize);
}

size_t MemoryHelper::UsedRamSize(void) {
  struct vm_statistics stat;
  mach_msg_type_number_t count = HOST_VM_INFO_COUNT;
  vm_size_t pagesize = 0;

  if (host_page_size(mach_host_self(), &pagesize) != KERN_SUCCESS) {
    return 0;
  }
  if (host_statistics(mach_host_self(), HOST_VM_INFO, (host_info_t)&stat,
                      &count) != KERN_SUCCESS) {
    return 0;
  }
  return ((stat.active_count + stat.wire_count) * pagesize);
}

//! Container limits are not queried on this platform; always reports 0
size_t MemoryHelper::ContainerAwareTotalRamSize(void) {
  return static_cast<size_t>(0);
}

#elif defined(_WIN64) || defined(_WIN32)
static inline int getpagesize(void) {
  SYSTEM_INFO info;
  GetSystemInfo(&info);
  return info.dwPageSize;
}

bool MemoryHelper::SelfUsage(size_t *vsz, size_t *rss) {
  PROCESS_MEMORY_COUNTERS info;
  if (!GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info))) {
    return false;
  }
  *vsz = (size_t)info.PagefileUsage;
  *rss = (size_t)info.WorkingSetSize;
  return true;
}

size_t MemoryHelper::SelfRSS(void) {
  PROCESS_MEMORY_COUNTERS info;
  if (!GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info))) {
    return 0u;
  }
  return (size_t)info.WorkingSetSize;
}

//! Retrieve the peak working set size of the current process in bytes from
//! GetProcessMemoryInfo(). Returns 0 when the query fails.
size_t MemoryHelper::SelfPeakRSS(void) {
  PROCESS_MEMORY_COUNTERS info;
  // On failure `info` is not filled in, so it must not be read.
  if (!GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info))) {
    return 0u;
  }
  return (size_t)info.PeakWorkingSetSize;
}

//! Retrieve the total physical memory in bytes from GlobalMemoryStatusEx().
//! Returns 0 when the query fails.
size_t MemoryHelper::TotalRamSize(void) {
  MEMORYSTATUSEX status;
  status.dwLength = sizeof(status);  // must be set before the call
  if (!GlobalMemoryStatusEx(&status)) {
    // The remaining fields are uninitialized on failure.
    return 0u;
  }
  return (size_t)status.ullTotalPhys;
}

//! Retrieve the available physical memory in bytes from
//! GlobalMemoryStatusEx(). Returns 0 when the query fails.
size_t MemoryHelper::AvailableRamSize(void) {
  MEMORYSTATUSEX status;
  status.dwLength = sizeof(status);  // must be set before the call
  if (!GlobalMemoryStatusEx(&status)) {
    // The remaining fields are uninitialized on failure.
    return 0u;
  }
  return (size_t)status.ullAvailPhys;
}

//! Retrieve the used physical memory in bytes (total minus available) from
//! GlobalMemoryStatusEx(). Returns 0 when the query fails.
size_t MemoryHelper::UsedRamSize(void) {
  MEMORYSTATUSEX status;
  status.dwLength = sizeof(status);  // must be set before the call
  if (!GlobalMemoryStatusEx(&status)) {
    // The remaining fields are uninitialized on failure.
    return 0u;
  }
  return (size_t)(status.ullTotalPhys - status.ullAvailPhys);
}

//! Container limits are not queried on this platform; always reports 0
size_t MemoryHelper::ContainerAwareTotalRamSize(void) {
  return static_cast<size_t>(0);
}

#else
//! Unsupported platform: zero both outputs and report failure
bool MemoryHelper::SelfUsage(size_t *vsz, size_t *rss) {
  *rss = 0u;
  *vsz = 0u;
  return false;
}

//! Unsupported platform: resident set size is reported as 0
size_t MemoryHelper::SelfRSS(void) {
  return static_cast<size_t>(0);
}

//! Unsupported platform: peak resident set size is reported as 0
size_t MemoryHelper::SelfPeakRSS(void) {
  return static_cast<size_t>(0);
}

//! Unsupported platform: total physical memory is reported as 0
size_t MemoryHelper::TotalRamSize(void) {
  return static_cast<size_t>(0);
}

//! Unsupported platform: available physical memory is reported as 0
size_t MemoryHelper::AvailableRamSize(void) {
  return static_cast<size_t>(0);
}

//! Unsupported platform: used physical memory is reported as 0
size_t MemoryHelper::UsedRamSize(void) {
  return static_cast<size_t>(0);
}

//! Unsupported platform: container-aware total memory is reported as 0
size_t MemoryHelper::ContainerAwareTotalRamSize(void) {
  return static_cast<size_t>(0);
}
#endif

//! Retrieve the memory page size in bytes. getpagesize() is queried on the
//! first call only; the value is kept in a function-local static afterwards.
size_t MemoryHelper::PageSize(void) {
  static const size_t kCachedPageSize = static_cast<size_t>(getpagesize());
  return kCachedPageSize;
}

//! Retrieve the huge page size in bytes; this is a fixed 2 MiB constant and
//! is not queried from the system
size_t MemoryHelper::HugePageSize(void) {
  static const size_t kHugePageBytes = static_cast<size_t>(2 * 1024 * 1024);
  return kHugePageBytes;
}

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/utility/memory_helper.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <zvec/ailego/internal/platform.h>

namespace zvec {
namespace ailego {

/*! Memory Helper
 */
struct MemoryHelper {
  //! Page size of memory, in bytes
  static size_t PageSize();

  //! Huge page size of memory, in bytes
  static size_t HugePageSize();

  //! Virtual size (`vsz`) and resident set size (`rss`) of the current
  //! process, in bytes; returns false when they cannot be retrieved
  static bool SelfUsage(size_t *vsz, size_t *rss);

  //! Resident set size of the current process, in bytes
  static size_t SelfRSS();

  //! Peak resident set size of the current process, in bytes
  static size_t SelfPeakRSS();

  //! Total size of physical memory (RAM), in bytes
  static size_t TotalRamSize();

  //! Available size of physical memory (RAM), in bytes
  static size_t AvailableRamSize();

  //! Used size of physical memory (RAM), in bytes
  static size_t UsedRamSize();

  //! Total size of physical memory (RAM) visible inside a container, in bytes
  static size_t ContainerAwareTotalRamSize();
};

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/ailego/utility/string_helper.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <algorithm>
#include <utility>
#include <zvec/ailego/utility/string_helper.h>

namespace zvec {
namespace ailego {

//! Test whether `ref` begins with `prefix` (an empty prefix always matches)
bool StringHelper::StartsWith(const std::string &ref,
                              const std::string &prefix) {
  const size_t prefix_len = prefix.size();
  if (ref.size() < prefix_len) {
    return false;
  }
  return ref.compare(0, prefix_len, prefix) == 0;
}

//! Test whether `ref` ends with `suffix` (an empty suffix always matches)
bool StringHelper::EndsWith(const std::string &ref, const std::string &suffix) {
  const size_t ref_len = ref.size();
  const size_t suffix_len = suffix.size();
  if (ref_len < suffix_len) {
    return false;
  }
  return ref.compare(ref_len - suffix_len, suffix_len, suffix) == 0;
}

//! Remove leading whitespace (as classified by std::isspace) from `str` in
//! place
void StringHelper::LeftTrim(std::string &str) {
  // std::isspace requires a value representable as unsigned char (or EOF);
  // taking the character as unsigned char keeps bytes >= 0x80 from being
  // passed as negative ints on platforms where char is signed.
  str.erase(str.begin(),
            std::find_if(str.begin(), str.end(), [](unsigned char ch) {
              return !std::isspace(ch);
            }));
}

//! Remove trailing whitespace (as classified by std::isspace) from `str` in
//! place
void StringHelper::RightTrim(std::string &str) {
  // std::isspace requires a value representable as unsigned char (or EOF);
  // taking the character as unsigned char keeps bytes >= 0x80 from being
  // passed as negative ints on platforms where char is signed.
  str.erase(std::find_if(str.rbegin(), str.rend(),
                         [](unsigned char ch) { return !std::isspace(ch); })
                .base(),
            str.end());
}

void StringHelper::Trim(std::string &str) {
  StringHelper::RightTrim(str);
  StringHelper::LeftTrim(str);
}

std::string StringHelper::CopyLeftTrim(std::string str) {
  StringHelper::LeftTrim(str);
  return str;
}

std::string StringHelper::CopyRightTrim(std::string str) {
  StringHelper::RightTrim(str);
  return str;
}

std::string StringHelper::CopyTrim(std::string str) {
  StringHelper::Trim(str);
  return str;
}

// When building with MSVC, route strncasecmp to _strnicmp so the
// case-insensitive comparison below can use one spelling.
#if defined(_MSC_VER)
#define strncasecmp _strnicmp
#endif

//! Test whether `a` and `b` are equal ignoring case; strings of different
//! lengths never compare equal
bool StringHelper::CompareIgnoreCase(const std::string &a,
                                     const std::string &b) {
  const size_t len = a.size();
  return (len == b.size()) && (strncasecmp(a.data(), b.data(), len) == 0);
}

//! Append one piece to `*str`, reserving the final size up front
void StringHelper::Append(std::string *str, const internal::Alphameric &a) {
  const size_t extra = a.size();
  str->reserve(str->size() + extra);
  str->append(a.data(), a.size());
}

//! Append two pieces to `*str` in order, reserving the final size up front
void StringHelper::Append(std::string *str, const internal::Alphameric &a,
                          const internal::Alphameric &b) {
  const size_t extra = a.size() + b.size();
  str->reserve(str->size() + extra);
  str->append(a.data(), a.size());
  str->append(b.data(), b.size());
}

//! Append three pieces to `*str` in order, reserving the final size up front
void StringHelper::Append(std::string *str, const internal::Alphameric &a,
                          const internal::Alphameric &b,
                          const internal::Alphameric &c) {
  const size_t extra = a.size() + b.size() + c.size();
  str->reserve(str->size() + extra);
  str->append(a.data(), a.size());
  str->append(b.data(), b.size());
  str->append(c.data(), c.size());
}

//! Append four pieces to `*str` in order, reserving the final size up front
void StringHelper::Append(std::string *str, const internal::Alphameric &a,
                          const internal::Alphameric &b,
                          const internal::Alphameric &c,
                          const internal::Alphameric &d) {
  const size_t extra = a.size() + b.size() + c.size() + d.size();
  str->reserve(str->size() + extra);
  str->append(a.data(), a.size());
  str->append(b.data(), b.size());
  str->append(c.data(), c.size());
  str->append(d.data(), d.size());
}

void StringHelper::AppendViews(std::string *str,
                               std::initializer_list<StringView> views) {
  size_t new_size = str->size();
  for (auto &v : views) {
    new_size += v.size();
  }
  str->reserve(new_size);
  for (auto &v : views) {
    str->append(v.data(), v.size());
  }
}

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/utility/time_helper.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <zvec/ailego/utility/time_helper.h>

#if defined(_WIN64) || defined(_WIN32)
#include <Windows.h>
#endif

namespace zvec {
namespace ailego {

#if defined(_WIN64) || defined(_WIN32)
uint64_t Monotime::NanoSeconds(void) {
  LARGE_INTEGER stamp, freq;
  QueryPerformanceFrequency(&freq);
  QueryPerformanceCounter(&stamp);
  return (uint64_t)((double)stamp.QuadPart *
                    (1000000000.0 / (double)freq.QuadPart));
}

uint64_t Monotime::MicroSeconds(void) {
  LARGE_INTEGER stamp, freq;
  QueryPerformanceFrequency(&freq);
  QueryPerformanceCounter(&stamp);
  return (stamp.QuadPart * 1000000u / freq.QuadPart);
}

uint64_t Monotime::MilliSeconds(void) {
  LARGE_INTEGER stamp, freq;
  QueryPerformanceFrequency(&freq);
  QueryPerformanceCounter(&stamp);
  return (stamp.QuadPart * 1000u / freq.QuadPart);
}

uint64_t Monotime::Seconds(void) {
  LARGE_INTEGER stamp, freq;
  QueryPerformanceFrequency(&freq);
  QueryPerformanceCounter(&stamp);
  return (stamp.QuadPart / freq.QuadPart);
}

// January 1, 1970 (start of the Unix epoch) expressed in FILETIME ticks,
// i.e. 100-nanosecond intervals since January 1, 1601 (UTC)
#define UNIX_TIME_START 0x019DB1DED53E8000ull

//! Read the system time as a FILETIME and return the number of ticks elapsed
//! since UNIX_TIME_START
static inline uint64_t SystemTicksSinceUnixStart(void) {
  FILETIME file;
  GetSystemTimeAsFileTime(&file);

  LARGE_INTEGER stamp;
  stamp.HighPart = file.dwHighDateTime;
  stamp.LowPart = file.dwLowDateTime;
  return (stamp.QuadPart - UNIX_TIME_START);
}

//! Wall-clock time since the Unix epoch in nanoseconds (ticks * 100)
uint64_t Realtime::NanoSeconds(void) {
  return SystemTicksSinceUnixStart() * 100u;
}

//! Wall-clock time since the Unix epoch in microseconds (ticks / 10)
uint64_t Realtime::MicroSeconds(void) {
  return SystemTicksSinceUnixStart() / 10u;
}

//! Wall-clock time since the Unix epoch in milliseconds (ticks / 10000)
uint64_t Realtime::MilliSeconds(void) {
  return SystemTicksSinceUnixStart() / 10000u;
}

//! Wall-clock time since the Unix epoch in seconds (ticks / 10000000)
uint64_t Realtime::Seconds(void) {
  return SystemTicksSinceUnixStart() / 10000000u;
}

//! Format `stamp` (seconds since the Unix epoch) as local time into `buf`
//! using the strftime `format`. Returns the number of characters written, or
//! 0 when the stamp cannot be converted or the result does not fit in `len`.
size_t Realtime::Localtime(uint64_t stamp, const char *format, char *buf,
                           size_t len) {
  time_t val = static_cast<time_t>(stamp);
  // localtime() yields NULL for values it cannot represent; strftime must not
  // be handed a null pointer.
  const struct tm *broken_down = localtime(&val);
  if (!broken_down) {
    return 0;
  }
  return strftime(buf, len, format, broken_down);
}

//! Format `stamp` (seconds since the Unix epoch) as UTC into `buf` using the
//! strftime `format`. Returns the number of characters written, or 0 when the
//! stamp cannot be converted or the result does not fit in `len`.
size_t Realtime::Gmtime(uint64_t stamp, const char *format, char *buf,
                        size_t len) {
  time_t val = static_cast<time_t>(stamp);
  // gmtime() yields NULL for values it cannot represent.
  const struct tm *broken_down = gmtime(&val);
  if (!broken_down) {
    return 0;
  }
  return strftime(buf, len, format, broken_down);
}

//! Format the current local time into `buf` using the strftime `format`.
//! Returns the number of characters written, or 0 on failure.
size_t Realtime::Localtime(const char *format, char *buf, size_t len) {
  time_t now = time(0);
  const struct tm *broken_down = localtime(&now);
  if (!broken_down) {
    return 0;
  }
  return strftime(buf, len, format, broken_down);
}

//! Format the current UTC time into `buf` using the strftime `format`.
//! Returns the number of characters written, or 0 on failure.
size_t Realtime::Gmtime(const char *format, char *buf, size_t len) {
  time_t now = time(0);
  const struct tm *broken_down = gmtime(&now);
  if (!broken_down) {
    return 0;
  }
  return strftime(buf, len, format, broken_down);
}
#else
//! Monotonic clock reading in nanoseconds; 0 when the clock cannot be read
uint64_t Monotime::NanoSeconds(void) {
  struct timespec tspec;
  if (clock_gettime(CLOCK_MONOTONIC, &tspec) != 0) {
    return 0;  // tspec is not filled in on failure
  }
  // Widen to 64 bits before scaling so the product does not overflow where
  // time_t is 32 bits wide.
  return (static_cast<uint64_t>(tspec.tv_sec) * 1000000000u +
          static_cast<uint64_t>(tspec.tv_nsec));
}

//! Monotonic clock reading in microseconds; 0 when the clock cannot be read
uint64_t Monotime::MicroSeconds(void) {
  struct timespec tspec;
  if (clock_gettime(CLOCK_MONOTONIC, &tspec) != 0) {
    return 0;
  }
  return (static_cast<uint64_t>(tspec.tv_sec) * 1000000u +
          static_cast<uint64_t>(tspec.tv_nsec) / 1000u);
}

//! Monotonic clock reading in milliseconds; 0 when the clock cannot be read
uint64_t Monotime::MilliSeconds(void) {
  struct timespec tspec;
  if (clock_gettime(CLOCK_MONOTONIC, &tspec) != 0) {
    return 0;
  }
  return (static_cast<uint64_t>(tspec.tv_sec) * 1000u +
          static_cast<uint64_t>(tspec.tv_nsec) / 1000000u);
}

//! Monotonic clock reading in whole seconds; 0 when the clock cannot be read
uint64_t Monotime::Seconds(void) {
  struct timespec tspec;
  if (clock_gettime(CLOCK_MONOTONIC, &tspec) != 0) {
    return 0;
  }
  return static_cast<uint64_t>(tspec.tv_sec);
}

//! Wall-clock reading in nanoseconds; 0 when the clock cannot be read
uint64_t Realtime::NanoSeconds(void) {
  struct timespec tspec;
  if (clock_gettime(CLOCK_REALTIME, &tspec) != 0) {
    return 0;  // tspec is not filled in on failure
  }
  // Widen to 64 bits before scaling so the product does not overflow where
  // time_t is 32 bits wide.
  return (static_cast<uint64_t>(tspec.tv_sec) * 1000000000u +
          static_cast<uint64_t>(tspec.tv_nsec));
}

//! Wall-clock reading in microseconds; 0 when the clock cannot be read
uint64_t Realtime::MicroSeconds(void) {
  struct timespec tspec;
  if (clock_gettime(CLOCK_REALTIME, &tspec) != 0) {
    return 0;
  }
  return (static_cast<uint64_t>(tspec.tv_sec) * 1000000u +
          static_cast<uint64_t>(tspec.tv_nsec) / 1000u);
}

//! Wall-clock reading in milliseconds; 0 when the clock cannot be read
uint64_t Realtime::MilliSeconds(void) {
  struct timespec tspec;
  if (clock_gettime(CLOCK_REALTIME, &tspec) != 0) {
    return 0;
  }
  return (static_cast<uint64_t>(tspec.tv_sec) * 1000u +
          static_cast<uint64_t>(tspec.tv_nsec) / 1000000u);
}

//! Wall-clock reading in whole seconds; 0 when the clock cannot be read
uint64_t Realtime::Seconds(void) {
  struct timespec tspec;
  if (clock_gettime(CLOCK_REALTIME, &tspec) != 0) {
    return 0;
  }
  return static_cast<uint64_t>(tspec.tv_sec);
}

//! Format `stamp` (seconds since the Unix epoch) as local time into `buf`
//! using the strftime `format`. Returns the number of characters written, or
//! 0 when the stamp cannot be converted or the result does not fit in `len`.
size_t Realtime::Localtime(uint64_t stamp, const char *format, char *buf,
                           size_t len) {
  struct tm tmbuf;
  time_t val = static_cast<time_t>(stamp);
  // localtime_r() returns NULL when the value cannot be represented;
  // strftime must not be handed a null pointer.
  if (!localtime_r(&val, &tmbuf)) {
    return 0;
  }
  return strftime(buf, len, format, &tmbuf);
}

//! Format `stamp` (seconds since the Unix epoch) as UTC into `buf` using the
//! strftime `format`. Returns the number of characters written, or 0 when the
//! stamp cannot be converted or the result does not fit in `len`.
size_t Realtime::Gmtime(uint64_t stamp, const char *format, char *buf,
                        size_t len) {
  struct tm tmbuf;
  time_t val = static_cast<time_t>(stamp);
  // gmtime_r() returns NULL when the value cannot be represented.
  if (!gmtime_r(&val, &tmbuf)) {
    return 0;
  }
  return strftime(buf, len, format, &tmbuf);
}

//! Format the current local time into `buf` using the strftime `format`.
//! Returns the number of characters written, or 0 on failure.
size_t Realtime::Localtime(const char *format, char *buf, size_t len) {
  struct tm tmbuf;
  time_t now = time(0);
  if (!localtime_r(&now, &tmbuf)) {
    return 0;
  }
  return strftime(buf, len, format, &tmbuf);
}

//! Format the current UTC time into `buf` using the strftime `format`.
//! Returns the number of characters written, or 0 on failure.
size_t Realtime::Gmtime(const char *format, char *buf, size_t len) {
  struct tm tmbuf;
  time_t now = time(0);
  if (!gmtime_r(&now, &tmbuf)) {
    return 0;
  }
  return strftime(buf, len, format, &tmbuf);
}

//! CPU time of the calling thread in nanoseconds; 0 when the clock cannot be
//! read
uint64_t CPUtime::NanoSeconds(void) {
  struct timespec tspec;
  if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tspec) != 0) {
    return 0;  // tspec is not filled in on failure
  }
  // Widen to 64 bits before scaling so the product does not overflow where
  // time_t is 32 bits wide.
  return (static_cast<uint64_t>(tspec.tv_sec) * 1000000000u +
          static_cast<uint64_t>(tspec.tv_nsec));
}

//! CPU time of the calling thread in microseconds; 0 when the clock cannot be
//! read
uint64_t CPUtime::MicroSeconds(void) {
  struct timespec tspec;
  if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tspec) != 0) {
    return 0;
  }
  return (static_cast<uint64_t>(tspec.tv_sec) * 1000000u +
          static_cast<uint64_t>(tspec.tv_nsec) / 1000u);
}

//! CPU time of the calling thread in milliseconds; 0 when the clock cannot be
//! read
uint64_t CPUtime::MilliSeconds(void) {
  struct timespec tspec;
  if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tspec) != 0) {
    return 0;
  }
  return (static_cast<uint64_t>(tspec.tv_sec) * 1000u +
          static_cast<uint64_t>(tspec.tv_nsec) / 1000000u);
}

//! CPU time of the calling thread in whole seconds; 0 when the clock cannot
//! be read
uint64_t CPUtime::Seconds(void) {
  struct timespec tspec;
  if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tspec) != 0) {
    return 0;
  }
  return static_cast<uint64_t>(tspec.tv_sec);
}
#endif  // _WIN64 || _WIN32

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/ailego/version.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "version.h"
#include "version.i"

// Use the `ailego_VERSION` macro as the version string when it is defined
// (presumably injected by the build system -- confirm); otherwise fall back
// to "unknown".
#ifdef ailego_VERSION
#define AILEGO_VERSION_STRING ailego_VERSION
#else
#define AILEGO_VERSION_STRING "unknown"
#endif

namespace zvec {

namespace ailego {

//! Detailed version text returned by Version::Details(). The banner below is
//! passed through AILEGO_VERSION_COMPILE_DETAILS (defined outside this file),
//! which presumably adds compile-time information -- confirm in version.i.
static const char AILEGO_VERSION_DETAILS[] = AILEGO_VERSION_COMPILE_DETAILS(
    "AiLego Library Version " AILEGO_VERSION_STRING
    ".\nCopyright (C) The Software Authors. All rights reserved.\n");

//! Retrieve the version number as a static string
const char *Version::String(void) {
  const char *const version_text = AILEGO_VERSION_STRING;
  return version_text;
}

//! Retrieve the detailed version information as a static string
const char *Version::Details(void) {
  const char *const details_text = AILEGO_VERSION_DETAILS;
  return details_text;
}

}  // namespace ailego
}  // namespace zvec

// extern "C" int __wrap_main(int, char *[]) {
//   fwrite(ailego::AILEGO_VERSION_DETAILS, 1,
//          strlen(ailego::AILEGO_VERSION_DETAILS), stdout);
//   fflush(stdout);
//   _Exit(0);
// }


================================================
FILE: src/ailego/version.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

namespace zvec {

namespace ailego {

/*! AiLego Version
 */
struct Version {
  //! Version number as a C string
  static const char *String();

  //! Detailed version information as a C string
  static const char *Details();
};

}  // namespace ailego

}  // namespace zvec

================================================
FILE: src/ailego/version.i
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <zvec/ailego/internal/platform.h>

// Two-level stringification: the outer macro expands its argument first, then
// the inner one applies `#`, so AILEGO_VERSION_TO_STRING(SOME_MACRO) yields
// the macro's value as a string literal rather than its name.
#ifndef AILEGO_VERSION_TO_STRING_
#define AILEGO_VERSION_TO_STRING_(x) #x
#endif

#ifndef AILEGO_VERSION_TO_STRING
#define AILEGO_VERSION_TO_STRING(x) AILEGO_VERSION_TO_STRING_(x)
#endif

/*! http://nadeausoftware.com/articles/2012/01/
 *  c_c_tip_how_use_compiler_predefined_macros_detect_operating_system
 */
// Select a human-readable platform name from compiler-predefined macros. The
// first matching branch wins, so the order of the checks matters; anything
// unmatched is reported as "Unknown Platform".
#if defined(__linux) || defined(__linux__)
#define AILEGO_VERSION_PLATFORM "Linux"
#elif defined(__FreeBSD__)
#define AILEGO_VERSION_PLATFORM "FreeBSD"
#elif defined(__NetBSD__)
#define AILEGO_VERSION_PLATFORM "NetBSD"
#elif defined(__OpenBSD__)
#define AILEGO_VERSION_PLATFORM "OpenBSD"
#elif defined(__APPLE__) || defined(__MACH__)
#define AILEGO_VERSION_PLATFORM "Darwin"
#elif defined(__CYGWIN__) && !defined(_WIN32)
#define AILEGO_VERSION_PLATFORM "Cygwin"
// _WIN64 is tested before _WIN32 so 64-bit builds get the 64-bit label.
#elif defined(_WIN64)
#define AILEGO_VERSION_PLATFORM "Microsoft Windows (64-bit)"
#elif defined(_WIN32)
#define AILEGO_VERSION_PLATFORM "Microsoft Windows (32-bit)"
#elif defined(__sun) && defined(__SVR4)
#define AILEGO_VERSION_PLATFORM "Solaris"
#elif defined(_AIX)
#define AILEGO_VERSION_PLATFORM "AIX"
#elif defined(__hpux)
#define AILEGO_VERSION_PLATFORM "HP-UX"
#elif defined(__unix) || defined(__unix__)
#define AILEGO_VERSION_PLATFORM "Unix"
#else
#define AILEGO_VERSION_PLATFORM "Unknown Platform"
#endif

/*! Human-readable compiler name, chosen from compiler predefined macros.
 *  The first matching branch wins: NVCC is tested before Clang, and Clang
 *  and Intel are tested before the GNU macros.
 *  Reference: http://nadeausoftware.com/articles/2012/10/
 *  c_c_tip_how_detect_compiler_name_and_version_using_compiler_predefined_macros
 */
#if defined(__NVCC__)
#define AILEGO_VERSION_COMPILER_NAME "Nvidia CUDA Compiler"
#elif defined(__clang__)
#define AILEGO_VERSION_COMPILER_NAME "Clang/LLVM"
#elif defined(__ICC) || defined(__INTEL_COMPILER)
#define AILEGO_VERSION_COMPILER_NAME "Intel ICC/ICPC"
#elif defined(__GNUC__) || defined(__GNUG__)
#define AILEGO_VERSION_COMPILER_NAME "GNU GCC/G++"
#elif defined(__HP_cc) || defined(__HP_aCC)
#define AILEGO_VERSION_COMPILER_NAME "Hewlett-Packard C/aC++"
#elif defined(__IBMC__) || defined(__IBMCPP__)
#define AILEGO_VERSION_COMPILER_NAME "IBM XL C/C++"
#elif defined(_MSC_VER)
#define AILEGO_VERSION_COMPILER_NAME "Microsoft Visual C++"
#elif defined(__PGI)
#define AILEGO_VERSION_COMPILER_NAME "Portland Group PGCC/PGCPP"
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
#define AILEGO_VERSION_COMPILER_NAME "Oracle Solaris Studio"
#else
#define AILEGO_VERSION_COMPILER_NAME "Unknown Compiler"
#endif

//! Compiler name followed by its version in parentheses, when a version
//! macro is available. The first matching branch wins. Macros concatenated
//! directly (__VERSION__, __xlc__) must already expand to string literals;
//! the others are converted with AILEGO_VERSION_TO_STRING.
#if defined(__CUDACC_VER_MAJOR__)
#define AILEGO_VERSION_COMPILER \
  AILEGO_VERSION_COMPILER_NAME  \
  " (" AILEGO_VERSION_TO_STRING(__CUDACC_VER_MAJOR__) \
  "." AILEGO_VERSION_TO_STRING(__CUDACC_VER_MINOR__)  \
  "." AILEGO_VERSION_TO_STRING(__CUDACC_VER_BUILD__) ")"
#elif defined(__VERSION__)
#define AILEGO_VERSION_COMPILER \
  AILEGO_VERSION_COMPILER_NAME " (" __VERSION__ ")"
// _MSC_FULL_VER is preferred over _MSC_VER when both are defined
#elif defined(_MSC_FULL_VER)
#define AILEGO_VERSION_COMPILER \
  AILEGO_VERSION_COMPILER_NAME " (" AILEGO_VERSION_TO_STRING(_MSC_FULL_VER) ")"
#elif defined(_MSC_VER)
#define AILEGO_VERSION_COMPILER \
  AILEGO_VERSION_COMPILER_NAME " (" AILEGO_VERSION_TO_STRING(_MSC_VER) ")"
#elif defined(__PGIC__)
#define AILEGO_VERSION_COMPILER                                         \
  AILEGO_VERSION_COMPILER_NAME                                          \
  " (" AILEGO_VERSION_TO_STRING(__PGIC__) "." AILEGO_VERSION_TO_STRING( \
      __PGIC_MINOR__) "." AILEGO_VERSION_TO_STRING(__PGIC_PATCHLEVEL__) ")"
#elif defined(__xlc__)
#define AILEGO_VERSION_COMPILER AILEGO_VERSION_COMPILER_NAME " (" __xlc__ ")"
#elif defined(__SUNPRO_C)
#define AILEGO_VERSION_COMPILER \
  AILEGO_VERSION_COMPILER_NAME " (" AILEGO_VERSION_TO_STRING(__SUNPRO_C) ")"
#elif defined(__HP_cc)
#define AILEGO_VERSION_COMPILER \
  AILEGO_VERSION_COMPILER_NAME " (" AILEGO_VERSION_TO_STRING(__HP_cc) ")"
#else
// No version macro recognised: report the compiler name only
#define AILEGO_VERSION_COMPILER AILEGO_VERSION_COMPILER_NAME
#endif

//! Name of the target processor architecture, chosen from compiler
//! predefined macros (GCC/Clang spellings as well as MSVC `_M_*` ones).
//! The first matching branch wins.
#if defined(__x86_64__) || defined(__amd64__) || defined(_M_X64)
#define AILEGO_VERSION_PROCESSOR "x86 64-bit Processor"
#elif defined(__i386) || defined(__i386__) || defined(_M_IX86)
#define AILEGO_VERSION_PROCESSOR "x86 32-bit Processor"
// 64-bit ARM: ACLE state macro, GCC/Clang architecture macro, or MSVC macro
#elif defined(__ARM_64BIT_STATE) || defined(__aarch64__) || defined(_M_ARM64)
#define AILEGO_VERSION_PROCESSOR "ARM 64-bit Processor"
// Any other ARM target is reported as 32-bit
#elif defined(__ARM_ARCH) || defined(__arm__) || defined(_M_ARM)
#define AILEGO_VERSION_PROCESSOR "ARM 32-bit Processor"
#elif defined(__ia64) || defined(__itanium__) || defined(_M_IA64)
#define AILEGO_VERSION_PROCESSOR "Itanium Processor"
// The 64-bit PowerPC check must precede the generic PowerPC check
#elif defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__)
#define AILEGO_VERSION_PROCESSOR "PowerPC 64-bit Processor"
#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)
#define AILEGO_VERSION_PROCESSOR "PowerPC 32-bit Processor"
#elif defined(__sparc)
#define AILEGO_VERSION_PROCESSOR "SPARC Processor"
#else
#define AILEGO_VERSION_PROCESSOR "Unknown Processor"
#endif

//! Byte order of the target. Every comparison is guarded with `defined()`:
//! an undefined identifier evaluates to 0 inside `#if`, so an unguarded
//! `__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__` would be true (0 == 0) on
//! compilers that provide neither macro.
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
    (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define AILEGO_VERSION_BYTE_ORDER "  Little-endian Byte Order\n"
#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
    (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define AILEGO_VERSION_BYTE_ORDER "  Big-endian Byte Order\n"
#elif defined(__BYTE_ORDER__) && defined(__ORDER_PDP_ENDIAN__) && \
    (__BYTE_ORDER__ == __ORDER_PDP_ENDIAN__)
#define AILEGO_VERSION_BYTE_ORDER "  PDP-endian Byte Order\n"
#elif defined(_WIN32)
// Windows toolchains without __BYTE_ORDER__ target little-endian machines
#define AILEGO_VERSION_BYTE_ORDER "  Little-endian Byte Order\n"
#else
// Byte order cannot be determined at preprocessing time
#define AILEGO_VERSION_BYTE_ORDER ""
#endif

//! Reported when _DEBUG is set, or when neither __OPTIMIZE__ nor NDEBUG is
#if !defined(_DEBUG) && (defined(__OPTIMIZE__) || defined(NDEBUG))
#define AILEGO_VERSION_DEBUG_INFO ""
#else
#define AILEGO_VERSION_DEBUG_INFO "  Debug Information\n"
#endif

//! Reported when the translation unit is built with AddressSanitizer.
//! `__SANITIZE_ADDRESS__` is checked first; compilers that expose the
//! `__has_feature` probe instead (as documented by Clang) are checked next.
#if defined(__SANITIZE_ADDRESS__)
#define AILEGO_VERSION_ASAN "  Address Sanitizer\n"
#elif defined(__has_feature)
// Nested so that `__has_feature(...)` is only parsed where it exists
#if __has_feature(address_sanitizer)
#define AILEGO_VERSION_ASAN "  Address Sanitizer\n"
#endif
#endif
#ifndef AILEGO_VERSION_ASAN
#define AILEGO_VERSION_ASAN ""
#endif

// Each macro below expands to one indented, newline-terminated line for the
// "Compiled with" section, or to an empty string when its source macro is
// not defined.

//! C standard level, from __STDC_VERSION__
#ifndef __STDC_VERSION__
#define AILEGO_VERSION_STDC ""
#else
#define AILEGO_VERSION_STDC \
  "  C Standard " AILEGO_VERSION_TO_STRING(__STDC_VERSION__) "\n"
#endif

//! C++ standard level, from __cplusplus
#ifndef __cplusplus
#define AILEGO_VERSION_CPLUSPLUS ""
#else
#define AILEGO_VERSION_CPLUSPLUS \
  "  C++ Standard " AILEGO_VERSION_TO_STRING(__cplusplus) "\n"
#endif

//! GNU C++ ABI version, from __GXX_ABI_VERSION
#ifndef __GXX_ABI_VERSION
#define AILEGO_VERSION_GXX_ABI ""
#else
#define AILEGO_VERSION_GXX_ABI \
  "  GNU C++ ABI " AILEGO_VERSION_TO_STRING(__GXX_ABI_VERSION) "\n"
#endif

//! GNU glibc "major.minor", from __GLIBC__ and __GLIBC_MINOR__
#ifndef __GLIBC__
#define AILEGO_VERSION_GLIBC ""
#else
#define AILEGO_VERSION_GLIBC                         \
  "  GNU glibc " AILEGO_VERSION_TO_STRING(__GLIBC__) \
  "." AILEGO_VERSION_TO_STRING(__GLIBC_MINOR__) "\n"
#endif

//! Windows SDK target version, from WINVER
#ifndef WINVER
#define AILEGO_VERSION_WINSDK ""
#else
#define AILEGO_VERSION_WINSDK \
  "  Microsoft Windows SDK " AILEGO_VERSION_TO_STRING(WINVER) "\n"
#endif

//! Microsoft CLR version, from __CLR_VER
#ifndef __CLR_VER
#define AILEGO_VERSION_CLR ""
#else
#define AILEGO_VERSION_CLR \
  "  Microsoft CLR " AILEGO_VERSION_TO_STRING(__CLR_VER) "\n"
#endif

//! Linux Standards Base version, from __LSB_VERSION__
#ifndef __LSB_VERSION__
#define AILEGO_VERSION_LSB ""
#else
#define AILEGO_VERSION_LSB \
  "  Linux Standards Base " AILEGO_VERSION_TO_STRING(__LSB_VERSION__) "\n"
#endif

//! POSIX specification level, from _POSIX_VERSION
#ifndef _POSIX_VERSION
#define AILEGO_VERSION_POSIX ""
#else
#define AILEGO_VERSION_POSIX \
  "  POSIX Specification " AILEGO_VERSION_TO_STRING(_POSIX_VERSION) "\n"
#endif

//! X/Open specification level, from _XOPEN_VERSION
#ifndef _XOPEN_VERSION
#define AILEGO_VERSION_XOPEN ""
#else
#define AILEGO_VERSION_XOPEN \
  "  X/Open Specification " AILEGO_VERSION_TO_STRING(_XOPEN_VERSION) "\n"
#endif

//! OpenMP API version, from _OPENMP
#ifndef _OPENMP
#define AILEGO_VERSION_OPENMP ""
#else
#define AILEGO_VERSION_OPENMP \
  "  OpenMP API " AILEGO_VERSION_TO_STRING(_OPENMP) "\n"
#endif

//! SIMD instruction set the translation unit is compiled for. Only one entry
//! is reported: the first macro in this chain that is defined. The x86
//! branches run from AVX-512FP16 down to MMX, so the first hit is the most
//! capable set in that list.
#if defined(__ARM_NEON)
#define AILEGO_VERSION_SIMD "  Arm Neon Instruction Set\n"
#elif defined(__AVX512FP16__)
#define AILEGO_VERSION_SIMD "  AVX-512FP16 Instruction Set\n"
#elif defined(__AVX512F__)
#define AILEGO_VERSION_SIMD "  AVX-512F Instruction Set\n"
#elif defined(__AVX2__)
#define AILEGO_VERSION_SIMD "  AVX-2 Instruction Set\n"
#elif defined(__AVX__)
#define AILEGO_VERSION_SIMD "  AVX Instruction Set\n"
#elif defined(__SSE4_2__)
#define AILEGO_VERSION_SIMD "  SSE-4.2 Instruction Set\n"
#elif defined(__SSE4_1__)
#define AILEGO_VERSION_SIMD "  SSE-4.1 Instruction Set\n"
#elif defined(__SSSE3__)
#define AILEGO_VERSION_SIMD "  SSSE-3 Instruction Set\n"
#elif defined(__SSE3__)
#define AILEGO_VERSION_SIMD "  SSE-3 Instruction Set\n"
#elif defined(__SSE2__)
#define AILEGO_VERSION_SIMD "  SSE-2 Instruction Set\n"
#elif defined(__SSE__)
#define AILEGO_VERSION_SIMD "  SSE Instruction Set\n"
#elif defined(__MMX__)
#define AILEGO_VERSION_SIMD "  MMX Instruction Set\n"
#else
// None of the macros above is defined: nothing is reported
#define AILEGO_VERSION_SIMD ""
#endif

//! Python API version line, emitted only when PY_VERSION is defined before
//! this file is included. PY_VERSION is concatenated directly, so it must
//! expand to a string literal. The release level selects the suffix
//! (Alpha / Beta / Release Candidate with the serial number, or Final).
//! NOTE(review): the PY_* macros are presumably supplied by the CPython
//! headers; this file does not include them itself — confirm at call sites.
#if defined(PY_VERSION)
#if PY_RELEASE_LEVEL == PY_RELEASE_LEVEL_ALPHA
#define AILEGO_VERSION_PYTHON \
  "  Python API " PY_VERSION  \
  " Alpha " AILEGO_VERSION_TO_STRING(PY_RELEASE_SERIAL) "\n"
#elif PY_RELEASE_LEVEL == PY_RELEASE_LEVEL_BETA
#define AILEGO_VERSION_PYTHON \
  "  Python API " PY_VERSION  \
  " Beta " AILEGO_VERSION_TO_STRING(PY_RELEASE_SERIAL) "\n"
// The GAMMA release level is reported as "Release Candidate"
#elif PY_RELEASE_LEVEL == PY_RELEASE_LEVEL_GAMMA
#define AILEGO_VERSION_PYTHON \
  "  Python API " PY_VERSION  \
  " Release Candidate " AILEGO_VERSION_TO_STRING(PY_RELEASE_SERIAL) "\n"
#elif PY_RELEASE_LEVEL == PY_RELEASE_LEVEL_FINAL
#define AILEGO_VERSION_PYTHON "  Python API " PY_VERSION " Final\n"
#else
// Unrecognised release level: report the bare version string
#define AILEGO_VERSION_PYTHON "  Python API " PY_VERSION "\n"
#endif
#else
#define AILEGO_VERSION_PYTHON ""
#endif

//! Gather the compile-time information into one string literal.
//!
//! The expansion relies on adjacent string-literal concatenation, so
//! __PREFIX_INFO__ must itself be a string literal (it is placed first).
//! After the prefix come the compiler, the processor, the platform with
//! __DATE__ and __TIME__, and a "Compiled with:" section made of the
//! AILEGO_VERSION_* feature lines; each of those is either empty or already
//! ends with a newline. The whole text ends with one extra "\n".
#define AILEGO_VERSION_COMPILE_DETAILS(__PREFIX_INFO__)                      \
  __PREFIX_INFO__                                                            \
  "Compiled by " AILEGO_VERSION_COMPILER                                     \
  ".\n"                                                                      \
  "Compiled for " AILEGO_VERSION_PROCESSOR                                   \
  ".\n"                                                                      \
  "Compiled on " AILEGO_VERSION_PLATFORM " on " __DATE__ " " __TIME__        \
  ".\n"                                                                      \
  "Compiled with: \n"                                                        \
  "" AILEGO_VERSION_BYTE_ORDER "" AILEGO_VERSION_SIMD                        \
  "" AILEGO_VERSION_DEBUG_INFO "" AILEGO_VERSION_ASAN "" AILEGO_VERSION_STDC \
  "" AILEGO_VERSION_CPLUSPLUS "" AILEGO_VERSION_GXX_ABI                      \
  "" AILEGO_VERSION_POSIX "" AILEGO_VERSION_XOPEN "" AILEGO_VERSION_LSB      \
  "" AILEGO_VERSION_GLIBC "" AILEGO_VERSION_WINSDK "" AILEGO_VERSION_CLR     \
  "" AILEGO_VERSION_OPENMP "" AILEGO_VERSION_PYTHON "\n"


================================================
FILE: src/binding/CMakeLists.txt
================================================
# Build entry for the language bindings: pulls in the project's CMake helper
# modules, resolves the version, and descends into the Python binding.
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

# Retrieve version from git repository (result is stored in ZVEC_VERSION)
git_version(ZVEC_VERSION ${CMAKE_CURRENT_SOURCE_DIR})

# Add the `python` subdirectory, which builds the `_zvec` extension module
cc_directory(python)

================================================
FILE: src/binding/python/CMakeLists.txt
================================================
# Builds the `_zvec` pybind11 extension module from the binding sources and
# links it against the core static libraries and `zvec_db`.
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

# The binding sources are compiled as C++17 (mandatory, no fallback)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

find_package(pybind11 REQUIRED)

# Translation units that make up the extension module
set(SRC_LISTS
        binding.cc
        model/python_collection.cc
        model/python_doc.cc
        model/param/python_param.cc
        model/schema/python_schema.cc
        model/common/python_config.cc
        typing/python_type.cc
)

pybind11_add_module(_zvec ${SRC_LISTS})

# The core static libraries are linked as whole archives so that none of
# their objects is dropped by the linker.
# NOTE(review): presumably required because these libraries register their
# components through static initializers — confirm before changing.
# NOTE(review): only Linux and Apple are handled; on any other platform no
# library is linked here — confirm whether that is intended.
if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
    target_link_libraries(_zvec PRIVATE
            -Wl,--whole-archive
            $<TARGET_FILE:core_knn_flat_static>
            $<TARGET_FILE:core_knn_flat_sparse_static>
            $<TARGET_FILE:core_knn_hnsw_static>
            $<TARGET_FILE:core_knn_hnsw_rabitq_static>
            $<TARGET_FILE:core_knn_hnsw_sparse_static>
            $<TARGET_FILE:core_knn_ivf_static>
            $<TARGET_FILE:core_knn_cluster_static>
            $<TARGET_FILE:core_mix_reducer_static>
            $<TARGET_FILE:core_metric_static>
            $<TARGET_FILE:core_utility_static>
            $<TARGET_FILE:core_quantizer_static>
            -Wl,--no-whole-archive
            zvec_db
    )
    # Symbol visibility of the module is controlled by the exports.map script
    target_link_options(_zvec PRIVATE
            "LINKER:--version-script=${CMAKE_CURRENT_SOURCE_DIR}/exports.map"
    )
elseif (APPLE)
    # Apple's linker has no --whole-archive; -force_load is given per archive
    target_link_libraries(_zvec PRIVATE
            -Wl,-force_load,$<TARGET_FILE:core_knn_flat_static>
            -Wl,-force_load,$<TARGET_FILE:core_knn_flat_sparse_static>
            -Wl,-force_load,$<TARGET_FILE:core_knn_hnsw_static>
            -Wl,-force_load,$<TARGET_FILE:core_knn_hnsw_rabitq_static>
            -Wl,-force_load,$<TARGET_FILE:core_knn_hnsw_sparse_static>
            -Wl,-force_load,$<TARGET_FILE:core_knn_ivf_static>
            -Wl,-force_load,$<TARGET_FILE:core_knn_cluster_static>
            -Wl,-force_load,$<TARGET_FILE:core_mix_reducer_static>
            -Wl,-force_load,$<TARGET_FILE:core_metric_static>
            -Wl,-force_load,$<TARGET_FILE:core_utility_static>
            -Wl,-force_load,$<TARGET_FILE:core_quantizer_static>
            zvec_db
    )
    # Exported symbols are limited to the list in exports.mac
    target_link_libraries(_zvec PRIVATE
            -Wl,-exported_symbols_list,${CMAKE_CURRENT_SOURCE_DIR}/exports.mac
    )
endif ()

# Header search paths: pybind11, the project sources, and the binding headers
target_include_directories(_zvec PRIVATE ${PYBIND11_INCLUDE_DIR} ${PROJECT_ROOT_DIR}/src ${PROJECT_ROOT_DIR}/src/binding/python/include)


================================================
FILE: src/binding/python/binding.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "python_collection.h"
#include "python_config.h"
#include "python_doc.h"
#include "python_param.h"
#include "python_schema.h"
#include "python_type.h"

namespace zvec {
//! Definition of the `_zvec` Python extension module. Each binding group
//! registers itself on the module object `m` through its static
//! `Initialize` function.
PYBIND11_MODULE(_zvec, m) {
  m.doc() = "Zvec core module";

  // NOTE(review): the call order presumably matters (types first, then the
  // params, schemas, config, doc and collection bindings that may refer to
  // them) — confirm before reordering.
  ZVecPyTyping::Initialize(m);
  ZVecPyParams::Initialize(m);
  ZVecPySchemas::Initialize(m);
  ZVecPyConfig::Initialize(m);
  ZVecPyDoc::Initialize(m);
  ZVecPyCollection::Initialize(m);
}
}  // namespace zvec


================================================
FILE: src/binding/python/exports.mac
================================================
_PyInit__zvec


================================================
FILE: src/binding/python/include/python_collection.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <pybind11/pybind11.h>
#include <zvec/db/collection.h>

namespace py = pybind11;

namespace zvec {

//! Static-only holder of the Python bindings for `Collection`.
//! It cannot be instantiated (the constructor is deleted).
class ZVecPyCollection {
 public:
  ZVecPyCollection() = delete;

 public:
  //! Entry point invoked from the `_zvec` module definition
  //! @param m  module object the bindings are registered on
  static void Initialize(py::module_ &m);

 private:
  // Helpers that receive the `Collection` class binding. The names suggest
  // one helper per method group (DB / DDL / DML / DQL); the definitions are
  // not in this header.
  static void bind_db_methods(py::class_<Collection, Collection::Ptr> &col);
  static void bind_ddl_methods(py::class_<Collection, Collection::Ptr> &col);
  static void bind_dml_methods(py::class_<Collection, Collection::Ptr> &col);
  static void bind_dql_methods(py::class_<Collection, Collection::Ptr> &col);
};

}  // namespace zvec


================================================
FILE: src/binding/python/include/python_config.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <pybind11/pybind11.h>
#include <zvec/db/config.h>

namespace py = pybind11;

namespace zvec {

//! Static-only holder of the Python bindings for the global configuration.
//! It cannot be instantiated (the constructor is deleted).
class ZVecPyConfig {
 public:
  ZVecPyConfig() = delete;

 public:
  //! Entry point invoked from the `_zvec` module definition
  //! @param m  module object the bindings are registered on
  static void Initialize(py::module_ &m);
};

}  // namespace zvec


================================================
FILE: src/binding/python/include/python_doc.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <pybind11/pybind11.h>
#include <zvec/db/doc.h>

namespace py = pybind11;

namespace zvec {

//! Static-only holder of the Python bindings for documents.
//! It cannot be instantiated (the constructor is deleted).
class ZVecPyDoc {
 public:
  ZVecPyDoc() = delete;

 public:
  //! Entry point invoked from the `_zvec` module definition
  //! @param m  module object the bindings are registered on
  static void Initialize(py::module_ &m);

 private:
  // Per-group binding helpers; the definitions are not in this header
  static void bind_doc_operator(py::module_ &m);
  static void bind_doc(py::module_ &m);
};

}  // namespace zvec


================================================
FILE: src/binding/python/include/python_param.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <pybind11/pybind11.h>
#include <zvec/db/options.h>
#include <zvec/db/type.h>

namespace py = pybind11;

namespace zvec {

//! Static-only holder of the Python bindings for the parameter types.
//! It cannot be instantiated (the constructor is deleted).
class ZVecPyParams {
 public:
  ZVecPyParams() = delete;

 public:
  //! Entry point invoked from the `_zvec` module definition
  //! @param m  parent module object the bindings are registered under
  static void Initialize(py::module_ &m);

 private:
  //! Bind the index parameter classes
  static void bind_index_params(py::module_ &m);
  //! Bind the query parameter classes
  static void bind_query_params(py::module_ &m);
  //! Bind the option classes
  static void bind_options(py::module_ &m);
  //! Bind the vector query class
  static void bind_vector_query(py::module_ &m);
};

}  // namespace zvec


================================================
FILE: src/binding/python/include/python_schema.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <pybind11/pybind11.h>
#include <zvec/db/type.h>

namespace py = pybind11;

namespace zvec {

//! Static-only holder of the Python bindings for the schema types.
//! It cannot be instantiated (the constructor is deleted).
class ZVecPySchemas {
 public:
  ZVecPySchemas() = delete;

 public:
  //! Entry point invoked from the `_zvec` module definition
  //! @param m  module object the bindings are registered on
  static void Initialize(py::module_ &m);

 private:
  // Per-type binding helpers; the definitions are not in this header
  static void bind_field_schema(py::module_ &m);
  static void bind_collection_schema(py::module_ &m);
  static void bind_collection_stats(py::module_ &m);
};

}  // namespace zvec


================================================
FILE: src/binding/python/include/python_type.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <pybind11/pybind11.h>
#include <zvec/db/status.h>
#include <zvec/db/type.h>

namespace py = pybind11;

namespace zvec {

//! Static-only holder of the Python bindings for the basic types.
//! It cannot be instantiated (the constructor is deleted).
class ZVecPyTyping {
 public:
  ZVecPyTyping() = delete;

 public:
  //! Entry point invoked from the `_zvec` module definition (it is the
  //! first binding group initialized there)
  //! @param m  module object the bindings are registered on
  static void Initialize(py::module_ &m);

 private:
  // Per-type binding helpers; the definitions are not in this header
  static void bind_datatypes(py::module_ &m);
  static void bind_index_types(py::module_ &m);
  static void bind_metric_types(py::module_ &m);
  static void bind_quantize_types(py::module_ &m);
  static void bind_status(py::module_ &m);
};

}  // namespace zvec


================================================
FILE: src/binding/python/model/common/python_config.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "python_config.h"
#include <algorithm>
#include <cctype>
#include <optional>
#include <pybind11/stl.h>
#include <string>

namespace zvec {

//! Tell whether the dictionary `d` has an entry for `key`
inline bool has_key(py::dict d, const std::string &key) {
  const bool present = d.contains(key);
  return present;
}

//! Fetch `key` from `d` converted to `T`.
//! @return the converted value, or `std::nullopt` when the key is absent
//! @throws py::type_error when the stored value cannot be converted to `T`
template <typename T>
std::optional<T> get_if(py::dict d, const std::string &key) {
  if (!has_key(d, key)) {
    return std::nullopt;
  }
  try {
    py::object value = d[py::str(key)];
    return value.cast<T>();
  } catch (const py::cast_error &) {
    throw py::type_error("Key '" + key + "' is not of expected type.");
  }
}

//! Return a copy of `s` with every character converted by `std::tolower`.
//! Characters are passed as `unsigned char`: calling `tolower` with a
//! negative `char` value is undefined behavior.
inline std::string to_lower(const std::string &s) {
  std::string lower(s.size(), '\0');
  std::transform(s.begin(), s.end(), lower.begin(), [](unsigned char ch) {
    return static_cast<char>(std::tolower(ch));
  });
  return lower;
}

//! Case-insensitive equality of two strings.
//! Compares in place, without building lower-cased temporaries.
inline bool iequals(const std::string &a, const std::string &b) {
  return a.size() == b.size() &&
         std::equal(a.begin(), a.end(), b.begin(),
                    [](unsigned char lhs, unsigned char rhs) {
                      return std::tolower(lhs) == std::tolower(rhs);
                    });
}

//! Convert a log level name (case-insensitive) to `GlobalConfig::LogLevel`.
//! Accepted names: "debug", "info", "warn" / "warning", "error", "fatal".
//! @throws py::value_error naming the rejected value when `s` matches none
GlobalConfig::LogLevel str_to_loglevel(const std::string &s) {
  if (iequals(s, "debug")) return GlobalConfig::LogLevel::DEBUG;
  if (iequals(s, "info")) return GlobalConfig::LogLevel::INFO;
  if (iequals(s, "warn") || iequals(s, "warning"))
    return GlobalConfig::LogLevel::WARN;
  if (iequals(s, "error")) return GlobalConfig::LogLevel::ERROR;
  if (iequals(s, "fatal")) return GlobalConfig::LogLevel::FATAL;
  // Include the offending value so the caller can see what was rejected
  throw py::value_error("Invalid log level: " + s);
}


//! Register the module-level `Initialize` function on `m`.
//!
//! The Python function accepts any number of positional dicts plus keyword
//! arguments; they are merged into one configuration dict (later entries
//! override earlier ones, keyword arguments override positional dicts).
//! Recognised keys: memory_limit_mb, log_type, log_level, log_dir,
//! log_basename, log_file_size, log_overdue_days, query_threads,
//! optimize_threads, invert_to_forward_scan_ratio,
//! brute_force_by_keys_ratio. With an empty configuration nothing is done.
//!
//! Raises TypeError for a non-dict positional argument or a value of the
//! wrong type, ValueError for an out-of-range value, and a runtime error
//! when `GlobalConfig::Initialize` reports a failure.
void ZVecPyConfig::Initialize(pybind11::module_ &m) {
  m.def("Initialize", [](py::args args, py::kwargs kwargs) -> py::none {
    py::dict config_dict;
    // parse positional arguments: each one must be a dict
    for (auto &arg : args) {
      if (py::isinstance<py::dict>(arg)) {
        for (auto item : arg.cast<py::dict>()) {
          config_dict[item.first] = item.second;
        }
      } else {
        throw py::type_error("Positional argument must be a dict if provided");
      }
    }

    // parse keyword arguments
    if (kwargs) {
      for (auto item : kwargs) {
        config_dict[item.first] = item.second;
      }
    }

    if (config_dict.empty()) {
      return py::none();
    }

    GlobalConfig::ConfigData data;
    // config memory_limit_mb
    if (const auto mb = get_if<int64_t>(config_dict, "memory_limit_mb")) {
      if (*mb <= 0) throw py::value_error("memory_limit_mb must be positive");
      data.memory_limit_bytes = static_cast<uint64_t>(*mb) * 1024 * 1024;
    }

    // config log: only touched when log_type or log_level is given
    bool has_log_type = has_key(config_dict, "log_type");
    bool has_log_level = has_key(config_dict, "log_level");
    if (has_log_type || has_log_level) {
      std::string log_type = "console";
      std::string log_level_str = "warn";

      if (has_log_type) {
        log_type = config_dict["log_type"].cast<std::string>();
      }
      if (has_log_level) {
        log_level_str = config_dict["log_level"].cast<std::string>();
      }
      auto log_level = str_to_loglevel(log_level_str);
      if (iequals(log_type, "file")) {
        std::string dir = DEFAULT_LOG_DIR;
        std::string basename = DEFAULT_LOG_BASENAME;
        uint32_t file_size = DEFAULT_LOG_FILE_SIZE;
        uint32_t overdue_days = DEFAULT_LOG_OVERDUE_DAYS;

        if (const auto v = get_if<std::string>(config_dict, "log_dir")) {
          dir = *v;
        }
        if (const auto v = get_if<std::string>(config_dict, "log_basename")) {
          basename = *v;
        }
        if (const auto s = get_if<int32_t>(config_dict, "log_file_size")) {
          if (*s <= 0) {
            throw py::value_error("log_file_size must be positive");
          }
          file_size = static_cast<uint32_t>(*s);
        }
        if (const auto d = get_if<int32_t>(config_dict, "log_overdue_days")) {
          if (*d <= 0) {
            throw py::value_error("log_overdue_days must be positive");
          }
          overdue_days = static_cast<uint32_t>(*d);
        }

        data.log_config = std::make_shared<GlobalConfig::FileLogConfig>(
            log_level, dir, basename, file_size, overdue_days);

      } else if (iequals(log_type, "console")) {
        data.log_config =
            std::make_shared<GlobalConfig::ConsoleLogConfig>(log_level);
      } else {
        throw py::value_error("log_type must be 'console' or 'file'");
      }
    }

    // set query thread count
    if (const auto q = get_if<int32_t>(config_dict, "query_threads")) {
      if (*q <= 0) throw py::value_error("query_threads must be positive");
      data.query_thread_count = static_cast<uint32_t>(*q);
    }

    // set optimize thread count
    if (const auto o = get_if<int32_t>(config_dict, "optimize_threads")) {
      if (*o <= 0) throw py::value_error("optimize_threads must be positive");
      data.optimize_thread_count = static_cast<uint32_t>(*o);
    }

    // set invert_to_forward_scan_ratio
    if (const auto v =
            get_if<double>(config_dict, "invert_to_forward_scan_ratio")) {
      if (*v < 0.0 || *v > 1.0) {
        throw py::value_error(
            "invert_to_forward_scan_ratio must be in [0.0, 1.0]");
      }
      data.invert_to_forward_scan_ratio = static_cast<float>(*v);
    }

    // set brute_force_by_keys_ratio
    if (const auto v =
            get_if<double>(config_dict, "brute_force_by_keys_ratio")) {
      if (*v < 0.0 || *v > 1.0) {
        throw py::value_error(
            "brute_force_by_keys_ratio must be in [0.0, 1.0]");
      }
      data.brute_force_by_keys_ratio = static_cast<float>(*v);
    }

    // initialize (contains validate)
    Status status = GlobalConfig::Instance().Initialize(data);
    if (!status.ok()) {
      throw std::runtime_error("Initialization failed: " + status.message());
    }
    return py::none();
  });
}


}  // namespace zvec

================================================
FILE: src/binding/python/model/param/python_param.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "python_param.h"
#include <pybind11/numpy.h>
#include <pybind11/stl.h>
#include <zvec/core/interface/constants.h>
#include <zvec/db/index_params.h>
#include "python_doc.h"

namespace zvec {

//! Name of an index type; any value not listed maps to "UNDEFINED"
static std::string index_type_to_string(const IndexType type) {
  const char *name = "UNDEFINED";
  switch (type) {
    case IndexType::INVERT:
      name = "INVERT";
      break;
    case IndexType::FLAT:
      name = "FLAT";
      break;
    case IndexType::IVF:
      name = "IVF";
      break;
    case IndexType::HNSW:
      name = "HNSW";
      break;
    case IndexType::HNSW_RABITQ:
      name = "HNSW_RABITQ";
      break;
    default:
      break;
  }
  return name;
}

//! Name of a metric type; any value not listed maps to "UNDEFINED"
static std::string metric_type_to_string(const MetricType type) {
  const char *name = "UNDEFINED";
  switch (type) {
    case MetricType::COSINE:
      name = "COSINE";
      break;
    case MetricType::IP:
      name = "IP";
      break;
    case MetricType::L2:
      name = "L2";
      break;
    default:
      break;
  }
  return name;
}

//! Name of a quantize type; QuantizeType::UNDEFINED and any value not
//! listed both map to "UNDEFINED"
static std::string quantize_type_to_string(const QuantizeType type) {
  const char *name = "UNDEFINED";
  switch (type) {
    case QuantizeType::INT8:
      name = "INT8";
      break;
    case QuantizeType::INT4:
      name = "INT4";
      break;
    case QuantizeType::FP16:
      name = "FP16";
      break;
    case QuantizeType::RABITQ:
      name = "RABITQ";
      break;
    case QuantizeType::UNDEFINED:
    default:
      break;
  }
  return name;
}

//! Cast a Python handle to `T`, turning a failed cast into a type error.
//! @param h              handle to convert
//! @param vector_field   label placed at the start of the error message
//! @param expected_type  type name reported as the expected one
//! @throws py::type_error "<field>: expected <type>, got <actual type>"
template <typename T>
T checked_cast(const py::handle &h, const std::string &vector_field,
               const std::string &expected_type) {
  try {
    return py::cast<T>(h);
  } catch (const py::cast_error &) {
    const std::string actual_type = std::string(py::str(py::type::of(h)));
    throw py::type_error(vector_field + ": expected " + expected_type +
                         ", got " + actual_type);
  }
}

//! Copy `n` elements of type `T` into a byte string (raw object bytes, in
//! memory order).
//! @param data  pointer to the first element; may be null when `n` is 0
//! @param n     number of elements
//! @return a string of `n * sizeof(T)` bytes
template <typename T>
std::string serialize_vector(const T *data, size_t n) {
  const size_t num_bytes = n * sizeof(T);
  std::string buf(num_bytes, '\0');
  // memcpy requires valid pointers even for a zero-length copy
  if (num_bytes != 0) {
    std::memcpy(buf.data(), data, num_bytes);
  }
  return buf;
}

//! Pack a Python dict into two byte strings: its keys as `uint32_t` indices
//! and its values as `ValueType`, both in the dict's iteration order.
//! @param sparse_dict   mapping from index to value
//! @param value_caster  callable `(value handle, position) -> ValueType`
//! @return {indices bytes, values bytes}; two empty strings for an empty dict
//! @throws py::type_error when a key cannot be cast to `uint32_t`
template <typename ValueType, typename ValueCastFn>
std::pair<std::string, std::string> serialize_sparse_vector(
    const py::dict &sparse_dict, ValueCastFn &&value_caster) {
  const size_t count = sparse_dict.size();
  if (count == 0) {
    return {std::string(), std::string()};
  }

  std::string packed_indices(count * sizeof(uint32_t), '\0');
  std::string packed_values(count * sizeof(ValueType), '\0');
  auto *index_out = reinterpret_cast<uint32_t *>(packed_indices.data());
  auto *value_out = reinterpret_cast<ValueType *>(packed_values.data());

  size_t pos = 0;
  for (const auto &[py_key, py_val] : sparse_dict) {
    index_out[pos] = checked_cast<uint32_t>(py_key, "Sparse indices", "UINT32");
    value_out[pos] = value_caster(py_val, pos);
    ++pos;
  }
  return {std::move(packed_indices), std::move(packed_values)};
}

//! Create the `param` submodule under `parent` and register every
//! parameter binding group on it.
void ZVecPyParams::Initialize(pybind11::module_ &parent) {
  py::module_ params_module =
      parent.def_submodule("param", "This module contains the params of Zvec");

  // index params [invert/hnsw/flat/ivf]
  bind_index_params(params_module);
  // query params [hnsw/ivf]
  bind_query_params(params_module);
  // options [collection/index/optimize/column]
  bind_options(params_module);
  // vector query
  bind_vector_query(params_module);
}

/// Registers the index-parameter classes on module `m`:
/// IndexParam (base), InvertIndexParam, VectorIndexParam (base),
/// HnswIndexParam, HnswRabitqIndexParam, FlatIndexParam and IVFIndexParam.
///
/// Every class exposes read-only properties, a `to_dict` method and pickle
/// support; the concrete classes additionally expose a keyword-argument
/// constructor and `__repr__`.
void ZVecPyParams::bind_index_params(pybind11::module_ &m) {
  // binding base index params
  py::class_<IndexParams, std::shared_ptr<IndexParams>> index_params(
      m, "IndexParam", R"pbdoc(
Base class for all index parameter configurations.

This abstract base class defines the common interface for index types.
It should not be instantiated directly; use derived classes instead.

Attributes:
    type (IndexType): The type of the index (e.g., HNSW, FLAT, INVERT).
)pbdoc");
  index_params
      .def_property_readonly(
          "type",
          [](const IndexParams &self) -> IndexType { return self.type(); },
          "IndexType: The type of the index.")
      .def("clone", &IndexParams::clone, py::return_value_policy::copy)
      .def(
          "__eq__",
          // Comparing against anything that is not an IndexParam is simply
          // "not equal" rather than an error.
          [](const IndexParams &self, const py::object &other) {
            if (!py::isinstance<IndexParams>(other)) return false;
            return self == other.cast<const IndexParams &>();
          },
          py::is_operator())
      .def(
          "to_dict",
          [](const IndexParams &self) -> py::dict {
            py::dict dict;
            dict["type"] = index_type_to_string(self.type());
            return dict;
          },
          "Convert to dictionary with all fields")
      .def(py::pickle(
          [](const IndexParams &self) {  // __getstate__
            return py::make_tuple(self.type());
          },
          [](py::tuple t) {  // __setstate__
            if (t.size() != 1)
              throw std::runtime_error("Invalid state for IndexParams");
            // Base class: no instance is reconstructed here, an empty holder
            // is returned. NOTE(review): pybind11 is expected to reject the
            // null holder when the base type is unpickled directly - confirm.
            return std::shared_ptr<IndexParams>();
          }));

  // binding invert index params
  py::class_<InvertIndexParams, IndexParams, std::shared_ptr<InvertIndexParams>>
      invert_params(m, "InvertIndexParam", R"pbdoc(
Parameters for configuring an invert index.

This class controls whether range query
optimization is enabled for invert index structures.

Attributes:
    type (IndexType): Always `IndexType.INVERTED`.
    enable_range_optimization (bool): Whether range optimization is enabled.
    enable_extended_wildcard (bool): Whether extended wildcard (suffix and infix) search is enabled.

Examples:
    >>> params = InvertIndexParam(enable_range_optimization=True, enable_extended_wildcard=False)
    >>> print(params.enable_range_optimization)
    True
    >>> print(params.enable_extended_wildcard)
    False
    >>> config = params.to_dict()
    >>> print(config)
    {'enable_range_optimization': True, 'enable_extended_wildcard': False}
)pbdoc");
  invert_params
      .def(py::init<bool, bool>(), py::arg("enable_range_optimization") = false,
           py::arg("enable_extended_wildcard") = false,
           R"pbdoc(
Constructs an InvertIndexParam instance.

Args:
    enable_range_optimization (bool, optional): If True, enables range query
        optimization for the invert index. Defaults to False.
    enable_extended_wildcard (bool, optional): If True, enables extended wildcard
        search including suffix and infix patterns. Defaults to False.
)pbdoc")
      .def_property_readonly("enable_range_optimization",
                             &InvertIndexParams::enable_range_optimization,
                             R"pbdoc(
bool: Whether range optimization is enabled for this inverted index.
)pbdoc")
      .def_property_readonly("enable_extended_wildcard",
                             &InvertIndexParams::enable_extended_wildcard,
                             R"pbdoc(
bool: Whether extended wildcard (suffix and infix) search is enabled.
Note: Prefix search is always enabled regardless of this setting.
)pbdoc")
      .def(
          "to_dict",
          [](const InvertIndexParams &self) -> py::dict {
            py::dict dict;
            dict["enable_range_optimization"] =
                self.enable_range_optimization();
            dict["enable_extended_wildcard"] = self.enable_extended_wildcard();
            return dict;
          },
          "Convert to dictionary with all fields")
      .def("__repr__",
           // std::to_string on a bool prints "1"/"0", not "true"/"false".
           [](const InvertIndexParams &self) -> std::string {
             return "{"
                    "\"enable_range_optimization\":" +
                    std::to_string(self.enable_range_optimization()) +
                    ","
                    "\"enable_extended_wildcard\":" +
                    std::to_string(self.enable_extended_wildcard()) + "}";
           })
      .def(py::pickle(
          [](const InvertIndexParams &self) {  // __getstate__
            return py::make_tuple(self.enable_range_optimization(),
                                  self.enable_extended_wildcard());
          },
          [](py::tuple t) {  // __setstate__
            if (t.size() != 2)
              throw std::runtime_error("Invalid state for InvertIndexParams");
            return std::make_shared<InvertIndexParams>(t[0].cast<bool>(),
                                                       t[1].cast<bool>());
          }));

  // binding base vector index params
  py::class_<VectorIndexParams, IndexParams, std::shared_ptr<VectorIndexParams>>
      vector_params(m, "VectorIndexParam", R"pbdoc(
Base class for vector index parameter configurations.

Encapsulates common settings for all vector index types.

Attributes:
    type (IndexType): The specific vector index type (e.g., HNSW, FLAT).
    metric_type (MetricType): Distance metric used for similarity search.
    quantize_type (QuantizeType): Optional vector quantization type.
)pbdoc");
  vector_params
      .def_property_readonly(
          "metric_type",
          [](const VectorIndexParams &self) -> MetricType {
            return self.metric_type();
          },
          "MetricType: Distance metric (e.g., IP, COSINE, L2).")
      .def_property_readonly(
          "quantize_type",
          [](const VectorIndexParams &self) -> QuantizeType {
            return self.quantize_type();
          },
          "QuantizeType: Vector quantization type (e.g., FP16, INT8).")
      .def(
          "to_dict",
          [](const VectorIndexParams &self) -> py::dict {
            py::dict dict;
            dict["type"] = index_type_to_string(self.type());
            dict["metric_type"] = metric_type_to_string(self.metric_type());
            dict["quantize_type"] =
                quantize_type_to_string(self.quantize_type());
            return dict;
          },
          "Convert to dictionary with all fields")
      .def(py::pickle(
          [](const VectorIndexParams &self) {  // __getstate__
            return py::make_tuple(self.type(), self.metric_type(),
                                  self.quantize_type());
          },
          [](py::tuple t) {  // __setstate__
            if (t.size() != 3)
              throw std::runtime_error("Invalid state for VectorIndexParams");
            // Base class: cannot be instantiated directly; it exists for the
            // derived classes, so an empty holder is returned.
            return std::shared_ptr<VectorIndexParams>();
          }));

  // binding hnsw index params
  py::class_<HnswIndexParams, VectorIndexParams,
             std::shared_ptr<HnswIndexParams>>
      hnsw_params(m, "HnswIndexParam", R"pbdoc(
Parameters for configuring an HNSW (Hierarchical Navigable Small World) index.

HNSW is a graph-based approximate nearest neighbor search index. This class
encapsulates its construction hyperparameters.

Attributes:
    metric_type (MetricType): Distance metric used for similarity computation.
        Default is ``MetricType.IP`` (inner product).
    m (int): Number of bi-directional links created for every new element
        during construction. Higher values improve accuracy but increase
        memory usage and construction time. Default is 50.
    ef_construction (int): Size of the dynamic candidate list for nearest
        neighbors during index construction. Larger values yield better
        graph quality at the cost of slower build time. Default is 500.
    quantize_type (QuantizeType): Optional quantization type for vector
        compression (e.g., FP16, INT8). Default is `QuantizeType.UNDEFINED` to
        disable quantization.

Examples:
    >>> from zvec.typing import MetricType, QuantizeType
    >>> params = HnswIndexParam(
    ...     metric_type=MetricType.COSINE,
    ...     m=16,
    ...     ef_construction=200,
    ...     quantize_type=QuantizeType.INT8
    ... )
    >>> print(params)
    {'metric_type': 'COSINE', 'm': 16, 'ef_construction': 200, 'quantize_type': 'INT8'}
)pbdoc");
  hnsw_params
      .def(py::init<MetricType, int, int, QuantizeType>(),
           py::arg("metric_type") = MetricType::IP,
           py::arg("m") = core_interface::kDefaultHnswNeighborCnt,
           py::arg("ef_construction") =
               core_interface::kDefaultHnswEfConstruction,
           py::arg("quantize_type") = QuantizeType::UNDEFINED)
      .def_property_readonly(
          "m", &HnswIndexParams::m,
          "int: Maximum number of neighbors per node in upper layers.")
      .def_property_readonly(
          "ef_construction", &HnswIndexParams::ef_construction,
          "int: Candidate list size during index construction.")
      .def(
          "to_dict",
          [](const HnswIndexParams &self) -> py::dict {
            py::dict dict;
            dict["type"] = index_type_to_string(self.type());
            dict["metric_type"] = metric_type_to_string(self.metric_type());
            dict["m"] = self.m();
            dict["ef_construction"] = self.ef_construction();
            dict["quantize_type"] =
                quantize_type_to_string(self.quantize_type());
            return dict;
          },
          "Convert to dictionary with all fields")
      .def("__repr__",
           // Enum names are emitted without surrounding quotes here.
           [](const HnswIndexParams &self) -> std::string {
             return "{"
                    "\"metric_type\":" +
                    metric_type_to_string(self.metric_type()) +
                    ", \"m\":" + std::to_string(self.m()) +
                    ", \"ef_construction\":" +
                    std::to_string(self.ef_construction()) +
                    ", \"quantize_type\":" +
                    quantize_type_to_string(self.quantize_type()) + "}";
           })
      .def(py::pickle(
          // State layout: (metric_type, m, ef_construction, quantize_type),
          // i.e. the constructor arguments in order.
          [](const HnswIndexParams &self) {
            return py::make_tuple(self.metric_type(), self.m(),
                                  self.ef_construction(), self.quantize_type());
          },
          [](py::tuple t) {
            if (t.size() != 4)
              throw std::runtime_error("Invalid state for HnswIndexParams");
            return std::make_shared<HnswIndexParams>(
                t[0].cast<MetricType>(), t[1].cast<int>(), t[2].cast<int>(),
                t[3].cast<QuantizeType>());
          }));

  // binding hnsw rabitq index params
  py::class_<HnswRabitqIndexParams, VectorIndexParams,
             std::shared_ptr<HnswRabitqIndexParams>>
      hnsw_rabitq_params(m, "HnswRabitqIndexParam", R"pbdoc(
Parameters for configuring an HNSW (Hierarchical Navigable Small World) index with RabitQ quantization.

HNSW is a graph-based approximate nearest neighbor search index. RabitQ is a
quantization method that provides high compression with minimal accuracy loss.

Attributes:
    metric_type (MetricType): Distance metric used for similarity computation.
        Default is ``MetricType.IP`` (inner product).
    total_bits (int): Total bits for RabitQ quantization.
    num_clusters (int): Number of clusters for RabitQ.
    m (int): Number of bi-directional links created for every new element
        during construction. Higher values improve accuracy but increase
        memory usage and construction time. Default is 50.
    ef_construction (int): Size of the dynamic candidate list for nearest
        neighbors during index construction. Larger values yield better
        graph quality at the cost of slower build time. Default is 500.
    sample_count (int): Sample count for RabitQ training. Default is 0.

Examples:
    >>> from zvec.typing import MetricType
    >>> params = HnswRabitqIndexParam(
    ...     metric_type=MetricType.COSINE,
    ...     m=16,
    ...     ef_construction=200
    ... )
    >>> print(params)
    {'metric_type': 'COSINE', 'm': 16, 'ef_construction': 200}
)pbdoc");
  hnsw_rabitq_params
      .def(py::init<MetricType, int, int, int, int, int>(),
           py::arg("metric_type") = MetricType::IP,
           py::arg("total_bits") = core_interface::kDefaultRabitqTotalBits,
           py::arg("num_clusters") = core_interface::kDefaultRabitqNumClusters,
           py::arg("m") = core_interface::kDefaultHnswNeighborCnt,
           py::arg("ef_construction") =
               core_interface::kDefaultHnswEfConstruction,
           py::arg("sample_count") = 0)
      .def_property_readonly("m", &HnswRabitqIndexParams::m,
                             "int: Maximum number of neighbors per node.")
      .def_property_readonly(
          "ef_construction", &HnswRabitqIndexParams::ef_construction,
          "int: Candidate list size during index construction.")
      .def_property_readonly("total_bits", &HnswRabitqIndexParams::total_bits,
                             "int: Total bits for RabitQ quantization.")
      .def_property_readonly("num_clusters",
                             &HnswRabitqIndexParams::num_clusters,
                             "int: Number of clusters for RabitQ.")
      .def_property_readonly("sample_count",
                             &HnswRabitqIndexParams::sample_count,
                             "int: Sample count for RabitQ training.")
      .def(
          "to_dict",
          [](const HnswRabitqIndexParams &self) -> py::dict {
            py::dict dict;
            dict["type"] = index_type_to_string(self.type());
            dict["metric_type"] = metric_type_to_string(self.metric_type());
            dict["quantize_type"] =
                quantize_type_to_string(self.quantize_type());
            dict["total_bits"] = self.total_bits();
            dict["num_clusters"] = self.num_clusters();
            dict["sample_count"] = self.sample_count();
            dict["m"] = self.m();
            dict["ef_construction"] = self.ef_construction();
            return dict;
          },
          "Convert to dictionary with all fields")
      .def(
          "__repr__",
          // Unlike the other index params, string-valued fields are quoted
          // in this representation.
          [](const HnswRabitqIndexParams &self) -> std::string {
            return "{"
                   "\"type\":\"" +
                   index_type_to_string(self.type()) +
                   "\", \"metric_type\":\"" +
                   metric_type_to_string(self.metric_type()) +
                   "\", \"total_bits\":" + std::to_string(self.total_bits()) +
                   ", \"num_clusters\":" + std::to_string(self.num_clusters()) +
                   ", \"sample_count\":" + std::to_string(self.sample_count()) +
                   ", \"m\":" + std::to_string(self.m()) +
                   ", \"ef_construction\":" +
                   std::to_string(self.ef_construction()) +
                   ", \"quantize_type\":\"" +
                   quantize_type_to_string(self.quantize_type()) + "\"}";
          })
      .def(py::pickle(
          // State layout mirrors the constructor argument order:
          // (metric_type, total_bits, num_clusters, m, ef_construction,
          //  sample_count).
          [](const HnswRabitqIndexParams &self) {
            return py::make_tuple(self.metric_type(), self.total_bits(),
                                  self.num_clusters(), self.m(),
                                  self.ef_construction(), self.sample_count());
          },
          [](py::tuple t) {
            if (t.size() != 6)
              throw std::runtime_error(
                  "Invalid state for HnswRabitqIndexParams");
            return std::make_shared<HnswRabitqIndexParams>(
                t[0].cast<MetricType>(), t[1].cast<int>(), t[2].cast<int>(),
                t[3].cast<int>(), t[4].cast<int>(), t[5].cast<int>());
          }));

  // FlatIndexParams
  py::class_<FlatIndexParams, VectorIndexParams,
             std::shared_ptr<FlatIndexParams>>
      flat_params(m, "FlatIndexParam", R"pbdoc(
Parameters for configuring a flat (brute-force) index.

A flat index performs exact nearest neighbor search by comparing the query
vector against all vectors in the collection. It is simple, accurate, and
suitable for small to medium datasets or as a baseline.

Attributes:
    metric_type (MetricType): Distance metric used for similarity computation.
        Default is ``MetricType.IP`` (inner product).
    quantize_type (QuantizeType): Optional quantization type for vector
        compression (e.g., FP16, INT8). Use ``QuantizeType.UNDEFINED`` to
        disable quantization. Default is ``QuantizeType.UNDEFINED``.

Examples:
    >>> from zvec.typing import MetricType, QuantizeType
    >>> params = FlatIndexParam(
    ...     metric_type=MetricType.L2,
    ...     quantize_type=QuantizeType.FP16
    ... )
    >>> print(params)
    {'metric_type': 'L2', 'quantize_type': 'FP16'}
)pbdoc");
  flat_params
      .def(py::init<MetricType, QuantizeType>(),
           py::arg("metric_type") = MetricType::IP,
           py::arg("quantize_type") = QuantizeType::UNDEFINED,
           R"pbdoc(
Constructs a FlatIndexParam instance.

Args:
    metric_type (MetricType, optional): Distance metric. Defaults to MetricType.IP.
    quantize_type (QuantizeType, optional): Vector quantization type.
        Defaults to QuantizeType.UNDEFINED (no quantization).
)pbdoc")
      .def(
          "to_dict",
          [](const FlatIndexParams &self) -> py::dict {
            py::dict dict;
            dict["metric_type"] = metric_type_to_string(self.metric_type());
            dict["quantize_type"] =
                quantize_type_to_string(self.quantize_type());
            return dict;
          },
          "Convert to dictionary with all fields")
      .def("__repr__",
           [](const FlatIndexParams &self) -> std::string {
             return "{"
                    "\"metric_type\":" +
                    metric_type_to_string(self.metric_type()) +
                    ", \"quantize_type\":" +
                    quantize_type_to_string(self.quantize_type()) + "}";
           })
      .def(py::pickle(
          // State layout: (metric_type, quantize_type).
          [](const FlatIndexParams &self) {
            return py::make_tuple(self.metric_type(), self.quantize_type());
          },
          [](py::tuple t) {
            if (t.size() != 2)
              throw std::runtime_error("Invalid state for FlatIndexParams");
            return std::make_shared<FlatIndexParams>(t[0].cast<MetricType>(),
                                                     t[1].cast<QuantizeType>());
          }));

  // IVFIndexParams
  py::class_<IVFIndexParams, VectorIndexParams, std::shared_ptr<IVFIndexParams>>
      ivf_params(m, "IVFIndexParam", R"pbdoc(
Parameters for configuring an IVF (Inverted File Index) index.

IVF partitions the vector space into clusters (inverted lists). At query time,
only a subset of clusters is searched, providing a trade-off between speed
and accuracy.

Attributes:
    metric_type (MetricType): Distance metric used for similarity computation.
        Default is ``MetricType.IP`` (inner product).
    n_list (int): Number of clusters (inverted lists) to partition the dataset into.
        If set to 0, the system will auto-select a reasonable value based on data size.
        Default is 0 (auto).
    n_iters (int): Number of iterations for k-means clustering during index training.
        Higher values yield more stable centroids. Default is 10.
    use_soar (bool): Whether to enable SOAR (Scalable Optimized Adaptive Routing)
        for improved IVF search performance. Default is False.
    quantize_type (QuantizeType): Optional quantization type for vector
        compression (e.g., FP16, INT8). Default is ``QuantizeType.UNDEFINED``.

Examples:
    >>> from zvec.typing import MetricType, QuantizeType
    >>> params = IVFIndexParam(
    ...     metric_type=MetricType.COSINE,
    ...     n_list=100,
    ...     n_iters=15,
    ...     use_soar=True,
    ...     quantize_type=QuantizeType.INT8
    ... )
    >>> print(params.n_list)
    100
)pbdoc");
  ivf_params
      .def(py::init<MetricType, int, int, bool, QuantizeType>(),
           py::arg("metric_type") = MetricType::IP, py::arg("n_list") = 0,
           py::arg("n_iters") = 10, py::arg("use_soar") = false,
           py::arg("quantize_type") = QuantizeType::UNDEFINED,
           R"pbdoc(
Constructs an IVFIndexParam instance.

Args:
    metric_type (MetricType, optional): Distance metric. Defaults to MetricType.IP.
    n_list (int, optional): Number of inverted lists (clusters). Set to 0 for auto.
        Defaults to 0.
    n_iters (int, optional): Number of k-means iterations during training.
        Defaults to 10.
    use_soar (bool, optional): Enable SOAR optimization. Defaults to False.
    quantize_type (QuantizeType, optional): Vector quantization type.
        Defaults to QuantizeType.UNDEFINED.
)pbdoc")
      .def_property_readonly("n_list", &IVFIndexParams::n_list,
                             "int: Number of inverted lists (0 = auto).")
      .def_property_readonly(
          "n_iters", &IVFIndexParams::n_iters,
          "int: Number of k-means iterations during training.")
      .def_property_readonly("use_soar", &IVFIndexParams::use_soar,
                             "bool: Whether SOAR optimization is enabled.")
      .def(
          "to_dict",
          [](const IVFIndexParams &self) -> py::dict {
            py::dict dict;
            dict["type"] = index_type_to_string(self.type());
            dict["metric_type"] = metric_type_to_string(self.metric_type());
            dict["n_list"] = self.n_list();
            dict["n_iters"] = self.n_iters();
            dict["use_soar"] = self.use_soar();
            dict["quantize_type"] =
                quantize_type_to_string(self.quantize_type());
            return dict;
          },
          "Convert to dictionary with all fields")
      .def("__repr__",
           // `use_soar` is rendered through std::to_string, i.e. as "1"/"0".
           [](const IVFIndexParams &self) {
             return "{"
                    "\"metric_type\":" +
                    metric_type_to_string(self.metric_type()) +
                    ", \"n_list\":" + std::to_string(self.n_list()) +
                    ", \"n_iters\":" + std::to_string(self.n_iters()) +
                    ", \"use_soar\":" + std::to_string(self.use_soar()) +
                    ", \"quantize_type\":" +
                    quantize_type_to_string(self.quantize_type()) + "}";
           })
      .def(py::pickle(
          // State layout: (metric_type, n_list, n_iters, use_soar,
          // quantize_type), i.e. the constructor arguments in order.
          [](const IVFIndexParams &self) {
            return py::make_tuple(self.metric_type(), self.n_list(),
                                  self.n_iters(), self.use_soar(),
                                  self.quantize_type());
          },
          [](py::tuple t) {
            if (t.size() != 5)
              throw std::runtime_error("Invalid state for IVFIndexParams");
            return std::make_shared<IVFIndexParams>(
                t[0].cast<MetricType>(), t[1].cast<int>(), t[2].cast<int>(),
                t[3].cast<bool>(), t[4].cast<QuantizeType>());
          }));
}

/// Registers the query-parameter classes on module `m`:
/// QueryParam (base), HnswQueryParam, IVFQueryParam and HnswRabitqQueryParam.
///
/// The base class only exposes read-only properties; each concrete class adds
/// a keyword-argument constructor, its own tuning property (`ef` / `nprobe`),
/// `__repr__` and pickle support.
void ZVecPyParams::bind_query_params(py::module_ &m) {
  // binding base query params
  py::class_<QueryParams, std::shared_ptr<QueryParams>> query_params(
      m, "QueryParam", R"pbdoc(
Base class for all query parameter configurations.

This abstract base class defines common query settings such as search radius
and whether to force linear (brute-force) search. It should not be instantiated
directly; use derived classes like `HnswQueryParam` or `IVFQueryParam`.

Attributes:
    type (IndexType): The index type this query is configured for.
    radius (float): Search radius for range queries. Used in combination with
        top-k to filter results. Default is 0.0 (disabled).
    is_linear (bool): If True, forces brute-force linear search instead of
        using the index. Useful for debugging or small datasets. Default is False.
    is_using_refiner (bool, optional): Whether to use refiner for the query. Default is False.
)pbdoc");
  query_params
      .def_property_readonly(
          "type",
          [](const QueryParams &self) -> IndexType { return self.type(); },
          "IndexType: The type of index this query targets.")
      .def_property_readonly(
          "radius",
          [](const QueryParams &self) -> float { return self.radius(); },
          "float: Search radius for range queries.")
      .def_property_readonly(
          "is_linear",
          [](const QueryParams &self) -> bool { return self.is_linear(); },
          "bool: Whether to bypass the index and use brute-force linear "
          "search.")
      .def_property_readonly(
          "is_using_refiner",
          [](const QueryParams &self) -> bool {
            return self.is_using_refiner();
          },
          "bool: Whether to use refiner for the query.")
      .def(py::pickle(
          [](const QueryParams &self) {  // __getstate__
            return py::make_tuple(self.type(), self.radius(), self.is_linear());
          },
          [](py::tuple t) {  // __setstate__
            if (t.size() != 3)
              throw std::runtime_error("Invalid state for QueryParams");
            // Base class: no instance is reconstructed here, an empty holder
            // is returned.
            return std::shared_ptr<QueryParams>();
          }));

  // binding hnsw query params
  py::class_<HnswQueryParams, QueryParams, std::shared_ptr<HnswQueryParams>>
      hnsw_params(m, "HnswQueryParam", R"pbdoc(
Query parameters for HNSW (Hierarchical Navigable Small World) index.

Controls the trade-off between search speed and accuracy via the `ef` parameter.

Attributes:
    type (IndexType): Always ``IndexType.HNSW``.
    ef (int): Size of the dynamic candidate list during search.
        Larger values improve recall but slow down search.
        Default is 300.
    radius (float): Search radius for range queries. Default is 0.0.
    is_linear (bool): Force linear search. Default is False.
    is_using_refiner (bool, optional): Whether to use refiner for the query. Default is False.

Examples:
    >>> params = HnswQueryParam(ef=300)
    >>> print(params.ef)
    300
    >>> print(params.to_dict() if hasattr(params, 'to_dict') else params)
    {"type":"HNSW", "ef":300}
)pbdoc");
  hnsw_params
      // NOTE(review): the documented default for `ef` (300) follows the class
      // docstring above; confirm against core_interface::kDefaultHnswEfSearch.
      .def(py::init<int, float, bool, bool>(),
           py::arg("ef") = core_interface::kDefaultHnswEfSearch,
           py::arg("radius") = 0.0f, py::arg("is_linear") = false,
           py::arg("is_using_refiner") = false,
           R"pbdoc(
Constructs an HnswQueryParam instance.

Args:
    ef (int, optional): Search-time candidate list size.
        Higher values improve accuracy. Defaults to 300.
    radius (float, optional): Search radius for range queries. Default is 0.0.
    is_linear (bool, optional): Force linear search. Default is False.
    is_using_refiner (bool, optional): Whether to use refiner for the query. Default is False.
)pbdoc")
      .def_property_readonly(
          "ef", [](const HnswQueryParams &self) -> int { return self.ef(); },
          "int: Size of the dynamic candidate list during HNSW search.")
      .def("__repr__",
           // Boolean fields go through std::to_string and print as "1"/"0".
           [](const HnswQueryParams &self) -> std::string {
             return "{"
                    "\"type\":" +
                    index_type_to_string(self.type()) +
                    ", \"ef\":" + std::to_string(self.ef()) +
                    ", \"radius\":" + std::to_string(self.radius()) +
                    ", \"is_linear\":" + std::to_string(self.is_linear()) +
                    ", \"is_using_refiner\":" +
                    std::to_string(self.is_using_refiner()) + "}";
           })
      .def(py::pickle(
          // State layout: (ef, radius, is_linear, is_using_refiner).
          [](const HnswQueryParams &self) {
            return py::make_tuple(self.ef(), self.radius(), self.is_linear(),
                                  self.is_using_refiner());
          },
          [](py::tuple t) {
            if (t.size() != 4)
              throw std::runtime_error("Invalid state for HnswQueryParams");
            // Rebuild from `ef`, then restore the remaining fields through
            // their setters.
            auto obj = std::make_shared<HnswQueryParams>(t[0].cast<int>());
            obj->set_radius(t[1].cast<float>());
            obj->set_is_linear(t[2].cast<bool>());
            obj->set_is_using_refiner(t[3].cast<bool>());
            return obj;
          }));

  // binding ivf query params
  py::class_<IVFQueryParams, QueryParams, std::shared_ptr<IVFQueryParams>>
      ivf_params(m, "IVFQueryParam", R"pbdoc(
Query parameters for IVF (Inverted File Index) index.

Controls how many inverted lists (`nprobe`) to visit during search.

Attributes:
    type (IndexType): Always ``IndexType.IVF``.
    nprobe (int): Number of closest clusters (inverted lists) to search.
        Higher values improve recall but increase latency.
        Default is 10.
    radius (float): Search radius for range queries. Default is 0.0.
    is_linear (bool): Force linear search. Default is False.

Examples:
    >>> params = IVFQueryParam(nprobe=20)
    >>> print(params.nprobe)
    20
)pbdoc");
  ivf_params
      .def(py::init<int>(), py::arg("nprobe") = 10, R"pbdoc(
Constructs an IVFQueryParam instance.

Args:
    nprobe (int, optional): Number of inverted lists to probe during search.
        Higher values improve accuracy. Defaults to 10.
)pbdoc")
      .def_property_readonly(
          "nprobe",
          [](const IVFQueryParams &self) -> int { return self.nprobe(); },
          "int: Number of inverted lists to search during IVF query.")
      .def("__repr__",
           [](const IVFQueryParams &self) -> std::string {
             return "{"
                    "\"type\":" +
                    index_type_to_string(self.type()) +
                    ", \"nprobe\":" + std::to_string(self.nprobe()) + "}";
           })
      .def(py::pickle(
          // State layout: (nprobe, radius, is_linear).
          [](const IVFQueryParams &self) {
            return py::make_tuple(self.nprobe(), self.radius(),
                                  self.is_linear());
          },
          [](py::tuple t) {
            if (t.size() != 3)
              throw std::runtime_error("Invalid state for IVFQueryParams");
            auto obj = std::make_shared<IVFQueryParams>(t[0].cast<int>());
            obj->set_radius(t[1].cast<float>());
            obj->set_is_linear(t[2].cast<bool>());
            return obj;
          }));

  // binding hnsw rabitq query params
  py::class_<HnswRabitqQueryParams, QueryParams,
             std::shared_ptr<HnswRabitqQueryParams>>
      hnsw_rabitq_query_params(m, "HnswRabitqQueryParam", R"pbdoc(
Query parameters for HNSW RaBitQ (Hierarchical Navigable Small World with RaBitQ quantization) index.

Controls the trade-off between search speed and accuracy via the `ef` parameter.
RaBitQ provides efficient quantization while maintaining high search quality.

Attributes:
    type (IndexType): Always ``IndexType.HNSW_RABITQ``.
    ef (int): Size of the dynamic candidate list during search.
        Larger values improve recall but slow down search.
        Default is 300.
    radius (float): Search radius for range queries. Default is 0.0.
    is_linear (bool): Force linear search. Default is False.
    is_using_refiner (bool, optional): Whether to use refiner for the query. Default is False.

Examples:
    >>> params = HnswRabitqQueryParam(ef=300)
    >>> print(params.ef)
    300
    >>> print(params.to_dict() if hasattr(params, 'to_dict') else params)
    {"type":"HNSW_RABITQ", "ef":300}
)pbdoc");
  hnsw_rabitq_query_params
      .def(py::init<int, float, bool, bool>(),
           py::arg("ef") = core_interface::kDefaultHnswEfSearch,
           py::arg("radius") = 0.0f, py::arg("is_linear") = false,
           py::arg("is_using_refiner") = false,
           R"pbdoc(
Constructs an HnswRabitqQueryParam instance.

Args:
    ef (int, optional): Search-time candidate list size.
        Higher values improve accuracy. Defaults to 300.
    radius (float, optional): Search radius for range queries. Default is 0.0.
    is_linear (bool, optional): Force linear search. Default is False.
    is_using_refiner (bool, optional): Whether to use refiner for the query. Default is False.
)pbdoc")
      .def_property_readonly(
          "ef",
          [](const HnswRabitqQueryParams &self) -> int { return self.ef(); },
          "int: Size of the dynamic candidate list during HNSW RaBitQ search.")
      .def("__repr__",
           // The index type is quoted here; booleans print as "1"/"0".
           [](const HnswRabitqQueryParams &self) -> std::string {
             return "{"
                    "\"type\":\"" +
                    index_type_to_string(self.type()) +
                    "\", \"ef\":" + std::to_string(self.ef()) +
                    ", \"radius\":" + std::to_string(self.radius()) +
                    ", \"is_linear\":" + std::to_string(self.is_linear()) +
                    ", \"is_using_refiner\":" +
                    std::to_string(self.is_using_refiner()) + "}";
           })
      .def(py::pickle(
          // State layout: (ef, radius, is_linear, is_using_refiner).
          [](const HnswRabitqQueryParams &self) {
            return py::make_tuple(self.ef(), self.radius(), self.is_linear(),
                                  self.is_using_refiner());
          },
          [](py::tuple t) {
            if (t.size() != 4)
              throw std::runtime_error(
                  "Invalid state for HnswRabitqQueryParams");
            auto obj =
                std::make_shared<HnswRabitqQueryParams>(t[0].cast<int>());
            obj->set_radius(t[1].cast<float>());
            obj->set_is_linear(t[2].cast<bool>());
            obj->set_is_using_refiner(t[3].cast<bool>());
            return obj;
          }));
}

// Registers the option value types on module `m`:
//   CollectionOption, SegmentOption, IndexOption, OptimizeOption,
//   AddColumnOption and AlterColumnOption.
// Every class exposes read-only properties and supports pickling through a
// plain tuple of its fields; the setstate side rejects tuples of the wrong
// arity with std::runtime_error.
void ZVecPyParams::bind_options(py::module_ &m) {
  // CollectionOptions
  py::class_<CollectionOptions>(m, "CollectionOption", R"pbdoc(
Options for opening or creating a collection.

Attributes:
    read_only (bool): Whether the collection is opened in read-only mode.
        Default is False.
    enable_mmap (bool): Whether to use memory-mapped I/O for data files.
        Default is True.

Examples:
    >>> opt = CollectionOption(read_only=True, enable_mmap=False)
    >>> print(opt.read_only)
    True
)pbdoc")
      .def(py::init<bool, bool>(), py::arg("read_only") = false,
           py::arg("enable_mmap") = true,
           R"pbdoc(
Constructs a CollectionOption instance.

Args:
    read_only (bool, optional): Open collection in read-only mode.
        Defaults to False.
    enable_mmap (bool, optional): Enable memory-mapped I/O.
        Defaults to True.
)pbdoc")
      // Property docstrings mirror the ones on SegmentOption so that
      // help(CollectionOption) is equally informative.
      .def_property_readonly(
          "enable_mmap",
          [](const CollectionOptions &self) { return self.enable_mmap_; },
          "bool: Whether memory-mapped I/O is enabled.")
      .def_property_readonly(
          "read_only",
          [](const CollectionOptions &self) { return self.read_only_; },
          "bool: Whether the collection is opened in read-only mode.")
      .def("__repr__",
           [](const CollectionOptions &self) -> std::string {
             // Booleans are rendered through std::to_string, i.e. as 0/1.
             return "{"
                    "\"enable_mmap\":" +
                    std::to_string(self.enable_mmap_) +
                    ", \"read_only\":" + std::to_string(self.read_only_) + "}";
           })
      .def(py::pickle(
          // getstate: (read_only, enable_mmap, max_buffer_size)
          [](const CollectionOptions &self) {
            return py::make_tuple(self.read_only_, self.enable_mmap_,
                                  self.max_buffer_size_);
          },
          // setstate: rebuild from the 3-tuple produced above
          [](py::tuple t) {
            if (t.size() != 3)
              throw std::runtime_error(
                  "Invalid pickle data for CollectionOptions");
            CollectionOptions obj{};
            obj.read_only_ = t[0].cast<bool>();
            obj.enable_mmap_ = t[1].cast<bool>();
            obj.max_buffer_size_ = t[2].cast<uint32_t>();
            return obj;
          }));

  // SegmentOptions
  py::class_<SegmentOptions>(m, "SegmentOption", R"pbdoc(
Options for segment-level operations.

Currently, this class mirrors CollectionOption and is used internally.
It supports read-only mode, memory mapping, and buffer configuration.

Note:
    This class is primarily for internal use. Most users should use
    CollectionOption instead.

Examples:
    >>> opt = SegmentOption()
    >>> print(opt.enable_mmap)
    True
)pbdoc")
      .def(py::init<>(), "Constructs a SegmentOption with default settings.")
      .def_property_readonly(
          "enable_mmap",
          [](const SegmentOptions &self) { return self.enable_mmap_; },
          "bool: Whether memory-mapped I/O is enabled.")
      .def_property_readonly(
          "read_only",
          [](const SegmentOptions &self) { return self.read_only_; },
          "bool: Whether the segment is read-only.")
      .def_property_readonly(
          "max_buffer_size",
          [](const SegmentOptions &self) { return self.max_buffer_size_; },
          "int: Maximum buffer size in bytes (internal use).")
      .def("__repr__",
           [](const SegmentOptions &self) -> std::string {
             return "{"
                    "\"enable_mmap\":" +
                    std::to_string(self.enable_mmap_) +
                    ", \"read_only\":" + std::to_string(self.read_only_) +
                    ", \"max_buffer_size\":" +
                    std::to_string(self.max_buffer_size_) + "}";
           })
      .def(py::pickle(
          // getstate: (read_only, enable_mmap, max_buffer_size)
          [](const SegmentOptions &self) {
            return py::make_tuple(self.read_only_, self.enable_mmap_,
                                  self.max_buffer_size_);
          },
          [](py::tuple t) {
            if (t.size() != 3)
              throw std::runtime_error(
                  "Invalid pickle data for SegmentOptions");
            SegmentOptions obj{};
            obj.read_only_ = t[0].cast<bool>();
            obj.enable_mmap_ = t[1].cast<bool>();
            obj.max_buffer_size_ = t[2].cast<uint32_t>();
            return obj;
          }));

  // CreateIndexOptions
  py::class_<CreateIndexOptions>(m, "IndexOption",
                                 R"pbdoc(
Options for creating an index.

Attributes:
    concurrency (int): Number of threads to use during index creation.
        If 0, the system will choose an optimal value automatically.
        Default is 0.

Examples:
    >>> opt = IndexOption(concurrency=4)
    >>> print(opt.concurrency)
    4
)pbdoc")
      .def(py::init<int>(), py::arg("concurrency") = 0,
           R"pbdoc(
Constructs an IndexOption instance.

Args:
    concurrency (int, optional): Number of concurrent threads.
        0 means auto-detect. Defaults to 0.
)pbdoc")
      .def_property_readonly(
          "concurrency",
          [](const CreateIndexOptions &self) { return self.concurrency_; },
          "int: Number of threads used for index creation (0 = auto).")
      .def(py::pickle(
          // getstate: (concurrency,)
          [](const CreateIndexOptions &self) {
            return py::make_tuple(self.concurrency_);
          },
          [](py::tuple t) {
            if (t.size() != 1)
              throw std::runtime_error(
                  "Invalid pickle data for CreateIndexOptions");
            CreateIndexOptions obj{};
            obj.concurrency_ = t[0].cast<int>();
            return obj;
          }));

  // OptimizeOptions
  py::class_<OptimizeOptions>(m, "OptimizeOption", R"pbdoc(
Options for optimizing a collection (e.g., merging segments).

Attributes:
    concurrency (int): Number of threads to use during optimization.
        If 0, the system will choose an optimal value automatically.
        Default is 0.

Examples:
    >>> opt = OptimizeOption(concurrency=2)
    >>> print(opt.concurrency)
    2
)pbdoc")
      .def(py::init<int>(), py::arg("concurrency") = 0,
           R"pbdoc(
Constructs an OptimizeOption instance.

Args:
    concurrency (int, optional): Number of concurrent threads.
        0 means auto-detect. Defaults to 0.
)pbdoc")
      .def_property_readonly(
          "concurrency",
          [](const OptimizeOptions &self) { return self.concurrency_; },
          "int: Number of threads used for optimization (0 = auto).")
      .def(py::pickle(
          // getstate: (concurrency,)
          [](const OptimizeOptions &self) {
            return py::make_tuple(self.concurrency_);
          },
          [](py::tuple t) {
            if (t.size() != 1)
              throw std::runtime_error(
                  "Invalid pickle data for OptimizeOptions");
            OptimizeOptions obj{};
            obj.concurrency_ = t[0].cast<int>();
            return obj;
          }));

  // AddColumnOptions
  py::class_<AddColumnOptions>(m, "AddColumnOption",
                               R"pbdoc(
Options for adding a new column to a collection.

Attributes:
    concurrency (int): Number of threads to use when backfilling data
        for the new column. If 0, auto-detect is used. Default is 0.

Examples:
    >>> opt = AddColumnOption(concurrency=1)
    >>> print(opt.concurrency)
    1
)pbdoc")
      .def(py::init<int>(), py::arg("concurrency") = 0,
           R"pbdoc(
Constructs an AddColumnOption instance.

Args:
    concurrency (int, optional): Number of threads for data backfill.
        0 means auto-detect. Defaults to 0.
)pbdoc")
      .def_property_readonly(
          "concurrency",
          [](const AddColumnOptions &self) { return self.concurrency_; },
          "int: Number of threads used when adding a column (0 = auto).")
      .def(py::pickle(
          // getstate: (concurrency,)
          [](const AddColumnOptions &self) {
            return py::make_tuple(self.concurrency_);
          },
          [](py::tuple t) {
            if (t.size() != 1)
              throw std::runtime_error(
                  "Invalid pickle data for AddColumnOptions");
            AddColumnOptions obj{};
            obj.concurrency_ = t[0].cast<int>();
            return obj;
          }));

  // AlterColumnOptions
  py::class_<AlterColumnOptions>(m, "AlterColumnOption", R"pbdoc(
Options for altering an existing column (e.g., changing index settings).

Attributes:
    concurrency (int): Number of threads to use during the alteration process.
        If 0, the system will choose an optimal value automatically.
        Default is 0.

Examples:
    >>> opt = AlterColumnOption(concurrency=1)
    >>> print(opt.concurrency)
    1
)pbdoc")
      .def(py::init<int>(), py::arg("concurrency") = 0,
           R"pbdoc(
Constructs an AlterColumnOption instance.

Args:
    concurrency (int, optional): Number of threads for column alteration.
        0 means auto-detect. Defaults to 0.
)pbdoc")
      .def_property_readonly(
          "concurrency",
          [](const AlterColumnOptions &self) { return self.concurrency_; },
          "int: Number of threads used when altering a column (0 = auto).")
      .def(py::pickle(
          // getstate: (concurrency,)
          [](const AlterColumnOptions &self) {
            return py::make_tuple(self.concurrency_);
          },
          [](py::tuple t) {
            if (t.size() != 1)
              throw std::runtime_error(
                  "Invalid pickle data for AlterColumnOptions");
            AlterColumnOptions obj{};
            obj.concurrency_ = t[0].cast<int>();
            return obj;
          }));
}

// Registers the `_VectorQuery` class on module `m`.
//
// Besides the plain read/write attributes, the class offers:
//   * set_vector(field_schema, obj): stores a dense ndarray or a sparse dict
//     as the serialized query payload, chosen by the field's data type;
//   * get_vector(field_schema): rebuilds the Python value from that payload;
//   * pickle support through a 9-element tuple.
void ZVecPyParams::bind_vector_query(py::module_ &m) {
  py::class_<VectorQuery>(m, "_VectorQuery")
      .def(py::init<>())
      // properties
      .def_readwrite("topk", &VectorQuery::topk_)
      .def_readwrite("field_name", &VectorQuery::field_name_)
      .def_readwrite("filter", &VectorQuery::filter_)
      .def_readwrite("include_vector", &VectorQuery::include_vector_)
      .def_readwrite("query_params", &VectorQuery::query_params_)
      .def_readwrite("output_fields", &VectorQuery::output_fields_)
      // vector
      .def("set_vector",
           [](VectorQuery &self, const FieldSchema &field_schema,
              const py::object &obj) {
             const DataType data_type = field_schema.data_type();

             // dense vector
             if (FieldSchema::is_dense_vector_field(data_type)) {
               if (!py::isinstance<py::array>(obj)) {
                 throw py::type_error("Dense vector[" + field_schema.name() +
                                      "] expects a ndarray, got " +
                                      std::string(py::str(py::type::of(obj))));
               }
               const auto input = obj.cast<py::array>();
               if (input.ndim() != 1) {
                 throw py::type_error("Dense vector expects 1D array, got " +
                                      std::to_string(input.ndim()) + "D");
               }
               // The payload is read below as `size` consecutive elements
               // starting at buf.ptr, so a strided or reversed view must be
               // materialized as a C-contiguous array first.
               const auto arr = py::array::ensure(input, py::array::c_style);
               if (!arr) {
                 throw py::type_error(
                     "Dense vector[" + field_schema.name() +
                     "] cannot be converted to a contiguous ndarray");
               }
               const auto buf = arr.request();
               // Reject arrays whose element width differs from the field's
               // storage type; reinterpreting them would read outside the
               // buffer (narrower dtype) or produce garbage (wider dtype).
               const auto require_itemsize = [&](size_t expected,
                                                 const char *type_name) {
                 if (static_cast<size_t>(buf.itemsize) != expected) {
                   throw py::type_error(
                       "Dense vector[" + field_schema.name() + "] expects " +
                       type_name + " elements of " + std::to_string(expected) +
                       " bytes, got itemsize " + std::to_string(buf.itemsize));
                 }
               };
               switch (data_type) {
                 case DataType::VECTOR_FP32: {
                   require_itemsize(sizeof(float), "VECTOR_FP32");
                   self.query_vector_ = serialize_vector<float>(
                       static_cast<const float *>(buf.ptr), buf.size);
                   return;
                 }
                 case DataType::VECTOR_FP64: {
                   require_itemsize(sizeof(double), "VECTOR_FP64");
                   self.query_vector_ = serialize_vector<double>(
                       static_cast<const double *>(buf.ptr), buf.size);
                   return;
                 }
                 case DataType::VECTOR_INT8: {
                   require_itemsize(sizeof(int8_t), "VECTOR_INT8");
                   self.query_vector_ = serialize_vector<int8_t>(
                       static_cast<const int8_t *>(buf.ptr), buf.size);
                   return;
                 }
                 case DataType::VECTOR_FP16: {
                   // Half-precision values are carried as raw 16-bit words.
                   require_itemsize(sizeof(uint16_t), "VECTOR_FP16");
                   self.query_vector_ = serialize_vector<uint16_t>(
                       static_cast<const uint16_t *>(buf.ptr), buf.size);
                   return;
                 }
                 default:
                   throw py::type_error(
                       "Unsupported dense vector type for ndarray input: " +
                       std::to_string(static_cast<int>(data_type)));
               }
             }
             // sparse vector
             if (FieldSchema::is_sparse_vector_field(data_type)) {
               if (!py::isinstance<py::dict>(obj)) {
                 throw py::type_error("Sparse vector[" + field_schema.name() +
                                      "] expects a Python dict, got " +
                                      std::string(py::str(py::type::of(obj))));
               }
               const auto sparse = obj.cast<py::dict>();

               switch (data_type) {
                 case DataType::SPARSE_VECTOR_FP16: {
                   // Values are validated as float, then narrowed to FP16.
                   auto [indices, values] =
                       serialize_sparse_vector<ailego::Float16>(
                           sparse, [](const py::handle &h, size_t idx) {
                             float f = checked_cast<float>(
                                 h, "Sparse value[" + std::to_string(idx) + "]",
                                 "FLOAT");
                             return ailego::Float16(f);
                           });
                   self.query_sparse_indices_ = std::move(indices);
                   self.query_sparse_values_ = std::move(values);
                   break;
                 }
                 case DataType::SPARSE_VECTOR_FP32: {
                   auto [indices, values] = serialize_sparse_vector<float>(
                       sparse, [](const py::handle &h, size_t idx) {
                         return checked_cast<float>(
                             h, "Sparse value[" + std::to_string(idx) + "]",
                             "FLOAT");
                       });
                   self.query_sparse_indices_ = std::move(indices);
                   self.query_sparse_values_ = std::move(values);
                   break;
                 }
                 default:
                   throw py::type_error(
                       "Unsupported sparse vector type: " +
                       std::to_string(static_cast<int>(data_type)));
               }
               return;
             }

             throw py::type_error("Unsupported vector field type for field: " +
                                  field_schema.name());
           })
      .def(
          "get_vector",
          [](const VectorQuery &self,
             const FieldSchema &field_schema) -> py::object {
            DataType data_type = field_schema.data_type();
            if (FieldSchema::is_dense_vector_field(data_type)) {
              if (self.query_vector_.empty()) {
                throw std::runtime_error("No dense vector has been set");
              }

              size_t byte_size = self.query_vector_.size();
              const void *data = self.query_vector_.data();

              // Each case checks that the payload is a whole number of
              // elements before exposing it as a 1-D array.
              switch (data_type) {
                case DataType::VECTOR_FP32: {
                  if (byte_size % sizeof(float) != 0) {
                    throw std::runtime_error(
                        "Invalid buffer size for VECTOR_FP32");
                  }
                  size_t dim = byte_size / sizeof(float);
                  return py::array_t<float>({dim}, {sizeof(float)},
                                            static_cast<const float *>(data));
                }
                case DataType::VECTOR_FP64: {
                  if (byte_size % sizeof(double) != 0) {
                    throw std::runtime_error(
                        "Invalid buffer size for VECTOR_FP64");
                  }
                  size_t dim = byte_size / sizeof(double);
                  return py::array_t<double>({dim}, {sizeof(double)},
                                             static_cast<const double *>(data));
                }
                case DataType::VECTOR_INT8: {
                  if (byte_size % sizeof(int8_t) != 0) {
                    throw std::runtime_error(
                        "Invalid buffer size for VECTOR_INT8");
                  }
                  size_t dim = byte_size / sizeof(int8_t);
                  return py::array_t<int8_t>({dim}, {sizeof(int8_t)},
                                             static_cast<const int8_t *>(data));
                }
                case DataType::VECTOR_FP16: {
                  if (byte_size % 2 != 0) {
                    throw std::runtime_error(
                        "Invalid buffer size for VECTOR_FP16");
                  }
                  size_t dim = byte_size / 2;
                  return py::array(py::dtype("float16"), {dim}, {2}, data);
                }

                default:
                  throw py::type_error(
                      "Unsupported dense vector type for get_vector: " +
                      std::to_string(static_cast<int>(data_type)));
              }
            }
            if (FieldSchema::is_sparse_vector_field(data_type)) {
              // Nothing stored yet: report an empty sparse vector.
              if (self.query_sparse_indices_.empty()) {
                return py::dict();
              }

              // Deserialize indices: stored as uint32_t[]
              size_t indices_byte_size = self.query_sparse_indices_.size();
              if (indices_byte_size % sizeof(uint32_t) != 0) {
                throw std::runtime_error(
                    "Sparse indices buffer size not aligned to uint32_t");
              }
              size_t n = indices_byte_size / sizeof(uint32_t);
              const uint32_t *indices = reinterpret_cast<const uint32_t *>(
                  self.query_sparse_indices_.data());

              // Deserialize values
              switch (data_type) {
                case DataType::SPARSE_VECTOR_FP32: {
                  if (self.query_sparse_values_.size() != n * sizeof(float)) {
                    throw std::runtime_error(
                        "Sparse FP32 values buffer size mismatch");
                  }
                  const float *values = reinterpret_cast<const float *>(
                      self.query_sparse_values_.data());
                  py::dict result;
                  for (size_t i = 0; i < n; ++i) {
                    result[py::int_(indices[i])] = py::float_(values[i]);
                  }
                  return result;
                }
                case DataType::SPARSE_VECTOR_FP16: {
                  if (self.query_sparse_values_.size() !=
                      n * sizeof(uint16_t)) {
                    throw std::runtime_error(
                        "Sparse FP16 values buffer size mismatch");
                  }
                  const uint16_t *raw_bits = reinterpret_cast<const uint16_t *>(
                      self.query_sparse_values_.data());
                  py::dict result;
                  for (size_t i = 0; i < n; ++i) {
                    float f = ailego::FloatHelper::ToFP32(raw_bits[i]);
                    result[py::int_(indices[i])] = py::float_(f);
                  }
                  return result;
                }
                default:
                  throw py::type_error("Unsupported sparse vector type...");
              }
            }


            throw py::type_error("Unsupported vector field type: " +
                                 field_schema.name());
          },
          py::arg("field_schema"))
      .def(py::pickle(
          // getstate: (topk, field_name, query_vector, sparse_indices,
          //            sparse_values, filter, include_vector, output_fields,
          //            query_params | None)
          [](const VectorQuery &self) {
            // The three serialized payloads hold arbitrary binary data, so
            // they are exported as `bytes`; the default std::string -> str
            // conversion would try to decode them as UTF-8 and fail.
            return py::make_tuple(
                self.topk_, self.field_name_, py::bytes(self.query_vector_),
                py::bytes(self.query_sparse_indices_),
                py::bytes(self.query_sparse_values_), self.filter_,
                self.include_vector_, self.output_fields_,
                self.query_params_ ? py::cast(self.query_params_) : py::none());
          },
          [](py::tuple t) {
            if (t.size() != 9)
              throw std::runtime_error("Invalid pickle data for VectorQuery");

            VectorQuery obj{};
            obj.topk_ = t[0].cast<int>();
            obj.field_name_ = t[1].cast<std::string>();
            // cast<std::string>() accepts both bytes and str, so states
            // written before the switch to bytes still load.
            obj.query_vector_ = t[2].cast<std::string>();
            obj.query_sparse_indices_ = t[3].cast<std::string>();
            obj.query_sparse_values_ = t[4].cast<std::string>();
            obj.filter_ = t[5].cast<std::string>();
            obj.include_vector_ = t[6].cast<bool>();
            obj.output_fields_ = t[7].cast<std::vector<std::string>>();

            if (!t[8].is_none()) {
              obj.query_params_ = t[8].cast<QueryParams::Ptr>();
            }
            return obj;
          }));
}
}  // namespace zvec

================================================
FILE: src/binding/python/model/python_collection.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "python_collection.h"
#include <pybind11/stl.h>
#include <zvec/db/collection.h>

namespace zvec {

// Raises the exception that corresponds to `status`, or returns normally
// when the status is OK.
//
//   NOT_FOUND         -> py::key_error
//   INVALID_ARGUMENT  -> py::value_error
//   any other code    -> std::runtime_error
//
// The exception text is always status.message().
inline void throw_if_error(const Status &status) {
  const auto code = status.code();

  if (code == StatusCode::OK) {
    return;
  }
  if (code == StatusCode::NOT_FOUND) {
    throw py::key_error(status.message());
  }
  if (code == StatusCode::INVALID_ARGUMENT) {
    throw py::value_error(status.message());
  }

  // INTERNAL_ERROR, ALREADY_EXISTS, NOT_SUPPORTED, PERMISSION_DENIED,
  // FAILED_PRECONDITION, UNKNOWN and any unlisted code share one fallback.
  throw std::runtime_error(status.message());
}


// Returns a copy of the value held by `exp`. When `exp` holds an error,
// the error Status is handed to throw_if_error(); should that call return
// (i.e. the stored status is OK), a value-initialized T is returned.
template <typename T>
T unwrap_expected(const tl::expected<T, Status> &exp) {
  if (!exp.has_value()) {
    throw_if_error(exp.error());
    return T{};
  }
  return exp.value();
}

// Registers the `_Collection` class on module `m`, attaches the db / ddl /
// dml / dql method groups, and adds pickle support.
//
// Pickle state is the tuple (path, schema, options). Unpickling reopens the
// collection at `path` with `options` through Collection::Open.
void ZVecPyCollection::Initialize(pybind11::module_ &m) {
  py::class_<Collection, Collection::Ptr> collection(m, "_Collection");
  bind_db_methods(collection);
  bind_ddl_methods(collection);
  bind_dml_methods(collection);
  bind_dql_methods(collection);
  collection.def(py::pickle(
      [](const Collection &c) {
        // Path()/Schema()/Options() are unwrapped here exactly as the bound
        // "Path"/"Schema"/"Options" methods do, so the tuple carries the
        // plain values that the setstate side casts back; a failed accessor
        // surfaces as the exception chosen by throw_if_error.
        return py::make_tuple(unwrap_expected(c.Path()),
                              unwrap_expected(c.Schema()),
                              unwrap_expected(c.Options()));
      },
      [](py::tuple t) {
        if (t.size() != 3) {
          throw std::runtime_error("Invalid tuple size for Collection pickle");
        }
        const auto path = t[0].cast<std::string>();
        // The schema is only cast to validate the state: the collection is
        // reopened from `path`, not recreated from the schema.
        [[maybe_unused]] const auto schema = t[1].cast<CollectionSchema>();
        const auto options = t[2].cast<CollectionOptions>();
        return unwrap_expected(Collection::Open(path, options));
      }));
}

// Adds the static factory methods "CreateAndOpen" and "Open" to the
// collection class. Both forward to the Collection API and pass the outcome
// through unwrap_expected, so a failure is raised as an exception.
void ZVecPyCollection::bind_db_methods(
    py::class_<Collection, Collection::Ptr> &col) {
  col.def_static(
      "CreateAndOpen",
      [](const std::string &path, const CollectionSchema &schema,
         const CollectionOptions &options) {
        return unwrap_expected(
            Collection::CreateAndOpen(path, schema, options));
      });

  col.def_static(
      "Open", [](const std::string &path, const CollectionOptions &options) {
        return unwrap_expected(Collection::Open(path, options));
      });
}


// Adds the property accessors and the DDL operations to the collection
// class.
//
// Accessors (Path, Options, Schema, Stats) return the unwrapped value.
// Operations (Destroy, Flush, CreateIndex, DropIndex, Optimize, AddColumn,
// DropColumn, AlterColumn) return None; a non-OK status is raised through
// throw_if_error.
void ZVecPyCollection::bind_ddl_methods(
    py::class_<Collection, Collection::Ptr> &col) {
  // bind collection properties
  col.def("Path",
          [](const Collection &self) { return unwrap_expected(self.Path()); })
      .def("Options",
           [](const Collection &self) {
             return unwrap_expected(self.Options());
           })
      .def("Schema",
           [](const Collection &self) {
             return unwrap_expected(self.Schema());
           })
      .def("Stats", [](const Collection &self) {
        return unwrap_expected(self.Stats());
      });

  // bind collection ddl methods
  col.def("Destroy", [](Collection &self) { throw_if_error(self.Destroy()); })
      .def("Flush", [](Collection &self) { throw_if_error(self.Flush()); });

  // binding index ddl methods
  col.def("CreateIndex",
          [](Collection &self, const std::string &column_name,
             const IndexParams::Ptr &index_options,
             const CreateIndexOptions &options) {
            throw_if_error(
                self.CreateIndex(column_name, index_options, options));
          })
      .def("DropIndex",
           [](Collection &self, const std::string &column_name) {
             throw_if_error(self.DropIndex(column_name));
           })
      .def("Optimize", [](Collection &self, const OptimizeOptions &options) {
        throw_if_error(self.Optimize(options));
      });

  // binding column ddl methods
  // Column names are input-only, so they are taken by const reference like
  // every other string argument in this file.
  col.def("AddColumn",
          [](Collection &self, const FieldSchema::Ptr &column_schema,
             const std::string &expression, const AddColumnOptions &options) {
            throw_if_error(self.AddColumn(column_schema, expression, options));
          })
      .def("DropColumn",
           [](Collection &self, const std::string &column_name) {
             throw_if_error(self.DropColumn(column_name));
           })
      .def("AlterColumn", [](Collection &self, const std::string &column_name,
                             const std::string &rename,
                             const FieldSchema::Ptr &new_column_schema,
                             const AlterColumnOptions &options) {
        throw_if_error(
            self.AlterColumn(column_name, rename, new_column_schema, options));
      });
}

// Adds the write operations to the collection class. Insert, Update, Upsert
// and Delete return the unwrapped result of the matching Collection call;
// DeleteByFilter returns None and raises through throw_if_error on a non-OK
// status.
void ZVecPyCollection::bind_dml_methods(
    py::class_<Collection, Collection::Ptr> &col) {
  // document batches
  col.def("Insert", [](Collection &self, std::vector<Doc> &docs) {
    return unwrap_expected(self.Insert(docs));
  });
  col.def("Update", [](Collection &self, std::vector<Doc> &docs) {
    return unwrap_expected(self.Update(docs));
  });
  col.def("Upsert", [](Collection &self, std::vector<Doc> &docs) {
    return unwrap_expected(self.Upsert(docs));
  });

  // deletion by primary keys or by filter expression
  col.def("Delete",
          [](Collection &self, const std::vector<std::string> &pks) {
            return unwrap_expected(self.Delete(pks));
          });
  col.def("DeleteByFilter", [](Collection &self, const std::string &filter) {
    throw_if_error(self.DeleteByFilter(filter));
  });
}

// Adds the read operations Query, GroupByQuery and Fetch to the collection
// class. Each one forwards to the Collection method of the same name and
// returns its unwrapped result, raising on failure.
void ZVecPyCollection::bind_dql_methods(
    py::class_<Collection, Collection::Ptr> &col) {
  // returns DocPtrList
  col.def("Query", [](const Collection &self, const VectorQuery &query) {
    return unwrap_expected(self.Query(query));
  });

  // returns GroupResults
  col.def("GroupByQuery",
          [](const Collection &self, const GroupByVectorQuery &query) {
            return unwrap_expected(self.GroupByQuery(query));
          });

  // returns DocPtrMap
  col.def("Fetch",
          [](const Collection &self, const std::vector<std::string> &pks) {
            return unwrap_expected(self.Fetch(pks));
          });
}

}  // namespace zvec

================================================
FILE: src/binding/python/model/python_doc.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "python_doc.h"
#include <pybind11/numpy.h>
#include <pybind11/stl.h>

namespace zvec {

// Converts `obj` to T. A failed conversion (py::cast_error) is re-raised as
// py::type_error whose message names the field, the expected type label and
// the actual Python type of `obj`.
template <typename T>
T checked_cast(const py::object &obj, const std::string &field,
               const std::string &expected_type) {
  try {
    return obj.cast<T>();
  } catch (const py::cast_error &) {
    const std::string got = std::string(py::str(py::type::of(obj)));
    throw py::type_error("Field '" + field + "': expected " + expected_type +
                         ", got " + got);
  }
}

// Registers the document-related bindings on module `m`: first the
// operator enum, then the document class.
void ZVecPyDoc::Initialize(pybind11::module_ &m) {
  bind_doc_operator(m);
  bind_doc(m);
}


// Exposes the Operator enum to Python as `_DocOp` with the members
// INSERT, UPDATE, DELETE and UPSERT.
void ZVecPyDoc::bind_doc_operator(py::module_ &m) {
  py::enum_<Operator> doc_op(m, "_DocOp");
  doc_op.value("INSERT", Operator::INSERT);
  doc_op.value("UPDATE", Operator::UPDATE);
  doc_op.value("DELETE", Operator::DELETE);
  doc_op.value("UPSERT", Operator::UPSERT);
}


void ZVecPyDoc::bind_doc(py::module_ &m) {
  // binding doc
  py::class_<Doc, Doc::Ptr> doc(m, "_Doc");

  doc.def(py::init([]() { return std::make_shared<Doc>(); }))
      .def("set_pk", &Doc::set_pk)
      .def("pk", &Doc::pk)
      .def("set_score", &Doc::set_score)
      .def("score", &Doc::score)
      .def("has_field", &Doc::has)
      .def("field_names", &Doc::field_names)
      .def(py::pickle(
          [](const Doc &d) {
            std::vector<uint8_t> data = d.serialize();
            return py::bytes(reinterpret_cast<const char *>(data.data()),
                             data.size());
          },
          [](py::bytes b) {
            py::buffer_info info(py::buffer(b).request());
            const uint8_t *buf = reinterpret_cast<const uint8_t *>(info.ptr);
            size_t size = static_cast<size_t>(info.size);
            Doc::Ptr d = Doc::deserialize(buf, size);
            if (!d) throw std::runtime_error("Failed to unpickle Doc");
            return d;
          }));


  // binding doc set field
  doc.def(
      "set_any",
      [](Doc &self, const std::string &field, const FieldSchema &field_schema,
         const py::object &obj) -> bool {
        if (obj.is_none()) {
          if (field_schema.nullable()) {
            self.set_null(field);
            return true;
          }
          throw py::value_error("Field '" + field +
                                "': expected non-nullable type");
        }
        switch (field_schema.data_type()) {
          // base datatypes
          case DataType::STRING:
            return self.set(field,
                            checked_cast<std::string>(obj, field, "STRING"));
          case DataType::BOOL:
            return self.set(field, checked_cast<bool>(obj, field, "BOOL"));
          case DataType::INT32:
            return self.set(field, checked_cast<int32_t>(obj, field, "INT32"));
          case DataType::INT64:
            return self.set(field, checked_cast<int64_t>(obj, field, "INT64"));
          case DataType::UINT32:
            return self.set(field,
                            checked_cast<uint32_t>(obj, field, "UINT32"));
          case DataType::UINT64:
            return self.set(field,
                            checked_cast<uint64_t>(obj, field, "UINT64"));
          case DataType::FLOAT:
            return self.set(field, checked_cast<float>(obj, field, "FLOAT"));
          case DataType::DOUBLE:
            return self.set(field, checked_cast<double>(obj, field, "DOUBLE"));

          // array datatypes
          case DataType::ARRAY_STRING:
            return self.set(field, checked_cast<std::vector<std::string>>(
                                       obj, field, "ARRAY_STRING"));
          case DataType::ARRAY_BOOL:
            return self.set(field, checked_cast<std::vector<bool>>(
                                       obj, field, "ARRAY_BOOL"));
          case DataType::ARRAY_INT32:
            return self.set(field, checked_cast<std::vector<int32_t>>(
                                       obj, field, "ARRAY_INT32"));
          case DataType::ARRAY_UINT32:
            return self.set(field, checked_cast<std::vector<uint32_t>>(
                                       obj, field, "ARRAY_UINT32"));
          case DataType::ARRAY_INT64:
            return self.set(field, checked_cast<std::vector<int64_t>>(
                                       obj, field, "ARRAY_INT64"));
          case DataType::ARRAY_UINT64:
            return self.set(field, checked_cast<std::vector<uint64_t>>(
                                       obj, field, "ARRAY_UINT64"));
          case DataType::ARRAY_FLOAT:
            return self.set(field, checked_cast<std::vector<float>>(
                                       obj, field, "ARRAY_FLOAT"));
          case DataType::ARRAY_DOUBLE:
            return self.set(field, checked_cast<std::vector<double>>(
                                       obj, field, "ARRAY_DOUBLE"));

          // dense vector datatypes
          case DataType::VECTOR_FP16: {
            const auto value = checked_cast<py::list>(
                obj, field, "VECTOR_FP16 (list of numbers)");
            std::vector<ailego::Float16> new_value;
            new_value.reserve(value.size());
            for (const auto &item : value) {
              try {
                new_value.emplace_back(item.cast<float>());
              } catch (const py::cast_error &e) {
                throw py::type_error("Vector '" + field +
                                     "': expected VECTOR_FP16, got " +
                                     std::string(py::str(py::type::of(obj))));
              }
            }
            return self.set(field, new_value);
          }
          case DataType::VECTOR_FP32:
            return self.set(field, checked_cast<std::vector<float>>(
                                       obj, field, "VECTOR_FP32"));
          case DataType::VECTOR_FP64:
            return self.set(field, checked_cast<std::vector<double>>(
                                       obj, field, "VECTOR_FP64"));
          case DataType::VECTOR_INT8:
            return self.set(field, checked_cast<std::vector<int8_t>>(
                                       obj, field, "VECTOR_INT8"));

          // sparse vector datatypes
          case DataType::SPARSE_VECTOR_FP32: {
            const auto sparse_dict =
                checked_cast<py::dict>(obj, field, "SPARSE_VECTOR_FP32 (dict)");
            std::vector<uint32_t> indices;
            std::vector<float> values;
            for (const auto &item : sparse_dict) {
              try {
                indices.push_back(item.first.cast<uint32_t>());
                values.push_back(item.second.cast<float>());
              } catch (const py::cast_error &e) {
                throw py::type_error(
                    "Vector '" + field +
                    "': sparse vector key/value must be (uint32, float), "
                    "got key=" +
                    std::string(py::str(py::type::of(item.first))) +
                    ", value=" +
                    std::string(py::str(py::type::of(item.second))));
              }
            }
            const std::pair<std::vector<uint32_t>, std::vector<float>>
                sparse_vector{std::move(indices), std::move(values)};
            return self.set(field, sparse_vector);
          }
          case DataType::SPARSE_VECTOR_FP16: {
            const auto sparse_dict =
                checked_cast<py::dict>(obj, field, "SPARSE_VECTOR_FP16 (dict)");
            std::vector<uint32_t> indices;
            std::vector<ailego::Float16> values;
            for (const auto &item : sparse_dict) {
              try {
                indices.push_back(item.first.cast<uint32_t>());
                values.push_back(ailego::Float16(item.second.cast<float>()));
              } catch (const py::cast_error &e) {
                throw py::type_error(
                    "Field '" + field +
                    "': sparse vector key/value must be (uint32, float), "
                    "got key=" +
                    std::string(py::str(py::type::of(item.first))) +
                    ", value=" +
                    std::string(py::str(py::type::of(item.second))));
              }
            }
            const std::pair<std::vector<uint32_t>, std::vector<ailego::Float16>>
                sparse_vector{std::move(indices), std::move(values)};
            return self.set(field, sparse_vector);
          }
          default:
            throw py::type_error("Unsupported type for field: " + field);
        }
      });

  // binding doc get field
  doc.def(
      "get_any",
      [](Doc &self, const std::string &field,
         const DataType &type) -> py::object {
        switch (type) {
          // base datatypes
          case DataType::STRING:
            return py::cast(self.get<std::string>(field));
          case DataType::BOOL:
            return py::cast(self.get<bool>(field));
          case DataType::INT32:
            return py::cast(self.get<int32_t>(field));
          case DataType::UINT32:
            return py::cast(self.get<uint32_t>(field));
          case DataType::INT64:
            return py::cast(self.get<int64_t>(field));
          case DataType::UINT64:
            return py::cast(self.get<uint64_t>(field));
          case DataType::FLOAT:
            return py::cast(self.get<float>(field));
          case DataType::DOUBLE:
            return py::cast(self.get<double>(field));

          // array datatypes
          case DataType::ARRAY_STRING:
            return py::cast(self.get<std::vector<std::string>>(field));
          case DataType::ARRAY_INT32:
            return py::cast(self.get<std::vector<int32_t>>(field));
          case DataType::ARRAY_INT64:
            return py::cast(self.get<std::vector<int64_t>>(field));
          case DataType::ARRAY_UINT32:
            return py::cast(self.get<std::vector<uint32_t>>(field));
          case DataType::ARRAY_UINT64:
            return py::cast(self.get<std::vector<uint64_t>>(field));
          case DataType::ARRAY_FLOAT:
            return py::cast(self.get<std::vector<float>>(field));
          case DataType::ARRAY_DOUBLE:
            return py::cast(self.get<std::vector<double>>(field));
          case DataType::ARRAY_BOOL:
            return py::cast(self.get<std::vector<bool>>(field));

          // vector datatypes
          case DataType::VECTOR_INT8:
            return py::cast(self.get<std::vector<int8_t>>(field));
          case DataType::VECTOR_FP16: {
            auto value = self.get<std::vector<ailego::Float16>>(field);
            if (value.has_value()) {
              std::vector<float> new_value;
              new_value.reserve(value.value().size());
              for (auto &item : value.value()) {
                new_value.push_back(static_cast<float>(item));
              }
              return py::cast(new_value);
            }
            return py::none();
          }
          case DataType::VECTOR_FP32:
            return py::cast(self.get<std::vector<float>>(field));
          case DataType::VECTOR_FP64:
            return py::cast(self.get<std::vector<double>>(field));
          case DataType::SPARSE_VECTOR_FP16: {
            auto vector = self.get<
                std::pair<std::vector<uint32_t>, std::vector<ailego::Float16>>>(
                field);
            const auto &indices = vector->first;
            const auto &values = vector->second;
            py::dict d;
            for (size_t i = 0; i < indices.size(); ++i) {
              d[py::int_(indices[i])] =
                  py::float_(static_cast<float>(values[i]));
            }
            return std::move(d);
          }
          case DataType::SPARSE_VECTOR_FP32: {
            auto vector =
                self.get<std::pair<std::vector<uint32_t>, std::vector<float>>>(
                    field);
            const auto &indices = vector->first;
            const auto &values = vector->second;
            py::dict d;
            for (size_t i = 0; i < indices.size(); ++i) {
              d[py::int_(indices[i])] = py::float_(values[i]);
            }
            return std::move(d);
          }
          default:
            throw py::type_error("Unsupported type for field: " + field);
        }
      });
  doc.def(
      "get_all",
      [](Doc &self, const CollectionSchema &schema) -> py::tuple {
        py::tuple result(4);
        // 1. set doc id and score
        result[0] = py::str(self.pk());
        result[1] = py::float_(self.score());

        if (self.is_empty()) {
          result[2] = py::none();
          result[3] = py::none();
          return result;
        }
        // 2. set scalar fields
        py::dict fields;
        for (const auto &field_meta : schema.forward_fields()) {
          const std::string &field = field_meta->name();
          if (!self.has_value(field)) {
            continue;
          }

          try {
            auto val = [&]() -> py::object {
              switch (field_meta->data_type()) {
                // base datatypes
                case DataType::STRING:
                  return py::str(self.get<std::string>(field).value());
                case DataType::BOOL:
                  return py::cast(self.get<bool>(field));
                case DataType::INT32:
                  return py::cast(self.get<int32_t>(field));
                case DataType::UINT32:
                  return py::cast(self.get<uint32_t>(field));
                case DataType::INT64:
                  return py::cast(self.get<int64_t>(field));
                case DataType::UINT64:
                  return py::cast(self.get<uint64_t>(field));
                case DataType::FLOAT:
                  return py::cast(self.get<float>(field));
                case DataType::DOUBLE:
                  return py::cast(self.get<double>(field));

                // array datatypes
                case DataType::ARRAY_STRING:
                  return py::cast(self.get<std::vector<std::string>>(field));
                case DataType::ARRAY_INT32:
                  return py::cast(self.get<std::vector<int32_t>>(field));
                case DataType::ARRAY_INT64:
                  return py::cast(self.get<std::vector<int64_t>>(field));
                case DataType::ARRAY_UINT32:
                  return py::cast(self.get<std::vector<uint32_t>>(field));
                case DataType::ARRAY_UINT64:
                  return py::cast(self.get<std::vector<uint64_t>>(field));
                case DataType::ARRAY_FLOAT:
                  return py::cast(self.get<std::vector<float>>(field));
                case DataType::ARRAY_DOUBLE:
                  return py::cast(self.get<std::vector<double>>(field));
                case DataType::ARRAY_BOOL:
                  return py::cast(self.get<std::vector<bool>>(field));
                default:
                  throw py::type_error("Unsupported type for field: " + field);
              }
            }();
            fields[py::str(field)] = val;
          } catch (const std::exception &e) {
            fields[py::str(field)] = py::none();
          }
        }
        if (!fields.empty()) {
          result[2] = fields;
        } else {
          result[2] = py::none();
        }
        // 3. set vector fields
        py::dict vectors;
        for (const auto &vec_meta : schema.vector_fields()) {
          const std::string &vec = vec_meta->name();
          if (!self.has_value(vec)) continue;

          try {
            auto array = [&]() -> py::object {
              switch (vec_meta->data_type()) {
                case DataType::VECTOR_INT8:
                  return py::cast(self.get<std::vector<int8_t>>(vec));
                case DataType::VECTOR_FP16: {
                  auto value = self.get<std::vector<ailego::Float16>>(vec);
                  if (value.has_value()) {
                    std::vector<float> new_value;
                    new_value.reserve(value.value().size());
                    for (auto &item : value.value()) {
                      new_value.push_back(static_cast<float>(item));
                    }
                    return py::cast(new_value);
                  }
                  return py::none();
                }
                case DataType::VECTOR_FP32:
                  return py::cast(self.get<std::vector<float>>(vec));
                case DataType::VECTOR_FP64:
                  return py::cast(self.get<std::vector<double>>(vec));
                case DataType::SPARSE_VECTOR_FP16: {
                  auto vector =
                      self.get<std::pair<std::vector<uint32_t>,
                                         std::vector<ailego::Float16>>>(vec);
                  const auto &indices = vector->first;
                  const auto &values = vector->second;
                  py::dict d;
                  for (size_t i = 0; i < indices.size(); ++i) {
                    d[py::int_(indices[i])] =
                        py::float_(static_cast<float>(values[i]));
                  }
                  return std::move(d);
                }
                case DataType::SPARSE_VECTOR_FP32: {
                  auto vector = self.get<
                      std::pair<std::vector<uint32_t>, std::vector<float>>>(
                      vec);
                  const auto &indices = vector->first;
                  const auto &values = vector->second;
                  py::dict d;
                  for (size_t i = 0; i < indices.size(); ++i) {
                    d[py::int_(indices[i])] = py::float_(values[i]);
                  }
                  return std::move(d);
                }
                default:
                  throw py::type_error("Unsupported type for field: " + vec);
              }
            }();
            vectors[py::str(vec)] = array;
          } catch (const std::exception &e) {
            vectors[py::str(vec)] = py::none();
          }
        }
        if (!vectors.empty()) {
          result[3] = vectors;
        } else {
          result[3] = py::none();
        }
        return result;
      },
      py::arg("schema"),
      "Get all fields and vectors as a tuple: (id, score, fields, vectors). "
      "Vectors are zero-copy numpy arrays (dense: ndarray, sparse: (indices, "
      "values) tuple).");
}
}  // namespace zvec

================================================
FILE: src/binding/python/model/schema/python_schema.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "python_schema.h"
#include <pybind11/stl.h>
#include <zvec/db/schema.h>
#include <zvec/db/stats.h>

namespace zvec {

// Creates the `schema` submodule under `parent` and registers the field
// schema, collection schema and collection stats bindings on it, in that
// order.
void ZVecPySchemas::Initialize(pybind11::module_ &parent) {
  py::module_ schema_module =
      parent.def_submodule("schema", "This module contains the schema of Zvec");

  ZVecPySchemas::bind_field_schema(schema_module);
  ZVecPySchemas::bind_collection_schema(schema_module);
  ZVecPySchemas::bind_collection_stats(schema_module);
}

// Binds FieldSchema to Python as `_FieldSchema`: constructor, read-only
// properties, equality operators and pickle support.
void ZVecPySchemas::bind_field_schema(py::module_ &m) {
  py::class_<FieldSchema, FieldSchema::Ptr> field_cls(m, "_FieldSchema");

  // Constructor: only name and data_type are mandatory.
  field_cls.def(py::init<const std::string &, DataType, uint32_t, bool,
                         const IndexParams::Ptr &>(),
                py::arg("name"), py::arg("data_type"), py::arg("dimension") = 0,
                py::arg("nullable") = false, py::arg("index_param") = nullptr);

  // Plain read-only accessors.
  field_cls.def_property_readonly("name", &FieldSchema::name);
  field_cls.def_property_readonly("data_type", &FieldSchema::data_type);
  field_cls.def_property_readonly("nullable", &FieldSchema::nullable);
  field_cls.def_property_readonly("dimension", &FieldSchema::dimension);
  field_cls.def_property_readonly("is_dense_vector",
                                  &FieldSchema::is_dense_vector);
  field_cls.def_property_readonly("is_sparse_vector",
                                  &FieldSchema::is_sparse_vector);

  // index_type falls back to UNDEFINED when no index params are attached.
  field_cls.def_property_readonly("index_type", [](const FieldSchema &self) {
    return self.index_params() ? self.index_type() : IndexType::UNDEFINED;
  });

  // index_param is None when no index params are attached.
  field_cls.def_property_readonly(
      "index_param", [](const FieldSchema &self) -> py::object {
        if (!self.index_params()) {
          return py::none();
        }
        return py::cast(self.index_params());
      });

  field_cls.def("__eq__", &FieldSchema::operator==);
  field_cls.def("__ne__", &FieldSchema::operator!=);

  // Pickle state: (name, data_type, dimension, nullable, index_params|None).
  field_cls.def(py::pickle(
      [](const FieldSchema &self) {
        py::object index_state = py::none();
        if (self.index_params()) {
          index_state = py::cast(self.index_params());
        }
        return py::make_tuple(self.name(), self.data_type(), self.dimension(),
                              self.nullable(), index_state);
      },
      [](py::tuple state) {
        if (state.size() != 5) {
          throw std::runtime_error("Invalid tuple size for FieldSchema pickle");
        }
        const auto field_name = state[0].cast<std::string>();
        const auto data_type = state[1].cast<DataType>();
        const auto dimension = state[2].cast<uint32_t>();
        const auto is_nullable = state[3].cast<bool>();

        IndexParams::Ptr index_params = nullptr;
        if (!state[4].is_none()) {
          index_params = state[4].cast<IndexParams::Ptr>();
        }

        return std::make_shared<FieldSchema>(field_name, data_type, dimension,
                                             is_nullable, index_params);
      }));
}

// Binds CollectionSchema to Python as `_CollectionSchema`: constructor, field
// lookups, field lists, equality operators and pickle support.
void ZVecPySchemas::bind_collection_schema(py::module_ &m) {
  py::class_<CollectionSchema, CollectionSchema::Ptr> schema_cls(
      m, "_CollectionSchema");

  schema_cls.def(py::init<const std::string &, const FieldSchemaPtrList &>(),
                 py::arg("name"), py::arg("fields"),
                 "Construct with name and list of fields");
  schema_cls.def_property_readonly("name", &CollectionSchema::name);
  schema_cls.def("has_field", &CollectionSchema::has_field,
                 py::arg("field_name"), "Check if a field exists.");

  // Single-field lookups return a pointer bound with reference_internal.
  schema_cls.def(
      "get_field",
      [](const CollectionSchema &self,
         const std::string &name) -> const FieldSchema * {
        return self.get_field(name);
      },
      py::arg("field_name"), py::return_value_policy::reference_internal,
      "Get field by name (const pointer), returns None if not found.");
  schema_cls.def(
      "get_forward_field",
      [](const CollectionSchema &self,
         const std::string &name) -> const FieldSchema * {
        return self.get_forward_field(name);
      },
      py::arg("field_name"), py::return_value_policy::reference_internal,
      "Get forward field (used for filtering).");
  schema_cls.def(
      "get_vector_field",
      [](const CollectionSchema &self,
         const std::string &name) -> const FieldSchema * {
        return self.get_vector_field(name);
      },
      py::arg("field_name"), py::return_value_policy::reference_internal,
      "Get vector field by name.");

  // Field lists are bound with the copy return-value policy.
  schema_cls.def("fields", &CollectionSchema::fields,
                 "Return list of all field schemas.",
                 py::return_value_policy::copy);
  schema_cls.def("forward_fields", &CollectionSchema::forward_fields,
                 "Return list of forward-indexed fields.",
                 py::return_value_policy::copy);
  schema_cls.def("vector_fields", &CollectionSchema::vector_fields,
                 "Return list of vector fields.",
                 py::return_value_policy::copy);

  schema_cls.def("__eq__", &CollectionSchema::operator==);
  schema_cls.def("__ne__", &CollectionSchema::operator!=);

  // Pickle state: (name, fields, max_doc_count_per_segment).
  schema_cls.def(py::pickle(
      [](const CollectionSchema &cs) {
        return py::make_tuple(cs.name(), cs.fields(),
                              cs.max_doc_count_per_segment());
      },
      [](py::tuple state) {
        if (state.size() != 3) {
          throw std::runtime_error("Invalid state for CollectionSchema!");
        }

        const auto schema_name = state[0].cast<std::string>();
        const auto field_list = state[1].cast<FieldSchemaPtrList>();
        const auto max_doc_count = state[2].cast<uint64_t>();

        auto restored =
            std::make_shared<CollectionSchema>(schema_name, field_list);
        restored->set_max_doc_count_per_segment(max_doc_count);
        return restored;
      }));
}

// Binds CollectionStats to Python: default constructor, read-only
// `doc_count` / `index_completeness` properties and a JSON-like __repr__.
void ZVecPySchemas::bind_collection_stats(py::module_ &m) {
  pybind11::class_<CollectionStats> stats_cls(m, "CollectionStats");

  stats_cls.def(pybind11::init<>());
  stats_cls.def_property_readonly(
      "doc_count", [](const CollectionStats &stats) { return stats.doc_count; });
  stats_cls.def_property_readonly(
      "index_completeness",
      [](const CollectionStats &stats) { return stats.index_completeness; });

  // __repr__ renders {"doc_count":N, "index_completeness":{"name":value, ...}}.
  stats_cls.def("__repr__", [](const CollectionStats &stats) {
    std::string completeness = "{";
    const char *separator = "";
    for (const auto &[name, ratio] : stats.index_completeness) {
      completeness += separator;
      completeness += "\"" + name + "\":" + std::to_string(ratio);
      separator = ", ";
    }
    completeness += "}";

    std::string repr = "{\"doc_count\":" + std::to_string(stats.doc_count);
    repr += ", \"index_completeness\":" + completeness + "}";
    return repr;
  });
}

}  // namespace zvec

================================================
FILE: src/binding/python/typing/python_type.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "python_type.h"

namespace zvec {

// Creates the `typing` submodule under `parent` and registers the data type,
// index type, metric type, quantize type and status bindings on it.
void ZVecPyTyping::Initialize(pybind11::module_ &parent) {
  pybind11::module_ typing_module = parent.def_submodule(
      "typing", "This module contains the basic data types of Zvec");

  // Registration order is kept as-is: enums first, status last.
  ZVecPyTyping::bind_datatypes(typing_module);
  ZVecPyTyping::bind_index_types(typing_module);
  ZVecPyTyping::bind_metric_types(typing_module);
  ZVecPyTyping::bind_quantize_types(typing_module);
  ZVecPyTyping::bind_status(typing_module);
}

// Exposes the DataType enum to Python. Only the members listed below are
// bound; the remaining C++ members are intentionally left out (see the note
// at the end of the function).
void ZVecPyTyping::bind_datatypes(pybind11::module_ &m) {
  py::enum_<DataType> data_type(m, "DataType", R"pbdoc(
Enumeration of supported data types in Zvec.

Includes scalar types, dense/sparse vector types, and array types.

Examples:
    >>> from zvec.typing import DataType
    >>> print(DataType.FLOAT)
    DataType.FLOAT
    >>> print(DataType.VECTOR_FP32)
    DataType.VECTOR_FP32
)pbdoc");

  // scalar field types
  data_type.value("STRING", DataType::STRING);
  data_type.value("BOOL", DataType::BOOL);
  data_type.value("INT32", DataType::INT32);
  data_type.value("INT64", DataType::INT64);
  data_type.value("FLOAT", DataType::FLOAT);
  data_type.value("DOUBLE", DataType::DOUBLE);
  data_type.value("UINT32", DataType::UINT32);
  data_type.value("UINT64", DataType::UINT64);

  // dense vector types
  data_type.value("VECTOR_FP16", DataType::VECTOR_FP16);
  data_type.value("VECTOR_FP32", DataType::VECTOR_FP32);
  data_type.value("VECTOR_FP64", DataType::VECTOR_FP64);
  data_type.value("VECTOR_INT8", DataType::VECTOR_INT8);

  // sparse vector types
  data_type.value("SPARSE_VECTOR_FP32", DataType::SPARSE_VECTOR_FP32);
  data_type.value("SPARSE_VECTOR_FP16", DataType::SPARSE_VECTOR_FP16);

  // array types (binary arrays are not exposed)
  data_type.value("ARRAY_STRING", DataType::ARRAY_STRING);
  data_type.value("ARRAY_INT32", DataType::ARRAY_INT32);
  data_type.value("ARRAY_INT64", DataType::ARRAY_INT64);
  data_type.value("ARRAY_FLOAT", DataType::ARRAY_FLOAT);
  data_type.value("ARRAY_DOUBLE", DataType::ARRAY_DOUBLE);
  data_type.value("ARRAY_BOOL", DataType::ARRAY_BOOL);
  data_type.value("ARRAY_UINT32", DataType::ARRAY_UINT32);
  data_type.value("ARRAY_UINT64", DataType::ARRAY_UINT64);

  // Not exposed to Python:
  //   DataType::BINARY, DataType::ARRAY_BINARY, DataType::VECTOR_INT4,
  //   DataType::VECTOR_INT16, DataType::VECTOR_BINARY32,
  //   DataType::VECTOR_BINARY64, DataType::UNDEFINED
}

// Exposes the IndexType enum to Python.
void ZVecPyTyping::bind_index_types(pybind11::module_ &m) {
  py::enum_<IndexType> index_type(m, "IndexType", R"pbdoc(
Enumeration of supported index types in Zvec.

Examples:
    >>> from zvec.typing import IndexType
    >>> print(IndexType.HNSW)
    IndexType.HNSW
)pbdoc");

  index_type.value("UNDEFINED", IndexType::UNDEFINED);
  index_type.value("HNSW", IndexType::HNSW);
  index_type.value("HNSW_RABITQ", IndexType::HNSW_RABITQ);
  index_type.value("IVF", IndexType::IVF);
  index_type.value("FLAT", IndexType::FLAT);
  index_type.value("INVERT", IndexType::INVERT);
}

// Exposes the MetricType enum (COSINE, IP, L2) to Python.
void ZVecPyTyping::bind_metric_types(pybind11::module_ &m) {
  py::enum_<MetricType> metric_type(m, "MetricType", R"pbdoc(
Enumeration of supported distance/similarity metrics.

- COSINE: Cosine similarity.
- IP: Inner product (dot product).
- L2: Euclidean distance (L2 norm).

Examples:
    >>> from zvec.typing import MetricType
    >>> print(MetricType.COSINE)
    MetricType.COSINE
)pbdoc");

  metric_type.value("COSINE", MetricType::COSINE);
  metric_type.value("IP", MetricType::IP);
  metric_type.value("L2", MetricType::L2);
}

// Exposes the QuantizeType enum to Python.
void ZVecPyTyping::bind_quantize_types(py::module_ &m) {
  py::enum_<QuantizeType> quantize_type(m, "QuantizeType", R"pbdoc(
Enumeration of supported quantization types for vector compression.

Examples:
    >>> from zvec.typing import QuantizeType
    >>> print(QuantizeType.INT8)
    QuantizeType.INT8
)pbdoc");

  quantize_type.value("UNDEFINED", QuantizeType::UNDEFINED);
  quantize_type.value("FP16", QuantizeType::FP16);
  quantize_type.value("INT8", QuantizeType::INT8);
  quantize_type.value("INT4", QuantizeType::INT4);
  quantize_type.value("RABITQ", QuantizeType::RABITQ);
}

void ZVecPyTyping::bind_status(py::module_ &m) {
  // ---- StatusCode enum ----------------------------------------------------
  py::enum_<StatusCode> status_code(m, "StatusCode", R"pbdoc(
Enumeration of possible status codes for Zvec operations.

Used by the `Status` class to indicate success or failure reason.
)pbdoc");
  status_code.value("OK", StatusCode::OK);
  status_code.value("NOT_FOUND", StatusCode::NOT_FOUND);
  status_code.value("ALREADY_EXISTS", StatusCode::ALREADY_EXISTS);
  status_code.value("INVALID_ARGUMENT", StatusCode::INVALID_ARGUMENT);
  status_code.value("PERMISSION_DENIED", StatusCode::PERMISSION_DENIED);
  status_code.value("FAILED_PRECONDITION", StatusCode::FAILED_PRECONDITION);
  status_code.value("RESOURCE_EXHAUSTED", StatusCode::RESOURCE_EXHAUSTED);
  status_code.value("UNAVAILABLE", StatusCode::UNAVAILABLE);
  status_code.value("INTERNAL_ERROR", StatusCode::INTERNAL_ERROR);
  status_code.value("NOT_SUPPORTED", StatusCode::NOT_SUPPORTED);
  status_code.value("UNKNOWN", StatusCode::UNKNOWN);

  // ---- Status class -------------------------------------------------------
  py::class_<Status> status(m, "Status", R"pbdoc(
Represents the outcome of a Zvec operation.

A `Status` object is either OK (success) or carries an error code and message.

Examples:
    >>> from zvec.typing import Status, StatusCode
    >>> s = Status()
    >>> print(s.ok())
    True
    >>> s = Status(StatusCode.INVALID_ARGUMENT, "Field not found")
    >>> print(s.code() == StatusCode.INVALID_ARGUMENT)
    True
    >>> print(s.message())
    Field not found
)pbdoc");

  // Constructors: default, and (code, message) with an empty default message.
  status.def(py::init<>());
  status.def(py::init<StatusCode, const std::string &>(), py::arg("code"),
             py::arg("message") = "", R"pbdoc(
Construct a status with the given code and optional message.

Args:
    code (StatusCode): The status code.
    message (str, optional): Error message. Defaults to empty string.
)pbdoc");

  // Accessors.
  status.def("ok", &Status::ok, "bool: Returns True if the status is OK.");
  status.def("code", &Status::code, "StatusCode: Returns the status code.");
  status.def("message", &Status::message,
             "str: Returns the error message (may be empty).");

  // Static factories; each error factory forwards a single message string.
  status.def_static("OK", &Status::OK, "Create an OK status.");
  status.def_static(
      "InvalidArgument",
      [](const std::string &msg) { return Status::InvalidArgument(msg); },
      py::arg("message"));
  status.def_static(
      "NotFound",
      [](const std::string &msg) { return Status::NotFound(msg); },
      py::arg("message"));
  status.def_static(
      "AlreadyExists",
      [](const std::string &msg) { return Status::AlreadyExists(msg); },
      py::arg("message"));
  status.def_static(
      "InternalError",
      [](const std::string &msg) { return Status::InternalError(msg); },
      py::arg("message"));
  status.def_static(
      "PermissionDenied",
      [](const std::string &msg) { return Status::PermissionDenied(msg); },
      py::arg("message"));

  // Comparison operators delegate to the C++ operators of Status.
  status.def("__eq__", [](const Status &lhs, const Status &rhs) {
    return lhs == rhs;
  });
  status.def("__ne__", [](const Status &lhs, const Status &rhs) {
    return lhs != rhs;
  });

  // __repr__ renders `{"code":N}` and appends `, "message":"..."` only when
  // the message is non-empty. The message text is inserted verbatim.
  status.def("__repr__", [](const Status &self) {
    std::string repr = "{\"code\":";
    repr += std::to_string(static_cast<int>(self.code()));

    if (!self.message().empty()) {
      repr += ", \"message\":\"";
      repr += self.message();
      repr += "\"";
    }

    repr += "}";
    return repr;
  });
}

}  // namespace zvec

================================================
FILE: src/core/CMakeLists.txt
================================================
# Build script for the zvec_core static library: descends into the core
# sub-directories and then bundles every source found under this directory.
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

# When RaBitQ is supported and architecture auto-detection is on, compile the
# listed hnsw_rabitq sources with the extra flags held in RABITQ_ARCH_FLAG.
if(RABITQ_SUPPORTED AND AUTO_DETECT_ARCH)
  set(HNSW_RABITQ_FILES
      hnsw_rabitq_query_algorithm.cc
      hnsw_rabitq_streamer.cc
      hnsw_rabitq_searcher.cc
      hnsw_rabitq_entity.cc
      rabitq_reformer.cc
      rabitq_converter.cc
  )
  # Turn the bare file names into paths relative to this directory.
  set(HNSW_RABITQ_FILES_FULL ${HNSW_RABITQ_FILES})
  list(TRANSFORM HNSW_RABITQ_FILES_FULL PREPEND "algorithm/hnsw_rabitq/")
  foreach(FILE ${HNSW_RABITQ_FILES_FULL})
      set_source_files_properties(
          ${FILE}
          PROPERTIES
          COMPILE_FLAGS "${RABITQ_ARCH_FLAG}"
      )
  endforeach()
endif()

# Sub-directories processed through cc_directory (presumably provided by
# cmake/bazel.cmake -- confirm).
cc_directory(framework)
cc_directory(algorithm)
cc_directory(metric)
cc_directory(quantizer)
cc_directory(utility)
cc_directory(interface)
cc_directory(mixed_reducer)

# GIT_SRCS_VER is used below as the library VERSION; the glob collects every
# .cc/.c/.h file under this directory, recursively.
git_version(GIT_SRCS_VER ${CMAKE_CURRENT_SOURCE_DIR})
file(GLOB_RECURSE ALL_CORE_SRCS *.cc *.c *.h)

# Remove algorithm/hnsw_rabitq implementation files if not supported.
# interface/indexes/hnsw_rabitq_index.cc is kept because it provides the vtable
# for HNSWRabitqIndex and guards rabitqlib usage with #if RABITQ_SUPPORTED.
if(NOT RABITQ_SUPPORTED)
  list(FILTER ALL_CORE_SRCS EXCLUDE REGEX ".*/algorithm/hnsw_rabitq/.*")
endif()

# The aggregated core library. Note that rabitqlib is listed in LIBS
# regardless of RABITQ_SUPPORTED.
cc_library(
    NAME zvec_core STATIC STRICT PACKED
    SRCS ${ALL_CORE_SRCS}
    LIBS zvec_ailego zvec_turbo sparsehash magic_enum rabitqlib
    INCS . ${PROJECT_ROOT_DIR}/src/core
    VERSION "${GIT_SRCS_VER}"
)


================================================
FILE: src/core/algorithm/CMakeLists.txt
================================================
# Build script for the algorithm sub-directories. The hnsw_rabitq directory is
# only built when RABITQ_SUPPORTED is set; otherwise a stub library with the
# same target name (core_knn_hnsw_rabitq) is generated in its place.
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

cc_directory(cluster)
cc_directory(flat)
cc_directory(flat_sparse)
cc_directory(ivf)
cc_directory(hnsw)
cc_directory(hnsw_sparse)
if(RABITQ_SUPPORTED)
  message(STATUS "BUILD RABITQ")
  cc_directory(hnsw_rabitq)
else()
  message(STATUS "NOT BUILD RABITQ")
  # Empty stub library for unsupported platforms
  # The stub source is (re)written into the build tree on every configure run.
  file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/rabitq_stub.cc
    "// Stub implementation for unsupported platforms\n"
    "// RaBitQ only supports Linux x86_64\n"
    "namespace zvec { namespace core { /* empty namespace for compatibility */ } }\n"
  )

  # Defines the core_knn_hnsw_rabitq target from the stub source alone.
  cc_library(
      NAME core_knn_hnsw_rabitq
      STATIC SHARED STRICT ALWAYS_LINK
      SRCS ${CMAKE_CURRENT_BINARY_DIR}/rabitq_stub.cc
      LIBS core_framework
      INCS . ${PROJECT_ROOT_DIR}/src ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm
      VERSION "${PROXIMA_ZVEC_VERSION}"
    )
endif()


================================================
FILE: src/core/algorithm/cluster/CMakeLists.txt
================================================
# Build script for the clustering algorithms library (core_knn_cluster), built
# from every .cc file in this directory.
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

# NOTE(review): this file lives under src/core/algorithm/cluster, but INCS
# lists ${PROJECT_ROOT_DIR}/src/core/cluster -- confirm that directory exists
# or whether src/core/algorithm/cluster was intended ("." already covers the
# current directory).
cc_library(
    NAME core_knn_cluster STATIC SHARED STRICT ALWAYS_LINK
    SRCS *.cc
    LIBS zvec_ailego core_framework 
    INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/cluster
    VERSION "${PROXIMA_ZVEC_VERSION}"
  )


================================================
FILE: src/core/algorithm/cluster/cluster_params.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <string>

namespace zvec {
namespace core {

// String keys naming the configuration parameters of the clustering
// algorithms and trainers. Every key starts with "proxima." followed by the
// algorithm (or trainer) name and the option name.

//! General
static const std::string GENERAL_CLUSTER_COUNT =
    "proxima.general.cluster.count";
static const std::string GENERAL_THREAD_COUNT =
    "proxima.general.cluster.thread_count";

//! Optimize K-means
static const std::string OPTKMEANS_CLUSTER_COUNT =
    "proxima.optkmeans.cluster.count";
static const std::string OPTKMEANS_CLUSTER_MAX_ITERATIONS =
    "proxima.optkmeans.cluster.max_iterations";
static const std::string OPTKMEANS_CLUSTER_EPSILON =
    "proxima.optkmeans.cluster.epsilon";
static const std::string OPTKMEANS_CLUSTER_SHARD_FACTOR =
    "proxima.optkmeans.cluster.shard_factor";
static const std::string OPTKMEANS_CLUSTER_PURGE_EMPTY =
    "proxima.optkmeans.cluster.purge_empty";
static const std::string OPTKMEANS_CLUSTER_MARKOV_CHAIN_LENGTH =
    "proxima.optkmeans.cluster.markov_chain_length";
static const std::string OPTKMEANS_CLUSTER_ASSUMPTION_FREE =
    "proxima.optkmeans.cluster.assumption_free";

//! K-means
static const std::string KMEANS_CLUSTER_COUNT = "proxima.kmeans.cluster.count";
static const std::string KMEANS_CLUSTER_SHARD_FACTOR =
    "proxima.kmeans.cluster.shard_factor";
static const std::string KMEANS_CLUSTER_EPSILON =
    "proxima.kmeans.cluster.epsilon";
static const std::string KMEANS_CLUSTER_MAX_ITERATIONS =
    "proxima.kmeans.cluster.max_iterations";
static const std::string KMEANS_CLUSTER_PURGE_EMPTY =
    "proxima.kmeans.cluster.purge_empty";
static const std::string KMEANS_CLUSTER_BATCH = "proxima.kmeans.cluster.batch";
static const std::string KMEANS_CLUSTER_SEEKER_CLASS =
    "proxima.kmeans.cluster.seeker_class";
static const std::string KMEANS_CLUSTER_SEEKER_PARAMS =
    "proxima.kmeans.cluster.seeker_params";

//! Mini Batch K-means
static const std::string MINIBATCHKMEANS_CLUSTER_COUNT =
    "proxima.minibatchkmeans.cluster.count";
static const std::string MINIBATCHKMEANS_CLUSTER_SHARD_FACTOR =
    "proxima.minibatchkmeans.cluster.shard_factor";
static const std::string MINIBATCHKMEANS_CLUSTER_EPSILON =
    "proxima.minibatchkmeans.cluster.epsilon";
static const std::string MINIBATCHKMEANS_CLUSTER_MAX_ITERATIONS =
    "proxima.minibatchkmeans.cluster.max_iterations";
static const std::string MINIBATCHKMEANS_CLUSTER_PURGE_EMPTY =
    "proxima.minibatchkmeans.cluster.purge_empty";
static const std::string MINIBATCHKMEANS_CLUSTER_TRY_COUNT =
    "proxima.minibatchkmeans.cluster.try_count";
static const std::string MINIBATCHKMEANS_CLUSTER_BATCH_COUNT =
    "proxima.minibatchkmeans.cluster.batch_count";
static const std::string MINIBATCHKMEANS_CLUSTER_SEEKER_CLASS =
    "proxima.minibatchkmeans.cluster.seeker_class";
static const std::string MINIBATCHKMEANS_CLUSTER_SEEKER_PARAMS =
    "proxima.minibatchkmeans.cluster.seeker_params";

//! K-means++
static const std::string KMEANSPP_CLUSTER_COUNT =
    "proxima.kmeanspp.cluster.count";
static const std::string KMEANSPP_CLUSTER_SHARD_FACTOR =
    "proxima.kmeanspp.cluster.shard_factor";
static const std::string KMEANSPP_CLUSTER_CLASS =
    "proxima.kmeanspp.cluster.class";
static const std::string KMEANSPP_CLUSTER_PARAMS =
    "proxima.kmeanspp.cluster.params";

//! K-MC2
static const std::string KMC2_CLUSTER_COUNT = "proxima.kmc2.cluster.count";
static const std::string KMC2_CLUSTER_SHARD_FACTOR =
    "proxima.kmc2.cluster.shard_factor";
static const std::string KMC2_CLUSTER_MARKOV_CHAIN_LENGTH =
    "proxima.kmc2.cluster.markov_chain_length";
static const std::string KMC2_CLUSTER_ASSUMPTION_FREE =
    "proxima.kmc2.cluster.assumption_free";
static const std::string KMC2_CLUSTER_CLASS = "proxima.kmc2.cluster.class";
static const std::string KMC2_CLUSTER_PARAMS = "proxima.kmc2.cluster.params";

//! Bisecting K-means
static const std::string BIKMEANS_CLUSTER_COUNT =
    "proxima.bikmeans.cluster.count";
static const std::string BIKMEANS_CLUSTER_INIT_COUNT =
    "proxima.bikmeans.cluster.init_count";
static const std::string BIKMEANS_CLUSTER_PURGE_EMPTY =
    "proxima.bikmeans.cluster.purge_empty";
static const std::string BIKMEANS_CLUSTER_FIRST_CLASS =
    "proxima.bikmeans.cluster.first_class";
static const std::string BIKMEANS_CLUSTER_SECOND_CLASS =
    "proxima.bikmeans.cluster.second_class";
static const std::string BIKMEANS_CLUSTER_FIRST_PARAMS =
    "proxima.bikmeans.cluster.first_params";
static const std::string BIKMEANS_CLUSTER_SECOND_PARAMS =
    "proxima.bikmeans.cluster.second_params";

//! K-medoids
static const std::string KMEDOIDS_CLUSTER_COUNT =
    "proxima.kmedoids.cluster.count";
static const std::string KMEDOIDS_CLUSTER_SHARD_FACTOR =
    "proxima.kmedoids.cluster.shard_factor";
static const std::string KMEDOIDS_CLUSTER_EPSILON =
    "proxima.kmedoids.cluster.epsilon";
static const std::string KMEDOIDS_CLUSTER_MAX_ITERATIONS =
    "proxima.kmedoids.cluster.max_iterations";
static const std::string KMEDOIDS_CLUSTER_PURGE_EMPTY =
    "proxima.kmedoids.cluster.purge_empty";
static const std::string KMEDOIDS_CLUSTER_BENCH_RATIO =
    "proxima.kmedoids.cluster.bench_ratio";
static const std::string KMEDOIDS_CLUSTER_ONLY_MEANS =
    "proxima.kmedoids.cluster.only_means";
static const std::string KMEDOIDS_CLUSTER_WITHOUT_MEANS =
    "proxima.kmedoids.cluster.without_means";
static const std::string KMEDOIDS_CLUSTER_SEEKER_CLASS =
    "proxima.kmedoids.cluster.seeker_class";
static const std::string KMEDOIDS_CLUSTER_SEEKER_PARAMS =
    "proxima.kmedoids.cluster.seeker_params";

//! Stratified
static const std::string STRATIFIED_CLUSTER_COUNT =
    "proxima.stratified.cluster.count";
static const std::string STRATIFIED_CLUSTER_FIRST_CLASS =
    "proxima.stratified.cluster.first_class";
static const std::string STRATIFIED_CLUSTER_SECOND_CLASS =
    "proxima.stratified.cluster.second_class";
static const std::string STRATIFIED_CLUSTER_FIRST_COUNT =
    "proxima.stratified.cluster.first_count";
static const std::string STRATIFIED_CLUSTER_SECOND_COUNT =
    "proxima.stratified.cluster.second_count";
static const std::string STRATIFIED_CLUSTER_FIRST_PARAMS =
    "proxima.stratified.cluster.first_params";
static const std::string STRATIFIED_CLUSTER_SECOND_PARAMS =
    "proxima.stratified.cluster.second_params";
static const std::string STRATIFIED_CLUSTER_AUTO_TUNING =
    "proxima.stratified.cluster.auto_tuning";
static const std::string STRATIFIED_CLUSTER_SECOND_POOL_COUNT =
    "proxima.stratified.cluster.second_pool_count";

//! Gap Statistics
static const std::string GAPSTATS_CLUSTER_ESTIMATER_K_MIN =
    "proxima.gapstats.cluster_estimater.k_min";
static const std::string GAPSTATS_CLUSTER_ESTIMATER_K_MAX =
    "proxima.gapstats.cluster_estimater.k_max";
static const std::string GAPSTATS_CLUSTER_ESTIMATER_K_MIN_STEP =
    "proxima.gapstats.cluster_estimater.k_min_step";
static const std::string GAPSTATS_CLUSTER_ESTIMATER_K_MAX_STEP =
    "proxima.gapstats.cluster_estimater.k_max_step";
static const std::string GAPSTATS_CLUSTER_ESTIMATER_TRY_COUNT =
    "proxima.gapstats.cluster_estimater.try_count";
static const std::string GAPSTATS_CLUSTER_ESTIMATER_SHARD_FACTOR =
    "proxima.gapstats.cluster_estimater.shard_factor";
static const std::string GAPSTATS_CLUSTER_ESTIMATER_ENABLE_MC2 =
    "proxima.gapstats.cluster_estimater.enable_mc2";
static const std::string GAPSTATS_CLUSTER_ESTIMATER_MARKOV_CHAIN_LENGTH =
    "proxima.gapstats.cluster_estimater.markov_chain_length";
static const std::string GAPSTATS_CLUSTER_ESTIMATER_CLUSTER_CLASS =
    "proxima.gapstats.cluster_estimater.cluster_class";

//! Cluster Trainer
static const std::string CLUSTER_TRAINER_SAMPLE_COUNT =
    "proxima.cluster.trainer.sample_count";
static const std::string CLUSTER_TRAINER_SAMPLE_RATIO =
    "proxima.cluster.trainer.sample_ratio";
static const std::string CLUSTER_TRAINER_THREAD_COUNT =
    "proxima.cluster.trainer.thread_count";
static const std::string CLUSTER_TRAINER_FILE_NAME =
    "proxima.cluster.trainer.file_name";
static const std::string CLUSTER_TRAINER_CLASS_NAME =
    "proxima.cluster.trainer.class_name";

//! Stratified Trainer
static const std::string STRATIFIED_TRAINER_SAMPLE_COUNT =
    "proxima.stratified.trainer.sample_count";
static const std::string STRATIFIED_TRAINER_SAMPLE_RATIO =
    "proxima.stratified.trainer.sample_ratio";
static const std::string STRATIFIED_TRAINER_THREAD_COUNT =
    "proxima.stratified.trainer.thread_count";
static const std::string STRATIFIED_TRAINER_FILE_NAME =
    "proxima.stratified.trainer.file_name";
static const std::string STRATIFIED_TRAINER_CLASS_NAME =
    "proxima.stratified.trainer.class_name";
static const std::string STRATIFIED_TRAINER_CLUSTER_COUNT =
    "proxima.stratified.trainer.cluster_count";
//! Correctly spelled name for the "autotune" key; prefer this in new code
static const std::string STRATIFIED_TRAINER_AUTOTUNE =
    "proxima.stratified.trainer.autotune";
//! Misspelled legacy name for the same key, kept so that existing references
//! keep compiling. The value is identical to STRATIFIED_TRAINER_AUTOTUNE.
static const std::string STRATIFIED_TRAINER_AUTOAUNE =
    "proxima.stratified.trainer.autotune";
//! Prefix only: the key is completed by text appended after the trailing '_'
static const std::string STRATIFIED_TRAINER_PARAMS_IN_LEVEL_PREFIX =
    "proxima.stratified.trainer.cluster_params_in_level_";

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/cluster/kmeans_cluster.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <ailego/container/reservoir.h>
#include <zvec/ailego/utility/float_helper.h>
#include <zvec/ailego/utility/time_helper.h>
#include <zvec/core/framework/index_cluster.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_factory.h>
#include "cluster_params.h"
#include "linear_seeker.h"
#include "vector_mean.h"

namespace zvec {
namespace core {

/*! Kmeans Cluster
 *
 *  IndexCluster implementation that refines a centroid list over mounted
 *  features. cluster() runs up to max_iterations_ rounds of clustering() and
 *  stops early once the change in cost between two rounds drops below
 *  epsilon_.
 */
class KmeansCluster : public IndexCluster {
 public:
  //! Constructor (all settings keep their in-class defaults)
  KmeansCluster(void) {}

  //! Constructor taking the iteration limit and the batch flag
  KmeansCluster(size_t iters, bool batch)
      : max_iterations_(iters), batch_(batch) {}

  //! Constructor taking only the batch flag
  KmeansCluster(bool batch) : batch_(batch) {}

  //! Destructor
  virtual ~KmeansCluster(void) {}

  //! Initialize Cluster: stores the meta, applies params, creates the seeker
  virtual int init(const IndexMeta &meta, const ailego::Params &params);

  //! Cleanup Cluster: drops mounted features and working buffers
  virtual int cleanup(void);

  //! Reset Cluster: clears mounted features and working buffers
  virtual int reset(void);

  //! Update Cluster: re-applies params
  virtual int update(const ailego::Params &params);

  //! Suggest dividing to K clusters (used when cluster() gets an empty list)
  virtual void suggest(uint32_t k);

  //! Mount features (must match the meta given to init())
  virtual int mount(IndexFeatures::Pointer feats);

  //! Cluster: iteratively refine `cents`; a default thread pool is created
  //! when `threads` is null
  virtual int cluster(IndexThreads::Pointer threads,
                      IndexCluster::CentroidList &cents);

  //! Classify: build the seeker from `cents`, then update the clusters and
  //! their features
  virtual int classify(IndexThreads::Pointer threads,
                       IndexCluster::CentroidList &cents);

  //! Label: build the seeker from `cents`, then write labels into `out`
  virtual int label(IndexThreads::Pointer threads,
                    const IndexCluster::CentroidList &cents,
                    std::vector<uint32_t> *out);

 protected:
  //! Test if it is valid (seeker present and a non-empty feature set mounted)
  bool is_valid(void) const;

  //! Cluster once; the cost of the round is returned through `cost`
  int clustering(IndexThreads *threads, IndexCluster::CentroidList &cents,
                 double *cost);

  //! Update parameters
  void update_params(const ailego::Params &params);

  //! Init seeker
  int init_seeker(void);

  //! Build seeker
  int build_seeker(const IndexCluster::CentroidList &cents);

  //! Check Centroids
  bool check_centroids(const IndexCluster::CentroidList &cents);

  //! Initialize Centroids
  void init_centroids(size_t count, IndexCluster::CentroidList *out);

  //! Initialize Shard Containers
  void init_containers(size_t shard_count);

  //! Initialize Shard Features Containers
  void init_features_containers(size_t shard_count);

  //! Split Clusters
  void split_clusters(IndexThreads *threads,
                      const IndexCluster::CentroidList &cents);

  //! Update Centroids
  void update_centroids(IndexThreads *threads,
                        IndexCluster::CentroidList &cents);

  //! Update Clusters
  void update_clusters(IndexThreads *threads,
                       const IndexCluster::CentroidList &cents);

  //! Update Clusters' Features
  void update_features(IndexThreads *threads,
                       IndexCluster::CentroidList &cents);

  //! Update Labels
  void update_labels(IndexThreads *threads, std::vector<uint32_t> *labels);

  //! Split Clusters in Thread
  void split_clusters_thread(size_t index_begin, size_t index_end,
                             const IndexThreads *threads);

  //! Update Centroid in Thread
  void update_centroid_thread(size_t column, IndexCluster::CentroidList *out);

  //! Update Cluster in Thread
  void update_cluster_thread(size_t index_begin, size_t index_end,
                             const IndexThreads *threads);

  //! Update Cluster's Features in Thread
  void update_features_thread(size_t column, IndexCluster::CentroidList *out);

  //! Update Labels in Thread
  void update_labels_thread(size_t index_begin, size_t index_end,
                            std::vector<uint32_t> *labels);

 protected:
  //! Members
  // Meta captured by init(); mount() checks incoming features against it.
  IndexMeta meta_{};
  // Features mounted by mount().
  IndexFeatures::Pointer features_{};
  LinearSeeker::Pointer seeker_{};
  std::vector<double> shard_cluster_scores_{};
  std::vector<std::vector<const void *>> shard_cluster_features_{};
  std::shared_ptr<VectorMeanArray> shard_cluster_means_{};
  // Seeded from the input centroids by cluster() when batch_ is set.
  std::shared_ptr<VectorMeanArray> batch_means_{};
  std::vector<double> batch_scores_{};
  // cluster() stops once |cost change| between two rounds is below this.
  double epsilon_{std::numeric_limits<float>::epsilon()};
  float shard_factor_{16.0f};
  // Upper bound on the number of rounds run by cluster().
  uint32_t max_iterations_{20u};
  // Set by suggest(); used by cluster() when the centroid list is empty.
  uint32_t cluster_count_{0u};
  // Passed to the default thread pool created when the caller supplies none.
  uint32_t thread_count_{0u};
  bool batch_{false};
  // Passed to PurgeCentroids() at the end of cluster(): whether centroids
  // without followers are cut from the list.
  bool purge_empty_{false};
};

/*! Centroid Features
 *
 *  IndexFeatures view over a centroid list. Only a reference to the list is
 *  stored, so the list has to outlive this object.
 */
class KmeansCentroidFeatures : public IndexFeatures {
 public:
  //! Constructor: binds to `cents` and caches element size, dimension and
  //! data type taken from `meta`
  KmeansCentroidFeatures(const IndexMeta &meta,
                         const IndexCluster::CentroidList &cents)
      : centroids_(cents),
        feature_size_(meta.element_size()),
        feature_dimension_(meta.dimension()),
        data_type_(meta.data_type()) {}

  //! Number of centroids in the bound list
  virtual size_t count() const {
    return centroids_.size();
  }

  //! Dimension cached from the meta
  virtual size_t dimension() const {
    return feature_dimension_;
  }

  //! Feature pointer of the i-th centroid (no bounds check)
  virtual const void *element(size_t i) const {
    return centroids_[i].feature();
  }

  //! Data type cached from the meta
  virtual IndexMeta::DataType data_type() const {
    return data_type_;
  }

  //! Element size cached from the meta
  virtual size_t element_size() const {
    return feature_size_;
  }

 private:
  const IndexCluster::CentroidList &centroids_;
  size_t feature_size_;
  size_t feature_dimension_;
  IndexMeta::DataType data_type_;
};

//! Create an empty VectorMean accumulator matching the data type of `meta`.
//! Data types without a dedicated branch fall back to the binary accumulator.
static inline std::shared_ptr<VectorMean> NewVectorMean(const IndexMeta &meta) {
  const auto type = meta.data_type();
  const auto dim = meta.dimension();

  if (type == IndexMeta::DataType::DT_FP16) {
    return std::make_shared<NumericalVectorMean<ailego::Float16>>(dim);
  }
  if (type == IndexMeta::DataType::DT_FP32) {
    return std::make_shared<NumericalVectorMean<float>>(dim);
  }
  if (type == IndexMeta::DataType::DT_FP64) {
    return std::make_shared<NumericalVectorMean<double>>(dim);
  }
  if (type == IndexMeta::DataType::DT_INT8) {
    return std::make_shared<NumericalVectorMean<int8_t>>(dim);
  }
  if (type == IndexMeta::DataType::DT_INT4) {
    return std::make_shared<NibbleVectorMean<uint8_t>>(dim);
  }
  if (type == IndexMeta::DataType::DT_INT16) {
    return std::make_shared<NumericalVectorMean<int16_t>>(dim);
  }

  // As binary default
  return std::make_shared<BinaryVectorMean>(dim);
}

//! Create an empty VectorMeanArray whose element type matches the data type
//! of `meta`. Data types without a dedicated branch fall back to binary.
static inline std::shared_ptr<VectorMeanArray> NewVectorMeanArray(
    const IndexMeta &meta) {
  const auto type = meta.data_type();
  const auto dim = meta.dimension();

  if (type == IndexMeta::DataType::DT_FP16) {
    return std::make_shared<
        GeneralVectorMeanArray<NumericalVectorMean<ailego::Float16>>>(dim);
  }
  if (type == IndexMeta::DataType::DT_FP32) {
    return std::make_shared<
        GeneralVectorMeanArray<NumericalVectorMean<float>>>(dim);
  }
  if (type == IndexMeta::DataType::DT_FP64) {
    return std::make_shared<
        GeneralVectorMeanArray<NumericalVectorMean<double>>>(dim);
  }
  if (type == IndexMeta::DataType::DT_INT8) {
    return std::make_shared<
        GeneralVectorMeanArray<NumericalVectorMean<int8_t>>>(dim);
  }
  if (type == IndexMeta::DataType::DT_INT4) {
    return std::make_shared<
        GeneralVectorMeanArray<NibbleVectorMean<uint8_t>>>(dim);
  }
  if (type == IndexMeta::DataType::DT_INT16) {
    return std::make_shared<
        GeneralVectorMeanArray<NumericalVectorMean<int16_t>>>(dim);
  }

  // As binary default
  return std::make_shared<GeneralVectorMeanArray<BinaryVectorMean>>(dim);
}

//! Helper: build a GeneralVectorMeanArray<MeanT> and add one entry per
//! centroid, reading each centroid's feature buffer as `const ElemT *`.
template <typename MeanT, typename ElemT>
static inline std::shared_ptr<VectorMeanArray> NewSeededVectorMeanArray(
    const IndexMeta &meta, const IndexCluster::CentroidList &cents) {
  auto means =
      std::make_shared<GeneralVectorMeanArray<MeanT>>(meta.dimension());

  for (const auto &cent : cents) {
    means->emplace(reinterpret_cast<const ElemT *>(cent.feature()),
                   meta.dimension(), cent.follows());
  }
  return means;
}

//! Create a VectorMeanArray matching the data type of `meta` and pre-filled
//! with one entry per centroid (its feature, the dimension and its follower
//! count). Data types without a dedicated branch fall back to binary.
static inline std::shared_ptr<VectorMeanArray> NewVectorMeanArray(
    const IndexMeta &meta, const IndexCluster::CentroidList &cents) {
  switch (meta.data_type()) {
    case IndexMeta::DataType::DT_FP16:
      return NewSeededVectorMeanArray<NumericalVectorMean<ailego::Float16>,
                                      ailego::Float16>(meta, cents);

    case IndexMeta::DataType::DT_FP32:
      return NewSeededVectorMeanArray<NumericalVectorMean<float>, float>(
          meta, cents);

    case IndexMeta::DataType::DT_FP64:
      return NewSeededVectorMeanArray<NumericalVectorMean<double>, double>(
          meta, cents);

    case IndexMeta::DataType::DT_INT8:
      return NewSeededVectorMeanArray<NumericalVectorMean<int8_t>, int8_t>(
          meta, cents);

    case IndexMeta::DataType::DT_INT4:
      return NewSeededVectorMeanArray<NibbleVectorMean<uint8_t>, uint8_t>(
          meta, cents);

    case IndexMeta::DataType::DT_INT16:
      return NewSeededVectorMeanArray<NumericalVectorMean<int16_t>, int16_t>(
          meta, cents);

    default:
      break;
  }

  // As binary default: the feature pointer is handed over without a cast.
  auto binary_means = std::make_shared<GeneralVectorMeanArray<BinaryVectorMean>>(
      meta.dimension());

  for (const auto &cent : cents) {
    binary_means->emplace(cent.feature(), meta.dimension(), cent.follows());
  }
  return binary_means;
}

//! Sum the score() of every centroid in the list, in list order.
static inline double CalculateSSE(const IndexCluster::CentroidList &cents) {
  double total = 0.0;
  for (auto cent = cents.begin(); cent != cents.end(); ++cent) {
    total += cent->score();
  }
  return total;
}

//! Move every centroid without followers (follows() == 0) behind the ones
//! that have followers. With `cutting` set, the list is then shrunk so that
//! only the centroids with followers remain; otherwise its size is unchanged.
//! The relative order of the kept centroids is not preserved.
static inline void PurgeCentroids(IndexCluster::CentroidList &cents,
                                  bool cutting) {
  // [0, pos) holds centroids known to have followers, [live_end, size) holds
  // the empty ones swapped out so far.
  size_t live_end = cents.size();

  for (size_t pos = 0; pos < live_end;) {
    if (cents[pos].follows() != 0) {
      ++pos;
      continue;
    }
    // Empty centroid: exchange it with the last unchecked entry and look at
    // the swapped-in entry on the next pass (pos is not advanced).
    --live_end;
    if (pos != live_end) {
      std::swap(cents[pos], cents[live_end]);
    }
  }

  if (cutting) {
    cents.resize(live_end);
  }
}

//! Store the meta, apply the parameters and create the seeker. The return
//! value is whatever init_seeker() reports.
int KmeansCluster::init(const IndexMeta &meta, const ailego::Params &params) {
  meta_ = meta;
  update_params(params);

  const int seeker_status = init_seeker();
  return seeker_status;
}

//! Release the mounted features and every working buffer, then clean up the
//! seeker. Always returns 0.
int KmeansCluster::cleanup(void) {
  features_.reset();
  shard_cluster_scores_.clear();
  shard_cluster_features_.clear();
  shard_cluster_means_.reset();
  batch_means_.reset();
  batch_scores_.clear();
  // is_valid() treats a null seeker_ as a possible state, so do not
  // dereference it blindly (e.g. cleanup() on an object that was never
  // successfully initialized).
  if (seeker_) {
    seeker_->cleanup();
  }
  return 0;
}

//! Drop the mounted features and clear every working buffer while keeping
//! the buffer objects and the seeker alive. Always returns 0.
int KmeansCluster::reset(void) {
  features_.reset();
  shard_cluster_scores_.clear();
  shard_cluster_features_.clear();
  // Both mean arrays start out as empty pointers, so reset() may run while
  // one or both are still null; skip them instead of dereferencing null.
  if (shard_cluster_means_) {
    shard_cluster_means_->clear();
  }
  if (batch_means_) {
    batch_means_->clear();
  }
  batch_scores_.clear();
  // is_valid() treats a null seeker_ as a possible state.
  if (seeker_) {
    seeker_->reset();
  }
  return 0;
}

//! Re-apply the given parameters. Always returns 0.
int KmeansCluster::update(const ailego::Params &params) {
  update_params(params);
  return 0;
}

void KmeansCluster::suggest(uint32_t k) {
  cluster_count_ = k;
}

//! Mount the feature set to be clustered.
//! Returns IndexError_InvalidArgument for a null pointer, IndexError_Mismatch
//! when the features do not match the meta or their dimension is not a valid
//! multiple for the int4/binary32/binary64 data types, and 0 on success.
int KmeansCluster::mount(IndexFeatures::Pointer feats) {
  if (!feats) {
    return IndexError_InvalidArgument;
  }
  if (!feats->is_matched(meta_)) {
    return IndexError_Mismatch;
  }

  // Check dimension: some data types only accept whole multiples.
  const auto type = meta_.data_type();
  if (type == IndexMeta::DataType::DT_INT4) {
    if (feats->dimension() % 2 != 0) {
      LOG_ERROR(
          "Unsupported feature dimension %zu (dimension of int4 "
          "must be an integer multiple of 2).",
          feats->dimension());
      return IndexError_Mismatch;
    }
  } else if (type == IndexMeta::DataType::DT_BINARY32) {
    if (feats->dimension() % 32 != 0) {
      LOG_ERROR(
          "Unsupported feature dimension %zu (dimension of binary32 "
          "must be an integer multiple of 32).",
          feats->dimension());
      return IndexError_Mismatch;
    }
  } else if (type == IndexMeta::DataType::DT_BINARY64) {
    if (feats->dimension() % 64 != 0) {
      LOG_ERROR(
          "Unsupported feature dimension %zu (dimension of binary64 "
          "must be an integer multiple of 64).",
          feats->dimension());
      return IndexError_Mismatch;
    }
  }

  // All checks passed: take over the feature set.
  features_ = std::move(feats);
  return 0;
}

//! Iteratively refine `cents` over the mounted features.
//! An empty `cents` is first filled by init_centroids() with the suggested
//! cluster count. Iteration stops after max_iterations_ rounds or once the
//! absolute change in cost between two rounds is below epsilon_. Centroids
//! without followers are purged at the end (cut only if purge_empty_ is set).
//! Returns 0 on success or the error code of the failing step.
int KmeansCluster::cluster(IndexThreads::Pointer threads,
                           IndexCluster::CentroidList &cents) {
  ailego::ElapsedTime timer;

  // Fall back to a private thread pool when the caller supplies none.
  if (!threads) {
    threads = std::make_shared<SingleQueueIndexThreads>(thread_count_, false);
    if (!threads) {
      return IndexError_NoMemory;
    }
  }
  if (!this->check_centroids(cents)) {
    LOG_ERROR("The input centroid's list includes some invalid centroids.");
    return IndexError_InvalidArgument;
  }

  if (!this->is_valid()) {
    LOG_ERROR("The cluster is not ready.");
    return IndexError_NoReady;
  }

  if (cents.empty()) {
    if (cluster_count_ == 0) {
      LOG_ERROR("The count of cluster is unknown.");
      return IndexError_NoReady;
    }
    this->init_centroids(cluster_count_, &cents);
  }

  // Batch mode: seed the running means and scores from the input centroids.
  if (batch_) {
    batch_means_ = NewVectorMeanArray(meta_, cents);
    batch_scores_.clear();
    for (const auto &cent : cents) {
      batch_scores_.push_back(cent.score());
    }
  }

  double prev_cost = 0.0;

  // we need to do clustering and update the centroids' follows, even if
  // cents.size() == 1. Otherwise, the centroid with empty follows will be
  // removed if purge_empty enabled
  for (uint32_t round = 0; (round < max_iterations_) && !cents.empty();
       ++round) {
    double round_cost;  // passed to clustering() as its cost output

    const int ret = this->clustering(threads.get(), cents, &round_cost);
    if (ret != 0) {
      LOG_ERROR("(%u) Failed to cluster.", round + 1);
      return ret;
    }

    const double delta = round_cost - prev_cost;
    LOG_DEBUG("(%u) Updated %zu Clusters, %zu Features: %zu ms, %f -> %f = %f",
              round, cents.size(), features_->count(),
              static_cast<size_t>(timer.milli_seconds()), prev_cost,
              round_cost, delta);
    timer.reset();

    // Converged: the cost barely moved since the previous round.
    if (std::abs(delta) < epsilon_) {
      break;
    }
    prev_cost = round_cost;
  }

  // Purge the empty centroids
  PurgeCentroids(cents, purge_empty_);
  return 0;
}

//! Assign every mounted feature to its nearest centroid of `cents` and
//! collect the feature pointers of each cluster into the centroid's similars.
//! Returns 0 on success, otherwise an IndexError code.
int KmeansCluster::classify(IndexThreads::Pointer threads,
                            IndexCluster::CentroidList &cents) {
  // Use a private thread pool when the caller did not provide one
  if (!threads) {
    threads = std::make_shared<SingleQueueIndexThreads>(thread_count_, false);
    if (!threads) {
      return IndexError_NoMemory;
    }
  }

  // Validate the centroids and the state of the cluster
  if (cents.empty()) {
    LOG_ERROR("The input centroid's list is empty.");
    return IndexError_InvalidArgument;
  }
  if (!this->check_centroids(cents)) {
    LOG_ERROR("The input centroid's list includes some invalid centroids.");
    return IndexError_InvalidArgument;
  }
  if (!this->is_valid()) {
    LOG_ERROR("The cluster is not ready.");
    return IndexError_NoReady;
  }

  // Mount the centroids into the seeker before searching them
  const int ret = this->build_seeker(cents);
  if (ret != 0) {
    LOG_ERROR("Failed to build the seeker.");
    return ret;
  }

  // Bucket the features per thread first, then merge them per centroid
  IndexThreads *pool = threads.get();
  this->update_clusters(pool, cents);
  this->update_features(pool, cents);
  return 0;
}

//! Compute, for every mounted feature, the index of its nearest centroid.
//! @param threads  Thread pool to use; a private one is created when null.
//! @param cents    Centroids to label against; must not be empty.
//! @param out      Receives one label per feature; must not be null.
//! @return 0 on success, otherwise an IndexError code.
int KmeansCluster::label(IndexThreads::Pointer threads,
                         const IndexCluster::CentroidList &cents,
                         std::vector<uint32_t> *out) {
  // Reject a null output buffer up front: update_labels() resizes and writes
  // through this pointer unconditionally.
  if (!out) {
    LOG_ERROR("The output buffer of labels is null.");
    return IndexError_InvalidArgument;
  }

  if (!threads) {
    threads = std::make_shared<SingleQueueIndexThreads>(thread_count_, false);
    if (!threads) {
      return IndexError_NoMemory;
    }
  }
  if (cents.empty()) {
    LOG_ERROR("The input centroid's list is empty.");
    return IndexError_InvalidArgument;
  }

  if (!this->check_centroids(cents)) {
    LOG_ERROR("The input centroid's list includes some invalid centroids.");
    return IndexError_InvalidArgument;
  }

  if (!this->is_valid()) {
    LOG_ERROR("The cluster is not ready.");
    return IndexError_NoReady;
  }

  // Mount the centroids into the seeker before searching them
  int result = this->build_seeker(cents);
  if (result != 0) {
    LOG_ERROR("Failed to build the seeker.");
    return result;
  }

  this->update_labels(threads.get(), out);
  return 0;
}

//! The cluster is usable once a seeker exists and a non-empty feature set
//! has been mounted.
bool KmeansCluster::is_valid(void) const {
  return seeker_ && features_ && (features_->count() != 0);
}

//! Perform one k-means round: mount the current centroids into the seeker,
//! accumulate the features per centroid, recompute the centroids and store
//! the SSE of the result into `*cost`. `*cost` is untouched on failure.
int KmeansCluster::clustering(IndexThreads *threads,
                              IndexCluster::CentroidList &cents, double *cost) {
  const int ret = this->build_seeker(cents);
  if (ret != 0) {
    LOG_ERROR("Failed to build the seeker.");
    return ret;
  }

  // Assignment step, followed by the centroid update step
  this->split_clusters(threads, cents);
  this->update_centroids(threads, cents);

  *cost = CalculateSSE(cents);
  return 0;
}

//! Refresh the tunable settings of the cluster from `params`.
//! Every `get` call is handed the address of the member it may fill.
//! NOTE(review): presumably a missing key leaves the member untouched —
//! confirm against ailego::Params::get.
void KmeansCluster::update_params(const ailego::Params &params) {
  params.get(GENERAL_THREAD_COUNT, &thread_count_);
  // Both keys below target cluster_count_; the k-means specific key is read
  // last, so it is the one that gets the final say.
  params.get(GENERAL_CLUSTER_COUNT, &cluster_count_);
  params.get(KMEANS_CLUSTER_COUNT, &cluster_count_);
  params.get(KMEANS_CLUSTER_SHARD_FACTOR, &shard_factor_);
  params.get(KMEANS_CLUSTER_EPSILON, &epsilon_);
  params.get(KMEANS_CLUSTER_MAX_ITERATIONS, &max_iterations_);
  params.get(KMEANS_CLUSTER_BATCH, &batch_);
  params.get(KMEANS_CLUSTER_PURGE_EMPTY, &purge_empty_);
}

//! Create the linear seeker and initialize it with the meta of the cluster.
//! Returns 0 on success, IndexError_NoMemory when the allocation fails, or
//! the error reported by the seeker's init().
int KmeansCluster::init_seeker(void) {
  // Non-throwing allocation, so that a failure can be reported as a code
  seeker_.reset(new (std::nothrow) LinearSeeker);
  if (!seeker_) {
    LOG_ERROR("Failed to create linear seeker.");
    return IndexError_NoMemory;
  }

  const int ret = seeker_->init(meta_);
  if (ret != 0) {
    LOG_ERROR("Failed to initialize linear seeker.");
  }
  return ret;
}

//! Wrap the centroids as a feature set and mount it into the seeker, so that
//! subsequent seeks search among `cents`.
int KmeansCluster::build_seeker(const IndexCluster::CentroidList &cents) {
  auto centroid_feats = std::make_shared<KmeansCentroidFeatures>(meta_, cents);

  const int ret = seeker_->mount(std::move(centroid_feats));
  if (ret != 0) {
    LOG_ERROR("Failed to mount features for linear seeker.");
  }
  return ret;
}

//! Check that every centroid has exactly the element size of the meta.
//! An empty list is considered valid.
bool KmeansCluster::check_centroids(const IndexCluster::CentroidList &cents) {
  bool all_matched = true;
  for (size_t i = 0; all_matched && (i < cents.size()); ++i) {
    all_matched = (cents[i].size() == meta_.element_size());
  }
  return all_matched;
}

void KmeansCluster::init_centroids(size_t count,
                                   IndexCluster::CentroidList *out) {
  size_t feature_size = features_->element_size();
  size_t features_count = features_->count();
  size_t sample_count = std::min<size_t>(count, features_count);

  ailego::Reservoir<size_t> sampler(sample_count);
  for (size_t i = 0; i < features_count; ++i) {
    sampler.fill(i);
  }

  // Save centroids
  out->reserve(sampler.pool().size());
  for (auto i : sampler.pool()) {
    out->emplace_back(features_->element(i), feature_size);
  }
}

//! Reset the per-shard accumulators (means and scores) to `shard_count`
//! fresh entries. The mean array is created lazily on first use.
void KmeansCluster::init_containers(size_t shard_count) {
  if (!shard_cluster_means_) {
    shard_cluster_means_ = NewVectorMeanArray(meta_);
  }

  // Clear before resizing, so that no stale accumulation survives
  shard_cluster_means_->clear();
  shard_cluster_means_->resize(shard_count);

  shard_cluster_scores_.clear();
  shard_cluster_scores_.resize(shard_count);
}

//! Size the per-shard feature buckets to `shard_count` and empty each one.
void KmeansCluster::init_features_containers(size_t shard_count) {
  shard_cluster_features_.resize(shard_count);
  for (size_t i = 0; i < shard_cluster_features_.size(); ++i) {
    shard_cluster_features_[i].clear();
  }
}

//! Assignment step: cut the features into contiguous ranges and let the
//! worker threads accumulate each range into the per-thread accumulators.
//! Blocks until all ranges have been processed.
void KmeansCluster::split_clusters(IndexThreads *threads,
                                   const IndexCluster::CentroidList &cents) {
  // One accumulator slot per (worker thread, centroid) pair
  this->init_containers(threads->count() * cents.size());
  auto group = threads->make_group();

  // Work out how the features are divided among the tasks
  const size_t total = features_->count();
  const size_t shards = std::max<size_t>(
      static_cast<size_t>(std::ceil(threads->count() * shard_factor_)), 1u);
  const size_t per_shard = (total + shards - 1) / shards;

  size_t begin = 0;
  for (size_t shard = 0; (shard != shards) && (begin < total); ++shard) {
    // The last range may be shorter than the others
    size_t end = begin + per_shard;
    if (end > total) {
      end = total;
    }

    // Process in work thread
    group->submit(ailego::Closure::New(
        this, &KmeansCluster::split_clusters_thread, begin, end, threads));

    begin = end;
  }
  group->wait_finish();
}

//! Update step: recompute every centroid in its own task and wait for all
//! of them to finish.
void KmeansCluster::update_centroids(IndexThreads *threads,
                                     IndexCluster::CentroidList &cents) {
  auto group = threads->make_group();

  const size_t total = cents.size();
  for (size_t column = 0; column < total; ++column) {
    group->submit(ailego::Closure::New(
        this, &KmeansCluster::update_centroid_thread, column, &cents));
  }
  group->wait_finish();
}

//! Cut the features into contiguous ranges and let the worker threads drop
//! each feature pointer into the bucket of its nearest centroid.
//! Blocks until all ranges have been processed.
void KmeansCluster::update_clusters(IndexThreads *threads,
                                    const IndexCluster::CentroidList &cents) {
  // One bucket per (worker thread, centroid) pair
  this->init_features_containers(threads->count() * cents.size());
  auto group = threads->make_group();

  const size_t total = features_->count();
  const size_t shards = std::max<size_t>(
      static_cast<size_t>(std::ceil(threads->count() * shard_factor_)), 1u);
  const size_t per_shard = (total + shards - 1) / shards;

  size_t begin = 0;
  for (size_t shard = 0; (shard != shards) && (begin < total); ++shard) {
    // The last range may be shorter than the others
    size_t end = begin + per_shard;
    if (end > total) {
      end = total;
    }

    // Process in work thread
    group->submit(ailego::Closure::New(
        this, &KmeansCluster::update_cluster_thread, begin, end, threads));

    begin = end;
  }
  group->wait_finish();
}

//! Merge the per-thread feature buckets into the centroids, one task per
//! centroid, and wait for all tasks to finish.
void KmeansCluster::update_features(IndexThreads *threads,
                                    IndexCluster::CentroidList &cents) {
  auto group = threads->make_group();

  const size_t total = cents.size();
  for (size_t column = 0; column < total; ++column) {
    // Process in work thread
    group->submit(ailego::Closure::New(
        this, &KmeansCluster::update_features_thread, column, &cents));
  }
  group->wait_finish();
}

//! Resize `labels` to one entry per feature and fill it in parallel: the
//! features are cut into contiguous ranges, each handled by one task.
//! Blocks until all ranges have been processed.
void KmeansCluster::update_labels(IndexThreads *threads,
                                  std::vector<uint32_t> *labels) {
  const size_t total = features_->count();
  const size_t shards = std::max<size_t>(
      static_cast<size_t>(std::ceil(threads->count() * shard_factor_)), 1u);
  const size_t per_shard = (total + shards - 1) / shards;
  auto group = threads->make_group();

  // Prepare buffer: every task writes only the entries of its own range
  labels->resize(total);

  size_t begin = 0;
  for (size_t shard = 0; (shard != shards) && (begin < total); ++shard) {
    // The last range may be shorter than the others
    size_t end = begin + per_shard;
    if (end > total) {
      end = total;
    }

    // Process in work thread
    group->submit(ailego::Closure::New(
        this, &KmeansCluster::update_labels_thread, begin, end, labels));

    begin = end;
  }
  group->wait_finish();
}

void KmeansCluster::split_clusters_thread(size_t index_begin, size_t index_end,
                                          const IndexThreads *threads) {
  size_t feature_size = features_->element_size();
  size_t thread_offset = threads->indexof_this() * seeker_->original()->count();

  for (size_t i = index_begin; i != index_end; ++i) {
    const void *feat = features_->element(i);
    LinearSeeker::Document result(0, std::numeric_limits<float>::max());

    // ignore error
    seeker_->seek(feat, meta_.element_size(), &result);

    size_t sel_column = thread_offset + result.index;
    shard_cluster_scores_[sel_column] += result.score;
    shard_cluster_means_->at(sel_column).plus(feat, feature_size);
  }
}

//! Worker of update_centroids(): fold the accumulator slots of all threads
//! for centroid `column` (plus the batch state when batch mode is on) and
//! write the resulting score, follows and mean back into the centroid.
void KmeansCluster::update_centroid_thread(size_t column,
                                           IndexCluster::CentroidList *out) {
  // Slots of the same centroid are one centroid-count apart
  const size_t stride = out->size();

  // Create Accumulator
  std::shared_ptr<VectorMean> mean = NewVectorMean(meta_);
  double total_score = 0.0;

  // Start from the state carried over by batch mode
  if (batch_) {
    total_score += batch_scores_[column];
    mean->merge(batch_means_->at(column));
  }

  // Fold in the contribution of every thread
  for (size_t slot = column; slot < shard_cluster_scores_.size();
       slot += stride) {
    total_score += shard_cluster_scores_[slot];
    mean->merge(shard_cluster_means_->at(slot));
  }

  // Update centroid
  IndexCluster::Centroid &centroid = out->at(column);
  centroid.set_score(total_score);
  centroid.set_follows(mean->count());
  mean->mean(centroid.mutable_buffer());
}

void KmeansCluster::update_cluster_thread(size_t index_begin, size_t index_end,
                                          const IndexThreads *threads) {
  size_t thread_offset = threads->indexof_this() * seeker_->original()->count();

  for (size_t i = index_begin; i != index_end; ++i) {
    const void *feat = features_->element(i);
    LinearSeeker::Document result(0, std::numeric_limits<float>::max());

    // ignore error
    seeker_->seek(feat, meta_.element_size(), &result);

    size_t sel_column = thread_offset + result.index;
    shard_cluster_features_[sel_column].emplace_back(feat);
  }
}

//! Worker of update_features(): concatenate the feature buckets that all
//! threads collected for centroid `column` into the centroid's similars.
//! @param column  Index of the centroid to update.
//! @param out     Centroid list; its size is the distance between two
//!                buckets of the same centroid.
void KmeansCluster::update_features_thread(size_t column,
                                           IndexCluster::CentroidList *out) {
  size_t cluster_count = out->size();
  size_t cluster_follows = 0u;

  // Compute the follows of cluster
  for (size_t i = column; i < shard_cluster_features_.size();
       i += cluster_count) {
    cluster_follows += shard_cluster_features_[i].size();
  }

  // Merge all features in cluster
  std::vector<const void *> &cluster_features =
      *(out->at(column).mutable_similars());
  cluster_features.resize(cluster_follows);

  size_t offset = 0;
  for (size_t i = column; i < shard_cluster_features_.size();
       i += cluster_count) {
    const std::vector<const void *> &bucket = shard_cluster_features_[i];

    // Skip empty buckets: indexing the destination at `offset` would be out
    // of range when the cluster has no follows at all or when only empty
    // buckets remain, and a zero-length memcpy may be handed null pointers.
    if (bucket.empty()) {
      continue;
    }
    std::memcpy(cluster_features.data() + offset, bucket.data(),
                bucket.size() * sizeof(void *));
    offset += bucket.size();
  }
}

void KmeansCluster::update_labels_thread(size_t index_begin, size_t index_end,
                                         std::vector<uint32_t> *labels) {
  for (size_t i = index_begin; i != index_end; ++i) {
    const void *feat = features_->element(i);
    LinearSeeker::Document result(0, std::numeric_limits<float>::max());

    // ignore error
    seeker_->seek(feat, meta_.element_size(), &result);
    (*labels)[i] = static_cast<uint32_t>(result.index);
  }
}

// Register KmeansCluster under two names: "KmeansCluster" and
// "BatchKmeansCluster". The two registrations differ only in the last macro
// argument (false / true).
// NOTE(review): presumably that argument selects the batch mode of the
// cluster — confirm against the macro definition.
INDEX_FACTORY_REGISTER_CLUSTER_ALIAS(KmeansCluster, KmeansCluster, false);
INDEX_FACTORY_REGISTER_CLUSTER_ALIAS(BatchKmeansCluster, KmeansCluster, true);

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/cluster/linear_seeker.cc
================================================

// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "linear_seeker.h"

namespace zvec {
namespace core {

//! Scan all mounted features and report the one with the smallest distance
//! to `query`. Ties keep the feature with the lowest index. When nothing is
//! mounted with a non-zero count, index 0 and the maximum float are reported.
//! Returns IndexError_InvalidArgument for a null `query`/`out` or when `len`
//! differs from the element size of the meta.
//! NOTE(review): `features_` is dereferenced unchecked, so this assumes
//! mount() has succeeded beforehand — confirm against the callers.
int LinearSeeker::seek(const void *query, size_t len, Document *out) {
  if (ailego_unlikely(!query || !out || meta_.element_size() != len)) {
    return IndexError_InvalidArgument;
  }

  const uint32_t count = static_cast<uint32_t>(features_->count());
  const auto dimension = meta_.dimension();

  uint32_t best_index = 0;
  float best_score = std::numeric_limits<float>::max();

  for (uint32_t pos = 0; pos < count; ++pos) {
    float dist = 0.0f;
    distance_func_(features_->element(pos), query, dimension, &dist);

    // Strictly smaller only: the first of several equal candidates wins
    if (dist < best_score) {
      best_index = pos;
      best_score = dist;
    }
  }

  out->index = best_index;
  out->score = best_score;
  return 0;
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/cluster/linear_seeker.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include "seeker.h"

namespace zvec {
namespace core {

/*! Linear Seeker
 *  Finds the nearest feature by scanning every mounted feature with the
 *  distance function of the configured metric.
 */
class LinearSeeker : public Seeker {
 public:
  using Pointer = std::shared_ptr<LinearSeeker>;

  //! Constructor
  LinearSeeker() = default;

  //! Destructor
  ~LinearSeeker() = default;

  //! Initialize Seeker: create and initialize the metric named by `meta`
  //! and fetch its 1x1 distance function
  int init(const IndexMeta &meta) override {
    meta_ = meta;

    metric_ = IndexFactory::CreateMetric(meta_.metric_name());
    if (!metric_) {
      LOG_ERROR("Create Metric %s failed.", meta_.metric_name().c_str());
      return IndexError_Unsupported;
    }

    const int code = metric_->init(meta_, meta_.metric_params());
    if (code != 0) {
      LOG_ERROR("IndexMetric init failed wit ret %d.", code);
      return code;
    }

    distance_func_ = metric_->distance_matrix(1, 1);
    if (!distance_func_) {
      LOG_ERROR("DistanceMatrix function is nullptr.");
      return IndexError_Unsupported;
    }
    return 0;
  }

  //! Cleanup Seeker: drop the mounted features
  int cleanup() override {
    features_.reset();
    return 0;
  }

  //! Reset Seeker: drop the mounted features
  int reset() override {
    features_.reset();
    return 0;
  }

  //! Mount features: they must be non-null and match the meta
  int mount(IndexFeatures::Pointer feats) override {
    if (!feats) {
      return IndexError_InvalidArgument;
    }
    if (!feats->is_matched(meta_)) {
      return IndexError_Mismatch;
    }
    features_ = std::move(feats);
    return 0;
  }

  //! Seek (TOP 1 Document)
  int seek(const void *query, size_t len, Document *out) override;

  //! Retrieve the original features (null when nothing is mounted)
  IndexFeatures::Pointer original() const override {
    return features_;
  }

 private:
  //! Meta the seeker was initialized with
  IndexMeta meta_{};
  //! Metric created by init()
  IndexMetric::Pointer metric_{};
  //! Features mounted by mount()
  IndexFeatures::Pointer features_{};
  //! Distance function obtained from the metric in init()
  IndexMetric::MatrixDistance distance_func_{nullptr};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/cluster/opt_kmeans_cluster.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <ailego/algorithm/kmeans.h>
#include <ailego/container/reservoir.h>
#include <zvec/core/framework/index_cluster.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_factory.h>
#include "cluster_params.h"

namespace zvec {
namespace core {

/*! Optimize K-Means cluster algorithm
 *  Common part of the optimized k-means clusters: parameter handling,
 *  feature mounting, classification and labelling. The clustering itself
 *  is left to the subclasses.
 */
class OptKmeansAlgorithm : public IndexCluster {
 public:
  //! Constructor
  OptKmeansAlgorithm() = default;

  //! Destructor
  virtual ~OptKmeansAlgorithm() = default;

  //! Initialize Cluster
  int init(const IndexMeta &meta, const ailego::Params &params);

  //! Mount features
  virtual int mount(IndexFeatures::Pointer feats);

  //! Suggest dividing to K clusters
  virtual void suggest(uint32_t k);

  //! Classify: collect the features of every centroid
  virtual int classify(IndexThreads::Pointer threads,
                       IndexCluster::CentroidList &cents);

  //! Label: compute the nearest centroid of every feature
  virtual int label(IndexThreads::Pointer threads,
                    const IndexCluster::CentroidList &cents,
                    std::vector<uint32_t> *out);

  //! Cluster (implemented by the subclasses)
  virtual int cluster(IndexThreads::Pointer threads,
                      IndexCluster::CentroidList &cents) = 0;

  //! Cleanup Cluster
  virtual int cleanup();

  //! Reset Cluster
  virtual int reset();

  //! Update Cluster
  virtual int update(const ailego::Params &params);

 protected:
  //! Update parameters
  void update_params(const ailego::Params &params);

  //! Init Kmeans Algorithm
  int init_algorithm();

  //! Init Distance function
  int init_distance_func();

  //! Check Centroids
  bool check_centroids(const IndexCluster::CentroidList &cents);

  //! Test if it is valid
  bool is_valid() const;

  //! Update Clusters
  void update_clusters(IndexThreads *threads,
                       const IndexCluster::CentroidList &cents);

  //! Update Cluster in Thread
  void update_cluster_thread(size_t index_begin, size_t index_end,
                             const IndexThreads *threads,
                             const IndexCluster::CentroidList &cents);

  //! Initialize Shard Features Containers
  void init_features_containers(size_t shard_count);

  //! Update Clusters' Features
  void update_features(IndexThreads *threads,
                       IndexCluster::CentroidList &cents);

  //! Update Cluster's Features in Thread
  void update_features_thread(size_t column, IndexCluster::CentroidList *out);

  //! Update Labels
  void update_labels(IndexThreads *threads, std::vector<uint32_t> *labels,
                     const IndexCluster::CentroidList &cents);

  //! Update Labels in Thread
  void update_labels_thread(size_t index_begin, size_t index_end,
                            std::vector<uint32_t> *labels,
                            const IndexCluster::CentroidList &cents);

  //! Initialize Centroids
  void init_centroids(size_t count, IndexCluster::CentroidList *out);

 protected:
  //! Count of clusters (set by suggest() or the parameters)
  uint32_t cluster_count_{0u};
  //! Count of threads of the private thread pool
  uint32_t thread_count_{0u};
  //! Upper bound of iterations
  uint32_t max_iterations_{20u};
  //! Convergence threshold
  double epsilon_{std::numeric_limits<float>::epsilon()};
  //! Multiplied by the count of threads to get the count of work ranges
  float shard_factor_{16.0f};
  //! Whether the centroids without follows are removed
  bool purge_empty_{false};
  //! Read from OPTKMEANS_CLUSTER_ASSUMPTION_FREE
  bool assumption_free_{false};
  //! Read from OPTKMEANS_CLUSTER_MARKOV_CHAIN_LENGTH
  uint32_t markov_chain_length_{32};
  //! Meta of the features
  IndexMeta meta_{};
  //! Mounted features
  IndexFeatures::Pointer features_{};
  //! Feature buckets, one per (worker thread, centroid) pair
  std::vector<std::vector<const void *>> shard_cluster_features_{};
  //! Distance function of the metric named by the meta
  IndexMetric::MatrixDistance distance_func_{nullptr};
};

//! The cluster is usable once a non-empty feature set has been mounted.
//! NOTE(review): `distance_func_`, which the classify/label workers call,
//! is not checked here — confirm that init() is always required to succeed
//! before use.
bool OptKmeansAlgorithm::is_valid(void) const {
  return features_ && (features_->count() != 0);
}

//! Check that every centroid has exactly the element size of the meta.
//! An empty list is considered valid.
bool OptKmeansAlgorithm::check_centroids(
    const IndexCluster::CentroidList &cents) {
  bool all_matched = true;
  for (size_t i = 0; all_matched && (i < cents.size()); ++i) {
    all_matched = (cents[i].size() == meta_.element_size());
  }
  return all_matched;
}

//! Refresh the tunable settings of the cluster from `params`.
//! Every `get` call is handed the address of the member it may fill.
//! NOTE(review): presumably a missing key leaves the member untouched —
//! confirm against ailego::Params::get.
void OptKmeansAlgorithm::update_params(const ailego::Params &params) {
  params.get(GENERAL_THREAD_COUNT, &thread_count_);
  // Both keys below target cluster_count_; the algorithm specific key is
  // read last, so it is the one that gets the final say.
  params.get(GENERAL_CLUSTER_COUNT, &cluster_count_);
  params.get(OPTKMEANS_CLUSTER_COUNT, &cluster_count_);
  params.get(OPTKMEANS_CLUSTER_SHARD_FACTOR, &shard_factor_);
  params.get(OPTKMEANS_CLUSTER_EPSILON, &epsilon_);
  params.get(OPTKMEANS_CLUSTER_MAX_ITERATIONS, &max_iterations_);
  params.get(OPTKMEANS_CLUSTER_PURGE_EMPTY, &purge_empty_);
  params.get(OPTKMEANS_CLUSTER_MARKOV_CHAIN_LENGTH, &markov_chain_length_);
  params.get(OPTKMEANS_CLUSTER_ASSUMPTION_FREE, &assumption_free_);
}

int OptKmeansAlgorithm::init_distance_func() {
  IndexMetric::Pointer metric_{};
  metric_ = IndexFactory::CreateMetric(meta_.metric_name());
  if (!metric_) {
    LOG_ERROR("Create Metric %s failed.", meta_.metric_name().c_str());
    return IndexError_Unsupported;
  }
  int ret = metric_->init(meta_, meta_.metric_params());
  if (ret != 0) {
    LOG_ERROR("IndexMetric init failed wit ret %d.", ret);
    return ret;
  }
  distance_func_ = metric_->distance_matrix(1, 1);
  if (!distance_func_) {
    LOG_ERROR("DistanceMatrix function is nullptr.");
    return IndexError_Unsupported;
  }
  return 0;
}

//! Cut the features into contiguous ranges and let the worker threads drop
//! each feature pointer into the bucket of its nearest centroid.
//! Blocks until all ranges have been processed.
void OptKmeansAlgorithm::update_clusters(
    IndexThreads *threads, const IndexCluster::CentroidList &cents) {
  // One bucket per (worker thread, centroid) pair
  this->init_features_containers(threads->count() * cents.size());
  auto group = threads->make_group();

  const size_t total = features_->count();
  const size_t shards = std::max<size_t>(
      static_cast<size_t>(std::ceil(threads->count() * shard_factor_)), 1u);
  const size_t per_shard = (total + shards - 1) / shards;

  size_t begin = 0;
  for (size_t shard = 0; (shard != shards) && (begin < total); ++shard) {
    // The last range may be shorter than the others
    size_t end = begin + per_shard;
    if (end > total) {
      end = total;
    }

    // Process in work thread
    group->submit(
        ailego::Closure::New(this, &OptKmeansAlgorithm::update_cluster_thread,
                             begin, end, threads, cents));

    begin = end;
  }
  group->wait_finish();
}

//! Worker of update_clusters(): for each feature in [index_begin, index_end)
//! scan all centroids for the smallest distance and append the feature
//! pointer to the bucket owned by the calling thread for that centroid.
//! Ties keep the centroid with the lowest index.
void OptKmeansAlgorithm::update_cluster_thread(
    size_t index_begin, size_t index_end, const IndexThreads *threads,
    const IndexCluster::CentroidList &cents) {
  // First bucket of the calling thread: buckets are laid out thread by
  // thread, one per centroid
  const size_t base = threads->indexof_this() * cents.size();
  const uint32_t centroid_total = static_cast<uint32_t>(cents.size());

  for (size_t pos = index_begin; pos != index_end; ++pos) {
    const void *vec = features_->element(pos);
    float best_score = std::numeric_limits<float>::max();
    uint32_t best_index = 0;

    // todo: get min distance
    for (uint32_t c = 0; c < centroid_total; ++c) {
      float dist = 0.0f;
      distance_func_(cents[c].feature(), vec, meta_.dimension(), &dist);
      if (dist < best_score) {
        best_score = dist;
        best_index = c;
      }
    }

    shard_cluster_features_[base + best_index].emplace_back(vec);
  }
}

//! Size the per-shard feature buckets to `shard_count` and empty each one.
void OptKmeansAlgorithm::init_features_containers(size_t shard_count) {
  shard_cluster_features_.resize(shard_count);
  for (size_t i = 0; i < shard_cluster_features_.size(); ++i) {
    shard_cluster_features_[i].clear();
  }
}

//! Merge the per-thread feature buckets into the centroids, one task per
//! centroid, and wait for all tasks to finish.
void OptKmeansAlgorithm::update_features(IndexThreads *threads,
                                         IndexCluster::CentroidList &cents) {
  auto group = threads->make_group();

  const size_t total = cents.size();
  for (size_t column = 0; column < total; ++column) {
    // Process in work thread
    group->submit(ailego::Closure::New(
        this, &OptKmeansAlgorithm::update_features_thread, column, &cents));
  }
  group->wait_finish();
}

//! Resize `labels` to one entry per feature and fill it in parallel: the
//! features are cut into contiguous ranges, each handled by one task.
//! Blocks until all ranges have been processed.
void OptKmeansAlgorithm::update_labels(
    IndexThreads *threads, std::vector<uint32_t> *labels,
    const IndexCluster::CentroidList &cents) {
  const size_t total = features_->count();
  const size_t shards = std::max<size_t>(
      static_cast<size_t>(std::ceil(threads->count() * shard_factor_)), 1u);
  const size_t per_shard = (total + shards - 1) / shards;
  auto group = threads->make_group();

  // Prepare buffer: every task writes only the entries of its own range
  labels->resize(total);

  size_t begin = 0;
  for (size_t shard = 0; (shard != shards) && (begin < total); ++shard) {
    // The last range may be shorter than the others
    size_t end = begin + per_shard;
    if (end > total) {
      end = total;
    }

    // Process in work thread
    group->submit(
        ailego::Closure::New(this, &OptKmeansAlgorithm::update_labels_thread,
                             begin, end, labels, cents));

    begin = end;
  }
  group->wait_finish();
}

//! Worker of update_labels(): store, for each feature in
//! [index_begin, index_end), the index of the centroid with the smallest
//! distance in `labels`. Ties keep the centroid with the lowest index.
void OptKmeansAlgorithm::update_labels_thread(
    size_t index_begin, size_t index_end, std::vector<uint32_t> *labels,
    const IndexCluster::CentroidList &cents) {
  const uint32_t centroid_total = static_cast<uint32_t>(cents.size());
  std::vector<uint32_t> &dest = *labels;

  for (size_t pos = index_begin; pos != index_end; ++pos) {
    const void *vec = features_->element(pos);
    float best_score = std::numeric_limits<float>::max();
    uint32_t best_index = 0;

    // todo: get min distance
    for (uint32_t c = 0; c < centroid_total; ++c) {
      float dist = 0.0f;
      distance_func_(cents[c].feature(), vec, meta_.dimension(), &dist);
      if (dist < best_score) {
        best_score = dist;
        best_index = c;
      }
    }

    dest[pos] = best_index;
  }
}

void OptKmeansAlgorithm::init_centroids(size_t count,
                                        IndexCluster::CentroidList *out) {
  // Just resize, because the get random centroid step is done by cluster_once
  out->resize(count);
}

//! Worker of update_features(): concatenate the feature buckets that all
//! threads collected for centroid `column` into the centroid's similars.
//! @param column  Index of the centroid to update.
//! @param out     Centroid list; its size is the distance between two
//!                buckets of the same centroid.
void OptKmeansAlgorithm::update_features_thread(
    size_t column, IndexCluster::CentroidList *out) {
  size_t cluster_count = out->size();
  size_t cluster_follows = 0u;

  // Compute the follows of cluster
  for (size_t i = column; i < shard_cluster_features_.size();
       i += cluster_count) {
    cluster_follows += shard_cluster_features_[i].size();
  }

  // Merge all features in cluster
  std::vector<const void *> &cluster_features =
      *(out->at(column).mutable_similars());
  cluster_features.resize(cluster_follows);

  size_t offset = 0;
  for (size_t i = column; i < shard_cluster_features_.size();
       i += cluster_count) {
    const std::vector<const void *> &bucket = shard_cluster_features_[i];

    // Skip empty buckets: indexing the destination at `offset` would be out
    // of range when the cluster has no follows at all or when only empty
    // buckets remain, and a zero-length memcpy may be handed null pointers.
    if (bucket.empty()) {
      continue;
    }
    std::memcpy(cluster_features.data() + offset, bucket.data(),
                bucket.size() * sizeof(void *));
    offset += bucket.size();
  }
}

//! Move the centroids without follows to the tail of `cents` (the relative
//! order of the others is not preserved) and, when `cutting` is set, drop
//! that tail from the list.
static inline void PurgeCentroids(IndexCluster::CentroidList &cents,
                                  bool cutting) {
  size_t head = 0;
  size_t live_end = cents.size();  // one past the last unchecked centroid

  while (head < live_end) {
    if (cents[head].follows() != 0) {
      ++head;
      continue;
    }

    // Empty centroid: trade places with the last unchecked one and shrink
    // the window; the swapped-in centroid is examined on the next round
    --live_end;
    if (head != live_end) {
      std::swap(cents[head], cents[live_end]);
    }
  }

  if (cutting) {
    cents.resize(live_end);
  }
}

//! Initialize Cluster: remember the meta, read the parameters and set up
//! the distance function. Returns the result of init_distance_func().
int OptKmeansAlgorithm::init(const IndexMeta &meta,
                             const ailego::Params &params) {
  meta_ = meta;
  this->update_params(params);

  const int ret = this->init_distance_func();
  return ret;
}

//! Mount features: they must be non-null, match the meta and, for the
//! int4 / int8 / binary data types, have a dimension that is a multiple of
//! 8 / 4 / 32 respectively.
int OptKmeansAlgorithm::mount(IndexFeatures::Pointer feats) {
  if (!feats) {
    return IndexError_InvalidArgument;
  }
  if (!feats->is_matched(meta_)) {
    return IndexError_Mismatch;
  }

  // Check dimension
  switch (meta_.data_type()) {
    case IndexMeta::DataType::DT_INT4: {
      if (feats->dimension() % 8 != 0) {
        LOG_ERROR(
            "Unsupported feature dimension %zu (dimension of int4 "
            "must be an integer multiple of 8).",
            feats->dimension());
        return IndexError_Mismatch;
      }
      break;
    }

    case IndexMeta::DataType::DT_INT8: {
      if (feats->dimension() % 4 != 0) {
        LOG_ERROR(
            "Unsupported feature dimension %zu (dimension of int8 "
            "must be an integer multiple of 4).",
            feats->dimension());
        return IndexError_Mismatch;
      }
      break;
    }

    case IndexMeta::DataType::DT_BINARY32:
    case IndexMeta::DataType::DT_BINARY64: {
      if (feats->dimension() % 32 != 0) {
        LOG_ERROR(
            "Unsupported feature dimension %zu (dimension of binary "
            "must be an integer multiple of 32).",
            feats->dimension());
        return IndexError_Mismatch;
      }
      break;
    }

    default:
      // No dimension constraint for the other data types
      break;
  }

  features_ = std::move(feats);
  return 0;
}

void OptKmeansAlgorithm::suggest(uint32_t k) {
  cluster_count_ = k;
}

//! Assign every mounted feature to its nearest centroid of `cents` and
//! collect the feature pointers of each cluster into the centroid's similars.
//! Returns 0 on success, otherwise an IndexError code.
int OptKmeansAlgorithm::classify(IndexThreads::Pointer threads,
                                 IndexCluster::CentroidList &cents) {
  // Use a private thread pool when the caller did not provide one
  if (!threads) {
    threads = std::make_shared<SingleQueueIndexThreads>(thread_count_, false);
    if (!threads) {
      return IndexError_NoMemory;
    }
  }

  // Validate the centroids and the state of the cluster
  if (cents.empty()) {
    LOG_ERROR("The input centroid's list is empty.");
    return IndexError_InvalidArgument;
  }
  if (!this->check_centroids(cents)) {
    LOG_ERROR("The input centroid's list includes some invalid centroids.");
    return IndexError_InvalidArgument;
  }
  if (!this->is_valid()) {
    LOG_ERROR("The cluster is not ready.");
    return IndexError_NoReady;
  }

  // Bucket the features per thread first, then merge them per centroid
  IndexThreads *pool = threads.get();
  this->update_clusters(pool, cents);
  this->update_features(pool, cents);
  return 0;
}

//! Compute, for every mounted feature, the index of its nearest centroid.
//! @param threads  Thread pool to use; a private one is created when null.
//! @param cents    Centroids to label against; must not be empty.
//! @param out      Receives one label per feature; must not be null.
//! @return 0 on success, otherwise an IndexError code.
int OptKmeansAlgorithm::label(IndexThreads::Pointer threads,
                              const IndexCluster::CentroidList &cents,
                              std::vector<uint32_t> *out) {
  // Reject a null output buffer up front: update_labels() resizes and writes
  // through this pointer unconditionally.
  if (!out) {
    LOG_ERROR("The output buffer of labels is null.");
    return IndexError_InvalidArgument;
  }

  if (!threads) {
    threads = std::make_shared<SingleQueueIndexThreads>(thread_count_, false);
    if (!threads) {
      return IndexError_NoMemory;
    }
  }
  if (cents.empty()) {
    LOG_ERROR("The input centroid's list is empty.");
    return IndexError_InvalidArgument;
  }

  if (!this->check_centroids(cents)) {
    LOG_ERROR("The input centroid's list includes some invalid centroids.");
    return IndexError_InvalidArgument;
  }

  if (!this->is_valid()) {
    LOG_ERROR("The cluster is not ready.");
    return IndexError_NoReady;
  }

  this->update_labels(threads.get(), out, cents);
  return 0;
}

//! Update Cluster: re-read the parameters. Always returns 0.
int OptKmeansAlgorithm::update(const ailego::Params &params) {
  // A former `algorithm_->reset(cluster_count_)` call is disabled here
  this->update_params(params);
  return 0;
}

//! Reset Cluster: release the mounted features and drop all feature
//! buckets. Always returns 0.
int OptKmeansAlgorithm::reset(void) {
  shard_cluster_features_.clear();
  features_.reset();
  return 0;
}

//! Cleanup Cluster: release the mounted features and drop all feature
//! buckets. Always returns 0.
int OptKmeansAlgorithm::cleanup(void) {
  shard_cluster_features_.clear();
  features_.reset();
  return 0;
}


/*! Numerical K-Means cluster algorithm
 *  Clusters features whose elements are of the arithmetic type `T` with
 *  ailego::NumericalKmeans.
 */
template <typename T>
class NumericalKmeansAlgorithm : public OptKmeansAlgorithm {
 public:
  //! Type of value
  using ValueType = typename std::remove_cv<T>::type;

  // Check supporting type
  static_assert(ailego::IsArithmetic<ValueType>::value,
                "ValueType must be arithmetic");

  //! Constructor
  NumericalKmeansAlgorithm() = default;

  //! Destructor
  ~NumericalKmeansAlgorithm() override = default;

  //! Cluster
  int cluster(IndexThreads::Pointer threads,
              IndexCluster::CentroidList &cents) override;

 protected:
  //! Copy the centroids, costs and counts of `algorithm` into `cents`
  void update_centroids(
      IndexCluster::CentroidList &cents,
      const ailego::NumericalKmeans<T, IndexThreads> &algorithm);
};

//! Resize `cents` to the centroid count of `algorithm` and copy, for every
//! centroid, the cost (as score), the count (as follows) and the vector of
//! `dimension * sizeof(T)` bytes out of the algorithm.
template <typename T>
void NumericalKmeansAlgorithm<T>::update_centroids(
    IndexCluster::CentroidList &cents,
    const ailego::NumericalKmeans<T, IndexThreads> &algorithm) {
  this->init_centroids(algorithm.centroids().count(), &cents);

  const size_t total = cents.size();
  for (size_t pos = 0; pos < total; ++pos) {
    IndexCluster::Centroid &centroid = cents[pos];
    centroid.set_score(algorithm.context().clusters()[pos].cost());
    centroid.set_follows(algorithm.context().clusters()[pos].count());
    centroid.set_feature(algorithm.centroids()[pos],
                         meta_.dimension() * sizeof(T));
  }
}

//! Cluster the mounted features with numerical K-Means
/*! \param threads  Thread pool to run on; when null, a SingleQueueIndexThreads
 *                  pool is built from thread_count_.
 *  \param cents    In: optional initial centroids (validated with
 *                  check_centroids()). When empty, the centroids are seeded
 *                  by ailego::Kmc2CentroidsGenerator. Out: the resulting
 *                  centroids after PurgeCentroids().
 *  \return 0 on success, an IndexError code on invalid input/state, or -1
 *          when an iteration fails.
 */
template <typename T>
int NumericalKmeansAlgorithm<T>::cluster(IndexThreads::Pointer threads,
                                         IndexCluster::CentroidList &cents) {
  ailego::ElapsedTime stamp;

  if (!threads) {
    threads = std::make_shared<SingleQueueIndexThreads>(thread_count_, false);
    if (!threads) {
      return IndexError_NoMemory;
    }
  }
  if (!this->check_centroids(cents)) {
    LOG_ERROR("The input centroid's list includes some invalid centroids.");
    return IndexError_InvalidArgument;
  }

  if (!this->is_valid()) {
    LOG_ERROR("The cluster is not ready.");
    return IndexError_NoReady;
  }

  const auto features_count = features_->count();
  const auto dim = meta_.dimension();

  // Never ask for more clusters than there are features. The comparison is
  // done in size_t so that a large feature count is not truncated.
  const size_t centroid_count =
      cents.empty() ? std::min<size_t>(cluster_count_, features_count)
                    : cents.size();
  if (centroid_count == 0) {
    LOG_ERROR("The count of cluster is unknown.");
    return IndexError_NoReady;
  }
  ailego::NumericalKmeans<T, IndexThreads> algorithm(centroid_count, dim);

  // mount features into algorithm
  algorithm.feature_matrix_reserve(features_count);
  for (size_t i = 0; i < features_count; ++i) {
    algorithm.append(reinterpret_cast<const T *>(features_->element(i)), dim);
  }

  if (!cents.empty()) {
    // Start from the centroids supplied by the caller.
    auto centroids = algorithm.mutable_centroids();
    centroids->reserve(cents.size());
    for (const auto &it : cents) {
      centroids->append(reinterpret_cast<const T *>(it.feature()), dim);
    }
  } else {
    // No initial centroids: let the KMC2 generator pick them.
    ailego::Kmc2CentroidsGenerator<
        ailego::NumericalKmeans<ValueType, IndexThreads>, IndexThreads>
        g;
    g.set_chain_length(markov_chain_length_);
    g.set_assumption_free(assumption_free_);
    algorithm.init_centroids(*threads, g);
  }

  // Iterate until the cost changes by less than epsilon_ or the iteration
  // budget is exhausted.
  double cost = 0.0;
  for (uint32_t i = 0; i < max_iterations_; ++i) {
    const double old_cost = cost;

    if (!algorithm.cluster_once(*threads, &cost)) {
      LOG_ERROR("(%u) Failed to cluster.", i + 1);
      return -1;
    }

    const double new_epsilon = std::abs(cost - old_cost);
    LOG_DEBUG("(%u) Updated %zu Clusters, %zu Features: %zu ms, %f -> %f = %f",
              i, algorithm.centroids().count(), features_->count(),
              static_cast<size_t>(stamp.milli_seconds()), old_cost, cost,
              new_epsilon);
    stamp.reset();

    if (new_epsilon < epsilon_) {
      break;
    }
  }

  // Export the result into the caller's list
  update_centroids(cents, algorithm);

  // Purge the empty centroids
  PurgeCentroids(cents, purge_empty_);
  return 0;
}

/*! Nibble K-Means cluster algorithm
 *
 *  Runs ailego::NibbleKmeans over the mounted features; centroid features are
 *  exported as (dimension >> 1) bytes.
 */
template <typename T>
class NibbleKmeansAlgorithm : public OptKmeansAlgorithm {
 public:
  //! Element type with cv-qualifiers removed
  using ValueType = typename std::remove_cv<T>::type;

  // Only arithmetic element types are supported
  static_assert(ailego::IsArithmetic<ValueType>::value,
                "ValueType must be arithmetic");

  //! Constructor
  NibbleKmeansAlgorithm() = default;

  //! Destructor
  virtual ~NibbleKmeansAlgorithm() = default;

  //! Cluster the mounted features; `cents` is both input and output
  virtual int cluster(IndexThreads::Pointer threads,
                      IndexCluster::CentroidList &cents);

 protected:
  //! Export the centroids held by `algorithm` into `cents`
  void update_centroids(IndexCluster::CentroidList &cents,
                        const ailego::NibbleKmeans<T, IndexThreads> &algorithm);
};

//! Export the centroids held by `algorithm` into `cents`
/*! init_centroids() receives the algorithm's centroid count (presumably it
 *  sizes `cents` accordingly); each entry then gets the cluster cost as its
 *  score, the cluster member count as its follows, and the centroid vector
 *  ((dimension >> 1) bytes) as its feature.
 */
template <typename T>
void NibbleKmeansAlgorithm<T>::update_centroids(
    IndexCluster::CentroidList &cents,
    const ailego::NibbleKmeans<T, IndexThreads> &algorithm) {
  this->init_centroids(algorithm.centroids().count(), &cents);

  // Size in bytes of one centroid vector.
  const auto feature_bytes = (meta_.dimension() >> 1);

  size_t index = 0;
  for (IndexCluster::Centroid &centroid : cents) {
    centroid.set_score(algorithm.context().clusters()[index].cost());
    centroid.set_follows(algorithm.context().clusters()[index].count());
    centroid.set_feature(algorithm.centroids()[index], feature_bytes);
    ++index;
  }
}

//! Cluster the mounted features with nibble K-Means
/*! \param threads  Thread pool to run on; when null, a SingleQueueIndexThreads
 *                  pool is built from thread_count_.
 *  \param cents    In: optional initial centroids (validated with
 *                  check_centroids()). When empty, the centroids are seeded
 *                  by ailego::Kmc2CentroidsGenerator. Out: the resulting
 *                  centroids after PurgeCentroids().
 *  \return 0 on success, an IndexError code on invalid input/state, or -1
 *          when an iteration fails.
 */
template <typename T>
int NibbleKmeansAlgorithm<T>::cluster(IndexThreads::Pointer threads,
                                      IndexCluster::CentroidList &cents) {
  // The algorithm is fed through pointers to the unsigned counterpart of T.
  using StoreType = typename std::make_unsigned<T>::type;

  ailego::ElapsedTime stamp;

  if (!threads) {
    threads = std::make_shared<SingleQueueIndexThreads>(thread_count_, false);
    if (!threads) {
      return IndexError_NoMemory;
    }
  }
  if (!this->check_centroids(cents)) {
    LOG_ERROR("The input centroid's list includes some invalid centroids.");
    return IndexError_InvalidArgument;
  }

  if (!this->is_valid()) {
    LOG_ERROR("The cluster is not ready.");
    return IndexError_NoReady;
  }

  const auto features_count = features_->count();
  const auto dim = meta_.dimension();

  // Never ask for more clusters than there are features. The comparison is
  // done in size_t so that a large feature count is not truncated.
  const size_t centroid_count =
      cents.empty() ? std::min<size_t>(cluster_count_, features_count)
                    : cents.size();
  if (centroid_count == 0) {
    LOG_ERROR("The count of cluster is unknown.");
    return IndexError_NoReady;
  }
  ailego::NibbleKmeans<T, IndexThreads> algorithm(centroid_count, dim);

  // mount features into algorithm
  for (size_t i = 0; i < features_count; ++i) {
    algorithm.append(reinterpret_cast<const StoreType *>(features_->element(i)),
                     dim);
  }

  if (!cents.empty()) {
    // Start from the centroids supplied by the caller.
    auto centroids = algorithm.mutable_centroids();
    centroids->reserve(cents.size());
    for (const auto &it : cents) {
      centroids->append(reinterpret_cast<const StoreType *>(it.feature()),
                        static_cast<size_t>(dim));
    }
  } else {
    // No initial centroids: let the KMC2 generator pick them.
    ailego::Kmc2CentroidsGenerator<
        ailego::NibbleKmeans<ValueType, IndexThreads>, IndexThreads>
        g;
    g.set_chain_length(markov_chain_length_);
    g.set_assumption_free(assumption_free_);
    algorithm.init_centroids(*threads, g);
  }

  // Iterate until the cost changes by less than epsilon_ or the iteration
  // budget is exhausted.
  double cost = 0.0;
  for (uint32_t i = 0; i < max_iterations_; ++i) {
    const double old_cost = cost;

    if (!algorithm.cluster_once(*threads, &cost)) {
      LOG_ERROR("(%u) Failed to cluster.", i + 1);
      return -1;
    }

    const double new_epsilon = std::abs(cost - old_cost);
    LOG_DEBUG(
        "(%u) Updated %zu Clusters, %zu Features: %zu ms, %f -> "
        "%f = %f",
        i, algorithm.centroids().count(), features_->count(),
        static_cast<size_t>(stamp.milli_seconds()), old_cost, cost,
        new_epsilon);
    stamp.reset();

    if (new_epsilon < epsilon_) {
      break;
    }
  }

  // update centroids
  update_centroids(cents, algorithm);

  // Purge the empty centroids
  PurgeCentroids(cents, purge_empty_);
  return 0;
}

/*! Binary K-Means cluster algorithm
 *
 *  Runs ailego::BinaryKmeans over the mounted features; centroid features are
 *  exported as (dimension >> 3) bytes.
 */
template <typename T>
class BinaryKmeansAlgorithm : public OptKmeansAlgorithm {
 public:
  //! Element type with cv-qualifiers removed
  using ValueType = typename std::remove_cv<T>::type;

  // Only arithmetic element types are supported
  static_assert(ailego::IsArithmetic<ValueType>::value,
                "ValueType must be arithmetic");

  //! Constructor
  BinaryKmeansAlgorithm() = default;

  //! Destructor
  virtual ~BinaryKmeansAlgorithm() = default;

  //! Cluster the mounted features; `cents` is both input and output
  virtual int cluster(IndexThreads::Pointer threads,
                      IndexCluster::CentroidList &cents);

 protected:
  //! Export the centroids held by `algorithm` into `cents`
  void update_centroids(IndexCluster::CentroidList &cents,
                        const ailego::BinaryKmeans<T, IndexThreads> &algorithm);
};

//! Export the centroids held by `algorithm` into `cents`
/*! init_centroids() receives the algorithm's centroid count (presumably it
 *  sizes `cents` accordingly); each entry then gets the cluster cost as its
 *  score, the cluster member count as its follows, and the centroid vector
 *  ((dimension >> 3) bytes) as its feature.
 */
template <typename T>
void BinaryKmeansAlgorithm<T>::update_centroids(
    IndexCluster::CentroidList &cents,
    const ailego::BinaryKmeans<T, IndexThreads> &algorithm) {
  this->init_centroids(algorithm.centroids().count(), &cents);

  // Size in bytes of one centroid vector.
  const auto feature_bytes = (meta_.dimension() >> 3);

  size_t index = 0;
  for (IndexCluster::Centroid &centroid : cents) {
    centroid.set_score(algorithm.context().clusters()[index].cost());
    centroid.set_follows(algorithm.context().clusters()[index].count());
    centroid.set_feature(algorithm.centroids()[index], feature_bytes);
    ++index;
  }
}

//! Cluster the mounted features with binary K-Means
/*! \param threads  Thread pool to run on; when null, a SingleQueueIndexThreads
 *                  pool is built from thread_count_.
 *  \param cents    In: optional initial centroids (validated with
 *                  check_centroids()). When empty, the centroids are seeded
 *                  by ailego::Kmc2CentroidsGenerator. Out: the resulting
 *                  centroids after PurgeCentroids().
 *  \return 0 on success, an IndexError code on invalid input/state, or -1
 *          when an iteration fails.
 */
template <typename T>
int BinaryKmeansAlgorithm<T>::cluster(IndexThreads::Pointer threads,
                                      IndexCluster::CentroidList &cents) {
  ailego::ElapsedTime stamp;

  if (!threads) {
    threads = std::make_shared<SingleQueueIndexThreads>(thread_count_, false);
    if (!threads) {
      return IndexError_NoMemory;
    }
  }
  if (!this->check_centroids(cents)) {
    LOG_ERROR("The input centroid's list includes some invalid centroids.");
    return IndexError_InvalidArgument;
  }

  if (!this->is_valid()) {
    LOG_ERROR("The cluster is not ready.");
    return IndexError_NoReady;
  }

  const auto features_count = features_->count();
  const auto dim = meta_.dimension();

  // Never ask for more clusters than there are features. The comparison is
  // done in size_t so that a large feature count is not truncated.
  const size_t centroid_count =
      cents.empty() ? std::min<size_t>(cluster_count_, features_count)
                    : cents.size();
  if (centroid_count == 0) {
    LOG_ERROR("The count of cluster is unknown.");
    return IndexError_NoReady;
  }
  ailego::BinaryKmeans<T, IndexThreads> algorithm(centroid_count, dim);

  // mount features into algorithm
  for (size_t i = 0; i < features_count; ++i) {
    algorithm.append(reinterpret_cast<const T *>(features_->element(i)), dim);
  }

  if (!cents.empty()) {
    // Start from the centroids supplied by the caller.
    auto centroids = algorithm.mutable_centroids();
    centroids->reserve(cents.size());
    for (const auto &it : cents) {
      centroids->append(reinterpret_cast<const T *>(it.feature()), dim);
    }
  } else {
    // No initial centroids: let the KMC2 generator pick them.
    ailego::Kmc2CentroidsGenerator<
        ailego::BinaryKmeans<ValueType, IndexThreads>, IndexThreads>
        g;
    g.set_chain_length(markov_chain_length_);
    g.set_assumption_free(assumption_free_);
    algorithm.init_centroids(*threads, g);
  }

  // Iterate until the cost changes by less than epsilon_ or the iteration
  // budget is exhausted.
  double cost = 0.0;
  for (uint32_t i = 0; i < max_iterations_; ++i) {
    const double old_cost = cost;

    if (!algorithm.cluster_once(*threads, &cost)) {
      LOG_ERROR("(%u) Failed to cluster.", i + 1);
      return -1;
    }

    const double new_epsilon = std::abs(cost - old_cost);
    LOG_DEBUG(
        "(%u) Updated %zu Clusters, %zu Features: %zu ms, %f -> "
        "%f = %f",
        i, algorithm.centroids().count(), features_->count(),
        static_cast<size_t>(stamp.milli_seconds()), old_cost, cost,
        new_epsilon);
    stamp.reset();

    if (new_epsilon < epsilon_) {
      break;
    }
  }

  // update centroids
  update_centroids(cents, algorithm);

  // Purge the empty centroids
  PurgeCentroids(cents, purge_empty_);
  return 0;
}

/*! Numerical Inner Product K-Means cluster algorithm
 *
 *  Runs ailego::NumericalInnerProductKmeans over features whose elements are
 *  of type T.
 */
template <typename T>
class NumericalInnerProductKmeansAlgorithm : public OptKmeansAlgorithm {
 public:
  //! Element type with cv-qualifiers removed
  using ValueType = typename std::remove_cv<T>::type;

  // Only arithmetic element types are supported
  static_assert(ailego::IsArithmetic<ValueType>::value,
                "ValueType must be arithmetic");

  //! Constructor
  NumericalInnerProductKmeansAlgorithm() = default;

  //! Destructor
  virtual ~NumericalInnerProductKmeansAlgorithm() = default;

  //! Cluster the mounted features; `cents` is both input and output
  virtual int cluster(IndexThreads::Pointer threads,
                      IndexCluster::CentroidList &cents);

 protected:
  //! Export the centroids held by `algorithm` into `cents`
  void update_centroids(
      IndexCluster::CentroidList &cents,
      const ailego::NumericalInnerProductKmeans<T, IndexThreads> &algorithm);
};

//! Export the centroids held by `algorithm` into `cents`
/*! init_centroids() receives the algorithm's centroid count (presumably it
 *  sizes `cents` accordingly); each entry then gets the cluster cost as its
 *  score, the cluster member count as its follows, and the centroid vector
 *  (dimension * sizeof(T) bytes) as its feature.
 */
template <typename T>
void NumericalInnerProductKmeansAlgorithm<T>::update_centroids(
    IndexCluster::CentroidList &cents,
    const ailego::NumericalInnerProductKmeans<T, IndexThreads> &algorithm) {
  this->init_centroids(algorithm.centroids().count(), &cents);

  // Size in bytes of one centroid vector.
  const auto feature_bytes = meta_.dimension() * sizeof(T);

  size_t index = 0;
  for (IndexCluster::Centroid &centroid : cents) {
    centroid.set_score(algorithm.context().clusters()[index].cost());
    centroid.set_follows(algorithm.context().clusters()[index].count());
    centroid.set_feature(algorithm.centroids()[index], feature_bytes);
    ++index;
  }
}

//! Cluster the mounted features with numerical inner-product K-Means
/*! \param threads  Thread pool to run on; when null, a SingleQueueIndexThreads
 *                  pool is built from thread_count_.
 *  \param cents    In: optional initial centroids (validated with
 *                  check_centroids()). When empty, the centroids are seeded
 *                  by ailego::Kmc2CentroidsGenerator. Out: the resulting
 *                  centroids after PurgeCentroids().
 *  \return 0 on success, an IndexError code on invalid input/state, or -1
 *          when an iteration fails.
 */
template <typename T>
int NumericalInnerProductKmeansAlgorithm<T>::cluster(
    IndexThreads::Pointer threads, IndexCluster::CentroidList &cents) {
  ailego::ElapsedTime stamp;

  if (!threads) {
    threads = std::make_shared<SingleQueueIndexThreads>(thread_count_, false);
    if (!threads) {
      return IndexError_NoMemory;
    }
  }
  if (!this->check_centroids(cents)) {
    LOG_ERROR("The input centroid's list includes some invalid centroids.");
    return IndexError_InvalidArgument;
  }

  if (!this->is_valid()) {
    LOG_ERROR("The cluster is not ready.");
    return IndexError_NoReady;
  }

  const auto features_count = features_->count();
  const auto dim = meta_.dimension();

  // Never ask for more clusters than there are features. The comparison is
  // done in size_t so that a large feature count is not truncated.
  const size_t centroid_count =
      cents.empty() ? std::min<size_t>(cluster_count_, features_count)
                    : cents.size();
  if (centroid_count == 0) {
    LOG_ERROR("The count of cluster is unknown.");
    return IndexError_NoReady;
  }
  ailego::NumericalInnerProductKmeans<T, IndexThreads> algorithm(
      centroid_count, dim, true);

  // mount features into algorithm
  algorithm.feature_matrix_reserve(features_count);
  for (size_t i = 0; i < features_count; ++i) {
    algorithm.append(reinterpret_cast<const T *>(features_->element(i)), dim);
  }

  if (!cents.empty()) {
    // Start from the centroids supplied by the caller.
    auto centroids = algorithm.mutable_centroids();
    centroids->reserve(cents.size());
    for (const auto &it : cents) {
      centroids->append(reinterpret_cast<const T *>(it.feature()), dim);
    }
  } else {
    // No initial centroids: let the KMC2 generator pick them.
    ailego::Kmc2CentroidsGenerator<
        ailego::NumericalInnerProductKmeans<ValueType, IndexThreads>,
        IndexThreads>
        g;
    g.set_chain_length(markov_chain_length_);
    g.set_assumption_free(assumption_free_);
    algorithm.init_centroids(*threads, g);
  }

  // Iterate until the cost changes by less than epsilon_ or the iteration
  // budget is exhausted.
  double cost = 0.0;
  for (uint32_t i = 0; i < max_iterations_; ++i) {
    const double old_cost = cost;

    if (!algorithm.cluster_once(*threads, &cost)) {
      LOG_ERROR("(%u) Failed to cluster.", i + 1);
      return -1;
    }

    const double new_epsilon = std::abs(cost - old_cost);
    LOG_DEBUG("(%u) Updated %zu Clusters, %zu Features: %zu ms, %f -> %f = %f",
              i, algorithm.centroids().count(), features_->count(),
              static_cast<size_t>(stamp.milli_seconds()), old_cost, cost,
              new_epsilon);
    stamp.reset();

    if (new_epsilon < epsilon_) {
      break;
    }
  }

  // Export the result into the caller's list
  update_centroids(cents, algorithm);

  // Purge the empty centroids
  PurgeCentroids(cents, purge_empty_);
  return 0;
}

/*! Nibble Inner Product K-Means cluster algorithm
 *
 *  Runs ailego::NibbleInnerProductKmeans over the mounted features; centroid
 *  features are exported as (dimension >> 1) bytes.
 */
template <typename T>
class NibbleInnerProductKmeansAlgorithm : public OptKmeansAlgorithm {
 public:
  //! Element type with cv-qualifiers removed
  using ValueType = typename std::remove_cv<T>::type;

  // Only arithmetic element types are supported
  static_assert(ailego::IsArithmetic<ValueType>::value,
                "ValueType must be arithmetic");

  //! Constructor
  NibbleInnerProductKmeansAlgorithm() = default;

  //! Destructor
  virtual ~NibbleInnerProductKmeansAlgorithm() = default;

  //! Cluster the mounted features; `cents` is both input and output
  virtual int cluster(IndexThreads::Pointer threads,
                      IndexCluster::CentroidList &cents);

 protected:
  //! Export the centroids held by `algorithm` into `cents`
  void update_centroids(
      IndexCluster::CentroidList &cents,
      const ailego::NibbleInnerProductKmeans<T, IndexThreads> &algorithm);
};

//! Export the centroids held by `algorithm` into `cents`
/*! init_centroids() receives the algorithm's centroid count (presumably it
 *  sizes `cents` accordingly); each entry then gets the cluster cost as its
 *  score, the cluster member count as its follows, and the centroid vector
 *  ((dimension >> 1) bytes) as its feature.
 */
template <typename T>
void NibbleInnerProductKmeansAlgorithm<T>::update_centroids(
    IndexCluster::CentroidList &cents,
    const ailego::NibbleInnerProductKmeans<T, IndexThreads> &algorithm) {
  this->init_centroids(algorithm.centroids().count(), &cents);

  // Size in bytes of one centroid vector.
  const auto feature_bytes = (meta_.dimension() >> 1);

  size_t index = 0;
  for (IndexCluster::Centroid &centroid : cents) {
    centroid.set_score(algorithm.context().clusters()[index].cost());
    centroid.set_follows(algorithm.context().clusters()[index].count());
    centroid.set_feature(algorithm.centroids()[index], feature_bytes);
    ++index;
  }
}

//! Cluster the mounted features with nibble inner-product K-Means
/*! \param threads  Thread pool to run on; when null, a SingleQueueIndexThreads
 *                  pool is built from thread_count_.
 *  \param cents    In: optional initial centroids (validated with
 *                  check_centroids()). When empty, the centroids are seeded
 *                  by ailego::Kmc2CentroidsGenerator. Out: the resulting
 *                  centroids after PurgeCentroids().
 *  \return 0 on success, an IndexError code on invalid input/state, or -1
 *          when an iteration fails.
 */
template <typename T>
int NibbleInnerProductKmeansAlgorithm<T>::cluster(
    IndexThreads::Pointer threads, IndexCluster::CentroidList &cents) {
  // The algorithm is fed through pointers to the unsigned counterpart of T.
  using StoreType = typename std::make_unsigned<T>::type;

  ailego::ElapsedTime stamp;

  if (!threads) {
    threads = std::make_shared<SingleQueueIndexThreads>(thread_count_, false);
    if (!threads) {
      return IndexError_NoMemory;
    }
  }
  if (!this->check_centroids(cents)) {
    LOG_ERROR("The input centroid's list includes some invalid centroids.");
    return IndexError_InvalidArgument;
  }

  if (!this->is_valid()) {
    LOG_ERROR("The cluster is not ready.");
    return IndexError_NoReady;
  }

  const auto features_count = features_->count();
  const auto dim = meta_.dimension();

  // Never ask for more clusters than there are features. The comparison is
  // done in size_t so that a large feature count is not truncated.
  const size_t centroid_count =
      cents.empty() ? std::min<size_t>(cluster_count_, features_count)
                    : cents.size();
  if (centroid_count == 0) {
    LOG_ERROR("The count of cluster is unknown.");
    return IndexError_NoReady;
  }
  ailego::NibbleInnerProductKmeans<T, IndexThreads> algorithm(centroid_count,
                                                              dim);

  // mount features into algorithm
  for (size_t i = 0; i < features_count; ++i) {
    algorithm.append(reinterpret_cast<const StoreType *>(features_->element(i)),
                     dim);
  }

  if (!cents.empty()) {
    // Start from the centroids supplied by the caller.
    auto centroids = algorithm.mutable_centroids();
    centroids->reserve(cents.size());
    for (const auto &it : cents) {
      centroids->append(reinterpret_cast<const StoreType *>(it.feature()),
                        static_cast<size_t>(dim));
    }
  } else {
    // No initial centroids: let the KMC2 generator pick them.
    ailego::Kmc2CentroidsGenerator<
        ailego::NibbleInnerProductKmeans<ValueType, IndexThreads>, IndexThreads>
        g;
    g.set_chain_length(markov_chain_length_);
    g.set_assumption_free(assumption_free_);
    algorithm.init_centroids(*threads, g);
  }

  // Iterate until the cost changes by less than epsilon_ or the iteration
  // budget is exhausted.
  double cost = 0.0;
  for (uint32_t i = 0; i < max_iterations_; ++i) {
    const double old_cost = cost;

    if (!algorithm.cluster_once(*threads, &cost)) {
      LOG_ERROR("(%u) Failed to cluster.", i + 1);
      return -1;
    }

    const double new_epsilon = std::abs(cost - old_cost);
    LOG_DEBUG(
        "(%u) Updated %zu Clusters, %zu Features: %zu ms, %f -> "
        "%f = %f",
        i, algorithm.centroids().count(), features_->count(),
        static_cast<size_t>(stamp.milli_seconds()), old_cost, cost,
        new_epsilon);
    stamp.reset();

    if (new_epsilon < epsilon_) {
      break;
    }
  }

  // update centroids
  update_centroids(cents, algorithm);

  // Purge the empty centroids
  PurgeCentroids(cents, purge_empty_);
  return 0;
}

/*! Kmeans Cluster
 *
 *  IndexCluster implementation that delegates to a concrete K-Means algorithm
 *  held in algorithm_, which init() chooses from the index meta.
 */
class OptKmeansCluster : public IndexCluster {
 public:
  //! Constructor
  OptKmeansCluster() = default;

  //! Destructor
  virtual ~OptKmeansCluster() = default;

  //! Initialize Cluster (selects and initializes the algorithm)
  virtual int init(const IndexMeta &meta, const ailego::Params &params);

  //! Cleanup Cluster
  virtual int cleanup(void);

  //! Reset Cluster
  virtual int reset(void);

  //! Update Cluster with new parameters
  virtual int update(const ailego::Params &params);

  //! Suggest dividing to K clusters
  virtual void suggest(uint32_t k);

  //! Mount the features to be clustered
  virtual int mount(IndexFeatures::Pointer feats);

  //! Cluster the mounted features
  virtual int cluster(IndexThreads::Pointer threads,
                      IndexCluster::CentroidList &cents);

  //! Classify the mounted features against the centroids
  virtual int classify(IndexThreads::Pointer threads,
                       IndexCluster::CentroidList &cents);

  //! Label the mounted features against the centroids
  virtual int label(IndexThreads::Pointer threads,
                    const IndexCluster::CentroidList &cents,
                    std::vector<uint32_t> *out);

 protected:
  //! Concrete algorithm every call is forwarded to (set by init())
  IndexCluster::Pointer algorithm_{};
};

//! Cluster: forwarded to the algorithm selected by init()
int OptKmeansCluster::cluster(IndexThreads::Pointer threads,
                              IndexCluster::CentroidList &cents) {
  IndexCluster &impl = *algorithm_;
  return impl.cluster(std::move(threads), cents);
}

//! Classify: forwarded to the algorithm selected by init()
int OptKmeansCluster::classify(IndexThreads::Pointer threads,
                               IndexCluster::CentroidList &cents) {
  IndexCluster &impl = *algorithm_;
  return impl.classify(std::move(threads), cents);
}

//! Label: forwarded to the algorithm selected by init()
int OptKmeansCluster::label(IndexThreads::Pointer threads,
                            const IndexCluster::CentroidList &cents,
                            std::vector<uint32_t> *out) {
  IndexCluster &impl = *algorithm_;
  return impl.label(std::move(threads), cents, out);
}

//! Update Cluster: forwarded to the algorithm selected by init()
int OptKmeansCluster::update(const ailego::Params &params) {
  IndexCluster &impl = *algorithm_;
  return impl.update(params);
}

//! Reset Cluster
int OptKmeansCluster::reset(void) {
  return algorithm_->reset();
}

//! Cleanup Cluster
int OptKmeansCluster::cleanup(void) {
  return algorithm_->cleanup();
}

//! Suggest dividing to K clusters
void OptKmeansCluster::suggest(uint32_t k) {
  algorithm_->suggest(k);
}

//! Mount features: forwarded to the algorithm selected by init()
/*! `feats` is received by value, so ownership is handed on with std::move
 *  instead of copying the pointer again (same pattern as the `threads`
 *  argument in cluster()/classify()/label()).
 */
int OptKmeansCluster::mount(IndexFeatures::Pointer feats) {
  return algorithm_->mount(std::move(feats));
}

//! Initialize Cluster
/*! Selects the K-Means implementation that matches the metric and the data
 *  type of `meta`, then initializes it with `meta` and `params`.
 *
 *  The inner-product implementations are used when the metric name is
 *  "InnerProduct" or "Cosine"; every other metric uses the plain ones.
 *
 *  \return IndexError_Mismatch for an unsupported data type,
 *          IndexError_NoMemory when the algorithm cannot be allocated,
 *          otherwise the result of the algorithm's own init().
 */
int OptKmeansCluster::init(const IndexMeta &meta,
                           const ailego::Params &params) {
  const auto data_type = meta.data_type();
  const bool use_inner_product =
      meta.metric_name() == "InnerProduct" || meta.metric_name() == "Cosine";

  if (use_inner_product) {
    switch (data_type) {
      case IndexMeta::DataType::DT_FP16: {
        algorithm_.reset(
            new (std::nothrow)
                NumericalInnerProductKmeansAlgorithm<ailego::Float16>);
        break;
      }
      case IndexMeta::DataType::DT_FP32: {
        algorithm_.reset(new (std::nothrow)
                             NumericalInnerProductKmeansAlgorithm<float>);
        break;
      }
      case IndexMeta::DataType::DT_FP64: {
        algorithm_.reset(new (std::nothrow)
                             NumericalInnerProductKmeansAlgorithm<double>);
        break;
      }
      case IndexMeta::DataType::DT_INT8: {
        algorithm_.reset(new (std::nothrow)
                             NumericalInnerProductKmeansAlgorithm<int8_t>);
        break;
      }
      case IndexMeta::DataType::DT_INT16: {
        algorithm_.reset(new (std::nothrow)
                             NumericalInnerProductKmeansAlgorithm<int16_t>);
        break;
      }
      case IndexMeta::DataType::DT_INT4: {
        algorithm_.reset(new (std::nothrow)
                             NibbleInnerProductKmeansAlgorithm<int32_t>);
        break;
      }
      default: {
        LOG_ERROR("Unsupported feature types %d.", data_type);
        return IndexError_Mismatch;
      }
    }
  } else {
    switch (data_type) {
      case IndexMeta::DataType::DT_FP16: {
        algorithm_.reset(new (std::nothrow)
                             NumericalKmeansAlgorithm<ailego::Float16>);
        break;
      }
      case IndexMeta::DataType::DT_FP32: {
        algorithm_.reset(new (std::nothrow) NumericalKmeansAlgorithm<float>);
        break;
      }
      case IndexMeta::DataType::DT_FP64: {
        algorithm_.reset(new (std::nothrow) NumericalKmeansAlgorithm<double>);
        break;
      }
      case IndexMeta::DataType::DT_INT8: {
        algorithm_.reset(new (std::nothrow) NumericalKmeansAlgorithm<int8_t>);
        break;
      }
      case IndexMeta::DataType::DT_INT16: {
        algorithm_.reset(new (std::nothrow) NumericalKmeansAlgorithm<int16_t>);
        break;
      }
      case IndexMeta::DataType::DT_INT4: {
        algorithm_.reset(new (std::nothrow) NibbleKmeansAlgorithm<int32_t>);
        break;
      }
      // TODO
      case IndexMeta::DataType::DT_BINARY32: {
        algorithm_.reset(new (std::nothrow) BinaryKmeansAlgorithm<uint32_t>);
        break;
      }

#if defined(AILEGO_M64)
      case IndexMeta::DataType::DT_BINARY64: {
        algorithm_.reset(new (std::nothrow) BinaryKmeansAlgorithm<uint64_t>);
        break;
      }
#endif  // AILEGO_M64

      default: {
        LOG_ERROR("Unsupported feature types %d.", data_type);
        return IndexError_Mismatch;
      }
    }
  }

  // new (std::nothrow) yields a null pointer when the allocation fails, so
  // the algorithm must be checked before it is used.
  if (!algorithm_) {
    LOG_ERROR("Failed to allocate the cluster algorithm.");
    return IndexError_NoMemory;
  }

  // Report the algorithm's own initialization result to the caller.
  return algorithm_->init(meta, params);
}

// Register OptKmeansCluster with the index factory.
// NOTE(review): presumably this makes the class creatable by its name
// ("OptKmeansCluster") -- confirm against the macro definition.
INDEX_FACTORY_REGISTER_CLUSTER(OptKmeansCluster);

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/cluster/seeker.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <zvec/core/framework/index_framework.h>

namespace zvec {
namespace core {

/*! Seeker interface
 *
 *  Abstract interface over a set of mounted features: seek() takes a query
 *  buffer and fills in one Document.
 */
class Seeker {
 public:
  /*! Seek result: an index paired with a score.
   *
   *  Copy operations are left to the compiler (the hand-written ones only
   *  copied both members), which keeps the type trivially copyable.
   */
  struct Document {
    uint32_t index{0};
    float score{0.0f};

    //! Constructor
    Document() = default;

    //! Constructor
    Document(uint32_t i, float v) : index(i), score(v) {}

    //! Less than (compares scores only)
    bool operator<(const Document &rhs) const {
      return (this->score < rhs.score);
    }

    //! Greater than (compares scores only)
    bool operator>(const Document &rhs) const {
      return (this->score > rhs.score);
    }
  };

 public:
  //! Destructor
  virtual ~Seeker() = default;

  //! Initialize the seeker with the index meta
  virtual int init(const IndexMeta &meta) = 0;

  //! Cleanup the seeker
  virtual int cleanup(void) = 0;

  //! Reset the seeker
  virtual int reset(void) = 0;

  //! Mount the features to seek in
  virtual int mount(IndexFeatures::Pointer feats) = 0;

  //! Seek with a query buffer of `len` bytes (presumably -- confirm the unit
  //! of `len` against the implementations); the result is written to `out`
  virtual int seek(const void *query, size_t len, Document *out) = 0;

  //! Retrieve the original features
  virtual IndexFeatures::Pointer original(void) const = 0;
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/cluster/stratified_cluster.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <zvec/ailego/container/params.h>
#include <zvec/core/framework/index_cluster.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_factory.h>
#include "cluster_params.h"

namespace zvec {
namespace core {

/*! Stratified Cluster
 */
class StratifiedCluster : public IndexCluster {
 public:
  //! Constructor
  StratifiedCluster() = default;

  //! Destructor
  virtual ~StratifiedCluster() = default;

  //! Initialize Cluster: remember the meta and read the parameters
  virtual int init(const IndexMeta &meta, const ailego::Params &params) {
    meta_ = meta;
    this->update_params(params);
    return 0;
  }

  //! Cleanup Cluster: release the mounted features
  virtual int cleanup(void) {
    features_.reset();
    return 0;
  }

  //! Reset Cluster: release the mounted features
  virtual int reset(void) {
    features_.reset();
    return 0;
  }

  //! Update Cluster: re-read the parameters
  virtual int update(const ailego::Params &params) {
    this->update_params(params);
    return 0;
  }

  //! Suggest dividing to K clusters
  virtual void suggest(uint32_t k) {
    cluster_count_ = k;
  }

  //! Mount features; they must be non-null and match the meta
  virtual int mount(IndexFeatures::Pointer feats) {
    int status = 0;
    if (!feats) {
      status = IndexError_InvalidArgument;
    } else if (!feats->is_matched(meta_)) {
      status = IndexError_Mismatch;
    } else {
      features_ = std::move(feats);
    }
    return status;
  }

  //! Cluster
  virtual int cluster(IndexThreads::Pointer threads,
                      IndexCluster::CentroidList &cents);

  //! Classify
  virtual int classify(IndexThreads::Pointer threads,
                       IndexCluster::CentroidList &cents);

  //! Label
  virtual int label(IndexThreads::Pointer threads,
                    const IndexCluster::CentroidList &cents,
                    std::vector<uint32_t> *out);

 protected:
  //! Test if it is valid, i.e. a non-empty feature set is mounted
  bool is_valid(void) const {
    return features_ && features_->count() != 0;
  }

  //! Update parameters
  void update_params(const ailego::Params &params);

  //! Check Centroids
  bool check_centroids(const IndexCluster::CentroidList &cents);

  //! Initialize Sub Clusters
  int init_sub_clusters(IndexCluster::Pointer *first,
                        IndexCluster::Pointer *second);

  //! Initialize First Cluster
  int init_first_cluster(IndexCluster::Pointer *first);

  //! Initialize Second Cluster
  int init_second_cluster(IndexCluster::Pointer *second,
                          IndexFeatures::Pointer features);

 private:
  //! Members
  IndexMeta meta_{};
  IndexFeatures::Pointer features_{};
  uint32_t cluster_count_{0u};
  uint32_t thread_count_{0u};
  uint32_t first_cluster_count_{0u};
  uint32_t second_cluster_count_{0u};
  bool auto_tuning_{false};
  std::string first_cluster_class_{"OptKmeansCluster"};
  std::string second_cluster_class_{"OptKmeansCluster"};
  ailego::Params first_cluster_params_{};
  ailego::Params second_cluster_params_{};

  // TODO: Maybe optimize later
  uint32_t second_threads_count_{10u};  // todo
};

//! Run the two-level clustering over the mounted features.
//!
//! The first-level cluster partitions all features into `cents` and then
//! classifies them. Every centroid that received features is clustered again
//! by its own second-level cluster, whose output is written to the centroid's
//! subitems. Second-level jobs are submitted to `threads`, and each job
//! clusters on one of the private pools built below.
//!
//! @param threads  Pool to run on; when null a SingleQueueIndexThreads with
//!                 `thread_count_` threads is created.
//! @param cents    In/out centroid list.
//! @return 0 on success, otherwise an IndexError code. A failure inside a
//!         second-level job is only logged and does not change the result.
int StratifiedCluster::cluster(IndexThreads::Pointer threads,
                               IndexCluster::CentroidList &cents) {
  if (!threads) {
    threads = std::make_shared<SingleQueueIndexThreads>(thread_count_, false);
    if (!threads) {
      return IndexError_NoMemory;
    }
  }
  if (!this->check_centroids(cents)) {
    LOG_ERROR("The input centroid's list includes some invalid centroids.");
    return IndexError_InvalidArgument;
  }

  if (!this->is_valid()) {
    LOG_ERROR("The cluster is not ready.");
    return IndexError_NoReady;
  }

  // The pool count is configurable and is used as a divisor below, so a zero
  // value must be rejected before any work is done.
  if (second_threads_count_ == 0) {
    LOG_ERROR("The count of second thread pools must be greater than zero.");
    return IndexError_InvalidArgument;
  }

  IndexCluster::Pointer first_cluster;
  int result = init_first_cluster(&first_cluster);
  if (result != 0) {
    LOG_ERROR("Failed to initialize the first cluster.");
    return result;
  }

  if (first_cluster_count_) {
    first_cluster->suggest(first_cluster_count_);
  }

  // The first clustering
  LOG_DEBUG("Clustering with first cluster: %s.", first_cluster_class_.c_str());
  result = first_cluster->cluster(threads, cents);
  if (result != 0) {
    LOG_ERROR("Failed to cluster in first cluster: %s.",
              first_cluster_class_.c_str());
    return result;
  }

  result = first_cluster->classify(threads, cents);
  if (result != 0) {
    LOG_ERROR("Failed to classify in first cluster: %s.",
              first_cluster_class_.c_str());
    return result;
  }

  // Cleanup for saving memory
  first_cluster.reset();

  // Calculate the total cluster count; an explicit suggestion takes priority
  uint32_t total_cluster_count =
      static_cast<uint32_t>(cents.size() * second_cluster_count_);
  if (cluster_count_) {
    total_cluster_count = cluster_count_;
  }

  // Split the available threads into several private pools
  uint32_t tail_threads = threads->count() % second_threads_count_;
  std::vector<std::shared_ptr<IndexThreads>> threads_cluster;

  // TODO: reusing threads pool?
  // In case the threads count is less than the second threads count, create
  // one single-threaded pool per available thread
  if (threads->count() / second_threads_count_ == 0) {
    for (size_t threads_idx = 0; threads_idx < tail_threads; threads_idx++) {
      std::shared_ptr<IndexThreads> curr_threads =
          std::make_shared<SingleQueueIndexThreads>(1, false);
      threads_cluster.push_back(curr_threads);
    }
  } else {
    for (size_t threads_idx = 0; threads_idx < second_threads_count_;
         threads_idx++) {
      uint32_t curr_threads_count = threads->count() / second_threads_count_;
      // The last `tail_threads` pools take one extra thread each
      if (threads_idx >= second_threads_count_ - tail_threads) {
        curr_threads_count++;
      }
      std::shared_ptr<IndexThreads> curr_threads =
          std::make_shared<SingleQueueIndexThreads>(curr_threads_count, false);
      threads_cluster.push_back(curr_threads);
    }
  }

  // The pool list is indexed with a modulo below; keep at least one pool so
  // the divisor can never be zero.
  if (threads_cluster.empty()) {
    std::shared_ptr<IndexThreads> curr_threads =
        std::make_shared<SingleQueueIndexThreads>(1, false);
    threads_cluster.push_back(curr_threads);
  }

  auto task_group = threads->make_group();
  // The second clustering
  for (size_t i = 0; i < cents.size(); ++i) {
    if (cents[i].similars().empty()) {
      continue;
    }

    // Pools are assigned round-robin; the element outlives the task because
    // `threads_cluster` lives until after wait_finish()
    IndexThreads::Pointer &curr_threads =
        threads_cluster[i % (threads_cluster.size())];

    task_group->submit(ailego::Closure::New(
        [this, &curr_threads, &total_cluster_count, &cents](size_t index) {
          auto &it = cents[index];
          IndexCluster::Pointer second_cluster;
          std::shared_ptr<FlexibleIndexFeatures> features =
              std::make_shared<FlexibleIndexFeatures>(
                  meta_, it.similars().data(), it.similars().size());

          int ret = this->init_second_cluster(&second_cluster, features);
          if (ret != 0) {
            LOG_ERROR("Failed to initialize the second cluster.");
            return;
          }

          if (auto_tuning_) {
            if (total_cluster_count) {
              // Scale the count by this centroid's share of all features,
              // but never suggest fewer than one cluster
              double factor = static_cast<double>(it.similars().size()) /
                              static_cast<double>(this->features_->count());
              second_cluster->suggest(
                  std::max(static_cast<uint32_t>(
                               std::floor(total_cluster_count * factor)),
                           1u));
            }
          } else if (second_cluster_count_) {
            second_cluster->suggest(second_cluster_count_);
          }

          LOG_DEBUG("Clustering with second cluster: %s.",
                    second_cluster_class_.c_str());
          ret = second_cluster->cluster(curr_threads, *(it.mutable_subitems()));
          if (ret != 0) {
            LOG_ERROR("Failed to cluster in second cluster: %s.",
                      second_cluster_class_.c_str());
          }
        },
        i));
  }
  task_group->wait_finish();
  return 0;
}

//! Classify the mounted features in two levels: first into `cents`, then the
//! similars of every non-empty centroid into that centroid's subitems.
int StratifiedCluster::classify(IndexThreads::Pointer threads,
                                IndexCluster::CentroidList &cents) {
  // Without a caller-supplied pool, create one sized by thread_count_.
  if (!threads) {
    threads = std::make_shared<SingleQueueIndexThreads>(thread_count_, false);
    if (!threads) {
      return IndexError_NoMemory;
    }
  }

  // Argument and state validation.
  if (cents.empty()) {
    LOG_ERROR("The input centroid's list is empty.");
    return IndexError_InvalidArgument;
  }
  if (!this->check_centroids(cents)) {
    LOG_ERROR("The input centroid's list includes some invalid centroids.");
    return IndexError_InvalidArgument;
  }
  if (!this->is_valid()) {
    LOG_ERROR("The cluster is not ready.");
    return IndexError_NoReady;
  }

  IndexCluster::Pointer level_one;
  IndexCluster::Pointer level_two;
  int ret = init_sub_clusters(&level_one, &level_two);
  if (ret != 0) {
    LOG_ERROR("Failed to initialize the subclusters.");
    return ret;
  }

  // First level: classify all mounted features.
  ret = level_one->classify(threads, cents);
  if (ret != 0) {
    LOG_ERROR("Failed to classify in first cluster: %s.",
              first_cluster_class_.c_str());
    return ret;
  }

  // The first-level cluster is no longer needed; release it early.
  level_one.reset();

  // One reusable feature view that is re-pointed at each centroid's similars.
  auto view = std::make_shared<FlexibleIndexFeatures>(meta_);

  // Second level: classify each centroid's own features.
  for (size_t i = 0; i < cents.size(); ++i) {
    IndexCluster::Centroid &centroid = cents[i];
    const auto &members = centroid.similars();
    if (members.empty()) {
      continue;
    }

    view->mount(members.data(), members.size());
    ret = level_two->mount(view);
    if (ret != 0) {
      LOG_ERROR("Failed to mount features for second cluster: %s.",
                second_cluster_class_.c_str());
      return ret;
    }

    ret = level_two->classify(threads, *centroid.mutable_subitems());
    if (ret != 0) {
      LOG_ERROR("Failed to classify in second cluster: %s.",
                second_cluster_class_.c_str());
      return ret;
    }
  }
  return 0;
}

//! Label the mounted features against `cents`; the work is delegated entirely
//! to a freshly initialized first-level cluster.
int StratifiedCluster::label(IndexThreads::Pointer threads,
                             const IndexCluster::CentroidList &cents,
                             std::vector<uint32_t> *out) {
  // Without a caller-supplied pool, create one sized by thread_count_.
  if (!threads) {
    threads = std::make_shared<SingleQueueIndexThreads>(thread_count_, false);
    if (!threads) {
      return IndexError_NoMemory;
    }
  }

  // Argument and state validation.
  if (cents.empty()) {
    LOG_ERROR("The input centroid's list is empty.");
    return IndexError_InvalidArgument;
  }
  if (!this->check_centroids(cents)) {
    LOG_ERROR("The input centroid's list includes some invalid centroids.");
    return IndexError_InvalidArgument;
  }
  if (!this->is_valid()) {
    LOG_ERROR("The cluster is not ready.");
    return IndexError_NoReady;
  }

  IndexCluster::Pointer level_one;
  int ret = init_first_cluster(&level_one);
  if (ret != 0) {
    LOG_ERROR("Failed to initialize the subclusters.");
    return ret;
  }

  ret = level_one->label(threads, cents, out);
  if (ret == 0) {
    return 0;
  }
  LOG_ERROR("Failed to label in first cluster: %s.",
            first_cluster_class_.c_str());
  return ret;
}

//! Read every supported setting from `params` into the matching member.
void StratifiedCluster::update_params(const ailego::Params &params) {
  // Threading
  params.get(GENERAL_THREAD_COUNT, &thread_count_);
  params.get(STRATIFIED_CLUSTER_SECOND_POOL_COUNT, &second_threads_count_);

  // Total cluster count: both keys target the same member and the
  // stratified key is read last
  params.get(GENERAL_CLUSTER_COUNT, &cluster_count_);
  params.get(STRATIFIED_CLUSTER_COUNT, &cluster_count_);
  params.get(STRATIFIED_CLUSTER_AUTO_TUNING, &auto_tuning_);

  // First level
  params.get(STRATIFIED_CLUSTER_FIRST_COUNT, &first_cluster_count_);
  params.get(STRATIFIED_CLUSTER_FIRST_CLASS, &first_cluster_class_);
  params.get(STRATIFIED_CLUSTER_FIRST_PARAMS, &first_cluster_params_);

  // Second level
  params.get(STRATIFIED_CLUSTER_SECOND_COUNT, &second_cluster_count_);
  params.get(STRATIFIED_CLUSTER_SECOND_CLASS, &second_cluster_class_);
  params.get(STRATIFIED_CLUSTER_SECOND_PARAMS, &second_cluster_params_);
}

//! Return true when every centroid has the element size recorded in meta_
//! (an empty list passes).
bool StratifiedCluster::check_centroids(
    const IndexCluster::CentroidList &cents) {
  for (size_t i = 0; i < cents.size(); ++i) {
    const bool size_matches = (cents[i].size() == meta_.element_size());
    if (!size_matches) {
      return false;
    }
  }
  return true;
}

//! Create and initialize both sub clusters.
//!
//! The first cluster additionally gets the mounted features; the second one
//! is returned unmounted. The outputs are only written when every step
//! succeeded.
//!
//! @param first   Receives the first-level cluster.
//! @param second  Receives the second-level cluster.
//! @return 0 on success, IndexError_NoExist when a class cannot be created,
//!         otherwise the error returned by init()/mount().
int StratifiedCluster::init_sub_clusters(IndexCluster::Pointer *first,
                                         IndexCluster::Pointer *second) {
  IndexCluster::Pointer first_cluster =
      IndexFactory::CreateCluster(first_cluster_class_);

  if (!first_cluster) {
    LOG_ERROR("Failed to create first cluster: %s.",
              first_cluster_class_.c_str());
    return IndexError_NoExist;
  }

  IndexCluster::Pointer second_cluster =
      IndexFactory::CreateCluster(second_cluster_class_);

  if (!second_cluster) {
    // Report the class that actually failed (the second one)
    LOG_ERROR("Failed to create second cluster: %s.",
              second_cluster_class_.c_str());
    return IndexError_NoExist;
  }

  int result = first_cluster->init(meta_, first_cluster_params_);
  if (result != 0) {
    LOG_ERROR("Failed to initialize first cluster: %s.",
              first_cluster_class_.c_str());
    return result;
  }

  result = second_cluster->init(meta_, second_cluster_params_);
  if (result != 0) {
    LOG_ERROR("Failed to initialize second cluster: %s.",
              second_cluster_class_.c_str());
    return result;
  }

  result = first_cluster->mount(features_);
  if (result != 0) {
    LOG_ERROR("Failed to mount features for first cluster: %s.",
              first_cluster_class_.c_str());
    return result;
  }

  *first = std::move(first_cluster);
  *second = std::move(second_cluster);
  return 0;
}

//! Create the first-level cluster, initialize it and mount the features.
//! `*first` is written only when every step succeeded.
int StratifiedCluster::init_first_cluster(IndexCluster::Pointer *first) {
  IndexCluster::Pointer cluster =
      IndexFactory::CreateCluster(first_cluster_class_);
  if (!cluster) {
    LOG_ERROR("Failed to create first cluster: %s.",
              first_cluster_class_.c_str());
    return IndexError_NoExist;
  }

  const int init_ret = cluster->init(meta_, first_cluster_params_);
  if (init_ret != 0) {
    LOG_ERROR("Failed to initialize first cluster: %s.",
              first_cluster_class_.c_str());
    return init_ret;
  }

  const int mount_ret = cluster->mount(features_);
  if (mount_ret != 0) {
    LOG_ERROR("Failed to mount features for first cluster: %s.",
              first_cluster_class_.c_str());
    return mount_ret;
  }

  *first = std::move(cluster);
  return 0;
}

//! Create the second-level cluster, initialize it and mount `features`.
//! `*second` is written only when every step succeeded.
int StratifiedCluster::init_second_cluster(IndexCluster::Pointer *second,
                                           IndexFeatures::Pointer features) {
  IndexCluster::Pointer cluster =
      IndexFactory::CreateCluster(second_cluster_class_);
  if (!cluster) {
    LOG_ERROR("Failed to create second cluster: %s.",
              second_cluster_class_.c_str());
    return IndexError_NoExist;
  }

  const int init_ret = cluster->init(meta_, second_cluster_params_);
  if (init_ret != 0) {
    LOG_ERROR("Failed to initialize second cluster: %s.",
              second_cluster_class_.c_str());
    return init_ret;
  }

  const int mount_ret = cluster->mount(features);
  if (mount_ret != 0) {
    LOG_ERROR("Failed to mount features for second cluster: %s.",
              second_cluster_class_.c_str());
    return mount_ret;
  }

  *second = std::move(cluster);
  return 0;
}

INDEX_FACTORY_REGISTER_CLUSTER(StratifiedCluster);

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/cluster/stratified_cluster_trainer.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "stratified_cluster_trainer.h"
#include <zvec/ailego/utility/string_helper.h>
#include <zvec/ailego/utility/time_helper.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_factory.h>
#include <zvec/core/framework/index_helper.h>
#include "cluster_params.h"

namespace zvec {
namespace core {

const std::string StratifiedClusterTrainer::SEP_TOKEN = "*";
const std::string StratifiedClusterTrainer::DEFAULT_CLUSTER_CLASS =
    "OptKmeansCluster";

//! Parse the trainer parameters.
//!
//! Reads the sampling and threading settings, the per-level centroid counts
//! (a SEP_TOKEN-separated list, mandatory, no zero entries), the per-level
//! sub parameters and the per-level cluster class names. A single class name
//! is repeated for every level.
//!
//! @return 0 on success, IndexError_InvalidArgument when the centroid counts
//!         are missing or contain zero, or when the number of class names
//!         does not match the number of levels.
int StratifiedClusterTrainer::init_params(const ailego::Params &params) {
  // Drop whatever a previous init() left behind; otherwise the per-level
  // vectors would keep stale entries at the front and index 0/1 would still
  // refer to the old configuration.
  centroid_num_vec_.clear();
  cluster_class_.clear();
  cluster_params_.clear();

  params.get(STRATIFIED_TRAINER_SAMPLE_COUNT, &sample_count_);
  params.get(STRATIFIED_TRAINER_SAMPLE_RATIO, &sample_ratio_);
  params.get(STRATIFIED_TRAINER_THREAD_COUNT, &thread_count_);
  cluster_auto_tuning_ = params.get_as_bool(STRATIFIED_TRAINER_AUTOAUNE);

  std::string centroids_num =
      params.get_as_string(STRATIFIED_TRAINER_CLUSTER_COUNT);
  if (!centroids_num.empty()) {
    ailego::StringHelper::Split(centroids_num, SEP_TOKEN, &centroid_num_vec_);
    for (size_t i = 0; i < centroid_num_vec_.size(); ++i) {
      if (centroid_num_vec_[i] == 0) {
        LOG_ERROR("Invalid centroid num %s", centroids_num.c_str());
        return IndexError_InvalidArgument;
      }
    }
  } else {
    LOG_ERROR("No centroids_num configed.");
    return IndexError_InvalidArgument;
  }

  // Level parameters are keyed by a 1-based level number
  size_t level_cnt = centroid_num_vec_.size();
  for (size_t i = 1; i <= level_cnt; ++i) {
    std::string level_params_key =
        STRATIFIED_TRAINER_PARAMS_IN_LEVEL_PREFIX + std::to_string(i);
    ailego::Params level_params;
    params.get(level_params_key, &level_params);
    cluster_params_.push_back(level_params);
  }

  std::string cluster_class(DEFAULT_CLUSTER_CLASS);
  params.get(STRATIFIED_TRAINER_CLASS_NAME, &cluster_class);
  ailego::StringHelper::Split(cluster_class, SEP_TOKEN, &cluster_class_);
  if (cluster_class_.size() == 1) {
    // repeat the cluster class to level_cnt
    for (size_t i = 1; i < level_cnt; ++i) {
      cluster_class_.push_back(cluster_class_[0]);
    }
  } else if (cluster_class_.size() != level_cnt) {
    LOG_ERROR("Cluster class should be equal to level count");
    return IndexError_InvalidArgument;
  }
  return 0;
}

//! Initialize the trainer: parse the parameters, pick a plain cluster for one
//! level or a StratifiedCluster for two levels, then create and initialize it.
int StratifiedClusterTrainer::init(const IndexMeta &index_meta,
                                   const ailego::Params &params) {
  const int parse_ret = init_params(params);
  if (parse_ret != 0) {
    LOG_ERROR("init params failed, errno:%d,%s", parse_ret,
              IndexError::What(parse_ret));
    return parse_ret;
  }

  meta_ = index_meta;
  ailego::Params cluster_params;
  const size_t levels = centroid_num_vec_.size();

  if (levels == 0) {
    LOG_ERROR("invalid centroid num");
    return IndexError_InvalidArgument;
  }
  if (levels > 2) {
    LOG_ERROR("Unsupported more than 2 level clustering.");
    return IndexError_Unsupported;
  }

  if (levels == 1) {
    // one level clustering
    class_name_ = cluster_class_[0];
    cluster_params = cluster_params_[0];
    suggest_centriod_cnt_ = centroid_num_vec_[0];
  } else {
    // two levels: index 0 configures the first level, index 1 the second
    class_name_ = "StratifiedCluster";
    cluster_params.set(STRATIFIED_CLUSTER_FIRST_CLASS, cluster_class_[0]);
    cluster_params.set(STRATIFIED_CLUSTER_SECOND_CLASS, cluster_class_[1]);
    cluster_params.set(STRATIFIED_CLUSTER_FIRST_COUNT, centroid_num_vec_[0]);
    cluster_params.set(STRATIFIED_CLUSTER_SECOND_COUNT, centroid_num_vec_[1]);
    cluster_params.set(STRATIFIED_CLUSTER_FIRST_PARAMS, cluster_params_[0]);
    cluster_params.set(STRATIFIED_CLUSTER_SECOND_PARAMS, cluster_params_[1]);
    cluster_params.set(STRATIFIED_CLUSTER_AUTO_TUNING, cluster_auto_tuning_);
    suggest_centriod_cnt_ = centroid_num_vec_[1] * centroid_num_vec_[0];
  }

  cluster_ = IndexFactory::CreateCluster(class_name_);
  if (!cluster_) {
    LOG_ERROR("Failed to create cluster[%s]", class_name_.c_str());
    return IndexError_InvalidArgument;
  }

  const int init_ret = cluster_->init(meta_, cluster_params);
  if (init_ret != 0) {
    LOG_ERROR("Failed to initialize of cluster[%s], error: %d, %s",
              class_name_.c_str(), init_ret, IndexError::What(init_ret));
    return init_ret;
  }

  // Forward the expected number of leaf centroids when there is one
  if (suggest_centriod_cnt_ > 0) {
    cluster_->suggest(suggest_centriod_cnt_);
  }
  return 0;
}

//! Release the cluster and forget the trained centroids; always returns 0.
int StratifiedClusterTrainer::cleanup(void) {
  cluster_.reset();
  centroids_.clear();
  return 0;
}

//! Train centroids from the vectors in `holder`.
//!
//! The holder is copied (optionally sampled) into an in-memory feature set,
//! mounted on the cluster created by init(), and clustered. Afterwards the
//! number of centroids on the deepest level is logged against the expected
//! count and the elapsed time is recorded in the stats.
//!
//! @param threads  Pool to run on; when null a SingleQueueIndexThreads with
//!                 `thread_count_` threads is created.
//! @param holder   Source vectors; must match the trainer's meta.
//! @return 0 on success, IndexError_InvalidArgument for a null holder,
//!         IndexError_Mismatch for a holder that does not match the meta,
//!         IndexError_NoReady when the trainer has no cluster (init() was not
//!         called or cleanup() ran), otherwise the cluster's error code.
int StratifiedClusterTrainer::train(IndexThreads::Pointer threads,
                                    IndexHolder::Pointer holder) {
  ailego::ElapsedTime timer;
  if (!holder) {
    return IndexError_InvalidArgument;
  }
  if (!holder->is_matched(meta_)) {
    return IndexError_Mismatch;
  }
  // cluster_ is only set by init() and is cleared by cleanup()
  if (!cluster_) {
    LOG_ERROR("The trainer is not initialized.");
    return IndexError_NoReady;
  }
  if (!threads) {
    threads = std::make_shared<SingleQueueIndexThreads>(thread_count_, false);
    if (!threads) {
      return IndexError_NoMemory;
    }
  }

  // The larger of the absolute and the ratio-derived sample size wins;
  // zero means "no sampling"
  size_t train_sample_count = std::max(
      sample_count_, static_cast<uint32_t>(sample_ratio_ * holder->count()));

  IndexFeatures::Pointer features;
  if (train_sample_count > 0) {
    LOG_INFO(
        "Train sampling, SampleCount=%u, SampleRatio=%f, HolderCount=%zu, "
        "TrainCount=%zu",
        sample_count_, sample_ratio_, static_cast<size_t>(holder->count()),
        train_sample_count);

    auto sampler = std::make_shared<SampleIndexFeatures<CompactIndexFeatures>>(
        meta_, train_sample_count);
    // Never reserve more than the holder can deliver
    size_t pre_reserve = train_sample_count < holder->count()
                             ? train_sample_count
                             : holder->count();
    sampler->reserve(pre_reserve);
    for (auto iter = holder->create_iterator(); iter && iter->is_valid();
         iter->next()) {
      sampler->emplace(iter->data());
    }
    features = sampler;
    stats_.set_trained_count(train_sample_count);
  } else {
    LOG_INFO(
        "Do no sampling, SampleCount=%u, SampleRatio=%f, "
        "HolderCount=%zu, TrainCount=%zu",
        sample_count_, sample_ratio_, static_cast<size_t>(holder->count()),
        static_cast<size_t>(holder->count()));

    auto no_sampler = std::make_shared<CompactIndexFeatures>(meta_);
    for (auto iter = holder->create_iterator(); iter && iter->is_valid();
         iter->next()) {
      no_sampler->emplace(iter->data());
    }

    features = no_sampler;
    stats_.set_trained_count(holder->count());
  }
  stats_.set_discarded_count(0);

  // Holder is not needed, cleanup it.
  holder.reset();

  int result = cluster_->mount(features);
  if (result != 0) {
    LOG_ERROR("Failed to mount features of cluster[%s], error: %d, %s",
              class_name_.c_str(), result, IndexError::What(result));
    return result;
  }

  centroids_.clear();
  result = cluster_->cluster(std::move(threads), centroids_);
  if (result != 0) {
    LOG_ERROR("Failed to cluster features of cluster[%s], error: %d, %s",
              class_name_.c_str(), result, IndexError::What(result));
    return result;
  }

  // check build result: count the centroids on every level (1-based) by
  // walking the subitems recursively
  std::vector<size_t> level_size;
  std::function<void(const IndexCluster::CentroidList &, size_t)>
      cal_centroid_cnt =
          [&cal_centroid_cnt, &level_size](
              const IndexCluster::CentroidList &cents, size_t level) {
            if (level > level_size.size()) {
              level_size.resize(level);
            }
            level_size[level - 1] += cents.size();
            for (const auto &it : cents) {
              if (!it.subitems().empty()) {
                cal_centroid_cnt(it.subitems(), level + 1);
              }
            }
          };
  cal_centroid_cnt(centroids_, 1);

  // level_size has at least one entry because the walk starts at level 1
  size_t centroids_num = level_size[level_size.size() - 1];
  if (centroids_num > suggest_centriod_cnt_) {
    LOG_WARN(
        "Built centroid(%zu level) count[%zu] bigger than expected "
        "count[%u]",
        level_size.size(), centroids_num, suggest_centriod_cnt_);
  } else {
    LOG_INFO("Built centroid(%zu level) count[%zu], expected count[%u]",
             level_size.size(), centroids_num, suggest_centriod_cnt_);
  }

  stats_.set_trained_costtime(timer.milli_seconds());

  return 0;
}

//! Load trained centroids from a storage: every segment is read in full into
//! a memory bundle, the meta is deserialized from the storage and the
//! centroids are deserialized from the bundle.
int StratifiedClusterTrainer::load(IndexStorage::Pointer cntr) {
  if (!cntr) {
    LOG_ERROR("IndexStorage is nullptr.");
    return IndexError_InvalidArgument;
  }

  auto bundle = std::make_shared<MemoryIndexBundle>();
  if (!bundle) {
    LOG_ERROR("New MemoryInndexBundle failed.");
    return IndexError_NoMemory;
  }

  // Copy each segment (keyed by its name) into the bundle
  auto segments = cntr->get_all();
  for (auto &entry : segments) {
    IndexStorage::Segment::Pointer &segment = entry.second;
    if (!segment) {
      LOG_ERROR("Get Segment %s failed.", entry.first.c_str());
      return IndexError_InvalidArgument;
    }

    size_t wanted = segment->data_size();
    const void *payload = nullptr;
    size_t fetched = segment->read(0, &payload, wanted);
    if (fetched != wanted) {
      LOG_ERROR("Read data failed expect %zu, actual %zu.", wanted, fetched);
      return IndexError_ReadData;
    }
    bundle->set(entry.first, payload, wanted);
  }

  int ret = IndexHelper::DeserializeFromStorage(cntr.get(), &meta_);
  if (ret != 0) {
    LOG_ERROR("Failed to deserialize meta from container");
    return ret;
  }

  ret = IndexCluster::Deserialize(meta_, std::move(bundle), &centroids_);
  if (ret != 0) {
    LOG_ERROR("Failed to deserialize index: %d", ret);
    return ret;
  }
  return 0;
}

//! Dump the trained centroids.
//!
//! The centroids are serialized into a bundle, the meta is written to the
//! dumper, and every bundle entry is appended as one segment.
//!
//! @param dumper  Destination; must not be null.
//! @return 0 on success, IndexError_InvalidArgument for a null dumper,
//!         IndexError_PackIndex when appending or writing a segment fails,
//!         otherwise the error of the serialization step that failed.
int StratifiedClusterTrainer::dump(const IndexDumper::Pointer &dumper) {
  // Validate the pointer up front, the same way load() validates its storage
  if (!dumper) {
    LOG_ERROR("IndexDumper is nullptr.");
    return IndexError_InvalidArgument;
  }

  IndexBundle::Pointer bundle;
  int result = IndexCluster::Serialize(meta_, centroids_, &bundle);
  if (result != 0) {
    LOG_ERROR("IndexCluster Serialize failed with ret %d.", result);
    return result;
  }

  result = IndexHelper::SerializeToDumper(meta_, dumper.get());
  if (result != 0) {
    LOG_ERROR("Failed to serialize meta into dumper.");
    return result;
  }

  for (const auto &it : bundle->all()) {
    size_t data_size = it.second.size();
    // Announce the segment first, then write exactly data_size bytes
    result = dumper->append(it.first, data_size, 0, 0);
    if (result != 0) {
      LOG_ERROR("Dumper append meta %s %zu failed.", it.first.c_str(),
                data_size);
      return IndexError_PackIndex;
    }
    size_t actual_size = dumper->write(it.second.buffer(), data_size);
    if (actual_size != data_size) {
      LOG_ERROR("Dumper segment %s expect %zu, actual %zu.", it.first.c_str(),
                data_size, actual_size);
      return IndexError_PackIndex;
    }
  }
  return 0;
}

//! Read-only access to the meta set by init() or load().
const IndexMeta &StratifiedClusterTrainer::meta(void) const {
  return this->meta_;
}

//! Read-only access to the statistics filled in by train().
const IndexTrainer::Stats &StratifiedClusterTrainer::stats(void) const {
  return this->stats_;
}

//! Serialize the trained centroids into a bundle.
//!
//! @return The bundle, or a null pointer when serialization fails (the
//!         failure is logged instead of being silently ignored).
IndexBundle::Pointer StratifiedClusterTrainer::indexes(void) const {
  IndexBundle::Pointer bundle;
  int result = IndexCluster::Serialize(meta_, centroids_, &bundle);
  if (result != 0) {
    LOG_ERROR("IndexCluster Serialize failed with ret %d.", result);
    // Do not hand out a bundle that may be incomplete
    return IndexBundle::Pointer();
  }
  return bundle;
}

//! Register Cluster Trainer in Factory
INDEX_FACTORY_REGISTER_TRAINER(StratifiedClusterTrainer);

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/cluster/stratified_cluster_trainer.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <zvec/core/framework/index_cluster.h>
#include <zvec/core/framework/index_trainer.h>

namespace zvec {
namespace core {

/*! Cluster Trainer
 */
class StratifiedClusterTrainer : public IndexTrainer {
 public:
  using Pointer = std::shared_ptr<StratifiedClusterTrainer>;

  //! Construct an uninitialized trainer
  StratifiedClusterTrainer() {}

  //! Destroy the trainer
  ~StratifiedClusterTrainer() {}

 protected:
  //! Parse the parameters, then create and initialize the cluster
  virtual int init(const IndexMeta &meta, const ailego::Params &params);

  //! Release the cluster and the trained centroids
  virtual int cleanup();

  //! Train centroids from the vectors of a holder
  virtual int train(IndexThreads::Pointer threads, IndexHolder::Pointer holder);

  //! Load the meta and the centroids from a storage
  virtual int load(IndexStorage::Pointer cntr);

  //! Write the meta and the centroids to a dumper
  virtual int dump(const IndexDumper::Pointer &dumper);

  //! Retrieve the index meta
  virtual const IndexMeta &meta() const;

  //! Retrieve the training statistics
  virtual const IndexTrainer::Stats &stats() const;

  //! Retrieve the centroids serialized as a bundle
  virtual IndexBundle::Pointer indexes() const;

 private:
  //! Parse the trainer parameters into the members below
  int init_params(const ailego::Params &params);

  //! Members
  IndexMeta meta_{};
  uint32_t sample_count_{0u};
  float sample_ratio_{0.0f};
  uint32_t thread_count_{0u};
  bool cluster_auto_tuning_{false};
  IndexCluster::Pointer cluster_{};
  IndexCluster::CentroidList centroids_{};

  //! Expected number of centroids on the deepest level
  uint32_t suggest_centriod_cnt_{0u};
  //! Factory name of the cluster that is actually created
  std::string class_name_;
  //! Per-level class names, centroid counts and sub parameters
  std::vector<std::string> cluster_class_;
  std::vector<uint64_t> centroid_num_vec_;
  std::vector<ailego::Params> cluster_params_;
  IndexTrainer::Stats stats_{};

  //! Constants
  static const std::string SEP_TOKEN;
  static const std::string DEFAULT_CLUSTER_CLASS;
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/cluster/vector_mean.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <cmath>
#include <cstring>
#include <type_traits>
#include <vector>
#include <zvec/ailego/internal/platform.h>
#include <zvec/ailego/utility/float_helper.h>
#include <zvec/ailego/utility/type_helper.h>

namespace zvec {
namespace core {

/*! Vector Mean
 */
struct VectorMean {
  //! Virtual destructor so implementations can be destroyed through the base
  virtual ~VectorMean() {}

  //! Reset the accumulator
  virtual void reset() = 0;

  //! Add one vector of `len` bytes to the accumulator
  virtual bool plus(const void *vec, size_t len) = 0;

  //! Write the mean into a buffer of `len` bytes
  virtual bool mean(void *out, size_t len) const = 0;

  //! Write the mean into a string
  virtual void mean(std::string *out) const = 0;

  //! Merge another accumulator into this one
  virtual bool merge(const VectorMean &rhs) = 0;

  //! Number of vectors accumulated
  virtual size_t count() const = 0;

  //! Dimension of the accumulated vectors
  virtual size_t dimension() const = 0;
};

/*! Vector Mean Array
 */
struct VectorMeanArray {
  //! Virtual destructor so implementations can be destroyed through the base
  virtual ~VectorMeanArray() {}

  //! Mutable element access, forwards to at()
  VectorMean &operator[](size_t i) {
    return at(i);
  }

  //! Read-only element access, forwards to at()
  const VectorMean &operator[](size_t i) const {
    return at(i);
  }

  //! Change the number of accumulators
  virtual void resize(size_t cnt) = 0;

  //! Remove every accumulator
  virtual void clear() = 0;

  //! Retrieve the i-th accumulator
  virtual VectorMean &at(size_t i) = 0;

  //! Retrieve the i-th accumulator (read-only)
  virtual const VectorMean &at(size_t i) const = 0;

  //! Number of accumulators
  virtual size_t count() const = 0;

  //! Dimension shared by the accumulators
  virtual size_t dimension() const = 0;
};

/*! General Vector Mean Array
 */
template <typename T, typename = typename std::is_base_of<VectorMean, T>::type>
class GeneralVectorMeanArray : public VectorMeanArray {
  // The defaulted template argument above is a valid type for every T, so it
  // cannot reject anything by itself; enforce the requirement explicitly.
  static_assert(std::is_base_of<VectorMean, T>::value,
                "GeneralVectorMeanArray requires T to derive from VectorMean");

 public:
  //! Constructor: an empty array whose accumulators have dimension `dim`
  GeneralVectorMeanArray(size_t dim) : dimension_(dim), array_() {}

  //! Copy constructor
  GeneralVectorMeanArray(const GeneralVectorMeanArray &rhs)
      : dimension_(rhs.dimension_), array_(rhs.array_) {}

  //! Move constructor (only moves a vector and copies a size, cannot throw)
  GeneralVectorMeanArray(GeneralVectorMeanArray &&rhs) noexcept
      : dimension_(rhs.dimension_), array_(std::move(rhs.array_)) {}

  //! Emplace an accumulator built from `args`; it is rejected (false) when
  //! its dimension differs from the array's dimension
  template <typename... TArgs>
  bool emplace(TArgs &&...args) {
    T accum(std::forward<TArgs>(args)...);
    if (accum.dimension() != dimension_) {
      return false;
    }
    array_.push_back(std::move(accum));
    return true;
  }

  //! Resize accumulators: new ones are constructed from the array's
  //! dimension, surplus ones are dropped from the back
  virtual void resize(size_t cnt) {
    if (array_.size() < cnt) {
      for (size_t i = array_.size(); i < cnt; ++i) {
        array_.emplace_back(dimension_);
      }
    } else {
      array_.resize(cnt);
    }
  }

  //! Clear accumulators
  virtual void clear(void) {
    array_.clear();
  }

  //! Retrieve an accumulator (no bounds check)
  virtual VectorMean &at(size_t i) {
    return array_[i];
  }

  //! Retrieve an accumulator (no bounds check)
  virtual const VectorMean &at(size_t i) const {
    return array_[i];
  }

  //! Retrieve the count of accumulators
  virtual size_t count(void) const {
    return array_.size();
  }

  //! Retrieve the dimension of accumulators
  virtual size_t dimension(void) const {
    return dimension_;
  }

 private:
  //! Disable them
  GeneralVectorMeanArray(void) = delete;

  //! Members
  size_t dimension_;
  std::vector<T> array_;
};

/*! Numerical Vector Mean
 */
template <typename T,
          typename =
              typename std::enable_if<ailego::IsArithmetic<T>::value>::type>
class NumericalVectorMean : public VectorMean {
 public:
  //! Constructor
  NumericalVectorMean(void) : count_(0), accums_() {}

  //! Constructor
  NumericalVectorMean(const NumericalVectorMean &rhs)
      : count_(rhs.count_), accums_(rhs.accums_) {}

  //! Constructor
  NumericalVectorMean(NumericalVectorMean &&rhs)
      : count_(rhs.count_), accums_(std::move(rhs.accums_)) {}

  //! Constructor
  NumericalVectorMean(size_t dim) : count_(0), accums_(dim) {}

  //! Constructor
  NumericalVectorMean(const T *means, size_t dim, size_t cnt)
      : count_(cnt), accums_(dim) {
    for (size_t i = 0; i < dim; ++i) {
      accums_[i] = static_cast<double>(means[i]) * count_;
    }
  }

  //! Reset accumulator
  void reset(size_t dim) {
    count_ = 0u;
    accums_.clear();
    accums_.resize(dim, 0.0);
  }

  //! Reset accumulator
  virtual void reset(void) {
    this->reset(accums_.size());
  }

  //! Plus a vector
  virtual bool plus(const void *vec, size_t len) {
    size_t dim = accums_.size();
    if (dim * sizeof(T) != len) {
      return false;
    }
    for (size_t i = 0; i < dim; ++i) {
      accums_[i] += *(static_cast<const T *>(vec) + i);
    }
    ++count_;
    return true;
  }

  //! Retrieve the mean of vectors
  virtual bool mean(void *out, size_t len) const {
    size_t dim = accums_.size();
    if (dim * sizeof(T) != len) {
      return false;
    }
    for (size_t i = 0; i < dim; ++i) {
      *(static_cast<T *>(out) + i) = FloatCast<T>(accums_[i] / count_);
    }
    return true;
  }

  //! Retrieve the mean of vectors
  virtual void mean(std::string *out) const {
    ailego::NumericalVector<T> &vec =
        *static_cast<ailego::NumericalVector<T> *>(out);

    size_t dim = accums_.size();
    vec.resize(dim);
    for (size_t i = 0; i < dim; ++i) {
      vec[i] = FloatCast<T>(accums_[i] / count_);
    }
  }

  //! Merge another vector mean
  virtual bool merge(const VectorMean &rhs) {
    const NumericalVectorMean<T> &src =
        dynamic_cast<const NumericalVectorMean<T> &>(rhs);

    size_t dim = accums_.size();
    if (dim != src.accums_.size()) {
      return false;
    }
    count_ += src.count_;
    for (size_t i = 0; i < dim; ++i) {
      accums_[i] += src.accums_[i];
    }
    return true;
  }

  //! Retrieve the count of vectors
  virtual size_t count(void) const {
    return count_;
  }

  //! Retrieve dimension of accumulator
  virtual size_t dimension(void) const {
    return accums_.size();
  }

 protected:
  //! Convert float type to another type
  template <typename U>
  static auto FloatCast(const double &val) ->
      typename std::enable_if<!std::is_integral<U>::value, U>::type {
    return static_cast<U>(val);
  }

  //! Convert float type to another type
  template <typename U>
  static auto FloatCast(const double &val) ->
      typename std::enable_if<std::is_integral<U>::value, U>::type {
    return static_cast<U>(std::round(val));
  }

 private:
  //! Members
  size_t count_;
  std::vector<double> accums_;
};

/*! Numerical Vector Harmonic Mean
 *
 *  Keeps, for every dimension, the running sum of the reciprocals of the
 *  values added so far together with the number of vectors. The harmonic
 *  mean of a dimension is reported as count / sum.
 */
template <typename T,
          typename =
              typename std::enable_if<ailego::IsArithmetic<T>::value>::type>
class NumericalVectorHarmonicMean : public VectorMean {
 public:
  //! Construct an accumulator without any dimension
  NumericalVectorHarmonicMean(void) : count_(0), accums_() {}

  //! Copy the count and the sums of another accumulator
  NumericalVectorHarmonicMean(const NumericalVectorHarmonicMean &rhs)
      : count_(rhs.count_), accums_(rhs.accums_) {}

  //! Take over the sums of another accumulator (its count is copied)
  NumericalVectorHarmonicMean(NumericalVectorHarmonicMean &&rhs)
      : count_(rhs.count_), accums_(std::move(rhs.accums_)) {}

  //! Construct an accumulator holding `dim` zeroed sums
  NumericalVectorHarmonicMean(size_t dim) : count_(0), accums_(dim) {}

  //! Rebuild the sums from `dim` known means observed over `cnt` vectors
  NumericalVectorHarmonicMean(const T *means, size_t dim, size_t cnt)
      : count_(cnt), accums_(dim) {
    const double total = static_cast<double>(count_);
    for (size_t idx = 0; idx != dim; ++idx) {
      // sum(1 / x) equals count / harmonic_mean
      accums_[idx] = total / static_cast<double>(means[idx]);
    }
  }

  //! Drop all state and restart with `dim` zeroed sums
  void reset(size_t dim) {
    accums_.assign(dim, 0.0);
    count_ = 0u;
  }

  //! Drop all state while keeping the current dimension
  virtual void reset(void) {
    const size_t dim = accums_.size();
    this->reset(dim);
  }

  //! Add the reciprocal of every component of a vector to the sums
  //! \param vec  Input vector, read as an array of T
  //! \param len  Size of the input in bytes
  //! \return false when len differs from dimension * sizeof(T)
  virtual bool plus(const void *vec, size_t len) {
    const size_t dim = accums_.size();
    if (len != dim * sizeof(T)) {
      return false;
    }

    const T *values = static_cast<const T *>(vec);
    for (size_t idx = 0; idx != dim; ++idx) {
      accums_[idx] += 1.0 / values[idx];
    }
    ++count_;
    return true;
  }

  //! Write the harmonic mean of every dimension into a raw buffer
  //! \param out  Destination buffer, written as an array of T
  //! \param len  Size of the destination buffer in bytes
  //! \return false when len differs from dimension * sizeof(T)
  virtual bool mean(void *out, size_t len) const {
    const size_t dim = accums_.size();
    if (len != dim * sizeof(T)) {
      return false;
    }

    T *dst = static_cast<T *>(out);
    for (size_t idx = 0; idx != dim; ++idx) {
      dst[idx] = FloatCast<T>(count_ / accums_[idx]);
    }
    return true;
  }

  //! Write the harmonic mean of every dimension into a string buffer that is
  //! accessed as ailego::NumericalVector<T>
  virtual void mean(std::string *out) const {
    const size_t dim = accums_.size();
    auto *result = static_cast<ailego::NumericalVector<T> *>(out);

    result->resize(dim);
    for (size_t idx = 0; idx != dim; ++idx) {
      (*result)[idx] = FloatCast<T>(count_ / accums_[idx]);
    }
  }

  //! Fold the sums and the count of another accumulator into this one.
  //! The argument is dynamic_cast by reference, so a different dynamic type
  //! raises std::bad_cast; a dimension mismatch returns false.
  virtual bool merge(const VectorMean &rhs) {
    const auto &other =
        dynamic_cast<const NumericalVectorHarmonicMean<T> &>(rhs);

    const size_t dim = accums_.size();
    if (other.accums_.size() != dim) {
      return false;
    }
    for (size_t idx = 0; idx != dim; ++idx) {
      accums_[idx] += other.accums_[idx];
    }
    count_ += other.count_;
    return true;
  }

  //! Number of vectors accounted for by this accumulator
  virtual size_t count(void) const {
    return this->count_;
  }

  //! Number of per-dimension sums held by this accumulator
  virtual size_t dimension(void) const {
    return this->accums_.size();
  }

 protected:
  //! Convert a double to a non-integral type with a plain static_cast
  template <typename U>
  static auto FloatCast(const double &val) ->
      typename std::enable_if<!std::is_integral<U>::value, U>::type {
    U converted = static_cast<U>(val);
    return converted;
  }

  //! Convert a double to an integral type, rounding with std::round first
  template <typename U>
  static auto FloatCast(const double &val) ->
      typename std::enable_if<std::is_integral<U>::value, U>::type {
    const double nearest = std::round(val);
    return static_cast<U>(nearest);
  }

 private:
  //! Members
  size_t count_;
  std::vector<double> accums_;
};

/*! Numerical Vector Geometric Mean
 *
 *  Keeps, for every dimension, the running product of the values added so
 *  far together with the number of vectors. The geometric mean of a
 *  dimension is reported as product ^ (1 / count).
 */
template <typename T,
          typename =
              typename std::enable_if<ailego::IsArithmetic<T>::value>::type>
class NumericalVectorGeometricMean : public VectorMean {
 public:
  //! Construct an accumulator without any dimension
  NumericalVectorGeometricMean(void) : count_(0), accums_() {}

  //! Copy the count and the products of another accumulator
  NumericalVectorGeometricMean(const NumericalVectorGeometricMean &rhs)
      : count_(rhs.count_), accums_(rhs.accums_) {}

  //! Take over the products of another accumulator (its count is copied)
  NumericalVectorGeometricMean(NumericalVectorGeometricMean &&rhs)
      : count_(rhs.count_), accums_(std::move(rhs.accums_)) {}

  //! Construct an accumulator holding `dim` products, each starting at 1.0
  NumericalVectorGeometricMean(size_t dim) : count_(0), accums_(dim, 1.0) {}

  //! Rebuild the products from `dim` known means observed over `cnt` vectors
  NumericalVectorGeometricMean(const T *means, size_t dim, size_t cnt)
      : count_(cnt), accums_(dim, 1.0) {
    for (size_t idx = 0; idx != dim; ++idx) {
      // product == geometric_mean ^ count
      accums_[idx] = std::pow(static_cast<double>(means[idx]), count_);
    }
  }

  //! Drop all state and restart with `dim` products equal to 1.0
  void reset(size_t dim) {
    accums_.assign(dim, 1.0);
    count_ = 0u;
  }

  //! Drop all state while keeping the current dimension
  virtual void reset(void) {
    const size_t dim = accums_.size();
    this->reset(dim);
  }

  //! Multiply every component of a vector into the products
  //! \param vec  Input vector, read as an array of T
  //! \param len  Size of the input in bytes
  //! \return false when len differs from dimension * sizeof(T)
  virtual bool plus(const void *vec, size_t len) {
    const size_t dim = accums_.size();
    if (len != dim * sizeof(T)) {
      return false;
    }

    const T *values = static_cast<const T *>(vec);
    for (size_t idx = 0; idx != dim; ++idx) {
      accums_[idx] *= values[idx];
    }
    ++count_;
    return true;
  }

  //! Write the geometric mean of every dimension into a raw buffer
  //! \param out  Destination buffer, written as an array of T
  //! \param len  Size of the destination buffer in bytes
  //! \return false when len differs from dimension * sizeof(T)
  virtual bool mean(void *out, size_t len) const {
    const size_t dim = accums_.size();
    if (len != dim * sizeof(T)) {
      return false;
    }

    T *dst = static_cast<T *>(out);
    const double exponent = 1.0 / count_;
    for (size_t idx = 0; idx != dim; ++idx) {
      dst[idx] = FloatCast<T>(std::pow(accums_[idx], exponent));
    }
    return true;
  }

  //! Write the geometric mean of every dimension into a string buffer that
  //! is accessed as ailego::NumericalVector<T>
  virtual void mean(std::string *out) const {
    const size_t dim = accums_.size();
    auto *result = static_cast<ailego::NumericalVector<T> *>(out);

    result->resize(dim);
    const double exponent = 1.0 / count_;
    for (size_t idx = 0; idx != dim; ++idx) {
      (*result)[idx] = FloatCast<T>(std::pow(accums_[idx], exponent));
    }
  }

  //! Fold the products and the count of another accumulator into this one.
  //! The argument is dynamic_cast by reference, so a different dynamic type
  //! raises std::bad_cast; a dimension mismatch returns false.
  virtual bool merge(const VectorMean &rhs) {
    const auto &other =
        dynamic_cast<const NumericalVectorGeometricMean<T> &>(rhs);

    const size_t dim = accums_.size();
    if (other.accums_.size() != dim) {
      return false;
    }
    for (size_t idx = 0; idx != dim; ++idx) {
      accums_[idx] *= other.accums_[idx];
    }
    count_ += other.count_;
    return true;
  }

  //! Number of vectors accounted for by this accumulator
  virtual size_t count(void) const {
    return this->count_;
  }

  //! Number of per-dimension products held by this accumulator
  virtual size_t dimension(void) const {
    return this->accums_.size();
  }

 protected:
  //! Convert a double to a non-integral type with a plain static_cast
  template <typename U>
  static auto FloatCast(const double &val) ->
      typename std::enable_if<!std::is_integral<U>::value, U>::type {
    U converted = static_cast<U>(val);
    return converted;
  }

  //! Convert a double to an integral type, rounding with std::round first
  template <typename U>
  static auto FloatCast(const double &val) ->
      typename std::enable_if<std::is_integral<U>::value, U>::type {
    const double nearest = std::round(val);
    return static_cast<U>(nearest);
  }

 private:
  //! Members
  size_t count_;
  std::vector<double> accums_;
};

/*! Binary Vector Mean
 *
 *  Majority vote over bit vectors: one counter per bit records how many of
 *  the added vectors had that bit set, and the resulting "mean" sets a bit
 *  when its counter is strictly greater than half of the vector count.
 *  Bit i of a vector is bit (i & 7) of byte (i >> 3). The number of counters
 *  is always a multiple of 8 so that it matches whole input bytes.
 */
class BinaryVectorMean : public VectorMean {
 public:
  //! Construct an accumulator without any dimension
  BinaryVectorMean(void) : count_(0), accums_() {}

  //! Copy the count and the counters of another accumulator
  BinaryVectorMean(const BinaryVectorMean &rhs)
      : count_(rhs.count_), accums_(rhs.accums_) {}

  //! Take over the counters of another accumulator (its count is copied)
  BinaryVectorMean(BinaryVectorMean &&rhs)
      : count_(rhs.count_), accums_(std::move(rhs.accums_)) {}

  //! Construct an accumulator for `dim` bits (rounded up to a multiple of 8)
  BinaryVectorMean(size_t dim) : count_(0), accums_(AlignDimension(dim)) {}

  //! Rebuild the counters from a known mean observed over `cnt` vectors
  //! \param means  Packed bits of the mean, at least (dim + 7) / 8 bytes
  //! \param dim    Number of bits to read from `means`
  //! \param cnt    Number of vectors the mean stands for
  BinaryVectorMean(const void *means, size_t dim, size_t cnt)
      : count_(cnt), accums_(AlignDimension(dim)) {
    const uint8_t *bits = reinterpret_cast<const uint8_t *>(means);
    for (size_t i = 0; i < dim; ++i) {
      // Start every counter at the tie value; a set bit pushes it just above
      // the threshold that mean() compares against.
      accums_[i] = (count_ >> 1);

      if (bits[i >> 3] & static_cast<uint8_t>(1 << (i & 0x7))) {
        accums_[i] += 1;
      }
    }
  }

  //! Drop all state and restart with counters for `dim` bits.
  //! The dimension is rounded up to a multiple of 8, exactly as the
  //! constructors do; otherwise plus() and mean() could never accept a
  //! buffer for a dimension that is not byte aligned.
  void reset(size_t dim) {
    count_ = 0u;
    accums_.clear();
    accums_.resize(AlignDimension(dim));
  }

  //! Drop all state while keeping the current dimension
  virtual void reset(void) {
    this->reset(accums_.size());
  }

  //! Count the set bits of a vector
  //! \param vec  Packed input bits
  //! \param len  Size of the input in bytes
  //! \return false when len * 8 differs from the accumulator dimension
  virtual bool plus(const void *vec, size_t len) {
    size_t dim = accums_.size();
    if (dim != (len << 3)) {
      return false;
    }

    const uint8_t *bits = reinterpret_cast<const uint8_t *>(vec);
    for (size_t i = 0; i < dim; ++i) {
      if (bits[i >> 3] & static_cast<uint8_t>(1 << (i & 0x7))) {
        accums_[i] += 1;
      }
    }
    ++count_;
    return true;
  }

  //! Write the majority-vote bits into a raw buffer
  //! \param out  Destination buffer; it is zeroed before bits are set
  //! \param len  Size of the destination buffer in bytes
  //! \return false when len * 8 differs from the accumulator dimension
  virtual bool mean(void *out, size_t len) const {
    size_t dim = accums_.size();
    if (dim != (len << 3)) {
      return false;
    }
    memset(out, 0, len);

    uint8_t *bits = reinterpret_cast<uint8_t *>(out);
    size_t half_count = count_ >> 1;
    for (size_t i = 0; i < dim; ++i) {
      if (accums_[i] > half_count) {
        bits[i >> 3] |= static_cast<uint8_t>(1 << (i & 0x7));
      }
    }
    return true;
  }

  //! Write the majority-vote bits into a string resized to dimension / 8
  virtual void mean(std::string *out) const {
    size_t dim = accums_.size();
    out->clear();
    out->resize((dim + 7) / 8);

    uint8_t *bits =
        reinterpret_cast<uint8_t *>(const_cast<char *>(out->data()));
    size_t half_count = count_ >> 1;
    for (size_t i = 0; i < dim; ++i) {
      if (accums_[i] > half_count) {
        bits[i >> 3] |= static_cast<uint8_t>(1 << (i & 0x7));
      }
    }
  }

  //! Fold the counters and the count of another accumulator into this one.
  //! The argument is dynamic_cast by reference, so a different dynamic type
  //! raises std::bad_cast; a dimension mismatch returns false.
  virtual bool merge(const VectorMean &rhs) {
    const BinaryVectorMean &src = dynamic_cast<const BinaryVectorMean &>(rhs);

    size_t dim = accums_.size();
    if (dim != src.accums_.size()) {
      return false;
    }
    count_ += src.count_;
    for (size_t i = 0; i < dim; ++i) {
      accums_[i] += src.accums_[i];
    }
    return true;
  }

  //! Retrieve the count of vectors
  virtual size_t count(void) const {
    return count_;
  }

  //! Retrieve dimension of accumulator (a multiple of 8)
  virtual size_t dimension(void) const {
    return accums_.size();
  }

 private:
  //! Round a bit count up to the next multiple of 8
  static size_t AlignDimension(size_t dim) {
    return ((dim + 7) >> 3) << 3;
  }

  //! Members
  size_t count_;
  std::vector<size_t> accums_;
};

/*! Nibble Vector Mean
 *
 *  Arithmetic mean of vectors whose components are signed 4-bit values
 *  packed two per byte: component i is the low nibble of byte (i >> 1) when
 *  i is even and the high nibble when i is odd. Sums are kept as doubles.
 *  Components are always processed in pairs; a trailing unpaired dimension
 *  (odd `dim`) is left untouched instead of being read or written out of
 *  bounds.
 */
template <typename T,
          typename = typename std::enable_if<std::is_integral<T>::value>::type>
class NibbleVectorMean : public VectorMean {
 public:
  //! Construct an accumulator without any dimension
  NibbleVectorMean(void) : count_(0), accums_() {}

  //! Copy the count and the sums of another accumulator
  NibbleVectorMean(const NibbleVectorMean &rhs)
      : count_(rhs.count_), accums_(rhs.accums_) {}

  //! Take over the sums of another accumulator (its count is copied)
  NibbleVectorMean(NibbleVectorMean &&rhs)
      : count_(rhs.count_), accums_(std::move(rhs.accums_)) {}

  //! Construct an accumulator holding `dim` zeroed sums
  NibbleVectorMean(size_t dim) : count_(0), accums_(dim) {}

  //! Rebuild the sums from a known mean observed over `cnt` vectors
  //! \param means  Packed nibbles of the mean, dim / 2 bytes are read
  //! \param dim    Number of components
  //! \param cnt    Number of vectors the mean stands for
  NibbleVectorMean(const void *means, size_t dim, size_t cnt)
      : count_(cnt), accums_(dim) {
    const uint8_t *arr = reinterpret_cast<const uint8_t *>(means);
    // `i + 1 < dim` keeps both writes inside accums_ even when dim is odd.
    for (size_t i = 0; i + 1 < dim; i += 2) {
      const uint8_t val = arr[i >> 1];
      const int lo = LowNibble(val);
      const int hi = HighNibble(val);
      accums_[i] = static_cast<double>(lo) * count_;
      accums_[i + 1] = static_cast<double>(hi) * count_;
    }
  }

  //! Drop all state and restart with `dim` zeroed sums
  void reset(size_t dim) {
    count_ = 0u;
    accums_.clear();
    accums_.resize(dim, 0.0);
  }

  //! Drop all state while keeping the current dimension
  virtual void reset(void) {
    this->reset(accums_.size());
  }

  //! Add every component of a packed vector to the sums
  //! \param vec  Packed input nibbles
  //! \param len  Size of the input in bytes
  //! \return false when len * 2 differs from the accumulator dimension
  virtual bool plus(const void *vec, size_t len) {
    size_t dim = accums_.size();
    if (dim != (len << 1)) {
      return false;
    }

    const uint8_t *arr = reinterpret_cast<const uint8_t *>(vec);
    for (size_t i = 0; i + 1 < dim; i += 2) {
      const uint8_t val = arr[i >> 1];
      accums_[i] += LowNibble(val);
      accums_[i + 1] += HighNibble(val);
    }
    ++count_;
    return true;
  }

  //! Write the rounded mean of every component into a raw buffer.
  //! Dividing by a zero count is not guarded here.
  //! \param out  Destination buffer; it is zeroed before being filled
  //! \param len  Size of the destination buffer in bytes
  //! \return false when len * 2 differs from the accumulator dimension
  virtual bool mean(void *out, size_t len) const {
    size_t dim = accums_.size();
    if (dim != (len << 1)) {
      return false;
    }
    memset(out, 0, len);

    uint8_t *arr = reinterpret_cast<uint8_t *>(out);

    for (size_t i = 0; i + 1 < dim; i += 2) {
      int lo = static_cast<int>(std::round(accums_[i] / count_));
      int hi = static_cast<int>(std::round(accums_[i + 1] / count_));
      arr[i >> 1] = PackNibbles(lo, hi);
    }

    return true;
  }

  //! Write the rounded mean of every component into a string resized to
  //! dimension / 2 bytes. Dividing by a zero count is not guarded here.
  virtual void mean(std::string *out) const {
    size_t dim = accums_.size();
    out->clear();
    out->resize(dim >> 1);

    uint8_t *arr = reinterpret_cast<uint8_t *>(const_cast<char *>(out->data()));

    // The string holds dim / 2 bytes, so only complete pairs are written.
    for (size_t i = 0; i + 1 < dim; i += 2) {
      int lo = static_cast<int>(std::round(accums_[i] / count_));
      int hi = static_cast<int>(std::round(accums_[i + 1] / count_));
      arr[i >> 1] = PackNibbles(lo, hi);
    }
  }

  //! Fold the sums and the count of another accumulator into this one.
  //! The argument is dynamic_cast by reference, so a different dynamic type
  //! raises std::bad_cast; a dimension mismatch returns false.
  virtual bool merge(const VectorMean &rhs) {
    const NibbleVectorMean &src = dynamic_cast<const NibbleVectorMean &>(rhs);

    size_t dim = accums_.size();
    if (dim != src.accums_.size()) {
      return false;
    }
    count_ += src.count_;
    for (size_t i = 0; i < dim; ++i) {
      accums_[i] += src.accums_[i];
    }
    return true;
  }

  //! Retrieve the count of vectors
  virtual size_t count(void) const {
    return count_;
  }

  //! Retrieve dimension of accumulator
  virtual size_t dimension(void) const {
    return accums_.size();
  }

 private:
  //! Sign-extended low nibble of a packed byte
  static int LowNibble(uint8_t val) {
    return static_cast<int8_t>(val << 4) >> 4;
  }

  //! Sign-extended high nibble of a packed byte
  static int HighNibble(uint8_t val) {
    return static_cast<int8_t>(val) >> 4;
  }

  //! Pack the low four bits of two values into one byte. The shift is done
  //! on an unsigned value because left-shifting a negative int is undefined
  //! before C++20; the resulting bits are the same.
  static uint8_t PackNibbles(int lo, int hi) {
    return static_cast<uint8_t>(((static_cast<unsigned>(hi) << 4) & 0xf0u) |
                                (static_cast<unsigned>(lo) & 0xfu));
  }

  //! Members
  size_t count_;
  std::vector<double> accums_;
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/flat/CMakeLists.txt
================================================
# Pull in the project's shared CMake helper modules.
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)
#message(STATUS "PROJECT_ROOT_DIR = ${PROJECT_ROOT_DIR}")
# Declare the core_knn_flat library: sources are the files matched by *.cc,
# it links against core_framework, and its version is PROXIMA_ZVEC_VERSION.
# NOTE(review): cc_library is presumably defined in cmake/bazel.cmake, which
# also gives STATIC/SHARED/STRICT/ALWAYS_LINK their meaning - confirm there.
cc_library(
    NAME core_knn_flat 
    STATIC SHARED STRICT ALWAYS_LINK
    SRCS *.cc
    LIBS core_framework 
    INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm ${PROJECT_ROOT_DIR}/src/core/framework
    VERSION "${PROXIMA_ZVEC_VERSION}"
  )


================================================
FILE: src/core/algorithm/flat/flat_builder.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "flat_builder.h"
#include "flat_utility.h"

namespace zvec {
namespace core {

// Initialize the builder: copy the meta, resolve the major order, validate
// the metric and record the searcher/builder names in the meta.
// Returns 0 on success, IndexError_Unsupported when column-major order is
// requested for an unsupported layout, or IndexError_InvalidArgument when
// the metric does not pass VerifyMetric().
template <size_t BATCH_SIZE>
int FlatBuilder<BATCH_SIZE>::init(const IndexMeta &meta,
                                  const ailego::Params &params) {
  meta_ = meta;

  // An explicit parameter overrides the major order carried by the meta.
  bool want_column_major = false;
  if (params.get(PARAM_FLAT_COLUMN_MAJOR_ORDER, &want_column_major)) {
    if (want_column_major) {
      meta_.set_major_order(IndexMeta::MO_COLUMN);
    } else {
      meta_.set_major_order(IndexMeta::MO_ROW);
    }
  }

  // Anything other than row-major needs the column-major checks.
  if (meta_.major_order() != IndexMeta::MO_ROW) {
    const IndexMeta::DataType dt = meta_.data_type();
    const bool column_requested =
        (meta_.major_order() == IndexMeta::MO_COLUMN);

    // NOTE(review): this flag is never set to true, so the promotion of an
    // undefined major order to column-major below cannot happen - confirm
    // whether that is intended.
    bool support_column_major = false;

    const bool known_type =
        (dt == IndexMeta::DataType::DT_FP32 ||
         dt == IndexMeta::DataType::DT_FP16 ||
         dt == IndexMeta::DataType::DT_INT8 || dt == IndexMeta::DT_INT4 ||
         dt == IndexMeta::DT_BINARY32 || dt == IndexMeta::DT_BINARY64);
    if (!known_type || meta_.unit_size() != IndexMeta::UnitSizeof(dt)) {
      if (column_requested) {
        LOG_ERROR("Unsupported type %d with unit size %u.", dt,
                  meta_.unit_size());
        return IndexError_Unsupported;
      }
      support_column_major = false;
    }

    if (meta_.element_size() % IndexMeta::AlignSizeof(dt) != 0) {
      if (column_requested) {
        LOG_ERROR("Unsupported type %d with dimension %u.", dt,
                  meta_.dimension());
        return IndexError_Unsupported;
      }
      support_column_major = false;
    }

    if (meta_.major_order() == IndexMeta::MO_UNDEFINED &&
        support_column_major) {
      meta_.set_major_order(IndexMeta::MO_COLUMN);
    }
  }

  if (!VerifyMetric(meta_)) {
    LOG_ERROR("Invalid index measure %s.", meta_.metric_name().c_str());
    return IndexError_InvalidArgument;
  }

  // Searcher and builder names carry the batch size as a suffix.
  const std::string tag = std::to_string(BATCH_SIZE);
  ailego::Params searcher_params;
  searcher_params.set(PARAM_FLAT_BATCH_SIZE, BATCH_SIZE);
  meta_.set_searcher("FlatSearcher" + tag, 0, searcher_params);
  meta_.set_builder("FlatBuilder" + tag, 0, params);
  return 0;
}

// Take ownership of the holder after checking it against the initialized
// meta; no data is transformed at this stage. Returns IndexError_Mismatch
// when the holder does not match, 0 otherwise.
template <size_t BATCH_SIZE>
int FlatBuilder<BATCH_SIZE>::build(IndexThreads::Pointer,
                                   IndexHolder::Pointer holder) {
  ailego::ElapsedTime timer;

  const bool matched = holder->is_matched(meta_);
  if (!matched) {
    LOG_ERROR("The holder is unmatched with initialized meta.");
    return IndexError_Mismatch;
  }

  holder_ = std::move(holder);

  // Record how many vectors were accepted and how long the step took.
  stats_.set_built_count(holder_->count());
  stats_.set_built_costtime(timer.milli_seconds());
  return 0;
}

// Serialize the held data through the dumper in this order: features
// (column-major or row-major), keys, key mapping, index meta. Returns
// IndexError_NoReady when build() has not stored a holder, the first
// non-zero error of any step, or 0 on success.
template <size_t BATCH_SIZE>
int FlatBuilder<BATCH_SIZE>::dump(const IndexDumper::Pointer &dumper) {
  ailego::ElapsedTime timer;
  if (!holder_) {
    return IndexError_NoReady;
  }

  IndexDumper *const out = dumper.get();
  std::vector<uint64_t> keys;

  // The feature writers also collect the keys in dump order.
  int error_code = (meta_.major_order() == IndexMeta::MO_COLUMN)
                       ? this->write_column_index(out, &keys)
                       : this->write_row_index(out, &keys);
  if (error_code != 0) {
    return error_code;
  }

  error_code = this->write_keys(keys, out);
  if (error_code != 0) {
    return error_code;
  }

  error_code = this->write_mapping(keys, out);
  if (error_code != 0) {
    return error_code;
  }

  error_code = IndexHelper::SerializeToDumper(meta_, out);
  if (error_code != 0) {
    return error_code;
  }

  stats_.set_dumped_count(keys.size());
  stats_.set_dumped_costtime(timer.milli_seconds());
  return 0;
}

// Write the raw key array followed by zero padding up to the 32-byte
// aligned size, then append it as the keys segment. Returns
// IndexError_WriteData on a short write, otherwise the result of append().
template <size_t BATCH_SIZE>
int FlatBuilder<BATCH_SIZE>::write_keys(const std::vector<uint64_t> &keys,
                                        IndexDumper *dumper) {
  const size_t data_size = sizeof(uint64_t) * keys.size();
  const size_t padding_size = ailego_align(data_size, 32) - data_size;

  if (dumper->write(keys.data(), data_size) != data_size) {
    LOG_ERROR("Failed to write data into dumper %s", dumper->name().c_str());
    return IndexError_WriteData;
  }

  // Write the padding if need
  if (padding_size != 0) {
    const std::string padding(padding_size, '\0');
    if (dumper->write(padding.data(), padding.size()) != padding.size()) {
      LOG_ERROR("Failed to write data into dumper %s", dumper->name().c_str());
      return IndexError_WriteData;
    }
  }

  return dumper->append(FLAT_SEGMENT_KEYS_SEG_ID, data_size, padding_size, 0);
}

// Write the positions 0..N-1 of the keys, ordered by ascending key value,
// followed by zero padding up to the 32-byte aligned size, then append it
// as the mapping segment. Returns IndexError_WriteData on a short write,
// otherwise the result of append().
template <size_t BATCH_SIZE>
int FlatBuilder<BATCH_SIZE>::write_mapping(const std::vector<uint64_t> &keys,
                                           IndexDumper *dumper) {
  // Start from the identity permutation and sort it by the referenced key.
  std::vector<uint32_t> order(keys.size());
  std::iota(order.begin(), order.end(), 0);
  const auto by_key = [&keys](uint32_t lhs, uint32_t rhs) {
    return (keys[lhs] < keys[rhs]);
  };
  std::sort(order.begin(), order.end(), by_key);

  const size_t data_size = sizeof(uint32_t) * order.size();
  const size_t padding_size = ailego_align(data_size, 32) - data_size;

  if (dumper->write(order.data(), data_size) != data_size) {
    LOG_ERROR("Failed to write data into dumper %s", dumper->name().c_str());
    return IndexError_WriteData;
  }

  // Write the padding if need
  if (padding_size != 0) {
    const std::string padding(padding_size, '\0');
    if (dumper->write(padding.data(), padding.size()) != padding.size()) {
      LOG_ERROR("Failed to write data into dumper %s", dumper->name().c_str());
      return IndexError_WriteData;
    }
  }

  return dumper->append(FLAT_SEGMENT_MAPPING_SEG_ID, data_size, padding_size,
                        0);
}

// Write the features in column-major blocks and collect the keys.
//
// Vectors are gathered into groups of BATCH_SIZE; every full group is
// transposed with ailego::MatrixHelper::Transpose<T, BATCH_SIZE> (T is the
// unit the transpose moves) and written out. A final group with fewer than
// BATCH_SIZE vectors is written as gathered, without transposition. Zero
// padding up to the 32-byte aligned size follows, and the whole range is
// appended as the features segment.
//
// dumper  Destination of the serialized data.
// keys    Receives the key of every vector in iteration order.
// Returns IndexError_Runtime when no iterator can be created,
// IndexError_WriteData on a short write, otherwise the result of append().
template <size_t BATCH_SIZE>
template <typename T>
int FlatBuilder<BATCH_SIZE>::write_column_index(IndexDumper *dumper,
                                                std::vector<uint64_t> *keys) {
  auto iter = holder_->create_iterator();
  if (!iter) {
    LOG_ERROR("Failed to create iterator of holder");
    return IndexError_Runtime;
  }

  // Write features
  size_t element_size = holder_->element_size();
  size_t block_size = element_size * BATCH_SIZE;
  std::string block1, block2;
  block1.reserve(block_size);
  // The transpose target is written through a raw pointer, so it must have
  // block_size real elements; reserve() alone leaves the size at zero and
  // writing past size() is undefined behaviour.
  block2.resize(block_size);

  for (; iter->is_valid(); iter->next()) {
    block1.append(reinterpret_cast<const char *>(iter->data()), element_size);
    keys->emplace_back(iter->key());

    if (block1.size() == block_size) {
      ailego::MatrixHelper::Transpose<T, BATCH_SIZE>(
          block1.data(), element_size / sizeof(T),
          static_cast<void *>(&block2[0]));

      if (dumper->write(block2.data(), block_size) != block_size) {
        LOG_ERROR("Failed to write data into dumper %s",
                  dumper->name().c_str());
        return IndexError_WriteData;
      }
      block1.clear();
    }
  }

  // Remaining vectors that do not fill a whole batch stay in gathered order.
  if (!block1.empty()) {
    if (dumper->write(block1.data(), block1.size()) != block1.size()) {
      LOG_ERROR("Failed to write data into dumper %s", dumper->name().c_str());
      return IndexError_WriteData;
    }
  }

  // Write the padding if need
  size_t features_size = keys->size() * element_size;
  size_t features_padding_size =
      ailego_align(features_size, 32) - features_size;
  if (features_padding_size) {
    std::string padding(features_padding_size, '\0');

    if (dumper->write(padding.data(), padding.size()) != padding.size()) {
      LOG_ERROR("Failed to write data into dumper %s", dumper->name().c_str());
      return IndexError_WriteData;
    }
  }
  return dumper->append(FLAT_SEGMENT_FEATURES_SEG_ID, features_size,
                        features_padding_size, 0);
}

// Write every vector of the holder as is, one after another, collecting the
// keys in the same order. Zero padding up to the 32-byte aligned size
// follows and the range is appended as the features segment. Returns
// IndexError_Runtime when no iterator can be created, IndexError_WriteData
// on a short write, otherwise the result of append().
template <size_t BATCH_SIZE>
int FlatBuilder<BATCH_SIZE>::write_row_index(IndexDumper *dumper,
                                             std::vector<uint64_t> *keys) {
  auto cursor = holder_->create_iterator();
  if (!cursor) {
    LOG_ERROR("Failed to create iterator of holder");
    return IndexError_Runtime;
  }

  // Write features
  const size_t element_size = holder_->element_size();
  while (cursor->is_valid()) {
    if (dumper->write(cursor->data(), element_size) != element_size) {
      LOG_ERROR("Failed to write data into dumper %s", dumper->name().c_str());
      return IndexError_WriteData;
    }
    keys->emplace_back(cursor->key());
    cursor->next();
  }

  // Write the padding if need
  const size_t data_size = keys->size() * element_size;
  const size_t padding_size = ailego_align(data_size, 32) - data_size;
  if (padding_size != 0) {
    const std::string padding(padding_size, '\0');

    if (dumper->write(padding.data(), padding.size()) != padding.size()) {
      LOG_ERROR("Failed to write data into dumper %s", dumper->name().c_str());
      return IndexError_WriteData;
    }
  }

  return dumper->append(FLAT_SEGMENT_FEATURES_SEG_ID, data_size, padding_size,
                        0);
}

// Builder names exposed through the index factory. "LinearBuilder",
// "FlatBuilder" and "FlatBuilder32" all refer to the batch size 32 variant;
// "FlatBuilder16" refers to the batch size 16 variant.
// NOTE(review): the exact effect of the macro is defined elsewhere - it
// presumably registers the class under the given alias.
INDEX_FACTORY_REGISTER_BUILDER_ALIAS(LinearBuilder, FlatBuilder<32>);
INDEX_FACTORY_REGISTER_BUILDER_ALIAS(FlatBuilder, FlatBuilder<32>);
INDEX_FACTORY_REGISTER_BUILDER_ALIAS(FlatBuilder16, FlatBuilder<16>);
INDEX_FACTORY_REGISTER_BUILDER_ALIAS(FlatBuilder32, FlatBuilder<32>);

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/flat/flat_builder.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <zvec/core/framework/index_builder.h>
#include <zvec/core/framework/index_helper.h>
#include "flat_utility.h"

namespace zvec {
namespace core {

/*! Flat Builder
 *
 *  IndexBuilder implementation whose batch size is fixed at compile time by
 *  BATCH_SIZE. Both train() overloads only reset the training statistics.
 */
template <size_t BATCH_SIZE>
class FlatBuilder : public IndexBuilder {
 public:
  //! Destructor
  virtual ~FlatBuilder(void) {}

  //! Initialize the builder from an index meta and parameters
  int init(const IndexMeta &meta, const ailego::Params &params) override;

  //! Release the holder kept by build(); always returns 0
  int cleanup(void) override {
    holder_ = nullptr;
    return 0;
  }

  //! Training from a holder: nothing is trained, statistics are zeroed
  int train(IndexThreads::Pointer, IndexHolder::Pointer) override {
    stats_.set_trained_costtime(0u);
    stats_.set_trained_count(0u);
    return 0;
  }

  //! Training from a trainer: nothing is trained, statistics are zeroed
  int train(const IndexTrainer::Pointer &) override {
    stats_.set_trained_costtime(0u);
    stats_.set_trained_count(0u);
    return 0;
  }

  //! Build the index
  int build(IndexThreads::Pointer, IndexHolder::Pointer holder) override;

  //! Dump index into storage
  int dump(const IndexDumper::Pointer &dumper) override;

  //! Retrieve statistics
  const IndexBuilder::Stats &stats(void) const override {
    return stats_;
  }

 protected:
  //! Dump index keys
  int write_keys(const std::vector<uint64_t> &keys, IndexDumper *dumper);

  //! Dump index keys mapping
  int write_mapping(const std::vector<uint64_t> &keys, IndexDumper *dumper);

  //! Dump index using column-major-order format, moving units of type T
  template <typename T>
  int write_column_index(IndexDumper *dumper, std::vector<uint64_t> *keys);

  //! Dump index using column-major-order format; the unit type is picked
  //! from the align size of the meta's data type (2, 4 or 8 bytes). Any
  //! other align size trips the check and yields IndexError_Runtime.
  int write_column_index(IndexDumper *dumper, std::vector<uint64_t> *keys) {
    const auto align_size = IndexMeta::AlignSizeof(meta_.data_type());
    if (align_size == 2) {
      return this->write_column_index<uint16_t>(dumper, keys);
    }
    if (align_size == 4) {
      return this->write_column_index<uint32_t>(dumper, keys);
    }
    if (align_size == 8) {
      return this->write_column_index<uint64_t>(dumper, keys);
    }
    ailego_check_with(0, "BAD CASE");
    return IndexError_Runtime;
  }

  //! Dump index using row-major-order format
  int write_row_index(IndexDumper *dumper, std::vector<uint64_t> *keys);

 private:
  IndexMeta meta_{};
  IndexBuilder::Stats stats_{};
  IndexHolder::Pointer holder_{};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/flat/flat_distance_matrix.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include "flat_utility.h"

namespace zvec {
namespace core {

/*! Brute Force Distance Tuple
 *
 *  Primary template, declared only. The second parameter exists so that
 *  partial specializations can be enabled with std::enable_if; definitions
 *  are provided by the specializations that follow.
 */
template <size_t K, typename = void>
class FlatDistanceTuple;

/*! Brute Force Distance Tuple (K == 1)
 *
 *  End of the recursion: holds a single distance function obtained from
 *  IndexMetric::distance_matrix().
 */
template <>
class FlatDistanceTuple<1> {
 public:
  //! True when the stored distance function is set
  bool is_valid(void) const {
    return !!distance_;
  }

  //! True only for m == 1 and when the stored distance function is set
  bool is_valid(size_t m) const {
    if (m != 1) {
      return false;
    }
    return !!distance_;
  }

  //! Fetch distance_matrix(1, 1) from the metric
  void initialize(const IndexMetric &measure) {
    distance_ = measure.distance_matrix(1, 1);
  }

  //! Fetch distance_matrix(m, 1) from the metric
  void initialize(const IndexMetric &measure, size_t m) {
    distance_ = measure.distance_matrix(m, 1);
  }

  //! Compute the distance between matrix and query (only M == 1 exists)
  template <size_t M>
  auto distance(const void *m, const void *q, size_t dim, float *out) const ->
      typename std::enable_if<M == 1>::type {
    distance_(m, q, dim, out);
  }

 private:
  IndexMetric::MatrixDistance distance_{};
};

/*! Brute Force Distance Tuple (K accepted by IsEqualPowerofTwo)
 *
 *  Holds the distance function for K itself and, recursively, a tuple for
 *  K / 2, so one object covers K, K / 2, ... down to 1.
 */
template <size_t K>
class FlatDistanceTuple<
    K, typename std::enable_if<IsEqualPowerofTwo<K>::value>::type> {
 public:
  //! True when the nested tuple is valid and the own function is set
  bool is_valid(void) const {
    if (!distance_tuple_.is_valid()) {
      return false;
    }
    return !!distance_;
  }

  //! True when a function for size m is available: the own one for m == K,
  //! the nested tuple's answer for m < K, never for m > K
  bool is_valid(size_t m) const {
    if (m == K) {
      return !!distance_;
    }
    if (m < K) {
      return distance_tuple_.is_valid(m);
    }
    return false;
  }

  //! Initialize the nested tuple, then fetch distance_matrix(K, 1)
  void initialize(const IndexMetric &measure) {
    distance_tuple_.initialize(measure);
    distance_ = measure.distance_matrix(K, 1);
  }

  //! Initialize the nested tuple with m, then fetch distance_matrix(m, K)
  void initialize(const IndexMetric &measure, size_t m) {
    distance_tuple_.initialize(measure, m);
    distance_ = measure.distance_matrix(m, K);
  }

  //! Compute the distance with the function owned by this level (M == K)
  template <size_t M>
  auto distance(const void *m, const void *q, size_t dim, float *out) const ->
      typename std::enable_if<K == M>::type {
    distance_(m, q, dim, out);
  }

  //! Forward smaller sizes (M < K) to the nested tuple
  template <size_t M>
  auto distance(const void *m, const void *q, size_t dim, float *out) const ->
      typename std::enable_if<(K > M) && IsEqualPowerofTwo<M>::value>::type {
    distance_tuple_.template distance<M>(m, q, dim, out);
  }

 private:
  FlatDistanceTuple<(K >> 1)> distance_tuple_{};
  IndexMetric::MatrixDistance distance_{};
};

/*! Brute Force Distance Matrix
 *
 *  Primary template, declared only. The second parameter exists so that
 *  partial specializations can be enabled with std::enable_if; definitions
 *  are provided by the specializations that follow.
 */
template <size_t K, typename = void>
class FlatDistanceMatrix;

/*! Brute Force Distance Matrix (K == 1)
 *
 *  Holds the single distance function distance_matrix(1, 1).
 */
template <>
class FlatDistanceMatrix<1> {
 public:
  //! True when the stored distance function is set
  bool is_valid(void) const {
    const bool ready = !!distance_;
    return ready;
  }

  //! Fetch distance_matrix(1, 1) from the metric
  void initialize(const IndexMetric &measure) {
    distance_ = measure.distance_matrix(1, 1);
  }

  //! Compute the distance between matrix and query (only M == N == 1)
  template <size_t M, size_t N = 1u>
  auto distance(const void *m, const void *q, size_t dim, float *out) const ->
      typename std::enable_if<M == 1u && N == 1u>::type {
    distance_(m, q, dim, out);
  }

 private:
  IndexMetric::MatrixDistance distance_{};
};

/*! Brute Force Distance Matrix (K accepted by IsEqualPowerofTwo)
 *
 *  Combines two tuples: tuple_h_ is initialized with a fixed first size K
 *  and serves distance<K, N>() for N <= K, tuple_v_ is initialized without
 *  it and serves distance<M, 1>() for M < K.
 */
template <size_t K>
class FlatDistanceMatrix<
    K, typename std::enable_if<IsEqualPowerofTwo<K>::value>::type> {
 public:
  //! True when both tuples are valid
  bool is_valid(void) const {
    if (!tuple_h_.is_valid()) {
      return false;
    }
    return tuple_v_.is_valid();
  }

  //! True when a function for the (m, n) combination is available:
  //! tuple_h_ answers for m == K, tuple_v_ answers for m < K with n == 1
  bool is_valid(size_t m, size_t n) const {
    if (m == K) {
      return tuple_h_.is_valid(n);
    }
    if (m < K && n == 1) {
      return tuple_v_.is_valid(m);
    }
    return false;
  }

  //! Initialize both tuples from the metric
  void initialize(const IndexMetric &measure) {
    tuple_h_.initialize(measure, K);
    tuple_v_.initialize(measure);
  }

  //! Compute the distance for M == K and N <= K through tuple_h_
  template <size_t M, size_t N>
  auto distance(const void *m, const void *q, size_t dim, float *out) const ->
      typename std::enable_if<(K == M) && (K >= N)>::type {
    tuple_h_.template distance<N>(m, q, dim, out);
  }

  //! Compute the distance for M < K and N == 1 through tuple_v_
  template <size_t M, size_t N = 1u>
  auto distance(const void *m, const void *q, size_t dim, float *out) const ->
      typename std::enable_if<(K > M) && (N == 1u)>::type {
    tuple_v_.template distance<M>(m, q, dim, out);
  }

 private:
  FlatDistanceTuple<K> tuple_h_{};
  FlatDistanceTuple<(K >> 1)> tuple_v_{};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/flat/flat_index_format.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <ailego/container/bitmap.h>

namespace zvec {
namespace core {

// Index id of a vector (the value FlatSearcher::get_id() hands back).
using node_id_t = uint32_t;
// Primary key of a vector.
using key_t = uint64_t;
// NOTE(review): not referenced by the flat index code visible here; presumably
// shared with a layered index implementation -- confirm.
using level_t = int32_t;
// Distance / score value type.
using dist_t = float;
// Heap of (node id, distance) pairs using the heap's default ordering.
using TopkHeap = ailego::KeyValueHeap<node_id_t, dist_t>;
// Heap of (node id, distance) pairs ordered with std::greater<dist_t>.
using CandidateHeap =
    ailego::KeyValueHeap<node_id_t, dist_t, std::greater<dist_t>>;
// Sentinel values: all bits set.
constexpr node_id_t kInvalidNodeId = static_cast<node_id_t>(-1);
constexpr key_t kInvalidKey = static_cast<key_t>(-1);

/*! Index Format of Linear Index Header
 *
 *  Every numeric field starts at zero through its in-class initializer; the
 *  constructor only has to wipe the byte arrays.
 */
struct LinearIndexHeader {
  LinearIndexHeader() {
    memset(reserved_, 0, sizeof(reserved_));
    // index_meta is a zero-length trailing array, so this clears 0 bytes.
    memset(index_meta, 0, sizeof(index_meta));
  }
  uint32_t header_size{0};
  uint32_t total_vector_count{0};
  uint64_t linear_body_size{0};
  uint32_t linear_list_count{0};
  uint32_t block_vector_count{0};
  uint32_t block_size{0};
  uint32_t block_count{0};
  uint32_t index_meta_size{0};
  char reserved_[28] = {0};
  char index_meta[0];
};

/*! Index Format of Linear Index Meta for each Linear list
 *
 *  All fields default to zero via their in-class initializers.
 */
struct LinearListMeta {
  LinearListMeta() {
    memset(reserved_, 0, sizeof(reserved_));
  }
  uint64_t offset{0};
  uint32_t block_count{0};
  uint32_t vector_count{0};
  uint32_t id_offset{0};
  char reserved_[16] = {0};
};

/*! Index Format of Location in Linear Index for each vector
 *
 *  Three bit-fields declared on uint64_t: a 48-bit offset, a 1-bit layout
 *  flag and 15 reserved bits.
 */
struct LinearVecLocation {
  LinearVecLocation(size_t off, bool col) {
    offset = off;  // only the low 48 bits are kept
    column_major = col;
    reserved = 0u;
  }

  uint64_t offset : 48;       // feature offset in posting block segment
  uint64_t column_major : 1;  // column major if true
  uint64_t reserved : 15;
};

/*! Index Format of Integer Quantizer params for each linear list
 *
 *  Defaults to scale 1 and bias 0.
 */
struct LinearIntegerQuantizerParams {
  float scale = 1.0f;
  float bias = 0.0f;
};

/*! Location of Vectors Block in Storage Segment
 *
 *  Both fields default to zero.
 */
struct BlockLocation {
  uint32_t segment_id = 0;
  uint32_t block_index = 0;
};

/*! The Header of a Block in Storage Segment
 *
 *  `next` and `vector_count` rely on their defaults; the bit-fields are
 *  cleared by the constructor.
 */
struct BlockHeader {
  BlockHeader() {
    column_major = 0u;
    reserved = 0u;
  }
  BlockLocation next;
  uint16_t vector_count{0};
  uint16_t column_major : 1;
  uint16_t reserved : 15;
};

struct DeletionMap {
  void set(uint32_t index) {
    bitset.set(index);
  }

  void reset(uint32_t index) {
    bitset.reset(index);
  }

  bool test(uint32_t index) const {
    return bitset.test(index);
  }

  bool is_dirty() const {
    return bitset.test_any();
  }

  ailego::FixedBitset<32> bitset{};
};

static_assert(sizeof(DeletionMap) == 4, "DeletionMap must be 4 bytes");

/*! Meta Information of Streamer Entity
 *
 *  Numeric fields start at zero through their in-class initializers; the
 *  constructor only wipes the reserved bytes.
 */
struct StreamerLinearMeta {
  StreamerLinearMeta() {
    memset(reserved_, 0, sizeof(reserved_));
  }
  uint64_t create_time{0};
  uint64_t update_time{0};
  uint64_t revision_id{0};
  uint32_t segment_count{0};
  uint32_t segment_size{0};
  uint8_t reserved_[32] = {0};
  LinearIndexHeader header;
};

/*! Location of Vector in Storage Segment
 */
struct VectorLocation {
  //! Constructor: segment 0, row layout flag cleared, offset 0
  VectorLocation(void) : VectorLocation(0u, false, 0u) {}

  //! Constructor
  VectorLocation(uint32_t id, bool col, uint32_t off)
      : segment_id(id), column_major(col), reserved(0u), offset(off) {}

  uint32_t segment_id{0};
  uint16_t column_major : 1;
  uint16_t reserved : 15;
  uint32_t offset{0};

 public:
  //! Two locations are equal when segment, layout flag and offset all match;
  //! the reserved bits are not compared.
  bool operator==(const VectorLocation &other) const {
    if (segment_id != other.segment_id) {
      return false;
    }
    if (column_major != other.column_major) {
      return false;
    }
    return offset == other.offset;
  }
};

// static_assert(sizeof(VectorLocation) == sizeof(uint64_t),
//               "VectorLocation must be size of 8 bytes");

/*! Pairs a centroid index with the location of a vector
 */
struct KeyInfo {
  //! Centroid index 0 and a default-constructed location
  KeyInfo(void) {}
  //! Explicit centroid index and location
  KeyInfo(uint32_t idx, const VectorLocation &loc)
      : centroid_idx(idx), location(loc) {}
  //! Location only; the centroid index keeps its default of 0
  KeyInfo(VectorLocation loc) : location(loc) {}
  uint32_t centroid_idx{0};
  VectorLocation location;
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/flat/flat_searcher.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "flat_searcher.h"
#include <zvec/core/framework/index_helper.h>
#include <zvec/core/framework/index_searcher.h>
#include "flat_distance_matrix.h"
#include "flat_searcher_context.h"
#include "flat_searcher_provider.h"
#include "flat_utility.h"

namespace zvec {
namespace core {

//! Create a provider bound to this searcher.
//!
//! The first call reads the mapping segment from the container and caches it
//! in `mapping_`; later calls reuse the cache. The cache is guarded by
//! `mapping_mutex_`.
//!
//! Returns nullptr when the index is not loaded, when the mapping segment is
//! missing or inconsistent with the features segment, or when the provider
//! cannot be allocated.
template <size_t BATCH_SIZE>
IndexProvider::Pointer FlatSearcher<BATCH_SIZE>::create_provider(void) const {
  std::lock_guard<std::mutex> lock(mapping_mutex_);

  if (mapping_.empty()) {
    // Both handles are only set by load(); without them there is nothing to
    // read the mapping from.
    if (!container_ || !features_segment_) {
      LOG_ERROR("Failed to create provider: index is not loaded");
      return nullptr;
    }

    auto mapping_segment = container_->get(FLAT_SEGMENT_MAPPING_SEG_ID);
    if (!mapping_segment) {
      LOG_ERROR("Failed to fetch segment %s",
                FLAT_SEGMENT_MAPPING_SEG_ID.c_str());
      return nullptr;
    }

    const size_t mapping_bytes = mapping_segment->data_size();
    if (mapping_bytes % sizeof(uint32_t) != 0) {
      LOG_ERROR("Invalid data size %zu of mapping segment", mapping_bytes);
      return nullptr;
    }

    // One mapping entry is expected per stored feature.
    const size_t mapping_count = mapping_bytes / sizeof(uint32_t);
    if (mapping_count * meta_.element_size() !=
        features_segment_->data_size()) {
      LOG_ERROR(
          "Mapping segment (%zu entries) mismatches features segment "
          "(%zu bytes)",
          mapping_count, features_segment_->data_size());
      return nullptr;
    }

    const uint32_t *mapping = nullptr;
    if (mapping_segment->read(0, reinterpret_cast<const void **>(&mapping),
                              mapping_bytes) != mapping_bytes) {
      LOG_ERROR("Failed to read data (%zu bytes) from mapping segment",
                mapping_bytes);
      return nullptr;
    }
    mapping_.assign(mapping, mapping + mapping_count);
  }
  return IndexProvider::Pointer(new (std::nothrow)
                                    FlatSearcherProvider<BATCH_SIZE>(this));
}

//! Load a flat index from storage.
//!
//! Steps: deserialize the index meta, resolve the metric (either the one
//! passed in, which must match the meta, or one built from the meta), set up
//! the distance matrix, then map the keys segment and build the
//! key -> index id lookup table.
//!
//! @param cntr     storage holding the index; must not be null
//! @param measure  optional metric; when null one is created from the meta
//! @return 0 on success, otherwise an IndexError code
template <size_t BATCH_SIZE>
int FlatSearcher<BATCH_SIZE>::load(IndexStorage::Pointer cntr,
                                   IndexMetric::Pointer measure) {
  ailego::ElapsedTime stamp;
  if (!cntr) {
    return IndexError_InvalidArgument;
  }

  int error_code = IndexHelper::DeserializeFromStorage(cntr.get(), &meta_);
  if (error_code != 0) {
    LOG_ERROR(
        "Failed to deserialize index meta from container %s, error=%d, %s",
        cntr->name().c_str(), error_code, IndexError::What(error_code));
    return error_code;
  }

  if (!measure) {
    error_code = InitializeMetric(meta_, &measure_);
    if (error_code != 0) {
      LOG_ERROR("Failed to initialize index measure %s, error=%d, %s",
                meta_.metric_name().c_str(), error_code,
                IndexError::What(error_code));
      return error_code;
    }
    // Prefer the dedicated query metric when the metric provides one.
    if (measure_->query_metric()) {
      measure_ = measure_->query_metric();
    }
  } else {
    if (!measure->is_matched(meta_)) {
      LOG_ERROR(
          "The index measure is unmatched with index meta from container.");
      return IndexError_Mismatch;
    }
    measure_ = std::move(measure);
  }

  column_major_order_ = (meta_.major_order() == IndexMeta::MO_COLUMN);
  distance_matrix_.initialize(*measure_);

  if (column_major_order_) {
    if (!distance_matrix_.is_valid()) {
      LOG_ERROR("Lack of distance functions to support column index.");
      return IndexError_Unsupported;
    }
  } else {
    if (!distance_matrix_.is_valid(1, 1)) {
      LOG_ERROR("Lack of distance functions to support row index.");
      return IndexError_Unsupported;
    }
  }

  auto keys_segment = cntr->get(FLAT_SEGMENT_KEYS_SEG_ID);
  if (!keys_segment) {
    LOG_ERROR("Failed to fetch segment %s", FLAT_SEGMENT_KEYS_SEG_ID.c_str());
    return IndexError_NoExist;
  }
  features_segment_ = cntr->get(FLAT_SEGMENT_FEATURES_SEG_ID);
  if (!features_segment_) {
    LOG_ERROR("Failed to fetch segment %s",
              FLAT_SEGMENT_FEATURES_SEG_ID.c_str());
    return IndexError_NoExist;
  }

  if (keys_segment->data_size() % sizeof(uint64_t) != 0) {
    LOG_ERROR("Invalid data size %zu of keys segment",
              keys_segment->data_size());
    return IndexError_InvalidLength;
  }

  // One key is expected per stored feature.
  size_t keys_count = keys_segment->data_size() / sizeof(uint64_t);
  if (keys_count * meta_.element_size() != features_segment_->data_size()) {
    LOG_ERROR("Invalid data size %zu of features segment",
              features_segment_->data_size());
    return IndexError_Mismatch;
  }

  if (keys_segment->read(0, reinterpret_cast<const void **>(&keys_),
                         keys_segment->data_size()) !=
      keys_segment->data_size()) {
    LOG_ERROR("Failed to read data (%zu bytes) from keys segment",
              keys_segment->data_size());
    return IndexError_ReadData;
  }

  // Rebuild the lookup table from scratch so that a reload does not keep
  // entries belonging to a previously loaded index.
  key_id_mapping_.clear();
  key_id_mapping_.reserve(keys_count);
  for (size_t i = 0; i < keys_count; i++) {
    key_id_mapping_[keys_[i]] = static_cast<node_id_t>(i);
  }

  {
    // The cached mapping was read from the previous container; drop it so the
    // next create_provider() call reads the one from `cntr`.
    std::lock_guard<std::mutex> lock(mapping_mutex_);
    mapping_.clear();
  }

  container_ = cntr;
  magic_ = IndexContext::GenerateMagic();
  stats_.set_loaded_count(keys_count);
  stats_.set_loaded_costtime(stamp.milli_seconds());
  return 0;
}

//! Similarity search for a single query.
//! Dispatches to the group-by, column-major or row-major implementation of
//! the flat searcher context.
template <size_t BATCH_SIZE>
int FlatSearcher<BATCH_SIZE>::search_impl(const void *query,
                                          const IndexQueryMeta &qmeta,
                                          Context::Pointer &context) const {
  ailego_assert(query && !!context);
  ailego_assert(measure_->is_matched(meta_, qmeta));

  auto *flat_ctx =
      dynamic_cast<FlatSearcherContext<BATCH_SIZE> *>(context.get());
  if (flat_ctx == nullptr) {
    LOG_ERROR("Invalid brute-force searcher context");
    return IndexError_InvalidArgument;
  }

  // A differing magic number means the context was not set up against the
  // current state of this searcher; rebind it.
  if (flat_ctx->magic() != magic_) {
    flat_ctx->reset(this);
  }

  if (flat_ctx->group_by_search()) {
    return flat_ctx->group_by_search_impl(query, qmeta, 1);
  }
  if (column_major_order_) {
    return flat_ctx->search_column(query, qmeta);
  }
  return flat_ctx->search_row(query, qmeta);
}

//! Similarity search for a batch of `count` queries.
//! Dispatches to the group-by, column-major or row-major implementation of
//! the flat searcher context.
template <size_t BATCH_SIZE>
int FlatSearcher<BATCH_SIZE>::search_impl(const void *query,
                                          const IndexQueryMeta &qmeta,
                                          uint32_t count,
                                          Context::Pointer &context) const {
  ailego_assert(query && count && !!context);
  ailego_assert(measure_->is_matched(meta_, qmeta));

  auto *flat_ctx =
      dynamic_cast<FlatSearcherContext<BATCH_SIZE> *>(context.get());
  if (flat_ctx == nullptr) {
    LOG_ERROR("Invalid brute-force searcher context");
    return IndexError_InvalidArgument;
  }

  // A differing magic number means the context was not set up against the
  // current state of this searcher; rebind it.
  if (flat_ctx->magic() != magic_) {
    flat_ctx->reset(this);
  }

  if (flat_ctx->group_by_search()) {
    return flat_ctx->group_by_search_impl(query, qmeta, count);
  }
  if (column_major_order_) {
    return flat_ctx->search_column(query, qmeta, count);
  }
  return flat_ctx->search_row(query, qmeta, count);
}

//! Linear search restricted to the given primary keys.
//! `p_keys` must hold exactly one key list per query (`count` lists).
template <size_t BATCH_SIZE>
int FlatSearcher<BATCH_SIZE>::search_bf_by_p_keys_impl(
    const void *query, const std::vector<std::vector<uint64_t>> &p_keys,
    const IndexQueryMeta &qmeta, uint32_t count,
    Context::Pointer &context) const {
  ailego_assert(query && count && !!context);
  ailego_assert(measure_->is_matched(meta_, qmeta));

  const bool size_mismatch = (p_keys.size() != count);
  if (ailego_unlikely(size_mismatch)) {
    LOG_ERROR("The size of p_keys is not equal to count");
    return IndexError_InvalidArgument;
  }

  auto *flat_ctx =
      dynamic_cast<FlatSearcherContext<BATCH_SIZE> *>(context.get());
  if (flat_ctx == nullptr) {
    LOG_ERROR("Invalid brute-force searcher context");
    return IndexError_InvalidArgument;
  }

  // A differing magic number means the context was not set up against the
  // current state of this searcher; rebind it.
  if (flat_ctx->magic() != magic_) {
    flat_ctx->reset(this);
  }

  return flat_ctx->search_bf_by_p_keys_impl(query, p_keys, qmeta, count);
}

//! Create a searcher context bound to this searcher
template <size_t BATCH_SIZE>
IndexSearcher::Context::Pointer FlatSearcher<BATCH_SIZE>::create_context(
    void) const {
  auto *flat_ctx = new FlatSearcherContext<BATCH_SIZE>(this);
  return IndexSearcher::Context::Pointer(flat_ctx);
}

// Register the searcher with the index factory under its aliases.
// LinearSearcher, FlatSearcher and FlatSearcher32 all resolve to the
// 32-wide instantiation; FlatSearcher16 resolves to the 16-wide one.
INDEX_FACTORY_REGISTER_SEARCHER_ALIAS(LinearSearcher, FlatSearcher<32>);
INDEX_FACTORY_REGISTER_SEARCHER_ALIAS(FlatSearcher, FlatSearcher<32>);
INDEX_FACTORY_REGISTER_SEARCHER_ALIAS(FlatSearcher16, FlatSearcher<16>);
INDEX_FACTORY_REGISTER_SEARCHER_ALIAS(FlatSearcher32, FlatSearcher<32>);
}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/flat/flat_searcher.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <unordered_map>
#include <zvec/ailego/container/params.h>
#include <zvec/core/framework/index_searcher.h>
#include "flat_distance_matrix.h"
#include "flat_index_format.h"

namespace zvec {
namespace core {

/*! Flat Searcher
 */
template <size_t BATCH_SIZE>
class FlatSearcher : public IndexSearcher {
 public:
  //! Destructor
  virtual ~FlatSearcher(void) = default;

  //! Initialize Searcher
  int init(const ailego::Params &index_params) override {
    params_ = index_params;
    read_block_size_ = FLAT_DEFAULT_READ_BLOCK_SIZE;
    index_params.get(PARAM_FLAT_READ_BLOCK_SIZE, &read_block_size_);
    return 0;
  }

  //! Cleanup Searcher
  int cleanup(void) override {
    return this->unload();
  }

  //! Load index from container
  int load(IndexStorage::Pointer cntr, IndexMetric::Pointer measure) override;

  //! Unload index
  int unload(void) override {
    container_ = nullptr;
    measure_ = nullptr;
    features_segment_ = nullptr;
    keys_ = nullptr;
    key_id_mapping_.clear();
    return 0;
  }

  //! Similarity brute force search
  int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,
                     Context::Pointer &context) const override {
    return this->search_impl(query, qmeta, context);
  }

  //! Similarity brute force search
  int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,
                     uint32_t count, Context::Pointer &context) const override {
    return this->search_impl(query, qmeta, count, context);
  }

  //! Similarity search
  int search_impl(const void *query, const IndexQueryMeta &qmeta,
                  Context::Pointer &context) const override;

  //! Similarity search
  int search_impl(const void *query, const IndexQueryMeta &qmeta,
                  uint32_t count, Context::Pointer &context) const override;

  //! Linear search by primary keys
  int search_bf_by_p_keys_impl(const void *query,
                               const std::vector<std::vector<uint64_t>> &p_keys,
                               const IndexQueryMeta &qmeta,
                               Context::Pointer &context) const override {
    return search_bf_by_p_keys_impl(query, p_keys, qmeta, 1, context);
  }

  //! Linear search by primary keys
  int search_bf_by_p_keys_impl(const void *query,
                               const std::vector<std::vector<uint64_t>> &p_keys,
                               const IndexQueryMeta &qmeta, uint32_t count,
                               Context::Pointer &context) const override;

  //! Retrieve statistics
  const IndexSearcher::Stats &stats(void) const override {
    return stats_;
  }

  //! Retrieve meta of index
  const IndexMeta &meta(void) const override {
    return meta_;
  }

  //! Retrieve params of index
  const ailego::Params &params(void) const override {
    return params_;
  }

  //! Create a searcher context
  IndexSearcher::Context::Pointer create_context(void) const override;

  //! Create a searcher provider
  IndexProvider::Pointer create_provider(void) const override;

  //! Retrieve magic number
  uint32_t magic(void) const {
    return magic_;
  }

  //! Retrieve block size of data read
  uint32_t read_block_size(void) const {
    return read_block_size_;
  }

  //! Retrieve primary key via index id
  uint64_t key(size_t i) const {
    return keys_[i];
  }

  // Retrieve index id via primary key
  node_id_t get_id(key_t key) const {
    auto it = key_id_mapping_.find(key);
    if (it != key_id_mapping_.end()) {
      return it->second;
    } else {
      return kInvalidNodeId;
    }
  }

  //! Retrieve primary key via index id
  uint32_t local_index(size_t i) const {
    return mapping_[i];
  }

  //! Retrieve primary key via index id
  inline bool column_major_order(void) const {
    return column_major_order_;
  }

  //! Retrieve the distance matrix
  const FlatDistanceMatrix<BATCH_SIZE> &distance_matrix(void) const {
    return distance_matrix_;
  }

  //! Clone a features segment
  IndexStorage::Segment::Pointer clone_features_segment(void) const {
    return features_segment_->clone();
  }

  const void *get_vector(key_t key) const override {
    auto provider = this->create_provider();
    return provider->get_vector(key);
  }

 private:
  //! Members
  const uint64_t *keys_{nullptr};
  std::unordered_map<key_t, node_id_t> key_id_mapping_;
  uint32_t magic_{IndexContext::GenerateMagic()};
  uint32_t read_block_size_{FLAT_DEFAULT_READ_BLOCK_SIZE};
  bool column_major_order_{false};
  IndexMeta meta_{};
  IndexStorage::Pointer container_{};
  IndexMetric::Pointer measure_{};
  ailego::Params params_{};
  IndexStorage::Segment::Pointer features_segment_{};
  mutable std::vector<uint32_t> mapping_{};
  mutable std::mutex mapping_mutex_{};
  FlatDistanceMatrix<BATCH_SIZE> distance_matrix_{};
  IndexSearcher::Stats stats_{};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/flat/flat_searcher_context.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <zvec/core/framework/index_document.h>
#include <zvec/core/framework/index_error.h>
#include "flat_index_format.h"
#include "flat_searcher.h"
#include "flat_utility.h"


namespace zvec {
namespace core {

/*! Brute Force Searcher Context
 *
 *  Per-search state of a FlatSearcher: result heaps, the score scratch
 *  buffer, a private clone of the features segment and the group-by state.
 */
template <size_t BATCH_SIZE>
class FlatSearcherContext : public IndexSearcher::Context {
 public:
  //! Constructor
  FlatSearcherContext(const FlatSearcher<BATCH_SIZE> *owner) {
    this->reset(owner);
  }

  //! Destructor
  virtual ~FlatSearcherContext(void) {}

  //! Set topk of search result
  void set_topk(uint32_t topk) override {
    topk_ = topk;
  }

  //! Retrieve search result
  const IndexDocumentList &result(void) const override {
    return result_heaps_.at(0);
  }

  //! Retrieve search result with index
  const IndexDocumentList &result(size_t index) const override {
    return result_heaps_.at(index);
  }

  //! Retrieve result object for output
  IndexDocumentList *mutable_result(size_t idx) override {
    return &result_heaps_.at(idx);
  }

  //! Retrieve search group result of the first query
  virtual const IndexGroupDocumentList &group_result(void) const override {
    return group_results_[0];
  }

  //! Retrieve search group result with index
  virtual const IndexGroupDocumentList &group_result(
      size_t idx) const override {
    return group_results_[idx];
  }

  //! Update the parameters of context
  int update(const ailego::Params & /*params*/) override {
    return 0;
  }

  //! Retrieve magic number
  uint32_t magic(void) const override {
    return magic_;
  }

  //! Get group topk
  inline uint32_t group_topk() const {
    return group_topk_;
  }

  //! Get group num
  inline uint32_t group_num() const {
    return group_num_;
  }

  //! Get the per-group heaps, keyed by group id
  inline std::map<std::string, TopkHeap> &group_topk_heaps() {
    return group_topk_heaps_;
  }

  //! Set whether results carry the stored vector
  void set_fetch_vector(bool v) override {
    fetch_vector_ = v;
  }

  //! Get whether results carry the stored vector
  bool fetch_vector() const override {
    return fetch_vector_;
  }

  //! Resize the group result list (only in group-by mode)
  inline void resize_group_results(size_t size) {
    if (group_by_search()) {
      group_results_.resize(size);
    }
  }

  //! Convert the per-group heaps into the group result of query `idx`.
  //!
  //! Groups are ranked by their best (first after sort) score in ascending
  //! order and truncated to group_num(). Each kept group contributes at most
  //! group_topk() documents, stopping at the first score above the
  //! threshold.
  void topk_to_group_result(uint32_t idx) {
    ailego_assert_with(idx < group_results_.size(), "invalid idx");

    group_results_[idx].clear();

    // Refers to an entry of group_topk_heaps_; no heap is copied.
    struct RankedGroup {
      const std::string *group_id;
      TopkHeap *heap;
      float best_score;
    };

    std::vector<RankedGroup> ranked_groups;
    ranked_groups.reserve(group_topk_heaps_.size());
    for (auto &entry : group_topk_heaps_) {
      TopkHeap &heap = entry.second;
      heap.sort();

      if (heap.size() > 0) {
        ranked_groups.push_back({&entry.first, &heap, heap[0].second});
      }
    }

    std::sort(ranked_groups.begin(), ranked_groups.end(),
              [](const RankedGroup &a, const RankedGroup &b) -> bool {
                return a.best_score < b.best_score;
              });

    // truncate to group num
    if (ranked_groups.size() > group_num()) {
      ranked_groups.resize(group_num());
    }

    group_results_[idx].resize(ranked_groups.size());

    // The provider is only needed when vectors are fetched; create it once,
    // on first use, instead of once per document.
    IndexProvider::Pointer provider;
    bool provider_requested = false;

    for (uint32_t i = 0; i < ranked_groups.size(); ++i) {
      TopkHeap &heap = *ranked_groups[i].heap;
      group_results_[idx][i].set_group_id(*ranked_groups[i].group_id);

      uint32_t size =
          std::min(group_topk_, static_cast<uint32_t>(heap.size()));

      for (uint32_t j = 0; j < size; ++j) {
        auto score = heap[j].second;
        if (score > this->threshold()) {
          break;
        }

        node_id_t id = heap[j].first;

        if (fetch_vector_ && !provider_requested) {
          provider = owner_->create_provider();
          provider_requested = true;
          if (!provider) {
            LOG_ERROR("Failed to create provider for fetching vectors");
          }
        }

        if (provider) {
          group_results_[idx][i].mutable_docs()->emplace_back(
              id, score, id, provider->get_vector(id));
        } else {
          // Either vectors were not requested or no provider is available.
          group_results_[idx][i].mutable_docs()->emplace_back(id, score, id);
        }
      }
    }
  }

  //! Get if group by search
  bool group_by_search() const {
    return group_num_ > 0;
  }

  //! Set group params
  void set_group_params(uint32_t group_num, uint32_t group_topk) override {
    group_num_ = group_num;
    group_topk_ = group_topk;
    group_topk_heaps_.clear();
  }

  void reset() override {}

  //! Reset the context
  //! Rebinds the context to `owner`: copies its magic number, recomputes the
  //! read size as the owner's read block size rounded up to a whole number
  //! of feature blocks, and clones the features segment.
  void reset(const FlatSearcher<BATCH_SIZE> *owner) {
    magic_ = owner->magic();
    feature_size_ = owner->meta().element_size();

    uint32_t block_size = feature_size_ * BATCH_SIZE;
    actual_read_size_ =
        (owner->read_block_size() + block_size - 1) / block_size * block_size;
    features_segment_ = owner->clone_features_segment();
    owner_ = owner;
  }

  //! Similarity search
  int search_row(const void *query, const IndexQueryMeta &qmeta) {
    return (this->filter().is_valid()
                ? this->search_row_filter(query, qmeta)
                : this->search_row_nofilter(query, qmeta));
  }

  //! Similarity search
  int search_row(const void *query, const IndexQueryMeta &qmeta, size_t count) {
    return (this->filter().is_valid()
                ? this->batch_search_row_filter(query, qmeta, count)
                : this->batch_search_row_nofilter(query, qmeta, count));
  }

  //! Similarity search
  int search_column(const void *query, const IndexQueryMeta &qmeta) {
    return (this->filter().is_valid()
                ? this->search_column_filter(query, qmeta)
                : this->search_column_nofilter(query, qmeta));
  }

  //! Similarity search
  int search_column(const void *query, const IndexQueryMeta &qmeta,
                    size_t count) {
    return (this->filter().is_valid()
                ? this->batch_search_column_filter(query, qmeta, count)
                : this->batch_search_column_nofilter(query, qmeta, count));
  }

  int group_by_search_impl(const void *query, const IndexQueryMeta &qmeta,
                           uint32_t count);

  int search_bf_by_p_keys_impl(const void *query,
                               const std::vector<std::vector<uint64_t>> &p_keys,
                               const IndexQueryMeta &qmeta, uint32_t count);

 protected:
  //! Enqueue items into the search heaps (without filter)
  //! Handles the queries K at a time and hands the remainder to the K/2
  //! instantiation.
  template <size_t K>
  auto batch_enqueue_nofilter(const void *block, size_t block_index,
                              size_t query_index, const IndexQueryMeta &qmeta,
                              size_t query_count) ->
      typename std::enable_if<K != 1 && IsEqualPowerofTwo<K>::value>::type {
    size_t query_batch_count = query_count / K;

    for (size_t i = 0; i != query_batch_count; ++i) {
      owner_->distance_matrix().template distance<BATCH_SIZE, K>(
          block, &batch_queries_[query_index * qmeta.element_size()],
          qmeta.dimension(), scores_);

      for (size_t k = 0; k != K; ++k) {
        IndexDocumentHeap *heap = &result_heaps_[query_index++];
        for (size_t j = 0; j != BATCH_SIZE; ++j) {
          heap->emplace(0, scores_[k * BATCH_SIZE + j], block_index + j);
        }
      }  // end of for
    }  // end of for

    size_t query_left_count = query_count % K;
    if (query_left_count != 0) {
      this->batch_enqueue_nofilter<(K >> 1)>(block, block_index, query_index,
                                             qmeta, query_left_count);
    }
  }

  //! Enqueue items into the search heaps (without filter)
  template <size_t K>
  auto batch_enqueue_nofilter(const void *block, size_t block_index,
                              size_t query_index, const IndexQueryMeta &qmeta,
                              size_t query_count) ->
      typename std::enable_if<K == 1>::type {
    ailego_assert(query_count == 1);
    (void)query_count;

    owner_->distance_matrix().template distance<BATCH_SIZE, 1>(
        block, &batch_queries_[query_index * qmeta.element_size()],
        qmeta.dimension(), scores_);

    IndexDocumentHeap *heap = &result_heaps_[query_index];
    for (size_t i = 0; i != BATCH_SIZE; ++i) {
      heap->emplace(0, scores_[i], block_index + i);
    }
  }

  //! Enqueue items into the search heaps (with filter)
  //! Item j of the block is enqueued only when bit j of `block_mask` is set.
  template <size_t K>
  auto batch_enqueue_filter(const void *block, size_t block_index,
                            size_t block_mask, size_t query_index,
                            const IndexQueryMeta &qmeta, size_t query_count) ->
      typename std::enable_if<K != 1 && IsEqualPowerofTwo<K>::value>::type {
    size_t query_batch_count = query_count / K;

    for (size_t i = 0; i != query_batch_count; ++i) {
      owner_->distance_matrix().template distance<BATCH_SIZE, K>(
          block, &batch_queries_[query_index * qmeta.element_size()],
          qmeta.dimension(), scores_);

      for (size_t k = 0; k != K; ++k) {
        IndexDocumentHeap *heap = &result_heaps_[query_index++];
        for (size_t j = 0; j != BATCH_SIZE; ++j) {
          // Shift the mask itself so the test is done in size_t, not int.
          if (((block_mask >> j) & 1u) != 0) {
            heap->emplace(0, scores_[k * BATCH_SIZE + j], block_index + j);
          }
        }
      }  // end of for
    }  // end of for

    size_t query_left_count = query_count % K;
    if (query_left_count != 0) {
      this->batch_enqueue_filter<(K >> 1)>(
          block, block_index, block_mask, query_index, qmeta, query_left_count);
    }
  }

  //! Enqueue items into the search heaps (with filter)
  //! Item i of the block is enqueued only when bit i of `block_mask` is set.
  template <size_t K>
  auto batch_enqueue_filter(const void *block, size_t block_index,
                            size_t block_mask, size_t query_index,
                            const IndexQueryMeta &qmeta, size_t query_count) ->
      typename std::enable_if<K == 1>::type {
    ailego_assert(query_count == 1);
    (void)query_count;

    owner_->distance_matrix().template distance<BATCH_SIZE, 1>(
        block, &batch_queries_[query_index * qmeta.element_size()],
        qmeta.dimension(), scores_);

    IndexDocumentHeap *heap = &result_heaps_[query_index];
    for (size_t i = 0; i != BATCH_SIZE; ++i) {
      // Shift the mask itself so the test is done in size_t, not int.
      if (((block_mask >> i) & 1u) != 0) {
        heap->emplace(0, scores_[i], block_index + i);
      }
    }
  }

  //! Enqueue items into the search heaps (without filter)
  //! Scores one feature against the queries K at a time and hands the
  //! remainder to the K/2 instantiation.
  template <size_t K>
  auto single_enqueue_nofilter(const void *feature, size_t feature_index,
                               size_t query_index, const IndexQueryMeta &qmeta,
                               size_t query_count) ->
      typename std::enable_if<K != 1 && IsEqualPowerofTwo<K>::value>::type {
    size_t query_batch_count = query_count / K;

    for (size_t i = 0; i != query_batch_count; ++i) {
      owner_->distance_matrix().template distance<K, 1>(
          &batch_queries_[query_index * qmeta.element_size()], feature,
          qmeta.dimension(), scores_);

      for (size_t k = 0; k != K; ++k) {
        result_heaps_[query_index++].emplace(0, scores_[k], feature_index);
      }
    }
    size_t query_left_count = query_count % K;
    if (query_left_count != 0) {
      this->single_enqueue_nofilter<(K >> 1)>(
          feature, feature_index, query_index, qmeta, query_left_count);
    }
  }

  //! Enqueue items into the search heaps (without filter)
  template <size_t K>
  auto single_enqueue_nofilter(const void *feature, size_t feature_index,
                               size_t query_index, const IndexQueryMeta &qmeta,
                               size_t query_count) ->
      typename std::enable_if<K == 1>::type {
    ailego_assert(query_count == 1);
    (void)query_count;

    owner_->distance_matrix().template distance<1>(
        feature, &batch_queries_[query_index * qmeta.element_size()],
        qmeta.dimension(), scores_);
    result_heaps_[query_index].emplace(0, scores_[0], feature_index);
  }

 protected:
  //! Similarity search (1 column without filter)
  int search_column_nofilter(const void *query, const IndexQueryMeta &qmeta);

  //! Similarity search (1 column with filter)
  int search_column_filter(const void *query, const IndexQueryMeta &qmeta);

  //! Similarity search (1 row without filter)
  int search_row_nofilter(const void *query, const IndexQueryMeta &qmeta);

  //! Similarity search (1 row with filter)
  int search_row_filter(const void *query, const IndexQueryMeta &qmeta);

  //! Similarity search (n columns without filter)
  int batch_search_column_nofilter(const void *query,
                                   const IndexQueryMeta &qmeta,
                                   size_t query_count);

  //! Similarity search (n columns with filter)
  int batch_search_column_filter(const void *query, const IndexQueryMeta &qmeta,
                                 size_t query_count);

  //! Similarity search (n rows without filter)
  int batch_search_row_nofilter(const void *query, const IndexQueryMeta &qmeta,
                                size_t query_count);

  //! Similarity search (n rows with filter)
  int batch_search_row_filter(const void *query, const IndexQueryMeta &qmeta,
                              size_t query_count);

 private:
  const FlatSearcher<BATCH_SIZE> *owner_{nullptr};
  uint32_t magic_{0};
  uint32_t topk_{0};
  uint32_t feature_size_{0};
  uint32_t actual_read_size_{0};
  IndexStorage::Segment::Pointer features_segment_{};
  std::vector<IndexDocumentHeap> result_heaps_{1};
  std::string batch_queries_{};
  float scores_[BATCH_SIZE * BATCH_SIZE];
  bool fetch_vector_{false};

  // group
  uint32_t group_num_{0}, group_topk_{0};
  std::map<std::string, TopkHeap> group_topk_heaps_{};
  std::vector<IndexGroupDocumentList> group_results_{};
};

//! Scan every stored feature for a single query, without a filter.
//! Features are consumed in blocks of BATCH_SIZE through the
//! `distance<BATCH_SIZE, 1>` kernel; whatever does not fill a whole block at
//! the end of the segment is scored one feature at a time.
//! Candidates enter the heap tagged with their running feature index and
//! only the survivors are mapped to primary keys before the final sort.
//! Returns 0 on success or IndexError_ReadData when a segment read comes up
//! short.
template <size_t BATCH_SIZE>
int FlatSearcherContext<BATCH_SIZE>::search_column_nofilter(
    const void *query, const IndexQueryMeta &qmeta) {
  IndexDocumentHeap &top = result_heaps_[0];
  top.clear();
  top.limit(topk_);
  top.set_threshold(this->threshold());

  size_t remaining = features_segment_->data_size();
  const size_t stride = feature_size_ * BATCH_SIZE;
  size_t cursor = 0;
  size_t next_id = 0;
  auto matrix = this->owner_->distance_matrix();

  // Score one full block and push its BATCH_SIZE candidates.
  auto scan_block = [&](const char *block) {
    matrix.template distance<BATCH_SIZE, 1>(block, query, qmeta.dimension(),
                                            scores_);
    for (size_t lane = 0; lane < BATCH_SIZE; ++lane) {
      top.emplace(0, scores_[lane], next_id++);
    }
  };

  // Bulk phase: fixed-size reads, each walked block by block.
  for (; remaining >= actual_read_size_;
       cursor += actual_read_size_, remaining -= actual_read_size_) {
    const void *chunk = nullptr;
    if (features_segment_->read(cursor, &chunk, actual_read_size_) !=
        actual_read_size_) {
      LOG_ERROR("Failed to read data (%u bytes) from features segment",
                actual_read_size_);
      return IndexError_ReadData;
    }

    const char *base = static_cast<const char *>(chunk);
    for (size_t pos = 0; pos < actual_read_size_; pos += stride) {
      scan_block(base + pos);
    }
  }

  // Tail phase: one read for whatever is left.
  const void *rest = nullptr;
  if (features_segment_->read(cursor, &rest, remaining) != remaining) {
    LOG_ERROR("Failed to read data (%zu bytes) from features segment",
              remaining);
    return IndexError_ReadData;
  }
  const char *tail = static_cast<const char *>(rest);

  // Whole blocks inside the tail still go through the block kernel ...
  const size_t blocked = remaining / stride * stride;
  size_t pos = 0;
  for (; pos != blocked; pos += stride) {
    scan_block(tail + pos);
  }

  // ... and the trailing features are scored individually.
  for (; pos < remaining; pos += feature_size_) {
    float score;
    matrix.template distance<1>(tail + pos, query, qmeta.dimension(), &score);
    top.emplace(0, score, next_id++);
  }

  // Resolve primary keys only for the documents that stayed in the heap.
  for (auto &doc : top) {
    doc.set_key(owner_->key(doc.index()));
  }
  top.sort();
  return 0;
}

//! Scan every stored feature for a single query, applying the context
//! filter. Features are scored in blocks of BATCH_SIZE through the
//! `distance<BATCH_SIZE, 1>` kernel; trailing features that do not fill a
//! block are scored individually (and only when they pass the filter).
//! A feature is kept when `filter()(key)` returns false. Accepted features
//! are pushed with their primary key, so no key fix-up pass is needed.
//! Returns 0 on success or IndexError_ReadData when a segment read comes up
//! short.
template <size_t BATCH_SIZE>
int FlatSearcherContext<BATCH_SIZE>::search_column_filter(
    const void *query, const IndexQueryMeta &qmeta) {
  IndexDocumentHeap *heap = &result_heaps_[0];
  heap->clear();
  heap->limit(topk_);
  heap->set_threshold(this->threshold());

  size_t left_size = features_segment_->data_size();
  size_t block_size = feature_size_ * BATCH_SIZE;
  size_t read_offset = 0;
  size_t feature_index = 0;
  auto matrix = owner_->distance_matrix();

  // Score one full block, then push the features the filter lets through.
  // (The original carried an empty `if (group_by_search()) {}` here; it had
  // no effect and has been dropped.)
  auto scan_block = [&](const char *block) {
    matrix.template distance<BATCH_SIZE, 1>(block, query, qmeta.dimension(),
                                            scores_);
    for (size_t i = 0; i != BATCH_SIZE; ++i) {
      const uint64_t feature_key = owner_->key(feature_index);
      if (!this->filter()(feature_key)) {
        heap->emplace(feature_key, scores_[i], feature_index);
      }
      feature_index += 1;
    }
  };

  while (left_size >= actual_read_size_) {
    const void *data = nullptr;
    if (features_segment_->read(read_offset, &data, actual_read_size_) !=
        actual_read_size_) {
      LOG_ERROR("Failed to read data (%u bytes) from features segment",
                actual_read_size_);
      return IndexError_ReadData;
    }

    for (size_t offset = 0; offset < actual_read_size_; offset += block_size) {
      scan_block(static_cast<const char *>(data) + offset);
    }
    read_offset += actual_read_size_;
    left_size -= actual_read_size_;
  }

  const void *data = nullptr;
  if (features_segment_->read(read_offset, &data, left_size) != left_size) {
    LOG_ERROR("Failed to read data (%zu bytes) from features segment",
              left_size);
    return IndexError_ReadData;
  }

  // Process left block features
  size_t left_size_aligned = left_size / block_size * block_size;
  for (size_t offset = 0; offset != left_size_aligned; offset += block_size) {
    scan_block(static_cast<const char *>(data) + offset);
  }

  // Process left single features; the distance is only computed for
  // features that pass the filter.
  for (size_t offset = left_size_aligned; offset < left_size;
       offset += feature_size_) {
    const uint64_t feature_key = owner_->key(feature_index);
    if (!this->filter()(feature_key)) {
      float score;
      matrix.template distance<1>(static_cast<const char *>(data) + offset,
                                  query, qmeta.dimension(), &score);
      heap->emplace(feature_key, score, feature_index);
    }
    feature_index += 1;
  }
  heap->sort();
  return 0;
}

//! Scan every stored feature for a single query, one feature per distance
//! call, without a filter.
//! The segment is consumed in reads of actual_read_size_ bytes followed by
//! one read for the remainder. Candidates are pushed with their running
//! feature index; primary keys are resolved only for heap survivors.
//! Returns 0 on success or IndexError_ReadData when a segment read comes up
//! short.
template <size_t BATCH_SIZE>
int FlatSearcherContext<BATCH_SIZE>::search_row_nofilter(
    const void *query, const IndexQueryMeta &qmeta) {
  IndexDocumentHeap &top = result_heaps_[0];
  top.clear();
  top.limit(topk_);
  top.set_threshold(this->threshold());

  size_t remaining = features_segment_->data_size();
  size_t cursor = 0;
  size_t next_id = 0;
  auto matrix = owner_->distance_matrix();

  // Score every feature stored in [buffer, buffer + bytes).
  auto score_rows = [&](const void *buffer, size_t bytes) {
    const char *base = static_cast<const char *>(buffer);
    for (size_t pos = 0; pos < bytes; pos += feature_size_) {
      float score;
      matrix.template distance<1>(base + pos, query, qmeta.dimension(),
                                  &score);
      top.emplace(0, score, next_id++);
    }
  };

  for (; remaining >= actual_read_size_;
       cursor += actual_read_size_, remaining -= actual_read_size_) {
    const void *chunk = nullptr;
    if (features_segment_->read(cursor, &chunk, actual_read_size_) !=
        actual_read_size_) {
      LOG_ERROR("Failed to read data (%u bytes) from features segment",
                actual_read_size_);
      return IndexError_ReadData;
    }
    score_rows(chunk, actual_read_size_);
  }

  const void *rest = nullptr;
  if (features_segment_->read(cursor, &rest, remaining) != remaining) {
    LOG_ERROR("Failed to read data (%zu bytes) from features segment",
              remaining);
    return IndexError_ReadData;
  }
  score_rows(rest, remaining);

  // Resolve primary keys only for the documents that stayed in the heap.
  for (auto &doc : top) {
    doc.set_key(owner_->key(doc.index()));
  }
  top.sort();
  return 0;
}

//! Scan every stored feature for a single query, one feature per distance
//! call, applying the context filter.
//! A feature is scored and pushed (with its primary key) only when
//! `filter()(key)` returns false.
//! Returns 0 on success or IndexError_ReadData when a segment read comes up
//! short.
template <size_t BATCH_SIZE>
int FlatSearcherContext<BATCH_SIZE>::search_row_filter(
    const void *query, const IndexQueryMeta &qmeta) {
  IndexDocumentHeap &top = result_heaps_[0];
  top.clear();
  top.limit(topk_);
  top.set_threshold(this->threshold());

  size_t remaining = features_segment_->data_size();
  size_t cursor = 0;
  size_t row_id = 0;
  auto matrix = owner_->distance_matrix();

  // Walk the features in [buffer, buffer + bytes), scoring the ones the
  // filter lets through. The running index advances for every feature.
  auto score_rows = [&](const void *buffer, size_t bytes) {
    const char *base = static_cast<const char *>(buffer);
    for (size_t pos = 0; pos < bytes; pos += feature_size_) {
      uint64_t row_key = owner_->key(row_id);
      if (!this->filter()(row_key)) {
        float score;
        matrix.template distance<1>(base + pos, query, qmeta.dimension(),
                                    &score);
        top.emplace(row_key, score, row_id);
      }
      row_id += 1;
    }
  };

  for (; remaining >= actual_read_size_;
       cursor += actual_read_size_, remaining -= actual_read_size_) {
    const void *chunk = nullptr;
    if (features_segment_->read(cursor, &chunk, actual_read_size_) !=
        actual_read_size_) {
      LOG_ERROR("Failed to read data (%u bytes) from features segment",
                actual_read_size_);
      return IndexError_ReadData;
    }
    score_rows(chunk, actual_read_size_);
  }

  const void *rest = nullptr;
  if (features_segment_->read(cursor, &rest, remaining) != remaining) {
    LOG_ERROR("Failed to read data (%zu bytes) from features segment",
              remaining);
    return IndexError_ReadData;
  }
  score_rows(rest, remaining);

  top.sort();
  return 0;
}

//! Multi-query scan using the block enqueue helpers, without a filter.
//! The queries are first transposed into batch_queries_; features are then
//! fed to batch_enqueue_nofilter() one BATCH_SIZE block at a time, and the
//! trailing features that do not fill a block go through
//! single_enqueue_nofilter(). Primary keys are resolved after the scan.
//! Returns 0 on success or IndexError_ReadData when a segment read comes up
//! short.
template <size_t BATCH_SIZE>
int FlatSearcherContext<BATCH_SIZE>::batch_search_column_nofilter(
    const void *query, const IndexQueryMeta &qmeta, size_t query_count) {
  // One freshly configured heap per query.
  result_heaps_.resize(query_count);
  for (size_t q = 0; q < result_heaps_.size(); ++q) {
    IndexDocumentHeap &heap = result_heaps_[q];
    heap.clear();
    heap.limit(topk_);
    heap.set_threshold(this->threshold());
  }

  // Lay the queries out in the form the block kernel expects.
  batch_queries_.clear();
  batch_queries_.reserve(query_count * qmeta.element_size());
  TransposeQueries<BATCH_SIZE>(query, qmeta, query_count, &batch_queries_);

  size_t remaining = features_segment_->data_size();
  const size_t stride = feature_size_ * BATCH_SIZE;
  size_t cursor = 0;
  size_t first_id = 0;  // index of the first feature in the current block

  // Bulk phase: fixed-size reads, each walked block by block.
  for (; remaining >= actual_read_size_;
       cursor += actual_read_size_, remaining -= actual_read_size_) {
    const void *chunk = nullptr;
    if (features_segment_->read(cursor, &chunk, actual_read_size_) !=
        actual_read_size_) {
      LOG_ERROR("Failed to read data (%u bytes) from features segment",
                actual_read_size_);
      return IndexError_ReadData;
    }

    const char *base = static_cast<const char *>(chunk);
    for (size_t pos = 0; pos < actual_read_size_; pos += stride) {
      this->batch_enqueue_nofilter<BATCH_SIZE>(base + pos, first_id, 0, qmeta,
                                               query_count);
      first_id += BATCH_SIZE;
    }
  }

  // Tail phase: one read for whatever is left.
  const void *rest = nullptr;
  if (features_segment_->read(cursor, &rest, remaining) != remaining) {
    LOG_ERROR("Failed to read data (%zu bytes) from features segment",
              remaining);
    return IndexError_ReadData;
  }
  const char *tail = static_cast<const char *>(rest);

  // Whole blocks inside the tail.
  const size_t blocked = remaining / stride * stride;
  size_t pos = 0;
  for (; pos != blocked; pos += stride) {
    this->batch_enqueue_nofilter<BATCH_SIZE>(tail + pos, first_id, 0, qmeta,
                                             query_count);
    first_id += BATCH_SIZE;
  }

  // Trailing features, one at a time.
  for (; pos < remaining; pos += feature_size_) {
    this->single_enqueue_nofilter<BATCH_SIZE>(tail + pos, first_id, 0, qmeta,
                                              query_count);
    first_id += 1;
  }

  // Resolve primary keys for the survivors and order each result list.
  for (size_t q = 0; q < result_heaps_.size(); ++q) {
    IndexDocumentHeap &heap = result_heaps_[q];
    for (auto &doc : heap) {
      doc.set_key(owner_->key(doc.index()));
    }
    heap.sort();
  }
  return 0;
}

//! Multi-query scan using the block enqueue helpers, with the context
//! filter applied.
//! For every BATCH_SIZE block a bit mask is built in which bit i is set when
//! feature i of the block passes the filter (`filter()(key)` is false).
//! Blocks whose mask is empty are skipped; the rest are handed to
//! batch_enqueue_filter(). Trailing features that do not fill a block are
//! filtered here and then passed to single_enqueue_nofilter().
//! Returns 0 on success or IndexError_ReadData when a segment read comes up
//! short.
template <size_t BATCH_SIZE>
int FlatSearcherContext<BATCH_SIZE>::batch_search_column_filter(
    const void *query, const IndexQueryMeta &qmeta, size_t query_count) {
  // Initialize resources
  result_heaps_.resize(query_count);
  for (auto &heap : result_heaps_) {
    heap.clear();
    heap.limit(topk_);
    heap.set_threshold(this->threshold());
  }

  // Transpose queries
  batch_queries_.clear();
  batch_queries_.reserve(query_count * qmeta.element_size());
  TransposeQueries<BATCH_SIZE>(query, qmeta, query_count, &batch_queries_);

  size_t left_size = features_segment_->data_size();
  size_t block_size = feature_size_ * BATCH_SIZE;
  size_t read_offset = 0;
  size_t block_index = 0;

  // Build the acceptance mask of the block starting at `first_index`.
  // The shifted one is a size_t: shifting a plain `int` 1 by 31 overflows
  // and, once widened, would also set every bit above 31.
  auto accepted_mask = [this](size_t first_index) {
    size_t mask = 0;
    for (size_t i = 0; i != BATCH_SIZE; ++i) {
      if (!this->filter()(this->owner_->key(first_index + i))) {
        mask |= (static_cast<size_t>(1) << i);
      }
    }
    return mask;
  };

  // Process feature blocks
  while (left_size >= actual_read_size_) {
    const void *data = nullptr;
    if (features_segment_->read(read_offset, &data, actual_read_size_) !=
        actual_read_size_) {
      LOG_ERROR("Failed to read data (%u bytes) from features segment",
                actual_read_size_);
      return IndexError_ReadData;
    }

    for (size_t offset = 0; offset < actual_read_size_; offset += block_size) {
      const size_t block_mask = accepted_mask(block_index);
      if (block_mask != 0) {
        this->batch_enqueue_filter<BATCH_SIZE>((const char *)data + offset,
                                               block_index, block_mask, 0,
                                               qmeta, query_count);
      }
      block_index += BATCH_SIZE;
    }
    read_offset += actual_read_size_;
    left_size -= actual_read_size_;
  }

  const void *data = nullptr;
  if (features_segment_->read(read_offset, &data, left_size) != left_size) {
    LOG_ERROR("Failed to read data (%zu bytes) from features segment",
              left_size);
    return IndexError_ReadData;
  }

  // Process left block features
  size_t left_size_aligned = left_size / block_size * block_size;
  for (size_t offset = 0; offset != left_size_aligned; offset += block_size) {
    const size_t block_mask = accepted_mask(block_index);
    if (block_mask != 0) {
      this->batch_enqueue_filter<BATCH_SIZE>((const char *)data + offset,
                                             block_index, block_mask, 0, qmeta,
                                             query_count);
    }
    block_index += BATCH_SIZE;
  }

  // Process left single features. The filter is evaluated here, so the
  // no-filter enqueue helper is the right one to call afterwards.
  for (size_t offset = left_size_aligned; offset < left_size;
       offset += feature_size_) {
    if (!this->filter()(owner_->key(block_index))) {
      this->single_enqueue_nofilter<BATCH_SIZE>(
          (const char *)data + offset, block_index, 0, qmeta, query_count);
    }
    block_index += 1;
  }

  // Normalize results
  for (auto &heap : result_heaps_) {
    for (auto &it : heap) {
      it.set_key(owner_->key(it.index()));
    }
    heap.sort();
  }
  return 0;
}

//! Multi-query scan scoring one feature against one query per distance
//! call, without a filter.
//! Query k is read at byte offset k * qmeta.element_size() from `query` and
//! its candidates go into result_heaps_[k]. Candidates are pushed with the
//! running feature index; primary keys are resolved after the scan.
//! Returns 0 on success or IndexError_ReadData when a segment read comes up
//! short.
template <size_t BATCH_SIZE>
int FlatSearcherContext<BATCH_SIZE>::batch_search_row_nofilter(
    const void *query, const IndexQueryMeta &qmeta, size_t query_count) {
  // One freshly configured heap per query.
  result_heaps_.resize(query_count);
  for (size_t q = 0; q < result_heaps_.size(); ++q) {
    IndexDocumentHeap &heap = result_heaps_[q];
    heap.clear();
    heap.limit(topk_);
    heap.set_threshold(this->threshold());
  }

  size_t remaining = features_segment_->data_size();
  size_t cursor = 0;
  size_t row_id = 0;
  auto matrix = owner_->distance_matrix();

  // Score every feature in [buffer, buffer + bytes) against all queries.
  auto fan_out = [&](const void *buffer, size_t bytes) {
    for (size_t pos = 0; pos < bytes; pos += feature_size_) {
      const void *row = static_cast<const char *>(buffer) + pos;
      size_t q_bytes = 0;

      for (auto &heap : result_heaps_) {
        float score;
        matrix.template distance<1>(
            row, static_cast<const char *>(query) + q_bytes, qmeta.dimension(),
            &score);
        heap.emplace(0, score, row_id);
        q_bytes += qmeta.element_size();
      }
      row_id += 1;
    }
  };

  // Bulk phase: fixed-size reads.
  for (; remaining >= actual_read_size_;
       cursor += actual_read_size_, remaining -= actual_read_size_) {
    const void *chunk = nullptr;
    if (features_segment_->read(cursor, &chunk, actual_read_size_) !=
        actual_read_size_) {
      LOG_ERROR("Failed to read data (%u bytes) from features segment",
                actual_read_size_);
      return IndexError_ReadData;
    }
    fan_out(chunk, actual_read_size_);
  }

  // Tail phase: one read for whatever is left.
  const void *rest = nullptr;
  if (features_segment_->read(cursor, &rest, remaining) != remaining) {
    LOG_ERROR("Failed to read data (%zu bytes) from features segment",
              remaining);
    return IndexError_ReadData;
  }
  fan_out(rest, remaining);

  // Resolve primary keys for the survivors and order each result list.
  for (size_t q = 0; q < result_heaps_.size(); ++q) {
    IndexDocumentHeap &heap = result_heaps_[q];
    for (auto &doc : heap) {
      doc.set_key(owner_->key(doc.index()));
    }
    heap.sort();
  }
  return 0;
}

//! Multi-query scan scoring one feature against one query per distance
//! call, with the context filter applied.
//! A feature is scored only when `filter()(key)` returns false; it is then
//! pushed, with its primary key, into the heap of every query. Query k is
//! read at byte offset k * qmeta.element_size() from `query`.
//! Returns 0 on success or IndexError_ReadData when a segment read comes up
//! short.
template <size_t BATCH_SIZE>
int FlatSearcherContext<BATCH_SIZE>::batch_search_row_filter(
    const void *query, const IndexQueryMeta &qmeta, size_t query_count) {
  // One freshly configured heap per query.
  result_heaps_.resize(query_count);
  for (size_t q = 0; q < result_heaps_.size(); ++q) {
    IndexDocumentHeap &heap = result_heaps_[q];
    heap.clear();
    heap.limit(topk_);
    heap.set_threshold(this->threshold());
  }

  size_t remaining = features_segment_->data_size();
  size_t cursor = 0;
  size_t row_id = 0;
  auto matrix = owner_->distance_matrix();

  // Walk the features in [buffer, buffer + bytes); the ones that pass the
  // filter are scored against all queries. The running index advances for
  // every feature, accepted or not.
  auto fan_out = [&](const void *buffer, size_t bytes) {
    for (size_t pos = 0; pos < bytes; pos += feature_size_) {
      uint64_t row_key = owner_->key(row_id);

      if (!this->filter()(row_key)) {
        size_t q_bytes = 0;
        const void *row = static_cast<const char *>(buffer) + pos;

        for (auto &heap : result_heaps_) {
          float score;
          matrix.template distance<1>(
              row, static_cast<const char *>(query) + q_bytes,
              qmeta.dimension(), &score);
          heap.emplace(row_key, score, row_id);
          q_bytes += qmeta.element_size();
        }
      }
      row_id += 1;
    }
  };

  // Bulk phase: fixed-size reads.
  for (; remaining >= actual_read_size_;
       cursor += actual_read_size_, remaining -= actual_read_size_) {
    const void *chunk = nullptr;
    if (features_segment_->read(cursor, &chunk, actual_read_size_) !=
        actual_read_size_) {
      LOG_ERROR("Failed to read data (%u bytes) from features segment",
                actual_read_size_);
      return IndexError_ReadData;
    }
    fan_out(chunk, actual_read_size_);
  }

  // Tail phase: one read for whatever is left.
  const void *rest = nullptr;
  if (features_segment_->read(cursor, &rest, remaining) != remaining) {
    LOG_ERROR("Failed to read data (%zu bytes) from features segment",
              remaining);
    return IndexError_ReadData;
  }
  fan_out(rest, remaining);

  // Keys were stored at insertion time, so only ordering remains.
  for (size_t q = 0; q < result_heaps_.size(); ++q) {
    result_heaps_[q].sort();
  }
  return 0;
}

//! Brute-force group-by search over every stored vector.
//! For each of the `count` queries (laid out back to back, each
//! qmeta.element_size() bytes) every vector that passes the optional filter
//! is scored, assigned to the group returned by the group-by function for
//! its primary key, and pushed into that group's top-k heap (limited to
//! group_topk()). The per-group heaps are then converted into the result of
//! query q through topk_to_group_result().
//! Returns 0 on success, IndexError_InvalidArgument when no valid group-by
//! function is set, or IndexError_ReadData when a vector cannot be fetched
//! from the provider.
template <size_t BATCH_SIZE>
int FlatSearcherContext<BATCH_SIZE>::group_by_search_impl(
    const void *query, const IndexQueryMeta &qmeta, uint32_t count) {
  this->resize_group_results(count);
  if (!this->group_by().is_valid()) {
    LOG_ERROR("Invalid group-by function");
    return IndexError_InvalidArgument;
  }

  auto provider = owner_->create_provider();
  const size_t total = provider->count();
  const bool has_filter = this->filter().is_valid();

  for (size_t q = 0; q < count; ++q) {
    this->group_topk_heaps().clear();

    for (node_id_t id = 0; id < total; ++id) {
      // Look the primary key up once; it feeds the filter, the vector
      // fetch and the group-by function.
      const uint64_t key = owner_->key(id);
      if (has_filter && this->filter()(key)) {
        continue;
      }

      // The provider returns nullptr when the read fails; never hand that
      // to the distance kernel.
      const void *vec = provider->get_vector(key);
      if (vec == nullptr) {
        LOG_ERROR("Failed to get vector of key %zu",
                  static_cast<size_t>(key));
        return IndexError_ReadData;
      }

      dist_t dist = 0;
      owner_->distance_matrix().template distance<1>(
          query, vec, provider->dimension(), &dist);

      std::string group_id = this->group_by()(key);
      auto &topk_heap = this->group_topk_heaps()[group_id];
      if (topk_heap.empty()) {
        topk_heap.limit(this->group_topk());
      }
      topk_heap.emplace(id, dist);
    }
    this->topk_to_group_result(q);
    query = static_cast<const char *>(query) + qmeta.element_size();
  }
  return 0;
}

//! Brute-force search restricted to explicit primary keys.
//! `p_keys[q]` lists the candidate keys of query q; the `count` queries are
//! laid out back to back, each qmeta.element_size() bytes. Keys rejected by
//! the optional filter are skipped, and so are keys for which the provider
//! cannot return a vector (for example a key that is not in the index).
//! In group-by mode candidates are collected per group and converted with
//! topk_to_group_result(); otherwise they go into result_heaps_[q], which
//! are sorted at the end.
//! Returns 0 on success or IndexError_InvalidArgument when `p_keys` holds
//! fewer than `count` lists or, in group-by mode, when no valid group-by
//! function is set.
template <size_t BATCH_SIZE>
int FlatSearcherContext<BATCH_SIZE>::search_bf_by_p_keys_impl(
    const void *query, const std::vector<std::vector<uint64_t>> &p_keys,
    const IndexQueryMeta &qmeta, uint32_t count) {
  // p_keys[q] is indexed for every q < count below.
  if (p_keys.size() < count) {
    LOG_ERROR("The size of p_keys (%zu) is less than query count (%u)",
              p_keys.size(), count);
    return IndexError_InvalidArgument;
  }

  auto provider = owner_->create_provider();
  const bool has_filter = this->filter().is_valid();

  if (this->group_by_search()) {
    this->resize_group_results(count);
    if (!this->group_by().is_valid()) {
      LOG_ERROR("Invalid group-by function");
      return IndexError_InvalidArgument;
    }

    for (size_t q = 0; q < count; ++q) {
      this->group_topk_heaps().clear();
      for (const uint64_t pk : p_keys[q]) {
        if (has_filter && this->filter()(pk)) {
          continue;
        }
        // nullptr means the key could not be resolved or read; skip it
        // instead of passing a null pointer to the distance kernel.
        const void *vec = provider->get_vector(pk);
        if (vec == nullptr) {
          continue;
        }

        dist_t dist = 0;
        owner_->distance_matrix().template distance<1>(
            query, vec, provider->dimension(), &dist);

        std::string group_id = this->group_by()(pk);
        auto &topk_heap = this->group_topk_heaps()[group_id];
        if (topk_heap.empty()) {
          topk_heap.limit(this->group_topk());
        }
        topk_heap.emplace(owner_->get_id(pk), dist);
      }
      this->topk_to_group_result(q);
      query = static_cast<const char *>(query) + qmeta.element_size();
    }
  } else {
    result_heaps_.resize(count);
    for (auto &heap : result_heaps_) {
      heap.clear();
      heap.limit(topk_);
      heap.set_threshold(this->threshold());
    }
    for (size_t q = 0; q < count; ++q) {
      for (const uint64_t pk : p_keys[q]) {
        if (has_filter && this->filter()(pk)) {
          continue;
        }
        // See above: unresolved keys are skipped.
        const void *vec = provider->get_vector(pk);
        if (vec == nullptr) {
          continue;
        }

        dist_t dist = 0;
        owner_->distance_matrix().template distance<1>(
            query, vec, provider->dimension(), &dist);
        result_heaps_[q].emplace(pk, dist, owner_->get_id(pk));
      }
      query = static_cast<const char *>(query) + qmeta.element_size();
    }
    for (auto &heap : result_heaps_) {
      heap.sort();
    }
  }
  return 0;
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/flat/flat_searcher_provider.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include "flat_distance_matrix.h"
#include "flat_searcher.h"
// #include "flat_streamer.h"
#include "flat_utility.h"

namespace zvec {
namespace core {

/*! Brute Force Searcher Provider
 *
 *  Gives access to the vectors of a FlatSearcher, either by primary key or
 *  sequentially through an iterator. When the owner stores its features in
 *  column-major order, every full block of BATCH_SIZE features is converted
 *  back to row format before a vector is handed out; features after the last
 *  full block are read as they are.
 *
 *  get_vector() may return a pointer into an internal scratch buffer that is
 *  overwritten by the next call on the same provider.
 */
template <size_t BATCH_SIZE>
class FlatSearcherProvider : public IndexProvider {
 public:
  //! Constructor
  //! Clones the owner's features segment and derives the vector count from
  //! the segment size divided by the element size.
  FlatSearcherProvider(const FlatSearcher<BATCH_SIZE> *owner) {
    feature_size_ = owner->meta().element_size();
    features_segment_ = owner->clone_features_segment();
    total_vector_count_ =
        features_segment_->data_size() / owner->meta().element_size();
    owner_ = owner;
    block_buffer_.resize(BATCH_SIZE * feature_size_);
  }

  //! Create a new iterator
  //! The allocation is non-throwing, so the returned pointer may be empty.
  IndexProvider::Iterator::Pointer create_iterator(void) override {
    return IndexProvider::Iterator::Pointer(
        new (std::nothrow) FlatSearcherProvider::Iterator(owner_));
  }

  //! Retrieve count of vectors
  size_t count(void) const override {
    return total_vector_count_;
  }

  //! Retrieve dimension of vector
  size_t dimension(void) const override {
    return owner_->meta().dimension();
  }

  //! Retrieve type of vector
  IndexMeta::DataType data_type(void) const override {
    return owner_->meta().data_type();
  }

  //! Retrieve vector size in bytes
  size_t element_size(void) const override {
    return owner_->meta().element_size();
  }

  //! Retrieve a vector using a primary key
  //! Returns nullptr when the key cannot be resolved or the read fails.
  const void *get_vector(uint64_t key) const override {
    return this->get_vector_by_index(owner_->get_id(key));
  }

  //! Retrieve the owner class
  const std::string &owner_class(void) const override {
    return owner_->name();
  }

 protected:
  /*! Brute Force Provider Iterator
   *
   *  Walks the features in storage order, loading up to BATCH_SIZE of them
   *  per read.
   */
  class Iterator : public IndexProvider::Iterator {
   public:
    //! Constructor
    //! Positions the iterator on the first vector by loading the first
    //! block.
    Iterator(const FlatSearcher<BATCH_SIZE> *owner) {
      block_buffer_.resize(BATCH_SIZE * owner->meta().element_size());
      feature_size_ = owner->meta().element_size();
      features_segment_ = owner->clone_features_segment();
      total_vector_count_ =
          features_segment_->data_size() / owner->meta().element_size();
      owner_ = owner;
      cursor_index_ = 0;
      offset_ = 0;
      this->next_block();
    }

    //! Retrieve pointer of data
    //! NOTICE: the vec feature will be changed after iterating to next, so
    //! the caller need to keep a copy of it before iterator to next vector
    const void *data(void) const override {
      return data_;
    }

    //! Test if the iterator is valid
    //! False once a read has failed or the cursor has passed the last
    //! vector.
    bool is_valid(void) const override {
      return (!invalid_ && cursor_index_ < total_vector_count_);
    }

    //! Retrieve primary key
    uint64_t key(void) const override {
      return owner_->key(cursor_index_);
    }

    //! Next iterator
    //! Steps inside the current block, or loads the next block when the
    //! cursor reaches a multiple of BATCH_SIZE.
    void next(void) override {
      ++cursor_index_;

      if ((cursor_index_ % BATCH_SIZE) != 0) {
        data_ += feature_size_;
      } else {
        this->next_block();
      }
    }

   protected:
    //! Read a block of data
    //! Loads BATCH_SIZE features, or fewer when less than that remain, and
    //! marks the iterator invalid on a short read or at the end.
    void next_block(void) {
      const void *read_data = nullptr;
      size_t read_size = 0;

      if (cursor_index_ >= total_vector_count_) {
        invalid_ = true;
        return;
      }

      if (cursor_index_ + BATCH_SIZE < total_vector_count_) {
        read_size = BATCH_SIZE * feature_size_;
      } else {
        read_size = (total_vector_count_ - cursor_index_) * feature_size_;
      }
      if (features_segment_->read(offset_, &read_data, read_size) !=
          read_size) {
        LOG_ERROR("Failed to read data (%zu bytes) from features segment",
                  read_size);
        invalid_ = true;
        return;
      }
      offset_ += read_size;

      // The order of data may be a column format, convert it to the row format.
      // Only full blocks are converted; a shorter final read is used as is.
      if (owner_->column_major_order() &&
          read_size == BATCH_SIZE * feature_size_) {
        uint32_t align_size =
            IndexMeta::AlignSizeof(owner_->meta().data_type());
        ReverseTranspose<BATCH_SIZE>(align_size, read_data,
                                     feature_size_ / align_size,
                                     &block_buffer_[0]);
        data_ = block_buffer_.data();
      } else {
        data_ = reinterpret_cast<const uint8_t *>(read_data);
      }
    }

   private:
    const FlatSearcher<BATCH_SIZE> *owner_{nullptr};
    IndexStorage::Segment::Pointer features_segment_{};
    uint32_t total_vector_count_{0};
    uint32_t feature_size_{0};
    std::vector<uint8_t> block_buffer_{};
    const uint8_t *data_{nullptr};
    uint64_t offset_{0};
    uint32_t cursor_index_{0};
    bool invalid_{false};
  };

  //! Retrieve a vector via local index
  //! Returns nullptr for kInvalidNodeId, for an index past the last vector,
  //! or when the segment read fails. For a column-major owner and an index
  //! inside a full block, the whole block is read, converted to row format
  //! in block_buffer_, and a pointer into that buffer is returned.
  const void *get_vector_by_index(uint32_t index) const {
    const void *read_data = nullptr;
    if (index == kInvalidNodeId) {
      LOG_ERROR("Failed to get vector by Invalid Id.");
      return nullptr;
    }
    if (index >= total_vector_count_) {
      LOG_ERROR("Vector index %u is out of range (count %u)", index,
                total_vector_count_);
      return nullptr;
    }

    if (owner_->column_major_order() &&
        index < (total_vector_count_ - (total_vector_count_ % BATCH_SIZE))) {
      uint32_t block_size = feature_size_ * BATCH_SIZE;
      // Widen before multiplying so the byte offset cannot wrap at 4 GiB.
      const uint64_t offset =
          static_cast<uint64_t>(index - (index % BATCH_SIZE)) * feature_size_;

      if (features_segment_->read(offset, &read_data, block_size) !=
          block_size) {
        LOG_ERROR("Failed to read data (%u bytes) from features segment",
                  block_size);
        return nullptr;
      }

      uint32_t align_size = IndexMeta::AlignSizeof(owner_->meta().data_type());
      ReverseTranspose<BATCH_SIZE>(
          align_size, read_data, feature_size_ / align_size, &block_buffer_[0]);
      read_data = block_buffer_.data() + ((index % BATCH_SIZE) * feature_size_);

    } else {
      // `index * feature_size_` would be a 32-bit multiply and wrap for
      // segments larger than 4 GiB; compute the offset in 64 bits.
      const uint64_t offset = static_cast<uint64_t>(index) * feature_size_;
      if (features_segment_->read(offset, &read_data, feature_size_) !=
          feature_size_) {
        LOG_ERROR("Failed to read data (%u bytes) from features segment",
                  feature_size_);
        return nullptr;
      }
    }
    return read_data;
  }

 private:
  //! Members
  const FlatSearcher<BATCH_SIZE> *owner_{nullptr};
  IndexStorage::Segment::Pointer features_segment_{};
  uint32_t feature_size_{0};
  uint32_t total_vector_count_{0};
  //! Scratch space for one row-format block; mutable because the const
  //! get_vector() path writes into it.
  mutable std::vector<uint8_t> block_buffer_{};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/flat/flat_streamer.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "flat_streamer.h"
#include <zvec/core/framework/index_factory.h>
#include "flat_streamer_context.h"
#include "flat_streamer_dumper.h"
#include "flat_streamer_provider.h"

namespace zvec {
namespace core {

// Declares an ailego::WriteLock named `write_lock` over MUTEX and a
// std::unique_lock named LOCK_NAME that locks it immediately.
// The fixed name `write_lock` means the macro can be expanded only once per
// scope.
#define WRITE_LOCK_GUARD(MUTEX, LOCK_NAME) \
  ailego::WriteLock write_lock(MUTEX);     \
  std::unique_lock<ailego::WriteLock> LOCK_NAME(write_lock);

// Declares an ailego::ReadLock named `read_lock` over MUTEX and a
// std::unique_lock named LOCK_NAME constructed with std::defer_lock, i.e.
// NOT yet locked; the caller decides when to call LOCK_NAME.lock().
// The fixed name `read_lock` means the macro can be expanded only once per
// scope.
#define READ_LOCK_GUARD_DEFER(MUTEX, LOCK_NAME) \
  ailego::ReadLock read_lock(MUTEX);            \
  std::unique_lock<ailego::ReadLock> LOCK_NAME(read_lock, std::defer_lock);

//! Constructor: initialises the entity_ member from stats_.
//! NOTE(review): if entity_ reads stats_ during construction (rather than
//! just keeping a reference), stats_ must be declared before entity_ in the
//! class -- confirm against the header.
template <size_t BATCH_SIZE>
FlatStreamer<BATCH_SIZE>::FlatStreamer() : entity_(stats_) {}

//! Destructor: runs cleanup() if the streamer is still in the
//! STATE_INITED state when it is destroyed.
template <size_t BATCH_SIZE>
FlatStreamer<BATCH_SIZE>::~FlatStreamer() {
  if (state_ != STATE_INITED) {
    return;
  }
  this->cleanup();
}

//! Initialize the streamer from an index meta and a parameter set.
//! Copies the meta, resolves the metric, validates the major order, reads the
//! flat-specific parameters and configures the entity. On success the state
//! becomes STATE_INITED and 0 is returned; otherwise an IndexError code is
//! returned.
template <size_t BATCH_SIZE>
int FlatStreamer<BATCH_SIZE>::init(const IndexMeta &imeta,
                                   const ailego::Params &params) {
  // Work on a private copy of the meta and record this streamer on it.
  meta_ = imeta;
  meta_.set_streamer("FlatStreamer", 0U, params);

  const int metric_error = InitializeMetric(meta_, &metric_);
  if (metric_error != 0) {
    LOG_ERROR("Failed to initialize index metric %s, error=%d, %s",
              meta_.metric_name().c_str(), metric_error,
              IndexError::What(metric_error));
    return metric_error;
  }
  // Switch to the query metric when the metric provides one.
  if (metric_->query_metric()) {
    metric_ = metric_->query_metric();
  }

  // Parameter settings: an explicit major-order parameter overrides the meta.
  if (params.get(PARAM_FLAT_COLUMN_MAJOR_ORDER, &column_major_order_)) {
    meta_.set_major_order(column_major_order_ ? IndexMeta::MO_COLUMN
                                              : IndexMeta::MO_ROW);
  }

  // Verify column major order (anything that is not explicitly row-major).
  if (meta_.major_order() != IndexMeta::MO_ROW) {
    const IndexMeta::DataType ft = meta_.data_type();
    bool support_column_major = true;

    // The data type must be one of the listed types with its native unit size.
    const bool listed_type =
        (ft == IndexMeta::DT_FP32 || ft == IndexMeta::DT_FP16 ||
         ft == IndexMeta::DT_INT8 || ft == IndexMeta::DT_INT4 ||
         ft == IndexMeta::DT_BINARY32 || ft == IndexMeta::DT_BINARY64);
    if (!listed_type || meta_.unit_size() != IndexMeta::UnitSizeof(ft)) {
      // An explicit column-major request is rejected; an undefined order
      // merely loses column-major support.
      if (meta_.major_order() == IndexMeta::MO_COLUMN) {
        LOG_ERROR("Unsupported type %d with unit size %u.", ft,
                  meta_.unit_size());
        return IndexError_Unsupported;
      }
      support_column_major = false;
    }

    // The element size must be a multiple of the type's alignment size.
    if (meta_.element_size() % IndexMeta::AlignSizeof(ft) != 0) {
      if (meta_.major_order() == IndexMeta::MO_COLUMN) {
        LOG_ERROR("Unsupported type %d with dimension %u.", ft,
                  meta_.dimension());
        return IndexError_Unsupported;
      }
      support_column_major = false;
    }

    // An undefined order that passed both checks is resolved to row-major.
    if (support_column_major &&
        meta_.major_order() == IndexMeta::MO_UNDEFINED) {
      meta_.set_major_order(IndexMeta::MO_ROW);
    }
  }

  if (!VerifyMetric(meta_)) {
    LOG_ERROR("Invalid index metric %s.", meta_.metric_name().c_str());
    return IndexError_InvalidArgument;
  }

  // Optional parameters; the members keep their defaults when absent.
  read_block_size_ = FLAT_DEFAULT_READ_BLOCK_SIZE;
  params.get(PARAM_FLAT_READ_BLOCK_SIZE, &read_block_size_);
  params.get(PARAM_FLAT_USE_ID_MAP, &use_key_info_map_);

  // Entity initialization. The class constants are copied into locals before
  // being handed to the setters.
  uint32_t vectors_per_block = kDefaultBlockVecCount;
  uint32_t bytes_per_segment = kDefaultSegmentSize;
  bool drop_duplicate_keys = true;
  entity_.set_block_vector_count(vectors_per_block);
  entity_.set_segment_size(bytes_per_segment);
  entity_.enable_filter_same_key(drop_duplicate_keys);
  entity_.set_linear_list_count(1);
  entity_.set_use_key_info_map(use_key_info_map_);
  *entity_.mutable_meta() = meta_;

  state_ = STATE_INITED;
  return 0;
}

//! Cleanup the streamer: closes it first when it is still opened, then
//! returns to STATE_INIT. Always returns 0; the result of close() is ignored.
template <size_t BATCH_SIZE>
int FlatStreamer<BATCH_SIZE>::cleanup() {
  const bool still_opened = (state_ == STATE_OPENED);
  if (still_opened) {
    this->close();
  }

  LOG_DEBUG("FlatStreamer cleanup");

  state_ = STATE_INIT;
  return 0;
}

//! Open the streamer on a storage.
//! Requires a non-null storage and a streamer in STATE_INITED. On success a
//! fresh magic number is generated, the state becomes STATE_OPENED and 0 is
//! returned; otherwise an IndexError code is returned.
template <size_t BATCH_SIZE>
int FlatStreamer<BATCH_SIZE>::open(IndexStorage::Pointer stg) {
  if (!stg) {
    LOG_ERROR("Failed to open for invalid storage");
    return IndexError_InvalidArgument;
  }
  if (ailego_unlikely(state_ != STATE_INITED)) {
    LOG_ERROR("Open storage failed, init streamer first!");
    return IndexError_NoReady;
  }

  LOG_DEBUG("FlatStreamer open with %s", stg->name().c_str());

  // Ownership of the storage handle moves into the entity.
  const int open_result = entity_.open(std::move(stg), meta_);
  if (ailego_unlikely(open_result != 0)) {
    LOG_ERROR("Failed to open storage");
    return open_result;
  }

  // Contexts compare their stored magic against this value when searching.
  magic_ = IndexContext::GenerateMagic();
  state_ = STATE_OPENED;
  return 0;
}

//! Close the streamer: flushes the entity's linear meta, clears the
//! statistics and closes the entity. The state returns to STATE_INITED only
//! when the entity closed successfully; otherwise the entity's error code is
//! returned and the state is left untouched.
template <size_t BATCH_SIZE>
int FlatStreamer<BATCH_SIZE>::close(void) {
  LOG_DEBUG("FlatStreamer close");

  entity_.flush_linear_meta();
  stats_.clear();

  const int close_result = entity_.close();
  if (ailego_unlikely(close_result != 0)) {
    return close_result;
  }

  state_ = STATE_INITED;
  return 0;
}

//! Flush the entity with the given checkpoint and return the entity's result.
template <size_t BATCH_SIZE>
int FlatStreamer<BATCH_SIZE>::flush(uint64_t checkpoint) {
  const size_t printable_checkpoint = static_cast<size_t>(checkpoint);
  LOG_INFO("FlatStreamer flush with checkpoint %zu", printable_checkpoint);
  return entity_.flush(checkpoint);
}

//! Dump the index through `dumper`.
//! Records the matching searcher name on the meta, takes the dump write lock
//! (the add paths fail their try_lock while it is held), runs a
//! FlatStreamerDumper and adds its dump size to the statistics. Returns the
//! dumper's result code.
template <size_t BATCH_SIZE>
int FlatStreamer<BATCH_SIZE>::dump(const IndexDumper::Pointer &dumper) {
  // The 16-wide instantiation pairs with its own searcher name.
  const std::string searcher_name =
      (BATCH_SIZE == 16) ? "FlatSearcher16" : "FlatSearcher";
  meta_.set_searcher(searcher_name, 0U, ailego::Params());

  WRITE_LOCK_GUARD(dump_mutex_, dump_lock);

  auto streamer_dumper = std::make_shared<FlatStreamerDumper<BATCH_SIZE>>(this);
  const int dump_result = streamer_dumper->dump(dumper);

  // The dumped size is accounted for regardless of the result code.
  *(stats_.mutable_dumped_size()) += streamer_dumper->dump_size();
  return dump_result;
}

//! Create a search/add context bound to this streamer.
//! Returns an empty pointer when the streamer has not been opened.
template <size_t BATCH_SIZE>
IndexStreamer::Context::UPointer FlatStreamer<BATCH_SIZE>::create_context(
    void) const {
  if (state_ == STATE_OPENED) {
    return IndexStreamer::Context::Pointer(
        new FlatStreamerContext<BATCH_SIZE>(this));
  }

  LOG_ERROR("Failed to create Context, open storage first!");
  return Context::UPointer();
}

//! Create a provider over this streamer. The provider is allocated with a
//! non-throwing new, so the returned pointer is null when allocation fails.
template <size_t BATCH_SIZE>
IndexProvider::Pointer FlatStreamer<BATCH_SIZE>::create_provider(void) const {
  auto *raw_provider = new (std::nothrow) FlatStreamerProvider<BATCH_SIZE>(this);
  return IndexProvider::Pointer(raw_provider);
}

//! Add one vector under primary key `pkey`.
//! The vector must be non-null and its query meta must match the index meta
//! in dimension, data type and element size; `context` must be a
//! FlatStreamerContext. The add is refused while a dump holds the dump lock.
//! Every failure increments the discarded counter. Returns 0 on success or
//! an IndexError code.
template <size_t BATCH_SIZE>
int FlatStreamer<BATCH_SIZE>::add_impl(uint64_t pkey, const void *query,
                                       const IndexQueryMeta &qmeta,
                                       Context::UPointer &context) {
  const bool arguments_ok = query &&
                            qmeta.dimension() == meta_.dimension() &&
                            qmeta.data_type() == meta_.data_type() &&
                            qmeta.element_size() == meta_.element_size();
  if (!arguments_ok) {
    LOG_ERROR(
        "Failed to add for invalid arguments, query=%p, qmeta(type=%u "
        "dim=%u size=%u) vs meta(type=%u dim=%u size=%u)",
        query, qmeta.data_type(), qmeta.dimension(), qmeta.element_size(),
        meta_.data_type(), meta_.dimension(), meta_.element_size());
    (*stats_.mutable_discarded_count())++;
    return IndexError_InvalidArgument;
  }

  // The context is only type-checked here; it is not used any further.
  auto *streamer_ctx =
      dynamic_cast<FlatStreamerContext<BATCH_SIZE> *>(context.get());
  if (streamer_ctx == nullptr) {
    LOG_ERROR("Failed to cast FlatStreamerContext");
    (*stats_.mutable_discarded_count())++;
    return IndexError_Cast;
  }

  // Take the read side without blocking: failure means a dump is running.
  READ_LOCK_GUARD_DEFER(dump_mutex_, dump_lock);
  const bool lock_acquired = dump_lock.try_lock();
  if (!lock_acquired) {
    LOG_ERROR("Cannot add vector while dumping index");
    (*stats_.mutable_discarded_count())++;
    return IndexError_Unsupported;
  }

  // IndexQueryMeta iv_qmeta;
  // int ret = entity_.convert(query, qmeta, &query, &iv_qmeta);
  // if (ret != 0) {
  //   LOG_ERROR("Failed to convert record for %s",
  //             IndexError::What(ret));
  //   (*stats_.mutable_discarded_count())++;
  //   return ret;
  // }

  const int add_result = entity_.add(pkey, query, qmeta.element_size());
  if (add_result != 0) {
    LOG_ERROR("Failed to add record for %s", IndexError::What(add_result));
    (*stats_.mutable_discarded_count())++;
    return add_result;
  }
  return 0;
}

//! Add one vector under an explicit id.
//! Performs the same argument, context and dump-lock checks as the keyed add,
//! then stores the vector through the entity's add_vector_with_id(). Every
//! failure increments the discarded counter. Returns 0 on success or an
//! IndexError code.
template <size_t BATCH_SIZE>
int FlatStreamer<BATCH_SIZE>::add_with_id_impl(uint32_t id, const void *query,
                                               const IndexQueryMeta &qmeta,
                                               Context::Pointer &context) {
  const bool arguments_ok = query &&
                            qmeta.dimension() == meta_.dimension() &&
                            qmeta.data_type() == meta_.data_type() &&
                            qmeta.element_size() == meta_.element_size();
  if (!arguments_ok) {
    LOG_ERROR(
        "Failed to add for invalid arguments, query=%p, qmeta(type=%u "
        "dim=%u size=%u) vs meta(type=%u dim=%u size=%u)",
        query, qmeta.data_type(), qmeta.dimension(), qmeta.element_size(),
        meta_.data_type(), meta_.dimension(), meta_.element_size());
    (*stats_.mutable_discarded_count())++;
    return IndexError_InvalidArgument;
  }

  // The context is only type-checked here; it is not used any further.
  auto *streamer_ctx =
      dynamic_cast<FlatStreamerContext<BATCH_SIZE> *>(context.get());
  if (streamer_ctx == nullptr) {
    LOG_ERROR("Failed to cast FlatStreamerContext");
    (*stats_.mutable_discarded_count())++;
    return IndexError_Cast;
  }

  // Take the read side without blocking: failure means a dump is running.
  READ_LOCK_GUARD_DEFER(dump_mutex_, dump_lock);
  const bool lock_acquired = dump_lock.try_lock();
  if (!lock_acquired) {
    LOG_ERROR("Cannot add vector while dumping index");
    (*stats_.mutable_discarded_count())++;
    return IndexError_Unsupported;
  }

  const int add_result =
      entity_.add_vector_with_id(id, query, qmeta.element_size());
  if (add_result != 0) {
    LOG_ERROR("Failed to add record for %s", IndexError::What(add_result));
    (*stats_.mutable_discarded_count())++;
    return add_result;
  }
  return 0;
}

//! Brute-force search for `count` consecutive queries.
//! `query` points at `count` vectors laid out back to back, each
//! `qmeta.element_size()` bytes long. `context` must be a
//! FlatStreamerContext; it is re-bound to this streamer when its magic number
//! differs. When the context is configured for group-by, the call is
//! forwarded to group_by_search_impl(). Results for query q are stored in the
//! context's result slot q. Returns 0 on success or an IndexError code.
template <size_t BATCH_SIZE>
int FlatStreamer<BATCH_SIZE>::search_bf_impl(const void *query,
                                             const IndexQueryMeta &qmeta,
                                             uint32_t count,
                                             Context::Pointer &context) const {
  ailego_assert(query && count && !!context);
  ailego_assert(metric_->is_matched(meta_, qmeta));

  FlatStreamerContext<BATCH_SIZE> *bf_context =
      dynamic_cast<FlatStreamerContext<BATCH_SIZE> *>(context.get());
  if (!bf_context) {
    LOG_ERROR("Invalid brute-force streamer context");
    return IndexError_InvalidArgument;
  }

  if (bf_context->magic() != magic_) {
    bf_context->reset(this);
  }

  if (bf_context->group_by_search()) {
    return group_by_search_impl(query, qmeta, count, context);
  }

  // Clears the per-query result lists and the shared heap, and re-applies the
  // heap's limit and threshold.
  bf_context->reset_results(count);
  auto &filter = bf_context->filter();
  auto *heap = bf_context->result_heap();

  for (size_t q = 0; q < count; ++q) {
    // The heap is shared by all queries of the batch and topk_to_result()
    // does not empty it, so drop the previous query's hits before scanning
    // again. The first query already starts from the heap that
    // reset_results() prepared.
    if (q > 0) {
      heap->clear();
    }

    auto *context_stats = bf_context->mutable_stats(q);
    uint32_t scan_count = 0;
    int ret = entity_.search(query, filter, &scan_count, heap, context_stats);
    if (ailego_unlikely(ret != 0)) {
      LOG_ERROR("Failed to search for %s", IndexError::What(ret));
      return ret;
    }
    heap->sort();
    bf_context->topk_to_result(q);

    // Advance to the next query vector.
    query = static_cast<const char *>(query) + qmeta.element_size();
  }
  return 0;
}

//! Brute-force search restricted to caller-supplied primary keys.
//! `query` points at `count` vectors laid out back to back, each
//! `qmeta.element_size()` bytes long; `p_keys[q]` lists the candidate keys of
//! query q, so `p_keys` must hold at least `count` lists. Keys rejected by
//! the context filter and keys whose vector cannot be fetched are skipped.
//! When the context is configured for group-by, the call is forwarded to
//! group_by_search_p_keys_impl(). Returns 0 on success or an IndexError code.
template <size_t BATCH_SIZE>
int FlatStreamer<BATCH_SIZE>::search_bf_by_p_keys_impl(
    const void *query, const std::vector<std::vector<uint64_t>> &p_keys,
    const IndexQueryMeta &qmeta, uint32_t count,
    Context::Pointer &context) const {
  ailego_assert(query && count && !!context);
  ailego_assert(metric_->is_matched(meta_, qmeta));

  FlatStreamerContext<BATCH_SIZE> *bf_context =
      dynamic_cast<FlatStreamerContext<BATCH_SIZE> *>(context.get());
  if (!bf_context) {
    LOG_ERROR("Invalid brute-force streamer context");
    return IndexError_InvalidArgument;
  }

  if (bf_context->magic() != magic_) {
    bf_context->reset(this);
  }

  // Every query indexes its own key list below; reject short inputs instead
  // of reading past the end of `p_keys`.
  if (ailego_unlikely(p_keys.size() < count)) {
    LOG_ERROR("Invalid primary keys, %zu key lists for %u queries",
              p_keys.size(), count);
    return IndexError_InvalidArgument;
  }

  if (bf_context->group_by_search()) {
    return group_by_search_p_keys_impl(query, p_keys, qmeta, count, context);
  }

  // Clears the per-query result lists and the shared heap, and re-applies the
  // heap's limit and threshold.
  bf_context->reset_results(count);
  auto &filter = bf_context->filter();
  auto *heap = bf_context->result_heap();

  for (size_t q = 0; q < count; ++q) {
    // The heap is shared by all queries of the batch and topk_to_result()
    // does not empty it; without this, hits scored against earlier queries
    // would be reported for later ones as well.
    if (q > 0) {
      heap->clear();
    }

    for (uint64_t key : p_keys[q]) {
      // An invalid (unset) filter accepts everything; a valid filter returns
      // true for keys that must be skipped.
      if (filter.is_valid() && filter(key)) {
        continue;
      }
      IndexStorage::MemoryBlock block;
      if (entity_.get_vector_by_key(key, block) != 0) continue;
      dist_t dist = 0;
      entity_.row_major_distance(query, block.data(), 1, &dist);
      heap->emplace(key, dist);
    }
    heap->sort();
    bf_context->topk_to_result(q);

    // Advance to the next query vector.
    query = static_cast<const char *>(query) + qmeta.element_size();
  }
  return 0;
}

//! Group-by brute-force search over every stored vector.
//! For each of the `count` back-to-back queries, all ids in
//! [0, entity_.vector_count()) are scanned; vectors passing the context filter
//! are scored and pushed into the top-k heap of their group, where the group
//! id comes from the context's group-by function. The per-group heaps are
//! then converted into group result slot q. Returns 0 on success or
//! IndexError_InvalidArgument for a wrong context type or a missing group-by
//! function.
template <size_t BATCH_SIZE>
int FlatStreamer<BATCH_SIZE>::group_by_search_impl(
    const void *query, const IndexQueryMeta &qmeta, uint32_t count,
    Context::Pointer &context) const {
  FlatStreamerContext<BATCH_SIZE> *bf_context =
      dynamic_cast<FlatStreamerContext<BATCH_SIZE> *>(context.get());
  if (!bf_context) {
    LOG_ERROR("Invalid brute-force streamer context");
    return IndexError_InvalidArgument;
  }

  bf_context->resize_group_results(count);
  if (!bf_context->group_by().is_valid()) {
    LOG_ERROR("Invalid group-by function");
    return IndexError_InvalidArgument;
  }

  auto &group_heaps = bf_context->group_topk_heaps();

  for (size_t q = 0; q < count; ++q) {
    // Each query starts from an empty set of group heaps.
    group_heaps.clear();

    for (node_id_t id = 0; id < entity_.vector_count(); ++id) {
      uint64_t key = entity_.key(id);
      // An invalid (unset) filter accepts everything; a valid filter returns
      // true for keys that must be skipped.
      if (bf_context->filter().is_valid() && bf_context->filter()(key)) {
        continue;
      }

      IndexStorage::MemoryBlock block;
      if (entity_.get_vector_by_key(key, block) != 0) continue;
      dist_t dist = 0;
      entity_.row_major_distance(query, block.data(), 1, &dist);

      // Call the group-by function directly; no std::function wrapper is
      // needed for a call made once per scanned vector.
      std::string group_id = bf_context->group_by()(key);
      auto &topk_heap = group_heaps[group_id];
      if (topk_heap.empty()) {
        topk_heap.limit(bf_context->group_topk());
      }
      topk_heap.emplace(key, dist);
    }
    bf_context->topk_to_group_result(q);

    // Advance to the next query vector.
    query = static_cast<const char *>(query) + qmeta.element_size();
  }
  return 0;
}

//! Group-by brute-force search restricted to caller-supplied primary keys.
//! `p_keys[q]` lists the candidate keys of query q, so `p_keys` must hold at
//! least `count` lists. Keys passing the context filter are scored and pushed
//! into the top-k heap of their group, where the group id comes from the
//! context's group-by function; keys whose vector cannot be fetched are
//! skipped. Returns 0 on success or IndexError_InvalidArgument for a wrong
//! context type, a missing group-by function or too few key lists.
template <size_t BATCH_SIZE>
int FlatStreamer<BATCH_SIZE>::group_by_search_p_keys_impl(
    const void *query, const std::vector<std::vector<uint64_t>> &p_keys,
    const IndexQueryMeta &qmeta, uint32_t count,
    Context::Pointer &context) const {
  FlatStreamerContext<BATCH_SIZE> *bf_context =
      dynamic_cast<FlatStreamerContext<BATCH_SIZE> *>(context.get());
  if (!bf_context) {
    LOG_ERROR("Invalid brute-force streamer context");
    return IndexError_InvalidArgument;
  }

  bf_context->resize_group_results(count);
  if (!bf_context->group_by().is_valid()) {
    LOG_ERROR("Invalid group-by function");
    return IndexError_InvalidArgument;
  }

  // Every query indexes its own key list below; reject short inputs instead
  // of reading past the end of `p_keys`.
  if (ailego_unlikely(p_keys.size() < count)) {
    LOG_ERROR("Invalid primary keys, %zu key lists for %u queries",
              p_keys.size(), count);
    return IndexError_InvalidArgument;
  }

  auto &group_heaps = bf_context->group_topk_heaps();

  for (size_t q = 0; q < count; ++q) {
    // Each query starts from an empty set of group heaps.
    group_heaps.clear();

    for (uint64_t key : p_keys[q]) {
      // An invalid (unset) filter accepts everything; a valid filter returns
      // true for keys that must be skipped.
      if (bf_context->filter().is_valid() && bf_context->filter()(key)) {
        continue;
      }

      IndexStorage::MemoryBlock block;
      if (entity_.get_vector_by_key(key, block) != 0) continue;
      dist_t dist = 0;
      entity_.row_major_distance(query, block.data(), 1, &dist);

      // Call the group-by function directly; no std::function wrapper is
      // needed for a call made once per candidate key.
      std::string group_id = bf_context->group_by()(key);
      auto &topk_heap = group_heaps[group_id];
      if (topk_heap.empty()) {
        topk_heap.limit(bf_context->group_topk());
      }
      topk_heap.emplace(key, dist);
    }
    bf_context->topk_to_group_result(q);

    // Advance to the next query vector.
    query = static_cast<const char *>(query) + qmeta.element_size();
  }
  return 0;
}

//! Streamer aliases handed to the index factory registration macro.
//! LinearStreamer, FlatStreamer and FlatStreamer32 all name the
//! FlatStreamer<32> instantiation; FlatStreamer16 names FlatStreamer<16>.
//! These are the only instantiations referenced in this translation unit.
INDEX_FACTORY_REGISTER_STREAMER_ALIAS(LinearStreamer, FlatStreamer<32>);
INDEX_FACTORY_REGISTER_STREAMER_ALIAS(FlatStreamer, FlatStreamer<32>);
INDEX_FACTORY_REGISTER_STREAMER_ALIAS(FlatStreamer16, FlatStreamer<16>);
INDEX_FACTORY_REGISTER_STREAMER_ALIAS(FlatStreamer32, FlatStreamer<32>);
}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/flat/flat_streamer.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <ailego/parallel/lock.h>
#include <zvec/core/framework/index_streamer.h>
#include "flat_streamer_entity.h"
#include "flat_utility.h"

namespace zvec {
namespace core {

/*! Flat Streamer
 *
 * Brute-force (linear scan) streamer over a FlatStreamerEntity. BATCH_SIZE is
 * a compile-time batch width; flat_streamer.cc registers the 16 and 32
 * instantiations with the index factory.
 *
 * Lifecycle, as implemented in flat_streamer.cc:
 *   STATE_INIT --init()--> STATE_INITED --open()--> STATE_OPENED
 *   close() returns to STATE_INITED, cleanup() returns to STATE_INIT.
 *
 * NOTE(review): Context::Pointer and Context::UPointer are mixed in the
 * signatures below; the overrides only compile if both name the same type.
 */
template <size_t BATCH_SIZE>
class FlatStreamer : public IndexStreamer {
 public:
  using ContextPointer = IndexStreamer::Context::UPointer;

  FlatStreamer(void);
  virtual ~FlatStreamer(void);

  //! Not copyable
  FlatStreamer(const FlatStreamer &streamer) = delete;
  FlatStreamer &operator=(const FlatStreamer &streamer) = delete;

 public:
  //! Initialize Streamer
  int init(const IndexMeta &, const ailego::Params &) override;

  //! Cleanup Streamer
  int cleanup(void) override;

  //! Create a context
  IndexStreamer::Context::UPointer create_context(void) const override;

  //! Create a provider over this streamer
  IndexProvider::Pointer create_provider(void) const override;

  //! Add a vector into index
  int add_impl(uint64_t pkey, const void *query, const IndexQueryMeta &qmeta,
               Context::UPointer &context) override;

  //! Add a vector into index under an explicit id
  int add_with_id_impl(uint32_t id, const void *query,
                       const IndexQueryMeta &qmeta,
                       Context::Pointer &context) override;

  //! Similarity search (single query; forwards to the brute-force search)
  int search_impl(const void *query, const IndexQueryMeta &qmeta,
                  Context::UPointer &context) const override {
    return search_bf_impl(query, qmeta, 1, context);
  }

  //! Similarity search (`count` queries; forwards to the brute-force search)
  int search_impl(const void *query, const IndexQueryMeta &qmeta,
                  uint32_t count, Context::UPointer &context) const override {
    return search_bf_impl(query, qmeta, count, context);
  }

  //! Similarity brute force search (single query)
  int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,
                     Context::UPointer &context) const override {
    return search_bf_impl(query, qmeta, 1, context);
  }

  //! Similarity brute force search (`count` queries)
  int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,
                     uint32_t count, Context::UPointer &context) const override;

  //! Linear search by primary keys (single query)
  int search_bf_by_p_keys_impl(const void *query,
                               const std::vector<std::vector<uint64_t>> &p_keys,
                               const IndexQueryMeta &qmeta,
                               Context::UPointer &context) const override {
    return search_bf_by_p_keys_impl(query, p_keys, qmeta, 1, context);
  }

  //! Linear search by primary keys (`count` queries, one key list per query)
  int search_bf_by_p_keys_impl(const void *query,
                               const std::vector<std::vector<uint64_t>> &p_keys,
                               const IndexQueryMeta &qmeta, uint32_t count,
                               Context::UPointer &context) const override;

  //! Group-by search over all stored vectors; the brute-force search
  //! forwards here when the context has group-by enabled
  int group_by_search_impl(const void *query, const IndexQueryMeta &qmeta,
                           uint32_t count, Context::UPointer &context) const;

  //! Group-by search restricted to the given primary keys
  int group_by_search_p_keys_impl(
      const void *query, const std::vector<std::vector<uint64_t>> &p_keys,
      const IndexQueryMeta &qmeta, uint32_t count,
      Context::Pointer &context) const;

  //! Open the streamer on a storage
  int open(IndexStorage::Pointer stg) override;

  //! Close the streamer
  int close(void) override;

  //! Flush the entity with a checkpoint
  int flush(uint64_t checkpoint) override;

  //! Dump index into storage
  int dump(const IndexDumper::Pointer &dumper) override;

  //! Retrieve statistics
  const Stats &stats(void) const override {
    return stats_;
  }

  //! Retrieve meta of index
  const IndexMeta &meta(void) const override {
    return meta_;
  }

  //! Retrieve the underlying entity
  const FlatStreamerEntity &entity(void) const {
    return entity_;
  }

  //! Retrieve a vector by primary key
  virtual const void *get_vector(uint64_t key) const override {
    return this->get_vector_by_key(key);
  }

  //! Retrieve a vector by primary key into a memory block
  virtual int get_vector(const uint64_t key,
                         IndexStorage::MemoryBlock &block) const override {
    return this->get_vector_by_key(key, block);
  }

  //! Retrieve a vector by primary key (forwards to the entity)
  const void *get_vector_by_key(uint64_t key) const {
    return entity_.get_vector_by_key(key);
  }

  //! Retrieve a vector by primary key into a memory block (forwards to the
  //! entity)
  int get_vector_by_key(const uint64_t key,
                        IndexStorage::MemoryBlock &block) const override {
    return entity_.get_vector_by_key(key, block);
  }
  //! Retrieve a vector by id.
  //! NOTE(review): the id is passed on as a primary key; confirm ids and keys
  //! are interchangeable for this streamer.
  const void *get_vector_by_id(uint32_t id) const override {
    return get_vector_by_key(id);
  }

  //! Retrieve a vector by id into a memory block (id passed on as a key, see
  //! the pointer-returning overload)
  int get_vector_by_id(const uint32_t id,
                       IndexStorage::MemoryBlock &block) const override {
    return get_vector_by_key(id, block);
  }

  //! Retrieve the magic number generated by the last successful open()
  uint32_t magic(void) const {
    return magic_;
  }

  //! Retrieve block size of data read
  uint32_t read_block_size(void) const {
    return read_block_size_;
  }

 private:
  //! Constants
  static constexpr uint32_t kDefaultBlockVecCount = 32u;
  static constexpr uint32_t kDefaultSegmentSize = 4 * 1024 * 1024u;
  static constexpr float kDefaultDocsSoftLimitRatio = 0.9f;

  enum State { STATE_INIT = 0, STATE_INITED = 1, STATE_OPENED = 2 };

  //! Members
  //! NOTE(review): std::numeric_limits and std::mutex are used below but
  //! <limits> / <mutex> are not included by this header directly; they are
  //! presumably pulled in transitively.
  //! NOTE(review): docs_hard_limit_, docs_soft_limit_, data_ and
  //! mapping_mutex_ are not referenced by the inline members of this class
  //! nor by the definitions in flat_streamer.cc.
  uint32_t magic_{0};
  uint32_t docs_hard_limit_{std::numeric_limits<uint32_t>::max()};
  uint32_t docs_soft_limit_{0};
  IndexMeta meta_{};
  std::vector<std::vector<std::string>> data_;
  // stats_ is declared before entity_ because the constructor initializes
  // entity_ from stats_.
  IndexStreamer::Stats stats_{};
  IndexMetric::Pointer metric_{};
  State state_{STATE_INIT};
  mutable std::mutex mapping_mutex_{};
  // Write-locked by dump(); the add paths try-lock the read side.
  ailego::SharedMutex dump_mutex_{};
  FlatStreamerEntity entity_;
  bool column_major_order_{false};
  bool use_key_info_map_{true};
  uint32_t read_block_size_{0};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/flat/flat_streamer_context.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "flat_streamer.h"

namespace zvec {
namespace core {

/*! Brute Force Streamer Context
 *
 * Per-caller working state for FlatStreamer searches: the top-k heap shared
 * by the queries of one batch, the per-query result lists and statistics,
 * and the per-group heaps/results used by group-by searches.
 */
template <size_t BATCH_SIZE>
class FlatStreamerContext : public IndexStreamer::Context {
 public:
  //! Constructor: binds the context to `owner` (see reset(owner))
  FlatStreamerContext(const FlatStreamer<BATCH_SIZE> *owner) {
    this->reset(owner);
  }

  //! Destructor
  virtual ~FlatStreamerContext(void) = default;

  //! Set topk of search result
  void set_topk(uint32_t topk) override {
    topk_ = topk;
    result_heap_.limit(topk);
  }

  //! Retrieve search result of the first query (slot 0 must exist)
  const IndexDocumentList &result(void) const override {
    return results_[0];
  }

  //! Retrieve search result with index (unchecked)
  const IndexDocumentList &result(size_t idx) const override {
    return results_[idx];
  }

  //! Retrieve result object for output
  IndexDocumentList *mutable_result(size_t idx) override {
    ailego_assert_with(idx < results_.size(), "invalid idx");
    return &results_[idx];
  }

  //! Retrieve the heap shared by all queries of a batch
  inline IndexDocumentHeap *result_heap() {
    return &result_heap_;
  }

  //! Retrieve search group result of the first query (slot 0 must exist)
  virtual const IndexGroupDocumentList &group_result(void) const override {
    return group_results_[0];
  }
  //! Retrieve search group result with index (unchecked)
  virtual const IndexGroupDocumentList &group_result(
      size_t idx) const override {
    return group_results_[idx];
  }

  //! Update the parameters of context (nothing to update; always 0)
  int update(const ailego::Params & /*params*/) override {
    return 0;
  }

  //! Retrieve magic number
  uint32_t magic(void) const override {
    return magic_;
  }

  //! Get group topk
  inline uint32_t group_topk() const {
    return group_topk_;
  }
  //! Get group num
  inline uint32_t group_num() const {
    return group_num_;
  }
  //! Retrieve the per-group heaps, keyed by group id
  inline std::map<std::string, TopkHeap> &group_topk_heaps() {
    return group_topk_heaps_;
  }
  //! Select whether results carry the vector data
  void set_fetch_vector(bool v) override {
    fetch_vector_ = v;
  }
  //! Whether results carry the vector data
  bool fetch_vector() const override {
    return fetch_vector_;
  }
  //! Resize the group result slots; a no-op unless group-by is enabled
  inline void resize_group_results(size_t size) {
    if (group_by_search()) {
      group_results_.resize(size);
    }
  }

  //! Convert the shared heap into the result list of query `idx`.
  //! Emits at most topk documents in heap order and stops at the first score
  //! above the threshold. An empty heap leaves the slot untouched. The heap
  //! itself is not cleared here.
  void topk_to_result(uint32_t idx) {
    if (ailego_unlikely(result_heap_.size() == 0)) {
      return;
    }

    ailego_assert_with(idx < results_.size(), "invalid idx");
    const uint32_t size =
        std::min(topk_, static_cast<uint32_t>(result_heap_.size()));
    result_heap_.sort();
    results_[idx].clear();
    for (uint32_t i = 0; i < size; ++i) {
      auto score = result_heap_[i].score();
      if (score > this->threshold()) {
        break;
      }

      key_t key = result_heap_[i].key();
      if (fetch_vector_) {
        // A failed fetch is not reported; the document is emitted with
        // whatever the block holds.
        IndexStorage::MemoryBlock block;
        owner_->entity().get_vector_by_key(key, block);
        results_[idx].emplace_back(key, score, key, block);
      } else {
        results_[idx].emplace_back(key, score, key);
      }
    }
  }

  //! Convert the per-group heaps into the group result of query `idx`.
  //! Groups are ranked by their best (first after sorting) score in ascending
  //! order and truncated to group_num(); each kept group emits at most
  //! group_topk documents, stopping at the first score above the threshold.
  void topk_to_group_result(uint32_t idx) {
    ailego_assert_with(idx < group_results_.size(), "invalid idx");
    group_results_[idx].clear();

    // Sort every heap in place and remember the best score of each non-empty
    // group.
    std::vector<std::pair<std::string, float>> best_score_in_groups;
    for (auto &entry : group_topk_heaps_) {
      auto &heap = entry.second;
      heap.sort();
      if (heap.size() > 0) {
        float best_score = heap[0].second;
        best_score_in_groups.emplace_back(entry.first, best_score);
      }
    }
    std::sort(best_score_in_groups.begin(), best_score_in_groups.end(),
              [](const std::pair<std::string, float> &a,
                 const std::pair<std::string, float> &b) -> bool {
                return a.second < b.second;
              });

    // truncate to group num
    const size_t group_count =
        std::min<size_t>(group_num(), best_score_in_groups.size());
    group_results_[idx].resize(group_count);

    // One provider serves the whole conversion and is only needed when the
    // vectors are fetched. create_provider() allocates with a non-throwing
    // new, so the pointer may be null.
    IndexProvider::Pointer provider;
    if (fetch_vector_) {
      provider = owner_->create_provider();
    }

    for (size_t i = 0; i < group_count; ++i) {
      const std::string &group_id = best_score_in_groups[i].first;
      // Refer to the already sorted heap in the map instead of copying it.
      auto &heap = group_topk_heaps_[group_id];
      group_results_[idx][i].set_group_id(group_id);

      const uint32_t size =
          std::min(group_topk_, static_cast<uint32_t>(heap.size()));
      for (uint32_t j = 0; j < size; ++j) {
        auto score = heap[j].second;
        if (score > this->threshold()) {
          break;
        }
        // NOTE(review): the heap entries are emplaced with 64-bit keys by the
        // streamer; confirm node_id_t is wide enough for them.
        node_id_t id = heap[j].first;
        if (fetch_vector_) {
          IndexStorage::MemoryBlock block;
          if (provider) {
            provider->get_vector(id, block);
          }
          group_results_[idx][i].mutable_docs()->emplace_back(id, score, id,
                                                              block);
        } else {
          group_results_[idx][i].mutable_docs()->emplace_back(id, score, id);
        }
      }
    }
  }

  //! Get if group by search
  bool group_by_search() {
    return group_num_ > 0;
  }
  //! Set group params (also drops any per-group heaps)
  void set_group_params(uint32_t group_num, uint32_t group_topk) override {
    group_num_ = group_num;
    group_topk_ = group_topk;
    group_topk_heaps_.clear();
  }

  //! Clear every result and group result list; the slots themselves stay
  void reset() override {
    for (auto &it : results_) {
      it.clear();
    }
    for (auto &it : group_results_) {
      it.clear();
    }
  }

  //! Reset the context and bind it to `owner`: adopts the owner's magic
  //! number and element size, and rounds the owner's read block size up to a
  //! whole number of BATCH_SIZE-vector blocks.
  void reset(const FlatStreamer<BATCH_SIZE> *owner) {
    this->reset();
    magic_ = owner->magic();
    feature_size_ = owner->meta().element_size();

    uint32_t block_size = feature_size_ * BATCH_SIZE;
    actual_read_size_ =
        (owner->read_block_size() + block_size - 1) / block_size * block_size;
    owner_ = owner;
  }

  //! Reset all the query results: sizes and clears the per-query result and
  //! statistics slots, then empties the shared heap and re-applies its limit
  //! and threshold.
  void reset_results(size_t qnum) {
    results_.resize(qnum);
    stats_vec_.resize(qnum);
    for (size_t i = 0; i < qnum; ++i) {
      results_[i].clear();
      stats_vec_[i].clear();
    }
    result_heap_.clear();
    result_heap_.limit(topk_);
    result_heap_.set_threshold(this->threshold());
  }

  //! Retrieve the statistics slot of query `idx`
  Stats *mutable_stats(size_t idx = 0) {
    ailego_assert_with(stats_vec_.size() > idx, "invalid index");
    return &stats_vec_[idx];
  }

 private:
  const FlatStreamer<BATCH_SIZE> *owner_{nullptr};
  std::vector<Stats> stats_vec_{};
  uint32_t magic_{0};
  uint32_t topk_{0};
  uint32_t feature_size_{0};
  uint32_t actual_read_size_{0};
  IndexDocumentHeap result_heap_;
  std::vector<IndexDocumentList> results_{};
  // batch_queries_ and scores_ are not referenced by any member of this
  // class; scores_ is left uninitialized.
  std::string batch_queries_{};
  float scores_[BATCH_SIZE * BATCH_SIZE];
  bool fetch_vector_{false};
  // group
  uint32_t group_num_{0};
  uint32_t group_topk_{0};
  std::map<std::string, TopkHeap> group_topk_heaps_{};
  std::vector<IndexGroupDocumentList> group_results_{};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/flat/flat_streamer_dumper.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "flat_streamer.h"
#include "flat_utility.h"

namespace zvec {
namespace core {

/*! Dumper of a flat streamer.
 *
 *  Iterates over the owner's entity and writes, in this order: the feature
 *  vectors (row or column major, as selected by the owner's meta), the keys
 *  in iteration order, a key-sorted index mapping, and finally the serialized
 *  index meta. Every data section is zero-padded up to a multiple of 32 bytes
 *  before it is registered with the dumper.
 */
template <size_t BATCH_SIZE>
class FlatStreamerDumper {
 public:
  typedef std::unique_ptr<FlatStreamerDumper> Pointer;

  //! Constructor, `owner` is the streamer whose content gets dumped
  FlatStreamerDumper(const FlatStreamer<BATCH_SIZE> *owner) {
    owner_ = owner;
    dump_size_ = 0;
  }

  //! Dump the whole index into `dumper`.
  //! Returns 0 on success, otherwise the first error code reported by one of
  //! the write steps.
  int dump(const IndexDumper::Pointer &dumper) {
    ailego::ElapsedTime stamp;

    // Features first; the keys are collected while iterating the vectors
    std::vector<uint64_t> keys;
    int error_code =
        (owner_->meta().major_order() == IndexMeta::MO_COLUMN)
            ? this->write_column_index(dumper.get(), &keys)
            : this->write_row_index(dumper.get(), &keys);
    if (error_code != 0) {
      return error_code;
    }

    error_code = this->write_keys(keys, dumper.get());
    if (error_code != 0) {
      return error_code;
    }

    error_code = this->write_mapping(keys, dumper.get());
    if (error_code != 0) {
      return error_code;
    }

    error_code = IndexHelper::SerializeToDumper(owner_->meta(), dumper.get());
    if (error_code != 0) {
      return error_code;
    }
    LOG_DEBUG("dumped_count: %zu, costtime: %zu", keys.size(),
              (size_t)stamp.milli_seconds());
    return 0;
  }

  //! Retrieve the number of bytes written so far (features, keys, mapping
  //! and their paddings)
  size_t dump_size() const {
    return dump_size_;
  }

 private:
  //! Write `padding_size` zero bytes into `dumper` and account for them.
  //! A zero size is a no-op. Returns 0 or IndexError_WriteData.
  int write_padding(IndexDumper *dumper, size_t padding_size) {
    if (padding_size == 0) {
      return 0;
    }
    std::string padding(padding_size, '\0');
    if (dumper->write(padding.data(), padding.size()) != padding.size()) {
      LOG_ERROR("Failed to write data into dumper %s",
                dumper->name().c_str());
      return IndexError_WriteData;
    }
    dump_size_ += padding.size();
    return 0;
  }

  //! Write the features in column-major order, dispatching on the aligned
  //! unit size of the owner's data type (2, 4 or 8 bytes)
  int write_column_index(IndexDumper *dumper, std::vector<uint64_t> *keys) {
    switch (IndexMeta::AlignSizeof(owner_->meta().data_type())) {
      case 2:
        return this->write_column_index<uint16_t>(dumper, keys);
      case 4:
        return this->write_column_index<uint32_t>(dumper, keys);
      case 8:
        return this->write_column_index<uint64_t>(dumper, keys);
      default:
        ailego_check_with(0, "BAD CASE");
    }
    return IndexError_Runtime;
  }

  //! Write the features in column-major order using unit type `T`.
  //! Vectors are gathered BATCH_SIZE at a time; each full batch is transposed
  //! before being written. A trailing partial batch is written as gathered,
  //! i.e. without transposition. The key of every vector is appended to
  //! `keys` in iteration order.
  template <typename T>
  int write_column_index(IndexDumper *dumper, std::vector<uint64_t> *keys) {
    auto iter = owner_->entity().creater_iterator();
    if (!iter) {
      LOG_ERROR("Failed to create iterator");
      return IndexError_Runtime;
    }

    // Write features
    size_t element_size = owner_->meta().element_size();
    size_t block_size = element_size * BATCH_SIZE;
    std::string block1;
    block1.reserve(block_size);
    // The transposed batch is produced through a raw pointer, so the
    // destination must really hold block_size bytes: reserve() alone would
    // leave size() at zero and make those writes out of bounds.
    std::string block2(block_size, '\0');

    for (; iter->is_valid(); iter->next()) {
      block1.append(reinterpret_cast<const char *>(iter->data()), element_size);
      keys->emplace_back(iter->key());

      if (block1.size() == block_size) {
        ailego::MatrixHelper::Transpose<T, BATCH_SIZE>(
            block1.data(), element_size / sizeof(T),
            static_cast<void *>(&block2[0]));

        if (dumper->write(block2.data(), block_size) != block_size) {
          LOG_ERROR("Failed to write data into dumper %s",
                    dumper->name().c_str());
          return IndexError_WriteData;
        }
        block1.clear();
        dump_size_ += block_size;
      }
    }

    // Trailing vectors that do not fill a whole batch
    if (!block1.empty()) {
      if (dumper->write(block1.data(), block1.size()) != block1.size()) {
        LOG_ERROR("Failed to write data into dumper %s",
                  dumper->name().c_str());
        return IndexError_WriteData;
      }
      dump_size_ += block1.size();
    }

    // Write the padding if need
    size_t features_size = keys->size() * element_size;
    size_t features_padding_size =
        ailego_align(features_size, 32) - features_size;
    int error_code = this->write_padding(dumper, features_padding_size);
    if (error_code != 0) {
      return error_code;
    }
    return dumper->append(FLAT_SEGMENT_FEATURES_SEG_ID, features_size,
                          features_padding_size, 0);
  }

  //! Write the features in row-major order, one vector after another, and
  //! append the key of every vector to `keys` in iteration order
  int write_row_index(IndexDumper *dumper, std::vector<uint64_t> *keys) {
    auto iter = owner_->entity().creater_iterator();
    if (!iter) {
      LOG_ERROR("Failed to create iterator");
      return IndexError_Runtime;
    }

    // Write features
    size_t element_size = owner_->meta().element_size();
    for (; iter->is_valid(); iter->next()) {
      if (dumper->write(iter->data(), element_size) != element_size) {
        LOG_ERROR("Failed to write data into dumper %s",
                  dumper->name().c_str());
        return IndexError_WriteData;
      }
      dump_size_ += element_size;
      keys->emplace_back(iter->key());
    }

    // Write the padding if need
    size_t features_size = keys->size() * element_size;
    size_t features_padding_size =
        ailego_align(features_size, 32) - features_size;
    int error_code = this->write_padding(dumper, features_padding_size);
    if (error_code != 0) {
      return error_code;
    }
    return dumper->append(FLAT_SEGMENT_FEATURES_SEG_ID, features_size,
                          features_padding_size, 0);
  }

  //! Write the keys as raw uint64_t values, in the order they were collected
  int write_keys(const std::vector<uint64_t> &keys, IndexDumper *dumper) {
    size_t keys_size = keys.size() * sizeof(uint64_t);
    size_t keys_padding_size = ailego_align(keys_size, 32) - keys_size;
    if (dumper->write(keys.data(), keys_size) != keys_size) {
      LOG_ERROR("Failed to write data into dumper %s", dumper->name().c_str());
      return IndexError_WriteData;
    }
    dump_size_ += keys_size;

    // Write the padding if need
    int error_code = this->write_padding(dumper, keys_padding_size);
    if (error_code != 0) {
      return error_code;
    }
    return dumper->append(FLAT_SEGMENT_KEYS_SEG_ID, keys_size,
                          keys_padding_size, 0);
  }

  //! Write the mapping section: the positions 0..keys.size()-1 reordered so
  //! that the keys they refer to are ascending
  int write_mapping(const std::vector<uint64_t> &keys, IndexDumper *dumper) {
    std::vector<uint32_t> mapping(keys.size());
    std::iota(mapping.begin(), mapping.end(), 0);
    std::sort(mapping.begin(), mapping.end(),
              [&keys](uint32_t lhs, uint32_t rhs) {
                return (keys[lhs] < keys[rhs]);
              });

    size_t mapping_size = mapping.size() * sizeof(uint32_t);
    size_t mapping_padding_size = ailego_align(mapping_size, 32) - mapping_size;
    if (dumper->write(mapping.data(), mapping_size) != mapping_size) {
      LOG_ERROR("Failed to write data into dumper %s", dumper->name().c_str());
      return IndexError_WriteData;
    }
    dump_size_ += mapping_size;

    // Write the padding if need
    int error_code = this->write_padding(dumper, mapping_padding_size);
    if (error_code != 0) {
      return error_code;
    }
    return dumper->append(FLAT_SEGMENT_MAPPING_SEG_ID, mapping_size,
                          mapping_padding_size, 0);
  }

 private:
  const FlatStreamer<BATCH_SIZE> *owner_{nullptr};
  size_t dump_size_{0};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/flat/flat_streamer_entity.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "flat_streamer_entity.h"
#include <cstdint>
#include <zvec/core/framework/index_error.h>
#include "flat_utility.h"

namespace zvec {
namespace core {

//! Constructor: keeps `stats` in stats_, through which later operations in
//! this file (add, flush, init_storage) update the streamer statistics.
FlatStreamerEntity::FlatStreamerEntity(IndexStreamer::Stats &stats)
    : stats_(stats) {}

//! Open the entity on top of `storage`.
//! Resets the key bookkeeping, derives the block layout from index_meta_,
//! then either loads an existing index (when the list-head or meta segment is
//! present) or initializes a fresh one. Finally the distance functions are
//! created from the configured metric.
//! Returns 0 on success, IndexError_Duplicate when a storage is already
//! opened, or the error reported by the failing step.
int FlatStreamerEntity::open(IndexStorage::Pointer storage,
                             const IndexMeta & /*mt*/) {
  if (storage_) {
    LOG_ERROR("An storage instance is already opened");
    return IndexError_Duplicate;
  }
  // segments_[0] store the meta information of the linear list
  ailego_assert_with(segments_.size() == 0, "Invalid Size");

  // Start from empty key lookup structures guarded by a fresh lock
  key_info_map_lock_ = std::make_shared<ailego::SharedMutex>();
  key_info_map_.clear();
  id_key_vector_.clear();
  withid_key_info_map_.clear();
  withid_key_map_.clear();

  // Block layout: header + deletion map + (vector + key) per slot, 32-aligned
  const auto element_size = index_meta_.element_size();
  vec_unit_size_ = IndexMeta::AlignSizeof(index_meta_.data_type());
  vec_cols_ = element_size / vec_unit_size_;
  meta_.header.block_size = ailego_align(
      sizeof(BlockHeader) + sizeof(DeletionMap) +
          (element_size + sizeof(uint64_t)) * meta_.header.block_vector_count,
      32);

  const bool has_index = storage->get(FLAT_LINEAR_LIST_HEAD_SEG_ID) ||
                         storage->get(FLAT_LINEAR_META_SEG_ID);
  int ret = 0;
  if (has_index) {
    ret = this->load_storage(storage);
    if (ailego_unlikely(ret != 0)) {
      LOG_ERROR("Failed to load storage index");
      return ret;
    }
  } else {
    ret = this->init_storage(storage);
    if (ailego_unlikely(ret != 0)) {
      LOG_ERROR("Failed to init storage");
      return ret;
    }
  }

  storage_ = storage;

  //! Create the distance calculator
  const auto &metric_name = index_meta_.metric_name();
  auto metric = IndexFactory::CreateMetric(metric_name);
  if (!metric) {
    LOG_ERROR("Failed to create metric %s", metric_name.c_str());
    return IndexError_NoExist;
  }
  ret = metric->init(index_meta_, index_meta_.metric_params());
  if (ret != 0) {
    LOG_ERROR("Failed to initialize metric %s", metric_name.c_str());
    return ret;
  }
  row_distance_ = metric->distance();
  column_distance_ =
      metric->distance_matrix(meta_.header.block_vector_count, 1);

  LOG_DEBUG("Open storage %s done, metric=%s", storage_->name().c_str(),
            metric_name.c_str());

  return 0;
}

//! Close the entity: drop the storage and segment handles, empty all key
//! lookup structures and zero the counters kept in meta_. Always returns 0.
int FlatStreamerEntity::close(void) {
  // Release storage related handles
  segments_.clear();
  storage_.reset();

  // Forget every known key
  key_info_map_lock_.reset();
  key_info_map_.clear();
  withid_key_info_map_.clear();
  withid_key_map_.clear();
  id_key_vector_.clear();

  // Reset the meta counters
  meta_.create_time = 0;
  meta_.update_time = 0;
  meta_.segment_count = 0;

  auto &header = meta_.header;
  header.total_vector_count = 0;
  header.block_count = 0;
  header.block_size = 0;
  header.linear_body_size = 0;

  return 0;
}

//! Persist meta_ at offset 0 of the linear meta segment.
//! Before writing, the update time is refreshed and the revision id is taken
//! from stats_. Does nothing (returns 0) when no storage is opened.
int FlatStreamerEntity::flush_linear_meta(void) {
  if (!storage_) {
    return 0;
  }

  // Bring the in-memory meta up to date
  meta_.update_time = ailego::Realtime::Seconds();
  meta_.revision_id = stats_.revision_id();
  stats_.set_update_time(meta_.update_time);

  auto meta_segment = storage_->get(FLAT_LINEAR_META_SEG_ID);
  if (ailego_unlikely(!meta_segment)) {
    LOG_ERROR("Failed to get segment %s", FLAT_LINEAR_META_SEG_ID.c_str());
    return IndexError_Runtime;
  }

  const auto written = meta_segment->write(0, &meta_, sizeof(meta_));
  if (written != sizeof(meta_)) {
    LOG_ERROR("Failed to write segment %s", FLAT_LINEAR_META_SEG_ID.c_str());
    return IndexError_WriteData;
  }

  return 0;
}

//! Flush the linear meta and the underlying storage.
//! A non-zero `checkpoint` is forwarded to the storage before flushing and
//! recorded in stats_ once the flush succeeded.
//! Returns 0 on success (or when no storage is opened), otherwise the error
//! code of the failing step.
int FlatStreamerEntity::flush(uint64_t checkpoint) {
  // Nothing is opened: flush_linear_meta() treats this as a successful no-op,
  // so do the same here instead of dereferencing a null storage below.
  if (!storage_) {
    return 0;
  }

  int ret = this->flush_linear_meta();
  if (ailego_unlikely(ret != 0)) {
    return ret;
  }

  if (checkpoint != 0) {
    storage_->refresh(checkpoint);
  }
  ret = storage_->flush();
  if (ailego_unlikely(ret != 0)) {
    LOG_ERROR("Failed to flush storage for %s", IndexError::What(ret));
    return ret;
  }
  // Only publish the checkpoint after the data really reached the storage
  if (checkpoint != 0) {
    stats_.set_check_point(checkpoint);
  }

  return 0;
}

int FlatStreamerEntity::add(uint64_t key, const void *vec, size_t size) {
  std::lock_guard<std::mutex> lock(mutex_);
  if (filter_same_key_) {
    key_info_map_lock_->lock_shared();
    if (key_info_map_.find(key) != key_info_map_.end()) {
      key_info_map_lock_->unlock_shared();
      LOG_WARN("Try to add duplicate key, drop it");
      return IndexError_Duplicate;
    }
    key_info_map_lock_->unlock_shared();
  }
  if (size != static_cast<size_t>(index_meta_.element_size())) {
    LOG_ERROR("Failed to add, mismatch size %zu vs elemsize %u", size,
              index_meta_.element_size());
    return IndexError_Mismatch;
  }

  IndexStorage::MemoryBlock head_block;
  this->get_head_block(head_block);
  const BlockLocation *bl =
      reinterpret_cast<const BlockLocation *>(head_block.data());
  if (ailego_unlikely(bl == nullptr)) {
    LOG_ERROR("Failed to get block loc");
    return IndexError_ReadData;
  }
  BlockLocation block = *bl;

  if (!this->is_valid_block(block)) {
    int ret = this->alloc_block(block, &block);
    if (ailego_unlikely(ret != 0)) {
      return ret;
    }
    ret = this->update_head_block(block);
    if (ailego_unlikely(ret != 0)) {
      return ret;
    }
  }

  int ret = this->add_to_block(block, key, vec, size);
  if (ret == IndexError_IndexFull) {
    ret = this->alloc_block(block, &block);
    if (ailego_unlikely(ret != 0)) {
      return ret;
    }
    ret = this->update_head_block(block);
    if (ailego_unlikely(ret != 0)) {
      return ret;
    }
    ret = this->add_to_block(block, key, vec, size);
    if (ailego_unlikely(ret != 0)) {
      return ret;
    }
  }
  if (ailego_unlikely(ret != 0)) {
    return ret;
  }

  (*stats_.mutable_added_count())++;
  stats_.set_revision_id(meta_.revision_id + 1);

  return 0;
}

//! Scan every block of the linear list, starting at the head block, and push
//! the scored documents into `heap`.
//! `scan_count` is incremented by the vector count of each non-empty block.
//! Blocks are searched through the filtering path when `filter` is valid or
//! the block's deletion map is dirty; otherwise all vectors of the block are
//! scored directly and counted in `context_stats`.
//! Returns 0 on success or IndexError_ReadData when a block location, block
//! header or deletion map cannot be read.
int FlatStreamerEntity::search(const void *query, const IndexFilter &filter,
                               uint32_t *scan_count, IndexDocumentHeap *heap,
                               IndexContext::Stats *context_stats) const {
  IndexStorage::MemoryBlock head_block;
  this->get_head_block(head_block);
  const BlockLocation *bl =
      reinterpret_cast<const BlockLocation *>(head_block.data());
  if (ailego_unlikely(bl == nullptr)) {
    LOG_ERROR("Failed to get block loc");
    return IndexError_ReadData;
  }

  BlockLocation block = *bl;

  while (this->is_valid_block(block)) {
    IndexStorage::MemoryBlock block_header_block;
    this->get_block_header(block, block_header_block);
    const BlockHeader *hd =
        reinterpret_cast<const BlockHeader *>(block_header_block.data());
    if (ailego_unlikely(hd == nullptr)) {
      LOG_ERROR("Failed to get block header");
      return IndexError_ReadData;
    }

    if (hd->vector_count > 0) {
      *scan_count += hd->vector_count;
      IndexStorage::MemoryBlock deletion_map_block;
      this->get_block_deletion_map(block, deletion_map_block);
      const DeletionMap *deletion_map =
          reinterpret_cast<const DeletionMap *>(deletion_map_block.data());
      // Both search paths below dereference the map, so a failed read must be
      // reported like the header and location reads above.
      if (ailego_unlikely(deletion_map == nullptr)) {
        LOG_ERROR("Failed to get block deletion map");
        return IndexError_ReadData;
      }
      if (filter.is_valid() || deletion_map->is_dirty()) {
        this->search_block(query, block, hd, 1.0, filter, deletion_map, heap,
                           context_stats);
      } else {
        *(context_stats->mutable_dist_calced_count()) += hd->vector_count;
        this->search_block(query, block, hd, 1.0, heap);
      }
    }
    // hd stays valid here: block_header_block lives until the end of the
    // iteration
    block = hd->next;
  }
  return 0;
}

//! Search in a block
//! Scores all `hd->vector_count` vectors of block `bl` against `query` in one
//! call and pushes (key, distance * norm_val) into `heap` for every slot
//! whose key is not kInvalidKey.
void FlatStreamerEntity::search_block(const void *query,
                                      const BlockLocation &bl,
                                      const BlockHeader *hd, float norm_val,
                                      IndexDocumentHeap *heap) const {
  std::vector<float> distances(block_vector_count());

  IndexStorage::MemoryBlock vecs_block;
  this->get_block_vectors(bl, vecs_block);
  const auto *vecs = reinterpret_cast<const char *>(vecs_block.data());

  IndexStorage::MemoryBlock keys_block;
  this->get_block_keys(bl, keys_block);
  const auto *keys = reinterpret_cast<const uint64_t *>(keys_block.data());

  const auto count = hd->vector_count;
  row_major_distance(query, vecs, count, distances.data());

  for (size_t k = 0; k < count; ++k) {
    if (keys[k] == kInvalidKey) {
      continue;
    }
    heap->emplace(keys[k], distances[k] * norm_val);
  }
}

//! Search in a block with filter
//! A slot survives when it is not set in `deletion_map`, is not rejected by
//! `filter` (only consulted when the filter is valid) and its key is not
//! kInvalidKey. Survivors are scored one by one and pushed into `heap` as
//! (key, distance * norm_val); every other slot is counted as filtered in
//! `context_stats`. When nothing survives, the whole block is counted as
//! filtered and no distance is computed.
void FlatStreamerEntity::search_block(
    const void *query, const BlockLocation &bl, const BlockHeader *hd,
    float norm_val, const IndexFilter &filter, const DeletionMap *deletion_map,
    IndexDocumentHeap *heap, IndexContext::Stats *context_stats) const {
  std::vector<float> distances(block_vector_count());

  IndexStorage::MemoryBlock vecs_block;
  this->get_block_vectors(bl, vecs_block);
  const auto *vecs = reinterpret_cast<const char *>(vecs_block.data());

  IndexStorage::MemoryBlock keys_block;
  this->get_block_keys(bl, keys_block);
  const auto *keys = reinterpret_cast<const uint64_t *>(keys_block.data());

  const auto count = hd->vector_count;

  // Pass 1: mark the surviving slots. All three predicates are evaluated for
  // every slot, exactly once each.
  DeletionMap keeps;
  for (size_t k = 0; k < count; ++k) {
    const bool deleted = deletion_map->test(k);
    const bool rejected = filter.is_valid() ? filter(keys[k]) : false;
    const bool has_key = keys[k] != kInvalidKey;
    if (!deleted && !rejected && has_key) {
      keeps.set(k);
    }
  }
  if (!keeps.is_dirty()) {
    (*context_stats->mutable_filtered_count()) += count;
    return;
  }

  // Pass 2: score the survivors individually
  const auto element_size = index_meta_.element_size();
  for (size_t k = 0; k < count; ++k) {
    if (!keeps.test(k)) {
      continue;
    }
    auto cur_vec = vecs + element_size * k;
    row_major_distance(query, cur_vec, 1, distances.data() + k);
    ++(*context_stats->mutable_dist_calced_count());
  }

  // Pass 3: publish survivors, account for the rest
  for (size_t k = 0; k < count; ++k) {
    if (!keeps.test(k)) {
      ++(*context_stats->mutable_filtered_count());
      continue;
    }
    heap->emplace(keys[k], distances[k] * norm_val);
  }
}

//! Brute-force search: same as search(), with the scan counter discarded.
//! Returns whatever search() returns.
int FlatStreamerEntity::search_bf(const void *query, const IndexFilter &filter,
                                  IndexDocumentHeap *heap,
                                  IndexContext::Stats *context_stats) const {
  // search() accumulates into the counter with `+=`, so it must start from a
  // defined value even though the result is not used here.
  uint32_t scan_count = 0;
  return this->search(query, filter, &scan_count, heap, context_stats);
}

//! Create a copy of this entity that works on cloned segment handles.
//! The copy is constructed on the same stats_ object and shares storage_ and
//! key_info_map_lock_ with the original; the key containers and meta are
//! copied. Returns nullptr when a segment cannot be cloned or the allocation
//! fails.
//! NOTE(review): use_key_info_map_, row_distance_ and column_distance_ are
//! not assigned here - confirm the clone is not expected to carry them.
FlatStreamerEntity::Pointer FlatStreamerEntity::clone(void) const {
  std::vector<IndexStorage::Segment::Pointer> segments;
  segments.reserve(segments_.size());
  size_t index = 0;
  for (const auto &seg : segments_) {
    segments.emplace_back(seg->clone());
    if (!segments.back()) {
      LOG_ERROR("Failed to clone segment, index=%zu", index);
      return nullptr;
    }
    ++index;
  }

  auto *copy = new (std::nothrow) FlatStreamerEntity(stats_);
  if (!copy) {
    LOG_ERROR("Failed to New FlatStreamerEntity object");
    return nullptr;
  }

  // Storage side
  copy->index_meta_ = this->index_meta_;
  copy->storage_ = this->storage_;
  // copy->reformer_ = this->reformer_;
  copy->segments_ = std::move(segments);
  copy->meta_ = this->meta_;

  // Key lookup side
  copy->key_info_map_lock_ = this->key_info_map_lock_;
  copy->key_info_map_ = this->key_info_map_;
  copy->id_key_vector_ = this->id_key_vector_;
  copy->withid_key_info_map_ = this->withid_key_info_map_;
  copy->withid_key_map_ = this->withid_key_map_;

  // Settings
  copy->filter_same_key_ = this->filter_same_key_;
  copy->vec_unit_size_ = this->vec_unit_size_;
  copy->vec_cols_ = this->vec_cols_;
  return FlatStreamerEntity::Pointer(copy);
}

//! Retrieve a pointer to the vector stored under `key`.
//! The location is looked up under the shared key lock, in key_info_map_
//! when use_key_info_map_ is set, otherwise by using `key` as an index into
//! withid_key_info_map_. Returns nullptr when the key is unknown or the
//! segment read fails.
const void *FlatStreamerEntity::get_vector_by_key(uint64_t key) const {
  VectorLocation loc{};
  bool found = false;

  key_info_map_lock_->lock_shared();
  if (use_key_info_map_) {
    auto it = key_info_map_.find(key);
    if (it != key_info_map_.end()) {
      loc = it->second;
      found = true;
    }
  } else if (key < withid_key_info_map_.size()) {
    loc = withid_key_info_map_[key];
    found = true;
  }
  key_info_map_lock_->unlock_shared();

  if (!found) {
    return nullptr;
  }

  const auto element_size = index_meta_.element_size();
  auto segment = this->get_segment(loc.segment_id);
  const void *data = nullptr;
  if (segment->read(loc.offset, &data, element_size) != element_size) {
    LOG_ERROR("Failed to read segment, size=%u", index_meta_.element_size());
    return nullptr;
  }
  return data;
}

//! Read the vector stored under `key` into `block`.
//! The location is looked up under the shared key lock, in key_info_map_
//! when use_key_info_map_ is set, otherwise by using `key` as an index into
//! withid_key_info_map_. Returns 0 on success and -1 when the key is unknown
//! or the segment read fails.
int FlatStreamerEntity::get_vector_by_key(
    const uint64_t key, IndexStorage::MemoryBlock &block) const {
  VectorLocation loc{};
  bool found = false;

  key_info_map_lock_->lock_shared();
  if (use_key_info_map_) {
    auto it = key_info_map_.find(key);
    if (it != key_info_map_.end()) {
      loc = it->second;
      found = true;
    }
  } else if (key < withid_key_info_map_.size()) {
    loc = withid_key_info_map_[key];
    found = true;
  }
  key_info_map_lock_->unlock_shared();

  if (!found) {
    return -1;
  }

  const auto element_size = index_meta_.element_size();
  auto segment = this->get_segment(loc.segment_id);
  if (segment->read(loc.offset, block, element_size) != element_size) {
    LOG_ERROR("Failed to read segment, size=%u", index_meta_.element_size());
    return -1;
  }
  return 0;
}

//! Create an iterator that walks a clone of this entity.
//! Returns nullptr when the entity cannot be cloned; the returned pointer is
//! also null when the iterator allocation fails (nothrow new).
IndexProvider::Iterator::Pointer FlatStreamerEntity::creater_iterator(
    void) const {
  auto cloned = this->clone();
  if (!cloned) {
    LOG_ERROR("Failed to clone entity");
    return nullptr;
  }

  auto *iterator =
      new (std::nothrow) FlatStreamerEntity::Iterator(std::move(cloned));
  return Iterator::Pointer(iterator);
}

//! Position the iterator on the next non-empty block.
//! Starting at (segment_id_, block_index_), blocks are read one at a time;
//! when the next block would run past the segment's data the scan moves to
//! the first block of the following segment, and blocks whose header reports
//! no vectors are skipped. On success the block's vector count and key array
//! are exposed and the in-block cursor is reset. The iterator is marked
//! invalid when the segments are exhausted or a read fails.
void FlatStreamerEntity::Iterator::read_next_block(void) {
  auto block_size = entity_->linear_block_size();

  while (segment_id_ < entity_->segments_.size()) {
    auto &segment = entity_->segments_[segment_id_];
    size_t off = block_index_ * block_size;

    // No full block left in this segment: continue with the next one
    if (off + block_size > segment->data_size()) {
      block_index_ = 0;
      ++segment_id_;
      continue;
    }
    if (segment->read(off, block_, block_size) != block_size) {
      LOG_ERROR("Failed to read block, off=%zu", off);
      break;
    }
    data_ = block_.data();

    // The block header occupies the last bytes of the block
    auto hd = reinterpret_cast<const BlockHeader *>(
        static_cast<const char *>(data_) + block_size - sizeof(BlockHeader));
    if (hd->vector_count != 0) {
      block_vector_count_ = hd->vector_count;
      block_vector_index_ = 0;
      // Keys are located right behind the slots reserved for the vectors
      size_t elemsize = entity_->index_meta_.element_size();
      keys_ = reinterpret_cast<const uint64_t *>(
          reinterpret_cast<const char *>(data_) +
          elemsize * entity_->block_vector_count());
      return;
    }

    // Empty block, try the following one
    ++block_index_;
  }

  is_valid_ = false;
}

//! Initialize a fresh index inside `storage`.
//! Creates and fills three segments: the linear meta segment (meta_ followed
//! by the serialized index meta, page aligned), the "IndexMeta" segment
//! (serialized index meta only) and the linear list head segment, which
//! becomes segments_[0]. The creation/update times are recorded in stats_ and
//! the sizes of the meta and list head segments are added to the index size.
//! Returns 0 on success or the error code of the failing step.
int FlatStreamerEntity::init_storage(IndexStorage::Pointer storage) {
  // Init Linear Meta Segment
  meta_.create_time = ailego::Realtime::Seconds();
  stats_.set_create_time(meta_.create_time);
  meta_.update_time = ailego::Realtime::Seconds();
  stats_.set_update_time(meta_.update_time);
  meta_.segment_count = 0;
  meta_.revision_id = 0;

  std::string str;
  index_meta_.serialize(&str);
  const size_t page = ailego::MemoryHelper::PageSize();

  meta_.header.header_size = sizeof(LinearIndexHeader) + str.size();
  meta_.header.total_vector_count = 0;
  meta_.header.linear_body_size = 0;
  meta_.header.block_count = 0;
  meta_.header.index_meta_size = str.size();
  meta_.header.linear_list_count = 1;

  AdjustSegmentSize(&meta_);

  LOG_DEBUG(
      "Create Streamer Index, VecSize=%u, BlockSize=%u SegmentSize=%u "
      "LinearListCount=%u",
      index_meta_.element_size(), meta_.header.block_size, meta_.segment_size,
      meta_.header.linear_list_count);

  size_t size = ailego_align(sizeof(meta_) + str.size(), page);
  int ret = storage->append(FLAT_LINEAR_META_SEG_ID, size);
  if (ailego_unlikely(ret != 0)) {
    LOG_ERROR("Failed to append segment %s", FLAT_LINEAR_META_SEG_ID.c_str());
    return ret;
  }
  auto segment = storage->get(FLAT_LINEAR_META_SEG_ID);
  if (ailego_unlikely(!segment)) {
    LOG_ERROR("Failed to get segment %s", FLAT_LINEAR_META_SEG_ID.c_str());
    return IndexError_Runtime;
  }
  if (segment->write(0, &meta_, sizeof(meta_)) != sizeof(meta_)) {
    LOG_ERROR("Failed to write segment data");
    return IndexError_WriteData;
  }
  if (segment->write(sizeof(meta_), str.data(), str.size()) != str.size()) {
    LOG_ERROR("Failed to write segment data, size=%zu", str.size());
    return IndexError_WriteData;
  }

  // Standalone copy of the serialized index meta
  ret = storage->append("IndexMeta", str.size());
  if (ailego_unlikely(ret != 0)) {
    LOG_ERROR("Failed to append segment IndexMeta, code: %d", ret);
    return ret;
  }
  auto index_meta_segment = storage->get("IndexMeta");
  // Same guard as for the other segments: never write through a null handle
  if (ailego_unlikely(!index_meta_segment)) {
    LOG_ERROR("Failed to get segment IndexMeta");
    return IndexError_Runtime;
  }
  if (index_meta_segment->write(0, str.data(), str.size()) != str.size()) {
    LOG_ERROR("Failed to write segment data, size=%zu", str.size());
    return IndexError_WriteData;
  }
  // `size` still holds the size of the linear meta segment here
  *stats_.mutable_index_size() += size;

  // Init Linear List Head Segment
  size = ailego_align(sizeof(BlockLocation) * linear_list_count(), page);
  ret = storage->append(FLAT_LINEAR_LIST_HEAD_SEG_ID, size);
  if (ailego_unlikely(ret != 0)) {
    LOG_ERROR("Failed to append segment %s for %s, size=%zu",
              FLAT_LINEAR_LIST_HEAD_SEG_ID.c_str(), IndexError::What(ret),
              size);
    return ret;
  }
  segment = storage->get(FLAT_LINEAR_LIST_HEAD_SEG_ID);
  if (ailego_unlikely(!segment)) {
    LOG_ERROR("Failed to get segment %s", FLAT_LINEAR_LIST_HEAD_SEG_ID.c_str());
    return IndexError_Runtime;
  }
  if (segment->resize(size) != size) {
    LOG_ERROR("Failed to resize segment, size=%zu", size);
    return IndexError_WriteData;
  }
  segments_.emplace_back(std::move(segment));

  *stats_.mutable_index_size() += size;

  return 0;
}

//! Load meta_ from the linear meta segment of `storage` and validate it
//! against the current settings.
//! The stored block vector count, block size, linear list count and the
//! index meta (data type, dimension, element size, metric name) must match
//! the configured ones. On success meta_ is replaced by the stored copy,
//! except for segment_size, which keeps its configured value.
//! Returns 0, IndexError_InvalidFormat or IndexError_Mismatch.
int FlatStreamerEntity::load_linear_meta(IndexStorage::Pointer storage) {
  AdjustSegmentSize(&meta_);

  // Load Meta Segment
  auto meta_segment = storage->get(FLAT_LINEAR_META_SEG_ID);
  if (!meta_segment || meta_segment->data_size() < sizeof(meta_)) {
    LOG_ERROR("Missing segment %s, or invalid segment size",
              FLAT_LINEAR_META_SEG_ID.c_str());
    return IndexError_InvalidFormat;
  }
  const auto segment_bytes = meta_segment->data_size();
  IndexStorage::MemoryBlock data_block;
  if (meta_segment->read(0, data_block, segment_bytes) != segment_bytes) {
    LOG_ERROR("Failed to read storage, size=%zu", segment_bytes);
    return IndexError_InvalidFormat;
  }

  auto *stored = reinterpret_cast<const decltype(meta_) *>(data_block.data());
  const auto &stored_hdr = stored->header;
  const auto &wanted_hdr = meta_.header;

  if (stored_hdr.block_vector_count != wanted_hdr.block_vector_count) {
    LOG_ERROR("Unmatched BlockVecCount Setting, Index %u vs Setting %u",
              stored_hdr.block_vector_count, wanted_hdr.block_vector_count);
    return IndexError_Mismatch;
  }
  if (stored_hdr.block_size != wanted_hdr.block_size) {
    LOG_ERROR("Unmatched BlockSize Setting, Index %u vs Setting %u",
              stored_hdr.block_size, wanted_hdr.block_size);
    return IndexError_Mismatch;
  }
  // The serialized index meta must fit behind the fixed part of the meta
  if (stored_hdr.index_meta_size + sizeof(meta_) > segment_bytes) {
    LOG_ERROR("Invalid format, IndexMetaSize %u, SegmentSize %zu",
              stored_hdr.index_meta_size, segment_bytes);
    return IndexError_InvalidFormat;
  }
  if (stored_hdr.linear_list_count != wanted_hdr.linear_list_count) {
    LOG_ERROR("Unmatch LinearListCount, Index size %u vs Setting %u",
              stored_hdr.linear_list_count, wanted_hdr.linear_list_count);
    return IndexError_InvalidFormat;
  }

  IndexMeta index_meta;
  if (!index_meta.deserialize(stored_hdr.index_meta,
                              stored_hdr.index_meta_size)) {
    LOG_ERROR("Failed to deserialize IndexMeta, size=%u",
              stored_hdr.index_meta_size);
    return IndexError_InvalidFormat;
  }
  const bool same_meta =
      index_meta.data_type() == index_meta_.data_type() &&
      index_meta.dimension() == index_meta_.dimension() &&
      index_meta.element_size() == index_meta_.element_size() &&
      index_meta.metric_name() == index_meta_.metric_name();
  if (!same_meta) {
    LOG_ERROR(
        "Unmatch IndexMeta, Index(type=%u dim=%u elemsize=%u "
        "metric=%s) Setting(type=%u dim=%u elemsize=%u metric=%s)",
        index_meta.data_type(), index_meta.dimension(),
        index_meta.element_size(), index_meta.metric_name().c_str(),
        index_meta_.data_type(), index_meta_.dimension(),
        index_meta_.element_size(), index_meta_.metric_name().c_str());
    return IndexError_Mismatch;
  }

  // Segment Size can be reconfigurable
  const auto configured_segment_size = meta_.segment_size;
  std::memcpy(&meta_, stored, sizeof(meta_));
  meta_.segment_size = configured_segment_size;
  return 0;
}

//! Walk the block chain starting at `block` and register every stored key.
//! For each slot that is not set in the block's deletion map, the key is
//! mapped to its vector location in key_info_map_ and appended to
//! id_key_vector_.
//! Returns 0 on success or IndexError_ReadData when a block header, key
//! array or deletion map cannot be read.
int FlatStreamerEntity::load_segment_keys_to_map(BlockLocation block) {
  while (this->is_valid_block(block)) {
    IndexStorage::MemoryBlock block_header_block;
    this->get_block_header(block, block_header_block);
    const BlockHeader *hd =
        reinterpret_cast<const BlockHeader *>(block_header_block.data());
    if (ailego_unlikely(hd == nullptr)) {
      LOG_ERROR("Failed to get block header");
      return IndexError_ReadData;
    }
    IndexStorage::MemoryBlock keys_block;
    this->get_block_keys(block, keys_block);
    const uint64_t *keys =
        reinterpret_cast<const uint64_t *>(keys_block.data());
    IndexStorage::MemoryBlock deletion_map_block;
    this->get_block_deletion_map(block, deletion_map_block);
    const DeletionMap *deletion_map =
        reinterpret_cast<const DeletionMap *>(deletion_map_block.data());
    // Both arrays are only dereferenced for non-empty blocks, so only those
    // blocks turn a failed read into an error.
    if (hd->vector_count > 0) {
      if (ailego_unlikely(keys == nullptr)) {
        LOG_ERROR("Failed to get block keys");
        return IndexError_ReadData;
      }
      if (ailego_unlikely(deletion_map == nullptr)) {
        LOG_ERROR("Failed to get block deletion map");
        return IndexError_ReadData;
      }
    }

    for (uint32_t vector_index = 0; vector_index < hd->vector_count;
         ++vector_index) {
      if (deletion_map->test(vector_index)) {
        continue;
      }
      size_t vector_off =
          this->get_block_vector_offset(block.block_index, vector_index);
      key_info_map_[keys[vector_index]] =
          VectorLocation(block.segment_id, false, vector_off);
      id_key_vector_.push_back(keys[vector_index]);
    }
    block = hd->next;
  }
  return 0;
}

int FlatStreamerEntity::load_segment_keys_to_vector() {
  for (uint32_t i = 0; i < meta_.header.total_vector_count; i++) {
    size_t block_id = i / block_vector_count();
    uint32_t vector_index = i % block_vector_count();

    ailego_assert(segments_.size() > 1);
    size_t segment_block_count =
        segments_[1]->data_size() / linear_block_size();
    size_t segment_id = block_id / segment_block_count + 1;
    size_t real_block_id = block_id % segment_block_count;
    size_t vector_off =
        this->get_block_vector_offset(real_block_id, vector_index);

    withid_key_info_map_.push_back(
        VectorLocation(segment_id, false, vector_off));
    size_t key_off = get_block_key_offset(real_block_id, vector_index);
    withid_key_map_.push_back(key_off);
  }
  return 0;
}

//! Load an existing index from `storage`: linear meta first, then the list
//! head segment and every feature segment, then the key lookup structures.
//! Finally the loaded figures are published to stats_.
//!
//! \return 0 on success, otherwise an IndexError_* code
int FlatStreamerEntity::load_storage(IndexStorage::Pointer storage) {
  int status = this->load_linear_meta(storage);
  if (ailego_unlikely(status != 0)) {
    return status;
  }

  // The head segment stores one BlockLocation per linear list.
  auto head_seg = storage->get(FLAT_LINEAR_LIST_HEAD_SEG_ID);
  if (ailego_unlikely(!head_seg)) {
    LOG_ERROR("Failed to get segment %s", FLAT_LINEAR_LIST_HEAD_SEG_ID.c_str());
    return IndexError_Runtime;
  }
  if (head_seg->data_size() < linear_list_count() * sizeof(BlockLocation)) {
    LOG_ERROR("Invalid segment size, LinearListCount=%zu, size=%zu",
              linear_list_count(), head_seg->data_size());
    return IndexError_InvalidFormat;
  }
  segments_.emplace_back(head_seg);

  // Feature segments are numbered from 1; accumulate their capacity.
  size_t total_capacity = head_seg->capacity();
  for (size_t seg_no = 1; seg_no <= meta_.segment_count; ++seg_no) {
    std::string seg_name =
        ailego::StringHelper::Concat(FLAT_SEGMENT_FEATURES_SEG_ID, seg_no);
    auto feature_seg = storage->get(seg_name);
    const bool usable =
        feature_seg && feature_seg->data_size() >= meta_.header.block_size;
    if (!usable) {
      LOG_ERROR("Failed to get segment %s, or invalid segment size",
                seg_name.c_str());
      return IndexError_InvalidFormat;
    }
    total_capacity += feature_seg->capacity();
    segments_.emplace_back(std::move(feature_seg));
  }

  // Populate the key lookup structures once per linear list.
  for (size_t list_no = 0; list_no != linear_list_count(); ++list_no) {
    IndexStorage::MemoryBlock head_block;
    this->get_head_block(head_block);
    const BlockLocation *head_loc =
        reinterpret_cast<const BlockLocation *>(head_block.data());
    if (ailego_unlikely(head_loc == nullptr)) {
      LOG_ERROR("Failed to get block loc");
      return IndexError_ReadData;
    }
    BlockLocation first_block = *head_loc;
    status = use_key_info_map_ ? this->load_segment_keys_to_map(first_block)
                               : this->load_segment_keys_to_vector();
    if (ailego_unlikely(status != 0)) {
      return status;
    }
  }

  char created_at[32];
  char updated_at[32];
  ailego::Realtime::Gmtime(meta_.create_time, "%Y-%m-%d %H:%M:%S", created_at,
                           sizeof(created_at));
  ailego::Realtime::Gmtime(meta_.update_time, "%Y-%m-%d %H:%M:%S", updated_at,
                           sizeof(updated_at));
  LOG_DEBUG(
      "Load Index, IndexSize=%zu SegmentCount=%u SegmentSize=%u "
      "RevisionId=%zu BlockCount=%u BlockSize=%u "
      "BlockVectorCount=%u LinearListCount=%u TotalVecCount=%zu "
      "CreateTime=%s UpdateTime=%s",
      total_capacity, meta_.segment_count, meta_.segment_size,
      static_cast<size_t>(meta_.revision_id), meta_.header.block_count,
      meta_.header.block_size, meta_.header.block_vector_count,
      meta_.header.linear_list_count,
      static_cast<size_t>(meta_.header.total_vector_count), created_at,
      updated_at);

  // Publish what was loaded.
  stats_.set_index_size(total_capacity);
  stats_.set_check_point(storage->check_point());
  stats_.set_create_time(meta_.create_time);
  stats_.set_revision_id(meta_.revision_id);
  stats_.set_update_time(meta_.update_time);
  stats_.set_loaded_count(meta_.header.total_vector_count);

  return 0;
}

//! Attach the next feature segment (named by its index in segments_) to the
//! entity. A segment already present in storage is reused when it still has
//! room for one block; otherwise a new one is appended to storage. The
//! updated meta is written back to the meta segment.
//!
//! \return 0 on success, otherwise an IndexError_* code
int FlatStreamerEntity::alloc_segment(void) {
  const size_t next_index = segments_.size();
  if (next_index == kMaxSegmentId) {
    LOG_ERROR("Failed to alloc new segment, exceed max count %zu",
              kMaxSegmentId);
    return IndexError_IndexFull;
  }

  const std::string seg_name =
      ailego::StringHelper::Concat(FLAT_SEGMENT_FEATURES_SEG_ID, next_index);
  const size_t aligned_size =
      ailego_align(meta_.segment_size, ailego::MemoryHelper::PageSize());

  auto seg = storage_->get(seg_name);
  if (!seg) {
    // Nothing stored under this name yet: create it, then fetch it.
    const int ret = storage_->append(seg_name, aligned_size);
    if (ailego_unlikely(ret != 0)) {
      LOG_ERROR("Failed to alloc segment from storage");
      return ret;
    }
    seg = storage_->get(seg_name);
    if (ailego_unlikely(!seg)) {
      LOG_ERROR("Failed to get segment %s", seg_name.c_str());
      return IndexError_Runtime;
    }
  } else if (seg->padding_size() < linear_block_size()) {
    // An existing segment is only reusable if one more block fits.
    LOG_ERROR(
        "Unexpect segment, index=%zu, data_size=%zu "
        "padding_size=%zu block_size=%zu",
        next_index, seg->data_size(), seg->padding_size(),
        linear_block_size());
    return IndexError_Runtime;
  } else {
    LOG_WARN("Alloc an existing segment=%s capacity=%zu", seg_name.c_str(),
             seg->capacity());
  }

  // Account for the new segment in meta, the segment list and the stats.
  meta_.segment_count += 1;
  meta_.header.linear_body_size += aligned_size;
  segments_.emplace_back(std::move(seg));
  *stats_.mutable_index_size() += aligned_size;

  // Persist the updated meta information.
  auto meta_seg = storage_->get(FLAT_LINEAR_META_SEG_ID);
  if (ailego_unlikely(!meta_seg)) {
    LOG_ERROR("Failed to get segment %s", FLAT_LINEAR_META_SEG_ID.c_str());
    return IndexError_Runtime;
  }
  if (meta_seg->write(0, &meta_, sizeof(meta_)) != sizeof(meta_)) {
    LOG_ERROR("Failed to write meta segment");
    return IndexError_WriteData;
  }

  return 0;
}

int FlatStreamerEntity::alloc_block(const BlockLocation &next,
                                    BlockLocation *block) {
  if (segments_.size() <= 1 ||
      segments_.back()->padding_size() < linear_block_size()) {
    int ret = this->alloc_segment();
    if (ailego_unlikely(ret != 0)) {
      return ret;
    }
  }

  auto &segment = segments_.back();
  size_t block_index = segment->data_size() / linear_block_size();
  if (block_index == kMaxBlockId) {
    LOG_ERROR("Failed to alloc block, exceed max count %zu per segment",
              kMaxBlockId);
    return IndexError_IndexFull;
  }

  BlockHeader header;
  header.next = next;
  header.vector_count = 0;
  header.column_major = false;

  size_t hd_off = segment->data_size() + linear_block_size() - sizeof(header);
  if (segment->write(hd_off, &header, sizeof(header)) != sizeof(header)) {
    LOG_ERROR("Failed to write block header");
    return IndexError_WriteData;
  }

  size_t del_off = hd_off - sizeof(DeletionMap);
  DeletionMap reset_del_map{};
  if (segment->write(del_off, &reset_del_map, sizeof(reset_del_map)) !=
      sizeof(reset_del_map)) {
    LOG_ERROR("Failed to write block deletion map");
    return IndexError_WriteData;
  }

  ++meta_.header.block_count;
  block->segment_id = segments_.size() - 1;
  block->block_index = (segment->data_size() / linear_block_size()) - 1;

  return 0;
}

int FlatStreamerEntity::add_to_block(const BlockLocation &block, uint64_t key,
                                     const void *data, size_t size) {
  IndexStorage::MemoryBlock block_header_block;
  this->get_block_header(block, block_header_block);
  const BlockHeader *header =
      reinterpret_cast<const BlockHeader *>(block_header_block.data());
  if (ailego_unlikely(header == nullptr)) {
    LOG_ERROR("Failed to get header");
    return IndexError_ReadData;
  }

  if (header->vector_count == block_vector_count()) {
    return IndexError_IndexFull;
  }

  auto &segment = segments_[block.segment_id];

  size_t vector_off =
      get_block_vector_offset(block.block_index, header->vector_count);
  if (segment->write(vector_off, data, size) != size) {
    LOG_ERROR("Failed to write vector, off=%zu size=%zu", vector_off, size);
    return IndexError_WriteData;
  }

  size_t key_off =
      get_block_key_offset(block.block_index, header->vector_count);
  if (segment->write(key_off, &key, sizeof(key)) != sizeof(key)) {
    LOG_ERROR("Failed to write key, off=%zu", key_off);
    return IndexError_WriteData;
  }

  BlockHeader hd = *header;
  hd.vector_count += 1;
  size_t hd_off = get_block_header_offset(block.block_index);
  if (segment->write(hd_off, &hd, sizeof(hd)) != sizeof(hd)) {
    LOG_ERROR("Failed to write block header, off=%zu", hd_off);
    return IndexError_WriteData;
  }

  VectorLocation loc(block.segment_id, false, vector_off);
  key_info_map_lock_->lock();
  key_info_map_[key] = loc;
  id_key_vector_.push_back(key);
  withid_key_info_map_.push_back(loc);
  withid_key_map_.push_back(key_off);
  key_info_map_lock_->unlock();

  ++meta_.header.total_vector_count;
  return 0;
}

int FlatStreamerEntity::add_vector_with_id(const uint32_t id, const void *query,
                                           const uint32_t size) {
  std::lock_guard<std::mutex> lock(mutex_);
  // if (filter_same_key_) {
  //   key_info_map_lock_->lock_shared();
  //   if (key_info_map_.find(id) != key_info_map_.end()) {
  //     key_info_map_lock_->unlock_shared();
  //     LOG_WARN("Try to add duplicate key, drop it");
  //     return IndexError_Duplicate;
  //   }
  //   key_info_map_lock_->unlock_shared();
  // }

  if (size != static_cast<size_t>(index_meta_.element_size())) {
    LOG_ERROR("Failed to add, mismatch size %u vs elemsize %u", size,
              index_meta_.element_size());
    return IndexError_Mismatch;
  }


  if (id >= vector_count()) {
    IndexStorage::MemoryBlock head_block;
    this->get_head_block(head_block);
    BlockLocation block =
        *reinterpret_cast<const BlockLocation *>(head_block.data());
    if (!this->is_valid_block(block)) {
      int ret = this->alloc_block(block, &block);
      if (ailego_unlikely(ret != 0)) {
        return ret;
      }
      ret = this->update_head_block(block);
      if (ailego_unlikely(ret != 0)) {
        return ret;
      }
    }
    for (size_t start_id = vector_count(); start_id < id; ++start_id) {
      std::vector<char> vec(size);
      int ret = this->add_to_block(block, kInvalidKey, vec.data(), size);
      if (ret == IndexError_IndexFull) {
        ret = this->alloc_block(block, &block);
        if (ailego_unlikely(ret != 0)) {
          return ret;
        }
        ret = this->update_head_block(block);
        if (ailego_unlikely(ret != 0)) {
          return ret;
        }
        ret = this->add_to_block(block, kInvalidKey, vec.data(), size);
        if (ailego_unlikely(ret != 0)) {
          return ret;
        }
      }
    }

    int ret = this->add_to_block(block, id, query, size);
    if (ret == IndexError_IndexFull) {
      ret = this->alloc_block(block, &block);
      if (ailego_unlikely(ret != 0)) {
        return ret;
      }
      ret = this->update_head_block(block);
      if (ailego_unlikely(ret != 0)) {
        return ret;
      }
      ret = this->add_to_block(block, id, query, size);
      if (ailego_unlikely(ret != 0)) {
        return ret;
      }
    }
  } else {
    VectorLocation vector_loc = withid_key_info_map_[id];
    auto segment = this->get_segment(vector_loc.segment_id);
    size_t vector_off = vector_loc.offset;
    if (segment->write(vector_off, query, size) != size) {
      LOG_ERROR("Failed to write vector, off=%zu size=%u", vector_off, size);
      return IndexError_WriteData;
    }
    size_t key_off = withid_key_map_[id];
    uint64_t key = id;
    if (segment->write(key_off, &key, sizeof(key)) != sizeof(key)) {
      LOG_ERROR("Failed to write key, off=%zu", key_off);
      return IndexError_WriteData;
    }
    key_info_map_lock_->lock();
    key_info_map_[key] = vector_loc;
    key_info_map_lock_->unlock();
  }
  (*stats_.mutable_added_count())++;
  stats_.set_revision_id(meta_.revision_id + 1);

  return 0;
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/flat/flat_streamer_entity.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <unordered_map>
#include <ailego/parallel/lock.h>
#include <ailego/utility/memory_helper.h>
#include <zvec/ailego/utility/string_helper.h>
#include <zvec/core/framework/index_context.h>
#include <zvec/core/framework/index_framework.h>
#include <zvec/core/framework/index_streamer.h>
#include "flat_index_format.h"
#include "flat_utility.h"

namespace zvec {
namespace core {

/*! Flat Streamer Entity
 *
 *  Storage-backed container of a flat streaming index. Vectors live in
 *  fixed-size blocks inside feature segments; segments_[0] is the list head
 *  segment and feature segments start at index 1.
 *
 *  Block layout, as implied by the offset helpers below:
 *    [vectors: block_vector_count() * element_size bytes]
 *    [keys:    block_vector_count() * sizeof(uint64_t) bytes]
 *    ...
 *    [DeletionMap][BlockHeader]   <- the last bytes of the block
 */
class FlatStreamerEntity {
 public:
  typedef std::shared_ptr<FlatStreamerEntity> Pointer;

  //! Constructor
  explicit FlatStreamerEntity(IndexStreamer::Stats &stats);

  //! Destructor
  virtual ~FlatStreamerEntity(void) = default;

  //! Open the entity with storage
  int open(IndexStorage::Pointer storage, const IndexMeta &mt);

  //! Close the entity
  int close(void);

  //! Flush Linear Meta information to storage
  int flush_linear_meta(void);

  //! Flush linear index to storage
  int flush(uint64_t checkpoint);

  //! Add vector to linear index
  int add(uint64_t key, const void *vec, size_t size);

  //! Search in linear list with filter
  int search(const void *query, const IndexFilter &filter, uint32_t *scan_count,
             IndexDocumentHeap *heap, IndexContext::Stats *context_stats) const;

  //! Search in a block
  void search_block(const void *query, const BlockLocation &bl,
                    const BlockHeader *hd, float norm_val,
                    IndexDocumentHeap *heap) const;

  //! Search in a block with filter
  void search_block(const void *query, const BlockLocation &bl,
                    const BlockHeader *hd, float norm_val,
                    const IndexFilter &filter, const DeletionMap *deletion_map,
                    IndexDocumentHeap *heap,
                    IndexContext::Stats *context_stats) const;

  //! Flat Search with filter
  int search_bf(const void *query, const IndexFilter &filter,
                IndexDocumentHeap *heap,
                IndexContext::Stats *context_stats) const;

  //! Clone the entity
  virtual FlatStreamerEntity::Pointer clone(void) const;

  //! Retrieve the total vectors in the index
  size_t vector_count(void) const {
    return meta_.header.total_vector_count;
  }

  //! Retrieve the linear list count
  size_t linear_list_count(void) const {
    return meta_.header.linear_list_count;
  }

  //! Retrieve block size of the linear vector
  size_t linear_block_size(void) const {
    return meta_.header.block_size;
  }

  //! Retrieve the vectors count in one block
  size_t block_vector_count(void) const {
    // assert(meta_.header.block_vector_count == 32);
    return meta_.header.block_vector_count;
  }

  //! Retrieve IndexMeta of the linear index
  const IndexMeta &meta(void) const {
    return index_meta_;
  }

  //! Retrieve mutable IndexMeta of the linear index
  IndexMeta *mutable_meta(void) {
    return &index_meta_;
  }

  //! Retrieve vector by primary key
  const void *get_vector_by_key(uint64_t key) const;

  //! Retrieve vector by primary key into a memory block
  int get_vector_by_key(const uint64_t key,
                        IndexStorage::MemoryBlock &block) const;

  //! Create a new iterator
  IndexProvider::Iterator::Pointer creater_iterator(void) const;


  //! Set the number of vectors stored per block
  void set_block_vector_count(uint32_t count) {
    meta_.header.block_vector_count = count;
  }

  //! Choose between the key map (true) and the id-indexed vectors (false)
  //! when keys are loaded from storage
  void set_use_key_info_map(bool use_id_map) {
    use_key_info_map_ = use_id_map;
    LOG_DEBUG("use_key_info_map_: %d", (int)use_key_info_map_);
  }

  //! Set the requested segment size
  void set_segment_size(uint32_t size) {
    meta_.segment_size = size;
  }

  //! Set the number of linear lists
  void set_linear_list_count(uint32_t count) {
    meta_.header.linear_list_count = count;
  }

  //! Enable or disable the same-key filter flag
  void enable_filter_same_key(bool enabled) {
    filter_same_key_ = enabled;
  }

  //! Retrieve the key recorded for a local id, kInvalidKey when out of range
  inline uint64_t key(uint32_t id) const {
    if (id < id_key_vector_.size()) {
      return id_key_vector_[id];
    } else {
      return kInvalidKey;
    }
  }

  //! Compute the distance between `query` and each of `fnum` consecutive
  //! row-major features, writing one float per feature into `out`
  inline void row_major_distance(const void *query, const void *feature,
                                 size_t fnum, float *out) const {
    const uint8_t *cur_feature = reinterpret_cast<const uint8_t *>(feature);
    for (size_t f = 0; f < fnum; ++f) {
      row_distance_(query, cur_feature, index_meta_.dimension(), out + f);
      cur_feature += index_meta_.element_size();
    }
  }

  //! Add (or overwrite) a vector addressed by a dense numeric id
  int add_vector_with_id(const uint32_t id, const void *query,
                         const uint32_t element_size);

 private:
  //! Disable them
  FlatStreamerEntity(const FlatStreamerEntity &) = delete;
  FlatStreamerEntity &operator=(const FlatStreamerEntity &) = delete;

  /*! Iterator of all the linear list
   */
  class Iterator : public IndexProvider::Iterator {
   public:
    //! Constructor
    Iterator(const FlatStreamerEntity::Pointer &entity) : entity_(entity) {
      this->read_next_block();
    }
    //! Retrieve pointer of data
    const void *data(void) const override {
      return reinterpret_cast<const char *>(data_) +
             block_vector_index_ * entity_->index_meta_.element_size();
    }
    //! Test if the iterator is valid
    bool is_valid(void) const override {
      return is_valid_;
    }
    //! Retrieve primary key
    uint64_t key(void) const override {
      return keys_[block_vector_index_];
    }
    //! Next iterator
    void next(void) override {
      // Move on to the following block once the current one is exhausted.
      if (++block_vector_index_ == block_vector_count_) {
        ++block_index_;
        this->read_next_block();
      }
    }

   private:
    //! Read next non-empty block
    void read_next_block(void);

    //! Members
    std::string buffer_{};
    const FlatStreamerEntity::Pointer entity_;
    IndexStorage::MemoryBlock block_;
    const void *data_{nullptr};
    const uint64_t *keys_{nullptr};
    uint32_t segment_id_{1u};  // The first segment is header info
    uint32_t block_index_{0u};
    uint32_t block_vector_index_{0u};
    uint32_t block_vector_count_{0u};
    bool is_valid_{true};
  };

  //! Retrieve storage segment by index, fetching segments that are not yet
  //! cached in segments_ from storage. Returns an empty pointer on failure.
  //! The result is returned as a const value; callers below bind it with
  //! `auto &`, which relies on that const qualifier.
  const IndexStorage::Segment::Pointer get_segment(size_t index) const {
    for (size_t i = segments_.size(); i <= index; ++i) {
      auto segment_id =
          ailego::StringHelper::Concat(FLAT_SEGMENT_FEATURES_SEG_ID, i);
      auto segment = storage_->get(segment_id);
      if (!segment) {
        LOG_ERROR("Failed to get segment %s", segment_id.c_str());
        return IndexStorage::Segment::Pointer();
      }
      segments_.emplace_back(std::move(segment));
    }
    return segments_[index];
  }

  //! Readjust the segment size: at least one block, a whole number of
  //! blocks, then aligned to the page size
  void AdjustSegmentSize(StreamerLinearMeta *mt) {
    if (mt->segment_size < mt->header.block_size) {
      mt->segment_size = mt->header.block_size;
    }
    mt->segment_size = ailego_align(
        mt->segment_size / mt->header.block_size * mt->header.block_size,
        ailego::MemoryHelper::PageSize());
  }

  //! Init with an empty storage
  int init_storage(IndexStorage::Pointer storage);

  //! Load linear meta information from storage
  int load_linear_meta(IndexStorage::Pointer storage);

  //! Load keys to keys map
  int load_segment_keys_to_map(BlockLocation block);

  //! Load keys to the id-indexed vectors
  int load_segment_keys_to_vector(void);

  //! Load index from storage
  int load_storage(IndexStorage::Pointer storage);

  //! Check whether the block location refers to a real block
  //! (segment 0 is the head segment and never holds blocks)
  bool is_valid_block(const BlockLocation &block) const {
    return block.segment_id != 0;
  }

  //! Update header block of an linear list
  int update_head_block(const BlockLocation &block) {
    ailego_assert_with(segments_.size() != 0, "Invalid Segments");

    auto &hd_segment = segments_[0];
    if (hd_segment->write(0, &block, sizeof(block)) != sizeof(block)) {
      LOG_ERROR("Failed to write head block location");
      return IndexError_WriteData;
    }

    return 0;
  }

  //! Alloc a new segment
  int alloc_segment(void);

  //! Alloc a new block
  int alloc_block(const BlockLocation &next, BlockLocation *block);

  //! Add a record to a block
  int add_to_block(const BlockLocation &block, uint64_t key, const void *data,
                   size_t size);

 private:
  //! Byte offset of a block inside its segment
  size_t get_block_offset(uint32_t block_index) const {
    return block_index * linear_block_size();
  }

  //! Byte offset of the block header (the last bytes of the block)
  size_t get_block_header_offset(uint32_t block_index) const {
    return get_block_offset(block_index) + linear_block_size() -
           sizeof(BlockHeader);
  }

  //! Byte offset of the deletion map (directly in front of the header)
  size_t get_block_deletion_map_offset(uint32_t block_index) const {
    return get_block_header_offset(block_index) - sizeof(DeletionMap);
  }

  //! Byte offset of the key of a vector slot (keys follow the vector area)
  size_t get_block_key_offset(uint32_t block_index,
                              uint32_t vector_index) const {
    return get_block_offset(block_index) +
           block_vector_count() * index_meta_.element_size() +
           sizeof(uint64_t) * vector_index;
  }

  //! Byte offset of a vector slot (vectors start at the block offset)
  size_t get_block_vector_offset(uint32_t block_index,
                                 uint32_t vector_index) const {
    return this->get_block_offset(block_index) +
           vector_index * index_meta_.element_size();
  }

  //! Get header block of an linear list
  int get_head_block(IndexStorage::MemoryBlock &header_block) const {
    ailego_assert_with(segments_.size() != 0, "Invalid Segments");
    auto &hd_segment = segments_[0];
    if (hd_segment->read(0, header_block, sizeof(BlockLocation)) !=
        sizeof(BlockLocation)) {
      LOG_ERROR("Failed to read head block location");
      return -1;
    }
    return 0;
  }

  //! Get BlockHeader of the block
  int get_block_header(const BlockLocation &block,
                       IndexStorage::MemoryBlock &header_block) const {
    // The header is located in the end of a block to align features
    auto &segment = this->get_segment(block.segment_id);
    ailego_assert_with(segment != nullptr, "Index Overflow");
    size_t off = this->get_block_header_offset(block.block_index);
    if (segment->read(off, header_block, sizeof(BlockHeader)) !=
        sizeof(BlockHeader)) {
      LOG_ERROR("Failed to read block header, off=%zu", off);
      return -1;
    }
    return 0;
  }

  //! Get DeletionMap of the block
  int get_block_deletion_map(
      const BlockLocation &block,
      IndexStorage::MemoryBlock &deletion_map_block) const {
    auto &segment = this->get_segment(block.segment_id);
    ailego_assert_with(segment != nullptr, "Index Overflow");
    size_t off = this->get_block_deletion_map_offset(block.block_index);
    if (segment->read(off, deletion_map_block, sizeof(DeletionMap)) !=
        sizeof(DeletionMap)) {
      LOG_ERROR("Failed to read deletion map, off=%zu", off);
      return -1;
    }
    return 0;
  }

  //! Get the whole key area of the block
  int get_block_keys(const BlockLocation &block,
                     IndexStorage::MemoryBlock &keys_block) const {
    auto &segment = this->get_segment(block.segment_id);
    ailego_assert_with(segment != nullptr, "Index Overflow");
    size_t off = this->get_block_key_offset(block.block_index, 0);
    if (segment->read(off, keys_block,
                      block_vector_count() * sizeof(uint64_t)) !=
        block_vector_count() * sizeof(uint64_t)) {
      LOG_ERROR("Failed to read block keys, off=%zu", off);
      return -1;
    }
    return 0;
  }

  //! Get the whole vector area of the block
  int get_block_vectors(const BlockLocation &block,
                        IndexStorage::MemoryBlock &vector_block) const {
    auto &segment = this->get_segment(block.segment_id);
    ailego_assert_with(segment != nullptr, "Index Overflow");
    size_t off = this->get_block_vector_offset(block.block_index, 0);
    if (segment->read(off, vector_block,
                      block_vector_count() * index_meta_.element_size()) !=
        block_vector_count() * index_meta_.element_size()) {
      LOG_ERROR("Failed to read block vectors, off=%zu", off);
      return -1;
    }
    return 0;
  }

 private:
  //! Constants
  static constexpr size_t kMaxSegmentId = std::numeric_limits<uint32_t>::max();
  static constexpr size_t kMaxBlockId = std::numeric_limits<uint32_t>::max();

  //! Members
  std::mutex mutex_{};
  IndexMeta index_meta_{};
  IndexStorage::Pointer storage_{};
  IndexMetric::MatrixDistance row_distance_{}, column_distance_{};
  // Segment cache; mutable because get_segment() fills it lazily.
  mutable std::vector<IndexStorage::Segment::Pointer> segments_{};
  StreamerLinearMeta meta_{};
  IndexStreamer::Stats &stats_;
  mutable std::shared_ptr<ailego::SharedMutex> key_info_map_lock_{};
  // key -> location of the vector
  std::unordered_map<uint64_t, VectorLocation> key_info_map_{};
  // id -> location of the vector
  std::vector<VectorLocation> withid_key_info_map_{};
  // id -> byte offset of the key inside its segment.
  // NOTE(review): offsets are computed as size_t and narrowed to 32 bits
  // here; confirm segments can never exceed 4 GiB.
  std::vector<uint32_t> withid_key_map_{};
  // id -> key
  std::vector<uint64_t> id_key_vector_{};
  bool filter_same_key_{false};
  bool use_key_info_map_{true};
  uint32_t vec_unit_size_{0};
  uint32_t vec_cols_{0};
  mutable std::string vec_buf_{};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/flat/flat_streamer_provider.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "flat_distance_matrix.h"
#include "flat_searcher.h"
#include "flat_streamer.h"
#include "flat_utility.h"

namespace zvec {
namespace core {

/*! Brute Force Streamer Provider
 */

template <size_t BATCH_SIZE>
class FlatStreamerProvider : public IndexProvider {
 public:
  //! Constructor
  FlatStreamerProvider(const FlatStreamer<BATCH_SIZE> *owner) {
    feature_size_ = owner->meta().element_size();
    total_vector_count_ = owner->entity().vector_count();
    owner_ = owner;
    block_buffer_.resize(BATCH_SIZE * feature_size_);
  }

  //! Create a new iterator
  IndexProvider::Iterator::Pointer create_iterator(void) override {
    return owner_->entity().creater_iterator();
  }

  //! Retrieve count of vectors
  size_t count(void) const override {
    return total_vector_count_;
  }

  //! Retrieve dimension of vector
  size_t dimension(void) const override {
    return owner_->meta().dimension();
  }

  //! Retrieve type of vector
  IndexMeta::DataType data_type(void) const override {
    return owner_->meta().data_type();
  }

  //! Retrieve vector size in bytes
  size_t element_size(void) const override {
    return owner_->meta().element_size();
  }

  //! Retrieve a vector using a primary key
  const void *get_vector(uint64_t key) const override {
    return this->get_vector_by_key(key);
  }

  int get_vector(const uint64_t key,
                 IndexStorage::MemoryBlock &block) const override {
    return this->get_vector_by_key(key, block);
  }

  //! Retrieve the owner class
  const std::string &owner_class(void) const override {
    return owner_->name();
  }

 protected:
  //! Retrieve a vector via primary key
  const void *get_vector_by_key(uint64_t key) const {
    return owner_->get_vector_by_key(key);
  }

  int get_vector_by_key(const uint64_t key,
                        IndexStorage::MemoryBlock &block) const {
    return owner_->get_vector_by_key(key, block);
  }

 private:
  //! Members
  const FlatStreamer<BATCH_SIZE> *owner_{nullptr};
  IndexStorage::Segment::Pointer features_segment_{};
  uint32_t feature_size_{0};
  uint32_t total_vector_count_{0};
  mutable std::vector<uint8_t> block_buffer_{};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/flat/flat_utility.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <mutex>
#include <ailego/utility/matrix_helper.h>
#include <zvec/ailego/utility/time_helper.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_factory.h>
#include <zvec/core/framework/index_meta.h>
#include <zvec/core/framework/index_metric.h>

namespace zvec {
namespace core {

//! The default size of reading a block (4 * 1024 * 1024, i.e. 4 MiB if the
//! unit is bytes -- TODO confirm the unit against the reader)
static constexpr uint32_t FLAT_DEFAULT_READ_BLOCK_SIZE = 4 * 1024 * 1024;
//! Name of the storage segment holding the streamer's linear meta
static const std::string FLAT_LINEAR_META_SEG_ID = "flat.linear_meta";
//! Name of the storage segment holding the head block location(s) of the
//! linear list(s)
static const std::string FLAT_LINEAR_LIST_HEAD_SEG_ID = "flat.linear_list_head";

//! Segment names. FLAT_SEGMENT_FEATURES_SEG_ID is used by the streamer entity
//! as a prefix that is concatenated with the segment index; the keys and
//! mapping names are presumably used by the builder/searcher -- verify there.
static const std::string FLAT_SEGMENT_KEYS_SEG_ID("flat.keys");
static const std::string FLAT_SEGMENT_FEATURES_SEG_ID("flat.features");
static const std::string FLAT_SEGMENT_MAPPING_SEG_ID("flat.mapping");

// index params
// (parameter keys; their exact semantics are defined by the code that reads
// them, which is not part of this header)
static const std::string PARAM_FLAT_COLUMN_MAJOR_ORDER(
    "proxima.flat.column_major_order");
static const std::string PARAM_FLAT_BATCH_SIZE("proxima.flat.batch_size");
static const std::string PARAM_FLAT_READ_BLOCK_SIZE(
    "proxima.flat.read_block_size");
static const std::string PARAM_FLAT_USE_ID_MAP("proxima.flat.use_id_map");

//! Compile-time predicate: `value` is true exactly when K is a power of two
//! (1, 2, 4, ...). A power of two shares no set bit with its predecessor.
template <size_t K>
struct IsEqualPowerofTwo
    : std::integral_constant<bool, (K != 0) && ((K & (K - 1)) == 0)> {};

//! Reverse-transpose a block of M rows, dispatching on the element width.
//! Widths other than 2, 4 or 8 bytes leave `dst` untouched.
template <size_t M>
static inline void ReverseTranspose(size_t align_size, const void *src,
                                    size_t dim, void *dst) {
  if (align_size == 2) {
    ailego::MatrixHelper::ReverseTranspose<uint16_t, M>(src, dim, dst);
  } else if (align_size == 4) {
    ailego::MatrixHelper::ReverseTranspose<uint32_t, M>(src, dim, dst);
  } else if (align_size == 8) {
    ailego::MatrixHelper::ReverseTranspose<uint64_t, M>(src, dim, dst);
  }
}

//! Reverse-transpose a block whose row count `m` is only known at runtime,
//! dispatching on the element width. Widths other than 2, 4 or 8 bytes leave
//! `dst` untouched.
static inline void ReverseTranspose(size_t align_size, const void *src,
                                    size_t m, size_t dim, void *dst) {
  if (align_size == 2) {
    ailego::MatrixHelper::ReverseTranspose<uint16_t>(src, m, dim, dst);
  } else if (align_size == 4) {
    ailego::MatrixHelper::ReverseTranspose<uint32_t>(src, m, dim, dst);
  } else if (align_size == 8) {
    ailego::MatrixHelper::ReverseTranspose<uint64_t>(src, m, dim, dst);
  }
}

//! Gather N elements of type T from `src` with a stride of M elements
//! (src[0], src[M], src[2M], ...) into the contiguous buffer `dst`.
template <typename T>
static inline void TransposeOne(const void *src, size_t M, size_t N,
                                void *dst) {
  const T *strided_in = reinterpret_cast<const T *>(src);
  T *packed_out = reinterpret_cast<T *>(dst);
  size_t read_pos = 0;
  for (size_t n = 0; n != N; ++n) {
    packed_out[n] = strided_in[read_pos];
    read_pos += M;
  }
}

//! Transpose a block of `m` rows, dispatching on the element width.
//! Widths other than 2, 4 or 8 bytes leave `dst` untouched.
static inline void Transpose(size_t align_size, const void *src, size_t m,
                             size_t dim, void *dst) {
  if (align_size == 2) {
    ailego::MatrixHelper::Transpose<uint16_t>(src, m, dim, dst);
  } else if (align_size == 4) {
    ailego::MatrixHelper::Transpose<uint32_t>(src, m, dim, dst);
  } else if (align_size == 8) {
    ailego::MatrixHelper::Transpose<uint64_t>(src, m, dim, dst);
  }
}

//! Transpose queries in batches of K and append the result to `out`.
//!
//! The current size of `out` is used as the byte offset into `query`, so the
//! function continues where a previous call left off. Full batches of K
//! queries are transposed; the remainder is handed to TransposeQueries<K/2>.
//! For K <= 1 exactly one query is appended unchanged.
template <size_t K>
void TransposeQueries(const void *query, const IndexQueryMeta &qmeta,
                      size_t query_count, std::string *out) {
  if (K <= 1) {
    ailego_assert(query_count == 1);
    (void)query_count;
    out->append(reinterpret_cast<const char *>(query) + out->size(),
                qmeta.element_size());
    return;
  }

  ailego_assert_with(IsEqualPowerofTwo<K>::value,
                     "K must be equal to two to the power of n.");

  const char *const input = reinterpret_cast<const char *>(query);
  const size_t full_batches = query_count / K;
  size_t cursor = out->size();
  out->resize(cursor + full_batches * K * qmeta.element_size());

  switch (IndexMeta::AlignSizeof(qmeta.data_type())) {
    case 2:
      for (size_t batch = 0; batch < full_batches; ++batch) {
        ailego::MatrixHelper::Transpose<uint16_t, K>(
            input + cursor, qmeta.element_size() / sizeof(uint16_t),
            &((*out)[cursor]));
        cursor += qmeta.element_size() * K;
      }
      break;

    case 4:
      for (size_t batch = 0; batch < full_batches; ++batch) {
        ailego::MatrixHelper::Transpose<uint32_t, K>(
            input + cursor, qmeta.element_size() / sizeof(uint32_t),
            &((*out)[cursor]));
        cursor += qmeta.element_size() * K;
      }
      break;

    case 8:
      for (size_t batch = 0; batch < full_batches; ++batch) {
        ailego::MatrixHelper::Transpose<uint64_t, K>(
            input + cursor, qmeta.element_size() / sizeof(uint64_t),
            &((*out)[cursor]));
        cursor += qmeta.element_size() * K;
      }
      break;

    default:
      ailego_check_with(0, "BAD CASE");
  }

  // Whatever does not fill a batch of K is retried with half the batch size.
  const size_t leftover = query_count % K;
  if (leftover != 0) {
    TransposeQueries<(K >> 1)>(query, qmeta, leftover, out);
  }
}

//! Create and initialize measure
static inline int InitializeMetric(const IndexMeta &mt,
                                   IndexMetric::Pointer *out) {
  IndexMetric::Pointer measure = IndexFactory::CreateMetric(mt.metric_name());
  if (!measure) {
    return IndexError_NoExist;
  }

  int error_code = measure->init(mt, mt.metric_params());
  if (error_code != 0) {
    return error_code;
  }
  *out = measure;
  return 0;
}

//! Verify measure
static inline bool VerifyMetric(const IndexMeta &meta) {
  IndexMetric::Pointer measure = IndexFactory::CreateMetric(meta.metric_name());
  if (!measure) {
    return false;
  }
  int error_code = measure->init(meta, meta.metric_params());
  if (error_code != 0) {
    return false;
  }
  return true;
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/flat_sparse/CMakeLists.txt
================================================
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

# Library target `core_knn_flat_sparse`: built from every .cc file in this
# directory and linked against `core_framework`. The include path covers this
# directory, src/core and src/core/algorithm.
# NOTE(review): STATIC/SHARED/STRICT/ALWAYS_LINK are options of the project's
# cc_library() helper (presumably from cmake/bazel.cmake) -- see it for their
# exact meaning.
cc_library(
    NAME core_knn_flat_sparse 
    STATIC SHARED STRICT ALWAYS_LINK
    SRCS *.cc
    LIBS core_framework 
    INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm
    VERSION "${PROXIMA_ZVEC_VERSION}"
  )


================================================
FILE: src/core/algorithm/flat_sparse/flat_sparse_builder.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "flat_sparse_builder.h"
#include <cstddef>
#include <cstdint>
#include <utility>
#include <utility/sparse_utility.h>
#include <zvec/ailego/utility/time_helper.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_logger.h>
#include "flat_sparse_index_format.h"
#include "flat_sparse_utility.h"

namespace zvec {
namespace core {

//! Constructor: all members rely on their in-class default initializers.
FlatSparseBuilder::FlatSparseBuilder() = default;

int FlatSparseBuilder::init(const IndexMeta &meta,
                            const ailego::Params & /*params*/) {
  LOG_INFO("Begin FlatSparseBuilder::init");

  meta_ = meta;

  state_ = BUILD_STATE_INITED;
  LOG_INFO("End FlatSparseBuilder::init");
  return 0;
}

//! Cleanup the builder: wipe every statistic and fall back to the INIT state.
//! The stored meta and holder are left as they are. Always returns 0.
int FlatSparseBuilder::cleanup() {
  LOG_INFO("Begin FlatSparseBuilder::cleanup");

  stats_.clear_attributes();

  // Counters
  stats_.set_trained_count(0UL);
  stats_.set_built_count(0UL);
  stats_.set_dumped_count(0UL);
  stats_.set_discarded_count(0UL);

  // Timings
  stats_.set_trained_costtime(0UL);
  stats_.set_built_costtime(0UL);
  stats_.set_dumped_costtime(0UL);

  state_ = BUILD_STATE_INIT;

  LOG_INFO("End FlatSparseBuilder::cleanup");
  return 0;
}

//! Train with a sparse holder. Nothing is learned: the call only zeroes the
//! training statistics and moves the builder from INITED to TRAINED.
//! Returns IndexError_NoReady when the builder is not in the INITED state.
int FlatSparseBuilder::train(IndexThreads::Pointer,
                             IndexSparseHolder::Pointer /*holder*/) {
  if (state_ == BUILD_STATE_INITED) {
    LOG_INFO("Begin FlatSparseBuilder::train");

    stats_.set_trained_count(0UL);
    stats_.set_trained_costtime(0UL);
    state_ = BUILD_STATE_TRAINED;

    LOG_INFO("End FlatSparseBuilder::train");
    return 0;
  }

  LOG_ERROR("Init the builder before FlatSparseBuilder::train");
  return IndexError_NoReady;
}

//! Train from an existing trainer. The trainer is not used: the call only
//! zeroes the training statistics and moves the builder from INITED to
//! TRAINED. Returns IndexError_NoReady when the builder is not INITED.
int FlatSparseBuilder::train(const IndexTrainer::Pointer & /*trainer*/) {
  if (state_ == BUILD_STATE_INITED) {
    LOG_INFO("Begin FlatSparseBuilder::train by trainer");

    stats_.set_trained_count(0UL);
    stats_.set_trained_costtime(0UL);
    state_ = BUILD_STATE_TRAINED;

    LOG_INFO("End FlatSparseBuilder::train by trainer");
    return 0;
  }

  LOG_ERROR("Init the builder before FlatSparseBuilder::train");
  return IndexError_NoReady;
}

//! Build the index. The holder is only validated and retained here; its
//! content is read later by dump().
//! Returns IndexError_InvalidArgument for a null holder and
//! IndexError_Mismatch when the holder does not match the stored index meta.
//! The current state is not checked before building.
int FlatSparseBuilder::build(IndexThreads::Pointer,
                             IndexSparseHolder::Pointer holder) {
  LOG_INFO("Begin FlatSparseBuilder::build");

  ailego::ElapsedTime timer;

  if (!holder) {
    LOG_ERROR("Input holder is nullptr while building index");
    return IndexError_InvalidArgument;
  } else if (!holder->is_matched(meta_)) {
    LOG_ERROR("Input holder doesn't match index meta while building index");
    return IndexError_Mismatch;
  }

  // Take ownership of the caller's reference.
  holder_ = std::move(holder);

  stats_.set_built_count(holder_->count());
  stats_.set_built_costtime(timer.milli_seconds());
  state_ = BUILD_STATE_BUILT;

  LOG_INFO("End FlatSparseBuilder::build");
  return 0;
}

//! Dump the index into storage.
//!
//! Serializes the index meta followed by the flat sparse segments written by
//! do_dump(). On success the holder reference is released, so a second dump
//! requires another build().
//!
//! \param dumper  Destination dumper; must not be null
//! \return 0 on success; IndexError_NoReady when build() has not completed
//!         (or the holder was already consumed); IndexError_InvalidArgument
//!         for a null dumper; otherwise the error from serialization/dumping
int FlatSparseBuilder::dump(const IndexDumper::Pointer &dumper) {
  if (state_ != BUILD_STATE_BUILT || !holder_) {
    // Reported as an error, in line with the state checks in train().
    LOG_ERROR("Build the index before FlatSparseBuilder::dump");
    return IndexError_NoReady;
  }

  // The helpers below dereference the raw dumper pointer unconditionally.
  if (!dumper) {
    LOG_ERROR("Input dumper is nullptr while dumping index");
    return IndexError_InvalidArgument;
  }

  LOG_INFO("Begin FlatSparseBuilder::dump");

  auto start_time = ailego::Monotime::MilliSeconds();

  int ret = IndexHelper::SerializeToDumper(meta_, dumper.get());
  if (ret != 0) {
    LOG_ERROR("Failed to serialize meta into dumper.");
    return ret;
  }

  uint32_t dump_count = 0;
  ret = do_dump(dumper, &dump_count);
  if (ret != 0) {
    LOG_ERROR("Failed to dump index");
    return ret;
  }

  // The holder has been fully consumed; drop the reference.
  holder_ = nullptr;
  stats_.set_dumped_count(dump_count);
  stats_.set_dumped_costtime(ailego::Monotime::MilliSeconds() - start_time);

  LOG_INFO("End FlatSparseBuilder::dump");
  return 0;
}

//! Write all flat sparse segments in order: meta, vector data plus offsets,
//! keys, and the key-sorted mapping. Stops at the first failing step and
//! returns its error code. On success `*dump_count` receives the number of
//! keys collected while dumping the vectors.
int FlatSparseBuilder::do_dump(const IndexDumper::Pointer &dumper,
                               uint32_t *dump_count) {
  IndexDumper *const raw_dumper = dumper.get();

  // bf meta
  const int meta_ret = dump_meta(raw_dumper);
  if (meta_ret != 0) {
    LOG_ERROR("Failed to dump meta");
    return meta_ret;
  }

  // Keys are gathered while the vectors are streamed out.
  std::vector<uint64_t> keys;
  const int data_ret = dump_vector_and_offset(raw_dumper, &keys);
  if (data_ret != 0) {
    LOG_ERROR("Failed to dump offset data");
    return data_ret;
  }

  const int keys_ret = dump_keys(keys, raw_dumper);
  if (keys_ret != 0) {
    LOG_ERROR("Failed to dump keys");
    return keys_ret;
  }

  const int mapping_ret = dump_mapping(keys, raw_dumper);
  if (mapping_ret != 0) {
    LOG_ERROR("Failed to dump mapping");
    return mapping_ret;
  }

  *dump_count = static_cast<uint32_t>(keys.size());
  return 0;
}

//! Write the FlatSparseMeta header segment.
//! create_time and update_time are both taken from the real-time clock and
//! doc_cnt from the holder; total_sparse_count keeps its default of 0.
//! The payload is zero-padded up to a multiple of 32 bytes before the segment
//! is registered. Returns IndexError_WriteData on a short write, otherwise
//! the result of the dumper's append().
int FlatSparseBuilder::dump_meta(IndexDumper *dumper) {
  FlatSparseMeta header;
  header.create_time = ailego::Realtime::Seconds();
  header.update_time = ailego::Realtime::Seconds();
  header.doc_cnt = holder_->count();

  const size_t header_size = sizeof(header);
  if (dumper->write(&header, header_size) != header_size) {
    LOG_ERROR("Failed to write meta");
    return IndexError_WriteData;
  }

  const size_t pad_size = ailego_align(header_size, 32) - header_size;
  if (pad_size != 0) {
    std::string zeros(pad_size, '\0');
    if (dumper->write(zeros.data(), pad_size) != pad_size) {
      LOG_ERROR("Failed to write meta padding");
      return IndexError_WriteData;
    }
  }

  return dumper->append(PARAM_FLAT_SPARSE_META_SEG_ID, header_size, pad_size,
                        0);
}

//! Stream every sparse vector of the holder into the data segment, then
//! write the (offset, length) table into the offset segment.
//!
//! For each element the key is appended to `*keys` and the encoded vector is
//! written through write_vector_data(). The offset of an entry is the number
//! of data bytes written before it. Each offset-table entry is a uint64_t
//! offset followed by a uint32_t length (12 bytes, written separately).
//!
//! \param dumper  Destination dumper
//! \param keys    Receives the keys in iteration order
//! \return 0 on success, IndexError_Runtime when no iterator can be created,
//!         IndexError_WriteData on any write/append failure
int FlatSparseBuilder::dump_vector_and_offset(IndexDumper *dumper,
                                              std::vector<uint64_t> *keys) {
  // iterate the holder
  auto iter = holder_->create_iterator();
  if (!iter) {
    LOG_ERROR("Failed to create iterator");
    return IndexError_Runtime;
  }

  uint64_t written_length{0U};

  std::vector<std::pair<uint64_t, uint32_t>> offset_lens;
  while (iter->is_valid()) {
    keys->push_back(iter->key());

    uint32_t length = 0;
    if (write_vector_data(iter->sparse_count(), iter->sparse_indices(),
                          iter->sparse_data(), dumper, &length) != 0) {
      return IndexError_WriteData;
    }

    offset_lens.emplace_back(written_length, length);
    written_length += length;
    iter->next();
  }

  // Close the data segment. The message names the vector data so that this
  // failure can be told apart from the offset segment failure below.
  if (dumper->append(PARAM_FLAT_SPARSE_DUMP_DATA_SEG_ID, written_length, 0,
                     0) != 0) {
    LOG_ERROR("Failed to append vector data");
    return IndexError_WriteData;
  }

  LOG_DEBUG("Data total written: %zu", (size_t)written_length);

  for (auto &offset_len : offset_lens) {
    if (dumper->write(&offset_len.first, sizeof(offset_len.first)) !=
        sizeof(offset_len.first)) {
      LOG_ERROR("Failed to write offset");
      return IndexError_WriteData;
    }

    if (dumper->write(&offset_len.second, sizeof(offset_len.second)) !=
        sizeof(offset_len.second)) {
      LOG_ERROR("Failed to write length");
      return IndexError_WriteData;
    }
  }

  const size_t offset_bytes =
      offset_lens.size() * (sizeof(uint64_t) + sizeof(uint32_t));
  if (dumper->append(PARAM_FLAT_SPARSE_DUMP_OFFSET_SEG_ID, offset_bytes, 0,
                     0) != 0) {
    LOG_ERROR("Failed to append offset data");
    return IndexError_WriteData;
  }

  LOG_DEBUG("Offset total written: %zu", offset_bytes);

  return 0;
}

//! Encode one sparse vector with SparseUtility::TransSparseFormat and write
//! the encoded bytes to the dumper.
//! On success `*length` receives the encoded byte count and 0 is returned;
//! a short write yields IndexError_WriteData and leaves `*length` untouched.
int FlatSparseBuilder::write_vector_data(const uint32_t sparse_count,
                                         const uint32_t *sparse_indices,
                                         const void *sparse_vec,
                                         IndexDumper *dumper,
                                         uint32_t *length) {
  std::string encoded;
  SparseUtility::TransSparseFormat(sparse_count, sparse_indices, sparse_vec,
                                   meta_.unit_size(), encoded);

  const size_t encoded_size = encoded.size();
  if (dumper->write(encoded.data(), encoded_size) != encoded_size) {
    LOG_ERROR("Failed to write sparse data");
    return IndexError_WriteData;
  }

  *length = static_cast<uint32_t>(encoded_size);
  return 0;
}

//! Write the keys segment: the raw uint64_t keys in the given order,
//! zero-padded up to a multiple of 32 bytes.
//! Returns IndexError_WriteData on a short write, otherwise the result of
//! the dumper's append().
int FlatSparseBuilder::dump_keys(const std::vector<uint64_t> &keys,
                                 IndexDumper *dumper) {
  const size_t payload_size = keys.size() * sizeof(uint64_t);
  const size_t pad_size = ailego_align(payload_size, 32) - payload_size;

  if (dumper->write(keys.data(), payload_size) != payload_size) {
    LOG_ERROR("Failed to write keys to dumper %s", dumper->name().c_str());
    return IndexError_WriteData;
  }

  if (pad_size != 0) {
    std::string zeros(pad_size, '\0');
    if (dumper->write(zeros.data(), zeros.size()) != zeros.size()) {
      LOG_ERROR("Failed to write padding to dumper %s", dumper->name().c_str());
      return IndexError_WriteData;
    }
  }

  return dumper->append(PARAM_FLAT_SPARSE_DUMP_KEYS_SEG_ID, payload_size,
                        pad_size, 0);
}

//! Write the mapping segment: the positions 0..keys.size()-1 reordered so
//! that the keys they refer to are ascending, zero-padded up to a multiple of
//! 32 bytes. The relative order of positions with equal keys is unspecified
//! (std::sort is not stable).
//!
//! \param keys    Keys in dump order
//! \param dumper  Destination dumper
//! \return IndexError_WriteData on a short write, otherwise the result of
//!         the dumper's append()
int FlatSparseBuilder::dump_mapping(const std::vector<uint64_t> &keys,
                                    IndexDumper *dumper) {
  std::vector<uint32_t> mapping(keys.size());
  // Seed with an unsigned value of the element type: an `int` seed would be
  // incremented as a signed int and overflow past INT_MAX entries.
  std::iota(mapping.begin(), mapping.end(), uint32_t{0});
  std::sort(
      mapping.begin(), mapping.end(),
      [&keys](uint32_t lhs, uint32_t rhs) { return (keys[lhs] < keys[rhs]); });

  size_t mapping_size = mapping.size() * sizeof(uint32_t);
  size_t mapping_padding_size = ailego_align(mapping_size, 32) - mapping_size;
  if (dumper->write(mapping.data(), mapping_size) != mapping_size) {
    LOG_ERROR("Failed to write data into dumper %s", dumper->name().c_str());
    return IndexError_WriteData;
  }

  // Write the padding if need
  if (mapping_padding_size) {
    std::string padding(mapping_padding_size, '\0');
    if (dumper->write(padding.data(), padding.size()) != padding.size()) {
      LOG_ERROR("Failed to write data into dumper %s", dumper->name().c_str());
      return IndexError_WriteData;
    }
  }
  return dumper->append(PARAM_FLAT_SPARSE_DUMP_MAPPING_SEG_ID, mapping_size,
                        mapping_padding_size, 0);
}

// Registration hook for FlatSparseBuilder.
// NOTE(review): presumably makes the builder creatable by name through
// IndexFactory -- confirm against the macro definition.
INDEX_FACTORY_REGISTER_BUILDER(FlatSparseBuilder);

}  // namespace core
}  // namespace zvec

================================================
FILE: src/core/algorithm/flat_sparse/flat_sparse_builder.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstdint>
#include <zvec/ailego/parallel/thread_pool.h>
#include <zvec/core/framework/index_builder.h>
#include <zvec/core/framework/index_dumper.h>
#include <zvec/core/framework/index_framework.h>
#include <zvec/core/framework/index_holder.h>

namespace zvec {
namespace core {

/*! Brute Force Sparse Builder
 *
 *  Retains the sparse holder passed to build() and serializes its content
 *  when dump() is called. Training performs no work. Only the sparse-holder
 *  overloads are implemented; the dense-holder overloads report
 *  IndexError_NotImplemented.
 */
class FlatSparseBuilder : public IndexBuilder {
 public:
  //! Constructor
  FlatSparseBuilder();

  //! Initialize the builder
  int init(const IndexMeta &meta, const ailego::Params &params) override;

  //! Cleanup the builder
  int cleanup() override;

  //! Train the data (sparse holder)
  int train(IndexThreads::Pointer, IndexSparseHolder::Pointer holder) override;

  //! Train the data (existing trainer)
  int train(const IndexTrainer::Pointer &trainer) override;

  //! Train the data (dense holder): not supported
  int train(IndexThreads::Pointer /*threads*/,
            IndexHolder::Pointer /*holder*/) override {
    return IndexError_NotImplemented;
  }

  //! Build the index (dense holder): not supported
  int build(IndexThreads::Pointer /*threads*/,
            IndexHolder::Pointer /*holder*/) override {
    return IndexError_NotImplemented;
  }

  //! Build the index (sparse holder)
  int build(IndexThreads::Pointer threads,
            IndexSparseHolder::Pointer holder) override;

  //! Dump index into storage
  int dump(const IndexDumper::Pointer &dumper) override;

  //! Retrieve statistics
  const Stats &stats() const override {
    return stats_;
  }

 private:
  //! Write every segment; reports the number of dumped keys
  int do_dump(const IndexDumper::Pointer &dumper, uint32_t *dump_count);

  //! Write the meta header segment
  int dump_meta(IndexDumper *dumper);

  //! Write the vector data segment and the offset segment; collects keys
  int dump_vector_and_offset(IndexDumper *dumper, std::vector<uint64_t> *keys);

  //! Encode and write a single sparse vector; reports its encoded length
  int write_vector_data(uint32_t sparse_count, const uint32_t *sparse_indices,
                        const void *sparse_vec, IndexDumper *dumper,
                        uint32_t *length);

  //! Write the keys segment
  int dump_keys(const std::vector<uint64_t> &keys, IndexDumper *dumper);

  //! Write the key-sorted mapping segment
  int dump_mapping(const std::vector<uint64_t> &keys, IndexDumper *dumper);

 private:
  //! Lifecycle of the builder
  enum BUILD_STATE {
    BUILD_STATE_INIT = 0,
    BUILD_STATE_INITED = 1,
    BUILD_STATE_TRAINED = 2,
    BUILD_STATE_BUILT = 3
  };

  //! Holder retained between build() and dump()
  IndexSparseHolder::Pointer holder_{};

  std::atomic_bool error_{false};
  IndexMeta meta_{};
  IndexMetric::Pointer measure_{};
  std::mutex mutex_{};
  std::condition_variable cond_{};
  Stats stats_{};

  BUILD_STATE state_{BUILD_STATE_INIT};
};


}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/flat_sparse/flat_sparse_context.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "flat_sparse_context.h"

namespace zvec {
namespace core {

//! Resolve the entity of whichever owner this context is bound to.
//! Returns nullptr when the context type is neither streamer nor searcher.
const FlatSparseEntity *FlatSparseContext::entity() const {
  switch (context_type_) {
    case kStreamerContext:
      return &streamer_owner_->entity();
    case kSearcherContext:
      return &searcher_owner_->entity();
    default:
      return nullptr;
  }
}

}  // namespace core
}  // namespace zvec

================================================
FILE: src/core/algorithm/flat_sparse/flat_sparse_context.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstdint>
#include <utility/sparse_utility.h>
#include <zvec/core/framework/index_context.h>
#include <zvec/core/framework/index_document.h>
#include "flat_sparse_entity.h"
#include "flat_sparse_searcher.h"
#include "flat_sparse_streamer.h"

namespace zvec {
namespace core {

class FlatSparseStreamer;
class FlatSparseSearcher;

/*! Brute Force Sparse Context
 *
 *  Per-query working state shared by the flat sparse streamer and searcher:
 *  the top-k heap, the per-query result lists and statistics, and the
 *  group-by settings. A context is bound to exactly one owner (streamer or
 *  searcher), which is used to reach the entity when vectors are fetched.
 */
class FlatSparseContext : public IndexContext {
 public:
  //! Kind of owner the context is currently bound to
  enum ContextType {
    kUnknownContext = 0,
    kSearcherContext = 1,
    kStreamerContext = 3
  };

  //! Constructor binding the context to a streamer.
  //! Note: magic_ is not set here; it is only assigned by reset(owner).
  FlatSparseContext(const FlatSparseStreamer *streamer_ptr)
      : streamer_owner_(streamer_ptr), context_type_(kStreamerContext) {}

  //! Constructor binding the context to a searcher.
  //! Note: magic_ is not set here; it is only assigned by reset(owner).
  FlatSparseContext(const FlatSparseSearcher *searcher_ptr)
      : searcher_owner_(searcher_ptr), context_type_(kSearcherContext) {}

  //! Destructor
  virtual ~FlatSparseContext(void) = default;

  //! Set topk of search result
  //! Also re-applies the heap limit and the current threshold to the heap.
  void set_topk(uint32_t topk) override {
    topk_ = topk;
    result_heap_.limit(topk_);
    result_heap_.set_threshold(this->threshold());
  }

  //! Retrieve search result
  //! Bounds-checked (vector::at): throws if no result slot exists yet.
  const IndexDocumentList &result(void) const override {
    return results_.at(0);
  }

  //! Retrieve search result with index
  //! Bounds-checked (vector::at).
  const IndexDocumentList &result(size_t index) const override {
    return results_.at(index);
  }

  //! Retrieve result object for output
  //! Bounds-checked (vector::at).
  IndexDocumentList *mutable_result(size_t idx) override {
    return &results_.at(idx);
  }

  //! Retrieve the shared top-k heap
  inline IndexDocumentHeap *result_heap() {
    return &result_heap_;
  }

  //! Update the parameters of context
  //! No parameter is consulted; always returns 0.
  int update(const ailego::Params & /*params*/) override {
    return 0;
  }

  //! Retrieve magic number
  uint32_t magic(void) const override {
    return magic_;
  }

  //! Enable or disable attaching the sparse vector to each result
  void set_fetch_vector(bool v) override {
    fetch_vector_ = v;
  }

  //! Whether the sparse vector is attached to each result
  bool fetch_vector() const override {
    return fetch_vector_;
  }

  //! Retrieve search group result of the first query
  //! Not bounds-checked (operator[]), unlike result().
  const IndexGroupDocumentList &group_result(void) const override {
    return group_results_[0];
  }

  //! Retrieve search group result with index
  //! Not bounds-checked (operator[]), unlike result(size_t).
  const IndexGroupDocumentList &group_result(size_t idx) const override {
    return group_results_[idx];
  }

  //! Retrieve group result object for output (not bounds-checked)
  IndexGroupDocumentList *mutable_group_result(size_t idx) {
    return &group_results_[idx];
  }

  //! Set group params
  //! Also drops any per-group heaps collected so far.
  void set_group_params(uint32_t group_num, uint32_t group_topk) override {
    group_num_ = group_num;
    group_topk_ = group_topk;
    result_group_heap_.clear();
  }

  //! Get if group by search
  //! True once a non-zero group count has been configured.
  inline bool group_by_search() {
    return group_num_ > 0;
  }

  //! Retrieve the configured per-group topk
  inline uint32_t group_topk() const {
    return group_topk_;
  }

  //! Retrieve the configured number of groups
  inline uint32_t group_num() const {
    return group_num_;
  }

  //! Reset the context (no-op)
  void reset() override {}

  //! Reset the context
  //! Rebinds to a streamer and adopts its magic number. The searcher pointer
  //! is left as it was.
  void reset(const FlatSparseStreamer *streamer_ptr) {
    magic_ = streamer_ptr->magic();
    streamer_owner_ = streamer_ptr;
    context_type_ = kStreamerContext;
  }

  //! Rebinds to a searcher and adopts its magic number. The streamer pointer
  //! is left as it was.
  void reset(const FlatSparseSearcher *searcher_ptr) {
    magic_ = searcher_ptr->magic();
    searcher_owner_ = searcher_ptr;
    context_type_ = kSearcherContext;
  }

  //! Reset all the query results
  //! In group-by mode only the group result vector is resized: existing
  //! entries are not cleared and stats_vec_ is not touched.
  //! Otherwise the heap is cleared and re-limited, and the per-query result
  //! and stats vectors are resized to `qnum` and cleared.
  void reset_results(size_t qnum) {
    if (group_by_search()) {
      group_results_.resize(qnum);
    } else {
      result_heap_.clear();
      result_heap_.limit(topk_);
      result_heap_.set_threshold(this->threshold());
      results_.resize(qnum);
      stats_vec_.resize(qnum);
      for (size_t i = 0; i < results_.size(); ++i) {
        results_[i].clear();
        stats_vec_[i].clear();
      }
    }
  }

  //! Retrieve the mutable statistics of query `idx` (asserted to be in range)
  Stats *mutable_stats(size_t idx = 0) {
    ailego_assert_with(stats_vec_.size() > idx, "invalid index");
    return &stats_vec_[idx];
  }

  //! Move the content of the top-k heap into the result list of query `idx`.
  //! An empty heap returns early and leaves results_[idx] as it is.
  //! Entries are emitted in sorted heap order, at most topk_ of them, and
  //! emission stops at the first score above the threshold.
  inline void topk_to_result(uint32_t idx) {
    if (ailego_unlikely(result_heap_.size() == 0)) {
      return;
    }

    ailego_assert_with(idx < results_.size(), "invalid idx");
    int size = std::min(topk_, static_cast<uint32_t>(result_heap_.size()));
    result_heap_.sort();
    results_[idx].clear();
    for (int i = 0; i < size; ++i) {
      auto score = result_heap_[i].score();
      if (score > this->threshold()) {
        break;
      }

      key_t key = result_heap_[i].key();
      if (fetch_vector_) {
        // Look the vector up again through the owner's entity.
        // The return code of get_sparse_vector() is not checked; a missing
        // vector is detected through a null data pointer and yields a
        // default-constructed sparse document.
        node_id_t id = entity()->get_id(key);
        IndexStorage::MemoryBlock vec_block;
        entity()->get_sparse_vector(id, vec_block);
        const void *sparse_data = vec_block.data();
        IndexSparseDocument sparse_doc;
        if (sparse_data != nullptr) {
          SparseUtility::ReverseSparseFormat(sparse_data, sparse_doc,
                                             entity()->sparse_unit_size());
        }
        results_[idx].emplace_back(key, score, id, nullptr, sparse_doc);
      } else {
        results_[idx].emplace_back(key, score);
      }
    }
  }

 private:
  //! Entity of the bound owner, or nullptr for an unknown context type
  const FlatSparseEntity *entity() const;

 private:
  const FlatSparseStreamer *streamer_owner_{nullptr};
  const FlatSparseSearcher *searcher_owner_{nullptr};
  ContextType context_type_{kUnknownContext};
  std::vector<Stats> stats_vec_{};
  uint32_t magic_{0};
  uint32_t topk_{0};
  IndexDocumentHeap result_heap_;
  // std::string batch_queries_{};
  bool fetch_vector_{false};

  // group
  uint32_t group_num_{0};
  uint32_t group_topk_{0};
  std::map<std::string, IndexDocumentHeap> result_group_heap_{};
  std::vector<IndexDocumentList> results_{};
  std::vector<IndexGroupDocumentList> group_results_{};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/flat_sparse/flat_sparse_entity.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <unordered_map>
#include <zvec/core/framework/index_framework.h>
#include "flat_sparse_index_format.h"

namespace zvec {
namespace core {

//! Dense, zero-based position of a document inside a flat sparse entity.
using node_id_t = uint32_t;
//! Sentinel node id (all bits set) meaning "no such document".
constexpr node_id_t kInvalidNodeId = std::numeric_limits<node_id_t>::max();

/*! Flat Sparse Entity
 *
 *  Abstract storage view used by the flat sparse searcher/streamer. Concrete
 *  entities supply the document count, the key <-> node id lookups, raw
 *  access to stored sparse vectors and the distance function; this base
 *  class builds the linear-scan search routines on top of them.
 *
 *  The class is a polymorphic base, so its destructor is virtual: destroying
 *  a derived entity through a base pointer is well defined.
 */
class FlatSparseEntity {
 public:
  typedef std::shared_ptr<FlatSparseEntity> Pointer;

  //! Constructor
  explicit FlatSparseEntity() {}

  //! Destructor
  virtual ~FlatSparseEntity() = default;

  //! Disable them
  FlatSparseEntity(const FlatSparseEntity &) = delete;
  FlatSparseEntity &operator=(const FlatSparseEntity &) = delete;

  //! Search in linear list with filter
  //! Scans every node id below doc_cnt(). Nodes whose key is kInvalidKey are
  //! skipped; keys rejected by a valid filter are skipped as well. Every
  //! remaining (key, distance) pair is offered to `heap`. Always returns 0.
  int search(const std::string &sparse_vector, const IndexFilter &filter,
             IndexDocumentHeap *heap) const {
    for (node_id_t i = 0; i < doc_cnt(); i++) {
      uint64_t key = get_key(i);
      if (ailego_unlikely(key == kInvalidKey)) {
        // LOG_ERROR("The key of node_id[%u] not found in keys map", i);
        // return IndexError_Runtime;
        continue;
      }
      if (!filter.is_valid() || !filter(key)) {
        float dist = get_search_distance(sparse_vector, i);
        heap->emplace(key, dist);
      }
    }

    return 0;
  }

  //! Search in linear list with filter and target pkeys
  //! Only the given keys are scored. Keys rejected by a valid filter or
  //! unknown to the entity are skipped silently. Always returns 0.
  int search_p_keys(const std::string &sparse_vector,
                    const std::vector<uint64_t> &p_keys,
                    const IndexFilter &filter, IndexDocumentHeap *heap) const {
    for (auto p_key : p_keys) {
      if (!filter.is_valid() || !filter(p_key)) {
        auto node_id = get_id(p_key);
        if (node_id != kInvalidNodeId) {
          float dist = get_search_distance(sparse_vector, node_id);
          heap->emplace(p_key, dist);
        }
      }
    }

    return 0;
  }

  //! Group search in linear list with filter
  //! Same scan as search(), but each hit goes into the heap of the group
  //! returned by `group_by_func(key)`. A group heap gets its limit set to
  //! `topk` while it is still empty.
  //! NOTE(review): unlike search(), a node with kInvalidKey aborts the scan
  //! with IndexError_Runtime instead of being skipped -- confirm whether the
  //! two are meant to differ.
  int search_group(
      const std::string &sparse_vector, const IndexFilter &filter,
      const std::function<std::string(uint64_t)> &group_by_func, uint32_t topk,
      std::unordered_map<std::string, IndexDocumentHeap> *heap) const {
    for (node_id_t i = 0; i < doc_cnt(); i++) {
      uint64_t key = get_key(i);
      if (ailego_unlikely(key == kInvalidKey)) {
        LOG_ERROR("The key of node_id[%u] not found in keys map", i);
        return IndexError_Runtime;
      }
      if (!filter.is_valid() || !filter(key)) {
        float dist = get_search_distance(sparse_vector, i);

        std::string group_id = group_by_func(key);

        auto &group_heap = (*heap)[group_id];
        if (group_heap.empty()) {
          group_heap.limit(topk);
        }
        group_heap.emplace(key, dist);
      }
    }

    return 0;
  }

  //! Group search in linear list with filter and target pkeys
  //! Only the given keys are scored; filtered or unknown keys are skipped.
  //! Always returns 0.
  int search_group_p_keys(
      const std::string &sparse_vector, const std::vector<uint64_t> &p_keys,
      const IndexFilter &filter,
      const std::function<std::string(uint64_t)> &group_by_func, uint32_t topk,
      std::unordered_map<std::string, IndexDocumentHeap> *heap) const {
    for (auto p_key : p_keys) {
      if (!filter.is_valid() || !filter(p_key)) {
        auto node_id = get_id(p_key);
        if (node_id != kInvalidNodeId) {
          float dist = get_search_distance(sparse_vector, node_id);

          std::string group_id = group_by_func(p_key);

          auto &group_heap = (*heap)[group_id];
          if (group_heap.empty()) {
            group_heap.limit(topk);
          }
          group_heap.emplace(p_key, dist);
        }
      }
    }

    return 0;
  }

  //! Get sparse vector by key
  //! Copies the stored bytes into `*sparse_vector`. Returns the lookup error
  //! (IndexError_NoExist for an unknown key) and leaves the output untouched
  //! on failure.
  int get_sparse_vector(uint64_t key, std::string *sparse_vector) const {
    const void *sparse_vector_ptr;
    uint32_t sparse_vector_len;
    int ret = get_sparse_vector_ptr_by_key(key, &sparse_vector_ptr,
                                           &sparse_vector_len);
    if (ret != 0) {
      return ret;
    }
    *sparse_vector = std::string(static_cast<const char *>(sparse_vector_ptr),
                                 sparse_vector_len);
    return 0;
  }

  //! Get sparse vector by node id
  //! Returns a pointer to the stored bytes, or nullptr when the lookup fails.
  const void *get_sparse_vector(node_id_t id) const {
    const void *sparse_vector_ptr;
    uint32_t sparse_vector_len;
    int ret =
        get_sparse_vector_ptr_by_id(id, &sparse_vector_ptr, &sparse_vector_len);
    if (ret != 0) {
      return nullptr;
    }
    return sparse_vector_ptr;
  }

  //! Get sparse vector by key through a storage memory block
  //! Copies the block's bytes into `*sparse_vector`; returns the lookup
  //! error on failure.
  int get_sparse_vector_by_key(const uint64_t key,
                               std::string *sparse_vector) const {
    uint32_t sparse_vector_len;
    IndexStorage::MemoryBlock sparse_vector_block;
    int ret = get_sparse_vector_ptr_by_key(key, sparse_vector_block,
                                           &sparse_vector_len);
    if (ret != 0) {
      return ret;
    }
    *sparse_vector =
        std::string(static_cast<const char *>(sparse_vector_block.data()),
                    sparse_vector_len);
    return 0;
  }

  //! Get sparse vector by node id into a storage memory block
  //! The vector length reported by the lookup is discarded.
  int get_sparse_vector(node_id_t id,
                        IndexStorage::MemoryBlock &sparse_vector_block) const {
    uint32_t sparse_vector_len;
    return get_sparse_vector_ptr_by_id(id, sparse_vector_block,
                                       &sparse_vector_len);
  }

  //! Resolve `key` to a node id and fetch the raw vector pointer and length
  //! Returns IndexError_NoExist when the key is unknown.
  int get_sparse_vector_ptr_by_key(uint64_t key, const void **sparse_vector_ptr,
                                   uint32_t *sparse_vector_len_ptr) const {
    auto node_id = get_id(key);
    if (node_id == kInvalidNodeId) {
      return IndexError_NoExist;
    }

    return get_sparse_vector_ptr_by_id(node_id, sparse_vector_ptr,
                                       sparse_vector_len_ptr);
  }

  //! Resolve `key` to a node id and fetch the vector as a memory block
  //! Returns IndexError_NoExist when the key is unknown.
  int get_sparse_vector_ptr_by_key(
      const uint64_t key, IndexStorage::MemoryBlock &sparse_vector_block,
      uint32_t *sparse_vector_len_ptr) const {
    auto node_id = get_id(key);
    if (node_id == kInvalidNodeId) {
      return IndexError_NoExist;
    }

    return get_sparse_vector_ptr_by_id(node_id, sparse_vector_block,
                                       sparse_vector_len_ptr);
  }

  //! Collect the keys of all documents in node id order
  //! If any node has no valid key, the result is the single-element vector
  //! {kInvalidKey} instead of a partial list.
  std::vector<uint64_t> get_keys() const {
    std::vector<uint64_t> keys;
    node_id_t doc_total_cnt = doc_cnt();
    // The final size is known up front; avoid repeated reallocation.
    keys.reserve(doc_total_cnt);
    for (node_id_t node_id = 0; node_id < doc_total_cnt; ++node_id) {
      uint64_t key = get_key(node_id);
      if (key == kInvalidKey) {
        return {kInvalidKey};
      } else {
        keys.push_back(key);
      }
    }

    return keys;
  }


 public:
  //! Number of node ids to scan
  virtual uint32_t doc_cnt() const = 0;

  virtual uint32_t total_sparse_count() const = 0;

  //! Node id of `key`; callers treat kInvalidNodeId as "not found"
  virtual node_id_t get_id(uint64_t key) const = 0;

  //! Key of node `id`; callers treat kInvalidKey as "no valid key"
  virtual uint64_t get_key(node_id_t id) const = 0;

  //! Raw pointer and length of the stored vector of node `id`; 0 on success
  virtual int get_sparse_vector_ptr_by_id(
      node_id_t id, const void **sparse_vector,
      uint32_t *sparse_vector_len) const = 0;

  //! Memory-block variant; the default reports IndexError_NotImplemented
  virtual int get_sparse_vector_ptr_by_id(
      const node_id_t /*id*/,
      IndexStorage::MemoryBlock & /*sparse_vector_block*/,
      uint32_t * /*sparse_vector_len*/) const {
    return IndexError_NotImplemented;
  }


  //! Distance between the query `vector` and the stored vector of a node
  virtual float get_search_distance(const std::string &vector,
                                    node_id_t target_node_id) const = 0;
  virtual size_t sparse_unit_size() const = 0;
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/flat_sparse/flat_sparse_index_format.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <zvec/core/framework/index_framework.h>

namespace zvec {
namespace core {

//! Sentinel primary key (largest uint64_t value); key lookups return it when
//! a key cannot be read
static constexpr uint64_t kInvalidKey = std::numeric_limits<uint64_t>::max();
//! Default chunk sizes used to initialise FlatSparseStreamerMeta
static constexpr uint32_t kDefaultOffsetChunkSize = 1024 * 1024;    // 1MB
static constexpr uint32_t kDefaultDataChunkSize = 8 * 1024 * 1024;  // 8MB

//! Meta record of a flat sparse index. The searcher entity copies this
//! struct verbatim out of the meta segment, so the field order and total
//! size are part of the on-disk format and must not change.
struct FlatSparseMeta {
  uint64_t create_time{0};  // creation timestamp (unit not visible here)
  uint64_t update_time{0};  // last update timestamp (unit not visible here)
  uint32_t doc_cnt{0};      // number of documents in the index
  // NOTE(review): presumably sparse elements summed over all documents
  uint32_t total_sparse_count{0};
  uint8_t reserved[8] = {0};  // padding; keeps the struct at 32 bytes
};

// Guard the size contract: the struct must stay a multiple of 32 bytes.
static_assert(sizeof(FlatSparseMeta) % 32 == 0,
              "FlatSparseMeta must be aligned with 32 bytes");

//! Chunk bookkeeping for the streamer: how many offset/data chunks exist and
//! how large each one is. Sizes start at the kDefault*ChunkSize constants.
//! NOTE(review): presumably persisted alongside FlatSparseMeta -- confirm
//! before reordering fields.
struct FlatSparseStreamerMeta {
  uint32_t offset_chunk_count{0};
  uint32_t offset_chunk_size{kDefaultOffsetChunkSize};
  uint32_t data_chunk_count{0};
  uint32_t data_chunk_size{kDefaultDataChunkSize};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/flat_sparse/flat_sparse_provider.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <string>
#include <utility/sparse_utility.h>
#include <zvec/core/framework/index_logger.h>
#include <zvec/core/framework/index_meta.h>
#include "flat_sparse_streamer_entity.h"

namespace zvec {
namespace core {

/*! Brute Force Sparse Index Provider
 *
 *  Exposes the sparse vectors of a flat sparse entity through the
 *  IndexSparseProvider interface: lookup by primary key plus a forward
 *  iterator over all documents.
 *
 *  FlatSparseEntityType is FlatSparseStreamerEntity or
 *  FlatSparseSearcherEntity.
 *
 *  NOTE: the IndexMeta is held by reference, so the object handed to the
 *  constructor must outlive the provider and every iterator it creates.
 */
template <typename FlatSparseEntityType>
class FlatSparseIndexProvider : public IndexSparseProvider {
 public:
  //! Constructor
  FlatSparseIndexProvider(const std::shared_ptr<FlatSparseEntityType> entity,
                          const IndexMeta &meta, const std::string &owner)
      : entity_(entity), meta_(meta), owner_class_(owner) {}

  //! Create a new iterator (a null pointer when the allocation fails)
  IndexSparseProvider::Iterator::Pointer create_iterator(void) override {
    return IndexSparseProvider::Iterator::Pointer(new (std::nothrow)
                                                      Iterator(entity_, meta_));
  }

  //! Retrieve count of vectors
  size_t count(void) const override {
    return entity_->doc_cnt();
  }

  //! Retrieve type of vector
  IndexMeta::DataType data_type(void) const override {
    return meta_.data_type();
  }

  //! Retrieve a vector using a primary key
  //! On success the packed sparse data is split into `sparse_count`,
  //! `sparse_indices_buffer` and `sparse_values_buffer`; on failure the
  //! entity's error code is returned and the outputs are left untouched.
  int get_sparse_vector(uint64_t key, uint32_t *sparse_count,
                        std::string *sparse_indices_buffer,
                        std::string *sparse_values_buffer) const override {
    std::string sparse_data;

    int ret = entity_->get_sparse_vector_by_key(key, &sparse_data);
    if (ailego_unlikely(ret != 0)) {
      LOG_ERROR("Failed to get sparse vector, key=%zu, ret=%s", (size_t)key,
                IndexError::What(ret));
      return ret;
    }

    SparseUtility::ReverseSparseFormat(sparse_data, sparse_count,
                                       sparse_indices_buffer,
                                       sparse_values_buffer, meta_.unit_size());
    return 0;
  }

  //! Retrieve the owner class
  const std::string &owner_class(void) const override {
    return owner_class_;
  }

  //! Retrieve the total sparse count reported by the entity
  size_t total_sparse_count() const override {
    return entity_->total_sparse_count();
  }

 private:
  /*! Forward iterator over the documents of the entity.
   *  Documents whose key is kInvalidKey are skipped. The iterator turns
   *  invalid when it runs past the last document or when the sparse vector
   *  of the current document cannot be read.
   */
  class Iterator : public IndexSparseProvider::Iterator {
   public:
    //! Constructor: positions the iterator on the first valid document
    Iterator(const std::shared_ptr<FlatSparseEntityType> &entity,
             const IndexMeta &meta)
        : entity_(entity), meta_(meta), cur_id_(0U), valid_(false) {
      reset();
    }

    //! Retrieve sparse count
    uint32_t sparse_count() const override {
      return sparse_count_;
    }

    //! Retrieve sparse indices
    const uint32_t *sparse_indices() const override {
      return reinterpret_cast<const uint32_t *>(sparse_indices_buffer_.data());
    }

    //! Retrieve sparse data
    const void *sparse_data() const override {
      return reinterpret_cast<const void *>(sparse_data_buffer_.data());
    }

    //! Test if the iterator is valid
    bool is_valid(void) const override {
      return cur_id_ < entity_->doc_cnt() && valid_;
    }

    //! Retrieve primary key
    uint64_t key(void) const override {
      return entity_->get_key(cur_id_);
    }

    //! Next iterator
    void next(void) override {
      if (cur_id_ == kInvalidNodeId) {
        // Already exhausted. Advancing again would compute cur_id_ + 1,
        // which wraps to 0 if kInvalidNodeId is the largest node_id_t value
        // and would silently restart the iteration.
        return;
      }
      cur_id_ = get_next_valid_id(cur_id_ + 1);
      load_current();
    }

    //! Reset the iterator to the first valid document
    void reset(void) {
      cur_id_ = get_next_valid_id(0);
      load_current();
    }

   private:
    //! First id >= start_id whose key is valid, or kInvalidNodeId if none
    node_id_t get_next_valid_id(node_id_t start_id) {
      for (node_id_t i = start_id; i < entity_->doc_cnt(); i++) {
        // get_key() yields a 64-bit key, so it has to be checked against the
        // key sentinel rather than the node-id sentinel.
        if (entity_->get_key(i) != kInvalidKey) {
          return i;
        }
      }
      return kInvalidNodeId;
    }

    //! Decode the sparse vector of the current document into the buffers.
    //! valid_ tells afterwards whether a vector is available.
    void load_current(void) {
      valid_ = false;
      sparse_count_ = 0U;
      sparse_indices_buffer_.clear();
      sparse_data_buffer_.clear();

      // Past the end (or empty index): there is nothing to read.
      if (cur_id_ >= entity_->doc_cnt()) {
        return;
      }

      IndexStorage::MemoryBlock sparse_data_block;
      entity_->get_sparse_vector(cur_id_, sparse_data_block);
      const void *sparse_data = sparse_data_block.data();
      if (sparse_data == nullptr) {
        return;
      }

      SparseUtility::ReverseSparseFormat(
          sparse_data, &sparse_count_, &sparse_indices_buffer_,
          &sparse_data_buffer_, meta_.unit_size());
      valid_ = true;
    }

   private:
    const std::shared_ptr<FlatSparseEntityType> entity_{nullptr};
    const IndexMeta &meta_;
    node_id_t cur_id_;
    uint32_t sparse_count_{0U};
    std::string sparse_indices_buffer_;
    std::string sparse_data_buffer_;
    bool valid_{false};
  };

 private:
  const std::shared_ptr<FlatSparseEntityType> entity_{nullptr};
  const IndexMeta &meta_;
  const std::string owner_class_;
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/flat_sparse/flat_sparse_search.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <ailego/math/inner_product_matrix.h>
#include "flat_sparse_context.h"

namespace zvec {
namespace core {

//! Turn a per-group heap map into the final group result list.
//!
//! Groups are ranked by the score of element 0 of their heap (ascending),
//! at most `group_num` groups are kept, and the heap of each kept group is
//! moved into the result. Groups with an empty heap are dropped.
static inline IndexGroupDocumentList ConvertGroupMapToResult(
    std::unordered_map<std::string, IndexDocumentHeap> group_map,
    uint32_t group_num) {
  using GroupScore = std::pair<std::string, float>;

  // Head score of every non-empty group.
  std::vector<GroupScore> ranked;
  for (auto &entry : group_map) {
    auto &docs = entry.second;
    if (docs.size() > 0) {
      const float head_score = docs[0].score();
      ranked.push_back(std::make_pair(entry.first, head_score));
    }
  }

  std::sort(ranked.begin(), ranked.end(),
            [](const GroupScore &lhs, const GroupScore &rhs) -> bool {
              return lhs.second < rhs.second;
            });

  // Keep the leading `group_num` groups only.
  IndexGroupDocumentList result;
  for (uint32_t pos = 0; pos < group_num && pos < ranked.size(); ++pos) {
    const std::string &group_id = ranked[pos].first;
    result.emplace_back(
        GroupIndexDocument(group_id, std::move(group_map[group_id])));
  }

  return result;
}

static inline int FlatSearch(const uint32_t *sparse_count,
                             const uint32_t *sparse_indices,
                             const void *sparse_query, bool with_p_keys,
                             const std::vector<std::vector<uint64_t>> &p_keys,
                             const IndexQueryMeta &qmeta, uint32_t count,
                             const IndexMeta, IndexContext::Pointer &context,
                             FlatSparseEntity *entity) {
  int ret;

  FlatSparseContext *ctx = dynamic_cast<FlatSparseContext *>(context.get());
  ailego_do_if_false(ctx) {
    LOG_ERROR("Cast context to FlatSparseContext failed");
    return IndexError_Cast;
  }

  // reset context results
  ctx->reset_results(count);

  const uint32_t *sparse_indices_tmp = sparse_indices;
  const void *sparse_query_tmp = sparse_query;

  if (ctx->group_by_search()) {
    if (!ctx->group_by().is_valid()) {
      LOG_ERROR("Invalid group-by function");
      return IndexError_InvalidArgument;
    }

    std::function<std::string(uint64_t)> group_by = [&](uint64_t key) {
      return ctx->group_by()(key);
    };

    for (size_t q = 0; q < count; ++q) {
      std::string sparse_query_buffer;
      ailego::MinusInnerProductSparseMatrix<float>::transform_sparse_format(
          sparse_count[q], sparse_indices_tmp, sparse_query_tmp,
          sparse_query_buffer);

      std::unordered_map<std::string, IndexDocumentHeap> group_heap{};

      if (with_p_keys) {
        ret = entity->search_group_p_keys(sparse_query_buffer, p_keys[q],
                                          ctx->filter(), group_by,
                                          ctx->group_topk(), &group_heap);
      } else {
        ret = entity->search_group(sparse_query_buffer, ctx->filter(), group_by,
                                   ctx->group_topk(), &group_heap);
      }

      if (ailego_unlikely(ret != 0)) {
        LOG_ERROR("Failed to search group, ret=%s", IndexError::What(ret));
        return ret;
      }

      // sort group heap
      for (auto &group : group_heap) {
        group.second.sort();
      }

      auto group_result =
          ConvertGroupMapToResult(std::move(group_heap), ctx->group_num());
      ctx->mutable_group_result(q)->swap(group_result);
    }
  } else {
    for (size_t q = 0; q < count; ++q) {
      std::string sparse_query_buffer;
      ailego::MinusInnerProductSparseMatrix<float>::transform_sparse_format(
          sparse_count[q], sparse_indices_tmp, sparse_query_tmp,
          sparse_query_buffer);

      auto heap = ctx->result_heap();

      if (with_p_keys) {
        ret = entity->search_p_keys(sparse_query_buffer, p_keys[q],
                                    ctx->filter(), heap);
      } else {
        ret = entity->search(sparse_query_buffer, ctx->filter(), heap);
      }

      if (ailego_unlikely(ret != 0)) {
        LOG_ERROR("Failed to search, ret=%s", IndexError::What(ret));
        return ret;
      }

      ctx->topk_to_result(q);

      sparse_indices_tmp += sparse_count[q];
      sparse_query_tmp = reinterpret_cast<const char *>(sparse_query_tmp) +
                         sparse_count[q] * qmeta.unit_size();
    }
  }

  return 0;
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/flat_sparse/flat_sparse_searcher.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "flat_sparse_searcher.h"
#include <utility/sparse_utility.h>
#include <zvec/core/framework/index_error.h>
#include "flat_sparse_context.h"
#include "flat_sparse_provider.h"
#include "flat_sparse_search.h"

namespace zvec {
namespace core {

//! Searcher revision understood by this implementation; load() rejects an
//! index whose meta reports a different searcher_revision().
const uint32_t FlatSparseSearcher::VERSION = 0U;

//! Constructor: every member is set up by its in-class initializer
FlatSparseSearcher::FlatSparseSearcher(void) = default;

//! Destructor: nothing to release beyond what the members release themselves
FlatSparseSearcher::~FlatSparseSearcher(void) = default;

int FlatSparseSearcher::init(const ailego::Params & /*params*/) {
  state_ = STATE_INITED;

  return 0;
}

int FlatSparseSearcher::cleanup(void) {
  this->unload();
  return 0;
}

//! Load an index from storage.
//! Steps: deserialize the index meta, verify the searcher revision, let the
//! entity load its segments, then switch to STATE_LOADED and draw a fresh
//! magic number. Requires a prior init().
//! @return 0 on success, otherwise an index error code
int FlatSparseSearcher::load(IndexStorage::Pointer container,
                             IndexMetric::Pointer /*measure*/) {
  if (state_ != STATE_INITED) {
    LOG_ERROR("Init the searcher first before load index");
    return IndexError_Runtime;
  }

  LOG_INFO("Begin FlatSparseSearcher::load");

  // Index meta comes first: both the revision check and the entity need it.
  const int meta_rc =
      IndexHelper::DeserializeFromStorage(container.get(), &meta_);
  if (meta_rc != 0) {
    LOG_ERROR("Failed to deserialize meta from container");
    return meta_rc;
  }

  const auto revision = meta_.searcher_revision();
  if (revision != VERSION) {
    LOG_ERROR("Unsupported searcher revision %u", meta_.searcher_revision());
    return IndexError_Unsupported;
  }

  const int entity_rc = entity_.load(container, meta_);
  if (entity_rc != 0) {
    LOG_ERROR("FlatSparseSearcher load index failed");
    return entity_rc;
  }

  state_ = STATE_LOADED;
  magic_ = IndexContext::GenerateMagic();

  LOG_INFO("End FlatSparseSearcher::load");

  return 0;
}

//! Unload the index: clears the meta, unloads the entity and falls back to
//! STATE_INITED so that load() can be called again.
//! @return always 0
int FlatSparseSearcher::unload(void) {
  LOG_INFO("Begin FlatSparseSearcher::unload");

  meta_.clear();
  entity_.unload();

  // Back to the "initialized but nothing loaded" state.
  state_ = STATE_INITED;

  LOG_INFO("End FlatSparseSearcher::unload");

  return 0;
}

//! Batched brute-force search over the whole index (no key restriction);
//! forwards to do_search() with an empty primary-key list.
int FlatSparseSearcher::search_bf_impl(const uint32_t *sparse_count,
                                       const uint32_t *sparse_indices,
                                       const void *sparse_query,
                                       const IndexQueryMeta &qmeta,
                                       uint32_t count,
                                       Context::Pointer &context) const {
  const std::vector<std::vector<uint64_t>> no_p_keys;
  const bool with_p_keys = false;

  return do_search(sparse_count, sparse_indices, sparse_query, with_p_keys,
                   no_p_keys, qmeta, count, context);
}

//! Batched brute-force search restricted to the given primary keys;
//! forwards to do_search() with the key restriction switched on.
int FlatSparseSearcher::search_bf_by_p_keys_impl(
    const uint32_t *sparse_count, const uint32_t *sparse_indices,
    const void *sparse_query, const std::vector<std::vector<uint64_t>> &p_keys,
    const IndexQueryMeta &qmeta, uint32_t count,
    ContextPointer &context) const {
  const bool with_p_keys = true;

  return do_search(sparse_count, sparse_indices, sparse_query, with_p_keys,
                   p_keys, qmeta, count, context);
}

//! Fetch the sparse vector stored under `key`.
//! The packed data read from the entity is split into the sparse count, the
//! indices buffer and the values buffer.
//! @return 0 on success, IndexError_NoIndexLoaded when nothing is loaded,
//!         otherwise the error reported by the entity
int FlatSparseSearcher::get_sparse_vector(
    uint64_t key, uint32_t *sparse_count, std::string *sparse_indices_buffer,
    std::string *sparse_values_buffer) const {
  if (state_ != STATE_LOADED) {
    LOG_ERROR("Failed to get sparse vector, load container first!");
    return IndexError_NoIndexLoaded;
  }

  std::string packed;
  const int rc = entity_.get_sparse_vector(key, &packed);
  if (ailego_unlikely(rc != 0)) {
    LOG_ERROR("Failed to get sparse vector, key=%zu, ret=%s",
              static_cast<size_t>(key), IndexError::What(rc));
    return rc;
  }

  // Unpack into the caller's buffers.
  SparseUtility::ReverseSparseFormat(packed, sparse_count,
                                     sparse_indices_buffer,
                                     sparse_values_buffer, meta_.unit_size());

  return 0;
}

//! Create a search context bound to this searcher.
//! @return the new context, or an empty pointer when no index is loaded
FlatSparseSearcher::ContextPointer FlatSparseSearcher::create_context() const {
  if (state_ != STATE_LOADED) {
    LOG_ERROR("Failed to create Context, load container first!");
    return Context::UPointer();
  }
  // The context only needs the searcher itself; cloning the entity here
  // would allocate an object that is thrown away immediately.
  return FlatSparseSearcher::ContextPointer(new FlatSparseContext(this));
}

//! Create a sparse provider over a clone of the loaded entity.
//! @return the provider, or an empty pointer when no index is loaded or the
//!         entity cannot be cloned
IndexSearcher::SparseProvider::Pointer
FlatSparseSearcher::create_sparse_provider(void) const {
  using Provider = FlatSparseIndexProvider<FlatSparseSearcherEntity>;

  if (state_ != STATE_LOADED) {
    LOG_ERROR("Failed to create provider, load container first!");
    return SparseProvider::Pointer();
  }

  auto cloned = entity_.clone();
  if (ailego_unlikely(!cloned)) {
    LOG_ERROR("Clone entity failed");
    return SparseProvider::Pointer();
  }

  return SparseProvider::Pointer(
      new Provider(cloned, meta_, "FlatSparseSearcher"));
}

int FlatSparseSearcher::do_search(
    const uint32_t *sparse_count, const uint32_t *sparse_indices,
    const void *sparse_query, bool with_p_keys,
    const std::vector<std::vector<uint64_t>> &p_keys,
    const IndexQueryMeta &qmeta, uint32_t count,
    ContextPointer &context) const {
  if (state_ != STATE_LOADED) {
    LOG_ERROR("Failed to do search, load container first!");
    return IndexError_NoIndexLoaded;
  }

  int ret = check_params(qmeta);
  if (ailego_unlikely(ret != 0)) {
    return ret;
  }

  return FlatSearch(sparse_count, sparse_indices, sparse_query, with_p_keys,
                    p_keys, qmeta, count, meta_, context,
                    (FlatSparseEntity *)&entity_);
}

// Register FlatSparseSearcher with the index factory.
// NOTE(review): presumably makes the searcher creatable by its class name --
// confirm against the macro definition.
INDEX_FACTORY_REGISTER_SEARCHER(FlatSparseSearcher);

}  // namespace core
}  // namespace zvec

================================================
FILE: src/core/algorithm/flat_sparse/flat_sparse_searcher.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "flat_sparse_searcher_entity.h"

namespace zvec {
namespace core {

/*! Brute-force searcher over a flat sparse index.
 *
 *  Only the sparse search entry points are implemented; the dense-vector
 *  overloads report IndexError_NotImplemented. All sparse searches, both
 *  "search" and "search_bf", end up in the batched brute-force
 *  implementation.
 *
 *  Life cycle: init() -> load() -> search / get_sparse_vector -> unload().
 */
class FlatSparseSearcher : public IndexSearcher {
 public:
  //! Searcher revision this implementation is able to load
  static const uint32_t VERSION;

 public:
  using ContextPointer = IndexSearcher::Context::Pointer;

 public:
  FlatSparseSearcher(void);
  virtual ~FlatSparseSearcher(void);

  // Not copyable
  FlatSparseSearcher(const FlatSparseSearcher &) = delete;
  FlatSparseSearcher &operator=(const FlatSparseSearcher &) = delete;

 public:
  //! Initialize Searcher
  int init(const ailego::Params &params) override;

  //! Cleanup Searcher
  int cleanup(void) override;

  //! Load Index from storage
  int load(IndexStorage::Pointer container,
           IndexMetric::Pointer /*measure*/) override;

  //! Unload index from storage
  int unload(void) override;

  //! Dense search (single query): not supported by the sparse searcher
  int search_impl(const void * /*query*/, const IndexQueryMeta & /*qmeta*/,
                  Context::Pointer & /*context*/) const override {
    return IndexError_NotImplemented;
  }

  //! Dense search (batch): not supported by the sparse searcher
  int search_impl(const void * /*query*/, const IndexQueryMeta & /*qmeta*/,
                  uint32_t /*count*/,
                  Context::Pointer & /*context*/) const override {
    return IndexError_NotImplemented;
  }

  //! Dense brute force search (single query): not supported
  int search_bf_impl(const void * /*query*/, const IndexQueryMeta & /*qmeta*/,
                     Context::Pointer & /*context*/) const override {
    return IndexError_NotImplemented;
  }

  //! Dense brute force search (batch): not supported
  int search_bf_impl(const void * /*query*/, const IndexQueryMeta & /*qmeta*/,
                     uint32_t /*count*/,
                     Context::Pointer & /*context*/) const override {
    return IndexError_NotImplemented;
  }

  //! Similarity search with sparse inputs
  //! (single query, forwarded to the batch overload with count = 1)
  int search_impl(const uint32_t sparse_count, const uint32_t *sparse_indices,
                  const void *sparse_query, const IndexQueryMeta &qmeta,
                  Context::Pointer &context) const override {
    return search_impl(&sparse_count, sparse_indices, sparse_query, qmeta, 1,
                       context);
  }

  //! Similarity search with sparse inputs
  //! (batch; a flat index has no other strategy, so this is brute force)
  int search_impl(const uint32_t *sparse_count, const uint32_t *sparse_indices,
                  const void *sparse_query, const IndexQueryMeta &qmeta,
                  uint32_t count, Context::Pointer &context) const override {
    return search_bf_impl(sparse_count, sparse_indices, sparse_query, qmeta,
                          count, context);
  }

  //! Similarity brute force search with sparse inputs
  //! (single query, forwarded to the batch overload with count = 1)
  int search_bf_impl(const uint32_t sparse_count,
                     const uint32_t *sparse_indices, const void *sparse_query,
                     const IndexQueryMeta &qmeta,
                     Context::Pointer &context) const override {
    return search_bf_impl(&sparse_count, sparse_indices, sparse_query, qmeta, 1,
                          context);
  }

  //! Similarity brute force search with sparse inputs (batch)
  int search_bf_impl(const uint32_t *sparse_count,
                     const uint32_t *sparse_indices, const void *sparse_query,
                     const IndexQueryMeta &qmeta, uint32_t count,
                     Context::Pointer &context) const override;

  //! Linear search by primary keys
  //! (single query, forwarded to the batch overload with count = 1)
  int search_bf_by_p_keys_impl(const uint32_t sparse_count,
                               const uint32_t *sparse_indices,
                               const void *sparse_query,
                               const std::vector<std::vector<uint64_t>> &p_keys,
                               const IndexQueryMeta &qmeta,
                               ContextPointer &context) const override {
    return search_bf_by_p_keys_impl(&sparse_count, sparse_indices, sparse_query,
                                    p_keys, qmeta, 1, context);
  }

  //! Linear search by primary keys (batch; p_keys[q] belongs to query q)
  int search_bf_by_p_keys_impl(const uint32_t *sparse_count,
                               const uint32_t *sparse_indices,
                               const void *sparse_query,
                               const std::vector<std::vector<uint64_t>> &p_keys,
                               const IndexQueryMeta &qmeta, uint32_t count,
                               ContextPointer &context) const override;

  //! Fetch sparse vector by key
  int get_sparse_vector(uint64_t key, uint32_t *sparse_count,
                        std::string *sparse_indices_buffer,
                        std::string *sparse_values_buffer) const override;

  //! Create a searcher context
  ContextPointer create_context() const override;

  //! Create a new sparse provider
  IndexSearcher::SparseProvider::Pointer create_sparse_provider(
      void) const override;

  //! Retrieve statistics
  const Stats &stats(void) const override {
    return stats_;
  }

  //! Retrieve meta of index
  const IndexMeta &meta(void) const override {
    return meta_;
  }

  //! Retrieve params of index
  const ailego::Params &params(void) const override {
    return params_;
  }

  //! Retrieve the entity holding the loaded index
  const FlatSparseSearcherEntity &entity(void) const {
    return entity_;
  }

  //! Retrieve the magic number drawn by the latest successful load()
  uint32_t magic(void) const {
    return magic_;
  }

 private:
  //! A query is accepted only when its data type equals the index data type
  inline int check_params(const IndexQueryMeta &qmeta) const {
    if (ailego_unlikely(qmeta.data_type() != meta_.data_type())) {
      LOG_ERROR("Unsupported query meta");
      return IndexError_Mismatch;
    }
    return 0;
  }

  //! Common implementation behind the sparse brute force searches
  int do_search(const uint32_t *sparse_count, const uint32_t *sparse_indices,
                const void *sparse_query, bool with_p_keys,
                const std::vector<std::vector<uint64_t>> &p_keys,
                const IndexQueryMeta &qmeta, uint32_t count,
                ContextPointer &context) const;

 private:
  //! STATE_INIT: constructed, STATE_INITED: init() done (also the state
  //! after unload()), STATE_LOADED: an index is loaded
  enum State { STATE_INIT = 0, STATE_INITED = 1, STATE_LOADED = 2 };

  FlatSparseSearcherEntity entity_{};
  IndexMeta meta_{};
  ailego::Params params_{};
  uint32_t magic_{0U};

  Stats stats_;
  State state_{STATE_INIT};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/flat_sparse/flat_sparse_searcher_entity.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "flat_sparse_searcher_entity.h"
#include <zvec/core/framework/index_helper.h>
#include <zvec/core/framework/index_logger.h>
#include "flat_sparse_utility.h"

namespace zvec {
namespace core {

//! Constructor: creates an entity with nothing loaded
FlatSparseSearcherEntity::FlatSparseSearcherEntity() = default;

//! Load the entity from `container`.
//! Opens the index segments, then sets up the distance measure described by
//! `index_meta`. The container is remembered only once both steps succeed.
//! @return 0 on success; IndexError_Duplicate when a container is already
//!         open; the segment loading error; or IndexError_InvalidFormat when
//!         the measure cannot be initialized
int FlatSparseSearcherEntity::load(const IndexStorage::Pointer &container,
                                   const IndexMeta &index_meta) {
  if (container_) {
    LOG_ERROR("An storage instance is already opened");
    return IndexError_Duplicate;
  }

  const int segments_rc = this->load_container(container);
  if (ailego_unlikely(segments_rc != 0)) {
    LOG_ERROR("Failed to load storage index");
    return segments_rc;
  }

  const int measure_rc = init_measure(index_meta);
  if (measure_rc != 0) {
    LOG_ERROR("Failed to init measure");
    return IndexError_InvalidFormat;
  }

  container_ = container;
  return 0;
}

//! Create and initialize the metric named in `meta` and pick the sparse
//! distance function used for searching.
//!
//! The metric's own sparse distance is the default. When the metric supplies
//! a query metric that has a sparse distance, that one is used instead.
//! @return 0 on success; IndexError_NoExist when the metric cannot be
//!         created; the metric's init error; IndexError_InvalidArgument when
//!         the metric has no sparse distance
int FlatSparseSearcherEntity::init_measure(const IndexMeta &meta) {
  measure_ = IndexFactory::CreateMetric(meta.metric_name());
  if (!measure_) {
    LOG_ERROR("Failed to create measure %s", meta.metric_name().c_str());
    return IndexError_NoExist;
  }
  int ret = measure_->init(meta, meta.metric_params());
  if (ret != 0) {
    LOG_ERROR("Failled to init measure, ret=%d", ret);
    return ret;
  }

  if (!measure_->sparse_distance()) {
    LOG_ERROR("Invalid measure distance");
    return IndexError_InvalidArgument;
  }

  search_sparse_distance_ = measure_->sparse_distance();

  // Test the function that is actually installed: checking the dense
  // distance() here could replace a valid sparse distance with an empty one.
  if (measure_->query_metric() && measure_->query_metric()->sparse_distance()) {
    search_sparse_distance_ = measure_->query_metric()->sparse_distance();
  }
  sparse_unit_size_ = meta.unit_size();

  return 0;
}

//! Open every segment the searcher needs: the meta record is copied into
//! meta_, and the keys, mapping, offset and data segments are kept as
//! handles for later reads.
//! @return 0 on success; IndexError_InvalidFormat when a segment is missing
//!         or the meta segment is too small; IndexError_ReadData when the
//!         meta record cannot be read
int FlatSparseSearcherEntity::load_container(
    const IndexStorage::Pointer &container) {
  // --- meta record ---
  auto meta_segment = container->get(PARAM_FLAT_SPARSE_META_SEG_ID);
  if (!meta_segment || meta_segment->data_size() < sizeof(meta_)) {
    LOG_ERROR("Missing segment %s, or invalid segment size",
              PARAM_FLAT_SPARSE_META_SEG_ID.c_str());
    return IndexError_InvalidFormat;
  }
  const void *raw_meta;
  const auto meta_bytes = meta_segment->read(0, &raw_meta, sizeof(meta_));
  if (ailego_unlikely(meta_bytes != sizeof(meta_))) {
    LOG_ERROR("Failed to read meta segment %s",
              PARAM_FLAT_SPARSE_META_SEG_ID.c_str());
    return IndexError_ReadData;
  }
  meta_ = *(reinterpret_cast<const decltype(meta_) *>(raw_meta));

  // --- primary keys ---
  keys_chunk_ = container->get(PARAM_FLAT_SPARSE_DUMP_KEYS_SEG_ID);
  if (!keys_chunk_) {
    LOG_ERROR("Missing segment %s", PARAM_FLAT_SPARSE_DUMP_KEYS_SEG_ID.c_str());
    return IndexError_InvalidFormat;
  }

  // --- key ordering (mapping) ---
  mapping_chunk_ = container->get(PARAM_FLAT_SPARSE_DUMP_MAPPING_SEG_ID);
  if (!mapping_chunk_) {
    LOG_ERROR("Missing segment %s",
              PARAM_FLAT_SPARSE_DUMP_MAPPING_SEG_ID.c_str());
    return IndexError_InvalidFormat;
  }

  // --- per-node offset records ---
  sparse_offset_chunk_ = container->get(PARAM_FLAT_SPARSE_DUMP_OFFSET_SEG_ID);
  if (!sparse_offset_chunk_) {
    LOG_ERROR("Missing segment %s",
              PARAM_FLAT_SPARSE_DUMP_OFFSET_SEG_ID.c_str());
    return IndexError_InvalidFormat;
  }

  // --- sparse vector payload ---
  sparse_data_chunk_ = container->get(PARAM_FLAT_SPARSE_DUMP_DATA_SEG_ID);
  if (!sparse_data_chunk_) {
    LOG_ERROR("Missing segment %s", PARAM_FLAT_SPARSE_DUMP_DATA_SEG_ID.c_str());
    return IndexError_InvalidFormat;
  }

  return 0;
}

//! Drop the container and every segment handle taken from it.
//! @return always 0
int FlatSparseSearcherEntity::unload() {
  // Segment handles
  keys_chunk_.reset();
  mapping_chunk_.reset();
  sparse_offset_chunk_.reset();
  sparse_data_chunk_.reset();

  // The container itself
  container_.reset();

  return 0;
}

//! Create a new entity from this entity's meta and segment handles.
//! @return the clone, or an empty pointer when the allocation fails
FlatSparseSearcherEntity::Pointer FlatSparseSearcherEntity::clone() const {
  FlatSparseSearcherEntity *cloned = new (std::nothrow)
      FlatSparseSearcherEntity(meta_, sparse_data_chunk_, sparse_offset_chunk_,
                               keys_chunk_, mapping_chunk_);

  return FlatSparseSearcherEntity::Pointer(cloned);
}

//! Locate the sparse vector of node `id`.
//!
//! The offset segment holds one record per node: a 64-bit offset into the
//! data segment followed by a 32-bit byte length.
//!
//! @param id                     node id
//! @param sparse_vector_ptr      receives a pointer to the vector data, or
//!                               nullptr for an empty vector
//! @param sparse_vector_len_ptr  receives the vector length, 0 when empty
//! @return 0 on success, IndexError_ReadData when the record or the vector
//!         data cannot be read
int FlatSparseSearcherEntity::get_sparse_vector_ptr_by_id(
    node_id_t id, const void **sparse_vector_ptr,
    uint32_t *sparse_vector_len_ptr) const {
  // Widen before multiplying: in 32-bit arithmetic id * record size wraps
  // around once the index holds enough documents.
  const size_t record_size = offset_size_per_node();
  const size_t record_offset = static_cast<size_t>(id) * record_size;

  const void *offset_info = nullptr;
  if (ailego_unlikely(sparse_offset_chunk_->read(record_offset, &offset_info,
                                                 record_size) != record_size)) {
    LOG_ERROR("Read offset info failed, offset=%zu", record_offset);
    return IndexError_ReadData;
  }

  // Record layout: [uint64 data offset][uint32 length]
  const uint8_t *record = reinterpret_cast<const uint8_t *>(offset_info);
  const uint64_t sparse_offset = *reinterpret_cast<const uint64_t *>(record);
  const uint32_t sparse_vector_len =
      *reinterpret_cast<const uint32_t *>(record + sizeof(uint64_t));

  if (sparse_vector_len == 0) {
    // Empty vector: hand back a well-defined result instead of leaving the
    // caller's variables untouched.
    *sparse_vector_ptr = nullptr;
    *sparse_vector_len_ptr = 0;
    return 0;
  }

  const void *sparse_data =
      get_sparse_vector_data(sparse_offset, sparse_vector_len);
  if (ailego_unlikely(sparse_data == nullptr)) {
    LOG_ERROR("Get nullptr sparse, offset=%zu, len=%u", (size_t)sparse_offset,
              sparse_vector_len);

    return IndexError_ReadData;
  }
  *sparse_vector_ptr = sparse_data;
  *sparse_vector_len_ptr = sparse_vector_len;

  return 0;
}

//! Read `length` bytes at `offset` from the sparse data segment.
//! Returns the pointer handed back by the segment, or nullptr when the
//! segment delivers a different number of bytes than requested.
const void *FlatSparseSearcherEntity::get_sparse_vector_data(
    uint64_t offset, uint32_t length) const {
  const void *payload;
  const auto bytes_read = sparse_data_chunk_->read(offset, &payload, length);
  if (bytes_read == length) {
    return payload;
  }

  LOG_ERROR(
      "read sparse vector data failed: offset=%zu, "
      "length=%u, size=%zu",
      (size_t)offset, length, bytes_read);
  return nullptr;
}


//! Translate a primary key into a local node id.
//!
//! Performs a binary search over the mapping segment: slot i of the mapping
//! holds a node id, and the key of that node is fetched from the keys segment
//! for comparison.
//!
//! \return the matching node id, or kInvalidNodeId when the mapping segment
//!         is absent, a read fails, or the key is not found
node_id_t FlatSparseSearcherEntity::get_id(uint64_t key) const {
  if (ailego_unlikely(!mapping_chunk_)) {
    LOG_ERROR("Index missing mapping segment");
    return kInvalidNodeId;
  }

  //! Do binary search
  node_id_t lo = 0UL;
  node_id_t hi = doc_cnt();
  while (lo < hi) {
    const node_id_t mid = lo + (hi - lo) / 2;

    // Fetch the node id stored in the probed mapping slot.
    const void *slot;
    const auto slot_bytes = mapping_chunk_->read(mid * sizeof(node_id_t), &slot,
                                                 sizeof(node_id_t));
    if (ailego_unlikely(slot_bytes != sizeof(node_id_t))) {
      LOG_ERROR("Read key from segment failed");
      return kInvalidNodeId;
    }
    const node_id_t local_id = *reinterpret_cast<const node_id_t *>(slot);

    // Fetch the key that belongs to that node.
    const uint64_t *probe_key;
    const auto key_bytes =
        keys_chunk_->read(local_id * sizeof(uint64_t),
                          (const void **)(&probe_key), sizeof(uint64_t));
    if (ailego_unlikely(key_bytes != sizeof(uint64_t))) {
      LOG_ERROR("Read key from segment failed");
      return kInvalidNodeId;
    }

    if (*probe_key == key) {
      return local_id;
    }
    if (*probe_key < key) {
      lo = mid + 1;
    } else {
      hi = mid;
    }
  }
  return kInvalidNodeId;
}

//! Fetch the primary key stored for a local node id.
//! Returns kInvalidKey when the keys segment cannot deliver the 8-byte entry.
uint64_t FlatSparseSearcherEntity::get_key(node_id_t id) const {
  const void *raw_key;
  const auto bytes_read =
      keys_chunk_->read(id * sizeof(uint64_t), &raw_key, sizeof(uint64_t));
  if (ailego_unlikely(bytes_read != sizeof(uint64_t))) {
    LOG_ERROR("Read key from segment failed");
    return kInvalidKey;
  }
  const auto *stored_key = reinterpret_cast<const uint64_t *>(raw_key);
  return *stored_key;
}

}  // namespace core
}  // namespace zvec

================================================
FILE: src/core/algorithm/flat_sparse/flat_sparse_searcher_entity.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <zvec/core/framework/index_framework.h>
#include "flat_sparse_entity.h"
#include "flat_sparse_index_format.h"

namespace zvec {
namespace core {


/*! Flat Sparse Searcher Entity
 *
 *  Entity that serves sparse vectors, keys and key-to-id lookups out of
 *  segments obtained from an IndexStorage container.
 */
class FlatSparseSearcherEntity : public FlatSparseEntity {
 public:
  using Pointer = std::shared_ptr<FlatSparseSearcherEntity>;

  using Chunk = IndexStorage::Segment;

  //! Constructor
  explicit FlatSparseSearcherEntity();

  //! Destructor
  virtual ~FlatSparseSearcherEntity() = default;

  //! Disable them
  FlatSparseSearcherEntity(const FlatSparseSearcherEntity &) = delete;
  FlatSparseSearcherEntity &operator=(const FlatSparseSearcherEntity &) =
      delete;

  //! Load the entity with container
  int load(const IndexStorage::Pointer &container, const IndexMeta &index_meta);

  //! Unload the entity
  int unload();

 public:
  //! Number of documents recorded in the meta
  inline uint32_t doc_cnt() const override {
    return meta_.doc_cnt;
  }

  //! Total sparse element count recorded in the meta
  inline uint32_t total_sparse_count() const override {
    return meta_.total_sparse_count;
  }

  //! Size of one sparse unit
  size_t sparse_unit_size() const override {
    return sparse_unit_size_;
  }

  //! Distance between `vector` and the stored vector of `target_node_id`.
  //!
  //! When the stored vector cannot be resolved (the lookup fails or yields no
  //! data) the distance function is not invoked and 0.0f is returned, so no
  //! uninitialised pointer is ever handed to it.
  float get_search_distance(const std::string &vector,
                            node_id_t target_node_id) const override {
    float dist = 0.0f;
    const void *target_vector = nullptr;
    uint32_t target_vector_len = 0U;
    const int ret = get_sparse_vector_ptr_by_id(target_node_id, &target_vector,
                                                &target_vector_len);
    if (ret != 0 || target_vector == nullptr) {
      return dist;
    }
    search_sparse_distance_(vector.c_str(), target_vector, &dist);
    return dist;
  }

  //! Create a new entity sharing the meta and segments of this one
  FlatSparseSearcherEntity::Pointer clone() const;

  //! Translate a primary key into a local node id
  node_id_t get_id(uint64_t key) const override;

  //! Fetch the primary key of a local node id
  uint64_t get_key(node_id_t id) const override;

  //! Locate the stored sparse vector (pointer and length) of a node
  int get_sparse_vector_ptr_by_id(node_id_t id, const void **sparse_vector,
                                  uint32_t *sparse_vector_len) const override;

 private:
  int load_container(const IndexStorage::Pointer &container);

  int init_measure(const IndexMeta &meta);

  //! Size of one offset record: uint64_t offset + uint32_t length
  inline uint32_t offset_size_per_node() const {
    return sizeof(uint64_t) + sizeof(uint32_t);
  }

  //! Read `length` bytes at `offset` from the sparse data segment
  const void *get_sparse_vector_data(uint64_t offset, uint32_t length) const;

 private:
  //! Constructor used by clone(): takes only the meta and the segments
  FlatSparseSearcherEntity(const FlatSparseMeta &meta,
                           Chunk::Pointer sparse_data_chunk,
                           Chunk::Pointer sparse_offset_chunk,
                           Chunk::Pointer keys_chunk,
                           Chunk::Pointer mapping_chunk)
      : meta_(meta),
        sparse_data_chunk_(sparse_data_chunk),
        sparse_offset_chunk_(sparse_offset_chunk),
        keys_chunk_(keys_chunk),
        mapping_chunk_(mapping_chunk) {}

 private:
  IndexStorage::Pointer container_{};

  // meta
  FlatSparseMeta meta_;

  // measure
  IndexMetric::Pointer measure_{};
  IndexMetric::MatrixSparseDistance search_sparse_distance_{};

  // chunk
  Chunk::Pointer sparse_data_chunk_;
  Chunk::Pointer sparse_offset_chunk_;
  Chunk::Pointer keys_chunk_;
  Chunk::Pointer mapping_chunk_;

  size_t sparse_unit_size_{0U};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/flat_sparse/flat_sparse_streamer.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "flat_sparse_streamer.h"
#include <cstdint>
#include <utility/sparse_utility.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_factory.h>
#include <zvec/core/framework/index_meta.h>
#include "flat_sparse_context.h"
#include "flat_sparse_provider.h"
#include "flat_sparse_search.h"

namespace zvec {
namespace core {

//! Revision number passed to set_streamer() / set_searcher() on the meta.
const uint32_t FlatSparseStreamer::VERSION = 0U;

//! The entity is bound to this streamer's statistics object.
FlatSparseStreamer::FlatSparseStreamer() : entity_(stats_) {}

//! Closes the streamer (a no-op unless it is currently opened).
FlatSparseStreamer::~FlatSparseStreamer() {
  close();
}

//! Initialize the streamer: keep a copy of the index meta, tag it with this
//! streamer's name, revision and parameters, and move to STATE_INITED.
//! Always returns 0.
int FlatSparseStreamer::init(const IndexMeta &imeta,
                             const ailego::Params &params) {
  LOG_DEBUG("FlatSparseStreamer init");

  // Copy first, then stamp the streamer identity onto the copy.
  meta_ = imeta;
  meta_.set_streamer("FlatSparseStreamer", VERSION, params);
  state_ = STATE_INITED;
  return 0;
}

int FlatSparseStreamer::cleanup() {
  LOG_DEBUG("FlatSparseStreamer cleanup");

  this->close();

  meta_.clear();

  return 0;
}

//! Open the streamer on a storage.
//!
//! Opens the entity on `stg`, then either stores the current meta (new index)
//! or validates the stored meta against the current one (existing index).
//! On any failure after the entity was opened, the entity is closed again so
//! that the streamer can be re-opened.
//!
//! \return 0 on success, IndexError_NoReady when init() was not called,
//!         IndexError_Mismatch when the stored meta is incompatible, or the
//!         error reported by the entity
int FlatSparseStreamer::open(IndexStorage::Pointer stg) {
  LOG_DEBUG("FlatSparseStreamer open");

  if (ailego_unlikely(state_ != STATE_INITED)) {
    LOG_ERROR("Open storage failed, init streamer first!");
    return IndexError_NoReady;
  }

  int ret = entity_.open(std::move(stg), meta_);
  if (ret != 0) {
    LOG_ERROR("FlatSparseStreamer entity failed to open storage");
    return ret;
  }

  // From here on the entity holds the storage. Close it on every error path,
  // otherwise a retry would be rejected because a storage is still attached.
  bool opened = false;
  AILEGO_DEFER([&]() {
    if (!opened) {
      entity_.close();
    }
  });

  IndexMeta index_meta;
  ret = entity_.get_index_sparse_meta(&index_meta);
  if (ret == IndexError_NoExist) {
    // Set IndexMeta for the new index
    ret = entity_.set_index_sparse_meta(meta_);
    if (ret != 0) {
      LOG_ERROR("Failed to set index meta for %s", IndexError::What(ret));
      return ret;
    }
  } else if (ret != 0) {
    // Any other error means the stored meta could not be obtained; comparing
    // against a default-constructed IndexMeta would only hide the real cause.
    LOG_ERROR("Failed to get index meta for %s", IndexError::What(ret));
    return ret;
  } else {
    if (index_meta.streamer_revision() != meta_.streamer_revision()) {
      LOG_ERROR("Streamer revision mismatch, expect=%u, actual=%u",
                meta_.streamer_revision(), index_meta.streamer_revision());
      return IndexError_Mismatch;
    }
    if (index_meta.metric_name() != meta_.metric_name() ||
        index_meta.data_type() != meta_.data_type()) {
      LOG_ERROR("IndexMeta mismatch from the previous in index");
      return IndexError_Mismatch;
    }
    // The IndexMeasure Params may be updated like MipsSquaredEuclidean
    auto metric_params = index_meta.metric_params();
    metric_params.merge(meta_.metric_params());
    meta_.set_metric(index_meta.metric_name(), 0, metric_params);
  }

  state_ = STATE_OPENED;
  magic_ = IndexContext::GenerateMagic();
  opened = true;

  return 0;
}

//! Close the streamer. Does nothing unless the streamer is opened.
//! Statistics are cleared before the entity is closed; the state only drops
//! back to STATE_INITED when the entity closed successfully.
int FlatSparseStreamer::close() {
  if (state_ == STATE_OPENED) {
    LOG_DEBUG("FlatSparseStreamer close");

    stats_.clear();
    const int ret = entity_.close();
    if (ret != 0) {
      LOG_ERROR("Failed to close entity %s", IndexError::What(ret));
      return ret;
    }
    state_ = STATE_INITED;
  }
  return 0;
}

//! Flush the entity with the given checkpoint.
//! Returns IndexError_NoReady unless the streamer is opened, otherwise the
//! entity's result.
int FlatSparseStreamer::flush(uint64_t checkpoint) {
  const bool is_opened = (state_ == STATE_OPENED);
  if (!is_opened) {
    LOG_ERROR("Failed to flush, open streamer first!");
    return IndexError_NoReady;
  }

  LOG_INFO("FlatSparseStreamer flush, checkpoint=%zu", (size_t)checkpoint);
  return entity_.flush(checkpoint);
}

//! Dump the index through `dumper`.
//!
//! Holds the exclusive side of shared_mutex_ for the whole dump, so the add
//! paths (which try the shared side) are rejected meanwhile. The meta is
//! tagged with the searcher name and serialized before the entity dumps its
//! data.
int FlatSparseStreamer::dump(const IndexDumper::Pointer &dumper) {
  if (state_ != STATE_OPENED) {
    LOG_ERROR("Failed to dump, open streamer first!");
    return IndexError_NoReady;
  }

  LOG_INFO("FlatSparseStreamer dump");

  // Exclusive lock, released on every exit path below.
  shared_mutex_.lock();
  AILEGO_DEFER([&]() { shared_mutex_.unlock(); });

  meta_.set_searcher("FlatSparseSearcher", VERSION, ailego::Params());

  const int serialize_ret =
      IndexHelper::SerializeToDumper(meta_, dumper.get());
  if (serialize_ret == 0) {
    return entity_.dump(dumper);
  }

  LOG_ERROR("Failed to serialize meta into dumper.");
  return serialize_ret;
}

//! Create a search/add context bound to this streamer.
//! Returns an empty pointer unless the streamer is opened.
FlatSparseStreamer::ContextPointer FlatSparseStreamer::create_context() const {
  if (state_ != STATE_OPENED) {
    LOG_ERROR("Failed to create Context, open streamer first!");
    return Context::UPointer();
  }
  // The context is built from the streamer itself. The entity clone that used
  // to be created here was never used and has been dropped.
  return FlatSparseStreamer::ContextPointer(new FlatSparseContext(this));
}

//! Create a sparse provider backed by a clone of the entity.
//! Returns an empty pointer unless the streamer is opened, or when the entity
//! cannot be cloned.
IndexStreamer::SparseProvider::Pointer
FlatSparseStreamer::create_sparse_provider(void) const {
  if (state_ != STATE_OPENED) {
    LOG_ERROR("Failed to create provider, open streamer first!");
    return SparseProvider::Pointer();
  }

  auto cloned_entity = entity_.clone();
  if (ailego_likely(cloned_entity)) {
    return SparseProvider::Pointer(
        new FlatSparseIndexProvider<FlatSparseStreamerEntity>(
            cloned_entity, meta_, "FlatSparseStreamerProvider"));
  }

  LOG_ERROR("Clone entity failed");
  return SparseProvider::Pointer();
}

int FlatSparseStreamer::add_impl(uint64_t pkey, const uint32_t sparse_count,
                                 const uint32_t *sparse_indices,
                                 const void *sparse_query,
                                 const IndexQueryMeta &qmeta,
                                 Context::Pointer &context) {
  if (state_ != STATE_OPENED) {
    LOG_ERROR("Failed to add_impl, open streamer first!");
    (*stats_.mutable_discarded_count())++;
    return IndexError_NoReady;
  }

  int ret = check_params(qmeta);
  if (ailego_unlikely(ret != 0)) {
    (*stats_.mutable_discarded_count())++;
    return ret;
  }

  if (ailego_unlikely(sparse_count > PARAM_FLAT_SPARSE_MAX_DIM_SIZE)) {
    LOG_ERROR(
        "Failed to add sparse vector: number of non-zero elements (%u) exceeds "
        "maximum allowed (%u), key=%zu",
        sparse_count, PARAM_FLAT_SPARSE_MAX_DIM_SIZE, (size_t)pkey);
    (*stats_.mutable_discarded_count())++;
    return IndexError_InvalidValue;
  }

  // context is trivial here
  FlatSparseContext *ctx = dynamic_cast<FlatSparseContext *>(context.get());
  ailego_do_if_false(ctx) {
    LOG_ERROR("Cast context to FlatSparseContext failed");
    (*stats_.mutable_discarded_count())++;
    return IndexError_Cast;
  }

  if (ailego_unlikely(!shared_mutex_.try_lock_shared())) {
    LOG_ERROR("Cannot add vector while dumping index");
    (*stats_.mutable_discarded_count())++;
    return IndexError_Unsupported;
  }
  AILEGO_DEFER([&]() { shared_mutex_.unlock_shared(); });

  // convert to sparse format and add to entity
  std::string sparse_query_buffer;
  SparseUtility::TransSparseFormat(sparse_count, sparse_indices, sparse_query,
                                   meta_.unit_size(), sparse_query_buffer);

  ret = entity_.add(pkey, sparse_query_buffer, sparse_count);
  if (ret != 0) {
    LOG_ERROR("Failed to add sparse vector, key=%zu, ret=%s", (size_t)pkey,
              IndexError::What(ret));
    (*stats_.mutable_discarded_count())++;
    return ret;
  }

  (*stats_.mutable_added_count())++;
  return 0;
}

int FlatSparseStreamer::add_with_id_impl(uint32_t pkey,
                                         const uint32_t sparse_count,
                                         const uint32_t *sparse_indices,
                                         const void *sparse_query,
                                         const IndexQueryMeta &qmeta,
                                         Context::Pointer &context) {
  if (state_ != STATE_OPENED) {
    LOG_ERROR("Failed to add_with_id_impl, open streamer first!");
    (*stats_.mutable_discarded_count())++;
    return IndexError_NoReady;
  }

  int ret = check_params(qmeta);
  if (ailego_unlikely(ret != 0)) {
    (*stats_.mutable_discarded_count())++;
    return ret;
  }

  if (ailego_unlikely(sparse_count > PARAM_FLAT_SPARSE_MAX_DIM_SIZE)) {
    LOG_ERROR(
        "Failed to add sparse vector: number of non-zero elements (%u) exceeds "
        "maximum allowed (%u), key=%zu",
        sparse_count, PARAM_FLAT_SPARSE_MAX_DIM_SIZE, (size_t)pkey);
    (*stats_.mutable_discarded_count())++;
    return IndexError_InvalidValue;
  }

  // context is trivial here
  FlatSparseContext *ctx = dynamic_cast<FlatSparseContext *>(context.get());
  ailego_do_if_false(ctx) {
    LOG_ERROR("Cast context to FlatSparseContext failed");
    (*stats_.mutable_discarded_count())++;
    return IndexError_Cast;
  }

  if (ailego_unlikely(!shared_mutex_.try_lock_shared())) {
    LOG_ERROR("Cannot add vector while dumping index");
    (*stats_.mutable_discarded_count())++;
    return IndexError_Unsupported;
  }
  AILEGO_DEFER([&]() { shared_mutex_.unlock_shared(); });

  // convert to sparse format and add to entity
  std::string sparse_query_buffer;
  SparseUtility::TransSparseFormat(sparse_count, sparse_indices, sparse_query,
                                   meta_.unit_size(), sparse_query_buffer);

  ret = entity_.add_vector_with_id(pkey, sparse_query_buffer, sparse_count);
  if (ret != 0) {
    LOG_ERROR("Failed to add sparse vector, key=%zu, ret=%s", (size_t)pkey,
              IndexError::What(ret));
    (*stats_.mutable_discarded_count())++;
    return ret;
  }

  (*stats_.mutable_added_count())++;
  return 0;
}

//! Similarity search for a single sparse query.
//! Forwards to the batched overload with a batch size of one.
int FlatSparseStreamer::search_impl(const uint32_t sparse_count,
                                    const uint32_t *sparse_indices,
                                    const void *sparse_query,
                                    const IndexQueryMeta &qmeta,
                                    Context::Pointer &context) const {
  const uint32_t *single_count = &sparse_count;
  return search_impl(single_count, sparse_indices, sparse_query, qmeta, 1,
                     context);
}

//! Similarity search for a batch of sparse queries.
//! This streamer only offers brute force search, so the call is forwarded to
//! search_bf_impl() unchanged.
int FlatSparseStreamer::search_impl(const uint32_t *sparse_count,
                                    const uint32_t *sparse_indices,
                                    const void *sparse_query,
                                    const IndexQueryMeta &qmeta, uint32_t count,
                                    Context::Pointer &context) const {
  const int ret = search_bf_impl(sparse_count, sparse_indices, sparse_query,
                                 qmeta, count, context);
  return ret;
}

//! Brute force search for a single sparse query.
//! Forwards to the batched overload with a batch size of one.
int FlatSparseStreamer::search_bf_impl(const uint32_t sparse_count,
                                       const uint32_t *sparse_indices,
                                       const void *sparse_query,
                                       const IndexQueryMeta &qmeta,
                                       Context::Pointer &context) const {
  const uint32_t *single_count = &sparse_count;
  return search_bf_impl(single_count, sparse_indices, sparse_query, qmeta, 1,
                        context);
}

//! Linear search restricted to the given primary keys, single sparse query.
//! Forwards to the batched overload with a batch size of one.
int FlatSparseStreamer::search_bf_by_p_keys_impl(
    const uint32_t sparse_count, const uint32_t *sparse_indices,
    const void *sparse_query, const std::vector<std::vector<uint64_t>> &p_keys,
    const IndexQueryMeta &qmeta, ContextPointer &context) const {
  const uint32_t *single_count = &sparse_count;
  return search_bf_by_p_keys_impl(single_count, sparse_indices, sparse_query,
                                  p_keys, qmeta, 1, context);
}

//! Brute force search for a batch of sparse queries.
//! Runs do_search() without a primary key filter (an empty key list is
//! passed along with with_p_keys = false).
int FlatSparseStreamer::search_bf_impl(const uint32_t *sparse_count,
                                       const uint32_t *sparse_indices,
                                       const void *sparse_query,
                                       const IndexQueryMeta &qmeta,
                                       uint32_t count,
                                       Context::Pointer &context) const {
  const std::vector<std::vector<uint64_t>> no_p_keys;
  return do_search(sparse_count, sparse_indices, sparse_query, false,
                   no_p_keys, qmeta, count, context);
}

//! Linear search restricted to the given primary keys, batch of sparse
//! queries. Runs do_search() with the primary key filter enabled.
int FlatSparseStreamer::search_bf_by_p_keys_impl(
    const uint32_t *sparse_count, const uint32_t *sparse_indices,
    const void *sparse_query, const std::vector<std::vector<uint64_t>> &p_keys,
    const IndexQueryMeta &qmeta, uint32_t count,
    ContextPointer &context) const {
  const bool with_p_keys = true;
  return do_search(sparse_count, sparse_indices, sparse_query, with_p_keys,
                   p_keys, qmeta, count, context);
}

//! Fetch sparse vector by key
int FlatSparseStreamer::get_sparse_vector(
    uint64_t key, uint32_t *sparse_count, std::string *sparse_indices_buffer,
    std::string *sparse_values_buffer) const {
  if (state_ != STATE_OPENED) {
    LOG_ERROR("Failed to get_sparse_vector, open streamer first!");
    return IndexError_NoReady;
  }

  std::string sparse_data;

  int ret = entity_.get_sparse_vector_by_key(key, &sparse_data);
  if (ailego_unlikely(ret != 0)) {
    LOG_ERROR("Failed to get sparse vector, key=%zu, ret=%s", (size_t)key,
              IndexError::What(ret));
    return ret;
  }

  SparseUtility::ReverseSparseFormat(sparse_data, sparse_count,
                                     sparse_indices_buffer,
                                     sparse_values_buffer, meta_.unit_size());

  return 0;
}

int FlatSparseStreamer::do_search(
    const uint32_t *sparse_count, const uint32_t *sparse_indices,
    const void *sparse_query, bool with_p_keys,
    const std::vector<std::vector<uint64_t>> &p_keys,
    const IndexQueryMeta &qmeta, uint32_t count,
    ContextPointer &context) const {
  if (state_ != STATE_OPENED) {
    LOG_ERROR("Failed to do_search, open streamer first!");
    return IndexError_NoReady;
  }

  int ret = check_params(qmeta);
  if (ailego_unlikely(ret != 0)) {
    return ret;
  }

  FlatSparseContext *ctx = dynamic_cast<FlatSparseContext *>(context.get());
  if (ctx->magic() != magic_) {
    ctx->reset(this);
  }

  return FlatSearch(sparse_count, sparse_indices, sparse_query, with_p_keys,
                    p_keys, qmeta, count, meta_, context,
                    (FlatSparseEntity *)&entity_);
}

//! Register FlatSparseStreamer with the index factory.
//! NOTE(review): presumably registered under its class name, which is the name
//! init() stores via set_streamer() - confirm against the macro definition.
INDEX_FACTORY_REGISTER_STREAMER(FlatSparseStreamer);

}  // namespace core
}  // namespace zvec

================================================
FILE: src/core/algorithm/flat_sparse/flat_sparse_streamer.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <ailego/parallel/lock.h>
#include <zvec/core/framework/index_streamer.h>
#include "flat_sparse_streamer_entity.h"

namespace zvec {
namespace core {

/*! Flat Sparse Streamer
 *
 *  Streamer for sparse vectors whose search entry points all end in a brute
 *  force scan. Life cycle: init() -> open() -> add/search/flush/dump ->
 *  close() -> cleanup().
 */
class FlatSparseStreamer : public IndexStreamer {
 public:
  //! Revision stored in the index meta by init() and dump()
  static const uint32_t VERSION;

 public:
  using ContextPointer = IndexStreamer::Context::Pointer;

  FlatSparseStreamer();
  virtual ~FlatSparseStreamer();

  //! Not copyable
  FlatSparseStreamer(const FlatSparseStreamer &streamer) = delete;
  FlatSparseStreamer &operator=(const FlatSparseStreamer &streamer) = delete;

 public:
  //! Initialize the streamer with an index meta and streamer parameters
  int init(const IndexMeta &, const ailego::Params &) override;

  //! Cleanup the streamer
  int cleanup() override;

  //! Open the index on a storage
  int open(IndexStorage::Pointer stg) override;

  //! Close the index
  int close() override;

  //! Flush the index with a checkpoint
  int flush(uint64_t checkpoint) override;

  //! Dump the index through a dumper
  int dump(const IndexDumper::Pointer &dumper) override;

  //! Create a context bound to this streamer
  ContextPointer create_context() const override;

  //! Create a sparse provider backed by a clone of the entity
  IndexStreamer::SparseProvider::Pointer create_sparse_provider()
      const override;

  //! Add a sparse vector under a primary key
  int add_impl(uint64_t pkey, const uint32_t sparse_count,
               const uint32_t *sparse_indices, const void *sparse_query,
               const IndexQueryMeta &qmeta, Context::Pointer &context) override;

  //! Add a sparse vector through the entity's add_vector_with_id() path
  int add_with_id_impl(uint32_t pkey, const uint32_t sparse_count,
                       const uint32_t *sparse_indices, const void *sparse_query,
                       const IndexQueryMeta &qmeta,
                       Context::Pointer &context) override;

  //! Similarity search, single sparse query
  int search_impl(const uint32_t sparse_count, const uint32_t *sparse_indices,
                  const void *sparse_query, const IndexQueryMeta &qmeta,
                  Context::Pointer &context) const override;

  //! Similarity search, batch of sparse queries
  int search_impl(const uint32_t *sparse_count, const uint32_t *sparse_indices,
                  const void *sparse_query, const IndexQueryMeta &qmeta,
                  uint32_t count, Context::Pointer &context) const override;

  //! Brute force search, single sparse query
  int search_bf_impl(const uint32_t sparse_count,
                     const uint32_t *sparse_indices, const void *sparse_query,
                     const IndexQueryMeta &qmeta,
                     Context::Pointer &context) const override;

  //! Brute force search, batch of sparse queries
  int search_bf_impl(const uint32_t *sparse_count,
                     const uint32_t *sparse_indices, const void *sparse_query,
                     const IndexQueryMeta &qmeta, uint32_t count,
                     Context::Pointer &context) const override;

  //! Linear search by primary keys, single sparse query
  int search_bf_by_p_keys_impl(const uint32_t sparse_count,
                               const uint32_t *sparse_indices,
                               const void *sparse_query,
                               const std::vector<std::vector<uint64_t>> &p_keys,
                               const IndexQueryMeta &qmeta,
                               ContextPointer &context) const override;

  //! Linear search by primary keys, batch of sparse queries
  int search_bf_by_p_keys_impl(const uint32_t *sparse_count,
                               const uint32_t *sparse_indices,
                               const void *sparse_query,
                               const std::vector<std::vector<uint64_t>> &p_keys,
                               const IndexQueryMeta &qmeta, uint32_t count,
                               ContextPointer &context) const override;

  //! Fetch sparse vector by key
  int get_sparse_vector(uint64_t key, uint32_t *sparse_count,
                        std::string *sparse_indices_buffer,
                        std::string *sparse_values_buffer) const override;

  //! Fetch sparse vector by id; the id is passed on as the key of
  //! get_sparse_vector()
  int get_sparse_vector_by_id(
      uint32_t id, uint32_t *sparse_count, std::string *sparse_indices_buffer,
      std::string *sparse_values_buffer) const override {
    const uint64_t key = id;
    return get_sparse_vector(key, sparse_count, sparse_indices_buffer,
                             sparse_values_buffer);
  }

  //! Retrieve statistics
  const Stats &stats() const override {
    return stats_;
  }

  //! Retrieve meta of index
  const IndexMeta &meta() const override {
    return meta_;
  }

  //! Retrieve the underlying entity
  const FlatSparseStreamerEntity &entity() const {
    return entity_;
  }

  //! Retrieve the magic number generated by the last successful open()
  uint32_t magic() const {
    return magic_;
  }

 private:
  //! Verify that the query's data type equals the index's data type.
  //! Returns 0 when they agree, IndexError_Mismatch otherwise.
  inline int check_params(const IndexQueryMeta &qmeta) const {
    if (ailego_likely(qmeta.data_type() == meta_.data_type())) {
      return 0;
    }
    LOG_ERROR("Unsupported query meta, type=%d, expected=%d",
              qmeta.data_type(), meta_.data_type());
    return IndexError_Mismatch;
  }

  //! Common implementation behind every search entry point
  int do_search(const uint32_t *sparse_count, const uint32_t *sparse_indices,
                const void *sparse_query, bool with_p_keys,
                const std::vector<std::vector<uint64_t>> &p_keys,
                const IndexQueryMeta &qmeta, uint32_t count,
                ContextPointer &context) const;

 private:
  //! Life cycle states: constructed, after init(), after open()
  enum State { STATE_INIT = 0, STATE_INITED = 1, STATE_OPENED = 2 };

  IndexMeta meta_{};
  FlatSparseStreamerEntity entity_;

  uint32_t magic_{0U};
  Stats stats_{};
  State state_{STATE_INIT};

  //! avoid add vector while dumping index
  ailego::SharedMutex shared_mutex_{};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/flat_sparse/flat_sparse_streamer_entity.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "flat_sparse_streamer_entity.h"
#include <cstdint>
#include <memory>
#include <utility>
#include <ailego/utility/memory_helper.h>
#include <zvec/ailego/internal/platform.h>
#include <zvec/ailego/utility/time_helper.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_logger.h>
#include "flat_sparse_index_format.h"
#include "flat_sparse_utility.h"

namespace zvec {
namespace core {

FlatSparseStreamerEntity::FlatSparseStreamerEntity(IndexStreamer::Stats &stats)
    : stats_(stats) {}

//! Open the entity on a storage.
//!
//! When the storage already carries a flat sparse meta segment or a streamer
//! meta segment the existing index is loaded, otherwise a fresh index is
//! initialised. Afterwards the metric is created and the chunk tables are
//! reserved according to the streamer parameters.
//!
//! \return 0 on success; IndexError_Duplicate when a storage is already
//!         attached; IndexError_InvalidArgument for a null storage; the error
//!         of load_storage()/init_storage(); IndexError_InvalidFormat when
//!         the metric cannot be initialised
int FlatSparseStreamerEntity::open(IndexStorage::Pointer storage,
                                   const IndexMeta &meta) {
  if (storage_) {
    LOG_ERROR("A storage instance is already opened");
    return IndexError_Duplicate;
  }
  if (ailego_unlikely(!storage)) {
    // storage is dereferenced right below; reject a null handle up front.
    LOG_ERROR("Invalid storage instance");
    return IndexError_InvalidArgument;
  }

  // std::make_shared reports allocation failure by throwing, never by
  // returning an empty pointer, so no null checks are needed here.
  keys_map_lock_ = std::make_shared<ailego::SharedMutex>();
  keys_map_ = std::make_shared<std::map<uint64_t, node_id_t>>();

  if (storage->get(PARAM_FLAT_SPARSE_META_SEG_ID) ||
      storage->get(PARAM_FLAT_SPARSE_STREAMER_META_SEG_ID)) {
    int ret = this->load_storage(storage, meta);
    if (ailego_unlikely(ret != 0)) {
      LOG_ERROR("Failed to load storage index");
      return ret;
    }
  } else {
    int ret = this->init_storage(storage, meta);
    if (ailego_unlikely(ret != 0)) {
      LOG_ERROR("Failed to init storage index");
      return ret;
    }
  }

  if (init_metric(meta) != 0) {
    LOG_ERROR("Failed to init metric");
    return IndexError_InvalidFormat;
  }

  // reserve data chunk
  meta.streamer_params().get(PARAM_FLAT_SPARSE_STREAMER_MAX_DATA_CHUNK_CNT,
                             &max_data_chunk_cnt_);
  sparse_data_chunks_.reserve(max_data_chunk_cnt_);

  // reserve offset chunk
  meta.streamer_params().get(PARAM_FLAT_SPARSE_STREAMER_MAX_DOC_CNT,
                             &max_doc_cnt_);
  sparse_offset_chunks_.reserve(max_doc_cnt_ / doc_cnt_per_offset_chunk() + 1);
  sparse_unit_size_ = meta.unit_size();

  LOG_DEBUG(
      "FlatSparseStreamerEntity open success, doc_count[%u], "
      "data_chunk_size[%u], offset_chunk_size[%u], data_chunk_count[%zu], "
      "offset_chunk_count[%zu]",
      meta_.doc_cnt, streamer_meta_.data_chunk_size,
      streamer_meta_.offset_chunk_size, sparse_data_chunks_.size(),
      sparse_offset_chunks_.size());

  // Attach the storage last: storage_ doubles as the "already opened" marker.
  storage_ = storage;
  return 0;
}

//! Create and initialise the metric named in `meta` and pick the sparse
//! distance function used for searching.
//!
//! The metric's own sparse distance is the default; when the metric exposes a
//! query metric that provides a sparse distance, that one is used instead.
//!
//! \return 0 on success; IndexError_NoExist when the metric cannot be
//!         created; the metric's init error; IndexError_InvalidArgument when
//!         the metric has no sparse distance
int FlatSparseStreamerEntity::init_metric(const IndexMeta &meta) {
  metric_ = IndexFactory::CreateMetric(meta.metric_name());
  if (!metric_) {
    LOG_ERROR("Failed to create metric %s", meta.metric_name().c_str());
    return IndexError_NoExist;
  }
  int ret = metric_->init(meta, meta.metric_params());
  if (ret != 0) {
    LOG_ERROR("Failed to init metric, ret=%d", ret);
    return ret;
  }

  if (!metric_->sparse_distance()) {
    LOG_ERROR("Invalid metric distance");
    return IndexError_InvalidArgument;
  }

  search_sparse_distance_ = metric_->sparse_distance();

  // Test the function that is actually installed. Checking the dense
  // distance() here could overwrite the valid default with an unset sparse
  // distance, or skip a query metric that only provides a sparse distance.
  if (metric_->query_metric() && metric_->query_metric()->sparse_distance()) {
    search_sparse_distance_ = metric_->query_metric()->sparse_distance();
  }

  return 0;
}

// Load an existing index from `storage`.
//
// Reads the meta and streamer-meta segments, validates the stored chunk sizes
// against the streamer params in `meta`, attaches every offset/data chunk
// segment, rebuilds the key map from the stored keys and refreshes `stats_`.
//
// Returns 0 on success, IndexError_InvalidFormat when a segment is missing,
// too small, or the stored layout does not match the expected one, and
// IndexError_ReadData when a meta segment cannot be read.
int FlatSparseStreamerEntity::load_storage(IndexStorage::Pointer storage,
                                           const IndexMeta &meta) {
  size_t index_size{0};

  // load meta
  auto segment = storage->get(PARAM_FLAT_SPARSE_META_SEG_ID);

  if (!segment || segment->data_size() < sizeof(meta_)) {
    LOG_ERROR("Missing segment %s, or invalid segment size",
              PARAM_FLAT_SPARSE_META_SEG_ID.c_str());
    return IndexError_InvalidFormat;
  }
  IndexStorage::MemoryBlock data_block;
  if (ailego_unlikely(segment->read(0, data_block, sizeof(meta_)) !=
                      sizeof(meta_))) {
    LOG_ERROR("Failed to read meta segment %s",
              PARAM_FLAT_SPARSE_META_SEG_ID.c_str());
    return IndexError_ReadData;
  }
  meta_ = *(reinterpret_cast<const decltype(meta_) *>(data_block.data()));
  index_size += segment->capacity();

  // load streamer meta
  segment = storage->get(PARAM_FLAT_SPARSE_STREAMER_META_SEG_ID);
  if (!segment || segment->data_size() < sizeof(streamer_meta_)) {
    LOG_ERROR("Missing segment %s, or invalid segment size",
              PARAM_FLAT_SPARSE_STREAMER_META_SEG_ID.c_str());
    return IndexError_InvalidFormat;
  }
  if (ailego_unlikely(segment->read(0, data_block, sizeof(streamer_meta_)) !=
                      sizeof(streamer_meta_))) {
    LOG_ERROR("Failed to read streamer meta segment %s",
              PARAM_FLAT_SPARSE_STREAMER_META_SEG_ID.c_str());
    return IndexError_ReadData;
  }
  streamer_meta_ =
      *(reinterpret_cast<const decltype(streamer_meta_) *>(data_block.data()));
  index_size += segment->capacity();

  // The expected sizes start out as the stored ones and are then overwritten
  // by the streamer params (presumably only when the param is present --
  // confirm against the params API), so a mismatch means the caller asked for
  // a layout different from the one on disk.
  uint32_t meta_data_chunk_size{streamer_meta_.data_chunk_size};
  uint32_t meta_offset_chunk_size{streamer_meta_.offset_chunk_size};
  meta.streamer_params().get(PARAM_FLAT_SPARSE_STREAMER_DATA_CHUNK_SIZE,
                             &meta_data_chunk_size);
  meta.streamer_params().get(PARAM_FLAT_SPARSE_STREAMER_OFFSET_CHUNK_SIZE,
                             &meta_offset_chunk_size);
  if (streamer_meta_.data_chunk_size != meta_data_chunk_size ||
      streamer_meta_.offset_chunk_size != meta_offset_chunk_size) {
    LOG_ERROR(
        "Invalid streamer meta chunk size data[%u] offset[%u], expect data[%u] "
        "offset[%u]",
        streamer_meta_.data_chunk_size, streamer_meta_.offset_chunk_size,
        meta_data_chunk_size, meta_offset_chunk_size);
    return IndexError_InvalidFormat;
  }

  // check chunk cnt
  // NOTE(review): this compares against max_data_chunk_cnt_ / max_doc_cnt_ as
  // they are at call time; confirm the caller has loaded them from the params
  // before invoking load_storage().
  if (streamer_meta_.data_chunk_count > max_data_chunk_cnt_ ||
      meta_.doc_cnt > max_doc_cnt_) {
    LOG_ERROR(
        "Invalid data chunk count[%u] doc count[%u], expect less than "
        "chunk count[%u] doc count[%u]",
        streamer_meta_.data_chunk_count, meta_.doc_cnt, max_data_chunk_cnt_,
        max_doc_cnt_);
    return IndexError_InvalidFormat;
  }

  // load offset chunks
  for (size_t i = 0; i < streamer_meta_.offset_chunk_count; ++i) {
    std::string segment_id =
        ailego::StringHelper::Concat(PARAM_FLAT_SPARSE_OFFSET_SEG_ID_PREFIX, i);
    segment = storage->get(segment_id);
    if (!segment) {
      LOG_ERROR("Missing segment %s", segment_id.c_str());
      return IndexError_InvalidFormat;
    }
    sparse_offset_chunks_.emplace_back(segment);
    index_size += segment->capacity();
  }
  // load data chunks
  for (size_t i = 0; i < streamer_meta_.data_chunk_count; ++i) {
    std::string segment_id =
        ailego::StringHelper::Concat(PARAM_FLAT_SPARSE_DATA_SEG_ID_PREFIX, i);
    segment = storage->get(segment_id);
    if (!segment) {
      LOG_ERROR("Missing segment %s", segment_id.c_str());
      // Bail out like the offset-chunk loop does; falling through would
      // dereference the null segment below and store a null chunk.
      return IndexError_InvalidFormat;
    }
    sparse_data_chunks_.emplace_back(segment);
    index_size += segment->capacity();
  }

  // load keys: rebuild the key -> node id map from the stored offset records
  for (node_id_t i = 0; i < meta_.doc_cnt; ++i) {
    (*keys_map_)[get_key(i)] = i;
  }

  stats_.set_index_size(index_size);
  stats_.set_check_point(storage->check_point());
  stats_.set_create_time(meta_.create_time);
  stats_.set_update_time(meta_.update_time);
  stats_.set_loaded_count(keys_map_->size());

  return 0;
}

// Initialize a brand-new index inside `storage`.
//
// Stamps the create/update times, resets the document count, reads the chunk
// sizes from the streamer params into `streamer_meta_`, then appends and
// writes the page-aligned meta and streamer-meta segments. Each appended
// segment size is added to the index size tracked in `stats_`.
//
// Returns 0 on success, the storage error when an append fails,
// IndexError_Runtime when a freshly appended segment cannot be fetched, and
// IndexError_WriteData on a short write.
int FlatSparseStreamerEntity::init_storage(IndexStorage::Pointer storage,
                                           const IndexMeta &meta) {
  meta_.create_time = ailego::Realtime::Seconds();
  stats_.set_create_time(meta_.create_time);
  meta_.update_time = ailego::Realtime::Seconds();
  stats_.set_update_time(meta_.update_time);
  meta_.doc_cnt = 0;

  const auto &params = meta.streamer_params();
  params.get(PARAM_FLAT_SPARSE_STREAMER_DATA_CHUNK_SIZE,
             &streamer_meta_.data_chunk_size);
  params.get(PARAM_FLAT_SPARSE_STREAMER_OFFSET_CHUNK_SIZE,
             &streamer_meta_.offset_chunk_size);

  // Meta segment: reserve a page-aligned region and store `meta_` at its head.
  {
    const size_t meta_seg_size =
        ailego_align(sizeof(meta_), ailego::MemoryHelper::PageSize());
    const int append_rc =
        storage->append(PARAM_FLAT_SPARSE_META_SEG_ID, meta_seg_size);
    if (ailego_unlikely(append_rc != 0)) {
      LOG_ERROR("Failed to append meta segment %s",
                PARAM_FLAT_SPARSE_META_SEG_ID.c_str());
      return append_rc;
    }

    auto meta_seg = storage->get(PARAM_FLAT_SPARSE_META_SEG_ID);
    if (ailego_unlikely(!meta_seg)) {
      LOG_ERROR("Failed to get meta segment %s",
                PARAM_FLAT_SPARSE_META_SEG_ID.c_str());
      return IndexError_Runtime;
    }
    if (meta_seg->write(0, &meta_, sizeof(meta_)) != sizeof(meta_)) {
      LOG_ERROR("Failed to write meta segment %s",
                PARAM_FLAT_SPARSE_META_SEG_ID.c_str());
      return IndexError_WriteData;
    }

    *stats_.mutable_index_size() += meta_seg_size;
  }

  // Streamer meta segment: same procedure for `streamer_meta_`.
  {
    const size_t streamer_seg_size =
        ailego_align(sizeof(streamer_meta_), ailego::MemoryHelper::PageSize());
    const int append_rc = storage->append(
        PARAM_FLAT_SPARSE_STREAMER_META_SEG_ID, streamer_seg_size);
    if (ailego_unlikely(append_rc != 0)) {
      LOG_ERROR("Failed to append streamer meta segment %s",
                PARAM_FLAT_SPARSE_STREAMER_META_SEG_ID.c_str());
      return append_rc;
    }

    auto streamer_seg = storage->get(PARAM_FLAT_SPARSE_STREAMER_META_SEG_ID);
    if (ailego_unlikely(!streamer_seg)) {
      LOG_ERROR("Failed to get streamer meta segment %s",
                PARAM_FLAT_SPARSE_STREAMER_META_SEG_ID.c_str());
      return IndexError_Runtime;
    }
    if (streamer_seg->write(0, &streamer_meta_, sizeof(streamer_meta_)) !=
        sizeof(streamer_meta_)) {
      LOG_ERROR("Failed to write streamer meta segment %s",
                PARAM_FLAT_SPARSE_STREAMER_META_SEG_ID.c_str());
      return IndexError_WriteData;
    }

    *stats_.mutable_index_size() += streamer_seg_size;
  }

  return 0;
}

// Release everything this entity holds: the storage handle, the data and
// offset chunk lists, and this entity's handles to the key map and its lock.
// Always returns 0.
int FlatSparseStreamerEntity::close() {
  // Drop the storage handle first, then the chunk references.
  storage_.reset();
  sparse_data_chunks_.clear();
  sparse_offset_chunks_.clear();

  // Release the key-map lock handle and the key map itself.
  keys_map_lock_.reset();
  keys_map_.reset();

  return 0;
}

// Persist the in-memory metadata and flush the underlying storage.
//
// Rewrites the meta segment with a refreshed update time, rewrites the
// streamer meta segment with the current chunk counts, then flushes the
// storage. A non-zero `checkpoint` is passed to the storage before the flush
// and recorded in `stats_` after the flush succeeded.
//
// Returns 0 on success, IndexError_Runtime when a meta segment is missing,
// IndexError_WriteData on a short write, or the storage flush error.
int FlatSparseStreamerEntity::flush(uint64_t checkpoint) {
  const bool has_checkpoint = (checkpoint != 0);

  // Index meta.
  meta_.update_time = ailego::Realtime::Seconds();
  stats_.set_update_time(meta_.update_time);
  {
    auto meta_seg = storage_->get(PARAM_FLAT_SPARSE_META_SEG_ID);
    if (ailego_unlikely(!meta_seg)) {
      LOG_ERROR("Failed to get meta segment %s",
                PARAM_FLAT_SPARSE_META_SEG_ID.c_str());
      return IndexError_Runtime;
    }
    if (meta_seg->write(0, &meta_, sizeof(meta_)) != sizeof(meta_)) {
      LOG_ERROR("Failed to write meta segment %s",
                PARAM_FLAT_SPARSE_META_SEG_ID.c_str());
      return IndexError_WriteData;
    }
  }

  // Streamer meta, carrying the number of chunks currently attached.
  streamer_meta_.data_chunk_count = sparse_data_chunks_.size();
  streamer_meta_.offset_chunk_count = sparse_offset_chunks_.size();
  {
    auto streamer_seg = storage_->get(PARAM_FLAT_SPARSE_STREAMER_META_SEG_ID);
    if (ailego_unlikely(!streamer_seg)) {
      LOG_ERROR("Failed to get streamer meta segment %s",
                PARAM_FLAT_SPARSE_STREAMER_META_SEG_ID.c_str());
      return IndexError_Runtime;
    }
    if (streamer_seg->write(0, &streamer_meta_, sizeof(streamer_meta_)) !=
        sizeof(streamer_meta_)) {
      LOG_ERROR("Failed to write streamer meta segment %s",
                PARAM_FLAT_SPARSE_STREAMER_META_SEG_ID.c_str());
      return IndexError_WriteData;
    }
  }

  if (has_checkpoint) {
    storage_->refresh(checkpoint);
  }

  const int flush_rc = storage_->flush();
  if (ailego_unlikely(flush_rc != 0)) {
    LOG_ERROR("Failed to flush storage for %s", IndexError::What(flush_rc));
    return flush_rc;
  }

  // Only advertise the checkpoint once the flush went through.
  if (has_checkpoint) {
    stats_.set_check_point(checkpoint);
  }
  return 0;
}

// Dump the whole index through `dumper` in four stages -- meta, offset & data,
// keys, key-sorted mapping -- and log how long each stage took.
//
// Returns 0 on success or the error code of the first failing stage.
int FlatSparseStreamerEntity::dump(const IndexDumper::Pointer &dumper) {
  ailego::ElapsedTime timer;
  IndexDumper *const raw_dumper = dumper.get();

  // Stage 1: meta
  int rc = dump_meta(raw_dumper);
  if (rc != 0) {
    return rc;
  }
  auto meta_ms = timer.milli_seconds();

  // Stage 2: offset & data
  rc = dump_offset_data(raw_dumper);
  if (rc != 0) {
    return rc;
  }
  auto offset_data_ms = timer.milli_seconds() - meta_ms;

  // Stage 3: keys (collecting them counts towards this stage's time)
  std::vector<uint64_t> all_keys = get_keys();
  rc = dump_keys(all_keys, raw_dumper);
  if (rc != 0) {
    return rc;
  }
  auto keys_ms = timer.milli_seconds() - offset_data_ms - meta_ms;

  // Stage 4: mapping
  rc = dump_mapping(all_keys, raw_dumper);
  if (rc != 0) {
    return rc;
  }
  auto mapping_ms =
      timer.milli_seconds() - offset_data_ms - meta_ms - keys_ms;

  LOG_INFO(
      "Dump index meta: %zu ms, offset & data: %zu ms, keys: %zu ms, "
      "mapping: %zu ms",
      (size_t)meta_ms, (size_t)offset_data_ms, (size_t)keys_ms,
      (size_t)mapping_ms);

  return 0;
}

// Dump every document's sparse vector followed by the (offset, length) table.
//
// The data segment holds the vectors back to back in node-id order. The
// offset segment holds one record per node: a uint64_t offset relative to the
// start of the data segment followed by a uint32_t length. `dump_size_` is
// advanced by every byte accounted for here.
//
// Returns 0 on success, the lookup error when a vector cannot be fetched, and
// IndexError_WriteData when writing or appending a segment fails.
int FlatSparseStreamerEntity::dump_offset_data(IndexDumper *dumper) {
  ailego::ElapsedTime stamp;

  uint64_t init_offset = dump_size_;
  const node_id_t total_doc_cnt = doc_cnt();
  std::vector<std::pair<uint64_t, uint32_t>> offset_length;
  offset_length.reserve(total_doc_cnt);

  // write data
  int ret;
  for (node_id_t node_id = 0; node_id < total_doc_cnt; node_id++) {
    // Must start at 0: the lookup below does not write the length for a
    // zero-length record, and an indeterminate value would otherwise be
    // dumped as that record's length.
    uint32_t target_vector_len = 0;
    IndexStorage::MemoryBlock target_vector_block;
    ret = get_sparse_vector_ptr_by_id(node_id, target_vector_block,
                                      &target_vector_len);
    if (ret != 0) {
      LOG_ERROR("Failed to get vector, node_id=%u, error: %s", node_id,
                IndexError::What(ret));
      return ret;
    }
    const void *target_vector = target_vector_block.data();
    ret = dump_sparse_vector_data(target_vector, target_vector_len, dumper);
    if (ret != 0) {
      LOG_ERROR("Failed to dump sparse vector data, node_id=%u, error: %s",
                node_id, IndexError::What(ret));
      return ret;
    }

    offset_length.emplace_back(dump_size_ - init_offset, target_vector_len);
    dump_size_ += target_vector_len;
  }

  // append data segment
  if (dumper->append(PARAM_FLAT_SPARSE_DUMP_DATA_SEG_ID,
                     dump_size_ - init_offset, 0, 0) != 0) {
    LOG_ERROR("append data segment failed");
    return IndexError_WriteData;
  }

  auto duration_dump_data = stamp.milli_seconds();

  // write offset: the two fields are written separately so the on-disk record
  // is exactly sizeof(uint64_t) + sizeof(uint32_t) bytes, without padding.
  for (auto &offset_length_pair : offset_length) {
    if (dumper->write(&offset_length_pair.first,
                      sizeof(offset_length_pair.first)) !=
        sizeof(offset_length_pair.first)) {
      LOG_ERROR("write offset failed");
      return IndexError_WriteData;
    }
    if (dumper->write(&offset_length_pair.second,
                      sizeof(offset_length_pair.second)) !=
        sizeof(offset_length_pair.second)) {
      LOG_ERROR("write length failed");
      return IndexError_WriteData;
    }
    dump_size_ +=
        sizeof(offset_length_pair.first) + sizeof(offset_length_pair.second);
  }

  // append offset segment
  if (dumper->append(
          PARAM_FLAT_SPARSE_DUMP_OFFSET_SEG_ID,
          offset_length.size() * (sizeof(uint64_t) + sizeof(uint32_t)), 0,
          0) != 0) {
    LOG_ERROR("append offset segment failed");
    return IndexError_WriteData;
  }

  auto duration_dump_offset = stamp.milli_seconds() - duration_dump_data;

  LOG_INFO("Dump offset: %zu ms, data: %zu ms", (size_t)duration_dump_offset,
           (size_t)duration_dump_data);

  return 0;
}

// Write `length` bytes starting at `data` to `dumper`.
// Returns 0 when everything was written, IndexError_WriteData otherwise.
int FlatSparseStreamerEntity::dump_sparse_vector_data(const void *data,
                                                      uint32_t length,
                                                      IndexDumper *dumper) {
  const bool fully_written = (dumper->write(data, length) == length);
  if (fully_written) {
    return 0;
  }
  return IndexError_WriteData;
}

// Dump `meta_` as its own segment, zero-padded up to a multiple of 32 bytes.
// Returns the dumper's append() result on success, or IndexError_WriteData
// when the meta bytes or the padding cannot be written completely.
int FlatSparseStreamerEntity::dump_meta(IndexDumper *dumper) {
  const size_t meta_bytes = sizeof(meta_);
  if (dumper->write(&meta_, meta_bytes) != meta_bytes) {
    LOG_ERROR("write meta failed");
    return IndexError_WriteData;
  }

  // Fill the gap up to the next 32-byte boundary with zeros.
  const size_t pad_bytes = ailego_align(sizeof(meta_), 32) - meta_bytes;
  if (pad_bytes != 0) {
    const std::string zeros(pad_bytes, '\0');
    if (dumper->write(zeros.data(), pad_bytes) != pad_bytes) {
      LOG_ERROR("write meta padding failed");
      return IndexError_WriteData;
    }
  }

  return dumper->append(PARAM_FLAT_SPARSE_META_SEG_ID, meta_bytes, pad_bytes,
                        0);
}

// Dump `keys` as a contiguous uint64_t array in its own segment, zero-padded
// up to a multiple of 32 bytes.
//
// A vector holding exactly one kInvalidKey is rejected with
// IndexError_Runtime (presumably the failure marker of the key collector --
// confirm against get_keys()). Returns the dumper's append() result on
// success, or IndexError_WriteData on a short write.
int FlatSparseStreamerEntity::dump_keys(const std::vector<uint64_t> &keys,
                                        IndexDumper *dumper) {
  const bool is_failure_marker =
      (keys.size() == 1) && (keys.front() == kInvalidKey);
  if (is_failure_marker) {
    return IndexError_Runtime;
  }

  const size_t payload_bytes = keys.size() * sizeof(uint64_t);
  if (dumper->write(keys.data(), payload_bytes) != payload_bytes) {
    LOG_ERROR("Failed to write keys to dumper %s", dumper->name().c_str());
    return IndexError_WriteData;
  }

  const size_t pad_bytes = ailego_align(payload_bytes, 32) - payload_bytes;
  if (pad_bytes != 0) {
    const std::string zeros(pad_bytes, '\0');
    if (dumper->write(zeros.data(), zeros.size()) != zeros.size()) {
      LOG_ERROR("Failed to write padding to dumper %s", dumper->name().c_str());
      return IndexError_WriteData;
    }
  }

  return dumper->append(PARAM_FLAT_SPARSE_DUMP_KEYS_SEG_ID, payload_bytes,
                        pad_bytes, 0);
}

// Dump the key-sorted mapping: the positions 0..keys.size()-1 ordered by
// ascending key value, stored as a uint32_t array in its own segment and
// zero-padded up to a multiple of 32 bytes.
//
// Returns the dumper's append() result on success, or IndexError_WriteData on
// a short write.
int FlatSparseStreamerEntity::dump_mapping(const std::vector<uint64_t> &keys,
                                           IndexDumper *dumper) {
  // Identity permutation, then order the positions by the key they refer to.
  std::vector<uint32_t> order(keys.size());
  std::iota(order.begin(), order.end(), 0);
  const auto by_key = [&keys](uint32_t a, uint32_t b) {
    return keys[a] < keys[b];
  };
  std::sort(order.begin(), order.end(), by_key);

  const size_t payload_bytes = order.size() * sizeof(uint32_t);
  const size_t pad_bytes = ailego_align(payload_bytes, 32) - payload_bytes;

  if (dumper->write(order.data(), payload_bytes) != payload_bytes) {
    LOG_ERROR("Failed to write data into dumper %s", dumper->name().c_str());
    return IndexError_WriteData;
  }

  // Zero padding, only when the payload is not already aligned.
  if (pad_bytes != 0) {
    const std::string zeros(pad_bytes, '\0');
    if (dumper->write(zeros.data(), zeros.size()) != zeros.size()) {
      LOG_ERROR("Failed to write data into dumper %s", dumper->name().c_str());
      return IndexError_WriteData;
    }
  }

  return dumper->append(PARAM_FLAT_SPARSE_DUMP_MAPPING_SEG_ID, payload_bytes,
                        pad_bytes, 0);
}

// Build a new entity from this one's stats, metas, key map (with its lock) and
// chunk lists. Allocation uses the nothrow form of new, so the returned
// pointer wraps nullptr when the allocation fails.
FlatSparseStreamerEntity::Pointer FlatSparseStreamerEntity::clone() const {
  return FlatSparseStreamerEntity::Pointer(
      new (std::nothrow) FlatSparseStreamerEntity(
          stats_, meta_, streamer_meta_, keys_map_lock_, keys_map_,
          sparse_data_chunks_, sparse_offset_chunks_));
}

int FlatSparseStreamerEntity::add(uint64_t key,
                                  const std::string &sparse_vector,
                                  const uint32_t sparse_count) {
  uint32_t sparse_vector_len = sparse_vector.size();

  sparse_vector_len = AlignSize(sparse_vector_len);

  if (sparse_vector_len > streamer_meta_.data_chunk_size) {
    LOG_ERROR(
        "Sparse Vector Length exceed the chunk size, sparse vec len: %u, chunk "
        "size: %u",
        sparse_vector_len, streamer_meta_.data_chunk_size);
    return IndexError_InvalidArgument;
  }

  std::lock_guard<std::mutex> lock(mutex_);
  node_id_t local_id = doc_cnt();

  if (ailego_unlikely(local_id >= max_doc_cnt_)) {
    LOG_ERROR("Add vector failed for exceed max doc count: %u", max_doc_cnt_);
    return IndexError_IndexFull;
  }

  // duplicate check
  if (ailego_unlikely(get_id(key) != kInvalidNodeId)) {
    LOG_WARN("Try to add duplicate key, ignore it");
    return IndexError_Duplicate;
  }

  // get sparse data chunk and offset for write sparse vector
  Chunk::Pointer sparse_data_chunk;
  uint32_t sparse_data_chunk_offset = -1U;
  uint32_t sparse_data_chunk_index = sparse_data_chunks_.size() - 1U;
  if (sparse_data_chunk_index == -1U ||
      sparse_data_chunks_[sparse_data_chunk_index]->data_size() +
              sparse_vector_len >
          streamer_meta_.data_chunk_size) {
    if (ailego_unlikely(sparse_data_chunks_.capacity() ==
                        sparse_data_chunks_.size())) {
      LOG_ERROR("add vector failed for no memory quota");
      if (sparse_data_chunk_index != -1U) {
        LOG_ERROR(
            "capacity: %zu, chunk used size: %zu, chunk size: %u, "
            "sparse_vector_len: %u",
            sparse_data_chunks_.capacity(),
            sparse_data_chunks_[sparse_data_chunk_index]->data_size(),
            streamer_meta_.data_chunk_size, sparse_vector_len);
      }
      return IndexError_IndexFull;
    }

    sparse_data_chunk = alloc_new_data_chunk(sparse_data_chunks_.size());
    if (ailego_unlikely(!sparse_data_chunk)) {
      LOG_ERROR("allocate data chunk failed");
      return IndexError_NoMemory;
    }
    sparse_data_chunks_.emplace_back(sparse_data_chunk);
    sparse_data_chunk_index = sparse_data_chunks_.size() - 1U;
    sparse_data_chunk_offset = 0UL;
  } else {
    sparse_data_chunk = sparse_data_chunks_[sparse_data_chunk_index];
    sparse_data_chunk_offset = sparse_data_chunk->data_size();
  }

  // write sparse vector
  if (sparse_vector.size() > 0) {
    if (ailego_unlikely(write_sparse_vector_data(
                            sparse_data_chunk_index, sparse_data_chunk_offset,
                            sparse_vector.data(), sparse_vector.size()) != 0)) {
      LOG_ERROR("write sparse vector failed");
      return IndexError_NoMemory;
    }
  }

  uint64_t sparse_offset = sparse_data_chunk_index;
  sparse_offset = (sparse_offset << 32U) + sparse_data_chunk_offset;

  // get sparse offset chunk and offset for write new info
  Chunk::Pointer sparse_offset_chunk;
  uint32_t sparse_offset_chunk_offset = -1U;
  uint32_t sparse_offset_chunk_index = sparse_offset_chunks_.size() - 1U;
  if (sparse_offset_chunk_index == -1U ||
      sparse_offset_chunks_[sparse_offset_chunk_index]->data_size() +
              offset_size_per_node() >
          streamer_meta_.offset_chunk_size) {
    // no space left and need to allocate new offset chunk
    if (ailego_unlikely(sparse_offset_chunks_.capacity() ==
                        sparse_offset_chunks_.size())) {
      LOG_ERROR("add vector failed for no memory quota");
      return IndexError_IndexFull;
    }

    sparse_offset_chunk = alloc_new_offset_chunk(sparse_offset_chunks_.size());
    if (ailego_unlikely(!sparse_offset_chunk)) {
      LOG_ERROR("allocate offset chunk failed");
      return IndexError_NoMemory;
    }
    sparse_offset_chunks_.emplace_back(sparse_offset_chunk);
    sparse_offset_chunk_index = sparse_offset_chunks_.size() - 1U;
    sparse_offset_chunk_offset = 0UL;
  } else {
    sparse_offset_chunk = sparse_offset_chunks_[sparse_offset_chunk_index];
    sparse_offset_chunk_offset = sparse_offset_chunk->data_size();
  }

  // write offset
  size_t size = sparse_offset_chunk->write(sparse_offset_chunk_offset,
                                           &sparse_offset, sizeof(uint64_t));
  if (ailego_unlikely(size != sizeof(uint64_t))) {
    LOG_ERROR("Chunk write sparse vec offset failed, ret=%zu", size);
    return IndexError_WriteData;
  }

  // write length
  size =
      sparse_offset_chunk->write(sparse_offset_chunk_offset + sizeof(uint64_t),
                                 &sparse_vector_len, sizeof(uint32_t));
  if (ailego_unlikely(size != sizeof(uint32_t))) {
    LOG_ERROR("Chunk write sparse vec len failed, ret=%zu", size);
    return IndexError_WriteData;
  }

  // write key
  size = sparse_offset_chunk->write(
      sparse_offset_chunk_offset + 2 * sizeof(uint64_t), &key,
      sizeof(uint64_t));
  if (ailego_unlikely(size != sizeof(uint64_t))) {
    LOG_ERROR("Chunk write key failed, ret=%zu", size);
    return IndexError_WriteData;
  }

  // LOG_INFO("Write sparse vector, key=%lu, offset chunk=%u, offset=%u,
  // len=%u",
  //          key, sparse_offset_chunk_index, sparse_offset_chunk_offset,
  //          offset_size_per_node());

  // LOG_INFO("Write sparse vector, key=%lu, data chunk=%u, offset=%u, len=%u",
  //          key, sparse_data_chunk_index, sparse_data_chunk_offset,
  //          sparse_vector_len);

  // resize chunk
  if (sparse_vector_len > 0) {
    sparse_data_chunk_offset += sparse_vector_len;
    if (ailego_unlikely(sparse_data_chunk->resize(sparse_data_chunk_offset) !=
                        sparse_data_chunk_offset)) {
      LOG_ERROR("Sparse Chunk resize to %u failed", sparse_data_chunk_offset);
      return IndexError_Runtime;
    }
  }

  // persist in keys_map
  {
    keys_map_lock_->lock();
    (*keys_map_)[key] = local_id;
    keys_map_lock_->unlock();
  }

  inc_doc_count();
  inc_total_sparse_count(sparse_count);

  return 0;
}

// Store `sparse_vector` at the explicit node id `id`.
//
// When `id` is at or beyond the current document count, one offset record is
// appended for every node id from the current count up to `id`: the ids below
// `id` get a record without vector bytes and with kInvalidKey as key, the
// last one gets the real vector with `id` as its key. When `id` already
// exists, the vector bytes are appended to the data chunks and the existing
// offset record of `id` is overwritten in place.
//
// Every chunk helper result is checked: on failure their output indices are
// not usable, so the error is returned instead of continuing with them.
//
// @param id             Node id to write; also used as the document key.
// @param sparse_vector  Serialized sparse vector bytes.
// @param sparse_count   Amount added to the total sparse count on success.
// @return 0 on success; IndexError_InvalidArgument when the aligned vector is
//         larger than a data chunk; IndexError_IndexFull when the doc quota
//         is exhausted; otherwise the error reported by the chunk helpers.
int FlatSparseStreamerEntity::add_vector_with_id(
    uint32_t id, const std::string &sparse_vector,
    const uint32_t sparse_count) {
  uint32_t sparse_vector_len = sparse_vector.size();

  sparse_vector_len = AlignSize(sparse_vector_len);

  if (sparse_vector_len > streamer_meta_.data_chunk_size) {
    LOG_ERROR(
        "Sparse Vector Length exceed the chunk size, sparse vec len: %u, chunk "
        "size: %u",
        sparse_vector_len, streamer_meta_.data_chunk_size);
    return IndexError_InvalidArgument;
  }

  std::lock_guard<std::mutex> lock(mutex_);

  if (id >= doc_cnt()) {
    for (auto i = doc_cnt(); i <= id; i++) {
      node_id_t local_id = doc_cnt();
      if (ailego_unlikely(local_id >= max_doc_cnt_)) {
        LOG_ERROR("Add vector failed for exceed max doc count: %u",
                  max_doc_cnt_);
        return IndexError_IndexFull;
      }
      uint32_t sparse_data_chunk_index = 0;
      uint32_t sparse_data_chunk_offset = 0;
      uint32_t sparse_offset_chunk_index = 0;
      uint32_t sparse_offset_chunk_offset = 0;

      int ret;
      if (i < id) {
        // Filler record: no vector bytes are stored for it.
        ret = write_sparse_vector_to_chunk("", 0, sparse_data_chunk_index,
                                           sparse_data_chunk_offset);
      } else {
        ret = write_sparse_vector_to_chunk(sparse_vector, sparse_vector_len,
                                           sparse_data_chunk_index,
                                           sparse_data_chunk_offset);
      }
      if (ailego_unlikely(ret != 0)) {
        return ret;
      }

      uint64_t sparse_offset =
          ((uint64_t)sparse_data_chunk_index << 32U) + sparse_data_chunk_offset;
      ret = get_new_sparse_offset_chunk(sparse_offset_chunk_index,
                                        sparse_offset_chunk_offset);
      if (ailego_unlikely(ret != 0)) {
        return ret;
      }

      uint64_t written_key = kInvalidKey;
      if (i == id) {
        written_key = i;
      }
      // NOTE(review): filler records (i < id) are written with the real
      // vector's aligned length although no bytes were stored for them --
      // confirm whether 0 is intended before changing it; readers of
      // zero-length records must then cope with an unset length.
      ret = write_sparse_offset_to_chunk(sparse_offset_chunk_index,
                                         sparse_offset_chunk_offset,
                                         sparse_offset, sparse_vector_len,
                                         written_key);
      if (ailego_unlikely(ret != 0)) {
        return ret;
      }
      {
        keys_map_lock_->lock();
        (*keys_map_)[i] = written_key;
        keys_map_lock_->unlock();
      }
      inc_doc_count();
    }
  } else {
    // Existing id: append the new bytes and repoint its offset record.
    uint32_t sparse_data_chunk_index = 0;
    uint32_t sparse_data_chunk_offset = 0;
    int ret = write_sparse_vector_to_chunk(sparse_vector, sparse_vector_len,
                                           sparse_data_chunk_index,
                                           sparse_data_chunk_offset);
    if (ailego_unlikely(ret != 0)) {
      return ret;
    }
    uint64_t sparse_offset =
        ((uint64_t)sparse_data_chunk_index << 32U) + sparse_data_chunk_offset;
    uint32_t sparse_offset_chunk_index =
        id / get_offset_info_number_per_chunk();
    uint32_t sparse_offset_chunk_offset =
        id % get_offset_info_number_per_chunk() * offset_size_per_node();
    ret = write_sparse_offset_to_chunk(sparse_offset_chunk_index,
                                       sparse_offset_chunk_offset,
                                       sparse_offset, sparse_vector_len, id);
    if (ailego_unlikely(ret != 0)) {
      return ret;
    }
    {
      keys_map_lock_->lock();
      (*keys_map_)[id] = id;
      keys_map_lock_->unlock();
    }
  }
  inc_total_sparse_count(sparse_count);
  return 0;
}

int FlatSparseStreamerEntity::write_sparse_vector_to_chunk(
    const std::string &sparse_vector, const uint32_t sparse_vector_len,
    uint32_t &sparse_data_chunk_index, uint32_t &sparse_data_chunk_offset) {
  // get sparse data chunk and offset for write sparse vector
  Chunk::Pointer sparse_data_chunk;
  sparse_data_chunk_offset = -1U;
  sparse_data_chunk_index = sparse_data_chunks_.size() - 1U;
  if (sparse_data_chunk_index == -1U ||
      sparse_data_chunks_[sparse_data_chunk_index]->data_size() +
              sparse_vector_len >
          streamer_meta_.data_chunk_size) {
    if (ailego_unlikely(sparse_data_chunks_.capacity() ==
                        sparse_data_chunks_.size())) {
      LOG_ERROR("add vector failed for no memory quota");
      if (sparse_data_chunk_index != -1U) {
        LOG_ERROR(
            "capacity: %zu, chunk used size: %zu, chunk size: %u, "
            "sparse_vector_len: %u",
            sparse_data_chunks_.capacity(),
            sparse_data_chunks_[sparse_data_chunk_index]->data_size(),
            streamer_meta_.data_chunk_size, sparse_vector_len);
      }
      return IndexError_IndexFull;
    }

    sparse_data_chunk = alloc_new_data_chunk(sparse_data_chunks_.size());
    if (ailego_unlikely(!sparse_data_chunk)) {
      LOG_ERROR("allocate data chunk failed");
      return IndexError_NoMemory;
    }
    sparse_data_chunks_.emplace_back(sparse_data_chunk);
    sparse_data_chunk_index = sparse_data_chunks_.size() - 1U;
    sparse_data_chunk_offset = 0UL;
  } else {
    sparse_data_chunk = sparse_data_chunks_[sparse_data_chunk_index];
    sparse_data_chunk_offset = sparse_data_chunk->data_size();
  }

  // write sparse vector
  if (sparse_vector.size() > 0) {
    if (ailego_unlikely(write_sparse_vector_data(
                            sparse_data_chunk_index, sparse_data_chunk_offset,
                            sparse_vector.data(), sparse_vector.size()) != 0)) {
      LOG_ERROR("write sparse vector failed");
      return IndexError_NoMemory;
    }
  }

  // resize chunk
  if (sparse_vector_len > 0) {
    uint32_t sparse_data_chunk_size =
        sparse_data_chunk_offset + sparse_vector_len;
    if (ailego_unlikely(sparse_data_chunk->resize(sparse_data_chunk_size) !=
                        sparse_data_chunk_size)) {
      LOG_ERROR("Sparse Chunk resize to %u failed", sparse_data_chunk_size);
      return IndexError_Runtime;
    }
  }
  return 0;
}

int FlatSparseStreamerEntity::get_new_sparse_offset_chunk(
    uint32_t &sparse_offset_chunk_index, uint32_t &sparse_offset_chunk_offset) {
  // get sparse offset chunk and offset for write new info
  Chunk::Pointer sparse_offset_chunk;
  sparse_offset_chunk_offset = -1U;
  sparse_offset_chunk_index = sparse_offset_chunks_.size() - 1U;
  if (sparse_offset_chunk_index == -1U ||
      sparse_offset_chunks_[sparse_offset_chunk_index]->data_size() +
              offset_size_per_node() >
          streamer_meta_.offset_chunk_size) {
    // no space left and need to allocate new offset chunk
    if (ailego_unlikely(sparse_offset_chunks_.capacity() ==
                        sparse_offset_chunks_.size())) {
      LOG_ERROR("add vector failed for no memory quota");
      return IndexError_IndexFull;
    }

    sparse_offset_chunk = alloc_new_offset_chunk(sparse_offset_chunks_.size());
    if (ailego_unlikely(!sparse_offset_chunk)) {
      LOG_ERROR("allocate offset chunk failed");
      return IndexError_NoMemory;
    }
    sparse_offset_chunks_.emplace_back(sparse_offset_chunk);
    sparse_offset_chunk_index = sparse_offset_chunks_.size() - 1U;
    sparse_offset_chunk_offset = 0UL;
  } else {
    sparse_offset_chunk = sparse_offset_chunks_[sparse_offset_chunk_index];
    sparse_offset_chunk_offset = sparse_offset_chunk->data_size();
  }
  return 0;
}

int FlatSparseStreamerEntity::write_sparse_offset_to_chunk(
    const uint32_t sparse_offset_chunk_index,
    const uint32_t sparse_offset_chunk_offset, const uint64_t sparse_offset,
    const uint32_t sparse_vector_len, const uint64_t node_id) {
  // write offset
  Chunk::Pointer sparse_offset_chunk =
      sparse_offset_chunks_[sparse_offset_chunk_index];
  size_t size = sparse_offset_chunk->write(sparse_offset_chunk_offset,
                                           &sparse_offset, sizeof(uint64_t));
  if (ailego_unlikely(size != sizeof(uint64_t))) {
    LOG_ERROR("Chunk write sparse vec offset failed, ret=%zu", size);
    return IndexError_WriteData;
  }

  // write length
  size =
      sparse_offset_chunk->write(sparse_offset_chunk_offset + sizeof(uint64_t),
                                 &sparse_vector_len, sizeof(uint32_t));
  if (ailego_unlikely(size != sizeof(uint32_t))) {
    LOG_ERROR("Chunk write sparse vec len failed, ret=%zu", size);
    return IndexError_WriteData;
  }

  // write key
  size = sparse_offset_chunk->write(
      sparse_offset_chunk_offset + 2 * sizeof(uint64_t), &node_id,
      sizeof(uint64_t));
  if (ailego_unlikely(size != sizeof(uint64_t))) {
    LOG_ERROR("Chunk write key failed, ret=%zu", size);
    return IndexError_WriteData;
  }
  return 0;
}

// Read the key stored in the offset record of `node_id`.
//
// The record is located from the node id and the per-chunk record count; the
// key sits two uint64_t widths into the record. Returns kInvalidKey when the
// read comes back short.
uint64_t FlatSparseStreamerEntity::get_key(node_id_t node_id) const {
  const uint32_t chunk_idx = node_id / get_offset_info_number_per_chunk();
  const uint32_t slot_in_chunk = node_id % get_offset_info_number_per_chunk();
  const uint32_t key_pos =
      slot_in_chunk * offset_size_per_node() + 2 * sizeof(uint64_t);

  IndexStorage::MemoryBlock key_block;
  const size_t got =
      sparse_offset_chunks_[chunk_idx]->read(key_pos, key_block,
                                             sizeof(uint64_t));
  if (ailego_unlikely(got != sizeof(uint64_t))) {
    LOG_ERROR("Read key failed, offset=%u, node_id=%u", key_pos, node_id);
    return kInvalidKey;
  }

  return *reinterpret_cast<const uint64_t *>(key_block.data());
}

// Look up the sparse vector of `node_id` and return a pointer to its bytes.
//
// The node's offset record is read first; it holds the packed data location
// (high 32 bits: data chunk index, low 32 bits: offset inside the chunk) and
// the stored vector length.
//
// @param node_id                Node to look up.
// @param sparse_vector_ptr      [out] Set to the vector bytes when the stored
//                               length is non-zero; left untouched otherwise.
// @param sparse_vector_len_ptr  [out] Set to the stored length on success,
//                               including 0 for an empty record.
// @return 0 on success, IndexError_ReadData when the offset record or the
//         vector data cannot be read.
int FlatSparseStreamerEntity::get_sparse_vector_ptr_by_id(
    node_id_t node_id, const void **sparse_vector_ptr,
    uint32_t *sparse_vector_len_ptr) const {
  uint32_t offset_chunk_index = node_id / get_offset_info_number_per_chunk();
  uint32_t offset_chunk_offset =
      node_id % get_offset_info_number_per_chunk() * offset_size_per_node();

  const auto &offset_chunk = sparse_offset_chunks_[offset_chunk_index];

  const void *offset_info = nullptr;
  size_t read_len = offset_chunk->read(offset_chunk_offset, &offset_info,
                                       offset_size_per_node());
  if (ailego_unlikely(read_len != offset_size_per_node())) {
    LOG_ERROR("Read offset info failed, offset=%u, read_len=%zu, expect=%u",
              offset_chunk_offset, read_len, offset_size_per_node());
    return IndexError_ReadData;
  }

  // Record layout: uint64_t packed location, then uint32_t length.
  const uint64_t sparse_offset =
      *reinterpret_cast<const uint64_t *>(offset_info);
  const uint32_t sparse_vector_len = *reinterpret_cast<const uint32_t *>(
      static_cast<const uint8_t *>(offset_info) + sizeof(uint64_t));

  uint32_t sparse_data_chunk_index =
      static_cast<uint32_t>((sparse_offset >> 32) & 0xFFFFFFFF);
  uint32_t sparse_data_chunk_offset =
      static_cast<uint32_t>(sparse_offset & 0xFFFFFFFF);

  if (sparse_vector_len > 0) {
    const void *sparse_data = get_sparse_vector_data(
        sparse_data_chunk_index, sparse_data_chunk_offset, sparse_vector_len);
    if (ailego_unlikely(sparse_data == nullptr)) {
      LOG_ERROR("Get nullptr sparse, offset=%zu, len=%u", (size_t)sparse_offset,
                sparse_vector_len);

      return IndexError_ReadData;
    }
    *sparse_vector_ptr = sparse_data;
  }

  // Always report the length: previously an empty record left the caller's
  // variable unwritten, which is indeterminate when it was not initialized.
  *sparse_vector_len_ptr = sparse_vector_len;

  return 0;
}

//! Look up the stored sparse vector of a node and load it into a memory
//! block owned by the caller.
//!
//! The node's offset record is read from its offset chunk first; the record
//! holds a packed 64-bit location (high 32 bits: data chunk index, low 32
//! bits: offset inside that chunk) followed by a 32-bit vector length.
//!
//! @param node_id                id of the node to read
//! @param sparse_vector_block    block that receives the vector data; it is
//!                               not touched when the recorded length is zero
//! @param sparse_vector_len_ptr  receives the recorded vector length
//! @return 0 on success, IndexError_ReadData when the node id maps outside
//!         the known offset chunks or when either read fails
int FlatSparseStreamerEntity::get_sparse_vector_ptr_by_id(
    node_id_t node_id, IndexStorage::MemoryBlock &sparse_vector_block,
    uint32_t *sparse_vector_len_ptr) const {
  const uint32_t infos_per_chunk = get_offset_info_number_per_chunk();
  const uint32_t offset_chunk_index = node_id / infos_per_chunk;
  const uint32_t offset_chunk_offset =
      node_id % infos_per_chunk * offset_size_per_node();

  // LOG_DEBUG("Read sparse vector, offset chunk=%u, offset=%u, len=%u",
  //           offset_chunk_index, offset_chunk_offset, offset_size_per_node());

  // An unknown node id would otherwise index past the end of the chunk list.
  if (ailego_unlikely(offset_chunk_index >= sparse_offset_chunks_.size())) {
    LOG_ERROR("Offset chunk out of range, node_id=%zu, chunk=%u, chunk_cnt=%zu",
              static_cast<size_t>(node_id), offset_chunk_index,
              sparse_offset_chunks_.size());
    return IndexError_ReadData;
  }

  auto offset_chunk = sparse_offset_chunks_[offset_chunk_index];

  IndexStorage::MemoryBlock offset_info_block;
  size_t read_len = offset_chunk->read(offset_chunk_offset, offset_info_block,
                                       offset_size_per_node());
  if (ailego_unlikely(read_len != offset_size_per_node())) {
    LOG_ERROR("Read offset info failed, offset=%u, read_len=%zu, expect=%u",
              offset_chunk_offset, read_len, offset_size_per_node());
    return IndexError_ReadData;
  }
  const void *offset_info = offset_info_block.data();
  if (ailego_unlikely(offset_info == nullptr)) {
    LOG_ERROR("Read offset info failed, offset=%u, read_len=%zu, expect=%u",
              offset_chunk_offset, read_len, offset_size_per_node());
    return IndexError_ReadData;
  }

  // sparse offset
  const uint64_t sparse_offset = *static_cast<const uint64_t *>(offset_info);
  const uint32_t sparse_vector_len = *reinterpret_cast<const uint32_t *>(
      static_cast<const uint8_t *>(offset_info) + sizeof(uint64_t));

  const uint32_t sparse_data_chunk_index =
      static_cast<uint32_t>((sparse_offset >> 32) & 0xFFFFFFFF);
  const uint32_t sparse_data_chunk_offset =
      static_cast<uint32_t>(sparse_offset & 0xFFFFFFFF);

  if (sparse_vector_len == 0) {
    // Nothing is stored for this node: make the reported length explicit so
    // the caller never reads an uninitialised value.
    *sparse_vector_len_ptr = 0;
    return 0;
  }

  // The status code must be honoured: a block that already holds data from
  // an earlier use would otherwise make a failed read look like a success.
  const int ret =
      get_sparse_vector_data(sparse_data_chunk_index, sparse_data_chunk_offset,
                             sparse_vector_len, sparse_vector_block);
  if (ailego_unlikely(ret != 0 || sparse_vector_block.data() == nullptr)) {
    LOG_ERROR("Get nullptr sparse, offset=%zu, len=%u", (size_t)sparse_offset,
              sparse_vector_len);

    return IndexError_ReadData;
  }
  *sparse_vector_len_ptr = sparse_vector_len;

  return 0;
}

//! Write `length` bytes from `data` into data chunk `chunk_index` at `offset`.
//! Returns 0 when the chunk accepted the whole buffer and
//! IndexError_WriteData (after logging) otherwise.
int FlatSparseStreamerEntity::write_sparse_vector_data(uint32_t chunk_index,
                                                       uint64_t offset,
                                                       const void *data,
                                                       uint32_t length) {
  const auto &chunk = sparse_data_chunks_[chunk_index];
  const auto written = chunk->write(offset, data, length);
  if (written == length) {
    // LOG_DEBUG(
    //     "write_sparse_vector_data: chunk_index=%u, offset=%lu, length=%u, "
    //     "data=%p",
    //     chunk_index, offset, length, data);
    return 0;
  }

  // Short write: report how much made it and how full the chunk is.
  LOG_ERROR(
      "write sparse vector data failed: chunk_index=%u, offset=%zu, "
      "length=%u, size=%zu, chunk_data_size=%zu",
      chunk_index, (size_t)offset, length, written, chunk->data_size());
  return IndexError_WriteData;
}

//! Read `length` bytes at `offset` of data chunk `chunk_index` and return the
//! address handed back by the chunk, or nullptr (after logging) when the
//! chunk delivered a different number of bytes.
const void *FlatSparseStreamerEntity::get_sparse_vector_data(
    uint32_t chunk_index, uint64_t offset, uint32_t length) const {
  const void *payload;
  const auto &chunk = sparse_data_chunks_[chunk_index];
  const auto fetched = chunk->read(offset, &payload, length);
  if (fetched == length) {
    // LOG_DEBUG(
    //     "get_sparse_vector_data: chunk_index=%u, offset=%lu, length=%u, "
    //     "data=%p",
    //     chunk_index, offset, length, data);
    return payload;
  }

  LOG_ERROR(
      "read sparse vector data failed: chunk_index=%u, offset=%zu, "
      "length=%u, size=%zu",
      chunk_index, (size_t)offset, length, fetched);
  return nullptr;
}

//! Read `length` bytes at `offset` of data chunk `chunk_index` into `block`.
//! Returns 0 when the chunk delivered exactly `length` bytes and
//! IndexError_ReadData (after logging) otherwise.
int FlatSparseStreamerEntity::get_sparse_vector_data(
    uint32_t chunk_index, uint64_t offset, uint32_t length,
    IndexStorage::MemoryBlock &block) const {
  const auto &chunk = sparse_data_chunks_[chunk_index];
  const auto fetched = chunk->read(offset, block, length);
  if (fetched == length) {
    return 0;
  }

  LOG_ERROR(
      "read sparse vector data failed: chunk_index=%u, offset=%zu, "
      "length=%u, size=%zu",
      chunk_index, (size_t)offset, length, fetched);
  return IndexError_ReadData;
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/flat_sparse/flat_sparse_streamer_entity.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstdint>
#include <map>
#include <memory>
#include <string>
#include <vector>
#include <ailego/parallel/lock.h>
#include <zvec/ailego/utility/string_helper.h>
#include <zvec/core/framework/index_dumper.h>
#include <zvec/core/framework/index_meta.h>
#include <zvec/core/framework/index_storage.h>
#include <zvec/core/framework/index_streamer.h>
#include "flat_sparse_entity.h"
#include "flat_sparse_index_format.h"
#include "flat_sparse_utility.h"

namespace zvec {
namespace core {


/*! Flat Sparse Streamer Entity
 *
 *  Storage-backed entity of the flat sparse streamer. Per-node offset records
 *  live in "offset" chunks and the sparse vector payloads live in "data"
 *  chunks; both kinds of chunk are segments of the attached IndexStorage.
 *  The key -> node id map and its lock are held through shared pointers so
 *  that they can be handed to another entity via the private constructor.
 */
class FlatSparseStreamerEntity : public FlatSparseEntity {
 public:
  typedef std::shared_ptr<FlatSparseStreamerEntity> Pointer;

  using Chunk = IndexStorage::Segment;

  //! Constructor
  explicit FlatSparseStreamerEntity(IndexStreamer::Stats &stats);

  //! Destructor
  virtual ~FlatSparseStreamerEntity() = default;

  //! Disable them
  FlatSparseStreamerEntity(const FlatSparseStreamerEntity &) = delete;
  FlatSparseStreamerEntity &operator=(const FlatSparseStreamerEntity &) =
      delete;

  //! Open the entity with storage
  int open(IndexStorage::Pointer storage, const IndexMeta &meta);

  //! Close the entity
  int close();

  //! Flush linear index to storage
  int flush(uint64_t checkpoint);

  //! Dump index by dumper
  int dump(const IndexDumper::Pointer &dumper);

  //! Add sparse vector to linear index
  int add(uint64_t key, const std::string &sparse_vector,
          const uint32_t sparse_count);

  //! Add sparse vector to linear index with id
  int add_vector_with_id(uint32_t id, const std::string &sparse_vector,
                         uint32_t sparse_count);

  //! Clone entity
  FlatSparseStreamerEntity::Pointer clone() const;

  //! Load the index meta that was serialized into the attached storage
  int get_index_sparse_meta(IndexMeta *meta) const {
    return IndexHelper::DeserializeFromStorage(storage_.get(), meta);
  }

  //! Serialize the given index meta into the attached storage
  int set_index_sparse_meta(const IndexMeta &meta) const {
    return IndexHelper::SerializeToStorage(meta, storage_.get());
  }

 public:
  //! Number of documents recorded in the entity meta
  inline uint32_t doc_cnt() const override {
    return meta_.doc_cnt;
  }

  //! Total sparse element count recorded in the entity meta
  inline uint32_t total_sparse_count() const override {
    return meta_.total_sparse_count;
  }

  //! Size of one sparse unit as configured for this entity
  size_t sparse_unit_size() const override {
    return sparse_unit_size_;
  }

  //! Translate a key into its node id, or kInvalidNodeId when unknown.
  //! The iterator is inspected while the shared lock is still held so that a
  //! concurrent writer cannot change the map between lookup and read.
  inline node_id_t get_id(uint64_t key) const override {
    keys_map_lock_->lock_shared();
    auto it = keys_map_->find(key);
    const node_id_t id =
        (it == keys_map_->end()) ? kInvalidNodeId : it->second;
    keys_map_lock_->unlock_shared();
    return id;
  }

  uint64_t get_key(node_id_t node_id) const override;

  int get_sparse_vector_ptr_by_id(node_id_t id, const void **sparse_vector,
                                  uint32_t *sparse_vector_len) const override;
  int get_sparse_vector_ptr_by_id(
      const node_id_t id, IndexStorage::MemoryBlock &sparse_vector_block,
      uint32_t *sparse_vector_len) const override;

  //! Distance between a query vector and the stored vector of a node.
  //! Returns 0.0f without calling the distance kernel when the stored vector
  //! cannot be read or is absent, so the kernel is never handed an
  //! uninitialised pointer.
  float get_search_distance(const std::string &vector,
                            node_id_t target_node_id) const override {
    float dist = 0.0f;
    const void *target_vector = nullptr;
    uint32_t target_vector_len = 0;
    const int ret = get_sparse_vector_ptr_by_id(target_node_id, &target_vector,
                                                &target_vector_len);
    if (ailego_unlikely(ret != 0 || target_vector == nullptr)) {
      return dist;
    }
    search_sparse_distance_(vector.c_str(), target_vector, &dist);
    return dist;
  }

 private:
  void inc_doc_count() {
    meta_.doc_cnt++;
  }
  void inc_total_sparse_count(uint32_t count) {
    meta_.total_sparse_count += count;
  }

  int init_metric(const IndexMeta &meta);

  int init_storage(IndexStorage::Pointer storage, const IndexMeta &meta);

  int load_storage(IndexStorage::Pointer storage, const IndexMeta &meta);

  //! Round `size` up to the next multiple of 32
  static inline size_t AlignSize(size_t size) {
    return (size + 0x1F) & (~0x1F);
  }

  //! Bytes occupied by one offset record (three 64-bit slots)
  inline uint32_t offset_size_per_node() const {
    return 3 * sizeof(uint64_t);
  }

  //! Number of offset records that fit into one offset chunk
  inline uint32_t doc_cnt_per_offset_chunk() const {
    return streamer_meta_.offset_chunk_size / offset_size_per_node();
  }

  //! Append a new offset segment named <offset prefix><chunk_id>
  Chunk::Pointer alloc_new_offset_chunk(uint32_t chunk_id) {
    std::string segment_id = ailego::StringHelper::Concat(
        PARAM_FLAT_SPARSE_OFFSET_SEG_ID_PREFIX, chunk_id);
    // LOG_INFO("Alloc new offset chunk %s", segment_id.c_str());
    return alloc_new_chunk(segment_id, streamer_meta_.offset_chunk_size);
  }

  //! Append a new data segment named <data prefix><chunk_id>
  Chunk::Pointer alloc_new_data_chunk(uint32_t chunk_id) {
    std::string segment_id = ailego::StringHelper::Concat(
        PARAM_FLAT_SPARSE_DATA_SEG_ID_PREFIX, chunk_id);
    // LOG_INFO("Alloc new data chunk %s", segment_id.c_str());
    return alloc_new_chunk(segment_id, streamer_meta_.data_chunk_size);
  }

  //! Append a segment of `size` bytes to the storage, account for it in the
  //! index size statistic and return it; returns nullptr if the append fails
  Chunk::Pointer alloc_new_chunk(const std::string &segment_id, uint32_t size) {
    int ret = storage_->append(segment_id, size);
    if (ailego_unlikely(ret != 0)) {
      return nullptr;
    }
    *stats_.mutable_index_size() += size;
    return storage_->get(segment_id);
  }

  //! Number of offset records that fit into one offset chunk
  inline uint32_t get_offset_info_number_per_chunk() const {
    return streamer_meta_.offset_chunk_size / offset_size_per_node();
  }

  int write_sparse_vector_to_chunk(const std::string &sparse_vector,
                                   const uint32_t sparse_vector_len,
                                   uint32_t &sparse_data_chunk_index,
                                   uint32_t &sparse_data_chunk_offset);

  int get_new_sparse_offset_chunk(uint32_t &sparse_offset_chunk_index,
                                  uint32_t &sparse_offset_chunk_offset);

  int write_sparse_offset_to_chunk(const uint32_t sparse_offset_chunk_index,
                                   const uint32_t sparse_offset_chunk_offset,
                                   const uint64_t sparse_offset,
                                   const uint32_t sparse_vector_len,
                                   const uint64_t node_id);

  int write_sparse_vector_data(uint32_t chunk_index, uint64_t offset,
                               const void *data, uint32_t length);

  const void *get_sparse_vector_data(uint32_t chunk_index, uint64_t offset,
                                     uint32_t length) const;

  int get_sparse_vector_data(uint32_t chunk_index, uint64_t offset,
                             uint32_t length,
                             IndexStorage::MemoryBlock &block) const;

  int dump_sparse_vector_data(const void *data, uint32_t length,
                              IndexDumper *dumper);

  int dump_meta(IndexDumper *dumper);

  int dump_index_meta(IndexDumper *dumper);

  int dump_keys(const std::vector<uint64_t> &keys, IndexDumper *dumper);

  int dump_mapping(const std::vector<uint64_t> &keys, IndexDumper *dumper);

  int dump_offset_data(IndexDumper *dumper);


 private:
  //! Member-wise constructor; it leaves storage_ and the metric members at
  //! their defaults. NOTE(review): presumably used by clone() - confirm.
  FlatSparseStreamerEntity(
      IndexStreamer::Stats &stats, const FlatSparseMeta &meta,
      const FlatSparseStreamerMeta &streamer_meta,
      std::shared_ptr<ailego::SharedMutex> keys_map_lock,
      std::shared_ptr<std::map<uint64_t, node_id_t>> keys_map,
      std::vector<Chunk::Pointer> sparse_data_chunks,
      std::vector<Chunk::Pointer> sparse_offset_chunks)
      : stats_(stats),
        meta_(meta),
        streamer_meta_(streamer_meta),
        keys_map_lock_(keys_map_lock),
        keys_map_(keys_map),
        sparse_data_chunks_(std::move(sparse_data_chunks)),
        sparse_offset_chunks_(std::move(sparse_offset_chunks)) {}

 private:
  IndexStorage::Pointer storage_{};
  IndexStreamer::Stats &stats_;

  // meta
  FlatSparseMeta meta_;
  FlatSparseStreamerMeta streamer_meta_;

  // metric
  IndexMetric::Pointer metric_{};
  IndexMetric::MatrixSparseDistance search_sparse_distance_{};

  std::mutex mutex_{};

  // keys map
  mutable std::shared_ptr<ailego::SharedMutex> keys_map_lock_{};
  std::shared_ptr<std::map<uint64_t, node_id_t>> keys_map_{};

  // chunks
  mutable std::vector<Chunk::Pointer> sparse_data_chunks_{};
  mutable std::vector<Chunk::Pointer> sparse_offset_chunks_{};

  // config
  uint32_t max_doc_cnt_{1 << 24U};  // 16 million
  uint32_t max_data_chunk_cnt_{
      1 << 10U};  // 1024, default single_data_chunk_size = 8M,
                  // default_total_max = 1024 * 8M = 8G

  uint64_t dump_size_{0U};
  size_t sparse_unit_size_{0U};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/flat_sparse/flat_sparse_utility.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstdint>
#include <string>

namespace zvec {
namespace core {

// ---- limits and shared segment ids ----------------------------------------
static constexpr uint32_t PARAM_FLAT_SPARSE_MAX_DIM_SIZE{16384};
static const std::string PARAM_FLAT_SPARSE_META_SEG_ID{
    "bruteforce_sparse_meta"};

// ---- streamer segment ids --------------------------------------------------
static const std::string PARAM_FLAT_SPARSE_STREAMER_META_SEG_ID{
    "bruteforce_sparse_streamer_meta"};
static const std::string PARAM_FLAT_SPARSE_OFFSET_SEG_ID_PREFIX{
    "bruteforce_sparse_streamer_offset_"};
static const std::string PARAM_FLAT_SPARSE_DATA_SEG_ID_PREFIX{
    "bruteforce_sparse_streamer_data_"};

// ---- searcher (dump) segment ids -------------------------------------------
static const std::string PARAM_FLAT_SPARSE_DUMP_OFFSET_SEG_ID{
    "bruteforce_sparse_searcher_offset_segment"};
static const std::string PARAM_FLAT_SPARSE_DUMP_DATA_SEG_ID{
    "bruteforce_sparse_searcher_data_segment"};
static const std::string PARAM_FLAT_SPARSE_DUMP_KEYS_SEG_ID{
    "bruteforce_sparse_searcher_keys_segment"};
static const std::string PARAM_FLAT_SPARSE_DUMP_MAPPING_SEG_ID{
    "bruteforce_sparse_searcher_mapping_segment"};

// ---- streamer parameter names ----------------------------------------------
static const std::string PARAM_FLAT_SPARSE_STREAMER_OFFSET_CHUNK_SIZE{
    "proxima.bruteforce.sparse_streamer.offset_chunk_size"};

static const std::string PARAM_FLAT_SPARSE_STREAMER_DATA_CHUNK_SIZE{
    "proxima.bruteforce.sparse_streamer.data_chunk_size"};

static const std::string PARAM_FLAT_SPARSE_STREAMER_MAX_DOC_CNT{
    "proxima.bruteforce.sparse_streamer.max_doc_cnt"};

static const std::string PARAM_FLAT_SPARSE_STREAMER_MAX_DATA_CHUNK_CNT{
    "proxima.bruteforce.sparse_streamer.max_data_chunk_cnt"};

}  // namespace core
}  // namespace zvec

================================================
FILE: src/core/algorithm/hnsw/CMakeLists.txt
================================================
# Pull in the project's shared CMake helper modules.
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

# HNSW algorithm library: sources are taken from the *.cc pattern in this
# directory, and the target links against core_framework and sparsehash.
# NOTE(review): cc_library is presumably the helper provided by
# cmake/bazel.cmake; the exact meaning of STRICT / ALWAYS_LINK is defined
# there - confirm before changing the flags.
cc_library(
    NAME core_knn_hnsw 
    STATIC SHARED STRICT ALWAYS_LINK
    SRCS *.cc
    LIBS core_framework sparsehash
    INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm
    VERSION "${PROXIMA_ZVEC_VERSION}"
  )


================================================
FILE: src/core/algorithm/hnsw/hnsw_algorithm.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "hnsw_algorithm.h"
#include <chrono>
#include <iostream>
#include <vector>
#include <ailego/internal/cpu_features.h>

namespace zvec {
namespace core {

//! Bind the algorithm to the graph entity it operates on.
//! The random engine mt_ is seeded from the current system clock tick count,
//! so the generated sequence differs from run to run; the lock pool is
//! constructed with kLockCnt.
HnswAlgorithm::HnswAlgorithm(HnswEntity &entity)
    : entity_(entity),
      mt_(std::chrono::system_clock::now().time_since_epoch().count()),
      lock_pool_(kLockCnt) {}

//! Cleanup hook; currently has nothing to release and always returns 0.
int HnswAlgorithm::cleanup() {
  return 0;
}

//! Insert node `id` into the graph on every level in [0, level].
//!
//! @param id     node to insert
//! @param level  highest level the node takes part in
//! @param ctx    per-call context providing the distance calculator and the
//!               per-level top-k heaps
//! @return always 0
//!
//! Locking: spin_lock_ guards reads/writes of the (entry point, max level)
//! pair. mutex_ is additionally taken by an insertion whose level exceeds the
//! current max level and stays held until that insertion has published the
//! new entry point at the end of the function.
int HnswAlgorithm::add_node(node_id_t id, level_t level, HnswContext *ctx) {
  spin_lock_.lock();

  // std::cout << "id: " << id << ", level: " << level << std::endl;

  auto cur_max_level = entity_.cur_max_level();
  auto entry_point = entity_.entry_point();
  // Very first node: it simply becomes the entry point, nothing to link.
  if (ailego_unlikely(entry_point == kInvalidNodeId)) {
    entity_.update_ep_and_level(id, level);
    spin_lock_.unlock();
    return 0;
  }
  spin_lock_.unlock();

  if (ailego_unlikely(level > cur_max_level)) {
    mutex_.lock();
    // re-check max level
    // NOTE(review): this re-read happens under mutex_ only, not spin_lock_;
    // confirm that is sufficient against the first-node path above.
    cur_max_level = entity_.cur_max_level();
    entry_point = entity_.entry_point();
    // Someone else raised the max level meanwhile: this insertion no longer
    // changes the entry point, so mutex_ can be released right away.
    if (level <= cur_max_level) {
      mutex_.unlock();
    }
  }

  // Descend through the levels above `level`, only refining the entry point.
  level_t cur_level = cur_max_level;
  dist_t dist = ctx->dist_calculator()(entry_point);
  for (; cur_level > level; --cur_level) {
    select_entry_point(cur_level, &entry_point, &dist, ctx);
  }

  // From min(level, max level) down to 0, collect neighbor candidates into
  // the per-level top-k heaps.
  for (; cur_level >= 0; --cur_level) {
    search_neighbors(cur_level, &entry_point, &dist, ctx->level_topk(cur_level),
                     ctx);
  }

  // add neighbors from down level to top level, to avoid upper level visible
  // to knn_search but the under layer level not ready
  for (cur_level = 0; cur_level <= level; ++cur_level) {
    add_neighbors(id, cur_level, ctx->level_topk(cur_level), ctx);
    ctx->level_topk(cur_level).clear();
  }

  // Still true only if mutex_ was kept above: publish the new entry point and
  // max level, then release mutex_.
  if (ailego_unlikely(level > cur_max_level)) {
    spin_lock_.lock();
    entity_.update_ep_and_level(id, level);
    spin_lock_.unlock();
    mutex_.unlock();
  }

  return 0;
}

//! Run a knn search on the graph; results are left in ctx->topk_heap().
//! Returns 0, also when the graph has no entry point yet.
int HnswAlgorithm::search(HnswContext *ctx) const {
  // Take a consistent snapshot of (max level, entry point).
  spin_lock_.lock();
  const auto top_level = entity_.cur_max_level();
  auto nearest = entity_.entry_point();
  spin_lock_.unlock();

  if (ailego_unlikely(nearest == kInvalidNodeId)) {
    return 0;
  }

  // Walk the upper levels down to level 1, keeping only the closest node.
  dist_t nearest_dist = ctx->dist_calculator().dist(nearest);
  level_t lvl = top_level;
  while (lvl >= 1) {
    select_entry_point(lvl, &nearest, &nearest_dist, ctx);
    --lvl;
  }

  // Full search on the bottom level.
  auto &results = ctx->topk_heap();
  results.clear();
  search_neighbors(0, &nearest, &nearest_dist, results, ctx);

  if (ctx->group_by_search()) {
    expand_neighbors_by_group(results, ctx);
  }

  return 0;
}

//! select_entry_point on hnsw level, ef = 1
//! Greedy descent on one level (ef = 1): repeatedly move *entry_point to the
//! closest of its neighbors until no neighbor improves on *dist. Stops early
//! when a node has no neighbors or its neighbor vectors cannot be fetched.
void HnswAlgorithm::select_entry_point(level_t level, node_id_t *entry_point,
                                       dist_t *dist, HnswContext *ctx) const {
  auto &graph = ctx->get_entity();
  HnswDistCalculator &calc = ctx->dist_calculator();

  for (bool improved = true; improved;) {
    improved = false;

    const Neighbors adjacent = graph.get_neighbors(level, *entry_point);
    if (ailego_unlikely(ctx->debugging())) {
      (*ctx->mutable_stats_get_neighbors())++;
    }
    const uint32_t count = adjacent.size();
    if (count == 0) {
      return;
    }

    std::vector<IndexStorage::MemoryBlock> blocks;
    const int ret = graph.get_vector(&adjacent[0], count, blocks);
    if (ailego_unlikely(ctx->debugging())) {
      (*ctx->mutable_stats_get_vector())++;
    }
    if (ailego_unlikely(ret != 0)) {
      return;
    }

    // Score all neighbors in one batch.
    std::vector<float> scores(count);
    std::vector<const void *> raw_vectors(count);
    for (uint32_t n = 0; n < count; ++n) {
      raw_vectors[n] = blocks[n].data();
    }
    calc.batch_dist(raw_vectors.data(), count, scores.data());

    // Keep the best one; any improvement triggers another round.
    for (uint32_t n = 0; n < count; ++n) {
      const dist_t candidate_dist = scores[n];
      if (candidate_dist < *dist) {
        *entry_point = adjacent[n];
        *dist = candidate_dist;
        improved = true;
      }
    }
  }
}

//! Link node `id` on `level`: store its (pruned) neighbor list taken from
//! `topk_heap`, then add the reverse edge to every neighbor that was kept.
void HnswAlgorithm::add_neighbors(node_id_t id, level_t level,
                                  TopkHeap &topk_heap, HnswContext *ctx) {
  if (ailego_unlikely(topk_heap.size() == 0)) {
    return;
  }

  HnswDistCalculator &calc = ctx->dist_calculator();

  // Forward edges; this may shrink topk_heap to the selected neighbors.
  update_neighbors(calc, id, level, topk_heap);

  // Reverse edges from each selected neighbor back to `id`.
  size_t pos = 0;
  while (pos < topk_heap.size()) {
    reverse_update_neighbors(calc, topk_heap[pos].first, level, id,
                             topk_heap[pos].second, ctx->update_heap());
    ++pos;
  }
}

//! Search one graph level starting from *entry_point and collect the nearest
//! nodes in `topk`.
//!
//! @param level        graph level to search
//! @param entry_point  in: start node; out: closest node seen during the scan
//! @param dist         in: distance of the start node; out: distance of the
//!                     closest node seen
//! @param topk         result heap; nodes rejected by the context filter are
//!                     still traversed but never inserted here
//! @param ctx          per-call context (entity, distance calculator, visit
//!                     filter, candidate heap, scan limit, statistics)
void HnswAlgorithm::search_neighbors(level_t level, node_id_t *entry_point,
                                     dist_t *dist, TopkHeap &topk,
                                     HnswContext *ctx) const {
  const auto &entity = ctx->get_entity();
  HnswDistCalculator &dc = ctx->dist_calculator();
  VisitFilter &visit = ctx->visit_filter();
  CandidateHeap &candidates = ctx->candidates();
  // Default filter rejects nothing; a valid context filter is evaluated on
  // the node's key and a `true` result excludes the node from `topk`.
  std::function<bool(node_id_t)> filter = [](node_id_t) { return false; };
  if (ctx->filter().is_valid()) {
    filter = [&](node_id_t id) { return ctx->filter()(entity.get_key(id)); };
  }

  candidates.clear();
  visit.clear();
  visit.set_visited(*entry_point);
  if (!filter(*entry_point)) {
    topk.emplace(*entry_point, *dist);
  }

  candidates.emplace(*entry_point, *dist);
  while (!candidates.empty() && !ctx->reach_scan_limit()) {
    auto top = candidates.begin();
    node_id_t main_node = top->first;
    dist_t main_dist = top->second;

    // Stop once the result heap is full and the next candidate is farther
    // than topk[0] (presumably the worst result currently kept).
    if (topk.full() && main_dist > topk[0].second) {
      break;
    }

    candidates.pop();
    const Neighbors neighbors = entity.get_neighbors(level, main_node);
    ailego_prefetch(neighbors.data);
    if (ailego_unlikely(ctx->debugging())) {
      (*ctx->mutable_stats_get_neighbors())++;
    }

    // Keep only neighbors not visited before, marking them visited now.
    std::vector<node_id_t> neighbor_ids(neighbors.size());
    uint32_t size = 0;
    for (uint32_t i = 0; i < neighbors.size(); ++i) {
      node_id_t node = neighbors[i];
      if (visit.visited(node)) {
        if (ailego_unlikely(ctx->debugging())) {
          (*ctx->mutable_stats_visit_dup_cnt())++;
        }
        continue;
      }
      visit.set_visited(node);
      neighbor_ids[size++] = node;
    }
    if (size == 0) {
      continue;
    }

    std::vector<IndexStorage::MemoryBlock> neighbor_vec_blocks;
    int ret = entity.get_vector(neighbor_ids.data(), size, neighbor_vec_blocks);
    if (ailego_unlikely(ctx->debugging())) {
      (*ctx->mutable_stats_get_vector())++;
    }
    // A failed vector fetch ends the whole search on this level.
    if (ailego_unlikely(ret != 0)) {
      break;
    }

    // do prefetch
    // Only the first min(BATCH_SIZE * PREFETCH_STEP, size) vectors are
    // prefetched.
    static constexpr node_id_t BATCH_SIZE = 12;
    static constexpr node_id_t PREFETCH_STEP = 2;
    for (uint32_t i = 0; i < std::min(BATCH_SIZE * PREFETCH_STEP, size); ++i) {
      ailego_prefetch(neighbor_vec_blocks[i].data());
    }
    // done

    std::vector<float> dists(size);
    std::vector<const void *> neighbor_vecs(size);

    for (uint32_t i = 0; i < size; ++i) {
      neighbor_vecs[i] = neighbor_vec_blocks[i].data();
    }

    // Distances of all fresh neighbors are computed in one batch call.
    dc.batch_dist(neighbor_vecs.data(), size, dists.data());

    for (uint32_t i = 0; i < size; ++i) {
      node_id_t node = neighbor_ids[i];
      dist_t cur_dist = dists[i];

      // A neighbor is worth expanding while the result heap has room or the
      // neighbor beats topk[0].
      if ((!topk.full()) || cur_dist < topk[0].second) {
        candidates.emplace(node, cur_dist);
        // update entry_point for next level scan
        if (cur_dist < *dist) {
          *entry_point = node;
          *dist = cur_dist;
        }
        if (!filter(node)) {
          topk.emplace(node, cur_dist);
        }
      }  // end if
    }  // end for
  }  // while

  return;
}

//! Distribute the search results into per-group heaps and, when fewer groups
//! than ctx->group_num() were found, keep expanding the level-0 graph from
//! the current results to discover more groups.
//!
//! @param topk  results of the preceding level-0 search
//! @param ctx   per-call context; the per-group heaps are taken from
//!              ctx->group_topk_heaps() and filled in place
//!
//! Does nothing when the context has no valid group-by function.
void HnswAlgorithm::expand_neighbors_by_group(TopkHeap &topk,
                                              HnswContext *ctx) const {
  if (!ctx->group_by().is_valid()) {
    return;
  }

  const auto &entity = ctx->get_entity();
  // Group id of a node is derived from its key.
  std::function<std::string(node_id_t)> group_by = [&](node_id_t id) {
    return ctx->group_by()(entity.get_key(id));
  };

  // divide into groups
  std::map<std::string, TopkHeap> &group_topk_heaps = ctx->group_topk_heaps();
  for (uint32_t i = 0; i < topk.size(); ++i) {
    node_id_t id = topk[i].first;
    auto score = topk[i].second;

    std::string group_id = group_by(id);

    // operator[] creates the group's heap on first use; an empty heap gets
    // its limit set to the per-group top-k.
    auto &topk_heap = group_topk_heaps[group_id];
    if (topk_heap.empty()) {
      topk_heap.limit(ctx->group_topk());
    }
    topk_heap.emplace_back(id, score);
  }

  // stage 2, expand to reach group num as possible
  if (group_topk_heaps.size() < ctx->group_num()) {
    VisitFilter &visit = ctx->visit_filter();
    CandidateHeap &candidates = ctx->candidates();
    HnswDistCalculator &dc = ctx->dist_calculator();

    // Default filter rejects nothing; a valid context filter is evaluated on
    // the node's key and a `true` result keeps the node out of the groups.
    std::function<bool(node_id_t)> filter = [](node_id_t) { return false; };
    if (ctx->filter().is_valid()) {
      filter = [&](node_id_t id) { return ctx->filter()(entity.get_key(id)); };
    }

    // refill to get enough groups
    // The visit set and candidate heap are re-seeded with the current
    // results only.
    candidates.clear();
    visit.clear();
    for (uint32_t i = 0; i < topk.size(); ++i) {
      node_id_t id = topk[i].first;
      float score = topk[i].second;

      visit.set_visited(id);
      candidates.emplace_back(id, score);
    }

    // do expand
    while (!candidates.empty() && !ctx->reach_scan_limit()) {
      auto top = candidates.begin();
      node_id_t main_node = top->first;

      candidates.pop();
      const Neighbors neighbors = entity.get_neighbors(0, main_node);
      if (ailego_unlikely(ctx->debugging())) {
        (*ctx->mutable_stats_get_neighbors())++;
      }

      // Keep only neighbors not visited before, marking them visited now.
      std::vector<node_id_t> neighbor_ids(neighbors.size());
      uint32_t size = 0;
      for (uint32_t i = 0; i < neighbors.size(); ++i) {
        node_id_t node = neighbors[i];
        if (visit.visited(node)) {
          if (ailego_unlikely(ctx->debugging())) {
            (*ctx->mutable_stats_visit_dup_cnt())++;
          }
          continue;
        }
        visit.set_visited(node);
        neighbor_ids[size++] = node;
      }
      if (size == 0) {
        continue;
      }

      std::vector<IndexStorage::MemoryBlock> neighbor_vec_blocks;
      int ret =
          entity.get_vector(neighbor_ids.data(), size, neighbor_vec_blocks);
      if (ailego_unlikely(ctx->debugging())) {
        (*ctx->mutable_stats_get_vector())++;
      }
      // A failed vector fetch ends the expansion.
      if (ailego_unlikely(ret != 0)) {
        break;
      }

      static constexpr node_id_t PREFETCH_STEP = 2;
      for (uint32_t i = 0; i < size; ++i) {
        node_id_t node = neighbor_ids[i];
        // Prefetch the vector PREFETCH_STEP positions ahead of the one being
        // scored.
        node_id_t prefetch_id = i + PREFETCH_STEP;
        if (prefetch_id < size) {
          ailego_prefetch(neighbor_vec_blocks[prefetch_id].data());
        }
        dist_t cur_dist = dc.dist(neighbor_vec_blocks[i].data());

        if (!filter(node)) {
          std::string group_id = group_by(node);

          auto &topk_heap = group_topk_heaps[group_id];
          if (topk_heap.empty()) {
            topk_heap.limit(ctx->group_topk());
          }
          topk_heap.emplace_back(node, cur_dist);

          // NOTE(review): this break leaves only the inner for loop; the
          // enclosing while keeps expanding until the candidates run out or
          // the scan limit is hit. Confirm whether the expansion was meant
          // to stop entirely once enough groups exist.
          if (group_topk_heaps.size() >= ctx->group_num()) {
            break;
          }
        }

        candidates.emplace(node, cur_dist);
      }  // end for
    }  // end while
  }  // end if
}

//! Choose the neighbors of node `id` on `level` from `topk_heap` and store
//! them in the entity. `topk_heap` is sorted and, when pruning runs,
//! compacted in place to the selected neighbors.
void HnswAlgorithm::update_neighbors(HnswDistCalculator &dc, node_id_t id,
                                     level_t level, TopkHeap &topk_heap) {
  topk_heap.sort();

  const uint32_t degree_cap = entity_.neighbor_cnt(level);

  // Few enough candidates: store them all, no pruning required.
  const bool within_prune_budget =
      topk_heap.size() <= static_cast<size_t>(entity_.prune_cnt());
  if (within_prune_budget &&
      topk_heap.size() <= static_cast<size_t>(degree_cap)) {
    entity_.update_neighbors(level, id, topk_heap);
    return;
  }

  // Pruning pass: a candidate is kept only if no already-kept neighbor is at
  // least as close to it as the candidate is to the new node. Kept entries
  // are packed into the front of the heap.
  uint32_t kept = 0;
  for (size_t pos = 0; pos < topk_heap.size(); ++pos) {
    const node_id_t candidate = topk_heap[pos].first;
    const dist_t candidate_dist = topk_heap[pos].second;

    bool dominated = false;
    for (uint32_t sel = 0; sel < kept; ++sel) {
      const dist_t pair_dist = dc.dist(candidate, topk_heap[sel].first);
      if (pair_dist <= candidate_dist) {
        dominated = true;
        break;
      }
    }
    if (dominated) {
      continue;
    }

    topk_heap[kept].first = candidate;
    topk_heap[kept].second = candidate_dist;
    ++kept;
    if (kept >= degree_cap) {
      break;
    }
  }

  // when after-prune neighbor count is too seldom,
  // we use this strategy to make-up enough edges
  // not only just make-up out-degrees
  // we also make-up enough in-degrees
  const uint32_t degree_floor = entity_.min_neighbor_cnt();
  size_t scan = kept;
  while (kept < degree_floor && scan < topk_heap.size()) {
    // Skip entries that already sit in the kept prefix.
    bool already_kept = false;
    for (size_t sel = 0; sel < kept; ++sel) {
      if (topk_heap[sel].first == topk_heap[scan].first) {
        already_kept = true;
        break;
      }
    }
    if (!already_kept) {
      topk_heap[kept].first = topk_heap[scan].first;
      topk_heap[kept].second = topk_heap[scan].second;
      ++kept;
    }
    ++scan;
  }

  topk_heap.resize(kept);
  entity_.update_neighbors(level, id, topk_heap);
}

//! Add the reverse edge id -> link_id on `level`.
//! While node `id` still has a free neighbor slot the edge is appended
//! directly; otherwise the existing neighbors plus `link_id` are re-pruned
//! through `update_heap` and the list is rewritten. The neighbor list of
//! `id` is protected by the pool lock selected with `id & kLockMask`.
void HnswAlgorithm::reverse_update_neighbors(HnswDistCalculator &dc,
                                             node_id_t id, level_t level,
                                             node_id_t link_id, dist_t dist,
                                             TopkHeap &update_heap) {
  const size_t max_neighbor_cnt = entity_.neighbor_cnt(level);
  const uint32_t slot = id & kLockMask;

  lock_pool_[slot].lock();
  const Neighbors neighbors = entity_.get_neighbors(level, id);
  size_t size = neighbors.size();
  ailego_assert_with(size <= max_neighbor_cnt, "invalid neighbor size");

  // Fast path: room left, just append.
  if (size < max_neighbor_cnt) {
    entity_.add_neighbor(level, id, size, link_id);
    lock_pool_[slot].unlock();
    return;
  }

  // Slow path: gather the new link and all current neighbors with their
  // distance to `id`.
  update_heap.emplace(link_id, dist);
  for (size_t n = 0; n < size; ++n) {
    const node_id_t member = neighbors[n];
    const dist_t member_dist = dc.dist(id, member);
    update_heap.emplace(member, member_dist);
  }

  //! TODO: optimize prune
  //! prune edges
  update_heap.sort();
  size_t kept = 0;
  for (size_t pos = 0; pos < update_heap.size(); ++pos) {
    const node_id_t candidate = update_heap[pos].first;
    const dist_t candidate_dist = update_heap[pos].second;

    bool dominated = false;
    for (size_t sel = 0; sel < kept; ++sel) {
      const dist_t pair_dist = dc.dist(candidate, update_heap[sel].first);
      if (pair_dist <= candidate_dist) {
        dominated = true;
        break;
      }
    }
    if (dominated) {
      continue;
    }

    update_heap[kept].first = candidate;
    update_heap[kept].second = candidate_dist;
    ++kept;
    if (kept >= max_neighbor_cnt) {
      break;
    }
  }

  update_heap.resize(kept);
  entity_.update_neighbors(level, id, update_heap);

  lock_pool_[slot].unlock();

  // Leave the scratch heap empty for the next caller.
  update_heap.clear();
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw/hnsw_algorithm.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <stdint.h>
#include <ailego/parallel/lock.h>
#include "hnsw_context.h"
#include "hnsw_dist_calculator.h"
#include "hnsw_entity.h"

namespace zvec {
namespace core {

//! HNSW graph algorithm implementation.
//! Operates on the graph storage exposed by an HnswEntity (held by
//! reference): inserts nodes level by level and runs knn searches.
class HnswAlgorithm {
 public:
  typedef std::unique_ptr<HnswAlgorithm> UPointer;

 public:
  //! Constructor
  //! @entity: graph storage the algorithm works on
  explicit HnswAlgorithm(HnswEntity &entity);

  //! Destructor
  ~HnswAlgorithm() = default;

  //! Cleanup HnswAlgorithm
  int cleanup();

  //! Add a node to hnsw graph
  //! @id:     the node unique id
  //! @level:  a node will be add to graph in each level [0, level]
  //! @ctx:    per-thread working context
  //! return 0 on success, or errCode in failure
  int add_node(node_id_t id, level_t level, HnswContext *ctx);

  //! do knn search in graph
  //! return 0 on success, or errCode in failure. results saved in ctx
  int search(HnswContext *ctx) const;

  //! Initiate HnswAlgorithm
  //! Rebuilds the per-level probability table used by get_random_level():
  //!   level_mult   = 1 / ln(scaling_factor)
  //!   proba(level) = exp(-level / level_mult) * (1 - exp(-1 / level_mult))
  //! Levels are appended until the probability drops below 1e-9.
  //! Always returns 0.
  int init() {
    level_probas_.clear();
    double level_mult =
        1 / std::log(static_cast<double>(entity_.scaling_factor()));
    for (int level = 0;; level++) {
      // refers faiss get_random_level alg
      double proba =
          std::exp(-level / level_mult) * (1 - std::exp(-1 / level_mult));
      if (proba < 1e-9) {
        break;
      }
      level_probas_.push_back(proba);
    }

    return 0;
  }

  //! Generate a random level
  //! Draws one random number and walks the probability table built by
  //! init(). Note: this advances the shared generator mt_ without taking
  //! any lock.
  //! return graph level, or 0 when the probability table is empty
  uint32_t get_random_level() const {
    // Table not populated (init() not called, or it produced no entry):
    // without this guard `size() - 1` below would wrap around to a huge
    // level value.
    if (level_probas_.empty()) {
      return 0;
    }
    // gen rand float in [0, 1]
    double f = mt_() / static_cast<float>(mt_.max());
    for (size_t level = 0; level < level_probas_.size(); level++) {
      if (f < level_probas_[level]) {
        return level;
      }
      f -= level_probas_[level];
    }
    // f was not consumed by any entry: fall back to the highest level
    return level_probas_.size() - 1;
  }

 private:
  //! Select in upper layer to get entry point for next layer search
  void select_entry_point(level_t level, node_id_t *entry_point, dist_t *dist,
                          HnswContext *ctx) const;

  //! update node id neighbors from topkHeap, and reverse link is also updated
  void add_neighbors(node_id_t id, level_t level, TopkHeap &topk_heap,
                     HnswContext *ctx);

  //! Given a node id and level, search the nearest neighbors in graph
  //! Note: the nearest neighbors result keeps in topk, and entry_point and
  //! dist will be updated to current level nearest node id and distance
  void search_neighbors(level_t level, node_id_t *entry_point, dist_t *dist,
                        TopkHeap &topk, HnswContext *ctx) const;

  //! Update the node's neighbors
  void update_neighbors(HnswDistCalculator &dc, node_id_t id, level_t level,
                        TopkHeap &topk_heap);

  //! Checking linkId could be id's new neighbor, and add as neighbor if true
  //! @dc         distance calculator
  //! @updateHeap temporary heap in updating neighbors
  void reverse_update_neighbors(HnswDistCalculator &dc, node_id_t id,
                                level_t level, node_id_t link_id, dist_t dist,
                                TopkHeap &update_heap);

  //! expand neighbors until group nums are reached
  void expand_neighbors_by_group(TopkHeap &topk, HnswContext *ctx) const;

 private:
  HnswAlgorithm(const HnswAlgorithm &) = delete;
  HnswAlgorithm &operator=(const HnswAlgorithm &) = delete;

 private:
  //! Size of lock_pool_ is expected to be kLockCnt (a power of two) so that
  //! kLockMask can be used to map an id to a lock slot
  //! NOTE(review): the sizing happens outside this header - confirm in the
  //! constructor
  static constexpr uint32_t kLockCnt{1U << 8};
  static constexpr uint32_t kLockMask{kLockCnt - 1U};

  HnswEntity &entity_;                  // graph storage, not owned
  mutable std::mt19937 mt_{};           // generator for get_random_level()
  std::vector<double> level_probas_{};  // per-level probability, see init()

  mutable ailego::SpinMutex spin_lock_{};  // global spin lock
  std::mutex mutex_{};                     // global mutex
  // TODO: spin lock?
  std::vector<std::mutex> lock_pool_{};
};

}  // namespace core
}  // namespace zvec

================================================
FILE: src/core/algorithm/hnsw/hnsw_builder.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "hnsw_builder.h"
#include <iostream>
#include <thread>
#include <ailego/pattern/defer.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_factory.h>
#include <zvec/core/framework/index_logger.h>
#include "hnsw_algorithm.h"
#include "hnsw_params.h"

namespace zvec {
namespace core {

//! Default constructor: every member keeps its in-class initializer.
HnswBuilder::HnswBuilder() = default;

//! Initialize the builder.
//! Copies the index meta, reads the builder parameters, replaces zero values
//! with defaults, validates the ranges, creates and initializes the metric,
//! then configures the entity and the graph algorithm.
//! @meta:   index meta (metric name, element size, ...)
//! @params: builder parameters
//! return 0 on success (state becomes BUILD_STATE_INITED), or
//! IndexError_InvalidArgument for an out-of-range parameter,
//! IndexError_NoExist when the metric cannot be created, or the error code
//! returned by the metric / entity / algorithm initialization.
int HnswBuilder::init(const IndexMeta &meta, const ailego::Params &params) {
  LOG_INFO("Begin HnswBuilder::init");

  meta_ = meta;
  auto params_copy = params;
  meta_.set_builder("HnswBuilder", HnswEntity::kRevision,
                    std::move(params_copy));

  size_t memory_quota = 0UL;
  params.get(PARAM_HNSW_BUILDER_MEMORY_QUOTA, &memory_quota);
  params.get(PARAM_HNSW_BUILDER_THREAD_COUNT, &thread_cnt_);
  params.get(PARAM_HNSW_BUILDER_MIN_NEIGHBOR_COUNT, &min_neighbor_cnt_);
  params.get(PARAM_HNSW_BUILDER_EFCONSTRUCTION, &ef_construction_);
  params.get(PARAM_HNSW_BUILDER_CHECK_INTERVAL_SECS, &check_interval_secs_);

  // The level-0 neighbor count, the scaling factor and the prune count are
  // all derived from the upper-level neighbor count unless overridden.
  params.get(PARAM_HNSW_BUILDER_MAX_NEIGHBOR_COUNT, &upper_max_neighbor_cnt_);
  float multiplier = HnswEntity::kDefaultL0MaxNeighborCntMultiplier;
  params.get(PARAM_HNSW_BUILDER_L0_MAX_NEIGHBOR_COUNT_MULTIPLIER, &multiplier);
  l0_max_neighbor_cnt_ = multiplier * upper_max_neighbor_cnt_;
  scaling_factor_ = upper_max_neighbor_cnt_;
  params.get(PARAM_HNSW_BUILDER_SCALING_FACTOR, &scaling_factor_);

  multiplier = HnswEntity::kDefaultNeighborPruneMultiplier;
  params.get(PARAM_HNSW_BUILDER_NEIGHBOR_PRUNE_MULTIPLIER, &multiplier);
  size_t prune_cnt = multiplier * upper_max_neighbor_cnt_;

  if (ef_construction_ == 0) {
    ef_construction_ = HnswEntity::kDefaultEfConstruction;
  }
  if (upper_max_neighbor_cnt_ == 0) {
    upper_max_neighbor_cnt_ = HnswEntity::kDefaultUpperMaxNeighborCnt;
  }
  if (upper_max_neighbor_cnt_ > kMaxNeighborCnt) {
    LOG_ERROR("[%s] must be in range (0,%d]",
              PARAM_HNSW_BUILDER_MAX_NEIGHBOR_COUNT.c_str(), kMaxNeighborCnt);
    return IndexError_InvalidArgument;
  }
  if (min_neighbor_cnt_ > upper_max_neighbor_cnt_) {
    LOG_ERROR("[%s]-[%d] must be <= [%s]-[%d]",
              PARAM_HNSW_BUILDER_MIN_NEIGHBOR_COUNT.c_str(), min_neighbor_cnt_,
              PARAM_HNSW_BUILDER_MAX_NEIGHBOR_COUNT.c_str(),
              upper_max_neighbor_cnt_);
    return IndexError_InvalidArgument;
  }
  // NOTE(review): the zero fallback for level 0 uses the *upper* level
  // default rather than kDefaultL0MaxNeighborCnt - confirm this is intended.
  if (l0_max_neighbor_cnt_ == 0) {
    l0_max_neighbor_cnt_ = HnswEntity::kDefaultUpperMaxNeighborCnt;
  }
  if (l0_max_neighbor_cnt_ > HnswEntity::kMaxNeighborCnt) {
    LOG_ERROR("L0MaxNeighborCnt must be in range (0,%d)",
              HnswEntity::kMaxNeighborCnt);
    return IndexError_InvalidArgument;
  }
  if (scaling_factor_ == 0U) {
    scaling_factor_ = HnswEntity::kDefaultScalingFactor;
  }
  if (scaling_factor_ < 5 || scaling_factor_ > 1000) {
    LOG_ERROR("[%s] must be in range [5,1000]",
              PARAM_HNSW_BUILDER_SCALING_FACTOR.c_str());
    return IndexError_InvalidArgument;
  }

  // hardware_concurrency() is allowed to return 0 when the value cannot be
  // determined. A zero thread count would otherwise be handed as-is to the
  // thread pool created in build(), so fall back to a single worker.
  const uint32_t hw_threads = std::thread::hardware_concurrency();
  if (thread_cnt_ == 0) {
    thread_cnt_ = (hw_threads != 0U) ? hw_threads : 1U;
  }
  if (hw_threads != 0U && thread_cnt_ > hw_threads) {
    LOG_WARN("[%s] greater than cpu cores %u",
             PARAM_HNSW_BUILDER_THREAD_COUNT.c_str(), hw_threads);
  }
  if (prune_cnt == 0UL) {
    prune_cnt = upper_max_neighbor_cnt_;
  }

  metric_ = IndexFactory::CreateMetric(meta_.metric_name());
  if (!metric_) {
    LOG_ERROR("CreateMetric failed, name: %s", meta_.metric_name().c_str());
    return IndexError_NoExist;
  }
  int ret = metric_->init(meta_, meta_.metric_params());
  if (ret != 0) {
    LOG_ERROR("IndexMetric init failed, ret=%d", ret);
    return ret;
  }

  entity_.set_vector_size(meta_.element_size());

  entity_.set_ef_construction(ef_construction_);
  entity_.set_l0_neighbor_cnt(l0_max_neighbor_cnt_);
  entity_.set_min_neighbor_cnt(min_neighbor_cnt_);
  entity_.set_upper_neighbor_cnt(upper_max_neighbor_cnt_);
  entity_.set_scaling_factor(scaling_factor_);
  entity_.set_memory_quota(memory_quota);
  entity_.set_prune_cnt(prune_cnt);

  ret = entity_.init();
  if (ret != 0) {
    return ret;
  }

  alg_ = HnswAlgorithm::UPointer(new HnswAlgorithm(entity_));

  ret = alg_->init();
  if (ret != 0) {
    return ret;
  }

  state_ = BUILD_STATE_INITED;
  LOG_INFO(
      "End HnswBuilder::init, params: vectorSize=%u efConstruction=%u "
      "l0NeighborCnt=%u upperNeighborCnt=%u scalingFactor=%u "
      "memoryQuota=%zu neighborPruneCnt=%zu metricName=%s ",
      meta_.element_size(), ef_construction_, l0_max_neighbor_cnt_,
      upper_max_neighbor_cnt_, scaling_factor_, memory_quota, prune_cnt,
      meta_.metric_name().c_str());

  return 0;
}

int HnswBuilder::cleanup(void) {
  LOG_INFO("Begin HnswBuilder::cleanup");

  l0_max_neighbor_cnt_ = HnswEntity::kDefaultL0MaxNeighborCnt;
  min_neighbor_cnt_ = 0;
  upper_max_neighbor_cnt_ = HnswEntity::kDefaultUpperMaxNeighborCnt;
  ef_construction_ = HnswEntity::kDefaultEfConstruction;
  scaling_factor_ = HnswEntity::kDefaultScalingFactor;
  check_interval_secs_ = kDefaultLogIntervalSecs;
  errcode_ = 0;
  error_ = false;
  entity_.cleanup();
  alg_->cleanup();
  meta_.clear();
  metric_.reset();
  stats_.clear_attributes();
  stats_.set_trained_count(0UL);
  stats_.set_built_count(0UL);
  stats_.set_dumped_count(0UL);
  stats_.set_discarded_count(0UL);
  stats_.set_trained_costtime(0UL);
  stats_.set_built_costtime(0UL);
  stats_.set_dumped_costtime(0UL);
  state_ = BUILD_STATE_INIT;

  LOG_INFO("End HnswBuilder::cleanup");

  return 0;
}

//! Train the metric with the vectors of a holder.
//! Requires BUILD_STATE_INITED. When the metric does not support training
//! nothing is iterated and the recorded count / cost time stay at zero.
//! @holder: source of training vectors, must match the index meta
//! return 0 on success (state becomes BUILD_STATE_TRAINED), or
//! IndexError_NoReady / IndexError_InvalidArgument / IndexError_Mismatch /
//! IndexError_Runtime, or the error code returned by the metric.
int HnswBuilder::train(IndexThreads::Pointer, IndexHolder::Pointer holder) {
  if (state_ != BUILD_STATE_INITED) {
    LOG_ERROR("Init the builder before HnswBuilder::train");
    return IndexError_NoReady;
  }
  if (!holder) {
    LOG_ERROR("Input holder is nullptr while training index");
    return IndexError_InvalidArgument;
  }
  if (!holder->is_matched(meta_)) {
    LOG_ERROR("Input holder doesn't match index meta while training index");
    return IndexError_Mismatch;
  }

  LOG_INFO("Begin HnswBuilder::train");

  size_t sample_cnt = 0;
  size_t elapsed_ms = 0;
  if (metric_->support_train()) {
    auto begin_ms = ailego::Monotime::MilliSeconds();
    auto cursor = holder->create_iterator();
    if (!cursor) {
      LOG_ERROR("Create iterator for holder failed");
      return IndexError_Runtime;
    }
    // Feed every vector of the holder to the metric, one at a time
    for (; cursor->is_valid(); cursor->next()) {
      int ret = metric_->train(cursor->data(), meta_.dimension());
      if (ailego_unlikely(ret != 0)) {
        LOG_ERROR("Hnsw build measure train failed, ret=%d", ret);
        return ret;
      }
      ++sample_cnt;
    }
    elapsed_ms = ailego::Monotime::MilliSeconds() - begin_ms;
  }

  stats_.set_trained_count(sample_cnt);
  stats_.set_trained_costtime(elapsed_ms);
  state_ = BUILD_STATE_TRAINED;

  LOG_INFO("End HnswBuilder::train");
  return 0;
}

//! Train from an external trainer.
//! The trainer argument is ignored: only the state transition to
//! BUILD_STATE_TRAINED is performed and the training stats are zeroed.
//! return 0 on success, IndexError_NoReady when the builder is not in
//! BUILD_STATE_INITED.
int HnswBuilder::train(const IndexTrainer::Pointer & /*trainer*/) {
  const bool inited = (state_ == BUILD_STATE_INITED);
  if (!inited) {
    LOG_ERROR("Init the builder before HnswBuilder::train");
    return IndexError_NoReady;
  }

  LOG_INFO("Begin HnswBuilder::train by trainer");

  // Nothing to learn here, just record an empty training run
  state_ = BUILD_STATE_TRAINED;
  stats_.set_trained_costtime(0UL);
  stats_.set_trained_count(0UL);

  LOG_INFO("End HnswBuilder::train by trainer");
  return 0;
}

//! Build the index.
//! First copies every vector of the holder into the entity (assigning each
//! one a random level), then inserts the nodes into the graph in parallel:
//! one do_build task per thread, task i handling ids i, i + n, i + 2n, ...
//! While the tasks run, progress is logged every check_interval_secs_.
//! @threads: thread pool to use; a private one with thread_cnt_ threads is
//!           created when null
//! @holder:  source of vectors, must match the index meta
//! return 0 on success (state becomes BUILD_STATE_BUILT), or
//! IndexError_NoReady / IndexError_InvalidArgument / IndexError_Mismatch /
//! IndexError_NoMemory / IndexError_Runtime, or the error code reported by
//! the entity or by a failed worker.
int HnswBuilder::build(IndexThreads::Pointer threads,
                       IndexHolder::Pointer holder) {
  if (state_ != BUILD_STATE_TRAINED) {
    LOG_ERROR("Train the index before HnswBuilder::build");
    return IndexError_NoReady;
  }

  if (!holder) {
    LOG_ERROR("Input holder is nullptr while building index");
    return IndexError_InvalidArgument;
  }
  if (!holder->is_matched(meta_)) {
    LOG_ERROR("Input holder doesn't match index meta while building index");
    return IndexError_Mismatch;
  }
  if (!threads) {
    threads = std::make_shared<SingleQueueIndexThreads>(thread_cnt_, false);
    if (!threads) {
      return IndexError_NoMemory;
    }
  }

  auto start_time = ailego::Monotime::MilliSeconds();
  LOG_INFO("Begin HnswBuilder::build");

  // A count of size_t(-1) is treated as "unknown": skip the reservation
  if (holder->count() != static_cast<size_t>(-1)) {
    LOG_DEBUG("HnswBuilder holder documents count %lu", holder->count());
    int ret = entity_.reserve_space(holder->count());
    if (ret != 0) {
      LOG_ERROR("HnswBuilde reserver space failed");
      return ret;
    }
  }
  auto iter = holder->create_iterator();
  if (!iter) {
    LOG_ERROR("Create iterator for holder failed");
    return IndexError_Runtime;
  }
  int ret;
  error_ = false;
  while (iter->is_valid()) {
    level_t level = alg_->get_random_level();
    node_id_t id;

    const void *vec = iter->data();
    ret = entity_.add_vector(level, iter->key(), vec, &id);
    if (ailego_unlikely(ret != 0)) {
      return ret;
    }
    iter->next();
  }
  // Holder is not needed, cleanup it.
  holder.reset();

  LOG_INFO("Finished save vector, start build graph...");

  auto task_group = threads->make_group();
  if (!task_group) {
    LOG_ERROR("Failed to create task group");
    return IndexError_Runtime;
  }

  std::atomic<node_id_t> finished{0};
  for (size_t i = 0; i < threads->count(); ++i) {
    task_group->submit(ailego::Closure ::New(this, &HnswBuilder::do_build, i,
                                             threads->count(), &finished));
  }

  while (!task_group->is_finished()) {
    std::unique_lock<std::mutex> lk(mutex_);
    cond_.wait_until(lk, std::chrono::system_clock::now() +
                             std::chrono::seconds(check_interval_secs_));
    if (error_.load(std::memory_order_acquire)) {
      // Stop polling; the failure is reported after the workers are joined
      break;
    }
    LOG_INFO("Built cnt %u, finished percent %.3f%%", finished.load(),
             finished.load() * 100.0f / entity_.doc_cnt());
  }

  // Always join the workers before leaving, including on failure: they hold
  // a pointer to the local `finished` counter and to `this`, so returning
  // while they are still running would leave them with dangling pointers.
  // Joining first also ensures the failing worker has stored errcode_
  // (it is written after error_ is raised) before it is read below.
  task_group->wait_finish();

  if (error_.load(std::memory_order_acquire)) {
    LOG_ERROR("Failed to build index while waiting finish");
    return errcode_;
  }

  stats_.set_built_count(finished.load());
  stats_.set_built_costtime(ailego::Monotime::MilliSeconds() - start_time);
  state_ = BUILD_STATE_BUILT;

  LOG_INFO("End HnswBuilder::build");
  return 0;
}

//! Worker body of build(): insert a strided subset of nodes into the graph.
//! @idx:       first node id handled by this worker
//! @step_size: stride between two ids handled by this worker
//! @finished:  shared counter, incremented once per inserted node
//! On failure the first worker to fail raises error_ and stores the code in
//! errcode_; other workers stop as soon as they notice the flag. In every
//! case cond_ is notified on exit so that build() wakes up.
void HnswBuilder::do_build(node_id_t idx, size_t step_size,
                           std::atomic<node_id_t> *finished) {
  AILEGO_DEFER([&]() {
    std::lock_guard<std::mutex> latch(mutex_);
    cond_.notify_one();
  });
  // The context needs a shared_ptr to the entity, but the entity is a member
  // of the builder: wrap it with a no-op deleter so it is never freed here.
  HnswContext *ctx = new (std::nothrow)
      HnswContext(meta_.dimension(), metric_,
                  std::shared_ptr<HnswEntity>(&entity_, [](HnswEntity *) {}));
  if (ailego_unlikely(ctx == nullptr)) {
    if (!error_.exchange(true)) {
      LOG_ERROR("Failed to create context");
      errcode_ = IndexError_NoMemory;
    }
    return;
  }
  HnswContext::Pointer auto_ptr(ctx);
  ctx->set_max_scan_num(entity_.doc_cnt());
  int ret = ctx->init(HnswContext::kBuilderContext);
  if (ret != 0) {
    if (!error_.exchange(true)) {
      LOG_ERROR("Failed to init context");
      errcode_ = IndexError_Runtime;
    }
    return;
  }

  for (node_id_t id = idx; id < entity_.doc_cnt(); id += step_size) {
    // Another worker already failed: the build is going to be reported as
    // failed anyway, so do not keep inserting nodes.
    if (ailego_unlikely(error_.load(std::memory_order_acquire))) {
      return;
    }
    ctx->reset_query(entity_.get_vector(id));
    ret = alg_->add_node(id, entity_.get_level(id), ctx);
    if (ailego_unlikely(ret != 0)) {
      if (!error_.exchange(true)) {
        LOG_ERROR("Hnsw graph add node failed");
        errcode_ = ret;
      }
      return;
    }
    ctx->clear();
    (*finished)++;
  }
}

//! Dump index into storage.
//! Records HnswSearcher as the searcher in the meta, serializes the meta
//! into the dumper, then dumps the entity.
//! @dumper: destination of the dump, must not be null
//! return 0 on success, IndexError_NoReady when the index is not built yet,
//! IndexError_InvalidArgument for a null dumper, or the error code returned
//! by the meta serialization / entity dump.
int HnswBuilder::dump(const IndexDumper::Pointer &dumper) {
  if (state_ != BUILD_STATE_BUILT) {
    // Reported as an error, like the equivalent checks in train()/build()
    LOG_ERROR("Build the index before HnswBuilder::dump");
    return IndexError_NoReady;
  }
  // Reject a null dumper up front instead of passing a null pointer down to
  // the serialization helpers.
  if (!dumper) {
    LOG_ERROR("Input dumper is nullptr while dumping index");
    return IndexError_InvalidArgument;
  }

  LOG_INFO("Begin HnswBuilder::dump");

  meta_.set_searcher("HnswSearcher", HnswEntity::kRevision, ailego::Params());
  auto start_time = ailego::Monotime::MilliSeconds();

  int ret = IndexHelper::SerializeToDumper(meta_, dumper.get());
  if (ret != 0) {
    LOG_ERROR("Failed to serialize meta into dumper.");
    return ret;
  }

  ret = entity_.dump(dumper);
  if (ret != 0) {
    LOG_ERROR("HnswBuilder dump index failed");
    return ret;
  }

  stats_.set_dumped_count(entity_.doc_cnt());
  stats_.set_dumped_costtime(ailego::Monotime::MilliSeconds() - start_time);

  LOG_INFO("EndHnswBuilder::dump");
  return 0;
}

//! Register HnswBuilder through the index factory macro.
//! NOTE(review): the macro is defined outside this file; presumably it makes
//! the builder creatable by name through IndexFactory - confirm.
INDEX_FACTORY_REGISTER_BUILDER(HnswBuilder);

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw/hnsw_builder.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <zvec/ailego/parallel/thread_pool.h>
#include <zvec/core/framework/index_builder.h>
#include "hnsw_algorithm.h"
#include "hnsw_builder_entity.h"

namespace zvec {
namespace core {

class HnswBuilder : public IndexBuilder {
 public:
  //! Constructor
  HnswBuilder();

  //! Initialize the builder
  virtual int init(const IndexMeta &meta,
                   const ailego::Params &params) override;

  //! Cleanup the builder
  virtual int cleanup(void) override;

  //! Train the data
  virtual int train(IndexThreads::Pointer,
                    IndexHolder::Pointer holder) override;

  //! Train the data
  virtual int train(const IndexTrainer::Pointer &trainer) override;


  //! Build the index
  virtual int build(IndexThreads::Pointer threads,
                    IndexHolder::Pointer holder) override;

  //! Dump index into storage
  virtual int dump(const IndexDumper::Pointer &dumper) override;

  //! Retrieve statistics
  virtual const Stats &stats(void) const override {
    return stats_;
  }

 private:
  void do_build(node_id_t idx, size_t step_size,
                std::atomic<node_id_t> *finished);

  constexpr static uint32_t kDefaultLogIntervalSecs = 15U;
  constexpr static uint32_t kMaxNeighborCnt = 65535;

 private:
  enum BUILD_STATE {
    BUILD_STATE_INIT = 0,
    BUILD_STATE_INITED = 1,
    BUILD_STATE_TRAINED = 2,
    BUILD_STATE_BUILT = 3
  };

  HnswBuilderEntity entity_{};
  HnswAlgorithm::UPointer alg_;  // impl graph algorithm
  uint32_t thread_cnt_{0};
  uint32_t min_neighbor_cnt_{0};
  uint32_t upper_max_neighbor_cnt_{HnswEntity::kDefaultUpperMaxNeighborCnt};
  uint32_t l0_max_neighbor_cnt_{HnswEntity::kDefaultL0MaxNeighborCnt};
  uint32_t ef_construction_{HnswEntity::kDefaultEfConstruction};
  uint32_t scaling_factor_{HnswEntity::kDefaultScalingFactor};
  uint32_t check_interval_secs_{kDefaultLogIntervalSecs};

  int errcode_{0};
  std::atomic_bool error_{false};
  IndexMeta meta_{};
  IndexMetric::Pointer metric_{};
  std::mutex mutex_{};
  std::condition_variable cond_{};
  Stats stats_{};

  BUILD_STATE state_{BUILD_STATE_INIT};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw/hnsw_builder_entity.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "hnsw_builder_entity.h"
#include <iostream>
#include <zvec/ailego/hash/crc32c.h>
#include "utility/sparse_utility.h"

namespace zvec {
namespace core {

//! Constructor: start with kInvalidNodeId and level 0 passed to
//! update_ep_and_level().
//! NOTE(review): presumably this means "no entry point yet" - confirm
//! against HnswEntity.
HnswBuilderEntity::HnswBuilderEntity() {
  update_ep_and_level(kInvalidNodeId, 0U);
}

int HnswBuilderEntity::cleanup() {
  memory_quota_ = 0UL;
  neighbors_size_ = 0U;
  upper_neighbors_size_ = 0U;
  padding_size_ = 0U;
  vectors_buffer_.clear();
  keys_buffer_.clear();
  neighbors_buffer_.clear();
  upper_neighbors_buffer_.clear();
  neighbors_index_.clear();

  vectors_buffer_.shrink_to_fit();
  keys_buffer_.shrink_to_fit();
  neighbors_buffer_.shrink_to_fit();
  upper_neighbors_buffer_.shrink_to_fit();
  neighbors_index_.shrink_to_fit();

  this->HnswEntity::cleanup();

  return 0;
}

//! Init the builder entity.
//! Computes the per-node storage size (the vector size rounded up by
//! AlignSize), the padding appended after each vector, and caches the size
//! of one neighbor list for level 0 and for the upper levels.
//! Always returns 0.
int HnswBuilderEntity::init() {
  const size_t raw_size = vector_size();

  // Round the vector size up (original note: aligned to 32)
  set_node_size(AlignSize(raw_size));

  // A node size that is a multiple of 1k degrades build performance
  // (per the original note), so bump it to the next aligned size.
  const bool multiple_of_1k = (node_size() % 1024 == 0);
  if (multiple_of_1k) {
    set_node_size(AlignSize(node_size() + 1));
  }

  padding_size_ = node_size() - raw_size;
  upper_neighbors_size_ = upper_neighbors_size();
  neighbors_size_ = neighbors_size();

  return 0;
}

//! Reserve buffer space for documents.
//! When a memory quota is set, the space needed for `docs` documents is
//! checked against it first. The estimate covers the same buffers that are
//! reserved below (vectors, keys, level-0 neighbors, neighbor index); upper
//! level neighbor lists are not known in advance and are not included.
//! @docs: number of documents
//! return 0 on success, IndexError_NoMemory when the estimate exceeds the
//! quota.
int HnswBuilderEntity::reserve_space(size_t docs) {
  if (memory_quota_ > 0) {
    // Include the key buffer: add_vector() counts its capacity against the
    // quota, so leaving it out here would let the reservation succeed only
    // for the very first add_vector() to fail.
    const size_t bytes_per_doc = node_size() + neighbors_size_ +
                                 sizeof(NeighborIndex) + sizeof(key_t);
    // Compare by division so that bytes_per_doc * docs cannot overflow
    if (docs > memory_quota_ / bytes_per_doc) {
      return IndexError_NoMemory;
    }
  }

  vectors_buffer_.reserve(node_size() * docs);
  keys_buffer_.reserve(sizeof(key_t) * docs);
  neighbors_buffer_.reserve(neighbors_size_ * docs);
  neighbors_index_.reserve(docs);

  return 0;
}

//! Add vector and key to the entity.
//! Appends the vector (plus padding), the key, an empty level-0 neighbor
//! list and one empty neighbor list per upper level, and records where the
//! upper level lists of this node start.
//! @level: highest graph level of the node
//! @key:   primary key of the vector
//! @vec:   vector data, vector_size() bytes are copied
//! @id:    receives the local node id (the document count before the add)
//! return 0 on success, IndexError_NoMemory when the buffers already exceed
//! the memory quota.
int HnswBuilderEntity::add_vector(level_t level, key_t key, const void *vec,
                                  node_id_t *id) {
  if (memory_quota_ > 0) {
    // The quota is checked against the capacity currently held by buffers
    const size_t held_bytes =
        vectors_buffer_.capacity() + keys_buffer_.capacity() +
        neighbors_buffer_.capacity() + upper_neighbors_buffer_.capacity() +
        neighbors_index_.capacity() * sizeof(NeighborIndex);
    if (held_bytes > memory_quota_) {
      LOG_ERROR("Add vector failed, used memory exceed quota, cur_doc=%u",
                doc_cnt());
      return IndexError_NoMemory;
    }
  }

  // vector followed by zero padding up to the node size
  vectors_buffer_.append(reinterpret_cast<const char *>(vec), vector_size());
  vectors_buffer_.append(padding_size_, '\0');

  keys_buffer_.append(reinterpret_cast<const char *>(&key), sizeof(key));

  // empty level 0 neighbor list
  neighbors_buffer_.append(neighbors_size_, '\0');

  // the upper level lists of this node start at the current end of buffer
  const size_t upper_offset = upper_neighbors_buffer_.size();
  neighbors_index_.emplace_back(upper_offset, level);

  // one empty neighbor list for each level in [1, level]
  for (level_t lv = 1; lv <= level; ++lv) {
    upper_neighbors_buffer_.append(upper_neighbors_size_, '\0');
  }

  auto *doc_counter = mutable_doc_cnt();
  *id = (*doc_counter)++;

  return 0;
}

//! Get primary key of the node id.
//! Keys are stored back to back in keys_buffer_, indexed by node id; the id
//! is not range checked.
key_t HnswBuilderEntity::get_key(node_id_t id) const {
  const char *slot = keys_buffer_.data() + id * sizeof(key_t);
  return *reinterpret_cast<const key_t *>(slot);
}

//! Get vector data of the node id.
//! Returns a pointer into vectors_buffer_ at offset id * node_size(); the
//! id is not range checked.
const void *HnswBuilderEntity::get_vector(node_id_t id) const {
  const char *base = vectors_buffer_.data();
  return base + id * node_size();
}

//! Get vector data of the node id as a memory block.
//! The block is reset to point at the vector inside vectors_buffer_.
//! Always returns 0.
int HnswBuilderEntity::get_vector(const node_id_t id,
                                  IndexStorage::MemoryBlock &block) const {
  block.reset(const_cast<void *>(get_vector(id)));
  return 0;
}

//! Batch get vectors data by node ids.
//! @ids:   node ids, `count` entries
//! @count: number of ids
//! @vecs:  output array of `count` pointers into vectors_buffer_
//! Always returns 0.
int HnswBuilderEntity::get_vector(const node_id_t *ids, uint32_t count,
                                  const void **vecs) const {
  const char *base = vectors_buffer_.data();
  const auto stride = node_size();
  for (uint32_t pos = 0; pos != count; ++pos) {
    vecs[pos] = base + ids[pos] * stride;
  }
  return 0;
}

//! Batch get vectors data by node ids as memory blocks.
//! One block per id is appended to vec_blocks, each pointing at the vector
//! inside vectors_buffer_.
//! @ids:        node ids, `count` entries
//! @count:      number of ids
//! @vec_blocks: output, existing entries are kept
//! Always returns 0.
int HnswBuilderEntity::get_vector(
    const node_id_t *ids, uint32_t count,
    std::vector<IndexStorage::MemoryBlock> &vec_blocks) const {
  // Look each vector up directly instead of staging the pointers in a
  // variable-length array: VLAs are not standard C++ and would place
  // `count` pointers on the stack.
  vec_blocks.reserve(vec_blocks.size() + count);
  for (uint32_t i = 0; i < count; ++i) {
    const void *vec = get_vector(ids[i]);
    vec_blocks.emplace_back(
        IndexStorage::MemoryBlock(const_cast<void *>(vec)));
  }
  return 0;
}

//! Get the node id's neighbors on graph level.
//! The result is built from the count and the neighbor array stored in the
//! neighbor header of (level, id).
const Neighbors HnswBuilderEntity::get_neighbors(level_t level,
                                                 node_id_t id) const {
  const NeighborsHeader *header = get_neighbor_header(level, id);
  return {header->neighbor_cnt, header->neighbors};
}

//! Replace the neighbors of node id on graph level.
//! Only the node ids of `neighbors` are stored (the distances are ignored),
//! in the given order, then the neighbor count is set to the list size.
//! The list size is not checked against the capacity of the neighbor slot.
//! Always returns 0.
int HnswBuilderEntity::update_neighbors(
    level_t level, node_id_t id,
    const std::vector<std::pair<node_id_t, dist_t>> &neighbors) {
  auto *header =
      const_cast<NeighborsHeader *>(get_neighbor_header(level, id));

  size_t slot = 0;
  for (const auto &entry : neighbors) {
    header->neighbors[slot] = entry.first;
    ++slot;
  }
  header->neighbor_cnt = neighbors.size();

  return 0;
}

//! Add a neighbor to id in graph level.
//! The neighbor is stored at the current end of the list and the count is
//! increased by one; the size argument is unused and no capacity check is
//! done here.
void HnswBuilderEntity::add_neighbor(level_t level, node_id_t id,
                                     uint32_t /*size*/, node_id_t neighbor_id) {
  auto *header =
      const_cast<NeighborsHeader *>(get_neighbor_header(level, id));
  // Bump the count first, then fill the slot it pointed at
  const auto slot = header->neighbor_cnt++;
  header->neighbors[slot] = neighbor_id;
}

//! Dump the hnsw graph to dumper.
//! Delegates to dump_segments(), handing it the key array and a callback
//! giving the level of each node.
//! return 0 on success, or the negative value returned by dump_segments().
int HnswBuilderEntity::dump(const IndexDumper::Pointer &dumper) {
  char *raw_keys = const_cast<char *>(keys_buffer_.data());
  key_t *keys = reinterpret_cast<key_t *>(raw_keys);

  auto level_of = [this](node_id_t id) { return get_level(id); };
  auto ret = dump_segments(dumper, keys, level_of);

  // Only negative values are failures; anything else is reported as 0
  return ailego_unlikely(ret < 0) ? ret : 0;
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw/hnsw_builder_entity.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <zvec/ailego/internal/platform.h>
#include "hnsw_entity.h"

namespace zvec {
namespace core {

class HnswBuilderEntity : public HnswEntity {
 public:
  //! Add vector and key to hnsw entity, and local id will be saved to id
  virtual int add_vector(level_t level, key_t key, const void *vec,
                         node_id_t *id) override;

  //! Get primary key of the node id
  virtual key_t get_key(node_id_t id) const override;

  //! Get vector feature data by key
  virtual const void *get_vector(node_id_t id) const override;

  //! Batch get vectors feature data by keys
  virtual int get_vector(const node_id_t *ids, uint32_t count,
                         const void **vecs) const override;

  virtual int get_vector(const node_id_t id,
                         IndexStorage::MemoryBlock &block) const override;
  virtual int get_vector(
      const node_id_t *ids, uint32_t count,
      std::vector<IndexStorage::MemoryBlock> &vec_blocks) const override;

  //! Get the node id's neighbors on graph level
  const NeighborsHeader *get_neighbor_header(level_t level,
                                             node_id_t id) const {
    if (level == 0) {
      return reinterpret_cast<const NeighborsHeader *>(
          neighbors_buffer_.data() + neighbors_size_ * id);
    } else {
      size_t offset = neighbors_index_[id].offset;
      return reinterpret_cast<const NeighborsHeader *>(
          upper_neighbors_buffer_.data() + offset +
          (level - 1) * upper_neighbors_size_);
    }
  }

  //! Get the node id's neighbors on graph level
  virtual const Neighbors get_neighbors(level_t level,
                                        node_id_t id) const override;

  //! Replace node id in level's neighbors
  virtual int update_neighbors(
      level_t level, node_id_t id,
      const std::vector<std::pair<node_id_t, dist_t>> &neighbors) override;

  //! add a neighbor to id in graph level
  virtual void add_neighbor(level_t level, node_id_t id, uint32_t size,
                            node_id_t neighbor_id) override;

  //! Dump the hnsw graph to dumper
  virtual int dump(const IndexDumper::Pointer &dumper) override;

  //! Cleanup the entity
  virtual int cleanup(void) override;

 public:
  //! Constructor
  HnswBuilderEntity();

  //! Get the node graph level by id
  level_t get_level(node_id_t id) const {
    return neighbors_index_[id].level;
  }

  //! Init builerEntity
  int init();

  //! reserve buffer space for documents
  //! @param  docs    number of documents
  int reserve_space(size_t docs);

  //! Set memory quota params
  inline void set_memory_quota(size_t memory_quota) {
    memory_quota_ = memory_quota;
  }

  //! Get neighbors size
  inline size_t neighbors_size() const {
    return sizeof(NeighborsHeader) + l0_neighbor_cnt() * sizeof(node_id_t);
  }

  //! Get upper neighbors size
  inline size_t upper_neighbors_size() const {
    return sizeof(NeighborsHeader) + upper_neighbor_cnt() * sizeof(node_id_t);
  }

 public:
  HnswBuilderEntity(const HnswBuilderEntity &) = delete;
  HnswBuilderEntity &operator=(const HnswBuilderEntity &) = delete;

 private:
  friend class HnswSearcherEntity;
  //! class internal used only
  struct NeighborIndex {
    NeighborIndex(size_t off, level_t l) : offset(off), level(l) {}
    uint64_t offset : 48;
    uint64_t level : 16;
  };

  std::string vectors_buffer_{};          // aligned vectors
  std::string keys_buffer_{};             // aligned vectors
  std::string neighbors_buffer_{};        // level 0 neighbors buffer
  std::string upper_neighbors_buffer_{};  // upper layer neighbors buffer

  std::string sparse_data_buffer_{};  // aligned spase data buffer
  size_t sparse_data_offset_{0};      //

  // upper layer offset + level in upper_neighbors_buffer_
  std::vector<NeighborIndex> neighbors_index_{};
  size_t memory_quota_{0UL};
  size_t neighbors_size_{0U};        // level 0 neighbors size
  size_t upper_neighbors_size_{0U};  // level 0 neighbors size
  size_t padding_size_{};            // padding size for each vector element
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw/hnsw_chunk.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "hnsw_chunk.h"
#include <chrono>
#include <random>
#include <zvec/ailego/hash/crc32c.h>
#include <zvec/ailego/utility/time_helper.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_helper.h>
#include <zvec/core/framework/index_logger.h>
#include <zvec/core/framework/index_streamer.h>

namespace zvec {
namespace core {

int ChunkBroker::init_storage(size_t chunk_size) {
  chunk_meta_.clear();
  chunk_meta_.chunk_size = chunk_size;
  chunk_meta_.create_time = ailego::Realtime::Seconds();
  stats_.set_create_time(chunk_meta_.create_time);
  chunk_meta_.update_time = ailego::Realtime::Seconds();
  stats_.set_update_time(chunk_meta_.update_time);

  //! alloc meta chunk
  size_t size = sizeof(HnswChunkMeta);
  size = (size + page_mask_) & (~page_mask_);
  const std::string segment_id =
      make_segment_id(CHUNK_TYPE_META, kDefaultChunkSeqId);
  int ret = stg_->append(segment_id, size);
  if (ailego_unlikely(ret != 0)) {
    LOG_ERROR("Storage append segment failed for %s", IndexError::What(ret));
    return ret;
  }
  chunk_meta_segment_ = get_chunk(CHUNK_TYPE_META, kDefaultChunkSeqId);
  if (ailego_unlikely(!chunk_meta_segment_)) {
    LOG_ERROR("Get meta segment failed");
    return IndexError_Runtime;
  }

  //! update meta info and write to storage
  chunk_meta_.chunk_cnts[CHUNK_TYPE_META] += 1;
  chunk_meta_.total_size += size;
  (*stats_.mutable_index_size()) += size;
  size = chunk_meta_segment_->write(0UL, &chunk_meta_, sizeof(HnswChunkMeta));
  if (ailego_unlikely(size != sizeof(HnswChunkMeta))) {
    LOG_ERROR("Storage write data failed, wsize=%zu", size);
    return IndexError_WriteData;
  }

  return 0;
}

//! Load the persisted meta record from the already-located meta segment and
//! verify that the caller's chunk size matches the one stored in the index.
//! Returns 0 on success, IndexError_InvalidFormat when the segment does not
//! hold exactly one meta record, IndexError_Mismatch on chunk size mismatch.
int ChunkBroker::load_storage(size_t chunk_size) {
  IndexStorage::MemoryBlock data_block;
  const size_t read_bytes = chunk_meta_segment_->read(
      0UL, data_block, chunk_meta_segment_->data_size());
  if (read_bytes != sizeof(HnswChunkMeta)) {
    LOG_ERROR("Invalid hnsw meta chunk, read size=%zu chunk size=%zu",
              read_bytes, chunk_meta_segment_->data_size());
    return IndexError_InvalidFormat;
  }
  std::memcpy(&chunk_meta_, data_block.data(), read_bytes);

  if (chunk_meta_.chunk_size != chunk_size) {
    LOG_ERROR(
        "Params hnsw chunk size=%zu mismatch from previous %zu "
        "in index",
        chunk_size, static_cast<size_t>(chunk_meta_.chunk_size));
    return IndexError_Mismatch;
  }

  // Mirror the persisted bookkeeping into the streamer stats.
  // NOTE(review): the stats index size is not assigned here even though
  // total_size has just been loaded - confirm it is populated elsewhere.
  *stats_.mutable_check_point() = stg_->check_point();
  stats_.set_revision_id(chunk_meta_.revision_id);
  stats_.set_update_time(chunk_meta_.update_time);
  stats_.set_create_time(chunk_meta_.create_time);

  // Human readable timestamps are only needed for the debug line below.
  char created_text[32];
  char updated_text[32];
  ailego::Realtime::Gmtime(chunk_meta_.create_time, "%Y-%m-%d %H:%M:%S",
                           created_text, sizeof(created_text));
  ailego::Realtime::Gmtime(chunk_meta_.update_time, "%Y-%m-%d %H:%M:%S",
                           updated_text, sizeof(updated_text));
  LOG_DEBUG(
      "Load index, indexSize=%zu chunkSize=%zu nodeChunks=%zu "
      "upperNeighborChunks=%zu revisionId=%zu "
      "createTime=%s updateTime=%s",
      static_cast<size_t>(chunk_meta_.total_size),
      static_cast<size_t>(chunk_meta_.chunk_size),
      static_cast<size_t>(chunk_meta_.chunk_cnts[CHUNK_TYPE_NODE]),
      static_cast<size_t>(chunk_meta_.chunk_cnts[CHUNK_TYPE_UPPER_NEIGHBOR]),
      static_cast<size_t>(chunk_meta_.revision_id), created_text,
      updated_text);

  return 0;
}

//! Attach the broker to a storage instance.
//! \param stg             storage to take ownership of; must not be null
//! \param max_index_size  upper bound used by alloc_chunk() for the total size
//! \param chunk_size      default chunk size; must match an existing index
//! \param check_crc       stored as the broker's crc flag
//! \return 0 on success; IndexError_Duplicate if a storage is already attached,
//!         IndexError_InvalidArgument for a null storage, otherwise the result
//!         of init_storage() / load_storage().
int ChunkBroker::open(IndexStorage::Pointer stg, size_t max_index_size,
                      size_t chunk_size, bool check_crc) {
  if (ailego_unlikely(stg_)) {
    LOG_ERROR("An storage instance is already opened");
    return IndexError_Duplicate;
  }
  // Reject a null storage up front instead of dereferencing it below.
  if (ailego_unlikely(!stg)) {
    LOG_ERROR("Invalid storage instance");
    return IndexError_InvalidArgument;
  }
  stg_ = std::move(stg);

  // Chunk sizes are rounded to the page granularity the storage works with.
  if (stg_->isHugePage()) {
    page_mask_ = ailego::MemoryHelper::HugePageSize() - 1;
  } else {
    page_mask_ = ailego::MemoryHelper::PageSize() - 1;
  }
  check_crc_ = check_crc;
  max_chunks_size_ = max_index_size;
  dirty_ = false;

  // A missing meta segment means the storage holds no index yet.
  const std::string segment_id =
      make_segment_id(CHUNK_TYPE_META, kDefaultChunkSeqId);
  chunk_meta_segment_ = stg_->get(segment_id);
  if (!chunk_meta_segment_) {
    LOG_DEBUG("Create new index");
    return init_storage(chunk_size);
  }

  return load_storage(chunk_size);
}

int ChunkBroker::close(void) {
  flush(0UL);

  stg_.reset();
  check_crc_ = false;
  dirty_ = false;

  return 0;
}

int ChunkBroker::flush(uint64_t checkpoint) {
  ailego_assert_with(chunk_meta_segment_, "invalid meta segment");

  chunk_meta_.update_time = ailego::Realtime::Seconds();
  stats_.set_update_time(chunk_meta_.update_time);

  size_t size =
      chunk_meta_segment_->write(0UL, &chunk_meta_, sizeof(HnswChunkMeta));
  if (ailego_unlikely(size != sizeof(HnswChunkMeta))) {
    LOG_ERROR("Storage write data failed, wsize=%zu", size);
  }

  stg_->refresh(checkpoint);
  int ret = stg_->flush();
  if (ret == 0) {
    (*stats_.mutable_check_point()) = checkpoint;
  } else {
    LOG_ERROR("Storage flush failed for %s", IndexError::What(ret));
  }
  return ret;
}

//! Allocate (or reuse) the chunk identified by (type, seq_id).
//! \param type    chunk type, must be below CHUNK_TYPE_MAX
//! \param seq_id  sequence id of the chunk within its type
//! \param size    requested capacity; rounded up to the page size when a new
//!                segment has to be appended
//! \return (0, chunk) on success. On failure the pointer is empty and the
//!         code is IndexError_Uninitialized (no storage), IndexError_Runtime
//!         (a chunk with that id exists but is not an empty chunk of the
//!         requested size), IndexError_IndexFull (size budget exhausted), the
//!         storage append error, or IndexError_NoMemory when the appended
//!         segment cannot be fetched.
std::pair<int, Chunk::Pointer> ChunkBroker::alloc_chunk(int type,
                                                        uint64_t seq_id,
                                                        size_t size) {
  ailego_assert_with(type < CHUNK_TYPE_MAX, "chunk type overflow");

  Chunk::Pointer chunk;
  if (ailego_unlikely(!stg_)) {
    LOG_ERROR("Init storage first");
    return std::make_pair(IndexError_Uninitialized, chunk);
  }

  //! check exist a empty chunk with the same name
  chunk = get_chunk(type, seq_id);
  if (chunk) {
    // Only an empty chunk of exactly the requested capacity may be reused;
    // anything else under this id is treated as an invalid leftover.
    if (ailego_unlikely(chunk->capacity() != size ||
                        chunk->data_size() != 0UL)) {
      LOG_ERROR("Exist invalid chunk size %zu, expect size %zu",
                chunk->capacity(), size);
      chunk.reset();
      return std::make_pair(IndexError_Runtime, chunk);
    }
    return std::make_pair(0, chunk);
  }

  //! align to page size
  size = (size + page_mask_) & (~page_mask_);
  if (ailego_unlikely(chunk_meta_.total_size + size >= max_chunks_size_)) {
    LOG_ERROR("No space to new a chunk, curIndexSize=%zu allocSize=%zu",
              (size_t)chunk_meta_.total_size, size);
    return std::make_pair(IndexError_IndexFull, chunk);
  }

  std::string segment_id = make_segment_id(type, seq_id);
  int ret = stg_->append(segment_id, size);
  if (ailego_unlikely(ret != 0)) {
    LOG_ERROR("Storage append segment failed for %s", IndexError::What(ret));
    return std::make_pair(ret, chunk);
  }

  // Account for the new segment and persist the updated meta record.
  chunk_meta_.chunk_cnts[type] += 1;
  chunk_meta_.total_size += size;
  (*stats_.mutable_index_size()) += size;

  // A failed meta write is only logged here; the segment itself exists and
  // the record is written again by the next flush().
  size = chunk_meta_segment_->write(0UL, &chunk_meta_, sizeof(HnswChunkMeta));
  if (ailego_unlikely(size != sizeof(HnswChunkMeta))) {
    LOG_ERROR("Storage write data failed, wsize=%zu", size);
  }

  chunk = get_chunk(type, seq_id);
  return std::make_pair(chunk ? 0 : IndexError_NoMemory, chunk);
}

//! Fetch the segment whose id is derived from (type, seq_id); the result is
//! whatever the storage's get() returns for that id.
Chunk::Pointer ChunkBroker::get_chunk(int type, uint64_t seq_id) const {
  return stg_->get(make_segment_id(type, seq_id));
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw/hnsw_chunk.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <atomic>
#include <cstddef>
#include <mutex>
#include <ailego/utility/memory_helper.h>
#include <zvec/ailego/internal/platform.h>
#include <zvec/ailego/utility/string_helper.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_logger.h>
#include <zvec/core/framework/index_storage.h>
#include <zvec/core/framework/index_streamer.h>

namespace zvec {
namespace core {

using Chunk = IndexStorage::Segment;

//! Allocates and looks up storage segments ("chunks") for the HNSW index and
//! maintains a small meta record (chunk counts, sizes, revision, timestamps)
//! that is written into a dedicated meta segment.
class ChunkBroker {
 public:
  typedef std::shared_ptr<ChunkBroker> Pointer;

  //! Chunk categories; the numeric value is part of the segment id and
  //! indexes HnswChunkMeta::chunk_cnts.
  enum CHUNK_TYPE {
    CHUNK_TYPE_HEADER = 1,
    CHUNK_TYPE_META = 2,
    CHUNK_TYPE_NODE = 3,
    CHUNK_TYPE_UPPER_NEIGHBOR = 4,
    CHUNK_TYPE_NEIGHBOR_INDEX = 5,
    CHUNK_TYPE_SPARSE_NODE = 6,
    CHUNK_TYPE_MAX = 8
  };
  //! Sequence id used for the single meta chunk
  static constexpr size_t kDefaultChunkSeqId = 0UL;

  //! Bind the broker to the stats object it keeps up to date; the reference
  //! is stored, not copied.
  ChunkBroker(IndexStreamer::Stats &stats) : stats_(stats) {}

  //! Open storage
  int open(IndexStorage::Pointer stg, size_t max_index_size, size_t chunk_size,
           bool check_crc);

  //! Flush and release the storage
  int close(void);

  //! Write the meta record and flush the storage with the given checkpoint
  int flush(uint64_t checkpoint);

  //! alloc a new chunk with size, not thread-safe
  std::pair<int, Chunk::Pointer> alloc_chunk(int type, uint64_t seq_id,
                                             size_t size);

  //! alloc a new chunk with chunk size
  inline std::pair<int, Chunk::Pointer> alloc_chunk(int type, uint64_t seq_id) {
    return alloc_chunk(type, seq_id, chunk_meta_.chunk_size);
  }

  //! Look up the chunk stored under (type, seq_id)
  Chunk::Pointer get_chunk(int type, uint64_t seq_id) const;

  //! Number of chunks recorded for `type` in the meta record
  inline size_t get_chunk_cnt(int type) const {
    ailego_assert_with(type < CHUNK_TYPE_MAX, "chunk type overflow");
    return chunk_meta_.chunk_cnts[type];
  }

  //! Whether mark_dirty() has been called since the last open()/close()
  inline bool dirty(void) const {
    return dirty_;
  }

  //! Flag the index as modified; the revision id is bumped only on the first
  //! call after the flag was cleared.
  inline void mark_dirty(void) {
    if (!dirty_) {
      dirty_ = true;
      chunk_meta_.revision_id += 1;
      stats_.set_revision_id(chunk_meta_.revision_id);
    }
  }

  //! Storage currently attached (empty when not opened)
  const IndexStorage::Pointer storage(void) const {
    return stg_;
  }

 private:
  ChunkBroker(const ChunkBroker &) = delete;
  ChunkBroker &operator=(const ChunkBroker &) = delete;

  //! Meta record written verbatim (sizeof(HnswChunkMeta) bytes) into the meta
  //! segment, so its field order and sizes define the stored format.
  struct HnswChunkMeta {
    HnswChunkMeta(void) {
      memset(this, 0, sizeof(HnswChunkMeta));
    }
    //! Reset every field to zero
    void clear() {
      memset(this, 0, sizeof(HnswChunkMeta));
    }

    uint64_t chunk_cnts[CHUNK_TYPE_MAX];  // chunk count per CHUNK_TYPE value
    uint64_t chunk_size;   // size of per chunk
    uint64_t total_size;   // total size of allocated chunk
    uint64_t revision_id;  // index revision
    uint64_t create_time;  // set when the index is initialised
    uint64_t update_time;  // refreshed on every flush
    uint64_t reserved[3];
  };

  static_assert(sizeof(HnswChunkMeta) % 32 == 0,
                "HnswChunkMeta must be aligned with 32 bytes");

  //! Init the storage after open an empty index
  int init_storage(size_t chunk_size);

  //! Load index from storage
  int load_storage(size_t chunk_size);

  //! Build the segment id "HnswT<type>S<seq_id>"
  static inline const std::string make_segment_id(int type, uint64_t seq_id) {
    return "HnswT" + ailego::StringHelper::ToString(type) + "S" +
           ailego::StringHelper::ToString(seq_id);
  }

 private:
  IndexStreamer::Stats &stats_;     // externally owned stats, updated in place
  HnswChunkMeta chunk_meta_{};      // in-memory copy of the meta record
  size_t page_mask_{0UL};           // page size - 1, used to round sizes up
  size_t max_chunks_size_{0UL};     // total size limit checked by alloc_chunk
  IndexStorage::Pointer stg_{};     // attached storage, empty when closed
  IndexStorage::Segment::Pointer chunk_meta_segment_{};  // meta segment
  bool check_crc_{false};
  bool dirty_{false};  // set as true if index is modified , the flag
                       // will not be cleared even if flushed
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw/hnsw_context.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "hnsw_context.h"
#include <chrono>
#include "hnsw_params.h"

namespace zvec {
namespace core {

//! Construct a context bound to `entity` with an explicit vector dimension.
//! The distance calculator receives the raw pointer held by entity_, so
//! entity_ has to be initialised before dc_ (it is declared first).
HnswContext::HnswContext(size_t dimension, const IndexMetric::Pointer &metric,
                         const HnswEntity::Pointer &entity)
    : IndexContext(metric),
      entity_(entity),
      dc_(entity_.get(), metric, dimension) {}

//! Construct a context bound to `entity` without passing a dimension; the
//! distance calculator is built from the entity pointer and the metric only.
HnswContext::HnswContext(const IndexMetric::Pointer &metric,
                         const HnswEntity::Pointer &entity)
    : IndexContext(metric), entity_(entity), dc_(entity_.get(), metric) {}

//! Destructor: explicitly destroys the visit filter.
HnswContext::~HnswContext() {
  visit_filter_.destroy();
}

//! Initialise the context for the given role: create the visit filter and
//! size the working heaps. Returns 0 on success, the filter error when the
//! filter cannot be created, or IndexError_Runtime for an unknown type.
int HnswContext::init(ContextType type) {
  type_ = type;

  if (type == kBuilderContext) {
    // Builders always use the byte-map filter, sized to the current docs.
    const int rc = visit_filter_.init(VisitFilter::ByteMap, entity_->doc_cnt(),
                                      max_scan_num_, negative_probability_);
    if (rc != 0) {
      LOG_ERROR("Create filter failed,  mode %d", filter_mode_);
      return rc;
    }
    candidates_.limit(max_scan_num_);
    update_heap_.limit(entity_->l0_neighbor_cnt() + 1);
    return 0;
  }

  if (type == kSearcherContext) {
    const int rc = visit_filter_.init(filter_mode_, entity_->doc_cnt(),
                                      max_scan_num_, negative_probability_);
    if (rc != 0) {
      LOG_ERROR("Create filter failed,  mode %d", filter_mode_);
      return rc;
    }
    candidates_.limit(max_scan_num_);
    return 0;
  }

  if (type == kStreamerContext) {
    // The doc count of a streamer keeps changing, so the scan budget is
    // derived from the scan ratio and the filter is created with extra
    // reserved room beyond the current doc count.
    const uint32_t current_docs = entity_->doc_cnt();
    max_scan_num_ = compute_max_scan_num(current_docs);
    reserve_max_doc_cnt_ = current_docs + compute_reserve_cnt(current_docs);
    const int rc = visit_filter_.init(filter_mode_, reserve_max_doc_cnt_,
                                      max_scan_num_, negative_probability_);
    if (rc != 0) {
      LOG_ERROR("Create filter failed,  mode %d", filter_mode_);
      return rc;
    }

    update_heap_.limit(entity_->l0_neighbor_cnt() + 1);
    candidates_.limit(max_scan_num_);

    check_need_adjuct_ctx();
    return 0;
  }

  LOG_ERROR("Init context failed");
  return IndexError_Runtime;
}

//! Apply per-query / runtime parameters to a searcher or streamer context.
//! Recognised keys are the PARAM_HNSW_SEARCHER_* / PARAM_HNSW_STREAMER_*
//! names referenced below (ef, scan ratio / limits, brute force threshold,
//! bloom filter enable and negative probability).
//! \return 0 on success; IndexError_InvalidArgument when the streamer scan
//!         settings are out of range (nothing is applied in that case); the
//!         visit filter error when the filter has to be rebuilt and fails;
//!         IndexError_Runtime for any other context type.
int HnswContext::update(const ailego::Params &params) {
  // Re-create the visit filter when its mode or bloom probability changed.
  auto update_visit_filter_param = [&]() {
    bool need_update = false;
    std::string p;
    switch (type_) {
      case kSearcherContext:
        p = PARAM_HNSW_SEARCHER_VISIT_BLOOMFILTER_ENABLE;
        break;
      case kStreamerContext:
        p = PARAM_HNSW_STREAMER_VISIT_BLOOMFILTER_ENABLE;
        break;
    }

    if (params.has(p)) {
      bool bf_enabled;
      params.get(p, &bf_enabled);
      if (bf_enabled ^ (filter_mode_ == VisitFilter::BloomFilter)) {
        need_update = true;
        filter_mode_ =
            bf_enabled ? VisitFilter::BloomFilter : VisitFilter::ByteMap;
      }
    }

    float prob = negative_probability_;
    p.clear();
    switch (type_) {
      case kSearcherContext:
        p = PARAM_HNSW_SEARCHER_VISIT_BLOOMFILTER_NEGATIVE_PROB;
        break;
      case kStreamerContext:
        p = PARAM_HNSW_STREAMER_VISIT_BLOOMFILTER_NEGATIVE_PROB;
        break;
    }
    params.get(p, &prob);
    if (std::abs(prob - negative_probability_) > 1e-6) {
      // Remember the new probability; without this the filter would be
      // rebuilt with the stale value and the change detected on every call.
      negative_probability_ = prob;
      if (filter_mode_ == VisitFilter::BloomFilter) {
        need_update = true;
      }
    }

    if (need_update) {
      visit_filter_.destroy();
      // Searchers size the filter by the current docs, streamers by the
      // reserved capacity.
      const uint32_t max_doc_cnt = (type_ == kSearcherContext)
                                       ? entity_->doc_cnt()
                                       : reserve_max_doc_cnt_;
      int ret = visit_filter_.init(filter_mode_, max_doc_cnt, max_scan_num_,
                                   negative_probability_);
      if (ret != 0) {
        LOG_ERROR("Create filter failed,  mode %d", filter_mode_);
        return ret;
      }
    }
    return 0;
  };

  switch (type_) {
    case kSearcherContext:
      if (params.has(PARAM_HNSW_SEARCHER_EF)) {
        params.get(PARAM_HNSW_SEARCHER_EF, &ef_);
        topk_heap_.limit(std::max(topk_, ef_));
      }

      if (params.has(PARAM_HNSW_SEARCHER_MAX_SCAN_RATIO)) {
        params.get(PARAM_HNSW_SEARCHER_MAX_SCAN_RATIO, &max_scan_ratio_);
        max_scan_num_ =
            static_cast<uint32_t>(max_scan_ratio_ * entity_->doc_cnt());
        max_scan_num_ = std::max(10000U, max_scan_num_);
      }

      if (params.has(PARAM_HNSW_SEARCHER_BRUTE_FORCE_THRESHOLD)) {
        params.get(PARAM_HNSW_SEARCHER_BRUTE_FORCE_THRESHOLD,
                   &bruteforce_threshold_);
      }

      return update_visit_filter_param();

    case kStreamerContext: {
      // Read the scan settings into temporaries and validate them before
      // touching any member, so a rejected update leaves the context as is.
      float scan_ratio = max_scan_ratio_;
      uint32_t scan_limit_max = max_scan_limit_;
      uint32_t scan_limit_min = min_scan_limit_;
      params.get(PARAM_HNSW_STREAMER_MAX_SCAN_RATIO, &scan_ratio);
      params.get(PARAM_HNSW_STREAMER_MAX_SCAN_LIMIT, &scan_limit_max);
      params.get(PARAM_HNSW_STREAMER_MIN_SCAN_LIMIT, &scan_limit_min);
      if (scan_ratio <= 0.0f || scan_ratio > 1.0f) {
        LOG_ERROR("[%s] must be in range (0.0f,1.0f]",
                  PARAM_HNSW_STREAMER_MAX_SCAN_RATIO.c_str());
        return IndexError_InvalidArgument;
      }
      if (scan_limit_max < scan_limit_min) {
        LOG_ERROR("[%s] must be >= [%s]",
                  PARAM_HNSW_STREAMER_MAX_SCAN_LIMIT.c_str(),
                  PARAM_HNSW_STREAMER_MIN_SCAN_LIMIT.c_str());
        return IndexError_InvalidArgument;
      }
      max_scan_ratio_ = scan_ratio;
      max_scan_limit_ = scan_limit_max;
      min_scan_limit_ = scan_limit_min;

      if (params.has(PARAM_HNSW_STREAMER_EF)) {
        params.get(PARAM_HNSW_STREAMER_EF, &ef_);
        topk_heap_.limit(std::max(topk_, ef_));
      }

      if (params.has(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD)) {
        params.get(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD,
                   &bruteforce_threshold_);
      }

      return update_visit_filter_param();
    }

    default:
      LOG_ERROR("update context failed, type=%u", type_);
      return IndexError_Runtime;
  }
}

//! Rebind an existing context to another entity / metric of the same role.
//! The magic number is invalidated first and only set to `magic_num` once the
//! filter, heaps and distance calculator have been updated.
//! \return 0 on success; IndexError_Unsupported when `type` differs from the
//!         context's type; IndexError_NotImplemented for builder contexts;
//!         IndexError_Runtime when the filter reset fails or the type is
//!         unknown.
int HnswContext::update_context(ContextType type, const IndexMeta &meta,
                                const IndexMetric::Pointer &metric,
                                const HnswEntity::Pointer &entity,
                                uint32_t magic_num) {
  if (ailego_unlikely(type != type_)) {
    LOG_ERROR(
        "HnswContext doesn't support shared by different type, "
        "src=%u dst=%u",
        type_, type);
    return IndexError_Unsupported;
  }

  magic_ = kInvalidMgic;

  // TODO: support change filter mode?
  if (type == kBuilderContext) {
    LOG_ERROR("BuildContext doesn't support update");
    return IndexError_NotImplemented;
  }
  if (type != kSearcherContext && type != kStreamerContext) {
    LOG_ERROR("update context failed");
    return IndexError_Runtime;
  }

  if (type == kSearcherContext) {
    // Searchers keep their scan budget and only resize the filter.
    if (!visit_filter_.reset(entity->doc_cnt(), max_scan_num_)) {
      LOG_ERROR("Reset filter failed, mode %d", visit_filter_.get_mode());
      return IndexError_Runtime;
    }

    candidates_.limit(max_scan_num_);
    topk_heap_.limit(std::max(topk_, ef_));
  } else {
    // Streamers recompute budget and reserved capacity from the new entity.
    const uint32_t current_docs = entity->doc_cnt();
    max_scan_num_ = compute_max_scan_num(current_docs);
    reserve_max_doc_cnt_ = current_docs + compute_reserve_cnt(current_docs);
    if (!visit_filter_.reset(reserve_max_doc_cnt_, max_scan_num_)) {
      LOG_ERROR("Reset filter failed, mode %d", visit_filter_.get_mode());
      return IndexError_Runtime;
    }

    update_heap_.limit(entity->l0_neighbor_cnt() + 1);
    candidates_.limit(max_scan_num_);
    topk_heap_.limit(std::max(topk_, ef_));
  }

  entity_ = entity;
  dc_.update(entity_.get(), metric, meta.dimension());
  magic_ = magic_num;
  level_topks_.clear();

  return 0;
}

void HnswContext::fill_random_to_topk_full(void) {
  static std::mt19937 mt(
      std::chrono::system_clock::now().time_since_epoch().count());
  std::uniform_int_distribution<node_id_t> dt(0, entity_->doc_cnt() - 1);
  std::function<node_id_t()> gen;
  node_id_t seqid;
  std::function<bool(node_id_t)> myfilter = [](node_id_t) { return false; };
  if (this->filter().is_valid()) {
    myfilter = [&](node_id_t id) {
      return this->filter()(entity_->get_key(id));
    };
  }

  if (topk_heap_.limit() < entity_->doc_cnt() / 2) {
    gen = [&](void) { return dt(mt); };
  } else {
    // If topk limit is big value, gen sequential id from an random initial
    seqid = dt(mt);
    gen = [&](void) {
      seqid = seqid == (entity_->doc_cnt() - 1) ? 0 : (seqid + 1);
      return seqid;
    };
  }

  for (size_t i = 0; !topk_heap_.full() && i < entity_->doc_cnt(); ++i) {
    const auto id = gen();
    if (!visit_filter_.visited(id) && !myfilter(id)) {
      visit_filter_.set_visited(id);
      topk_heap_.emplace(id, dc_.dist(id));
    }
  }
  return;
}

}  // namespace core
}  // namespace zvec

================================================
FILE: src/core/algorithm/hnsw/hnsw_context.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <zvec/core/framework/index_context.h>
#include "utility/sparse_utility.h"
#include "utility/visit_filter.h"
#include "hnsw_dist_calculator.h"
#include "hnsw_entity.h"

namespace zvec {
namespace core {

class HnswContext : public IndexContext {
 public:
  //! Index Context Pointer
  typedef std::unique_ptr<HnswContext> Pointer;

  enum ContextType {
    kUnknownContext = 0,
    kSearcherContext = 1,
    kBuilderContext = 2,
    kStreamerContext = 3
  };

  //! Construct
  HnswContext(size_t dimension, const IndexMetric::Pointer &metric,
              const HnswEntity::Pointer &entity);

  //! Construct
  HnswContext(const IndexMetric::Pointer &metric,
              const HnswEntity::Pointer &entity);

  //! Destructor
  virtual ~HnswContext();

 public:
  //! Set topk of search result
  virtual void set_topk(uint32_t val) override {
    topk_ = val;
    topk_heap_.limit(std::max(val, ef_));
  }

  //! Retrieve search result
  virtual const IndexDocumentList &result(void) const override {
    return results_[0];
  }

  //! Retrieve search result
  virtual const IndexDocumentList &result(size_t idx) const override {
    return results_[idx];
  }

  //! Retrieve result object for output
  virtual IndexDocumentList *mutable_result(size_t idx) override {
    ailego_assert_with(idx < results_.size(), "invalid idx");
    return &results_[idx];
  }

  //! Retrieve search group result with index
  virtual const IndexGroupDocumentList &group_result(void) const override {
    return group_results_[0];
  }

  //! Retrieve search group result with index
  virtual const IndexGroupDocumentList &group_result(
      size_t idx) const override {
    return group_results_[idx];
  }

  virtual uint32_t magic(void) const override {
    return magic_;
  }

  //! Set mode of debug
  virtual void set_debug_mode(bool enable) override {
    debug_mode_ = enable;
  }

  //! Retrieve mode of debug
  virtual bool debug_mode(void) const override {
    return this->debugging();
  }

  //! Retrieve string of debug: the scan count and the three debug counters
  //! formatted as "scan_cnt=..,get_vector_cnt=..,get_neighbors_cnt=..,
  //! dup_node=..". Returns an empty string if formatting fails.
  virtual std::string debug_string(void) const override {
    char buf[4096];
    const int written = snprintf(
        buf, sizeof(buf),
        "scan_cnt=%zu,get_vector_cnt=%u,get_neighbors_cnt=%u,dup_node=%u",
        get_scan_num(), stats_get_vector_cnt_, stats_get_neighbors_cnt_,
        stats_visit_dup_cnt_);
    if (written <= 0) {
      return std::string();
    }
    // snprintf reports the untruncated length; never read past the buffer.
    const size_t size =
        std::min(static_cast<size_t>(written), sizeof(buf) - 1);
    return std::string(buf, size);
  }

  //! Update the parameters of context
  virtual int update(const ailego::Params &params) override;

 public:
  //! Init context
  int init(ContextType type);

  //! Update context, the context may be shared by different searcher/streamer
  int update_context(ContextType type, const IndexMeta &meta,
                     const IndexMetric::Pointer &metric,
                     const HnswEntity::Pointer &entity, uint32_t magic_num);

  inline const HnswEntity &get_entity() const {
    return *entity_;
  }

  inline void resize_results(size_t size) {
    if (group_by_search()) {
      group_results_.resize(size);
    } else {
      results_.resize(size);
    }
  }

  inline void topk_to_result() {
    return topk_to_result(0);
  }

  //! Construct result from topk heap, result will be normalized
  inline void topk_to_result(uint32_t idx) {
    if (group_by_search()) {
      topk_to_group_result(idx);
    } else {
      topk_to_single_result(idx);
    }
  }

  inline void recal_topk_dist() {
    TopkHeap heap(topk_heap_);
    topk_heap_.clear();

    for (size_t i = 0; i < heap.size(); ++i) {
      node_id_t id = heap[i].first;
      dist_t dist = dc_.dist(id);
      topk_heap_.emplace_back(id, dist);
    }
  }

  inline void topk_to_single_result(uint32_t idx) {
    if (force_padding_topk_ && !topk_heap_.full() &&
        topk_heap_.size() < entity_->doc_cnt()) {
      this->fill_random_to_topk_full();
    }
    if (ailego_unlikely(topk_heap_.size() == 0)) {
      return;
    }

    ailego_assert_with(idx < results_.size(), "invalid idx");
    int size = std::min(topk_, static_cast<uint32_t>(topk_heap_.size()));
    topk_heap_.sort();
    results_[idx].clear();

    for (int i = 0; i < size; ++i) {
      auto score = topk_heap_[i].second;
      if (score > this->threshold()) {
        break;
      }

      node_id_t id = topk_heap_[i].first;
      if (fetch_vector_) {
        results_[idx].emplace_back(entity_->get_key(id), score, id,
                                   entity_->get_vector(id));
      } else {
        results_[idx].emplace_back(entity_->get_key(id), score, id);
      }
    }

    return;
  }

  //! Construct result from topk heap, result will be normalized
  inline void topk_to_group_result(uint32_t idx) {
    ailego_assert_with(idx < group_results_.size(), "invalid idx");

    group_results_[idx].clear();

    std::vector<std::pair<std::string, TopkHeap>> group_topk_list;
    std::vector<std::pair<std::string, float>> best_score_in_groups;
    for (auto itr = group_topk_heaps_.begin(); itr != group_topk_heaps_.end();
         itr++) {
      const std::string &group_id = (*itr).first;
      auto &heap = (*itr).second;
      heap.sort();

      if (heap.size() > 0) {
        float best_score = heap[0].second;
        best_score_in_groups.push_back(std::make_pair(group_id, best_score));
      }
    }

    std::sort(best_score_in_groups.begin(), best_score_in_groups.end(),
              [](const std::pair<std::string, float> &a,
                 const std::pair<std::string, float> &b) -> int {
                return a.second < b.second;
              });

    // truncate to group num
    for (uint32_t i = 0; i < group_num() && i < best_score_in_groups.size();
         ++i) {
      const std::string &group_id = best_score_in_groups[i].first;

      group_topk_list.emplace_back(
          std::make_pair(group_id, group_topk_heaps_[group_id]));
    }

    group_results_[idx].resize(group_topk_list.size());

    for (uint32_t i = 0; i < group_topk_list.size(); ++i) {
      const std::string &group_id = group_topk_list[i].first;
      group_results_[idx][i].set_group_id(group_id);

      uint32_t size = std::min(
          group_topk_, static_cast<uint32_t>(group_topk_list[i].second.size()));

      for (uint32_t j = 0; j < size; ++j) {
        auto score = group_topk_list[i].second[j].second;
        if (score > this->threshold()) {
          break;
        }

        node_id_t id = group_topk_list[i].second[j].first;

        if (fetch_vector_) {
          group_results_[idx][i].mutable_docs()->emplace_back(
              entity_->get_key(id), score, id, entity_->get_vector(id));
        } else {
          group_results_[idx][i].mutable_docs()->emplace_back(
              entity_->get_key(id), score, id);
        }
      }
    }
  }

  //! Install a new query in the distance calculator and reset its compare
  //! counter. When the metric provides a query preprocess function, the query
  //! is first copied into preprocess_buffer_, preprocessed in place there, and
  //! the buffer is used as the query instead of the caller's memory.
  inline void reset_query(const void *query) {
    if (auto query_preprocess_func = index_metric_->get_query_preprocess_func();
        query_preprocess_func != nullptr) {
      size_t dim = dc_.dimension();
      // NOTE(review): `dim` is used both as the buffer size and as the number
      // of bytes copied - this is only complete if each element occupies one
      // byte; confirm against the element type of preprocessed queries.
      preprocess_buffer_.resize(dim);
      memcpy(preprocess_buffer_.data(), query, dim);
      query_preprocess_func(preprocess_buffer_.data(), dim);
      // From here on the calculator reads the preprocessed copy, which stays
      // valid until the next call overwrites preprocess_buffer_.
      query = preprocess_buffer_.data();
    }

    dc_.reset_query(query);
    dc_.clear_compare_cnt();
  }

  inline HnswDistCalculator &dist_calculator() {
    return dc_;
  }

  inline TopkHeap &topk_heap() {
    return topk_heap_;
  }

  inline TopkHeap &update_heap() {
    return update_heap_;
  }

  inline VisitFilter &visit_filter() {
    return visit_filter_;
  }

  inline CandidateHeap &candidates() {
    return candidates_;
  }

  inline void set_max_scan_num(uint32_t max_scan_num) {
    max_scan_num_ = max_scan_num;
  }

  inline void set_max_scan_limit(uint32_t max_scan_limit) {
    max_scan_limit_ = max_scan_limit;
  }

  inline void set_min_scan_limit(uint32_t min_scan_limit) {
    min_scan_limit_ = min_scan_limit;
  }

  inline void set_ef(uint32_t v) {
    ef_ = v;
  }

  inline void set_filter_mode(uint32_t v) {
    filter_mode_ = v;
  }

  inline void set_filter_negative_probability(float v) {
    negative_probability_ = v;
  }

  inline void set_max_scan_ratio(float v) {
    max_scan_ratio_ = v;
  }

  virtual void set_magic(uint32_t v) {
    magic_ = v;
  }

  virtual void set_force_padding_topk(bool v) {
    force_padding_topk_ = v;
  }

  void set_bruteforce_threshold(uint32_t v) override {
    bruteforce_threshold_ = v;
  }

  inline uint32_t get_bruteforce_threshold() const {
    return bruteforce_threshold_;
  }

  void set_fetch_vector(bool v) override {
    fetch_vector_ = v;
  }

  bool fetch_vector() const override {
    return fetch_vector_;
  }

  //! Reset context
  void reset(void) override {
    this->clear();
    set_filter(nullptr);
    reset_threshold();
    set_fetch_vector(false);
    set_group_params(0, 0);
    reset_group_by();
  }

  inline std::map<std::string, TopkHeap> &group_topk_heaps() {
    return group_topk_heaps_;
  }

  //! Heap used for the given graph level. Heaps are created lazily: when
  //! `level` has not been requested before, heaps for all missing levels up to
  //! it are added, each limited to max(neighbor_cnt(level), ef_construction).
  inline TopkHeap &level_topk(int level) {
    const size_t wanted = static_cast<size_t>(level);
    if (ailego_unlikely(level_topks_.size() <= wanted)) {
      int next = level_topks_.size();
      level_topks_.resize(level + 1);
      while (next <= level) {
        size_t heap_size =
            std::max(entity_->neighbor_cnt(next), entity_->ef_construction());
        TopkHeap &fresh = level_topks_[next];
        fresh.clear();
        fresh.limit(heap_size);
        ++next;
      }
    }

    return level_topks_[level];
  }

  inline void check_need_adjuct_ctx(void) {
    check_need_adjuct_ctx(entity_->doc_cnt());
  }

  //! Extra capacity to reserve for `cur_doc` documents: cur_doc itself,
  //! bounded to the range [kMinReserveDocCnt, kMaxReserveDocCnt].
  inline size_t compute_reserve_cnt(uint32_t cur_doc) const {
    const bool above_max = cur_doc > kMaxReserveDocCnt;
    if (above_max) {
      return kMaxReserveDocCnt;
    }
    const bool below_min = cur_doc < kMinReserveDocCnt;
    return below_min ? static_cast<size_t>(kMinReserveDocCnt)
                     : static_cast<size_t>(cur_doc);
  }

  //! candidates heap and visitfilter need to resize as doc cnt growing up
  inline void check_need_adjuct_ctx(uint32_t doc_cnt) {
    if (ailego_unlikely(doc_cnt + kTriggerReserveCnt > reserve_max_doc_cnt_)) {
      while (doc_cnt + kTriggerReserveCnt > reserve_max_doc_cnt_) {
        reserve_max_doc_cnt_ =
            reserve_max_doc_cnt_ + compute_reserve_cnt(reserve_max_doc_cnt_);
      }
      uint32_t max_scan_cnt = compute_max_scan_num(reserve_max_doc_cnt_);
      max_scan_num_ = max_scan_cnt;
      visit_filter_.reset(reserve_max_doc_cnt_, max_scan_cnt);
      candidates_.clear();
      candidates_.limit(max_scan_num_);
    }
  }

  //! Scan budget for `max_doc_cnt` documents: the doc count scaled by the
  //! scan ratio, raised to min_scan_limit_ when below it, otherwise capped at
  //! max_scan_limit_.
  inline uint32_t compute_max_scan_num(uint32_t max_doc_cnt) const {
    const uint32_t scaled = max_doc_cnt * max_scan_ratio_;
    if (scaled < min_scan_limit_) {
      return min_scan_limit_;
    }
    if (scaled > max_scan_limit_) {
      return max_scan_limit_;
    }
    return scaled;
  }

  inline size_t get_scan_num() const {
    return dc_.compare_cnt();
  }

  inline uint64_t reach_scan_limit() const {
    return dc_.compare_cnt() >= max_scan_num_;
  }

  inline bool error() const {
    return dc_.error();
  }

  //! Reset per-query state: the distance calculator, the debug counters (only
  //! while debugging) and the contents of every result list.
  inline void clear() {
    dc_.clear();

    if (ailego_unlikely(this->debugging())) {
      stats_visit_dup_cnt_ = 0u;
      stats_get_vector_cnt_ = 0u;
      stats_get_neighbors_cnt_ = 0u;
    }

    // Only the lists' contents are dropped; results_ / group_results_ keep
    // their size because the next query reuses the slots.
    for (size_t slot = 0; slot < results_.size(); ++slot) {
      results_[slot].clear();
    }
    for (size_t slot = 0; slot < group_results_.size(); ++slot) {
      group_results_[slot].clear();
    }
  }

  uint32_t *mutable_stats_get_neighbors() {
    return &stats_get_neighbors_cnt_;
  }

  uint32_t *mutable_stats_get_vector() {
    return &stats_get_vector_cnt_;
  }

  uint32_t *mutable_stats_visit_dup_cnt() {
    return &stats_visit_dup_cnt_;
  }

  inline bool debugging(void) const {
    return debug_mode_;
  }

  inline void update_dist_caculator_distance(
      const IndexMetric::MatrixDistance &distance,
      const IndexMetric::MatrixBatchDistance &batch_distance) {
    dc_.update_distance(distance, batch_distance);
  }

  //! Get topk
  inline uint32_t topk() const override {
    return topk_;
  }

  //! Get group topk
  inline uint32_t group_topk() const {
    return group_topk_;
  }

  //! Get group num
  inline uint32_t group_num() const {
    return group_num_;
  }

  //! Get if group by search
  inline bool group_by_search() {
    return group_num_ > 0;
  }

  //! Set group params: stores the number of groups and the per-group topk,
  //! derives the overall topk as their product, resizes the topk heap limit to
  //! max(topk, ef) and discards all per-group heaps.
  void set_group_params(uint32_t group_num, uint32_t group_topk) override {
    group_num_ = group_num;
    group_topk_ = group_topk;

    // Overall topk is the total number of documents across all groups.
    topk_ = group_topk_ * group_num_;

    topk_heap_.limit(std::max(topk_, ef_));

    group_topk_heaps_.clear();
  }

 private:
  // Filling random nodes if topk not full
  void fill_random_to_topk_full(void);

  constexpr static uint32_t kTriggerReserveCnt = 4096UL;
  constexpr static uint32_t kMinReserveDocCnt = 4096UL;
  constexpr static uint32_t kMaxReserveDocCnt = 128 * 1024UL;
  constexpr static uint32_t kInvalidMgic = -1U;

 private:
  HnswEntity::Pointer entity_;
  HnswDistCalculator dc_;
  IndexMetric::Pointer metric_;

  bool debug_mode_{false};
  bool force_padding_topk_{false};
  uint32_t max_scan_num_{0};
  uint32_t max_scan_limit_{0};
  uint32_t min_scan_limit_{0};
  uint32_t reserve_max_doc_cnt_{kMinReserveDocCnt};
  uint32_t topk_{0};
  uint32_t group_topk_{0};
  uint32_t filter_mode_{VisitFilter::ByteMap};
  float negative_probability_{HnswEntity::kDefaultBFNegativeProbability};
  uint32_t ef_{HnswEntity::kDefaultEf};
  float max_scan_ratio_{HnswEntity::kDefaultScanRatio};
  uint32_t magic_{0U};
  std::vector<IndexDocumentList> results_{};
  std::vector<IndexGroupDocumentList> group_results_{};
  TopkHeap topk_heap_{};
  TopkHeap update_heap_{};
  std::vector<TopkHeap> level_topks_{};
  CandidateHeap candidates_{};
  VisitFilter visit_filter_{};
  uint32_t bruteforce_threshold_{};
  bool fetch_vector_{false};

  uint32_t group_num_{0};
  std::map<std::string, TopkHeap> group_topk_heaps_{};

  uint32_t type_{kUnknownContext};
  //! debug stats info
  uint32_t stats_get_neighbors_cnt_{0u};
  uint32_t stats_get_vector_cnt_{0u};
  uint32_t stats_visit_dup_cnt_{0u};
  std::string preprocess_buffer_;
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw/hnsw_dist_calculator.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <zvec/core/framework/index_meta.h>
#include "hnsw_entity.h"

namespace zvec {
namespace core {

//! Distance helper bound to one HnswEntity and one metric.
//!
//! Keeps the current query pointer and the vector dimension, evaluates the
//! metric's distance functions between the query and stored vectors (or
//! between two stored vectors), counts the calls made, and latches an error
//! flag whenever a vector pointer turns out to be null.
class HnswDistCalculator {
 public:
  using Pointer = std::shared_ptr<HnswDistCalculator>;

 public:
  enum DistType {
    DIST_NONE = 0,
    DIST_DENSE = 1,
    DIST_HYBRID = 2,
    DIST_SPARSE = 3
  };

 public:
  //! Constructor with an explicit dimension and query.
  //! `metric` is dereferenced here, so it must not be null.
  HnswDistCalculator(const HnswEntity *entity,
                     const IndexMetric::Pointer &metric, uint32_t dim,
                     const void *query)
      : entity_(entity),
        distance_(metric->distance()),
        batch_distance_(metric->batch_distance()),
        query_(query),
        dim_(dim),
        compare_cnt_(0) {}

  //! Constructor with an explicit dimension; the query starts out as nullptr.
  HnswDistCalculator(const HnswEntity *entity,
                     const IndexMetric::Pointer &metric, uint32_t dim)
      : HnswDistCalculator(entity, metric, dim, nullptr) {}

  //! Constructor with dimension 0 and no query.
  HnswDistCalculator(const HnswEntity *entity,
                     const IndexMetric::Pointer &metric)
      : HnswDistCalculator(entity, metric, 0U, nullptr) {}

  //! Rebind to another entity/metric; dimension, query and counters are kept.
  void update(const HnswEntity *entity, const IndexMetric::Pointer &metric) {
    entity_ = entity;
    distance_ = metric->distance();
    batch_distance_ = metric->batch_distance();
  }

  //! Rebind to another entity/metric and change the dimension.
  void update(const HnswEntity *entity, const IndexMetric::Pointer &metric,
              uint32_t dim) {
    update(entity, metric);
    dim_ = dim;
  }

  //! Replace only the distance functions.
  inline void update_distance(
      const IndexMetric::MatrixDistance &distance,
      const IndexMetric::MatrixBatchDistance &batch_distance) {
    distance_ = distance;
    batch_distance_ = batch_distance;
  }

  //! Reset query vector data; also clears the error flag.
  inline void reset_query(const void *query) {
    error_ = false;
    query_ = query;
  }

  //! Returns distance between two raw vectors.
  //! On a null input the error flag is set and 0.0f is returned.
  //! This overload does not touch the comparison counter.
  inline dist_t dist(const void *vec_lhs, const void *vec_rhs) {
    if (ailego_unlikely(vec_lhs == nullptr || vec_rhs == nullptr)) {
      LOG_ERROR("Nullptr of dense vector");
      error_ = true;
      return 0.0f;
    }

    float score{0.0f};

    distance_(vec_lhs, vec_rhs, dim_, &score);

    return score;
  }

  //! Returns distance between query and vec.
  inline dist_t dist(const void *vec) {
    compare_cnt_++;

    return dist(vec, query_);
  }

  //! Return distance between query and node id.
  inline dist_t dist(node_id_t id) {
    compare_cnt_++;

    const void *feat = entity_->get_vector(id);
    if (ailego_unlikely(feat == nullptr)) {
      LOG_ERROR("Get nullptr vector, id=%u", id);
      error_ = true;
      return 0.0f;
    }

    return dist(feat, query_);
  }

  //! Return dist node lhs between node rhs
  inline dist_t dist(node_id_t lhs, node_id_t rhs) {
    compare_cnt_++;

    const void *feat = entity_->get_vector(lhs);
    const void *query = entity_->get_vector(rhs);
    if (ailego_unlikely(feat == nullptr || query == nullptr)) {
      LOG_ERROR("Get nullptr vector");
      error_ = true;
      return 0.0f;
    }

    return dist(feat, query);
  }

  //! Functor form of dist(const void *).
  dist_t operator()(const void *vec) {
    return dist(vec);
  }

  //! Functor form of dist(node_id_t).
  //! Takes node_id_t (not the POSIX id_t) so that the signature matches the
  //! dist() overload it forwards to on every platform.
  dist_t operator()(node_id_t i) {
    return dist(i);
  }

  //! Functor form of dist(node_id_t, node_id_t).
  dist_t operator()(node_id_t lhs, node_id_t rhs) {
    return dist(lhs, rhs);
  }

  //! Distances between the query and `num` raw vectors, written to
  //! `distances`. The comparison counter is bumped once per call, not once
  //! per vector.
  void batch_dist(const void **vecs, size_t num, dist_t *distances) {
    compare_cnt_++;

    batch_distance_(vecs, query_, num, dim_, distances);
  }

  //! Distance between the query and node `id`, computed through the batch
  //! distance function with a batch of one.
  inline dist_t batch_dist(node_id_t id) {
    compare_cnt_++;

    const void *feat = entity_->get_vector(id);
    if (ailego_unlikely(feat == nullptr)) {
      LOG_ERROR("Get nullptr vector, id=%u", id);
      error_ = true;
      return 0.0f;
    }
    dist_t score = 0;
    batch_distance_(&feat, query_, 1, dim_, &score);

    return score;
  }

  //! Reset both the comparison counter and the error flag.
  inline void clear() {
    compare_cnt_ = 0;
    error_ = false;
  }

  //! Reset only the comparison counter.
  inline void clear_compare_cnt() {
    compare_cnt_ = 0;
  }

  //! Whether any distance call failed since the last clear()/reset_query().
  inline bool error() const {
    return error_;
  }

  //! Get distances compute times
  inline uint32_t compare_cnt() const {
    return compare_cnt_;
  }

  //! Dimension passed to the distance functions.
  inline uint32_t dimension() const {
    return dim_;
  }

 private:
  HnswDistCalculator(const HnswDistCalculator &) = delete;
  HnswDistCalculator &operator=(const HnswDistCalculator &) = delete;

 private:
  const HnswEntity *entity_;

  IndexMetric::MatrixDistance distance_;
  IndexMetric::MatrixBatchDistance batch_distance_;

  const void *query_;
  uint32_t dim_;

  uint32_t compare_cnt_;  // record distance compute times
  // uint32_t compare_cnt_batch_;  // record batch distance compute time
  bool error_{false};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw/hnsw_entity.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "hnsw_entity.h"
#include <zvec/core/framework/index_stats.h>
#include "utility/sparse_utility.h"

namespace zvec {
namespace core {

//! Segment identifiers under which the parts of the index are registered
//! with the dumper (see the dump_* functions in this file).
const std::string HnswEntity::kGraphHeaderSegmentId{"graph.header"};
const std::string HnswEntity::kGraphFeaturesSegmentId{"graph.features"};
const std::string HnswEntity::kGraphKeysSegmentId{"graph.keys"};
const std::string HnswEntity::kGraphNeighborsSegmentId{"graph.neighbors"};
const std::string HnswEntity::kGraphOffsetsSegmentId{"graph.offsets"};
const std::string HnswEntity::kGraphMappingSegmentId{"graph.mapping"};
const std::string HnswEntity::kHnswHeaderSegmentId{"hnsw.header"};
const std::string HnswEntity::kHnswNeighborsSegmentId{"hnsw.neighbors"};
const std::string HnswEntity::kHnswOffsetsSegmentId{"hnsw.offsets"};

//! Append zero bytes after a payload of `data_size` bytes so that the total
//! written length becomes a multiple of the AlignSize() granularity.
//! The number of padding bytes is stored in `*padding_size` (0 when the
//! payload is already aligned, in which case nothing is written).
//! Returns 0 on success, IndexError_WriteData if the dumper wrote fewer
//! bytes than requested.
int HnswEntity::CalcAndAddPadding(const IndexDumper::Pointer &dumper,
                                  size_t data_size, size_t *padding_size) {
  *padding_size = AlignSize(data_size) - data_size;
  if (*padding_size == 0) {
    return 0;
  }

  const std::string padding(*padding_size, '\0');
  if (dumper->write(padding.data(), padding.size()) != padding.size()) {
    // size_t is printed with %zu; %lu is only right where long and size_t
    // happen to have the same width.
    LOG_ERROR("Append padding failed, size %zu", *padding_size);
    return IndexError_WriteData;
  }
  return 0;
}

//! Write one complete segment: the payload, zero padding up to the
//! AlignSize() granularity, and the segment meta (id, size, padding, crc).
//! The CRC covers the payload only; the padding is reported separately to
//! dumper->append().
//! Returns the number of bytes written (payload + padding) on success, or
//! an error code on failure.
int64_t HnswEntity::dump_segment(const IndexDumper::Pointer &dumper,
                                 const std::string &segment_id,
                                 const void *data, size_t size) const {
  const size_t len = dumper->write(data, size);
  if (len != size) {
    LOG_ERROR("Dump segment %s data failed, expect: %zu, actual: %zu",
              segment_id.c_str(), size, len);
    return IndexError_WriteData;
  }

  // Shared helper: writes the zero padding (if any) and logs on failure.
  size_t padding_size = 0;
  int ret = CalcAndAddPadding(dumper, size, &padding_size);
  if (ret != 0) {
    return ret;
  }

  const uint32_t crc = ailego::Crc32c::Hash(data, size);
  ret = dumper->append(segment_id, size, padding_size, crc);
  if (ret != 0) {
    LOG_ERROR("Dump segment %s meta failed, ret=%d", segment_id.c_str(), ret);
    return ret;
  }

  return len + padding_size;
}

//! Dump the two header segments (graph header first, then hnsw header).
//! Each is written with the byte count stored in its own `size` field.
//! Returns the total bytes written, or the first negative error code.
int64_t HnswEntity::dump_header(const IndexDumper::Pointer &dumper,
                                const HNSWHeader &hd) const {
  //! graph header segment
  const int64_t graph_bytes =
      dump_segment(dumper, kGraphHeaderSegmentId, &hd.graph, hd.graph.size);
  if (graph_bytes < 0) {
    return graph_bytes;
  }

  //! hnsw (upper layer) header segment
  const int64_t hnsw_bytes =
      dump_segment(dumper, kHnswHeaderSegmentId, &hd.hnsw, hd.hnsw.size);
  if (hnsw_bytes < 0) {
    return hnsw_bytes;
  }

  return graph_bytes + hnsw_bytes;
}

//! Placeholder for vector reordering. Not implemented: none of the
//! arguments are read or written, so both mappings stay as the caller
//! passed them in.
void HnswEntity::reshuffle_vectors(
    const std::function<level_t(node_id_t)> & /*get_level*/,
    std::vector<node_id_t> * /*n2o_mapping*/,
    std::vector<node_id_t> * /*o2n_mapping*/, key_t * /*keys*/) const {
  // TODO
}

//! Dump the mapping segment: the node ids 0..doc_cnt()-1 ordered by
//! ascending key, where `keys[id]` is the key of node `id`.
//! Returns the bytes written by dump_segment(), or its error code.
int64_t HnswEntity::dump_mapping_segment(const IndexDumper::Pointer &dumper,
                                         const key_t *keys) const {
  std::vector<node_id_t> ids_by_key(doc_cnt());
  std::iota(ids_by_key.begin(), ids_by_key.end(), 0U);

  const auto key_less = [keys](node_id_t lhs, node_id_t rhs) {
    return keys[lhs] < keys[rhs];
  };
  std::sort(ids_by_key.begin(), ids_by_key.end(), key_less);

  return dump_segment(dumper, kGraphMappingSegmentId, ids_by_key.data(),
                      ids_by_key.size() * sizeof(node_id_t));
}

//! Dump every segment of the index in a fixed order: headers, vectors,
//! level-0 and upper neighbors, keys, and finally the key-sorted mapping.
//! Returns the total bytes written, or the first negative error code.
int64_t HnswEntity::dump_segments(
    const IndexDumper::Pointer &dumper, key_t *keys,
    const std::function<level_t(node_id_t)> &get_level) const {
  // Dump a copy of the header so the live one is not modified; dumped
  // vectors are padded, so node_size is the aligned vector size.
  HNSWHeader out_header(header());
  out_header.graph.node_size = AlignSize(vector_size());

  std::vector<node_id_t> new_to_origin;  // map new id to origin id
  std::vector<node_id_t> origin_to_new;  // map origin id to new id
  reshuffle_vectors(get_level, &new_to_origin, &origin_to_new, keys);
  if (!origin_to_new.empty()) {
    out_header.hnsw.entry_point = origin_to_new[entry_point()];
  }

  //! Headers
  const int64_t header_bytes = dump_header(dumper, out_header);
  if (header_bytes < 0) {
    return header_bytes;
  }

  //! Vectors
  const int64_t vector_bytes = dump_vectors(dumper, new_to_origin);
  if (vector_bytes < 0) {
    return vector_bytes;
  }

  //! Neighbors
  const int64_t neighbor_bytes =
      dump_neighbors(dumper, get_level, new_to_origin, origin_to_new);
  if (neighbor_bytes < 0) {
    return neighbor_bytes;
  }

  //! The id mappings are no longer needed: release their memory
  new_to_origin = std::vector<node_id_t>();
  origin_to_new = std::vector<node_id_t>();

  //! Keys
  const int64_t key_bytes = dump_segment(dumper, kGraphKeysSegmentId, keys,
                                         doc_cnt() * sizeof(key_t));
  if (key_bytes < 0) {
    return key_bytes;
  }

  //! Mapping
  const int64_t mapping_bytes = dump_mapping_segment(dumper, keys);
  if (mapping_bytes < 0) {
    return mapping_bytes;
  }

  return header_bytes + key_bytes + vector_bytes + neighbor_bytes +
         mapping_bytes;
}

//! Dump all vectors as one segment. Every vector is followed by zero
//! padding up to the AlignSize() granularity, so each dumped record is
//! AlignSize(vector_size()) bytes long.
//! When `reorder_mapping` is non-empty, position `id` of the dump holds the
//! vector of node `reorder_mapping[id]`; otherwise node `id` itself.
//! The CRC covers vectors and padding, so the segment is registered with a
//! padding size of 0.
//! Returns the bytes written, or a negative error code.
int64_t HnswEntity::dump_vectors(
    const IndexDumper::Pointer &dumper,
    const std::vector<node_id_t> &reorder_mapping) const {
  const size_t vec_bytes = vector_size();
  const size_t padding_size = AlignSize(vec_bytes) - vec_bytes;

  // std::vector value-initialises its elements, so the padding is already
  // all zero bytes; no memset is needed (and none is attempted on an empty
  // buffer).
  const std::vector<char> padding(padding_size);
  uint32_t crc = 0U;
  size_t vecs_size = 0UL;

  //! dump vectors
  for (node_id_t id = 0; id < doc_cnt(); ++id) {
    const void *data =
        get_vector(reorder_mapping.empty() ? id : reorder_mapping[id]);
    if (ailego_unlikely(!data)) {
      return IndexError_ReadData;
    }
    size_t len = dumper->write(data, vec_bytes);
    if (len != vec_bytes) {
      LOG_ERROR("Dump vectors failed, write=%zu expect=%zu", len, vec_bytes);
      return IndexError_WriteData;
    }

    crc = ailego::Crc32c::Hash(data, vec_bytes, crc);
    vecs_size += vec_bytes;

    if (padding_size == 0) {
      continue;
    }

    len = dumper->write(padding.data(), padding_size);
    if (len != padding_size) {
      LOG_ERROR("Dump vectors failed, write=%zu expect=%zu", len, padding_size);
      return IndexError_WriteData;
    }
    crc = ailego::Crc32c::Hash(padding.data(), padding_size, crc);
    vecs_size += padding_size;
  }

  int ret = dumper->append(kGraphFeaturesSegmentId, vecs_size, 0UL, crc);
  if (ret != 0) {
    LOG_ERROR("Dump vectors segment meta failed, ret %d", ret);
    return ret;
  }

  return vecs_size;
}

//! Dump the level-0 neighbor lists and their offset table.
//!
//! The neighbors segment is the back-to-back concatenation of every node's
//! neighbor ids (variable length per node). The offsets segment holds one
//! GraphNeighborMeta (byte offset + neighbor count) per node.
//! When `reorder_mapping` is non-empty, position `id` of the dump takes the
//! neighbors of node `reorder_mapping[id]`; when `neighbor_mapping` is
//! non-empty, every neighbor id is translated through it before writing.
//! Returns the bytes written for both segments, or a negative error code.
int64_t HnswEntity::dump_graph_neighbors(
    const IndexDumper::Pointer &dumper,
    const std::vector<node_id_t> &reorder_mapping,
    const std::vector<node_id_t> &neighbor_mapping) const {
  std::vector<GraphNeighborMeta> graph_meta;
  graph_meta.reserve(doc_cnt());
  size_t offset = 0;
  uint32_t crc = 0;
  std::vector<node_id_t> mapping(l0_neighbor_cnt());

  // Statistics for the log line below. The minimum starts at the largest
  // representable value so that any real count lowers it.
  uint32_t min_neighbor_count = std::numeric_limits<uint32_t>::max();
  uint32_t max_neighbor_count = 0;
  size_t sum_neighbor_count = 0;

  for (node_id_t id = 0; id < doc_cnt(); ++id) {
    const Neighbors neighbors =
        get_neighbors(0, reorder_mapping.empty() ? id : reorder_mapping[id]);
    ailego_assert_with(!!neighbors.data, "invalid neighbors");
    ailego_assert_with(neighbors.size() <= l0_neighbor_cnt(),
                       "invalid neighbors");

    const uint32_t neighbor_count = static_cast<uint32_t>(neighbors.size());
    min_neighbor_count = std::min(min_neighbor_count, neighbor_count);
    max_neighbor_count = std::max(max_neighbor_count, neighbor_count);
    sum_neighbor_count += neighbor_count;

    graph_meta.emplace_back(offset, neighbor_count);
    const size_t size = neighbors.size() * sizeof(node_id_t);
    const node_id_t *data = neighbors.data;
    if (!neighbor_mapping.empty()) {
      for (node_id_t i = 0; i < neighbors.size(); ++i) {
        mapping[i] = neighbor_mapping[neighbors[i]];
      }
      data = mapping.data();
    }
    if (dumper->write(data, size) != size) {
      LOG_ERROR("Dump graph neighbor id=%u failed, size %zu", id, size);
      return IndexError_WriteData;
    }
    crc = ailego::Crc32c::Hash(data, size, crc);
    offset += size;
  }

  uint32_t average_neighbor_count = 0;
  if (doc_cnt() > 0) {
    average_neighbor_count = sum_neighbor_count / doc_cnt();
  } else {
    // Nothing was scanned: report 0 rather than the initial sentinel.
    min_neighbor_count = 0;
  }
  LOG_INFO(
      "Dump hnsw graph: min_neighbor_count[%u] max_neighbor_count[%u] "
      "average_neighbor_count[%u]",
      min_neighbor_count, max_neighbor_count, average_neighbor_count);

  size_t padding_size = 0;
  int ret = CalcAndAddPadding(dumper, offset, &padding_size);
  if (ret != 0) {
    return ret;
  }
  ret = dumper->append(kGraphNeighborsSegmentId, offset, padding_size, crc);
  if (ret != 0) {
    LOG_ERROR("Dump segment %s failed, ret %d",
              kGraphNeighborsSegmentId.c_str(), ret);
    return ret;
  }

  //! dump level 0 neighbors meta
  auto len = dump_segment(dumper, kGraphOffsetsSegmentId, graph_meta.data(),
                          graph_meta.size() * sizeof(GraphNeighborMeta));
  if (len < 0) {
    return len;
  }

  return len + offset + padding_size;
}

//! Dump the neighbor lists of the levels above 0 and their offset table.
//!
//! For every node with level > 0, one fixed-size record is written per
//! level 1..level: [neighbor count, upper_neighbor_cnt() ids, zero filled].
//! The offsets segment holds one HnswNeighborMeta per node: (0, 0) for
//! level-0-only nodes, otherwise the byte offset of the node's first record
//! and its level.
//! When `reorder_mapping` is non-empty, position `id` of the dump is taken
//! from node `reorder_mapping[id]`; when `neighbor_mapping` is non-empty,
//! every neighbor id is translated through it before writing.
//! Returns the bytes written for both segments, or a negative error code.
int64_t HnswEntity::dump_upper_neighbors(
    const IndexDumper::Pointer &dumper,
    const std::function<level_t(node_id_t)> &get_level,
    const std::vector<node_id_t> &reorder_mapping,
    const std::vector<node_id_t> &neighbor_mapping) const {
  std::vector<HnswNeighborMeta> hnsw_meta;
  hnsw_meta.reserve(doc_cnt());
  size_t offset = 0;
  uint32_t crc = 0;
  // Slot 0 holds the count, the remaining slots the neighbor ids.
  std::vector<node_id_t> buffer(upper_neighbor_cnt() + 1);
  const size_t record_size = sizeof(node_id_t) * buffer.size();
  for (node_id_t id = 0; id < doc_cnt(); ++id) {
    // Id of the node inside this entity whose data lands at position `id`.
    const node_id_t src_id =
        reorder_mapping.empty() ? id : reorder_mapping[id];
    const auto level = get_level(src_id);
    if (level == 0) {
      hnsw_meta.emplace_back(0U, 0U);
      continue;
    }
    hnsw_meta.emplace_back(offset, level);
    ailego_assert_with(static_cast<size_t>(level) < kMaxGraphLayers,
                       "invalid level");
    for (level_t cur_level = 1; cur_level <= level; ++cur_level) {
      const Neighbors neighbors = get_neighbors(cur_level, src_id);
      ailego_assert_with(!!neighbors.data, "invalid neighbors");
      ailego_assert_with(neighbors.size() <= neighbor_cnt(cur_level),
                         "invalid neighbors");
      // The asserts above may be compiled out; never copy more ids than the
      // record can hold.
      if (ailego_unlikely(neighbors.size() >= buffer.size())) {
        LOG_ERROR("Invalid neighbor count, id=%u level=%d size=%zu max=%zu",
                  id, static_cast<int>(cur_level), neighbors.size(),
                  buffer.size() - 1);
        return IndexError_ReadData;
      }
      memset(buffer.data(), 0, record_size);
      buffer[0] = neighbors.size();
      if (neighbor_mapping.empty()) {
        memcpy(buffer.data() + 1, neighbors.data,
               neighbors.size() * sizeof(node_id_t));
      } else {
        for (node_id_t i = 0; i < neighbors.size(); ++i) {
          buffer[i + 1] = neighbor_mapping[neighbors[i]];
        }
      }
      if (dumper->write(buffer.data(), record_size) != record_size) {
        LOG_ERROR("Dump graph neighbor id=%u failed, size %zu", id,
                  record_size);
        return IndexError_WriteData;
      }
      crc = ailego::Crc32c::Hash(buffer.data(), record_size, crc);
      offset += record_size;
    }
  }
  size_t padding_size = 0;
  int ret = CalcAndAddPadding(dumper, offset, &padding_size);
  if (ret != 0) {
    return ret;
  }

  ret = dumper->append(kHnswNeighborsSegmentId, offset, padding_size, crc);
  if (ret != 0) {
    LOG_ERROR("Dump segment %s failed, ret %d", kHnswNeighborsSegmentId.c_str(),
              ret);
    return ret;
  }

  //! dump upper level neighbors meta
  auto len = dump_segment(dumper, kHnswOffsetsSegmentId, hnsw_meta.data(),
                          hnsw_meta.size() * sizeof(HnswNeighborMeta));
  if (len < 0) {
    return len;
  }

  return len + offset + padding_size;
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw/hnsw_entity.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <string.h>
#include <ailego/utility/memory_helper.h>
#include <zvec/ailego/container/heap.h>
#include <zvec/ailego/logger/logger.h>
#include <zvec/core/framework/index_dumper.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_storage.h>

namespace zvec {
namespace core {

//! Local id of a node (vector) inside one entity
using node_id_t = uint32_t;
//! Primary key of a node, see HnswEntity::get_key()
using key_t = uint64_t;
//! Graph level; level 0 has its own neighbor limit (l0_neighbor_count)
using level_t = int32_t;
//! Distance value produced by the metric
using dist_t = float;
//! Heap of (node id, distance) pairs with the default comparator
using TopkHeap = ailego::KeyValueHeap<node_id_t, dist_t>;
//! Heap of (node id, distance) pairs ordered with std::greater<dist_t>
using CandidateHeap =
    ailego::KeyValueHeap<node_id_t, dist_t, std::greater<dist_t>>;
//! Sentinel node id (all bits set); used as the "no entry point" value
constexpr node_id_t kInvalidNodeId = static_cast<node_id_t>(-1);
//! Sentinel key (all bits set)
constexpr key_t kInvalidKey = static_cast<key_t>(-1);
//! Forward declaration only; not referenced elsewhere in this header
class DistCalculator;

//! Header of the base (level 0) graph. It is dumped as raw bytes by
//! HnswEntity::dump_header(), so the field order and sizes define the
//! stored layout and must not change.
struct GraphHeader {
  uint32_t size;     // header size; HNSWHeader::clear() sets sizeof(GraphHeader)
  uint32_t version;     // not read or written by the code in this header
  uint32_t graph_type;  // not read or written by the code in this header
  uint32_t doc_count;   // current total docs of the graph
  uint32_t vector_size;  // size in bytes of one vector
  uint32_t node_size;  // set to AlignSize(vector_size) when dumping
  uint32_t l0_neighbor_count;     // max neighbor count on level 0
  uint32_t prune_type;            // not read or written by the code here
  uint32_t prune_neighbor_count;  // see HnswEntity::prune_cnt()
  uint32_t ef_construction;       // see HnswEntity::ef_construction()
  uint32_t options;               // not read or written by the code here
  uint32_t min_neighbor_count;    // see HnswEntity::min_neighbor_cnt()
  // 12 * 4 + 4080 = 4128 bytes in total, a multiple of 32
  uint8_t reserved_[4080];
};

static_assert(sizeof(GraphHeader) % 32 == 0,
              "GraphHeader must be aligned with 32 bytes");

//! Hnsw upper neighbor header
//! It is dumped as raw bytes by HnswEntity::dump_header(), so the field
//! order and sizes define the stored layout and must not change.
struct HnswHeader {
  uint32_t size;  // header size; HNSWHeader::clear() sets sizeof(HnswHeader)
  // NOTE(review): the original comment said "current total docs of the
  // graph", which describes GraphHeader::doc_count; presumably this is a
  // format revision (cf. HnswEntity::kRevision) - confirm.
  uint32_t revision;
  uint32_t upper_neighbor_count;  // max neighbor count on levels above 0
  uint32_t ef_construction;
  uint32_t scaling_factor;
  uint32_t max_level;    // current max graph level
  uint32_t entry_point;  // node id of the entry point, or kInvalidNodeId
  uint32_t options;
  // 8 * 4 + 30 = 62 bytes; the compiler adds 2 bytes of tail padding, so
  // sizeof(HnswHeader) is 64.
  uint8_t reserved_[30];
};

static_assert(sizeof(HnswHeader) % 32 == 0,
              "HnswHeader must be aligned with 32 bytes");

//! Hnsw common header and upper neighbor header
//! The graph header and the hnsw header kept side by side, plus read-only
//! accessors for the most used fields.
//!
//! Copies are done with memcpy on purpose: it also copies the reserved and
//! padding bytes, and the sub-headers are later dumped as raw bytes.
struct HNSWHeader {
  //! Constructor: all fields take their initial values, see clear()
  HNSWHeader() {
    clear();
  }

  //! Byte-wise copy constructor
  HNSWHeader(const HNSWHeader &header) {
    memcpy(this, &header, sizeof(header));
  }

  //! Byte-wise copy assignment
  HNSWHeader &operator=(const HNSWHeader &header) {
    // memcpy requires non-overlapping ranges, so skip self-assignment.
    if (this != &header) {
      memcpy(this, &header, sizeof(header));
    }
    return *this;
  }

  //! Reset state to zero, and the params is untouched
  inline void reset() {
    graph.doc_count = 0U;
    hnsw.entry_point = kInvalidNodeId;
    hnsw.max_level = 0;
  }

  //! Clear all fields to init value: everything is zeroed, then the entry
  //! point is invalidated and both size fields are set to the struct sizes
  inline void clear() {
    memset(this, 0, sizeof(HNSWHeader));
    hnsw.entry_point = kInvalidNodeId;
    graph.size = sizeof(GraphHeader);
    hnsw.size = sizeof(HnswHeader);
  }

  //! Max neighbor count on level 0
  size_t l0_neighbor_cnt() const {
    return graph.l0_neighbor_count;
  }

  //! Max neighbor count on the levels above 0
  size_t upper_neighbor_cnt() const {
    return hnsw.upper_neighbor_count;
  }

  //! Size of one vector
  size_t vector_size() const {
    return graph.vector_size;
  }

  //! ef_construction stored in the graph header
  size_t ef_construction() const {
    return graph.ef_construction;
  }

  //! Scaling factor stored in the hnsw header
  size_t scaling_factor() const {
    return hnsw.scaling_factor;
  }

  //! Prune neighbor count stored in the graph header
  size_t neighbor_prune_cnt() const {
    return graph.prune_neighbor_count;
  }

  //! Entry point node id (kInvalidNodeId after clear()/reset())
  node_id_t entry_point() const {
    return hnsw.entry_point;
  }

  //! Current doc count
  node_id_t doc_cnt() const {
    return graph.doc_count;
  }

  GraphHeader graph;
  HnswHeader hnsw;
};

//! View of a stored neighbor list: a 32-bit count immediately followed by
//! the neighbor ids. Neighbors reinterprets a memory block as this struct.
struct NeighborsHeader {
  uint32_t neighbor_cnt;  // number of valid entries in neighbors[]
  // Zero-length trailing array: the ids live directly behind the count and
  // add nothing to sizeof(NeighborsHeader).
  node_id_t neighbors[0];
};

struct Neighbors {
  Neighbors() : cnt{0}, data{nullptr} {}

  Neighbors(uint32_t cnt_in, const node_id_t *data_in)
      : cnt{cnt_in}, data{data_in} {}

  Neighbors(const IndexStorage::MemoryBlock &mem_block)
      : neighbor_block{mem_block} {
    auto hd = reinterpret_cast<const NeighborsHeader *>(neighbor_block.data());
    cnt = hd->neighbor_cnt;
    data = hd->neighbors;
  }

  size_t size(void) const {
    return cnt;
  }

  const node_id_t &operator[](size_t idx) const {
    return data[idx];
  }

  uint32_t cnt;
  const node_id_t *data;
  IndexStorage::MemoryBlock neighbor_block;
};

//! level 0 neighbors offset
struct GraphNeighborMeta {
  //! Both values are stored in bit-fields: the offset keeps its low 48
  //! bits, the count its low 16 bits.
  GraphNeighborMeta(size_t byte_offset, size_t count)
      : offset(byte_offset), neighbor_cnt(count) {}

  uint64_t offset : 48;        // byte offset of the node's neighbor list
  uint64_t neighbor_cnt : 16;  // number of neighbors in that list
};

//! hnsw upper neighbors meta
struct HnswNeighborMeta {
  //! Both values are stored in bit-fields: the offset keeps its low 48
  //! bits, the level its low 16 bits.
  HnswNeighborMeta(size_t byte_offset, size_t node_level)
      : offset(byte_offset), level(node_level) {}

  uint64_t offset : 48;  // offset = idx * upper neighbors size
  uint64_t level : 16;   // level of the node (0 when it has no upper levels)
};

//! Base class of the HNSW storage entities.
//!
//! It owns the combined header (HNSWHeader), exposes accessors for the
//! header fields, implements the segment dumping shared by its subclasses,
//! and declares the virtual interface for reading vectors and neighbors.
//! Mutating operations have default bodies that report "not implemented".
class HnswEntity {
 public:
  //! Constructor
  HnswEntity() {}

  //! Constructor from an existing header, which is copied
  HnswEntity(const HNSWHeader &hd) {
    header_ = hd;
  }

  //! Destructor
  virtual ~HnswEntity() {}

  //! HnswEntity Pointer
  typedef std::shared_ptr<HnswEntity> Pointer;

  //! Get max neighbor size of graph level
  inline size_t neighbor_cnt(level_t level) const {
    return level == 0 ? header_.graph.l0_neighbor_count
                      : header_.hnsw.upper_neighbor_count;
  }

  //! get max neighbor size of graph level 0
  inline size_t l0_neighbor_cnt() const {
    return header_.graph.l0_neighbor_count;
  }

  //! get min neighbor size of graph
  inline size_t min_neighbor_cnt() const {
    return header_.graph.min_neighbor_count;
  }

  //! get upper neighbor size of graph level other than 0
  inline size_t upper_neighbor_cnt() const {
    return header_.hnsw.upper_neighbor_count;
  }

  //! Get a writable pointer to the doc count stored in the header
  inline node_id_t *mutable_doc_cnt() {
    return &header_.graph.doc_count;
  }

  //! Get current total doc of the hnsw graph
  inline node_id_t doc_cnt() const {
    return header_.graph.doc_count;
  }

  //! Get hnsw graph scaling params
  inline size_t scaling_factor() const {
    return header_.hnsw.scaling_factor;
  }

  //! Get prune_size
  inline size_t prune_cnt() const {
    return header_.graph.prune_neighbor_count;
  }

  //! Current entry point of top level graph
  inline node_id_t entry_point() const {
    return header_.hnsw.entry_point;
  }

  //! Current max graph level
  inline level_t cur_max_level() const {
    return header_.hnsw.max_level;
  }

  //! Retrieve index vector size
  size_t vector_size() const {
    return header_.graph.vector_size;
  }

  //! Retrieve node size
  size_t node_size() const {
    return header_.graph.node_size;
  }

  //! Retrieve ef construction
  size_t ef_construction() const {
    return header_.graph.ef_construction;
  }

  //! Setters for the header parameters. The header fields are 32-bit, so
  //! the size_t argument is narrowed on assignment.
  void set_vector_size(size_t size) {
    header_.graph.vector_size = size;
  }

  void set_prune_cnt(size_t v) {
    header_.graph.prune_neighbor_count = v;
  }

  void set_scaling_factor(size_t val) {
    header_.hnsw.scaling_factor = val;
  }

  void set_l0_neighbor_cnt(size_t cnt) {
    header_.graph.l0_neighbor_count = cnt;
  }

  void set_min_neighbor_cnt(size_t cnt) {
    header_.graph.min_neighbor_count = cnt;
  }

  void set_upper_neighbor_cnt(size_t cnt) {
    header_.hnsw.upper_neighbor_count = cnt;
  }

  void set_ef_construction(size_t ef) {
    header_.graph.ef_construction = ef;
  }

 protected:
  //! Read-only access to the combined header
  inline const HNSWHeader &header() const {
    return header_;
  }

  //! Writable access to the combined header
  inline HNSWHeader *mutable_header() {
    return &header_;
  }

  //! Size in bytes of the combined header
  inline size_t header_size() const {
    return sizeof(header_);
  }

  void set_node_size(size_t size) {
    header_.graph.node_size = size;
  }

  //! Dump all segment by dumper
  //! Return dump size if success, errno(<0) in failure
  int64_t dump_segments(
      const IndexDumper::Pointer &dumper, key_t *keys,
      const std::function<level_t(node_id_t)> &get_level) const;

 private:
  //! dump mapping segment, for get_vector_by_key in provider
  int64_t dump_mapping_segment(const IndexDumper::Pointer &dumper,
                               const key_t *keys) const;

  //! dump hnsw head by dumper
  //! Return dump size if success, errno(<0) in failure
  int64_t dump_header(const IndexDumper::Pointer &dumper,
                      const HNSWHeader &hd) const;

  //! dump vectors by dumper
  //! Return dump size if success, errno(<0) in failure
  int64_t dump_vectors(const IndexDumper::Pointer &dumper,
                       const std::vector<node_id_t> &reorder_mapping) const;

  //! dump hnsw neighbors by dumper: level 0 first, then the upper levels
  //! Return dump size if success, errno(<0) in failure
  int64_t dump_neighbors(const IndexDumper::Pointer &dumper,
                         const std::function<level_t(node_id_t)> &get_level,
                         const std::vector<node_id_t> &reorder_mapping,
                         const std::vector<node_id_t> &neighbor_mapping) const {
    auto len1 = dump_graph_neighbors(dumper, reorder_mapping, neighbor_mapping);
    if (len1 < 0) {
      return len1;
    }
    auto len2 = dump_upper_neighbors(dumper, get_level, reorder_mapping,
                                     neighbor_mapping);
    if (len2 < 0) {
      return len2;
    }

    return len1 + len2;
  }

  //! dump segment by dumper
  //! Return dump size if success, errno(<0) in failure
  int64_t dump_segment(const IndexDumper::Pointer &dumper,
                       const std::string &segment_id, const void *data,
                       size_t size) const;

  //! Dump level 0 neighbors
  //! Return dump size if success, errno(<0) in failure
  int64_t dump_graph_neighbors(
      const IndexDumper::Pointer &dumper,
      const std::vector<node_id_t> &reorder_mapping,
      const std::vector<node_id_t> &neighbor_mapping) const;

  //! Dump upper level neighbors
  //! Return dump size if success, errno(<0) in failure
  int64_t dump_upper_neighbors(
      const IndexDumper::Pointer &dumper,
      const std::function<level_t(node_id_t)> &get_level,
      const std::vector<node_id_t> &reorder_mapping,
      const std::vector<node_id_t> &neighbor_mapping) const;

 public:
  //! Cleanup the entity: the header goes back to its initial values
  virtual int cleanup(void) {
    header_.clear();
    return 0;
  }

  //! Make a copy of searcher entity, to support thread-safe operation.
  //! The segment in container cannot be read concurrently.
  //! The base implementation logs an error and returns an empty pointer.
  virtual const HnswEntity::Pointer clone() const {
    LOG_ERROR("Clone not implemented");
    return HnswEntity::Pointer();
  }

  //! Get primary key of the node id
  virtual key_t get_key(node_id_t id) const = 0;

  //! Get vector feature data by node id
  virtual const void *get_vector(node_id_t id) const = 0;

  //! Get vectors feature data by node ids
  virtual int get_vector(const node_id_t *ids, uint32_t count,
                         const void **vecs) const = 0;

  //! Get vector feature data by node id into a memory block
  virtual int get_vector(const node_id_t id,
                         IndexStorage::MemoryBlock &block) const = 0;

  //! Get vectors feature data by node ids into memory blocks
  virtual int get_vector(
      const node_id_t *ids, uint32_t count,
      std::vector<IndexStorage::MemoryBlock> &vec_blocks) const = 0;

  //! Retrieve a vector using a primary key
  //! The base implementation logs an error and returns nullptr.
  virtual const void *get_vector_by_key(uint64_t /*key*/) const {
    LOG_ERROR("get vector not implemented");
    return nullptr;
  }

  //! Retrieve a vector using a primary key into a memory block
  virtual int get_vector_by_key(const key_t /*key*/,
                                IndexStorage::MemoryBlock & /*block*/) const {
    return IndexError_NotImplemented;
  }

  //! Get the node id's neighbors on graph level
  //! Note: the neighbors cannot be modified, using the following
  //! method to get WritableNeighbors if want to
  virtual const Neighbors get_neighbors(level_t level, node_id_t id) const = 0;

  //! Add vector and key to hnsw entity, and local id will be saved in id
  virtual int add_vector(level_t /*level*/, key_t /*key*/, const void * /*vec*/,
                         node_id_t * /*id*/) {
    return IndexError_NotImplemented;
  }

  //! Add vector and id to hnsw entity
  virtual int add_vector_with_id(level_t /*level*/, node_id_t /*id*/,
                                 const void * /*vec*/) {
    return IndexError_NotImplemented;
  }

  //! Replace the neighbors of node id on level.
  //! NOTE(review): the base implementation logs an error but still returns
  //! 0, unlike add_vector() which returns IndexError_NotImplemented -
  //! confirm whether callers rely on this.
  virtual int update_neighbors(
      level_t /*level*/, node_id_t /*id*/,
      const std::vector<std::pair<node_id_t, dist_t>> & /*neighbors*/) {
    LOG_ERROR("Update neighbors dense not implemented");

    return 0;
  }

  //! Append neighbor_id to node id neighbors on level, size is the current
  //! neighbors size. Notice: the caller must be ensure the neighbors not full
  virtual void add_neighbor(level_t /*level*/, node_id_t /*id*/,
                            uint32_t /*size*/, node_id_t /*neighbor_id*/) {
    LOG_ERROR("Add neighbor not implemented");
  }

  //! Update entry point and max level
  virtual void update_ep_and_level(node_id_t ep, level_t level) {
    header_.hnsw.entry_point = ep;
    header_.hnsw.max_level = level;
  }

  //! Load the entity from a storage container
  virtual int load(const IndexStorage::Pointer & /*container*/,
                   bool /*check_crc*/) {
    LOG_ERROR("Load not implemented");
    return IndexError_NotImplemented;
  }

  //! Dump the entity through a dumper
  virtual int dump(const IndexDumper::Pointer & /*dumper*/) {
    LOG_ERROR("Dump not implemented");
    return IndexError_NotImplemented;
  }

  //! Write the zero padding that aligns a payload of data_size bytes and
  //! report its length in padding_size
  static int CalcAndAddPadding(const IndexDumper::Pointer &dumper,
                               size_t data_size, size_t *padding_size);

 protected:
  //! Round size up to the next multiple of 32
  static inline size_t AlignSize(size_t size) {
    // Build the mask in size_t so that no int-to-size_t sign extension is
    // involved.
    return (size + 0x1F) & ~static_cast<size_t>(0x1F);
  }

  //! Round size up to the next multiple of the page size
  static inline size_t AlignPageSize(size_t size) {
    size_t page_mask = ailego::MemoryHelper::PageSize() - 1;
    return (size + page_mask) & (~page_mask);
  }

  //! Round size up to the next multiple of the huge page size
  static inline size_t AlignHugePageSize(size_t size) {
    size_t page_mask = ailego::MemoryHelper::HugePageSize() - 1;
    return (size + page_mask) & (~page_mask);
  }

  //! rearrange vectors to improve cache locality
  void reshuffle_vectors(const std::function<level_t(node_id_t)> &get_level,
                         std::vector<node_id_t> *n2o_mapping,
                         std::vector<node_id_t> *o2n_mapping,
                         key_t *keys) const;

 public:
  //! Segment identifiers used when dumping
  const static std::string kGraphHeaderSegmentId;
  const static std::string kGraphFeaturesSegmentId;
  const static std::string kGraphKeysSegmentId;
  const static std::string kGraphNeighborsSegmentId;
  const static std::string kGraphOffsetsSegmentId;
  const static std::string kGraphMappingSegmentId;
  const static std::string kHnswHeaderSegmentId;
  const static std::string kHnswNeighborsSegmentId;
  const static std::string kHnswOffsetsSegmentId;

  constexpr static uint32_t kRevision = 0U;
  constexpr static size_t kMaxGraphLayers = 15;
  constexpr static uint32_t kDefaultEfConstruction = 500;
  constexpr static uint32_t kDefaultEf = 500;
  constexpr static uint32_t kDefaultUpperMaxNeighborCnt = 50;  // M of HNSW
  constexpr static uint32_t kDefaultL0MaxNeighborCnt = 100;
  constexpr static uint32_t kMaxNeighborCnt = 65535;
  constexpr static float kDefaultScanRatio = 0.1f;
  constexpr static uint32_t kDefaultMinScanLimit = 10000;
  constexpr static uint32_t kDefaultMaxScanLimit =
      std::numeric_limits<uint32_t>::max();
  constexpr static float kDefaultBFNegativeProbability = 0.001f;
  constexpr static uint32_t kDefaultScalingFactor = 50U;
  constexpr static uint32_t kDefaultBruteForceThreshold = 1000U;
  constexpr static uint32_t kDefaultDocsHardLimit = 1 << 30U;  // 1 billion
  constexpr static float kDefaultDocsSoftLimitRatio = 0.9f;
  constexpr static size_t kMaxChunkSize = 0xFFFFFFFF;
  constexpr static size_t kDefaultChunkSize = 2UL * 1024UL * 1024UL;
  constexpr static size_t kDefaultMaxChunkCnt = 50000UL;
  constexpr static float kDefaultNeighborPruneMultiplier =
      1.0f;  // prune_cnt = upper_max_neighbor_cnt * multiplier
  constexpr static float kDefaultL0MaxNeighborCntMultiplier =
      2.0f;  // l0_max_neighbor_cnt = upper_max_neighbor_cnt * multiplier

 protected:
  HNSWHeader header_{};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw/hnsw_index_hash.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include "hnsw_chunk.h"

namespace zvec {
namespace core {

//! Persistent hashmap implemented through an open addressing algorithm
//! (linear probing).
//!
//! Keys are partitioned into slots by their high bits (`key >> mask_bits_`);
//! every slot is backed by one chunk obtained from the chunk broker and holds
//! `slot_items_` fixed-size items. Inside a slot the probe starts at
//! `key & slot_loc_mask_` and walks forward, wrapping at the end of the slot.
//! An item whose value equals `EmptyVal` is treated as unoccupied, so a pair
//! inserted with `val == EmptyVal` cannot be found again.
template <class Key, class Val, Val EmptyVal = 0U,
          typename =
              typename std::enable_if<std::is_integral<Key>::value>::type>
class HnswIndexHashMap {
  using key_type = Key;
  using val_type = Val;

  //! A stored (key, value) pair; pointers to it are the lookup results
  struct Iterator {
    key_type first;
    val_type second;
  };
  typedef Iterator *iterator;
  typedef Iterator Item;
  typedef const Iterator *const_iterator;

  //! One item array together with the chunk it is written back to
  class Slot {
   public:
    //! Wrap a chunk whose item array is addressable at `data`
    Slot(Chunk::Pointer &&chunk, const void *data)
        : chunk_(std::move(chunk)),
          items_(reinterpret_cast<const Item *>(data)) {}

    //! Wrap a chunk together with the memory block holding its item array
    Slot(Chunk::Pointer &&chunk, IndexStorage::MemoryBlock &&mem_block)
        : chunk_(std::move(chunk)), items_block_(std::move(mem_block)) {
      items_ = reinterpret_cast<const Item *>(items_block_.data());
    }

    //! Return an empty loc or the key item loc
    //! key        the key to look for
    //! max_items  number of items in this slot
    //! mask       mask applied to the key to get the probe start
    //! Returns nullptr when all `max_items` locations were probed without
    //! finding the key or an empty item.
    const_iterator find(key_type key, uint32_t max_items, uint32_t mask) const {
      auto it = &items_[key & mask];
      for (auto i = 0U; i < max_items; ++i) {
        if (it->first == key || it->second == EmptyVal) {
          // LOG_DEBUG("i=%u", i);
          return it;
        }
        ++it;
        // Wrap around to the first item once the end of the slot is reached
        if (it == &items_[max_items]) {
          it = &items_[0];
        }
      }
      return nullptr;
    }

    //! Write the item pointed to by `it` back to the chunk at the byte offset
    //! it has inside the item array. Returns false if the write is short.
    bool update(const_iterator it) {
      uint32_t offset = reinterpret_cast<const uint8_t *>(it) -
                        reinterpret_cast<const uint8_t *>(&items_[0]);
      if (ailego_unlikely(chunk_->write(offset, it, sizeof(Item)) !=
                          sizeof(Item))) {
        LOG_ERROR("Chunk write failed");
        return false;
      }
      return true;
    }

   private:
    Chunk::Pointer chunk_{};
    const Item *items_{nullptr};  // point to chunk data
    IndexStorage::MemoryBlock items_block_{};
  };

 public:
  //! Init the hash
  //! broker      the index allocator
  //! chunk_size  the size of per chunk allocated, actual size may be greater
  //! factor      factor = 1/ratio, ratio is the probability of a sequence
  //! number inserted to this container
  //! max         the max number key can be inserted
  //! expansion_ratio   memory expansion ratio, must be > 1.0f
  //! Returns 0 on success, otherwise the error code reported by load().
  int init(ChunkBroker::Pointer &broker, uint32_t chunk_size, uint32_t factor,
           size_t max, float expansion_ratio) {
    ailego_assert_with(expansion_ratio > 1.0f, "ratio must > 1.0f");
    broker_ = broker;

    // Round the per-slot item count up to a power of two so that the probe
    // start can be computed with a mask
    size_t items = std::ceil(chunk_size * 1.0f / sizeof(Item));
    slot_items_ = 1UL << static_cast<size_t>((std::ceil(std::log2(items))));
    // Key range covered by one slot, rounded down to a power of two so that
    // the slot index is a plain right shift of the key
    size_t range = slot_items_ * factor / expansion_ratio;
    mask_bits_ = std::floor(std::log2(range));
    range = 1UL << mask_bits_;
    size_t max_slots = std::ceil(max * 1.0f / range);
    // insert() uses the reserved capacity as the upper bound of slot count
    slots_.reserve(max_slots);
    slot_loc_mask_ = slot_items_ - 1U;
    int ret = load();
    if (ret != 0) {
      return ret;
    }

    LOG_DEBUG(
        "HnswIndexHash init, chunkSize=%u factor=%u max=%zu "
        "ratio=%f slotItems=%u maxSlots=%zu maskBits=%u "
        "range=%zu",
        chunk_size, factor, max, expansion_ratio, slot_items_, max_slots,
        mask_bits_, range);

    return 0;
  }

  //! Release the broker and all slots and zero the layout parameters
  int cleanup(void) {
    broker_.reset();
    slots_.clear();
    slots_.shrink_to_fit();
    mask_bits_ = 0U;
    slot_items_ = 0U;
    slot_loc_mask_ = 0U;

    return 0;
  }

  //! The "not found" result of find()
  const_iterator end(void) const {
    return nullptr;
  }

  //! Look up `key`; returns end() (nullptr) when the key's slot does not
  //! exist or no occupied item with that key is found
  const_iterator find(const key_type key) const {
    auto idx = key >> mask_bits_;
    if (idx >= slots_.size()) {
      return end();
    }
    auto it = slots_[idx].find(key, slot_items_, slot_loc_mask_);
    return it && it->second != EmptyVal ? it : nullptr;
  }

  //! Insert or overwrite the value of `key`, allocating every missing slot
  //! up to the key's slot first. Returns false when the slot index exceeds
  //! the reserved capacity, a slot cannot be allocated, the slot is full, or
  //! the write back to the chunk fails.
  bool insert(key_type key, val_type val) {
    auto idx = key >> mask_bits_;
    if (idx >= slots_.size()) {
      if (ailego_unlikely(idx >= slots_.capacity())) {
        LOG_ERROR("no space to insert");
        return false;
      }
      for (auto i = slots_.size(); i <= idx; ++i) {
        if (ailego_unlikely(!alloc_slot(i))) {
          return false;
        }
      }
    }
    auto it = slots_[idx].find(key, slot_items_, slot_loc_mask_);
    if (ailego_unlikely(it == nullptr)) {
      LOG_ERROR("no space to insert");
      return false;
    }

    //! TODO: write memory is ok?
    // The in-memory copy is patched first, then persisted through update()
    const_cast<iterator>(it)->first = key;
    const_cast<iterator>(it)->second = val;

    return slots_[idx].update(it);
  }

 private:
  //! Allocate the chunk of slot `idx` (which must be the next slot), size it
  //! to hold `slot_items_` items and append the slot. Returns false on any
  //! broker/chunk failure.
  bool alloc_slot(size_t idx) {
    ailego_assert_with(idx == slots_.size(), "invalid idx");

    size_t size = slot_items_ * sizeof(Item);
    auto p =
        broker_->alloc_chunk(ChunkBroker::CHUNK_TYPE_NEIGHBOR_INDEX, idx, size);
    if (ailego_unlikely(p.first != 0)) {
      LOG_ERROR("Alloc data chunk failed");
      return false;
    }
    Chunk::Pointer chunk = p.second;
    if (ailego_unlikely(chunk->resize(size) != size)) {
      LOG_ERROR("Chunk resize failed, size=%zu", size);
      return false;
    }
    //! Read the whole data to memory
    IndexStorage::MemoryBlock data_block;
    if (ailego_unlikely(chunk->read(0U, data_block, size) != size)) {
      LOG_ERROR("Chunk read failed, size=%zu", size);
      return false;
    }

    slots_.emplace_back(std::move(chunk), std::move(data_block));
    return true;
  }

  //! Rebuild the slot list from the chunks the broker already holds.
  //! Returns 0 on success, IndexError_InvalidFormat when a chunk is missing
  //! or smaller than one slot, and IndexError_Runtime when a chunk cannot be
  //! read completely.
  int load(void) {
    size_t slots_cnt =
        broker_->get_chunk_cnt(ChunkBroker::CHUNK_TYPE_NEIGHBOR_INDEX);
    for (size_t i = 0UL; i < slots_cnt; ++i) {
      auto chunk =
          broker_->get_chunk(ChunkBroker::CHUNK_TYPE_NEIGHBOR_INDEX, i);
      if (!chunk) {
        LOG_ERROR("Get chunk failed, seq=%zu", i);
        return IndexError_InvalidFormat;
      }
      size_t size = sizeof(Item) * slot_items_;
      if (chunk->data_size() < size) {
        LOG_ERROR(
            "Hash params may be mismatch, seq=%zu, data_size=%zu "
            "expect=%zu",
            i, chunk->data_size(), size);
        return IndexError_InvalidFormat;
      }
      //! Read the whole data to memory
      IndexStorage::MemoryBlock data_block;
      if (ailego_unlikely(chunk->read(0U, data_block, size) != size)) {
        LOG_ERROR("Chunk read failed, size=%zu", size);
        // Must be a non-zero error code: this function returns int and the
        // caller treats 0 as success.
        return IndexError_Runtime;
      }
      slots_.emplace_back(std::move(chunk), std::move(data_block));
    }
    return 0;
  }

 private:
  ChunkBroker::Pointer broker_{};  // chunk broker
  std::vector<Slot> slots_{};      // slot i covers keys with key >> mask_bits_ == i
  uint32_t mask_bits_{0U};         // shift applied to a key to get its slot
  uint32_t slot_items_{};          // must be a power of 2
  uint32_t slot_loc_mask_{};       // slot_items_ - 1
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw/hnsw_index_provider.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <zvec/core/framework/index_provider.h>
#include <zvec/core/framework/index_searcher.h>
#include <zvec/core/framework/index_streamer.h>
#include "hnsw_entity.h"

namespace zvec {
namespace core {

//! IndexProvider backed by an HnswEntity: exposes the stored vectors by
//! iteration or by primary key, and reports the shape taken from IndexMeta.
class HnswIndexProvider : public IndexProvider {
 public:
  //! meta    index meta; only a reference is kept, so it must outlive the
  //!         provider
  //! entity  entity the vectors and keys are read from
  //! owner   name reported by owner_class()
  HnswIndexProvider(const IndexMeta &meta, const HnswEntity::Pointer &entity,
                    const std::string &owner)
      : meta_(meta), entity_(entity), owner_class_(owner) {}

  HnswIndexProvider(const HnswIndexProvider &) = delete;
  HnswIndexProvider &operator=(const HnswIndexProvider &) = delete;

 public:  // holder interface
  //! Create a new iterator (null pointer if the allocation fails)
  IndexProvider::Iterator::Pointer create_iterator() override {
    return HnswIndexProvider::Iterator::Pointer(new (std::nothrow)
                                                    Iterator(entity_));
  }

  //! Retrieve count of vectors
  size_t count(void) const override {
    return entity_->doc_cnt();
  }

  //! Retrieve dimension of vector
  size_t dimension(void) const override {
    return meta_.dimension();
  }

  //! Retrieve type of vector
  IndexMeta::DataType data_type(void) const override {
    return meta_.data_type();
  }

  //! Retrieve vector size in bytes
  size_t element_size(void) const override {
    return meta_.element_size();
  }

 public:  // provider's unique interface
  //! Retrieve a vector using a primary key
  const void *get_vector(uint64_t key) const override {
    return entity_->get_vector_by_key(key);
  }

  //! Retrieve a vector using a primary key into a memory block; the return
  //! value is forwarded from the entity
  int get_vector(const uint64_t key,
                 IndexStorage::MemoryBlock &block) const override {
    return entity_->get_vector_by_key(key, block);
  }

  //! Retrieve the owner class
  const std::string &owner_class(void) const override {
    return owner_class_;
  }

 private:
  //! Iterates node ids in ascending order, skipping nodes whose key is the
  //! invalid-key sentinel
  class Iterator : public IndexProvider::Iterator {
   public:
    // NOTE(review): iteration starts at id 0 without checking that node 0
    // holds a valid key — confirm whether callers rely on reset() first.
    Iterator(const HnswEntity::Pointer &entity)
        : entity_(entity), cur_id_(0U) {}

    //! Retrieve pointer of data
    //! NOTICE: the vec feature will be changed after iterating to next, so
    //! the caller need to keep a copy of it before iterator to next vector
    virtual const void *data(void) const override {
      return entity_->get_vector(cur_id_);
    }

    //! Test if the iterator is valid
    virtual bool is_valid(void) const override {
      return cur_id_ < entity_->doc_cnt();
    }

    //! Retrieve primary key
    virtual uint64_t key(void) const override {
      return entity_->get_key(cur_id_);
    }

    //! Next iterator
    virtual void next(void) override {
      // cur_id_ += 1;
      cur_id_ = get_next_valid_id(cur_id_ + 1);
    }

    //! Reset the iterator
    void reset(void) {
      cur_id_ = get_next_valid_id(0);
    }

   private:
    //! Find the first id >= start_id whose key is valid; the id is stored in
    //! cur_id_ and returned. Returns kInvalidNodeId when none is left, which
    //! makes is_valid() report false once assigned to cur_id_.
    node_id_t get_next_valid_id(node_id_t start_id) {
      for (node_id_t i = start_id; i < entity_->doc_cnt(); i++) {
        // get_key() yields a primary key, so it has to be compared with the
        // key sentinel rather than the node-id sentinel
        if (entity_->get_key(i) != kInvalidKey) {
          cur_id_ = i;
          return i;
        }
      }
      return kInvalidNodeId;
    }

   private:
    const HnswEntity::Pointer entity_;
    node_id_t cur_id_;
  };

 private:
  const IndexMeta &meta_;
  const HnswEntity::Pointer entity_;
  const std::string owner_class_;
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw/hnsw_params.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <string>

namespace zvec {
namespace core {

//! Parameter keys of the HNSW builder ("proxima.hnsw.builder.*").
//! Each constant holds the string used to look the option up in the params.
static const std::string PARAM_HNSW_BUILDER_THREAD_COUNT(
    "proxima.hnsw.builder.thread_count");
static const std::string PARAM_HNSW_BUILDER_MEMORY_QUOTA(
    "proxima.hnsw.builder.memory_quota");
static const std::string PARAM_HNSW_BUILDER_EFCONSTRUCTION(
    "proxima.hnsw.builder.efconstruction");
static const std::string PARAM_HNSW_BUILDER_SCALING_FACTOR(
    "proxima.hnsw.builder.scaling_factor");
static const std::string PARAM_HNSW_BUILDER_CHECK_INTERVAL_SECS(
    "proxima.hnsw.builder.check_interval_secs");
static const std::string PARAM_HNSW_BUILDER_NEIGHBOR_PRUNE_MULTIPLIER(
    "proxima.hnsw.builder.neighbor_prune_multiplier");
static const std::string PARAM_HNSW_BUILDER_MIN_NEIGHBOR_COUNT(
    "proxima.hnsw.builder.min_neighbor_count");
static const std::string PARAM_HNSW_BUILDER_MAX_NEIGHBOR_COUNT(
    "proxima.hnsw.builder.max_neighbor_count");
static const std::string PARAM_HNSW_BUILDER_L0_MAX_NEIGHBOR_COUNT_MULTIPLIER(
    "proxima.hnsw.builder.l0_max_neighbor_count_multiplier");

//! Parameter keys of the HNSW searcher ("proxima.hnsw.searcher.*").
//! HnswSearcher::init() looks each of these up in its ailego::Params.
static const std::string PARAM_HNSW_SEARCHER_EF("proxima.hnsw.searcher.ef");
static const std::string PARAM_HNSW_SEARCHER_BRUTE_FORCE_THRESHOLD(
    "proxima.hnsw.searcher.brute_force_threshold");
static const std::string PARAM_HNSW_SEARCHER_NEIGHBORS_IN_MEMORY_ENABLE(
    "proxima.hnsw.searcher.neighbors_in_memory_enable");
static const std::string PARAM_HNSW_SEARCHER_MAX_SCAN_RATIO(
    "proxima.hnsw.searcher.max_scan_ratio");
static const std::string PARAM_HNSW_SEARCHER_CHECK_CRC_ENABLE(
    "proxima.hnsw.searcher.check_crc_enable");
static const std::string PARAM_HNSW_SEARCHER_VISIT_BLOOMFILTER_ENABLE(
    "proxima.hnsw.searcher.visit_bloomfilter_enable");
// Validated by HnswSearcher::init() to lie strictly between 0 and 1
static const std::string PARAM_HNSW_SEARCHER_VISIT_BLOOMFILTER_NEGATIVE_PROB(
    "proxima.hnsw.searcher.visit_bloomfilter_negative_prob");
static const std::string PARAM_HNSW_SEARCHER_FORCE_PADDING_RESULT_ENABLE(
    "proxima.hnsw.searcher.force_padding_result_enable");

//! Parameter keys of the HNSW streamer ("proxima.hnsw.streamer.*").
//! Each constant holds the string used to look the option up in the params.
static const std::string PARAM_HNSW_STREAMER_MAX_SCAN_RATIO(
    "proxima.hnsw.streamer.max_scan_ratio");
static const std::string PARAM_HNSW_STREAMER_MIN_SCAN_LIMIT(
    "proxima.hnsw.streamer.min_scan_limit");
static const std::string PARAM_HNSW_STREAMER_MAX_SCAN_LIMIT(
    "proxima.hnsw.streamer.max_scan_limit");
static const std::string PARAM_HNSW_STREAMER_EF("proxima.hnsw.streamer.ef");
static const std::string PARAM_HNSW_STREAMER_EFCONSTRUCTION(
    "proxima.hnsw.streamer.efconstruction");
static const std::string PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT(
    "proxima.hnsw.streamer.max_neighbor_count");
static const std::string PARAM_HNSW_STREAMER_L0_MAX_NEIGHBOR_COUNT_MULTIPLIER(
    "proxima.hnsw.streamer.l0_max_neighbor_count_multiplier");
static const std::string PARAM_HNSW_STREAMER_SCALING_FACTOR(
    "proxima.hnsw.streamer.scaling_factor");
static const std::string PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD(
    "proxima.hnsw.streamer.brute_force_threshold");
static const std::string PARAM_HNSW_STREAMER_DOCS_HARD_LIMIT(
    "proxima.hnsw.streamer.docs_hard_limit");
static const std::string PARAM_HNSW_STREAMER_DOCS_SOFT_LIMIT(
    "proxima.hnsw.streamer.docs_soft_limit");
static const std::string PARAM_HNSW_STREAMER_MAX_INDEX_SIZE(
    "proxima.hnsw.streamer.max_index_size");
static const std::string PARAM_HNSW_STREAMER_VISIT_BLOOMFILTER_ENABLE(
    "proxima.hnsw.streamer.visit_bloomfilter_enable");
static const std::string PARAM_HNSW_STREAMER_VISIT_BLOOMFILTER_NEGATIVE_PROB(
    "proxima.hnsw.streamer.visit_bloomfilter_negative_prob");
static const std::string PARAM_HNSW_STREAMER_CHECK_CRC_ENABLE(
    "proxima.hnsw.streamer.check_crc_enable");
static const std::string PARAM_HNSW_STREAMER_NEIGHBOR_PRUNE_MULTIPLIER(
    "proxima.hnsw.streamer.neighbor_prune_multiplier");
static const std::string PARAM_HNSW_STREAMER_CHUNK_SIZE(
    "proxima.hnsw.streamer.chunk_size");
static const std::string PARAM_HNSW_STREAMER_FILTER_SAME_KEY(
    "proxima.hnsw.streamer.filter_same_key");
static const std::string PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE(
    "proxima.hnsw.streamer.get_vector_enable");
static const std::string PARAM_HNSW_STREAMER_MIN_NEIGHBOR_COUNT(
    "proxima.hnsw.streamer.min_neighbor_count");
static const std::string PARAM_HNSW_STREAMER_FORCE_PADDING_RESULT_ENABLE(
    "proxima.hnsw.streamer.force_padding_result_enable");
static const std::string PARAM_HNSW_STREAMER_ESTIMATE_DOC_COUNT(
    "proxima.hnsw.streamer.estimate_doc_count");
static const std::string PARAM_HNSW_STREAMER_USE_ID_MAP(
    "proxima.hnsw.streamer.use_id_map");

//! Parameter keys of the HNSW reducer ("proxima.hnsw.reducer.*").
//! Each constant holds the string used to look the option up in the params.
static const std::string PARAM_HNSW_REDUCER_WORKING_PATH(
    "proxima.hnsw.reducer.working_path");
static const std::string PARAM_HNSW_REDUCER_NUM_OF_ADD_THREADS(
    "proxima.hnsw.reducer.num_of_add_threads");
static const std::string PARAM_HNSW_REDUCER_INDEX_NAME(
    "proxima.hnsw.reducer.index_name");
static const std::string PARAM_HNSW_REDUCER_EFCONSTRUCTION(
    "proxima.hnsw.reducer.efconstruction");

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw/hnsw_searcher.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "hnsw_searcher.h"
#include "hnsw_algorithm.h"
#include "hnsw_index_provider.h"
#include "hnsw_params.h"

namespace zvec {
namespace core {

//! Compiler-generated default constructor, defined out of line
HnswSearcher::HnswSearcher() = default;

//! Compiler-generated destructor, defined out of line
HnswSearcher::~HnswSearcher() = default;

//! Initialize Searcher
//! Stores a copy of `search_params`, looks up every
//! "proxima.hnsw.searcher.*" option into its member, falls back to
//! HnswEntity::kDefaultEf when ef is 0 and rejects a bloom-filter negative
//! probability outside (0,1) with IndexError_InvalidArgument. On success the
//! searcher moves to STATE_INITED and 0 is returned.
int HnswSearcher::init(const ailego::Params &search_params) {
  params_ = search_params;

  // Graph traversal knobs
  params_.get(PARAM_HNSW_SEARCHER_EF, &ef_);
  params_.get(PARAM_HNSW_SEARCHER_MAX_SCAN_RATIO, &max_scan_ratio_);
  params_.get(PARAM_HNSW_SEARCHER_BRUTE_FORCE_THRESHOLD,
              &bruteforce_threshold_);

  // Visit filter knobs
  params_.get(PARAM_HNSW_SEARCHER_VISIT_BLOOMFILTER_ENABLE, &bf_enabled_);
  params_.get(PARAM_HNSW_SEARCHER_VISIT_BLOOMFILTER_NEGATIVE_PROB,
              &bf_negative_probability_);

  // Loading and result shaping knobs
  params_.get(PARAM_HNSW_SEARCHER_CHECK_CRC_ENABLE, &check_crc_enabled_);
  params_.get(PARAM_HNSW_SEARCHER_NEIGHBORS_IN_MEMORY_ENABLE,
              &neighbors_in_memory_enabled_);
  params_.get(PARAM_HNSW_SEARCHER_FORCE_PADDING_RESULT_ENABLE,
              &force_padding_topk_enabled_);

  // An ef of zero means "use the default"
  if (ef_ == 0) {
    ef_ = HnswEntity::kDefaultEf;
  }

  const bool negative_prob_out_of_range =
      bf_negative_probability_ <= 0.0f || bf_negative_probability_ >= 1.0f;
  if (negative_prob_out_of_range) {
    LOG_ERROR("[%s] must be in range (0,1)",
              PARAM_HNSW_SEARCHER_VISIT_BLOOMFILTER_NEGATIVE_PROB.c_str());
    return IndexError_InvalidArgument;
  }

  entity_.set_neighbors_in_memory(neighbors_in_memory_enabled_);
  state_ = STATE_INITED;

  LOG_DEBUG(
      "Init params: ef=%u maxScanRatio=%f bfEnabled=%u checkCrcEnabled=%u "
      "neighborsInMemoryEnabled=%u bfNagtiveProb=%f bruteForceThreshold=%u "
      "forcePadding=%u",
      ef_, max_scan_ratio_, bf_enabled_, check_crc_enabled_,
      neighbors_in_memory_enabled_, bf_negative_probability_,
      bruteforce_threshold_, force_padding_topk_enabled_);

  return 0;
}

//! Dump the level-0 neighbor list of every node to stdout, one node per
//! line in the form "node: <id>; <n0>, <n1>, ...".
void HnswSearcher::print_debug_info() {
  for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {
    Neighbors neighbours = entity_.get_neighbors(0, id);
    std::cout << "node: " << id << "; ";
    for (uint32_t i = 0; i < neighbours.size(); ++i) {
      if (i != 0) {
        std::cout << ", ";
      }
      std::cout << neighbours[i];
    }
    // Always terminate the line, so that a node without neighbors does not
    // run into the output of the following node
    std::cout << '\n';
  }
  // Flush once at the end instead of once per node
  std::cout << std::flush;
}

//! Cleanup Searcher
//! Drops the metric, meta and load statistics, restores the search tunables
//! to the HnswEntity defaults (flags to false), cleans up the entity and
//! puts the searcher back into STATE_INIT. Always returns 0.
int HnswSearcher::cleanup() {
  LOG_INFO("Begin HnswSearcher:cleanup");

  metric_.reset();
  meta_.clear();
  stats_.clear_attributes();
  stats_.set_loaded_count(0UL);
  stats_.set_loaded_costtime(0UL);
  max_scan_ratio_ = HnswEntity::kDefaultScanRatio;
  max_scan_num_ = 0U;
  ef_ = HnswEntity::kDefaultEf;
  bf_enabled_ = false;
  bf_negative_probability_ = HnswEntity::kDefaultBFNegativeProbability;
  bruteforce_threshold_ = HnswEntity::kDefaultBruteForceThreshold;
  check_crc_enabled_ = false;
  neighbors_in_memory_enabled_ = false;
  // NOTE(review): force_padding_topk_enabled_ is read in init() but is not
  // reset here — confirm whether that is intended.
  entity_.cleanup();
  state_ = STATE_INIT;

  LOG_INFO("End HnswSearcher:cleanup");

  return 0;
}

//! Load Index from storage
//! container  storage the meta and the graph are read from
//! metric     metric to search with; when empty, one is created from the
//!            metric name stored in the index meta
//! Requires STATE_INITED. Returns 0 and moves to STATE_LOADED on success,
//! otherwise the first error encountered.
int HnswSearcher::load(IndexStorage::Pointer container,
                       IndexMetric::Pointer metric) {
  if (state_ != STATE_INITED) {
    LOG_ERROR("Init the searcher first before load index");
    return IndexError_Runtime;
  }

  LOG_INFO("Begin HnswSearcher:load");

  const auto begin_ms = ailego::Monotime::MilliSeconds();

  // Meta first, then the graph itself
  int rc = IndexHelper::DeserializeFromStorage(container.get(), &meta_);
  if (rc != 0) {
    LOG_ERROR("Failed to deserialize meta from container");
    return rc;
  }

  rc = entity_.load(container, check_crc_enabled_);
  if (rc != 0) {
    LOG_ERROR("HnswSearcher load index failed");
    return rc;
  }

  alg_ = HnswAlgorithm::UPointer(new HnswAlgorithm(entity_));

  if (!metric) {
    // No metric supplied: build the one named by the index meta
    metric_ = IndexFactory::CreateMetric(meta_.metric_name());
    if (!metric_) {
      LOG_ERROR("CreateMetric failed, name: %s", meta_.metric_name().c_str());
      return IndexError_NoExist;
    }
    rc = metric_->init(meta_, meta_.metric_params());
    if (rc != 0) {
      LOG_ERROR("IndexMetric init failed, ret=%d", rc);
      return rc;
    }
    // Prefer the dedicated query metric when the metric provides one
    if (metric_->query_metric()) {
      metric_ = metric_->query_metric();
    }
  } else {
    metric_ = metric;
  }

  if (!metric_->is_matched(meta_)) {
    LOG_ERROR("IndexMetric not match index meta");
    return IndexError_Mismatch;
  }

  // Scan budget is a ratio of the document count, with a floor of 4096
  max_scan_num_ = std::max(
      4096U, static_cast<uint32_t>(max_scan_ratio_ * entity_.doc_cnt()));

  stats_.set_loaded_count(entity_.doc_cnt());
  stats_.set_loaded_costtime(ailego::Monotime::MilliSeconds() - begin_ms);
  state_ = STATE_LOADED;
  magic_ = IndexContext::GenerateMagic();

  LOG_INFO("End HnswSearcher::load");

  return 0;
}

//! Unload index from storage
//! Clears the meta, cleans up the entity, drops the metric and the load
//! statistics, and returns the searcher to STATE_INITED so that load() can
//! be called again. Always returns 0.
int HnswSearcher::unload() {
  LOG_INFO("HnswSearcher unload index");

  meta_.clear();
  entity_.cleanup();
  metric_.reset();
  max_scan_num_ = 0;
  stats_.set_loaded_count(0UL);
  stats_.set_loaded_costtime(0UL);
  state_ = STATE_INITED;

  return 0;
}

//! Rebind a context to this searcher: hand it a fresh clone of the entity
//! together with this searcher's meta, metric, magic, scan budget and
//! brute-force threshold. Returns IndexError_Runtime if the entity cannot be
//! cloned, otherwise the result of HnswContext::update_context().
int HnswSearcher::update_context(HnswContext *ctx) const {
  const HnswEntity::Pointer cloned = entity_.clone();
  if (cloned) {
    ctx->set_max_scan_num(max_scan_num_);
    ctx->set_bruteforce_threshold(bruteforce_threshold_);

    return ctx->update_context(HnswContext::kSearcherContext, meta_, metric_,
                               cloned, magic_);
  }

  LOG_ERROR("Failed to clone search context entity");
  return IndexError_Runtime;
}

//! KNN Search
//! query    `count` queries stored back to back, qmeta.element_size() bytes
//!          each
//! count    number of queries
//! context  context receiving the results; must be an HnswContext
//! Falls back to search_bf_impl() when the index holds no more documents
//! than the context's brute-force threshold. Returns 0 on success.
int HnswSearcher::search_impl(const void *query, const IndexQueryMeta &qmeta,
                              uint32_t count, Context::Pointer &context) const {
  if (ailego_unlikely(!query || !context)) {
    LOG_ERROR("The context is not created by this searcher");
    return IndexError_Mismatch;
  }
  HnswContext *ctx = dynamic_cast<HnswContext *>(context.get());
  ailego_do_if_false(ctx) {
    LOG_ERROR("Cast context to HnswContext failed");
    return IndexError_Cast;
  }

  // Small indexes are scanned linearly instead of walking the graph
  if (entity_.doc_cnt() <= ctx->get_bruteforce_threshold()) {
    return search_bf_impl(query, qmeta, count, context);
  }

  if (ctx->magic() != magic_) {
    //! context is created by another searcher or streamer
    const int rebind_ret = update_context(ctx);
    if (rebind_ret != 0) {
      return rebind_ret;
    }
  }

  ctx->clear();
  ctx->resize_results(count);

  const char *cursor = static_cast<const char *>(query);
  for (size_t q = 0; q < count; ++q) {
    ctx->reset_query(cursor);
    const int search_ret = alg_->search(ctx);
    if (ailego_unlikely(search_ret != 0)) {
      LOG_ERROR("Hnsw searcher fast search failed");
      return search_ret;
    }
    ctx->topk_to_result(q);
    // Step to the next query in the batch
    cursor += qmeta.element_size();
  }

  if (ailego_unlikely(ctx->error())) {
    return IndexError_Runtime;
  }

  return 0;
}

int HnswSearcher::search_bf_impl(const void *query, const IndexQueryMeta &qmeta,
                                 uint32_t count,
                                 Context::Pointer &context) const {
  if (ailego_unlikely(!query || !context)) {
    LOG_ERROR("The context is not created by this searcher");
    return IndexError_Mismatch;
  }
  HnswContext *ctx = dynamic_cast<HnswContext *>(context.get());
  ailego_do_if_false(ctx) {
    LOG_ERROR("Cast context to HnswContext failed");
    return IndexError_Cast;
  }
  if (ctx->magic() != magic_) {
    //! context is created by another searcher or streamer
    int ret = update_context(ctx);
    if (ret != 0) {
      return ret;
    }
  }

  ctx->clear();
  ctx->resize_results(count);

  if (ctx->group_by_search()) {
    if (!ctx->group_by().is_valid()) {
      LOG_ERROR("Invalid group-by function");
      return IndexError_InvalidArgument;
    }

    std::function<std::string(node_id_t)> group_by = [&](node_id_t id) {
      return ctx->group_by()(entity_.get_key(id));
    };

    for (size_t q = 0; q < count; ++q) {
      ctx->reset_query(query);
      ctx->group_topk_heaps().clear();

      for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {
        if (entity_.get_key(id) == kInvalidKey) {
          continue;
        }
        if (!ctx->filter().is_valid() || !ctx->filter()(entity_.get_key(id))) {
          dist_t dist = ctx->dist_calculator().batch_dist(id);

          std::string group_id = group_by(id);

          auto &topk_heap = ctx->group_topk_heaps()[group_id];
          if (topk_heap.empty()) {
            topk_heap.limit(ctx->group_topk());
          }
          topk_heap.emplace_back(id, dist);
        }
      }
      ctx->topk_to_result(q);
      query = static_cast<const char *>(query) + qmeta.element_size();
    }
  } else {
    for (size_t q = 0; q < count; ++q) {
      ctx->reset_query(query);
      ctx->topk_heap().clear();
      for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {
        if (entity_.get_key(id) == kInvalidKey) {
          continue;
        }
        if (!ctx->filter().is_valid() || !ctx->filter()(entity_.get_key(id))) {
          dist_t dist = ctx->dist_calculator().batch_dist(id);
          ctx->topk_heap().emplace(id, dist);
        }
      }
      ctx->topk_to_result(q);
      query = static_cast<const char *>(query) + qmeta.element_size();
    }
  }

  if (ailego_unlikely(ctx->error())) {
    return IndexError_Runtime;
  }

  return 0;
}

int HnswSearcher::search_bf_by_p_keys_impl(
    const void *query, const std::vector<std::vector<uint64_t>> &p_keys,
    const IndexQueryMeta &qmeta, uint32_t count,
    Context::Pointer &context) const {
  if (ailego_unlikely(!query || !context)) {
    LOG_ERROR("The context is not created by this searcher");
    return IndexError_Mismatch;
  }

  if (ailego_unlikely(p_keys.size() != count)) {
    LOG_ERROR("The size of p_keys is not equal to count");
    return IndexError_InvalidArgument;
  }

  HnswContext *ctx = dynamic_cast<HnswContext *>(context.get());
  ailego_do_if_false(ctx) {
    LOG_ERROR("Cast context to HnswContext failed");
    return IndexError_Cast;
  }
  if (ctx->magic() != magic_) {
    //! context is created by another searcher or streamer
    int ret = update_context(ctx);
    if (ret != 0) {
      return ret;
    }
  }

  ctx->clear();
  ctx->resize_results(count);

  if (ctx->group_by_search()) {
    if (!ctx->group_by().is_valid()) {
      LOG_ERROR("Invalid group-by function");
      return IndexError_InvalidArgument;
    }

    std::function<std::string(node_id_t)> group_by = [&](node_id_t id) {
      return ctx->group_by()(entity_.get_key(id));
    };

    for (size_t q = 0; q < count; ++q) {
      ctx->reset_query(query);
      ctx->group_topk_heaps().clear();

      for (size_t idx = 0; idx < p_keys[q].size(); ++idx) {
        uint64_t pk = p_keys[q][idx];
        if (!ctx->filter().is_valid() || !ctx->filter()(pk)) {
          node_id_t id = entity_.get_id(pk);
          if (id != kInvalidNodeId) {
            dist_t dist = ctx->dist_calculator().batch_dist(id);
            std::string group_id = group_by(id);

            auto &topk_heap = ctx->group_topk_heaps()[group_id];
            if (topk_heap.empty()) {
              topk_heap.limit(ctx->group_topk());
            }
            topk_heap.emplace_back(id, dist);
          }
        }
      }
      ctx->topk_to_result(q);
      query = static_cast<const char *>(query) + qmeta.element_size();
    }
  } else {
    for (size_t q = 0; q < count; ++q) {
      ctx->reset_query(query);
      ctx->topk_heap().clear();
      for (size_t idx = 0; idx < p_keys[q].size(); ++idx) {
        uint64_t pk = p_keys[q][idx];
        if (!ctx->filter().is_valid() || !ctx->filter()(pk)) {
          node_id_t id = entity_.get_id(pk);
          if (id != kInvalidNodeId) {
            dist_t dist = ctx->dist_calculator().batch_dist(id);
            ctx->topk_heap().emplace(id, dist);
          }
        }
      }
      ctx->topk_to_result(q);
      query = static_cast<const char *>(query) + qmeta.element_size();
    }
  }

  if (ailego_unlikely(ctx->error())) {
    return IndexError_Runtime;
  }

  return 0;
}

//! Create a searcher context
//! Requires STATE_LOADED. The context gets its own clone of the entity and
//! is configured with this searcher's ef, scan budget, visit-filter mode and
//! probability, magic, padding flag and brute-force threshold. Returns an
//! empty pointer when the index is not loaded, the entity cannot be cloned,
//! the allocation fails or the context fails to initialize.
IndexSearcher::Context::Pointer HnswSearcher::create_context() const {
  if (ailego_unlikely(state_ != STATE_LOADED)) {
    LOG_ERROR("Load the index first before create context");
    return Context::Pointer();
  }
  const HnswEntity::Pointer search_ctx_entity = entity_.clone();
  if (!search_ctx_entity) {
    LOG_ERROR("Failed to create search context entity");
    return Context::Pointer();
  }
  HnswContext *ctx = new (std::nothrow)
      HnswContext(meta_.dimension(), metric_, search_ctx_entity);
  if (ailego_unlikely(ctx == nullptr)) {
    LOG_ERROR("Failed to new HnswContext");
    return Context::Pointer();
  }
  ctx->set_ef(ef_);
  ctx->set_max_scan_num(max_scan_num_);
  // Visited-node tracking: bloom filter when enabled, byte map otherwise
  uint32_t filter_mode =
      bf_enabled_ ? VisitFilter::BloomFilter : VisitFilter::ByteMap;
  ctx->set_filter_mode(filter_mode);
  ctx->set_filter_negative_probability(bf_negative_probability_);
  ctx->set_magic(magic_);
  ctx->set_force_padding_topk(force_padding_topk_enabled_);
  ctx->set_bruteforce_threshold(bruteforce_threshold_);
  // The comparison belongs inside the branch hint
  if (ailego_unlikely(ctx->init(HnswContext::kSearcherContext) != 0)) {
    LOG_ERROR("Init HnswContext failed");
    delete ctx;
    return Context::Pointer();
  }

  return Context::Pointer(ctx);
}

//! Create a provider over a clone of the searcher's entity, owned by
//! "HnswSearcher". Returns an empty pointer when the entity cannot be
//! cloned; the provider pointer is null if its allocation fails.
IndexProvider::Pointer HnswSearcher::create_provider(void) const {
  LOG_DEBUG("HnswSearcher create provider");

  const auto cloned = entity_.clone();
  if (ailego_unlikely(!cloned)) {
    LOG_ERROR("Clone HnswEntity failed");
    return Provider::Pointer();
  }

  auto *provider =
      new (std::nothrow) HnswIndexProvider(meta_, cloned, "HnswSearcher");
  return Provider::Pointer(provider);
}

//! Retrieve a vector using a primary key; the lookup is delegated to the
//! entity and its result is returned unchanged
const void *HnswSearcher::get_vector(uint64_t key) const {
  return entity_.get_vector_by_key(key);
}

// NOTE(review): presumably makes HnswSearcher creatable through the index
// factory — the macro is defined outside this file, confirm its exact effect.
INDEX_FACTORY_REGISTER_SEARCHER(HnswSearcher);

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw/hnsw_searcher.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <zvec/core/framework/index_framework.h>
#include "hnsw_searcher_entity.h"
#include "hnsw_streamer.h"

namespace zvec {
namespace core {

//! HNSW implementation of the IndexSearcher interface.
//! The loaded graph is held in an HnswSearcherEntity and searches are carried
//! out through an HnswAlgorithm instance. The object is non-copyable.
class HnswSearcher : public IndexSearcher {
 public:
  using ContextPointer = IndexSearcher::Context::Pointer;

 public:
  HnswSearcher(void);
  ~HnswSearcher(void);

  HnswSearcher(const HnswSearcher &) = delete;
  HnswSearcher &operator=(const HnswSearcher &) = delete;

 protected:
  //! Initialize Searcher
  virtual int init(const ailego::Params &params) override;

  //! Cleanup Searcher
  virtual int cleanup(void) override;

  //! Load Index from storage
  virtual int load(IndexStorage::Pointer container,
                   IndexMetric::Pointer metric) override;

  //! Unload index from storage
  virtual int unload(void) override;

  //! KNN Search for a single query (forwards to the batch overload with
  //! count = 1)
  virtual int search_impl(const void *query, const IndexQueryMeta &qmeta,
                          ContextPointer &context) const override {
    return search_impl(query, qmeta, 1, context);
  }

  //! KNN Search for `count` queries
  virtual int search_impl(const void *query, const IndexQueryMeta &qmeta,
                          uint32_t count,
                          ContextPointer &context) const override;

  //! Linear Search for a single query (forwards to the batch overload with
  //! count = 1)
  virtual int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,
                             ContextPointer &context) const override {
    return search_bf_impl(query, qmeta, 1, context);
  }

  //! Linear Search for `count` queries
  virtual int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,
                             uint32_t count,
                             ContextPointer &context) const override;

  //! Linear search by primary keys for a single query (forwards to the batch
  //! overload with count = 1)
  virtual int search_bf_by_p_keys_impl(
      const void *query, const std::vector<std::vector<uint64_t>> &p_keys,
      const IndexQueryMeta &qmeta, ContextPointer &context) const override {
    return search_bf_by_p_keys_impl(query, p_keys, qmeta, 1, context);
  }

  //! Linear search by primary keys for `count` queries
  virtual int search_bf_by_p_keys_impl(
      const void *query, const std::vector<std::vector<uint64_t>> &p_keys,
      const IndexQueryMeta &qmeta, uint32_t count,
      ContextPointer &context) const override;

  //! Fetch vector by key
  virtual const void *get_vector(uint64_t key) const override;

  //! Create a searcher context
  virtual ContextPointer create_context() const override;

  //! Create a new index provider
  virtual IndexProvider::Pointer create_provider(void) const override;

  //! Retrieve statistics
  virtual const Stats &stats(void) const override {
    return stats_;
  }

  //! Retrieve meta of index
  virtual const IndexMeta &meta(void) const override {
    return meta_;
  }

  //! Retrieve params of index
  virtual const ailego::Params &params(void) const override {
    return params_;
  }

  //! Print debug information (implemented in the source file)
  virtual void print_debug_info() override;

 private:
  //! To share ctx across streamer/searcher, we need to update the context for
  //! current streamer/searcher
  int update_context(HnswContext *ctx) const;

 private:
  //! Lifecycle states of the searcher
  enum State { STATE_INIT = 0, STATE_INITED = 1, STATE_LOADED = 2 };

  HnswSearcherEntity entity_{};  // graph/vector storage of the loaded index
  HnswAlgorithm::UPointer alg_;  // impl graph algorithm

  IndexMetric::Pointer metric_{};
  IndexMeta meta_{};
  ailego::Params params_{};
  Stats stats_;
  // Search settings; several of them are copied into every context made by
  // create_context()
  uint32_t ef_{HnswEntity::kDefaultEf};
  uint32_t max_scan_num_{0U};
  uint32_t bruteforce_threshold_{HnswEntity::kDefaultBruteForceThreshold};
  float max_scan_ratio_{HnswEntity::kDefaultScanRatio};
  bool bf_enabled_{false};  // selects the bloom-filter visit filter in contexts
  bool check_crc_enabled_{false};
  bool neighbors_in_memory_enabled_{false};
  bool force_padding_topk_enabled_{false};
  float bf_negative_probability_{HnswEntity::kDefaultBFNegativeProbability};
  uint32_t magic_{0U};

  State state_{STATE_INIT};
};

}  // namespace core
}  // namespace zvec

================================================
FILE: src/core/algorithm/hnsw/hnsw_searcher_entity.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "hnsw_searcher_entity.h"
#include <zvec/ailego/hash/crc32c.h>
#include "utility/sparse_utility.h"

namespace zvec {
namespace core {

//! Default constructor: every member keeps its in-class initializer.
HnswSearcherEntity::HnswSearcherEntity() = default;

int HnswSearcherEntity::cleanup(void) {
  storage_.reset();
  vectors_.reset();
  keys_.reset();
  neighbors_.reset();
  neighbors_meta_.reset();
  neighbors_in_memory_enabled_ = false;
  loaded_ = false;

  this->HnswEntity::cleanup();

  return 0;
}

//! Look up the primary key stored for local node `id` in the keys segment.
//! Returns kInvalidKey when the segment read comes back short.
key_t HnswSearcherEntity::get_key(node_id_t id) const {
  constexpr size_t kKeyBytes = sizeof(key_t);

  const void *raw = nullptr;
  const auto bytes_read = keys_->read(id * kKeyBytes, &raw, kKeyBytes);
  if (ailego_unlikely(bytes_read != kKeyBytes)) {
    LOG_ERROR("Read key from segment failed");
    return kInvalidKey;
  }
  return *static_cast<const key_t *>(raw);
}

//! Resolve a primary key to its local node id.
//! Runs a binary search over the mapping segment (this assumes the mapping
//! lists node ids in ascending order of their keys) and compares against the
//! key stored for each probed node. Returns kInvalidNodeId when the mapping
//! segment is missing, a read fails, or the key is not present.
node_id_t HnswSearcherEntity::get_id(key_t key) const {
  if (ailego_unlikely(!mapping_)) {
    LOG_ERROR("Index missing mapping segment");
    return kInvalidNodeId;
  }

  node_id_t lo = 0UL;
  node_id_t hi = doc_cnt();
  while (lo < hi) {
    const node_id_t mid = lo + (hi - lo) / 2;

    // Mapping slot -> local node id
    const void *slot = nullptr;
    const auto slot_bytes =
        mapping_->read(mid * sizeof(node_id_t), &slot, sizeof(node_id_t));
    if (ailego_unlikely(slot_bytes != sizeof(node_id_t))) {
      LOG_ERROR("Read key from segment failed");
      return kInvalidNodeId;
    }
    const node_id_t candidate = *static_cast<const node_id_t *>(slot);

    // Local node id -> stored key
    const void *key_raw = nullptr;
    const auto key_bytes =
        keys_->read(candidate * sizeof(key_t), &key_raw, sizeof(key_t));
    if (ailego_unlikely(key_bytes != sizeof(key_t))) {
      LOG_ERROR("Read key from segment failed");
      return kInvalidNodeId;
    }
    const key_t stored = *static_cast<const key_t *>(key_raw);

    if (stored == key) {
      return candidate;
    }
    if (stored < key) {
      lo = mid + 1;
    } else {
      hi = mid;
    }
  }
  return kInvalidNodeId;
}

//! Fetch the vector that belongs to primary key `key`.
//! Returns nullptr when the key cannot be resolved to a node id; otherwise the
//! result of the id-based lookup is returned.
const void *HnswSearcherEntity::get_vector_by_key(key_t key) const {
  const node_id_t node = get_id(key);
  return ailego_unlikely(node == kInvalidNodeId) ? nullptr : get_vector(node);
}

//! Read the vector of node `id` from the vectors segment.
//! Nodes are laid out node_size() bytes apart and vector_size() bytes are
//! read for each one. Returns nullptr when the read comes back short.
const void *HnswSearcherEntity::get_vector(node_id_t id) const {
  const size_t bytes = vector_size();
  const size_t position = node_size() * id;

  const void *vec = nullptr;
  const auto bytes_read = vectors_->read(position, &vec, bytes);
  if (ailego_unlikely(bytes_read != bytes)) {
    LOG_ERROR("Read vector from segment failed");
    return nullptr;
  }
  return vec;
}

int HnswSearcherEntity::get_vector(const node_id_t id,
                                   IndexStorage::MemoryBlock &block) const {
  const void *vec = get_vector(id);
  block.reset((void *)vec);
  return 0;
}

//! Read the whole vectors segment (node_size() * doc_cnt() bytes starting at
//! offset 0) in one call. Returns nullptr when the read comes back short.
const void *HnswSearcherEntity::get_vectors() const {
  const size_t total_bytes = node_size() * doc_cnt();

  const void *base = nullptr;
  const auto bytes_read = vectors_->read(0, &base, total_bytes);
  if (bytes_read != total_bytes) {
    LOG_ERROR("Read vectors from segment failed");
    return nullptr;
  }
  return base;
}

//! Batch-read the vectors of `count` nodes.
//! One read request per id is prepared in the reusable `segment_datas_`
//! scratch buffer, the requests are submitted in a single segment call, and
//! the resulting data pointers are written to `vecs[0..count)`.
//! Returns 0 on success or IndexError_ReadData when the batch read fails.
int HnswSearcherEntity::get_vector(const node_id_t *ids, uint32_t count,
                                   const void **vecs) const {
  ailego_assert_with(count <= segment_datas_.size(), "invalid count");

  const size_t bytes_per_vector = vector_size();

  // Describe every request: where the node starts and how much to read.
  for (uint32_t i = 0; i != count; ++i) {
    auto &request = segment_datas_[i];
    request.offset = node_size() * ids[i];
    request.length = bytes_per_vector;

    ailego_assert_with(segment_datas_[i].offset < vectors_->data_size(),
                       "invalid offset");
  }

  const bool read_ok = vectors_->read(&segment_datas_[0], count);
  if (ailego_unlikely(!read_ok)) {
    LOG_ERROR("Read vectors from segment failed");
    return IndexError_ReadData;
  }

  // Hand the data pointers back in request order.
  for (uint32_t i = 0; i != count; ++i) {
    vecs[i] = segment_datas_[i].data;
  }

  return 0;
}

int HnswSearcherEntity::get_vector(
    const node_id_t *ids, uint32_t count,
    std::vector<IndexStorage::MemoryBlock> &vec_blocks) const {
  const void *vecs[count];
  get_vector(ids, count, vecs);
  for (uint32_t i = 0; i < count; ++i) {
    vec_blocks.emplace_back(IndexStorage::MemoryBlock((void *)vecs[i]));
  }
  return 0;
}

//! Return the neighbor list of node `id` on graph level `level`.
//! Levels above 0 are served from the upper-neighbor segments: the per-node
//! meta gives the base offset and each level occupies one fixed-size slot.
//! Level 0 is served from the flattened in-memory buffer when that mode is
//! enabled, otherwise from the level-0 meta and neighbor segments.
//! Any failed segment read yields an empty list ({0, nullptr}).
const Neighbors HnswSearcherEntity::get_neighbors(level_t level,
                                                  node_id_t id) const {
  if (level != 0) {
    //! Read level > 0 neighbors
    constexpr size_t kUpperMetaBytes = sizeof(HnswNeighborMeta);
    const HnswNeighborMeta *m = nullptr;
    const auto meta_read = upper_neighbors_meta_->read(
        id * kUpperMetaBytes, reinterpret_cast<const void **>(&m),
        kUpperMetaBytes);
    if (ailego_unlikely(meta_read != kUpperMetaBytes)) {
      LOG_ERROR("Read neighbors meta from segment failed");
      return {0, nullptr};
    }

    ailego_assert_with(level <= m->level, "invalid level");
    const size_t slot_bytes = upper_neighbors_size();
    size_t offset = m->offset + (level - 1) * slot_bytes;
    ailego_assert_with(offset <= upper_neighbors_->data_size(),
                       "invalid offset");

    const void *slot = nullptr;
    const auto slot_read = upper_neighbors_->read(offset, &slot, slot_bytes);
    if (ailego_unlikely(slot_read != slot_bytes)) {
      LOG_ERROR("Read neighbors from segment failed");
      return {0, nullptr};
    }

    auto header = reinterpret_cast<const NeighborsHeader *>(slot);
    return {header->neighbor_cnt, header->neighbors};
  }

  // Level 0, flattened copy: every node owns one fixed-size slot.
  if (neighbors_in_memory_enabled_) {
    const char *slot = fixed_neighbors_.get() + neighbors_size() * id;
    auto header = reinterpret_cast<const NeighborsHeader *>(slot);
    return {header->neighbor_cnt, header->neighbors};
  }

  // Level 0, segment-backed: meta record first, then the id list it points to.
  constexpr size_t kMetaBytes = sizeof(GraphNeighborMeta);
  const GraphNeighborMeta *meta = nullptr;
  const auto meta_read = neighbors_meta_->read(
      id * kMetaBytes, reinterpret_cast<const void **>(&meta), kMetaBytes);
  if (ailego_unlikely(meta_read != kMetaBytes)) {
    LOG_ERROR("Read neighbors meta from segment failed");
    return {0, nullptr};
  }

  const void *list = nullptr;
  const auto list_read = neighbors_->read(
      meta->offset, &list, meta->neighbor_cnt * sizeof(node_id_t));
  if (ailego_unlikely(list_read != meta->neighbor_cnt * sizeof(node_id_t))) {
    LOG_ERROR("Read neighbors from segment failed");
    return {0, nullptr};
  }
  return {static_cast<uint32_t>(meta->neighbor_cnt),
          reinterpret_cast<const node_id_t *>(list)};
}

//! Attach the entity to `container` and load all index segments from it.
//! On success the entity is marked as loaded and a summary of the index
//! header and segment sizes is logged; on failure the error code from
//! load_segments() is returned and the loaded flag is left unchanged.
int HnswSearcherEntity::load(const IndexStorage::Pointer &container,
                             bool check_crc) {
  storage_ = container;

  const int status = load_segments(check_crc);
  if (status == 0) {
    loaded_ = true;

    const auto neighbors_bytes =
        neighbors_ == nullptr ? 0 : neighbors_->data_size();
    const auto neighbors_meta_bytes =
        neighbors_meta_ == nullptr ? 0 : neighbors_meta_->data_size();

    LOG_INFO(
        "Index info: docCnt=%u entryPoint=%u maxLevel=%d efConstruct=%zu "
        "l0NeighborCnt=%zu upperNeighborCnt=%zu scalingFactor=%zu "
        "vectorSize=%zu nodeSize=%zu vectorSegmentSize=%zu keySegmentSize=%zu "
        "neighborsSegmentSize=%zu neighborsMetaSegmentSize=%zu ",
        doc_cnt(), entry_point(), cur_max_level(), ef_construction(),
        l0_neighbor_cnt(), upper_neighbor_cnt(), scaling_factor(),
        vector_size(), node_size(), vectors_->data_size(), keys_->data_size(),
        neighbors_bytes, neighbors_meta_bytes);
  }

  return status;
}

//! Load the index header and fetch every segment handle from `storage_`.
//! Steps, in order: read the graph and HNSW header parts and install them as
//! the entity header, size the batch-read scratch buffer, fetch and validate
//! each data segment, optionally verify segment CRCs, and optionally flatten
//! the level-0 neighbors into memory.
//! Returns 0 on success, otherwise one of the IndexError_* codes used below.
int HnswSearcherEntity::load_segments(bool check_crc) {
  //! load header
  const void *data = nullptr;
  HNSWHeader hd;
  // Graph part of the header
  auto graph_hd_segment = storage_->get(kGraphHeaderSegmentId);
  if (!graph_hd_segment || graph_hd_segment->data_size() < sizeof(hd.graph)) {
    LOG_ERROR("Miss or invalid segment %s", kGraphHeaderSegmentId.c_str());
    return IndexError_InvalidFormat;
  }
  if (graph_hd_segment->read(0, reinterpret_cast<const void **>(&data),
                             sizeof(hd.graph)) != sizeof(hd.graph)) {
    LOG_ERROR("Read segment %s failed", kGraphHeaderSegmentId.c_str());
    return IndexError_ReadData;
  }
  memcpy(&hd.graph, data, sizeof(hd.graph));

  // HNSW part of the header
  auto hnsw_hd_segment = storage_->get(kHnswHeaderSegmentId);
  if (!hnsw_hd_segment || hnsw_hd_segment->data_size() < sizeof(hd.hnsw)) {
    LOG_ERROR("Miss or invalid segment %s", kHnswHeaderSegmentId.c_str());
    return IndexError_InvalidFormat;
  }
  if (hnsw_hd_segment->read(0, reinterpret_cast<const void **>(&data),
                            sizeof(hd.hnsw)) != sizeof(hd.hnsw)) {
    LOG_ERROR("Read segment %s failed", kHnswHeaderSegmentId.c_str());
    return IndexError_ReadData;
  }
  memcpy(&hd.hnsw, data, sizeof(hd.hnsw));
  // Install the header first: the doc_cnt()/neighbor-count based checks below
  // read their values through it.
  *mutable_header() = hd;
  // Scratch buffer for batch vector reads, sized to the larger neighbor count
  segment_datas_.resize(std::max(l0_neighbor_cnt(), upper_neighbor_cnt()));

  // Vector and key segments: only their presence is checked here
  vectors_ = storage_->get(kGraphFeaturesSegmentId);
  if (!vectors_) {
    LOG_ERROR("IndexStorage get segment %s failed",
              kGraphFeaturesSegmentId.c_str());
    return IndexError_InvalidFormat;
  }
  keys_ = storage_->get(kGraphKeysSegmentId);
  if (!keys_) {
    LOG_ERROR("IndexStorage get segment %s failed",
              kGraphKeysSegmentId.c_str());
    return IndexError_InvalidFormat;
  }

  // Level-0 neighbors: an empty segment is tolerated only for indexes with at
  // most one document
  neighbors_ = storage_->get(kGraphNeighborsSegmentId);
  if (!neighbors_ || (neighbors_->data_size() == 0 && doc_cnt() > 1)) {
    LOG_ERROR("IndexStorage get segment %s failed or empty",
              kGraphNeighborsSegmentId.c_str());
    return IndexError_InvalidArgument;
  }
  // Level-0 neighbor meta: needs one GraphNeighborMeta record per document
  neighbors_meta_ = storage_->get(kGraphOffsetsSegmentId);
  if (!neighbors_meta_ ||
      neighbors_meta_->data_size() < sizeof(GraphNeighborMeta) * doc_cnt()) {
    LOG_ERROR("IndexStorage get segment %s failed or invalid size",
              kGraphOffsetsSegmentId.c_str());
    return IndexError_InvalidArgument;
  }

  // Upper-level neighbors: an empty segment is tolerated only when the graph
  // has no level above 0
  upper_neighbors_ = storage_->get(kHnswNeighborsSegmentId);
  if (!upper_neighbors_ ||
      (upper_neighbors_->data_size() == 0 && cur_max_level() > 0)) {
    LOG_ERROR("IndexStorage get segment %s failed or empty",
              kHnswNeighborsSegmentId.c_str());
    return IndexError_InvalidArgument;
  }

  // Upper-level neighbor meta: needs one HnswNeighborMeta record per document
  upper_neighbors_meta_ = storage_->get(kHnswOffsetsSegmentId);
  if (!upper_neighbors_meta_ || upper_neighbors_meta_->data_size() <
                                    sizeof(HnswNeighborMeta) * doc_cnt()) {
    LOG_ERROR("IndexStorage get segment %s failed or invalid size",
              kHnswOffsetsSegmentId.c_str());
    return IndexError_InvalidArgument;
  }

  // Key-lookup mapping: needs one node id per document
  mapping_ = storage_->get(kGraphMappingSegmentId);
  if (!mapping_ || mapping_->data_size() < sizeof(node_id_t) * doc_cnt()) {
    LOG_ERROR("IndexStorage get segment %s failed or invalid size",
              kGraphMappingSegmentId.c_str());
    return IndexError_InvalidArgument;
  }

  // Optional integrity check.
  // NOTE(review): the mapping segment is not part of the CRC list -- confirm
  // whether that is intentional.
  if (check_crc) {
    std::vector<SegmentPointer> segments;
    segments.emplace_back(graph_hd_segment);
    segments.emplace_back(hnsw_hd_segment);
    segments.emplace_back(vectors_);
    segments.emplace_back(keys_);

    segments.emplace_back(neighbors_);
    segments.emplace_back(neighbors_meta_);
    segments.emplace_back(upper_neighbors_);
    segments.emplace_back(upper_neighbors_meta_);

    if (!do_crc_check(segments)) {
      LOG_ERROR("Check index crc failed, the index may broken");
      return IndexError_Runtime;
    }
  }

  // Optionally copy the level-0 neighbors into a fixed-stride memory buffer
  if (neighbors_in_memory_enabled_) {
    int ret = load_and_flat_neighbors();
    if (ret != 0) {
      return ret;
    }
  }

  return 0;
}

int HnswSearcherEntity::load_and_flat_neighbors() {
  fixed_neighbors_.reset(
      new (std::nothrow) char[neighbors_size() * doc_cnt()]{},
      std::default_delete<char[]>());
  if (!fixed_neighbors_) {
    LOG_ERROR("Malloc memory failed");
    return IndexError_NoMemory;
  }

  //! Get a new segemnt to release the buffer after loading neighbors
  auto neighbors_meta = storage_->get(kGraphOffsetsSegmentId);
  if (!neighbors_meta) {
    LOG_ERROR("IndexStorage get segment graph.offsets failed");
    return IndexError_InvalidArgument;
  }

  const GraphNeighborMeta *neighbors_index = nullptr;
  if (neighbors_meta->read(0, reinterpret_cast<const void **>(&neighbors_index),
                           neighbors_meta->data_size()) !=
      neighbors_meta->data_size()) {
    LOG_ERROR("Read segment %s data failed", kGraphOffsetsSegmentId.c_str());
    return IndexError_InvalidArgument;
  }

  const char *neighbor_data;
  for (node_id_t id = 0; id < doc_cnt(); ++id) {
    size_t rd_size = neighbors_index[id].neighbor_cnt * sizeof(node_id_t);
    if (ailego_unlikely(
            neighbors_->read(neighbors_index[id].offset,
                             reinterpret_cast<const void **>(&neighbor_data),
                             rd_size) != rd_size)) {
      LOG_ERROR("Read neighbors from segment failed");
      return IndexError_ReadData;
    }
    // copy level 0 neighbors to fixed size neighbors memory
    char *dst = fixed_neighbors_.get() + neighbors_size() * id;
    *reinterpret_cast<uint32_t *>(dst) = neighbors_index[id].neighbor_cnt;
    memcpy(dst + sizeof(uint32_t), neighbor_data, rd_size);
  }

  return 0;
}

int HnswSearcherEntity::get_fixed_neighbors(
    std::vector<uint32_t> *fixed_neighbors) const {
  //! Get a new segemnt to release the buffer after loading neighbors
  auto neighbors_meta = storage_->get(kGraphOffsetsSegmentId);
  if (!neighbors_meta) {
    LOG_ERROR("IndexStorage get segment graph.offsets failed");
    return IndexError_InvalidArgument;
  }

  const GraphNeighborMeta *neighbors_index = nullptr;
  size_t meta_size = neighbors_meta->data_size();
  if (neighbors_meta->read(0, reinterpret_cast<const void **>(&neighbors_index),
                           meta_size) != meta_size) {
    LOG_ERROR("Read segment %s data failed", kGraphOffsetsSegmentId.c_str());
    return IndexError_InvalidArgument;
  }

  size_t fixed_neighbor_cnt = l0_neighbor_cnt();
  fixed_neighbors->resize((fixed_neighbor_cnt + 1) * doc_cnt(), kInvalidNodeId);

  size_t neighbors_cnt_offset = fixed_neighbor_cnt * doc_cnt();
  size_t total_neighbor_cnt = 0;
  for (node_id_t id = 0; id < doc_cnt(); ++id) {
    size_t cur_neighbor_cnt = neighbors_index[id].neighbor_cnt;
    if (cur_neighbor_cnt == 0) {
      (*fixed_neighbors)[neighbors_cnt_offset + id] = 0;
      continue;
    }
    size_t rd_size = cur_neighbor_cnt * sizeof(node_id_t);
    const uint32_t *neighbors;
    if (neighbors_->read(neighbors_index[id].offset,
                         reinterpret_cast<const void **>(&neighbors),
                         rd_size) != rd_size) {
      LOG_ERROR("Read neighbors from segment failed");
      return IndexError_ReadData;
    }

    // copy level 0 neighbors to fixed size neighbors memory
    auto it = fixed_neighbors->begin() + id * fixed_neighbor_cnt;
    std::copy(neighbors, neighbors + cur_neighbor_cnt, it);

    (*fixed_neighbors)[neighbors_cnt_offset + id] = cur_neighbor_cnt;
    total_neighbor_cnt += cur_neighbor_cnt;
  }
  LOG_INFO("total neighbor cnt: %zu, average neighbor cnt: %zu",
           total_neighbor_cnt, total_neighbor_cnt / doc_cnt());

  return 0;
}

//! Verify the stored CRC of every segment in `segments`.
//! Each segment is read in blocks of at most 4096 bytes and folded into a
//! running CRC32C, which is then compared with the segment's data_crc().
//! A read that yields no bytes stops the scan of that segment early, and the
//! comparison is made with whatever was accumulated so far.
//! Returns false at the first mismatch, true when all segments match.
bool HnswSearcherEntity::do_crc_check(
    std::vector<SegmentPointer> &segments) const {
  constexpr size_t blk_size = 4096;

  for (auto &segment : segments) {
    const void *chunk = nullptr;
    uint32_t checksum = 0;

    for (size_t pos = 0; pos < segment->data_size();) {
      const size_t wanted = std::min(blk_size, segment->data_size() - pos);
      const size_t got = segment->read(pos, &chunk, wanted);
      if (got == 0) {
        break;
      }
      pos += got;
      checksum = ailego::Crc32c::Hash(chunk, got, checksum);
    }

    if (checksum != segment->data_crc()) {
      return false;
    }
  }
  return true;
}

//! Create a new searcher entity that works on cloned segment handles.
//! The header is copied, every segment is cloned, and the flattened level-0
//! buffer (if any) is shared with the clone rather than copied.
//! Returns a null pointer when any segment clone or the allocation fails.
const HnswEntity::Pointer HnswSearcherEntity::clone() const {
  auto vectors = vectors_->clone();
  if (ailego_unlikely(!vectors)) {
    LOG_ERROR("clone segment %s failed", kGraphFeaturesSegmentId.c_str());
    return HnswEntity::Pointer();
  }
  auto keys = keys_->clone();
  if (ailego_unlikely(!keys)) {
    LOG_ERROR("clone segment %s failed", kGraphKeysSegmentId.c_str());
    return HnswEntity::Pointer();
  }

  auto mapping = mapping_->clone();
  if (ailego_unlikely(!mapping)) {
    LOG_ERROR("clone segment %s failed", kGraphMappingSegmentId.c_str());
    return HnswEntity::Pointer();
  }

  auto neighbors = neighbors_->clone();
  if (ailego_unlikely(!neighbors)) {
    LOG_ERROR("clone segment %s failed", kGraphNeighborsSegmentId.c_str());
    return HnswEntity::Pointer();
  }
  auto upper_neighbors = upper_neighbors_->clone();
  // Check the segment that was just cloned (the previous code re-checked
  // `neighbors`, so a failed upper-level clone went unnoticed).
  if (ailego_unlikely(!upper_neighbors)) {
    LOG_ERROR("clone segment %s failed", kHnswNeighborsSegmentId.c_str());
    return HnswEntity::Pointer();
  }
  auto neighbors_meta = neighbors_meta_->clone();
  if (ailego_unlikely(!neighbors_meta)) {
    LOG_ERROR("clone segment %s failed", kGraphOffsetsSegmentId.c_str());
    return HnswEntity::Pointer();
  }
  auto upper_neighbors_meta = upper_neighbors_meta_->clone();
  if (ailego_unlikely(!upper_neighbors_meta)) {
    LOG_ERROR("clone segment %s failed", kHnswOffsetsSegmentId.c_str());
    return HnswEntity::Pointer();
  }

  SegmentGroupParam neighbor_group{neighbors, neighbors_meta, upper_neighbors,
                                   upper_neighbors_meta};

  HnswSearcherEntity *entity = new (std::nothrow)
      HnswSearcherEntity(header(), vectors, keys, mapping, neighbor_group,
                         fixed_neighbors_, neighbors_in_memory_enabled_);
  if (ailego_unlikely(!entity)) {
    LOG_ERROR("HnswSearcherEntity new failed");
  }

  // A failed allocation is passed on as a null entity pointer.
  return HnswEntity::Pointer(entity);
}

}  // namespace core
}  // namespace zvec

================================================
FILE: src/core/algorithm/hnsw/hnsw_searcher_entity.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include "hnsw_builder_entity.h"
#include "hnsw_entity.h"

namespace zvec {
namespace core {

//! Read-only HNSW entity used by the searcher.
//! It keeps handles to the index segments obtained from an IndexStorage
//! (vectors, keys, key mapping, level-0 and upper-level neighbors with their
//! meta) and can optionally hold a flattened in-memory copy of the level-0
//! neighbors. The class is non-copyable; use clone() to get another instance.
class HnswSearcherEntity : public HnswEntity {
 public:
  using Pointer = std::shared_ptr<HnswSearcherEntity>;
  using SegmentPointer = IndexStorage::Segment::Pointer;

 public:
  //! Bundle of the four neighbor-related segments, used to pass them to the
  //! private cloning constructor as one argument
  struct SegmentGroupParam {
    SegmentGroupParam(SegmentPointer neighbors_in,
                      SegmentPointer neighbors_meta_in,
                      SegmentPointer upper_neighbors_in,
                      SegmentPointer upper_neighbors_meta_in)
        : neighbors{neighbors_in},
          neighbors_meta{neighbors_meta_in},
          upper_neighbors{upper_neighbors_in},
          upper_neighbors_meta{upper_neighbors_meta_in} {}

    SegmentPointer neighbors{nullptr};             // level-0 neighbor lists
    SegmentPointer neighbors_meta{nullptr};        // level-0 per-node meta
    SegmentPointer upper_neighbors{nullptr};       // level > 0 neighbor lists
    SegmentPointer upper_neighbors_meta{nullptr};  // level > 0 per-node meta
  };

  //! Constructor
  HnswSearcherEntity();

  //! Make a copy of searcher entity, to support thread-safe operation.
  //! The segment in container cannot be read concurrently
  virtual const HnswEntity::Pointer clone() const override;

  //! Get primary key of the node id
  virtual key_t get_key(node_id_t id) const override;

  //! Get vector local id by key
  node_id_t get_id(key_t key) const;

  //! Get vector feature data by key
  virtual const void *get_vector_by_key(key_t key) const override;

  //! Get vector feature data by id
  virtual const void *get_vector(node_id_t id) const override;

  //! Get vector feature data of `count` ids, written to `vecs`
  virtual int get_vector(const node_id_t *ids, uint32_t count,
                         const void **vecs) const override;

  //! Get vector feature data by id, exposed through a memory block
  virtual int get_vector(const node_id_t id,
                         IndexStorage::MemoryBlock &block) const override;
  //! Get vector feature data of `count` ids, appended to `vec_blocks`
  virtual int get_vector(
      const node_id_t *ids, uint32_t count,
      std::vector<IndexStorage::MemoryBlock> &vec_blocks) const override;

  //! Get all vectors
  const void *get_vectors() const;

  //! Get the node id's neighbors on graph level
  virtual const Neighbors get_neighbors(level_t level,
                                        node_id_t id) const override;

  //! Load the index from the storage container, optionally checking CRCs
  virtual int load(const IndexStorage::Pointer &container,
                   bool check_crc) override;

  //! Load the header and all segments from the attached storage
  int load_segments(bool check_crc);

  //! Release the resources held by the entity
  virtual int cleanup(void) override;

 public:
  //! Whether load() has completed successfully
  bool is_loaded() const {
    return loaded_;
  }

  //! Enable or disable the in-memory flattened level-0 neighbors. The flag is
  //! consulted at the end of load_segments().
  void set_neighbors_in_memory(bool enabled) {
    neighbors_in_memory_enabled_ = enabled;
  }

  //! get fixed length neighbors data
  int get_fixed_neighbors(std::vector<uint32_t> *fixed_neighbors) const;

 private:
  //! Constructor used by clone(): takes ready-made segment handles, the
  //! (shared) flattened neighbor buffer and the in-memory flag
  HnswSearcherEntity(const HNSWHeader &hd, const SegmentPointer &vectors,
                     const SegmentPointer &keys, const SegmentPointer &mapping,
                     const SegmentGroupParam &neighbor_group,
                     const std::shared_ptr<char> &fixed_neighbors,
                     bool neighbors_in_memory_enabled)
      : HnswEntity(hd),
        vectors_(vectors),
        keys_(keys),
        mapping_(mapping),
        neighbors_(neighbor_group.neighbors),
        neighbors_meta_(neighbor_group.neighbors_meta),
        upper_neighbors_(neighbor_group.upper_neighbors),
        upper_neighbors_meta_(neighbor_group.upper_neighbors_meta),
        neighbors_in_memory_enabled_(neighbors_in_memory_enabled) {
    // Scratch buffer for batch reads, sized to the larger neighbor count
    segment_datas_.resize(std::max(l0_neighbor_cnt(), upper_neighbor_cnt()),
                          IndexStorage::SegmentData(0U, 0U));
    fixed_neighbors_ = fixed_neighbors;
  }

  //! Compare the CRC32C of each segment's data with its stored CRC
  bool do_crc_check(std::vector<SegmentPointer> &segments) const;

  //! Byte size of one level-0 neighbor slot (header plus l0_neighbor_cnt ids)
  inline size_t neighbors_size() const {
    return sizeof(NeighborsHeader) + l0_neighbor_cnt() * sizeof(node_id_t);
  }

  //! Byte size of one upper-level neighbor slot (header plus
  //! upper_neighbor_cnt ids)
  inline size_t upper_neighbors_size() const {
    return sizeof(NeighborsHeader) + upper_neighbor_cnt() * sizeof(node_id_t);
  }

  //! If neighbors_in_memory_enabled, load the level0 neighbors to memory
  int load_and_flat_neighbors(void);

 public:
  HnswSearcherEntity(const HnswSearcherEntity &) = delete;
  HnswSearcherEntity &operator=(const HnswSearcherEntity &) = delete;

 private:
  IndexStorage::Pointer storage_{};  // storage the segments were loaded from

  SegmentPointer vectors_{};  // vector data, node_size() bytes per node
  SegmentPointer keys_{};     // primary keys indexed by node id
  SegmentPointer mapping_{};  // node ids searched by key in get_id()

  SegmentPointer neighbors_{};             // level-0 neighbor lists
  SegmentPointer neighbors_meta_{};        // level-0 per-node meta
  SegmentPointer upper_neighbors_{};       // level > 0 neighbor lists
  SegmentPointer upper_neighbors_meta_{};  // level > 0 per-node meta

  // Reusable request buffer for batch vector reads (mutated by const getters)
  mutable std::vector<IndexStorage::SegmentData> segment_datas_{};
  std::shared_ptr<char> fixed_neighbors_{};  // level 0 fixed size neighbors
  bool neighbors_in_memory_enabled_{false};
  bool loaded_{false};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw/hnsw_streamer.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "hnsw_streamer.h"
#include <iostream>
#include <ailego/internal/cpu_features.h>
#include <ailego/pattern/defer.h>
#include <ailego/utility/memory_helper.h>
#include "utility/sparse_utility.h"
#include "hnsw_algorithm.h"
#include "hnsw_context.h"
#include "hnsw_dist_calculator.h"
#include "hnsw_index_provider.h"

namespace zvec {
namespace core {

//! Constructor: the streamer entity is constructed from the streamer's own
//! stats object.
//! NOTE(review): this relies on stats_ being declared before entity_ if the
//! entity uses it during construction -- confirm in hnsw_streamer.h.
HnswStreamer::HnswStreamer() : entity_(stats_) {}

//! Destructor: runs cleanup() only when the streamer is still in the
//! STATE_INITED state; in any other state nothing is done here.
HnswStreamer::~HnswStreamer() {
  const bool needs_cleanup = (state_ == STATE_INITED);
  if (needs_cleanup) {
    cleanup();
  }
}

//! Initialize the streamer from an index meta and a parameter set.
//! Reads every PARAM_HNSW_STREAMER_* option into its member, substitutes
//! defaults for zero values, validates ranges, pushes the resulting settings
//! into the entity and creates the HNSW algorithm object.
//! @param imeta   index meta copied into meta_
//! @param params  streamer parameters
//! @return 0 on success (state becomes STATE_INITED),
//!         IndexError_InvalidArgument on a failed validation, or the error
//!         returned by the entity / algorithm initialization
int HnswStreamer::init(const IndexMeta &imeta, const ailego::Params &params) {
  meta_ = imeta;
  meta_.set_streamer("HnswStreamer", HnswEntity::kRevision, params);

  // NOTE(review): presumably params.get() leaves the target untouched when
  // the key is absent, so members keep their previous/default values.
  params.get(PARAM_HNSW_STREAMER_MAX_INDEX_SIZE, &max_index_size_);

  // The level-0 neighbor count and the prune count are derived from the raw
  // upper neighbor count here, i.e. before its zero-default is substituted
  // further below.
  params.get(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, &upper_max_neighbor_cnt_);
  float multiplier = HnswEntity::kDefaultL0MaxNeighborCntMultiplier;
  params.get(PARAM_HNSW_STREAMER_L0_MAX_NEIGHBOR_COUNT_MULTIPLIER, &multiplier);
  l0_max_neighbor_cnt_ = multiplier * upper_max_neighbor_cnt_;

  multiplier = HnswEntity::kDefaultNeighborPruneMultiplier;
  params.get(PARAM_HNSW_STREAMER_NEIGHBOR_PRUNE_MULTIPLIER, &multiplier);
  size_t prune_cnt = multiplier * upper_max_neighbor_cnt_;
  // Scaling factor starts from the upper neighbor count and may be overridden
  scaling_factor_ = upper_max_neighbor_cnt_;
  params.get(PARAM_HNSW_STREAMER_SCALING_FACTOR, &scaling_factor_);

  params.get(PARAM_HNSW_STREAMER_DOCS_HARD_LIMIT, &docs_hard_limit_);
  params.get(PARAM_HNSW_STREAMER_EF, &ef_);
  params.get(PARAM_HNSW_STREAMER_EFCONSTRUCTION, &ef_construction_);
  params.get(PARAM_HNSW_STREAMER_VISIT_BLOOMFILTER_ENABLE, &bf_enabled_);
  params.get(PARAM_HNSW_STREAMER_VISIT_BLOOMFILTER_NEGATIVE_PROB,
             &bf_negative_prob_);
  params.get(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, &bruteforce_threshold_);
  params.get(PARAM_HNSW_STREAMER_MAX_SCAN_RATIO, &max_scan_ratio_);
  params.get(PARAM_HNSW_STREAMER_MAX_SCAN_LIMIT, &max_scan_limit_);
  params.get(PARAM_HNSW_STREAMER_MIN_SCAN_LIMIT, &min_scan_limit_);
  params.get(PARAM_HNSW_STREAMER_CHECK_CRC_ENABLE, &check_crc_enabled_);
  params.get(PARAM_HNSW_STREAMER_CHUNK_SIZE, &chunk_size_);
  params.get(PARAM_HNSW_STREAMER_FILTER_SAME_KEY, &filter_same_key_);
  params.get(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, &get_vector_enabled_);
  params.get(PARAM_HNSW_STREAMER_MIN_NEIGHBOR_COUNT, &min_neighbor_cnt_);
  params.get(PARAM_HNSW_STREAMER_FORCE_PADDING_RESULT_ENABLE,
             &force_padding_topk_enabled_);
  params.get(PARAM_HNSW_STREAMER_USE_ID_MAP, &use_id_map_);
  entity_.set_use_key_info_map(use_id_map_);

  // A non-zero soft limit must not exceed the hard limit; a zero soft limit
  // is derived from the hard limit.
  params.get(PARAM_HNSW_STREAMER_DOCS_SOFT_LIMIT, &docs_soft_limit_);
  if (docs_soft_limit_ > 0 && docs_soft_limit_ > docs_hard_limit_) {
    LOG_ERROR("[%s] must be >= [%s]",
              PARAM_HNSW_STREAMER_DOCS_HARD_LIMIT.c_str(),
              PARAM_HNSW_STREAMER_DOCS_SOFT_LIMIT.c_str());
    return IndexError_InvalidArgument;
  } else if (docs_soft_limit_ == 0UL) {
    docs_soft_limit_ =
        docs_hard_limit_ * HnswEntity::kDefaultDocsSoftLimitRatio;
  }

  // Zero means "use the default" for the options below
  if (ef_ == 0U) {
    ef_ = HnswEntity::kDefaultEf;
  }
  if (ef_construction_ == 0U) {
    ef_construction_ = HnswEntity::kDefaultEfConstruction;
  }
  if (upper_max_neighbor_cnt_ == 0U) {
    upper_max_neighbor_cnt_ = HnswEntity::kDefaultUpperMaxNeighborCnt;
  }
  if (upper_max_neighbor_cnt_ > HnswEntity::kMaxNeighborCnt) {
    LOG_ERROR("[%s] must be in range (0,%d)",
              PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT.c_str(),
              HnswEntity::kMaxNeighborCnt);
    return IndexError_InvalidArgument;
  }
  if (l0_max_neighbor_cnt_ == 0U) {
    l0_max_neighbor_cnt_ = HnswEntity::kDefaultL0MaxNeighborCnt;
  }
  if (l0_max_neighbor_cnt_ > HnswEntity::kMaxNeighborCnt) {
    LOG_ERROR("MaxL0NeighborCnt must be in range (0,%d)",
              HnswEntity::kMaxNeighborCnt);
    return IndexError_InvalidArgument;
  }
  if (min_neighbor_cnt_ > upper_max_neighbor_cnt_) {
    LOG_ERROR("[%s]-[%u] must be <= [%s]-[%u]",
              PARAM_HNSW_STREAMER_MIN_NEIGHBOR_COUNT.c_str(), min_neighbor_cnt_,
              PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT.c_str(),
              upper_max_neighbor_cnt_);
    return IndexError_InvalidArgument;
  }

  // The bloom filter probability must lie strictly between 0 and 1
  if (bf_negative_prob_ <= 0.0f || bf_negative_prob_ >= 1.0f) {
    LOG_ERROR("[%s] must be in range (0,1)",
              PARAM_HNSW_STREAMER_VISIT_BLOOMFILTER_NEGATIVE_PROB.c_str());
    return IndexError_InvalidArgument;
  }

  if (scaling_factor_ == 0U) {
    scaling_factor_ = HnswEntity::kDefaultScalingFactor;
  }
  if (scaling_factor_ < 5 || scaling_factor_ > 1000) {
    LOG_ERROR("[%s] must be in range [5,1000]",
              PARAM_HNSW_STREAMER_SCALING_FACTOR.c_str());
    return IndexError_InvalidArgument;
  }

  if (max_scan_ratio_ <= 0.0f || max_scan_ratio_ > 1.0f) {
    LOG_ERROR("[%s] must be in range (0.0f,1.0f]",
              PARAM_HNSW_STREAMER_MAX_SCAN_RATIO.c_str());
    return IndexError_InvalidArgument;
  }

  if (max_scan_limit_ < min_scan_limit_) {
    LOG_ERROR("[%s] must be >= [%s]",
              PARAM_HNSW_STREAMER_MAX_SCAN_LIMIT.c_str(),
              PARAM_HNSW_STREAMER_MIN_SCAN_LIMIT.c_str());
    return IndexError_InvalidArgument;
  }

  // A zero prune count falls back to the (now defaulted) upper neighbor count
  if (prune_cnt == 0UL) {
    prune_cnt = upper_max_neighbor_cnt_;
  }
  if (chunk_size_ == 0UL) {
    chunk_size_ = HnswEntity::kDefaultChunkSize;
  }
  if (chunk_size_ > HnswEntity::kMaxChunkSize) {
    LOG_ERROR("[%s] must be < %zu", PARAM_HNSW_STREAMER_CHUNK_SIZE.c_str(),
              HnswEntity::kMaxChunkSize);
    return IndexError_InvalidArgument;
  }

  // Push the validated settings into the entity before initializing it
  entity_.set_ef_construction(ef_construction_);
  entity_.set_upper_neighbor_cnt(upper_max_neighbor_cnt_);
  entity_.set_l0_neighbor_cnt(l0_max_neighbor_cnt_);
  entity_.set_scaling_factor(scaling_factor_);
  entity_.set_prune_cnt(prune_cnt);

  entity_.set_vector_size(meta_.element_size());

  entity_.set_chunk_size(chunk_size_);
  entity_.set_filter_same_key(filter_same_key_);
  entity_.set_get_vector(get_vector_enabled_);
  entity_.set_min_neighbor_cnt(min_neighbor_cnt_);

  int ret = entity_.init(docs_hard_limit_);
  if (ret != 0) {
    LOG_ERROR("Hnsw entity init failed for %s", IndexError::What(ret));
    return ret;
  }

  LOG_DEBUG(
      "Init params: maxIndexSize=%zu docsHardLimit=%zu docsSoftLimit=%zu "
      "efConstruction=%u ef=%u upperMaxNeighborCnt=%u l0MaxNeighborCnt=%u "
      "scalingFactor=%u maxScanRatio=%.3f minScanLimit=%zu maxScanLimit=%zu "
      "bfEnabled=%d bruteFoceThreshold=%zu bfNegativeProbability=%.5f "
      "checkCrcEnabled=%d pruneSize=%zu vectorSize=%u chunkSize=%zu "
      "filterSameKey=%u getVectorEnabled=%u minNeighborCount=%u "
      "forcePadding=%u ",
      max_index_size_, docs_hard_limit_, docs_soft_limit_, ef_construction_,
      ef_, upper_max_neighbor_cnt_, l0_max_neighbor_cnt_, scaling_factor_,
      max_scan_ratio_, min_scan_limit_, max_scan_limit_, bf_enabled_,
      bruteforce_threshold_, bf_negative_prob_, check_crc_enabled_, prune_cnt,
      meta_.element_size(), chunk_size_, filter_same_key_, get_vector_enabled_,
      min_neighbor_cnt_, force_padding_topk_enabled_);

  // The algorithm object operates on this streamer's entity
  alg_ = HnswAlgorithm::UPointer(new HnswAlgorithm(entity_));

  ret = alg_->init();
  if (ret != 0) {
    return ret;
  }

  state_ = STATE_INITED;

  return 0;
}

int HnswStreamer::cleanup(void) {
  if (state_ == STATE_OPENED) {
    this->close();
  }

  LOG_INFO("HnswStreamer cleanup");

  meta_.clear();
  metric_.reset();
  stats_.clear();
  entity_.cleanup();

  if (alg_) {
    alg_->cleanup();
  }

  max_index_size_ = 0UL;
  docs_hard_limit_ = HnswEntity::kDefaultDocsHardLimit;
  docs_soft_limit_ = 0UL;
  upper_max_neighbor_cnt_ = HnswEntity::kDefaultUpperMaxNeighborCnt;
  l0_max_neighbor_cnt_ = HnswEntity::kDefaultL0MaxNeighborCnt;
  ef_ = HnswEntity::kDefaultEf;
  ef_construction_ = HnswEntity::kDefaultEfConstruction;
  bf_enabled_ = false;
  scaling_factor_ = HnswEntity::kDefaultScalingFactor;
  bruteforce_threshold_ = HnswEntity::kDefaultBruteForceThreshold;
  max_scan_limit_ = HnswEntity::kDefaultMaxScanLimit;
  min_scan_limit_ = HnswEntity::kDefaultMinScanLimit;
  chunk_size_ = HnswEntity::kDefaultChunkSize;
  bf_negative_prob_ = HnswEntity::kDefaultBFNegativeProbability;
  max_scan_ratio_ = HnswEntity::kDefaultScanRatio;
  state_ = STATE_INIT;
  check_crc_enabled_ = false;
  filter_same_key_ = false;
  get_vector_enabled_ = false;

  return 0;
}

//! Open the index on the given storage.
//! Requires STATE_INITED. Opens the entity, then either stores meta_ into a
//! fresh index or verifies that the stored meta matches meta_ and merges the
//! metric parameters. Finally creates and initializes the metric and selects
//! the distance functions used for adding and for searching.
//! NOTE(review): on a failure after entity_.open() succeeded, the entity is
//! not closed here.
//! @param stg  storage, moved into the entity
//! @return 0 on success (state becomes STATE_OPENED), otherwise an error code
int HnswStreamer::open(IndexStorage::Pointer stg) {
  LOG_INFO("HnswStreamer open");

  if (ailego_unlikely(state_ != STATE_INITED)) {
    LOG_ERROR("Open storage failed, init streamer first!");
    return IndexError_NoReady;
  }

  int rc = entity_.open(std::move(stg), max_index_size_, check_crc_enabled_);
  if (rc != 0) {
    return rc;
  }

  IndexMeta stored_meta;
  rc = entity_.get_index_meta(&stored_meta);
  if (rc == 0) {
    // Existing index: it must describe the same kind of vectors
    const bool compatible =
        stored_meta.dimension() == meta_.dimension() &&
        stored_meta.element_size() == meta_.element_size() &&
        stored_meta.metric_name() == meta_.metric_name() &&
        stored_meta.data_type() == meta_.data_type();
    if (!compatible) {
      LOG_ERROR("IndexMeta mismatch from the previous in index");
      return IndexError_Mismatch;
    }
    // The IndexMetric Params may be updated like MipsSquaredEuclidean
    auto merged_params = stored_meta.metric_params();
    merged_params.merge(meta_.metric_params());
    meta_.set_metric(stored_meta.metric_name(), 0, merged_params);
  } else if (rc == IndexError_NoExist) {
    // Set IndexMeta for the new index
    rc = entity_.set_index_meta(meta_);
    if (rc != 0) {
      LOG_ERROR("Failed to set index meta for %s", IndexError::What(rc));
      return rc;
    }
  } else {
    LOG_ERROR("Failed to get index meta for %s", IndexError::What(rc));
    return rc;
  }

  metric_ = IndexFactory::CreateMetric(meta_.metric_name());
  if (!metric_) {
    LOG_ERROR("Failed to create metric %s", meta_.metric_name().c_str());
    return IndexError_NoExist;
  }
  rc = metric_->init(meta_, meta_.metric_params());
  if (rc != 0) {
    LOG_ERROR("Failed to init metric, ret=%d", rc);
    return rc;
  }

  if (!metric_->distance()) {
    LOG_ERROR("Invalid metric distance");
    return IndexError_InvalidArgument;
  }
  if (!metric_->batch_distance()) {
    LOG_ERROR("Invalid metric batch distance");
    return IndexError_InvalidArgument;
  }

  add_distance_ = metric_->distance();
  add_batch_distance_ = metric_->batch_distance();

  // Searching uses the query metric when it offers both distance functions,
  // otherwise the same functions as adding
  auto query_metric = metric_->query_metric();
  if (query_metric && query_metric->distance() &&
      query_metric->batch_distance()) {
    search_distance_ = query_metric->distance();
    search_batch_distance_ = query_metric->batch_distance();
  } else {
    search_distance_ = add_distance_;
    search_batch_distance_ = add_batch_distance_;
  }

  state_ = STATE_OPENED;
  magic_ = IndexContext::GenerateMagic();

  return 0;
}

int HnswStreamer::close(void) {
  LOG_INFO("HnswStreamer close");

  stats_.clear();
  meta_.set_metric(metric_->name(), 0, metric_->params());
  entity_.set_index_meta(meta_);
  int ret = entity_.close();
  if (ret != 0) {
    return ret;
  }
  state_ = STATE_INITED;

  return 0;
}

int HnswStreamer::flush(uint64_t checkpoint) {
  LOG_INFO("HnswStreamer flush checkpoint=%zu", (size_t)checkpoint);

  meta_.set_metric(metric_->name(), 0, metric_->params());
  entity_.set_index_meta(meta_);
  return entity_.flush(checkpoint);
}

//! Dump index into storage.
//! Holds shared_mutex_ exclusively for the whole call (add paths take it in
//! shared mode), tags the meta with the HnswSearcher, serializes the meta into
//! the dumper and then lets the entity dump itself.
//! @param dumper  destination dumper
//! @return 0 on success, otherwise the serialization or entity dump error
int HnswStreamer::dump(const IndexDumper::Pointer &dumper) {
  LOG_INFO("HnswStreamer dump");

  shared_mutex_.lock();
  AILEGO_DEFER([&]() { shared_mutex_.unlock(); });

  meta_.set_searcher("HnswSearcher", HnswEntity::kRevision, ailego::Params());

  const int rc = IndexHelper::SerializeToDumper(meta_, dumper.get());
  if (rc == 0) {
    return entity_.dump(dumper);
  }

  LOG_ERROR("Failed to serialize meta into dumper.");
  return rc;
}

//! Create a context bound to this streamer.
//! Requires STATE_OPENED. The context receives a clone of the entity, the
//! streamer's search settings and its magic number, and is sized for the
//! larger of the current document count and the optional estimated count.
//! @return the new context, or an empty pointer on any failure
IndexStreamer::Context::Pointer HnswStreamer::create_context(void) const {
  if (ailego_unlikely(state_ != STATE_OPENED)) {
    LOG_ERROR("Create context failed, open storage first!");
    return Context::Pointer();
  }

  HnswEntity::Pointer entity = entity_.clone();
  if (ailego_unlikely(!entity)) {
    LOG_ERROR("CreateContext clone init failed");
    return Context::Pointer();
  }
  HnswContext *ctx =
      new (std::nothrow) HnswContext(meta_.dimension(), metric_, entity);
  if (ailego_unlikely(ctx == nullptr)) {
    LOG_ERROR("Failed to new HnswContext");
    return Context::Pointer();
  }
  ctx->set_ef(ef_);
  ctx->set_max_scan_limit(max_scan_limit_);
  ctx->set_min_scan_limit(min_scan_limit_);
  ctx->set_max_scan_ratio(max_scan_ratio_);
  ctx->set_filter_mode(bf_enabled_ ? VisitFilter::BloomFilter
                                   : VisitFilter::ByteMap);
  ctx->set_filter_negative_probability(bf_negative_prob_);
  ctx->set_magic(magic_);
  ctx->set_force_padding_topk(force_padding_topk_enabled_);
  ctx->set_bruteforce_threshold(bruteforce_threshold_);

  // The comparison belongs inside the branch hint; it used to sit outside
  // the closing parenthesis of ailego_unlikely()
  if (ailego_unlikely(ctx->init(HnswContext::kStreamerContext) != 0)) {
    LOG_ERROR("Init HnswContext failed");
    delete ctx;
    return Context::Pointer();
  }
  uint32_t estimate_doc_count = 0;
  if (meta_.streamer_params().get(PARAM_HNSW_STREAMER_ESTIMATE_DOC_COUNT,
                                  &estimate_doc_count)) {
    LOG_DEBUG("HnswStreamer doc_count[%zu] estimate[%zu]",
              (size_t)entity_.doc_cnt(), (size_t)estimate_doc_count);
  }
  ctx->check_need_adjuct_ctx(std::max(entity_.doc_cnt(), estimate_doc_count));

  return Context::Pointer(ctx);
}

//! Create a provider over a clone of the entity.
//! @return the provider, or nullptr when the entity could not be cloned
IndexProvider::Pointer HnswStreamer::create_provider(void) const {
  LOG_DEBUG("HnswStreamer create provider");

  auto cloned = entity_.clone();
  if (ailego_unlikely(!cloned)) {
    LOG_ERROR("Clone HnswEntity failed");
    return nullptr;
  }

  auto *provider = new HnswIndexProvider(meta_, cloned, "HnswStreamer");
  return Provider::Pointer(provider);
}

//! Rebind a context that was created by another searcher/streamer to this
//! streamer: apply this streamer's scan settings and brute-force threshold,
//! then update the context with a fresh entity clone, the meta, the metric and
//! the magic number.
//! @param ctx  context to rebind
//! @return the context update result, or IndexError_Runtime when the entity
//!         could not be cloned
int HnswStreamer::update_context(HnswContext *ctx) const {
  const HnswEntity::Pointer cloned = entity_.clone();
  if (cloned) {
    ctx->set_max_scan_limit(max_scan_limit_);
    ctx->set_min_scan_limit(min_scan_limit_);
    ctx->set_max_scan_ratio(max_scan_ratio_);
    ctx->set_bruteforce_threshold(bruteforce_threshold_);
    return ctx->update_context(HnswContext::kStreamerContext, meta_, metric_,
                               cloned, magic_);
  }

  LOG_ERROR("Failed to clone search context entity");
  return IndexError_Runtime;
}

//! Add a vector with id into index
int HnswStreamer::add_with_id_impl(uint32_t id, const void *query,
                                   const IndexQueryMeta &qmeta,
                                   IndexStreamer::Context::Pointer &context) {
  int ret = check_params(query, qmeta);
  if (ailego_unlikely(ret != 0)) {
    return ret;
  }

  HnswContext *ctx = dynamic_cast<HnswContext *>(context.get());
  ailego_do_if_false(ctx) {
    LOG_ERROR("Cast context to HnswContext failed");
    return IndexError_Cast;
  }
  if (ctx->magic() != magic_) {
    //! context is created by another searcher or streamer
    ret = update_context(ctx);
    if (ret != 0) {
      return ret;
    }
  }

  if (ailego_unlikely(entity_.doc_cnt() >= docs_soft_limit_)) {
    if (entity_.doc_cnt() >= docs_hard_limit_) {
      LOG_ERROR("Current docs %u exceed [%s]", entity_.doc_cnt(),
                PARAM_HNSW_STREAMER_DOCS_HARD_LIMIT.c_str());
      const std::lock_guard<std::mutex> lk(mutex_);
      (*stats_.mutable_discarded_count())++;
      return IndexError_IndexFull;
    } else {
      LOG_WARN("Current docs %u exceed [%s]", entity_.doc_cnt(),
               PARAM_HNSW_STREAMER_DOCS_SOFT_LIMIT.c_str());
    }
  }
  if (ailego_unlikely(!shared_mutex_.try_lock_shared())) {
    LOG_ERROR("Cannot add vector while dumping index");
    (*stats_.mutable_discarded_count())++;
    return IndexError_Unsupported;
  }
  AILEGO_DEFER([&]() { shared_mutex_.unlock_shared(); });

  ctx->clear();
  ctx->update_dist_caculator_distance(add_distance_, add_batch_distance_);
  ctx->reset_query(query);
  ctx->check_need_adjuct_ctx(entity_.doc_cnt());

  if (metric_->support_train()) {
    const std::lock_guard<std::mutex> lk(mutex_);
    ret = metric_->train(query, meta_.dimension());
    if (ailego_unlikely(ret != 0)) {
      LOG_ERROR("Hnsw streamer metric train failed");
      (*stats_.mutable_discarded_count())++;
      return ret;
    }
  }

  level_t level = alg_->get_random_level();
  ret = entity_.add_vector_with_id(level, id, query);
  if (ailego_unlikely(ret != 0)) {
    LOG_ERROR("Hnsw streamer add vector failed");
    (*stats_.mutable_discarded_count())++;
    return ret;
  }

  ret = alg_->add_node(id, level, ctx);
  if (ailego_unlikely(ret != 0)) {
    LOG_ERROR("Hnsw steamer add node failed");
    (*stats_.mutable_discarded_count())++;
    return ret;
  }

  if (ailego_unlikely(ctx->error())) {
    (*stats_.mutable_discarded_count())++;
    return IndexError_Runtime;
  }
  (*stats_.mutable_added_count())++;

  return 0;
}

//! Add a vector into index
int HnswStreamer::add_impl(uint64_t pkey, const void *query,
                           const IndexQueryMeta &qmeta,
                           IndexStreamer::Context::Pointer &context) {
  int ret = check_params(query, qmeta);
  if (ailego_unlikely(ret != 0)) {
    return ret;
  }

  HnswContext *ctx = dynamic_cast<HnswContext *>(context.get());
  ailego_do_if_false(ctx) {
    LOG_ERROR("Cast context to HnswContext failed");
    return IndexError_Cast;
  }
  if (ctx->magic() != magic_) {
    //! context is created by another searcher or streamer
    ret = update_context(ctx);
    if (ret != 0) {
      return ret;
    }
  }

  if (ailego_unlikely(entity_.doc_cnt() >= docs_soft_limit_)) {
    if (entity_.doc_cnt() >= docs_hard_limit_) {
      LOG_ERROR("Current docs %u exceed [%s]", entity_.doc_cnt(),
                PARAM_HNSW_STREAMER_DOCS_HARD_LIMIT.c_str());
      const std::lock_guard<std::mutex> lk(mutex_);
      (*stats_.mutable_discarded_count())++;
      return IndexError_IndexFull;
    } else {
      LOG_WARN("Current docs %u exceed [%s]", entity_.doc_cnt(),
               PARAM_HNSW_STREAMER_DOCS_SOFT_LIMIT.c_str());
    }
  }
  if (ailego_unlikely(!shared_mutex_.try_lock_shared())) {
    LOG_ERROR("Cannot add vector while dumping index");
    (*stats_.mutable_discarded_count())++;
    return IndexError_Unsupported;
  }
  AILEGO_DEFER([&]() { shared_mutex_.unlock_shared(); });

  ctx->clear();
  ctx->update_dist_caculator_distance(add_distance_, add_batch_distance_);
  ctx->reset_query(query);
  ctx->check_need_adjuct_ctx(entity_.doc_cnt());

  if (metric_->support_train()) {
    const std::lock_guard<std::mutex> lk(mutex_);
    ret = metric_->train(query, meta_.dimension());
    if (ailego_unlikely(ret != 0)) {
      LOG_ERROR("Hnsw streamer metric train failed");
      (*stats_.mutable_discarded_count())++;
      return ret;
    }
  }

  level_t level = alg_->get_random_level();
  node_id_t id;
  ret = entity_.add_vector(level, pkey, query, &id);
  if (ailego_unlikely(ret != 0)) {
    LOG_ERROR("Hnsw streamer add vector failed");
    (*stats_.mutable_discarded_count())++;
    return ret;
  }

  ret = alg_->add_node(id, level, ctx);
  if (ailego_unlikely(ret != 0)) {
    LOG_ERROR("Hnsw steamer add node failed");
    (*stats_.mutable_discarded_count())++;
    return ret;
  }

  if (ailego_unlikely(ctx->error())) {
    (*stats_.mutable_discarded_count())++;
    return IndexError_Runtime;
  }
  (*stats_.mutable_added_count())++;

  return 0;
}


//! Similarity search for a single query; forwards to the batch overload with
//! a query count of one.
int HnswStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta,
                              IndexStreamer::Context::Pointer &context) const {
  constexpr uint32_t kSingleQuery = 1;
  return search_impl(query, qmeta, kSingleQuery, context);
}

//! Similarity search
int HnswStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta,
                              uint32_t count,
                              IndexStreamer::Context::Pointer &context) const {
  int ret = check_params(query, qmeta);
  if (ailego_unlikely(ret != 0)) {
    return ret;
  }
  HnswContext *ctx = dynamic_cast<HnswContext *>(context.get());
  ailego_do_if_false(ctx) {
    LOG_ERROR("Cast context to HnswContext failed");
    return IndexError_Cast;
  }

  if (entity_.doc_cnt() <= ctx->get_bruteforce_threshold()) {
    return search_bf_impl(query, qmeta, count, context);
  }

  if (ctx->magic() != magic_) {
    //! context is created by another searcher or streamer
    ret = update_context(ctx);
    if (ret != 0) {
      return ret;
    }
  }

  ctx->clear();
  ctx->update_dist_caculator_distance(search_distance_, search_batch_distance_);
  ctx->resize_results(count);
  ctx->check_need_adjuct_ctx(entity_.doc_cnt());
  for (size_t q = 0; q < count; ++q) {
    ctx->reset_query(query);
    ret = alg_->search(ctx);
    if (ailego_unlikely(ret != 0)) {
      LOG_ERROR("Hnsw searcher fast search failed");
      return ret;
    }
    ctx->topk_to_result(q);
    query = static_cast<const char *>(query) + qmeta.element_size();
  }

  if (ailego_unlikely(ctx->error())) {
    return IndexError_Runtime;
  }

  return 0;
}

//! Print the level-0 neighbor list of every node with a valid key to stdout,
//! one line per node.
void HnswStreamer::print_debug_info() {
  for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {
    if (entity_.get_key(id) == kInvalidKey) {
      continue;
    }

    Neighbors neighbours = entity_.get_neighbors(0, id);
    const auto total = neighbours.size();

    std::cout << "node: " << id << "; ";
    if (total == 0) {
      std::cout << std::endl;
    }
    for (uint32_t i = 0; i < total; ++i) {
      std::cout << neighbours[i];

      // Comma between entries, newline after the last one
      if (i != total - 1) {
        std::cout << ", ";
      } else {
        std::cout << std::endl;
      }
    }
  }

  // entity_.print_key_map();
}

//! Brute-force search for a single query; forwards to the batch overload with
//! a query count of one.
int HnswStreamer::search_bf_impl(
    const void *query, const IndexQueryMeta &qmeta,
    IndexStreamer::Context::Pointer &context) const {
  constexpr uint32_t kSingleQuery = 1;
  return search_bf_impl(query, qmeta, kSingleQuery, context);
}

int HnswStreamer::search_bf_impl(
    const void *query, const IndexQueryMeta &qmeta, uint32_t count,
    IndexStreamer::Context::Pointer &context) const {
  int ret = check_params(query, qmeta);
  if (ailego_unlikely(ret != 0)) {
    return ret;
  }
  HnswContext *ctx = dynamic_cast<HnswContext *>(context.get());
  ailego_do_if_false(ctx) {
    LOG_ERROR("Cast context to HnswContext failed");
    return IndexError_Cast;
  }
  if (ctx->magic() != magic_) {
    //! context is created by another searcher or streamer
    ret = update_context(ctx);
    if (ret != 0) {
      return ret;
    }
  }

  ctx->clear();
  ctx->update_dist_caculator_distance(search_distance_, search_batch_distance_);
  ctx->resize_results(count);

  if (ctx->group_by_search()) {
    if (!ctx->group_by().is_valid()) {
      LOG_ERROR("Invalid group-by function");
      return IndexError_InvalidArgument;
    }

    std::function<std::string(node_id_t)> group_by = [&](node_id_t id) {
      return ctx->group_by()(entity_.get_key(id));
    };

    for (size_t q = 0; q < count; ++q) {
      ctx->reset_query(query);
      ctx->group_topk_heaps().clear();

      for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {
        if (entity_.get_key(id) == kInvalidKey) {
          continue;
        }

        if (!ctx->filter().is_valid() || !ctx->filter()(entity_.get_key(id))) {
          dist_t dist = ctx->dist_calculator().batch_dist(id);

          std::string group_id = group_by(id);

          auto &topk_heap = ctx->group_topk_heaps()[group_id];
          if (topk_heap.empty()) {
            topk_heap.limit(ctx->group_topk());
          }
          topk_heap.emplace_back(id, dist);
        }
      }
      ctx->topk_to_result(q);
      query = static_cast<const char *>(query) + qmeta.element_size();
    }
  } else {
    auto &filter = ctx->filter();
    auto &topk = ctx->topk_heap();

    for (size_t q = 0; q < count; ++q) {
      ctx->reset_query(query);
      topk.clear();
      for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {
        if (entity_.get_key(id) == kInvalidKey) {
          continue;
        }

        if (!filter.is_valid() || !filter(entity_.get_key(id))) {
          dist_t dist = ctx->dist_calculator().batch_dist(id);
          topk.emplace(id, dist);
        }
      }
      ctx->topk_to_result(q);
      query = static_cast<const char *>(query) + qmeta.element_size();
    }
  }

  if (ailego_unlikely(ctx->error())) {
    return IndexError_Runtime;
  }

  return 0;
}

int HnswStreamer::search_bf_by_p_keys_impl(
    const void *query, const std::vector<std::vector<uint64_t>> &p_keys,
    const IndexQueryMeta &qmeta, uint32_t count,
    Context::Pointer &context) const {
  int ret = check_params(query, qmeta);
  if (ailego_unlikely(ret != 0)) {
    return ret;
  }

  if (ailego_unlikely(p_keys.size() != count)) {
    LOG_ERROR("The size of p_keys is not equal to count");
    return IndexError_InvalidArgument;
  }

  HnswContext *ctx = dynamic_cast<HnswContext *>(context.get());
  ailego_do_if_false(ctx) {
    LOG_ERROR("Cast context to HnswContext failed");
    return IndexError_Cast;
  }
  if (ctx->magic() != magic_) {
    //! context is created by another searcher or streamer
    ret = update_context(ctx);
    if (ret != 0) {
      return ret;
    }
  }

  ctx->clear();
  ctx->update_dist_caculator_distance(search_distance_, search_batch_distance_);
  ctx->resize_results(count);

  if (ctx->group_by_search()) {
    if (!ctx->group_by().is_valid()) {
      LOG_ERROR("Invalid group-by function");
      return IndexError_InvalidArgument;
    }

    std::function<std::string(node_id_t)> group_by = [&](node_id_t id) {
      return ctx->group_by()(entity_.get_key(id));
    };

    for (size_t q = 0; q < count; ++q) {
      ctx->reset_query(query);
      ctx->group_topk_heaps().clear();

      for (size_t idx = 0; idx < p_keys[q].size(); ++idx) {
        uint64_t pk = p_keys[q][idx];
        if (!ctx->filter().is_valid() || !ctx->filter()(pk)) {
          node_id_t id = entity_.get_id(pk);
          if (id != kInvalidNodeId) {
            dist_t dist = ctx->dist_calculator().batch_dist(id);
            std::string group_id = group_by(id);

            auto &topk_heap = ctx->group_topk_heaps()[group_id];
            if (topk_heap.empty()) {
              topk_heap.limit(ctx->group_topk());
            }
            topk_heap.emplace_back(id, dist);
          }
        }
      }
      ctx->topk_to_result(q);
      query = static_cast<const char *>(query) + qmeta.element_size();
    }
  } else {
    auto &filter = ctx->filter();
    auto &topk = ctx->topk_heap();

    for (size_t q = 0; q < count; ++q) {
      ctx->reset_query(query);
      topk.clear();
      for (size_t idx = 0; idx < p_keys[q].size(); ++idx) {
        key_t pk = p_keys[q][idx];
        if (!filter.is_valid() || !filter(pk)) {
          node_id_t id = entity_.get_id(pk);
          if (id != kInvalidNodeId) {
            dist_t dist = ctx->dist_calculator().batch_dist(id);
            topk.emplace(id, dist);
          }
        }
      }
      ctx->topk_to_result(q);
      query = static_cast<const char *>(query) + qmeta.element_size();
    }
  }

  if (ailego_unlikely(ctx->error())) {
    return IndexError_Runtime;
  }

  return 0;
}


//! Register HnswStreamer through the factory registration macro
//! (presumably this makes the streamer creatable by name via IndexFactory;
//! confirm against the macro definition).
INDEX_FACTORY_REGISTER_STREAMER(HnswStreamer);

}  // namespace core
}  // namespace zvec

================================================
FILE: src/core/algorithm/hnsw/hnsw_streamer.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <ailego/parallel/lock.h>
#include <zvec/core/framework/index_framework.h>
#include "hnsw_algorithm.h"
#include "hnsw_streamer_entity.h"

namespace zvec {
namespace core {

//! HNSW implementation of IndexStreamer.
//! Owns the streamer entity, the HNSW algorithm object, the index meta and
//! the metric, and moves through the states STATE_INIT -> STATE_INITED
//! (init) -> STATE_OPENED (open).
class HnswStreamer : public IndexStreamer {
 public:
  using ContextPointer = IndexStreamer::Context::Pointer;

  HnswStreamer(void);
  virtual ~HnswStreamer(void);

  HnswStreamer(const HnswStreamer &streamer) = delete;
  HnswStreamer &operator=(const HnswStreamer &streamer) = delete;

 protected:
  //! Initialize Streamer
  virtual int init(const IndexMeta &imeta,
                   const ailego::Params &params) override;

  //! Cleanup Streamer
  virtual int cleanup(void) override;

  //! Create a context
  virtual Context::Pointer create_context(void) const override;

  //! Create a new iterator
  virtual IndexProvider::Pointer create_provider(void) const override;

  //! Add a vector into index
  virtual int add_impl(uint64_t pkey, const void *query,
                       const IndexQueryMeta &qmeta,
                       Context::Pointer &context) override;

  //! Add a vector with id into index
  virtual int add_with_id_impl(uint32_t id, const void *query,
                               const IndexQueryMeta &qmeta,
                               Context::Pointer &context) override;

  //! Similarity search (single query)
  virtual int search_impl(const void *query, const IndexQueryMeta &qmeta,
                          Context::Pointer &context) const override;

  //! Similarity search (`count` queries)
  virtual int search_impl(const void *query, const IndexQueryMeta &qmeta,
                          uint32_t count,
                          Context::Pointer &context) const override;

  //! Similarity brute force search (single query)
  virtual int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,
                             Context::Pointer &context) const override;

  //! Similarity brute force search (`count` queries)
  virtual int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,
                             uint32_t count,
                             Context::Pointer &context) const override;

  //! Linear search by primary keys (single query)
  virtual int search_bf_by_p_keys_impl(
      const void *query, const std::vector<std::vector<uint64_t>> &p_keys,
      const IndexQueryMeta &qmeta, ContextPointer &context) const override {
    return search_bf_by_p_keys_impl(query, p_keys, qmeta, 1, context);
  }

  //! Linear search by primary keys (`count` queries)
  virtual int search_bf_by_p_keys_impl(
      const void *query, const std::vector<std::vector<uint64_t>> &p_keys,
      const IndexQueryMeta &qmeta, uint32_t count,
      ContextPointer &context) const override;

  //! Fetch vector by key
  virtual const void *get_vector(uint64_t key) const override {
    return entity_.get_vector_by_key(key);
  }

  //! Fetch vector by key into a memory block
  virtual int get_vector(const uint64_t key,
                         IndexStorage::MemoryBlock &block) const override {
    return entity_.get_vector_by_key(key, block);
  }

  //! Fetch vector by id
  virtual const void *get_vector_by_id(uint32_t id) const override {
    return entity_.get_vector(id);
  }

  //! Fetch vector by id into a memory block
  virtual int get_vector_by_id(
      const uint32_t id, IndexStorage::MemoryBlock &block) const override {
    return entity_.get_vector(id, block);
  }

  //! Open index from file path
  virtual int open(IndexStorage::Pointer stg) override;

  //! Close file
  virtual int close(void) override;

  //! flush file
  virtual int flush(uint64_t checkpoint) override;

  //! Dump index into storage
  virtual int dump(const IndexDumper::Pointer &dumper) override;

  //! Retrieve statistics
  virtual const Stats &stats(void) const override {
    return stats_;
  }

  //! Retrieve meta of index
  virtual const IndexMeta &meta(void) const override {
    return meta_;
  }

  //! Print the level-0 neighbors of every valid node
  virtual void print_debug_info() override;

 private:
  //! Validate a query against the index meta.
  //! @return 0 when valid, IndexError_InvalidArgument for a null query,
  //!         IndexError_Mismatch when dimension, data type or element size
  //!         differ from the index meta
  inline int check_params(const void *query,
                          const IndexQueryMeta &qmeta) const {
    if (ailego_unlikely(!query)) {
      LOG_ERROR("null query");
      return IndexError_InvalidArgument;
    }
    if (ailego_unlikely(qmeta.dimension() != meta_.dimension() ||
                        qmeta.data_type() != meta_.data_type() ||
                        qmeta.element_size() != meta_.element_size())) {
      LOG_ERROR("Unsupported query meta");
      return IndexError_Mismatch;
    }
    return 0;
  }

  //! Check that all `count` entries of sparse_count are zero.
  //! @return 0 when every entry is zero, IndexError_InvalidArgument at the
  //!         first non-zero entry
  inline int check_sparse_count_is_zero(const uint32_t *sparse_count,
                                        uint32_t count) const {
    for (uint32_t i = 0; i < count; ++i) {
      // The braces matter: without them the return ran unconditionally on
      // the first iteration, rejecting every non-empty input
      if (sparse_count[i] != 0) {
        LOG_ERROR("Sparse cout is not empty. Index: %u, Sparse Count: %u", i,
                  sparse_count[i]);
        return IndexError_InvalidArgument;
      }
    }

    return 0;
  }

 private:
  //! To share ctx across streamer/searcher, we need to update the context for
  //! current streamer/searcher
  int update_context(HnswContext *ctx) const;

 private:
  //! Lifecycle state of the streamer
  enum State { STATE_INIT = 0, STATE_INITED = 1, STATE_OPENED = 2 };

  //! Statistics with a helper that resets every counter and attribute
  class Stats : public IndexStreamer::Stats {
   public:
    void clear(void) {
      set_revision_id(0u);
      set_loaded_count(0u);
      set_added_count(0u);
      set_discarded_count(0u);
      set_index_size(0u);
      set_dumped_size(0u);
      set_check_point(0u);
      set_create_time(0u);
      set_update_time(0u);
      clear_attributes();
    }
  };

  // NOTE: entity_ is declared before stats_ although it is constructed with
  // a reference to stats_; keep the entity constructor free of any use of
  // that reference.
  HnswStreamerEntity entity_;
  HnswAlgorithm::UPointer alg_;
  IndexMeta meta_{};
  IndexMetric::Pointer metric_{};

  //! Distance functions used when adding vectors
  IndexMetric::MatrixDistance add_distance_{};
  //! Distance functions used when searching
  IndexMetric::MatrixDistance search_distance_{};

  IndexMetric::MatrixBatchDistance add_batch_distance_{};
  IndexMetric::MatrixBatchDistance search_batch_distance_{};

  Stats stats_{};
  //! Serializes metric training and the hard-limit discard accounting
  std::mutex mutex_{};

  size_t max_index_size_{0UL};
  size_t chunk_size_{HnswEntity::kDefaultChunkSize};
  size_t docs_hard_limit_{HnswEntity::kDefaultDocsHardLimit};
  size_t docs_soft_limit_{0UL};
  uint32_t min_neighbor_cnt_{0u};
  uint32_t upper_max_neighbor_cnt_{HnswEntity::kDefaultUpperMaxNeighborCnt};
  uint32_t l0_max_neighbor_cnt_{HnswEntity::kDefaultL0MaxNeighborCnt};
  uint32_t ef_{HnswEntity::kDefaultEf};
  uint32_t ef_construction_{HnswEntity::kDefaultEfConstruction};
  uint32_t scaling_factor_{HnswEntity::kDefaultScalingFactor};
  size_t bruteforce_threshold_{HnswEntity::kDefaultBruteForceThreshold};
  size_t max_scan_limit_{HnswEntity::kDefaultMaxScanLimit};
  size_t min_scan_limit_{HnswEntity::kDefaultMinScanLimit};
  float bf_negative_prob_{HnswEntity::kDefaultBFNegativeProbability};
  float max_scan_ratio_{HnswEntity::kDefaultScanRatio};

  //! Identifies contexts bound to this streamer; regenerated on every open
  uint32_t magic_{0U};
  State state_{STATE_INIT};
  bool bf_enabled_{false};
  bool check_crc_enabled_{false};
  bool filter_same_key_{false};
  bool get_vector_enabled_{false};
  bool force_padding_topk_enabled_{false};
  bool use_id_map_{true};

  //! avoid add vector while dumping index
  ailego::SharedMutex shared_mutex_{};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw/hnsw_streamer_entity.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "hnsw_streamer_entity.h"
#include <ailego/utility/memory_helper.h>

// #define DEBUG_PRINT

namespace zvec {
namespace core {

//! Constructor: keeps hold of the stats object handed in by the caller
HnswStreamerEntity::HnswStreamerEntity(IndexStreamer::Stats &stats)
    : stats_(stats) {}

//! Destructor: no explicit teardown is performed here
HnswStreamerEntity::~HnswStreamerEntity() = default;

int HnswStreamerEntity::init(size_t max_doc_cnt) {
  if (std::pow(scaling_factor(), kMaxGraphLayers) < max_doc_cnt) {
    LOG_ERROR("scalingFactor=%zu is too small", scaling_factor());
    return IndexError_InvalidArgument;
  }

  std::lock_guard<std::mutex> lock(mutex_);
  broker_ = std::make_shared<ChunkBroker>(stats_);
  upper_neighbor_index_ = std::make_shared<NIHashMap>();
  keys_map_lock_ = std::make_shared<ailego::SharedMutex>();
  keys_map_ = std::make_shared<HashMap<key_t, node_id_t>>();
  if (!keys_map_ || !upper_neighbor_index_ || !broker_ || !keys_map_lock_) {
    LOG_ERROR("HnswStreamerEntity new object failed");
    return IndexError_NoMemory;
  }
  keys_map_->set_empty_key(kInvalidKey);

  neighbor_size_ = neighbors_size();
  upper_neighbor_size_ = upper_neighbors_size();

  //! vector + key + level 0 neighbors
  size_t size = vector_size() + sizeof(key_t) + neighbor_size_;

  size = AlignSize(size);
  set_node_size(size);
  return 0;
}

//! Cleanup: clear the header, restore default chunk parameters, empty the
//! neighbor index / key map / chunk lists and release the broker.
//! return 0 on success, or errCode in failure
int HnswStreamerEntity::cleanup() {
  std::lock_guard<std::mutex> guard(mutex_);

  //! header and chunk addressing parameters
  mutable_header()->clear();
  chunk_size_ = kDefaultChunkSize;
  node_index_mask_bits_ = 0U;
  node_index_mask_ = 0U;
  node_cnt_per_chunk_ = 0U;
  neighbor_size_ = 0U;
  upper_neighbor_size_ = 0U;

  //! shared lookup structures may not exist if init() never ran
  if (upper_neighbor_index_ != nullptr) {
    upper_neighbor_index_->cleanup();
  }
  if (keys_map_ != nullptr) {
    keys_map_->clear();
  }

  //! cached chunk handles and feature switches
  node_chunks_.clear();
  upper_neighbor_chunks_.clear();
  filter_same_key_ = false;
  get_vector_enabled_ = false;
  broker_.reset();

  return 0;
}

//! Overwrite the neighbor list of node `id` on `level`.
//! Only the node ids of `neighbors` are stored; the distances are ignored.
//! return 0 on success, IndexError_InvalidArgument if the list holds more
//! entries than neighbor_cnt(level), IndexError_Runtime if the write fails
int HnswStreamerEntity::update_neighbors(
    level_t level, node_id_t id,
    const std::vector<std::pair<node_id_t, dist_t>> &neighbors) {
  const size_t count = neighbors.size();
  //! An oversized list would overrun both the scratch buffer below and the
  //! node's neighbor slot inside the chunk, so reject it up front
  if (ailego_unlikely(count > static_cast<size_t>(neighbor_cnt(level)))) {
    LOG_ERROR("Too many neighbors to update, cnt=%zu", count);
    return IndexError_InvalidArgument;
  }

  //! Scratch buffer sized for exactly header + count ids
  std::vector<char> buffer(sizeof(NeighborsHeader) +
                           count * sizeof(node_id_t));
  NeighborsHeader *hd = reinterpret_cast<NeighborsHeader *>(buffer.data());
  hd->neighbor_cnt = count;
  for (size_t i = 0; i < count; ++i) {
    hd->neighbors[i] = neighbors[i].first;
  }

  auto loc = get_neighbor_chunk_loc(level, id);
  //! bytes to persist: header plus the ids actually filled in
  size_t size = reinterpret_cast<char *>(&hd->neighbors[count]) - &buffer[0];
  size_t ret = loc.first->write(loc.second, hd, size);
  if (ailego_unlikely(ret != size)) {
    LOG_ERROR("Write neighbor header failed, ret=%zu", ret);

    return IndexError_Runtime;
  }

  return 0;
}

//! Get the neighbors of node `id` on graph `level`.
//! Level 0 neighbors live inside the node slot (after vector and key);
//! upper level neighbors are located through get_upper_neighbor_chunk_loc().
//! return an empty Neighbors object if the read comes back short
const Neighbors HnswStreamerEntity::get_neighbors(level_t level,
                                                  node_id_t id) const {
  Chunk *source = nullptr;
  size_t position = 0UL;
  size_t bytes = 0UL;
  if (level != 0UL) {
    const auto loc = get_upper_neighbor_chunk_loc(level, id);
    source = upper_neighbor_chunks_[loc.first].get();
    position = loc.second;
    bytes = upper_neighbor_size_;
  } else {
    const uint32_t chunk_idx = id >> node_index_mask_bits_;
    position =
        (id & node_index_mask_) * node_size() + vector_size() + sizeof(key_t);

    //! make sure the chunk handle is cached before indexing into the list
    sync_chunks(ChunkBroker::CHUNK_TYPE_NODE, chunk_idx, &node_chunks_);
    ailego_assert_with(chunk_idx < node_chunks_.size(), "invalid chunk idx");
    source = node_chunks_[chunk_idx].get();
    bytes = neighbor_size_;
  }

  ailego_assert_with(position < source->data_size(), "invalid chunk offset");
  IndexStorage::MemoryBlock block;
  const size_t got = source->read(position, block, bytes);
  if (ailego_unlikely(got != bytes)) {
    LOG_ERROR("Read neighbor header failed, ret=%zu", got);
    return Neighbors();
  }
  return Neighbors(block);
}

//! Get vector data by local node id
//! return a pointer to the vector, or nullptr if the read came back short
const void *HnswStreamerEntity::get_vector(node_id_t id) const {
  auto loc = get_vector_chunk_loc(id);
  ailego_assert_with(loc.first < node_chunks_.size(), "invalid chunk idx");
  ailego_assert_with(loc.second < node_chunks_[loc.first]->data_size(),
                     "invalid chunk offset");

  const void *vec = nullptr;
  const size_t read_size = vector_size();

  size_t ret = node_chunks_[loc.first]->read(loc.second, &vec, read_size);
  if (ailego_unlikely(ret != read_size)) {
    LOG_ERROR("Read vector failed, offset=%u, read size=%zu, ret=%zu",
              loc.second, read_size, ret);
    //! never hand out a pointer that does not cover a whole vector
    return nullptr;
  }

  return vec;
}

//! Get vectors feature data by local ids: vecs[i] receives the pointer for
//! ids[i].
//! return 0 on success, IndexError_ReadData on the first short read
int HnswStreamerEntity::get_vector(const node_id_t *ids, uint32_t count,
                                   const void **vecs) const {
  const size_t expected = vector_size();
  for (uint32_t pos = 0U; pos < count; ++pos) {
    const auto loc = get_vector_chunk_loc(ids[pos]);
    ailego_assert_with(loc.first < node_chunks_.size(), "invalid chunk idx");
    ailego_assert_with(loc.second < node_chunks_[loc.first]->data_size(),
                       "invalid chunk offset");

    const size_t got =
        node_chunks_[loc.first]->read(loc.second, &vecs[pos], expected);
    if (ailego_unlikely(got != expected)) {
      LOG_ERROR("Read vector failed, offset=%u, read size=%zu, ret=%zu",
                loc.second, expected, got);
      return IndexError_ReadData;
    }
  }
  return 0;
}

int HnswStreamerEntity::get_vector(const node_id_t id,
                                   IndexStorage::MemoryBlock &block) const {
  auto loc = get_vector_chunk_loc(id);
  ailego_assert_with(loc.first < node_chunks_.size(), "invalid chunk idx");
  ailego_assert_with(loc.second < node_chunks_[loc.first]->data_size(),
                     "invalid chunk offset");

  size_t read_size = vector_size();

  size_t ret = node_chunks_[loc.first]->read(loc.second, block, read_size);
  if (ailego_unlikely(ret != read_size)) {
    LOG_ERROR("Read vector failed, offset=%u, read size=%zu, ret=%zu",
              loc.second, read_size, ret);
    return IndexError_ReadData;
  }
  return 0;
}

//! Get vectors feature data by local ids into memory blocks; vec_blocks is
//! resized to `count` and vec_blocks[i] receives the data of ids[i].
//! return 0 on success, IndexError_ReadData on the first short read
int HnswStreamerEntity::get_vector(
    const node_id_t *ids, uint32_t count,
    std::vector<IndexStorage::MemoryBlock> &vec_blocks) const {
  vec_blocks.resize(count);
  const size_t expected = vector_size();
  for (uint32_t pos = 0U; pos < count; ++pos) {
    const auto loc = get_vector_chunk_loc(ids[pos]);
    ailego_assert_with(loc.first < node_chunks_.size(), "invalid chunk idx");
    ailego_assert_with(loc.second < node_chunks_[loc.first]->data_size(),
                       "invalid chunk offset");

    const size_t got =
        node_chunks_[loc.first]->read(loc.second, vec_blocks[pos], expected);
    if (ailego_unlikely(got != expected)) {
      LOG_ERROR("Read vector failed, offset=%u, read size=%zu, ret=%zu",
                loc.second, expected, got);
      return IndexError_ReadData;
    }
  }
  return 0;
}

//! Get primary key of the node id.
//! When the key info map is disabled the node id itself is the key.
//! return kInvalidKey if the stored key cannot be read
key_t HnswStreamerEntity::get_key(node_id_t id) const {
  if (use_key_info_map_) {
    auto loc = get_key_chunk_loc(id);
    IndexStorage::MemoryBlock key_block;
    ailego_assert_with(loc.first < node_chunks_.size(), "invalid chunk idx");
    ailego_assert_with(loc.second < node_chunks_[loc.first]->data_size(),
                       "invalid chunk offset");
    size_t ret =
        node_chunks_[loc.first]->read(loc.second, key_block, sizeof(key_t));
    if (ailego_unlikely(ret != sizeof(key_t))) {
      //! this path reads the key, not the vector
      LOG_ERROR("Read key failed, ret=%zu", ret);
      return kInvalidKey;
    }

    return *reinterpret_cast<const key_t *>(key_block.data());
  } else {
    return id;
  }
}

//! Append neighbor_id to the neighbor list of node `id` on `level`.
//! `size` is the number of neighbors already stored; the new id is written
//! into slot `size` first and the stored count is then updated to size + 1.
//! Failures are logged only.
void HnswStreamerEntity::add_neighbor(level_t level, node_id_t id,
                                      uint32_t size, node_id_t neighbor_id) {
  auto loc = get_neighbor_chunk_loc(level, id);
  const size_t slot_offset =
      loc.second + sizeof(NeighborsHeader) + size * sizeof(node_id_t);
  ailego_assert_with(size < neighbor_cnt(level), "invalid neighbor size");
  ailego_assert_with(slot_offset < loc.first->data_size(),
                     "invalid chunk offset");

  //! step 1: store the neighbor id
  size_t written = loc.first->write(slot_offset, &neighbor_id, sizeof(node_id_t));
  if (ailego_unlikely(written != sizeof(node_id_t))) {
    LOG_ERROR("Write neighbor id failed, ret=%zu", written);
    return;
  }

  //! step 2: store the new count at the start of the neighbor list
  const uint32_t new_cnt = size + 1;
  written = loc.first->write(loc.second, &new_cnt, sizeof(uint32_t));
  if (ailego_unlikely(written != sizeof(uint32_t))) {
    LOG_ERROR("Write neighbor cnt failed, ret=%zu", written);
  }
}

//! Load an existing index: read the header from header_chunk, validate it
//! with check_hnsw_index() and cache every node / upper neighbor chunk
//! handle from the broker.
//! return 0 on success, or errCode in failure
int HnswStreamerEntity::init_chunks(const Chunk::Pointer &header_chunk) {
  const size_t hd_size = header_size();
  if (header_chunk->data_size() < hd_size) {
    LOG_ERROR("Invalid header chunk size");
    return IndexError_InvalidFormat;
  }

  IndexStorage::MemoryBlock header_block;
  const size_t got = header_chunk->read(0UL, header_block, header_size());
  if (ailego_unlikely(got != header_size())) {
    LOG_ERROR("Read header chunk failed");
    return IndexError_ReadData;
  }
  const auto *stored =
      reinterpret_cast<const HNSWHeader *>(header_block.data());
  *mutable_header() = *stored;

  //! the broker is closed only when the header validation fails
  if (int ret = check_hnsw_index(&header()); ret != 0) {
    broker_->close();
    return ret;
  }

  //! node chunks
  const auto node_chunk_cnt =
      broker_->get_chunk_cnt(ChunkBroker::CHUNK_TYPE_NODE);
  node_chunks_.resize(node_chunk_cnt);
  for (auto seq = 0UL; seq < node_chunks_.size(); ++seq) {
    auto &slot = node_chunks_[seq];
    slot = broker_->get_chunk(ChunkBroker::CHUNK_TYPE_NODE, seq);
    if (!slot) {
      LOG_ERROR("Missing hnsw streamer data chunk %zu th of %zu", seq,
                node_chunks_.size());
      return IndexError_InvalidFormat;
    }
  }

  //! upper neighbor chunks
  const auto upper_chunk_cnt =
      broker_->get_chunk_cnt(ChunkBroker::CHUNK_TYPE_UPPER_NEIGHBOR);
  upper_neighbor_chunks_.resize(upper_chunk_cnt);
  for (auto seq = 0UL; seq < upper_neighbor_chunks_.size(); ++seq) {
    auto &slot = upper_neighbor_chunks_[seq];
    slot = broker_->get_chunk(ChunkBroker::CHUNK_TYPE_UPPER_NEIGHBOR, seq);
    if (!slot) {
      LOG_ERROR("Missing hnsw streamer index chunk %zu th of %zu", seq,
                upper_neighbor_chunks_.size());
      return IndexError_InvalidFormat;
    }
  }

  return 0;
}

//! Open entity from storage.
//! Opens the chunk broker on `stg`, initialises the upper neighbor index and
//! then either creates a header chunk (empty storage) or loads the existing
//! index, reconciles the doc count with the stored node data and rebuilds
//! the key -> id map when it is in use.
//! return 0 on success, or errCode in failure
int HnswStreamerEntity::open(IndexStorage::Pointer stg, uint64_t max_index_size,
                             bool check_crc) {
  std::lock_guard<std::mutex> lock(mutex_);
  bool huge_page = stg->isHugePage();
  LOG_DEBUG("huge_page: %d", (int)huge_page);
  int ret = init_chunk_params(max_index_size, huge_page);
  if (ailego_unlikely(ret != 0)) {
    LOG_ERROR("init_chunk_params failed for %s", IndexError::What(ret));
    return ret;
  }
  ret = broker_->open(std::move(stg), max_index_size_, chunk_size_, check_crc);
  if (ailego_unlikely(ret != 0)) {
    LOG_ERROR("Open index failed for %s", IndexError::What(ret));
    return ret;
  }
  ret = upper_neighbor_index_->init(broker_, upper_neighbor_chunk_size_,
                                    scaling_factor(), estimate_doc_capacity(),
                                    kUpperHashMemoryInflateRatio);
  if (ailego_unlikely(ret != 0)) {
    LOG_ERROR("Init neighbor hash map failed");
    return ret;
  }

  //! init header
  auto header_chunk = broker_->get_chunk(ChunkBroker::CHUNK_TYPE_HEADER,
                                         ChunkBroker::kDefaultChunkSeqId);
  if (!header_chunk) {  // open empty index, create one
    auto p =
        broker_->alloc_chunk(ChunkBroker::CHUNK_TYPE_HEADER,
                             ChunkBroker::kDefaultChunkSeqId, header_size());
    if (ailego_unlikely(p.first != 0)) {
      LOG_ERROR("Alloc header chunk failed");
      return p.first;
    }
    size_t size = p.second->write(0UL, &header(), header_size());
    if (ailego_unlikely(size != header_size())) {
      LOG_ERROR("Write header chunk failed");
      return IndexError_WriteData;
    }
    return 0;
  }

  //! Open an exist hnsw index
  ret = init_chunks(header_chunk);
  if (ailego_unlikely(ret != 0)) {
    return ret;
  }

  //! total docs including features wrote in index but neighbors may not ready
  node_id_t total_vecs = 0;
  if (node_chunks_.size() > 0) {
    size_t last_idx = node_chunks_.size() - 1;
    const auto &last_chunk = node_chunks_[last_idx];
    //! the last chunk must hold a whole number of node slots
    if (last_chunk->data_size() % node_size()) {
      LOG_WARN("The index may broken");
      return IndexError_InvalidFormat;
    }
    total_vecs = last_idx * node_cnt_per_chunk_ +
                 last_chunk->data_size() / node_size();
  }

  LOG_INFO(
      "Open index, l0NeighborCnt=%zu upperNeighborCnt=%zu "
      "efConstruction=%zu curDocCnt=%u totalVecs=%u maxLevel=%u",
      l0_neighbor_cnt(), upper_neighbor_cnt(), ef_construction(), doc_cnt(),
      total_vecs, cur_max_level());
  //! try to correct the docCnt if index not fully flushed
  if (doc_cnt() != total_vecs) {
    LOG_WARN("Index closed abnormally, using totalVecs as curDocCnt");
    *mutable_doc_cnt() = total_vecs;
  }

  //! rebuild key -> id map; slots tagged with kInvalidKey are skipped
  if ((filter_same_key_ || get_vector_enabled_) && use_key_info_map_) {
    for (node_id_t id = 0U; id < doc_cnt(); ++id) {
      //! read the key from storage once per node
      const key_t key = get_key(id);
      if (key == kInvalidKey) {
        continue;
      }
      (*keys_map_)[key] = id;
    }
  }

  stats_.set_loaded_count(doc_cnt());

  return 0;
}

//! Close entity: flush the header, drop all in-memory state, then close the
//! broker.
//! return the result of closing the broker
int HnswStreamerEntity::close() {
  LOG_DEBUG("close index");

  std::lock_guard<std::mutex> guard(mutex_);

  //! flush_header() runs before the header is reset below
  flush_header();
  mutable_header()->reset();

  //! lookup structures and cached chunk handles
  upper_neighbor_index_->cleanup();
  keys_map_->clear();
  header_.clear();
  node_chunks_.clear();
  upper_neighbor_chunks_.clear();

  const int rc = broker_->close();
  return rc;
}

//! Flush graph entity: flush the header, then ask the broker to flush with
//! the given checkpoint.
//! return 0 on success, or errCode in failure
int HnswStreamerEntity::flush(uint64_t checkpoint) {
  LOG_INFO("Flush index, curDocs=%u", doc_cnt());

  std::lock_guard<std::mutex> guard(mutex_);
  flush_header();
  const int rc = broker_->flush(checkpoint);
  return rc;
}

//! Dump index by dumper.
//! Collects the key of every node (indexed by node id), then hands the keys
//! and a per-node level lookup to dump_segments(); the returned size is
//! added to the dumped-size statistic.
//! return 0 on success, or errCode in failure
int HnswStreamerEntity::dump(const IndexDumper::Pointer &dumper) {
  LOG_INFO("Dump index, curDocs=%u", doc_cnt());

  //! keys[i] is the key of node i
  std::vector<key_t> keys(doc_cnt());
  for (node_id_t node = 0; node < doc_cnt(); ++node) {
    keys[node] = get_key(node);
  }

  //! nodes without an upper neighbor index entry are reported as level 0
  auto level_of = [&](node_id_t node) {
    auto entry = upper_neighbor_index_->find(node);
    if (entry == upper_neighbor_index_->end()) {
      return 0U;
    }
    auto meta =
        reinterpret_cast<const UpperNeighborIndexMeta *>(&entry->second);
    return meta->level;
  };

  //! dump neighbors
  auto dumped = dump_segments(dumper, keys.data(), level_of);
  if (ailego_unlikely(dumped < 0)) {
    return dumped;
  }
  *stats_.mutable_dumped_size() += dumped;

  return 0;
}

int HnswStreamerEntity::check_hnsw_index(const HNSWHeader *hd) const {
  if (l0_neighbor_cnt() != hd->l0_neighbor_cnt() ||
      upper_neighbor_cnt() != hd->upper_neighbor_cnt()) {
    LOG_ERROR("Param neighbor cnt: %zu:%zu mismatch index previous %zu:%zu",
              l0_neighbor_cnt(), upper_neighbor_cnt(), hd->l0_neighbor_cnt(),
              hd->upper_neighbor_cnt());
    return IndexError_Mismatch;
  }
  if (vector_size() != hd->vector_size()) {
    LOG_ERROR("vector size %zu mismatch index previous %zu", vector_size(),
              hd->vector_size());
    return IndexError_Mismatch;
  }
  if (ef_construction() != hd->ef_construction()) {
    LOG_WARN("Param efConstruction %zu mismatch index previous %zu",
             ef_construction(), hd->ef_construction());
  }
  if (scaling_factor() != hd->scaling_factor()) {
    LOG_WARN("Param scalingFactor %zu mismatch index previous %zu",
             scaling_factor(), hd->scaling_factor());
    return IndexError_Mismatch;
  }
  if (prune_cnt() != hd->neighbor_prune_cnt()) {
    LOG_WARN("Param pruneCnt %zu mismatch index previous %zu", prune_cnt(),
             hd->neighbor_prune_cnt());
    return IndexError_Mismatch;
  }
  if ((hd->entry_point() != kInvalidNodeId &&
       hd->entry_point() >= hd->doc_cnt()) ||
      (hd->entry_point() == kInvalidNodeId && hd->doc_cnt() > 0U)) {
    LOG_WARN("Invalid entryPoint %u, docCnt %u", hd->entry_point(),
             hd->doc_cnt());
    return IndexError_InvalidFormat;
  }
  if (hd->entry_point() == kInvalidNodeId &&
      broker_->get_chunk_cnt(ChunkBroker::CHUNK_TYPE_NODE) > 0) {
    LOG_WARN("The index is broken, maybe it haven't flush");
    return IndexError_InvalidFormat;
  }

  return 0;
}

//! Add vector and key to hnsw entity, and local id will be saved in id.
//! The node is appended to the last node chunk, or to a newly allocated one
//! when the last chunk is full (or no chunk exists yet).
//! return 0 on success, IndexError_Duplicate when filter_same_key_ is on and
//! the key is already known, or another errCode in failure
int HnswStreamerEntity::add_vector(level_t level, key_t key, const void *vec,
                                   node_id_t *id) {
  Chunk::Pointer node_chunk;
  size_t chunk_offset = -1UL;

  std::lock_guard<std::mutex> lock(mutex_);
  // duplicate check
  if (ailego_unlikely(filter_same_key_ && get_id(key) != kInvalidNodeId)) {
    LOG_WARN("Try to add duplicate key, ignore it");
    return IndexError_Duplicate;
  }

  node_id_t local_id = static_cast<node_id_t>(doc_cnt());
  //! wraps to -1U when there is no node chunk yet; checked right below
  uint32_t chunk_index = node_chunks_.size() - 1U;
  if (chunk_index == -1U ||
      (node_chunks_[chunk_index]->data_size() >=
       node_cnt_per_chunk_ * node_size())) {  // no space left and need to alloc
    if (ailego_unlikely(node_chunks_.capacity() == node_chunks_.size())) {
      LOG_ERROR("add vector failed for no memory quota");
      return IndexError_IndexFull;
    }
    chunk_index++;
    auto p = broker_->alloc_chunk(ChunkBroker::CHUNK_TYPE_NODE, chunk_index,
                                  chunk_size_);
    if (ailego_unlikely(p.first != 0)) {
      LOG_ERROR("Alloc data chunk failed");
      return p.first;
    }
    node_chunk = p.second;
    chunk_offset = 0UL;
    node_chunks_.emplace_back(node_chunk);
  } else {
    node_chunk = node_chunks_[chunk_index];
    chunk_offset = node_chunk->data_size();
  }

  size_t size = node_chunk->write(chunk_offset, vec, vector_size());
  if (ailego_unlikely(size != vector_size())) {
    LOG_ERROR("Chunk write vec failed, ret=%zu", size);
    return IndexError_WriteData;
  }
  size = node_chunk->write(chunk_offset + vector_size(), &key, sizeof(key_t));
  if (ailego_unlikely(size != sizeof(key_t))) {
    LOG_ERROR("Chunk write key failed, ret=%zu", size);
    return IndexError_WriteData;
  }
  //! level 0 neighbors is inited to zero by default

  int ret = add_upper_neighbor(level, local_id);
  if (ret != 0) {
    return ret;
  }

  //! grow the chunk's data size to cover the new node slot
  chunk_offset += node_size();
  if (ailego_unlikely(node_chunk->resize(chunk_offset) != chunk_offset)) {
    LOG_ERROR("Chunk resize to %zu failed", chunk_offset);
    return IndexError_Runtime;
  }
  if ((filter_same_key_ || get_vector_enabled_) && use_key_info_map_) {
    //! scoped guard: the lock is released even if the insert throws
    std::lock_guard<ailego::SharedMutex> keys_guard(*keys_map_lock_);
    (*keys_map_)[key] = local_id;
  }

  *mutable_doc_cnt() += 1;
  broker_->mark_dirty();
  *id = local_id;

  return 0;
}

//! Add vector and id to hnsw entity; the id is also used as the key.
//! Every slot in [doc_cnt, id) is created first and tagged with kInvalidKey,
//! then the vector and key are written into slot `id`.
//! return 0 on success, IndexError_Duplicate when filter_same_key_ is on and
//! the key is already known, or another errCode in failure
int HnswStreamerEntity::add_vector_with_id(level_t level, node_id_t id,
                                           const void *vec) {
  Chunk::Pointer node_chunk;
  size_t chunk_offset = -1UL;
  key_t key = id;

  std::lock_guard<std::mutex> lock(mutex_);

  // duplicate check
  if (ailego_unlikely(filter_same_key_ && get_id(key) != kInvalidNodeId)) {
    LOG_WARN("Try to add duplicate key, ignore it");
    return IndexError_Duplicate;
  }

  // set node_chunk & chunk_offset if succeed
  auto func_get_node_chunk_and_offset = [&](node_id_t node_id) -> int {
    uint32_t chunk_index = node_id >> node_index_mask_bits_;
    ailego_assert_with(chunk_index <= node_chunks_.size(), "invalid chunk idx");
    // belongs to next chunk
    if (chunk_index == node_chunks_.size()) {
      if (ailego_unlikely(node_chunks_.capacity() == node_chunks_.size())) {
        LOG_ERROR("add vector failed for no memory quota");
        return IndexError_IndexFull;
      }
      auto p = broker_->alloc_chunk(ChunkBroker::CHUNK_TYPE_NODE, chunk_index,
                                    chunk_size_);
      if (ailego_unlikely(p.first != 0)) {
        LOG_ERROR("Alloc data chunk failed");
        return p.first;
      }
      node_chunk = p.second;
      node_chunks_.emplace_back(node_chunk);
    }

    node_chunk = node_chunks_[chunk_index];
    chunk_offset = (node_id & node_index_mask_) * node_size();
    return 0;
  };

  //! pad the gap [doc_cnt, id): each slot only gets kInvalidKey written
  for (size_t start_id = doc_cnt(); start_id < id; ++start_id) {
    if (auto ret = func_get_node_chunk_and_offset(start_id); ret != 0) {
      LOG_ERROR("func_get_node_chunk_and_offset failed");
      return ret;
    }
    size_t size = node_chunk->write(chunk_offset + vector_size(), &kInvalidKey,
                                    sizeof(key_t));
    if (ailego_unlikely(size != sizeof(key_t))) {
      LOG_ERROR("Chunk write key failed, ret=%zu", size);
      return IndexError_WriteData;
    }

    chunk_offset += node_size();
    if (ailego_unlikely(node_chunk->resize(chunk_offset) != chunk_offset)) {
      LOG_ERROR("Chunk resize to %zu failed", chunk_offset);
      return IndexError_Runtime;
    }
  }

  if (auto ret = func_get_node_chunk_and_offset(id); ret != 0) {
    LOG_ERROR("func_get_node_chunk_and_offset failed");
    return ret;
  }

  size_t size = node_chunk->write(chunk_offset, vec, vector_size());
  if (ailego_unlikely(size != vector_size())) {
    LOG_ERROR("Chunk write vec failed, ret=%zu", size);
    return IndexError_WriteData;
  }

  size = node_chunk->write(chunk_offset + vector_size(), &key, sizeof(key_t));
  if (ailego_unlikely(size != sizeof(key_t))) {
    LOG_ERROR("Chunk write key failed, ret=%zu", size);
    return IndexError_WriteData;
  }
  //! level 0 neighbors is inited to zero by default

  int ret = add_upper_neighbor(level, id);
  if (ret != 0) {
    return ret;
  }

  if (*mutable_doc_cnt() <= id) {
    //! grow the chunk first, so a failed resize leaves doc_cnt untouched
    //! (same order as add_vector)
    chunk_offset += node_size();
    if (ailego_unlikely(node_chunk->resize(chunk_offset) != chunk_offset)) {
      LOG_ERROR("Chunk resize to %zu failed", chunk_offset);
      return IndexError_Runtime;
    }
    *mutable_doc_cnt() = id + 1;
  }

  if ((filter_same_key_ || get_vector_enabled_) && use_key_info_map_) {
    //! scoped guard: the lock is released even if the insert throws
    std::lock_guard<ailego::SharedMutex> keys_guard(*keys_map_lock_);
    (*keys_map_)[key] = id;
  }

  broker_->mark_dirty();

  return 0;
}

//! Update entry point and max level through the base class, then flush the
//! header straight away
void HnswStreamerEntity::update_ep_and_level(node_id_t ep, level_t level) {
  HnswEntity::update_ep_and_level(ep, level);
  flush_header();
}

//! Make a copy of streamer entity.
//! Every cached node / upper neighbor chunk is cloned; the neighbor index,
//! key map, key map lock and broker pointers are passed on unchanged.
//! return an empty pointer if a chunk clone or the allocation fails
const HnswEntity::Pointer HnswStreamerEntity::clone() const {
  std::vector<Chunk::Pointer> node_copies;
  node_copies.reserve(node_chunks_.size());
  for (const auto &chunk : node_chunks_) {
    node_copies.emplace_back(chunk->clone());
    if (ailego_unlikely(!node_copies.back())) {
      LOG_ERROR("HnswStreamerEntity get chunk failed in clone");
      return HnswEntity::Pointer();
    }
  }

  std::vector<Chunk::Pointer> upper_copies;
  upper_copies.reserve(upper_neighbor_chunks_.size());
  for (const auto &chunk : upper_neighbor_chunks_) {
    upper_copies.emplace_back(chunk->clone());
    if (ailego_unlikely(!upper_copies.back())) {
      LOG_ERROR("HnswStreamerEntity get chunk failed in clone");
      return HnswEntity::Pointer();
    }
  }

  //! a null result is logged and then wrapped as an empty pointer
  HnswStreamerEntity *copy = new (std::nothrow) HnswStreamerEntity(
      stats_, header(), chunk_size_, node_index_mask_bits_,
      upper_neighbor_mask_bits_, filter_same_key_, get_vector_enabled_,
      upper_neighbor_index_, keys_map_lock_, keys_map_, use_key_info_map_,
      std::move(node_copies), std::move(upper_copies), broker_);
  if (ailego_unlikely(copy == nullptr)) {
    LOG_ERROR("HnswStreamerEntity new failed");
  }
  return HnswEntity::Pointer(copy);
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw/hnsw_streamer_entity.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <iostream>
#include <ailego/parallel/lock.h>
#include <sparsehash/dense_hash_map>
#include <sparsehash/dense_hash_set>
#include <zvec/ailego/container/heap.h>
#include <zvec/core/framework/index_framework.h>
#include "hnsw_chunk.h"
#include "hnsw_entity.h"
#include "hnsw_index_hash.h"
#include "hnsw_params.h"

namespace zvec {
namespace core {

//! HnswStreamerEntity manage vector data, pkey, and node's neighbors
class HnswStreamerEntity : public HnswEntity {
 public:
  //! Cleanup
  //! return 0 on success, or errCode in failure
  virtual int cleanup() override;

  //! Make a copy of streamer entity, to support thread-safe operation.
  //! The segment in container cannot be read concurrently
  virtual const HnswEntity::Pointer clone() const override;

  //! Get primary key of the node id
  virtual key_t get_key(node_id_t id) const override;

  //! Get vector feature data by local id
  virtual const void *get_vector(node_id_t id) const override;

  //! Get vectors feature data by local ids
  virtual int get_vector(const node_id_t *ids, uint32_t count,
                         const void **vecs) const override;

  virtual int get_vector(const node_id_t id,
                         IndexStorage::MemoryBlock &block) const override;

  virtual int get_vector(
      const node_id_t *ids, uint32_t count,
      std::vector<IndexStorage::MemoryBlock> &vec_blocks) const override;

  //! Get the node id's neighbors on graph level
  //! Note: the neighbors cannot be modified, using the following
  //! method to get WritableNeighbors if want to
  virtual const Neighbors get_neighbors(level_t level,
                                        node_id_t id) const override;

  //! Add vector and key to hnsw entity, and local id will be saved in id
  virtual int add_vector(level_t level, key_t key, const void *vec,
                         node_id_t *id) override;

  //! Add vector and id to hnsw entity
  virtual int add_vector_with_id(level_t level, node_id_t id,
                                 const void *vec) override;

  virtual int update_neighbors(
      level_t level, node_id_t id,
      const std::vector<std::pair<node_id_t, dist_t>> &neighbors) override;

  //! Append neighbor_id to node id neighbors on level
  //! Notice: the caller must ensure the neighbors are not full
  virtual void add_neighbor(level_t level, node_id_t id, uint32_t size,
                            node_id_t neighbor_id) override;

  //! Dump index by dumper
  virtual int dump(const IndexDumper::Pointer &dumper) override;

  virtual void update_ep_and_level(node_id_t ep, level_t level) override;

  //! Choose whether keys go through the key info map (true) or node ids are
  //! used directly as keys (false); the new setting is logged at debug level
  void set_use_key_info_map(bool use_id_map) {
    use_key_info_map_ = use_id_map;
    LOG_DEBUG("use_key_info_map_: %d", static_cast<int>(use_key_info_map_));
  }

 public:
  //! Constructor
  HnswStreamerEntity(IndexStreamer::Stats &stats);

  //! Destructor
  ~HnswStreamerEntity();

  //! Get vector feature data by key
  //! return nullptr when the key does not resolve to a node id
  virtual const void *get_vector_by_key(key_t key) const override {
    const auto node = get_id(key);
    if (node == kInvalidNodeId) {
      return nullptr;
    }
    return get_vector(node);
  }

  //! Get vector feature data by key into a memory block
  //! return IndexError_InvalidArgument when the key does not resolve to a
  //! node id, otherwise the result of get_vector(id, block)
  virtual int get_vector_by_key(
      const key_t key, IndexStorage::MemoryBlock &block) const override {
    const auto node = get_id(key);
    if (node == kInvalidNodeId) {
      return IndexError_InvalidArgument;
    }
    return get_vector(node, block);
  }

  //! Init entity
  int init(size_t max_doc_cnt);

  //! Flush graph entity to disk
  //! return 0 on success, or errCode in failure
  int flush(uint64_t checkpoint);

  //! Open entity from storage
  //! return 0 on success, or errCode in failure
  int open(IndexStorage::Pointer stg, uint64_t max_index_size, bool check_crc);

  //! Close entity
  //! return 0 on success, or errCode in failure
  int close();

  //! Set meta information: serialize `meta` into the broker's storage
  //! return the result of IndexHelper::SerializeToStorage
  //! NOTE(review): dereferences broker_ without a null check — presumably
  //! only valid after init(); confirm with callers
  int set_index_meta(const IndexMeta &meta) const {
    return IndexHelper::SerializeToStorage(meta, broker_->storage().get());
  }

  //! Get meta information: deserialize from the broker's storage into `meta`
  //! return the result of IndexHelper::DeserializeFromStorage
  //! NOTE(review): dereferences broker_ without a null check — presumably
  //! only valid after init(); confirm with callers
  int get_index_meta(IndexMeta *meta) const {
    return IndexHelper::DeserializeFromStorage(broker_->storage().get(), meta);
  }

  //! Set params: chunk size (passed to the broker on open and used as the
  //! size of newly allocated node chunks)
  inline void set_chunk_size(size_t val) { chunk_size_ = val; }

  //! Set params: when true, adding a key that is already known is rejected
  //! as a duplicate
  inline void set_filter_same_key(bool val) { filter_same_key_ = val; }

  //! Set params: when true (and the key info map is in use), the key -> id
  //! map is maintained as vectors are added
  inline void set_get_vector(bool val) { get_vector_enabled_ = val; }

  //! Get vector local id by key
  //! Without the key info map the key itself is the id; otherwise the key is
  //! looked up in keys_map_ under the shared lock.
  //! return kInvalidNodeId when the key is not in the map
  inline node_id_t get_id(key_t key) const {
    if (!use_key_info_map_) {
      return key;
    }
    keys_map_lock_->lock_shared();
    auto it = keys_map_->find(key);
    //! the iterator is only meaningful while the lock is held (a concurrent
    //! insert may rehash the map), so resolve it before unlocking
    const node_id_t id =
        (it == keys_map_->end()) ? kInvalidNodeId : it->second;
    keys_map_lock_->unlock_shared();
    return id;
  }

  //! Debug helper: print every (key, id) pair of the key map to stdout
  void print_key_map() const {
    std::cout << "key map begins" << std::endl;

    for (auto entry = keys_map_->begin(); entry != keys_map_->end(); entry++) {
      std::cout << "key: " << entry->first << ", id: " << entry->second
                << std::endl;
    }

    std::cout << "key map ends" << std::endl;
  }

  //! Get l0 neighbors size: header plus l0_neighbor_cnt() node ids, in bytes
  inline size_t neighbors_size() const {
    const size_t ids_bytes = l0_neighbor_cnt() * sizeof(node_id_t);
    return sizeof(NeighborsHeader) + ids_bytes;
  }

  //! Get neighbors size for level > 0: header plus upper_neighbor_cnt() node
  //! ids, in bytes
  inline size_t upper_neighbors_size() const {
    const size_t ids_bytes = upper_neighbor_cnt() * sizeof(node_id_t);
    return sizeof(NeighborsHeader) + ids_bytes;
  }


 private:
  //! Value stored per node in the upper neighbor index, viewed either as a
  //! packed (level, index) pair or as the raw 32-bit word
  union UpperNeighborIndexMeta {
    struct {
      uint32_t level : 4;   // 4-bit level field
      uint32_t index : 28;  // composite 28-bit location: the chunk index
                            // together with the position of the node's
                            // neighbor lists inside that chunk
    };
    uint32_t data;  // the same 32 bits as a single word
  };

  //! dense_hash_map keyed with std::hash, and its shared pointer
  template <class Key, class T>
  using HashMap = google::dense_hash_map<Key, T, std::hash<Key>>;
  template <class Key, class T>
  using HashMapPointer = std::shared_ptr<HashMap<Key, T>>;

  //! dense_hash_set keyed with std::hash, and its shared pointer
  template <class Key>
  using HashSet = google::dense_hash_set<Key, std::hash<Key>>;
  template <class Key>
  using HashSetPointer = std::shared_ptr<HashSet<Key>>;

  //! upper neighbor index hashmap: node id -> 32-bit value (read back as
  //! UpperNeighborIndexMeta)
  using NIHashMap = HnswIndexHashMap<node_id_t, uint32_t>;
  using NIHashMapPointer = std::shared_ptr<NIHashMap>;

  //! Private constructor, only called by the clone method.
  //! Copies the header and parameters, shares the neighbor index, key map,
  //! key map lock and broker with the source entity, and takes ownership of
  //! the already-cloned chunk lists.
  //! NOTE(review): node_cnt_per_chunk_, node_index_mask_ and
  //! upper_neighbor_mask_ are computed from other members in the initializer
  //! list; this is only correct if the members are declared in a compatible
  //! order (declarations are not visible here) — confirm
  HnswStreamerEntity(IndexStreamer::Stats &stats, const HNSWHeader &hd,
                     size_t chunk_size, uint32_t node_index_mask_bits,
                     uint32_t upper_neighbor_mask_bits, bool filter_same_key,
                     bool get_vector_enabled,
                     const NIHashMapPointer &upper_neighbor_index,
                     std::shared_ptr<ailego::SharedMutex> &keys_map_lock,
                     const HashMapPointer<key_t, node_id_t> &keys_map,
                     bool use_key_info_map,
                     std::vector<Chunk::Pointer> &&node_chunks,
                     std::vector<Chunk::Pointer> &&upper_neighbor_chunks,
                     const ChunkBroker::Pointer &broker)
      : stats_(stats),
        chunk_size_(chunk_size),
        node_index_mask_bits_(node_index_mask_bits),
        node_cnt_per_chunk_(1UL << node_index_mask_bits_),
        node_index_mask_(node_cnt_per_chunk_ - 1),
        upper_neighbor_mask_bits_(upper_neighbor_mask_bits),
        upper_neighbor_mask_((1U << upper_neighbor_mask_bits_) - 1),
        filter_same_key_(filter_same_key),
        get_vector_enabled_(get_vector_enabled),
        use_key_info_map_(use_key_info_map),
        upper_neighbor_index_(upper_neighbor_index),
        keys_map_lock_(keys_map_lock),
        keys_map_(keys_map),
        node_chunks_(std::move(node_chunks)),
        upper_neighbor_chunks_(std::move(upper_neighbor_chunks)),
        broker_(broker) {
    *mutable_header() = hd;

    //! neighbor list sizes depend on the header that was just copied in
    neighbor_size_ = neighbors_size();
    upper_neighbor_size_ = upper_neighbors_size();
  }

  //! Make sure the local chunk list covers index `idx`, fetching every
  //! missing chunk of the given type from the broker in ascending order.
  //! Called only in searching procedure per context, so no need to lock
  void sync_chunks(ChunkBroker::CHUNK_TYPE type, size_t idx,
                   std::vector<Chunk::Pointer> *chunks) const {
    // Fast path: the requested chunk is already present locally.
    if (ailego_likely(idx < chunks->size())) {
      return;
    }
    size_t next_idx = chunks->size();
    while (next_idx <= idx) {
      auto chunk = broker_->get_chunk(type, next_idx);
      // the storage can ensure get chunk will success after the first get
      ailego_assert_with(!!chunk, "get chunk failed");
      chunks->emplace_back(std::move(chunk));
      ++next_idx;
    }
  }

  //! Locate the vector of node `id`.
  //! Returns the pair (node chunk index, byte offset inside that chunk) and
  //! makes sure the local node chunk list covers that chunk index.
  inline std::pair<uint32_t, uint32_t> get_vector_chunk_loc(
      node_id_t id) const {
    // High bits of the id select the chunk, low bits select the node slot.
    const uint32_t node_chunk = id >> node_index_mask_bits_;
    const uint32_t byte_pos = (id & node_index_mask_) * node_size();

    sync_chunks(ChunkBroker::CHUNK_TYPE_NODE, node_chunk, &node_chunks_);
    return std::pair<uint32_t, uint32_t>(node_chunk, byte_pos);
  }

  //! Locate the key of node `id`.
  //! Returns the pair (node chunk index, byte offset inside that chunk); the
  //! offset is the node slot start plus vector_size().
  inline std::pair<uint32_t, uint32_t> get_key_chunk_loc(node_id_t id) const {
    const uint32_t node_chunk = id >> node_index_mask_bits_;
    const uint32_t byte_pos =
        (id & node_index_mask_) * node_size() + vector_size();

    sync_chunks(ChunkBroker::CHUNK_TYPE_NODE, node_chunk, &node_chunks_);
    return std::pair<uint32_t, uint32_t>(node_chunk, byte_pos);
  }

  //! Locate the neighbor list of node `id` at upper level `level`.
  //! Returns the pair (upper neighbor chunk index, byte offset inside that
  //! chunk). Aborts when the node has no entry in the upper neighbor index or
  //! when the computed location is out of range.
  inline std::pair<uint32_t, uint32_t> get_upper_neighbor_chunk_loc(
      level_t level, node_id_t id) const {
    auto it = upper_neighbor_index_->find(id);
    ailego_assert_abort(it != upper_neighbor_index_->end(),
                        "Get upper neighbor header failed");
    // The mapped 32-bit value is viewed as an UpperNeighborIndexMeta.
    auto meta = reinterpret_cast<const UpperNeighborIndexMeta *>(&it->second);

    // meta->index packs the chunk index (high bits) with the first slot of
    // this node inside the chunk (low bits); level `level` lives `level - 1`
    // slots after that first slot.
    const auto first_slot = (meta->index) & upper_neighbor_mask_;
    uint32_t chunk_idx = (meta->index) >> upper_neighbor_mask_bits_;
    uint32_t offset = (first_slot + level - 1) * upper_neighbor_size_;

    sync_chunks(ChunkBroker::CHUNK_TYPE_UPPER_NEIGHBOR, chunk_idx,
                &upper_neighbor_chunks_);
    ailego_assert_abort(chunk_idx < upper_neighbor_chunks_.size(),
                        "invalid chunk idx");
    ailego_assert_abort(offset < upper_neighbor_chunks_[chunk_idx]->data_size(),
                        "invalid chunk offset");
    return std::pair<uint32_t, uint32_t>(chunk_idx, offset);
  }

  //! Locate the neighbor list of node `id` at `level`.
  //! Returns the pair (chunk pointer, byte offset inside that chunk). Level 0
  //! neighbors live in the node chunks; higher levels are resolved through
  //! get_upper_neighbor_chunk_loc.
  inline std::pair<Chunk *, size_t> get_neighbor_chunk_loc(level_t level,
                                                           node_id_t id) const {
    if (level != 0UL) {
      const auto upper_loc = get_upper_neighbor_chunk_loc(level, id);
      return std::make_pair(upper_neighbor_chunks_[upper_loc.first].get(),
                            upper_loc.second);
    }

    // Level 0: the neighbor list follows the vector and the key in the node
    // slot.
    uint32_t chunk_idx = id >> node_index_mask_bits_;
    uint32_t offset =
        (id & node_index_mask_) * node_size() + vector_size() + sizeof(key_t);

    sync_chunks(ChunkBroker::CHUNK_TYPE_NODE, chunk_idx, &node_chunks_);
    ailego_assert_abort(chunk_idx < node_chunks_.size(), "invalid chunk idx");
    ailego_assert_abort(offset < node_chunks_[chunk_idx]->data_size(),
                        "invalid chunk offset");
    return std::make_pair(node_chunks_[chunk_idx].get(), offset);
  }

  //! Chunk hnsw index valid
  int check_hnsw_index(const HNSWHeader *hd) const;

  //! Bytes needed to store the upper neighbor lists of a node whose top level
  //! is `level`: one list of upper_neighbor_size_ bytes per level.
  size_t get_total_upper_neighbors_size(level_t level) const {
    const auto total_bytes = level * upper_neighbor_size_;
    return total_bytes;
  }

  //! Add upper neighbor header and reserve space for upper neighbor
  //!
  //! Reserves `level` consecutive neighbor lists for node `id` in the newest
  //! upper neighbor chunk (allocating a new chunk when none exists yet or the
  //! newest one has too little padding left) and records the location in the
  //! upper neighbor index.
  //!
  //! @level  top level of the node; level 0 needs no upper neighbors
  //! @id     node id used as the key in the upper neighbor index
  //! return 0 on success; IndexError_IndexFull when the chunk list has no
  //! reserved slot left; the broker's error code when allocation fails;
  //! IndexError_Runtime when the index insert or the chunk resize fails
  int add_upper_neighbor(level_t level, node_id_t id) {
    if (level == 0) {
      return 0;
    }
    const size_t neighbors_size = get_total_upper_neighbors_size(level);

    // Decide with empty()/back() instead of comparing `size() - 1` against
    // `-1UL`: where unsigned long is narrower than uint64_t that comparison
    // is false for an empty list and the code would index out of bounds.
    const bool need_new_chunk =
        upper_neighbor_chunks_.empty() ||
        (upper_neighbor_chunks_.back()->padding_size() < neighbors_size);

    Chunk::Pointer chunk;
    uint64_t chunk_index = 0UL;
    uint64_t chunk_offset = 0UL;
    if (need_new_chunk) {  // no space left and need to alloc
      chunk_index = upper_neighbor_chunks_.size();
      // The vector must never reallocate (readers access it without a lock),
      // so the reserved capacity is a hard limit.
      if (ailego_unlikely(upper_neighbor_chunks_.capacity() ==
                          upper_neighbor_chunks_.size())) {
        LOG_ERROR("add upper neighbor failed for no memory quota");
        return IndexError_IndexFull;
      }
      auto p = broker_->alloc_chunk(ChunkBroker::CHUNK_TYPE_UPPER_NEIGHBOR,
                                    chunk_index, upper_neighbor_chunk_size_);
      if (ailego_unlikely(p.first != 0)) {
        LOG_ERROR("Alloc data chunk failed");
        return p.first;
      }
      chunk = p.second;
      upper_neighbor_chunks_.emplace_back(chunk);
    } else {
      chunk_index = upper_neighbor_chunks_.size() - 1UL;
      chunk = upper_neighbor_chunks_.back();
      chunk_offset = chunk->data_size();
    }
    ailego_assert_with((size_t)level < kMaxGraphLayers, "invalid level");
    ailego_assert_with(chunk_offset % upper_neighbor_size_ == 0,
                       "invalid offset");
    ailego_assert_with((chunk_offset / upper_neighbor_size_) <
                           (1U << upper_neighbor_mask_bits_),
                       "invalid offset");
    ailego_assert_with(chunk_index < (1U << (28 - upper_neighbor_mask_bits_)),
                       "invalid chunk index");

    // Pack chunk index (high bits) and first slot inside the chunk (low
    // bits) into the index entry of this node.
    UpperNeighborIndexMeta meta;
    meta.level = level;
    meta.index = (chunk_index << upper_neighbor_mask_bits_) |
                 (chunk_offset / upper_neighbor_size_);
    chunk_offset += upper_neighbor_size_ * level;
    if (ailego_unlikely(!upper_neighbor_index_->insert(id, meta.data))) {
      LOG_ERROR("HashMap insert value failed");
      return IndexError_Runtime;
    }

    // Grow the chunk's data size to cover the newly reserved lists.
    if (ailego_unlikely(chunk->resize(chunk_offset) != chunk_offset)) {
      LOG_ERROR("Chunk resize to %zu failed", (size_t)chunk_offset);
      return IndexError_Runtime;
    }

    return 0;
  }

  //! Estimated number of nodes the entity can hold: reserved node chunk
  //! slots multiplied by the node count per chunk.
  size_t estimate_doc_capacity() const {
    const size_t reserved_chunks = node_chunks_.capacity();
    return reserved_chunks * node_cnt_per_chunk_;
  }

  //! Derive all chunk geometry from chunk_size_, node_size() and
  //! upper_neighbor_size_, and reserve the chunk vectors.
  //!
  //! @max_index_size  index size limit in bytes; 0 selects
  //!                  chunk_size_ * kDefaultMaxChunkCnt
  //! @huge_page       align chunk sizes with AlignHugePageSize instead of
  //!                  AlignPageSize
  //! return 0 (no failure path in this function)
  int init_chunk_params(size_t max_index_size, bool huge_page) {
    node_cnt_per_chunk_ = std::max<uint32_t>(1, chunk_size_ / node_size());
    //! align node cnt per chunk to pow of 2
    node_index_mask_bits_ = std::ceil(std::log2(node_cnt_per_chunk_));
    node_cnt_per_chunk_ = 1UL << node_index_mask_bits_;
    // Recompute the chunk size from the rounded-up node count, then align it.
    if (huge_page) {
      chunk_size_ = AlignHugePageSize(node_cnt_per_chunk_ * node_size());
    } else {
      chunk_size_ = AlignPageSize(node_cnt_per_chunk_ * node_size());
    }
    node_index_mask_ = node_cnt_per_chunk_ - 1;

    if (max_index_size == 0UL) {
      max_index_size_ = chunk_size_ * kDefaultMaxChunkCnt;
    } else {
      max_index_size_ = max_index_size;
    }

    //! To get a balanced upper neighbor chunk size.
    //! If the upper chunk size is equal to node chunk size, it may waste
    //! upper neighbor chunk space; if the upper neighbor chunk size is too
    //! small, it will need a large upper neighbor chunk index space. So, to
    //! get a balanced ratio, use the sqrt of the node/neighbor size ratio
    float ratio =
        std::sqrt(node_size() * scaling_factor() * 1.0f / upper_neighbor_size_);
    // An upper neighbor chunk is never smaller than the lists of one node
    // with kMaxGraphLayers levels.
    if (huge_page) {
      upper_neighbor_chunk_size_ = AlignHugePageSize(
          std::max(get_total_upper_neighbors_size(kMaxGraphLayers),
                   static_cast<size_t>(chunk_size_ / ratio)));
    } else {
      upper_neighbor_chunk_size_ = AlignPageSize(
          std::max(get_total_upper_neighbors_size(kMaxGraphLayers),
                   static_cast<size_t>(chunk_size_ / ratio)));
    }
    // Number of bits needed to address a neighbor list slot inside one upper
    // neighbor chunk.
    upper_neighbor_mask_bits_ =
        std::ceil(std::log2(upper_neighbor_chunk_size_ / upper_neighbor_size_));
    upper_neighbor_mask_ = (1 << upper_neighbor_mask_bits_) - 1;

    // NOTE(review): max_index_size_ (size_t) / chunk_size_ (uint32_t) is an
    // integer division, so the quotient is already truncated before
    // std::ceil is applied - confirm whether rounding down is intended.
    size_t max_node_chunk_cnt = std::ceil(max_index_size_ / chunk_size_);
    size_t max_upper_chunk_cnt = std::ceil(
        (max_node_chunk_cnt * node_cnt_per_chunk_ * 1.0f / scaling_factor()) /
        (upper_neighbor_chunk_size_ / upper_neighbor_size_));
    max_upper_chunk_cnt =
        max_upper_chunk_cnt + std::ceil(max_upper_chunk_cnt / scaling_factor());

    //! reserve space to avoid memmove in chunks vector emplace chunk, so
    //! as to lock-free in reading chunk
    node_chunks_.reserve(max_node_chunk_cnt);
    upper_neighbor_chunks_.reserve(max_upper_chunk_cnt);

    LOG_DEBUG(
        "Settings: nodeSize=%zu chunkSize=%u upperNeighborSize=%u "
        "upperNeighborChunkSize=%u "
        "nodeCntPerChunk=%u maxChunkCnt=%zu maxNeighborChunkCnt=%zu "
        "maxIndexSize=%zu ratio=%.3f",
        node_size(), chunk_size_, upper_neighbor_size_,
        upper_neighbor_chunk_size_, node_cnt_per_chunk_, max_node_chunk_cnt,
        max_upper_chunk_cnt, max_index_size_, ratio);

    return 0;
  }

  //! Init node chunk and neighbor chunks
  int init_chunks(const Chunk::Pointer &header_chunk);

  //! Write the in-memory header to the start of the header chunk.
  //! Nothing is written when the broker reports no pending changes.
  //! return 0 on success or when nothing had to be written,
  //! IndexError_Runtime when the header chunk cannot be obtained,
  //! IndexError_WriteData when fewer bytes than the header size are written
  int flush_header(void) {
    if (broker_->dirty()) {
      auto header_chunk = broker_->get_chunk(ChunkBroker::CHUNK_TYPE_HEADER,
                                             ChunkBroker::kDefaultChunkSeqId);
      if (ailego_unlikely(!header_chunk)) {
        LOG_ERROR("get header chunk failed");
        return IndexError_Runtime;
      }
      const size_t written = header_chunk->write(0UL, &header(), header_size());
      if (ailego_unlikely(written != header_size())) {
        LOG_ERROR("Write header chunk failed");
        return IndexError_WriteData;
      }
    }
    // Either the header was written or there was nothing to flush.
    return 0;
  }

 private:
  //! Not copyable
  HnswStreamerEntity(const HnswStreamerEntity &) = delete;
  HnswStreamerEntity &operator=(const HnswStreamerEntity &) = delete;
  // NOTE(review): an integer constant initialized from the float literal
  // 2.0f; the stored value is 2 - confirm the intended type.
  static constexpr uint64_t kUpperHashMemoryInflateRatio = 2.0f;

 private:
  IndexStreamer::Stats &stats_;
  HNSWHeader header_{};
  std::mutex mutex_{};
  //! Index size limit set by init_chunk_params
  size_t max_index_size_{0UL};
  //! Size of a node chunk / an upper neighbor chunk
  uint32_t chunk_size_{kDefaultChunkSize};
  uint32_t upper_neighbor_chunk_size_{kDefaultChunkSize};
  //! A node id is split by these into chunk index (id >> bits) and slot in
  //! the chunk (id & mask); node_cnt_per_chunk_ is 1 << bits
  uint32_t node_index_mask_bits_{0U};
  uint32_t node_cnt_per_chunk_{0U};
  uint32_t node_index_mask_{0U};
  //! Cached neighbors_size() / upper_neighbors_size()
  uint32_t neighbor_size_{0U};
  uint32_t upper_neighbor_size_{0U};
  //! UpperNeighborIndex.index composite chunkIdx and offset in chunk by the
  //! following mask
  uint32_t upper_neighbor_mask_bits_{0U};
  uint32_t upper_neighbor_mask_{0U};
  bool filter_same_key_{false};
  bool get_vector_enabled_{false};
  bool use_key_info_map_{true};

  //! node id -> packed location of the node's upper neighbor lists
  NIHashMapPointer upper_neighbor_index_{};

  mutable std::shared_ptr<ailego::SharedMutex> keys_map_lock_{};
  HashMapPointer<key_t, node_id_t> keys_map_{};

  //! the chunks will be changed in searcher, so need mutable
  //! data chunk include: vector, key, level 0 neighbors
  mutable std::vector<Chunk::Pointer> node_chunks_{};

  //! upper neighbor chunk include: UpperNeighborHeader + (1~level) neighbors
  mutable std::vector<Chunk::Pointer> upper_neighbor_chunks_{};

  ChunkBroker::Pointer broker_{};  // chunk broker
};

}  // namespace core
}  // namespace zvec

================================================
FILE: src/core/algorithm/hnsw_rabitq/CMakeLists.txt
================================================
# Build script for the HNSW + RaBitQ index library.
# Pull in the project's CMake helper modules (presumably the source of the
# cc_library command used below - confirm in cmake/bazel.cmake).
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

# With AUTO_DETECT_ARCH enabled, compile every file listed in
# HNSW_RABITQ_FILES with the flags held in RABITQ_ARCH_FLAG. Both variables
# are expected to be set outside this file.
if(AUTO_DETECT_ARCH)
  foreach(FILE ${HNSW_RABITQ_FILES})
      set_source_files_properties(
          ${FILE}
          PROPERTIES
          COMPILE_FLAGS "${RABITQ_ARCH_FLAG}"
      )
  endforeach()
endif()

# Declare the core_knn_hnsw_rabitq library from all *.cc files of this
# directory, depending on core_framework, rabitqlib and sparsehash.
cc_library(
    NAME core_knn_hnsw_rabitq
    STATIC SHARED STRICT ALWAYS_LINK
    SRCS *.cc
    LIBS core_framework rabitqlib sparsehash
    INCS . ${PROJECT_ROOT_DIR}/src ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm
    VERSION "${PROXIMA_ZVEC_VERSION}"
  )

================================================
FILE: src/core/algorithm/hnsw_rabitq/hnsw_rabitq_algorithm.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "hnsw_rabitq_algorithm.h"
#include <chrono>
#include <mutex>
#include "hnsw_rabitq_entity.h"

namespace zvec {
namespace core {

//! Construct the algorithm on top of `entity`, which is kept by reference.
//! The random generator is seeded with the system clock's tick count since
//! its epoch, and the lock pool is created with kLockCnt mutexes.
HnswRabitqAlgorithm::HnswRabitqAlgorithm(HnswRabitqEntity &entity)
    : entity_(entity),
      mt_(std::chrono::system_clock::now().time_since_epoch().count()),
      lock_pool_(kLockCnt) {}

//! Cleanup hook; it performs no work and always reports success (0).
int HnswRabitqAlgorithm::cleanup() {
  constexpr int kSuccess = 0;
  return kSuccess;
}

//! Insert node `id` into the graph on every level in [0, level].
//!
//! The first node ever added just becomes the entry point. Otherwise the
//! graph is descended from the current entry point: levels above `level` are
//! only used to pick a closer entry point, levels at or below it are searched
//! for neighbor candidates, and the links are then written bottom-up.
//!
//! Locking: spin_lock_ guards reads and updates of the entry point / top
//! level; mutex_ is held for the whole insertion by a node that raises the
//! top level. All locks are RAII-managed, so they are released on every exit
//! path.
//!
//! @id     node id to insert
//! @level  top level of the node
//! @ctx    per-thread build context (distance calculator, heaps, ...)
//! return 0
int HnswRabitqAlgorithm::add_node(node_id_t id, level_t level,
                                  HnswRabitqContext *ctx) {
  // Snapshot the entry point and the top level under the spin lock.
  std::unique_lock<ailego::SpinMutex> snapshot_guard(spin_lock_);
  auto cur_max_level = entity_.cur_max_level();
  auto entry_point = entity_.entry_point();
  if (ailego_unlikely(entry_point == kInvalidNodeId)) {
    // Empty graph: this node becomes the entry point.
    entity_.update_ep_and_level(id, level);
    return 0;
  }
  snapshot_guard.unlock();

  // A node that would raise the top level takes the global mutex; it stays
  // locked until the function returns unless the re-check shows that another
  // node has raised the level in the meantime.
  std::unique_lock<std::mutex> level_guard(mutex_, std::defer_lock);
  if (ailego_unlikely(level > cur_max_level)) {
    level_guard.lock();
    // re-check max level
    cur_max_level = entity_.cur_max_level();
    entry_point = entity_.entry_point();
    if (level <= cur_max_level) {
      level_guard.unlock();
    }
  }

  // Greedy descent through the levels above the node's own top level.
  level_t cur_level = cur_max_level;
  ResultRecord dist = ctx->dist_calculator()(entry_point);
  for (; cur_level > level; --cur_level) {
    select_entry_point(cur_level, &entry_point, &dist, ctx);
  }

  // Collect neighbor candidates on every level the node takes part in.
  for (; cur_level >= 0; --cur_level) {
    search_neighbors(cur_level, &entry_point, &dist, ctx->level_topk(cur_level),
                     ctx);
  }

  // add neighbors from down level to top level, to avoid upper level visible
  // to knn_search but the under layer level not ready
  for (cur_level = 0; cur_level <= level; ++cur_level) {
    add_neighbors(id, cur_level, ctx->level_topk(cur_level), ctx);
    ctx->level_topk(cur_level).clear();
  }

  // Publish the new entry point / top level; level_guard is still locked on
  // this path and is released after the spin lock when the function returns.
  if (ailego_unlikely(level > cur_max_level)) {
    std::lock_guard<ailego::SpinMutex> publish_guard(spin_lock_);
    entity_.update_ep_and_level(id, level);
  }

  return 0;
}

//! Greedy walk on one level (ef = 1): keep moving *entry_point to the closest
//! neighbor until no neighbor is closer than *dist.
//! *entry_point and *dist are updated in place. The walk also stops when the
//! current node has no neighbors or when fetching the neighbor vectors fails.
void HnswRabitqAlgorithm::select_entry_point(level_t level,
                                             node_id_t *entry_point,
                                             ResultRecord *dist,
                                             HnswRabitqContext *ctx) const {
  auto &entity = ctx->get_entity();
  HnswRabitqAddDistCalculator &dc = ctx->dist_calculator();

  bool moved_closer = true;
  while (moved_closer) {
    const Neighbors neighbors = entity.get_neighbors(level, *entry_point);
    if (ailego_unlikely(ctx->debugging())) {
      (*ctx->mutable_stats_get_neighbors())++;
    }
    const uint32_t neighbor_cnt = neighbors.size();
    if (neighbor_cnt == 0) {
      break;
    }

    // Fetch the vectors of all neighbors in one call.
    std::vector<IndexStorage::MemoryBlock> vec_blocks;
    const int ret = dc.get_vector(&neighbors[0], neighbor_cnt, vec_blocks);
    if (ailego_unlikely(ctx->debugging())) {
      (*ctx->mutable_stats_get_vector())++;
    }
    if (ailego_unlikely(ret != 0)) {
      break;
    }

    // Compute all distances as one batch.
    std::vector<float> dists(neighbor_cnt);
    std::vector<const void *> vec_ptrs(neighbor_cnt);
    for (uint32_t k = 0; k < neighbor_cnt; ++k) {
      vec_ptrs[k] = vec_blocks[k].data();
    }
    dc.batch_dist(vec_ptrs.data(), neighbor_cnt, dists.data());

    // Move to the closest neighbor, if any is closer than the current best.
    moved_closer = false;
    for (uint32_t k = 0; k < neighbor_cnt; ++k) {
      ResultRecord candidate_dist = dists[k];
      if (candidate_dist < *dist) {
        *entry_point = neighbors[k];
        *dist = candidate_dist;
        moved_closer = true;
      }
    }
  }
}

//! Link node `id` on `level`: store its neighbors chosen from `topk_heap`,
//! then offer `id` as a neighbor to each of the entries kept in the heap.
//! Does nothing when the heap is empty.
void HnswRabitqAlgorithm::add_neighbors(node_id_t id, level_t level,
                                        TopkHeap &topk_heap,
                                        HnswRabitqContext *ctx) {
  if (ailego_unlikely(topk_heap.size() == 0)) {
    return;
  }

  HnswRabitqAddDistCalculator &dc = ctx->dist_calculator();

  // Forward links: id -> selected neighbors (this may shrink topk_heap).
  update_neighbors(dc, id, level, topk_heap);

  // Reverse links: each entry left in the heap -> id.
  size_t pos = 0;
  while (pos < topk_heap.size()) {
    reverse_update_neighbors(dc, topk_heap[pos].first, level, id,
                             topk_heap[pos].second, ctx->update_heap());
    ++pos;
  }
}

//! Best-first search on one level, starting from *entry_point.
//! Results are collected in `topk`; *entry_point and *dist are updated to the
//! closest node seen, so they can seed the search on the next level.
//! Nodes rejected by the context filter are still expanded as candidates but
//! are not added to `topk`.
//! The search stops when the candidate heap is empty, the context reports
//! that the scan limit is reached, the best candidate is farther than
//! topk[0] while topk is full, or fetching neighbor vectors fails.
void HnswRabitqAlgorithm::search_neighbors(level_t level,
                                           node_id_t *entry_point,
                                           ResultRecord *dist, TopkHeap &topk,
                                           HnswRabitqContext *ctx) const {
  const auto &entity = ctx->get_entity();
  HnswRabitqAddDistCalculator &dc = ctx->dist_calculator();
  VisitFilter &visit = ctx->visit_filter();
  CandidateHeap &candidates = ctx->candidates();
  // Default filter accepts every node; a valid context filter is applied to
  // the node's key instead.
  std::function<bool(node_id_t)> filter = [](node_id_t) { return false; };
  if (ctx->filter().is_valid()) {
    filter = [&](node_id_t id) { return ctx->filter()(entity.get_key(id)); };
  }

  // Reset the per-search state and seed it with the entry point.
  candidates.clear();
  visit.clear();
  visit.set_visited(*entry_point);
  if (!filter(*entry_point)) {
    topk.emplace(*entry_point, *dist);
  }

  candidates.emplace(*entry_point, *dist);
  while (!candidates.empty() && !ctx->reach_scan_limit()) {
    auto top = candidates.begin();
    node_id_t main_node = top->first;
    ResultRecord main_dist = top->second;

    // topk[0] is presumably the worst result currently kept - confirm against
    // TopkHeap.
    if (topk.full() && main_dist > topk[0].second) {
      break;
    }

    candidates.pop();
    const Neighbors neighbors = entity.get_neighbors(level, main_node);
    ailego_prefetch(neighbors.data);
    if (ailego_unlikely(ctx->debugging())) {
      (*ctx->mutable_stats_get_neighbors())++;
    }

    // Keep only neighbors not visited yet, marking them as visited.
    std::vector<node_id_t> neighbor_ids(neighbors.size());
    uint32_t size = 0;
    for (uint32_t i = 0; i < neighbors.size(); ++i) {
      node_id_t node = neighbors[i];
      if (visit.visited(node)) {
        if (ailego_unlikely(ctx->debugging())) {
          (*ctx->mutable_stats_visit_dup_cnt())++;
        }
        continue;
      }
      visit.set_visited(node);
      neighbor_ids[size++] = node;
    }
    if (size == 0) {
      continue;
    }

    std::vector<IndexStorage::MemoryBlock> neighbor_vec_blocks;
    int ret = dc.get_vector(neighbor_ids.data(), size, neighbor_vec_blocks);
    if (ailego_unlikely(ctx->debugging())) {
      (*ctx->mutable_stats_get_vector())++;
    }
    if (ailego_unlikely(ret != 0)) {
      break;
    }

    // do prefetch
    static constexpr node_id_t BATCH_SIZE = 12;
    static constexpr node_id_t PREFETCH_STEP = 2;
    for (uint32_t i = 0; i < std::min(BATCH_SIZE * PREFETCH_STEP, size); ++i) {
      ailego_prefetch(neighbor_vec_blocks[i].data());
    }
    // done

    std::vector<float> dists(size);
    std::vector<const void *> neighbor_vecs(size);

    for (uint32_t i = 0; i < size; ++i) {
      neighbor_vecs[i] = neighbor_vec_blocks[i].data();
    }

    // Distances of all unvisited neighbors are computed as one batch.
    dc.batch_dist(neighbor_vecs.data(), size, dists.data());

    for (uint32_t i = 0; i < size; ++i) {
      node_id_t node = neighbor_ids[i];
      ResultRecord cur_dist = dists[i];

      // Expand the node only if it can still improve the result set.
      if ((!topk.full()) || cur_dist < topk[0].second) {
        candidates.emplace(node, cur_dist);
        // update entry_point for next level scan
        if (cur_dist < *dist) {
          *entry_point = node;
          *dist = cur_dist;
        }
        if (!filter(node)) {
          topk.emplace(node, cur_dist);
        }
      }  // end if
    }  // end for
  }  // while

  return;
}

//! Choose the neighbors of node `id` on `level` from `topk_heap` and store
//! them in the entity.
//! The heap is sorted first. When it holds no more than prune_cnt() and no
//! more than neighbor_cnt(level) entries, it is stored as is. Otherwise the
//! entries are pruned in place: an entry is kept only if it is closer to `id`
//! than to every entry kept before it. If fewer than min_neighbor_cnt()
//! entries survive, entries from the remaining heap tail are appended.
//! `topk_heap` is resized to the number of kept entries on the pruning path.
void HnswRabitqAlgorithm::update_neighbors(HnswRabitqAddDistCalculator &dc,
                                           node_id_t id, level_t level,
                                           TopkHeap &topk_heap) {
  topk_heap.sort();

  uint32_t max_neighbor_cnt = entity_.neighbor_cnt(level);
  // Small heaps are stored without pruning.
  if (topk_heap.size() <= static_cast<size_t>(entity_.prune_cnt())) {
    if (topk_heap.size() <= static_cast<size_t>(max_neighbor_cnt)) {
      entity_.update_neighbors(level, id, topk_heap);
      return;
    }
  }

  // Prune in place: slots [0, cur_size) hold the entries kept so far.
  uint32_t cur_size = 0;
  for (size_t i = 0; i < topk_heap.size(); ++i) {
    node_id_t cur_node = topk_heap[i].first;
    ResultRecord cur_node_dist = topk_heap[i].second;
    bool good = true;
    for (uint32_t j = 0; j < cur_size; ++j) {
      // Reject the entry if an already kept neighbor is at least as close to
      // it as `id` is.
      ResultRecord tmp_dist = dc.dist(cur_node, topk_heap[j].first);
      if (tmp_dist <= cur_node_dist) {
        good = false;
        break;
      }
    }

    if (good) {
      topk_heap[cur_size].first = cur_node;
      topk_heap[cur_size].second = cur_node_dist;
      cur_size++;
      if (cur_size >= max_neighbor_cnt) {
        break;
      }
    }
  }

  // when after-prune neighbor count is too seldom,
  // we use this strategy to make-up enough edges
  // not only just make-up out-degrees
  // we also make-up enough in-degrees
  uint32_t min_neighbors = entity_.min_neighbor_cnt();
  for (size_t k = cur_size; cur_size < min_neighbors && k < topk_heap.size();
       ++k) {
    // Skip tail entries that are already among the kept ones.
    bool exist = false;
    for (size_t j = 0; j < cur_size; ++j) {
      if (topk_heap[j].first == topk_heap[k].first) {
        exist = true;
        break;
      }
    }
    if (!exist) {
      topk_heap[cur_size].first = topk_heap[k].first;
      topk_heap[cur_size].second = topk_heap[k].second;
      cur_size++;
    }
  }

  topk_heap.resize(cur_size);
  entity_.update_neighbors(level, id, topk_heap);

  return;
}

//! Offer `link_id` as a new neighbor of node `id` on `level`.
//!
//! If `id` still has a free neighbor slot, `link_id` is appended. Otherwise
//! the current neighbors plus `link_id` are re-ranked by distance to `id`,
//! pruned (an entry is kept only if it is closer to `id` than to every entry
//! kept before it), and the result replaces the neighbor list of `id`.
//!
//! The neighbor list of `id` is protected by the pool mutex selected by
//! `id & kLockMask`; the mutex is held through a scoped guard so it is
//! released on every exit path.
//!
//! @dc           distance calculator
//! @id           node whose neighbor list is updated
//! @level        graph level
//! @link_id      candidate neighbor
//! @dist         distance between `id` and `link_id`
//! @update_heap  scratch heap; it is cleared after the pruning path
void HnswRabitqAlgorithm::reverse_update_neighbors(
    HnswRabitqAddDistCalculator &dc, node_id_t id, level_t level,
    node_id_t link_id, ResultRecord dist, TopkHeap &update_heap) {
  const size_t max_neighbor_cnt = entity_.neighbor_cnt(level);

  {
    const uint32_t lock_idx = id & kLockMask;
    std::lock_guard<std::mutex> node_guard(lock_pool_[lock_idx]);

    const Neighbors neighbors = entity_.get_neighbors(level, id);
    const size_t size = neighbors.size();
    ailego_assert_with(size <= max_neighbor_cnt, "invalid neighbor size");
    if (size < max_neighbor_cnt) {
      // Free slot available: just append.
      entity_.add_neighbor(level, id, size, link_id);
      return;
    }

    // Neighbor list is full: rank the candidate together with the current
    // neighbors.
    update_heap.emplace(link_id, dist);
    for (size_t i = 0; i < size; ++i) {
      node_id_t node = neighbors[i];
      ResultRecord cur_dist = dc.dist(id, node);
      update_heap.emplace(node, cur_dist);
    }

    //! TODO: optimize prune
    //! prune edges
    update_heap.sort();
    size_t cur_size = 0;
    for (size_t i = 0; i < update_heap.size(); ++i) {
      node_id_t cur_node = update_heap[i].first;
      ResultRecord cur_node_dist = update_heap[i].second;
      bool good = true;
      for (size_t j = 0; j < cur_size; ++j) {
        ResultRecord tmp_dist = dc.dist(cur_node, update_heap[j].first);
        if (tmp_dist <= cur_node_dist) {
          good = false;
          break;
        }
      }

      if (good) {
        // Compact kept entries to the front of the heap storage.
        update_heap[cur_size].first = cur_node;
        update_heap[cur_size].second = cur_node_dist;
        cur_size++;
        if (cur_size >= max_neighbor_cnt) {
          break;
        }
      }
    }

    update_heap.resize(cur_size);
    entity_.update_neighbors(level, id, update_heap);
  }

  // The scratch heap is reset outside the node lock.
  update_heap.clear();
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_rabitq/hnsw_rabitq_algorithm.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <stdint.h>
#include <ailego/parallel/lock.h>
#include "hnsw_rabitq_context.h"
#include "hnsw_rabitq_dist_calculator.h"
#include "hnsw_rabitq_entity.h"

namespace zvec {
namespace core {

//! HNSW graph algorithm implementation used while building the index:
//! random level assignment and insertion of nodes into the layered graph
//! stored in an HnswRabitqEntity.
class HnswRabitqAlgorithm {
 public:
  using UPointer = std::unique_ptr<HnswRabitqAlgorithm>;

 public:
  //! Constructor
  //! @entity  graph storage; kept by reference
  explicit HnswRabitqAlgorithm(HnswRabitqEntity &entity);

  //! Destructor
  ~HnswRabitqAlgorithm() = default;

  //! Cleanup HnswRabitqAlgorithm
  int cleanup();

  //! Add a node to hnsw graph
  //! @id:     the node unique id
  //! @level:  a node will be add to graph in each level [0, level]
  //! return 0 on success, or errCode in failure
  int add_node(node_id_t id, level_t level, HnswRabitqContext *ctx);

  //! Initiate HnswRabitqAlgorithm
  //! Rebuilds the per-level probability table from the entity's scaling
  //! factor. Levels are added until their probability drops below 1e-9.
  //! return 0
  int init() {
    level_probas_.clear();
    double level_mult =
        1 / std::log(static_cast<double>(entity_.scaling_factor()));
    for (int level = 0;; level++) {
      // refers faiss get_random_level alg
      double proba =
          std::exp(-level / level_mult) * (1 - std::exp(-1 / level_mult));
      if (proba < 1e-9) {
        break;
      }
      level_probas_.push_back(proba);
    }

    return 0;
  }

  //! Generate a random level
  //! Draws a uniform number in [0, 1) and walks the probability table built
  //! by init().
  //! return graph level; 0 when the probability table is empty
  uint32_t get_random_level() const {
    // Without a table (init() not called, or no level reached the
    // probability threshold) `size() - 1` would wrap around, so fall back to
    // the base level.
    if (level_probas_.empty()) {
      return 0;
    }
    // Uniform double in [0, 1). The division is done in double precision:
    // in single precision, draws close to the generator maximum round up to
    // exactly 1.0 and would fall through to the top level far more often
    // than its probability allows.
    double f = mt_() / (static_cast<double>(mt_.max()) + 1.0);
    for (size_t level = 0; level < level_probas_.size(); level++) {
      if (f < level_probas_[level]) {
        return level;
      }
      f -= level_probas_[level];
    }
    // Only reached for the tiny probability mass cut off by init().
    return level_probas_.size() - 1;
  }

 private:
  //! Select in upper layer to get entry point for next layer search
  void select_entry_point(level_t level, node_id_t *entry_point,
                          ResultRecord *dist, HnswRabitqContext *ctx) const;

  //! update node id neighbors from topkHeap, and reverse link is also updated
  void add_neighbors(node_id_t id, level_t level, TopkHeap &topk_heap,
                     HnswRabitqContext *ctx);

  //! Given a node id and level, search the nearest neighbors in graph
  //! Note: the nearest neighbors result keeps in topk, and entry_point and
  //! dist will be updated to current level nearest node id and distance
  void search_neighbors(level_t level, node_id_t *entry_point,
                        ResultRecord *dist, TopkHeap &topk,
                        HnswRabitqContext *ctx) const;

  //! Update the node's neighbors
  void update_neighbors(HnswRabitqAddDistCalculator &dc, node_id_t id,
                        level_t level, TopkHeap &topk_heap);

  //! Checking linkId could be id's new neighbor, and add as neighbor if true
  //! @dc         distance calculator
  //! @updateHeap temporary heap in updating neighbors
  void reverse_update_neighbors(HnswRabitqAddDistCalculator &dc, node_id_t id,
                                level_t level, node_id_t link_id,
                                ResultRecord dist, TopkHeap &update_heap);

 private:
  //! Not copyable
  HnswRabitqAlgorithm(const HnswRabitqAlgorithm &) = delete;
  HnswRabitqAlgorithm &operator=(const HnswRabitqAlgorithm &) = delete;

 private:
  //! Size of the per-node lock pool (a power of two) and the mask that maps
  //! a node id to a pool slot
  static constexpr uint32_t kLockCnt{1U << 8};
  static constexpr uint32_t kLockMask{kLockCnt - 1U};

  HnswRabitqEntity &entity_;
  //! Random generator for level assignment; mutable because
  //! get_random_level() is const. It is not synchronized here.
  mutable std::mt19937 mt_{};
  //! level_probas_[l] is the probability of assigning level l
  std::vector<double> level_probas_{};

  mutable ailego::SpinMutex spin_lock_{};  // global spin lock
  std::mutex mutex_{};                     // global mutex
  // TODO: spin lock?
  std::vector<std::mutex> lock_pool_{};
};

}  // namespace core
}  // namespace zvec

================================================
FILE: src/core/algorithm/hnsw_rabitq/hnsw_rabitq_builder.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "hnsw_rabitq_builder.h"
#include <cstdlib>
#include <iostream>
#include <thread>
#include <ailego/pattern/defer.h>
#include <zvec/ailego/container/params.h>
#include <zvec/ailego/utility/string_helper.h>
#include <zvec/ailego/utility/time_helper.h>
#include "zvec/core/framework/index_error.h"
#include "zvec/core/framework/index_factory.h"
#include "zvec/core/framework/index_logger.h"
#include "zvec/core/framework/index_memory.h"
#include "zvec/core/framework/index_meta.h"
#include "zvec/core/framework/index_provider.h"
#include "hnsw_rabitq_algorithm.h"
#include "hnsw_rabitq_entity.h"
#include "hnsw_rabitq_params.h"
#include "rabitq_converter.h"
#include "rabitq_params.h"
#include "rabitq_reformer.h"

namespace zvec {
namespace core {

//! Default constructor: members keep their in-class initial values.
HnswRabitqBuilder::HnswRabitqBuilder() = default;

//! Initialize the builder from the index meta and the build parameters.
//! Reads the graph knobs (neighbor counts, efConstruction, scaling factor,
//! prune count, thread count, memory quota) and the RaBitQ settings
//! (total bits, dimension) from `params`, validates them, then initializes
//! the metric, the entity, the graph algorithm and the RaBitQ converter.
//! On success state_ becomes BUILD_STATE_INITED.
//! @param meta    index meta, copied into meta_
//! @param params  builder parameters
//! @return 0 on success; IndexError_InvalidArgument for an out-of-range
//!         parameter; IndexError_NoExist when the metric cannot be created;
//!         otherwise the error code of the component that failed to init
int HnswRabitqBuilder::init(const IndexMeta &meta,
                            const ailego::Params &params) {
  LOG_INFO("Begin HnswRabitqBuilder::init");

  meta_ = meta;
  auto params_copy = params;
  meta_.set_builder("HnswRabitqBuilder", HnswRabitqEntity::kRevision,
                    std::move(params_copy));

  size_t memory_quota = 0UL;
  params.get(PARAM_HNSW_RABITQ_BUILDER_MEMORY_QUOTA, &memory_quota);
  params.get(PARAM_HNSW_RABITQ_BUILDER_THREAD_COUNT, &thread_cnt_);
  params.get(PARAM_HNSW_RABITQ_BUILDER_MIN_NEIGHBOR_COUNT, &min_neighbor_cnt_);
  params.get(PARAM_HNSW_RABITQ_BUILDER_EFCONSTRUCTION, &ef_construction_);
  params.get(PARAM_HNSW_RABITQ_BUILDER_CHECK_INTERVAL_SECS,
             &check_interval_secs_);

  params.get(PARAM_HNSW_RABITQ_BUILDER_MAX_NEIGHBOR_COUNT,
             &upper_max_neighbor_cnt_);
  // NOTE(review): the level-0 neighbor count, the scaling factor and the prune
  // count below are derived from upper_max_neighbor_cnt_ *before* a zero value
  // is replaced by its default further down -- confirm this order is intended.
  float multiplier = HnswRabitqEntity::kDefaultL0MaxNeighborCntMultiplier;
  params.get(PARAM_HNSW_RABITQ_BUILDER_L0_MAX_NEIGHBOR_COUNT_MULTIPLIER,
             &multiplier);
  l0_max_neighbor_cnt_ = multiplier * upper_max_neighbor_cnt_;
  scaling_factor_ = upper_max_neighbor_cnt_;
  params.get(PARAM_HNSW_RABITQ_BUILDER_SCALING_FACTOR, &scaling_factor_);

  multiplier = HnswRabitqEntity::kDefaultNeighborPruneMultiplier;
  params.get(PARAM_HNSW_RABITQ_BUILDER_NEIGHBOR_PRUNE_MULTIPLIER, &multiplier);
  size_t prune_cnt = multiplier * upper_max_neighbor_cnt_;

  // Zero means "use the default" for the knobs below
  if (ef_construction_ == 0) {
    ef_construction_ = HnswRabitqEntity::kDefaultEfConstruction;
  }
  if (upper_max_neighbor_cnt_ == 0) {
    upper_max_neighbor_cnt_ = HnswRabitqEntity::kDefaultUpperMaxNeighborCnt;
  }
  if (upper_max_neighbor_cnt_ > kMaxNeighborCnt) {
    LOG_ERROR("[%s] must be in range (0,%u]",
              PARAM_HNSW_RABITQ_BUILDER_MAX_NEIGHBOR_COUNT.c_str(),
              kMaxNeighborCnt);
    return IndexError_InvalidArgument;
  }
  if (min_neighbor_cnt_ > upper_max_neighbor_cnt_) {
    LOG_ERROR("[%s]-[%u] must be <= [%s]-[%u]",
              PARAM_HNSW_RABITQ_BUILDER_MIN_NEIGHBOR_COUNT.c_str(),
              min_neighbor_cnt_,
              PARAM_HNSW_RABITQ_BUILDER_MAX_NEIGHBOR_COUNT.c_str(),
              upper_max_neighbor_cnt_);
    return IndexError_InvalidArgument;
  }
  if (l0_max_neighbor_cnt_ == 0) {
    // NOTE(review): falls back to the *upper* layer default, while cleanup()
    // resets this field to kDefaultL0MaxNeighborCnt -- confirm which is meant.
    l0_max_neighbor_cnt_ = HnswRabitqEntity::kDefaultUpperMaxNeighborCnt;
  }
  if (l0_max_neighbor_cnt_ > HnswRabitqEntity::kMaxNeighborCnt) {
    LOG_ERROR("L0MaxNeighborCnt must be in range (0,%d)",
              HnswRabitqEntity::kMaxNeighborCnt);
    return IndexError_InvalidArgument;
  }
  if (scaling_factor_ == 0U) {
    scaling_factor_ = HnswRabitqEntity::kDefaultScalingFactor;
  }
  if (scaling_factor_ < 5 || scaling_factor_ > 1000) {
    LOG_ERROR("[%s] must be in range [5,1000]",
              PARAM_HNSW_RABITQ_BUILDER_SCALING_FACTOR.c_str());
    return IndexError_InvalidArgument;
  }

  // hardware_concurrency() is allowed to return 0 when the core count cannot
  // be determined; never leave the builder with a zero thread count.
  const uint32_t hw_threads = std::thread::hardware_concurrency();
  if (thread_cnt_ == 0) {
    thread_cnt_ = (hw_threads != 0U) ? hw_threads : 1U;
  }
  if (hw_threads != 0U && thread_cnt_ > hw_threads) {
    LOG_WARN("[%s] greater than cpu cores %zu",
             PARAM_HNSW_RABITQ_BUILDER_THREAD_COUNT.c_str(),
             static_cast<size_t>(hw_threads));
  }
  if (prune_cnt == 0UL) {
    prune_cnt = upper_max_neighbor_cnt_;
  }

  metric_ = IndexFactory::CreateMetric(meta_.metric_name());
  if (!metric_) {
    LOG_ERROR("CreateMetric failed, name: %s", meta_.metric_name().c_str());
    return IndexError_NoExist;
  }
  int ret = metric_->init(meta_, meta_.metric_params());
  if (ret != 0) {
    LOG_ERROR("IndexMetric init failed, ret=%d", ret);
    return ret;
  }

  // RaBitQ quantization width: total bits per dimension, one of which is the
  // base bit, the rest are handed to the entity as "ex bits"
  uint32_t total_bits = 0;
  params.get(PARAM_RABITQ_TOTAL_BITS, &total_bits);
  if (total_bits == 0) {
    total_bits = kDefaultRabitqTotalBits;
  }
  if (total_bits < 1 || total_bits > 9) {
    LOG_ERROR("Invalid total_bits: %zu, must be in [1, 9]", (size_t)total_bits);
    return IndexError_InvalidArgument;
  }
  uint8_t ex_bits = total_bits - 1;
  entity_.set_ex_bits(ex_bits);

  uint32_t dimension = 0;
  params.get(PARAM_HNSW_RABITQ_GENERAL_DIMENSION, &dimension);
  if (dimension == 0) {
    LOG_ERROR("%s not set", PARAM_HNSW_RABITQ_GENERAL_DIMENSION.c_str());
    return IndexError_InvalidArgument;
  }
  if (dimension < kMinRabitqDimSize || dimension > kMaxRabitqDimSize) {
    LOG_ERROR("Invalid dimension: %u, must be in [%d, %d]", dimension,
              kMinRabitqDimSize, kMaxRabitqDimSize);
    return IndexError_InvalidArgument;
  }
  entity_.update_rabitq_params_and_vector_size(dimension);

  entity_.set_ef_construction(ef_construction_);
  entity_.set_l0_neighbor_cnt(l0_max_neighbor_cnt_);
  entity_.set_min_neighbor_cnt(min_neighbor_cnt_);
  entity_.set_upper_neighbor_cnt(upper_max_neighbor_cnt_);
  entity_.set_scaling_factor(scaling_factor_);
  entity_.set_memory_quota(memory_quota);
  entity_.set_prune_cnt(prune_cnt);

  ret = entity_.init();
  if (ret != 0) {
    return ret;
  }

  alg_ = HnswRabitqAlgorithm::UPointer(new HnswRabitqAlgorithm(entity_));

  ret = alg_->init();
  if (ret != 0) {
    return ret;
  }

  // Create and initialize RaBitQ converter
  converter_ = std::make_shared<RabitqConverter>();

  IndexMeta converter_meta = meta_;
  converter_meta.set_dimension(dimension);
  ret = converter_->init(converter_meta, params);
  if (ret != 0) {
    LOG_ERROR("Failed to initialize RabitqConverter: %d", ret);
    return ret;
  }

  state_ = BUILD_STATE_INITED;
  LOG_INFO(
      "End HnswRabitqBuilder::init, params: rawVectorSize=%u vectorSize=%zu "
      "efConstruction=%u "
      "l0NeighborCnt=%u upperNeighborCnt=%u scalingFactor=%u "
      "memoryQuota=%zu neighborPruneCnt=%zu metricName=%s ",
      meta_.element_size(), entity_.vector_size(), ef_construction_,
      l0_max_neighbor_cnt_, upper_max_neighbor_cnt_, scaling_factor_,
      memory_quota, prune_cnt, meta_.metric_name().c_str());

  return 0;
}

int HnswRabitqBuilder::cleanup(void) {
  LOG_INFO("Begin HnswRabitqBuilder::cleanup");

  l0_max_neighbor_cnt_ = HnswRabitqEntity::kDefaultL0MaxNeighborCnt;
  min_neighbor_cnt_ = 0;
  upper_max_neighbor_cnt_ = HnswRabitqEntity::kDefaultUpperMaxNeighborCnt;
  ef_construction_ = HnswRabitqEntity::kDefaultEfConstruction;
  scaling_factor_ = HnswRabitqEntity::kDefaultScalingFactor;
  check_interval_secs_ = kDefaultLogIntervalSecs;
  errcode_ = 0;
  error_ = false;
  entity_.cleanup();
  if (alg_) {
    alg_->cleanup();
  }
  meta_.clear();
  metric_.reset();
  stats_.clear_attributes();
  stats_.set_trained_count(0UL);
  stats_.set_built_count(0UL);
  stats_.set_dumped_count(0UL);
  stats_.set_discarded_count(0UL);
  stats_.set_trained_costtime(0UL);
  stats_.set_built_costtime(0UL);
  stats_.set_dumped_costtime(0UL);
  state_ = BUILD_STATE_INIT;

  LOG_INFO("End HnswRabitqBuilder::cleanup");

  return 0;
}

//! Train the builder on the vectors provided by `holder`.
//! First trains the RaBitQ converter and loads the reformer from it, then,
//! if the metric reports support_train(), feeds every vector of the holder to
//! the metric. On success state_ becomes BUILD_STATE_TRAINED.
//! @param holder  source of training vectors; must be non-null and match meta_
//! @return 0 on success, IndexError_NoReady / IndexError_InvalidArgument /
//!         IndexError_Mismatch / IndexError_Runtime on precondition failures,
//!         or the error code of the failing training step
int HnswRabitqBuilder::train(IndexThreads::Pointer,
                             IndexHolder::Pointer holder) {
  if (state_ != BUILD_STATE_INITED) {
    LOG_ERROR("Init the builder before HnswRabitqBuilder::train");
    return IndexError_NoReady;
  }
  if (!holder) {
    LOG_ERROR("Input holder is nullptr while training index");
    return IndexError_InvalidArgument;
  }
  if (!holder->is_matched(meta_)) {
    LOG_ERROR("Input holder doesn't match index meta while training index");
    return IndexError_Mismatch;
  }

  LOG_INFO("Begin HnswRabitqBuilder::train");

  // Both stay zero when the metric needs no training
  size_t elapsed_ms = 0;
  size_t sample_cnt = 0;

  int ret = train_converter_and_load_reformer(holder);
  if (ret != 0) {
    return ret;
  }

  if (metric_->support_train()) {
    const auto begin_ms = ailego::Monotime::MilliSeconds();
    auto it = holder->create_iterator();
    if (!it) {
      LOG_ERROR("Create iterator for holder failed");
      return IndexError_Runtime;
    }
    for (; it->is_valid(); it->next()) {
      ret = metric_->train(it->data(), meta_.dimension());
      if (ailego_unlikely(ret != 0)) {
        LOG_ERROR("Hnsw build measure train failed, ret=%d", ret);
        return ret;
      }
      ++sample_cnt;
    }
    elapsed_ms = ailego::Monotime::MilliSeconds() - begin_ms;
  }

  stats_.set_trained_count(sample_cnt);
  stats_.set_trained_costtime(elapsed_ms);
  state_ = BUILD_STATE_TRAINED;

  LOG_INFO("End HnswRabitqBuilder::train");

  return 0;
}

//! Train the RaBitQ converter on `holder`, dump it into a temporary in-memory
//! segment, and load a RabitqReformer back from that segment.
//! reformer_ is only replaced once the new reformer has loaded successfully,
//! so a failed call never leaves a half-initialized reformer behind.
//! @param holder  source of training vectors
//! @return 0 on success, IndexError_NoExist when a factory object cannot be
//!         created, otherwise the error code of the failing step
int HnswRabitqBuilder::train_converter_and_load_reformer(
    IndexHolder::Pointer holder) {
  // Train converter (KMeans clustering)
  int ret = converter_->train(holder);
  if (ret != 0) {
    LOG_ERROR("Failed to train RabitqConverter: %d", ret);
    return ret;
  }

  auto memory_dumper = IndexFactory::CreateDumper("MemoryDumper");
  if (!memory_dumper) {
    LOG_ERROR("Failed to create MemoryDumper");
    return IndexError_NoExist;
  }
  ret = memory_dumper->init(ailego::Params());
  if (ret != 0) {
    LOG_ERROR("Failed to init memory dumper: %d", ret);
    return ret;
  }
  // Timestamp + rand() keeps the temporary segment name distinct per call
  std::string file_id = ailego::StringHelper::Concat(
      "rabitq_converter_", ailego::Monotime::MilliSeconds(), rand());
  ret = memory_dumper->create(file_id);
  if (ret != 0) {
    LOG_ERROR("Failed to create memory dumper: %d", ret);
    return ret;
  }
  // Release memory
  AILEGO_DEFER([&file_id]() { IndexMemory::Instance()->remove(file_id); });
  ret = converter_->dump(memory_dumper);
  if (ret != 0) {
    LOG_ERROR("Failed to dump RabitqConverter: %d", ret);
    return ret;
  }
  ret = memory_dumper->close();
  if (ret != 0) {
    LOG_ERROR("Failed to close memory dumper: %d", ret);
    return ret;
  }

  auto reformer = std::make_shared<RabitqReformer>();
  ailego::Params reformer_params;
  reformer_params.set(PARAM_RABITQ_METRIC_NAME, meta_.metric_name());
  ret = reformer->init(reformer_params);
  if (ret != 0) {
    LOG_ERROR("Failed to initialize RabitqReformer: %d", ret);
    return ret;
  }
  auto memory_storage = IndexFactory::CreateStorage("MemoryReadStorage");
  if (!memory_storage) {
    LOG_ERROR("Failed to create MemoryReadStorage");
    return IndexError_NoExist;
  }
  ret = memory_storage->open(file_id, false);
  if (ret != 0) {
    LOG_ERROR("Failed to open memory storage: %d", ret);
    return ret;
  }
  ret = reformer->load(memory_storage);
  if (ret != 0) {
    LOG_ERROR("Failed to load RabitqReformer: %d", ret);
    return ret;
  }

  reformer_ = std::move(reformer);
  return 0;
}

//! Train by an external trainer. The trainer argument is not used: the call
//! only zeroes the training statistics and advances state_ to
//! BUILD_STATE_TRAINED.
//! NOTE(review): unlike the holder-based overload, this path does not train
//! the converter nor load reformer_ -- confirm callers never build after it.
//! @return 0 on success, IndexError_NoReady if the builder is not inited
int HnswRabitqBuilder::train(const IndexTrainer::Pointer & /*trainer*/) {
  if (state_ == BUILD_STATE_INITED) {
    LOG_INFO("Begin HnswRabitqBuilder::train by trainer");

    stats_.set_trained_count(0UL);
    stats_.set_trained_costtime(0UL);
    state_ = BUILD_STATE_TRAINED;

    LOG_INFO("End HnswRabitqBuilder::train by trainer");
    return 0;
  }

  LOG_ERROR("Init the builder before HnswRabitqBuilder::train");
  return IndexError_NoReady;
}

//! Build the index.
//! Phase 1 (calling thread): every vector of `holder` is quantized through
//! reformer_ and stored in the entity together with a random graph level.
//! Phase 2 (worker threads): do_build() is submitted once per thread to link
//! the stored nodes into the graph, while this thread periodically logs the
//! progress. On success state_ becomes BUILD_STATE_BUILT.
//! @param threads  thread pool; when null a SingleQueueIndexThreads with
//!                 thread_cnt_ threads is created
//! @param holder   vector source; must be non-null, match meta_ and be an
//!                 IndexProvider
//! @return 0 on success, IndexError_NoReady / IndexError_InvalidArgument /
//!         IndexError_Mismatch / IndexError_Runtime on precondition failures,
//!         or the error code reported by the failing step / worker
int HnswRabitqBuilder::build(IndexThreads::Pointer threads,
                             IndexHolder::Pointer holder) {
  if (state_ != BUILD_STATE_TRAINED) {
    LOG_ERROR("Train the index before HnswRabitqBuilder::build");
    return IndexError_NoReady;
  }
  // train(trainer) reaches BUILD_STATE_TRAINED without loading a reformer;
  // fail cleanly instead of dereferencing a null pointer below.
  if (!reformer_) {
    LOG_ERROR("RaBitQ reformer is not loaded, train with a holder before build");
    return IndexError_NoReady;
  }

  if (!holder) {
    LOG_ERROR("Input holder is nullptr while building index");
    return IndexError_InvalidArgument;
  }
  if (!holder->is_matched(meta_)) {
    LOG_ERROR("Input holder doesn't match index meta while building index");
    return IndexError_Mismatch;
  }
  IndexProvider::Pointer provider =
      std::dynamic_pointer_cast<IndexProvider>(holder);
  if (!provider) {
    LOG_ERROR("Rabitq builder expect IndexProvider");
    return IndexError_InvalidArgument;
  }

  if (!threads) {
    threads = std::make_shared<SingleQueueIndexThreads>(thread_cnt_, false);
  }

  auto start_time = ailego::Monotime::MilliSeconds();
  LOG_INFO("Begin HnswRabitqBuilder::build");

  // A count of size_t(-1) is treated as "unknown": skip the up-front reserve
  if (holder->count() != static_cast<size_t>(-1)) {
    LOG_DEBUG("HnswRabitqBuilder holder documents count %zu", holder->count());
    int ret = entity_.reserve_space(holder->count());
    if (ret != 0) {
      LOG_ERROR("HnswBuilde reserver space failed");
      return ret;
    }
  }
  auto iter = holder->create_iterator();
  if (!iter) {
    LOG_ERROR("Create iterator for holder failed");
    return IndexError_Runtime;
  }
  error_ = false;
  IndexQueryMeta ometa;
  ometa.set_meta(holder->data_type(), holder->dimension());
  while (iter->is_valid()) {
    const void *vec = iter->data();
    // quantize vector
    std::string converted_vector;
    IndexQueryMeta converted_meta;
    int ret =
        reformer_->convert(vec, ometa, &converted_vector, &converted_meta);
    if (ret != 0) {
      LOG_ERROR("Rabitq hnsw convert failed, ret=%d", ret);
      return ret;
    }

    level_t level = alg_->get_random_level();
    node_id_t id;

    if (converted_vector.size() != entity_.vector_size()) {
      LOG_ERROR(
          "Converted vector size %zu is not equal to entity vector size %zu",
          converted_vector.size(), entity_.vector_size());
      return IndexError_InvalidArgument;
    }
    ret = entity_.add_vector(level, iter->key(), converted_vector.data(), &id);
    if (ailego_unlikely(ret != 0)) {
      return ret;
    }
    iter->next();
  }

  LOG_INFO("Finished save vector, start build graph...");

  auto task_group = threads->make_group();
  if (!task_group) {
    LOG_ERROR("Failed to create task group");
    return IndexError_Runtime;
  }

  std::atomic<node_id_t> finished{0};
  for (size_t i = 0; i < threads->count(); ++i) {
    task_group->submit(ailego::Closure ::New(this, &HnswRabitqBuilder::do_build,
                                             i, threads->count(), provider,
                                             &finished));
  }

  // Poll for completion, logging progress every check_interval_secs_ (workers
  // notify cond_ when they exit). Stop polling as soon as an error is flagged.
  bool failed = false;
  while (!failed && !task_group->is_finished()) {
    std::unique_lock<std::mutex> lk(mutex_);
    cond_.wait_until(lk, std::chrono::system_clock::now() +
                             std::chrono::seconds(check_interval_secs_));
    failed = error_.load(std::memory_order_acquire);
    if (!failed) {
      LOG_INFO("Built cnt %zu, finished percent %.3f%%",
               static_cast<size_t>(finished.load()),
               finished.load() * 100.0f / entity_.doc_cnt());
    }
  }

  // Always drain the workers before leaving this frame: they hold a pointer
  // to the stack-local `finished` counter and to `this`, and the failing
  // worker may not have stored errcode_ yet. mutex_ must not be held here,
  // because every worker locks it on exit.
  task_group->wait_finish();

  if (error_.load(std::memory_order_acquire)) {
    LOG_ERROR("Failed to build index while waiting finish");
    return errcode_;
  }

  stats_.set_built_count(finished.load());
  stats_.set_built_costtime(ailego::Monotime::MilliSeconds() - start_time);

  state_ = BUILD_STATE_BUILT;
  LOG_INFO("End HnswRabitqBuilder::build with RaBitQ quantization");
  return 0;
}

//! Worker routine: inserts nodes idx, idx + step_size, idx + 2 * step_size, ...
//! into the graph through alg_->add_node().
//! The first failure (across all workers) is recorded in errcode_ / error_;
//! once error_ is set every worker stops at its next iteration.
//! @param idx        first node id handled by this worker
//! @param step_size  stride between the node ids handled by this worker
//! @param provider   vector provider handed to the search context
//! @param finished   shared counter, incremented once per inserted node
void HnswRabitqBuilder::do_build(node_id_t idx, size_t step_size,
                                 IndexProvider::Pointer provider,
                                 std::atomic<node_id_t> *finished) {
  // Wake the waiting build() thread whenever this worker exits
  AILEGO_DEFER([&]() {
    std::lock_guard<std::mutex> latch(mutex_);
    cond_.notify_one();
  });

  // Record a failure. Only the first caller wins (returns true). errcode_ is
  // written *before* error_ is published with release semantics, so a thread
  // that observes error_ == true also observes the matching errcode_.
  auto claim_error = [this](int code) {
    std::lock_guard<std::mutex> guard(mutex_);
    if (error_.load(std::memory_order_relaxed)) {
      return false;
    }
    errcode_ = code;
    error_.store(true, std::memory_order_release);
    return true;
  };

  // The context only borrows entity_: the shared_ptr uses a no-op deleter
  HnswRabitqContext *ctx = new (std::nothrow) HnswRabitqContext(
      meta_.dimension(), metric_,
      std::shared_ptr<HnswRabitqEntity>(&entity_, [](HnswRabitqEntity *) {}));
  if (ailego_unlikely(ctx == nullptr)) {
    if (claim_error(IndexError_NoMemory)) {
      LOG_ERROR("Failed to create context");
    }
    return;
  }
  HnswRabitqContext::Pointer auto_ptr(ctx);
  ctx->set_provider(std::move(provider));
  ctx->set_max_scan_num(entity_.doc_cnt());
  int ret = ctx->init(HnswRabitqContext::kBuilderContext);
  if (ret != 0) {
    if (claim_error(IndexError_Runtime)) {
      LOG_ERROR("Failed to init context");
    }
    return;
  }

  for (node_id_t id = idx; id < entity_.doc_cnt(); id += step_size) {
    // Another worker already failed: the build is lost, stop early
    if (ailego_unlikely(error_.load(std::memory_order_acquire))) {
      return;
    }
    ctx->reset_query(ctx->dist_calculator().get_vector(id));
    ret = alg_->add_node(id, entity_.get_level(id), ctx);
    if (ailego_unlikely(ret != 0)) {
      if (claim_error(ret)) {
        LOG_ERROR("Hnsw graph add node failed");
      }
      return;
    }
    ctx->clear();
    (*finished)++;
  }
}

//! Dump the built index into `dumper`.
//! Writes, in order: the serialized index meta (with the searcher set to
//! "HnswRabitqSearcher"), the RaBitQ converter data when a converter exists,
//! and finally the entity (graph and vectors).
//! @param dumper  destination; must be non-null
//! @return 0 on success, IndexError_NoReady if the index is not built,
//!         IndexError_InvalidArgument for a null dumper, or the error code of
//!         the failing dump step
int HnswRabitqBuilder::dump(const IndexDumper::Pointer &dumper) {
  if (state_ != BUILD_STATE_BUILT) {
    // Logged as an error, consistent with the state checks in train()/build()
    LOG_ERROR("Build the index before HnswRabitqBuilder::dump");
    return IndexError_NoReady;
  }
  if (!dumper) {
    LOG_ERROR("Input dumper is nullptr while dumping index");
    return IndexError_InvalidArgument;
  }

  LOG_INFO("Begin HnswRabitqBuilder::dump");

  meta_.set_searcher("HnswRabitqSearcher", HnswRabitqEntity::kRevision,
                     ailego::Params());
  auto start_time = ailego::Monotime::MilliSeconds();

  int ret = IndexHelper::SerializeToDumper(meta_, dumper.get());
  if (ret != 0) {
    LOG_ERROR("Failed to serialize meta into dumper.");
    return ret;
  }

  // Dump RaBitQ centroids first
  if (converter_) {
    ret = converter_->dump(dumper);
    if (ret != 0) {
      LOG_ERROR("Failed to dump RabitqConverter: %d", ret);
      return ret;
    }
    LOG_INFO("RaBitQ centroids dumped: %zu bytes, cost %zu ms",
             converter_->stats().dumped_size(),
             static_cast<size_t>(converter_->stats().dumped_costtime()));
  }

  ret = entity_.dump(dumper);
  if (ret != 0) {
    LOG_ERROR("HnswRabitqBuilder dump index failed");
    return ret;
  }

  stats_.set_dumped_count(entity_.doc_cnt());
  stats_.set_dumped_costtime(ailego::Monotime::MilliSeconds() - start_time);

  LOG_INFO("End HnswRabitqBuilder::dump");
  return 0;
}


}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_rabitq/hnsw_rabitq_builder.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <zvec/ailego/parallel/thread_pool.h>
#include "zvec/core/framework/index_builder.h"
#include "zvec/core/framework/index_converter.h"
#include "zvec/core/framework/index_reformer.h"
#include "hnsw_rabitq_algorithm.h"
#include "hnsw_rabitq_builder_entity.h"

namespace zvec {
namespace core {

class HnswRabitqBuilder : public IndexBuilder {
 public:
  //! Constructor
  HnswRabitqBuilder();

  //! Initialize the builder
  virtual int init(const IndexMeta &meta,
                   const ailego::Params &params) override;

  //! Cleanup the builder
  virtual int cleanup(void) override;

  //! Train the data
  virtual int train(IndexThreads::Pointer,
                    IndexHolder::Pointer holder) override;

  //! Train the data
  virtual int train(const IndexTrainer::Pointer &trainer) override;


  //! Build the index
  virtual int build(IndexThreads::Pointer threads,
                    IndexHolder::Pointer holder) override;

  //! Dump index into storage
  virtual int dump(const IndexDumper::Pointer &dumper) override;

  //! Retrieve statistics
  virtual const Stats &stats(void) const override {
    return stats_;
  }

 private:
  void do_build(node_id_t idx, size_t step_size,
                IndexProvider::Pointer provider,
                std::atomic<node_id_t> *finished);

  int train_converter_and_load_reformer(IndexHolder::Pointer holder);

  constexpr static uint32_t kDefaultLogIntervalSecs = 15U;
  constexpr static uint32_t kMaxNeighborCnt = 65535;

 private:
  enum BUILD_STATE {
    BUILD_STATE_INIT = 0,
    BUILD_STATE_INITED = 1,
    BUILD_STATE_TRAINED = 2,
    BUILD_STATE_BUILT = 3
  };

  HnswRabitqBuilderEntity entity_{};
  HnswRabitqAlgorithm::UPointer alg_;  // impl graph algorithm
  uint32_t thread_cnt_{0};
  uint32_t min_neighbor_cnt_{0};
  uint32_t upper_max_neighbor_cnt_{
      HnswRabitqEntity::kDefaultUpperMaxNeighborCnt};
  uint32_t l0_max_neighbor_cnt_{HnswRabitqEntity::kDefaultL0MaxNeighborCnt};
  uint32_t ef_construction_{HnswRabitqEntity::kDefaultEfConstruction};
  uint32_t scaling_factor_{HnswRabitqEntity::kDefaultScalingFactor};
  uint32_t check_interval_secs_{kDefaultLogIntervalSecs};

  int errcode_{0};
  std::atomic_bool error_{false};
  IndexMeta meta_{};
  IndexMetric::Pointer metric_{};
  IndexConverter::Pointer converter_{};  // RaBitQ converter
  IndexReformer::Pointer reformer_{};    // RaBitQ reformer
  std::mutex mutex_{};
  std::condition_variable cond_{};
  Stats stats_{};

  BUILD_STATE state_{BUILD_STATE_INIT};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_rabitq/hnsw_rabitq_builder_entity.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "hnsw_rabitq_builder_entity.h"
#include <iostream>
#include <zvec/ailego/hash/crc32c.h>
#include "utility/sparse_utility.h"

namespace zvec {
namespace core {

//! Constructor: a new entity starts with an invalid node id and level 0
//! passed to update_ep_and_level() (presumably the graph entry point)
HnswRabitqBuilderEntity::HnswRabitqBuilderEntity() {
  this->update_ep_and_level(kInvalidNodeId, 0U);
}

int HnswRabitqBuilderEntity::cleanup() {
  memory_quota_ = 0UL;
  neighbors_size_ = 0U;
  upper_neighbors_size_ = 0U;
  padding_size_ = 0U;
  vectors_buffer_.clear();
  keys_buffer_.clear();
  neighbors_buffer_.clear();
  upper_neighbors_buffer_.clear();
  neighbors_index_.clear();

  vectors_buffer_.shrink_to_fit();
  keys_buffer_.shrink_to_fit();
  neighbors_buffer_.shrink_to_fit();
  upper_neighbors_buffer_.shrink_to_fit();
  neighbors_index_.shrink_to_fit();

  this->HnswRabitqEntity::cleanup();

  return 0;
}

//! Init the builder entity: derive the per-node stride, the padding appended
//! after each vector, and the per-node neighbor block sizes.
//! @return always 0
int HnswRabitqBuilderEntity::init() {
  const size_t raw_size = vector_size();

  //! node stride is the vector size rounded up by AlignSize()
  set_node_size(AlignSize(raw_size));
  //! a stride that is a multiple of 1k downgrades build performance, so bump
  //! it to the next aligned size
  const bool multiple_of_1k = (node_size() % 1024 == 0);
  if (multiple_of_1k) {
    set_node_size(AlignSize(node_size() + 1));
  }

  padding_size_ = node_size() - raw_size;

  // Cache the block sizes used for the level-0 and the upper-layer buffers
  upper_neighbors_size_ = upper_neighbors_size();
  neighbors_size_ = neighbors_size();

  return 0;
}

//! Reserve buffer space for `docs` documents.
//! When a memory quota is set, the request is rejected if vectors, level-0
//! neighbors and the neighbor index together would exceed it.
//! @param docs  number of documents
//! @return 0 on success, IndexError_NoMemory when the quota would be exceeded
int HnswRabitqBuilderEntity::reserve_space(size_t docs) {
  if (memory_quota_ > 0) {
    const size_t required = node_size() * docs + neighbors_size_ * docs +
                            sizeof(NeighborIndex) * docs;
    if (required > memory_quota_) {
      return IndexError_NoMemory;
    }
  }

  vectors_buffer_.reserve(node_size() * docs);
  keys_buffer_.reserve(sizeof(key_t) * docs);
  neighbors_buffer_.reserve(neighbors_size_ * docs);
  neighbors_index_.reserve(docs);

  return 0;
}

//! Append one vector with its key and reserve its (zeroed) neighbor blocks.
//! @param level  graph level of the node; one upper-layer block is reserved
//!               for each level from 1 to `level`
//! @param key    primary key of the vector
//! @param vec    vector data, vector_size() bytes are copied
//! @param id     receives the local node id (the document count before the
//!               insertion)
//! @return 0 on success, IndexError_NoMemory when the buffers already exceed
//!         the memory quota
int HnswRabitqBuilderEntity::add_vector(level_t level, key_t key,
                                        const void *vec, node_id_t *id) {
  if (memory_quota_ > 0) {
    // The quota is checked against the capacity currently held by the buffers
    const size_t in_use =
        vectors_buffer_.capacity() + keys_buffer_.capacity() +
        neighbors_buffer_.capacity() + upper_neighbors_buffer_.capacity() +
        neighbors_index_.capacity() * sizeof(NeighborIndex);
    if (in_use > memory_quota_) {
      LOG_ERROR("Add vector failed, used memory exceed quota, cur_doc=%zu",
                static_cast<size_t>(doc_cnt()));
      return IndexError_NoMemory;
    }
  }

  // vector payload followed by zero padding up to the node stride
  const char *payload = reinterpret_cast<const char *>(vec);
  vectors_buffer_.append(payload, vector_size());
  vectors_buffer_.append(padding_size_, '\0');

  const char *key_bytes = reinterpret_cast<const char *>(&key);
  keys_buffer_.append(key_bytes, sizeof(key));

  // zeroed level-0 neighbor block
  neighbors_buffer_.append(neighbors_size_, '\0');

  // remember where this node's upper-layer blocks start, and its level
  neighbors_index_.emplace_back(upper_neighbors_buffer_.size(), level);

  // one zeroed upper-layer block per level above 0
  for (level_t remaining = level; remaining > 0; --remaining) {
    upper_neighbors_buffer_.append(upper_neighbors_size_, '\0');
  }

  *id = (*mutable_doc_cnt())++;

  return 0;
}

//! Get the primary key of node `id`; keys are stored back to back in
//! keys_buffer_, sizeof(key_t) bytes each. No bounds check is performed.
key_t HnswRabitqBuilderEntity::get_key(node_id_t id) const {
  const char *slot = keys_buffer_.data() + id * sizeof(key_t);
  return *reinterpret_cast<const key_t *>(slot);
}

//! Get a pointer to the stored vector of node `id`; vectors are laid out with
//! a stride of node_size() bytes. No bounds check is performed.
const void *HnswRabitqBuilderEntity::get_vector(node_id_t id) const {
  const char *base = vectors_buffer_.data();
  return base + id * node_size();
}

//! Get the stored vector of node `id` as a memory block; the block is reset
//! to point at the entity's own buffer.
//! @return always 0
int HnswRabitqBuilderEntity::get_vector(
    const node_id_t id, IndexStorage::MemoryBlock &block) const {
  block.reset(const_cast<void *>(get_vector(id)));
  return 0;
}

//! Batch get vectors: vecs[i] receives the address of the vector of ids[i].
//! @param ids    node ids, `count` entries
//! @param count  number of ids
//! @param vecs   output array with room for `count` pointers
//! @return always 0
int HnswRabitqBuilderEntity::get_vector(const node_id_t *ids, uint32_t count,
                                        const void **vecs) const {
  const char *base = vectors_buffer_.data();
  const auto stride = node_size();
  for (uint32_t pos = 0; pos != count; ++pos) {
    vecs[pos] = base + ids[pos] * stride;
  }

  return 0;
}

//! Batch get vectors as memory blocks: one block per id is appended to
//! `vec_blocks` (existing entries are kept).
//! @return always 0
int HnswRabitqBuilderEntity::get_vector(
    const node_id_t *ids, uint32_t count,
    std::vector<IndexStorage::MemoryBlock> &vec_blocks) const {
  std::vector<const void *> addresses(count);
  get_vector(ids, count, addresses.data());
  for (const void *address : addresses) {
    vec_blocks.emplace_back(
        IndexStorage::MemoryBlock(const_cast<void *>(address)));
  }
  return 0;
}

//! Get the neighbors of node `id` on graph `level`, built from the neighbor
//! count and the id array stored in the node's neighbor header.
const Neighbors HnswRabitqBuilderEntity::get_neighbors(level_t level,
                                                       node_id_t id) const {
  const NeighborsHeader *header = get_neighbor_header(level, id);
  return {header->neighbor_cnt, header->neighbors};
}

//! Replace the neighbor list of node `id` on graph `level`.
//! Only the node id (first) of each pair is stored. The caller is responsible
//! for not passing more entries than the neighbor block can hold; no bounds
//! check is performed here.
//! @return always 0
int HnswRabitqBuilderEntity::update_neighbors(
    level_t level, node_id_t id,
    const std::vector<std::pair<node_id_t, ResultRecord>> &neighbors) {
  auto *header = const_cast<NeighborsHeader *>(get_neighbor_header(level, id));

  size_t slot = 0;
  for (const auto &entry : neighbors) {
    header->neighbors[slot++] = entry.first;
  }
  header->neighbor_cnt = neighbors.size();

  return 0;
}

//! Append `neighbor_id` to the neighbor list of node `id` on graph `level`.
//! The size argument is ignored; no capacity check is performed.
void HnswRabitqBuilderEntity::add_neighbor(level_t level, node_id_t id,
                                           uint32_t /*size*/,
                                           node_id_t neighbor_id) {
  auto *header = const_cast<NeighborsHeader *>(get_neighbor_header(level, id));
  // write into the first free slot, then grow the count
  header->neighbors[header->neighbor_cnt] = neighbor_id;
  ++header->neighbor_cnt;
}

//! Dump the entity into `dumper` through dump_segments(), handing it the key
//! array and a callback that returns the level of a node.
//! @return 0 on success, or the negative value returned by dump_segments()
int HnswRabitqBuilderEntity::dump(const IndexDumper::Pointer &dumper) {
  char *raw_keys = const_cast<char *>(keys_buffer_.data());
  key_t *keys = reinterpret_cast<key_t *>(raw_keys);

  auto ret =
      dump_segments(dumper, keys, [&](node_id_t id) { return get_level(id); });
  if (ailego_unlikely(ret < 0)) {
    return ret;
  }

  return 0;
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_rabitq/hnsw_rabitq_builder_entity.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <zvec/ailego/internal/platform.h>
#include "hnsw_rabitq_entity.h"

namespace zvec {
namespace core {

class HnswRabitqBuilderEntity : public HnswRabitqEntity {
 public:
  //! Add vector and key to hnsw entity, and local id will be saved to id
  virtual int add_vector(level_t level, key_t key, const void *vec,
                         node_id_t *id) override;

  //! Get primary key of the node id
  virtual key_t get_key(node_id_t id) const override;

  //! Get vector feature data by key
  virtual const void *get_vector(node_id_t id) const override;

  //! Batch get vectors feature data by keys
  virtual int get_vector(const node_id_t *ids, uint32_t count,
                         const void **vecs) const override;

  virtual int get_vector(const node_id_t id,
                         IndexStorage::MemoryBlock &block) const override;
  virtual int get_vector(
      const node_id_t *ids, uint32_t count,
      std::vector<IndexStorage::MemoryBlock> &vec_blocks) const override;

  //! Get the node id's neighbors on graph level
  const NeighborsHeader *get_neighbor_header(level_t level,
                                             node_id_t id) const {
    if (level == 0) {
      return reinterpret_cast<const NeighborsHeader *>(
          neighbors_buffer_.data() + neighbors_size_ * id);
    } else {
      size_t offset = neighbors_index_[id].offset;
      return reinterpret_cast<const NeighborsHeader *>(
          upper_neighbors_buffer_.data() + offset +
          (level - 1) * upper_neighbors_size_);
    }
  }

  //! Get the node id's neighbors on graph level
  virtual const Neighbors get_neighbors(level_t level,
                                        node_id_t id) const override;

  //! Replace node id in level's neighbors
  virtual int update_neighbors(
      level_t level, node_id_t id,
      const std::vector<std::pair<node_id_t, ResultRecord>> &neighbors)
      override;

  //! add a neighbor to id in graph level
  virtual void add_neighbor(level_t level, node_id_t id, uint32_t size,
                            node_id_t neighbor_id) override;

  //! Dump the hnsw graph to dumper
  virtual int dump(const IndexDumper::Pointer &dumper) override;

  //! Cleanup the entity
  virtual int cleanup(void) override;

 public:
  //! Constructor
  HnswRabitqBuilderEntity();

  //! Get the node graph level by id
  level_t get_level(node_id_t id) const {
    return neighbors_index_[id].level;
  }

  //! Init builerEntity
  int init();

  //! reserve buffer space for documents
  //! @param  docs    number of documents
  int reserve_space(size_t docs);

  //! Set memory quota params
  inline void set_memory_quota(size_t memory_quota) {
    memory_quota_ = memory_quota;
  }

  //! Get neighbors size
  inline size_t neighbors_size() const {
    return sizeof(NeighborsHeader) + l0_neighbor_cnt() * sizeof(node_id_t);
  }

  //! Get upper neighbors size
  inline size_t upper_neighbors_size() const {
    return sizeof(NeighborsHeader) + upper_neighbor_cnt() * sizeof(node_id_t);
  }

 public:
  HnswRabitqBuilderEntity(const HnswRabitqBuilderEntity &) = delete;
  HnswRabitqBuilderEntity &operator=(const HnswRabitqBuilderEntity &) = delete;

 private:
  friend class HnswRabitqSearcherEntity;
  //! class internal used only
  struct NeighborIndex {
    NeighborIndex(size_t off, level_t l) : offset(off), level(l) {}
    uint64_t offset : 48;
    uint64_t level : 16;
  };

  std::string vectors_buffer_{};          // aligned vectors
  std::string keys_buffer_{};             // aligned vectors
  std::string neighbors_buffer_{};        // level 0 neighbors buffer
  std::string upper_neighbors_buffer_{};  // upper layer neighbors buffer

  std::string sparse_data_buffer_{};  // aligned spase data buffer
  size_t sparse_data_offset_{0};      //

  // upper layer offset + level in upper_neighbors_buffer_
  std::vector<NeighborIndex> neighbors_index_{};
  size_t memory_quota_{0UL};
  size_t neighbors_size_{0U};        // level 0 neighbors size
  size_t upper_neighbors_size_{0U};  // level 0 neighbors size
  size_t padding_size_{};            // padding size for each vector element
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_rabitq/hnsw_rabitq_chunk.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "hnsw_rabitq_chunk.h"
#include <chrono>
#include <random>
#include <zvec/ailego/hash/crc32c.h>
#include <zvec/ailego/utility/time_helper.h>
#include "zvec/core/framework/index_error.h"
#include "zvec/core/framework/index_helper.h"
#include "zvec/core/framework/index_logger.h"
#include "zvec/core/framework/index_streamer.h"

namespace zvec {
namespace core {

int HnswRabitqChunkBroker::init_storage(size_t chunk_size) {
  chunk_meta_.clear();
  chunk_meta_.chunk_size = chunk_size;
  chunk_meta_.create_time = ailego::Realtime::Seconds();
  stats_.set_create_time(chunk_meta_.create_time);
  chunk_meta_.update_time = ailego::Realtime::Seconds();
  stats_.set_update_time(chunk_meta_.update_time);

  //! alloc meta chunk
  size_t size = sizeof(HnswChunkMeta);
  size = (size + page_mask_) & (~page_mask_);
  const std::string segment_id =
      make_segment_id(CHUNK_TYPE_META, kDefaultChunkSeqId);
  int ret = stg_->append(segment_id, size);
  if (ailego_unlikely(ret != 0)) {
    LOG_ERROR("Storage append segment failed for %s", IndexError::What(ret));
    return ret;
  }
  chunk_meta_segment_ = get_chunk(CHUNK_TYPE_META, kDefaultChunkSeqId);
  if (ailego_unlikely(!chunk_meta_segment_)) {
    LOG_ERROR("Get meta segment failed");
    return IndexError_Runtime;
  }

  //! update meta info and write to storage
  chunk_meta_.chunk_cnts[CHUNK_TYPE_META] += 1;
  chunk_meta_.total_size += size;
  (*stats_.mutable_index_size()) += size;
  size = chunk_meta_segment_->write(0UL, &chunk_meta_, sizeof(HnswChunkMeta));
  if (ailego_unlikely(size != sizeof(HnswChunkMeta))) {
    LOG_ERROR("Storage write data failed, wsize=%zu", size);
    return IndexError_WriteData;
  }

  return 0;
}

int HnswRabitqChunkBroker::load_storage(size_t chunk_size) {
  IndexStorage::MemoryBlock data_block;
  size_t size = chunk_meta_segment_->read(0UL, data_block,
                                          chunk_meta_segment_->data_size());
  if (size != sizeof(HnswChunkMeta)) {
    LOG_ERROR("Invalid hnsw meta chunk, read size=%zu chunk size=%zu", size,
              chunk_meta_segment_->data_size());
    return IndexError_InvalidFormat;
  }
  std::memcpy(&chunk_meta_, data_block.data(), size);
  if (chunk_meta_.chunk_size != chunk_size) {
    LOG_ERROR(
        "Params hnsw chunk size=%zu mismatch from previous %zu "
        "in index",
        chunk_size, (size_t)chunk_meta_.chunk_size);
    return IndexError_Mismatch;
  }

  *stats_.mutable_check_point() = stg_->check_point();
  stats_.set_revision_id(chunk_meta_.revision_id);
  stats_.set_update_time(chunk_meta_.update_time);
  stats_.set_create_time(chunk_meta_.create_time);

  char create_time[32];
  char update_time[32];
  ailego::Realtime::Gmtime(chunk_meta_.create_time, "%Y-%m-%d %H:%M:%S",
                           create_time, sizeof(create_time));
  ailego::Realtime::Gmtime(chunk_meta_.update_time, "%Y-%m-%d %H:%M:%S",
                           update_time, sizeof(update_time));
  LOG_DEBUG(
      "Load index, indexSize=%zu chunkSize=%zu nodeChunks=%zu "
      "upperNeighborChunks=%zu revisionId=%zu "
      "createTime=%s updateTime=%s",
      (size_t)chunk_meta_.total_size, (size_t)chunk_meta_.chunk_size,
      (size_t)chunk_meta_.chunk_cnts[CHUNK_TYPE_NODE],
      (size_t)chunk_meta_.chunk_cnts[CHUNK_TYPE_UPPER_NEIGHBOR],
      (size_t)chunk_meta_.revision_id, create_time, update_time);

  return 0;
}

//! Attach the broker to a storage. If the storage already has a meta
//! segment the index is loaded, otherwise a new index is initialized.
//! Returns IndexError_Duplicate when a storage is already attached.
int HnswRabitqChunkBroker::open(IndexStorage::Pointer stg,
                                size_t max_index_size, size_t chunk_size,
                                bool check_crc) {
  if (ailego_unlikely(stg_)) {
    LOG_ERROR("An storage instance is already opened");
    return IndexError_Duplicate;
  }
  stg_ = std::move(stg);

  //! chunk sizes are rounded with this mask (page size minus one)
  const size_t page_bytes = stg_->isHugePage()
                                ? ailego::MemoryHelper::HugePageSize()
                                : ailego::MemoryHelper::PageSize();
  page_mask_ = page_bytes - 1;

  check_crc_ = check_crc;
  max_chunks_size_ = max_index_size;
  dirty_ = false;

  chunk_meta_segment_ =
      stg_->get(make_segment_id(CHUNK_TYPE_META, kDefaultChunkSeqId));
  if (chunk_meta_segment_) {
    return load_storage(chunk_size);
  }

  LOG_DEBUG("Create new index");
  return init_storage(chunk_size);
}

//! Flush with checkpoint 0 and detach from the storage. The flush result is
//! not propagated: this function always returns 0.
int HnswRabitqChunkBroker::close(void) {
  (void)flush(0UL);

  dirty_ = false;
  check_crc_ = false;
  stg_.reset();

  return 0;
}

//! Write the chunk meta with a fresh update time, then refresh and flush the
//! storage. Returns the result of the storage flush; a short meta write is
//! only logged.
int HnswRabitqChunkBroker::flush(uint64_t checkpoint) {
  ailego_assert_with(chunk_meta_segment_, "invalid meta segment");

  chunk_meta_.update_time = ailego::Realtime::Seconds();
  stats_.set_update_time(chunk_meta_.update_time);

  const size_t written =
      chunk_meta_segment_->write(0UL, &chunk_meta_, sizeof(HnswChunkMeta));
  if (ailego_unlikely(written != sizeof(HnswChunkMeta))) {
    LOG_ERROR("Storage write data failed, wsize=%zu", written);
  }

  stg_->refresh(checkpoint);
  const int flush_ret = stg_->flush();
  if (flush_ret != 0) {
    LOG_ERROR("Storage flush failed for %s", IndexError::What(flush_ret));
    return flush_ret;
  }

  //! the checkpoint is only published once the flush succeeded
  (*stats_.mutable_check_point()) = checkpoint;
  return flush_ret;
}

//! Allocate the chunk identified by (type, seq_id).
//!
//! If a chunk with that id already exists it is re-used only when it is
//! still empty and has the requested capacity; any other existing chunk is
//! reported as IndexError_Runtime so that stored data is never handed out
//! as a fresh chunk. Otherwise a new page-aligned segment is appended to the
//! storage and the chunk meta is updated.
//!
//! @param type    chunk type, must be below CHUNK_TYPE_MAX
//! @param seq_id  sequence id of the chunk within its type
//! @param size    requested chunk size in bytes (before page alignment)
//! @return pair of (error code, chunk); the chunk is null unless the code
//!         is 0
std::pair<int, Chunk::Pointer> HnswRabitqChunkBroker::alloc_chunk(
    int type, uint64_t seq_id, size_t size) {
  ailego_assert_with(type < CHUNK_TYPE_MAX, "chunk type overflow");

  Chunk::Pointer chunk;
  if (ailego_unlikely(!stg_)) {
    LOG_ERROR("Init storage first");
    return std::make_pair(IndexError_Uninitialized, chunk);
  }

  //! check exist a empty chunk with the same name
  chunk = get_chunk(type, seq_id);
  if (chunk) {
    // Reject the existing chunk unless it is empty AND has the expected
    // capacity.
    // NOTE(review): the comparison uses the requested size before page
    // alignment - confirm that capacity() reports the unaligned size.
    if (ailego_unlikely(chunk->capacity() != size ||
                        chunk->data_size() != 0UL)) {
      LOG_ERROR("Exist invalid chunk size %zu, expect size %zu",
                chunk->capacity(), size);
      chunk.reset();
      return std::make_pair(IndexError_Runtime, chunk);
    }
    return std::make_pair(0, chunk);
  }
  //! align to page size
  size = (size + page_mask_) & (~page_mask_);
  if (ailego_unlikely(chunk_meta_.total_size + size >= max_chunks_size_)) {
    LOG_ERROR("No space to new a chunk, curIndexSize=%zu allocSize=%zu",
              (size_t)chunk_meta_.total_size, size);
    return std::make_pair(IndexError_IndexFull, chunk);
  }

  std::string segment_id = make_segment_id(type, seq_id);
  int ret = stg_->append(segment_id, size);
  if (ailego_unlikely(ret != 0)) {
    LOG_ERROR("Storage append segment failed for %s", IndexError::What(ret));
    return std::make_pair(ret, chunk);
  }
  chunk_meta_.chunk_cnts[type] += 1;
  chunk_meta_.total_size += size;
  (*stats_.mutable_index_size()) += size;

  // Write the updated meta; a short write is only logged here.
  size = chunk_meta_segment_->write(0UL, &chunk_meta_, sizeof(HnswChunkMeta));
  if (ailego_unlikely(size != sizeof(HnswChunkMeta))) {
    LOG_ERROR("Storage append segment failed, wsize=%zu", size);
  }

  chunk = get_chunk(type, seq_id);
  return std::make_pair(chunk ? 0 : IndexError_NoMemory, chunk);
}

//! Look up the chunk (type, seq_id) in the storage by its segment id and
//! return whatever the storage returns for that id.
Chunk::Pointer HnswRabitqChunkBroker::get_chunk(int type,
                                                uint64_t seq_id) const {
  return stg_->get(make_segment_id(type, seq_id));
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_rabitq/hnsw_rabitq_chunk.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <atomic>
#include <cstddef>
#include <mutex>
#include <ailego/utility/memory_helper.h>
#include <zvec/ailego/internal/platform.h>
#include <zvec/ailego/utility/string_helper.h>
#include "zvec/core/framework/index_error.h"
#include "zvec/core/framework/index_logger.h"
#include "zvec/core/framework/index_storage.h"
#include "zvec/core/framework/index_streamer.h"

namespace zvec {
namespace core {

using Chunk = IndexStorage::Segment;

//! Broker for the chunks (storage segments) of an index: it opens the
//! storage, allocates and looks up chunks by (type, sequence id), and keeps
//! the persisted chunk meta and the streamer stats in sync.
class HnswRabitqChunkBroker {
 public:
  typedef std::shared_ptr<HnswRabitqChunkBroker> Pointer;

  //! Chunk types; the value is part of the segment id (make_segment_id)
  //! and is used as index into HnswChunkMeta::chunk_cnts
  enum CHUNK_TYPE {
    CHUNK_TYPE_HEADER = 1,
    CHUNK_TYPE_META = 2,
    CHUNK_TYPE_NODE = 3,
    CHUNK_TYPE_UPPER_NEIGHBOR = 4,
    CHUNK_TYPE_NEIGHBOR_INDEX = 5,
    CHUNK_TYPE_SPARSE_NODE = 6,
    CHUNK_TYPE_MAX = 8
  };
  //! Sequence id used for the meta chunk
  static constexpr size_t kDefaultChunkSeqId = 0UL;

  //! Construct with the stats object that this broker keeps updated
  HnswRabitqChunkBroker(IndexStreamer::Stats &stats) : stats_(stats) {}

  //! Open storage
  int open(IndexStorage::Pointer stg, size_t max_index_size, size_t chunk_size,
           bool check_crc);

  //! Flush and detach from the storage
  int close(void);

  //! Write the chunk meta and flush the storage with the given checkpoint
  int flush(uint64_t checkpoint);

  //! alloc a new chunk with size, not thread-safe
  std::pair<int, Chunk::Pointer> alloc_chunk(int type, uint64_t seq_id,
                                             size_t size);

  //! alloc a new chunk with chunk size
  inline std::pair<int, Chunk::Pointer> alloc_chunk(int type, uint64_t seq_id) {
    return alloc_chunk(type, seq_id, chunk_meta_.chunk_size);
  }

  //! Retrieve the chunk (type, seq_id) from the storage
  Chunk::Pointer get_chunk(int type, uint64_t seq_id) const;

  //! Number of chunks recorded in the meta for the given type
  inline size_t get_chunk_cnt(int type) const {
    ailego_assert_with(type < CHUNK_TYPE_MAX, "chunk type overflow");
    return chunk_meta_.chunk_cnts[type];
  }

  //! Whether mark_dirty() has been called since the flag was last reset
  inline bool dirty(void) const {
    return dirty_;
  }

  //! Mark the index as modified; the revision id is bumped only on the
  //! first call after the flag was reset
  inline void mark_dirty(void) {
    if (!dirty_) {
      dirty_ = true;
      chunk_meta_.revision_id += 1;
      stats_.set_revision_id(chunk_meta_.revision_id);
    }
  }

  //! Retrieve the attached storage (null when none is attached)
  const IndexStorage::Pointer storage(void) const {
    return stg_;
  }

 private:
  HnswRabitqChunkBroker(const HnswRabitqChunkBroker &) = delete;
  HnswRabitqChunkBroker &operator=(const HnswRabitqChunkBroker &) = delete;

  //! Meta record written to / read from the meta segment as raw bytes, so
  //! its layout must not change
  struct HnswChunkMeta {
    HnswChunkMeta(void) {
      memset(this, 0, sizeof(HnswChunkMeta));
    }
    void clear() {
      memset(this, 0, sizeof(HnswChunkMeta));
    }

    uint64_t chunk_cnts[CHUNK_TYPE_MAX];  // chunk count per CHUNK_TYPE
    uint64_t chunk_size;   // size of per chunk
    uint64_t total_size;   // total size of allocated chunk
    uint64_t revision_id;  // index revision
    uint64_t create_time;  // set when the index is initialized
    uint64_t update_time;  // set on init and on every flush
    uint64_t reserved[3];
  };

  static_assert(sizeof(HnswChunkMeta) % 32 == 0,
                "HnswChunkMeta must be aligned with 32 bytes");

  //! Init the storage after open an empty index
  int init_storage(size_t chunk_size);

  //! Load index from storage
  int load_storage(size_t chunk_size);

  //! Build the segment id "HnswT<type>S<seq_id>"
  static inline const std::string make_segment_id(int type, uint64_t seq_id) {
    return "HnswT" + ailego::StringHelper::ToString(type) + "S" +
           ailego::StringHelper::ToString(seq_id);
  }

 private:
  IndexStreamer::Stats &stats_;
  HnswChunkMeta chunk_meta_{};
  size_t page_mask_{0UL};        // page size - 1, used to align chunk sizes
  size_t max_chunks_size_{0UL};  // upper bound for the total chunk size
  IndexStorage::Pointer stg_{};
  IndexStorage::Segment::Pointer chunk_meta_segment_{};
  bool check_crc_{false};  // value passed to open()
  bool dirty_{false};  // set as true if index is modified , the flag
                       // will not be cleared even if flushed
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_rabitq/hnsw_rabitq_context.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "hnsw_rabitq_context.h"
#include <chrono>
#include "hnsw_rabitq_params.h"

namespace zvec {
namespace core {

//! Construct a context bound to `entity`; the add-distance calculator is
//! built from the entity, the metric and the given dimension.
HnswRabitqContext::HnswRabitqContext(size_t dimension,
                                     const IndexMetric::Pointer &metric,
                                     const HnswRabitqEntity::Pointer &entity)
    : IndexContext(metric),
      entity_(entity),
      // uses the member entity_, so entity_ must be initialized before add_dc_
      add_dc_(entity_.get(), metric, dimension) {}

//! Construct a context bound to `entity` without an explicit dimension; the
//! add-distance calculator is built from the entity and the metric only.
HnswRabitqContext::HnswRabitqContext(const IndexMetric::Pointer &metric,
                                     const HnswRabitqEntity::Pointer &entity)
    : IndexContext(metric), entity_(entity), add_dc_(entity_.get(), metric) {}

//! Destroy the visit filter owned by this context.
HnswRabitqContext::~HnswRabitqContext() {
  this->visit_filter_.destroy();
}

//! Initialize the context for the given usage type: create the visit filter
//! and size the heaps. Returns 0 on success, the filter error code when the
//! filter cannot be created, or IndexError_Runtime for an unknown type.
int HnswRabitqContext::init(ContextType type) {
  type_ = type;

  if (type == kBuilderContext) {
    // the builder always uses the byte map filter
    const int rc = visit_filter_.init(VisitFilter::ByteMap, entity_->doc_cnt(),
                                      max_scan_num_, negative_probability_);
    if (rc != 0) {
      LOG_ERROR("Create filter failed,  mode %d", filter_mode_);
      return rc;
    }
    candidates_.limit(max_scan_num_);
    update_heap_.limit(entity_->l0_neighbor_cnt() + 1);
    return 0;
  }

  if (type == kSearcherContext) {
    const int rc = visit_filter_.init(filter_mode_, entity_->doc_cnt(),
                                      max_scan_num_, negative_probability_);
    if (rc != 0) {
      LOG_ERROR("Create filter failed,  mode %d", filter_mode_);
      return rc;
    }
    candidates_.limit(max_scan_num_);
    return 0;
  }

  if (type == kStreamerContext) {
    // maxScanNum is unknown if inited from streamer, so the docCnt may
    // change. we need to compute maxScanNum by scan ratio, and preserve
    // max_doc_cnt space from visit filter
    const uint32_t doc_cnt = entity_->doc_cnt();
    max_scan_num_ = compute_max_scan_num(doc_cnt);
    reserve_max_doc_cnt_ = doc_cnt + compute_reserve_cnt(doc_cnt);
    const int rc = visit_filter_.init(filter_mode_, reserve_max_doc_cnt_,
                                      max_scan_num_, negative_probability_);
    if (rc != 0) {
      LOG_ERROR("Create filter failed,  mode %d", filter_mode_);
      return rc;
    }

    update_heap_.limit(entity_->l0_neighbor_cnt() + 1);
    candidates_.limit(max_scan_num_);

    check_need_adjuct_ctx();
    return 0;
  }

  LOG_ERROR("Init context failed");
  return IndexError_Runtime;
}

//! Update the tunable parameters of a searcher or streamer context from
//! `params`, and rebuild the visit filter when its mode or its bloom-filter
//! negative probability changed.
//!
//! @param params  parameters to apply; absent keys keep the current values
//! @return 0 on success, IndexError_InvalidArgument for invalid streamer
//!         scan settings, the filter error code when the filter cannot be
//!         re-created, or IndexError_Runtime for other context types
int HnswRabitqContext::update(const ailego::Params &params) {
  // Applies the visit filter parameters; only called for searcher and
  // streamer contexts.
  auto update_visit_filter_param = [&]() {
    bool need_update = false;
    std::string p;
    switch (type_) {
      case kSearcherContext:
        p = PARAM_HNSW_RABITQ_SEARCHER_VISIT_BLOOMFILTER_ENABLE;
        break;
      case kStreamerContext:
        p = PARAM_HNSW_RABITQ_STREAMER_VISIT_BLOOMFILTER_ENABLE;
        break;
      default:
        break;
    }

    if (params.has(p)) {
      bool bf_enabled;
      params.get(p, &bf_enabled);
      if (bf_enabled ^ (filter_mode_ == VisitFilter::BloomFilter)) {
        need_update = true;
        filter_mode_ =
            bf_enabled ? VisitFilter::BloomFilter : VisitFilter::ByteMap;
      }
    }

    float prob = negative_probability_;
    p.clear();
    switch (type_) {
      case kSearcherContext:
        p = PARAM_HNSW_RABITQ_SEARCHER_VISIT_BLOOMFILTER_NEGATIVE_PROB;
        break;
      case kStreamerContext:
        p = PARAM_HNSW_RABITQ_STREAMER_VISIT_BLOOMFILTER_NEGATIVE_PROB;
        break;
      default:
        break;
    }
    params.get(p, &prob);
    if (filter_mode_ == VisitFilter::BloomFilter &&
        std::abs(prob - negative_probability_) > 1e-6) {
      // Store the new probability so the filter below is rebuilt with it;
      // otherwise the old value is reused and every later update() with the
      // same parameter rebuilds the filter again.
      negative_probability_ = prob;
      need_update = true;
    }
    if (need_update) {
      visit_filter_.destroy();
      int max_doc_cnt = 0;
      if (type_ == kSearcherContext) {
        max_doc_cnt = entity_->doc_cnt();
      } else {
        max_doc_cnt = reserve_max_doc_cnt_;
      }
      int ret = visit_filter_.init(filter_mode_, max_doc_cnt, max_scan_num_,
                                   negative_probability_);
      if (ret != 0) {
        LOG_ERROR("Create filter failed,  mode %d", filter_mode_);
        return ret;
      }
    }
    return 0;
  };

  switch (type_) {
    case kSearcherContext:
      if (params.has(PARAM_HNSW_RABITQ_SEARCHER_EF)) {
        params.get(PARAM_HNSW_RABITQ_SEARCHER_EF, &ef_);
        topk_heap_.limit(std::max(topk_, ef_));
      }

      if (params.has(PARAM_HNSW_RABITQ_SEARCHER_MAX_SCAN_RATIO)) {
        params.get(PARAM_HNSW_RABITQ_SEARCHER_MAX_SCAN_RATIO, &max_scan_ratio_);
        max_scan_num_ =
            static_cast<uint32_t>(max_scan_ratio_ * entity_->doc_cnt());
        // never scan fewer than 10000 nodes
        max_scan_num_ = std::max(10000U, max_scan_num_);
      }

      if (params.has(PARAM_HNSW_RABITQ_SEARCHER_BRUTE_FORCE_THRESHOLD)) {
        params.get(PARAM_HNSW_RABITQ_SEARCHER_BRUTE_FORCE_THRESHOLD,
                   &bruteforce_threshold_);
      }

      return update_visit_filter_param();

    case kStreamerContext:
      if (params.has(PARAM_HNSW_RABITQ_STREAMER_EF)) {
        params.get(PARAM_HNSW_RABITQ_STREAMER_EF, &ef_);
        topk_heap_.limit(std::max(topk_, ef_));
      }
      params.get(PARAM_HNSW_RABITQ_STREAMER_MAX_SCAN_RATIO, &max_scan_ratio_);
      params.get(PARAM_HNSW_RABITQ_STREAMER_MAX_SCAN_LIMIT, &max_scan_limit_);
      params.get(PARAM_HNSW_RABITQ_STREAMER_MIN_SCAN_LIMIT, &min_scan_limit_);
      if (max_scan_ratio_ <= 0.0f || max_scan_ratio_ > 1.0f) {
        LOG_ERROR("[%s] must be in range (0.0f,1.0f]",
                  PARAM_HNSW_RABITQ_STREAMER_MAX_SCAN_RATIO.c_str());
        return IndexError_InvalidArgument;
      }
      if (max_scan_limit_ < min_scan_limit_) {
        LOG_ERROR("[%s] must be >= [%s]",
                  PARAM_HNSW_RABITQ_STREAMER_MAX_SCAN_LIMIT.c_str(),
                  PARAM_HNSW_RABITQ_STREAMER_MIN_SCAN_LIMIT.c_str());
        return IndexError_InvalidArgument;
      }

      if (params.has(PARAM_HNSW_RABITQ_STREAMER_BRUTE_FORCE_THRESHOLD)) {
        params.get(PARAM_HNSW_RABITQ_STREAMER_BRUTE_FORCE_THRESHOLD,
                   &bruteforce_threshold_);
      }

      return update_visit_filter_param();

    default:
      LOG_ERROR("update context failed, type=%zu", static_cast<size_t>(type_));
      return IndexError_Runtime;
  }
}

//! Re-bind an existing searcher/streamer context to another entity: reset
//! the visit filter and heap limits for the new entity, then switch the
//! entity, the distance calculator and the magic number.
//! The magic number is invalidated first and only set to `magic_num` when
//! the whole update succeeded.
int HnswRabitqContext::update_context(ContextType type, const IndexMeta &meta,
                                      const IndexMetric::Pointer &metric,
                                      const HnswRabitqEntity::Pointer &entity,
                                      uint32_t magic_num) {
  if (ailego_unlikely(type != type_)) {
    LOG_ERROR(
        "HnswRabitqContext doesn't support shared by different type, "
        "src=%u dst=%u",
        type_, type);
    return IndexError_Unsupported;
  }

  magic_ = kInvalidMgic;

  // TODO: support change filter mode?
  if (type == kBuilderContext) {
    LOG_ERROR("BuildContext doesn't support update");
    return IndexError_NotImplemented;
  }

  if (type == kSearcherContext) {
    if (!visit_filter_.reset(entity->doc_cnt(), max_scan_num_)) {
      LOG_ERROR("Reset filter failed, mode %d", visit_filter_.get_mode());
      return IndexError_Runtime;
    }
  } else if (type == kStreamerContext) {
    // the streamer derives the scan budget and the reserved filter capacity
    // from the current doc count of the new entity
    const uint32_t doc_cnt = entity->doc_cnt();
    max_scan_num_ = compute_max_scan_num(doc_cnt);
    reserve_max_doc_cnt_ = doc_cnt + compute_reserve_cnt(doc_cnt);
    if (!visit_filter_.reset(reserve_max_doc_cnt_, max_scan_num_)) {
      LOG_ERROR("Reset filter failed, mode %d", visit_filter_.get_mode());
      return IndexError_Runtime;
    }

    update_heap_.limit(entity->l0_neighbor_cnt() + 1);
  } else {
    LOG_ERROR("update context failed");
    return IndexError_Runtime;
  }

  candidates_.limit(max_scan_num_);
  topk_heap_.limit(std::max(topk_, ef_));

  entity_ = entity;
  dc().update(entity_.get(), metric, meta.dimension());
  magic_ = magic_num;
  level_topks_.clear();

  return 0;
}

void HnswRabitqContext::fill_random_to_topk_full(void) {
  static std::mt19937 mt(
      std::chrono::system_clock::now().time_since_epoch().count());
  std::uniform_int_distribution<node_id_t> dt(0, entity_->doc_cnt() - 1);
  std::function<node_id_t()> gen;
  node_id_t seqid;
  std::function<bool(node_id_t)> myfilter = [](node_id_t) { return false; };
  if (this->filter().is_valid()) {
    myfilter = [&](node_id_t id) {
      return this->filter()(entity_->get_key(id));
    };
  }

  if (topk_heap_.limit() < entity_->doc_cnt() / 2) {
    gen = [&](void) { return dt(mt); };
  } else {
    // If topk limit is big value, gen sequential id from an random initial
    seqid = dt(mt);
    gen = [&](void) {
      seqid = seqid == (entity_->doc_cnt() - 1) ? 0 : (seqid + 1);
      return seqid;
    };
  }

  for (size_t i = 0; !topk_heap_.full() && i < entity_->doc_cnt(); ++i) {
    const auto id = gen();
    if (!visit_filter_.visited(id) && !myfilter(id)) {
      visit_filter_.set_visited(id);
      topk_heap_.emplace(id, dc().dist(id));
    }
  }
  return;
}

}  // namespace core
}  // namespace zvec

================================================
FILE: src/core/algorithm/hnsw_rabitq/hnsw_rabitq_context.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <zvec/ailego/logger/logger.h>
#include "utility/visit_filter.h"
#include "zvec/core/framework/index_context.h"
#include "zvec/core/framework/index_provider.h"
#include "hnsw_rabitq_dist_calculator.h"
#include "hnsw_rabitq_entity.h"

namespace zvec {
namespace core {

class HnswRabitqContext : public IndexContext {
 public:
  //! Index Context Pointer
  typedef std::unique_ptr<HnswRabitqContext> Pointer;

  enum ContextType {
    kUnknownContext = 0,
    kSearcherContext = 1,
    kBuilderContext = 2,
    kStreamerContext = 3
  };

  //! Construct
  HnswRabitqContext(size_t dimension, const IndexMetric::Pointer &metric,
                    const HnswRabitqEntity::Pointer &entity);

  //! Construct
  HnswRabitqContext(const IndexMetric::Pointer &metric,
                    const HnswRabitqEntity::Pointer &entity);

  //! Destructor
  virtual ~HnswRabitqContext();

 public:
  //! Set topk of search result
  virtual void set_topk(uint32_t val) override {
    // The heap must hold at least max(topk, ef) entries.
    const uint32_t heap_limit = std::max(val, ef_);
    topk_ = val;
    topk_heap_.limit(heap_limit);
  }

  //! Retrieve search result
  virtual const IndexDocumentList &result(void) const override {
    // Shortcut for the first result list.
    return this->results_[0];
  }

  //! Retrieve search result
  virtual const IndexDocumentList &result(size_t idx) const override {
    // No bounds check is done here; idx must be a valid result index.
    return this->results_[idx];
  }

  //! Retrieve result object for output
  virtual IndexDocumentList *mutable_result(size_t idx) override {
    ailego_assert_with(idx < results_.size(), "invalid idx");
    // Hand out the address of the idx-th result list for in-place writes.
    IndexDocumentList &slot = results_[idx];
    return &slot;
  }

  //! Retrieve search group result with index
  virtual const IndexGroupDocumentList &group_result(void) const override {
    // Shortcut for the first group result list.
    return this->group_results_[0];
  }

  //! Retrieve search group result with index
  virtual const IndexGroupDocumentList &group_result(
      size_t idx) const override {
    // No bounds check is done here; idx must be a valid result index.
    return this->group_results_[idx];
  }

  virtual uint32_t magic(void) const override {
    return magic_;
  }

  //! Set mode of debug
  virtual void set_debug_mode(bool enable) override {
    debug_mode_ = enable;
  }

  //! Retrieve mode of debug
  virtual bool debug_mode(void) const override {
    // Same answer as debugging().
    const bool enabled = this->debugging();
    return enabled;
  }

  //! Retrieve string of debug
  virtual std::string debug_string(void) const override {
    // Formats the scan count and the debug counters into one line.
    char buf[4096];
    const int written = snprintf(
        buf, sizeof(buf),
        "scan_cnt=%zu,get_vector_cnt=%u,get_neighbors_cnt=%u,dup_node=%u",
        get_scan_num(), stats_get_vector_cnt_, stats_get_neighbors_cnt_,
        stats_visit_dup_cnt_);
    // snprintf returns a negative value on error and the untruncated length
    // otherwise, so never use it directly as the string length.
    if (written <= 0) {
      return std::string();
    }
    const size_t length =
        std::min(static_cast<size_t>(written), sizeof(buf) - 1);
    return std::string(buf, length);
  }

  //! Update the parameters of context
  virtual int update(const ailego::Params &params) override;

 public:
  //! Init context
  int init(ContextType type);

  //! Update context, the context may be shared by different searcher/streamer
  int update_context(ContextType type, const IndexMeta &meta,
                     const IndexMetric::Pointer &metric,
                     const HnswRabitqEntity::Pointer &entity,
                     uint32_t magic_num);

  inline const HnswRabitqEntity &get_entity() const {
    // Dereferences the entity pointer; it must not be null.
    const HnswRabitqEntity &bound = *entity_;
    return bound;
  }

  inline void resize_results(size_t size) {
    // Only the container matching the current search mode is resized.
    if (!group_by_search()) {
      results_.resize(size);
      return;
    }
    group_results_.resize(size);
  }

  inline void topk_to_result() {
    return topk_to_result(0);
  }

  //! Construct result from topk heap, result will be normalized
  inline void topk_to_result(uint32_t idx) {
    // Dispatch on the search mode: plain results vs. grouped results.
    if (!group_by_search()) {
      topk_to_single_result(idx);
      return;
    }
    topk_to_group_result(idx);
  }

  inline void topk_to_single_result(uint32_t idx) {
    if (force_padding_topk_ && !topk_heap_.full() &&
        topk_heap_.size() < entity_->doc_cnt()) {
      this->fill_random_to_topk_full();
    }
    if (ailego_unlikely(topk_heap_.size() == 0)) {
      return;
    }

    ailego_assert_with(idx < results_.size(), "invalid idx");
    int size = std::min(topk_, static_cast<uint32_t>(topk_heap_.size()));
    topk_heap_.sort();
    results_[idx].clear();

    for (int i = 0; i < size; ++i) {
      auto score = topk_heap_[i].second;
      if (score.est_dist > this->threshold()) {
        break;
      }

      node_id_t id = topk_heap_[i].first;
      if (fetch_vector_) {
        results_[idx].emplace_back(entity_->get_key(id), score.est_dist, id,
                                   entity_->get_vector(id));
      } else {
        results_[idx].emplace_back(entity_->get_key(id), score.est_dist, id);
      }
    }

    return;
  }

  //! Construct result from topk heap, result will be normalized
  inline void topk_to_group_result(uint32_t idx) {
    ailego_assert_with(idx < group_results_.size(), "invalid idx");

    group_results_[idx].clear();

    std::vector<std::pair<std::string, TopkHeap>> group_topk_list;
    std::vector<std::pair<std::string, ResultRecord>> best_score_in_groups;
    for (auto itr = group_topk_heaps_.begin(); itr != group_topk_heaps_.end();
         itr++) {
      const std::string &group_id = (*itr).first;
      auto &heap = (*itr).second;
      heap.sort();

      if (heap.size() > 0) {
        ResultRecord best_score = heap[0].second;
        best_score_in_groups.push_back(std::make_pair(group_id, best_score));
      }
    }

    std::sort(best_score_in_groups.begin(), best_score_in_groups.end(),
              [](const std::pair<std::string, ResultRecord> &a,
                 const std::pair<std::string, ResultRecord> &b) -> int {
                return a.second < b.second;
              });

    // truncate to group num
    for (uint32_t i = 0; i < group_num() && i < best_score_in_groups.size();
         ++i) {
      const std::string &group_id = best_score_in_groups[i].first;

      group_topk_list.emplace_back(
          std::make_pair(group_id, group_topk_heaps_[group_id]));
    }

    group_results_[idx].resize(group_topk_list.size());

    for (uint32_t i = 0; i < group_topk_list.size(); ++i) {
      const std::string &group_id = group_topk_list[i].first;
      group_results_[idx][i].set_group_id(group_id);

      uint32_t size = std::min(
          group_topk_, static_cast<uint32_t>(group_topk_list[i].second.size()));

      for (uint32_t j = 0; j < size; ++j) {
        auto score = group_topk_list[i].second[j].second;
        if (score > this->threshold()) {
          break;
        }

        node_id_t id = group_topk_list[i].second[j].first;

        if (fetch_vector_) {
          group_results_[idx][i].mutable_docs()->emplace_back(
              entity_->get_key(id), score.est_dist, id,
              entity_->get_vector(id));
        } else {
          group_results_[idx][i].mutable_docs()->emplace_back(
              entity_->get_key(id), score.est_dist, id);
        }
      }
    }
  }

  inline void reset_query(const void *query) {
    // When the metric provides a query preprocess function, run it on a
    // private copy of the query and search with that copy instead.
    if (auto query_preprocess_func = index_metric_->get_query_preprocess_func();
        query_preprocess_func != nullptr) {
      size_t dim = dc().dimension();
      preprocess_buffer_.resize(dim);
      // NOTE(review): copies `dim` bytes - presumably one byte per
      // dimension for the metrics that need preprocessing; confirm against
      // the element type of the query.
      memcpy(preprocess_buffer_.data(), query, dim);
      query_preprocess_func(preprocess_buffer_.data(), dim);
      query = preprocess_buffer_.data();
    }

    // Bind the (possibly preprocessed) query and restart the compare counter.
    dc().reset_query(query);
    dc().clear_compare_cnt();
    query_ = query;
  }

  inline HnswRabitqAddDistCalculator &dist_calculator() {
    // Public alias of dc().
    return this->dc();
  }

  inline TopkHeap &topk_heap() {
    // Mutable access to the topk heap.
    return this->topk_heap_;
  }

  inline TopkHeap &update_heap() {
    // Mutable access to the update heap.
    return this->update_heap_;
  }

  inline VisitFilter &visit_filter() {
    // Mutable access to the visit filter.
    return this->visit_filter_;
  }

  inline CandidateHeap &candidates() {
    // Mutable access to the candidate heap.
    return this->candidates_;
  }

  inline void set_max_scan_num(uint32_t max_scan_num) {
    max_scan_num_ = max_scan_num;
  }

  inline void set_max_scan_limit(uint32_t max_scan_limit) {
    max_scan_limit_ = max_scan_limit;
  }

  inline void set_min_scan_limit(uint32_t min_scan_limit) {
    min_scan_limit_ = min_scan_limit;
  }

  inline void set_ef(uint32_t v) {
    ef_ = v;
  }

  inline void set_filter_mode(uint32_t v) {
    filter_mode_ = v;
  }

  inline void set_filter_negative_probability(float v) {
    negative_probability_ = v;
  }

  inline void set_max_scan_ratio(float v) {
    max_scan_ratio_ = v;
  }

  virtual void set_magic(uint32_t v) {
    magic_ = v;
  }

  virtual void set_force_padding_topk(bool v) {
    force_padding_topk_ = v;
  }

  void set_bruteforce_threshold(uint32_t v) override {
    bruteforce_threshold_ = v;
  }

  inline uint32_t get_bruteforce_threshold() const {
    return bruteforce_threshold_;
  }

  void set_fetch_vector(bool v) override {
    fetch_vector_ = v;
  }

  bool fetch_vector() const override {
    return fetch_vector_;
  }

  //! Reset context
  void reset(void) override {
    // Drop the per-query settings: user filter, threshold, fetch-vector flag
    // and the group-by configuration.
    set_filter(nullptr);
    reset_threshold();
    set_fetch_vector(false);
    set_group_params(0, 0);
    reset_group_by();
  }

  inline std::map<std::string, TopkHeap> &group_topk_heaps() {
    return group_topk_heaps_;
  }

  inline TopkHeap &level_topk(int level) {
    // Lazily create the heaps up to `level`; each new heap is limited to
    // max(neighbor_cnt(level), ef_construction).
    const size_t wanted = static_cast<size_t>(level);
    if (ailego_unlikely(level_topks_.size() <= wanted)) {
      int next_level = level_topks_.size();
      level_topks_.resize(level + 1);
      while (next_level <= level) {
        size_t heap_size = std::max(entity_->neighbor_cnt(next_level),
                                    entity_->ef_construction());
        auto &heap = level_topks_[next_level];
        heap.clear();
        heap.limit(heap_size);
        ++next_level;
      }
    }

    return level_topks_[level];
  }

  inline void check_need_adjuct_ctx(void) {
    check_need_adjuct_ctx(entity_->doc_cnt());
  }

  inline size_t compute_reserve_cnt(uint32_t cur_doc) const {
    // The reserve equals the current doc count, capped at
    // kMaxReserveDocCnt and raised to at least kMinReserveDocCnt.
    if (cur_doc > kMaxReserveDocCnt) {
      return kMaxReserveDocCnt;
    }
    if (cur_doc < kMinReserveDocCnt) {
      return kMinReserveDocCnt;
    }
    return cur_doc;
  }

  //! candidates heap and visitfilter need to resize as doc cnt growing up
  inline void check_need_adjuct_ctx(uint32_t doc_cnt) {
    if (ailego_unlikely(doc_cnt + kTriggerReserveCnt > reserve_max_doc_cnt_)) {
      while (doc_cnt + kTriggerReserveCnt > reserve_max_doc_cnt_) {
        reserve_max_doc_cnt_ =
            reserve_max_doc_cnt_ + compute_reserve_cnt(reserve_max_doc_cnt_);
      }
      uint32_t max_scan_cnt = compute_max_scan_num(reserve_max_doc_cnt_);
      max_scan_num_ = max_scan_cnt;
      visit_filter_.reset(reserve_max_doc_cnt_, max_scan_cnt);
      candidates_.clear();
      candidates_.limit(max_scan_num_);
    }
  }

  //! Scan budget for `max_doc_cnt` docs: max_doc_cnt * max_scan_ratio_,
  //! raised to min_scan_limit_ first, otherwise capped at max_scan_limit_
  inline uint32_t compute_max_scan_num(uint32_t max_doc_cnt) const {
    const uint32_t scaled = max_doc_cnt * max_scan_ratio_;
    if (scaled < min_scan_limit_) {
      return min_scan_limit_;
    }
    if (scaled > max_scan_limit_) {
      return max_scan_limit_;
    }
    return scaled;
  }

  //! Get the number of distance computations recorded so far
  inline size_t get_scan_num() const {
    const auto &calculator = dc();
    return calculator.compare_cnt();
  }

  //! Non-zero once the distance compare counter reaches max_scan_num_
  inline uint64_t reach_scan_limit() const {
    const bool reached = dc().compare_cnt() >= max_scan_num_;
    return reached;
  }

  //! Whether the distance calculator has flagged an error
  inline bool error() const {
    const auto &calculator = dc();
    return calculator.error();
  }

  //! Clear per-query state: the distance calculator counters, the debug
  //! statistics (only in debug mode) and the content of every result list
  inline void clear() {
    add_dc_.clear();
    if (ailego_unlikely(this->debugging())) {
      stats_get_neighbors_cnt_ = stats_get_vector_cnt_ = stats_visit_dup_cnt_ =
          0u;
    }
    // Keep results_ itself (the next query reuses it); only empty each list
    for (size_t i = 0; i < results_.size(); ++i) {
      results_[i].clear();
    }
  }

  uint32_t *mutable_stats_get_neighbors() {
    return &stats_get_neighbors_cnt_;
  }

  uint32_t *mutable_stats_get_vector() {
    return &stats_get_vector_cnt_;
  }

  uint32_t *mutable_stats_visit_dup_cnt() {
    return &stats_visit_dup_cnt_;
  }

  inline bool debugging(void) const {
    return debug_mode_;
  }

  //! Replace the distance and batch-distance functions of the calculator
  inline void update_dist_caculator_distance(
      const IndexMetric::MatrixDistance &distance,
      const IndexMetric::MatrixBatchDistance &batch_distance) {
    auto &calculator = dc();
    calculator.update_distance(distance, batch_distance);
  }

  //! Get topk
  inline uint32_t topk() const override {
    return topk_;
  }

  //! Get group topk
  inline uint32_t group_topk() const {
    return group_topk_;
  }

  //! Get group num
  inline uint32_t group_num() const {
    return group_num_;
  }

  //! Get if group by search
  inline bool group_by_search() {
    return group_num_ > 0;
  }

  //! Set group params
  void set_group_params(uint32_t group_num, uint32_t group_topk) override {
    group_num_ = group_num;
    group_topk_ = group_topk;

    topk_ = group_topk_ * group_num_;

    topk_heap_.limit(std::max(topk_, ef_));

    group_topk_heaps_.clear();
  }

  //! Hand the vector provider over to the distance calculator
  void set_provider(IndexProvider::Pointer provider) {
    this->add_dc_.set_provider(std::move(provider));
  }

  const void *query() const {
    return query_;
  }

 private:
  //! Mutable access to the distance calculator
  inline HnswRabitqAddDistCalculator &dc() {
    return this->add_dc_;
  }

  //! Read-only access to the distance calculator
  inline const HnswRabitqAddDistCalculator &dc() const {
    return this->add_dc_;
  }

 private:
  // Filling random nodes if topk not full
  void fill_random_to_topk_full(void);

  // Headroom added to the doc count before comparing it with the reserved
  // capacity in check_need_adjuct_ctx()
  constexpr static uint32_t kTriggerReserveCnt = 4096UL;
  // Lower bound of one capacity growth step (see compute_reserve_cnt()); also
  // the initial value of reserve_max_doc_cnt_
  constexpr static uint32_t kMinReserveDocCnt = 4096UL;
  // Upper bound of one capacity growth step (see compute_reserve_cnt())
  constexpr static uint32_t kMaxReserveDocCnt = 128 * 1024UL;
  // All-ones sentinel (-1U); NOTE(review): presumably "invalid magic" -- the
  // name is spelled "Mgic", confirm usages before renaming
  constexpr static uint32_t kInvalidMgic = -1U;

 private:
  // Graph entity queried for doc count, neighbor counts and ef_construction
  HnswRabitqEntity::Pointer entity_;
  // Distance calculator; its compare counter drives the scan-limit checks
  HnswRabitqAddDistCalculator add_dc_;
  IndexMetric::Pointer metric_;

  bool debug_mode_{false};
  bool force_padding_topk_{false};
  // Scan budget compared against the compare counter in reach_scan_limit()
  uint32_t max_scan_num_{0};
  // Clamp bounds used by compute_max_scan_num()
  uint32_t max_scan_limit_{0};
  uint32_t min_scan_limit_{0};
  // Doc capacity the visit filter / candidate heap are currently sized for
  uint32_t reserve_max_doc_cnt_{kMinReserveDocCnt};
  uint32_t topk_{0};
  uint32_t group_topk_{0};
  uint32_t filter_mode_{VisitFilter::ByteMap};
  float negative_probability_{HnswRabitqEntity::kDefaultBFNegativeProbability};
  uint32_t ef_{HnswRabitqEntity::kDefaultEf};
  // Multiplied with the doc capacity in compute_max_scan_num()
  float max_scan_ratio_{HnswRabitqEntity::kDefaultScanRatio};
  uint32_t magic_{0U};
  // Result lists; clear() empties each list but keeps the vector itself
  std::vector<IndexDocumentList> results_{};
  std::vector<IndexGroupDocumentList> group_results_{};
  // Limited to max(topk_, ef_) by set_group_params()
  TopkHeap topk_heap_{};
  TopkHeap update_heap_{};
  // One heap per graph level, created lazily by level_topk()
  std::vector<TopkHeap> level_topks_{};
  // Candidate heap, limited to max_scan_num_ in check_need_adjuct_ctx()
  CandidateHeap candidates_{};
  VisitFilter visit_filter_{};
  uint32_t bruteforce_threshold_{};
  bool fetch_vector_{false};

  // A non-zero group number means a group-by search (see group_by_search())
  uint32_t group_num_{0};
  // Per-group heaps keyed by a string; cleared by set_group_params()
  std::map<std::string, TopkHeap> group_topk_heaps_{};

  uint32_t type_{kUnknownContext};
  //! debug stats info
  uint32_t stats_get_neighbors_cnt_{0u};
  uint32_t stats_get_vector_cnt_{0u};
  uint32_t stats_visit_dup_cnt_{0u};
  std::string preprocess_buffer_;
  // Query pointer returned by query(); not owned as far as this class shows
  const void *query_{nullptr};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_rabitq/hnsw_rabitq_dist_calculator.cc
================================================
// Copyright 2025-present the centaurdb project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "core/algorithm/hnsw_rabitq/hnsw_rabitq_dist_calculator.h"
#include "zvec/core/framework/index_error.h"

namespace zvec::core {

int HnswRabitqAddDistCalculator::get_vector(
    const node_id_t *ids, uint32_t count,
    std::vector<IndexStorage::MemoryBlock> &vec_blocks) const {
  for (uint32_t i = 0; i < count; ++i) {
    const node_id_t id = ids[i];
    key_t key = entity_->get_key(id);
    if (key == kInvalidKey) {
      return IndexError_NoExist;
    }
    IndexStorage::MemoryBlock block;
    int ret = provider_->get_vector(key, block);
    if (ret != 0) {
      return ret;
    }
    vec_blocks.push_back(std::move(block));
  }
  return 0;
}

}  // namespace zvec::core


================================================
FILE: src/core/algorithm/hnsw_rabitq/hnsw_rabitq_dist_calculator.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include "zvec/core/framework/index_meta.h"
#include "zvec/core/framework/index_metric.h"
#include "zvec/core/framework/index_provider.h"
#include "hnsw_rabitq_entity.h"

namespace zvec {
namespace core {

//! HnswRabitqAddDistCalculator is only used for index construction.
//! It computes distances between raw vectors: node ids are translated to keys
//! through the entity and the raw vector is fetched from the provider set via
//! set_provider(). Failures (null vectors) set a sticky error flag that is
//! reported by error() and cleared by clear() / reset_query().
class HnswRabitqAddDistCalculator {
 public:
  typedef std::shared_ptr<HnswRabitqAddDistCalculator> Pointer;

 public:
  enum DistType {
    DIST_NONE = 0,
    DIST_DENSE = 1,
    DIST_HYBRID = 2,
    DIST_SPARSE = 3
  };

 public:
  //! Constructor: dimension given, no query yet
  HnswRabitqAddDistCalculator(const HnswRabitqEntity *entity,
                              const IndexMetric::Pointer &metric, uint32_t dim)
      : HnswRabitqAddDistCalculator(entity, metric, dim, nullptr) {}

  //! Constructor: dimension and query given
  HnswRabitqAddDistCalculator(const HnswRabitqEntity *entity,
                              const IndexMetric::Pointer &metric, uint32_t dim,
                              const void *query)
      : entity_(entity),
        distance_(metric->distance()),
        batch_distance_(metric->batch_distance()),
        query_(query),
        dim_(dim),
        compare_cnt_(0) {}

  //! Constructor: dimension 0 and no query; use update() to set them later
  HnswRabitqAddDistCalculator(const HnswRabitqEntity *entity,
                              const IndexMetric::Pointer &metric)
      : HnswRabitqAddDistCalculator(entity, metric, 0U, nullptr) {}

  //! Rebind the entity and the distance functions of `metric`
  void update(const HnswRabitqEntity *entity,
              const IndexMetric::Pointer &metric) {
    entity_ = entity;
    distance_ = metric->distance();
    batch_distance_ = metric->batch_distance();
  }

  //! Rebind the entity, the distance functions of `metric` and the dimension
  void update(const HnswRabitqEntity *entity,
              const IndexMetric::Pointer &metric, uint32_t dim) {
    update(entity, metric);
    dim_ = dim;
  }

  //! Replace only the distance functions
  inline void update_distance(
      const IndexMetric::MatrixDistance &distance,
      const IndexMetric::MatrixBatchDistance &batch_distance) {
    distance_ = distance;
    batch_distance_ = batch_distance;
  }

  //! Reset query vector data (also clears the error flag)
  inline void reset_query(const void *query) {
    error_ = false;
    query_ = query;
  }

  //! Returns distance between two raw vectors; on a null input the error
  //! flag is set and 0 is returned. Does not touch the compare counter.
  inline dist_t dist(const void *vec_lhs, const void *vec_rhs) {
    if (ailego_unlikely(vec_lhs == nullptr || vec_rhs == nullptr)) {
      LOG_ERROR("Nullptr of dense vector");
      error_ = true;
      return 0.0f;
    }

    float score{0.0f};

    distance_(vec_lhs, vec_rhs, dim_, &score);

    return score;
  }

  //! Returns distance between query and vec.
  inline dist_t dist(const void *vec) {
    compare_cnt_++;

    return dist(vec, query_);
  }

  //! Return distance between query and node id.
  inline dist_t dist(node_id_t id) {
    compare_cnt_++;

    const void *feat = get_vector(id);
    if (ailego_unlikely(feat == nullptr)) {
      LOG_ERROR("Get nullptr vector, id=%u", id);
      error_ = true;
      return 0.0f;
    }

    return dist(feat, query_);
  }

  //! Return dist node lhs between node rhs
  inline dist_t dist(node_id_t lhs, node_id_t rhs) {
    compare_cnt_++;

    const void *feat = get_vector(lhs);
    const void *query = get_vector(rhs);
    if (ailego_unlikely(feat == nullptr || query == nullptr)) {
      LOG_ERROR("Get nullptr vector");
      error_ = true;
      return 0.0f;
    }

    return dist(feat, query);
  }

  //! Functor form of dist(const void *)
  dist_t operator()(const void *vec) {
    return dist(vec);
  }

  //! Functor form of dist(node_id_t)
  dist_t operator()(id_t i) {
    return dist(i);
  }

  //! Functor form of dist(node_id_t, node_id_t)
  dist_t operator()(id_t lhs, id_t rhs) {
    return dist(lhs, rhs);
  }

  //! Distances between the query and `num` vectors; the compare counter is
  //! incremented once per call, not once per vector
  void batch_dist(const void **vecs, size_t num, dist_t *distances) {
    compare_cnt_++;

    batch_distance_(vecs, query_, num, dim_, distances);
  }

  //! Distance between the query and node id, computed with the batch function
  inline dist_t batch_dist(node_id_t id) {
    compare_cnt_++;

    const void *feat = get_vector(id);
    if (ailego_unlikely(feat == nullptr)) {
      LOG_ERROR("Get nullptr vector, id=%u", id);
      error_ = true;
      return 0.0f;
    }
    dist_t score = 0;
    batch_distance_(&feat, query_, 1, dim_, &score);

    return score;
  }

  //! Reset the compare counter and the error flag
  inline void clear() {
    compare_cnt_ = 0;
    error_ = false;
  }

  //! Reset only the compare counter
  inline void clear_compare_cnt() {
    compare_cnt_ = 0;
  }

  //! Whether a null vector was encountered since the last clear/reset_query
  inline bool error() const {
    return error_;
  }

  //! Get distances compute times
  inline uint32_t compare_cnt() const {
    return compare_cnt_;
  }

  //! Get the vector dimension passed to the distance functions
  inline uint32_t dimension() const {
    return dim_;
  }

  //! Set the provider used to fetch raw vectors by key
  void set_provider(IndexProvider::Pointer provider) {
    provider_ = std::move(provider);
  }

  //! Fetch the raw vectors of several node ids (defined in the .cc file)
  int get_vector(const node_id_t *ids, uint32_t count,
                 std::vector<IndexStorage::MemoryBlock> &vec_blocks) const;

  //! Fetch the raw vector of one node id. Returns nullptr when no provider
  //! has been set or the node has no valid key; callers turn a nullptr into
  //! the error flag instead of crashing.
  const void *get_vector(node_id_t id) const {
    if (ailego_unlikely(!provider_)) {
      return nullptr;
    }
    key_t key = entity_->get_key(id);
    if (key == kInvalidKey) {
      return nullptr;
    }
    return provider_->get_vector(key);
  }

 private:
  HnswRabitqAddDistCalculator(const HnswRabitqAddDistCalculator &) = delete;
  HnswRabitqAddDistCalculator &operator=(const HnswRabitqAddDistCalculator &) =
      delete;

 private:
  const HnswRabitqEntity *entity_;
  IndexMetric::MatrixDistance distance_;
  IndexMetric::MatrixBatchDistance batch_distance_;

  const void *query_;
  uint32_t dim_;

  uint32_t compare_cnt_;           // record distance compute times
  uint32_t compare_cnt_batch_{0};  // record batch distance compute time
  bool error_{false};

  // get raw vector
  IndexProvider::Pointer provider_;
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_rabitq/hnsw_rabitq_entity.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "hnsw_rabitq_entity.h"
#include <rabitqlib/index/query.hpp>
#include "utility/sparse_utility.h"
#include "zvec/core/framework/index_stats.h"

namespace zvec {
namespace core {

const std::string HnswRabitqEntity::kGraphHeaderSegmentId = "graph.header";
const std::string HnswRabitqEntity::kGraphFeaturesSegmentId = "graph.features";
const std::string HnswRabitqEntity::kGraphKeysSegmentId = "graph.keys";
const std::string HnswRabitqEntity::kGraphNeighborsSegmentId =
    "graph.neighbors";
const std::string HnswRabitqEntity::kGraphOffsetsSegmentId = "graph.offsets";
const std::string HnswRabitqEntity::kGraphMappingSegmentId = "graph.mapping";
const std::string HnswRabitqEntity::kHnswHeaderSegmentId = "hnsw.header";
const std::string HnswRabitqEntity::kHnswNeighborsSegmentId = "hnsw.neighbors";
const std::string HnswRabitqEntity::kHnswOffsetsSegmentId = "hnsw.offsets";

//! Derive the RaBitQ layout fields of the graph header from `dimension`:
//! the dimension padded up to a multiple of 64, the byte sizes of the binary
//! and extended code blocks, and the resulting quantized vector size.
void HnswRabitqEntity::update_rabitq_params_and_vector_size(
    uint32_t dimension) {
  auto &graph = header_.graph;

  const uint32_t padded_dim = ((dimension + 63) / 64) * 64;
  graph.padded_dim = padded_dim;

  // BinDataMap layout: bin_code (padded_dim/8) + f_add + f_rescale + f_error
  graph.size_bin_data = rabitqlib::BinDataMap<float>::data_bytes(padded_dim);

  // ExDataMap layout: ex_code (padded_dim*ex_bits/8) + f_add_ex + f_rescale_ex
  graph.size_ex_data =
      rabitqlib::ExDataMap<float>::data_bytes(padded_dim, graph.ex_bits);

  // quantized vector format: cluster_id + bin_data + ex_data
  graph.vector_size = sizeof(uint32_t) + size_bin_data() + size_ex_data();
}

//! Compute the padding that aligns `data_size`, store it in `*padding_size`
//! and write that many zero bytes to `dumper`.
//! Returns 0 on success or IndexError_WriteData on a short write.
int HnswRabitqEntity::CalcAndAddPadding(const IndexDumper::Pointer &dumper,
                                        size_t data_size,
                                        size_t *padding_size) {
  const size_t pad = AlignSize(data_size) - data_size;
  *padding_size = pad;
  if (pad == 0) {
    return 0;
  }

  const std::string zeros(pad, '\0');
  if (dumper->write(zeros.data(), pad) == pad) {
    return 0;
  }
  LOG_ERROR("Append padding failed, size %zu", pad);
  return IndexError_WriteData;
}

//! Write one segment: the payload, zero padding up to the aligned size, then
//! the segment meta (size, padding size, CRC of the payload only).
//! Returns the bytes written (payload + padding) or an error code.
int64_t HnswRabitqEntity::dump_segment(const IndexDumper::Pointer &dumper,
                                       const std::string &segment_id,
                                       const void *data, size_t size) const {
  const size_t written = dumper->write(data, size);
  if (written != size) {
    LOG_ERROR("Dump segment %s data failed, expect: %zu, actual: %zu",
              segment_id.c_str(), size, written);
    return IndexError_WriteData;
  }

  const size_t padding_size = AlignSize(size) - size;
  if (padding_size > 0) {
    const std::string zeros(padding_size, '\0');
    const size_t padded = dumper->write(zeros.data(), padding_size);
    if (padded != padding_size) {
      LOG_ERROR("Append padding failed, size %zu", padding_size);
      return IndexError_WriteData;
    }
  }

  const uint32_t crc = ailego::Crc32c::Hash(data, size);
  const int ret = dumper->append(segment_id, size, padding_size, crc);
  if (ret != 0) {
    LOG_ERROR("Dump segment %s meta failed, ret=%d", segment_id.c_str(), ret);
    return ret;
  }

  return written + padding_size;
}

//! Dump the graph header followed by the hnsw header, each as its own
//! segment. Returns the total bytes written or the first error code.
int64_t HnswRabitqEntity::dump_header(const IndexDumper::Pointer &dumper,
                                      const HNSWHeader &hd) const {
  //! basic graph header; it is aligned and does not need padding
  const int64_t graph_bytes =
      dump_segment(dumper, kGraphHeaderSegmentId, &hd.graph, hd.graph.size);
  if (graph_bytes < 0) {
    return graph_bytes;
  }

  //! hnsw (upper neighbors) header; it is aligned and does not need padding
  const int64_t hnsw_bytes =
      dump_segment(dumper, kHnswHeaderSegmentId, &hd.hnsw, hd.hnsw.size);
  if (hnsw_bytes < 0) {
    return hnsw_bytes;
  }

  return graph_bytes + hnsw_bytes;
}

//! Placeholder: vector reshuffling is not implemented yet, so both mappings
//! and the keys are left untouched.
void HnswRabitqEntity::reshuffle_vectors(
    const std::function<level_t(node_id_t)> & /*get_level*/,
    std::vector<node_id_t> * /*n2o_mapping*/,
    std::vector<node_id_t> * /*o2n_mapping*/, key_t * /*keys*/) const {
  // TODO
}

//! Dump the mapping segment: all node ids ordered by ascending key.
//! Returns the bytes written or an error code.
int64_t HnswRabitqEntity::dump_mapping_segment(
    const IndexDumper::Pointer &dumper, const key_t *keys) const {
  std::vector<node_id_t> mapping(doc_cnt());
  std::iota(mapping.begin(), mapping.end(), 0U);

  const auto by_key = [keys](node_id_t lhs, node_id_t rhs) {
    return keys[lhs] < keys[rhs];
  };
  std::sort(mapping.begin(), mapping.end(), by_key);

  return dump_segment(dumper, kGraphMappingSegmentId, mapping.data(),
                      mapping.size() * sizeof(node_id_t));
}

//! Dump the whole index in this order: headers, vectors, neighbors, keys,
//! key-sorted mapping. Returns the total bytes written or the first error.
int64_t HnswRabitqEntity::dump_segments(
    const IndexDumper::Pointer &dumper, key_t *keys,
    const std::function<level_t(node_id_t)> &get_level) const {
  // Work on a copy of the header; the dumped node size is the aligned vector
  HNSWHeader dump_hd(header());
  dump_hd.graph.node_size = AlignSize(vector_size());

  std::vector<node_id_t> n2o_mapping;  // map new id to origin id
  std::vector<node_id_t> o2n_mapping;  // map origin id to new id
  reshuffle_vectors(get_level, &n2o_mapping, &o2n_mapping, keys);
  if (!o2n_mapping.empty()) {
    // Ids were reshuffled, so the entry point must be translated as well
    dump_hd.hnsw.entry_point = o2n_mapping[entry_point()];
  }

  //! Dump header
  const int64_t hd_size = dump_header(dumper, dump_hd);
  if (hd_size < 0) {
    return hd_size;
  }

  //! Dump vectors
  const int64_t vecs_size = dump_vectors(dumper, n2o_mapping);
  if (vecs_size < 0) {
    return vecs_size;
  }

  //! Dump neighbors
  auto neighbors_size =
      dump_neighbors(dumper, get_level, n2o_mapping, o2n_mapping);
  if (neighbors_size < 0) {
    return neighbors_size;
  }

  //! free memory
  n2o_mapping = std::vector<node_id_t>();
  o2n_mapping = std::vector<node_id_t>();

  //! Dump keys
  const int64_t keys_size = dump_segment(dumper, kGraphKeysSegmentId, keys,
                                         doc_cnt() * sizeof(key_t));
  if (keys_size < 0) {
    return keys_size;
  }

  //! Dump mapping
  const int64_t mapping_size = dump_mapping_segment(dumper, keys);
  if (mapping_size < 0) {
    return mapping_size;
  }

  return hd_size + keys_size + vecs_size + neighbors_size + mapping_size;
}

//! Dump the features segment: every vector followed by zero padding up to
//! the aligned node size. The CRC covers vectors and padding.
//! @param dumper           destination dumper
//! @param reorder_mapping  new id -> origin id; empty means identity order
//! @return bytes written, or IndexError_ReadData / IndexError_WriteData /
//!         the error code of the segment meta append
int64_t HnswRabitqEntity::dump_vectors(
    const IndexDumper::Pointer &dumper,
    const std::vector<node_id_t> &reorder_mapping) const {
  const size_t vector_dump_size = vector_size();
  const size_t padding_size = AlignSize(vector_dump_size) - vector_dump_size;

  // Heap-backed zero padding: a variable-length array is not standard C++
  // and would have zero length when the vector size is already aligned.
  const std::string padding(padding_size, '\0');
  uint32_t crc = 0U;
  size_t vecs_size = 0UL;

  //! dump vectors
  for (node_id_t id = 0; id < doc_cnt(); ++id) {
    const void *data =
        get_vector(reorder_mapping.empty() ? id : reorder_mapping[id]);
    if (ailego_unlikely(!data)) {
      return IndexError_ReadData;
    }
    size_t len = dumper->write(data, vector_dump_size);
    if (len != vector_dump_size) {
      LOG_ERROR("Dump vectors failed, write=%zu expect=%zu", len,
                vector_dump_size);
      return IndexError_WriteData;
    }

    crc = ailego::Crc32c::Hash(data, vector_dump_size, crc);
    vecs_size += vector_dump_size;

    if (padding_size == 0) {
      continue;
    }

    len = dumper->write(padding.data(), padding_size);
    if (len != padding_size) {
      LOG_ERROR("Dump vectors failed, write=%zu expect=%zu", len, padding_size);
      return IndexError_WriteData;
    }
    crc = ailego::Crc32c::Hash(padding.data(), padding_size, crc);
    vecs_size += padding_size;
  }

  // Padding is already part of the payload, so the meta padding is 0
  int ret = dumper->append(kGraphFeaturesSegmentId, vecs_size, 0UL, crc);
  if (ret != 0) {
    LOG_ERROR("Dump vectors segment meta failed, ret %d", ret);
    return ret;
  }

  return vecs_size;
}

//! Dump the level-0 neighbors segment followed by its offsets segment.
//! For every node the neighbor ids are written back to back (translated
//! through `neighbor_mapping` when it is not empty) and a
//! (offset, neighbor count) meta entry is recorded.
//! @param dumper            destination dumper
//! @param reorder_mapping   new id -> origin id; empty means identity order
//! @param neighbor_mapping  origin id -> new id; empty means no translation
//! @return bytes written (neighbors + padding + offsets segment) or an error
int64_t HnswRabitqEntity::dump_graph_neighbors(
    const IndexDumper::Pointer &dumper,
    const std::vector<node_id_t> &reorder_mapping,
    const std::vector<node_id_t> &neighbor_mapping) const {
  std::vector<GraphNeighborMeta> graph_meta;
  graph_meta.reserve(doc_cnt());
  size_t offset = 0;
  uint32_t crc = 0;
  // Heap-backed scratch buffer for translated ids: a variable-length array
  // is not standard C++ and risks the stack for large neighbor counts.
  std::vector<node_id_t> mapping(l0_neighbor_cnt());

  // Start from the largest value so any real count lowers the minimum
  uint32_t min_neighbor_count = static_cast<uint32_t>(-1);
  uint32_t max_neighbor_count = 0;
  size_t sum_neighbor_count = 0;

  for (node_id_t id = 0; id < doc_cnt(); ++id) {
    const Neighbors neighbors =
        get_neighbors(0, reorder_mapping.empty() ? id : reorder_mapping[id]);
    ailego_assert_with(!!neighbors.data, "invalid neighbors");
    ailego_assert_with(neighbors.size() <= l0_neighbor_cnt(),
                       "invalid neighbors");

    uint32_t neighbor_count = neighbors.size();
    if (neighbor_count < min_neighbor_count) {
      min_neighbor_count = neighbor_count;
    }
    if (neighbor_count > max_neighbor_count) {
      max_neighbor_count = neighbor_count;
    }
    sum_neighbor_count += neighbor_count;

    graph_meta.emplace_back(offset, neighbor_count);
    size_t size = neighbors.size() * sizeof(node_id_t);
    const node_id_t *data = neighbors.data;
    if (!neighbor_mapping.empty()) {
      for (node_id_t i = 0; i < neighbors.size(); ++i) {
        mapping[i] = neighbor_mapping[neighbors[i]];
      }
      data = mapping.data();
    }
    if (dumper->write(data, size) != size) {
      LOG_ERROR("Dump graph neighbor id=%zu failed, size %zu",
                static_cast<size_t>(id), size);
      return IndexError_WriteData;
    }
    crc = ailego::Crc32c::Hash(data, size, crc);
    offset += size;
  }

  uint32_t average_neighbor_count = 0;
  if (doc_cnt() > 0) {
    average_neighbor_count = sum_neighbor_count / doc_cnt();
  } else {
    // Empty graph: report 0 instead of the untouched sentinel
    min_neighbor_count = 0;
  }
  LOG_INFO(
      "Dump hnsw graph: min_neighbor_count[%u] max_neighbor_count[%u] "
      "average_neighbor_count[%u]",
      min_neighbor_count, max_neighbor_count, average_neighbor_count);

  size_t padding_size = 0;
  int ret = CalcAndAddPadding(dumper, offset, &padding_size);
  if (ret != 0) {
    return ret;
  }
  ret = dumper->append(kGraphNeighborsSegmentId, offset, padding_size, crc);
  if (ret != 0) {
    LOG_ERROR("Dump segment %s failed, ret %d",
              kGraphNeighborsSegmentId.c_str(), ret);
    return ret;
  }

  //! dump level 0 neighbors meta
  auto len = dump_segment(dumper, kGraphOffsetsSegmentId, graph_meta.data(),
                          graph_meta.size() * sizeof(GraphNeighborMeta));
  if (len < 0) {
    return len;
  }

  return len + offset + padding_size;
}

//! Dump the upper-level (level >= 1) neighbors segment followed by its
//! offsets segment. Every level of a node is written as one fixed-size
//! record: [neighbor count][upper_neighbor_cnt() ids, zero filled]. Nodes
//! that only live on level 0 get a (0, 0) meta entry and no record.
//! @param dumper            destination dumper
//! @param get_level         returns the top level of a node
//! @param reorder_mapping   new id -> origin id; empty means identity order
//! @param neighbor_mapping  origin id -> new id; empty means no translation
//! @return bytes written (records + padding + offsets segment) or an error
int64_t HnswRabitqEntity::dump_upper_neighbors(
    const IndexDumper::Pointer &dumper,
    const std::function<level_t(node_id_t)> &get_level,
    const std::vector<node_id_t> &reorder_mapping,
    const std::vector<node_id_t> &neighbor_mapping) const {
  std::vector<HnswNeighborMeta> hnsw_meta;
  hnsw_meta.reserve(doc_cnt());
  size_t offset = 0;
  uint32_t crc = 0;
  // Heap-backed record buffer: a variable-length array is not standard C++
  // and risks the stack for large neighbor counts.
  std::vector<node_id_t> buffer(upper_neighbor_cnt() + 1);
  const size_t buffer_bytes = buffer.size() * sizeof(node_id_t);
  for (node_id_t id = 0; id < doc_cnt(); ++id) {
    node_id_t new_id = reorder_mapping.empty() ? id : reorder_mapping[id];
    auto level = get_level(new_id);
    if (level == 0) {
      hnsw_meta.emplace_back(0U, 0U);
      continue;
    }
    hnsw_meta.emplace_back(offset, level);
    ailego_assert_with(static_cast<size_t>(level) < kMaxGraphLayers,
                       "invalid level");
    for (level_t cur_level = 1; cur_level <= level; ++cur_level) {
      const Neighbors neighbors = get_neighbors(cur_level, new_id);
      ailego_assert_with(!!neighbors.data, "invalid neighbors");
      ailego_assert_with(neighbors.size() <= neighbor_cnt(cur_level),
                         "invalid neighbors");
      // Zero the record so unused neighbor slots are deterministic
      memset(buffer.data(), 0, buffer_bytes);
      buffer[0] = neighbors.size();
      if (neighbor_mapping.empty()) {
        memcpy(&buffer[1], neighbors.data,
               neighbors.size() * sizeof(node_id_t));
      } else {
        for (node_id_t i = 0; i < neighbors.size(); ++i) {
          buffer[i + 1] = neighbor_mapping[neighbors[i]];
        }
      }
      if (dumper->write(buffer.data(), buffer_bytes) != buffer_bytes) {
        LOG_ERROR("Dump graph neighbor id=%zu failed, size %zu",
                  static_cast<size_t>(id), buffer_bytes);
        return IndexError_WriteData;
      }
      crc = ailego::Crc32c::Hash(buffer.data(), buffer_bytes, crc);
      offset += buffer_bytes;
    }
  }
  size_t padding_size = 0;
  int ret = CalcAndAddPadding(dumper, offset, &padding_size);
  if (ret != 0) {
    return ret;
  }

  ret = dumper->append(kHnswNeighborsSegmentId, offset, padding_size, crc);
  if (ret != 0) {
    LOG_ERROR("Dump segment %s failed, ret %d", kHnswNeighborsSegmentId.c_str(),
              ret);
    return ret;
  }

  //! dump upper neighbors meta
  auto len = dump_segment(dumper, kHnswOffsetsSegmentId, hnsw_meta.data(),
                          hnsw_meta.size() * sizeof(HnswNeighborMeta));
  if (len < 0) {
    return len;
  }

  return len + offset + padding_size;
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_rabitq/hnsw_rabitq_entity.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <execinfo.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <string>
#include <ailego/utility/memory_helper.h>
#include <zvec/ailego/container/heap.h>
#include <zvec/ailego/logger/logger.h>
#include "zvec/core/framework/index_dumper.h"
#include "zvec/core/framework/index_error.h"
#include "zvec/core/framework/index_storage.h"

namespace zvec {
namespace core {

//! Id of a node inside the graph
using node_id_t = uint32_t;
//! Key of a document; nodes are mapped to keys through the entity
using key_t = uint64_t;
//! Graph level, level 0 being the base graph
using level_t = int32_t;
//! Distance / score value
using dist_t = float;
//! Estimated distance record, ordered by `est_dist` only
struct EstimateRecord {
  float ip_x0_qr;
  float est_dist;
  float low_dist;

  //! Strict ordering on the estimated distance
  bool operator<(const EstimateRecord &other) const {
    return est_dist < other.est_dist;
  }
};
//! Heap value holding an estimated distance and a second `low_dist` value;
//! all comparisons look at `est_dist` only
struct ResultRecord {
  float est_dist;
  float low_dist;

  //! Both distances zero
  ResultRecord() : ResultRecord(0.0f, 0.0f) {}
  //! Both distances set to `dist` (implicit conversion from float)
  ResultRecord(float dist) : ResultRecord(dist, dist) {}
  //! Take est_dist / low_dist from an estimate record
  explicit ResultRecord(const EstimateRecord &other)
      : ResultRecord(other.est_dist, other.low_dist) {}
  ResultRecord(float est_dist, float low_dist)
      : est_dist(est_dist), low_dist(low_dist) {}

  bool operator<(const ResultRecord &other) const {
    return est_dist < other.est_dist;
  }
  bool operator<=(const ResultRecord &other) const {
    return est_dist <= other.est_dist;
  }
  bool operator>(const ResultRecord &other) const {
    return other.est_dist < est_dist;
  }
};
//! Heap of (node id, ResultRecord) using the default comparison
using TopkHeap = ailego::KeyValueHeap<node_id_t, ResultRecord>;
//! Heap of (node id, ResultRecord) compared with std::greater
using CandidateHeap =
    ailego::KeyValueHeap<node_id_t, ResultRecord, std::greater<ResultRecord>>;
//! Sentinel node id (all bits set)
constexpr node_id_t kInvalidNodeId = static_cast<node_id_t>(-1);
//! Sentinel key (all bits set), returned for nodes without a valid key
constexpr key_t kInvalidKey = static_cast<key_t>(-1);
class DistCalculator;

//! Graph header, dumped as the "graph.header" segment. The field order and
//! sizes are part of the dumped format; the reserved bytes keep the struct
//! size a multiple of 32.
struct GraphHeader {
  uint32_t size;  // set to sizeof(GraphHeader) by HNSWHeader::clear()
  uint32_t version;
  uint32_t graph_type;
  uint32_t doc_count;    // current total docs of the graph
  uint32_t vector_size;  // bytes of one quantized vector
  uint32_t node_size;    // aligned vector size, filled in when dumping
  uint32_t l0_neighbor_count;  // max neighbors on level 0
  uint32_t prune_type;
  uint32_t prune_neighbor_count;
  uint32_t ef_construction;
  uint32_t options;
  uint32_t min_neighbor_count;
  uint32_t padded_dim;     // dimension rounded up to a multiple of 64
  uint32_t size_bin_data;  // bytes of the RaBitQ binary data block
  uint32_t size_ex_data;   // bytes of the RaBitQ extended data block
  uint8_t ex_bits;
  uint8_t reserved_[4067];
};

static_assert(sizeof(GraphHeader) % 32 == 0,
              "GraphHeader must be aligned with 32 bytes");

//! Hnsw upper neighbor header, dumped as the "hnsw.header" segment.
//! The field order and sizes are part of the dumped format. The 30 reserved
//! bytes plus the compiler's tail padding bring the struct size to a
//! multiple of 32, which the static_assert below enforces.
struct HnswHeader {
  uint32_t size;      // header size
  uint32_t revision;  // current total docs of the graph
  uint32_t upper_neighbor_count;  // max neighbors on levels above 0
  uint32_t ef_construction;
  uint32_t scaling_factor;
  uint32_t max_level;    // current max graph level
  uint32_t entry_point;  // node id of the entry point
  uint32_t options;
  uint8_t reserved_[30];
};

static_assert(sizeof(HnswHeader) % 32 == 0,
              "HnswHeader must be aligned with 32 bytes");

//! Hnsw common header and upper neighbor header.
//! Copies are done with memcpy so that every byte, including reserved and
//! padding bytes that are later dumped, is carried over unchanged.
struct HNSWHeader {
  //! Construct a cleared header (see clear())
  HNSWHeader() {
    clear();
  }

  //! Byte-wise copy
  HNSWHeader(const HNSWHeader &header) {
    memcpy(this, &header, sizeof(header));
  }

  //! Byte-wise assignment; self-assignment is a no-op because memcpy must
  //! not be called with overlapping source and destination
  HNSWHeader &operator=(const HNSWHeader &header) {
    if (this != &header) {
      memcpy(this, &header, sizeof(header));
    }
    return *this;
  }

  //! Reset state to zero, and the params is untouched
  void inline reset() {
    graph.doc_count = 0U;
    hnsw.entry_point = kInvalidNodeId;
    hnsw.max_level = 0;
  }

  //! Clear all fields to init value: everything zero, the entry point
  //! invalid and both size fields set to their struct sizes
  void inline clear() {
    memset(this, 0, sizeof(HNSWHeader));
    hnsw.entry_point = kInvalidNodeId;
    graph.size = sizeof(GraphHeader);
    hnsw.size = sizeof(HnswHeader);
  }

  //! Max neighbors on level 0
  size_t l0_neighbor_cnt() const {
    return graph.l0_neighbor_count;
  }

  //! Max neighbors on levels above 0
  size_t upper_neighbor_cnt() const {
    return hnsw.upper_neighbor_count;
  }

  //! Bytes of one vector
  size_t vector_size() const {
    return graph.vector_size;
  }

  uint8_t ex_bits() const {
    return graph.ex_bits;
  }

  uint32_t padded_dim() const {
    return graph.padded_dim;
  }

  size_t ef_construction() const {
    return graph.ef_construction;
  }

  size_t scaling_factor() const {
    return hnsw.scaling_factor;
  }

  size_t neighbor_prune_cnt() const {
    return graph.prune_neighbor_count;
  }

  //! Node id of the entry point (kInvalidNodeId after clear()/reset())
  node_id_t entry_point() const {
    return hnsw.entry_point;
  }

  //! Current total docs of the graph
  node_id_t doc_cnt() const {
    return graph.doc_count;
  }

  GraphHeader graph;
  HnswHeader hnsw;
};

//! In-memory view of a stored neighbor list: a count followed directly by
//! the neighbor ids. `neighbors[0]` is a zero-length trailing array (a
//! compiler extension) that only marks where the ids start.
struct NeighborsHeader {
  uint32_t neighbor_cnt;
  node_id_t neighbors[0];
};

//! View over the neighbor ids of one node. It either points at external
//! memory (count + pointer) or at the content of a memory block it holds.
struct Neighbors {
  //! Empty list
  Neighbors() : cnt{0}, data{nullptr} {}

  //! View over `cnt_in` ids starting at `data_in`
  Neighbors(uint32_t cnt_in, const node_id_t *data_in)
      : cnt{cnt_in}, data{data_in} {}

  //! Take over a memory block laid out as a NeighborsHeader
  Neighbors(IndexStorage::MemoryBlock &&mem_block)
      : neighbor_block{std::move(mem_block)} {
    const auto *header =
        reinterpret_cast<const NeighborsHeader *>(neighbor_block.data());
    cnt = header->neighbor_cnt;
    data = header->neighbors;
  }

  //! Number of neighbors
  size_t size() const {
    return cnt;
  }

  //! Neighbor id at `idx` (no bounds check)
  const node_id_t &operator[](size_t idx) const {
    return *(data + idx);
  }

  uint32_t cnt;
  const node_id_t *data;
  IndexStorage::MemoryBlock neighbor_block;
};

//! Level 0 neighbors meta: byte offset of a node's neighbor list inside the
//! neighbors segment (48 bits) and its neighbor count (16 bits)
struct GraphNeighborMeta {
  GraphNeighborMeta(size_t byte_offset, size_t count)
      : offset(byte_offset), neighbor_cnt(count) {}

  uint64_t offset : 48;
  uint64_t neighbor_cnt : 16;
};

//! Hnsw upper neighbors meta: offset of a node's upper-level records
//! (48 bits) and the node's top level (16 bits)
struct HnswNeighborMeta {
  HnswNeighborMeta(size_t record_offset, size_t top_level)
      : offset(record_offset), level(top_level) {}

  uint64_t offset : 48;  // offset = idx * upper neighbors size
  uint64_t level : 16;
};

//! Base class of the HNSW-RaBitQ graph entities. It owns the HNSWHeader,
//! exposes accessors for the header fields, declares the storage operations
//! that concrete entities must provide (pure virtuals) and supplies
//! "not implemented" defaults for the optional ones.
class HnswRabitqEntity {
 public:
  //! Constructor
  HnswRabitqEntity() {}

  //! Constructor: start from a copy of the given header
  HnswRabitqEntity(const HNSWHeader &hd) {
    header_ = hd;
  }

  //! Destructor
  virtual ~HnswRabitqEntity() {}

  //! HnswRabitqEntity Pointer
  typedef std::shared_ptr<HnswRabitqEntity> Pointer;

  //! Get max neighbor size of graph level
  inline size_t neighbor_cnt(level_t level) const {
    return level == 0 ? header_.graph.l0_neighbor_count
                      : header_.hnsw.upper_neighbor_count;
  }

  //! Get max neighbor size of graph level 0
  inline size_t l0_neighbor_cnt() const {
    return header_.graph.l0_neighbor_count;
  }

  //! Get min neighbor size of graph
  inline size_t min_neighbor_cnt() const {
    return header_.graph.min_neighbor_count;
  }

  //! Get upper neighbor size of graph level other than 0
  inline size_t upper_neighbor_cnt() const {
    return header_.hnsw.upper_neighbor_count;
  }

  //! Get a writable pointer to the total doc count of the hnsw graph
  inline node_id_t *mutable_doc_cnt() {
    return &header_.graph.doc_count;
  }

  //! Get current total doc of the hnsw graph
  inline node_id_t doc_cnt() const {
    return header_.graph.doc_count;
  }

  //! Get hnsw graph scaling params
  inline size_t scaling_factor() const {
    return header_.hnsw.scaling_factor;
  }

  //! Get prune_size
  inline size_t prune_cnt() const {
    return header_.graph.prune_neighbor_count;
  }

  //! Current entry point of the top level graph
  inline node_id_t entry_point() const {
    return header_.hnsw.entry_point;
  }

  //! Current max graph level
  inline level_t cur_max_level() const {
    return header_.hnsw.max_level;
  }

  //! Retrieve index vector size
  size_t vector_size() const {
    return header_.graph.vector_size;
  }

  //! Retrieve node size
  size_t node_size() const {
    return header_.graph.node_size;
  }

  //! Retrieve ef construction
  size_t ef_construction() const {
    return header_.graph.ef_construction;
  }

  //! Retrieve ex_bits stored in the graph header
  uint8_t ex_bits() const {
    return header_.graph.ex_bits;
  }

  //! Retrieve padded_dim stored in the graph header
  uint32_t padded_dim() const {
    return header_.graph.padded_dim;
  }

  //! Retrieve size_bin_data stored in the graph header
  uint32_t size_bin_data() const {
    return header_.graph.size_bin_data;
  }

  //! Retrieve size_ex_data stored in the graph header
  uint32_t size_ex_data() const {
    return header_.graph.size_ex_data;
  }

  //! Defined out of line
  void update_rabitq_params_and_vector_size(uint32_t dimension);

  void set_ex_bits(uint8_t ex_bits) {
    header_.graph.ex_bits = ex_bits;
  }

  void set_prune_cnt(size_t v) {
    header_.graph.prune_neighbor_count = v;
  }

  void set_scaling_factor(size_t val) {
    header_.hnsw.scaling_factor = val;
  }

  void set_l0_neighbor_cnt(size_t cnt) {
    header_.graph.l0_neighbor_count = cnt;
  }

  void set_min_neighbor_cnt(size_t cnt) {
    header_.graph.min_neighbor_count = cnt;
  }

  void set_upper_neighbor_cnt(size_t cnt) {
    header_.hnsw.upper_neighbor_count = cnt;
  }

  void set_ef_construction(size_t ef) {
    header_.graph.ef_construction = ef;
  }

 protected:
  //! Read-only access to the header
  inline const HNSWHeader &header() const {
    return header_;
  }

  //! Writable access to the header
  inline HNSWHeader *mutable_header() {
    return &header_;
  }

  //! Size in bytes of the header object
  inline size_t header_size() const {
    return sizeof(header_);
  }

  void set_node_size(size_t size) {
    header_.graph.node_size = size;
  }

  //! Dump all segments by dumper
  //! Return dump size if success, errno(<0) in failure
  int64_t dump_segments(
      const IndexDumper::Pointer &dumper, key_t *keys,
      const std::function<level_t(node_id_t)> &get_level) const;

 private:
  //! Dump mapping segment, for get_vector_by_key in provider
  int64_t dump_mapping_segment(const IndexDumper::Pointer &dumper,
                               const key_t *keys) const;

  //! Dump hnsw head by dumper
  //! Return dump size if success, errno(<0) in failure
  int64_t dump_header(const IndexDumper::Pointer &dumper,
                      const HNSWHeader &hd) const;

  //! Dump vectors by dumper
  //! Return dump size if success, errno(<0) in failure
  int64_t dump_vectors(const IndexDumper::Pointer &dumper,
                       const std::vector<node_id_t> &reorder_mapping) const;

  //! Dump hnsw neighbors by dumper: level 0 first, then the upper levels.
  //! Return the summed dump size if success, errno(<0) in failure
  int64_t dump_neighbors(const IndexDumper::Pointer &dumper,
                         const std::function<level_t(node_id_t)> &get_level,
                         const std::vector<node_id_t> &reorder_mapping,
                         const std::vector<node_id_t> &neighbor_mapping) const {
    auto len1 = dump_graph_neighbors(dumper, reorder_mapping, neighbor_mapping);
    if (len1 < 0) {
      return len1;
    }
    auto len2 = dump_upper_neighbors(dumper, get_level, reorder_mapping,
                                     neighbor_mapping);
    if (len2 < 0) {
      return len2;
    }

    return len1 + len2;
  }

  //! Dump segment by dumper
  //! Return dump size if success, errno(<0) in failure
  int64_t dump_segment(const IndexDumper::Pointer &dumper,
                       const std::string &segment_id, const void *data,
                       size_t size) const;

  //! Dump level 0 neighbors
  //! Return dump size if success, errno(<0) in failure
  int64_t dump_graph_neighbors(
      const IndexDumper::Pointer &dumper,
      const std::vector<node_id_t> &reorder_mapping,
      const std::vector<node_id_t> &neighbor_mapping) const;

  //! Dump upper level neighbors
  //! Return dump size if success, errno(<0) in failure
  int64_t dump_upper_neighbors(
      const IndexDumper::Pointer &dumper,
      const std::function<level_t(node_id_t)> &get_level,
      const std::vector<node_id_t> &reorder_mapping,
      const std::vector<node_id_t> &neighbor_mapping) const;

 public:
  //! Cleanup the entity: resets the header
  virtual int cleanup(void) {
    header_.clear();
    return 0;
  }

  //! Make a copy of searcher entity, to support thread-safe operation.
  //! The segment in container cannot be read concurrently.
  //! Default implementation: logs an error and returns an empty pointer.
  virtual const HnswRabitqEntity::Pointer clone() const {
    LOG_ERROR("Clone not implemented");
    return HnswRabitqEntity::Pointer();
  }

  //! Get primary key of the node id
  virtual key_t get_key(node_id_t id) const = 0;

  //! Get vector feature data by node id
  virtual const void *get_vector(node_id_t id) const = 0;

  //! Get vectors feature data by node ids
  virtual int get_vector(const node_id_t *ids, uint32_t count,
                         const void **vecs) const = 0;

  //! Get vector feature data by node id into a memory block
  virtual int get_vector(const node_id_t id,
                         IndexStorage::MemoryBlock &block) const = 0;

  //! Get vectors feature data by node ids into memory blocks
  virtual int get_vector(
      const node_id_t *ids, uint32_t count,
      std::vector<IndexStorage::MemoryBlock> &vec_blocks) const = 0;

  //! Retrieve a vector using a primary key.
  //! Default implementation: logs an error and returns nullptr.
  virtual const void *get_vector_by_key(uint64_t /*key*/) const {
    LOG_ERROR("get vector not implemented");
    return nullptr;
  }

  //! Retrieve a vector using a primary key into a memory block.
  //! Default implementation returns IndexError_NotImplemented.
  virtual int get_vector_by_key(const key_t /*key*/,
                                IndexStorage::MemoryBlock & /*block*/) const {
    return IndexError_NotImplemented;
  }

  //! Get the node id's neighbors on graph level
  //! Note: the neighbors cannot be modified, using the following
  //! method to get WritableNeighbors if want to
  virtual const Neighbors get_neighbors(level_t level, node_id_t id) const = 0;

  //! Add vector and key to hnsw entity, and local id will be saved in id
  virtual int add_vector(level_t /*level*/, key_t /*key*/, const void * /*vec*/,
                         node_id_t * /*id*/) {
    return IndexError_NotImplemented;
  }

  //! Add vector and id to hnsw entity
  virtual int add_vector_with_id(level_t /*level*/, node_id_t /*id*/,
                                 const void * /*vec*/) {
    return IndexError_NotImplemented;
  }

  //! Replace the neighbors of node id on level.
  //! Default implementation: logs an error and reports
  //! IndexError_NotImplemented, in line with add_vector()/load()/dump(), so
  //! that a caller checking the status does not mistake the no-op for success.
  virtual int update_neighbors(
      level_t /*level*/, node_id_t /*id*/,
      const std::vector<std::pair<node_id_t, ResultRecord>> & /*neighbors*/) {
    LOG_ERROR("Update neighbors dense not implemented");

    return IndexError_NotImplemented;
  }

  //! Append neighbor_id to node id neighbors on level, size is the current
  //! neighbors size. Notice: the caller must ensure the neighbors are not full
  virtual void add_neighbor(level_t /*level*/, node_id_t /*id*/,
                            uint32_t /*size*/, node_id_t /*neighbor_id*/) {
    LOG_ERROR("Add neighbor not implemented");
  }

  //! Update entry point and max level
  virtual void update_ep_and_level(node_id_t ep, level_t level) {
    header_.hnsw.entry_point = ep;
    header_.hnsw.max_level = level;
  }

  //! Load the entity from storage.
  //! Default implementation: logs and returns IndexError_NotImplemented.
  virtual int load(const IndexStorage::Pointer & /*container*/,
                   bool /*check_crc*/) {
    LOG_ERROR("Load not implemented");
    return IndexError_NotImplemented;
  }

  //! Dump the entity through dumper.
  //! Default implementation: logs and returns IndexError_NotImplemented.
  virtual int dump(const IndexDumper::Pointer & /*dumper*/) {
    LOG_ERROR("Dump not implemented");
    return IndexError_NotImplemented;
  }

  //! Defined out of line
  static int CalcAndAddPadding(const IndexDumper::Pointer &dumper,
                               size_t data_size, size_t *padding_size);

  //! Read the 32-bit cluster id stored at cluster_id_offset() in vec
  uint32_t get_cluster_id(const void *vec) const {
    return *reinterpret_cast<const uint32_t *>(
        reinterpret_cast<const char *>(vec) + cluster_id_offset());
  }

  //! Pointer to the bin data stored at bin_data_offset() in vec
  const char *get_bin_data(const void *vec) const {
    return reinterpret_cast<const char *>(vec) + bin_data_offset();
  }

  //! Pointer to the ex data stored at ex_data_offset() in vec
  const char *get_ex_data(const void *vec) const {
    return reinterpret_cast<const char *>(vec) + ex_data_offset();
  }

  //! The cluster id sits at the very start of a stored vector
  uint32_t cluster_id_offset() const {
    return 0;
  }

  //! The bin data follows the 32-bit cluster id
  uint32_t bin_data_offset() const {
    return cluster_id_offset() + sizeof(uint32_t);
  }

  //! The ex data follows the bin data
  uint32_t ex_data_offset() const {
    return bin_data_offset() + size_bin_data();
  }

 protected:
  //! Round size up to a multiple of 32 bytes
  static inline size_t AlignSize(size_t size) {
    return (size + 0x1F) & (~0x1F);
  }

  //! Round size up to a multiple of the page size
  static inline size_t AlignPageSize(size_t size) {
    size_t page_mask = ailego::MemoryHelper::PageSize() - 1;
    return (size + page_mask) & (~page_mask);
  }

  //! Round size up to a multiple of the huge page size
  static inline size_t AlignHugePageSize(size_t size) {
    size_t page_mask = ailego::MemoryHelper::HugePageSize() - 1;
    return (size + page_mask) & (~page_mask);
  }

  //! Rearrange vectors to improve cache locality
  void reshuffle_vectors(const std::function<level_t(node_id_t)> &get_level,
                         std::vector<node_id_t> *n2o_mapping,
                         std::vector<node_id_t> *o2n_mapping,
                         key_t *keys) const;

 public:
  const static std::string kGraphHeaderSegmentId;
  const static std::string kGraphFeaturesSegmentId;
  const static std::string kGraphKeysSegmentId;
  const static std::string kGraphNeighborsSegmentId;
  const static std::string kGraphOffsetsSegmentId;
  const static std::string kGraphMappingSegmentId;
  const static std::string kHnswHeaderSegmentId;
  const static std::string kHnswNeighborsSegmentId;
  const static std::string kHnswOffsetsSegmentId;

  constexpr static uint32_t kRevision = 0U;
  constexpr static size_t kMaxGraphLayers = 15;
  constexpr static uint32_t kDefaultEfConstruction = 500;
  constexpr static uint32_t kDefaultEf = 500;
  constexpr static uint32_t kDefaultUpperMaxNeighborCnt = 50;  // M of HNSW
  constexpr static uint32_t kDefaultL0MaxNeighborCnt = 100;
  constexpr static uint32_t kMaxNeighborCnt = 65535;
  constexpr static float kDefaultScanRatio = 0.1f;
  constexpr static uint32_t kDefaultMinScanLimit = 10000;
  constexpr static uint32_t kDefaultMaxScanLimit =
      std::numeric_limits<uint32_t>::max();
  constexpr static float kDefaultBFNegativeProbability = 0.001f;
  constexpr static uint32_t kDefaultScalingFactor = 50U;
  constexpr static uint32_t kDefaultBruteForceThreshold = 1000U;
  constexpr static uint32_t kDefaultDocsHardLimit = 1 << 30U;  // 1 billion
  constexpr static float kDefaultDocsSoftLimitRatio = 0.9f;
  constexpr static size_t kMaxChunkSize = 0xFFFFFFFF;
  constexpr static size_t kDefaultChunkSize = 2UL * 1024UL * 1024UL;
  constexpr static size_t kDefaultMaxChunkCnt = 50000UL;
  constexpr static float kDefaultNeighborPruneMultiplier =
      1.0f;  // prune_cnt = upper_max_neighbor_cnt * multiplier
  constexpr static float kDefaultL0MaxNeighborCntMultiplier =
      2.0f;  // l0_max_neighbor_cnt = upper_max_neighbor_cnt * multiplier

 protected:
  HNSWHeader header_{};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_rabitq/hnsw_rabitq_index_hash.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include "hnsw_rabitq_chunk.h"

namespace zvec {
namespace core {

//! Persistent hashmap implemented through open addressing.
//! Keys are partitioned into slots by their high bits (key >> mask_bits_);
//! each slot is one chunk holding slot_items_ items that is probed linearly.
//! An item whose value equals EmptyVal is treated as unused.
template <class Key, class Val, Val EmptyVal = 0U,
          typename =
              typename std::enable_if<std::is_integral<Key>::value>::type>
class HnswIndexHashMap {
  using key_type = Key;
  using val_type = Val;

  //! One key/value item as laid out inside a chunk
  struct Iterator {
    key_type first;
    val_type second;
  };
  typedef Iterator *iterator;
  typedef Iterator Item;
  typedef const Iterator *const_iterator;

  //! One hash slot: the backing chunk plus a view of its items
  class Slot {
   public:
    //! Slot over items reachable through a raw data pointer
    Slot(Chunk::Pointer &&chunk, const void *data)
        : chunk_(std::move(chunk)),
          items_(reinterpret_cast<const Item *>(data)) {}

    //! Slot over items held by a memory block owned by the slot
    Slot(Chunk::Pointer &&chunk, IndexStorage::MemoryBlock &&mem_block)
        : chunk_(std::move(chunk)), items_block_(std::move(mem_block)) {
      items_ = reinterpret_cast<const Item *>(items_block_.data());
    }

    //! Return an empty loc or the key item loc, probing linearly from
    //! (key & mask) and wrapping at max_items. Returns nullptr when all
    //! max_items locations are occupied by other keys.
    const_iterator find(key_type key, uint32_t max_items, uint32_t mask) const {
      auto it = &items_[key & mask];
      for (auto i = 0U; i < max_items; ++i) {
        if (it->first == key || it->second == EmptyVal) {
          // LOG_DEBUG("i=%u", i);
          return it;
        }
        ++it;
        if (it == &items_[max_items]) {
          it = &items_[0];
        }
      }
      return nullptr;
    }

    //! Write the item pointed to by `it` back to the chunk at the same
    //! byte offset it has inside the in-memory item array.
    bool update(const_iterator it) {
      uint32_t offset = reinterpret_cast<const uint8_t *>(it) -
                        reinterpret_cast<const uint8_t *>(&items_[0]);
      if (ailego_unlikely(chunk_->write(offset, it, sizeof(Item)) !=
                          sizeof(Item))) {
        LOG_ERROR("Chunk write failed");
        return false;
      }
      return true;
    }

   private:
    Chunk::Pointer chunk_{};
    const Item *items_{nullptr};  // point to chunk data
    IndexStorage::MemoryBlock items_block_{};
  };

 public:
  //! Init the hash
  //! broker      the index allocator
  //! chunk_size  the size of per chunk allocated, actual size may greater
  //! factor      factor = 1/ratio, ratio is the probability of a sequence
  //! number inserted to this container
  //! max         the max number key can be inserted
  //! expansion_ratio   memory expansion ratio
  //! Return 0 on success, or the error reported while loading existing slots
  int init(HnswRabitqChunkBroker::Pointer &broker, uint32_t chunk_size,
           uint32_t factor, size_t max, float expansion_ratio) {
    ailego_assert_with(expansion_ratio > 1.0f, "ratio must > 1.0f");
    broker_ = broker;

    size_t items = std::ceil(chunk_size * 1.0f / sizeof(Item));
    // Round the item count up to a power of two so that a bit mask can be
    // used to pick the first probe location.
    slot_items_ = 1UL << static_cast<size_t>((std::ceil(std::log2(items))));
    size_t range = slot_items_ * factor / expansion_ratio;
    mask_bits_ = std::floor(std::log2(range));
    range = 1UL << mask_bits_;
    size_t max_slots = std::ceil(max * 1.0f / range);
    // The reserved capacity doubles as the upper bound checked by insert().
    slots_.reserve(max_slots);
    slot_loc_mask_ = slot_items_ - 1U;
    int ret = load();
    if (ret != 0) {
      return ret;
    }

    LOG_DEBUG(
        "HnswRabitqIndexHash init, chunkSize=%u factor=%u max=%zu "
        "ratio=%f slotItems=%u maxSlots=%zu maskBits=%u "
        "range=%zu",
        chunk_size, factor, max, expansion_ratio, slot_items_, max_slots,
        mask_bits_, range);

    return 0;
  }

  //! Release the broker and all slots and reset the hash parameters
  int cleanup(void) {
    broker_.reset();
    slots_.clear();
    slots_.shrink_to_fit();
    mask_bits_ = 0U;
    slot_items_ = 0U;
    slot_loc_mask_ = 0U;

    return 0;
  }

  //! The "not found" result of find()
  const_iterator end(void) const {
    return nullptr;
  }

  //! Look up key; return its item, or end() when the key is absent
  const_iterator find(const key_type key) const {
    auto idx = key >> mask_bits_;
    if (idx >= slots_.size()) {
      return end();
    }
    auto it = slots_[idx].find(key, slot_items_, slot_loc_mask_);
    return it && it->second != EmptyVal ? it : nullptr;
  }

  //! Insert or overwrite key with val, allocating the missing slots up to
  //! the key's slot on demand. Return false when no space is left or a
  //! chunk operation fails.
  bool insert(key_type key, val_type val) {
    auto idx = key >> mask_bits_;
    if (idx >= slots_.size()) {
      if (ailego_unlikely(idx >= slots_.capacity())) {
        LOG_ERROR("no space to insert");
        return false;
      }
      for (auto i = slots_.size(); i <= idx; ++i) {
        if (ailego_unlikely(!alloc_slot(i))) {
          return false;
        }
      }
    }
    auto it = slots_[idx].find(key, slot_items_, slot_loc_mask_);
    if (ailego_unlikely(it == nullptr)) {
      LOG_ERROR("no space to insert");
      return false;
    }

    //! TODO: write memory is ok?
    const_cast<iterator>(it)->first = key;
    const_cast<iterator>(it)->second = val;

    return slots_[idx].update(it);
  }

 private:
  //! Allocate the chunk of slot idx (which must be the next slot), size it
  //! and read it into memory. Return false on any chunk failure.
  bool alloc_slot(size_t idx) {
    ailego_assert_with(idx == slots_.size(), "invalid idx");

    size_t size = slot_items_ * sizeof(Item);
    auto p = broker_->alloc_chunk(
        HnswRabitqChunkBroker::CHUNK_TYPE_NEIGHBOR_INDEX, idx, size);
    if (ailego_unlikely(p.first != 0)) {
      LOG_ERROR("Alloc data chunk failed");
      return false;
    }
    Chunk::Pointer chunk = p.second;
    if (ailego_unlikely(chunk->resize(size) != size)) {
      LOG_ERROR("Chunk resize failed, size=%zu", size);
      return false;
    }
    //! Read the whole data to memory
    IndexStorage::MemoryBlock data_block;
    if (ailego_unlikely(chunk->read(0U, data_block, size) != size)) {
      LOG_ERROR("Chunk read failed, size=%zu", size);
      return false;
    }

    slots_.emplace_back(std::move(chunk), std::move(data_block));
    return true;
  }

  //! Attach every existing neighbor-index chunk of the broker as a slot.
  //! Return 0 on success, a non-zero error code otherwise.
  int load(void) {
    size_t slots_cnt = broker_->get_chunk_cnt(
        HnswRabitqChunkBroker::CHUNK_TYPE_NEIGHBOR_INDEX);
    for (size_t i = 0UL; i < slots_cnt; ++i) {
      auto chunk = broker_->get_chunk(
          HnswRabitqChunkBroker::CHUNK_TYPE_NEIGHBOR_INDEX, i);
      if (!chunk) {
        LOG_ERROR("Get chunk failed, seq=%zu", i);
        return IndexError_InvalidFormat;
      }
      size_t size = sizeof(Item) * slot_items_;
      if (chunk->data_size() < size) {
        LOG_ERROR(
            "Hash params may be mismatch, seq=%zu, data_size=%zu "
            "expect=%zu",
            i, chunk->data_size(), size);
        return IndexError_InvalidFormat;
      }
      //! Read the whole data to memory
      IndexStorage::MemoryBlock data_block;
      if (ailego_unlikely(chunk->read(0U, data_block, size) != size)) {
        LOG_ERROR("Chunk read failed, size=%zu", size);
        // This function returns int with 0 meaning success, so `false`
        // would silently report success; return a real error code instead.
        return IndexError_InvalidFormat;
      }
      slots_.emplace_back(std::move(chunk), std::move(data_block));
    }
    return 0;
  }

 private:
  HnswRabitqChunkBroker::Pointer broker_{};  // chunk broker
  std::vector<Slot> slots_{};
  uint32_t mask_bits_{0U};
  uint32_t slot_items_{};  // must be a power of 2
  uint32_t slot_loc_mask_{};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_rabitq/hnsw_rabitq_index_provider.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include "zvec/core/framework/index_provider.h"
#include "zvec/core/framework/index_searcher.h"
#include "zvec/core/framework/index_streamer.h"
#include "hnsw_rabitq_entity.h"

namespace zvec {
namespace core {

//! IndexProvider implementation backed by an HnswRabitqEntity: counts,
//! lookups and iteration are all forwarded to the entity, while dimension
//! and type information comes from the index meta.
class HnswRabitqIndexProvider : public IndexProvider {
 public:
  //! Bind the provider to `meta`, `entity` and the owner class name.
  //! Note: `meta` is kept by reference, not copied.
  HnswRabitqIndexProvider(const IndexMeta &meta,
                          const HnswRabitqEntity::Pointer &entity,
                          const std::string &owner)
      : meta_(meta), entity_(entity), owner_class_(owner) {}

  HnswRabitqIndexProvider(const HnswRabitqIndexProvider &) = delete;
  HnswRabitqIndexProvider &operator=(const HnswRabitqIndexProvider &) = delete;

 public:  // holder interface
  //! Create a new iterator (holds a null iterator if allocation fails)
  IndexProvider::Iterator::Pointer create_iterator() override {
    auto *raw_iter = new (std::nothrow) Iterator(entity_);
    return HnswRabitqIndexProvider::Iterator::Pointer(raw_iter);
  }

  //! Retrieve count of vectors
  size_t count(void) const override {
    return entity_->doc_cnt();
  }

  //! Retrieve dimension of vector
  size_t dimension(void) const override {
    return meta_.dimension();
  }

  //! Retrieve type of vector
  IndexMeta::DataType data_type(void) const override {
    return meta_.data_type();
  }

  //! Retrieve vector size in bytes
  size_t element_size(void) const override {
    return meta_.element_size();
  }

 public:  // provider's unique interface
  //! Retrieve a vector using a primary key
  const void *get_vector(uint64_t key) const override {
    return entity_->get_vector_by_key(key);
  }

  //! Retrieve a vector using a primary key into a memory block
  int get_vector(const uint64_t key,
                 IndexStorage::MemoryBlock &block) const override {
    return entity_->get_vector_by_key(key, block);
  }

  //! Retrieve the owner class
  const std::string &owner_class(void) const override {
    return owner_class_;
  }

 private:
  //! Iterator walking the entity's node ids in increasing order
  class Iterator : public IndexProvider::Iterator {
   public:
    //! Start at node id 0
    Iterator(const HnswRabitqEntity::Pointer &entity)
        : entity_(entity), cur_id_(0U) {}

    //! Retrieve pointer of data
    //! NOTICE: the vec feature will be changed after iterating to next, so
    //! the caller need to keep a copy of it before iterator to next vector
    const void *data(void) const override {
      return entity_->get_vector(cur_id_);
    }

    //! Test if the iterator is valid
    bool is_valid(void) const override {
      return cur_id_ < entity_->doc_cnt();
    }

    //! Retrieve primary key
    uint64_t key(void) const override {
      return entity_->get_key(cur_id_);
    }

    //! Advance to the next id accepted by get_next_valid_id()
    void next(void) override {
      cur_id_ = get_next_valid_id(cur_id_ + 1);
    }

    //! Reset the iterator to the first id accepted by get_next_valid_id()
    void reset(void) {
      cur_id_ = get_next_valid_id(0);
    }

   private:
    //! Scan forward from start_id for the first id whose key differs from
    //! kInvalidNodeId; store and return it, or return kInvalidNodeId when
    //! the scan reaches doc_cnt().
    //! NOTE(review): a key is compared against the node-id sentinel here;
    //! confirm whether a key sentinel was intended.
    node_id_t get_next_valid_id(node_id_t start_id) {
      node_id_t id = start_id;
      while (id < entity_->doc_cnt()) {
        if (entity_->get_key(id) != kInvalidNodeId) {
          cur_id_ = id;
          return id;
        }
        id++;
      }
      return kInvalidNodeId;
    }

   private:
    const HnswRabitqEntity::Pointer entity_;
    node_id_t cur_id_;
  };

 private:
  const IndexMeta &meta_;
  const HnswRabitqEntity::Pointer entity_;
  const std::string owner_class_;
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_rabitq/hnsw_rabitq_params.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <string>

namespace zvec {
namespace core {

// ---- General parameters ----
inline const std::string PARAM_HNSW_RABITQ_GENERAL_DIMENSION =
    "proxima.hnsw_rabitq.general.dimension";

// ---- Builder parameters ----
inline const std::string PARAM_HNSW_RABITQ_BUILDER_THREAD_COUNT =
    "proxima.hnsw_rabitq.builder.thread_count";
inline const std::string PARAM_HNSW_RABITQ_BUILDER_MEMORY_QUOTA =
    "proxima.hnsw_rabitq.builder.memory_quota";
inline const std::string PARAM_HNSW_RABITQ_BUILDER_EFCONSTRUCTION =
    "proxima.hnsw_rabitq.builder.efconstruction";
inline const std::string PARAM_HNSW_RABITQ_BUILDER_SCALING_FACTOR =
    "proxima.hnsw_rabitq.builder.scaling_factor";
inline const std::string PARAM_HNSW_RABITQ_BUILDER_CHECK_INTERVAL_SECS =
    "proxima.hnsw_rabitq.builder.check_interval_secs";
inline const std::string PARAM_HNSW_RABITQ_BUILDER_NEIGHBOR_PRUNE_MULTIPLIER =
    "proxima.hnsw_rabitq.builder.neighbor_prune_multiplier";
inline const std::string PARAM_HNSW_RABITQ_BUILDER_MIN_NEIGHBOR_COUNT =
    "proxima.hnsw_rabitq.builder.min_neighbor_count";
inline const std::string PARAM_HNSW_RABITQ_BUILDER_MAX_NEIGHBOR_COUNT =
    "proxima.hnsw_rabitq.builder.max_neighbor_count";
inline const std::string
    PARAM_HNSW_RABITQ_BUILDER_L0_MAX_NEIGHBOR_COUNT_MULTIPLIER =
        "proxima.hnsw_rabitq.builder.l0_max_neighbor_count_multiplier";

// ---- Searcher parameters ----
inline const std::string PARAM_HNSW_RABITQ_SEARCHER_EF =
    "proxima.hnsw_rabitq.searcher.ef";
inline const std::string PARAM_HNSW_RABITQ_SEARCHER_BRUTE_FORCE_THRESHOLD =
    "proxima.hnsw_rabitq.searcher.brute_force_threshold";
inline const std::string PARAM_HNSW_RABITQ_SEARCHER_NEIGHBORS_IN_MEMORY_ENABLE =
    "proxima.hnsw_rabitq.searcher.neighbors_in_memory_enable";
inline const std::string PARAM_HNSW_RABITQ_SEARCHER_MAX_SCAN_RATIO =
    "proxima.hnsw_rabitq.searcher.max_scan_ratio";
inline const std::string PARAM_HNSW_RABITQ_SEARCHER_CHECK_CRC_ENABLE =
    "proxima.hnsw_rabitq.searcher.check_crc_enable";
inline const std::string PARAM_HNSW_RABITQ_SEARCHER_VISIT_BLOOMFILTER_ENABLE =
    "proxima.hnsw_rabitq.searcher.visit_bloomfilter_enable";
inline const std::string
    PARAM_HNSW_RABITQ_SEARCHER_VISIT_BLOOMFILTER_NEGATIVE_PROB =
        "proxima.hnsw_rabitq.searcher.visit_bloomfilter_negative_prob";
inline const std::string PARAM_HNSW_RABITQ_SEARCHER_FORCE_PADDING_RESULT_ENABLE =
    "proxima.hnsw_rabitq.searcher.force_padding_result_enable";

// ---- Streamer parameters ----
inline const std::string PARAM_HNSW_RABITQ_STREAMER_MAX_SCAN_RATIO =
    "proxima.hnsw_rabitq.streamer.max_scan_ratio";
inline const std::string PARAM_HNSW_RABITQ_STREAMER_MIN_SCAN_LIMIT =
    "proxima.hnsw_rabitq.streamer.min_scan_limit";
inline const std::string PARAM_HNSW_RABITQ_STREAMER_MAX_SCAN_LIMIT =
    "proxima.hnsw_rabitq.streamer.max_scan_limit";
inline const std::string PARAM_HNSW_RABITQ_STREAMER_EF =
    "proxima.hnsw_rabitq.streamer.ef";
inline const std::string PARAM_HNSW_RABITQ_STREAMER_EFCONSTRUCTION =
    "proxima.hnsw_rabitq.streamer.efconstruction";
inline const std::string PARAM_HNSW_RABITQ_STREAMER_MAX_NEIGHBOR_COUNT =
    "proxima.hnsw_rabitq.streamer.max_neighbor_count";
inline const std::string
    PARAM_HNSW_RABITQ_STREAMER_L0_MAX_NEIGHBOR_COUNT_MULTIPLIER =
        "proxima.hnsw_rabitq.streamer.l0_max_neighbor_count_multiplier";
inline const std::string PARAM_HNSW_RABITQ_STREAMER_SCALING_FACTOR =
    "proxima.hnsw_rabitq.streamer.scaling_factor";
inline const std::string PARAM_HNSW_RABITQ_STREAMER_BRUTE_FORCE_THRESHOLD =
    "proxima.hnsw_rabitq.streamer.brute_force_threshold";
inline const std::string PARAM_HNSW_RABITQ_STREAMER_DOCS_HARD_LIMIT =
    "proxima.hnsw_rabitq.streamer.docs_hard_limit";
inline const std::string PARAM_HNSW_RABITQ_STREAMER_DOCS_SOFT_LIMIT =
    "proxima.hnsw_rabitq.streamer.docs_soft_limit";
inline const std::string PARAM_HNSW_RABITQ_STREAMER_MAX_INDEX_SIZE =
    "proxima.hnsw_rabitq.streamer.max_index_size";
inline const std::string PARAM_HNSW_RABITQ_STREAMER_VISIT_BLOOMFILTER_ENABLE =
    "proxima.hnsw_rabitq.streamer.visit_bloomfilter_enable";
inline const std::string
    PARAM_HNSW_RABITQ_STREAMER_VISIT_BLOOMFILTER_NEGATIVE_PROB =
        "proxima.hnsw_rabitq.streamer.visit_bloomfilter_negative_prob";
inline const std::string PARAM_HNSW_RABITQ_STREAMER_CHECK_CRC_ENABLE =
    "proxima.hnsw_rabitq.streamer.check_crc_enable";
inline const std::string PARAM_HNSW_RABITQ_STREAMER_NEIGHBOR_PRUNE_MULTIPLIER =
    "proxima.hnsw_rabitq.streamer.neighbor_prune_multiplier";
inline const std::string PARAM_HNSW_RABITQ_STREAMER_CHUNK_SIZE =
    "proxima.hnsw_rabitq.streamer.chunk_size";
inline const std::string PARAM_HNSW_RABITQ_STREAMER_FILTER_SAME_KEY =
    "proxima.hnsw_rabitq.streamer.filter_same_key";
inline const std::string PARAM_HNSW_RABITQ_STREAMER_GET_VECTOR_ENABLE =
    "proxima.hnsw_rabitq.streamer.get_vector_enable";
inline const std::string PARAM_HNSW_RABITQ_STREAMER_MIN_NEIGHBOR_COUNT =
    "proxima.hnsw_rabitq.streamer.min_neighbor_count";
inline const std::string PARAM_HNSW_RABITQ_STREAMER_FORCE_PADDING_RESULT_ENABLE =
    "proxima.hnsw_rabitq.streamer.force_padding_result_enable";
inline const std::string PARAM_HNSW_RABITQ_STREAMER_ESTIMATE_DOC_COUNT =
    "proxima.hnsw_rabitq.streamer.estimate_doc_count";
inline const std::string PARAM_HNSW_RABITQ_STREAMER_USE_ID_MAP =
    "proxima.hnsw_rabitq.streamer.use_id_map";

// ---- Reducer parameters ----
inline const std::string PARAM_HNSW_RABITQ_REDUCER_WORKING_PATH =
    "proxima.hnsw_rabitq.reducer.working_path";
inline const std::string PARAM_HNSW_RABITQ_REDUCER_NUM_OF_ADD_THREADS =
    "proxima.hnsw_rabitq.reducer.num_of_add_threads";
inline const std::string PARAM_HNSW_RABITQ_REDUCER_INDEX_NAME =
    "proxima.hnsw_rabitq.reducer.index_name";
inline const std::string PARAM_HNSW_RABITQ_REDUCER_EFCONSTRUCTION =
    "proxima.hnsw_rabitq.reducer.efconstruction";

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_rabitq/hnsw_rabitq_query_algorithm.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "hnsw_rabitq_query_algorithm.h"
#include <chrono>
#include <iostream>
#include <ailego/internal/cpu_features.h>
#include <rabitqlib/index/estimator.hpp>
#include "zvec/ailego/internal/platform.h"
#include "hnsw_rabitq_entity.h"
#include "hnsw_rabitq_query_entity.h"

namespace zvec {
namespace core {

//! Bind the algorithm to `entity`, seed the random engine from the system
//! clock, and cache ex_bits / padded_dim from the entity together with the
//! inner-product function that rabitqlib selects for that ex_bits value.
HnswRabitqQueryAlgorithm::HnswRabitqQueryAlgorithm(HnswRabitqEntity &entity,
                                                   size_t num_clusters,
                                                   RabitqMetricType metric_type)
    : entity_(entity),
      mt_(std::chrono::system_clock::now().time_since_epoch().count()),
      lock_pool_(kLockCnt),
      num_clusters_(num_clusters),
      metric_type_(metric_type) {
  ex_bits_ = entity_.ex_bits();
  padded_dim_ = entity_.padded_dim();
  ip_func_ = rabitqlib::select_excode_ipfunc(ex_bits_);
  // %zu consumes a size_t from the variadic argument list. Cast explicitly
  // so the call is well-defined whatever width the members are declared
  // with (the entity getters return uint8_t / uint32_t).
  LOG_INFO(
      "Create query algorithm. num_clusters=%zu ex_bits=%zu padded_dim=%zu",
      static_cast<size_t>(num_clusters_), static_cast<size_t>(ex_bits_),
      static_cast<size_t>(padded_dim_));
}

//! Cleanup hook; currently has nothing to release and always returns 0
int HnswRabitqQueryAlgorithm::cleanup() {
  return 0;
}

//! Run one KNN query over the graph.
//! The neighbours found are left in ctx's top-k heap (and in the per-group
//! heaps when group-by search is enabled). Returns 0 on every path visible
//! here, including the case where the graph has no entry point.
int HnswRabitqQueryAlgorithm::search(HnswRabitqQueryEntity *entity,
                                     HnswRabitqContext *ctx) const {
  // Read the graph entry (top level + entry node) under the global spin lock
  spin_lock_.lock();
  auto top_level = entity_.cur_max_level();
  auto start_node = entity_.entry_point();
  spin_lock_.unlock();

  if (ailego_unlikely(start_node == kInvalidNodeId)) {
    return 0;
  }

  // Binary estimate of the entry node seeds the descent
  EstimateRecord start_est;
  get_bin_est(entity_.get_vector(start_node), start_est, *entity);

  // Greedy descent through the upper levels, down to (but excluding) level 0
  level_t level = top_level;
  while (level >= 1) {
    select_entry_point(level, &start_node, &start_est, ctx, entity);
    --level;
  }

  // Full search on the bottom level
  auto &result_heap = ctx->topk_heap();
  result_heap.clear();
  search_neighbors(0, &start_node, &start_est, result_heap, ctx, entity);

  if (ctx->group_by_search()) {
    expand_neighbors_by_group(result_heap, ctx, entity);
  }

  return 0;
}


//! Greedy walk on one graph level (ef = 1).
//! Starting from *entry_point, repeatedly scans the neighbours of the current
//! node and moves to any neighbour whose binary estimate is smaller than the
//! best seen so far; stops after a full scan that brought no improvement.
//! On return *entry_point / *curest hold the best node found and its estimate.
void HnswRabitqQueryAlgorithm::select_entry_point(
    level_t level, node_id_t *entry_point, EstimateRecord *curest,
    HnswRabitqContext *ctx, HnswRabitqQueryEntity *query_entity) const {
  auto &graph = ctx->get_entity();

  bool improved = true;
  while (improved) {
    improved = false;

    const Neighbors adjacent = graph.get_neighbors(level, *entry_point);
    if (ailego_unlikely(ctx->debugging())) {
      (*ctx->mutable_stats_get_neighbors())++;
    }
    ailego_prefetch(adjacent.data);

    // An empty neighbour list leaves `improved` false and ends the walk
    const uint32_t degree = adjacent.size();
    for (uint32_t idx = 0; idx < degree; ++idx) {
      EstimateRecord neighbor_est;
      get_bin_est(entity_.get_vector(adjacent[idx]), neighbor_est,
                  *query_entity);

      if (neighbor_est.est_dist < curest->est_dist) {
        *curest = neighbor_est;
        *entry_point = adjacent[idx];
        improved = true;
      }
    }
  }
}

//! Best-first search on one graph level.
//! \param level         graph level to search
//! \param entry_point   in: start node; out: node with the smallest estimate
//!                      met during the search
//! \param dist          in: estimate of the start node; out: estimate of the
//!                      node written to *entry_point
//! \param topk          heap that collects the accepted (non-filtered) nodes
//! \param ctx           search context (visit filter, candidate heap, user
//!                      filter, scan limit, debug stats)
//! \param query_entity  per-query data used by the estimators
void HnswRabitqQueryAlgorithm::search_neighbors(
    level_t level, node_id_t *entry_point, EstimateRecord *dist, TopkHeap &topk,
    HnswRabitqContext *ctx, HnswRabitqQueryEntity *query_entity) const {
  const auto &entity = ctx->get_entity();
  VisitFilter &visit = ctx->visit_filter();
  CandidateHeap &candidates = ctx->candidates();

  // filter(id) == true means the node must not be reported in topk; it is
  // still used as a candidate for graph traversal.
  std::function<bool(node_id_t)> filter = [](node_id_t) { return false; };
  if (ctx->filter().is_valid()) {
    filter = [&](node_id_t id) { return ctx->filter()(entity.get_key(id)); };
  }

  candidates.clear();
  visit.clear();
  visit.set_visited(*entry_point);
  if (!filter(*entry_point)) {
    topk.emplace(*entry_point, ResultRecord(*dist));
  }
  candidates.emplace(*entry_point, ResultRecord(*dist));

  // Scratch list of not-yet-visited neighbours. Declared once and reused so
  // that the hot loop does not allocate on every expansion.
  std::vector<node_id_t> neighbor_ids;

  while (!candidates.empty() && !ctx->reach_scan_limit()) {
    auto top = candidates.begin();
    node_id_t main_node = top->first;
    auto main_dist = top->second;

    // The closest remaining candidate is already worse than the current
    // worst result: nothing left can improve topk.
    if (topk.full() && main_dist.est_dist > topk[0].second.est_dist) {
      break;
    }

    candidates.pop();
    const Neighbors neighbors = entity.get_neighbors(level, main_node);
    ailego_prefetch(neighbors.data);
    if (ailego_unlikely(ctx->debugging())) {
      (*ctx->mutable_stats_get_neighbors())++;
    }

    // Collect the neighbours seen for the first time and mark them visited
    neighbor_ids.clear();
    neighbor_ids.reserve(neighbors.size());
    for (uint32_t i = 0; i < neighbors.size(); ++i) {
      node_id_t node = neighbors[i];
      if (visit.visited(node)) {
        if (ailego_unlikely(ctx->debugging())) {
          (*ctx->mutable_stats_visit_dup_cnt())++;
        }
        continue;
      }
      visit.set_visited(node);
      neighbor_ids.push_back(node);
    }
    if (neighbor_ids.empty()) {
      continue;
    }

    for (node_id_t node : neighbor_ids) {
      EstimateRecord candest;
      auto *cand_vector = entity_.get_vector(node);
      ailego_prefetch(cand_vector);
      get_bin_est(cand_vector, candest, *query_entity);

      if (ex_bits_ > 0) {
        // Check preliminary score against current worst full estimate.
        bool flag_update_KNNs =
            (!topk.full()) || candest.low_dist < topk[0].second.est_dist;

        if (flag_update_KNNs) {
          // Compute the full estimate if promising.
          get_full_est(cand_vector, candest, *query_entity);
        } else {
          continue;
        }
      } else {
        // ex_bits_ == 0: est_dist is already the best estimate
        if (topk.full() && candest.est_dist >= topk[0].second.est_dist) {
          continue;
        }
      }
      candidates.emplace(node, ResultRecord(candest));
      // update entry_point for next level scan
      if (candest < *dist) {
        *entry_point = node;
        *dist = candest;
      }
      if (!filter(node)) {
        topk.emplace(node, ResultRecord(candest));
      }
    }  // end for
  }  // while
}

//! Distribute the top-k results into per-group heaps and, when fewer than
//! ctx->group_num() groups were found, keep walking level 0 of the graph from
//! the top-k nodes to discover more groups.
//! \param topk          results of the regular level-0 search
//! \param ctx           search context (group-by / filter callbacks, group
//!                      heaps, visit filter, candidate heap, scan limit)
//! \param query_entity  per-query data used by the estimators
//! Does nothing when the context has no valid group-by function.
void HnswRabitqQueryAlgorithm::expand_neighbors_by_group(
    TopkHeap &topk, HnswRabitqContext *ctx,
    HnswRabitqQueryEntity *query_entity) const {
  if (!ctx->group_by().is_valid()) {
    return;
  }

  const auto &entity = ctx->get_entity();
  std::function<std::string(node_id_t)> group_by = [&](node_id_t id) {
    return ctx->group_by()(entity.get_key(id));
  };

  // stage 1, divide the top-k results into groups
  std::map<std::string, TopkHeap> &group_topk_heaps = ctx->group_topk_heaps();
  for (uint32_t i = 0; i < topk.size(); ++i) {
    node_id_t id = topk[i].first;
    auto score = topk[i].second;

    std::string group_id = group_by(id);

    auto &topk_heap = group_topk_heaps[group_id];
    if (topk_heap.empty()) {
      // a heap that is still empty gets its per-group limit set here
      topk_heap.limit(ctx->group_topk());
    }
    topk_heap.emplace_back(id, score);
  }

  // stage 2, expand to reach group num as possible
  if (group_topk_heaps.size() < ctx->group_num()) {
    VisitFilter &visit = ctx->visit_filter();
    CandidateHeap &candidates = ctx->candidates();

    // filter(id) == true means the node must not be reported
    std::function<bool(node_id_t)> filter = [](node_id_t) { return false; };
    if (ctx->filter().is_valid()) {
      filter = [&](node_id_t id) { return ctx->filter()(entity.get_key(id)); };
    }

    // refill to get enough groups
    candidates.clear();
    visit.clear();
    for (uint32_t i = 0; i < topk.size(); ++i) {
      node_id_t id = topk[i].first;
      auto score = topk[i].second;

      visit.set_visited(id);
      candidates.emplace_back(id, score);
    }

    // Scratch list of not-yet-visited neighbours, reused across iterations so
    // that the expansion loop does not allocate on every step.
    std::vector<node_id_t> neighbor_ids;

    // do expand
    while (!candidates.empty() && !ctx->reach_scan_limit()) {
      auto top = candidates.begin();
      node_id_t main_node = top->first;

      candidates.pop();
      const Neighbors neighbors = entity.get_neighbors(0, main_node);
      ailego_prefetch(neighbors.data);
      if (ailego_unlikely(ctx->debugging())) {
        (*ctx->mutable_stats_get_neighbors())++;
      }

      neighbor_ids.clear();
      neighbor_ids.reserve(neighbors.size());
      for (uint32_t i = 0; i < neighbors.size(); ++i) {
        node_id_t node = neighbors[i];
        if (visit.visited(node)) {
          if (ailego_unlikely(ctx->debugging())) {
            (*ctx->mutable_stats_visit_dup_cnt())++;
          }
          continue;
        }
        visit.set_visited(node);
        neighbor_ids.push_back(node);
      }
      if (neighbor_ids.empty()) {
        continue;
      }

      for (node_id_t node : neighbor_ids) {
        EstimateRecord candest;
        auto *cand_vector = entity_.get_vector(node);
        ailego_prefetch(cand_vector);
        get_full_est(cand_vector, candest, *query_entity);

        if (!filter(node)) {
          std::string group_id = group_by(node);

          auto &topk_heap = group_topk_heaps[group_id];
          if (topk_heap.empty()) {
            topk_heap.limit(ctx->group_topk());
          }
          topk_heap.emplace_back(node, ResultRecord(candest));

          // NOTE(review): this only leaves the inner loop; the enclosing
          // while keeps expanding until the candidates run out or the scan
          // limit is hit. Confirm whether the whole expansion should stop
          // once enough groups were found.
          if (group_topk_heaps.size() >= ctx->group_num()) {
            break;
          }
        }
        candidates.emplace(node, ResultRecord(candest));
      }  // end for
    }  // end while
  }  // end if
}

//! Binary-code estimate of the distance between the query and a stored vector.
//! Fills res.ip_x0_qr, res.est_dist and res.low_dist through
//! rabitqlib::split_single_estdist.
//! \param vector  stored record, as returned by the entity's get_vector()
//! \param res     estimate record to fill
//! \param entity  per-query data (query wrapper and q_to_centroids table)
void HnswRabitqQueryAlgorithm::get_bin_est(
    const void *vector, EstimateRecord &res,
    HnswRabitqQueryEntity &entity) const {
  const uint32_t cluster_id = entity_.get_cluster_id(vector);
  const char *bin_data = entity_.get_bin_data(vector);
  const auto &q_to_centroids = entity.q_to_centroids;
  const float norm = q_to_centroids[cluster_id];

  // Metric-dependent additive term and error term passed to the estimator
  float g_add;
  float g_error;
  if (metric_type_ == RabitqMetricType::kIP) {
    g_add = -norm;
    // the IP error terms are stored after the first num_clusters_ entries
    g_error = q_to_centroids[cluster_id + num_clusters_];
  } else {
    // L2 distance
    g_add = norm * norm;
    g_error = norm;
  }

  rabitqlib::split_single_estdist(bin_data, *entity.query_wrapper, padded_dim_,
                                  res.ip_x0_qr, res.est_dist, res.low_dist,
                                  g_add, g_error);
}

//! Estimate of the distance between the query and a stored vector that uses
//! both the binary code and the extra-bit code of the record.
//! Fills res.est_dist, res.low_dist and res.ip_x0_qr through
//! rabitqlib::split_single_fulldist.
//! \param vector  stored record, as returned by the entity's get_vector()
//! \param res     estimate record to fill
//! \param entity  per-query data (query wrapper and q_to_centroids table)
void HnswRabitqQueryAlgorithm::get_full_est(
    const void *vector, EstimateRecord &res,
    HnswRabitqQueryEntity &entity) const {
  const uint32_t cluster_id = entity_.get_cluster_id(vector);
  const char *bin_data = entity_.get_bin_data(vector);
  const char *ex_data = entity_.get_ex_data(vector);
  const auto &q_to_centroids = entity.q_to_centroids;
  const float norm = q_to_centroids[cluster_id];

  // Metric-dependent additive term and error term passed to the estimator
  float g_add;
  float g_error;
  if (metric_type_ == RabitqMetricType::kIP) {
    g_add = -norm;
    // the IP error terms are stored after the first num_clusters_ entries
    g_error = q_to_centroids[cluster_id + num_clusters_];
  } else {
    // L2 distance
    g_add = norm * norm;
    g_error = norm;
  }

  rabitqlib::split_single_fulldist(bin_data, ex_data, ip_func_,
                                   *entity.query_wrapper, padded_dim_, ex_bits_,
                                   res.est_dist, res.low_dist, res.ip_x0_qr,
                                   g_add, g_error);
}


}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_rabitq/hnsw_rabitq_query_algorithm.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <stdint.h>
#include <ailego/parallel/lock.h>
#include "hnsw_rabitq_context.h"
#include "hnsw_rabitq_dist_calculator.h"
#include "hnsw_rabitq_entity.h"
#include "rabitq_params.h"

namespace zvec {
namespace core {

class HnswRabitqQueryEntity;

//! Query-side HNSW graph algorithm working on RaBitQ-quantized vectors.
//! Holds a reference to the entity it searches; the entity must outlive the
//! algorithm object.
class HnswRabitqQueryAlgorithm {
 public:
  using UPointer = std::unique_ptr<HnswRabitqQueryAlgorithm>;

 public:
  //! Constructor
  //! \param entity        graph/vector storage to search
  //! \param num_clusters  cluster count, used as the offset of the IP error
  //!                      terms inside a query's q_to_centroids table
  //! \param metric_type   metric handled by the estimators
  explicit HnswRabitqQueryAlgorithm(HnswRabitqEntity &entity,
                                    size_t num_clusters,
                                    RabitqMetricType metric_type);

  //! Destructor
  ~HnswRabitqQueryAlgorithm() = default;

  //! Cleanup HnswRabitqQueryAlgorithm
  int cleanup();

  //! do knn search in graph
  //! return 0 on success, or errCode in failure. results saved in ctx
  int search(HnswRabitqQueryEntity *entity, HnswRabitqContext *ctx) const;

  //! Initiate HnswRabitqQueryAlgorithm
  //! Rebuilds the per-level probability table used by get_random_level().
  //! Always returns 0.
  int init() {
    level_probas_.clear();
    double level_mult =
        1 / std::log(static_cast<double>(entity_.scaling_factor()));
    for (int level = 0;; level++) {
      // refers faiss get_random_level alg
      double proba =
          std::exp(-level / level_mult) * (1 - std::exp(-1 / level_mult));
      // stop once a level becomes practically unreachable
      if (proba < 1e-9) {
        break;
      }
      level_probas_.push_back(proba);
    }

    return 0;
  }

  //! Generate a random level
  //! return graph level; level 0 when the probability table is empty
  //! (init() not called yet, or it produced no level)
  uint32_t get_random_level() const {
    // Without this guard `size() - 1` below would wrap around to UINT32_MAX
    if (level_probas_.empty()) {
      return 0;
    }
    // gen rand double in [0, 1]; keep full double precision for the draw
    double f = static_cast<double>(mt_()) / static_cast<double>(mt_.max());
    for (size_t level = 0; level < level_probas_.size(); level++) {
      if (f < level_probas_[level]) {
        return static_cast<uint32_t>(level);
      }
      f -= level_probas_[level];
    }
    // the draw fell into the truncated tail: use the highest level
    return static_cast<uint32_t>(level_probas_.size() - 1);
  }

  //! Full (binary + extra bits) distance estimate for a node id
  void get_full_est(node_id_t id, EstimateRecord &res,
                    HnswRabitqQueryEntity &entity) const {
    get_full_est(entity_.get_vector(id), res, entity);
  }

 private:
  //! Select in upper layer to get entry point for next layer search
  void select_entry_point(level_t level, node_id_t *entry_point,
                          EstimateRecord *dist, HnswRabitqContext *ctx,
                          HnswRabitqQueryEntity *entity) const;


  //! Given a node id and level, search the nearest neighbors in graph
  //! Note: the nearest neighbors result keeps in topk, and entry_point and
  //! dist will be updated to current level nearest node id and distance
  void search_neighbors(level_t level, node_id_t *entry_point,
                        EstimateRecord *dist, TopkHeap &topk,
                        HnswRabitqContext *ctx,
                        HnswRabitqQueryEntity *entity) const;


  //! expand neighbors until group nums are reached
  void expand_neighbors_by_group(TopkHeap &topk, HnswRabitqContext *ctx,
                                 HnswRabitqQueryEntity *query_entity) const;

  //! Full (binary + extra bits) distance estimate for a stored record
  void get_full_est(const void *vector, EstimateRecord &res,
                    HnswRabitqQueryEntity &entity) const;
  //! Binary-code-only distance estimate for a stored record
  void get_bin_est(const void *vector, EstimateRecord &res,
                   HnswRabitqQueryEntity &entity) const;

 private:
  HnswRabitqQueryAlgorithm(const HnswRabitqQueryAlgorithm &) = delete;
  HnswRabitqQueryAlgorithm &operator=(const HnswRabitqQueryAlgorithm &) =
      delete;


 private:
  static constexpr uint32_t kLockCnt{1U << 8};
  static constexpr uint32_t kLockMask{kLockCnt - 1U};

  HnswRabitqEntity &entity_;
  mutable std::mt19937 mt_{};
  std::vector<double> level_probas_{};

  mutable ailego::SpinMutex spin_lock_{};  // global spin lock
  std::mutex mutex_{};                     // global mutex
  // TODO: spin lock?
  std::vector<std::mutex> lock_pool_{};
  size_t num_clusters_{0};
  RabitqMetricType metric_type_{RabitqMetricType::kL2};
  size_t padded_dim_{0};
  size_t ex_bits_{0};
  //! ex-code inner-product kernel, assigned by the constructor
  float (*ip_func_)(const float *, const uint8_t *, size_t){nullptr};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_rabitq/hnsw_rabitq_query_entity.h
================================================
// Copyright 2025-present the centaurdb project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License

#pragma once

#include <memory>
#include <vector>
#include <rabitqlib/index/query.hpp>
namespace zvec::core {

//! Per-query data consumed by the HNSW-RaBitQ distance estimators
struct HnswRabitqQueryEntity {
  //! NOTE(review): presumably the query after the RaBitQ rotation — confirm
  std::vector<float> rotated_query;
  //! Read by the estimators at index `cluster id`; with the IP metric they
  //! additionally read index `cluster id + num_clusters` as the error term
  std::vector<float> q_to_centroids;
  //! Query object handed to rabitqlib's split_single_* estimators
  std::unique_ptr<rabitqlib::SplitSingleQuery<float>> query_wrapper;
};

}  // namespace zvec::core

================================================
FILE: src/core/algorithm/hnsw_rabitq/hnsw_rabitq_register.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License
#include "hnsw_rabitq_builder.h"
#include "hnsw_rabitq_searcher.h"
#include "hnsw_rabitq_streamer.h"
#include "rabitq_converter.h"
#include "rabitq_reformer.h"

namespace zvec::core {

// Index factory registrations for the HNSW-RaBitQ components: streamer,
// reformer, searcher, converter and builder.
// NOTE(review): the *_ALIAS variants take the class twice; presumably the
// first argument is the registered name — confirm against the macro definition
INDEX_FACTORY_REGISTER_STREAMER(HnswRabitqStreamer);
INDEX_FACTORY_REGISTER_REFORMER_ALIAS(RabitqReformer, RabitqReformer);
INDEX_FACTORY_REGISTER_SEARCHER(HnswRabitqSearcher);
INDEX_FACTORY_REGISTER_CONVERTER_ALIAS(RabitqConverter, RabitqConverter);
INDEX_FACTORY_REGISTER_BUILDER(HnswRabitqBuilder);

}  // namespace zvec::core

================================================
FILE: src/core/algorithm/hnsw_rabitq/hnsw_rabitq_searcher.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "hnsw_rabitq_searcher.h"
#include <rabitqlib/utils/rotator.hpp>
#include "hnsw_rabitq_algorithm.h"
#include "hnsw_rabitq_entity.h"
#include "hnsw_rabitq_index_provider.h"
#include "hnsw_rabitq_params.h"
#include "hnsw_rabitq_query_entity.h"
#include "hnsw_rabitq_searcher_entity.h"
#include "rabitq_params.h"

namespace zvec {
namespace core {

//! Constructor: nothing beyond member default initialization
HnswRabitqSearcher::HnswRabitqSearcher() = default;

//! Destructor: members release themselves
HnswRabitqSearcher::~HnswRabitqSearcher() = default;

//! Initialize the searcher from user parameters.
//! Reads the searcher options out of search_params, validates the bloom
//! filter probability, initializes the reformer and moves the searcher to
//! STATE_INITED.
//! \return 0 on success, IndexError_InvalidArgument when the bloom filter
//!         negative probability is outside (0,1), or the reformer's error code
int HnswRabitqSearcher::init(const ailego::Params &search_params) {
  params_ = search_params;

  // Graph search options
  params_.get(PARAM_HNSW_RABITQ_SEARCHER_EF, &ef_);
  params_.get(PARAM_HNSW_RABITQ_SEARCHER_MAX_SCAN_RATIO, &max_scan_ratio_);
  // Visit filter / loading options
  params_.get(PARAM_HNSW_RABITQ_SEARCHER_VISIT_BLOOMFILTER_ENABLE,
              &bf_enabled_);
  params_.get(PARAM_HNSW_RABITQ_SEARCHER_CHECK_CRC_ENABLE, &check_crc_enabled_);
  params_.get(PARAM_HNSW_RABITQ_SEARCHER_NEIGHBORS_IN_MEMORY_ENABLE,
              &neighbors_in_memory_enabled_);
  params_.get(PARAM_HNSW_RABITQ_SEARCHER_VISIT_BLOOMFILTER_NEGATIVE_PROB,
              &bf_negative_probability_);
  // Result shaping options
  params_.get(PARAM_HNSW_RABITQ_SEARCHER_BRUTE_FORCE_THRESHOLD,
              &bruteforce_threshold_);
  params_.get(PARAM_HNSW_RABITQ_SEARCHER_FORCE_PADDING_RESULT_ENABLE,
              &force_padding_topk_enabled_);

  // ef == 0 means "use the default"
  if (ef_ == 0) {
    ef_ = HnswRabitqEntity::kDefaultEf;
  }

  const bool prob_too_small = bf_negative_probability_ <= 0.0f;
  const bool prob_too_large = bf_negative_probability_ >= 1.0f;
  if (prob_too_small || prob_too_large) {
    LOG_ERROR(
        "[%s] must be in range (0,1)",
        PARAM_HNSW_RABITQ_SEARCHER_VISIT_BLOOMFILTER_NEGATIVE_PROB.c_str());
    return IndexError_InvalidArgument;
  }

  entity_.set_neighbors_in_memory(neighbors_in_memory_enabled_);

  // The reformer only needs the metric name at this point
  ailego::Params reformer_cfg;
  reformer_cfg.set(PARAM_RABITQ_METRIC_NAME, meta_.metric_name());
  const int rc = reformer_.init(reformer_cfg);
  if (rc != 0) {
    LOG_ERROR("Failed to initialize RabitqReformer: %d", rc);
    return rc;
  }

  state_ = STATE_INITED;

  LOG_DEBUG(
      "Init params: ef=%u maxScanRatio=%f bfEnabled=%u checkCrcEnabled=%u "
      "neighborsInMemoryEnabled=%u bfNagtiveProb=%f bruteForceThreshold=%u "
      "forcePadding=%u",
      ef_, max_scan_ratio_, bf_enabled_, check_crc_enabled_,
      neighbors_in_memory_enabled_, bf_negative_probability_,
      bruteforce_threshold_, force_padding_topk_enabled_);

  return 0;
}

void HnswRabitqSearcher::print_debug_info() {
  for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {
    Neighbors neighbours = entity_.get_neighbors(0, id);
    std::cout << "node: " << id << "; ";
    for (uint32_t i = 0; i < neighbours.size(); ++i) {
      std::cout << neighbours[i];

      if (i == neighbours.size() - 1) {
        std::cout << std::endl;
      } else {
        std::cout << ", ";
      }
    }
  }
}

//! Drop the loaded state and put the tunables back to their defaults.
//! Leaves the searcher in STATE_INIT and always returns 0.
int HnswRabitqSearcher::cleanup() {
  LOG_INFO("Begin HnswRabitqSearcher:cleanup");

  // Index-related state
  metric_.reset();
  meta_.clear();

  // Statistics
  stats_.clear_attributes();
  stats_.set_loaded_count(0UL);
  stats_.set_loaded_costtime(0UL);

  // Search tunables back to defaults
  ef_ = HnswRabitqEntity::kDefaultEf;
  max_scan_ratio_ = HnswRabitqEntity::kDefaultScanRatio;
  max_scan_num_ = 0U;
  bruteforce_threshold_ = HnswRabitqEntity::kDefaultBruteForceThreshold;

  // Visit filter and loading switches
  bf_enabled_ = false;
  bf_negative_probability_ = HnswRabitqEntity::kDefaultBFNegativeProbability;
  check_crc_enabled_ = false;
  neighbors_in_memory_enabled_ = false;

  entity_.cleanup();
  state_ = STATE_INIT;

  LOG_INFO("End HnswRabitqSearcher:cleanup");

  return 0;
}

//! Load an index from storage.
//! Deserializes the index meta, loads the reformer and the graph entity,
//! creates the query algorithm, resolves the metric and moves the searcher to
//! STATE_LOADED.
//! \param container  storage holding the index
//! \param metric     metric to use; when empty one is created from the name
//!                   stored in the index meta
//! \return 0 on success; IndexError_Runtime when init() was not called first,
//!         IndexError_NoExist / IndexError_Mismatch on metric problems, or the
//!         error code of the failing load step
int HnswRabitqSearcher::load(IndexStorage::Pointer container,
                             IndexMetric::Pointer metric) {
  if (state_ != STATE_INITED) {
    LOG_ERROR("Init the searcher first before load index");
    return IndexError_Runtime;
  }

  LOG_INFO("Begin HnswRabitqSearcher:load");

  const auto begin_ms = ailego::Monotime::MilliSeconds();

  int rc = IndexHelper::DeserializeFromStorage(container.get(), &meta_);
  if (rc != 0) {
    LOG_ERROR("Failed to deserialize meta from container");
    return rc;
  }

  rc = reformer_.load(container);
  if (rc != 0) {
    LOG_ERROR("Failed to load reformer from container: %d", rc);
    return rc;
  }

  rc = entity_.load(container, check_crc_enabled_);
  if (rc != 0) {
    LOG_ERROR("HnswRabitqSearcher load index failed");
    return rc;
  }

  // The algorithm needs the cluster count and metric type the reformer just
  // loaded
  alg_.reset(new HnswRabitqQueryAlgorithm(entity_, reformer_.num_clusters(),
                                          reformer_.rabitq_metric_type()));

  if (!metric) {
    // No metric supplied: build one from the index meta
    metric_ = IndexFactory::CreateMetric(meta_.metric_name());
    if (!metric_) {
      LOG_ERROR("CreateMetric failed, name: %s", meta_.metric_name().c_str());
      return IndexError_NoExist;
    }
    rc = metric_->init(meta_, meta_.metric_params());
    if (rc != 0) {
      LOG_ERROR("IndexMetric init failed, ret=%d", rc);
      return rc;
    }
    // Prefer the dedicated query metric when the metric provides one
    if (metric_->query_metric()) {
      metric_ = metric_->query_metric();
    }
  } else {
    metric_ = metric;
  }

  if (!metric_->is_matched(meta_)) {
    LOG_ERROR("IndexMetric not match index meta");
    return IndexError_Mismatch;
  }

  // Scan budget: a ratio of the document count, but never below 4096
  max_scan_num_ = std::max(
      4096U, static_cast<uint32_t>(max_scan_ratio_ * entity_.doc_cnt()));

  stats_.set_loaded_count(entity_.doc_cnt());
  stats_.set_loaded_costtime(ailego::Monotime::MilliSeconds() - begin_ms);
  state_ = STATE_LOADED;
  magic_ = IndexContext::GenerateMagic();

  LOG_INFO("End HnswRabitqSearcher::load");

  return 0;
}

//! Unload the index: clears meta, entity, metric and load statistics, and
//! returns the searcher to STATE_INITED. Always returns 0.
int HnswRabitqSearcher::unload() {
  LOG_INFO("HnswRabitqSearcher unload index");

  // Loaded data
  meta_.clear();
  entity_.cleanup();
  metric_.reset();

  // Derived values and statistics
  max_scan_num_ = 0;
  stats_.set_loaded_count(0UL);
  stats_.set_loaded_costtime(0UL);

  state_ = STATE_INITED;
  return 0;
}

//! Rebind a context to this searcher: gives it a fresh clone of the entity
//! plus this searcher's scan limit, brute-force threshold, meta, metric and
//! magic.
//! \return the context's update result, or IndexError_Runtime when the
//!         entity cannot be cloned
int HnswRabitqSearcher::update_context(HnswRabitqContext *ctx) const {
  const HnswRabitqEntity::Pointer cloned = entity_.clone();
  if (cloned) {
    ctx->set_max_scan_num(max_scan_num_);
    ctx->set_bruteforce_threshold(bruteforce_threshold_);

    return ctx->update_context(HnswRabitqContext::kSearcherContext, meta_,
                               metric_, cloned, magic_);
  }

  LOG_ERROR("Failed to clone search context entity");
  return IndexError_Runtime;
}

int HnswRabitqSearcher::search_impl(const void *query,
                                    const IndexQueryMeta &qmeta, uint32_t count,
                                    Context::Pointer &context) const {
  if (ailego_unlikely(!query || !context)) {
    LOG_ERROR("The context is not created by this searcher");
    return IndexError_Mismatch;
  }
  HnswRabitqContext *ctx = dynamic_cast<HnswRabitqContext *>(context.get());
  ailego_do_if_false(ctx) {
    LOG_ERROR("Cast context to HnswRabitqContext failed");
    return IndexError_Cast;
  }

  if (entity_.doc_cnt() <= ctx->get_bruteforce_threshold()) {
    return search_bf_impl(query, qmeta, count, context);
  }

  if (ctx->magic() != magic_) {
    //! context is created by another searcher or streamer
    int ret = update_context(ctx);
    if (ret != 0) {
      return ret;
    }
  }

  ctx->clear();
  ctx->resize_results(count);
  for (size_t q = 0; q < count; ++q) {
    HnswRabitqQueryEntity entity;
    int ret = reformer_.transform_to_entity(query, &entity);
    if (ailego_unlikely(ret != 0)) {
      LOG_ERROR("Hnsw searcher transform failed");
      return ret;
    }
    ctx->reset_query(query);
    ret = alg_->search(&entity, ctx);
    if (ailego_unlikely(ret != 0)) {
      LOG_ERROR("Hnsw searcher fast search failed");
      return ret;
    }
    ctx->topk_to_result(q);
    query = static_cast<const char *>(query) + qmeta.element_size();
  }

  if (ailego_unlikely(ctx->error())) {
    return IndexError_Runtime;
  }

  return 0;
}

int HnswRabitqSearcher::search_bf_impl(const void *query,
                                       const IndexQueryMeta &qmeta,
                                       uint32_t count,
                                       Context::Pointer &context) const {
  if (ailego_unlikely(!query || !context)) {
    LOG_ERROR("The context is not created by this searcher");
    return IndexError_Mismatch;
  }
  HnswRabitqContext *ctx = dynamic_cast<HnswRabitqContext *>(context.get());
  ailego_do_if_false(ctx) {
    LOG_ERROR("Cast context to HnswRabitqContext failed");
    return IndexError_Cast;
  }
  if (ctx->magic() != magic_) {
    //! context is created by another searcher or streamer
    int ret = update_context(ctx);
    if (ret != 0) {
      return ret;
    }
  }

  ctx->clear();
  ctx->resize_results(count);

  if (ctx->group_by_search()) {
    if (!ctx->group_by().is_valid()) {
      LOG_ERROR("Invalid group-by function");
      return IndexError_InvalidArgument;
    }

    std::function<std::string(node_id_t)> group_by = [&](node_id_t id) {
      return ctx->group_by()(entity_.get_key(id));
    };

    for (size_t q = 0; q < count; ++q) {
      HnswRabitqQueryEntity entity;
      int ret = reformer_.transform_to_entity(query, &entity);
      if (ailego_unlikely(ret != 0)) {
        LOG_ERROR("Hnsw searcher transform failed");
        return ret;
      }
      ctx->reset_query(query);
      ctx->group_topk_heaps().clear();

      for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {
        if (entity_.get_key(id) == kInvalidKey) {
          continue;
        }
        if (!ctx->filter().is_valid() || !ctx->filter()(entity_.get_key(id))) {
          EstimateRecord dist;
          alg_->get_full_est(id, dist, entity);

          std::string group_id = group_by(id);

          auto &topk_heap = ctx->group_topk_heaps()[group_id];
          if (topk_heap.empty()) {
            topk_heap.limit(ctx->group_topk());
          }
          topk_heap.emplace_back(id, dist);
        }
      }
      ctx->topk_to_result(q);
      query = static_cast<const char *>(query) + qmeta.element_size();
    }
  } else {
    for (size_t q = 0; q < count; ++q) {
      HnswRabitqQueryEntity entity;
      int ret = reformer_.transform_to_entity(query, &entity);
      if (ailego_unlikely(ret != 0)) {
        LOG_ERROR("Hnsw searcher transform failed");
        return ret;
      }
      ctx->reset_query(query);
      ctx->topk_heap().clear();
      for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {
        if (entity_.get_key(id) == kInvalidKey) {
          continue;
        }
        if (!ctx->filter().is_valid() || !ctx->filter()(entity_.get_key(id))) {
          EstimateRecord dist;
          alg_->get_full_est(id, dist, entity);
          ctx->topk_heap().emplace(id, dist);
        }
      }
      ctx->topk_to_result(q);
      query = static_cast<const char *>(query) + qmeta.element_size();
    }
  }

  if (ailego_unlikely(ctx->error())) {
    return IndexError_Runtime;
  }

  return 0;
}

int HnswRabitqSearcher::search_bf_by_p_keys_impl(
    const void *query, const std::vector<std::vector<uint64_t>> &p_keys,
    const IndexQueryMeta &qmeta, uint32_t count,
    Context::Pointer &context) const {
  if (ailego_unlikely(!query || !context)) {
    LOG_ERROR("The context is not created by this searcher");
    return IndexError_Mismatch;
  }

  if (ailego_unlikely(p_keys.size() != count)) {
    LOG_ERROR("The size of p_keys is not equal to count");
    return IndexError_InvalidArgument;
  }

  HnswRabitqContext *ctx = dynamic_cast<HnswRabitqContext *>(context.get());
  ailego_do_if_false(ctx) {
    LOG_ERROR("Cast context to HnswRabitqContext failed");
    return IndexError_Cast;
  }
  if (ctx->magic() != magic_) {
    //! context is created by another searcher or streamer
    int ret = update_context(ctx);
    if (ret != 0) {
      return ret;
    }
  }

  ctx->clear();
  ctx->resize_results(count);

  if (ctx->group_by_search()) {
    if (!ctx->group_by().is_valid()) {
      LOG_ERROR("Invalid group-by function");
      return IndexError_InvalidArgument;
    }

    std::function<std::string(node_id_t)> group_by = [&](node_id_t id) {
      return ctx->group_by()(entity_.get_key(id));
    };

    for (size_t q = 0; q < count; ++q) {
      HnswRabitqQueryEntity entity;
      int ret = reformer_.transform_to_entity(query, &entity);
      if (ailego_unlikely(ret != 0)) {
        LOG_ERROR("Hnsw searcher transform failed");
        return ret;
      }
      ctx->reset_query(query);
      ctx->group_topk_heaps().clear();

      for (size_t idx = 0; idx < p_keys[q].size(); ++idx) {
        uint64_t pk = p_keys[q][idx];
        if (!ctx->filter().is_valid() || !ctx->filter()(pk)) {
          node_id_t id = entity_.get_id(pk);
          if (id != kInvalidNodeId) {
            EstimateRecord dist;
            alg_->get_full_est(id, dist, entity);
            std::string group_id = group_by(id);

            auto &topk_heap = ctx->group_topk_heaps()[group_id];
            if (topk_heap.empty()) {
              topk_heap.limit(ctx->group_topk());
            }
            topk_heap.emplace_back(id, dist);
          }
        }
      }
      ctx->topk_to_result(q);
      query = static_cast<const char *>(query) + qmeta.element_size();
    }
  } else {
    for (size_t q = 0; q < count; ++q) {
      HnswRabitqQueryEntity entity;
      int ret = reformer_.transform_to_entity(query, &entity);
      if (ailego_unlikely(ret != 0)) {
        LOG_ERROR("Hnsw searcher transform failed");
        return ret;
      }
      ctx->reset_query(query);
      ctx->topk_heap().clear();
      for (size_t idx = 0; idx < p_keys[q].size(); ++idx) {
        uint64_t pk = p_keys[q][idx];
        if (!ctx->filter().is_valid() || !ctx->filter()(pk)) {
          node_id_t id = entity_.get_id(pk);
          if (id != kInvalidNodeId) {
            EstimateRecord dist;
            alg_->get_full_est(id, dist, entity);
            ctx->topk_heap().emplace(id, dist);
          }
        }
      }
      ctx->topk_to_result(q);
      query = static_cast<const char *>(query) + qmeta.element_size();
    }
  }

  if (ailego_unlikely(ctx->error())) {
    return IndexError_Runtime;
  }

  return 0;
}

//! Create a search context bound to this searcher.
//! The context gets its own clone of the entity and a copy of the searcher's
//! current settings (ef, scan limit, visit filter mode, brute-force
//! threshold, ...).
//! \return the new context, or an empty pointer when the index is not loaded,
//!         the entity cannot be cloned, allocation fails or the context
//!         fails to initialize
IndexSearcher::Context::Pointer HnswRabitqSearcher::create_context() const {
  if (ailego_unlikely(state_ != STATE_LOADED)) {
    LOG_ERROR("Load the index first before create context");
    return Context::Pointer();
  }
  const HnswRabitqEntity::Pointer search_ctx_entity = entity_.clone();
  if (!search_ctx_entity) {
    LOG_ERROR("Failed to create search context entity");
    return Context::Pointer();
  }

  // Owned locally until fully initialized, so every early return frees it
  std::unique_ptr<HnswRabitqContext> ctx(new (std::nothrow) HnswRabitqContext(
      meta_.dimension(), metric_, search_ctx_entity));
  if (ailego_unlikely(!ctx)) {
    LOG_ERROR("Failed to new HnswRabitqContext");
    return Context::Pointer();
  }

  ctx->set_ef(ef_);
  ctx->set_max_scan_num(max_scan_num_);
  uint32_t filter_mode =
      bf_enabled_ ? VisitFilter::BloomFilter : VisitFilter::ByteMap;
  ctx->set_filter_mode(filter_mode);
  ctx->set_filter_negative_probability(bf_negative_probability_);
  ctx->set_magic(magic_);
  ctx->set_force_padding_topk(force_padding_topk_enabled_);
  ctx->set_bruteforce_threshold(bruteforce_threshold_);

  // The whole comparison goes inside the hint macro
  if (ailego_unlikely(ctx->init(HnswRabitqContext::kSearcherContext) != 0)) {
    LOG_ERROR("Init HnswRabitqContext failed");
    return Context::Pointer();
  }

  return Context::Pointer(ctx.release());
}

//! Create an index provider working on its own clone of the entity.
//! \return the provider, or an empty pointer when the entity cannot be cloned
//!         or the provider cannot be allocated
IndexProvider::Pointer HnswRabitqSearcher::create_provider(void) const {
  LOG_DEBUG("HnswRabitqSearcher create provider");

  auto cloned = entity_.clone();
  if (ailego_unlikely(!cloned)) {
    LOG_ERROR("Clone HnswRabitqEntity failed");
    return Provider::Pointer();
  }

  // nothrow new: a failed allocation yields an empty provider pointer
  auto *provider = new (std::nothrow)
      HnswRabitqIndexProvider(meta_, cloned, "HnswRabitqSearcher");
  return Provider::Pointer(provider);
}

//! Fetch vector by key; the lookup is delegated to the searcher entity
const void *HnswRabitqSearcher::get_vector(uint64_t key) const {
  const void *vec = entity_.get_vector_by_key(key);
  return vec;
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_rabitq/hnsw_rabitq_searcher.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include "zvec/core/framework/index_framework.h"
#include "hnsw_rabitq_query_algorithm.h"
#include "hnsw_rabitq_searcher_entity.h"
#include "rabitq_reformer.h"

namespace zvec {
namespace core {

//! IndexSearcher implementation for the HnswRabitq index.
//! The object is non-copyable; create_context() refuses to hand out search
//! contexts until the searcher has reached STATE_LOADED.
class HnswRabitqSearcher : public IndexSearcher {
 public:
  using ContextPointer = IndexSearcher::Context::Pointer;

 public:
  HnswRabitqSearcher(void);
  ~HnswRabitqSearcher(void);

  HnswRabitqSearcher(const HnswRabitqSearcher &) = delete;
  HnswRabitqSearcher &operator=(const HnswRabitqSearcher &) = delete;

 protected:
  //! Initialize Searcher
  virtual int init(const ailego::Params &params) override;

  //! Cleanup Searcher
  virtual int cleanup(void) override;

  //! Load Index from storage
  virtual int load(IndexStorage::Pointer container,
                   IndexMetric::Pointer metric) override;

  //! Unload index from storage
  virtual int unload(void) override;

  //! KNN Search (single query; forwards to the batch overload with count 1)
  virtual int search_impl(const void *query, const IndexQueryMeta &qmeta,
                          ContextPointer &context) const override {
    return search_impl(query, qmeta, 1, context);
  }

  //! KNN Search over `count` queries
  virtual int search_impl(const void *query, const IndexQueryMeta &qmeta,
                          uint32_t count,
                          ContextPointer &context) const override;

  //! Linear Search (single query; forwards to the batch overload with count 1)
  virtual int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,
                             ContextPointer &context) const override {
    return search_bf_impl(query, qmeta, 1, context);
  }

  //! Linear Search over `count` queries
  virtual int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,
                             uint32_t count,
                             ContextPointer &context) const override;

  //! Linear search by primary keys (single query; forwards with count 1)
  virtual int search_bf_by_p_keys_impl(
      const void *query, const std::vector<std::vector<uint64_t>> &p_keys,
      const IndexQueryMeta &qmeta, ContextPointer &context) const override {
    return search_bf_by_p_keys_impl(query, p_keys, qmeta, 1, context);
  }

  //! Linear search by primary keys over `count` queries
  virtual int search_bf_by_p_keys_impl(
      const void *query, const std::vector<std::vector<uint64_t>> &p_keys,
      const IndexQueryMeta &qmeta, uint32_t count,
      ContextPointer &context) const override;

  //! Fetch vector by key
  virtual const void *get_vector(uint64_t key) const override;

  //! Create a searcher context
  virtual ContextPointer create_context() const override;

  //! Create an index provider backed by a clone of the searcher entity
  virtual IndexProvider::Pointer create_provider(void) const override;

  //! Retrieve statistics
  virtual const Stats &stats(void) const override {
    return stats_;
  }

  //! Retrieve meta of index
  virtual const IndexMeta &meta(void) const override {
    return meta_;
  }

  //! Retrieve params of index
  virtual const ailego::Params &params(void) const override {
    return params_;
  }

  //! Print debug information (defined in the implementation file)
  virtual void print_debug_info() override;

 private:
  //! To share ctx across streamer/searcher, we need to update the context for
  //! current streamer/searcher
  int update_context(HnswRabitqContext *ctx) const;

 private:
  //! Lifecycle states; create_context() requires STATE_LOADED
  enum State { STATE_INIT = 0, STATE_INITED = 1, STATE_LOADED = 2 };

  HnswRabitqSearcherEntity entity_{};
  HnswRabitqQueryAlgorithm::UPointer alg_;  // impl graph algorithm

  IndexMetric::Pointer metric_{};
  IndexMeta meta_{};
  ailego::Params params_{};
  Stats stats_;
  // Search settings below are copied into every context by create_context().
  uint32_t ef_{HnswRabitqEntity::kDefaultEf};
  uint32_t max_scan_num_{0U};
  uint32_t bruteforce_threshold_{HnswRabitqEntity::kDefaultBruteForceThreshold};
  float max_scan_ratio_{HnswRabitqEntity::kDefaultScanRatio};
  // When true, contexts use VisitFilter::BloomFilter, otherwise ByteMap.
  bool bf_enabled_{false};
  bool check_crc_enabled_{false};
  bool neighbors_in_memory_enabled_{false};
  bool force_padding_topk_enabled_{false};
  float bf_negative_probability_{
      HnswRabitqEntity::kDefaultBFNegativeProbability};
  uint32_t magic_{0U};
  RabitqReformer reformer_;

  State state_{STATE_INIT};
};

}  // namespace core
}  // namespace zvec

================================================
FILE: src/core/algorithm/hnsw_rabitq/hnsw_rabitq_searcher_entity.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "hnsw_rabitq_searcher_entity.h"
#include <zvec/ailego/hash/crc32c.h>
#include "utility/sparse_utility.h"

namespace zvec {
namespace core {

//! Default constructor: all members keep their in-class initializers
HnswRabitqSearcherEntity::HnswRabitqSearcherEntity() = default;

int HnswRabitqSearcherEntity::cleanup(void) {
  storage_.reset();
  vectors_.reset();
  keys_.reset();
  neighbors_.reset();
  neighbors_meta_.reset();
  neighbors_in_memory_enabled_ = false;
  loaded_ = false;

  this->HnswRabitqEntity::cleanup();

  return 0;
}

//! Get primary key of the node id.
//! Returns kInvalidKey when the key cannot be read from the keys segment.
key_t HnswRabitqSearcherEntity::get_key(node_id_t id) const {
  const void *raw_key = nullptr;
  const auto bytes_read = keys_->read(id * sizeof(key_t), &raw_key, sizeof(key_t));
  if (ailego_unlikely(bytes_read != sizeof(key_t))) {
    LOG_ERROR("Read key from segment failed");
    return kInvalidKey;
  }
  const key_t *typed_key = reinterpret_cast<const key_t *>(raw_key);
  return *typed_key;
}

//! Get vector local id by key.
//! Binary-searches the mapping segment, comparing the key stored for each
//! mapped local id against the requested key. Returns kInvalidNodeId when the
//! mapping segment is missing, a read fails, or the key is not found.
node_id_t HnswRabitqSearcherEntity::get_id(key_t key) const {
  if (ailego_unlikely(!mapping_)) {
    LOG_ERROR("Index missing mapping segment");
    return kInvalidNodeId;
  }

  node_id_t lo = 0UL;
  node_id_t hi = doc_cnt();
  while (lo < hi) {
    const node_id_t mid = lo + (hi - lo) / 2;

    // Resolve the mid slot of the mapping to a local node id.
    const void *slot = nullptr;
    const auto slot_bytes =
        mapping_->read(mid * sizeof(node_id_t), &slot, sizeof(node_id_t));
    if (ailego_unlikely(slot_bytes != sizeof(node_id_t))) {
      LOG_ERROR("Read key from segment failed");
      return kInvalidNodeId;
    }
    const node_id_t local_id = *reinterpret_cast<const node_id_t *>(slot);

    // Fetch the key stored for that local id.
    const key_t *stored_key = nullptr;
    const auto key_bytes = keys_->read(
        local_id * sizeof(key_t),
        reinterpret_cast<const void **>(&stored_key), sizeof(key_t));
    if (ailego_unlikely(key_bytes != sizeof(key_t))) {
      LOG_ERROR("Read key from segment failed");
      return kInvalidNodeId;
    }

    if (*stored_key == key) {
      return local_id;
    }
    if (*stored_key < key) {
      lo = mid + 1;
    } else {
      hi = mid;
    }
  }
  return kInvalidNodeId;
}

//! Get vector feature data by key; nullptr when the key has no local id
const void *HnswRabitqSearcherEntity::get_vector_by_key(key_t key) const {
  const node_id_t node = get_id(key);
  if (ailego_unlikely(node == kInvalidNodeId)) {
    return nullptr;
  }
  const void *vec = get_vector(node);
  return vec;
}

//! Get vector feature data by id.
//! Reads vector_size() bytes at offset node_size() * id of the vectors
//! segment; returns nullptr when the read comes back short.
const void *HnswRabitqSearcherEntity::get_vector(node_id_t id) const {
  const size_t wanted = vector_size();
  const size_t position = node_size() * id;

  const void *feature = nullptr;
  const auto got = vectors_->read(position, &feature, wanted);
  if (ailego_unlikely(got != wanted)) {
    LOG_ERROR("Read vector from segment failed");
    return nullptr;
  }
  return feature;
}

int HnswRabitqSearcherEntity::get_vector(
    const node_id_t id, IndexStorage::MemoryBlock &block) const {
  const void *vec = get_vector(id);
  block.reset((void *)vec);
  return 0;
}

//! Get all vectors.
//! Reads node_size() * doc_cnt() bytes from the start of the vectors segment
//! and returns the resulting pointer, or nullptr when the read is short.
const void *HnswRabitqSearcherEntity::get_vectors() const {
  const size_t total_bytes = node_size() * doc_cnt();
  const void *base = nullptr;
  const auto got = vectors_->read(0, &base, total_bytes);
  if (got != total_bytes) {
    LOG_ERROR("Read vectors from segment failed");
    return nullptr;
  }
  return base;
}

//! Get vector feature data for a batch of ids.
//! Fills the first `count` entries of the reusable segment_datas_ scratch
//! buffer with (offset, length) requests, issues one batched read, then
//! copies the resulting data pointers to `vecs`. Returns 0 on success or
//! IndexError_ReadData when the batched read fails.
int HnswRabitqSearcherEntity::get_vector(const node_id_t *ids, uint32_t count,
                                         const void **vecs) const {
  ailego_assert_with(count <= segment_datas_.size(), "invalid count");

  const size_t bytes_per_vector = vector_size();

  uint32_t i = 0;
  while (i < count) {
    auto &request = segment_datas_[i];
    request.offset = node_size() * ids[i];
    request.length = bytes_per_vector;

    ailego_assert_with(segment_datas_[i].offset < vectors_->data_size(),
                       "invalid offset");
    ++i;
  }

  const bool read_ok = !!vectors_->read(&segment_datas_[0], count);
  if (ailego_unlikely(!read_ok)) {
    LOG_ERROR("Read vectors from segment failed");
    return IndexError_ReadData;
  }

  for (uint32_t k = 0; k != count; ++k) {
    vecs[k] = segment_datas_[k].data;
  }
  return 0;
}

int HnswRabitqSearcherEntity::get_vector(
    const node_id_t *ids, uint32_t count,
    std::vector<IndexStorage::MemoryBlock> &vec_blocks) const {
  const void *vecs[count];
  get_vector(ids, count, vecs);
  for (uint32_t i = 0; i < count; ++i) {
    vec_blocks.emplace_back(IndexStorage::MemoryBlock((void *)vecs[i]));
  }
  return 0;
}

//! Get the node id's neighbors on graph level.
//! Level 0 is served from the in-memory fixed-size table when enabled,
//! otherwise from the level-0 meta/neighbors segments. Levels above 0 are
//! read from the upper-level segments. Any failed read yields {0, nullptr}.
const Neighbors HnswRabitqSearcherEntity::get_neighbors(level_t level,
                                                        node_id_t id) const {
  if (level != 0) {
    //! Read level > 0 neighbors
    const HnswNeighborMeta *m = nullptr;
    const auto meta_bytes = upper_neighbors_meta_->read(
        id * sizeof(HnswNeighborMeta), reinterpret_cast<const void **>(&m),
        sizeof(HnswNeighborMeta));
    if (ailego_unlikely(meta_bytes != sizeof(HnswNeighborMeta))) {
      LOG_ERROR("Read neighbors meta from segment failed");
      return {0, nullptr};
    }

    ailego_assert_with(level <= m->level, "invalid level");
    // Records of one node are stored back to back, one per level from 1 up.
    size_t offset = m->offset + (level - 1) * upper_neighbors_size();
    ailego_assert_with(offset <= upper_neighbors_->data_size(),
                       "invalid offset");

    const void *record = nullptr;
    const auto record_bytes =
        upper_neighbors_->read(offset, &record, upper_neighbors_size());
    if (ailego_unlikely(record_bytes != upper_neighbors_size())) {
      LOG_ERROR("Read neighbors from segment failed");
      return {0, nullptr};
    }

    auto upper_hd = reinterpret_cast<const NeighborsHeader *>(record);
    return {upper_hd->neighbor_cnt, upper_hd->neighbors};
  }

  // Level 0, in-memory path: fixed-size records indexed directly by id.
  if (neighbors_in_memory_enabled_) {
    const char *slot = fixed_neighbors_.get() + neighbors_size() * id;
    auto fixed_hd = reinterpret_cast<const NeighborsHeader *>(slot);
    return {fixed_hd->neighbor_cnt, fixed_hd->neighbors};
  }

  // Level 0, segment path: look up the meta entry, then the id list.
  const GraphNeighborMeta *m = nullptr;
  const auto meta_bytes = neighbors_meta_->read(
      id * sizeof(GraphNeighborMeta), reinterpret_cast<const void **>(&m),
      sizeof(GraphNeighborMeta));
  if (ailego_unlikely(meta_bytes != sizeof(GraphNeighborMeta))) {
    LOG_ERROR("Read neighbors meta from segment failed");
    return {0, nullptr};
  }

  const void *id_list = nullptr;
  const auto list_bytes = m->neighbor_cnt * sizeof(node_id_t);
  if (ailego_unlikely(neighbors_->read(m->offset, &id_list, list_bytes) !=
                      list_bytes)) {
    LOG_ERROR("Read neighbors from segment failed");
    return {0, nullptr};
  }
  return {static_cast<uint32_t>(m->neighbor_cnt),
          reinterpret_cast<const node_id_t *>(id_list)};
}

//! Load the entity from the given storage.
//! Keeps a reference to `container`, loads all segments (optionally checking
//! their CRC), marks the entity as loaded and logs a summary of the index.
//! Returns 0 on success or the error code reported by load_segments().
int HnswRabitqSearcherEntity::load(const IndexStorage::Pointer &container,
                                   bool check_crc) {
  storage_ = container;

  const int status = load_segments(check_crc);
  if (status != 0) {
    return status;
  }
  loaded_ = true;

  // Optional segments are reported with size 0 when absent.
  const auto neighbors_bytes =
      neighbors_ == nullptr ? 0 : neighbors_->data_size();
  const auto neighbors_meta_bytes =
      neighbors_meta_ == nullptr ? 0 : neighbors_meta_->data_size();

  LOG_INFO(
      "Index info: docCnt=%u entryPoint=%u maxLevel=%d efConstruct=%zu "
      "l0NeighborCnt=%zu upperNeighborCnt=%zu scalingFactor=%zu "
      "vectorSize=%zu nodeSize=%zu vectorSegmentSize=%zu keySegmentSize=%zu "
      "neighborsSegmentSize=%zu neighborsMetaSegmentSize=%zu ",
      doc_cnt(), entry_point(), cur_max_level(), ef_construction(),
      l0_neighbor_cnt(), upper_neighbor_cnt(), scaling_factor(), vector_size(),
      node_size(), vectors_->data_size(), keys_->data_size(), neighbors_bytes,
      neighbors_meta_bytes);

  return 0;
}

//! Load the index headers and acquire every segment from storage_.
//! Steps, in order: read the graph and hnsw headers and install them as the
//! entity header; size the scratch read buffer; fetch the vectors, keys,
//! level-0 neighbors/meta, upper-level neighbors/meta and mapping segments
//! (validating presence and, where checked below, minimum size); optionally
//! verify CRCs; optionally flatten level-0 neighbors into memory.
//! Returns 0 on success, otherwise an IndexError_* code.
int HnswRabitqSearcherEntity::load_segments(bool check_crc) {
  //! load header
  const void *data = nullptr;
  HNSWHeader hd;
  auto graph_hd_segment = storage_->get(kGraphHeaderSegmentId);
  if (!graph_hd_segment || graph_hd_segment->data_size() < sizeof(hd.graph)) {
    LOG_ERROR("Miss or invalid segment %s", kGraphHeaderSegmentId.c_str());
    return IndexError_InvalidFormat;
  }
  if (graph_hd_segment->read(0, reinterpret_cast<const void **>(&data),
                             sizeof(hd.graph)) != sizeof(hd.graph)) {
    LOG_ERROR("Read segment %s failed", kGraphHeaderSegmentId.c_str());
    return IndexError_ReadData;
  }
  memcpy(&hd.graph, data, sizeof(hd.graph));

  auto hnsw_hd_segment = storage_->get(kHnswHeaderSegmentId);
  if (!hnsw_hd_segment || hnsw_hd_segment->data_size() < sizeof(hd.hnsw)) {
    LOG_ERROR("Miss or invalid segment %s", kHnswHeaderSegmentId.c_str());
    return IndexError_InvalidFormat;
  }
  if (hnsw_hd_segment->read(0, reinterpret_cast<const void **>(&data),
                            sizeof(hd.hnsw)) != sizeof(hd.hnsw)) {
    LOG_ERROR("Read segment %s failed", kHnswHeaderSegmentId.c_str());
    return IndexError_ReadData;
  }
  memcpy(&hd.hnsw, data, sizeof(hd.hnsw));
  // Install the header first: doc_cnt(), cur_max_level() and the neighbor
  // counts used by the checks below are read after this assignment.
  *mutable_header() = hd;
  // Scratch buffer for batched vector reads, sized to the larger of the two
  // neighbor counts.
  segment_datas_.resize(std::max(l0_neighbor_cnt(), upper_neighbor_cnt()));

  vectors_ = storage_->get(kGraphFeaturesSegmentId);
  if (!vectors_) {
    LOG_ERROR("IndexStorage get segment %s failed",
              kGraphFeaturesSegmentId.c_str());
    return IndexError_InvalidFormat;
  }
  keys_ = storage_->get(kGraphKeysSegmentId);
  if (!keys_) {
    LOG_ERROR("IndexStorage get segment %s failed",
              kGraphKeysSegmentId.c_str());
    return IndexError_InvalidFormat;
  }

  // An empty level-0 neighbors segment is only accepted for 0 or 1 documents.
  neighbors_ = storage_->get(kGraphNeighborsSegmentId);
  if (!neighbors_ || (neighbors_->data_size() == 0 && doc_cnt() > 1)) {
    LOG_ERROR("IndexStorage get segment %s failed or empty",
              kGraphNeighborsSegmentId.c_str());
    return IndexError_InvalidArgument;
  }
  // The meta segment must hold one GraphNeighborMeta per document.
  neighbors_meta_ = storage_->get(kGraphOffsetsSegmentId);
  if (!neighbors_meta_ ||
      neighbors_meta_->data_size() < sizeof(GraphNeighborMeta) * doc_cnt()) {
    LOG_ERROR("IndexStorage get segment %s failed or invalid size",
              kGraphOffsetsSegmentId.c_str());
    return IndexError_InvalidArgument;
  }

  // An empty upper-level neighbors segment is only accepted when the graph
  // has no level above 0.
  upper_neighbors_ = storage_->get(kHnswNeighborsSegmentId);
  if (!upper_neighbors_ ||
      (upper_neighbors_->data_size() == 0 && cur_max_level() > 0)) {
    LOG_ERROR("IndexStorage get segment %s failed or empty",
              kHnswNeighborsSegmentId.c_str());
    return IndexError_InvalidArgument;
  }

  // The upper-level meta segment must hold one HnswNeighborMeta per document.
  upper_neighbors_meta_ = storage_->get(kHnswOffsetsSegmentId);
  if (!upper_neighbors_meta_ || upper_neighbors_meta_->data_size() <
                                    sizeof(HnswNeighborMeta) * doc_cnt()) {
    LOG_ERROR("IndexStorage get segment %s failed or invalid size",
              kHnswOffsetsSegmentId.c_str());
    return IndexError_InvalidArgument;
  }

  // The mapping segment must hold one node_id_t per document.
  mapping_ = storage_->get(kGraphMappingSegmentId);
  if (!mapping_ || mapping_->data_size() < sizeof(node_id_t) * doc_cnt()) {
    LOG_ERROR("IndexStorage get segment %s failed or invalid size",
              kGraphMappingSegmentId.c_str());
    return IndexError_InvalidArgument;
  }

  if (check_crc) {
    // NOTE(review): the mapping segment is not part of the CRC-checked set —
    // confirm whether that is intentional.
    std::vector<SegmentPointer> segments;
    segments.emplace_back(graph_hd_segment);
    segments.emplace_back(hnsw_hd_segment);
    segments.emplace_back(vectors_);
    segments.emplace_back(keys_);

    segments.emplace_back(neighbors_);
    segments.emplace_back(neighbors_meta_);
    segments.emplace_back(upper_neighbors_);
    segments.emplace_back(upper_neighbors_meta_);

    if (!do_crc_check(segments)) {
      LOG_ERROR("Check index crc failed, the index may broken");
      return IndexError_Runtime;
    }
  }

  // Optionally copy all level-0 neighbor lists into one fixed-size table.
  if (neighbors_in_memory_enabled_) {
    int ret = load_and_flat_neighbors();
    if (ret != 0) {
      return ret;
    }
  }

  return 0;
}

int HnswRabitqSearcherEntity::load_and_flat_neighbors() {
  fixed_neighbors_.reset(
      new (std::nothrow) char[neighbors_size() * doc_cnt()]{},
      std::default_delete<char[]>());
  if (!fixed_neighbors_) {
    LOG_ERROR("Malloc memory failed");
    return IndexError_NoMemory;
  }

  //! Get a new segemnt to release the buffer after loading neighbors
  auto neighbors_meta = storage_->get(kGraphOffsetsSegmentId);
  if (!neighbors_meta) {
    LOG_ERROR("IndexStorage get segment graph.offsets failed");
    return IndexError_InvalidArgument;
  }

  const GraphNeighborMeta *neighbors_index = nullptr;
  if (neighbors_meta->read(0, reinterpret_cast<const void **>(&neighbors_index),
                           neighbors_meta->data_size()) !=
      neighbors_meta->data_size()) {
    LOG_ERROR("Read segment %s data failed", kGraphOffsetsSegmentId.c_str());
    return IndexError_InvalidArgument;
  }

  const char *neighbor_data;
  for (node_id_t id = 0; id < doc_cnt(); ++id) {
    size_t rd_size = neighbors_index[id].neighbor_cnt * sizeof(node_id_t);
    if (ailego_unlikely(
            neighbors_->read(neighbors_index[id].offset,
                             reinterpret_cast<const void **>(&neighbor_data),
                             rd_size) != rd_size)) {
      LOG_ERROR("Read neighbors from segment failed");
      return IndexError_ReadData;
    }
    // copy level 0 neighbors to fixed size neighbors memory
    char *dst = fixed_neighbors_.get() + neighbors_size() * id;
    *reinterpret_cast<uint32_t *>(dst) = neighbors_index[id].neighbor_cnt;
    memcpy(dst + sizeof(uint32_t), neighbor_data, rd_size);
  }

  return 0;
}

int HnswRabitqSearcherEntity::get_fixed_neighbors(
    std::vector<uint32_t> *fixed_neighbors) const {
  //! Get a new segemnt to release the buffer after loading neighbors
  auto neighbors_meta = storage_->get(kGraphOffsetsSegmentId);
  if (!neighbors_meta) {
    LOG_ERROR("IndexStorage get segment graph.offsets failed");
    return IndexError_InvalidArgument;
  }

  const GraphNeighborMeta *neighbors_index = nullptr;
  size_t meta_size = neighbors_meta->data_size();
  if (neighbors_meta->read(0, reinterpret_cast<const void **>(&neighbors_index),
                           meta_size) != meta_size) {
    LOG_ERROR("Read segment %s data failed", kGraphOffsetsSegmentId.c_str());
    return IndexError_InvalidArgument;
  }

  size_t fixed_neighbor_cnt = l0_neighbor_cnt();
  fixed_neighbors->resize((fixed_neighbor_cnt + 1) * doc_cnt(), kInvalidNodeId);

  size_t neighbors_cnt_offset = fixed_neighbor_cnt * doc_cnt();
  size_t total_neighbor_cnt = 0;
  for (node_id_t id = 0; id < doc_cnt(); ++id) {
    size_t cur_neighbor_cnt = neighbors_index[id].neighbor_cnt;
    if (cur_neighbor_cnt == 0) {
      (*fixed_neighbors)[neighbors_cnt_offset + id] = 0;
      continue;
    }
    size_t rd_size = cur_neighbor_cnt * sizeof(node_id_t);
    const uint32_t *neighbors;
    if (neighbors_->read(neighbors_index[id].offset,
                         reinterpret_cast<const void **>(&neighbors),
                         rd_size) != rd_size) {
      LOG_ERROR("Read neighbors from segment failed");
      return IndexError_ReadData;
    }

    // copy level 0 neighbors to fixed size neighbors memory
    auto it = fixed_neighbors->begin() + id * fixed_neighbor_cnt;
    std::copy(neighbors, neighbors + cur_neighbor_cnt, it);

    (*fixed_neighbors)[neighbors_cnt_offset + id] = cur_neighbor_cnt;
    total_neighbor_cnt += cur_neighbor_cnt;
  }
  LOG_INFO("total neighbor cnt: %zu, average neighbor cnt: %zu",
           total_neighbor_cnt, total_neighbor_cnt / doc_cnt());

  return 0;
}

//! Verify the CRC32C of every given segment.
//! Each segment is read in blocks of at most 4096 bytes while a running CRC
//! is accumulated; the result is compared with the segment's data_crc().
//! Returns false on the first mismatch, true when all segments match.
bool HnswRabitqSearcherEntity::do_crc_check(
    std::vector<SegmentPointer> &segments) const {
  constexpr size_t kBlockBytes = 4096;
  const void *chunk;
  for (auto &seg : segments) {
    uint32_t running_crc = 0;
    for (size_t pos = 0; pos < seg->data_size();) {
      const size_t wanted = std::min(kBlockBytes, seg->data_size() - pos);
      const size_t got = seg->read(pos, &chunk, wanted);
      if (got == 0) {
        // Stop on an empty read; the CRC comparison below decides the result.
        break;
      }
      pos += got;
      running_crc = ailego::Crc32c::Hash(chunk, got, running_crc);
    }
    if (running_crc != seg->data_crc()) {
      return false;
    }
  }
  return true;
}

//! Make a copy of the searcher entity.
//! Every segment is cloned individually, while the header, the in-memory
//! fixed neighbors buffer and the neighbors-in-memory flag are passed on to
//! the new entity. Returns an empty pointer if any segment clone or the
//! allocation of the new entity fails.
const HnswRabitqEntity::Pointer HnswRabitqSearcherEntity::clone() const {
  auto vectors = vectors_->clone();
  if (ailego_unlikely(!vectors)) {
    LOG_ERROR("clone segment %s failed", kGraphFeaturesSegmentId.c_str());
    return HnswRabitqEntity::Pointer();
  }
  auto keys = keys_->clone();
  if (ailego_unlikely(!keys)) {
    LOG_ERROR("clone segment %s failed", kGraphKeysSegmentId.c_str());
    return HnswRabitqEntity::Pointer();
  }

  auto mapping = mapping_->clone();
  if (ailego_unlikely(!mapping)) {
    LOG_ERROR("clone segment %s failed", kGraphMappingSegmentId.c_str());
    return HnswRabitqEntity::Pointer();
  }

  auto neighbors = neighbors_->clone();
  if (ailego_unlikely(!neighbors)) {
    LOG_ERROR("clone segment %s failed", kGraphNeighborsSegmentId.c_str());
    return HnswRabitqEntity::Pointer();
  }
  auto upper_neighbors = upper_neighbors_->clone();
  // Check the clone just made; testing `neighbors` again here would let a
  // failed upper-level clone slip through.
  if (ailego_unlikely(!upper_neighbors)) {
    LOG_ERROR("clone segment %s failed", kHnswNeighborsSegmentId.c_str());
    return HnswRabitqEntity::Pointer();
  }
  auto neighbors_meta = neighbors_meta_->clone();
  if (ailego_unlikely(!neighbors_meta)) {
    LOG_ERROR("clone segment %s failed", kGraphOffsetsSegmentId.c_str());
    return HnswRabitqEntity::Pointer();
  }
  auto upper_neighbors_meta = upper_neighbors_meta_->clone();
  if (ailego_unlikely(!upper_neighbors_meta)) {
    LOG_ERROR("clone segment %s failed", kHnswOffsetsSegmentId.c_str());
    return HnswRabitqEntity::Pointer();
  }

  SegmentGroupParam neighbor_group{neighbors, neighbors_meta, upper_neighbors,
                                   upper_neighbors_meta};

  HnswRabitqSearcherEntity *entity = new (std::nothrow)
      HnswRabitqSearcherEntity(header(), vectors, keys, mapping, neighbor_group,
                               fixed_neighbors_, neighbors_in_memory_enabled_);
  if (ailego_unlikely(!entity)) {
    LOG_ERROR("HnswRabitqSearcherEntity new failed");
  }

  // A null `entity` becomes an empty pointer for the caller to check.
  return HnswRabitqEntity::Pointer(entity);
}

}  // namespace core
}  // namespace zvec

================================================
FILE: src/core/algorithm/hnsw_rabitq/hnsw_rabitq_searcher_entity.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include "hnsw_rabitq_builder_entity.h"
#include "hnsw_rabitq_entity.h"

namespace zvec {
namespace core {

//! Searcher-side entity of the HnswRabitq index.
//! Holds the storage handle and the segments (vectors, keys, key mapping,
//! level-0 and upper-level neighbors with their meta) and serves reads of
//! keys, vectors and neighbor lists from them. Non-copyable; use clone().
class HnswRabitqSearcherEntity : public HnswRabitqEntity {
 public:
  using Pointer = std::shared_ptr<HnswRabitqSearcherEntity>;
  using SegmentPointer = IndexStorage::Segment::Pointer;

 public:
  //! Bundle of the four neighbor-related segments handed to the private
  //! constructor.
  struct SegmentGroupParam {
    SegmentGroupParam(SegmentPointer neighbors_in,
                      SegmentPointer neighbors_meta_in,
                      SegmentPointer upper_neighbors_in,
                      SegmentPointer upper_neighbors_meta_in)
        : neighbors{neighbors_in},
          neighbors_meta{neighbors_meta_in},
          upper_neighbors{upper_neighbors_in},
          upper_neighbors_meta{upper_neighbors_meta_in} {}

    SegmentPointer neighbors{nullptr};
    SegmentPointer neighbors_meta{nullptr};
    SegmentPointer upper_neighbors{nullptr};
    SegmentPointer upper_neighbors_meta{nullptr};
  };

  //! Constructor
  HnswRabitqSearcherEntity();

  //! Make a copy of searcher entity, to support thread-safe operation.
  //! The segment in container cannot be read concurrently
  virtual const HnswRabitqEntity::Pointer clone() const override;

  //! Get primary key of the node id
  virtual key_t get_key(node_id_t id) const override;

  //! Get vector local id by key
  node_id_t get_id(key_t key) const;

  //! Get vector feature data by key
  virtual const void *get_vector_by_key(key_t key) const override;

  //! Get vector feature data by id
  virtual const void *get_vector(node_id_t id) const override;

  //! Get vector feature data for `count` ids; pointers are written to `vecs`
  virtual int get_vector(const node_id_t *ids, uint32_t count,
                         const void **vecs) const override;

  //! Get vector feature data by id into a memory block
  virtual int get_vector(const node_id_t id,
                         IndexStorage::MemoryBlock &block) const override;
  //! Get vector feature data for `count` ids, appended to `vec_blocks`
  virtual int get_vector(
      const node_id_t *ids, uint32_t count,
      std::vector<IndexStorage::MemoryBlock> &vec_blocks) const override;

  //! Get all vectors
  const void *get_vectors() const;

  //! Get the node id's neighbors on graph level
  virtual const Neighbors get_neighbors(level_t level,
                                        node_id_t id) const override;

  //! Load the entity from storage, optionally checking segment CRCs
  virtual int load(const IndexStorage::Pointer &container,
                   bool check_crc) override;

  //! Load headers and segments from the storage set by load()
  int load_segments(bool check_crc);

  //! Release held resources and reset the loaded state
  virtual int cleanup(void) override;

 public:
  //! Whether load() has completed successfully
  bool is_loaded() const {
    return loaded_;
  }

  //! Enable or disable keeping level-0 neighbors in memory
  void set_neighbors_in_memory(bool enabled) {
    neighbors_in_memory_enabled_ = enabled;
  }

  //! get fixed length neighbors data
  int get_fixed_neighbors(std::vector<uint32_t> *fixed_neighbors) const;

 private:
  //! Constructor used by clone(): takes the header, the segments and the
  //! fixed neighbors buffer, and sizes the scratch read buffer to the larger
  //! of the two neighbor counts.
  HnswRabitqSearcherEntity(const HNSWHeader &hd, const SegmentPointer &vectors,
                           const SegmentPointer &keys,
                           const SegmentPointer &mapping,
                           const SegmentGroupParam &neighbor_group,
                           const std::shared_ptr<char> &fixed_neighbors,
                           bool neighbors_in_memory_enabled)
      : HnswRabitqEntity(hd),
        vectors_(vectors),
        keys_(keys),
        mapping_(mapping),
        neighbors_(neighbor_group.neighbors),
        neighbors_meta_(neighbor_group.neighbors_meta),
        upper_neighbors_(neighbor_group.upper_neighbors),
        upper_neighbors_meta_(neighbor_group.upper_neighbors_meta),
        neighbors_in_memory_enabled_(neighbors_in_memory_enabled) {
    segment_datas_.resize(std::max(l0_neighbor_cnt(), upper_neighbor_cnt()),
                          IndexStorage::SegmentData(0U, 0U));
    fixed_neighbors_ = fixed_neighbors;
  }

  //! Compare each segment's computed CRC with its stored data_crc()
  bool do_crc_check(std::vector<SegmentPointer> &segments) const;

  //! Byte size of one level-0 neighbor record: header plus
  //! l0_neighbor_cnt() node ids
  inline size_t neighbors_size() const {
    return sizeof(NeighborsHeader) + l0_neighbor_cnt() * sizeof(node_id_t);
  }

  //! Byte size of one upper-level neighbor record: header plus
  //! upper_neighbor_cnt() node ids
  inline size_t upper_neighbors_size() const {
    return sizeof(NeighborsHeader) + upper_neighbor_cnt() * sizeof(node_id_t);
  }

  //! If neighbors_in_memory_enabled, load the level0 neighbors to memory
  int load_and_flat_neighbors(void);

 public:
  HnswRabitqSearcherEntity(const HnswRabitqSearcherEntity &) = delete;
  HnswRabitqSearcherEntity &operator=(const HnswRabitqSearcherEntity &) =
      delete;

 private:
  IndexStorage::Pointer storage_{};

  SegmentPointer vectors_{};
  SegmentPointer keys_{};
  SegmentPointer mapping_{};

  SegmentPointer neighbors_{};
  SegmentPointer neighbors_meta_{};
  SegmentPointer upper_neighbors_{};
  SegmentPointer upper_neighbors_meta_{};

  // Scratch requests reused by the batched get_vector(); mutable because that
  // method is const.
  mutable std::vector<IndexStorage::SegmentData> segment_datas_{};
  std::shared_ptr<char> fixed_neighbors_{};  // level 0 fixed size neighbors
  bool neighbors_in_memory_enabled_{false};
  bool loaded_{false};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_rabitq/hnsw_rabitq_streamer.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "hnsw_rabitq_streamer.h"
#include <iostream>
#include <memory>
#include <ailego/internal/cpu_features.h>
#include <ailego/pattern/defer.h>
#include <ailego/utility/memory_helper.h>
#include <arrow/compute/ordering.h>
#include "algorithm/hnsw_rabitq/rabitq_reformer.h"
#include "zvec/ailego/container/params.h"
#include "zvec/ailego/logger/logger.h"
#include "hnsw_rabitq_algorithm.h"
#include "hnsw_rabitq_context.h"
#include "hnsw_rabitq_dist_calculator.h"
#include "hnsw_rabitq_index_provider.h"
#include "hnsw_rabitq_query_entity.h"
#include "rabitq_params.h"
#include "rabitq_utils.h"

namespace zvec {
namespace core {
//! Default constructor: the entity is constructed from this streamer's stats
HnswRabitqStreamer::HnswRabitqStreamer()
    : entity_(stats_) {
}

//! Constructor taking ownership of the given provider and reformer; the
//! entity is constructed from this streamer's stats
HnswRabitqStreamer::HnswRabitqStreamer(IndexProvider::Pointer provider,
                                       RabitqReformer::Pointer reformer)
    : entity_(stats_),
      reformer_(std::move(reformer)),
      provider_(std::move(provider)) {
}

//! Destructor: runs cleanup() only when the streamer is in STATE_INITED
HnswRabitqStreamer::~HnswRabitqStreamer() {
  if (state_ != STATE_INITED) {
    return;
  }
  this->cleanup();
}

//! Initialize the streamer from index meta and user parameters.
//!
//! Copies `imeta` into meta_ and records the streamer name/revision/params,
//! reads the PARAM_HNSW_RABITQ_STREAMER_* options into the matching members,
//! substitutes defaults for zero-valued options, validates ranges, pushes the
//! resulting configuration into entity_, initializes the entity and the graph
//! algorithm, and finally moves state_ to STATE_INITED.
//!
//! @param imeta   index meta copied into meta_
//! @param params  streamer parameters
//! @return 0 on success; IndexError_InvalidArgument when a validation below
//!         fails; otherwise the error returned by entity_.init() or
//!         alg_->init()
int HnswRabitqStreamer::init(const IndexMeta &imeta,
                             const ailego::Params &params) {
  meta_ = imeta;
  meta_.set_streamer("HnswRabitqStreamer", HnswRabitqEntity::kRevision, params);

  params.get(PARAM_HNSW_RABITQ_STREAMER_MAX_INDEX_SIZE, &max_index_size_);

  params.get(PARAM_HNSW_RABITQ_STREAMER_MAX_NEIGHBOR_COUNT,
             &upper_max_neighbor_cnt_);
  // Level-0 neighbor count is derived from the upper-level count as read
  // above, i.e. before the zero -> default substitution further down. A zero
  // upper count therefore yields a zero product here, which is replaced by
  // kDefaultL0MaxNeighborCnt later.
  float multiplier = HnswRabitqEntity::kDefaultL0MaxNeighborCntMultiplier;
  params.get(PARAM_HNSW_RABITQ_STREAMER_L0_MAX_NEIGHBOR_COUNT_MULTIPLIER,
             &multiplier);
  l0_max_neighbor_cnt_ = multiplier * upper_max_neighbor_cnt_;

  // Same derivation for the prune count; a zero result falls back to
  // upper_max_neighbor_cnt_ below.
  multiplier = HnswRabitqEntity::kDefaultNeighborPruneMultiplier;
  params.get(PARAM_HNSW_RABITQ_STREAMER_NEIGHBOR_PRUNE_MULTIPLIER, &multiplier);
  size_t prune_cnt = multiplier * upper_max_neighbor_cnt_;
  // The scaling factor is seeded with the upper neighbor count and then
  // handed to params.get() for the explicit option.
  scaling_factor_ = upper_max_neighbor_cnt_;
  params.get(PARAM_HNSW_RABITQ_STREAMER_SCALING_FACTOR, &scaling_factor_);

  params.get(PARAM_HNSW_RABITQ_STREAMER_DOCS_HARD_LIMIT, &docs_hard_limit_);
  params.get(PARAM_HNSW_RABITQ_STREAMER_EF, &ef_);
  params.get(PARAM_HNSW_RABITQ_STREAMER_EFCONSTRUCTION, &ef_construction_);
  params.get(PARAM_HNSW_RABITQ_STREAMER_VISIT_BLOOMFILTER_ENABLE, &bf_enabled_);
  params.get(PARAM_HNSW_RABITQ_STREAMER_VISIT_BLOOMFILTER_NEGATIVE_PROB,
             &bf_negative_prob_);
  params.get(PARAM_HNSW_RABITQ_STREAMER_BRUTE_FORCE_THRESHOLD,
             &bruteforce_threshold_);
  params.get(PARAM_HNSW_RABITQ_STREAMER_MAX_SCAN_RATIO, &max_scan_ratio_);
  params.get(PARAM_HNSW_RABITQ_STREAMER_MAX_SCAN_LIMIT, &max_scan_limit_);
  params.get(PARAM_HNSW_RABITQ_STREAMER_MIN_SCAN_LIMIT, &min_scan_limit_);
  params.get(PARAM_HNSW_RABITQ_STREAMER_CHECK_CRC_ENABLE, &check_crc_enabled_);
  params.get(PARAM_HNSW_RABITQ_STREAMER_CHUNK_SIZE, &chunk_size_);
  params.get(PARAM_HNSW_RABITQ_STREAMER_FILTER_SAME_KEY, &filter_same_key_);
  params.get(PARAM_HNSW_RABITQ_STREAMER_GET_VECTOR_ENABLE,
             &get_vector_enabled_);
  params.get(PARAM_HNSW_RABITQ_STREAMER_MIN_NEIGHBOR_COUNT, &min_neighbor_cnt_);
  params.get(PARAM_HNSW_RABITQ_STREAMER_FORCE_PADDING_RESULT_ENABLE,
             &force_padding_topk_enabled_);
  params.get(PARAM_HNSW_RABITQ_STREAMER_USE_ID_MAP, &use_id_map_);
  entity_.set_use_key_info_map(use_id_map_);

  // A non-zero soft limit must not exceed the hard limit; a zero soft limit
  // is derived from the hard limit with kDefaultDocsSoftLimitRatio.
  params.get(PARAM_HNSW_RABITQ_STREAMER_DOCS_SOFT_LIMIT, &docs_soft_limit_);
  if (docs_soft_limit_ > 0 && docs_soft_limit_ > docs_hard_limit_) {
    LOG_ERROR("[%s] must be >= [%s]",
              PARAM_HNSW_RABITQ_STREAMER_DOCS_HARD_LIMIT.c_str(),
              PARAM_HNSW_RABITQ_STREAMER_DOCS_SOFT_LIMIT.c_str());
    return IndexError_InvalidArgument;
  } else if (docs_soft_limit_ == 0UL) {
    docs_soft_limit_ =
        docs_hard_limit_ * HnswRabitqEntity::kDefaultDocsSoftLimitRatio;
  }

  // Zero means "use the default" for the graph parameters below.
  if (ef_ == 0U) {
    ef_ = HnswRabitqEntity::kDefaultEf;
  }
  if (ef_construction_ == 0U) {
    ef_construction_ = HnswRabitqEntity::kDefaultEfConstruction;
  }
  if (upper_max_neighbor_cnt_ == 0U) {
    upper_max_neighbor_cnt_ = HnswRabitqEntity::kDefaultUpperMaxNeighborCnt;
  }
  if (upper_max_neighbor_cnt_ > HnswRabitqEntity::kMaxNeighborCnt) {
    LOG_ERROR("[%s] must be in range (0,%d)",
              PARAM_HNSW_RABITQ_STREAMER_MAX_NEIGHBOR_COUNT.c_str(),
              HnswRabitqEntity::kMaxNeighborCnt);
    return IndexError_InvalidArgument;
  }
  if (l0_max_neighbor_cnt_ == 0U) {
    l0_max_neighbor_cnt_ = HnswRabitqEntity::kDefaultL0MaxNeighborCnt;
  }
  if (l0_max_neighbor_cnt_ > HnswRabitqEntity::kMaxNeighborCnt) {
    LOG_ERROR("MaxL0NeighborCnt must be in range (0,%d)",
              HnswRabitqEntity::kMaxNeighborCnt);
    return IndexError_InvalidArgument;
  }
  if (min_neighbor_cnt_ > upper_max_neighbor_cnt_) {
    LOG_ERROR("[%s]-[%zu] must be <= [%s]-[%zu]",
              PARAM_HNSW_RABITQ_STREAMER_MIN_NEIGHBOR_COUNT.c_str(),
              static_cast<size_t>(min_neighbor_cnt_),
              PARAM_HNSW_RABITQ_STREAMER_MAX_NEIGHBOR_COUNT.c_str(),
              static_cast<size_t>(upper_max_neighbor_cnt_));
    return IndexError_InvalidArgument;
  }

  // The bloom-filter probability is validated regardless of bf_enabled_.
  if (bf_negative_prob_ <= 0.0f || bf_negative_prob_ >= 1.0f) {
    LOG_ERROR(
        "[%s] must be in range (0,1)",
        PARAM_HNSW_RABITQ_STREAMER_VISIT_BLOOMFILTER_NEGATIVE_PROB.c_str());
    return IndexError_InvalidArgument;
  }

  if (scaling_factor_ == 0U) {
    scaling_factor_ = HnswRabitqEntity::kDefaultScalingFactor;
  }
  if (scaling_factor_ < 5 || scaling_factor_ > 1000) {
    LOG_ERROR("[%s] must be in range [5,1000]",
              PARAM_HNSW_RABITQ_STREAMER_SCALING_FACTOR.c_str());
    return IndexError_InvalidArgument;
  }

  if (max_scan_ratio_ <= 0.0f || max_scan_ratio_ > 1.0f) {
    LOG_ERROR("[%s] must be in range (0.0f,1.0f]",
              PARAM_HNSW_RABITQ_STREAMER_MAX_SCAN_RATIO.c_str());
    return IndexError_InvalidArgument;
  }

  if (max_scan_limit_ < min_scan_limit_) {
    LOG_ERROR("[%s] must be >= [%s]",
              PARAM_HNSW_RABITQ_STREAMER_MAX_SCAN_LIMIT.c_str(),
              PARAM_HNSW_RABITQ_STREAMER_MIN_SCAN_LIMIT.c_str());
    return IndexError_InvalidArgument;
  }

  if (prune_cnt == 0UL) {
    prune_cnt = upper_max_neighbor_cnt_;
  }
  if (chunk_size_ == 0UL) {
    chunk_size_ = HnswRabitqEntity::kDefaultChunkSize;
  }
  if (chunk_size_ > HnswRabitqEntity::kMaxChunkSize) {
    LOG_ERROR("[%s] must be < %zu",
              PARAM_HNSW_RABITQ_STREAMER_CHUNK_SIZE.c_str(),
              HnswRabitqEntity::kMaxChunkSize);
    return IndexError_InvalidArgument;
  }
  // RaBitQ quantization width: zero selects kDefaultRabitqTotalBits, and the
  // entity receives total_bits - 1 as its "ex bits" value.
  uint32_t total_bits = 0;
  params.get(PARAM_RABITQ_TOTAL_BITS, &total_bits);
  if (total_bits == 0) {
    total_bits = kDefaultRabitqTotalBits;
  }
  if (total_bits < 1 || total_bits > 9) {
    LOG_ERROR("Invalid total_bits: %zu, must be in [1, 9]", (size_t)total_bits);
    return IndexError_InvalidArgument;
  }
  uint8_t ex_bits = total_bits - 1;
  entity_.set_ex_bits(ex_bits);

  // The dimension is mandatory and is read from params (not from imeta).
  uint32_t dimension = 0;
  params.get(PARAM_HNSW_RABITQ_GENERAL_DIMENSION, &dimension);
  if (dimension == 0) {
    LOG_ERROR("%s not set", PARAM_HNSW_RABITQ_GENERAL_DIMENSION.c_str());
    return IndexError_InvalidArgument;
  }
  if (dimension < kMinRabitqDimSize || dimension > kMaxRabitqDimSize) {
    LOG_ERROR("Invalid dimension: %u, must be in [%d, %d]", dimension,
              kMinRabitqDimSize, kMaxRabitqDimSize);
    return IndexError_InvalidArgument;
  }
  entity_.update_rabitq_params_and_vector_size(dimension);

  // Hand the validated configuration to the entity before initializing it.
  entity_.set_ef_construction(ef_construction_);
  entity_.set_upper_neighbor_cnt(upper_max_neighbor_cnt_);
  entity_.set_l0_neighbor_cnt(l0_max_neighbor_cnt_);
  entity_.set_scaling_factor(scaling_factor_);
  entity_.set_prune_cnt(prune_cnt);

  entity_.set_chunk_size(chunk_size_);
  entity_.set_filter_same_key(filter_same_key_);
  entity_.set_get_vector(get_vector_enabled_);
  entity_.set_min_neighbor_cnt(min_neighbor_cnt_);

  int ret = entity_.init(docs_hard_limit_);
  if (ret != 0) {
    LOG_ERROR("Hnsw entity init failed for %s", IndexError::What(ret));
    return ret;
  }

  LOG_DEBUG(
      "Init params: maxIndexSize=%zu docsHardLimit=%zu docsSoftLimit=%zu "
      "efConstruction=%u ef=%u upperMaxNeighborCnt=%u l0MaxNeighborCnt=%u "
      "scalingFactor=%u maxScanRatio=%.3f minScanLimit=%zu maxScanLimit=%zu "
      "bfEnabled=%d bruteFoceThreshold=%zu bfNegativeProbability=%.5f "
      "checkCrcEnabled=%d pruneSize=%zu vectorSize=%u chunkSize=%zu "
      "filterSameKey=%u getVectorEnabled=%u minNeighborCount=%u "
      "forcePadding=%u ",
      max_index_size_, docs_hard_limit_, docs_soft_limit_, ef_construction_,
      ef_, upper_max_neighbor_cnt_, l0_max_neighbor_cnt_, scaling_factor_,
      max_scan_ratio_, min_scan_limit_, max_scan_limit_, bf_enabled_,
      bruteforce_threshold_, bf_negative_prob_, check_crc_enabled_, prune_cnt,
      meta_.element_size(), chunk_size_, filter_same_key_, get_vector_enabled_,
      min_neighbor_cnt_, force_padding_topk_enabled_);

  // The insertion algorithm operates on entity_.
  alg_ = HnswRabitqAlgorithm::UPointer(new HnswRabitqAlgorithm(entity_));

  ret = alg_->init();
  if (ret != 0) {
    return ret;
  }

  state_ = STATE_INITED;

  return 0;
}

int HnswRabitqStreamer::cleanup(void) {
  if (state_ == STATE_OPENED) {
    this->close();
  }

  LOG_INFO("HnswRabitqStreamer cleanup");

  meta_.clear();
  metric_.reset();
  stats_.clear();
  entity_.cleanup();

  if (alg_) {
    alg_->cleanup();
  }

  max_index_size_ = 0UL;
  docs_hard_limit_ = HnswRabitqEntity::kDefaultDocsHardLimit;
  docs_soft_limit_ = 0UL;
  upper_max_neighbor_cnt_ = HnswRabitqEntity::kDefaultUpperMaxNeighborCnt;
  l0_max_neighbor_cnt_ = HnswRabitqEntity::kDefaultL0MaxNeighborCnt;
  ef_ = HnswRabitqEntity::kDefaultEf;
  ef_construction_ = HnswRabitqEntity::kDefaultEfConstruction;
  bf_enabled_ = false;
  scaling_factor_ = HnswRabitqEntity::kDefaultScalingFactor;
  bruteforce_threshold_ = HnswRabitqEntity::kDefaultBruteForceThreshold;
  max_scan_limit_ = HnswRabitqEntity::kDefaultMaxScanLimit;
  min_scan_limit_ = HnswRabitqEntity::kDefaultMinScanLimit;
  chunk_size_ = HnswRabitqEntity::kDefaultChunkSize;
  bf_negative_prob_ = HnswRabitqEntity::kDefaultBFNegativeProbability;
  max_scan_ratio_ = HnswRabitqEntity::kDefaultScanRatio;
  state_ = STATE_INIT;
  check_crc_enabled_ = false;
  filter_same_key_ = false;
  get_vector_enabled_ = false;

  return 0;
}

//! Open the streamer on a storage.
//!
//! Requires STATE_INITED. When no reformer is attached, one is created,
//! initialized with the index metric name and loaded from `stg`; otherwise
//! the attached reformer is dumped into `stg` if the storage does not yet
//! contain RABITQ_CONVERTER_SEG_ID. The entity is then opened, the stored
//! index meta is reconciled with meta_ (or written for a new index), the
//! metric and distance functions are set up, and state_ becomes STATE_OPENED.
//!
//! @param stg  storage to open; must not be null
//! @return 0 on success; IndexError_NoReady when not initialized;
//!         IndexError_InvalidArgument for a null storage or an unusable
//!         metric; IndexError_Mismatch when the stored meta differs;
//!         IndexError_NoExist when the metric cannot be created; otherwise
//!         the error of the failing sub-step
int HnswRabitqStreamer::open(IndexStorage::Pointer stg) {
  LOG_INFO("HnswRabitqStreamer open");

  if (ailego_unlikely(state_ != STATE_INITED)) {
    LOG_ERROR("Open storage failed, init streamer first!");
    return IndexError_NoReady;
  }
  // The storage is dereferenced below (stg->has), so reject null up front.
  if (ailego_unlikely(!stg)) {
    LOG_ERROR("Open storage failed, storage is nullptr");
    return IndexError_InvalidArgument;
  }

  // try to load reformer
  if (reformer_ == nullptr) {
    // Build the reformer in a local handle and publish it to reformer_ only
    // once init and load both succeeded. Otherwise a failed attempt would
    // leave a half-initialized reformer behind, and a retried open() would
    // take the "reformer supplied" branch and could dump it into storage.
    auto loaded_reformer = std::make_shared<RabitqReformer>();
    ailego::Params reformer_params;
    reformer_params.set(PARAM_RABITQ_METRIC_NAME, meta_.metric_name());
    int ret = loaded_reformer->init(reformer_params);
    if (ret != 0) {
      LOG_ERROR("Failed to initialize RabitqReformer: %d", ret);
      return ret;
    }

    ret = loaded_reformer->load(stg);
    if (ret != 0) {
      LOG_ERROR("Failed to load reformer, ret=%d", ret);
      return ret;
    }
    reformer_ = std::move(loaded_reformer);
  } else {
    // A reformer was supplied by the caller: persist it if the storage does
    // not carry one yet.
    if (!stg->has(RABITQ_CONVERTER_SEG_ID)) {
      int ret = reformer_->dump(stg);
      if (ret != 0) {
        LOG_ERROR("Failed to dump reformer, ret=%d", ret);
        return ret;
      }
      LOG_INFO("Dump reformer success.");
    }
  }

  int ret = entity_.open(std::move(stg), max_index_size_, check_crc_enabled_);
  if (ret != 0) {
    return ret;
  }
  IndexMeta index_meta;
  ret = entity_.get_index_meta(&index_meta);
  if (ret == IndexError_NoExist) {
    // Set IndexMeta for the new index
    ret = entity_.set_index_meta(meta_);
    if (ret != 0) {
      LOG_ERROR("Failed to set index meta for %s", IndexError::What(ret));
      return ret;
    }
  } else if (ret != 0) {
    LOG_ERROR("Failed to get index meta for %s", IndexError::What(ret));
    return ret;
  } else {
    // Existing index: the layout-defining fields must match exactly.
    if (index_meta.dimension() != meta_.dimension() ||
        index_meta.element_size() != meta_.element_size() ||
        index_meta.metric_name() != meta_.metric_name() ||
        index_meta.data_type() != meta_.data_type()) {
      LOG_ERROR("IndexMeta mismatch from the previous in index");
      return IndexError_Mismatch;
    }
    // The IndexMetric Params may be updated like MipsSquaredEuclidean
    auto metric_params = index_meta.metric_params();
    metric_params.merge(meta_.metric_params());
    meta_.set_metric(index_meta.metric_name(), 0, metric_params);
  }

  metric_ = IndexFactory::CreateMetric(meta_.metric_name());
  if (!metric_) {
    LOG_ERROR("Failed to create metric %s", meta_.metric_name().c_str());
    return IndexError_NoExist;
  }
  ret = metric_->init(meta_, meta_.metric_params());
  if (ret != 0) {
    LOG_ERROR("Failed to init metric, ret=%d", ret);
    return ret;
  }

  if (!metric_->distance()) {
    LOG_ERROR("Invalid metric distance");
    return IndexError_InvalidArgument;
  }

  if (!metric_->batch_distance()) {
    LOG_ERROR("Invalid metric batch distance");
    return IndexError_InvalidArgument;
  }

  add_distance_ = metric_->distance();
  add_batch_distance_ = metric_->batch_distance();

  // Search uses the add-time distances unless the metric exposes a complete
  // query metric (both single and batch distance).
  search_distance_ = add_distance_;
  search_batch_distance_ = add_batch_distance_;

  if (metric_->query_metric() && metric_->query_metric()->distance() &&
      metric_->query_metric()->batch_distance()) {
    search_distance_ = metric_->query_metric()->distance();
    search_batch_distance_ = metric_->query_metric()->batch_distance();
  }

  state_ = STATE_OPENED;
  // A fresh magic lets contexts created before this open() be detected.
  magic_ = IndexContext::GenerateMagic();

  query_alg_ = HnswRabitqQueryAlgorithm::UPointer(new HnswRabitqQueryAlgorithm(
      entity_, reformer_->num_clusters(), reformer_->rabitq_metric_type()));

  return 0;
}

int HnswRabitqStreamer::close(void) {
  LOG_INFO("HnswRabitqStreamer close");

  stats_.clear();
  meta_.set_metric(metric_->name(), 0, metric_->params());
  entity_.set_index_meta(meta_);
  int ret = entity_.close();
  if (ret != 0) {
    return ret;
  }
  state_ = STATE_INITED;

  return 0;
}

int HnswRabitqStreamer::flush(uint64_t checkpoint) {
  LOG_INFO("HnswRabitqStreamer flush checkpoint=%zu", (size_t)checkpoint);

  meta_.set_metric(metric_->name(), 0, metric_->params());
  entity_.set_index_meta(meta_);
  return entity_.flush(checkpoint);
}

//! Dump the index through a dumper.
//!
//! Holds shared_mutex_ exclusively for the whole dump, so concurrent adds
//! (which use try_lock_shared) are rejected meanwhile. Marks meta_ with the
//! "HnswRabitqSearcher" searcher, then serializes the meta, the reformer and
//! the entity in that order.
//!
//! @param dumper  destination dumper
//! @return 0 on success; IndexError_NoReady when no reformer is attached;
//!         otherwise the error of the first failing step
int HnswRabitqStreamer::dump(const IndexDumper::Pointer &dumper) {
  LOG_INFO("HnswRabitqStreamer dump");

  // The reformer only exists after open() or when supplied by the caller;
  // guard the dereference below before touching any state.
  if (ailego_unlikely(!reformer_)) {
    LOG_ERROR("Dump failed, reformer is not ready");
    return IndexError_NoReady;
  }

  shared_mutex_.lock();
  AILEGO_DEFER([&]() { shared_mutex_.unlock(); });

  meta_.set_searcher("HnswRabitqSearcher", HnswRabitqEntity::kRevision,
                     ailego::Params());

  int ret = IndexHelper::SerializeToDumper(meta_, dumper.get());
  if (ret != 0) {
    LOG_ERROR("Failed to serialize meta into dumper.");
    return ret;
  }
  ret = reformer_->dump(dumper);
  if (ret != 0) {
    LOG_ERROR("Failed to dump reformer into dumper.");
    return ret;
  }
  return entity_.dump(dumper);
}

//! Create a context
//!
//! Requires STATE_OPENED. The context works on a clone of entity_ and is
//! seeded with the streamer's search settings and current magic. Returns an
//! empty pointer on any failure.
IndexStreamer::Context::Pointer HnswRabitqStreamer::create_context(void) const {
  if (ailego_unlikely(state_ != STATE_OPENED)) {
    LOG_ERROR("Create context failed, open storage first!");
    return Context::Pointer();
  }

  HnswRabitqEntity::Pointer cloned_entity = entity_.clone();
  if (ailego_unlikely(!cloned_entity)) {
    LOG_ERROR("CreateContext clone init failed");
    return Context::Pointer();
  }

  // Owned locally until fully initialized, so every early return frees it.
  std::unique_ptr<HnswRabitqContext> new_ctx(new (std::nothrow) HnswRabitqContext(
      meta_.dimension(), metric_, cloned_entity));
  if (ailego_unlikely(new_ctx == nullptr)) {
    LOG_ERROR("Failed to new HnswRabitqContext");
    return Context::Pointer();
  }

  new_ctx->set_ef(ef_);
  new_ctx->set_max_scan_limit(max_scan_limit_);
  new_ctx->set_min_scan_limit(min_scan_limit_);
  new_ctx->set_max_scan_ratio(max_scan_ratio_);
  if (bf_enabled_) {
    new_ctx->set_filter_mode(VisitFilter::BloomFilter);
  } else {
    new_ctx->set_filter_mode(VisitFilter::ByteMap);
  }
  new_ctx->set_filter_negative_probability(bf_negative_prob_);
  new_ctx->set_magic(magic_);
  new_ctx->set_force_padding_topk(force_padding_topk_enabled_);
  new_ctx->set_bruteforce_threshold(bruteforce_threshold_);

  const int init_ret = new_ctx->init(HnswRabitqContext::kStreamerContext);
  if (ailego_unlikely(init_ret != 0)) {
    LOG_ERROR("Init HnswRabitqContext failed");
    return Context::Pointer();
  }

  // Size the context for the larger of the current and the estimated
  // document count (the estimate stays 0 when the parameter is absent).
  uint32_t estimate_doc_count = 0;
  const bool has_estimate = meta_.streamer_params().get(
      PARAM_HNSW_RABITQ_STREAMER_ESTIMATE_DOC_COUNT, &estimate_doc_count);
  if (has_estimate) {
    LOG_DEBUG("HnswRabitqStreamer doc_count[%zu] estimate[%zu]",
              (size_t)entity_.doc_cnt(), (size_t)estimate_doc_count);
  }
  new_ctx->check_need_adjuct_ctx(
      std::max(entity_.doc_cnt(), estimate_doc_count));

  return Context::Pointer(new_ctx.release());
}

//! Create a provider over a clone of the streamer's entity.
//! Returns nullptr when the entity cannot be cloned.
IndexProvider::Pointer HnswRabitqStreamer::create_provider(void) const {
  LOG_DEBUG("HnswRabitqStreamer create provider");

  auto cloned_entity = entity_.clone();
  if (ailego_unlikely(!cloned_entity)) {
    LOG_ERROR("Clone HnswRabitqEntity failed");
    return nullptr;
  }

  auto *raw_provider =
      new HnswRabitqIndexProvider(meta_, cloned_entity, "HnswRabitqStreamer");
  return Provider::Pointer(raw_provider);
}

//! Rebind a context that was created elsewhere (its magic differs) to this
//! streamer: refresh the scan settings, then hand it a fresh entity clone
//! together with the current meta, metric and magic.
//!
//! @return IndexError_Runtime when the entity cannot be cloned; otherwise
//!         the result of ctx->update_context()
int HnswRabitqStreamer::update_context(HnswRabitqContext *ctx) const {
  const HnswRabitqEntity::Pointer cloned_entity = entity_.clone();
  if (!cloned_entity) {
    LOG_ERROR("Failed to clone search context entity");
    return IndexError_Runtime;
  }

  ctx->set_max_scan_limit(max_scan_limit_);
  ctx->set_min_scan_limit(min_scan_limit_);
  ctx->set_max_scan_ratio(max_scan_ratio_);
  ctx->set_bruteforce_threshold(bruteforce_threshold_);

  const int update_ret = ctx->update_context(
      HnswRabitqContext::kStreamerContext, meta_, metric_, cloned_entity,
      magic_);
  return update_ret;
}

//! Add a vector with id into index
//!
//! The vector is converted by the reformer, stored in the entity under the
//! caller-chosen `id`, and linked into the graph at a randomly drawn level.
//! Adding is rejected while a dump holds shared_mutex_ exclusively.
//!
//! @param id       node id to store the vector under
//! @param query    raw vector data, validated by check_params()
//! @param qmeta    meta describing `query`
//! @param context  must hold an HnswRabitqContext
//! @return 0 on success; IndexError_InvalidArgument without a provider;
//!         IndexError_Cast for a foreign context type; IndexError_IndexFull
//!         at the hard document limit; IndexError_Unsupported during a dump;
//!         IndexError_Runtime when the context reports an error; otherwise
//!         the error of the failing sub-step
int HnswRabitqStreamer::add_with_id_impl(
    uint32_t id, const void *query, const IndexQueryMeta &qmeta,
    IndexStreamer::Context::Pointer &context) {
  if (!provider_) {
    LOG_ERROR("Provider is nullptr, cannot add vector");
    return IndexError_InvalidArgument;
  }

  int ret = check_params(query, qmeta);
  if (ailego_unlikely(ret != 0)) {
    return ret;
  }

  HnswRabitqContext *ctx = dynamic_cast<HnswRabitqContext *>(context.get());
  ailego_do_if_false(ctx) {
    LOG_ERROR("Cast context to HnswRabitqContext failed");
    return IndexError_Cast;
  }
  if (ctx->magic() != magic_) {
    //! context is created by another searcher or streamer
    ret = update_context(ctx);
    if (ret != 0) {
      return ret;
    }
  }

  // Soft limit only warns; the hard limit rejects the vector.
  if (ailego_unlikely(entity_.doc_cnt() >= docs_soft_limit_)) {
    if (entity_.doc_cnt() >= docs_hard_limit_) {
      LOG_ERROR("Current docs %zu exceed [%s]",
                static_cast<size_t>(entity_.doc_cnt()),
                PARAM_HNSW_RABITQ_STREAMER_DOCS_HARD_LIMIT.c_str());
      const std::lock_guard<std::mutex> lk(mutex_);
      (*stats_.mutable_discarded_count())++;
      return IndexError_IndexFull;
    } else {
      LOG_WARN("Current docs %zu exceed [%s]",
               static_cast<size_t>(entity_.doc_cnt()),
               PARAM_HNSW_RABITQ_STREAMER_DOCS_SOFT_LIMIT.c_str());
    }
  }
  // dump() takes this mutex exclusively; fail fast instead of blocking.
  if (ailego_unlikely(!shared_mutex_.try_lock_shared())) {
    LOG_ERROR("Cannot add vector while dumping index");
    (*stats_.mutable_discarded_count())++;
    return IndexError_Unsupported;
  }
  AILEGO_DEFER([&]() { shared_mutex_.unlock_shared(); });

  // Prepare the context for an insertion with the add-time distances.
  ctx->clear();
  ctx->update_dist_caculator_distance(add_distance_, add_batch_distance_);
  ctx->reset_query(query);
  ctx->check_need_adjuct_ctx(entity_.doc_cnt());
  ctx->set_provider(provider_);

  if (metric_->support_train()) {
    // Training mutates the shared metric, so it is serialized.
    const std::lock_guard<std::mutex> lk(mutex_);
    ret = metric_->train(query, meta_.dimension());
    if (ailego_unlikely(ret != 0)) {
      LOG_ERROR("Hnsw streamer metric train failed");
      (*stats_.mutable_discarded_count())++;
      return ret;
    }
  }

  std::string converted_vector;
  IndexQueryMeta converted_meta;
  ret = reformer_->convert(query, qmeta, &converted_vector, &converted_meta);
  if (ailego_unlikely(ret != 0)) {
    LOG_ERROR("Rabitq hnsw convert failed, ret=%d", ret);
    // Count the rejected vector like every other failure after the lock.
    (*stats_.mutable_discarded_count())++;
    return ret;
  }

  level_t level = alg_->get_random_level();
  ret = entity_.add_vector_with_id(level, id, converted_vector.data());
  if (ailego_unlikely(ret != 0)) {
    LOG_ERROR("Hnsw streamer add vector failed");
    (*stats_.mutable_discarded_count())++;
    return ret;
  }

  ret = alg_->add_node(id, level, ctx);
  if (ailego_unlikely(ret != 0)) {
    LOG_ERROR("Hnsw steamer add node failed");
    (*stats_.mutable_discarded_count())++;
    return ret;
  }

  if (ailego_unlikely(ctx->error())) {
    (*stats_.mutable_discarded_count())++;
    return IndexError_Runtime;
  }
  (*stats_.mutable_added_count())++;

  return 0;
}

//! Add a vector into index
//!
//! The vector is converted by the reformer, stored in the entity under
//! `pkey` (the entity assigns the node id), and linked into the graph at a
//! randomly drawn level. Adding is rejected while a dump holds shared_mutex_
//! exclusively.
//!
//! @param pkey     primary key of the vector
//! @param query    raw vector data, validated by check_params()
//! @param qmeta    meta describing `query`
//! @param context  must hold an HnswRabitqContext
//! @return 0 on success; IndexError_InvalidArgument without a provider;
//!         IndexError_Cast for a foreign context type; IndexError_IndexFull
//!         at the hard document limit; IndexError_Unsupported during a dump;
//!         IndexError_Runtime when the context reports an error; otherwise
//!         the error of the failing sub-step
int HnswRabitqStreamer::add_impl(uint64_t pkey, const void *query,
                                 const IndexQueryMeta &qmeta,
                                 IndexStreamer::Context::Pointer &context) {
  if (!provider_) {
    LOG_ERROR("Provider is nullptr, cannot add vector");
    return IndexError_InvalidArgument;
  }

  int ret = check_params(query, qmeta);
  if (ailego_unlikely(ret != 0)) {
    return ret;
  }

  HnswRabitqContext *ctx = dynamic_cast<HnswRabitqContext *>(context.get());
  ailego_do_if_false(ctx) {
    LOG_ERROR("Cast context to HnswRabitqContext failed");
    return IndexError_Cast;
  }
  if (ctx->magic() != magic_) {
    //! context is created by another searcher or streamer
    ret = update_context(ctx);
    if (ret != 0) {
      return ret;
    }
  }

  // Soft limit only warns; the hard limit rejects the vector.
  if (ailego_unlikely(entity_.doc_cnt() >= docs_soft_limit_)) {
    if (entity_.doc_cnt() >= docs_hard_limit_) {
      LOG_ERROR("Current docs %zu exceed [%s]",
                static_cast<size_t>(entity_.doc_cnt()),
                PARAM_HNSW_RABITQ_STREAMER_DOCS_HARD_LIMIT.c_str());
      const std::lock_guard<std::mutex> lk(mutex_);
      (*stats_.mutable_discarded_count())++;
      return IndexError_IndexFull;
    } else {
      LOG_WARN("Current docs %zu exceed [%s]",
               static_cast<size_t>(entity_.doc_cnt()),
               PARAM_HNSW_RABITQ_STREAMER_DOCS_SOFT_LIMIT.c_str());
    }
  }
  // dump() takes this mutex exclusively; fail fast instead of blocking.
  if (ailego_unlikely(!shared_mutex_.try_lock_shared())) {
    LOG_ERROR("Cannot add vector while dumping index");
    (*stats_.mutable_discarded_count())++;
    return IndexError_Unsupported;
  }
  AILEGO_DEFER([&]() { shared_mutex_.unlock_shared(); });

  // Prepare the context for an insertion with the add-time distances.
  ctx->clear();
  ctx->update_dist_caculator_distance(add_distance_, add_batch_distance_);
  ctx->reset_query(query);
  ctx->check_need_adjuct_ctx(entity_.doc_cnt());
  ctx->set_provider(provider_);

  if (metric_->support_train()) {
    // Training mutates the shared metric, so it is serialized.
    const std::lock_guard<std::mutex> lk(mutex_);
    ret = metric_->train(query, meta_.dimension());
    if (ailego_unlikely(ret != 0)) {
      LOG_ERROR("Hnsw streamer metric train failed");
      (*stats_.mutable_discarded_count())++;
      return ret;
    }
  }

  std::string converted_vector;
  IndexQueryMeta converted_meta;
  ret = reformer_->convert(query, qmeta, &converted_vector, &converted_meta);
  if (ailego_unlikely(ret != 0)) {
    LOG_ERROR("Rabitq hnsw convert failed, ret=%d", ret);
    // Count the rejected vector like every other failure after the lock.
    (*stats_.mutable_discarded_count())++;
    return ret;
  }

  level_t level = alg_->get_random_level();
  // Filled in by the entity; initialized so it is never read indeterminate.
  node_id_t id = kInvalidNodeId;
  ret = entity_.add_vector(level, pkey, converted_vector.data(), &id);
  if (ailego_unlikely(ret != 0)) {
    LOG_ERROR("Hnsw streamer add vector failed");
    (*stats_.mutable_discarded_count())++;
    return ret;
  }

  ret = alg_->add_node(id, level, ctx);
  if (ailego_unlikely(ret != 0)) {
    LOG_ERROR("Hnsw steamer add node failed");
    (*stats_.mutable_discarded_count())++;
    return ret;
  }

  if (ailego_unlikely(ctx->error())) {
    (*stats_.mutable_discarded_count())++;
    return IndexError_Runtime;
  }
  (*stats_.mutable_added_count())++;

  return 0;
}


//! Similarity search for a single query: forwards to the batched overload
//! with a query count of one.
int HnswRabitqStreamer::search_impl(
    const void *query, const IndexQueryMeta &qmeta,
    IndexStreamer::Context::Pointer &context) const {
  const uint32_t single_query_count = 1;
  return search_impl(query, qmeta, single_query_count, context);
}

//! Similarity search
int HnswRabitqStreamer::search_impl(
    const void *query, const IndexQueryMeta &qmeta, uint32_t count,
    IndexStreamer::Context::Pointer &context) const {
  int ret = check_params(query, qmeta);
  if (ailego_unlikely(ret != 0)) {
    return ret;
  }
  HnswRabitqContext *ctx = dynamic_cast<HnswRabitqContext *>(context.get());
  ailego_do_if_false(ctx) {
    LOG_ERROR("Cast context to HnswRabitqContext failed");
    return IndexError_Cast;
  }

  if (entity_.doc_cnt() <= ctx->get_bruteforce_threshold()) {
    return search_bf_impl(query, qmeta, count, context);
  }

  if (ctx->magic() != magic_) {
    //! context is created by another searcher or streamer
    ret = update_context(ctx);
    if (ret != 0) {
      return ret;
    }
  }

  ctx->clear();
  ctx->update_dist_caculator_distance(search_distance_, search_batch_distance_);
  ctx->resize_results(count);
  ctx->check_need_adjuct_ctx(entity_.doc_cnt());
  for (size_t q = 0; q < count; ++q) {
    HnswRabitqQueryEntity entity;
    ret = reformer_->transform_to_entity(query, &entity);
    if (ailego_unlikely(ret != 0)) {
      LOG_ERROR("Hnsw searcher transform failed");
      return ret;
    }
    ctx->reset_query(query);
    ret = query_alg_->search(&entity, ctx);
    if (ailego_unlikely(ret != 0)) {
      LOG_ERROR("Hnsw searcher fast search failed");
      return ret;
    }
    ctx->topk_to_result(q);
    query = static_cast<const char *>(query) + qmeta.element_size();
  }

  if (ailego_unlikely(ctx->error())) {
    return IndexError_Runtime;
  }

  return 0;
}

//! Print the level-0 neighbor list of every node with a valid key to stdout,
//! one "node: <id>; n0, n1, ..." line per node.
void HnswRabitqStreamer::print_debug_info() {
  for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {
    // Nodes without a valid key are skipped.
    if (entity_.get_key(id) != kInvalidKey) {
      Neighbors neighbours = entity_.get_neighbors(0, id);
      std::cout << "node: " << id << "; ";

      // Emit the separator before every neighbor except the first.
      const char *separator = "";
      for (uint32_t i = 0; i < neighbours.size(); ++i) {
        std::cout << separator << neighbours[i];
        separator = ", ";
      }
      std::cout << std::endl;
    }
  }

  // entity_.print_key_map();
}

//! Brute-force search for a single query: forwards to the batched overload
//! with a query count of one.
int HnswRabitqStreamer::search_bf_impl(
    const void *query, const IndexQueryMeta &qmeta,
    IndexStreamer::Context::Pointer &context) const {
  const uint32_t single_query_count = 1;
  return search_bf_impl(query, qmeta, single_query_count, context);
}

int HnswRabitqStreamer::search_bf_impl(
    const void *query, const IndexQueryMeta &qmeta, uint32_t count,
    IndexStreamer::Context::Pointer &context) const {
  int ret = check_params(query, qmeta);
  if (ailego_unlikely(ret != 0)) {
    return ret;
  }
  HnswRabitqContext *ctx = dynamic_cast<HnswRabitqContext *>(context.get());
  ailego_do_if_false(ctx) {
    LOG_ERROR("Cast context to HnswRabitqContext failed");
    return IndexError_Cast;
  }
  if (ctx->magic() != magic_) {
    //! context is created by another searcher or streamer
    ret = update_context(ctx);
    if (ret != 0) {
      return ret;
    }
  }

  ctx->clear();
  ctx->update_dist_caculator_distance(search_distance_, search_batch_distance_);
  ctx->resize_results(count);

  if (ctx->group_by_search()) {
    if (!ctx->group_by().is_valid()) {
      LOG_ERROR("Invalid group-by function");
      return IndexError_InvalidArgument;
    }

    std::function<std::string(node_id_t)> group_by = [&](node_id_t id) {
      return ctx->group_by()(entity_.get_key(id));
    };

    for (size_t q = 0; q < count; ++q) {
      HnswRabitqQueryEntity entity;
      ret = reformer_->transform_to_entity(query, &entity);
      if (ailego_unlikely(ret != 0)) {
        LOG_ERROR("Hnsw rabitq streamer transform failed");
        return ret;
      }
      ctx->reset_query(query);
      ctx->group_topk_heaps().clear();

      for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {
        if (entity_.get_key(id) == kInvalidKey) {
          continue;
        }

        if (!ctx->filter().is_valid() || !ctx->filter()(entity_.get_key(id))) {
          EstimateRecord dist;
          query_alg_->get_full_est(id, dist, entity);

          std::string group_id = group_by(id);

          auto &topk_heap = ctx->group_topk_heaps()[group_id];
          if (topk_heap.empty()) {
            topk_heap.limit(ctx->group_topk());
          }
          topk_heap.emplace_back(id, dist);
        }
      }
      ctx->topk_to_result(q);
      query = static_cast<const char *>(query) + qmeta.element_size();
    }
  } else {
    for (size_t q = 0; q < count; ++q) {
      HnswRabitqQueryEntity entity;
      ret = reformer_->transform_to_entity(query, &entity);
      if (ailego_unlikely(ret != 0)) {
        LOG_ERROR("Hnsw rabitq streamer transform failed");
        return ret;
      }
      ctx->reset_query(query);
      ctx->topk_heap().clear();
      for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {
        if (entity_.get_key(id) == kInvalidKey) {
          continue;
        }
        if (!ctx->filter().is_valid() || !ctx->filter()(entity_.get_key(id))) {
          EstimateRecord dist;
          query_alg_->get_full_est(id, dist, entity);
          ctx->topk_heap().emplace(id, dist);
        }
      }
      ctx->topk_to_result(q);
      query = static_cast<const char *>(query) + qmeta.element_size();
    }
  }

  if (ailego_unlikely(ctx->error())) {
    return IndexError_Runtime;
  }

  return 0;
}

int HnswRabitqStreamer::search_bf_by_p_keys_impl(
    const void *query, const std::vector<std::vector<uint64_t>> &p_keys,
    const IndexQueryMeta &qmeta, uint32_t count,
    Context::Pointer &context) const {
  int ret = check_params(query, qmeta);
  if (ailego_unlikely(ret != 0)) {
    return ret;
  }

  if (ailego_unlikely(p_keys.size() != count)) {
    LOG_ERROR("The size of p_keys is not equal to count");
    return IndexError_InvalidArgument;
  }

  HnswRabitqContext *ctx = dynamic_cast<HnswRabitqContext *>(context.get());
  ailego_do_if_false(ctx) {
    LOG_ERROR("Cast context to HnswRabitqContext failed");
    return IndexError_Cast;
  }
  if (ctx->magic() != magic_) {
    //! context is created by another searcher or streamer
    ret = update_context(ctx);
    if (ret != 0) {
      return ret;
    }
  }

  ctx->clear();
  ctx->update_dist_caculator_distance(search_distance_, search_batch_distance_);
  ctx->resize_results(count);

  if (ctx->group_by_search()) {
    if (!ctx->group_by().is_valid()) {
      LOG_ERROR("Invalid group-by function");
      return IndexError_InvalidArgument;
    }

    std::function<std::string(node_id_t)> group_by = [&](node_id_t id) {
      return ctx->group_by()(entity_.get_key(id));
    };

    for (size_t q = 0; q < count; ++q) {
      HnswRabitqQueryEntity entity;
      ret = reformer_->transform_to_entity(query, &entity);
      if (ailego_unlikely(ret != 0)) {
        LOG_ERROR("Hnsw rabitq streamer transform failed");
        return ret;
      }
      ctx->reset_query(query);
      ctx->group_topk_heaps().clear();

      for (size_t idx = 0; idx < p_keys[q].size(); ++idx) {
        uint64_t pk = p_keys[q][idx];
        if (!ctx->filter().is_valid() || !ctx->filter()(pk)) {
          node_id_t id = entity_.get_id(pk);
          if (id != kInvalidNodeId) {
            EstimateRecord dist;
            query_alg_->get_full_est(id, dist, entity);
            std::string group_id = group_by(id);

            auto &topk_heap = ctx->group_topk_heaps()[group_id];
            if (topk_heap.empty()) {
              topk_heap.limit(ctx->group_topk());
            }
            topk_heap.emplace_back(id, dist);
          }
        }
      }
      ctx->topk_to_result(q);
      query = static_cast<const char *>(query) + qmeta.element_size();
    }
  } else {
    for (size_t q = 0; q < count; ++q) {
      HnswRabitqQueryEntity entity;
      ret = reformer_->transform_to_entity(query, &entity);
      if (ailego_unlikely(ret != 0)) {
        LOG_ERROR("Hnsw rabitq streamer transform failed");
        return ret;
      }
      ctx->reset_query(query);
      ctx->topk_heap().clear();
      for (size_t idx = 0; idx < p_keys[q].size(); ++idx) {
        key_t pk = p_keys[q][idx];
        if (!ctx->filter().is_valid() || !ctx->filter()(pk)) {
          node_id_t id = entity_.get_id(pk);
          if (id != kInvalidNodeId) {
            EstimateRecord dist;
            query_alg_->get_full_est(id, dist, entity);
            ctx->topk_heap().emplace(id, dist);
          }
        }
      }
      ctx->topk_to_result(q);
      query = static_cast<const char *>(query) + qmeta.element_size();
    }
  }

  if (ailego_unlikely(ctx->error())) {
    return IndexError_Runtime;
  }

  return 0;
}


}  // namespace core
}  // namespace zvec

================================================
FILE: src/core/algorithm/hnsw_rabitq/hnsw_rabitq_streamer.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <memory>
#include <ailego/parallel/lock.h>
#include "algorithm/hnsw_rabitq/rabitq_reformer.h"
#include "zvec/core/framework/index_framework.h"
#include "zvec/core/framework/index_provider.h"
#include "zvec/core/framework/index_reformer.h"
#include "hnsw_rabitq_algorithm.h"
#include "hnsw_rabitq_query_algorithm.h"
#include "hnsw_rabitq_streamer_entity.h"

namespace zvec {
namespace core {

//! HNSW streamer with RaBitQ support.
//!
//! Implements the IndexStreamer interface. Vector lookups are delegated to
//! entity_; a RabitqReformer and an IndexProvider can be injected either via
//! the constructor or via set_reformer() / set_provider(). The class is
//! non-copyable.
class HnswRabitqStreamer : public IndexStreamer {
 public:
  using ContextPointer = IndexStreamer::Context::Pointer;

  HnswRabitqStreamer();
  explicit HnswRabitqStreamer(IndexProvider::Pointer provider,
                              RabitqReformer::Pointer reformer = nullptr);
  virtual ~HnswRabitqStreamer(void);

  HnswRabitqStreamer(const HnswRabitqStreamer &streamer) = delete;
  HnswRabitqStreamer &operator=(const HnswRabitqStreamer &streamer) = delete;

  //! Replace the provider stored in provider_
  void set_provider(IndexProvider::Pointer provider) {
    provider_ = std::move(provider);
  }

  //! Replace the reformer. The pointer is down-cast to RabitqReformer, so
  //! reformer_ becomes null when `reformer` is not a RabitqReformer.
  void set_reformer(IndexReformer::Pointer reformer) {
    reformer_ = std::dynamic_pointer_cast<RabitqReformer>(reformer);
  }

 protected:
  //! Initialize Streamer
  virtual int init(const IndexMeta &imeta,
                   const ailego::Params &params) override;

  //! Cleanup Streamer
  virtual int cleanup(void) override;

  //! Create a context
  virtual Context::Pointer create_context(void) const override;

  //! Create a provider
  virtual IndexProvider::Pointer create_provider(void) const override;

  //! Add a vector into index
  virtual int add_impl(uint64_t pkey, const void *query,
                       const IndexQueryMeta &qmeta,
                       Context::Pointer &context) override;

  //! Add a vector with id into index
  virtual int add_with_id_impl(uint32_t id, const void *query,
                               const IndexQueryMeta &qmeta,
                               Context::Pointer &context) override;

  //! Similarity search
  virtual int search_impl(const void *query, const IndexQueryMeta &qmeta,
                          Context::Pointer &context) const override;

  //! Similarity search
  virtual int search_impl(const void *query, const IndexQueryMeta &qmeta,
                          uint32_t count,
                          Context::Pointer &context) const override;

  //! Similarity brute force search
  virtual int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,
                             Context::Pointer &context) const override;

  //! Similarity brute force search
  virtual int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,
                             uint32_t count,
                             Context::Pointer &context) const override;

  //! Linear search by primary keys (single query; forwards with count = 1)
  virtual int search_bf_by_p_keys_impl(
      const void *query, const std::vector<std::vector<uint64_t>> &p_keys,
      const IndexQueryMeta &qmeta, ContextPointer &context) const override {
    return search_bf_by_p_keys_impl(query, p_keys, qmeta, 1, context);
  }

  //! Linear search by primary keys
  virtual int search_bf_by_p_keys_impl(
      const void *query, const std::vector<std::vector<uint64_t>> &p_keys,
      const IndexQueryMeta &qmeta, uint32_t count,
      ContextPointer &context) const override;

  //! Fetch vector by key
  virtual const void *get_vector(uint64_t key) const override {
    return entity_.get_vector_by_key(key);
  }

  //! Fetch vector by key into a memory block
  virtual int get_vector(const uint64_t key,
                         IndexStorage::MemoryBlock &block) const override {
    return entity_.get_vector_by_key(key, block);
  }

  //! Fetch vector by id
  virtual const void *get_vector_by_id(uint32_t id) const override {
    return entity_.get_vector(id);
  }

  //! Fetch vector by id into a memory block
  virtual int get_vector_by_id(
      const uint32_t id, IndexStorage::MemoryBlock &block) const override {
    return entity_.get_vector(id, block);
  }

  //! Open index from file path
  virtual int open(IndexStorage::Pointer stg) override;

  //! Close file
  virtual int close(void) override;

  //! flush file
  virtual int flush(uint64_t checkpoint) override;

  //! Dump index into storage
  virtual int dump(const IndexDumper::Pointer &dumper) override;

  //! Retrieve statistics
  virtual const Stats &stats(void) const override {
    return stats_;
  }

  //! Retrieve meta of index
  virtual const IndexMeta &meta(void) const override {
    return meta_;
  }

  virtual void print_debug_info() override;

 private:
  //! Validate a query pointer and its meta against the index meta.
  //! Returns 0 when valid, IndexError_InvalidArgument for a null query and
  //! IndexError_Mismatch when dimension, data type or element size differ.
  inline int check_params(const void *query,
                          const IndexQueryMeta &qmeta) const {
    if (ailego_unlikely(!query)) {
      LOG_ERROR("null query");
      return IndexError_InvalidArgument;
    }
    if (ailego_unlikely(qmeta.dimension() != meta_.dimension() ||
                        qmeta.data_type() != meta_.data_type() ||
                        qmeta.element_size() != meta_.element_size())) {
      LOG_ERROR("Unsupported query meta");
      return IndexError_Mismatch;
    }
    return 0;
  }

  //! Verify that every one of the `count` entries of `sparse_count` is zero.
  //! Returns 0 when all entries are zero (or count is 0), otherwise logs the
  //! first offending entry and returns IndexError_InvalidArgument.
  inline int check_sparse_count_is_zero(const uint32_t *sparse_count,
                                        uint32_t count) const {
    for (uint32_t i = 0; i < count; ++i) {
      // Braces are required: the error return must only fire for a non-zero
      // entry, not unconditionally on the first iteration.
      if (sparse_count[i] != 0) {
        LOG_ERROR("Sparse count is not empty. Index: %u, Sparse Count: %u", i,
                  sparse_count[i]);
        return IndexError_InvalidArgument;
      }
    }

    return 0;
  }

 private:
  //! To share ctx across streamer/searcher, we need to update the context for
  //! current streamer/searcher
  int update_context(HnswRabitqContext *ctx) const;

 private:
  enum State { STATE_INIT = 0, STATE_INITED = 1, STATE_OPENED = 2 };

  //! Streamer statistics with a helper that zeroes every counter
  class Stats : public IndexStreamer::Stats {
   public:
    void clear(void) {
      set_revision_id(0u);
      set_loaded_count(0u);
      set_added_count(0u);
      set_discarded_count(0u);
      set_index_size(0u);
      set_dumped_size(0u);
      set_check_point(0u);
      set_create_time(0u);
      set_update_time(0u);
      clear_attributes();
    }
  };

  HnswRabitqStreamerEntity entity_;
  HnswRabitqAlgorithm::UPointer alg_;
  IndexMeta meta_{};
  IndexMetric::Pointer metric_{};

  IndexMetric::MatrixDistance add_distance_{};
  IndexMetric::MatrixDistance search_distance_{};

  IndexMetric::MatrixBatchDistance add_batch_distance_{};
  IndexMetric::MatrixBatchDistance search_batch_distance_{};

  RabitqReformer::Pointer reformer_{};            // RaBitQ reformer
  HnswRabitqQueryAlgorithm::UPointer query_alg_;  // query algorithm
  // provider_ provides raw vector, which is used to build graph
  IndexProvider::Pointer provider_{};

  Stats stats_{};
  std::mutex mutex_{};

  size_t max_index_size_{0UL};
  size_t chunk_size_{HnswRabitqEntity::kDefaultChunkSize};
  size_t docs_hard_limit_{HnswRabitqEntity::kDefaultDocsHardLimit};
  size_t docs_soft_limit_{0UL};
  uint32_t min_neighbor_cnt_{0u};
  uint32_t upper_max_neighbor_cnt_{
      HnswRabitqEntity::kDefaultUpperMaxNeighborCnt};
  uint32_t l0_max_neighbor_cnt_{HnswRabitqEntity::kDefaultL0MaxNeighborCnt};
  uint32_t ef_{HnswRabitqEntity::kDefaultEf};
  uint32_t ef_construction_{HnswRabitqEntity::kDefaultEfConstruction};
  uint32_t scaling_factor_{HnswRabitqEntity::kDefaultScalingFactor};
  size_t bruteforce_threshold_{HnswRabitqEntity::kDefaultBruteForceThreshold};
  size_t max_scan_limit_{HnswRabitqEntity::kDefaultMaxScanLimit};
  size_t min_scan_limit_{HnswRabitqEntity::kDefaultMinScanLimit};
  float bf_negative_prob_{HnswRabitqEntity::kDefaultBFNegativeProbability};
  float max_scan_ratio_{HnswRabitqEntity::kDefaultScanRatio};

  uint32_t magic_{0U};
  State state_{STATE_INIT};
  bool bf_enabled_{false};
  bool check_crc_enabled_{false};
  bool filter_same_key_{false};
  bool get_vector_enabled_{false};
  bool force_padding_topk_enabled_{false};
  bool use_id_map_{true};

  //! avoid add vector while dumping index
  ailego::SharedMutex shared_mutex_{};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_rabitq/hnsw_rabitq_streamer_entity.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "hnsw_rabitq_streamer_entity.h"
#include <ailego/utility/memory_helper.h>

// #define DEBUG_PRINT

namespace zvec {
namespace core {

//! Construct an entity; stats_ is initialized from the given stats object.
//! All other setup is deferred to init().
HnswRabitqStreamerEntity::HnswRabitqStreamerEntity(IndexStreamer::Stats &stats)
    : stats_(stats) {}

//! Destructor: nothing to release explicitly, members clean up themselves
HnswRabitqStreamerEntity::~HnswRabitqStreamerEntity() = default;

//! Create the helper objects (chunk broker, upper-neighbor index, key map and
//! its lock) and compute the aligned size of one node record.
//! Returns 0 on success, IndexError_InvalidArgument when
//! scaling_factor()^kMaxGraphLayers is below max_doc_cnt, or
//! IndexError_NoMemory when a helper object could not be created.
int HnswRabitqStreamerEntity::init(size_t max_doc_cnt) {
  const bool scaling_too_small =
      std::pow(scaling_factor(), kMaxGraphLayers) < max_doc_cnt;
  if (scaling_too_small) {
    LOG_ERROR("scalingFactor=%zu is too small", scaling_factor());
    return IndexError_InvalidArgument;
  }

  std::lock_guard<std::mutex> guard(mutex_);
  broker_ = std::make_shared<HnswRabitqChunkBroker>(stats_);
  upper_neighbor_index_ = std::make_shared<NIHashMap>();
  keys_map_lock_ = std::make_shared<ailego::SharedMutex>();
  keys_map_ = std::make_shared<HashMap<key_t, node_id_t>>();

  const bool all_created =
      keys_map_ && upper_neighbor_index_ && broker_ && keys_map_lock_;
  if (!all_created) {
    LOG_ERROR("HnswRabitqStreamerEntity new object failed");
    return IndexError_NoMemory;
  }
  keys_map_->set_empty_key(kInvalidKey);

  neighbor_size_ = neighbors_size();
  upper_neighbor_size_ = upper_neighbors_size();

  //! one node record = vector + key + level 0 neighbors, then aligned
  const size_t unaligned_size = vector_size() + sizeof(key_t) + neighbor_size_;
  set_node_size(AlignSize(unaligned_size));
  return 0;
}

//! Reset the entity to its pre-init state: clear the header, restore the
//! chunk layout fields to their defaults, empty the lookup structures and
//! drop the chunk broker. Always returns 0.
int HnswRabitqStreamerEntity::cleanup() {
  std::lock_guard<std::mutex> guard(mutex_);

  mutable_header()->clear();

  // chunk layout parameters
  chunk_size_ = kDefaultChunkSize;
  node_index_mask_bits_ = 0U;
  node_index_mask_ = 0U;
  node_cnt_per_chunk_ = 0U;
  neighbor_size_ = 0U;
  upper_neighbor_size_ = 0U;

  // lookup structures only exist after init()
  if (upper_neighbor_index_ != nullptr) {
    upper_neighbor_index_->cleanup();
  }
  if (keys_map_ != nullptr) {
    keys_map_->clear();
  }

  node_chunks_.clear();
  upper_neighbor_chunks_.clear();

  filter_same_key_ = false;
  get_vector_enabled_ = false;

  broker_.reset();
  return 0;
}

//! Overwrite the neighbor list of node `id` at `level`.
//! The header (count) and the neighbor ids are staged in a local buffer and
//! written to the chunk in a single write. Only the node id (`first`) of each
//! pair is stored. Returns 0 on success or IndexError_Runtime on short write.
int HnswRabitqStreamerEntity::update_neighbors(
    level_t level, node_id_t id,
    const std::vector<std::pair<node_id_t, ResultRecord>> &neighbors) {
  char staging[neighbor_size_];
  auto *nbr_hd = reinterpret_cast<NeighborsHeader *>(staging);

  const size_t cnt = neighbors.size();
  nbr_hd->neighbor_cnt = cnt;
  for (size_t pos = 0; pos < cnt; ++pos) {
    nbr_hd->neighbors[pos] = neighbors[pos].first;
  }

  // bytes actually used: header plus `cnt` neighbor ids
  const size_t used_bytes =
      reinterpret_cast<char *>(&nbr_hd->neighbors[cnt]) - &staging[0];

  auto loc = get_neighbor_chunk_loc(level, id);
  const size_t written = loc.first->write(loc.second, nbr_hd, used_bytes);
  if (ailego_unlikely(written != used_bytes)) {
    LOG_ERROR("Write neighbor header failed, ret=%zu", written);

    return IndexError_Runtime;
  }

  return 0;
}

//! Read the neighbor list of node `id` at `level`.
//! Level 0 neighbors are stored inside the node record (after vector and
//! key); upper levels are located via get_upper_neighbor_chunk_loc().
//! Returns an empty Neighbors object when the read is short.
const Neighbors HnswRabitqStreamerEntity::get_neighbors(level_t level,
                                                        node_id_t id) const {
  Chunk *src_chunk = nullptr;
  size_t read_offset = 0UL;
  size_t read_bytes = neighbor_size_;

  if (level != 0UL) {
    auto upper_loc = get_upper_neighbor_chunk_loc(level, id);
    src_chunk = upper_neighbor_chunks_[upper_loc.first].get();
    read_offset = upper_loc.second;
    read_bytes = upper_neighbor_size_;
  } else {
    uint32_t chunk_idx = id >> node_index_mask_bits_;
    read_offset =
        (id & node_index_mask_) * node_size() + vector_size() + sizeof(key_t);

    sync_chunks(HnswRabitqChunkBroker::CHUNK_TYPE_NODE, chunk_idx,
                &node_chunks_);
    ailego_assert_with(chunk_idx < node_chunks_.size(), "invalid chunk idx");
    src_chunk = node_chunks_[chunk_idx].get();
  }

  ailego_assert_with(read_offset < src_chunk->data_size(),
                     "invalid chunk offset");
  IndexStorage::MemoryBlock neighbor_block;
  const size_t got = src_chunk->read(read_offset, neighbor_block, read_bytes);
  if (ailego_unlikely(got != read_bytes)) {
    LOG_ERROR("Read neighbor header failed, ret=%zu", got);
    return Neighbors();
  }
  return Neighbors(std::move(neighbor_block));
}

//! Get vector data by node id.
//! A short read is logged; the pointer filled in by the chunk read is
//! returned regardless.
const void *HnswRabitqStreamerEntity::get_vector(node_id_t id) const {
  auto loc = get_vector_chunk_loc(id);
  ailego_assert_with(loc.first < node_chunks_.size(), "invalid chunk idx");
  ailego_assert_with(loc.second < node_chunks_[loc.first]->data_size(),
                     "invalid chunk offset");

  const void *data = nullptr;
  size_t want = vector_size();
  size_t got = node_chunks_[loc.first]->read(loc.second, &data, want);
  if (ailego_unlikely(got != want)) {
    LOG_ERROR("Read vector failed, offset=%zu, read size=%zu, ret=%zu",
              static_cast<size_t>(loc.second), want, got);
  }

  return data;
}

//! Get vector data for `count` node ids; vecs[i] receives the pointer for
//! ids[i]. Returns 0 on success, or IndexError_ReadData at the first short
//! read (entries after the failing one are left untouched).
int HnswRabitqStreamerEntity::get_vector(const node_id_t *ids, uint32_t count,
                                         const void **vecs) const {
  for (uint32_t idx = 0U; idx < count; ++idx) {
    auto loc = get_vector_chunk_loc(ids[idx]);
    ailego_assert_with(loc.first < node_chunks_.size(), "invalid chunk idx");
    ailego_assert_with(loc.second < node_chunks_[loc.first]->data_size(),
                       "invalid chunk offset");

    size_t want = vector_size();
    size_t got = node_chunks_[loc.first]->read(loc.second, vecs + idx, want);
    if (ailego_unlikely(got != want)) {
      LOG_ERROR("Read vector failed, offset=%zu, read size=%zu, ret=%zu",
                static_cast<size_t>(loc.second), want, got);
      return IndexError_ReadData;
    }
  }
  return 0;
}

int HnswRabitqStreamerEntity::get_vector(
    const node_id_t id, IndexStorage::MemoryBlock &block) const {
  auto loc = get_vector_chunk_loc(id);
  ailego_assert_with(loc.first < node_chunks_.size(), "invalid chunk idx");
  ailego_assert_with(loc.second < node_chunks_[loc.first]->data_size(),
                     "invalid chunk offset");

  size_t read_size = vector_size();

  size_t ret = node_chunks_[loc.first]->read(loc.second, block, read_size);
  if (ailego_unlikely(ret != read_size)) {
    LOG_ERROR("Read vector failed, offset=%zu, read size=%zu, ret=%zu",
              static_cast<size_t>(loc.second), read_size, ret);
    return IndexError_ReadData;
  }
  return 0;
}

int HnswRabitqStreamerEntity::get_vector(
    const node_id_t *ids, uint32_t count,
    std::vector<IndexStorage::MemoryBlock> &vec_blocks) const {
  vec_blocks.resize(count);
  for (auto i = 0U; i < count; ++i) {
    auto loc = get_vector_chunk_loc(ids[i]);
    ailego_assert_with(loc.first < node_chunks_.size(), "invalid chunk idx");
    ailego_assert_with(loc.second < node_chunks_[loc.first]->data_size(),
                       "invalid chunk offset");

    size_t read_size = vector_size();

    size_t ret =
        node_chunks_[loc.first]->read(loc.second, vec_blocks[i], read_size);
    if (ailego_unlikely(ret != read_size)) {
      LOG_ERROR("Read vector failed, offset=%zu, read size=%zu, ret=%zu",
                static_cast<size_t>(loc.second), read_size, ret);
      return IndexError_ReadData;
    }
  }
  return 0;
}

//! Get the primary key of a node id.
//! When use_key_info_map_ is false the node id itself is the key. Otherwise
//! the key is read from the node record in the node chunk; kInvalidKey is
//! returned when that read is short.
key_t HnswRabitqStreamerEntity::get_key(node_id_t id) const {
  if (!use_key_info_map_) {
    return id;
  }

  auto loc = get_key_chunk_loc(id);
  IndexStorage::MemoryBlock key_block;
  ailego_assert_with(loc.first < node_chunks_.size(), "invalid chunk idx");
  ailego_assert_with(loc.second < node_chunks_[loc.first]->data_size(),
                     "invalid chunk offset");
  size_t ret =
      node_chunks_[loc.first]->read(loc.second, key_block, sizeof(key_t));
  if (ailego_unlikely(ret != sizeof(key_t))) {
    // This path reads the key, not the vector; say so in the log.
    LOG_ERROR("Read key failed, ret=%zu", ret);
    return kInvalidKey;
  }

  return *reinterpret_cast<const key_t *>(key_block.data());
}

//! Append `neighbor_id` to the neighbor list of node `id` at `level`.
//! `size` is the current neighbor count: the id is written into slot `size`
//! and the stored count is then updated to size + 1. Write failures are
//! logged only; the count is not updated when the id write fails.
void HnswRabitqStreamerEntity::add_neighbor(level_t level, node_id_t id,
                                            uint32_t size,
                                            node_id_t neighbor_id) {
  auto loc = get_neighbor_chunk_loc(level, id);
  const size_t slot_offset =
      loc.second + sizeof(NeighborsHeader) + size * sizeof(node_id_t);
  ailego_assert_with(size < neighbor_cnt(level), "invalid neighbor size");
  ailego_assert_with(slot_offset < loc.first->data_size(),
                     "invalid chunk offset");

  size_t written =
      loc.first->write(slot_offset, &neighbor_id, sizeof(node_id_t));
  if (ailego_unlikely(written != sizeof(node_id_t))) {
    LOG_ERROR("Write neighbor id failed, ret=%zu", written);
    return;
  }

  // the count is stored at the start of the neighbor record
  const uint32_t updated_cnt = size + 1;
  written = loc.first->write(loc.second, &updated_cnt, sizeof(uint32_t));
  if (ailego_unlikely(written != sizeof(uint32_t))) {
    LOG_ERROR("Write neighbor cnt failed, ret=%zu", written);
  }
}

//! Load the persisted header from `header_chunk`, validate it against the
//! current parameters and fetch every node chunk and upper-neighbor chunk
//! from the broker.
//! Returns 0 on success, IndexError_InvalidFormat for an undersized header or
//! a missing chunk, IndexError_ReadData on a short header read, or the error
//! of check_hnsw_index() (the broker is closed in that case).
int HnswRabitqStreamerEntity::init_chunks(const Chunk::Pointer &header_chunk) {
  const auto expected_size = header_size();
  if (header_chunk->data_size() < expected_size) {
    LOG_ERROR("Invalid header chunk size");
    return IndexError_InvalidFormat;
  }

  IndexStorage::MemoryBlock header_block;
  size_t got = header_chunk->read(0UL, header_block, expected_size);
  if (ailego_unlikely(got != expected_size)) {
    LOG_ERROR("Read header chunk failed");
    return IndexError_ReadData;
  }
  *mutable_header() =
      *reinterpret_cast<const HNSWHeader *>(header_block.data());

  const int check_ret = check_hnsw_index(&header());
  if (check_ret != 0) {
    broker_->close();
    return check_ret;
  }

  // level 0 node chunks
  const auto node_chunk_cnt =
      broker_->get_chunk_cnt(HnswRabitqChunkBroker::CHUNK_TYPE_NODE);
  node_chunks_.resize(node_chunk_cnt);
  for (auto seq = 0UL; seq < node_chunks_.size(); ++seq) {
    auto &slot = node_chunks_[seq];
    slot = broker_->get_chunk(HnswRabitqChunkBroker::CHUNK_TYPE_NODE, seq);
    if (!slot) {
      LOG_ERROR("Missing hnsw streamer data chunk %zu th of %zu", seq,
                node_chunks_.size());
      return IndexError_InvalidFormat;
    }
  }

  // upper level neighbor chunks
  const auto upper_chunk_cnt =
      broker_->get_chunk_cnt(HnswRabitqChunkBroker::CHUNK_TYPE_UPPER_NEIGHBOR);
  upper_neighbor_chunks_.resize(upper_chunk_cnt);
  for (auto seq = 0UL; seq < upper_neighbor_chunks_.size(); ++seq) {
    auto &slot = upper_neighbor_chunks_[seq];
    slot = broker_->get_chunk(HnswRabitqChunkBroker::CHUNK_TYPE_UPPER_NEIGHBOR,
                              seq);
    if (!slot) {
      LOG_ERROR("Missing hnsw streamer index chunk %zu th of %zu", seq,
                upper_neighbor_chunks_.size());
      return IndexError_InvalidFormat;
    }
  }

  return 0;
}

//! Open the index backed by storage `stg`.
//!
//! For an empty storage (no header chunk) a header chunk is allocated and
//! the current header written to it. For an existing index the header and
//! chunks are loaded via init_chunks(), the document count is corrected from
//! the node chunk sizes when it disagrees with the header, and keys_map_ is
//! rebuilt when key filtering or vector fetching by key is enabled.
//!
//! Returns 0 on success, IndexError_InvalidArgument for a null storage,
//! IndexError_InvalidFormat when the last node chunk holds a partial node,
//! IndexError_WriteData when the new header cannot be written, or the error
//! code of the failing sub-step.
int HnswRabitqStreamerEntity::open(IndexStorage::Pointer stg,
                                   uint64_t max_index_size, bool check_crc) {
  // stg is dereferenced right below; reject a null pointer up front.
  if (ailego_unlikely(!stg)) {
    LOG_ERROR("Invalid storage pointer");
    return IndexError_InvalidArgument;
  }

  std::lock_guard<std::mutex> lock(mutex_);
  bool huge_page = stg->isHugePage();
  LOG_DEBUG("huge_page: %d", (int)huge_page);
  int ret = init_chunk_params(max_index_size, huge_page);
  if (ailego_unlikely(ret != 0)) {
    LOG_ERROR("init_chunk_params failed for %s", IndexError::What(ret));
    return ret;
  }
  ret = broker_->open(std::move(stg), max_index_size_, chunk_size_, check_crc);
  if (ailego_unlikely(ret != 0)) {
    LOG_ERROR("Open index failed for %s", IndexError::What(ret));
    return ret;
  }
  ret = upper_neighbor_index_->init(broker_, upper_neighbor_chunk_size_,
                                    scaling_factor(), estimate_doc_capacity(),
                                    kUpperHashMemoryInflateRatio);
  if (ailego_unlikely(ret != 0)) {
    LOG_ERROR("Init neighbor hash map failed");
    return ret;
  }

  //! init header
  auto header_chunk =
      broker_->get_chunk(HnswRabitqChunkBroker::CHUNK_TYPE_HEADER,
                         HnswRabitqChunkBroker::kDefaultChunkSeqId);
  if (!header_chunk) {  // open empty index, create one
    auto p = broker_->alloc_chunk(HnswRabitqChunkBroker::CHUNK_TYPE_HEADER,
                                  HnswRabitqChunkBroker::kDefaultChunkSeqId,
                                  header_size());
    if (ailego_unlikely(p.first != 0)) {
      LOG_ERROR("Alloc header chunk failed");
      return p.first;
    }
    size_t size = p.second->write(0UL, &header(), header_size());
    if (ailego_unlikely(size != header_size())) {
      LOG_ERROR("Write header chunk failed");
      return IndexError_WriteData;
    }
    return 0;
  }

  //! Open an exist hnsw index
  ret = init_chunks(header_chunk);
  if (ailego_unlikely(ret != 0)) {
    return ret;
  }

  //! total docs including features wrote in index but neighbors may not ready
  node_id_t total_vecs = 0;
  if (node_chunks_.size() > 0) {
    size_t last_idx = node_chunks_.size() - 1;
    auto last_chunk = node_chunks_[last_idx];
    // the last chunk must hold a whole number of node records
    if (last_chunk->data_size() % node_size()) {
      LOG_WARN("The index may broken");
      return IndexError_InvalidFormat;
    }
    total_vecs = last_idx * node_cnt_per_chunk_ +
                 node_chunks_[last_idx]->data_size() / node_size();
  }

  LOG_INFO(
      "Open index, l0NeighborCnt=%zu upperNeighborCnt=%zu "
      "efConstruction=%zu curDocCnt=%u totalVecs=%u maxLevel=%u",
      l0_neighbor_cnt(), upper_neighbor_cnt(), ef_construction(), doc_cnt(),
      total_vecs, cur_max_level());
  //! try to correct the docCnt if index not fully flushed
  if (doc_cnt() != total_vecs) {
    LOG_WARN("Index closed abnormally, using totalVecs as curDocCnt");
    *mutable_doc_cnt() = total_vecs;
  }

  //! rebuild the key -> node id map; slots holding kInvalidKey are skipped
  if ((filter_same_key_ || get_vector_enabled_) && use_key_info_map_) {
    for (node_id_t id = 0U; id < doc_cnt(); ++id) {
      // read the key once per node instead of twice
      const key_t key = get_key(id);
      if (key == kInvalidKey) {
        continue;
      }
      (*keys_map_)[key] = id;
    }
  }

  stats_.set_loaded_count(doc_cnt());

  return 0;
}

//! Close the index: flush the header, reset all in-memory state and close
//! the chunk broker. Returns the result of the broker's close().
int HnswRabitqStreamerEntity::close() {
  LOG_DEBUG("close index");

  std::lock_guard<std::mutex> guard(mutex_);

  // persist the header before it is wiped
  flush_header();

  mutable_header()->reset();
  upper_neighbor_index_->cleanup();
  keys_map_->clear();
  header_.clear();
  node_chunks_.clear();
  upper_neighbor_chunks_.clear();

  const int close_ret = broker_->close();
  return close_ret;
}

//! Flush the header and then the chunk broker with the given checkpoint.
//! Returns the broker's flush result (0 on success).
int HnswRabitqStreamerEntity::flush(uint64_t checkpoint) {
  LOG_INFO("Flush index, curDocs=%zu", static_cast<size_t>(doc_cnt()));

  std::lock_guard<std::mutex> guard(mutex_);
  flush_header();

  const int flush_ret = broker_->flush(checkpoint);
  return flush_ret;
}

//! Dump the index through `dumper`.
//! Collects the key of every node id (in id order) and passes it, together
//! with a per-node level lookup, to dump_segments(). On success the dumped
//! size statistic is increased by the value dump_segments() returned.
//! Returns 0 on success or the negative result of dump_segments().
int HnswRabitqStreamerEntity::dump(const IndexDumper::Pointer &dumper) {
  LOG_INFO("Dump index, curDocs=%zu", static_cast<size_t>(doc_cnt()));

  //! keys indexed by node id
  std::vector<key_t> keys(doc_cnt());
  for (node_id_t node = 0; node < doc_cnt(); ++node) {
    keys[node] = get_key(node);
  }

  //! level of a node: 0 unless it has an entry in the upper neighbor index
  auto get_level = [&](node_id_t id) {
    auto it = upper_neighbor_index_->find(id);
    if (it != upper_neighbor_index_->end()) {
      auto meta = reinterpret_cast<const UpperNeighborIndexMeta *>(&it->second);
      return meta->level;
    }
    return 0U;
  };

  auto dumped = dump_segments(dumper, keys.data(), get_level);
  if (ailego_unlikely(dumped < 0)) {
    return dumped;
  }
  *stats_.mutable_dumped_size() += dumped;

  return 0;
}

int HnswRabitqStreamerEntity::check_hnsw_index(const HNSWHeader *hd) const {
  if (l0_neighbor_cnt() != hd->l0_neighbor_cnt() ||
      upper_neighbor_cnt() != hd->upper_neighbor_cnt()) {
    LOG_ERROR("Param neighbor cnt: %zu:%zu mismatch index previous %zu:%zu",
              l0_neighbor_cnt(), upper_neighbor_cnt(), hd->l0_neighbor_cnt(),
              hd->upper_neighbor_cnt());
    return IndexError_Mismatch;
  }
  if (vector_size() != hd->vector_size()) {
    LOG_ERROR("vector size %zu mismatch index previous %zu", vector_size(),
              hd->vector_size());
    return IndexError_Mismatch;
  }
  if (ef_construction() != hd->ef_construction()) {
    LOG_WARN("Param efConstruction %zu mismatch index previous %zu",
             ef_construction(), hd->ef_construction());
  }
  if (scaling_factor() != hd->scaling_factor()) {
    LOG_WARN("Param scalingFactor %zu mismatch index previous %zu",
             scaling_factor(), hd->scaling_factor());
    return IndexError_Mismatch;
  }
  if (prune_cnt() != hd->neighbor_prune_cnt()) {
    LOG_WARN("Param pruneCnt %zu mismatch index previous %zu", prune_cnt(),
             hd->neighbor_prune_cnt());
    return IndexError_Mismatch;
  }
  if ((hd->entry_point() != kInvalidNodeId &&
       hd->entry_point() >= hd->doc_cnt()) ||
      (hd->entry_point() == kInvalidNodeId && hd->doc_cnt() > 0U)) {
    LOG_WARN("Invalid entryPoint %zu, docCnt %zu",
             static_cast<size_t>(hd->entry_point()),
             static_cast<size_t>(hd->doc_cnt()));
    return IndexError_InvalidFormat;
  }
  if (hd->entry_point() == kInvalidNodeId &&
      broker_->get_chunk_cnt(HnswRabitqChunkBroker::CHUNK_TYPE_NODE) > 0) {
    LOG_WARN("The index is broken, maybe it haven't flush");
    return IndexError_InvalidFormat;
  }

  return 0;
}

//! Append a new node (vector + key) after the last stored node.
//!
//! The new node id is the current doc_cnt(). A new node chunk is allocated
//! when there is no chunk yet or the last one already holds
//! node_cnt_per_chunk_ records. On success *id receives the new node id,
//! doc_cnt is incremented and the broker is marked dirty.
//!
//! Returns 0 on success, IndexError_Duplicate when filter_same_key_ is set
//! and the key already exists, IndexError_IndexFull when no further chunk
//! may be added, IndexError_WriteData / IndexError_Runtime on chunk write or
//! resize failure, or the error of alloc_chunk() / add_upper_neighbor().
int HnswRabitqStreamerEntity::add_vector(level_t level, key_t key,
                                         const void *vec, node_id_t *id) {
  Chunk::Pointer node_chunk;
  size_t chunk_offset = -1UL;

  std::lock_guard<std::mutex> lock(mutex_);
  // duplicate check
  if (ailego_unlikely(filter_same_key_ && get_id(key) != kInvalidNodeId)) {
    LOG_WARN("Try to add duplicate key, ignore it");
    return IndexError_Duplicate;
  }

  node_id_t local_id = static_cast<node_id_t>(doc_cnt());
  // wraps to -1U when there is no chunk yet
  uint32_t chunk_index = node_chunks_.size() - 1U;
  if (chunk_index == -1U ||
      (node_chunks_[chunk_index]->data_size() >=
       node_cnt_per_chunk_ * node_size())) {  // no space left and need to alloc
    // the reserved capacity of node_chunks_ is used as the chunk quota
    if (ailego_unlikely(node_chunks_.capacity() == node_chunks_.size())) {
      LOG_ERROR("add vector failed for no memory quota");
      return IndexError_IndexFull;
    }
    chunk_index++;
    auto p = broker_->alloc_chunk(HnswRabitqChunkBroker::CHUNK_TYPE_NODE,
                                  chunk_index, chunk_size_);
    if (ailego_unlikely(p.first != 0)) {
      LOG_ERROR("Alloc data chunk failed");
      return p.first;
    }
    node_chunk = p.second;
    chunk_offset = 0UL;
    node_chunks_.emplace_back(node_chunk);
  } else {
    node_chunk = node_chunks_[chunk_index];
    chunk_offset = node_chunk->data_size();
  }

  size_t size = node_chunk->write(chunk_offset, vec, vector_size());
  if (ailego_unlikely(size != vector_size())) {
    LOG_ERROR("Chunk write vec failed, ret=%zu", size);
    return IndexError_WriteData;
  }
  size = node_chunk->write(chunk_offset + vector_size(), &key, sizeof(key_t));
  if (ailego_unlikely(size != sizeof(key_t))) {
    // this write stores the key, so report it as such
    LOG_ERROR("Chunk write key failed, ret=%zu", size);
    return IndexError_WriteData;
  }
  //! level 0 neighbors is inited to zero by default

  int ret = add_upper_neighbor(level, local_id);
  if (ret != 0) {
    return ret;
  }

  // grow the chunk's data size to cover the new node record
  chunk_offset += node_size();
  if (ailego_unlikely(node_chunk->resize(chunk_offset) != chunk_offset)) {
    LOG_ERROR("Chunk resize to %zu failed", chunk_offset);
    return IndexError_Runtime;
  }
  if (filter_same_key_ || get_vector_enabled_) {
    if (use_key_info_map_) {
      // scoped guard: the lock is released even if the map insert throws
      std::lock_guard<ailego::SharedMutex> keys_guard(*keys_map_lock_);
      (*keys_map_)[key] = local_id;
    }
  }

  *mutable_doc_cnt() += 1;
  broker_->mark_dirty();
  *id = local_id;

  return 0;
}

//! Store a vector at an explicit node id; the id is also used as the key.
//!
//! Node slots between the current doc_cnt() and `id` are filled with
//! kInvalidKey so that node ids stay positional. Node chunks are allocated
//! on demand. doc_cnt is raised to id + 1 (and the chunk resized) only when
//! `id` is not below the current doc_cnt.
//!
//! Returns 0 on success, IndexError_Duplicate when filter_same_key_ is set
//! and the key already exists, IndexError_IndexFull when no further chunk
//! may be added, IndexError_WriteData / IndexError_Runtime on chunk write or
//! resize failure, or the error of alloc_chunk() / add_upper_neighbor().
int HnswRabitqStreamerEntity::add_vector_with_id(level_t level, node_id_t id,
                                                 const void *vec) {
  Chunk::Pointer node_chunk;
  size_t chunk_offset = -1UL;
  key_t key = id;

  std::lock_guard<std::mutex> lock(mutex_);

  // duplicate check
  if (ailego_unlikely(filter_same_key_ && get_id(key) != kInvalidNodeId)) {
    LOG_WARN("Try to add duplicate key, ignore it");
    return IndexError_Duplicate;
  }

  // set node_chunk & chunk_offset if succeed
  auto func_get_node_chunk_and_offset = [&](node_id_t node_id) -> int {
    uint32_t chunk_index = node_id >> node_index_mask_bits_;
    ailego_assert_with(chunk_index <= node_chunks_.size(), "invalid chunk idx");
    // belongs to next chunk
    if (chunk_index == node_chunks_.size()) {
      // the reserved capacity of node_chunks_ is used as the chunk quota
      if (ailego_unlikely(node_chunks_.capacity() == node_chunks_.size())) {
        LOG_ERROR("add vector failed for no memory quota");
        return IndexError_IndexFull;
      }
      auto p = broker_->alloc_chunk(HnswRabitqChunkBroker::CHUNK_TYPE_NODE,
                                    chunk_index, chunk_size_);
      if (ailego_unlikely(p.first != 0)) {
        LOG_ERROR("Alloc data chunk failed");
        return p.first;
      }
      node_chunk = p.second;
      node_chunks_.emplace_back(node_chunk);
    }

    node_chunk = node_chunks_[chunk_index];
    chunk_offset = (node_id & node_index_mask_) * node_size();
    return 0;
  };

  // fill the gap [doc_cnt, id) with invalid keys
  for (size_t start_id = doc_cnt(); start_id < id; ++start_id) {
    if (auto ret = func_get_node_chunk_and_offset(start_id); ret != 0) {
      LOG_ERROR("func_get_node_chunk_and_offset failed");
      return ret;
    }
    size_t size = node_chunk->write(chunk_offset + vector_size(), &kInvalidKey,
                                    sizeof(key_t));
    if (ailego_unlikely(size != sizeof(key_t))) {
      LOG_ERROR("Chunk write key failed, ret=%zu", size);
      return IndexError_WriteData;
    }

    chunk_offset += node_size();
    if (ailego_unlikely(node_chunk->resize(chunk_offset) != chunk_offset)) {
      LOG_ERROR("Chunk resize to %zu failed", chunk_offset);
      return IndexError_Runtime;
    }
  }

  if (auto ret = func_get_node_chunk_and_offset(id); ret != 0) {
    LOG_ERROR("func_get_node_chunk_and_offset failed");
    return ret;
  }

  size_t size = node_chunk->write(chunk_offset, vec, vector_size());
  if (ailego_unlikely(size != vector_size())) {
    LOG_ERROR("Chunk write vec failed, ret=%zu", size);
    return IndexError_WriteData;
  }

  size = node_chunk->write(chunk_offset + vector_size(), &key, sizeof(key_t));
  if (ailego_unlikely(size != sizeof(key_t))) {
    // this write stores the key; use the same message as the gap-fill loop
    LOG_ERROR("Chunk write key failed, ret=%zu", size);
    return IndexError_WriteData;
  }
  //! level 0 neighbors is inited to zero by default

  int ret = add_upper_neighbor(level, id);
  if (ret != 0) {
    return ret;
  }

  // only extend doc_cnt / chunk size when id lies at or past the current end
  if (*mutable_doc_cnt() <= id) {
    *mutable_doc_cnt() = id + 1;
    chunk_offset += node_size();
    if (ailego_unlikely(node_chunk->resize(chunk_offset) != chunk_offset)) {
      LOG_ERROR("Chunk resize to %zu failed", chunk_offset);
      return IndexError_Runtime;
    }
  }

  if (filter_same_key_ || get_vector_enabled_) {
    if (use_key_info_map_) {
      // scoped guard: the lock is released even if the map insert throws
      std::lock_guard<ailego::SharedMutex> keys_guard(*keys_map_lock_);
      (*keys_map_)[key] = id;
    }
  }

  broker_->mark_dirty();

  return 0;
}

//! Update the entry point and max level through the base class, then flush
//! the header.
void HnswRabitqStreamerEntity::update_ep_and_level(node_id_t ep,
                                                   level_t level) {
  HnswRabitqEntity::update_ep_and_level(ep, level);
  flush_header();
}

//! Make a copy of this entity.
//! Every node chunk and upper-neighbor chunk is cloned individually; the
//! header is copied, while the upper-neighbor index, key map, key map lock
//! and broker pointers are passed on to the new entity. Returns an empty
//! pointer when a chunk clone or the allocation of the new entity fails.
const HnswRabitqEntity::Pointer HnswRabitqStreamerEntity::clone() const {
  std::vector<Chunk::Pointer> node_copies;
  node_copies.reserve(node_chunks_.size());
  for (const auto &chunk : node_chunks_) {
    auto copy = chunk->clone();
    if (ailego_unlikely(!copy)) {
      LOG_ERROR("HnswRabitqStreamerEntity get chunk failed in clone");
      return HnswRabitqEntity::Pointer();
    }
    node_copies.emplace_back(std::move(copy));
  }

  std::vector<Chunk::Pointer> upper_copies;
  upper_copies.reserve(upper_neighbor_chunks_.size());
  for (const auto &chunk : upper_neighbor_chunks_) {
    auto copy = chunk->clone();
    if (ailego_unlikely(!copy)) {
      LOG_ERROR("HnswRabitqStreamerEntity get chunk failed in clone");
      return HnswRabitqEntity::Pointer();
    }
    upper_copies.emplace_back(std::move(copy));
  }

  HnswRabitqStreamerEntity *cloned =
      new (std::nothrow) HnswRabitqStreamerEntity(
          stats_, header(), chunk_size_, node_index_mask_bits_,
          upper_neighbor_mask_bits_, filter_same_key_, get_vector_enabled_,
          upper_neighbor_index_, keys_map_lock_, keys_map_, use_key_info_map_,
          std::move(node_copies), std::move(upper_copies), broker_);
  if (ailego_unlikely(cloned == nullptr)) {
    LOG_ERROR("HnswRabitqStreamerEntity new failed");
  }
  return HnswRabitqEntity::Pointer(cloned);
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_rabitq/hnsw_rabitq_streamer_entity.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <iostream>
#include <ailego/parallel/lock.h>
#include <sparsehash/dense_hash_map>
#include <sparsehash/dense_hash_set>
#include <zvec/ailego/container/heap.h>
#include "zvec/core/framework/index_framework.h"
#include "hnsw_rabitq_chunk.h"
#include "hnsw_rabitq_entity.h"
#include "hnsw_rabitq_index_hash.h"
#include "hnsw_rabitq_params.h"

namespace zvec {
namespace core {

//! HnswRabitqStreamerEntity manage vector data, pkey, and node's neighbors
class HnswRabitqStreamerEntity : public HnswRabitqEntity {
 public:
  //! Cleanup
  //! return 0 on success, or errCode in failure
  virtual int cleanup() override;

  //! Make a copy of the streamer entity, to support thread-safe operation.
  //! The segment in the container cannot be read concurrently
  virtual const HnswRabitqEntity::Pointer clone() const override;

  //! Get primary key of the node id
  virtual key_t get_key(node_id_t id) const override;

  //! Get vector feature data by node id
  virtual const void *get_vector(node_id_t id) const override;

  //! Get vectors feature data by local ids
  virtual int get_vector(const node_id_t *ids, uint32_t count,
                         const void **vecs) const override;

  virtual int get_vector(const node_id_t id,
                         IndexStorage::MemoryBlock &block) const override;

  virtual int get_vector(
      const node_id_t *ids, uint32_t count,
      std::vector<IndexStorage::MemoryBlock> &vec_blocks) const override;

  //! Get the neighbors of the node id on the given graph level
  //! Note: the returned neighbors cannot be modified; use the methods
  //! below (update_neighbors / add_neighbor) to change them
  virtual const Neighbors get_neighbors(level_t level,
                                        node_id_t id) const override;

  //! Add vector and key to hnsw entity, and local id will be saved in id
  virtual int add_vector(level_t level, key_t key, const void *vec,
                         node_id_t *id) override;

  //! Add vector and id to hnsw entity
  virtual int add_vector_with_id(level_t level, node_id_t id,
                                 const void *vec) override;

  virtual int update_neighbors(
      level_t level, node_id_t id,
      const std::vector<std::pair<node_id_t, ResultRecord>> &neighbors)
      override;

  //! Append neighbor_id to the neighbors of node id on the given level
  //! Notice: the caller must ensure the neighbors are not full
  virtual void add_neighbor(level_t level, node_id_t id, uint32_t size,
                            node_id_t neighbor_id) override;

  //! Dump index by dumper
  virtual int dump(const IndexDumper::Pointer &dumper) override;

  virtual void update_ep_and_level(node_id_t ep, level_t level) override;

  //! Enable or disable the key -> node id map; when disabled, get_id()
  //! returns the key itself
  void set_use_key_info_map(bool use_id_map) {
    use_key_info_map_ = use_id_map;
    LOG_DEBUG("use_key_info_map_: %d", static_cast<int>(use_key_info_map_));
  }

 public:
  //! Constructor
  HnswRabitqStreamerEntity(IndexStreamer::Stats &stats);

  //! Destructor
  ~HnswRabitqStreamerEntity();

  //! Get vector feature data by key
  //! return nullptr when the key does not map to a node id
  virtual const void *get_vector_by_key(key_t key) const override {
    const node_id_t id = get_id(key);
    if (id == kInvalidNodeId) {
      return nullptr;
    }
    return get_vector(id);
  }

  virtual int get_vector_by_key(
      const key_t key, IndexStorage::MemoryBlock &block) const override {
    auto id = get_id(key);
    if (id != kInvalidNodeId) {
      return get_vector(id, block);
    } else {
      return IndexError_InvalidArgument;
    }
  }

  //! Init entity
  int init(size_t max_doc_cnt);

  //! Flush graph entity to disk
  //! return 0 on success, or errCode in failure
  int flush(uint64_t checkpoint);

  //! Open entity from storage
  //! return 0 on success, or errCode in failure
  int open(IndexStorage::Pointer stg, uint64_t max_index_size, bool check_crc);

  //! Close entity
  //! return 0 on success, or errCode in failure
  int close();

  //! Serialize the index meta into the storage held by the chunk broker
  int set_index_meta(const IndexMeta &meta) const {
    const auto &storage = broker_->storage();
    return IndexHelper::SerializeToStorage(meta, storage.get());
  }

  //! Deserialize the index meta from the storage held by the chunk broker
  int get_index_meta(IndexMeta *meta) const {
    const auto &storage = broker_->storage();
    return IndexHelper::DeserializeFromStorage(storage.get(), meta);
  }

  //! Set params: requested node chunk size in bytes.
  //! The value is stored in the 32-bit chunk_size_ member, and
  //! init_chunk_params() later re-aligns it to a page-size multiple.
  inline void set_chunk_size(size_t val) {
    chunk_size_ = val;
  }

  //! Set params: the filter_same_key flag.
  //! The insertion path in the .cc file records keys in the key map when this
  //! flag or get_vector is enabled (and the key map is in use).
  inline void set_filter_same_key(bool val) {
    filter_same_key_ = val;
  }

  //! Set params: the get_vector flag (stored as get_vector_enabled_)
  inline void set_get_vector(bool val) {
    get_vector_enabled_ = val;
  }

  //! Get vector local id by key
  //! When the key map is disabled the key itself is the local id.
  //! return kInvalidNodeId if the key is not present in the key map
  inline node_id_t get_id(key_t key) const {
    if (!use_key_info_map_) {
      return key;
    }

    // The iterator must be compared and dereferenced while the shared lock
    // is still held: writers insert into the map under the exclusive lock,
    // and an insertion may invalidate iterators obtained earlier.
    keys_map_lock_->lock_shared();
    auto it = keys_map_->find(key);
    const node_id_t id =
        (it == keys_map_->end()) ? kInvalidNodeId : it->second;
    keys_map_lock_->unlock_shared();
    return id;
  }

  //! Debug helper: print every (key, id) pair of the key map to stdout.
  //! Note: the map is traversed without taking keys_map_lock_
  void print_key_map() const {
    std::cout << "key map begins" << std::endl;

    for (auto it = keys_map_->begin(); it != keys_map_->end(); ++it) {
      std::cout << "key: " << it->first << ", id: " << it->second
                << std::endl;
    }

    std::cout << "key map ends" << std::endl;
  }

  //! Get the byte size of one level-0 neighbors record: the neighbors header
  //! followed by l0_neighbor_cnt() node ids
  inline size_t neighbors_size() const {
    const size_t ids_bytes = l0_neighbor_cnt() * sizeof(node_id_t);
    return sizeof(NeighborsHeader) + ids_bytes;
  }

  //! Get the byte size of one neighbors record for level > 0: the neighbors
  //! header followed by upper_neighbor_cnt() node ids
  inline size_t upper_neighbors_size() const {
    const size_t ids_bytes = upper_neighbor_cnt() * sizeof(node_id_t);
    return sizeof(NeighborsHeader) + ids_bytes;
  }


 private:
  //! Packed 32-bit value stored in the upper neighbor index for each node
  //! that has neighbors above level 0. `data` is the raw form that goes into
  //! the hash map; the bit-fields are the decoded view of the same bits.
  union UpperNeighborIndexMeta {
    struct {
      //! Level recorded for the node when its upper neighbors were added
      uint32_t level : 4;
      //! Composite 28-bit location: the chunk index in the high bits and the
      //! position of the node's first neighbors record inside that chunk in
      //! the low upper_neighbor_mask_bits_ bits
      uint32_t index : 28;
    };
    //! Raw representation of the two fields above
    uint32_t data;
  };

  //! Hash map alias: google::dense_hash_map hashed with std::hash<Key>
  template <class Key, class T>
  using HashMap = google::dense_hash_map<Key, T, std::hash<Key>>;
  //! Shared pointer to a HashMap
  template <class Key, class T>
  using HashMapPointer = std::shared_ptr<HashMap<Key, T>>;

  //! Hash set alias: google::dense_hash_set hashed with std::hash<Key>
  template <class Key>
  using HashSet = google::dense_hash_set<Key, std::hash<Key>>;
  //! Shared pointer to a HashSet
  template <class Key>
  using HashSetPointer = std::shared_ptr<HashSet<Key>>;

  //! Upper neighbor index hashmap: node id -> packed UpperNeighborIndexMeta
  using NIHashMap = HnswIndexHashMap<node_id_t, uint32_t>;
  using NIHashMapPointer = std::shared_ptr<NIHashMap>;

  //! Private constructor, only called by the clone method.
  //! The header is copied from `hd`; the upper neighbor index, the key map,
  //! its lock and the broker are shared pointers copied from the arguments,
  //! and the chunk vectors are moved in. The masks and the per-chunk node
  //! count are recomputed from the mask bit counts.
  //! Note: max_index_size_ and upper_neighbor_chunk_size_ are not passed in
  //! and keep their in-class default values.
  HnswRabitqStreamerEntity(IndexStreamer::Stats &stats, const HNSWHeader &hd,
                           size_t chunk_size, uint32_t node_index_mask_bits,
                           uint32_t upper_neighbor_mask_bits,
                           bool filter_same_key, bool get_vector_enabled,
                           const NIHashMapPointer &upper_neighbor_index,
                           std::shared_ptr<ailego::SharedMutex> &keys_map_lock,
                           const HashMapPointer<key_t, node_id_t> &keys_map,
                           bool use_key_info_map,
                           std::vector<Chunk::Pointer> &&node_chunks,
                           std::vector<Chunk::Pointer> &&upper_neighbor_chunks,
                           const HnswRabitqChunkBroker::Pointer &broker)
      : stats_(stats),
        chunk_size_(chunk_size),
        node_index_mask_bits_(node_index_mask_bits),
        // derived from the member initialized on the previous line
        node_cnt_per_chunk_(1UL << node_index_mask_bits_),
        node_index_mask_(node_cnt_per_chunk_ - 1),
        upper_neighbor_mask_bits_(upper_neighbor_mask_bits),
        upper_neighbor_mask_((1U << upper_neighbor_mask_bits_) - 1),
        filter_same_key_(filter_same_key),
        get_vector_enabled_(get_vector_enabled),
        use_key_info_map_(use_key_info_map),
        upper_neighbor_index_(upper_neighbor_index),
        keys_map_lock_(keys_map_lock),
        keys_map_(keys_map),
        node_chunks_(std::move(node_chunks)),
        upper_neighbor_chunks_(std::move(upper_neighbor_chunks)),
        broker_(broker) {
    *mutable_header() = hd;

    // The record sizes are computed by neighbors_size() /
    // upper_neighbors_size(), so they can only be set once the header is
    // copied — presumably the neighbor counts they use come from the header;
    // TODO confirm
    neighbor_size_ = neighbors_size();
    upper_neighbor_size_ = upper_neighbors_size();
  }

  //! Make sure `chunks` holds every chunk up to and including index `idx`,
  //! fetching the missing ones from the broker in ascending order.
  //! Called only in searching procedure per context, so no need to lock
  void sync_chunks(HnswRabitqChunkBroker::CHUNK_TYPE type, size_t idx,
                   std::vector<Chunk::Pointer> *chunks) const {
    while (ailego_unlikely(chunks->size() <= idx)) {
      // the next missing chunk has the index equal to the current size
      auto fetched = broker_->get_chunk(type, chunks->size());
      // the storage can ensure get chunk will success after the first get
      ailego_assert_with(!!fetched, "get chunk failed");
      chunks->emplace_back(std::move(fetched));
    }
  }

  //! Locate the vector of node `id`
  //! return pair: chunk index + byte offset of the node inside that chunk
  inline std::pair<uint32_t, uint32_t> get_vector_chunk_loc(
      node_id_t id) const {
    const uint32_t chunk_idx = id >> node_index_mask_bits_;
    // make sure the local chunk list already covers chunk_idx
    sync_chunks(HnswRabitqChunkBroker::CHUNK_TYPE_NODE, chunk_idx,
                &node_chunks_);

    const uint32_t offset = (id & node_index_mask_) * node_size();
    return std::pair<uint32_t, uint32_t>(chunk_idx, offset);
  }

  //! Locate the key of node `id`; the key is stored right after the vector
  //! return pair: chunk index + byte offset of the key inside that chunk
  inline std::pair<uint32_t, uint32_t> get_key_chunk_loc(node_id_t id) const {
    const uint32_t chunk_idx = id >> node_index_mask_bits_;
    // make sure the local chunk list already covers chunk_idx
    sync_chunks(HnswRabitqChunkBroker::CHUNK_TYPE_NODE, chunk_idx,
                &node_chunks_);

    const uint32_t offset =
        (id & node_index_mask_) * node_size() + vector_size();
    return std::pair<uint32_t, uint32_t>(chunk_idx, offset);
  }

  //! Locate the neighbors record of node `id` on `level` (level > 0).
  //! The node must already be registered in the upper neighbor index.
  //! return pair: upper neighbor chunk index + byte offset inside that chunk
  inline std::pair<uint32_t, uint32_t> get_upper_neighbor_chunk_loc(
      level_t level, node_id_t id) const {
    auto entry = upper_neighbor_index_->find(id);
    ailego_assert_abort(entry != upper_neighbor_index_->end(),
                        "Get upper neighbor header failed");

    // Decode the packed location: chunk index in the high bits, position of
    // the node's level-1 record in the low bits
    auto meta =
        reinterpret_cast<const UpperNeighborIndexMeta *>(&entry->second);
    const uint32_t packed = meta->index;
    const uint32_t chunk_idx = packed >> upper_neighbor_mask_bits_;
    const uint32_t first_slot = packed & upper_neighbor_mask_;
    // records of successive levels are laid out back to back
    const uint32_t offset = (first_slot + level - 1) * upper_neighbor_size_;

    sync_chunks(HnswRabitqChunkBroker::CHUNK_TYPE_UPPER_NEIGHBOR, chunk_idx,
                &upper_neighbor_chunks_);
    ailego_assert_abort(chunk_idx < upper_neighbor_chunks_.size(),
                        "invalid chunk idx");
    ailego_assert_abort(offset < upper_neighbor_chunks_[chunk_idx]->data_size(),
                        "invalid chunk offset");
    return std::pair<uint32_t, uint32_t>(chunk_idx, offset);
  }

  //! Locate the neighbors record of node `id` on `level`.
  //! Level 0 neighbors live in the node chunk after the vector and the key;
  //! higher levels live in the upper neighbor chunks.
  //! return pair: chunk + chunk offset
  inline std::pair<Chunk *, size_t> get_neighbor_chunk_loc(level_t level,
                                                           node_id_t id) const {
    if (level != 0UL) {
      const auto loc = get_upper_neighbor_chunk_loc(level, id);
      return std::make_pair(upper_neighbor_chunks_[loc.first].get(),
                            loc.second);
    }

    const uint32_t chunk_idx = id >> node_index_mask_bits_;
    const uint32_t offset =
        (id & node_index_mask_) * node_size() + vector_size() + sizeof(key_t);

    sync_chunks(HnswRabitqChunkBroker::CHUNK_TYPE_NODE, chunk_idx,
                &node_chunks_);
    ailego_assert_abort(chunk_idx < node_chunks_.size(), "invalid chunk idx");
    ailego_assert_abort(offset < node_chunks_[chunk_idx]->data_size(),
                        "invalid chunk offset");
    return std::make_pair(node_chunks_[chunk_idx].get(), offset);
  }

  //! Check whether the hnsw index described by the header is valid
  int check_hnsw_index(const HNSWHeader *hd) const;

  //! Get the total byte size of the upper neighbors records of a node whose
  //! top level is `level`: one upper_neighbor_size_ record per level
  size_t get_total_upper_neighbors_size(level_t level) const {
    return level * upper_neighbor_size_;
  }

  //! Add upper neighbor header and reserve space for upper neighbor
  //! Reserves `level` consecutive neighbors records for node `id` in the last
  //! upper neighbor chunk (allocating a new chunk when the last one has not
  //! enough padding left) and registers the location in the upper neighbor
  //! index. Nothing is done for level 0.
  //! return 0 on success, or errCode in failure
  int add_upper_neighbor(level_t level, node_id_t id) {
    if (level == 0) {
      return 0;
    }

    const size_t neighbors_size = get_total_upper_neighbors_size(level);
    Chunk::Pointer chunk;
    uint64_t chunk_index = 0UL;
    uint64_t chunk_offset = 0UL;

    // Test for "no chunk yet" explicitly instead of comparing a wrapped
    // `size() - 1` against `-1UL`: where unsigned long is 32 bits wide that
    // comparison never matches the 64-bit wrapped value, and the empty
    // vector would be indexed out of bounds.
    if (upper_neighbor_chunks_.empty() ||
        upper_neighbor_chunks_.back()->padding_size() < neighbors_size) {
      // no space left and need to alloc
      chunk_index = upper_neighbor_chunks_.size();
      // the vector must never reallocate, its capacity is the chunk quota
      if (ailego_unlikely(upper_neighbor_chunks_.capacity() ==
                          upper_neighbor_chunks_.size())) {
        LOG_ERROR("add upper neighbor failed for no memory quota");
        return IndexError_IndexFull;
      }
      auto p =
          broker_->alloc_chunk(HnswRabitqChunkBroker::CHUNK_TYPE_UPPER_NEIGHBOR,
                               chunk_index, upper_neighbor_chunk_size_);
      if (ailego_unlikely(p.first != 0)) {
        LOG_ERROR("Alloc data chunk failed");
        return p.first;
      }
      chunk = p.second;
      upper_neighbor_chunks_.emplace_back(chunk);
    } else {
      // append after the data already stored in the last chunk
      chunk_index = upper_neighbor_chunks_.size() - 1;
      chunk = upper_neighbor_chunks_.back();
      chunk_offset = chunk->data_size();
    }

    ailego_assert_with((size_t)level < kMaxGraphLayers, "invalid level");
    ailego_assert_with(chunk_offset % upper_neighbor_size_ == 0,
                       "invalid offset");
    ailego_assert_with((chunk_offset / upper_neighbor_size_) <
                           (1U << upper_neighbor_mask_bits_),
                       "invalid offset");
    ailego_assert_with(chunk_index < (1U << (28 - upper_neighbor_mask_bits_)),
                       "invalid chunk index");

    // Pack the level and the location (chunk index + record position) into
    // the 32-bit value stored in the upper neighbor index
    UpperNeighborIndexMeta meta;
    meta.level = level;
    meta.index = (chunk_index << upper_neighbor_mask_bits_) |
                 (chunk_offset / upper_neighbor_size_);
    chunk_offset += upper_neighbor_size_ * level;
    if (ailego_unlikely(!upper_neighbor_index_->insert(id, meta.data))) {
      LOG_ERROR("HashMap insert value failed");
      return IndexError_Runtime;
    }

    // Grow the chunk's data size to cover the newly reserved records
    if (ailego_unlikely(chunk->resize(chunk_offset) != chunk_offset)) {
      LOG_ERROR("Chunk resize to %zu failed", (size_t)chunk_offset);
      return IndexError_Runtime;
    }

    return 0;
  }

  //! Estimate how many docs fit: reserved node chunk slots times the number
  //! of nodes per chunk
  size_t estimate_doc_capacity() const {
    const size_t chunk_slots = node_chunks_.capacity();
    return chunk_slots * node_cnt_per_chunk_;
  }

  //! Derive all chunk layout parameters from chunk_size_, node_size() and
  //! upper_neighbor_size_, and reserve the chunk vectors.
  //! max_index_size: index size limit in bytes, 0 selects
  //!                 chunk_size_ * kDefaultMaxChunkCnt
  //! huge_page: align chunk sizes with AlignHugePageSize instead of
  //!            AlignPageSize
  //! return 0 (this function has no failure path)
  int init_chunk_params(size_t max_index_size, bool huge_page) {
    node_cnt_per_chunk_ = std::max<uint32_t>(1, chunk_size_ / node_size());
    //! align node cnt per chunk to pow of 2
    node_index_mask_bits_ = std::ceil(std::log2(node_cnt_per_chunk_));
    node_cnt_per_chunk_ = 1UL << node_index_mask_bits_;
    // the chunk size is recomputed from the rounded-up node count
    if (huge_page) {
      chunk_size_ = AlignHugePageSize(node_cnt_per_chunk_ * node_size());
    } else {
      chunk_size_ = AlignPageSize(node_cnt_per_chunk_ * node_size());
    }
    node_index_mask_ = node_cnt_per_chunk_ - 1;

    if (max_index_size == 0UL) {
      max_index_size_ = chunk_size_ * kDefaultMaxChunkCnt;
    } else {
      max_index_size_ = max_index_size;
    }

    //! To get a balanced upper neighbor chunk size.
    //! If the upper chunk size is equal to the node chunk size, it may waste
    //! upper neighbor chunk space; if the upper neighbor chunk size is too
    //! small, a large upper neighbor chunk index space is needed. So the
    //! ratio is chosen as the sqrt of the node/neighbor size ratio
    float ratio =
        std::sqrt(node_size() * scaling_factor() * 1.0f / upper_neighbor_size_);
    // an upper neighbor chunk must at least hold the records of one node
    // with kMaxGraphLayers levels
    if (huge_page) {
      upper_neighbor_chunk_size_ = AlignHugePageSize(
          std::max(get_total_upper_neighbors_size(kMaxGraphLayers),
                   static_cast<size_t>(chunk_size_ / ratio)));
    } else {
      upper_neighbor_chunk_size_ = AlignPageSize(
          std::max(get_total_upper_neighbors_size(kMaxGraphLayers),
                   static_cast<size_t>(chunk_size_ / ratio)));
    }
    upper_neighbor_mask_bits_ =
        std::ceil(std::log2(upper_neighbor_chunk_size_ / upper_neighbor_size_));
    upper_neighbor_mask_ = (1 << upper_neighbor_mask_bits_) - 1;

    // NOTE(review): max_index_size_ / chunk_size_ is an integer division, so
    // the quotient is already truncated and std::ceil has no effect here; a
    // max_index_size_ smaller than chunk_size_ yields 0 reserved node chunks
    size_t max_node_chunk_cnt = std::ceil(max_index_size_ / chunk_size_);
    // NOTE(review): the divisor below is also an integer division of two
    // uint32_t members
    size_t max_upper_chunk_cnt = std::ceil(
        (max_node_chunk_cnt * node_cnt_per_chunk_ * 1.0f / scaling_factor()) /
        (upper_neighbor_chunk_size_ / upper_neighbor_size_));
    // NOTE(review): if scaling_factor() returns an integer type this is an
    // integer division as well — confirm
    max_upper_chunk_cnt =
        max_upper_chunk_cnt + std::ceil(max_upper_chunk_cnt / scaling_factor());

    //! reserve space to avoid memmove in chunks vector emplace chunk, so
    //! as to lock-free in reading chunk
    node_chunks_.reserve(max_node_chunk_cnt);
    upper_neighbor_chunks_.reserve(max_upper_chunk_cnt);

    LOG_DEBUG(
        "Settings: nodeSize=%zu chunkSize=%u upperNeighborSize=%u "
        "upperNeighborChunkSize=%u "
        "nodeCntPerChunk=%u maxChunkCnt=%zu maxNeighborChunkCnt=%zu "
        "maxIndexSize=%zu ratio=%.3f",
        node_size(), chunk_size_, upper_neighbor_size_,
        upper_neighbor_chunk_size_, node_cnt_per_chunk_, max_node_chunk_cnt,
        max_upper_chunk_cnt, max_index_size_, ratio);

    return 0;
  }

  //! Init node chunk and neighbor chunks
  int init_chunks(const Chunk::Pointer &header_chunk);

  //! Write the in-memory header into the header chunk.
  //! Nothing is written unless the broker reports itself dirty.
  //! return 0 on success, or errCode in failure
  int flush_header(void) {
    if (broker_->dirty()) {
      auto header_chunk =
          broker_->get_chunk(HnswRabitqChunkBroker::CHUNK_TYPE_HEADER,
                             HnswRabitqChunkBroker::kDefaultChunkSeqId);
      if (ailego_unlikely(!header_chunk)) {
        LOG_ERROR("get header chunk failed");
        return IndexError_Runtime;
      }

      // the header occupies the beginning of the header chunk
      const size_t written =
          header_chunk->write(0UL, &header(), header_size());
      if (ailego_unlikely(written != header_size())) {
        LOG_ERROR("Write header chunk failed");
        return IndexError_WriteData;
      }
    }

    return 0;
  }

 private:
  //! Copying is disabled; use clone() to obtain a copy
  HnswRabitqStreamerEntity(const HnswRabitqStreamerEntity &) = delete;
  HnswRabitqStreamerEntity &operator=(const HnswRabitqStreamerEntity &) =
      delete;
  //! NOTE(review): an integer constant initialized from a float literal; the
  //! stored value is 2
  static constexpr uint64_t kUpperHashMemoryInflateRatio = 2.0f;

 private:
  //! Streamer statistics, owned by the caller
  IndexStreamer::Stats &stats_;
  HNSWHeader header_{};
  std::mutex mutex_{};
  //! Index size limit in bytes, set by init_chunk_params()
  size_t max_index_size_{0UL};
  //! Byte size of a node chunk
  uint32_t chunk_size_{kDefaultChunkSize};
  //! Byte size of an upper neighbor chunk
  uint32_t upper_neighbor_chunk_size_{kDefaultChunkSize};
  //! A node id splits into: chunk index = id >> node_index_mask_bits_,
  //! position in chunk = id & node_index_mask_
  uint32_t node_index_mask_bits_{0U};
  uint32_t node_cnt_per_chunk_{0U};
  uint32_t node_index_mask_{0U};
  //! Byte size of one level-0 neighbors record
  uint32_t neighbor_size_{0U};
  //! Byte size of one neighbors record for level > 0
  uint32_t upper_neighbor_size_{0U};
  //! UpperNeighborIndex.index composite chunkIdx and offset in chunk by the
  //! following mask
  uint32_t upper_neighbor_mask_bits_{0U};
  uint32_t upper_neighbor_mask_{0U};
  bool filter_same_key_{false};
  bool get_vector_enabled_{false};
  //! When false, get_id() returns the key itself instead of a map lookup
  bool use_key_info_map_{true};

  //! node id -> packed UpperNeighborIndexMeta
  NIHashMapPointer upper_neighbor_index_{};

  //! Guards keys_map_; both are passed on to clones
  mutable std::shared_ptr<ailego::SharedMutex> keys_map_lock_{};
  //! key -> node id
  HashMapPointer<key_t, node_id_t> keys_map_{};

  //! the chunks will be changed in searcher, so need mutable
  //! data chunk include: vector, key, level 0 neighbors
  mutable std::vector<Chunk::Pointer> node_chunks_{};

  //! upper neighbor chunk include: UpperNeighborHeader + (1~level) neighbors
  mutable std::vector<Chunk::Pointer> upper_neighbor_chunks_{};

  HnswRabitqChunkBroker::Pointer broker_{};  // chunk broker
};

}  // namespace core
}  // namespace zvec

================================================
FILE: src/core/algorithm/hnsw_rabitq/rabitq_converter.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "rabitq_converter.h"
#include <cstring>
#include <memory>
#include <rabitqlib/utils/rotator.hpp>
#include <zvec/ailego/container/params.h>
#include <zvec/ailego/parallel/thread_pool.h>
#include <zvec/ailego/utility/string_helper.h>
#include "ailego/pattern/defer.h"
#include "algorithm/hnsw_rabitq/rabitq_reformer.h"
#include "zvec/core/framework/index_cluster.h"
#include "zvec/core/framework/index_error.h"
#include "zvec/core/framework/index_factory.h"
#include "zvec/core/framework/index_features.h"
#include "zvec/core/framework/index_holder.h"
#include "zvec/core/framework/index_memory.h"
#include "zvec/core/framework/index_meta.h"
#include "rabitq_params.h"
#include "rabitq_utils.h"

#ifdef _MSC_VER
#define strncasecmp _strnicmp
#endif

namespace zvec {
namespace core {

//! Destructor: releases the centroids, the result holder and the rotator
RabitqConverter::~RabitqConverter() {
  (void)cleanup();
}

//! Initialize the converter from the index meta and the RaBitQ parameters
//! (total bits, cluster count, sample count, rotator type), then create the
//! rotator.
//! return 0 on success, or errCode in failure
int RabitqConverter::init(const IndexMeta &meta, const ailego::Params &params) {
  // Keep a private copy of the meta; the metric name is mandatory
  meta_ = meta;
  dimension_ = meta.dimension();
  if (meta_.metric_name().empty()) {
    LOG_ERROR("Meta metric is empty");
    return IndexError_InvalidArgument;
  }

  // Pad the dimension up to the next multiple of 64
  padded_dim_ = (dimension_ + 63) / 64 * 64;

  // Total quantization bits: 0 (or absent) selects the default
  uint32_t total_bits = 0;
  params.get(PARAM_RABITQ_TOTAL_BITS, &total_bits);
  if (total_bits == 0) {
    total_bits = kDefaultRabitqTotalBits;
  }
  if (!(total_bits >= 1 && total_bits <= 9)) {
    LOG_ERROR("Invalid total_bits: %zu, must be in [1, 9]", (size_t)total_bits);
    return IndexError_InvalidArgument;
  }
  // one of the total bits is not an extra bit
  ex_bits_ = total_bits - 1;

  // Cluster count: 0 (or absent) selects the default
  params.get(PARAM_RABITQ_NUM_CLUSTERS, &num_clusters_);
  if (num_clusters_ == 0) {
    num_clusters_ = kDefaultNumClusters;
  }

  if (ex_bits_ > 8) {
    LOG_ERROR("Invalid ex_bits: %zu, must be <= 8", ex_bits_);
    return IndexError_InvalidArgument;
  }

  if (meta.data_type() != IndexMeta::DataType::DT_FP32) {
    LOG_ERROR("RaBitQ only supports FP32 data type");
    return IndexError_Unsupported;
  }
  params.get(PARAM_RABITQ_SAMPLE_COUNT, &sample_count_);

  // Rotator type is matched case-insensitively by prefix; empty means FHT
  std::string rotator_type_str;
  params.get(PARAM_RABITQ_ROTATOR_TYPE, &rotator_type_str);
  const char *type_name = rotator_type_str.c_str();
  if (rotator_type_str.empty() || strncasecmp(type_name, "fht", 3) == 0) {
    rotator_type_ = rabitqlib::RotatorType::FhtKacRotator;
  } else if (strncasecmp(type_name, "matrix", 6) == 0) {
    rotator_type_ = rabitqlib::RotatorType::MatrixRotator;
  } else {
    LOG_ERROR("Invalid rotator_type: %s", rotator_type_str.c_str());
    return IndexError_InvalidArgument;
  }

  // Create rotator
  rotator_.reset(
      rabitqlib::choose_rotator<float>(dimension_, rotator_type_, padded_dim_));

  LOG_INFO(
      "RabitqConverter initialized: dim=%zu, padded_dim=%zu, "
      "num_clusters=%zu, ex_bits=%zu, rotator_type=%d[%s] sample_count[%zu]",
      dimension_, padded_dim_, num_clusters_, ex_bits_, (int)rotator_type_,
      rotator_type_str.c_str(), sample_count_);

  return 0;
}

//! Cleanup Converter: drop the trained centroids, the result holder and the
//! rotator. Always returns 0.
int RabitqConverter::cleanup() {
  // trained state
  rotated_centroids_.clear();
  centroids_.clear();

  // owned objects
  rotator_.reset();
  result_holder_.reset();

  return 0;
}

//! Train the converter: sample vectors from the holder, run k-means over the
//! sample, and keep both the original and the rotated centroids.
//! holder: source of training vectors, must be non-null and non-empty
//! return 0 on success, or errCode in failure
int RabitqConverter::train(IndexHolder::Pointer holder) {
  if (!holder) {
    LOG_ERROR("Null holder for training");
    return IndexError_InvalidArgument;
  }

  // The rotator is created by init() and released by cleanup(); it is
  // dereferenced when the centroids are rotated below, so refuse to train
  // without it instead of crashing
  if (!rotator_) {
    LOG_ERROR("Rotator not ready, converter must be initialized first");
    return IndexError_NoReady;
  }

  ailego::ElapsedTime timer;

  size_t vector_count = holder->count();
  if (vector_count == 0) {
    LOG_ERROR("No vectors for training");
    return IndexError_InvalidArgument;
  }

  // do sampling from all data
  size_t sample_count = vector_count;
  if (sample_count_ > 0) {
    sample_count = std::min(sample_count_, vector_count);
  }
  LOG_INFO("Training with %zu vectors from %zu of holder", sample_count,
           vector_count);
  auto sampler = std::make_shared<SampleIndexFeatures<CompactIndexFeatures>>(
      meta_, sample_count);
  auto iter = holder->create_iterator();
  if (!iter) {
    LOG_ERROR("Create iterator error");
    return IndexError_Runtime;
  }
  for (; iter->is_valid(); iter->next()) {
    sampler->emplace(iter->data());
  }

  // Holder is not needed, cleanup it.
  holder.reset();

  if (sampler->count() == 0) {
    LOG_ERROR("Load training data error");
    return IndexError_InvalidLength;
  }

  // Create KmeansCluster for training centroids
  auto cluster = IndexFactory::CreateCluster("OptKmeansCluster");
  if (!cluster) {
    LOG_ERROR("Failed to create OptKmeansCluster");
    return IndexError_NoExist;
  }

  // Initialize cluster
  LOG_INFO(
      "Initializing KmeansCluster with meta: dim=%u, data_type=%d, metric=%s",
      meta_.dimension(), (int)meta_.data_type(), meta_.metric_name().c_str());
  ailego::Params cluster_params;
  int ret = cluster->init(meta_, cluster_params);
  if (ret != 0) {
    LOG_ERROR("Failed to initialize KmeansCluster: %d", ret);
    return ret;
  }

  ret = cluster->mount(sampler);
  if (ret != 0) {
    LOG_ERROR("Failed to mount training data: %d", ret);
    return ret;
  }
  cluster->suggest(num_clusters_);

  // Perform clustering
  IndexCluster::CentroidList cents;
  // TODO: support specify threads with argument
  auto threads = std::make_shared<SingleQueueIndexThreads>(0, false);
  ret = cluster->cluster(threads, cents);
  if (ret != 0) {
    LOG_ERROR("Failed to perform clustering: %d", ret);
    return ret;
  }

  // Without any centroid there is nothing to dump or quantize against, so
  // report the failure here rather than returning success
  if (cents.empty()) {
    LOG_ERROR("Clustering produced no centroids");
    return IndexError_Runtime;
  }

  // The clusterer may return fewer (or more) centroids than suggested
  if (cents.size() != num_clusters_) {
    LOG_WARN("Expected %zu clusters, got %zu", num_clusters_, cents.size());
    num_clusters_ = cents.size();
  }
  // Extract original centroids (for LinearSeeker query)
  centroids_.resize(num_clusters_ * dimension_);
  // Extract rotated centroids (for quantization)
  rotated_centroids_.resize(num_clusters_ * padded_dim_);
  for (uint32_t i = 0; i < num_clusters_; ++i) {
    const float *cent_data = static_cast<const float *>(cents[i].feature());
    // Save original centroids
    std::memcpy(&centroids_[i * dimension_], cent_data,
                dimension_ * sizeof(float));
    // Save rotated centroids
    this->rotator_->rotate(cent_data, &rotated_centroids_[i * padded_dim_]);
  }

  stats_.set_trained_count(sampler->count());
  stats_.set_trained_costtime(timer.milli_seconds());

  LOG_INFO("Training completed: %zu centroids, cost %zu ms", num_clusters_,
           static_cast<size_t>(timer.milli_seconds()));

  return 0;
}


//! Transform the data. Not implemented yet: after validating the holder and
//! the trained state it always returns IndexError_NotImplemented.
int RabitqConverter::transform(IndexHolder::Pointer holder) {
  if (holder == nullptr) {
    LOG_ERROR("Null holder for transformation");
    return IndexError_InvalidArgument;
  }

  // train() must have produced the rotated centroids first
  const bool trained = !rotated_centroids_.empty();
  if (!trained) {
    LOG_ERROR("Centroids not trained yet");
    return IndexError_NoReady;
  }

  LOG_ERROR("Not implemented");
  return IndexError_NotImplemented;
}

//! Dump the trained centroids, the quantization config and the rotator
//! through the dumper, and record the dumped size and cost time in stats.
//! return 0 on success, or errCode in failure
int RabitqConverter::dump(const IndexDumper::Pointer &dumper) {
  if (dumper == nullptr) {
    LOG_ERROR("Null dumper");
    return IndexError_InvalidArgument;
  }

  // both centroid sets are produced together by train()
  const bool has_centroids =
      !rotated_centroids_.empty() && !centroids_.empty();
  if (!has_centroids) {
    LOG_ERROR("No centroids to dump");
    return IndexError_NoReady;
  }

  ailego::ElapsedTime timer;
  size_t bytes_written = 0;

  const int ret = dump_rabitq_centroids(
      dumper, dimension_, padded_dim_, ex_bits_, num_clusters_, rotator_type_,
      rotated_centroids_, centroids_, rotator_, &bytes_written);
  if (ret != 0) {
    return ret;
  }

  stats_.set_dumped_size(bytes_written);
  stats_.set_dumped_costtime(timer.milli_seconds());

  LOG_INFO("Dump completed: %zu bytes, cost %zu ms", stats_.dumped_size(),
           static_cast<size_t>(timer.milli_seconds()));
  return 0;
}

//! Build a RabitqReformer from the trained state of this converter.
//! The converter is dumped into a temporary in-memory file, which is then
//! loaded by a freshly initialized reformer; the temporary file is removed
//! when the function returns.
//! reformer: output pointer receiving the reformer, must be non-null
//! return 0 on success, or errCode in failure
int RabitqConverter::to_reformer(IndexReformer::Pointer *reformer) {
  if (!reformer) {
    LOG_ERROR("Null reformer output pointer");
    return IndexError_InvalidArgument;
  }

  // Factory results are checked before use, as train() does for the cluster
  auto memory_dumper = IndexFactory::CreateDumper("MemoryDumper");
  if (!memory_dumper) {
    LOG_ERROR("Failed to create MemoryDumper");
    return IndexError_NoExist;
  }
  int ret = memory_dumper->init(ailego::Params());
  if (ret != 0) {
    LOG_ERROR("Failed to initialize memory dumper: %d", ret);
    return ret;
  }

  // Timestamp plus a random number keeps concurrent conversions apart
  std::string file_id = ailego::StringHelper::Concat(
      "rabitq_converter_", ailego::Monotime::MilliSeconds(), rand());
  ret = memory_dumper->create(file_id);
  if (ret != 0) {
    LOG_ERROR("Failed to create memory dumper: %d", ret);
    return ret;
  }
  // Release memory
  AILEGO_DEFER([&file_id]() { IndexMemory::Instance()->remove(file_id); });
  ret = this->dump(memory_dumper);
  if (ret != 0) {
    LOG_ERROR("Failed to dump RabitqConverter: %d", ret);
    return ret;
  }
  ret = memory_dumper->close();
  if (ret != 0) {
    LOG_ERROR("Failed to close memory dumper: %d", ret);
    return ret;
  }

  auto res = std::make_shared<RabitqReformer>();
  ailego::Params reformer_params;
  reformer_params.set(PARAM_RABITQ_METRIC_NAME, meta_.metric_name());
  ret = res->init(reformer_params);
  if (ret != 0) {
    LOG_ERROR("Failed to initialize RabitqReformer: %d", ret);
    return ret;
  }

  auto memory_storage = IndexFactory::CreateStorage("MemoryReadStorage");
  if (!memory_storage) {
    LOG_ERROR("Failed to create MemoryReadStorage");
    return IndexError_NoExist;
  }
  ret = memory_storage->open(file_id, false);
  if (ret != 0) {
    LOG_ERROR("Failed to open memory storage: %d", ret);
    return ret;
  }
  ret = res->load(memory_storage);
  if (ret != 0) {
    LOG_ERROR("Failed to load RabitqReformer: %d", ret);
    return ret;
  }
  *reformer = std::move(res);
  return 0;
}


}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_rabitq/rabitq_converter.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <memory>
#include <vector>
#include <rabitqlib/utils/rotator.hpp>
#include "zvec/core/framework/index_cluster.h"
#include "zvec/core/framework/index_converter.h"
#include "zvec/core/framework/index_reformer.h"
#include "zvec/core/framework/index_threads.h"
#include "rabitq_params.h"

namespace zvec {
namespace core {

class RabitqReformer;

/*! RaBitQ Converter
 * Trains KMeans centroids over sampled vectors and keeps them both in their
 * original form and rotated by a rabitqlib rotator. The trained state can be
 * dumped or turned into a RabitqReformer. transform() is declared but the
 * current implementation returns IndexError_NotImplemented.
 */
class RabitqConverter : public IndexConverter {
 public:
  //! Constructor
  RabitqConverter() = default;

  //! Destructor, calls cleanup()
  ~RabitqConverter() override;

  //! Initialize Converter: validate the meta and parameters, create the
  //! rotator
  int init(const IndexMeta &meta, const ailego::Params &params) override;

  //! Cleanup Converter: drop centroids, result holder and rotator
  int cleanup(void) override;

  //! Train the data - perform KMeans clustering
  int train(IndexHolder::Pointer holder) override;

  //! Transform the data - quantize vectors using RaBitQ
  //! (currently returns IndexError_NotImplemented)
  int transform(IndexHolder::Pointer holder) override;

  //! Dump centroids and config into storage
  int dump(const IndexDumper::Pointer &dumper) override;

  //! Retrieve statistics
  const Stats &stats(void) const override {
    return stats_;
  }

  //! Retrieve a holder as result
  IndexHolder::Pointer result(void) const override {
    return result_holder_;
  }

  //! Retrieve Index Meta
  const IndexMeta &meta(void) const override {
    return meta_;
  }

  //! Build a reformer from the trained state via an in-memory dump
  int to_reformer(IndexReformer::Pointer *reformer) override;

 private:
  //! Round size up to the next multiple of 32
  static inline size_t AlignSize(size_t size) {
    return (size + 0x1F) & (~0x1F);
  }

 private:
  //! Copy of the meta passed to init()
  IndexMeta meta_;
  IndexHolder::Pointer result_holder_;
  Stats stats_;
  //! Upper bound on the number of training samples, 0 means use all vectors
  size_t sample_count_{0};

  // RaBitQ parameters
  size_t num_clusters_{0};
  //! total_bits - 1
  size_t ex_bits_{0};
  size_t dimension_{0};
  //! dimension_ rounded up to a multiple of 64
  size_t padded_dim_{0};

  // Original centroids: num_clusters * dimension (for LinearSeeker query)
  std::vector<float> centroids_;
  // Rotated centroids: num_clusters * padded_dim (for quantization)
  std::vector<float> rotated_centroids_;

  // Rotator for vector transformation
  rabitqlib::RotatorType rotator_type_{rabitqlib::RotatorType::FhtKacRotator};
  std::unique_ptr<rabitqlib::Rotator<float>> rotator_;
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_rabitq/rabitq_params.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <string>

namespace zvec {
namespace core {

// Local metric type enum that mirrors rabitqlib::MetricType,
// without exposing rabitqlib headers to consumers of this file.
enum class RabitqMetricType {
  kL2 = 0,  // L2 distance
  kIP = 1,  // inner product
};

// RaBitQ Converter parameters
// Keys used to look values up in an ailego::Params bag.
// NOTE(review): the per-key meanings below are inferred from the key names
// unless stated otherwise - confirm against the code that reads each key.

// Presumably the number of KMeans clusters (centroids).
static const std::string PARAM_RABITQ_NUM_CLUSTERS(
    "proxima.rabitq.num_clusters");
// Presumably the total number of quantization bits per dimension.
static const std::string PARAM_RABITQ_TOTAL_BITS("proxima.rabitq.total_bits");
// Metric name as a string, e.g. "SquaredEuclidean", "InnerProduct" or
// "Cosine".
static const std::string PARAM_RABITQ_METRIC_NAME("proxima.rabitq.metric_name");
// Presumably selects the rotator implementation.
static const std::string PARAM_RABITQ_ROTATOR_TYPE(
    "proxima.rabitq.rotator.type");
// Presumably the number of vectors sampled for training.
static const std::string PARAM_RABITQ_SAMPLE_COUNT(
    "proxima.rabitq.sample_count");

// Default values
// Default number of clusters.
constexpr size_t kDefaultNumClusters = 16;
// 4-bit, 5-bit, and 7-bit quantization typically achieve 90%, 95%, and 99%
// recall, respectively, without accessing raw vectors for reranking.
constexpr size_t kDefaultRabitqTotalBits = 7;

// NOTE(review): presumably the inclusive range of vector dimensions supported
// by RaBitQ; the validation that uses these limits is not in this header.
constexpr int kMinRabitqDimSize = 64;
constexpr int kMaxRabitqDimSize = 4095;


}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_rabitq/rabitq_reformer.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "rabitq_reformer.h"
#include <string>
#include <vector>
#include <rabitqlib/defines.hpp>
#include <rabitqlib/index/query.hpp>
#include <rabitqlib/quantization/rabitq.hpp>
#include <rabitqlib/utils/rotator.hpp>
#include <zvec/ailego/logger/logger.h>
#include <zvec/ailego/utility/string_helper.h>
#include "core/algorithm/cluster/linear_seeker.h"
#include "zvec/core/framework/index_error.h"
#include "zvec/core/framework/index_factory.h"
#include "zvec/core/framework/index_features.h"
#include "zvec/core/framework/index_meta.h"
#include "zvec/core/framework/index_storage.h"
#include "hnsw_rabitq_query_entity.h"
#include "rabitq_converter.h"
#include "rabitq_utils.h"

namespace zvec {
namespace core {

// Private state of RabitqReformer. Every rabitqlib type lives in this struct
// (pimpl), so rabitqlib headers stay confined to this translation unit.
struct RabitqReformer::Impl {
  // Quantization layout, filled in by load()
  size_t num_clusters = 0;
  size_t ex_bits = 0;
  size_t dimension = 0;
  size_t padded_dim = 0;
  size_t size_bin_data = 0;
  size_t size_ex_data = 0;
  // Set once load() has completed successfully
  bool loaded = false;

  // Centroids in the original space: num_clusters * dimension floats
  // (searched through the LinearSeeker)
  std::vector<float> centroids;
  // Centroids in the rotated space: num_clusters * padded_dim floats
  // (used for quantization)
  std::vector<float> rotated_centroids;

  rabitqlib::RotatorType rotator_type = rabitqlib::RotatorType::FhtKacRotator;
  std::unique_ptr<rabitqlib::Rotator<float>> rotator;
  rabitqlib::quant::RabitqConfig query_config;
  rabitqlib::quant::RabitqConfig config;
  rabitqlib::MetricType metric_type = rabitqlib::METRIC_L2;

  LinearSeeker::Pointer centroid_seeker;
  CoherentIndexFeatures::Pointer centroid_features;

  // Local enum -> rabitqlib enum. Anything that is not kIP maps to L2.
  static rabitqlib::MetricType to_rabitq(RabitqMetricType m) {
    if (m == RabitqMetricType::kIP) {
      return rabitqlib::METRIC_IP;
    }
    return rabitqlib::METRIC_L2;
  }

  // rabitqlib enum -> local enum. Anything that is not METRIC_IP maps to kL2.
  static RabitqMetricType from_rabitq(rabitqlib::MetricType m) {
    if (m == rabitqlib::METRIC_IP) {
      return RabitqMetricType::kIP;
    }
    return RabitqMetricType::kL2;
  }

  // Rotate `raw_vector` and quantize it against centroid `cluster_id`;
  // the encoded bytes are written to `quantized_data`.
  int quantize_vector(const float *raw_vector, uint32_t cluster_id,
                      std::string *quantized_data) const;
};

// Allocates the private implementation; nothing is loaded yet.
RabitqReformer::RabitqReformer() : impl_(std::make_unique<Impl>()) {}

// Releases centroids, seeker and rotator through cleanup().
RabitqReformer::~RabitqReformer() {
  this->cleanup();
}

// Number of clusters read from the converter header by load() (0 before).
size_t RabitqReformer::num_clusters() const {
  return impl_->num_clusters;
}

// Metric selected by init(), translated to the rabitqlib-free local enum.
RabitqMetricType RabitqReformer::rabitq_metric_type() const {
  return Impl::from_rabitq(impl_->metric_type);
}

// Select the rabitqlib metric from the PARAM_RABITQ_METRIC_NAME parameter.
// "SquaredEuclidean" maps to L2; "InnerProduct" and "Cosine" both map to IP.
// Any other name is rejected with IndexError_InvalidArgument and the
// previously configured metric is left untouched.
int RabitqReformer::init(const ailego::Params &params) {
  const std::string metric_name =
      params.get_as_string(PARAM_RABITQ_METRIC_NAME);

  const bool use_l2 = (metric_name == "SquaredEuclidean");
  const bool use_ip =
      (metric_name == "InnerProduct") || (metric_name == "Cosine");
  if (!use_l2 && !use_ip) {
    LOG_ERROR("Unsupported metric name: %s", metric_name.c_str());
    return IndexError_InvalidArgument;
  }

  impl_->metric_type = use_l2 ? rabitqlib::METRIC_L2 : rabitqlib::METRIC_IP;
  LOG_DEBUG("Rabitq reformer init done. metric_name=%s metric_type=%d",
            metric_name.c_str(), static_cast<int>(impl_->metric_type));
  return 0;
}

// Drop everything load() created (centroids, seeker, features, rotator) and
// mark the reformer as not loaded. The scalar layout fields and the metric
// chosen by init() are left as they are. Always returns 0.
int RabitqReformer::cleanup() {
  impl_->centroids.clear();
  impl_->rotated_centroids.clear();
  impl_->centroid_seeker.reset();
  impl_->centroid_features.reset();
  impl_->loaded = false;
  impl_->rotator.reset();
  return 0;
}

// Unloading is the same operation as cleanup().
int RabitqReformer::unload() {
  return this->cleanup();
}

// Load the converter segment (RABITQ_CONVERTER_SEG_ID) from `storage`.
//
// Segment layout, read sequentially:
//   RabitqConverterHeader
//   rotated centroids : num_clusters * padded_dim floats
//   original centroids: num_clusters * dim floats
//   rotator blob      : header.rotator_size bytes
//
// After the data is read, the quantization configs and a LinearSeeker over the
// original centroids are set up.
//
// Returns 0 on success, IndexError_InvalidArgument for a null storage,
// IndexError_InvalidFormat when the segment is missing, truncated or carries
// an inconsistent header, or the error code of the centroid seeker.
// The reformer is only marked as loaded when every step succeeded.
int RabitqReformer::load(IndexStorage::Pointer storage) {
  if (!storage) {
    LOG_ERROR("Invalid storage for load");
    return IndexError_InvalidArgument;
  }

  auto segment = storage->get(RABITQ_CONVERTER_SEG_ID);
  if (!segment) {
    LOG_ERROR("Failed to get segment %s", RABITQ_CONVERTER_SEG_ID.c_str());
    return IndexError_InvalidFormat;
  }

  size_t offset = 0;
  RabitqConverterHeader header;
  IndexStorage::MemoryBlock block;
  size_t size = segment->read(offset, block, sizeof(header));
  if (size != sizeof(header)) {
    LOG_ERROR("Failed to read header");
    return IndexError_InvalidFormat;
  }
  memcpy(&header, block.data(), sizeof(header));
  offset += sizeof(header);

  // Widen the 32-bit header fields before multiplying them: the element
  // counts below must be computed in size_t so they always match the byte
  // sizes used for the reads and copies.
  const size_t dim = header.dim;
  const size_t padded_dim = header.padded_dim;
  const size_t num_clusters = header.num_clusters;
  constexpr size_t kMaxFloatCount = static_cast<size_t>(-1) / sizeof(float);
  if (dim == 0 || num_clusters == 0 || padded_dim < dim ||
      num_clusters > kMaxFloatCount / padded_dim) {
    LOG_ERROR(
        "Invalid rabitq converter header: dim=%zu, padded_dim=%zu, "
        "num_clusters=%zu",
        dim, padded_dim, num_clusters);
    return IndexError_InvalidFormat;
  }

  // From here on the state is being overwritten; a failure below must not
  // leave a previously loaded reformer flagged as usable.
  impl_->loaded = false;
  impl_->dimension = dim;
  impl_->padded_dim = padded_dim;
  impl_->ex_bits = header.ex_bits;
  impl_->num_clusters = num_clusters;
  impl_->rotator_type =
      static_cast<rabitqlib::RotatorType>(header.rotator_type);

  // Read rotated centroids
  const size_t rotated_centroids_count = num_clusters * padded_dim;
  const size_t rotated_centroids_size =
      rotated_centroids_count * sizeof(float);
  size = segment->read(offset, block, rotated_centroids_size);
  if (size != rotated_centroids_size) {
    LOG_ERROR("Failed to read rotated centroids");
    return IndexError_InvalidFormat;
  }
  impl_->rotated_centroids.resize(rotated_centroids_count);
  memcpy(impl_->rotated_centroids.data(), block.data(), rotated_centroids_size);
  offset += size;

  // Read original centroids (for LinearSeeker query)
  const size_t centroids_count = num_clusters * dim;
  const size_t centroids_size = centroids_count * sizeof(float);
  size = segment->read(offset, block, centroids_size);
  if (size != centroids_size) {
    LOG_ERROR("Failed to read centroids");
    return IndexError_InvalidFormat;
  }
  impl_->centroids.resize(centroids_count);
  memcpy(impl_->centroids.data(), block.data(), centroids_size);
  offset += size;

  // Read rotator
  const size_t rotator_size = header.rotator_size;
  size = segment->read(offset, block, rotator_size);
  if (size != rotator_size) {
    LOG_ERROR("Failed to read rotator");
    return IndexError_InvalidFormat;
  }
  impl_->rotator.reset(rabitqlib::choose_rotator<float>(
      impl_->dimension, impl_->rotator_type, impl_->padded_dim));
  if (!impl_->rotator) {
    LOG_ERROR("Failed to create rotator");
    return IndexError_InvalidFormat;
  }
  // The rotator deserializes from a raw pointer without a length, so make
  // sure the stored blob is at least as large as what it serializes.
  if (rotator_size < impl_->rotator->dump_bytes()) {
    LOG_ERROR("Rotator data too small: stored=%zu, expected=%zu", rotator_size,
              static_cast<size_t>(impl_->rotator->dump_bytes()));
    return IndexError_InvalidFormat;
  }
  impl_->rotator->load(reinterpret_cast<const char *>(block.data()));
  offset += size;

  impl_->query_config = rabitqlib::quant::faster_config(
      impl_->padded_dim, rabitqlib::SplitSingleQuery<float>::kNumBits);
  impl_->config =
      rabitqlib::quant::faster_config(impl_->padded_dim, impl_->ex_bits + 1);

  impl_->size_bin_data =
      rabitqlib::BinDataMap<float>::data_bytes(impl_->padded_dim);
  impl_->size_ex_data = rabitqlib::ExDataMap<float>::data_bytes(
      impl_->padded_dim, impl_->ex_bits);

  // Initialize LinearSeeker for centroid search
  IndexMeta centroid_meta;
  centroid_meta.set_data_type(IndexMeta::DataType::DT_FP32);
  centroid_meta.set_dimension(static_cast<uint32_t>(impl_->dimension));
  // Note:
  // 1. spherical kmeans is used for InnerProduct and Cosine, so centroids are
  // normalized.
  // 2. for Cosine metric, `transform_to_entity` input is normalized, need to
  // use InnerProduct metric as Cosine metric requires extra dimension which is
  // unsuitable for centroids.
  centroid_meta.set_metric(impl_->metric_type == rabitqlib::METRIC_L2
                               ? "SquaredEuclidean"
                               : "InnerProduct",
                           0, ailego::Params());

  impl_->centroid_features = std::make_shared<CoherentIndexFeatures>();
  impl_->centroid_features->mount(centroid_meta, impl_->centroids.data(),
                                  impl_->centroids.size() * sizeof(float));

  impl_->centroid_seeker = std::make_shared<LinearSeeker>();
  int ret = impl_->centroid_seeker->init(centroid_meta);
  if (ret != 0) {
    LOG_ERROR("Failed to init centroid seeker. ret[%d]", ret);
    return ret;
  }
  ret = impl_->centroid_seeker->mount(impl_->centroid_features);
  if (ret != 0) {
    LOG_ERROR("Failed to mount centroid features. ret[%d]", ret);
    return ret;
  }

  LOG_INFO(
      "Rabitq reformer load done. dimension=%zu, padded_dim=%zu, "
      "ex_bits=%zu, num_clusters=%zu, size_bin_data=%zu, size_ex_data=%zu "
      "rotator_type=%d",
      impl_->dimension, impl_->padded_dim, impl_->ex_bits, impl_->num_clusters,
      impl_->size_bin_data, impl_->size_ex_data,
      static_cast<int>(impl_->rotator_type));
  impl_->loaded = true;
  return 0;
}

// Quantize one FP32 record.
//
// The nearest centroid is looked up with the LinearSeeker using the first
// `dimension` floats of `record`, then the record is rotated and quantized
// against that centroid. The encoded bytes are stored in `out` and `ometa` is
// set to DT_INT8 with the encoded byte length as its dimension.
//
// Returns 0 on success, IndexError_NoReady before a successful load(),
// IndexError_InvalidArgument for null pointers or an unsuitable `rmeta`, or
// the error code of the seeker / quantizer.
int RabitqReformer::convert(const void *record, const IndexQueryMeta &rmeta,
                            std::string *out, IndexQueryMeta *ometa) const {
  if (!impl_->loaded) {
    LOG_ERROR("Centroids not loaded yet");
    return IndexError_NoReady;
  }

  // `ometa` is written unconditionally at the end, so it is as mandatory as
  // `record` and `out`; reject it up front instead of after quantizing.
  if (!record || !out || !ometa) {
    LOG_ERROR("Invalid arguments for convert");
    return IndexError_InvalidArgument;
  }

  // input may be transformed, require rmeta.dimension >= dimension
  if (rmeta.dimension() < impl_->dimension ||
      rmeta.data_type() != IndexMeta::DataType::DT_FP32) {
    LOG_ERROR("Invalid record meta: dimension=%zu, data_type=%d",
              static_cast<size_t>(rmeta.dimension()),
              static_cast<int>(rmeta.data_type()));
    return IndexError_InvalidArgument;
  }

  // Find nearest centroid using LinearSeeker
  Seeker::Document doc;
  int ret = impl_->centroid_seeker->seek(
      record, impl_->dimension * sizeof(float), &doc);
  if (ret != 0) {
    LOG_ERROR("Failed to seek centroid. ret[%d]", ret);
    return ret;
  }
  uint32_t cluster_id = doc.index;

  const float *vector = static_cast<const float *>(record);
  ret = impl_->quantize_vector(vector, cluster_id, out);
  if (ret != 0) {
    LOG_ERROR("Failed to quantize vector");
    return ret;
  }

  ometa->set_meta(IndexMeta::DataType::DT_INT8,
                  static_cast<uint32_t>(out->size()));
  return 0;
}

// Generic query transformation is not supported by this reformer; it always
// returns IndexError_NotImplemented. Use transform_to_entity() instead.
int RabitqReformer::transform(const void *, const IndexQueryMeta &,
                              std::string *, IndexQueryMeta *) const {
  return IndexError_NotImplemented;
}

// Prepare a query for RaBitQ distance estimation.
//
// `query` is read as a float vector, rotated into `entity->rotated_query`
// (padded_dim floats) and wrapped in a rabitqlib::SplitSingleQuery. The
// query-to-centroid terms are then stored in `entity->q_to_centroids`:
//   L2: [i] = euclidean distance to rotated centroid i (num_clusters entries)
//   IP: [i] = dot product with rotated centroid i, and
//       [num_clusters + i] = euclidean distance to it (2 * num_clusters)
//
// Returns 0 on success, IndexError_NoReady before a successful load() and
// IndexError_InvalidArgument when `query` or `entity` is null.
int RabitqReformer::transform_to_entity(const void *query,
                                        HnswRabitqQueryEntity *entity) const {
  if (!impl_->loaded) {
    LOG_ERROR("Centroids not loaded yet");
    return IndexError_NoReady;
  }

  // `entity` is written below, so it must be validated together with `query`.
  if (!query || !entity) {
    LOG_ERROR("Invalid arguments for transform");
    return IndexError_InvalidArgument;
  }

  const float *query_vector = static_cast<const float *>(query);
  const size_t padded_dim = impl_->padded_dim;
  const size_t num_clusters = impl_->num_clusters;

  // Apply rotator
  entity->rotated_query.resize(padded_dim);
  impl_->rotator->rotate(query_vector, entity->rotated_query.data());

  // Quantize the rotated query (bit width comes from query_config)
  entity->query_wrapper = std::make_unique<rabitqlib::SplitSingleQuery<float>>(
      entity->rotated_query.data(), padded_dim, impl_->ex_bits,
      impl_->query_config, impl_->metric_type);

  // Preprocess - get the distance from query to all centroids.
  // Size the buffer once: the IP layout needs two values per centroid.
  const bool is_ip = (impl_->metric_type == rabitqlib::METRIC_IP);
  entity->q_to_centroids.resize(is_ip ? num_clusters * 2 : num_clusters);

  if (impl_->metric_type == rabitqlib::METRIC_L2) {
    for (size_t i = 0; i < num_clusters; i++) {
      auto centroid = impl_->rotated_centroids.data() + (i * padded_dim);
      entity->q_to_centroids[i] = std::sqrt(rabitqlib::euclidean_sqr(
          entity->rotated_query.data(), centroid, padded_dim));
    }
  } else if (is_ip) {
    // first half as g_add, second half as g_error
    for (size_t i = 0; i < num_clusters; i++) {
      auto centroid = impl_->rotated_centroids.data() + (i * padded_dim);
      entity->q_to_centroids[i] = rabitqlib::dot_product(
          entity->rotated_query.data(), centroid, padded_dim);
      entity->q_to_centroids[i + num_clusters] =
          std::sqrt(rabitqlib::euclidean_sqr(entity->rotated_query.data(),
                                             centroid, padded_dim));
    }
  }

  return 0;
}

int RabitqReformer::Impl::quantize_vector(const float *raw_vector,
                                          uint32_t cluster_id,
                                          std::string *quantized_data) const {
  std::vector<float> rotated_data(padded_dim);
  rotator->rotate(raw_vector, rotated_data.data());

  // quantized format: cluster_id + bin_data + ex_data
  quantized_data->resize(sizeof(cluster_id) + size_bin_data + size_ex_data);
  memcpy(&(*quantized_data)[0], &cluster_id, sizeof(cluster_id));
  int bin_data_offset = sizeof(cluster_id);
  int ex_data_offset = bin_data_offset + size_bin_data;
  rabitqlib::quant::quantize_split_single(
      rotated_data.data(), rotated_centroids.data() + (cluster_id * padded_dim),
      padded_dim, ex_bits, &(*quantized_data)[bin_data_offset],
      &(*quantized_data)[ex_data_offset], metric_type, config);

  return 0;
}

// Serialize the loaded centroids and rotator through `dumper` using the
// shared dump_rabitq_centroids() helper.
// Returns 0 on success, IndexError_InvalidArgument for a null dumper,
// IndexError_NoReady when nothing has been loaded, or the helper's error code.
int RabitqReformer::dump(const IndexDumper::Pointer &dumper) {
  if (!dumper) {
    LOG_ERROR("Null dumper");
    return IndexError_InvalidArgument;
  }

  const bool has_centroids = impl_->loaded &&
                             !impl_->rotated_centroids.empty() &&
                             !impl_->centroids.empty();
  if (!has_centroids) {
    LOG_ERROR("No centroids to dump");
    return IndexError_NoReady;
  }

  size_t bytes_dumped = 0;
  const int status = dump_rabitq_centroids(
      dumper, impl_->dimension, impl_->padded_dim, impl_->ex_bits,
      impl_->num_clusters, impl_->rotator_type, impl_->rotated_centroids,
      impl_->centroids, impl_->rotator, &bytes_dumped);
  if (status == 0) {
    LOG_INFO("RabitqReformer dump completed: %zu bytes", bytes_dumped);
  }
  return status;
}

// Write the loaded centroids and rotator into a new RABITQ_CONVERTER_SEG_ID
// segment of `storage`.
//
// The segment is appended with the payload size rounded up to a multiple of
// 32 bytes and then filled in this order: header, rotated centroids, original
// centroids, rotator blob.
//
// Returns 0 on success, IndexError_InvalidArgument for a null storage,
// IndexError_NoReady when nothing has been loaded, the storage's append error
// code, IndexError_ReadData when the new segment cannot be fetched, or
// IndexError_WriteData on a short write.
int RabitqReformer::dump(const IndexStorage::Pointer &storage) {
  if (!storage) {
    LOG_ERROR("Null storage");
    return IndexError_InvalidArgument;
  }

  const bool has_centroids = impl_->loaded &&
                             !impl_->rotated_centroids.empty() &&
                             !impl_->centroids.empty();
  if (!has_centroids) {
    LOG_ERROR("No centroids to dump");
    return IndexError_NoReady;
  }

  // Byte sizes of the four sections
  const size_t header_size = sizeof(RabitqConverterHeader);
  const size_t rotated_centroids_size =
      impl_->rotated_centroids.size() * sizeof(float);
  const size_t centroids_size = impl_->centroids.size() * sizeof(float);
  const size_t rotator_size = impl_->rotator->dump_bytes();
  const size_t data_size =
      header_size + rotated_centroids_size + centroids_size + rotator_size;
  // Segment size: payload rounded up to the next multiple of 32
  const size_t total_size = (data_size + 0x1F) & (~0x1F);

  const int append_ret = storage->append(RABITQ_CONVERTER_SEG_ID, total_size);
  if (append_ret != 0) {
    LOG_ERROR("Failed to append segment %s, ret=%d",
              RABITQ_CONVERTER_SEG_ID.c_str(), append_ret);
    return append_ret;
  }

  auto segment = storage->get(RABITQ_CONVERTER_SEG_ID);
  if (!segment) {
    LOG_ERROR("Failed to get segment %s", RABITQ_CONVERTER_SEG_ID.c_str());
    return IndexError_ReadData;
  }

  // Section 1: header
  RabitqConverterHeader header;
  header.dim = static_cast<uint32_t>(impl_->dimension);
  header.padded_dim = static_cast<uint32_t>(impl_->padded_dim);
  header.num_clusters = static_cast<uint32_t>(impl_->num_clusters);
  header.ex_bits = static_cast<uint8_t>(impl_->ex_bits);
  header.rotator_type = static_cast<uint8_t>(impl_->rotator_type);
  header.rotator_size = static_cast<uint32_t>(rotator_size);

  size_t cursor = 0;
  size_t written = segment->write(cursor, &header, header_size);
  if (written != header_size) {
    LOG_ERROR("Failed to write header: written=%zu, expected=%zu", written,
              header_size);
    return IndexError_WriteData;
  }
  cursor += header_size;

  // Section 2: rotated centroids
  written = segment->write(cursor, impl_->rotated_centroids.data(),
                           rotated_centroids_size);
  if (written != rotated_centroids_size) {
    LOG_ERROR("Failed to write rotated centroids: written=%zu, expected=%zu",
              written, rotated_centroids_size);
    return IndexError_WriteData;
  }
  cursor += rotated_centroids_size;

  // Section 3: original centroids
  written = segment->write(cursor, impl_->centroids.data(), centroids_size);
  if (written != centroids_size) {
    LOG_ERROR("Failed to write centroids: written=%zu, expected=%zu", written,
              centroids_size);
    return IndexError_WriteData;
  }
  cursor += centroids_size;

  // Section 4: rotator, serialized into a scratch buffer first
  std::vector<char> rotator_blob(rotator_size);
  impl_->rotator->save(rotator_blob.data());
  written = segment->write(cursor, rotator_blob.data(), rotator_size);
  if (written != rotator_size) {
    LOG_ERROR("Failed to write rotator data: written=%zu, expected=%zu",
              written, rotator_size);
    return IndexError_WriteData;
  }

  LOG_INFO("RabitqReformer dump to storage completed: %zu bytes", data_size);
  return 0;
}


}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_rabitq/rabitq_reformer.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#pragma once

#include <memory>
#include "zvec/core/framework/index_dumper.h"
#include "zvec/core/framework/index_reformer.h"
#include "zvec/core/framework/index_storage.h"
#include "rabitq_params.h"

namespace zvec {
namespace core {

class HnswRabitqQueryEntity;

/*! RaBitQ Reformer
 * Loads centroids and performs query transformation and vector quantization.
 *
 * All rabitqlib types are hidden behind a pimpl to avoid leaking rabitqlib
 * headers to consumers of this class.
 */
class RabitqReformer : public IndexReformer {
 public:
  typedef std::shared_ptr<RabitqReformer> Pointer;

  RabitqReformer();
  ~RabitqReformer() override;

  // Non-copyable
  RabitqReformer(const RabitqReformer &) = delete;
  RabitqReformer &operator=(const RabitqReformer &) = delete;

  // Select the metric from PARAM_RABITQ_METRIC_NAME in `params`.
  int init(const ailego::Params &params) override;
  // Release everything created by load().
  int cleanup(void) override;
  // Read header, centroids and rotator from the converter segment of `storage`.
  int load(IndexStorage::Pointer storage) override;
  // Same as cleanup().
  int unload(void) override;

  // transform() is not implemented for RabitqReformer; use transform_to_entity.
  int transform(const void *query, const IndexQueryMeta &qmeta,
                std::string *out, IndexQueryMeta *ometa) const override;

  // Quantize one FP32 record into `out`; `ometa` receives the output meta.
  int convert(const void *record, const IndexQueryMeta &rmeta, std::string *out,
              IndexQueryMeta *ometa) const override;

  // Serialize the loaded centroids and rotator, either through a dumper or
  // into a new segment of a storage.
  int dump(const IndexDumper::Pointer &dumper);
  int dump(const IndexStorage::Pointer &storage);

  // Rotate and quantize a float query and fill `entity` with the rotated
  // query, the query wrapper and the query-to-centroid terms.
  int transform_to_entity(const void *query,
                          HnswRabitqQueryEntity *entity) const;

  // Number of clusters read by load().
  size_t num_clusters() const;
  // Metric selected by init(), as the rabitqlib-free local enum.
  RabitqMetricType rabitq_metric_type() const;

 private:
  // Defined in the .cc file; holds all rabitqlib-typed state.
  struct Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_rabitq/rabitq_utils.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "rabitq_utils.h"
#include <string>
#include <zvec/ailego/hash/crc32c.h>
#include "zvec/core/framework/index_error.h"
#include "zvec/core/framework/index_logger.h"

namespace zvec {
namespace core {

// Write the RaBitQ converter segment through `dumper`.
//
// Sections are written in this order and folded into one running CRC32C:
// header, rotated centroids, original centroids, rotator blob. Zero padding
// is then written so the segment ends on a 32-byte boundary; the padding is
// not part of the CRC. Finally the segment is registered under
// RABITQ_CONVERTER_SEG_ID.
//
// `out_dumped_size`, when non-null, receives the payload size without padding.
//
// Returns 0 on success, IndexError_InvalidArgument when `dumper` or `rotator`
// is null, IndexError_WriteData on a short write, or the error code of
// dumper->append().
int dump_rabitq_centroids(
    const IndexDumper::Pointer &dumper, size_t dimension, size_t padded_dim,
    size_t ex_bits, size_t num_clusters, rabitqlib::RotatorType rotator_type,
    const std::vector<float> &rotated_centroids,
    const std::vector<float> &centroids,
    const std::unique_ptr<rabitqlib::Rotator<float>> &rotator,
    size_t *out_dumped_size) {
  // Both pointers are dereferenced unconditionally below.
  if (!dumper || !rotator) {
    LOG_ERROR("Invalid dumper or rotator for rabitq centroids dump");
    return IndexError_InvalidArgument;
  }

  auto align_size = [](size_t size) -> size_t {
    return (size + 0x1F) & (~0x1F);
  };

  uint32_t crc = 0;
  size_t dumped_size = 0;
  // Queried once so the header field and the blob written below always agree.
  const size_t rotator_size = rotator->dump_bytes();

  // Write header
  RabitqConverterHeader header;
  header.dim = static_cast<uint32_t>(dimension);
  header.padded_dim = static_cast<uint32_t>(padded_dim);
  header.num_clusters = static_cast<uint32_t>(num_clusters);
  header.ex_bits = static_cast<uint8_t>(ex_bits);
  header.rotator_type = static_cast<uint8_t>(rotator_type);
  header.rotator_size = static_cast<uint32_t>(rotator_size);
  size_t size = dumper->write(&header, sizeof(header));
  if (size != sizeof(header)) {
    LOG_ERROR("Failed to write header: written=%zu, expected=%zu", size,
              sizeof(header));
    return IndexError_WriteData;
  }
  crc = ailego::Crc32c::Hash(&header, sizeof(header), crc);
  dumped_size += size;

  // Write rotated centroids
  const size_t rotated_centroids_size =
      rotated_centroids.size() * sizeof(float);
  size = dumper->write(rotated_centroids.data(), rotated_centroids_size);
  if (size != rotated_centroids_size) {
    LOG_ERROR("Failed to write rotated centroids: written=%zu, expected=%zu",
              size, rotated_centroids_size);
    return IndexError_WriteData;
  }
  crc = ailego::Crc32c::Hash(rotated_centroids.data(), rotated_centroids_size,
                             crc);
  dumped_size += size;

  // Write original centroids
  const size_t centroids_size = centroids.size() * sizeof(float);
  size = dumper->write(centroids.data(), centroids_size);
  if (size != centroids_size) {
    LOG_ERROR("Failed to write centroids: written=%zu, expected=%zu", size,
              centroids_size);
    return IndexError_WriteData;
  }
  crc = ailego::Crc32c::Hash(centroids.data(), centroids_size, crc);
  dumped_size += size;

  // Write rotator data
  std::vector<char> buffer(rotator_size);
  rotator->save(buffer.data());
  size = dumper->write(buffer.data(), buffer.size());
  if (size != buffer.size()) {
    LOG_ERROR("Failed to write rotator data: written=%zu, expected=%zu", size,
              buffer.size());
    return IndexError_WriteData;
  }
  crc = ailego::Crc32c::Hash(buffer.data(), buffer.size(), crc);
  dumped_size += size;

  // Write padding
  size_t padding_size = align_size(dumped_size) - dumped_size;
  if (padding_size > 0) {
    std::string padding(padding_size, '\0');
    if (dumper->write(padding.data(), padding_size) != padding_size) {
      // %zu is the portable conversion for size_t (%lu is wrong wherever
      // size_t is not unsigned long).
      LOG_ERROR("Append padding failed, size %zu", padding_size);
      return IndexError_WriteData;
    }
  }

  int ret =
      dumper->append(RABITQ_CONVERTER_SEG_ID, dumped_size, padding_size, crc);
  if (ret != 0) {
    LOG_ERROR("Dump segment %s meta failed, ret=%d",
              RABITQ_CONVERTER_SEG_ID.c_str(), ret);
    return ret;
  }

  if (out_dumped_size) {
    *out_dumped_size = dumped_size;
  }
  return 0;
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_rabitq/rabitq_utils.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <memory>
#include <vector>
#include <rabitqlib/utils/rotator.hpp>
#include "zvec/core/framework/index_dumper.h"

namespace zvec {
namespace core {

// Name of the storage segment holding the converter header, centroids and
// rotator.
inline const std::string RABITQ_CONVERTER_SEG_ID{"rabitq.converter"};

// Fixed-size header at the start of the converter segment. Fields are copied
// to and from storage as raw bytes, so the layout must not change.
struct RabitqConverterHeader {
  uint32_t num_clusters;  // number of centroids
  uint32_t dim;           // original vector dimension
  uint32_t padded_dim;    // dimension after rotation/padding
  uint32_t rotator_size;  // byte size of the serialized rotator
  uint8_t ex_bits;        // extra quantization bits
  uint8_t rotator_type;   // rabitqlib::RotatorType stored as a byte
  uint8_t padding[2];     // explicit alignment filler
  uint32_t reserve[3];    // reserved, kept zero

  // Every member is zero-initialized. The struct has no implicit padding
  // bytes, so the whole object starts out as all-zero bytes.
  RabitqConverterHeader()
      : num_clusters(0),
        dim(0),
        padded_dim(0),
        rotator_size(0),
        ex_bits(0),
        rotator_type(0),
        padding{},
        reserve{} {}
};
static_assert(sizeof(RabitqConverterHeader) % 32 == 0,
              "RabitqConverterHeader must be aligned with 32 bytes");

// Common dump implementation for RabitqConverter and RabitqReformer
// Writes header, rotated centroids, original centroids and the rotator through
// `dumper`, pads the segment to a 32-byte boundary and registers it as
// RABITQ_CONVERTER_SEG_ID. Returns 0 on success or an IndexError code.
// `out_dumped_size` is optional and receives the payload size without padding.
int dump_rabitq_centroids(
    const IndexDumper::Pointer &dumper, size_t dimension, size_t padded_dim,
    size_t ex_bits, size_t num_clusters, rabitqlib::RotatorType rotator_type,
    const std::vector<float> &rotated_centroids,
    const std::vector<float> &centroids,
    const std::unique_ptr<rabitqlib::Rotator<float>> &rotator,
    size_t *out_dumped_size = nullptr);

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_sparse/CMakeLists.txt
================================================
# Build definition for the sparse HNSW library (core_knn_hnsw_sparse).
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

# cc_library is not a CMake builtin; presumably it is provided by the
# cmake/bazel.cmake helper included above - TODO confirm.
# Sources are every .cc file in this directory; the target depends on
# core_framework and sparsehash.
cc_library(
    NAME core_knn_hnsw_sparse 
    STATIC SHARED STRICT ALWAYS_LINK
    SRCS *.cc
    LIBS core_framework sparsehash
    INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm
    VERSION "${PROXIMA_ZVEC_VERSION}"
  )


================================================
FILE: src/core/algorithm/hnsw_sparse/hnsw_sparse_algorithm.cc
================================================

// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "hnsw_sparse_algorithm.h"
#include <chrono>
#include <iostream>
#include <vector>
#include <ailego/internal/cpu_features.h>

namespace zvec {
namespace core {

// Binds the algorithm to `entity`, seeds the random engine `mt_` from the
// current system clock and creates a lock pool with kLockCnt entries.
HnswSparseAlgorithm::HnswSparseAlgorithm(HnswSparseEntity &entity)
    : entity_(entity),
      mt_(std::chrono::system_clock::now().time_since_epoch().count()),
      lock_pool_(kLockCnt) {}

// Nothing to release here; always returns 0.
int HnswSparseAlgorithm::cleanup() {
  return 0;
}

// Insert node `id`, whose top level is `level`, into the graph.
//
// Locking:
//  - spin_lock_ protects reads and updates of the entry point / max level.
//  - mutex_ is taken when the new node may raise the max level and is held
//    until the entry point has been updated at the end of the function. If
//    the re-check under mutex_ shows the level is no longer higher, it is
//    released immediately.
//
// Always returns 0.
int HnswSparseAlgorithm::add_node(node_id_t id, level_t level,
                                  HnswSparseContext *ctx) {
  spin_lock_.lock();

  // std::cout << "id: " << id << ", level: " << level << std::endl;

  auto cur_max_level = entity_.cur_max_level();
  auto entry_point = entity_.entry_point();
  // First node ever inserted: it simply becomes the entry point.
  if (ailego_unlikely(entry_point == kInvalidNodeId)) {
    entity_.update_ep_and_level(id, level);
    spin_lock_.unlock();
    return 0;
  }
  spin_lock_.unlock();

  if (ailego_unlikely(level > cur_max_level)) {
    mutex_.lock();
    // re-check max level
    cur_max_level = entity_.cur_max_level();
    entry_point = entity_.entry_point();
    // The level was raised in the meantime, so the mutex is not needed.
    // The matching condition at the end of the function is then false as
    // well, which keeps lock/unlock balanced.
    if (level <= cur_max_level) {
      mutex_.unlock();
    }
  }

  // Levels above the node's own level: only move the entry point closer.
  level_t cur_level = cur_max_level;
  dist_t dist = ctx->dist_calculator()(entry_point);
  for (; cur_level > level; --cur_level) {
    select_entry_point(cur_level, &entry_point, &dist, ctx);
  }

  // Remaining levels down to 0: collect neighbor candidates per level.
  // NOTE: the `cur_level >= 0` condition relies on level_t being signed.
  for (; cur_level >= 0; --cur_level) {
    search_neighbors(cur_level, &entry_point, &dist, ctx->level_topk(cur_level),
                     ctx);
  }

  // add neighbors from down level to top level, to avoid upper level visible
  // to knn_search but the under layer level not ready
  for (cur_level = 0; cur_level <= level; ++cur_level) {
    add_neighbors(id, cur_level, ctx->level_topk(cur_level), ctx);
    ctx->level_topk(cur_level).clear();
  }

  // The new node is the highest one: publish it as the entry point and release
  // the mutex taken above.
  if (ailego_unlikely(level > cur_max_level)) {
    spin_lock_.lock();
    entity_.update_ep_and_level(id, level);
    spin_lock_.unlock();
    mutex_.unlock();
  }

  return 0;
}

// Run a search for the query held in `ctx`; results end up in
// ctx->topk_heap(). Returns 0, also when the graph is still empty.
int HnswSparseAlgorithm::search(HnswSparseContext *ctx) const {
  // Take a consistent snapshot of the graph entry under the spin lock.
  spin_lock_.lock();
  auto top_level = entity_.cur_max_level();
  auto entry_point = entity_.entry_point();
  spin_lock_.unlock();

  // Empty graph: nothing to search.
  if (ailego_unlikely(entry_point == kInvalidNodeId)) {
    return 0;
  }

  // Walk the upper levels (top_level .. 1), only refining the entry point.
  dist_t dist = ctx->dist_calculator().dist(entry_point);
  level_t cur_level = top_level;
  while (cur_level >= 1) {
    select_entry_point(cur_level, &entry_point, &dist, ctx);
    --cur_level;
  }

  // Full candidate search on level 0.
  auto &result_heap = ctx->topk_heap();
  result_heap.clear();
  search_neighbors(0, &entry_point, &dist, result_heap, ctx);

  if (ctx->group_by_search()) {
    expand_neighbors_by_group(result_heap, ctx);
  }

  return 0;
}

//! Greedy descent on one hnsw level (ef = 1): keep moving `*entry_point` to
//! a closer neighbor until a full pass over the neighbors brings no
//! improvement. `*dist` is updated together with `*entry_point`.
void HnswSparseAlgorithm::select_entry_point(level_t level,
                                             node_id_t *entry_point,
                                             dist_t *dist,
                                             HnswSparseContext *ctx) const {
  auto &entity = ctx->get_entity();
  HnswSparseDistCalculator &calc = ctx->dist_calculator();

  bool improved = true;
  while (improved) {
    improved = false;

    const Neighbors neighbors = entity.get_neighbors(level, *entry_point);
    if (ailego_unlikely(ctx->debugging())) {
      ++(*ctx->mutable_stats_get_neighbors());
    }
    const uint32_t count = neighbors.size();
    if (count == 0) {
      break;
    }

    // Fetch the vectors of all neighbors in one call.
    std::vector<IndexStorage::MemoryBlock> blocks;
    const int ret = entity.get_vector_metas(&neighbors[0], count, blocks);
    if (ailego_unlikely(ctx->debugging())) {
      ++(*ctx->mutable_stats_get_vector());
    }
    if (ailego_unlikely(ret != 0)) {
      break;
    }

    for (uint32_t i = 0; i != count; ++i) {
      const dist_t candidate_dist = calc.dist(blocks[i].data());
      if (candidate_dist < *dist) {
        *entry_point = neighbors[i];
        *dist = candidate_dist;
        improved = true;
      }
    }
  }
}

//! Link node `id` on `level` to the candidates held in topk_heap.
//! First stores (and possibly prunes) the forward links of `id`, then tries to
//! add `id` as a reverse link of every node that survived in topk_heap.
//! Does nothing when topk_heap is empty.
void HnswSparseAlgorithm::add_neighbors(node_id_t id, level_t level,
                                        TopkHeap &topk_heap,
                                        HnswSparseContext *ctx) {
  if (ailego_unlikely(topk_heap.size() == 0)) {
    return;
  }

  HnswSparseDistCalculator &dc = ctx->dist_calculator();

  // Forward links; this call may shrink topk_heap.
  update_neighbors(dc, id, level, topk_heap);

  // Reverse links, one per remaining entry of topk_heap.
  for (size_t idx = 0; idx < topk_heap.size(); ++idx) {
    const node_id_t peer = topk_heap[idx].first;
    const dist_t peer_dist = topk_heap[idx].second;
    reverse_update_neighbors(dc, peer, level, id, peer_dist,
                             ctx->update_heap());
  }
}

//! Candidate-driven search on a single graph level.
//! @level        graph level to search
//! @entry_point  in: node to start from; out: closest node encountered
//! @dist         in: distance of *entry_point; out: distance of that node
//! @topk         receives the accepted (non-filtered) results
//! @ctx          supplies the entity, distance calculator, visit filter,
//!               candidate heap, optional key filter and scan limit
//! The search ends when the candidate heap is empty, the context reports the
//! scan limit was reached, the best candidate can no longer improve a full
//! topk, or the entity fails to return neighbor vectors.
void HnswSparseAlgorithm::search_neighbors(level_t level,
                                           node_id_t *entry_point, dist_t *dist,
                                           TopkHeap &topk,
                                           HnswSparseContext *ctx) const {
  const auto &entity = ctx->get_entity();
  HnswSparseDistCalculator &dc = ctx->dist_calculator();
  VisitFilter &visit = ctx->visit_filter();
  CandidateHeap &candidates = ctx->candidates();
  // filter(id) == true means "exclude id from the results". Without a valid
  // context filter nothing is excluded; otherwise the context filter is
  // evaluated on the node's key.
  std::function<bool(node_id_t)> filter = [](node_id_t) { return false; };
  if (ctx->filter().is_valid()) {
    filter = [&](node_id_t id) { return ctx->filter()(entity.get_key(id)); };
  }

  // Seed the search: the entry point is marked visited, becomes a result
  // unless filtered, and is always queued as a candidate.
  candidates.clear();
  visit.clear();
  visit.set_visited(*entry_point);
  if (!filter(*entry_point)) {
    topk.emplace(*entry_point, *dist);
  }

  candidates.emplace(*entry_point, *dist);
  while (!candidates.empty() && !ctx->reach_scan_limit()) {
    auto top = candidates.begin();
    node_id_t main_node = top->first;
    dist_t main_dist = top->second;

    // Stop once topk is full and the candidate at the front of the heap is
    // farther than topk[0] (presumably the worst result currently kept).
    if (topk.full() && main_dist > topk[0].second) {
      break;
    }

    candidates.pop();
    const Neighbors neighbors = entity.get_neighbors(level, main_node);
    ailego_prefetch(neighbors.data);
    if (ailego_unlikely(ctx->debugging())) {
      (*ctx->mutable_stats_get_neighbors())++;
    }

    // Collect the neighbors that have not been visited yet, marking them as
    // visited on the way; `size` counts the collected ids.
    std::vector<node_id_t> neighbor_ids(neighbors.size());
    uint32_t size = 0;
    for (uint32_t i = 0; i < neighbors.size(); ++i) {
      node_id_t node = neighbors[i];
      if (visit.visited(node)) {
        if (ailego_unlikely(ctx->debugging())) {
          (*ctx->mutable_stats_visit_dup_cnt())++;
        }
        continue;
      }
      visit.set_visited(node);
      neighbor_ids[size++] = node;
    }
    if (size == 0) {
      continue;
    }

    // Fetch the vectors of all collected neighbors in one batch. A non-zero
    // return code ends the search; the error is not reported to the caller.
    std::vector<IndexStorage::MemoryBlock> neighbor_block_vecs;
    int ret =
        entity.get_vector_metas(neighbor_ids.data(), size, neighbor_block_vecs);
    if (ailego_unlikely(ctx->debugging())) {
      (*ctx->mutable_stats_get_vector())++;
    }
    if (ailego_unlikely(ret != 0)) {
      break;
    }

    // Look-ahead distances: vector blocks are prefetched two entries ahead,
    // the sparse payload one entry ahead.
    static constexpr node_id_t PREFETCH_STEP = 2;
    static constexpr node_id_t SPARSE_PREFETCH_STEP = 1;

    // Warm up the first PREFETCH_STEP vector blocks before the main loop.
    for (uint32_t i = 0; i < std::min(PREFETCH_STEP, size); ++i) {
      ailego_prefetch(neighbor_block_vecs[i].data());
    }
    for (uint32_t i = 0; i < size; ++i) {
      node_id_t node = neighbor_ids[i];
      node_id_t prefetch_id = i + PREFETCH_STEP;
      if (prefetch_id < size) {
        ailego_prefetch(neighbor_block_vecs[prefetch_id].data());
      }

      // Resolve the sparse payload of the next entry and prefetch it when the
      // entity returned a non-null pointer.
      node_id_t sparse_prefetch_id = i + SPARSE_PREFETCH_STEP;
      if (sparse_prefetch_id < size) {
        IndexStorage::MemoryBlock sparse_block;
        int sparse_length = 0;
        entity.get_sparse_data_from_vector(
            neighbor_block_vecs[sparse_prefetch_id].data(), sparse_block,
            sparse_length);
        auto sparse_data = std::make_pair(sparse_block.data(), sparse_length);
        if (sparse_data.first != nullptr) {
          ailego_prefetch(sparse_data.first);
        }
      }

      dist_t cur_dist = dc.dist(neighbor_block_vecs[i].data());
      // A neighbor is worth exploring while topk is not full or when it beats
      // topk[0]. It is always queued as a candidate, but only recorded as a
      // result when the filter does not exclude it.
      if ((!topk.full()) || cur_dist < topk[0].second) {
        candidates.emplace(node, cur_dist);
        // update entry_point for next level scan
        if (cur_dist < *dist) {
          *entry_point = node;
          *dist = cur_dist;
        }
        if (!filter(node)) {
          topk.emplace(node, cur_dist);
        }
      }  // end if
    }  // end for
  }  // while

  return;
}

//! Regroup the flat search results by group id and, if needed, expand the
//! search until enough groups were found.
//! @topk  results of the level-0 search
//! @ctx   supplies the group-by functor, the per-group heaps, the optional
//!        key filter, the scan limit and the requested group count / size
//! Stage 1 distributes every entry of topk into ctx->group_topk_heaps().
//! Stage 2 runs only when fewer than ctx->group_num() groups exist: it walks
//! level-0 edges starting from the topk entries and adds every non-filtered
//! node to its group. The expansion stops as soon as ctx->group_num() groups
//! exist, the candidates are exhausted, the scan limit is reached, or the
//! entity fails to return neighbor vectors.
//! Does nothing when the context has no valid group-by functor.
void HnswSparseAlgorithm::expand_neighbors_by_group(
    TopkHeap &topk, HnswSparseContext *ctx) const {
  if (!ctx->group_by().is_valid()) {
    return;
  }

  const auto &entity = ctx->get_entity();
  std::function<std::string(node_id_t)> group_by = [&](node_id_t id) {
    return ctx->group_by()(entity.get_key(id));
  };

  // stage 1, divide into groups
  std::map<std::string, TopkHeap> &group_topk_heaps = ctx->group_topk_heaps();
  for (uint32_t i = 0; i < topk.size(); ++i) {
    node_id_t id = topk[i].first;
    auto score = topk[i].second;

    std::string group_id = group_by(id);

    // A heap that is still empty gets its capacity limit set before use.
    auto &topk_heap = group_topk_heaps[group_id];
    if (topk_heap.empty()) {
      topk_heap.limit(ctx->group_topk());
    }
    topk_heap.emplace_back(id, score);
  }

  // stage 2, expand to reach group num as possible
  if (group_topk_heaps.size() < ctx->group_num()) {
    VisitFilter &visit = ctx->visit_filter();
    CandidateHeap &candidates = ctx->candidates();
    HnswSparseDistCalculator &dc = ctx->dist_calculator();

    // filter(id) == true means "exclude id from the results".
    std::function<bool(node_id_t)> filter = [](node_id_t) { return false; };
    if (ctx->filter().is_valid()) {
      filter = [&](node_id_t id) { return ctx->filter()(entity.get_key(id)); };
    }

    // refill to get enough groups: restart from the current results
    candidates.clear();
    visit.clear();
    for (uint32_t i = 0; i < topk.size(); ++i) {
      node_id_t id = topk[i].first;
      float score = topk[i].second;

      visit.set_visited(id);
      candidates.emplace_back(id, score);
    }

    // do expand
    // The group-count test is part of the loop condition: the inner `break`
    // below only leaves the neighbor loop, so without this test the expansion
    // would go on (and keep creating groups) after the target was reached.
    while (!candidates.empty() && !ctx->reach_scan_limit() &&
           group_topk_heaps.size() < ctx->group_num()) {
      auto top = candidates.begin();
      node_id_t main_node = top->first;

      candidates.pop();
      const Neighbors neighbors = entity.get_neighbors(0, main_node);
      if (ailego_unlikely(ctx->debugging())) {
        (*ctx->mutable_stats_get_neighbors())++;
      }

      // Collect not-yet-visited neighbors and mark them visited.
      std::vector<node_id_t> neighbor_ids(neighbors.size());
      uint32_t size = 0;
      for (uint32_t i = 0; i < neighbors.size(); ++i) {
        node_id_t node = neighbors[i];
        if (visit.visited(node)) {
          if (ailego_unlikely(ctx->debugging())) {
            (*ctx->mutable_stats_visit_dup_cnt())++;
          }
          continue;
        }
        visit.set_visited(node);
        neighbor_ids[size++] = node;
      }
      if (size == 0) {
        continue;
      }

      // Batch-fetch the neighbor vectors; a failure ends the expansion.
      std::vector<IndexStorage::MemoryBlock> neighbor_block_vecs;
      int ret = entity.get_vector_metas(neighbor_ids.data(), size,
                                        neighbor_block_vecs);
      if (ailego_unlikely(ctx->debugging())) {
        (*ctx->mutable_stats_get_vector())++;
      }
      if (ailego_unlikely(ret != 0)) {
        break;
      }

      static constexpr node_id_t PREFETCH_STEP = 2;
      for (uint32_t i = 0; i < size; ++i) {
        node_id_t node = neighbor_ids[i];
        node_id_t prefetch_id = i + PREFETCH_STEP;
        if (prefetch_id < size) {
          ailego_prefetch(neighbor_block_vecs[prefetch_id].data());
        }
        dist_t cur_dist = dc.dist(neighbor_block_vecs[i].data());

        if (!filter(node)) {
          std::string group_id = group_by(node);

          auto &topk_heap = group_topk_heaps[group_id];
          if (topk_heap.empty()) {
            topk_heap.limit(ctx->group_topk());
          }
          topk_heap.emplace_back(node, cur_dist);

          // Enough groups: stop scanning; the while condition ends the
          // expansion.
          if (group_topk_heaps.size() >= ctx->group_num()) {
            break;
          }
        }

        // Filtered nodes are still queued so the walk can pass through them.
        candidates.emplace(node, cur_dist);
      }  // end for
    }  // end while
  }  // end if
}

//! Store the neighbors of node `id` on `level`, pruning topk_heap if needed.
//! @dc         distance calculator used for node-to-node distances
//! @id         node whose neighbor list is written
//! @level      graph level of the neighbor list
//! @topk_heap  candidate neighbors with their scores; sorted and possibly
//!             compacted/shrunk in place by this call
void HnswSparseAlgorithm::update_neighbors(HnswSparseDistCalculator &dc,
                                           node_id_t id, level_t level,
                                           TopkHeap &topk_heap) {
  topk_heap.sort();

  // Fast path: the candidate count is within both the prune threshold and the
  // per-level neighbor limit, so the candidates are stored unchanged.
  uint32_t max_neighbor_cnt = entity_.neighbor_cnt(level);
  if (topk_heap.size() <= static_cast<size_t>(entity_.prune_cnt())) {
    if (topk_heap.size() <= static_cast<size_t>(max_neighbor_cnt)) {
      entity_.update_neighbors(level, id, topk_heap);
      return;
    }
  }

  // Prune: walk the sorted candidates and keep one only if every already kept
  // node is farther from it than its own stored score. Kept entries are
  // compacted into topk_heap[0, cur_size).
  uint32_t cur_size = 0;
  for (size_t i = 0; i < topk_heap.size(); ++i) {
    node_id_t cur_node = topk_heap[i].first;
    dist_t cur_node_dist = topk_heap[i].second;
    bool good = true;
    for (uint32_t j = 0; j < cur_size; ++j) {
      dist_t tmp_dist = dc.dist(cur_node, topk_heap[j].first);
      if (tmp_dist <= cur_node_dist) {
        good = false;
        break;
      }
    }

    if (good) {
      topk_heap[cur_size].first = cur_node;
      topk_heap[cur_size].second = cur_node_dist;
      cur_size++;
      if (cur_size >= max_neighbor_cnt) {
        break;
      }
    }
  }

  // when after-prune neighbor count is too seldom,
  // we use this strategy to make-up enough edges
  // not only just make-up out-degrees
  // we also make-up enough in-degrees
  // The scan starts at index cur_size (entries not overwritten by the
  // compaction above) and skips ids that are already among the kept entries.
  uint32_t min_neighbors = entity_.min_neighbor_cnt();
  for (size_t k = cur_size; cur_size < min_neighbors && k < topk_heap.size();
       ++k) {
    bool exist = false;
    for (size_t j = 0; j < cur_size; ++j) {
      if (topk_heap[j].first == topk_heap[k].first) {
        exist = true;
        break;
      }
    }
    if (!exist) {
      topk_heap[cur_size].first = topk_heap[k].first;
      topk_heap[cur_size].second = topk_heap[k].second;
      cur_size++;
    }
  }

  // Drop everything behind the kept entries and write the list to the entity.
  topk_heap.resize(cur_size);
  entity_.update_neighbors(level, id, topk_heap);

  return;
}

//! Try to add `link_id` to the neighbor list of node `id` on `level`.
//! @dc           distance calculator used for node-to-node distances
//! @id           node whose neighbor list is modified
//! @level        graph level of the neighbor list
//! @link_id      node to be added as a neighbor of `id`
//! @dist         score stored for the (id, link_id) pair
//! @update_heap  scratch heap; used only when the list is already full and
//!               cleared again before returning
//! If the list still has room, link_id is appended. Otherwise link_id and all
//! current neighbors are pruned together and the surviving set replaces the
//! list. The neighbor list is accessed while holding the pool mutex selected
//! by `id & kLockMask`.
void HnswSparseAlgorithm::reverse_update_neighbors(HnswSparseDistCalculator &dc,
                                                   node_id_t id, level_t level,
                                                   node_id_t link_id,
                                                   dist_t dist,
                                                   TopkHeap &update_heap) {
  const size_t max_neighbor_cnt = entity_.neighbor_cnt(level);

  {
    // Scoped guard: the mutex is released on every exit path of this block,
    // including the early return below.
    const uint32_t lock_idx = id & kLockMask;
    std::lock_guard<std::mutex> guard(lock_pool_[lock_idx]);

    const Neighbors neighbors = entity_.get_neighbors(level, id);
    const size_t size = neighbors.size();
    ailego_assert_with(size <= max_neighbor_cnt, "invalid neighbor size");
    if (size < max_neighbor_cnt) {
      // Room left: append without touching the existing neighbors.
      entity_.add_neighbor(level, id, size, link_id);
      return;
    }

    // List is full: collect the new link plus all current neighbors together
    // with their distance to `id`.
    update_heap.emplace(link_id, dist);

    for (size_t i = 0; i < size; ++i) {
      node_id_t node = neighbors[i];
      dist_t cur_dist = dc.dist(id, node);
      update_heap.emplace(node, cur_dist);
    }

    //! TODO: optimize prune
    //! prune edges
    // Keep a candidate only if every already kept node is farther from it
    // than its own stored distance; kept entries are compacted to the front.
    update_heap.sort();
    size_t cur_size = 0;
    for (size_t i = 0; i < update_heap.size(); ++i) {
      node_id_t cur_node = update_heap[i].first;
      dist_t cur_node_dist = update_heap[i].second;
      bool good = true;
      for (size_t j = 0; j < cur_size; ++j) {
        dist_t tmp_dist = dc.dist(cur_node, update_heap[j].first);
        if (tmp_dist <= cur_node_dist) {
          good = false;
          break;
        }
      }

      if (good) {
        update_heap[cur_size].first = cur_node;
        update_heap[cur_size].second = cur_node_dist;
        cur_size++;
        if (cur_size >= max_neighbor_cnt) {
          break;
        }
      }
    }

    update_heap.resize(cur_size);
    entity_.update_neighbors(level, id, update_heap);
  }

  // The scratch heap is reset outside of the critical section.
  update_heap.clear();
}

}  // namespace core
}  // namespace zvec

================================================
FILE: src/core/algorithm/hnsw_sparse/hnsw_sparse_algorithm.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <stdint.h>
#include <ailego/parallel/lock.h>
#include "hnsw_sparse_context.h"
#include "hnsw_sparse_dist_calculator.h"
#include "hnsw_sparse_entity.h"

namespace zvec {
namespace core {

//! hnsw graph algorithm implement
class HnswSparseAlgorithm {
 public:
  typedef std::unique_ptr<HnswSparseAlgorithm> UPointer;

 public:
  //! Constructor
  explicit HnswSparseAlgorithm(HnswSparseEntity &entity);

  //! Cleanup HnswSparseAlgorithm
  int cleanup();

  //! Add a node to hnsw graph
  //! @id:     the node unique id
  //! @level:  a node will be add to graph in each level [0, level]
  //! return 0 on success, or errCode in failure
  int add_node(node_id_t id, level_t level, HnswSparseContext *ctx);

  //! do knn search in graph
  //! return 0 on success, or errCode in failure. results saved in ctx
  int search(HnswSparseContext *ctx) const;

  //! Initiate HnswAlgorithm
  //! Rebuilds the per-level probability table from the entity's scaling
  //! factor. Levels are appended until their probability drops below 1e-9.
  //! Always returns 0.
  int init() {
    level_probas_.clear();
    double level_mult =
        1 / std::log(static_cast<double>(entity_.scaling_factor()));
    for (int level = 0;; level++) {
      // refers faiss get_random_level alg
      double proba =
          std::exp(-level / level_mult) * (1 - std::exp(-1 / level_mult));
      // Written as a negated ">=" so that a NaN probability (which a
      // degenerate scaling factor can produce) also ends the loop instead of
      // growing the table without bound.
      if (!(proba >= 1e-9)) {
        break;
      }
      level_probas_.push_back(proba);
    }

    return 0;
  }

  //! Generate a random level
  //! return graph level
  //! Draws one number from the internal generator and maps it onto the
  //! probability table built by init(). Returns 0 when the table is empty
  //! (init() not called, or it produced no level).
  uint32_t get_random_level() const {
    // Without this guard the fallback below would compute size() - 1 on an
    // empty table and wrap around to a huge level.
    if (level_probas_.empty()) {
      return 0;
    }

    // gen rand float in [0, 1]
    double f = mt_() / static_cast<float>(mt_.max());
    for (size_t level = 0; level < level_probas_.size(); level++) {
      if (f < level_probas_[level]) {
        return static_cast<uint32_t>(level);
      }
      f -= level_probas_[level];
    }
    // The drawn value lies beyond the accumulated probabilities: use the
    // highest level of the table.
    return static_cast<uint32_t>(level_probas_.size() - 1);
  }

 private:
  //! Select in upper layer to get entry point for next layer search
  void select_entry_point(level_t level, node_id_t *entry_point, dist_t *dist,
                          HnswSparseContext *ctx) const;

  //! update node id neighbors from topkHeap, and reverse link is also updated
  void add_neighbors(node_id_t id, level_t level, TopkHeap &topk_heap,
                     HnswSparseContext *ctx);

  //! Given a node id and level, search the nearest neighbors in graph
  //! Note: the nearest neighbors result keeps in topk, and entry_point and
  //! dist will be updated to current level nearest node id and distance
  void search_neighbors(level_t level, node_id_t *entry_point, dist_t *dist,
                        TopkHeap &topk, HnswSparseContext *ctx) const;

  //! Update the node's neighbors
  void update_neighbors(HnswSparseDistCalculator &dc, node_id_t id,
                        level_t level, TopkHeap &topk_heap);

  //! Checking linkId could be id's new neighbor, and add as neighbor if true
  //! @dc         distance calculator
  //! @updateHeap temporary heap in updating neighbors
  void reverse_update_neighbors(HnswSparseDistCalculator &dc, node_id_t id,
                                level_t level, node_id_t link_id, dist_t dist,
                                TopkHeap &update_heap);

  //! expand neighbors until group nums are reached
  void expand_neighbors_by_group(TopkHeap &topk, HnswSparseContext *ctx) const;

 private:
  HnswSparseAlgorithm(const HnswSparseAlgorithm &) = delete;
  HnswSparseAlgorithm &operator=(const HnswSparseAlgorithm &) = delete;

 private:
  //! Size of the per-node lock pool; a node uses slot (id & kLockMask)
  static constexpr uint32_t kLockCnt{1U << 8};
  static constexpr uint32_t kLockMask{kLockCnt - 1U};

  HnswSparseEntity &entity_;
  //! Generator behind get_random_level(); mutable because that method is const
  mutable std::mt19937 mt_{};
  //! Probability of each level, filled by init()
  std::vector<double> level_probas_{};

  mutable ailego::SpinMutex spin_lock_{};  // global spin lock
  std::mutex mutex_{};                     // global mutex
  // TODO: spin lock?
  std::vector<std::mutex> lock_pool_{};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_sparse/hnsw_sparse_builder.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "hnsw_sparse_builder.h"
#include <iostream>
#include <thread>
#include <ailego/pattern/defer.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_factory.h>
#include <zvec/core/framework/index_logger.h>
#include "hnsw_sparse_algorithm.h"
#include "hnsw_sparse_params.h"

namespace zvec {
namespace core {

//! Constructor: no work beyond the members' own default initialization.
HnswSparseBuilder::HnswSparseBuilder() = default;

//! Initialize the builder.
//! @meta    index meta; copied, then tagged with this builder's name/revision
//! @params  build parameters; values that are absent keep the member's
//!          current value, values left at 0 fall back to the entity defaults
//! Loads and validates the graph parameters, creates and initializes the
//! metric named in the meta, configures and initializes the entity, and
//! creates the graph algorithm.
//! return 0 on success; IndexError_InvalidArgument when a parameter is out of
//! range, IndexError_NoExist when the metric cannot be created, or the error
//! code of the failing metric/entity/algorithm init.
int HnswSparseBuilder::init(const IndexMeta &meta,
                            const ailego::Params &params) {
  LOG_INFO("Begin HnswSparseBuilder::init");

  meta_ = meta;
  auto params_copy = params;
  meta_.set_builder("HnswSparseBuilder", HnswSparseEntity::kRevision,
                    std::move(params_copy));

  size_t memory_quota = 0UL;
  params.get(PARAM_HNSW_SPARSE_BUILDER_MEMORY_QUOTA, &memory_quota);
  params.get(PARAM_HNSW_SPARSE_BUILDER_THREAD_COUNT, &thread_cnt_);
  params.get(PARAM_HNSW_SPARSE_BUILDER_EFCONSTRUCTION, &ef_construction_);
  params.get(PARAM_HNSW_SPARSE_BUILDER_CHECK_INTERVAL_SECS,
             &check_interval_secs_);

  params.get(PARAM_HNSW_SPARSE_BUILDER_MAX_NEIGHBOR_COUNT,
             &upper_max_neighbor_cnt_);
  // The minimum neighbor count is range-checked against the maximum below and
  // handed to the entity, so it has to be loaded from the parameters as well.
  params.get(PARAM_HNSW_SPARSE_BUILDER_MIN_NEIGHBOR_COUNT, &min_neighbor_cnt_);

  // Level-0 neighbor count, scaling factor and prune count are all derived
  // from the upper-level neighbor count unless given explicitly.
  float multiplier = HnswSparseEntity::kDefaultL0MaxNeighborCntMultiplier;
  params.get(PARAM_HNSW_SPARSE_BUILDER_L0_MAX_NEIGHBOR_COUNT_MULTIPLIER,
             &multiplier);
  l0_max_neighbor_cnt_ = multiplier * upper_max_neighbor_cnt_;
  scaling_factor_ = upper_max_neighbor_cnt_;
  params.get(PARAM_HNSW_SPARSE_BUILDER_SCALING_FACTOR, &scaling_factor_);

  multiplier = HnswSparseEntity::kDefaultNeighborPruneMultiplier;
  params.get(PARAM_HNSW_SPARSE_BUILDER_NEIGHBOR_PRUNE_MULTIPLIER, &multiplier);
  size_t prune_cnt = multiplier * upper_max_neighbor_cnt_;

  if (ef_construction_ == 0) {
    ef_construction_ = HnswSparseEntity::kDefaultEfConstruction;
  }
  if (upper_max_neighbor_cnt_ == 0) {
    upper_max_neighbor_cnt_ = HnswSparseEntity::kDefaultUpperMaxNeighborCnt;
  }
  if (upper_max_neighbor_cnt_ > kMaxNeighborCnt) {
    LOG_ERROR("[%s] must be in range (0,%d]",
              PARAM_HNSW_SPARSE_BUILDER_MAX_NEIGHBOR_COUNT.c_str(),
              kMaxNeighborCnt);
    return IndexError_InvalidArgument;
  }
  if (min_neighbor_cnt_ > upper_max_neighbor_cnt_) {
    LOG_ERROR("[%s]-[%d] must be <= [%s]-[%d]",
              PARAM_HNSW_SPARSE_BUILDER_MIN_NEIGHBOR_COUNT.c_str(),
              min_neighbor_cnt_,
              PARAM_HNSW_SPARSE_BUILDER_MAX_NEIGHBOR_COUNT.c_str(),
              upper_max_neighbor_cnt_);
    return IndexError_InvalidArgument;
  }
  // NOTE(review): the fallback below uses kDefaultUpperMaxNeighborCnt while
  // cleanup() resets this member to kDefaultL0MaxNeighborCnt - confirm which
  // default is intended.
  if (l0_max_neighbor_cnt_ == 0) {
    l0_max_neighbor_cnt_ = HnswSparseEntity::kDefaultUpperMaxNeighborCnt;
  }
  if (l0_max_neighbor_cnt_ > HnswSparseEntity::kMaxNeighborCnt) {
    LOG_ERROR("L0MaxNeighborCnt must be in range (0,%d)",
              HnswSparseEntity::kMaxNeighborCnt);
    return IndexError_InvalidArgument;
  }
  if (scaling_factor_ == 0U) {
    scaling_factor_ = HnswSparseEntity::kDefaultScalingFactor;
  }
  if (scaling_factor_ < 5 || scaling_factor_ > 1000) {
    LOG_ERROR("[%s] must be in range [5,1000]",
              PARAM_HNSW_SPARSE_BUILDER_SCALING_FACTOR.c_str());
    return IndexError_InvalidArgument;
  }
  if (thread_cnt_ == 0) {
    thread_cnt_ = std::thread::hardware_concurrency();
  }
  // More threads than cores is allowed, but worth a warning.
  if (thread_cnt_ > std::thread::hardware_concurrency()) {
    LOG_WARN("[%s] greater than cpu cores %u",
             PARAM_HNSW_SPARSE_BUILDER_THREAD_COUNT.c_str(),
             std::thread::hardware_concurrency());
  }
  if (prune_cnt == 0UL) {
    prune_cnt = upper_max_neighbor_cnt_;
  }

  metric_ = IndexFactory::CreateMetric(meta_.metric_name());
  if (!metric_) {
    LOG_ERROR("CreateMeasure failed, name: %s", meta_.metric_name().c_str());
    return IndexError_NoExist;
  }
  int ret = metric_->init(meta_, meta_.metric_params());
  if (ret != 0) {
    LOG_ERROR("IndexMeasure init failed, ret=%d", ret);
    return ret;
  }

  // Hand the validated parameters over to the entity before initializing it.
  entity_.set_ef_construction(ef_construction_);
  entity_.set_l0_neighbor_cnt(l0_max_neighbor_cnt_);
  entity_.set_min_neighbor_cnt(min_neighbor_cnt_);
  entity_.set_upper_neighbor_cnt(upper_max_neighbor_cnt_);
  entity_.set_scaling_factor(scaling_factor_);
  entity_.set_memory_quota(memory_quota);
  entity_.set_prune_cnt(prune_cnt);

  entity_.set_sparse_meta_size(HnswSparseEntity::kSparseMetaSize);
  entity_.set_sparse_unit_size(meta.unit_size());

  ret = entity_.init();
  if (ret != 0) {
    return ret;
  }

  alg_ = HnswSparseAlgorithm::UPointer(new HnswSparseAlgorithm(entity_));

  ret = alg_->init();
  if (ret != 0) {
    return ret;
  }

  state_ = BUILD_STATE_INITED;
  LOG_INFO(
      "End HnswSparseBuilder::init, params: efConstruction=%u "
      "l0NeighborCnt=%u upperNeighborCnt=%u scalingFactor=%u "
      "memoryQuota=%zu neighborPruneCnt=%zu measureName=%s ",
      ef_construction_, l0_max_neighbor_cnt_, upper_max_neighbor_cnt_,
      scaling_factor_, memory_quota, prune_cnt, meta_.metric_name().c_str());

  return 0;
}

//! Cleanup the builder.
//! Restores the tunable members to their defaults, clears the error state,
//! cleans up the entity and (when present) the algorithm, drops meta and
//! metric, zeroes the statistics and puts the builder back into
//! BUILD_STATE_INIT. Always returns 0.
int HnswSparseBuilder::cleanup(void) {
  LOG_INFO("Begin HnswSparseBuilder::cleanup");

  l0_max_neighbor_cnt_ = HnswSparseEntity::kDefaultL0MaxNeighborCnt;
  min_neighbor_cnt_ = 0;
  upper_max_neighbor_cnt_ = HnswSparseEntity::kDefaultUpperMaxNeighborCnt;
  ef_construction_ = HnswSparseEntity::kDefaultEfConstruction;
  scaling_factor_ = HnswSparseEntity::kDefaultScalingFactor;
  check_interval_secs_ = kDefaultLogIntervalSecs;
  errcode_ = 0;
  error_ = false;
  entity_.cleanup();
  // The algorithm object is only created near the end of init(); it does not
  // exist when cleanup() runs before init() or after an init() that failed
  // early.
  if (alg_) {
    alg_->cleanup();
  }
  meta_.clear();
  metric_.reset();
  stats_.clear_attributes();
  stats_.set_trained_count(0UL);
  stats_.set_built_count(0UL);
  stats_.set_dumped_count(0UL);
  stats_.set_discarded_count(0UL);
  stats_.set_trained_costtime(0UL);
  stats_.set_built_costtime(0UL);
  stats_.set_dumped_costtime(0UL);
  state_ = BUILD_STATE_INIT;

  LOG_INFO("End HnswSparseBuilder::cleanup");

  return 0;
}

//! Train the data (holder variant).
//! Both arguments are ignored: the call only moves the builder from
//! BUILD_STATE_INITED to BUILD_STATE_TRAINED and zeroes the training
//! statistics.
//! return 0 on success, IndexError_NoReady when the builder is not in
//! BUILD_STATE_INITED.
int HnswSparseBuilder::train(IndexThreads::Pointer,
                             IndexSparseHolder::Pointer /*holder*/) {
  const bool inited = (state_ == BUILD_STATE_INITED);
  if (!inited) {
    LOG_ERROR("Init the builder before HnswSparseBuilder::train");
    return IndexError_NoReady;
  }

  state_ = BUILD_STATE_TRAINED;
  stats_.set_trained_costtime(0UL);
  stats_.set_trained_count(0UL);

  LOG_INFO("End HnswSparseBuilder::train");

  return 0;
}

//! Train the data (trainer variant).
//! The trainer is ignored: the call only moves the builder from
//! BUILD_STATE_INITED to BUILD_STATE_TRAINED and zeroes the training
//! statistics.
//! return 0 on success, IndexError_NoReady when the builder is not in
//! BUILD_STATE_INITED.
int HnswSparseBuilder::train(const IndexTrainer::Pointer & /*trainer*/) {
  const bool inited = (state_ == BUILD_STATE_INITED);
  if (!inited) {
    LOG_ERROR("Init the builder before HnswSparseBuilder::train");
    return IndexError_NoReady;
  }

  LOG_INFO("Begin HnswSparseBuilder::train by trainer");

  state_ = BUILD_STATE_TRAINED;
  stats_.set_trained_costtime(0UL);
  stats_.set_trained_count(0UL);

  LOG_INFO("End HnswSparseBuilder::train by trainer");

  return 0;
}

//! Build the index from a sparse holder.
//! @threads  thread pool to build with; when null a SingleQueueIndexThreads
//!           pool with thread_cnt_ threads is created
//! @holder   source of the sparse vectors; must match the index meta
//! First copies every vector of the holder into the entity (each with a
//! freshly drawn random level), then builds the graph in parallel.
//! return 0 on success; IndexError_InvalidArgument for a null holder,
//! IndexError_Mismatch when the holder does not match the meta,
//! IndexError_NoReady when the builder was not initialized, or the error code
//! of the failing step.
int HnswSparseBuilder::build(IndexThreads::Pointer threads,
                             IndexSparseHolder::Pointer holder) {
  if (!holder) {
    LOG_ERROR("Input holder is nullptr while building index");
    return IndexError_InvalidArgument;
  }

  if (!holder->is_matched(meta_)) {
    LOG_ERROR("Input holder doesn't match index meta while building index");
    return IndexError_Mismatch;
  }
  // The algorithm object is created by init(); without it the level draw
  // below would dereference a null pointer.
  if (!alg_) {
    LOG_ERROR("Init the builder before HnswSparseBuilder::build");
    return IndexError_NoReady;
  }
  if (!threads) {
    threads = std::make_shared<SingleQueueIndexThreads>(thread_cnt_, false);
    if (!threads) {
      return IndexError_NoMemory;
    }
  }

  auto start_time = ailego::Monotime::MilliSeconds();

  LOG_INFO("Begin HnswSparseBuilder::build sparse");

  // holder should be hybrid holder
  auto sparse_holder = std::dynamic_pointer_cast<IndexSparseHolder>(holder);

  if (sparse_holder == nullptr) {
    LOG_ERROR("HnswSparseBuilder failed to cast holder");
    return IndexError_Runtime;
  }

  // A count of size_t(-1) is skipped here; otherwise reserve the storage for
  // all documents up front.
  if (sparse_holder->count() != static_cast<size_t>(-1)) {
    // count() is compared against a size_t above, so it is printed with %zu.
    LOG_DEBUG("HnswSparseBuilder holder documents count %zu",
              sparse_holder->count());

    int ret = entity_.reserve_space(sparse_holder->count(),
                                    sparse_holder->total_sparse_count());
    if (ret != 0) {
      LOG_ERROR("HnswBuilde reserver space failed");
      return ret;
    }
  }
  auto iter = sparse_holder->create_iterator();
  if (!iter) {
    LOG_ERROR("Create iterator for holder failed");
    return IndexError_Runtime;
  }

  int ret;
  error_ = false;
  while (iter->is_valid()) {
    level_t level = alg_->get_random_level();
    node_id_t id;

    ret = entity_.add_vector(level, iter->key(), iter->sparse_count(),
                             iter->sparse_indices(), iter->sparse_data(), &id);

    // IndexError_InvalidValue is tolerated: the build goes on with the next
    // vector. Any other error aborts the build.
    if (ailego_unlikely(ret != 0) && ret != IndexError_InvalidValue) {
      return ret;
    }

    iter->next();
  }
  // Holder is not needed, cleanup it.
  sparse_holder.reset();

  LOG_INFO("Finished save vector, start build graph...");

  std::atomic<node_id_t> finished{0};

  ret = build_graph(threads, finished);
  if (ret != 0) {
    LOG_ERROR("Failed to build graph");
    return ret;
  }

  stats_.set_built_count(finished.load());
  stats_.set_built_costtime(ailego::Monotime::MilliSeconds() - start_time);
  state_ = BUILD_STATE_BUILT;

  LOG_INFO("End HnswSparseBuilder::build");
  return 0;
}

//! Insert all stored vectors into the graph using the given thread pool.
//! @threads   pool that runs the workers; one do_build task is submitted per
//!            pool thread
//! @finished  counter that the workers increment per inserted node
//! While the workers run, progress is logged every check_interval_secs_
//! seconds (or earlier when a worker signals cond_).
//! return 0 on success, IndexError_Runtime when no task group can be created,
//! or the error code recorded by the first failing worker.
int HnswSparseBuilder::build_graph(IndexThreads::Pointer threads,
                                   std::atomic<node_id_t> &finished) {
  auto task_group = threads->make_group();
  if (!task_group) {
    LOG_ERROR("Failed to create task group");
    return IndexError_Runtime;
  }

  for (size_t i = 0; i < threads->count(); ++i) {
    task_group->submit(ailego::Closure ::New(this, &HnswSparseBuilder::do_build,
                                             i, threads->count(), &finished));
  }

  while (!task_group->is_finished()) {
    std::unique_lock<std::mutex> lk(mutex_);
    cond_.wait_until(lk, std::chrono::system_clock::now() +
                             std::chrono::seconds(check_interval_secs_));
    if (error_.load(std::memory_order_acquire)) {
      // Stop reporting progress; the failure is handled after the workers
      // have been drained below.
      break;
    }
    LOG_INFO("Built cnt %u, finished percent %.3f%%", finished.load(),
             finished.load() * 100.0f / entity_.doc_cnt());
  }

  // Always wait for every worker before returning: the workers hold a pointer
  // to `finished` (owned by the caller) and to this builder, and a failing
  // worker stores errcode_ only after it has raised error_.
  task_group->wait_finish();

  if (error_.load(std::memory_order_acquire)) {
    LOG_ERROR("Failed to build index while waiting finish");
    return errcode_;
  }

  return 0;
}

//! Worker body: insert the nodes idx, idx + step_size, idx + 2 * step_size,
//! ... into the graph.
//! @idx        first node id handled by this worker
//! @step_size  stride between the node ids of this worker
//! @finished   shared counter, incremented once per inserted node
//! The first worker that fails raises error_ and records its code in
//! errcode_; every worker notifies cond_ when it returns.
void HnswSparseBuilder::do_build(node_id_t idx, size_t step_size,
                                 std::atomic<node_id_t> *finished) {
  // Wake up the thread waiting in build_graph() whenever this worker exits.
  AILEGO_DEFER([&]() {
    std::lock_guard<std::mutex> latch(mutex_);
    cond_.notify_one();
  });

  // The context gets a shared_ptr with a no-op deleter: the entity is a
  // member of the builder and must not be deleted through the context.
  HnswSparseContext *ctx = new (std::nothrow) HnswSparseContext(
      metric_,
      std::shared_ptr<HnswSparseEntity>(&entity_, [](HnswSparseEntity *) {}));
  if (ailego_unlikely(ctx == nullptr)) {
    if (!error_.exchange(true)) {
      LOG_ERROR("Failed to create context");
      errcode_ = IndexError_NoMemory;
    }
    return;
  }
  HnswSparseContext::Pointer auto_ptr(ctx);
  ctx->set_max_scan_num(entity_.doc_cnt());
  int ret = ctx->init(HnswSparseContext::kSparseBuilderContext);
  if (ret != 0) {
    if (!error_.exchange(true)) {
      LOG_ERROR("Failed to init context");
      errcode_ = IndexError_Runtime;
    }
    return;
  }

  for (node_id_t id = idx; id < entity_.doc_cnt(); id += step_size) {
    // Another worker has already failed, so the build as a whole is reported
    // as failed; do not spend more time on it.
    if (ailego_unlikely(error_.load(std::memory_order_acquire))) {
      return;
    }

    // The stored vector itself serves as the query for its own insertion.
    const void *vec = entity_.get_vector_meta(id);

    auto sparse_data = entity_.get_sparse_data_from_vector(vec);

    ctx->reset_query(sparse_data.first);

    ret = alg_->add_node(id, entity_.get_level(id), ctx);
    if (ailego_unlikely(ret != 0)) {
      if (!error_.exchange(true)) {
        LOG_ERROR("Hnsw graph add node failed");
        errcode_ = ret;
      }
      return;
    }
    ctx->clear();
    (*finished)++;
  }
}

//! Dump index into storage.
//! Tags the meta with the searcher name/revision, serializes the meta into
//! the dumper and then lets the entity dump itself. Updates the dump
//! statistics on success.
//! return 0 on success, IndexError_NoReady when the index was not built, or
//! the error code of the failing serialization/dump step.
int HnswSparseBuilder::dump(const IndexDumper::Pointer &dumper) {
  const bool built = (state_ == BUILD_STATE_BUILT);
  if (!built) {
    LOG_INFO("Build the index before HnswSparseBuilder::dump");
    return IndexError_NoReady;
  }

  LOG_INFO("Begin HnswSparseBuilder::dump");

  meta_.set_searcher("HnswSparseSearcher", HnswSparseEntity::kRevision,
                     ailego::Params());
  const auto begin_ms = ailego::Monotime::MilliSeconds();

  // Step 1: index meta.
  const int meta_ret = IndexHelper::SerializeToDumper(meta_, dumper.get());
  if (meta_ret != 0) {
    LOG_ERROR("Failed to serialize meta into dumper.");
    return meta_ret;
  }

  // Step 2: the entity's own data.
  const int entity_ret = entity_.dump(dumper);
  if (entity_ret != 0) {
    LOG_ERROR("HnswSparseBuilder dump index failed");
    return entity_ret;
  }

  stats_.set_dumped_count(entity_.doc_cnt());
  stats_.set_dumped_costtime(ailego::Monotime::MilliSeconds() - begin_ms);

  LOG_INFO("EndHnswSparseBuilder::dump");
  return 0;
}

//! Build the index with indptr format, taking the input data type from the
//! index meta. Forwards to the overload that accepts an explicit query meta
//! and returns its result.
int HnswSparseBuilder::build(IndexThreads::Pointer threads, size_t count,
                             const uint64_t *keys,
                             const uint64_t *sparse_indptr,
                             const uint32_t *sparse_indices,
                             const void *sparse_data) {
  const IndexQueryMeta input_meta(meta_.data_type());
  return build(threads, input_meta, count, keys, sparse_indptr, sparse_indices,
               sparse_data);
}

//! Build the index with indptr format.
//! @threads         thread pool to build with; when null a
//!                  SingleQueueIndexThreads pool with thread_cnt_ threads is
//!                  created
//! @qmeta           meta describing the element type of sparse_data
//! @count           number of documents
//! @keys            `count` document keys
//! @sparse_indptr   `count + 1` offsets; document i covers the elements
//!                  [sparse_indptr[i], sparse_indptr[i + 1])
//! @sparse_indices  dimension indices of all documents, back to back
//! @sparse_data     values of all documents, qmeta.unit_size() bytes each
//! Supported inputs: the same data type as the index meta, or FP32 input for
//! an FP16 index (converted through the HalfFloatSparseReformer).
//! return 0 on success; IndexError_NoReady when the builder was not
//! initialized, IndexError_InvalidArgument for missing keys/indptr,
//! IndexError_NoExist when the reformer is unavailable,
//! IndexError_Unsupported for any other type combination, or the error code
//! of the failing step.
int HnswSparseBuilder::build(IndexThreads::Pointer threads,
                             const IndexQueryMeta &qmeta, size_t count,
                             const uint64_t *keys,
                             const uint64_t *sparse_indptr,
                             const uint32_t *sparse_indices,
                             const void *sparse_data) {
  // The algorithm object is created by init(); without it the level draw
  // below would dereference a null pointer.
  if (!alg_) {
    LOG_ERROR("Init the builder before HnswSparseBuilder::build");
    return IndexError_NoReady;
  }
  // sparse_indptr[count] is read unconditionally and keys[i] for every
  // document, so both arrays are required.
  if (sparse_indptr == nullptr || (count != 0 && keys == nullptr)) {
    LOG_ERROR("Input keys or sparse indptr is nullptr while building index");
    return IndexError_InvalidArgument;
  }

  if (!threads) {
    threads = std::make_shared<SingleQueueIndexThreads>(thread_cnt_, false);
    if (!threads) {
      return IndexError_NoMemory;
    }
  }

  auto start_time = ailego::Monotime::MilliSeconds();

  LOG_INFO("Begin HnswSparseBuilder::build sparse, documents count %zu", count);

  size_t total_sparse_count = sparse_indptr[count];

  int ret = entity_.reserve_space(count, total_sparse_count);
  if (ret != 0) {
    LOG_ERROR("HnswBuilde reserver space failed");
    return ret;
  }

  // Start from a clean error flag, as the holder-based build does; a stale
  // flag from an earlier failed build would make build_graph() fail at once.
  error_ = false;

  if (qmeta.data_type() == meta_.data_type()) {
    // Input already has the index data type: store the values as they are.
    for (size_t i = 0; i < count; i++) {
      level_t level = alg_->get_random_level();
      node_id_t id;

      uint32_t sparse_count = sparse_indptr[i + 1] - sparse_indptr[i];
      const uint32_t *sparse_indices_temp = sparse_indices + sparse_indptr[i];

      const void *sparse_data_temp = static_cast<const char *>(sparse_data) +
                                     sparse_indptr[i] * qmeta.unit_size();

      ret = entity_.add_vector(level, keys[i], sparse_count,
                               sparse_indices_temp, sparse_data_temp, &id);
      // IndexError_InvalidValue is tolerated; any other error aborts.
      if (ailego_unlikely(ret != 0) && ret != IndexError_InvalidValue) {
        return ret;
      }
    }
  } else if (meta_.data_type() == IndexMeta::DataType::DT_FP16 &&
             qmeta.data_type() == IndexMeta::DataType::DT_FP32) {
    // transform from float 32 to float 16
    auto reformer = IndexFactory::CreateReformer("HalfFloatSparseReformer");
    if (!reformer) {
      LOG_ERROR("Sparse reformer not existed.");

      return IndexError_NoExist;
    }

    meta_.set_converter("HalfFloatSparseConverter", 0, ailego::Params());
    meta_.set_reformer("HalfFloatSparseReformer", 0, ailego::Params());

    for (size_t i = 0; i < count; i++) {
      level_t level = alg_->get_random_level();
      node_id_t id;

      uint32_t sparse_count = sparse_indptr[i + 1] - sparse_indptr[i];
      const uint32_t *sparse_indices_temp = sparse_indices + sparse_indptr[i];

      const void *sparse_data_temp = static_cast<const char *>(sparse_data) +
                                     sparse_indptr[i] * qmeta.unit_size();

      std::string query_fp16;
      IndexQueryMeta ometa;

      // A failed conversion must not be stored: query_fp16 would not hold
      // sparse_count converted values.
      ret = reformer->transform(sparse_count, sparse_indices_temp,
                                sparse_data_temp, qmeta, &query_fp16, &ometa);
      if (ailego_unlikely(ret != 0)) {
        LOG_ERROR("Failed to transform sparse vector, ret=%d", ret);
        return ret;
      }

      ret = entity_.add_vector(level, keys[i], sparse_count,
                               sparse_indices_temp, query_fp16.data(), &id);
      if (ailego_unlikely(ret != 0) && ret != IndexError_InvalidValue) {
        return ret;
      }
    }
  } else {
    LOG_ERROR("Format not supported.");

    return IndexError_Unsupported;
  }

  LOG_INFO("Finished save vector, start build graph...");

  std::atomic<node_id_t> finished{0};

  ret = build_graph(threads, finished);
  if (ret != 0) {
    LOG_ERROR("Failed to build graph");
    return ret;
  }

  stats_.set_built_count(finished.load());
  stats_.set_built_costtime(ailego::Monotime::MilliSeconds() - start_time);
  state_ = BUILD_STATE_BUILT;

  LOG_INFO("End HnswSparseBuilder::build");
  return 0;
}

// Register HnswSparseBuilder through the index factory's builder-registration
// macro.
INDEX_FACTORY_REGISTER_BUILDER(HnswSparseBuilder);

}  // namespace core
}  // namespace zvec

================================================
FILE: src/core/algorithm/hnsw_sparse/hnsw_sparse_builder.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <zvec/ailego/parallel/thread_pool.h>
#include <zvec/core/framework/index_builder.h>
#include "hnsw_sparse_algorithm.h"
#include "hnsw_sparse_builder_entity.h"

namespace zvec {
namespace core {

//! Offline builder of an HNSW graph index over sparse vectors.
//! Vectors are first stored into the builder entity, then the graph is built
//! by build_graph(); the result is written out with dump().
class HnswSparseBuilder : public IndexBuilder {
 public:
  //! Constructor
  HnswSparseBuilder();

  //! Initialize the builder
  int init(const IndexMeta &meta, const ailego::Params &params) override;

  //! Cleanup the builder
  int cleanup(void) override;

  //! Train the data from a sparse holder
  int train(IndexThreads::Pointer, IndexSparseHolder::Pointer holder) override;

  //! Train the data from an existing trainer
  int train(const IndexTrainer::Pointer &trainer) override;

  //! Build the index from a sparse holder
  int build(IndexThreads::Pointer threads,
            IndexSparseHolder::Pointer holder) override;

  //! Build the index with indptr format, with an explicit query meta
  //! describing the layout of sparse_data.
  //! Document i owns the elements in
  //! [sparse_indptr[i], sparse_indptr[i + 1]) of sparse_indices/sparse_data.
  int build(IndexThreads::Pointer threads, const IndexQueryMeta &qmeta,
            size_t count, const uint64_t *keys, const uint64_t *sparse_indptr,
            const uint32_t *sparse_indices, const void *sparse_data) override;

  //! Build the index with indptr format, without an explicit query meta
  //! NOTE(review): presumably the data layout is taken from the builder's own
  //! meta -- confirm in the implementation.
  int build(IndexThreads::Pointer threads, size_t count, const uint64_t *keys,
            const uint64_t *sparse_indptr, const uint32_t *sparse_indices,
            const void *sparse_data) override;

  //! Dump index into storage
  int dump(const IndexDumper::Pointer &dumper) override;

  //! Retrieve statistics
  const Stats &stats(void) const override {
    return stats_;
  }

 private:
  //! Build the graph over the vectors already stored in the entity;
  //! `finished` is used by build() as the built-count statistic
  int build_graph(IndexThreads::Pointer threads,
                  std::atomic<node_id_t> &finished);
  //! Worker routine of build_graph
  //! NOTE(review): looks like each worker starts at `idx` and advances by
  //! `step_size` -- confirm in the implementation.
  void do_build(node_id_t idx, size_t step_size,
                std::atomic<node_id_t> *finished);

  //! Default value of check_interval_secs_
  constexpr static uint32_t kDefaultLogIntervalSecs = 15U;
  //! NOTE(review): presumably the upper bound accepted for neighbor-count
  //! parameters -- confirm in init()
  constexpr static uint32_t kMaxNeighborCnt = 65535;

 private:
  //! Lifecycle of the builder; build() moves the state to BUILD_STATE_BUILT
  //! once the graph has been built successfully
  enum BUILD_STATE {
    BUILD_STATE_INIT = 0,
    BUILD_STATE_INITED = 1,
    BUILD_STATE_TRAINED = 2,
    BUILD_STATE_BUILT = 3
  };

  HnswSparseBuilderEntity entity_{};   // storage of vectors, keys, neighbors
  HnswSparseAlgorithm::UPointer alg_;  // impl graph algorithm
  uint32_t thread_cnt_{0};
  uint32_t l0_max_neighbor_cnt_{HnswSparseEntity::kDefaultL0MaxNeighborCnt};
  uint32_t min_neighbor_cnt_{0};
  uint32_t upper_max_neighbor_cnt_{
      HnswSparseEntity::kDefaultUpperMaxNeighborCnt};
  uint32_t ef_construction_{HnswSparseEntity::kDefaultEfConstruction};
  uint32_t scaling_factor_{HnswSparseEntity::kDefaultScalingFactor};
  uint32_t check_interval_secs_{kDefaultLogIntervalSecs};

  // NOTE(review): errcode_/error_ presumably carry a failure raised by the
  // worker threads back to build_graph -- confirm in the implementation
  int errcode_{0};
  std::atomic_bool error_{false};
  IndexMeta meta_{};
  IndexMetric::Pointer metric_{};
  std::mutex mutex_{};
  std::condition_variable cond_{};
  Stats stats_{};  // built count and build cost time are recorded by build()

  BUILD_STATE state_{BUILD_STATE_INIT};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_sparse/hnsw_sparse_builder_entity.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "hnsw_sparse_builder_entity.h"
#include <zvec/ailego/hash/crc32c.h>
#include "utility/sparse_utility.h"

namespace zvec {
namespace core {

//! Constructor: starts with an invalid node id and level 0 passed to
//! update_ep_and_level
HnswSparseBuilderEntity::HnswSparseBuilderEntity() {
  this->update_ep_and_level(kInvalidNodeId, 0U);
}

int HnswSparseBuilderEntity::cleanup() {
  memory_quota_ = 0UL;
  neighbors_size_ = 0U;
  upper_neighbors_size_ = 0U;
  padding_size_ = 0U;
  vectors_buffer_.clear();
  keys_buffer_.clear();
  neighbors_buffer_.clear();
  upper_neighbors_buffer_.clear();
  neighbors_index_.clear();

  vectors_buffer_.shrink_to_fit();
  keys_buffer_.shrink_to_fit();
  neighbors_buffer_.shrink_to_fit();
  upper_neighbors_buffer_.shrink_to_fit();
  neighbors_index_.shrink_to_fit();

  this->HnswSparseEntity::cleanup();

  return 0;
}

//! Init the builder entity: compute the per-node size, the padding appended
//! to every node, and the cached neighbor block sizes.
//! @return always 0
int HnswSparseBuilderEntity::init() {
  // Unpadded node payload: vector part followed by the sparse meta
  const size_t payload_size =
      static_cast<size_t>(vector_size()) + sparse_meta_size();

  //! aligned size to 32
  set_node_size(AlignSize(payload_size));

  //! if node size is aligned to 1k, the build performance will downgrade
  const bool is_1k_multiple = (node_size() % 1024 == 0);
  if (is_1k_multiple) {
    set_node_size(AlignSize(node_size() + 1));
  }

  // Whatever the alignment added is written as zero padding per node
  padding_size_ = node_size() - payload_size;

  upper_neighbors_size_ = upper_neighbors_size();
  neighbors_size_ = neighbors_size();

  return 0;
}

//! Reserve buffer space for documents
//! @param docs                number of documents
//! @param total_sparse_count  total number of sparse elements of all docs
//! @return 0 on success, IndexError_NoMemory if a memory quota is set and the
//!         estimated node/neighbor/index footprint exceeds it
int HnswSparseBuilderEntity::reserve_space(size_t docs,
                                           size_t total_sparse_count) {
  // A zero quota means "unlimited"
  if (memory_quota_ > 0) {
    const auto estimated = node_size() * docs + neighbors_size_ * docs +
                           sizeof(SparseNeighborIndex) * docs;
    if (estimated > memory_quota_) {
      return IndexError_NoMemory;
    }
  }

  neighbors_index_.reserve(docs);
  neighbors_buffer_.reserve(neighbors_size_ * docs);
  keys_buffer_.reserve(sizeof(key_t) * docs);
  vectors_buffer_.reserve(node_size() * docs);

  // Per doc one uint32_t, per element one uint32_t index plus one value
  const auto per_doc_bytes = sizeof(uint32_t) * docs;
  const auto index_bytes = (sizeof(uint32_t)) * total_sparse_count;
  const auto value_bytes = sparse_unit_size() * total_sparse_count;
  sparse_data_buffer_.reserve(per_doc_bytes + index_bytes + value_bytes);

  return 0;
}

//! Add a sparse vector and its key to the entity
//! @param level           highest graph level of the new node
//! @param key             primary key stored for the node
//! @param sparse_count    number of non-zero elements
//! @param sparse_indices  indices of the non-zero elements
//! @param sparse_vec      values of the non-zero elements
//! @param id              receives the local id assigned to the new node
//! @return 0 on success, IndexError_InvalidValue if sparse_count exceeds
//!         kSparseMaxDimSize, IndexError_NoMemory if the quota is exceeded
int HnswSparseBuilderEntity::add_vector(level_t level, key_t key,
                                        const uint32_t sparse_count,
                                        const uint32_t *sparse_indices,
                                        const void *sparse_vec, node_id_t *id) {
  if (ailego_unlikely(sparse_count > HnswSparseEntity::kSparseMaxDimSize)) {
    LOG_WARN(
        "Failed to add sparse vector: number of non-zero elements (%u) exceeds "
        "maximum allowed (%u), key=%zu",
        sparse_count, HnswSparseEntity::kSparseMaxDimSize, (size_t)key);
    return IndexError_InvalidValue;
  }

  // Pack indices and values into the stored sparse format
  std::string sparse_buffer;
  SparseUtility::TransSparseFormat(sparse_count, sparse_indices, sparse_vec,
                                   sparse_unit_size(), sparse_buffer);

  const uint32_t sparse_len = static_cast<uint32_t>(sparse_buffer.size());

  if (memory_quota_ > 0 &&
      (vectors_buffer_.capacity() + keys_buffer_.capacity() +
           neighbors_buffer_.capacity() + upper_neighbors_buffer_.capacity() +
           neighbors_index_.capacity() * sizeof(SparseNeighborIndex) +
           sparse_len >
       memory_quota_)) {
    LOG_ERROR("Add vector failed, used memory exceed quota, cur_doc=%u",
              doc_cnt());
    return IndexError_NoMemory;
  }

  // Node meta layout: 8-byte offset, 4-byte length, 4 reserved bytes, padding.
  // The offset is serialized through a fixed-width local because readers
  // decode exactly 8 bytes, while sparse_data_offset_ is a size_t that may be
  // narrower than that.
  const uint64_t data_offset = sparse_data_offset_;
  vectors_buffer_.append(reinterpret_cast<const char *>(&data_offset),
                         sizeof(data_offset));
  vectors_buffer_.append(reinterpret_cast<const char *>(&sparse_len),
                         sizeof(sparse_len));
  vectors_buffer_.append(sizeof(uint32_t),
                         '\0');  // reserve to make it up to meta size
  vectors_buffer_.append(padding_size_, '\0');

  keys_buffer_.append(reinterpret_cast<const char *>(&key), sizeof(key));

  sparse_data_buffer_.append(sparse_buffer.data(), sparse_len);
  sparse_data_offset_ += sparse_len;

  // init level 0 neighbors
  neighbors_buffer_.append(neighbors_size_, '\0');

  // remember where this node's upper-level blocks start, and its level
  neighbors_index_.emplace_back(upper_neighbors_buffer_.size(), level);

  // init upper layer neighbors
  for (level_t cur_level = 1; cur_level <= level; ++cur_level) {
    upper_neighbors_buffer_.append(upper_neighbors_size_, '\0');
  }

  *id = (*mutable_doc_cnt())++;

  return 0;
}

//! Get primary key of the node id (keys are stored back to back)
key_t HnswSparseBuilderEntity::get_key(node_id_t id) const {
  const key_t *key_table =
      reinterpret_cast<const key_t *>(keys_buffer_.data());
  return key_table[id];
}

//! Get the address of the node record of id inside vectors_buffer_
const void *HnswSparseBuilderEntity::get_vector_meta(node_id_t id) const {
  const char *node_base = vectors_buffer_.data();
  return node_base + id * node_size();
}

//! Get the node record of id wrapped in a memory block
//! @return always 0
int HnswSparseBuilderEntity::get_vector_meta(
    const node_id_t id, IndexStorage::MemoryBlock &block) const {
  block.reset(const_cast<void *>(get_vector_meta(id)));
  return 0;
}

int HnswSparseBuilderEntity::get_vector_metas(const node_id_t *ids,
                                              uint32_t count,
                                              const void **vecs) const {
  for (uint32_t i = 0; i < count; ++i) {
    vecs[i] = vectors_buffer_.data() + ids[i] * node_size();
  }

  return 0;
}

//! Batch get node records as memory blocks
//! @param ids         node ids to look up
//! @param count       number of entries in ids
//! @param block_vecs  one block per id is appended, in the order of ids
//! @return always 0
int HnswSparseBuilderEntity::get_vector_metas(
    const node_id_t *ids, uint32_t count,
    std::vector<IndexStorage::MemoryBlock> &block_vecs) const {
  // Heap-backed scratch instead of a variable-length array: VLAs are not
  // standard C++, are ill-defined for count == 0 and can exhaust the stack
  // for large batches.
  std::vector<const void *> vecs(count);
  get_vector_metas(ids, count, vecs.data());
  for (const void *vec : vecs) {
    block_vecs.emplace_back(
        IndexStorage::MemoryBlock(const_cast<void *>(vec)));
  }
  return 0;
}

//! Get sparse data located at byte offset inside sparse_data_buffer_
//! (len is not used by this in-memory implementation)
const void *HnswSparseBuilderEntity::get_sparse_data(uint64_t offset,
                                                     uint32_t /*len*/) const {
  const uint8_t *payload_base =
      reinterpret_cast<const uint8_t *>(sparse_data_buffer_.data());
  return payload_base + offset;
}

//! Get sparse data at offset wrapped in a memory block
//! @return always 0
int HnswSparseBuilderEntity::get_sparse_data(
    uint64_t offset, uint32_t len, IndexStorage::MemoryBlock &block) const {
  block.reset(const_cast<void *>(get_sparse_data(offset, len)));
  return 0;
}

//! Get sparse data of node id (resolved through the node's record)
const void *HnswSparseBuilderEntity::get_sparse_data(node_id_t id) const {
  const void *node_record = get_vector_meta(id);
  return get_sparse_data_from_vector(node_record).first;
}

//! Get sparse data of node id wrapped in a memory block
//! @return always 0
int HnswSparseBuilderEntity::get_sparse_data(
    const node_id_t id, IndexStorage::MemoryBlock &block) const {
  block.reset(const_cast<void *>(get_sparse_data(id)));
  return 0;
}

//! Get sparse data from vector
std::pair<const void *, uint32_t>
HnswSparseBuilderEntity::get_sparse_data_from_vector(const void *vec) const {
  uint32_t vec_size = vector_size();
  const char *vec_ptr = reinterpret_cast<const char *>(vec);

  uint64_t offset = *((uint64_t *)(vec_ptr + vec_size));
  uint32_t sparse_vector_len =
      *((uint32_t *)(vec_ptr + vec_size + sizeof(uint64_t)));

  const void *sparse_data = get_sparse_data(offset, sparse_vector_len);
  if (ailego_unlikely(sparse_data == nullptr)) {
    LOG_ERROR("Get nullptr sparse, offset=%zu, len=%u", (size_t)offset,
              sparse_vector_len);

    return std::make_pair(nullptr, 0);
  }

  return std::make_pair(sparse_data, sparse_vector_len);
}

int HnswSparseBuilderEntity::get_sparse_data_from_vector(
    const void *vec, IndexStorage::MemoryBlock &block,
    int &sparse_length) const {
  std::pair<const void *, uint32_t> sparse_data =
      get_sparse_data_from_vector(vec);
  block.reset((void *)sparse_data.first);
  sparse_length = sparse_data.second;
  return 0;
}

//! Get the neighbors of node id on graph level, as a view built from the
//! stored neighbor header
const Neighbors HnswSparseBuilderEntity::get_neighbors(level_t level,
                                                       node_id_t id) const {
  const NeighborsHeader *header = get_neighbor_header(level, id);
  return {header->neighbor_cnt, header->neighbors};
}

//! Replace the neighbors of node id on graph level
//! Only the node ids of `neighbors` are stored; the distances are dropped.
//! @return always 0
int HnswSparseBuilderEntity::update_neighbors(
    level_t level, node_id_t id,
    const std::vector<std::pair<node_id_t, dist_t>> &neighbors) {
  // The header accessor is const; the buffer itself is owned and mutable
  auto *header =
      const_cast<NeighborsHeader *>(get_neighbor_header(level, id));

  size_t slot = 0;
  for (const auto &candidate : neighbors) {
    header->neighbors[slot] = candidate.first;
    ++slot;
  }
  header->neighbor_cnt = neighbors.size();

  return 0;
}

//! Append neighbor_id to the neighbors of node id on graph level
//! (the size argument is not used by this implementation)
void HnswSparseBuilderEntity::add_neighbor(level_t level, node_id_t id,
                                           uint32_t /*size*/,
                                           node_id_t neighbor_id) {
  auto *header =
      const_cast<NeighborsHeader *>(get_neighbor_header(level, id));
  header->neighbors[header->neighbor_cnt] = neighbor_id;
  ++header->neighbor_cnt;
}

//! Dump the hnsw graph to dumper
//! @return a negative error code from dump_segments, otherwise 0
int HnswSparseBuilderEntity::dump(const IndexDumper::Pointer &dumper) {
  // dump_segments wants a mutable key array; the keys live in keys_buffer_
  char *raw_keys = const_cast<char *>(keys_buffer_.data());
  key_t *keys = reinterpret_cast<key_t *>(raw_keys);

  auto ret = dump_segments(
      dumper, keys, [&](node_id_t node) { return get_level(node); });
  if (ailego_unlikely(ret < 0)) {
    return ret;
  }

  return 0;
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_sparse/hnsw_sparse_builder_entity.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <zvec/ailego/internal/platform.h>
#include "hnsw_sparse_entity.h"

namespace zvec {
namespace core {

//! In-memory entity used while building a sparse HNSW index.
//! Node records, keys, neighbor lists and the sparse payload are kept in
//! contiguous buffers that grow as vectors are added.
class HnswSparseBuilderEntity : public HnswSparseEntity {
 public:
  //! Add vector and key to hnsw entity, and local id will be saved in id
  virtual int add_vector(level_t level, key_t key, const uint32_t sparse_count,
                         const uint32_t *sparse_indices, const void *sparse_vec,
                         node_id_t *id) override;

  //! Get primary key of the node id
  virtual key_t get_key(node_id_t id) const override;

  //! Get the node record (vector meta) of the node id
  virtual const void *get_vector_meta(node_id_t id) const override;

  //! Get the node record (vector meta) of the node id as a memory block
  virtual int get_vector_meta(const node_id_t id,
                              IndexStorage::MemoryBlock &block) const override;

  //! Batch get node records (vector metas) by node ids
  virtual int get_vector_metas(const node_id_t *ids, uint32_t count,
                               const void **vecs) const override;
  //! Batch get node records (vector metas) by node ids as memory blocks
  virtual int get_vector_metas(
      const node_id_t *ids, uint32_t count,
      std::vector<IndexStorage::MemoryBlock> &block_vecs) const override;

  //! Get the neighbor header of the node id on graph level.
  //! Level 0 blocks are indexed directly by id; upper level blocks start at
  //! the node's offset in upper_neighbors_buffer_, one block per level.
  //! No range check is done on level or id.
  const NeighborsHeader *get_neighbor_header(level_t level,
                                             node_id_t id) const {
    if (level == 0) {
      return reinterpret_cast<const NeighborsHeader *>(
          neighbors_buffer_.data() + neighbors_size_ * id);
    } else {
      size_t offset = neighbors_index_[id].offset;
      return reinterpret_cast<const NeighborsHeader *>(
          upper_neighbors_buffer_.data() + offset +
          (level - 1) * upper_neighbors_size_);
    }
  }

  //! Get the node id's neighbors on graph level
  virtual const Neighbors get_neighbors(level_t level,
                                        node_id_t id) const override;

  //! Replace the neighbors of node id on graph level
  virtual int update_neighbors(
      level_t level, node_id_t id,
      const std::vector<std::pair<node_id_t, dist_t>> &neighbors) override;

  //! add a neighbor to id in graph level
  virtual void add_neighbor(level_t level, node_id_t id, uint32_t size,
                            node_id_t neighbor_id) override;

  //! Get sparse data by byte offset into the sparse data buffer
  virtual const void *get_sparse_data(uint64_t offset,
                                      uint32_t len) const override;
  //! Get sparse data from id
  virtual const void *get_sparse_data(node_id_t id) const override;

  //! Get sparse data by byte offset as a memory block
  virtual int get_sparse_data(uint64_t offset, uint32_t len,
                              IndexStorage::MemoryBlock &block) const override;

  //! Get sparse data from id as a memory block
  virtual int get_sparse_data(const node_id_t id,
                              IndexStorage::MemoryBlock &block) const override;

  //! Get sparse data (pointer, length) from a node record
  virtual std::pair<const void *, uint32_t> get_sparse_data_from_vector(
      const void *vec) const override;

  //! Get sparse data from a node record as a memory block
  virtual int get_sparse_data_from_vector(const void *vec,
                                          IndexStorage::MemoryBlock &block,
                                          int &sparse_length) const override;

  //! Dump the hnsw graph to dumper
  virtual int dump(const IndexDumper::Pointer &dumper) override;

  //! Cleanup the entity
  virtual int cleanup(void) override;

 public:
  //! Constructor
  HnswSparseBuilderEntity();

  //! Get the node graph level by id
  level_t get_level(node_id_t id) const {
    return neighbors_index_[id].level;
  }

  //! Init builder entity (computes node size, padding and neighbor sizes)
  int init();

  //! reserve buffer space for documents
  //! @param  docs    number of documents
  //! @param  total_sparse_count    total number of sparse elements of all docs
  int reserve_space(size_t docs, size_t total_sparse_count);

  //! Set memory quota params (0 disables the quota checks)
  inline void set_memory_quota(size_t memory_quota) {
    memory_quota_ = memory_quota;
  }

  //! Get the byte size of one level 0 neighbor block (header + ids)
  inline size_t neighbors_size() const {
    return sizeof(NeighborsHeader) + l0_neighbor_cnt() * sizeof(node_id_t);
  }

  //! Get the byte size of one upper level neighbor block (header + ids)
  inline size_t upper_neighbors_size() const {
    return sizeof(NeighborsHeader) + upper_neighbor_cnt() * sizeof(node_id_t);
  }

 public:
  //! Not copyable
  HnswSparseBuilderEntity(const HnswSparseBuilderEntity &) = delete;
  HnswSparseBuilderEntity &operator=(const HnswSparseBuilderEntity &) = delete;

 private:
  friend class HnswSparseSearcherEntity;

  //! class internal used only
  //! Packs, per node, the byte offset of its first upper level neighbor block
  //! (48 bits) and its graph level (16 bits) into one 64-bit word
  struct SparseNeighborIndex {
    SparseNeighborIndex(size_t off, level_t l) : offset(off), level(l) {}
    uint64_t offset : 48;
    uint64_t level : 16;
  };

  std::string vectors_buffer_{};          // node records, node_size() each
  std::string keys_buffer_{};             // primary keys, one key_t per node
  std::string neighbors_buffer_{};        // level 0 neighbors buffer
  std::string upper_neighbors_buffer_{};  // upper layer neighbors buffer

  std::string sparse_data_buffer_{};  // packed sparse data of all nodes
  size_t sparse_data_offset_{0};      // bytes appended to sparse_data_buffer_

  // upper layer offset + level in upper_neighbors_buffer_
  std::vector<SparseNeighborIndex> neighbors_index_{};
  size_t memory_quota_{0UL};         // 0 means no quota
  size_t neighbors_size_{0U};        // level 0 neighbors size
  size_t upper_neighbors_size_{0U};  // upper level neighbors size
  size_t padding_size_{};            // padding size for each vector element
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_sparse/hnsw_sparse_chunk.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "hnsw_sparse_chunk.h"
#include <chrono>
#include <random>
#include <zvec/ailego/hash/crc32c.h>
#include <zvec/ailego/utility/time_helper.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_helper.h>
#include <zvec/core/framework/index_logger.h>
#include <zvec/core/framework/index_streamer.h>

namespace zvec {
namespace core {

int SparseChunkBroker::init_storage(size_t chunk_size) {
  chunk_meta_.clear();
  chunk_meta_.chunk_size = chunk_size;
  chunk_meta_.create_time = ailego::Realtime::Seconds();
  stats_.set_create_time(chunk_meta_.create_time);
  chunk_meta_.update_time = ailego::Realtime::Seconds();
  stats_.set_update_time(chunk_meta_.update_time);

  //! alloc meta chunk
  size_t size = sizeof(HnswSparseChunkMeta);
  size = (size + page_mask_) & (~page_mask_);
  const std::string segment_id =
      make_segment_id(CHUNK_TYPE_META, kDefaultChunkSeqId);
  int ret = stg_->append(segment_id, size);
  if (ailego_unlikely(ret != 0)) {
    LOG_ERROR("Storage append segment failed for %s", IndexError::What(ret));
    return ret;
  }
  chunk_meta_segment_ = get_chunk(CHUNK_TYPE_META, kDefaultChunkSeqId);
  if (ailego_unlikely(!chunk_meta_segment_)) {
    LOG_ERROR("Get meta segment failed");
    return IndexError_Runtime;
  }

  //! update meta info and write to storage
  chunk_meta_.chunk_cnts[CHUNK_TYPE_META] += 1;
  chunk_meta_.total_size += size;
  (*stats_.mutable_index_size()) += size;
  size = chunk_meta_segment_->write(0UL, &chunk_meta_,
                                    sizeof(HnswSparseChunkMeta));
  if (ailego_unlikely(size != sizeof(HnswSparseChunkMeta))) {
    LOG_ERROR("Storage write data failed, wsize=%zu", size);
    return IndexError_WriteData;
  }

  return 0;
}

int SparseChunkBroker::load_storage(size_t chunk_size) {
  IndexStorage::MemoryBlock data_block;
  size_t size = chunk_meta_segment_->read(0UL, data_block,
                                          chunk_meta_segment_->data_size());
  if (size != sizeof(HnswSparseChunkMeta)) {
    LOG_ERROR("Invalid hnsw meta chunk, read size=%zu chunk size=%zu", size,
              chunk_meta_segment_->data_size());
    return IndexError_InvalidFormat;
  }
  std::memcpy(&chunk_meta_, data_block.data(), size);
  if (chunk_meta_.chunk_size != chunk_size) {
    LOG_ERROR(
        "Params hnsw chunk size=%zu mismatch from previous %zu "
        "in index",
        chunk_size, (size_t)chunk_meta_.chunk_size);
    return IndexError_Mismatch;
  }

  *stats_.mutable_check_point() = stg_->check_point();
  stats_.set_revision_id(chunk_meta_.revision_id);
  stats_.set_update_time(chunk_meta_.update_time);
  stats_.set_create_time(chunk_meta_.create_time);

  char create_time[32];
  char update_time[32];
  ailego::Realtime::Gmtime(chunk_meta_.create_time, "%Y-%m-%d %H:%M:%S",
                           create_time, sizeof(create_time));
  ailego::Realtime::Gmtime(chunk_meta_.update_time, "%Y-%m-%d %H:%M:%S",
                           update_time, sizeof(update_time));
  LOG_DEBUG(
      "Load index, indexSize=%zu chunkSize=%zu nodeChunks=%zu "
      "upperNeighborChunks=%zu revisionId=%zu "
      "createTime=%s updateTime=%s",
      (size_t)chunk_meta_.total_size, (size_t)chunk_meta_.chunk_size,
      (size_t)chunk_meta_.chunk_cnts[CHUNK_TYPE_NODE],
      (size_t)chunk_meta_.chunk_cnts[CHUNK_TYPE_UPPER_NEIGHBOR],
      (size_t)chunk_meta_.revision_id, create_time, update_time);

  return 0;
}

//! Open storage: take over the storage and either load the existing chunk
//! meta or initialise a new index when no meta segment exists.
//! @return 0 on success, IndexError_Duplicate if a storage is already opened,
//!         otherwise the error of init_storage/load_storage
int SparseChunkBroker::open(IndexStorage::Pointer stg, size_t max_index_size,
                            size_t chunk_size, bool check_crc) {
  if (ailego_unlikely(stg_)) {
    LOG_ERROR("An storage instance is already opened");
    return IndexError_Duplicate;
  }

  dirty_ = false;
  max_chunks_size_ = max_index_size;
  check_crc_ = check_crc;
  stg_ = std::move(stg);

  // The presence of the meta segment tells an existing index from a new one
  chunk_meta_segment_ =
      stg_->get(make_segment_id(CHUNK_TYPE_META, kDefaultChunkSeqId));
  if (chunk_meta_segment_) {
    return load_storage(chunk_size);
  }

  LOG_DEBUG("Create new index");
  return init_storage(chunk_size);
}

int SparseChunkBroker::close(void) {
  flush(0UL);

  stg_.reset();
  check_crc_ = false;
  dirty_ = false;

  return 0;
}

//! Write the chunk meta to its segment, then refresh and flush the storage
//! with the given checkpoint. A failed meta write is logged only.
//! @return the result of the storage flush (0 on success)
int SparseChunkBroker::flush(uint64_t checkpoint) {
  ailego_assert_with(chunk_meta_segment_, "invalid meta segment");

  chunk_meta_.update_time = ailego::Realtime::Seconds();
  stats_.set_update_time(chunk_meta_.update_time);

  const size_t written = chunk_meta_segment_->write(
      0UL, &chunk_meta_, sizeof(HnswSparseChunkMeta));
  if (ailego_unlikely(written != sizeof(HnswSparseChunkMeta))) {
    LOG_ERROR("Storage write data failed, wsize=%zu", written);
  }

  stg_->refresh(checkpoint);
  const int flush_ret = stg_->flush();
  if (flush_ret != 0) {
    LOG_ERROR("Storage flush failed for %s", IndexError::What(flush_ret));
    return flush_ret;
  }

  // Only a successful flush moves the recorded checkpoint forward
  (*stats_.mutable_check_point()) = checkpoint;
  return flush_ret;
}

//! Alloc a new chunk with size, not thread-safe
//! @param type    chunk type, must be below CHUNK_TYPE_MAX
//! @param seq_id  sequence id of the chunk within its type
//! @param size    requested size in bytes (rounded up to whole pages when a
//!                new segment is appended)
//! @return (0, chunk) on success; on failure an error code and, except for a
//!         failed lookup after append, a null chunk:
//!         IndexError_Uninitialized without storage, IndexError_Runtime if a
//!         chunk of that name exists but is not an empty chunk of the expected
//!         size, IndexError_IndexFull if the size limit would be reached, the
//!         storage error if append fails, IndexError_NoMemory if the appended
//!         chunk cannot be fetched
std::pair<int, SparseChunk::Pointer> SparseChunkBroker::alloc_chunk(
    int type, uint64_t seq_id, size_t size) {
  ailego_assert_with(type < CHUNK_TYPE_MAX, "chunk type overflow");

  SparseChunk::Pointer chunk;
  if (ailego_unlikely(!stg_)) {
    LOG_ERROR("Init storage first");
    return std::make_pair(IndexError_Uninitialized, chunk);
  }

  //! check exist a empty chunk with the same name
  chunk = get_chunk(type, seq_id);
  if (chunk) {
    // Only an untouched chunk of exactly the expected size may be reused;
    // anything else (wrong size, or already holding data) is rejected.
    if (ailego_unlikely(chunk->capacity() != size ||
                        chunk->data_size() != 0UL)) {
      LOG_ERROR("Exist invalid chunk size %zu, expect size %zu",
                chunk->capacity(), size);
      chunk.reset();
      return std::make_pair(IndexError_Runtime, chunk);
    }
    return std::make_pair(0, chunk);
  }
  //! align to page size
  size = (size + page_mask_) & (~page_mask_);
  if (ailego_unlikely(chunk_meta_.total_size + size >= max_chunks_size_)) {
    LOG_ERROR("No space to new a chunk, curIndexSize=%zu allocSize=%zu",
              (size_t)chunk_meta_.total_size, size);
    return std::make_pair(IndexError_IndexFull, chunk);
  }

  std::string segment_id = make_segment_id(type, seq_id);
  int ret = stg_->append(segment_id, size);
  if (ailego_unlikely(ret != 0)) {
    LOG_ERROR("Storage append segment failed for %s", IndexError::What(ret));
    return std::make_pair(ret, chunk);
  }
  chunk_meta_.chunk_cnts[type] += 1;
  chunk_meta_.total_size += size;
  (*stats_.mutable_index_size()) += size;

  // Persist the updated meta; a failed write is logged but does not undo the
  // allocation.
  const size_t written = chunk_meta_segment_->write(
      0UL, &chunk_meta_, sizeof(HnswSparseChunkMeta));
  if (ailego_unlikely(written != sizeof(HnswSparseChunkMeta))) {
    LOG_ERROR("Storage write data failed, wsize=%zu", written);
  }

  chunk = get_chunk(type, seq_id);
  return std::make_pair(chunk ? 0 : IndexError_NoMemory, chunk);
}

//! Look up the chunk of the given type and sequence id in the storage
//! (returns whatever the storage returns for the derived segment id)
SparseChunk::Pointer SparseChunkBroker::get_chunk(int type,
                                                  uint64_t seq_id) const {
  return stg_->get(make_segment_id(type, seq_id));
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_sparse/hnsw_sparse_chunk.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <atomic>
#include <cstddef>
#include <mutex>
#include <ailego/utility/memory_helper.h>
#include <zvec/ailego/internal/platform.h>
#include <zvec/ailego/utility/string_helper.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_logger.h>
#include <zvec/core/framework/index_storage.h>
#include <zvec/core/framework/index_streamer.h>

namespace zvec {
namespace core {

using SparseChunk = IndexStorage::Segment;

//! Allocates and looks up chunks (storage segments) of a sparse HNSW index
//! and keeps the chunk meta (counts, sizes, revision, timestamps) in a
//! dedicated meta segment of the same storage.
class SparseChunkBroker {
 public:
  typedef std::shared_ptr<SparseChunkBroker> Pointer;

  //! Chunk types; the value is part of the segment id and is also used as the
  //! index into HnswSparseChunkMeta::chunk_cnts
  enum CHUNK_TYPE {
    CHUNK_TYPE_HEADER = 1,
    CHUNK_TYPE_META = 2,
    CHUNK_TYPE_NODE = 3,
    CHUNK_TYPE_UPPER_NEIGHBOR = 4,
    CHUNK_TYPE_NEIGHBOR_INDEX = 5,
    CHUNK_TYPE_SPARSE_NODE = 6,
    CHUNK_TYPE_MAX = 8
  };
  //! Sequence id used for the single meta chunk
  static constexpr size_t kDefaultChunkSeqId = 0UL;

  //! Constructor; stats is held by reference and updated by the broker
  SparseChunkBroker(IndexStreamer::Stats &stats) : stats_(stats) {
    // NOTE: the mask arithmetic used for rounding assumes the page size is a
    // power of two
    page_mask_ = ailego::MemoryHelper::PageSize() - 1;
  }

  //! Open storage
  int open(IndexStorage::Pointer stg, size_t max_index_size, size_t chunk_size,
           bool check_crc);

  //! Flush the meta and release the storage
  int close(void);

  //! Write the chunk meta and flush the storage with the given checkpoint
  int flush(uint64_t checkpoint);

  //! alloc a new chunk with size, not thread-safe
  std::pair<int, SparseChunk::Pointer> alloc_chunk(int type, uint64_t seq_id,
                                                   size_t size);

  //! alloc a new chunk with chunk size
  inline std::pair<int, SparseChunk::Pointer> alloc_chunk(int type,
                                                          uint64_t seq_id) {
    return alloc_chunk(type, seq_id, chunk_meta_.chunk_size);
  }

  //! Get the chunk of the given type and sequence id from the storage
  SparseChunk::Pointer get_chunk(int type, uint64_t seq_id) const;

  //! Get the number of allocated chunks of the given type
  inline size_t get_chunk_cnt(int type) const {
    ailego_assert_with(type < CHUNK_TYPE_MAX, "chunk type overflow");
    return chunk_meta_.chunk_cnts[type];
  }

  //! Whether the index has been marked as modified
  inline bool dirty(void) const {
    return dirty_;
  }

  //! Mark the index as modified; the revision id is bumped only on the first
  //! call after the flag was last cleared
  inline void mark_dirty(void) {
    if (!dirty_) {
      dirty_ = true;
      chunk_meta_.revision_id += 1;
      stats_.set_revision_id(chunk_meta_.revision_id);
    }
  }

  //! Get the underlying storage (null if not opened)
  const IndexStorage::Pointer storage(void) const {
    return stg_;
  }

 private:
  //! Not copyable
  SparseChunkBroker(const SparseChunkBroker &) = delete;
  SparseChunkBroker &operator=(const SparseChunkBroker &) = delete;

  //! Chunk meta as stored in the meta segment; it is written and read as raw
  //! bytes, so it must stay a plain fixed-layout struct
  struct HnswSparseChunkMeta {
    HnswSparseChunkMeta(void) {
      memset(this, 0, sizeof(HnswSparseChunkMeta));
    }
    //! Reset every field to zero
    void clear() {
      memset(this, 0, sizeof(HnswSparseChunkMeta));
    }

    uint64_t chunk_cnts[CHUNK_TYPE_MAX];  // chunk count, indexed by CHUNK_TYPE
    uint64_t chunk_size;   // size of per chunk
    uint64_t total_size;   // total size of allocated chunk
    uint64_t revision_id;  // index revision
    uint64_t create_time;  // set when the storage is initialised
    uint64_t update_time;  // refreshed on every flush
    uint64_t reserved[3];
  };

  static_assert(sizeof(HnswSparseChunkMeta) % 32 == 0,
                "HnswSparseChunkMeta must be aligned with 32 bytes");

  //! Init the storage after open an empty index
  int init_storage(size_t chunk_size);

  //! Load index from storage
  int load_storage(size_t chunk_size);

  //! Build the segment id "HnswT<type>S<seq_id>" of a chunk
  static inline const std::string make_segment_id(int type, uint64_t seq_id) {
    return "HnswT" + ailego::StringHelper::ToString(type) + "S" +
           ailego::StringHelper::ToString(seq_id);
  }

 private:
  IndexStreamer::Stats &stats_;       // owned by the caller
  HnswSparseChunkMeta chunk_meta_{};  // in-memory copy of the stored meta
  size_t page_mask_{0UL};             // page size - 1, used to round sizes up
  size_t max_chunks_size_{0UL};       // limit for the total allocated size
  IndexStorage::Pointer stg_{};
  IndexStorage::Segment::Pointer chunk_meta_segment_{};
  bool check_crc_{false};
  bool dirty_{false};  // set as true if index is modified , the flag
                       // will not be cleared even if flushed
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_sparse/hnsw_sparse_context.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "hnsw_sparse_context.h"
#include <chrono>
#include "hnsw_sparse_params.h"

namespace zvec {
namespace core {

//! Bind the context to a metric and an entity.
//! dc_ is built from entity_.get(), i.e. from the member initialised just
//! before it; this relies on entity_ being declared ahead of dc_ in the class.
HnswSparseContext::HnswSparseContext(const IndexMetric::Pointer &metric,
                                     const HnswSparseEntity::Pointer &entity)
    : IndexContext(metric), entity_(entity), dc_(entity_.get(), metric) {}

//! Destructor: explicitly tears down the visit filter via destroy()
HnswSparseContext::~HnswSparseContext() {
  visit_filter_.destroy();
}

//! Prepare the visit filter and the working heaps for the given role.
//!
//! @param type  role of the context (builder, searcher or streamer)
//! @return 0 on success, the code returned by the visit filter when it cannot
//!         be created, or IndexError_Runtime for any other type
int HnswSparseContext::init(ContextType type) {
  type_ = type;

  if (type == kSparseBuilderContext) {
    // Builders always use the byte map, independent of filter_mode_
    int rc = visit_filter_.init(VisitFilter::ByteMap, entity_->doc_cnt(),
                                max_scan_num_, negative_probability_);
    if (rc != 0) {
      LOG_ERROR("Create filter failed,  mode %d", filter_mode_);
      return rc;
    }
    candidates_.limit(max_scan_num_);
    update_heap_.limit(entity_->l0_neighbor_cnt() + 1);
    return 0;
  }

  if (type == kSparseSearcherContext) {
    int rc = visit_filter_.init(filter_mode_, entity_->doc_cnt(),
                                max_scan_num_, negative_probability_);
    if (rc != 0) {
      LOG_ERROR("Create filter failed,  mode %d", filter_mode_);
      return rc;
    }
    candidates_.limit(max_scan_num_);
    return 0;
  }

  if (type == kSparseStreamerContext) {
    // The document count of a streamer keeps changing, so the scan budget is
    // derived from the scan ratio and the visit filter is sized with spare
    // room (reserve_max_doc_cnt_) beyond the current count.
    uint32_t current_docs = entity_->doc_cnt();
    max_scan_num_ = compute_max_scan_num(current_docs);
    reserve_max_doc_cnt_ = current_docs + compute_reserve_cnt(current_docs);

    int rc = visit_filter_.init(filter_mode_, reserve_max_doc_cnt_,
                                max_scan_num_, negative_probability_);
    if (rc != 0) {
      LOG_ERROR("Create filter failed,  mode %d", filter_mode_);
      return rc;
    }

    update_heap_.limit(entity_->l0_neighbor_cnt() + 1);
    candidates_.limit(max_scan_num_);

    check_need_adjuct_ctx();
    return 0;
  }

  LOG_ERROR("Init context failed");
  return IndexError_Runtime;
}

//! Apply runtime parameters to a searcher or streamer context.
//!
//! Searcher: optional EF, max scan ratio and brute-force threshold.
//! Streamer: optional EF, max scan ratio, max/min scan limits and brute-force
//! threshold; the ratio must lie in (0, 1] and the max limit must not be
//! smaller than the min limit.
//! In both cases the visit filter is rebuilt when the bloom-filter switch or
//! its negative probability changes.
//!
//! @param params  key/value parameters; absent keys leave the current
//!                settings untouched
//! @return 0 on success, IndexError_InvalidArgument for rejected streamer
//!         scan settings, the visit filter's code when it cannot be rebuilt,
//!         or IndexError_Runtime for any other context type
int HnswSparseContext::update(const ailego::Params &params) {
  LOG_DEBUG("Update hnsw context params");

  // Rebuild the visit filter if the requested mode / probability differs from
  // what the current filter was created with.
  auto update_visit_filter_param = [&]() {
    bool need_update = false;
    std::string p;
    switch (type_) {
      case kSparseSearcherContext:
        p = PARAM_HNSW_SPARSE_SEARCHER_VISIT_BLOOMFILTER_ENABLE;
        break;
      case kSparseStreamerContext:
        p = PARAM_HNSW_SPARSE_STREAMER_VISIT_BLOOMFILTER_ENABLE;
        break;
    }

    if (params.has(p)) {
      bool bf_enabled;
      params.get(p, &bf_enabled);
      if (bf_enabled ^ (filter_mode_ == VisitFilter::BloomFilter)) {
        need_update = true;
        filter_mode_ =
            bf_enabled ? VisitFilter::BloomFilter : VisitFilter::ByteMap;
      }
    }

    float prob = negative_probability_;
    p.clear();
    switch (type_) {
      case kSparseSearcherContext:
        p = PARAM_HNSW_SPARSE_SEARCHER_VISIT_BLOOMFILTER_NEGATIVE_PROB;
        break;
      case kSparseStreamerContext:
        p = PARAM_HNSW_SPARSE_STREAMER_VISIT_BLOOMFILTER_NEGATIVE_PROB;
        break;
    }
    params.get(p, &prob);
    if (filter_mode_ == VisitFilter::BloomFilter &&
        std::abs(prob - negative_probability_) > 1e-6) {
      need_update = true;
    }
    // Remember the requested probability. Without this the filter below would
    // be re-created with the old value, and every later update() carrying the
    // same parameter would tear the filter down and rebuild it again.
    negative_probability_ = prob;

    if (need_update) {
      visit_filter_.destroy();
      int max_doc_cnt = 0;
      if (type_ == kSparseSearcherContext) {
        max_doc_cnt = entity_->doc_cnt();
      } else {
        max_doc_cnt = reserve_max_doc_cnt_;
      }
      int ret = visit_filter_.init(filter_mode_, max_doc_cnt, max_scan_num_,
                                   negative_probability_);
      if (ret != 0) {
        LOG_ERROR("Create filter failed,  mode %d", filter_mode_);
        return ret;
      }
    }
    return 0;
  };

  switch (type_) {
    case kSparseSearcherContext:
      if (params.has(PARAM_HNSW_SPARSE_SEARCHER_EF)) {
        params.get(PARAM_HNSW_SPARSE_SEARCHER_EF, &ef_);
        topk_heap_.limit(std::max(topk_, ef_));
      }

      if (params.has(PARAM_HNSW_SPARSE_SEARCHER_MAX_SCAN_RATIO)) {
        params.get(PARAM_HNSW_SPARSE_SEARCHER_MAX_SCAN_RATIO, &max_scan_ratio_);
        max_scan_num_ =
            static_cast<uint32_t>(max_scan_ratio_ * entity_->doc_cnt());
        // Never scan fewer than 10000 candidates
        max_scan_num_ = std::max(10000U, max_scan_num_);
      }

      if (params.has(PARAM_HNSW_SPARSE_SEARCHER_BRUTE_FORCE_THRESHOLD)) {
        params.get(PARAM_HNSW_SPARSE_SEARCHER_BRUTE_FORCE_THRESHOLD,
                   &bruteforce_threshold_);
      }

      return update_visit_filter_param();

    case kSparseStreamerContext: {
      if (params.has(PARAM_HNSW_SPARSE_STREAMER_EF)) {
        params.get(PARAM_HNSW_SPARSE_STREAMER_EF, &ef_);
        topk_heap_.limit(std::max(topk_, ef_));
      }

      // Read the scan settings into locals first so that a rejected update
      // leaves the context with its previous, valid configuration.
      float scan_ratio = max_scan_ratio_;
      uint32_t scan_limit_max = max_scan_limit_;
      uint32_t scan_limit_min = min_scan_limit_;
      params.get(PARAM_HNSW_SPARSE_STREAMER_MAX_SCAN_RATIO, &scan_ratio);
      params.get(PARAM_HNSW_SPARSE_STREAMER_MAX_SCAN_LIMIT, &scan_limit_max);
      params.get(PARAM_HNSW_SPARSE_STREAMER_MIN_SCAN_LIMIT, &scan_limit_min);
      if (scan_ratio <= 0.0f || scan_ratio > 1.0f) {
        LOG_ERROR("[%s] must be in range (0.0f,1.0f]",
                  PARAM_HNSW_SPARSE_STREAMER_MAX_SCAN_RATIO.c_str());
        return IndexError_InvalidArgument;
      }
      if (scan_limit_max < scan_limit_min) {
        LOG_ERROR("[%s] must be >= [%s]",
                  PARAM_HNSW_SPARSE_STREAMER_MAX_SCAN_LIMIT.c_str(),
                  PARAM_HNSW_SPARSE_STREAMER_MIN_SCAN_LIMIT.c_str());
        return IndexError_InvalidArgument;
      }
      max_scan_ratio_ = scan_ratio;
      max_scan_limit_ = scan_limit_max;
      min_scan_limit_ = scan_limit_min;

      if (params.has(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD)) {
        params.get(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD,
                   &bruteforce_threshold_);
      }

      return update_visit_filter_param();
    }

    default:
      LOG_ERROR("update context failed, type=%u", type_);
      return IndexError_Runtime;
  }
}


//! Re-target an existing context at another entity / metric of the same role.
//!
//! The magic number is invalidated first and only set to magic_num once the
//! visit filter and heaps have been re-sized for the new entity.
//!
//! @param type       must equal the type the context was initialised with
//! @param metric     metric handed to the distance calculator
//! @param entity     entity the context will work on from now on
//! @param magic_num  magic number stored on success
//! @return 0 on success, IndexError_Unsupported on a type mismatch,
//!         IndexError_NotImplemented for builder contexts, IndexError_Runtime
//!         when the filter cannot be reset or the type is not recognised
int HnswSparseContext::update_context(ContextType type,
                                      const IndexMeta & /*meta*/,
                                      const IndexMetric::Pointer &metric,
                                      const HnswSparseEntity::Pointer &entity,
                                      uint32_t magic_num) {
  if (ailego_unlikely(type != type_)) {
    LOG_ERROR(
        "HnswSparseContext doesn't support shared by different type, "
        "src=%u dst=%u",
        type_, type);
    return IndexError_Unsupported;
  }

  magic_ = kInvalidMgic;

  // TODO: support change filter mode?
  if (type == kSparseBuilderContext) {
    LOG_ERROR("BuildContext doesn't support update");
    return IndexError_NotImplemented;
  }

  if (type == kSparseSearcherContext) {
    if (!visit_filter_.reset(entity->doc_cnt(), max_scan_num_)) {
      LOG_ERROR("Reset filter failed, mode %d", visit_filter_.get_mode());
      return IndexError_Runtime;
    }
  } else if (type == kSparseStreamerContext) {
    // Streamers re-derive the scan budget and the reserved filter capacity
    // from the new entity's document count
    uint32_t current_docs = entity->doc_cnt();
    max_scan_num_ = compute_max_scan_num(current_docs);
    reserve_max_doc_cnt_ = current_docs + compute_reserve_cnt(current_docs);
    if (!visit_filter_.reset(reserve_max_doc_cnt_, max_scan_num_)) {
      LOG_ERROR("Reset filter failed, mode %d", visit_filter_.get_mode());
      return IndexError_Runtime;
    }
    update_heap_.limit(entity->l0_neighbor_cnt() + 1);
  } else {
    LOG_ERROR("update context failed");
    return IndexError_Runtime;
  }

  candidates_.limit(max_scan_num_);
  topk_heap_.limit(std::max(topk_, ef_));

  entity_ = entity;
  dc_.update(entity_.get(), metric);
  magic_ = magic_num;
  level_topks_.clear();

  return 0;
}

//! Pad the topk heap with not-yet-visited documents until it is full or
//! doc_cnt candidates have been tried.
//!
//! Candidate ids are drawn at random when the heap limit is below half of the
//! document count; otherwise ids are walked sequentially (with wrap-around)
//! from a random starting point. Ids rejected by the user filter or already
//! marked in the visit filter are skipped.
void HnswSparseContext::fill_random_to_topk_full(void) {
  // Snapshot the count once so the distribution bounds, the wrap-around point
  // and the loop limit all agree.
  const auto doc_cnt = entity_->doc_cnt();
  if (doc_cnt == 0) {
    // Nothing to pick from; (0, doc_cnt - 1) would not be a valid range
    return;
  }

  // One engine per thread: a random engine is mutated by every draw, so a
  // single function-local static would be shared, unsynchronised, by all
  // contexts running on different threads.
  static thread_local std::mt19937 mt(
      std::chrono::system_clock::now().time_since_epoch().count());
  std::uniform_int_distribution<node_id_t> dt(0, doc_cnt - 1);
  std::function<node_id_t()> gen;
  node_id_t seqid;
  std::function<bool(node_id_t)> myfilter = [](node_id_t) { return false; };
  if (this->filter().is_valid()) {
    myfilter = [&](node_id_t id) {
      return this->filter()(entity_->get_key(id));
    };
  }

  if (topk_heap_.limit() < doc_cnt / 2) {
    gen = [&](void) { return dt(mt); };
  } else {
    // If topk limit is big value, gen sequential id from an random initial
    seqid = dt(mt);
    gen = [&](void) {
      seqid = seqid == (doc_cnt - 1) ? 0 : (seqid + 1);
      return seqid;
    };
  }

  for (size_t i = 0; !topk_heap_.full() && i < doc_cnt; ++i) {
    const auto id = gen();
    if (!visit_filter_.visited(id) && !myfilter(id)) {
      visit_filter_.set_visited(id);
      topk_heap_.emplace(id, dc_.dist(id));
    }
  }
  return;
}

}  // namespace core
}  // namespace zvec

================================================
FILE: src/core/algorithm/hnsw_sparse/hnsw_sparse_context.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <zvec/core/framework/index_context.h>
#include "utility/sparse_utility.h"
#include "utility/visit_filter.h"
#include "hnsw_sparse_dist_calculator.h"

namespace zvec {
namespace core {

//! Working state used while searching / building / streaming a sparse HNSW
//! index: the distance calculator, the topk / candidate / update heaps, the
//! visit filter, the scan limits and the result lists that are handed back to
//! the caller.
class HnswSparseContext : public IndexContext {
 public:
  //! Index Context Pointer
  typedef std::unique_ptr<HnswSparseContext> Pointer;

  //! Role of the context; selects the branch taken by init(), update() and
  //! update_context()
  enum ContextType {
    kUnknownContext = 0,
    kSparseSearcherContext = 1,
    kSparseBuilderContext = 2,
    kSparseStreamerContext = 3,
  };

  //! Construct
  HnswSparseContext(const IndexMetric::Pointer &metric,
                    const HnswSparseEntity::Pointer &entity);

  //! Destructor
  virtual ~HnswSparseContext();

 public:
  //! Set topk of search result; the topk heap is limited to max(val, ef_)
  virtual void set_topk(uint32_t val) override {
    topk_ = val;
    topk_heap_.limit(std::max(val, ef_));
  }

  //! Retrieve the first search result (results_[0], no bounds check)
  virtual const IndexDocumentList &result(void) const override {
    return results_[0];
  }

  //! Retrieve search result with index (no bounds check)
  virtual const IndexDocumentList &result(size_t idx) const override {
    return results_[idx];
  }

  //! Retrieve result object for output
  virtual IndexDocumentList *mutable_result(size_t idx) override {
    ailego_assert_with(idx < results_.size(), "invalid idx");
    return &results_[idx];
  }

  //! Retrieve the first search group result (group_results_[0])
  virtual const IndexGroupDocumentList &group_result(void) const override {
    return group_results_[0];
  }

  //! Retrieve search group result with index
  virtual const IndexGroupDocumentList &group_result(
      size_t idx) const override {
    return group_results_[idx];
  }

  //! Retrieve the magic number currently stored in the context
  virtual uint32_t magic(void) const override {
    return magic_;
  }

  //! Set mode of debug
  virtual void set_debug_mode(bool enable) override {
    debug_mode_ = enable;
  }

  //! Retrieve mode of debug
  virtual bool debug_mode(void) const override {
    return this->debugging();
  }

  //! Retrieve string of debug: the scan count plus the three debug counters
  virtual std::string debug_string(void) const override {
    char buf[4096];
    // NOTE(review): snprintf's return value is used as the length without
    // clamping to sizeof(buf); the four numeric fields fit easily today.
    size_t size = snprintf(
        buf, sizeof(buf),
        "scan_cnt=%zu,get_vector_cnt=%u,get_neighbors_cnt=%u,dup_node=%u",
        get_scan_num(), stats_get_vector_cnt_, stats_get_neighbors_cnt_,
        stats_visit_dup_cnt_);
    return std::string(buf, size);
  }

  //! Update the parameters of context
  virtual int update(const ailego::Params &params) override;

 public:
  //! Init context
  int init(ContextType type);

  //! Update context, the context may be shared by different searcher/streamer
  int update_context(ContextType type, const IndexMeta &meta,
                     const IndexMetric::Pointer &metric,
                     const HnswSparseEntity::Pointer &entity,
                     uint32_t magic_num);

  //! Retrieve the entity the context currently works on
  inline const HnswSparseEntity &get_entity() const {
    return *entity_;
  }

  //! Resize the result container matching the current search mode: the group
  //! results for a group-by search, the plain results otherwise
  inline void resize_results(size_t size) {
    if (group_by_search()) {
      group_results_.resize(size);
    } else {
      results_.resize(size);
    }
  }

  //! Construct the result at index 0 from the topk heap
  inline void topk_to_result() {
    return topk_to_result(0);
  }

  //! Construct result from topk heap, result will be normalized
  inline void topk_to_result(uint32_t idx) {
    if (group_by_search()) {
      topk_to_group_result(idx);
    } else {
      topk_to_single_result(idx);
    }
  }

  //! Recompute the distance of every entry in the topk heap with the current
  //! distance calculator; entries are re-inserted through emplace_back in
  //! their stored order
  inline void recal_topk_dist() {
    TopkHeap heap(topk_heap_);
    topk_heap_.clear();

    for (size_t i = 0; i < heap.size(); ++i) {
      node_id_t id = heap[i].first;
      dist_t dist = dc_.dist(id);
      topk_heap_.emplace_back(id, dist);
    }
  }

  //! Convert the topk heap into results_[idx].
  //! At most topk_ entries are emitted, in sorted heap order, stopping at the
  //! first score above threshold(). With fetch_vector_ set, each document also
  //! carries its vector meta and the sparse vector converted by
  //! SparseUtility::ReverseSparseFormat.
  inline void topk_to_single_result(uint32_t idx) {
    // Optional padding: top up the heap when fewer than the limit were found
    if (force_padding_topk_ && !topk_heap_.full() &&
        topk_heap_.size() < entity_->doc_cnt()) {
      this->fill_random_to_topk_full();
    }
    // With an empty heap, results_[idx] is left exactly as it was
    if (ailego_unlikely(topk_heap_.size() == 0)) {
      return;
    }

    ailego_assert_with(idx < results_.size(), "invalid idx");
    int size = std::min(topk_, static_cast<uint32_t>(topk_heap_.size()));
    topk_heap_.sort();
    results_[idx].clear();

    for (int i = 0; i < size; ++i) {
      auto score = topk_heap_[i].second;
      if (score > this->threshold()) {
        break;
      }

      node_id_t id = topk_heap_[i].first;
      if (fetch_vector_) {
        IndexSparseDocument sparse_doc;
        IndexStorage::MemoryBlock vec_block;
        entity_->get_sparse_data(id, vec_block);
        const void *sparse_data = vec_block.data();
        // A missing sparse vector yields a default-constructed sparse_doc
        if (sparse_data != nullptr) {
          SparseUtility::ReverseSparseFormat(sparse_data, sparse_doc,
                                             entity_->sparse_unit_size());
        }

        results_[idx].emplace_back(entity_->get_key(id), score, id,
                                   entity_->get_vector_meta(id), sparse_doc);
      } else {
        results_[idx].emplace_back(entity_->get_key(id), score, id);
      }
    }

    return;
  }

  //! Construct result from topk heap, result will be normalized
  //! Groups are ranked by their best (first after sort) score in ascending
  //! order, the first group_num() groups are kept, and each kept group emits
  //! at most group_topk_ documents not exceeding threshold().
  inline void topk_to_group_result(uint32_t idx) {
    ailego_assert_with(idx < group_results_.size(), "invalid idx");

    group_results_[idx].clear();

    std::vector<std::pair<std::string, TopkHeap>> group_topk_list;
    std::vector<std::pair<std::string, float>> best_score_in_groups;
    // Sort every group's heap and remember its best score; empty groups are
    // not ranked
    for (auto itr = group_topk_heaps_.begin(); itr != group_topk_heaps_.end();
         itr++) {
      const std::string &group_id = (*itr).first;
      auto &heap = (*itr).second;
      heap.sort();

      if (heap.size() > 0) {
        float best_score = heap[0].second;
        best_score_in_groups.push_back(std::make_pair(group_id, best_score));
      }
    }

    std::sort(best_score_in_groups.begin(), best_score_in_groups.end(),
              [](const std::pair<std::string, float> &a,
                 const std::pair<std::string, float> &b) -> int {
                return a.second < b.second;
              });

    // truncate to group num
    for (uint32_t i = 0; i < group_num() && i < best_score_in_groups.size();
         ++i) {
      const std::string &group_id = best_score_in_groups[i].first;

      // The selected group's heap is copied into the list here
      group_topk_list.emplace_back(
          std::make_pair(group_id, group_topk_heaps_[group_id]));
    }

    group_results_[idx].resize(group_topk_list.size());

    for (uint32_t i = 0; i < group_topk_list.size(); ++i) {
      const std::string &group_id = group_topk_list[i].first;
      group_results_[idx][i].set_group_id(group_id);

      uint32_t size = std::min(
          group_topk_, static_cast<uint32_t>(group_topk_list[i].second.size()));

      for (uint32_t j = 0; j < size; ++j) {
        auto score = group_topk_list[i].second[j].second;
        if (score > this->threshold()) {
          break;
        }

        node_id_t id = group_topk_list[i].second[j].first;

        // NOTE(review): unlike topk_to_single_result, no sparse document is
        // attached when fetch_vector_ is set - confirm this is intended.
        if (fetch_vector_) {
          group_results_[idx][i].mutable_docs()->emplace_back(
              entity_->get_key(id), score, id, entity_->get_vector_meta(id));
        } else {
          group_results_[idx][i].mutable_docs()->emplace_back(
              entity_->get_key(id), score, id);
        }
      }
    }
  }

  //! Point the distance calculator at a new query and restart its
  //! comparison counter
  inline void reset_query(const void *query) {
    dc_.reset_query(query);
    dc_.clear_compare_cnt();
  }

  //! Retrieve the distance calculator
  inline HnswSparseDistCalculator &dist_calculator() {
    return dc_;
  }

  //! Retrieve the topk heap
  inline TopkHeap &topk_heap() {
    return topk_heap_;
  }

  //! Retrieve the update heap
  inline TopkHeap &update_heap() {
    return update_heap_;
  }

  //! Retrieve the visit filter
  inline VisitFilter &visit_filter() {
    return visit_filter_;
  }

  //! Retrieve the candidate heap
  inline CandidateHeap &candidates() {
    return candidates_;
  }

  //! Set the max number of distance computations, see reach_scan_limit()
  inline void set_max_scan_num(uint32_t max_scan_num) {
    max_scan_num_ = max_scan_num;
  }

  //! Set the upper clamp used by compute_max_scan_num()
  inline void set_max_scan_limit(uint32_t max_scan_limit) {
    max_scan_limit_ = max_scan_limit;
  }

  //! Set the lower clamp used by compute_max_scan_num()
  inline void set_min_scan_limit(uint32_t min_scan_limit) {
    min_scan_limit_ = min_scan_limit;
  }

  //! Set ef; the topk heap limit is not touched here
  inline void set_ef(uint32_t v) {
    ef_ = v;
  }

  //! Set the visit filter mode used when the filter is (re)created
  inline void set_filter_mode(uint32_t v) {
    filter_mode_ = v;
  }

  //! Set the negative probability passed to the visit filter on creation
  inline void set_filter_negative_probability(float v) {
    negative_probability_ = v;
  }

  //! Set the ratio used by compute_max_scan_num()
  inline void set_max_scan_ratio(float v) {
    max_scan_ratio_ = v;
  }

  //! Set the magic number
  virtual void set_magic(uint32_t v) {
    magic_ = v;
  }

  //! Enable / disable padding of the topk heap in topk_to_single_result()
  virtual void set_force_padding_topk(bool v) {
    force_padding_topk_ = v;
  }

  //! Set the brute-force threshold
  virtual void set_bruteforce_threshold(uint32_t v) override {
    bruteforce_threshold_ = v;
  }

  //! Retrieve the brute-force threshold
  inline uint32_t get_bruteforce_threshold() const {
    return bruteforce_threshold_;
  }

  //! Set whether results should carry vector data
  virtual void set_fetch_vector(bool v) override {
    fetch_vector_ = v;
  }

  //! Retrieve whether results carry vector data
  virtual bool fetch_vector() const override {
    return fetch_vector_;
  }

  //! Reset context
  //! Note that set_group_params(0, 0) also sets topk_ to 0 and re-limits the
  //! topk heap to max(0, ef_).
  void reset(void) override {
    set_filter(nullptr);
    reset_threshold();
    set_fetch_vector(false);
    set_group_params(0, 0);
    reset_group_by();
  }

  //! Retrieve the per-group topk heaps, keyed by group id
  inline std::map<std::string, TopkHeap> &group_topk_heaps() {
    return group_topk_heaps_;
  }

  //! Retrieve the heap for a graph level, creating the heaps up to and
  //! including `level` on first use. Each new heap is limited to
  //! max(neighbor_cnt(level), ef_construction()).
  inline TopkHeap &level_topk(int level) {
    if (ailego_unlikely(level_topks_.size() <= static_cast<size_t>(level))) {
      int cur_level = level_topks_.size();
      level_topks_.resize(level + 1);
      for (; cur_level <= level; ++cur_level) {
        size_t heap_size = std::max(entity_->neighbor_cnt(cur_level),
                                    entity_->ef_construction());
        level_topks_[cur_level].clear();
        level_topks_[cur_level].limit(heap_size);
      }
    }

    return level_topks_[level];
  }

  //! Run the capacity check against the entity's current document count
  inline void check_need_adjuct_ctx(void) {
    check_need_adjuct_ctx(entity_->doc_cnt());
  }

  //! Number of extra documents to reserve: cur_doc clamped into
  //! [kMinReserveDocCnt, kMaxReserveDocCnt]
  inline size_t compute_reserve_cnt(uint32_t cur_doc) const {
    if (cur_doc > kMaxReserveDocCnt) {
      return kMaxReserveDocCnt;
    } else if (cur_doc < kMinReserveDocCnt) {
      return kMinReserveDocCnt;
    }
    return cur_doc;
  }

  //! candidates heap and visitfilter need to resize as doc cnt growing up
  inline void check_need_adjuct_ctx(uint32_t doc_cnt) {
    // Grow once fewer than kTriggerReserveCnt slots remain in the reserve
    if (ailego_unlikely(doc_cnt + kTriggerReserveCnt > reserve_max_doc_cnt_)) {
      while (doc_cnt + kTriggerReserveCnt > reserve_max_doc_cnt_) {
        reserve_max_doc_cnt_ =
            reserve_max_doc_cnt_ + compute_reserve_cnt(reserve_max_doc_cnt_);
      }
      uint32_t max_scan_cnt = compute_max_scan_num(reserve_max_doc_cnt_);
      max_scan_num_ = max_scan_cnt;
      // NOTE(review): the result of reset() is not checked here, whereas
      // update_context() treats a false return as an error.
      visit_filter_.reset(reserve_max_doc_cnt_, max_scan_cnt);
      candidates_.clear();
      candidates_.limit(max_scan_num_);
    }
  }

  //! Scan budget for a document count: max_doc_cnt * max_scan_ratio_ converted
  //! to uint32_t, raised to min_scan_limit_ if below it, otherwise capped at
  //! max_scan_limit_
  inline uint32_t compute_max_scan_num(uint32_t max_doc_cnt) const {
    uint32_t max_scan = max_doc_cnt * max_scan_ratio_;
    if (max_scan < min_scan_limit_) {
      max_scan = min_scan_limit_;
    } else if (max_scan > max_scan_limit_) {
      max_scan = max_scan_limit_;
    }
    return max_scan;
  }

  //! Number of distance computations counted by the calculator
  inline size_t get_scan_num() const {
    return dc_.compare_cnt();
  }

  //! Non-zero once the comparison count has reached max_scan_num_
  //! (the boolean comparison result is returned as uint64_t)
  inline uint64_t reach_scan_limit() const {
    return dc_.compare_cnt() >= max_scan_num_;
  }

  //! Whether the distance calculator has flagged an error
  inline bool error() const {
    return dc_.error();
  }

  //! Clear the per-query state: the distance calculator, the debug counters
  //! (only in debug mode) and the contents of every result list
  inline void clear() {
    dc_.clear();
    if (ailego_unlikely(this->debugging())) {
      stats_get_neighbors_cnt_ = 0u;
      stats_get_vector_cnt_ = 0u;
      stats_visit_dup_cnt_ = 0u;
    }
    // keep the results_ vector itself (the next query reuses its size) and
    // only empty each list it holds
    for (auto &it : results_) {
      it.clear();
    }
  }

  //! Writable access to the get-neighbors debug counter
  uint32_t *mutable_stats_get_neighbors() {
    return &stats_get_neighbors_cnt_;
  }

  //! Writable access to the get-vector debug counter
  uint32_t *mutable_stats_get_vector() {
    return &stats_get_vector_cnt_;
  }

  //! Writable access to the duplicated-visit debug counter
  uint32_t *mutable_stats_visit_dup_cnt() {
    return &stats_visit_dup_cnt_;
  }

  //! Whether debug mode is enabled
  inline bool debugging(void) const {
    return debug_mode_;
  }

  //! Replace the distance function used by the distance calculator
  inline void update_dist_caculator_distance(
      const IndexMetric::MatrixSparseDistance &distance) {
    dc_.update_distance(distance);
  }

  //! Get topk
  inline uint32_t topk() const override {
    return topk_;
  }

  //! Get group topk
  inline uint32_t group_topk() const {
    return group_topk_;
  }

  //! Get group num
  inline uint32_t group_num() const {
    return group_num_;
  }

  //! Get if group by search
  inline bool group_by_search() {
    return group_num_ > 0;
  }

  //! Set group params
  //! topk_ becomes group_topk * group_num, the topk heap is re-limited to
  //! max(topk_, ef_) and all per-group heaps are dropped.
  void set_group_params(uint32_t group_num, uint32_t group_topk) override {
    group_num_ = group_num;
    group_topk_ = group_topk;

    topk_ = group_topk_ * group_num_;

    topk_heap_.limit(std::max(topk_, ef_));

    group_topk_heaps_.clear();
  }

 private:
  // Filling random nodes if topk not full
  void fill_random_to_topk_full(void);

  // Remaining-capacity margin that triggers growth in check_need_adjuct_ctx
  constexpr static uint32_t kTriggerReserveCnt = 4096UL;
  // Bounds applied by compute_reserve_cnt
  constexpr static uint32_t kMinReserveDocCnt = 4096UL;
  constexpr static uint32_t kMaxReserveDocCnt = 128 * 1024UL;
  // Magic value stored while update_context() has not completed
  constexpr static uint32_t kInvalidMgic = -1U;

 private:
  // entity_ must stay declared before dc_: the constructor initialises dc_
  // from entity_.get()
  HnswSparseEntity::Pointer entity_;
  HnswSparseDistCalculator dc_;
  bool debug_mode_{false};
  bool force_padding_topk_{false};
  uint32_t max_scan_num_{0};
  uint32_t max_scan_limit_{0};
  uint32_t min_scan_limit_{0};
  uint32_t reserve_max_doc_cnt_{kMinReserveDocCnt};
  uint32_t topk_{0};
  uint32_t group_topk_{0};
  uint32_t filter_mode_{VisitFilter::ByteMap};
  float negative_probability_{HnswSparseEntity::kDefaultBFNegativeProbability};
  uint32_t ef_{HnswSparseEntity::kDefaultEf};
  float max_scan_ratio_{HnswSparseEntity::kDefaultScanRatio};
  uint32_t magic_{0U};
  std::vector<IndexDocumentList> results_{};
  std::vector<IndexGroupDocumentList> group_results_{};
  TopkHeap topk_heap_{};
  TopkHeap update_heap_{};
  std::vector<TopkHeap> level_topks_{};
  CandidateHeap candidates_{};
  VisitFilter visit_filter_{};
  uint32_t bruteforce_threshold_{};
  bool fetch_vector_{false};

  uint32_t group_num_{0};
  std::map<std::string, TopkHeap> group_topk_heaps_{};

  // Holds a ContextType value, set by init()
  uint32_t type_{kUnknownContext};
  //! debug stats info
  uint32_t stats_get_neighbors_cnt_{0u};
  uint32_t stats_get_vector_cnt_{0u};
  uint32_t stats_visit_dup_cnt_{0u};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_sparse/hnsw_sparse_dist_calculator.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <zvec/core/framework/index_meta.h>
#include "hnsw_sparse_entity.h"

namespace zvec {
namespace core {

//! Computes sparse distances for one HNSW sparse entity.
//! It keeps the entity, the metric's sparse distance function, the current
//! query, a counter of distance computations and a sticky error flag that is
//! raised whenever a vector or its sparse payload cannot be obtained.
class HnswSparseDistCalculator {
 public:
  typedef std::shared_ptr<HnswSparseDistCalculator> Pointer;

 public:
  //! Constructor without a query (query is nullptr until reset_query())
  HnswSparseDistCalculator(const HnswSparseEntity *entity,
                           const IndexMetric::Pointer &metric)
      : HnswSparseDistCalculator(entity, metric, nullptr) {}

  //! Constructor with an initial query
  HnswSparseDistCalculator(const HnswSparseEntity *entity,
                           const IndexMetric::Pointer &metric,
                           const void *query)
      : entity_(entity),
        distance_(metric->sparse_distance()),
        query_(query),
        compare_cnt_(0) {}

  //! Switch to another entity and take the sparse distance of `metric`;
  //! the query, the counter and the error flag are left untouched
  void update(const HnswSparseEntity *entity,
              const IndexMetric::Pointer &metric) {
    distance_ = metric->sparse_distance();
    entity_ = entity;
  }

  //! Replace only the distance function
  inline void update_distance(
      const IndexMetric::MatrixSparseDistance &distance) {
    distance_ = distance;
  }

  //! Reset query vector data; also lowers the error flag
  inline void reset_query(const void *query) {
    query_ = query;
    error_ = false;
  }

  //! Returns distance between two sparse payloads.
  //! A null operand yields 0 without raising the error flag and without
  //! touching the comparison counter.
  inline dist_t dist(const void *sparse_data_lhs, const void *sparse_data_rhs) {
    float result{0.0f};

    const bool missing_operand =
        (sparse_data_lhs == nullptr) || (sparse_data_rhs == nullptr);
    if (ailego_unlikely(missing_operand)) {
      return result;
    }

    distance_(sparse_data_lhs, sparse_data_rhs, &result);
    return result;
  }

  //! Returns distance between query and vec.
  inline dist_t dist(const void *vec) {
    ++compare_cnt_;

    auto payload = entity_->get_sparse_data_from_vector(vec);
    if (payload.first == nullptr) {
      return fail();
    }
    return dist(payload.first, query_);
  }

  //! Return distance between query and node id.
  inline dist_t dist(node_id_t id) {
    ++compare_cnt_;

    const void *meta = entity_->get_vector_meta(id);
    if (ailego_unlikely(meta == nullptr)) {
      LOG_ERROR("Get nullptr vector, id=%u", id);
      return fail();
    }

    auto payload = entity_->get_sparse_data_from_vector(meta);
    if (payload.first == nullptr) {
      return fail();
    }
    return dist(payload.first, query_);
  }

  //! Return dist node lhs between node rhs
  inline dist_t dist(node_id_t lhs, node_id_t rhs) {
    ++compare_cnt_;

    const void *lhs_meta = entity_->get_vector_meta(lhs);
    const void *rhs_meta = entity_->get_vector_meta(rhs);
    if (ailego_unlikely(lhs_meta == nullptr || rhs_meta == nullptr)) {
      LOG_ERROR("Get nullptr vector");
      return fail();
    }

    auto lhs_payload = entity_->get_sparse_data_from_vector(lhs_meta);
    if (lhs_payload.first == nullptr) {
      return fail();
    }

    auto rhs_payload = entity_->get_sparse_data_from_vector(rhs_meta);
    if (rhs_payload.first == nullptr) {
      return fail();
    }

    return dist(lhs_payload.first, rhs_payload.first);
  }

  //! Call-operator form of dist(const void *)
  dist_t operator()(const void *vec) {
    return dist(vec);
  }

  //! Call-operator form of dist(node_id_t)
  // NOTE(review): the parameter type is id_t rather than node_id_t;
  // presumably both name the same integer type - confirm.
  dist_t operator()(id_t i) {
    return dist(i);
  }

  //! Call-operator form of dist(node_id_t, node_id_t)
  dist_t operator()(id_t lhs, id_t rhs) {
    return dist(lhs, rhs);
  }

  //! Reset both the comparison counter and the error flag
  inline void clear() {
    error_ = false;
    compare_cnt_ = 0;
  }

  //! Reset only the comparison counter
  inline void clear_compare_cnt() {
    compare_cnt_ = 0;
  }

  //! Whether a distance computation has failed since the last reset
  inline bool error() const {
    return error_;
  }

  //! Get distances compute times
  inline uint32_t compare_cnt() const {
    return compare_cnt_;
  }

 private:
  HnswSparseDistCalculator(const HnswSparseDistCalculator &) = delete;
  HnswSparseDistCalculator &operator=(const HnswSparseDistCalculator &) =
      delete;

  //! Raise the error flag and produce the distance reported on failure
  inline dist_t fail() {
    error_ = true;
    return 0.0f;
  }

 private:
  const HnswSparseEntity *entity_;

  IndexMetric::MatrixSparseDistance distance_;

  const void *query_;

  uint32_t compare_cnt_;  // record distance compute times
  bool error_{false};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_sparse/hnsw_sparse_entity.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "hnsw_sparse_entity.h"

namespace zvec {
namespace core {

const std::string HnswSparseEntity::kSparseGraphHeaderSegmentId =
    "sparse_graph.header";
const std::string HnswSparseEntity::kSparseGraphFeaturesSegmentId =
    "sparse_graph.features";
const std::string HnswSparseEntity::kSparseGraphKeysSegmentId =
    "sparse_graph.keys";
const std::string HnswSparseEntity::kSparseGraphNeighborsSegmentId =
    "sparse_graph.neighbors";
const std::string HnswSparseEntity::kSparseGraphOffsetsSegmentId =
    "sparse_graph.offsets";
const std::string HnswSparseEntity::kSparseGraphMappingSegmentId =
    "sparse_graph.mapping";
const std::string HnswSparseEntity::kSparseHnswHeaderSegmentId =
    "sparse_hnsw.header";
const std::string HnswSparseEntity::kSparseHnswNeighborsSegmentId =
    "sparse_hnsw.neighbors";
const std::string HnswSparseEntity::kSparseHnswOffsetsSegmentId =
    "sparse_hnsw.offsets";
const std::string HnswSparseEntity::kSparseGraphVectorsSegmentId =
    "sparse_graph.vectors";
const std::string HnswSparseEntity::kSparseGraphVectorMetaSegmentId =
    "sparse_graph.vector_meta";

//! Append zero bytes after a payload of `data_size` bytes so that the total
//! written length reaches the next AlignSize() boundary.
//! The number of padding bytes is stored in *padding_size (0 when the payload
//! is already aligned, in which case nothing is written).
//! Return 0 if success, IndexError_WriteData if the dumper wrote fewer bytes
int HnswSparseEntity::CalcAndAddPadding(const IndexDumper::Pointer &dumper,
                                        size_t data_size,
                                        size_t *padding_size) {
  const size_t pad_bytes = AlignSize(data_size) - data_size;
  *padding_size = pad_bytes;

  if (pad_bytes > 0) {
    const std::string zeros(pad_bytes, '\0');
    const size_t written = dumper->write(zeros.data(), pad_bytes);
    if (written != pad_bytes) {
      LOG_ERROR("Append padding failed, size %lu", *padding_size);
      return IndexError_WriteData;
    }
  }
  return 0;
}

//! Write one in-memory buffer as a complete segment: payload, zero padding up
//! to the AlignSize() boundary, then the segment meta (id, size, padding,
//! crc32c of the payload) registered through dumper->append().
//! Return the number of bytes written (payload + padding) if success,
//! errno(<0) in failure
int64_t HnswSparseEntity::dump_segment(const IndexDumper::Pointer &dumper,
                                       const std::string &segment_id,
                                       const void *data, size_t size) const {
  const size_t written = dumper->write(data, size);
  if (written != size) {
    LOG_ERROR("Dump segment %s data failed, expect: %lu, actual: %lu",
              segment_id.c_str(), size, written);
    return IndexError_WriteData;
  }

  //! Pad the payload; the helper logs and reports a short write itself
  size_t padding_size = 0;
  const int pad_ret = CalcAndAddPadding(dumper, size, &padding_size);
  if (pad_ret != 0) {
    return pad_ret;
  }

  //! The checksum covers the payload only, not the padding
  const uint32_t checksum = ailego::Crc32c::Hash(data, size);
  const int append_ret =
      dumper->append(segment_id, size, padding_size, checksum);
  if (append_ret != 0) {
    LOG_ERROR("Dump segment %s meta failed, ret=%d", segment_id.c_str(),
              append_ret);
    return append_ret;
  }

  return written + padding_size;
}

//! Dump the graph header and the hnsw header as two separate segments.
//! The byte count of each segment is taken from the header's own `size` field.
//! Return the total dump size if success, errno(<0) in failure
int64_t HnswSparseEntity::dump_header(const IndexDumper::Pointer &dumper,
                                      const HNSWSparseHeader &hd) const {
  //! basic graph header
  const int64_t graph_bytes = dump_segment(
      dumper, kSparseGraphHeaderSegmentId, &hd.graph, hd.graph.size);
  if (graph_bytes < 0) {
    return graph_bytes;
  }

  //! hnsw (upper level) header
  const int64_t hnsw_bytes =
      dump_segment(dumper, kSparseHnswHeaderSegmentId, &hd.hnsw, hd.hnsw.size);
  if (hnsw_bytes < 0) {
    return hnsw_bytes;
  }

  return graph_bytes + hnsw_bytes;
}

//! Placeholder for reordering vectors before a dump.
//! Currently a no-op: none of the arguments is read or modified, so mapping
//! vectors passed in keep whatever content the caller gave them.
void HnswSparseEntity::reshuffle_vectors(
    const std::function<level_t(node_id_t)> & /*get_level*/,
    std::vector<node_id_t> * /*n2o_mapping*/,
    std::vector<node_id_t> * /*o2n_mapping*/, key_t * /*keys*/) const {
  // TODO
}

//! Dump the mapping segment: the node ids 0..doc_cnt()-1 ordered by ascending
//! key (keys[id]).
//! Return dump size if success, errno(<0) in failure
int64_t HnswSparseEntity::dump_mapping_segment(
    const IndexDumper::Pointer &dumper, const key_t *keys) const {
  std::vector<node_id_t> ids_by_key(doc_cnt());
  std::iota(ids_by_key.begin(), ids_by_key.end(), 0U);

  const auto key_less = [&](node_id_t lhs, node_id_t rhs) {
    return keys[lhs] < keys[rhs];
  };
  std::sort(ids_by_key.begin(), ids_by_key.end(), key_less);

  const size_t bytes = ids_by_key.size() * sizeof(node_id_t);
  return dump_segment(dumper, kSparseGraphMappingSegmentId, ids_by_key.data(),
                      bytes);
}

//! Dump all segments through `dumper` in this order: headers, sparse vector
//! meta, sparse vector data, neighbors (level 0 then upper levels), keys and
//! the key-sorted mapping.
//! Return the summed dump size if success, errno(<0) of the first failing
//! step otherwise
int64_t HnswSparseEntity::dump_segments(
    const IndexDumper::Pointer &dumper, key_t *keys,
    const std::function<level_t(node_id_t)> &get_level) const {
  //! Adjust a copy of the header; the entity's own header stays untouched
  HNSWSparseHeader dump_hd(header());
  dump_hd.graph.node_size = sparse_meta_size();

  std::vector<node_id_t> n2o_mapping;  // map new id to origin id
  std::vector<node_id_t> o2n_mapping;  // map origin id to new id
  reshuffle_vectors(get_level, &n2o_mapping, &o2n_mapping, keys);
  if (!o2n_mapping.empty()) {
    dump_hd.hnsw.entry_point = o2n_mapping[entry_point()];
  }

  //! Headers
  const int64_t hd_size = dump_header(dumper, dump_hd);
  if (hd_size < 0) {
    return hd_size;
  }

  //! Sparse vector meta followed by the sparse vector data
  const int64_t sparse_vector_meta_size =
      dump_sparse_vector_meta(dumper, n2o_mapping);
  if (sparse_vector_meta_size < 0) {
    return sparse_vector_meta_size;
  }

  const int64_t sparse_vecs_size = dump_sparse_vector(dumper, n2o_mapping);
  if (sparse_vecs_size < 0) {
    return sparse_vecs_size;
  }

  //! Neighbors
  const auto neighbors_size =
      dump_neighbors(dumper, get_level, n2o_mapping, o2n_mapping);
  if (neighbors_size < 0) {
    return neighbors_size;
  }

  //! The mappings are not needed any more, release their storage
  std::vector<node_id_t>().swap(n2o_mapping);
  std::vector<node_id_t>().swap(o2n_mapping);

  //! Keys
  const size_t key_segment_size = doc_cnt() * sizeof(key_t);
  const int64_t keys_size =
      dump_segment(dumper, kSparseGraphKeysSegmentId, keys, key_segment_size);
  if (keys_size < 0) {
    return keys_size;
  }

  //! Mapping
  const int64_t mapping_size = dump_mapping_segment(dumper, keys);
  if (mapping_size < 0) {
    return mapping_size;
  }

  return hd_size + sparse_vector_meta_size + sparse_vecs_size +
         neighbors_size + keys_size + mapping_size;
}


//! Dump the sparse vector meta segment.
//! For each document (visited in `reorder_mapping` order when that vector is
//! non-empty, otherwise by ascending id) two uint64_t values are written:
//!   1. the sum of the sparse data lengths of all previously visited documents
//!   2. the sparse data length of this document
//! Return dump size if success, errno(<0) in failure
//! NOTE(review): unlike dump_segment(), this segment is appended with a
//! padding size of 0 - confirm that the missing alignment is intended.
int64_t HnswSparseEntity::dump_sparse_vector_meta(
    const IndexDumper::Pointer &dumper,
    const std::vector<node_id_t> &reorder_mapping) const {
  uint32_t crc = 0U;
  size_t dump_size = 0UL;
  uint64_t sparse_data_offset = 0UL;

  for (node_id_t id = 0; id < doc_cnt(); ++id) {
    const void *data =
        get_vector_meta(reorder_mapping.empty() ? id : reorder_mapping[id]);
    if (ailego_unlikely(!data)) {
      return IndexError_ReadData;
    }

    //! The length is read as a 32-bit value located right after the leading
    //! uint64_t of the stored meta. Copy it out with memcpy rather than
    //! dereferencing a casted pointer, which would be undefined behaviour for
    //! unaligned or differently-typed storage.
    uint32_t stored_len = 0U;
    memcpy(&stored_len, static_cast<const char *>(data) + sizeof(uint64_t),
           sizeof(stored_len));
    const uint64_t sparse_data_len = stored_len;

    size_t len = dumper->write(&sparse_data_offset, sizeof(uint64_t));
    if (len != sizeof(uint64_t)) {
      LOG_ERROR("Dump sparse data offset failed, write=%zu expect=%zu", len,
                sizeof(uint64_t));
      return IndexError_WriteData;
    }

    crc = ailego::Crc32c::Hash(&sparse_data_offset, sizeof(uint64_t), crc);
    dump_size += sizeof(uint64_t);

    len = dumper->write(&sparse_data_len, sizeof(uint64_t));
    if (len != sizeof(uint64_t)) {
      LOG_ERROR("Dump sparse data len failed, write=%zu expect=%zu", len,
                sizeof(uint64_t));
      return IndexError_WriteData;
    }

    crc = ailego::Crc32c::Hash(&sparse_data_len, sizeof(uint64_t), crc);
    dump_size += sizeof(uint64_t);

    sparse_data_offset += sparse_data_len;
  }

  int ret =
      dumper->append(kSparseGraphVectorMetaSegmentId, dump_size, 0UL, crc);
  if (ret != 0) {
    LOG_ERROR("Dump vectors segment meta failed, ret %d", ret);
    return ret;
  }

  return dump_size;
}

//! Dump the sparse vector data segment.
//! For each document (visited in `reorder_mapping` order when that vector is
//! non-empty, otherwise by ascending id) the stored meta is read to obtain
//! the offset and length of its sparse data, which is then fetched through
//! get_sparse_data() and written back to back into the segment.
//! Return dump size if success, errno(<0) in failure
//! NOTE(review): unlike dump_segment(), this segment is appended with a
//! padding size of 0 - confirm that the missing alignment is intended.
int64_t HnswSparseEntity::dump_sparse_vector(
    const IndexDumper::Pointer &dumper,
    const std::vector<node_id_t> &reorder_mapping) const {
  uint32_t crc = 0U;
  size_t data_size = 0UL;

  for (node_id_t id = 0; id < doc_cnt(); ++id) {
    const void *meta =
        get_vector_meta(reorder_mapping.empty() ? id : reorder_mapping[id]);
    if (ailego_unlikely(!meta)) {
      return IndexError_ReadData;
    }

    //! Stored meta: a uint64_t offset followed by a length that is read as a
    //! 32-bit value. memcpy keeps the reads valid for any alignment and
    //! avoids type-punning through casted pointers.
    const char *meta_ptr = static_cast<const char *>(meta);
    uint64_t sparse_offset = 0UL;
    uint32_t sparse_data_len = 0U;
    memcpy(&sparse_offset, meta_ptr, sizeof(sparse_offset));
    memcpy(&sparse_data_len, meta_ptr + sizeof(uint64_t),
           sizeof(sparse_data_len));

    const void *sparse = get_sparse_data(sparse_offset, sparse_data_len);
    if (ailego_unlikely(sparse == nullptr)) {
      //! Report the location that was actually requested
      LOG_ERROR("Get nullptr sparse, offset=%zu, len=%zu",
                (size_t)sparse_offset, (size_t)sparse_data_len);
      return IndexError_ReadData;
    }

    size_t len = dumper->write(sparse, sparse_data_len);
    if (len != sparse_data_len) {
      LOG_ERROR("Dump sparse data failed, write=%zu expect=%zu", len,
                (size_t)sparse_data_len);
      return IndexError_WriteData;
    }

    crc = ailego::Crc32c::Hash(sparse, sparse_data_len, crc);
    data_size += sparse_data_len;
  }

  int ret = dumper->append(kSparseGraphVectorsSegmentId, data_size, 0UL, crc);
  if (ret != 0) {
    LOG_ERROR("Dump vectors segment meta failed, ret %d", ret);
    return ret;
  }

  return data_size;
}

//! Dump level 0 neighbors.
//! Two segments are produced:
//!   - the neighbors segment: for every document the list of its level-0
//!     neighbor ids (translated through `neighbor_mapping` when non-empty),
//!     written back to back and padded to the AlignSize() boundary
//!   - the offsets segment: one SparseGraphNeighborMeta (byte offset into the
//!     neighbors segment, neighbor count) per document
//! Documents are visited in `reorder_mapping` order when it is non-empty.
//! Return dump size if success, errno(<0) in failure
int64_t HnswSparseEntity::dump_graph_neighbors(
    const IndexDumper::Pointer &dumper,
    const std::vector<node_id_t> &reorder_mapping,
    const std::vector<node_id_t> &neighbor_mapping) const {
  std::vector<SparseGraphNeighborMeta> graph_meta;
  graph_meta.reserve(doc_cnt());
  size_t offset = 0;
  uint32_t crc = 0;
  //! Scratch buffer holding the translated ids of one node
  std::vector<node_id_t> mapping(l0_neighbor_cnt());

  //! Statistics for the log line only. The minimum starts at the largest
  //! representable value so that any real count replaces it.
  uint32_t min_neighbor_count = std::numeric_limits<uint32_t>::max();
  uint32_t max_neighbor_count = 0;
  size_t sum_neighbor_count = 0;

  for (node_id_t id = 0; id < doc_cnt(); ++id) {
    const Neighbors neighbors =
        get_neighbors(0, reorder_mapping.empty() ? id : reorder_mapping[id]);
    ailego_assert_with(!!neighbors.data, "invalid neighbors");
    ailego_assert_with(neighbors.size() <= l0_neighbor_cnt(),
                       "invalid neighbors");

    uint32_t neighbor_count = neighbors.size();
    if (neighbor_count < min_neighbor_count) {
      min_neighbor_count = neighbor_count;
    }
    if (neighbor_count > max_neighbor_count) {
      max_neighbor_count = neighbor_count;
    }
    sum_neighbor_count += neighbor_count;

    graph_meta.emplace_back(offset, neighbor_count);
    size_t size = neighbors.size() * sizeof(node_id_t);
    const node_id_t *data = &neighbors[0];
    if (!neighbor_mapping.empty()) {
      for (node_id_t i = 0; i < neighbors.size(); ++i) {
        mapping[i] = neighbor_mapping[neighbors[i]];
      }
      data = mapping.data();
    }
    if (dumper->write(data, size) != size) {
      LOG_ERROR("Dump graph neighbor id=%u failed, size %lu", id, size);
      return IndexError_WriteData;
    }
    crc = ailego::Crc32c::Hash(data, size, crc);
    offset += size;
  }

  uint32_t average_neighbor_count = 0;
  if (doc_cnt() > 0) {
    average_neighbor_count = sum_neighbor_count / doc_cnt();
  } else {
    //! No node was visited, so there is no meaningful minimum to report
    min_neighbor_count = 0;
  }
  LOG_INFO(
      "Dump hnsw graph: min_neighbor_count[%u] max_neighbor_count[%u] "
      "average_neighbor_count[%u]",
      min_neighbor_count, max_neighbor_count, average_neighbor_count);

  size_t padding_size = 0;
  int ret = CalcAndAddPadding(dumper, offset, &padding_size);
  if (ret != 0) {
    return ret;
  }
  ret =
      dumper->append(kSparseGraphNeighborsSegmentId, offset, padding_size, crc);
  if (ret != 0) {
    LOG_ERROR("Dump segment %s failed, ret %d",
              kSparseGraphNeighborsSegmentId.c_str(), ret);
    return ret;
  }

  //! dump level 0 neighbors meta
  auto len =
      dump_segment(dumper, kSparseGraphOffsetsSegmentId, graph_meta.data(),
                   graph_meta.size() * sizeof(SparseGraphNeighborMeta));
  if (len < 0) {
    return len;
  }

  return len + offset + padding_size;
}

//! Dump upper level (level >= 1) neighbors.
//! Two segments are produced:
//!   - the hnsw neighbors segment: for every document whose level is not 0,
//!     one fixed-size record per level 1..level. A record holds the neighbor
//!     count followed by upper_neighbor_cnt() id slots (unused slots are 0);
//!     ids are translated through `neighbor_mapping` when it is non-empty
//!   - the hnsw offsets segment: one HnswSparseNeighborMeta per document
//!     (byte offset of its first record and its level; 0/0 for level 0)
//! Documents are visited in `reorder_mapping` order when it is non-empty.
//! Return dump size if success, errno(<0) in failure
int64_t HnswSparseEntity::dump_upper_neighbors(
    const IndexDumper::Pointer &dumper,
    const std::function<level_t(node_id_t)> &get_level,
    const std::vector<node_id_t> &reorder_mapping,
    const std::vector<node_id_t> &neighbor_mapping) const {
  std::vector<HnswSparseNeighborMeta> level_meta;
  level_meta.reserve(doc_cnt());

  //! One record: [neighbor count][upper_neighbor_cnt() neighbor ids]
  std::vector<node_id_t> record(upper_neighbor_cnt() + 1);
  const size_t record_bytes = sizeof(node_id_t) * record.size();

  size_t offset = 0;
  uint32_t crc = 0;
  for (node_id_t id = 0; id < doc_cnt(); ++id) {
    const node_id_t src_id =
        reorder_mapping.empty() ? id : reorder_mapping[id];
    const auto level = get_level(src_id);
    if (level == 0) {
      level_meta.emplace_back(0U, 0U);
      continue;
    }
    level_meta.emplace_back(offset, level);
    ailego_assert_with((size_t)level < kMaxGraphLayers, "invalid level");

    for (level_t cur_level = 1; cur_level <= level; ++cur_level) {
      const Neighbors neighbors = get_neighbors(cur_level, src_id);
      ailego_assert_with(!!neighbors.data, "invalid neighbors");
      ailego_assert_with(neighbors.size() <= neighbor_cnt(cur_level),
                         "invalid neighbors");

      //! Zero the record first so the unused id slots are written as 0
      memset(record.data(), 0, record_bytes);
      record[0] = neighbors.size();
      if (!neighbor_mapping.empty()) {
        for (node_id_t i = 0; i < neighbors.size(); ++i) {
          record[i + 1] = neighbor_mapping[neighbors[i]];
        }
      } else {
        memcpy(&record[1], &neighbors[0], neighbors.size() * sizeof(node_id_t));
      }

      if (dumper->write(record.data(), record_bytes) != record_bytes) {
        LOG_ERROR("Dump graph neighbor id=%u failed, size %lu", id,
                  sizeof(node_id_t) * record.size());
        return IndexError_WriteData;
      }
      crc = ailego::Crc32c::Hash(record.data(), record_bytes, crc);
      offset += record_bytes;
    }
  }

  size_t padding_size = 0;
  int ret = CalcAndAddPadding(dumper, offset, &padding_size);
  if (ret != 0) {
    return ret;
  }

  ret =
      dumper->append(kSparseHnswNeighborsSegmentId, offset, padding_size, crc);
  if (ret != 0) {
    LOG_ERROR("Dump segment %s failed, ret %d",
              kSparseHnswNeighborsSegmentId.c_str(), ret);
    return ret;
  }

  //! dump upper level neighbors meta
  const auto meta_bytes =
      dump_segment(dumper, kSparseHnswOffsetsSegmentId, level_meta.data(),
                   level_meta.size() * sizeof(HnswSparseNeighborMeta));
  if (meta_bytes < 0) {
    return meta_bytes;
  }

  return meta_bytes + offset + padding_size;
}

}  // namespace core
}  // namespace zvec

================================================
FILE: src/core/algorithm/hnsw_sparse/hnsw_sparse_entity.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <string.h>
#include <ailego/utility/memory_helper.h>
#include <zvec/ailego/container/heap.h>
#include <zvec/ailego/logger/logger.h>
#include <zvec/core/framework/index_dumper.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_storage.h>

namespace zvec {
namespace core {

//! Basic scalar types shared by the sparse hnsw implementation
using node_id_t = uint32_t;  // local id of a node inside the graph
using key_t = uint64_t;      // primary key of a document
using level_t = int32_t;     // graph level, 0 is the bottom level
using dist_t = float;        // distance value
//! Heap of (node id, distance) pairs using the heap's default comparator
using TopkHeap = ailego::KeyValueHeap<node_id_t, dist_t>;
//! Heap of (node id, distance) pairs ordered with std::greater<dist_t>
using CandidateHeap =
    ailego::KeyValueHeap<node_id_t, dist_t, std::greater<dist_t>>;
//! Sentinels: all bits set, used to mark "no node" / "no key"
constexpr node_id_t kInvalidNodeId = static_cast<node_id_t>(-1);
constexpr key_t kInvalidKey = static_cast<key_t>(-1);
class HnswSparseDistCalculator;

//! Header of the level 0 graph.
//! The struct is dumped byte-wise as a segment (see
//! HnswSparseEntity::dump_header), so field order and sizes define the stored
//! layout. 15 uint32_t fields plus the reserved area add up to 928 bytes.
struct SparseGraphHeader {
  uint32_t size;  // header size in bytes, set to sizeof(SparseGraphHeader)
  uint32_t version;
  uint32_t graph_type;
  uint32_t doc_count;  // current total docs of the graph
  uint32_t vector_size;
  uint32_t node_size;
  uint32_t l0_neighbor_count;  // max neighbor count of graph level 0
  uint32_t prune_type;
  uint32_t prune_neighbor_count;
  uint32_t ef_construction;
  uint32_t options;
  uint32_t min_neighbor_count;
  uint32_t sparse_meta_size;
  uint32_t sparse_unit_size;
  uint32_t total_sparse_count;
  uint8_t reserved[868];  // fills the header up to 928 bytes
};

//! The dumped header must end on a 32-byte boundary
static_assert(sizeof(SparseGraphHeader) % 32 == 0,
              "SparseGraphHeader must be aligned with 32 bytes");

//! Hnsw upper neighbor header
//! The struct is dumped byte-wise as a segment (see
//! HnswSparseEntity::dump_header), so field order and sizes define the stored
//! layout.
//! NOTE(review): the declared members add up to 62 bytes (8 * 4 + 30); the
//! 32-byte size check on this struct therefore relies on the compiler adding
//! two bytes of tail padding - confirm whether reserved[32] was intended.
struct HnswSparseHeader {
  uint32_t size;      // header size
  // NOTE(review): the original comment here read "current total docs of the
  // graph", which does not match the field name - confirm the meaning.
  uint32_t revision;
  uint32_t upper_neighbor_count;  // max neighbor count of levels above 0
  uint32_t ef_construction;
  uint32_t scaling_factor;
  uint32_t max_level;    // current max graph level
  uint32_t entry_point;  // node id of the entry point, kInvalidNodeId if none
  uint32_t options;
  uint8_t reserved[30];
};

//! Non-owning description of one sparse vector: the number of entries plus
//! pointers to the index array and to the value buffer.
struct SparseData {
 public:
  //! Empty description: zero entries and null pointers
  SparseData() = default;

  //! Describe `sparse_count` entries located at the given buffers
  SparseData(uint32_t sparse_count, const uint32_t *sparse_indices,
             const void *sparse_vec)
      : count{sparse_count}, indices{sparse_indices}, vec{sparse_vec} {}

  uint32_t count{0};                // number of entries
  const uint32_t *indices{nullptr};  // entry indices, not owned
  const void *vec{nullptr};          // entry values, not owned
};

//! The dumped hnsw header must end on a 32-byte boundary
static_assert(sizeof(HnswSparseHeader) % 32 == 0,
              "HnswSparseHeader must be aligned with 32 bytes");

//! Hnsw common header and upper neighbor header
//! Bundles the level 0 graph header and the upper level hnsw header. Copies
//! are done byte-wise with memcpy so that every byte of both headers
//! (including reserved areas) is carried over.
struct HNSWSparseHeader {
  //! Create a header with all fields at their initial value, see clear()
  HNSWSparseHeader() {
    clear();
  }

  //! Byte-wise copy of another header
  HNSWSparseHeader(const HNSWSparseHeader &header) {
    memcpy(this, &header, sizeof(header));
  }

  //! Byte-wise assignment from another header
  HNSWSparseHeader &operator=(const HNSWSparseHeader &header) {
    //! memcpy must not be called with overlapping ranges, so self-assignment
    //! is skipped
    if (this != &header) {
      memcpy(this, &header, sizeof(header));
    }
    return *this;
  }

  //! Reset the state fields (doc count, entry point, max level, total sparse
  //! count); the parameters are left untouched
  void inline reset() {
    graph.doc_count = 0U;
    hnsw.entry_point = kInvalidNodeId;
    hnsw.max_level = 0;
    graph.total_sparse_count = 0U;
  }

  //! Clear all fields to init value: everything zero except the entry point
  //! (kInvalidNodeId) and the two size fields (sizeof of each header)
  void inline clear() {
    memset(this, 0, sizeof(HNSWSparseHeader));
    hnsw.entry_point = kInvalidNodeId;
    graph.size = sizeof(SparseGraphHeader);
    hnsw.size = sizeof(HnswSparseHeader);
    graph.total_sparse_count = 0U;
  }

  //! Max neighbor count of level 0
  size_t neighbor_cnt() const {
    return graph.l0_neighbor_count;
  }

  //! Max neighbor count of the levels above 0
  size_t upper_neighbor_cnt() const {
    return hnsw.upper_neighbor_count;
  }

  size_t vector_size() const {
    return graph.vector_size;
  }

  size_t ef_construction() const {
    return graph.ef_construction;
  }

  size_t scaling_factor() const {
    return hnsw.scaling_factor;
  }

  size_t neighbor_prune_cnt() const {
    return graph.prune_neighbor_count;
  }

  node_id_t entry_point() const {
    return hnsw.entry_point;
  }

  node_id_t doc_cnt() const {
    return graph.doc_count;
  }

  uint32_t total_sparse_count() const {
    return graph.total_sparse_count;
  }

  SparseGraphHeader graph;  // level 0 graph header
  HnswSparseHeader hnsw;    // upper level header
};

//! In-memory view of a stored neighbor list: a count immediately followed by
//! the neighbor ids. `neighbors` is a zero-length trailing array, so it only
//! names the address right after `neighbor_cnt` and adds no storage itself.
struct NeighborsHeader {
  uint32_t neighbor_cnt;   // number of ids that follow
  node_id_t neighbors[0];  // start of the id array
};

struct Neighbors {
  Neighbors() : cnt{0}, data{nullptr} {}

  Neighbors(uint32_t cnt_in, const node_id_t *data_in)
      : cnt{cnt_in}, data{data_in} {}

  Neighbors(IndexStorage::MemoryBlock &&mem_block)
      : neighbor_block{std::move(mem_block)} {
    auto hd = reinterpret_cast<const NeighborsHeader *>(neighbor_block.data());
    cnt = hd->neighbor_cnt;
    data = hd->neighbors;
  }

  size_t size(void) const {
    return cnt;
  }

  const node_id_t &operator[](size_t idx) const {
    return data[idx];
  }

  uint32_t cnt;
  const node_id_t *data;
  IndexStorage::MemoryBlock neighbor_block;
};

//! level 0 neighbors offset
//! One packed 64-bit record per node: where its neighbor ids start and how
//! many there are.
struct SparseGraphNeighborMeta {
  SparseGraphNeighborMeta(size_t byte_offset, size_t count)
      : offset(byte_offset), neighbor_cnt(count) {}

  uint64_t offset : 48;        // start of the node's neighbor ids
  uint64_t neighbor_cnt : 16;  // number of neighbor ids
};

//! hnsw upper neighbors meta
//! One packed 64-bit record per node: where its upper level neighbor records
//! start and the node's level.
struct HnswSparseNeighborMeta {
  HnswSparseNeighborMeta(size_t byte_offset, size_t node_level)
      : offset(byte_offset), level(node_level) {}

  uint64_t offset : 48;  // offset = idx * upper neighbors size
  uint64_t level : 16;   // level of the node
};

//! Base class of the sparse hnsw entities.
//! It owns the combined header, exposes accessors for the header fields,
//! implements the common dump routines (dump_segments and its helpers) and
//! declares the storage access interface (keys, vector meta, sparse data,
//! neighbors) that concrete entities implement.
class HnswSparseEntity {
 public:
  //! Constructor
  HnswSparseEntity() {}

  //! Constructor
  HnswSparseEntity(const HNSWSparseHeader &hd) {
    header_ = hd;
  }

  //! Destructor
  virtual ~HnswSparseEntity() {}

  //! HnswSparseEntity Pointer
  typedef std::shared_ptr<HnswSparseEntity> Pointer;

  //! Get max neighbor size of graph level
  inline size_t neighbor_cnt(level_t level) const {
    return level == 0 ? header_.graph.l0_neighbor_count
                      : header_.hnsw.upper_neighbor_count;
  }

  //! get max neighbor size of graph level 0
  inline size_t l0_neighbor_cnt() const {
    return header_.graph.l0_neighbor_count;
  }

  //! get min neighbor size of graph
  inline size_t min_neighbor_cnt() const {
    return header_.graph.min_neighbor_count;
  }

  //! get upper neighbor size of graph level other than 0
  inline size_t upper_neighbor_cnt() const {
    return header_.hnsw.upper_neighbor_count;
  }

  //! Get a writable pointer to the current total doc count of the hnsw graph
  inline node_id_t *mutable_doc_cnt() {
    return &header_.graph.doc_count;
  }

  //! Get current total doc of the hnsw graph
  inline node_id_t doc_cnt() const {
    return header_.graph.doc_count;
  }

  //! Get a writable pointer to the total sparse count
  inline uint32_t *mutable_total_sparse_count() {
    return &header_.graph.total_sparse_count;
  }

  //! Get the total sparse count
  uint32_t total_sparse_count() const {
    return header_.graph.total_sparse_count;
  }

  //! Get hnsw graph scaling params
  inline size_t scaling_factor() const {
    return header_.hnsw.scaling_factor;
  }

  //! Get prune_size
  inline size_t prune_cnt() const {
    return header_.graph.prune_neighbor_count;
  }

  //! Current entry point of top level graph
  inline node_id_t entry_point() const {
    return header_.hnsw.entry_point;
  }

  //! Current max graph level
  inline level_t cur_max_level() const {
    return header_.hnsw.max_level;
  }

  //! Retrieve index vector size
  size_t vector_size() const {
    return header_.graph.vector_size;
  }

  //! Retrieve node size
  size_t node_size() const {
    return header_.graph.node_size;
  }

  //! Retrieve ef construction
  size_t ef_construction() const {
    return header_.graph.ef_construction;
  }

  //! Retrieve sparse meta size
  size_t sparse_meta_size() const {
    return header_.graph.sparse_meta_size;
  }

  //! Retrieve sparse unit size
  size_t sparse_unit_size() const {
    return header_.graph.sparse_unit_size;
  }

  //! Setters below store the value into the corresponding 32-bit header field
  void set_vector_size(size_t size) {
    header_.graph.vector_size = size;
  }

  void set_prune_cnt(size_t v) {
    header_.graph.prune_neighbor_count = v;
  }

  void set_scaling_factor(size_t val) {
    header_.hnsw.scaling_factor = val;
  }

  void set_l0_neighbor_cnt(size_t cnt) {
    header_.graph.l0_neighbor_count = cnt;
  }

  void set_min_neighbor_cnt(size_t cnt) {
    header_.graph.min_neighbor_count = cnt;
  }

  void set_upper_neighbor_cnt(size_t cnt) {
    header_.hnsw.upper_neighbor_count = cnt;
  }

  void set_ef_construction(size_t ef) {
    header_.graph.ef_construction = ef;
  }

  void set_sparse_meta_size(size_t size) {
    header_.graph.sparse_meta_size = size;
  }

  void set_sparse_unit_size(size_t size) {
    header_.graph.sparse_unit_size = size;
  }

 protected:
  //! Read-only access to the combined header
  inline const HNSWSparseHeader &header() const {
    return header_;
  }

  //! Writable access to the combined header
  inline HNSWSparseHeader *mutable_header() {
    return &header_;
  }

  //! Size of the combined header in bytes
  inline size_t header_size() const {
    return sizeof(header_);
  }

  void set_node_size(size_t size) {
    header_.graph.node_size = size;
  }

  //! Dump all segment by dumper
  //! Return dump size if success, errno(<0) in failure
  int64_t dump_segments(
      const IndexDumper::Pointer &dumper, key_t *keys,
      const std::function<level_t(node_id_t)> &get_level) const;

 private:
  //! dump mapping segment, for get_vector_by_key in provider
  int64_t dump_mapping_segment(const IndexDumper::Pointer &dumper,
                               const key_t *keys) const;

  //! dump hnsw head by dumper
  //! Return dump size if success, errno(<0) in failure
  int64_t dump_header(const IndexDumper::Pointer &dumper,
                      const HNSWSparseHeader &hd) const;

  //! dump sparse vector meta by dumper
  //! Return dump size if success, errno(<0) in failure
  int64_t dump_sparse_vector_meta(
      const IndexDumper::Pointer &dumper,
      const std::vector<node_id_t> &reorder_mapping) const;

  //! dump sparse vectors by dumper
  //! Return dump size if success, errno(<0) in failure
  int64_t dump_sparse_vector(
      const IndexDumper::Pointer &dumper,
      const std::vector<node_id_t> &reorder_mapping) const;

  //! dump hnsw neighbors by dumper: level 0 first, then the upper levels
  //! Return dump size if success, errno(<0) in failure
  int64_t dump_neighbors(const IndexDumper::Pointer &dumper,
                         const std::function<level_t(node_id_t)> &get_level,
                         const std::vector<node_id_t> &reorder_mapping,
                         const std::vector<node_id_t> &neighbor_mapping) const {
    auto len1 = dump_graph_neighbors(dumper, reorder_mapping, neighbor_mapping);
    if (len1 < 0) {
      return len1;
    }
    auto len2 = dump_upper_neighbors(dumper, get_level, reorder_mapping,
                                     neighbor_mapping);
    if (len2 < 0) {
      return len2;
    }

    return len1 + len2;
  }

  //! dump segment by dumper
  //! Return dump size if success, errno(<0) in failure
  int64_t dump_segment(const IndexDumper::Pointer &dumper,
                       const std::string &segment_id, const void *data,
                       size_t size) const;

  //! Dump level 0 neighbors
  //! Return dump size if success, errno(<0) in failure
  int64_t dump_graph_neighbors(
      const IndexDumper::Pointer &dumper,
      const std::vector<node_id_t> &reorder_mapping,
      const std::vector<node_id_t> &neighbor_mapping) const;

  //! Dump upper level neighbors
  //! Return dump size if success, errno(<0) in failure
  int64_t dump_upper_neighbors(
      const IndexDumper::Pointer &dumper,
      const std::function<level_t(node_id_t)> &get_level,
      const std::vector<node_id_t> &reorder_mapping,
      const std::vector<node_id_t> &neighbor_mapping) const;

 public:
  //! Cleanup the entity
  virtual int cleanup(void) {
    header_.clear();
    return 0;
  }

  //! Make a copy of searcher entity, to support thread-safe operation.
  //! The segment in container cannot be read concurrently
  //! The base implementation logs an error and returns a null pointer.
  virtual const HnswSparseEntity::Pointer clone() const {
    LOG_ERROR("Clone not implemented");
    return HnswSparseEntity::Pointer();
  }

  //! Get primary key of the node id
  virtual key_t get_key(node_id_t id) const = 0;

  //! Get vector meta data by node id
  virtual const void *get_vector_meta(node_id_t id) const = 0;

  virtual int get_vector_meta(const node_id_t id,
                              IndexStorage::MemoryBlock &block) const = 0;

  //! Get vector meta data of several node ids
  virtual int get_vector_metas(const node_id_t *ids, uint32_t count,
                               const void **vecs) const = 0;
  virtual int get_vector_metas(
      const node_id_t *ids, uint32_t count,
      std::vector<IndexStorage::MemoryBlock> &block_vecs) const = 0;

  //! Retrieve a sparse vector using a primary key
  virtual int get_sparse_vector_by_key(
      uint64_t /*key*/, uint32_t * /*sparse_count*/,
      std::string * /*sparse_indices_buffer*/,
      std::string * /*sparse_values_buffer*/) const {
    LOG_ERROR("get sparse vector not implemented");
    return IndexError_NotImplemented;
  }

  //! Retrieve a sparse vector using a node id
  virtual int get_sparse_vector_by_id(
      node_id_t /*id*/, uint32_t * /*sparse_count*/,
      std::string * /*sparse_indices_buffer*/,
      std::string * /*sparse_values_buffer*/) const {
    LOG_ERROR("get sparse vector not implemented");
    return IndexError_NotImplemented;
  }

  //! Get vector sparse feature data by offset and length
  virtual const void *get_sparse_data(uint64_t offset, uint32_t len) const = 0;

  //! Get sparse data from id
  virtual const void *get_sparse_data(node_id_t id) const = 0;

  virtual int get_sparse_data(uint64_t offset, uint32_t len,
                              IndexStorage::MemoryBlock &block) const = 0;

  virtual int get_sparse_data(const node_id_t id,
                              IndexStorage::MemoryBlock &block) const = 0;

  //! Get sparse data from vector
  virtual std::pair<const void *, uint32_t> get_sparse_data_from_vector(
      const void *vec) const = 0;
  virtual int get_sparse_data_from_vector(const void *vec,
                                          IndexStorage::MemoryBlock &block,
                                          int &sparse_length) const = 0;

  //! Get the node id's neighbors on graph level
  //! Note: the neighbors cannot be modified, using the following
  //! method to get WritableNeighbors if want to
  virtual const Neighbors get_neighbors(level_t level, node_id_t id) const = 0;

  //! Add vector and key to hnsw entity, and local id will be saved in id
  virtual int add_vector(level_t /*level*/, key_t /*key*/,
                         const std::string & /*vec*/, uint32_t /*sparse_count*/,
                         node_id_t * /*id*/) {
    return IndexError_NotImplemented;
  }

  virtual int add_vector(level_t /*level*/, key_t /*key*/,
                         const uint32_t /*sparse_count*/,
                         const uint32_t * /*sparse_indices*/,
                         const void * /*sparse_vec*/, node_id_t * /*id*/) {
    return IndexError_NotImplemented;
  }

  //! Add vector and id
  virtual int add_vector_with_id(level_t /*level*/, node_id_t /*id*/,
                                 const std::string & /*vec*/,
                                 uint32_t /*sparse_count*/) {
    return IndexError_NotImplemented;
  }

  //! Replace the neighbors of a node on a level.
  //! NOTE(review): the base implementation logs an error but still returns
  //! 0, unlike the other unimplemented defaults - confirm this is intended.
  virtual int update_neighbors(
      level_t /*level*/, node_id_t /*id*/,
      const std::vector<std::pair<node_id_t, dist_t>> & /*neighbors*/) {
    LOG_ERROR("Update neighbors dense not implemented");

    return 0;
  }

  //! Append neighbor_id to node id neighbors on level, size is the current
  //! neighbors size. Notice: the caller must ensure the neighbors are not full
  virtual void add_neighbor(level_t /*level*/, node_id_t /*id*/,
                            uint32_t /*size*/, node_id_t /*neighbor_id*/) {
    LOG_ERROR("Add neighbor not implemented");
  }

  //! Update entry point and max level
  virtual void update_ep_and_level(node_id_t ep, level_t level) {
    header_.hnsw.entry_point = ep;
    header_.hnsw.max_level = level;
  }

  //! Load the entity from storage; not implemented in the base class
  virtual int load(const IndexStorage::Pointer & /*container*/,
                   bool /*check_crc*/) {
    LOG_ERROR("Load not implemented");
    return IndexError_NotImplemented;
  }

  //! Dump the entity through a dumper; not implemented in the base class
  virtual int dump(const IndexDumper::Pointer & /*dumper*/) {
    LOG_ERROR("Dump not implemented");
    return IndexError_NotImplemented;
  }

  //! Write the zero padding that aligns `data_size` bytes to AlignSize();
  //! the padding length is stored in *padding_size
  static int CalcAndAddPadding(const IndexDumper::Pointer &dumper,
                               size_t data_size, size_t *padding_size);

 protected:
  //! Round size up to the next multiple of 32
  static inline size_t AlignSize(size_t size) {
    return (size + 0x1F) & ~static_cast<size_t>(0x1F);
  }

  //! Round size up to the next multiple of the memory page size
  static inline size_t AlignPageSize(size_t size) {
    size_t page_mask = ailego::MemoryHelper::PageSize() - 1;
    return (size + page_mask) & (~page_mask);
  }

  //! rearrange vectors to improve cache locality
  void reshuffle_vectors(const std::function<level_t(node_id_t)> &get_level,
                         std::vector<node_id_t> *n2o_mapping,
                         std::vector<node_id_t> *o2n_mapping,
                         key_t *keys) const;

 public:
  //! Segment identifiers
  const static std::string kSparseGraphHeaderSegmentId;
  const static std::string kSparseGraphFeaturesSegmentId;
  const static std::string kSparseGraphKeysSegmentId;
  const static std::string kSparseGraphNeighborsSegmentId;
  const static std::string kSparseGraphOffsetsSegmentId;
  const static std::string kSparseGraphMappingSegmentId;
  const static std::string kSparseHnswHeaderSegmentId;
  const static std::string kSparseHnswNeighborsSegmentId;
  const static std::string kSparseHnswOffsetsSegmentId;
  const static std::string kSparseGraphVectorsSegmentId;
  const static std::string kSparseGraphVectorMetaSegmentId;

  //! Defaults and limits
  constexpr static uint32_t kRevision = 0U;
  constexpr static size_t kMaxGraphLayers = 15;
  constexpr static uint32_t kDefaultEfConstruction = 500;
  constexpr static uint32_t kDefaultEf = 500;
  constexpr static uint32_t kDefaultUpperMaxNeighborCnt = 50;  // M of HNSW
  constexpr static uint32_t kDefaultL0MaxNeighborCnt = 100;
  constexpr static uint32_t kMaxNeighborCnt = 65535;
  constexpr static float kDefaultScanRatio = 0.1f;
  constexpr static uint32_t kDefaultMinScanLimit = 10000;
  constexpr static uint32_t kDefaultMaxScanLimit =
      std::numeric_limits<uint32_t>::max();
  constexpr static float kDefaultBFNegativeProbability = 0.001f;
  constexpr static uint32_t kDefaultScalingFactor = 50U;
  constexpr static uint32_t kDefaultBruteForceThreshold = 1000U;
  constexpr static uint32_t kDefaultDocsHardLimit = 1 << 30U;  // 1 billion
  constexpr static float kDefaultDocsSoftLimitRatio = 0.9f;
  constexpr static size_t kMaxChunkSize = 0xFFFFFFFF;
  constexpr static size_t kDefaultChunkSize = 2UL * 1024UL * 1024UL;
  constexpr static size_t kDefaultMaxChunkCnt = 50000UL;
  constexpr static float kDefaultNeighborPruneMultiplier =
      1.0f;  // prune_cnt = upper_max_neighbor_cnt * multiplier
  constexpr static float kDefaultL0MaxNeighborCntMultiplier =
      2.0f;  // l0_max_neighbor_cnt = upper_max_neighbor_cnt * multiplier

  constexpr static uint32_t kSparseMetaSize = 2u * sizeof(uint64_t);
  constexpr static float kDefaultSparseNeighborRatio = 0.5f;
  constexpr static uint32_t kSparseMaxDimSize = 16384;
  constexpr static float kDefaultQueryFilteringRatio = 0.0f;  // turn off

 protected:
  HNSWSparseHeader header_{};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_sparse/hnsw_sparse_index_hash.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include "hnsw_sparse_chunk.h"

namespace zvec {
namespace core {

//! Persistent hashmap implement through open addressing algorithm
template <class Key, class Val, Val EmptyVal = 0U,
          typename =
              typename std::enable_if<std::is_integral<Key>::value>::type>
class HnswSparseIndexHashMap {
  using key_type = Key;
  using val_type = Val;

  struct Iterator {
    key_type first;
    val_type second;
  };
  typedef Iterator *iterator;
  typedef Iterator Item;
  typedef const Iterator *const_iterator;

  class Slot {
   public:
    Slot(SparseChunk::Pointer &&chunk, const void *data)
        : chunk_(std::move(chunk)),
          items_(reinterpret_cast<const Item *>(data)) {}
    //! Return a empty loc or the key item loc

    Slot(SparseChunk::Pointer &&chunk, IndexStorage::MemoryBlock &&mem_block)
        : chunk_(std::move(chunk)), items_block_(std::move(mem_block)) {
      items_ = reinterpret_cast<const Item *>(items_block_.data());
    }
    const_iterator find(key_type key, uint32_t max_items, uint32_t mask) const {
      auto it = &items_[key & mask];
      for (auto i = 0U; i < max_items; ++i) {
        if (it->first == key || it->second == EmptyVal) {
          // LOG_DEBUG("i=%u", i);
          return it;
        }
        ++it;
        if (it == &items_[max_items]) {
          it = &items_[0];
        }
      }
      return nullptr;
    }

    bool update(const_iterator it) {
      uint32_t offset = reinterpret_cast<const uint8_t *>(it) -
                        reinterpret_cast<const uint8_t *>(&items_[0]);
      if (ailego_unlikely(chunk_->write(offset, it, sizeof(Item)) !=
                          sizeof(Item))) {
        LOG_ERROR("Chunk write failed");
        return false;
      }
      return true;
    }

   private:
    SparseChunk::Pointer chunk_{};
    const Item *items_{nullptr};  // point to chunk data
    IndexStorage::MemoryBlock items_block_{};
  };

 public:
  //! Init the hash
  //! broker      the index allocator
  //! chunk_size  the size of per chunk allocated, actual size may greater
  //! factor      factor = 1/ratio, ratio is the probability of a squence
  //! number inserted to this container
  //! max         the max number key can be inserted
  //! expansion_ratio   memory expansion ratio
  int init(SparseChunkBroker::Pointer &broker, uint32_t chunk_size,
           uint32_t factor, size_t max, float expansion_ratio) {
    ailego_assert_with(expansion_ratio > 1.0f, "ratio must > 1.0f");
    broker_ = broker;

    size_t items = std::ceil(chunk_size * 1.0f / sizeof(Item));
    slot_items_ = 1UL << static_cast<size_t>((std::ceil(std::log2(items))));
    size_t range = slot_items_ * factor / expansion_ratio;
    mask_bits_ = std::floor(std::log2(range));
    range = 1UL << mask_bits_;
    size_t max_slots = std::ceil(max * 1.0f / range);
    slots_.reserve(max_slots);
    slot_loc_mask_ = slot_items_ - 1U;

    int ret = load();
    if (ret != 0) {
      return ret;
    }

    LOG_DEBUG(
        "HnswIndexHash init, chunkSize=%u factor=%u max=%zu "
        "ratio=%f slotItems=%u maxSlots=%zu maskBits=%u "
        "range=%zu",
        chunk_size, factor, max, expansion_ratio, slot_items_, max_slots,
        mask_bits_, range);

    return 0;
  }

  int cleanup(void) {
    broker_.reset();
    slots_.clear();
    slots_.shrink_to_fit();
    mask_bits_ = 0U;
    slot_items_ = 0U;
    slot_loc_mask_ = 0U;

    return 0;
  }

  const_iterator end(void) const {
    return nullptr;
  }

  const_iterator find(const key_type key) const {
    auto idx = key >> mask_bits_;
    if (idx >= slots_.size()) {
      return end();
    }
    auto it = slots_[idx].find(key, slot_items_, slot_loc_mask_);
    return it && it->second != EmptyVal ? it : nullptr;
  }

  bool insert(key_type key, val_type val) {
    auto idx = key >> mask_bits_;
    if (idx >= slots_.size()) {
      if (ailego_unlikely(idx >= slots_.capacity())) {
        LOG_ERROR("no space to insert");
        return false;
      }
      for (auto i = slots_.size(); i <= idx; ++i) {
        if (ailego_unlikely(!alloc_slot(i))) {
          return false;
        }
      }
    }
    auto it = slots_[idx].find(key, slot_items_, slot_loc_mask_);
    if (ailego_unlikely(it == nullptr)) {
      LOG_ERROR("no space to insert");
      return false;
    }

    //! TODO: write memory is ok?
    const_cast<iterator>(it)->first = key;
    const_cast<iterator>(it)->second = val;

    return slots_[idx].update(it);
  }

 private:
  bool alloc_slot(size_t idx) {
    ailego_assert_with(idx == slots_.size(), "invalid idx");

    size_t size = slot_items_ * sizeof(Item);
    auto p = broker_->alloc_chunk(SparseChunkBroker::CHUNK_TYPE_NEIGHBOR_INDEX,
                                  idx, size);
    if (ailego_unlikely(p.first != 0)) {
      LOG_ERROR("Alloc data chunk failed");
      return false;
    }
    SparseChunk::Pointer chunk = p.second;
    if (ailego_unlikely(chunk->resize(size) != size)) {
      LOG_ERROR("Chunk resize failed, size=%zu", size);
      return false;
    }
    //! Read the whole data to memory
    IndexStorage::MemoryBlock data_block;
    if (ailego_unlikely(chunk->read(0U, data_block, size) != size)) {
      LOG_ERROR("Chunk read failed, size=%zu", size);
      return false;
    }

    slots_.emplace_back(std::move(chunk), std::move(data_block));
    return true;
  }

  int load(void) {
    size_t slots_cnt =
        broker_->get_chunk_cnt(SparseChunkBroker::CHUNK_TYPE_NEIGHBOR_INDEX);
    for (size_t i = 0UL; i < slots_cnt; ++i) {
      auto chunk =
          broker_->get_chunk(SparseChunkBroker::CHUNK_TYPE_NEIGHBOR_INDEX, i);
      if (!chunk) {
        LOG_ERROR("Get chunk failed, seq=%zu", i);
        return IndexError_InvalidFormat;
      }
      size_t size = sizeof(Item) * slot_items_;
      if (chunk->data_size() < size) {
        LOG_ERROR(
            "Hash params may be mismatch, seq=%zu, data_size=%zu "
            "expect=%zu",
            i, chunk->data_size(), size);
        return IndexError_InvalidFormat;
      }
      //! Read the whole data to memory
      IndexStorage::MemoryBlock data_block;
      if (ailego_unlikely(chunk->read(0U, data_block, size) != size)) {
        LOG_ERROR("Chunk read failed, size=%zu", size);
        return false;
      }
      slots_.emplace_back(std::move(chunk), std::move(data_block));
    }
    return 0;
  }

 private:
  SparseChunkBroker::Pointer broker_{};  // chunk broker
  std::vector<Slot> slots_{};
  uint32_t mask_bits_{0U};
  uint32_t slot_items_{};  // must be a power of 2
  uint32_t slot_loc_mask_{};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_sparse/hnsw_sparse_index_provider.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include "hnsw_sparse_entity.h"

namespace zvec {
namespace core {

//! Sparse vector provider that serves the vectors held by an HNSW sparse
//! entity. The provider keeps a reference to `meta`, so the meta object passed
//! to the constructor must outlive the provider.
class HnswSparseIndexProvider : public IndexSparseProvider {
 public:
  HnswSparseIndexProvider(const IndexMeta &meta,
                          const HnswSparseEntity::Pointer &entity,
                          const std::string &owner)
      : meta_(meta), entity_(entity), owner_class_(owner) {}

  HnswSparseIndexProvider(const HnswSparseIndexProvider &) = delete;
  HnswSparseIndexProvider &operator=(const HnswSparseIndexProvider &) = delete;

 public:
  //! Create a new iterator (null pointer if the allocation fails)
  IndexSparseProvider::Iterator::Pointer create_iterator(void) override {
    return IndexSparseProvider::Iterator::Pointer(new (std::nothrow)
                                                      Iterator(entity_));
  }

  //! Retrieve count of vectors
  size_t count(void) const override {
    return entity_->doc_cnt();
  }

  //! Retrieve the total sparse count reported by the entity
  size_t total_sparse_count(void) const override {
    return entity_->total_sparse_count();
  }

  //! Retrieve type of vector
  IndexMeta::DataType data_type(void) const override {
    return meta_.data_type();
  }

  //! Retrieve a vector using a primary key
  int get_sparse_vector(uint64_t key, uint32_t *sparse_count,
                        std::string *sparse_indices_buffer,
                        std::string *sparse_values_buffer) const override {
    return entity_->get_sparse_vector_by_key(
        key, sparse_count, sparse_indices_buffer, sparse_values_buffer);
  }

  //! Retrieve the owner class
  const std::string &owner_class(void) const override {
    return owner_class_;
  }

 private:
  //! Forward iterator over the documents of the entity whose key is valid
  class Iterator : public IndexSparseProvider::Iterator {
   public:
    //! Position the iterator on the first document with a valid key
    Iterator(const HnswSparseEntity::Pointer &entity)
        : entity_(entity), cur_id_(0U), valid_(false) {
      reset();
    }

    //! Retrieve sparse count
    virtual uint32_t sparse_count() const override {
      return sparse_count_;
    }

    //! Retrieve sparse indices
    virtual const uint32_t *sparse_indices() const override {
      return reinterpret_cast<const uint32_t *>(sparse_indices_buffer_.data());
    }

    //! Retrieve sparse data
    virtual const void *sparse_data() const override {
      return reinterpret_cast<const void *>(sparse_data_buffer_.data());
    }

    //! Test if the iterator is valid
    virtual bool is_valid(void) const override {
      return cur_id_ < entity_->doc_cnt() && valid_;
    }

    //! Retrieve primary key
    virtual uint64_t key(void) const override {
      return entity_->get_key(cur_id_);
    }

    //! Next iterator
    virtual void next(void) override {
      // An exhausted iterator stays exhausted: cur_id_ may be kInvalidNodeId
      // here, and adding 1 to it would wrap around to the first document.
      if (cur_id_ >= entity_->doc_cnt()) {
        return;
      }
      cur_id_ = get_next_valid_id(cur_id_ + 1);
      load_current();
    }

    //! Reset the iterator to the first document with a valid key
    void reset(void) {
      cur_id_ = get_next_valid_id(0);
      load_current();
    }

   private:
    //! Decode the sparse vector at `cur_id_` into the iterator buffers.
    //! `valid_` is set only when the id is in range and the entity returned
    //! data for it; the buffers are cleared before each decode.
    void load_current(void) {
      valid_ = false;
      if (cur_id_ >= entity_->doc_cnt()) {
        return;
      }
      const void *sparse_data = entity_->get_sparse_data(cur_id_);
      if (sparse_data == nullptr) {
        return;
      }

      sparse_indices_buffer_.clear();
      sparse_data_buffer_.clear();

      SparseUtility::ReverseSparseFormat(
          sparse_data, &sparse_count_, &sparse_indices_buffer_,
          &sparse_data_buffer_, entity_->sparse_unit_size());
      valid_ = true;
    }

    //! Return the first id >= `start_id` whose key is valid, or
    //! kInvalidNodeId when there is none
    node_id_t get_next_valid_id(node_id_t start_id) {
      for (node_id_t i = start_id; i < entity_->doc_cnt(); i++) {
        // Keys are compared against the key sentinel, not the node id one.
        if (entity_->get_key(i) != kInvalidKey) {
          return i;
        }
      }
      return kInvalidNodeId;
    }

   private:
    const HnswSparseEntity::Pointer entity_;
    node_id_t cur_id_;
    uint32_t sparse_count_{0};
    std::string sparse_indices_buffer_;
    std::string sparse_data_buffer_;
    bool valid_{false};
  };

 private:
  const IndexMeta &meta_;
  const HnswSparseEntity::Pointer entity_;
  const std::string owner_class_;
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_sparse/hnsw_sparse_params.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <string>

namespace zvec {
namespace core {

//! Parameter keys of the `proxima.hnsw.sparse_builder.*` group.
//! NOTE: these are `static` objects defined in a header, so every translation
//! unit that includes it gets its own copy of each string.
static const std::string PARAM_HNSW_SPARSE_BUILDER_THREAD_COUNT(
    "proxima.hnsw.sparse_builder.thread_count");
static const std::string PARAM_HNSW_SPARSE_BUILDER_MEMORY_QUOTA(
    "proxima.hnsw.sparse_builder.memory_quota");
static const std::string PARAM_HNSW_SPARSE_BUILDER_EFCONSTRUCTION(
    "proxima.hnsw.sparse_builder.efconstruction");
static const std::string PARAM_HNSW_SPARSE_BUILDER_SCALING_FACTOR(
    "proxima.hnsw.sparse_builder.scaling_factor");
static const std::string PARAM_HNSW_SPARSE_BUILDER_CHECK_INTERVAL_SECS(
    "proxima.hnsw.sparse_builder.check_interval_secs");
static const std::string PARAM_HNSW_SPARSE_BUILDER_NEIGHBOR_PRUNE_MULTIPLIER(
    "proxima.hnsw.sparse_builder.neighbor_prune_multiplier");
static const std::string PARAM_HNSW_SPARSE_BUILDER_MIN_NEIGHBOR_COUNT(
    "proxima.hnsw.sparse_builder.min_neighbor_count");
static const std::string PARAM_HNSW_SPARSE_BUILDER_MAX_NEIGHBOR_COUNT(
    "proxima.hnsw.sparse_builder.max_neighbor_count");
static const std::string
    PARAM_HNSW_SPARSE_BUILDER_L0_MAX_NEIGHBOR_COUNT_MULTIPLIER(
        "proxima.hnsw.sparse_builder.l0_max_neighbor_count_multiplier");

//! Parameter keys of the `proxima.hnsw.sparse_searcher.*` group
static const std::string PARAM_HNSW_SPARSE_SEARCHER_EF(
    "proxima.hnsw.sparse_searcher.ef");
static const std::string PARAM_HNSW_SPARSE_SEARCHER_BRUTE_FORCE_THRESHOLD(
    "proxima.hnsw.sparse_searcher.brute_force_threshold");
static const std::string PARAM_HNSW_SPARSE_SEARCHER_NEIGHBORS_IN_MEMORY_ENABLE(
    "proxima.hnsw.sparse_searcher.neighbors_in_memory_enable");
static const std::string PARAM_HNSW_SPARSE_SEARCHER_MAX_SCAN_RATIO(
    "proxima.hnsw.sparse_searcher.max_scan_ratio");
static const std::string PARAM_HNSW_SPARSE_SEARCHER_CHECK_CRC_ENABLE(
    "proxima.hnsw.sparse_searcher.check_crc_enable");
static const std::string PARAM_HNSW_SPARSE_SEARCHER_VISIT_BLOOMFILTER_ENABLE(
    "proxima.hnsw.sparse_searcher.visit_bloomfilter_enable");
// Must be strictly between 0 and 1 (checked by the searcher's init).
static const std::string
    PARAM_HNSW_SPARSE_SEARCHER_VISIT_BLOOMFILTER_NEGATIVE_PROB(
        "proxima.hnsw.sparse_searcher.visit_bloomfilter_negative_prob");
static const std::string PARAM_HNSW_SPARSE_SEARCHER_FORCE_PADDING_RESULT_ENABLE(
    "proxima.hnsw.sparse_searcher.force_padding_result_enable");
// When set, must be strictly between 0 and 1 (checked by the searcher's init).
static const std::string PARAM_HNSW_SPARSE_SEARCHER_QUERY_FILTERING_RATIO(
    "proxima.hnsw.sparse_searcher.query_filtering_ratio");

//! Parameter keys of the `proxima.hnsw.sparse_streamer.*` group
static const std::string PARAM_HNSW_SPARSE_STREAMER_MAX_SCAN_RATIO(
    "proxima.hnsw.sparse_streamer.max_scan_ratio");
static const std::string PARAM_HNSW_SPARSE_STREAMER_MIN_SCAN_LIMIT(
    "proxima.hnsw.sparse_streamer.min_scan_limit");
static const std::string PARAM_HNSW_SPARSE_STREAMER_MAX_SCAN_LIMIT(
    "proxima.hnsw.sparse_streamer.max_scan_limit");
static const std::string PARAM_HNSW_SPARSE_STREAMER_EF(
    "proxima.hnsw.sparse_streamer.ef");
static const std::string PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION(
    "proxima.hnsw.sparse_streamer.efconstruction");
static const std::string PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT(
    "proxima.hnsw.sparse_streamer.max_neighbor_count");
static const std::string
    PARAM_HNSW_SPARSE_STREAMER_L0_MAX_NEIGHBOR_COUNT_MULTIPLIER(
        "proxima.hnsw.sparse_streamer.l0_max_neighbor_count_multiplier");
static const std::string PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR(
    "proxima.hnsw.sparse_streamer.scaling_factor");
static const std::string PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD(
    "proxima.hnsw.sparse_streamer.brute_force_threshold");
static const std::string PARAM_HNSW_SPARSE_STREAMER_DOCS_HARD_LIMIT(
    "proxima.hnsw.sparse_streamer.docs_hard_limit");
static const std::string PARAM_HNSW_SPARSE_STREAMER_DOCS_SOFT_LIMIT(
    "proxima.hnsw.sparse_streamer.docs_soft_limit");
static const std::string PARAM_HNSW_SPARSE_STREAMER_MAX_INDEX_SIZE(
    "proxima.hnsw.sparse_streamer.max_index_size");
static const std::string PARAM_HNSW_SPARSE_STREAMER_VISIT_BLOOMFILTER_ENABLE(
    "proxima.hnsw.sparse_streamer.visit_bloomfilter_enable");
static const std::string
    PARAM_HNSW_SPARSE_STREAMER_VISIT_BLOOMFILTER_NEGATIVE_PROB(
        "proxima.hnsw.sparse_streamer.visit_bloomfilter_negative_prob");
static const std::string PARAM_HNSW_SPARSE_STREAMER_CHECK_CRC_ENABLE(
    "proxima.hnsw.sparse_streamer.check_crc_enable");
static const std::string PARAM_HNSW_SPARSE_STREAMER_NEIGHBOR_PRUNE_MULTIPLIER(
    "proxima.hnsw.sparse_streamer.neighbor_prune_multiplier");
static const std::string PARAM_HNSW_SPARSE_STREAMER_CHUNK_SIZE(
    "proxima.hnsw.sparse_streamer.chunk_size");
static const std::string PARAM_HNSW_SPARSE_STREAMER_FILTER_SAME_KEY(
    "proxima.hnsw.sparse_streamer.filter_same_key");
static const std::string PARAM_HNSW_SPARSE_STREAMER_GET_VECTOR_ENABLE(
    "proxima.hnsw.sparse_streamer.get_vector_enable");
static const std::string PARAM_HNSW_SPARSE_STREAMER_MIN_NEIGHBOR_COUNT(
    "proxima.hnsw.sparse_streamer.min_neighbor_count");
static const std::string PARAM_HNSW_SPARSE_STREAMER_FORCE_PADDING_RESULT_ENABLE(
    "proxima.hnsw.sparse_streamer.force_padding_result_enable");
static const std::string PARAM_HNSW_SPARSE_STREAMER_QUERY_FILTERING_RATIO(
    "proxima.hnsw.sparse_streamer.query_filtering_ratio");

//! Parameter keys of the `proxima.hnsw.sparse_reducer.*` group
static const std::string PARAM_HNSW_SPARSE_REDUCER_WORKING_PATH(
    "proxima.hnsw.sparse_reducer.working_path");
static const std::string PARAM_HNSW_SPARSE_REDUCER_NUM_OF_ADD_THREADS(
    "proxima.hnsw.sparse_reducer.num_of_add_threads");
static const std::string PARAM_HNSW_SPARSE_REDUCER_INDEX_NAME(
    "proxima.hnsw.sparse_reducer.index_name");
static const std::string PARAM_HNSW_SPARSE_REDUCER_EFCONSTRUCTION(
    "proxima.hnsw.sparse_reducer.efconstruction");

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_sparse/hnsw_sparse_searcher.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "hnsw_sparse_searcher.h"
#include "hnsw_sparse_algorithm.h"
#include "hnsw_sparse_index_provider.h"
#include "hnsw_sparse_params.h"

namespace zvec {
namespace core {

//! Constructor: members keep their in-class initial values
HnswSparseSearcher::HnswSparseSearcher() = default;

//! Destructor: nothing beyond member destruction
HnswSparseSearcher::~HnswSparseSearcher() = default;

//! Initialize the searcher from `search_params`.
//! Copies the parameters, reads the searcher tunables out of them, validates
//! the bloom filter probability and (when supplied) the query filtering
//! ratio, and moves the searcher to STATE_INITED.
//! Returns 0 on success or IndexError_InvalidArgument on a bad value.
int HnswSparseSearcher::init(const ailego::Params &search_params) {
  params_ = search_params;

  // Search width; zero falls back to the entity default.
  params_.get(PARAM_HNSW_SPARSE_SEARCHER_EF, &ef_);
  if (ef_ == 0) {
    ef_ = HnswSparseEntity::kDefaultEf;
  }

  // Scan and brute-force limits.
  params_.get(PARAM_HNSW_SPARSE_SEARCHER_MAX_SCAN_RATIO, &max_scan_ratio_);
  params_.get(PARAM_HNSW_SPARSE_SEARCHER_BRUTE_FORCE_THRESHOLD,
              &bruteforce_threshold_);

  // Visit bloom filter.
  params_.get(PARAM_HNSW_SPARSE_SEARCHER_VISIT_BLOOMFILTER_ENABLE,
              &bf_enabled_);
  params_.get(PARAM_HNSW_SPARSE_SEARCHER_VISIT_BLOOMFILTER_NEGATIVE_PROB,
              &bf_negative_probability_);

  // Loading and result shaping switches.
  params_.get(PARAM_HNSW_SPARSE_SEARCHER_CHECK_CRC_ENABLE, &check_crc_enabled_);
  params_.get(PARAM_HNSW_SPARSE_SEARCHER_NEIGHBORS_IN_MEMORY_ENABLE,
              &neighbors_in_memory_enabled_);
  params_.get(PARAM_HNSW_SPARSE_SEARCHER_FORCE_PADDING_RESULT_ENABLE,
              &force_padding_topk_enabled_);

  // Query filtering is switched on by the result of looking up its ratio.
  query_filtering_enabled_ =
      params_.get(PARAM_HNSW_SPARSE_SEARCHER_QUERY_FILTERING_RATIO,
                  &query_filtering_ratio_);

  const bool bf_prob_out_of_range =
      bf_negative_probability_ <= 0.0f || bf_negative_probability_ >= 1.0f;
  if (bf_prob_out_of_range) {
    LOG_ERROR(
        "[%s] must be in range (0,1)",
        PARAM_HNSW_SPARSE_SEARCHER_VISIT_BLOOMFILTER_NEGATIVE_PROB.c_str());
    return IndexError_InvalidArgument;
  }

  if (query_filtering_enabled_) {
    const bool ratio_out_of_range =
        query_filtering_ratio_ <= 0.0f || query_filtering_ratio_ >= 1.0f;
    if (ratio_out_of_range) {
      LOG_ERROR("[%s] must be in range (0, 1)",
                PARAM_HNSW_SPARSE_SEARCHER_QUERY_FILTERING_RATIO.c_str());
      return IndexError_InvalidArgument;
    }
  }

  entity_.set_neighbors_in_memory(neighbors_in_memory_enabled_);
  state_ = STATE_INITED;

  LOG_DEBUG(
      "Init params: ef=%u maxScanRatio=%f bfEnabled=%u checkCrcEnabled=%u "
      "neighborsInMemoryEnabled=%u bfNagtiveProb=%f bruteForceThreshold=%u "
      "forcePadding=%u filteringRatio=%f",
      ef_, max_scan_ratio_, bf_enabled_, check_crc_enabled_,
      neighbors_in_memory_enabled_, bf_negative_probability_,
      bruteforce_threshold_, force_padding_topk_enabled_,
      query_filtering_ratio_);

  return 0;
}

//! Dump the level-0 neighbor list of every node to stdout, one node per line
//! in the form `node: <id>; <n0>, <n1>, ...`.
void HnswSparseSearcher::print_debug_info() {
  for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {
    Neighbors neighbours = entity_.get_neighbors(0, id);
    std::cout << "node: " << id << "; ";
    for (uint32_t i = 0; i < neighbours.size(); ++i) {
      if (i != 0) {
        std::cout << ", ";
      }
      std::cout << neighbours[i];
    }
    // Terminate the line unconditionally, so that a node without neighbors
    // does not run into the output of the next node.
    std::cout << std::endl;
  }
}

//! Release the loaded index and put the tunables back to their defaults.
//! Leaves the searcher in STATE_INIT; always returns 0.
int HnswSparseSearcher::cleanup() {
  LOG_INFO("Begin HnswSparseSearcher:cleanup");

  // Index bound objects and load statistics.
  metric_.reset();
  meta_.clear();
  stats_.clear_attributes();
  stats_.set_loaded_count(0UL);
  stats_.set_loaded_costtime(0UL);

  // Search tunables.
  ef_ = HnswSparseEntity::kDefaultEf;
  max_scan_ratio_ = HnswSparseEntity::kDefaultScanRatio;
  max_scan_num_ = 0U;
  bruteforce_threshold_ = HnswSparseEntity::kDefaultBruteForceThreshold;
  bf_negative_probability_ = HnswSparseEntity::kDefaultBFNegativeProbability;

  // Feature switches.
  bf_enabled_ = false;
  check_crc_enabled_ = false;
  neighbors_in_memory_enabled_ = false;

  entity_.cleanup();
  state_ = STATE_INIT;

  LOG_INFO("End HnswSparseSearcher:cleanup");

  return 0;
}

//! Load an index from `container`.
//! Requires STATE_INITED. Deserializes the index meta, loads the entity,
//! creates the search algorithm and selects the metric: the caller supplied
//! `metric` when given, otherwise one created from the meta (replaced by its
//! query metric when it has one). Returns 0 on success.
int HnswSparseSearcher::load(IndexStorage::Pointer container,
                             IndexMetric::Pointer metric) {
  if (state_ != STATE_INITED) {
    LOG_ERROR("Init the searcher first before load index");
    return IndexError_Runtime;
  }

  LOG_INFO("Begin HnswSparseSearcher:load");

  const auto begin_ms = ailego::Monotime::MilliSeconds();

  int rc = IndexHelper::DeserializeFromStorage(container.get(), &meta_);
  if (rc != 0) {
    LOG_ERROR("Failed to deserialize meta from container");
    return rc;
  }

  rc = entity_.load(container, check_crc_enabled_);
  if (rc != 0) {
    LOG_ERROR("HnswSparseSearcher load index failed");
    return rc;
  }

  alg_ = HnswSparseAlgorithm::UPointer(new HnswSparseAlgorithm(entity_));

  if (!metric) {
    // No metric supplied: build the one named in the index meta.
    metric_ = IndexFactory::CreateMetric(meta_.metric_name());
    if (!metric_) {
      LOG_ERROR("CreateMeasure failed, name: %s", meta_.metric_name().c_str());
      return IndexError_NoExist;
    }
    rc = metric_->init(meta_, meta_.metric_params());
    if (rc != 0) {
      LOG_ERROR("IndexMetric init failed, ret=%d", rc);
      return rc;
    }
    if (metric_->query_metric()) {
      metric_ = metric_->query_metric();
    }
  } else {
    metric_ = metric;
  }

  // if (!metric_->is_matched(meta_)) {
  //   LOG_ERROR("IndexMeasure not match index meta");
  //   return IndexError_Mismatch;
  // }

  // Scan budget: a ratio of the document count, but never below 4096.
  const uint32_t scaled_scan_num =
      static_cast<uint32_t>(max_scan_ratio_ * entity_.doc_cnt());
  max_scan_num_ = std::max(4096U, scaled_scan_num);

  stats_.set_loaded_count(entity_.doc_cnt());
  stats_.set_loaded_costtime(ailego::Monotime::MilliSeconds() - begin_ms);
  magic_ = IndexContext::GenerateMagic();
  state_ = STATE_LOADED;

  LOG_INFO("End HnswSparseSearcher::load");

  return 0;
}

//! Drop the loaded index while keeping the configured parameters.
//! Puts the searcher back to STATE_INITED; always returns 0.
int HnswSparseSearcher::unload() {
  LOG_INFO("HnswSparseSearcher unload index");

  // Objects bound to the loaded index.
  meta_.clear();
  entity_.cleanup();
  metric_.reset();

  // Values derived from the loaded index.
  max_scan_num_ = 0;
  stats_.set_loaded_count(0UL);
  stats_.set_loaded_costtime(0UL);

  state_ = STATE_INITED;
  return 0;
}

//! Rebind `ctx` to this searcher: give it a clone of the entity together with
//! the searcher's meta, metric, magic number and scan limits.
//! Returns IndexError_Runtime when the entity cannot be cloned, otherwise the
//! result of the context update.
int HnswSparseSearcher::update_context(HnswSparseContext *ctx) const {
  const HnswSparseEntity::Pointer cloned = entity_.clone();
  if (cloned) {
    ctx->set_max_scan_num(max_scan_num_);
    ctx->set_bruteforce_threshold(bruteforce_threshold_);

    return ctx->update_context(HnswSparseContext::kSparseSearcherContext, meta_,
                               metric_, cloned, magic_);
  }

  LOG_ERROR("Failed to clone search context entity");
  return IndexError_Runtime;
}

//! Similarity search with sparse inputs.
//! `sparse_count[q]` gives the number of (index, value) pairs of query q; the
//! indices and values of all `count` queries are laid out back to back in
//! `sparse_indices` and `sparse_query`. Falls back to the brute-force search
//! when the index holds no more documents than the context's threshold.
int HnswSparseSearcher::search_impl(const uint32_t *sparse_count,
                                    const uint32_t *sparse_indices,
                                    const void *sparse_query,
                                    const IndexQueryMeta &qmeta, uint32_t count,
                                    Context::Pointer &context) const {
  if (ailego_unlikely(!context)) {
    LOG_ERROR("The context is not created by this searcher");
    return IndexError_Mismatch;
  }
  HnswSparseContext *ctx = dynamic_cast<HnswSparseContext *>(context.get());
  ailego_do_if_false(ctx) {
    LOG_ERROR("Cast context to HnswSparseContext failed");
    return IndexError_Cast;
  }

  if (entity_.doc_cnt() <= ctx->get_bruteforce_threshold()) {
    return search_bf_impl(sparse_count, sparse_indices, sparse_query, qmeta,
                          count, context);
  }

  if (ctx->magic() != magic_) {
    //! context is created by another searcher or streamer
    const int rc = update_context(ctx);
    if (rc != 0) {
      return rc;
    }
  }

  ctx->clear();
  ctx->resize_results(count);

  // Cursors into the concatenated query arrays.
  const uint32_t *indices_cursor = sparse_indices;
  const void *values_cursor = sparse_query;

  for (size_t q = 0; q < count; ++q) {
    std::string full_query;
    std::string filtered_query;

    SparseUtility::TransSparseFormat(sparse_count[q], indices_cursor,
                                     values_cursor, entity_.sparse_unit_size(),
                                     full_query);

    if (!query_filtering_enabled_) {
      ctx->reset_query(full_query.data());
    } else {
      // Search the graph with the filtered query.
      if (!SparseUtility::FilterSparseQuery(
              sparse_count[q], indices_cursor, values_cursor, qmeta.data_type(),
              entity_.sparse_unit_size(), query_filtering_ratio_,
              &filtered_query)) {
        LOG_ERROR("Hnsw filtering failed");
        return IndexError_Runtime;
      }

      ctx->reset_query(filtered_query.data());
    }

    const int rc = alg_->search(ctx);
    if (ailego_unlikely(rc != 0)) {
      LOG_ERROR("Hnsw searcher fast search failed");
      return rc;
    }

    if (query_filtering_enabled_) {
      // Recalculate the top-k distances against the unfiltered query.
      ctx->reset_query(full_query.data());
      ctx->recal_topk_dist();
    }

    ctx->topk_to_result(q);

    // Step over the pairs consumed by this query.
    indices_cursor += sparse_count[q];
    values_cursor = reinterpret_cast<const char *>(values_cursor) +
                    sparse_count[q] * qmeta.unit_size();
  }

  if (ailego_unlikely(ctx->error())) {
    return IndexError_Runtime;
  }

  return 0;
}

//! Brute-force similarity search with sparse inputs.
//! Scores every document whose key is valid and which the context filter (if
//! any) does not reject. The query layout is the same as for search_impl.
//! In group-by mode the results are collected into one heap per group id.
int HnswSparseSearcher::search_bf_impl(
    const uint32_t *sparse_count, const uint32_t *sparse_indices,
    const void *sparse_query, const IndexQueryMeta &qmeta, uint32_t count,
    IndexStreamer::Context::Pointer &context) const {
  if (ailego_unlikely(!context)) {
    LOG_ERROR("The context is not created by this searcher");
    return IndexError_Mismatch;
  }
  HnswSparseContext *ctx = dynamic_cast<HnswSparseContext *>(context.get());
  ailego_do_if_false(ctx) {
    LOG_ERROR("Cast context to HnswSparseContext failed");
    return IndexError_Cast;
  }
  if (ctx->magic() != magic_) {
    //! context is created by another searcher or streamer
    const int rc = update_context(ctx);
    if (rc != 0) {
      return rc;
    }
  }

  ctx->clear();
  ctx->resize_results(count);

  // Cursors into the concatenated query arrays.
  const uint32_t *indices_cursor = sparse_indices;
  const void *values_cursor = sparse_query;

  if (!ctx->group_by_search()) {
    // Plain top-k: a single heap per query.
    for (size_t q = 0; q < count; ++q) {
      std::string query_buffer;
      SparseUtility::TransSparseFormat(
          sparse_count[q], indices_cursor, values_cursor,
          entity_.sparse_unit_size(), query_buffer);

      ctx->reset_query(query_buffer.data());

      ctx->topk_heap().clear();
      for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {
        const auto doc_key = entity_.get_key(id);
        if (doc_key == kInvalidKey) {
          continue;
        }
        if (ctx->filter().is_valid() && ctx->filter()(doc_key)) {
          continue;  // rejected by the filter
        }

        dist_t dist = ctx->dist_calculator().dist(id);
        ctx->topk_heap().emplace(id, dist);
      }
      ctx->topk_to_result(q);

      indices_cursor += sparse_count[q];
      values_cursor = reinterpret_cast<const char *>(values_cursor) +
                      sparse_count[q] * qmeta.unit_size();
    }
  } else {
    if (!ctx->group_by().is_valid()) {
      LOG_ERROR("Invalid group-by function");
      return IndexError_Runtime;
    }

    // Map a node id to its group id through the document key.
    std::function<std::string(node_id_t)> group_by = [&](node_id_t id) {
      return ctx->group_by()(entity_.get_key(id));
    };

    for (size_t q = 0; q < count; ++q) {
      std::string query_buffer;
      SparseUtility::TransSparseFormat(
          sparse_count[q], indices_cursor, values_cursor,
          entity_.sparse_unit_size(), query_buffer);

      ctx->reset_query(query_buffer.data());

      ctx->group_topk_heaps().clear();

      for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {
        const auto doc_key = entity_.get_key(id);
        if (doc_key == kInvalidKey) {
          continue;
        }
        if (ctx->filter().is_valid() && ctx->filter()(doc_key)) {
          continue;  // rejected by the filter
        }

        dist_t dist = ctx->dist_calculator().dist(id);

        std::string group_id = group_by(id);

        // A heap that is still empty gets its limit set before first use.
        auto &topk_heap = ctx->group_topk_heaps()[group_id];
        if (topk_heap.empty()) {
          topk_heap.limit(ctx->group_topk());
        }
        topk_heap.emplace_back(id, dist);
      }
      ctx->topk_to_result(q);

      indices_cursor += sparse_count[q];
      values_cursor = reinterpret_cast<const char *>(values_cursor) +
                      sparse_count[q] * qmeta.unit_size();
    }
  }

  if (ailego_unlikely(ctx->error())) {
    return IndexError_Runtime;
  }

  return 0;
}

//! Similarity search with sparse inputs
int HnswSparseSearcher::search_bf_by_p_keys_impl(
    const uint32_t *sparse_count, const uint32_t *sparse_indices,
    const void *sparse_query, const std::vector<std::vector<uint64_t>> &p_keys,
    const IndexQueryMeta &qmeta, uint32_t count,
    Context::Pointer &context) const {
  if (ailego_unlikely(!context)) {
    LOG_ERROR("The context is not created by this searcher");
    return IndexError_Mismatch;
  }

  if (ailego_unlikely(p_keys.size() != count)) {
    LOG_ERROR("The size of p_keys is not equal to count");
    return IndexError_InvalidArgument;
  }

  HnswSparseContext *ctx = dynamic_cast<HnswSparseContext *>(context.get());
  ailego_do_if_false(ctx) {
    LOG_ERROR("Cast context to HnswSparseContext failed");
    return IndexError_Cast;
  }
  if (ctx->magic() != magic_) {
    //! context is created by another searcher or streamer
    int ret = update_context(ctx);
    if (ret != 0) {
      return ret;
    }
  }

  ctx->clear();
  ctx->resize_results(count);

  const uint32_t *sparse_indices_tmp = sparse_indices;
  const void *sparse_query_tmp = sparse_query;

  if (ctx->group_by_search()) {
    if (!ctx->group_by().is_valid()) {
      LOG_ERROR("Invalid group-by function");
      return IndexError_Runtime;
    }

    std::function<std::string(node_id_t)> group_by = [&](node_id_t id) {
      return ctx->group_by()(entity_.get_key(id));
    };

    for (size_t q = 0; q < count; ++q) {
      std::string sparse_query_buffer;
      SparseUtility::TransSparseFormat(
          sparse_count[q], sparse_indices_tmp, sparse_query_tmp,
          entity_.sparse_unit_size(), sparse_query_buffer);

      ctx->reset_query(sparse_query_buffer.data());
      ctx->group_topk_heaps().clear();

      for (size_t idx = 0; idx < p_keys[q].size(); ++idx) {
        uint64_t pk = p_keys[q][idx];
        if (!ctx->filter().is_valid() || !ctx->filter()(pk)) {
          node_id_t id = entity_.get_id(pk);
          if (id != kInvalidNodeId) {
            dist_t dist = ctx->dist_calculator().dist(id);

            std::string group_id = group_by(id);

            auto &topk_heap = ctx->group_topk_heaps()[group_id];
            if (topk_heap.empty()) {
              topk_heap.limit(ctx->group_topk());
            }
            topk_heap.emplace_back(id, dist);
          }
        }
      }
      ctx->topk_to_result(q);

      sparse_indices_tmp += sparse_count[q];
      sparse_query_tmp = reinterpret_cast<const char *>(sparse_query_tmp) +
                         sparse_count[q] * qmeta.unit_size();
    }
  } else {
    for (size_t q = 0; q < count; ++q) {
      std::string sparse_query_buffer;
      SparseUtility::TransSparseFormat(
          sparse_count[q], sparse_indices_tmp, sparse_query_tmp,
          entity_.sparse_unit_size(), sparse_query_buffer);

      ctx->reset_query(sparse_query_buffer.data());
      ctx->topk_heap().clear();
      for (size_t idx = 0; idx < p_keys[q].size(); ++idx) {
        uint64_t pk = p_keys[q][idx];
        if (!ctx->filter().is_valid() || !ctx->filter()(pk)) {
          node_id_t id = entity_.get_id(pk);
          if (id != kInvalidNodeId) {
            dist_t dist = ctx->dist_calculator().dist(id);
            ctx->topk_heap().emplace(id, dist);
          }
        }
      }
      ctx->topk_to_result(q);

      sparse_indices_tmp += sparse_count[q];
      sparse_query_tmp = reinterpret_cast<const char *>(sparse_query_tmp) +
                         sparse_count[q] * qmeta.unit_size();
    }
  }

  if (ailego_unlikely(ctx->error())) {
    return IndexError_Runtime;
  }

  return 0;
}

//! Create a search context bound to a clone of this searcher's entity and
//! configured with the searcher's current settings.
//!
//! Returns an empty pointer when the index is not loaded, when the entity
//! cannot be cloned, when the context cannot be allocated, or when the
//! context fails to initialize.
IndexSearcher::Context::Pointer HnswSparseSearcher::create_context() const {
  if (ailego_unlikely(state_ != STATE_LOADED)) {
    LOG_ERROR("Load the index first before create context");
    return Context::Pointer();
  }
  const HnswSparseEntity::Pointer search_ctx_entity = entity_.clone();
  if (!search_ctx_entity) {
    LOG_ERROR("Failed to create search context entity");
    return Context::Pointer();
  }
  HnswSparseContext *ctx =
      new (std::nothrow) HnswSparseContext(metric_, search_ctx_entity);
  if (ailego_unlikely(ctx == nullptr)) {
    LOG_ERROR("Failed to new HnswSparseContext");
    return Context::Pointer();
  }
  ctx->set_ef(ef_);
  ctx->set_max_scan_num(max_scan_num_);
  uint32_t filter_mode =
      bf_enabled_ ? VisitFilter::BloomFilter : VisitFilter::ByteMap;
  ctx->set_filter_mode(filter_mode);
  ctx->set_filter_negative_probability(bf_negative_probability_);
  ctx->set_magic(magic_);
  ctx->set_force_padding_topk(force_padding_topk_enabled_);
  ctx->set_bruteforce_threshold(bruteforce_threshold_);
  //! The comparison sits inside the branch hint so that the whole failure
  //! condition, not just the raw return code, is marked as unlikely.
  if (ailego_unlikely(ctx->init(HnswSparseContext::kSparseSearcherContext) !=
                      0)) {
    LOG_ERROR("Init HnswSparseContext failed");
    delete ctx;
    return Context::Pointer();
  }

  return Context::Pointer(ctx);
}

//! Create a sparse provider backed by a clone of this searcher's entity.
//! Returns an empty pointer when the entity cannot be cloned. The provider
//! is allocated with nothrow new, so the returned pointer wraps nullptr if
//! that allocation fails.
IndexSearcher::SparseProvider::Pointer
HnswSparseSearcher::create_sparse_provider(void) const {
  LOG_DEBUG("HnswSparseSearcher create sparse provider");

  auto cloned = entity_.clone();
  if (ailego_unlikely(!cloned)) {
    LOG_ERROR("Clone HnswSparseEntity failed");
    return SparseProvider::Pointer();
  }

  auto *provider = new (std::nothrow)
      HnswSparseIndexProvider(meta_, cloned, "HnswSparseSearcher");
  return SparseProvider::Pointer(provider);
}

//! Fetch the sparse vector stored under `key`. The lookup is delegated to
//! the entity and its status code is returned unchanged.
int HnswSparseSearcher::get_sparse_vector(
    uint64_t key, uint32_t *sparse_count, std::string *sparse_indices_buffer,
    std::string *sparse_values_buffer) const {
  const int status = entity_.get_sparse_vector_by_key(
      key, sparse_count, sparse_indices_buffer, sparse_values_buffer);
  return status;
}

//! Register HnswSparseSearcher with the index factory.
//! NOTE(review): presumably this makes the searcher creatable by name
//! through the factory -- confirm against the macro definition.
INDEX_FACTORY_REGISTER_SEARCHER(HnswSparseSearcher);

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_sparse/hnsw_sparse_searcher.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <zvec/core/framework/index_framework.h>
#include "hnsw_sparse_searcher_entity.h"
#include "hnsw_sparse_streamer.h"

namespace zvec {
namespace core {

//! Searcher over an HNSW sparse index. Implements the IndexSearcher hooks
//! for similarity search, brute-force search, linear search restricted to
//! primary keys, sparse-vector retrieval, and context/provider creation.
//! Not copyable.
class HnswSparseSearcher : public IndexSearcher {
 public:
  using ContextPointer = IndexSearcher::Context::Pointer;

 public:
  HnswSparseSearcher(void);
  virtual ~HnswSparseSearcher(void);

  HnswSparseSearcher(const HnswSparseSearcher &) = delete;
  HnswSparseSearcher &operator=(const HnswSparseSearcher &) = delete;

 protected:
  //! Initialize Searcher
  int init(const ailego::Params &params) override;

  //! Cleanup Searcher
  int cleanup(void) override;

  //! Load Index from storage
  int load(IndexStorage::Pointer container,
           IndexMetric::Pointer measure) override;

  //! Unload index from storage
  int unload(void) override;

  //! Similarity search with sparse inputs (single query).
  //! Forwards to the batched overload with count = 1.
  int search_impl(const uint32_t sparse_count, const uint32_t *sparse_indices,
                  const void *sparse_query, const IndexQueryMeta &qmeta,
                  Context::Pointer &context) const override {
    return search_impl(&sparse_count, sparse_indices, sparse_query, qmeta, 1,
                       context);
  }

  //! Similarity search with sparse inputs (batch of `count` queries)
  int search_impl(const uint32_t *sparse_count, const uint32_t *sparse_indices,
                  const void *sparse_query, const IndexQueryMeta &qmeta,
                  uint32_t count, Context::Pointer &context) const override;

  //! Similarity brute force search with sparse inputs (single query).
  //! Forwards to the batched overload with count = 1.
  int search_bf_impl(const uint32_t sparse_count,
                     const uint32_t *sparse_indices, const void *sparse_query,
                     const IndexQueryMeta &qmeta,
                     Context::Pointer &context) const override {
    return search_bf_impl(&sparse_count, sparse_indices, sparse_query, qmeta, 1,
                          context);
  }

  //! Similarity brute force search with sparse inputs (batch of `count`
  //! queries)
  int search_bf_impl(const uint32_t *sparse_count,
                     const uint32_t *sparse_indices, const void *sparse_query,
                     const IndexQueryMeta &qmeta, uint32_t count,
                     Context::Pointer &context) const override;

  //! Linear search by primary keys (single query).
  //! Forwards to the batched overload with count = 1, so p_keys must hold
  //! exactly one key list.
  int search_bf_by_p_keys_impl(const uint32_t sparse_count,
                               const uint32_t *sparse_indices,
                               const void *sparse_query,
                               const std::vector<std::vector<uint64_t>> &p_keys,
                               const IndexQueryMeta &qmeta,
                               ContextPointer &context) const override {
    return search_bf_by_p_keys_impl(&sparse_count, sparse_indices, sparse_query,
                                    p_keys, qmeta, 1, context);
  }

  //! Linear search by primary keys (batch of `count` queries); p_keys[q]
  //! lists the candidate keys for query q
  int search_bf_by_p_keys_impl(const uint32_t *sparse_count,
                               const uint32_t *sparse_indices,
                               const void *sparse_query,
                               const std::vector<std::vector<uint64_t>> &p_keys,
                               const IndexQueryMeta &qmeta, uint32_t count,
                               ContextPointer &context) const override;

  //! Fetch sparse vector by key
  int get_sparse_vector(uint64_t key, uint32_t *sparse_count,
                        std::string *sparse_indices_buffer,
                        std::string *sparse_values_buffer) const override;

  //! Create a searcher context
  ContextPointer create_context() const override;

  //! Create a sparse provider
  IndexSearcher::SparseProvider::Pointer create_sparse_provider(
      void) const override;

  //! Retrieve statistics
  const Stats &stats(void) const override {
    return stats_;
  }

  //! Retrieve meta of index
  const IndexMeta &meta(void) const override {
    return meta_;
  }

  //! Retrieve params of index
  const ailego::Params &params(void) const override {
    return params_;
  }

  void print_debug_info() override;

 private:
  //! To share ctx across streamer/searcher, we need to update the context for
  //! current streamer/searcher
  int update_context(HnswSparseContext *ctx) const;

 private:
  //! Lifecycle state; create_context() requires STATE_LOADED
  enum State { STATE_INIT = 0, STATE_INITED = 1, STATE_LOADED = 2 };

  HnswSparseSearcherEntity entity_{};
  HnswSparseAlgorithm::UPointer alg_;  // impl graph algorithm

  IndexMetric::Pointer metric_{};
  IndexMeta meta_{};
  ailego::Params params_{};
  Stats stats_;
  //! ef_, max_scan_num_, bruteforce_threshold_, bf_enabled_,
  //! bf_negative_probability_ and force_padding_topk_enabled_ are copied into
  //! every context built by create_context()
  uint32_t ef_{HnswSparseEntity::kDefaultEf};
  uint32_t max_scan_num_{0U};
  uint32_t bruteforce_threshold_{HnswSparseEntity::kDefaultBruteForceThreshold};
  float max_scan_ratio_{HnswSparseEntity::kDefaultScanRatio};
  //! Selects VisitFilter::BloomFilter (true) or VisitFilter::ByteMap (false)
  //! as the context filter mode
  bool bf_enabled_{false};
  bool check_crc_enabled_{false};
  bool neighbors_in_memory_enabled_{false};
  bool force_padding_topk_enabled_{false};
  float bf_negative_probability_{
      HnswSparseEntity::kDefaultBFNegativeProbability};

  bool query_filtering_enabled_{false};
  float query_filtering_ratio_{HnswSparseEntity::kDefaultQueryFilteringRatio};

  //! Stamped on contexts created here; a context carrying a different magic
  //! goes through update_context() before being used
  uint32_t magic_{0U};

  State state_{STATE_INIT};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_sparse/hnsw_sparse_searcher_entity.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "hnsw_sparse_searcher_entity.h"
#include <zvec/ailego/hash/crc32c.h>
#include "utility/sparse_utility.h"

namespace zvec {
namespace core {

//! Default constructor: members keep their in-class/default initial values.
HnswSparseSearcherEntity::HnswSparseSearcherEntity() = default;

int HnswSparseSearcherEntity::cleanup(void) {
  container_.reset();
  sparse_vector_meta_.reset();
  keys_.reset();
  neighbors_.reset();
  neighbors_meta_.reset();
  sparse_vectors_.reset();
  neighbors_in_memory_enabled_ = false;
  loaded_ = false;

  this->HnswSparseEntity::cleanup();

  return 0;
}

//! Read the primary key stored for local node `id` from the keys segment.
//! Returns kInvalidKey when the segment read comes back short.
key_t HnswSparseSearcherEntity::get_key(node_id_t id) const {
  const void *raw;
  const auto got = keys_->read(id * sizeof(key_t), &raw, sizeof(key_t));
  if (ailego_unlikely(got != sizeof(key_t))) {
    LOG_ERROR("Read key from segment failed");
    return kInvalidKey;
  }
  return *static_cast<const key_t *>(raw);
}

//! Resolve a primary key to its local node id.
//! Binary-searches the mapping segment: each probe reads a node id from the
//! mapping, then that node's key from the keys segment, and compares it
//! against `key`. Returns kInvalidNodeId when the mapping segment is
//! missing, a read fails, or the key is not found.
node_id_t HnswSparseSearcherEntity::get_id(key_t key) const {
  if (ailego_unlikely(!mapping_)) {
    LOG_ERROR("Index missing mapping segment");
    return kInvalidNodeId;
  }

  node_id_t lo = 0UL;
  node_id_t hi = doc_cnt();
  while (lo < hi) {
    const node_id_t mid = lo + (hi - lo) / 2;

    //! Probe the mapping slot for the candidate local id
    const void *slot;
    if (ailego_unlikely(
            mapping_->read(mid * sizeof(node_id_t), &slot, sizeof(node_id_t)) !=
            sizeof(node_id_t))) {
      LOG_ERROR("Read key from segment failed");
      return kInvalidNodeId;
    }
    const node_id_t local_id = *static_cast<const node_id_t *>(slot);

    //! Fetch the key that belongs to the candidate
    const void *key_raw;
    if (ailego_unlikely(keys_->read(local_id * sizeof(key_t), &key_raw,
                                    sizeof(key_t)) != sizeof(key_t))) {
      LOG_ERROR("Read key from segment failed");
      return kInvalidNodeId;
    }
    const key_t stored = *static_cast<const key_t *>(key_raw);

    if (stored == key) {
      return local_id;
    }
    if (stored < key) {
      lo = mid + 1;
    } else {
      hi = mid;
    }
  }
  return kInvalidNodeId;
}

int HnswSparseSearcherEntity::get_sparse_vector_by_key(
    key_t key, uint32_t *sparse_count, std::string *sparse_indices_buffer,
    std::string *sparse_values_buffer) const {
  *sparse_count = 0;

  auto id = get_id(key);
  if (id == kInvalidNodeId) {
    return IndexError_NoExist;
  }

  const void *sparse_data = get_sparse_data(id);
  if (sparse_data == nullptr) {
    return IndexError_InvalidValue;
  }

  SparseUtility::ReverseSparseFormat(sparse_data, sparse_count,
                                     sparse_indices_buffer,
                                     sparse_values_buffer, sparse_unit_size());

  return 0;
}

//! Read the sparse meta record of node `id` from the vector-meta segment.
//! Records are sparse_meta_size() bytes each and laid out back to back.
//! Returns nullptr when the read comes back short.
const void *HnswSparseSearcherEntity::get_vector_meta(node_id_t id) const {
  const size_t offset = sparse_meta_size() * id;
  const size_t read_size = sparse_meta_size();

  const void *record;
  const auto got = sparse_vector_meta_->read(offset, &record, read_size);
  if (ailego_unlikely(got != read_size)) {
    LOG_ERROR("Read vector from segment failed");
    return nullptr;
  }
  return record;
}

//! MemoryBlock flavour of get_vector_meta(): points `block` at the meta
//! record of node `id` (or at nullptr when the read failed).
//! Always returns 0.
int HnswSparseSearcherEntity::get_vector_meta(
    const node_id_t id, IndexStorage::MemoryBlock &block) const {
  block.reset(const_cast<void *>(get_vector_meta(id)));
  return 0;
}

//! Batched read of sparse meta records: fills vecs[i] with a pointer to the
//! record of ids[i] for i in [0, count).
//! The request descriptors live in segment_datas_, so `count` must not
//! exceed its size (asserted). Returns IndexError_ReadData when the segment
//! read fails, 0 otherwise.
int HnswSparseSearcherEntity::get_vector_metas(const node_id_t *ids,
                                               uint32_t count,
                                               const void **vecs) const {
  ailego_assert_with(count <= segment_datas_.size(), "invalid count");

  const size_t read_size = sparse_meta_size();

  //! Describe one read request per id
  for (uint32_t i = 0; i != count; ++i) {
    segment_datas_[i].length = read_size;
    segment_datas_[i].offset = sparse_meta_size() * ids[i];

    ailego_assert_with(
        segment_datas_[i].offset < sparse_vector_meta_->data_size(),
        "invalid offset");
  }

  const bool ok = sparse_vector_meta_->read(&segment_datas_[0], count);
  if (ailego_unlikely(!ok)) {
    LOG_ERROR("Read vectors from segment failed");
    return IndexError_ReadData;
  }

  //! Hand the resolved data pointers back to the caller
  for (uint32_t i = 0; i != count; ++i) {
    vecs[i] = segment_datas_[i].data;
  }
  return 0;
}

int HnswSparseSearcherEntity::get_vector_metas(
    const node_id_t *ids, uint32_t count,
    std::vector<IndexStorage::MemoryBlock> &block_vecs) const {
  const void *vecs[count];
  get_vector_metas(ids, count, vecs);
  for (uint32_t i = 0; i < count; ++i) {
    block_vecs.emplace_back(IndexStorage::MemoryBlock((void *)vecs[i]));
  }
  return 0;
}

//! Return the neighbor list of node `id` at graph level `level`.
//!
//! Level 0 is served from the flattened in-memory buffer when it is
//! enabled, otherwise from the level-0 offsets + neighbors segments. Levels
//! above 0 are served from the upper-level offsets + neighbors segments.
//! Any failed segment read yields an empty list ({0, nullptr}).
const Neighbors HnswSparseSearcherEntity::get_neighbors(level_t level,
                                                        node_id_t id) const {
  if (level != 0) {
    //! Upper levels: locate the node's block, then the slot of this level
    const HnswSparseNeighborMeta *m;
    if (ailego_unlikely(
            upper_neighbors_meta_->read(id * sizeof(HnswSparseNeighborMeta),
                                        reinterpret_cast<const void **>(&m),
                                        sizeof(HnswSparseNeighborMeta)) !=
            sizeof(HnswSparseNeighborMeta))) {
      LOG_ERROR("Read neighbors meta from segment failed");
      return {0, nullptr};
    }

    ailego_assert_with(level <= m->level, "invalid level");
    size_t offset = m->offset + (level - 1) * upper_neighbors_size();
    ailego_assert_with(offset <= upper_neighbors_->data_size(),
                       "invalid offset");

    const void *raw;
    if (ailego_unlikely(
            upper_neighbors_->read(offset, &raw, upper_neighbors_size()) !=
            upper_neighbors_size())) {
      LOG_ERROR("Read neighbors from segment failed");
      return {0, nullptr};
    }

    auto upper_hd = reinterpret_cast<const NeighborsHeader *>(raw);
    return {upper_hd->neighbor_cnt, upper_hd->neighbors};
  }

  if (neighbors_in_memory_enabled_) {
    //! Level 0, flattened copy: fixed-size slot per node
    auto flat_hd = reinterpret_cast<const NeighborsHeader *>(
        fixed_neighbors_.get() + neighbors_size() * id);
    return {flat_hd->neighbor_cnt, flat_hd->neighbors};
  }

  //! Level 0, on-segment: meta gives offset and count of the id list
  const SparseGraphNeighborMeta *m;
  if (ailego_unlikely(
          neighbors_meta_->read(id * sizeof(SparseGraphNeighborMeta),
                                reinterpret_cast<const void **>(&m),
                                sizeof(SparseGraphNeighborMeta)) !=
          sizeof(SparseGraphNeighborMeta))) {
    LOG_ERROR("Read neighbors meta from segment failed");
    return {0, nullptr};
  }

  const void *ids_raw;
  if (ailego_unlikely(neighbors_->read(m->offset, &ids_raw,
                                       m->neighbor_cnt * sizeof(node_id_t)) !=
                      m->neighbor_cnt * sizeof(node_id_t))) {
    LOG_ERROR("Read neighbors from segment failed");
    return {0, nullptr};
  }
  return {static_cast<uint32_t>(m->neighbor_cnt),
          reinterpret_cast<const node_id_t *>(ids_raw)};
}

//! Bind this entity to `container` and load all index segments from it.
//! `check_crc` is passed through to load_segments(). On success the entity
//! is marked as loaded and a summary of the index is logged; on failure the
//! error code from load_segments() is returned.
int HnswSparseSearcherEntity::load(const IndexStorage::Pointer &container,
                                   bool check_crc) {
  container_ = container;

  const int status = load_segments(check_crc);
  if (status != 0) {
    return status;
  }
  loaded_ = true;

  LOG_INFO(
      "Index info: docCnt=%u entryPoint=%u maxLevel=%d efConstruct=%zu "
      "l0NeighborCnt=%zu upperNeighborCnt=%zu scalingFactor=%zu "
      "nodeSize=%zu sparesMetaSegmentSize=%zu keySegmentSize=%zu "
      "neighborsSegmentSize=%zu neighborsMetaSegmentSize=%zu "
      "sparseVectorSegmentSize=%zu",
      doc_cnt(), entry_point(), cur_max_level(), ef_construction(),
      l0_neighbor_cnt(), upper_neighbor_cnt(), scaling_factor(), node_size(),
      sparse_vector_meta_->data_size(), keys_->data_size(),
      neighbors_->data_size(), neighbors_meta_->data_size(),
      sparse_vectors_->data_size());
  return 0;
}

//! Fetch and validate every segment of the index from container_.
//!
//! Steps, in order:
//!   1. read the graph and hnsw header segments and install them as the
//!      entity header;
//!   2. size segment_datas_ (scratch used by batched reads) to the larger of
//!      the level-0 and upper-level neighbor counts;
//!   3. fetch the vector-meta, keys, sparse-vectors, neighbors, offsets,
//!      upper neighbors, upper offsets and mapping segments, checking their
//!      presence and, for some, a minimum size derived from doc_cnt();
//!   4. optionally verify segment CRCs;
//!   5. optionally flatten level-0 neighbors into memory.
//!
//! Returns 0 on success or an IndexError_* code on the first failure.
int HnswSparseSearcherEntity::load_segments(bool check_crc) {
  //! load header
  const void *data = nullptr;
  HNSWSparseHeader hd;
  auto graph_hd_segment = container_->get(kSparseGraphHeaderSegmentId);
  if (!graph_hd_segment || graph_hd_segment->data_size() < sizeof(hd.graph)) {
    LOG_ERROR("Miss or invalid segment %s",
              kSparseGraphHeaderSegmentId.c_str());
    return IndexError_InvalidFormat;
  }
  if (graph_hd_segment->read(0, reinterpret_cast<const void **>(&data),
                             sizeof(hd.graph)) != sizeof(hd.graph)) {
    LOG_ERROR("Read segment %s failed", kSparseGraphHeaderSegmentId.c_str());
    return IndexError_ReadData;
  }
  memcpy(&hd.graph, data, sizeof(hd.graph));

  auto hnsw_hd_segment = container_->get(kSparseHnswHeaderSegmentId);
  if (!hnsw_hd_segment || hnsw_hd_segment->data_size() < sizeof(hd.hnsw)) {
    LOG_ERROR("Miss or invalid segment %s", kSparseHnswHeaderSegmentId.c_str());
    return IndexError_InvalidFormat;
  }
  if (hnsw_hd_segment->read(0, reinterpret_cast<const void **>(&data),
                            sizeof(hd.hnsw)) != sizeof(hd.hnsw)) {
    LOG_ERROR("Read segment %s failed", kSparseHnswHeaderSegmentId.c_str());
    return IndexError_ReadData;
  }
  memcpy(&hd.hnsw, data, sizeof(hd.hnsw));
  //! Install the header first: doc_cnt(), cur_max_level() and the neighbor
  //! counts used below are read after this assignment
  *mutable_header() = hd;
  segment_datas_.resize(std::max(l0_neighbor_cnt(), upper_neighbor_cnt()));

  sparse_vector_meta_ = container_->get(kSparseGraphVectorMetaSegmentId);
  if (!sparse_vector_meta_) {
    LOG_ERROR("IndexStorage get segment %s failed",
              kSparseGraphVectorMetaSegmentId.c_str());
    return IndexError_InvalidFormat;
  }

  keys_ = container_->get(kSparseGraphKeysSegmentId);
  if (!keys_) {
    LOG_ERROR("IndexStorage get segment %s failed",
              kSparseGraphKeysSegmentId.c_str());
    return IndexError_InvalidFormat;
  }

  sparse_vectors_ = container_->get(kSparseGraphVectorsSegmentId);
  if (!sparse_vectors_) {
    LOG_ERROR("IndexStorage get segment %s failed",
              kSparseGraphVectorsSegmentId.c_str());
    return IndexError_InvalidFormat;
  }

  //! An empty level-0 neighbors segment is only accepted when the index
  //! holds at most one document
  neighbors_ = container_->get(kSparseGraphNeighborsSegmentId);
  if (!neighbors_ || (neighbors_->data_size() == 0 && doc_cnt() > 1)) {
    LOG_ERROR("IndexStorage get segment %s failed or empty",
              kSparseGraphNeighborsSegmentId.c_str());
    return IndexError_InvalidArgument;
  }
  //! The offsets segment must hold one SparseGraphNeighborMeta per document
  neighbors_meta_ = container_->get(kSparseGraphOffsetsSegmentId);
  if (!neighbors_meta_ || neighbors_meta_->data_size() <
                              sizeof(SparseGraphNeighborMeta) * doc_cnt()) {
    LOG_ERROR("IndexStorage get segment %s failed or invalid size",
              kSparseGraphOffsetsSegmentId.c_str());
    return IndexError_InvalidArgument;
  }

  //! An empty upper-level neighbors segment is only accepted when the graph
  //! has no level above 0
  upper_neighbors_ = container_->get(kSparseHnswNeighborsSegmentId);
  if (!upper_neighbors_ ||
      (upper_neighbors_->data_size() == 0 && cur_max_level() > 0)) {
    LOG_ERROR("IndexStorage get segment %s failed or empty",
              kSparseHnswNeighborsSegmentId.c_str());
    return IndexError_InvalidArgument;
  }

  //! The upper offsets segment must hold one HnswSparseNeighborMeta per
  //! document
  upper_neighbors_meta_ = container_->get(kSparseHnswOffsetsSegmentId);
  if (!upper_neighbors_meta_ ||
      upper_neighbors_meta_->data_size() <
          sizeof(HnswSparseNeighborMeta) * doc_cnt()) {
    LOG_ERROR("IndexStorage get segment %s failed or invalid size",
              kSparseHnswOffsetsSegmentId.c_str());
    return IndexError_InvalidArgument;
  }

  //! The mapping segment must hold one node id per document
  mapping_ = container_->get(kSparseGraphMappingSegmentId);
  if (!mapping_ || mapping_->data_size() < sizeof(node_id_t) * doc_cnt()) {
    LOG_ERROR("IndexStorage get segment %s failed or invalid size",
              kSparseGraphMappingSegmentId.c_str());
    return IndexError_InvalidArgument;
  }

  if (check_crc) {
    //! Segments covered by the CRC check; the mapping segment is not in
    //! this list
    std::vector<SegmentPointer> segments;
    segments.emplace_back(graph_hd_segment);
    segments.emplace_back(hnsw_hd_segment);
    segments.emplace_back(sparse_vector_meta_);
    segments.emplace_back(keys_);

    segments.emplace_back(sparse_vectors_);

    segments.emplace_back(neighbors_);
    segments.emplace_back(neighbors_meta_);
    segments.emplace_back(upper_neighbors_);
    segments.emplace_back(upper_neighbors_meta_);

    if (!do_crc_check(segments)) {
      LOG_ERROR("Check index crc failed, the index may broken");
      return IndexError_Runtime;
    }
  }

  //! Optionally copy level-0 neighbors into a fixed-stride memory buffer
  if (neighbors_in_memory_enabled_) {
    int ret = load_and_flat_neighbors();
    if (ret != 0) {
      return ret;
    }
  }

  return 0;
}

int HnswSparseSearcherEntity::load_and_flat_neighbors() {
  fixed_neighbors_.reset(
      new (std::nothrow) char[neighbors_size() * doc_cnt()]{},
      std::default_delete<char[]>());
  if (!fixed_neighbors_) {
    LOG_ERROR("Malloc memory failed");
    return IndexError_NoMemory;
  }

  //! Get a new segemnt to release the buffer after loading neighbors
  auto neighbors_meta = container_->get(kSparseGraphOffsetsSegmentId);
  if (!neighbors_meta) {
    LOG_ERROR("IndexStorage get segment graph.offsets failed");
    return IndexError_InvalidArgument;
  }

  const SparseGraphNeighborMeta *neighbors_index = nullptr;
  if (neighbors_meta->read(0, reinterpret_cast<const void **>(&neighbors_index),
                           neighbors_meta->data_size()) !=
      neighbors_meta->data_size()) {
    LOG_ERROR("Read segment %s data failed",
              kSparseGraphOffsetsSegmentId.c_str());
    return IndexError_InvalidArgument;
  }

  const char *neighbor_data;
  for (node_id_t id = 0; id < doc_cnt(); ++id) {
    size_t rd_size = neighbors_index[id].neighbor_cnt * sizeof(node_id_t);
    if (ailego_unlikely(
            neighbors_->read(neighbors_index[id].offset,
                             reinterpret_cast<const void **>(&neighbor_data),
                             rd_size) != rd_size)) {
      LOG_ERROR("Read neighbors from segment failed");
      return IndexError_ReadData;
    }
    // copy level 0 neighbors to fixed size neighbors memory
    char *dst = fixed_neighbors_.get() + neighbors_size() * id;
    *reinterpret_cast<uint32_t *>(dst) = neighbors_index[id].neighbor_cnt;
    memcpy(dst + sizeof(uint32_t), neighbor_data, rd_size);
  }

  return 0;
}

int HnswSparseSearcherEntity::get_fixed_neighbors(
    std::vector<uint32_t> *fixed_neighbors) const {
  //! Get a new segemnt to release the buffer after loading neighbors
  auto neighbors_meta = container_->get(kSparseGraphOffsetsSegmentId);
  if (!neighbors_meta) {
    LOG_ERROR("IndexStorage get segment graph.offsets failed");
    return IndexError_InvalidArgument;
  }

  const SparseGraphNeighborMeta *neighbors_index = nullptr;
  size_t meta_size = neighbors_meta->data_size();
  if (neighbors_meta->read(0, reinterpret_cast<const void **>(&neighbors_index),
                           meta_size) != meta_size) {
    LOG_ERROR("Read segment %s data failed",
              kSparseGraphOffsetsSegmentId.c_str());
    return IndexError_InvalidArgument;
  }

  size_t fixed_neighbor_cnt = l0_neighbor_cnt();
  fixed_neighbors->resize((fixed_neighbor_cnt + 1) * doc_cnt(), kInvalidNodeId);

  size_t neighbors_cnt_offset = fixed_neighbor_cnt * doc_cnt();
  size_t total_neighbor_cnt = 0;
  for (node_id_t id = 0; id < doc_cnt(); ++id) {
    size_t cur_neighbor_cnt = neighbors_index[id].neighbor_cnt;
    if (cur_neighbor_cnt == 0) {
      (*fixed_neighbors)[neighbors_cnt_offset + id] = 0;
      continue;
    }
    size_t rd_size = cur_neighbor_cnt * sizeof(node_id_t);
    const uint32_t *neighbors;
    if (neighbors_->read(neighbors_index[id].offset,
                         reinterpret_cast<const void **>(&neighbors),
                         rd_size) != rd_size) {
      LOG_ERROR("Read neighbors from segment failed");
      return IndexError_ReadData;
    }

    // copy level 0 neighbors to fixed size neighbors memory
    auto it = fixed_neighbors->begin() + id * fixed_neighbor_cnt;
    std::copy(neighbors, neighbors + cur_neighbor_cnt, it);

    (*fixed_neighbors)[neighbors_cnt_offset + id] = cur_neighbor_cnt;
    total_neighbor_cnt += cur_neighbor_cnt;
  }
  LOG_INFO("total neighbor cnt: %zu, average neighbor cnt: %zu",
           total_neighbor_cnt, total_neighbor_cnt / doc_cnt());

  return 0;
}

//! Recompute the CRC32C of each segment's data in 4096-byte chunks and
//! compare it with the checksum the segment reports.
//! Returns false on the first mismatch (a read that yields no bytes ends
//! the scan of that segment early), true when every segment matches.
bool HnswSparseSearcherEntity::do_crc_check(
    std::vector<SegmentPointer> &segments) const {
  constexpr size_t blk_size = 4096;

  for (auto &segment : segments) {
    uint32_t crc = 0;
    size_t offset = 0;

    while (offset < segment->data_size()) {
      const void *chunk;
      const size_t want = std::min(blk_size, segment->data_size() - offset);
      const size_t got = segment->read(offset, &chunk, want);
      if (got == 0) {
        break;
      }
      crc = ailego::Crc32c::Hash(chunk, got, crc);
      offset += got;
    }

    if (crc != segment->data_crc()) {
      return false;
    }
  }
  return true;
}

//! Build a new searcher entity over clones of every segment of this one,
//! so that it can be used by a separate search context.
//! The flattened level-0 neighbor buffer (fixed_neighbors_) is passed on as
//! is rather than cloned.
//! Returns an empty pointer when any segment fails to clone or when the new
//! entity cannot be allocated.
const HnswSparseEntity::Pointer HnswSparseSearcherEntity::clone() const {
  auto keys = keys_->clone();
  if (ailego_unlikely(!keys)) {
    LOG_ERROR("clone segment %s failed", kSparseGraphKeysSegmentId.c_str());
    return HnswSparseEntity::Pointer();
  }

  auto mapping = mapping_->clone();
  if (ailego_unlikely(!mapping)) {
    LOG_ERROR("clone segment %s failed", kSparseGraphMappingSegmentId.c_str());
    return HnswSparseEntity::Pointer();
  }

  auto sparse_vector_meta = sparse_vector_meta_->clone();
  if (ailego_unlikely(!sparse_vector_meta)) {
    LOG_ERROR("clone segment %s failed",
              kSparseGraphVectorMetaSegmentId.c_str());
    return HnswSparseEntity::Pointer();
  }

  auto sparse_vectors = sparse_vectors_->clone();
  if (ailego_unlikely(!sparse_vectors)) {
    LOG_ERROR("clone segment %s failed", kSparseGraphVectorsSegmentId.c_str());
    return HnswSparseEntity::Pointer();
  }

  auto neighbors = neighbors_->clone();
  if (ailego_unlikely(!neighbors)) {
    LOG_ERROR("clone segment %s failed",
              kSparseGraphNeighborsSegmentId.c_str());
    return HnswSparseEntity::Pointer();
  }
  auto upper_neighbors = upper_neighbors_->clone();
  //! Check the clone that was just made, not the level-0 one checked above;
  //! otherwise a failed upper-level clone would go unnoticed.
  if (ailego_unlikely(!upper_neighbors)) {
    LOG_ERROR("clone segment %s failed", kSparseHnswNeighborsSegmentId.c_str());
    return HnswSparseEntity::Pointer();
  }
  auto neighbors_meta = neighbors_meta_->clone();
  if (ailego_unlikely(!neighbors_meta)) {
    LOG_ERROR("clone segment %s failed", kSparseGraphOffsetsSegmentId.c_str());
    return HnswSparseEntity::Pointer();
  }
  auto upper_neighbors_meta = upper_neighbors_meta_->clone();
  if (ailego_unlikely(!upper_neighbors_meta)) {
    LOG_ERROR("clone segment %s failed", kSparseHnswOffsetsSegmentId.c_str());
    return HnswSparseEntity::Pointer();
  }

  SegmentGroupParam neighbor_group{neighbors, neighbors_meta, upper_neighbors,
                                   upper_neighbors_meta};

  HnswSparseSearcherEntity *entity = new (std::nothrow)
      HnswSparseSearcherEntity(header(), keys, mapping, neighbor_group,
                               sparse_vector_meta, sparse_vectors,
                               fixed_neighbors_, neighbors_in_memory_enabled_);
  if (ailego_unlikely(!entity)) {
    LOG_ERROR("HnswSparseSearcherEntity new failed");
  }

  return HnswSparseEntity::Pointer(entity);
}

//! Read `len` bytes of sparse feature data starting at byte `offset` of the
//! sparse-vectors segment.
//! Returns a pointer to the data, or nullptr when fewer than `len` bytes
//! could be read.
const void *HnswSparseSearcherEntity::get_sparse_data(uint64_t offset,
                                                      uint32_t len) const {
  const void *payload = nullptr;
  const uint32_t got = sparse_vectors_->read(offset, &payload, len);

  if (ailego_unlikely(got != len)) {
    LOG_ERROR("Read sparse data from segment failed, %u vs %u", got, len);
    return nullptr;
  }
  return payload;
}

//! MemoryBlock flavour of get_sparse_data(offset, len): points `block` at
//! the data (or at nullptr when the read failed). Always returns 0.
int HnswSparseSearcherEntity::get_sparse_data(
    uint64_t offset, uint32_t len, IndexStorage::MemoryBlock &block) const {
  block.reset(const_cast<void *>(get_sparse_data(offset, len)));
  return 0;
}

//! Get the sparse data of node `id`: read the node's meta record, then
//! follow it to the data in the sparse-vectors segment.
//! Returns nullptr when either step fails.
const void *HnswSparseSearcherEntity::get_sparse_data(node_id_t id) const {
  const void *meta = get_vector_meta(id);
  if (meta == nullptr) {
    LOG_ERROR("get vector failed, id: %u", id);
    return nullptr;
  }

  return get_sparse_data_from_vector(meta).first;
}

//! Memory-block flavour of get_sparse_data(id): the pointer obtained from the
//! single-argument overload is handed to `block`.
//! Always returns 0; on a failed lookup `block` is reset with nullptr.
int HnswSparseSearcherEntity::get_sparse_data(
    const node_id_t id, IndexStorage::MemoryBlock &block) const {
  block.reset(const_cast<void *>(get_sparse_data(id)));
  return 0;
}

//! Get sparse data from vector
std::pair<const void *, uint32_t>
HnswSparseSearcherEntity::get_sparse_data_from_vector(const void *vec) const {
  if (vec == nullptr) {
    LOG_ERROR("vec is nullptr");

    return std::make_pair(nullptr, 0);
  }

  const char *vec_ptr = reinterpret_cast<const char *>(vec);

  uint64_t offset = *((uint64_t *)(vec_ptr));
  uint32_t sparse_vector_len = *((uint32_t *)(vec_ptr + sizeof(uint64_t)));

  const void *sparse_data = get_sparse_data(offset, sparse_vector_len);
  if (ailego_unlikely(sparse_data == nullptr)) {
    LOG_ERROR("Get nullptr sparse, offset=%zu, len=%u", (size_t)offset,
              sparse_vector_len);

    return std::make_pair(nullptr, 0);
  }

  return std::make_pair(sparse_data, sparse_vector_len);
}

int HnswSparseSearcherEntity::get_sparse_data_from_vector(
    const void *vec, IndexStorage::MemoryBlock &block,
    int &sparse_length) const {
  std::pair<const void *, uint32_t> sparse_data =
      get_sparse_data_from_vector(vec);
  block.reset((void *)sparse_data.first);
  sparse_length = sparse_data.second;
  return 0;
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_sparse/hnsw_sparse_searcher_entity.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include "hnsw_sparse_builder_entity.h"
#include "hnsw_sparse_entity.h"

namespace zvec {
namespace core {

//! Searcher-side HNSW sparse entity.
//! Holds the storage container and the segments (keys, mapping, neighbors,
//! sparse vector meta and sparse vectors) used to answer the HnswSparseEntity
//! read interface.
class HnswSparseSearcherEntity : public HnswSparseEntity {
 public:
  using Pointer = std::shared_ptr<HnswSparseSearcherEntity>;
  using SegmentPointer = IndexStorage::Segment::Pointer;

 public:
  //! Bundle of the four neighbor-related segments (level-0 neighbors and
  //! their meta, upper-level neighbors and their meta) so they can be passed
  //! to the private constructor as one argument.
  struct SegmentGroupParam {
    SegmentGroupParam(SegmentPointer neighbors_in,
                      SegmentPointer neighbors_meta_in,
                      SegmentPointer upper_neighbors_in,
                      SegmentPointer upper_neighbors_meta_in)
        : neighbors{neighbors_in},
          neighbors_meta{neighbors_meta_in},
          upper_neighbors{upper_neighbors_in},
          upper_neighbors_meta{upper_neighbors_meta_in} {}

    SegmentPointer neighbors{nullptr};
    SegmentPointer neighbors_meta{nullptr};
    SegmentPointer upper_neighbors{nullptr};
    SegmentPointer upper_neighbors_meta{nullptr};
  };

  //! Constructor
  HnswSparseSearcherEntity();

  //! Make a copy of the searcher entity, to support thread-safe operation.
  //! The segments in the container cannot be read concurrently.
  virtual const HnswSparseEntity::Pointer clone() const override;

  //! Get primary key of the node id
  virtual key_t get_key(node_id_t id) const override;

  //! Get vector local id by key
  node_id_t get_id(key_t key) const;

  //! Get sparse vector feature data by key
  virtual int get_sparse_vector_by_key(
      key_t key, uint32_t *sparse_count, std::string *sparse_indices_buffer,
      std::string *sparse_values_buffer) const override;

  //! Get vector meta data by id (returned as a raw pointer)
  virtual const void *get_vector_meta(node_id_t id) const override;

  //! Get vector meta data by id (returned through a memory block)
  virtual int get_vector_meta(const node_id_t id,
                              IndexStorage::MemoryBlock &block) const override;

  //! Get vector meta data for `count` ids (returned as raw pointers)
  virtual int get_vector_metas(const node_id_t *ids, uint32_t count,
                               const void **vecs) const override;

  //! Get vector meta data for `count` ids (returned through memory blocks)
  virtual int get_vector_metas(
      const node_id_t *ids, uint32_t count,
      std::vector<IndexStorage::MemoryBlock> &block_vecs) const override;

  //! Get sparse feature data of `len` bytes located at `offset`
  virtual const void *get_sparse_data(uint64_t offset,
                                      uint32_t len) const override;

  //! Get sparse data from id
  virtual const void *get_sparse_data(node_id_t id) const override;

  //! Get sparse data by offset and length (returned through a memory block)
  virtual int get_sparse_data(uint64_t offset, uint32_t len,
                              IndexStorage::MemoryBlock &block) const override;

  //! Get sparse data from id (returned through a memory block)
  virtual int get_sparse_data(const node_id_t id,
                              IndexStorage::MemoryBlock &block) const override;

  //! Get sparse data (pointer and length) from a vector meta record
  virtual std::pair<const void *, uint32_t> get_sparse_data_from_vector(
      const void *vec) const override;

  //! Get sparse data from a vector meta record; the data is returned through
  //! `block` and its length through `sparse_length`
  virtual int get_sparse_data_from_vector(const void *vec,
                                          IndexStorage::MemoryBlock &block,
                                          int &sparse_length) const override;

  //! Get the node id's neighbors on graph level
  virtual const Neighbors get_neighbors(level_t level,
                                        node_id_t id) const override;

  //! Load the entity from a storage container, optionally checking CRC
  virtual int load(const IndexStorage::Pointer &container,
                   bool check_crc) override;

  //! Load the segments, optionally checking CRC
  int load_segments(bool check_crc);

  //! Cleanup the entity
  virtual int cleanup(void) override;

 public:
  //! Retrieve the value of the loaded flag
  bool is_loaded() const {
    return loaded_;
  }

  //! Enable or disable keeping neighbors in memory (stores the flag only)
  void set_neighbors_in_memory(bool enabled) {
    neighbors_in_memory_enabled_ = enabled;
  }

  //! get fixed length neighbors data
  int get_fixed_neighbors(std::vector<uint32_t> *fixed_neighbors) const;

 private:
  //! Constructor from an existing header and segment pointers.
  //! Stores the given pointers and sizes segment_datas_ to the larger of the
  //! level-0 and upper-level neighbor counts.
  HnswSparseSearcherEntity(const HNSWSparseHeader &hd,
                           const SegmentPointer &keys,
                           const SegmentPointer &mapping,
                           const SegmentGroupParam &neighbor_group,
                           const SegmentPointer &sparse_vector_meta,
                           const SegmentPointer &sparse_vectors,
                           const std::shared_ptr<char> &fixed_neighbors,
                           bool neighbors_in_memory_enabled)
      : HnswSparseEntity(hd),
        keys_(keys),
        mapping_(mapping),
        neighbors_(neighbor_group.neighbors),
        neighbors_meta_(neighbor_group.neighbors_meta),
        upper_neighbors_(neighbor_group.upper_neighbors),
        upper_neighbors_meta_(neighbor_group.upper_neighbors_meta),
        sparse_vector_meta_(sparse_vector_meta),
        sparse_vectors_(sparse_vectors),
        neighbors_in_memory_enabled_(neighbors_in_memory_enabled) {
    segment_datas_.resize(std::max(l0_neighbor_cnt(), upper_neighbor_cnt()),
                          IndexStorage::SegmentData(0U, 0U));
    fixed_neighbors_ = fixed_neighbors;
  }

  //! Run the CRC check over the given segments
  bool do_crc_check(std::vector<SegmentPointer> &segments) const;

  //! Byte size of one level-0 neighbors record: header plus l0_neighbor_cnt()
  //! node ids
  inline size_t neighbors_size() const {
    return sizeof(NeighborsHeader) + l0_neighbor_cnt() * sizeof(node_id_t);
  }

  //! Byte size of one upper-level neighbors record: header plus
  //! upper_neighbor_cnt() node ids
  inline size_t upper_neighbors_size() const {
    return sizeof(NeighborsHeader) + upper_neighbor_cnt() * sizeof(node_id_t);
  }

  //! If neighbors_in_memory_enabled, load the level0 neighbors to memory
  int load_and_flat_neighbors(void);

 public:
  //! Copying is disabled; use clone() to obtain another entity
  HnswSparseSearcherEntity(const HnswSparseSearcherEntity &) = delete;
  HnswSparseSearcherEntity &operator=(const HnswSparseSearcherEntity &) =
      delete;

 private:
  IndexStorage::Pointer container_{};

  SegmentPointer keys_{};
  SegmentPointer mapping_{};

  SegmentPointer neighbors_{};
  SegmentPointer neighbors_meta_{};
  SegmentPointer upper_neighbors_{};
  SegmentPointer upper_neighbors_meta_{};

  SegmentPointer sparse_vector_meta_{};
  SegmentPointer sparse_vectors_{};

  mutable std::vector<IndexStorage::SegmentData> segment_datas_{};
  std::shared_ptr<char> fixed_neighbors_{};  // level 0 fixed size neighbors
  bool neighbors_in_memory_enabled_{false};
  bool loaded_{false};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_sparse/hnsw_sparse_streamer.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "hnsw_sparse_streamer.h"
#include <iostream>
#include <ailego/internal/cpu_features.h>
#include <ailego/pattern/defer.h>
#include <ailego/utility/memory_helper.h>
#include "hnsw_sparse_algorithm.h"
#include "hnsw_sparse_context.h"
#include "hnsw_sparse_dist_calculator.h"
#include "hnsw_sparse_index_provider.h"

namespace zvec {
namespace core {

//! Constructor: the entity member is constructed from the streamer's stats_.
HnswSparseStreamer::HnswSparseStreamer() : entity_(stats_) {}

//! Destructor: runs cleanup() only when the streamer is in the INITED state.
HnswSparseStreamer::~HnswSparseStreamer() {
  if (state_ != STATE_INITED) {
    return;
  }
  this->cleanup();
}

//! Initialize the streamer.
//!
//! Copies `imeta` into the streamer meta, reads every streamer parameter from
//! `params`, substitutes defaults for zero-valued settings, validates ranges,
//! then configures and initializes the entity and the HNSW algorithm.
//!
//! @param imeta   index meta to start from
//! @param params  streamer parameters (PARAM_HNSW_SPARSE_STREAMER_*)
//! @return 0 on success (state becomes STATE_INITED),
//!         IndexError_InvalidArgument when a parameter is out of range, or the
//!         error returned by the entity / algorithm initialization.
int HnswSparseStreamer::init(const IndexMeta &imeta,
                             const ailego::Params &params) {
  meta_ = imeta;
  meta_.set_streamer("HnswSparseStreamer", HnswSparseEntity::kRevision, params);

  params.get(PARAM_HNSW_SPARSE_STREAMER_MAX_INDEX_SIZE, &max_index_size_);
  params.get(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT,
             &upper_max_neighbor_cnt_);

  // Level-0 neighbor count and prune count are both derived from the upper
  // level neighbor count through a multiplier.
  float multiplier = HnswSparseEntity::kDefaultL0MaxNeighborCntMultiplier;
  params.get(PARAM_HNSW_SPARSE_STREAMER_L0_MAX_NEIGHBOR_COUNT_MULTIPLIER,
             &multiplier);
  l0_max_neighbor_cnt_ = multiplier * upper_max_neighbor_cnt_;

  multiplier = HnswSparseEntity::kDefaultNeighborPruneMultiplier;
  params.get(PARAM_HNSW_SPARSE_STREAMER_NEIGHBOR_PRUNE_MULTIPLIER, &multiplier);
  size_t prune_cnt = multiplier * upper_max_neighbor_cnt_;

  // The scaling factor falls back to the upper neighbor count unless it is
  // given explicitly.
  scaling_factor_ = upper_max_neighbor_cnt_;
  params.get(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, &scaling_factor_);

  params.get(PARAM_HNSW_SPARSE_STREAMER_DOCS_HARD_LIMIT, &docs_hard_limit_);
  params.get(PARAM_HNSW_SPARSE_STREAMER_EF, &ef_);
  params.get(PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION, &ef_construction_);
  params.get(PARAM_HNSW_SPARSE_STREAMER_VISIT_BLOOMFILTER_ENABLE, &bf_enabled_);
  params.get(PARAM_HNSW_SPARSE_STREAMER_VISIT_BLOOMFILTER_NEGATIVE_PROB,
             &bf_negative_prob_);
  params.get(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD,
             &bruteforce_threshold_);
  params.get(PARAM_HNSW_SPARSE_STREAMER_MAX_SCAN_RATIO, &max_scan_ratio_);
  params.get(PARAM_HNSW_SPARSE_STREAMER_MAX_SCAN_LIMIT, &max_scan_limit_);
  params.get(PARAM_HNSW_SPARSE_STREAMER_MIN_SCAN_LIMIT, &min_scan_limit_);
  params.get(PARAM_HNSW_SPARSE_STREAMER_CHECK_CRC_ENABLE, &check_crc_enabled_);

  params.get(PARAM_HNSW_SPARSE_STREAMER_CHUNK_SIZE, &chunk_size_);
  params.get(PARAM_HNSW_SPARSE_STREAMER_FILTER_SAME_KEY, &filter_same_key_);
  params.get(PARAM_HNSW_SPARSE_STREAMER_GET_VECTOR_ENABLE,
             &get_vector_enabled_);
  params.get(PARAM_HNSW_SPARSE_STREAMER_MIN_NEIGHBOR_COUNT, &min_neighbor_cnt_);
  params.get(PARAM_HNSW_SPARSE_STREAMER_FORCE_PADDING_RESULT_ENABLE,
             &force_padding_topk_enabled_);

  // Query filtering is switched on by the result of reading its ratio.
  query_filtering_enabled_ =
      params.get(PARAM_HNSW_SPARSE_STREAMER_QUERY_FILTERING_RATIO,
                 &query_filtering_ratio_);

  params.get(PARAM_HNSW_SPARSE_STREAMER_DOCS_SOFT_LIMIT, &docs_soft_limit_);
  if (docs_soft_limit_ > 0 && docs_soft_limit_ > docs_hard_limit_) {
    LOG_ERROR("[%s] must be >= [%s]",
              PARAM_HNSW_SPARSE_STREAMER_DOCS_HARD_LIMIT.c_str(),
              PARAM_HNSW_SPARSE_STREAMER_DOCS_SOFT_LIMIT.c_str());
    return IndexError_InvalidArgument;
  } else if (docs_soft_limit_ == 0UL) {
    docs_soft_limit_ =
        docs_hard_limit_ * HnswSparseEntity::kDefaultDocsSoftLimitRatio;
  }

  if (ef_ == 0U) {
    ef_ = HnswSparseEntity::kDefaultEf;
  }
  if (ef_construction_ == 0U) {
    ef_construction_ = HnswSparseEntity::kDefaultEfConstruction;
  }
  if (upper_max_neighbor_cnt_ == 0U) {
    upper_max_neighbor_cnt_ = HnswSparseEntity::kDefaultUpperMaxNeighborCnt;
  }
  if (upper_max_neighbor_cnt_ > HnswSparseEntity::kMaxNeighborCnt) {
    LOG_ERROR("[%s] must be in range (0,%d)",
              PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT.c_str(),
              HnswSparseEntity::kMaxNeighborCnt);
    return IndexError_InvalidArgument;
  }
  if (l0_max_neighbor_cnt_ == 0U) {
    l0_max_neighbor_cnt_ = HnswSparseEntity::kDefaultL0MaxNeighborCnt;
  }
  if (l0_max_neighbor_cnt_ > HnswSparseEntity::kMaxNeighborCnt) {
    // This check is about the level-0 neighbor count, so name it as such.
    LOG_ERROR("L0NeighborCnt must be in range (0,%d)",
              HnswSparseEntity::kMaxNeighborCnt);
    return IndexError_InvalidArgument;
  }
  if (min_neighbor_cnt_ > upper_max_neighbor_cnt_) {
    LOG_ERROR("[%s]-[%u] must be <= [%s]-[%u]",
              PARAM_HNSW_SPARSE_STREAMER_MIN_NEIGHBOR_COUNT.c_str(),
              min_neighbor_cnt_,
              PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT.c_str(),
              upper_max_neighbor_cnt_);
    return IndexError_InvalidArgument;
  }

  if (bf_negative_prob_ <= 0.0f || bf_negative_prob_ >= 1.0f) {
    LOG_ERROR(
        "[%s] must be in range (0,1)",
        PARAM_HNSW_SPARSE_STREAMER_VISIT_BLOOMFILTER_NEGATIVE_PROB.c_str());
    return IndexError_InvalidArgument;
  }

  if (scaling_factor_ == 0U) {
    scaling_factor_ = HnswSparseEntity::kDefaultScalingFactor;
  }
  if (scaling_factor_ < 5 || scaling_factor_ > 1000) {
    LOG_ERROR("[%s] must be in range [5,1000]",
              PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR.c_str());
    return IndexError_InvalidArgument;
  }

  if (max_scan_ratio_ <= 0.0f || max_scan_ratio_ > 1.0f) {
    LOG_ERROR("[%s] must be in range (0.0f,1.0f]",
              PARAM_HNSW_SPARSE_STREAMER_MAX_SCAN_RATIO.c_str());
    return IndexError_InvalidArgument;
  }

  if (max_scan_limit_ < min_scan_limit_) {
    LOG_ERROR("[%s] must be >= [%s]",
              PARAM_HNSW_SPARSE_STREAMER_MAX_SCAN_LIMIT.c_str(),
              PARAM_HNSW_SPARSE_STREAMER_MIN_SCAN_LIMIT.c_str());
    return IndexError_InvalidArgument;
  }

  if (prune_cnt == 0UL) {
    prune_cnt = upper_max_neighbor_cnt_;
  }
  if (chunk_size_ == 0UL) {
    chunk_size_ = HnswSparseEntity::kDefaultChunkSize;
  }
  if (chunk_size_ > HnswSparseEntity::kMaxChunkSize) {
    LOG_ERROR("[%s] must be < %zu",
              PARAM_HNSW_SPARSE_STREAMER_CHUNK_SIZE.c_str(),
              HnswSparseEntity::kMaxChunkSize);
    return IndexError_InvalidArgument;
  }

  if (query_filtering_enabled_ &&
      (query_filtering_ratio_ <= 0.0f || query_filtering_ratio_ >= 1.0f)) {
    // Report the streamer parameter the ratio was actually read from.
    LOG_ERROR("[%s] must be in range (0, 1)",
              PARAM_HNSW_SPARSE_STREAMER_QUERY_FILTERING_RATIO.c_str());
    return IndexError_InvalidArgument;
  }

  // Push the validated settings into the entity before initializing it.
  entity_.set_ef_construction(ef_construction_);
  entity_.set_l0_neighbor_cnt(l0_max_neighbor_cnt_);
  entity_.set_upper_neighbor_cnt(upper_max_neighbor_cnt_);
  entity_.set_scaling_factor(scaling_factor_);
  entity_.set_prune_cnt(prune_cnt);

  entity_.set_chunk_size(chunk_size_);
  entity_.set_filter_same_key(filter_same_key_);
  entity_.set_get_vector(get_vector_enabled_);
  entity_.set_min_neighbor_cnt(min_neighbor_cnt_);

  entity_.set_sparse_meta_size(HnswSparseEntity::kSparseMetaSize);
  entity_.set_sparse_unit_size(meta_.unit_size());

  int ret = entity_.init(max_index_size_, docs_hard_limit_);
  if (ret != 0) {
    LOG_ERROR("Hnsw entity init failed for %s", IndexError::What(ret));
    return ret;
  }
  LOG_DEBUG(
      "Init params: maxIndexSize=%zu docsHardLimit=%zu docsSoftLimit=%zu "
      "efConstruction=%u ef=%u l0NeighborCnt=%u upperNeighborCnt=%u "
      "scalingFactor=%u maxScanRatio=%.3f minScanLimit=%zu maxScanLimit=%zu "
      "bfEnabled=%d bruteFoceThreshold=%zu bfNegativeProbability=%.5f "
      "checkCrcEnabled=%d pruneSize=%zu chunkSize=%zu "
      "filterSameKey=%u getVectorEnabled=%u "
      "minNeighborCount=%u forcePadding=%u filteringRatio=%f",
      max_index_size_, docs_hard_limit_, docs_soft_limit_, ef_construction_,
      ef_, l0_max_neighbor_cnt_, upper_max_neighbor_cnt_, scaling_factor_,
      max_scan_ratio_, min_scan_limit_, max_scan_limit_, bf_enabled_,
      bruteforce_threshold_, bf_negative_prob_, check_crc_enabled_, prune_cnt,
      chunk_size_, filter_same_key_, get_vector_enabled_, min_neighbor_cnt_,
      force_padding_topk_enabled_, query_filtering_ratio_);

  alg_ = HnswSparseAlgorithm::UPointer(new HnswSparseAlgorithm(entity_));

  ret = alg_->init();
  if (ret != 0) {
    return ret;
  }

  state_ = STATE_INITED;

  return 0;
}

int HnswSparseStreamer::cleanup(void) {
  if (state_ == STATE_OPENED) {
    this->close();
  }

  LOG_INFO("HnswSparseStreamer cleanup");

  meta_.clear();
  metric_.reset();
  stats_.clear();
  entity_.cleanup();

  if (alg_) {
    alg_->cleanup();
  }

  max_index_size_ = 0UL;
  docs_hard_limit_ = HnswSparseEntity::kDefaultDocsHardLimit;
  docs_soft_limit_ = 0UL;
  upper_max_neighbor_cnt_ = HnswSparseEntity::kDefaultUpperMaxNeighborCnt;
  ef_ = HnswSparseEntity::kDefaultEf;
  ef_construction_ = HnswSparseEntity::kDefaultEfConstruction;
  bf_enabled_ = false;
  scaling_factor_ = HnswSparseEntity::kDefaultScalingFactor;
  bruteforce_threshold_ = HnswSparseEntity::kDefaultBruteForceThreshold;
  max_scan_limit_ = HnswSparseEntity::kDefaultMaxScanLimit;
  min_scan_limit_ = HnswSparseEntity::kDefaultMinScanLimit;
  chunk_size_ = HnswSparseEntity::kDefaultChunkSize;
  bf_negative_prob_ = HnswSparseEntity::kDefaultBFNegativeProbability;
  max_scan_ratio_ = HnswSparseEntity::kDefaultScanRatio;
  state_ = STATE_INIT;
  check_crc_enabled_ = false;
  filter_same_key_ = false;
  get_vector_enabled_ = false;

  sparse_neighbor_ratio_ = HnswSparseEntity::kDefaultSparseNeighborRatio;
  sparse_neighbor_cnt_ = 0UL;
  sparse_min_neighbor_cnt_ = 0UL;
  upper_sparse_neighbor_cnt_ = 0UL;

  return 0;
}

//! Open the streamer on a storage.
//!
//! Opens the entity on `stg`, reconciles the stored index meta with the
//! streamer meta (writing it for a new index, checking metric name and data
//! type for an existing one), creates and initializes the metric, and picks
//! the distance functions used for adding and searching.
//!
//! @param stg  storage to open the entity on
//! @return 0 on success (state becomes STATE_OPENED), IndexError_NoReady when
//!         the streamer is not in the INITED state, IndexError_Mismatch when
//!         the stored meta disagrees, IndexError_NoExist when the metric
//!         cannot be created, IndexError_InvalidArgument when the metric has
//!         no sparse distance, or the error of the failing sub-step.
int HnswSparseStreamer::open(IndexStorage::Pointer stg) {
  LOG_INFO("HnswSparseStreamer open");

  if (ailego_unlikely(state_ != STATE_INITED)) {
    LOG_ERROR("Open storage failed, init streamer first!");
    return IndexError_NoReady;
  }
  int ret = entity_.open(std::move(stg), check_crc_enabled_);
  if (ret != 0) {
    return ret;
  }
  IndexMeta index_meta;
  ret = entity_.get_index_meta(&index_meta);
  if (ret == IndexError_NoExist) {
    // Set IndexMeta for the new index
    ret = entity_.set_index_meta(meta_);
    if (ret != 0) {
      LOG_ERROR("Failed to set index meta for %s", IndexError::What(ret));
      return ret;
    }
  } else if (ret != 0) {
    LOG_ERROR("Failed to get index meta for %s", IndexError::What(ret));
    return ret;
  } else {
    if (index_meta.metric_name() != meta_.metric_name() ||
        index_meta.data_type() != meta_.data_type()) {
      LOG_ERROR("IndexMeta mismatch from the previous in index");
      return IndexError_Mismatch;
    }
    // The IndexMetric Params may be updated like MipsSquaredEuclidean
    auto metric_params = index_meta.metric_params();
    metric_params.merge(meta_.metric_params());
    meta_.set_metric(index_meta.metric_name(), 0, metric_params);
  }

  metric_ = IndexFactory::CreateMetric(meta_.metric_name());
  if (!metric_) {
    LOG_ERROR("Failed to create metric %s", meta_.metric_name().c_str());
    return IndexError_NoExist;
  }
  ret = metric_->init(meta_, meta_.metric_params());
  if (ret != 0) {
    LOG_ERROR("Failled to init metric, ret=%d", ret);
    return ret;
  }

  if (!metric_->sparse_distance()) {
    LOG_ERROR("Invalid metric distance");
    return IndexError_InvalidArgument;
  }

  add_distance_ = metric_->sparse_distance();
  search_distance_ = add_distance_;

  // Use the query metric's sparse distance for searching only when it really
  // provides one. The guard must test the same accessor that is assigned;
  // otherwise search_distance_ could be overwritten with an empty function.
  if (metric_->query_metric() && metric_->query_metric()->sparse_distance()) {
    search_distance_ = metric_->query_metric()->sparse_distance();
  }

  state_ = STATE_OPENED;
  magic_ = IndexContext::GenerateMagic();

  return 0;
}

//! Close the streamer: clear the stats, store the metric's current name and
//! params in the index meta, then close the entity.
//! Returns the entity's close result; the state only goes back to
//! STATE_INITED when that result is 0.
int HnswSparseStreamer::close(void) {
  LOG_INFO("HnswSparseStreamer close");

  stats_.clear();

  meta_.set_metric(metric_->name(), 0, metric_->params());
  entity_.set_index_meta(meta_);

  const int ret = entity_.close();
  if (ret == 0) {
    state_ = STATE_INITED;
  }

  return ret;
}

//! Flush the index: refresh the stored index meta with the metric's current
//! name and params, then flush the entity with `checkpoint`.
int HnswSparseStreamer::flush(uint64_t checkpoint) {
  const size_t checkpoint_for_log = static_cast<size_t>(checkpoint);
  LOG_INFO("HnswSparseStreamer flush checkpoint=%zu", checkpoint_for_log);

  meta_.set_metric(metric_->name(), 0, metric_->params());
  entity_.set_index_meta(meta_);

  const int ret = entity_.flush(checkpoint);
  return ret;
}

//! Dump the index through `dumper`.
//! Holds shared_mutex_ exclusively for the whole call, marks the meta as
//! belonging to "HnswSparseSearcher", serializes the meta and then dumps the
//! entity. Returns the first non-zero error, or the entity's dump result.
int HnswSparseStreamer::dump(const IndexDumper::Pointer &dumper) {
  LOG_INFO("HnswSparseStreamer dump");

  shared_mutex_.lock();
  AILEGO_DEFER([&]() { shared_mutex_.unlock(); });

  meta_.set_searcher("HnswSparseSearcher", HnswSparseEntity::kRevision,
                     ailego::Params());

  const int ret = IndexHelper::SerializeToDumper(meta_, dumper.get());
  if (ret == 0) {
    return entity_.dump(dumper);
  }

  LOG_ERROR("Failed to serialize meta into dumper.");
  return ret;
}

//! Create a context bound to this streamer.
//!
//! Clones the entity, builds an HnswSparseContext around the metric and the
//! clone, copies the streamer's search settings into it and initializes it as
//! a sparse streamer context.
//!
//! @return the new context, or an empty pointer when the streamer is not
//!         opened, the entity cannot be cloned, allocation fails, or the
//!         context fails to initialize.
IndexStreamer::Context::Pointer HnswSparseStreamer::create_context(void) const {
  if (ailego_unlikely(state_ != STATE_OPENED)) {
    LOG_ERROR("Create context failed, open storage first!");
    return Context::Pointer();
  }

  HnswSparseEntity::Pointer entity = entity_.clone();
  if (ailego_unlikely(!entity)) {
    LOG_ERROR("CreateContext clone init failed");
    return Context::Pointer();
  }
  HnswSparseContext *ctx =
      new (std::nothrow) HnswSparseContext(metric_, entity);
  if (ailego_unlikely(ctx == nullptr)) {
    LOG_ERROR("Failed to new HnswSparseContext");
    return Context::Pointer();
  }
  ctx->set_ef(ef_);
  ctx->set_max_scan_limit(max_scan_limit_);
  ctx->set_min_scan_limit(min_scan_limit_);
  ctx->set_max_scan_ratio(max_scan_ratio_);
  ctx->set_filter_mode(bf_enabled_ ? VisitFilter::BloomFilter
                                   : VisitFilter::ByteMap);
  ctx->set_filter_negative_probability(bf_negative_prob_);
  ctx->set_magic(magic_);
  ctx->set_force_padding_topk(force_padding_topk_enabled_);
  ctx->set_bruteforce_threshold(bruteforce_threshold_);

  // The branch hint wraps the whole comparison, matching the other checks in
  // this file.
  if (ailego_unlikely(ctx->init(HnswSparseContext::kSparseStreamerContext) !=
                      0)) {
    LOG_ERROR("Init HnswSparseContext failed");
    // ctx is still a raw pointer here, so release it before bailing out.
    delete ctx;
    return Context::Pointer();
  }

  return Context::Pointer(ctx);
}

//! Create a sparse provider backed by a clone of the entity.
//! Returns an empty pointer when the entity cannot be cloned.
IndexStreamer::SparseProvider::Pointer
HnswSparseStreamer::create_sparse_provider(void) const {
  LOG_DEBUG("HnswSparseStreamer create sparse provider");

  auto cloned = entity_.clone();
  if (ailego_unlikely(!cloned)) {
    LOG_ERROR("Clone HnswSparseEntity failed");
    return SparseProvider::Pointer();
  }

  auto *provider =
      new HnswSparseIndexProvider(meta_, cloned, "HnswSparseStreamer");
  return SparseProvider::Pointer(provider);
}

//! Rebind `ctx` to this streamer: copy the scan and brute-force settings into
//! it and update it with a fresh entity clone, the meta, the metric and the
//! streamer's magic. Returns IndexError_Runtime when the entity cannot be
//! cloned, otherwise the context's update result.
int HnswSparseStreamer::update_context(HnswSparseContext *ctx) const {
  const HnswSparseEntity::Pointer cloned = entity_.clone();
  if (cloned) {
    ctx->set_max_scan_limit(max_scan_limit_);
    ctx->set_min_scan_limit(min_scan_limit_);
    ctx->set_max_scan_ratio(max_scan_ratio_);
    ctx->set_bruteforce_threshold(bruteforce_threshold_);
    return ctx->update_context(HnswSparseContext::kSparseStreamerContext, meta_,
                               metric_, cloned, magic_);
  }

  LOG_ERROR("Failed to clone search context entity");
  return IndexError_Runtime;
}

//! Add a vector with a caller-supplied id into the index, with sparse inputs.
//!
//! @param id              node id to store the vector under
//! @param sparse_count    number of non-zero elements of the vector
//! @param sparse_indices  indices of the non-zero elements
//! @param sparse_query    values of the non-zero elements
//! @param qmeta           query meta, validated by check_params()
//! @param context         context; must be an HnswSparseContext
//! @return 0 on success, otherwise an error code (see the individual checks)
int HnswSparseStreamer::add_with_id_impl(uint32_t id,
                                         const uint32_t sparse_count,
                                         const uint32_t *sparse_indices,
                                         const void *sparse_query,
                                         const IndexQueryMeta &qmeta,
                                         Context::Pointer &context) {
  int ret = check_params(qmeta);
  if (ailego_unlikely(ret != 0)) {
    return ret;
  }

  // Reject vectors with more non-zero elements than the entity allows.
  if (ailego_unlikely(sparse_count > HnswSparseEntity::kSparseMaxDimSize)) {
    LOG_WARN(
        "Failed to add sparse vector: number of non-zero elements (%u) exceeds "
        "maximum allowed (%u), id=%u",
        sparse_count, HnswSparseEntity::kSparseMaxDimSize, id);
    return IndexError_InvalidValue;
  }

  HnswSparseContext *ctx = dynamic_cast<HnswSparseContext *>(context.get());
  ailego_do_if_false(ctx) {
    LOG_ERROR("Cast context to HnswSparseContext failed");
    return IndexError_Cast;
  }
  if (ctx->magic() != magic_) {
    //! context is created by another searcher or streamer
    ret = update_context(ctx);
    if (ret != 0) {
      return ret;
    }
  }

  // Soft limit only warns; the hard limit rejects the vector.
  if (ailego_unlikely(entity_.doc_cnt() >= docs_soft_limit_)) {
    if (entity_.doc_cnt() >= docs_hard_limit_) {
      LOG_ERROR("Current docs %u exceed [%s]", entity_.doc_cnt(),
                PARAM_HNSW_SPARSE_STREAMER_DOCS_HARD_LIMIT.c_str());
      // NOTE(review): this is the only discarded-count update in this
      // function that takes mutex_ — confirm whether the others need it.
      const std::lock_guard<std::mutex> lk(mutex_);
      (*stats_.mutable_discarded_count())++;
      return IndexError_IndexFull;
    } else {
      LOG_WARN("Current docs %u exceed [%s]", entity_.doc_cnt(),
               PARAM_HNSW_SPARSE_STREAMER_DOCS_SOFT_LIMIT.c_str());
    }
  }
  // dump() holds shared_mutex_ exclusively, so a failed shared try-lock means
  // a dump is in progress; the add is refused instead of waiting.
  if (ailego_unlikely(!shared_mutex_.try_lock_shared())) {
    LOG_ERROR("Cannot add vector while dumping index");
    (*stats_.mutable_discarded_count())++;
    return IndexError_Unsupported;
  }
  AILEGO_DEFER([&]() { shared_mutex_.unlock_shared(); });

  ctx->clear();
  ctx->update_dist_caculator_distance(add_distance_);

  // Convert the (indices, values) input into sparse_query_buffer, which is
  // used both as the context's query and as the stored vector.
  std::string sparse_query_buffer;
  SparseUtility::TransSparseFormat(sparse_count, sparse_indices, sparse_query,
                                   entity_.sparse_unit_size(),
                                   sparse_query_buffer);

  ctx->reset_query(sparse_query_buffer.data());
  ctx->check_need_adjuct_ctx(entity_.doc_cnt());

  // Store the vector under `id` at a randomly chosen level, then link the
  // node into the graph.
  level_t level = alg_->get_random_level();
  ret =
      entity_.add_vector_with_id(level, id, sparse_query_buffer, sparse_count);
  if (ailego_unlikely(ret != 0)) {
    LOG_ERROR("Hnsw streamer add vector failed");
    (*stats_.mutable_discarded_count())++;
    return ret;
  }

  ret = alg_->add_node(id, level, ctx);
  if (ailego_unlikely(ret != 0)) {
    LOG_ERROR("Hnsw stramer add node failed");
    (*stats_.mutable_discarded_count())++;
    return ret;
  }

  // An error flagged on the context also counts as a discarded add.
  if (ailego_unlikely(ctx->error())) {
    (*stats_.mutable_discarded_count())++;
    return IndexError_Runtime;
  }
  (*stats_.mutable_added_count())++;

  return 0;
}

//! Add a vector into the index with sparse inputs; the node id is assigned by
//! the entity.
//!
//! @param pkey            primary key of the vector
//! @param sparse_count    number of non-zero elements of the vector
//! @param sparse_indices  indices of the non-zero elements
//! @param sparse_query    values of the non-zero elements
//! @param qmeta           query meta, validated by check_params()
//! @param context         context; must be an HnswSparseContext
//! @return 0 on success, otherwise an error code (see the individual checks)
int HnswSparseStreamer::add_impl(uint64_t pkey, const uint32_t sparse_count,
                                 const uint32_t *sparse_indices,
                                 const void *sparse_query,
                                 const IndexQueryMeta &qmeta,
                                 Context::Pointer &context) {
  int ret = check_params(qmeta);
  if (ailego_unlikely(ret != 0)) {
    return ret;
  }

  // Reject vectors with more non-zero elements than the entity allows.
  if (ailego_unlikely(sparse_count > HnswSparseEntity::kSparseMaxDimSize)) {
    LOG_WARN(
        "Failed to add sparse vector: number of non-zero elements (%u) exceeds "
        "maximum allowed (%u), key=%zu",
        sparse_count, HnswSparseEntity::kSparseMaxDimSize, (size_t)pkey);
    return IndexError_InvalidValue;
  }

  HnswSparseContext *ctx = dynamic_cast<HnswSparseContext *>(context.get());
  ailego_do_if_false(ctx) {
    LOG_ERROR("Cast context to HnswSparseContext failed");
    return IndexError_Cast;
  }
  if (ctx->magic() != magic_) {
    //! context is created by another searcher or streamer
    ret = update_context(ctx);
    if (ret != 0) {
      return ret;
    }
  }

  // Soft limit only warns; the hard limit rejects the vector.
  if (ailego_unlikely(entity_.doc_cnt() >= docs_soft_limit_)) {
    if (entity_.doc_cnt() >= docs_hard_limit_) {
      LOG_ERROR("Current docs %u exceed [%s]", entity_.doc_cnt(),
                PARAM_HNSW_SPARSE_STREAMER_DOCS_HARD_LIMIT.c_str());
      // NOTE(review): this is the only discarded-count update in this
      // function that takes mutex_ — confirm whether the others need it.
      const std::lock_guard<std::mutex> lk(mutex_);
      (*stats_.mutable_discarded_count())++;
      return IndexError_IndexFull;
    } else {
      LOG_WARN("Current docs %u exceed [%s]", entity_.doc_cnt(),
               PARAM_HNSW_SPARSE_STREAMER_DOCS_SOFT_LIMIT.c_str());
    }
  }
  // dump() holds shared_mutex_ exclusively, so a failed shared try-lock means
  // a dump is in progress; the add is refused instead of waiting.
  if (ailego_unlikely(!shared_mutex_.try_lock_shared())) {
    LOG_ERROR("Cannot add vector while dumping index");
    (*stats_.mutable_discarded_count())++;
    return IndexError_Unsupported;
  }
  AILEGO_DEFER([&]() { shared_mutex_.unlock_shared(); });

  ctx->clear();
  ctx->update_dist_caculator_distance(add_distance_);

  // Convert the (indices, values) input into sparse_query_buffer, which is
  // used both as the context's query and as the stored vector.
  std::string sparse_query_buffer;
  SparseUtility::TransSparseFormat(sparse_count, sparse_indices, sparse_query,
                                   entity_.sparse_unit_size(),
                                   sparse_query_buffer);

  ctx->reset_query(sparse_query_buffer.data());
  ctx->check_need_adjuct_ctx(entity_.doc_cnt());

  // Store the vector at a randomly chosen level; the entity reports the node
  // id it assigned through `id`, which is then linked into the graph.
  level_t level = alg_->get_random_level();
  node_id_t id;
  ret = entity_.add_vector(level, pkey, sparse_query_buffer, sparse_count, &id);
  if (ailego_unlikely(ret != 0)) {
    LOG_ERROR("Hnsw streamer add vector failed");
    (*stats_.mutable_discarded_count())++;
    return ret;
  }

  ret = alg_->add_node(id, level, ctx);
  if (ailego_unlikely(ret != 0)) {
    LOG_ERROR("Hnsw stramer add node failed");
    (*stats_.mutable_discarded_count())++;
    return ret;
  }

  // An error flagged on the context also counts as a discarded add.
  if (ailego_unlikely(ctx->error())) {
    (*stats_.mutable_discarded_count())++;
    return IndexError_Runtime;
  }
  (*stats_.mutable_added_count())++;

  return 0;
}

//! Similarity search for a single sparse query.
//! Forwards to the batch overload with a query count of 1.
int HnswSparseStreamer::search_impl(
    const uint32_t sparse_count, const uint32_t *sparse_indices,
    const void *sparse_query, const IndexQueryMeta &qmeta,
    IndexStreamer::Context::Pointer &context) const {
  const uint32_t *counts = &sparse_count;
  const uint32_t query_total = 1;
  return search_impl(counts, sparse_indices, sparse_query, qmeta, query_total,
                     context);
}

//! Similarity search for a batch of sparse queries.
//!
//! The queries are laid out back to back: `sparse_count[q]` gives the number
//! of non-zero elements of query q, and `sparse_indices` / `sparse_query` are
//! advanced by that many entries after each query.
//!
//! @param sparse_count    per-query number of non-zero elements
//! @param sparse_indices  concatenated indices of all queries
//! @param sparse_query    concatenated values of all queries
//! @param qmeta           query meta, validated by check_params()
//! @param count           number of queries
//! @param context         context; must be an HnswSparseContext
//! @return 0 on success, otherwise an error code
int HnswSparseStreamer::search_impl(
    const uint32_t *sparse_count, const uint32_t *sparse_indices,
    const void *sparse_query, const IndexQueryMeta &qmeta, uint32_t count,
    IndexStreamer::Context::Pointer &context) const {
  int ret = check_params(qmeta);
  if (ailego_unlikely(ret != 0)) {
    return ret;
  }
  HnswSparseContext *ctx = dynamic_cast<HnswSparseContext *>(context.get());
  ailego_do_if_false(ctx) {
    LOG_ERROR("Cast context to HnswSparseContext failed");
    return IndexError_Cast;
  }

  // With few enough documents, hand the whole request to the brute-force
  // search.
  if (entity_.doc_cnt() <= ctx->get_bruteforce_threshold()) {
    return search_bf_impl(sparse_count, sparse_indices, sparse_query, qmeta,
                          count, context);
  }

  if (ctx->magic() != magic_) {
    //! context is created by another searcher or streamer
    ret = update_context(ctx);
    if (ret != 0) {
      return ret;
    }
  }

  ctx->clear();
  ctx->update_dist_caculator_distance(search_distance_);
  ctx->resize_results(count);
  ctx->check_need_adjuct_ctx(entity_.doc_cnt());

  // Cursors into the concatenated inputs; advanced at the end of each
  // iteration.
  const uint32_t *sparse_indices_tmp = sparse_indices;
  const void *sparse_query_tmp = sparse_query;

  for (size_t q = 0; q < count; ++q) {
    std::string sparse_query_buffer;
    std::string sparse_query_filtered_buffer;

    SparseUtility::TransSparseFormat(
        sparse_count[q], sparse_indices_tmp, sparse_query_tmp,
        entity_.sparse_unit_size(), sparse_query_buffer);

    // With query filtering enabled the graph search runs on the filtered
    // query; otherwise it runs on the full query.
    if (query_filtering_enabled_) {
      if (!SparseUtility::FilterSparseQuery(
              sparse_count[q], sparse_indices_tmp, sparse_query_tmp,
              qmeta.data_type(), entity_.sparse_unit_size(),
              query_filtering_ratio_, &sparse_query_filtered_buffer)) {
        LOG_ERROR("Hnsw filtering failed");

        return IndexError_Runtime;
      }

      ctx->reset_query(sparse_query_filtered_buffer.data());
    } else {
      ctx->reset_query(sparse_query_buffer.data());
    }

    ret = alg_->search(ctx);
    if (ailego_unlikely(ret != 0)) {
      LOG_ERROR("Hnsw searcher fast search failed");
      return ret;
    }

    // After a filtered search, switch back to the full query and recalculate
    // the top-k distances.
    if (query_filtering_enabled_) {
      ctx->reset_query(sparse_query_buffer.data());
      ctx->recal_topk_dist();
    }

    ctx->topk_to_result(q);

    // Step both cursors past the current query.
    sparse_indices_tmp += sparse_count[q];
    sparse_query_tmp = reinterpret_cast<const char *>(sparse_query_tmp) +
                       sparse_count[q] * qmeta.unit_size();
  }

  if (ailego_unlikely(ctx->error())) {
    return IndexError_Runtime;
  }

  return 0;
}

//! Brute-force search for one sparse query.
//! Forwards to the batch overload, treating the scalar sparse_count as a
//! one-element count array.
int HnswSparseStreamer::search_bf_impl(
    const uint32_t sparse_count, const uint32_t *sparse_indices,
    const void *sparse_query, const IndexQueryMeta &qmeta,
    IndexStreamer::Context::Pointer &context) const {
  constexpr uint32_t kSingleQuery = 1;
  const uint32_t *counts = &sparse_count;
  return search_bf_impl(counts, sparse_indices, sparse_query, qmeta,
                        kSingleQuery, context);
}

//! Similarity search with sparse inputs
int HnswSparseStreamer::search_bf_impl(
    const uint32_t *sparse_count, const uint32_t *sparse_indices,
    const void *sparse_query, const IndexQueryMeta &qmeta, uint32_t count,
    IndexStreamer::Context::Pointer &context) const {
  int ret = check_params(qmeta);
  if (ailego_unlikely(ret != 0)) {
    return ret;
  }

  HnswSparseContext *ctx = dynamic_cast<HnswSparseContext *>(context.get());
  ailego_do_if_false(ctx) {
    LOG_ERROR("Cast context to HnswSparseContext failed");
    return IndexError_Cast;
  }
  if (ctx->magic() != magic_) {
    //! context is created by another searcher or streamer
    ret = update_context(ctx);
    if (ret != 0) {
      return ret;
    }
  }

  ctx->clear();
  ctx->update_dist_caculator_distance(search_distance_);
  ctx->resize_results(count);

  const uint32_t *sparse_indices_tmp = sparse_indices;
  const void *sparse_query_tmp = sparse_query;

  if (ctx->group_by_search()) {
    if (!ctx->group_by().is_valid()) {
      LOG_ERROR("Invalid group-by function");
      return IndexError_Runtime;
    }

    std::function<std::string(node_id_t)> group_by = [&](node_id_t id) {
      return ctx->group_by()(entity_.get_key(id));
    };

    for (size_t q = 0; q < count; ++q) {
      std::string sparse_query_buffer;
      SparseUtility::TransSparseFormat(
          sparse_count[q], sparse_indices_tmp, sparse_query_tmp,
          entity_.sparse_unit_size(), sparse_query_buffer);

      ctx->reset_query(sparse_query_buffer.data());
      ctx->group_topk_heaps().clear();

      for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {
        if (entity_.get_key(id) == kInvalidKey) {
          continue;
        }

        if (!ctx->filter().is_valid() || !ctx->filter()(entity_.get_key(id))) {
          dist_t dist = ctx->dist_calculator().dist(id);

          std::string group_id = group_by(id);

          auto &topk_heap = ctx->group_topk_heaps()[group_id];
          if (topk_heap.empty()) {
            topk_heap.limit(ctx->group_topk());
          }
          topk_heap.emplace_back(id, dist);
        }
      }
      ctx->topk_to_result(q);

      sparse_indices_tmp += sparse_count[q];
      sparse_query_tmp = reinterpret_cast<const char *>(sparse_query_tmp) +
                         sparse_count[q] * qmeta.unit_size();
    }
  } else {
    auto &filter = ctx->filter();
    auto &topk = ctx->topk_heap();

    for (size_t q = 0; q < count; ++q) {
      std::string sparse_query_buffer;
      SparseUtility::TransSparseFormat(
          sparse_count[q], sparse_indices_tmp, sparse_query_tmp,
          entity_.sparse_unit_size(), sparse_query_buffer);

      ctx->reset_query(sparse_query_buffer.data());
      topk.clear();
      for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {
        if (entity_.get_key(id) == kInvalidKey) {
          continue;
        }

        if (!filter.is_valid() || !filter(entity_.get_key(id))) {
          dist_t dist = ctx->dist_calculator().dist(id);
          topk.emplace(id, dist);
        }
      }
      ctx->topk_to_result(q);

      sparse_indices_tmp += sparse_count[q];
      sparse_query_tmp = reinterpret_cast<const char *>(sparse_query_tmp) +
                         sparse_count[q] * qmeta.unit_size();
    }

    if (ailego_unlikely(ctx->error())) {
      return IndexError_Runtime;
    }
  }

  return 0;
}

//! Linear search by primary keys
int HnswSparseStreamer::search_bf_by_p_keys_impl(
    const uint32_t sparse_count, const uint32_t *sparse_indices,
    const void *sparse_query, const std::vector<std::vector<uint64_t>> &p_keys,
    const IndexQueryMeta &qmeta, ContextPointer &context) const {
  return search_bf_by_p_keys_impl(&sparse_count, sparse_indices, sparse_query,
                                  p_keys, qmeta, 1, context);
}

//! Linear search by primary keys with sparse inputs
int HnswSparseStreamer::search_bf_by_p_keys_impl(
    const uint32_t *sparse_count, const uint32_t *sparse_indices,
    const void *sparse_query, const std::vector<std::vector<uint64_t>> &p_keys,
    const IndexQueryMeta &qmeta, uint32_t count,
    Context::Pointer &context) const {
  int ret = check_params(qmeta);
  if (ailego_unlikely(ret != 0)) {
    return ret;
  }

  if (ailego_unlikely(p_keys.size() != count)) {
    LOG_ERROR("The size of p_keys is not equal to count");
    return IndexError_InvalidArgument;
  }

  HnswSparseContext *ctx = dynamic_cast<HnswSparseContext *>(context.get());
  ailego_do_if_false(ctx) {
    LOG_ERROR("Cast context to HnswSparseContext failed");
    return IndexError_Cast;
  }
  if (ctx->magic() != magic_) {
    //! context is created by another searcher or streamer
    ret = update_context(ctx);
    if (ret != 0) {
      return ret;
    }
  }

  ctx->clear();
  ctx->update_dist_caculator_distance(search_distance_);
  ctx->resize_results(count);

  const uint32_t *sparse_indices_tmp = sparse_indices;
  const void *sparse_query_tmp = sparse_query;

  if (ctx->group_by_search()) {
    if (!ctx->group_by().is_valid()) {
      LOG_ERROR("Invalid group-by function");
      return IndexError_Runtime;
    }

    std::function<std::string(node_id_t)> group_by = [&](node_id_t id) {
      return ctx->group_by()(entity_.get_key(id));
    };

    for (size_t q = 0; q < count; ++q) {
      std::string sparse_query_buffer;
      SparseUtility::TransSparseFormat(
          sparse_count[q], sparse_indices_tmp, sparse_query_tmp,
          entity_.sparse_unit_size(), sparse_query_buffer);

      ctx->reset_query(sparse_query_buffer.data());
      ctx->group_topk_heaps().clear();

      for (size_t idx = 0; idx < p_keys[q].size(); ++idx) {
        uint64_t pk = p_keys[q][idx];
        if (!ctx->filter().is_valid() || !ctx->filter()(pk)) {
          node_id_t id = entity_.get_id(pk);
          if (id != kInvalidNodeId) {
            dist_t dist = ctx->dist_calculator().dist(id);

            std::string group_id = group_by(id);

            auto &topk_heap = ctx->group_topk_heaps()[group_id];
            if (topk_heap.empty()) {
              topk_heap.limit(ctx->group_topk());
            }
            topk_heap.emplace_back(id, dist);
          }
        }
      }
      ctx->topk_to_result(q);

      sparse_indices_tmp += sparse_count[q];
      sparse_query_tmp = reinterpret_cast<const char *>(sparse_query_tmp) +
                         sparse_count[q] * qmeta.unit_size();
    }
  } else {
    auto &filter = ctx->filter();
    auto &topk = ctx->topk_heap();

    for (size_t q = 0; q < count; ++q) {
      std::string sparse_query_buffer;
      SparseUtility::TransSparseFormat(
          sparse_count[q], sparse_indices_tmp, sparse_query_tmp,
          entity_.sparse_unit_size(), sparse_query_buffer);

      ctx->reset_query(sparse_query_buffer.data());
      topk.clear();
      for (size_t idx = 0; idx < p_keys[q].size(); ++idx) {
        key_t pk = p_keys[q][idx];
        if (!filter.is_valid() || !filter(pk)) {
          node_id_t id = entity_.get_id(pk);
          if (id != kInvalidNodeId) {
            dist_t dist = ctx->dist_calculator().dist(id);
            topk.emplace(id, dist);
          }
        }
      }
      ctx->topk_to_result(q);

      sparse_indices_tmp += sparse_count[q];
      sparse_query_tmp = reinterpret_cast<const char *>(sparse_query_tmp) +
                         sparse_count[q] * qmeta.unit_size();
    }
  }

  if (ailego_unlikely(ctx->error())) {
    return IndexError_Runtime;
  }

  return 0;
}

//! Dump the level-0 neighbor list of every node to stdout, one node per line
//! in the form "node: <id>; <n0>, <n1>, ...".
void HnswSparseStreamer::print_debug_info() {
  for (node_id_t id = 0; id < entity_.doc_cnt(); ++id) {
    Neighbors neighbours = entity_.get_neighbors(0, id);
    std::cout << "node: " << id << "; ";
    for (uint32_t i = 0; i < neighbours.size(); ++i) {
      if (i != 0) {
        std::cout << ", ";
      }
      std::cout << neighbours[i];
    }
    //! Always terminate the line: previously the newline was only written
    //! after the last neighbor, so a node without neighbors ran into the
    //! output of the following node
    std::cout << std::endl;
  }

  // entity_.print_key_map();
}

//! NOTE(review): presumably registers HnswSparseStreamer with the index
//! factory so it can be instantiated by name - confirm against the macro
//! definition.
INDEX_FACTORY_REGISTER_STREAMER(HnswSparseStreamer);

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_sparse/hnsw_sparse_streamer.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <ailego/parallel/lock.h>
#include <zvec/core/framework/index_framework.h>
#include "hnsw_sparse_algorithm.h"
#include "hnsw_sparse_streamer_entity.h"

namespace zvec {
namespace core {

class HnswSparseStreamer : public IndexStreamer {
 public:
  using ContextPointer = IndexStreamer::Context::Pointer;

  HnswSparseStreamer(void);
  virtual ~HnswSparseStreamer(void);

  HnswSparseStreamer(const HnswSparseStreamer &streamer) = delete;
  HnswSparseStreamer &operator=(const HnswSparseStreamer &streamer) = delete;

 protected:
  //! Initialize Streamer
  int init(const IndexMeta &imeta, const ailego::Params &params) override;

  //! Cleanup Streamer
  int cleanup(void) override;

  //! Create a context
  Context::Pointer create_context(void) const override;

  //! Create a new sparse iterator
  IndexStreamer::SparseProvider::Pointer create_sparse_provider(
      void) const override;

  int add_impl(uint64_t pkey, const uint32_t sparse_count,
               const uint32_t *sparse_indices, const void *sparse_query,
               const IndexQueryMeta &qmeta, Context::Pointer &context) override;

  int add_with_id_impl(uint32_t id, const uint32_t sparse_count,
                       const uint32_t *sparse_indices, const void *sparse_query,
                       const IndexQueryMeta &qmeta,
                       Context::Pointer &context) override;

  //! Similarity search with sparse inputs
  int search_impl(const uint32_t sparse_count, const uint32_t *sparse_indices,
                  const void *sparse_query, const IndexQueryMeta &qmeta,
                  Context::Pointer &context) const override;

  //! Similarity search with sparse inputs
  int search_impl(const uint32_t *sparse_count, const uint32_t *sparse_indices,
                  const void *sparse_query, const IndexQueryMeta &qmeta,
                  uint32_t count, Context::Pointer &context) const override;

  //! Similarity brute force search with sparse inputs
  int search_bf_impl(const uint32_t sparse_count,
                     const uint32_t *sparse_indices, const void *sparse_query,
                     const IndexQueryMeta &qmeta,
                     Context::Pointer &context) const override;

  //! Similarity brute force search with sparse inputs
  int search_bf_impl(const uint32_t *sparse_count,
                     const uint32_t *sparse_indices, const void *sparse_query,
                     const IndexQueryMeta &qmeta, uint32_t count,
                     Context::Pointer &context) const override;

  //! Linear search by primary keys
  int search_bf_by_p_keys_impl(const uint32_t sparse_count,
                               const uint32_t *sparse_indices,
                               const void *sparse_query,
                               const std::vector<std::vector<uint64_t>> &p_keys,
                               const IndexQueryMeta &qmeta,
                               ContextPointer &context) const override;

  //! Linear search by primary keys with sparse inputs
  int search_bf_by_p_keys_impl(const uint32_t *sparse_count,
                               const uint32_t *sparse_indices,
                               const void *sparse_query,
                               const std::vector<std::vector<uint64_t>> &p_keys,
                               const IndexQueryMeta &qmeta, uint32_t count,
                               ContextPointer &context) const override;

  //! Fetch sparse vector by key
  int get_sparse_vector(uint64_t key, uint32_t *sparse_count,
                        std::string *sparse_indices_buffer,
                        std::string *sparse_values_buffer) const override {
    return entity_.get_sparse_vector_by_key(
        key, sparse_count, sparse_indices_buffer, sparse_values_buffer);
  }

  //! Fetch vector by id
  int get_sparse_vector_by_id(
      uint32_t id, uint32_t *sparse_count, std::string *sparse_indices_buffer,
      std::string *sparse_values_buffer) const override {
    return entity_.get_sparse_vector_by_id(
        id, sparse_count, sparse_indices_buffer, sparse_values_buffer);
  }

  //! Open index from file path
  int open(IndexStorage::Pointer stg) override;

  //! Close file
  int close(void) override;

  //! flush file
  int flush(uint64_t checkpoint) override;

  //! Dump index into storage
  int dump(const IndexDumper::Pointer &dumper) override;

  //! Retrieve statistics
  const Stats &stats(void) const override {
    return stats_;
  }

  //! Retrieve sparse meta of index
  const IndexMeta &meta(void) const override {
    return meta_;
  }

  void print_debug_info() override;

 private:
  inline int check_params(const IndexQueryMeta &qmeta) const {
    if (ailego_unlikely(qmeta.data_type() != meta_.data_type())) {
      LOG_ERROR("Unsupported query meta");
      return IndexError_Mismatch;
    }
    return 0;
  }

  inline int check_sparse_count_is_zero(const uint32_t *sparse_count,
                                        uint32_t count) const {
    for (uint32_t i = 0; i < count; ++i) {
      if (sparse_count[i] != 0)
        LOG_ERROR("Sparse cout is not empty. Index: %u, Sparse Count: %u", i,
                  sparse_count[i]);
      return IndexError_InvalidArgument;
    }

    return 0;
  }

 private:
  //! To share ctx across streamer/searcher, we need to update the context for
  //! current streamer/searcher
  int update_context(HnswSparseContext *ctx) const;

 private:
  enum State { STATE_INIT = 0, STATE_INITED = 1, STATE_OPENED = 2 };
  class Stats : public IndexStreamer::Stats {
   public:
    void clear(void) {
      set_revision_id(0u);
      set_loaded_count(0u);
      set_added_count(0u);
      set_discarded_count(0u);
      set_index_size(0u);
      set_dumped_size(0u);
      set_check_point(0u);
      set_create_time(0u);
      set_update_time(0u);
      clear_attributes();
    }
  };

  HnswSparseStreamerEntity entity_;
  HnswSparseAlgorithm::UPointer alg_;
  IndexMeta meta_{};
  IndexMetric::Pointer metric_{};

  IndexMetric::MatrixSparseDistance add_distance_{};
  IndexMetric::MatrixSparseDistance search_distance_{};
  Stats stats_{};
  std::mutex mutex_{};

  size_t max_index_size_{0UL};
  size_t chunk_size_{HnswSparseEntity::kDefaultChunkSize};
  size_t docs_hard_limit_{HnswSparseEntity::kDefaultDocsHardLimit};
  size_t docs_soft_limit_{0UL};
  uint32_t min_neighbor_cnt_{0u};
  uint32_t upper_max_neighbor_cnt_{
      HnswSparseEntity::kDefaultUpperMaxNeighborCnt};
  uint32_t l0_max_neighbor_cnt_{HnswSparseEntity::kDefaultL0MaxNeighborCnt};
  uint32_t ef_{HnswSparseEntity::kDefaultEf};
  uint32_t ef_construction_{HnswSparseEntity::kDefaultEfConstruction};
  uint32_t scaling_factor_{HnswSparseEntity::kDefaultScalingFactor};
  size_t bruteforce_threshold_{HnswSparseEntity::kDefaultBruteForceThreshold};
  size_t max_scan_limit_{HnswSparseEntity::kDefaultMaxScanLimit};
  size_t min_scan_limit_{HnswSparseEntity::kDefaultMinScanLimit};
  float bf_negative_prob_{HnswSparseEntity::kDefaultBFNegativeProbability};
  float max_scan_ratio_{HnswSparseEntity::kDefaultScanRatio};
  float sparse_neighbor_ratio_{HnswSparseEntity::kDefaultSparseNeighborRatio};
  uint32_t sparse_neighbor_cnt_{0UL};
  uint32_t sparse_min_neighbor_cnt_{0UL};
  uint32_t upper_sparse_neighbor_cnt_{0UL};

  bool query_filtering_enabled_{false};
  float query_filtering_ratio_{HnswSparseEntity::kDefaultQueryFilteringRatio};

  uint32_t magic_{0U};
  State state_{STATE_INIT};
  bool bf_enabled_{false};
  bool check_crc_enabled_{false};
  bool filter_same_key_{false};
  bool get_vector_enabled_{false};
  bool force_padding_topk_enabled_{false};

  //! avoid add vector while dumping index
  ailego::SharedMutex shared_mutex_{};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_sparse/hnsw_sparse_streamer_entity.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "hnsw_sparse_streamer_entity.h"
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <ailego/utility/memory_helper.h>
#include "utility/sparse_utility.h"
#include "hnsw_sparse_dist_calculator.h"

namespace zvec {
namespace core {

//! Constructor: initializes stats_ from the caller-supplied stats object.
//! stats_ is later handed to the chunk broker created in init().
HnswSparseStreamerEntity::HnswSparseStreamerEntity(IndexStreamer::Stats &stats)
    : stats_(stats) {}

//! Destructor: nothing beyond member destruction is required
HnswSparseStreamerEntity::~HnswSparseStreamerEntity() = default;

//! Prepare the entity for use.
//! Creates the chunk broker, the upper-level neighbor index and the key map,
//! derives the per-node record size and finally sets up the chunk parameters.
//! Returns IndexError_InvalidArgument when
//! scaling_factor()^kMaxGraphLayers is smaller than max_doc_cnt,
//! IndexError_NoMemory when a helper object is missing, otherwise the result
//! of init_chunk_params(max_index_size).
int HnswSparseStreamerEntity::init(uint64_t max_index_size,
                                   size_t max_doc_cnt) {
  const bool layers_too_few =
      std::pow(scaling_factor(), kMaxGraphLayers) < max_doc_cnt;
  if (layers_too_few) {
    LOG_ERROR("scalingFactor=%zu is too small", scaling_factor());
    return IndexError_InvalidArgument;
  }

  std::lock_guard<std::mutex> guard(mutex_);

  broker_ = std::make_shared<SparseChunkBroker>(stats_);
  upper_neighbor_index_ = std::make_shared<NIHashMap>();
  keys_map_lock_ = std::make_shared<ailego::SharedMutex>();
  keys_map_ = std::make_shared<HashMap<key_t, node_id_t>>();

  const bool all_created =
      keys_map_ && upper_neighbor_index_ && broker_ && keys_map_lock_;
  if (!all_created) {
    LOG_ERROR("HnswSparseStreamerEntity new object failed");
    return IndexError_NoMemory;
  }
  keys_map_->set_empty_key(kInvalidKey);

  neighbor_size_ = neighbors_size();
  upper_neighbor_size_ = upper_neighbors_size();

  //! A node record holds: key + sparse meta + level 0 neighbors
  size_t node_bytes = sizeof(key_t) + neighbor_size_ + sparse_meta_size();
  node_bytes = AlignSize(node_bytes);
  set_node_size(node_bytes);

  return init_chunk_params(max_index_size);
}

int HnswSparseStreamerEntity::cleanup() {
  std::lock_guard<std::mutex> lock(mutex_);
  mutable_header()->clear();
  chunk_size_ = kDefaultChunkSize;
  node_index_mask_bits_ = 0U;
  node_index_mask_ = 0U;
  node_cnt_per_chunk_ = 0U;
  neighbor_size_ = 0U;
  upper_neighbor_size_ = 0U;
  if (upper_neighbor_index_) {
    upper_neighbor_index_->cleanup();
  }
  if (keys_map_) {
    keys_map_->clear();
  }
  node_chunks_.clear();
  upper_neighbor_chunks_.clear();
  filter_same_key_ = false;
  get_vector_enabled_ = false;
  broker_.reset();

  return 0;
}

//! Overwrite the neighbor list of node `id` at `level`.
//! The header (count) and the neighbor ids are assembled in a scratch buffer
//! and written to the neighbor chunk in a single call; the distances in
//! `neighbors` are not stored.
//! Returns 0 on success, IndexError_Runtime when the write is short.
int HnswSparseStreamerEntity::update_neighbors(
    level_t level, node_id_t id,
    const std::vector<std::pair<node_id_t, dist_t>> &neighbors) {
  std::vector<char> scratch(neighbor_size_);
  auto *header = reinterpret_cast<NeighborsHeader *>(scratch.data());

  const size_t total = neighbors.size();
  header->neighbor_cnt = total;
  for (size_t pos = 0; pos != total; ++pos) {
    header->neighbors[pos] = neighbors[pos].first;
  }

  auto loc = get_neighbor_chunk_loc(level, id);

  //! Only the used part of the buffer is written: header + `total` ids
  const size_t bytes =
      reinterpret_cast<char *>(&header->neighbors[total]) - &scratch[0];
  const size_t written = loc.first->write(loc.second, header, bytes);
  if (ailego_unlikely(written != bytes)) {
    LOG_ERROR("Write neighbor header failed, ret=%zu", written);

    return IndexError_Runtime;
  }

  return 0;
}

//! Read the neighbor list of node `id` at `level`.
//! Level 0 lists live inside the node record (after the key and the sparse
//! meta); upper level lists are located via get_upper_neighbor_chunk_loc().
//! Returns an empty Neighbors object when the read is short.
const Neighbors HnswSparseStreamerEntity::get_neighbors(level_t level,
                                                        node_id_t id) const {
  SparseChunk *source = nullptr;
  size_t source_offset = 0UL;
  size_t wanted = neighbor_size_;

  if (level != 0UL) {
    auto upper_loc = get_upper_neighbor_chunk_loc(level, id);
    source = upper_neighbor_chunks_[upper_loc.first].get();
    source_offset = upper_loc.second;
    wanted = upper_neighbor_size_;
  } else {
    uint32_t chunk_no = id >> node_index_mask_bits_;
    source_offset = (id & node_index_mask_) * node_size() + sizeof(key_t) +
                    sparse_meta_size();

    //! Make sure the cached node chunk list covers chunk_no before indexing
    sync_chunks(SparseChunkBroker::CHUNK_TYPE_NODE, chunk_no, &node_chunks_);
    ailego_assert_with(chunk_no < node_chunks_.size(), "invalid chunk idx");
    source = node_chunks_[chunk_no].get();
  }

  ailego_assert_with(source_offset < source->data_size(),
                     "invalid chunk offset");
  IndexStorage::MemoryBlock list_block;
  const size_t got = source->read(source_offset, list_block, wanted);
  if (ailego_unlikely(got != wanted)) {
    LOG_ERROR("Read neighbor header failed, ret=%zu", got);
    return Neighbors();
  }
  return Neighbors(std::move(list_block));
}

//! Get a pointer to the sparse meta record of node `id`.
//! A short read is logged only; the pointer produced by the chunk read
//! (initially nullptr) is returned as is.
const void *HnswSparseStreamerEntity::get_vector_meta(node_id_t id) const {
  auto loc = get_vector_chunk_loc(id);
  ailego_assert_with(loc.first < node_chunks_.size(), "invalid chunk idx");
  ailego_assert_with(loc.second < node_chunks_[loc.first]->data_size(),
                     "invalid chunk offset");

  const size_t wanted = sparse_meta_size();
  const void *meta = nullptr;

  const size_t got = node_chunks_[loc.first]->read(loc.second, &meta, wanted);
  if (ailego_unlikely(got != wanted)) {
    LOG_ERROR("Read vector failed, offset=%u, read size=%zu, ret=%zu",
              loc.second, wanted, got);
  }

  return meta;
}

int HnswSparseStreamerEntity::get_vector_meta(
    const node_id_t id, IndexStorage::MemoryBlock &block) const {
  auto loc = get_vector_chunk_loc(id);
  ailego_assert_with(loc.first < node_chunks_.size(), "invalid chunk idx");
  ailego_assert_with(loc.second < node_chunks_[loc.first]->data_size(),
                     "invalid chunk offset");

  size_t read_size = sparse_meta_size();

  size_t ret = node_chunks_[loc.first]->read(loc.second, block, read_size);
  if (ailego_unlikely(ret != read_size)) {
    LOG_ERROR("Read vector failed, offset=%u, read size=%zu, ret=%zu",
              loc.second, read_size, ret);
    return IndexError_ReadData;
  }

  return 0;
}

//! Get pointers to the sparse meta records of `count` nodes.
//! vecs[i] receives the record of ids[i]. Stops at the first short read and
//! returns IndexError_ReadData; returns 0 when every read succeeds.
int HnswSparseStreamerEntity::get_vector_metas(const node_id_t *ids,
                                               uint32_t count,
                                               const void **vecs) const {
  for (auto slot = 0U; slot < count; ++slot) {
    auto loc = get_vector_chunk_loc(ids[slot]);
    ailego_assert_with(loc.first < node_chunks_.size(), "invalid chunk idx");
    ailego_assert_with(loc.second < node_chunks_[loc.first]->data_size(),
                       "invalid chunk offset");

    const size_t wanted = sparse_meta_size();
    const size_t got =
        node_chunks_[loc.first]->read(loc.second, &vecs[slot], wanted);
    if (ailego_unlikely(got != wanted)) {
      LOG_ERROR("Read vector failed, offset=%u, read size=%zu, ret=%zu",
                loc.second, wanted, got);
      return IndexError_ReadData;
    }
  }

  return 0;
}

//! Read the sparse meta records of `count` nodes into memory blocks.
//! block_vecs is resized to `count`; block_vecs[i] receives the record of
//! ids[i]. Stops at the first short read and returns IndexError_ReadData;
//! returns 0 when every read succeeds.
int HnswSparseStreamerEntity::get_vector_metas(
    const node_id_t *ids, uint32_t count,
    std::vector<IndexStorage::MemoryBlock> &block_vecs) const {
  block_vecs.resize(count);

  for (auto slot = 0U; slot < count; ++slot) {
    auto loc = get_vector_chunk_loc(ids[slot]);
    ailego_assert_with(loc.first < node_chunks_.size(), "invalid chunk idx");
    ailego_assert_with(loc.second < node_chunks_[loc.first]->data_size(),
                       "invalid chunk offset");

    const size_t wanted = sparse_meta_size();
    auto &target = block_vecs[slot];
    const size_t got = node_chunks_[loc.first]->read(loc.second, target, wanted);
    if (ailego_unlikely(got != wanted)) {
      LOG_ERROR("Read vector failed, offset=%u, read size=%zu, ret=%zu",
                loc.second, wanted, got);
      return IndexError_ReadData;
    }
  }

  return 0;
}

//! Get a pointer to `len` bytes of sparse data at a packed location.
//! The upper 32 bits of `offset` are the chunk index, the lower 32 bits the
//! offset inside that chunk. A short read is logged only; the pointer
//! produced by the chunk read (initially nullptr) is returned as is.
const void *HnswSparseStreamerEntity::get_sparse_data(uint64_t offset,
                                                      uint32_t len) const {
  const uint32_t chunk_no = static_cast<uint32_t>(offset >> 32);
  const uint32_t in_chunk = static_cast<uint32_t>(offset & 0xFFFFFFFF);

  auto loc = get_sparse_chunk_loc(chunk_no, in_chunk);

  ailego_assert_with(loc.first < sparse_node_chunks_.size(),
                     "invalid chunk idx");
  ailego_assert_with(loc.second < sparse_node_chunks_[loc.first]->data_size(),
                     "invalid chunk offset");

  const void *payload = nullptr;
  const size_t got =
      sparse_node_chunks_[loc.first]->read(loc.second, &payload, len);
  if (ailego_unlikely(got != len)) {
    LOG_ERROR("Read sparse vector failed, offset=%zu, read size=%u, ret=%zu",
              (size_t)offset, len, got);
  }
  return payload;
}

int HnswSparseStreamerEntity::get_sparse_data(
    uint64_t offset, uint32_t len, IndexStorage::MemoryBlock &block) const {
  uint32_t chunk_index = offset >> 32;
  uint32_t chunk_offset = offset & 0xFFFFFFFF;

  auto loc = get_sparse_chunk_loc(chunk_index, chunk_offset);
  ailego_assert_with(loc.first < sparse_node_chunks_.size(),
                     "invalid chunk idx");
  ailego_assert_with(loc.second < sparse_node_chunks_[loc.first]->data_size(),
                     "invalid chunk offset");

  size_t ret = sparse_node_chunks_[loc.first]->read(loc.second, block, len);
  if (ailego_unlikely(ret != len)) {
    LOG_ERROR("Read sparse vector failed, offset=%zu, read size=%u, ret=%zu",
              (size_t)offset, len, ret);
    return IndexError_ReadData;
  }
  return 0;
}

//! Get a pointer to the sparse data of node `id`.
//! Looks up the node's meta record and resolves the sparse data it refers
//! to. Returns nullptr when the meta record is unavailable or when
//! get_sparse_data_from_vector() yields no data.
const void *HnswSparseStreamerEntity::get_sparse_data(node_id_t id) const {
  const void *meta = get_vector_meta(id);
  //! get_vector_meta() may hand back nullptr after a failed read (it logs
  //! the failure itself); get_sparse_data_from_vector() dereferences its
  //! argument, so bail out here
  if (ailego_unlikely(meta == nullptr)) {
    return nullptr;
  }

  auto sparse_data = get_sparse_data_from_vector(meta);

  return sparse_data.first;
}

//! Read the sparse data of node `id` into `block`.
//! Returns 0 on success, the error of get_vector_meta() when the meta record
//! cannot be read, or the error of get_sparse_data_from_vector().
int HnswSparseStreamerEntity::get_sparse_data(
    node_id_t id, IndexStorage::MemoryBlock &block) const {
  IndexStorage::MemoryBlock meta_block;
  //! Do not touch meta_block.data() unless the meta read succeeded; the
  //! return code used to be ignored
  const int ret = get_vector_meta(id, meta_block);
  if (ailego_unlikely(ret != 0)) {
    return ret;
  }

  int sparse_length = 0;
  return get_sparse_data_from_vector(meta_block.data(), block, sparse_length);
}

//! Get sparse data from vector
std::pair<const void *, uint32_t>
HnswSparseStreamerEntity::get_sparse_data_from_vector(const void *vec) const {
  const char *vec_ptr = reinterpret_cast<const char *>(vec);

  uint64_t offset = *((uint64_t *)(vec_ptr));
  uint32_t sparse_vector_len = *((uint32_t *)(vec_ptr + sizeof(uint64_t)));

  if (sparse_vector_len > 0) {
    const void *sparse_data = get_sparse_data(offset, sparse_vector_len);
    if (ailego_unlikely(sparse_data == nullptr)) {
      LOG_ERROR("Get nullptr sparse, offset=%zu, len=%u", (size_t)offset,
                sparse_vector_len);

      return std::make_pair(nullptr, 0);
    }

    return std::make_pair(sparse_data, sparse_vector_len);
  }

  return std::make_pair(nullptr, 0);
}

//! Load the sparse data referenced by a meta record into `block`.
//! `vec` is read as a uint64_t packed location followed by a uint32_t length.
//! When the length is zero nothing is read and sparse_length is left
//! untouched; otherwise sparse_length receives the length on success.
//! Returns 0 on success, IndexError_ReadData when the data cannot be read.
int HnswSparseStreamerEntity::get_sparse_data_from_vector(
    const void *vec, IndexStorage::MemoryBlock &block,
    int &sparse_length) const {
  const char *record = reinterpret_cast<const char *>(vec);

  const uint64_t location = *reinterpret_cast<const uint64_t *>(record);
  const uint32_t length =
      *reinterpret_cast<const uint32_t *>(record + sizeof(uint64_t));

  if (length == 0) {
    return 0;
  }

  const int status = get_sparse_data(location, length, block);
  if (ailego_unlikely(status != 0)) {
    LOG_ERROR("Get nullptr sparse, offset=%zu, len=%u", (size_t)location,
              length);
    return IndexError_ReadData;
  }

  sparse_length = length;
  return 0;
}

//! Read the primary key stored for node `id`.
//! Returns kInvalidKey when the read is short.
key_t HnswSparseStreamerEntity::get_key(node_id_t id) const {
  auto loc = get_key_chunk_loc(id);
  ailego_assert_with(loc.first < node_chunks_.size(), "invalid chunk idx");
  ailego_assert_with(loc.second < node_chunks_[loc.first]->data_size(),
                     "invalid chunk offset");

  IndexStorage::MemoryBlock holder;
  const size_t got =
      node_chunks_[loc.first]->read(loc.second, holder, sizeof(key_t));
  if (ailego_unlikely(got != sizeof(key_t))) {
    LOG_ERROR("Read vector failed, ret=%zu", got);
    return kInvalidKey;
  }

  const key_t *stored = reinterpret_cast<const key_t *>(holder.data());
  return *stored;
}

//! Append one neighbor to the list of node `id` at `level`.
//! `size` is the current number of neighbors: neighbor_id is written into
//! slot `size` and the stored count is then updated to size + 1. Write
//! failures are logged; when the id write fails the count is left unchanged.
void HnswSparseStreamerEntity::add_neighbor(level_t level, node_id_t id,
                                            uint32_t size,
                                            node_id_t neighbor_id) {
  auto loc = get_neighbor_chunk_loc(level, id);
  const size_t slot_offset =
      loc.second + sizeof(NeighborsHeader) + size * sizeof(node_id_t);
  ailego_assert_with(size < neighbor_cnt(level), "invalid neighbor size");
  ailego_assert_with(slot_offset < loc.first->data_size(),
                     "invalid chunk offset");

  //! Step 1: store the new neighbor id in the first free slot
  size_t written = loc.first->write(slot_offset, &neighbor_id, sizeof(node_id_t));
  if (ailego_unlikely(written != sizeof(node_id_t))) {
    LOG_ERROR("Write neighbor id failed, ret=%zu", written);
    return;
  }

  //! Step 2: publish the new count at the start of the list
  uint32_t updated_cnt = size + 1;
  written = loc.first->write(loc.second, &updated_cnt, sizeof(uint32_t));
  if (ailego_unlikely(written != sizeof(uint32_t))) {
    LOG_ERROR("Write neighbor cnt failed, ret=%zu", written);
  }
}

//! Load the state of an existing index from its chunks.
//! Copies the header out of header_chunk, validates it with
//! check_hnsw_index() (closing the broker on failure) and then caches the
//! handles of all node, upper-neighbor and sparse-node chunks.
//! Returns 0 on success, IndexError_InvalidFormat for an undersized header
//! chunk or a missing chunk, IndexError_ReadData for a short header read, or
//! the error of check_hnsw_index().
int HnswSparseStreamerEntity::init_chunks(
    const SparseChunk::Pointer &header_chunk) {
  if (header_chunk->data_size() < header_size()) {
    LOG_ERROR("Invalid header chunk size");
    return IndexError_InvalidFormat;
  }

  IndexStorage::MemoryBlock header_block;
  const size_t header_read = header_chunk->read(0UL, header_block, header_size());
  if (ailego_unlikely(header_read != header_size())) {
    LOG_ERROR("Read header chunk failed");
    return IndexError_ReadData;
  }
  const auto *stored_header =
      reinterpret_cast<const HNSWSparseHeader *>(header_block.data());
  *mutable_header() = *stored_header;

  const int status = check_hnsw_index(&header());
  if (status != 0) {
    broker_->close();
    return status;
  }

  //! Node chunks
  node_chunks_.resize(
      broker_->get_chunk_cnt(SparseChunkBroker::CHUNK_TYPE_NODE));
  for (auto idx = 0UL; idx < node_chunks_.size(); ++idx) {
    auto &slot = node_chunks_[idx];
    slot = broker_->get_chunk(SparseChunkBroker::CHUNK_TYPE_NODE, idx);
    if (!slot) {
      LOG_ERROR("Missing hnsw streamer data chunk %zu th of %zu", idx,
                node_chunks_.size());
      return IndexError_InvalidFormat;
    }
  }

  //! Upper-level neighbor chunks
  upper_neighbor_chunks_.resize(
      broker_->get_chunk_cnt(SparseChunkBroker::CHUNK_TYPE_UPPER_NEIGHBOR));
  for (auto idx = 0UL; idx < upper_neighbor_chunks_.size(); ++idx) {
    auto &slot = upper_neighbor_chunks_[idx];
    slot = broker_->get_chunk(SparseChunkBroker::CHUNK_TYPE_UPPER_NEIGHBOR, idx);
    if (!slot) {
      LOG_ERROR("Missing hnsw streamer index chunk %zu th of %zu", idx,
                upper_neighbor_chunks_.size());
      return IndexError_InvalidFormat;
    }
  }

  //! Sparse data chunks
  sparse_node_chunks_.resize(
      broker_->get_chunk_cnt(SparseChunkBroker::CHUNK_TYPE_SPARSE_NODE));
  for (auto idx = 0UL; idx < sparse_node_chunks_.size(); ++idx) {
    auto &slot = sparse_node_chunks_[idx];
    slot = broker_->get_chunk(SparseChunkBroker::CHUNK_TYPE_SPARSE_NODE, idx);
    if (!slot) {
      LOG_ERROR("Missing hnsw streamer sparse data chunk %zu th of %zu", idx,
                sparse_node_chunks_.size());
      return IndexError_InvalidFormat;
    }
  }

  return 0;
}

//! Open the entity on top of the given storage.
//! A storage without a header chunk is treated as an empty index and gets a
//! freshly written header. Otherwise the existing chunks are loaded, the doc
//! count is reconciled with the node chunks and, when key lookup is enabled,
//! the key map is rebuilt.
//! return 0 on success, or errCode in failure
int HnswSparseStreamerEntity::open(IndexStorage::Pointer stg, bool check_crc) {
  std::lock_guard<std::mutex> guard(mutex_);

  int rc =
      broker_->open(std::move(stg), max_index_size_, chunk_size_, check_crc);
  if (ailego_unlikely(rc != 0)) {
    LOG_ERROR("Open index failed for %s", IndexError::What(rc));
    return rc;
  }

  rc = upper_neighbor_index_->init(broker_, upper_neighbor_chunk_size_,
                                   scaling_factor(), estimate_doc_capacity(),
                                   kUpperHashMemoryInflateRatio);
  if (ailego_unlikely(rc != 0)) {
    LOG_ERROR("Init neighbor hash map failed");
    return rc;
  }

  //! Look for a persisted header
  auto stored_header =
      broker_->get_chunk(SparseChunkBroker::CHUNK_TYPE_HEADER,
                         SparseChunkBroker::kDefaultChunkSeqId);
  if (!stored_header) {
    //! Empty index: allocate the header chunk and persist the current header
    auto created = broker_->alloc_chunk(SparseChunkBroker::CHUNK_TYPE_HEADER,
                                        SparseChunkBroker::kDefaultChunkSeqId,
                                        header_size());
    if (ailego_unlikely(created.first != 0)) {
      LOG_ERROR("Alloc header chunk failed");
      return created.first;
    }
    const size_t written =
        created.second->write(0UL, &header(), header_size());
    if (ailego_unlikely(written != header_size())) {
      LOG_ERROR("Write header chunk failed");
      return IndexError_WriteData;
    }
    return 0;
  }

  //! Existing index: load header and chunk tables
  rc = init_chunks(stored_header);
  if (ailego_unlikely(rc != 0)) {
    return rc;
  }

  //! Number of node records present in the node chunks; this may exceed the
  //! doc count stored in the header when the index was not fully flushed
  node_id_t total_vecs = 0;
  if (!node_chunks_.empty()) {
    const size_t tail_idx = node_chunks_.size() - 1;
    const auto &tail_chunk = node_chunks_[tail_idx];
    if (tail_chunk->data_size() % node_size() != 0) {
      LOG_WARN("The index may broken");
      return IndexError_InvalidFormat;
    }
    total_vecs = tail_idx * node_cnt_per_chunk_ +
                 tail_chunk->data_size() / node_size();
  }

  LOG_INFO(
      "Open index, l0NeighborCnt=%zu upperneighborCnt=%zu "
      "efConstruction=%zu curDocCnt=%u totalVecs=%u maxLevel=%u",
      l0_neighbor_cnt(), upper_neighbor_cnt(), ef_construction(), doc_cnt(),
      total_vecs, cur_max_level());

  //! Trust the chunk content over the header when they disagree
  if (total_vecs != doc_cnt()) {
    LOG_WARN("Index closed abnormally, using totalVecs as curDocCnt");
    *mutable_doc_cnt() = total_vecs;
  }

  //! Rebuild the key -> local id map when any feature needs it
  const bool need_key_map = filter_same_key_ || get_vector_enabled_;
  if (need_key_map) {
    auto &key_to_id = *keys_map_;
    for (node_id_t node = 0U; node < doc_cnt(); ++node) {
      key_to_id[get_key(node)] = node;
    }
  }

  stats_.set_loaded_count(doc_cnt());

  return 0;
}

//! Close the entity: persist the header, drop all in-memory state and close
//! the underlying broker.
//! return the result of closing the broker (0 on success, or errCode)
int HnswSparseStreamerEntity::close() {
  LOG_DEBUG("close index");

  std::lock_guard<std::mutex> lock(mutex_);
  // Write the header out first, before the in-memory copy is reset below
  flush_header();
  mutable_header()->reset();
  upper_neighbor_index_->cleanup();
  keys_map_->clear();
  header_.clear();
  // Release the cached chunk handles of every chunk type
  node_chunks_.clear();
  upper_neighbor_chunks_.clear();

  sparse_node_chunks_.clear();

  return broker_->close();
}

//! Flush the entity: write the header, then ask the broker to flush with the
//! given checkpoint.
//! return 0 on success, or the broker's errCode in failure
int HnswSparseStreamerEntity::flush(uint64_t checkpoint) {
  LOG_INFO("Flush index, curDocs=%u", doc_cnt());

  std::lock_guard<std::mutex> guard(mutex_);
  flush_header();
  return broker_->flush(checkpoint);
}

//! Dump the index through the given dumper.
//! The dumped byte count is accumulated into the streamer stats.
//! return 0 on success, or errCode in failure
int HnswSparseStreamerEntity::dump(const IndexDumper::Pointer &dumper) {
  LOG_INFO("Dump index, curDocs=%u", doc_cnt());

  //! Collect the key of every node, indexed by local id
  std::vector<key_t> keys(doc_cnt());
  node_id_t node = 0;
  while (node < doc_cnt()) {
    keys[node] = get_key(node);
    ++node;
  }

  //! Level lookup used while dumping neighbors: nodes without an entry in the
  //! upper neighbor index live on level 0 only
  auto get_level = [&](node_id_t id) {
    auto found = upper_neighbor_index_->find(id);
    if (found != upper_neighbor_index_->end()) {
      return reinterpret_cast<const UpperNeighborIndexMeta *>(&found->second)
          ->level;
    }
    return 0U;
  };

  auto dumped = dump_segments(dumper, keys.data(), get_level);
  if (ailego_unlikely(dumped < 0)) {
    return dumped;
  }
  *stats_.mutable_dumped_size() += dumped;

  return 0;
}

//! Validate a persisted header against the current build parameters.
//! return 0 when the header is acceptable, IndexError_Mismatch when a
//! parameter differs, or IndexError_InvalidFormat when the header content is
//! inconsistent
int HnswSparseStreamerEntity::check_hnsw_index(
    const HNSWSparseHeader *hd) const {
  const bool l0_differs = l0_neighbor_cnt() != hd->neighbor_cnt();
  const bool upper_differs = upper_neighbor_cnt() != hd->upper_neighbor_cnt();
  if (l0_differs || upper_differs) {
    LOG_ERROR("Param neighbors:%zu:%zu mismatch index previous %zu:%zu",
              l0_neighbor_cnt(), upper_neighbor_cnt(), hd->neighbor_cnt(),
              hd->upper_neighbor_cnt());
    return IndexError_Mismatch;
  }

  //! A different efConstruction is only reported, it does not fail the check
  if (hd->ef_construction() != ef_construction()) {
    LOG_WARN("Param efConstruction %zu mismatch index previous %zu",
             ef_construction(), hd->ef_construction());
  }

  if (hd->scaling_factor() != scaling_factor()) {
    LOG_WARN("Param scalingFactor %zu mismatch index previous %zu",
             scaling_factor(), hd->scaling_factor());
    return IndexError_Mismatch;
  }

  if (hd->neighbor_prune_cnt() != prune_cnt()) {
    LOG_WARN("Param pruneCnt %zu mismatch index previous %zu", prune_cnt(),
             hd->neighbor_prune_cnt());
    return IndexError_Mismatch;
  }

  //! The entry point must address an existing doc, and must exist as soon as
  //! there is at least one doc
  const bool has_entry = hd->entry_point() != kInvalidNodeId;
  const bool entry_invalid = has_entry ? (hd->entry_point() >= hd->doc_cnt())
                                       : (hd->doc_cnt() > 0U);
  if (entry_invalid) {
    LOG_WARN("Invalid entryPoint %u, docCnt %u", hd->entry_point(),
             hd->doc_cnt());
    return IndexError_InvalidFormat;
  }

  //! Node chunks without an entry point indicate a header that was never
  //! written after data was added
  if (!has_entry &&
      broker_->get_chunk_cnt(SparseChunkBroker::CHUNK_TYPE_NODE) > 0) {
    LOG_WARN("The index is broken, maybe it haven't flush");
    return IndexError_InvalidFormat;
  }

  return 0;
}

//! Append a sparse vector together with its key; the assigned local id is
//! stored in `id`.
//! The vector bytes are written to the last sparse chunk (a new one is
//! allocated when the aligned length does not fit) and a node record holding
//! the sparse location, the aligned length and the key is written to the last
//! node chunk (a new one is allocated when it is full).
//! return 0 on success, IndexError_Duplicate when duplicate filtering is on
//! and the key already exists, or another errCode in failure
int HnswSparseStreamerEntity::add_vector(level_t level, key_t key,
                                         const std::string &sparse_vec,
                                         uint32_t sparse_count, node_id_t *id) {
  // Reject oversized input before the length is narrowed to 32 bits below;
  // otherwise a buffer larger than UINT32_MAX would wrap around and could
  // pass the aligned-length check
  if (ailego_unlikely(sparse_vec.size() > sparse_chunk_size_)) {
    LOG_ERROR(
        "Sparse Vector Length exceed the chunk size, sparse vec len: %zu, chunk "
        "size: %u",
        sparse_vec.size(), sparse_chunk_size_);
    return IndexError_InvalidArgument;
  }

  // allocate sparse chunk
  uint32_t sparse_vector_len = static_cast<uint32_t>(sparse_vec.size());

  sparse_vector_len = AlignSize(sparse_vector_len);

  if (sparse_vector_len > sparse_chunk_size_) {
    LOG_ERROR(
        "Sparse Vector Length exceed the chunk size, sparse vec len: %u, chunk "
        "size: %u",
        sparse_vector_len, sparse_chunk_size_);
    return IndexError_InvalidArgument;
  }

  SparseChunk::Pointer node_chunk;
  SparseChunk::Pointer sparse_node_chunk;

  size_t chunk_offset = -1UL;
  size_t sparse_chunk_offset = -1UL;

  std::lock_guard<std::mutex> lock(mutex_);
  // duplicate check
  if (ailego_unlikely(filter_same_key_ && get_id(key) != kInvalidNodeId)) {
    LOG_WARN("Try to add duplicate key, ignore it");
    return IndexError_Duplicate;
  }

  node_id_t local_id = static_cast<node_id_t>(doc_cnt());

  // Pick the node chunk: index wraps to -1U when no chunk exists yet
  uint32_t chunk_index = node_chunks_.size() - 1U;
  if (chunk_index == -1U ||
      (node_chunks_[chunk_index]->data_size() >=
       node_cnt_per_chunk_ * node_size())) {  // no space left and need to alloc
    if (ailego_unlikely(node_chunks_.capacity() == node_chunks_.size())) {
      LOG_ERROR("add vector failed for no memory quota");
      return IndexError_IndexFull;
    }
    chunk_index++;
    auto p = broker_->alloc_chunk(SparseChunkBroker::CHUNK_TYPE_NODE,
                                  chunk_index, chunk_size_);
    if (ailego_unlikely(p.first != 0)) {
      LOG_ERROR("Alloc data chunk failed");
      return p.first;
    }
    node_chunk = p.second;
    chunk_offset = 0UL;
    node_chunks_.emplace_back(node_chunk);
  } else {
    node_chunk = node_chunks_[chunk_index];
    chunk_offset = node_chunk->data_size();
  }

  // Pick the sparse chunk: a new one is needed when none exists or when the
  // aligned vector does not fit into the remaining space of the last one
  uint32_t sparse_chunk_index = sparse_node_chunks_.size() - 1U;
  if (sparse_chunk_index == -1U ||
      sparse_node_chunks_[sparse_chunk_index]->data_size() + sparse_vector_len >
          sparse_chunk_size_) {
    if (ailego_unlikely(sparse_node_chunks_.capacity() ==
                        sparse_node_chunks_.size())) {
      LOG_ERROR("add vector failed for no memory quota");
      return IndexError_IndexFull;
    }
    sparse_chunk_index++;
    auto p = broker_->alloc_chunk(SparseChunkBroker::CHUNK_TYPE_SPARSE_NODE,
                                  sparse_chunk_index, sparse_chunk_size_);
    if (ailego_unlikely(p.first != 0)) {
      LOG_ERROR("Alloc data chunk failed");
      return p.first;
    }
    sparse_node_chunk = p.second;

    sparse_node_chunks_.emplace_back(sparse_node_chunk);

    sparse_chunk_offset = 0UL;
  } else {
    sparse_node_chunk = sparse_node_chunks_[sparse_chunk_index];
    sparse_chunk_offset = sparse_node_chunk->data_size();
  }

  // write sparse vector
  if (sparse_vec.size() > 0) {
    size_t size = sparse_node_chunk->write(
        sparse_chunk_offset, sparse_vec.data(), sparse_vec.size());
    if (ailego_unlikely(size != sparse_vec.size())) {
      LOG_ERROR("SparseChunk write sparse vec failed, ret=%zu", size);
      return IndexError_WriteData;
    }
  }

  // Sparse location: chunk index in the high 32 bits, offset in the low ones
  uint64_t sparse_offset = sparse_chunk_index;
  sparse_offset = (sparse_offset << 32) + sparse_chunk_offset;

  // Node record: sparse location, then aligned length, then the key located
  // after the sparse meta
  size_t size =
      node_chunk->write(chunk_offset, &sparse_offset, sizeof(uint64_t));
  if (ailego_unlikely(size != sizeof(uint64_t))) {
    LOG_ERROR("SparseChunk write sparse vec index failed, ret=%zu", size);
    return IndexError_WriteData;
  }

  size = node_chunk->write(chunk_offset + sizeof(uint64_t), &sparse_vector_len,
                           sizeof(uint32_t));
  if (ailego_unlikely(size != sizeof(uint32_t))) {
    LOG_ERROR("SparseChunk write sparse vec len failed, ret=%zu", size);
    return IndexError_WriteData;
  }

  size =
      node_chunk->write(chunk_offset + sparse_meta_size(), &key, sizeof(key_t));
  if (ailego_unlikely(size != sizeof(key_t))) {
    LOG_ERROR("SparseChunk write vec failed, ret=%zu", size);
    return IndexError_WriteData;
  }

  //! level 0 neighbors is inited to zero by default
  int ret = add_upper_neighbor(level, local_id);
  if (ret != 0) {
    return ret;
  }

  // The chunk data sizes are advanced only after every write above succeeded
  if (sparse_vector_len > 0) {
    sparse_chunk_offset += sparse_vector_len;
    if (ailego_unlikely(sparse_node_chunk->resize(sparse_chunk_offset) !=
                        sparse_chunk_offset)) {
      LOG_ERROR("SparseChunk resize to %zu failed", sparse_chunk_offset);
      return IndexError_Runtime;
    }
  }

  chunk_offset += node_size();
  if (ailego_unlikely(node_chunk->resize(chunk_offset) != chunk_offset)) {
    LOG_ERROR("SparseChunk resize to %zu failed", chunk_offset);
    return IndexError_Runtime;
  }

  if (filter_same_key_ || get_vector_enabled_) {
    // Scoped guard so the exclusive lock is released even if the insert throws
    std::lock_guard keys_guard(*keys_map_lock_);
    (*keys_map_)[key] = local_id;
  }

  *mutable_doc_cnt() += 1;
  *mutable_total_sparse_count() += sparse_count;

  broker_->mark_dirty();
  *id = local_id;

  return 0;
}

//! Add a sparse vector at an explicit local id; the id is also used as the
//! key.
//! Every slot between the current doc count and `id` is first filled with a
//! node record carrying kInvalidKey. The vector bytes are written to the last
//! sparse chunk (a new one is allocated when the aligned length does not fit)
//! and the node record is written to the slot addressed by `id`.
//! return 0 on success, IndexError_Duplicate when duplicate filtering is on
//! and the key already exists, or another errCode in failure
int HnswSparseStreamerEntity::add_vector_with_id(level_t level, node_id_t id,
                                                 const std::string &sparse_vec,
                                                 uint32_t sparse_count) {
  // Reject oversized input before the length is narrowed to 32 bits below;
  // otherwise a buffer larger than UINT32_MAX would wrap around and could
  // pass the aligned-length check
  if (ailego_unlikely(sparse_vec.size() > sparse_chunk_size_)) {
    LOG_ERROR(
        "Sparse Vector Length exceed the chunk size, sparse vec len: %zu, chunk "
        "size: %u",
        sparse_vec.size(), sparse_chunk_size_);
    return IndexError_InvalidArgument;
  }

  key_t key = id;
  SparseChunk::Pointer node_chunk;
  SparseChunk::Pointer sparse_node_chunk;
  size_t chunk_offset = -1UL;
  size_t sparse_chunk_offset = -1UL;

  // allocate sparse chunk
  uint32_t sparse_vector_len = static_cast<uint32_t>(sparse_vec.size());

  sparse_vector_len = AlignSize(sparse_vector_len);

  if (sparse_vector_len > sparse_chunk_size_) {
    LOG_ERROR(
        "Sparse Vector Length exceed the chunk size, sparse vec len: %u, chunk "
        "size: %u",
        sparse_vector_len, sparse_chunk_size_);
    return IndexError_InvalidArgument;
  }


  std::lock_guard<std::mutex> lock(mutex_);

  // duplicate check
  if (ailego_unlikely(filter_same_key_ && get_id(key) != kInvalidNodeId)) {
    LOG_WARN("Try to add duplicate key, ignore it");
    return IndexError_Duplicate;
  }

  // Resolve the node chunk and the byte offset of `node_id` into `node_chunk`
  // and `chunk_offset`, allocating the next node chunk when the id is the
  // first one that falls into it
  auto func_get_sparse_node_chunk_and_offset = [&](node_id_t node_id) -> int {
    uint32_t chunk_index = node_id >> node_index_mask_bits_;
    ailego_assert_with(chunk_index <= node_chunks_.size(), "invalid chunk idx");
    // belongs to next chunk
    if (chunk_index == node_chunks_.size()) {
      if (ailego_unlikely(node_chunks_.capacity() == node_chunks_.size())) {
        LOG_ERROR("add vector failed for no memory quota");
        return IndexError_IndexFull;
      }
      auto p = broker_->alloc_chunk(SparseChunkBroker::CHUNK_TYPE_NODE,
                                    chunk_index, chunk_size_);
      if (ailego_unlikely(p.first != 0)) {
        LOG_ERROR("Alloc data chunk failed");
        return p.first;
      }
      node_chunk = p.second;
      node_chunks_.emplace_back(node_chunk);
    }

    node_chunk = node_chunks_[chunk_index];
    chunk_offset = (node_id & node_index_mask_) * node_size();
    return 0;
  };

  // Fill the gap between the current doc count and the requested id with
  // records whose key is kInvalidKey
  for (size_t start_id = doc_cnt(); start_id < id; ++start_id) {
    if (auto ret = func_get_sparse_node_chunk_and_offset(start_id); ret != 0) {
      LOG_ERROR("func_get_sparse_node_chunk_and_offset failed");
      return ret;
    }
    size_t size = node_chunk->write(chunk_offset + sparse_meta_size(),
                                    &kInvalidKey, sizeof(key_t));
    if (ailego_unlikely(size != sizeof(key_t))) {
      LOG_ERROR("SparseChunk write key failed, ret=%zu", size);
      return IndexError_WriteData;
    }

    chunk_offset += node_size();
    if (ailego_unlikely(node_chunk->resize(chunk_offset) != chunk_offset)) {
      LOG_ERROR("SparseChunk resize to %zu failed", chunk_offset);
      return IndexError_Runtime;
    }
  }

  if (auto ret = func_get_sparse_node_chunk_and_offset(id); ret != 0) {
    LOG_ERROR("func_get_sparse_node_chunk_and_offset failed");
    return ret;
  }

  // Pick the sparse chunk: a new one is needed when none exists (the index
  // wraps to -1U) or when the aligned vector does not fit into the last one
  uint32_t sparse_chunk_index = sparse_node_chunks_.size() - 1U;
  if (sparse_chunk_index == -1U ||
      sparse_node_chunks_[sparse_chunk_index]->data_size() + sparse_vector_len >
          sparse_chunk_size_) {
    if (ailego_unlikely(sparse_node_chunks_.capacity() ==
                        sparse_node_chunks_.size())) {
      LOG_ERROR("add vector failed for no memory quota");
      return IndexError_IndexFull;
    }
    sparse_chunk_index++;
    auto p = broker_->alloc_chunk(SparseChunkBroker::CHUNK_TYPE_SPARSE_NODE,
                                  sparse_chunk_index, sparse_chunk_size_);
    if (ailego_unlikely(p.first != 0)) {
      LOG_ERROR("Alloc data chunk failed");
      return p.first;
    }
    sparse_node_chunk = p.second;

    sparse_node_chunks_.emplace_back(sparse_node_chunk);

    sparse_chunk_offset = 0UL;
  } else {
    sparse_node_chunk = sparse_node_chunks_[sparse_chunk_index];
    sparse_chunk_offset = sparse_node_chunk->data_size();
  }

  // write sparse vector
  if (sparse_vec.size() > 0) {
    size_t size = sparse_node_chunk->write(
        sparse_chunk_offset, sparse_vec.data(), sparse_vec.size());
    if (ailego_unlikely(size != sparse_vec.size())) {
      LOG_ERROR("SparseChunk write sparse vec failed, ret=%zu", size);
      return IndexError_WriteData;
    }
  }

  // Sparse location: chunk index in the high 32 bits, offset in the low ones
  uint64_t sparse_offset = sparse_chunk_index;
  sparse_offset = (sparse_offset << 32) + sparse_chunk_offset;

  // Node record: sparse location, then aligned length, then the key located
  // after the sparse meta
  size_t size =
      node_chunk->write(chunk_offset, &sparse_offset, sizeof(uint64_t));
  if (ailego_unlikely(size != sizeof(uint64_t))) {
    LOG_ERROR("SparseChunk write sparse vec index failed, ret=%zu", size);
    return IndexError_WriteData;
  }

  size = node_chunk->write(chunk_offset + sizeof(uint64_t), &sparse_vector_len,
                           sizeof(uint32_t));
  if (ailego_unlikely(size != sizeof(uint32_t))) {
    LOG_ERROR("SparseChunk write sparse vec len failed, ret=%zu", size);
    return IndexError_WriteData;
  }

  size =
      node_chunk->write(chunk_offset + sparse_meta_size(), &key, sizeof(key_t));
  if (ailego_unlikely(size != sizeof(key_t))) {
    LOG_ERROR("SparseChunk write vec failed, ret=%zu", size);
    return IndexError_WriteData;
  }

  //! level 0 neighbors is inited to zero by default
  int ret = add_upper_neighbor(level, id);
  if (ret != 0) {
    return ret;
  }

  // The chunk data sizes are advanced only after every write above succeeded
  if (sparse_vector_len > 0) {
    sparse_chunk_offset += sparse_vector_len;
    if (ailego_unlikely(sparse_node_chunk->resize(sparse_chunk_offset) !=
                        sparse_chunk_offset)) {
      LOG_ERROR("SparseChunk resize to %zu failed", sparse_chunk_offset);
      return IndexError_Runtime;
    }
  }

  // Only grow the doc count and the node chunk when the id extends the index;
  // an id below the doc count overwrites an existing slot in place
  if (*mutable_doc_cnt() <= id) {
    *mutable_doc_cnt() = id + 1;
    chunk_offset += node_size();
    if (ailego_unlikely(node_chunk->resize(chunk_offset) != chunk_offset)) {
      LOG_ERROR("Chunk resize to %zu failed", chunk_offset);
      return IndexError_Runtime;
    }
  }
  *mutable_total_sparse_count() += sparse_count;

  if (filter_same_key_ || get_vector_enabled_) {
    // Scoped guard so the exclusive lock is released even if the insert throws
    std::lock_guard keys_guard(*keys_map_lock_);
    (*keys_map_)[key] = id;
  }

  broker_->mark_dirty();

  return 0;
}

//! Update the entry point and max level through the base entity, then write
//! the header out via flush_header()
void HnswSparseStreamerEntity::update_ep_and_level(node_id_t ep,
                                                   level_t level) {
  HnswSparseEntity::update_ep_and_level(ep, level);
  flush_header();
}

//! Make a copy of the entity.
//! Every cached chunk is cloned individually, while the upper neighbor index,
//! the key map, its lock and the broker are handed to the copy as they are.
//! return the new entity, or an empty pointer in failure
const HnswSparseEntity::Pointer HnswSparseStreamerEntity::clone() const {
  //! Node chunks
  std::vector<SparseChunk::Pointer> node_chunks;
  node_chunks.reserve(node_chunks_.size());
  for (const auto &chunk : node_chunks_) {
    auto copy = chunk->clone();
    if (ailego_unlikely(!copy)) {
      LOG_ERROR("HnswSparseStreamerEntity get chunk failed in clone");
      return HnswSparseEntity::Pointer();
    }
    node_chunks.emplace_back(std::move(copy));
  }

  //! Sparse data chunks
  std::vector<SparseChunk::Pointer> sparse_node_chunks;
  sparse_node_chunks.reserve(sparse_node_chunks_.size());
  for (const auto &chunk : sparse_node_chunks_) {
    auto copy = chunk->clone();
    if (ailego_unlikely(!copy)) {
      LOG_ERROR("HnswSparseStreamerEntity get sparse chunk failed in clone");
      return HnswSparseEntity::Pointer();
    }
    sparse_node_chunks.emplace_back(std::move(copy));
  }

  //! Upper neighbor chunks
  std::vector<SparseChunk::Pointer> upper_neighbor_chunks;
  upper_neighbor_chunks.reserve(upper_neighbor_chunks_.size());
  for (const auto &chunk : upper_neighbor_chunks_) {
    auto copy = chunk->clone();
    if (ailego_unlikely(!copy)) {
      LOG_ERROR("HnswSparseStreamerEntity get chunk failed in clone");
      return HnswSparseEntity::Pointer();
    }
    upper_neighbor_chunks.emplace_back(std::move(copy));
  }

  auto *cloned = new (std::nothrow) HnswSparseStreamerEntity(
      stats_, header(), chunk_size_, node_index_mask_bits_,
      upper_neighbor_mask_bits_, filter_same_key_, get_vector_enabled_,
      sparse_chunk_size_, upper_neighbor_index_, keys_map_lock_, keys_map_,
      std::move(node_chunks), std::move(upper_neighbor_chunks),
      std::move(sparse_node_chunks), broker_);
  if (ailego_unlikely(cloned == nullptr)) {
    LOG_ERROR("HnswSparseStreamerEntity new failed");
  }
  return HnswSparseEntity::Pointer(cloned);
}

//! Get sparse vector feature data by key.
//! `sparse_count` is reset to 0 before the key is looked up.
//! return IndexError_NoExist when the key is unknown, otherwise the result of
//! get_sparse_vector_by_id()
int HnswSparseStreamerEntity::get_sparse_vector_by_key(
    key_t key, uint32_t *sparse_count, std::string *sparse_indices_buffer,
    std::string *sparse_values_buffer) const {
  *sparse_count = 0;

  const auto local_id = get_id(key);
  if (local_id != kInvalidNodeId) {
    return get_sparse_vector_by_id(local_id, sparse_count,
                                   sparse_indices_buffer,
                                   sparse_values_buffer);
  }

  return IndexError_NoExist;
}

//! Get sparse vector feature data by local id.
//! The stored sparse data is fetched into a memory block and converted into
//! the count / indices / values outputs by SparseUtility::ReverseSparseFormat.
//! return 0 on success, or IndexError_InvalidValue when no data is available
int HnswSparseStreamerEntity::get_sparse_vector_by_id(
    node_id_t id, uint32_t *sparse_count, std::string *sparse_indices_buffer,
    std::string *sparse_values_buffer) const {
  IndexStorage::MemoryBlock stored;
  get_sparse_data(id, stored);

  const void *raw = stored.data();
  if (!raw) {
    return IndexError_InvalidValue;
  }

  SparseUtility::ReverseSparseFormat(raw, sparse_count, sparse_indices_buffer,
                                     sparse_values_buffer, sparse_unit_size());

  return 0;
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/hnsw_sparse/hnsw_sparse_streamer_entity.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <iostream>
#include <ailego/parallel/lock.h>
#include <sparsehash/dense_hash_map>
#include <sparsehash/dense_hash_set>
#include <zvec/ailego/container/heap.h>
#include <zvec/core/framework/index_framework.h>
#include "hnsw_sparse_chunk.h"
#include "hnsw_sparse_entity.h"
#include "hnsw_sparse_index_hash.h"
#include "hnsw_sparse_params.h"

namespace zvec {
namespace core {

//! HnswSparseStreamerEntity manage vector data, pkey, and node's neighbors
class HnswSparseStreamerEntity : public HnswSparseEntity {
 public:
  //! Cleanup
  //! return 0 on success, or errCode in failure
  virtual int cleanup() override;

  //! Make a copy of streamer entity, to support thread-safe operation.
  //! The segment in container cannot be read concurrently
  virtual const HnswSparseEntity::Pointer clone() const override;

  //! Get primary key of the node id
  virtual key_t get_key(node_id_t id) const override;

  //! Get vector meta data by local id
  virtual const void *get_vector_meta(node_id_t id) const override;

  virtual int get_vector_meta(const node_id_t id,
                              IndexStorage::MemoryBlock &block) const override;

  //! Get vectors feature data by local ids
  virtual int get_vector_metas(const node_id_t *ids, uint32_t count,
                               const void **vecs) const override;
  virtual int get_vector_metas(
      const node_id_t *ids, uint32_t count,
      std::vector<IndexStorage::MemoryBlock> &block_vecs) const override;

  //! Get vector sparse feature data by chunk index and offset
  virtual const void *get_sparse_data(uint64_t offset,
                                      uint32_t len) const override;

  //! Get sparse data from id
  virtual const void *get_sparse_data(node_id_t id) const override;

  virtual int get_sparse_data(uint64_t offset, uint32_t len,
                              IndexStorage::MemoryBlock &block) const override;
  virtual int get_sparse_data(node_id_t id,
                              IndexStorage::MemoryBlock &block) const override;

  //! Get sparse data from vector
  virtual std::pair<const void *, uint32_t> get_sparse_data_from_vector(
      const void *vec) const override;
  virtual int get_sparse_data_from_vector(const void *vec,
                                          IndexStorage::MemoryBlock &block,
                                          int &sparse_length) const override;

  //! Get sparse vector feature data by key
  virtual int get_sparse_vector_by_key(
      key_t key, uint32_t *sparse_count, std::string *sparse_indices_buffer,
      std::string *sparse_values_buffer) const override;

  //! Get sparse vector feature data by id
  virtual int get_sparse_vector_by_id(
      node_id_t id, uint32_t *sparse_count, std::string *sparse_indices_buffer,
      std::string *sparse_values_buffer) const override;

  //! Get the node id's neighbors on graph level
  //! Note: the neighbors cannot be modified, using the following
  //! method to get WritableNeighbors if want to
  virtual const Neighbors get_neighbors(level_t level,
                                        node_id_t id) const override;


  //! Add vector and key to hnsw entity, and local id will be saved in id
  virtual int add_vector(level_t level, key_t key,
                         const std::string &sparse_vec_buffer,
                         uint32_t sparse_count, node_id_t *id) override;

  //! Add vector and id to hnsw entity
  virtual int add_vector_with_id(level_t level, node_id_t id,
                                 const std::string &sparse_vec,
                                 uint32_t sparse_count) override;

  virtual int update_neighbors(
      level_t level, node_id_t id,
      const std::vector<std::pair<node_id_t, dist_t>> &neighbors) override;

  //! Replace node id in level's neighbors
  int update_neighbors_dense(
      level_t level, node_id_t id,
      const std::vector<std::pair<node_id_t, dist_t>> &neighbors);

  //! Replace node id in level's neighbors
  int update_neighbors_sparse(
      level_t level, node_id_t id,
      const std::vector<std::pair<node_id_t, dist_t>> &neighbors);

  //! Append neighbor_id to the neighbors of node id on level
  //! Notice: the caller must ensure the neighbors are not full
  virtual void add_neighbor(level_t level, node_id_t id, uint32_t size,
                            node_id_t neighbor_id) override;

  //! Dump index by dumper
  virtual int dump(const IndexDumper::Pointer &dumper) override;

  virtual void update_ep_and_level(node_id_t ep, level_t level) override;

 public:
  //! Constructor
  HnswSparseStreamerEntity(IndexStreamer::Stats &stats);

  //! Destructor
  ~HnswSparseStreamerEntity();

  //! Init entity
  int init(uint64_t max_index_size, size_t max_doc_cnt);

  //! Flush graph entity to disk
  //! return 0 on success, or errCode in failure
  int flush(uint64_t checkpoint);

  //! Open entity from storage
  //! return 0 on success, or errCode in failure
  int open(IndexStorage::Pointer stg, bool check_crc);

  //! Close entity
  //! return 0 on success, or errCode in failure
  int close();

  //! Serialize the given index meta into the storage held by the broker
  //! return the result of IndexHelper::SerializeToStorage
  int set_index_meta(const IndexMeta &meta) const {
    return IndexHelper::SerializeToStorage(meta, broker_->storage().get());
  }

  //! Deserialize the index meta from the storage held by the broker into meta
  //! return the result of IndexHelper::DeserializeFromStorage
  int get_index_meta(IndexMeta *meta) const {
    return IndexHelper::DeserializeFromStorage(broker_->storage().get(), meta);
  }

  //! Set params: chunk size
  //! The value is handed to the broker on open() and used as the size of
  //! newly allocated node chunks
  inline void set_chunk_size(size_t val) {
    chunk_size_ = val;
  }

  //! Set params: filter same key
  //! When enabled, adding a key that is already in the key map fails with
  //! IndexError_Duplicate, and the key map is maintained on insert and open
  inline void set_filter_same_key(bool val) {
    filter_same_key_ = val;
  }

  //! Set params: get vector enabled
  //! When enabled, the key -> local id map is maintained on insert and
  //! rebuilt on open
  inline void set_get_vector(bool val) {
    get_vector_enabled_ = val;
  }

  //! Get vector local id by key
  //! return the local id, or kInvalidNodeId when the key is not in the map
  inline node_id_t get_id(key_t key) const {
    keys_map_lock_->lock_shared();
    auto it = keys_map_->find(key);
    // Resolve the result while the shared lock is still held: an insert done
    // under the exclusive lock may invalidate the iterator once we unlock
    const node_id_t id =
        (it == keys_map_->end()) ? kInvalidNodeId : it->second;
    keys_map_lock_->unlock_shared();
    return id;
  }

  //! Print every key -> id pair of the key map to stdout
  void print_key_map() {
    std::cout << "key map begins" << std::endl;

    for (const auto &entry : *keys_map_) {
      std::cout << "key: " << entry.first << ", id: " << entry.second
                << std::endl;
    }

    std::cout << "key map ends" << std::endl;
  }

  //! Get the byte size of one level 0 neighbors record: the neighbors header
  //! followed by l0_neighbor_cnt() node ids
  inline size_t neighbors_size() const {
    const size_t ids_bytes = l0_neighbor_cnt() * sizeof(node_id_t);
    return sizeof(NeighborsHeader) + ids_bytes;
  }

  //! Get the byte size of one upper level neighbors record: the neighbors
  //! header followed by upper_neighbor_cnt() node ids
  inline size_t upper_neighbors_size() const {
    const size_t ids_bytes = upper_neighbor_cnt() * sizeof(node_id_t);
    return sizeof(NeighborsHeader) + ids_bytes;
  }

 private:
  //! Value stored per node in the upper neighbor index: the node's level and
  //! the location of its upper neighbors, packed into one 32-bit word that
  //! can also be accessed as a whole through `data`
  union UpperNeighborIndexMeta {
    struct {
      uint32_t level : 4;   // level of the node, 4 bits
      uint32_t index : 28;  // index is composite type: chunk idx, and the
                            // N th neighbors in chunk, they two composite
                            // the 28 bits location
    };
    uint32_t data;  // the packed 32-bit value
  };

  template <class Key, class T>
  using HashMap = google::dense_hash_map<Key, T, std::hash<Key>>;
  template <class Key, class T>
  using HashMapPointer = std::shared_ptr<HashMap<Key, T>>;

  template <class Key>
  using HashSet = google::dense_hash_set<Key, std::hash<Key>>;
  template <class Key>
  using HashSetPointer = std::shared_ptr<HashSet<Key>>;

  //! upper neighbor index hashmap
  using NIHashMap = HnswSparseIndexHashMap<node_id_t, uint32_t>;
  using NIHashMapPointer = std::shared_ptr<NIHashMap>;

  //! Private constructor, only called by the clone method.
  //! The upper neighbor index, the key map, its lock and the broker are
  //! taken over as passed in, while the chunk vectors are moved into the new
  //! entity. The per-chunk node count and both masks are derived from the
  //! given mask bits.
  HnswSparseStreamerEntity(
      IndexStreamer::Stats &stats, const HNSWSparseHeader &hd,
      size_t chunk_size, uint32_t node_index_mask_bits,
      uint32_t upper_neighbor_mask_bits, bool filter_same_key,
      bool get_vector_enabled, uint32_t sparse_chunk_size,
      const NIHashMapPointer &upper_neighbor_index,
      std::shared_ptr<ailego::SharedMutex> &keys_map_lock,
      const HashMapPointer<key_t, node_id_t> &keys_map,
      std::vector<SparseChunk::Pointer> &&node_chunks,
      std::vector<SparseChunk::Pointer> &&upper_neighbor_chunks,
      std::vector<SparseChunk::Pointer> &&sparse_node_chunks,
      const SparseChunkBroker::Pointer &broker)
      : stats_(stats),
        chunk_size_(chunk_size),
        node_index_mask_bits_(node_index_mask_bits),
        // nodes per chunk is a power of two, so the mask is count - 1
        node_cnt_per_chunk_(1UL << node_index_mask_bits_),
        node_index_mask_(node_cnt_per_chunk_ - 1),
        upper_neighbor_mask_bits_(upper_neighbor_mask_bits),
        upper_neighbor_mask_((1U << upper_neighbor_mask_bits_) - 1),
        filter_same_key_(filter_same_key),
        get_vector_enabled_(get_vector_enabled),
        sparse_chunk_size_(sparse_chunk_size),
        upper_neighbor_index_(upper_neighbor_index),
        keys_map_lock_(keys_map_lock),
        keys_map_(keys_map),
        node_chunks_(std::move(node_chunks)),
        upper_neighbor_chunks_(std::move(upper_neighbor_chunks)),
        sparse_node_chunks_(std::move(sparse_node_chunks)),
        broker_(broker) {
    *mutable_header() = hd;

    // The record sizes depend on the neighbor counts, which are read through
    // the header accessors, so compute them after the header is copied
    neighbor_size_ = neighbors_size();
    upper_neighbor_size_ = upper_neighbors_size();
  }

  //! Extend `chunks` until index `idx` is valid, fetching each missing chunk
  //! of the given type from the broker.
  //! Called only in searching procedure per context, so no need to lock
  void sync_chunks(SparseChunkBroker::CHUNK_TYPE type, size_t idx,
                   std::vector<SparseChunk::Pointer> *chunks) const {
    if (ailego_likely(chunks->size() > idx)) {
      return;
    }
    while (chunks->size() <= idx) {
      auto fetched = broker_->get_chunk(type, chunks->size());
      // the storage can ensure get chunk will success after the first get
      ailego_assert_with(!!fetched, "get chunk failed");
      chunks->emplace_back(std::move(fetched));
    }
  }

  //! Locate the node record of `id` and make sure its node chunk is cached
  //! return pair: chunk index + chunk offset
  inline std::pair<uint32_t, uint32_t> get_vector_chunk_loc(
      node_id_t id) const {
    const uint32_t chunk_no = id >> node_index_mask_bits_;
    sync_chunks(SparseChunkBroker::CHUNK_TYPE_NODE, chunk_no, &node_chunks_);

    const uint32_t byte_offset = (id & node_index_mask_) * node_size();
    return std::make_pair(chunk_no, byte_offset);
  }

  //! Locate the key field inside the node record of `id` (it follows the
  //! vector part and the sparse meta) and make sure its node chunk is cached
  //! return pair: chunk index + chunk offset
  inline std::pair<uint32_t, uint32_t> get_key_chunk_loc(node_id_t id) const {
    const uint32_t chunk_no = id >> node_index_mask_bits_;
    sync_chunks(SparseChunkBroker::CHUNK_TYPE_NODE, chunk_no, &node_chunks_);

    uint32_t key_offset = (id & node_index_mask_) * node_size() + vector_size();
    key_offset += sparse_meta_size();
    return std::make_pair(chunk_no, key_offset);
  }

  //! return pair: chunk index + chunk offset
  inline std::pair<uint32_t, uint32_t> get_sparse_chunk_loc(
      uint32_t chunk_index, uint32_t chunk_offset) const {
    sync_chunks(SparseChunkBroker::CHUNK_TYPE_SPARSE_NODE, chunk_index,
                &sparse_node_chunks_);

    return std::make_pair(chunk_index, chunk_offset);
  }

  //! Locate the neighbors record of `id` on an upper `level`.
  //! The node must be present in the upper neighbor index; the location is
  //! checked against the cached upper neighbor chunks.
  //! return pair: chunk index + chunk offset
  inline std::pair<uint32_t, uint32_t> get_upper_neighbor_chunk_loc(
      level_t level, node_id_t id) const {
    auto found = upper_neighbor_index_->find(id);
    ailego_assert_abort(found != upper_neighbor_index_->end(),
                        "Get upper neighbor header failed");

    auto loc =
        reinterpret_cast<const UpperNeighborIndexMeta *>(&found->second);
    // High bits of the packed index select the chunk
    uint32_t chunk_no = (loc->index) >> upper_neighbor_mask_bits_;
    // Low bits select the node's first record; levels start at 1
    uint32_t byte_offset =
        (((loc->index) & upper_neighbor_mask_) + level - 1) *
        upper_neighbor_size_;

    sync_chunks(SparseChunkBroker::CHUNK_TYPE_UPPER_NEIGHBOR, chunk_no,
                &upper_neighbor_chunks_);
    ailego_assert_abort(chunk_no < upper_neighbor_chunks_.size(),
                        "invalid chunk idx");
    ailego_assert_abort(
        byte_offset < upper_neighbor_chunks_[chunk_no]->data_size(),
        "invalid chunk offset");
    return std::make_pair(chunk_no, byte_offset);
  }

  //! return pair: chunk + chunk offset
  //! Level 0 neighbors are stored inside the node record (after vector,
  //! sparse meta and key); higher levels are stored in the upper neighbor
  //! chunks.
  inline std::pair<SparseChunk *, size_t> get_neighbor_chunk_loc(
      level_t level, node_id_t id) const {
    if (level != 0UL) {
      auto loc = get_upper_neighbor_chunk_loc(level, id);
      return std::make_pair(upper_neighbor_chunks_[loc.first].get(),
                            loc.second);
    }

    uint32_t chunk_no = id >> node_index_mask_bits_;
    // record start + vector bytes + key bytes + sparse meta bytes
    uint32_t byte_offset = (id & node_index_mask_) * node_size() +
                           vector_size() + sizeof(key_t) + sparse_meta_size();

    sync_chunks(SparseChunkBroker::CHUNK_TYPE_NODE, chunk_no, &node_chunks_);
    ailego_assert_abort(chunk_no < node_chunks_.size(), "invalid chunk idx");
    ailego_assert_abort(byte_offset < node_chunks_[chunk_no]->data_size(),
                        "invalid chunk offset");
    return std::make_pair(node_chunks_[chunk_no].get(), byte_offset);
  }

  //! Check whether the chunk hnsw index described by header `hd` is valid.
  //! NOTE(review): defined out of line; the meaning of the returned code is
  //! not visible here -- presumably 0 means valid, confirm at the definition.
  int check_hnsw_index(const HNSWSparseHeader *hd) const;

  //! Bytes needed to store the neighbor lists of levels 1..level for one
  //! node: one slot of upper_neighbor_size_ bytes per level.
  size_t get_total_upper_neighbors_size(level_t level) const {
    const size_t total_bytes = level * upper_neighbor_size_;
    return total_bytes;
  }

  //! Add upper neighbor header and reserve space for upper neighbor
  //! The reserved space is `level` consecutive slots of upper_neighbor_size_
  //! bytes in the last upper neighbor chunk; a new chunk is allocated when
  //! there is none yet or the last one has not enough padding left.
  //! \param level  top level of the node; 0 means nothing to reserve
  //! \param id     node id used as key in upper_neighbor_index_
  //! \return 0 on success, IndexError_IndexFull when the reserved chunk
  //!         vector is exhausted, the error code of alloc_chunk, or
  //!         IndexError_Runtime when the hash insert or chunk resize fails
  int add_upper_neighbor(level_t level, node_id_t id) {
    if (level == 0) {
      return 0;
    }

    const size_t neighbors_size = get_total_upper_neighbors_size(level);
    // Test for "no chunk yet" explicitly. Encoding it as `size() - 1 == -1UL`
    // is only correct when unsigned long and size_t are both 64 bits wide.
    const bool need_new_chunk =
        upper_neighbor_chunks_.empty() ||
        upper_neighbor_chunks_.back()->padding_size() < neighbors_size;

    SparseChunk::Pointer chunk;
    uint64_t chunk_index = 0;
    uint64_t chunk_offset = 0;
    if (need_new_chunk) {  // no space left and need to alloc
      chunk_index = upper_neighbor_chunks_.size();
      // The vector must never grow beyond its reserved capacity.
      if (ailego_unlikely(upper_neighbor_chunks_.capacity() ==
                          upper_neighbor_chunks_.size())) {
        LOG_ERROR("add upper neighbor failed for no memory quota");
        return IndexError_IndexFull;
      }
      auto p =
          broker_->alloc_chunk(SparseChunkBroker::CHUNK_TYPE_UPPER_NEIGHBOR,
                               chunk_index, upper_neighbor_chunk_size_);
      if (ailego_unlikely(p.first != 0)) {
        LOG_ERROR("Alloc data chunk failed");
        return p.first;
      }
      chunk = p.second;
      upper_neighbor_chunks_.emplace_back(chunk);
    } else {
      chunk_index = upper_neighbor_chunks_.size() - 1;
      chunk = upper_neighbor_chunks_.back();
      // Append right after the data already stored in the chunk.
      chunk_offset = chunk->data_size();
    }

    ailego_assert_with((size_t)level < kMaxGraphLayers, "invalid level");
    ailego_assert_with(chunk_offset % upper_neighbor_size_ == 0,
                       "invalid offset");
    ailego_assert_with((chunk_offset / upper_neighbor_size_) <
                           (1U << upper_neighbor_mask_bits_),
                       "invalid offset");
    ailego_assert_with(chunk_index < (1U << (28 - upper_neighbor_mask_bits_)),
                       "invalid chunk index");

    // Pack chunk number (high bits) and first slot number (low bits).
    UpperNeighborIndexMeta meta;
    meta.level = level;
    meta.index = (chunk_index << upper_neighbor_mask_bits_) |
                 (chunk_offset / upper_neighbor_size_);
    chunk_offset += upper_neighbor_size_ * level;
    if (ailego_unlikely(!upper_neighbor_index_->insert(id, meta.data))) {
      LOG_ERROR("HashMap insert value failed");
      return IndexError_Runtime;
    }

    // Grow the chunk so that it covers the newly reserved slots.
    if (ailego_unlikely(chunk->resize(chunk_offset) != chunk_offset)) {
      LOG_ERROR("SparseChunk resize to %zu failed", (size_t)chunk_offset);
      return IndexError_Runtime;
    }

    return 0;
  }

  //! Number of nodes that fit into the node chunks reserved so far: reserved
  //! chunk slots times nodes per chunk.
  size_t estimate_doc_capacity() const {
    const size_t reserved_chunks = node_chunks_.capacity();
    return reserved_chunks * node_cnt_per_chunk_;
  }

  //! Derives the chunk geometry (node count per chunk, chunk sizes, index
  //! masks) from the configured sizes and reserves the chunk vectors.
  //! \param max_index_size  limit of the index size; 0 selects
  //!                        chunk_size_ * kDefaultMaxChunkCnt
  //! \return always 0
  int init_chunk_params(size_t max_index_size) {
    // Uses chunk_size_ as configured, i.e. before it is recomputed below.
    sparse_chunk_size_ = AlignPageSize(chunk_size_);

    node_cnt_per_chunk_ = std::max<uint32_t>(1, chunk_size_ / node_size());
    //! align node cnt per chunk to pow of 2
    node_index_mask_bits_ = std::ceil(std::log2(node_cnt_per_chunk_));
    node_cnt_per_chunk_ = 1UL << node_index_mask_bits_;
    // chunk_size_ now holds exactly node_cnt_per_chunk_ nodes (page aligned)
    chunk_size_ = AlignPageSize(node_cnt_per_chunk_ * node_size());
    node_index_mask_ = node_cnt_per_chunk_ - 1;

    if (max_index_size == 0UL) {
      max_index_size_ = chunk_size_ * kDefaultMaxChunkCnt;
    } else {
      max_index_size_ = max_index_size;
    }

    //! To get a balanced upper neighbor chunk size.
    //! If the upper chunk size is equal to node chunk size, it may waste
    //! upper neighbor chunk space; if the upper neighbor chunk size is too
    //! small, it will need large upper neighbor chunks index space. So to
    //! get a balanced ratio be sqrt of the node/neighbor size ratio
    float ratio =
        std::sqrt(node_size() * scaling_factor() * 1.0f / upper_neighbor_size_);
    // At least large enough for one node with kMaxGraphLayers levels.
    upper_neighbor_chunk_size_ =
        AlignPageSize(std::max(get_total_upper_neighbors_size(kMaxGraphLayers),
                               static_cast<size_t>(chunk_size_ / ratio)));
    // Number of bits needed to address a slot inside an upper neighbor chunk.
    upper_neighbor_mask_bits_ =
        std::ceil(std::log2(upper_neighbor_chunk_size_ / upper_neighbor_size_));
    upper_neighbor_mask_ = (1 << upper_neighbor_mask_bits_) - 1;

    // NOTE(review): max_index_size_ (size_t) / chunk_size_ (uint32_t) is an
    // integer division, so std::ceil has no effect here and the count is
    // rounded down -- confirm that this is intended.
    size_t max_node_chunk_cnt = std::ceil(max_index_size_ / chunk_size_);
    size_t max_upper_chunk_cnt = std::ceil(
        (max_node_chunk_cnt * node_cnt_per_chunk_ * 1.0f / scaling_factor()) /
        (upper_neighbor_chunk_size_ / upper_neighbor_size_));
    // Extra room on top of the estimate above.
    max_upper_chunk_cnt =
        max_upper_chunk_cnt + std::ceil(max_upper_chunk_cnt / scaling_factor());

    //! reserve space to avoid memmove in chunks vector emplace chunk, so
    //! as to lock-free in reading chunk
    node_chunks_.reserve(max_node_chunk_cnt);
    sparse_node_chunks_.reserve(max_node_chunk_cnt);
    upper_neighbor_chunks_.reserve(max_upper_chunk_cnt);

    LOG_DEBUG(
        "Settings: nodeSize=%zu chunkSize=%u upperNeighborSize=%u "
        "upperNeighborChunkSize=%u "
        "nodeCntPerChunk=%u maxChunkCnt=%zu maxNeighborChunkCnt=%zu "
        "maxIndexSize=%zu ratio=%.3f",
        node_size(), chunk_size_, upper_neighbor_size_,
        upper_neighbor_chunk_size_, node_cnt_per_chunk_, max_node_chunk_cnt,
        max_upper_chunk_cnt, max_index_size_, ratio);

    return 0;
  }

  //! Init node chunk and neighbor chunks
  //! NOTE(review): defined out of line; how `header_chunk` is consumed and
  //! which error codes are returned is not visible here.
  int init_chunks(const SparseChunk::Pointer &header_chunk);

  //! Writes the in-memory header at offset 0 of the header chunk, but only
  //! when the broker reports itself dirty.
  //! \return 0 on success or when nothing had to be written,
  //!         IndexError_Runtime when the header chunk is unavailable,
  //!         IndexError_WriteData on a short write
  int flush_header(void) {
    const bool need_flush = broker_->dirty();
    if (!need_flush) {
      return 0;
    }

    auto chunk = broker_->get_chunk(SparseChunkBroker::CHUNK_TYPE_HEADER,
                                    SparseChunkBroker::kDefaultChunkSeqId);
    if (ailego_unlikely(!chunk)) {
      LOG_ERROR("get header chunk failed");
      return IndexError_Runtime;
    }

    const size_t written = chunk->write(0UL, &header(), header_size());
    if (ailego_unlikely(written != header_size())) {
      LOG_ERROR("Write header chunk failed");
      return IndexError_WriteData;
    }
    return 0;
  }

 private:
  //! Not copyable
  HnswSparseStreamerEntity(const HnswSparseStreamerEntity &) = delete;
  HnswSparseStreamerEntity &operator=(const HnswSparseStreamerEntity &) =
      delete;
  static constexpr uint64_t kUpperHashMemoryInflateRatio = 2.0f;

 private:
  IndexStreamer::Stats &stats_;
  HNSWSparseHeader header_{};
  std::mutex mutex_{};
  //! size limit of the index, see init_chunk_params()
  size_t max_index_size_{0UL};
  //! size of a node chunk; re-aligned in init_chunk_params()
  uint32_t chunk_size_{kDefaultChunkSize};
  //! size of an upper neighbor chunk
  uint32_t upper_neighbor_chunk_size_{kDefaultChunkSize};
  //! a node id composites chunkIdx (id >> bits) and slot (id & mask)
  uint32_t node_index_mask_bits_{0U};
  //! nodes per node chunk, a power of 2
  uint32_t node_cnt_per_chunk_{0U};
  uint32_t node_index_mask_{0U};
  uint32_t neighbor_size_{0U};
  //! size of the neighbor slot of one upper level
  uint32_t upper_neighbor_size_{0U};
  //! UpperNeighborIndex.index composite chunkIdx and offset in chunk by the
  //! following mask
  uint32_t upper_neighbor_mask_bits_{0U};
  uint32_t upper_neighbor_mask_{0U};
  bool filter_same_key_{false};
  bool get_vector_enabled_{false};

  //! page aligned chunk_size_ as configured, see init_chunk_params()
  uint32_t sparse_chunk_size_{kDefaultChunkSize};

  //! node id -> UpperNeighborIndexMeta of the node
  NIHashMapPointer upper_neighbor_index_{};

  mutable std::shared_ptr<ailego::SharedMutex> keys_map_lock_{};
  HashMapPointer<key_t, node_id_t> keys_map_{};

  //! the chunks will be changed in searcher, so need mutable
  //! data chunk include: vector, key, level 0 neighbors
  mutable std::vector<SparseChunk::Pointer> node_chunks_{};
  //! upper neighbor chunk include: UpperNeighborHeader + (1~level) neighbors
  mutable std::vector<SparseChunk::Pointer> upper_neighbor_chunks_{};
  //! chunk that holds up sparse part
  mutable std::vector<SparseChunk::Pointer> sparse_node_chunks_{};

  SparseChunkBroker::Pointer broker_{};  // chunk broker
};

}  // namespace core
}  // namespace zvec

================================================
FILE: src/core/algorithm/ivf/CMakeLists.txt
================================================
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

# Library target core_knn_ivf, made of every .cc file in this directory and
# linked against zvec_ailego, core_framework and core_knn_cluster.
# NOTE(review): cc_library is a project macro, presumably provided by the
# cmake/bazel.cmake include above; the exact meaning of the STATIC SHARED
# STRICT ALWAYS_LINK flags should be confirmed there.
cc_library(
    NAME core_knn_ivf STATIC SHARED STRICT ALWAYS_LINK
    SRCS *.cc
    LIBS zvec_ailego core_framework core_knn_cluster
    INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm
    VERSION "${PROXIMA_ZVEC_VERSION}"
  )


================================================
FILE: src/core/algorithm/ivf/ivf_builder.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ivf_builder.h"
#include <ailego/pattern/defer.h>
#include <zvec/ailego/utility/string_helper.h>
#include "algorithm/cluster/cluster_params.h"
#include "ivf_dumper.h"

namespace zvec {
namespace core {

/*! IndexHolder support filtered by vector labels
 */
class LabelFilteredIndexHolder : public IndexHolder {
 public:
  /*! Index Holder Iterator
   */
  class Iterator : public IndexHolder::Iterator {
   public:
    //! Index Holder Iterator Pointer
    typedef std::unique_ptr<Iterator> Pointer;

    //! Constructor
    Iterator(const IVFBuilder::RandomAccessIndexHolder::Pointer &holder,
             const std::vector<uint32_t> *elems)
        : holder_(holder), elems_(elems) {}

    //! Destructor
    virtual ~Iterator(void) {}

    //! Retrieve pointer of data
    virtual const void *data(void) const override {
      return holder_->element((*elems_)[index_]);
    }

    //! Test if the iterator is valid
    virtual bool is_valid(void) const override {
      return index_ < elems_->size();
    }

    //! Retrieve primary key
    virtual uint64_t key(void) const override {
      return (*elems_)[index_];
    }

    //! Next iterator
    virtual void next(void) override {
      ++index_;
    }

   private:
    //! Members
    const IVFBuilder::RandomAccessIndexHolder::Pointer holder_{nullptr};
    const std::vector<uint32_t> *elems_{nullptr};
    size_t index_{0};
  };

  //! Constructor
  LabelFilteredIndexHolder(
      const IVFBuilder::RandomAccessIndexHolder::Pointer &holder,
      const std::vector<uint32_t> &items)
      : holder_(holder), elems_(&items) {}

  //! Retrieve count of elements in holder (-1 indicates unknown)
  virtual size_t count(void) const override {
    return elems_->size();
  }

  //! Retrieve dimension
  virtual size_t dimension(void) const override {
    return holder_->dimension();
  }

  //! Retrieve type information
  virtual IndexMeta::DataType data_type(void) const override {
    return holder_->data_type();
  }

  //! Retrieve element size in bytes
  virtual size_t element_size(void) const override {
    return holder_->element_size();
  }

  //! Retrieve if it can multi-pass
  virtual bool multipass(void) const override {
    return true;
  }

  //! Create a new iterator
  virtual IndexHolder::Iterator::Pointer create_iterator(void) override {
    return IndexHolder::Iterator::Pointer(
        new LabelFilteredIndexHolder::Iterator(holder_, elems_));
  }

 private:
  //! Members
  const IVFBuilder::RandomAccessIndexHolder::Pointer holder_{};
  const std::vector<uint32_t> *elems_{};
};

//! Constructor: members keep their in-class initial values
IVFBuilder::IVFBuilder() = default;

//! Destructor: releases everything through cleanup()
IVFBuilder::~IVFBuilder() {
  cleanup();
}

//! Initialize the builder with the index meta and the builder params.
//! Only allowed in state INIT; on success the state becomes INITED.
//! \return 0 on success, IndexError_Logic on wrong state, IndexError_NoExist
//!         when the metric is unknown, or the code of the failing parse step
//! NOTE(review): when a parse step fails, the members assigned before it
//! (meta_, params_, parsed values) keep their new values.
int IVFBuilder::init(const IndexMeta &meta, const ailego::Params &params) {
  LOG_INFO("Begin IVFBuilder::init!");

  if (state_ != INIT) {
    LOG_ERROR("IVFBuilder state wrong. state=%d", state_);
    return IndexError_Logic;
  }

  meta_ = meta;
  converted_meta_ = meta;
  quantized_meta_ = meta;
  // Clear the converter/reformer params for external transforms
  converted_meta_.set_reformer(std::string(), 0, ailego::Params());
  converted_meta_.set_converter(std::string(), 0, ailego::Params());
  quantized_meta_.set_reformer(std::string(), 0, ailego::Params());
  quantized_meta_.set_converter(std::string(), 0, ailego::Params());
  // params_ must be set before parse_general_params(), which reads from it
  params_ = params;

  if (!IndexFactory::HasMetric(meta_.metric_name())) {
    LOG_ERROR("Metric %s not exist", meta_.metric_name().c_str());
    return IndexError_NoExist;
  }

  // parse_clustering_params() depends on the level count parsed here
  int ret = parse_centroids_num(params);
  ivf_check_with_msg(ret, "Failed to parse centroids, ret=%d", ret);

  ret = parse_clustering_params(params);
  ivf_check_with_msg(ret, "Failed to parse clustering params, ret=%d", ret);

  ret = parse_general_params(params);
  ivf_check_with_msg(ret, "Failed to parse general params, ret=%d", ret);

  LOG_INFO("End IVFBuilder::init!");

  LOG_DEBUG(
      "Converter=%s Quantizer=%s Optimizer=%s "
      "OptimizerQuantizer=%s QuantizeByCentroid=%u StoreFeatures=%u "
      "ClusterClass=%s TrainSamplesCount=%u TrainSampleRatio=%f "
      "BlockVectorCount=%u",
      params.get_as_string(PARAM_IVF_BUILDER_CONVERTER_CLASS).c_str(),
      params.get_as_string(PARAM_IVF_BUILDER_QUANTIZER_CLASS).c_str(),
      params.get_as_string(PARAM_IVF_BUILDER_OPTIMIZER_CLASS).c_str(),
      params.get_as_string(PARAM_IVF_BUILDER_OPTIMIZER_QUANTIZER_CLASS).c_str(),
      params.get_as_bool(PARAM_IVF_BUILDER_QUANTIZE_BY_CENTROID),
      params.get_as_bool(PARAM_IVF_BUILDER_STORE_ORIGINAL_FEATURES),
      params.get_as_string(PARAM_IVF_BUILDER_CLUSTER_CLASS).c_str(),
      params.get_as_uint32(PARAM_IVF_BUILDER_TRAIN_SAMPLE_COUNT),
      params.get_as_float(PARAM_IVF_BUILDER_TRAIN_SAMPLE_RATIO),
      block_vector_count_);

  state_ = INITED;
  return 0;
}

int IVFBuilder::cleanup(void) {
  LOG_INFO("Begin IVFBuilder::cleanup");

  state_ = INIT;
  stats_.clear_attributes();
  stats_.set_built_costtime(0u);
  stats_.set_built_count(0u);
  stats_.set_discarded_count(0u);
  stats_.set_dumped_costtime(0u);
  stats_.set_dumped_count(0u);
  stats_.set_trained_costtime(0u);
  stats_.set_trained_count(0u);

  centroid_num_vec_.clear();
  cluster_class_.clear();
  converter_class_.clear();
  cluster_params_.clear();

  labels_.clear();
  centroid_index_.reset();
  holder_.reset();
  converted_meta_ = meta_;
  converter_.reset();
  quantized_meta_ = meta_;
  quantizers_.clear();

  error_ = false;
  err_code_ = 0;

  thread_count_ = 0;
  sample_count_ = 0;
  cluster_auto_tuning_ = false;
  store_original_features_ = false;
  quantize_by_centroid_ = false;

  LOG_INFO("End IVFBuilder::cleanup");

  return 0;
}

//! Train the centroids from the vectors in `holder`.
//! Only allowed in state INITED; on success the state becomes TRAINED.
//! \param threads  thread pool; a SingleQueueIndexThreads with thread_count_
//!                 is created when it is null
//! \param holder   training vectors, must be non-empty and match meta_
//! \return 0 on success, IndexError_Runtime on wrong state,
//!         IndexError_InvalidArgument / IndexError_Mismatch on a bad holder,
//!         or the code of the failing converter / trainer step
int IVFBuilder::train(IndexThreads::Pointer threads,
                      IndexHolder::Pointer holder) {
  LOG_INFO("Begin IVFBuilder::train with holder");
  if (state_ != INITED) {
    LOG_ERROR("IVFBuilder train failed, wrong state=%d", state_);
    return IndexError_Runtime;
  }

  if (!threads) {
    threads = std::make_shared<SingleQueueIndexThreads>(thread_count_, false);
    if (!threads) {
      return IndexError_NoMemory;
    }
  }
  ailego::ElapsedTime timer;
  if (!holder || holder->count() == 0) {
    LOG_ERROR("Input holder is nullptr or empty while train index");
    return IndexError_InvalidArgument;
  }
  if (!holder->is_matched(meta_)) {
    LOG_ERROR("Input holder doesn't match index meta while train index");
    return IndexError_Mismatch;
  }

  // With a converter the trainer works on the converted vectors and meta.
  if (converter_) {
    int ret = IndexConverter::TrainAndTransform(converter_, std::move(holder));
    ivf_check_with_msg(ret, "Failed to train or transform by converter %s",
                       converter_->name().c_str());
    converted_meta_ = converter_->meta();
    // holder was moved from above, it is re-assigned here
    holder = converter_->result();
  }

  ailego::Params train_params;
  int ret = prepare_trainer_params(train_params);
  ivf_check_with_msg(ret, "Failed to prepare trainer params, ret=%d", ret);

  IndexTrainer::Pointer trainer =
      IndexFactory::CreateTrainer("StratifiedClusterTrainer");
  ivf_assert_with_msg(trainer, IndexError_NoExist, "Failed to create trainer");

  ret = trainer->init(converted_meta_, train_params);
  ivf_check_with_msg(ret, "Trainer init failed with ret %d", ret);

  ret = trainer->train(std::move(threads), std::move(holder));
  ivf_check_with_msg(ret, "Trainer train failed with ret %d", ret);

  // Builds the centroid index from the trainer; it already sets the state
  // and the trained cost time, both are set again below for the whole call.
  ret = this->train(trainer);
  ivf_check_error_code(ret);

  stats_.set_trained_costtime(timer.milli_seconds());

  LOG_INFO("End IVFBuilder::train with holder");

  state_ = TRAINED;
  return 0;
}

//! Build the centroid index (and, when an optimizer quantizer is configured,
//! the quantized centroid index for the searcher) from a trained trainer.
//! Only allowed in state INITED; on success the state becomes TRAINED.
//! \return 0 on success, IndexError_Runtime on wrong state,
//!         IndexError_InvalidArgument on a null trainer, IndexError_Mismatch
//!         when the trainer meta does not fit, or the failing step's code
int IVFBuilder::train(const IndexTrainer::Pointer &trainer) {
  LOG_DEBUG("Begin IVFBuilder::train by trainer");
  ailego::ElapsedTime timer;

  if (state_ != INITED) {
    LOG_ERROR("IVFBuilder train failed, wrong state=%d", state_);
    return IndexError_Runtime;
  }
  if (!trainer) {
    LOG_ERROR("Input trainer is nullptr while train index");
    return IndexError_InvalidArgument;
  }

  IndexCluster::CentroidList centroids;
  IndexBundle::Pointer bundle = trainer->indexes();
  int ret = IndexCluster::Deserialize(trainer->meta(), bundle, &centroids);
  ivf_check_with_msg(ret, "Failed to deserialize index");

  const IndexMeta &trainer_meta = trainer->meta();
  const bool same_format =
      trainer_meta.data_type() == converted_meta_.data_type() &&
      trainer_meta.metric_name() == converted_meta_.metric_name() &&
      trainer_meta.element_size() == converted_meta_.element_size();
  if (!same_format) {
    // A differing format is only accepted when it comes from the converter
    // this builder is configured with.
    if (trainer_meta.converter_name() != converter_class_) {
      LOG_ERROR("Input trainer doesn't match index meta while train index");
      return IndexError_Mismatch;
    }
    //! Create converter from trainer params
    LOG_INFO("Train IVFBuilder by trainer with converter");
    converter_ = CreateAndInitConverter(meta_, trainer_meta.converter_name(),
                                        trainer_meta.converter_params());
    ivf_assert(converter_, IndexError_Runtime);
    converted_meta_ = trainer_meta;
  }

  centroid_index_ = std::make_shared<IVFCentroidIndex>();
  if (!centroid_index_) {
    return IndexError_NoMemory;
  }
  ret = centroid_index_->init(converted_meta_, params_);
  ivf_check_error_code(ret);

  ret = centroid_index_->build(centroids);
  ivf_check_with_msg(ret, "Failed to build centroid index");

  const bool quantize_for_searcher =
      params_.has(PARAM_IVF_BUILDER_OPTIMIZER_QUANTIZER_CLASS);
  if (quantize_for_searcher) {
    //! Quantize the centroids for searcher
    searcher_centroid_index_ = std::make_shared<IVFCentroidIndex>();
    if (!searcher_centroid_index_) {
      return IndexError_NoMemory;
    }
    ailego::Params quantizer_params;
    params_.get(PARAM_IVF_BUILDER_OPTIMIZER_QUANTIZER_PARAMS,
                &quantizer_params);
    searcher_centroid_index_->set_quantizer(
        params_.get_as_string(PARAM_IVF_BUILDER_OPTIMIZER_QUANTIZER_CLASS),
        quantizer_params);
    ret = searcher_centroid_index_->init(converted_meta_, params_);
    ivf_check_error_code(ret);

    ret = searcher_centroid_index_->build(centroids);
    ivf_check_with_msg(ret, "Failed to build centroid index");
  }

  stats_.set_trained_costtime(timer.milli_seconds());

  LOG_DEBUG("End IVFBuilder::train by trainer");

  state_ = TRAINED;
  return 0;
}

//! Assign every vector of `holder` to a centroid and prepare the quantizers.
//! Only allowed in state TRAINED; on success the state becomes BUILT.
//! \param threads  thread pool; a SingleQueueIndexThreads with thread_count_
//!                 is created when it is null
//! \param holder   vectors to index, must be non-empty and match meta_
//! \return 0 on success, IndexError_Runtime on wrong state,
//!         IndexError_InvalidArgument / IndexError_Mismatch on a bad holder,
//!         or the code of the failing step
int IVFBuilder::build(IndexThreads::Pointer threads,
                      IndexHolder::Pointer holder) {
  LOG_INFO("Begin IVFBuilder::build!");

  if (state_ != TRAINED) {
    LOG_ERROR("Train the index first before build");
    return IndexError_Runtime;
  }

  ailego::ElapsedTime timer;
  if (!holder || holder->count() == 0) {
    LOG_ERROR("Input holder is nullptr or empty while building index");
    return IndexError_InvalidArgument;
  }

  if (!holder->is_matched(meta_)) {
    LOG_ERROR("Input holder doesn't match index meta while building index");
    return IndexError_Mismatch;
  }
  if (!threads) {
    threads = std::make_shared<SingleQueueIndexThreads>(thread_count_, false);
    if (!threads) {
      return IndexError_NoMemory;
    }
  }

  // Feed all elements into a random access holder owned by the builder; it
  // is used later on by element index (see dump_index()).
  holder_ = std::make_shared<RandomAccessIndexHolder>(meta_);
  if (!holder_) {
    return IndexError_NoMemory;
  }
  if (holder->count() > 0) {
    holder_->reserve(holder->count());
  }
  for (auto iter = holder->create_iterator(); iter && iter->is_valid();
       iter->next()) {
    holder_->emplace(iter->key(), iter->data());
  }

  // Holder is not needed, cleanup it.
  holder.reset();

  // Labeling works on the converted vectors when a converter is configured.
  IndexHolder::Pointer converted_holder = holder_;
  if (converter_) {
    int ret = converter_->transform(holder_);
    ivf_check_with_msg(ret, "Failed to transform by converter %s",
                       converter_->name().c_str());
    converted_holder = converter_->result();
  }

  // One label list per centroid
  labels_.resize(centroid_index_->centroids_count());
  int ret = this->build_label_index(threads.get(), converted_holder);
  ivf_check_with_msg(ret, "Failed to build index for %s",
                     IndexError::What(ret));

  ret = this->prepare_quantizer(threads.get());
  ivf_check_error_code(ret);

  stats_.set_built_costtime(timer.milli_seconds());

  LOG_INFO("End IVFBuilder::build");

  state_ = BUILT;
  return 0;
}

//! Dump the built index and the index meta into `dumper`.
//! Only allowed in state BUILT; the state is left unchanged, so dump() may be
//! called more than once.
//! \return 0 on success, IndexError_Runtime on wrong state, or the code of
//!         the failing dump / serialize step
int IVFBuilder::dump(const IndexDumper::Pointer &dumper) {
  LOG_INFO("Begin IVFBuilder::dump");

  if (state_ != BUILT) {
    LOG_ERROR("Build the index before dump QC Index");
    return IndexError_Runtime;
  }

  ailego::ElapsedTime timer;
  int ret = this->dump_index(dumper);
  ivf_check_with_msg(ret, "Failed to dump index with ret=%d", ret);

  // the fitting function for the follow points: 1000000(0.02) 10000000(0.01)
  // 50000000(0.005) 100000000(0.001)
  float scan_ratio = -0.004 * std::log(holder_->count()) + 0.0751;
  scan_ratio = std::max(scan_ratio, 0.0001f);

  // Set Searcher Params
  ailego::Params params;
  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, scan_ratio);
  meta_.set_searcher("IVFSearcher", 0, std::move(params));
  // Hand over a copy: the builder stays in state BUILT after a dump, so
  // params_ must remain intact for a later dump() instead of being moved
  // out of the builder.
  meta_.set_builder("IVFBuilder", 0, ailego::Params(params_));

  ret = IndexHelper::SerializeToDumper(meta_, dumper.get());
  if (ret != 0) {
    LOG_ERROR("Failed to serialize meta into dumper.");
    return ret;
  }

  stats_.set_discarded_count(stats_.built_count() - stats_.dumped_count());
  stats_.set_dumped_costtime(timer.milli_seconds());

  LOG_INFO("End IVFBuilder::dump");

  return 0;
}

//! Decide (when undefined) or validate (when set) the major order of `meta`.
//! Column major needs a distance matrix kernel for every probed (m, n) shape
//! and an element size that is a multiple of the aligned data type size.
//! \return 0 on success, IndexError_InvalidArgument when the metric cannot
//!         be created or the block size is not 32 bytes aligned,
//!         IndexError_Unsupported when column major is requested but not
//!         supported, or the metric init error code
int IVFBuilder::CheckAndUpdateMajorOrder(IndexMeta &meta) {
  const std::string &metric_name = meta.metric_name();
  auto metric = IndexFactory::CreateMetric(metric_name);
  if (!metric) {
    LOG_ERROR("CreateMetric %s failed", metric_name.c_str());
    return IndexError_InvalidArgument;
  }
  int ret = metric->init(meta, meta.metric_params());
  ivf_check_with_msg(ret, "IndexMetric %s init failed", metric_name.c_str());

  // Probe the shapes m = 32, 16, .., 1 and n = m, m/2, .., 1; stop at the
  // first shape without a kernel.
  auto has_all_matrix_kernels = [&metric]() {
    for (size_t rows = 32; rows != 0; rows /= 2) {
      for (size_t cols = rows; cols != 0; cols /= 2) {
        if (metric->distance_matrix(rows, cols) == nullptr) {
          return false;
        }
      }
    }
    return true;
  };
  const bool kernels_ok = has_all_matrix_kernels();
  const bool size_aligned =
      meta.element_size() % IndexMeta::AlignSizeof(meta.data_type()) == 0;
  const bool column_major_ok = kernels_ok && size_aligned;

  if (meta.major_order() == IndexMeta::MO_UNDEFINED) {
    // Prefer column major for supported metrics up to 512 dimensions.
    const bool use_column = column_major_ok && meta.dimension() <= 512;
    meta.set_major_order(use_column ? IndexMeta::MO_COLUMN
                                    : IndexMeta::MO_ROW);
  } else if (!column_major_ok &&
             meta.major_order() == IndexMeta::MO_COLUMN) {
    LOG_WARN(
        "Index Metric %s Unsupported "
        "Column Major Order",
        metric_name.c_str());
    return IndexError_Unsupported;
  }

  const auto block_bytes =
      block_vector_count_ * quantized_meta_.element_size();
  if (block_bytes % 32 != 0) {
    LOG_ERROR(
        "block_vector_count * quantized_element_size not align with 32 bytes.");
    return IndexError_InvalidArgument;
  }

  return 0;
}

//! Parse the centroid count of each level (one or two levels, separated by
//! CENTROID_SEPERATOR) into centroid_num_vec_.
//! centroid_num_vec_ is replaced as a whole and only on success, so a failed
//! parse or a repeated call never leaves partial or accumulated counts.
//! \return 0 on success, IndexError_InvalidArgument when the param is
//!         missing, has a wrong level count or holds an invalid number
int IVFBuilder::parse_centroids_num(const ailego::Params &params) {
  std::string centroids_num =
      params.get_as_string(PARAM_IVF_BUILDER_CENTROID_COUNT);
  if (centroids_num.empty()) {
    LOG_ERROR("Param %s is required", PARAM_IVF_BUILDER_CENTROID_COUNT.c_str());
    return IndexError_InvalidArgument;
  }

  std::vector<std::string> centroid_str_vec;
  ailego::StringHelper::Split(centroids_num, CENTROID_SEPERATOR,
                              &centroid_str_vec);
  const size_t level_cnt = centroid_str_vec.size();
  if ((level_cnt == 0) || (level_cnt > 2)) {
    LOG_ERROR("Centroids level count must be [1,2]");
    return IndexError_InvalidArgument;
  }

  // Collect into a scratch container first and commit at the very end.
  decltype(centroid_num_vec_) parsed_counts;
  for (size_t idx = 0; idx < level_cnt; ++idx) {
    uint32_t centroid_cnt = 0;
    if (!ailego::StringHelper::ToUint32(centroid_str_vec[idx], &centroid_cnt)) {
      LOG_ERROR("Invalid centroids count %s", centroid_str_vec[idx].c_str());
      return IndexError_InvalidArgument;
    }
    parsed_counts.push_back(centroid_cnt);
  }
  centroid_num_vec_ = std::move(parsed_counts);

  return 0;
}

//! Parse the clustering settings: auto tuning flag, cluster class (with a
//! metric dependent default) and one params object per centroid level.
//! Requires centroid_num_vec_ to be parsed already.
//! \return always 0
int IVFBuilder::parse_clustering_params(const ailego::Params &params) {
  params.get(PARAM_IVF_BUILDER_CLUSTER_AUTO_TUNING, &cluster_auto_tuning_);

  cluster_class_ = params.get_as_string(PARAM_IVF_BUILDER_CLUSTER_CLASS);
  if (cluster_class_.empty()) {
    // OptKmeansCluster does not support custom metric
    cluster_class_ = meta_.metric_name() == kMipsMetricName
                         ? "KmeansCluster"
                         : "OptKmeansCluster";
    LOG_INFO("Using [%s] as default cluster class", cluster_class_.c_str());
  }

  // Start from scratch so that a repeated call (e.g. init() retried after a
  // failure) does not append a second set of level params.
  cluster_params_.clear();
  // Levels are numbered from 1 in the param keys.
  for (size_t i = 1; i <= centroid_num_vec_.size(); ++i) {
    std::string level_params_key =
        PARAM_IVF_BUILDER_CLUSTER_PARAMS_IN_LEVEL_PREFIX + std::to_string(i);
    ailego::Params level_params;
    params.get<ailego::Params>(level_params_key, &level_params);
    cluster_params_.push_back(level_params);
  }

  return 0;
}

//! Parse thread / sampling settings, create the converter when one applies
//! and validate the block vector count.
//! NOTE(review): the converter params and the block vector count are read
//! from params_, the rest from `params`; init() makes both equal.
//! \return 0 on success, IndexError_NoExist when the converter cannot be
//!         created, IndexError_InvalidArgument on a bad block vector count
int IVFBuilder::parse_general_params(const ailego::Params &params) {
  thread_count_ = params.get_as_uint32(PARAM_IVF_BUILDER_THREAD_COUNT);
  sample_count_ = params.get_as_uint32(PARAM_IVF_BUILDER_TRAIN_SAMPLE_COUNT);
  sample_ratio_ = params.get_as_float(PARAM_IVF_BUILDER_TRAIN_SAMPLE_RATIO);

  params.get(PARAM_IVF_BUILDER_QUANTIZE_BY_CENTROID, &quantize_by_centroid_);
  params.get(PARAM_IVF_BUILDER_STORE_ORIGINAL_FEATURES,
             &store_original_features_);

  //! Prepare Converter for training: the mips converter is the default for
  //! the inner product metric, an explicit converter class overrides it
  const bool is_inner_product = meta_.metric_name() == kIPMetricName;
  if (is_inner_product) {
    converter_class_ = kMipsConverterName;
  }
  params.get(PARAM_IVF_BUILDER_CONVERTER_CLASS, &converter_class_);
  const bool has_converter = !converter_class_.empty();
  if (has_converter) {
    ailego::Params converter_params;
    params_.get(PARAM_IVF_BUILDER_CONVERTER_PARAMS, &converter_params);
    converter_ =
        CreateAndInitConverter(meta_, converter_class_, converter_params);
    ivf_assert(converter_, IndexError_NoExist);
  }

  params_.get(PARAM_IVF_BUILDER_BLOCK_VECTOR_COUNT, &block_vector_count_);
  if (block_vector_count_ == 0) {
    block_vector_count_ = kDefaultBlockCount;
  }
  // Must be a power of two not larger than kDefaultBlockCount.
  const bool too_large = block_vector_count_ > kDefaultBlockCount;
  const bool not_pow_of_2 =
      (block_vector_count_ & (block_vector_count_ - 1)) != 0;
  if (too_large || not_pow_of_2) {
    LOG_ERROR("block_vector_count only can be [1|2|4|8|16|32].");
    return IndexError_InvalidArgument;
  }
  const auto block_bytes = block_vector_count_ * meta_.element_size();
  if (block_bytes % 32 != 0) {
    LOG_ERROR("block_vector_count * element_size not align with 32 bytes.");
    return IndexError_InvalidArgument;
  }
  return 0;
}

//! Fill `params` with the settings of the stratified cluster trainer:
//! sampling, threads, auto tuning, cluster counts, level params and class.
//! The sampling / thread / auto tuning entries are written even when the
//! call fails afterwards.
//! \return 0 on success, IndexError_InvalidArgument without centroid counts
int IVFBuilder::prepare_trainer_params(ailego::Params &params) {
  params.set(STRATIFIED_TRAINER_SAMPLE_COUNT, sample_count_);
  params.set(STRATIFIED_TRAINER_SAMPLE_RATIO, sample_ratio_);
  params.set(STRATIFIED_TRAINER_THREAD_COUNT, thread_count_);
  params.set(STRATIFIED_TRAINER_AUTOAUNE, cluster_auto_tuning_);

  if (centroid_num_vec_.empty()) {
    LOG_ERROR("Centroids no specified.");
    return IndexError_InvalidArgument;
  }

  // "<level 1 count>" or "<level 1 count><separator><level 2 count>"
  std::string counts_text = std::to_string(centroid_num_vec_[0]);
  if (centroid_num_vec_.size() > 1) {
    counts_text += CENTROID_SEPERATOR;
    counts_text += std::to_string(centroid_num_vec_[1]);
  }
  params.set(STRATIFIED_TRAINER_CLUSTER_COUNT, counts_text);

  // Levels are numbered from 1 in the param keys.
  size_t level_no = 0;
  for (auto &level_params : cluster_params_) {
    ++level_no;
    std::string key =
        STRATIFIED_TRAINER_PARAMS_IN_LEVEL_PREFIX + std::to_string(level_no);
    params.set(key, level_params);
  }
  params.set(STRATIFIED_TRAINER_CLASS_NAME, cluster_class_);

  return 0;
}

//! Label all vectors of `holder`: the vectors are collected into batches of
//! kBatchSize and every batch is submitted as an IVFBuilder::label task.
//! The error code is read only after all submitted tasks have finished, so
//! that a failure of a late task is reported as well.
//! \param threads  thread pool running the label tasks
//! \param holder   (converted) vectors to label
//! \return 0 on success, IndexError_Runtime when the iterator or the task
//!         group cannot be created, otherwise the recorded task error code
int IVFBuilder::build_label_index(IndexThreads *threads,
                                  const IndexHolder::Pointer &holder) {
  auto iter = holder->create_iterator();
  if (!iter) {
    LOG_ERROR("Create iterator for holder failed");
    return IndexError_Runtime;
  }

  auto task_group = threads->make_group();
  if (!task_group) {
    LOG_ERROR("Failed to create task group");
    return IndexError_Runtime;
  }

  {
    size_t id = 0UL;
    // Runs when this scope is left, on the early exits below as well as on
    // normal completion, i.e. before err_code_ is read for the final return.
    AILEGO_DEFER([&]() {
      task_group->wait_finish();
      stats_.set_built_count(id);
      LOG_INFO("Finished building, total=%zu", id);
    });

    size_t elem_size = holder->element_size();
    std::shared_ptr<VectorList> vectors = std::make_shared<VectorList>();
    ivf_assert(vectors, IndexError_NoMemory);
    for (; iter && iter->is_valid(); iter->next()) {
      // Stop feeding as soon as a task has reported an error.
      ivf_assert(!error_, err_code_);
      vectors->emplace_back(iter->data(), elem_size, id);
      id++;
      // Submit a full batch, or the remainder once the last vector is seen.
      if (vectors->size() == kBatchSize || id == holder_->count()) {
        auto task = ailego::Closure ::New(const_cast<IVFBuilder *>(this),
                                          &IVFBuilder::label, vectors);
        task_group->submit(std::move(task));
        vectors = std::make_shared<VectorList>();
        ivf_assert(vectors, IndexError_NoMemory);
        vectors->reserve(kBatchSize);
      }
      if (!(id & 0xFFFFF)) {
        LOG_INFO("Current built count:%zu", id);
      }
    }
    ailego_assert_with(vectors->size() == 0, "invalid size");
  }

  // All tasks have finished here, the error code is final.
  return err_code_;
}

//! Dump the built IVF index through `dumper`.
//!
//! Sections are written in this order: inverted vectors (grouped by centroid,
//! optionally transformed by the quantizer), quantizer params, the centroid
//! index, and finally the original vectors when store_original_features_ is
//! set. Original vectors are written in the same order in which the inverted
//! vectors were dumped.
//!
//! @param dumper  destination dumper
//! @return 0 on success, otherwise the first error code encountered
int IVFBuilder::dump_index(const IndexDumper::Pointer &dumper) {
  int ret = CheckAndUpdateMajorOrder(quantized_meta_);
  ivf_check_error_code(ret);

  IVFDumper::Pointer ivf_dumper = std::make_shared<IVFDumper>(
      quantized_meta_, dumper, centroid_index_->centroids_count(),
      block_vector_count_);
  if (!ivf_dumper) {
    LOG_ERROR("Alloc IVFDumper failed");
    return IndexError_NoMemory;
  }

  //! Dump inverted vectors, remembering the dump order of local ids when the
  //! original features have to be written afterwards.
  std::vector<uint32_t> dumped_ids;
  if (store_original_features_) {
    dumped_ids.reserve(holder_->count());
  }
  if (quantizers_.size() == 0) {
    //! No quantizer for inverted vectors
    for (size_t i = 0; i < centroid_index_->centroids_count(); ++i) {
      ailego_assert_with(i < labels_.size(), "Index Overflow");
      for (size_t j = 0; j < labels_[i].size(); ++j) {
        auto id = labels_[i][j];
        if (store_original_features_) {
          dumped_ids.emplace_back(id);
        }
        ret = ivf_dumper->dump_inverted_vector(i, holder_->key(id),
                                               holder_->element(id));
        ivf_check_error_code(ret);
      }
    }
  } else {
    for (size_t i = 0; i < centroid_index_->centroids_count(); ++i) {
      ailego_assert_with(i < labels_.size(), "Index Overflow");
      auto holder =
          std::make_shared<LabelFilteredIndexHolder>(holder_, labels_[i]);
      if (!holder) {
        return IndexError_NoMemory;
      }
      const auto &quantizer =
          quantize_by_centroid_ ? quantizers_[i] : quantizers_[0];
      ret = quantizer->transform(holder);
      ivf_check_error_code(ret);

      //! Keep the transformed holder alive for as long as its iterator is
      //! in use, and refuse to dereference a missing result or iterator.
      auto quantized = quantizer->result();
      if (!quantized) {
        LOG_ERROR("Quantizer returned no result holder");
        return IndexError_Runtime;
      }
      auto iter = quantized->create_iterator();
      if (!iter) {
        LOG_ERROR("Create iterator for holder failed");
        return IndexError_Runtime;
      }
      for (; iter->is_valid(); iter->next()) {
        //! The iterator key is used as the local id into holder_
        uint32_t id = iter->key();
        if (store_original_features_) {
          dumped_ids.emplace_back(id);
        }
        ret =
            ivf_dumper->dump_inverted_vector(i, holder_->key(id), iter->data());
        ivf_check_error_code(ret);
      }
    }
  }

  ret = ivf_dumper->dump_inverted_vector_finished();
  ivf_check_error_code(ret);

  ret = ivf_dumper->dump_quantizer_params(quantizers_);
  ivf_check_error_code(ret);

  //! Prefer the searcher-side centroid index when one has been prepared
  auto centroid_index =
      searcher_centroid_index_ ? searcher_centroid_index_ : centroid_index_;
  ret = ivf_dumper->dump_centroid_index(centroid_index->data(),
                                        centroid_index->size());
  ivf_check_with_msg(ret, "Failed to dump CentroidIndex");

  if (store_original_features_) {
    for (size_t i = 0; i < dumped_ids.size(); ++i) {
      ret = ivf_dumper->dump_original_vector(holder_->element(dumped_ids[i]),
                                             holder_->element_size());
      ivf_check_error_code(ret);
    }
  }

  stats_.set_dumped_count(stats_.dumped_count() + ivf_dumper->dumped_count());

  return 0;
}

//! Create, initialize and train the quantizer(s) for the inverted vectors.
//!
//! Does nothing when no quantizer class is configured. Otherwise one
//! quantizer is created per centroid when quantize_by_centroid_ is enabled,
//! or a single shared quantizer when it is not. Training runs on the given
//! thread pool, and quantized_meta_ is taken from the first quantizer.
//!
//! @param threads  thread pool used to train the quantizers
//! @return 0 on success, IndexError_Unsupported for an odd dimension with
//!         the Int4 quantizer, IndexError_NoExist if the converter cannot be
//!         created, the converter's init error, or the recorded training
//!         error
int IVFBuilder::prepare_quantizer(IndexThreads *threads) {
  std::string quantizer_name;
  params_.get(PARAM_IVF_BUILDER_QUANTIZER_CLASS, &quantizer_name);
  if (quantizer_name.empty()) {
    return 0;
  }

  //! Prepare Quantizers for inverted index
  ailego::Params quantizer_params;
  params_.get(PARAM_IVF_BUILDER_QUANTIZER_PARAMS, &quantizer_params);
  if (((quantizer_name != kInt8QuantizerName &&
        quantizer_name != kInt4QuantizerName) ||
       meta_.metric_name() != kIPMetricName) &&
      quantize_by_centroid_) {
    LOG_WARN("%s is supported in InnerProduct only",
             PARAM_IVF_BUILDER_QUANTIZE_BY_CENTROID.c_str());
    quantize_by_centroid_ = false;
  }
  if (quantizer_name == kInt4QuantizerName && (meta_.dimension() & 0x1)) {
    LOG_ERROR("Unsupport quantizer=%s for dim=%u", kInt4QuantizerName,
              meta_.dimension());
    return IndexError_Unsupported;
  }

  for (size_t i = 0; i < centroid_index_->centroids_count(); ++i) {
    auto quantizer = IndexFactory::CreateConverter(quantizer_name);
    if (!quantizer) {
      LOG_ERROR("Failed to create converter %s", quantizer_name.c_str());
      return IndexError_NoExist;
    }
    int ret = quantizer->init(meta_, quantizer_params);
    if (ret != 0) {
      LOG_ERROR("Failed to initialize converter %s for %s",
                quantizer_name.c_str(), IndexError::What(ret));
      return ret;
    }
    //! Only fully initialized quantizers are ever stored
    quantizers_.emplace_back(quantizer);
    if (!quantize_by_centroid_) {
      break;
    }
  }

  //! Train the quantizers. Each task keeps its own return code: sharing one
  //! variable between concurrently running tasks would be a data race.
  auto train_data = [&](size_t i) {
    IndexHolder::Pointer holder = holder_;
    size_t idx = 0;
    if (quantize_by_centroid_) {
      holder = std::make_shared<LabelFilteredIndexHolder>(holder_, labels_[i]);
      if (!holder) {
        if (!error_.exchange(true)) {
          err_code_ = IndexError_NoMemory;
        }
        return;
      }
      idx = i;
    }
    if (holder->count() == 0) {
      return;
    }
    const int train_ret = quantizers_[idx]->train(holder);
    if (train_ret != 0) {
      LOG_ERROR("Failed to train converter %s for %s", quantizer_name.c_str(),
                IndexError::What(train_ret));
      if (!error_.exchange(true)) {
        err_code_ = IndexError_Runtime;
      }
    }
  };

  auto task_group = threads->make_group();
  if (!task_group) {
    LOG_ERROR("Failed to create task group");
    return IndexError_Runtime;
  }

  for (size_t i = 0; i < quantizers_.size(); ++i) {
    if (error_) {
      task_group->wait_finish();
      return err_code_;
    }
    task_group->submit(ailego::Closure ::New(train_data, i));
  }

  task_group->wait_finish();
  //! A failure in any of the last submitted tasks must not be reported as
  //! success.
  if (error_) {
    return err_code_;
  }
  if (quantizers_.size() > 0) {
    quantized_meta_ = quantizers_[0]->meta();
  }

  return 0;
}

INDEX_FACTORY_REGISTER_BUILDER(IVFBuilder);

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/ivf/ivf_builder.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <zvec/core/framework/index_builder.h>
#include <zvec/core/framework/index_meta.h>
#include "ivf_centroid_index.h"

namespace zvec {
namespace core {

/*! IVF Builder
 */
class IVFBuilder : public IndexBuilder {
 public:
  //! Constructor
  IVFBuilder();

  //! Destructor
  ~IVFBuilder();

  //! Disable them
  IVFBuilder(const IVFBuilder &) = delete;
  IVFBuilder &operator=(const IVFBuilder &) = delete;

 public:
  //! Initialize the builder
  virtual int init(const IndexMeta &meta,
                   const ailego::Params &params) override;

  //! Cleanup the builder
  virtual int cleanup(void) override;

  //! Train the data
  virtual int train(IndexThreads::Pointer threads,
                    IndexHolder::Pointer holder) override;

  //! Train the data
  virtual int train(const IndexTrainer::Pointer &trainer) override;

  //! Build the index
  virtual int build(IndexThreads::Pointer threads,
                    IndexHolder::Pointer holder) override;

  //! Dump index into file system
  virtual int dump(const IndexDumper::Pointer &dumper) override;

  //! Retrieve statistics
  virtual const Stats &stats(void) const override {
    return stats_;
  }

  IVFCentroidIndex::Pointer centroid_index() const {
    return centroid_index_;
  }

 public:
  /*! Index holder that keeps features and their primary keys in memory and
   *  exposes them by local id (position of insertion).
   */
  class RandomAccessIndexHolder : public IndexHolder {
   public:
    //! Shared pointer to the holder
    using Pointer = std::shared_ptr<RandomAccessIndexHolder>;

    /*! Forward iterator walking the holder by increasing local id
     */
    class Iterator : public IndexHolder::Iterator {
     public:
      //! Unique pointer to the iterator
      using Pointer = std::unique_ptr<Iterator>;

      //! Constructor, `owner` is the holder being iterated
      Iterator(RandomAccessIndexHolder *owner) : holder_{owner} {}

      //! Destructor
      virtual ~Iterator() = default;

      //! Feature of the current element
      virtual const void *data() const override {
        return holder_->element(id_);
      }

      //! True while the current local id is inside the holder
      virtual bool is_valid() const override {
        return id_ < holder_->count();
      }

      //! Primary key of the current element
      virtual uint64_t key() const override {
        return holder_->key(id_);
      }

      //! Advance to the next local id
      virtual void next() override {
        ++id_;
      }

     private:
      //! Iterated holder (not owned) and current local id
      RandomAccessIndexHolder *holder_{nullptr};
      uint32_t id_{0};
    };

    //! Constructor, the feature storage is created from `meta`
    RandomAccessIndexHolder(const IndexMeta &meta)
        : features_{std::make_shared<CompactIndexFeatures>(meta)} {}

    //! Number of elements stored, as reported by the feature storage
    virtual size_t count() const override {
      return features_->count();
    }

    //! Dimension of the stored features
    virtual size_t dimension() const override {
      return features_->dimension();
    }

    //! Data type of the stored features
    virtual IndexMeta::DataType data_type() const override {
      return features_->data_type();
    }

    //! Size in bytes of one stored feature
    virtual size_t element_size() const override {
      return features_->element_size();
    }

    //! The holder can always be iterated more than once
    virtual bool multipass() const override {
      return true;
    }

    //! Create an iterator positioned at local id 0
    virtual IndexHolder::Iterator::Pointer create_iterator() override {
      return IndexHolder::Iterator::Pointer(
          new RandomAccessIndexHolder::Iterator(this));
    }

    //! Reserve room for `elems` features and keys
    void reserve(size_t elems) {
      features_->reserve(elems);
      keys_.reserve(elems);
    }

    //! Append a feature together with its primary key
    void emplace(uint64_t pkey, const void *vec) {
      features_->emplace(vec);
      keys_.emplace_back(pkey);
    }

    //! Feature stored at local id `id`
    const void *element(size_t id) const {
      return features_->element(id);
    }

    //! Primary key stored at local id `id`
    uint64_t key(size_t id) const {
      ailego_assert_with(id < keys_.size(), "Index Overflow");
      return keys_[id];
    }

   private:
    //! A holder cannot be built without an IndexMeta
    RandomAccessIndexHolder() = delete;

    //! Feature storage and the primary keys, both indexed by local id
    CompactIndexFeatures::Pointer features_{};
    std::vector<uint64_t> keys_{};
  };

 private:
  /*! Owning copy of one feature together with its local id
   */
  class Vector {
   public:
    using Pointer = std::shared_ptr<Vector>;

    //! Copy `len` bytes starting at `vec` and remember the local id `idx`
    Vector(const void *vec, size_t len, uint32_t idx)
        : vec_(static_cast<const char *>(vec), len), id_(idx) {}

    //! Start of the copied bytes
    const void *data() const {
      return vec_.data();
    }

    //! Number of copied bytes
    size_t size() const {
      return vec_.size();
    }

    //! Local id given at construction
    uint32_t id() const {
      return id_;
    }

   private:
    //! Raw feature bytes (std::string is used as a byte buffer)
    std::string vec_{};
    uint32_t id_{0u};
  };

  using VectorList = std::vector<Vector>;

  //! Check MajorOrder in meta, and update the major order if needed
  int CheckAndUpdateMajorOrder(IndexMeta &meta);

  //! Parse params
  int parse_centroids_num(const ailego::Params &params);
  int parse_clustering_params(const ailego::Params &params);
  int parse_general_params(const ailego::Params &params);

  //! Prepare params for trainer
  int prepare_trainer_params(ailego::Params &params);

  //! Build the index
  int build_label_index(IndexThreads *threads,
                        const IndexHolder::Pointer &holder);

  //! Dump the index to dumper
  int dump_index(const IndexDumper::Pointer &dumper);

  //! Prepare the quantizer for inverted index
  int prepare_quantizer(IndexThreads *threads);

  //! Quantize the centroids list
  int quantize_centroids();

  //! Create the converter registered as `name` and initialize it with `meta`
  //! and `params`. Returns an empty pointer (after logging) when either the
  //! creation or the initialization fails.
  static IndexConverter::Pointer CreateAndInitConverter(
      const IndexMeta &meta, const std::string &name,
      const ailego::Params &params) {
    IndexConverter::Pointer created = IndexFactory::CreateConverter(name);
    if (created) {
      const int init_ret = created->init(meta, params);
      if (init_ret == 0) {
        return created;
      }
      LOG_ERROR("Failed to initialize converter %s for %s", name.c_str(),
                IndexError::What(init_ret));
    } else {
      LOG_ERROR("Failed to create converter %s", name.c_str());
    }
    return IndexConverter::Pointer();
  }

  //! Select the nearest centroid id for every vector of the batch and append
  //! the vector's local id to that centroid's label list.
  //!
  //! Runs concurrently on pool threads: labels_ is only touched under
  //! mutex_. On a failed centroid search the first failing task records
  //! IndexError_Runtime in err_code_ and the rest of the batch is skipped.
  void label(const std::shared_ptr<VectorList> &vecs) {
    for (const auto &vec : *vecs) {
      const uint32_t centroid_idx =
          centroid_index_->search_nearest_centroid(vec.data(), vec.size());
      if (centroid_idx == IVFCentroidIndex::kInvalidID) {
        LOG_ERROR("Failed to search nearest centroid in CentroidIndex");
        if (!error_.exchange(true)) {
          err_code_ = IndexError_Runtime;
        }
        return;
      }
      ailego_assert_with(centroid_idx < labels_.size(), "Index Overflow");
      //! Scoped lock: the mutex is released even if emplace_back throws
      std::lock_guard<std::mutex> guard(mutex_);
      labels_[centroid_idx].emplace_back(vec.id());
    }
  }


 private:
  //! Constants
  static constexpr size_t kThreadPoolQueueSize = 300u;
  static constexpr size_t kBatchSize = 10u;
  static constexpr size_t kDefaultBlockCount = 32u;

  enum BuilderState { INIT = 0, INITED = 1, TRAINED = 2, BUILT = 3 };

  //! Members
  BuilderState state_{INIT};
  Stats stats_{};
  ailego::Params params_{};
  IndexMeta meta_{};

  std::vector<uint32_t> centroid_num_vec_{};
  std::string cluster_class_{};
  std::string converter_class_{};
  std::vector<ailego::Params> cluster_params_{};

  std::vector<std::vector<uint32_t>> labels_{};
  std::mutex mutex_{};
  IVFCentroidIndex::Pointer centroid_index_{};
  IVFCentroidIndex::Pointer searcher_centroid_index_{};
  RandomAccessIndexHolder::Pointer holder_{};
  IndexMeta converted_meta_{};
  IndexConverter::Pointer converter_{};
  IndexMeta quantized_meta_{};
  std::vector<IndexConverter::Pointer> quantizers_{};

  std::atomic_bool error_{false};
  int err_code_{0};

  uint32_t thread_count_{0};
  uint32_t sample_count_{0};
  float sample_ratio_{0.0};
  uint32_t block_vector_count_{kDefaultBlockCount};
  bool cluster_auto_tuning_{false};
  bool store_original_features_{false};
  bool quantize_by_centroid_{false};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/ivf/ivf_centroid_index.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ivf_centroid_index.h"
#include <core/quantizer/quantizer_params.h>
#include <zvec/core/framework/index_framework.h>
#include "metric/metric_params.h"

namespace zvec {
namespace core {

/*! Fake Trainer to supply centroids in bundle
 */
class FakeClusterTrainer : public IndexTrainer {
 public:
  //! Constructor
  FakeClusterTrainer(const IndexMeta &imeta, const IndexBundle::Pointer &bundle)
      : meta_(imeta), bundle_(bundle) {}

  //! Destructor
  ~FakeClusterTrainer(void) {}

 protected:
  //! Initialize Trainer
  virtual int init(const IndexMeta &, const ailego::Params &) override {
    return 0;
  }

  //! Cleanup Trainer
  virtual int cleanup(void) override {
    return 0;
  }

  //! Train the data
  virtual int train(IndexHolder::Pointer) override {
    return 0;
  }

  //! Train the data
  virtual int train(IndexThreads::Pointer, IndexHolder::Pointer) override {
    return 0;
  }

  //! Load index from file path or dir
  virtual int load(IndexStorage::Pointer) override {
    return 0;
  }

  //! Dump index into file path or dir
  virtual int dump(const IndexDumper::Pointer &) override {
    return 0;
  }

  //! Retrieve Index Meta
  virtual const IndexMeta &meta(void) const override {
    return meta_;
  }

  //! Retrieve statistics
  virtual const IndexTrainer::Stats &stats(void) const override {
    return stats_;
  }

  //! Retrieve the output indexes
  virtual IndexBundle::Pointer indexes(void) const override {
    return bundle_;
  }

 private:
  //! Members
  IndexMeta meta_{};
  Stats stats_{};
  IndexBundle::Pointer bundle_{};
};

/*! Query reformer that rescales float queries to int8 for the InnerProduct
 *  metric. Each query is scaled independently by 127 / max(|x|).
 */
class Int8QuantizerReformer4IP : public IndexReformer {
 public:
  //! Initialize Reformer (nothing to configure)
  virtual int init(const ailego::Params &) override {
    return 0;
  }

  //! Cleanup Reformer (nothing to release)
  virtual int cleanup() override {
    return 0;
  }

  //! Load index from container (nothing to load)
  virtual int load(IndexStorage::Pointer) override {
    return 0;
  }

  //! Unload index (nothing to unload)
  virtual int unload() override {
    return 0;
  }

  //! Transform a single query: not implemented, only the batch overload
  //! below is available.
  virtual int transform(const void * /*query*/,
                        const IndexQueryMeta & /*qmeta*/, std::string * /*out*/,
                        IndexQueryMeta * /*ometa*/) const override {
    return IndexError_NotImplemented;
  }

  //! Transform `count` queries laid out back to back in `query`.
  //! The input is read as float values, `qmeta.dimension()` per query; the
  //! int8 output is written to `oquery` and described by `ometa`.
  virtual int transform(const void *query, const IndexQueryMeta &qmeta,
                        uint32_t count, std::string *oquery,
                        IndexQueryMeta *ometa) const override {
    const size_t dim = qmeta.dimension();
    const auto out_elem_size =
        IndexMeta::ElementSizeof(IndexMeta::DataType::DT_INT8, dim);
    oquery->resize(count * out_elem_size);
    ometa->set_meta(IndexMeta::DataType::DT_INT8, dim);

    const float *src = reinterpret_cast<const float *>(query);
    int8_t *dst = reinterpret_cast<int8_t *>(&(*oquery)[0]);
    for (size_t q = 0; q < count; ++q, src += dim, dst += dim) {
      //! Largest magnitude of this query
      float peak = 0.0f;
      for (size_t i = 0; i < dim; ++i) {
        const float magnitude = std::abs(src[i]);
        peak = std::max(peak, magnitude);
      }

      if (!(peak > 0.0f)) {
        //! No usable magnitude: emit a constant vector of ones
        std::fill(dst, dst + dim, static_cast<int8_t>(1));
        continue;
      }
      const float scale = 127 / peak;
      for (size_t i = 0; i < dim; ++i) {
        dst[i] = static_cast<int8_t>(std::round(src[i] * scale));
      }
    }
    return 0;
  }

  //! Normalize results (scores are left untouched)
  virtual int normalize(const void * /*query*/,
                        const IndexQueryMeta & /*qmeta*/,
                        IndexDocumentList & /*result*/) const override {
    return 0;
  }
};

/*! Query reformer that rescales float queries to packed int4 for the
 *  InnerProduct metric. Two consecutive values share one output byte.
 */
class Int4QuantizerReformer4IP : public IndexReformer {
 public:
  //! Initialize Reformer (nothing to configure)
  virtual int init(const ailego::Params &) override {
    return 0;
  }

  //! Cleanup Reformer (nothing to release)
  virtual int cleanup() override {
    return 0;
  }

  //! Load index from container (nothing to load)
  virtual int load(IndexStorage::Pointer) override {
    return 0;
  }

  //! Unload index (nothing to unload)
  virtual int unload() override {
    return 0;
  }

  //! Transform a single query: not implemented, only the batch overload
  //! below is available.
  virtual int transform(const void * /*query*/,
                        const IndexQueryMeta & /*qmeta*/, std::string * /*out*/,
                        IndexQueryMeta * /*ometa*/) const override {
    return IndexError_NotImplemented;
  }

  //! Transform `count` queries laid out back to back in `query`.
  //! The input is read as float values; an odd dimension is rejected with
  //! IndexError_Unsupported because values are packed in pairs.
  virtual int transform(const void *query, const IndexQueryMeta &qmeta,
                        uint32_t count, std::string *oquery,
                        IndexQueryMeta *ometa) const override {
    if (qmeta.dimension() & 0x1) {
      LOG_ERROR("Unsuuport dim=%u for transform", qmeta.dimension());
      return IndexError_Unsupported;
    }

    const size_t dim = qmeta.dimension();
    const size_t pairs = dim / 2;
    const auto out_elem_size =
        IndexMeta::ElementSizeof(IndexMeta::DataType::DT_INT4, dim);
    oquery->resize(count * out_elem_size);
    ometa->set_meta(IndexMeta::DataType::DT_INT4, dim);

    const float *all_in = reinterpret_cast<const float *>(query);
    uint8_t *all_out = reinterpret_cast<uint8_t *>(&(*oquery)[0]);
    for (size_t q = 0; q < count; ++q) {
      const float *in = &all_in[q * dim];
      uint8_t *out = &all_out[q * dim / 2];

      //! Largest magnitude and largest signed value of this query
      float peak = 0.0f;
      float top = -std::numeric_limits<float>::max();
      for (size_t i = 0; i < dim; ++i) {
        const float magnitude = std::abs(in[i]);
        peak = std::max(peak, magnitude);
        top = std::max(top, in[i]);
      }

      if (!(peak > 0.0f)) {
        //! NOTE(review): 9 is written to every packed byte here, while the
        //! Int8 counterpart fills with 1 - confirm the intended nibbles.
        std::fill(out, out + dim / 2, static_cast<uint8_t>(9));
        continue;
      }

      //! Scale by 8 only when the largest positive value stays below
      //! 7/8 of the peak magnitude, otherwise by 7.
      const int levels = (7 * peak > 8 * top) ? 8 : 7;
      const float scale = levels / peak;
      for (size_t p = 0; p < pairs; ++p) {
        const auto hi = static_cast<int8_t>(std::round(in[2 * p] * scale));
        const auto lo = static_cast<int8_t>(std::round(in[2 * p + 1] * scale));
        //! First value goes to the high nibble, second to the low nibble
        out[p] = (static_cast<uint8_t>(hi) << 4) |
                 (static_cast<uint8_t>(lo) & 0xF);
      }
    }
    return 0;
  }

  //! Normalize results (scores are left untouched)
  virtual int normalize(const void * /*query*/,
                        const IndexQueryMeta & /*qmeta*/,
                        IndexDocumentList & /*result*/) const override {
    return 0;
  }
};

//! Remember the index meta and read the optimizer settings from `params`:
//! builder class/params and searcher class/params. Always returns 0.
int IVFCentroidIndex::init(const IndexMeta &meta,
                           const ailego::Params &params) {
  //! Searcher-side optimizer settings
  params.get(PARAM_IVF_SEARCHER_OPTIMIZER, &searcher_class_);
  params.get(PARAM_IVF_SEARCHER_OPTIMIZER_PARAMS, &searcher_params_);

  //! Builder-side optimizer settings
  params.get(PARAM_IVF_BUILDER_OPTIMIZER_CLASS, &builder_class_);
  params.get(PARAM_IVF_BUILDER_OPTIMIZER_PARAMS, &builder_params_);

  meta_ = meta;
  return 0;
}

int IVFCentroidIndex::search(const void *query, const IndexQueryMeta &qmeta,
                             size_t count,
                             IndexSearcher::Context::Pointer &ctx) {
  int ret = 0;

  if (reformer_) {
    std::string buffer;
    IndexQueryMeta ometa;
    ret = reformer_->transform(query, qmeta, count, &buffer, &ometa);
    if (ret != 0) {
      LOG_ERROR("Failed to transform querys by reformer");
      return ret;
    }
    ret = searcher_->search_impl(buffer.data(), ometa, count, ctx);
  } else {
    ret = searcher_->search_impl(query, qmeta, count, ctx);
  }

  ivf_check_with_msg(ret, "Failed to search in centroid index for %s",
                     IndexError::What(ret));

  return 0;
}

//! Find the centroid nearest to `query` and return its key.
//!
//! @param query  raw vector, `len` bytes long
//! @param len    must equal meta_.element_size()
//! @return the key of the top-1 result, or kInvalidID on a size mismatch,
//!         a missing search context, a search failure or an empty result
uint32_t IVFCentroidIndex::search_nearest_centroid(const void *query,
                                                   size_t len) {
  //! Called in the index building procedure, so transforming the query is
  //! needless
  if (len != meta_.element_size()) {
    LOG_ERROR("Invalid query size actual: %zu, expected: %u", len,
              meta_.element_size());
    return kInvalidID;
  }

  //! One cached context per thread. The cache is tagged with the searcher it
  //! was created from, so a thread that later serves a different searcher
  //! (another index instance, or a reloaded one) gets a fresh context
  //! instead of reusing a stale one.
  thread_local IndexSearcher::Context::Pointer context;
  thread_local const void *context_owner = nullptr;
  if (!context || context_owner != searcher_.get()) {
    context = searcher_->create_context();
    context_owner = searcher_.get();
    if (!context) {
      LOG_ERROR("Failed to create searcher context");
      return kInvalidID;
    }
  }
  context->set_topk(1);

  IndexQueryMeta qmeta(meta_.data_type(), meta_.dimension());
  int ret = searcher_->search_impl(query, qmeta, context);
  if (ret != 0 || context->result().empty()) {
    LOG_ERROR("Failed to search nearest centroid, with ret %d", ret);
    return kInvalidID;
  }

  return static_cast<uint32_t>(context->result()[0].key());
}

uint32_t IVFCentroidIndex::transform_and_search_nearest_centroid(
    const void *record, const IndexQueryMeta &rmeta,
    IndexSearcher::Context::Pointer &ctx) const {
  int ret = 0;
  if (reformer_) {
    std::string buffer;
    IndexQueryMeta ometa;
    ret = reformer_->convert(record, rmeta, &buffer, &ometa);
    if (ret != 0) {
      LOG_ERROR("Failed to transform querys by reformer");
      return kInvalidID;
    }
    ret = searcher_->search_impl(buffer.data(), ometa, ctx);
  } else {
    ret = searcher_->search_impl(record, rmeta, ctx);
  }
  if (ret != 0 || ctx->result().empty()) {
    LOG_ERROR("Failed to search in centroid index for %s",
              IndexError::What(ret));
    return kInvalidID;
  }

  return static_cast<uint32_t>(ctx->result()[0].key());
}

//! Run the configured quantizer over `holder` and return the converted
//! holder, or nullptr on any failure.
//!
//! For a Mips-reformed L2 index combined with an integer (Int8/Int4)
//! quantizer, the Mips conversion is reversed first and meta_ is switched to
//! the InnerProduct metric without reformer. On success meta_ is replaced by
//! the quantizer's meta.
IndexHolder::Pointer IVFCentroidIndex::quantize_holder(
    const IndexHolder::Pointer &holder) {
  const bool integer_quantizer = quantizer_class_ == kInt8QuantizerName ||
                                 quantizer_class_ == kInt4QuantizerName;
  const bool mips_over_l2 = meta_.reformer_name() == kMipsReformerName &&
                            meta_.metric_name() == kL2MetricName;

  auto input = holder;
  if (mips_over_l2 && integer_quantizer) {
    //! Reverse for Mips if do convert by integer quantizer
    auto reverse = IndexFactory::CreateConverter(kMipsRevConverterName);
    if (!reverse) {
      LOG_ERROR("Failed to create converter %s", kMipsRevConverterName);
      return nullptr;
    }

    //! Forward the Mips reformer settings to the reverse converter.
    //! NOTE(review): L2_NORM is read as uint32 and the FORCED_* flag as
    //! float - confirm these accessor types are intended.
    auto mips_params = meta_.reformer_params();
    ailego::Params reverse_params;
    reverse_params.set(MIPS_REVERSE_CONVERTER_M_VALUE,
                       mips_params.get_as_uint32(MIPS_REFORMER_M_VALUE));
    reverse_params.set(MIPS_REVERSE_CONVERTER_U_VALUE,
                       mips_params.get_as_float(MIPS_REFORMER_U_VALUE));
    reverse_params.set(MIPS_REVERSE_CONVERTER_L2_NORM,
                       mips_params.get_as_uint32(MIPS_REFORMER_L2_NORM));
    reverse_params.set(
        MIPS_REVERSE_CONVERTER_FORCED_SINGLE_FLOAT,
        mips_params.get_as_float(MIPS_REFORMER_FORCED_HALF_FLOAT));

    if (reverse->init(meta_, reverse_params) != 0) {
      LOG_ERROR("Fail to init converter %s", kMipsRevConverterName);
      return nullptr;
    }
    if (IndexConverter::TrainAndTransform(reverse, holder) != 0) {
      LOG_ERROR("Fail to transform converter %s", kMipsRevConverterName);
      return nullptr;
    }

    input = reverse->result();
    meta_ = reverse->meta();
    meta_.set_metric(kIPMetricName, 0, ailego::Params());
    meta_.set_reformer("", 0, ailego::Params());
  }

  auto quantizer = IndexFactory::CreateConverter(quantizer_class_);
  if (!quantizer) {
    LOG_ERROR("Failed to create converter %s", quantizer_class_.c_str());
    return nullptr;
  }
  if (quantizer->init(meta_, quantizer_params_) != 0) {
    LOG_ERROR("Fail to init converter %s", quantizer_class_.c_str());
    return nullptr;
  }
  if (IndexConverter::TrainAndTransform(quantizer, input) != 0) {
    LOG_ERROR("Fail to tranform converter %s", quantizer_class_.c_str());
    return nullptr;
  }

  meta_ = quantizer->meta();
  return quantizer->result();
}

//! Build an index over the centroids and dump it through `dumper`.
//!
//! The centroids are wrapped in a holder, optionally quantized, and then run
//! through the configured builder (init, train, build, dump). The dumper is
//! closed at the end of a successful run.
//!
//! @param centroid_list  centroids to index, must not be empty
//! @param dumper         destination of the built index
//! @return 0 on success, IndexError_NoExist if the builder cannot be
//!         created, IndexError_InvalidArgument for an empty centroid list,
//!         IndexError_Runtime if quantization fails, otherwise the error of
//!         the failing builder/dumper step
int IVFCentroidIndex::build_index(
    const IndexCluster::CentroidList &centroid_list,
    const IndexDumper::Pointer &dumper) {
  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder(builder_class_);
  if (!builder) {
    LOG_ERROR("Failed to create builder %s", builder_class_.c_str());
    return IndexError_NoExist;
  }

  IndexHolder::Pointer holder =
      std::make_shared<CentroidsIndexHolder>(meta_, centroid_list);
  if (!holder) {
    return IndexError_NoMemory;
  }
  if (holder->count() == 0) {
    LOG_ERROR("No centroids to build");
    return IndexError_InvalidArgument;
  }
  centroids_count_ = holder->count();

  //! Set default params if not given (currently disabled). The default
  //! centroid count used to be derived here as
  //!   std::to_string(static_cast<size_t>(
  //!       std::ceil(std::sqrt(centroids_count_ / 10.0))))
  // if (IsHcBuilder(builder_class_) &&
  //     !builder_params_.has(hc::PARAM_HC_BUILDER_CENTROID_COUNT)) {
  //   builder_params_.set(hc::PARAM_HC_BUILDER_CENTROID_COUNT, count);
  // } else if (builder_class_ == "GcBuilder" &&
  //            !builder_params_.has(hc::PARAM_GC_BUILDER_CENTROID_COUNT)) {
  //   builder_params_.set(hc::PARAM_GC_BUILDER_CENTROID_COUNT, count);
  // }
  if (!quantizer_class_.empty()) {
    //! quantize_holder() also updates meta_ to describe the quantized data
    holder = this->quantize_holder(holder);
    if (!holder) {
      return IndexError_Runtime;
    }
  }

  const auto name = builder_class_.c_str();
  int ret = builder->init(meta_, builder_params_);
  ivf_check_with_msg(ret, "%s init failed, ret=%d", name, ret);

  // if (IsHcBuilder(builder_class_) && quantizer_class_.empty()) {
  //   auto trainer = this->prepare_trainer(centroid_list);
  //   ret = trainer ? builder->train(trainer) : builder->train(holder);
  // } else {
  //   ret = builder->train(holder);
  // }

  ret = builder->train(holder);
  ivf_check_with_msg(ret, "%s train failed, ret=%d", name, ret);

  ret = builder->build(holder);
  ivf_check_with_msg(ret, "%s build failed, ret=%d", name, ret);

  ret = builder->dump(dumper);
  ivf_check_with_msg(ret, "%s dump failed, ret=%d", name, ret);

  ret = dumper->close();
  ivf_check_error_code(ret);

  return 0;
}

//! Build the centroid index in memory and load it for searching.
//!
//! The index is dumped through a "MemoryDumper" to a randomly generated
//! path, the resulting single memory block is exposed via data_/size_, and
//! the same path is then opened through a "MemoryReadStorage" and loaded.
//!
//! @param centroid_list  centroids to index
//! @return 0 on success, IndexError_NoExist if the dumper or storage cannot
//!         be created, IndexError_Runtime if the dumped memory cannot be
//!         opened or does not consist of exactly one block, otherwise the
//!         error of the failing step
int IVFCentroidIndex::build(const IndexCluster::CentroidList &centroid_list) {
  index_building_ = true;
  //! Build and dump the index
  IndexDumper::Pointer dumper = IndexFactory::CreateDumper("MemoryDumper");
  if (!dumper) {
    LOG_ERROR("Failed to create MemoryDumper");
    return IndexError_NoExist;
  }
  path_ = IVFUtility::GenerateRandomPath(kTempralPathPrefix);
  int ret = dumper->create(path_);
  if (ret != 0) {
    LOG_ERROR("IndexDumper create path %s failed", path_.c_str());
    return ret;
  }
  ret = this->build_index(centroid_list, dumper);
  ivf_check_error_code(ret);

  //! `ret` is 0 from here on, so the failures below need an explicit error
  //! code; returning `ret` would report success.
  auto rope = IndexMemory::Instance()->open(path_);
  if (!rope) {
    LOG_ERROR("Open memory path %s failed.", path_.c_str());
    return IndexError_Runtime;
  }
  if (rope->count() != 1) {
    LOG_ERROR("Graph Rope block count not equal with 1.");
    return IndexError_Runtime;
  }
  (*rope)[0].read(0, &data_, 0);
  size_ = (*rope)[0].size();

  //! Load the index
  IndexStorage::Pointer container =
      IndexFactory::CreateStorage("MemoryReadStorage");
  if (!container) {
    LOG_ERROR("Failed to create MemoryReadStorage");
    return IndexError_NoExist;
  }
  ret = container->init(ailego::Params());
  ivf_check_with_msg(ret, "Failed to initialize MemoryReadStorage for %s",
                     IndexError::What(ret));
  ret = container->open(path_, false);
  ivf_check_with_msg(ret, "Failed to load path in MemoryReadStorage for %s",
                     IndexError::What(ret));

  //! Forward only the searcher settings that were actually configured
  ailego::Params searcher_params;
  if (!searcher_class_.empty()) {
    searcher_params.set(PARAM_IVF_SEARCHER_OPTIMIZER, searcher_class_);
  }
  if (!searcher_params_.empty()) {
    searcher_params.set(PARAM_IVF_SEARCHER_OPTIMIZER_PARAMS, searcher_params_);
  }
  ret = this->load(container, searcher_params);
  ivf_check_with_msg(ret, "IVFCentroidIndex load failed with %s",
                     IndexError::What(ret));

  return 0;
}

//! Load the centroid index from a storage container.
//!
//! Deserializes the index meta, creates and initializes the reformer named
//! in the meta (if any), creates the searcher (the class name and params may
//! be overridden through `params`) and loads the searcher from `container`.
//!
//! @param container  Storage holding the dumped centroid index.
//! @param params     Optional searcher overrides.
//! @return 0 on success, otherwise a non-zero error code.
int IVFCentroidIndex::load(const IndexStorage::Pointer &container,
                           const ailego::Params params) {
  if (!container) {
    LOG_ERROR("Invalid container");
    return IndexError_InvalidArgument;
  }

  int ret = IndexHelper::DeserializeFromStorage(container.get(), &meta_);
  if (ret != 0) {
    LOG_ERROR("Failed to deserialize meta from container");
    return ret;
  }

  //! Set up the reformer when the meta names one
  auto reformer_class = meta_.reformer_name();
  if (!reformer_class.empty()) {
    LOG_DEBUG("Load CentroidIndex with reformer %s, metric %s",
              reformer_class.c_str(), meta_.metric_name().c_str());

    const bool is_int8 = (reformer_class == kInt8ReformerName);
    const bool is_int4 = (reformer_class == kInt4ReformerName);
    const bool use_ip_quantizer =
        (is_int8 || is_int4) && meta_.metric_name() == kIPMetricName;

    if (!use_ip_quantizer) {
      reformer_ = IndexFactory::CreateReformer(reformer_class);
      if (!reformer_) {
        LOG_ERROR("Failed to create reformer %s", reformer_class.c_str());
        return IndexError_NoExist;
      }
    } else {
      // Integer quantizers combined with the IP metric use dedicated
      // reformer classes instead of the factory-created one
      if (is_int8) {
        reformer_ = std::make_shared<Int8QuantizerReformer4IP>();
      } else {
        reformer_ = std::make_shared<Int4QuantizerReformer4IP>();
      }
      if (!reformer_) {
        return IndexError_NoMemory;
      }
    }

    ret = reformer_->init(meta_.reformer_params());
    ivf_check_with_msg(ret, "Failed to initialize reformer %s",
                       reformer_class.c_str());
  }

  //! Create the searcher; `params` may override the class and its params
  searcher_class_ = meta_.searcher_name();
  params.get(PARAM_IVF_SEARCHER_OPTIMIZER, &searcher_class_);
  params.get(PARAM_IVF_SEARCHER_OPTIMIZER_PARAMS, &searcher_params_);
  searcher_ = IndexFactory::CreateSearcher(searcher_class_);
  if (!searcher_) {
    LOG_ERROR("Failed to create searcher %s", searcher_class_.c_str());
    return IndexError_Runtime;
  }

  auto merged_params = meta_.searcher_params();
  merged_params.merge(searcher_params_);
  ret = searcher_->init(merged_params);
  ivf_check_with_msg(ret, "Failed to initialize searcher %s",
                     searcher_class_.c_str());

  //! The metric is only created while building; otherwise an empty pointer
  //! is handed to the searcher
  IndexMetric::Pointer metric;
  if (index_building_) {
    // The searcher index metric should be specified in the building process,
    // otherwise the query_metric will be used in searching
    metric = IndexFactory::CreateMetric(meta_.metric_name());
    ivf_assert_with_msg(metric, IndexError_NoExist,
                        "Failed to create metric %s",
                        meta_.metric_name().c_str());
    ret = metric->init(meta_, meta_.metric_params());
    ivf_check_with_msg(ret, "Failed to initialize metric");
  }

  ret = searcher_->load(container, metric);
  ivf_check_with_msg(ret, "Failed to load searcher %s",
                     searcher_class_.c_str());

  return 0;
}

//! Prepare a trainer for a two-level centroid list.
//!
//! Copies every top-level centroid with its subitems and similars cleared.
//! When no centroid carries subitems an empty pointer is returned; otherwise
//! the stripped centroids are serialized into a bundle that backs a
//! FakeClusterTrainer.
IndexTrainer::Pointer IVFCentroidIndex::prepare_trainer(
    const IndexCluster::CentroidList &centroid_list) {
  bool has_second_level = false;
  IndexCluster::CentroidList top_level;

  for (auto &source : centroid_list) {
    auto stripped = source;
    has_second_level = has_second_level || !stripped.subitems().empty();
    stripped.mutable_subitems()->clear();
    stripped.mutable_similars()->clear();
    top_level.emplace_back(stripped);
  }

  if (has_second_level) {
    IndexBundle::Pointer bundle;
    IndexCluster::Serialize(meta_, top_level, &bundle);
    return std::make_shared<FakeClusterTrainer>(meta_, bundle);
  }
  return IndexTrainer::Pointer();
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/ivf/ivf_centroid_index.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <zvec/ailego/container/params.h>
#include <zvec/core/framework/index_framework.h>
#include "ivf_params.h"
#include "ivf_utility.h"

namespace zvec {
namespace core {

/*! IVF Centroid Index
 */
class IVFCentroidIndex {
 public:
  typedef std::shared_ptr<IVFCentroidIndex> Pointer;

  //! Constructor
  IVFCentroidIndex(void) {}

  //! Destructor
  ~IVFCentroidIndex(void) {
    IndexMemory *instance = IndexMemory::Instance();
    if (instance) {
      if (instance->has(path_)) {
        instance->remove(path_);
      }
    }
  }

  //! Initialize
  int init(const IndexMeta &meta, const ailego::Params &params);

  //! Set Quantizer for the index
  void set_quantizer(const std::string &quantizer_name,
                     ailego::Params &quantizer_params) {
    quantizer_class_ = quantizer_name;
    quantizer_params_ = quantizer_params;
  }

  //! Retrieve data address of the index
  const void *data(void) const {
    return data_;
  }

  //! Retrieve size of the index
  size_t size(void) const {
    return size_;
  }

  //! Create searcher context for centroid index
  IndexSearcher::Context::Pointer create_context() const {
    return searcher_ ? searcher_->create_context() : nullptr;
  }

  //! Similarity search
  int search(const void *query, const IndexQueryMeta &qmeta, size_t count,
             IndexSearcher::Context::Pointer &ctx);

  //! Search the nearest point, must be called in local thread pool
  uint32_t search_nearest_centroid(const void *query, size_t len);

  //! Transform Data and Search the nearest point, called while adding record
  uint32_t transform_and_search_nearest_centroid(
      const void *record, const IndexQueryMeta &rmeta,
      IndexSearcher::Context::Pointer &ctx) const;

  //! Build Centroid Index From Centroid List
  int build(const IndexCluster::CentroidList &centroid_list);

  //! Load Centroid Index From container
  int load(const IndexStorage::Pointer &container, const ailego::Params params);

  //! Retrieve centroid count of the index
  size_t centroids_count(void) const {
    return centroids_count_;
  }

  //! Retrieve meta
  const IndexMeta &meta() const {
    return meta_;
  }

  //! Retrieve reformer of the index
  const IndexReformer::Pointer reformer(void) const {
    return reformer_;
  }

  static constexpr uint32_t kInvalidID = std::numeric_limits<uint32_t>::max();

 private:
  /*! Centroids IndexHolder
   */
  class CentroidsIndexHolder : public IndexHolder {
   public:
    class Iterator : public IndexHolder::Iterator {
     public:
      //! Index Holder Iterator Pointer
      typedef std::unique_ptr<Iterator> Pointer;

      //! Constructor
      Iterator(std::vector<const void *> *features) : features_(features) {}

      //! Destructor
      virtual ~Iterator(void) {}

      //! Retrieve pointer of data
      virtual const void *data(void) const override {
        return (*features_)[id_];
      }

      //! Test if the iterator is valid
      virtual bool is_valid(void) const override {
        return id_ < features_->size();
      }

      //! Retrieve primary key
      virtual uint64_t key(void) const override {
        return id_;
      }

      //! Next iterator
      virtual void next(void) override {
        ++id_;
      }

     private:
      //! Members
      std::vector<const void *> *features_{nullptr};
      uint32_t id_{0};
    };

    //! Constructor
    CentroidsIndexHolder(const IndexMeta &meta,
                         const IndexCluster::CentroidList &centroid_list)
        : dimension_(meta.dimension()),
          element_size_(meta.element_size()),
          data_type_(meta.data_type()) {
      using CentroidList = IndexCluster::CentroidList;

      std::function<void(const CentroidList &)> get_leaf_features =
          [&](const CentroidList &cents) {
            if (cents.empty()) {
              return;
            }
            for (const auto &it : cents) {
              if (it.subitems().empty()) {
                features_.emplace_back(it.feature());
              } else {
                get_leaf_features(it.subitems());
              }
            }
          };

      get_leaf_features(centroid_list);
    }

    //! Retrieve count of elements in holder (-1 indicates unknown)
    virtual size_t count(void) const override {
      return features_.size();
    }

    //! Retrieve dimension
    virtual size_t dimension(void) const override {
      return dimension_;
    }

    //! Retrieve type information
    virtual IndexMeta::DataType data_type(void) const override {
      return data_type_;
    }

    //! Retrieve element size in bytes
    virtual size_t element_size(void) const override {
      return element_size_;
    }

    //! Retrieve if it can multi-pass
    virtual bool multipass(void) const override {
      return true;
    }

    //! Create a new iterator
    virtual IndexHolder::Iterator::Pointer create_iterator(void) override {
      return IndexHolder::Iterator::Pointer(
          new CentroidsIndexHolder::Iterator(&features_));
    }

   private:
    //! Members
    std::vector<const void *> features_{};
    size_t dimension_{0};
    size_t element_size_{0};
    IndexMeta::DataType data_type_{IndexMeta::DataType::DT_UNDEFINED};
  };

  int build_index(const IndexCluster::CentroidList &centroid_list,
                  const IndexDumper::Pointer &dumper);

  //! Prepare trainer for clustering index
  IndexTrainer::Pointer prepare_trainer(
      const IndexCluster::CentroidList &centroid_list);

  //! Quantize the centroid vectors in holder
  IndexHolder::Pointer quantize_holder(const IndexHolder::Pointer &holder);


 private:
  //! Constants
  constexpr static const char *kDefaultBuilder = "FlatBuilder";
  constexpr static const char *kTempralPathPrefix = "IVF";

  //! Members
  IndexMeta meta_{};

  IndexSearcher::Pointer searcher_{};
  IndexReformer::Pointer reformer_{};
  std::string builder_class_{kDefaultBuilder};
  std::string searcher_class_{};
  std::string quantizer_class_{};

  std::string path_{};

  ailego::Params builder_params_{};
  ailego::Params searcher_params_{};
  ailego::Params quantizer_params_{};

  const void *data_{};
  size_t size_{};
  size_t centroids_count_{0};
  bool index_building_{false};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/ivf/ivf_distance_calculator.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ivf_distance_calculator.h"
#include <iostream>

namespace zvec {
namespace core {

//! Constructor
//! Caches the distance functions of the metric: the plain distance, the
//! block-by-one matrix distance, and the block-by-N matrix distances for
//! N = 32, 16, 8, 4, 2, 1 (stored in distances_ at index N).
IVFDistanceCalculator::IVFDistanceCalculator(const IndexMeta &meta,
                                             const IndexMetric::Pointer &metric,
                                             uint32_t block_vec_cnt)
    : metric_ptr_(metric), block_vec_cnt_(block_vec_cnt) {
  row_distance_ = metric->distance();
  distanceXx1_ = metric->distance_matrix(block_vec_cnt, 1);

  // Slots 0..32; only the power-of-two slots are filled below
  distances_.resize(33);
  for (size_t batch = 32; batch > 0; batch >>= 1) {
    distances_[batch] = metric->distance_matrix(block_vec_cnt, batch);
  }

  element_size_ = meta.element_size();
  dimension_ = meta.dimension();
  column_major_order_ =
      (meta.major_order() == IndexMeta::MajorOrder::MO_COLUMN);
}

//! Destructor, drops every cached distance function
IVFDistanceCalculator::~IVFDistanceCalculator() {
  distances_.clear();
  distanceXx1_ = nullptr;
  row_distance_ = nullptr;
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/ivf/ivf_distance_calculator.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <zvec/ailego/utility/time_helper.h>
#include <zvec/core/framework/index_framework.h>

namespace zvec {
namespace core {

/*! IVF Distance Calculator
 *  Computes distances between queries and blocks of features, using the
 *  matrix distance functions for full column-major blocks and the plain
 *  distance function otherwise.
 */
class IVFDistanceCalculator {
 public:
  using Pointer = std::shared_ptr<IVFDistanceCalculator>;

  //! Constructor
  IVFDistanceCalculator(const IndexMeta &meta,
                        const IndexMetric::Pointer &metric,
                        uint32_t block_vec_cnt);

  //! Destructor
  virtual ~IVFDistanceCalculator();

  //! Distances between `qnum` queries and `fnum` centroids
  inline void query_centroids_distance(const void *query, size_t qnum,
                                       const void *feature, size_t fnum,
                                       float *distances);

  //! Distances between a single query and `fnum` centroids
  inline void query_centroids_distance(const void *query, const void *feature,
                                       size_t fnum, float *distances);

  //! Distances between a single query and `fnum` features, using the major
  //! order of the index meta
  inline void query_features_distance(const void *query, const void *feature,
                                      size_t fnum, float *distances);

  //! Distances between a single query and `fnum` features, with the major
  //! order of the block given by the caller
  inline void query_features_distance(const void *query, const void *feature,
                                      bool column_major, size_t fnum,
                                      float *distances);

 protected:
  //! Row Major Distances -> Online
  inline void row_major_distance(const void *query, size_t qnum,
                                 const void *feature, size_t fnum, float *out);

  //! Row major distances for a single query
  inline void row_major_distance(const void *query, const void *feature,
                                 size_t fnum, float *out);

  //! Distances between a batch of Q queries and `fnum` centroids
  template <size_t Q>
  inline void batch_query_centroids_distance(const void *query,
                                             const void *feature, size_t fnum,
                                             float *distances);

  //! Members
  IndexMetric::Pointer metric_ptr_{};
  IndexMetric::MatrixDistance row_distance_{nullptr};
  IndexMetric::MatrixDistance distanceXx1_{nullptr};
  std::vector<IndexMetric::MatrixDistance> distances_{};

  size_t element_size_{0};
  size_t dimension_{0};
  uint32_t block_vec_cnt_{0};
  bool column_major_order_{false};
};

//! Distances between `qnum` queries and `fnum` centroids.
//! Row-major indexes handle the queries one by one, advancing the output by
//! block_vec_cnt_ floats per query. Column-major indexes only support
//! batches of 1, 2, 4, 8, 16 or 32 queries; any other count is logged and
//! leaves `distances` untouched.
void IVFDistanceCalculator::query_centroids_distance(const void *query,
                                                     size_t qnum,
                                                     const void *feature,
                                                     size_t fnum,
                                                     float *distances) {
  if (!column_major_order_) {
    const uint8_t *query_cursor = reinterpret_cast<const uint8_t *>(query);
    float *out_cursor = distances;
    for (size_t remaining = qnum; remaining != 0; --remaining) {
      this->row_major_distance(query_cursor, feature, fnum, out_cursor);
      query_cursor += element_size_;
      out_cursor += block_vec_cnt_;
    }
    return;
  }

  switch (qnum) {
    case 1:
      batch_query_centroids_distance<1>(query, feature, fnum, distances);
      break;
    case 2:
      batch_query_centroids_distance<2>(query, feature, fnum, distances);
      break;
    case 4:
      batch_query_centroids_distance<4>(query, feature, fnum, distances);
      break;
    case 8:
      batch_query_centroids_distance<8>(query, feature, fnum, distances);
      break;
    case 16:
      batch_query_centroids_distance<16>(query, feature, fnum, distances);
      break;
    case 32:
      batch_query_centroids_distance<32>(query, feature, fnum, distances);
      break;
    default:
      LOG_ERROR("Unsupported query num %zu.", qnum);
      break;
  }
}

void IVFDistanceCalculator::query_centroids_distance(const void *query,
                                                     const void *feature,
                                                     size_t fnum,
                                                     float *distances) {
  this->query_features_distance(query, feature, fnum, distances);
}

//! Distances between a single query and `fnum` features.
//! The matrix distance is used only for a full block of a column-major
//! index; every other case is computed feature by feature.
void IVFDistanceCalculator::query_features_distance(const void *query,
                                                    const void *feature,
                                                    size_t fnum,
                                                    float *distances) {
  const bool full_column_block =
      column_major_order_ && (fnum == block_vec_cnt_);
  if (!full_column_block) {
    this->row_major_distance(query, feature, fnum, distances);
    return;
  }
  distanceXx1_(feature, query, dimension_, distances);
}

//! Distances between a single query and `fnum` features, with the major
//! order of the block given by the caller.
//! A column-major block is asserted to hold exactly block_vec_cnt_ vectors.
void IVFDistanceCalculator::query_features_distance(const void *query,
                                                    const void *feature,
                                                    bool column_major,
                                                    size_t fnum,
                                                    float *distances) {
  if (!column_major) {
    this->row_major_distance(query, feature, fnum, distances);
    return;
  }
  ailego_assert_with(fnum == block_vec_cnt_, "Invalid Block");
  distanceXx1_(feature, query, dimension_, distances);
}

//! Distances between a batch of Q queries and `fnum` centroids.
//! A full block goes through the cached block-by-Q matrix distance; a
//! partial block is computed pair by pair.
template <size_t Q>
void IVFDistanceCalculator::batch_query_centroids_distance(const void *query,
                                                           const void *feature,
                                                           size_t fnum,
                                                           float *distances) {
  if (fnum != block_vec_cnt_) {
    row_major_distance(query, Q, feature, fnum, distances);
    return;
  }
  distances_[Q](feature, query, dimension_, distances);
}

//! Pairwise distances between `qnum` queries and `fnum` features.
//! Both inputs are walked in steps of element_size_ bytes; the distance of
//! query q to feature f is written to out[q * fnum + f].
void IVFDistanceCalculator::row_major_distance(const void *query, size_t qnum,
                                               const void *feature, size_t fnum,
                                               float *out) {
  const uint8_t *const query_base = reinterpret_cast<const uint8_t *>(query);
  const uint8_t *const feature_base =
      reinterpret_cast<const uint8_t *>(feature);

  for (size_t q = 0; q < qnum; ++q) {
    const uint8_t *query_ptr = query_base + q * element_size_;
    float *row_out = out + q * fnum;
    for (size_t f = 0; f < fnum; ++f) {
      row_distance_(query_ptr, feature_base + f * element_size_, dimension_,
                    row_out + f);
    }
  }
}

//! Distances between a single query and `fnum` features.
//! Features are walked in steps of element_size_ bytes; the distance to
//! feature f is written to out[f].
void IVFDistanceCalculator::row_major_distance(const void *query,
                                               const void *feature, size_t fnum,
                                               float *out) {
  const uint8_t *const feature_base =
      reinterpret_cast<const uint8_t *>(feature);
  for (size_t f = 0; f < fnum; ++f) {
    row_distance_(query, feature_base + f * element_size_, dimension_,
                  out + f);
  }
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/ivf/ivf_dumper.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ivf_dumper.h"

namespace zvec {
namespace core {

//! Dump a single row-major vector into an inverted list.
//! The vector is buffered in the current block, which is written out as soon
//! as it is full.
//! @return 0 on success, otherwise a non-zero error code.
int IVFDumper::dump_inverted_vector(uint32_t inverted_list_id, uint64_t key,
                                    const void *vec) {
  int ret = this->check_dump_inverted_list(inverted_list_id);
  ivf_check_error_code(ret);

  //! Account for the vector, then buffer it
  ++header_.total_vector_count;
  ++inverted_lists_meta_[cur_list_id_].vector_count;
  block_.emplace(key, vec, IndexMeta::MajorOrder::MO_ROW);

  if (!block_.full()) {
    return 0;
  }
  ret = this->dump_block();
  ivf_check_error_code(ret);
  return 0;
}

//! Dump a batch of vectors into an inverted list.
//!
//! When the batch has the same major order as the block buffer and holds
//! exactly one block of vectors it is written to the dumper directly;
//! otherwise the vectors are buffered one by one.
//!
//! @param inverted_list_id  Target inverted list.
//! @param keys              Keys of the vectors, `vector_count` entries.
//! @param vecs              Vector data.
//! @param vector_count      Number of vectors in the batch.
//! @param column_major      Whether `vecs` is laid out in column-major order.
//! @return 0 on success, otherwise a non-zero error code.
int IVFDumper::dump_inverted_block(uint32_t inverted_list_id,
                                   const uint64_t *keys, const void *vecs,
                                   uint32_t vector_count, bool column_major) {
  int ret = this->check_dump_inverted_list(inverted_list_id);
  ivf_check_error_code(ret);

  const auto order = column_major ? IndexMeta::MajorOrder::MO_COLUMN
                                  : IndexMeta::MajorOrder::MO_ROW;
  if (block_.match_order(order) && vector_count == block_.capacity()) {
    // Dump the block directly
    size_t size = vector_count * meta_.element_size();
    size_t pd_size = ailego_align(size, 32) - size;
    if (dumper_->write(vecs, size) != size) {
      LOG_ERROR("Failed to write data into dumper %s", dumper_->name().c_str());
      return IndexError_WriteData;
    }
    if (pd_size > 0) {
      std::string padding(pd_size, '\0');
      if (dumper_->write(padding.data(), pd_size) != pd_size) {
        LOG_ERROR("Failed to write data into dumper %s",
                  dumper_->name().c_str());
        return IndexError_WriteData;
      }
    }
    std::copy(keys, keys + vector_count, std::back_inserter(keys_));
    ++inverted_lists_meta_[cur_list_id_].block_count;
    ++header_.block_count;
    // The padding bytes are part of the written body, so they must be
    // counted as well (dump_block() also accounts for the aligned size)
    header_.inverted_body_size += size + pd_size;
  } else {
    // Distance between two consecutive vectors inside `vecs`
    size_t step_size = meta_.element_size();
    if (column_major) {
      step_size = IndexMeta::AlignSizeof(meta_.data_type());
    }
    for (size_t i = 0; i < vector_count; ++i) {
      auto v = reinterpret_cast<const char *>(vecs) + i * step_size;
      block_.emplace(keys[i], v, order);
      if (block_.full()) {
        ret = this->dump_block();
        ivf_check_error_code(ret);
      }
    }
  }

  inverted_lists_meta_[cur_list_id_].vector_count += vector_count;
  header_.total_vector_count += vector_count;

  return 0;
}

//! Copy a segment (data and padding) from a container into the dumper.
//! The bytes are transferred in batches of at most 32 KiB and the segment is
//! then appended with the size, padding size and CRC of the source segment.
//! @return 0 on success, otherwise a non-zero error code.
int IVFDumper::dump_container_segment(const IndexStorage::Pointer &container,
                                      const std::string &segmemt_id) {
  auto seg = container->get(segmemt_id, 2);
  if (!seg) {
    LOG_ERROR("Failed to fetch segment %s from %s", segmemt_id.c_str(),
              container->name().c_str());
    return IndexError_InvalidFormat;
  }

  const size_t batch_size = 32 * 1024;
  const size_t total_size = seg->data_size() + seg->padding_size();
  for (size_t off = 0; off < total_size;) {
    const size_t rd_size = std::min(batch_size, total_size - off);
    const void *data = nullptr;
    if (seg->read(off, &data, rd_size) != rd_size) {
      LOG_ERROR("Failed to read data, off=%zu size=%zu", off, rd_size);
      return IndexError_ReadData;
    }
    if (dumper_->write(data, rd_size) != rd_size) {
      LOG_ERROR("Failed to write data, size=%zu", rd_size);
      return IndexError_WriteData;
    }
    off += rd_size;
  }

  int ret = dumper_->append(segmemt_id, seg->data_size(), seg->padding_size(),
                            seg->data_crc());
  ivf_check_with_msg(ret, "Failed to append %s", segmemt_id.c_str());

  dumped_size_ += total_size;
  return 0;
}

//! Finish dumping the inverted vectors.
//! Flushes the pending block and then emits, in this order: the inverted
//! body segment, the header segment (header struct plus serialized meta),
//! the inverted lists meta segment, the keys segment, the mapping segment
//! and the offsets segment.
//! @return 0 on success, otherwise a non-zero error code.
int IVFDumper::dump_inverted_vector_finished(void) {
  int ret = 0;

  //! Dump Inverted Index Segment
  if (!block_.empty()) {
    ret = this->dump_block();
    ivf_check_error_code(ret);
  }
  header_.block_size = block_.block_size();
  const size_t body_size = header_.inverted_body_size;
  ret = dumper_->append(IVF_INVERTED_BODY_SEG_ID, body_size, 0, 0);
  if (ret != 0) {
    LOG_ERROR("Failed to append to segment %s, ret=%d",
              IVF_INVERTED_BODY_SEG_ID.c_str(), ret);
    return ret;
  }
  dumped_size_ += body_size;

  //! Dump Inverted Index Header Segment
  std::string meta_blob;
  meta_.serialize(&meta_blob);
  header_.header_size = sizeof(header_) + meta_blob.size();
  header_.index_meta_size = meta_blob.size();
  header_.inverted_list_count = inverted_lists_meta_.size();
  if (dumper_->write(&header_, sizeof(header_)) != sizeof(header_)) {
    LOG_ERROR("Failed to write data, size %zu", sizeof(header_));
    return IndexError_WriteData;
  }
  if (dumper_->write(meta_blob.data(), meta_blob.size()) != meta_blob.size()) {
    LOG_ERROR("Failed to write data, size %zu", meta_blob.size());
    return IndexError_WriteData;
  }
  size_t padding_size = 0;
  ret = this->dump_padding(header_.header_size, &padding_size);
  ivf_check_error_code(ret);
  ret = dumper_->append(IVF_INVERTED_HEADER_SEG_ID, header_.header_size,
                        padding_size, 0);
  if (ret != 0) {
    LOG_ERROR("Failed to append to segment %s, ret:%d",
              IVF_INVERTED_HEADER_SEG_ID.c_str(), ret);
    return ret;
  }
  dumped_size_ += header_.header_size + padding_size;

  LOG_DEBUG(
      "Dump header info: blocks=%u block_size=%u block_vec_count=%u "
      "inverted_list_count=%u total_vecs=%u inverted_size=%zu",
      header_.block_count, header_.block_size, header_.block_vector_count,
      header_.inverted_list_count, header_.total_vector_count,
      static_cast<size_t>(header_.inverted_body_size));

  //! Dump Inverted Lists Meta Segment
  const size_t lists_meta_size =
      inverted_lists_meta_.size() * sizeof(InvertedListMeta);
  ret = this->dump_segment(IVF_INVERTED_META_SEG_ID,
                           inverted_lists_meta_.data(), lists_meta_size);
  ivf_check_error_code(ret);

  //! Dump Keys Segment
  const size_t keys_bytes = keys_.size() * sizeof(keys_[0]);
  ret = this->dump_segment(IVF_KEYS_SEG_ID, keys_.data(), keys_bytes);
  ivf_check_error_code(ret);

  //! Dump Mapping Segment
  auto mapping = std::make_shared<std::vector<uint32_t>>();
  IVFUtility::Sort(keys_.data(), mapping.get(), keys_.size());
  const size_t mapping_bytes = mapping->size() * sizeof(uint32_t);
  ret = this->dump_segment(IVF_MAPPING_SEG_ID, mapping->data(), mapping_bytes);
  ivf_check_error_code(ret);
  // Release the mapping before the offsets are generated
  mapping.reset();

  //! Dump the Offsets Segment
  return this->dump_offsets_segment();
}

//! Dump the serialized centroid index as the centroid segment.
//! @return 0 on success, otherwise a non-zero error code.
int IVFDumper::dump_centroid_index(const void *data, size_t size) {
  int ret{this->dump_segment(IVF_CENTROID_SEG_ID, data, size)};
  ivf_check_error_code(ret);
  return 0;
}

//! Dump the per-list integer quantizer params (scale and bias).
//! Nothing is dumped when the reformer is neither the int8 nor the int4
//! reformer, or when there is only a single quantizer. Otherwise one
//! quantizer per inverted list is required.
//! @return 0 on success, otherwise a non-zero error code.
int IVFDumper::dump_quantizer_params(
    const std::vector<IndexConverter::Pointer> &quantizers) {
  const bool is_int8 = meta_.reformer_name() == kInt8ReformerName;
  const bool is_int4 = meta_.reformer_name() == kInt4ReformerName;
  if (!is_int8 && !is_int4) {
    // Only IntegerQuantizer params are supported
    return 0;
  }
  if (quantizers.size() == 1) {
    //! Do not dump, the reformer params in IndexMeta are used instead
    return 0;
  }

  if (quantizers.size() != header_.inverted_list_count) {
    LOG_ERROR("Mismatch size, quantizers=%zu, inverted_list_count=%u",
              quantizers.size(), header_.inverted_list_count);
    return IndexError_Logic;
  }

  //! The param keys depend on the quantizer type only
  auto &scale_key =
      is_int8 ? INT8_QUANTIZER_REFORMER_SCALE : INT4_QUANTIZER_REFORMER_SCALE;
  auto &bias_key =
      is_int8 ? INT8_QUANTIZER_REFORMER_BIAS : INT4_QUANTIZER_REFORMER_BIAS;

  std::vector<InvertedIntegerQuantizerParams> params(
      header_.inverted_list_count);
  for (size_t i = 0; i < quantizers.size(); ++i) {
    auto &p = quantizers[i]->meta().reformer_params();
    // The params may only be missing for an empty inverted list
    const bool has_both = p.has(scale_key) && p.has(bias_key);
    if (inverted_lists_meta_[i].vector_count > 0 && !has_both) {
      LOG_ERROR("Miss reformer params %s or %s", bias_key.c_str(),
                scale_key.c_str());
      return IndexError_Logic;
    }

    params[i].bias = p.get_as_float(bias_key);
    params[i].scale = p.get_as_float(scale_key);
  }

  const auto &segment_id = is_int8 ? IVF_INT8_QUANTIZED_PARAMS_SEG_ID
                                   : IVF_INT4_QUANTIZED_PARAMS_SEG_ID;
  return this->dump_segment(
      segment_id, params.data(),
      params.size() * sizeof(InvertedIntegerQuantizerParams));
}

//! Dump one original vector into the features segment.
//!
//! The vectors are written back to back. When the last expected vector
//! (header_.total_vector_count) has been written, the segment is padded and
//! appended to the dumper.
//!
//! @param data  Vector data.
//! @param size  Size of the vector in bytes.
//! @return 0 on success, otherwise a non-zero error code.
int IVFDumper::dump_original_vector(const void *data, size_t size) {
  if (dumped_feature_count_ >= header_.total_vector_count) {
    LOG_ERROR("Dump too much orignal features, expect=%u",
              header_.total_vector_count);
    return IndexError_Logic;
  }

  if (dumper_->write(data, size) != size) {
    LOG_ERROR("Dumper write features failed");
    return IndexError_WriteData;
  }
  dumped_features_size_ += size;
  ++dumped_feature_count_;
  if (dumped_feature_count_ == header_.total_vector_count) {
    //! Dump features finished, dump the meta
    // The padding must align the whole segment, so it is derived from the
    // accumulated size rather than from the size of the last vector
    size_t padding_size = 0;
    int ret = this->dump_padding(dumped_features_size_, &padding_size);
    ivf_check_error_code(ret);

    ret = dumper_->append(IVF_FEATURES_SEG_ID, dumped_features_size_,
                          padding_size, 0);
    if (ret != 0) {
      LOG_ERROR("Dumper append segment %s failed, ret:%d",
                IVF_FEATURES_SEG_ID.c_str(), ret);
      return ret;
    }
    // Count the padding bytes too, like every other segment does
    dumped_size_ += dumped_features_size_ + padding_size;
  }

  return 0;
}

//! Validate the target inverted list and switch to it when it changes.
//! Lists must be dumped in non-decreasing id order. On a switch the pending
//! block is flushed and every list after the current one, up to and
//! including the target, gets its start offsets recorded.
//! @return 0 on success, IndexError_Logic on an invalid id, or the error of
//!         the block flush.
int IVFDumper::check_dump_inverted_list(uint32_t inverted_list_id) {
  if (inverted_list_id < cur_list_id_) {
    LOG_ERROR("Invalid backward vector dumping, want=%u cur=%u",
              inverted_list_id, cur_list_id_);
    return IndexError_Logic;
  }
  if (inverted_list_id >= inverted_lists_meta_.size()) {
    LOG_ERROR("Invalid inverted_list_id=%u, lists_size=%zu", inverted_list_id,
              inverted_lists_meta_.size());
    return IndexError_Logic;
  }
  if (inverted_list_id == cur_list_id_) {
    return 0;
  }

  //! Flush the block of the previous inverted list
  int ret = this->dump_block();
  ivf_check_error_code(ret);

  //! Skipped (empty) lists share the start offsets of the target list
  for (auto idx = cur_list_id_ + 1; idx <= inverted_list_id; ++idx) {
    auto &list_meta = inverted_lists_meta_[idx];
    list_meta.offset = header_.inverted_body_size;
    list_meta.id_offset = header_.total_vector_count;
  }
  cur_list_id_ = inverted_list_id;

  return 0;
}

//! Dump the offsets segment: one InvertedVecLocation per vector, list by
//! list. Vectors inside full blocks of a column-major index are located with
//! the block align size and flagged true; all other vectors are located with
//! the block element size and flagged false.
//! @return 0 on success, otherwise a non-zero error code.
int IVFDumper::dump_offsets_segment(void) const {
  const bool col_pri =
      meta_.major_order() == IndexMeta::MajorOrder::MO_COLUMN;
  size_t total_size = 0;

  for (size_t i = 0; i < inverted_lists_meta_.size(); ++i) {
    const auto &list_meta = inverted_lists_meta_[i];
    const size_t vec_cnt = list_meta.vector_count;
    // Number of vectors that live in completely filled blocks
    const size_t align_idx = vec_cnt - vec_cnt % block_vector_count_;

    std::vector<InvertedVecLocation> offsets;
    uint64_t off = list_meta.offset;
    size_t idx = 0;
    for (size_t j = 0; j < vec_cnt; ++j) {
      if (col_pri && j < align_idx) {
        offsets.emplace_back(off + idx * block_.align_size(), true);
      } else {
        offsets.emplace_back(off + idx * block_.element_size(), false);
      }
      // Move on to the next block once the current one is exhausted
      if (++idx == block_vector_count_) {
        off += header_.block_size;
        idx = 0;
      }
    }
    if (idx != 0) {
      // Skips the trailing partial block; `off` is not read afterwards
      off += (vec_cnt - align_idx) * meta_.element_size();
    }

    const size_t len = offsets.size() * sizeof(offsets[0]);
    const size_t actual_len = dumper_->write(offsets.data(), len);
    if (actual_len != len) {
      LOG_ERROR("Write offsets failed expect %zu, actual: %zu.", len,
                actual_len);
      return IndexError_WriteData;
    }
    total_size += len;
  }

  size_t padding_size = 0;
  int ret = this->dump_padding(total_size, &padding_size);
  ivf_check_error_code(ret);

  ret = dumper_->append(IVF_OFFSETS_SEG_ID, total_size, padding_size, 0);
  if (ret != 0) {
    LOG_ERROR("Dumper append segment %s failed, ret:%d",
              IVF_OFFSETS_SEG_ID.c_str(), ret);
    return ret;
  }

  dumped_size_ += total_size + padding_size;
  return 0;
}

//! Dump one segment: payload, alignment padding, then the segment meta
//!
//! \param segment_id  identifier the segment is registered under
//! \param data        payload bytes
//! \param size        payload length in bytes
//! \return 0 on success, IndexError_WriteData on a short payload write, or
//!         the error code of the padding/append step.
int IVFDumper::dump_segment(const std::string &segment_id, const void *data,
                            size_t size) const {
  const size_t written = dumper_->write(data, size);
  if (written != size) {
    LOG_ERROR("Dump segment %s data failed, expect=%zu, actual=%zu",
              segment_id.c_str(), size, written);
    return IndexError_WriteData;
  }

  size_t padding_size = 0;
  int ret = this->dump_padding(size, &padding_size);
  ivf_check_error_code(ret);

  // The CRC covers the payload only, not the padding bytes.
  const uint32_t crc = ailego::Crc32c::Hash(data, size);
  ret = dumper_->append(segment_id, size, padding_size, crc);
  if (ret == 0) {
    dumped_size_ += size + padding_size;
    return 0;
  }

  LOG_ERROR("Dump segment %s meta failed, ret=%d", segment_id.c_str(), ret);
  return ret;
}

//! Write the zero bytes needed to bring a segment up to its aligned size
//!
//! \param data_size     size of the data already written for the segment
//! \param padding_size  [out] number of padding bytes required, i.e.
//!                      IVFUtility::AlignedSize(data_size) - data_size
//! \return 0 on success (including when no padding is needed),
//!         IndexError_WriteData when the padding could not be fully written.
int IVFDumper::dump_padding(size_t data_size, size_t *padding_size) const {
  *padding_size = IVFUtility::AlignedSize(data_size) - data_size;
  if (*padding_size == 0) {
    return 0;
  }

  std::string padding(*padding_size, '\0');
  if (dumper_->write(padding.data(), *padding_size) != *padding_size) {
    // %zu is the portable conversion for size_t; %lu mismatches on targets
    // where size_t is not unsigned long.
    LOG_ERROR("Append padding failed, size %zu", *padding_size);
    return IndexError_WriteData;
  }

  return 0;
}

//! Flush the pending block, if any, into the inverted body
//!
//! The block bytes are written rounded up to a multiple of 32, its keys are
//! appended to the dumper-wide key list, and the per-list and global block
//! counters plus the body size are advanced before the block is reset.
//!
//! \return 0 on success or when the block is empty, IndexError_WriteData on
//!         a short write.
int IVFDumper::dump_block(void) {
  if (!block_.empty()) {
    const size_t size = ailego_align(block_.bytes(), 32);
    if (dumper_->write(block_.data(), size) != size) {
      LOG_ERROR("Failed to write data into dumper %s",
                dumper_->name().c_str());
      return IndexError_WriteData;
    }

    const auto &block_keys = block_.keys();
    std::copy(block_keys.begin(), block_keys.end(),
              std::back_inserter(keys_));

    header_.inverted_body_size += size;
    ++header_.block_count;
    ++inverted_lists_meta_[cur_list_id_].block_count;
    block_.clear();
  }

  return 0;
}

}  // namespace core
}  // namespace zvec

================================================
FILE: src/core/algorithm/ivf/ivf_dumper.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <core/quantizer/quantizer_params.h>
#include <zvec/core/framework/index_framework.h>
#include "metric/metric_params.h"
#include "ivf_index_format.h"
#include "ivf_params.h"
#include "ivf_utility.h"

namespace zvec {
namespace core {

/*! IVF (inverted file) index dumper
 *
 *  Collects vectors per inverted list into fixed-capacity blocks and writes
 *  them, together with the accompanying segments, through an IndexDumper.
 */
class IVFDumper {
 public:
  typedef std::shared_ptr<IVFDumper> Pointer;

  //! Vectors block
  //!
  //! A fixed-capacity staging buffer for vectors and their keys. Vectors are
  //! stored one after another; when the block becomes full and its major
  //! order is column, the buffer is transposed in place.
  class Block {
   public:
    //! Initialize block
    //! \param meta           index meta providing element size, major order
    //!                       and data type
    //! \param max_vec_count  capacity of the block in vectors
    void init(const IndexMeta &meta, uint32_t max_vec_count) {
      element_size_ = meta.element_size();
      auto bsize = IVFUtility::AlignedSize(max_vec_count, element_size_);
      data_.resize(bsize);
      count_ = 0u;
      major_order_ = meta.major_order();
      align_size_ = IndexMeta::AlignSizeof(meta.data_type());
      // Number of align_size_-wide units that make up one vector
      units_ = element_size_ / align_size_;
      max_vec_count_ = max_vec_count;
      keys_.reserve(max_vec_count_);
    }

    //! Add a vector to the block in row major order
    //! If the block is full and the block order is column, make a
    //! transpose
    //! \param key    key stored alongside the vector
    //! \param vec    source vector data
    //! \param order  layout of the source data (see do_emplace)
    void emplace(uint64_t key, const void *vec, IndexMeta::MajorOrder order) {
      // Dispatch on the unit width so that units are copied as whole integers
      switch (align_size_) {
        case 2:
          do_emplace<uint16_t>(vec, order);
          break;
        case 4:
          do_emplace<uint32_t>(vec, order);
          break;
        case 8:
          do_emplace<uint64_t>(vec, order);
          break;
        default:
          ailego_check_with(false, "Unsupport Aligned Size");
      }
      keys_.emplace_back(key);
    }

    //! Check whether the block holds max_vec_count_ vectors
    bool full(void) const {
      return count_ == max_vec_count_;
    }

    //! Retrieve the raw block buffer
    const void *data(void) const {
      return data_.data();
    }

    //! Reset the vector count and keys; the buffer itself is kept
    void clear(void) {
      count_ = 0u;
      keys_.clear();
    }

    //! Check whether the block holds no vectors
    bool empty(void) const {
      return count_ == 0u;
    }

    //! Retrieve the number of vectors currently in the block
    size_t size(void) const {
      return count_;
    }

    //! Retrieve the capacity of the block in vectors
    size_t capacity(void) const {
      return max_vec_count_;
    }

    //! Retrieve the unit width in bytes
    size_t align_size(void) const {
      return align_size_;
    }

    //! Retrieve the size of one vector in bytes
    size_t element_size(void) const {
      return element_size_;
    }

    //! Retrieve block data size
    size_t bytes(void) const {
      return element_size_ * count_;
    }

    //! Retrieve max block size in bytes
    size_t block_size(void) const {
      return data_.size();
    }

    //! Retrieve the major order the block was initialized with
    IndexMeta::MajorOrder major_order(void) const {
      return major_order_;
    }

    //! Retrieve the keys of the vectors currently in the block
    const std::vector<uint64_t> &keys(void) const {
      return keys_;
    }

    //! Check whether the given order equals the block's major order
    bool match_order(IndexMeta::MajorOrder column_major) const {
      return major_order_ == column_major;
    }

   private:
    //! Transpose the block vectors
    void transpose() {
      std::vector<uint8_t> buf(data_.size());
      IVFUtility::Transpose(align_size_, data_.data(), count_, units_,
                            buf.data());
      data_.swap(buf);
    }

    //! Copy one vector, unit by unit, into the next free slot
    //! T is the integer type matching the unit width. The source is read
    //! with a stride of 1 unit for MO_ROW input and max_vec_count_ units
    //! otherwise; the destination is always written contiguously.
    template <typename T>
    void do_emplace(const void *vec, IndexMeta::MajorOrder order) {
      ailego_assert_with(count_ < max_vec_count_, "emplace a full block");

      T *dst = reinterpret_cast<T *>(data_.data() + element_size_ * count_);
      const T *src = reinterpret_cast<const T *>(vec);
      size_t step = order == IndexMeta::MO_ROW ? 1 : max_vec_count_;
      for (auto i = 0u; i < units_; ++i) {
        *dst = *src;
        dst++;
        src += step;
      }

      count_++;
      // A column-major block is only transposed once it is completely filled
      if (full() && major_order_ == IndexMeta::MO_COLUMN) {
        transpose();
      }
    }

   private:
    //! Members
    std::vector<uint8_t> data_{};    // block buffer
    std::vector<uint64_t> keys_{};   // keys of the buffered vectors
    uint32_t count_{0u};             // vectors currently buffered
    uint32_t units_{0u};             // units per vector
    uint32_t align_size_{0u};        // unit width in bytes
    uint32_t element_size_{0u};      // vector size in bytes
    uint32_t max_vec_count_{0u};     // block capacity in vectors
    IndexMeta::MajorOrder major_order_{};
  };

  //! Constructor
  //! \param meta                 index meta of the inverted index
  //! \param dumper               destination the segments are written to
  //! \param inverted_list_count  number of inverted lists
  //! \param block_vector_count   capacity of one block in vectors
  IVFDumper(const IndexMeta &meta, const IndexDumper::Pointer &dumper,
            size_t inverted_list_count, size_t block_vector_count)
      : meta_(meta),
        dumper_(dumper),
        block_vector_count_(block_vector_count),
        inverted_lists_meta_(inverted_list_count) {
    block_.init(meta, block_vector_count_);
    header_.block_vector_count = block_vector_count_;
  }

  //! Constructor using the default block capacity (kDefaultBlockCount)
  IVFDumper(const IndexMeta &meta, const IndexDumper::Pointer &dumper,
            size_t inverted_list_count)
      : IVFDumper(meta, dumper, inverted_list_count, kDefaultBlockCount) {}

  //! Destructor
  //! Logs an error and asserts when a non-zero dumped feature count differs
  //! from the total vector count recorded in the header.
  ~IVFDumper() {
    // Check the dumper status
    if (dumped_feature_count_ > 0 &&
        dumped_feature_count_ != header_.total_vector_count) {
      LOG_ERROR("Dumped features=%u mismatch from invertedVecs=%u",
                dumped_feature_count_, header_.total_vector_count);
      ailego_assert_with(false, "invalid status");
    }
  }

  //! Dump a vector in row major order
  int dump_inverted_vector(uint32_t inverted_list_id, uint64_t key,
                           const void *vec);

  //! Dump a batch of vectors with their keys into an inverted list
  int dump_inverted_block(uint32_t inverted_list_id, const uint64_t *keys,
                          const void *vecs, uint32_t vector_count,
                          bool column_major);

  //! Finish dump the inverted vectors
  int dump_inverted_vector_finished(void);

  //! Dump the centroids index
  int dump_centroid_index(const void *data, size_t size);

  //! Dump params for each inverted list quantizer
  int dump_quantizer_params(
      const std::vector<IndexConverter::Pointer> &quantizers);

  //! Dump the original vectors, which have not been quantized
  int dump_original_vector(const void *data, size_t size);

  //! Retrieve total dumped size
  size_t dumped_size(void) const {
    return dumped_size_;
  }

  //! Retrieve total dumped vector count
  size_t dumped_count(void) const {
    return header_.total_vector_count;
  }

  //! Dump the segment from container
  int dump_container_segment(const IndexStorage::Pointer &container,
                             const std::string &segmemt_id);

 private:
  //! Validate the target inverted list and switch to it when it changes
  int check_dump_inverted_list(uint32_t inverted_list_id);

  //! Dump offsets segment
  int dump_offsets_segment(void) const;

  //! Dump a segment
  int dump_segment(const std::string &segment_id, const void *data,
                   size_t size) const;

  //! Dump segment padding
  int dump_padding(size_t data_size, size_t *padding_size) const;

  //! Dump a vector block
  int dump_block(void);

 private:
  //! Constants
  static constexpr size_t kDefaultBlockCount = 32u;

  //! Members
  Block block_{};           // vectors grouped in block
  const IndexMeta meta_{};  // IndexMeta of the inverted index
  const IndexDumper::Pointer dumper_{};
  size_t block_vector_count_{kDefaultBlockCount};
  std::vector<InvertedListMeta> inverted_lists_meta_{};
  std::vector<uint64_t> keys_{};
  InvertedIndexHeader header_{};
  uint32_t cur_list_id_{0};
  uint32_t dumped_feature_count_{0};
  size_t dumped_features_size_{0};
  // mutable: updated from the const dump_* helpers
  mutable size_t dumped_size_{0};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/ivf/ivf_entity.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ivf_entity.h"
#include <iostream>
#include "ivf_utility.h"
namespace zvec {
namespace core {

//! Initialize
//! Initialize the wrapper from the reformer settings of the index meta
//!
//! An empty reformer name selects pass-through mode. Otherwise the named
//! reformer is created and initialized, and the wrapper type is derived from
//! the reformer name and the metric name. For the int8/int4 reformers with a
//! metric other than inner product, the reformer scale param must exist; its
//! reciprocal (1.0 when the scale is zero) is stored for score normalization.
//!
//! \return 0 on success, IndexError_NoExist when the reformer cannot be
//!         created, IndexError_InvalidArgument when the scale param is
//!         missing, or the error code of the reformer's init.
int IVFEntity::IVFReformerWrapper::init(const IndexMeta &imeta) {
  const auto &name = imeta.reformer_name();
  if (name.empty()) {
    type_ = kReformerTpNone;
    return 0;
  }

  auto reformer = IndexFactory::CreateReformer(name);
  if (!reformer) {
    LOG_ERROR("Failed to create reformer %s", name.c_str());
    return IndexError_NoExist;
  }
  int ret = reformer->init(imeta.reformer_params());
  ivf_check_with_msg(ret, "Failed to init reformer %s", name.c_str());
  reformer_ = std::move(reformer);

  const bool is_int8 = (name == kInt8ReformerName);
  const bool is_int4 = !is_int8 && (name == kInt4ReformerName);
  if (is_int8 || is_int4) {
    // Inner product queries are quantized per query by the wrapper itself
    if (imeta.metric_name() == kIPMetricName) {
      if (is_int8) {
        type_ = kReformerTpInnerProductInt8;
      } else {
        type_ = kReformerTpInnerProductInt4;
      }
      return 0;
    }

    const auto &key =
        is_int8 ? INT8_QUANTIZER_REFORMER_SCALE : INT4_QUANTIZER_REFORMER_SCALE;
    if (!imeta.reformer_params().has(key)) {
      LOG_ERROR("Missing param %s in reformer %s", key.c_str(), name.c_str());
      return IndexError_InvalidArgument;
    }
    float scale = imeta.reformer_params().get_as_float(key);
    reciprocal_ = scale == 0.0 ? 1.0 : (1.0 / scale);
    if (is_int8) {
      type_ = kReformerTpInt8;
    } else {
      type_ = kReformerTpInt4;
    }
  } else {
    type_ = kReformerTpDefault;
  }

  LOG_DEBUG("Init QcReformer with %s, type=%u", name.c_str(), type_);

  return 0;
}

//! Update the params, Called by gpu searcher only
//! Update the params, Called by gpu searcher only
//!
//! Only acts when the index uses the int4 reformer with the L2 metric: the
//! wrapper then switches to an int8 reformer whose scale is the int4 scale
//! multiplied by kNormalizeScaleFactor, carrying over bias and metric params.
//! Every other configuration is left untouched.
//!
//! \return 0 on success or when nothing has to change,
//!         IndexError_InvalidArgument when the int4 scale param is missing,
//!         IndexError_NoExist when the int8 reformer cannot be created, or
//!         the error code of the reformer's init.
int IVFEntity::IVFReformerWrapper::update(const IndexMeta &meta) {
  const auto &name = meta.reformer_name();
  if (!(name == kInt4ReformerName && meta.metric_name() == kL2MetricName)) {
    return 0;
  }

  const auto &scale_key = INT4_QUANTIZER_REFORMER_SCALE;
  if (!meta.reformer_params().has(scale_key)) {
    LOG_ERROR("Missing param %s in reformer %s", scale_key.c_str(),
              name.c_str());
    return IndexError_InvalidArgument;
  }
  float scale = meta.reformer_params().get_as_float(scale_key);
  reciprocal_ = scale == 0.0 ? 1.0 : (1.0 / scale / kNormalizeScaleFactor);
  type_ = kReformerTpInt8;

  // Build the params for the replacement int8 reformer
  float int8_scale = scale * kNormalizeScaleFactor;
  float bias =
      meta.reformer_params().get_as_float(INT4_QUANTIZER_REFORMER_BIAS);
  ailego::Params params;
  params.set(INT8_QUANTIZER_REFORMER_SCALE, int8_scale);
  params.set(INT8_QUANTIZER_REFORMER_BIAS, bias);
  params.set(
      INT4_QUANTIZER_REFORMER_METRIC,
      meta.reformer_params().get_as_string(INT4_QUANTIZER_REFORMER_METRIC));

  auto reformer = IndexFactory::CreateReformer(kInt8ReformerName);
  if (!reformer) {
    LOG_ERROR("Failed to create reformer %s", name.c_str());
    return IndexError_NoExist;
  }
  int ret = reformer->init(params);
  ivf_check_with_msg(ret, "Failed to init reformer %s", name.c_str());

  reformer_ = reformer;

  LOG_DEBUG("Init QcReformer with %s, type=%u", name.c_str(), type_);

  return 0;
}

//! Transform a query
//! Transform a query
//!
//! \param query  input query
//! \param qmeta  meta describing the input query
//! \param out    [out] transformed query; the input itself in pass-through
//!               mode, otherwise the wrapper's internal buffer
//! \param ometa  [out] meta describing the transformed query
//! \return 0 on success, IndexError_Unsupported for a non-FP32 query in the
//!         inner product modes or for an unknown wrapper type, or the error
//!         code of the underlying reformer.
int IVFEntity::IVFReformerWrapper::transform(const void *query,
                                             const IndexQueryMeta &qmeta,
                                             const void **out,
                                             IndexQueryMeta *ometa) {
  if (type_ == kReformerTpNone) {
    *out = query;
    *ometa = qmeta;
    return 0;
  }

  if (type_ == kReformerTpInnerProductInt8) {
    if (qmeta.data_type() != IndexMeta::DataType::DT_FP32) {
      return IndexError_Unsupported;
    }
    // One query, hence a single per-query scale slot
    scales_.resize(1);
    buffer_.resize(IndexMeta::ElementSizeof(IndexMeta::DataType::DT_INT8,
                                            qmeta.dimension()));
    const float *fvec = static_cast<const float *>(query);
    int8_t *qvec = reinterpret_cast<int8_t *>(&buffer_[0]);
    this->transform(0, fvec, qmeta.dimension(), qvec);
    *ometa = qmeta;
    ometa->set_meta(IndexMeta::DataType::DT_INT8, qmeta.dimension());
    *out = buffer_.data();
    return 0;
  }

  if (type_ == kReformerTpInnerProductInt4) {
    if (qmeta.data_type() != IndexMeta::DataType::DT_FP32) {
      return IndexError_Unsupported;
    }
    scales_.resize(1);
    buffer_.resize(IndexMeta::ElementSizeof(IndexMeta::DataType::DT_INT4,
                                            qmeta.dimension()));
    const float *fvec = static_cast<const float *>(query);
    uint8_t *qvec = reinterpret_cast<uint8_t *>(&buffer_[0]);
    this->transform(0, fvec, qmeta.dimension(), qvec);
    *ometa = qmeta;
    ometa->set_meta(IndexMeta::DataType::DT_INT4, qmeta.dimension());
    *out = buffer_.data();
    return 0;
  }

  if (type_ == kReformerTpInt8 || type_ == kReformerTpInt4 ||
      type_ == kReformerTpDefault) {
    // Delegate to the underlying reformer
    const int ret = reformer_->transform(query, qmeta, &buffer_, ometa);
    *out = buffer_.data();
    return ret;
  }

  return IndexError_Unsupported;
}

//! Transform querys
//! Transform a batch of queries
//!
//! \param query  input queries, stored back to back
//! \param qmeta  meta describing one input query
//! \param count  number of queries
//! \param out    [out] transformed queries; the input itself in pass-through
//!               mode, otherwise the wrapper's internal buffer
//! \param ometa  [out] meta describing one transformed query
//! \return 0 on success, IndexError_Unsupported for non-FP32 queries in the
//!         inner product modes or for an unknown wrapper type, or the error
//!         code of the underlying reformer.
int IVFEntity::IVFReformerWrapper::transform(const void *query,
                                             const IndexQueryMeta &qmeta,
                                             uint32_t count, const void **out,
                                             IndexQueryMeta *ometa) {
  if (type_ == kReformerTpNone) {
    *out = query;
    *ometa = qmeta;
    return 0;
  }

  if (type_ == kReformerTpInnerProductInt8) {
    if (qmeta.data_type() != IndexMeta::DataType::DT_FP32) {
      return IndexError_Unsupported;
    }
    scales_.resize(count);
    buffer_.resize(count *
                   IndexMeta::ElementSizeof(IndexMeta::DataType::DT_INT8,
                                            qmeta.dimension()));
    const float *src = reinterpret_cast<const float *>(query);
    int8_t *dst = reinterpret_cast<int8_t *>(&buffer_[0]);
    for (size_t q = 0; q < count; ++q) {
      // One output byte per dimension
      this->transform(q, &src[q * qmeta.dimension()], qmeta.dimension(),
                      &dst[q * qmeta.dimension()]);
    }
    *ometa = qmeta;
    ometa->set_meta(IndexMeta::DataType::DT_INT8, qmeta.dimension());
    *out = buffer_.data();
    return 0;
  }

  if (type_ == kReformerTpInnerProductInt4) {
    if (qmeta.data_type() != IndexMeta::DataType::DT_FP32) {
      return IndexError_Unsupported;
    }
    scales_.resize(count);
    buffer_.resize(count *
                   IndexMeta::ElementSizeof(IndexMeta::DataType::DT_INT4,
                                            qmeta.dimension()));
    const float *src = reinterpret_cast<const float *>(query);
    uint8_t *dst = reinterpret_cast<uint8_t *>(&buffer_[0]);
    for (size_t q = 0; q < count; ++q) {
      // Two dimensions share one output byte
      this->transform(q, &src[q * qmeta.dimension()], qmeta.dimension(),
                      &dst[q * qmeta.dimension() / 2]);
    }
    *ometa = qmeta;
    ometa->set_meta(IndexMeta::DataType::DT_INT4, qmeta.dimension());
    *out = buffer_.data();
    return 0;
  }

  if (type_ == kReformerTpInt8 || type_ == kReformerTpInt4 ||
      type_ == kReformerTpDefault) {
    // Delegate to the underlying reformer
    const int ret = reformer_->transform(query, qmeta, count, &buffer_, ometa);
    *out = buffer_.data();
    return ret;
  }

  return IndexError_Unsupported;
}

//! Transform querys
//! Transform a batch of queries for the gpu path
//!
//! Pass-through and default types hand the queries back unchanged. Both
//! inner product types quantize the FP32 queries to int8 inside the wrapper.
//! The int8/int4 types delegate to the underlying reformer.
//!
//! \return 0 on success, IndexError_Unsupported for non-FP32 queries in the
//!         inner product modes or for an unknown wrapper type, or the error
//!         code of the underlying reformer.
int IVFEntity::IVFReformerWrapper::transform_gpu(const void *query,
                                                 const IndexQueryMeta &qmeta,
                                                 uint32_t count,
                                                 const void **out,
                                                 IndexQueryMeta *ometa) {
  if (type_ == kReformerTpNone || type_ == kReformerTpDefault) {
    *out = query;
    *ometa = qmeta;
    return 0;
  }

  if (type_ == kReformerTpInnerProductInt4 ||
      type_ == kReformerTpInnerProductInt8) {
    if (qmeta.data_type() != IndexMeta::DataType::DT_FP32) {
      return IndexError_Unsupported;
    }
    scales_.resize(count);
    buffer_.resize(count *
                   IndexMeta::ElementSizeof(IndexMeta::DataType::DT_INT8,
                                            qmeta.dimension()));
    const float *src = reinterpret_cast<const float *>(query);
    int8_t *dst = reinterpret_cast<int8_t *>(&buffer_[0]);
    for (size_t q = 0; q < count; ++q) {
      this->transform(q, &src[q * qmeta.dimension()], qmeta.dimension(),
                      &dst[q * qmeta.dimension()]);
    }
    *ometa = qmeta;
    ometa->set_meta(IndexMeta::DataType::DT_INT8, qmeta.dimension());
    *out = buffer_.data();
    return 0;
  }

  if (type_ == kReformerTpInt8 || type_ == kReformerTpInt4) {
    const int ret = reformer_->transform(query, qmeta, count, &buffer_, ometa);
    *out = buffer_.data();
    return ret;
  }

  return IndexError_Unsupported;
}


//! Convert a record
//! Convert a record
//!
//! In pass-through mode the record and its meta are handed back unchanged;
//! otherwise the underlying reformer converts it into the internal buffer.
//!
//! \return 0 in pass-through mode, else the reformer's return code.
int IVFEntity::IVFReformerWrapper::convert(const void *record,
                                           const IndexQueryMeta &rmeta,
                                           const void **out,
                                           IndexQueryMeta *ometa) {
  if (type_ != kReformerTpNone) {
    const int ret = reformer_->convert(record, rmeta, &buffer_, ometa);
    *out = buffer_.data();
    return ret;
  }

  *out = record;
  *ometa = rmeta;
  return 0;
}

//! Convert records
//! Convert records
//!
//! In pass-through mode the records and their meta are handed back
//! unchanged; otherwise the underlying reformer converts all `count` records
//! into the internal buffer.
//!
//! \return 0 in pass-through mode, else the reformer's return code.
int IVFEntity::IVFReformerWrapper::convert(const void *records,
                                           const IndexQueryMeta &rmeta,
                                           uint32_t count, const void **out,
                                           IndexQueryMeta *ometa) {
  if (type_ != kReformerTpNone) {
    const int ret = reformer_->convert(records, rmeta, count, &buffer_, ometa);
    *out = buffer_.data();
    return ret;
  }

  *out = records;
  *ometa = rmeta;
  return 0;
}

//! Normalize score
//! Normalize score
//!
//! Rescales every score in the heap. The inner product types divide by the
//! scale recorded for query `qidx`; the int8/int4 types multiply by the
//! stored reciprocal. All other types leave the heap untouched.
void IVFEntity::IVFReformerWrapper::normalize(size_t qidx,
                                              IndexDocumentHeap *heap) const {
  const bool per_query_scale = type_ == kReformerTpInnerProductInt8 ||
                               type_ == kReformerTpInnerProductInt4;
  const bool fixed_scale =
      type_ == kReformerTpInt8 || type_ == kReformerTpInt4;

  if (per_query_scale) {
    ailego_assert_with(qidx < scales_.size(), "invalid index");
    auto factor = 1.0f / scales_[qidx];
    for (auto &doc : *heap) {
      *doc.mutable_score() *= factor;
    }
  } else if (fixed_scale) {
    for (auto &doc : *heap) {
      *doc.mutable_score() *= reciprocal_;
    }
  }
  // kReformerTpNone and any other type: nothing to do
}

//! Normalize score
//! Normalize score
//!
//! Same rescaling as the overload without a query, except that the default
//! type delegates to the underlying reformer and an unknown type is logged.
void IVFEntity::IVFReformerWrapper::normalize(size_t qidx, const void *query,
                                              const IndexQueryMeta &qmeta,
                                              IndexDocumentHeap *heap) const {
  if (type_ == kReformerTpNone) {
    return;
  }

  if (type_ == kReformerTpInnerProductInt8 ||
      type_ == kReformerTpInnerProductInt4) {
    ailego_assert_with(qidx < scales_.size(), "invalid index");
    auto factor = 1.0f / scales_[qidx];
    for (auto &doc : *heap) {
      *doc.mutable_score() *= factor;
    }
    return;
  }

  if (type_ == kReformerTpInt8 || type_ == kReformerTpInt4) {
    for (auto &doc : *heap) {
      *doc.mutable_score() *= reciprocal_;
    }
    return;
  }

  if (type_ == kReformerTpDefault) {
    reformer_->normalize(query, qmeta, *heap);
    return;
  }

  // Not support
  LOG_ERROR("Not a supported type in QC reformer, type: %u", type_);
}

//! Quantize one FP32 query to int8 and record its scale
//!
//! The scale is 127 divided by the largest absolute component, and each
//! component is rounded after scaling. A query without any positive
//! magnitude is filled with 1 and gets the largest finite float as scale.
//!
//! \param qidx  slot in scales_ receiving the scale
//! \param in    input components
//! \param dim   number of components
//! \param out   output buffer of `dim` int8 values
void IVFEntity::IVFReformerWrapper::transform(size_t qidx, const float *in,
                                              size_t dim, int8_t *out) {
  ailego_assert_with(qidx < scales_.size(), "invalid index");

  float peak = 0.0f;
  for (size_t d = 0; d < dim; ++d) {
    const auto magnitude = std::abs(in[d]);
    if (magnitude > peak) {
      peak = magnitude;
    }
  }

  if (!(peak > 0.0f)) {
    std::fill(out, out + dim, static_cast<int8_t>(1));
    scales_[qidx] = std::numeric_limits<float>::max();
    return;
  }

  float scale = 127 / peak;
  for (size_t d = 0; d < dim; ++d) {
    out[d] = static_cast<int8_t>(std::round(in[d] * scale));
  }
  scales_[qidx] = scale;
}

//! Quantize one FP32 query to packed int4 and record its scale
//!
//! Two components are packed per byte: the even-indexed one in the low
//! nibble, the odd-indexed one in the high nibble. The scale numerator is 8
//! when 7 * abs_max exceeds 8 * max, otherwise 7. A query without any
//! positive magnitude is filled with the byte value 9 and gets the largest
//! finite float as scale.
//!
//! \param qidx  slot in scales_ receiving the scale
//! \param in    input components
//! \param dim   number of components, asserted to be even
//! \param out   output buffer of `dim / 2` bytes
void IVFEntity::IVFReformerWrapper::transform(size_t qidx, const float *in,
                                              size_t dim, uint8_t *out) {
  ailego_assert_with(qidx < scales_.size(), "invalid index");
  ailego_assert_with(dim % 2 == 0, "invalid dim");

  float peak = 0.0f;                                  // largest |component|
  float top = -std::numeric_limits<float>::max();     // largest component
  for (size_t d = 0; d < dim; ++d) {
    float magnitude = std::abs(in[d]);
    peak = std::max(peak, magnitude);
    top = std::max(top, in[d]);
  }

  if (!(peak > 0.0f)) {
    std::fill(out, out + dim / 2, static_cast<uint8_t>(9));
    scales_[qidx] = std::numeric_limits<float>::max();
    return;
  }

  float scale = ((7 * peak > 8 * top) ? 8 : 7) / peak;
  for (size_t d = 0; d < dim; d += 2) {
    auto lo = static_cast<int8_t>(std::round(in[d] * scale));
    auto hi = static_cast<int8_t>(std::round(in[d + 1] * scale));
    out[d / 2] =
        (static_cast<uint8_t>(lo) & 0xF) | (static_cast<uint8_t>(hi) << 4);
  }
  scales_[qidx] = scale;
}

//! Load the inverted index header and everything derived from it
//!
//! Reads the header segment, validates its size fields, deserializes the
//! index meta stored right after the fixed header, initializes the reformer
//! wrapper, and builds the distance calculator from the index metric (or its
//! query metric when one is provided).
//!
//! \return 0 on success, IndexError_InvalidFormat for a missing or malformed
//!         header, IndexError_ReadData on a short read, IndexError_NoExist
//!         when the metric cannot be created, or the error code of the
//!         reformer/metric initialization.
int IVFEntity::load_header(const IndexStorage::Pointer &container) {
  //! Load the Header Segment
  auto segment = container->get(IVF_INVERTED_HEADER_SEG_ID);
  if (!segment) {
    LOG_ERROR("Failed to get segment %s", IVF_INVERTED_HEADER_SEG_ID.c_str());
    return IndexError_InvalidFormat;
  }
  const auto segment_size = segment->data_size();
  if (segment_size < sizeof(header_)) {
    LOG_ERROR("Invalid format for segment %s",
              IVF_INVERTED_HEADER_SEG_ID.c_str());
    return IndexError_InvalidFormat;
  }
  const void *raw = nullptr;
  if (segment->read(0, &raw, segment_size) != segment_size) {
    LOG_ERROR("Failed to read data, segment %s",
              IVF_INVERTED_HEADER_SEG_ID.c_str());
    return IndexError_ReadData;
  }
  std::memcpy(&header_, raw, sizeof(header_));

  // The declared header size must cover the fixed header plus the serialized
  // meta, and must not exceed what the segment actually holds.
  const bool header_size_ok =
      header_.header_size >= sizeof(header_) + header_.index_meta_size &&
      header_.header_size <= segment_size;
  if (!header_size_ok) {
    LOG_ERROR("Invalid header size %u", header_.header_size);
    return IndexError_InvalidFormat;
  }

  //! Load the index meta
  const uint8_t *meta_bytes =
      reinterpret_cast<const uint8_t *>(raw) + sizeof(header_);
  if (!meta_.deserialize(meta_bytes, header_.index_meta_size)) {
    LOG_ERROR("Failed to deserialize index meta");
    return IndexError_InvalidFormat;
  }

  int ret = reformer_.init(meta_);
  ivf_check_error_code(ret);

  //! Create the distance calculator
  auto metric = IndexFactory::CreateMetric(meta_.metric_name());
  if (!metric) {
    LOG_ERROR("Failed to create metric %s", meta_.metric_name().c_str());
    return IndexError_NoExist;
  }
  ret = metric->init(meta_, meta_.metric_params());
  if (ret != 0) {
    LOG_ERROR("Failed to initialize metric %s", meta_.metric_name().c_str());
    return ret;
  }
  calculator_ = std::make_shared<IVFDistanceCalculator>(
      meta_, metric->query_metric() ? metric->query_metric() : metric,
      header_.block_vector_count);
  if (!calculator_) {
    return IndexError_NoMemory;
  }

  return 0;
}

//! Load the inverted index from a storage container
//!
//! After the header, the inverted body, list meta, keys, offsets and mapping
//! segments are loaded with the sizes implied by the header. The
//! normalization value is then derived from the quantizer configuration, and
//! the optional features segment is loaded and size-checked.
//!
//! \param container  storage holding the index segments
//! \return 0 on success, IndexError_InvalidFormat when a segment is missing
//!         or has an unexpected size, or the error code of load_header.
int IVFEntity::load(const IndexStorage::Pointer &container) {
  int ret = this->load_header(container);
  ivf_check_error_code(ret);

  //! Load the remaining segments
  container_ = container;
  size_t expect_size = header_.inverted_body_size;
  inverted_ = load_segment(IVF_INVERTED_BODY_SEG_ID, expect_size);
  if (!inverted_) {
    LOG_ERROR("Failed to load segment, inverted_size=%zu block_count=%u",
              static_cast<size_t>(header_.inverted_body_size),
              header_.block_count);
    return IndexError_InvalidFormat;
  }

  expect_size = header_.inverted_list_count * sizeof(InvertedListMeta);
  inverted_meta_ = load_segment(IVF_INVERTED_META_SEG_ID, expect_size);
  if (!inverted_meta_) {
    LOG_ERROR("Failed to load segment, inverted_lists=%u",
              header_.inverted_list_count);
    return IndexError_InvalidFormat;
  }

  expect_size = header_.total_vector_count * sizeof(uint64_t);
  keys_ = load_segment(IVF_KEYS_SEG_ID, expect_size);
  if (!keys_) {
    return IndexError_InvalidFormat;
  }

  expect_size = header_.total_vector_count * sizeof(InvertedVecLocation);
  offsets_ = load_segment(IVF_OFFSETS_SEG_ID, expect_size);
  if (!offsets_) {
    return IndexError_InvalidFormat;
  }

  expect_size = header_.total_vector_count * sizeof(uint32_t);
  mapping_ = load_segment(IVF_MAPPING_SEG_ID, expect_size);
  if (!mapping_) {
    return IndexError_InvalidFormat;
  }

  norm_value_sqrt_ =
      meta_.metric_name() == "Euclidean" || meta_.metric_name() == "Manhattan";
  if (container_->get(IVF_INT8_QUANTIZED_PARAMS_SEG_ID) ||
      container_->get(IVF_INT4_QUANTIZED_PARAMS_SEG_ID)) {
    // Per-list quantizer params are stored in the index; the segment to read
    // is picked from the reformer name.
    expect_size =
        header_.inverted_list_count * sizeof(InvertedIntegerQuantizerParams);
    auto &seg_id = meta_.reformer_name() == kInt8ReformerName
                       ? IVF_INT8_QUANTIZED_PARAMS_SEG_ID
                       : IVF_INT4_QUANTIZED_PARAMS_SEG_ID;
    integer_quantizer_params_ = load_segment(seg_id, expect_size);
    if (!integer_quantizer_params_) {
      return IndexError_InvalidFormat;
    }
    norm_value_ = 0.0f;
  } else if (meta_.reformer_name() == kInt8ReformerName ||
             meta_.reformer_name() == kInt4ReformerName) {
    // A single scale from the reformer params applies to all lists
    auto &scale_key = meta_.reformer_name() == kInt8ReformerName
                          ? INT8_QUANTIZER_REFORMER_SCALE
                          : INT4_QUANTIZER_REFORMER_SCALE;
    auto scale = meta_.reformer_params().get_as_float(scale_key);
    norm_value_ = this->convert_to_normalize_value(scale);
  } else {
    norm_value_ = 1.0f;
  }

  if (container_->get(IVF_FEATURES_SEG_ID)) {
    features_ = load_segment(IVF_FEATURES_SEG_ID, 0);
    if (!features_) {
      return IndexError_InvalidFormat;
    }
    // The segment must hold a whole number of bytes per vector. With zero
    // vectors the modulo would divide by zero, so only an empty segment is
    // accepted in that case.
    const size_t feature_bytes = features_->data_size();
    const size_t total_vecs = vector_count();
    const bool size_ok = (total_vecs == 0) ? (feature_bytes == 0)
                                           : (feature_bytes % total_vecs == 0);
    if (!size_ok) {
      LOG_ERROR("Invalid featureSegment size=%zu, totalVecs=%zu",
                feature_bytes, total_vecs);
      return IndexError_InvalidFormat;
    }
  }

  LOG_DEBUG(
      "Load inverted index done, docs=%u invertedListCnt=%u "
      "elementSize=%u metric=%s reformer=%s",
      header_.total_vector_count, header_.inverted_list_count,
      meta_.element_size(), meta_.metric_name().c_str(),
      meta_.reformer_name().c_str());
  return 0;
}

//! Search one inverted list, skipping keys rejected by the filter
//!
//! Blocks are read in batches of kBatchBlocks. For each block, the keys the
//! filter does not reject are marked in a bitmask; blocks with no surviving
//! key are skipped without computing distances. Surviving entries whose key
//! is not kInvalidKey are pushed into the heap with their distance multiplied
//! by the list's normalize value.
//!
//! \param inverted_list_id  list to scan, must be below the list count
//! \param query             query vector handed to the distance calculator
//! \param filter            returns true for keys that must be skipped
//! \param scan_count        [out] vector count of the scanned list
//! \param heap              [in,out] result heap
//! \param context_stats     [in,out] filtered / distance-computed counters
//! \return 0 on success, IndexError_ReadData when the list meta, a block or
//!         the keys cannot be read.
int IVFEntity::search(size_t inverted_list_id, const void *query,
                      const IndexFilter &filter, uint32_t *scan_count,
                      IndexDocumentHeap *heap,
                      IndexContext::Stats *context_stats) const {
  ailego_assert_with(inverted_list_id < header_.inverted_list_count,
                     "invalid id");
  auto list_meta = this->inverted_list_meta(inverted_list_id);
  ivf_assert(list_meta, IndexError_ReadData);

  const void *data = nullptr;
  const size_t block_vecs = header_.block_vector_count;
  std::vector<float> distances(block_vecs);
  const size_t batch_size = kBatchBlocks;
  const size_t block_size = header_.block_size;
  const auto norm_val = this->inverted_list_normalize_value(inverted_list_id);
  for (size_t i = 0; i < list_meta->block_count; i += batch_size) {
    //! Read vecs
    const size_t off = list_meta->offset + i * block_size;
    const size_t blocks = std::min(batch_size, list_meta->block_count - i);
    // The last block of the body may be shorter than block_size
    const size_t size =
        std::min(blocks * block_size,
                 static_cast<size_t>(header_.inverted_body_size - off));
    if (inverted_->read(off, &data, size) != size) {
      LOG_ERROR("Failed to read block, off=%zu, size=%zu", off, size);
      return IndexError_ReadData;
    }

    //! Read keys
    size_t items = std::min(blocks * block_vecs,
                            list_meta->vector_count - (i * block_vecs));
    auto keys = get_keys(list_meta->id_offset + i * block_vecs, items);
    if (!keys) {
      return IndexError_ReadData;
    }

    //! Compute distances for each block
    for (size_t b = 0; b < blocks; ++b) {
      const size_t vecs_count =
          std::min(block_vecs, list_meta->vector_count - (i + b) * block_vecs);
      auto block_keys = keys + b * block_vecs;
      size_t keeps = 0;
      ailego_assert_with(block_vecs < sizeof(keeps) * 8, "bits overflow");
      for (size_t k = 0; k < vecs_count; ++k) {
        if (!filter(block_keys[k])) {
          // Shift a size_t one: an int literal would overflow for k >= 31
          keeps |= (size_t{1} << k);
        } else {
          ++(*context_stats->mutable_filtered_count());
        }
      }
      if (keeps == 0) {
        continue;
      }

      const void *block_data = static_cast<const char *>(data) + b * block_size;
      calculator_->query_features_distance(query, block_data, vecs_count,
                                           distances.data());

      *(context_stats->mutable_dist_calced_count()) += vecs_count;

      uint32_t id_off = list_meta->id_offset + (i + b) * block_vecs;
      for (size_t k = 0; k < vecs_count; ++k) {
        if (keeps & (size_t{1} << k)) {
          if (block_keys[k] != kInvalidKey) {
            heap->emplace(block_keys[k], distances[k] * norm_val, id_off + k);
          }
        }
      }
    }
  }

  *scan_count = list_meta->vector_count;
  return 0;
}

//! search in inverted list without filter
int IVFEntity::search(size_t inverted_list_id, const void *query,
                      uint32_t *scan_count, IndexDocumentHeap *heap,
                      IndexContext::Stats *context_stats) const {
  ailego_assert_with(inverted_list_id < header_.inverted_list_count,
                     "invalid id");
  auto list_meta = inverted_list_meta(inverted_list_id);
  ivf_assert(list_meta, IndexError_ReadData);

  const void *data = nullptr;
  const size_t block_vecs = header_.block_vector_count;
  std::vector<float> distances(block_vecs);
  const size_t batch_size = kBatchBlocks;
  const size_t block_size = header_.block_size;
  const auto norm_val = this->inverted_list_normalize_value(inverted_list_id);
  for (size_t i = 0; i < list_meta->block_count; i += batch_size) {
    //! Read vecs
    const size_t off = list_meta->offset + i * block_size;
    const size_t blocks = std::min(batch_size, list_meta->block_count - i);
    const size_t size =
        std::min(blocks * block_size,
                 static_cast<size_t>(header_.inverted_body_size - off));
    if (inverted_->read(off, &data, size) != size) {
      LOG_ERROR("Failed to read block, off=%zu, size=%zu", off, size);
      return IndexError_ReadData;
    }

    //! Read keys
    size_t items = std::min(blocks * block_vecs,
                            list_meta->vector_count - (i * block_vecs));
    auto keys = get_keys(list_meta->id_offset + i * block_vecs, items);
    if (!keys) {
      return IndexError_ReadData;
    }

    //! Compute distances for each block
    for (size_t b = 0; b < blocks; ++b) {
      const size_t vecs_count =
          std::min(block_vecs, list_meta->vector_count - (i + b) * block_vecs);
      auto block_keys = keys + b * block_vecs;
      const void *block_data = static_cast<const char *>(data) + b * block_size;
      calculator_->query_features_distance(query, block_data, vecs_count,
                                           distances.data());
      for (size_t k = 0; k < vecs_count; ++k) {
        if (block_keys[k] != kInvalidKey) {
          uint32_t id = list_meta->id_offset + (i + b) * block_vecs + k;
          heap->emplace(block_keys[k], distances[k] * norm_val, id);
        }
      }
      *(context_stats->mutable_dist_calced_count()) += vecs_count;
    }
  }

  *scan_count = list_meta->vector_count;
  return 0;
}

//! Scan every inverted list with a filter; stops at, and returns, the first
//! non-zero status reported by the per-list search.
int IVFEntity::search(const void *query, const IndexFilter &filter,
                      IndexDocumentHeap *heap,
                      IndexContext::Stats *context_stats) const {
  int status = 0;
  for (size_t list_id = 0;
       status == 0 && list_id < header_.inverted_list_count; ++list_id) {
    uint32_t scanned = 0;  // required by the per-list API, not used here
    status =
        this->search(list_id, query, filter, &scanned, heap, context_stats);
  }
  return status;
}

//! Scan every inverted list without a filter; stops at, and returns, the
//! first non-zero status reported by the per-list search.
int IVFEntity::search(const void *query, IndexDocumentHeap *heap,
                      IndexContext::Stats *context_stats) const {
  int status = 0;
  for (size_t list_id = 0;
       status == 0 && list_id < header_.inverted_list_count; ++list_id) {
    uint32_t scanned = 0;  // required by the per-list API, not used here
    status = this->search(list_id, query, &scanned, heap, context_stats);
  }
  return status;
}

//! Retrieve a vector by local id.
//! When a features segment is present the vector is read from it, using
//! data_size() / vector_count() as the per-vector size. Otherwise the vector
//! location is looked up in the offsets segment and the vector is read from
//! the inverted body; a column-major vector is gathered into the member
//! buffer `vector_`, so that returned pointer is overwritten by the next
//! column-major read on this entity.
//! Returns nullptr on any read failure or when the index holds no vectors.
const void *IVFEntity::get_vector(size_t id) const {
  if (features_) {
    // Guard the divisor: an index without vectors would divide by zero.
    const size_t total = vector_count();
    if (total == 0) {
      LOG_ERROR("Failed to read segment, index has no vectors, id=%zu", id);
      return nullptr;
    }
    const void *data = nullptr;
    size_t element_size = features_->data_size() / total;
    size_t off = id * element_size;
    if (features_->read(off, &data, element_size) != element_size) {
      LOG_ERROR("Failed to read segment, off=%zu size=%zu", off, element_size);
      return nullptr;
    }
    return data;
  }

  const void *data = nullptr;
  size_t size = sizeof(InvertedVecLocation);
  if (offsets_->read(id * size, &data, size) != size) {
    LOG_ERROR("Failed to read offsets segment, id=%zu", id);
    return nullptr;
  }
  // `loc` stays bound to the offsets record even after `data` is reused below.
  auto &loc = *reinterpret_cast<const InvertedVecLocation *>(data);
  if (loc.column_major) {
    vector_.resize(meta_.element_size());
    auto unit_size = IndexMeta::AlignSizeof(meta_.data_type());
    size_t cols = meta_.element_size() / unit_size;
    // Consecutive units of one vector are `step` bytes apart in the block.
    size_t step = block_vector_count() * unit_size;
    size_t rd_size = step * (cols - 1) + unit_size;
    if (inverted_->read(loc.offset, &data, rd_size) != rd_size) {
      LOG_ERROR("Failed to read data, off=%zu size=%zu",
                static_cast<size_t>(loc.offset), rd_size);
      return nullptr;
    }
    for (size_t c = 0; c < cols; ++c) {
      vector_.replace(c * unit_size, unit_size,
                      reinterpret_cast<const char *>(data) + c * step,
                      unit_size);
    }
    return vector_.data();
  } else {
    if (inverted_->read(loc.offset, &data, meta_.element_size()) !=
        meta_.element_size()) {
      LOG_ERROR("Failed to read data, off=%zu size=%u",
                static_cast<size_t>(loc.offset), meta_.element_size());
      return nullptr;
    }
    return data;
  }
}

//! Retrieve a vector by local id into a memory block.
//! Mirrors the pointer-returning overload: reads from the features segment
//! when present, otherwise resolves the location through the offsets segment.
//! For a column-major vector the block is reset to the member buffer
//! `vector_`, which the next column-major read on this entity overwrites.
//! Returns 0 on success, IndexError_Runtime on any read failure or when the
//! index holds no vectors.
int IVFEntity::get_vector(size_t id, IndexStorage::MemoryBlock &block) const {
  if (features_) {
    // Guard the divisor: an index without vectors would divide by zero.
    const size_t total = vector_count();
    if (total == 0) {
      LOG_ERROR("Failed to read segment, index has no vectors, id=%zu", id);
      return IndexError_Runtime;
    }
    size_t element_size = features_->data_size() / total;
    size_t off = id * element_size;
    if (features_->read(off, block, element_size) != element_size) {
      LOG_ERROR("Failed to read segment, off=%zu size=%zu", off, element_size);
      return IndexError_Runtime;
    }
    return 0;
  }

  IndexStorage::MemoryBlock data_block;
  size_t size = sizeof(InvertedVecLocation);
  if (offsets_->read(id * size, data_block, size) != size) {
    LOG_ERROR("Failed to read offsets segment, id=%zu", id);
    return IndexError_Runtime;
  }
  const void *data = data_block.data();
  // `loc` stays bound to the offsets record even after `data` is reused below.
  auto &loc = *reinterpret_cast<const InvertedVecLocation *>(data);
  if (loc.column_major) {
    vector_.resize(meta_.element_size());
    auto unit_size = IndexMeta::AlignSizeof(meta_.data_type());
    size_t cols = meta_.element_size() / unit_size;
    // Consecutive units of one vector are `step` bytes apart in the block.
    size_t step = block_vector_count() * unit_size;
    size_t rd_size = step * (cols - 1) + unit_size;
    if (inverted_->read(loc.offset, &data, rd_size) != rd_size) {
      LOG_ERROR("Failed to read data, off=%zu size=%zu",
                static_cast<size_t>(loc.offset), rd_size);
      return IndexError_Runtime;
    }
    for (size_t c = 0; c < cols; ++c) {
      vector_.replace(c * unit_size, unit_size,
                      reinterpret_cast<const char *>(data) + c * step,
                      unit_size);
    }
    block.reset(vector_.data());
    return 0;
  } else {
    if (inverted_->read(loc.offset, block, meta_.element_size()) !=
        meta_.element_size()) {
      LOG_ERROR("Failed to read data, off=%zu size=%u",
                static_cast<size_t>(loc.offset), meta_.element_size());
      return IndexError_Runtime;
    }
    return 0;
  }
}

//! Map a primary key to its local id.
//! Binary-searches the mapping segment: entry `mid` holds a local id whose
//! key (read from the keys segment) is compared with `key`.
//! Returns std::numeric_limits<uint32_t>::max() when the key is absent or a
//! segment read fails.
uint32_t IVFEntity::key_to_id(uint64_t key) const {
  constexpr uint32_t not_found = std::numeric_limits<uint32_t>::max();

  uint32_t lo = 0u;
  uint32_t hi = vector_count();
  while (lo < hi) {
    const uint32_t mid = lo + (hi - lo) / 2;

    const void *id_slot = nullptr;
    if (ailego_unlikely(mapping_->read(mid * sizeof(uint32_t), &id_slot,
                                       sizeof(uint32_t)) != sizeof(uint32_t))) {
      LOG_ERROR("Failed to read mapping segment, idx=%u", mid);
      return not_found;
    }
    const uint32_t local_id = *reinterpret_cast<const uint32_t *>(id_slot);

    const void *key_slot = nullptr;
    if (ailego_unlikely(keys_->read(local_id * sizeof(uint64_t), &key_slot,
                                    sizeof(uint64_t)) != sizeof(uint64_t))) {
      LOG_ERROR("Read key from segment failed");
      return not_found;
    }
    const uint64_t stored_key = *static_cast<const uint64_t *>(key_slot);

    if (stored_key == key) {
      return local_id;
    }
    if (stored_key < key) {
      lo = mid + 1;
    } else {
      hi = mid;
    }
  }
  return not_found;
}

//! Retrieve a vector by primary key; nullptr when the key cannot be resolved
//! to a local id or the vector cannot be read.
const void *IVFEntity::get_vector_by_key(uint64_t key) const {
  const uint32_t id = this->key_to_id(key);
  if (id == std::numeric_limits<uint32_t>::max()) {
    return nullptr;
  }
  return get_vector(id);
}

//! Retrieve a vector by primary key into `block`; IndexError_Runtime when the
//! key cannot be resolved to a local id, otherwise the result of get_vector.
int IVFEntity::get_vector_by_key(uint64_t key,
                                 IndexStorage::MemoryBlock &block) const {
  const uint32_t id = this->key_to_id(key);
  if (id == std::numeric_limits<uint32_t>::max()) {
    return IndexError_Runtime;
  }
  return get_vector(id, block);
}

//! Clone into a freshly allocated IVFEntity.
IVFEntity::Pointer IVFEntity::clone(void) const {
  return clone(std::make_shared<IVFEntity>());
}

//! Clone this entity into `entity`.
//! Every storage segment is cloned first; `entity` is only written once all
//! clones have succeeded, so a failure leaves it untouched. The quantizer
//! params and features segments are optional and cloned only when present.
//! Returns `entity` on success, nullptr on a null `entity` or a failed clone.
IVFEntity::Pointer IVFEntity::clone(const IVFEntity::Pointer &entity) const {
  if (!entity) {
    LOG_ERROR("Failed to alloc IVFEntity");
    return nullptr;
  }

  //! Mandatory segments
  auto inverted = inverted_->clone();
  ivf_assert_with_msg(inverted, nullptr, "Failed to clone inverted segment");

  auto inverted_meta = inverted_meta_->clone();
  ivf_assert_with_msg(inverted_meta, nullptr,
                      "Failed to clone inverted meta segment");

  auto keys = keys_->clone();
  ivf_assert_with_msg(keys, nullptr, "Failed to clone keys segment");

  auto offsets = offsets_->clone();
  ivf_assert_with_msg(offsets, nullptr, "Failed to clone offsets segment");

  auto mapping = mapping_->clone();
  ivf_assert_with_msg(mapping, nullptr, "Failed to clone mapping segment");

  //! Optional segments
  IndexStorage::Segment::Pointer quantizer_params_copy;
  if (integer_quantizer_params_) {
    quantizer_params_copy = integer_quantizer_params_->clone();
    if (!quantizer_params_copy) {
      LOG_ERROR("Failed to clone integer quantizer params segment");
      return nullptr;
    }
  }

  IndexStorage::Segment::Pointer features_copy;
  if (features_) {
    features_copy = features_->clone();
    if (!features_copy) {
      LOG_ERROR("Failed to clone features segment");
      return nullptr;
    }
  }

  //! Shared, non-segment state
  entity->meta_ = this->meta_;
  entity->reformer_ = this->reformer_;
  entity->calculator_ = this->calculator_;
  entity->header_ = this->header_;
  entity->container_ = this->container_;
  entity->norm_value_ = this->norm_value_;
  entity->norm_value_sqrt_ = this->norm_value_sqrt_;

  //! Cloned segments
  entity->inverted_ = inverted;
  entity->inverted_meta_ = inverted_meta;
  entity->keys_ = keys;
  entity->offsets_ = offsets;
  entity->mapping_ = mapping;
  entity->integer_quantizer_params_ = quantizer_params_copy;
  entity->features_ = features_copy;

  return entity;
}

//! Fetch segment `seg_id` from the container.
//! A non-zero `expect_size` must equal the segment's data size; zero skips
//! the size check. Returns nullptr when the segment is missing or mis-sized.
IndexStorage::Segment::Pointer IVFEntity::load_segment(
    const std::string &seg_id, size_t expect_size) const {
  auto seg = container_->get(seg_id);
  if (!seg) {
    LOG_ERROR("Failed to get segment %s", seg_id.c_str());
    return nullptr;
  }

  const bool check_size = (expect_size != 0);
  if (check_size && seg->data_size() != expect_size) {
    LOG_ERROR("Invalid segment %s size=%zu, total_vecs=%u", seg_id.c_str(),
              seg->data_size(), header_.total_vector_count);
    return nullptr;
  }
  return seg;
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/ivf/ivf_entity.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <core/quantizer/quantizer_params.h>
#include <zvec/core/framework/index_framework.h>
#include "metric/metric_params.h"
#include "ivf_distance_calculator.h"
#include "ivf_index_format.h"
#include "ivf_params.h"

namespace zvec {
namespace core {

/*! IVF Entity
 *
 *  Read-side view of an inverted (IVF) index held in storage segments: the
 *  inverted body, per-list meta, keys, offsets, key->id mapping and the
 *  optional features / integer-quantizer-params segments. It offers per-list
 *  and whole-index scans plus key and vector lookups.
 *
 *  NOTE(review): get_vector() gathers column-major vectors into the mutable
 *  member `vector_`, so concurrent get_vector() calls on one entity appear
 *  unsafe; clone() gives each user its own buffer -- confirm with callers.
 */
class IVFEntity {
 public:
  typedef std::shared_ptr<IVFEntity> Pointer;

  class IVFReformerWrapper;

  //! Constructor
  IVFEntity() {}

  //! Destructor
  virtual ~IVFEntity() {}

  //! Disable them
  IVFEntity(const IVFEntity &) = delete;
  IVFEntity &operator=(const IVFEntity &) = delete;

  //! load the index from container
  virtual int load(const IndexStorage::Pointer &container);

  //! search in inverted list with filter
  int search(size_t inverted_list_id, const void *query,
             const IndexFilter &filter, uint32_t *scan_count,
             IndexDocumentHeap *heap, IndexContext::Stats *context_stats) const;

  //! search in inverted list without filter
  int search(size_t inverted_list_id, const void *query, uint32_t *scan_count,
             IndexDocumentHeap *heap, IndexContext::Stats *context_stats) const;

  //! search all inverted list with filter
  int search(const void *query, const IndexFilter &filter,
             IndexDocumentHeap *heap, IndexContext::Stats *context_stats) const;

  //! search all inverted list without filter
  int search(const void *query, IndexDocumentHeap *heap,
             IndexContext::Stats *context_stats) const;

  //! Clone the entity into a newly allocated IVFEntity
  virtual IVFEntity::Pointer clone(void) const;

  //! Clone the entity into the given (already allocated) entity
  IVFEntity::Pointer clone(const IVFEntity::Pointer &entity) const;

  //! Retrieve the primary keys by local id in heap
  //! Each heap entry's index() is treated as a local id and its key is
  //! replaced by the key read for that id. Stops and returns
  //! IndexError_ReadData as soon as a lookup yields kInvalidKey; entries
  //! already visited keep their updated key.
  int retrieve_keys(IndexDocumentHeap *heap) const {
    for (auto &it : (*heap)) {
      uint64_t key = this->get_key(it.index());
      if (key == kInvalidKey) {
        return IndexError_ReadData;
      }
      it.set_key(key);
    }

    return 0;
  }

  //! Retrieve the total vectors in the index
  size_t vector_count(void) const {
    return header_.total_vector_count;
  }

  //! Retrieve the inverted list count
  size_t inverted_list_count(void) const {
    return header_.inverted_list_count;
  }

  //! Retrieve block size of the inverted vector
  size_t inverted_block_size(void) const {
    return header_.block_size;
  }

  //! Retrieve the vectors count in one block
  size_t block_vector_count(void) const {
    return header_.block_vector_count;
  }

  //! Retrieve IndexMeta of the inverted index
  const IndexMeta &meta(void) const {
    return meta_;
  }

  //! Retrieve a block of vectors
  //! `local_block_id` is the block's position inside the inverted list.
  //! On success `*vecs_count` holds the number of vectors in that block (the
  //! last block of a list may hold fewer than block_vector_count) and the
  //! returned pointer covers `*vecs_count * element_size` bytes.
  //! Returns nullptr if the list meta cannot be read, the block id is out of
  //! range, or the read fails.
  const void *read_block(size_t inverted_list_id, size_t local_block_id,
                         size_t *vecs_count) const {
    auto iv_meta = this->inverted_list_meta(inverted_list_id);
    if (!iv_meta || local_block_id >= iv_meta->block_count) {
      LOG_ERROR("Failed to read inverted list, listId=%zu blockIdx=%zu",
                inverted_list_id, local_block_id);
      return nullptr;
    }

    size_t block_vecs = header_.block_vector_count;
    // Vectors remaining in the list from this block on, capped at one block.
    *vecs_count = std::min(block_vecs,
                           iv_meta->vector_count - local_block_id * block_vecs);
    ailego_assert_with(*vecs_count <= header_.block_vector_count,
                       "invalid vecs");
    const size_t off = iv_meta->offset + local_block_id * header_.block_size;
    const size_t size = *vecs_count * meta_.element_size();
    const void *data = nullptr;
    if (inverted_->read(off, &data, size) != size) {
      LOG_ERROR("Failed to read block off=%zu size=%zu", off, size);
      return nullptr;
    }

    return data;
  }

  //! Retrieve the inverted list meta
  //! Reads record `inverted_list_id` of the fixed-size InvertedListMeta array
  //! in the inverted meta segment; nullptr when the read fails.
  const InvertedListMeta *inverted_list_meta(size_t inverted_list_id) const {
    const void *data = nullptr;
    const size_t size = sizeof(InvertedListMeta);
    const size_t offset = inverted_list_id * size;
    if (inverted_meta_->read(offset, &data, size) != size) {
      LOG_ERROR("Failed to read inverted meta, id=%zu, size=%zu",
                inverted_list_id, size);
      return nullptr;
    }

    return static_cast<const InvertedListMeta *>(data);
  }

  //! Retrieve the keys by consecutive local ids
  //! Returns a pointer to `count` keys starting at local id `id`, or nullptr
  //! when the read fails.
  const uint64_t *get_keys(size_t id, size_t count) const {
    const void *data = nullptr;
    const size_t offset = id * sizeof(uint64_t);
    const size_t size = count * sizeof(uint64_t);
    if (keys_->read(offset, &data, size) != size) {
      LOG_ERROR("Failed to read keys, id=%zu, size=%zu", id, size);
      return nullptr;
    }

    return static_cast<const uint64_t *>(data);
  }

  //! Retrieve the key by local id
  //! Returns kInvalidKey when the read fails.
  uint64_t get_key(size_t id) const {
    const void *data = nullptr;
    const size_t offset = id * sizeof(uint64_t);
    const size_t size = sizeof(uint64_t);
    if (keys_->read(offset, &data, size) != size) {
      LOG_ERROR("Failed to read key, id=%zu", id);
      return kInvalidKey;
    }

    return *static_cast<const uint64_t *>(data);
  }

  //! Retrieve vector by local id (nullptr on failure)
  const void *get_vector(size_t id) const;

  //! Retrieve vector by primary key (nullptr on failure)
  const void *get_vector_by_key(uint64_t key) const;

  //! Retrieve vector by local id into a memory block (0 on success)
  int get_vector(size_t id, IndexStorage::MemoryBlock &block) const;

  //! Retrieve vector by primary key into a memory block (0 on success)
  int get_vector_by_key(uint64_t key, IndexStorage::MemoryBlock &block) const;

  //! Map a primary key to its local id; returns the maximum uint32_t value
  //! when the key is not found or a read fails
  uint32_t key_to_id(uint64_t key) const;

  //! Transform a query
  int transform(const void *query, const IndexQueryMeta &qmeta,
                const void **out, IndexQueryMeta *ometa) const {
    return reformer_.transform(query, qmeta, out, ometa);
  }

  //! Transform queries
  int transform(const void *query, const IndexQueryMeta &qmeta, uint32_t count,
                const void **out, IndexQueryMeta *ometa) const {
    return reformer_.transform(query, qmeta, count, out, ometa);
  }

  //! Normalize the score in query part
  void normalize(size_t qidx, IndexDocumentHeap *heap) const {
    return reformer_.normalize(qidx, heap);
  }

  //! Retrieve the value for each inverted list to multiply for normalizing
  //! A non-zero norm_value_ applies to every list. Otherwise, when the
  //! integer quantizer params segment exists, the value is derived from that
  //! list's quantizer scale (1.0f if the segment read fails). With neither,
  //! norm_value_ (zero at that point) is returned.
  float inverted_list_normalize_value(size_t inverted_list_id) const {
    if (norm_value_ != 0.0f) {
      return norm_value_;
    }

    // ailego_assert_with(integer_quantizer_params_, "nullptr");
    if (integer_quantizer_params_ != nullptr) {
      const void *data = nullptr;
      size_t size = sizeof(InvertedIntegerQuantizerParams);
      size_t off = inverted_list_id * size;
      if (integer_quantizer_params_->read(off, &data, size) != size) {
        LOG_ERROR("Failed to read data from segment, off=%zu", off);
        return 1.0f;
      }
      auto scale =
          static_cast<const InvertedIntegerQuantizerParams *>(data)->scale;
      return this->convert_to_normalize_value(scale);
    }

    return norm_value_;
  }

  //! Check whether the feature segment exist
  bool has_orignal_feature() const {
    return !!features_;
  }

  //! Retrieve reformer
  const IVFReformerWrapper &reformer(void) const {
    return reformer_;
  }

  /*! Index Reformer Wrapper
   *  To transform query in inverted index searching, and normalize the score
   */
  class IVFReformerWrapper {
   public:
    //! Constructor
    IVFReformerWrapper() {}

    //! Assignment
    //! Copies the reformer, type and reciprocal; the destination's buffer_
    //! is emptied and its storage released rather than copied, and scales_
    //! is left as it was in the destination.
    IVFReformerWrapper &operator=(const IVFReformerWrapper &wrapper) {
      reformer_ = wrapper.reformer_;
      type_ = wrapper.type_;
      buffer_.clear();
      buffer_.shrink_to_fit();
      reciprocal_ = wrapper.reciprocal_;
      return *this;
    }

    //! Initialize
    int init(const IndexMeta &imeta);

    //! Update
    int update(const IndexMeta &meta);

    //! Transform a query
    int transform(const void *query, const IndexQueryMeta &qmeta,
                  const void **out, IndexQueryMeta *ometa);

    //! Transform queries
    int transform(const void *query, const IndexQueryMeta &qmeta,
                  uint32_t count, const void **out, IndexQueryMeta *ometa);

    //! Convert a record
    virtual int convert(const void *record, const IndexQueryMeta &rmeta,
                        const void **out, IndexQueryMeta *ometa);

    //! Convert records
    virtual int convert(const void *records, const IndexQueryMeta &rmeta,
                        uint32_t count, const void **out,
                        IndexQueryMeta *ometa);

    //! Transform queries
    int transform_gpu(const void *query, const IndexQueryMeta &qmeta,
                      uint32_t count, const void **out, IndexQueryMeta *ometa);

    //! Normalize the score in query part
    void normalize(size_t qidx, IndexDocumentHeap *heap) const;

    //! Normalize the score in query part
    void normalize(size_t qidx, const void *query, const IndexQueryMeta &qmeta,
                   IndexDocumentHeap *heap) const;

   private:
    //! Transform query from fp32 to int8
    void transform(size_t qidx, const float *in, size_t dim, int8_t *out);

    //! Transform query from fp32 to int4
    void transform(size_t qidx, const float *in, size_t dim, uint8_t *out);

   private:
    //! Constants
    //! Kind of query transformation applied by this wrapper
    enum Type {
      kReformerTpNone = 0,
      kReformerTpInnerProductInt8 = 1,
      kReformerTpInnerProductInt4 = 2,
      kReformerTpInt8 = 3,
      kReformerTpInt4 = 4,
      kReformerTpDefault = 7,
    };

    //! Members
    Type type_{kReformerTpNone};
    IndexReformer::Pointer reformer_{};
    std::string buffer_{};
    float reciprocal_{0.0};        // for int8
    std::vector<float> scales_{};  // for int8 IP
  };

 private:
  //! Load the segment by seg_id in expect_size segment size
  IndexStorage::Segment::Pointer load_segment(const std::string &seg_id,
                                              size_t expect_size) const;

  //! Load the header segment
  int load_header(const IndexStorage::Pointer &container);

  //! Convert the int8 quantizer scale to normalize value
  //! Yields 1/scale (1 when scale is zero), or its square root when
  //! norm_value_sqrt_ is set.
  float convert_to_normalize_value(float scale) const {
    auto v = scale == 0.0 ? 1.0 : (1.0 / scale);
    return !norm_value_sqrt_ ? v : std::sqrt(v);
  }

 protected:
  //! Constants
  //! Number of blocks fetched per read while scanning an inverted list
  static constexpr size_t kBatchBlocks = 10u;

  //! Members
  IndexMeta meta_{};
  mutable IVFReformerWrapper reformer_{};
  IVFDistanceCalculator::Pointer calculator_{};
  InvertedIndexHeader header_{};
  IndexStorage::Pointer container_{};
  IndexStorage::Segment::Pointer inverted_{};
  IndexStorage::Segment::Pointer inverted_meta_{};
  IndexStorage::Segment::Pointer keys_{};
  IndexStorage::Segment::Pointer offsets_{};
  IndexStorage::Segment::Pointer mapping_{};
  IndexStorage::Segment::Pointer features_{};
  IndexStorage::Segment::Pointer integer_quantizer_params_{};
  mutable std::string vector_{};  // temporary buffer for column major order
  float norm_value_{0.0f};  // normalize the inverted vector to original score
  bool norm_value_sqrt_{false};  // does the norm value need to sqrt
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/ivf/ivf_index_format.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <ailego/container/bitmap.h>
#include <zvec/core/framework/index_framework.h>

namespace zvec {
namespace core {

//! Sentinel primary key (maximum uint64_t value) marking an invalid or
//! unreadable key
static constexpr uint64_t kInvalidKey = std::numeric_limits<uint64_t>::max();

/*! Index Format of Inverted Index Header
 *
 *  Fixed-size header followed by a variable-length payload (`index_meta`).
 *  Field order and widths define the stored layout; do not reorder.
 */
struct InvertedIndexHeader {
  uint32_t header_size{0};
  uint32_t total_vector_count{0};   // vectors in the whole index
  uint64_t inverted_body_size{0};   // size in bytes of the inverted body
  uint32_t inverted_list_count{0};  // number of inverted lists
  uint32_t block_vector_count{0};   // vectors per block
  uint32_t block_size{0};           // size in bytes of one block
  uint32_t block_count{0};
  uint32_t index_meta_size{0};  // presumably the byte size of index_meta
  char reserved_[28];
  char index_meta[0];  // zero-length trailing array (compiler extension)
};

/*! Index Format of Inverted Index Meta for each Inverted list
 *
 *  One record per inverted list, stored consecutively in the inverted meta
 *  segment and addressed by list id.
 */
struct InvertedListMeta {
  uint64_t offset{0};        // byte offset of the list's first block
  uint32_t block_count{0};   // blocks owned by the list
  uint32_t vector_count{0};  // vectors stored in the list
  uint32_t id_offset{0};     // local id of the list's first vector
  char reserved_[16];
};

/*! Index Format of Location in Inverted Index for each vector
 *
 *  Packed into one 64-bit word: 48-bit offset, 1-bit layout flag, 15 reserved
 *  bits that the constructor zeroes.
 */
struct InvertedVecLocation {
  //! Constructor: `off` is truncated to the 48-bit offset field
  InvertedVecLocation(size_t off, bool col)
      : offset(off), column_major(col), reserved(0u) {}

  uint64_t offset : 48;       // feature offset in posting block segment
  uint64_t column_major : 1;  // column-major if true
  uint64_t reserved : 15;
};

/*! Index Format of Integer Quantizer params for each inverted list
 *
 *  One record per inverted list, addressed by list id.
 */
struct InvertedIntegerQuantizerParams {
  float scale{1.0};  // quantizer scale
  float bias{0.0};   // quantizer bias
};

/*! Location of Vectors Block in Storage Segment
 *
 *  Identifies a block by storage segment id and block index.
 */
struct BlockLocation {
  uint16_t segment_id;   // id of the storage segment
  uint16_t block_index;  // index of the block
};

/*! The Header of a Block in Storage Segment
 *
 *  Carries a `next` block location, presumably chaining the blocks of one
 *  list -- confirm against the streamer code.
 */
struct BlockHeader {
  BlockLocation next;         // location of the next block
  uint16_t vector_count;      // vectors stored in this block
  uint16_t column_major : 1;  // layout flag of the block
  uint16_t reserved_ : 15;
};

/*! 32-bit bitmap wrapper
 *
 *  Thin forwarding layer over ailego::FixedBitset<32>. Judging by the name
 *  it presumably flags deleted vectors -- confirm against callers.
 */
struct DeletionMap {
  //! Set bit `index`
  void set(uint32_t index) {
    bitset.set(index);
  }

  //! Clear bit `index`
  void reset(uint32_t index) {
    bitset.reset(index);
  }

  //! Test bit `index`
  bool test(uint32_t index) const {
    return bitset.test(index);
  }

  //! True when any bit is set
  bool is_dirty() const {
    return bitset.test_any();
  }

  ailego::FixedBitset<32> bitset{};
};

static_assert(sizeof(DeletionMap) == 4, "DeletionMap must be 4 bytes");

/*! Meta Information of Streamer Entity
 *
 *  Bookkeeping fields followed by the inverted index header. `header` ends
 *  with a zero-length array, so it must remain the last member.
 */
struct StreamerInvertedMeta {
  uint64_t create_time{0};
  uint64_t update_time{0};
  uint64_t revision_id{0};
  uint32_t segment_count{0};
  uint32_t segment_size{0};
  uint8_t reserved_[32];
  InvertedIndexHeader header;
};

/*! Location of Vector in Storage Segment
 *
 *  Packed into 8 bytes: 16-bit segment id, 1-bit layout flag, 15 reserved
 *  bits and a 32-bit offset. Both constructors zero the reserved bits so
 *  that copies written out as raw bytes never contain indeterminate data.
 */
struct VectorLocation {
  //! Constructor: every field, including the reserved bits, is zeroed
  VectorLocation(void)
      : segment_id(0u), column_major(0u), reserved_(0u), offset(0u) {}

  //! Constructor
  //! @param id   storage segment id
  //! @param col  true when the vector is stored column-major
  //! @param off  offset of the vector
  VectorLocation(uint16_t id, bool col, uint32_t off)
      : segment_id(id), column_major(col), reserved_(0u), offset(off) {}

  uint16_t segment_id;
  uint16_t column_major : 1;
  uint16_t reserved_ : 15;
  uint32_t offset;

 public:
  //! Equality on segment id, layout flag and offset (reserved bits ignored)
  bool operator==(const VectorLocation &other) const {
    return segment_id == other.segment_id &&
           column_major == other.column_major && offset == other.offset;
  }
};

static_assert(sizeof(VectorLocation) == sizeof(uint64_t),
              "VectorLocation must be size of 8 bytes");

/*! Centroid index and storage location recorded for a key
 *
 *  `centroid_idx` is initialized to 0 by the constructors that are not given
 *  one, so it never holds an indeterminate value.
 */
struct KeyInfo {
  //! Constructor: centroid index 0, default-constructed location
  KeyInfo(void) : centroid_idx(0u), location() {}

  //! Constructor
  KeyInfo(uint32_t idx, const VectorLocation &loc)
      : centroid_idx(idx), location(loc) {}

  //! Constructor from a location only (implicit on purpose, as before);
  //! centroid index is 0
  KeyInfo(VectorLocation loc) : centroid_idx(0u), location(loc) {}

  uint32_t centroid_idx;
  VectorLocation location;
};

// Segments ID
// Names under which the IVF index parts are stored in the container.
// NOTE(review): the keys segment uses the "hc." prefix, unlike the other
// "ivf." names -- presumably kept for compatibility; confirm before changing.
const std::string IVF_CENTROID_SEG_ID("ivf.centroid");
const std::string IVF_INVERTED_BODY_SEG_ID("ivf.inverted_body");
const std::string IVF_INVERTED_HEADER_SEG_ID("ivf.inverted_header");
const std::string IVF_INVERTED_META_SEG_ID("ivf.inverted_meta");
const std::string IVF_KEYS_SEG_ID("hc.keys");
const std::string IVF_OFFSETS_SEG_ID("ivf.offsets");
const std::string IVF_MAPPING_SEG_ID("ivf.mapping");
const std::string IVF_FEATURES_SEG_ID("ivf.features");
const std::string IVF_INT8_QUANTIZED_PARAMS_SEG_ID("ivf.int8_quantized_params");
const std::string IVF_INT4_QUANTIZED_PARAMS_SEG_ID("ivf.int4_quantized_params");

// Segment names that are not "ivf."-prefixed parts of the list above
const std::string IVF_INVERTED_LIST_HEAD_SEG_ID("ivf.inverted_list_head");
const std::string IVF_STORAGE_SEGMENT_ID("ivf.S");

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/ivf/ivf_index_provider.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <zvec/core/framework/index_searcher.h>
#include "ivf_entity.h"

namespace zvec {
namespace core {

/*! IVF IndexProvider
 *
 *  Exposes the vectors of an IVFEntity through the IndexProvider interface:
 *  counts and type information come from the IndexMeta, vectors and keys
 *  from the entity.
 *
 *  NOTE(review): `meta_` is stored as a reference, so the IndexMeta passed
 *  to the constructor must outlive this provider.
 */
class IVFIndexProvider : public IndexProvider {
 public:
  //! Constructor
  //! @param meta    index meta (kept by reference, not copied)
  //! @param entity  entity that serves the vectors and keys
  //! @param owner   name reported by owner_class()
  IVFIndexProvider(const IndexMeta &meta, const IVFEntity::Pointer &entity,
                   const std::string &owner)
      : meta_(meta), entity_(entity), owner_class_(owner) {}

  IVFIndexProvider(const IVFIndexProvider &) = delete;
  IVFIndexProvider &operator=(const IVFIndexProvider &) = delete;

 public:
  //! Create a new iterator
  //! The allocation uses std::nothrow, so the returned pointer is null when
  //! allocation fails.
  virtual Iterator::Pointer create_iterator(void) override {
    return Iterator::Pointer(new (std::nothrow) Iterator(entity_));
  }

  //! Retrieve count of vectors
  virtual size_t count(void) const override {
    return entity_->vector_count();
  }

  //! Retrieve dimension of vector
  virtual size_t dimension(void) const override {
    return meta_.dimension();
  }

  //! Retrieve type of vector
  virtual IndexMeta::DataType data_type(void) const override {
    return meta_.data_type();
  }

  //! Retrieve vector size in bytes
  virtual size_t element_size(void) const override {
    return meta_.element_size();
  }

  //! Retrieve a vector using a primary key
  virtual const void *get_vector(uint64_t key) const override {
    return entity_->get_vector_by_key(key);
  }

  //! Retrieve the owner class
  virtual const std::string &owner_class(void) const override {
    return owner_class_;
  }

 private:
  //! Iterator over the entity's vectors in local id order, starting at id 0
  class Iterator : public IndexProvider::Iterator {
   public:
    Iterator(const IVFEntity::Pointer &entity) : entity_(entity) {}

    //! Retrieve pointer of data
    //! NOTICE: the vec feature will be changed after iterating to next, so
    //! the caller need to keep a copy of it before iterator to next vector
    virtual const void *data(void) const override {
      return entity_->get_vector(index_);
    }

    //! Test if the iterator is valid
    virtual bool is_valid(void) const override {
      return index_ < entity_->vector_count();
    }

    //! Retrieve primary key
    virtual uint64_t key(void) const override {
      return entity_->get_key(index_);
    }

    //! Next iterator
    virtual void next(void) override {
      ++index_;
    }

   private:
    //! Members
    IVFEntity::Pointer entity_;
    size_t index_{0};  // local id of the current vector
  };

 private:
  //! Members
  const IndexMeta &meta_;  // not owned; must outlive the provider
  IVFEntity::Pointer entity_;
  std::string owner_class_;
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/ivf/ivf_params.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <string>

namespace zvec {
namespace core {

static const std::string SEPARATOR("/");
static const std::string CENTROID_SEPERATOR = "*";

// Parameter keys understood by the IVF builder. Every key lives under the
// "proxima.ivf.builder." prefix.
static const std::string PARAM_IVF_BUILDER_CENTROID_COUNT =
    "proxima.ivf.builder.centroid_count";
static const std::string PARAM_IVF_BUILDER_CLUSTER_CLASS =
    "proxima.ivf.builder.cluster_class";
static const std::string PARAM_IVF_BUILDER_THREAD_COUNT =
    "proxima.ivf.builder.thread_count";
static const std::string PARAM_IVF_BUILDER_CLUSTER_AUTO_TUNING =
    "proxima.ivf.builder.cluster_auto_tuning";
static const std::string PARAM_IVF_BUILDER_TRAIN_SAMPLE_COUNT =
    "proxima.ivf.builder.train_sample_count";
static const std::string PARAM_IVF_BUILDER_TRAIN_SAMPLE_RATIO =
    "proxima.ivf.builder.train_sample_ratio";
static const std::string PARAM_IVF_BUILDER_CONVERTER_PARAMS =
    "proxima.ivf.builder.converter_params";
static const std::string PARAM_IVF_BUILDER_CONVERTER_CLASS =
    "proxima.ivf.builder.converter_class";
static const std::string PARAM_IVF_BUILDER_STORE_ORIGINAL_FEATURES =
    "proxima.ivf.builder.store_original_features";
static const std::string PARAM_IVF_BUILDER_QUANTIZER_CLASS =
    "proxima.ivf.builder.quantizer_class";
static const std::string PARAM_IVF_BUILDER_QUANTIZE_BY_CENTROID =
    "proxima.ivf.builder.quantize_by_centroid";
static const std::string PARAM_IVF_BUILDER_QUANTIZER_PARAMS =
    "proxima.ivf.builder.quantizer_params";
// Key prefix rather than a complete key (note the trailing underscore);
// presumably a level suffix is appended by the user of this constant.
static const std::string PARAM_IVF_BUILDER_CLUSTER_PARAMS_IN_LEVEL_PREFIX =
    "proxima.ivf.builder.cluster_params_in_level_";
static const std::string PARAM_IVF_BUILDER_OPTIMIZER_CLASS =
    "proxima.ivf.builder.optimizer_class";
static const std::string PARAM_IVF_BUILDER_OPTIMIZER_PARAMS =
    "proxima.ivf.builder.optimizer_params";
static const std::string PARAM_IVF_BUILDER_OPTIMIZER_QUANTIZER_CLASS =
    "proxima.ivf.builder.optimizer_quantizer_class";
static const std::string PARAM_IVF_BUILDER_OPTIMIZER_QUANTIZER_PARAMS =
    "proxima.ivf.builder.optimizer_quantizer_params";
static const std::string PARAM_IVF_BUILDER_BLOCK_VECTOR_COUNT =
    "proxima.ivf.builder.block_vector_count";

// Parameter keys understood by the IVF searcher/streamer. Every key lives
// under the "proxima.ivf.searcher." prefix.
static const std::string PARAM_IVF_SEARCHER_SCAN_RATIO =
    "proxima.ivf.searcher.scan_ratio";
static const std::string PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD =
    "proxima.ivf.searcher.brute_force_threshold";
static const std::string PARAM_IVF_SEARCHER_OPTIMIZER =
    "proxima.ivf.searcher.optimizer";
static const std::string PARAM_IVF_SEARCHER_OPTIMIZER_PARAMS =
    "proxima.ivf.searcher.optimizer_params";
static const std::string PARAM_IVF_SEARCHER_CONVERTER_REFORMER =
    "proxima.ivf.searcher.converter_reformer";

// Names of metrics, converters, quantizers and reformers, plus a scale
// factor, exposed as compile-time constants.
static constexpr const char *kIPMetricName = "InnerProduct";
static constexpr const char *kMipsMetricName = "MipsSquaredEuclidean";
static constexpr const char *kL2MetricName = "SquaredEuclidean";
static constexpr const char *kMipsConverterName = "MipsConverter";
static constexpr const char *kMipsRevConverterName = "MipsReverseConverter";
static constexpr const char *kMipsReformerName = "MipsReformer";
static constexpr const char *kInt8QuantizerName = "Int8QuantizerConverter";
static constexpr const char *kInt4QuantizerName = "Int4QuantizerConverter";
static constexpr const char *kInt8ReformerName = "Int8QuantizerReformer";
static constexpr const char *kInt4ReformerName = "Int4QuantizerReformer";
static constexpr float kNormalizeScaleFactor = 16.0f;

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/ivf/ivf_searcher.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ivf_searcher.h"
#include <zvec/ailego/utility/time_helper.h>
#include <zvec/core/framework/index_segment_storage.h>
#include "ivf_centroid_index.h"
#include "ivf_index_provider.h"
#include "ivf_params.h"

namespace zvec {
namespace core {

//! Initialize the searcher
//!
//! Stores a copy of `parameters`, reads the brute-force threshold key from
//! them and moves the searcher to STATE_INITED. Always returns 0.
int IVFSearcher::init(const ailego::Params &parameters) {
  // The result of get() is not inspected; what happens to the member when
  // the key is missing is decided by ailego::Params::get.
  parameters.get(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD,
                 &bruteforce_threshold_);
  params_ = parameters;

  searcher_state_ = STATE_INITED;
  return 0;
}

int IVFSearcher::cleanup(void) {
  this->unload();

  params_.clear();
  bruteforce_threshold_ = kDefaultBfThreshold;

  searcher_state_ = STATE_INIT;
  return 0;
}

//! Load the index from a storage container
//!
//! Steps: deserialize the index meta, load the centroid index from its own
//! segment, publish the centroid reformer through the searcher params, then
//! load the inverted-list entity. On success the searcher moves to
//! STATE_LOADED and a fresh magic number is generated.
//!
//! @param container  storage holding the serialized index; must be non-null
//! @return 0 on success, otherwise an IndexError code
int IVFSearcher::load(IndexStorage::Pointer container,
                      IndexMetric::Pointer /*metric*/) {
  if (!container) {
    LOG_ERROR("Invalid container");
    return IndexError_InvalidArgument;
  }
  if (searcher_state_ != STATE_INITED) {
    LOG_ERROR("Initalize the searcher first before load index");
    return IndexError_Runtime;
  }

  // Measures the whole load; reported through stats_ at the end.
  ailego::ElapsedTime load_timer;

  int ret = IndexHelper::DeserializeFromStorage(container.get(), &meta_);
  if (ret != 0) {
    LOG_ERROR("Failed to deserialize meta from container");
    return ret;
  }

  //! Centroid index: stored in a dedicated segment of the container
  centroid_index_ = std::make_shared<IVFCentroidIndex>();
  if (!centroid_index_) {
    return IndexError_NoMemory;
  }
  auto centroid_seg = container->get(IVF_CENTROID_SEG_ID, 0);
  if (!centroid_seg) {
    LOG_ERROR("Failed to get segment %s", IVF_CENTROID_SEG_ID.c_str());
    return IndexError_InvalidFormat;
  }
  // Wrap the segment so it can be handed over as an IndexStorage.
  IndexStorage::Pointer centroid_storage =
      std::make_shared<IndexSegmentStorage>(centroid_seg);
  if (!centroid_storage) {
    return IndexError_NoMemory;
  }
  ret = centroid_storage->open(std::string(), false);
  if (ret != 0) {
    LOG_ERROR("IndexSegmentStorage load failed for %s", IndexError::What(ret));
    return ret;
  }
  ret = centroid_index_->load(centroid_storage, params_);
  if (ret != 0) {
    LOG_ERROR("Failed to load index for %s", IndexError::What(ret));
    return ret;
  }

  // Contexts are initialized from params_, so the reformer travels with them.
  auto reformer = centroid_index_->reformer();
  params_.set(PARAM_IVF_SEARCHER_CONVERTER_REFORMER, reformer);

  //! Inverted index
  entity_ = std::make_shared<IVFEntity>();
  if (!entity_) {
    return IndexError_NoMemory;
  }
  ret = entity_->load(container);
  ivf_check_error_code(ret);

  magic_ = IndexContext::GenerateMagic();

  stats_.set_loaded_count(entity_->vector_count());
  stats_.set_loaded_costtime(load_timer.milli_seconds());

  searcher_state_ = STATE_LOADED;
  return 0;
}

//! Unload the index
//!
//! Releases the centroid index and the entity, clears the magic number and
//! the load statistics, and puts the searcher back into STATE_INITED.
//! Always returns 0.
int IVFSearcher::unload(void) {
  centroid_index_.reset();
  entity_.reset();

  // Forget the magic generated by load().
  magic_ = 0;

  stats_.set_loaded_count(0UL);
  stats_.set_loaded_costtime(0UL);
  stats_.clear_attributes();

  searcher_state_ = STATE_INITED;
  return 0;
}

//! Brute-force search for a single query; forwards to the batch overload
//! with a query count of 1 and returns its result code.
int IVFSearcher::search_bf_impl(const void *query, const IndexQueryMeta &qmeta,
                                Context::Pointer &context) const {
  return search_bf_impl(query, qmeta, 1, context);
}

//! Brute-force search over a batch of `count` queries
//!
//! Each query is searched through the context's entity without going through
//! the centroid index; results are written to the context, one list per
//! query.
//!
//! @param query    contiguous query buffer, must be non-null
//! @param qmeta    query meta; its element size must equal the index meta's
//! @param count    number of queries in `query`
//! @param context  an IVFSearcherContext with topk already set
//! @return 0 on success, otherwise an IndexError code
int IVFSearcher::search_bf_impl(const void *query, const IndexQueryMeta &qmeta,
                                uint32_t count,
                                Context::Pointer &context) const {
  if (query == nullptr || meta_.element_size() != qmeta.element_size()) {
    LOG_ERROR("Null query or invalid qmeta");
    return IndexError_InvalidArgument;
  }
  auto *ctx = dynamic_cast<IVFSearcherContext *>(context.get());
  if (ctx == nullptr || ctx->topk() == 0) {
    LOG_ERROR("Invalid context or topk not set yet");
    return IndexError_InvalidArgument;
  }
  if (magic_ != ctx->magic()) {
    //! The context was produced by another searcher: rebind it to this one
    int ret = this->update_context(ctx);
    ivf_check_error_code(ret);
  }

  ctx->reset_results(count);
  auto &entity = ctx->entity();
  auto &filter = ctx->filter();
  // One heap is shared by all queries and emptied before each of them.
  auto &heap = ctx->mutable_result_heap();

  //! Convert the queries into the representation used by the inverted index;
  //! `query` is redirected to the converted buffer
  IndexQueryMeta iv_qmeta;
  int ret = entity->transform(query, qmeta, count, &query, &iv_qmeta);
  ivf_check_with_msg(ret, "Failed to transform querys");

  // TODO: do batch search in matrix
  for (size_t q = 0; q != count; ++q) {
    auto &query_stats = ctx->mutable_stats(q);
    heap.clear();

    const bool filtered = filter.is_valid();
    if (filtered) {
      ret = entity->search(query, filter, &heap, &query_stats);
    } else {
      ret = entity->search(query, &heap, &query_stats);
    }
    ivf_check_with_msg(ret, "Failed to search in entity for %s",
                       IndexError::What(ret));

    heap.sort();
    if (!filtered) {
      // Unfiltered searches leave local ids in the heap: map them to keys.
      ret = entity->retrieve_keys(&heap);
      ivf_check_error_code(ret);
    }
    entity->normalize(q, &heap);
    ctx->topk_to_result(q);

    // Step to the next transformed query.
    query = static_cast<const char *>(query) + iv_qmeta.element_size();
  }

  return 0;
}

//! Similarity search for a single query; forwards to the batch overload with
//! a query count of 1 and returns its result code.
int IVFSearcher::search_impl(const void *query, const IndexQueryMeta &qmeta,
                             Context::Pointer &context) const {
  return this->search_impl(query, qmeta, 1, context);
}

int IVFSearcher::search_impl(const void *query, const IndexQueryMeta &qmeta,
                             uint32_t count, Context::Pointer &context) const {
  if (entity_->vector_count() <= bruteforce_threshold_) {
    return this->search_bf_impl(query, qmeta, count, context);
  }
  if (!query || qmeta.element_size() != meta_.element_size()) {
    LOG_ERROR("Null query or invalid qmeta");
    return IndexError_InvalidArgument;
  }

  IVFSearcherContext *ctx = dynamic_cast<IVFSearcherContext *>(context.get());
  if (!ctx || ctx->topk() == 0) {
    LOG_ERROR("Invalid context or topk not set yet");
    return IndexError_InvalidArgument;
  }
  if (ctx->magic() != magic_) {
    //! context is created by another searcher
    int ret = update_context(ctx);
    ivf_check_error_code(ret);
  }

  ctx->reset_results(count);
  auto &entity = ctx->entity();
  auto &filter = ctx->filter();

  auto &centroid_index_ctx = ctx->centroid_searcher_ctx();
  int ret = centroid_index_->search(query, qmeta, count, centroid_index_ctx);
  ivf_check_error_code(ret);

  //! Transform the querys for querying in inverted vector index later
  IndexQueryMeta iv_qmeta;
  ret = entity->transform(query, qmeta, count, &query, &iv_qmeta);
  ivf_check_with_msg(ret, "Failed to transform querys");

  for (size_t q = 0; q < count; ++q) {
    auto &centroids = centroid_index_ctx->result(q);
    auto &context_stats = ctx->mutable_stats(q);
    auto &heap = ctx->mutable_result_heap();
    heap.clear();
    uint32_t total_scan_count = 0;
    for (size_t i = 0;
         i < centroids.size() && total_scan_count < ctx->max_scan_count();
         ++i) {
      auto cid = centroids[i].key();
      uint32_t scan_count = 0;
      if (!filter.is_valid()) {
        ret = entity->search(cid, query, &scan_count, &heap, &context_stats);
      } else {
        ret = entity->search(cid, query, filter, &scan_count, &heap,
                             &context_stats);
      }
      ivf_check_with_msg(ret, "Failed to search in entity for %s",
                         IndexError::What(ret));
      total_scan_count += scan_count;
    }
    heap.sort();  // sort the results
    if (!filter.is_valid()) {
      // mapping the local id to key if query without filter
      ret = entity->retrieve_keys(&heap);
      ivf_check_error_code(ret);
    }
    entity->normalize(q, &heap);
    ctx->topk_to_result(q);
    query = static_cast<const char *>(query) + iv_qmeta.element_size();
  }

  return 0;
}

//! Retrieve the searcher statistics (loaded count and load time are set by
//! load() and zeroed by unload()).
const IndexSearcher::Stats &IVFSearcher::stats(void) const {
  return stats_;
}

//! Create a searcher context bound to this searcher
//!
//! The context receives a clone of the entity and a fresh centroid-index
//! context, is initialized from the searcher params and is tagged with the
//! searcher's current magic number.
//!
//! @return the new context, or nullptr if no index is loaded or any step
//!         fails
IndexSearcher::Context::Pointer IVFSearcher::create_context() const {
  if (searcher_state_ != STATE_LOADED) {
    LOG_ERROR("Load the index first before create context");
    return nullptr;
  }

  auto cloned_entity = entity_->clone();
  if (!cloned_entity) {
    LOG_ERROR("Failed to clone IVFEntity");
    return nullptr;
  }

  auto centroid_ctx = centroid_index_->create_context();
  if (!centroid_ctx) {
    LOG_ERROR("Failed to create centroid index context");
    return nullptr;
  }

  // The constructor moves from centroid_ctx; it is not used afterwards.
  auto *raw_ctx =
      new (std::nothrow) IVFSearcherContext(cloned_entity, centroid_ctx);
  if (raw_ctx == nullptr) {
    LOG_ERROR("Failed to alloc IVFSearcherContext");
    return nullptr;
  }
  if (raw_ctx->init(params_) != 0) {
    // Not yet owned by a smart pointer, so release it by hand.
    delete raw_ctx;
    return nullptr;
  }

  raw_ctx->set_magic(magic_);
  return Context::Pointer(raw_ctx);
}

//! Create an index provider backed by a clone of the loaded entity
//!
//! The provider is built with the searcher's meta when the entity reports
//! that it has the original features, and with the entity's own meta
//! otherwise.
//!
//! @return the new provider, or a null pointer if no index is loaded or an
//!         allocation/clone fails
IndexProvider::Pointer IVFSearcher::create_provider(void) const {
  if (searcher_state_ != STATE_LOADED) {
    LOG_ERROR("Load the index first before create provider");
    return nullptr;
  }

  auto cloned_entity = entity_->clone();
  if (!cloned_entity) {
    LOG_ERROR("Failed to clone IVFEntity");
    return Provider::Pointer();
  }

  auto *raw_provider = new (std::nothrow) IVFIndexProvider(
      cloned_entity->has_orignal_feature() ? meta_ : cloned_entity->meta(),
      cloned_entity, "IVFSearcher");
  if (raw_provider == nullptr) {
    LOG_ERROR("Failed to alloc IVFIndexProvider");
    return Provider::Pointer();
  }

  return Provider::Pointer(raw_provider);
}

// Registers IVFSearcher via the index factory macro (the macro is defined
// outside this file; presumably it makes the class creatable by name).
INDEX_FACTORY_REGISTER_SEARCHER(IVFSearcher);

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/ivf/ivf_searcher.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <zvec/core/framework/index_searcher.h>
#include "ivf_centroid_index.h"
#include "ivf_entity.h"
#include "ivf_searcher_context.h"

namespace zvec {
namespace core {

/*! IVF Searcher
 */
class IVFSearcher : public IndexSearcher {
 public:
  //! Initialize Searcher
  virtual int init(const ailego::Params &parameters) override;

  //! Cleanup Searcher
  virtual int cleanup(void) override;

  //! Load index from container
  virtual int load(IndexStorage::Pointer container,
                   IndexMetric::Pointer metric) override;

  //! Unload index
  virtual int unload(void) override;

  //! Similarity brute force search
  virtual int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,
                             Context::Pointer &context) const override;

  //! Similarity brute force search
  virtual int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,
                             uint32_t count,
                             Context::Pointer &context) const override;

  //! Similarity search
  virtual int search_impl(const void *query, const IndexQueryMeta &qmeta,
                          Context::Pointer &context) const override;

  //! Similarity search
  virtual int search_impl(const void *query, const IndexQueryMeta &qmeta,
                          uint32_t count,
                          Context::Pointer &context) const override;

  //! Retrieve statistics
  virtual const Stats &stats(void) const override;

  //! Create a searcher context
  virtual Context::Pointer create_context(void) const override;

  //! Create a new iterator
  virtual IndexProvider::Pointer create_provider(void) const override;

  //! Retrieve meta of index
  virtual const IndexMeta &meta(void) const override {
    return meta_;
  }

  //! Retrieve params of index
  virtual const ailego::Params &params(void) const override {
    return params_;
  }

 protected:
  int update_context(IVFSearcherContext *ctx) const {
    auto entity = entity_->clone();
    if (!entity) {
      LOG_ERROR("Failed to clone QcEntity");
      return IndexError_Runtime;
    }

    //! The centroid index searcher may be different, so need to create one
    auto centroid_ctx = centroid_index_->create_context();
    if (!centroid_ctx) {
      LOG_ERROR("Failed to create centroid index searcher context");
      return IndexError_Runtime;
    }

    return ctx->update_context(entity, centroid_ctx, params_, magic_);
  }

 private:
  //! Constants
  static constexpr uint32_t kDefaultBfThreshold = 1000u;

  enum State { STATE_INIT = 0, STATE_INITED = 1, STATE_LOADED = 2 };

  //! Members
  IndexMeta meta_{};
  ailego::Params params_{};
  IVFCentroidIndex::Pointer centroid_index_{};
  IVFEntity::Pointer entity_{};
  uint32_t bruteforce_threshold_{kDefaultBfThreshold};
  uint32_t magic_{0};
  Stats stats_{};
  State searcher_state_{STATE_INIT};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/ivf/ivf_searcher_context.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <zvec/ailego/container/heap.h>
#include "ivf_entity.h"
#include "ivf_utility.h"

namespace zvec {
namespace core {

/*! IVF Searcher Context
 *
 *  Per-search working state shared by IVFSearcher and IVFStreamer: the entity
 *  to scan, the context used for the centroid index search, a result heap
 *  that is reused for every query, and one result list and one Stats object
 *  per query of the last batch.
 */
class IVFSearcherContext : public IndexSearcher::Context {
 public:
  //! Constructor
  //! NOTE: `centroid_ctx` is moved from; the caller's pointer must not be
  //! used afterwards.
  IVFSearcherContext(const IVFEntity::Pointer &ivf_entity,
                     IndexSearcher::Context::Pointer &centroid_ctx)
      : entity_(ivf_entity), centroid_searcher_ctx_(std::move(centroid_ctx)) {}

 public:
  //! Set topk of search result; also resizes the heap limit and refreshes
  //! its threshold
  virtual void set_topk(uint32_t k) override {
    topk_ = k;
    result_heap_.limit(topk_);
    result_heap_.set_threshold(this->threshold());
  }

  //! Retrieve search result of the first query
  virtual const IndexDocumentList &result(void) const override {
    // Same precondition as result(size_t): a batch must have been prepared.
    ailego_assert_with(!results_.empty(), "invalid index");
    return results_[0];
  }

  //! Retrieve search result with index
  virtual const IndexDocumentList &result(size_t idx) const override {
    ailego_assert_with(results_.size() > idx, "invalid index");
    return results_[idx];
  }

  //! Retrieve mutable result with index
  virtual IndexDocumentList *mutable_result(size_t idx) override {
    ailego_assert_with(idx < results_.size(), "invalid idx");
    return &results_[idx];
  }

  //! Retrieve the shared result heap (pointer form)
  inline IndexDocumentHeap *result_heap() {
    return &result_heap_;
  }

  //! Update the parameters of context
  //!
  //! Reads the brute-force threshold and the scan ratio from `params`, then
  //! derives the centroid topk and the max scan count from the entity sizes.
  //!
  //! @return 0 on success; IndexError_InvalidArgument if the scan ratio is
  //!         not a positive number, in which case the context is unchanged
  virtual int update(const ailego::Params &params) override {
    // Stage the values in locals so that a rejected update does not leave an
    // invalid ratio (or a half-applied threshold) behind in the context.
    uint32_t bf_threshold = bruteforce_threshold_;
    float ratio = scan_ratio_;
    params.get(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, &bf_threshold);
    params.get(PARAM_IVF_SEARCHER_SCAN_RATIO, &ratio);

    // Written as a negated '>' so that NaN is rejected too; converting NaN
    // to an integer below would be undefined behaviour.
    if (!(ratio > 0.0f)) {
      LOG_ERROR("Invalid params %s=%f", PARAM_IVF_SEARCHER_SCAN_RATIO.c_str(),
                ratio);
      return IndexError_InvalidArgument;
    }
    bruteforce_threshold_ = bf_threshold;
    scan_ratio_ = ratio;

    // Number of centroids to fetch per query: at least one.
    const uint32_t centroid_topk =
        std::max(static_cast<uint32_t>(
                     std::round(entity_->inverted_list_count() * scan_ratio_)),
                 1u);
    centroid_searcher_ctx_->set_topk(centroid_topk);

    // Never scan fewer vectors than the brute-force threshold.
    max_scan_count_ =
        static_cast<uint32_t>(std::ceil(entity_->vector_count() * scan_ratio_));
    max_scan_count_ = std::max(bruteforce_threshold_, max_scan_count_);
    return 0;
  }

  //! Retrieve magic number
  virtual uint32_t magic(void) const override {
    return magic_;
  }

 public:
  //! Initialize the context
  int init(const ailego::Params &params) {
    return this->update(params);
  }

  //! Update the magic number
  void set_magic(uint32_t mag) {
    magic_ = mag;
  }

  //! Get Topk Value
  uint32_t topk() const override {
    return topk_;
  }

  //! Retrieve scan ratio
  float scan_ratio(void) const {
    return scan_ratio_;
  }

  //! Retrieve max scan count
  uint32_t max_scan_count(void) const {
    return max_scan_count_;
  }

  //! Retrieve brute force threshold
  uint32_t bruteforce_threshold() const {
    return bruteforce_threshold_;
  }

  //! Retrieve the entity scanned by this context
  const IVFEntity::Pointer &entity() const {
    return entity_;
  }

  //! Retrieve the shared result heap (one heap, reused for every query)
  IndexDocumentHeap &mutable_result_heap() {
    return result_heap_;
  }

  //! Choose whether topk_to_result() attaches the vector to each result
  void set_fetch_vector(bool v) override {
    fetch_vector_ = v;
  }

  bool fetch_vector(void) const override {
    return fetch_vector_;
  }

  //! Reset all the query results for a batch of `qnum` queries
  void reset_results(size_t qnum) {
    results_.resize(qnum);
    stats_vec_.resize(qnum);
    for (size_t i = 0; i < qnum; ++i) {
      results_[i].clear();
      stats_vec_[i].clear();
    }
    result_heap_.clear();
    result_heap_.limit(topk_);
    result_heap_.set_threshold(this->threshold());
  }

  //! Update context, the context may be shared by different searcher
  //! NOTE: `centroid_ctx` is moved from. The entity and centroid context are
  //! replaced before the params are validated; the magic is only updated
  //! when update() succeeds.
  int update_context(IVFEntity::Pointer &new_entity,
                     IndexSearcher::Context::Pointer &centroid_ctx,
                     const ailego::Params &params, uint32_t magic_num) {
    entity_ = new_entity;
    centroid_searcher_ctx_ = std::move(centroid_ctx);
    int ret = this->update(params);
    ivf_check_error_code(ret);

    magic_ = magic_num;

    return 0;
  }

  //! Retrieve the centroid index context
  IndexSearcher::Context::Pointer &centroid_searcher_ctx(void) {
    return centroid_searcher_ctx_;
  }

  //! Retrieve the stats of query `idx`
  const Stats &stats(size_t idx = 0) const {
    ailego_assert_with(stats_vec_.size() > idx, "invalid index");
    return stats_vec_[idx];
  }

  //! Retrieve the mutable stats of query `idx`
  Stats &mutable_stats(size_t idx = 0) {
    ailego_assert_with(stats_vec_.size() > idx, "invalid index");
    return stats_vec_[idx];
  }

  //! Copy the heap content into the result list of query `idx`
  //!
  //! At most topk entries are copied, in sorted order, stopping at the first
  //! score above the context threshold. Nothing happens when the heap is
  //! empty.
  void topk_to_result(uint32_t idx) {
    if (ailego_unlikely(result_heap_.size() == 0)) {
      return;
    }

    ailego_assert_with(idx < results_.size(), "invalid idx");
    const uint32_t size =
        std::min(topk_, static_cast<uint32_t>(result_heap_.size()));
    result_heap_.sort();
    results_[idx].clear();
    for (uint32_t i = 0; i < size; ++i) {
      auto score = result_heap_[i].score();
      if (score > this->threshold()) {
        break;
      }

      key_t key = result_heap_[i].key();
      if (fetch_vector_) {
        // NOTE(review): the outcome of get_vector_by_key() is not checked;
        // the block is stored as-is — confirm this is intended.
        IndexStorage::MemoryBlock block;
        entity_->get_vector_by_key(key, block);
        results_[idx].emplace_back(key, score, key, block);
      } else {
        results_[idx].emplace_back(key, score);
      }
    }
  }

 private:
  //! Constants
  static constexpr float kDefaultScanRatio = 0.1f;
  static constexpr uint32_t kDefaultBfThreshold = 1000u;

  //! Members
  IVFEntity::Pointer entity_{};
  IndexSearcher::Context::Pointer centroid_searcher_ctx_{};
  IndexDocumentHeap result_heap_;
  std::vector<IndexDocumentList> results_{};
  std::vector<Stats> stats_vec_{};

  bool fetch_vector_{false};
  uint32_t topk_{0};
  uint32_t magic_{0};
  float scan_ratio_{kDefaultScanRatio};
  uint32_t max_scan_count_{0};
  uint32_t bruteforce_threshold_{kDefaultBfThreshold};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/ivf/ivf_streamer.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ivf_streamer.h"
#include <zvec/ailego/utility/time_helper.h>
#include <zvec/core/framework/index_segment_storage.h>
#include "ivf_centroid_index.h"
#include "ivf_index_provider.h"
#include "ivf_params.h"

namespace zvec {
namespace core {

//! Initialize the streamer
//!
//! Stores copies of `meta` and `parameters`, reads the brute-force threshold
//! key from the parameters and moves the streamer to STATE_INITED. Always
//! returns 0.
int IVFStreamer::init(const IndexMeta &meta, const ailego::Params &parameters) {
  // The result of get() is not inspected; what happens to the member when
  // the key is missing is decided by ailego::Params::get.
  parameters.get(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD,
                 &bruteforce_threshold_);
  params_ = parameters;
  meta_ = meta;

  searcher_state_ = STATE_INITED;
  return 0;
}

int IVFStreamer::cleanup(void) {
  this->unload();

  params_.clear();
  bruteforce_threshold_ = kDefaultBfThreshold;

  searcher_state_ = STATE_INIT;
  return 0;
}

//! Open the index stored in `storage`
//!
//! Steps: deserialize the index meta, load the centroid index from its own
//! segment, publish the centroid reformer through the streamer params, then
//! load the inverted-list entity. On success the streamer moves to
//! STATE_LOADED and a fresh magic number is generated.
//!
//! @param storage  storage holding the serialized index; must be non-null
//! @return 0 on success, otherwise an IndexError code
int IVFStreamer::open(IndexStorage::Pointer storage) {
  if (!storage) {
    LOG_ERROR("Invalid storage");
    return IndexError_InvalidArgument;
  }
  if (searcher_state_ != STATE_INITED) {
    LOG_ERROR("Initalize the searcher first before load index");
    return IndexError_Runtime;
  }

  // Measures the whole open; reported through stats_ at the end.
  ailego::ElapsedTime load_timer;

  int ret = IndexHelper::DeserializeFromStorage(storage.get(), &meta_);
  if (ret != 0) {
    LOG_ERROR("Failed to deserialize meta from storage");
    return ret;
  }

  //! Centroid index: stored in a dedicated segment of the storage
  centroid_index_ = std::make_shared<IVFCentroidIndex>();
  if (!centroid_index_) {
    return IndexError_NoMemory;
  }
  auto centroid_seg = storage->get(IVF_CENTROID_SEG_ID, 0);
  if (!centroid_seg) {
    LOG_ERROR("Failed to get segment %s", IVF_CENTROID_SEG_ID.c_str());
    return IndexError_InvalidFormat;
  }
  // Wrap the segment so it can be handed over as an IndexStorage.
  IndexStorage::Pointer centroid_storage =
      std::make_shared<IndexSegmentStorage>(centroid_seg);
  if (!centroid_storage) {
    return IndexError_NoMemory;
  }
  ret = centroid_storage->open(std::string(), false);
  if (ret != 0) {
    LOG_ERROR("IndexSegmentStorage load failed for %s", IndexError::What(ret));
    return ret;
  }
  ret = centroid_index_->load(centroid_storage, params_);
  if (ret != 0) {
    LOG_ERROR("Failed to load index for %s", IndexError::What(ret));
    return ret;
  }

  // Contexts are initialized from params_, so the reformer travels with them.
  auto reformer = centroid_index_->reformer();
  params_.set(PARAM_IVF_SEARCHER_CONVERTER_REFORMER, reformer);

  //! Inverted index
  entity_ = std::make_shared<IVFEntity>();
  if (!entity_) {
    return IndexError_NoMemory;
  }
  ret = entity_->load(storage);
  ivf_check_error_code(ret);

  magic_ = IndexContext::GenerateMagic();

  stats_.set_loaded_count(entity_->vector_count());
  stats_.set_loaded_costtime(load_timer.milli_seconds());

  searcher_state_ = STATE_LOADED;
  return 0;
}

//! Unload the index
//!
//! Releases the centroid index and the entity, clears the magic number and
//! the load statistics, and puts the streamer back into STATE_INITED.
//! Always returns 0.
int IVFStreamer::unload(void) {
  centroid_index_.reset();
  entity_.reset();

  // Forget the magic generated by open().
  magic_ = 0;

  stats_.set_loaded_count(0UL);
  stats_.set_loaded_costtime(0UL);
  stats_.clear_attributes();

  searcher_state_ = STATE_INITED;
  return 0;
}

//! Brute-force search for a single query; forwards to the batch overload
//! with a query count of 1 and returns its result code.
int IVFStreamer::search_bf_impl(const void *query, const IndexQueryMeta &qmeta,
                                Context::Pointer &context) const {
  return search_bf_impl(query, qmeta, 1, context);
}

//! Brute-force search over a batch of `count` queries
//!
//! Each query is searched through the context's entity without going through
//! the centroid index; results are written to the context, one list per
//! query.
//!
//! @param query    contiguous query buffer, must be non-null
//! @param qmeta    query meta; its element size must equal the index meta's
//! @param count    number of queries in `query`
//! @param context  an IVFSearcherContext with topk already set
//! @return 0 on success, otherwise an IndexError code
int IVFStreamer::search_bf_impl(const void *query, const IndexQueryMeta &qmeta,
                                uint32_t count,
                                Context::Pointer &context) const {
  if (query == nullptr || meta_.element_size() != qmeta.element_size()) {
    LOG_ERROR("Null query or invalid qmeta");
    return IndexError_InvalidArgument;
  }
  auto *ctx = dynamic_cast<IVFSearcherContext *>(context.get());
  if (ctx == nullptr || ctx->topk() == 0) {
    LOG_ERROR("Invalid context or topk not set yet");
    return IndexError_InvalidArgument;
  }
  if (magic_ != ctx->magic()) {
    //! The context was produced by another searcher: rebind it to this one
    int ret = this->update_context(ctx);
    ivf_check_error_code(ret);
  }

  ctx->reset_results(count);
  auto &entity = ctx->entity();
  auto &filter = ctx->filter();
  // One heap is shared by all queries and emptied before each of them.
  auto &heap = ctx->mutable_result_heap();

  //! Convert the queries into the representation used by the inverted index;
  //! `query` is redirected to the converted buffer
  IndexQueryMeta iv_qmeta;
  int ret = entity->transform(query, qmeta, count, &query, &iv_qmeta);
  ivf_check_with_msg(ret, "Failed to transform querys");

  // TODO: do batch search in matrix
  for (size_t q = 0; q != count; ++q) {
    auto &query_stats = ctx->mutable_stats(q);
    heap.clear();

    const bool filtered = filter.is_valid();
    if (filtered) {
      ret = entity->search(query, filter, &heap, &query_stats);
    } else {
      ret = entity->search(query, &heap, &query_stats);
    }
    ivf_check_with_msg(ret, "Failed to search in entity for %s",
                       IndexError::What(ret));

    heap.sort();
    if (!filtered) {
      // Unfiltered searches leave local ids in the heap: map them to keys.
      ret = entity->retrieve_keys(&heap);
      ivf_check_error_code(ret);
    }
    entity->normalize(q, &heap);
    ctx->topk_to_result(q);

    // Step to the next transformed query.
    query = static_cast<const char *>(query) + iv_qmeta.element_size();
  }

  return 0;
}

//! Similarity search for a single query; forwards to the batch overload with
//! a query count of 1 and returns its result code.
int IVFStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta,
                             Context::Pointer &context) const {
  return this->search_impl(query, qmeta, 1, context);
}

int IVFStreamer::search_impl(const void *query, const IndexQueryMeta &qmeta,
                             uint32_t count, Context::Pointer &context) const {
  if (entity_->vector_count() <= bruteforce_threshold_) {
    return this->search_bf_impl(query, qmeta, count, context);
  }
  if (!query || qmeta.element_size() != meta_.element_size()) {
    LOG_ERROR("Null query or invalid qmeta");
    return IndexError_InvalidArgument;
  }

  IVFSearcherContext *ctx = dynamic_cast<IVFSearcherContext *>(context.get());
  if (!ctx || ctx->topk() == 0) {
    LOG_ERROR("Invalid context or topk not set yet");
    return IndexError_InvalidArgument;
  }
  if (ctx->magic() != magic_) {
    //! context is created by another searcher
    int ret = update_context(ctx);
    ivf_check_error_code(ret);
  }

  ctx->reset_results(count);
  auto &entity = ctx->entity();
  auto &filter = ctx->filter();

  auto &centroid_index_ctx = ctx->centroid_searcher_ctx();
  int ret = centroid_index_->search(query, qmeta, count, centroid_index_ctx);
  ivf_check_error_code(ret);

  //! Transform the querys for querying in inverted vector index later
  IndexQueryMeta iv_qmeta;
  ret = entity->transform(query, qmeta, count, &query, &iv_qmeta);
  ivf_check_with_msg(ret, "Failed to transform querys");

  for (size_t q = 0; q < count; ++q) {
    auto &centroids = centroid_index_ctx->result(q);
    auto &context_stats = ctx->mutable_stats(q);
    auto &heap = ctx->mutable_result_heap();
    heap.clear();
    uint32_t total_scan_count = 0;
    for (size_t i = 0;
         i < centroids.size() && total_scan_count < ctx->max_scan_count();
         ++i) {
      auto cid = centroids[i].key();
      uint32_t scan_count = 0;
      if (!filter.is_valid()) {
        ret = entity->search(cid, query, &scan_count, &heap, &context_stats);
      } else {
        ret = entity->search(cid, query, filter, &scan_count, &heap,
                             &context_stats);
      }
      ivf_check_with_msg(ret, "Failed to search in entity for %s",
                         IndexError::What(ret));
      total_scan_count += scan_count;
    }
    heap.sort();  // sort the results
    if (!filter.is_valid()) {
      // mapping the local id to key if query without filter
      ret = entity->retrieve_keys(&heap);
      ivf_check_error_code(ret);
    }
    entity->normalize(q, &heap);
    ctx->topk_to_result(q);
    query = static_cast<const char *>(query) + iv_qmeta.element_size();
  }

  return 0;
}

//! Retrieve the streamer statistics (loaded count and load time are set by
//! open() and zeroed by unload()).
const IndexSearcher::Stats &IVFStreamer::stats(void) const {
  return stats_;
}

//! Create a searcher context bound to this streamer
//!
//! The context receives a clone of the entity and a fresh centroid-index
//! context, is initialized from the streamer params and is tagged with the
//! streamer's current magic number.
//!
//! @return the new context, or nullptr if no index is opened or any step
//!         fails
IndexSearcher::Context::Pointer IVFStreamer::create_context() const {
  if (searcher_state_ != STATE_LOADED) {
    LOG_ERROR("Load the index first before create context");
    return nullptr;
  }

  auto cloned_entity = entity_->clone();
  if (!cloned_entity) {
    LOG_ERROR("Failed to clone IVFEntity");
    return nullptr;
  }

  auto centroid_ctx = centroid_index_->create_context();
  if (!centroid_ctx) {
    LOG_ERROR("Failed to create centroid index context");
    return nullptr;
  }

  // The constructor moves from centroid_ctx; it is not used afterwards.
  auto *raw_ctx =
      new (std::nothrow) IVFSearcherContext(cloned_entity, centroid_ctx);
  if (raw_ctx == nullptr) {
    LOG_ERROR("Failed to alloc IVFSearcherContext");
    return nullptr;
  }
  if (raw_ctx->init(params_) != 0) {
    // Not yet owned by a smart pointer, so release it by hand.
    delete raw_ctx;
    return nullptr;
  }

  raw_ctx->set_magic(magic_);
  return Context::Pointer(raw_ctx);
}

//! Create an index provider backed by a clone of the opened entity
//!
//! The provider is built with the streamer's meta when the entity reports
//! that it has the original features, and with the entity's own meta
//! otherwise.
//!
//! @return the new provider, or a null pointer if no index is opened or an
//!         allocation/clone fails
IndexProvider::Pointer IVFStreamer::create_provider(void) const {
  if (searcher_state_ != STATE_LOADED) {
    LOG_ERROR("Load the index first before create provider");
    return nullptr;
  }

  auto cloned_entity = entity_->clone();
  if (!cloned_entity) {
    LOG_ERROR("Failed to clone IVFEntity");
    return Provider::Pointer();
  }

  auto *raw_provider = new (std::nothrow) IVFIndexProvider(
      cloned_entity->has_orignal_feature() ? meta_ : cloned_entity->meta(),
      cloned_entity, "IVFStreamer");
  if (raw_provider == nullptr) {
    LOG_ERROR("Failed to alloc IVFIndexProvider");
    return Provider::Pointer();
  }

  return Provider::Pointer(raw_provider);
}

// Registers IVFStreamer via the index factory macro (the macro is defined
// outside this file; presumably it makes the class creatable by name).
INDEX_FACTORY_REGISTER_STREAMER(IVFStreamer);

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/algorithm/ivf/ivf_streamer.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef __IVF_STREAMER_H__
#define __IVF_STREAMER_H__

#include <zvec/core/framework/index_streamer.h>
#include "ivf_centroid_index.h"
#include "ivf_entity.h"
#include "ivf_searcher_context.h"

namespace zvec {
namespace core {

/*! IVF Searcher
 */
class IVFStreamer : public IndexStreamer {
 public:
  //! Initialize Searcher
  virtual int init(const IndexMeta & /*meta*/,
                   const ailego::Params & /*params*/) override;

  //! Cleanup Searcher
  virtual int cleanup(void) override;

  //! Load index from container
  virtual int open(IndexStorage::Pointer storage) override;

  virtual int flush(uint64_t /*check_point*/) override {
    return 0;
  }
  virtual int close(void) override {
    return this->unload();
  }

  //! Unload index
  virtual int unload(void) override;

  //! Similarity brute force search
  virtual int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,
                             Context::Pointer &context) const override;

  //! Similarity brute force search
  virtual int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,
                             uint32_t count,
                             Context::Pointer &context) const override;

  //! Similarity search
  virtual int search_impl(const void *query, const IndexQueryMeta &qmeta,
                          Context::Pointer &context) const override;

  //! Similarity search
  virtual int search_impl(const void *query, const IndexQueryMeta &qmeta,
                          uint32_t count,
                          Context::Pointer &context) const override;

  //! Retrieve statistics
  virtual const Stats &stats(void) const override;

  //! Create a searcher context
  virtual Context::Pointer create_context(void) const override;

  //! Create a new iterator
  virtual IndexProvider::Pointer create_provider(void) const override;

  //! Retrieve meta of index
  virtual const IndexMeta &meta(void) const override {
    return meta_;
  }

  virtual int get_vector_by_id(
      const uint32_t id, IndexStorage::MemoryBlock &block) const override {
    return entity_->get_vector_by_key(id, block);
  }

 protected:
  int update_context(IVFSearcherContext *ctx) const {
    auto entity = entity_->clone();
    if (!entity) {
      LOG_ERROR("Failed to clone QcEntity");
      return IndexError_Runtime;
    }

    //! The centroid index searcher may be different, so need to create one
    auto centroid_ctx = centroid_index_->create_context();
    if (!centroid_ctx) {
      LOG_ERROR("Failed to create centroid index searcher context");
      return IndexError_Runtime;
    }

    return ctx->update_context(entity, centroid_ctx, params_, magic_);
  }

 private:
  //! Constants
  static constexpr uint32_t kDefaultBfThreshold = 1000u;

  enum State { STATE_INIT = 0, STATE_INITED = 1, STATE_LOADED = 2 };

  //! Members
  IndexMeta meta_{};
  ailego::Params params_{};
  IndexBuilder::Pointer builder_;
  IVFCentroidIndex::Pointer centroid_index_{};
  IVFEntity::Pointer entity_{};
  uint32_t bruteforce_threshold_{kDefaultBfThreshold};
  uint32_t magic_{0};
  Stats stats_{};
  State searcher_state_{STATE_INIT};
};

}  // namespace core
}  // namespace zvec

#endif  //__IVF_STREAMER_H__


================================================
FILE: src/core/algorithm/ivf/ivf_utility.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <algorithm>
#include <memory>
#include <mutex>
#include <numeric>
#include <vector>
#include <ailego/utility/matrix_helper.h>
#include <zvec/ailego/utility/time_helper.h>

namespace zvec {
namespace core {

//! Return `code` from the enclosing function when it is non-zero.
//! Wrapped in do/while(0) so the macro behaves as one statement and a
//! following `else` cannot bind to the macro's internal `if`.
//! NOTE: `code` is expanded twice; pass a plain variable, not a call.
#ifndef ivf_check_error_code
#define ivf_check_error_code(code)                 \
  do {                                             \
    if (ailego_unlikely((code) != 0)) return code; \
  } while (0)
#endif

//! Return `code` from the enclosing function when `cond` is false.
#ifndef ivf_assert
#define ivf_assert(cond, code)                 \
  do {                                         \
    if (ailego_unlikely(!(cond))) return code; \
  } while (0)
#endif

//! Like ivf_check_error_code, but logs a formatted error before returning.
#ifndef ivf_check_with_msg
#define ivf_check_with_msg(code, fmt, args...) \
  do {                                         \
    if (ailego_unlikely((code) != 0)) {        \
      LOG_ERROR(fmt, ##args);                  \
      return code;                             \
    }                                          \
  } while (0)
#endif

//! Like ivf_assert, but logs a formatted error before returning `err`.
#ifndef ivf_assert_with_msg
#define ivf_assert_with_msg(cond, err, fmt, args...) \
  do {                                               \
    if (ailego_unlikely(!(cond))) {                  \
      LOG_ERROR(fmt, ##args);                        \
      return err;                                    \
    }                                                \
  } while (0)
#endif

/*! IVF Utility
 *
 *  Stateless helpers shared by the IVF algorithm code.
 */
class IVFUtility {
 public:
  //! Generate a path by appending the current monotonic time in
  //! microseconds to `prefix`. Despite the name, no random source is used.
  static inline std::string GenerateRandomPath(const std::string &prefix) {
    uint64_t timestamp = ailego::Monotime::MicroSeconds();
    return prefix + std::to_string(timestamp);
  }

  //! Compute the default scan ratio for the total number of vectors.
  //! The result is never below 0.0001.
  static inline float ComputeScanRatio(size_t vector_count) {
    // the fitting function for the follow points: 1000000(0.02)
    // 10000000(0.01) 50000000(0.005) 100000000(0.001)
    //
    // std::log(0) is -inf, which would turn the ratio into +inf. Treat an
    // empty collection like a single vector so the result stays finite.
    const size_t effective_count = std::max<size_t>(vector_count, 1);
    float scan_ratio = -0.004 * std::log(effective_count) + 0.0751;
    scan_ratio = std::max(scan_ratio, 0.0001f);
    return scan_ratio;
  }

  //! Transpose the vectors in row major order to column major order
  static inline void Transpose(size_t align_size, const void *src, size_t m,
                               size_t dim, void *dst);

  //! Transpose the vectors in column major order to row major order
  static inline void ReverseTranspose(size_t align_size, const void *src,
                                      size_t m, size_t dim, void *dst);

  //! Aligned size of a block vectors buffer
  static inline size_t AlignedSize(size_t fnum, size_t element_size);

  //! Aligned size of one vector buffer
  static inline size_t AlignedSize(size_t element_size);

  //! Sort arr with size in ascending order, and keep the index position
  //! n2o keep the mapping: new position => origin position
  //! For example, the input arr = [5, 3, 9, 6, 7], size = 5, after sort
  //      arr = [3, 5, 6, 7, 9]
  //      n2o = [1, 0, 3, 4, 2]
  //! arr is permuted in place (no copy of the values is made); the only
  //! extra allocation is one temporary index vector of `size` entries.
  //! NOTE: the index type I must be able to represent `size`.
  template <typename T, typename I>
  static void Sort(T *arr, std::vector<I> *n2o, size_t size) {
    std::vector<I> o2n;
    o2n.resize(size);
    n2o->resize(size);

    // Sort the indices by the values they refer to
    std::iota(n2o->begin(), n2o->end(), 0U);
    std::sort(n2o->begin(), n2o->end(),
              [&](I i, I j) { return arr[i] < arr[j]; });
    // Remember the inverse mapping, because the cycle walk below
    // overwrites n2o with the identity permutation.
    for (I i = 0U; i < size; ++i) {
      o2n[(*n2o)[i]] = i;
    }
    //! reorder arr in place, according to given n2o index
    for (I i = 0; i < size; ++i) {
      if (i != (*n2o)[i]) {
        T tmp = arr[i];
        I j = i, k;
        while (i != (k = (*n2o)[j])) {
          arr[j] = arr[k];
          (*n2o)[j] = j;
          j = k;
        }
        arr[j] = tmp;
        (*n2o)[j] = j;
      }
    }

    // Rebuild n2o from the saved inverse mapping
    for (I i = 0U; i < size; ++i) {
      (*n2o)[o2n[i]] = i;
    }
  }

  //! Transpose one vector in block: gather N elements of type T from src
  //! with a stride of M elements into the contiguous dst.
  template <typename T>
  static inline void TransposeOne(const void *src, size_t M, size_t N,
                                  void *dst) {
    for (size_t i = 0; i < N; ++i) {
      reinterpret_cast<T *>(dst)[i] = reinterpret_cast<const T *>(src)[i * M];
    }
  }
};

//! Dispatch the transpose to ailego::MatrixHelper using an unsigned integer
//! type whose width in bytes equals `align_size` (2, 4 or 8). For any
//! other value nothing is done and `dst` is left unmodified.
void IVFUtility::Transpose(size_t align_size, const void *src, size_t m,
                           size_t dim, void *dst) {
  if (align_size == 2) {
    ailego::MatrixHelper::Transpose<uint16_t>(src, m, dim, dst);
  } else if (align_size == 4) {
    ailego::MatrixHelper::Transpose<uint32_t>(src, m, dim, dst);
  } else if (align_size == 8) {
    ailego::MatrixHelper::Transpose<uint64_t>(src, m, dim, dst);
  }
}

//! Dispatch the reverse transpose to ailego::MatrixHelper using an unsigned
//! integer type whose width in bytes equals `align_size` (2, 4 or 8). For
//! any other value nothing is done and `dst` is left unmodified.
void IVFUtility::ReverseTranspose(size_t align_size, const void *src, size_t m,
                                  size_t dim, void *dst) {
  if (align_size == 2) {
    ailego::MatrixHelper::ReverseTranspose<uint16_t>(src, m, dim, dst);
  } else if (align_size == 4) {
    ailego::MatrixHelper::ReverseTranspose<uint32_t>(src, m, dim, dst);
  } else if (align_size == 8) {
    ailego::MatrixHelper::ReverseTranspose<uint64_t>(src, m, dim, dst);
  }
}

//! Size of a buffer holding `fnum` elements of `element_size` bytes, passed
//! through ailego_align with a bound of 32 (presumably rounding up to a
//! multiple of 32 bytes - confirm against the macro definition).
size_t IVFUtility::AlignedSize(size_t fnum, size_t element_size) {
  return ailego_align(fnum * element_size, 32);
}

//! Size of a single-element buffer of `element_size` bytes, passed through
//! ailego_align with a bound of 32 (presumably rounding up to a multiple of
//! 32 bytes - confirm against the macro definition).
size_t IVFUtility::AlignedSize(size_t element_size) {
  return ailego_align(element_size, 32);
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/framework/CMakeLists.txt
================================================
# Shared project CMake helpers (presumably where cc_library() is defined)
# and build options.
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

# Static library built from every .cc file in this directory, linked
# against zvec_ailego, with this directory and src/core on the include path.
cc_library(
    NAME core_framework 
    STATIC STRICT ALWAYS_LINK
    SRCS *.cc
    LIBS zvec_ailego
    INCS . ${PROJECT_ROOT_DIR}/src/core
    VERSION "${PROXIMA_ZVEC_VERSION}"
  )


================================================
FILE: src/core/framework/index_cluster.cc
================================================
// namespace aitheta2
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <zvec/core/framework/index_bundle.h>
#include <zvec/core/framework/index_cluster.h>
#include <zvec/core/framework/index_error.h>

namespace zvec {
namespace core {

//! Bundle segment holding the concatenated centroid feature buffers
static const std::string CLUSTER_CENTROIDS_FEATURES_NAME =
    "IndexCluster.Centroids.Features";
//! Bundle segment holding one ItemCentroidFormat record per centroid
static const std::string CLUSTER_CENTROIDS_INDEXES_NAME =
    "IndexCluster.Centroids.Indexes";

/*! Item Centroid Format
 *
 *  Fixed-size record serialized once per centroid. Records are written in
 *  depth-first order, so a parent always precedes its children.
 */
struct ItemCentroidFormat {
  uint32_t parent;      //!< Record index of the parent; uint32_t(-1) = root
  uint32_t reserved0_;  //!< Written as 0
  uint64_t follows;     //!< Value of Centroid::follows()
  double score;         //!< Value of Centroid::score()
  uint64_t reserved1_;  //!< Written as 0
};

/*! Count the direct children of every record.
 *
 *  On success `out` has `count + 1` entries: out[0] is the number of
 *  top-level records (parent == uint32_t(-1)) and out[i + 1] is the number
 *  of records whose parent is record `i`.
 *
 *  \return false when a parent index is out of range or when there is no
 *          top-level record at all; true otherwise.
 */
static inline bool GatherSubitemsCount(const ItemCentroidFormat *format,
                                       size_t count,
                                       std::vector<uint32_t> *out) {
  // assign() rather than resize(): a vector that already holds values must
  // start from zeroed counters, otherwise the tallies are wrong.
  out->assign(count + 1, 0);

  for (const ItemCentroidFormat *it = format, *end = format + count; it != end;
       ++it) {
    // Shift by one so that the root marker uint32_t(-1) wraps to slot 0
    uint32_t parent = it->parent + 1;
    if (parent > count) {
      return false;
    }
    (*out)[parent] += 1;
  }
  return (out->front() != 0);
}

/*! Rebuild a centroid tree from the two segments written by Serialize().
 *
 *  \param meta    Supplies the byte size of one centroid feature
 *  \param bundle  Bundle holding the features and indexes segments
 *  \param cents   Output list; cleared and refilled with the top-level
 *                 centroids (children are attached via mutable_subitems())
 *  \return 0 on success; IndexError_InvalidArgument for null arguments, a
 *          zero element size or missing segments; IndexError_InvalidLength
 *          when segment sizes are inconsistent; IndexError_InvalidFormat
 *          when the parent links are malformed.
 */
int IndexCluster::Deserialize(const IndexMeta &meta,
                              IndexBundle::Pointer bundle,
                              CentroidList *cents) {
  if (!bundle || !cents) {
    return IndexError_InvalidArgument;
  }

  // A zero element size would make the modulo and division below undefined.
  const size_t feat_size = meta.element_size();
  if (feat_size == 0) {
    return IndexError_InvalidArgument;
  }

  ailego::BlobWrap features = bundle->get(CLUSTER_CENTROIDS_FEATURES_NAME);
  ailego::BlobWrap indexes = bundle->get(CLUSTER_CENTROIDS_INDEXES_NAME);

  if (!features.is_valid() || !indexes.is_valid()) {
    return IndexError_InvalidArgument;
  }

  if (features.size() % feat_size != 0 ||
      indexes.size() % sizeof(ItemCentroidFormat) != 0) {
    return IndexError_InvalidLength;
  }

  // Both segments must describe the same number of centroids
  size_t count = features.size() / feat_size;
  if (indexes.size() / sizeof(ItemCentroidFormat) != count) {
    return IndexError_InvalidLength;
  }

  const ItemCentroidFormat *format =
      reinterpret_cast<const ItemCentroidFormat *>(indexes.buffer());
  std::vector<uint32_t> subitems;

  if (!GatherSubitemsCount(format, count, &subitems)) {
    return IndexError_InvalidFormat;
  }

  // `items` keeps raw pointers into the centroid lists. Every list is
  // reserved to its final size up front (presumably so that emplace_back
  // never reallocates and those pointers stay valid).
  std::vector<Centroid *> items;
  items.reserve(count);
  cents->clear();
  cents->reserve(subitems.front());

  const uint8_t *feat = reinterpret_cast<const uint8_t *>(features.buffer());

  for (size_t i = 0; i < count; ++i, ++format, feat += feat_size) {
    CentroidList *current = cents;

    if (format->parent != static_cast<uint32_t>(-1)) {
      // A parent must already have been materialized
      if (format->parent >= items.size()) {
        return IndexError_InvalidFormat;
      }
      current = items[format->parent]->mutable_subitems();
    }
    current->emplace_back(feat, feat_size);

    // Update information
    Centroid *last_one = &(current->back());
    last_one->set_follows(static_cast<size_t>(format->follows));
    last_one->set_score(format->score);
    last_one->mutable_subitems()->reserve(subitems[i + 1]);
    items.push_back(last_one);
  }
  return 0;
}

//! Append `cents` and, recursively, their sub-centroids to the two output
//! buffers in depth-first order. The parent of every entry in `cents` is
//! the record written last before this call; when nothing has been written
//! yet the index wraps to uint32_t(-1).
static void SerializeToBuffers(const IndexCluster::CentroidList &cents,
                               std::string *features, std::string *indexes) {
  const uint32_t parent_index =
      static_cast<uint32_t>(indexes->size() / sizeof(ItemCentroidFormat)) - 1;

  for (const auto &centroid : cents) {
    ItemCentroidFormat record{parent_index, 0, centroid.follows(),
                              centroid.score(), 0};
    indexes->append(reinterpret_cast<const char *>(&record), sizeof(record));
    features->append(reinterpret_cast<const char *>(centroid.feature()),
                     centroid.size());

    if (centroid.subitems().empty()) {
      continue;
    }
    SerializeToBuffers(centroid.subitems(), features, indexes);
  }
}

int IndexCluster::Serialize(const IndexMeta &meta, const CentroidList &cents,
                            IndexBundle::Pointer *out) {
  size_t cents_total = cents.size();

  // Check the centroids
  for (const auto &it : cents) {
    if (!it.is_matched(meta)) {
      return IndexError_Mismatch;
    }
    cents_total += it.subcount();
  }

  std::string features, indexes;
  features.reserve(cents_total * meta.element_size());
  indexes.reserve(cents_total * sizeof(ItemCentroidFormat));
  SerializeToBuffers(cents, &features, &indexes);

  std::shared_ptr<MemoryIndexBundle> bundle =
      std::make_shared<MemoryIndexBundle>();

  bundle->set(CLUSTER_CENTROIDS_FEATURES_NAME, std::move(features));
  bundle->set(CLUSTER_CENTROIDS_INDEXES_NAME, std::move(indexes));
  *out = std::move(bundle);

  return 0;
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/framework/index_context.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <atomic>
#include <random>
#include <zvec/core/framework/index_context.h>

namespace zvec {
namespace core {

//! Hand out magic numbers from a process-wide atomic counter that is
//! seeded once from std::random_device. Each call returns the current
//! value and advances the counter by one.
uint32_t IndexContext::GenerateMagic(void) {
  static std::atomic<uint32_t> next_magic{std::random_device()()};
  return next_magic++;
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/framework/index_converter.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <zvec/core/framework/index_converter.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_helper.h>

namespace zvec {
namespace core {

//! Train `converter` on the holder and, if training succeeds, transform
//! the same data. The holder is wrapped by IndexHelper::MakeTwoPassHolder
//! first (presumably so it can be consumed by both steps). Returns the
//! first non-zero result, or the result of transform().
int IndexConverter::TrainAndTransform(const IndexConverter::Pointer &converter,
                                      IndexHolder::Pointer holder) {
  auto wrapped_holder = IndexHelper::MakeTwoPassHolder(std::move(holder));

  const int train_ret = converter->train(wrapped_holder);
  if (train_ret != 0) {
    return train_ret;
  }
  return converter->transform(std::move(wrapped_holder));
}

//! Run TrainAndTransform() and, when it succeeds, dump the converter
//! through `dumper`. Returns the first non-zero result, or the result of
//! dump().
int IndexConverter::TrainTransformAndDump(
    const IndexConverter::Pointer &converter, IndexHolder::Pointer holder,
    const IndexDumper::Pointer &dumper) {
  const int convert_ret =
      IndexConverter::TrainAndTransform(converter, std::move(holder));
  if (convert_ret != 0) {
    return convert_ret;
  }
  return converter->dump(dumper);
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/framework/index_error.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <zvec/core/framework/index_error.h>

namespace zvec {
namespace core {

// General errors
INDEX_ERROR_CODE_DEFINE(Success, 0, "Success");
INDEX_ERROR_CODE_DEFINE(Runtime, 1, "Runtime error");
INDEX_ERROR_CODE_DEFINE(Logic, 2, "Logic error");
INDEX_ERROR_CODE_DEFINE(Type, 3, "Type error");
INDEX_ERROR_CODE_DEFINE(System, 4, "System call error");
INDEX_ERROR_CODE_DEFINE(Cast, 5, "Cast error");
INDEX_ERROR_CODE_DEFINE(IO, 6, "IO error");
INDEX_ERROR_CODE_DEFINE(AuthExpired, 7, "Auth expired error");

// State and resource errors
INDEX_ERROR_CODE_DEFINE(NotImplemented, 11, "Not implemented");
INDEX_ERROR_CODE_DEFINE(Unsupported, 12, "Unsupported");
INDEX_ERROR_CODE_DEFINE(Denied, 13, "Permission denied");
INDEX_ERROR_CODE_DEFINE(Canceled, 14, "Operation canceled");
INDEX_ERROR_CODE_DEFINE(Overflow, 15, "Overflow");
INDEX_ERROR_CODE_DEFINE(Underflow, 16, "Underflow");
INDEX_ERROR_CODE_DEFINE(OutOfRange, 17, "Out of range");
INDEX_ERROR_CODE_DEFINE(NoBuffer, 18, "No buffer space available");
INDEX_ERROR_CODE_DEFINE(NoMemory, 19, "Not enough space");
INDEX_ERROR_CODE_DEFINE(NoParamFound, 20, "No parameter found");
INDEX_ERROR_CODE_DEFINE(NoReady, 21, "No ready");
INDEX_ERROR_CODE_DEFINE(NoExist, 22, "No exist");
INDEX_ERROR_CODE_DEFINE(Exist, 23, "Already exist");
INDEX_ERROR_CODE_DEFINE(Mismatch, 24, "Mismatch");
INDEX_ERROR_CODE_DEFINE(Duplicate, 25, "Duplicate");
INDEX_ERROR_CODE_DEFINE(Uninitialized, 26, "Uninitialized");

// Validation errors
INDEX_ERROR_CODE_DEFINE(InvalidArgument, 31, "Invalid argument");
INDEX_ERROR_CODE_DEFINE(InvalidFormat, 32, "Invalid format");
INDEX_ERROR_CODE_DEFINE(InvalidLength, 33, "Invalid length");
INDEX_ERROR_CODE_DEFINE(InvalidChecksum, 34, "Invalid checksum");
INDEX_ERROR_CODE_DEFINE(InvalidValue, 35, "Invalid value");

// File system and serialization errors
INDEX_ERROR_CODE_DEFINE(CreateDirectory, 101, "Create directory error");
INDEX_ERROR_CODE_DEFINE(OpenDirectory, 102, "Open directory error");
INDEX_ERROR_CODE_DEFINE(Serialize, 105, "Serialize error");
INDEX_ERROR_CODE_DEFINE(Deserialize, 106, "Deserialize error");
INDEX_ERROR_CODE_DEFINE(CreateFile, 111, "Create file error");
INDEX_ERROR_CODE_DEFINE(OpenFile, 112, "Open file error");
INDEX_ERROR_CODE_DEFINE(SeekFile, 113, "Seek file error");
INDEX_ERROR_CODE_DEFINE(CloseFile, 114, "Close file error");
INDEX_ERROR_CODE_DEFINE(TruncateFile, 115, "Truncate file error");
INDEX_ERROR_CODE_DEFINE(MMapFile, 116, "MMap file error");
INDEX_ERROR_CODE_DEFINE(FlushFile, 117, "Flush file error");
INDEX_ERROR_CODE_DEFINE(WriteData, 121, "Write data error");
INDEX_ERROR_CODE_DEFINE(ReadData, 122, "Read data error");

// Index errors. PackIndex/UnpackIndex previously reused the ReadData text,
// which made the three codes indistinguishable in messages.
INDEX_ERROR_CODE_DEFINE(PackIndex, 201, "Pack index error");
INDEX_ERROR_CODE_DEFINE(UnpackIndex, 202, "Unpack index error");
INDEX_ERROR_CODE_DEFINE(IndexLoaded, 203, "Index loaded");
INDEX_ERROR_CODE_DEFINE(NoIndexLoaded, 204, "No index loaded");
INDEX_ERROR_CODE_DEFINE(NoTrained, 205, "No trained");
INDEX_ERROR_CODE_DEFINE(IndexFull, 206, "Index full");

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/framework/index_factory.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <zvec/core/framework/index_factory.h>

namespace zvec {
namespace core {

//! Ask the IndexMetric factory for an instance registered as `name` and
//! tag it with that name. The pointer is returned as-is when it is empty.
IndexMetric::Pointer IndexFactory::CreateMetric(const std::string &name) {
  IndexMetric::Pointer metric =
      ailego::Factory<IndexMetric>::MakeShared(name.c_str());
  if (!metric) {
    return metric;
  }
  metric->set_name(name);
  return metric;
}

//! Forward `name` to ailego::Factory<IndexMetric>::Has() and return its
//! result.
bool IndexFactory::HasMetric(const std::string &name) {
  return ailego::Factory<IndexMetric>::Has(name.c_str());
}

//! Return the list reported by ailego::Factory<IndexMetric>::Classes().
std::vector<std::string> IndexFactory::AllMetrics(void) {
  return ailego::Factory<IndexMetric>::Classes();
}

//! Ask the IndexLogger factory for an instance registered as `name` and
//! tag it with that name. The pointer is returned as-is when it is empty.
IndexLogger::Pointer IndexFactory::CreateLogger(const std::string &name) {
  IndexLogger::Pointer logger =
      ailego::Factory<IndexLogger>::MakeShared(name.c_str());
  if (!logger) {
    return logger;
  }
  logger->set_name(name);
  return logger;
}

//! Forward `name` to ailego::Factory<IndexLogger>::Has() and return its
//! result.
bool IndexFactory::HasLogger(const std::string &name) {
  return ailego::Factory<IndexLogger>::Has(name.c_str());
}

//! Return the list reported by ailego::Factory<IndexLogger>::Classes().
std::vector<std::string> IndexFactory::AllLoggers(void) {
  return ailego::Factory<IndexLogger>::Classes();
}

//! Ask the IndexDumper factory for an instance registered as `name` and
//! tag it with that name. The pointer is returned as-is when it is empty.
IndexDumper::Pointer IndexFactory::CreateDumper(const std::string &name) {
  IndexDumper::Pointer dumper =
      ailego::Factory<IndexDumper>::MakeShared(name.c_str());
  if (!dumper) {
    return dumper;
  }
  dumper->set_name(name);
  return dumper;
}

//! Forward `name` to ailego::Factory<IndexDumper>::Has() and return its
//! result.
bool IndexFactory::HasDumper(const std::string &name) {
  return ailego::Factory<IndexDumper>::Has(name.c_str());
}

//! Return the list reported by ailego::Factory<IndexDumper>::Classes().
std::vector<std::string> IndexFactory::AllDumpers(void) {
  return ailego::Factory<IndexDumper>::Classes();
}

//! Ask the IndexStorage factory for an instance registered as `name` and
//! tag it with that name. The pointer is returned as-is when it is empty.
IndexStorage::Pointer IndexFactory::CreateStorage(const std::string &name) {
  IndexStorage::Pointer storage =
      ailego::Factory<IndexStorage>::MakeShared(name.c_str());
  if (!storage) {
    return storage;
  }
  storage->set_name(name);
  return storage;
}

//! Forward `name` to ailego::Factory<IndexStorage>::Has() and return its
//! result.
bool IndexFactory::HasStorage(const std::string &name) {
  return ailego::Factory<IndexStorage>::Has(name.c_str());
}

//! Return the list reported by ailego::Factory<IndexStorage>::Classes().
std::vector<std::string> IndexFactory::AllStorages(void) {
  return ailego::Factory<IndexStorage>::Classes();
}

//! Ask the IndexConverter factory for an instance registered as `name` and
//! tag it with that name. The pointer is returned as-is when it is empty.
IndexConverter::Pointer IndexFactory::CreateConverter(const std::string &name) {
  IndexConverter::Pointer converter =
      ailego::Factory<IndexConverter>::MakeShared(name.c_str());
  if (!converter) {
    return converter;
  }
  converter->set_name(name);
  return converter;
}

//! Forward `name` to ailego::Factory<IndexConverter>::Has() and return its
//! result.
bool IndexFactory::HasConverter(const std::string &name) {
  return ailego::Factory<IndexConverter>::Has(name.c_str());
}

//! Return the list reported by ailego::Factory<IndexConverter>::Classes().
std::vector<std::string> IndexFactory::AllConverters(void) {
  return ailego::Factory<IndexConverter>::Classes();
}

//! Ask the IndexReformer factory for an instance registered as `name` and
//! tag it with that name. The pointer is returned as-is when it is empty.
IndexReformer::Pointer IndexFactory::CreateReformer(const std::string &name) {
  IndexReformer::Pointer reformer =
      ailego::Factory<IndexReformer>::MakeShared(name.c_str());
  if (!reformer) {
    return reformer;
  }
  reformer->set_name(name);
  return reformer;
}

//! Forward `name` to ailego::Factory<IndexReformer>::Has() and return its
//! result.
bool IndexFactory::HasReformer(const std::string &name) {
  return ailego::Factory<IndexReformer>::Has(name.c_str());
}

//! Return the list reported by ailego::Factory<IndexReformer>::Classes().
std::vector<std::string> IndexFactory::AllReformers(void) {
  return ailego::Factory<IndexReformer>::Classes();
}

//! Ask the IndexTrainer factory for an instance registered as `name` and
//! tag it with that name. The pointer is returned as-is when it is empty.
IndexTrainer::Pointer IndexFactory::CreateTrainer(const std::string &name) {
  IndexTrainer::Pointer trainer =
      ailego::Factory<IndexTrainer>::MakeShared(name.c_str());
  if (!trainer) {
    return trainer;
  }
  trainer->set_name(name);
  return trainer;
}

//! Forward `name` to ailego::Factory<IndexTrainer>::Has() and return its
//! result.
bool IndexFactory::HasTrainer(const std::string &name) {
  return ailego::Factory<IndexTrainer>::Has(name.c_str());
}

//! Return the list reported by ailego::Factory<IndexTrainer>::Classes().
std::vector<std::string> IndexFactory::AllTrainers(void) {
  return ailego::Factory<IndexTrainer>::Classes();
}

//! Ask the IndexBuilder factory for an instance registered as `name` and
//! tag it with that name. The pointer is returned as-is when it is empty.
IndexBuilder::Pointer IndexFactory::CreateBuilder(const std::string &name) {
  IndexBuilder::Pointer builder =
      ailego::Factory<IndexBuilder>::MakeShared(name.c_str());
  if (!builder) {
    return builder;
  }
  builder->set_name(name);
  return builder;
}

//! Forward `name` to ailego::Factory<IndexBuilder>::Has() and return its
//! result.
bool IndexFactory::HasBuilder(const std::string &name) {
  return ailego::Factory<IndexBuilder>::Has(name.c_str());
}

//! Return the list reported by ailego::Factory<IndexBuilder>::Classes().
std::vector<std::string> IndexFactory::AllBuilders(void) {
  return ailego::Factory<IndexBuilder>::Classes();
}

//! Ask the IndexSearcher factory for an instance registered as `name` and
//! tag it with that name. The pointer is returned as-is when it is empty.
IndexSearcher::Pointer IndexFactory::CreateSearcher(const std::string &name) {
  IndexSearcher::Pointer searcher =
      ailego::Factory<IndexSearcher>::MakeShared(name.c_str());
  if (!searcher) {
    return searcher;
  }
  searcher->set_name(name);
  return searcher;
}

//! Forward `name` to ailego::Factory<IndexSearcher>::Has() and return its
//! result.
bool IndexFactory::HasSearcher(const std::string &name) {
  return ailego::Factory<IndexSearcher>::Has(name.c_str());
}

//! Return the list reported by ailego::Factory<IndexSearcher>::Classes().
std::vector<std::string> IndexFactory::AllSearchers(void) {
  return ailego::Factory<IndexSearcher>::Classes();
}

//! Ask the IndexStreamer factory for an instance registered as `name` and
//! tag it with that name. The pointer is returned as-is when it is empty.
IndexStreamer::Pointer IndexFactory::CreateStreamer(const std::string &name) {
  IndexStreamer::Pointer streamer =
      ailego::Factory<IndexStreamer>::MakeShared(name.c_str());
  if (!streamer) {
    return streamer;
  }
  streamer->set_name(name);
  return streamer;
}

//! Forward `name` to ailego::Factory<IndexStreamer>::Has() and return its
//! result.
bool IndexFactory::HasStreamer(const std::string &name) {
  return ailego::Factory<IndexStreamer>::Has(name.c_str());
}

//! Return the list reported by ailego::Factory<IndexStreamer>::Classes().
std::vector<std::string> IndexFactory::AllStreamers(void) {
  return ailego::Factory<IndexStreamer>::Classes();
}

//! Ask the IndexReducer factory for an instance registered as `name` and
//! tag it with that name. The pointer is returned as-is when it is empty.
IndexReducer::Pointer IndexFactory::CreateReducer(const std::string &name) {
  IndexReducer::Pointer reducer =
      ailego::Factory<IndexReducer>::MakeShared(name.c_str());
  if (!reducer) {
    return reducer;
  }
  reducer->set_name(name);
  return reducer;
}

//! Forward `name` to ailego::Factory<IndexReducer>::Has() and return its
//! result.
bool IndexFactory::HasReducer(const std::string &name) {
  return ailego::Factory<IndexReducer>::Has(name.c_str());
}

//! Return the list reported by ailego::Factory<IndexReducer>::Classes().
std::vector<std::string> IndexFactory::AllReducers(void) {
  return ailego::Factory<IndexReducer>::Classes();
}


//! Ask the IndexCluster factory for an instance registered as `name` and
//! tag it with that name. The pointer is returned as-is when it is empty.
IndexCluster::Pointer IndexFactory::CreateCluster(const std::string &name) {
  IndexCluster::Pointer cluster =
      ailego::Factory<IndexCluster>::MakeShared(name.c_str());
  if (!cluster) {
    return cluster;
  }
  cluster->set_name(name);
  return cluster;
}

//! Forward `name` to ailego::Factory<IndexCluster>::Has() and return its
//! result.
bool IndexFactory::HasCluster(const std::string &name) {
  return ailego::Factory<IndexCluster>::Has(name.c_str());
}

//! Return the list reported by ailego::Factory<IndexCluster>::Classes().
std::vector<std::string> IndexFactory::AllClusters(void) {
  return ailego::Factory<IndexCluster>::Classes();
}

//! Ask the IndexStreamerReducer factory for an instance registered as
//! `name` and tag it with that name. The pointer is returned as-is when it
//! is empty.
IndexStreamerReducer::Pointer IndexFactory::CreateStreamerReducer(
    const std::string &name) {
  IndexStreamerReducer::Pointer streamer_reducer =
      ailego::Factory<IndexStreamerReducer>::MakeShared(name.c_str());
  if (!streamer_reducer) {
    return streamer_reducer;
  }
  streamer_reducer->set_name(name);
  return streamer_reducer;
}

//! Forward `name` to ailego::Factory<IndexStreamerReducer>::Has() and
//! return its result.
bool IndexFactory::HasStreamerReducer(const std::string &name) {
  return ailego::Factory<IndexStreamerReducer>::Has(name.c_str());
}

//! Return the list reported by
//! ailego::Factory<IndexStreamerReducer>::Classes().
std::vector<std::string> IndexFactory::AllStreamerReducers(void) {
  return ailego::Factory<IndexStreamerReducer>::Classes();
}

//! Ask the IndexRefiner factory for an instance registered as `name` and
//! tag it with that name. The pointer is returned as-is when it is empty.
IndexRefiner::Pointer IndexFactory::CreateRefiner(const std::string &name) {
  IndexRefiner::Pointer refiner =
      ailego::Factory<IndexRefiner>::MakeShared(name.c_str());
  if (!refiner) {
    return refiner;
  }
  refiner->set_name(name);
  return refiner;
}

//! Forward `name` to ailego::Factory<IndexRefiner>::Has() and return its
//! result.
bool IndexFactory::HasRefiner(const std::string &name) {
  return ailego::Factory<IndexRefiner>::Has(name.c_str());
}

//! Return the list reported by ailego::Factory<IndexRefiner>::Classes().
std::vector<std::string> IndexFactory::AllRefiners(void) {
  return ailego::Factory<IndexRefiner>::Classes();
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/framework/index_flow.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <ailego/pattern/defer.h>
#include <zvec/ailego/logger/logger.h>
#include <zvec/core/framework/index_factory.h>
#include <zvec/core/framework/index_flow.h>
#include <zvec/core/framework/index_helper.h>

//! Default storage
#define INDEX_FLOW_STORAGE_DEFAULT "MMapFileReadStorage"

namespace zvec {
namespace core {

// Index Flow
//! Replace the flow's storage with a newly created one named `name` and
//! initialise it with `params`. Returns IndexError_NoExist when the
//! factory yields nothing; when init() reports a negative code the storage
//! is reset to null and that code is returned. Returns 0 otherwise.
int IndexFlow::set_storage(const std::string &name,
                           const ailego::Params &params) {
  storage_ = IndexFactory::CreateStorage(name);
  if (!storage_) {
    LOG_ERROR("Failed to create a index storage with name: %s", name.c_str());
    return IndexError_NoExist;
  }

  const int init_ret = storage_->init(params);
  if (init_ret >= 0) {
    return 0;
  }

  storage_ = nullptr;
  LOG_ERROR("Failed to initialize index storage %s", name.c_str());
  return init_ret;
}

//! Store a caller-supplied searcher as the user searcher. The pointer is
//! not validated here (a null pointer is accepted) and 0 is always
//! returned.
int IndexFlow::set_searcher(IndexSearcher::Pointer searcher) {
  user_searcher_ = searcher;

  return 0;
}

//! Replace the user searcher with a newly created one named `name` and
//! initialise it with `params`. Returns IndexError_NoExist when the
//! factory yields nothing; when init() reports a negative code the
//! searcher is reset to null and that code is returned. Returns 0
//! otherwise.
int IndexFlow::set_searcher(const std::string &name,
                            const ailego::Params &params) {
  user_searcher_ = IndexFactory::CreateSearcher(name);
  if (!user_searcher_) {
    LOG_ERROR("Failed to create a index searcher with name: %s", name.c_str());
    return IndexError_NoExist;
  }

  const int init_ret = user_searcher_->init(params);
  if (init_ret >= 0) {
    return 0;
  }

  user_searcher_ = nullptr;
  LOG_ERROR("Failed to initialize index searcher %s", name.c_str());
  return init_ret;
}

//! Replace the user reformer with a newly created one named `name` and
//! initialise it with `params`. Returns IndexError_NoExist when the
//! factory yields nothing; when init() reports a negative code the
//! reformer is reset to null and that code is returned. Returns 0
//! otherwise.
int IndexFlow::set_reformer(const std::string &name,
                            const ailego::Params &params) {
  user_reformer_ = IndexFactory::CreateReformer(name);
  if (!user_reformer_) {
    LOG_ERROR("Failed to create a index reformer with name: %s", name.c_str());
    return IndexError_NoExist;
  }

  const int init_ret = user_reformer_->init(params);
  if (init_ret >= 0) {
    return 0;
  }

  user_reformer_ = nullptr;
  LOG_ERROR("Failed to initialize index reformer %s", name.c_str());
  return init_ret;
}

//! Record a user-selected metric name and its parameters. Nothing is
//! stored and IndexError_NoExist is returned when the factory does not
//! know `name`; otherwise both values are saved and 0 is returned.
int IndexFlow::set_metric(const std::string &name,
                          const ailego::Params &params) {
  if (IndexFactory::HasMetric(name)) {
    user_metric_name_ = name;
    user_metric_params_ = params;
    return 0;
  }

  LOG_ERROR("The index metric with name %s does not exist.", name.c_str());
  return IndexError_NoExist;
}

//! Open the index at `path`, read its meta and build metric, reformer and
//! searcher from it. Returns 0 on success or the first error encountered.
int IndexFlow::load(const std::string &path) {
  // Fall back to the default storage when the caller did not set one
  if (!storage_) {
    this->set_storage(INDEX_FLOW_STORAGE_DEFAULT, ailego::Params());
    if (!storage_) {
      LOG_ERROR("The index storage is uninitialized.");
      return IndexError_Uninitialized;
    }
  }

  int status = storage_->open(path, false);
  if (status != 0) {
    LOG_ERROR("Failed to load index with storage %s", storage_->name().c_str());
    return status;
  }

  // The meta decides which metric / reformer / searcher get created
  status = IndexHelper::DeserializeFromStorage(storage_.get(), &meta_);
  if (status != 0) {
    LOG_ERROR("Failed to deserialize index meta with storage %s",
              storage_->name().c_str());
    return status;
  }

  status = load_internal();
  if (status != 0) {
    LOG_ERROR("Failed to load index with storage %s", storage_->name().c_str());
  }
  return status;
}

int IndexFlow::load_internal() {
  // Prepare metric
  const std::string &metric_name =
      user_metric_name_.empty() ? meta_.metric_name() : user_metric_name_;
  const ailego::Params &metric_params =
      user_metric_name_.empty() ? meta_.metric_params() : user_metric_params_;
  if (metric_name.empty()) {
    LOG_ERROR("The metric name from index file is empty.");
    return IndexError_NoExist;
  }
  metric_ = IndexFactory::CreateMetric(metric_name);
  if (!metric_) {
    LOG_ERROR("Failed to create a index metric with name: %s",
              metric_name.c_str());
    return IndexError_NoExist;
  }
  int ret = metric_->init(meta_, metric_params);
  if (ret < 0) {
    LOG_ERROR("Failed to initialize index metric %s", metric_name.c_str());
    metric_ = nullptr;
    return ret;
  }
  if (!metric_->is_matched(meta_)) {
    LOG_ERROR("The index meta is unmatched for index metric %s",
              metric_->name().c_str());
    return IndexError_Mismatch;
  }
  auto query_metric = metric_->query_metric();
  if (query_metric) {
    metric_ = query_metric;
  }

  // Prepare reformer
  if (!user_reformer_) {
    const std::string &reformer_name = meta_.reformer_name();
    if (!reformer_name.empty()) {
      reformer_ = IndexFactory::CreateReformer(reformer_name);
      if (!reformer_) {
        LOG_ERROR("Failed to create a index reformer with name: %s",
                  reformer_name.c_str());
        return IndexError_NoExist;
      }
      ret = reformer_->init(meta_.reformer_params());
      if (ret < 0) {
        LOG_ERROR("Failed to initialize index reformer %s",
                  reformer_name.c_str());
        reformer_ = nullptr;
        return ret;
      }
    }
  } else {
    // Using user reformer
    reformer_ = user_reformer_;
  }

  if (reformer_) {
    ret = reformer_->load(storage_);
    if (ret < 0) {
      LOG_ERROR("Failed to load index with reformer %s, storage %s",
                reformer_->name().c_str(), storage_->name().c_str());
      return ret;
    }
  }

  // Prepare searcher
  if (!user_searcher_) {
    const std::string &name = meta_.searcher_name();
    if (name.empty()) {
      LOG_ERROR("The searcher name from index file is empty.");
      return IndexError_NoExist;
    }
    searcher_ = IndexFactory::CreateSearcher(name);
    if (!searcher_) {
      LOG_ERROR("Failed to create a index searcher with name: %s",
                name.c_str());
      return IndexError_NoExist;
    }
    ret = searcher_->init(meta_.searcher_params());
    if (ret < 0) {
      LOG_ERROR("Failed to initialize index searcher %s", name.c_str());
      searcher_ = nullptr;
      return ret;
    }
  } else {
    // Using user searcher
    searcher_ = user_searcher_;
  }

  ret = searcher_->load(storage_, metric_);
  if (ret < 0) {
    LOG_ERROR("Failed to load index with searcher %s, storage %s, metric %s",
              searcher_->name().c_str(), storage_->name().c_str(),
              metric_->name().c_str());
    return ret;
  }

  // searcher_->print_all_neighbour();

  return 0;
}

int IndexFlow::unload(void) {
  if (searcher_) {
    int ret = searcher_->unload();
    if (ret < 0) {
      LOG_WARN("Unload index searcher %s error, %d", searcher_->name().c_str(),
               ret);
    }
    searcher_ = nullptr;
  }
  if (reformer_) {
    int ret = reformer_->unload();
    if (ret < 0) {
      LOG_WARN("Unload index reformer %s error, %d", reformer_->name().c_str(),
               ret);
    }
    reformer_ = nullptr;
  }
  if (metric_) {
    int ret = metric_->cleanup();
    if (ret < 0) {
      LOG_WARN("Cleanup index metric %s error, %d", metric_->name().c_str(),
               ret);
    }
    metric_ = nullptr;
  }
  if (storage_) {
    int ret = storage_->cleanup();
    if (ret < 0) {
      LOG_WARN("Unload index searcher %s error, %d", storage_->name().c_str(),
               ret);
    }
    storage_ = nullptr;
  }
  return 0;
}

int IndexFlow::search_bf_impl(const void *query, const IndexQueryMeta &qmeta,
                              Context::Pointer &context) const {
  if (ailego_unlikely(!query || !context)) {
    return IndexError_InvalidArgument;
  }

  int error_code = 0;
  if (reformer_) {
    IndexQueryMeta new_qmeta;
    error_code = reformer_->transform(query, qmeta, context->mutable_features(),
                                      &new_qmeta);
    if (error_code == 0) {
      if (ailego_unlikely(!metric_->is_matched(meta_, new_qmeta))) {
        return IndexError_Mismatch;
      }
      error_code = searcher_->search_bf_impl(
          reinterpret_cast<const void *>(context->features().data()), new_qmeta,
          context->searcher_context());
    }
  } else {
    if (ailego_unlikely(!metric_->is_matched(meta_, qmeta))) {
      return IndexError_Mismatch;
    }
    error_code =
        searcher_->search_bf_impl(query, qmeta, context->searcher_context());
  }

  if (error_code == 0) {
    if (metric_->support_normalize()) {
      for (auto &it : const_cast<IndexDocumentList &>(
               context->searcher_context()->result())) {
        metric_->normalize(it.mutable_score());
      }
    }
    if (reformer_) {
      error_code =
          reformer_->normalize(query, qmeta,
                               const_cast<IndexDocumentList &>(
                                   context->searcher_context()->result()));
    }
  }
  return error_code;
}

int IndexFlow::search_impl(const void *query, const IndexQueryMeta &qmeta,
                           Context::Pointer &context) const {
  if (ailego_unlikely(!query || !context)) {
    return IndexError_InvalidArgument;
  }

  int error_code = 0;
  if (reformer_) {
    IndexQueryMeta new_qmeta;
    error_code = reformer_->transform(query, qmeta, context->mutable_features(),
                                      &new_qmeta);
    if (error_code == 0) {
      if (ailego_unlikely(!metric_->is_matched(meta_, new_qmeta))) {
        return IndexError_Mismatch;
      }
      error_code = searcher_->search_impl(
          reinterpret_cast<const void *>(context->features().data()), new_qmeta,
          context->searcher_context());
    }
  } else {
    if (ailego_unlikely(!metric_->is_matched(meta_, qmeta))) {
      return IndexError_Mismatch;
    }
    error_code =
        searcher_->search_impl(query, qmeta, context->searcher_context());
  }

  if (error_code == 0) {
    if (metric_->support_normalize()) {
      for (auto &it : const_cast<IndexDocumentList &>(
               context->searcher_context()->result())) {
        metric_->normalize(it.mutable_score());
      }
    }
    if (reformer_) {
      error_code =
          reformer_->normalize(query, qmeta,
                               const_cast<IndexDocumentList &>(
                                   context->searcher_context()->result()));
    }
  }
  return error_code;
}

int IndexFlow::search_bf_impl(const void *query, const IndexQueryMeta &qmeta,
                              uint32_t count, Context::Pointer &context) const {
  if (ailego_unlikely(!query || !count || !context)) {
    return IndexError_InvalidArgument;
  }

  int error_code = 0;
  if (reformer_) {
    IndexQueryMeta new_qmeta;
    error_code = reformer_->transform(query, qmeta, count,
                                      context->mutable_features(), &new_qmeta);
    if (error_code == 0) {
      if (ailego_unlikely(!metric_->is_matched(meta_, new_qmeta))) {
        return IndexError_Mismatch;
      }
      error_code = searcher_->search_bf_impl(
          reinterpret_cast<const void *>(context->features().data()), new_qmeta,
          count, context->searcher_context());
    }
  } else {
    if (ailego_unlikely(!metric_->is_matched(meta_, qmeta))) {
      return IndexError_Mismatch;
    }
    error_code = searcher_->search_bf_impl(query, qmeta, count,
                                           context->searcher_context());
  }

  if (error_code == 0) {
    if (metric_->support_normalize()) {
      for (uint32_t i = 0; i < count; ++i) {
        IndexDocumentList &result = const_cast<IndexDocumentList &>(
            context->searcher_context()->result(i));

        for (auto &it : result) {
          metric_->normalize(it.mutable_score());
        }
      }
    }
    if (reformer_) {
      size_t offset = 0;
      for (uint32_t i = 0; i < count; ++i) {
        error_code = reformer_->normalize(
            reinterpret_cast<const uint8_t *>(query) + offset, qmeta,
            const_cast<IndexDocumentList &>(
                context->searcher_context()->result(i)));
        if (error_code != 0) {
          break;
        }
        offset += qmeta.element_size();
      }
    }
  }
  return error_code;
}

int IndexFlow::search_impl(const void *query, const IndexQueryMeta &qmeta,
                           uint32_t count, Context::Pointer &context) const {
  if (ailego_unlikely(!query || !count || !context)) {
    return IndexError_InvalidArgument;
  }

  int error_code = 0;
  if (reformer_) {
    IndexQueryMeta new_qmeta;
    error_code = reformer_->transform(query, qmeta, count,
                                      context->mutable_features(), &new_qmeta);
    if (error_code == 0) {
      if (ailego_unlikely(!metric_->is_matched(meta_, new_qmeta))) {
        return IndexError_Mismatch;
      }
      error_code = searcher_->search_impl(
          reinterpret_cast<const void *>(context->features().data()), new_qmeta,
          count, context->searcher_context());
    }
  } else {
    if (ailego_unlikely(!metric_->is_matched(meta_, qmeta))) {
      return IndexError_Mismatch;
    }
    error_code = searcher_->search_impl(query, qmeta, count,
                                        context->searcher_context());
  }

  if (error_code == 0) {
    if (metric_->support_normalize()) {
      for (uint32_t i = 0; i < count; ++i) {
        IndexDocumentList &result = const_cast<IndexDocumentList &>(
            context->searcher_context()->result(i));

        for (auto &it : result) {
          metric_->normalize(it.mutable_score());
        }
      }
    }
    if (reformer_) {
      size_t offset = 0;
      for (uint32_t i = 0; i < count; ++i) {
        error_code = reformer_->normalize(
            reinterpret_cast<const uint8_t *>(query) + offset, qmeta,
            const_cast<IndexDocumentList &>(
                context->searcher_context()->result(i)));
        if (error_code != 0) {
          break;
        }
        offset += qmeta.element_size();
      }
    }
  }
  return error_code;
}

// Index Sparse Flow
//! Create the storage registered under `name` and initialize it with `params`.
//! Returns 0 on success, IndexError_NoExist when the factory yields nothing,
//! or the negative code reported by init(). storage_ is empty after a failure.
int IndexSparseFlow::set_storage(const std::string &name,
                                 const ailego::Params &params) {
  storage_ = IndexFactory::CreateStorage(name);
  if (storage_) {
    const int init_code = storage_->init(params);
    if (init_code >= 0) {
      return 0;
    }
    // Drop the half-initialized storage before reporting the failure
    storage_ = nullptr;
    LOG_ERROR("Failed to initialize index storage %s", name.c_str());
    return init_code;
  }

  LOG_ERROR("Failed to create a index storage with name: %s", name.c_str());
  return IndexError_NoExist;
}

//! Install a caller-provided searcher. load_internal() uses it in place of
//! the searcher named in the index meta. Always returns 0.
int IndexSparseFlow::set_searcher(IndexSearcher::Pointer searcher) {
  user_searcher_ = searcher;
  return 0;
}

//! Create the searcher registered under `name`, initialize it with `params`
//! and keep it as the user searcher.
//! Returns 0 on success, IndexError_NoExist when the factory yields nothing,
//! or the negative code reported by init().
int IndexSparseFlow::set_searcher(const std::string &name,
                                  const ailego::Params &params) {
  user_searcher_ = IndexFactory::CreateSearcher(name);
  if (user_searcher_) {
    const int init_code = user_searcher_->init(params);
    if (init_code >= 0) {
      return 0;
    }
    // Drop the half-initialized searcher before reporting the failure
    user_searcher_ = nullptr;
    LOG_ERROR("Failed to initialize index sparse searcher %s", name.c_str());
    return init_code;
  }

  LOG_ERROR("Failed to create a index sparse searcher with name: %s",
            name.c_str());
  return IndexError_NoExist;
}

//! Create the reformer registered under `name`, initialize it with `params`
//! and keep it as the user reformer.
//! Returns 0 on success, IndexError_NoExist when the factory yields nothing,
//! or the negative code reported by init().
int IndexSparseFlow::set_reformer(const std::string &name,
                                  const ailego::Params &params) {
  user_reformer_ = IndexFactory::CreateReformer(name);
  if (user_reformer_) {
    const int init_code = user_reformer_->init(params);
    if (init_code >= 0) {
      return 0;
    }
    // Drop the half-initialized reformer before reporting the failure
    user_reformer_ = nullptr;
    LOG_ERROR("Failed to initialize index sparse reformer %s", name.c_str());
    return init_code;
  }

  LOG_ERROR("Failed to create a index sparse reformer with name: %s",
            name.c_str());
  return IndexError_NoExist;
}

//! Remember a metric name and its parameters for the next load.
//! The metric itself is created later, in load_internal().
//! Returns IndexError_NoExist when no metric is registered under `name`.
int IndexSparseFlow::set_metric(const std::string &name,
                                const ailego::Params &params) {
  if (IndexFactory::HasMetric(name)) {
    user_metric_name_ = name;
    user_metric_params_ = params;
    return 0;
  }

  LOG_ERROR("The index metric with name %s does not exist.", name.c_str());
  return IndexError_NoExist;
}

//! Open the index at `path`, read its meta and build metric, reformer and
//! searcher from it. Returns 0 on success or the first error encountered.
int IndexSparseFlow::load(const std::string &path) {
  // Fall back to the default storage when the caller did not set one
  if (!storage_) {
    this->set_storage(INDEX_FLOW_STORAGE_DEFAULT, ailego::Params());
    if (!storage_) {
      LOG_ERROR("The index storage is uninitialized.");
      return IndexError_Uninitialized;
    }
  }

  int status = storage_->open(path, false);
  if (status != 0) {
    LOG_ERROR("Failed to load index with storage %s", storage_->name().c_str());
    return status;
  }

  // The meta decides which metric / reformer / searcher get created
  status = IndexHelper::DeserializeFromStorage(storage_.get(), &meta_);
  if (status != 0) {
    LOG_ERROR("Failed to deserialize index meta with storage %s",
              storage_->name().c_str());
    return status;
  }

  status = load_internal();
  if (status != 0) {
    LOG_ERROR("Failed to load index with storage %s", storage_->name().c_str());
  }
  return status;
}

int IndexSparseFlow::load_internal() {
  // Prepare metric
  const std::string &metric_name =
      user_metric_name_.empty() ? meta_.metric_name() : user_metric_name_;
  const ailego::Params &metric_params =
      user_metric_name_.empty() ? meta_.metric_params() : user_metric_params_;
  if (metric_name.empty()) {
    LOG_ERROR("The metric name from index file is empty.");
    return IndexError_NoExist;
  }
  metric_ = IndexFactory::CreateMetric(metric_name);
  if (!metric_) {
    LOG_ERROR("Failed to create a index metric with name: %s",
              metric_name.c_str());
    return IndexError_NoExist;
  }
  int ret = metric_->init(meta_, metric_params);
  if (ret < 0) {
    LOG_ERROR("Failed to initialize index metric %s", metric_name.c_str());
    metric_ = nullptr;
    return ret;
  }

  auto query_metric = metric_->query_metric();
  if (query_metric) {
    metric_ = query_metric;
  }

  // Prepare reformer
  if (!user_reformer_) {
    const std::string &reformer_name = meta_.reformer_name();
    if (!reformer_name.empty()) {
      reformer_ = IndexFactory::CreateReformer(reformer_name);
      if (!reformer_) {
        LOG_ERROR("Failed to create a index sparse reformer with name: %s",
                  reformer_name.c_str());
        return IndexError_NoExist;
      }
      ret = reformer_->init(meta_.reformer_params());
      if (ret < 0) {
        LOG_ERROR("Failed to initialize index reformer %s",
                  reformer_name.c_str());
        reformer_ = nullptr;
        return ret;
      }
    }
  } else {
    // Using user reformer
    reformer_ = user_reformer_;
  }

  if (reformer_) {
    ret = reformer_->load(storage_);
    if (ret < 0) {
      LOG_ERROR("Failed to load index with reformer %s, storage %s",
                reformer_->name().c_str(), storage_->name().c_str());
      return ret;
    }
  }

  // Prepare searcher
  if (!user_searcher_) {
    const std::string &name = meta_.searcher_name();
    if (name.empty()) {
      LOG_ERROR("The searcher name from index file is empty.");
      return IndexError_NoExist;
    }
    searcher_ = IndexFactory::CreateSearcher(name);
    if (!searcher_) {
      LOG_ERROR("Failed to create a index searcher with name: %s",
                name.c_str());
      return IndexError_NoExist;
    }
    ret = searcher_->init(meta_.searcher_params());
    if (ret < 0) {
      LOG_ERROR("Failed to initialize index searcher %s", name.c_str());
      searcher_ = nullptr;
      return ret;
    }
  } else {
    // Using user searcher
    searcher_ = user_searcher_;
  }

  ret = searcher_->load(storage_, metric_);
  if (ret < 0) {
    LOG_ERROR("Failed to load index with searcher %s, storage %s, metric %s",
              searcher_->name().c_str(), storage_->name().c_str(),
              metric_->name().c_str());
    return ret;
  }

  // searcher_->print_all_neighbour();

  return 0;
}

int IndexSparseFlow::unload(void) {
  if (searcher_) {
    int ret = searcher_->unload();
    if (ret < 0) {
      LOG_WARN("Unload index searcher %s error, %d", searcher_->name().c_str(),
               ret);
    }
    searcher_ = nullptr;
  }
  if (reformer_) {
    int ret = reformer_->unload();
    if (ret < 0) {
      LOG_WARN("Unload index reformer %s error, %d", reformer_->name().c_str(),
               ret);
    }
    reformer_ = nullptr;
  }
  if (metric_) {
    int ret = metric_->cleanup();
    if (ret < 0) {
      LOG_WARN("Cleanup index metric %s error, %d", metric_->name().c_str(),
               ret);
    }
    metric_ = nullptr;
  }
  if (storage_) {
    int ret = storage_->cleanup();
    if (ret < 0) {
      LOG_WARN("Unload index searcher %s error, %d", storage_->name().c_str(),
               ret);
    }
    storage_ = nullptr;
  }
  return 0;
}

//! Brute-force search for a single sparse query.
//! When a reformer is present the sparse values are transformed first and
//! the transformed buffer is searched; afterwards scores are normalized by
//! the metric if it supports that.
//! Returns 0 on success, IndexError_InvalidArgument for a null context,
//! IndexError_Mismatch when the query meta does not match the index meta,
//! or the error of the failing reformer / searcher.
int IndexSparseFlow::search_bf_impl(const uint32_t sparse_count,
                                    const uint32_t *sparse_indices,
                                    const void *sparse_query,
                                    const IndexQueryMeta &qmeta,
                                    Context::Pointer &context) const {
  if (ailego_unlikely(!context)) {
    return IndexError_InvalidArgument;
  }

  int error_code = 0;
  if (reformer_) {
    std::string ovec;
    IndexQueryMeta new_qmeta;
    error_code = reformer_->transform(sparse_count, sparse_indices,
                                      sparse_query, qmeta, &ovec, &new_qmeta);
    // Only inspect new_qmeta after a successful transform; otherwise the
    // transform error would be masked by a spurious mismatch.
    if (error_code == 0) {
      if (ailego_unlikely(!metric_->is_matched(meta_, new_qmeta))) {
        return IndexError_Mismatch;
      }
      error_code =
          searcher_->search_bf_impl(sparse_count, sparse_indices, ovec.data(),
                                    new_qmeta, context->searcher_context());
    }
  } else {
    if (ailego_unlikely(!metric_->is_matched(meta_, qmeta))) {
      return IndexError_Mismatch;
    }
    error_code =
        searcher_->search_bf_impl(sparse_count, sparse_indices, sparse_query,
                                  qmeta, context->searcher_context());
  }

  if (error_code == 0) {
    if (metric_->support_normalize()) {
      // Normalize the scores in place
      for (auto &it : const_cast<IndexDocumentList &>(
               context->searcher_context()->result())) {
        metric_->normalize(it.mutable_score());
      }
    }
  }
  return error_code;
}

//! Search for a single sparse query through the searcher's search_impl().
//! When a reformer is present the sparse values are transformed first and
//! the transformed buffer is searched; afterwards scores are normalized by
//! the metric if it supports that.
//! Returns 0 on success, IndexError_InvalidArgument for a null context,
//! IndexError_Mismatch when the query meta does not match the index meta,
//! or the error of the failing reformer / searcher.
int IndexSparseFlow::search_impl(const uint32_t sparse_count,
                                 const uint32_t *sparse_indices,
                                 const void *sparse_query,
                                 const IndexQueryMeta &qmeta,
                                 Context::Pointer &context) const {
  if (ailego_unlikely(!context)) {
    return IndexError_InvalidArgument;
  }

  int error_code = 0;
  if (reformer_) {
    std::string ovec;
    IndexQueryMeta new_qmeta;
    error_code = reformer_->transform(sparse_count, sparse_indices,
                                      sparse_query, qmeta, &ovec, &new_qmeta);
    // Only inspect new_qmeta after a successful transform; otherwise the
    // transform error would be masked by a spurious mismatch.
    if (error_code == 0) {
      if (ailego_unlikely(!metric_->is_matched(meta_, new_qmeta))) {
        return IndexError_Mismatch;
      }
      error_code =
          searcher_->search_impl(sparse_count, sparse_indices, ovec.data(),
                                 new_qmeta, context->searcher_context());
    }
  } else {
    if (ailego_unlikely(!metric_->is_matched(meta_, qmeta))) {
      return IndexError_Mismatch;
    }
    error_code =
        searcher_->search_impl(sparse_count, sparse_indices, sparse_query,
                               qmeta, context->searcher_context());
  }

  if (error_code == 0) {
    if (metric_->support_normalize()) {
      // Normalize the scores in place
      for (auto &it : const_cast<IndexDocumentList &>(
               context->searcher_context()->result())) {
        metric_->normalize(it.mutable_score());
      }
    }
  }
  return error_code;
}

//! Brute-force search for a batch of `count` sparse queries.
//! When a reformer is present the batch is transformed first and the
//! transformed buffer is searched; afterwards every result list is
//! normalized by the metric if it supports that.
//! Returns 0 on success, IndexError_InvalidArgument for a zero count or
//! null context, IndexError_Mismatch when the query meta does not match the
//! index meta, or the error of the failing reformer / searcher.
int IndexSparseFlow::search_bf_impl(const uint32_t *sparse_count,
                                    const uint32_t *sparse_indices,
                                    const void *sparse_query,
                                    const IndexQueryMeta &qmeta, uint32_t count,
                                    Context::Pointer &context) const {
  if (ailego_unlikely(!count || !context)) {
    return IndexError_InvalidArgument;
  }

  int error_code = 0;
  if (reformer_) {
    std::string ovec;
    IndexQueryMeta new_qmeta;
    error_code =
        reformer_->transform(sparse_count, sparse_indices, sparse_query, qmeta,
                             count, &ovec, &new_qmeta);

    // Only inspect new_qmeta after a successful transform; otherwise the
    // transform error would be masked by a spurious mismatch.
    if (error_code == 0) {
      if (ailego_unlikely(!metric_->is_matched(meta_, new_qmeta))) {
        return IndexError_Mismatch;
      }
      error_code = searcher_->search_bf_impl(sparse_count, sparse_indices,
                                             ovec.data(), new_qmeta, count,
                                             context->searcher_context());
    }
  } else {
    if (ailego_unlikely(!metric_->is_matched(meta_, qmeta))) {
      return IndexError_Mismatch;
    }
    error_code =
        searcher_->search_bf_impl(sparse_count, sparse_indices, sparse_query,
                                  qmeta, count, context->searcher_context());
  }

  if (error_code == 0) {
    if (metric_->support_normalize()) {
      // Normalize the scores of every result list in place
      for (uint32_t i = 0; i < count; ++i) {
        IndexDocumentList &result = const_cast<IndexDocumentList &>(
            context->searcher_context()->result(i));

        for (auto &it : result) {
          metric_->normalize(it.mutable_score());
        }
      }
    }
  }
  return error_code;
}

//! Search for a batch of `count` sparse queries through the searcher's
//! search_impl().
//! When a reformer is present the batch is transformed first and the
//! transformed buffer is searched; afterwards every result list is
//! normalized by the metric if it supports that.
//! Returns 0 on success, IndexError_InvalidArgument for a zero count or
//! null context, IndexError_Mismatch when the query meta does not match the
//! index meta, or the error of the failing reformer / searcher.
int IndexSparseFlow::search_impl(const uint32_t *sparse_count,
                                 const uint32_t *sparse_indices,
                                 const void *sparse_query,
                                 const IndexQueryMeta &qmeta, uint32_t count,
                                 Context::Pointer &context) const {
  if (ailego_unlikely(!count || !context)) {
    return IndexError_InvalidArgument;
  }

  int error_code = 0;
  if (reformer_) {
    std::string ovec;
    IndexQueryMeta new_qmeta;
    error_code =
        reformer_->transform(sparse_count, sparse_indices, sparse_query, qmeta,
                             count, &ovec, &new_qmeta);

    // Only inspect new_qmeta after a successful transform; otherwise the
    // transform error would be masked by a spurious mismatch.
    if (error_code == 0) {
      if (ailego_unlikely(!metric_->is_matched(meta_, new_qmeta))) {
        return IndexError_Mismatch;
      }
      error_code =
          searcher_->search_impl(sparse_count, sparse_indices, ovec.data(),
                                 new_qmeta, count, context->searcher_context());
    }
  } else {
    if (ailego_unlikely(!metric_->is_matched(meta_, qmeta))) {
      return IndexError_Mismatch;
    }
    error_code =
        searcher_->search_impl(sparse_count, sparse_indices, sparse_query,
                               qmeta, count, context->searcher_context());
  }

  if (error_code == 0) {
    if (metric_->support_normalize()) {
      // Normalize the scores of every result list in place
      for (uint32_t i = 0; i < count; ++i) {
        IndexDocumentList &result = const_cast<IndexDocumentList &>(
            context->searcher_context()->result(i));

        for (auto &it : result) {
          metric_->normalize(it.mutable_score());
        }
      }
    }
  }
  return error_code;
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/framework/index_helper.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <ailego/utility/memory_helper.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_helper.h>

namespace zvec {
namespace core {

//! Serialize `mt` and write it through `dumper` as the segment `key`.
//! The payload is zero-padded up to a multiple of 32 bytes; the CRC covers
//! the unpadded payload only.
//! Returns IndexError_Success, or IndexError_WriteData when the write or the
//! segment registration fails.
int IndexHelper::SerializeToDumper(const IndexMeta &mt, IndexDumper *dumper,
                                   const std::string &key) {
  std::string buffer;
  mt.serialize(&buffer);

  const size_t data_size = buffer.size();
  const uint32_t data_crc = ailego::Crc32c::Hash(buffer.data(), data_size, 0);

  // Round up to the next multiple of 32. The mask is built at size_t width:
  // a 32-bit `~31u` would be zero-extended and clear the upper half of the
  // size on 64-bit targets.
  const size_t alignment = 32;
  const size_t padded_size = (data_size + (alignment - 1)) & ~(alignment - 1);
  buffer.resize(padded_size);

  if (dumper->write(buffer.data(), buffer.size()) != buffer.size()) {
    return IndexError_WriteData;
  }
  if (dumper->append(key, data_size, buffer.size() - data_size, data_crc) !=
      0) {
    return IndexError_WriteData;
  }
  return IndexError_Success;
}

//! Serialize `mt` into the storage segment `key`, creating the segment when
//! it does not exist yet. A new segment is sized up to a multiple of 16 KiB.
//! Returns IndexError_Success, IndexError_WriteData when appending or
//! writing fails, or IndexError_NoExist when the freshly appended segment
//! cannot be fetched.
int IndexHelper::SerializeToStorage(const IndexMeta &mt, IndexStorage *storage,
                                    const std::string &key) {
  std::string payload;
  mt.serialize(&payload);

  auto segment = storage->get(key);
  if (!segment) {
    // Reserve room in whole 16 KiB units
    const size_t unit = 4096 * 4;
    const size_t reserved = (payload.size() + unit - 1) / unit * unit;

    if (storage->append(key, reserved) != 0) {
      return IndexError_WriteData;
    }

    segment = storage->get(key);
    if (!segment) {
      return IndexError_NoExist;
    }
  }

  const size_t written = segment->write(0, payload.data(), payload.size());
  if (written != payload.size()) {
    return IndexError_WriteData;
  }

  // Record the real payload length and its checksum on the segment
  segment->resize(payload.size());
  segment->update_data_crc(
      ailego::Crc32c::Hash(payload.data(), payload.size(), 0));
  return IndexError_Success;
}

//! Read the segment `key` from `storage` and deserialize it into `out`.
//! The checksum is verified only when the segment carries a non-zero CRC.
//! Returns IndexError_Success, IndexError_NoExist (segment missing),
//! IndexError_ReadData, IndexError_InvalidChecksum or IndexError_Deserialize.
int IndexHelper::DeserializeFromStorage(IndexStorage *storage,
                                        const std::string &key,
                                        IndexMeta *out) {
  auto segment = storage->get(key);
  if (!segment) {
    return IndexError_NoExist;
  }

  const uint32_t expected_crc = segment->data_crc();
  const size_t length = segment->data_size();

  const void *bytes = nullptr;
  if (segment->read(0, &bytes, length) != length) {
    return IndexError_ReadData;
  }

  const bool has_crc = (expected_crc != 0u);
  if (has_crc && ailego::Crc32c::Hash(bytes, length, 0u) != expected_crc) {
    return IndexError_InvalidChecksum;
  }

  return out->deserialize(bytes, length) ? IndexError_Success
                                         : IndexError_Deserialize;
}

/*! Two Pass Index Holder
 *  Wraps a holder so that its elements can be iterated twice: the first
 *  iterator reads from the wrapped holder and buffers every element it steps
 *  over, the second iterator replays (and consumes) that buffer. Any further
 *  create_iterator() call yields a null iterator.
 */
class TwoPassIndexHolder : public IndexHolder {
 private:
  //! Buffered (key, raw feature bytes) records
  using FeatureList = std::list<std::pair<uint64_t, std::string>>;

  /*! First Pass Iterator
   *  Forwards to the wrapped iterator and copies each element into the
   *  owner's buffer when advancing past it.
   */
  class FirstPassIterator : public IndexHolder::Iterator {
   public:
    //! First Pass Iterator Pointer
    using Pointer = std::unique_ptr<FirstPassIterator>;

    //! Constructor
    FirstPassIterator(TwoPassIndexHolder *owner,
                      IndexHolder::Iterator::Pointer &&iter)
        : holder_(owner), front_iter_(std::move(iter)) {}

    //! Destructor
    virtual ~FirstPassIterator(void) = default;

    //! Retrieve pointer of data
    const void *data(void) const override {
      return front_iter_->data();
    }

    //! Test if the iterator is valid
    bool is_valid(void) const override {
      return front_iter_->is_valid();
    }

    //! Retrieve primary key
    uint64_t key(void) const override {
      return front_iter_->key();
    }

    //! Next iterator: buffer the current element, then advance
    void next(void) override {
      const char *bytes = static_cast<const char *>(front_iter_->data());
      holder_->features_.emplace_back(
          front_iter_->key(),
          std::string(bytes, holder_->front_->element_size()));
      front_iter_->next();
    }

   private:
    TwoPassIndexHolder *holder_{nullptr};
    IndexHolder::Iterator::Pointer front_iter_{};
  };

  /*! Second Pass Iterator
   *  Walks the owner's buffer, erasing each record once it has been passed.
   */
  class SecondPassIterator : public IndexHolder::Iterator {
   public:
    //! Second Pass Iterator Pointer
    using Pointer = std::unique_ptr<SecondPassIterator>;

    //! Constructor
    SecondPassIterator(TwoPassIndexHolder *owner)
        : holder_(owner), cursor_(owner->features_.begin()) {}

    //! Destructor
    virtual ~SecondPassIterator(void) = default;

    //! Retrieve pointer of data
    const void *data(void) const override {
      return cursor_->second.data();
    }

    //! Test if the iterator is valid
    bool is_valid(void) const override {
      return !(cursor_ == holder_->features_.end());
    }

    //! Retrieve primary key
    uint64_t key(void) const override {
      return cursor_->first;
    }

    //! Next iterator: drop the current record and move to its successor
    void next(void) override {
      cursor_ = holder_->features_.erase(cursor_);
    }

   private:
    TwoPassIndexHolder *holder_{nullptr};
    FeatureList::iterator cursor_{};
  };

 public:
  //! Constructor: takes over `front` and caches its shape information
  TwoPassIndexHolder(IndexHolder::Pointer &&front)
      : front_(std::move(front)),
        data_type_(front_->data_type()),
        dimension_(front_->dimension()),
        element_size_(front_->element_size()),
        count_(front_->count()) {}

  //! Retrieve count of elements in holder (-1 indicates unknown)
  size_t count(void) const override {
    return count_;
  }

  //! Retrieve dimension
  size_t dimension(void) const override {
    return dimension_;
  }

  //! Retrieve type information
  IndexMeta::DataType data_type(void) const override {
    return data_type_;
  }

  //! Retrieve element size in bytes
  size_t element_size(void) const override {
    return element_size_;
  }

  //! Retrieve if it can multi-pass
  bool multipass(void) const override {
    return false;
  }

  //! Create a new iterator: first call reads the wrapped holder, second call
  //! replays the buffer, later calls return null
  IndexHolder::Iterator::Pointer create_iterator(void) override {
    switch (++pass_) {
      case 1: {
        IndexHolder::Iterator::Pointer source = front_->create_iterator();
        if (!source) {
          return IndexHolder::Iterator::Pointer();
        }
        return IndexHolder::Iterator::Pointer(
            new TwoPassIndexHolder::FirstPassIterator(this,
                                                      std::move(source)));
      }
      case 2:
        return IndexHolder::Iterator::Pointer(
            new TwoPassIndexHolder::SecondPassIterator(this));
      default:
        return nullptr;
    }
  }

 private:
  //! Disable them
  TwoPassIndexHolder(void) = delete;

  //! Members
  IndexHolder::Pointer front_{};
  FeatureList features_{};
  size_t pass_{0};
  IndexMeta::DataType data_type_{IndexMeta::DataType::DT_UNDEFINED};
  size_t dimension_;
  size_t element_size_;
  size_t count_;
};

//! Return a holder that can be iterated at least twice: `holder` itself when
//! it already reports multipass(), otherwise a TwoPassIndexHolder wrapping it.
IndexHolder::Pointer IndexHelper::MakeTwoPassHolder(
    IndexHolder::Pointer holder) {
  const bool already_multipass = holder->multipass();
  if (!already_multipass) {
    return IndexHolder::Pointer(new TwoPassIndexHolder(std::move(holder)));
  }
  return holder;
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/framework/index_logger.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <iostream>
#include <sstream>
#include <thread>
#include <zvec/ailego/io/file.h>
#include <zvec/ailego/utility/time_helper.h>
#include <zvec/core/framework/index_logger.h>

namespace zvec {
namespace core {

// Numeric log levels, ordered from least (DEBUG) to most (FATAL) severe.
// ConsoleLogger in this file writes levels up to LEVEL_INFO to std::cout and
// everything above to std::cerr.
const int IndexLogger::LEVEL_DEBUG = 0;
const int IndexLogger::LEVEL_INFO = 1;
const int IndexLogger::LEVEL_WARN = 2;
const int IndexLogger::LEVEL_ERROR = 3;
const int IndexLogger::LEVEL_FATAL = 4;

/*! Console Logger
 *  Formats each record as
 *  "[LEVEL time thread-id file:line] message\n" and writes it to the
 *  standard streams.
 */
struct ConsoleLogger : public IndexLogger {
  //! Initialize Logger (nothing to set up)
  int init(const zvec::ailego::Params &) override {
    return 0;
  }

  //! Cleanup Logger (nothing to release)
  int cleanup(void) override {
    return 0;
  }

  //! Log Message
  //! Levels up to LEVEL_INFO go to std::cout, higher levels to std::cerr.
  //! The formatted message is limited by the 8192-byte scratch buffer.
  void log(int level, const char *file, int line, const char *format,
           va_list args) override {
    char scratch[8192];
    std::ostringstream record;

    // Prefix: level, local time, thread id and source location
    ailego::Realtime::Localtime(scratch, sizeof(scratch));
    record << '[' << LevelString(level) << ' ' << scratch << ' '
           << std::this_thread::get_id() << ' ' << ailego::File::BaseName(file)
           << ':' << line << "] ";

    // Body: the scratch buffer is reused for the printf-style message
    vsnprintf(scratch, sizeof(scratch), format, args);
    record << scratch << '\n';

    std::ostream &sink = (level <= LEVEL_INFO) ? std::cout : std::cerr;
    sink << record.str() << std::flush;
  }
};

//! Logger Level
//! Starts at 0, which equals IndexLogger::LEVEL_DEBUG.
int IndexLoggerBroker::logger_level_ = 0;

//! Logger
//! Initialised with a ConsoleLogger instance.
IndexLogger::Pointer IndexLoggerBroker::logger_(new ConsoleLogger);

}  // namespace core
}  // namespace zvec

================================================
FILE: src/core/framework/index_mapping.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <zvec/ailego/io/mmap_file.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_logger.h>
#include <zvec/core/framework/index_mapping.h>
#include "ailego/utility/memory_helper.h"

#ifdef __linux__
#include <sys/statfs.h>
#include <sys/vfs.h>
#ifndef HUGETLBFS_MAGIC
#define HUGETLBFS_MAGIC 0x958458f6
#endif
#endif

namespace zvec {
namespace core {

//! Round `size` up to a whole number of pages.
//! @param size       Byte count to round up.
//! @param huge_size  Use the huge page size instead of the regular page size.
//! @return The smallest multiple of the chosen page size that is >= size.
static inline size_t CalcPageAlignedSize(size_t size, bool huge_size) {
  const size_t unit = huge_size ? ailego::MemoryHelper::HugePageSize()
                                : ailego::MemoryHelper::PageSize();
  const size_t pages = (size + unit - 1) / unit;
  return pages * unit;
}

//! Write `size` zero bytes to `file` at its current position.
//! The zeros are emitted in chunks of at most one page.
//! @return false as soon as a write comes back short, true otherwise.
static inline bool WritePadding(ailego::File &file, size_t size) {
  const std::string zeros(ailego::MemoryHelper::PageSize(), 0);
  const size_t chunk = zeros.size();

  // Whole page-sized chunks first
  size_t remaining = size;
  while (remaining >= chunk) {
    if (file.write(zeros.data(), chunk) != chunk) {
      return false;
    }
    remaining -= chunk;
  }

  // Then the tail that is shorter than a page, if any
  if (remaining != 0) {
    if (file.write(zeros.data(), remaining) != remaining) {
      return false;
    }
  }
  return true;
}

//! Read the meta header at the file's current position and derive how many
//! bytes the first meta section spans.
//! @param file  File to read the header from.
//! @param len   Receives meta_footer_offset + meta_footer_size; it is written
//!              before the final file-size check.
//! @return 0 on success, otherwise an IndexError_* code.
static inline int UnpackMappingSize(ailego::File &file, size_t *len) {
  IndexFormat::MetaHeader meta;
  const auto bytes_read = file.read(&meta, sizeof(meta));
  if (bytes_read != sizeof(meta)) {
    LOG_ERROR("Failed to read file, errno %d, %s", errno, std::strerror(errno));
    return IndexError_ReadData;
  }

  // The recorded structure sizes must match this build's layout
  const bool header_size_ok =
      (meta.meta_header_size == sizeof(IndexFormat::MetaHeader));
  const bool footer_size_ok =
      (meta.meta_footer_size == sizeof(IndexFormat::MetaFooter));
  if (!header_size_ok || !footer_size_ok) {
    return IndexError_InvalidValue;
  }

  const auto crc = ailego::Crc32c::Hash(&meta, sizeof(meta), meta.header_crc);
  if (crc != meta.header_crc) {
    return IndexError_InvalidChecksum;
  }

  // Offsets that turn negative when viewed as int32 are rejected
  if (static_cast<int32_t>(meta.meta_footer_offset) < 0) {
    return IndexError_Unsupported;
  }

  *len = meta.meta_footer_offset + meta.meta_footer_size;
  return (*len > file.size()) ? IndexError_InvalidLength : 0;
}

int IndexMapping::open(const std::string &path, bool cow, bool full_mode) {
  path_ = path;
  full_mode_ = full_mode;
  copy_on_write_ = cow;
  huge_page_ = Ishugetlbfs(path);

  bool read_only = copy_on_write_ && !full_mode_;
  if (!file_.open(path.c_str(), read_only, false)) {
    LOG_ERROR("Failed to open file %s, errno %d, %s", path.c_str(), errno,
              std::strerror(errno));
    return IndexError_OpenFile;
  }

  size_t mapping_size = 0u;
  int error_code = UnpackMappingSize(file_, &mapping_size);
  if (error_code != 0) {
    file_.close();
    return error_code;
  }

  if (!file_.seek(0, ailego::File::Origin::End)) {
    LOG_ERROR("Failed to seek file %s, errno %d, %s", path.c_str(), errno,
              std::strerror(errno));
    return IndexError_SeekFile;
  }
  return this->init_index_mapping(mapping_size);
}

//! Create a new index file and write its first meta section.
//! @param path               File to create.
//! @param seg_meta_capacity  Bytes requested for the segment meta area.
//! @return 0 on success, otherwise an IndexError_* code.
int IndexMapping::create(const std::string &path, size_t seg_meta_capacity) {
  path_ = path;
  seg_meta_capacity_ = seg_meta_capacity;
  current_header_start_offset_ = 0;

  // write() & copying to mmap() will auto extend the file size
  const bool created = file_.create(path.c_str(), 0);
  if (!created) {
    LOG_ERROR("Failed to create file %s, errno %d, %s", path.c_str(), errno,
              std::strerror(errno));
    return IndexError_CreateFile;
  }

  // The file system type is probed only after the file exists
  huge_page_ = Ishugetlbfs(path);
  return huge_page_ ? init_hugepage_meta_section() : init_meta_section();
}

int IndexMapping::init_meta_section() {
  if (current_header_start_offset_ % ailego::MemoryHelper::PageSize() != 0) {
    LOG_ERROR("File offset %zu is not a multiple of the page size: %zu",
              current_header_start_offset_, ailego::MemoryHelper::PageSize());
    return IndexError_InvalidValue;
  }

  auto &path = path_;
  size_t len =
      CalcPageAlignedSize(seg_meta_capacity_ + sizeof(IndexFormat::MetaHeader) +
                              sizeof(IndexFormat::MetaFooter),
                          false);

  IndexFormat::MetaHeader meta_header;
  IndexFormat::MetaFooter meta_footer;

  // Write index header
  IndexFormat::SetupMetaHeader(&meta_header, len - sizeof(meta_footer), len);
  if (!file_.seek(current_header_start_offset_, ailego::File::Origin::Begin)) {
    LOG_ERROR("Failed to seek file %s, errno %d, %s", path.c_str(), errno,
              std::strerror(errno));
    return IndexError_SeekFile;
  }
  if (file_.write(&meta_header, sizeof(meta_header)) != sizeof(meta_header)) {
    LOG_ERROR("Failed to write file: %s, errno %d, %s", path.c_str(), errno,
              std::strerror(errno));
    return IndexError_WriteData;
  }

  // Write padding data
  uint32_t segments_meta_size =
      static_cast<uint32_t>(len - (sizeof(meta_header) + sizeof(meta_footer)));
  if (!WritePadding(file_, segments_meta_size)) {
    LOG_ERROR("Failed to write file: %s, errno %d, %s", path.c_str(), errno,
              std::strerror(errno));
    return IndexError_WriteData;
  }

  // Write index footer
  IndexFormat::SetupMetaFooter(&meta_footer);
  meta_footer.segments_meta_size = segments_meta_size;
  meta_footer.total_size = len;
  IndexFormat::UpdateMetaFooter(&meta_footer, 0);
  if (file_.write(&meta_footer, sizeof(meta_footer)) != sizeof(meta_footer)) {
    LOG_ERROR("Failed to write file: %s, errno %d, %s", path.c_str(), errno,
              std::strerror(errno));
    return IndexError_WriteData;
  }
  return this->init_index_mapping(len);
}

int IndexMapping::init_hugepage_meta_section() {
  ssize_t file_offset = (ssize_t)current_header_start_offset_;
  if (file_offset % ailego::MemoryHelper::HugePageSize() != 0) {
    LOG_ERROR("File offset %zu is not a multiple of the page size: %zu",
              file_offset, ailego::MemoryHelper::HugePageSize());
    return IndexError_InvalidValue;
  }

  size_t len =
      CalcPageAlignedSize(seg_meta_capacity_ + sizeof(IndexFormat::MetaHeader) +
                              sizeof(IndexFormat::MetaFooter),
                          true);
  int opts = ailego::File::MMAP_SHARED | ailego::File::MMAP_HUGE_PAGE;
  void *addr =
      ailego::File::MemoryMap(file_.native_handle(), file_offset, len, opts);

  IndexFormat::MetaHeader meta_header;
  IndexFormat::MetaFooter meta_footer;

  // Write index header
  IndexFormat::SetupMetaHeader(&meta_header, len - sizeof(meta_footer), len);
  memcpy((char *)addr + file_offset, &meta_header, sizeof(meta_header));
  file_offset += sizeof(meta_header);

  // Write padding data
  uint32_t segments_meta_size =
      static_cast<uint32_t>(len - (sizeof(meta_header) + sizeof(meta_footer)));
  std::string padding(ailego::MemoryHelper::HugePageSize(), 0);
  for (size_t i = 0, count = segments_meta_size / padding.size(); i < count;
       ++i) {
    memcpy((char *)addr + file_offset, padding.data(), padding.size());
    file_offset += padding.size();
  }
  padding.resize(segments_meta_size % padding.size());
  if (padding.size()) {
    memcpy((char *)addr + file_offset, padding.data(), padding.size());
    file_offset += padding.size();
  }

  // Write index footer
  IndexFormat::SetupMetaFooter(&meta_footer);
  meta_footer.segments_meta_size = segments_meta_size;
  meta_footer.total_size = len;
  IndexFormat::UpdateMetaFooter(&meta_footer, 0);
  memcpy((char *)addr + file_offset, &meta_footer, sizeof(meta_footer));
  file_offset += sizeof(meta_footer);

  return this->init_index_mapping(len);
}

void IndexMapping::close(void) {
  // Unmap all memory
  this->unmap_all();
  if (header_) {
    for (auto item : header_addr_map_) {
      auto header = item.second;
      ailego::File::MemoryUnmap(header, header->content_offset);
    }
  }
  // Reset members
  segment_ids_offset_ = 0;
  segment_start_ = nullptr;
  header_ = nullptr;
  header_addr_map_.clear();
  footer_ = nullptr;
  index_size_ = 0u;
  segments_.clear();
  file_.close();
  copy_on_write_ = false;
  full_mode_ = false;
  header_dirty_ = false;
  huge_page_ = false;
}

//! Recompute the segment-meta checksum of every mapped meta section and
//! update each footer with `check_point`; marks the headers dirty.
//! @param check_point  Value forwarded to IndexFormat::UpdateMetaFooter.
void IndexMapping::refresh(uint64_t check_point) {
  // support add_with_id
  for (const auto &entry : header_addr_map_) {
    auto *header = entry.second;
    uint8_t *base = reinterpret_cast<uint8_t *>(header);

    // The footer sits at meta_footer_offset; the segment meta area ends
    // right where the footer begins.
    auto *footer = reinterpret_cast<IndexFormat::MetaFooter *>(
        base + header->meta_footer_offset);
    auto *segment_start = reinterpret_cast<IndexFormat::SegmentMeta *>(
        base + (header->meta_footer_offset - footer->segments_meta_size));

    footer->segments_meta_crc =
        ailego::Crc32c::Hash(segment_start, footer->segments_meta_size, 0);
    IndexFormat::UpdateMetaFooter(footer, check_point);
  }
  header_dirty_ = true;
}

//! Reserve a new segment named `id` at the end of the index.
//! @param id    Segment identifier; it is stored up to its first NUL byte.
//! @param size  Requested size in bytes, rounded up to whole (huge) pages.
//! @return 0 on success; IndexError_InvalidArgument when the rounded size is
//!         0, IndexError_Duplicate when `id` already exists, or the error of
//!         a failed meta-section expansion / file truncation.
int IndexMapping::append(const std::string &id, size_t size) {
  size = CalcPageAlignedSize(size, huge_page_);
  if (size == 0) {
    return IndexError_InvalidArgument;
  }

  if (segments_.find(id) != segments_.end()) {
    return IndexError_Duplicate;
  }

  // Segment records grow upward from the start of the meta area while the id
  // strings grow downward from `segment_ids_offset_`; when one more record
  // plus this id would collide, a new meta section is chained after the
  // current end of the index.
  size_t id_size = std::strlen(id.c_str()) + 1;
  size_t need_size = sizeof(IndexFormat::SegmentMeta) + id_size;
  if (sizeof(IndexFormat::SegmentMeta) * footer_->segment_count + need_size >
      segment_ids_offset_) {
    LOG_DEBUG("segment meta section expanded: %s", path_.c_str());
    footer_->next_meta_header_offset = index_size_;
    refresh(0);
    // NOTE(review): the result of flush() is not checked here.
    flush();
    // mmap file storage write() will update segment's meta
    // ailego::File::MemoryUnmap(header_, header_->content_offset);
    header_ = nullptr;
    footer_ = nullptr;

    current_header_start_offset_ = index_size_;
    const int ret =
        huge_page_ ? init_hugepage_meta_section() : init_meta_section();
    if (ret != 0) {
      return ret;
    }
  }

  // Outside copy-on-write mode the file is grown to cover the new segment
  if (!copy_on_write_ && !file_.truncate(index_size_ + size)) {
    LOG_ERROR("Failed to truncate file, errno %d, %s", errno,
              std::strerror(errno));
    return IndexError_TruncateFile;
  }

  // Update segment table
  segment_ids_offset_ -= static_cast<uint32_t>(id_size);
  IndexFormat::SegmentMeta *segment = segment_start_ + footer_->segment_count;
  segment->segment_id_offset = segment_ids_offset_;
  // data_index is relative to the content start of the owning meta section
  segment->data_index =
      index_size_ - header_->content_offset - current_header_start_offset_;
  // The segment starts empty: all reserved bytes count as padding
  segment->data_size = 0;
  segment->data_crc = 0;
  segment->padding_size = size;
  memcpy((uint8_t *)segment_start_ + segment_ids_offset_, id.c_str(), id_size);
  index_size_ += size;

  // Update index footer
  footer_->segments_meta_crc =
      ailego::Crc32c::Hash(segment_start_, footer_->segments_meta_size, 0);
  footer_->segment_count += 1;
  footer_->content_size += size;
  footer_->total_size += size;
  IndexFormat::UpdateMetaFooter(footer_, 0);
  segments_.emplace(
      id, SegmentInfo{Segment{segment}, current_header_start_offset_, header_});
  header_dirty_ = true;
  return 0;
}

//! Map the data of segment `id` into memory (if not mapped yet).
//! @param id      Segment identifier.
//! @param warmup  Warm up the data bytes after mapping.
//! @param locked  Lock the whole mapped range after mapping.
//! @return The segment, or nullptr when `id` is unknown or mapping fails.
IndexMapping::Segment *IndexMapping::map(const std::string &id, bool warmup,
                                         bool locked) {
  auto found = segments_.find(id);
  if (found == segments_.end()) {
    return nullptr;
  }

  SegmentInfo &info = found->second;
  Segment *segment = &info.segment;
  if (segment->data()) {
    // Already mapped
    return segment;
  }

  auto meta = segment->meta();
  const size_t mapping_size = meta->data_size + meta->padding_size;
  const size_t offset = info.segment_header_start_offset +
                        info.segment_header->content_offset + meta->data_index;

  int opts =
      copy_on_write_ ? ailego::File::MMAP_POPULATE : ailego::File::MMAP_SHARED;
  if (huge_page_) {
    opts |= ailego::File::MMAP_HUGE_PAGE;
  }

  void *addr = nullptr;
  if (copy_on_write_) {
    const size_t file_size = file_.size();
    if (offset >= file_size) {
      // The segment lies beyond the end of the file: map without a file
      addr = ailego::File::MemoryMap(mapping_size, opts);
    } else {
      ailego_assert(offset + mapping_size <= file_size);
      addr = ailego::File::MemoryMap(file_.native_handle(), offset,
                                     mapping_size, opts);
    }
  } else {
    addr = ailego::File::MemoryMap(file_.native_handle(), offset, mapping_size,
                                   opts);
  }

  if (!addr) {
    LOG_ERROR("Map segment failed, segment id %s", id.c_str());
    return nullptr;
  }
  segment->set_data(addr);

  // Lock memory
  if (locked) {
    ailego::File::MemoryLock(segment->data(), mapping_size);
  }
  // Warmup memory
  if (warmup && meta->data_size) {
    ailego::File::MemoryWarmup(segment->data(), meta->data_size);
  }
  return segment;
}

void IndexMapping::unmap(const std::string &id) {
  auto iter = segments_.find(id);
  if (iter != segments_.end()) {
    SegmentInfo &segment_info = iter->second;
    Segment *item = &segment_info.segment;

    if (item->data()) {
      ailego::File::MemoryUnmap(
          item->data(), item->meta()->data_size + item->meta()->padding_size);
      item->set_data(nullptr);
    }
  }
}

void IndexMapping::unmap_all(void) {
  for (auto iter = segments_.begin(); iter != segments_.end(); ++iter) {
    SegmentInfo &segment_info = iter->second;
    Segment *item = &segment_info.segment;

    if (item->data()) {
      ailego::File::MemoryUnmap(
          item->data(), item->meta()->data_size + item->meta()->padding_size);
      item->set_data(nullptr);
    }
  }
}

//! Persist dirty segments and, when marked dirty, every meta section.
//! In full copy-on-write mode data is written back with file writes;
//! otherwise the mapped ranges are flushed.
//! @return 0 on success, IndexError_TruncateFile or IndexError_WriteData on
//!         failure. The header dirty flag is cleared only after the headers
//!         were persisted, so a failed flush can be retried.
int IndexMapping::flush(void) {
  // Grow the file first if it is shorter than the index
  if ((file_.size() < index_size_) && !file_.truncate(index_size_)) {
    LOG_ERROR("Failed to truncate file size %zu, errno %d, %s", index_size_,
              errno, std::strerror(errno));
    return IndexError_TruncateFile;
  }

  const bool write_back = full_mode_ && copy_on_write_;

  for (auto &entry : segments_) {
    SegmentInfo &segment_info = entry.second;
    Segment *item = &segment_info.segment;
    if (!item->data() || !item->dirty()) {
      continue;
    }

    size_t segment_size = item->meta()->data_size + item->meta()->padding_size;
    if (write_back) {
      size_t off = segment_info.segment_header_start_offset +
                   segment_info.segment_header->content_offset +
                   item->meta()->data_index;
      if (file_.write(off, item->data(), segment_size) != segment_size) {
        LOG_ERROR("Failed to write segment, size %zu, errno %d, %s",
                  segment_size, errno, std::strerror(errno));
        return IndexError_WriteData;
      }
    } else {
      ailego::File::MemoryFlush(item->data(), segment_size);
    }
    item->reset_dirty();
  }

  if (!header_dirty_) {
    return 0;
  }

  if (write_back) {
    for (const auto &entry : header_addr_map_) {
      const auto header_start_offset = entry.first;
      auto *header = entry.second;
      if (file_.write(header_start_offset, header, header->content_offset) !=
          header->content_offset) {
        LOG_ERROR("Failed to write segment, size %lu, errno %d, %s",
                  header->content_offset, errno, std::strerror(errno));
        return IndexError_WriteData;
      }
    }
  } else {
    for (const auto &entry : header_addr_map_) {
      auto *header = entry.second;
      ailego::File::MemoryFlush(header, header->content_offset);
    }
  }

  // Cleared only now: an early return above keeps the flag set
  header_dirty_ = false;
  return 0;
}

//! Map the meta section that starts at `current_header_start_offset_`,
//! validate it and register its segments; follows the chain of meta sections
//! through `next_meta_header_offset`.
//! @param len  Number of bytes to map for the meta section.
//! @return 0 on success, otherwise an IndexError_* code.
//! NOTE(review): the mapping is recorded in `header_addr_map_` before the
//! validation below, so it stays registered when a later check fails.
int IndexMapping::init_index_mapping(size_t len) {
  int opts =
      copy_on_write_ ? ailego::File::MMAP_POPULATE : ailego::File::MMAP_SHARED;
  if (huge_page_) {
    opts |= ailego::File::MMAP_HUGE_PAGE;
  }
  uint8_t *start = reinterpret_cast<uint8_t *>(ailego::File::MemoryMap(
      file_.native_handle(), current_header_start_offset_, len, opts));
  if (!start) {
    LOG_ERROR("Failed to map file, errno %d, %s", errno, std::strerror(errno));
    return IndexError_MMapFile;
  }

  // Unpack header
  header_ = reinterpret_cast<IndexFormat::MetaHeader *>(start);
  header_addr_map_.insert({current_header_start_offset_, header_});
  if (header_->meta_header_size != sizeof(IndexFormat::MetaHeader)) {
    return IndexError_InvalidLength;
  }
  if (ailego::Crc32c::Hash(header_, sizeof(*header_), header_->header_crc) !=
      header_->header_crc) {
    return IndexError_InvalidChecksum;
  }

  // Only the current format version is accepted
  switch (header_->version) {
    case IndexFormat::FORMAT_VERSION:
      break;
    default:
      LOG_ERROR("Unsupported index version: %u", header_->version);
      return IndexError_Unsupported;
  }

  // Unpack footer
  if (header_->meta_footer_size != sizeof(IndexFormat::MetaFooter)) {
    return IndexError_InvalidLength;
  }
  if ((int32_t)header_->meta_footer_offset < 0) {
    return IndexError_Unsupported;
  }
  // The footer must lie completely inside the mapped range
  size_t footer_offset = header_->meta_footer_offset;
  if (footer_offset + header_->meta_footer_size > len) {
    return IndexError_InvalidLength;
  }

  footer_ = reinterpret_cast<IndexFormat::MetaFooter *>(start + footer_offset);
  // The segment meta area ends at the footer, so it cannot be larger than
  // the footer's offset
  if (footer_offset < footer_->segments_meta_size) {
    return IndexError_InvalidLength;
  }

  // The sizes recorded in the footer must fit into the actual file
  index_size_ = file_.size();
  if ((footer_->total_size > index_size_) ||
      (footer_->content_size + footer_->content_padding_size +
           header_->content_offset >
       index_size_)) {
    return IndexError_InvalidLength;
  }
  if (ailego::Crc32c::Hash(footer_, sizeof(*footer_), footer_->footer_crc) !=
      footer_->footer_crc) {
    return IndexError_InvalidChecksum;
  }

  // Unpack segment table
  if (sizeof(IndexFormat::SegmentMeta) * footer_->segment_count >
      footer_->segments_meta_size) {
    return IndexError_InvalidLength;
  }

  segment_start_ = reinterpret_cast<IndexFormat::SegmentMeta *>(
      start + (footer_offset - footer_->segments_meta_size));
  if (ailego::Crc32c::Hash(segment_start_, footer_->segments_meta_size, 0u) !=
      footer_->segments_meta_crc) {
    LOG_ERROR("Index segments meta checksum is invalid.");
    return IndexError_InvalidChecksum;
  }

  // Walk the segment records; `segment_ids_offset_` ends up as the lowest id
  // offset seen, i.e. where the id strings currently begin
  segment_ids_offset_ = footer_->segments_meta_size;
  for (IndexFormat::SegmentMeta *iter = segment_start_,
                                *end = segment_start_ + footer_->segment_count;
       iter != end; ++iter) {
    if (iter->segment_id_offset > footer_->segments_meta_size) {
      return IndexError_InvalidValue;
    }
    if (iter->data_index > footer_->content_size) {
      return IndexError_InvalidValue;
    }
    if (iter->data_index + iter->data_size > footer_->content_size) {
      return IndexError_InvalidLength;
    }

    if (iter->segment_id_offset < segment_ids_offset_) {
      segment_ids_offset_ = iter->segment_id_offset;
    }
    // The id is read as a NUL-terminated string inside the meta area
    segments_.emplace(
        std::string(reinterpret_cast<const char *>(segment_start_) +
                    iter->segment_id_offset),
        SegmentInfo{Segment{iter}, current_header_start_offset_, header_});
  }
  // The records must not overlap the id strings
  if (sizeof(IndexFormat::SegmentMeta) * footer_->segment_count >
      segment_ids_offset_) {
    return IndexError_InvalidLength;
  }

  // if (header_->version == IndexFormat::COMPATIBLE_FORMAT_VERSION_0X0002) {
  //   header_->version = IndexFormat::CURRENT_FORMAT_VERSION;
  //   LOG_INFO("Index file format upgraded");
  //   IndexFormat::UpdateMetaHeader(header_);
  //   footer_->segments_meta_crc =
  //       ailego::Crc32c::Hash(segment_start_, footer_->segments_meta_size, 0);
  //   IndexFormat::UpdateMetaFooter(footer_, 0);
  //   header_dirty_ = true;
  // }

  // A non-zero next offset chains to a further meta section; it is mapped
  // recursively and becomes the current one
  if (footer_->next_meta_header_offset > 0) {
    current_header_start_offset_ = footer_->next_meta_header_offset;
    // Meta sections have all the same size, so we can use the same size to map
    // the next meta section
    return this->init_index_mapping(len);
  }

  return 0;
}

bool IndexMapping::Ishugetlbfs(const std::string &path) const {
#ifdef __linux__
  struct statfs buf;
  if (statfs(path.c_str(), &buf) != 0) {
    perror("statfs");
    return false;
  }
  return static_cast<unsigned long>(buf.f_type) == HUGETLBFS_MAGIC;
#else
  static_cast<void>(path);
  return false;
#endif
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/framework/index_meta.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <zvec/ailego/encoding/json.h>
#include <zvec/core/framework/index_meta.h>

namespace zvec {
namespace core {

/*! Index Meta Buffer Format
 *  Fixed-size header placed at the start of a serialized IndexMeta buffer.
 *  It holds nine 32-bit fields followed by reserved bytes, 4128 bytes in
 *  total. The field order and sizes define the serialized layout and must
 *  not change.
 */
struct IndexMetaFormatHeader {
  uint32_t header_size;        //!< Size of this header in bytes
  uint32_t meta_type;          //!< IndexMeta::MetaType as integer
  uint32_t major_order;        //!< IndexMeta::MajorOrder as integer
  uint32_t data_type;          //!< IndexMeta::DataType as integer
  uint32_t dimension;          //!< Dimension
  uint32_t unit_size;          //!< Unit size
  uint32_t space_id;           //!< Space id
  uint32_t attachment_offset;  //!< Offset of the attachment in the buffer
  uint32_t attachment_size;    //!< Size of the attachment (0 if absent)
  uint8_t reserved_[4092];     //!< Reserved bytes, written as zeros
};

static_assert(sizeof(IndexMetaFormatHeader) % 32 == 0,
              "sizeof(IndexMetaFormatHeader) must be a multiple of 32 bytes");

void IndexMeta::serialize(std::string *out) const {
  ailego::Params attachment;
  IndexMetaFormatHeader format;
  memset(&format, 0, sizeof(format));
  format.header_size = sizeof(format);
  format.meta_type = static_cast<uint32_t>(meta_type_);
  format.major_order = static_cast<uint32_t>(major_order_);
  format.data_type = static_cast<uint32_t>(data_type_);
  format.dimension = dimension_;
  format.unit_size = unit_size_;
  format.space_id = space_id_;

  if (!metric_name_.empty()) {
    ailego::Params item;
    item.set("name", metric_name_);
    item.set("revision", metric_revision_);
    item.set("params", metric_params_);
    attachment.set("metric", std::move(item));
  }

  if (!converter_name_.empty()) {
    ailego::Params item;
    item.set("name", converter_name_);
    item.set("revision", converter_revision_);
    item.set("params", converter_params_);
    attachment.set("converter", std::move(item));
  }
  if (!reformer_name_.empty()) {
    ailego::Params item;
    item.set("name", reformer_name_);
    item.set("revision", reformer_revision_);
    item.set("params", reformer_params_);
    attachment.set("reformer", std::move(item));
  }
  if (!trainer_name_.empty()) {
    ailego::Params item;
    item.set("name", trainer_name_);
    item.set("revision", trainer_revision_);
    item.set("params", trainer_params_);
    attachment.set("trainer", std::move(item));
  }
  if (!builder_name_.empty()) {
    ailego::Params item;
    item.set("name", builder_name_);
    item.set("revision", builder_revision_);
    item.set("params", builder_params_);
    attachment.set("builder", std::move(item));
  }
  if (!reducer_name_.empty()) {
    ailego::Params item;
    item.set("name", reducer_name_);
    item.set("revision", reducer_revision_);
    item.set("params", reducer_params_);
    attachment.set("reducer", std::move(item));
  }
  if (!searcher_name_.empty()) {
    ailego::Params item;
    item.set("name", searcher_name_);
    item.set("revision", searcher_revision_);
    item.set("params", searcher_params_);
    attachment.set("searcher", std::move(item));
  }
  if (!streamer_name_.empty()) {
    ailego::Params item;
    item.set("name", streamer_name_);
    item.set("revision", streamer_revision_);
    item.set("params", streamer_params_);
    attachment.set("streamer", std::move(item));
  }

  if (!attributes_.empty()) {
    attachment.set("attributes", attributes_);
  }

  out->assign(reinterpret_cast<const char *>(&format), sizeof(format));
  size_t offset = static_cast<uint32_t>(out->size());

  if (!attachment.empty()) {
    std::string buf;
    ailego::Params::SerializeToBuffer(attachment, &buf);
    out->append(buf.data(), buf.size());
    IndexMetaFormatHeader *header = (IndexMetaFormatHeader *)out->data();
    header->attachment_offset = static_cast<uint32_t>(offset);
    header->attachment_size = static_cast<uint32_t>(buf.size());
    offset += buf.size();
  }
}

bool IndexMeta::deserialize(const void *data, size_t len) {
  const IndexMetaFormatHeader *format =
      reinterpret_cast<const IndexMetaFormatHeader *>(data);

  this->clear();
  if (sizeof(IndexMetaFormatHeader) > len) {
    return false;
  }
  if (sizeof(IndexMetaFormatHeader) > format->header_size) {
    return false;
  }

  meta_type_ = static_cast<IndexMeta::MetaType>(format->meta_type);
  major_order_ = static_cast<IndexMeta::MajorOrder>(format->major_order);
  data_type_ = static_cast<IndexMeta::DataType>(format->data_type);
  dimension_ = format->dimension;
  unit_size_ = format->unit_size;
  element_size_ = IndexMeta::ElementSizeof(data_type_, unit_size_, dimension_);
  space_id_ = format->space_id;

  // Read attachment
  ailego::Params attachment;
  if (format->attachment_size) {
    if (format->attachment_offset + format->attachment_size > len) {
      return false;
    }
    std::string str(
        reinterpret_cast<const char *>(data) + format->attachment_offset,
        format->attachment_size);
    if (!ailego::Params::ParseFromBuffer(str, &attachment)) {
      return false;
    }
  }

  ailego::Params item;
  if (attachment.get("metric", &item)) {
    item.get("name", &metric_name_);
    item.get("revision", &metric_revision_);
    item.get("params", &metric_params_);
  }
  if (attachment.get("converter", &item)) {
    item.get("name", &converter_name_);
    item.get("revision", &converter_revision_);
    item.get("params", &converter_params_);
  }
  if (attachment.get("reformer", &item)) {
    item.get("name", &reformer_name_);
    item.get("revision", &reformer_revision_);
    item.get("params", &reformer_params_);
  }
  if (attachment.get("trainer", &item)) {
    item.get("name", &trainer_name_);
    item.get("revision", &trainer_revision_);
    item.get("params", &trainer_params_);
  }
  if (attachment.get("builder", &item)) {
    item.get("name", &builder_name_);
    item.get("revision", &builder_revision_);
    item.get("params", &builder_params_);
  }
  if (attachment.get("reducer", &item)) {
    item.get("name", &reducer_name_);
    item.get("revision", &reducer_revision_);
    item.get("params", &reducer_params_);
  }
  if (attachment.get("searcher", &item)) {
    item.get("name", &searcher_name_);
    item.get("revision", &searcher_revision_);
    item.get("params", &searcher_params_);
  }
  if (attachment.get("streamer", &item)) {
    item.get("name", &streamer_name_);
    item.get("revision", &streamer_revision_);
    item.get("params", &streamer_params_);
  }
  attachment.get("attributes", &attributes_);

  return true;
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/framework/index_plugin.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <ailego/utility/dl_helper.h>
#include <zvec/core/framework/index_plugin.h>

namespace zvec {
namespace core {

//! Load the library at `path` without collecting an error message.
//! @return false when a library is already loaded or loading fails.
bool IndexPlugin::load(const std::string &path) {
  const bool already_loaded = !!handle_;
  if (already_loaded) {
    return false;
  }

  handle_ = ailego::DLHelper::Load(path, nullptr);
  return handle_ ? true : false;
}

//! Load the library at `path`, reporting failures through `err`.
//! @param path  Library to load.
//! @param err   Receives an error description; may be null, in which case no
//!              message is produced.
//! @return false when a library is already loaded or loading fails.
bool IndexPlugin::load(const std::string &path, std::string *err) {
  if (handle_) {
    // `err` is optional: the single-argument overload passes null to the
    // same loader, so it must not be written through unconditionally.
    if (err) {
      *err = "plugin loaded";
    }
    return false;
  }
  handle_ = ailego::DLHelper::Load(path, err);
  return !!handle_;
}

//! Unload the library, if one is loaded, and forget its handle.
void IndexPlugin::unload(void) {
  if (!handle_) {
    return;
  }
  ailego::DLHelper::Unload(handle_);
  handle_ = nullptr;
}

//! Take over a loaded plugin.
//! When a plugin with the same handle is already registered, the incoming
//! one is unloaded instead of being stored a second time.
//! @return false only when `plugin` is not valid.
bool IndexPluginBroker::emplace(IndexPlugin &&plugin) {
  if (!plugin.is_valid()) {
    return false;
  }

  for (auto &registered : plugins_) {
    if (registered.handle() == plugin.handle()) {
      plugin.unload();
      return true;
    }
  }

  plugins_.push_back(std::move(plugin));
  return true;
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/framework/index_version.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <ailego/version.i>
#include <zvec/core/framework/index_version.h>


namespace zvec {
namespace core {

//! Version text built by the AILEGO_VERSION_COMPILE_DETAILS macro from the
//! given notice; returned by both IndexVersion accessors in this file.
static const char AITHETA_VERSION_DETAILS[] =
    AILEGO_VERSION_COMPILE_DETAILS("All rights reserved.\n");

// Version string of the library. Currently this is the same static,
// NUL-terminated text that Details() returns.
const char *IndexVersion::String() {
  return &AITHETA_VERSION_DETAILS[0];
}

// Detailed version/build text of the library, as a pointer to a static,
// NUL-terminated buffer.
const char *IndexVersion::Details() {
  return &AITHETA_VERSION_DETAILS[0];
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/interface/CMakeLists.txt
================================================
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

# core_interface: library built from every .cc file in this directory and in
# indexes/, linked against the ailego/core_framework targets and the
# third-party helpers listed under LIBS.
# NOTE(review): STATIC / STRICT / ALWAYS_LINK are options of the project's
# cc_library() helper (presumably defined in cmake/bazel.cmake) - check there
# for their exact effect.
cc_library(
        NAME core_interface STATIC STRICT ALWAYS_LINK
        SRCS *.cc indexes/*.cc
        INCS . ${PROJECT_ROOT_DIR}/src/ ${PROJECT_ROOT_DIR}/src/core
        LIBS zvec_ailego core_framework sparsehash magic_enum rabitqlib
        VERSION "${PROXIMA_ZVEC_VERSION}"
)


================================================
FILE: src/core/interface/index.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <magic_enum/magic_enum.hpp>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_storage.h>
#include <zvec/core/interface/index.h>
#include "mixed_reducer/mixed_reducer_params.h"

namespace zvec::core_interface {

// Per-thread cache of index contexts; it removes the need to pre-allocate a
// context pool. Slots start out null and are filled lazily by
// Index::init_context().
// Slot layout: (enum_integer(index_type) - 1) * 2 + is_sparse, i.e. two slots
// (dense, sparse) per index type, shared by every Index of that kind that is
// used on the thread.
// NOTE(review): the "- 1" presumably skips an IndexType enumerator with value
// 0 that is never instantiated - confirm against the enum definition.
thread_local static std::array<core::IndexContext::Pointer,
                               (magic_enum::enum_count<IndexType>() - 1) * 2>
    _context_list;


// Compute this index's slot in the thread-local context cache and make sure
// the slot holds a context, creating one from the streamer on first use.
// Returns false when the streamer cannot create a context.
bool Index::init_context() {
  const auto type_slot = magic_enum::enum_integer(param_.index_type) - 1;
  context_index_ = type_slot * 2 + static_cast<size_t>(is_sparse_);

  auto &cached = _context_list[context_index_];
  if (cached != nullptr) {
    return true;
  }
  cached = streamer_->create_context();
  if (cached == nullptr) {
    LOG_ERROR("Failed to create context");
    return false;
  }
  return true;
}

// Return the calling thread's cached context for this index, creating it on
// demand. The init result is deliberately not inspected here: when creation
// fails the slot stays null, and callers test the returned pointer.
core::IndexContext::Pointer &Index::acquire_context() {
  init_context();
  auto &cached = _context_list[context_index_];
  return cached;
}

// Translate param.metric_type into the registered metric name and store it in
// proxima_index_meta_ (with empty metric params).
// Sparse indexes accept kInnerProduct and kMIPSL2sq; dense indexes accept
// kL2sq, kInnerProduct, kCosine and kMIPSL2sq.
// Returns 0 on success, core::IndexError_Runtime for any other metric type.
int Index::ParseMetricName(const BaseIndexParam &param) {
  const char *selected = nullptr;
  if (!is_sparse_) {
    switch (param.metric_type) {
      case MetricType::kL2sq:
        selected = "SquaredEuclidean";
        break;
      case MetricType::kInnerProduct:
        selected = "InnerProduct";
        break;
      case MetricType::kCosine:
        selected = "Cosine";  // This is already the normalizedCosine
        break;
      case MetricType::kMIPSL2sq:
        selected = "MipsSquaredEuclidean";
        break;
      default:
        break;
    }
  } else {
    switch (param.metric_type) {
      case MetricType::kInnerProduct:
        selected = "InnerProductSparse";
        break;
      case MetricType::kMIPSL2sq:
        selected = "MipsSquaredEuclideanSparse";
        break;
      default:
        break;
    }
  }
  if (selected == nullptr) {
    LOG_ERROR("Unsupported metric type");
    return core::IndexError_Runtime;
  }

  // TODO: MIPS need to set some param
  // for streamer open()
  std::string metric_name(selected);
  proxima_index_meta_.set_metric(metric_name, 0, ailego::Params());
  return 0;
}

// Instantiate the metric named in proxima_index_meta_ and initialise it with
// the metric params stored in the same meta. When the metric exposes a
// dedicated query metric, that one replaces metric_.
// Returns core::IndexError_Success, or core::IndexError_Runtime when the
// metric cannot be created or initialised.
int Index::CreateAndInitMetric(const BaseIndexParam & /*param*/) {
  auto &name = proxima_index_meta_.metric_name();

  metric_ = core::IndexFactory::CreateMetric(name);
  if (!metric_) {
    LOG_ERROR("Failed to create metric, name %s", name.c_str());
    return core::IndexError_Runtime;
  }

  const auto init_code =
      metric_->init(proxima_index_meta_, proxima_index_meta_.metric_params());
  if (init_code != 0) {
    LOG_ERROR("Failed to create and init metric, name %s, code %d, desc: %s",
              name.c_str(), init_code, core::IndexError::What(init_code));
    return core::IndexError_Runtime;
  }

  // Prefer the query-side metric when one is provided.
  if (metric_->query_metric()) {
    metric_ = metric_->query_metric();
  }
  return core::IndexError_Success;
}

int Index::CreateAndInitConverterReformer(const QuantizerParam &param,
                                          const BaseIndexParam &index_param) {
  ailego::Params converter_params;
  std::string converter_name;
  if (is_sparse_) {
    switch (param.type) {
      case QuantizerType::kNone:
        return core::IndexError_Success;
      case QuantizerType::kFP16:
        converter_name = "HalfFloatSparseConverter";
        break;
      default:
        LOG_ERROR("Unsupported quantizer type: ");
        return core::IndexError_Unsupported;
    }
  } else {
    if (index_param.metric_type == MetricType::kCosine) {
      switch (param.type) {
        case QuantizerType::kNone:
          if (index_param.data_type == DataType::DT_FP16) {
            converter_name = "CosineHalfFloatConverter";
          } else if (index_param.data_type == DataType::DT_FP32) {
            converter_name = "CosineNormalizeConverter";
          } else {
            LOG_ERROR("Unsupported data type: ");
            return core::IndexError_Unsupported;
          }
          break;
        case QuantizerType::kRabitq:
          if (index_param.data_type == DataType::DT_FP32) {
            converter_name = "CosineNormalizeConverter";
          } else {
            LOG_ERROR("Unsupported data type: ");
            return core::IndexError_Unsupported;
          }
          break;
        case QuantizerType::kFP16:
          converter_name = "CosineFp16Converter";
          break;
        case QuantizerType::kInt8:
          converter_name = "CosineInt8Converter";
          break;
        case QuantizerType::kInt4:
          converter_name = "CosineInt4Converter";
          break;
        default:
          LOG_ERROR("Unsupported quantizer type: ");
          return core::IndexError_Unsupported;
      }
    } else {
      switch (param.type) {
        case QuantizerType::kNone:
          return core::IndexError_Success;
        case QuantizerType::kFP16:
          converter_name = "HalfFloatConverter";
          break;
        case QuantizerType::kInt8:
          converter_name = "Int8StreamingConverter";
          break;
        case QuantizerType::kInt4:
          converter_name = "Int4StreamingConverter";
          break;
        case QuantizerType::kRabitq:
          // no converter here
          return 0;
        default:
          LOG_ERROR("Unsupported quantizer type: ");
          return core::IndexError_Unsupported;
      }
    }
  }

  proxima_index_meta_.set_converter(converter_name, 0, converter_params);
  converter_ = core::IndexFactory::CreateConverter(converter_name);
  if (converter_ == nullptr ||
      converter_->init(proxima_index_meta_, converter_params) != 0) {
    LOG_ERROR("Failed to create and init converter");
    return core::IndexError_Runtime;
  }

  proxima_index_meta_ = converter_->meta();
  reformer_ =
      core::IndexFactory::CreateReformer(proxima_index_meta_.reformer_name());
  if (reformer_ == nullptr ||
      reformer_->init(proxima_index_meta_.reformer_params()) != 0) {
    LOG_ERROR("Failed to create and init reformer");
    return core::IndexError_Runtime;
  }
  streamer_vector_meta_.set_meta(proxima_index_meta_.data_type(),
                                 proxima_index_meta_.dimension());
  streamer_vector_meta_.set_meta_type(proxima_index_meta_.meta_type());

  return core::IndexError_Success;
}

// Configure the index from `param`: record the parameters, set up the vector
// metas, then build metric name, converter/reformer, metric and streamer in
// that order. Returns 0 on success and core::IndexError_Runtime as soon as
// one of the steps fails.
int Index::Init(const BaseIndexParam &param) {
  // Copying into the base type drops the derived parameter's type info.
  param_ = param;
  is_sparse_ = param.is_sparse;
  is_huge_page_ = param.is_huge_page;

  const auto meta_type = is_sparse_ ? IndexMeta::MetaType::MT_SPARSE
                                    : IndexMeta::MetaType::MT_DENSE;
  proxima_index_meta_.set_meta(param.data_type, param.dimension);
  proxima_index_meta_.set_meta_type(meta_type);

  // Until a converter says otherwise, the streamer stores vectors exactly as
  // they are supplied.
  input_vector_meta_.set_meta(proxima_index_meta_.data_type(),
                              proxima_index_meta_.dimension());
  input_vector_meta_.set_meta_type(proxima_index_meta_.meta_type());
  streamer_vector_meta_ = input_vector_meta_;

  // when quantizer=int8/int4, the converter.init() will change the metric to
  // QuantizedInteger with params
  const int metric_name_ret = ParseMetricName(param);
  if (metric_name_ret != 0) {
    LOG_ERROR("Failed to parse metric name");
    return core::IndexError_Runtime;
  }

  const int converter_ret =
      CreateAndInitConverterReformer(param.quantizer_param, param);
  if (converter_ret != 0) {
    LOG_ERROR("Failed to create and init converter");
    return core::IndexError_Runtime;
  }

  // must after quantizer handled. e.g., cosine doesn't support int8 quantizer
  const int metric_ret = CreateAndInitMetric(param);
  if (metric_ret != 0) {
    LOG_ERROR("Failed to create and init metric");
    return core::IndexError_Runtime;
  }

  const int streamer_ret = CreateAndInitStreamer(param);
  if (streamer_ret != 0) {
    LOG_ERROR("Failed to create and init streamer");
    return core::IndexError_Runtime;
  }
  return 0;
}


// Open the index stored at `file_path`.
// Creates and initialises the storage backend selected by
// storage_options.type, opens the storage (honouring create_new), opens the
// streamer on top of it and makes sure a context can be created on the
// calling thread. On success the index is marked open and its read-only flag
// is taken from storage_options.read_only.
// Returns 0 on success; the storage init error code when storage init fails;
// core::IndexError_Unsupported for an unknown storage type; otherwise
// core::IndexError_Runtime.
int Index::Open(const std::string &file_path, StorageOptions storage_options) {
  ailego::Params storage_params;
  // storage_params.set("proxima.mmap_file.storage.memory_warmup", true);
  // storage_params.set("proxima.mmap_file.storage.segment_meta_capacity",
  // 1024);
  const auto storage_type = storage_options.type;
  if (storage_type == StorageOptions::StorageType::kMMAP) {
    storage_ = core::IndexFactory::CreateStorage("MMapFileStorage");
    if (storage_ == nullptr) {
      LOG_ERROR("Failed to create MMapFileStorage");
      return core::IndexError_Runtime;
    }
    const int init_ret = storage_->init(storage_params);
    if (init_ret != 0) {
      LOG_ERROR("Failed to init MMapFileStorage, path: %s, err: %s",
                file_path.c_str(), core::IndexError::What(init_ret));
      return init_ret;
    }
  } else if (storage_type == StorageOptions::StorageType::kBufferPool) {
    storage_ = core::IndexFactory::CreateStorage("BufferStorage");
    if (storage_ == nullptr) {
      LOG_ERROR("Failed to create BufferStorage");
      return core::IndexError_Runtime;
    }
    const int init_ret = storage_->init(storage_params);
    if (init_ret != 0) {
      LOG_ERROR("Failed to init BufferStorage, path: %s, err: %s",
                file_path.c_str(), core::IndexError::What(init_ret));
      return init_ret;
    }
  } else {
    LOG_ERROR("Unsupported storage type");
    return core::IndexError_Unsupported;
  }

  // read_options.create_new
  const int open_ret = storage_->open(file_path, storage_options.create_new);
  if (open_ret != 0) {
    LOG_ERROR("Failed to open storage, path: %s, err: %s", file_path.c_str(),
              core::IndexError::What(open_ret));
    return core::IndexError_Runtime;
  }
  if (streamer_ == nullptr || streamer_->open(storage_) != 0) {
    LOG_ERROR("Failed to open streamer, path: %s", file_path.c_str());
    return core::IndexError_Runtime;
  }

  // converter/reformer/metric are created in IndexFactory::CreateIndex
  // TODO: init

  // TODO: context pool
  // Creating the context here surfaces errors early; it will be overwritten.
  const bool context_ready = init_context();
  if (!context_ready) {
    LOG_ERROR("Failed to init context");
    return core::IndexError_Runtime;
  }

  is_open_ = true;
  is_read_only_ = storage_options.read_only;
  return 0;
}

// Close an open index.
// Writable indexes are flushed first; then the streamer is cleaned up and the
// storage closed. The index is only marked closed when every step succeeded.
// Returns 0 on success and core::IndexError_Runtime when the index is not
// open or any step fails.
int Index::Close() {
  if (!is_open_) {
    LOG_ERROR("Index is not open");
    return core::IndexError_Runtime;
  }

  if (!is_read_only_) {
    if (ailego_unlikely(Flush() != 0)) {
      // Distinct message so a flush failure is not mistaken for the
      // streamer-cleanup failure reported below.
      LOG_ERROR("Failed to flush index before close");
      return core::IndexError_Runtime;
    }
  }
  if (ailego_unlikely(streamer_->cleanup() != 0)) {
    LOG_ERROR("Failed to cleanup streamer");
    return core::IndexError_Runtime;
  }
  if (ailego_unlikely(storage_->close() != 0)) {
    LOG_ERROR("Failed to close storage");
    return core::IndexError_Runtime;
  }
  is_open_ = false;
  return 0;
}

int Index::Flush() {
  if (!is_open_) {
    LOG_ERROR("Index is not open");
    return core::IndexError_Runtime;
  }

  if (is_read_only_) {
    LOG_ERROR("Cannot flush read-only index");
    return core::IndexError_Runtime;
  }
  if (ailego_unlikely(streamer_->flush(0) != 0)) {
    LOG_ERROR("Failed to flush streamer");
    return core::IndexError_Runtime;
  }
  if (ailego_unlikely(storage_->flush() != 0)) {
    LOG_ERROR("Failed to flush storage");
    return core::IndexError_Runtime;
  }
  return 0;
}

// Read back the vector stored under `doc_id` into `vector_data_buffer`,
// dispatching to the sparse or dense implementation.
// Returns core::IndexError_Runtime when the index is not open; otherwise the
// result of the selected fetch routine.
int Index::Fetch(const uint32_t doc_id, VectorDataBuffer *vector_data_buffer) {
  if (!is_open_) {
    LOG_ERROR("Index is not open");
    return core::IndexError_Runtime;
  }
  return is_sparse_ ? _sparse_fetch(doc_id, vector_data_buffer)
                    : _dense_fetch(doc_id, vector_data_buffer);
}

// Insert `vector_data` under `doc_id`.
// Requires an open, writable index. The thread-local context is used for the
// insertion and reset afterwards, whether or not the insertion succeeded.
// Returns the result of the sparse/dense add routine, or
// core::IndexError_Runtime when a precondition fails.
int Index::Add(const VectorData &vector_data, const uint32_t doc_id) {
  if (!is_open_) {
    LOG_ERROR("Index is not open");
    return core::IndexError_Runtime;
  }
  if (is_read_only_) {
    LOG_ERROR("Cannot add to read-only index");
    return core::IndexError_Runtime;
  }

  auto &context = acquire_context();
  if (!context) {
    LOG_ERROR("Failed to acquire context");
    return core::IndexError_Runtime;
  }

  const int ret = is_sparse_ ? _sparse_add(vector_data, doc_id, context)
                             : _dense_add(vector_data, doc_id, context);
  context->reset();
  return ret;
}


// Run a query against the index.
//
// Flow:
//  1. validate state and arguments, train lazily if needed;
//  2. acquire the thread-local context and prepare it for the query;
//  3. sparse indexes: search directly;
//  4. dense indexes without a refiner: search directly;
//  5. dense indexes with a refiner: run a coarse search for
//     _get_coarse_search_topk() candidates without fetching vectors, then
//     re-rank those candidate keys on the (flat) reference index.
// The context is reset exactly once on every path after it was acquired.
//
// @param vector_data   Query vector.
// @param search_param  Query parameters; must not be null.
// @param result        Output; must not be null.
// @return 0 on success, core::IndexError_InvalidArgument for null arguments,
//         otherwise core::IndexError_Runtime or the code of the underlying
//         search routine.
int Index::Search(const VectorData &vector_data,
                  const BaseIndexQueryParam::Pointer &search_param,
                  SearchResult *result) {
  if (!is_open_) {
    LOG_ERROR("Index is not open");
    return core::IndexError_Runtime;
  }

  // Both are dereferenced unconditionally further down.
  if (search_param == nullptr || result == nullptr) {
    LOG_ERROR("Search param and result must not be null");
    return core::IndexError_InvalidArgument;
  }

  if (!is_trained_ && this->Train() != 0) {
    LOG_ERROR("Failed to train index");
    return core::IndexError_Runtime;
  }

  auto &context = acquire_context();
  if (!context) {
    LOG_ERROR("Failed to acquire context");
    return core::IndexError_Runtime;
  }

  if (_prepare_for_search(vector_data, search_param, context) != 0) {
    LOG_ERROR("Failed to prepare for search");
    context->reset();
    return core::IndexError_Runtime;
  }

  if (is_sparse_) {
    const int ret = _sparse_search(vector_data, search_param, result, context);
    context->reset();
    return ret;
  }

  // dense support refiner, but sparse doesn't
  if (search_param->refiner_param == nullptr) {
    const int ret = _dense_search(vector_data, search_param, result, context);
    context->reset();
    return ret;
  }

  auto &reference_index = search_param->refiner_param->reference_index;
  if (reference_index == nullptr) {
    LOG_ERROR("Reference index is not set");
    context->reset();
    return core::IndexError_Runtime;
  }
  // TODO: tackle query_param's type info loss to loosen the constraint
  if (reference_index->param_.index_type != IndexType::kFlat) {
    LOG_ERROR("Reference index is not flat");
    context->reset();
    return core::IndexError_Runtime;
  }

  // Coarse pass: widen top-k, skip vector payloads.
  context->set_topk(_get_coarse_search_topk(search_param));
  context->set_fetch_vector(false);  // no need to fetch vector
  if (_dense_search(vector_data, search_param, result, context) != 0) {
    LOG_ERROR("Failed to search");
    context->reset();
    return core::IndexError_Runtime;
  }

  // Collect the candidate keys for the refinement pass.
  auto &base_result = context->result();
  std::vector<uint64_t> keys(base_result.size());
  for (size_t i = 0; i < base_result.size(); ++i) {
    keys[i] = base_result[i].key();
  }

  FlatQueryParam::Pointer flat_search_param =
      std::make_shared<FlatQueryParam>();
  flat_search_param->topk = search_param->topk;
  flat_search_param->fetch_vector = search_param->fetch_vector;
  flat_search_param->filter = search_param->filter;
  // TODO: should copy other params?
  // `keys` is not needed afterwards, so hand it over instead of copying.
  flat_search_param->bf_pks =
      std::make_shared<std::vector<uint64_t>>(std::move(keys));

  const int ret =
      reference_index->Search(vector_data, flat_search_param, result);
  context->reset();
  return ret;
}


int Index::_dense_fetch(const uint32_t doc_id,
                        VectorDataBuffer *vector_data_buffer) {
  core::IndexStorage::MemoryBlock vector_block;
  int ret = streamer_->get_vector_by_id(doc_id, vector_block);
  if (ret != 0) {
    LOG_ERROR("Failed to fetch vector, doc_id: %u", doc_id);
    return core::IndexError_Runtime;
  }
  const void *vector = vector_block.data();

  DenseVectorBuffer dense_vector_buffer;
  std::string &out_vector_buffer = dense_vector_buffer.data;
  // for int4, unit_size * dim != element_size
  out_vector_buffer.resize(input_vector_meta_.element_size());

  if (reformer_ != nullptr) {
    if (reformer_->revert(vector, streamer_vector_meta_, &out_vector_buffer) !=
        0) {
      LOG_ERROR("Failed to convert vector");
      return core::IndexError_Runtime;
    }
  } else {
    out_vector_buffer = std::string(
        static_cast<const char *>(vector),
        input_vector_meta_.dimension() * input_vector_meta_.unit_size());
  }
  vector_data_buffer->vector_buffer = std::move(dense_vector_buffer);
  return 0;
}


int Index::_sparse_fetch(const uint32_t doc_id,
                         VectorDataBuffer *vector_data_buffer) {
  SparseVectorBuffer sparse_vector_buffer;

  if (0 != streamer_->get_sparse_vector_by_id(
               doc_id, &sparse_vector_buffer.count,
               &sparse_vector_buffer.indices, &sparse_vector_buffer.values)) {
    LOG_ERROR("Failed to fetch vector");
    return core::IndexError_Runtime;
  }

  if (reformer_ != nullptr) {
    std::string reverted_sparse_values_buffer;
    if (reformer_->revert(
            sparse_vector_buffer.count, sparse_vector_buffer.get_indices(),
            sparse_vector_buffer.get_values(), streamer_vector_meta_,
            &reverted_sparse_values_buffer) != 0) {
      LOG_ERROR("Failed to convert vector");
      return core::IndexError_Runtime;
    }
    sparse_vector_buffer.values = std::move(reverted_sparse_values_buffer);
  }
  vector_data_buffer->vector_buffer = std::move(sparse_vector_buffer);
  return 0;
}

int Index::_dense_add(const VectorData &vector_data, const uint32_t doc_id,
                      core::IndexContext::Pointer &context) {
  if (!std::holds_alternative<DenseVector>(vector_data.vector)) {
    LOG_ERROR("Invalid vector data");
    return core::IndexError_Runtime;
  }
  const DenseVector &dense_vector = std::get<DenseVector>(vector_data.vector);
  if (reformer_ != nullptr) {
    core::IndexQueryMeta new_meta;
    std::string new_vector;
    int ret;
    ret = reformer_->convert(dense_vector.data, input_vector_meta_, &new_vector,
                             &new_meta);
    if (ret != 0) {
      LOG_ERROR("Failed to convert vector");
      return core::IndexError_Runtime;
    }
    ret = streamer_->add_with_id_impl(doc_id, new_vector.data(), new_meta,
                                      context);
    if (ret != 0) {
      LOG_ERROR("Failed to add vector");
      return core::IndexError_Runtime;
    }
  } else {
    int ret = streamer_->add_with_id_impl(doc_id, dense_vector.data,
                                          input_vector_meta_, context);
    if (ret != 0) {
      LOG_ERROR("Failed to add vector");
      return core::IndexError_Runtime;
    }
  }
  return 0;
}


int Index::_sparse_add(const VectorData &vector_data, const uint32_t doc_id,
                       core::IndexContext::Pointer &context) {
  if (!std::holds_alternative<SparseVector>(vector_data.vector)) {
    LOG_ERROR("Invalid vector data");
    return core::IndexError_Runtime;
  }
  const SparseVector &sparse_vector =
      std::get<SparseVector>(vector_data.vector);

  if (reformer_ != nullptr) {
    std::string converted_sparse_values_buffer;
    core::IndexQueryMeta new_meta;
    int ret;
    ret = reformer_->convert(sparse_vector.count, sparse_vector.get_indices(),
                             sparse_vector.get_values(), input_vector_meta_,
                             &converted_sparse_values_buffer, &new_meta);
    if (ret != 0) {
      LOG_ERROR("Failed to convert vector");
      return core::IndexError_Runtime;
    }
    ret = streamer_->add_with_id_impl(
        doc_id, sparse_vector.count, sparse_vector.get_indices(),
        converted_sparse_values_buffer.data(), new_meta, context);
    if (ret != 0) {
      LOG_ERROR("Failed to add vector");
      return core::IndexError_Runtime;
    }
  } else {
    int ret = streamer_->add_with_id_impl(
        doc_id, sparse_vector.count, sparse_vector.get_indices(),
        sparse_vector.get_values(), input_vector_meta_, context);
    if (ret != 0) {
      LOG_ERROR("Failed to add vector");
      return core::IndexError_Runtime;
    }
  }
  return 0;
}


// Execute a single dense query on the streamer and fill `result`.
//
// Steps: validate that `vector_data` holds a DenseVector; transform the query
// through the reformer when one is configured; dispatch to key-restricted
// brute force (bf_pks set), plain brute force (is_linear) or the regular
// search; copy the hit list into `result`; normalise scores via the metric
// and the reformer; and, if vectors were requested and the reformer needs it,
// revert every returned vector into result->reverted_vector_list_.
//
// The context is not reset here on any path; Index::Search() does that after
// this function returns.
//
// Returns 0 on success, core::IndexError_Runtime on any failure.
int Index::_dense_search(const VectorData &vector_data,
                         const BaseIndexQueryParam::Pointer &search_param,
                         SearchResult *result,
                         core::IndexContext::Pointer &context) {
  if (!std::holds_alternative<DenseVector>(vector_data.vector)) {
    LOG_ERROR("Invalid vector data");
    return core::IndexError_Runtime;
  }
  const DenseVector &dense_vector = std::get<DenseVector>(vector_data.vector);
  // `vector` / `new_meta` describe what is actually sent to the streamer:
  // the raw input by default, the reformer's output when one is configured.
  auto vector = dense_vector.data;
  // Check if need to transform feature
  std::string new_vector;
  core::IndexQueryMeta new_meta = input_vector_meta_;
  if (reformer_ != nullptr) {
    if (reformer_->transform(dense_vector.data, input_vector_meta_, &new_vector,
                             &new_meta) != 0) {
      LOG_ERROR("Failed to transform vector");
      return core::IndexError_Runtime;
    }
    vector = new_vector.data();
  }
  // TODO: group by
  if (search_param->bf_pks != nullptr) {
    // Brute force restricted to the given keys; the key list is wrapped in a
    // one-element outer vector for the single query.
    // should we eliminate the copy of bf_pks?
    if (streamer_->search_bf_by_p_keys_impl(
            vector, std::vector<std::vector<uint64_t>>{*search_param->bf_pks},
            new_meta, 1, context) != 0) {
      LOG_ERROR("Failed to search_bf_by_p_keys_impl vector");
      return core::IndexError_Runtime;
    }
  } else if (search_param->is_linear) {
    // Brute force over the whole index.
    if (streamer_->search_bf_impl(vector, new_meta, 1, context) != 0) {
      LOG_ERROR("Failed to search vector");
      return core::IndexError_Runtime;
    }
  } else {
    // Regular index search.
    if (streamer_->search_impl(vector, new_meta, 1, context) != 0) {
      LOG_ERROR("Failed to search vector");
      return core::IndexError_Runtime;
    }
  }
  // NOTE(review): context->result() is read again below after being passed
  // through std::move here; that is only safe if result() yields a const
  // reference (making this assignment a copy) - confirm.
  result->doc_list_ = std::move(context->result());

  // Let the metric rewrite each score in place when it supports that.
  if (metric_->support_normalize()) {
    for (uint32_t i = 0; i < result->doc_list_.size(); ++i) {
      metric_->normalize(result->doc_list_[i].mutable_score());
    }
  }
  if (reformer_) {
    // The reformer normalises against the original (untransformed) query.
    if (reformer_->normalize(dense_vector.data, input_vector_meta_,
                             result->doc_list_) != 0) {
      LOG_ERROR("Failed to normalize vector");
      return core::IndexError_Runtime;
    }
    // Returned vectors are in the streamer's representation; revert them to
    // the input representation only when the caller asked for vectors.
    if (context->fetch_vector() && reformer_->need_revert()) {
      // TODO: use std::pmr to optimize memory allocation
      result->reverted_vector_list_.resize(context->result().size());
      for (uint32_t i = 0; i < context->result().size(); ++i) {
        std::string &reverted_vector = result->reverted_vector_list_[i];
        reverted_vector.resize(input_vector_meta_.dimension() *
                               input_vector_meta_.unit_size());
        if (reformer_->revert(context->result()[i].vector(), new_meta,
                              &reverted_vector) != 0) {
          LOG_ERROR("Failed to revert vector");
          return core::IndexError_Runtime;
        }
      }
    }
  }

  return 0;
}


// Execute a single sparse query on the streamer and fill `result`.
//
// Steps: validate that `vector_data` holds a SparseVector; transform the
// query values through the reformer when one is configured (indices and count
// are passed through unchanged); dispatch to key-restricted brute force
// (bf_pks set), plain brute force (is_linear) or the regular search; copy the
// hit list into `result`; normalise scores via the metric; and, if vectors
// were requested and the reformer needs it, revert the values of every
// returned sparse vector into result->reverted_sparse_values_list_.
//
// The context is not reset here on any path; Index::Search() does that after
// this function returns.
//
// Returns 0 on success, core::IndexError_Runtime on any failure.
int Index::_sparse_search(const VectorData &vector_data,
                          const BaseIndexQueryParam::Pointer &search_param,
                          SearchResult *result,
                          core::IndexContext::Pointer &context) {
  if (!std::holds_alternative<SparseVector>(vector_data.vector)) {
    LOG_ERROR("Invalid vector data");
    return core::IndexError_Runtime;
  }
  const SparseVector &sparse_vector =
      std::get<SparseVector>(vector_data.vector);
  auto indices = sparse_vector.get_indices();
  // `values` / `new_meta` describe what is actually sent to the streamer:
  // the raw input by default, the reformer's output when one is configured.
  auto values = sparse_vector.get_values();

  std::string converted_sparse_values_buffer;
  core::IndexQueryMeta new_meta = input_vector_meta_;
  if (reformer_ != nullptr) {
    if (reformer_->transform(sparse_vector.count, indices, values,
                             input_vector_meta_,
                             &converted_sparse_values_buffer, &new_meta) != 0) {
      LOG_ERROR("Failed to transform vector");
      return core::IndexError_Runtime;
    }
    values = converted_sparse_values_buffer.data();
  }

  if (search_param->bf_pks != nullptr) {
    // Brute force restricted to the given keys; the key list is wrapped in a
    // one-element outer vector for the single query.
    if (streamer_->search_bf_by_p_keys_impl(
            sparse_vector.count, indices, values,
            std::vector<std::vector<uint64_t>>{*search_param->bf_pks}, new_meta,
            context) != 0) {
      LOG_ERROR("Failed to search_bf_by_p_keys_impl vector");
      return core::IndexError_Runtime;
    }
  } else if (search_param->is_linear) {
    // Brute force over the whole index.
    if (streamer_->search_bf_impl(sparse_vector.count, indices, values,
                                  new_meta, context) != 0) {
      LOG_ERROR("Failed to search vector");
      return core::IndexError_Runtime;
    }
  } else {
    // Regular index search.
    if (streamer_->search_impl(sparse_vector.count, indices, values, new_meta,
                               context) != 0) {
      LOG_ERROR("Failed to search vector");
      return core::IndexError_Runtime;
    }
  }
  // NOTE(review): context->result() is read again below after being passed
  // through std::move here; that is only safe if result() yields a const
  // reference (making this assignment a copy) - confirm.
  result->doc_list_ = std::move(context->result());

  // Let the metric rewrite each score in place when it supports that.
  if (metric_->support_normalize()) {
    for (uint32_t i = 0; i < result->doc_list_.size(); ++i) {
      metric_->normalize(result->doc_list_[i].mutable_score());
    }
  }
  if (reformer_) {
    // TODO: no need to call reformer_->normalize() when sparse?
    // Returned values are in the streamer's representation; revert them to
    // the input representation only when the caller asked for vectors.
    if (context->fetch_vector() && reformer_->need_revert()) {
      // TODO: use std::pmr to optimize memory allocation
      auto &result_doc_list = context->result();
      result->reverted_sparse_values_list_.resize(result_doc_list.size());
      for (uint32_t i = 0; i < result_doc_list.size(); ++i) {
        auto &result_doc = result_doc_list[i].sparse_doc();
        std::string &reverted_sparse_values =
            result->reverted_sparse_values_list_[i];
        // One input-typed value per stored sparse entry.
        reverted_sparse_values.resize(result_doc.sparse_count() *
                                      input_vector_meta_.unit_size());
        if (reformer_->revert(result_doc.sparse_count(),
                              reinterpret_cast<const uint32_t *>(
                                  result_doc.sparse_indices().data()),
                              reinterpret_cast<const void *>(
                                  result_doc.sparse_values().data()),
                              new_meta, &reverted_sparse_values) != 0) {
          LOG_ERROR("Failed to revert sparse vector");
          return core::IndexError_Runtime;
        }
      }
    }
  }
  return 0;
}


// Merge the contents of `indexes` into this index using the
// "MixedStreamerReducer".
//
// Arguments are validated before any resource is created, so a bad call
// leaves no partially fed reducer behind.
//
// @param indexes  Source indexes; an empty list is a successful no-op. Null
//                 entries are rejected.
// @param filter   Filter handed to the reducer's reduce() step.
// @param options  write_concurrency must be > 0. When options.pool is set it
//                 is used as the reducer's thread pool; otherwise a local
//                 pool with write_concurrency threads is created for the
//                 duration of the call.
// @return 0 / core::IndexError_Success on success,
//         core::IndexError_InvalidArgument for invalid arguments, otherwise
//         core::IndexError_Runtime.
int Index::Merge(const std::vector<Index::Pointer> &indexes,
                 const IndexFilter &filter, const MergeOptions &options) {
  if (indexes.empty()) {
    return core::IndexError_Success;
  }
  if (options.write_concurrency == 0) {
    LOG_ERROR("Write concurrency must be greater than 0");
    return core::IndexError_InvalidArgument;
  }
  // Every source is dereferenced when it is fed to the reducer below.
  for (const auto &index : indexes) {
    if (index == nullptr) {
      LOG_ERROR("Source index must not be null");
      return core::IndexError_InvalidArgument;
    }
  }

  // ivf need builder
  auto reducer =
      core::IndexFactory::CreateStreamerReducer("MixedStreamerReducer");
  if (reducer == nullptr) {
    LOG_ERROR("Failed to create reducer");
    return core::IndexError_Runtime;
  }

  // must declare here to ensure its lifespan can cover reducer->reduce()
  std::unique_ptr<ailego::ThreadPool> local_thread_pool = nullptr;
  if (options.pool != nullptr) {
    reducer->set_thread_pool(options.pool);
  } else {
    local_thread_pool =
        std::make_unique<ailego::ThreadPool>(options.write_concurrency);
    reducer->set_thread_pool(local_thread_pool.get());
  }

  ailego::Params reducer_params;
  reducer_params.set(core::PARAM_MIXED_STREAMER_REDUCER_ENABLE_PK_REWRITE,
                     true);
  reducer_params.set(core::PARAM_MIXED_STREAMER_REDUCER_NUM_OF_ADD_THREADS,
                     options.write_concurrency);
  if (reducer->init(reducer_params) != 0) {
    LOG_ERROR("Failed to init reducer");
    return core::IndexError_Runtime;
  }
  if (reducer->set_target_streamer_wiht_info(builder_, streamer_, converter_,
                                             reformer_,
                                             input_vector_meta_) != 0) {
    LOG_ERROR("Failed to set target streamer");
    return core::IndexError_Runtime;
  }

  for (const auto &index : indexes) {
    if (reducer->feed_streamer_with_reformer(index->streamer_,
                                             index->reformer_) != 0) {
      LOG_ERROR("Failed to feed streamer");
      return core::IndexError_Runtime;
    }
  }
  if (reducer->reduce(filter) != 0) {
    LOG_ERROR("Failed to reduce");
    return core::IndexError_Runtime;
  }
  // A successful merge leaves the index in the trained state.
  is_trained_ = true;
  return 0;
}

// Number of candidates the coarse pass should collect before refinement:
// floor(topk * scale_factor).
//
// A scale factor that is zero, negative or NaN is treated as 1, so the result
// can never become negative or undefined. The caller must ensure that
// search_param->refiner_param is set.
int Index::_get_coarse_search_topk(
    const BaseIndexQueryParam::Pointer &search_param) {
  float scale_factor = search_param->refiner_param->scale_factor_;
  // Written as a negated comparison so that NaN is caught as well.
  if (!(scale_factor > 0)) {
    scale_factor = 1;
  }
  return static_cast<int>(floor(search_param->topk * scale_factor));
}

// Map a metric type to the registered metric name for dense or sparse
// indexes. Returns an empty string when the combination is not supported.
std::string Index::get_metric_name(MetricType metric_type, bool is_sparse) {
  if (!is_sparse) {
    switch (metric_type) {
      case MetricType::kL2sq:
        return "SquaredEuclidean";
      case MetricType::kInnerProduct:
        return "InnerProduct";
      case MetricType::kCosine:
        return "Cosine";
      case MetricType::kMIPSL2sq:
        return "MipsSquaredEuclidean";
      default:
        return "";
    }
  }

  // Sparse metrics.
  switch (metric_type) {
    case MetricType::kInnerProduct:
      return "InnerProductSparse";
    case MetricType::kMIPSL2sq:
      return "MipsSquaredEuclideanSparse";
    default:
      return "";
  }
}

}  // namespace zvec::core_interface


================================================
FILE: src/core/interface/index_factory.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <zvec/ailego/container/params.h>
#include <zvec/core/framework/index_factory.h>
#include <zvec/core/framework/index_meta.h>
#include <zvec/core/interface/index_factory.h>
#include <zvec/core/interface/index_param.h>
#include "core/interface/utils/utils.h"

namespace zvec::core_interface {


// Creates the index implementation selected by `param.index_type` and
// initializes it with `param`.
//
// @param param index parameters; `index_type` selects the concrete class.
// @return the initialized index, or nullptr when the type is unsupported or
//         Init() reports a non-zero status.
//
// TODO: IVF l1/l2 sub-index creation is not implemented yet.
Index::Pointer IndexFactory::CreateAndInitIndex(const BaseIndexParam &param) {
  Index::Pointer ptr = nullptr;
  switch (param.index_type) {
    case IndexType::kFlat:
      ptr = std::make_shared<FlatIndex>();
      break;
    case IndexType::kHNSW:
      ptr = std::make_shared<HNSWIndex>();
      break;
    case IndexType::kIVF:
      ptr = std::make_shared<IVFIndex>();
      break;
    case IndexType::kHNSWRabitq:
      ptr = std::make_shared<HNSWRabitqIndex>();
      break;
    default: {
      // enum_name() yields an empty view for values without a name; copying
      // into a std::string guarantees a valid C string for the formatter.
      const std::string type_name(magic_enum::enum_name(param.index_type));
      LOG_ERROR("Unsupported index type: %s", type_name.c_str());
      return nullptr;
    }
  }

  // std::make_shared never returns null, so no null check is needed here.
  if (0 != ptr->Init(param)) {
    LOG_ERROR("Failed to init index");
    return nullptr;
  }
  return ptr;
}

// Builds the concrete index param object described by a JSON document.
//
// The "index_type" field selects which param class is created; that object
// then deserializes itself from the complete JSON string.
//
// @param json_str JSON text holding an object with an "index_type" field.
// @return the populated param, or nullptr when parsing fails, the document
//         is not an object, the type is unsupported, or the concrete
//         deserialization fails.
BaseIndexParam::Pointer IndexFactory::DeserializeIndexParamFromJson(
    const std::string &json_str) {
  ailego::JsonValue json_value;
  if (!json_value.parse(json_str)) {
    LOG_ERROR("Failed to parse json string: %s", json_str.c_str());
    return nullptr;
  }
  // Valid JSON that is not an object (e.g. an array) cannot carry fields.
  if (!json_value.is_object()) {
    LOG_ERROR("Json string is not an object: %s", json_str.c_str());
    return nullptr;
  }
  ailego::JsonObject json_obj = json_value.as_object();
  ailego::JsonValue tmp_json_value;

  IndexType index_type;

  if (!extract_enum_from_json<IndexType>(json_obj, "index_type", index_type,
                                         tmp_json_value)) {
    LOG_ERROR("Failed to deserialize index type");
    return nullptr;
  }

  switch (index_type) {
    case IndexType::kFlat: {
      FlatIndexParam::Pointer param = std::make_shared<FlatIndexParam>();
      if (!param->DeserializeFromJson(json_str)) {
        LOG_ERROR("Failed to deserialize flat index param");
        return nullptr;
      }
      return param;
    }
    case IndexType::kHNSW: {
      HNSWIndexParam::Pointer param = std::make_shared<HNSWIndexParam>();
      if (!param->DeserializeFromJson(json_str)) {
        LOG_ERROR("Failed to deserialize hnsw index param");
        return nullptr;
      }
      return param;
    }
    case IndexType::kIVF: {
      IVFIndexParam::Pointer param = std::make_shared<IVFIndexParam>();
      if (!param->DeserializeFromJson(json_str)) {
        LOG_ERROR("Failed to deserialize ivf index param");
        return nullptr;
      }
      return param;
    }
    case IndexType::kHNSWRabitq: {
      HNSWRabitqIndexParam::Pointer param =
          std::make_shared<HNSWRabitqIndexParam>();
      if (!param->DeserializeFromJson(json_str)) {
        LOG_ERROR("Failed to deserialize hnsw rabitq index param");
        return nullptr;
      }
      return param;
    }
    default:
      LOG_ERROR("Unsupported index type: %s",
                magic_enum::enum_name(index_type).data());
      return nullptr;
  }
}

template <typename QueryParamType,
          std::enable_if_t<
              std::is_base_of_v<BaseIndexQueryParam, QueryParamType>, bool> >
std::string IndexFactory::QueryParamSerializeToJson(const QueryParamType &param,
                                                    bool omit_empty_value) {
  ailego::JsonObject json_obj;

  // BaseIndexQueryParam
  // omit filter & bf_pks
  if (!omit_empty_value || param.topk != 0) {
    json_obj.set("topk", ailego::JsonValue(param.topk));
  }
  if (!omit_empty_value || param.fetch_vector) {
    json_obj.set("fetch_vector", ailego::JsonValue(param.fetch_vector));
  }
  if (!omit_empty_value || param.radius != 0.0f) {
    json_obj.set("radius", ailego::JsonValue(param.radius));
  }
  if (!omit_empty_value || param.is_linear) {
    json_obj.set("is_linear", ailego::JsonValue(param.is_linear));
  }

  IndexType index_type{IndexType::kNone};
  if constexpr (std::is_same_v<QueryParamType, FlatQueryParam>) {
    // index_type
    index_type = IndexType::kFlat;
  } else if constexpr (std::is_same_v<QueryParamType, HNSWQueryParam>) {
    if (!omit_empty_value || param.ef_search != 0) {
      json_obj.set("ef_search", ailego::JsonValue(param.ef_search));
    }
    index_type = IndexType::kHNSW;
  } else if constexpr (std::is_same_v<QueryParamType, IVFQueryParam>) {
    if (!omit_empty_value || param.nprobe != 0) {
      json_obj.set("nprobe", ailego::JsonValue(param.nprobe));
    }
    index_type = IndexType::kIVF;
    // json_obj.set("l1QueryParam",
    // ailego::JsonValue(QueryParamSerializeToJson(param.l1QueryParam)));
    // json_obj.set("l2QueryParam",
    // ailego::JsonValue(QueryParamSerializeToJson(param.l2QueryParam)));
  } else if constexpr (std::is_same_v<QueryParamType, HNSWRabitqQueryParam>) {
    if (!omit_empty_value || param.ef_search != 0) {
      json_obj.set("ef_search", ailego::JsonValue(param.ef_search));
    }
    index_type = IndexType::kHNSWRabitq;
  }

  json_obj.set("index_type",
               ailego::JsonValue(magic_enum::enum_name(index_type).data()));

  return ailego::JsonValue(json_obj).as_json_string().as_stl_string();
}

// Explicit instantiations: the template body lives in this translation unit,
// so every query param type that callers may serialize must be listed here.
template std::string
IndexFactory::QueryParamSerializeToJson<BaseIndexQueryParam>(
    const BaseIndexQueryParam &param, bool omit_empty_value);
template std::string IndexFactory::QueryParamSerializeToJson<FlatQueryParam>(
    const FlatQueryParam &param, bool omit_empty_value);
template std::string IndexFactory::QueryParamSerializeToJson<HNSWQueryParam>(
    const HNSWQueryParam &param, bool omit_empty_value);
template std::string IndexFactory::QueryParamSerializeToJson<IVFQueryParam>(
    const IVFQueryParam &param, bool omit_empty_value);
// The template has a dedicated HNSWRabitqQueryParam branch; without this
// instantiation a caller using that type would fail to link.
template std::string
IndexFactory::QueryParamSerializeToJson<HNSWRabitqQueryParam>(
    const HNSWRabitqQueryParam &param, bool omit_empty_value);

template <typename QueryParamType,
          std::enable_if_t<
              std::is_base_of_v<BaseIndexQueryParam, QueryParamType>, bool> >
typename QueryParamType::Pointer IndexFactory::QueryParamDeserializeFromJson(
    const std::string &json_str) {
  ailego::JsonValue tmp_json_value;
  if (!tmp_json_value.parse(json_str)) {
    LOG_ERROR("Failed to parse json string: %s", json_str.c_str());
    return nullptr;
  }
  ailego::JsonObject json_obj = tmp_json_value.as_object();

  auto parse_common_fields = [&](auto &param) -> bool {
    if (!extract_value_from_json(json_obj, "topk", param->topk,
                                 tmp_json_value)) {
      LOG_ERROR("Failed to deserialize topk");
      return false;
    }

    if (!extract_value_from_json(json_obj, "fetch_vector", param->fetch_vector,
                                 tmp_json_value)) {
      LOG_ERROR("Failed to deserialize fetch_vector");
      return false;
    }

    if (!extract_value_from_json(json_obj, "radius", param->radius,
                                 tmp_json_value)) {
      LOG_ERROR("Failed to deserialize radius");
      return false;
    }

    if (!extract_value_from_json(json_obj, "is_linear", param->is_linear,
                                 tmp_json_value)) {
      LOG_ERROR("Failed to deserialize is_linear");
      return false;
    }
    return true;
  };

  IndexType index_type;

  if (!extract_enum_from_json<IndexType>(json_obj, "index_type", index_type,
                                         tmp_json_value)) {
    LOG_ERROR("Failed to deserialize index type");
    return nullptr;
  }

  if constexpr (std::is_same_v<QueryParamType, BaseIndexQueryParam>) {
    if (index_type == IndexType::kFlat) {
      auto param = std::make_shared<FlatQueryParam>();
      if (!parse_common_fields(param)) {
        return nullptr;
      }
      return param;
    } else if (index_type == IndexType::kHNSW) {
      auto param = std::make_shared<HNSWQueryParam>();
      if (!parse_common_fields(param)) {
        return nullptr;
      }
      if (!extract_value_from_json(json_obj, "ef_search", param->ef_search,
                                   tmp_json_value)) {
        LOG_ERROR("Failed to deserialize ef_search");
        return nullptr;
      }
      return param;
    } else if (index_type == IndexType::kIVF) {
      auto param = std::make_shared<IVFQueryParam>();
      if (!parse_common_fields(param)) {
        return nullptr;
      }
      if (!extract_value_from_json(json_obj, "nprobe", param->nprobe,
                                   tmp_json_value)) {
        LOG_ERROR("Failed to deserialize nprobe");
        return nullptr;
      }
      return param;
    } else if (index_type == IndexType::kHNSWRabitq) {
      auto param = std::make_shared<HNSWRabitqQueryParam>();
      if (!parse_common_fields(param)) {
        return nullptr;
      }
      if (!extract_value_from_json(json_obj, "ef_search", param->ef_search,
                                   tmp_json_value)) {
        LOG_ERROR("Failed to deserialize ef_search");
        return nullptr;
      }
      return param;
    } else {
      LOG_ERROR("Unsupported index type: %s",
                magic_enum::enum_name(index_type).data());
      return nullptr;
    }
  } else {
    auto param = std::make_shared<QueryParamType>();
    if (!parse_common_fields(param)) {
      return nullptr;
    }
    if constexpr (std::is_same_v<QueryParamType, FlatQueryParam>) {
    } else if constexpr (std::is_same_v<QueryParamType, HNSWQueryParam>) {
      if (!extract_value_from_json(json_obj, "ef_search", param->ef_search,
                                   tmp_json_value)) {
        LOG_ERROR("Failed to deserialize ef_search");
        return nullptr;
      }
    } else if constexpr (std::is_same_v<QueryParamType, IVFQueryParam>) {
      if (!extract_value_from_json(json_obj, "nprobe", param->nprobe,
                                   tmp_json_value)) {
        LOG_ERROR("Failed to deserialize nprobe");
        return nullptr;
      }
    } else if constexpr (std::is_same_v<QueryParamType, HNSWRabitqQueryParam>) {
      if (!extract_value_from_json(json_obj, "ef_search", param->ef_search,
                                   tmp_json_value)) {
        LOG_ERROR("Failed to deserialize ef_search");
        return nullptr;
      }
    } else {
      LOG_ERROR("Unsupported index type: %s",
                magic_enum::enum_name(index_type).data());
      return nullptr;
    }
    return param;
  }
}

template BaseIndexQueryParam::Pointer
IndexFactory::QueryParamDeserializeFromJson<BaseIndexQueryParam>(
    const std::string &json_str);
template FlatQueryParam::Pointer IndexFactory::QueryParamDeserializeFromJson<
    FlatQueryParam>(const std::string &json_str);
template HNSWQueryParam::Pointer IndexFactory::QueryParamDeserializeFromJson<
    HNSWQueryParam>(const std::string &json_str);
template IVFQueryParam::Pointer IndexFactory::QueryParamDeserializeFromJson<
    IVFQueryParam>(const std::string &json_str);

}  // namespace zvec::core_interface


================================================
FILE: src/core/interface/index_param.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <zvec/ailego/logger/logger.h>
#include <zvec/core/interface/index_param.h>
#include "core/interface/utils/utils.h"

namespace zvec {
namespace core_interface {
// Serializes the fields common to all index params into a JSON object.
//
// @param omit_empty_value when true, a field is written only if it differs
//                         from its empty value (kNone / DT_UNDEFINED / 0 /
//                         false); when false, every field is written.
// @return the populated JSON object.
//
// TODO: preprocess_param, refiner_param and default_query_param are not
// serialized yet.
ailego::JsonObject BaseIndexParam::SerializeToJsonObject(
    bool omit_empty_value) const {
  const bool emit_all = !omit_empty_value;
  ailego::JsonObject result;

  // Enum fields are stored by enumerator name.
  if (emit_all || index_type != IndexType::kNone) {
    result.set("index_type",
               ailego::JsonValue(magic_enum::enum_name(index_type).data()));
  }
  if (emit_all || metric_type != MetricType::kNone) {
    result.set("metric_type",
               ailego::JsonValue(magic_enum::enum_name(metric_type).data()));
  }

  if (emit_all || dimension != 0) {
    result.set("dimension", ailego::JsonValue(dimension));
  }
  if (emit_all || version != 0) {
    result.set("version", ailego::JsonValue(version));
  }
  if (emit_all || is_sparse) {
    result.set("is_sparse", ailego::JsonValue(is_sparse));
  }

  if (emit_all || data_type != DataType::DT_UNDEFINED) {
    result.set("data_type",
               ailego::JsonValue(magic_enum::enum_name(data_type).data()));
  }

  if (emit_all || use_id_map) {
    result.set("use_id_map", ailego::JsonValue(use_id_map));
  }
  if (emit_all || is_huge_page) {
    result.set("is_huge_page", ailego::JsonValue(is_huge_page));
  }

  // The quantizer settings are nested as their own object.
  if (emit_all || quantizer_param.type != QuantizerType::kNone) {
    result.set("quantizer_param",
               quantizer_param.SerializeToJsonObject(omit_empty_value));
  }

  return result;
}


// Serializes the flat index param: the base fields plus "major_order".
//
// @param omit_empty_value when true, "major_order" is skipped while it is
//                         MO_UNDEFINED; the flag is also forwarded to the
//                         base serializer.
// @return the populated JSON object.
ailego::JsonObject FlatIndexParam::SerializeToJsonObject(
    bool omit_empty_value) const {
  ailego::JsonObject result =
      BaseIndexParam::SerializeToJsonObject(omit_empty_value);

  const bool has_major_order =
      major_order != IndexMeta::MajorOrder::MO_UNDEFINED;
  if (has_major_order || !omit_empty_value) {
    result.set("major_order",
               ailego::JsonValue(magic_enum::enum_name(major_order).data()));
  }
  return result;
}

// Serializes the HNSW index param: the base fields plus "m" and
// "ef_construction".
//
// @param omit_empty_value forwarded to the base serializer only; "m" and
//                         "ef_construction" are always written.
// @return the populated JSON object.
ailego::JsonObject HNSWIndexParam::SerializeToJsonObject(
    bool omit_empty_value) const {
  ailego::JsonObject result =
      BaseIndexParam::SerializeToJsonObject(omit_empty_value);

  result.set("m", ailego::JsonValue(m));
  result.set("ef_construction", ailego::JsonValue(ef_construction));

  return result;
}

// Populates the fields common to all index params from a JSON object.
//
// Scalar and enum fields are read through the DESERIALIZE_* macros. A nested
// "quantizer_param" object, when present, is handed to the quantizer param's
// own deserializer, and its failure is now reported to the caller instead of
// being silently dropped.
//
// @param json_obj source JSON object.
// @return true on success; false when the nested quantizer param fails to
//         deserialize.
// NOTE(review): the DESERIALIZE_* macros are defined outside this view and
// presumably return false themselves on malformed fields — confirm.
bool BaseIndexParam::DeserializeFromJsonObject(
    const ailego::JsonObject &json_obj) {
  DESERIALIZE_ENUM_FIELD(json_obj, index_type, IndexType);
  DESERIALIZE_ENUM_FIELD(json_obj, metric_type, MetricType);
  DESERIALIZE_ENUM_FIELD(json_obj, data_type, DataType);

  DESERIALIZE_VALUE_FIELD(json_obj, dimension);
  DESERIALIZE_VALUE_FIELD(json_obj, version);
  DESERIALIZE_VALUE_FIELD(json_obj, is_sparse);
  DESERIALIZE_VALUE_FIELD(json_obj, use_id_map);
  DESERIALIZE_VALUE_FIELD(json_obj, is_huge_page);

  ailego::JsonValue tmp_json_value;
  if (json_obj.has("quantizer_param")) {
    json_obj.get("quantizer_param", &tmp_json_value);
    // A non-object value is still ignored, exactly as before.
    if (tmp_json_value.is_object() &&
        !quantizer_param.DeserializeFromJsonObject(
            tmp_json_value.as_object())) {
      LOG_ERROR("Failed to deserialize quantizer_param");
      return false;
    }
  }

  return true;
}

// Populates a flat index param from a JSON object.
//
// The base fields are read first; the object is rejected when its
// index_type is not kFlat. "major_order" is then read as an enum field.
//
// @param json_obj source JSON object.
// @return false when base deserialization fails or the index type does not
//         match; true otherwise.
// NOTE(review): DESERIALIZE_ENUM_FIELD is defined outside this view and may
// return early on its own — confirm.
bool FlatIndexParam::DeserializeFromJsonObject(
    const ailego::JsonObject &json_obj) {
  if (!BaseIndexParam::DeserializeFromJsonObject(json_obj)) {
    return false;
  }

  // Guard against feeding another index type's JSON into this class.
  if (index_type != IndexType::kFlat) {
    LOG_ERROR("index_type is not kFlat");
    return false;
  }

  DESERIALIZE_ENUM_FIELD(json_obj, major_order, IndexMeta::MajorOrder);
  return true;
}

// Populates an HNSW index param from a JSON object.
//
// The base fields are read first; the object is rejected when its
// index_type is not kHNSW. "m" and "ef_construction" are then read.
//
// @param json_obj source JSON object.
// @return false when base deserialization fails or the index type does not
//         match; true otherwise.
// NOTE(review): DESERIALIZE_VALUE_FIELD is defined outside this view and may
// return early on its own — confirm.
bool HNSWIndexParam::DeserializeFromJsonObject(
    const ailego::JsonObject &json_obj) {
  if (!BaseIndexParam::DeserializeFromJsonObject(json_obj)) {
    return false;
  }

  // Guard against feeding another index type's JSON into this class.
  if (index_type != IndexType::kHNSW) {
    LOG_ERROR("index_type is not kHNSW");
    return false;
  }

  DESERIALIZE_VALUE_FIELD(json_obj, m);
  DESERIALIZE_VALUE_FIELD(json_obj, ef_construction);

  return true;
}

// Populates an HNSW-RaBitQ index param from a JSON object.
//
// The base fields are read first; the object is rejected when its
// index_type is not kHNSWRabitq. The graph fields ("m", "ef_construction")
// and the RaBitQ fields ("total_bits", "num_clusters", "sample_count") are
// then read.
//
// @param json_obj source JSON object.
// @return false when base deserialization fails or the index type does not
//         match; true otherwise.
// NOTE(review): DESERIALIZE_VALUE_FIELD is defined outside this view and may
// return early on its own — confirm.
bool HNSWRabitqIndexParam::DeserializeFromJsonObject(
    const ailego::JsonObject &json_obj) {
  if (!BaseIndexParam::DeserializeFromJsonObject(json_obj)) {
    return false;
  }

  // Guard against feeding another index type's JSON into this class.
  if (index_type != IndexType::kHNSWRabitq) {
    LOG_ERROR("index_type is not kHNSWRabitq");
    return false;
  }

  DESERIALIZE_VALUE_FIELD(json_obj, m);
  DESERIALIZE_VALUE_FIELD(json_obj, ef_construction);
  DESERIALIZE_VALUE_FIELD(json_obj, total_bits);
  DESERIALIZE_VALUE_FIELD(json_obj, num_clusters);
  DESERIALIZE_VALUE_FIELD(json_obj, sample_count);

  return true;
}

// Serializes the HNSW-RaBitQ index param: the base fields plus the graph
// and RaBitQ settings.
//
// @param omit_empty_value forwarded to the base serializer; of the fields
//                         written here it only affects "sample_count", which
//                         is skipped while it is 0.
// @return the populated JSON object.
ailego::JsonObject HNSWRabitqIndexParam::SerializeToJsonObject(
    bool omit_empty_value) const {
  ailego::JsonObject result =
      BaseIndexParam::SerializeToJsonObject(omit_empty_value);

  // These four are always written.
  result.set("m", ailego::JsonValue(m));
  result.set("ef_construction", ailego::JsonValue(ef_construction));
  result.set("total_bits", ailego::JsonValue(total_bits));
  result.set("num_clusters", ailego::JsonValue(num_clusters));

  const bool has_sample_count = sample_count != 0;
  if (has_sample_count || !omit_empty_value) {
    result.set("sample_count", ailego::JsonValue(sample_count));
  }
  return result;
}

// Serializes the quantizer settings into a JSON object with a single "type"
// field holding the enumerator name.
//
// @param omit_empty_value when true, "type" is skipped while it is kNone,
//                         which leaves the returned object empty.
// @return the populated JSON object.
ailego::JsonObject QuantizerParam::SerializeToJsonObject(
    bool omit_empty_value) const {
  ailego::JsonObject result;

  const bool has_type = type != QuantizerType::kNone;
  if (has_type || !omit_empty_value) {
    result.set("type",
               zvec::ailego::JsonValue(magic_enum::enum_name(type).data()));
  }
  return result;
}

// Populates the quantizer settings from a JSON object by reading the "type"
// enum field.
//
// @param json_obj source JSON object.
// @return true when the end of the function is reached.
// NOTE(review): DESERIALIZE_ENUM_FIELD is defined outside this view and may
// return early on its own — confirm.
bool QuantizerParam::DeserializeFromJsonObject(
    const ailego::JsonObject &json_obj) {
  DESERIALIZE_ENUM_FIELD(json_obj, type, QuantizerType);
  return true;
}

// bool BaseIndexQueryParam::DeserializeFromJsonObject(
//     const ailego::JsonObject &json_obj) {
//   DESERIALIZE_ENUM_FIELD(json_obj, index_type, IndexType);
//   DESERIALIZE_VALUE_FIELD(json_obj, topk);
//   DESERIALIZE_VALUE_FIELD(json_obj, fetch_vector);
//   DESERIALIZE_VALUE_FIELD(json_obj, radius);
//   DESERIALIZE_VALUE_FIELD(json_obj, is_linear);
//   return true;
// }

// ailego::JsonObject BaseIndexQueryParam::SerializeToJsonObject(
//     bool omit_empty_value) const {
//   ailego::JsonObject json_obj;
//   if (!omit_empty_value || index_type != IndexType::kNone) {
//     json_obj.set("index_type",
//                  ailego::JsonValue(magic_enum::enum_name(index_type).data()));
//   }
//   if (!omit_empty_value || topk != 0) {
//     json_obj.set("topk", ailego::JsonValue(topk));
//   }
//   if (!omit_empty_value || fetch_vector) {
//     json_obj.set("fetch_vector", ailego::JsonValue(fetch_vector));
//   }
//   if (!omit_empty_value || radius != 0.0f) {
//     json_obj.set("radius", ailego::JsonValue(radius));
//   }
//   if (!omit_empty_value || is_linear) {
//     json_obj.set("is_linear", ailego::JsonValue(is_linear));
//   }
//   return json_obj;
// }

// bool FlatQueryParam::DeserializeFromJsonObject(
//     const ailego::JsonObject &json_obj) {
//   if (!BaseIndexQueryParam::DeserializeFromJsonObject(json_obj)) {
//     return false;
//   }
//   if (index_type != IndexType::kFlat) {
//     LOG_ERROR("index_type is not kFlat");
//     return false;
//   }
//   return true;
// }

// ailego::JsonObject FlatQueryParam::SerializeToJsonObject(
//     bool omit_empty_value) const {
//   auto json_obj =
//       BaseIndexQueryParam::SerializeToJsonObject(omit_empty_value);
//   return json_obj;
// }

// bool HNSWQueryParam::DeserializeFromJsonObject(
//     const ailego::JsonObject &json_obj) {
//   if (!BaseIndexQueryParam::DeserializeFromJsonObject(json_obj)) {
//     return false;
//   }
//   if (index_type != IndexType::kHNSW) {
//     LOG_ERROR("index_type is not kHNSW");
//     return false;
//   }
//   DESERIALIZE_VALUE_FIELD(json_obj, ef_search);
//   return true;
// }

// ailego::JsonObject HNSWQueryParam::SerializeToJsonObject(
//     bool omit_empty_value) const {
//   auto json_obj =
//       BaseIndexQueryParam::SerializeToJsonObject(omit_empty_value);
//   if (!omit_empty_value || ef_search != 0) {
//     json_obj.set("ef_search", ailego::JsonValue(ef_search));
//   }
//   return json_obj;
// }


}  // namespace core_interface
}  // namespace zvec

================================================
FILE: src/core/interface/indexes/flat_index.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <memory>
#include <string>
#include <zvec/core/interface/index.h>
#include "algorithm/flat/flat_utility.h"

namespace zvec::core_interface {

// Creates the flat streamer (dense or sparse flavour) and initializes it.
//
// @param param index parameters; must actually be a FlatIndexParam.
// @return 0 on success, core::IndexError_InvalidArgument when `param` has
//         the wrong dynamic type, core::IndexError_Runtime when the streamer
//         cannot be created or initialized.
int FlatIndex::CreateAndInitStreamer(const BaseIndexParam &param) {
  // A pointer cast reports a mismatched param as an error code; the
  // reference cast used previously would throw std::bad_cast instead.
  const auto *flat_param = dynamic_cast<const FlatIndexParam *>(&param);
  if (ailego_unlikely(flat_param == nullptr)) {
    LOG_ERROR("Invalid index param type, expected FlatIndexParam");
    return core::IndexError_InvalidArgument;
  }
  param_ = *flat_param;

  proxima_index_params_.set(core::PARAM_FLAT_COLUMN_MAJOR_ORDER,
                            param_.major_order == IndexMeta::MO_COLUMN);
  proxima_index_params_.set(core::PARAM_FLAT_USE_ID_MAP, param_.use_id_map);
  if (is_sparse_) {
    streamer_ = core::IndexFactory::CreateStreamer("FlatSparseStreamer");
  } else {
    streamer_ = core::IndexFactory::CreateStreamer("FlatStreamer");
  }

  if (ailego_unlikely(!streamer_)) {
    LOG_ERROR("Failed to create streamer");
    return core::IndexError_Runtime;
  }
  if (ailego_unlikely(
          streamer_->init(proxima_index_meta_, proxima_index_params_) != 0)) {
    LOG_ERROR("Failed to init streamer");
    return core::IndexError_Runtime;
  }
  return 0;
}

// Copies the flat query settings into the search context.
//
// topk and fetch_vector are always applied; the filter is applied when one
// is set, and the radius becomes the context threshold only when it is
// positive.
//
// @param search_param query parameters; must be a FlatQueryParam.
// @param context      search context to configure.
// @return 0 on success, core::IndexError_Runtime when `search_param` is not
//         a FlatQueryParam.
int FlatIndex::_prepare_for_search(
    const VectorData & /*vector_data*/,
    const BaseIndexQueryParam::Pointer &search_param,
    core::IndexContext::Pointer &context) {
  const auto flat_param =
      std::dynamic_pointer_cast<FlatQueryParam>(search_param);
  if (ailego_unlikely(!flat_param)) {
    LOG_ERROR("Invalid search param type, expected FlatQueryParam");
    return core::IndexError_Runtime;
  }

  FlatQueryParam &query = *flat_param;
  context->set_topk(query.topk);
  context->set_fetch_vector(query.fetch_vector);

  // NOTE(review): the filter is moved out of the caller's query param, which
  // looks like it leaves that param without a usable filter — confirm.
  if (query.filter) {
    context->set_filter(std::move(*query.filter));
  }

  const bool has_radius = query.radius > 0.0f;
  if (has_radius) {
    context->set_threshold(query.radius);
  }

  return 0;
}


}  // namespace zvec::core_interface

================================================
FILE: src/core/interface/indexes/hnsw_index.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <memory>
#include <string>
#include <zvec/core/interface/index.h>
#include "algorithm/hnsw/hnsw_params.h"
#include "algorithm/hnsw_sparse/hnsw_sparse_params.h"

namespace zvec::core_interface {

// Creates the HNSW streamer (dense or sparse flavour) and initializes it.
//
// ef_construction is clamped to [1, 2048] and m to [5, 1024] before being
// handed to the streamer.
//
// @param param index parameters; must actually be an HNSWIndexParam.
// @return 0 on success, core::IndexError_InvalidArgument when `param` has
//         the wrong dynamic type, core::IndexError_Runtime when the streamer
//         cannot be created or initialized.
int HNSWIndex::CreateAndInitStreamer(const BaseIndexParam &param) {
  // A pointer cast reports a mismatched param as an error code; the
  // reference cast used previously would throw std::bad_cast instead.
  const auto *hnsw_param = dynamic_cast<const HNSWIndexParam *>(&param);
  if (ailego_unlikely(hnsw_param == nullptr)) {
    LOG_ERROR("Invalid index param type, expected HNSWIndexParam");
    return core::IndexError_InvalidArgument;
  }
  param_ = *hnsw_param;

  // Clamp the build parameters into their supported ranges.
  param_.ef_construction = std::max(1, std::min(2048, param_.ef_construction));
  param_.m = std::max(5, std::min(1024, param_.m));

  if (is_sparse_) {
    proxima_index_params_.set(core::PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION,
                              param_.ef_construction);
    proxima_index_params_.set(
        core::PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, param_.m);

    // TODO: add_vector_with_id & fetch_by_id don't rely on this param
    proxima_index_params_.set(
        core::PARAM_HNSW_SPARSE_STREAMER_GET_VECTOR_ENABLE, true);

    // TODO: use index params' default query param here
    proxima_index_params_.set(core::PARAM_HNSW_SPARSE_STREAMER_EF,
                              kDefaultHnswEfSearch);
    streamer_ = core::IndexFactory::CreateStreamer("HnswSparseStreamer");

  } else {
    proxima_index_params_.set(core::PARAM_HNSW_STREAMER_EFCONSTRUCTION,
                              param_.ef_construction);
    proxima_index_params_.set(core::PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT,
                              param_.m);

    // TODO: add_vector_with_id & fetch_by_id don't rely on this param
    proxima_index_params_.set(core::PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE,
                              true);

    // TODO: use index params' default query param here
    proxima_index_params_.set(core::PARAM_HNSW_STREAMER_EF,
                              kDefaultHnswEfSearch);
    proxima_index_params_.set(core::PARAM_HNSW_STREAMER_USE_ID_MAP,
                              param_.use_id_map);
    streamer_ = core::IndexFactory::CreateStreamer("HnswStreamer");
  }

  if (ailego_unlikely(!streamer_)) {
    LOG_ERROR("Failed to create streamer");
    return core::IndexError_Runtime;
  }
  if (ailego_unlikely(
          streamer_->init(proxima_index_meta_, proxima_index_params_) != 0)) {
    LOG_ERROR("Failed to init streamer");
    return core::IndexError_Runtime;
  }
  return 0;
}


// Copies the HNSW query settings into the search context.
//
// topk and fetch_vector are always applied; the filter is applied when one
// is set, the radius becomes the context threshold only when it is positive,
// and ef_search is pushed to the context as a parameter update.
//
// @param search_param query parameters; must be an HNSWQueryParam with
//                     ef_search in (0, 2048].
// @param context      search context to configure.
// @return 0 on success, core::IndexError_Runtime when `search_param` has the
//         wrong type or ef_search is out of range.
int HNSWIndex::_prepare_for_search(
    const VectorData & /*vector_data*/,
    const BaseIndexQueryParam::Pointer &search_param,
    core::IndexContext::Pointer &context) {
  const auto &hnsw_search_param =
      std::dynamic_pointer_cast<HNSWQueryParam>(search_param);

  if (ailego_unlikely(!hnsw_search_param)) {
    LOG_ERROR("Invalid search param type, expected HNSWQueryParam");
    return core::IndexError_Runtime;
  }

  if (0 >= hnsw_search_param->ef_search ||
      hnsw_search_param->ef_search > 2048) {
    LOG_ERROR(
        "ef_search must be greater than 0 and less than or equal to 2048.");
    return core::IndexError_Runtime;
  }

  context->set_topk(hnsw_search_param->topk);
  context->set_fetch_vector(hnsw_search_param->fetch_vector);
  if (hnsw_search_param->filter) {
    context->set_filter(std::move(*hnsw_search_param->filter));
  }
  if (hnsw_search_param->radius > 0.0f) {
    context->set_threshold(hnsw_search_param->radius);
  }

  // The range check above already guarantees 1 <= ef_search <= 2048, so no
  // further clamping is needed.
  const int real_search_ef = static_cast<int>(hnsw_search_param->ef_search);

  ailego::Params params;
  params.set(core::PARAM_HNSW_STREAMER_EF, real_search_ef);
  if (is_sparse_) {
    // CreateAndInitStreamer configures the sparse streamer through
    // PARAM_HNSW_SPARSE_STREAMER_EF, so publish the per-query value under
    // that key as well; the dense key is kept for backward compatibility.
    params.set(core::PARAM_HNSW_SPARSE_STREAMER_EF, real_search_ef);
  }
  context->update(params);
  return 0;
}

// Computes how many candidates the coarse search stage should return.
//
// scale_factor doesn't take effect for hnsw: the result is the larger of
// topk and ef_search.
//
// @param search_param query parameters; must not be null.
// @return max(topk, ef_search), or plain topk when `search_param` is not an
//         HNSWQueryParam (previously a null dereference).
int HNSWIndex::_get_coarse_search_topk(
    const BaseIndexQueryParam::Pointer &search_param) {
  const auto &hnsw_search_param =
      std::dynamic_pointer_cast<HNSWQueryParam>(search_param);

  if (ailego_unlikely(!hnsw_search_param)) {
    LOG_ERROR("Invalid search param type, expected HNSWQueryParam");
    return search_param->topk;
  }

  auto ret = std::max(search_param->topk, hnsw_search_param->ef_search);
  return ret;
}

}  // namespace zvec::core_interface

================================================
FILE: src/core/interface/indexes/hnsw_rabitq_index.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <memory>
#include <string>
#include <zvec/core/interface/index.h>
#include "zvec/core/framework/index_error.h"

#if RABITQ_SUPPORTED
#include "algorithm/hnsw_rabitq/hnsw_rabitq_params.h"
#include "algorithm/hnsw_rabitq/hnsw_rabitq_streamer.h"
#include "algorithm/hnsw_rabitq/rabitq_params.h"
#endif

namespace zvec::core_interface {

// Creates the HNSW-RaBitQ streamer and initializes it.
//
// Builds without RABITQ_SUPPORTED always fail with IndexError_Unsupported.
// Otherwise sparse indexes and dimensions outside
// [kMinRabitqDimSize, kMaxRabitqDimSize] are rejected, ef_construction is
// clamped to [1, 2048] and m to [5, 1024].
//
// @param param index parameters; must actually be an HNSWRabitqIndexParam.
// @return 0 on success, core::IndexError_InvalidArgument when `param` has
//         the wrong dynamic type, core::IndexError_Unsupported for an
//         unsupported platform or dimension, core::IndexError_Runtime for a
//         sparse index or a streamer init failure.
int HNSWRabitqIndex::CreateAndInitStreamer(const BaseIndexParam &param) {
#if !RABITQ_SUPPORTED
  LOG_ERROR("RaBitQ is not supported on this platform (Linux x86_64 only)");
  return core::IndexError_Unsupported;
#else
  // A pointer cast reports a mismatched param as an error code; the
  // reference cast used previously would throw std::bad_cast instead.
  const auto *rabitq_param =
      dynamic_cast<const HNSWRabitqIndexParam *>(&param);
  if (ailego_unlikely(rabitq_param == nullptr)) {
    LOG_ERROR("Invalid index param type, expected HNSWRabitqIndexParam");
    return core::IndexError_InvalidArgument;
  }
  param_ = *rabitq_param;

  if (is_sparse_) {
    LOG_ERROR("Sparse index is not supported");
    return core::IndexError_Runtime;
  }

  if (param.dimension < core::kMinRabitqDimSize ||
      param.dimension > core::kMaxRabitqDimSize) {
    LOG_ERROR("Unsupported dimension: %d", param.dimension);
    return core::IndexError_Unsupported;
  }

  // Clamp the build parameters into their supported ranges.
  param_.ef_construction = std::max(1, std::min(2048, param_.ef_construction));
  param_.m = std::max(5, std::min(1024, param_.m));

  proxima_index_params_.set(core::PARAM_HNSW_RABITQ_STREAMER_EFCONSTRUCTION,
                            param_.ef_construction);
  proxima_index_params_.set(core::PARAM_HNSW_RABITQ_STREAMER_MAX_NEIGHBOR_COUNT,
                            param_.m);
  proxima_index_params_.set(core::PARAM_HNSW_RABITQ_STREAMER_GET_VECTOR_ENABLE,
                            true);
  proxima_index_params_.set(core::PARAM_HNSW_RABITQ_STREAMER_EF,
                            kDefaultHnswEfSearch);
  proxima_index_params_.set(core::PARAM_HNSW_RABITQ_STREAMER_USE_ID_MAP,
                            param_.use_id_map);
  proxima_index_params_.set(core::PARAM_HNSW_RABITQ_GENERAL_DIMENSION,
                            input_vector_meta_.dimension());
  proxima_index_params_.set(core::PARAM_RABITQ_TOTAL_BITS, param_.total_bits);
  // num_clusters, sample_count are parameters for rabitq converter
  // proxima_index_params_.set(core::PARAM_RABITQ_NUM_CLUSTERS,
  //                           param_.num_clusters);

  // The streamer is built directly (not through the factory) so that the
  // provider and reformer can be attached before init. std::make_shared
  // never returns null, so no null check is needed afterwards.
  auto streamer = std::make_shared<core::HnswRabitqStreamer>();
  streamer->set_provider(param_.provider);
  streamer->set_reformer(param_.reformer);
  streamer_ = streamer;

  if (ailego_unlikely(
          streamer_->init(proxima_index_meta_, proxima_index_params_) != 0)) {
    LOG_ERROR("Failed to init HnswRabitqStreamer");
    return core::IndexError_Runtime;
  }
  return 0;
#endif  // RABITQ_SUPPORTED
}

// Copies the HNSW-RaBitQ query settings into the search context.
//
// Builds without RABITQ_SUPPORTED always fail with IndexError_Unsupported.
// Otherwise topk and fetch_vector are always applied, the filter is applied
// when one is set, the radius becomes the context threshold only when it is
// positive, and ef_search is pushed to the context as a parameter update.
//
// @param search_param query parameters; must be an HNSWRabitqQueryParam with
//                     ef_search in (0, 2048].
// @param context      search context to configure.
// @return 0 on success, core::IndexError_Runtime when `search_param` has the
//         wrong type or ef_search is out of range.
int HNSWRabitqIndex::_prepare_for_search(
    const VectorData & /*vector_data*/,
    const BaseIndexQueryParam::Pointer &search_param,
    core::IndexContext::Pointer &context) {
#if !RABITQ_SUPPORTED
  LOG_ERROR("RaBitQ is not supported on this platform (Linux x86_64 only)");
  return core::IndexError_Unsupported;
#else
  const auto rabitq_query =
      std::dynamic_pointer_cast<HNSWRabitqQueryParam>(search_param);
  if (ailego_unlikely(!rabitq_query)) {
    LOG_ERROR("Invalid search param type, expected HNSWRabitqQueryParam");
    return core::IndexError_Runtime;
  }

  HNSWRabitqQueryParam &query = *rabitq_query;

  const bool ef_in_range = !(0 >= query.ef_search) && !(query.ef_search > 2048);
  if (!ef_in_range) {
    LOG_ERROR(
        "ef_search must be greater than 0 and less than or equal to 2048.");
    return core::IndexError_Runtime;
  }

  context->set_topk(query.topk);
  context->set_fetch_vector(query.fetch_vector);

  // NOTE(review): the filter is moved out of the caller's query param, which
  // looks like it leaves that param without a usable filter — confirm.
  if (query.filter) {
    context->set_filter(std::move(*query.filter));
  }

  const bool has_radius = query.radius > 0.0f;
  if (has_radius) {
    context->set_threshold(query.radius);
  }

  // Same [1, 2048] clamp as before, written with the bounds swapped.
  const int real_search_ef = std::min(2048u, std::max(1u, query.ef_search));

  ailego::Params ef_update;
  ef_update.set(core::PARAM_HNSW_RABITQ_STREAMER_EF, real_search_ef);
  context->update(ef_update);
  return 0;
#endif  // RABITQ_SUPPORTED
}

// Computes how many candidates the coarse search stage should return.
//
// Builds without RABITQ_SUPPORTED return -1. Otherwise the result is the
// larger of topk and ef_search.
//
// @param search_param query parameters; must not be null.
// @return max(topk, ef_search), or plain topk when `search_param` is not an
//         HNSWRabitqQueryParam (previously a null dereference).
int HNSWRabitqIndex::_get_coarse_search_topk(
    const BaseIndexQueryParam::Pointer &search_param) {
#if !RABITQ_SUPPORTED
  LOG_ERROR("RaBitQ is not supported on this platform (Linux x86_64 only)");
  return -1;
#else
  const auto &hnsw_search_param =
      std::dynamic_pointer_cast<HNSWRabitqQueryParam>(search_param);

  if (ailego_unlikely(!hnsw_search_param)) {
    LOG_ERROR("Invalid search param type, expected HNSWRabitqQueryParam");
    return search_param->topk;
  }

  auto ret = std::max(search_param->topk, hnsw_search_param->ef_search);
  return ret;
#endif  // RABITQ_SUPPORTED
}


}  // namespace zvec::core_interface


================================================
FILE: src/core/interface/indexes/ivf_index.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <memory>
#include <string>
#include <zvec/core/interface/index.h>
#include "algorithm/ivf/ivf_params.h"

namespace zvec::core_interface {

// Sentinel key stored in doc_cache_ slots that IVFIndex::Add() created only as
// padding; IVFIndex::GenerateHolder() skips entries carrying this key.
static constexpr uint64_t kInvalidKey = std::numeric_limits<uint64_t>::max();

//! Create and initialise the IVF builder and streamer from `param`.
//! Returns 0 on success, IndexError_InvalidArgument for sparse indexes or a
//! parameter object that is not an IVFIndexParam, and IndexError_Runtime when
//! a component cannot be created or initialised.
int IVFIndex::CreateAndInitStreamer(const BaseIndexParam &param) {
  if (is_sparse_) {
    LOG_ERROR("IVF Index not support sparse vector");
    return core::IndexError_InvalidArgument;
  }

  // Use the pointer form of dynamic_cast: the reference form throws
  // std::bad_cast on a type mismatch, while this function reports errors
  // through its return code.
  const auto *ivf_param = dynamic_cast<const IVFIndexParam *>(&param);
  if (ailego_unlikely(ivf_param == nullptr)) {
    LOG_ERROR("Invalid index param type, expected IVFIndexParam");
    return core::IndexError_InvalidArgument;
  }
  param_ = *ivf_param;
  // Both values are clamped into [1, 1024].
  param_.nlist = std::max(1, std::min(1024, param_.nlist));
  param_.niters = std::max(1, std::min(1024, param_.niters));

  proxima_index_params_.set(core::PARAM_IVF_BUILDER_CENTROID_COUNT,
                            param_.nlist);

  // TODO: add_vector_with_id & fetch_by_id don't rely on this param
  builder_ = core::IndexFactory::CreateBuilder("IVFBuilder");
  streamer_ = core::IndexFactory::CreateStreamer("IVFStreamer");

  if (ailego_unlikely(!builder_)) {
    LOG_ERROR("Failed to create builder");
    return core::IndexError_Runtime;
  }
  if (ailego_unlikely(!streamer_)) {
    LOG_ERROR("Failed to create streamer");
    return core::IndexError_Runtime;
  }

  // When a converter is configured, its meta is the one handed to the
  // builder and streamer instead of the index's own meta.
  IndexMeta real_meta;
  if (converter_) {
    real_meta = converter_->meta();
  } else {
    real_meta = proxima_index_meta_;
  }
  if (ailego_unlikely(builder_->init(real_meta, proxima_index_params_) != 0)) {
    LOG_ERROR("Failed to init builder");
    return core::IndexError_Runtime;
  }
  if (ailego_unlikely(streamer_->init(real_meta, proxima_index_params_) != 0)) {
    LOG_ERROR("Failed to init streamer");
    return core::IndexError_Runtime;
  }
  return 0;
}

//! Create the storage backend selected by `storage_options` and, unless a new
//! index is being created, open the existing file and attach the streamer.
//! Returns 0 on success, IndexError_Unsupported for an unknown storage type,
//! the storage init error code, or IndexError_Runtime for other failures.
int IVFIndex::Open(const std::string &file_path,
                   StorageOptions storage_options) {
  file_path_ = file_path;
  is_read_only_ = storage_options.read_only;

  const bool use_mmap =
      storage_options.type == StorageOptions::StorageType::kMMAP;
  const bool use_buffer_pool =
      storage_options.type == StorageOptions::StorageType::kBufferPool;
  if (!use_mmap && !use_buffer_pool) {
    LOG_ERROR("Unsupported storage type");
    return core::IndexError_Unsupported;
  }

  // Instantiate the backend that matches the requested storage type.
  storage_ = core::IndexFactory::CreateStorage(use_mmap ? "MMapFileReadStorage"
                                                        : "BufferStorage");
  if (storage_ == nullptr) {
    if (use_mmap) {
      LOG_ERROR("Failed to create MMapFileStorage");
    } else {
      LOG_ERROR("Failed to create BufferStorage");
    }
    return core::IndexError_Runtime;
  }

  // The storage is initialised with empty parameters.
  ailego::Params storage_params;
  const int init_ret = storage_->init(storage_params);
  if (init_ret != 0) {
    if (use_mmap) {
      LOG_ERROR("Failed to init MMapFileStorage, path: %s, err: %s",
                file_path_.c_str(), core::IndexError::What(init_ret));
    } else {
      LOG_ERROR("Failed to init BufferStorage, path: %s, err: %s",
                file_path_.c_str(), core::IndexError::What(init_ret));
    }
    return init_ret;
  }

  // Read-only access, or any access that does not create a new index, loads
  // the existing file and marks the index as trained.
  const bool open_existing = is_read_only_ || !storage_options.create_new;
  if (open_existing) {
    const int open_ret = storage_->open(file_path_, false);
    if (open_ret != 0) {
      LOG_ERROR("Failed to open storage, path: %s, err: %s", file_path_.c_str(),
                core::IndexError::What(open_ret));
      return core::IndexError_Runtime;
    }
    if (streamer_ == nullptr || streamer_->open(storage_) != 0) {
      LOG_ERROR("Failed to open streamer, path: %s", file_path_.c_str());
      return core::IndexError_Runtime;
    }
    is_trained_ = true;
  }

  is_open_ = true;
  return 0;
}

//! Build the training/build holder from the documents cached by Add().
//! One MultiPassIndexHolder is created for the configured data type (FP16,
//! FP32 or INT8); placeholder cache entries are skipped. When a converter is
//! configured, the holder is replaced by the converter's result.
//! Returns 0 on success and IndexError_Runtime on an unsupported data type or
//! when a vector cannot be added to the holder.
int IVFIndex::GenerateHolder() {
  if (param_.data_type == DataType::DT_FP16) {
    auto holder =
        std::make_shared<zvec::core::MultiPassIndexHolder<DataType::DT_FP16>>(
            param_.dimension);
    // Iterate by const reference (no per-document copy of the raw buffer) and
    // skip placeholders before paying for the vector construction.
    for (const auto &doc : doc_cache_) {
      if (doc.first == kInvalidKey) {
        continue;
      }
      ailego::NumericalVector<uint16_t> vec(doc.second);
      if (!holder->emplace(doc.first, vec)) {
        LOG_ERROR("Failed to add vector");
        return core::IndexError_Runtime;
      }
    }
    holder_ = holder;
  } else if (param_.data_type == DataType::DT_FP32) {
    auto holder =
        std::make_shared<zvec::core::MultiPassIndexHolder<DataType::DT_FP32>>(
            param_.dimension);
    for (const auto &doc : doc_cache_) {
      if (doc.first == kInvalidKey) {
        continue;
      }
      ailego::NumericalVector<float> vec(doc.second);
      if (!holder->emplace(doc.first, vec)) {
        LOG_ERROR("Failed to add vector");
        return core::IndexError_Runtime;
      }
    }
    holder_ = holder;
  } else if (param_.data_type == DataType::DT_INT8) {
    auto holder =
        std::make_shared<zvec::core::MultiPassIndexHolder<DataType::DT_INT8>>(
            param_.dimension);
    for (const auto &doc : doc_cache_) {
      if (doc.first == kInvalidKey) {
        continue;
      }
      ailego::NumericalVector<uint8_t> vec(doc.second);
      if (!holder->emplace(doc.first, vec)) {
        LOG_ERROR("Failed to add vector");
        return core::IndexError_Runtime;
      }
    }
    holder_ = holder;
  } else {
    LOG_ERROR("data_type is not support");
    return core::IndexError_Runtime;
  }
  if (converter_) {
    // NOTE(review): the result of TrainAndTransform is not inspected here;
    // confirm whether it reports failures that should abort the build.
    core::IndexConverter::TrainAndTransform(converter_, holder_);
    holder_ = converter_->result();
  }
  return 0;
}

//! Cache one dense vector under `doc_id` until the index is trained.
//! The raw bytes (dimension * unit_size of the input meta) are copied into
//! doc_cache_ at position `doc_id`; any gap below `doc_id` is padded with
//! zero-filled placeholders keyed by kInvalidKey.
//! Returns 0 on success and IndexError_Runtime when the index is already
//! trained or the vector is not a valid dense vector.
int IVFIndex::Add(const VectorData &vector, uint32_t doc_id) {
  if (is_trained_) {
    LOG_ERROR("this IVF index is trained");
    return core::IndexError_Runtime;
  }
  if (!std::holds_alternative<DenseVector>(vector.vector)) {
    LOG_ERROR("Invalid vector data");
    return core::IndexError_Runtime;
  }
  const DenseVector &dense_vector = std::get<DenseVector>(vector.vector);
  // Building a std::string from a null pointer is undefined behaviour.
  if (dense_vector.data == nullptr) {
    LOG_ERROR("Invalid vector data");
    return core::IndexError_Runtime;
  }

  const size_t vector_bytes =
      static_cast<size_t>(input_vector_meta_.dimension()) *
      input_vector_meta_.unit_size();
  std::string out_vector_buffer(static_cast<const char *>(dense_vector.data),
                                vector_bytes);

  std::lock_guard<std::mutex> lock(mutex_);
  // Grow the cache in one step so that index `doc_id` exists; new slots are
  // copies of a single zero-filled placeholder.
  if (doc_cache_.size() <= doc_id) {
    doc_cache_.resize(
        static_cast<size_t>(doc_id) + 1,
        std::make_pair(kInvalidKey, std::string(vector_bytes, '\0')));
  }
  doc_cache_[doc_id] = std::make_pair(doc_id, std::move(out_vector_buffer));
  return 0;
}

//! Train and build the IVF index from the cached documents, dump it to
//! file_path_, then reopen that file through the storage and the streamer.
//! Returns 0 on success; the GenerateHolder() error code when holder
//! generation fails; IndexError_Runtime for any other failing step.
int IVFIndex::Train() {
  int ret = GenerateHolder();
  if (ret != 0) {
    LOG_ERROR("Failed to generate holder, err: %s",
              core::IndexError::What(ret));
    return ret;
  }
  if (builder_ == nullptr || storage_ == nullptr) {
    LOG_ERROR("IVF index is not initialized or opened, path: %s",
              file_path_.c_str());
    return core::IndexError_Runtime;
  }

  ret = builder_->train(holder_);
  if (ret != 0) {
    LOG_ERROR("Failed to train builder, err: %s", core::IndexError::What(ret));
    return core::IndexError_Runtime;
  }
  ret = builder_->build(holder_);
  if (ret != 0) {
    LOG_ERROR("Failed to build index, err: %s", core::IndexError::What(ret));
    return core::IndexError_Runtime;
  }

  // Persist the built index so that the streamer can load it from storage.
  auto dumper = core::IndexFactory::CreateDumper("FileDumper");
  if (dumper == nullptr) {
    LOG_ERROR("Failed to create FileDumper");
    return core::IndexError_Runtime;
  }
  ret = dumper->create(file_path_);
  if (ret != 0) {
    LOG_ERROR("Failed to create dump file, path: %s, err: %s",
              file_path_.c_str(), core::IndexError::What(ret));
    return core::IndexError_Runtime;
  }
  ret = builder_->dump(dumper);
  // Close the dumper even when the dump failed so the file is released.
  const int close_ret = dumper->close();
  if (ret != 0 || close_ret != 0) {
    LOG_ERROR("Failed to dump index, path: %s, err: %s", file_path_.c_str(),
              core::IndexError::What(ret != 0 ? ret : close_ret));
    return core::IndexError_Runtime;
  }

  ret = storage_->open(file_path_, false);
  if (ret != 0) {
    LOG_ERROR("Failed to open storage, path: %s, err: %s", file_path_.c_str(),
              core::IndexError::What(ret));
    return core::IndexError_Runtime;
  }
  if (streamer_ == nullptr || streamer_->open(storage_) != 0) {
    LOG_ERROR("Failed to open streamer, path: %s", file_path_.c_str());
    return core::IndexError_Runtime;
  }
  is_trained_ = true;
  return 0;
}

int IVFIndex::_dense_fetch(const uint32_t doc_id,
                           VectorDataBuffer *vector_data_buffer) {
  if (is_trained_) {
    return Index::_dense_fetch(doc_id, vector_data_buffer);
  } else {
    DenseVectorBuffer dense_vector_buffer;
    std::string &out_vector_buffer = dense_vector_buffer.data;
    out_vector_buffer = doc_cache_[doc_id].second;
    vector_data_buffer->vector_buffer = std::move(dense_vector_buffer);
    return 0;
  }
}

//! Copy the IVF query parameters into the search context.
//! Returns 0 on success and IndexError_Runtime when `search_param` is not an
//! IVFQueryParam.
int IVFIndex::_prepare_for_search(
    const VectorData & /*query*/,
    const BaseIndexQueryParam::Pointer &search_param,
    core::IndexContext::Pointer &context) {
  const auto &ivf_search_param =
      std::dynamic_pointer_cast<IVFQueryParam>(search_param);

  // The cast yields null for a null input or a foreign parameter type; both
  // would otherwise be dereferenced below.
  if (ailego_unlikely(!ivf_search_param)) {
    LOG_ERROR("Invalid search param type, expected IVFQueryParam");
    return core::IndexError_Runtime;
  }

  context->set_topk(ivf_search_param->topk);
  context->set_fetch_vector(ivf_search_param->fetch_vector);
  if (ivf_search_param->filter) {
    context->set_filter(std::move(*ivf_search_param->filter));
  }
  // A non-positive radius leaves the context threshold untouched.
  if (ivf_search_param->radius > 0.0f) {
    context->set_threshold(ivf_search_param->radius);
  }

  if (ivf_search_param->nprobe > 0) {
    // TODO: 1. sparse; 2. default ef
    ailego::Params params;
    // need fix
    // NOTE(review): nprobe is written under the builder centroid-count key;
    // confirm which parameter key the streamer expects for the probe count.
    params.set(core::PARAM_IVF_BUILDER_CENTROID_COUNT,
               ivf_search_param->nprobe);
    context->update(params);
  }
  return 0;
}

//! Merge `indexes` through Index::Merge, dump the builder to file_path_, then
//! reopen that file through the storage and the streamer.
//! Returns 0 on success; the Index::Merge error code when the base merge
//! fails; IndexError_Runtime for any other failing step.
int IVFIndex::Merge(const std::vector<Index::Pointer> &indexes,
                    const IndexFilter &filter, const MergeOptions &options) {
  int pre_ret = Index::Merge(indexes, filter, options);
  if (pre_ret != 0) {
    return pre_ret;
  }
  if (builder_ == nullptr || storage_ == nullptr) {
    LOG_ERROR("IVF index is not initialized or opened, path: %s",
              file_path_.c_str());
    return core::IndexError_Runtime;
  }

  // Persist the merged index so that the streamer can load it from storage.
  auto dumper = core::IndexFactory::CreateDumper("FileDumper");
  if (dumper == nullptr) {
    LOG_ERROR("Failed to create FileDumper");
    return core::IndexError_Runtime;
  }
  int ret = dumper->create(file_path_);
  if (ret != 0) {
    LOG_ERROR("Failed to create dump file, path: %s, err: %s",
              file_path_.c_str(), core::IndexError::What(ret));
    return core::IndexError_Runtime;
  }
  ret = builder_->dump(dumper);
  // Close the dumper even when the dump failed so the file is released.
  const int close_ret = dumper->close();
  if (ret != 0 || close_ret != 0) {
    LOG_ERROR("Failed to dump index, path: %s, err: %s", file_path_.c_str(),
              core::IndexError::What(ret != 0 ? ret : close_ret));
    return core::IndexError_Runtime;
  }

  ret = storage_->open(file_path_, false);
  if (ret != 0) {
    LOG_ERROR("Failed to open storage, path: %s, err: %s", file_path_.c_str(),
              core::IndexError::What(ret));
    return core::IndexError_Runtime;
  }
  if (streamer_ == nullptr || streamer_->open(storage_) != 0) {
    LOG_ERROR("Failed to open streamer, path: %s", file_path_.c_str());
    return core::IndexError_Runtime;
  }
  is_trained_ = true;
  return 0;
}
}  // namespace zvec::core_interface

================================================
FILE: src/core/interface/utils/utils.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <magic_enum/magic_enum.hpp>
#include <zvec/ailego/encoding/json.h>
#include <zvec/ailego/logger/logger.h>

namespace zvec {
namespace core_interface {

//! Read an optional enum field, stored as its string name, from `json_obj`.
//! A missing key is not an error and leaves `enum_value` unchanged. Returns
//! false (after logging) when the field is not a string or does not name a
//! value of EnumType. `tmp_json_value` is scratch storage for the field.
template <typename EnumType>
constexpr bool extract_enum_from_json(const ailego::JsonObject &json_obj,
                                      const char *key, EnumType &enum_value,
                                      ailego::JsonValue &tmp_json_value) {
  if (!json_obj.has(key)) {
    return true;
  }

  json_obj.get(key, &tmp_json_value);
  if (!tmp_json_value.is_string()) {
    LOG_ERROR("Invalid json field type for key: %s", key);
    return false;
  }

  const auto parsed =
      magic_enum::enum_cast<EnumType>(tmp_json_value.as_stl_string());
  if (!parsed.has_value()) {
    LOG_ERROR("Invalid enum value for key: %s, value: %s", key,
              tmp_json_value.as_c_string());
    return false;
  }

  enum_value = parsed.value();
  return true;
}

//! Read an optional boolean, floating-point or integral field from
//! `json_obj`. A missing key is not an error and leaves `value` unchanged.
//! Returns false (after logging) when the JSON type does not match T;
//! floating-point targets also accept integer JSON values. For any other T
//! the function aborts once the key is present. `tmp_json_value` is scratch
//! storage for the field.
template <typename T>
constexpr bool extract_value_from_json(const ailego::JsonObject &json_obj,
                                       const char *key, T &value,
                                       ailego::JsonValue &tmp_json_value) {
  if (!json_obj.has(key)) {
    return true;
  }

  json_obj.get(key, &tmp_json_value);
  if constexpr (std::is_same_v<T, bool>) {
    if (!tmp_json_value.is_boolean()) {
      LOG_ERROR("Invalid json field type for key: %s; expected: boolean",
                key);
      return false;
    }
    value = tmp_json_value.as_bool();
  } else if constexpr (std::is_floating_point_v<T>) {
    const bool is_numeric =
        tmp_json_value.is_float() || tmp_json_value.is_integer();
    if (!is_numeric) {
      LOG_ERROR("Invalid json field type for key: %s; expected: float", key);
      return false;
    }
    value = static_cast<T>(tmp_json_value.as_float());
  } else if constexpr (std::is_integral_v<T>) {
    if (!tmp_json_value.is_integer()) {
      LOG_ERROR("Invalid json field type for key: %s; expected: integer",
                key);
      return false;
    }
    value = static_cast<T>(tmp_json_value.as_integer());
  } else {
    // Unsupported target type.
    abort();
  }
  return true;
}

// Deserialize the enum member `field_name` from `json_obj` using
// extract_enum_from_json<EnumType>. `field_name` serves both as the JSON key
// (stringized) and as the destination variable. On failure the macro logs an
// error and executes `return false;` in the enclosing function, so it can only
// be used inside functions whose return type accepts `false`.
#define DESERIALIZE_ENUM_FIELD(json_obj, field_name, EnumType)               \
  {                                                                          \
    ailego::JsonValue tmp_json_value;                                        \
    if (!extract_enum_from_json<EnumType>(json_obj, #field_name, field_name, \
                                          tmp_json_value)) {                 \
      LOG_ERROR("Error when deserialize json - field:%s", #field_name);      \
      return false;                                                          \
    }                                                                        \
  }


// Deserialize the scalar member `field_name` from `json_obj` using
// extract_value_from_json. `field_name` serves both as the JSON key
// (stringized) and as the destination variable. On failure the macro logs an
// error and executes `return false;` in the enclosing function, so it can only
// be used inside functions whose return type accepts `false`.
#define DESERIALIZE_VALUE_FIELD(json_obj, field_name)                   \
  {                                                                     \
    ailego::JsonValue tmp_json_value;                                   \
    if (!extract_value_from_json(json_obj, #field_name, field_name,     \
                                 tmp_json_value)) {                     \
      LOG_ERROR("Error when deserialize json - field:%s", #field_name); \
      return false;                                                     \
    }                                                                   \
  }
}  // namespace core_interface
}  // namespace zvec

================================================
FILE: src/core/metric/CMakeLists.txt
================================================
# Project helper modules; cc_library() below is presumably provided by
# bazel.cmake -- confirm against that file.
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

# core_metric: library made of every .cc file in this directory, with
# zvec_ailego, zvec_turbo and core_framework listed as its libraries.
cc_library(
    NAME core_metric 
    STATIC SHARED STRICT ALWAYS_LINK
    SRCS *.cc
    LIBS zvec_ailego zvec_turbo core_framework 
    INCS . ${PROJECT_ROOT_DIR}/src/core
    VERSION "${PROXIMA_ZVEC_VERSION}"
  )


================================================
FILE: src/core/metric/cosine_metric.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <ailego/math/cosine_distance_matrix.h>
#include <ailego/math/norm2_matrix.h>
#include <ailego/math_batch/distance_batch.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_factory.h>

namespace zvec {
namespace core {

//! Look up the float cosine-distance kernel for an (m x n) block.
//! Returns nullptr unless both m and n are powers of two not larger than 32;
//! combinations with n > m have no kernel and also yield nullptr.
inline IndexMetric::MatrixDistanceHandle CosineDistanceMatrixFp32(size_t m,
                                                                  size_t n) {
  using Handle = IndexMetric::MatrixDistanceHandle;

  // Row index is ctz(m), column index is ctz(n). Entries left out of a row
  // are value-initialized, i.e. null handles.
  static const Handle kernels[6][6] = {
      {reinterpret_cast<Handle>(
          ailego::CosineDistanceMatrix<float, 1, 1>::Compute)},
      {reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<float, 2, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<float, 2, 2>::Compute)},
      {reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<float, 4, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<float, 4, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<float, 4, 4>::Compute)},
      {reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<float, 8, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<float, 8, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<float, 8, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<float, 8, 8>::Compute)},
      {reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<float, 16, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<float, 16, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<float, 16, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<float, 16, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<float, 16, 16>::Compute)},
      {reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<float, 32, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<float, 32, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<float, 32, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<float, 32, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<float, 32, 16>::Compute),
       reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<float, 32, 32>::Compute)},
  };

  const bool m_supported = (m <= 32) && (ailego_popcount(m) == 1);
  const bool n_supported = (n <= 32) && (ailego_popcount(n) == 1);
  if (!(m_supported && n_supported)) {
    return nullptr;
  }
  return kernels[ailego_ctz(m)][ailego_ctz(n)];
}

//! Look up the half-precision cosine-distance kernel for an (m x n) block.
//! Returns nullptr unless both m and n are powers of two not larger than 32;
//! combinations with n > m have no kernel and also yield nullptr.
inline IndexMetric::MatrixDistanceHandle CosineDistanceMatrixFp16(size_t m,
                                                                  size_t n) {
  using Handle = IndexMetric::MatrixDistanceHandle;
  using Half = ailego::Float16;

  // Row index is ctz(m), column index is ctz(n). Entries left out of a row
  // are value-initialized, i.e. null handles.
  static const Handle kernels[6][6] = {
      {reinterpret_cast<Handle>(
          ailego::CosineDistanceMatrix<Half, 1, 1>::Compute)},
      {reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<Half, 2, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<Half, 2, 2>::Compute)},
      {reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<Half, 4, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<Half, 4, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<Half, 4, 4>::Compute)},
      {reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<Half, 8, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<Half, 8, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<Half, 8, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<Half, 8, 8>::Compute)},
      {reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<Half, 16, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<Half, 16, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<Half, 16, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<Half, 16, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<Half, 16, 16>::Compute)},
      {reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<Half, 32, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<Half, 32, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<Half, 32, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<Half, 32, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<Half, 32, 16>::Compute),
       reinterpret_cast<Handle>(
           ailego::CosineDistanceMatrix<Half, 32, 32>::Compute)},
  };

  const bool m_supported = (m <= 32) && (ailego_popcount(m) == 1);
  const bool n_supported = (n <= 32) && (ailego_popcount(n) == 1);
  if (!(m_supported && n_supported)) {
    return nullptr;
  }
  return kernels[ailego_ctz(m)][ailego_ctz(n)];
}

/*! Cosine Metric
 *
 *  Cosine distance for FP16 and FP32 vectors. Only 1x1 distance kernels are
 *  exposed through distance_matrix().
 */
class CosineMetric : public IndexMetric {
 public:
  //! Initialize the metric; only FP16/FP32 metas whose unit size matches the
  //! data type are accepted
  int init(const IndexMeta &meta, const ailego::Params &index_params) override {
    const IndexMeta::DataType type = meta.data_type();
    const bool is_supported_type = (type == IndexMeta::DataType::DT_FP16 ||
                                    type == IndexMeta::DataType::DT_FP32);
    if (!is_supported_type || IndexMeta::UnitSizeof(type) != meta.unit_size()) {
      return IndexError_Unsupported;
    }

    data_type_ = type;
    params_ = index_params;
    return 0;
  }

  //! Cleanup the metric (nothing to release)
  int cleanup(void) override {
    return 0;
  }

  //! Check whether the index meta uses this metric's data type and unit size
  bool is_matched(const IndexMeta &meta) const override {
    if (meta.data_type() != data_type_) {
      return false;
    }
    return meta.unit_size() == IndexMeta::UnitSizeof(data_type_);
  }

  //! Check whether the query meta uses this metric's data type and unit size
  //! and has the same dimension as the index meta
  bool is_matched(const IndexMeta &meta,
                  const IndexQueryMeta &qmeta) const override {
    if (qmeta.data_type() != data_type_) {
      return false;
    }
    if (qmeta.unit_size() != IndexMeta::UnitSizeof(data_type_)) {
      return false;
    }
    return qmeta.dimension() == meta.dimension();
  }

  //! Retrieve the 1x1 distance function for the configured data type
  MatrixDistance distance(void) const override {
    if (data_type_ == IndexMeta::DataType::DT_FP16) {
      return reinterpret_cast<MatrixDistanceHandle>(
          ailego::CosineDistanceMatrix<ailego::Float16, 1, 1>::Compute);
    }
    if (data_type_ == IndexMeta::DataType::DT_FP32) {
      return reinterpret_cast<MatrixDistanceHandle>(
          ailego::CosineDistanceMatrix<float, 1, 1>::Compute);
    }
    return nullptr;
  }

  //! Retrieve the distance function for an (m x n) block; only 1x1 is offered
  MatrixDistance distance_matrix(size_t m, size_t n) const override {
    if (m == 1 && n == 1) {
      return distance();
    }
    return nullptr;
  }

  //! Retrieve the batch distance function for the configured data type
  MatrixBatchDistance batch_distance(void) const override {
    if (data_type_ == IndexMeta::DataType::DT_FP32) {
      return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(
          ailego::BaseDistance<ailego::CosineDistanceMatrix, float, 12,
                               2>::ComputeBatch);
    }
    if (data_type_ == IndexMeta::DataType::DT_FP16) {
      return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(
          ailego::BaseDistance<ailego::CosineDistanceMatrix, ailego::Float16,
                               12, 2>::ComputeBatch);
    }
    return nullptr;
  }

  //! Retrieve the params passed to init()
  const ailego::Params &params(void) const override {
    return params_;
  }

  //! Retrieve query metric object of this index metric (always null here)
  Pointer query_metric(void) const override {
    return nullptr;
  }

 private:
  IndexMeta::DataType data_type_{IndexMeta::DataType::DT_FP32};
  ailego::Params params_{};
};

// Presumably registers CosineMetric with the index factory under the name
// "Cosine" -- confirm against the macro definition in index_factory.h.
INDEX_FACTORY_REGISTER_METRIC_ALIAS(Cosine, CosineMetric);

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/metric/euclidean_metric.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <ailego/math/euclidean_distance_matrix.h>
#include <ailego/math/hamming_distance_matrix.h>
#include <ailego/math_batch/distance_batch.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_factory.h>
#include <zvec/core/framework/index_metric.h>

namespace zvec {
namespace core {

//! Look up the float squared-Euclidean kernel for an (m x n) block.
//! Returns nullptr unless both m and n are powers of two not larger than 32;
//! combinations with n > m have no kernel and also yield nullptr.
static inline IndexMetric::MatrixDistanceHandle
SquaredEuclideanDistanceMatrixFp32(size_t m, size_t n) {
  using Handle = IndexMetric::MatrixDistanceHandle;

  // Row index is ctz(m), column index is ctz(n). Entries left out of a row
  // are value-initialized, i.e. null handles.
  static const Handle kernels[6][6] = {
      {reinterpret_cast<Handle>(
          ailego::SquaredEuclideanDistanceMatrix<float, 1, 1>::Compute)},
      {reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<float, 2, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<float, 2, 2>::Compute)},
      {reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<float, 4, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<float, 4, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<float, 4, 4>::Compute)},
      {reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<float, 8, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<float, 8, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<float, 8, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<float, 8, 8>::Compute)},
      {reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<float, 16, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<float, 16, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<float, 16, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<float, 16, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<float, 16, 16>::Compute)},
      {reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<float, 32, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<float, 32, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<float, 32, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<float, 32, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<float, 32, 16>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<float, 32, 32>::Compute)},
  };

  const bool m_supported = (m <= 32) && (ailego_popcount(m) == 1);
  const bool n_supported = (n <= 32) && (ailego_popcount(n) == 1);
  if (!(m_supported && n_supported)) {
    return nullptr;
  }
  return kernels[ailego_ctz(m)][ailego_ctz(n)];
}

//! Select the ailego::SquaredEuclideanDistanceMatrix<Float16, M, N>::Compute
//! instantiation with M == m and N == n.
//! A handle exists only when m and n are powers of two no greater than 32
//! and n <= m; every other combination yields nullptr.
static inline IndexMetric::MatrixDistanceHandle
SquaredEuclideanDistanceMatrixFp16(size_t m, size_t n) {
  using Handle = IndexMetric::MatrixDistanceHandle;
  using Elem = ailego::Float16;

  // Row index is ctz(m), column index is ctz(n); slots with N > M are empty.
  static const Handle kernels[6][6] = {
      // M = 1
      {reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 1, 1>::Compute),
       nullptr, nullptr, nullptr, nullptr, nullptr},
      // M = 2
      {reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 2, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 2, 2>::Compute),
       nullptr, nullptr, nullptr, nullptr},
      // M = 4
      {reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 4, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 4, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 4, 4>::Compute),
       nullptr, nullptr, nullptr},
      // M = 8
      {reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 8, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 8, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 8, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 8, 8>::Compute),
       nullptr, nullptr},
      // M = 16
      {reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 16, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 16, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 16, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 16, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 16, 16>::Compute),
       nullptr},
      // M = 32
      {reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 32, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 32, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 32, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 32, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 32, 16>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 32, 32>::Compute)},
  };

  // The range check comes first so only small values reach the bit helpers.
  if (m > 32 || n > 32) {
    return nullptr;
  }
  // Exactly one set bit means a power of two; this also rejects zero.
  if (ailego_popcount(m) != 1 || ailego_popcount(n) != 1) {
    return nullptr;
  }
  const auto row = ailego_ctz(m);
  const auto col = ailego_ctz(n);
  return kernels[row][col];
}

//! Select the ailego::SquaredEuclideanDistanceMatrix<int8_t, M, N>::Compute
//! instantiation with M == m and N == n.
//! A handle exists only when m and n are powers of two no greater than 32
//! and n <= m; every other combination yields nullptr.
static inline IndexMetric::MatrixDistanceHandle
SquaredEuclideanDistanceMatrixInt8(size_t m, size_t n) {
  using Handle = IndexMetric::MatrixDistanceHandle;
  using Elem = int8_t;

  // Row index is ctz(m), column index is ctz(n); slots with N > M are empty.
  static const Handle kernels[6][6] = {
      // M = 1
      {reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 1, 1>::Compute),
       nullptr, nullptr, nullptr, nullptr, nullptr},
      // M = 2
      {reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 2, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 2, 2>::Compute),
       nullptr, nullptr, nullptr, nullptr},
      // M = 4
      {reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 4, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 4, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 4, 4>::Compute),
       nullptr, nullptr, nullptr},
      // M = 8
      {reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 8, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 8, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 8, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 8, 8>::Compute),
       nullptr, nullptr},
      // M = 16
      {reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 16, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 16, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 16, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 16, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 16, 16>::Compute),
       nullptr},
      // M = 32
      {reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 32, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 32, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 32, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 32, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 32, 16>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 32, 32>::Compute)},
  };

  // The range check comes first so only small values reach the bit helpers.
  if (m > 32 || n > 32) {
    return nullptr;
  }
  // Exactly one set bit means a power of two; this also rejects zero.
  if (ailego_popcount(m) != 1 || ailego_popcount(n) != 1) {
    return nullptr;
  }
  const auto row = ailego_ctz(m);
  const auto col = ailego_ctz(n);
  return kernels[row][col];
}

//! Select the squared-Euclidean matrix kernel used for Int4 features.
//! The kernels are the ailego::SquaredEuclideanDistanceMatrix<uint8_t, M, N>
//! instantiations with M == m and N == n.
//! A handle exists only when m and n are powers of two no greater than 32
//! and n <= m; every other combination yields nullptr.
static inline IndexMetric::MatrixDistanceHandle
SquaredEuclideanDistanceMatrixInt4(size_t m, size_t n) {
  using Handle = IndexMetric::MatrixDistanceHandle;
  using Elem = uint8_t;

  // Row index is ctz(m), column index is ctz(n); slots with N > M are empty.
  static const Handle kernels[6][6] = {
      // M = 1
      {reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 1, 1>::Compute),
       nullptr, nullptr, nullptr, nullptr, nullptr},
      // M = 2
      {reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 2, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 2, 2>::Compute),
       nullptr, nullptr, nullptr, nullptr},
      // M = 4
      {reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 4, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 4, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 4, 4>::Compute),
       nullptr, nullptr, nullptr},
      // M = 8
      {reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 8, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 8, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 8, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 8, 8>::Compute),
       nullptr, nullptr},
      // M = 16
      {reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 16, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 16, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 16, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 16, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 16, 16>::Compute),
       nullptr},
      // M = 32
      {reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 32, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 32, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 32, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 32, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 32, 16>::Compute),
       reinterpret_cast<Handle>(
           ailego::SquaredEuclideanDistanceMatrix<Elem, 32, 32>::Compute)},
  };

  // The range check comes first so only small values reach the bit helpers.
  if (m > 32 || n > 32) {
    return nullptr;
  }
  // Exactly one set bit means a power of two; this also rejects zero.
  if (ailego_popcount(m) != 1 || ailego_popcount(n) != 1) {
    return nullptr;
  }
  const auto row = ailego_ctz(m);
  const auto col = ailego_ctz(n);
  return kernels[row][col];
}

//! Select the ailego::EuclideanDistanceMatrix<float, M, N>::Compute
//! instantiation with M == m and N == n.
//! A handle exists only when m and n are powers of two no greater than 32
//! and n <= m; every other combination yields nullptr.
static inline IndexMetric::MatrixDistanceHandle EuclideanDistanceMatrixFp32(
    size_t m, size_t n) {
  using Handle = IndexMetric::MatrixDistanceHandle;
  using Elem = float;

  // Row index is ctz(m), column index is ctz(n); slots with N > M are empty.
  static const Handle kernels[6][6] = {
      // M = 1
      {reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 1, 1>::Compute),
       nullptr, nullptr, nullptr, nullptr, nullptr},
      // M = 2
      {reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 2, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 2, 2>::Compute),
       nullptr, nullptr, nullptr, nullptr},
      // M = 4
      {reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 4, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 4, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 4, 4>::Compute),
       nullptr, nullptr, nullptr},
      // M = 8
      {reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 8, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 8, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 8, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 8, 8>::Compute),
       nullptr, nullptr},
      // M = 16
      {reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 16, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 16, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 16, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 16, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 16, 16>::Compute),
       nullptr},
      // M = 32
      {reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 32, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 32, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 32, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 32, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 32, 16>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 32, 32>::Compute)},
  };

  // The range check comes first so only small values reach the bit helpers.
  if (m > 32 || n > 32) {
    return nullptr;
  }
  // Exactly one set bit means a power of two; this also rejects zero.
  if (ailego_popcount(m) != 1 || ailego_popcount(n) != 1) {
    return nullptr;
  }
  const auto row = ailego_ctz(m);
  const auto col = ailego_ctz(n);
  return kernels[row][col];
}

//! Select the ailego::EuclideanDistanceMatrix<Float16, M, N>::Compute
//! instantiation with M == m and N == n.
//! A handle exists only when m and n are powers of two no greater than 32
//! and n <= m; every other combination yields nullptr.
static inline IndexMetric::MatrixDistanceHandle EuclideanDistanceMatrixFp16(
    size_t m, size_t n) {
  using Handle = IndexMetric::MatrixDistanceHandle;
  using Elem = ailego::Float16;

  // Row index is ctz(m), column index is ctz(n); slots with N > M are empty.
  static const Handle kernels[6][6] = {
      // M = 1
      {reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 1, 1>::Compute),
       nullptr, nullptr, nullptr, nullptr, nullptr},
      // M = 2
      {reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 2, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 2, 2>::Compute),
       nullptr, nullptr, nullptr, nullptr},
      // M = 4
      {reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 4, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 4, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 4, 4>::Compute),
       nullptr, nullptr, nullptr},
      // M = 8
      {reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 8, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 8, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 8, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 8, 8>::Compute),
       nullptr, nullptr},
      // M = 16
      {reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 16, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 16, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 16, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 16, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 16, 16>::Compute),
       nullptr},
      // M = 32
      {reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 32, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 32, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 32, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 32, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 32, 16>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 32, 32>::Compute)},
  };

  // The range check comes first so only small values reach the bit helpers.
  if (m > 32 || n > 32) {
    return nullptr;
  }
  // Exactly one set bit means a power of two; this also rejects zero.
  if (ailego_popcount(m) != 1 || ailego_popcount(n) != 1) {
    return nullptr;
  }
  const auto row = ailego_ctz(m);
  const auto col = ailego_ctz(n);
  return kernels[row][col];
}

//! Select the ailego::EuclideanDistanceMatrix<int8_t, M, N>::Compute
//! instantiation with M == m and N == n.
//! A handle exists only when m and n are powers of two no greater than 32
//! and n <= m; every other combination yields nullptr.
static inline IndexMetric::MatrixDistanceHandle EuclideanDistanceMatrixInt8(
    size_t m, size_t n) {
  using Handle = IndexMetric::MatrixDistanceHandle;
  using Elem = int8_t;

  // Row index is ctz(m), column index is ctz(n); slots with N > M are empty.
  static const Handle kernels[6][6] = {
      // M = 1
      {reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 1, 1>::Compute),
       nullptr, nullptr, nullptr, nullptr, nullptr},
      // M = 2
      {reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 2, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 2, 2>::Compute),
       nullptr, nullptr, nullptr, nullptr},
      // M = 4
      {reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 4, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 4, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 4, 4>::Compute),
       nullptr, nullptr, nullptr},
      // M = 8
      {reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 8, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 8, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 8, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 8, 8>::Compute),
       nullptr, nullptr},
      // M = 16
      {reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 16, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 16, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 16, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 16, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 16, 16>::Compute),
       nullptr},
      // M = 32
      {reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 32, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 32, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 32, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 32, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 32, 16>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 32, 32>::Compute)},
  };

  // The range check comes first so only small values reach the bit helpers.
  if (m > 32 || n > 32) {
    return nullptr;
  }
  // Exactly one set bit means a power of two; this also rejects zero.
  if (ailego_popcount(m) != 1 || ailego_popcount(n) != 1) {
    return nullptr;
  }
  const auto row = ailego_ctz(m);
  const auto col = ailego_ctz(n);
  return kernels[row][col];
}

//! Select the Euclidean matrix kernel used for Int4 features.
//! The kernels are the ailego::EuclideanDistanceMatrix<uint8_t, M, N>
//! instantiations with M == m and N == n.
//! A handle exists only when m and n are powers of two no greater than 32
//! and n <= m; every other combination yields nullptr.
static inline IndexMetric::MatrixDistanceHandle EuclideanDistanceMatrixInt4(
    size_t m, size_t n) {
  using Handle = IndexMetric::MatrixDistanceHandle;
  using Elem = uint8_t;

  // Row index is ctz(m), column index is ctz(n); slots with N > M are empty.
  static const Handle kernels[6][6] = {
      // M = 1
      {reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 1, 1>::Compute),
       nullptr, nullptr, nullptr, nullptr, nullptr},
      // M = 2
      {reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 2, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 2, 2>::Compute),
       nullptr, nullptr, nullptr, nullptr},
      // M = 4
      {reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 4, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 4, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 4, 4>::Compute),
       nullptr, nullptr, nullptr},
      // M = 8
      {reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 8, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 8, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 8, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 8, 8>::Compute),
       nullptr, nullptr},
      // M = 16
      {reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 16, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 16, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 16, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 16, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 16, 16>::Compute),
       nullptr},
      // M = 32
      {reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 32, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 32, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 32, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 32, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 32, 16>::Compute),
       reinterpret_cast<Handle>(
           ailego::EuclideanDistanceMatrix<Elem, 32, 32>::Compute)},
  };

  // The range check comes first so only small values reach the bit helpers.
  if (m > 32 || n > 32) {
    return nullptr;
  }
  // Exactly one set bit means a power of two; this also rejects zero.
  if (ailego_popcount(m) != 1 || ailego_popcount(n) != 1) {
    return nullptr;
  }
  const auto row = ailego_ctz(m);
  const auto col = ailego_ctz(n);
  return kernels[row][col];
}

//! Select the ailego::HammingDistanceMatrix<uint32_t, M, N>::Compute
//! instantiation with M == m and N == n.
//! A handle exists only when m and n are powers of two no greater than 32
//! and n <= m; every other combination yields nullptr.
static inline IndexMetric::MatrixDistanceHandle HammingDistanceMatrix32(
    size_t m, size_t n) {
  using Handle = IndexMetric::MatrixDistanceHandle;
  using Elem = uint32_t;

  // Row index is ctz(m), column index is ctz(n); slots with N > M are empty.
  static const Handle kernels[6][6] = {
      // M = 1
      {reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 1, 1>::Compute),
       nullptr, nullptr, nullptr, nullptr, nullptr},
      // M = 2
      {reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 2, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 2, 2>::Compute),
       nullptr, nullptr, nullptr, nullptr},
      // M = 4
      {reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 4, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 4, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 4, 4>::Compute),
       nullptr, nullptr, nullptr},
      // M = 8
      {reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 8, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 8, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 8, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 8, 8>::Compute),
       nullptr, nullptr},
      // M = 16
      {reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 16, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 16, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 16, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 16, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 16, 16>::Compute),
       nullptr},
      // M = 32
      {reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 32, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 32, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 32, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 32, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 32, 16>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 32, 32>::Compute)},
  };

  // The range check comes first so only small values reach the bit helpers.
  if (m > 32 || n > 32) {
    return nullptr;
  }
  // Exactly one set bit means a power of two; this also rejects zero.
  if (ailego_popcount(m) != 1 || ailego_popcount(n) != 1) {
    return nullptr;
  }
  const auto row = ailego_ctz(m);
  const auto col = ailego_ctz(n);
  return kernels[row][col];
}

#if defined(AILEGO_M64)
//! Select the ailego::HammingDistanceMatrix<uint64_t, M, N>::Compute
//! instantiation with M == m and N == n.
//! A handle exists only when m and n are powers of two no greater than 32
//! and n <= m; every other combination yields nullptr.
static inline IndexMetric::MatrixDistanceHandle HammingDistanceMatrix64(
    size_t m, size_t n) {
  using Handle = IndexMetric::MatrixDistanceHandle;
  using Elem = uint64_t;

  // Row index is ctz(m), column index is ctz(n); slots with N > M are empty.
  static const Handle kernels[6][6] = {
      // M = 1
      {reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 1, 1>::Compute),
       nullptr, nullptr, nullptr, nullptr, nullptr},
      // M = 2
      {reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 2, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 2, 2>::Compute),
       nullptr, nullptr, nullptr, nullptr},
      // M = 4
      {reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 4, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 4, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 4, 4>::Compute),
       nullptr, nullptr, nullptr},
      // M = 8
      {reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 8, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 8, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 8, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 8, 8>::Compute),
       nullptr, nullptr},
      // M = 16
      {reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 16, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 16, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 16, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 16, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 16, 16>::Compute),
       nullptr},
      // M = 32
      {reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 32, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 32, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 32, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 32, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 32, 16>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<Elem, 32, 32>::Compute)},
  };

  // The range check comes first so only small values reach the bit helpers.
  if (m > 32 || n > 32) {
    return nullptr;
  }
  // Exactly one set bit means a power of two; this also rejects zero.
  if (ailego_popcount(m) != 1 || ailego_popcount(n) != 1) {
    return nullptr;
  }
  const auto row = ailego_ctz(m);
  const auto col = ailego_ctz(n);
  return kernels[row][col];
}
#endif  // AILEGO_M64

//! Retrieve the ailego::HammingSquareRootDistanceMatrix<uint32_t, m, n>
//! handle used for index features. Both m and n have to be a power of two
//! not greater than 32; any other shape, and any shape with n > m, gives
//! nullptr.
static inline IndexMetric::MatrixDistanceHandle
HammingSquareRootDistanceMatrix32(size_t m, size_t n) {
  using Handle = IndexMetric::MatrixDistanceHandle;

  const bool supported = (m <= 32) && (n <= 32) &&
                         (ailego_popcount(m) == 1) &&
                         (ailego_popcount(n) == 1);
  if (!supported) {
    return nullptr;
  }

  // Rows are indexed by ctz(m), columns by ctz(n). Only the slots with
  // n <= m are listed; the remaining ones are value-initialized to nullptr.
  static const Handle kKernels[6][6] = {
      // m = 1
      {reinterpret_cast<Handle>(
          ailego::HammingSquareRootDistanceMatrix<uint32_t, 1, 1>::Compute)},
      // m = 2
      {reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint32_t, 2, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint32_t, 2, 2>::Compute)},
      // m = 4
      {reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint32_t, 4, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint32_t, 4, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint32_t, 4, 4>::Compute)},
      // m = 8
      {reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint32_t, 8, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint32_t, 8, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint32_t, 8, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint32_t, 8, 8>::Compute)},
      // m = 16
      {reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint32_t, 16, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint32_t, 16, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint32_t, 16, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint32_t, 16, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint32_t, 16,
                                                   16>::Compute)},
      // m = 32
      {reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint32_t, 32, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint32_t, 32, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint32_t, 32, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint32_t, 32, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint32_t, 32,
                                                   16>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint32_t, 32,
                                                   32>::Compute)},
  };
  return kKernels[ailego_ctz(m)][ailego_ctz(n)];
}

#if defined(AILEGO_M64)
//! Retrieve the ailego::HammingSquareRootDistanceMatrix<uint64_t, m, n>
//! handle used for index features. Both m and n have to be a power of two
//! not greater than 32; any other shape, and any shape with n > m, gives
//! nullptr.
static inline IndexMetric::MatrixDistanceHandle
HammingSquareRootDistanceMatrix64(size_t m, size_t n) {
  using Handle = IndexMetric::MatrixDistanceHandle;

  const bool supported = (m <= 32) && (n <= 32) &&
                         (ailego_popcount(m) == 1) &&
                         (ailego_popcount(n) == 1);
  if (!supported) {
    return nullptr;
  }

  // Rows are indexed by ctz(m), columns by ctz(n). Only the slots with
  // n <= m are listed; the remaining ones are value-initialized to nullptr.
  static const Handle kKernels[6][6] = {
      // m = 1
      {reinterpret_cast<Handle>(
          ailego::HammingSquareRootDistanceMatrix<uint64_t, 1, 1>::Compute)},
      // m = 2
      {reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint64_t, 2, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint64_t, 2, 2>::Compute)},
      // m = 4
      {reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint64_t, 4, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint64_t, 4, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint64_t, 4, 4>::Compute)},
      // m = 8
      {reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint64_t, 8, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint64_t, 8, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint64_t, 8, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint64_t, 8, 8>::Compute)},
      // m = 16
      {reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint64_t, 16, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint64_t, 16, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint64_t, 16, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint64_t, 16, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint64_t, 16,
                                                   16>::Compute)},
      // m = 32
      {reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint64_t, 32, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint64_t, 32, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint64_t, 32, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint64_t, 32, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint64_t, 32,
                                                   16>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingSquareRootDistanceMatrix<uint64_t, 32,
                                                   32>::Compute)},
  };
  return kKernels[ailego_ctz(m)][ailego_ctz(n)];
}
#endif  // AILEGO_M64

/*! Squared Euclidean Distance Metric
 *
 *  Dispatches to the ailego::SquaredEuclideanDistanceMatrix kernels for
 *  FP16 / FP32 / INT8 / INT4 data and to the ailego::HammingDistanceMatrix
 *  kernels for BINARY32 / BINARY64 data.
 */
class SquaredEuclideanMetric : public IndexMetric {
 public:
  //! Initialize Metric
  //! Returns IndexError_Unsupported when the data type has no kernel in this
  //! build or when the unit size of the meta does not match the data type;
  //! otherwise stores the data type and params and returns 0.
  int init(const IndexMeta &meta, const ailego::Params &index_params) override {
    IndexMeta::DataType dt = meta.data_type();
    if (dt != IndexMeta::DataType::DT_FP16 &&
        dt != IndexMeta::DataType::DT_FP32 &&
        dt != IndexMeta::DataType::DT_INT8 &&
        dt != IndexMeta::DataType::DT_INT4 &&
        dt != IndexMeta::DataType::DT_BINARY32 &&
        dt != IndexMeta::DataType::DT_BINARY64) {
      return IndexError_Unsupported;
    }
#if !defined(AILEGO_M64)
    // Every DT_BINARY64 branch below is compiled only under AILEGO_M64.
    // Without it the getters would hand back nullptr, so refuse the type
    // here instead of initializing a metric that cannot compute distances.
    if (dt == IndexMeta::DataType::DT_BINARY64) {
      return IndexError_Unsupported;
    }
#endif  // !AILEGO_M64
    if (IndexMeta::UnitSizeof(dt) != meta.unit_size()) {
      return IndexError_Unsupported;
    }
    data_type_ = dt;
    params_ = index_params;

    return 0;
  }

  //! Cleanup Metric (nothing to release, always returns 0)
  int cleanup(void) override {
    return 0;
  }

  //! Retrieve if it matched
  //! True when the meta carries the data type and unit size this metric was
  //! initialized with.
  bool is_matched(const IndexMeta &meta) const override {
    return (meta.data_type() == data_type_ &&
            meta.unit_size() == IndexMeta::UnitSizeof(data_type_));
  }

  //! Retrieve if it matched
  //! True when the query meta carries this metric's data type and unit size
  //! and has the same dimension as the index meta.
  bool is_matched(const IndexMeta &meta,
                  const IndexQueryMeta &qmeta) const override {
    return (qmeta.data_type() == data_type_ &&
            qmeta.unit_size() == IndexMeta::UnitSizeof(data_type_) &&
            qmeta.dimension() == meta.dimension());
  }

  //! Retrieve distance function for query
  //! Returns the <1, 1> kernel of the configured data type, or nullptr when
  //! no case matches.
  MatrixDistance distance(void) const override {
    switch (data_type_) {
      case IndexMeta::DataType::DT_BINARY32:
        return reinterpret_cast<MatrixDistanceHandle>(
            ailego::HammingDistanceMatrix<uint32_t, 1, 1>::Compute);

#if defined(AILEGO_M64)
      case IndexMeta::DataType::DT_BINARY64:
        return reinterpret_cast<MatrixDistanceHandle>(
            ailego::HammingDistanceMatrix<uint64_t, 1, 1>::Compute);
#endif  // AILEGO_M64

      case IndexMeta::DataType::DT_FP16:
        return reinterpret_cast<MatrixDistanceHandle>(
            ailego::SquaredEuclideanDistanceMatrix<ailego::Float16, 1,
                                                   1>::Compute);

      case IndexMeta::DataType::DT_FP32:
        return reinterpret_cast<MatrixDistanceHandle>(
            ailego::SquaredEuclideanDistanceMatrix<float, 1, 1>::Compute);

      case IndexMeta::DataType::DT_INT8:
        return reinterpret_cast<MatrixDistanceHandle>(
            ailego::SquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute);

      case IndexMeta::DataType::DT_INT4:
        // INT4 data is dispatched to the uint8_t instantiation.
        return reinterpret_cast<MatrixDistanceHandle>(
            ailego::SquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute);

      default:
        return nullptr;
    }
  }

  //! Retrieve sparse distance function for query
  //! Always the float kernel, independent of the configured data type.
  MatrixSparseDistance sparse_distance(void) const override {
    return reinterpret_cast<MatrixSparseDistanceHandle>(
        ailego::SquaredEuclideanSparseDistanceMatrix<float>::Compute);
  }

  //! Retrieve distance function for index features
  //! Forwards (m, n) to the per-type lookup helper; returns nullptr when no
  //! case matches the configured data type.
  MatrixDistance distance_matrix(size_t m, size_t n) const override {
    switch (data_type_) {
      case IndexMeta::DataType::DT_BINARY32:
        return HammingDistanceMatrix32(m, n);

#if defined(AILEGO_M64)
      case IndexMeta::DataType::DT_BINARY64:
        return HammingDistanceMatrix64(m, n);
#endif  // AILEGO_M64

      case IndexMeta::DataType::DT_FP16:
        return SquaredEuclideanDistanceMatrixFp16(m, n);

      case IndexMeta::DataType::DT_FP32:
        return SquaredEuclideanDistanceMatrixFp32(m, n);

      case IndexMeta::DataType::DT_INT8:
        return SquaredEuclideanDistanceMatrixInt8(m, n);

      case IndexMeta::DataType::DT_INT4:
        return SquaredEuclideanDistanceMatrixInt4(m, n);

      default:
        return nullptr;
    }
  }

  //! Retrieve batch distance function for query
  //! Returns ailego::BaseDistance<..., 1, 1>::ComputeBatch for the configured
  //! data type, or nullptr when no case matches.
  MatrixBatchDistance batch_distance(void) const override {
    switch (data_type_) {
      case IndexMeta::DataType::DT_BINARY32:
        return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(
            ailego::BaseDistance<ailego::HammingDistanceMatrix, uint32_t, 1,
                                 1>::ComputeBatch);

#if defined(AILEGO_M64)
      case IndexMeta::DataType::DT_BINARY64:
        return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(
            ailego::BaseDistance<ailego::HammingDistanceMatrix, uint64_t, 1,
                                 1>::ComputeBatch);
#endif  // AILEGO_M64

      case IndexMeta::DataType::DT_FP16:
        return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(
            ailego::BaseDistance<ailego::SquaredEuclideanDistanceMatrix,
                                 ailego::Float16, 1, 1>::ComputeBatch);

      case IndexMeta::DataType::DT_FP32:
        return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(
            ailego::BaseDistance<ailego::SquaredEuclideanDistanceMatrix, float,
                                 1, 1>::ComputeBatch);

      case IndexMeta::DataType::DT_INT8:
        return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(
            ailego::BaseDistance<ailego::SquaredEuclideanDistanceMatrix, int8_t,
                                 1, 1>::ComputeBatch);

      case IndexMeta::DataType::DT_INT4:
        return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(
            ailego::BaseDistance<ailego::SquaredEuclideanDistanceMatrix,
                                 uint8_t, 1, 1>::ComputeBatch);

      default:
        return nullptr;
    }
  }

  //! Retrieve params of Metric (the params passed to init)
  const ailego::Params &params(void) const override {
    return params_;
  }

  //! Retrieve query metric object of this index metric
  //! This metric provides none and returns nullptr.
  Pointer query_metric(void) const override {
    return nullptr;
  }

 private:
  //! Data type accepted by init(); FP32 until init() succeeds
  IndexMeta::DataType data_type_{IndexMeta::DataType::DT_FP32};
  //! Copy of the params handed to init()
  ailego::Params params_{};
};

/*! Euclidean Distance Metric
 *
 *  Dispatches to the ailego::EuclideanDistanceMatrix kernels for
 *  FP16 / FP32 / INT8 / INT4 data and to the
 *  ailego::HammingSquareRootDistanceMatrix kernels for BINARY32 / BINARY64
 *  data.
 */
class EuclideanMetric : public IndexMetric {
 public:
  //! Initialize Metric
  //! Returns IndexError_Unsupported when the data type has no kernel in this
  //! build or when the unit size of the meta does not match the data type;
  //! otherwise stores the data type and params and returns 0.
  int init(const IndexMeta &meta, const ailego::Params &index_params) override {
    IndexMeta::DataType dt = meta.data_type();
    if (dt != IndexMeta::DataType::DT_FP16 &&
        dt != IndexMeta::DataType::DT_FP32 &&
        dt != IndexMeta::DataType::DT_INT8 &&
        dt != IndexMeta::DataType::DT_INT4 &&
        dt != IndexMeta::DataType::DT_BINARY32 &&
        dt != IndexMeta::DataType::DT_BINARY64) {
      return IndexError_Unsupported;
    }
#if !defined(AILEGO_M64)
    // Every DT_BINARY64 branch below is compiled only under AILEGO_M64.
    // Without it the getters would hand back nullptr, so refuse the type
    // here instead of initializing a metric that cannot compute distances.
    if (dt == IndexMeta::DataType::DT_BINARY64) {
      return IndexError_Unsupported;
    }
#endif  // !AILEGO_M64
    if (IndexMeta::UnitSizeof(dt) != meta.unit_size()) {
      return IndexError_Unsupported;
    }
    data_type_ = dt;
    params_ = index_params;
    return 0;
  }

  //! Cleanup Metric (nothing to release, always returns 0)
  int cleanup(void) override {
    return 0;
  }

  //! Retrieve if it matched
  //! True when the meta carries the data type and unit size this metric was
  //! initialized with.
  bool is_matched(const IndexMeta &meta) const override {
    return (meta.data_type() == data_type_ &&
            meta.unit_size() == IndexMeta::UnitSizeof(data_type_));
  }

  //! Retrieve if it matched
  //! True when the query meta carries this metric's data type and unit size
  //! and has the same dimension as the index meta.
  bool is_matched(const IndexMeta &meta,
                  const IndexQueryMeta &qmeta) const override {
    return (qmeta.data_type() == data_type_ &&
            qmeta.unit_size() == IndexMeta::UnitSizeof(data_type_) &&
            qmeta.dimension() == meta.dimension());
  }

  //! Retrieve distance function for query
  //! Returns the <1, 1> kernel of the configured data type, or nullptr when
  //! no case matches.
  MatrixDistance distance(void) const override {
    switch (data_type_) {
      case IndexMeta::DataType::DT_BINARY32:
        return reinterpret_cast<MatrixDistanceHandle>(
            ailego::HammingSquareRootDistanceMatrix<uint32_t, 1, 1>::Compute);

#if defined(AILEGO_M64)
      case IndexMeta::DataType::DT_BINARY64:
        return reinterpret_cast<MatrixDistanceHandle>(
            ailego::HammingSquareRootDistanceMatrix<uint64_t, 1, 1>::Compute);
#endif  // AILEGO_M64

      case IndexMeta::DataType::DT_FP16:
        return reinterpret_cast<MatrixDistanceHandle>(
            ailego::EuclideanDistanceMatrix<ailego::Float16, 1, 1>::Compute);

      case IndexMeta::DataType::DT_FP32:
        return reinterpret_cast<MatrixDistanceHandle>(
            ailego::EuclideanDistanceMatrix<float, 1, 1>::Compute);

      case IndexMeta::DataType::DT_INT8:
        return reinterpret_cast<MatrixDistanceHandle>(
            ailego::EuclideanDistanceMatrix<int8_t, 1, 1>::Compute);

      case IndexMeta::DataType::DT_INT4:
        // INT4 data is dispatched to the uint8_t instantiation.
        return reinterpret_cast<MatrixDistanceHandle>(
            ailego::EuclideanDistanceMatrix<uint8_t, 1, 1>::Compute);

      default:
        return nullptr;
    }
  }

  //! Retrieve distance function for index features
  //! Forwards (m, n) to the per-type lookup helper; returns nullptr when no
  //! case matches the configured data type.
  MatrixDistance distance_matrix(size_t m, size_t n) const override {
    switch (data_type_) {
      case IndexMeta::DataType::DT_BINARY32:
        return HammingSquareRootDistanceMatrix32(m, n);

#if defined(AILEGO_M64)
      case IndexMeta::DataType::DT_BINARY64:
        return HammingSquareRootDistanceMatrix64(m, n);
#endif  // AILEGO_M64

      case IndexMeta::DataType::DT_FP16:
        return EuclideanDistanceMatrixFp16(m, n);

      case IndexMeta::DataType::DT_FP32:
        return EuclideanDistanceMatrixFp32(m, n);

      case IndexMeta::DataType::DT_INT8:
        return EuclideanDistanceMatrixInt8(m, n);

      case IndexMeta::DataType::DT_INT4:
        return EuclideanDistanceMatrixInt4(m, n);

      default:
        return nullptr;
    }
  }

  //! Retrieve params of Metric (the params passed to init)
  const ailego::Params &params(void) const override {
    return params_;
  }

  //! Retrieve query metric object of this index metric
  //! This metric provides none and returns nullptr.
  Pointer query_metric(void) const override {
    return nullptr;
  }

 private:
  //! Data type accepted by init(); FP32 until init() succeeds
  IndexMeta::DataType data_type_{IndexMeta::DataType::DT_FP32};
  //! Copy of the params handed to init()
  ailego::Params params_{};
};

/*! Squared Euclidean Sparse Metric
 *
 *  Accepts FP16 and FP32 metas and exposes only a sparse distance function.
 */
class SquaredEuclideanSparseMetric : public IndexMetric {
 public:
  //! Initialize Metric
  //! Fails with IndexError_Unsupported unless the meta is FP16 or FP32 and
  //! its unit size agrees with that data type.
  int init(const IndexMeta &meta, const ailego::Params &index_params) override {
    const IndexMeta::DataType dt = meta.data_type();
    const bool type_ok = (dt == IndexMeta::DataType::DT_FP16 ||
                          dt == IndexMeta::DataType::DT_FP32);
    if (!type_ok || IndexMeta::UnitSizeof(dt) != meta.unit_size()) {
      return IndexError_Unsupported;
    }

    params_ = index_params;
    data_type_ = dt;
    return 0;
  }

  //! Cleanup Metric (nothing to release)
  int cleanup(void) override {
    return 0;
  }

  //! Retrieve if it matched
  //! The meta must carry the stored data type and its unit size.
  bool is_matched(const IndexMeta &meta) const override {
    if (meta.data_type() != data_type_) {
      return false;
    }
    return meta.unit_size() == IndexMeta::UnitSizeof(data_type_);
  }

  //! Retrieve if it matched
  //! The query meta must agree with both the stored data type and the index
  //! meta on data type and unit size.
  bool is_matched(const IndexMeta &meta,
                  const IndexQueryMeta &qmeta) const override {
    if (qmeta.data_type() != data_type_) {
      return false;
    }
    if (qmeta.data_type() != meta.data_type()) {
      return false;
    }
    if (qmeta.unit_size() != IndexMeta::UnitSizeof(data_type_)) {
      return false;
    }
    return qmeta.unit_size() == meta.unit_size();
  }

  //! Retrieve sparse distance function for query
  //! NOTE(review): the float kernel is returned even when init() accepted
  //! DT_FP16 -- confirm this is intended.
  MatrixSparseDistance sparse_distance(void) const override {
    const auto kernel =
        ailego::SquaredEuclideanSparseDistanceMatrix<float>::Compute;
    return reinterpret_cast<MatrixSparseDistanceHandle>(kernel);
  }

  //! Retrieve params of Metric (as passed to init)
  const ailego::Params &params(void) const override {
    return params_;
  }

  //! Retrieve query metric object of this index metric (none is provided)
  Pointer query_metric(void) const override {
    return nullptr;
  }

 private:
  //! Data type accepted by init(); FP32 until init() succeeds
  IndexMeta::DataType data_type_{IndexMeta::DataType::DT_FP32};

  //! Copy of the params handed to init()
  ailego::Params params_{};
};

//! Register the metric classes defined above through the index factory macro.
//! NOTE(review): presumably the first argument is the alias the factory looks
//! the metric up by and the second the implementing class -- confirm against
//! the macro definition in index_factory.h.
INDEX_FACTORY_REGISTER_METRIC_ALIAS(SquaredEuclidean, SquaredEuclideanMetric);
INDEX_FACTORY_REGISTER_METRIC_ALIAS(Euclidean, EuclideanMetric);

INDEX_FACTORY_REGISTER_METRIC_ALIAS(SquaredEuclideanSparse,
                                    SquaredEuclideanSparseMetric);

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/metric/hamming_metric.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <ailego/math/hamming_distance_matrix.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_factory.h>
#include "ailego/math_batch/distance_batch.h"

namespace zvec {
namespace core {

//! Retrieve the ailego::HammingDistanceMatrix<uint32_t, m, n> handle used
//! for index features. Both m and n have to be a power of two not greater
//! than 32; any other shape, and any shape with n > m, gives nullptr.
static inline IndexMetric::MatrixDistanceHandle HammingDistanceMatrix32(
    size_t m, size_t n) {
  using Handle = IndexMetric::MatrixDistanceHandle;

  const bool supported = (m <= 32) && (n <= 32) &&
                         (ailego_popcount(m) == 1) &&
                         (ailego_popcount(n) == 1);
  if (!supported) {
    return nullptr;
  }

  // Rows are indexed by ctz(m), columns by ctz(n). Only the slots with
  // n <= m are listed; the remaining ones are value-initialized to nullptr.
  static const Handle kKernels[6][6] = {
      // m = 1
      {reinterpret_cast<Handle>(
          ailego::HammingDistanceMatrix<uint32_t, 1, 1>::Compute)},
      // m = 2
      {reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint32_t, 2, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint32_t, 2, 2>::Compute)},
      // m = 4
      {reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint32_t, 4, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint32_t, 4, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint32_t, 4, 4>::Compute)},
      // m = 8
      {reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint32_t, 8, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint32_t, 8, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint32_t, 8, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint32_t, 8, 8>::Compute)},
      // m = 16
      {reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint32_t, 16, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint32_t, 16, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint32_t, 16, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint32_t, 16, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint32_t, 16, 16>::Compute)},
      // m = 32
      {reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint32_t, 32, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint32_t, 32, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint32_t, 32, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint32_t, 32, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint32_t, 32, 16>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint32_t, 32, 32>::Compute)},
  };
  return kKernels[ailego_ctz(m)][ailego_ctz(n)];
}

#if defined(AILEGO_M64)
//! Retrieve the ailego::HammingDistanceMatrix<uint64_t, m, n> handle used
//! for index features. Both m and n have to be a power of two not greater
//! than 32; any other shape, and any shape with n > m, gives nullptr.
static inline IndexMetric::MatrixDistanceHandle HammingDistanceMatrix64(
    size_t m, size_t n) {
  using Handle = IndexMetric::MatrixDistanceHandle;

  const bool supported = (m <= 32) && (n <= 32) &&
                         (ailego_popcount(m) == 1) &&
                         (ailego_popcount(n) == 1);
  if (!supported) {
    return nullptr;
  }

  // Rows are indexed by ctz(m), columns by ctz(n). Only the slots with
  // n <= m are listed; the remaining ones are value-initialized to nullptr.
  static const Handle kKernels[6][6] = {
      // m = 1
      {reinterpret_cast<Handle>(
          ailego::HammingDistanceMatrix<uint64_t, 1, 1>::Compute)},
      // m = 2
      {reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint64_t, 2, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint64_t, 2, 2>::Compute)},
      // m = 4
      {reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint64_t, 4, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint64_t, 4, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint64_t, 4, 4>::Compute)},
      // m = 8
      {reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint64_t, 8, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint64_t, 8, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint64_t, 8, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint64_t, 8, 8>::Compute)},
      // m = 16
      {reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint64_t, 16, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint64_t, 16, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint64_t, 16, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint64_t, 16, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint64_t, 16, 16>::Compute)},
      // m = 32
      {reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint64_t, 32, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint64_t, 32, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint64_t, 32, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint64_t, 32, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint64_t, 32, 16>::Compute),
       reinterpret_cast<Handle>(
           ailego::HammingDistanceMatrix<uint64_t, 32, 32>::Compute)},
  };
  return kKernels[ailego_ctz(m)][ailego_ctz(n)];
}
#endif  // AILEGO_M64

/*! Hamming Metric
 *
 *  Dispatches to the ailego::HammingDistanceMatrix kernels for BINARY32 and,
 *  when AILEGO_M64 is defined, BINARY64 data.
 */
class HammingMetric : public IndexMetric {
 public:
  //! Initialize Metric
  //! Returns IndexError_Unsupported when the data type has no kernel in this
  //! build or when the unit size of the meta does not match the data type;
  //! otherwise stores the data type and params and returns 0.
  int init(const IndexMeta &meta, const ailego::Params &index_params) override {
    if (meta.data_type() != IndexMeta::DataType::DT_BINARY32 &&
        meta.data_type() != IndexMeta::DataType::DT_BINARY64) {
      return IndexError_Unsupported;
    }
#if !defined(AILEGO_M64)
    // The DT_BINARY64 branches below are compiled only under AILEGO_M64.
    // Without it the getters would hand back nullptr, so refuse the type
    // here instead of initializing a metric that cannot compute distances.
    if (meta.data_type() == IndexMeta::DataType::DT_BINARY64) {
      return IndexError_Unsupported;
    }
#endif  // !AILEGO_M64
    if (IndexMeta::UnitSizeof(meta.data_type()) != meta.unit_size()) {
      return IndexError_Unsupported;
    }
    feature_type_ = meta.data_type();
    params_ = index_params;
    return 0;
  }

  //! Cleanup Metric (nothing to release, always returns 0)
  int cleanup(void) override {
    return 0;
  }

  //! Retrieve if it matched
  //! True when the meta carries the data type and unit size this metric was
  //! initialized with.
  bool is_matched(const IndexMeta &meta) const override {
    return (meta.data_type() == feature_type_ &&
            meta.unit_size() == IndexMeta::UnitSizeof(feature_type_));
  }

  //! Retrieve if it matched
  //! True when the query meta carries this metric's data type and unit size
  //! and has the same dimension as the index meta.
  bool is_matched(const IndexMeta &meta,
                  const IndexQueryMeta &qmeta) const override {
    return (qmeta.data_type() == feature_type_ &&
            qmeta.unit_size() == IndexMeta::UnitSizeof(feature_type_) &&
            qmeta.dimension() == meta.dimension());
  }

  //! Retrieve distance function for query
  //! Returns the <1, 1> Hamming kernel of the configured data type, or
  //! nullptr when neither branch matches.
  MatrixDistance distance(void) const override {
#if defined(AILEGO_M64)
    if (feature_type_ == IndexMeta::DataType::DT_BINARY64) {
      return reinterpret_cast<MatrixDistanceHandle>(
          ailego::HammingDistanceMatrix<uint64_t, 1, 1>::Compute);
    }
#endif
    if (feature_type_ == IndexMeta::DataType::DT_BINARY32) {
      return reinterpret_cast<MatrixDistanceHandle>(
          ailego::HammingDistanceMatrix<uint32_t, 1, 1>::Compute);
    }
    return nullptr;
  }

  //! Retrieve batch distance function for query
  //! Returns ailego::BaseDistance<HammingDistanceMatrix, T, 1, 1>::ComputeBatch
  //! for the configured data type, or nullptr when neither branch matches.
  MatrixBatchDistance batch_distance(void) const override {
#if defined(AILEGO_M64)
    if (feature_type_ == IndexMeta::DataType::DT_BINARY64) {
      return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(
          ailego::BaseDistance<ailego::HammingDistanceMatrix, uint64_t, 1,
                               1>::ComputeBatch);
    }
#endif
    if (feature_type_ == IndexMeta::DataType::DT_BINARY32) {
      return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(
          ailego::BaseDistance<ailego::HammingDistanceMatrix, uint32_t, 1,
                               1>::ComputeBatch);
    }
    return nullptr;
  }

  //! Retrieve distance function for index features
  //! Forwards (m, n) to the 32- or 64-bit lookup helper; returns nullptr when
  //! neither branch matches the configured data type.
  MatrixDistance distance_matrix(size_t m, size_t n) const override {
#if defined(AILEGO_M64)
    if (feature_type_ == IndexMeta::DataType::DT_BINARY64) {
      return HammingDistanceMatrix64(m, n);
    }
#endif
    if (feature_type_ == IndexMeta::DataType::DT_BINARY32) {
      return HammingDistanceMatrix32(m, n);
    }
    return nullptr;
  }

  //! Retrieve params of Metric (the params passed to init)
  const ailego::Params &params(void) const override {
    return params_;
  }

  //! Retrieve query metric object of this index metric
  //! This metric provides none and returns nullptr.
  Pointer query_metric(void) const override {
    return nullptr;
  }

 private:
  //! Data type accepted by init(); BINARY32 until init() succeeds
  IndexMeta::DataType feature_type_{IndexMeta::DataType::DT_BINARY32};
  //! Copy of the params handed to init()
  ailego::Params params_{};
};

//! Register HammingMetric through the index factory macro.
//! NOTE(review): presumably "Hamming" is the alias the factory looks the
//! metric up by -- confirm against the macro definition in index_factory.h.
INDEX_FACTORY_REGISTER_METRIC_ALIAS(Hamming, HammingMetric);

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/metric/inner_product_metric.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <ailego/math/inner_product_matrix.h>
#include <ailego/math_batch/distance_batch.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_factory.h>
#include <zvec/core/framework/index_metric.h>

namespace zvec {
namespace core {

//! Retrieve the ailego::MinusInnerProductMatrix<float, m, n> handle used for
//! index features. Both m and n have to be a power of two not greater than
//! 32; any other shape, and any shape with n > m, gives nullptr.
static inline IndexMetric::MatrixDistanceHandle MinusInnerProductMatrixFp32(
    size_t m, size_t n) {
  using Handle = IndexMetric::MatrixDistanceHandle;

  const bool supported = (m <= 32) && (n <= 32) &&
                         (ailego_popcount(m) == 1) &&
                         (ailego_popcount(n) == 1);
  if (!supported) {
    return nullptr;
  }

  // Rows are indexed by ctz(m), columns by ctz(n). Only the slots with
  // n <= m are listed; the remaining ones are value-initialized to nullptr.
  static const Handle kKernels[6][6] = {
      // m = 1
      {reinterpret_cast<Handle>(
          ailego::MinusInnerProductMatrix<float, 1, 1>::Compute)},
      // m = 2
      {reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<float, 2, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<float, 2, 2>::Compute)},
      // m = 4
      {reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<float, 4, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<float, 4, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<float, 4, 4>::Compute)},
      // m = 8
      {reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<float, 8, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<float, 8, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<float, 8, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<float, 8, 8>::Compute)},
      // m = 16
      {reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<float, 16, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<float, 16, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<float, 16, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<float, 16, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<float, 16, 16>::Compute)},
      // m = 32
      {reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<float, 32, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<float, 32, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<float, 32, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<float, 32, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<float, 32, 16>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<float, 32, 32>::Compute)},
  };
  return kKernels[ailego_ctz(m)][ailego_ctz(n)];
}

//! Look up the FP16 minus-inner-product kernel for an (m, n) matrix shape
static inline IndexMetric::MatrixDistanceHandle MinusInnerProductMatrixFp16(
    size_t m, size_t n) {
  using Handle = IndexMetric::MatrixDistanceHandle;
  using Fp16 = ailego::Float16;

  // Nothing is tabulated for sizes above 32
  if (m > 32 || n > 32) {
    return nullptr;
  }
  // Each size must pass the single-bit (power of two) check
  if (ailego_popcount(m) != 1 || ailego_popcount(n) != 1) {
    return nullptr;
  }

  // Rows are selected by ctz(m), columns by ctz(n). Every slot whose
  // column size exceeds its row size is nullptr.
  static const Handle kernels[6][6] = {
      // m = 1
      {reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<Fp16, 1, 1>::Compute),
       nullptr, nullptr, nullptr, nullptr, nullptr},
      // m = 2
      {reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<Fp16, 2, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<Fp16, 2, 2>::Compute),
       nullptr, nullptr, nullptr, nullptr},
      // m = 4
      {reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<Fp16, 4, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<Fp16, 4, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<Fp16, 4, 4>::Compute),
       nullptr, nullptr, nullptr},
      // m = 8
      {reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<Fp16, 8, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<Fp16, 8, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<Fp16, 8, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<Fp16, 8, 8>::Compute),
       nullptr, nullptr},
      // m = 16
      {reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<Fp16, 16, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<Fp16, 16, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<Fp16, 16, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<Fp16, 16, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<Fp16, 16, 16>::Compute),
       nullptr},
      // m = 32
      {reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<Fp16, 32, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<Fp16, 32, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<Fp16, 32, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<Fp16, 32, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<Fp16, 32, 16>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<Fp16, 32, 32>::Compute)},
  };
  return kernels[ailego_ctz(m)][ailego_ctz(n)];
}

//! Look up the INT8 minus-inner-product kernel for an (m, n) matrix shape
static inline IndexMetric::MatrixDistanceHandle MinusInnerProductMatrixInt8(
    size_t m, size_t n) {
  using Handle = IndexMetric::MatrixDistanceHandle;

  // Nothing is tabulated for sizes above 32
  if (m > 32 || n > 32) {
    return nullptr;
  }
  // Each size must pass the single-bit (power of two) check
  if (ailego_popcount(m) != 1 || ailego_popcount(n) != 1) {
    return nullptr;
  }

  // Rows are selected by ctz(m), columns by ctz(n). Every slot whose
  // column size exceeds its row size is nullptr.
  static const Handle kernels[6][6] = {
      // m = 1
      {reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<int8_t, 1, 1>::Compute),
       nullptr, nullptr, nullptr, nullptr, nullptr},
      // m = 2
      {reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<int8_t, 2, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<int8_t, 2, 2>::Compute),
       nullptr, nullptr, nullptr, nullptr},
      // m = 4
      {reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<int8_t, 4, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<int8_t, 4, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<int8_t, 4, 4>::Compute),
       nullptr, nullptr, nullptr},
      // m = 8
      {reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<int8_t, 8, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<int8_t, 8, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<int8_t, 8, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<int8_t, 8, 8>::Compute),
       nullptr, nullptr},
      // m = 16
      {reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<int8_t, 16, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<int8_t, 16, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<int8_t, 16, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<int8_t, 16, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<int8_t, 16, 16>::Compute),
       nullptr},
      // m = 32
      {reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<int8_t, 32, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<int8_t, 32, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<int8_t, 32, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<int8_t, 32, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<int8_t, 32, 16>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<int8_t, 32, 32>::Compute)},
  };
  return kernels[ailego_ctz(m)][ailego_ctz(n)];
}

//! Look up the INT4 minus-inner-product kernel for an (m, n) matrix shape.
//! INT4 data is served by the uint8_t kernel instantiations.
static inline IndexMetric::MatrixDistanceHandle MinusInnerProductMatrixInt4(
    size_t m, size_t n) {
  using Handle = IndexMetric::MatrixDistanceHandle;

  // Nothing is tabulated for sizes above 32
  if (m > 32 || n > 32) {
    return nullptr;
  }
  // Each size must pass the single-bit (power of two) check
  if (ailego_popcount(m) != 1 || ailego_popcount(n) != 1) {
    return nullptr;
  }

  // Rows are selected by ctz(m), columns by ctz(n). Every slot whose
  // column size exceeds its row size is nullptr.
  static const Handle kernels[6][6] = {
      // m = 1
      {reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<uint8_t, 1, 1>::Compute),
       nullptr, nullptr, nullptr, nullptr, nullptr},
      // m = 2
      {reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<uint8_t, 2, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<uint8_t, 2, 2>::Compute),
       nullptr, nullptr, nullptr, nullptr},
      // m = 4
      {reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<uint8_t, 4, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<uint8_t, 4, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<uint8_t, 4, 4>::Compute),
       nullptr, nullptr, nullptr},
      // m = 8
      {reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<uint8_t, 8, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<uint8_t, 8, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<uint8_t, 8, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<uint8_t, 8, 8>::Compute),
       nullptr, nullptr},
      // m = 16
      {reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<uint8_t, 16, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<uint8_t, 16, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<uint8_t, 16, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<uint8_t, 16, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<uint8_t, 16, 16>::Compute),
       nullptr},
      // m = 32
      {reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<uint8_t, 32, 1>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<uint8_t, 32, 2>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<uint8_t, 32, 4>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<uint8_t, 32, 8>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<uint8_t, 32, 16>::Compute),
       reinterpret_cast<Handle>(
           ailego::MinusInnerProductMatrix<uint8_t, 32, 32>::Compute)},
  };
  return kernels[ailego_ctz(m)][ailego_ctz(n)];
}

/*! Inner Product Metric
 *
 *  Dense metric for FP16 / FP32 / INT8 / INT4 data. Every kernel it hands
 *  out is a MinusInnerProduct* one; normalize() and denormalize() both
 *  negate the score.
 */
class InnerProductMetric : public IndexMetric {
 public:
  //! Initialize Metric
  //! Accepts only dense metas whose data type is FP16, FP32, INT8 or INT4
  //! and whose unit size equals IndexMeta::UnitSizeof() of that type;
  //! anything else yields IndexError_Unsupported. Returns 0 on success.
  int init(const IndexMeta &meta, const ailego::Params &index_params) override {
    const IndexMeta::MetaType meta_type = meta.meta_type();
    if (meta_type != IndexMeta::MetaType::MT_DENSE) {
      return IndexError_Unsupported;
    }

    const IndexMeta::DataType data_type = meta.data_type();
    switch (data_type) {
      case IndexMeta::DataType::DT_FP16:
      case IndexMeta::DataType::DT_FP32:
      case IndexMeta::DataType::DT_INT8:
      case IndexMeta::DataType::DT_INT4:
        break;

      default:
        return IndexError_Unsupported;
    }

    // The declared unit size has to agree with the data type
    if (meta.unit_size() != IndexMeta::UnitSizeof(data_type)) {
      return IndexError_Unsupported;
    }

    // Commit state only after every check has passed
    meta_type_ = meta_type;
    data_type_ = data_type;
    params_ = index_params;
    return 0;
  }

  //! Cleanup Metric (nothing to release)
  int cleanup(void) override {
    return 0;
  }

  //! Retrieve if the index meta has this metric's data type and unit size
  bool is_matched(const IndexMeta &meta) const override {
    if (meta.data_type() != data_type_) {
      return false;
    }
    return meta.unit_size() == IndexMeta::UnitSizeof(data_type_);
  }

  //! Retrieve if the query meta has this metric's data type and unit size
  //! and the same dimension as the index meta
  bool is_matched(const IndexMeta &meta,
                  const IndexQueryMeta &qmeta) const override {
    if (qmeta.data_type() != data_type_) {
      return false;
    }
    if (qmeta.unit_size() != IndexMeta::UnitSizeof(data_type_)) {
      return false;
    }
    return qmeta.dimension() == meta.dimension();
  }

  //! Retrieve the 1x1 distance function for the configured data type
  MatrixDistance distance(void) const override {
    using DataType = IndexMeta::DataType;

    if (data_type_ == DataType::DT_FP16) {
      return reinterpret_cast<MatrixDistanceHandle>(
          ailego::MinusInnerProductMatrix<ailego::Float16, 1, 1>::Compute);
    }
    if (data_type_ == DataType::DT_FP32) {
      return reinterpret_cast<MatrixDistanceHandle>(
          ailego::MinusInnerProductMatrix<float, 1, 1>::Compute);
    }
    if (data_type_ == DataType::DT_INT8) {
      return reinterpret_cast<MatrixDistanceHandle>(
          ailego::MinusInnerProductMatrix<int8_t, 1, 1>::Compute);
    }
    if (data_type_ == DataType::DT_INT4) {
      // INT4 goes through the uint8_t instantiation
      return reinterpret_cast<MatrixDistanceHandle>(
          ailego::MinusInnerProductMatrix<uint8_t, 1, 1>::Compute);
    }
    return nullptr;
  }

  //! Retrieve sparse distance function for query
  //! (always the float kernel, independent of the configured data type)
  MatrixSparseDistance sparse_distance(void) const override {
    return reinterpret_cast<MatrixSparseDistanceHandle>(
        ailego::MinusInnerProductSparseMatrix<float>::Compute);
  }

  //! Retrieve the (m, n) matrix distance function for index features;
  //! the per-type lookup returns nullptr for unsupported shapes
  MatrixDistance distance_matrix(size_t m, size_t n) const override {
    using DataType = IndexMeta::DataType;

    if (data_type_ == DataType::DT_FP16) {
      return MinusInnerProductMatrixFp16(m, n);
    }
    if (data_type_ == DataType::DT_FP32) {
      return MinusInnerProductMatrixFp32(m, n);
    }
    if (data_type_ == DataType::DT_INT8) {
      return MinusInnerProductMatrixInt8(m, n);
    }
    if (data_type_ == DataType::DT_INT4) {
      return MinusInnerProductMatrixInt4(m, n);
    }
    return nullptr;
  }

  //! Retrieve the batch distance function for the configured data type
  MatrixBatchDistance batch_distance(void) const override {
    using BatchHandle = IndexMetric::MatrixBatchDistanceHandle;
    using DataType = IndexMeta::DataType;

    if (data_type_ == DataType::DT_FP32) {
      return reinterpret_cast<BatchHandle>(
          ailego::BaseDistance<ailego::MinusInnerProductMatrix, float, 1,
                               1>::ComputeBatch);
    }
    if (data_type_ == DataType::DT_FP16) {
      return reinterpret_cast<BatchHandle>(
          ailego::BaseDistance<ailego::MinusInnerProductMatrix,
                               ailego::Float16, 1, 1>::ComputeBatch);
    }
    if (data_type_ == DataType::DT_INT8) {
      return reinterpret_cast<BatchHandle>(
          ailego::BaseDistance<ailego::MinusInnerProductMatrix, int8_t, 1,
                               1>::ComputeBatch);
    }
    if (data_type_ == DataType::DT_INT4) {
      return reinterpret_cast<BatchHandle>(
          ailego::BaseDistance<ailego::MinusInnerProductMatrix, uint8_t, 1,
                               1>::ComputeBatch);
    }
    return nullptr;
  }

  //! Normalize result (flip the sign of the score)
  void normalize(float *score) const override {
    const float raw = *score;
    *score = -raw;
  }

  //! Denormalize threshold (flip the sign of the score)
  void denormalize(float *score) const override {
    const float normalized = *score;
    *score = -normalized;
  }

  //! Retrieve if it supports normalization
  bool support_normalize(void) const override {
    return true;
  }

  //! Retrieve params of Metric (as stored by init)
  const ailego::Params &params(void) const override {
    return params_;
  }

  //! Retrieve query measure object of this index measure (none)
  Pointer query_metric(void) const override {
    return nullptr;
  }

 private:
  IndexMeta::MetaType meta_type_{IndexMeta::MetaType::MT_DENSE};
  IndexMeta::DataType data_type_{IndexMeta::DataType::DT_FP32};
  ailego::Params params_{};
};

/*! Normalized Cosine Metric
 *
 *  Reuses the InnerProductMetric kernels but restricts the data type to
 *  FP16 / FP32 and shifts the score by one when (de)normalizing.
 */
class NormalizedCosineMetric : public InnerProductMetric {
 public:
  //! Initialize Metric
  //! Returns IndexError_Unsupported unless the data type is FP16 or FP32,
  //! otherwise whatever InnerProductMetric::init() returns.
  int init(const IndexMeta &meta, const ailego::Params &index_params) override {
    IndexMeta::DataType dt = meta.data_type();
    if (dt != IndexMeta::DataType::DT_FP16 &&
        dt != IndexMeta::DataType::DT_FP32) {
      return IndexError_Unsupported;
    }

    // Propagate the base result instead of discarding it: the base class
    // also rejects non-dense metas and unit-size mismatches, and those
    // failures must not be reported as success.
    return InnerProductMetric::init(meta, index_params);
  }

  //! Normalize result (score -> 1 + score)
  void normalize(float *score) const override {
    *score = 1 + (*score);
  }

  //! Denormalize threshold (score -> score - 1, the inverse of normalize)
  void denormalize(float *score) const override {
    *score -= 1;
  }
};

/*! Inner Product Sparse Metric
 *
 *  Sparse-only metric for FP16 / FP32 data: distance() is always nullptr,
 *  sparse_distance() returns the MinusInnerProductSparseMatrix kernel, and
 *  normalize() / denormalize() negate the score.
 */
class InnerProductSparseMetric : public IndexMetric {
 public:
  //! Initialize Metric
  //! Accepts only FP16 / FP32 metas whose unit size equals
  //! IndexMeta::UnitSizeof() of the data type; otherwise returns
  //! IndexError_Unsupported. Returns 0 on success.
  int init(const IndexMeta &meta, const ailego::Params &index_params) override {
    const IndexMeta::DataType data_type = meta.data_type();
    const bool is_float_type = (data_type == IndexMeta::DataType::DT_FP16 ||
                                data_type == IndexMeta::DataType::DT_FP32);
    if (!is_float_type) {
      return IndexError_Unsupported;
    }

    if (meta.unit_size() != IndexMeta::UnitSizeof(data_type)) {
      return IndexError_Unsupported;
    }

    data_type_ = data_type;
    params_ = index_params;
    return 0;
  }

  //! Cleanup Metric (nothing to release)
  int cleanup(void) override {
    return 0;
  }

  //! Retrieve if the index meta has this metric's data type and unit size
  bool is_matched(const IndexMeta &meta) const override {
    if (meta.data_type() != data_type_) {
      return false;
    }
    return meta.unit_size() == IndexMeta::UnitSizeof(data_type_);
  }

  //! Retrieve if the query meta agrees with both this metric and the index
  //! meta on data type and unit size
  bool is_matched(const IndexMeta &meta,
                  const IndexQueryMeta &qmeta) const override {
    if (qmeta.data_type() != data_type_) {
      return false;
    }
    if (qmeta.data_type() != meta.data_type()) {
      return false;
    }
    if (qmeta.unit_size() != IndexMeta::UnitSizeof(data_type_)) {
      return false;
    }
    return qmeta.unit_size() == meta.unit_size();
  }

  //! Retrieve distance function for query (no dense kernel is provided)
  MatrixDistance distance(void) const override {
    return nullptr;
  }

  //! Retrieve sparse distance function for the configured data type
  MatrixSparseDistance sparse_distance(void) const override {
    if (data_type_ == IndexMeta::DataType::DT_FP16) {
      return reinterpret_cast<MatrixSparseDistanceHandle>(
          ailego::MinusInnerProductSparseMatrix<ailego::Float16>::Compute);
    }
    if (data_type_ == IndexMeta::DataType::DT_FP32) {
      return reinterpret_cast<MatrixSparseDistanceHandle>(
          ailego::MinusInnerProductSparseMatrix<float>::Compute);
    }
    return nullptr;
  }

  //! Normalize result (flip the sign of the score)
  void normalize(float *score) const override {
    const float raw = *score;
    *score = -raw;
  }

  //! Denormalize threshold (flip the sign of the score)
  void denormalize(float *score) const override {
    const float normalized = *score;
    *score = -normalized;
  }

  //! Retrieve if it supports normalization
  bool support_normalize(void) const override {
    return true;
  }

  //! Retrieve params of Metric (as stored by init)
  const ailego::Params &params(void) const override {
    return params_;
  }

  //! Retrieve query measure object of this index measure (none)
  Pointer query_metric(void) const override {
    return nullptr;
  }

 private:
  IndexMeta::DataType data_type_{IndexMeta::DataType::DT_FP32};
  ailego::Params params_{};
};

// Factory registrations for the three metric classes defined in this file.
// NOTE(review): presumably the first macro argument is the name the index
// factory exposes and the second is the implementing class -- confirm
// against the definition of INDEX_FACTORY_REGISTER_METRIC_ALIAS.
INDEX_FACTORY_REGISTER_METRIC_ALIAS(InnerProduct, InnerProductMetric);
INDEX_FACTORY_REGISTER_METRIC_ALIAS(NormalizedCosine, NormalizedCosineMetric);

INDEX_FACTORY_REGISTER_METRIC_ALIAS(InnerProductSparse,
                                    InnerProductSparseMetric);

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/metric/metric_params.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <string>

namespace zvec {
namespace core {

//! Parameter keys for MipsEuclideanMetric
//! (m value, u value, maximum L2 norm and injection type)
static const std::string MIPS_EUCLIDEAN_METRIC_M_VALUE{
    "mips_euclidean.metric.m_value"};
static const std::string MIPS_EUCLIDEAN_METRIC_U_VALUE{
    "mips_euclidean.metric.u_value"};
static const std::string MIPS_EUCLIDEAN_METRIC_MAX_L2_NORM{
    "mips_euclidean.metric.max_l2_norm"};
static const std::string MIPS_EUCLIDEAN_METRIC_INJECTION_TYPE{
    "mips_euclidean.metric.injection_type"};

//! Parameter keys for the QuantizedInteger metric
//! (name and params of its origin metric)
static const std::string QUANTIZED_INTEGER_METRIC_ORIGIN_METRIC_NAME{
    "proxima.quantized_integer.metric.origin_metric_name"};
static const std::string QUANTIZED_INTEGER_METRIC_ORIGIN_METRIC_PARAMS{
    "proxima.quantized_integer.metric.origin_metric_params"};

}  // namespace core
}  // namespace zvec

================================================
FILE: src/core/metric/mips_euclidean_metric.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <ailego/math/euclidean_distance_matrix.h>
#include <ailego/math/inner_product_matrix.h>
#include <ailego/math/mips_euclidean_distance_matrix.h>
#include <ailego/math/norm2_matrix.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_factory.h>
#include "metric_params.h"

namespace zvec {
namespace core {

/*! Mips Squared Euclidean Metric
 */
template <bool is_spares = false>
class MipsSquaredEuclideanMetric : public IndexMetric {
 public:
  //! Initialize Metric
  int init(const IndexMeta &meta, const ailego::Params &index_params) override {
    data_type_ = meta.data_type();
    dimension_ = meta.dimension();

    int injection_type = static_cast<int>(kDefaultInjectionType);
    index_params.get(MIPS_EUCLIDEAN_METRIC_INJECTION_TYPE, &injection_type);
    if (injection_type >= static_cast<int>(Injection::kNumInjections)) {
      LOG_WARN("Unsupported injection_type %u, using '%s' instead",
               injection_type, InjectionName(0));
      injection_type = static_cast<int>(Injection::kLocalizedSpherical);
    }
    injection_ = static_cast<Injection>(injection_type);
    LOG_DEBUG(
        "Initializing MipsSquaredEuclideanMetric with injection %s"
        " type %d dimension %d",
        InjectionName(injection_), data_type_, dimension_);

    float max_l2_norm = 0.0f;
    float u_value = 0.0f;
    index_params.get(MIPS_EUCLIDEAN_METRIC_M_VALUE, &m_value_);
    index_params.get(MIPS_EUCLIDEAN_METRIC_U_VALUE, &u_value);
    index_params.get(MIPS_EUCLIDEAN_METRIC_MAX_L2_NORM, &max_l2_norm);
    CheckAndFixM(injection_, &m_value_);
    CheckAndFixU(injection_, m_value_, &u_value);

    squared_u_value_ = u_value * u_value;
    max_squared_l2_norm_ = max_l2_norm * max_l2_norm;
    if (injection_ == Injection::kIdentity ||
        injection_ == Injection::kLocalizedSpherical) {
      eta_ = 0.0f;
    } else if (max_squared_l2_norm_ < std::numeric_limits<float>::epsilon()) {
      eta_ = kDefaultEta;
    } else {
      eta_ = squared_u_value_ / max_squared_l2_norm_;
    }

    switch (data_type_) {
      case IndexMeta::DataType::DT_FP32:
        squared_norm2_handle_ = reinterpret_cast<SquaredNorm2Handle>(
            ailego::SquaredNorm2Matrix<float, 1>::Compute);
        break;

      case IndexMeta::DataType::DT_FP16:
        squared_norm2_handle_ = reinterpret_cast<SquaredNorm2Handle>(
            ailego::SquaredNorm2Matrix<ailego::Float16, 1>::Compute);
        break;

      case IndexMeta::DataType::DT_INT8:
        squared_norm2_handle_ = reinterpret_cast<SquaredNorm2Handle>(
            ailego::SquaredNorm2Matrix<int8_t, 1>::Compute);
        break;

      case IndexMeta::DataType::DT_INT4:
        squared_norm2_handle_ = reinterpret_cast<SquaredNorm2Handle>(
            ailego::SquaredNorm2Matrix<uint8_t, 1>::Compute);
        break;

      default:
        return IndexError_Unsupported;
    }

    query_metric_ = IndexFactory::CreateMetric(kQueryMetric);
    if (!query_metric_) {
      LOG_ERROR("Failed to create metric %s", kQueryMetric);
      return IndexError_NoExist;
    }
    int ret = query_metric_->init(meta, ailego::Params());
    if (ret != 0) {
      LOG_ERROR("Failed to initialize metric %s", kQueryMetric);
      return ret;
    }
    params_ = index_params;
    return 0;
  }

  //! Cleanup Metric
  int cleanup(void) override {
    eta_ = 0.0f;
    m_value_ = 0;
    squared_u_value_ = 0.0f;
    max_squared_l2_norm_ = 0.0f;
    query_metric_.reset();
    return 0;
  }

  //! Retrieve if it matched
  bool is_matched(const IndexMeta &meta) const override {
    return (meta.data_type() == data_type_ &&
            meta.unit_size() == IndexMeta::UnitSizeof(data_type_));
  }

  //! Retrieve if it matched
  bool is_matched(const IndexMeta &meta,
                  const IndexQueryMeta &qmeta) const override {
    return (qmeta.data_type() == data_type_ &&
            qmeta.unit_size() == IndexMeta::UnitSizeof(data_type_) &&
            qmeta.dimension() == meta.dimension());
  }

  //! Retrieve distance function for query
  MatrixBatchDistance batch_distance() const override {
    MatrixDistance dist_func = distance();

    return
        [=](const void **m, const void *q, size_t num, size_t dim, float *out) {
          for (size_t i = 0; i < num; ++i) {
            dist_func(m[i], q, dim, out + i);
          }
        };
  }


  //! Retrieve distance function for query
  MatrixDistance distance(void) const override {
    if (injection_ == Injection::kLocalizedSpherical) {
      switch (data_type_) {
        case IndexMeta::DataType::DT_FP32:
          return [&](const void *m, const void *q, size_t dim, float *out) {
            ailego::MipsSquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(
                reinterpret_cast<const float *>(m),
                reinterpret_cast<const float *>(q), dim, 0.0f, out);
          };

        case IndexMeta::DataType::DT_FP16:
          return [&](const void *m, const void *q, size_t dim, float *out) {
            ailego::MipsSquaredEuclideanDistanceMatrix<ailego::Float16, 1, 1>::
                Compute(reinterpret_cast<const ailego::Float16 *>(m),
                        reinterpret_cast<const ailego::Float16 *>(q), dim, 0.0f,
                        out);
          };

        case IndexMeta::DataType::DT_INT8:
          return [&](const void *m, const void *q, size_t dim, float *out) {
            ailego::MipsSquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute(
                reinterpret_cast<const int8_t *>(m),
                reinterpret_cast<const int8_t *>(q), dim, 0.0f, out);
          };

        case IndexMeta::DataType::DT_INT4:
          return [&](const void *m, const void *q, size_t dim, float *out) {
            ailego::MipsSquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(
                reinterpret_cast<const uint8_t *>(m),
                reinterpret_cast<const uint8_t *>(q), dim, 0.0f, out);
          };

        default:
          return nullptr;
      }
    }

    if (injection_ == Injection::kRepeatedQuadratic) {
      switch (data_type_) {
        case IndexMeta::DataType::DT_FP32:
          return [&](const void *m, const void *q, size_t dim, float *out) {
            ailego::MipsSquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(
                reinterpret_cast<const float *>(m),
                reinterpret_cast<const float *>(q), dim, m_value_, eta_, out);
          };

        case IndexMeta::DataType::DT_FP16:
          return [&](const void *m, const void *q, size_t dim, float *out) {
            ailego::MipsSquaredEuclideanDistanceMatrix<ailego::Float16, 1, 1>::
                Compute(reinterpret_cast<const ailego::Float16 *>(m),
                        reinterpret_cast<const ailego::Float16 *>(q), dim,
                        m_value_, eta_, out);
          };

        case IndexMeta::DataType::DT_INT8:
          return [&](const void *m, const void *q, size_t dim, float *out) {
            ailego::MipsSquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute(
                reinterpret_cast<const int8_t *>(m),
                reinterpret_cast<const int8_t *>(q), dim, m_value_, eta_, out);
          };

        case IndexMeta::DataType::DT_INT4:
          return [&](const void *m, const void *q, size_t dim, float *out) {
            ailego::MipsSquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(
                reinterpret_cast<const uint8_t *>(m),
                reinterpret_cast<const uint8_t *>(q), dim, m_value_, eta_, out);
          };

        default:
          return nullptr;
      }
    }

    if (injection_ == Injection::kSpherical) {
      switch (data_type_) {
        case IndexMeta::DataType::DT_FP32:
          return [&](const void *m, const void *q, size_t dim, float *out) {
            ailego::MipsSquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(
                reinterpret_cast<const float *>(m),
                reinterpret_cast<const float *>(q), dim, eta_, out);
          };

        case IndexMeta::DataType::DT_FP16:
          return [&](const void *m, const void *q, size_t dim, float *out) {
            ailego::MipsSquaredEuclideanDistanceMatrix<ailego::Float16, 1, 1>::
                Compute(reinterpret_cast<const ailego::Float16 *>(m),
                        reinterpret_cast<const ailego::Float16 *>(q), dim, eta_,
                        out);
          };

        case IndexMeta::DataType::DT_INT8:
          return [&](const void *m, const void *q, size_t dim, float *out) {
            ailego::MipsSquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute(
                reinterpret_cast<const int8_t *>(m),
                reinterpret_cast<const int8_t *>(q), dim, eta_, out);
          };

        case IndexMeta::DataType::DT_INT4:
          return [&](const void *m, const void *q, size_t dim, float *out) {
            ailego::MipsSquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(
                reinterpret_cast<const uint8_t *>(m),
                reinterpret_cast<const uint8_t *>(q), dim, eta_, out);
          };

        default:
          return nullptr;
      }
    }

    if (injection_ == Injection::kIdentity) {
      switch (data_type_) {
        case IndexMeta::DataType::DT_FP32:
          return reinterpret_cast<MatrixDistanceHandle>(
              ailego::SquaredEuclideanDistanceMatrix<float, 1, 1>::Compute);

        case IndexMeta::DataType::DT_FP16:
          return reinterpret_cast<MatrixDistanceHandle>(
              ailego::SquaredEuclideanDistanceMatrix<ailego::Float16, 1,
                                                     1>::Compute);

        case IndexMeta::DataType::DT_INT8:
          return reinterpret_cast<MatrixDistanceHandle>(
              ailego::SquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute);

        case IndexMeta::DataType::DT_INT4:
          return reinterpret_cast<MatrixDistanceHandle>(
              ailego::SquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute);

        default:
          return nullptr;
      }
    }
    return nullptr;
  }

  //! Retrieve distance function for query
  MatrixSparseDistance sparse_distance(void) const override {
    if (injection_ == Injection::kLocalizedSpherical) {
      return [&](const void *m_sparse, const void *q_sparse, float *out) {
        ailego::MipsSquaredEuclideanSparseDistanceMatrix<float>::Compute(
            m_sparse, q_sparse, out);
      };
    }

    if (injection_ == Injection::kRepeatedQuadratic) {
      LOG_ERROR(
          "Repeated Quadratic is not supported in MipsEuclideanMetric for "
          "Hybrid Vector!");

      return nullptr;
    }

    if (injection_ == Injection::kSpherical) {
      LOG_ERROR(
          "Spherical is not supported in MipsEuclideanMetric for Hybrid "
          "Vector!");

      return nullptr;
    }

    if (injection_ == Injection::kIdentity) {
      LOG_ERROR(
          "Identity is not supported in MipsEuclideanMetric for Hybrid "
          "Vector!");

      return nullptr;
    }

    return nullptr;
  }

  //! Retrieve matrix distance function for index features
  //  `m` x `n` is the requested block shape. An empty handle is returned when
  //  the data type is unsupported or when no kernel exists for (m, n).
  MatrixDistance distance_matrix(size_t m, size_t n) const override {
    // Kernel lookup shared by the Spherical and LocalizedSpherical
    // injections: both use the overload that takes a single `eta` scalar.
    const auto spherical_kernel = [this, m, n]() -> SphericalHandle<void> {
      switch (data_type_) {
        case IndexMeta::DataType::DT_FP32:
          return DistanceMatrixCompute<
              ailego::MipsSquaredEuclideanDistanceMatrix, float,
              SphericalHandle>(m, n);
        case IndexMeta::DataType::DT_FP16:
          return DistanceMatrixCompute<
              ailego::MipsSquaredEuclideanDistanceMatrix, ailego::Float16,
              SphericalHandle>(m, n);
        case IndexMeta::DataType::DT_INT8:
          return DistanceMatrixCompute<
              ailego::MipsSquaredEuclideanDistanceMatrix, int8_t,
              SphericalHandle>(m, n);
        case IndexMeta::DataType::DT_INT4:
          return DistanceMatrixCompute<
              ailego::MipsSquaredEuclideanDistanceMatrix, uint8_t,
              SphericalHandle>(m, n);
        default:
          return nullptr;
      }
    };

    switch (injection_) {
      case Injection::kLocalizedSpherical: {
        const SphericalHandle<void> compute = spherical_kernel();
        // Never wrap a null kernel: the caller must be able to detect the
        // unsupported (m, n) / data type through an empty handle.
        if (compute == nullptr) {
          return nullptr;
        }
        return [compute](const void *d, const void *q, size_t dim,
                         float *out) {
          // eta == 0 selects the pair-local (no global scaling) variant.
          compute(d, q, dim, 0.0f, out);
        };
      }

      case Injection::kRepeatedQuadratic: {
        RepeatedQuadraticHandle<void> compute = nullptr;
        switch (data_type_) {
          case IndexMeta::DataType::DT_FP32:
            compute = DistanceMatrixCompute<
                ailego::MipsSquaredEuclideanDistanceMatrix, float,
                RepeatedQuadraticHandle>(m, n);
            break;
          case IndexMeta::DataType::DT_FP16:
            compute = DistanceMatrixCompute<
                ailego::MipsSquaredEuclideanDistanceMatrix, ailego::Float16,
                RepeatedQuadraticHandle>(m, n);
            break;
          case IndexMeta::DataType::DT_INT8:
            compute = DistanceMatrixCompute<
                ailego::MipsSquaredEuclideanDistanceMatrix, int8_t,
                RepeatedQuadraticHandle>(m, n);
            break;
          case IndexMeta::DataType::DT_INT4:
            compute = DistanceMatrixCompute<
                ailego::MipsSquaredEuclideanDistanceMatrix, uint8_t,
                RepeatedQuadraticHandle>(m, n);
            break;
          default:
            break;
        }
        if (compute == nullptr) {
          return nullptr;
        }
        // `this` is captured on purpose: m_value_ and eta_ are read at call
        // time, so values updated after this handle was created are used.
        return [this, compute](const void *d, const void *q, size_t dim,
                               float *out) {
          compute(d, q, dim, m_value_, eta_, out);
        };
      }

      case Injection::kSpherical: {
        const SphericalHandle<void> compute = spherical_kernel();
        if (compute == nullptr) {
          return nullptr;
        }
        // eta_ is read at call time (see the RepeatedQuadratic case).
        return [this, compute](const void *d, const void *q, size_t dim,
                               float *out) {
          compute(d, q, dim, eta_, out);
        };
      }

      case Injection::kIdentity:
        // Plain squared Euclidean distance: the kernel already has the
        // MatrixDistance signature, so no adapter closure is needed.
        switch (data_type_) {
          case IndexMeta::DataType::DT_FP32:
            return DistanceMatrixCompute<
                ailego::SquaredEuclideanDistanceMatrix, float,
                TypedDistanceHandle>(m, n);
          case IndexMeta::DataType::DT_FP16:
            return DistanceMatrixCompute<
                ailego::SquaredEuclideanDistanceMatrix, ailego::Float16,
                TypedDistanceHandle>(m, n);
          case IndexMeta::DataType::DT_INT8:
            return DistanceMatrixCompute<
                ailego::SquaredEuclideanDistanceMatrix, int8_t,
                TypedDistanceHandle>(m, n);
          case IndexMeta::DataType::DT_INT4:
            return DistanceMatrixCompute<
                ailego::SquaredEuclideanDistanceMatrix, uint8_t,
                TypedDistanceHandle>(m, n);
          default:
            break;
        }
        return nullptr;

      default:
        break;
    }
    return nullptr;
  }

  //! Normalize result
  //  Forwards `score` to the query metric's normalize().
  //  NOTE(review): query_metric_ is dereferenced without a null check;
  //  presumably init() always sets it - confirm.
  void normalize(float *score) const override {
    query_metric_->normalize(score);
  }

  //! Denormalize threshold
  //  Forwards `score` to the query metric's denormalize().
  //  NOTE(review): query_metric_ is dereferenced without a null check;
  //  presumably init() always sets it - confirm.
  void denormalize(float *score) const override {
    query_metric_->denormalize(score);
  }

  //! Retrieve if it supports normalization
  //  The answer is whatever the query metric reports, since normalize() and
  //  denormalize() are both delegated to it.
  bool support_normalize(void) const override {
    return query_metric_->support_normalize();
  }

  //! Retrieve params of Metric
  //  Returns a reference to the stored params; train() writes the max L2 norm
  //  and U value into this same object.
  const ailego::Params &params(void) const override {
    return params_;
  }

  //! Train the metric
  //  Feeds one vector into the running maximum of squared L2 norms and, when
  //  a new maximum is found, refreshes the exported params and eta_.
  //  Returns 0 on success (including "nothing to do") and
  //  IndexError_Unsupported when no squared-norm kernel is available.
  int train(const void *vec, size_t dim) override {
    // eta_ == 0 means no global norm scaling, hence nothing to learn.
    if (eta_ == 0.0f) {
      return 0;
    }
    if (!squared_norm2_handle_) {
      return IndexError_Unsupported;
    }

    float squared_norm;
    squared_norm2_handle_(vec, dim, &squared_norm);

    // Written as a negated '>' so a NaN norm is ignored, never adopted.
    if (!(squared_norm > max_squared_l2_norm_)) {
      return 0;
    }

    max_squared_l2_norm_ = squared_norm;
    const float l2_norm = std::sqrt(squared_norm);
    params_.set(MIPS_EUCLIDEAN_METRIC_MAX_L2_NORM, l2_norm);

    // A maximum below 1.0 that still exceeds the current U^2 raises U so
    // that it matches the observed maximum norm.
    const bool raise_u_value = max_squared_l2_norm_ < 1.0 &&
                               max_squared_l2_norm_ > squared_u_value_;
    if (raise_u_value) {
      squared_u_value_ = max_squared_l2_norm_;
      params_.set(MIPS_EUCLIDEAN_METRIC_U_VALUE, l2_norm);
    }

    eta_ = squared_u_value_ / max_squared_l2_norm_;
    return 0;
  }

  //! Retrieve if it supports training
  //  Mirrors the early exit in train(): a zero eta_ means there is no global
  //  norm scaling to learn.
  bool support_train(void) const override {
    // No global norm scaling => eta_ == 0 => no training.
    return eta_ != 0.0f;
  }

  //! Retrieve query metric object of this index metric
  //  Returns the stored pointer as-is; the same object backs normalize(),
  //  denormalize() and support_normalize().
  Pointer query_metric(void) const override {
    return query_metric_;
  }

 private:
  //! Type of MipsSquaredEuclideanDistanceMatrix::Compute overloaded for
  //  Spherical injection and LocalizedSpherical nonmetric.
  //  Takes the feature block `m`, the query block `q`, the dimension, a single
  //  `eta` scalar and the output buffer.
  template <typename T>
  using SphericalHandle = void (*)(const T *m, const T *q, size_t dim,
                                   float eta, float *out);

  //! Type of MipsSquaredEuclideanDistanceMatrix::Compute overloaded for
  //  RepeatedQuadratic injection.
  //  Same as SphericalHandle plus `m_value`, which distance_matrix() fills
  //  from the m_value_ member.
  template <typename T>
  using RepeatedQuadraticHandle = void (*)(const T *m, const T *q, size_t dim,
                                           size_t m_value, float eta,
                                           float *out);

  //! Type of squared L2 norm function.
  //  train() calls it with one vector and reads the result from `out`.
  using SquaredNorm2Handle = void (*)(const void *m, size_t dim, float *out);

  // Type of injective mapping into Euclidean space.
  // The numeric values index the name table in InjectionName(int), so the
  // two must be kept in the same order.
  enum struct Injection {
    kLocalizedSpherical = 0,  // spherical with pair-only max-norm
    kSpherical = 1,           // require global scaling/training
    kRepeatedQuadratic = 2,   // require global scaling/training
    kIdentity = 3,            // plain Euclidean distance
    kNumInjections = 4        // count of valid injections, not a real one
  };

  // Maps a raw injection id to its display name.
  // Any id outside [0, kNumInjections) yields "Invalid".
  static const char *InjectionName(int injection) {
    // Order must follow the numeric values of `Injection`.
    static const char *kNames[] = {"LocalizedSpherical", "Spherical",
                                   "RepeatedQuadratic", "Identity"};
    const int count = static_cast<int>(Injection::kNumInjections);
    const bool in_range = injection >= 0 && injection < count;
    return in_range ? kNames[injection] : "Invalid";
  }

  // Typed convenience overload: converts the enumerator to its integer value
  // and defers to InjectionName(int), including its "Invalid" fallback.
  static const char *InjectionName(Injection injection) {
    return InjectionName(static_cast<int>(injection));
  }

  // Checks (and fixes) `*m_value`, the number of additional dimensions used
  // by the injection:
  //   - RepeatedQuadratic: 0 is replaced by the default 3, anything else is
  //     kept;
  //   - Spherical: forced to 1 (a warning is logged unless it was 0);
  //   - every other injection: forced to 0, with a warning.
  // Returns true if `*m_value` is modified.
  static bool CheckAndFixM(Injection injection, uint32_t *m_value) {
    const uint32_t requested = *m_value;
    switch (injection) {
      case Injection::kRepeatedQuadratic:
        if (requested != 0) {
          return false;
        }
        // Recommend value in paper (3.5 Practical Recommendation of
        // Parameters)
        *m_value = 3u;
        return true;

      case Injection::kSpherical:
        if (requested == 1) {
          return false;
        }
        if (requested != 0) {
          LOG_WARN("M value (%u) set to 1 for Spherical injection", requested);
        }
        *m_value = 1;
        return true;

      default:  // kLocalizedSpherical, kIdentity, or kInvalid
        if (requested == 0) {
          return false;
        }
        LOG_WARN("M value (%u) set to 0 for %s injections", requested,
                 InjectionName(injection));
        *m_value = 0;
        return true;
    }
  }

  // Checks and fixes `*u_value`, global L2 norm scalar.
  // `m_value` is no. additional dimensions, used ONLY by RepeatedQuadratic
  // injection; other injections ignore it.
  //   - RepeatedQuadratic: a value outside (epsilon, 1.0) is replaced by a
  //     default derived from `m_value`; an in-range value that vanishes when
  //     raised to 2^m_value only triggers a warning.
  //   - Spherical: a value outside (epsilon, 1.0) is replaced by 1 - 1e-3.
  //   - every other injection: forced to 1.0.
  // Returns true if `*u_value` is set to a new value.
  static bool CheckAndFixU(Injection injection, uint32_t m_value,
                           float *u_value) {
    if (injection == Injection::kRepeatedQuadratic) {
      // Powers of two are built with std::ldexp instead of `1 << m_value`:
      // shifting an int by an unvalidated count is undefined once the count
      // reaches the width of int. The clamp keeps the int conversion safe;
      // for small m_value the results are bit-identical to the shift form.
      constexpr uint32_t kMaxLog2Power = 64u;
      const int log2_power = static_cast<int>(
          m_value < kMaxLog2Power ? m_value : kMaxLog2Power);

      if (*u_value <= std::numeric_limits<float>::epsilon() ||
          *u_value >= 1.0) {
        // Try computing a default U value
        constexpr float kLogError = -5.0;  // log_10(distance_error)
        // U = 10^(kLogError / 2^(m_value + 1))
        const float new_u_value = static_cast<float>(
            std::pow(10, std::ldexp(kLogError, -(log2_power + 1))));
        if (*u_value != 0) {
          LOG_WARN("U value (%f) set to %f for RepeatedQuadratic injection",
                   *u_value, new_u_value);
        }
        *u_value = new_u_value;
        return true;
      } else if (std::pow(static_cast<double>(*u_value),
                          std::ldexp(1.0, log2_power)) <
                 std::numeric_limits<float>::epsilon()) {
        // U^(2^m_value) falls below float precision.
        LOG_WARN(
            "U value %f is too small, may cause loss of distance precision",
            *u_value);
      }
    } else if (injection == Injection::kSpherical) {
      // Spherical injection requires ||x'|| <= 1.0 for computing
      // std::sqrt(1 - ||x'||^2), x' = u_value * x / max_norm.  Set u_value
      // to slightly < 1.0 in case of precision loss in float computation.
      if (*u_value <= std::numeric_limits<float>::epsilon() ||
          *u_value >= 1.0) {
        static constexpr float kSphericalUValue = 1.0f - 1e-3;
        if (*u_value != 0.0f) {
          LOG_WARN("U value (%f) set to %f for Spherical injection", *u_value,
                   kSphericalUValue);
        }
        *u_value = kSphericalUValue;
        return true;
      }
    } else {  // kLocalizedSpherical, kIdentity, or kInvalid
      if (*u_value != 1.0) {
        if (*u_value != 0) {
          LOG_WARN("U value (%f) set to 1.0 for %s injection", *u_value,
                   InjectionName(injection));
        }
        *u_value = 1.0;
        return true;
      }
    }
    return false;
  }

 private:
  //! Type of basic DistanceMatrix::Compute function with typed parameter.
  //  This is the default `Handle` of DistanceMatrixCompute() and the one used
  //  by the Identity injection, whose kernels take no extra scalars.
  template <typename T>
  using TypedDistanceHandle = void (*)(const T *m, const T *q, size_t dim,
                                       float *out);

  //! Returns m x n distance matrix compute function.
  //  Handle is used to resolve potential DistanceMatrix<T>::Compute overload.
  //  Both `m` and `n` must be a power of two no larger than 32; nullptr is
  //  returned otherwise, and also for n > m, which has no table entry.
  template <template <typename, size_t, size_t, typename = void>
            class DistanceMatrix,
            typename T, template <typename> class Handle = TypedDistanceHandle>
  static Handle<void> DistanceMatrixCompute(size_t m, size_t n) {
    // Row index is log2(m), column index is log2(n). Assigning to Handle<T>
    // is what selects the matching Compute overload.
    static Handle<T> kernels[6][6] = {
        // m = 1
        {DistanceMatrix<T, 1, 1, void>::Compute, nullptr, nullptr, nullptr,
         nullptr, nullptr},
        // m = 2
        {DistanceMatrix<T, 2, 1, void>::Compute,
         DistanceMatrix<T, 2, 2, void>::Compute, nullptr, nullptr, nullptr,
         nullptr},
        // m = 4
        {DistanceMatrix<T, 4, 1, void>::Compute,
         DistanceMatrix<T, 4, 2, void>::Compute,
         DistanceMatrix<T, 4, 4, void>::Compute, nullptr, nullptr, nullptr},
        // m = 8
        {DistanceMatrix<T, 8, 1, void>::Compute,
         DistanceMatrix<T, 8, 2, void>::Compute,
         DistanceMatrix<T, 8, 4, void>::Compute,
         DistanceMatrix<T, 8, 8, void>::Compute, nullptr, nullptr},
        // m = 16
        {DistanceMatrix<T, 16, 1, void>::Compute,
         DistanceMatrix<T, 16, 2, void>::Compute,
         DistanceMatrix<T, 16, 4, void>::Compute,
         DistanceMatrix<T, 16, 8, void>::Compute,
         DistanceMatrix<T, 16, 16, void>::Compute, nullptr},
        // m = 32
        {DistanceMatrix<T, 32, 1, void>::Compute,
         DistanceMatrix<T, 32, 2, void>::Compute,
         DistanceMatrix<T, 32, 4, void>::Compute,
         DistanceMatrix<T, 32, 8, void>::Compute,
         DistanceMatrix<T, 32, 16, void>::Compute,
         DistanceMatrix<T, 32, 32, void>::Compute}};

    const bool rows_ok = m <= 32 && ailego_popcount(m) == 1;
    const bool cols_ok = n <= 32 && ailego_popcount(n) == 1;
    if (!rows_ok || !cols_ok) {
      return nullptr;
    }

    const auto row = ailego_ctz(m);
    const auto col = ailego_ctz(n);
    return reinterpret_cast<Handle<void> >(kernels[row][col]);
  }

  //! Constants
  // If no training data is provided, we assume a max squared L2 norm that is
  // as large as possible while still keeping precision, so the estimate is
  // eta = U / max(squared L2 norm) = float epsilon.
  static constexpr float kDefaultEta = std::numeric_limits<float>::epsilon();
  // Name of the query metric, chosen by the class template flag
  // (`is_spares` is presumably a misspelling of "is_sparse"; it is declared
  // outside this view).
  static constexpr char const *kQueryMetric =
      is_spares ? "InnerProductSparse" : "InnerProduct";
  // Injection used for the injection_ member's initial value.
  static constexpr Injection kDefaultInjectionType =
      Injection::kLocalizedSpherical;

  //! Members
  // Squared L2 norm kernel called by train(); train() fails with
  // IndexError_Unsupported while it is null.
  SquaredNorm2Handle squared_norm2_handle_{nullptr};
  // Global scaling factor; 0 disables training. train() sets it to
  // squared_u_value_ / max_squared_l2_norm_.
  float eta_{0.0f};
  // Passed to the RepeatedQuadratic kernel as its `m_value` argument.
  uint32_t m_value_{0};
  // Squared U value; raised by train() when a larger norm below 1.0 is seen.
  float squared_u_value_{0.0f};
  // Largest squared L2 norm observed by train() so far.
  float max_squared_l2_norm_{0.0f};
  uint32_t dimension_{0};
  // Selects the element type of the kernels returned by distance_matrix().
  IndexMeta::DataType data_type_{IndexMeta::DataType::DT_FP32};
  // Selects which family of kernels distance_matrix() returns.
  Injection injection_{kDefaultInjectionType};
  // Returned by query_metric(); also backs normalize()/denormalize().
  IndexMetric::Pointer query_metric_{};
  // Returned by params(); train() stores the max L2 norm and U value here.
  ailego::Params params_{};
};

// Factory registration of the dense (<false>) and sparse (<true>)
// instantiations. Presumably the first macro argument is the name used to
// create the metric through IndexFactory - confirm against the macro.
INDEX_FACTORY_REGISTER_METRIC_ALIAS(MipsSquaredEuclidean,
                                    MipsSquaredEuclideanMetric<false>);
INDEX_FACTORY_REGISTER_METRIC_ALIAS(MipsSquaredEuclideanSparse,
                                    MipsSquaredEuclideanMetric<true>);

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/metric/quantized_integer_metric.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <ailego/math/euclidean_distance_matrix.h>
#include <ailego/math/inner_product_matrix.h>
#include <ailego/math/mips_euclidean_distance_matrix.h>
#include <ailego/math/norm2_matrix.h>
#include <ailego/math_batch/distance_batch.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_factory.h>
#include <zvec/turbo/turbo.h>
#include "metric_params.h"
#include "quantized_integer_metric_batch.h"
#include "quantized_integer_metric_matrix.h"

namespace zvec {
namespace core {

/*! Index Metric for quantized integer by IntegerStreamingConverter
 *
 *  Serves INT8 and INT4 features only. The metric named by
 *  QUANTIZED_INTEGER_METRIC_ORIGIN_METRIC_NAME selects which integer kernels
 *  are handed out by distance_matrix() and batch_distance().
 */
class QuantizedIntegerMetric : public IndexMetric {
 public:
  //! Initialize Metric
  //  Returns IndexError_Unsupported for a data type other than INT8/INT4 or
  //  an unknown origin metric name, IndexError_InvalidArgument when the
  //  origin metric name is missing, and 0 on success.
  int init(const IndexMeta &meta, const ailego::Params &index_params) override {
    if (meta.data_type() != IndexMeta::DataType::DT_INT8 &&
        meta.data_type() != IndexMeta::DataType::DT_INT4) {
      LOG_ERROR("Unsupported type %d", meta.data_type());
      return IndexError_Unsupported;
    }
    std::string metric_name;
    ailego::Params metric_params;
    index_params.get(QUANTIZED_INTEGER_METRIC_ORIGIN_METRIC_NAME, &metric_name);
    index_params.get(QUANTIZED_INTEGER_METRIC_ORIGIN_METRIC_PARAMS,
                     &metric_params);
    if (metric_name.empty()) {
      LOG_ERROR("Param %s is required",
                QUANTIZED_INTEGER_METRIC_ORIGIN_METRIC_NAME.c_str());
      return IndexError_InvalidArgument;
    }
    if (metric_name == "SquaredEuclidean") {
      origin_metric_type_ = MetricType::kSquaredEuclidean;
    } else if (metric_name == "InnerProduct") {
      origin_metric_type_ = MetricType::kInnerProduct;
    } else if (metric_name == "MipsSquaredEuclidean") {
      origin_metric_type_ = MetricType::kMipsSquaredEuclidean;
    } else if (metric_name == "NormalizedCosine") {
      origin_metric_type_ = MetricType::kNormalizedCosine;
    } else if (metric_name == "Cosine") {
      origin_metric_type_ = MetricType::kCosine;
    } else {
      LOG_ERROR("Unsupported metric %s", metric_name.c_str());
      return IndexError_Unsupported;
    }
    meta_ = meta;
    params_ = index_params;

    return 0;
  }

  //! Cleanup Metric
  int cleanup(void) override {
    return 0;
  }

  //! Retrieve if it matched
  //  Matches on data type and unit size of the meta given to init().
  bool is_matched(const IndexMeta &meta) const override {
    return meta.data_type() == meta_.data_type() &&
           meta.unit_size() == meta_.unit_size();
  }

  //! Retrieve if it matched
  //  The query must have the stored data type and unit size, and the same
  //  dimension as `meta`.
  bool is_matched(const IndexMeta &meta,
                  const IndexQueryMeta &qmeta) const override {
    return qmeta.data_type() == meta_.data_type() &&
           qmeta.unit_size() == meta_.unit_size() &&
           qmeta.dimension() == meta.dimension();
  }

  //! Retrieve distance function for query
  MatrixDistance distance(void) const override {
    return distance_matrix(1, 1);
  }

  //! Retrieve matrix distance function for index features
  //  A turbo kernel, when one is available, is returned only for the 1 x 1
  //  shape; every other shape uses the generic m x n kernel table. An empty
  //  handle is returned when no kernel exists for (m, n).
  MatrixDistance distance_matrix(size_t m, size_t n) const override {
    switch (origin_metric_type_) {
      case MetricType::kSquaredEuclidean:
        if (meta_.data_type() == IndexMeta::DataType::DT_INT8) {
          auto turbo_ret = turbo::get_distance_func(
              turbo::MetricType::kSquaredEuclidean, turbo::DataType::kInt8,
              turbo::QuantizeType::kDefault);
          if (turbo_ret && m == 1 && n == 1) {
            return turbo_ret;
          }
          return DistanceMatrixCompute<SquaredEuclidean, int8_t>(m, n);
        }
        if (meta_.data_type() == IndexMeta::DataType::DT_INT4) {
          return DistanceMatrixCompute<SquaredEuclidean, uint8_t>(m, n);
        }
        break;

      case MetricType::kInnerProduct:
        if (meta_.data_type() == IndexMeta::DataType::DT_INT8) {
          return DistanceMatrixCompute<MinusInnerProduct, int8_t>(m, n);
        }
        if (meta_.data_type() == IndexMeta::DataType::DT_INT4) {
          return DistanceMatrixCompute<MinusInnerProduct, uint8_t>(m, n);
        }
        break;

      case MetricType::kMipsSquaredEuclidean:
        if (meta_.data_type() == IndexMeta::DataType::DT_INT8) {
          return DistanceMatrixCompute<MipsSquaredEuclidean, int8_t>(m, n);
        }
        if (meta_.data_type() == IndexMeta::DataType::DT_INT4) {
          return DistanceMatrixCompute<MipsSquaredEuclidean, uint8_t>(m, n);
        }
        break;

      case MetricType::kNormalizedCosine:
        if (meta_.data_type() == IndexMeta::DataType::DT_INT8) {
          return DistanceMatrixCompute<MinusInnerProduct, int8_t>(m, n);
        }
        if (meta_.data_type() == IndexMeta::DataType::DT_INT4) {
          return DistanceMatrixCompute<MinusInnerProduct, uint8_t>(m, n);
        }
        break;
      case MetricType::kCosine:
        if (meta_.data_type() == IndexMeta::DataType::DT_INT8) {
          auto turbo_ret = turbo::get_distance_func(
              turbo::MetricType::kCosine, turbo::DataType::kInt8,
              turbo::QuantizeType::kDefault);
          // Same guard as the SquaredEuclidean case: the turbo kernel must
          // not be handed out for a request with m > 1 or n > 1.
          if (turbo_ret && m == 1 && n == 1) {
            return turbo_ret;
          }
          return DistanceMatrixCompute<CosineMinusInnerProduct, int8_t>(m, n);
        }
        if (meta_.data_type() == IndexMeta::DataType::DT_INT4) {
          return DistanceMatrixCompute<CosineMinusInnerProduct, uint8_t>(m, n);
        }
        break;
    }
    return nullptr;
  }

  //! Retrieve batch distance function for query
  //  Turbo batch kernels are preferred for INT8 SquaredEuclidean and Cosine;
  //  all other combinations use BaseDistanceBatchWithScoreUnquantized with a
  //  batch size of 12 and a prefetch step of 2.
  MatrixBatchDistance batch_distance(void) const override {
    switch (origin_metric_type_) {
      case MetricType::kSquaredEuclidean:
        if (meta_.data_type() == IndexMeta::DataType::DT_INT8) {
          auto turbo_ret = turbo::get_batch_distance_func(
              turbo::MetricType::kSquaredEuclidean, turbo::DataType::kInt8,
              turbo::QuantizeType::kDefault);
          if (turbo_ret) {
            return turbo_ret;
          }
          return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(
              BaseDistanceBatchWithScoreUnquantized<SquaredEuclidean, int8_t,
                                                    12, 2>::ComputeBatch);
        }
        if (meta_.data_type() == IndexMeta::DataType::DT_INT4) {
          return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(
              BaseDistanceBatchWithScoreUnquantized<SquaredEuclidean, uint8_t,
                                                    12, 2>::ComputeBatch);
        }
        break;

      case MetricType::kInnerProduct:
        if (meta_.data_type() == IndexMeta::DataType::DT_INT8) {
          return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(
              BaseDistanceBatchWithScoreUnquantized<MinusInnerProduct, int8_t,
                                                    12, 2>::ComputeBatch);
        }
        if (meta_.data_type() == IndexMeta::DataType::DT_INT4) {
          return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(
              BaseDistanceBatchWithScoreUnquantized<MinusInnerProduct, uint8_t,
                                                    12, 2>::ComputeBatch);
        }
        break;
      case MetricType::kMipsSquaredEuclidean:
        if (meta_.data_type() == IndexMeta::DataType::DT_INT8) {
          return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(
              BaseDistanceBatchWithScoreUnquantized<
                  MipsSquaredEuclidean, int8_t, 12, 2>::ComputeBatch);
        }
        if (meta_.data_type() == IndexMeta::DataType::DT_INT4) {
          return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(
              BaseDistanceBatchWithScoreUnquantized<
                  MipsSquaredEuclidean, uint8_t, 12, 2>::ComputeBatch);
        }
        break;
      case MetricType::kNormalizedCosine:
        if (meta_.data_type() == IndexMeta::DataType::DT_INT8) {
          return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(
              BaseDistanceBatchWithScoreUnquantized<MinusInnerProduct, int8_t,
                                                    12, 2>::ComputeBatch);
        }
        if (meta_.data_type() == IndexMeta::DataType::DT_INT4) {
          return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(
              BaseDistanceBatchWithScoreUnquantized<MinusInnerProduct, uint8_t,
                                                    12, 2>::ComputeBatch);
        }
        break;
      case MetricType::kCosine:
        if (meta_.data_type() == IndexMeta::DataType::DT_INT8) {
          auto turbo_ret = turbo::get_batch_distance_func(
              turbo::MetricType::kCosine, turbo::DataType::kInt8,
              turbo::QuantizeType::kDefault);
          if (turbo_ret) {
            return turbo_ret;
          }
          return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(
              BaseDistanceBatchWithScoreUnquantized<
                  CosineMinusInnerProduct, int8_t, 12, 2>::ComputeBatch);
        }
        if (meta_.data_type() == IndexMeta::DataType::DT_INT4) {
          return reinterpret_cast<IndexMetric::MatrixBatchDistanceHandle>(
              BaseDistanceBatchWithScoreUnquantized<
                  CosineMinusInnerProduct, uint8_t, 12, 2>::ComputeBatch);
        }
        break;
    }
    return nullptr;
  }

  //! Retrieve params of Metric
  //  These are the index params handed to init(), stored unchanged.
  const ailego::Params &params(void) const override {
    return params_;
  }

  //! Train the metric
  //  Nothing to learn for this metric; always succeeds.
  int train(const void * /*vec*/, size_t /*dim*/) override {
    return 0;
  }

  //! Retrieve if it supports training
  bool support_train(void) const override {
    // train() is a no-op for this metric.
    return false;
  }

  //! Normalize result
  //  InnerProduct kernels return the negated product, so the sign is flipped
  //  back; both cosine variants are shifted by +1. Other metrics are left
  //  untouched.
  void normalize(float *score) const override {
    if (origin_metric_type_ == MetricType::kInnerProduct) {
      *score = -(*score);
    } else if (origin_metric_type_ == MetricType::kNormalizedCosine) {
      *score = 1.0f + *score;
    } else if (origin_metric_type_ == MetricType::kCosine) {
      *score = 1.0f + *score;
    }
  }

  //! Retrieve if it supports normalization
  //  True exactly for the metrics that normalize() rewrites.
  bool support_normalize(void) const override {
    return origin_metric_type_ == MetricType::kInnerProduct ||
           origin_metric_type_ == MetricType::kNormalizedCosine ||
           origin_metric_type_ == MetricType::kCosine;
  }

  //! Retrieve query metric object of this index metric
  //  Only MipsSquaredEuclidean has a separate query metric: a new
  //  "QuantizedInteger" metric initialized for InnerProduct on the same meta.
  //  Every other origin metric returns nullptr.
  Pointer query_metric(void) const override {
    if (origin_metric_type_ == MetricType::kMipsSquaredEuclidean) {
      auto metric = IndexFactory::CreateMetric("QuantizedInteger");
      if (metric) {
        ailego::Params metric_params;
        metric_params.set(QUANTIZED_INTEGER_METRIC_ORIGIN_METRIC_NAME,
                          "InnerProduct");
        metric->init(meta_, metric_params);
      }
      return metric;
    }
    return nullptr;
  }

  //! Retrieve query preprocess function for batch distance
  //  Available for INT8 Cosine and INT8 SquaredEuclidean only: the turbo
  //  implementation is preferred and the batch kernel's own preprocess
  //  function is the fallback. Returns nullptr for everything else.
  DistanceBatchQueryPreprocessFunc get_query_preprocess_func() const override {
    if (origin_metric_type_ == MetricType::kCosine &&
        meta_.data_type() == IndexMeta::DataType::DT_INT8) {
      auto turbo_ret = turbo::get_query_preprocess_func(
          turbo::MetricType::kCosine, turbo::DataType::kInt8,
          turbo::QuantizeType::kDefault);
      if (turbo_ret) {
        return turbo_ret;
      }
      return CosineMinusInnerProductDistanceBatchWithScoreUnquantized<
          int8_t, 1, 1>::GetQueryPreprocessFunc();
    } else if (origin_metric_type_ == MetricType::kSquaredEuclidean &&
               meta_.data_type() == IndexMeta::DataType::DT_INT8) {
      auto turbo_ret = turbo::get_query_preprocess_func(
          turbo::MetricType::kSquaredEuclidean, turbo::DataType::kInt8,
          turbo::QuantizeType::kDefault);
      if (turbo_ret) {
        return turbo_ret;
      }
      return SquaredEuclideanDistanceBatchWithScoreUnquantized<
          int8_t, 1, 1>::GetQueryPreprocessFunc();
    }
    return nullptr;
  }


 private:
  //! Returns m x n distance matrix compute function.
  //  Both `m` and `n` must be a power of two no larger than 32; nullptr is
  //  returned otherwise, and also for n > m, which has no table entry.
  template <template <typename, size_t, size_t> class DistanceMatrix,
            typename T>
  static MatrixDistanceHandle DistanceMatrixCompute(size_t m, size_t n) {
    // Row index is log2(m), column index is log2(n).
    static void (*distance_table[6][6])(const T *, const T *, size_t,
                                        float *) = {
        {DistanceMatrix<T, 1, 1>::Compute, nullptr, nullptr, nullptr, nullptr,
         nullptr},
        {DistanceMatrix<T, 2, 1>::Compute, DistanceMatrix<T, 2, 2>::Compute,
         nullptr, nullptr, nullptr, nullptr},
        {DistanceMatrix<T, 4, 1>::Compute, DistanceMatrix<T, 4, 2>::Compute,
         DistanceMatrix<T, 4, 4>::Compute, nullptr, nullptr, nullptr},
        {DistanceMatrix<T, 8, 1>::Compute, DistanceMatrix<T, 8, 2>::Compute,
         DistanceMatrix<T, 8, 4>::Compute, DistanceMatrix<T, 8, 8>::Compute,
         nullptr, nullptr},
        {DistanceMatrix<T, 16, 1>::Compute, DistanceMatrix<T, 16, 2>::Compute,
         DistanceMatrix<T, 16, 4>::Compute, DistanceMatrix<T, 16, 8>::Compute,
         DistanceMatrix<T, 16, 16>::Compute, nullptr},
        {DistanceMatrix<T, 32, 1>::Compute, DistanceMatrix<T, 32, 2>::Compute,
         DistanceMatrix<T, 32, 4>::Compute, DistanceMatrix<T, 32, 8>::Compute,
         DistanceMatrix<T, 32, 16>::Compute,
         DistanceMatrix<T, 32, 32>::Compute}};
    if (m > 32 || n > 32 || ailego_popcount(m) != 1 ||
        ailego_popcount(n) != 1) {
      return nullptr;
    }
    return reinterpret_cast<MatrixDistanceHandle>(
        distance_table[ailego_ctz(m)][ailego_ctz(n)]);
  }

  //! Metric the vectors were measured with before quantization, as named by
  //  the origin metric param parsed in init().
  enum struct MetricType {
    kSquaredEuclidean = 0,
    kInnerProduct = 1,
    kMipsSquaredEuclidean = 2,
    kNormalizedCosine = 3,
    kCosine = 4
  };

  //! Members
  IndexMeta meta_{};                 // meta passed to init()
  ailego::Params params_{};          // index params passed to init()
  MetricType origin_metric_type_{};  // parsed from the origin metric name
};

// Factory registration; "QuantizedInteger" is the name query_metric() passes
// to IndexFactory::CreateMetric.
INDEX_FACTORY_REGISTER_METRIC_ALIAS(QuantizedInteger, QuantizedIntegerMetric);

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/metric/quantized_integer_metric_batch.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <ailego/math/euclidean_distance_matrix.h>
#include <ailego/math/inner_product_matrix.h>
#include <ailego/math/mips_euclidean_distance_matrix.h>
#include <ailego/math/norm2_matrix.h>
#include <ailego/math_batch/distance_batch.h>
#include "quantized_integer_metric_matrix.h"

namespace zvec::core {

// Forward declarations of the per-metric batch kernels. Only partial
// specializations are expected to be defined; they are declared up front so
// that BaseDistanceBatchWithScoreUnquantized and the cosine specializations
// can name them before their definitions appear.

// Batch kernel for the negated inner product.
template <typename T, size_t BatchSize, size_t PrefetchStep>
struct MinusInnerProductDistanceBatchWithScoreUnquantized;

// Batch kernel for cosine, built on top of the negated inner product kernel.
template <typename T, size_t BatchSize, size_t PrefetchStep>
struct CosineMinusInnerProductDistanceBatchWithScoreUnquantized;

// Batch kernel for the squared Euclidean distance.
template <typename T, size_t BatchSize, size_t PrefetchStep>
struct SquaredEuclideanDistanceBatchWithScoreUnquantized;

// Batch kernel for the MIPS squared Euclidean distance.
template <typename T, size_t BatchSize, size_t PrefetchStep>
struct MipsSquaredEuclideanDistanceBatchWithScoreUnquantized;


template <template <typename, size_t, size_t> class DistanceType,
          typename ValueType, size_t BatchSize, size_t PrefetchStep,
          typename = void>
struct BaseDistanceBatchWithScoreUnquantized {
  static inline void _ComputeBatch(const ValueType **m, const ValueType *q,
                                   size_t num, size_t dim, float *out) {
    for (size_t i = 0; i < num; ++i) {
      DistanceType<ValueType, 1, 1>::Compute(m[i], q, dim, out + i);
    }
  }

  // If Distance has ComputeBatch, use it; otherwise fall back to _ComputeBatch.
  static inline void ComputeBatch(const ValueType **m, const ValueType *q,
                                  size_t num, size_t dim, float *out) {
    // if constexpr (detail::HasComputeBatch<Distance, ValueType>::value) {
    //   return Distance::ComputeBatch(m, q, num, dim, out);
    // }
    if constexpr (std::is_same_v<DistanceType<ValueType, 1, 1>,
                                 CosineMinusInnerProduct<ValueType, 1, 1>>) {
      return CosineMinusInnerProductDistanceBatchWithScoreUnquantized<
          ValueType, BatchSize, PrefetchStep>::ComputeBatch(m, q, num, dim,
                                                            out);
    } else if constexpr (std::is_same_v<DistanceType<ValueType, 1, 1>,
                                        SquaredEuclidean<ValueType, 1, 1>>) {
      return SquaredEuclideanDistanceBatchWithScoreUnquantized<
          ValueType, BatchSize, PrefetchStep>::ComputeBatch(m, q, num, dim,
                                                            out);
    }

    _ComputeBatch(m, q, num, dim, out);
  }
};

//===========================================================
// CosineMinusInnerProductDistanceBatchWithScoreUnquantized
//===========================================================

// CosineMinusInnerProduct batch kernel for INT8-quantized vectors.
//
// Both entry points subtract 24 from the incoming dim and delegate to the
// INT8 MinusInnerProduct batch kernel.
template <size_t BatchSize, size_t PrefetchStep>
struct CosineMinusInnerProductDistanceBatchWithScoreUnquantized<
    int8_t, BatchSize, PrefetchStep> {
  using ImplType =
      MinusInnerProductDistanceBatchWithScoreUnquantized<int8_t, BatchSize,
                                                         PrefetchStep>;

  // Scores num_vecs candidates against query; results receives one float each.
  static inline void ComputeBatch(const int8_t **vecs, const int8_t *query,
                                  size_t num_vecs, size_t dim, float *results) {
    ImplType::ComputeBatch(vecs, query, num_vecs, dim - 24, results);
  }

  // Applies the delegate's query preprocess function (when it has one) with
  // the 24 trailing bytes excluded from dim; otherwise does nothing.
  static void QueryPreprocess(void *query, size_t dim) {
    const auto preprocess = ImplType::GetQueryPreprocessFunc();
    if (preprocess == nullptr) {
      return;
    }
    preprocess(query, dim - 24);
  }

  // Always hands out QueryPreprocess, which itself decides whether any work
  // is needed.
  static ailego::DistanceBatch::DistanceBatchQueryPreprocessFunc
  GetQueryPreprocessFunc() {
    return QueryPreprocess;
  }
};

// Compute CosineMinusInnerProduct for quantized INT4
template <size_t BatchSize, size_t PrefetchStep>
struct CosineMinusInnerProductDistanceBatchWithScoreUnquantized<
    uint8_t, BatchSize, PrefetchStep> {
  static inline void ComputeBatch(const uint8_t **vecs, const uint8_t *query,
                                  size_t num_vecs, size_t dim, float *results) {
    size_t original_dim = dim - 40;
    MinusInnerProductDistanceBatchWithScoreUnquantized<
        uint8_t, BatchSize, PrefetchStep>::ComputeBatch(vecs, query, num_vecs,
                                                        original_dim, results);
  }
};

//===========================================================
// MinusInnerProductDistanceBatchWithScoreUnquantized
//===========================================================

// MinusInnerProduct batch kernel for INT8-quantized vectors.
//
// `dim` is the number of quantized int8 values; the per-vector float
// parameters are read from the bytes that directly follow them.
// NOTE(review): a/b/s look like the per-vector scale, bias and sum written by
// the quantizer — confirm against the quantizer implementation.
template <size_t BatchSize, size_t PrefetchStep>
struct MinusInnerProductDistanceBatchWithScoreUnquantized<int8_t, BatchSize,
                                                          PrefetchStep> {
  using ImplType =
      ailego::DistanceBatch::InnerProductDistanceBatch<int8_t, BatchSize,
                                                       PrefetchStep>;

  // Writes the negated, de-quantized inner product of query with each of the
  // num_vecs candidates into results.
  static inline void ComputeBatch(const int8_t **vecs, const int8_t *query,
                                  size_t num_vecs, size_t dim, float *results) {
    const size_t original_dim = dim;
    // Raw integer inner products land in results first.
    ImplType::ComputeBatch(vecs, query, num_vecs, original_dim, results);

    // Whether a query preprocess function exists does not depend on the
    // candidate, so it is evaluated once instead of once per vector.
    const bool query_preprocessed =
        ImplType::GetQueryPreprocessFunc() != nullptr;

    const float *q_tail = reinterpret_cast<const float *>(
        reinterpret_cast<const uint8_t *>(query) + original_dim);
    const float qa = q_tail[0];
    const float qb = q_tail[1];
    const float qs = q_tail[2];
    for (size_t i = 0; i < num_vecs; ++i) {
      const float *m_tail = reinterpret_cast<const float *>(
          reinterpret_cast<const uint8_t *>(vecs[i]) + original_dim);
      const float ma = m_tail[0];
      const float mb = m_tail[1];
      const float ms = m_tail[2];
      float &result = results[i];
      if (query_preprocessed) {
        // The fifth 4-byte slot of the tail is read as an int sum.
        // NOTE(review): presumably compensates for a +128 shift applied to
        // the query by the preprocess function — confirm.
        const int int_sum = reinterpret_cast<const int *>(m_tail)[4];
        result -= 128 * int_sum;
      }
      result = -(ma * qa * result + mb * qa * qs + qb * ma * ms +
                 original_dim * qb * mb);
    }
  }

  // Forwards the preprocess function of the underlying inner-product kernel.
  static ailego::DistanceBatch::DistanceBatchQueryPreprocessFunc
  GetQueryPreprocessFunc() {
    return ImplType::GetQueryPreprocessFunc();
  }
};

// Compute MinusInnerProduct for quantized INT4
template <size_t BatchSize, size_t PrefetchStep>
struct MinusInnerProductDistanceBatchWithScoreUnquantized<uint8_t, BatchSize,
                                                          PrefetchStep> {
  static inline void ComputeBatch(const uint8_t **vecs, const uint8_t *query,
                                  size_t num_vecs, size_t dim, float *results) {
    const size_t original_dim = dim;
    const size_t original_dim_in_uint8_array = original_dim >> 1;

    ailego::DistanceBatch::InnerProductDistanceBatch<
        uint8_t, BatchSize, PrefetchStep>::ComputeBatch(vecs, query, num_vecs,
                                                        original_dim, results);
    const float *q_tail = reinterpret_cast<const float *>(
        reinterpret_cast<const uint8_t *>(query) + original_dim_in_uint8_array);
    float qa = q_tail[0];
    float qb = q_tail[1];
    float qs = q_tail[2];
    for (size_t i = 0; i < num_vecs; ++i) {
      const float *m_tail = reinterpret_cast<const float *>(
          reinterpret_cast<const uint8_t *>(vecs[i]) +
          original_dim_in_uint8_array);
      float ma = m_tail[0];
      float mb = m_tail[1];
      float ms = m_tail[2];
      float &result = results[i];
      result = -(ma * qa * result + mb * qa * qs + qb * ma * ms +
                 original_dim * qb * mb);
    }
  }
};

//===========================================================
// SquaredEuclideanDistanceBatchWithScoreUnquantized
//===========================================================

// SquaredEuclidean batch kernel for INT8-quantized vectors.
//
// The incoming dim includes 20 trailing bytes per vector; the float
// parameters are read from the bytes that follow the dim - 20 int8 values.
// NOTE(review): a/b/s/s2 look like the per-vector scale, bias, sum and
// squared sum written by the quantizer — confirm.
template <size_t BatchSize, size_t PrefetchStep>
struct SquaredEuclideanDistanceBatchWithScoreUnquantized<int8_t, BatchSize,
                                                         PrefetchStep> {
  using ImplType =
      ailego::DistanceBatch::InnerProductDistanceBatch<int8_t, BatchSize,
                                                       PrefetchStep>;

  // Writes the de-quantized squared Euclidean distance between query and each
  // of the num_vecs candidates into results.
  static void ComputeBatch(const int8_t **vecs, const int8_t *query,
                           size_t num_vecs, size_t dim, float *results) {
    const size_t original_dim = dim - 20;
    // Raw integer inner products land in results first.
    ImplType::ComputeBatch(vecs, query, num_vecs, original_dim, results);

    // Whether a query preprocess function exists does not depend on the
    // candidate, so it is evaluated once instead of once per vector.
    const bool query_preprocessed =
        ImplType::GetQueryPreprocessFunc() != nullptr;

    const float *q_tail = reinterpret_cast<const float *>(
        reinterpret_cast<const uint8_t *>(query) + original_dim);
    const float qa = q_tail[0];
    const float qb = q_tail[1];
    const float qs = q_tail[2];
    const float qs2 = q_tail[3];

    const float sum = qa * qs;
    const float sum2 = qa * qa * qs2;
    for (size_t i = 0; i < num_vecs; ++i) {
      const float *m_tail = reinterpret_cast<const float *>(
          reinterpret_cast<const uint8_t *>(vecs[i]) + original_dim);
      const float ma = m_tail[0];
      const float mb = m_tail[1];
      const float ms = m_tail[2];
      const float ms2 = m_tail[3];
      float &result = results[i];
      if (query_preprocessed) {
        // The fifth 4-byte slot of the tail is read as an int sum.
        // NOTE(review): presumably compensates for a +128 shift applied to
        // the query by the preprocess function — confirm.
        const int int8_sum = reinterpret_cast<const int *>(m_tail)[4];
        result -= 128 * int8_sum;
      }
      result = ma * ma * ms2 + sum2 - 2 * ma * qa * result +
               (mb - qb) * (mb - qb) * original_dim +
               2 * (mb - qb) * (ms * ma - sum);
    }
  }

  // Always hands out QueryPreprocess, which itself decides whether any work
  // is needed.
  static ailego::DistanceBatch::DistanceBatchQueryPreprocessFunc
  GetQueryPreprocessFunc() {
    return QueryPreprocess;
  }

  // Applies the underlying kernel's query preprocess function (when it has
  // one) with the 20 trailing bytes excluded from dim.
  static void QueryPreprocess(void *query, size_t dim) {
    if (auto func = ImplType::GetQueryPreprocessFunc(); func != nullptr) {
      return func(query, dim - 20);
    }
  }
};

// Compute SquaredEuclidean for quantized INT4
template <size_t BatchSize, size_t PrefetchStep>
struct SquaredEuclideanDistanceBatchWithScoreUnquantized<uint8_t, BatchSize,
                                                         PrefetchStep> {
  static void ComputeBatch(const uint8_t **vecs, const uint8_t *query,
                           size_t num_vecs, size_t dim, float *results) {
    const size_t original_dim = dim - 32;
    const size_t original_dim_in_uint8_array = original_dim >> 1;
    ailego::DistanceBatch::InnerProductDistanceBatch<
        uint8_t, BatchSize, PrefetchStep>::ComputeBatch(vecs, query, num_vecs,
                                                        original_dim, results);

    const float *q_tail = reinterpret_cast<const float *>(
        reinterpret_cast<const uint8_t *>(query) + original_dim_in_uint8_array);
    float qa = q_tail[0];
    float qb = q_tail[1];
    float qs = q_tail[2];
    float qs2 = q_tail[3];

    const float sum = qa * qs;
    const float sum2 = qa * qa * qs2;
    for (size_t i = 0; i < num_vecs; ++i) {
      const float *m_tail = reinterpret_cast<const float *>(
          reinterpret_cast<const uint8_t *>(vecs[i]) +
          original_dim_in_uint8_array);
      float ma = m_tail[0];
      float mb = m_tail[1];
      float ms = m_tail[2];
      float ms2 = m_tail[3];
      *results = ma * ma * ms2 + sum2 - 2 * ma * qa * *results +
                 (mb - qb) * (mb - qb) * original_dim +
                 2 * (mb - qb) * (ms * ma - sum);
      ++results;
    }
  }
};


//===========================================================
// MipsSquaredEuclideanDistanceBatchWithScoreUnquantized
//===========================================================

// MipsSquaredEuclidean batch kernel for INT8-quantized vectors.
//
// The incoming dim includes 20 trailing bytes per vector; the float
// parameters are read from the bytes that follow the dim - 20 int8 values.
// NOTE(review): this kernel applies the same expression as the INT8
// SquaredEuclidean batch kernel, performs no `128 * sum` correction, and
// offers QueryPreprocess without a GetQueryPreprocessFunc accessor — confirm
// that all three are intended.
template <size_t BatchSize, size_t PrefetchStep>
struct MipsSquaredEuclideanDistanceBatchWithScoreUnquantized<int8_t, BatchSize,
                                                             PrefetchStep> {
  using ImplType =
      ailego::DistanceBatch::InnerProductDistanceBatch<int8_t, BatchSize,
                                                       PrefetchStep>;

  // Writes one distance per candidate into results.
  static void ComputeBatch(const int8_t **vecs, const int8_t *query,
                           size_t num_vecs, size_t dim, float *results) {
    const size_t value_count = dim - 20;

    // Raw integer inner products land in results first.
    ImplType::ComputeBatch(vecs, query, num_vecs, value_count, results);

    const float *query_tail =
        reinterpret_cast<const float *>(query + value_count);
    const float qa = query_tail[0];
    const float qb = query_tail[1];
    const float qs = query_tail[2];
    const float qs2 = query_tail[3];

    const float sum = qa * qs;
    const float sum2 = qa * qa * qs2;

    for (size_t i = 0; i < num_vecs; ++i) {
      const float *vec_tail =
          reinterpret_cast<const float *>(vecs[i] + value_count);
      const float ma = vec_tail[0];
      const float mb = vec_tail[1];
      const float ms = vec_tail[2];
      const float ms2 = vec_tail[3];
      const float raw_ip = results[i];
      results[i] = ma * ma * ms2 + sum2 - 2 * ma * qa * raw_ip +
                   (mb - qb) * (mb - qb) * value_count +
                   2 * (mb - qb) * (ms * ma - sum);
    }
  }

  // Applies the underlying kernel's query preprocess function (when it has
  // one) with the 20 trailing bytes excluded from dim.
  static void QueryPreprocess(void *query, size_t dim) {
    const auto preprocess = ImplType::GetQueryPreprocessFunc();
    if (preprocess == nullptr) {
      return;
    }
    preprocess(query, dim - 20);
  }
};

// Compute SquaredEuclidean for quantized INT4
template <size_t BatchSize, size_t PrefetchStep>
struct MipsSquaredEuclideanDistanceBatchWithScoreUnquantized<uint8_t, BatchSize,
                                                             PrefetchStep> {
  static void ComputeBatch(const uint8_t **vecs, const uint8_t *query,
                           size_t num_vecs, size_t dim, float *results) {
    const size_t original_dim = dim - 32;
    const size_t original_dim_in_uint8_array = original_dim >> 1;
    ailego::DistanceBatch::InnerProductDistanceBatch<
        uint8_t, BatchSize, PrefetchStep>::ComputeBatch(vecs, query, num_vecs,
                                                        original_dim, results);

    const float *q_tail = reinterpret_cast<const float *>(
        reinterpret_cast<const uint8_t *>(query) + original_dim_in_uint8_array);
    float qa = q_tail[0];
    float qb = q_tail[1];
    float qs = q_tail[2];
    float qs2 = q_tail[3];

    const float sum = qa * qs;
    const float sum2 = qa * qa * qs2;
    for (size_t i = 0; i < num_vecs; ++i) {
      const float *m_tail = reinterpret_cast<const float *>(
          reinterpret_cast<const uint8_t *>(vecs[i]) +
          original_dim_in_uint8_array);
      float ma = m_tail[0];
      float mb = m_tail[1];
      float ms = m_tail[2];
      float ms2 = m_tail[3];
      *results = ma * ma * ms2 + sum2 - 2 * ma * qa * *results +
                 (mb - qb) * (mb - qb) * original_dim +
                 2 * (mb - qb) * (ms * ma - sum);
      ++results;
    }
  }
};

}  // namespace zvec::core


================================================
FILE: src/core/metric/quantized_integer_metric_matrix.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <ailego/math/euclidean_distance_matrix.h>
#include <ailego/math/inner_product_matrix.h>
#include <ailego/math/mips_euclidean_distance_matrix.h>
#include <ailego/math/norm2_matrix.h>
#include <ailego/math_batch/distance_batch.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_factory.h>
#include "metric_params.h"


namespace zvec::core {
//===========================================================
// SquaredEuclidean
//===========================================================

// Primary template; only the int8_t and uint8_t specializations below are
// defined.
template <typename T, size_t M, size_t N>
struct SquaredEuclidean;

// SquaredEuclidean for INT8-quantized vectors, M candidates against N queries.
//
// The incoming dim includes 20 trailing bytes per vector. Four floats per
// vector are read at byte offsets d * K, (d + 4) * K, (d + 8) * K and
// (d + 12) * K (plus 4 * index), where K is M for `m` and N for `q`.
// NOTE(review): a/b/s/s2 look like the per-vector scale, bias, sum and
// squared sum written by the quantizer — confirm.
template <size_t M, size_t N>
struct SquaredEuclidean<int8_t, M, N> {
  static void Compute(const int8_t *m, const int8_t *q, size_t dim,
                      float *out) {
    const size_t d = dim - 20;
    // Raw integer inner products land in out first (M * N values).
    ailego::InnerProductMatrix<int8_t, M, N>::Compute(m, q, d, out);

    // Reads the float stored at byte offset `off` of `base`.
    const auto load = [](const int8_t *base, size_t off) -> float {
      return *reinterpret_cast<const float *>(&base[off]);
    };

    float *cursor = out;
    for (size_t qi = 0; qi < N; ++qi) {
      const float qa = load(q, d * N + qi * 4);
      const float qb = load(q, (d + 4) * N + qi * 4);
      const float qs = load(q, (d + 8) * N + qi * 4);
      const float qs2 = load(q, (d + 12) * N + qi * 4);
      const float sum = qa * qs;
      const float sum2 = qa * qa * qs2;
      for (size_t mi = 0; mi < M; ++mi, ++cursor) {
        const float ma = load(m, d * M + mi * 4);
        const float mb = load(m, (d + 4) * M + mi * 4);
        const float ms = load(m, (d + 8) * M + mi * 4);
        const float ms2 = load(m, (d + 12) * M + mi * 4);
        *cursor = ma * ma * ms2 + sum2 - 2 * ma * qa * *cursor +
                  (mb - qb) * (mb - qb) * d + 2 * (mb - qb) * (ms * ma - sum);
      }
    }
  }
};

// SquaredEuclidean for INT4-quantized vectors (stored as uint8_t), M
// candidates against N queries.
//
// The incoming dim includes 32 trailing units; with p = (dim - 32) / 2, four
// floats per vector are read at byte offsets p * K, (p + 4) * K, (p + 8) * K
// and (p + 12) * K (plus 4 * index), where K is M for `m` and N for `q`.
// NOTE(review): a/b/s/s2 look like the per-vector scale, bias, sum and
// squared sum written by the quantizer — confirm.
template <size_t M, size_t N>
struct SquaredEuclidean<uint8_t, M, N> {
  static void Compute(const uint8_t *m, const uint8_t *q, size_t dim,
                      float *out) {
    const size_t d = dim - 32;
    const size_t p = d >> 1;  // byte offset of the parameter area
    // Raw integer inner products land in out first (M * N values).
    ailego::InnerProductMatrix<uint8_t, M, N>::Compute(m, q, d, out);

    // Reads the float stored at byte offset `off` of `base`.
    const auto load = [](const uint8_t *base, size_t off) -> float {
      return *reinterpret_cast<const float *>(&base[off]);
    };

    float *cursor = out;
    for (size_t qi = 0; qi < N; ++qi) {
      const float qa = load(q, p * N + qi * 4);
      const float qb = load(q, (p + 4) * N + qi * 4);
      const float qs = load(q, (p + 8) * N + qi * 4);
      const float qs2 = load(q, (p + 12) * N + qi * 4);
      const float sum = qa * qs;
      const float sum2 = qa * qa * qs2;
      for (size_t mi = 0; mi < M; ++mi, ++cursor) {
        const float ma = load(m, p * M + mi * 4);
        const float mb = load(m, (p + 4) * M + mi * 4);
        const float ms = load(m, (p + 8) * M + mi * 4);
        const float ms2 = load(m, (p + 12) * M + mi * 4);
        *cursor = ma * ma * ms2 + sum2 - 2 * ma * qa * *cursor +
                  (mb - qb) * (mb - qb) * d + 2 * (mb - qb) * (ms * ma - sum);
      }
    }
  }
};
//===========================================================
// MinusInnerProduct
//===========================================================

// Shared INT8 implementation behind MinusInnerProduct and
// CosineMinusInnerProduct.
//
// origin_dim is the number of quantized int8 values per vector. Three floats
// per vector are read at byte offsets d * K, (d + 4) * K and (d + 8) * K
// (plus 4 * index), where K is M for `m` and N for `q`. out receives the
// negated, de-quantized inner products.
// NOTE(review): a/b/s look like the per-vector scale, bias and sum written by
// the quantizer — confirm.
template <size_t M, size_t N>
static void MinusInnerProductImplInt8(const int8_t *m, const int8_t *q,
                                      size_t origin_dim, float *out) {
  const size_t d = origin_dim;
  // Raw integer inner products land in out first (M * N values).
  ailego::InnerProductMatrix<int8_t, M, N>::Compute(m, q, d, out);

  // Reads the float stored at byte offset `off` of `base`.
  const auto load = [](const int8_t *base, size_t off) -> float {
    return *reinterpret_cast<const float *>(&base[off]);
  };

  float *cursor = out;
  for (size_t qi = 0; qi < N; ++qi) {
    const float qa = load(q, d * N + qi * 4);
    const float qb = load(q, (d + 4) * N + qi * 4);
    const float qs = load(q, (d + 8) * N + qi * 4);
    for (size_t mi = 0; mi < M; ++mi, ++cursor) {
      const float ma = load(m, d * M + mi * 4);
      const float mb = load(m, (d + 4) * M + mi * 4);
      const float ms = load(m, (d + 8) * M + mi * 4);
      *cursor =
          -(ma * qa * *cursor + mb * qa * qs + qb * ma * ms + d * qb * mb);
    }
  }
}

// Shared INT4 (uint8_t storage) implementation behind MinusInnerProduct and
// CosineMinusInnerProduct.
//
// origin_dim is the number of quantized values per vector; with
// p = origin_dim / 2, three floats per vector are read at byte offsets p * K,
// (p + 4) * K and (p + 8) * K (plus 4 * index), where K is M for `m` and N
// for `q`. out receives the negated, de-quantized inner products.
// NOTE(review): a/b/s look like the per-vector scale, bias and sum written by
// the quantizer — confirm.
template <size_t M, size_t N>
static void MinusInnerProductImplUint8(const uint8_t *m, const uint8_t *q,
                                       size_t origin_dim, float *out) {
  const size_t d = origin_dim;
  const size_t p = d >> 1;  // byte offset of the parameter area
  // Raw integer inner products land in out first (M * N values).
  ailego::InnerProductMatrix<uint8_t, M, N>::Compute(m, q, d, out);

  // Reads the float stored at byte offset `off` of `base`.
  const auto load = [](const uint8_t *base, size_t off) -> float {
    return *reinterpret_cast<const float *>(&base[off]);
  };

  float *cursor = out;
  for (size_t qi = 0; qi < N; ++qi) {
    const float qa = load(q, p * N + qi * 4);
    const float qb = load(q, (p + 4) * N + qi * 4);
    const float qs = load(q, (p + 8) * N + qi * 4);
    for (size_t mi = 0; mi < M; ++mi, ++cursor) {
      const float ma = load(m, p * M + mi * 4);
      const float mb = load(m, (p + 4) * M + mi * 4);
      const float ms = load(m, (p + 8) * M + mi * 4);
      *cursor =
          -(ma * qa * *cursor + mb * qa * qs + qb * ma * ms + d * qb * mb);
    }
  }
}


// Primary template; only the int8_t and uint8_t specializations below are
// defined.
template <typename T, size_t M, size_t N>
struct MinusInnerProduct;

// MinusInnerProduct for INT8-quantized vectors: removes the 20 trailing bytes
// from dim and delegates to MinusInnerProductImplInt8.
template <size_t M, size_t N>
struct MinusInnerProduct<int8_t, M, N> {
  static void Compute(const int8_t *m, const int8_t *q, size_t dim,
                      float *out) {
    MinusInnerProductImplInt8<M, N>(m, q, dim - 20, out);
  }
};

// MinusInnerProduct for INT4-quantized vectors (uint8_t storage): removes the
// 32 trailing units from dim and delegates to MinusInnerProductImplUint8.
template <size_t M, size_t N>
struct MinusInnerProduct<uint8_t, M, N> {
  static void Compute(const uint8_t *m, const uint8_t *q, size_t dim,
                      float *out) {
    MinusInnerProductImplUint8<M, N>(m, q, dim - 32, out);
  }
};


//===========================================================
// CosineMinusInnerProduct
//===========================================================
// Primary template; only the int8_t and uint8_t specializations below are
// defined.
template <typename T, size_t M, size_t N>
struct CosineMinusInnerProduct;

// CosineMinusInnerProduct for INT8-quantized vectors: removes the 24 trailing
// bytes from dim and delegates to MinusInnerProductImplInt8.
template <size_t M, size_t N>
struct CosineMinusInnerProduct<int8_t, M, N> {
  static void Compute(const int8_t *m, const int8_t *q, size_t dim,
                      float *out) {
    MinusInnerProductImplInt8<M, N>(m, q, dim - 24, out);
  }
};

// CosineMinusInnerProduct for INT4-quantized vectors (uint8_t storage):
// removes the 40 trailing units from dim and delegates to
// MinusInnerProductImplUint8.
template <size_t M, size_t N>
struct CosineMinusInnerProduct<uint8_t, M, N> {
  static void Compute(const uint8_t *m, const uint8_t *q, size_t dim,
                      float *out) {
    MinusInnerProductImplUint8<M, N>(m, q, dim - 40, out);
  }
};

//===========================================================
// MipsSquaredEuclidean
//===========================================================

// Primary template; only the int8_t and uint8_t specializations below are
// defined.
template <typename T, size_t M, size_t N>
struct MipsSquaredEuclidean;

// MipsSquaredEuclidean for INT8-quantized vectors, M candidates against N
// queries.
//
// The incoming dim includes 20 trailing bytes per vector. Each output is
// 2 - 2 * ip / max(q2, m2), where ip is the de-quantized inner product and
// q2 / m2 are computed from the per-vector parameters of the query and the
// candidate respectively.
// NOTE(review): a/b/s/s2 look like the per-vector scale, bias, sum and
// squared sum written by the quantizer, which would make q2 / m2 squared
// norms — confirm.
template <size_t M, size_t N>
struct MipsSquaredEuclidean<int8_t, M, N> {
  static void Compute(const int8_t *m, const int8_t *q, size_t dim,
                      float *out) {
    const size_t d = dim - 20;
    // Raw integer inner products land in out first (M * N values).
    ailego::InnerProductMatrix<int8_t, M, N>::Compute(m, q, d, out);

    // Reads the float stored at byte offset `off` of `base`.
    const auto load = [](const int8_t *base, size_t off) -> float {
      return *reinterpret_cast<const float *>(&base[off]);
    };

    float *cursor = out;
    for (size_t qi = 0; qi < N; ++qi) {
      const float qa = load(q, d * N + qi * 4);
      const float qb = load(q, (d + 4) * N + qi * 4);
      const float qs = load(q, (d + 8) * N + qi * 4);
      const float qs2 = load(q, (d + 12) * N + qi * 4);
      const float q2 = qa * qa * qs2 + 2 * qa * qb * qs + d * qb * qb;
      for (size_t mi = 0; mi < M; ++mi, ++cursor) {
        const float ma = load(m, d * M + mi * 4);
        const float mb = load(m, (d + 4) * M + mi * 4);
        const float ms = load(m, (d + 8) * M + mi * 4);
        const float ms2 = load(m, (d + 12) * M + mi * 4);
        const float m2 = ma * ma * ms2 + 2 * ma * mb * ms + d * mb * mb;
        *cursor = 2.0f - 2.0f *
                             (ma * qa * *cursor + mb * qa * qs + qb * ma * ms +
                              d * qb * mb) /
                             std::max(q2, m2);
      }
    }
  }
};

// MipsSquaredEuclidean for INT4-quantized vectors (stored as uint8_t), M
// candidates against N queries.
//
// The incoming dim includes 32 trailing units; the parameter area starts
// (dim - 32) / 2 bytes into each vector. Each output is
// 2 - 2 * ip / max(q2, m2), where ip is the de-quantized inner product and
// q2 / m2 are computed from the per-vector parameters of the query and the
// candidate respectively.
// NOTE(review): a/b/s/s2 look like the per-vector scale, bias, sum and
// squared sum written by the quantizer, which would make q2 / m2 squared
// norms — confirm.
template <size_t M, size_t N>
struct MipsSquaredEuclidean<uint8_t, M, N> {
  static void Compute(const uint8_t *m, const uint8_t *q, size_t dim,
                      float *out) {
    const size_t d = dim - 32;
    const size_t p = d >> 1;  // byte offset of the parameter area
    // Raw integer inner products land in out first (M * N values).
    ailego::InnerProductMatrix<uint8_t, M, N>::Compute(m, q, d, out);

    // Reads the float stored at byte offset `off` of `base`.
    const auto load = [](const uint8_t *base, size_t off) -> float {
      return *reinterpret_cast<const float *>(&base[off]);
    };

    float *cursor = out;
    for (size_t qi = 0; qi < N; ++qi) {
      const float qa = load(q, p * N + qi * 4);
      const float qb = load(q, (p + 4) * N + qi * 4);
      const float qs = load(q, (p + 8) * N + qi * 4);
      const float qs2 = load(q, (p + 12) * N + qi * 4);
      const float q2 = qa * qa * qs2 + 2 * qa * qb * qs + d * qb * qb;
      for (size_t mi = 0; mi < M; ++mi, ++cursor) {
        const float ma = load(m, p * M + mi * 4);
        const float mb = load(m, (p + 4) * M + mi * 4);
        const float ms = load(m, (p + 8) * M + mi * 4);
        const float ms2 = load(m, (p + 12) * M + mi * 4);
        const float m2 = ma * ma * ms2 + 2 * ma * mb * ms + d * mb * mb;
        *cursor = 2.0f - 2.0f *
                             (ma * qa * *cursor + mb * qa * qs + qb * ma * ms +
                              d * qb * mb) /
                             std::max(q2, m2);
      }
    }
  }
};

}  // namespace zvec::core


================================================
FILE: src/core/mixed_reducer/CMakeLists.txt
================================================
# Pull in the project's shared CMake helper modules.
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

# Declare the core_mix_reducer library from every .cc file in this directory,
# linked against zvec_ailego and core_framework, with this directory and
# src/core on the include path.
# NOTE(review): cc_library and its STATIC/SHARED/STRICT/ALWAYS_LINK keywords
# are presumably defined by cmake/bazel.cmake — confirm their exact meaning
# there.
cc_library(
  NAME core_mix_reducer STATIC SHARED STRICT ALWAYS_LINK
  SRCS *.cc
  LIBS zvec_ailego core_framework
  INCS . ${PROJECT_ROOT_DIR}/src/core
  VERSION "${PROXIMA_ZVEC_VERSION}"
)


================================================
FILE: src/core/mixed_reducer/mixed_reducer_params.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <string>

namespace zvec {
namespace core {


// Parameter keys for the mixed streamer reducer.
static const std::string PARAM_MIXED_STREAMER_REDUCER_ENABLE_PK_REWRITE(
    "proxima.mixed.reducer.enable_pk_rewrite");
// Same key string as PARAM_MIXED_REDUCER_NUM_OF_ADD_THREADS below.
static const std::string PARAM_MIXED_STREAMER_REDUCER_NUM_OF_ADD_THREADS(
    "proxima.mixed.reducer.num_of_add_threads");

// Parameter keys under the "proxima.mixed.reducer." prefix.
static const std::string PARAM_MIXED_REDUCER_WORKING_PATH(
    "proxima.mixed.reducer.working_path");
static const std::string PARAM_MIXED_REDUCER_NUM_OF_ADD_THREADS(
    "proxima.mixed.reducer.num_of_add_threads");
static const std::string PARAM_MIXED_REDUCER_STREAMER_CLASS(
    "proxima.mixed.reducer.streamer_class");
static const std::string PARAM_MIXED_REDUCER_HYBRID_VECTOR_ENABLE(
    "proxima.mixed.reducer.hybrid_vector_enable");
static const std::string PARAM_MIXED_REDUCER_INDEX_NAME(
    "proxima.mixed.reducer.index_name");
static const std::string PARAM_MIXED_REDUCER_QUANTIZER_CLASS(
    "proxima.mixed.reducer.quantizer_class");


}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/mixed_reducer/mixed_streamer_reducer.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mixed_streamer_reducer.h"
#include <ailego/pattern/defer.h>
#include <utility/sparse_utility.h>
#include <zvec/ailego/utility/file_helper.h>
#include <zvec/ailego/utility/string_helper.h>
#include <zvec/ailego/utility/time_helper.h>
#include <zvec/core/framework/index_context.h>
#include <zvec/core/framework/index_factory.h>
#include <zvec/core/framework/index_holder.h>
#include <zvec/core/framework/index_logger.h>
#include "mixed_reducer/mixed_reducer_params.h"

namespace zvec {
namespace core {

// Reads the reducer options from params, keeps a copy of params, and moves
// the reducer to STATE_INITED.
//
// Returns 0 on success, or IndexError_InvalidArgument when the add-thread
// count is not greater than zero after reading the parameter.
int MixedStreamerReducer::init(const ailego::Params &params) {
  const std::string &pk_rewrite_key =
      PARAM_MIXED_STREAMER_REDUCER_ENABLE_PK_REWRITE;
  const std::string &add_threads_key =
      PARAM_MIXED_STREAMER_REDUCER_NUM_OF_ADD_THREADS;

  enable_pk_rewrite_ = params.get_as_bool(pk_rewrite_key);
  params.get(add_threads_key, &num_of_add_threads_);

  const bool has_add_threads = num_of_add_threads_ > 0;
  if (!has_add_threads) {
    LOG_ERROR("Wrong parameter. %s must be set greater than 0.",
              add_threads_key.c_str());
    return IndexError_InvalidArgument;
  }

  params_ = params;
  state_ = STATE_INITED;
  return 0;
}

int MixedStreamerReducer::cleanup(void) {
  streamers_.clear();
  target_streamer_->cleanup();

  target_builder_->cleanup();
  doc_cache_.clear();

  stats_.clear_attributes();
  state_ = STATE_UNINITED;
  return 0;
}

// Registers the merge target and the helpers used with it, then moves the
// reducer to STATE_STREAMER_SET.
//
// builder may be null (reduce() and dump() check it); streamer must not be,
// because its meta is read here to decide whether the index is sparse.
//
// Returns 0 on success, IndexError_Uninitialized when the reducer is not in
// STATE_INITED, or IndexError_InvalidArgument when streamer is null.
int MixedStreamerReducer::set_target_streamer_wiht_info(
    const IndexBuilder::Pointer builder, const IndexStreamer::Pointer streamer,
    const IndexConverter::Pointer converter,
    const IndexReformer::Pointer reformer,
    const IndexQueryMeta &original_query_meta) {
  if (state_ != STATE_INITED) {
    LOG_ERROR("Set target streamer after init");
    return IndexError_Uninitialized;
  }

  // Reject a null target before any member is touched, instead of
  // dereferencing it below.
  if (!streamer) {
    LOG_ERROR("Target streamer nullptr");
    return IndexError_InvalidArgument;
  }

  target_builder_ = builder;
  target_streamer_ = streamer;
  target_builder_converter_ = converter;
  target_streamer_reformer_ = reformer;
  original_query_meta_ = original_query_meta;

  is_sparse_ =
      target_streamer_->meta().meta_type() == IndexMeta::MetaType::MT_SPARSE;

  state_ = STATE_STREAMER_SET;
  return 0;
}

// Appends a source streamer (and the reformer that goes with it) to the list
// of inputs to merge, and moves the reducer to STATE_FEED.
//
// The new streamer must agree with the previously fed one on data type,
// dimension and unit size, and with the target streamer on meta type.
//
// Returns 0 on success, IndexError_Uninitialized when called in the wrong
// state, or IndexError_InvalidArgument for a null or mismatching streamer.
int MixedStreamerReducer::feed_streamer_with_reformer(
    IndexStreamer::Pointer streamer, const IndexReformer::Pointer reformer) {
  if (state_ != STATE_STREAMER_SET && state_ != STATE_FEED) {
    LOG_ERROR("Set target streamer or feed before feed");
    return IndexError_Uninitialized;
  }

  if (!streamer) {
    LOG_ERROR("Streamer nullptr");
    return IndexError_InvalidArgument;
  }

  const IndexMeta &target_meta = target_streamer_->meta();
  const IndexMeta &source_meta = streamer->meta();

  // Layout is compared against the most recently fed streamer only.
  // TODO: check target meta
  bool same_layout = true;
  if (!streamers_.empty()) {
    const auto &previous_meta = streamers_.back()->meta();
    same_layout = previous_meta.data_type() == source_meta.data_type() &&
                  previous_meta.dimension() == source_meta.dimension() &&
                  previous_meta.unit_size() == source_meta.unit_size();
  }

  // The metric name is deliberately not compared: when creating a new index
  // an ip_flat source may be merged into an l2_hnsw target.
  const bool same_meta_type =
      target_meta.meta_type() == source_meta.meta_type();

  if (!same_layout || !same_meta_type) {
    LOG_ERROR("Streamer meta mismatch");
    return IndexError_InvalidArgument;
  }

  // Recorded once, from the first source streamer.
  if (streamers_.empty()) {
    is_target_and_source_same_reformer_ =
        target_meta.reformer_name() == source_meta.reformer_name();
  }

  streamers_.push_back(streamer);
  source_streamers_reformers_.push_back(reformer);

  state_ = STATE_FEED;
  return 0;
}

// Merges all fed source streamers into the target.
//
// Consumer tasks (add_vec / add_sparse_vec) are submitted to the thread pool
// first; the calling thread then reads every source streamer in order and
// finally marks the queue as done before waiting for the consumers.
//
// Returns 0 on success, IndexError_Uninitialized when no streamer was fed or
// no thread pool is set, or IndexError_Runtime when any read or add step did
// not report 0.
int MixedStreamerReducer::reduce(const IndexFilter &filter) {
  if (state_ != STATE_FEED) {
    LOG_ERROR("Feed streamers first");
    return IndexError_Uninitialized;
  }
  if (thread_pool_ == nullptr) {
    LOG_ERROR("Thread pool is not set");
    return IndexError_Uninitialized;
  }

  ailego::ElapsedTime timer;


  // One result slot per consumer task; each starts at -1 and is handed to the
  // task by pointer.
  std::vector<int> add_results(num_of_add_threads_, -1);
  auto add_group = thread_pool_->make_group();

  // One result slot per source streamer, filled by the reads below.
  std::vector<int> read_results(streamers_.size(), -1);
  // TODO: use id instead of key
  // id_offset advances by each source's provider count; next_id is shared
  // across all sources and passed to the readers by pointer.
  uint32_t id_offset = 0, next_id = 0;

  if (is_sparse_) {
    for (size_t i = 0; i < num_of_add_threads_; i++) {
      add_group->submit(ailego::Closure::New(
          this, &MixedStreamerReducer::add_sparse_vec, &add_results[i]));
    }

    for (size_t i = 0; i < streamers_.size(); i++) {
      // due to filter, producing can't be parallel
      read_results[i] = read_sparse_vec(i, filter, id_offset, &next_id);
      id_offset += streamers_[i]->create_sparse_provider()->count();
    }

    // No more sparse items will be produced.
    sparse_mt_list_.done();
  } else {
    for (size_t i = 0; i < num_of_add_threads_; i++) {
      add_group->submit(ailego::Closure::New(
          this, &MixedStreamerReducer::add_vec, &add_results[i]));
      // add_vec(&add_results[i]);
    }

    for (size_t i = 0; i < streamers_.size(); i++) {
      read_results[i] = read_vec(i, filter, id_offset, &next_id);
      id_offset += streamers_[i]->create_provider()->count();
    }

    // No more dense items will be produced.
    mt_list_.done();
  }
  // Block until every submitted consumer task has finished.
  add_group->wait_finish();

  // True only when every slot holds 0.
  auto check_results = [](const std::vector<int> &results) -> bool {
    return std::all_of(std::begin(results), std::end(results),
                       [](int item) { return item == 0; });
  };

  if (!check_results(read_results)) {
    LOG_ERROR("Get vector from entities failed");
    return IndexError_Runtime;
  }

  if (!check_results(add_results)) {
    LOG_ERROR("add vector failed");
    return IndexError_Runtime;
  }

  stats_.set_reduced_costtime(timer.seconds());
  state_ = STATE_REDUCE;
  // NOTE(review): whatever IndexBuild() returns is not inspected here —
  // confirm whether a build failure should fail reduce().
  if (target_builder_ != nullptr) {
    IndexBuild();
  }

  LOG_INFO("End brute force reduce. cost time: [%zu]s",
           (size_t)timer.seconds());
  return 0;
}

// Dumps the reduced index through dumper.
//
// The target builder performs the dump when one is set; otherwise the target
// streamer does.
//
// Returns IndexError_NoReady when reduce() has not completed, otherwise the
// status reported by the builder's or streamer's dump (0 on success).
int MixedStreamerReducer::dump(const IndexDumper::Pointer &dumper) {
  LOG_INFO("Begin brute force reducer dump");

  if (state_ != STATE_REDUCE) {
    LOG_WARN("Reduce first before dump");
    return IndexError_NoReady;
  }

  // Capture the dump status so that failures are logged and propagated
  // instead of being reported as success.
  int ret = 0;
  if (target_builder_ != nullptr) {
    ret = target_builder_->dump(dumper);
  } else {
    ret = target_streamer_->dump(dumper);
  }

  if (ret == IndexError_NotImplemented) {
    LOG_WARN("Dump index not implemented");
  } else if (ret < 0) {
    LOG_ERROR("Failed to dump in streamer");
  }

  return ret;
}

int MixedStreamerReducer::read_vec(size_t source_streamer_index,
                                   const IndexFilter &filter,
                                   const uint32_t id_offset,
                                   uint32_t *next_id) {
  const auto &streamer = streamers_[source_streamer_index];
  const auto &reformer = source_streamers_reformers_[source_streamer_index];
  const IndexQueryMeta source_streamer_query_meta{streamer->meta().data_type(),
                                                  streamer->meta().dimension()};

  bool need_revert = (target_streamer_->meta().reformer_name() !=
                          streamer->meta().reformer_name() &&
                      reformer != nullptr);
  if (target_builder_ && reformer) {
    need_revert = true;
  }

  IndexProvider::Pointer provider = streamer->create_provider();
  IndexProvider::Iterator::Pointer iterator = provider->create_iterator();

  while (iterator->is_valid()) {
    if (stop_flag_ != nullptr && stop_flag_->load(std::memory_order_relaxed)) {
      LOG_DEBUG("read_vec cancelled.");
      return 0;
    }
    if (filter(iterator->key() + (uint64_t)id_offset)) {
      (*stats_.mutable_filtered_count())++;
      iterator->next();
      continue;
    }

    std::vector<uint8_t> bytes;
    if (need_revert) {
      std::string new_vector;
      if (reformer->revert(iterator->data(), source_streamer_query_meta,
                           &new_vector) != 0) {
        LOG_ERROR("Failed to revert the vector");
        return IndexError_Runtime;
      }
      bytes.resize(new_vector.size());
      memcpy(bytes.data(), new_vector.data(), bytes.size());
    } else {
      // TODO: eliminate the copy
      bytes.resize(provider->element_size());
      memcpy(bytes.data(), iterator->data(), bytes.size());
    }

    // TODO: use id instead of key
    if (!mt_list_.produce(VectorItem((*next_id)++, std::move(bytes)))) {
      LOG_ERROR("Produce vector to queue failed. key[%lu]",
                (size_t)iterator->key());
      return IndexError_Runtime;
    }
    iterator->next();
  }
  return 0;
}

void MixedStreamerReducer::add_vec(int *result) {
  if (target_builder_ != nullptr) {
    add_vec_with_builder(result);
    return;
  }
  ailego::ElapsedTime timer;
  auto target_streamer_context = target_streamer_->create_context();
  auto target_streamer_query_meta = IndexQueryMeta{
      IndexMeta::MetaType::MT_DENSE, target_streamer_->meta().data_type(),
      target_streamer_->meta().dimension()};
  const bool need_convert = (!is_target_and_source_same_reformer_) &&
                            target_streamer_reformer_ != nullptr;

  AILEGO_DEFER([&]() {
    // make producer quit
    mt_list_.done();
  });

  VectorItem vector_item;
  while (mt_list_.consume(&vector_item)) {
    if (stop_flag_ != nullptr && stop_flag_->load(std::memory_order_relaxed)) {
      LOG_DEBUG("add_vec cancelled.");
      return;
    }

    const void *vector = vector_item.vec_.data();
    std::string new_vector;


    if (need_convert) {
      IndexQueryMeta new_meta;
      if (target_streamer_reformer_->convert(vector, original_query_meta_,
                                             &new_vector, &new_meta) != 0) {
        LOG_ERROR("Failed to transform vector");
        *result = IndexError_Runtime;
        return;
      }
      vector = new_vector.data();
    }
    // 1. no reformer: target_streamer_query_meta_ = original_query_meta_
    // 2. has reformer, matched(need_convert = false): use
    // target_streamer_query_meta_
    // 3. has reformer, not matched(need_convert = true): use
    // target_streamer_query_meta_


    // TODO: use id instead of key
    int ret = target_streamer_->add_with_id_impl(
        (uint32_t)vector_item.pkey_, vector, target_streamer_query_meta,
        target_streamer_context);
    if (ret != 0) {
      LOG_ERROR("Insert target streamer failed. ret[%d] reason[%s] pkey[%zu]",
                ret, IndexError::What(ret), (size_t)vector_item.pkey_);
      *result = ret;
      return;
    }
  }

  *result = 0;
  LOG_DEBUG("add_vec. cost time: [%zu]s", (size_t)timer.seconds());
  return;
}

void MixedStreamerReducer::add_vec_with_builder(int *result) {
  ailego::ElapsedTime timer;
  auto target_streamer_query_meta = IndexQueryMeta{
      IndexMeta::MetaType::MT_DENSE, target_streamer_->meta().data_type(),
      target_streamer_->meta().dimension()};

  AILEGO_DEFER([&]() {
    // make producer quit
    mt_list_.done();
  });

  VectorItem vector_item;
  while (mt_list_.consume(&vector_item)) {
    if (stop_flag_ != nullptr && stop_flag_->load(std::memory_order_relaxed)) {
      LOG_DEBUG("add_vec cancelled.");
      return;
    }

    const void *vector = vector_item.vec_.data();
    std::string out_vector_buffer = std::string(
        static_cast<const char *>(vector),
        original_query_meta_.dimension() * original_query_meta_.unit_size());
    PushToDocCache(original_query_meta_, (uint32_t)vector_item.pkey_,
                   out_vector_buffer);
  }

  *result = 0;
  LOG_DEBUG("add_vec. cost time: [%zu]s", (size_t)timer.seconds());
  return;
}

void MixedStreamerReducer::add_sparse_vec(int *result) {
  ailego::ElapsedTime timer;
  auto target_streamer_context = target_streamer_->create_context();
  auto target_streamer_query_meta = IndexQueryMeta{
      IndexMeta::MetaType::MT_SPARSE,
      target_streamer_->meta().data_type(),
  };

  auto need_convert = !is_target_and_source_same_reformer_ &&
                      target_streamer_reformer_ != nullptr;

  AILEGO_DEFER([&]() {
    // make producer quit
    sparse_mt_list_.done();
  });

  SparseVectorItem sparse_vector_item;
  while (sparse_mt_list_.consume(&sparse_vector_item)) {
    if (stop_flag_ != nullptr && stop_flag_->load(std::memory_order_relaxed)) {
      LOG_DEBUG("add_sparse_vec cancelled.");
      return;
    }
    auto sparse_count = sparse_vector_item.sparse_indices_.size();
    auto indices = sparse_vector_item.sparse_indices_.data();
    auto values = sparse_vector_item.sparse_values_.data();

    std::string converted_sparse_values_buffer;
    if (need_convert) {
      IndexQueryMeta new_meta;
      if (target_streamer_reformer_->convert(
              sparse_count, indices, values, original_query_meta_,
              &converted_sparse_values_buffer, &new_meta) != 0) {
        LOG_ERROR("Failed to transform vector");
        *result = IndexError_Runtime;
        return;
      }
      values = converted_sparse_values_buffer.data();
      target_streamer_query_meta = new_meta;
    }

    // TODO: use id instead of key
    int ret = target_streamer_->add_with_id_impl(
        (uint32_t)sparse_vector_item.pkey_, sparse_count, indices, values,
        target_streamer_query_meta, target_streamer_context);
    if (ret != 0) {
      LOG_ERROR("Insert target streamer failed. ret[%d] reason[%s] pkey[%zu]",
                ret, IndexError::What(ret), (size_t)sparse_vector_item.pkey_);
      *result = ret;
      return;
    }
  }

  *result = 0;
  LOG_DEBUG("add_sparse_vec. cost time: [%zu]s", (size_t)timer.seconds());
  return;
}


int MixedStreamerReducer::read_sparse_vec(size_t source_streamer_index,
                                          const IndexFilter &filter,
                                          const uint32_t id_offset,
                                          uint32_t *next_id) {
  const auto &streamer = streamers_[source_streamer_index];
  const auto &reformer = source_streamers_reformers_[source_streamer_index];
  const bool need_revert =
      !is_target_and_source_same_reformer_ && reformer != nullptr;

  IndexStreamer::SparseProvider::Pointer provider =
      streamer->create_sparse_provider();
  IndexStreamer::SparseProvider::Iterator::Pointer iterator =
      provider->create_iterator();

  while (iterator->is_valid()) {
    if (stop_flag_ != nullptr && stop_flag_->load(std::memory_order_relaxed)) {
      LOG_DEBUG("read_sparse_vec cancelled.");
      return 0;
    }
    if (filter(iterator->key() + (uint64_t)id_offset)) {
      (*stats_.mutable_filtered_count())++;
      iterator->next();
      continue;
    }

    auto sparse_count = iterator->sparse_count();
    std::vector<uint32_t> sparse_indices(sparse_count);
    std::string sparse_values;

    if (need_revert) {
      std::string new_sparse_values;
      if (reformer->revert(iterator->sparse_count(), iterator->sparse_indices(),
                           iterator->sparse_data(),
                           {
                               IndexMeta::MetaType::MT_SPARSE,
                               streamer->meta().data_type(),
                           },
                           &new_sparse_values) != 0) {
        LOG_ERROR("Failed to revert the sparse vector");
        return IndexError_Runtime;
      }
      sparse_values = std::move(new_sparse_values);
    } else {
      sparse_values.resize(sparse_count * streamer->meta().unit_size());
      memcpy(sparse_values.data(), iterator->sparse_data(),
             sparse_values.size());
    }

    // TODO: eliminate the copy
    memcpy(sparse_indices.data(), iterator->sparse_indices(),
           sparse_indices.size() * sizeof(uint32_t));

    // TODO: use id instead of key
    if (!sparse_mt_list_.produce(SparseVectorItem((*next_id)++,
                                                  std::move(sparse_indices),
                                                  std::move(sparse_values)))) {
      LOG_ERROR("Produce vector to queue failed. key[%lu]",
                (size_t)iterator->key());
      return IndexError_Runtime;
    }
    iterator->next();
  }
  return 0;
}

//! Store `doc` at slot `doc_id` of the document cache (guarded by mutex_).
//!
//! When the cache is shorter than doc_id + 1 it is grown first; every new
//! slot is a placeholder keyed kInvalidKey holding a zero-filled buffer of
//! meta.dimension() * meta.unit_size() bytes.
//!
//! @param meta    supplies the byte size of placeholder buffers
//! @param doc_id  slot index, also stored as the entry's key
//! @param doc     vector bytes to store (copied)
void MixedStreamerReducer::PushToDocCache(const IndexQueryMeta &meta,
                                          uint32_t doc_id, std::string &doc) {
  std::lock_guard<std::mutex> lock(mutex_);
  if (doc_cache_.size() <= doc_id) {
    // Build the placeholder once and grow in a single step, instead of
    // constructing and copying a fresh filler string per missing slot.
    const std::string fake_data(meta.dimension() * meta.unit_size(), '\0');
    doc_cache_.resize(static_cast<size_t>(doc_id) + 1,
                      std::make_pair(kInvalidKey, fake_data));
  }
  doc_cache_[doc_id] = std::make_pair(doc_id, doc);
}

int MixedStreamerReducer::IndexBuild() {
  IndexHolder::Pointer target_holder;
  if (original_query_meta_.data_type() == core::IndexMeta::DataType::DT_FP16) {
    auto holder = std::make_shared<
        zvec::core::MultiPassIndexHolder<core::IndexMeta::DataType::DT_FP16>>(
        original_query_meta_.dimension());
    for (auto doc : doc_cache_) {
      ailego::NumericalVector<uint16_t> vec(doc.second);
      if (doc.first == kInvalidKey) {
        continue;
      }
      if (!holder->emplace(doc.first, vec)) {
        LOG_ERROR("Failed to add vector");
        return core::IndexError_Runtime;
      }
    }
    target_holder = holder;
  } else if (original_query_meta_.data_type() ==
             core::IndexMeta::DataType::DT_FP32) {
    auto holder = std::make_shared<
        zvec::core::MultiPassIndexHolder<core::IndexMeta::DataType::DT_FP32>>(
        original_query_meta_.dimension());
    for (auto doc : doc_cache_) {
      ailego::NumericalVector<float> vec(doc.second);
      if (doc.first == kInvalidKey) {
        continue;
      }
      if (!holder->emplace(doc.first, vec)) {
        LOG_ERROR("Failed to add vector");
        return core::IndexError_Runtime;
      }
    }
    target_holder = holder;
  } else if (original_query_meta_.data_type() ==
             core::IndexMeta::DataType::DT_INT8) {
    auto holder = std::make_shared<
        zvec::core::MultiPassIndexHolder<core::IndexMeta::DataType::DT_INT8>>(
        original_query_meta_.dimension());
    for (auto doc : doc_cache_) {
      ailego::NumericalVector<uint8_t> vec(doc.second);
      if (doc.first == kInvalidKey) {
        continue;
      }
      if (!holder->emplace(doc.first, vec)) {
        LOG_ERROR("Failed to add vector");
        return core::IndexError_Runtime;
      }
    }
    target_holder = holder;
  } else {
    LOG_ERROR("data_type is not support");
    return core::IndexError_Runtime;
  }
  if (target_builder_converter_) {
    core::IndexConverter::TrainAndTransform(target_builder_converter_,
                                            target_holder);
    target_holder = target_builder_converter_->result();
  }
  target_builder_->train(target_holder);
  target_builder_->build(target_holder);
  return 0;
}

// Factory registration for MixedStreamerReducer. The macro is defined outside
// this file; presumably it makes the class creatable by the given alias name
// ("MixedStreamerReducer") — confirm against the index factory header.
INDEX_FACTORY_REGISTER_STREAMER_REDUCER_ALIAS(MixedStreamerReducer,
                                              MixedStreamerReducer);

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/mixed_reducer/mixed_streamer_reducer.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <vector>
#include <ailego/parallel/lock.h>
#include <ailego/parallel/multi_thread_list.h>
#include <utility/sparse_utility.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_reducer.h>
#include <zvec/core/framework/index_reformer.h>
#include <zvec/core/framework/index_searcher.h>
#include <zvec/core/framework/index_streamer.h>

namespace zvec {
namespace core {


//! Streamer reducer that copies the vectors of several source streamers into
//! one target.
//!
//! Reader methods (read_vec / read_sparse_vec) iterate a source streamer and
//! push items into a multi-thread list; adder methods (add_vec /
//! add_sparse_vec / add_vec_with_builder) consume that list and either insert
//! into the target streamer or cache the documents for a target builder that
//! is trained and built in IndexBuild().
class MixedStreamerReducer : public IndexStreamerReducer {
 public:
  //! Constructor
  MixedStreamerReducer(void) {}

  //! Initialize Reducer
  int init(const ailego::Params &params) override;

  //! Cleanup Reducer
  int cleanup(void) override;

  //! Reduce operator (with filter)
  int reduce(const IndexFilter &filter) override;

  //! Dump index by dumper
  int dump(const IndexDumper::Pointer &dumper) override;

 public:  // StreamerReducer's unique methods
  //! Set the reduce target (builder and/or streamer) together with its
  //! converter, reformer and the query meta of the original vectors.
  //! NOTE(review): "wiht" is a typo for "with", but the name is an override
  //! of the base-class interface and cannot be changed here alone.
  int set_target_streamer_wiht_info(
      const IndexBuilder::Pointer builder,
      const IndexStreamer::Pointer streamer,
      const IndexConverter::Pointer converter,
      const IndexReformer::Pointer reformer,
      const IndexQueryMeta &original_query_meta) override;
  //! Add one source streamer and the reformer associated with it
  int feed_streamer_with_reformer(
      IndexStreamer::Pointer streamer,
      const IndexReformer::Pointer reformer) override;

 private:
  //! Producer: read dense vectors of one source streamer into mt_list_
  int read_vec(size_t source_streamer_index, const IndexFilter &filter,
               const uint32_t id_offset, uint32_t *next_id);
  //! Consumer: insert queued dense vectors into the target streamer
  void add_vec(int *result);
  //! Consumer: cache queued dense vectors for the target builder
  void add_vec_with_builder(int *result);
  //! Producer: read sparse vectors of one source streamer into
  //! sparse_mt_list_
  int read_sparse_vec(size_t source_streamer_index, const IndexFilter &filter,
                      const uint32_t id_offset, uint32_t *next_id);
  //! Consumer: insert queued sparse vectors into the target streamer
  void add_sparse_vec(int *result);

  //! Store a document at slot doc_id of doc_cache_ (takes mutex_)
  void PushToDocCache(const IndexQueryMeta &meta, uint32_t doc_id,
                      std::string &doc);
  //! Build the target builder's index from doc_cache_
  int IndexBuild();

  //! Retrieve statistics
  const Stats &stats(void) const override {
    return stats_;
  }

 private:
  //! Lifecycle states; dump() requires STATE_REDUCE
  enum State {
    STATE_UNINITED,
    STATE_INITED,
    STATE_STREAMER_SET,
    STATE_FEED,
    STATE_REDUCE
  };

  bool enable_pk_rewrite_{false};
  bool is_sparse_{false};

  Stats stats_{};
  State state_{STATE_UNINITED};

  //! Number of consumer tasks submitted by reduce()
  size_t num_of_add_threads_{0};
  //! Producer/consumer queues for dense and sparse items
  ailego::MultiThreadList<VectorItem> mt_list_;
  ailego::MultiThreadList<SparseVectorItem> sparse_mt_list_;


  ailego::Params params_;
  IndexStreamer::Pointer target_streamer_{nullptr};
  IndexReformer::Pointer target_streamer_reformer_{nullptr};
  bool is_target_and_source_same_reformer_{false};
  //! Meta of the un-reformed vectors (used for conversion and cache sizing)
  IndexQueryMeta original_query_meta_{};

  //! Source streamers and their reformers, indexed in parallel
  std::vector<IndexStreamer::Pointer> streamers_;
  std::vector<IndexReformer::Pointer> source_streamers_reformers_;

  IndexBuilder::Pointer target_builder_{nullptr};
  IndexConverter::Pointer target_builder_converter_{nullptr};
  //! Guards doc_cache_ in PushToDocCache
  std::mutex mutex_{};
  //! (key, vector bytes) per document id; unused slots carry kInvalidKey
  std::vector<std::pair<uint64_t, std::string>> doc_cache_;
  //! Key marking a placeholder slot in doc_cache_
  const uint64_t kInvalidKey = std::numeric_limits<uint64_t>::max();
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/quantizer/CMakeLists.txt
================================================
# Build script for the quantizer component.
# cc_library() is a project helper; presumably it comes from the included
# cmake/bazel.cmake — confirm there, along with the exact meaning of the
# STATIC/SHARED/STRICT/ALWAYS_LINK flags.
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

# core_quantizer: all *.cc files in this directory, linked against
# zvec_ailego and core_framework, with this directory and src/core on the
# include path.
cc_library(
        NAME core_quantizer 
        STATIC SHARED STRICT ALWAYS_LINK
        SRCS *.cc
        LIBS zvec_ailego core_framework
        INCS . ${PROJECT_ROOT_DIR}/src/core
        VERSION "${PROXIMA_ZVEC_VERSION}"
)


================================================
FILE: src/core/quantizer/binary_converter.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <iterator>
#include <ailego/algorithm/binary_quantizer.h>
#include <ailego/pattern/defer.h>
#include <core/quantizer/quantizer_params.h>
#include <zvec/core/framework/index_factory.h>

namespace zvec {
namespace core {

/*! Binary Quantizer Converter Holder
 *
 *  Wraps a front holder and exposes its records as DT_BINARY32 data: each
 *  iterator encodes the current front record with the shared quantizer into
 *  an internal buffer.
 */
class BinaryConverterHolder : public IndexHolder {
 public:
  /*! Holder Iterator
   *
   *  Forwards validity, key and advance to the wrapped iterator and returns
   *  the encoded buffer as its data.
   */
  class Iterator : public IndexHolder::Iterator {
   public:
    //! Constructor: sizes the output buffer and encodes the first record
    Iterator(const BinaryConverterHolder *owner,
             IndexHolder::Iterator::Pointer &&iter)
        : encoded_(ailego::BinaryQuantizer::EncodedSizeInBinary32(
                       owner->dimension()),
                   0),
          source_(std::move(iter)),
          quantizer_(owner->quantizer_),
          dim_{owner->dimension()} {
      this->refresh();
    }

    //! Destructor
    virtual ~Iterator(void) {}

    //! Retrieve pointer of data
    const void *data(void) const override {
      return encoded_.data();
    }

    //! Test if the iterator is valid
    bool is_valid(void) const override {
      return source_->is_valid();
    }

    //! Retrieve primary key
    uint64_t key(void) const override {
      return source_->key();
    }

    //! Next iterator
    void next(void) override {
      source_->next();
      this->refresh();
    }

   private:
    //! Re-encode the current front record; no-op once the source is exhausted
    inline void refresh(void) {
      if (!source_->is_valid()) {
        return;
      }
      const float *input = reinterpret_cast<const float *>(source_->data());
      // NOTE(review): dim_ is the holder's (padded, binary) dimension and only
      // half of it is passed here, whereas BinaryReformer passes the full
      // query dimension to encode() — confirm this is intended.
      quantizer_->encode(input, dim_ / 2, encoded_.data());
    }

    //! Members
    std::vector<uint32_t> encoded_{};
    IndexHolder::Iterator::Pointer source_{};
    std::shared_ptr<ailego::BinaryQuantizer> quantizer_{};
    size_t dim_{0u};
  };

  //! Constructor
  BinaryConverterHolder(IndexHolder::Pointer front,
                        std::shared_ptr<ailego::BinaryQuantizer> quantizer)
      : front_(std::move(front)), quantizer_(std::move(quantizer)) {}

  //! Retrieve count of elements in holder (-1 indicates unknown)
  size_t count(void) const override {
    return front_->count();
  }

  //! Retrieve dimension (encoded 32-bit word count of the front dimension,
  //! times 32)
  size_t dimension(void) const override {
    const auto words =
        ailego::BinaryQuantizer::EncodedSizeInBinary32(front_->dimension());
    return words * 32u;
  }

  //! Retrieve type information
  IndexMeta::DataType data_type(void) const override {
    return IndexMeta::DataType::DT_BINARY32;
  }

  //! Retrieve element size in bytes
  size_t element_size(void) const override {
    return IndexMeta::ElementSizeof(IndexMeta::DataType::DT_BINARY32,
                                    this->dimension());
  }

  //! Retrieve if it can multi-pass
  bool multipass(void) const override {
    return front_->multipass();
  }

  //! Create a new iterator (empty pointer when the front holder yields none)
  IndexHolder::Iterator::Pointer create_iterator(void) override {
    IndexHolder::Iterator::Pointer source = front_->create_iterator();
    if (!source) {
      return IndexHolder::Iterator::Pointer();
    }
    return IndexHolder::Iterator::Pointer(
        new BinaryConverterHolder::Iterator(this, std::move(source)));
  }

 private:
  //! Members
  IndexHolder::Pointer front_{};
  std::shared_ptr<ailego::BinaryQuantizer> quantizer_{};
};

/*! Binary Converter
 */
class BinaryConverter : public IndexConverter {
 public:
  //! Destructor
  virtual ~BinaryConverter(void) {}

  //! Initialize Converter
  int init(const IndexMeta &mt, const ailego::Params &params) override {
    if (ailego_unlikely(mt.data_type() != IndexMeta::DataType::DT_FP32 ||
                        mt.unit_size() != IndexMeta::UnitSizeof(
                                              IndexMeta::DataType::DT_FP32))) {
      LOG_ERROR("Unsupported type %d with unit size %u.", mt.data_type(),
                mt.unit_size());
      return IndexError_Unsupported;
    }

    meta_ = mt;

    ailego::Params reformer_params;
    meta_.set_reformer("BinaryReformer", 0, reformer_params);

    if (meta_.metric_name() != "InnerProduct") {
      LOG_ERROR("Only InnerProduct Supported");
      return IndexError_Unsupported;
    }

    dimension_ = meta_.dimension();

    size_t dim =
        ailego::BinaryQuantizer::EncodedSizeInBinary32(dimension_) * 32u;

    meta_.set_metric("Hamming", 0, ailego::Params());
    meta_.set_converter("BinaryConverter", 0, params);
    meta_.set_meta(IndexMeta::DataType::DT_BINARY32, dim);

    return 0;
  }

  //! Cleanup Converter
  int cleanup(void) override {
    return 0;
  }

  //! Train the data
  int train(IndexHolder::Pointer holder) override {
    if (holder->dimension() != dimension_ ||
        holder->data_type() != IndexMeta::DataType::DT_FP32) {
      return IndexError_Mismatch;
    }

    return 0;
  }

  //! Transform the data
  int transform(IndexHolder::Pointer holder) override {
    if (holder->data_type() != IndexMeta::DataType::DT_FP32 ||
        holder->dimension() != dimension_) {
      return IndexError_Mismatch;
    }

    if (holder->count() > 0) {
      *stats_.mutable_transformed_count() += holder->count();
    }
    holder_ =
        std::make_shared<BinaryConverterHolder>(std::move(holder), quantizer_);
    return 0;
  }

  //! Dump index into storage
  int dump(const IndexDumper::Pointer &) override {
    return 0;
  }

  //! Retrieve statistics
  const Stats &stats(void) const override {
    return stats_;
  }

  //! Retrieve a holder as result
  IndexHolder::Pointer result(void) const override {
    return holder_;
  }

  //! Retrieve Index Meta
  const IndexMeta &meta(void) const override {
    return meta_;
  }

 private:
  //! Members
  IndexMeta meta_{};
  IndexHolder::Pointer holder_{};
  std::shared_ptr<ailego::BinaryQuantizer> quantizer_{};
  Stats stats_{};
  size_t dimension_{0u};
};

// Factory registration for BinaryConverter. The macro is defined outside this
// file; presumably it makes the class creatable by name ("BinaryConverter",
// the name init() stores in the meta) — confirm against the factory header.
INDEX_FACTORY_REGISTER_CONVERTER(BinaryConverter);

}  // namespace core
}  // namespace zvec

================================================
FILE: src/core/quantizer/binary_reformer.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <ailego/algorithm/binary_quantizer.h>
#include <core/quantizer/quantizer_params.h>
#include <zvec/core/framework/index_factory.h>

namespace zvec {
namespace core {

/*! Binary Reformer
 *
 *  Encodes FP32 queries into DT_BINARY32 codes with a BinaryQuantizer. Each
 *  encoded query occupies ElementSizeof(DT_BINARY32, dim) bytes, where dim is
 *  the query dimension rounded up to whole 32-bit words.
 */
class BinaryReformer : public IndexReformer {
 public:
  //! Initialize Reformer
  int init(const ailego::Params & /*params*/) override {
    return 0;
  }

  //! Cleanup Reformer
  int cleanup(void) override {
    return 0;
  }

  //! Load index from container
  int load(IndexStorage::Pointer) override {
    return 0;
  }

  //! Unload index
  int unload(void) override {
    return 0;
  }

  //! Transform query
  //! Returns IndexError_Unsupported unless the query meta is FP32.
  int transform(const void *query, const IndexQueryMeta &qmeta,
                std::string *out, IndexQueryMeta *ometa) const override {
    return this->transform(query, qmeta, 1u, out, ometa);
  }

  //! Transform queries
  //! `query` holds `count` consecutive FP32 vectors of qmeta.dimension()
  //! elements; `out` receives `count` consecutive encoded elements.
  //! Returns IndexError_Unsupported unless the query meta is FP32.
  int transform(const void *query, const IndexQueryMeta &qmeta, uint32_t count,
                std::string *out, IndexQueryMeta *ometa) const override {
    IndexMeta::DataType ft = qmeta.data_type();
    if (ft != IndexMeta::DataType::DT_FP32 ||
        qmeta.unit_size() !=
            IndexMeta::UnitSizeof(IndexMeta::DataType::DT_FP32)) {
      return IndexError_Unsupported;
    }

    const size_t source_dim = qmeta.dimension();
    size_t dim =
        ailego::BinaryQuantizer::EncodedSizeInBinary32(source_dim) * 32u;
    const size_t element_bytes =
        IndexMeta::ElementSizeof(IndexMeta::DataType::DT_BINARY32, dim);

    // Start from zeroed storage so padding bits never carry stale content.
    out->assign(count * element_bytes, '\0');

    const float *vec = reinterpret_cast<const float *>(query);
    char *dst = &(*out)[0];

    // Encode query by query. Encoding all `dimension * count` floats as one
    // stream packs the bits back to back, which does not match the padded
    // per-element layout whenever the dimension is not a multiple of 32.
    for (uint32_t i = 0; i < count; ++i) {
      quantizer_.encode(
          vec + static_cast<size_t>(i) * source_dim, source_dim,
          reinterpret_cast<uint32_t *>(dst +
                                       static_cast<size_t>(i) * element_bytes));
    }

    *ometa = qmeta;
    ometa->set_meta(IndexMeta::DataType::DT_BINARY32, dim);

    return 0;
  }

  //! Normalize results
  int normalize(const void *, const IndexQueryMeta &,
                IndexDocumentList &) const override {
    return 0;
  }

 private:
  //! Members
  ailego::BinaryQuantizer quantizer_{};
};

// Factory registration for BinaryReformer. The macro is defined outside this
// file; presumably it makes the class creatable by name ("BinaryReformer",
// the reformer name BinaryConverter stores in the meta) — confirm against the
// factory header.
INDEX_FACTORY_REGISTER_REFORMER(BinaryReformer);

}  // namespace core
}  // namespace zvec

================================================
FILE: src/core/quantizer/cosine_converter.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <iterator>
#include <ailego/algorithm/integer_quantizer.h>
#include <ailego/math/norm2_matrix.h>
#include <ailego/math/normalizer.h>
#include <ailego/pattern/defer.h>
#include <core/quantizer/quantizer_params.h>
#include <zvec/core/framework/index_factory.h>
#include "record_quantizer.h"
#include "../metric/metric_params.h"

namespace zvec {
namespace core {

/*! Cosine Converter Holder
 *
 *  Wraps a front holder of FP16/FP32 vectors. Each record is L2-normalized
 *  and, depending on the destination type, kept as is, converted to FP16 or
 *  quantized to INT4/INT8; the norm (a float) is stored behind the vector
 *  data. The reported dimension is the front dimension plus
 *  ExtraDimension(type).
 */
class CosineConverterHolder : public IndexHolder {
 public:
  //! Bytes used to store the norm of a record
  static constexpr size_t NORM_SIZE = sizeof(float);

  /*! Holder Iterator
   *
   *  Forwards validity, key and advance to the wrapped iterator and exposes
   *  the converted record.
   */
  class Iterator : public IndexHolder::Iterator {
   public:
    //! Constructor: sizes the work buffers and converts the first record
    Iterator(const CosineConverterHolder *owner,
             IndexHolder::Iterator::Pointer &&iter,
             IndexMeta::DataType original_type, IndexMeta::DataType type)
        : owner_(owner),
          front_iter_(std::move(iter)),
          original_type_(original_type),
          type_(type) {
      // Two separate statements; they used to be joined by a stray comma
      // operator (`dimension_ = ...,`).
      dimension_ = owner_->dimension();
      original_dimension_ = dimension_ - ExtraDimension(type_);
      size_t element_size = owner->element_size();

      if (original_type_ == IndexMeta::DataType::DT_FP16) {
        normalize_buffer_.resize(dimension_ * sizeof(ailego::Float16));
      } else {  // original_type_ == IndexMeta::DataType::DT_FP32
        normalize_buffer_.resize(dimension_ * sizeof(float));

        // A second buffer is only needed when the output type differs from
        // the FP32 input.
        if (type_ == IndexMeta::DataType::DT_FP16 ||
            type_ == IndexMeta::DataType::DT_INT4 ||
            type_ == IndexMeta::DataType::DT_INT8) {
          buffer_.resize(element_size, 0);
        }
      }

      this->convert_record();
    }

    //! Destructor
    virtual ~Iterator(void) {}

    //! Retrieve pointer of data (the normalized buffer when no type
    //! conversion is needed, the converted buffer otherwise)
    const void *data(void) const override {
      return type_ == original_type_ ? normalize_buffer_.data()
                                     : buffer_.data();
    }

    //! Test if the iterator is valid
    bool is_valid(void) const override {
      return front_iter_->is_valid();
    }

    //! Retrieve primary key
    uint64_t key(void) const override {
      return front_iter_->key();
    }

    //! Next iterator
    void next(void) override {
      front_iter_->next();
      this->convert_record();
    }

   private:
    //! Normalize (and convert/quantize) the current front record; no-op once
    //! the front iterator is exhausted
    void convert_record(void) {
      if (!front_iter_->is_valid()) {
        return;
      }

      size_t element_size = owner_->element_size();
      size_t original_element_size =
          IndexMeta::ElementSizeof(original_type_, original_dimension_);

      if (original_type_ == IndexMeta::DataType::DT_FP16) {
        ::memcpy(reinterpret_cast<char *>(&normalize_buffer_[0]),
                 reinterpret_cast<const char *>(front_iter_->data()),
                 original_element_size);

        ailego::Float16 *buf =
            reinterpret_cast<ailego::Float16 *>(&normalize_buffer_[0]);

        float norm = 0.0f;
        ailego::Normalizer<ailego::Float16>::L2(buf, original_dimension_,
                                                &norm);

        // Norm goes right behind the FP16 elements.
        ::memcpy(reinterpret_cast<uint16_t *>(&normalize_buffer_[0]) +
                     original_dimension_,
                 &norm, NORM_SIZE);
      } else {  // original_type_ == IndexMeta::DataType::DT_FP32
        ::memcpy(reinterpret_cast<char *>(&normalize_buffer_[0]),
                 reinterpret_cast<const char *>(front_iter_->data()),
                 original_element_size);

        float *buf = reinterpret_cast<float *>(&normalize_buffer_[0]);

        float norm = 0.0f;
        ailego::Normalizer<float>::L2(buf, original_dimension_, &norm);

        if (type_ == IndexMeta::DataType::DT_FP32) {
          ::memcpy(reinterpret_cast<float *>(&normalize_buffer_[0]) +
                       original_dimension_,
                   &norm, NORM_SIZE);
        } else if (type_ == IndexMeta::DataType::DT_FP16) {
          ailego::FloatHelper::ToFP16(
              buf, original_dimension_,
              reinterpret_cast<uint16_t *>(&buffer_[0]));

          ::memcpy(
              reinterpret_cast<uint16_t *>(&buffer_[0]) + original_dimension_,
              &norm, NORM_SIZE);
        } else if (type_ == IndexMeta::DataType::DT_INT4 ||
                   type_ == IndexMeta::DataType::DT_INT8) {
          RecordQuantizer::quantize_record(
              reinterpret_cast<const float *>(normalize_buffer_.data()),
              original_dimension_, type_, false, &buffer_[0]);

          // Norm occupies the last NORM_SIZE bytes of the element.
          ::memcpy(reinterpret_cast<uint8_t *>(&buffer_[0]) + element_size -
                       NORM_SIZE,
                   &norm, NORM_SIZE);
        }
      }
    }

    //! Members
    const CosineConverterHolder *owner_{nullptr};
    std::string buffer_{};
    std::string normalize_buffer_{};
    IndexHolder::Iterator::Pointer front_iter_{};
    size_t dimension_{0u};
    size_t original_dimension_{0u};
    IndexMeta::DataType original_type_{IndexMeta::DataType::DT_UNDEFINED};
    IndexMeta::DataType type_{IndexMeta::DataType::DT_UNDEFINED};
  };

  //! Constructor
  CosineConverterHolder(IndexHolder::Pointer front,
                        IndexMeta::DataType original_type,
                        IndexMeta::DataType type)
      : front_(std::move(front)),
        original_type_(original_type),
        type_(type),
        dimension_(front_->dimension()) {}

  //! Retrieve count of elements in holder (-1 indicates unknown)
  size_t count(void) const override {
    return front_->count();
  }

  //! Retrieve dimension (front dimension plus the extra slots of the type)
  size_t dimension(void) const override {
    return dimension_ + ExtraDimension(type_);
  }

  //! Retrieve type information
  IndexMeta::DataType data_type(void) const override {
    return type_;
  }

  //! Retrieve element size in bytes
  size_t element_size(void) const override {
    return IndexMeta::ElementSizeof(this->data_type(), this->dimension());
  }

  //! Retrieve if it can multi-pass
  bool multipass(void) const override {
    return front_->multipass();
  }

  //! Create a new iterator (empty pointer when the front holder yields none)
  IndexHolder::Iterator::Pointer create_iterator(void) override {
    IndexHolder::Iterator::Pointer iter = front_->create_iterator();

    return iter ? IndexHolder::Iterator::Pointer(
                      new CosineConverterHolder::Iterator(this, std::move(iter),
                                                          this->original_type_,
                                                          this->type_))
                : IndexHolder::Iterator::Pointer();
  }

  //! Number of extra element slots appended to each vector for `type`
  static size_t ExtraDimension(IndexMeta::DataType type) {
    // The extra quantized params storage size to save for each vector
    if (type == IndexMeta::DataType::DT_INT4)
      return 40;  // 5 * sizeof(float) / sizeof(FT_INT4)
    else if (type == IndexMeta::DataType::DT_INT8)
      return 24;  // (5 * sizeof(float) + sizeof(int)) / sizeof(FT_INT8)
    else if (type == IndexMeta::DataType::DT_FP16)
      return 2;  // 2* sizeof(float) / sizeof(FT_FP16)
    else if (type == IndexMeta::DataType::DT_FP32) {
      return 1;  // sizeof(float) / sizeof(FT_FP32)
    } else {
      return 0;
    }
  }

 private:
  //! Members
  IndexHolder::Pointer front_{};
  IndexMeta::DataType original_type_{};
  IndexMeta::DataType type_{};
  uint32_t dimension_{0};
};

/*! Converter of Cosine
 *
 *  init() derives the converted IndexMeta from the input meta: target data
 *  type, dimension widened by ExtraDimension(), the converter/reformer names
 *  and, for the integer targets, the "QuantizedInteger" metric.
 *  transform() wraps an input holder in a CosineConverterHolder, which is
 *  then available through result().
 */
class CosineConverter : public IndexConverter {
 public:
  static constexpr size_t NORM_SIZE = sizeof(float);

 public:
  //! Constructor with explicit source and target data types
  CosineConverter(IndexMeta::DataType original_type,
                  IndexMeta::DataType dst_type)
      : original_type_(original_type), dst_type_(dst_type) {}

  //! Constructor for an FP32 source and the given target data type
  CosineConverter(IndexMeta::DataType dst_type)
      : original_type_(IndexMeta::DataType::DT_FP32), dst_type_(dst_type) {}

  //! Constructor leaving both data types undefined
  CosineConverter()
      : original_type_(IndexMeta::DataType::DT_UNDEFINED),
        dst_type_(IndexMeta::DataType::DT_UNDEFINED) {}

  //! Destructor
  ~CosineConverter() override {}

  //! Initialize Converter
  //!
  //! Returns IndexError_Mismatch when the meta's data type is not the
  //! expected source type, IndexError_Unsupported when the unit size does
  //! not match that type or an INT4 target is asked for an odd dimension,
  //! and 0 on success.
  int init(const IndexMeta &index_meta, const ailego::Params &params) override {
    meta_ = index_meta;

    IndexMeta::DataType type = meta_.data_type();

    if (type != original_type_) {
      LOG_ERROR("Original Type Not Matched: (%d, %d)", type, original_type_);
      return IndexError_Mismatch;
    }

    if (meta_.unit_size() != IndexMeta::UnitSizeof(type)) {
      LOG_ERROR("Unsupported type %d with unit size %u", type,
                meta_.unit_size());
      return IndexError_Unsupported;
    }

    ailego::Params reformer_params;

    if (dst_type_ == IndexMeta::DataType::DT_INT8) {
      meta_.set_converter("CosineInt8Converter", 0, params);
      meta_.set_reformer("CosineInt8Reformer", 0, reformer_params);
      this->set_quantized_integer_metric(index_meta);
    } else if (dst_type_ == IndexMeta::DataType::DT_INT4) {
      // INT4 is only accepted for even dimensions.
      if (index_meta.dimension() % 2) {
        LOG_ERROR("Unsupported dimension %u for INT4 type",
                  index_meta.dimension());
        return IndexError_Unsupported;
      }

      meta_.set_converter("CosineInt4Converter", 0, params);
      meta_.set_reformer("CosineInt4Reformer", 0, reformer_params);
      this->set_quantized_integer_metric(index_meta);
    } else if (dst_type_ == IndexMeta::DataType::DT_FP16) {
      // An FP16 source uses the "HalfFloat" pair; any other source uses the
      // "Fp16" pair.
      if (original_type_ == IndexMeta::DataType::DT_FP16) {
        meta_.set_reformer("CosineHalfFloatReformer", 0, reformer_params);
        meta_.set_converter("CosineHalfFloatConverter", 0, params);
      } else {
        meta_.set_reformer("CosineFp16Reformer", 0, reformer_params);
        meta_.set_converter("CosineFp16Converter", 0, params);
      }
    } else {
      // Any other target falls back to keeping the source data type.
      dst_type_ = type;

      meta_.set_reformer("CosineFp32Reformer", 0, reformer_params);
      meta_.set_converter("CosineFp32Converter", 0, params);
    }

    // Widen the dimension by the trailing per-record elements.
    meta_.set_meta(dst_type_, meta_.dimension() + ExtraDimension(dst_type_));

    return 0;
  }

  //! Cleanup Converter (resets the transformed-record counter)
  virtual int cleanup(void) override {
    *stats_.mutable_transformed_count() = 0;
    return 0;
  }

  //! Train the data (nothing to train; always succeeds)
  int train(IndexHolder::Pointer /*holder*/) override {
    return 0;
  }

  //! Transform the data
  //!
  //! The holder must carry the source data type and the pre-widening
  //! dimension; otherwise IndexError_Mismatch is returned.
  int transform(IndexHolder::Pointer holder) override {
    if (holder->data_type() != original_type_ ||
        holder->dimension() != meta_.dimension() - ExtraDimension(dst_type_)) {
      return IndexError_Mismatch;
    }

    *stats_.mutable_transformed_count() += holder->count();

    holder_ = std::make_shared<CosineConverterHolder>(
        holder, holder->data_type(), dst_type_);
    return 0;
  }

  //! Dump index into storage (nothing is written)
  int dump(const IndexDumper::Pointer & /*dumper*/) override {
    return 0;
  }

  //! Retrieve statistics
  const Stats &stats(void) const override {
    return stats_;
  }

  //! Retrieve a holder as result
  IndexHolder::Pointer result(void) const override {
    return holder_;
  }

  //! Retrieve Index Meta
  const IndexMeta &meta(void) const override {
    return meta_;
  }

  //! Number of extra trailing elements (in units of `type`) added to each
  //! vector's dimension for the per-record parameters
  static size_t ExtraDimension(IndexMeta::DataType type) {
    // The extra quantized params storage size to save for each vector
    if (type == IndexMeta::DataType::DT_INT4)
      return 40;  // 5 * sizeof(float) / sizeof(FT_INT4)
    else if (type == IndexMeta::DataType::DT_INT8)
      return 24;  // (5 * sizeof(float) + sizeof(int)) / sizeof(FT_INT8)
    else if (type == IndexMeta::DataType::DT_FP16)
      return 2;  // sizeof(float) / sizeof(FT_FP16)
    else if (type == IndexMeta::DataType::DT_FP32) {
      return 1;  // sizeof(float) / sizeof(FT_FP32)
    } else {
      return 0;
    }
  }

  //! Members
  IndexMeta meta_{};
  Stats stats_{};
  IndexHolder::Pointer holder_{};
  IndexMeta::DataType original_type_{IndexMeta::DataType::DT_UNDEFINED};
  IndexMeta::DataType dst_type_{IndexMeta::DataType::DT_UNDEFINED};

 private:
  //! Switch meta_ to the "QuantizedInteger" metric, recording the metric
  //! name and params of the incoming meta as its origin metric.
  void set_quantized_integer_metric(const IndexMeta &index_meta) {
    ailego::Params metric_params;
    metric_params.set(QUANTIZED_INTEGER_METRIC_ORIGIN_METRIC_NAME,
                      index_meta.metric_name());
    metric_params.set(QUANTIZED_INTEGER_METRIC_ORIGIN_METRIC_PARAMS,
                      index_meta.metric_params());
    meta_.set_metric("QuantizedInteger", 0, metric_params);
  }
};

// Factory registrations for CosineConverter. Each alias name is paired with
// the data-type argument(s) listed after the class name; these line up with
// the CosineConverter(dst_type) and CosineConverter(original_type, dst_type)
// constructors (presumably the macro forwards them -- confirm in
// index_factory.h). Apart from CosineNormalizeConverter, these are the
// converter names CosineConverter::init() records in the index meta.
INDEX_FACTORY_REGISTER_CONVERTER_ALIAS(CosineNormalizeConverter,
                                       CosineConverter,
                                       IndexMeta::DataType::DT_FP32);

INDEX_FACTORY_REGISTER_CONVERTER_ALIAS(CosineFp32Converter, CosineConverter,
                                       IndexMeta::DataType::DT_FP32);

INDEX_FACTORY_REGISTER_CONVERTER_ALIAS(CosineFp16Converter, CosineConverter,
                                       IndexMeta::DataType::DT_FP16);

INDEX_FACTORY_REGISTER_CONVERTER_ALIAS(CosineInt8Converter, CosineConverter,
                                       IndexMeta::DataType::DT_INT8);

INDEX_FACTORY_REGISTER_CONVERTER_ALIAS(CosineInt4Converter, CosineConverter,
                                       IndexMeta::DataType::DT_INT4);

// Two data types: FP16 source and FP16 target.
INDEX_FACTORY_REGISTER_CONVERTER_ALIAS(CosineHalfFloatConverter,
                                       CosineConverter,
                                       IndexMeta::DataType::DT_FP16,
                                       IndexMeta::DataType::DT_FP16);

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/quantizer/cosine_reformer.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include <ailego/algorithm/integer_quantizer.h>
#include <ailego/math/norm2_matrix.h>
#include <ailego/math/normalizer.h>
#include <core/quantizer/quantizer_params.h>
#include <zvec/core/framework/index_factory.h>
#include "record_quantizer.h"

namespace zvec {
namespace core {

/*! Reformer of Cosine
 *
 *  transform() L2-normalizes a query, encodes it as `dst_type_` and stores
 *  the norm in the final NORM_SIZE bytes of the output record. revert()
 *  reads that norm back from a stored record and rescales the decoded
 *  vector into `original_type_` units.
 */
class CosineReformer : public IndexReformer {
 public:
  //! Bytes occupied by the norm stored at the end of every record
  static constexpr size_t NORM_SIZE = sizeof(float);

  //! Constructor with explicit source and target data types
  CosineReformer(IndexMeta::DataType original_type,
                 IndexMeta::DataType dst_type)
      : original_type_(original_type), dst_type_(dst_type) {}

  //! Constructor for an FP32 source and the given target data type
  CosineReformer(IndexMeta::DataType dst_type)
      : original_type_(IndexMeta::DataType::DT_FP32), dst_type_(dst_type) {}

  //! Constructor leaving both data types undefined
  CosineReformer()
      : original_type_(IndexMeta::DataType::DT_UNDEFINED),
        dst_type_(IndexMeta::DataType::DT_UNDEFINED) {}

  //! Initialize Reformer (no parameters are consumed)
  int init(const ailego::Params & /*params*/) override {
    return 0;
  }

  //! Cleanup Reformer
  int cleanup(void) override {
    return 0;
  }

  //! Load index from container (nothing is read)
  int load(IndexStorage::Pointer) override {
    return 0;
  }

  //! Unload index
  int unload(void) override {
    return 0;
  }

  //! Transform query
  //!
  //! Accepts an FP32 query (for FP32/FP16/INT8/INT4 targets) or an FP16
  //! query (FP16 target only). On success `*out` holds the encoded record
  //! and `*ometa` describes it (target type, dimension widened by
  //! ExtraDimension()). Returns IndexError_Unsupported for any other
  //! combination or a unit size that does not match the query type.
  int transform(const void *query, const IndexQueryMeta &qmeta,
                std::string *out, IndexQueryMeta *ometa) const override {
    IndexMeta::DataType type = qmeta.data_type();

    if (type == IndexMeta::DataType::DT_FP32) {
      if (dst_type_ != IndexMeta::DataType::DT_FP32 &&
          dst_type_ != IndexMeta::DataType::DT_FP16 &&
          dst_type_ != IndexMeta::DataType::DT_INT4 &&
          dst_type_ != IndexMeta::DataType::DT_INT8) {
        return IndexError_Unsupported;
      }

      if (qmeta.unit_size() != sizeof(float)) {
        return IndexError_Unsupported;
      }

      // Snapshot the query geometry before *ometa is written, so the values
      // stay correct even if the caller passes the same object for qmeta
      // and ometa.
      const size_t origin_dimension = qmeta.dimension();
      const size_t query_bytes = qmeta.element_size();

      *ometa = qmeta;
      ometa->set_meta(dst_type_, origin_dimension + ExtraDimension(dst_type_));
      out->resize(ometa->element_size());

      // Normalize a private copy; the caller's query is left untouched.
      float norm = 0.0f;
      std::string normalized_buffer(reinterpret_cast<const char *>(query),
                                    query_bytes);

      float *buf = reinterpret_cast<float *>(&normalized_buffer[0]);

      ailego::Normalizer<float>::L2(buf, origin_dimension, &norm);

      // The norm lives in the last NORM_SIZE bytes of the record.
      ::memcpy(reinterpret_cast<uint8_t *>(&(*out)[0]) + ometa->element_size() -
                   NORM_SIZE,
               &norm, NORM_SIZE);

      if (dst_type_ == IndexMeta::DataType::DT_FP32) {
        ::memcpy(reinterpret_cast<uint8_t *>(&(*out)[0]), buf,
                 ometa->element_size() - NORM_SIZE);
      } else {
        // FP16 / INT4 / INT8 targets (validated above).
        RecordQuantizer::quantize_record(buf, origin_dimension, dst_type_,
                                         false, &(*out)[0]);
      }
    } else if (type == IndexMeta::DataType::DT_FP16) {
      if (dst_type_ != IndexMeta::DataType::DT_FP16) {
        return IndexError_Unsupported;
      }

      if (qmeta.unit_size() != sizeof(ailego::Float16)) {
        return IndexError_Unsupported;
      }

      *ometa = qmeta;
      ometa->set_meta(
          IndexMeta::DataType::DT_FP16,
          qmeta.dimension() + ExtraDimension(IndexMeta::DataType::DT_FP16));
      out->resize(ometa->element_size());

      // Copy the raw FP16 values, then normalize them in place in `out`.
      ::memcpy(reinterpret_cast<uint8_t *>(&(*out)[0]), query,
               ometa->element_size() - NORM_SIZE);

      float norm = 0.0f;
      auto data = reinterpret_cast<ailego::Float16 *>(&(*out)[0]);
      ailego::Normalizer<ailego::Float16>::L2(
          data,
          ometa->dimension() - ExtraDimension(IndexMeta::DataType::DT_FP16),
          &norm);

      ::memcpy(reinterpret_cast<uint8_t *>(&(*out)[0]) + ometa->element_size() -
                   NORM_SIZE,
               &norm, NORM_SIZE);
    } else {
      return IndexError_Unsupported;
    }

    return 0;
  }

  //! Transform queries (batch form is not supported)
  int transform(const void * /*query*/, const IndexQueryMeta & /*qmeta*/,
                uint32_t /*count*/, std::string * /*out*/,
                IndexQueryMeta * /*ometa*/) const override {
    return IndexError_Unsupported;
  }

  //! Convert records (not supported)
  int convert(const void * /*records*/, const IndexQueryMeta & /*rmeta*/,
              uint32_t /*count*/, std::string * /*out*/,
              IndexQueryMeta * /*ometa*/) const override {
    return IndexError_Unsupported;
  }

  //! Normalize results (no-op)
  int normalize(const void * /*query*/, const IndexQueryMeta & /*qmeta*/,
                IndexDocumentList & /*result*/) const override {
    return 0;
  }

  //! Stored records differ from the originals, so revert() is required
  bool need_revert() const override {
    return true;
  }

  //! Revert a stored record to the original representation
  //!
  //! `in` must be a record of type `dst_type_` as described by `qmeta`
  //! (dimension including the ExtraDimension() trailer). On success `*out`
  //! holds the rescaled vector in `original_type_` units. All validation
  //! happens before `*out` is touched; any unsupported or inconsistent
  //! input yields IndexError_Unsupported.
  int revert(const void *in, const IndexQueryMeta &qmeta,
             std::string *out) const override {
    const IndexMeta::DataType type = qmeta.data_type();

    if (type != IndexMeta::DataType::DT_FP32 &&
        type != IndexMeta::DataType::DT_INT8 &&
        type != IndexMeta::DataType::DT_INT4 &&
        type != IndexMeta::DataType::DT_FP16) {
      return IndexError_Unsupported;
    }

    // The record must be in exactly the layout this reformer produces. In
    // particular an INT8 record must not be decoded with the INT4 layout
    // (or vice versa): trailer size and packing differ.
    if (type != dst_type_) {
      return IndexError_Unsupported;
    }

    // The output buffer is sized in original_type_ units, so the type
    // written below has to agree with it: FP16 records may revert to FP16
    // or FP32, every other record reverts to FP32.
    if (type == IndexMeta::DataType::DT_FP16) {
      if (original_type_ != IndexMeta::DataType::DT_FP16 &&
          original_type_ != IndexMeta::DataType::DT_FP32) {
        return IndexError_Unsupported;
      }
    } else if (original_type_ != IndexMeta::DataType::DT_FP32) {
      return IndexError_Unsupported;
    }

    // Guard the subtraction: a record shorter than its own trailer would
    // otherwise wrap around to a huge dimension.
    const size_t extra = ExtraDimension(dst_type_);
    if (qmeta.dimension() < extra) {
      return IndexError_Unsupported;
    }

    const size_t dimension = qmeta.dimension() - extra;
    out->resize(dimension * IndexMeta::UnitSizeof(original_type_));

    // The norm lives in the last NORM_SIZE bytes of the record.
    float norm = 0.0f;
    ::memcpy(&norm,
             reinterpret_cast<const uint8_t *>(in) + qmeta.element_size() -
                 NORM_SIZE,
             NORM_SIZE);

    if (type == IndexMeta::DataType::DT_FP32) {
      float *out_buf = reinterpret_cast<float *>(&(*out)[0]);
      const float *in_buf = reinterpret_cast<const float *>(in);

      this->denormalize(in_buf, out_buf, qmeta, norm);
    } else if (type == IndexMeta::DataType::DT_FP16 &&
               original_type_ == IndexMeta::DataType::DT_FP16) {
      // FP16 in, FP16 out: rescale directly.
      ailego::Float16 *out_buf =
          reinterpret_cast<ailego::Float16 *>(&(*out)[0]);
      const ailego::Float16 *in_buf =
          reinterpret_cast<const ailego::Float16 *>(in);

      this->denormalize(in_buf, out_buf, qmeta, norm);
    } else {
      // FP16 -> FP32, INT8 -> FP32, INT4 -> FP32: decode to float first,
      // then rescale in place.
      float *out_buf = reinterpret_cast<float *>(&(*out)[0]);
      RecordQuantizer::unquantize_record(in, dimension, dst_type_, out_buf);

      this->denormalize(out_buf, out_buf, qmeta, norm);
    }

    return 0;
  }

 private:
  //! Multiply the leading (non-trailer) elements of `in` by `norm` into
  //! `out`; `in` and `out` may be the same buffer.
  template <typename T>
  void denormalize(const T *in, T *out, const IndexQueryMeta &qmeta,
                   float norm) const {
    size_t origin_dim = qmeta.dimension() - ExtraDimension(dst_type_);

    for (size_t d = 0; d < origin_dim; ++d) {
      out[d] = in[d] * norm;
    }
  }

  //! Number of extra trailing elements (in units of `type`) added to each
  //! vector's dimension for the per-record parameters
  static size_t ExtraDimension(IndexMeta::DataType type) {
    // The extra quantized params storage size to save for each vector
    if (type == IndexMeta::DataType::DT_INT4)
      return 40;  // 5 * sizeof(float) / sizeof(FT_INT4)
    else if (type == IndexMeta::DataType::DT_INT8)
      return 24;  // (5 * sizeof(float) + sizeof(int)) / sizeof(FT_INT8)
    else if (type == IndexMeta::DataType::DT_FP16)
      return 2;  // sizeof(float) / sizeof(FT_FP16)
    else if (type == IndexMeta::DataType::DT_FP32) {
      return 1;  // sizeof(float) / sizeof(FT_FP32)
    } else {
      return 0;
    }
  }

  //! Members
  IndexMeta::DataType original_type_{IndexMeta::DataType::DT_UNDEFINED};
  IndexMeta::DataType dst_type_{IndexMeta::DataType::DT_UNDEFINED};
};

// Factory registrations for CosineReformer. Each alias name is paired with
// the data-type argument(s) listed after the class name; these line up with
// the CosineReformer(dst_type) and CosineReformer(original_type, dst_type)
// constructors (presumably the macro forwards them -- confirm in
// index_factory.h).
INDEX_FACTORY_REGISTER_REFORMER_ALIAS(CosineNormalizeReformer, CosineReformer,
                                      IndexMeta::DataType::DT_FP32);

INDEX_FACTORY_REGISTER_REFORMER_ALIAS(CosineFp32Reformer, CosineReformer,
                                      IndexMeta::DataType::DT_FP32);

INDEX_FACTORY_REGISTER_REFORMER_ALIAS(CosineFp16Reformer, CosineReformer,
                                      IndexMeta::DataType::DT_FP16);

INDEX_FACTORY_REGISTER_REFORMER_ALIAS(CosineInt8Reformer, CosineReformer,
                                      IndexMeta::DataType::DT_INT8);

INDEX_FACTORY_REGISTER_REFORMER_ALIAS(CosineInt4Reformer, CosineReformer,
                                      IndexMeta::DataType::DT_INT4);

// Two data types: FP16 source and FP16 target.
INDEX_FACTORY_REGISTER_REFORMER_ALIAS(CosineHalfFloatReformer, CosineReformer,
                                      IndexMeta::DataType::DT_FP16,
                                      IndexMeta::DataType::DT_FP16);

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/quantizer/half_float_converter.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <zvec/ailego/utility/float_helper.h>
#include <zvec/core/framework/index_framework.h>

namespace zvec {
namespace core {

/*! Half Float Holder
 *
 *  Wraps another holder and presents its records as FP16. The wrapped
 *  holder's data is read as `float` and converted one record at a time.
 */
class HalfFloatHolder : public IndexHolder {
 public:
  /*! Half Float Holder Iterator
   *
   *  Walks the front iterator and keeps the FP16 form of the current record
   *  in an internal buffer.
   */
  class Iterator : public IndexHolder::Iterator {
   public:
    //! Owning pointer to this iterator type
    using Pointer = std::unique_ptr<Iterator>;

    //! Takes over `iter` and converts its first record (if any)
    Iterator(const HalfFloatHolder *owner,
             IndexHolder::Iterator::Pointer &&iter)
        : buffer_(owner->dimension(), 0), front_iter_(std::move(iter)) {
      this->transform_record();
    }

    //! Destructor
    virtual ~Iterator(void) {}

    //! FP16 data of the current record
    const void *data(void) const override {
      return buffer_.data();
    }

    //! Valid exactly when the front iterator is valid
    bool is_valid(void) const override {
      return front_iter_->is_valid();
    }

    //! Primary key of the current record
    uint64_t key(void) const override {
      return front_iter_->key();
    }

    //! Advance the front iterator and convert the record it lands on
    void next(void) override {
      front_iter_->next();
      this->transform_record();
    }

   private:
    //! Refresh buffer_ from the front iterator's current record; does
    //! nothing once the front iterator is exhausted.
    inline void transform_record(void) {
      if (!front_iter_->is_valid()) {
        return;
      }
      const float *source =
          reinterpret_cast<const float *>(front_iter_->data());
      ailego::FloatHelper::ToFP16(source, buffer_.size(), buffer_.data());
    }

    std::vector<uint16_t> buffer_{};
    IndexHolder::Iterator::Pointer front_iter_{};
  };

  //! Wrap `front`
  HalfFloatHolder(IndexHolder::Pointer front) : front_(std::move(front)) {}

  //! Element count, as reported by the wrapped holder
  size_t count(void) const override {
    return front_->count();
  }

  //! Dimension, unchanged from the wrapped holder
  size_t dimension(void) const override {
    return front_->dimension();
  }

  //! Always FP16
  IndexMeta::DataType data_type(void) const override {
    return IndexMeta::DataType::DT_FP16;
  }

  //! Byte size of one FP16 record of the wrapped holder's dimension
  size_t element_size(void) const override {
    const size_t dims = front_->dimension();
    return IndexMeta::ElementSizeof(IndexMeta::DataType::DT_FP16, dims);
  }

  //! Multi-pass capability of the wrapped holder
  bool multipass(void) const override {
    return front_->multipass();
  }

  //! Create a converting iterator, or an empty pointer when the wrapped
  //! holder yields none
  IndexHolder::Iterator::Pointer create_iterator(void) override {
    IndexHolder::Iterator::Pointer source = front_->create_iterator();
    if (!source) {
      return IndexHolder::Iterator::Pointer();
    }
    return IndexHolder::Iterator::Pointer(
        new HalfFloatHolder::Iterator(this, std::move(source)));
  }

 private:
  //! A holder without a front holder is not allowed
  HalfFloatHolder(void) = delete;

  //! Members
  IndexHolder::Pointer front_{};
};

/*! Half Float Converter
 *
 *  Converts an FP32 data set to FP16 of the same dimension by wrapping the
 *  input holder in a HalfFloatHolder.
 */
class HalfFloatConverter : public IndexConverter {
 public:
  //! Destructor
  virtual ~HalfFloatConverter(void) {}

  //! Initialize Converter; only FP32 metas with a float unit size are
  //! accepted
  int init(const IndexMeta &mt, const ailego::Params &) override {
    const bool is_fp32 = mt.data_type() == IndexMeta::DataType::DT_FP32 &&
                         mt.unit_size() == sizeof(float);
    if (ailego_unlikely(!is_fp32)) {
      LOG_ERROR("Unsupported type %d with unit size %u.", mt.data_type(),
                mt.unit_size());
      return IndexError_Unsupported;
    }

    // Same meta, retyped to FP16 and tagged with this converter/reformer.
    meta_ = mt;
    meta_.set_meta(IndexMeta::DataType::DT_FP16, mt.dimension());
    meta_.set_converter("HalfFloatConverter", 0, ailego::Params());
    meta_.set_reformer("HalfFloatReformer", 0, ailego::Params());
    return 0;
  }

  //! Cleanup Converter
  int cleanup(void) override {
    return 0;
  }

  //! Train the data (nothing to train)
  int train(IndexHolder::Pointer) override {
    return 0;
  }

  //! Transform the data; the holder must be FP32 with the meta's dimension
  int transform(IndexHolder::Pointer holder) override {
    const bool compatible =
        holder->data_type() == IndexMeta::DataType::DT_FP32 &&
        holder->dimension() == meta_.dimension();
    if (!compatible) {
      return IndexError_Mismatch;
    }
    holder_ = std::make_shared<HalfFloatHolder>(std::move(holder));
    return 0;
  }

  //! Dump index into storage (nothing is written)
  int dump(const IndexDumper::Pointer &) override {
    return 0;
  }

  //! Retrieve statistics
  const Stats &stats(void) const override {
    return stats_;
  }

  //! Holder produced by the last successful transform()
  IndexHolder::Pointer result(void) const override {
    return holder_;
  }

  //! Converted index meta
  const IndexMeta &meta(void) const override {
    return meta_;
  }

 private:
  IndexMeta meta_{};
  IndexHolder::Pointer holder_{};
  Stats stats_{};
};

/*! Half Float Sparse Holder
 *
 *  Wraps a sparse holder and presents the sparse values of its records as
 *  FP16. Indices and counts are passed through from the wrapped holder;
 *  only the values (read as `float`) are converted.
 */
class HalfFloatSparseHolder : public IndexSparseHolder {
 public:
  /*! Half Float Holder Iterator
   *
   *  Walks the front iterator and keeps the FP16 form of the current
   *  record's sparse values in an internal buffer.
   */
  class Iterator : public IndexSparseHolder::Iterator {
   public:
    //! Index Holder Iterator Pointer
    typedef std::unique_ptr<Iterator> Pointer;

    //! Takes over `iter` and converts its first record (if any)
    Iterator(const HalfFloatSparseHolder * /*owner*/,
             IndexSparseHolder::Iterator::Pointer &&iter)
        : sparse_buffer_(MAX_DIM_COUNT * sizeof(uint16_t), 0),
          front_iter_(std::move(iter)) {
      this->transform_record();
    }

    //! Destructor
    virtual ~Iterator(void) {}

    //! Test if the iterator is valid
    bool is_valid(void) const override {
      return front_iter_->is_valid();
    }

    //! Retrieve primary key
    uint64_t key(void) const override {
      return front_iter_->key();
    }

    //! Retrieve sparse count
    uint32_t sparse_count() const override {
      return front_iter_->sparse_count();
    }

    //! Retrieve sparse indices
    const uint32_t *sparse_indices() const override {
      return front_iter_->sparse_indices();
    }

    //! Retrieve sparse data (FP16 values of the current record)
    const void *sparse_data() const override {
      return sparse_buffer_.data();
    }

    //! Next iterator
    void next(void) override {
      front_iter_->next();
      this->transform_record();
    }

   private:
    //! Refresh sparse_buffer_ from the front iterator's current record;
    //! does nothing once the front iterator is exhausted.
    inline void transform_record(void) {
      if (!front_iter_->is_valid()) {
        return;
      }

      const uint32_t value_count = front_iter_->sparse_count();

      // The buffer starts at a fixed size; grow it for any record that
      // carries more values so the conversion never writes past the end.
      if (value_count > sparse_buffer_.size()) {
        sparse_buffer_.resize(value_count);
      }

      ailego::FloatHelper::ToFP16(
          reinterpret_cast<const float *>(front_iter_->sparse_data()),
          value_count, sparse_buffer_.data());
    }

    //! Basis for the initial buffer size (the buffer grows on demand)
    constexpr static uint32_t MAX_DIM_COUNT = 4096;
    std::vector<uint16_t> sparse_buffer_{};

    IndexSparseHolder::Iterator::Pointer front_iter_{};
  };

  //! Constructor
  HalfFloatSparseHolder(IndexSparseHolder::Pointer front)
      : front_(std::move(front)) {}

  //! Retrieve count of elements in holder (-1 indicates unknown)
  size_t count(void) const override {
    return front_->count();
  }

  //! Retrieve type information (always FP16)
  IndexMeta::DataType data_type(void) const override {
    return IndexMeta::DataType::DT_FP16;
  }

  //! Retrieve if it can multi-pass
  bool multipass(void) const override {
    return front_->multipass();
  }

  //! Create a new iterator, or an empty pointer when the wrapped holder
  //! yields none
  IndexSparseHolder::Iterator::Pointer create_iterator(void) override {
    IndexSparseHolder::Iterator::Pointer iter = front_->create_iterator();
    return iter
               ? IndexSparseHolder::Iterator::Pointer(
                     new HalfFloatSparseHolder::Iterator(this, std::move(iter)))
               : IndexSparseHolder::Iterator::Pointer();
  }

  //! Total sparse count, as reported by the wrapped holder
  size_t total_sparse_count(void) const override {
    return front_->total_sparse_count();
  }

 private:
  //! Disable them
  HalfFloatSparseHolder(void) = delete;

  //! Members
  IndexSparseHolder::Pointer front_{};
};

/*! Half Float Sparse Converter
 *
 *  Converts an FP32 sparse data set to FP16 by wrapping the input holder in
 *  a HalfFloatSparseHolder.
 */
class HalfFloatSparseConverter : public IndexConverter {
 public:
  //! Destructor
  virtual ~HalfFloatSparseConverter(void) {}

  //! Initialize Converter; only FP32 metas with a float unit size are
  //! accepted
  int init(const IndexMeta &mt, const ailego::Params &) override {
    const bool is_fp32 = mt.data_type() == IndexMeta::DataType::DT_FP32 &&
                         mt.unit_size() == sizeof(float);
    if (ailego_unlikely(!is_fp32)) {
      LOG_ERROR("Unsupported type %d with unit size %u.", mt.data_type(),
                mt.unit_size());
      return IndexError_Unsupported;
    }

    // Same meta, retyped to FP16 and tagged with this converter/reformer.
    meta_ = mt;
    meta_.set_data_type(IndexMeta::DataType::DT_FP16);
    meta_.set_converter("HalfFloatSparseConverter", 0, ailego::Params());
    meta_.set_reformer("HalfFloatSparseReformer", 0, ailego::Params());
    return 0;
  }

  //! Cleanup Converter
  int cleanup(void) override {
    return 0;
  }

  //! Train the data (nothing to train)
  int train(IndexSparseHolder::Pointer) override {
    return 0;
  }

  //! Transform the data; the holder must be FP32
  int transform(IndexSparseHolder::Pointer holder) override {
    const bool is_fp32 = holder->data_type() == IndexMeta::DataType::DT_FP32;
    if (!is_fp32) {
      return IndexError_Mismatch;
    }

    holder_ = std::make_shared<HalfFloatSparseHolder>(std::move(holder));
    return 0;
  }

  //! Dump index into storage (nothing is written)
  int dump(const IndexDumper::Pointer &) override {
    return 0;
  }

  //! Retrieve statistics
  const Stats &stats(void) const override {
    return stats_;
  }

  //! Sparse holder produced by the last successful transform()
  IndexSparseHolder::Pointer sparse_result(void) const override {
    return holder_;
  }

  //! Converted index meta
  const IndexMeta &meta(void) const override {
    return meta_;
  }

 private:
  IndexMeta meta_{};
  IndexSparseHolder::Pointer holder_{};
  Stats stats_{};
};

// Register both half-float converters with the index factory (presumably
// under their class names, which are also the converter names their init()
// methods record in the index meta -- confirm in index_factory.h).
INDEX_FACTORY_REGISTER_CONVERTER(HalfFloatConverter);
INDEX_FACTORY_REGISTER_CONVERTER(HalfFloatSparseConverter);

}  // namespace core
}  // namespace zvec

================================================
FILE: src/core/quantizer/half_float_reformer.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cstdint>
#include <limits>
#include <zvec/ailego/utility/float_helper.h>
#include <zvec/core/framework/index_factory.h>
#include "record_quantizer.h"

namespace zvec {
namespace core {

/*! Half Float Reformer
 */
class HalfFloatReformer : public IndexReformer {
 public:
  //! Initialize Reformer
  int init(const ailego::Params &) override {
    return 0;
  }

  //! Cleanup Reformer
  int cleanup(void) override {
    return 0;
  }

  //! Load index from container
  int load(IndexStorage::Pointer) override {
    return 0;
  }

  //! Unload index
  int unload(void) override {
    return 0;
  }

  //! Transform query
  int transform(const void *query, const IndexQueryMeta &qmeta,
                std::string *out, IndexQueryMeta *ometa) const override {
    switch (qmeta.data_type()) {
      case IndexMeta::DataType::DT_FP16:
        out->assign(reinterpret_cast<const char *>(query),
                    qmeta.element_size());
        *ometa = qmeta;
        break;

      case IndexMeta::DataType::DT_FP32:
        if (qmeta.unit_size() != sizeof(float)) {
          return IndexError_Unsupported;
        }
        out->resize(qmeta.dimension() * sizeof(ailego::Float16));
        ailego::FloatHelper::ToFP16(reinterpret_cast<const float *>(query),
                                    qmeta.dimension(),
                                    reinterpret_cast<uint16_t *>(&(*out)[0]));
        *ometa = qmeta;
        ometa->set_meta(IndexMeta::DataType::DT_FP16, qmeta.dimension());
        break;

      default:
        return IndexError_Unsupported;
    }
    return 0;
  }

  //! Transform queries
  int transform(const void *query, const IndexQueryMeta &qmeta, uint32_t count,
                std::string *out, IndexQueryMeta *ometa) const override {
    switch (qmeta.data_type()) {
      case IndexMeta::DataType::DT_FP16:
        out->assign(reinterpret_cast<const char *>(query),
                    qmeta.element_size() * count);
        *ometa = qmeta;
        break;

      case IndexMeta::DataType::DT_FP32:
        if (qmeta.unit_size() != sizeof(float)) {
          return IndexError_Unsupported;
        }
        out->resize(qmeta.dimension() * count * sizeof(ailego::Float16));
        ailego::FloatHelper::ToFP16(reinterpret_cast<const float *>(query),
                                    qmeta.dimension() * count,
                                    reinterpret_cast<uint16_t *>(&(*out)[0]));
        *ometa = qmeta;
        ometa->set_meta(IndexMeta::DataType::DT_FP16, qmeta.dimension());
        break;

      default:
        return IndexError_Unsupported;
    }
    return 0;
  }

  //! Normalize results
  int normalize(const void *, const IndexQueryMeta &,
                IndexDocumentList &) const override {
    return 0;
  }

  bool need_revert() const override {
    return true;
  }

  int revert(const void *in, const IndexQueryMeta &qmeta,
             std::string *out) const override {
    IndexMeta::DataType type = qmeta.data_type();

    if (type != IndexMeta::DataType::DT_FP16) {
      return IndexError_Unsupported;
    }

    if (type == IndexMeta::DataType::DT_FP16) {
      size_t dimension = qmeta.dimension();

      out->resize(dimension * sizeof(float));
      float *out_buf = reinterpret_cast<float *>(out->data());

      RecordQuantizer::unquantize_record(in, dimension,
                                         IndexMeta::DataType::DT_FP16, out_buf);
    }

    return 0;
  }
};

/*! Half Float Sparse Reformer
 *
 *  Brings the values of sparse queries to FP16 (FP16 input is copied
 *  through, FP32 input is converted) and reverts FP16 sparse values back to
 *  FP32. Sparse indices are never touched.
 */
class HalfFloatSparseReformer : public IndexReformer {
 public:
  //! Initialize Reformer (no parameters are consumed)
  int init(const ailego::Params &) override {
    return 0;
  }

  //! Cleanup Reformer
  int cleanup(void) override {
    return 0;
  }

  //! Load index from container (nothing is read)
  int load(IndexStorage::Pointer) override {
    return 0;
  }

  //! Unload index
  int unload(void) override {
    return 0;
  }

  //! Transform query
  //!
  //! Writes `sparse_count` FP16 values to `*out`. `*ometa` is `qmeta`,
  //! retyped to FP16 when the input was FP32. Returns
  //! IndexError_Unsupported for other input types or an FP32 unit size
  //! that is not sizeof(float).
  int transform(uint32_t sparse_count, const uint32_t * /*sparse_indices*/,
                const void *sparse_query, const IndexQueryMeta &qmeta,
                std::string *out, IndexQueryMeta *ometa) const override {
    switch (qmeta.data_type()) {
      case IndexMeta::DataType::DT_FP16:
        // Already FP16: byte-for-byte copy, meta unchanged.
        out->assign(reinterpret_cast<const char *>(sparse_query),
                    qmeta.unit_size() * sparse_count);
        *ometa = qmeta;

        break;

      case IndexMeta::DataType::DT_FP32:
        if (qmeta.unit_size() != sizeof(float)) {
          return IndexError_Unsupported;
        }

        out->resize(sparse_count * sizeof(ailego::Float16));
        ailego::FloatHelper::ToFP16(
            reinterpret_cast<const float *>(sparse_query), sparse_count,
            reinterpret_cast<uint16_t *>(&(*out)[0]));

        *ometa = qmeta;
        ometa->set_data_type(IndexMeta::DataType::DT_FP16);

        break;

      default:
        return IndexError_Unsupported;
    }

    return 0;
  }

  //! Transform queries
  //!
  //! The values of all `count` queries are converted as one flat run, so
  //! the per-query counts are summed first. Returns IndexError_OutOfRange
  //! when that sum does not fit in uint32_t.
  int transform(const uint32_t *sparse_count, const uint32_t *sparse_indices,
                const void *sparse_query, const IndexQueryMeta &qmeta,
                uint32_t count, std::string *out,
                IndexQueryMeta *ometa) const override {
    constexpr uint32_t kMaxTotal = std::numeric_limits<uint32_t>::max();

    // Check before each addition, so the overflow is detected regardless of
    // how wide size_t is on the target.
    uint32_t sparse_count_total = 0;
    for (uint32_t i = 0; i < count; ++i) {
      if (sparse_count[i] > kMaxTotal - sparse_count_total) {
        return IndexError_OutOfRange;
      }
      sparse_count_total += sparse_count[i];
    }

    return this->transform(sparse_count_total, sparse_indices, sparse_query,
                           qmeta, out, ometa);
  }

  //! Stored records differ from the originals, so revert() is required
  bool need_revert() const override {
    return true;
  }

  //! Revert `sparse_count` FP16 values to FP32; any other record type is
  //! unsupported
  int revert(const uint32_t sparse_count, const uint32_t * /*sparse_indices*/,
             const void *sparse_query, const IndexQueryMeta &qmeta,
             std::string *sparse_query_out) const override {
    if (qmeta.data_type() != IndexMeta::DataType::DT_FP16) {
      return IndexError_Unsupported;
    }

    sparse_query_out->resize(sparse_count * sizeof(float));

    float *out_buf = reinterpret_cast<float *>(&(*sparse_query_out)[0]);
    RecordQuantizer::unquantize_sparse_record(
        sparse_query, sparse_count, IndexMeta::DataType::DT_FP16, out_buf);

    return 0;
  }
};

// Register both half-float reformers with the index factory (presumably
// under their class names -- confirm in index_factory.h).
INDEX_FACTORY_REGISTER_REFORMER(HalfFloatReformer);
INDEX_FACTORY_REGISTER_REFORMER(HalfFloatSparseReformer);

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/quantizer/integer_quantizer_converter.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <iterator>
#include <ailego/algorithm/integer_quantizer.h>
#include <ailego/math/norm2_matrix.h>
#include <ailego/math/normalizer.h>
#include <ailego/pattern/defer.h>
#include <core/quantizer/quantizer_params.h>
#include <zvec/core/framework/index_factory.h>
#include "record_quantizer.h"
#include "../metric/metric_params.h"

namespace zvec {
namespace core {

/*! Integer Quantizer Converter Holder
 */
template <class Quantizer>
class IntegerQuantizerConverterHolder : public IndexHolder {
 public:
  /*! Integer Quantizer Converter Holder Iterator
   */
  class Iterator : public IndexHolder::Iterator {
   public:
    //! Constructor
    Iterator(const IntegerQuantizerConverterHolder *owner,
             IndexHolder::Iterator::Pointer &&iter)
        : buffer_(owner->element_size(), 0),
          front_iter_(std::move(iter)),
          quantizer_(owner->quantizer_),
          dim_(owner->dimension()) {
      this->encode_record();
    }

    //! Destructor
    virtual ~Iterator(void) {}

    //! Retrieve pointer of data
    const void *data(void) const override {
      return buffer_.data();
    }

    //! Test if the iterator is valid
    bool is_valid(void) const override {
      return front_iter_->is_valid();
    }

    //! Retrieve primary key
    uint64_t key(void) const override {
      return front_iter_->key();
    }

    //! Next iterator
    void next(void) override {
      front_iter_->next();
      this->encode_record();
    }

   private:
    //! Encode the data by quantizer
    inline void encode_record(void) {
      if (front_iter_->is_valid()) {
        const float *vec = reinterpret_cast<const float *>(front_iter_->data());
        quantizer_->encode(
            vec, dim_,
            reinterpret_cast<typename Quantizer::ValueType *>(buffer_.data()));
      }
    }

    //! Members
    std::vector<uint8_t> buffer_{};
    IndexHolder::Iterator::Pointer front_iter_{};
    std::shared_ptr<Quantizer> quantizer_{};
    size_t dim_{0u};
  };

  //! Constructor
  IntegerQuantizerConverterHolder(IndexHolder::Pointer front,
                                  std::shared_ptr<Quantizer> quantizer,
                                  IndexMeta::DataType tp)
      : front_(std::move(front)),
        quantizer_(std::move(quantizer)),
        data_type_(tp) {}

  //! Retrieve count of elements in holder (-1 indicates unknown)
  size_t count(void) const override {
    return front_->count();
  }

  //! Retrieve dimension
  size_t dimension(void) const override {
    return front_->dimension();
  }

  //! Retrieve type information
  IndexMeta::DataType data_type(void) const override {
    return data_type_;
  }

  //! Retrieve element size in bytes
  size_t element_size(void) const override {
    return IndexMeta::ElementSizeof(this->data_type(), front_->dimension());
  }

  //! Retrieve if it can multi-pass
  bool multipass(void) const override {
    return front_->multipass();
  }

  //! Create a new iterator
  IndexHolder::Iterator::Pointer create_iterator(void) override {
    IndexHolder::Iterator::Pointer iter = front_->create_iterator();
    return iter ? IndexHolder::Iterator::Pointer(
                      new IntegerQuantizerConverterHolder::Iterator(
                          this, std::move(iter)))
                : IndexHolder::Iterator::Pointer();
  }

 private:
  //! Members
  IndexHolder::Pointer front_{};
  std::shared_ptr<Quantizer> quantizer_{};
  IndexMeta::DataType data_type_{};
};


/*! Integer Quantizer Converter
 *
 *  Converts FP32 records to the integer type given at construction using a
 *  trainable `Quantizer`:
 *   - init() configures the quantizer from the converter params;
 *   - train() fits it on a holder and stores the resulting scale/bias in
 *     the converter and reformer params of the index meta;
 *   - transform() wraps a holder so records are encoded while iterating.
 */
template <class Quantizer>
class IntegerQuantizerConverter : public IndexConverter {
 public:
  //! Constructor
  IntegerQuantizerConverter(IndexMeta::DataType dst_type)
      : data_type_(dst_type) {}

  //! Destructor
  virtual ~IntegerQuantizerConverter() {}

//! Get param name (picks the INT8 or INT4 variant of a parameter key)
#define P_NAME(NAME)                                                 \
  data_type_ == IndexMeta::DataType::DT_INT8 ? INT8_QUANTIZER_##NAME \
                                             : INT4_QUANTIZER_##NAME

  //! Initialize Converter
  //! Only DT_FP32 input with the matching unit size is accepted
  //! (IndexError_Unsupported otherwise). Histogram bins, scale and bias are
  //! forwarded to the quantizer only when the corresponding param is found.
  int init(const IndexMeta &mt, const ailego::Params &params) override {
    if (mt.data_type() != IndexMeta::DataType::DT_FP32 ||
        mt.unit_size() != IndexMeta::UnitSizeof(IndexMeta::DataType::DT_FP32)) {
      LOG_ERROR("Unsupported type %d with unit size %u", mt.data_type(),
                mt.unit_size());
      return IndexError_Unsupported;
    }
    quantizer_ = std::make_shared<Quantizer>();
    if (!quantizer_) {
      LOG_ERROR("Malloc EntropyIntegerQuantizer failed");
      return IndexError_NoMemory;
    }

    size_t count;
    if (params.get(P_NAME(CONVERTER_HISTOGRAM_BINS_COUNT), &count)) {
      quantizer_->set_histogram_bins(count);
      LOG_DEBUG("Init Converter with bins=%zu", count);
    }
    float scale;
    if (params.get(P_NAME(CONVERTER_SCALE), &scale)) {
      quantizer_->set_scale(scale);
      LOG_DEBUG("Init Converter with scale=%f", scale);
    }
    float bias = 0.0f;
    if (params.get(P_NAME(CONVERTER_BIAS), &bias)) {
      quantizer_->set_bias(bias);
      LOG_DEBUG("Init Converter with bias=%f", bias);
    }

    // The output meta keeps the dimension but switches to the integer type.
    meta_ = mt;
    meta_.set_meta(data_type_, meta_.dimension());
    meta_.set_converter(data_type_ == IndexMeta::DataType::DT_INT8
                            ? "Int8QuantizerConverter"
                            : "Int4QuantizerConverter",
                        0, params);

    // Bias is switched off by default for the inner-product style metrics;
    // the DISABLE_BIAS param is read afterwards into the same flag.
    bool disable_bias = false;
    if (meta_.metric_name() == "InnerProduct" ||
        meta_.metric_name() == "MipsSquaredEuclidean") {
      disable_bias = true;
    }
    params.get(P_NAME(CONVERTER_DISABLE_BIAS), &disable_bias);
    quantizer_->set_non_bias(disable_bias);

    return 0;
  }

  //! Cleanup Converter
  int cleanup(void) override {
    return 0;
  }

  //! Train the data
  //! Requires an FP32 holder whose dimension matches the meta
  //! (IndexError_Mismatch otherwise). Returns IndexError_Runtime when an
  //! iterator cannot be created or the quantizer fails to train. On success
  //! the learned scale/bias are written into the reformer and converter
  //! params of the meta. The elapsed time is recorded on every exit path
  //! after the initial check.
  int train(IndexHolder::Pointer holder) override {
    if (holder->dimension() != meta_.dimension() ||
        holder->data_type() != IndexMeta::DataType::DT_FP32) {
      return IndexError_Mismatch;
    }

    ailego::ElapsedTime timer;
    AILEGO_DEFER([&]() { stats_.set_trained_costtime(timer.milli_seconds()); });

    if (holder->multipass()) {
      {
        //! step1: compute max/min value
        auto iter = holder->create_iterator();
        if (!iter) {
          LOG_ERROR("Failed to create iterator of holder");
          return IndexError_Runtime;
        }
        float max = -std::numeric_limits<float>::max();
        float min = std::numeric_limits<float>::max();
        for (; iter->is_valid(); iter->next()) {
          const float *vec = reinterpret_cast<const float *>(iter->data());
          for (size_t i = 0; i < meta_.dimension(); ++i) {
            max = std::max(max, vec[i]);
            min = std::min(min, vec[i]);
          }
        }
        quantizer_->set_max(max);
        quantizer_->set_min(min);

        //! step2: feed quantizer with training data (second pass)
        iter = holder->create_iterator();
        if (!iter) {
          LOG_ERROR("Failed to create iterator of holder");
          return IndexError_Runtime;
        }
        for (; iter->is_valid(); iter->next()) {
          (*stats_.mutable_trained_count())++;
          quantizer_->feed(reinterpret_cast<const float *>(iter->data()),
                           meta_.dimension());
        }
      }
    } else {
      // The holder can only be walked once, so the values are buffered
      // while the range is computed and fed to the quantizer afterwards.
      //! step1: compute max/min value
      auto iter = holder->create_iterator();
      if (!iter) {
        LOG_ERROR("Failed to create iterator of holder");
        return IndexError_Runtime;
      }
      std::vector<float> features;
      float max = -std::numeric_limits<float>::max();
      float min = std::numeric_limits<float>::max();
      for (; iter->is_valid(); iter->next()) {
        const float *vec = reinterpret_cast<const float *>(iter->data());
        for (size_t i = 0; i < meta_.dimension(); ++i) {
          max = std::max(max, vec[i]);
          min = std::min(min, vec[i]);
          features.emplace_back(vec[i]);
        }
      }
      quantizer_->set_max(max);
      quantizer_->set_min(min);

      //! step2: feed quantizer with training data
      for (size_t i = 0; i < features.size(); i += meta_.dimension()) {
        quantizer_->feed(&features[i], meta_.dimension());
        (*stats_.mutable_trained_count())++;
      }
    }

    //! step3: train the quantizer on the data fed above
    if (!quantizer_->train()) {
      LOG_ERROR("Quantizer train failed");
      return IndexError_Runtime;
    }

    //! Setting of Integer Reformer
    // An infinite scale or bias is stored as a string instead of a float.
    ailego::Params reformer_params;
    float scale = quantizer_->scale();
    float bias = quantizer_->bias();
    float inf = std::numeric_limits<float>::infinity();
    if (scale == inf || bias == inf) {
      reformer_params.set(P_NAME(REFORMER_SCALE), std::to_string(scale));
      reformer_params.set(P_NAME(REFORMER_BIAS), std::to_string(bias));
    } else {
      reformer_params.set(P_NAME(REFORMER_SCALE), scale);
      reformer_params.set(P_NAME(REFORMER_BIAS), bias);
    }
    reformer_params.set(P_NAME(REFORMER_METRIC), meta_.metric_name());
    meta_.set_reformer(data_type_ == IndexMeta::DataType::DT_INT8
                           ? "Int8QuantizerReformer"
                           : "Int4QuantizerReformer",
                       0, reformer_params);

    // Write the trained scale/bias back into the converter params as well.
    ailego::Params params = meta_.converter_params();
    if (scale == inf || bias == inf) {
      params.set(P_NAME(CONVERTER_SCALE), std::to_string(scale));
      params.set(P_NAME(CONVERTER_BIAS), std::to_string(bias));
    } else {
      params.set(P_NAME(CONVERTER_SCALE), scale);
      params.set(P_NAME(CONVERTER_BIAS), bias);
    }
    meta_.set_converter(meta_.converter_name(), 0, params);

    LOG_DEBUG(
        "IntegerQuantizerConverter train done, costtime %zums, scale %f, bias "
        "%f",
        static_cast<size_t>(timer.milli_seconds()), quantizer_->scale(),
        quantizer_->bias());

    return 0;
  }

  //! Transform the data
  //! Wraps `holder` in a quantizing holder that is returned by result().
  //! Returns IndexError_Mismatch unless the holder is FP32 with the same
  //! dimension as the meta.
  int transform(IndexHolder::Pointer holder) override {
    if (holder->data_type() != IndexMeta::DataType::DT_FP32 ||
        holder->dimension() != meta_.dimension()) {
      return IndexError_Mismatch;
    }

    // A holder reports an unknown element count as (size_t)-1; adding that
    // sentinel would corrupt the running total, so it is skipped.
    const size_t incoming = holder->count();
    if (incoming > 0 && incoming != static_cast<size_t>(-1)) {
      *stats_.mutable_transformed_count() += incoming;
    }
    holder_ = std::make_shared<IntegerQuantizerConverterHolder<Quantizer>>(
        holder, quantizer_, data_type_);
    return 0;
  }

  //! Dump index into storage (nothing to dump; always returns 0)
  int dump(const IndexDumper::Pointer &) override {
    return 0;
  }

  //! Retrieve statistics
  const Stats &stats(void) const override {
    return stats_;
  }

  //! Retrieve a holder as result (set by the last successful transform())
  IndexHolder::Pointer result(void) const override {
    return holder_;
  }

  //! Retrieve Index Meta
  const IndexMeta &meta(void) const override {
    return meta_;
  }

 private:
  //! Members
  IndexMeta meta_{};
  IndexHolder::Pointer holder_{};
  std::shared_ptr<Quantizer> quantizer_{};
  Stats stats_{};
  IndexMeta::DataType data_type_{};
};


/*! Converter of Integer Streaming Quantizer
 */
class IntegerStreamingConverter : public IndexConverter {
 public:
  //! Constructor
  IntegerStreamingConverter(IndexMeta::DataType dst_type)
      : data_type_(dst_type) {}

  //! Destructor
  ~IntegerStreamingConverter() override {}

  //! Initialize Converter
  int init(const IndexMeta &index_meta, const ailego::Params &params) override {
    meta_ = index_meta;
    params.get(INTEGER_STREAMING_CONVERTER_ENABLE_NORMALIZE,
               &enable_normalize_);
    ailego::Params reformer_params;
    if (enable_normalize_) {
      reformer_params.set(INTEGER_STREAMING_REFORMER_ENABLE_NORMALIZE, true);
    }

    is_euclidean_ = index_meta.metric_name() == "MipsSquaredEuclidean" ||
                    index_meta.metric_name() == "SquaredEuclidean" ||
                    index_meta.metric_name() == "Euclidean";
    if (is_euclidean_) {
      reformer_params.set(INTEGER_STREAMING_REFORMER_IS_EUCLIDEAN, true);
    }


    if (data_type_ == IndexMeta::DataType::DT_INT8) {
      meta_.set_converter("Int8StreamingConverter", 0, params);
      meta_.set_reformer("Int8StreamingReformer", 0, reformer_params);
    } else {
      if (index_meta.dimension() % 2) {
        LOG_ERROR("Unsupported dimension %u for INT4 type",
                  index_meta.dimension());
        return IndexError_Unsupported;
      }
      meta_.set_converter("Int4StreamingConverter", 0, params);
      meta_.set_reformer("Int4StreamingReformer", 0, reformer_params);
    }
    ailego::Params metric_params;
    metric_params.set(QUANTIZED_INTEGER_METRIC_ORIGIN_METRIC_NAME,
                      index_meta.metric_name());
    metric_params.set(QUANTIZED_INTEGER_METRIC_ORIGIN_METRIC_PARAMS,
                      index_meta.metric_params());
    meta_.set_metric("QuantizedInteger", 0, metric_params);
    meta_.set_meta(data_type_, meta_.dimension() + ExtraDimension(data_type_));
    return 0;
  }

  //! Cleanup Converter
  virtual int cleanup(void) override {
    *stats_.mutable_transformed_count() = 0;
    return 0;
  }

  //! Train the data
  int train(IndexHolder::Pointer /*holder*/) override {
    return 0;
  }

  //! Transform the data
  int transform(IndexHolder::Pointer holder) override {
    if (holder->data_type() != IndexMeta::DataType::DT_FP32 ||
        holder->dimension() != meta_.dimension() - ExtraDimension(data_type_)) {
      return IndexError_Mismatch;
    }

    *stats_.mutable_transformed_count() += holder->count();
    holder_ = std::make_shared<IntegerStreamingConverterHolder>(
        holder, data_type_, enable_normalize_, is_euclidean_);
    return 0;
  }

  //! Dump index into storage
  int dump(const IndexDumper::Pointer & /*dumper*/) override {
    return 0;
  }

  //! Retrieve statistics
  const Stats &stats(void) const override {
    return stats_;
  }

  //! Retrieve a holder as result
  IndexHolder::Pointer result(void) const override {
    return holder_;
  }

  //! Retrieve Index Meta
  const IndexMeta &meta(void) const override {
    return meta_;
  }

 private:
  //! IndexHolder for IntegerStreamingConverter
  class IntegerStreamingConverterHolder : public IndexHolder {
   public:
    class Iterator : public IndexHolder::Iterator {
     public:
      //! Constructor
      Iterator(const IntegerStreamingConverterHolder *owner,
               IndexHolder::Iterator::Pointer &&iter)
          : owner_(owner),
            buffer_(owner->element_size(), 0),
            normalize_buffer_(owner->front_->element_size(), 0),
            front_iter_(std::move(iter)) {
        this->encode_record();
      }

      //! Destructor
      virtual ~Iterator(void) {}

      //! Retrieve pointer of data
      const void *data(void) const override {
        return buffer_.data();
      }

      //! Test if the iterator is valid
      bool is_valid(void) const override {
        return front_iter_->is_valid();
      }

      //! Retrieve primary key
      uint64_t key(void) const override {
        return front_iter_->key();
      }

      //! Next iterator
      void next(void) override {
        front_iter_->next();
        this->encode_record();
      }

     private:
      //! Encode the data by quantizer
      void encode_record(void) {
        if (front_iter_->is_valid()) {
          const float *vec =
              reinterpret_cast<const float *>(front_iter_->data());
          if (owner_->enable_normalize_) {
            float norm = 0.0;
            memcpy((void *)normalize_buffer_.data(), vec,
                   owner_->front_->element_size());
            ailego::Normalizer<float>::L2((float *)normalize_buffer_.data(),
                                          owner_->dimension_, &norm);
            vec = (float *)normalize_buffer_.data();
          }

          RecordQuantizer::quantize_record(
              vec, owner_->dimension_, owner_->data_type(),
              owner_->is_euclidean_, buffer_.data());
        }
      }

      //! Members
      const IntegerStreamingConverterHolder *owner_{nullptr};
      std::vector<uint8_t> buffer_{};
      std::string normalize_buffer_{};
      IndexHolder::Iterator::Pointer front_iter_{};
    };

    //! Constructor
    IntegerStreamingConverterHolder(IndexHolder::Pointer front,
                                    IndexMeta::DataType tp,
                                    bool enable_normalize, bool is_euclidean)
        : front_(std::move(front)),
          data_type_(tp),
          dimension_(front_->dimension()),
          enable_normalize_(enable_normalize),
          is_euclidean_(is_euclidean) {}

    //! Retrieve count of elements in holder (-1 indicates unknown)
    size_t count(void) const override {
      return front_->count();
    }

    //! Retrieve dimension
    size_t dimension(void) const override {
      return dimension_ + ExtraDimension(data_type_);
    }

    //! Retrieve type information
    IndexMeta::DataType data_type(void) const override {
      return data_type_;
    }

    //! Retrieve element size in bytes
    size_t element_size(void) const override {
      return IndexMeta::ElementSizeof(this->data_type(), this->dimension());
    }

    //! Retrieve if it can multi-pass
    bool multipass(void) const override {
      return front_->multipass();
    }

    //! Create a new iterator
    IndexHolder::Iterator::Pointer create_iterator(void) override {
      IndexHolder::Iterator::Pointer iter = front_->create_iterator();
      return iter ? IndexHolder::Iterator::Pointer(
                        new IntegerStreamingConverterHolder::Iterator(
                            this, std::move(iter)))
                  : IndexHolder::Iterator::Pointer();
    }

   private:
    //! Members
    IndexHolder::Pointer front_{};
    IndexMeta::DataType data_type_{};
    uint32_t dimension_{0};
    bool enable_normalize_{false};
    bool is_euclidean_{false};
  };

  static size_t ExtraDimension(IndexMeta::DataType type) {
    // The extra quantized params storage size to save for each vector
    constexpr size_t kExtraSize = 4 * sizeof(float);
    constexpr size_t kAdditionalInt32 = sizeof(int32_t);
    return type == IndexMeta::DataType::DT_INT8
               ? (kExtraSize + kAdditionalInt32)
               : (kExtraSize * 2);
  }

  //! Members
  IndexMeta meta_{};
  Stats stats_{};
  IndexHolder::Pointer holder_{};
  IndexMeta::DataType data_type_{};
  bool enable_normalize_{false};
  bool is_euclidean_{false};
};

//! Register the converters under their INT8/INT4 alias names. Each alias
//! names the implementing class and the destination data type that is
//! passed along with it.
INDEX_FACTORY_REGISTER_CONVERTER_ALIAS(
    Int8QuantizerConverter,
    IntegerQuantizerConverter<ailego::EntropyInt8Quantizer>,
    IndexMeta::DataType::DT_INT8);
INDEX_FACTORY_REGISTER_CONVERTER_ALIAS(
    Int4QuantizerConverter,
    IntegerQuantizerConverter<ailego::EntropyInt4Quantizer>,
    IndexMeta::DataType::DT_INT4);
INDEX_FACTORY_REGISTER_CONVERTER_ALIAS(Int8StreamingConverter,
                                       IntegerStreamingConverter,
                                       IndexMeta::DataType::DT_INT8);
INDEX_FACTORY_REGISTER_CONVERTER_ALIAS(Int4StreamingConverter,
                                       IntegerStreamingConverter,
                                       IndexMeta::DataType::DT_INT4);

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/quantizer/integer_quantizer_reformer.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <ailego/algorithm/integer_quantizer.h>
#include <ailego/math/norm2_matrix.h>
#include <ailego/math/normalizer.h>
#include <ailego/pattern/defer.h>
#include <core/quantizer/quantizer_params.h>
#include <zvec/core/framework/index_factory.h>
#include "record_quantizer.h"

namespace zvec {
namespace core {

/*! Integer Quantizer Reformer
 */
template <class Quantizer>
class IntegerQuantizerReformer : public IndexReformer {
 public:
  using IndexReformer::transform;

  //! Constructor
  IntegerQuantizerReformer(IndexMeta::DataType dst_type)
      : data_type_(dst_type) {}

//! Get param name
#define P_NAME(NAME)                                                 \
  data_type_ == IndexMeta::DataType::DT_INT8 ? INT8_QUANTIZER_##NAME \
                                             : INT4_QUANTIZER_##NAME

  //! Initialize Reformer
  int init(const ailego::Params &params) override {
    float bias;
    float scale;
    if (!params.get(P_NAME(REFORMER_BIAS), &bias) ||
        !params.get(P_NAME(REFORMER_SCALE), &scale)) {
      LOG_ERROR("Init IntegerReformer failed, required params bias and scale");
      return IndexError_InvalidArgument;
    }

    quantizer_.set_bias(bias);
    quantizer_.set_scale(scale);

    auto metric = params.get_as_string(P_NAME(REFORMER_METRIC));
    auto reciprocal = scale == 0.0 ? 1.0f : (1.0f / scale);
    if (metric == "SquaredEuclidean") {
      scale_reciprocal_ = reciprocal * reciprocal;
    } else if (metric == "Euclidean") {
      scale_reciprocal_ = reciprocal;
    } else if (metric == "Manhattan") {
      scale_reciprocal_ = reciprocal;
    } else if (metric == "InnerProduct" || metric == "MipsSquaredEuclidean") {
      inner_product_ = true;
      scale_reciprocal_ = reciprocal;  // missing query part
    } else {
      LOG_WARN("Unsupported normalize the score for %s", metric.c_str());
      scale_reciprocal_ = 1.0f;
    }
    LOG_DEBUG("Init integer reformer, bias %f, scale %f", bias, scale);
    return 0;
  }

  //! Cleanup Reformer
  int cleanup(void) override {
    inner_product_ = false;
    return 0;
  }

  //! Load index from container
  int load(IndexStorage::Pointer) override {
    return 0;
  }

  //! Unload index
  int unload(void) override {
    return 0;
  }

  //! Transform query
  int transform(const void *query, const IndexQueryMeta &qmeta,
                std::string *out, IndexQueryMeta *ometa) const override {
    IndexMeta::DataType ft = qmeta.data_type();

    if (ft != IndexMeta::DataType::DT_FP32 ||
        qmeta.unit_size() !=
            IndexMeta::UnitSizeof(IndexMeta::DataType::DT_FP32)) {
      return IndexError_Unsupported;
    }

    *ometa = qmeta;
    ometa->set_meta(data_type_, qmeta.dimension());
    out->resize(
        IndexMeta::ElementSizeof(ometa->data_type(), ometa->dimension()));
    const float *vec = reinterpret_cast<const float *>(query);
    auto ovec = reinterpret_cast<typename Quantizer::ValueType *>(&(*out)[0]);

    if (!inner_product_) {
      quantizer_.encode(vec, qmeta.dimension(), ovec);
    } else {
      this->transform(vec, qmeta.dimension(), ovec);
    }
    return 0;
  }

  //! Transform queries
  int transform(const void *query, const IndexQueryMeta &qmeta, uint32_t count,
                std::string *out, IndexQueryMeta *ometa) const override {
    IndexMeta::DataType ft = qmeta.data_type();
    if (ft != IndexMeta::DataType::DT_FP32 ||
        qmeta.unit_size() !=
            IndexMeta::UnitSizeof(IndexMeta::DataType::DT_FP32)) {
      return IndexError_Unsupported;
    }

    *ometa = qmeta;
    ometa->set_meta(data_type_, qmeta.dimension());
    out->resize(count * IndexMeta::ElementSizeof(ometa->data_type(),
                                                 ometa->dimension()));
    const float *vec = reinterpret_cast<const float *>(query);

    if (!inner_product_) {
      quantizer_.encode(
          vec, qmeta.dimension() * count,
          reinterpret_cast<typename Quantizer::ValueType *>(&(*out)[0]));
    } else if (ometa->data_type() == IndexMeta::DataType::DT_INT8) {
      int8_t *ovec = reinterpret_cast<int8_t *>(&(*out)[0]);
      for (size_t i = 0; i < count; ++i) {
        this->transform(&vec[i * qmeta.dimension()], qmeta.dimension(),
                        &ovec[i * qmeta.dimension()]);
      }
    } else {
      uint8_t *ovec = reinterpret_cast<uint8_t *>(&(*out)[0]);
      for (size_t i = 0; i < count; ++i) {
        this->transform(&vec[i * qmeta.dimension()], qmeta.dimension(),
                        &ovec[i * qmeta.dimension() / 2]);
      }
    }

    return 0;
  }

  //! Convert a record
  int convert(const void *record, const IndexQueryMeta &rmeta, std::string *out,
              IndexQueryMeta *ometa) const override {
    IndexMeta::DataType ft = rmeta.data_type();

    if (ft != IndexMeta::DataType::DT_FP32 ||
        rmeta.unit_size() !=
            IndexMeta::UnitSizeof(IndexMeta::DataType::DT_FP32)) {
      return IndexError_Unsupported;
    }

    *ometa = rmeta;
    ometa->set_meta(data_type_, rmeta.dimension());
    out->resize(ometa->element_size());
    const float *vec = reinterpret_cast<const float *>(record);
    auto ovec = reinterpret_cast<typename Quantizer::ValueType *>(&(*out)[0]);

    quantizer_.encode(vec, rmeta.dimension(), ovec);

    return 0;
  }

  //! Convert records
  int convert(const void *records, const IndexQueryMeta &rmeta, uint32_t count,
              std::string *out, IndexQueryMeta *ometa) const override {
    IndexMeta::DataType ft = rmeta.data_type();

    if (ft != IndexMeta::DataType::DT_FP32 ||
        rmeta.unit_size() !=
            IndexMeta::UnitSizeof(IndexMeta::DataType::DT_FP32)) {
      return IndexError_Unsupported;
    }

    *ometa = rmeta;
    ometa->set_meta(data_type_, rmeta.dimension());
    out->resize(count * ometa->element_size());
    const float *vec = reinterpret_cast<const float *>(records);
    quantizer_.encode(
        vec, rmeta.dimension() * count,
        reinterpret_cast<typename Quantizer::ValueType *>(&(*out)[0]));

    return 0;
  }

  //! Normalize results
  int normalize(const void *query, const IndexQueryMeta &qmeta,
                IndexDocumentList &result) const override {
    IndexMeta::DataType ft = qmeta.data_type();
    if (ft != IndexMeta::DataType::DT_FP32 ||
        qmeta.unit_size() !=
            IndexMeta::UnitSizeof(IndexMeta::DataType::DT_FP32)) {
      return IndexError_Unsupported;
    }

    auto scale = scale_reciprocal_;
    if (inner_product_) {
      float abs_max = 0.0f;
      const float *vec = static_cast<const float *>(query);
      if (data_type_ == IndexMeta::DataType::DT_INT8) {
        for (size_t i = 0; i < qmeta.dimension(); ++i) {
          float abs = std::abs(vec[i]);
          abs_max = std::max(abs, abs_max);
        }
        scale *= abs_max / 127;
      } else {
        float max = -std::numeric_limits<float>::max();
        for (size_t i = 0; i < qmeta.dimension(); ++i) {
          float abs = std::abs(vec[i]);
          abs_max = std::max(abs_max, abs);
          max = std::max(max, vec[i]);
        }
        scale *= abs_max / ((7 * abs_max > 8 * max) ? 8 : 7);
      }
    }
    for (auto &it : result) {
      *it.mutable_score() *= scale;
    }

    return 0;
  }

 private:
  //! Quantize the query to int8 in InnerProduct
  void transform(const float *in, size_t dim, int8_t *out) const {
    float abs_max = 0.0f;
    for (size_t i = 0; i < dim; ++i) {
      float abs = std::abs(in[i]);
      abs_max = std::max(abs, abs_max);
    }
    float scale = 127 / abs_max;
    for (size_t i = 0; i < dim; ++i) {
      out[i] = static_cast<int8_t>(std::round(in[i] * scale));
    }
  }

  //! Quantize the query to int4 in InnerProduct
  void transform(const float *in, size_t dim, uint8_t *out) const {
    float abs_max = 0.0f;
    float max = -std::numeric_limits<float>::max();
    for (size_t i = 0; i < dim; ++i) {
      float abs = std::abs(in[i]);
      abs_max = std::max(abs_max, abs);
      max = std::max(max, in[i]);
    }
    float scale = ((7 * abs_max > 8 * max) ? 8 : 7) / abs_max;
    for (size_t i = 0; i < dim; i += 2) {
      auto lo = std::round(in[i] * scale);
      auto hi = std::round(in[i + 1] * scale);
      out[i / 2] = (static_cast_from_float_to_uint8(hi) << 4) |
                   (static_cast_from_float_to_uint8(lo) & 0xF);
    }
  }

 private:
  //! Members
  Quantizer quantizer_;
  float scale_reciprocal_{1.0};
  bool inner_product_{false};
  IndexMeta::DataType data_type_{};
};


/*! Reformer of Integer Streaming Quantizer
 */
class IntegerStreamingReformer : public IndexReformer {
 public:
  //! Constructor
  IntegerStreamingReformer(IndexMeta::DataType dst_type)
      : data_type_(dst_type),
        extra_dimension_(data_type_ == IndexMeta::DataType::DT_INT8 ? 20 : 32) {
  }

  //! Initialize Reformer
  int init(const ailego::Params &params) override {
    params.get(INTEGER_STREAMING_REFORMER_ENABLE_NORMALIZE, &enable_normalize_);
    params.get(INTEGER_STREAMING_REFORMER_IS_EUCLIDEAN, &is_euclidean_);
    return 0;
  }

  //! Cleanup Reformer
  int cleanup(void) override {
    return 0;
  }

  //! Load index from container
  int load(IndexStorage::Pointer) override {
    return 0;
  }

  //! Unload index
  int unload(void) override {
    return 0;
  }

  //! Transform query
  int transform(const void *query, const IndexQueryMeta &qmeta,
                std::string *out, IndexQueryMeta *ometa) const override {
    IndexMeta::DataType ft = qmeta.data_type();

    if (ft != IndexMeta::DataType::DT_FP32 ||
        qmeta.unit_size() !=
            IndexMeta::UnitSizeof(IndexMeta::DataType::DT_FP32)) {
      return IndexError_Unsupported;
    }

    *ometa = qmeta;
    ometa->set_meta(data_type_, qmeta.dimension() + extra_dimension_);
    out->resize(ometa->element_size());
    const float *vec = reinterpret_cast<const float *>(query);
    std::unique_ptr<float[]> normalized;
    if (enable_normalize_) {
      normalized.reset(new float[qmeta.dimension()]);
      vec = normalize(query, qmeta, normalized.get());
    }

    RecordQuantizer::quantize_record(vec, qmeta.dimension(), data_type_,
                                     is_euclidean_, &(*out)[0]);

    return 0;
  }

  //! Transform queries
  int transform(const void *query, const IndexQueryMeta &qmeta, uint32_t count,
                std::string *out, IndexQueryMeta *ometa) const override {
    IndexMeta::DataType ft = qmeta.data_type();
    if (ft != IndexMeta::DataType::DT_FP32 ||
        qmeta.unit_size() !=
            IndexMeta::UnitSizeof(IndexMeta::DataType::DT_FP32)) {
      return IndexError_Unsupported;
    }

    *ometa = qmeta;
    ometa->set_meta(data_type_, qmeta.dimension() + extra_dimension_);
    out->resize(count * ometa->element_size());
    std::unique_ptr<float[]> normalized;
    if (enable_normalize_) {
      normalized.reset(new float[qmeta.dimension()]);
    }
    for (size_t i = 0; i < count; ++i) {
      const float *vec =
          reinterpret_cast<const float *>(query) + i * qmeta.dimension();
      if (enable_normalize_) {
        vec = normalize(vec, qmeta, normalized.get());
      }

      RecordQuantizer::quantize_record(vec, qmeta.dimension(), data_type_,
                                       is_euclidean_,
                                       &(*out)[i * ometa->element_size()]);
    }

    return 0;
  }

  //! Convert a record
  int convert(const void *record, const IndexQueryMeta &rmeta, std::string *out,
              IndexQueryMeta *ometa) const override {
    IndexMeta::DataType ft = rmeta.data_type();

    if (ft != IndexMeta::DataType::DT_FP32 ||
        rmeta.unit_size() !=
            IndexMeta::UnitSizeof(IndexMeta::DataType::DT_FP32)) {
      return IndexError_Unsupported;
    }

    *ometa = rmeta;
    ometa->set_meta(data_type_, rmeta.dimension() + extra_dimension_);
    out->resize(ometa->element_size());
    const float *vec = reinterpret_cast<const float *>(record);
    std::unique_ptr<float[]> normalized;
    if (enable_normalize_) {
      normalized.reset(new float[rmeta.dimension()]);
      vec = normalize(record, rmeta, normalized.get());
    }

    RecordQuantizer::quantize_record(vec, rmeta.dimension(), data_type_,
                                     is_euclidean_, &(*out)[0]);

    return 0;
  }

  //! Convert records
  int convert(const void *records, const IndexQueryMeta &rmeta, uint32_t count,
              std::string *out, IndexQueryMeta *ometa) const override {
    IndexMeta::DataType ft = rmeta.data_type();

    if (ft != IndexMeta::DataType::DT_FP32 ||
        rmeta.unit_size() !=
            IndexMeta::UnitSizeof(IndexMeta::DataType::DT_FP32)) {
      return IndexError_Unsupported;
    }

    *ometa = rmeta;
    ometa->set_meta(data_type_, rmeta.dimension() + extra_dimension_);
    out->resize(count * ometa->element_size());
    std::unique_ptr<float[]> normalized;
    if (enable_normalize_) {
      normalized.reset(new float[rmeta.dimension()]);
    }
    for (size_t i = 0; i < count; ++i) {
      const float *vec =
          reinterpret_cast<const float *>(records) + i * rmeta.dimension();
      if (enable_normalize_) {
        vec = normalize(vec, rmeta, normalized.get());
      }

      RecordQuantizer::quantize_record(vec, rmeta.dimension(), data_type_,
                                       is_euclidean_,
                                       &(*out)[i * ometa->element_size()]);
    }

    return 0;
  }

  //! Normalize results
  int normalize(const void * /*query*/, const IndexQueryMeta & /*qmeta*/,
                IndexDocumentList & /*result*/) const override {
    return 0;
  }

 private:
  //! Normalize a query to `normalized`
  float *normalize(const void *query, const IndexQueryMeta &qmeta,
                   float *normalized) const {
    memcpy(normalized, query, qmeta.element_size());
    float norm = 0.0;
    ailego::Normalizer<float>::L2(normalized, qmeta.dimension(), &norm);
    return normalized;
  }

  bool need_revert() const override {
    return true;
  }

  int revert(const void *in, const IndexQueryMeta &qmeta,
             std::string *out) const override {
    if (enable_normalize_) {
      LOG_ERROR("Unsupported revert for normalized value");

      return IndexError_Unsupported;
    }

    out->resize((qmeta.dimension() - extra_dimension_) * sizeof(float));
    float *out_buf = reinterpret_cast<float *>(out->data());

    RecordQuantizer::unquantize_record(in, qmeta.dimension() - extra_dimension_,
                                       data_type_, out_buf);

    return 0;
  }

  //! Members
  IndexMeta::DataType data_type_{};
  uint32_t extra_dimension_{0};
  bool enable_normalize_{false};
  bool is_euclidean_{false};
};

// Register the Int8/Int4 quantizer reformers and the Int8/Int4 streaming
// reformers under their factory names.
// NOTE(review): the trailing DataType argument is presumably forwarded to the
// reformer constructor -- confirm against the macro definition.
INDEX_FACTORY_REGISTER_REFORMER_ALIAS(
    Int8QuantizerReformer,
    IntegerQuantizerReformer<ailego::EntropyInt8Quantizer>,
    IndexMeta::DataType::DT_INT8);
INDEX_FACTORY_REGISTER_REFORMER_ALIAS(
    Int4QuantizerReformer,
    IntegerQuantizerReformer<ailego::EntropyInt4Quantizer>,
    IndexMeta::DataType::DT_INT4);
INDEX_FACTORY_REGISTER_REFORMER_ALIAS(Int8StreamingReformer,
                                      IntegerStreamingReformer,
                                      IndexMeta::DataType::DT_INT8);
INDEX_FACTORY_REGISTER_REFORMER_ALIAS(Int4StreamingReformer,
                                      IntegerStreamingReformer,
                                      IndexMeta::DataType::DT_INT4);

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/quantizer/mips_converter.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <ailego/math/norm2_matrix.h>
#include <core/quantizer/quantizer_params.h>
#include <zvec/ailego/utility/time_helper.h>
#include <zvec/ailego/utility/type_helper.h>
#include <zvec/core/framework/index_factory.h>

namespace zvec {
namespace core {

/*! Convert the vector By Mips RepeatedQuadraticInjection
 *
 *  Writes `dim + m_value` values to `dst`: the first `dim` are the source
 *  components multiplied by `u_value / l2_norm`; the remaining `m_value` are
 *  0.5 minus the squared norm of that scaled vector, with the squared norm
 *  itself being squared again after each appended value.
 */
template <typename T1, typename T2,
          typename =
              typename std::enable_if<ailego::IsFloatingPoint<T1>::value &&
                                      ailego::IsFloatingPoint<T2>::value>::type>
static inline void ConvertRepeatedQuadraticInjection(const T1 *src, size_t dim,
                                                     size_t m_value,
                                                     float u_value,
                                                     float l2_norm, T2 *dst) {
  // Rescale each component and accumulate the squared norm of the result.
  float norm_power = 0.0f;
  size_t pos = 0;
  while (pos < dim) {
    float scaled = src[pos] * u_value / l2_norm;
    dst[pos] = scaled;
    norm_power += scaled * scaled;
    ++pos;
  }

  // Append the injected components; the power doubles on every step.
  const size_t end = dim + m_value;
  for (pos = dim; pos < end; ++pos) {
    dst[pos] = 0.5f - norm_power;
    norm_power *= norm_power;
  }
}

/*! Convert the vector By Mips SphericalInjection
 *
 *  Writes `dim + 1` values to `dst`: the source components multiplied by
 *  `u_value / l2_norm`, followed by `1 - sqrt(1 - s)` where `s` is the
 *  squared norm of the scaled vector. The last value is 1 when `s` is not
 *  below 1.
 */
template <typename T1, typename T2,
          typename =
              typename std::enable_if<ailego::IsFloatingPoint<T1>::value &&
                                      ailego::IsFloatingPoint<T2>::value>::type>
static inline void ConvertSphericalInjection(const T1 *src, size_t dim,
                                             float u_value, float l2_norm,
                                             T2 *dst) {
  float norm_sq = 0.0f;
  size_t pos = 0;
  while (pos < dim) {
    float scaled = src[pos] * u_value / l2_norm;
    dst[pos] = scaled;
    norm_sq += scaled * scaled;
    ++pos;
  }

  // The extra component is evaluated in double precision.
  double injected = 1.0f;
  if (norm_sq < 1.0) {
    injected = 1.0 - std::sqrt(1.0 - static_cast<double>(norm_sq));
  }
  dst[dim] = injected;
}

/*! MIPS Holder (Float)
 *
 *  Wraps an upstream holder of FP32 records and exposes each record as FP32
 *  with `m_value_` injected components appended.
 */
class MipsConverterHolder : public IndexHolder {
 public:
  /*! MIPS Holder Iterator
   *
   *  Forwards validity, key and stepping to the upstream iterator and keeps
   *  the converted form of the current record in an internal buffer.
   */
  class Iterator : public IndexHolder::Iterator {
   public:
    //! Index Holder Iterator Pointer
    using Pointer = std::unique_ptr<Iterator>;

    //! Constructor: takes over the upstream iterator and converts the first
    //! record, if there is one
    Iterator(const MipsConverterHolder *owner,
             IndexHolder::Iterator::Pointer &&iter)
        : buffer_(owner->dimension()),
          m_value_(owner->m_value_),
          u_value_(owner->u_value_),
          l2_norm_(owner->l2_norm_),
          spherical_injection_(owner->spherical_injection_),
          front_iter_(std::move(iter)) {
      transform_data();
    }

    //! Destructor
    virtual ~Iterator() {}

    //! Retrieve pointer of data (the converted record)
    const void *data() const override {
      return buffer_.data();
    }

    //! Test if the iterator is valid
    bool is_valid() const override {
      return front_iter_->is_valid();
    }

    //! Retrieve primary key
    uint64_t key() const override {
      return front_iter_->key();
    }

    //! Next iterator: step upstream, then convert the new record
    void next() override {
      front_iter_->next();
      transform_data();
    }

   private:
    //! Transform the data of the current upstream record into the buffer
    void transform_data() {
      if (front_iter_->is_valid()) {
        const float *input =
            reinterpret_cast<const float *>(front_iter_->data());
        float *output = buffer_.data();
        // The buffer holds the source components plus the injected ones.
        const size_t source_dim = buffer_.size() - m_value_;
        if (spherical_injection_) {
          ConvertSphericalInjection(input, source_dim, u_value_, l2_norm_,
                                    output);
        } else {
          ConvertRepeatedQuadraticInjection(input, source_dim, m_value_,
                                            u_value_, l2_norm_, output);
        }
      }
    }

    std::vector<float> buffer_{};
    uint32_t m_value_{0u};
    float u_value_{0.0f};
    float l2_norm_{0.0f};
    bool spherical_injection_{false};
    IndexHolder::Iterator::Pointer front_iter_{};
  };

  //! Constructor
  MipsConverterHolder(IndexHolder::Pointer front, uint32_t m_val, float u_val,
                      float l2_norm, bool spherical_injection)
      : m_value_(m_val),
        u_value_(u_val),
        l2_norm_(l2_norm),
        spherical_injection_(spherical_injection),
        front_(std::move(front)) {}

  //! Retrieve count of elements in holder (-1 indicates unknown)
  size_t count() const override {
    return front_->count();
  }

  //! Retrieve dimension (upstream dimension plus injected components)
  size_t dimension() const override {
    return front_->dimension() + m_value_;
  }

  //! Retrieve type information
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DataType::DT_FP32;
  }

  //! Retrieve element size in bytes
  size_t element_size() const override {
    const size_t total_dim = front_->dimension() + m_value_;
    return IndexMeta::ElementSizeof(IndexMeta::DataType::DT_FP32, total_dim);
  }

  //! Retrieve if it can multi-pass
  bool multipass() const override {
    return front_->multipass();
  }

  //! Create a new iterator (empty pointer if upstream provides none)
  IndexHolder::Iterator::Pointer create_iterator() override {
    IndexHolder::Iterator::Pointer upstream = front_->create_iterator();
    if (!upstream) {
      return IndexHolder::Iterator::Pointer();
    }
    return IndexHolder::Iterator::Pointer(
        new MipsConverterHolder::Iterator(this, std::move(upstream)));
  }

 private:
  //! Disable them
  MipsConverterHolder() = delete;

  //! Members
  uint32_t m_value_{0u};
  float u_value_{0.0f};
  float l2_norm_{0.0f};
  bool spherical_injection_{false};
  IndexHolder::Pointer front_{};
};

/*! MIPS Holder (Forced Half Float)
 *
 *  Wraps an upstream holder of FP32 records and exposes each record as FP16
 *  with `m_value_` injected components appended.
 */
class MipsConverterForcedHalfHolder : public IndexHolder {
 public:
  /*! MIPS Holder Iterator
   *
   *  Forwards validity, key and stepping to the upstream iterator and keeps
   *  the converted form of the current record in an internal buffer.
   */
  class Iterator : public IndexHolder::Iterator {
   public:
    //! Index Holder Iterator Pointer
    using Pointer = std::unique_ptr<Iterator>;

    //! Constructor: takes over the upstream iterator and converts the first
    //! record, if there is one
    Iterator(const MipsConverterForcedHalfHolder *owner,
             IndexHolder::Iterator::Pointer &&iter)
        : buffer_(owner->dimension()),
          m_value_(owner->m_value_),
          u_value_(owner->u_value_),
          l2_norm_(owner->l2_norm_),
          spherical_injection_(owner->spherical_injection_),
          front_iter_(std::move(iter)) {
      transform_record();
    }

    //! Destructor
    virtual ~Iterator() {}

    //! Retrieve pointer of data (the converted record)
    const void *data() const override {
      return buffer_.data();
    }

    //! Test if the iterator is valid
    bool is_valid() const override {
      return front_iter_->is_valid();
    }

    //! Retrieve primary key
    uint64_t key() const override {
      return front_iter_->key();
    }

    //! Next iterator: step upstream, then convert the new record
    void next() override {
      front_iter_->next();
      transform_record();
    }

   private:
    //! Convert the current FP32 upstream record into the FP16 buffer
    void transform_record() {
      if (front_iter_->is_valid()) {
        const float *input =
            reinterpret_cast<const float *>(front_iter_->data());
        ailego::Float16 *output = buffer_.data();
        // The buffer holds the source components plus the injected ones.
        const size_t source_dim = buffer_.size() - m_value_;
        if (spherical_injection_) {
          ConvertSphericalInjection(input, source_dim, u_value_, l2_norm_,
                                    output);
        } else {
          ConvertRepeatedQuadraticInjection(input, source_dim, m_value_,
                                            u_value_, l2_norm_, output);
        }
      }
    }

    std::vector<ailego::Float16> buffer_{};
    uint32_t m_value_{0u};
    float u_value_{0.0f};
    float l2_norm_{0.0f};
    bool spherical_injection_{false};
    IndexHolder::Iterator::Pointer front_iter_{};
  };

  //! Constructor
  MipsConverterForcedHalfHolder(IndexHolder::Pointer front, uint32_t m_val,
                                float u_val, float l2_norm,
                                bool spherical_injection)
      : m_value_(m_val),
        u_value_(u_val),
        l2_norm_(l2_norm),
        spherical_injection_(spherical_injection),
        front_(std::move(front)) {}

  //! Retrieve count of elements in holder (-1 indicates unknown)
  size_t count() const override {
    return front_->count();
  }

  //! Retrieve dimension (upstream dimension plus injected components)
  size_t dimension() const override {
    return front_->dimension() + m_value_;
  }

  //! Retrieve type information
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DataType::DT_FP16;
  }

  //! Retrieve element size in bytes
  size_t element_size() const override {
    const size_t total_dim = front_->dimension() + m_value_;
    return IndexMeta::ElementSizeof(IndexMeta::DataType::DT_FP16, total_dim);
  }

  //! Retrieve if it can multi-pass
  bool multipass() const override {
    return front_->multipass();
  }

  //! Create a new iterator (empty pointer if upstream provides none)
  IndexHolder::Iterator::Pointer create_iterator() override {
    IndexHolder::Iterator::Pointer upstream = front_->create_iterator();
    if (!upstream) {
      return IndexHolder::Iterator::Pointer();
    }
    return IndexHolder::Iterator::Pointer(
        new MipsConverterForcedHalfHolder::Iterator(this,
                                                    std::move(upstream)));
  }

 private:
  //! Disable them
  MipsConverterForcedHalfHolder() = delete;

  //! Members
  uint32_t m_value_{0u};
  float u_value_{0.0f};
  float l2_norm_{0.0f};
  bool spherical_injection_{false};
  IndexHolder::Pointer front_{};
};

/*! MIPS Holder (Half Float)
 *
 *  Wraps an upstream holder of FP16 records and exposes each record as FP16
 *  with `m_value_` injected components appended.
 */
class MipsConverterHalfHolder : public IndexHolder {
 public:
  /*! MIPS Holder Iterator
   *
   *  Forwards validity, key and stepping to the upstream iterator and keeps
   *  the converted form of the current record in an internal buffer.
   */
  class Iterator : public IndexHolder::Iterator {
   public:
    //! Index Holder Iterator Pointer
    using Pointer = std::unique_ptr<Iterator>;

    //! Constructor: takes over the upstream iterator and converts the first
    //! record, if there is one
    Iterator(const MipsConverterHalfHolder *owner,
             IndexHolder::Iterator::Pointer &&iter)
        : buffer_(owner->dimension()),
          m_value_(owner->m_value_),
          u_value_(owner->u_value_),
          l2_norm_(owner->l2_norm_),
          spherical_injection_(owner->spherical_injection_),
          front_iter_(std::move(iter)) {
      transform_record();
    }

    //! Destructor
    virtual ~Iterator() {}

    //! Retrieve pointer of data (the converted record)
    const void *data() const override {
      return buffer_.data();
    }

    //! Test if the iterator is valid
    bool is_valid() const override {
      return front_iter_->is_valid();
    }

    //! Retrieve primary key
    uint64_t key() const override {
      return front_iter_->key();
    }

    //! Next iterator: step upstream, then convert the new record
    void next() override {
      front_iter_->next();
      transform_record();
    }

   private:
    //! Convert the current FP16 upstream record into the FP16 buffer
    void transform_record() {
      if (front_iter_->is_valid()) {
        const ailego::Float16 *input =
            reinterpret_cast<const ailego::Float16 *>(front_iter_->data());
        ailego::Float16 *output = buffer_.data();
        // The buffer holds the source components plus the injected ones.
        const size_t source_dim = buffer_.size() - m_value_;
        if (spherical_injection_) {
          ConvertSphericalInjection(input, source_dim, u_value_, l2_norm_,
                                    output);
        } else {
          ConvertRepeatedQuadraticInjection(input, source_dim, m_value_,
                                            u_value_, l2_norm_, output);
        }
      }
    }

    std::vector<ailego::Float16> buffer_{};
    uint32_t m_value_{0u};
    float u_value_{0.0f};
    float l2_norm_{0.0f};
    bool spherical_injection_{false};
    IndexHolder::Iterator::Pointer front_iter_{};
  };

  //! Constructor
  MipsConverterHalfHolder(IndexHolder::Pointer front, uint32_t m_val,
                          float u_val, float l2_norm, bool spherical_injection)
      : m_value_(m_val),
        u_value_(u_val),
        l2_norm_(l2_norm),
        spherical_injection_(spherical_injection),
        front_(std::move(front)) {}

  //! Retrieve count of elements in holder (-1 indicates unknown)
  size_t count() const override {
    return front_->count();
  }

  //! Retrieve dimension (upstream dimension plus injected components)
  size_t dimension() const override {
    return front_->dimension() + m_value_;
  }

  //! Retrieve type information
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DataType::DT_FP16;
  }

  //! Retrieve element size in bytes
  size_t element_size() const override {
    const size_t total_dim = front_->dimension() + m_value_;
    return IndexMeta::ElementSizeof(IndexMeta::DataType::DT_FP16, total_dim);
  }

  //! Retrieve if it can multi-pass
  bool multipass() const override {
    return front_->multipass();
  }

  //! Create a new iterator (empty pointer if upstream provides none)
  IndexHolder::Iterator::Pointer create_iterator() override {
    IndexHolder::Iterator::Pointer upstream = front_->create_iterator();
    if (!upstream) {
      return IndexHolder::Iterator::Pointer();
    }
    return IndexHolder::Iterator::Pointer(
        new MipsConverterHalfHolder::Iterator(this, std::move(upstream)));
  }

 private:
  //! Disable them
  MipsConverterHalfHolder() = delete;

  //! Members
  uint32_t m_value_{0u};
  float u_value_{0.0f};
  float l2_norm_{0.0f};
  bool spherical_injection_{false};
  IndexHolder::Pointer front_{};
};

/*! MIPS Converter
 */
class MipsConverter : public IndexConverter {
 public:
  //! Destructor
  virtual ~MipsConverter(void) {}

  //! Initialize Converter
  int init(const IndexMeta &mt, const ailego::Params &params) override {
    IndexMeta::DataType dt = mt.data_type();
    if (ailego_unlikely((dt != IndexMeta::DataType::DT_FP32 &&
                         dt != IndexMeta::DataType::DT_FP16) ||
                        mt.unit_size() != IndexMeta::UnitSizeof(dt))) {
      LOG_ERROR("Unsupported type %d with unit size %u.", dt, mt.unit_size());
      return IndexError_Unsupported;
    }

    params.get(MIPS_CONVERTER_FORCED_HALF_FLOAT, &forced_half_float_);
    params.get(MIPS_CONVERTER_SPHERICAL_INJECTION, &spherical_injection_);
    params.get(MIPS_CONVERTER_M_VALUE, &m_value_);
    params.get(MIPS_CONVERTER_U_VALUE, &u_value_);
    params.get(MIPS_CONVERTER_L2_NORM, &l2_norm_);

    if (!spherical_injection_) {
      if (!m_value_) {
        static const uint32_t m_values[4] = {4, 3, 6, 5};
        m_value_ = m_values[mt.dimension() % 4];
      }
      if (u_value_ <= std::numeric_limits<float>::epsilon() ||
          u_value_ >= 1.0) {
        // Try computing a default U value
        constexpr float kLogError = -5.0;  // log_10(distance_error)
        u_value_ = std::pow(10, kLogError / (1 << (m_value_ + 1)));
      }
      if (std::pow(u_value_, (1 << m_value_)) <
          std::numeric_limits<float>::epsilon()) {
        LOG_WARN("U value %f too small, may cause loss of distance precision.",
                 u_value_);
      }
    } else {
      if (m_value_ != 0u || u_value_ != 0.0f) {
        LOG_WARN(
            "Ignore invalid M value or U value if spherical_injection enabled");
      }
      // SphericalInjection requires ||x{i}|| <= 1 for the computation
      // std::sqrt(1 - ||x{i}||^2), so let the u_value be a little less
      // than 1.0 for its precision loss in float computation
      u_value_ = 1.0f - 1e-2;
      m_value_ = 1;
    }

    // Setting of MIPS Converter
    meta_ = mt;
    if (forced_half_float_) {
      meta_.set_meta(IndexMeta::DataType::DT_FP16, mt.dimension() + m_value_);
    } else {
      meta_.set_meta(dt, mt.dimension() + m_value_);
    }
    meta_.set_converter("MipsConverter", 0, params);
    return 0;
  }

  //! Cleanup Converter
  int cleanup(void) override {
    return 0;
  }

  //! Train the data
  int train(IndexHolder::Pointer holder) override {
    if (holder->dimension() + m_value_ != meta_.dimension()) {
      return IndexError_Mismatch;
    }

    ailego::ElapsedTime timer;
    auto iter = holder->create_iterator();
    if (!iter) {
      LOG_ERROR("Failed to create iterator of holder");
      return IndexError_Runtime;
    }

    size_t dim = holder->dimension();
    switch (holder->data_type()) {
      case IndexMeta::DataType::DT_FP16:
        for (; iter->is_valid(); iter->next()) {
          float score;
          ailego::Norm2Matrix<ailego::Float16, 1>::Compute(
              reinterpret_cast<const ailego::Float16 *>(iter->data()), dim,
              &score);

          if (score > l2_norm_) {
            l2_norm_ = score;
            if (l2_norm_ < 1.0 && l2_norm_ > u_value_) {
              u_value_ = l2_norm_;
            }
          }
          (*stats_.mutable_trained_count())++;
        }
        break;

      case IndexMeta::DataType::DT_FP32:
        for (; iter->is_valid(); iter->next()) {
          float score;
          ailego::Norm2Matrix<float, 1>::Compute(
              reinterpret_cast<const float *>(iter->data()), dim, &score);

          if (score > l2_norm_) {
            l2_norm_ = score;
            if (l2_norm_ < 1.0 && l2_norm_ > u_value_) {
              u_value_ = l2_norm_;
            }
          }
          (*stats_.mutable_trained_count())++;
        }
        break;

      default:
        return IndexError_Mismatch;
    }

    // Setting of MIPS Reformer
    ailego::Params reformer_params;
    reformer_params.set(MIPS_REFORMER_M_VALUE, m_value_);
    reformer_params.set(MIPS_REFORMER_U_VALUE, u_value_);
    reformer_params.set(MIPS_REFORMER_L2_NORM, l2_norm_);
    reformer_params.set(MIPS_REFORMER_FORCED_HALF_FLOAT, forced_half_float_);
    reformer_params.set(MIPS_REFORMER_NORMALIZE, true);
    reformer_params.set(MIPS_REFORMER_SPHERICAL_INJECTION,
                        spherical_injection_);
    meta_.set_reformer("MipsReformer", 0, reformer_params);
    if (meta_.metric_name() == "InnerProduct") {
      LOG_INFO("Convert IndexMeasure from InnerProduct to SquaredEuclidean");
      meta_.set_metric("SquaredEuclidean", 0, ailego::Params());
    }

    // Setting of MIPS Converter Params
    ailego::Params params = meta_.converter_params();
    params.set(MIPS_CONVERTER_FORCED_HALF_FLOAT, forced_half_float_);
    params.set(MIPS_CONVERTER_M_VALUE, m_value_);
    params.set(MIPS_CONVERTER_U_VALUE, u_value_);
    params.set(MIPS_CONVERTER_L2_NORM, l2_norm_);
    params.set(MIPS_CONVERTER_SPHERICAL_INJECTION, spherical_injection_);
    meta_.set_converter("MipsConverter", 0, params);

    stats_.set_trained_costtime(timer.milli_seconds());
    return 0;
  }

  //! Transform the data
  int transform(IndexHolder::Pointer holder) override {
    if (holder->dimension() + m_value_ != meta_.dimension()) {
      return IndexError_Mismatch;
    }

    switch (holder->data_type()) {
      case IndexMeta::DataType::DT_FP16:
        holder_ = std::make_shared<MipsConverterHalfHolder>(
            holder, m_value_, u_value_, l2_norm_, spherical_injection_);
        break;

      case IndexMeta::DataType::DT_FP32:
        if (forced_half_float_) {
          holder_ = std::make_shared<MipsConverterForcedHalfHolder>(
              holder, m_value_, u_value_, l2_norm_, spherical_injection_);
        } else {
          holder_ = std::make_shared<MipsConverterHolder>(
              holder, m_value_, u_value_, l2_norm_, spherical_injection_);
        }
        break;

      default:
        return IndexError_Mismatch;
    }
    return 0;
  }

  //! Dump index into storage
  int dump(const IndexDumper::Pointer &) override {
    return 0;
  }

  //! Retrieve statistics
  const Stats &stats(void) const override {
    return stats_;
  }

  //! Retrieve a holder as result
  IndexHolder::Pointer result(void) const override {
    return holder_;
  }

  //! Retrieve Index Meta
  const IndexMeta &meta(void) const override {
    return meta_;
  }

 private:
  uint32_t m_value_{0u};
  float u_value_{0.0f};
  float l2_norm_{0.0f};
  bool forced_half_float_{false};
  bool spherical_injection_{false};
  IndexMeta meta_{};
  IndexHolder::Pointer holder_{};
  Stats stats_{};
};

// Register MipsConverter with the index factory.
// NOTE(review): presumably this makes it creatable by the name
// "MipsConverter" used in set_converter() -- confirm against the macro.
INDEX_FACTORY_REGISTER_CONVERTER(MipsConverter);

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/quantizer/mips_reformer.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <ailego/math/normalizer.h>
#include <core/quantizer/quantizer_params.h>
#include <zvec/core/framework/index_factory.h>

namespace zvec {
namespace core {

/*! Convert the vector By Mips RepeatedQuadraticInjection
 *
 *  Fills `dst` with `dim + m_value` values. The leading `dim` values are the
 *  source components multiplied by `u_value / l2_norm`. Each of the trailing
 *  `m_value` values is 0.5 minus a running power of the scaled vector's
 *  squared norm, and that power is squared after every value written.
 */
template <typename T1, typename T2,
          typename =
              typename std::enable_if<ailego::IsFloatingPoint<T1>::value &&
                                      ailego::IsFloatingPoint<T2>::value>::type>
static inline void ConvertRepeatedQuadraticInjection(const T1 *src, size_t dim,
                                                     size_t m_value,
                                                     float u_value,
                                                     float l2_norm, T2 *dst) {
  float running = 0.0f;
  for (size_t k = 0; k != dim; ++k) {
    float component = src[k] * u_value / l2_norm;
    running += component * component;
    dst[k] = component;
  }

  // Injected tail: 0.5 - |x|^2, 0.5 - |x|^4, 0.5 - |x|^8, ...
  const size_t total = dim + m_value;
  size_t k = dim;
  while (k < total) {
    dst[k] = 0.5f - running;
    running *= running;
    ++k;
  }
}

/*! Convert the vector By Mips SphericalInjection
 *
 *  Fills `dst` with `dim + 1` values: the source components multiplied by
 *  `u_value / l2_norm`, then `1 - sqrt(1 - s)` with `s` being the squared
 *  norm of the scaled vector. When `s` is not below 1 the final value is 1.
 */
template <typename T1, typename T2,
          typename =
              typename std::enable_if<ailego::IsFloatingPoint<T1>::value &&
                                      ailego::IsFloatingPoint<T2>::value>::type>
static inline void ConvertSphericalInjection(const T1 *src, size_t dim,
                                             float u_value, float l2_norm,
                                             T2 *dst) {
  float running = 0.0f;
  for (size_t k = 0; k != dim; ++k) {
    float component = src[k] * u_value / l2_norm;
    running += component * component;
    dst[k] = component;
  }

  // The extra component is evaluated in double precision.
  double extra = 1.0f;
  if (running < 1.0) {
    extra = 1.0 - std::sqrt(1.0 - static_cast<double>(running));
  }
  dst[dim] = extra;
}

/*! MIPS Reformer
 */
class MipsReformer : public IndexReformer {
 public:
  //! Initialize the reformer from `params`.
  //! Reads the M value, U value, L2 norm and the normalize / forced-half-float
  //! / spherical-injection switches; the results of `params.get` are not
  //! checked. With spherical injection enabled the M value is forced to 1
  //! (a warning is logged if it was anything else). Always returns 0.
  int init(const ailego::Params &params) override {
    params.get(MIPS_REFORMER_M_VALUE, &m_value_);
    params.get(MIPS_REFORMER_U_VALUE, &u_value_);
    params.get(MIPS_REFORMER_L2_NORM, &l2_norm_);
    params.get(MIPS_REFORMER_NORMALIZE, &normalize_);
    params.get(MIPS_REFORMER_FORCED_HALF_FLOAT, &forced_half_float_);
    params.get(MIPS_REFORMER_SPHERICAL_INJECTION, &spherical_injection_);

    if (!spherical_injection_) {
      return 0;
    }

    // Spherical injection appends exactly one component.
    if (m_value_ != 1u) {
      LOG_WARN("Invalid M value or U value if spherical_injection enabled");
    }
    m_value_ = 1;
    return 0;
  }

  //! Cleanup Reformer.
  //! No-op: no member is reset here; always returns 0.
  int cleanup(void) override {
    return 0;
  }

  //! Load index from container.
  //! No-op: the storage argument is ignored and 0 is returned.
  int load(IndexStorage::Pointer) override {
    return 0;
  }

  //! Unload index.
  //! No-op counterpart of load(); always returns 0.
  int unload(void) override {
    return 0;
  }

  //! Transform query
  int transform(const void *query, const IndexQueryMeta &qmeta,
                std::string *out, IndexQueryMeta *ometa) const override {
    IndexMeta::DataType dt = qmeta.data_type();

    if (dt == IndexMeta::DataType::DT_FP32) {
      if (qmeta.unit_size() != sizeof(float)) {
        return IndexError_Unsupported;
      }

      if (forced_half_float_) {
        out->clear();
        out->resize((qmeta.dimension() + m_value_) * sizeof(ailego::Float16));

        if (normalize_) {
          float norm;
          ailego::Norm2Matrix<float, 1>::Compute(
              reinterpret_cast<const float *>(query), qmeta.dimension(), &norm);

          ailego::FloatHelper::ToFP16(reinterpret_cast<const float *>(query),
                                      qmeta.dimension(), norm,
                                      reinterpret_cast<uint16_t *>(&(*out)[0]));
        } else {
          ailego::FloatHelper::ToFP16(reinterpret_cast<const float *>(query),
                                      qmeta.dimension(),
                                      reinterpret_cast<uint16_t *>(&(*out)[0]));
        }
        if (spherical_injection_) {
          reinterpret_cast<ailego::Float16 *>(&(*out)[0])[qmeta.dimension()] =
              1.0f;
        }
        *ometa = qmeta;
        ometa->set_meta(IndexMeta::DataType::DT_FP16,
                        qmeta.dimension() + m_value_);

      } else {
        out->assign(reinterpret_cast<const char *>(query),
                    qmeta.element_size());
        out->resize((qmeta.dimension() + m_value_) * sizeof(float));

        if (normalize_) {
          float norm;
          ailego::Normalizer<float>::L2(reinterpret_cast<float *>(&(*out)[0]),
                                        qmeta.dimension(), &norm);
        }
        if (spherical_injection_) {
          reinterpret_cast<float *>(&(*out)[0])[qmeta.dimension()] = 1.0f;
        }
        *ometa = qmeta;
        ometa->set_dimension(qmeta.dimension() + m_value_);
      }
    } else if (dt == IndexMeta::DataType::DT_FP16) {
      if (qmeta.unit_size() != sizeof(ailego::Float16)) {
        return IndexError_Unsupported;
      }
      out->assign(reinterpret_cast<const char *>(query), qmeta.element_size());
      out->resize((qmeta.dimension() + m_value_) * sizeof(ailego::Float16));

      if (normalize_) {
        float norm;
        ailego::Normalizer<ailego::Float16>::L2(
            reinterpret_cast<ailego::Float16 *>(&(*out)[0]), qmeta.dimension(),
            &norm);
      }
      if (spherical_injection_) {
        reinterpret_cast<ailego::Float16 *>(&(*out)[0])[qmeta.dimension()] =
            1.0f;
      }
      *ometa = qmeta;
      ometa->set_dimension(qmeta.dimension() + m_value_);
    } else {
      return IndexError_Unsupported;
    }
    return 0;
  }

  //! Transform queries
  int transform(const void *query, const IndexQueryMeta &qmeta, uint32_t count,
                std::string *out, IndexQueryMeta *ometa) const override {
    IndexMeta::DataType dt = qmeta.data_type();

    if (dt == IndexMeta::DataType::DT_FP32) {
      if (qmeta.unit_size() != sizeof(float)) {
        return IndexError_Unsupported;
      }
      out->clear();

      if (forced_half_float_) {
        for (uint32_t i = 0; i < count; ++i) {
          size_t offset = out->size();
          out->resize(offset +
                      (qmeta.dimension() + m_value_) * sizeof(ailego::Float16));

          const float *sub_query =
              reinterpret_cast<const float *>(query) + i * qmeta.dimension();

          if (normalize_) {
            float norm;
            ailego::Norm2Matrix<float, 1>::Compute(sub_query, qmeta.dimension(),
                                                   &norm);
            ailego::FloatHelper::ToFP16(
                sub_query, qmeta.dimension(), norm,
                reinterpret_cast<uint16_t *>(&(*out)[offset]));
          } else {
            ailego::FloatHelper::ToFP16(
                sub_query, qmeta.dimension(),
                reinterpret_cast<uint16_t *>(&(*out)[offset]));
          }
          if (spherical_injection_) {
            reinterpret_cast<ailego::Float16 *>(
                &(*out)[offset])[qmeta.dimension()] = 1.0f;
          }
        }
        *ometa = qmeta;
        ometa->set_meta(IndexMeta::DataType::DT_FP16,
                        qmeta.dimension() + m_value_);

      } else {
        for (uint32_t i = 0; i < count; ++i) {
          size_t offset = out->size();
          out->append(
              reinterpret_cast<const char *>(query) + i * qmeta.element_size(),
              qmeta.element_size());
          out->resize(offset + (qmeta.dimension() + m_value_) * sizeof(float));

          if (normalize_) {
            float norm;
            ailego::Normalizer<float>::L2(
                reinterpret_cast<float *>(&(*out)[offset]), qmeta.dimension(),
                &norm);
          }
          if (spherical_injection_) {
            reinterpret_cast<float *>(&(*out)[offset])[qmeta.dimension()] =
                1.0f;
          }
        }
        *ometa = qmeta;
        ometa->set_dimension(qmeta.dimension() + m_value_);
      }
    } else if (dt == IndexMeta::DataType::DT_FP16) {
      if (qmeta.unit_size() != sizeof(ailego::Float16)) {
        return IndexError_Unsupported;
      }
      out->clear();

      for (uint32_t i = 0; i < count; ++i) {
        size_t offset = out->size();
        out->append(
            reinterpret_cast<const char *>(query) + i * qmeta.element_size(),
            qmeta.element_size());
        out->resize(offset +
                    (qmeta.dimension() + m_value_) * sizeof(ailego::Float16));

        if (normalize_) {
          float norm;
          ailego::Normalizer<ailego::Float16>::L2(
              reinterpret_cast<ailego::Float16 *>(&(*out)[offset]),
              qmeta.dimension(), &norm);
        }
        if (spherical_injection_) {
          reinterpret_cast<ailego::Float16 *>(
              &(*out)[offset])[qmeta.dimension()] = 1.0f;
        }
      }
      *ometa = qmeta;
      ometa->set_dimension(qmeta.dimension() + m_value_);

    } else {
      return IndexError_Unsupported;
    }
    return 0;
  }

  //! Convert a record
  int convert(const void *record, const IndexQueryMeta &rmeta, std::string *out,
              IndexQueryMeta *ometa) const override {
    IndexMeta::DataType dt = rmeta.data_type();

    if (dt == IndexMeta::DataType::DT_FP32) {
      if (rmeta.unit_size() != sizeof(float)) {
        return IndexError_Unsupported;
      }

      const float *vec = reinterpret_cast<const float *>(record);
      if (forced_half_float_) {
        *ometa = rmeta;
        ometa->set_meta(IndexMeta::DataType::DT_FP16,
                        rmeta.dimension() + m_value_);
        out->resize(ometa->element_size());

        ailego::Float16 *dst = reinterpret_cast<ailego::Float16 *>(&(*out)[0]);
        if (!spherical_injection_) {
          ConvertRepeatedQuadraticInjection(vec, rmeta.dimension(), m_value_,
                                            u_value_, l2_norm_, dst);
        } else {
          ConvertSphericalInjection(vec, rmeta.dimension(), u_value_, l2_norm_,
                                    dst);
        }
      } else {
        *ometa = rmeta;
        ometa->set_dimension(rmeta.dimension() + m_value_);
        out->resize(ometa->element_size());

        float *dst = reinterpret_cast<float *>(&(*out)[0]);
        if (!spherical_injection_) {
          ConvertRepeatedQuadraticInjection(vec, rmeta.dimension(), m_value_,
                                            u_value_, l2_norm_, dst);
        } else {
          ConvertSphericalInjection(vec, rmeta.dimension(), u_value_, l2_norm_,
                                    dst);
        }
      }
    } else if (dt == IndexMeta::DataType::DT_FP16) {
      if (rmeta.unit_size() != sizeof(ailego::Float16)) {
        return IndexError_Unsupported;
      }
      *ometa = rmeta;
      ometa->set_dimension(rmeta.dimension() + m_value_);
      out->resize(ometa->element_size());

      const auto *vec = reinterpret_cast<const ailego::Float16 *>(record);
      ailego::Float16 *dst = reinterpret_cast<ailego::Float16 *>(&(*out)[0]);
      if (!spherical_injection_) {
        ConvertRepeatedQuadraticInjection(vec, rmeta.dimension(), m_value_,
                                          u_value_, l2_norm_, dst);
      } else {
        ConvertSphericalInjection(vec, rmeta.dimension(), u_value_, l2_norm_,
                                  dst);
      }
    } else {
      return IndexError_Unsupported;
    }
    return 0;
  }

  //! Convert records
  int convert(const void *records, const IndexQueryMeta &rmeta, uint32_t count,
              std::string *out, IndexQueryMeta *ometa) const override {
    IndexMeta::DataType dt = rmeta.data_type();

    if (dt == IndexMeta::DataType::DT_FP32) {
      if (rmeta.unit_size() != sizeof(float)) {
        return IndexError_Unsupported;
      }
      *ometa = rmeta;

      if (forced_half_float_) {
        ometa->set_meta(IndexMeta::DataType::DT_FP16,
                        rmeta.dimension() + m_value_);
        out->resize(ometa->element_size() * count);
        for (uint32_t i = 0; i < count; ++i) {
          const float *sub_query =
              reinterpret_cast<const float *>(records) + i * rmeta.dimension();
          ailego::Float16 *dst = reinterpret_cast<ailego::Float16 *>(
              &(*out)[i * ometa->element_size()]);
          if (!spherical_injection_) {
            ConvertRepeatedQuadraticInjection(sub_query, rmeta.dimension(),
                                              m_value_, u_value_, l2_norm_,
                                              dst);
          } else {
            ConvertSphericalInjection(sub_query, rmeta.dimension(), u_value_,
                                      l2_norm_, dst);
          }
        }
      } else {
        ometa->set_dimension(rmeta.dimension() + m_value_);
        out->resize(ometa->element_size() * count);
        for (uint32_t i = 0; i < count; ++i) {
          const float *sub_query =
              reinterpret_cast<const float *>(records) + i * rmeta.dimension();
          float *dst =
              reinterpret_cast<float *>(&(*out)[i * ometa->element_size()]);
          if (!spherical_injection_) {
            ConvertRepeatedQuadraticInjection(sub_query, rmeta.dimension(),
                                              m_value_, u_value_, l2_norm_,
                                              dst);
          } else {
            ConvertSphericalInjection(sub_query, rmeta.dimension(), u_value_,
                                      l2_norm_, dst);
          }
        }
      }
    } else if (dt == IndexMeta::DataType::DT_FP16) {
      if (rmeta.unit_size() != sizeof(ailego::Float16)) {
        return IndexError_Unsupported;
      }
      *ometa = rmeta;
      ometa->set_dimension(rmeta.dimension() + m_value_);
      out->resize(ometa->element_size() * count);

      for (uint32_t i = 0; i < count; ++i) {
        const ailego::Float16 *sub_query =
            reinterpret_cast<const ailego::Float16 *>(records) +
            i * rmeta.dimension();
        ailego::Float16 *dst = reinterpret_cast<ailego::Float16 *>(
            &(*out)[i * ometa->element_size()]);
        if (!spherical_injection_) {
          ConvertRepeatedQuadraticInjection(sub_query, rmeta.dimension(),
                                            m_value_, u_value_, l2_norm_, dst);
        } else {
          ConvertSphericalInjection(sub_query, rmeta.dimension(), u_value_,
                                    l2_norm_, dst);
        }
      }
    } else {
      return IndexError_Unsupported;
    }
    return 0;
  }

  //! Normalize results
  int normalize(const void *query, const IndexQueryMeta &qmeta,
                IndexDocumentList &result) const override {
    IndexMeta::DataType dt = qmeta.data_type();
    float norm = 1.0f;

    if (dt == IndexMeta::DataType::DT_FP32) {
      if (qmeta.unit_size() != sizeof(float)) {
        return IndexError_Unsupported;
      }
      if (normalize_) {
        ailego::Norm2Matrix<float, 1>::Compute(
            reinterpret_cast<const float *>(query), qmeta.dimension(), &norm);
      }
    } else if (dt == IndexMeta::DataType::DT_FP16) {
      if (qmeta.unit_size() != sizeof(ailego::Float16)) {
        return IndexError_Unsupported;
      }
      if (normalize_) {
        ailego::Norm2Matrix<ailego::Float16, 1>::Compute(
            reinterpret_cast<const ailego::Float16 *>(query), qmeta.dimension(),
            &norm);
      }
    } else {
      return IndexError_Unsupported;
    }

    if (!spherical_injection_) {
      const float a = 1.0f + m_value_ * 0.25f;
      const float lamba = 0.5f * norm * l2_norm_ / u_value_;
      for (auto &it : result) {
        *it.mutable_score() = (a - it.score()) * lamba;
      }
    } else {
      const float lambda = norm * l2_norm_ / u_value_;
      for (auto &it : result) {
        *it.mutable_score() = (1.0f - 0.5f * it.score()) * lambda;
      }
    }
    return 0;
  }

 private:
  //! L2-normalize queries in transform() and fold the query norm into the
  //! score rescaling done by normalize()
  bool normalize_{false};
  //! Emit FP16 output for FP32 input in transform() and convert()
  bool forced_half_float_{false};
  //! Use spherical injection instead of repeated quadratic injection;
  //! init() forces m_value_ to 1 when this is set
  bool spherical_injection_{false};
  //! Number of components appended to every vector
  uint32_t m_value_{0u};
  //! Numerator of the record scaling factor (u_value_ / l2_norm_).
  //! NOTE(review): defaults to 0 and normalize() divides by it -- confirm
  //! that init() is always given a non-zero value.
  float u_value_{0.0f};
  //! Denominator of the record scaling factor (u_value_ / l2_norm_).
  //! NOTE(review): defaults to 0 and convert() divides by it -- confirm
  //! that init() is always given a non-zero value.
  float l2_norm_{0.0f};
};

INDEX_FACTORY_REGISTER_REFORMER(MipsReformer);

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/quantizer/quantizer_params.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <string>

namespace zvec {
namespace core {

//! MipsConverter
static const std::string MIPS_CONVERTER_M_VALUE = "mips.converter.m_value";
static const std::string MIPS_CONVERTER_U_VALUE = "mips.converter.u_value";
static const std::string MIPS_CONVERTER_L2_NORM = "mips.converter.l2_norm";
static const std::string MIPS_CONVERTER_FORCED_HALF_FLOAT =
    "mips.converter.forced_half_float";
static const std::string MIPS_CONVERTER_SPHERICAL_INJECTION =
    "mips.converter.spherical_injection";

//! MipsReverseConverter
static const std::string MIPS_REVERSE_CONVERTER_M_VALUE =
    "mips_reverse.converter.m_value";
static const std::string MIPS_REVERSE_CONVERTER_U_VALUE =
    "mips_reverse.converter.u_value";
static const std::string MIPS_REVERSE_CONVERTER_L2_NORM =
    "mips_reverse.converter.l2_norm";
static const std::string MIPS_REVERSE_CONVERTER_FORCED_SINGLE_FLOAT =
    "mips_reverse.converter.forced_single_float";
static const std::string MIPS_REVERSE_CONVERTER_SPHERICAL_INJECTION =
    "mips_reverse.converter.spherical_injection";

//! MipsReformer
static const std::string MIPS_REFORMER_M_VALUE = "mips.reformer.m_value";
static const std::string MIPS_REFORMER_U_VALUE = "mips.reformer.u_value";
static const std::string MIPS_REFORMER_L2_NORM = "mips.reformer.l2_norm";
static const std::string MIPS_REFORMER_NORMALIZE = "mips.reformer.normalize";
static const std::string MIPS_REFORMER_FORCED_HALF_FLOAT =
    "mips.reformer.forced_half_float";
static const std::string MIPS_REFORMER_SPHERICAL_INJECTION =
    "mips.reformer.spherical_injection";

//! NormalizeConverter
static const std::string NORMALIZE_CONVERTER_FORCED_HALF_FLOAT =
    "normalize.converter.forced_half_float";
static const std::string NORMALIZE_CONVERTER_P_VALUE =
    "normalize.converter.p_value";

//! NormalizeReformer
static const std::string NORMALIZE_REFORMER_FORCED_HALF_FLOAT =
    "normalize.reformer.forced_half_float";
static const std::string NORMALIZE_REFORMER_P_VALUE =
    "normalize.reformer.p_value";

//! Int8Converter
static const std::string INT8_QUANTIZER_CONVERTER_HISTOGRAM_BINS_COUNT =
    "int8_quantizer.converter.histogram_bins_count";
static const std::string INT8_QUANTIZER_CONVERTER_DISABLE_BIAS =
    "int8_quantizer.converter.disable_bias";
static const std::string INT8_QUANTIZER_CONVERTER_BIAS =
    "int8_quantizer.converter.bias";
static const std::string INT8_QUANTIZER_CONVERTER_SCALE =
    "int8_quantizer.converter.scale";

//! Int4Converter
static const std::string INT4_QUANTIZER_CONVERTER_HISTOGRAM_BINS_COUNT =
    "int4_quantizer.converter.histogram_bins_count";
static const std::string INT4_QUANTIZER_CONVERTER_DISABLE_BIAS =
    "int4_quantizer.converter.disable_bias";
static const std::string INT4_QUANTIZER_CONVERTER_BIAS =
    "int4_quantizer.converter.bias";
static const std::string INT4_QUANTIZER_CONVERTER_SCALE =
    "int4_quantizer.converter.scale";

//! Int8Reformer
static const std::string INT8_QUANTIZER_REFORMER_BIAS =
    "int8_quantizer.reformer.bias";
static const std::string INT8_QUANTIZER_REFORMER_SCALE =
    "int8_quantizer.reformer.scale";
static const std::string INT8_QUANTIZER_REFORMER_METRIC =
    "int8_quantizer.reformer.metric";

//! Int4Reformer
static const std::string INT4_QUANTIZER_REFORMER_BIAS =
    "int4_quantizer.reformer.bias";
static const std::string INT4_QUANTIZER_REFORMER_SCALE =
    "int4_quantizer.reformer.scale";
static const std::string INT4_QUANTIZER_REFORMER_METRIC =
    "int4_quantizer.reformer.metric";

//! CosineConverter
static const std::string COSINE_CONVERTER_FORCED_HALF_FLOAT =
    "cosine.converter.forced_half_float";

//! CosineReformer
static const std::string COSINE_REFORMER_FORCED_HALF_FLOAT =
    "cosine.reformer.forced_half_float";

//! IntegerStreamingConverter
static const std::string INTEGER_STREAMING_CONVERTER_ENABLE_NORMALIZE =
    "integer_streaming.converter.enable_normalize";

//! IntegerStreamingReformer
static const std::string INTEGER_STREAMING_REFORMER_ENABLE_NORMALIZE =
    "integer_streaming.reformer.enable_normalize";
static const std::string INTEGER_STREAMING_REFORMER_IS_EUCLIDEAN =
    "integer_streaming.reformer.is_euclidean";

//! DoubleBitConverter
static const std::string DOUBLE_BIT_CONVERTER_TRAIN_SAMPLE_COUNT =
    "double_bit.converter.train_sample_count";
static const std::string DOUBLE_BIT_CONVERTER_A_VALUE =
    "double_bit.converter.a_value";
static const std::string DOUBLE_BIT_CONVERTER_B_VALUE =
    "double_bit.converter.b_value";

//! DoubleBitReformer
static const std::string DOUBLE_BIT_REFORMER_A_VALUE =
    "double_bit.reformer.a_value";
static const std::string DOUBLE_BIT_REFORMER_B_VALUE =
    "double_bit.reformer.b_value";

}  // namespace core
}  // namespace zvec

================================================
FILE: src/core/quantizer/record_quantizer.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <zvec/core/framework/index_meta.h>

#pragma once

namespace zvec {
namespace core {

class RecordQuantizer {
 public:
  //! Convert a float feature to an FP16, INT8 or INT4 feature.
  //!
  //! FP16: the `dim` floats are converted with FloatHelper::ToFP16.
  //! INT8/INT4: the min/max of the input is mapped linearly onto [-127, 127]
  //! (INT8) or [-8, 7] (INT4, two values per byte, low nibble first), and the
  //! quantization parameters are appended right after the packed codes.
  //! Any other `type` leaves `out` untouched.
  //!
  //! \param vec           Input feature with `dim` floats
  //! \param dim           Number of input elements
  //! \param type          Target data type
  //! \param is_euclidean  For INT4 only: selects what is stored in extras[3]
  //! \param out           Output buffer; the caller must provide room for the
  //!                      codes plus the trailing extras written below
  //!
  //! NOTE(review): the INT4 loop reads vec[i + 1], so `dim` is presumably
  //! required to be even -- confirm against callers.
  static inline void quantize_record(const float *vec, size_t dim,
                                     IndexMeta::DataType type,
                                     bool is_euclidean, void *out) {
    if (type == IndexMeta::DataType::DT_FP16) {
      ailego::FloatHelper::ToFP16(vec, dim, reinterpret_cast<uint16_t *>(out));
    } else if (type == IndexMeta::DataType::DT_INT4 ||
               type == IndexMeta::DataType::DT_INT8) {
      // Value range of the input; drives the per-record scale and bias.
      float min = std::numeric_limits<float>::max();
      float max = std::numeric_limits<float>::lowest();
      constexpr float epsilon = std::numeric_limits<float>::epsilon();
      for (size_t i = 0; i < dim; ++i) {
        min = std::min(min, vec[i]);
        max = std::max(max, vec[i]);
      }

      // Statistics of the scaled (pre-rounding) values, plus the integer sum
      // of the stored INT8 codes.
      float sum = 0.0f;
      float squared_sum = 0.0f;
      int int8_sum = 0;
      float *extras, scale, bias;
      if (type == IndexMeta::DataType::DT_INT8) {
        // Map [min, max] onto [-127, 127]; epsilon guards a zero range.
        scale = 254 / std::max(max - min, epsilon);
        bias = -min * scale - 127;
        for (size_t i = 0; i < dim; ++i) {
          float v = vec[i] * scale + bias;
          squared_sum += v * v;
          sum += v;
          (reinterpret_cast<int8_t *>(out))[i] =
              static_cast<int8_t>(std::round(v));
          int8_sum += (reinterpret_cast<int8_t *>(out))[i];
        }
        // Extras start right after the `dim` code bytes.
        extras = reinterpret_cast<float *>(static_cast<int8_t *>(out) + dim);
      } else {
        // Map [min, max] onto [-8, 7]; epsilon guards a zero range.
        scale = 15 / std::max(max - min, epsilon);
        bias = -min * scale - 8;
        for (size_t i = 0; i < dim; i += 2) {
          float lo = vec[i] * scale + bias;
          float hi = vec[i + 1] * scale + bias;
          squared_sum += lo * lo;
          sum += lo;
          squared_sum += hi * hi;
          sum += hi;
          // Even index goes to the low nibble, odd index to the high nibble.
          (reinterpret_cast<uint8_t *>(out))[i / 2] =
              (static_cast_from_float_to_uint8(std::round(hi)) << 4) |
              (static_cast_from_float_to_uint8(std::round(lo)) & 0xF);
        }
        // Extras start right after the `dim / 2` packed bytes.
        extras =
            reinterpret_cast<float *>(static_cast<uint8_t *>(out) + dim / 2);
      }

      // Save the feature quantization params for IndexMeasure
      // extras[0] and extras[1] are the inverse transform used by
      // unquantize_record(): value = code * extras[0] + extras[1].
      extras[0] = 1.0f / scale;
      extras[1] = -bias / scale;
      extras[2] = sum;

      if (type == IndexMeta::DataType::DT_INT8) {
        // INT8 stores both the squared sum and, in a fifth 4-byte slot, the
        // integer sum of the codes.
        extras[3] = squared_sum;
        reinterpret_cast<int32_t *>(extras + 4)[0] = int8_sum;
      } else {
        if (is_euclidean) {
          extras[3] = squared_sum;
        } else {
          // NOTE(review): int8_sum is never accumulated in the INT4 branch
          // above, so this always stores 0 -- confirm whether the sum of the
          // 4-bit codes was intended here.
          reinterpret_cast<int *>(extras)[3] = int8_sum;
        }
      }
    }
  }

  //! Restore a float feature from an INT8, INT4 or FP16 record.
  //! For the integer types the two floats stored right after the codes are
  //! used as `value = code * extras[0] + extras[1]`. INT4 codes are unpacked
  //! as signed nibbles, low nibble first. Any other `type` leaves `out`
  //! untouched.
  //!
  //! \param vec         Quantized record (codes followed by the extras)
  //! \param origin_dim  Dimension of the original float feature
  //! \param type        Data type of the quantized record
  //! \param out         Output buffer of at least `origin_dim` floats
  static inline void unquantize_record(const void *vec, size_t origin_dim,
                                       IndexMeta::DataType type, float *out) {
    if (type == IndexMeta::DataType::DT_INT8) {
      const int8_t *codes = static_cast<const int8_t *>(vec);
      const float *extras = reinterpret_cast<const float *>(codes + origin_dim);
      for (size_t i = 0; i != origin_dim; ++i) {
        out[i] = codes[i] * extras[0] + extras[1];
      }
      return;
    }

    if (type == IndexMeta::DataType::DT_INT4) {
      const uint8_t *packed = static_cast<const uint8_t *>(vec);
      const size_t pairs = origin_dim / 2;
      const float *extras = reinterpret_cast<const float *>(packed + pairs);
      for (size_t p = 0; p != pairs; ++p) {
        // Shift each nibble into the top of an int8 and back to sign-extend.
        const int8_t lo = (static_cast<int8_t>(packed[p] << 4) >> 4);
        const int8_t hi = (static_cast<int8_t>(packed[p] & 0xf0) >> 4);

        out[2 * p] = lo * extras[0] + extras[1];
        out[2 * p + 1] = hi * extras[0] + extras[1];
      }
      return;
    }

    if (type == IndexMeta::DataType::DT_FP16) {
      const uint16_t *halves = static_cast<const uint16_t *>(vec);
      for (size_t i = 0; i != origin_dim; ++i) {
        out[i] = ailego::FloatHelper::ToFP32(halves[i]);
      }
    }
  }

  //! Restore the float values of a sparse record.
  //! Only FP16 input is handled: each of the `sparse_count` half floats is
  //! widened with FloatHelper::ToFP32. Any other `type` leaves the output
  //! untouched.
  static inline void unquantize_sparse_record(const void *sparse_value,
                                              size_t sparse_count,
                                              IndexMeta::DataType type,
                                              float *sparse_value_out) {
    if (type != IndexMeta::DataType::DT_FP16) {
      return;
    }
    const uint16_t *halves = static_cast<const uint16_t *>(sparse_value);
    for (size_t k = 0; k != sparse_count; ++k) {
      sparse_value_out[k] = ailego::FloatHelper::ToFP32(halves[k]);
    }
  }
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/utility/CMakeLists.txt
================================================
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

# core_utility: library built from every *.cc file in this directory.
# It lists zvec_ailego and core_framework under LIBS and adds this directory
# and src/core under INCS.
# NOTE(review): the STATIC/SHARED/STRICT/ALWAYS_LINK flags are interpreted by
# cc_library, presumably defined in cmake/bazel.cmake -- see there for their
# exact meaning.
cc_library(
    NAME core_utility 
    STATIC SHARED STRICT ALWAYS_LINK
    SRCS *.cc
    LIBS zvec_ailego core_framework
    INCS . ${PROJECT_ROOT_DIR}/src/core
    VERSION "${PROXIMA_ZVEC_VERSION}"
  )


================================================
FILE: src/core/utility/basic_refiner.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <zvec/core/framework/index_factory.h>
#include <zvec/core/framework/index_refiner.h>

namespace zvec {
namespace core {

/*! Basic Refiner
 */
class BasicRefiner : public IndexRefiner {
 public:
  const uint32_t kScaleFactor = 10;

 public:
  /*! Context of the basic refiner.
   *  Holds one context for the base runner, one for the refine runner, the
   *  requested topk, and one result list per query of the last search.
   */
  class BasicRefinerContext : public Context {
   public:
    //! Construct
    BasicRefinerContext() = default;
    ~BasicRefinerContext() = default;

    //! Take ownership of the base and refine runner contexts; returns 0
    int set_contexts(IndexRunner::Context::Pointer base_ctx,
                     IndexRunner::Context::Pointer refine_ctx) override {
      base_ctx_ = std::move(base_ctx);
      refine_ctx_ = std::move(refine_ctx);

      return 0;
    }

    //! Set topk of search result
    void set_topk(uint32_t topk) override {
      topk_ = topk;
    }

    //! Retrieve topk of search result
    uint32_t topk() const override {
      return topk_;
    }

    //! Retrieve search result of the first query
    const IndexDocumentList &result(void) const override {
      // Same guard as mutable_result(): the list must have been sized first.
      ailego_assert_with(!results_.empty(), "invalid idx");
      return results_[0];
    }

    //! Retrieve search result with index
    const IndexDocumentList &result(size_t idx) const override {
      ailego_assert_with(idx < results_.size(), "invalid idx");
      return results_[idx];
    }

    //! Retrieve mutable result with index
    IndexDocumentList *mutable_result(size_t idx) override {
      ailego_assert_with(idx < results_.size(), "invalid idx");
      return &results_[idx];
    }

    //! Resize the per-query result lists to `size` entries
    void resize_results(size_t size) {
      results_.resize(size);
    }

    //! Retrieve the context used with the base runner
    IndexRunner::Context::Pointer &base_context() {
      return base_ctx_;
    }

    //! Retrieve the context used with the refine runner
    IndexRunner::Context::Pointer &refine_context() {
      return refine_ctx_;
    }

   private:
    uint32_t topk_{0};
    std::vector<IndexDocumentList> results_{};
    std::vector<IndexGroupDocumentList> group_results_{};

    IndexRunner::Context::Pointer base_ctx_{nullptr};
    IndexRunner::Context::Pointer refine_ctx_{nullptr};
  };

 public:
  //! Create a context
  //! Builds one context from each underlying runner and wraps both in a
  //! BasicRefinerContext. Returns an empty pointer if the wrapper cannot be
  //! allocated.
  Context::Pointer create_context(void) const override {
    auto base_ctx = base_runner_->create_context();
    auto refine_ctx = refine_runner_->create_context();

    BasicRefinerContext *ctx = new (std::nothrow) BasicRefinerContext();
    if (ctx == nullptr) {
      // nothrow new reports failure by returning nullptr; do not dereference.
      LOG_ERROR("Failed to allocate BasicRefinerContext");
      return Context::Pointer();
    }

    ctx->set_contexts(std::move(base_ctx), std::move(refine_ctx));

    return Context::Pointer(ctx);
  }

  //! Initialize refiner with the base and refine runners
  //! Stores both runners and a copy of `params`; always returns 0.
  int init(IndexRunner::Pointer base_runner, IndexRunner::Pointer refine_runner,
           const ailego::Params &params) override {
    // The runners are passed by value, so move them into place instead of
    // copying the handles a second time.
    base_runner_ = std::move(base_runner);
    refine_runner_ = std::move(refine_runner);

    params_ = params;

    return 0;
  }

  //! Cleanup
  //! No-op: the stored runners and params are left as they are; returns 0.
  int cleanup() override {
    return 0;
  }

  //! Add a vector into both indexes
  //! The base representation is added to the base runner first; only if that
  //! succeeds is the refine representation added to the refine runner. The
  //! first non-zero return code is logged and returned, 0 on success.
  //! NOTE(review): `context` is assumed to be a BasicRefinerContext; the
  //! result of the dynamic_cast is not checked.
  virtual int add_impl(uint64_t key, const void *base_query,
                       const IndexQueryMeta &base_qmeta,
                       const void *refine_query,
                       const IndexQueryMeta &refine_qmeta,
                       Context::Pointer &context) override {
    auto *refiner_ctx = dynamic_cast<BasicRefinerContext *>(context.get());

    int status = base_runner_->add_impl(key, base_query, base_qmeta,
                                        refiner_ctx->base_context());
    if (status == 0) {
      status = refine_runner_->add_impl(key, refine_query, refine_qmeta,
                                        refiner_ctx->refine_context());
      if (status != 0) {
        LOG_ERROR("Error in adding vector to refine index");
      }
    } else {
      LOG_ERROR("Error in adding vector to base index");
    }

    return status;
  }

  //! Similarity search over `count` queries
  //! For every query the base runner is searched with `topk * scale_factor_`
  //! candidates, the candidate keys are handed to the refine runner's
  //! search_bf_by_p_keys_impl with the requested topk, and the refined list
  //! is stored as result `q` of the context. Both query pointers advance by
  //! one element per iteration. Returns the first non-zero code reported by
  //! either runner, 0 on success.
  //! NOTE(review): `context` is assumed to be a BasicRefinerContext; the
  //! result of the dynamic_cast is not checked.
  virtual int search_impl(const void *base_query,
                          const IndexQueryMeta &base_qmeta,
                          const void *refine_query,
                          const IndexQueryMeta &refine_qmeta, uint32_t count,
                          Context::Pointer &context) const override {
    BasicRefinerContext *ctx =
        dynamic_cast<BasicRefinerContext *>(context.get());

    const uint32_t topk = ctx->topk();

    ctx->resize_results(count);

    auto &base_ctx = ctx->base_context();
    auto &refine_ctx = ctx->refine_context();

    for (size_t q = 0; q < count; ++q) {
      base_ctx->set_topk(topk * scale_factor_);
      int ret = base_runner_->search_impl(base_query, base_qmeta, base_ctx);
      if (ret != 0) {
        LOG_ERROR("Error in searching vector from base index");

        return ret;
      }

      // Collect the candidate keys straight from the base result; binding by
      // reference avoids copying the whole document list.
      const auto &base_result = base_ctx->result();

      std::vector<std::vector<uint64_t>> keys_array(1);
      std::vector<uint64_t> &keys = keys_array.front();
      keys.reserve(base_result.size());
      for (size_t i = 0; i < base_result.size(); ++i) {
        keys.push_back(base_result[i].key());
      }

      refine_ctx->set_topk(topk);
      ret = refine_runner_->search_bf_by_p_keys_impl(refine_query, keys_array,
                                                     refine_qmeta, refine_ctx);
      if (ret != 0) {
        LOG_ERROR("Error in searching vector from refine index");

        return ret;
      }

      // Single copy of the refined list into the per-query slot.
      *ctx->mutable_result(q) = refine_ctx->result();

      base_query =
          static_cast<const char *>(base_query) + base_qmeta.element_size();
      refine_query =
          static_cast<const char *>(refine_query) + refine_qmeta.element_size();
    }

    return 0;
  }

  //! Similarity search for a single query
  //! Forwards to the batch overload with a count of 1 and returns its code.
  virtual int search_impl(const void *base_query,
                          const IndexQueryMeta &base_qmeta,
                          const void *refine_query,
                          const IndexQueryMeta &refine_qmeta,
                          Context::Pointer &context) const override {
    return search_impl(base_query, base_qmeta, refine_query, refine_qmeta, 1,
                       context);
  }

  //! Similarity brute force search over `count` queries
  //! For every query the base runner is searched, the keys of its result are
  //! handed to the refine runner's search_bf_by_p_keys_impl, and the refined
  //! list is stored as result `q` of the context. Both query pointers advance
  //! by one element per iteration. Returns the first non-zero code reported
  //! by either runner, 0 on success.
  //! Unlike search_impl, the topk of the underlying contexts is not changed
  //! here.
  //! NOTE(review): `context` is assumed to be a BasicRefinerContext; the
  //! result of the dynamic_cast is not checked.
  virtual int search_bf_impl(const void *base_query,
                             const IndexQueryMeta &base_qmeta,
                             const void *refine_query,
                             const IndexQueryMeta &refine_qmeta, uint32_t count,
                             Context::Pointer &context) const override {
    BasicRefinerContext *ctx =
        dynamic_cast<BasicRefinerContext *>(context.get());

    // One result slot per query; without this mutable_result(q) below would
    // index past the end of the result vector.
    ctx->resize_results(count);

    auto &base_ctx = ctx->base_context();
    auto &refine_ctx = ctx->refine_context();

    for (size_t q = 0; q < count; ++q) {
      int ret = base_runner_->search_impl(base_query, base_qmeta, base_ctx);
      if (ret != 0) {
        LOG_ERROR("Error in searching vector from base index");

        return ret;
      }

      // Pass the base candidates on to the refine search, mirroring
      // search_impl (previously the base result was fetched but an empty key
      // set was passed on).
      const auto &base_result = base_ctx->result();
      std::vector<std::vector<uint64_t>> keys(1);
      keys.front().reserve(base_result.size());
      for (size_t i = 0; i < base_result.size(); ++i) {
        keys.front().push_back(base_result[i].key());
      }

      ret = refine_runner_->search_bf_by_p_keys_impl(refine_query, keys,
                                                     refine_qmeta, refine_ctx);
      if (ret != 0) {
        LOG_ERROR("Error in searching vector from refine index");

        return ret;
      }
      *ctx->mutable_result(q) = refine_ctx->result();

      base_query =
          static_cast<const char *>(base_query) + base_qmeta.element_size();
      refine_query =
          static_cast<const char *>(refine_query) + refine_qmeta.element_size();
    }

    return 0;
  }

  //! Similarity brute force search for a single query
  //! Forwards to the batch overload with a count of 1 and returns its code.
  virtual int search_bf_impl(const void *base_query,
                             const IndexQueryMeta &base_qmeta,
                             const void *refine_query,
                             const IndexQueryMeta &refine_qmeta,
                             Context::Pointer &context) const override {
    return search_bf_impl(base_query, base_qmeta, refine_query, refine_qmeta, 1,
                          context);
  }

 private:
  uint32_t scale_factor_{kScaleFactor};
  ailego::Params params_;

  IndexRunner::Pointer base_runner_{nullptr};
  IndexRunner::Pointer refine_runner_{nullptr};
};

INDEX_FACTORY_REGISTER_REFINER(BasicRefiner);

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/utility/buffer_storage.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <algorithm>
#include <mutex>
#include <zvec/ailego/buffer/buffer_pool.h>
#include <zvec/ailego/utility/time_helper.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_factory.h>
#include <zvec/core/framework/index_mapping.h>
#include <zvec/core/framework/index_version.h>
#include "utility_params.h"

namespace zvec {
namespace core {

/*! Buffer Pool Storage (read-only index storage served from a buffer pool)
 */
class BufferStorage : public IndexStorage {
 public:
  /*! Index Storage Segment
   *
   *  Read-only view of one segment. The segment bytes are obtained on demand
   *  from the owning BufferStorage (see BufferStorage::get_buffer); write,
   *  resize and crc updates are accepted but change nothing.
   */
  class WrappedSegment : public IndexStorage::Segment,
                         public std::enable_shared_from_this<Segment> {
   public:
    //! Index Storage Pointer
    typedef std::shared_ptr<Segment> Pointer;

    //! Constructor
    /*! \param owner                        storage that serves the bytes
     *  \param segment                      mapping entry describing the segment
     *  \param segment_header_start_offset  file offset of the meta header the
     *                                      segment belongs to
     *  \param segment_header               meta header (content_offset is used)
     *  \param segment_id                   block id handed to the buffer pool
     */
    WrappedSegment(BufferStorage *owner, IndexMapping::Segment *segment,
                   uint64_t segment_header_start_offset,
                   IndexFormat::MetaHeader *segment_header, size_t segment_id)
        : segment_(segment),
          owner_(owner),
          segment_id_(segment_id),
          capacity_(static_cast<size_t>(segment->meta()->data_size +
                                        segment->meta()->padding_size)),
          segment_header_start_offset_(segment_header_start_offset),
          segment_header_(segment_header) {}

    //! Destructor
    virtual ~WrappedSegment(void) {}

    //! Retrieve size of data
    size_t data_size(void) const override {
      return static_cast<size_t>(segment_->meta()->data_size);
    }

    //! Retrieve crc of data
    uint32_t data_crc(void) const override {
      return segment_->meta()->data_crc;
    }

    //! Retrieve size of padding
    size_t padding_size(void) const override {
      return static_cast<size_t>(segment_->meta()->padding_size);
    }

    //! Retrieve capacity of segment (data size plus padding size)
    size_t capacity(void) const override {
      return capacity_;
    }

    //! Fetch data from segment (with own buffer)
    /*! Copies up to `len` bytes starting at `offset` into `buf`. The request
     *  is clamped to the data size. Returns the number of bytes copied, or 0
     *  when the block cannot be obtained from the owner.
     */
    size_t fetch(size_t offset, void *buf, size_t len) const override {
      clamp_to_data(&offset, &len);
      char *raw = load_block();
      if (!raw) {
        return 0;
      }
      memmove(buf, raw + offset, len);
      return len;
    }

    //! Read data from segment
    /*! Points `*data` at the requested bytes inside the block returned by the
     *  owner. Returns the (clamped) length, or 0 when the block is missing.
     */
    size_t read(size_t offset, const void **data, size_t len) override {
      clamp_to_data(&offset, &len);
      char *raw = load_block();
      if (!raw) {
        return 0;
      }
      *data = raw + offset;
      return len;
    }

    //! Read data from segment into a memory block
    /*! Returns the (clamped) length on success and 0 on any failure, so that
     *  every failure path of the read/fetch family reports the same value
     *  (the previous `-1` became SIZE_MAX in the size_t return type).
     */
    size_t read(size_t offset, MemoryBlock &data, size_t len) override {
      clamp_to_data(&offset, &len);
      char *raw = load_block();
      if (!raw) {
        return 0;
      }

      data.reset(owner_->buffer_pool_handle_.get(), segment_id_, raw + offset);
      if (!data.data()) {
        LOG_ERROR("read error.");
        return 0;
      }
      return len;
    }

    //! Write data into the storage with offset
    /*! Nothing is stored; the call only reports `len` back to the caller. */
    size_t write(size_t /*offset*/, const void * /*data*/,
                 size_t len) override {
      return len;
    }

    //! Resize size of data (no-op, always returns 0)
    size_t resize(size_t /*size*/) override {
      return 0;
    }

    //! Update crc of data (no-op)
    void update_data_crc(uint32_t /*crc*/) override {}

    //! Clone the segment (returns another reference to this same object)
    IndexStorage::Segment::Pointer clone(void) override {
      return shared_from_this();
    }

   protected:
    friend BufferStorage;
    IndexMapping::Segment *segment_{};

   private:
    //! Clamp [offset, offset + len) to the segment's data size.
    //  Written without `offset + len` so the check cannot wrap around.
    void clamp_to_data(size_t *offset, size_t *len) const {
      const size_t limit = static_cast<size_t>(segment_->meta()->data_size);
      if (*offset > limit) {
        *offset = limit;
      }
      const size_t remaining = limit - *offset;
      if (*len > remaining) {
        *len = remaining;
      }
    }

    //! Ask the owner for this segment's block; may return nullptr.
    char *load_block(void) const {
      const size_t buffer_offset = segment_header_start_offset_ +
                                   segment_header_->content_offset +
                                   segment_->meta()->data_index;
      return owner_->get_buffer(buffer_offset, capacity_, segment_id_);
    }

    BufferStorage *owner_{nullptr};
    size_t segment_id_{};
    size_t capacity_{};
    uint64_t segment_header_start_offset_;
    IndexFormat::MetaHeader *segment_header_;
  };

  //! Destructor (releases everything via cleanup())
  virtual ~BufferStorage(void) {
    cleanup();
  }

  //! Initialize storage
  /*! Reads BUFFER_STORAGE_MEMORY_SIZE from `params` into buffer_size_ (the
   *  member keeps its default when the key is absent) and logs the value.
   *  Always returns 0.
   */
  int init(const ailego::Params &params) override {
    params.get(BUFFER_STORAGE_MEMORY_SIZE, &buffer_size_);
    // buffer_size_ is uint64_t; cast so the format specifier matches on every
    // platform, not only where uint64_t happens to be unsigned long.
    LOG_INFO("buffer size: %llu",
             static_cast<unsigned long long>(buffer_size_));
    return 0;
  }

  //! Cleanup storage
  int cleanup(void) override {
    this->close_index();
    return 0;
  }

  //! Open storage
  /*! Creates the buffer pool and its handle for `path`, parses the index
   *  meta (headers, footers, segment tables) and then initializes the pool
   *  with the configured buffer size, the largest segment size seen and the
   *  number of segments. Returns the first non-zero status encountered.
   */
  int open(const std::string &path, bool /*create*/) override {
    file_name_ = path;
    buffer_pool_ = std::make_shared<ailego::VecBufferPool>(path);
    buffer_pool_handle_ = std::make_shared<ailego::VecBufferPoolHandle>(
        buffer_pool_->get_handle());

    const int parse_status = ParseToMapping();
    if (parse_status != 0) {
      return parse_status;
    }
    return buffer_pool_->init(buffer_size_, max_segment_size_,
                              segments_.size());
  }

  //! Fetch a block from the buffer pool handle (forwards all arguments)
  char *get_buffer(size_t offset, size_t length, size_t block_id) {
    char *block = buffer_pool_handle_->get_block(offset, length, block_id);
    return block;
  }

  //! Read `length` meta bytes at `offset` into `out` via the pool handle
  int get_meta(size_t offset, size_t length, char *out) {
    const int status = buffer_pool_handle_->get_meta(offset, length, out);
    return status;
  }

  //! Load the meta header found at `offset` into header_ and validate it
  /*! Returns 0 on success, IndexError_Runtime when the bytes cannot be read,
   *  IndexError_InvalidLength when the recorded header size differs from
   *  sizeof(IndexFormat::MetaHeader), and IndexError_InvalidChecksum when the
   *  header crc does not match.
   */
  int ParseHeader(size_t offset) {
    auto raw = std::make_unique<char[]>(sizeof(header_));
    if (get_meta(offset, sizeof(header_), raw.get()) != 0) {
      LOG_ERROR("Get segment header failed.");
      return IndexError_Runtime;
    }
    memcpy(&header_, raw.get(), sizeof(header_));

    const bool size_matches =
        (header_.meta_header_size == sizeof(IndexFormat::MetaHeader));
    if (!size_matches) {
      LOG_ERROR("Header meta size is invalid.");
      return IndexError_InvalidLength;
    }

    const auto computed_crc =
        ailego::Crc32c::Hash(&header_, sizeof(header_), header_.header_crc);
    if (computed_crc != header_.header_crc) {
      LOG_ERROR("Header meta checksum is invalid.");
      return IndexError_InvalidChecksum;
    }
    return 0;
  }

  //! Load the meta footer found at `offset` into footer_ and validate it
  /*! Returns 0 on success, IndexError_Runtime when the bytes cannot be read,
   *  IndexError_InvalidLength when the segment-meta table would start before
   *  offset 0, and IndexError_InvalidChecksum when the footer crc mismatches.
   */
  int ParseFooter(size_t offset) {
    auto raw = std::make_unique<char[]>(sizeof(footer_));
    if (get_meta(offset, sizeof(footer_), raw.get()) != 0) {
      LOG_ERROR("Get segment footer failed.");
      return IndexError_Runtime;
    }
    memcpy(&footer_, raw.get(), sizeof(footer_));

    // The segment table sits immediately before the footer, so it cannot be
    // larger than the footer's own offset.
    if (offset < (size_t)footer_.segments_meta_size) {
      LOG_ERROR("Footer meta size is invalid.");
      return IndexError_InvalidLength;
    }

    const auto computed_crc =
        ailego::Crc32c::Hash(&footer_, sizeof(footer_), footer_.footer_crc);
    if (computed_crc != footer_.footer_crc) {
      LOG_ERROR("Footer meta checksum is invalid.");
      return IndexError_InvalidChecksum;
    }
    return 0;
  }

  int ParseSegment(size_t offset) {
    std::lock_guard<std::mutex> latch(mapping_mutex_);
    std::unique_ptr<char[]> segment_buffer =
        std::make_unique<char[]>(footer_.segments_meta_size);
    if (get_meta(offset, footer_.segments_meta_size, segment_buffer.get()) !=
        0) {
      LOG_ERROR("Get segment meta failed.");
      return IndexError_Runtime;
    }
    if (ailego::Crc32c::Hash(segment_buffer.get(), footer_.segments_meta_size,
                             0u) != footer_.segments_meta_crc) {
      LOG_ERROR("Index segments meta checksum is invalid.");
      return IndexError_InvalidChecksum;
    }
    IndexFormat::SegmentMeta *segment_start =
        reinterpret_cast<IndexFormat::SegmentMeta *>(segment_buffer.get());
    uint32_t segment_ids_offset = footer_.segments_meta_size;
    for (IndexFormat::SegmentMeta *iter = segment_start,
                                  *end = segment_start + footer_.segment_count;
         iter != end; ++iter) {
      if (iter->segment_id_offset > footer_.segments_meta_size) {
        return IndexError_InvalidValue;
      }
      if (iter->data_index > footer_.content_size) {
        return IndexError_InvalidValue;
      }
      if (iter->data_index + iter->data_size > footer_.content_size) {
        return IndexError_InvalidLength;
      }

      if (iter->segment_id_offset < segment_ids_offset) {
        segment_ids_offset = iter->segment_id_offset;
      }
      id_hash_.emplace(
          std::string(reinterpret_cast<const char *>(segment_start) +
                      iter->segment_id_offset),
          segments_.size());
      segments_.emplace(
          std::string(reinterpret_cast<const char *>(segment_start) +
                      iter->segment_id_offset),
          IndexMapping::SegmentInfo{IndexMapping::Segment{iter},
                                    current_header_start_offset_, &header_});
      max_segment_size_ =
          std::max(max_segment_size_, iter->data_size + iter->padding_size);
      if (sizeof(IndexFormat::SegmentMeta) * footer_.segment_count >
          footer_.segments_meta_size) {
        return IndexError_InvalidLength;
      }
    }
    buffer_pool_buffers_.push_back(std::move(segment_buffer));
    return 0;
  }

  int ParseToMapping() {
    while (true) {
      int ret;
      ret = ParseHeader(current_header_start_offset_);
      if (ret != 0) {
        LOG_ERROR("Failed to parse header, errno %d, %s", ret,
                  IndexError::What(ret));
        return ret;
      }

      switch (header_.version) {
        case IndexFormat::FORMAT_VERSION:
          break;
        default:
          LOG_ERROR("Unsupported index version: %u", header_.version);
          return IndexError_Unsupported;
      }

      // Unpack footer
      if (header_.meta_footer_size != sizeof(IndexFormat::MetaFooter)) {
        return IndexError_InvalidLength;
      }
      if ((int32_t)header_.meta_footer_offset < 0) {
        return IndexError_Unsupported;
      }
      uint64_t footer_offset =
          header_.meta_footer_offset + current_header_start_offset_;
      ret = ParseFooter(footer_offset);
      if (ret != 0) {
        LOG_ERROR("Failed to parse footer, errno %d, %s", ret,
                  IndexError::What(ret));
        return ret;
      }

      // Unpack segment table
      if (sizeof(IndexFormat::SegmentMeta) * footer_.segment_count >
          footer_.segments_meta_size) {
        return IndexError_InvalidLength;
      }
      const uint64_t segment_start_offset =
          footer_offset - footer_.segments_meta_size;
      ret = ParseSegment(segment_start_offset);
      if (ret != 0) {
        LOG_ERROR("Failed to parse segment, errno %d, %s", ret,
                  IndexError::What(ret));
        return ret;
      }

      if (footer_.next_meta_header_offset == 0) {
        break;
      }
      current_header_start_offset_ = footer_.next_meta_header_offset;
    }
    return 0;
  }

  //! Flush storage
  int flush(void) override {
    return this->flush_index();
  }

  //! Close storage
  int close(void) override {
    this->close_index();
    return 0;
  }

  //! Append a segment into storage
  int append(const std::string &id, size_t size) override {
    return this->append_segment(id, size);
  }

  //! Refresh meta information (checksum, update time, etc.)
  void refresh(uint64_t chkp) override {
    this->refresh_index(chkp);
  }

  //! Retrieve check point of storage (as recorded in the last parsed footer)
  uint64_t check_point(void) const override {
    const uint64_t recorded = footer_.check_point;
    return recorded;
  }

  //! Retrieve a segment by id
  /*! Returns a WrappedSegment bound to this storage, or an empty pointer when
   *  `id` is unknown. The second argument is ignored.
   */
  IndexStorage::Segment::Pointer get(const std::string &id, int) override {
    auto segment_info = this->get_segment_info(id);
    if (!segment_info) {
      return WrappedSegment::Pointer{};
    }

    // Look the block id up with find() under the mapping mutex: operator[]
    // is a non-const operation (it may insert), so calling it unguarded from
    // concurrent readers is a data race.
    size_t block_id = 0;
    {
      std::lock_guard<std::mutex> latch(mapping_mutex_);
      auto found = id_hash_.find(id);
      if (found == id_hash_.end()) {
        return WrappedSegment::Pointer{};
      }
      block_id = found->second;
    }

    return std::make_shared<WrappedSegment>(
        this, &segment_info->segment, segment_info->segment_header_start_offset,
        segment_info->segment_header, block_id);
  }

  //! Test if it a segment exists
  bool has(const std::string &id) const override {
    return this->has_segment(id);
  }

  //! Retrieve magic number of index (from the last parsed header)
  uint32_t magic(void) const override {
    const uint32_t value = header_.magic;
    return value;
  }

 protected:
  //! Initialize index version segment
  int init_version_segment(void) {
    size_t data_size = std::strlen(IndexVersion::Details());
    int error_code =
        this->append_segment(INDEX_VERSION_SEGMENT_NAME, data_size);
    if (error_code != 0) {
      return error_code;
    }

    auto segment = &get_segment_info(INDEX_VERSION_SEGMENT_NAME)->segment;
    if (!segment) {
      return IndexError_MMapFile;
    }
    auto meta = segment->meta();
    size_t capacity = static_cast<size_t>(meta->padding_size + meta->data_size);
    memcpy(segment->data(), IndexVersion::Details(), data_size);
    segment->set_dirty();
    meta->data_crc = ailego::Crc32c::Hash(segment->data(), data_size, 0);
    meta->data_size = data_size;
    meta->padding_size = capacity - data_size;
    return 0;
  }

  //! Initialize index file
  int init_index(const std::string & /*path*/) {
    // Add index version
    int error_code = this->init_version_segment();
    if (error_code != 0) {
      return error_code;
    }

    // Refresh mapping
    this->refresh_index(0);
    return 0;
  }

  //! Set the index file as dirty
  void set_as_dirty(void) {
    index_dirty_ = true;
  }

  //! Refresh meta information (checksum, update time, etc.)
  void refresh_index(uint64_t /*chkp*/) {
    // No-op in this storage.
  }

  //! Flush index storage (no-op; always reports success)
  int flush_index(void) {
    constexpr int kSuccess = 0;
    return kSuccess;
  }

  //! Close index storage
  void close_index(void) {
    std::lock_guard<std::mutex> latch(mapping_mutex_);
    file_name_.clear();
    id_hash_.clear();
    segments_.clear();
    memset(&header_, 0, sizeof(header_));
    memset(&footer_, 0, sizeof(footer_));
    buffer_pool_handle_.reset();
    buffer_pool_.reset();
    max_segment_size_ = 0;
    buffer_pool_buffers_.clear();
  }

  //! Append a segment into storage (no-op; always reports success)
  int append_segment(const std::string & /*id*/, size_t /*size*/) {
    constexpr int kSuccess = 0;
    return kSuccess;
  }

  //! Test if a segment exists (lookup is done under the mapping mutex)
  bool has_segment(const std::string &id) const {
    std::lock_guard<std::mutex> latch(mapping_mutex_);
    return segments_.count(id) != 0;
  }

  //! Get a segment from storage
  /*! Returns the address of the registered SegmentInfo for `id`, or nullptr
   *  when no such segment exists. The lookup runs under the mapping mutex.
   */
  IndexMapping::SegmentInfo *get_segment_info(const std::string &id) {
    std::lock_guard<std::mutex> latch(mapping_mutex_);
    auto found = segments_.find(id);
    return found != segments_.end() ? &found->second : nullptr;
  }

 private:
  bool index_dirty_{false};
  mutable std::mutex mapping_mutex_{};

  // buffer manager
  std::string file_name_;
  IndexFormat::MetaHeader header_{};
  IndexFormat::MetaFooter footer_{};
  std::unordered_map<std::string, IndexMapping::SegmentInfo> segments_{};
  std::unordered_map<std::string, size_t> id_hash_{};
  uint64_t max_segment_size_{0};
  std::vector<std::unique_ptr<char[]>> buffer_pool_buffers_{};

  ailego::VecBufferPool::Pointer buffer_pool_{nullptr};
  ailego::VecBufferPoolHandle::Pointer buffer_pool_handle_{nullptr};
  uint64_t current_header_start_offset_{0u};
  uint64_t buffer_size_{2lu * 1024 * 1024 * 1024};  // 2G
};

INDEX_FACTORY_REGISTER_STORAGE(BufferStorage);

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/utility/file_dumper.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cerrno>
#include <zvec/ailego/io/file.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_factory.h>
#include <zvec/core/framework/index_format.h>
#include <zvec/core/framework/index_packer.h>

namespace zvec {
namespace core {

/*! File Dumper
 *
 *  Writes an index package to a file: create() opens the file and sets up
 *  the packer, write() streams segment data through the packer, append()
 *  records segment metadata, and close()/cleanup() finish the package and
 *  close the file.
 */
struct FileDumper : public IndexDumper {
 public:
  //! Constructor
  FileDumper(void) {}

  //! Destructor (finishes and closes a file that is still open)
  virtual ~FileDumper(void) {
    this->cleanup();
  }

  //! Initialize dumper (no parameters are consumed; always returns 0)
  int init(const ailego::Params &) override {
    return 0;
  }

  //! Cleanup dumper
  /*! Returns IndexError_PackIndex when the package could not be finished. */
  int cleanup(void) override {
    if (!this->close_index()) {
      return IndexError_PackIndex;
    }
    return 0;
  }

  //! Create a file for dumping
  /*! Creates the parent directory when `path` contains a '/', creates the
   *  file and sets up the packer on it.
   *
   *  Returns 0, IndexError_CreateFile or IndexError_WriteData.
   */
  int create(const std::string &path) override {
    size_t last_slash = path.rfind('/');
    if (last_slash != std::string::npos) {
      ailego::File::MakePath(path.substr(0, last_slash));
    }

    if (!file_.create(path.c_str(), sizeof(IndexFormat::MetaHeader))) {
      LOG_ERROR("Failed to create file %s, errno %d, %s", path.c_str(), errno,
                std::strerror(errno));
      return IndexError_CreateFile;
    }

    auto write_data = [this](const void *buf, size_t size) {
      return this->file_.write(buf, size);
    };
    if (!packer_.setup(write_data)) {
      LOG_ERROR("Failed to setup index package, errno %d, %s", errno,
                std::strerror(errno));
      return IndexError_WriteData;
    }
    return 0;
  }

  //! Close file
  /*! Returns IndexError_PackIndex when the package could not be finished. */
  int close(void) override {
    if (!this->close_index()) {
      return IndexError_PackIndex;
    }
    return 0;
  }

  //! Append a segment meta into table
  int append(const std::string &id, size_t data_size, size_t padding_size,
             uint32_t crc) override {
    stab_.emplace_back(id, data_size, padding_size, crc);
    return 0;
  }

  //! Write data to the storage (returns whatever the packer reports)
  size_t write(const void *data, size_t len) override {
    return packer_.pack(
        [this](const void *buf, size_t size) {
          return this->file_.write(buf, size);
        },
        data, len);
  }

  //! Retrieve magic number of index
  uint32_t magic(void) const override {
    return packer_.magic();
  }

 protected:
  //! Close index file
  /*! Finishes the package (when a file is open) and then always releases the
   *  segment table, the file and the packer state - also when finishing
   *  failed. Otherwise a failed close() would leave the file open and the
   *  destructor's cleanup() would run finish() a second time on the same,
   *  already partially written file.
   *
   *  Returns false only when finishing the package failed.
   */
  bool close_index(void) {
    if (!file_.is_valid()) {
      return true;
    }

    auto write_data = [this](const void *buf, size_t size) {
      return this->file_.write(buf, size);
    };
    const bool finished = packer_.finish(write_data, stab_);
    if (!finished) {
      LOG_ERROR("Failed to finish packing index package");
    }

    stab_.clear();
    file_.close();
    packer_.reset();
    return finished;
  }

 private:
  std::vector<IndexPacker::SegmentMeta> stab_{};
  ailego::File file_{};
  IndexPacker packer_{};
};

INDEX_FACTORY_REGISTER_DUMPER(FileDumper);

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/utility/file_read_storage.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cerrno>
#include <ailego/utility/memory_helper.h>
#include <zvec/ailego/io/file.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_factory.h>
#include <zvec/core/framework/index_unpacker.h>
#include "utility_params.h"

namespace zvec {
namespace core {

/*! File Storage
 */
class FileReadStorage : public IndexStorage {
 public:
  /*! File Storage Segment
   */
  class Segment : public IndexStorage::Segment {
   public:
    //! Index Storage Pointer
    typedef std::shared_ptr<Segment> Pointer;

    //! Constructor
    Segment(const FileReadStorage &container,
            const IndexUnpacker::SegmentMeta &segment,
            const std::shared_ptr<ailego::File> &file_ptr, size_t offset)
        : data_offset_(offset + segment.data_offset()),
          data_size_(segment.data_size()),
          padding_size_(segment.padding_size()),
          region_size_(segment.data_size() + segment.padding_size()),
          data_crc_(segment.data_crc()),
          enable_direct_io_(container.enable_direct_io_),
          alone_file_handle_(container.alone_file_handle_),
          file_ptr_(file_ptr) {
      if (alone_file_handle_) {
        file_path_ = container.file_path_;
      }
    }

    //! Constructor
    Segment(const Segment &rhs, const std::shared_ptr<ailego::File> &file_ptr)
        : data_offset_(rhs.data_offset_),
          data_size_(rhs.data_size_),
          padding_size_(rhs.padding_size_),
          region_size_(rhs.region_size_),
          data_crc_(rhs.data_crc_),
          file_ptr_(file_ptr),
          file_path_(rhs.file_path_) {}

    //! Destructor
    virtual ~Segment(void) {}

    //! Retrieve size of data
    size_t data_size(void) const override {
      return data_size_;
    }

    //! Retrieve size of padding
    size_t padding_size(void) const override {
      return padding_size_;
    }

    //! Retrieve crc of data
    uint32_t data_crc(void) const override {
      return data_crc_;
    }

    size_t capacity(void) const override {
      return region_size_;
    }

    //! Fetch data from segment (with own buffer)
    size_t fetch(size_t offset, void *buf, size_t len) const override {
      if (ailego_unlikely(offset + len > region_size_)) {
        if (offset > region_size_) {
          offset = region_size_;
        }
        len = region_size_ - offset;
      }
      return file_ptr_->read(data_offset_ + offset, buf, len);
    }

    //! Read data from segment
    size_t read(size_t offset, const void **data, size_t len) override {
      if (ailego_unlikely(offset + len > region_size_)) {
        if (offset > region_size_) {
          offset = region_size_;
        }
        len = region_size_ - offset;
      }
      buffer_.reserve(len);
      *data = buffer_.data();
      return file_ptr_->read(data_offset_ + offset, (void *)*data, len);
    }

    size_t read(size_t offset, MemoryBlock &data, size_t len) override {
      if (ailego_unlikely(offset + len > region_size_)) {
        if (offset > region_size_) {
          offset = region_size_;
        }
        len = region_size_ - offset;
      }
      buffer_.reserve(len);
      data.reset(buffer_.data());
      return file_ptr_->read(data_offset_ + offset, (void *)data.data(), len);
    }

    //! Read data from segment
    bool read(SegmentData *iovec, size_t count) override {
      size_t total = 0u;
      for (auto *it = iovec, *end = iovec + count; it != end; ++it) {
        ailego_false_if_false(it->offset + it->length <= region_size_);
        total += it->length;
      }
      ailego_false_if_false(total != 0);

      buffer_.reserve(total);
      uint8_t *buf = buffer_.data();
      for (auto *it = iovec, *end = iovec + count; it != end; ++it) {
        ailego_false_if_false(file_ptr_->read(data_offset_ + it->offset, buf,
                                              it->length) == it->length);
        it->data = buf;
        buf += it->length;
      }
      return true;
    }

    size_t write(size_t, const void *, size_t) override {
      return IndexError_NotImplemented;
    }

    size_t resize(size_t) override {
      return IndexError_NotImplemented;
    }

    void update_data_crc(uint32_t) override {
      return;
    }

    //! Clone the segment
    IndexStorage::Segment::Pointer clone(void) override {
      return this->clone_segment<FileReadStorage>();
    }

   protected:
    //! Clone the segment
    template <typename T>
    inline IndexStorage::Segment::Pointer clone_segment(void) {
      auto file_ptr = alone_file_handle_ ? FileReadStorage::OpenFile(
                                               file_path_, enable_direct_io_)
                                         : file_ptr_;
      if (file_ptr) {
        return std::make_shared<typename T::Segment>(
            *(static_cast<typename T::Segment *>(this)), file_ptr);
      }
      return IndexStorage::Segment::Pointer();
    }

   protected:
    size_t data_offset_{0u};
    size_t data_size_{0u};
    size_t padding_size_{0u};
    size_t region_size_{0u};
    uint32_t data_crc_{0u};
    bool enable_direct_io_{false};
    bool alone_file_handle_{false};
    std::vector<uint8_t> buffer_{};
    std::shared_ptr<ailego::File> file_ptr_{};
    std::string file_path_{};
  };

  /*! MMapFile Storage Segment
   *
   *  Serves one segment straight from a caller-supplied memory region. A
   *  callback supplied at construction runs when the segment is destroyed.
   */
  class MMapSegment : public Segment,
                      public std::enable_shared_from_this<Segment> {
   public:
    //! Constructor
    /*! \param data  start of the segment's bytes in memory (must not be null)
     *  \param cb    callback invoked from the destructor
     *  The remaining arguments are forwarded to Segment.
     */
    MMapSegment(const FileReadStorage &container,
                const IndexUnpacker::SegmentMeta &segment,
                const std::shared_ptr<ailego::File> &file_ptr, size_t offset,
                const void *data, std::function<void()> &&cb)
        : Segment(container, segment, file_ptr, offset),
          data_(static_cast<const char *>(data)),
          cleanup_(std::move(cb)) {
      ailego_assert_with(data_, "Null Pointer");
    }

    //! Destructor
    virtual ~MMapSegment(void) {
      // Invoking an empty std::function throws, and a throw from a
      // destructor terminates the program - only call a real callback.
      if (cleanup_) {
        cleanup_();
      }
    }

    //! Fetch data from segment (with own buffer)
    size_t fetch(size_t offset, void *buf, size_t len) const override {
      bound_request(&offset, &len);
      memcpy(buf, data_ + offset, len);
      return len;
    }

    //! Read data from segment (points `*data` into the memory region)
    size_t read(size_t offset, const void **data, size_t len) override {
      bound_request(&offset, &len);
      *data = data_ + offset;
      return len;
    }

    //! Read data from segment into a memory block
    size_t read(size_t offset, MemoryBlock &data, size_t len) override {
      bound_request(&offset, &len);
      data.reset(static_cast<void *>(const_cast<char *>(data_ + offset)));
      return len;
    }

    //! Read data from segment
    /*! Stores the address of each requested range in its `data` field.
     *  Returns false as soon as a range lies outside the region.
     */
    bool read(SegmentData *iovec, size_t count) override {
      for (auto *it = iovec, *end = iovec + count; it != end; ++it) {
        // Same bound as offset + length <= region_size_, without the
        // addition that could wrap around.
        ailego_false_if_false(it->offset <= region_size_ &&
                              it->length <= region_size_ - it->offset);
        it->data = data_ + it->offset;
      }
      return true;
    }

    //! Write data (not supported; returns IndexError_NotImplemented
    //! converted to size_t)
    size_t write(size_t, const void *, size_t) override {
      return IndexError_NotImplemented;
    }

    //! Resize (not supported; returns IndexError_NotImplemented converted to
    //! size_t)
    size_t resize(size_t) override {
      return IndexError_NotImplemented;
    }

    //! Update crc of data (no-op)
    void update_data_crc(uint32_t) override {
      return;
    }

    //! Clone the segment (returns another reference to this same object)
    IndexStorage::Segment::Pointer clone(void) override {
      return shared_from_this();
    }

   private:
    //! Clamp [offset, offset + len) to the region without overflowing.
    void bound_request(size_t *offset, size_t *len) const {
      if (*offset > region_size_) {
        *offset = region_size_;
      }
      const size_t remaining = region_size_ - *offset;
      if (*len > remaining) {
        *len = remaining;
      }
    }

    const char *data_{nullptr};
    std::function<void()> cleanup_{nullptr};
  };

  //! Destructor (nothing to do beyond member destruction)
  virtual ~FileReadStorage(void) {
  }

  //! Initialize container
  /*! Copies the FILE_READ_STORAGE_* settings from `params` into the matching
   *  members. Always returns 0.
   */
  int init(const ailego::Params &params) override {
    // Where the index lives inside the file.
    params.get(FILE_READ_STORAGE_HEADER_OFFSET, &header_offset_);
    params.get(FILE_READ_STORAGE_FOOTER_OFFSET, &footer_offset_);

    // How the file is opened and verified.
    params.get(FILE_READ_STORAGE_CHECKSUM_VALIDATION, &checksum_validation_);
    params.get(FILE_READ_STORAGE_ENABLE_DIRECT_IO, &enable_direct_io_);
    params.get(FILE_READ_STORAGE_ALONE_FILE_HANDLE, &alone_file_handle_);

    // Memory related switches.
    params.get(FILE_READ_STORAGE_MEMORY_LOCKED, &memory_locked_);
    params.get(FILE_READ_STORAGE_MEMORY_WARMUP, &memory_warmup_);
    params.get(FILE_READ_STORAGE_MEMORY_SHARED, &memory_shared_);
    return 0;
  }

  //! Flush storage (not supported by this storage)
  int flush(void) override {
    const int status = IndexError_NotImplemented;
    return status;
  }

  //! Append a segment (not supported by this storage)
  int append(const std::string & /*id*/, size_t /*size*/) override {
    const int status = IndexError_NotImplemented;
    return status;
  }

  //! Refresh meta information (no-op)
  void refresh(uint64_t) override {
    // Nothing to refresh.
  }

  //! Retrieve check point of storage (always 0)
  uint64_t check_point(void) const override {
    constexpr uint64_t kNoCheckPoint = 0;
    return kNoCheckPoint;
  }

  //! Cleanup container (same as close())
  int cleanup(void) override {
    const int status = this->close();
    return status;
  }

  //! Load a index file into container
  /*! Opens `path`, derives the index window from header_offset_ and
   *  footer_offset_ (each applied relative to the end of the file when the
   *  corresponding start-relative condition below is false), unpacks the
   *  segment table from that window and stores the result.
   *
   *  Returns 0, IndexError_OpenFile or IndexError_UnpackIndex.
   */
  int open(const std::string &path, bool) override {
    auto file_ptr = FileReadStorage::OpenFile(path, enable_direct_io_);
    if (!file_ptr) {
      return IndexError_OpenFile;
    }

    index_offset_ =
        (header_offset_ >= 0 ? 0 : file_ptr->size()) + header_offset_;
    size_t end_offset =
        (footer_offset_ > 0 ? 0 : file_ptr->size()) + footer_offset_;
    size_t size = end_offset > index_offset_ ? end_offset - index_offset_ : 0;
    auto read_data = [this, &file_ptr, end_offset](
                         size_t offset, const void **data, size_t len) {
      // Clamp the request to the index window; written without `off + len`
      // so the bound check cannot wrap around.
      size_t off = index_offset_ + offset;
      if (off > end_offset) {
        off = end_offset;
      }
      if (len > end_offset - off) {
        len = end_offset - off;
      }
      // resize() rather than reserve(): storage between size() and
      // capacity() is not part of the container and must not be written to.
      if (buffer_.size() < len) {
        buffer_.resize(len);
      }
      auto *dst = buffer_.data();
      *data = dst;
      return file_ptr->read(off, dst, len);
    };

    IndexUnpacker unpacker;
    if (!unpacker.unpack(read_data, size, checksum_validation_)) {
      LOG_ERROR("Failed to unpack file: %s", path.c_str());
      return IndexError_UnpackIndex;
    }
    segments_ = std::move(*unpacker.mutable_segments());
    magic_ = unpacker.magic();
    file_path_ = path;
    // With per-segment handles the storage itself keeps no handle open.
    file_ptr_ = alone_file_handle_ ? nullptr : file_ptr;
    return 0;
  }

  //! Close storage (drops the shared file handle and the segment table)
  int close(void) override {
    segments_.clear();
    file_ptr_ = nullptr;
    return 0;
  }

  //! Retrieve a segment by id
  /*! Level 0 requests go through get_mmap_segment(); every other level goes
   *  through get_segment().
   */
  IndexStorage::Segment::Pointer get(const std::string &id,
                                     int level) override {
    if (level == 0) {
      return this->get_mmap_segment<FileReadStorage>(id);
    }
    return this->get_segment<FileReadStorage>(id);
  }

  //! Retrieve all segments
  std::map<std::string, IndexStorage::Segment::Pointer> get_all(
      void) const override {
    std::map<std::string, IndexStorage::Segment::Pointer> result;
    auto file_ptr =
        alone_file_handle_ && !file_path_.empty()
            ? FileReadStorage::OpenFile(file_path_, enable_direct_io_)
            : file_ptr_;
    if (file_ptr) {
      for (const auto &it : segments_) {
        result.emplace(it.first,
                       std::make_shared<FileReadStorage::Segment>(
                           *(static_cast<const FileReadStorage *>(this)),
                           it.second, file_ptr, index_offset_));
      }
    }
    return result;
  }

  //! Test if a segment exists
  bool has(const std::string &id) const override {
    return segments_.count(id) != 0;
  }

  //! Retrieve magic number of index
  uint32_t magic(void) const override {
    return magic_;
  }

 protected:
  //! Open an index file.
  //! \param path       Path of the file to open
  //! \param direct_io  Forwarded to ailego::File::open()
  //! \return the opened file handle, or nullptr (after logging) on failure
  static inline std::shared_ptr<ailego::File> OpenFile(const std::string &path,
                                                       bool direct_io) {
    // std::make_shared reports allocation failure by throwing; it never
    // returns a null pointer, so there is nothing to check here.
    auto file_ptr = std::make_shared<ailego::File>();
    if (!file_ptr->open(path, true, direct_io)) {
      LOG_ERROR("Failed to open file %s, errno %d, %s", path.c_str(), errno,
                std::strerror(errno));
      return nullptr;
    }
    return file_ptr;
  }

  //! Retrieve a regular segment by id.
  //! Returns an empty pointer when the id is unknown or no file handle can
  //! be obtained.
  template <typename T>
  inline IndexStorage::Segment::Pointer get_segment(
      const std::string &id) const {
    auto found = segments_.find(id);
    if (found == segments_.end()) {
      return IndexStorage::Segment::Pointer();
    }

    // Either open a dedicated handle for this segment or share the one
    // kept by the storage.
    std::shared_ptr<ailego::File> handle;
    if (alone_file_handle_ && !file_path_.empty()) {
      handle = FileReadStorage::OpenFile(file_path_, enable_direct_io_);
    } else {
      handle = file_ptr_;
    }
    if (!handle) {
      return IndexStorage::Segment::Pointer();
    }

    const T &self = *(static_cast<const T *>(this));
    return std::make_shared<typename T::Segment>(self, found->second, handle,
                                                 index_offset_);
  }

  //! Retrieve a memory-mapped segment by id.
  //! The mapping starts at the page boundary at or below the segment's file
  //! offset; the pointer handed to the segment is advanced past that slack.
  //! Returns an empty pointer when the id is unknown, no file handle can be
  //! obtained, or the mapping fails.
  template <typename T>
  inline IndexStorage::Segment::Pointer get_mmap_segment(
      const std::string &id) const {
    auto found = segments_.find(id);
    if (found == segments_.end()) {
      return IndexStorage::Segment::Pointer();
    }
    const auto &segment = found->second;

    std::shared_ptr<ailego::File> handle;
    if (alone_file_handle_ && !file_path_.empty()) {
      handle = FileReadStorage::OpenFile(file_path_, enable_direct_io_);
    } else {
      handle = file_ptr_;
    }
    if (!handle) {
      return IndexStorage::Segment::Pointer();
    }

    // Translate the storage settings into mmap options
    int opt = 0;
    if (memory_locked_) {
      opt |= ailego::File::MMAP_LOCKED;
    }
    if (memory_warmup_) {
      opt |= ailego::File::MMAP_WARMUP;
    }
    if (memory_shared_) {
      opt |= ailego::File::MMAP_SHARED;
    }

    // Round the segment start down to a page boundary and widen the mapping
    // by the number of bytes skipped (bias).
    const size_t segment_offset = index_offset_ + segment.data_offset();
    const size_t bias = segment_offset % ailego::MemoryHelper::PageSize();
    size_t offset = segment_offset - bias;
    size_t size = segment.data_size() + segment.padding_size() + bias;

    void *data = handle->map(offset, size, opt);
    if (data == nullptr) {
      LOG_ERROR("Failed to mmap file: %s, offset: %zu, size: %zu",
                file_path_.c_str(), offset, size);
      return IndexStorage::Segment::Pointer();
    }
    return std::make_shared<typename T::MMapSegment>(
        *(static_cast<const T *>(this)), segment, handle, index_offset_,
        static_cast<char *>(data) + bias,
        [data, size]() { ailego::File::MemoryUnmap(data, size); });
  }

  //! Retrieve all segments
  template <typename T>
  inline std::map<std::string, IndexStorage::Segment::Pointer> get_all_segments(
      void) const {
    std::map<std::string, IndexStorage::Segment::Pointer> result;
    auto file_ptr =
        alone_file_handle_ && !file_path_.empty()
            ? FileReadStorage::OpenFile(file_path_, enable_direct_io_)
            : file_ptr_;
    if (file_ptr) {
      for (const auto &it : segments_) {
        result.emplace(it.first, std::make_shared<typename T::Segment>(
                                     *(static_cast<const T *>(this)), it.second,
                                     file_ptr, index_offset_));
      }
    }
    return result;
  }

 protected:
  //! Passed to IndexUnpacker::unpack() when the index file is opened
  bool checksum_validation_{false};
  //! Forwarded to OpenFile() as its direct_io argument
  bool enable_direct_io_{false};
  //! When true, open() keeps no shared handle (file_ptr_ stays null) and
  //! each lookup opens its own handle on file_path_
  bool alone_file_handle_{false};
  //! Adds File::MMAP_LOCKED to the options used by get_mmap_segment()
  bool memory_locked_{false};
  //! Adds File::MMAP_WARMUP to the options used by get_mmap_segment()
  bool memory_warmup_{false};
  //! Adds File::MMAP_SHARED to the options used by get_mmap_segment()
  bool memory_shared_{false};
  //! Magic number taken from the unpacker in open()
  uint32_t magic_{0};
  //! Header offset setting; a negative value is added to the file size
  //! when open() computes index_offset_
  int64_t header_offset_{0};
  //! Footer offset setting; a non-positive value is added to the file size
  //! when open() computes the end of the index
  int64_t footer_offset_{0};
  //! Offset in the file at which the index package starts
  size_t index_offset_{0};
  //! Scratch buffer used by the read callback handed to the unpacker
  std::vector<uint8_t> buffer_{};
  //! Segment metas produced by the unpacker, keyed by segment id
  std::map<std::string, IndexUnpacker::SegmentMeta> segments_{};
  //! Shared file handle (null when alone_file_handle_ is set or when closed)
  std::shared_ptr<ailego::File> file_ptr_{nullptr};
  //! Path of the file that was opened
  std::string file_path_{};
};
};

INDEX_FACTORY_REGISTER_STORAGE(FileReadStorage);

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/utility/memory_dumper.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cerrno>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_factory.h>
#include <zvec/core/framework/index_format.h>
#include <zvec/core/framework/index_memory.h>
#include <zvec/core/framework/index_packer.h>

namespace zvec {
namespace core {

/*! Memory Dumper
 *
 *  Packs an index into a memory rope obtained from IndexMemory. Every byte
 *  produced by the packer is appended to block 0 of that rope.
 *
 *  Expected call order: create() -> write()/append() ... -> close().
 *  write() and close() are safe to call without a created memory block:
 *  write() reports zero bytes written and close() reports an error.
 */
struct MemoryDumper : public IndexDumper {
 public:
  //! Constructor
  MemoryDumper(void) {}

  //! Destructor
  virtual ~MemoryDumper(void) {}

  //! Initialize dumper (no parameter is consumed)
  int init(const ailego::Params &) override {
    return 0;
  }

  //! Cleanup dumper: drop the pending segment table, reset the packer and
  //! release the rope without finishing the package
  int cleanup(void) override {
    stab_.clear();
    packer_.reset();
    rope_ = nullptr;
    return 0;
  }

  //! Create a memory block for dumping
  //! \return 0 on success, IndexError_CreateFile when the rope cannot be
  //!         created, IndexError_WriteData when the packer setup fails
  int create(const std::string &path) override {
    rope_ = IndexMemory::Instance()->create(path);
    if (!rope_) {
      LOG_ERROR("Failed to create memory block %s, errno %d, %s", path.c_str(),
                errno, std::strerror(errno));
      return IndexError_CreateFile;
    }
    // Append a memory block
    rope_->append(0);

    auto write_data = [this](const void *buf, size_t size) {
      return (*this->rope_)[0].append(buf, size);
    };
    if (!packer_.setup(write_data)) {
      LOG_ERROR("Failed to setup index package, errno %d, %s", errno,
                std::strerror(errno));
      return IndexError_WriteData;
    }
    return 0;
  }

  //! Close memory block: finish the package and release the rope
  //! \return 0 on success, IndexError_PackIndex when there is no memory
  //!         block to finish or when the packer fails to finish
  int close(void) override {
    // Without a rope (create() not called, or already closed/cleaned up)
    // the write callback below would dereference a null pointer.
    if (!rope_) {
      LOG_ERROR(
          "Failed to close memory dumper: no memory block has been created");
      return IndexError_PackIndex;
    }

    auto write_data = [this](const void *buf, size_t size) {
      return (*this->rope_)[0].append(buf, size);
    };

    if (!packer_.finish(write_data, stab_)) {
      LOG_ERROR("Failed to finish packing index package");
      return IndexError_PackIndex;
    }
    stab_.clear();
    packer_.reset();
    rope_ = nullptr;
    return 0;
  }

  //! Append a segment meta into table
  int append(const std::string &id, size_t data_size, size_t padding_size,
             uint32_t crc) override {
    stab_.emplace_back(id, data_size, padding_size, crc);
    return 0;
  }

  //! Append data to the storage
  //! \return the value reported by the packer, or 0 when no memory block
  //!         has been created
  size_t write(const void *data, size_t len) override {
    // Nothing can be written before create() or after close()/cleanup()
    if (!rope_) {
      return 0;
    }
    return packer_.pack(
        [this](const void *buf, size_t size) {
          return (*this->rope_)[0].append(buf, size);
        },
        data, len);
  }

  //! Retrieve magic number of index
  uint32_t magic(void) const override {
    return packer_.magic();
  }

 private:
  //! Segment metas collected by append(), consumed by close()
  std::vector<IndexPacker::SegmentMeta> stab_{};
  //! Rope receiving the packed bytes (null unless created and not closed)
  IndexMemory::Rope::Pointer rope_{};
  //! Packer producing the index package
  IndexPacker packer_{};
};

INDEX_FACTORY_REGISTER_DUMPER(MemoryDumper);

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/utility/memory_read_storage.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cerrno>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_factory.h>
#include <zvec/core/framework/index_format.h>
#include <zvec/core/framework/index_memory.h>
#include <zvec/core/framework/index_unpacker.h>
#include "utility_params.h"

namespace zvec {
namespace core {

/*! Memory Storage
 */
class MemoryReadStorage : public IndexStorage {
 public:
  /*! Memory Storage Segment
   */
  class Segment : public IndexStorage::Segment,
                  public std::enable_shared_from_this<Segment> {
   public:
    //! Index Storage Pointer
    typedef std::shared_ptr<Segment> Pointer;

    //! Constructor
    Segment(const IndexMemory::Rope::Pointer &rope,
            const IndexUnpacker::SegmentMeta &segment)
        : data_offset_(segment.data_offset()),
          data_size_(segment.data_size()),
          padding_size_(segment.padding_size()),
          region_size_(segment.data_size() + segment.padding_size()),
          data_crc_(segment.data_crc()),
          block_(&(*rope)[0]),
          rope_(rope) {}

    //! Destructor
    virtual ~Segment(void) {}

    //! Retrieve size of data
    size_t data_size(void) const override {
      return data_size_;
    }

    //! Retrieve crc of data
    uint32_t data_crc(void) const override {
      return data_crc_;
    }

    //! Retrieve size of padding
    size_t padding_size(void) const override {
      return padding_size_;
    }

    size_t capacity(void) const override {
      return region_size_;
    }

    //! Fetch data from segment (with own buffer)
    size_t fetch(size_t offset, void *buf, size_t len) const override {
      if (ailego_unlikely(offset + len > region_size_)) {
        if (offset > region_size_) {
          offset = region_size_;
        }
        len = region_size_ - offset;
      }
      return block_->fetch(data_offset_ + offset, buf, len);
    }

    //! Read data from segment
    size_t read(size_t offset, const void **data, size_t len) override {
      if (ailego_unlikely(offset + len > region_size_)) {
        if (offset > region_size_) {
          offset = region_size_;
        }
        len = region_size_ - offset;
      }
      return block_->read(data_offset_ + offset, data, len);
    }

    size_t read(size_t offset, MemoryBlock &data, size_t len) override {
      if (ailego_unlikely(offset + len > region_size_)) {
        if (offset > region_size_) {
          offset = region_size_;
        }
        len = region_size_ - offset;
      }
      const void *data_ptr = nullptr;
      size_t return_value = block_->read(data_offset_ + offset, &data_ptr, len);
      data.reset((void *)data_ptr);
      return return_value;
    }

    //! Read data from segment
    bool read(SegmentData *iovec, size_t count) override {
      for (auto *end = iovec + count; iovec != end; ++iovec) {
        ailego_false_if_false(iovec->offset + iovec->length <= region_size_);
        block_->read(data_offset_ + iovec->offset, &iovec->data, iovec->length);
      }
      return true;
    }

    size_t write(size_t, const void *, size_t) override {
      return IndexError_NotImplemented;
    }

    size_t resize(size_t) override {
      return IndexError_NotImplemented;
    }

    void update_data_crc(uint32_t) override {
      return;
    }

    //! Clone the segment
    IndexStorage::Segment::Pointer clone(void) override {
      return shared_from_this();
    }

   private:
    size_t data_offset_{0u};
    size_t data_size_{0u};
    size_t padding_size_{0u};
    size_t region_size_{0u};
    uint32_t data_crc_{0u};
    IndexMemory::Block *block_{nullptr};
    IndexMemory::Rope::Pointer rope_{};
  };

  //! Destructor
  virtual ~MemoryReadStorage(void) {}

  //! Initialize container
  int init(const ailego::Params &params) override {
    params.get(MEMORY_CONTAINER_CHECKSUM_VALIDATION, &checksum_validation_);
    return 0;
  }

  //! Cleanup container
  int flush(void) override {
    return IndexError_NotImplemented;
  }

  int append(const std::string &, size_t) override {
    return IndexError_NotImplemented;
  }

  void refresh(uint64_t) override {
    return;
  }

  uint64_t check_point(void) const override {
    return 0;
  }

  //! Cleanup container
  int cleanup(void) override {
    return this->close();
  }

  //! Load a index file into container
  int open(const std::string &path, bool) override {
    rope_ = IndexMemory::Instance()->open(path);
    if (!rope_) {
      LOG_ERROR("Failed to open memory rope %s", path.c_str());
      return IndexError_NoExist;
    }
    if (rope_->empty()) {
      LOG_ERROR("The memory rope %s is empty.", path.c_str());
      return IndexError_NoExist;
    }

    auto read_data = [this](size_t offset, const void **data, size_t len) {
      return (*this->rope_)[0].read(offset, data, len);
    };

    IndexUnpacker unpacker;
    if (!unpacker.unpack(read_data, (*rope_)[0].size(), checksum_validation_)) {
      LOG_ERROR("Failed to unpack memory block: %s", path.c_str());
      return IndexError_UnpackIndex;
    }
    segments_ = std::move(*unpacker.mutable_segments());
    magic_ = unpacker.magic();
    return 0;
  }

  //! Unload all indexes
  int close(void) override {
    rope_ = nullptr;
    segments_.clear();
    return 0;
  }

  //! Retrieve a segment by id
  IndexStorage::Segment::Pointer get(const std::string &id, int) override {
    if (!rope_) {
      return IndexStorage::Segment::Pointer();
    }
    auto it = segments_.find(id);
    if (it == segments_.end()) {
      return IndexStorage::Segment::Pointer();
    }
    return std::make_shared<Segment>(rope_, it->second);
  }

  //! Retrieve all segments
  std::map<std::string, IndexStorage::Segment::Pointer> get_all(
      void) const override {
    std::map<std::string, IndexStorage::Segment::Pointer> result;
    if (rope_) {
      for (const auto &it : segments_) {
        result.emplace(it.first, std::make_shared<Segment>(rope_, it.second));
      }
    }
    return result;
  }

  //! Test if it a segment exists
  bool has(const std::string &id) const override {
    return (segments_.find(id) != segments_.end());
  }

  //! Retrieve magic number of index
  uint32_t magic(void) const override {
    return magic_;
  }

 private:
  bool checksum_validation_{false};
  uint32_t magic_{0};
  std::map<std::string, IndexUnpacker::SegmentMeta> segments_{};
  IndexMemory::Rope::Pointer rope_{};
};

INDEX_FACTORY_REGISTER_STORAGE(MemoryReadStorage);

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/utility/mmap_file_read_storage.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cerrno>
#include <zvec/ailego/io/mmap_file.h>
#include <zvec/core/framework/index_factory.h>
#include <zvec/core/framework/index_format.h>
#include <zvec/core/framework/index_unpacker.h>
#include "utility_params.h"

namespace zvec {
namespace core {

/*! MMap File Storage
 */
class MMapFileReadStorage : public IndexStorage {
 public:
  /*! MMap File Storage Segment
   */
  class Segment : public IndexStorage::Segment,
                  public std::enable_shared_from_this<Segment> {
   public:
    //! Index Storage Pointer
    typedef std::shared_ptr<Segment> Pointer;

    //! Constructor
    Segment(const std::shared_ptr<ailego::MMapFile> &file_ptr, size_t offset,
            const IndexUnpacker::SegmentMeta &segment)
        : data_ptr_(reinterpret_cast<uint8_t *>(file_ptr->region()) + offset +
                    segment.data_offset()),
          data_size_(segment.data_size()),
          padding_size_(segment.padding_size()),
          region_size_(segment.data_size() + segment.padding_size()),
          data_crc_(segment.data_crc()),
          file_ptr_(file_ptr) {}

    //! Destructor
    virtual ~Segment(void) {}

    //! Retrieve size of data
    size_t data_size(void) const override {
      return data_size_;
    }

    //! Retrieve crc of data
    uint32_t data_crc(void) const override {
      return data_crc_;
    }

    //! Retrieve size of padding
    size_t padding_size(void) const override {
      return padding_size_;
    }

    size_t capacity(void) const override {
      return region_size_;
    }

    //! Fetch data from segment (with own buffer)
    size_t fetch(size_t offset, void *buf, size_t len) const override {
      if (ailego_unlikely(offset + len > region_size_)) {
        if (offset > region_size_) {
          offset = region_size_;
        }
        len = region_size_ - offset;
      }
      memcpy(buf, data_ptr_ + offset, len);
      return len;
    }

    //! Read data from segment
    size_t read(size_t offset, const void **data, size_t len) override {
      if (ailego_unlikely(offset + len > region_size_)) {
        if (offset > region_size_) {
          offset = region_size_;
        }
        len = region_size_ - offset;
      }
      *data = data_ptr_ + offset;
      return len;
    }

    size_t read(size_t offset, MemoryBlock &data, size_t len) override {
      if (ailego_unlikely(offset + len > region_size_)) {
        if (offset > region_size_) {
          offset = region_size_;
        }
        len = region_size_ - offset;
      }
      data.reset((void *)(data_ptr_ + offset));
      return len;
    }

    //! Read data from segment
    bool read(SegmentData *iovec, size_t count) override {
      for (auto *end = iovec + count; iovec != end; ++iovec) {
        ailego_false_if_false(iovec->offset + iovec->length <= region_size_);
        iovec->data = data_ptr_ + iovec->offset;
      }
      return true;
    }

    size_t write(size_t, const void *, size_t) override {
      return IndexError_NotImplemented;
    }

    size_t resize(size_t) override {
      return IndexError_NotImplemented;
    }

    void update_data_crc(uint32_t) override {
      return;
    }

    //! Clone the segment
    IndexStorage::Segment::Pointer clone(void) override {
      return shared_from_this();
    }

   private:
    const uint8_t *data_ptr_{nullptr};
    size_t data_size_{0u};
    size_t padding_size_{0u};
    size_t region_size_{0u};
    uint32_t data_crc_{0u};
    std::shared_ptr<ailego::MMapFile> file_ptr_{nullptr};
  };

  //! Destructor
  virtual ~MMapFileReadStorage(void) {}

  //! Initialize container
  int init(const ailego::Params &params) override {
    params.get(MMAPFILE_READ_STORAGE_MEMORY_LOCKED, &memory_locked_);
    params.get(MMAPFILE_READ_STORAGE_MEMORY_WARMUP, &memory_warmup_);
    params.get(MMAPFILE_READ_STORAGE_MEMORY_SHARED, &memory_shared_);
    params.get(MMAPFILE_READ_STORAGE_CHECKSUM_VALIDATION,
               &checksum_validation_);
    params.get(MMAPFILE_READ_STORAGE_HEADER_OFFSET, &header_offset_);
    params.get(MMAPFILE_READ_STORAGE_FOOTER_OFFSET, &footer_offset_);
    return 0;
  }

  int flush(void) override {
    return 0;
  }

  int append(const std::string &, size_t) override {
    return IndexError_NotImplemented;
  }

  void refresh(uint64_t) override {
    return;
  }

  uint64_t check_point(void) const override {
    return 0;
  }

  //! Cleanup container
  int cleanup(void) override {
    return this->close();
  }

  //! Load a index file into container
  int open(const std::string &path, bool) override {
    file_ptr_ = std::make_shared<ailego::MMapFile>();
    if (!file_ptr_) {
      LOG_ERROR("Failed to create mmap file object, errno %d, %s", errno,
                std::strerror(errno));
      return IndexError_NoMemory;
    }

    if (!file_ptr_->open(path.c_str(), true, memory_shared_)) {
      LOG_ERROR("Failed to open file %s, errno %d, %s", path.c_str(), errno,
                std::strerror(errno));
      return IndexError_OpenFile;
    }

    index_offset_ =
        (header_offset_ >= 0 ? 0 : file_ptr_->size()) + header_offset_;
    size_t end_offset =
        (footer_offset_ > 0 ? 0 : file_ptr_->size()) + footer_offset_;
    size_t size = end_offset > index_offset_ ? end_offset - index_offset_ : 0;
    if (memory_locked_ && !file_ptr_->lock()) {
      LOG_WARN("Failed to lock pages with size %zu, errno %d, %s",
               file_ptr_->size(), errno, std::strerror(errno));
    }
    if (memory_warmup_ && !checksum_validation_) {
      ailego::File::MemoryWarmup(
          static_cast<char *>(file_ptr_->region()) + index_offset_, size);
    }

    auto read_data = [this, end_offset](size_t offset, const void **data,
                                        size_t len) {
      size_t off = offset + index_offset_;
      if (off + len > end_offset) {
        if (off > end_offset) {
          off = end_offset;
        }
        len = end_offset - off;
      }
      *data = (uint8_t *)file_ptr_->region() + off;
      return len;
    };

    IndexUnpacker unpacker;
    if (!unpacker.unpack(read_data, size, checksum_validation_)) {
      LOG_ERROR("Failed to unpack file: %s", path.c_str());
      return IndexError_UnpackIndex;
    }
    segments_ = std::move(*unpacker.mutable_segments());
    magic_ = unpacker.magic();
    return 0;
  }

  int close(void) override {
    file_ptr_ = nullptr;
    segments_.clear();
    return 0;
  }

  //! Retrieve a segment by id
  IndexStorage::Segment::Pointer get(const std::string &id, int) override {
    if (!file_ptr_) {
      return IndexStorage::Segment::Pointer();
    }
    auto it = segments_.find(id);
    if (it == segments_.end()) {
      return IndexStorage::Segment::Pointer();
    }
    return std::make_shared<MMapFileReadStorage::Segment>(
        file_ptr_, index_offset_, it->second);
  }

  std::map<std::string, IndexStorage::Segment::Pointer> get_all(
      void) const override {
    std::map<std::string, IndexStorage::Segment::Pointer> result;
    if (file_ptr_) {
      for (const auto &it : segments_) {
        result.emplace(it.first, std::make_shared<MMapFileReadStorage::Segment>(
                                     file_ptr_, index_offset_, it.second));
      }
    }
    return result;
  }

  //! Test if it a segment exists
  bool has(const std::string &id) const override {
    return (segments_.find(id) != segments_.end());
  }

  //! Retrieve magic number of index
  uint32_t magic(void) const override {
    return magic_;
  }

 private:
  bool memory_locked_{false};
  bool memory_warmup_{false};
  bool memory_shared_{false};
  bool checksum_validation_{false};
  int64_t header_offset_{0};
  int64_t footer_offset_{0};
  size_t index_offset_{0};
  uint32_t magic_{0};
  std::map<std::string, IndexUnpacker::SegmentMeta> segments_{};
  std::shared_ptr<ailego::MMapFile> file_ptr_{nullptr};
};

INDEX_FACTORY_REGISTER_STORAGE(MMapFileReadStorage);

}  // namespace core
}  // namespace zvec

================================================
FILE: src/core/utility/mmap_file_storage.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <mutex>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_factory.h>
#include <zvec/core/framework/index_mapping.h>
#include <zvec/core/framework/index_version.h>
#include "utility_params.h"

namespace zvec {
namespace core {

/*! MMap File Storage
 *
 *  Writable storage backed by an IndexMapping. Calls into the mapping made
 *  by append_segment(), has_segment(), get_segment(), and the flush/close
 *  calls are serialized with an internal mutex.
 */
class MMapFileStorage : public IndexStorage {
 public:
  /*! Index Storage Segment
   *
   *  Thin view over an IndexMapping::Segment. The capacity is fixed at
   *  construction time as `data_size + padding_size`; reads are bounded by
   *  the current data size and writes by the capacity.
   *  The segment stores raw pointers to the mapping segment and to the
   *  owning storage.
   */
  class Segment : public IndexStorage::Segment,
                  public std::enable_shared_from_this<Segment> {
   public:
    //! Index Storage Pointer
    typedef std::shared_ptr<Segment> Pointer;

    //! Constructor
    Segment(MMapFileStorage *owner, IndexMapping::Segment *segment)
        : segment_(segment),
          owner_(owner),
          capacity_(static_cast<size_t>(segment->meta()->data_size +
                                        segment->meta()->padding_size)) {}

    //! Destructor
    virtual ~Segment(void) {}

    //! Retrieve size of data
    size_t data_size(void) const override {
      return static_cast<size_t>(segment_->meta()->data_size);
    }

    //! Retrieve crc of data
    uint32_t data_crc(void) const override {
      return segment_->meta()->data_crc;
    }

    //! Retrieve size of padding
    size_t padding_size(void) const override {
      return static_cast<size_t>(segment_->meta()->padding_size);
    }

    //! Retrieve capacity of segment
    size_t capacity(void) const override {
      return capacity_;
    }

    //! Fetch data from segment (with own buffer)
    //! The request is truncated to the current data size.
    size_t fetch(size_t offset, void *buf, size_t len) const override {
      this->clamp_to_data(&offset, &len);
      memmove(buf, (const uint8_t *)segment_->data() + offset, len);
      return len;
    }

    //! Read data from segment (pointer into the mapping, no copy)
    //! The request is truncated to the current data size.
    size_t read(size_t offset, const void **data, size_t len) override {
      this->clamp_to_data(&offset, &len);
      *data = (uint8_t *)segment_->data() + offset;
      return len;
    }

    //! Read data from segment into a memory block
    //! The request is truncated to the current data size.
    size_t read(size_t offset, MemoryBlock &data, size_t len) override {
      this->clamp_to_data(&offset, &len);
      data.reset((uint8_t *)segment_->data() + offset);
      return len;
    }

    //! Write data into the storage with offset
    //! Returns 0 without writing when the range does not fit in the
    //! capacity; grows the data size when the write extends past it.
    size_t write(size_t offset, const void *data, size_t len) override {
      // Checked without adding offset and len first: the sum could wrap
      // around and make an out-of-range write look valid.
      ailego_zero_if_false((offset <= capacity_ && len <= capacity_ - offset));
      size_t data_tail = offset + len;
      auto meta = segment_->meta();
      if (data_tail > meta->data_size) {
        meta->data_size = data_tail;
        meta->padding_size = capacity_ - data_tail;
        owner_->set_as_dirty();
      }
      memmove((uint8_t *)segment_->data() + offset, data, len);
      segment_->set_dirty();
      return len;
    }

    //! Resize size of data
    //! The size is limited to the capacity; returns the size applied.
    size_t resize(size_t size) override {
      auto meta = segment_->meta();
      if (meta->data_size != size) {
        if (size > capacity_) {
          size = capacity_;
        }
        meta->data_size = size;
        meta->padding_size = capacity_ - size;
        owner_->set_as_dirty();
      }
      return size;
    }

    //! Update crc of data
    void update_data_crc(uint32_t crc) override {
      segment_->meta()->data_crc = crc;
    }

    //! Clone the segment (returns another reference to this object)
    IndexStorage::Segment::Pointer clone(void) override {
      return shared_from_this();
    }

   private:
    //! Truncate [offset, offset + len) to the current data size.
    //! Written without computing `offset + len`, which could wrap around
    //! for very large arguments and let an out-of-range request through.
    void clamp_to_data(size_t *offset, size_t *len) const {
      const size_t limit = static_cast<size_t>(segment_->meta()->data_size);
      if (ailego_unlikely(*offset > limit)) {
        *offset = limit;
      }
      const size_t room = limit - *offset;
      if (ailego_unlikely(*len > room)) {
        *len = room;
      }
    }

    IndexMapping::Segment *segment_{};
    MMapFileStorage *owner_{nullptr};
    size_t capacity_{};
  };

  //! Destructor
  virtual ~MMapFileStorage(void) {
    this->cleanup();
  }

  //! Initialize storage
  int init(const ailego::Params &params) override {
    uint32_t val = params.get_as_uint32(MMAPFILE_STORAGE_SEGMENT_META_CAPACITY);
    // A zero value keeps the built-in default capacity
    if (val != 0) {
      segment_meta_capacity_ = val;
    }
    params.get(MMAPFILE_STORAGE_COPY_ON_WRITE, &copy_on_write_);
    params.get(MMAPFILE_STORAGE_FORCE_FLUSH, &force_flush_);
    params.get(MMAPFILE_STORAGE_MEMORY_LOCKED, &memory_locked_);
    params.get(MMAPFILE_STORAGE_MEMORY_WARMUP, &memory_warmup_);
    return 0;
  }

  //! Cleanup storage
  int cleanup(void) override {
    this->close_index();
    return 0;
  }

  //! Open storage
  //! When the file does not exist and `create` is set, the parent directory
  //! and a fresh index file are created first.
  int open(const std::string &path, bool create) override {
    if (!ailego::File::IsExist(path) && create) {
      size_t last_slash = path.rfind('/');
      if (last_slash != std::string::npos) {
        ailego::File::MakePath(path.substr(0, last_slash));
      }

      int error_code = this->init_index(path);
      if (error_code != 0) {
        return error_code;
      }
    }
    return mapping_.open(path, copy_on_write_, force_flush_);
  }

  //! Flush storage
  int flush(void) override {
    return this->flush_index();
  }

  //! Close storage
  int close(void) override {
    this->close_index();
    return 0;
  }

  //! Append a segment into storage
  int append(const std::string &id, size_t size) override {
    return this->append_segment(id, size);
  }

  //! Refresh meta information (checksum, update time, etc.)
  void refresh(uint64_t chkp) override {
    this->refresh_index(chkp);
  }

  //! Retrieve check point of storage
  uint64_t check_point(void) const override {
    return mapping_.footer().check_point;
  }

  //! Retrieve a segment by id (empty pointer when it cannot be mapped)
  IndexStorage::Segment::Pointer get(const std::string &id, int) override {
    IndexMapping::Segment *segment = this->get_segment(id);
    if (!segment) {
      return MMapFileStorage::Segment::Pointer();
    }
    return std::make_shared<MMapFileStorage::Segment>(this, segment);
  }

  //! Test if a segment exists
  bool has(const std::string &id) const override {
    return this->has_segment(id);
  }

  //! Retrieve magic number of index
  uint32_t magic(void) const override {
    return mapping_.magic();
  }

 protected:
  //! Initialize index version segment
  //! Appends the version segment and fills it with IndexVersion::Details()
  //! (without the terminating NUL), updating crc, data size and padding.
  int init_version_segment(void) {
    size_t data_size = std::strlen(IndexVersion::Details());
    int error_code =
        this->append_segment(INDEX_VERSION_SEGMENT_NAME, data_size);
    if (error_code != 0) {
      return error_code;
    }

    IndexMapping::Segment *segment = get_segment(INDEX_VERSION_SEGMENT_NAME);
    if (!segment) {
      return IndexError_MMapFile;
    }
    auto meta = segment->meta();
    size_t capacity = static_cast<size_t>(meta->padding_size + meta->data_size);
    memcpy(segment->data(), IndexVersion::Details(), data_size);
    segment->set_dirty();
    meta->data_crc = ailego::Crc32c::Hash(segment->data(), data_size, 0);
    meta->data_size = data_size;
    meta->padding_size = capacity - data_size;
    return 0;
  }

  //! Initialize index file
  //! Creates the file, writes the version segment, refreshes the meta
  //! information and closes the mapping again.
  int init_index(const std::string &path) {
    int error_code = mapping_.create(path, segment_meta_capacity_);
    if (error_code != 0) {
      return error_code;
    }

    // Add index version
    error_code = this->init_version_segment();
    if (error_code != 0) {
      return error_code;
    }

    // Refresh mapping
    this->refresh_index(0);

    // Close mapping
    mapping_.close();
    return 0;
  }

  //! Test if the mapping uses huge pages
  bool isHugePage(void) const override {
    return mapping_.huge_page();
  }

  //! Set the index file as dirty
  void set_as_dirty(void) {
    index_dirty_ = true;
  }

  //! Refresh meta information (checksum, update time, etc.)
  void refresh_index(uint64_t chkp) {
    mapping_.refresh(chkp);
    index_dirty_ = false;
  }

  //! Flush index storage (refreshes the meta information first when dirty)
  int flush_index(void) {
    if (index_dirty_) {
      this->refresh_index(0);
    }
    std::lock_guard<std::mutex> latch(mapping_mutex_);
    return mapping_.flush();
  }

  //! Close index storage (refreshes the meta information first when dirty)
  void close_index(void) {
    if (index_dirty_) {
      this->refresh_index(0);
    }
    std::lock_guard<std::mutex> latch(mapping_mutex_);
    mapping_.close();
  }

  //! Append a segment into storage
  int append_segment(const std::string &id, size_t size) {
    std::lock_guard<std::mutex> latch(mapping_mutex_);
    return mapping_.append(id, size);
  }

  //! Test if a segment exists
  bool has_segment(const std::string &id) const {
    std::lock_guard<std::mutex> latch(mapping_mutex_);
    return mapping_.has(id);
  }

  //! Get a segment from storage
  IndexMapping::Segment *get_segment(const std::string &id) {
    std::lock_guard<std::mutex> latch(mapping_mutex_);
    return mapping_.map(id, memory_warmup_, memory_locked_);
  }

 private:
  uint32_t segment_meta_capacity_{1024 * 1024};
  bool copy_on_write_{false};
  bool force_flush_{false};
  bool memory_locked_{false};
  bool memory_warmup_{false};
  bool index_dirty_{false};
  mutable IndexMapping mapping_{};
  mutable std::mutex mapping_mutex_{};
};

INDEX_FACTORY_REGISTER_STORAGE(MMapFileStorage);

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/utility/sparse_utility.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <algorithm>
#include <cmath>
#include <iostream>
#include <numeric>
#include <vector>
#include <zvec/core/framework/index_document.h>
#include <zvec/core/framework/index_logger.h>
#include <zvec/core/framework/index_meta.h>

namespace zvec {
namespace core {

//! A 32-bit sparse index is split in two: the bits above SEGMENT_ID_BITS
//! (index >> SEGMENT_ID_BITS) are used as the segment id ...
constexpr uint32_t SEGMENT_ID_BITS = 16;
//! ... and the low bits selected by this mask (index & SEGMENT_ID_MASK) are
//! stored as the 16-bit dimension inside the segment
constexpr uint32_t SEGMENT_ID_MASK = 0xFFFF;

//! Pair of (segment id, number of entries counted for that segment)
struct SparseSegmentInfo {
  //! Segment id; a default-constructed record holds -1U
  uint32_t seg_id_{-1U};
  //! Number of entries counted for the segment
  uint32_t vec_cnt_{0};

  //! Default record: seg_id_ == -1U, vec_cnt_ == 0 (taken from the member
  //! initializers above)
  SparseSegmentInfo() {}

  //! Record for segment `seg_id` holding `vec_cnt` entries
  SparseSegmentInfo(uint32_t seg_id, uint32_t vec_cnt)
      : seg_id_(seg_id), vec_cnt_(vec_cnt) {}
};

//! A primary key together with its raw vector bytes and, for hybrid
//! vectors, an additional sparse buffer
struct VectorItem {
  //! Primary key
  key_t pkey_{0};
  //! Vector payload as raw bytes
  std::vector<uint8_t> vec_{};
  // TODO: drop support for hybrid vectors
  //! Sparse part of a hybrid vector
  std::string sparse_buffer_{};
  //! Unit size stored alongside sparse_buffer_
  uint32_t sparse_unit_size_{0};

  //! Empty item: every member keeps its default shown above
  VectorItem() = default;

  //! Item without a sparse part; `vec` is moved into the item
  VectorItem(key_t pkey, std::vector<uint8_t> vec)
      : pkey_{pkey}, vec_{std::move(vec)} {}

  // TODO: drop support for hybrid vectors
  //! Hybrid item; `vec` and `sparse_buffer` are moved into the item
  VectorItem(key_t pkey, std::vector<uint8_t> vec, std::string sparse_buffer,
             uint32_t sparse_unit_size)
      : pkey_{pkey},
        vec_{std::move(vec)},
        sparse_buffer_{std::move(sparse_buffer)},
        sparse_unit_size_{sparse_unit_size} {}
};

//! A primary key together with the indices and packed values of one sparse
//! vector
struct SparseVectorItem {
  //! Primary key
  key_t pkey_{0};
  //! Indices of the sparse entries
  std::vector<uint32_t> sparse_indices_{};
  //! Values of the sparse entries, kept as a byte string
  std::string sparse_values_{};

  //! Empty item: every member keeps its default shown above
  SparseVectorItem() = default;

  //! Item built from its parts; both containers are moved into the item
  SparseVectorItem(key_t pkey, std::vector<uint32_t> sparse_indices,
                   std::string sparse_values)
      : pkey_{pkey},
        sparse_indices_{std::move(sparse_indices)},
        sparse_values_{std::move(sparse_values)} {}
};

class SparseUtility {
 public:
  //! Check whether arr is an arithmetic sequence, i.e. every pair of
  //! neighbours differs by the same step.
  //! For example: 1,3,5,7,9,11...
  //! @param arr   the values to test (only read)
  //! @param size  number of values in arr
  //! @return true when all neighbour differences are equal; sequences of at
  //!         most two values are always accepted
  //! Differences are compared as values of type T. For unsigned T this is
  //! modular arithmetic, so a decreasing sequence such as 5,3,1 is accepted
  //! for every width.
  template <typename T>
  static bool IsArithmeticSequence(T *arr, size_t size) {
    static_assert(std::is_integral<T>::value, "Integral required");
    if (size <= 2) return true;

    // Operands narrower than int are promoted before the subtraction, so
    // each difference is narrowed back to T before comparing. Without the
    // cast a truncated `step` (e.g. uint8_t 254 for 3 - 5) would be compared
    // against the untruncated int difference (-2) and never match.
    const T step = static_cast<T>(arr[1] - arr[0]);
    for (size_t i = 2; i < size; ++i) {
      if (static_cast<T>(arr[i] - arr[i - 1]) != step) {
        return false;
      }
    }
    return true;
  }

  //! Sort arr (length `size`) ascending in place and report how the
  //! positions moved.
  //! o2n maps: original position => sorted position
  //! n2o maps: sorted position => original position
  //! For example, the input arr = [5, 3, 9, 6, 7], size = 5 gives
  //      arr = [3, 5, 6, 7, 9]
  //      o2n = [1, 0, 4, 2, 3]
  //      n2o = [1, 0, 3, 4, 2]
  //! arr is permuted in place; apart from the two index vectors no extra
  //! buffer is allocated here.
  //! Returns false, leaving o2n and n2o untouched, when arr is already in
  //! ascending order; returns true after sorting otherwise.
  template <typename T, typename I>
  static bool Sort(T *arr, std::vector<I> *o2n, std::vector<I> *n2o,
                   size_t size) {
    //! Nothing to do when no neighbouring pair is out of order
    bool ascending = true;
    for (size_t pos = 1; pos < size; ++pos) {
      if (arr[pos - 1] > arr[pos]) {
        ascending = false;
        break;
      }
    }
    if (ascending) {
      return false;
    }

    std::vector<I> &forward = *o2n;
    std::vector<I> &backward = *n2o;
    forward.resize(size);
    backward.resize(size);

    //! Order the positions by the value they refer to: backward[k] is then
    //! the original slot of the k-th smallest value
    std::iota(backward.begin(), backward.end(), 0U);
    std::sort(backward.begin(), backward.end(),
              [&](I lhs, I rhs) { return arr[lhs] < arr[rhs]; });
    for (I rank = 0U; rank < size; ++rank) {
      forward[backward[rank]] = rank;
    }

    //! Permute arr cycle by cycle. Every slot that has received its final
    //! value gets backward[slot] = slot, which marks it as finished (and
    //! wipes the mapping, rebuilt below).
    for (I head = 0; head < size; ++head) {
      if (backward[head] == head) {
        continue;
      }
      T held = arr[head];
      I slot = head;
      for (I from = backward[slot]; from != head; from = backward[slot]) {
        arr[slot] = arr[from];
        backward[slot] = slot;
        slot = from;
      }
      arr[slot] = held;
      backward[slot] = slot;
    }

    //! Restore n2o as the inverse of o2n
    for (I origin = 0U; origin < size; ++origin) {
      backward[forward[origin]] = origin;
    }

    return true;
  }

  //! Drop the weak entries of an FP16 sparse query.
  //! An entry is kept when its absolute value is strictly greater than
  //! `filtering_budget` times the largest absolute value in the query.
  //! @param sparse_count        number of entries in the query
  //! @param sparse_indices      indices of the entries
  //! @param sparse_query        FP16 values of the entries
  //! @param new_sparse_count    set to the number of kept entries
  //! @param new_sparse_indices  kept indices are appended here
  //! @param new_sparse_query    kept values are appended here as raw bytes
  //! @param filtering_budget    fraction of the largest magnitude used as
  //!                            the cut-off
  //! @return always true
  static inline bool filter_sparse_query_fp16(
      const uint32_t sparse_count, const uint32_t *sparse_indices,
      const ailego::Float16 *sparse_query, uint32_t &new_sparse_count,
      std::vector<uint32_t> &new_sparse_indices, std::string &new_sparse_query,
      float filtering_budget) {
    //! Largest magnitude present in the query
    ailego::Float16 peak{0.0f};
    for (size_t pos = 0; pos < sparse_count; ++pos) {
      const auto magnitude = ailego::Float16::Absolute(sparse_query[pos]);
      if (magnitude > peak) {
        peak = magnitude;
      }
    }

    ailego::Float16 threshold{peak};
    threshold *= filtering_budget;

    const size_t unit_size =
        IndexMeta::UnitSizeof(IndexMeta::DataType::DT_FP16);

    //! Collect the survivors; the values go through a typed staging vector
    //! and are copied out as bytes in one go afterwards
    new_sparse_count = 0;
    std::vector<ailego::Float16> kept_values;
    for (size_t pos = 0; pos < sparse_count; ++pos) {
      if (!(ailego::Float16::Absolute(sparse_query[pos]) > threshold)) {
        continue;
      }
      new_sparse_indices.push_back(sparse_indices[pos]);
      kept_values.push_back(sparse_query[pos]);
      ++new_sparse_count;
    }

    const size_t buffer_size = new_sparse_count * unit_size;
    new_sparse_query.reserve(buffer_size);
    new_sparse_query.append(reinterpret_cast<const char *>(kept_values.data()),
                            buffer_size);

    return true;
  }

  //! Drop the weak entries of an FP32 sparse query.
  //! An entry is kept when its absolute value is strictly greater than
  //! `filtering_budget` times the largest absolute value in the query.
  //! @param sparse_count        number of entries in the query
  //! @param sparse_indices      indices of the entries
  //! @param sparse_query        float values of the entries
  //! @param new_sparse_count    set to the number of kept entries
  //! @param new_sparse_indices  kept indices are appended here
  //! @param new_sparse_query    kept values are appended here as raw bytes
  //! @param filtering_budget    fraction of the largest magnitude used as
  //!                            the cut-off
  //! @return always true
  static inline bool filter_sparse_query_fp32(
      const uint32_t sparse_count, const uint32_t *sparse_indices,
      const float *sparse_query, uint32_t &new_sparse_count,
      std::vector<uint32_t> &new_sparse_indices, std::string &new_sparse_query,
      float filtering_budget) {
    //! Largest magnitude present in the query (stays 0 for an empty query)
    float peak{0.0f};
    for (size_t pos = 0; pos < sparse_count; ++pos) {
      const float magnitude = std::fabs(sparse_query[pos]);
      if (magnitude > peak) {
        peak = magnitude;
      }
    }

    const float threshold = peak * filtering_budget;

    const size_t unit_size =
        IndexMeta::UnitSizeof(IndexMeta::DataType::DT_FP32);

    //! Collect the survivors; the values go through a typed staging vector
    //! and are copied out as bytes in one go afterwards
    new_sparse_count = 0;
    std::vector<float> kept_values;
    for (size_t pos = 0; pos < sparse_count; ++pos) {
      if (!(std::fabs(sparse_query[pos]) > threshold)) {
        continue;
      }
      new_sparse_indices.push_back(sparse_indices[pos]);
      kept_values.push_back(sparse_query[pos]);
      ++new_sparse_count;
    }

    const size_t buffer_size = new_sparse_count * unit_size;
    new_sparse_query.reserve(buffer_size);
    new_sparse_query.append(reinterpret_cast<const char *>(kept_values.data()),
                            buffer_size);

    return true;
  }

  static inline bool filter_sparse_query_impl(
      const uint32_t sparse_count, const uint32_t *sparse_indices,
      const void *sparse_query, uint32_t &new_sparse_count,
      std::vector<uint32_t> &new_sparse_indices, std::string &new_sparse_query,
      float filtering_budget, IndexMeta::DataType type) {
    switch (type) {
      case IndexMeta::DataType::DT_FP32:
        return filter_sparse_query_fp32(
            sparse_count, sparse_indices,
            reinterpret_cast<const float *>(sparse_query), new_sparse_count,
            new_sparse_indices, new_sparse_query, filtering_budget);
      case IndexMeta::DataType::DT_FP16:
        return filter_sparse_query_fp16(
            sparse_count, sparse_indices,
            reinterpret_cast<const ailego::Float16 *>(sparse_query),
            new_sparse_count, new_sparse_indices, new_sparse_query,
            filtering_budget);
      default:
        LOG_ERROR("Data type not supported");
        return false;
    }

    return false;
  }

  //! Filter a sparse query and append it, in the segmented layout written by
  //! TransSparseFormat, to `filtered_buffer`.
  //! @param sparse_count     number of entries in the query
  //! @param sparse_index     indices of the entries
  //! @param sparse_value     values of the entries, typed by `type`
  //! @param type             value type (DT_FP32 or DT_FP16 are handled)
  //! @param unit_size        size in bytes of one value
  //! @param filtering_ratio  cut-off fraction handed to the filter
  //! @param filtered_buffer  output; the encoded query is appended to it
  //! @return 1 on success, 0 when filtering failed
  //! NOTE(review): the int result carries a boolean (non-zero means success),
  //! the opposite of a "0 == ok" error code -- confirm callers expect this.
  static int FilterSparseQuery(uint32_t sparse_count,
                               const uint32_t *sparse_index,
                               const void *sparse_value,
                               IndexMeta::DataType type, uint32_t unit_size,
                               float filtering_ratio,
                               std::string *filtered_buffer) {
    uint32_t kept_count = 0;
    std::vector<uint32_t> kept_indices;
    std::string kept_values;

    const bool filtered = filter_sparse_query_impl(
        sparse_count, sparse_index, sparse_value, kept_count, kept_indices,
        kept_values, filtering_ratio, type);
    if (filtered) {
      SparseUtility::TransSparseFormat(kept_count, kept_indices.data(),
                                       kept_values.data(), unit_size,
                                       *filtered_buffer);
      return 1;
    }

    LOG_ERROR("sparse query filter failed");
    return 0;
  }

  static void TransSparseFormat(uint32_t sparse_count,
                                const uint32_t *sparse_index,
                                const void *sparse_value, uint32_t unit_size,
                                std::string &buffer) {
    uint32_t seg_count = 0;
    if (sparse_count == 0) {
      buffer.reserve(sizeof(uint32_t) + sizeof(uint32_t));

      buffer.append(reinterpret_cast<const char *>(&sparse_count),
                    sizeof(uint32_t));

      buffer.append(reinterpret_cast<const char *>(&seg_count),
                    sizeof(uint32_t));

      return;
    }

    std::vector<SparseSegmentInfo> seg_infos;

    uint32_t cur_seg_id = -1U;
    uint32_t cur_vec_cnt = 0;

    for (size_t i = 0; i < sparse_count; ++i) {
      uint32_t seg_id = sparse_index[i] >> SEGMENT_ID_BITS;
      if (cur_seg_id == -1U) {
        cur_seg_id = seg_id;
        cur_vec_cnt++;
      } else {
        if (seg_id == cur_seg_id) {
          cur_vec_cnt++;
        } else if (seg_id > cur_seg_id) {
          seg_infos.emplace_back(cur_seg_id, cur_vec_cnt);

          cur_seg_id = seg_id;
          cur_vec_cnt = 1;
        } else {
          // std::abort();
        }
      }
    }

    if (cur_vec_cnt > 0) {
      seg_infos.emplace_back(cur_seg_id, cur_vec_cnt);
    }

    uint32_t buffer_len = 2 * sizeof(uint32_t) +
                          seg_infos.size() * 2 * sizeof(uint32_t) +
                          sparse_count * (sizeof(uint16_t) + sizeof(float));

    buffer.reserve(buffer_len);

    buffer.append(reinterpret_cast<const char *>(&sparse_count),
                  sizeof(uint32_t));

    seg_count = seg_infos.size();
    buffer.append(reinterpret_cast<const char *>(&seg_count), sizeof(uint32_t));

    for (size_t i = 0; i < seg_count; ++i) {
      uint32_t seg_id = seg_infos[i].seg_id_;
      buffer.append(reinterpret_cast<const char *>(&seg_id), sizeof(uint32_t));
    }

    for (size_t i = 0; i < seg_count; ++i) {
      uint32_t vec_cnt = seg_infos[i].vec_cnt_;
      buffer.append(reinterpret_cast<const char *>(&vec_cnt), sizeof(uint32_t));
    }

    for (size_t i = 0; i < sparse_count; ++i) {
      uint16_t temp_dim = sparse_index[i] & SEGMENT_ID_MASK;
      buffer.append(reinterpret_cast<const char *>(&temp_dim),
                    sizeof(uint16_t));
    }

    const char *sparse_value_ptr = reinterpret_cast<const char *>(sparse_value);
    for (size_t i = 0; i < sparse_count; ++i) {
      buffer.append(sparse_value_ptr, unit_size);
      sparse_value_ptr += unit_size;
    }
  }

  //! Decode the segmented layout written by TransSparseFormat.
  //! Expected layout of `buffer`:
  //!   uint32_t sparse_count
  //!   uint32_t seg_count
  //!   uint32_t seg_id[seg_count]
  //!   uint32_t vec_cnt[seg_count]
  //!   uint16_t dim[sparse_count]
  //!   value[sparse_count]          (unit_size bytes each)
  //! @param buffer                 encoded sparse vector
  //! @param sparse_count           receives the entry count read from buffer
  //! @param sparse_indices_buffer  the rebuilt 32-bit indices
  //!                               (dim + (seg_id << SEGMENT_ID_BITS)) are
  //!                               appended as raw bytes
  //! @param sparse_values_buffer   the values are appended unchanged
  //! @param unit_size              size in bytes of one value
  //! Nothing is appended when the encoded count is zero. At most
  //! sparse_count indices are emitted, even if the per-segment counts in the
  //! buffer claim more.
  static void ReverseSparseFormat(const void *buffer, uint32_t *sparse_count,
                                  std::string *sparse_indices_buffer,
                                  std::string *sparse_values_buffer,
                                  uint32_t unit_size) {
    const uint8_t *base = reinterpret_cast<const uint8_t *>(buffer);

    // The input is an untyped pointer (the std::string overload hands over
    // string::data()), so nothing guarantees 4- or 2-byte alignment. Fields
    // are therefore copied out byte-wise instead of read through casts.
    const auto load_u32 = [](const uint8_t *src) {
      uint32_t value = 0;
      std::copy_n(src, sizeof(value), reinterpret_cast<uint8_t *>(&value));
      return value;
    };
    const auto load_u16 = [](const uint8_t *src) {
      uint16_t value = 0;
      std::copy_n(src, sizeof(value), reinterpret_cast<uint8_t *>(&value));
      return value;
    };

    const uint32_t total = load_u32(base);
    *sparse_count = total;
    if (total == 0) return;

    const size_t entry_count = total;
    // reserve() takes the total capacity, so count what is already there
    sparse_indices_buffer->reserve(sparse_indices_buffer->size() +
                                   entry_count * sizeof(uint32_t));
    sparse_values_buffer->reserve(sparse_values_buffer->size() +
                                  entry_count * unit_size);

    const size_t seg_count = load_u32(base + sizeof(uint32_t));
    const uint8_t *seg_ids = base + 2 * sizeof(uint32_t);
    const uint8_t *seg_cnts = seg_ids + seg_count * sizeof(uint32_t);
    const uint8_t *dims = seg_cnts + seg_count * sizeof(uint32_t);
    const uint8_t *values = dims + entry_count * sizeof(uint16_t);

    //! Walk the segments; `consumed` is the position in the dim array. It is
    //! capped at the entry count so that inconsistent segment counts cannot
    //! push the reads past the dim array.
    size_t consumed = 0;
    for (size_t seg = 0; seg < seg_count && consumed < entry_count; ++seg) {
      const uint32_t seg_base = load_u32(seg_ids + seg * sizeof(uint32_t))
                                << SEGMENT_ID_BITS;
      const uint32_t seg_len = load_u32(seg_cnts + seg * sizeof(uint32_t));

      for (uint32_t j = 0; j < seg_len && consumed < entry_count;
           ++j, ++consumed) {
        const uint32_t index =
            load_u16(dims + consumed * sizeof(uint16_t)) + seg_base;
        sparse_indices_buffer->append(reinterpret_cast<const char *>(&index),
                                      sizeof(uint32_t));
      }
    }

    sparse_values_buffer->append(reinterpret_cast<const char *>(values),
                                 entry_count * unit_size);
  }

  //! Convenience overload: decode an encoded sparse vector held in a
  //! std::string by forwarding its data pointer to the pointer-based
  //! overload. All other arguments are passed through unchanged.
  static void ReverseSparseFormat(const std::string &buffer,
                                  uint32_t *sparse_count,
                                  std::string *sparse_indices_buffer,
                                  std::string *sparse_values_buffer,
                                  uint32_t unit_size) {
    const char *encoded = buffer.data();
    ReverseSparseFormat(encoded, sparse_count, sparse_indices_buffer,
                        sparse_values_buffer, unit_size);
  }

  //! Convenience overload: decode an encoded sparse vector straight into an
  //! IndexSparseDocument, using the document's mutable count, indices and
  //! values accessors as the output arguments of the pointer-based overload.
  static void ReverseSparseFormat(const void *buffer,
                                  IndexSparseDocument &sparse_doc,
                                  uint32_t unit_size) {
    ReverseSparseFormat(buffer, sparse_doc.mutable_sparse_count(),
                        sparse_doc.mutable_sparse_indices(),
                        sparse_doc.mutable_sparse_values(), unit_size);
  }
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/core/utility/utility_params.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <string>

namespace zvec {
namespace core {

//! Segment names
static const std::string INDEX_META_SEGMENT_NAME = "IndexMeta";
static const std::string INDEX_VERSION_SEGMENT_NAME = "IndexVersion";

//! FileLogger
static const std::string FILE_LOGGER_PATH = "proxima.file.logger.path";

//! FileContainer
static const std::string FILE_READ_STORAGE_CHECKSUM_VALIDATION =
    "proxima.file.read_storage.checksum_validation";
static const std::string FILE_READ_STORAGE_ENABLE_DIRECT_IO =
    "proxima.file.read_storage.enable_direct_io";
static const std::string FILE_READ_STORAGE_ALONE_FILE_HANDLE =
    "proxima.file.read_storage.alone_file_handle";
static const std::string FILE_READ_STORAGE_MEMORY_LOCKED =
    "proxima.file.read_storage.memory_locked";
static const std::string FILE_READ_STORAGE_MEMORY_WARMUP =
    "proxima.file.read_storage.memory_warmup";
static const std::string FILE_READ_STORAGE_MEMORY_SHARED =
    "proxima.file.read_storage.memory_shared";
static const std::string FILE_READ_STORAGE_HEADER_OFFSET =
    "proxima.file.read_storage.header_offset";
static const std::string FILE_READ_STORAGE_FOOTER_OFFSET =
    "proxima.file.read_storage.footer_offset";

//! MemoryContainer
static const std::string MEMORY_CONTAINER_CHECKSUM_VALIDATION =
    "proxima.memory.container.checksum_validation";

//! MMapFileContainer
static const std::string MMAPFILE_READ_STORAGE_MEMORY_LOCKED =
    "proxima.mmap_file.container.memory_locked";
static const std::string MMAPFILE_READ_STORAGE_MEMORY_WARMUP =
    "proxima.mmap_file.container.memory_warmup";
static const std::string MMAPFILE_READ_STORAGE_MEMORY_SHARED =
    "proxima.mmap_file.container.memory_shared";
static const std::string MMAPFILE_READ_STORAGE_CHECKSUM_VALIDATION =
    "proxima.mmap_file.container.checksum_validation";
static const std::string MMAPFILE_READ_STORAGE_HEADER_OFFSET =
    "proxima.mmap_file.container.header_offset";
static const std::string MMAPFILE_READ_STORAGE_FOOTER_OFFSET =
    "proxima.mmap_file.container.footer_offset";

//! MMapFileStorage
static const std::string MMAPFILE_STORAGE_MEMORY_LOCKED =
    "proxima.mmap_file.storage.memory_locked";
static const std::string MMAPFILE_STORAGE_MEMORY_WARMUP =
    "proxima.mmap_file.storage.memory_warmup";
static const std::string MMAPFILE_STORAGE_COPY_ON_WRITE =
    "proxima.mmap_file.storage.copy_on_write";
static const std::string MMAPFILE_STORAGE_FORCE_FLUSH =
    "proxima.mmap_file.storage.force_flush";
static const std::string MMAPFILE_STORAGE_SEGMENT_META_CAPACITY =
    "proxima.mmap_file.storage.segment_meta_capacity";

//! BufferStorage
static const std::string BUFFER_STORAGE_MEMORY_SIZE =
    "proxima.buffer.storage.memory_size";

//! MipsConverter
static const std::string MIPS_CONVERTER_M_VALUE =
    "proxima.mips.converter.m_value";
static const std::string MIPS_CONVERTER_U_VALUE =
    "proxima.mips.converter.u_value";
static const std::string MIPS_CONVERTER_L2_NORM =
    "proxima.mips.converter.l2_norm";
static const std::string MIPS_CONVERTER_FORCED_HALF_FLOAT =
    "proxima.mips.converter.forced_half_float";
static const std::string MIPS_CONVERTER_SPHERICAL_INJECTION =
    "proxima.mips.converter.spherical_injection";

//! MipsReverseConverter
static const std::string MIPS_REVERSE_CONVERTER_M_VALUE =
    "proxima.mips_reverse.converter.m_value";
static const std::string MIPS_REVERSE_CONVERTER_U_VALUE =
    "proxima.mips_reverse.converter.u_value";
static const std::string MIPS_REVERSE_CONVERTER_L2_NORM =
    "proxima.mips_reverse.converter.l2_norm";
static const std::string MIPS_REVERSE_CONVERTER_FORCED_SINGLE_FLOAT =
    "proxima.mips_reverse.converter.forced_single_float";
static const std::string MIPS_REVERSE_CONVERTER_SPHERICAL_INJECTION =
    "proxima.mips_reverse.converter.spherical_injection";

//! MipsReformer
static const std::string MIPS_REFORMER_M_VALUE =
    "proxima.mips.reformer.m_value";
static const std::string MIPS_REFORMER_U_VALUE =
    "proxima.mips.reformer.u_value";
static const std::string MIPS_REFORMER_L2_NORM =
    "proxima.mips.reformer.l2_norm";
static const std::string MIPS_REFORMER_NORMALIZE =
    "proxima.mips.reformer.normalize";
static const std::string MIPS_REFORMER_FORCED_HALF_FLOAT =
    "proxima.mips.reformer.forced_half_float";
static const std::string MIPS_REFORMER_SPHERICAL_INJECTION =
    "proxima.mips.reformer.spherical_injection";

//! MipsEuclideanMeasure
static const std::string MIPS_EUCLIDEAN_METRIC_M_VALUE =
    "proxima.mips_euclidean.metric.m_value";
static const std::string MIPS_EUCLIDEAN_METRIC_U_VALUE =
    "proxima.mips_euclidean.metric.u_value";
static const std::string MIPS_EUCLIDEAN_METRIC_MAX_L2_NORM =
    "proxima.mips_euclidean.metric.max_l2_norm";
static const std::string MIPS_EUCLIDEAN_METRIC_INJECTION_TYPE =
    "proxima.mips_euclidean.metric.injection_type";

//! NormalizeConverter
static const std::string NORMALIZE_CONVERTER_FORCED_HALF_FLOAT =
    "proxima.normalize.converter.forced_half_float";
static const std::string NORMALIZE_CONVERTER_P_VALUE =
    "proxima.normalize.converter.p_value";

//! NormalizeReformer
static const std::string NORMALIZE_REFORMER_FORCED_HALF_FLOAT =
    "proxima.normalize.reformer.forced_half_float";
static const std::string NORMALIZE_REFORMER_P_VALUE =
    "proxima.normalize.reformer.p_value";

//! Int8Converter
static const std::string INT8_QUANTIZER_CONVERTER_HISTOGRAM_BINS_COUNT =
    "proxima.int8_quantizer.converter.histogram_bins_count";
static const std::string INT8_QUANTIZER_CONVERTER_DISABLE_BIAS =
    "proxima.int8_quantizer.converter.disable_bias";
static const std::string INT8_QUANTIZER_CONVERTER_BIAS =
    "proxima.int8_quantizer.converter.bias";
static const std::string INT8_QUANTIZER_CONVERTER_SCALE =
    "proxima.int8_quantizer.converter.scale";

//! Int4Converter
static const std::string INT4_QUANTIZER_CONVERTER_HISTOGRAM_BINS_COUNT =
    "proxima.int4_quantizer.converter.histogram_bins_count";
static const std::string INT4_QUANTIZER_CONVERTER_DISABLE_BIAS =
    "proxima.int4_quantizer.converter.disable_bias";
static const std::string INT4_QUANTIZER_CONVERTER_BIAS =
    "proxima.int4_quantizer.converter.bias";
static const std::string INT4_QUANTIZER_CONVERTER_SCALE =
    "proxima.int4_quantizer.converter.scale";

//! Int8Reformer
static const std::string INT8_QUANTIZER_REFORMER_BIAS =
    "proxima.int8_quantizer.reformer.bias";
static const std::string INT8_QUANTIZER_REFORMER_SCALE =
    "proxima.int8_quantizer.reformer.scale";
static const std::string INT8_QUANTIZER_REFORMER_METRIC =
    "proxima.int8_quantizer.reformer.metric";

//! Int4Reformer
static const std::string INT4_QUANTIZER_REFORMER_BIAS =
    "proxima.int4_quantizer.reformer.bias";
static const std::string INT4_QUANTIZER_REFORMER_SCALE =
    "proxima.int4_quantizer.reformer.scale";
static const std::string INT4_QUANTIZER_REFORMER_METRIC =
    "proxima.int4_quantizer.reformer.metric";

//! CosineConverter
static const std::string COSINE_CONVERTER_FORCED_HALF_FLOAT =
    "proxima.cosine.converter.forced_half_float";

//! CosineReformer
static const std::string COSINE_REFORMER_FORCED_HALF_FLOAT =
    "proxima.cosine.reformer.forced_half_float";

//! QuantizedInteger Metric
static const std::string QUANTIZED_INTEGER_METRIC_ORIGIN_METRIC_NAME =
    "proxima.quantized_integer.metric.origin_metric_name";
static const std::string QUANTIZED_INTEGER_METRIC_ORIGIN_METRIC_PARAMS =
    "proxima.quantized_integer.metric.origin_metric_params";

//! IntegerStreamingConverter
static const std::string INTEGER_STREAMING_CONVERTER_ENABLE_NORMALIZE =
    "proxima.integer_streaming.converter.enable_normalize";

//! IntegerStreamingReformer
static const std::string INTEGER_STREAMING_REFORMER_ENABLE_NORMALIZE =
    "proxima.integer_streaming.reformer.enable_normalize";

//! DoubleBitConverter
static const std::string DOUBLE_BIT_CONVERTER_TRAIN_SAMPLE_COUNT =
    "proxima.double_bit.converter.train_sample_count";
static const std::string DOUBLE_BIT_CONVERTER_A_VALUE =
    "proxima.double_bit.converter.a_value";
static const std::string DOUBLE_BIT_CONVERTER_B_VALUE =
    "proxima.double_bit.converter.b_value";

//! DoubleBitReformer
static const std::string DOUBLE_BIT_REFORMER_A_VALUE =
    "proxima.double_bit.reformer.a_value";
static const std::string DOUBLE_BIT_REFORMER_B_VALUE =
    "proxima.double_bit.reformer.b_value";

//! SimpleForward
static const std::string SIMPLE_FORWARD_DATA_BLOCK_SIZE =
    "proxima.simple.forward.data_block_size";
static const std::string SIMPLE_FORWARD_INDEX_BLOCK_SIZE =
    "proxima.simple.forward.index_block_size";

//! SimpleCloset
static const std::string SIMPLE_CLOSET_DATA_BLOCK_SIZE =
    "proxima.simple.closet.data_block_size";
static const std::string SIMPLE_CLOSET_INDEX_BLOCK_SIZE =
    "proxima.simple.closet.index_block_size";

//! ChainCloset
static const std::string CHAIN_CLOSET_SLOT_SIZE =
    "proxima.chain.closet.slot_size";
static const std::string CHAIN_CLOSET_INDEX_BLOCK_SIZE =
    "proxima.chain.closet.index_block_size";
static const std::string CHAIN_CLOSET_DATA_BLOCK_SIZE =
    "proxima.chain.closet.data_block_size";

//! IndexForward
static const std::string PARAM_FORWARD_MULTI_VALUE =
    "proxima.param.forward.multi_value";
static const std::string PARAM_FORWARD_MULTI_COUNT =
    "proxima.param.forward.multi_count";

}  // namespace core
}  // namespace zvec

================================================
FILE: src/core/utility/visit_filter.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <algorithm>
#include <chrono>
#include <cstdint>
#include <limits>
#include <random>
#include <tuple>
#include <vector>
#include <ailego/container/bloom_filter.h>
#include <ailego/utility/bitset_helper.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_logger.h>

namespace zvec {
namespace core {

//! Capacity figures kept in the Context of every visit filter; both start
//! at zero
struct VisitFilterHeader {
  VisitFilterHeader() {}
  //! Document count the filter is currently sized for
  uint64_t maxDocCnt{0};
  //! Scan count the filter is currently sized for
  uint64_t maxScanNum{0};
};

//! Position, inside the custom-parameter tuple handed to the filters'
//! init(), of the probability that VisitBloomFilter::init passes on to the
//! bloom filter constructor
constexpr int PROXIMA_HNSW_VISITFILTER_CUSTOM_PARAMS_INDEX_NEGPROB = 0;

//! Visit filter backed by an ailego::BloomFilter.
//! Every id is presented to the bloom filter as N values: the id plus each
//! of N distinct random offsets kept in the Context.
class VisitBloomFilter {
 public:
  static constexpr int mode = 1;

  //! Number of values derived from one id
  static constexpr int N = 5;

  //! Per-search state of the filter
  struct Context {
    //! Seeds the random engine from the current system clock
    Context()
        : mt(std::chrono::system_clock::now().time_since_epoch().count()) {}
    VisitFilterHeader h;
    std::mt19937 mt;
    ailego::BloomFilter<N> *filter{nullptr};
    int offset[N] = {0};
  };
#define BLOOM_FILTER_HASH_BITS_OFFSETS(i)                                 \
  i + c->offset[0], i + c->offset[1], i + c->offset[2], i + c->offset[3], \
      i + c->offset[4]

  VisitBloomFilter() = delete;

  //! Record `idx` as visited
  inline static void set_visited(Context *c, id_t idx) {
    c->filter->force_insert(BLOOM_FILTER_HASH_BITS_OFFSETS(idx));
    return;
  }

  //! Not supported by this filter: always returns nullptr
  inline static void *get_visited(Context *, id_t) {
    // TODO
    return nullptr;
  }

  //! Whether the bloom filter reports `idx` as present
  inline static bool visited(Context *c, id_t idx) {
    return c->filter->has(BLOOM_FILTER_HASH_BITS_OFFSETS(idx));
  }

  //! Re-size the bloom filter for `maxScanNum`, keeping its probability, and
  //! draw fresh offsets. Does nothing when the value is unchanged.
  //! @return 0 on success, IndexError_Runtime when the filter reset fails
  inline static int set_max_scan_num(Context *c, uint64_t maxScanNum) {
    if (maxScanNum == c->h.maxScanNum) {
      return 0;
    }
    c->h.maxScanNum = maxScanNum;
    if (c->filter->reset(maxScanNum, c->filter->probability()) != 0) {
      LOG_ERROR("reset BloomFilter failed");
      return IndexError_Runtime;
    }
    genRandomHashBits(c);
    return 0;
  }

  //! Forget every visited id
  inline static void clear(Context *c) {
    c->filter->clear();
    return;
  }

  //! Make sure the filter covers `maxDocCnt` documents and `max_scan_num`
  //! scans. When either exceeds the current figure a new bloom filter (same
  //! probability) replaces the old one and fresh offsets are drawn;
  //! otherwise nothing changes.
  //! @return true on success; false when the new filter cannot be allocated,
  //!         in which case the old filter is kept and cleared
  inline static bool reset(Context *c, uint64_t maxDocCnt,
                           uint64_t max_scan_num) {
    if (ailego_unlikely(maxDocCnt > c->h.maxDocCnt ||
                        max_scan_num > c->h.maxScanNum)) {
      // Create a new one, if failed, we can reuse the old one
      auto filter = new (std::nothrow) ailego::BloomFilter<VisitBloomFilter::N>(
          max_scan_num, c->filter->probability());
      if (ailego_unlikely(filter == nullptr)) {
        LOG_ERROR("reset bloomfilter failed, maxScanNum %zu prob %f",
                  (size_t)max_scan_num, c->filter->probability());
        c->filter->clear();
        return false;
      }

      delete c->filter;
      c->filter = filter;
      c->h.maxScanNum = max_scan_num;
      c->h.maxDocCnt = maxDocCnt;
      genRandomHashBits(c);
    }
    return true;
  }

  //! Draw N distinct random offsets and store them in ascending order.
  //! The offsets are taken from [0, maxDocCnt], with the upper bound kept
  //! inside [N - 1, INT_MAX]:
  //!  - below N - 1 there are fewer than N distinct values to draw, so the
  //!    search for a distinct offset would never finish;
  //!  - above INT_MAX the bound does not fit the int distribution.
  inline static void genRandomHashBits(Context *c) {
    const uint64_t lowest_upper = static_cast<uint64_t>(N - 1);
    const uint64_t highest_upper =
        static_cast<uint64_t>(std::numeric_limits<int>::max());
    const uint64_t upper =
        std::min(std::max(c->h.maxDocCnt, lowest_upper), highest_upper);

    std::uniform_int_distribution<int> dt(0, static_cast<int>(upper));
    for (int i = 0; i < N; ++i) {
      int r = dt(c->mt);
      // redraw until r differs from every offset chosen so far
      while (std::find(c->offset, c->offset + i, r) != c->offset + i) {
        r = dt(c->mt);
      }
      c->offset[i] = r;
    }
    std::sort(c->offset, c->offset + N);
  }

  //! Create a Context sized for `maxDocCnt` documents and `maxScanNum`
  //! scans and hand it out through `ctx`.
  //! The probability for the bloom filter is read from `tpl` at position
  //! PROXIMA_HNSW_VISITFILTER_CUSTOM_PARAMS_INDEX_NEGPROB.
  //! @return 0 on success, IndexError_NoMemory when an allocation fails (in
  //!         which case nothing is leaked and `ctx` is left untouched)
  template <class... T>
  static int init(Context *, void **ctx, uint64_t maxDocCnt,
                  uint64_t maxScanNum, std::tuple<T...> &&tpl) {
    Context *c = new (std::nothrow) Context;
    if (c == nullptr) {
      LOG_ERROR("New memory in initVisitBitMap failed");
      return IndexError_NoMemory;
    }
    c->h.maxDocCnt = maxDocCnt;
    c->h.maxScanNum = maxScanNum;
    float p =
        std::get<PROXIMA_HNSW_VISITFILTER_CUSTOM_PARAMS_INDEX_NEGPROB>(tpl);
    c->filter = new (std::nothrow)
        ailego::BloomFilter<VisitBloomFilter::N>(maxScanNum, p);
    if (c->filter == nullptr) {
      LOG_ERROR("New BloomFilter failed, reuse old one");
      // the half-built context is not handed out, so release it here
      delete c;
      return IndexError_NoMemory;
    }
    genRandomHashBits(c);
    *ctx = c;
    return 0;
  }

  //! Release the bloom filter and the Context itself
  inline static void destroy(Context *c) {
    delete c->filter;
    delete c;
  }
#undef BLOOM_FILTER_HASH_BITS_OFFSETS
};  // end of VisitBloomFilter

//! Visit filter backed by a bitmap holding one bit per document id.
//! The bitmap lives in a heap buffer owned by the Context and is accessed
//! through an ailego::BitsetHelper mounted on that buffer.
class VisitBitMap {
 public:
  static constexpr int mode = 2;

  //! Per-search state of the filter
  struct Context {
    VisitFilterHeader h;
    ailego::BitsetHelper bitset;
    char *buf{nullptr};
  };

  VisitBitMap() = delete;

  //! Record `idx` as visited
  inline static void set_visited(Context *c, id_t idx) {
    c->bitset.set(idx);
  }

  //! Address of the buffer byte at offset idx / 8
  inline static void *get_visited(Context *c, id_t idx) {
    return c->buf + (idx >> 3);
  }

  //! Whether the bit of `idx` is set
  inline static bool visited(Context *c, id_t idx) {
    return c->bitset.test(idx);
  }

  //! Remember the new scan limit; the bitmap itself is not touched
  inline static int set_max_scan_num(Context *c, uint64_t maxScanNum) {
    c->h.maxScanNum = maxScanNum;
    return 0;
  }

  //! Forget every visited id
  inline static void clear(Context *c) {
    c->bitset.clear();
  }

  //! Make sure the bitmap covers `maxDocCnt` documents and `maxScanNum`
  //! scans. When either exceeds the current figure a zeroed buffer sized
  //! for `maxDocCnt` replaces the old one; otherwise nothing changes.
  //! @return true on success; false when the new buffer cannot be allocated,
  //!         in which case the old bitmap is kept and cleared
  inline static bool reset(Context *c, uint64_t maxDocCnt,
                           uint64_t maxScanNum) {
    const bool outgrown =
        maxDocCnt > c->h.maxDocCnt || maxScanNum > c->h.maxScanNum;
    if (!ailego_unlikely(outgrown)) {
      return true;
    }

    const uint64_t len = byte_length(maxDocCnt);
    char *fresh = new (std::nothrow) char[len];
    if (fresh == nullptr) {
      LOG_ERROR("New memory in initVisitBitMap failed");
      c->bitset.clear();
      return false;
    }
    memset(fresh, 0, len);

    delete[] c->buf;
    c->buf = fresh;
    c->h.maxDocCnt = maxDocCnt;
    c->h.maxScanNum = maxScanNum;
    c->bitset.mount(c->buf, len);
    return true;
  }

  //! Create a Context with a zeroed bitmap sized for `maxDocCnt` documents
  //! and hand it out through `ctx`. `tpl` is not used by this filter.
  //! @return 0 on success, IndexError_NoMemory when an allocation fails
  template <class... T>
  static int init(Context *, void **ctx, uint64_t maxDocCnt,
                  uint64_t maxScanNum, std::tuple<T...> &&tpl) {
    (void)tpl;  // unused warning

    Context *c = new (std::nothrow) Context;
    if (c == nullptr) {
      LOG_ERROR("New memory in initVisitBitMap failed");
      return IndexError_NoMemory;
    }

    const uint64_t len = byte_length(maxDocCnt);
    c->buf = new (std::nothrow) char[len];
    if (c->buf == nullptr) {
      LOG_ERROR("New memory in initVisitBitMap failed, reuse old one");
      delete c;
      return IndexError_NoMemory;
    }
    memset(c->buf, 0, len);

    c->h.maxDocCnt = maxDocCnt;
    c->h.maxScanNum = maxScanNum;
    c->bitset.mount(c->buf, len);
    *ctx = c;
    return 0;
  }

  //! Release the bitmap buffer and the Context itself
  inline static void destroy(Context *c) {
    delete[] c->buf;
    delete c;
  }

 private:
  //! Bytes needed for one bit per document, rounded up to a whole number of
  //! uint32_t words
  inline static uint64_t byte_length(uint64_t maxDocCnt) {
    return ((maxDocCnt + 31) >> 5) << 2;
  }
};  // end of VisitBitMap

// Visit filter that keeps one byte per document id.
//
// A slot counts as visited when it holds the current epoch number `curNum`,
// so clear() normally only bumps the epoch; the buffer is wiped just when the
// 8-bit epoch wraps around to 0.
class VisitByteMap {
 public:
  static constexpr int mode = 3;
  struct Context {
    VisitFilterHeader h;
    uint8_t curNum{0};         // epoch value stamped into visited slots
    std::vector<uint8_t> buf;  // one slot per document id, maxDocCnt entries
  };

  VisitByteMap() = delete;

  // Marks `idx` as visited, growing the buffer when `idx` is out of range.
  inline static void set_visited(Context *c, id_t idx) {
    // `buf` holds exactly maxDocCnt slots (valid indexes 0..maxDocCnt-1), so
    // idx == maxDocCnt is already out of range and has to trigger a grow.
    if (ailego_unlikely(idx >= c->h.maxDocCnt)) {
      // Widen before adding so the reserve margin cannot wrap in id_t width.
      c->h.maxDocCnt = static_cast<uint64_t>(idx) + 1024;  // reserved
      c->buf.resize(c->h.maxDocCnt);
    }
    c->buf[idx] = c->curNum;
  }

  // Returns the address of the slot for `idx` (no bounds check).
  inline static void *get_visited(Context *c, id_t idx) {
    return c->buf.data() + idx;
  }

  // True when the slot for `idx` carries the current epoch. Ids outside the
  // buffer have never been stored and are reported as not visited.
  inline static bool visited(Context *c, id_t idx) {
    if (ailego_unlikely(idx >= c->h.maxDocCnt)) {
      return false;
    }
    return c->buf[idx] == c->curNum;
  }

  // Stores the new scan limit in the header; always returns 0.
  inline static int set_max_scan_num(Context *c, uint64_t maxScanNum) {
    c->h.maxScanNum = maxScanNum;
    return 0;
  }

  // Starts a new epoch. When the 8-bit counter wraps to 0 the whole buffer is
  // zeroed and the epoch restarts at 1, so stale slots can never match.
  inline static void clear(Context *c) {
    c->curNum++;
    if (c->curNum == 0) {
      memset(c->buf.data(), 0, c->h.maxDocCnt * sizeof(uint8_t));
      c->curNum = 1;
    }
  }

  // Resizes and zeroes the buffer when either requested limit exceeds the
  // stored one. Returns false only when the resize throws.
  inline static bool reset(Context *c, uint64_t maxDocCnt,
                           uint64_t maxScanNum) {
    if (ailego_unlikely(maxDocCnt > c->h.maxDocCnt ||
                        maxScanNum > c->h.maxScanNum)) {
      try {
        c->buf.resize(maxDocCnt);
      } catch (const std::exception &e) {
        LOG_ERROR("New memory in initVisitByteMap failed, reuse old one");
        return false;
      }
      memset(c->buf.data(), 0, maxDocCnt * sizeof(uint8_t));
      c->curNum = 1;
      c->h.maxDocCnt = maxDocCnt;
      c->h.maxScanNum = maxScanNum;
    }
    return true;
  }

  // Allocates a Context with a zeroed buffer of `maxDocCnt` slots and returns
  // it through `*ctx`. The first (Context *) argument and `tpl` are unused.
  // Returns 0 on success or IndexError_NoMemory on allocation failure.
  template <class... T>
  static int init(Context *, void **ctx, uint64_t maxDocCnt,
                  uint64_t maxScanNum, std::tuple<T...> &&tpl) {
    (void)tpl;  // unused warning
    Context *c = new (std::nothrow) Context;
    if (c == nullptr) {
      LOG_ERROR("New memory in initVisitByteMap failed");
      return IndexError_NoMemory;
    }
    c->h.maxDocCnt = maxDocCnt;
    c->h.maxScanNum = maxScanNum;
    try {
      c->buf.resize(maxDocCnt);
    } catch (const std::exception &e) {
      LOG_ERROR("New memory in initVisitByteMap failed");
      delete c;
      return IndexError_NoMemory;
    }
    memset(c->buf.data(), 0, maxDocCnt * sizeof(uint8_t));
    c->curNum = 1;
    *ctx = c;
    return 0;
  }

  // Deletes the context; the vector releases its own storage.
  inline static void destroy(Context *c) {
    delete c;
  }
};  // end of VisitByteMap


// Expands to one `case` label for filter class `cls`: when the switch value
// equals cls::mode, the static member `impl` is called with `ctx` cast to
// cls::Context * followed by any extra arguments, and its result is returned
// from the enclosing function.
#define PROXIMA_HNSW_VISITFILTER_SWITCH_CASE(cls, impl, ctx, ...) \
  case cls::mode:                                                 \
    return cls::impl(static_cast<cls::Context *>(ctx), ##__VA_ARGS__);

// Expands to a `switch (mode_)` that forwards `impl(...)` to VisitBloomFilter,
// VisitBitMap or VisitByteMap, passing `ctx_` as the context. Must be used
// inside a member function where `mode_` and `ctx_` are in scope. Because every
// case returns, code placed after the macro only runs when `mode_` matches none
// of the three classes.
#define PROXIMA_HNSW_VISITFILTER_CALL_IMPL(impl, ...)                  \
  switch (mode_) {                                                     \
    PROXIMA_HNSW_VISITFILTER_SWITCH_CASE(VisitBloomFilter, impl, ctx_, \
                                         ##__VA_ARGS__)                \
    PROXIMA_HNSW_VISITFILTER_SWITCH_CASE(VisitBitMap, impl, ctx_,      \
                                         ##__VA_ARGS__)                \
    PROXIMA_HNSW_VISITFILTER_SWITCH_CASE(VisitByteMap, impl, ctx_,     \
                                         ##__VA_ARGS__)                \
  }


// The visit list is called with high frequency, so dispatch is done with a
// switch instead of std::function or a virtual class: function pointers,
// lambdas and virtual calls cannot be inlined.
// Front end that forwards every call to the filter implementation selected
// by `mode_`, using a switch generated by PROXIMA_HNSW_VISITFILTER_CALL_IMPL.
//
// The statement after each macro use is only reached when `mode_` matches
// none of the known implementations; it returns a placeholder value.
class VisitFilter {
 public:
  enum Mode {
    Default = 0,
    BloomFilter = VisitBloomFilter::mode,
    BitMap = VisitBitMap::mode,
    ByteMap = VisitByteMap::mode
  };

  VisitFilter() : mode_(0), ctx_(nullptr) {}

  // Whether `idx` has been marked as visited.
  inline bool visited(id_t idx) {
    PROXIMA_HNSW_VISITFILTER_CALL_IMPL(visited, idx);
    return true;  // place holder
  }

  // Marks `idx` as visited.
  inline void set_visited(id_t idx) {
    PROXIMA_HNSW_VISITFILTER_CALL_IMPL(set_visited, idx);
  }

  // Returns the implementation-specific storage address for `idx`.
  inline void *get_visited(id_t idx) {
    PROXIMA_HNSW_VISITFILTER_CALL_IMPL(get_visited, idx);
    return nullptr;  // place holder
  }

  // Forwards the new scan limit to the implementation.
  // NOTE(review): the argument is typed id_t while the implementations take
  // uint64_t; confirm that id_t is wide enough for every caller.
  inline int set_max_scan_num(id_t idx) {
    PROXIMA_HNSW_VISITFILTER_CALL_IMPL(set_max_scan_num, idx);
    return 0;  // place holder
  }

  // Resets the visited state for a new search.
  inline void clear() {
    PROXIMA_HNSW_VISITFILTER_CALL_IMPL(clear);
  }

  // Asks the implementation to resize for the given limits.
  inline bool reset(uint64_t maxDocCnt, uint64_t maxScanNum) {
    PROXIMA_HNSW_VISITFILTER_CALL_IMPL(reset, maxDocCnt, maxScanNum);
    return true;
  }

  // Releases the implementation context, if any. The pointer is cleared
  // afterwards so that calling destroy() again is a harmless no-op instead of
  // a double free.
  inline void destroy() {
    if (ctx_ != nullptr) {
      destroy_context();
      ctx_ = nullptr;
    }
  }

  // Selects the implementation for `mode` and lets it allocate its context
  // into `ctx_`. Returns the implementation's status code, or 0 when `mode`
  // matches no implementation.
  int init(int mode, uint64_t maxDocCnt, uint64_t maxScanNum,
           float negativeProbability) {
    mode_ = mode;
    PROXIMA_HNSW_VISITFILTER_CALL_IMPL(init, &ctx_, maxDocCnt, maxScanNum,
                                       std::make_tuple(negativeProbability));
    return 0;  // place holder
  }

  int get_mode(void) const {
    return mode_;
  }


 private:
  VisitFilter(const VisitFilter &) = delete;
  VisitFilter &operator=(const VisitFilter &) = delete;

  // Dispatches destroy() to the active implementation. Kept separate because
  // the dispatch macro returns from the enclosing function, which would skip
  // the pointer reset in destroy().
  inline void destroy_context() {
    PROXIMA_HNSW_VISITFILTER_CALL_IMPL(destroy);
  }

  int mode_{0};         // selects the implementation to dispatch to
  void *ctx_{nullptr};  // custom data for each method
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/db/CMakeLists.txt
================================================
# Build script for the db layer: a proto library target (zvec_proto) and the
# static library zvec_db.

# Project helper modules; presumably these define the cc_* commands used below.
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

# Static library target built from the .proto files under proto/.
cc_proto_library(
  NAME zvec_proto STATIC
  SRCS proto/*.proto
  PROTOROOT ./
)

cc_directory(common)
cc_directory(index)
cc_directory(sqlengine)

# Collect every C/C++ source and header below this directory.
file(GLOB_RECURSE ALL_DB_SRCS *.cc *.c *.h)

# zvec_db: the globbed sources plus zvec.pb.cc from the binary directory
# (presumably generated by the zvec_proto target listed under DEPS).
cc_library(
  NAME zvec_db STATIC STRICT SRCS_NO_GLOB
  SRCS ${ALL_DB_SRCS} ${CMAKE_CURRENT_BINARY_DIR}/proto/zvec.pb.cc
  INCS . ${CMAKE_CURRENT_BINARY_DIR}
  PUBINCS ${PROJECT_ROOT_DIR}/src/include
  LIBS 
    zvec_ailego
    zvec_core
    glog
    roaring
    rocksdb
    antlr4
    libprotobuf
    Arrow::arrow_static
    Arrow::arrow_compute
    Arrow::arrow_dataset
    Arrow::arrow_acero
  DEPS zvec_proto
  VERSION "${PROXIMA_ZVEC_VERSION}"
)

================================================
FILE: src/db/collection.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <atomic>
#include <cstdint>
#include <memory>
#include <mutex>
#include <shared_mutex>
#include <string>
#include <variant>
#include <vector>
#include <ailego/io/file_lock.h>
#include <zvec/ailego/io/file.h>
#include <zvec/ailego/logger/logger.h>
#include <zvec/ailego/pattern/expected.hpp>
#include <zvec/ailego/utility/file_helper.h>
#include <zvec/ailego/utility/string_helper.h>
#include <zvec/db/collection.h>
#include <zvec/db/doc.h>
#include <zvec/db/options.h>
#include <zvec/db/schema.h>
#include <zvec/db/status.h>
#include "db/common/constants.h"
#include "db/common/file_helper.h"
#include "db/common/profiler.h"
#include "db/common/typedef.h"
#include "db/index/common/delete_store.h"
#include "db/index/common/id_map.h"
#include "db/index/common/index_filter.h"
#include "db/index/common/version_manager.h"
#include "db/index/segment/segment.h"
#include "db/index/segment/segment_helper.h"
#include "db/index/segment/segment_manager.h"
#include "db/sqlengine/sqlengine.h"

namespace zvec {

// Kind of write requested from CollectionImpl::write_impl().
enum class WriteMode : uint8_t {
  UNDEFINED = 0,
  INSERT,
  UPDATE,
  UPSERT,
};

// Out-of-line defaulted destructor of the Collection base class.
Collection::~Collection() = default;

// Concrete Collection implementation, created through the static factories
// Collection::CreateAndOpen() and Collection::Open() (hence the friend
// declaration, which lets them call the private Open()).
class CollectionImpl : public Collection {
  friend class Collection;

 public:
  // Creates a handle for a new collection at `path` with a copy of `schema`.
  explicit CollectionImpl(const std::string &path,
                          const CollectionSchema &schema);

  // Creates a handle for an existing collection at `path`; the schema stays
  // unset until Open() recovers it.
  explicit CollectionImpl(const std::string &path);

  // Closes the collection unless it has already been destroyed.
  ~CollectionImpl() override;

 private:
  // Creates or recovers the collection, depending on whether a schema was
  // supplied at construction.
  Status Open(const CollectionOptions &options);

  // Locks the schema mutex and closes the collection; only called from the
  // destructor.
  Status Close();

 public:
  // Closes the collection and removes its directory from disk.
  Status Destroy() override;

  // Flushes the writing segment.
  Status Flush() override;

  Result<std::string> Path() const override;

  // Document count plus per-vector-field index completeness.
  Result<CollectionStats> Stats() const override;

  Result<CollectionSchema> Schema() const override;

  Result<CollectionOptions> Options() const override;

 public:
  // Schema-changing operations.
  Status CreateIndex(const std::string &column_name,
                     const IndexParams::Ptr &index_params,
                     const CreateIndexOptions &options) override;

  Status DropIndex(const std::string &column_name) override;

  Status Optimize(const OptimizeOptions &options) override;

  Status AddColumn(const FieldSchema::Ptr &column_schema,
                   const std::string &expression,
                   const AddColumnOptions &options) override;

  Status DropColumn(const std::string &column_name) override;

  Status AlterColumn(
      const std::string &column_name, const std::string &rename,
      const FieldSchema::Ptr &new_column_schema = nullptr,
      const AlterColumnOptions &options = AlterColumnOptions()) override;

  // Document write operations.
  Result<WriteResults> Insert(std::vector<Doc> &docs) override;

  Result<WriteResults> Upsert(std::vector<Doc> &docs) override;

  Result<WriteResults> Update(std::vector<Doc> &docs) override;

  Result<WriteResults> Delete(const std::vector<std::string> &pks) override;

  Status DeleteByFilter(const std::string &filter) override;

  // Read operations.
  Result<DocPtrList> Query(const VectorQuery &query) const override;

  Result<GroupResults> GroupByQuery(
      const GroupByVectorQuery &query) const override;

  Result<DocPtrMap> Fetch(const std::vector<std::string> &pks) const override;

 private:
  // Assigns DefaultVectorIndexParams to vector fields that have no index
  // params.
  void prepare_schema();

  // The *_unsafe variants do not take any lock themselves; the visible callers
  // hold schema_handle_mtx_.
  Status close_unsafe();

  Status flush_unsafe();

  Status create();

  Status recovery();

  Status create_idmap_and_delete_store();

  Status recover_idmap_and_delete_store();

  Status acquire_file_lock(bool create = false);

  Status init_version_manager();

  Status init_writing_segment();

  bool need_switch_to_new_segment() const;

  Status switch_to_new_segment_for_writing(
      const CollectionSchema::Ptr &schema = nullptr);

  Result<WriteResults> write_impl(std::vector<Doc> &docs, WriteMode mode);

  std::vector<Segment::Ptr> get_all_segments() const;

  std::vector<Segment::Ptr> get_all_persist_segments() const;

  Segment::Ptr local_segment_by_doc_id(
      uint64_t doc_id, const std::vector<Segment::Ptr> &segments) const;

  // Returns the next segment id and advances the allocator.
  SegmentID allocate_segment_id() {
    return segment_id_allocator_.fetch_add(1);
  }

  // Same as above, but from the separate counter used for temporary segments.
  SegmentID allocate_segment_id_for_tmp_segment() {
    return tmp_segment_id_allocator_.fetch_add(1);
  }

  std::vector<SegmentTask::Ptr> build_compact_task(
      const CollectionSchema::Ptr &schema,
      const std::vector<Segment::Ptr> &segments, int concurrency,
      const IndexFilter::Ptr filter);

  Status execute_compact_task(std::vector<SegmentTask::Ptr> &tasks) const;

  // One task per segment whose vector index for `column` is not ready yet.
  std::vector<SegmentTask::Ptr> build_create_vector_index_task(
      const std::vector<Segment::Ptr> &segments, const std::string &column,
      const IndexParams::Ptr &index_params, int concurrency);

  // One task per segment.
  std::vector<SegmentTask::Ptr> build_create_scalar_index_task(
      const std::vector<Segment::Ptr> &segments, const std::string &column,
      const IndexParams::Ptr &index_params, int concurrency);

  std::vector<SegmentTask::Ptr> build_drop_vector_index_task(
      const std::vector<Segment::Ptr> &segments, const std::string &column);

  std::vector<SegmentTask::Ptr> build_drop_scalar_index_task(
      const std::vector<Segment::Ptr> &segments, const std::string &column);

  // Runs the tasks one after another and stops at the first failure.
  Status execute_tasks(std::vector<SegmentTask::Ptr> &tasks) const;

 private:
  // Per-document handlers for the three write modes.
  Status handle_upsert(Doc &doc);

  Status handle_update(Doc &doc);

  Status handle_insert(Doc &doc);

  Status internal_fetch_by_doc(const Doc &doc, Doc::Ptr *doc_out);

 private:
  // Helper functions for add/alter/drop column
  Status validate(const std::string &column, const FieldSchema::Ptr &schema,
                  const std::string &expression, const std::string &rename,
                  ColumnOp op);

 private:
  // Root directory of the collection on disk.
  std::string path_;

  // Set once Destroy() has removed the collection.
  bool destroyed_{false};

  CollectionSchema::Ptr schema_;

  CollectionOptions options_;

  // schema_handle_mtx_ serializes schema-level operations; write_mtx_ is taken
  // by them to block writes while segments and versions are being swapped.
  mutable std::shared_mutex schema_handle_mtx_;
  mutable std::shared_mutex write_mtx_;

  std::atomic<SegmentID> segment_id_allocator_;
  std::atomic<SegmentID> tmp_segment_id_allocator_;

  // writing segment
  Segment::Ptr writing_segment_;
  // non-writing segments, sort by doc_id range
  SegmentManager::Ptr segment_manager_;

  // latest version: std::vector<SegmentMeta>
  VersionManager::Ptr version_manager_;

  // file lock
  ailego::File lock_file_;

  IDMap::Ptr id_map_;
  DeleteStore::Ptr delete_store_;

  sqlengine::SQLEngine::Ptr sql_engine_;
};

// Builds a CollectionImpl for a brand-new collection at `path` with `schema`
// and opens it; the open status is propagated on failure.
Result<Collection::Ptr> Collection::CreateAndOpen(
    const std::string &path, const CollectionSchema &schema,
    const CollectionOptions &options) {
  auto impl = std::make_shared<CollectionImpl>(path, schema);

  Status open_status = impl->Open(options);
  CHECK_RETURN_STATUS_EXPECTED(open_status);

  return impl;
}

// Builds a CollectionImpl for the existing collection at `path` and opens it;
// the open status is propagated on failure.
Result<Collection::Ptr> Collection::Open(const std::string &path,
                                         const CollectionOptions &options) {
  auto impl = std::make_shared<CollectionImpl>(path);

  Status open_status = impl->Open(options);
  CHECK_RETURN_STATUS_EXPECTED(open_status);

  return impl;
}

CollectionImpl::CollectionImpl(const std::string &path,
                               const CollectionSchema &schema)
    : path_(path), schema_(std::make_shared<CollectionSchema>(schema)) {
  prepare_schema();
}

// Gives every vector field without index params the default vector index
// params; all other fields are left untouched.
void CollectionImpl::prepare_schema() {
  for (auto &field : schema_->fields()) {
    const bool needs_default =
        field->is_vector_field() && field->index_params() == nullptr;
    if (!needs_default) {
      continue;
    }
    field->set_index_params(DefaultVectorIndexParams);
  }
}

CollectionImpl::CollectionImpl(const std::string &path) : path_(path) {}

// Closes the collection on destruction unless Destroy() already ran.
CollectionImpl::~CollectionImpl() {
  if (destroyed_) {
    return;
  }
  Close();
}

// Opens the collection with `options`.
//
// A schema supplied at construction means "create a new collection", which is
// rejected in read-only mode; without a schema the collection is recovered
// from disk. The SQL engine is set up regardless of the outcome, and the
// status of create()/recovery() is returned.
Status CollectionImpl::Open(const CollectionOptions &options) {
  options_ = options;

  const bool creating = (schema_ != nullptr);
  if (creating && options_.read_only_) {
    return Status::InvalidArgument(
        "Unable to create collection with read-only mode.");
  }

  // create new collection with existing schema, or recover from disk
  Status s = creating ? create() : recovery();

  sql_engine_ = sqlengine::SQLEngine::create(std::make_shared<Profiler>());

  return s;
}

// Takes the schema lock exclusively and closes the collection.
// Only called from the destructor.
Status CollectionImpl::Close() {
  std::lock_guard<std::shared_mutex> guard(schema_handle_mtx_);

  CHECK_DESTROY_RETURN_STATUS(destroyed_, false);

  return close_unsafe();
}

// Flushes (unless read-only), drops every owned component and closes the lock
// file. No lock is taken here. A flush failure is returned right away, before
// anything is released.
Status CollectionImpl::close_unsafe() {
  const bool writable = !options_.read_only_;
  if (writable) {
    auto flush_status = flush_unsafe();
    CHECK_RETURN_STATUS(flush_status);
  }

  // Release the components in the same order as before.
  writing_segment_ = nullptr;
  segment_manager_ = nullptr;
  version_manager_ = nullptr;
  id_map_ = nullptr;
  delete_store_ = nullptr;

  lock_file_.close();

  return Status::OK();
}

// Closes the collection, removes its directory and marks it destroyed.
// The result of the directory removal is not checked.
Status CollectionImpl::Destroy() {
  CHECK_COLLECTION_READONLY_RETURN_STATUS;

  std::lock_guard<std::shared_mutex> guard(schema_handle_mtx_);

  CHECK_DESTROY_RETURN_STATUS(destroyed_, false);

  auto close_status = close_unsafe();
  CHECK_RETURN_STATUS(close_status);

  const char *dir = path_.c_str();
  ailego::FileHelper::RemoveDirectory(dir);

  destroyed_ = true;

  return Status::OK();
}

// Flushes the writing segment under the schema lock.
Status CollectionImpl::Flush() {
  CHECK_COLLECTION_READONLY_RETURN_STATUS;

  std::lock_guard<std::shared_mutex> guard(schema_handle_mtx_);
  CHECK_DESTROY_RETURN_STATUS(destroyed_, false);

  Status flush_status = flush_unsafe();
  return flush_status;
}

// Flushes the writing segment without taking any lock; fails with an internal
// error when there is no writing segment.
Status CollectionImpl::flush_unsafe() {
  if (writing_segment_ == nullptr) {
    return Status::InternalError(
        "flush writing segment failed because writing segment is nullptr");
  }
  return writing_segment_->flush();
}

// Returns the collection's root path (no lock is taken).
Result<std::string> CollectionImpl::Path() const {
  CHECK_DESTROY_RETURN_STATUS_EXPECTED(destroyed_, false);

  const std::string &root = path_;
  return root;
}

// Computes the number of live documents and, for every vector field, the
// fraction of those documents that live in segments whose vector index for
// the field is built.
//
// Each segment's filtered document count is computed once and reused for all
// fields, so the completeness ratios are consistent with `doc_count`. The
// counts are accumulated in 64 bits to avoid truncation on large collections.
// A collection without documents reports a completeness of 1 for every
// field.
Result<CollectionStats> CollectionImpl::Stats() const {
  std::lock_guard lock(schema_handle_mtx_);

  CHECK_DESTROY_RETURN_STATUS_EXPECTED(destroyed_, false);

  auto segments = get_all_segments();
  auto vector_fields = schema_->vector_fields();

  CollectionStats stats;
  stats.doc_count = 0;

  // Live (non-deleted) documents per segment, index-aligned with `segments`.
  std::vector<uint64_t> live_counts;
  live_counts.reserve(segments.size());
  for (auto &segment : segments) {
    const uint64_t live = segment->doc_count(delete_store_->make_filter());
    live_counts.push_back(live);
    stats.doc_count += live;
  }

  for (auto &field : vector_fields) {
    if (stats.doc_count == 0) {
      // if no doc, completeness is 1
      stats.index_completeness[field->name()] = 1;
      continue;
    }

    uint64_t indexed_doc_count{0};
    for (size_t i = 0; i < segments.size(); ++i) {
      if (segments[i]->meta()->vector_indexed(field->name())) {
        indexed_doc_count += live_counts[i];
      }
    }
    stats.index_completeness[field->name()] =
        indexed_doc_count * 1.0 / stats.doc_count;
  }

  return stats;
}

// Returns a copy of the current schema, taken under the schema lock.
Result<CollectionSchema> CollectionImpl::Schema() const {
  std::lock_guard<std::shared_mutex> guard(schema_handle_mtx_);

  CHECK_DESTROY_RETURN_STATUS_EXPECTED(destroyed_, false);

  const CollectionSchema &current = *schema_;
  return current;
}

// Returns a copy of the options the collection was opened with, taken under
// the schema lock.
Result<CollectionOptions> CollectionImpl::Options() const {
  std::lock_guard<std::shared_mutex> guard(schema_handle_mtx_);

  CHECK_DESTROY_RETURN_STATUS_EXPECTED(destroyed_, false);

  const CollectionOptions &current = options_;
  return current;
}

// Creates (or replaces) the index on `column_name` with `index_params`.
//
// Outline:
//   * validate the change on a copy of the schema;
//   * return early when the field already has equal index params;
//   * block writes, then replace the writing segment with one created from
//     the new schema (dumping the old one first when it holds documents);
//   * build and run one index task per affected persisted segment;
//   * record the new schema and segment metas in a new version, apply and
//     flush it, then reload the rebuilt indexes into the in-memory segments;
//   * finally publish the new schema.
//
// Any failing step returns its status immediately.
Status CollectionImpl::CreateIndex(const std::string &column_name,
                                   const IndexParams::Ptr &index_params,
                                   const CreateIndexOptions &options) {
  CHECK_COLLECTION_READONLY_RETURN_STATUS;

  std::lock_guard lock(schema_handle_mtx_);

  CHECK_DESTROY_RETURN_STATUS(destroyed_, false);

  // Apply the change to a copy so schema_ stays intact until everything
  // succeeded.
  auto new_schema = std::make_shared<CollectionSchema>(*schema_);
  auto s = new_schema->add_index(column_name, index_params);
  CHECK_RETURN_STATUS(s);
  s = new_schema->validate();
  CHECK_RETURN_STATUS(s);

  auto field = schema_->get_field(column_name);
  if (field->index_params() != nullptr &&
      *field->index_params() == *index_params) {
    // equal index params
    return Status::OK();
  }

  // forbidden writing until index is ready
  std::lock_guard write_lock(write_mtx_);

  Version new_version = version_manager_->get_current_version();

  if (writing_segment_->doc_count() > 0) {
    // The writing segment has data: dump it, hand it to the segment manager
    // and start a fresh writing segment right after its last doc id.
    s = writing_segment_->dump();
    CHECK_RETURN_STATUS(s);

    s = segment_manager_->add_segment(writing_segment_);
    CHECK_RETURN_STATUS(s);

    auto seg_options =
        SegmentOptions{false, options_.enable_mmap_, options_.max_buffer_size_};
    auto new_segment = Segment::CreateAndOpen(
        path_, *new_schema, allocate_segment_id(),
        writing_segment_->meta()->max_doc_id() + 1, id_map_, delete_store_,
        version_manager_, seg_options);
    if (!new_segment) {
      return new_segment.error();
    }

    s = new_version.add_persisted_segment_meta(writing_segment_->meta());
    CHECK_RETURN_STATUS(s);

    writing_segment_ = new_segment.value();
    new_version.set_next_segment_id(segment_id_allocator_.load());

  } else {
    // TODO: allocate new segment id and clear current writing segment at last
    // recreate writing segment
    // The empty writing segment is destroyed and recreated with the same id
    // and starting doc id, but with the new schema.
    s = writing_segment_->destroy();
    CHECK_RETURN_STATUS(s);
    auto id = writing_segment_->id();
    auto min_doc_id = writing_segment_->meta()->min_doc_id();
    writing_segment_.reset();
    SegmentOptions seg_options;
    seg_options.enable_mmap_ = options_.enable_mmap_;
    seg_options.max_buffer_size_ = options_.max_buffer_size_;
    seg_options.read_only_ = options_.read_only_;
    auto writing_segment =
        Segment::CreateAndOpen(path_, *new_schema, id, min_doc_id, id_map_,
                               delete_store_, version_manager_, seg_options);
    if (!writing_segment) {
      return writing_segment.error();
    }
    writing_segment_ = writing_segment.value();
  }
  new_version.reset_writing_segment_meta(writing_segment_->meta());

  // get_all_segment will return writing segment if it has docs
  auto persist_segments = get_all_persist_segments();

  bool is_vector_field = field->is_vector_field();

  // 1. build the index tasks for the persisted segments
  std::vector<SegmentTask::Ptr> tasks;
  if (is_vector_field) {
    tasks = build_create_vector_index_task(persist_segments, column_name,
                                           index_params, options.concurrency_);

  } else {
    tasks = build_create_scalar_index_task(persist_segments, column_name,
                                           index_params, options.concurrency_);
  }

  if (tasks.empty()) {
    // Nothing to build: only the schema (and writing segment) changed.
    new_version.set_schema(*new_schema);

    s = version_manager_->apply(new_version);
    CHECK_RETURN_STATUS(s);

    // persist manifest
    s = version_manager_->flush();
    CHECK_RETURN_STATUS(s);

    schema_ = new_schema;
    return Status::OK();
  }

  s = execute_tasks(tasks);
  CHECK_RETURN_STATUS(s);

  new_version.set_schema(*new_schema);

  // Record the segment meta produced by every task in the new version.
  for (auto &task : tasks) {
    auto task_info = task->task_info();

    if (std::holds_alternative<CreateVectorIndexTask>(task_info)) {
      auto create_index_task = std::get<CreateVectorIndexTask>(task_info);
      s = new_version.update_persisted_segment_meta(
          create_index_task.output_segment_meta_);
    } else if (std::holds_alternative<CreateScalarIndexTask>(task_info)) {
      auto create_index_task = std::get<CreateScalarIndexTask>(task_info);
      s = new_version.update_persisted_segment_meta(
          create_index_task.output_segment_meta_);
    }
    CHECK_RETURN_STATUS(s);
  }

  // 2. update version
  s = version_manager_->apply(new_version);
  CHECK_RETURN_STATUS(s);

  // 3. persist version
  s = version_manager_->flush();
  CHECK_RETURN_STATUS(s);

  // 4. reload the freshly built indexes into the input segments
  for (auto &task : tasks) {
    auto task_info = task->task_info();

    if (std::holds_alternative<CreateVectorIndexTask>(task_info)) {
      auto create_index_task = std::get<CreateVectorIndexTask>(task_info);
      s = create_index_task.input_segment_->reload_vector_index(
          *new_schema, create_index_task.output_segment_meta_,
          create_index_task.output_vector_indexers_,
          create_index_task.output_quant_vector_indexers_);
    } else if (std::holds_alternative<CreateScalarIndexTask>(task_info)) {
      auto create_index_task = std::get<CreateScalarIndexTask>(task_info);
      s = create_index_task.input_segment_->reload_scalar_index(
          *new_schema, create_index_task.output_segment_meta_,
          create_index_task.output_scalar_indexer_);
    }
    CHECK_RETURN_STATUS(s);
  }

  // Publish the new schema only after everything above succeeded.
  schema_ = new_schema;

  return Status::OK();
}

// Returns one create-vector-index task for every segment whose vector index
// on `column` is not yet ready for `index_params`.
std::vector<SegmentTask::Ptr> CollectionImpl::build_create_vector_index_task(
    const std::vector<Segment::Ptr> &segments, const std::string &column,
    const IndexParams::Ptr &index_params, int concurrency) {
  std::vector<SegmentTask::Ptr> pending;
  for (auto &segment : segments) {
    if (segment->vector_index_ready(column, index_params)) {
      continue;  // already indexed with these params
    }
    pending.push_back(SegmentTask::CreateCreateVectorIndexTask(
        CreateVectorIndexTask{segment, column, index_params, concurrency}));
  }
  return pending;
}

// Returns one create-scalar-index task per segment for the single column
// `column`.
std::vector<SegmentTask::Ptr> CollectionImpl::build_create_scalar_index_task(
    const std::vector<Segment::Ptr> &segments, const std::string &column,
    const IndexParams::Ptr &index_params, int concurrency) {
  std::vector<SegmentTask::Ptr> pending;
  for (auto it = segments.begin(); it != segments.end(); ++it) {
    auto &segment = *it;
    pending.push_back(SegmentTask::CreateCreateScalarIndexTask(
        CreateScalarIndexTask{segment, {column}, index_params, concurrency}));
  }
  return pending;
}

// Executes the tasks sequentially and returns the first failing status, or OK
// when all of them succeed (including the empty case).
Status CollectionImpl::execute_tasks(
    std::vector<SegmentTask::Ptr> &tasks) const {
  for (auto &task : tasks) {
    Status task_status = SegmentHelper::Execute(task);
    if (!task_status.ok()) {
      return task_status;
    }
  }

  return Status::OK();
}

// Drops the index on `column_name`.
//
// Outline:
//   * validate the change on a copy of the schema;
//   * return early when the field has no index, or is a vector field that
//     already uses the default vector index params;
//   * block writes, then replace the writing segment with one created from
//     the new schema (dumping the old one first when it holds documents);
//   * build and run one drop task per persisted segment;
//   * record the new schema and segment metas in a new version, apply and
//     flush it, then reload the indexes of the in-memory segments;
//   * finally publish the new schema.
//
// Any failing step returns its status immediately.
Status CollectionImpl::DropIndex(const std::string &column_name) {
  CHECK_COLLECTION_READONLY_RETURN_STATUS;

  std::lock_guard lock(schema_handle_mtx_);

  // Checked under the schema lock, like every other schema operation, so a
  // concurrent Destroy() cannot slip in between the check and the work.
  CHECK_DESTROY_RETURN_STATUS(destroyed_, false);

  // Apply the change to a copy so schema_ stays intact until everything
  // succeeded.
  auto new_schema = std::make_shared<CollectionSchema>(*schema_);
  auto s = new_schema->drop_index(column_name);
  CHECK_RETURN_STATUS(s);

  auto field = schema_->get_field(column_name);
  if (field->index_params() == nullptr) {
    return Status::OK();  // return ok if not indexed
  }

  if (field->is_vector_field() &&
      *field->index_params() == DefaultVectorIndexParams) {
    return Status::OK();
  }

  // forbidden writing until index is ready
  std::lock_guard write_lock(write_mtx_);

  Version new_version = version_manager_->get_current_version();

  bool is_vector_field = field->is_vector_field();

  if (writing_segment_->doc_count() > 0) {
    // The writing segment has data: dump it, hand it to the segment manager
    // and start a fresh writing segment right after its last doc id.
    s = writing_segment_->dump();
    CHECK_RETURN_STATUS(s);

    s = segment_manager_->add_segment(writing_segment_);
    CHECK_RETURN_STATUS(s);

    auto new_segment =
        Segment::CreateAndOpen(path_, *new_schema, allocate_segment_id(),
                               writing_segment_->meta()->max_doc_id() + 1,
                               id_map_, delete_store_, version_manager_,
                               SegmentOptions{false, options_.enable_mmap_,
                                              options_.max_buffer_size_});
    if (!new_segment) {
      return new_segment.error();
    }

    s = new_version.add_persisted_segment_meta(writing_segment_->meta());
    CHECK_RETURN_STATUS(s);

    writing_segment_ = new_segment.value();
    new_version.set_next_segment_id(segment_id_allocator_.load());

  } else {
    // recreate writing segment
    // The empty writing segment is destroyed and recreated with the same id
    // and starting doc id, but with the new schema.
    s = writing_segment_->destroy();
    CHECK_RETURN_STATUS(s);
    auto id = writing_segment_->id();
    auto min_doc_id = writing_segment_->meta()->min_doc_id();
    writing_segment_.reset();
    SegmentOptions seg_options;
    seg_options.enable_mmap_ = options_.enable_mmap_;
    seg_options.max_buffer_size_ = options_.max_buffer_size_;
    seg_options.read_only_ = options_.read_only_;
    auto writing_segment =
        Segment::CreateAndOpen(path_, *new_schema, id, min_doc_id, id_map_,
                               delete_store_, version_manager_, seg_options);
    if (!writing_segment) {
      return writing_segment.error();
    }

    writing_segment_ = writing_segment.value();
  }
  new_version.reset_writing_segment_meta(writing_segment_->meta());

  auto persist_segments = get_all_persist_segments();

  // 1. build the drop tasks for the persisted segments
  std::vector<SegmentTask::Ptr> tasks;
  if (is_vector_field) {
    tasks = build_drop_vector_index_task(persist_segments, column_name);
  } else {
    tasks = build_drop_scalar_index_task(persist_segments, column_name);
  }

  if (tasks.empty()) {
    // Nothing to rebuild: only the schema (and writing segment) changed.
    new_version.set_schema(*new_schema);

    s = version_manager_->apply(new_version);
    CHECK_RETURN_STATUS(s);

    // persist manifest
    s = version_manager_->flush();
    CHECK_RETURN_STATUS(s);

    schema_ = new_schema;
    return Status::OK();
  }

  s = execute_tasks(tasks);
  CHECK_RETURN_STATUS(s);

  new_version.set_schema(*new_schema);

  // Record the segment meta produced by every task in the new version.
  for (auto &task : tasks) {
    auto task_info = task->task_info();

    if (std::holds_alternative<DropVectorIndexTask>(task_info)) {
      auto drop_index_task = std::get<DropVectorIndexTask>(task_info);
      s = new_version.update_persisted_segment_meta(
          drop_index_task.output_segment_meta_);
    } else if (std::holds_alternative<DropScalarIndexTask>(task_info)) {
      auto drop_index_task = std::get<DropScalarIndexTask>(task_info);
      s = new_version.update_persisted_segment_meta(
          drop_index_task.output_segment_meta_);
    }
    CHECK_RETURN_STATUS(s);
  }

  // 2. update version
  s = version_manager_->apply(new_version);
  CHECK_RETURN_STATUS(s);

  // 3. persist manifest
  s = version_manager_->flush();
  CHECK_RETURN_STATUS(s);

  // 4. reload the indexes of the input segments from the task outputs
  for (auto &task : tasks) {
    auto task_info = task->task_info();

    if (std::holds_alternative<DropVectorIndexTask>(task_info)) {
      auto drop_index_task = std::get<DropVectorIndexTask>(task_info);
      s = drop_index_task.input_segment_->reload_vector_index(
          *new_schema, drop_index_task.output_segment_meta_,
          drop_index_task.output_vector_indexers_);
    } else if (std::holds_alternative<DropScalarIndexTask>(task_info)) {
      auto drop_index_task = std::get<DropScalarIndexTask>(task_info);
      s = drop_index_task.input_segment_->reload_scalar_index(
          *new_schema, drop_index_task.output_segment_meta_,
          drop_index_task.output_scalar_indexer_);
    }
    CHECK_RETURN_STATUS(s);
  }

  // Publish the new schema only after everything above succeeded.
  schema_ = new_schema;

  return Status::OK();
}

// Returns one drop-vector-index task per segment for `column`.
std::vector<SegmentTask::Ptr> CollectionImpl::build_drop_vector_index_task(
    const std::vector<Segment::Ptr> &segments, const std::string &column) {
  std::vector<SegmentTask::Ptr> pending;
  for (auto it = segments.begin(); it != segments.end(); ++it) {
    auto &segment = *it;
    pending.emplace_back(SegmentTask::CreateDropVectorIndexTask(
        DropVectorIndexTask{segment, column}));
  }
  return pending;
}

// Returns one drop-scalar-index task per segment for the single column
// `column`.
std::vector<SegmentTask::Ptr> CollectionImpl::build_drop_scalar_index_task(
    const std::vector<Segment::Ptr> &segments, const std::string &column) {
  std::vector<SegmentTask::Ptr> pending;
  for (auto it = segments.begin(); it != segments.end(); ++it) {
    auto &segment = *it;
    pending.emplace_back(SegmentTask::CreateDropScalarIndexTask(
        DropScalarIndexTask(segment, {column})));
  }
  return pending;
}

Status CollectionImpl::Optimize(const OptimizeOptions &options) {
  CHECK_COLLECTION_READONLY_RETURN_STATUS;

  std::lock_guard lock(schema_handle_mtx_);
  // when optimizing, schema operations(include another optimize) are not
  // allowed

  CHECK_DESTROY_RETURN_STATUS(destroyed_, false);

  std::vector<Segment::Ptr> persist_segments;

  {
    // forbidden writing for a while
    std::lock_guard write_lock(write_mtx_);

    if (writing_segment_->doc_count() != 0) {
      // flush and create new segment
      auto s = switch_to_new_segment_for_writing();
      if (!s.ok()) {
        return s;
      }
    }

    persist_segments =
        get_all_persist_segments();  // will not return writing segment
    // after leave this scope, writing action is allowed
  }

  if (persist_segments.size() == 0) {
    // no need to optimize
    return Status::OK();
  }

  // build segment compact task
  auto delete_store_clone = delete_store_->clone();
  auto tasks =
      build_compact_task(schema_, persist_segments, options.concurrency_,
                         delete_store_clone->make_filter());

  // execute segment compact task
  auto s = execute_compact_task(tasks);
  CHECK_RETURN_STATUS(s);

  {
    // forbidden writing for updating version
    // writing action may trigger updating version where confict occurs
    std::lock_guard write_lock(write_mtx_);

    Version new_version = version_manager_->get_current_version();

    for (auto &task : tasks) {
      auto task_info = task->task_info();

      if (std::holds_alternative<CompactTask>(task_info)) {
        auto compact_task = std::get<CompactTask>(task_info);

        // 0. check if has output segment meta
        if (compact_task.output_segment_meta_) {
          // 1. rename built tmp segments
          auto tmp_segment_id = compact_task.output_segment_id_;
          auto tmp_segment_path =
              FileHelper::MakeTempSegmentPath(path_, tmp_segment_id);

          auto new_segment_id = allocate_segment_id();
          auto new_segment_path =
              FileHelper::MakeSegmentPath(path_, new_segment_id);

          if (!FileHelper::MoveDirectory(tmp_segment_path, new_segment_path)) {
            return Status::InternalError("move segment directory failed");
          }

          // update output_segment_meta_'s segment id
          compact_task.output_segment_meta_->set_id(new_segment_id);

          s = new_version.add_persisted_segment_meta(
              compact_task.output_segment_meta_);
          CHECK_RETURN_STATUS(s);
          new_version.set_next_segment_id(segment_id_allocator_.load());
        }

        for (auto input_segment : compact_task.input_segments_) {
          s = new_version.remove_persisted_segment_meta(input_segment->id());
          CHECK_RETURN_STATUS(s);
        }
      } else if (std::holds_alternative<CreateVectorIndexTask>(task_info)) {
        auto create_index_task = std::get<CreateVectorIndexTask>(task_info);
        s = new_version.update_persisted_segment_meta(
            create_index_task.output_segment_meta_);
        CHECK_RETURN_STATUS(s);
      }
    }

    // 2. update version
    s = version_manager_->apply(new_version);
    CHECK_RETURN_STATUS(s);

    // 3. persist version
    s = version_manager_->flush();
    CHECK_RETURN_STATUS(s);

    // 4. remove old segments or block
    for (auto &task : tasks) {
      auto task_info = task->task_info();

      if (std::holds_alternative<CompactTask>(task_info)) {
        auto compact_task = std::get<CompactTask>(task_info);

        if (compact_task.output_segment_meta_) {
          auto new_segment =
              Segment::Open(path_, *schema_, *compact_task.output_segment_meta_,
                            id_map_, delete_store_, version_manager_,
                            SegmentOptions{true, options_.enable_mmap_});
          if (!new_segment.has_value()) {
            return new_segment.error();
          }
          s = segment_manager_->add_segment(new_segment.value());
          CHECK_RETURN_STATUS(s);
        }

        for (auto input_segment : compact_task.input_segments_) {
          s = segment_manager_->destroy_segment(input_segment->id());
          CHECK_RETURN_STATUS(s);
        }
      } else if (std::holds_alternative<CreateVectorIndexTask>(task_info)) {
        auto create_index_task = std::get<CreateVectorIndexTask>(task_info);

        s = create_index_task.input_segment_->reload_vector_index(
            *schema_, create_index_task.output_segment_meta_,
            create_index_task.output_vector_indexers_,
            create_index_task.output_quant_vector_indexers_);
        CHECK_RETURN_STATUS(s);
      }
    }
  }

  return Status::OK();
}

// Plans the work for an optimize pass over the given persisted segments.
//
// Segments are walked in the order given and accumulated into a group. A
// group is closed (turned into a task) when adding the next segment would
// push the group's document count above schema->max_doc_count_per_segment().
//
// One of two modes is chosen once for the whole call:
//   - rebuild:    the count obtained with `filter` applied is below
//                 (1 - COMPACT_DELETE_RATIO_THRESHOLD) of the raw count.
//                 Groups are sized by the filtered count and every group
//                 becomes a compact task that carries `filter`.
//   - no rebuild: groups are sized by the raw count. A single-segment group
//                 becomes a create-vector-index task; a larger group becomes
//                 a compact task without a filter.
//
// Params:
//   schema      - collection schema; supplies the per-segment doc limit and
//                 is forwarded to compact tasks.
//   segments    - candidate segments; an empty list yields no tasks.
//   concurrency - forwarded unchanged to every task.
//   filter      - passed to Segment::doc_count() and, in rebuild mode, to
//                 the compact tasks (presumably excludes deleted docs; the
//                 caller builds it from the delete store).
// Returns the planned tasks in group order. Each compact task consumes a
// temporary segment id from allocate_segment_id_for_tmp_segment().
std::vector<SegmentTask::Ptr> CollectionImpl::build_compact_task(
    const CollectionSchema::Ptr &schema,
    const std::vector<Segment::Ptr> &segments, int concurrency,
    const IndexFilter::Ptr filter) {
  std::vector<SegmentTask::Ptr> tasks;
  if (segments.empty()) return tasks;

  // First pass: compare the raw and filtered totals to decide the mode.
  bool rebuild = false;
  size_t current_doc_count = 0;
  size_t current_actual_doc_count = 0;
  for (auto &segment : segments) {
    current_doc_count += segment->doc_count();
    current_actual_doc_count += segment->doc_count(filter);
  }
  if (current_actual_doc_count <
      current_doc_count * (1 - COMPACT_DELETE_RATIO_THRESHOLD)) {
    // if delete ratio is large enough, rebuild
    rebuild = true;
  }

  auto max_doc_count_per_segment = schema->max_doc_count_per_segment();

  // Second pass: the counters are reused as running totals of the open group.
  std::vector<Segment::Ptr> current_group;
  current_doc_count = 0;
  current_actual_doc_count = 0;

  for (const auto &seg : segments) {
    uint64_t doc_count = seg->doc_count();
    uint64_t actual_doc_count = seg->doc_count(filter);

    // Before adding `seg`, check whether it would overflow the open group;
    // if so, close the group into a task first.
    if (!current_group.empty()) {
      SegmentTask::Ptr task;
      bool skip_task{false};
      if (rebuild) {
        if (current_actual_doc_count + actual_doc_count >
            max_doc_count_per_segment) {
          // only create SegmentCompactTask when rebuild=true
          task = SegmentTask::CreateComapctTask(
              CompactTask{path_, schema, current_group,
                          allocate_segment_id_for_tmp_segment(), filter,
                          !options_.enable_mmap_, concurrency});
        }
      } else {
        if (current_doc_count + doc_count > max_doc_count_per_segment) {
          // check current_group size
          if (current_group.size() == 1) {
            // A lone segment is not merged; only its vector index is built,
            // and the task is dropped when the indexes are already ready.
            task =
                SegmentTask::CreateCreateVectorIndexTask(CreateVectorIndexTask{
                    current_group[0], "", nullptr, concurrency});
            skip_task = current_group[0]->all_vector_index_ready();
          } else {
            task = SegmentTask::CreateComapctTask(
                CompactTask{path_, schema, current_group,
                            allocate_segment_id_for_tmp_segment(), nullptr,
                            !options_.enable_mmap_, concurrency});
          }
        }
      }

      // A non-null task means the group was closed: start a fresh group even
      // when the task itself is skipped.
      if (task) {
        current_group.clear();
        current_doc_count = 0;
        current_actual_doc_count = 0;
        if (!skip_task) {
          tasks.push_back(task);
        }
      }
    }

    current_group.push_back(seg);
    current_doc_count += doc_count;
    current_actual_doc_count += actual_doc_count;
  }

  // Flush the trailing group, which never hit the size limit.
  // NOTE(review): unlike the in-loop case, a single-segment group here is not
  // skipped when all_vector_index_ready() is true - confirm this is intended.
  if (current_group.size() > 0) {
    SegmentTask::Ptr task;
    if (current_group.size() == 1 && !rebuild) {
      task = SegmentTask::CreateCreateVectorIndexTask(
          CreateVectorIndexTask{current_group[0], "", nullptr, concurrency});
    } else {
      task = SegmentTask::CreateComapctTask(CompactTask{
          path_, schema, current_group, allocate_segment_id_for_tmp_segment(),
          rebuild ? filter : nullptr, !options_.enable_mmap_, concurrency});
    }
    tasks.push_back(task);
  }

  return tasks;
}

// Runs the planned tasks one after another, in order.
// Stops at the first task that fails and returns its status; tasks after it
// are not executed. Returns OK when every task succeeded (or none was given).
Status CollectionImpl::execute_compact_task(
    std::vector<SegmentTask::Ptr> &tasks) const {
  for (auto &pending : tasks) {
    Status outcome = SegmentHelper::Execute(pending);
    if (outcome.ok()) {
      continue;
    }
    return outcome;
  }

  return Status::OK();
}

// Checks that a column DDL request is acceptable for the current schema.
// Nothing is modified; the status of the first failed check is returned.
//
// Params:
//   column     - name of the existing column (ALTER / DROP).
//   schema     - new column definition (ADD, or ALTER when changing schema).
//   expression - ADD only; only its emptiness is inspected here.
//   rename     - ALTER only; the new name when renaming.
//   op         - operation being validated; any other value passes.
Status CollectionImpl::validate(const std::string &column,
                                const FieldSchema::Ptr &schema,
                                const std::string &expression,
                                const std::string &rename, ColumnOp op) {
  // Column DDL only accepts data types within [INT32, DOUBLE].
  const auto require_numeric = [](const FieldSchema *candidate) -> Status {
    const auto type = candidate->data_type();
    if (type >= DataType::INT32 && type <= DataType::DOUBLE) {
      return Status::OK();
    }
    return Status::InvalidArgument(
        "Only support basic numeric data type [int32, int64, uint32, uint64, "
        "float, double]: ",
        candidate->to_string());
  };

  if (op == ColumnOp::ADD) {
    if (schema == nullptr) {
      return Status::InvalidArgument("Column schema is null");
    }

    const auto &new_name = schema->name();
    if (new_name.empty()) {
      return Status::InvalidArgument("Column name is empty");
    }
    if (schema_->has_field(new_name)) {
      return Status::InvalidArgument("column already exists");
    }

    auto verdict = schema->validate();
    CHECK_RETURN_STATUS(verdict);

    verdict = require_numeric(schema.get());
    CHECK_RETURN_STATUS(verdict);

    // Without an expression there is nothing to fill existing rows with, so
    // the new column has to be nullable.
    const bool lacks_fill_value = expression.empty() && !schema->nullable();
    if (lacks_fill_value) {
      return Status::InvalidArgument(
          "Add column is not supported for non-nullable column");
    }

    return Status::OK();
  }

  if (op == ColumnOp::ALTER) {
    if (column.empty()) {
      return Status::InvalidArgument("column name is empty");
    }
    if (!schema_->has_field(column)) {
      return Status::InvalidArgument("column ", column, " not found");
    }

    // Rename and schema change are mutually exclusive.
    const bool renaming = !rename.empty();
    if (renaming && schema) {
      return Status::InvalidArgument(
          "cannot specify both rename and new column schema");
    }

    auto *existing = schema_->get_field(column);
    auto verdict = require_numeric(existing);
    CHECK_RETURN_STATUS(verdict);

    if (renaming) {
      if (schema_->has_field(rename)) {
        return Status::InvalidArgument("new column name ", rename,
                                       " already exists");
      }
      return Status::OK();
    }

    // Schema change path.
    if (!schema) {
      return Status::InvalidArgument("New column schema is null");
    }

    verdict = schema->validate();
    CHECK_RETURN_STATUS(verdict);

    if (schema->name().empty()) {
      return Status::InvalidArgument("new column schema name is empty");
    }

    // A nullable column cannot be tightened to non-nullable.
    if (!schema->nullable() && existing->nullable()) {
      return Status::InvalidArgument(
          "new column schema is not nullable, but old column schema is "
          "nullable");
    }

    // An identical definition is accepted without further checks.
    if (*existing == *schema) {
      return Status::OK();
    }

    verdict = require_numeric(schema.get());
    CHECK_RETURN_STATUS(verdict);

    return Status::OK();
  }

  if (op == ColumnOp::DROP) {
    if (!schema_->has_field(column)) {
      return Status::InvalidArgument("Column not exists: ", column);
    }

    auto verdict = require_numeric(schema_->get_field(column));
    CHECK_RETURN_STATUS(verdict);
  }

  return Status::OK();
}

// Adds a column to the collection schema and to every persisted segment.
//
// Params:
//   column_schema - definition of the new column; checked by validate() with
//                   ColumnOp::ADD.
//   expression    - forwarded to SegmentManager::add_column (presumably used
//                   to compute values for existing rows - confirm there).
//   options       - supplies the concurrency passed to the segment manager.
// Returns the first non-OK status encountered, or OK once the new version
// has been applied and flushed.
Status CollectionImpl::AddColumn(const FieldSchema::Ptr &column_schema,
                                 const std::string &expression,
                                 const AddColumnOptions &options) {
  CHECK_COLLECTION_READONLY_RETURN_STATUS;

  // Exclusive schema lock: readers and writers take this mutex in shared
  // mode, so they are held off for the whole operation.
  std::lock_guard lock(schema_handle_mtx_);

  CHECK_DESTROY_RETURN_STATUS(destroyed_, false);

  // validate
  auto s = validate("", column_schema, expression, "", ColumnOp::ADD);
  CHECK_RETURN_STATUS(s);

  // forbidden writing until index is ready
  std::lock_guard write_lock(write_mtx_);

  // Work on a copy; schema_ is only replaced after everything succeeded.
  auto new_schema = std::make_shared<CollectionSchema>(*schema_);
  s = new_schema->add_field(column_schema);
  CHECK_RETURN_STATUS(s);

  // Move pending documents into a persisted segment so the segment manager
  // call below covers them and the writing segment is left empty.
  if (writing_segment_->doc_count() > 0) {
    s = switch_to_new_segment_for_writing();
    CHECK_RETURN_STATUS(s);
  }

  // Taken after the possible switch above, which applies its own version.
  Version new_version = version_manager_->get_current_version();

  // add column on segment manager
  s = segment_manager_->add_column(column_schema, expression,
                                   options.concurrency_);
  CHECK_RETURN_STATUS(s);

  // reset writing segment with new schema
  // The segment is recreated with the same id and min_doc_id.
  auto id = writing_segment_->id();
  auto min_doc_id = writing_segment_->meta()->min_doc_id();

  s = writing_segment_->destroy();
  CHECK_RETURN_STATUS(s);
  writing_segment_.reset();

  SegmentOptions seg_options;
  seg_options.enable_mmap_ = options_.enable_mmap_;
  seg_options.max_buffer_size_ = options_.max_buffer_size_;
  seg_options.read_only_ = options_.read_only_;
  auto writing_segment =
      Segment::CreateAndOpen(path_, *new_schema, id, min_doc_id, id_map_,
                             delete_store_, version_manager_, seg_options);
  if (!writing_segment) {
    // NOTE(review): writing_segment_ was already reset above, so it is left
    // null on this return path.
    return writing_segment.error();
  }
  writing_segment_ = writing_segment.value();

  // update new version
  new_version.set_schema(*new_schema);
  new_version.reset_writing_segment_meta(writing_segment_->meta());

  // Record the metas of the persisted segments as reported after add_column.
  auto new_segment_metas = segment_manager_->get_segments_meta();
  for (auto meta : new_segment_metas) {
    s = new_version.update_persisted_segment_meta(meta);
    CHECK_RETURN_STATUS(s);
  }

  s = version_manager_->apply(new_version);
  CHECK_RETURN_STATUS(s);

  // persist manifest
  s = version_manager_->flush();
  CHECK_RETURN_STATUS(s);

  // Publish the new schema only after the version has been flushed.
  schema_ = new_schema;

  return Status::OK();
}

// Removes a column from the collection schema and from every persisted
// segment.
//
// Params:
//   column_name - column to drop; checked by validate() with ColumnOp::DROP.
// Returns the first non-OK status encountered, or OK once the new version
// has been applied and flushed.
Status CollectionImpl::DropColumn(const std::string &column_name) {
  CHECK_COLLECTION_READONLY_RETURN_STATUS;

  // Exclusive schema lock: readers and writers take this mutex in shared
  // mode, so they are held off for the whole operation.
  std::lock_guard lock(schema_handle_mtx_);

  CHECK_DESTROY_RETURN_STATUS(destroyed_, false);

  // validate
  auto s = validate(column_name, nullptr, "", "", ColumnOp::DROP);
  CHECK_RETURN_STATUS(s);

  // forbidden writing until index is ready
  std::lock_guard write_lock(write_mtx_);

  // Work on a copy; schema_ is only replaced after everything succeeded.
  auto new_schema = std::make_shared<CollectionSchema>(*schema_);
  s = new_schema->drop_field(column_name);
  CHECK_RETURN_STATUS(s);

  // Move pending documents into a persisted segment so the segment manager
  // call below covers them and the writing segment is left empty.
  if (writing_segment_->doc_count() > 0) {
    s = switch_to_new_segment_for_writing();
    CHECK_RETURN_STATUS(s);
  }

  // Taken after the possible switch above, which applies its own version.
  Version new_version = version_manager_->get_current_version();

  // drop column on segment manager
  s = segment_manager_->drop_column(column_name);
  CHECK_RETURN_STATUS(s);

  // reset writing segment with new schema
  // The segment is recreated with the same id and min_doc_id.
  auto id = writing_segment_->id();
  auto min_doc_id = writing_segment_->meta()->min_doc_id();

  s = writing_segment_->destroy();
  CHECK_RETURN_STATUS(s);
  writing_segment_.reset();

  SegmentOptions seg_options;
  seg_options.enable_mmap_ = options_.enable_mmap_;
  seg_options.max_buffer_size_ = options_.max_buffer_size_;
  seg_options.read_only_ = options_.read_only_;
  auto writing_segment =
      Segment::CreateAndOpen(path_, *new_schema, id, min_doc_id, id_map_,
                             delete_store_, version_manager_, seg_options);
  if (!writing_segment) {
    // NOTE(review): writing_segment_ was already reset above, so it is left
    // null on this return path.
    return writing_segment.error();
  }
  writing_segment_ = writing_segment.value();

  // update new version
  new_version.set_schema(*new_schema);
  new_version.reset_writing_segment_meta(writing_segment_->meta());

  // Record the metas of the persisted segments as reported after drop_column.
  auto new_segment_metas = segment_manager_->get_segments_meta();
  for (auto meta : new_segment_metas) {
    s = new_version.update_persisted_segment_meta(meta);
    CHECK_RETURN_STATUS(s);
  }

  s = version_manager_->apply(new_version);
  CHECK_RETURN_STATUS(s);

  // persist manifest
  s = version_manager_->flush();
  CHECK_RETURN_STATUS(s);

  // Publish the new schema only after the version has been flushed.
  schema_ = new_schema;

  return Status::OK();
}

// Renames a column or replaces its definition, in the collection schema and
// in every persisted segment.
//
// Params:
//   column_name       - existing column to alter.
//   rename            - when non-empty, the column keeps its definition and
//                       only its name changes.
//   new_column_schema - used when `rename` is empty; the replacement
//                       definition. validate() rejects requests that set
//                       both `rename` and `new_column_schema`.
//   options           - supplies the concurrency passed to the segment
//                       manager.
// Returns the first non-OK status encountered, or OK once the new version
// has been applied and flushed.
Status CollectionImpl::AlterColumn(const std::string &column_name,
                                   const std::string &rename,
                                   const FieldSchema::Ptr &new_column_schema,
                                   const AlterColumnOptions &options) {
  CHECK_COLLECTION_READONLY_RETURN_STATUS;

  // Exclusive schema lock: readers and writers take this mutex in shared
  // mode, so they are held off for the whole operation.
  std::lock_guard lock(schema_handle_mtx_);

  CHECK_DESTROY_RETURN_STATUS(destroyed_, false);

  // validate
  auto s =
      validate(column_name, new_column_schema, "", rename, ColumnOp::ALTER);
  CHECK_RETURN_STATUS(s);

  // forbidden writing until index is ready
  std::lock_guard write_lock(write_mtx_);

  // Build the target field definition: a renamed copy of the current field,
  // or a copy of the caller-supplied schema.
  std::shared_ptr<FieldSchema> new_field_schema{nullptr};
  if (!rename.empty()) {
    new_field_schema =
        std::make_shared<FieldSchema>(*schema_->get_field(column_name));
    new_field_schema->set_name(rename);
  } else {
    new_field_schema = std::make_shared<FieldSchema>(*new_column_schema);
  }

  // Work on a copy; schema_ is only replaced after everything succeeded.
  auto new_schema = std::make_shared<CollectionSchema>(*schema_);
  s = new_schema->alter_field(column_name, new_field_schema);
  CHECK_RETURN_STATUS(s);

  // Move pending documents into a persisted segment so the segment manager
  // call below covers them and the writing segment is left empty.
  if (writing_segment_->doc_count() > 0) {
    s = switch_to_new_segment_for_writing();
    CHECK_RETURN_STATUS(s);
  }

  // Taken after the possible switch above, which applies its own version.
  Version new_version = version_manager_->get_current_version();

  // alter column on segment manager
  s = segment_manager_->alter_column(column_name, new_field_schema,
                                     options.concurrency_);
  CHECK_RETURN_STATUS(s);

  // reset writing segment with new schema
  // The segment is recreated with the same id and min_doc_id.
  auto id = writing_segment_->id();
  auto min_doc_id = writing_segment_->meta()->min_doc_id();

  s = writing_segment_->destroy();
  CHECK_RETURN_STATUS(s);
  writing_segment_.reset();

  SegmentOptions seg_options;
  seg_options.enable_mmap_ = options_.enable_mmap_;
  seg_options.max_buffer_size_ = options_.max_buffer_size_;
  seg_options.read_only_ = options_.read_only_;
  auto writing_segment =
      Segment::CreateAndOpen(path_, *new_schema, id, min_doc_id, id_map_,
                             delete_store_, version_manager_, seg_options);
  if (!writing_segment) {
    // NOTE(review): writing_segment_ was already reset above, so it is left
    // null on this return path.
    return writing_segment.error();
  }
  writing_segment_ = writing_segment.value();

  // update new version
  new_version.set_schema(*new_schema);
  new_version.reset_writing_segment_meta(writing_segment_->meta());

  // Record the metas of the persisted segments as reported after
  // alter_column.
  auto new_segment_metas = segment_manager_->get_segments_meta();
  for (auto meta : new_segment_metas) {
    s = new_version.update_persisted_segment_meta(meta);
    CHECK_RETURN_STATUS(s);
  }

  s = version_manager_->apply(new_version);
  CHECK_RETURN_STATUS(s);

  // persist manifest
  s = version_manager_->flush();
  CHECK_RETURN_STATUS(s);

  // Publish the new schema only after the version has been flushed.
  schema_ = new_schema;

  return Status::OK();
}

// Writes `docs` in INSERT mode; see write_impl() for the result layout.
Result<WriteResults> CollectionImpl::Insert(std::vector<Doc> &docs) {
  const WriteMode mode = WriteMode::INSERT;
  return write_impl(docs, mode);
}

// Writes `docs` in UPDATE mode; see write_impl() for the result layout.
Result<WriteResults> CollectionImpl::Update(std::vector<Doc> &docs) {
  const WriteMode mode = WriteMode::UPDATE;
  return write_impl(docs, mode);
}

// Writes `docs` in UPSERT mode; see write_impl() for the result layout.
Result<WriteResults> CollectionImpl::Upsert(std::vector<Doc> &docs) {
  const WriteMode mode = WriteMode::UPSERT;
  return write_impl(docs, mode);
}

// Loads the currently stored document that has the same primary key as
// `doc`.
//
// On success `*doc_out` receives the stored document and OK is returned.
// Returns NotFound when the key is unknown to the id map or the document is
// marked deleted, and InternalError when no segment covers the doc id or the
// segment fails to return the document.
Status CollectionImpl::internal_fetch_by_doc(const Doc &doc,
                                             Doc::Ptr *doc_out) {
  auto candidates = get_all_segments();

  uint64_t doc_id = 0;
  if (!id_map_->has(doc.pk(), &doc_id)) {
    return Status::NotFound("Document not found");
  }
  if (delete_store_->is_deleted(doc_id)) {
    return Status::NotFound("Document already deleted");
  }

  auto owner = local_segment_by_doc_id(doc_id, candidates);
  if (!owner) {
    LOG_WARN("doc_id: %zu segment not found", static_cast<size_t>(doc_id));
    return Status::InternalError("Segment not found");
  }

  auto stored = owner->Fetch(doc_id);
  if (!stored) {
    LOG_WARN("doc_id: %zu fetch doc failed", static_cast<size_t>(doc_id));
    return Status::InternalError("Fetch doc failed");
  }

  *doc_out = stored;
  return Status::OK();
}

// Upserts a single document through the current writing segment.
Status CollectionImpl::handle_upsert(Doc &doc) {
  auto &target = writing_segment_;
  return target->Upsert(doc);
}

// Updates a single document: the stored version is fetched, `doc` is merged
// into it, and the merged document is written to the writing segment.
// Fails with the fetch status when the stored document cannot be loaded.
Status CollectionImpl::handle_update(Doc &doc) {
  Doc::Ptr stored{nullptr};
  auto fetch_status = internal_fetch_by_doc(doc, &stored);
  CHECK_RETURN_STATUS(fetch_status);

  stored->merge(doc);

  Doc &merged = *stored;
  return writing_segment_->Update(merged);
}

// Inserts a single document through the current writing segment.
Status CollectionImpl::handle_insert(Doc &doc) {
  auto &target = writing_segment_;
  return target->Insert(doc);
}

// Shared implementation of Insert / Update / Upsert.
//
// Params:
//   docs - batch to write; at most kMaxWriteBatchSize documents.
//   mode - selects handle_insert / handle_update / handle_upsert per doc.
//
// Returns an error (no per-document results) when the collection is
// read-only or destroyed, the batch is too large, any document fails
// validation against the schema, or rolling over to a new writing segment
// fails part-way through the batch. Otherwise returns one Status per
// document, in input order; individual documents may have failed.
Result<WriteResults> CollectionImpl::write_impl(std::vector<Doc> &docs,
                                                WriteMode mode) {
  CHECK_READONLY_RETURN_STATUS_EXPECTED();

  std::shared_lock lock(schema_handle_mtx_);

  CHECK_DESTROY_RETURN_STATUS_EXPECTED(destroyed_, false);

  // Reject oversized batches first: the check is O(1), so do it before the
  // per-document validation and before blocking other writers.
  if (docs.size() > kMaxWriteBatchSize) {
    CHECK_RETURN_STATUS_EXPECTED(Status::InvalidArgument("Too many docs"));
  }

  // Validate every document before anything is written, so an invalid
  // document rejects the whole batch.
  for (auto &&doc : docs) {
    auto validate = doc.validate(schema_, mode == WriteMode::UPDATE);
    CHECK_RETURN_STATUS_EXPECTED(validate);
  }

  // TODO: The granularity of the write_lock is too coarse.
  std::lock_guard write_lock(write_mtx_);

  WriteResults results;

  // Apply the documents one by one, collecting a status for each.
  for (auto &&doc : docs) {
    // Roll over to a fresh writing segment once the current one is full.
    if (need_switch_to_new_segment()) {
      auto s = switch_to_new_segment_for_writing();
      CHECK_RETURN_STATUS_EXPECTED(s);
    }

    Status s;

    switch (mode) {
      case WriteMode::UPSERT:
        s = handle_upsert(doc);
        break;
      case WriteMode::UPDATE:
        s = handle_update(doc);
        break;
      case WriteMode::INSERT:
        s = handle_insert(doc);
        break;
      default:
        s = Status::InvalidArgument("Invalid write mode");
    }

    results.push_back(s);
  }

  return results;
}

// True once the writing segment holds at least the schema's per-segment
// document limit.
bool CollectionImpl::need_switch_to_new_segment() const {
  const auto limit = schema_->max_doc_count_per_segment();
  const bool below_limit = writing_segment_->doc_count() < limit;
  return !below_limit;
}

// Seals the current writing segment and replaces it with a new, empty one.
//
// Steps, in order: dump the writing segment, register it with the segment
// manager, create the new segment, then record the change in a new version
// that is applied and flushed.
//
// Params:
//   schema - schema for the new segment; when null the collection's current
//            schema_ is used. (Callers in this file pass no argument, so the
//            declaration presumably defaults it to nullptr.)
// Returns the first non-OK status encountered. The caller is expected to
// hold write_mtx_ (every call site in this file does).
// NOTE(review): on a failure after add_segment() the old segment is already
// registered with the segment manager while still being writing_segment_.
Status CollectionImpl::switch_to_new_segment_for_writing(
    const CollectionSchema::Ptr &schema) {
  auto s = writing_segment_->dump();
  CHECK_RETURN_STATUS(s);

  s = segment_manager_->add_segment(writing_segment_);
  CHECK_RETURN_STATUS(s);

  // when create new segment, segment meta should create a first new block
  // meta
  // The new segment's doc ids start right after the old segment's maximum.
  auto new_segment = Segment::CreateAndOpen(
      path_, schema == nullptr ? *schema_ : *schema, allocate_segment_id(),
      writing_segment_->meta()->max_doc_id() + 1, id_map_, delete_store_,
      version_manager_,
      SegmentOptions{false, options_.enable_mmap_, options_.max_buffer_size_});
  if (!new_segment) {
    return new_segment.error();
  }

  // Move the old segment's meta into the persisted list of the new version.
  Version version = version_manager_->get_current_version();
  auto writing_segment_meta = writing_segment_->meta();
  writing_segment_meta->remove_writing_forward_block();
  s = version.add_persisted_segment_meta(writing_segment_meta);
  CHECK_RETURN_STATUS(s);

  // From here on writes go to the new segment.
  writing_segment_ = new_segment.value();
  version.reset_writing_segment_meta(writing_segment_->meta());
  version.set_next_segment_id(segment_id_allocator_.load());

  s = version_manager_->apply(version);
  CHECK_RETURN_STATUS(s);
  s = version_manager_->flush();
  CHECK_RETURN_STATUS(s);

  return Status::OK();
}

// Deletes documents by primary key.
// Returns an error when the collection is read-only or destroyed; otherwise
// one Status per key, in input order.
Result<WriteResults> CollectionImpl::Delete(
    const std::vector<std::string> &pks) {
  CHECK_READONLY_RETURN_STATUS_EXPECTED();

  std::shared_lock lock(schema_handle_mtx_);

  CHECK_DESTROY_RETURN_STATUS_EXPECTED(destroyed_, false);

  // TODO: The granularity of the write_lock is too coarse.
  std::lock_guard write_lock(write_mtx_);

  WriteResults outcomes;
  for (const auto &key : pks) {
    outcomes.push_back(writing_segment_->Delete(key));
  }

  return outcomes;
}

// Deletes every document that matches `filter`.
//
// The filter is run as a query that returns doc ids only (no output fields,
// topk set to INT32_MAX), then each returned doc id is deleted through the
// writing segment while holding the write lock.
//
// Returns the query error when the query fails, or the status of the first
// delete that fails; documents deleted before that failure stay deleted.
Status CollectionImpl::DeleteByFilter(const std::string &filter) {
  CHECK_COLLECTION_READONLY_RETURN_STATUS;

  std::shared_lock lock(schema_handle_mtx_);

  CHECK_DESTROY_RETURN_STATUS(destroyed_, false);

  auto segments = get_all_segments();

  VectorQuery query;
  query.filter_ = filter;
  query.topk_ = INT32_MAX;
  query.output_fields_ = std::vector<std::string>{};
  query.include_doc_id_ = true;

  // Query the snapshot taken above instead of collecting the segments twice.
  auto ret = sql_engine_->execute(schema_, query, segments);
  if (!ret.has_value()) {
    return ret.error();
  }

  // TODO: The granularity of the write_lock is too coarse.
  std::lock_guard write_lock(write_mtx_);
  for (auto &doc : ret.value()) {
    Status s = writing_segment_->Delete(doc->doc_id());
    if (!s.ok()) {
      LOG_ERROR("Delete doc_id failed");
      return s;
    }
  }

  return Status::OK();
}

// Runs a vector query over all segments that currently hold documents.
// Returns an error when the collection is destroyed or the query fails
// validation against its target vector field; an empty list when there are
// no segments to search.
Result<DocPtrList> CollectionImpl::Query(const VectorQuery &query) const {
  std::shared_lock lock(schema_handle_mtx_);

  CHECK_DESTROY_RETURN_STATUS_EXPECTED(destroyed_, false);

  auto verdict = query.validate(schema_->get_vector_field(query.field_name_));
  CHECK_RETURN_STATUS_EXPECTED(verdict);

  const auto targets = get_all_segments();
  if (!targets.empty()) {
    return sql_engine_->execute(schema_, query, targets);
  }

  return DocPtrList();
}

// Runs a group-by vector query over all segments that currently hold
// documents. Returns an error when the collection is destroyed and empty
// results when there are no segments to search.
Result<GroupResults> CollectionImpl::GroupByQuery(
    const GroupByVectorQuery &query) const {
  std::shared_lock lock(schema_handle_mtx_);

  CHECK_DESTROY_RETURN_STATUS_EXPECTED(destroyed_, false);

  const auto targets = get_all_segments();
  if (!targets.empty()) {
    return sql_engine_->execute_group_by(schema_, query, targets);
  }

  return GroupResults();
}

// Looks up documents by primary key.
// The returned map has an entry for every requested key; the value is null
// when the key is unknown, the document is marked deleted, or no segment
// covers its doc id. Returns an error when the collection is destroyed.
Result<DocPtrMap> CollectionImpl::Fetch(
    const std::vector<std::string> &pks) const {
  std::shared_lock lock(schema_handle_mtx_);

  CHECK_DESTROY_RETURN_STATUS_EXPECTED(destroyed_, false);

  const auto candidates = get_all_segments();

  DocPtrMap found_docs;

  for (const auto &key : pks) {
    Doc::Ptr hit{nullptr};

    uint64_t doc_id = 0;
    if (id_map_->has(key, &doc_id) && !delete_store_->is_deleted(doc_id)) {
      if (auto owner = local_segment_by_doc_id(doc_id, candidates)) {
        hit = owner->Fetch(doc_id);
      } else {
        LOG_WARN("doc_id: %zu segment not found", static_cast<size_t>(doc_id));
      }
    }

    found_docs.insert({key, hit});
  }

  return found_docs;
}

// Opens an existing collection from disk.
//
// Order matters: the file lock is taken first, then the version (manifest)
// is recovered because it supplies the schema, the mmap setting and the
// segment metas needed to reopen the id map, delete store and segments.
//
// Returns InvalidArgument when the collection directory does not exist, or
// the first error reported by any recovery step.
Status CollectionImpl::recovery() {
  if (!FileHelper::DirectoryExists(path_.c_str())) {
    return Status::InvalidArgument("collection path{", path_, "} not exist.");
  }

  // get lock file
  auto s = acquire_file_lock(false);
  CHECK_RETURN_STATUS(s);

  // recovery version first
  auto version_manager = VersionManager::Recovery(path_);
  if (!version_manager.has_value()) {
    return version_manager.error();
  }

  version_manager_ = version_manager.value();
  const auto v = version_manager_->get_current_version();
  schema_ = std::make_shared<CollectionSchema>(v.schema());
  // The persisted mmap setting overrides the one passed in options_.
  options_.enable_mmap_ = v.enable_mmap();

  // recover id map & delete store
  s = recover_idmap_and_delete_store();
  CHECK_RETURN_STATUS(s);

  // recover persist segments
  segment_manager_ = std::make_shared<SegmentManager>();

  auto segment_metas = v.persisted_segment_metas();

  // Persisted segments are always opened read-only.
  SegmentOptions seg_options;
  seg_options.read_only_ = true;
  seg_options.enable_mmap_ = options_.enable_mmap_;
  for (size_t i = 0; i < segment_metas.size(); ++i) {
    auto segment = Segment::Open(path_, *schema_, *segment_metas[i], id_map_,
                                 delete_store_, version_manager_, seg_options);
    if (!segment) {
      return segment.error();
    }

    // Do not continue with a segment missing from the manager.
    s = segment_manager_->add_segment(segment.value());
    CHECK_RETURN_STATUS(s);
  }

  // The writing segment follows the collection's own read-only setting.
  seg_options.read_only_ = options_.read_only_;
  seg_options.max_buffer_size_ = options_.max_buffer_size_;

  // recover writing segment
  auto writing_segment =
      Segment::Open(path_, *schema_, *v.writing_segment_meta(), id_map_,
                    delete_store_, version_manager_, seg_options);
  if (!writing_segment) {
    return writing_segment.error();
  }

  writing_segment_ = writing_segment.value();
  segment_id_allocator_.store(v.next_segment_id());

  return Status::OK();
}

// Reopens the id map and loads the delete store from the files named by the
// current version's path suffixes. Requires version_manager_ and schema_ to
// be set. Returns InternalError when either component cannot be opened.
Status CollectionImpl::recover_idmap_and_delete_store() {
  const auto current = version_manager_->get_current_version();
  const auto &collection_name = schema_->name();

  // idmap
  const std::string id_file = FileHelper::MakeFilePath(
      path_, FileID::ID_FILE, current.id_map_path_suffix());
  id_map_ = IDMap::CreateAndOpen(collection_name, id_file, false,
                                 options_.read_only_);
  if (!id_map_) {
    return Status::InternalError("recovery idmap failed");
  }

  // delete store
  const std::string delete_file = FileHelper::MakeFilePath(
      path_, FileID::DELETE_FILE, current.delete_snapshot_path_suffix());
  delete_store_ = DeleteStore::CreateAndLoad(collection_name, delete_file);
  if (!delete_store_) {
    return Status::InternalError("recovery delete store failed");
  }

  return Status::OK();
}

// Creates a brand-new collection on disk at path_.
//
// The path must be non-empty, match COLLECTION_PATH_REGEX and not exist yet.
// After the directory is created, the lock file, id map, delete store,
// version manager and first writing segment are initialised, and an initial
// version is applied and flushed.
//
// Returns InvalidArgument for path problems or the first error reported by
// any initialisation step.
// NOTE(review): nothing here removes the directory again when a later step
// fails - confirm whether a caller cleans up.
Status CollectionImpl::create() {
  // check path
  if (path_.empty()) {
    return Status::InvalidArgument("path validate failed: path is empty");
  }
  if (!std::regex_match(path_, COLLECTION_PATH_REGEX)) {
    return Status::InvalidArgument("path validate failed: path[", path_,
                                   "] cannot pass the regex verification");
  }
  if (ailego::FileHelper::IsExist(path_.c_str())) {
    return Status::InvalidArgument("path validate failed: path[", path_,
                                   "] exists");
  }

  // check schema
  auto s = schema_->validate();
  CHECK_RETURN_STATUS(s);

  if (!ailego::FileHelper::MakePath(path_.c_str())) {
    return Status::InvalidArgument("create collection path failed: ", path_,
                                   ", error: ", strerror(errno));
  }

  // init lock file
  s = acquire_file_lock(true);
  CHECK_RETURN_STATUS(s);

  // init idmap & delete store
  s = create_idmap_and_delete_store();
  CHECK_RETURN_STATUS(s);

  // init version manager
  s = init_version_manager();
  CHECK_RETURN_STATUS(s);

  // create segment
  s = init_writing_segment();
  CHECK_RETURN_STATUS(s);

  // init version
  // Segment id 0 is taken by the first writing segment, so allocation
  // continues from 1.
  Version version;
  version.set_schema(*schema_);
  version.set_enable_mmap(options_.enable_mmap_);
  version.reset_writing_segment_meta(writing_segment_->meta());
  version.set_id_map_path_suffix(0);
  version.set_delete_snapshot_path_suffix(0);
  version.set_next_segment_id(1);

  // A failed apply must not be followed by a flush of the previous state.
  s = version_manager_->apply(version);
  CHECK_RETURN_STATUS(s);
  s = version_manager_->flush();
  CHECK_RETURN_STATUS(s);

  segment_id_allocator_.store(1);
  segment_manager_ = std::make_unique<SegmentManager>();

  return Status::OK();
}

// Creates the id map and an empty delete store for a new collection, both
// using file suffix 0. Returns InternalError when the id map cannot be
// created.
// NOTE(review): the result of the delete store flush is not inspected.
Status CollectionImpl::create_idmap_and_delete_store() {
  const auto &collection_name = schema_->name();

  // idmap
  const std::string id_file =
      FileHelper::MakeFilePath(path_, FileID::ID_FILE, 0);
  id_map_ = IDMap::CreateAndOpen(collection_name, id_file, true,
                                 options_.read_only_);
  if (!id_map_) {
    return Status::InternalError("create id map failed");
  }

  // delete store
  const std::string delete_file =
      FileHelper::MakeFilePath(path_, FileID::DELETE_FILE, 0);
  delete_store_ = std::make_shared<DeleteStore>(collection_name);
  // A freshly created collection writes an empty snapshot right away.
  delete_store_->flush(delete_file);

  return Status::OK();
}

// Creates the version manager for a new collection, seeded with an empty
// Version. Returns the creation error on failure.
Status CollectionImpl::init_version_manager() {
  auto created = VersionManager::Create(path_, Version{});
  if (created.has_value()) {
    version_manager_ = created.value();
    return Status::OK();
  }

  return created.error();
}

// Creates and opens the first writing segment of a new collection, with
// segment id 0 and minimum doc id 0. Returns the creation error on failure.
Status CollectionImpl::init_writing_segment() {
  SegmentOptions seg_options;
  seg_options.enable_mmap_ = options_.enable_mmap_;
  seg_options.max_buffer_size_ = options_.max_buffer_size_;
  seg_options.read_only_ = options_.read_only_;

  auto created =
      Segment::CreateAndOpen(path_, *schema_, 0, 0, id_map_, delete_store_,
                             version_manager_, seg_options);
  if (created) {
    writing_segment_ = created.value();
    return Status::OK();
  }

  return created.error();
}

// Creates or opens "<path_>/LOCK" and tries to lock it without waiting.
//
// Params:
//   create - true to create the lock file, false to open an existing one.
// A read-only collection takes a shared lock, otherwise the regular
// (non-shared) lock is taken. Returns InternalError when the file cannot be
// created/opened or the lock cannot be obtained.
Status CollectionImpl::acquire_file_lock(bool create) {
  const std::string lock_path =
      ailego::StringHelper::Concat(path_, "/", "LOCK");

  if (create && !lock_file_.create(lock_path.c_str(), 0)) {
    return Status::InternalError("Can't create lock file");
  }
  if (!create && !lock_file_.open(lock_path.c_str(), false)) {
    return Status::InternalError("Can't open lock file");
  }

  if (options_.read_only_) {
    if (ailego::FileLock::TryLockShared(lock_file_.native_handle())) {
      return Status::OK();
    }
    return Status::InternalError("Can't lock read-only collection");
  }

  if (ailego::FileLock::TryLock(lock_file_.native_handle())) {
    return Status::OK();
  }
  return Status::InternalError("Can't lock read-write collection");
}

// Binary-searches `segments` for the one whose [min_doc_id, max_doc_id]
// range contains `doc_id`. Returns null when no segment covers it.
// The search only works if `segments` is ordered by doc id range; that is
// assumed here, not checked.
Segment::Ptr CollectionImpl::local_segment_by_doc_id(
    uint64_t doc_id, const std::vector<Segment::Ptr> &segments) const {
  size_t lo = 0;
  size_t hi = segments.size();

  while (lo < hi) {
    const size_t probe = lo + (hi - lo) / 2;
    const auto &candidate = segments[probe];
    const uint64_t range_begin = candidate->meta()->min_doc_id();
    const uint64_t range_end = candidate->meta()->max_doc_id();

    if (doc_id < range_begin) {
      hi = probe;
      continue;
    }
    if (doc_id > range_end) {
      lo = probe + 1;
      continue;
    }
    return candidate;
  }

  return nullptr;
}

// Returns the persisted segments, followed by the writing segment when it
// holds at least one document.
std::vector<Segment::Ptr> CollectionImpl::get_all_segments() const {
  auto all = get_all_persist_segments();

  const bool writing_has_docs = writing_segment_->doc_count() > 0;
  if (writing_has_docs) {
    all.push_back(writing_segment_);
  }

  return all;
}

// Returns the segments currently registered with the segment manager; the
// writing segment is not part of that list.
std::vector<Segment::Ptr> CollectionImpl::get_all_persist_segments() const {
  std::vector<Segment::Ptr> persisted = segment_manager_->get_segments();
  return persisted;
}

}  // namespace zvec


================================================
FILE: src/db/common/CMakeLists.txt
================================================
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

# zvec_common static library, built from every .cc file in this directory
cc_library(
    NAME zvec_common STATIC STRICT ALWAYS_LINK
    SRCS *.cc
    LIBS    glog
            zvec_ailego
            roaring
            rocksdb
    INCS .
    VERSION "${PROXIMA_ZVEC_VERSION}"
)


================================================
FILE: src/db/common/cgroup_util.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "db/common/cgroup_util.h"

namespace zvec {

// Static member definitions
// Values cached by initialize(); getCpuLimit() / getMemoryLimit() return
// cpu_cores_ and memory_limit_ after making sure initialize() has run.
int CgroupUtil::cpu_cores_ = 0;
uint64_t CgroupUtil::memory_limit_ = 0;
// Set once initialize() has completed, so later calls return immediately.
bool CgroupUtil::initialized_ = false;
// Previous CPU time sample and when it was taken (presumably consumed by
// calculateCpuUsage() to compute usage between two calls - confirm there).
unsigned long long CgroupUtil::last_idle_time_ = 0;
unsigned long long CgroupUtil::last_total_time_ = 0;
std::chrono::steady_clock::time_point CgroupUtil::last_cpu_check_;

// 2^63 - 4096; presumably the value the cgroup memory controller reports
// when no limit is configured - confirm against its use.
#define ZVEC_CGROUP_MEMORY_UNLIMITED (9223372036854771712ULL)

// One-time setup: resolves the CPU and memory limits and records the CPU
// statistics baseline. Every call after the first is a no-op.
void CgroupUtil::initialize() {
  if (!initialized_) {
    updateCpuCores();
    updateMemoryLimit();
    initializeCpuStats();

    initialized_ = true;
  }
}

// Returns the cached CPU core count, running the one-time initialization
// first when it has not happened yet.
int CgroupUtil::getCpuLimit() {
  initialize();
  const int cores = cpu_cores_;
  return cores;
}

// Returns the cached memory limit, running the one-time initialization first
// when it has not happened yet. The value is 0 when no limit could be found.
uint64_t CgroupUtil::getMemoryLimit() {
  initialize();
  const uint64_t limit_bytes = memory_limit_;
  return limit_bytes;
}

// Returns the current CPU usage as computed by calculateCpuUsage(), after
// making sure the one-time initialization has run.
double CgroupUtil::getCpuUsage() {
  initialize();
  const double usage_percent = calculateCpuUsage();
  return usage_percent;
}

// Returns the current memory usage as reported by getCurrentMemoryUsage(),
// after making sure the one-time initialization has run.
uint64_t CgroupUtil::getMemoryUsage() {
  initialize();
  const uint64_t used_bytes = getCurrentMemoryUsage();
  return used_bytes;
}

// Returns the system uptime in seconds, or 0 when it cannot be determined
// (query failure or unsupported platform).
uint64_t CgroupUtil::getUptime() {
  uint64_t uptime_seconds = 0;
#if defined(PLATFORM_LINUX)
  struct sysinfo sys_info;
  if (sysinfo(&sys_info) == 0) {
    uptime_seconds = sys_info.uptime;
  }
#elif defined(PLATFORM_MACOS)
  // Uptime is derived as "now" minus the kernel-reported boot time.
  int name[2] = {CTL_KERN, KERN_BOOTTIME};
  struct timeval boot_tv;
  size_t boot_tv_size = sizeof(boot_tv);
  if (sysctl(name, 2, &boot_tv, &boot_tv_size, NULL, 0) == 0) {
    const time_t booted_at = boot_tv.tv_sec;
    const time_t now = time(NULL);
    uptime_seconds = now - booted_at;
  }
#endif
  return uptime_seconds;
}

void CgroupUtil::updateCpuCores() {
  if (readCpuCgroup()) {
    return;
  }

#if defined(PLATFORM_MACOS)
  int cores;
  size_t len = sizeof(cores);
  if (sysctlbyname("hw.ncpu", &cores, &len, nullptr, 0) == 0) {
    cpu_cores_ = cores;
  } else {
    cpu_cores_ = 1;
  }
#elif defined(PLATFORM_LINUX)
  cpu_cores_ = sysconf(_SC_NPROCESSORS_ONLN);
  if (cpu_cores_ <= 0) {
    cpu_cores_ = 1;
  }
#endif
}

// Tries to derive cpu_cores_ from the cgroup CPU quota.
//
// Returns true and sets cpu_cores_ to ceil(quota / period) when a finite quota
// is configured; returns false (cpu_cores_ untouched) when there is no quota,
// the files are missing or unparsable, or the platform is not Linux.
bool CgroupUtil::readCpuCgroup() {
#if defined(PLATFORM_LINUX)
  // cgroup v2: cpu.max holds "<quota> <period>" separated by whitespace, where
  // <quota> is the literal "max" when no limit is set. The quota is therefore
  // read as a token first and only then converted to a number.
  std::ifstream file("/sys/fs/cgroup/cpu.max");
  if (file.is_open()) {
    std::string quota_token;
    uint64_t period = 0;
    if (!(file >> quota_token >> period) || quota_token == "max") {
      return false;
    }

    uint64_t quota = 0;
    std::istringstream quota_stream(quota_token);
    if (!(quota_stream >> quota) || quota == 0 || period == 0) {
      return false;
    }

    cpu_cores_ =
        static_cast<int>(std::ceil(static_cast<double>(quota) / period));
    return true;
  }

  // cgroup v1: quota and period live in separate files; a quota of -1 means
  // "unlimited" and is rejected by the `quota > 0` check below.
  std::ifstream quota_file("/sys/fs/cgroup/cpu/cpu.cfs_quota_us");
  std::ifstream period_file("/sys/fs/cgroup/cpu/cpu.cfs_period_us");

  if (quota_file.is_open() && period_file.is_open()) {
    long long quota = 0;
    long long period = 0;
    quota_file >> quota;
    period_file >> period;

    if (quota > 0 && period > 0) {
      cpu_cores_ =
          static_cast<int>(std::ceil(static_cast<double>(quota) / period));
      return true;
    }
  }
#endif
  return false;
}

// Resolves memory_limit_. A cgroup limit (readMemoryCgroup()) wins; otherwise
// the physical memory size reported by the platform is used, or 0 when that
// query fails.
void CgroupUtil::updateMemoryLimit() {
  const bool limited_by_cgroup = readMemoryCgroup();
  if (limited_by_cgroup) {
    return;
  }

#if defined(PLATFORM_MACOS)
  uint64_t physical_bytes;
  size_t value_size = sizeof(physical_bytes);
  const bool query_ok =
      sysctlbyname("hw.memsize", &physical_bytes, &value_size, nullptr, 0) ==
      0;
  memory_limit_ = query_ok ? physical_bytes : 0;
#elif defined(PLATFORM_LINUX)
  const long page_count = sysconf(_SC_PHYS_PAGES);
  const long bytes_per_page = sysconf(_SC_PAGE_SIZE);
  const bool values_ok = page_count > 0 && bytes_per_page > 0;
  memory_limit_ =
      values_ok ? static_cast<uint64_t>(page_count) * bytes_per_page : 0;
#endif
}

// Tries to read the memory limit from the cgroup filesystem.
//
// Returns true and stores the limit in memory_limit_ when a finite, non-zero
// limit is configured. Returns false (memory_limit_ untouched) when the limit
// is absent, unlimited or unparsable, or when the platform is not Linux.
bool CgroupUtil::readMemoryCgroup() {
#if defined(PLATFORM_LINUX)
  // cgroup v2: memory.max contains either a byte count or the literal "max";
  // the latter makes the numeric extraction fail, which is treated as
  // "no limit".
  std::ifstream file("/sys/fs/cgroup/memory.max");
  if (file.is_open()) {
    uint64_t limit = 0;
    if (!(file >> limit) || limit == 0 ||
        limit == std::numeric_limits<uint64_t>::max() ||
        limit == ZVEC_CGROUP_MEMORY_UNLIMITED) {
      return false;
    }
    memory_limit_ = limit;
    return true;
  }

  // cgroup v1: only accept a value that was actually parsed; a failed read
  // must not be mistaken for a limit of 0 bytes.
  std::ifstream v1_file("/sys/fs/cgroup/memory/memory.limit_in_bytes");
  if (v1_file.is_open()) {
    uint64_t limit = 0;
    if ((v1_file >> limit) && limit != 0 &&
        limit < std::numeric_limits<uint64_t>::max() &&
        limit != ZVEC_CGROUP_MEMORY_UNLIMITED) {
      memory_limit_ = limit;
      return true;
    }
  }
#endif
  return false;
}

// Records the current steady-clock time in last_cpu_check_ and, on Linux,
// takes a first /proc/stat sample.
void CgroupUtil::initializeCpuStats() {
  const auto now = std::chrono::steady_clock::now();
  last_cpu_check_ = now;
#if defined(PLATFORM_LINUX)
  readProcStat();
#endif
}

#if defined(PLATFORM_LINUX)
// Samples the first line of /proc/stat and stores the idle and total tick
// counters in last_idle_time_ / last_total_time_.
//
// Returns false, leaving the stored counters untouched, when the file cannot
// be opened, is empty, or its first line does not start with the "cpu" label.
bool CgroupUtil::readProcStat() {
  std::ifstream file("/proc/stat");
  if (!file.is_open()) {
    return false;
  }

  std::string line;
  if (!std::getline(file, line)) {
    return false;
  }

  std::istringstream iss(line);
  std::string cpu_label;
  iss >> cpu_label;

  if (cpu_label != "cpu") {
    return false;
  }

  // Zero-initialised on purpose: once one extraction fails, the remaining
  // variables are not written by the stream, so a short line would otherwise
  // leave them indeterminate. Missing fields simply count as zero ticks.
  unsigned long long user = 0, nice = 0, system = 0, idle = 0, iowait = 0,
                     irq = 0, softirq = 0, steal = 0;
  iss >> user >> nice >> system >> idle >> iowait >> irq >> softirq >> steal;

  // iowait is accounted as idle time.
  const unsigned long long idle_time = idle + iowait;
  const unsigned long long total_time =
      user + nice + system + irq + softirq + steal + idle_time;

  last_idle_time_ = idle_time;
  last_total_time_ = total_time;

  return true;
}
#endif

// Returns the current memory usage.
//
// Linux: the cgroup usage counter when it is non-zero, otherwise the value
// derived from /proc/meminfo. macOS: the value from getMacOSMemoryUsage().
// Any other platform: 0, so that the function always returns a value.
uint64_t CgroupUtil::getCurrentMemoryUsage() {
#if defined(PLATFORM_LINUX)
  // cgroup
  const uint64_t cgroup_usage = readMemoryUsageCgroup();
  if (cgroup_usage > 0) {
    return cgroup_usage;
  }

  // back to /proc/meminfo
  return readMemoryUsageProc();
#elif defined(PLATFORM_MACOS)
  return getMacOSMemoryUsage();
#else
  return 0;
#endif
}

#if defined(PLATFORM_LINUX)
// Reads the memory usage counter from the cgroup filesystem.
//
// The cgroup v2 file is consulted first; the v1 file is only tried when the
// v2 file cannot be opened. Returns 0 when neither file can be opened or the
// opened file does not contain a parsable number.
uint64_t CgroupUtil::readMemoryUsageCgroup() {
  // cgroup v2
  std::ifstream file("/sys/fs/cgroup/memory.current");
  if (file.is_open()) {
    uint64_t usage = 0;
    // Report 0 instead of an indeterminate value when the read fails.
    return (file >> usage) ? usage : 0;
  }

  // cgroup v1
  std::ifstream v1_file("/sys/fs/cgroup/memory/memory.usage_in_bytes");
  if (v1_file.is_open()) {
    uint64_t usage = 0;
    return (v1_file >> usage) ? usage : 0;
  }

  return 0;
}

// Derives the used memory from /proc/meminfo.
//
// Prefers MemTotal - MemAvailable; when MemAvailable is missing (or not below
// MemTotal) it falls back to MemTotal - MemFree - Buffers - Cached. Returns 0
// when the file cannot be opened or the needed fields are absent.
uint64_t CgroupUtil::readMemoryUsageProc() {
  std::ifstream meminfo("/proc/meminfo");
  if (!meminfo.is_open()) {
    return 0;
  }

  uint64_t total = 0;
  uint64_t unused = 0;
  uint64_t available = 0;
  uint64_t buffer_bytes = 0;
  uint64_t cache_bytes = 0;

  // Each entry maps a line prefix to the variable that receives its value.
  struct Field {
    const char *prefix;
    uint64_t *slot;
  };
  const Field fields[] = {
      {"MemTotal:", &total},         {"MemFree:", &unused},
      {"MemAvailable:", &available}, {"Buffers:", &buffer_bytes},
      {"Cached:", &cache_bytes},
  };

  for (std::string row; std::getline(meminfo, row);) {
    for (const Field &field : fields) {
      // Only a match at the very start of the line counts.
      if (row.rfind(field.prefix, 0) == 0) {
        *field.slot = extractMemoryValue(row);
        break;
      }
    }
  }

  if (available > 0 && total > available) {
    return total - available;
  }

  if (total > 0 && unused > 0) {
    return total - unused - buffer_bytes - cache_bytes;
  }

  return 0;
}
#endif

#if defined(PLATFORM_MACOS)
uint64_t CgroupUtil::getMacOSMemoryUsage() {
  mach_port_t host_port = mach_host_self();
  mach_msg_type_number_t host_size =
      sizeof(vm_statistics64_data_t) / sizeof(integer_t);
  vm_size_t page_size;
  vm_statistics64_data_t vm_stat;

  if (host_page_size(host_port, &page_size) != KERN_SUCCESS) {
    return 0;
  }

  if (host_statistics64(host_port, HOST_VM_INFO64, (host_info64_t)&vm_stat,
                        &host_size) != KERN_SUCCESS) {
    return 0;
  }

  uint64_t used_memory =
      ((vm_stat.active_count + vm_stat.inactive_count + vm_stat.wire_count) *
       page_size);

  return used_memory;
}
#endif

// Parses the numeric value of a "Key:   <number> [unit]" line.
//
// The number following the first ':' is returned, multiplied by 1024 when the
// token after it is exactly "kB". Returns 0 when the line has no ':' or no
// parsable number after it.
uint64_t CgroupUtil::extractMemoryValue(const std::string &line) {
  const size_t colon_pos = line.find(':');
  if (colon_pos == std::string::npos) {
    return 0;
  }

  std::istringstream iss(line.substr(colon_pos + 1));

  uint64_t value = 0;
  if (!(iss >> value)) {
    // Nothing numeric after the colon: report 0 rather than an
    // indeterminate value.
    return 0;
  }

  std::string unit;
  if ((iss >> unit) && unit == "kB") {
    value *= 1024;
  }

  return value;
}

// Dispatches to the platform-specific CPU usage routine; yields 0.0 on
// platforms that have none.
double CgroupUtil::calculateCpuUsage() {
  double usage_percent = 0.0;
#if defined(PLATFORM_LINUX)
  usage_percent = calculateLinuxCpuUsage();
#elif defined(PLATFORM_MACOS)
  usage_percent = calculateMacOSCpuUsage();
#endif
  return usage_percent;
}

#if defined(PLATFORM_LINUX)
// Measures CPU usage over a 100 ms window from two /proc/stat samples.
//
// Returns the busy share of the window as a percentage clamped to
// [0.0, 100.0], or 0.0 when either sample cannot be taken or no ticks elapsed
// between them. The call blocks the calling thread for about 100 ms.
// After a successful call last_idle_time_ / last_total_time_ hold the second
// sample.
double CgroupUtil::calculateLinuxCpuUsage() {
  // First sample: readProcStat() stores it in the last_* members.
  if (!readProcStat()) {
    return 0.0;
  }
  const unsigned long long baseline_idle = last_idle_time_;
  const unsigned long long baseline_total = last_total_time_;

  std::this_thread::sleep_for(std::chrono::milliseconds(100));

  // Second sample goes through the same parser instead of a duplicated copy.
  // On failure readProcStat() leaves the last_* members at the first sample.
  if (!readProcStat()) {
    return 0.0;
  }

  const unsigned long long idle_delta = last_idle_time_ - baseline_idle;
  const unsigned long long total_delta = last_total_time_ - baseline_total;

  if (total_delta == 0) {
    return 0.0;
  }

  const double cpu_usage =
      100.0 * (1.0 - static_cast<double>(idle_delta) / total_delta);
  // The clamp also absorbs counters that moved backwards between samples.
  return std::max(0.0, std::min(100.0, cpu_usage));
}
#endif

#if defined(PLATFORM_MACOS)
double CgroupUtil::calculateMacOSCpuUsage() {
  host_cpu_load_info_data_t cpuinfo;
  mach_msg_type_number_t count = HOST_CPU_LOAD_INFO_COUNT;

  if (host_statistics(mach_host_self(), HOST_CPU_LOAD_INFO,
                      (host_info_t)&cpuinfo, &count) != KERN_SUCCESS) {
    return 0.0;
  }

  unsigned long long total_tick =
      cpuinfo.cpu_ticks[CPU_STATE_USER] + cpuinfo.cpu_ticks[CPU_STATE_SYSTEM] +
      cpuinfo.cpu_ticks[CPU_STATE_NICE] + cpuinfo.cpu_ticks[CPU_STATE_IDLE];

  unsigned long long idle_tick = cpuinfo.cpu_ticks[CPU_STATE_IDLE];

  static unsigned long long prev_total = 0;
  static unsigned long long prev_idle = 0;

  if (prev_total == 0) {
    prev_total = total_tick;
    prev_idle = idle_tick;
    std::this_thread::sleep_for(std::chrono::milliseconds(100));
    return calculateMacOSCpuUsage();
  }

  unsigned long long total_delta = total_tick - prev_total;
  unsigned long long idle_delta = idle_tick - prev_idle;

  prev_total = total_tick;
  prev_idle = idle_tick;

  if (total_delta == 0) {
    return 0.0;
  }

  double cpu_usage =
      100.0 * (1.0 - static_cast<double>(idle_delta) / total_delta);
  return std::max(0.0, std::min(100.0, cpu_usage));
}
#endif

}  // namespace zvec

================================================
FILE: src/db/common/cgroup_util.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <chrono>
#include <cmath>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <memory>
#include <sstream>
#include <stdexcept>
#include <string>
#include <thread>
#include <unordered_map>
#include <vector>

#if defined(__APPLE__)
#define PLATFORM_MACOS 1
#include <mach/mach.h>
#include <sys/sysctl.h>
#elif defined(__linux__)
#define PLATFORM_LINUX 1
#include <sys/sysinfo.h>
#include <unistd.h>
#endif

namespace zvec {

// Static-only helper that reports CPU / memory limits and usage, preferring
// cgroup settings on Linux and falling back to platform queries otherwise.
// The limits are computed once, on first use, and cached in static members.
class CgroupUtil {
 public:
  // Static methods to get CPU and memory limits
  // Both trigger the one-time initialization and then return the cached value.
  static int getCpuLimit();
  static uint64_t getMemoryLimit();

  // Static methods to get other resources
  // getCpuUsage() returns a percentage in [0, 100]; on Linux every call sleeps
  // for 100 ms between two samples.
  static double getCpuUsage();
  static uint64_t getMemoryUsage();
  // Seconds since boot, or 0 when unavailable.
  static uint64_t getUptime();

 private:
  // Not instantiable: every member is static.
  CgroupUtil() = default;
  ~CgroupUtil() = default;

  // Static member variables to store the computed values
  static int cpu_cores_;
  static uint64_t memory_limit_;
  static bool initialized_;

  // Other member variables for tracking state
  // Idle / total tick counters of the latest /proc/stat sample.
  static unsigned long long last_idle_time_;
  static unsigned long long last_total_time_;
  static std::chrono::steady_clock::time_point last_cpu_check_;

  // Static initialization method
  static void initialize();

  // Helper methods (also made static)
  // The read*Cgroup() helpers return true when a cgroup limit was found and
  // stored; the update*() helpers fall back to platform queries otherwise.
  static void updateCpuCores();
  static bool readCpuCgroup();
  static void updateMemoryLimit();
  static bool readMemoryCgroup();
  static void initializeCpuStats();

#if defined(PLATFORM_LINUX)
  // Refreshes last_idle_time_ / last_total_time_ from /proc/stat.
  static bool readProcStat();
#endif

  static uint64_t getCurrentMemoryUsage();

#if defined(PLATFORM_LINUX)
  static uint64_t readMemoryUsageCgroup();
  static uint64_t readMemoryUsageProc();
#endif

#if defined(PLATFORM_MACOS)
  static uint64_t getMacOSMemoryUsage();
#endif

  // Parses the number after ':' in a "Key: value [kB]" line.
  static uint64_t extractMemoryValue(const std::string &line);
  static double calculateCpuUsage();

#if defined(PLATFORM_LINUX)
  static double calculateLinuxCpuUsage();
#endif

#if defined(PLATFORM_MACOS)
  static double calculateMacOSCpuUsage();
#endif
};

}  // namespace zvec

================================================
FILE: src/db/common/concurrent_roaring_bitmap.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "concurrent_roaring_bitmap.h"
#include <zvec/ailego/hash/crc32c.h>


namespace zvec {


// Writes the bitmap in the portable roaring format into *out, resizing it to
// the exact serialized size. Returns InternalError when the number of bytes
// written differs from the size computed up front.
Status ConcurrentRoaringBitmap32::serialize(std::string *out) {
  std::unique_lock<std::shared_mutex> guard(mutex_);

  const size_t expected_bytes = roaring_bitmap_portable_size_in_bytes(bitmap_);
  out->resize(expected_bytes);

  const size_t actual_bytes =
      roaring_bitmap_portable_serialize(bitmap_, out->data());
  if (actual_bytes != expected_bytes) {
    LOG_ERROR("Failed to serialize bitmap");
    return Status::InternalError();
  }
  return Status::OK();
}


// Replaces the bitmap with the one encoded in `in` (portable roaring format).
//
// The input is parsed before the current bitmap is touched, so on failure the
// object keeps its previous contents and stays usable; InternalError is
// returned in that case.
Status ConcurrentRoaringBitmap32::deserialize(const std::string &in) {
  // Parsing only reads the caller's buffer, so it is done outside the lock.
  roaring_bitmap_t *parsed =
      roaring_bitmap_portable_deserialize_safe(in.data(), in.size());
  if (parsed == nullptr) {
    LOG_ERROR("Failed to deserialize bitmap");
    return Status::InternalError();
  }

  std::unique_lock<std::shared_mutex> lock(mutex_);
  roaring_bitmap_free(bitmap_);
  bitmap_ = parsed;
  return Status::OK();
}


// Writes the bitmap to `file_path` as a BitmapMetaHeader followed by the
// serialized roaring payload.
//
// An existing regular file is replaced only when `overwrite` is true.
// Returns:
//   InvalidArgument - the path exists but is not a regular file
//   AlreadyExists   - the path exists and `overwrite` is false
//   InternalError   - removing, creating, serializing or writing failed
Status ConcurrentRoaringBitmap64::serialize(const std::string &file_path,
                                            bool overwrite) {
  std::unique_lock<std::shared_mutex> lock(mutex_);
  FILE file;
  const std::string file_str = "[" + file_path + "]";

  if (FILE::IsExist(file_path)) {
    if (!FILE::IsRegular(file_path)) {
      auto msg = debug_str(file_str, " is not a regular file");
      LOG_ERROR("%s", msg.c_str());
      return Status::InvalidArgument(msg);
    }
    if (!overwrite) {
      auto msg = debug_str(file_str, " already exists");
      LOG_ERROR("%s", msg.c_str());
      return Status::AlreadyExists(msg);
    }
    if (!FILE::RemovePath(file_path)) {
      auto msg = debug_str("failed to remove ", file_str);
      LOG_ERROR("%s", msg.c_str());
      return Status::InternalError(msg);
    }
  }
  if (!file.create(file_path.c_str(), 0)) {
    auto msg = debug_str("failed to create ", file_str);
    LOG_ERROR("%s", msg.c_str());
    return Status::InternalError(msg);
  }

  // Serialize bitmap to buffer
  // The header is value-initialised so that the reserved_ words are written
  // as zeros instead of whatever happened to be on the stack.
  BitmapMetaHeader header{};
  size_t bitmap_size;
  std::vector<char> bitmap_buffer;
  if (is_32bit_) {
    bitmap_size = bitmap32_->getSizeInBytes();
    bitmap_buffer.resize(bitmap_size);
    if (bitmap32_->write(bitmap_buffer.data()) != bitmap_size) {
      auto msg = debug_str("failed to serialize bitmap to buffer");
      LOG_ERROR("%s", msg.c_str());
      return Status::InternalError(msg);
    }
    header.is_32bit = 1;
  } else {
    bitmap_size = bitmap64_->getSizeInBytes();
    bitmap_buffer.resize(bitmap_size);
    if (bitmap64_->write(bitmap_buffer.data()) != bitmap_size) {
      auto msg = debug_str("failed to serialize bitmap to buffer");
      LOG_ERROR("%s", msg.c_str());
      return Status::InternalError(msg);
    }
    header.is_32bit = 0;
  }
  header.magic = roaring_magic_number;
  // The checksum covers the payload only, not the header.
  header.checksum = ailego::Crc32c::Hash(bitmap_buffer.data(), bitmap_size);
  header.timestamp = time(nullptr);

  // Write meta header to file
  if (file.write(&header, sizeof(header)) != sizeof(header)) {
    auto msg = debug_str("failed to serialize header to ", file_str);
    LOG_ERROR("%s", msg.c_str());
    return Status::InternalError(msg);
  }

  // Write serialized bitmap to file
  if (file.write(bitmap_buffer.data(), bitmap_size) != bitmap_size) {
    auto msg = debug_str("failed to write bitmap data to ", file_str);
    LOG_ERROR("%s", msg.c_str());
    return Status::InternalError(msg);
  }

  LOG_DEBUG("%s: serialized bitmap to file[%s], checksum[%u], timestamp[%zu]",
            identifier_.c_str(), file_path.c_str(), header.checksum,
            (size_t)header.timestamp);
  return Status::OK();
}


// Loads the bitmap from a file previously produced by serialize().
//
// The header (magic number, bitmap type) and the payload checksum are
// verified before the payload is decoded. The object's state is replaced only
// after decoding has succeeded, so on any error the previous bitmap and its
// 32/64-bit mode are left intact.
// Returns:
//   NotFound        - the file does not exist
//   InvalidArgument - the path is not a regular file
//   InternalError   - open/read failure, malformed header, checksum mismatch,
//                     or undecodable payload
Status ConcurrentRoaringBitmap64::deserialize(const std::string &file_path) {
  std::unique_lock<std::shared_mutex> lock(mutex_);
  FILE file;
  const std::string file_str = "[" + file_path + "]";

  if (!FILE::IsExist(file_path)) {
    auto msg = debug_str(file_str, " does not exist");
    LOG_ERROR("%s", msg.c_str());
    return Status::NotFound(msg);
  }
  if (!FILE::IsRegular(file_path)) {
    auto msg = debug_str(file_str, " is not a regular file");
    LOG_ERROR("%s", msg.c_str());
    return Status::InvalidArgument(msg);
  }
  if (!file.open(file_path.c_str(), true, false)) {
    auto msg = debug_str("failed to open ", file_str);
    LOG_ERROR("%s", msg.c_str());
    return Status::InternalError(msg);
  }

  // Deserialize and verify the meta header
  BitmapMetaHeader header;
  if (file.size() < sizeof(BitmapMetaHeader)) {
    auto msg =
        debug_str(file_str, " is too small to to contain a valid bitmap");
    LOG_ERROR("%s", msg.c_str());
    return Status::InternalError(msg);
  }
  if (file.read(&header, sizeof(header)) != sizeof(header)) {
    auto msg = debug_str("failed to read meta header from ", file_str);
    LOG_ERROR("%s", msg.c_str());
    return Status::InternalError(msg);
  }
  if (header.magic != roaring_magic_number) {
    auto msg = debug_str("magic number mismatch, ", file_str);
    LOG_ERROR("%s", msg.c_str());
    return Status::InternalError(msg);
  }
  if (header.is_32bit != 0 && header.is_32bit != 1) {
    auto msg = debug_str("bitmap type mismatch, ", file_str);
    LOG_ERROR("%s", msg.c_str());
    return Status::InternalError(msg);
  }
  // Kept local until the payload has been decoded; flipping is_32bit_ early
  // would leave the object pointing at a null bitmap if a later step fails.
  const bool file_is_32bit = header.is_32bit == 1;

  // Read from file to buffer
  size_t bitmap_size = file.size() - sizeof(BitmapMetaHeader);
  std::vector<char> bitmap_buffer(bitmap_size);
  if (file.read(bitmap_buffer.data(), bitmap_size) != bitmap_size) {
    auto msg = debug_str("failed to read bitmap data from ", file_str);
    LOG_ERROR("%s", msg.c_str());
    return Status::InternalError(msg);
  }
  if (header.checksum !=
      ailego::Crc32c::Hash(bitmap_buffer.data(), bitmap_size)) {
    auto msg = debug_str("checksum mismatch, ", file_str);
    LOG_ERROR("%s", msg.c_str());
    return Status::InternalError(msg);
  }

  // Deserialize from buffer into temporaries
  std::unique_ptr<roaring::Roaring> loaded32;
  std::unique_ptr<roaring::Roaring64Map> loaded64;
  try {
    if (file_is_32bit) {
      loaded32 = std::make_unique<roaring::Roaring>(
          roaring::Roaring::readSafe(bitmap_buffer.data(), bitmap_size));
    } else {
      loaded64 = std::make_unique<roaring::Roaring64Map>(
          roaring::Roaring64Map::readSafe(bitmap_buffer.data(), bitmap_size));
    }
  } catch (...) {
    auto msg = debug_str("failed to deserialize bitmap from ", file_str);
    LOG_ERROR("%s", msg.c_str());
    return Status::InternalError(msg);
  }

  // Commit: exactly one of the two pointers is non-null afterwards, matching
  // the invariant maintained by operator= and upgrade_from_32_to_64().
  is_32bit_ = file_is_32bit;
  bitmap32_ = std::move(loaded32);
  bitmap64_ = std::move(loaded64);

  LOG_DEBUG(
      "%s: deserialized bitmap from file[%s], checksum[%u], timestamp[%zu]",
      identifier_.c_str(), file_path.c_str(), header.checksum,
      (size_t)header.timestamp);
  return Status::OK();
}


}  // namespace zvec


================================================
FILE: src/db/common/concurrent_roaring_bitmap.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <mutex>
#include <shared_mutex>
#include <sstream>
#include <roaring.hh>
#include <roaring64map.hh>
#include <roaring/roaring.h>
#include <zvec/ailego/internal/platform.h>
#include <zvec/ailego/io/file.h>
#include <zvec/ailego/logger/logger.h>
#include <zvec/db/status.h>


namespace zvec {


/*
 * A thread-safe 32-bit Roaring bitmap implementation.
 */
class ConcurrentRoaringBitmap32 {
 public:
  using Ptr = std::shared_ptr<ConcurrentRoaringBitmap32>;

  //! Creates an empty bitmap
  explicit ConcurrentRoaringBitmap32()
      : identifier_("Roaring bitmap[32-bit]"),
        bitmap_(roaring_bitmap_create()) {}

  //! Releases the underlying roaring bitmap
  ~ConcurrentRoaringBitmap32() {
    roaring_bitmap_free(bitmap_);
  }

  // The object owns a raw roaring handle and a mutex, so copying and
  // assignment are disabled.
  ConcurrentRoaringBitmap32(const ConcurrentRoaringBitmap32 &) = delete;
  ConcurrentRoaringBitmap32 &operator=(const ConcurrentRoaringBitmap32 &) =
      delete;
  ConcurrentRoaringBitmap32 &operator=(ConcurrentRoaringBitmap32 &&) = delete;


  /*****  Serialization and Deserialization - Start  *****/
 public:
  //! Serialize the bitmap (portable roaring format) into *out
  Status serialize(std::string *out);

  //! Replace the bitmap with the one encoded in `in`
  Status deserialize(const std::string &in);
  /*****  Serialization and Deserialization - End  *****/


 public:
  //! Whether `pos` is set
  bool contains(uint32_t pos) const {
    std::shared_lock<std::shared_mutex> lock(mutex_);
    return roaring_bitmap_contains(bitmap_, pos);
  }


  //! Number of values stored in the bitmap
  size_t cardinality() const {
    std::shared_lock<std::shared_mutex> lock(mutex_);
    return roaring_bitmap_get_cardinality(bitmap_);
  }


  //! Number of values in the closed interval [min_doc_id, max_doc_id].
  //! Returns 0 (and logs a warning) when min_doc_id > max_doc_id.
  size_t range_cardinality(uint32_t min_doc_id, uint32_t max_doc_id) const {
    if (ailego_unlikely(min_doc_id > max_doc_id)) {
      LOG_WARN("%s: input range min_doc_id[%u] > max_doc_id[%u]",
               identifier_.c_str(), min_doc_id, max_doc_id);
      return 0;
    }
    std::shared_lock<std::shared_mutex> lock(mutex_);
    const uint64_t max_rank = roaring_bitmap_rank(bitmap_, max_doc_id);
    // Nothing lies below 0. Computing `min_doc_id - 1` there would wrap to
    // UINT32_MAX and make the subtraction below underflow.
    const uint64_t min_rank =
        min_doc_id == 0 ? 0 : roaring_bitmap_rank(bitmap_, min_doc_id - 1);
    return max_rank - min_rank;
  }


  //! Set `pos`
  void add(uint32_t pos) {
    std::unique_lock<std::shared_mutex> lock(mutex_);
    roaring_bitmap_add(bitmap_, pos);
  }


  //! Remove every value
  void clear() {
    std::unique_lock<std::shared_mutex> lock(mutex_);
    roaring_bitmap_clear(bitmap_);
  }


  //! Remove all values in the closed interval [min, max]
  void remove_range_closed(uint32_t min, uint32_t max) {
    std::unique_lock<std::shared_mutex> lock(mutex_);
    roaring_bitmap_remove_range_closed(bitmap_, min, max);
  }


  //! Size in bytes of the portable serialized form
  size_t storage_size_in_bytes() const {
    std::unique_lock<std::shared_mutex> lock(mutex_);
    return roaring_bitmap_portable_size_in_bytes(bitmap_);
  }


  //! Raw handle to the underlying bitmap. No lock is taken here, so the
  //! caller is responsible for any synchronization while using it.
  roaring_bitmap_t *bitmap() const {
    return bitmap_;
  }


  //! Copy of the underlying bitmap taken under a shared lock.
  //! NOTE(review): the result looks caller-owned (no wrapper here frees it) -
  //! confirm against the call sites.
  roaring_bitmap_t *copy() const {
    std::shared_lock<std::shared_mutex> lock(mutex_);
    return roaring_bitmap_copy(bitmap_);
  }


 private:
  std::string identifier_;              // Prefix used in log messages
  roaring_bitmap_t *bitmap_{nullptr};   // Owned roaring handle
  mutable std::shared_mutex mutex_;     // Guards bitmap_
};


/*
 * A thread-safe Roaring bitmap implementation supporting both 32-bit and 64-bit
 * bitmaps with transparent conversion between them.
 */
class ConcurrentRoaringBitmap64 {
 public:
  using Ptr = std::shared_ptr<ConcurrentRoaringBitmap64>;


  //! Creates an empty, unnamed bitmap in 32-bit mode
  explicit ConcurrentRoaringBitmap64()
      : is_32bit_(true),
        identifier_("Roaring bitmap[32-bit]"),
        bitmap32_(std::make_unique<roaring::Roaring>()) {}

  //! Creates an empty bitmap in 32-bit mode; `name` appears in log messages
  explicit ConcurrentRoaringBitmap64(const std::string &name)
      : name_(name),
        is_32bit_(true),
        identifier_("Roaring bitmap[" + name_ + ", 32-bit]"),
        bitmap32_(std::make_unique<roaring::Roaring>()) {}

  ~ConcurrentRoaringBitmap64() = default;

  //! Deep-copies `other`. Both mutexes are acquired together through
  //! std::lock (exclusive on this, shared on other).
  ConcurrentRoaringBitmap64 &operator=(const ConcurrentRoaringBitmap64 &other) {
    if (this != &other) {
      std::unique_lock<std::shared_mutex> lock(mutex_, std::defer_lock);
      std::shared_lock<std::shared_mutex> other_lock(other.mutex_,
                                                     std::defer_lock);
      std::lock(lock, other_lock);

      name_ = other.name_;
      is_32bit_ = other.is_32bit_;
      identifier_ = other.identifier_;

      if (other.is_32bit_) {
        bitmap32_ = std::make_unique<roaring::Roaring>(*other.bitmap32_);
        bitmap64_.reset();
      } else {
        bitmap64_ = std::make_unique<roaring::Roaring64Map>(*other.bitmap64_);
        bitmap32_.reset();
      }
    }
    return *this;
  }

  /*****  Serialization and Deserialization - Start  *****/
 public:
  //! Write the bitmap (meta header + payload) to `file_path`
  Status serialize(const std::string &file_path, bool overwrite);

  //! Load the bitmap from a file written by serialize()
  Status deserialize(const std::string &file_path);

 private:
  //! Magic number stored at the start of every serialized file
  static const uint64_t roaring_magic_number{0x362DDA444AC1B99A};

  //! Fixed-size header written in front of the serialized payload
  struct BitmapMetaHeader {
    uint64_t magic;          // roaring_magic_number
    uint32_t is_32bit;       // 1 for a 32-bit payload, 0 for a 64-bit payload
    uint32_t checksum;       // CRC32C of the payload
    uint64_t timestamp;      // time(nullptr) at serialization
    uint32_t reserved_[10];  // unused
  };
  /*****  Serialization and Deserialization - End  *****/


 public:
  //! Whether `pos` is set. In 32-bit mode a position above UINT32_MAX can
  //! never be present, so it is reported as absent instead of being truncated
  //! to an unrelated 32-bit value.
  bool contains(size_t pos) const {
    std::shared_lock<std::shared_mutex> lock(mutex_);
    if (is_32bit_) {
      if (pos > std::numeric_limits<uint32_t>::max()) {
        return false;
      }
      return bitmap32_->contains(static_cast<uint32_t>(pos));
    } else {
      return bitmap64_->contains(static_cast<uint64_t>(pos));
    }
  }


  //! Number of values stored in the bitmap
  size_t cardinality() const {
    std::shared_lock<std::shared_mutex> lock(mutex_);
    if (is_32bit_) {
      return bitmap32_->cardinality();
    } else {
      return bitmap64_->cardinality();
    }
  }


  //! Number of values in the closed interval [min_doc_id, max_doc_id].
  //! Returns 0 (and logs a warning) when min_doc_id > max_doc_id.
  size_t range_cardinality(uint64_t min_doc_id, uint64_t max_doc_id) const {
    if (ailego_unlikely(min_doc_id > max_doc_id)) {
      LOG_WARN("%s: input range min_doc_id[%zu] > max_doc_id[%zu]",
               identifier_.c_str(), static_cast<size_t>(min_doc_id),
               static_cast<size_t>(max_doc_id));
      return 0;
    }
    std::shared_lock<std::shared_mutex> lock(mutex_);
    uint64_t max_rank{0}, min_rank{0};
    if (is_32bit_) {
      // A 32-bit bitmap holds nothing above UINT32_MAX: clamp the upper bound
      // and short-circuit ranges that start beyond it, rather than letting
      // the bounds be truncated to 32 bits.
      const uint64_t limit32 = std::numeric_limits<uint32_t>::max();
      if (min_doc_id > limit32) {
        return 0;
      }
      const uint32_t upper = max_doc_id > limit32
                                 ? std::numeric_limits<uint32_t>::max()
                                 : static_cast<uint32_t>(max_doc_id);
      max_rank = bitmap32_->rank(upper);
      min_rank = min_doc_id == 0
                     ? 0
                     : bitmap32_->rank(static_cast<uint32_t>(min_doc_id - 1));
    } else {
      max_rank = bitmap64_->rank(max_doc_id);
      min_rank = min_doc_id == 0 ? 0 : bitmap64_->rank(min_doc_id - 1);
    }
    return max_rank - min_rank;
  }


  //! Set `pos`, upgrading the storage to 64-bit first when `pos` does not fit
  //! in 32 bits.
  void add(size_t pos) {
    std::unique_lock<std::shared_mutex> lock(mutex_);
    if (ailego_unlikely(pos > std::numeric_limits<uint32_t>::max() &&
                        is_32bit_)) {
      upgrade_from_32_to_64();
    }
    if (is_32bit_) {
      return bitmap32_->add(static_cast<uint32_t>(pos));
    } else {
      return bitmap64_->add(static_cast<uint64_t>(pos));
    }
  }


  //! Remove every value; the current 32/64-bit mode is kept
  void clear() {
    std::unique_lock<std::shared_mutex> lock(mutex_);
    bitmap32_.reset();
    bitmap64_.reset();
    if (is_32bit_) {
      bitmap32_ = std::make_unique<roaring::Roaring>();
    } else {
      bitmap64_ = std::make_unique<roaring::Roaring64Map>();
    }
  }


  //! Remove all values in the closed interval [min, max]
  void remove_range_closed(uint64_t min, uint64_t max) {
    std::unique_lock<std::shared_mutex> lock(mutex_);
    if (!is_32bit_) {
      return bitmap64_->removeRangeClosed(min, max);
    }
    if (min > std::numeric_limits<uint32_t>::max()) {
      return;  // No valid values in the 32-bit range that can be removed
    }
    if (max > std::numeric_limits<uint32_t>::max()) {
      max = std::numeric_limits<uint32_t>::max();
    }
    bitmap32_->removeRangeClosed(min, max);
  }


  //! Size in bytes of the serialized form, including the meta header
  size_t storage_size_in_bytes() const {
    std::unique_lock<std::shared_mutex> lock(mutex_);
    if (is_32bit_) {
      return bitmap32_->getSizeInBytes() + sizeof(BitmapMetaHeader);
    } else {
      return bitmap64_->getSizeInBytes() + sizeof(BitmapMetaHeader);
    }
  }


 private:
  using FILE = ailego::File;


  //! Builds "<identifier_>: <args...>" for log and status messages
  template <typename... Args>
  std::string debug_str(Args &&...args) {
    std::ostringstream oss;
    oss << identifier_ << ": ";
    (oss << ... << args);
    return oss.str();
  }


  //! Converts the 32-bit bitmap into a 64-bit one. No lock is taken here;
  //! add() calls it while holding the exclusive lock.
  void upgrade_from_32_to_64() {
    if (ailego_unlikely(!is_32bit_)) {
      LOG_WARN("%s: bitmap is already 64-bit", identifier_.c_str());
      return;
    }
    bitmap64_ = std::make_unique<roaring::Roaring64Map>(
        roaring::Roaring64Map{std::move(*bitmap32_)});
    is_32bit_ = false;
    bitmap32_.reset();
    identifier_ = "Roaring bitmap[" + name_ + ", 64-bit]";
    LOG_DEBUG("%s: upgraded to 64-bit", identifier_.c_str());
  }


  std::string name_;        // Optional user-supplied name
  bool is_32bit_;           // Selects which of the two bitmaps is active
  std::string identifier_;  // Prefix used in log messages
  std::unique_ptr<roaring::Roaring> bitmap32_{nullptr};
  std::unique_ptr<roaring::Roaring64Map> bitmap64_{nullptr};
  mutable std::shared_mutex mutex_;  // Guards all members above
};


}  // namespace zvec


================================================
FILE: src/db/common/config.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <iostream>
#include <memory>
#include <zvec/db/config.h>
#include <zvec/db/status.h>
#include "db/common/constants.h"
#include "db/common/global_resource.h"
#include "cgroup_util.h"
#include "global_resource.h"
#include "glogger.h"
#include "logger.h"
#include "typedef.h"

namespace zvec {

// Process-exit hook: GlobalConfig::Initialize() registers this function with
// std::atexit(), and all it does is call LogUtil::Shutdown().
static void ExitLogHandler() {
  LogUtil::Shutdown();
}

// Default configuration values:
//  - memory_limit_bytes: CgroupUtil::getMemoryLimit() scaled by
//    DEFAULT_MEMORY_LIMIT_RATIO. The ratio is a float, so the product is
//    computed in floating point before being stored.
//  - log_config: a default-constructed ConsoleLogConfig.
//  - query_thread_count and optimize_thread_count: CgroupUtil::getCpuLimit().
//  - invert_to_forward_scan_ratio: 0.9; brute_force_by_keys_ratio: 0.1.
GlobalConfig::ConfigData::ConfigData()
    : memory_limit_bytes(CgroupUtil::getMemoryLimit() *
                         DEFAULT_MEMORY_LIMIT_RATIO),
      log_config(std::make_shared<ConsoleLogConfig>()),
      query_thread_count(CgroupUtil::getCpuLimit()),
      invert_to_forward_scan_ratio(0.9),
      brute_force_by_keys_ratio(0.1),
      optimize_thread_count(CgroupUtil::getCpuLimit()) {}

// Checks every field of `config` and returns the first problem found.
//
// Returns Status::OK() when the configuration is usable, otherwise
// Status::InvalidArgument describing the offending field:
//  - memory_limit_bytes must lie in
//    [MIN_MEMORY_LIMIT_BYTES, CgroupUtil::getMemoryLimit()];
//  - query_thread_count and optimize_thread_count must be non-zero;
//  - both ratios must lie in [0, 1] (NaN is rejected);
//  - log_config must be non-null, and when it reports the file logger type
//    its dir, basename, file_size and overdue_days must all be set.
Status GlobalConfig::Validate(const ConfigData &config) const {
  if (config.memory_limit_bytes < MIN_MEMORY_LIMIT_BYTES) {
    return Status::InvalidArgument("memory_limit_bytes must be greater than ",
                                   MIN_MEMORY_LIMIT_BYTES);
  }

  // Query the limit once so the comparison and the message agree.
  const auto memory_limit = CgroupUtil::getMemoryLimit();
  if (config.memory_limit_bytes > memory_limit) {
    return Status::InvalidArgument("memory_limit_bytes must be less than ",
                                   memory_limit);
  }

  // Validate query thread count
  if (config.query_thread_count == 0) {
    return Status::InvalidArgument("query_thread_count must be greater than 0");
  }

  // Validate invert_to_forward_scan_ratio (should be between 0 and 1).
  // Written as "not inside the range" so that NaN, which compares false
  // against everything, is rejected instead of slipping through.
  if (!(config.invert_to_forward_scan_ratio >= 0.0f &&
        config.invert_to_forward_scan_ratio <= 1.0f)) {
    return Status::InvalidArgument(
        "invert_to_forward_scan_ratio must be between 0 and 1");
  }

  // Validate brute_force_by_keys_ratio (should be between 0 and 1); same
  // NaN-safe form as above.
  if (!(config.brute_force_by_keys_ratio >= 0.0f &&
        config.brute_force_by_keys_ratio <= 1.0f)) {
    return Status::InvalidArgument(
        "brute_force_by_keys_ratio must be between 0 and 1");
  }

  // Validate optimize thread count
  if (config.optimize_thread_count == 0) {
    return Status::InvalidArgument(
        "optimize_thread_count must be greater than 0");
  }

  // Validate log configuration. log_config is a shared_ptr that callers can
  // overwrite, so it may be null; reject that instead of dereferencing it.
  if (config.log_config == nullptr) {
    return Status::InvalidArgument("log_config cannot be null");
  }

  if (config.log_config->GetLoggerType() == FILE_LOG_TYPE_NAME) {
    auto log_config =
        std::dynamic_pointer_cast<FileLogConfig>(config.log_config);

    // The reported type name and the dynamic type can disagree (e.g. a
    // custom subclass); the cast then yields null.
    if (log_config == nullptr) {
      return Status::InvalidArgument(
          "log_config reports FileLogger type but is not a FileLogConfig");
    }

    // Validate file log specific configurations
    if (log_config->dir.empty()) {
      return Status::InvalidArgument(
          "log_dir cannot be empty when set to FileLogger");
    }

    if (log_config->basename.empty()) {
      return Status::InvalidArgument(
          "log_file basename cannot be empty when set to FileLogger");
    }

    if (log_config->file_size <= MIN_LOG_FILE_SIZE) {
      return Status::InvalidArgument("log file_size must be greater than ",
                                     MIN_LOG_FILE_SIZE,
                                     " when set to FileLogger");
    }

    if (log_config->overdue_days == 0) {
      return Status::InvalidArgument(
          "log_overdue_days must be greater than 0 when set to FileLogger");
    }
  }

  return Status::OK();
}

// One-time global initialization: validates `config`, stores it, starts the
// logger, registers the exit hook and initializes GlobalResource.
//
// Returns Status::OK() when this call performed the initialization, and also
// when initialized_ was already set by an earlier (or concurrent) call. A
// validation or logger-initialization error is returned to the caller, and in
// that case the initialized_ flag is released again so that a later call with
// a corrected configuration is not silently ignored.
Status GlobalConfig::Initialize(const ConfigData &config) {
  // Use atomic compare-exchange to ensure only one initialization
  bool expected = false;
  if (!initialized_.compare_exchange_strong(expected, true)) {
    return Status::OK();
  }

  // Releases the flag on every early return below unless `keep` is set.
  // Without this a failed first attempt would leave initialized_ == true and
  // every later Initialize() would report OK while nothing was set up.
  using InitFlag = decltype(initialized_);
  struct FlagGuard {
    InitFlag &flag;
    bool keep;
    ~FlagGuard() {
      if (!keep) {
        flag.store(false);
      }
    }
  };
  FlagGuard guard{initialized_, false};

  auto s = Validate(config);
  CHECK_RETURN_STATUS(s);

  // NOTE(review): config_ is not rolled back if the logger fails to start;
  // a successful retry overwrites it.
  config_ = config;

  s = LogUtil::Init(log_dir(), log_file_basename(), int(log_level()),
                    log_type(), log_file_size(), log_overdue_days());
  CHECK_RETURN_STATUS(s);

  // The logger is live from here on. Keep the flag set even if a later step
  // fails, so that a retry cannot initialize the logger a second time.
  guard.keep = true;

  if (std::atexit(ExitLogHandler) != 0) {
    std::cerr << "Failed to register exit handler" << std::endl;
    return Status::InternalError("Failed to register exit handler");
  }

  GlobalResource::Instance().initialize();
  return Status::OK();
}

// Returns the memory_limit_bytes value currently stored in config_.
uint64_t GlobalConfig::memory_limit_bytes() const noexcept {
  return config_.memory_limit_bytes;
}

// Registers AppendLogger through FACTORY_REGISTER_LOGGER (macro defined
// outside this file). NOTE(review): presumably this makes the logger
// available to a logger factory by name - confirm against the macro.
FACTORY_REGISTER_LOGGER(AppendLogger);

}  // namespace zvec

================================================
FILE: src/db/common/constants.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <cstdint>
#include <regex>
#include <string>

namespace zvec {

// global config
// Ratio applied to the detected memory limit to derive the default
// memory_limit_bytes (see GlobalConfig::ConfigData's constructor).
const float DEFAULT_MEMORY_LIMIT_RATIO = 0.8f;

// Lower bound for memory_limit_bytes in config validation: 100 MiB.
const uint32_t MIN_MEMORY_LIMIT_BYTES = 100 * 1024 * 1024;

// Sentinel doc id with all 64 bits set. Spelled UINT64_MAX rather than -1UL:
// `unsigned long` is only 32 bits wide on LLP64/ILP32 targets (Windows,
// 32-bit Android), where -1UL would yield 0xFFFFFFFF instead.
const uint64_t INVALID_DOC_ID = UINT64_MAX;

// NOTE(review): judging by their names these three are reserved internal
// field/column names; confirm against the code that uses them.
const std::string LOCAL_ROW_ID = "_zvec_row_id_";

const std::string GLOBAL_DOC_ID = "_zvec_g_doc_id_";

const std::string USER_ID = "_zvec_uid_";

// NOTE(review): the numeric limits and thresholds in this header are consumed
// elsewhere; their exact semantics should be confirmed at the use sites.
const int kSparseMaxDimSize = 16384;

const int64_t kMaxRecordBatchNumRows = 4096;

constexpr uint32_t MAX_ARRAY_FIELD_LEN = 32;

const float COMPACT_DELETE_RATIO_THRESHOLD = 0.3f;

// Name patterns. These are namespace-scope const objects in a header, so each
// translation unit that includes it constructs its own copy during static
// initialization.

// 3 to 64 characters, each a letter, digit, '_' or '-'.
const std::regex COLLECTION_NAME_REGEX("^[a-zA-Z0-9_-]{3,64}$");

// 1 to 32 characters, each a letter, digit, '_' or '-'.
const std::regex FIELD_NAME_REGEX("^[a-zA-Z0-9_-]{1,32}$");

// 1 to 64 characters, each a letter, digit or one of _ ! @ # $ % + = . -
const std::regex DOC_PK_REGEX("^[a-zA-Z0-9_!@#$%+=.-]{1,64}$");

// Optional leading '/', then one or more '/'-separated components made of
// letters, digits, '_', '.' and '-'. Empty components and a trailing '/' do
// not match; the components "." and ".." do match.
const std::regex COLLECTION_PATH_REGEX(
    R"(^/?(?:[a-zA-Z0-9_.\-]+/)*[a-zA-Z0-9_.\-]+$)");

constexpr uint32_t kMaxDenseDimSize = 20000;

constexpr uint32_t kMaxScalarFieldSize = 1024;

constexpr uint32_t kMaxVectorFieldSize = 5;

constexpr uint32_t kMaxQueryTopk = 1024;

constexpr uint32_t kMaxOutputFieldSize = 1024;

constexpr uint32_t kMaxWriteBatchSize = 1024;

constexpr uint32_t kMinRabitqDimSize = 64;
constexpr uint32_t kMaxRabitqDimSize = 4095;

// Inverted index
// NOTE(review): the "$..." strings look like name suffixes and reserved keys
// of the inverted-index storage; confirm against the inverted index code.
const std::string INVERT_SUFFIX_TERMS{"$TERMS"};

// "$SMRET" is "TERMS$" spelled backwards.
const std::string INVERT_SUFFIX_REVERSED_TERMS{"$SMRET"};

const std::string INVERT_SUFFIX_ARRAY_LEN{"$ARRAY_LEN"};

const std::string INVERT_SUFFIX_RANGES{"$RANGES"};

const std::string INVERT_CDF{"$CDF"};

const std::string INVERT_KEY_MAX_ID{"$ZVEC$MAX_ID"};

const std::string INVERT_KEY_NULL{"$ZVEC$NULL"};

const std::string INVERT_KEY_SEALED{"$ZVEC$SEALED"};

const uint32_t INVERT_ID_LIST_SIZE_THRESHOLD = 3;


}  // namespace zvec


================================================
FILE: src/db/common/error_code.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "error_code.h"

namespace zvec {

// 0~999  [Builtin]
PROXIMA_ZVEC_ERROR_CODE_DEFINE(Success, 0, "Success");

// 1000~1999 [Common Error]
PROXIMA_ZVEC_ERROR_CODE_DEFINE(RuntimeError, 1000, "Runtime Error");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(LogicError, 1001, "Logic Error");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(StatusError, 1002, "Status Error");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(LoadConfig, 1003, "Load Config Error");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(ConfigError, 1004, "Config Error");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidArgument, 1005, "Invalid Argument");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(NotInitialized, 1006, "Not Initialized");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(OpenFile, 1007, "Open File Error");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(ReadData, 1008, "Read Data Error");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(WriteData, 1009, "Write Data Error");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(ExceedLimit, 1010, "Exceed Limit");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(SerializeError, 1011, "Serialize Error");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(DeserializeError, 1012, "Deserialize Error");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(StartServer, 1013, "Start Server Error");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(StoppedService, 1014, "Visit Stopped Service");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(FileSystem, 1015, "File System Error");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(RpcError, 1016, "RPC Error");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InitChannelError, 1017,
                               "Init brpc channel Error");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(AddSubChannelError, 1018,
                               "Add sub channel Error");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(NoNeedProcess, 1019, "No need process");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(EtcdError, 1020, "Etcd Error");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(MessageQueueError, 1021, "Message Queue Error");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(KafkaSubTopicExistErr, 1022,
                               "Kafka topic subscribe already exist Error");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(KafkaUnSubTopicNotExistErr, 1023,
                               "Kafka topic unsubscribe not exist Error");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InitKafkaError, 1024, "Init kafka error");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(KafkaPublishError, 1025, "Kafka publish error");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(ExceedKafkaMessageSizeLimit, 1026,
                               "Exceed kafka message size limit");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(NotImplemented, 1027,
                               "The function is not implemented");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(Timeout, 1028, "Timeout");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(MasterNoLeader, 1029, "Master no leader");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(NeedRetry, 1030, "Need retry");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(Abort, 1031, "Abort");

// 2000~2999 [Client Check]
PROXIMA_ZVEC_ERROR_CODE_DEFINE(EmptyCollectionName, 2000,
                               "Empty Collection Name");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(EmptyColumnName, 2001, "Empty Column Name");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(EmptyPartitionName, 2002,
                               "Empty collection partition name");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(EmptyColumns, 2003, "Empty Columns");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(EmptyPrimaryKey, 2004, "Empty primary key");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(EmptyDocList, 2005, "Empty doc list");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(EmptyDocFields, 2006, "Empty doc fields");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(EmptyIndexField, 2007, "Empty index field");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidRecord, 2008, "Invalid Record");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidQuery, 2009, "Invalid Query");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidWriteRequest, 2010,
                               "Invalid Write Request");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidVectorFormat, 2011,
                               "Invalid Vector Format");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidDataType, 2012, "Invalid Data Type");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidIndexType, 2013, "Invalid Index Type");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidFeature, 2014, "Invalid Feature");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidFilter, 2015, "Invalid Filter");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidPrimaryKey, 2016, "Invalid primary key");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidField, 2017, "Invalid field");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(MismatchedIndexColumn, 2018,
                               "Mismatched Index Column");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(MismatchedDimension, 2019,
                               "Mismatched Dimension");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(MismatchedDataType, 2020,
                               "Mismatched Data Type");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InexistentCollection, 2021,
                               "Collection Not Exist");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InexistentPartition, 2022,
                               "Inexistent collection partition");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InexistentColumn, 2023, "Column Not Exist");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InexistentKey, 2024, "Key Not Exist");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(DuplicateCollection, 2025,
                               "Duplicate Collection");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(DuplicatePartition, 2026,
                               "Duplicate collection partition");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(DuplicateKey, 2027, "Duplicate Key");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(DuplicateField, 2028, "Duplicate field");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(UnreadyPartition, 2029,
                               "Status of collection partition is incorrect");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(UnreadyCollection, 2030,
                               "Status of collection is incorrect");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(UnsupportedCondition, 2031,
                               "Query condition has error or not supported");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(
    OrderbyNotInSelectItems, 2032,
    "Order by column must exists in select item list");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(PbToSqlInfoError, 2033, "Pb to sql info error");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(ExceedRateLimit, 2034, "Exceed Rate Limit");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidSparseValues, 2035,
                               "Invalid Sparse Values");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidBatchSize, 2036, "Invalid batch size");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidDimension, 2037, "Invalid dimension");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidExtraParam, 2038, "Invalid extra param");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidRadius, 2039, "Invalid radius");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidLinear, 2040, "Invalid is linear");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidTopk, 2041, "Invalid topk");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidCollectionName, 2042,
                               "Invalid collection name");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidPartitionName, 2043,
                               "Invalid partition name");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidFieldName, 2044, "Invalid field name");
// Each description names its own code, so ErrorCode::What() output for these
// two codes is distinguishable from InvalidFieldName (2044).
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidChannelCount, 2045,
                               "Invalid channel count");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidReplicaCount, 2046,
                               "Invalid replica count");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidJson, 2047, "Invalid json");
// used by master
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidClusterConfig, 2048,
                               "Invalid cluster config");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(DuplicateCluster, 2049, "Duplicate Cluster");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InexistentCluster, 2050, "Inexistent Cluster");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidClusterStatus, 2051,
                               "Invalid Cluster Status");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(RpcTimedout, 2052, "Rpc Timedout");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidGroupBy, 2053, "Invalid GroupBy Request");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(EmptyVectorField, 2054, "Empty vector field");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(VectorNotAllowed, 2055, "Vector not allowed");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidReferenceCollection, 2056,
                               "Invalid reference collection");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(InvalidOrderBy, 2057, "Invalid OrderBy field");


PROXIMA_ZVEC_ERROR_CODE_DEFINE(UnreadyQueue, 5002,
                               "Compute Queue Is Unready Yet");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(ScheduleError, 5003, "Schedule Task Error");
PROXIMA_ZVEC_ERROR_CODE_DEFINE(TaskIsRunning, 5004,
                               "Task is running in other coroutine");

// Returns the description registered for `val` in the ErrorCode singleton,
// or "" when no code with that value is registered. Lookup is by the value a
// Code stores, which is the negation of the number given in its definition
// (e.g. -1005 for InvalidArgument).
const char *ErrorCode::What(int val) {
  return ErrorCode::Instance()->what(val);
}

}  // namespace zvec


================================================
FILE: src/db/common/error_code.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <map>
#include <zvec/ailego/pattern/expected.hpp>
namespace zvec {

/*! Error
 */

class ErrorCode;

/*! Registry of error codes.
 *
 *  Every ErrorCode::Code registers itself in a process-wide singleton when it
 *  is constructed; What() looks a description up by the stored value.
 */
class ErrorCode {
 public:
  /*! One error code: a numeric value plus a description string.
   *
   *  The stored value is the NEGATION of the constructor argument, so a code
   *  defined with 1005 reports -1005 from value() and operator int().
   */
  class Code {
   public:
    //! Constructor: stores -val and str, then registers this object.
    //! The registry keeps the address of this object, not a copy.
    Code(int val, const char *str) : value_(-val), desc_(str) {
      ErrorCode *registry = ErrorCode::Instance();
      registry->emplace(this);
    }

    //! Implicit conversion to the stored (negated) value
    operator int() const {
      return value_;
    }

    //! Retrieve the stored (negated) value
    int value() const {
      return value_;
    }

    //! Retrieve the description of code
    const char *desc() const {
      return desc_;
    }

   private:
    int value_;
    const char *desc_;
  };

  //! Retrieve the description registered for a stored value
  static const char *What(int val);

 protected:
  //! Constructor: starts with an empty map
  ErrorCode() = default;

  //! Register a code under its stored value; an already registered value
  //! keeps its first entry (std::map::emplace does not overwrite)
  void emplace(const ErrorCode::Code *code) {
    const int key = code->value();
    map_.emplace(key, code);
  }

  //! Retrieve the description for a stored value, "" when it is unknown
  const char *what(int val) const {
    const auto found = map_.find(val);
    return found == map_.end() ? "" : found->second->desc();
  }

  //! Retrieve the singleton (function-local static, created on first use)
  static ErrorCode *Instance() {
    static ErrorCode registry;
    return &registry;
  }

 private:
  //! Not copyable, not movable
  ErrorCode(const ErrorCode &) = delete;
  ErrorCode(ErrorCode &&) = delete;
  ErrorCode &operator=(const ErrorCode &) = delete;

  //! Stored value -> registered code
  std::map<int, const ErrorCode::Code *> map_;
};

//! Error Code Define
//! Expands to two definitions in the enclosing scope:
//!   1. `ErrorCode_<NAME>`, a const ErrorCode::Code built from (VAL, DESC);
//!   2. a reference `_ErrorCode_<VAL>_Register` bound to that object.
//! VAL is token-pasted into the second identifier, so it must be a plain
//! token such as 1005, and defining two codes with the same VAL in the same
//! scope of one translation unit is a redefinition error at compile time.
//! The expansion ends without ';' - the invocation supplies it.
#define PROXIMA_ZVEC_ERROR_CODE_DEFINE(__NAME__, __VAL__, __DESC__)        \
  const zvec::ErrorCode::Code ErrorCode_##__NAME__((__VAL__), (__DESC__)); \
  const zvec::ErrorCode::Code &_ErrorCode_##__VAL__##_Register(            \
      ErrorCode_##__NAME__)

//! Proxima SE Error Code Declare
//! Expands to an `extern` declaration of `ErrorCode_<NAME>`; the matching
//! definition comes from PROXIMA_ZVEC_ERROR_CODE_DEFINE.
#define PROXIMA_ZVEC_ERROR_CODE_DECLARE(__NAME__) \
  extern const zvec::ErrorCode::Code ErrorCode_##__NAME__

//! Error code helper
//! Expands to the qualified name `zvec::ErrorCode_<NAME>`.
#define PROXIMA_ZVEC_ERROR_CODE(__NAME__) zvec::ErrorCode_##__NAME__

// 0~999  [Builtin]
PROXIMA_ZVEC_ERROR_CODE_DECLARE(Success);

// 1000~1999 [Common Error]
PROXIMA_ZVEC_ERROR_CODE_DECLARE(RuntimeError);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(LogicError);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(StatusError);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(LoadConfig);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(ConfigError);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidArgument);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(NotInitialized);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(OpenFile);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(ReadData);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(WriteData);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(ExceedLimit);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(SerializeError);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(DeserializeError);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(StartServer);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(StoppedService);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(FileSystem);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(RpcError);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InitChannelError);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(AddSubChannelError);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(NoNeedProcess);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(EtcdError);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(MessageQueueError);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(KafkaSubTopicExistErr);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(KafkaUnSubTopicNotExistErr);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InitKafkaError);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(KafkaPublishError);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(ExceedKafkaMessageSizeLimit);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(NotImplemented);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(Timeout);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(MasterNoLeader);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(NeedRetry);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(Abort);

PROXIMA_ZVEC_ERROR_CODE_DECLARE(UnreadyQueue);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(ScheduleError);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(TaskIsRunning);

PROXIMA_ZVEC_ERROR_CODE_DECLARE(DirectoryAlreadyExists);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(DirectoryNotExists);

// 2000~2999 [Client Check]
PROXIMA_ZVEC_ERROR_CODE_DECLARE(EmptyCollectionName);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(EmptyColumnName);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(EmptyPartitionName);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(EmptyColumns);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(EmptyPrimaryKey);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(EmptyDocList);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(EmptyDocFields);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(EmptyIndexField);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidRecord);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidQuery);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidWriteRequest);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidVectorFormat);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidDataType);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidIndexType);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidFeature);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidFilter);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidPrimaryKey);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidField);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(MismatchedIndexColumn);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(MismatchedDimension);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(MismatchedDataType);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InexistentCollection);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InexistentPartition);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InexistentColumn);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InexistentKey);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(DuplicateCollection);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(DuplicatePartition);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(DuplicateKey);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(DuplicateField);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(UnreadyPartition);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(UnreadyCollection);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(UnsupportedCondition);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(OrderbyNotInSelectItems);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(PbToSqlInfoError);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(ExceedRateLimit);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidSparseValues);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidBatchSize);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidDimension);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidExtraParam);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidRadius);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidLinear);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidTopk);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidCollectionName);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidPartitionName);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidFieldName);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidChannelCount);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidReplicaCount);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidJson);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidClusterConfig);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(DuplicateCluster);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InexistentCluster);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidClusterStatus);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(RpcTimedout);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidGroupBy);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(EmptyVectorField);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(VectorNotAllowed);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidReferenceCollection);
PROXIMA_ZVEC_ERROR_CODE_DECLARE(InvalidOrderBy);


// 40000~49999 [De Admin]
}  // namespace zvec


================================================
FILE: src/db/common/file_helper.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "file_helper.h"
#include <sys/stat.h>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <algorithm>
#include <cstdio>
#include <ailego/pattern/defer.h>

namespace zvec {


// keep consistent with MANIFEST_BACKUP_FILE
const std::string FileHelper::BACKUP_SUFFIX = ".backup_";
const std::string FileHelper::RECOVER_SUFFIX = ".recovering";

// Copies the contents of `src_file_path` to `dst_file_path`.
//
// The data is first written to "<dst_file_path>.tmp" and renamed onto the
// destination only after the whole source was copied, so the destination
// path never names a partially written file.
//
// Returns true when the copy was renamed into place. Returns false when
// either file cannot be opened, a read or write fails, or the rename fails;
// on failure after the temporary file was created, it is removed again.
bool FileHelper::CopyFile(const std::string &src_file_path,
                          const std::string &dst_file_path) {
  int src_fd = open(src_file_path.c_str(), O_RDONLY, 0);
  if (src_fd < 0) {
    return false;
  }
  AILEGO_DEFER([src_fd] { close(src_fd); });

  std::string dst_file_path_tmp = dst_file_path + ".tmp";
  int dst_fd =
      open(dst_file_path_tmp.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0644);
  if (dst_fd < 0) {
    return false;
  }
  AILEGO_DEFER([dst_fd] { close(dst_fd); });

  bool ok = true;
  char buf[BUFSIZ];
  while (ok) {
    ssize_t size = read(src_fd, buf, sizeof(buf));
    if (size == 0) {
      break;  // end of file
    }
    if (size < 0) {
      if (errno == EINTR) {
        continue;  // interrupted before any data was read: retry
      }
      // A read error must not be mistaken for end of file, otherwise a
      // truncated copy would be published as a success.
      ok = false;
      break;
    }

    // write() may accept fewer bytes than requested; keep going until the
    // whole chunk is written.
    ssize_t offset = 0;
    while (offset < size) {
      ssize_t written = write(dst_fd, buf + offset, size - offset);
      if (written < 0 && errno == EINTR) {
        continue;
      }
      if (written <= 0) {
        ok = false;
        break;
      }
      offset += written;
    }
  }

  if (ok) {
    ok = rename(dst_file_path_tmp.c_str(), dst_file_path.c_str()) == 0;
  }
  if (!ok) {
    // Best effort: do not leave the temporary file behind.
    unlink(dst_file_path_tmp.c_str());
  }
  return ok;
}

// Recursively copies every entry of `src_dir_path` into `dst_dir_path`.
//
// The destination directory is created through ailego::FileHelper::MakePath
// when ailego::FileHelper::IsExist reports it missing. Entries that
// ailego::FileHelper::IsDirectory reports as directories are copied by
// recursion, everything else through CopyFile.
//
// Returns false as soon as the source cannot be opened, the destination
// cannot be created, or any entry fails to copy (entries already copied are
// left in place); returns true otherwise.
bool FileHelper::CopyDirectory(const std::string &src_dir_path,
                               const std::string &dst_dir_path) {
  DIR *src_dir = opendir(src_dir_path.c_str());
  if (src_dir == nullptr) {
    return false;
  }
  AILEGO_DEFER([src_dir] { closedir(src_dir); });

  const bool dst_ready = ailego::FileHelper::IsExist(dst_dir_path.c_str()) ||
                         ailego::FileHelper::MakePath(dst_dir_path.c_str());
  if (!dst_ready) {
    return false;
  }

  for (struct dirent *entry = readdir(src_dir); entry != nullptr;
       entry = readdir(src_dir)) {
    // Skip the self and parent links.
    if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) {
      continue;
    }

    const std::string src_child =
        ailego::StringHelper::Concat(src_dir_path, "/", entry->d_name);
    const std::string dst_child =
        ailego::StringHelper::Concat(dst_dir_path, "/", entry->d_name);

    const bool copied = ailego::FileHelper::IsDirectory(src_child.c_str())
                            ? CopyDirectory(src_child, dst_child)
                            : CopyFile(src_child, dst_child);
    if (!copied) {
      return false;
    }
  }
  return true;
}

// Trims the entries of `backup_dir` whose names start with `prefix_name`
// down to at most `max_backup_count`.
//
// Matching names are sorted as plain strings and the ones that sort first
// are removed with ailego::FileHelper::RemovePath until only
// `max_backup_count` remain.
// NOTE(review): this keeps the newest backups only if lexicographic order of
// the names matches their age - confirm against the naming scheme.
//
// Does nothing when `max_backup_count` is 0, `prefix_name` is null, the
// directory cannot be opened, or there are not more matches than allowed.
void FileHelper::CleanupDirectory(const std::string &backup_dir,
                                  size_t max_backup_count,
                                  const char *prefix_name) {
  if (max_backup_count == 0 || prefix_name == nullptr) {
    return;
  }

  DIR *dir = opendir(backup_dir.c_str());
  if (!dir) {
    return;
  }

  AILEGO_DEFER([dir] { closedir(dir); });

  size_t prefix_len = strlen(prefix_name);
  std::vector<std::string> candidates;
  struct dirent *dent;
  while ((dent = readdir(dir)) != nullptr) {
    // Never treat the self/parent links as candidates: an empty or "."
    // prefix matches them, they sort first, and removing "<dir>/." or
    // "<dir>/.." would delete far more than a backup.
    if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, "..")) {
      continue;
    }
    if (strncmp(dent->d_name, prefix_name, prefix_len) == 0) {
      candidates.emplace_back(dent->d_name);
    }
  }
  if (candidates.size() <= max_backup_count) {
    return;
  }

  std::sort(candidates.begin(), candidates.end());
  const size_t remove_count = candidates.size() - max_backup_count;
  for (size_t i = 0; i < remove_count; ++i) {
    std::string path =
        ailego::StringHelper::Concat(backup_dir, "/", candidates[i].c_str());
    ailego::FileHelper::RemovePath(path.c_str());
  }
}

}  // namespace zvec

================================================
FILE: src/db/common/file_helper.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <stdint.h>
#include <cstdint>
#include <string>
#include <zvec/ailego/io/file.h>
#include <zvec/ailego/utility/file_helper.h>
#include <zvec/ailego/utility/string_helper.h>

namespace zvec {

/*
 * File type and id
 */
enum class FileID : uint32_t {
  UNDEFINED = 0,
  ID_FILE,
  DELETE_FILE,
  FORWARD_FILE,
  PROXIMA_FILE,
  SEGMENT_FILE,
  LSN_FILE,
  MANIFEST_FILE,
  WAL_FILE,
  RESHARD_STATE,
};

/*
 * File name corresponding to a file id.
 * Returns "UnknownFile" for FileID::UNDEFINED and for any value that is not
 * one of the enumerators above.
 */
static const char *GetFileName(FileID t) {
  // Indexed by the numeric value of FileID, so the order must follow the
  // enumerator order above.
  static const char *const kFileNames[] = {
      "UnknownFile",    // UNDEFINED
      "idmap",          // ID_FILE
      "del",            // DELETE_FILE
      "data.fwd",       // FORWARD_FILE
      "data.pxa",       // PROXIMA_FILE
      "data.seg",       // SEGMENT_FILE
      "data.lsn",       // LSN_FILE
      "manifest",       // MANIFEST_FILE
      "data.wal",       // WAL_FILE
      "reshard.state",  // RESHARD_STATE
  };
  constexpr uint32_t kCount = sizeof(kFileNames) / sizeof(kFileNames[0]);
  static_assert(kCount == static_cast<uint32_t>(FileID::RESHARD_STATE) + 1,
                "kFileNames must have one entry per FileID enumerator");

  const uint32_t index = static_cast<uint32_t>(t);
  return index < kCount ? kFileNames[index] : "UnknownFile";
}

/*
 * Stateless helper that builds the on-disk paths used by the db layer and
 * wraps the ailego filesystem operations.
 *
 * All path builders return a freshly built std::string by value (non-const,
 * so callers can move from the result).
 */
class FileHelper {
 public:
  //! Make WAL path: ${path}/${seg_id}/${block_id}.wal
  static std::string MakeWalPath(const std::string &path, uint32_t seg_id,
                                 uint32_t block_id) {
    return ailego::StringHelper::Concat(path, "/", seg_id, "/", block_id,
                                        ".wal");
  }

  //! Make segment path: ${path}/${id}, or ${path}/${id}.${suffix} when
  //! `suffix` is not empty
  static std::string MakeSegmentPath(const std::string &path, uint32_t id,
                                     const std::string &suffix = "") {
    if (suffix.empty()) {
      return ailego::StringHelper::Concat(path, "/", id);
    }
    return ailego::StringHelper::Concat(path, "/", id, ".", suffix);
  }

  //! Make temporary segment path: ${path}/${id}.tmp
  static std::string MakeTempSegmentPath(const std::string &path, uint32_t id) {
    return MakeSegmentPath(path, id, "tmp");
  }

  //! Make forward block path: ${path}/${seg_id}/scalar.${block_id}.ipc, or
  //! `.parquet` instead of `.ipc` when `use_parquet` is true
  static std::string MakeForwardBlockPath(const std::string &path,
                                          uint32_t seg_id, uint32_t block_id,
                                          bool use_parquet = false) {
    // The suffix is wrapped in std::string on purpose: a bare string literal
    // would convert to bool and select this overload again.
    return use_parquet ? MakeForwardBlockPath(path, seg_id, block_id,
                                              std::string("parquet"))
                       : MakeForwardBlockPath(path, seg_id, block_id,
                                              std::string("ipc"));
  }

  //! Make forward block path: ${path}/${seg_id}/scalar.${block_id}.${suffix}
  //! NOTE: pass `suffix` as a std::string; a string literal argument binds to
  //! the `bool use_parquet` overload instead.
  static std::string MakeForwardBlockPath(const std::string &path,
                                          uint32_t seg_id, uint32_t block_id,
                                          const std::string &suffix) {
    return ailego::StringHelper::Concat(path, "/", seg_id, "/scalar.", block_id,
                                        ".", suffix);
  }

  //! Make forward block path: ${seg_path}/scalar.${block_id}.ipc, or
  //! `.parquet` instead of `.ipc` when `use_parquet` is true
  static std::string MakeForwardBlockPath(const std::string &seg_path,
                                          uint32_t block_id,
                                          bool use_parquet = false) {
    return use_parquet ? ailego::StringHelper::Concat(seg_path, "/scalar.",
                                                      block_id, ".parquet")
                       : ailego::StringHelper::Concat(seg_path, "/scalar.",
                                                      block_id, ".ipc");
  }

  //! Make forward block path: ${seg_path}/scalar.${block_id}.${suffix}
  //! NOTE: pass `suffix` as a std::string; a string literal argument binds to
  //! the `bool use_parquet` overload instead.
  static std::string MakeForwardBlockPath(const std::string &seg_path,
                                          uint32_t block_id,
                                          const std::string &suffix) {
    return ailego::StringHelper::Concat(seg_path, "/scalar.", block_id, ".",
                                        suffix);
  }

  //! Make invert index path:
  //! ${path}/${seg_id}/scalar.index.${block_id}.rocksdb
  static std::string MakeInvertIndexPath(const std::string &path,
                                         uint32_t seg_id, uint32_t block_id) {
    return ailego::StringHelper::Concat(path, "/", seg_id, "/scalar.index.",
                                        block_id, ".rocksdb");
  }

  //! Make invert index path: ${seg_path}/scalar.index.${block_id}.rocksdb
  static std::string MakeInvertIndexPath(const std::string &seg_path,
                                         uint32_t block_id) {
    return ailego::StringHelper::Concat(seg_path, "/scalar.index.", block_id,
                                        ".rocksdb");
  }

  //! Make vector index path:
  //! ${path}/${seg_id}/${column}.index.${block_id}.proxima
  static std::string MakeVectorIndexPath(const std::string &path,
                                         const std::string &column,
                                         uint32_t seg_id, uint32_t block_id) {
    return ailego::StringHelper::Concat(path, "/", seg_id, "/", column,
                                        ".index.", block_id, ".proxima");
  }

  //! Make vector index path: ${seg_path}/${column}.index.${block_id}.proxima
  static std::string MakeVectorIndexPath(const std::string &seg_path,
                                         const std::string &column,
                                         uint32_t block_id) {
    return ailego::StringHelper::Concat(seg_path, "/", column, ".index.",
                                        block_id, ".proxima");
  }

  //! Make quantized vector index path:
  //! ${path}/${seg_id}/${column}.qindex.${block_id}.proxima
  static std::string MakeQuantizeVectorIndexPath(const std::string &path,
                                                 const std::string &column,
                                                 uint32_t seg_id,
                                                 uint32_t block_id) {
    return ailego::StringHelper::Concat(path, "/", seg_id, "/", column,
                                        ".qindex.", block_id, ".proxima");
  }

  //! Make quantized vector index path:
  //! ${seg_path}/${column}.qindex.${block_id}.proxima
  static std::string MakeQuantizeVectorIndexPath(const std::string &seg_path,
                                                 const std::string &column,
                                                 uint32_t block_id) {
    return ailego::StringHelper::Concat(seg_path, "/", column, ".qindex.",
                                        block_id, ".proxima");
  }

  //! Make file path with ${prefix_path}/${file_name}
  static std::string MakeFilePath(const std::string &prefix_path,
                                  FileID file_id) {
    return ailego::StringHelper::Concat(prefix_path, "/", GetFileName(file_id));
  }

  //! Make file path with ${prefix_path}/${file_name}.${number}
  static std::string MakeFilePath(const std::string &prefix_path,
                                  FileID file_id, uint32_t number) {
    return ailego::StringHelper::Concat(prefix_path, "/", GetFileName(file_id),
                                        ".", number);
  }

  //! Make file path with ${prefix_path}/${file_name}.${suffix_name}.${number}
  static std::string MakeFilePath(const std::string &prefix_path,
                                  FileID file_id, uint32_t number,
                                  const std::string &suffix_name) {
    return ailego::StringHelper::Concat(prefix_path, "/", GetFileName(file_id),
                                        ".", suffix_name, ".", number);
  }

  //! Create directory (forwards to ailego::File::MakePath)
  static bool CreateDirectory(const std::string &dir_path) {
    return ailego::File::MakePath(dir_path);
  }

  //! Remove directory
  static bool RemoveDirectory(const std::string &dir_path) {
    return ailego::File::RemoveDirectory(dir_path);
  }

  //! Remove file
  static bool RemoveFile(const std::string &file_path) {
    return ailego::File::Delete(file_path);
  }

  //! Move file (forwards to ailego::File::Rename)
  static bool MoveFile(const std::string &src_path,
                       const std::string &dest_path) {
    return ailego::File::Rename(src_path, dest_path);
  }

  //! Move directory (forwards to ailego::File::Rename, same as MoveFile)
  static bool MoveDirectory(const std::string &src_path,
                            const std::string &dest_path) {
    return ailego::File::Rename(src_path, dest_path);
  }

  //! Check if file exists
  static bool FileExists(const std::string &file_path) {
    return ailego::File::IsExist(file_path);
  }

  //! Check if directory exists
  //! NOTE(review): this performs the same ailego::File::IsExist check as
  //! FileExists and does not test that the path is a directory - confirm
  //! whether callers rely on that.
  static bool DirectoryExists(const std::string &dir_path) {
    return ailego::File::IsExist(dir_path);
  }

  //! Return file size
  static size_t FileSize(const std::string &file_path) {
    return ailego::FileHelper::FileSize(file_path.c_str());
  }

  //! Copy file
  //! src_file_path and dst_file_path must be the full path
  //! dst_file_path/.. must exist
  static bool CopyFile(const std::string &src_file_path,
                       const std::string &dst_file_path);

  //! Copy directory recursively
  //! src_dir_path and dst_dir_path must be the full path
  //! dst_dir_path will be created if not exist
  static bool CopyDirectory(const std::string &src_dir_path,
                            const std::string &dst_dir_path);

  //! Clean up file or directory with the prefix `prefix_name` under
  //! `backup_dir`, keep at most `max_backup_count` file or directory.
  //! If `max_backup_count` is 0, nothing is performed.
  //!
  //! The name pattern must be `prefix_name`_`number`, comparable by name.
  static void CleanupDirectory(const std::string &backup_dir,
                               size_t max_backup_count,
                               const char *prefix_name);

  //! Suffix constants; their values are defined out of line
  static const std::string BACKUP_SUFFIX;
  static const std::string RECOVER_SUFFIX;
};


}  // namespace zvec

================================================
FILE: src/db/common/global_resource.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "db/common/global_resource.h"
#include <mutex>
#include <zvec/ailego/buffer/buffer_manager.h>
#include <zvec/db/config.h>

namespace zvec {

void GlobalResource::initialize() {
  static std::once_flag flag;
  std::call_once(flag, [this]() mutable {
    this->query_thread_pool_.reset(
        new ailego::ThreadPool(GlobalConfig::Instance().query_thread_count()));
    this->optimize_thread_pool_.reset(new ailego::ThreadPool(
        GlobalConfig::Instance().optimize_thread_count()));
    ailego::BufferManager::Instance().init(
        GlobalConfig::Instance().memory_limit_bytes(), 1);
  });
}

}  // namespace zvec

================================================
FILE: src/db/common/global_resource.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <memory>
#include <zvec/ailego/parallel/thread_pool.h>
#include <zvec/ailego/pattern/singleton.h>

namespace zvec {

//! Singleton holding the thread pools shared by query and optimize work.
class GlobalResource : public ailego::Singleton<GlobalResource> {
 public:
  //! Set up the resources held by this object (defined out of line)
  void initialize();

  //! Thread pool for queries; initialize() is called before the pointer is
  //! handed out. The pool stays owned by this object.
  ailego::ThreadPool *query_thread_pool() {
    initialize();
    ailego::ThreadPool *const pool = query_thread_pool_.get();
    return pool;
  }

  //! Thread pool for optimize tasks; initialize() is called before the
  //! pointer is handed out. The pool stays owned by this object.
  ailego::ThreadPool *optimize_thread_pool() {
    initialize();
    ailego::ThreadPool *const pool = optimize_thread_pool_.get();
    return pool;
  }

 private:
  using ThreadPoolPtr = std::unique_ptr<ailego::ThreadPool>;

  //! Owned pools, populated by initialize()
  ThreadPoolPtr query_thread_pool_;
  ThreadPoolPtr optimize_thread_pool_;
};

}  // namespace zvec


================================================
FILE: src/db/common/glogger.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <zvec/ailego/io/file.h>
#include <zvec/ailego/logger/logger.h>

#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#pragma GCC diagnostic ignored "-Wunused-parameter"
#endif

#include <glog/logging.h>

#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif

// Forward declarations of helpers living in glog's internal namespace.
// AppendLogger calls IsGoogleLoggingInitialized() to guard its init() and
// cleanup(); ShutdownGoogleLoggingUtilities() is declared here but not called
// in this header.
// NOTE(review): these declarations must match the symbols exported by the
// glog version being linked (name, namespace and return type) - confirm
// against the glog sources in use.
namespace google {
namespace glog_internal_namespace_ {
extern bool IsGoogleLoggingInitialized(void);
extern bool ShutdownGoogleLoggingUtilities(void);
}  // namespace glog_internal_namespace_
}  // namespace google

namespace zvec {

class AppendLogger : public ailego::Logger {
 public:
  AppendLogger() = default;

  ~AppendLogger() {
    this->cleanup();
  }

 public:
  int init(const ailego::Params &params) override {
    if (!google::glog_internal_namespace_::IsGoogleLoggingInitialized()) {
      std::string log_dir = params.get_as_string("proxima.file.logger.log_dir");
      std::string log_file =
          params.get_as_string("proxima.file.logger.log_file");
      uint32_t log_file_size =
          params.get_as_uint32("proxima.file.logger.file_size");
      uint32_t log_overdue_days =
          params.get_as_uint32("proxima.file.logger.overdue_days");

      if (!ailego::File::IsExist(log_dir)) {
        ailego::File::MakePath(log_dir);
      }

      FLAGS_log_dir = log_dir;
      FLAGS_max_log_size = log_file_size;
      FLAGS_logbufsecs = 1;
      // it's really a bad feature for glog
      // logs <= LOG_FATAL will also output to stderr
      // and we can only set FATAL at most
      // and so we should avoid to use LOG_FATAL
      FLAGS_stderrthreshold = google::GLOG_FATAL;

      static std::string new_log_file = log_file;
      google::InitGoogleLogging(new_log_file.c_str());
      google::EnableLogCleaner(log_overdue_days);
    }
    return 0;
  }

  int cleanup() override {
    if (google::glog_internal_namespace_::IsGoogleLoggingInitialized()) {
      google::DisableLogCleaner();
      google::ShutdownGoogleLogging();
    }
    return 0;
  }

  void log(int level, const char *file, int line, const char *format,
           va_list args) override {
    static google::LogSeverity severities[] = {
        google::GLOG_INFO, google::GLOG_INFO, google::GLOG_WARNING,
        google::GLOG_ERROR, google::GLOG_FATAL};
    char buf[2048];
    vsnprintf(buf, sizeof(buf), format, args);
    google::LogMessage(file, line, severities[level]).stream() << buf;
    // NOTE: glog will flush WARN and above immediately, flush INFO every
    // `FLAGS_logbufsecs` or every 1M bytes. FlushLogFiles not needed.
    // google::FlushLogFiles(severities[level]);
  }
};

}  // namespace zvec


================================================
FILE: src/db/common/logger.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <gflags/gflags.h>
#include <zvec/ailego/io/file.h>
#include <zvec/ailego/logger/logger.h>
#include <zvec/ailego/pattern/factory.h>
#include <zvec/db/status.h>
#include "db/common/constants.h"
#include "error_code.h"

namespace zvec {

//! Helpers to set up and tear down the process logger.
class LogUtil {
 public:
  //! Create the logger registered under `logger_type`, initialize it and
  //! install it in the LoggerBroker with `log_level`.
  //!
  //! For the file logger (FILE_LOG_TYPE_NAME) `log_dir` and `log_file` must
  //! be non-empty; `log_dir` is created when missing, and `log_file_size` /
  //! `log_overdue_days` are passed to the logger as parameters.
  //!
  //! Returns InvalidArgument for empty file-logger paths or an unknown
  //! `logger_type`, InternalError when the logger's init() fails, OK
  //! otherwise.
  static Status Init(const std::string &log_dir, const std::string &log_file,
                     int log_level, const std::string &logger_type,
                     int log_file_size, int log_overdue_days) {
    const bool is_file_logger = (logger_type == FILE_LOG_TYPE_NAME);

    if (is_file_logger) {
      if (log_dir.empty() || log_file.empty()) {
        return Status::InvalidArgument("log_dir or log_file is empty");
      }

      // Initialization still proceeds when the directory cannot be created,
      // but the failure is reported instead of being silently dropped.
      if (!ailego::File::IsExist(log_dir) &&
          !ailego::File::MakePath(log_dir)) {
        LOG_WARN("Failed to create log_dir[%s]", log_dir.c_str());
      }
    }

    auto logger =
        ailego::Factory<ailego::Logger>::MakeShared(logger_type.c_str());
    if (!logger) {
      LOG_FATAL("Invalid logger_type[%s]", logger_type.c_str());
      return Status::InvalidArgument("Invalid logger_type: ", logger_type);
    }

    // Only the file logger receives parameters; other loggers are
    // initialized with an empty parameter set.
    ailego::Params params;
    if (is_file_logger) {
      params.set("proxima.file.logger.log_dir", log_dir);
      params.set("proxima.file.logger.log_file", log_file);
      params.set("proxima.file.logger.path", log_dir + "/" + log_file);
      std::string program_name = ailego::File::BaseName(gflags::GetArgv0());
      params.set("proxima.program.program_name", program_name);
      params.set("proxima.file.logger.file_size", log_file_size);
      params.set("proxima.file.logger.overdue_days", log_overdue_days);
    }

    int ret = logger->init(params);
    if (ret != 0) {
      return Status::InternalError(ErrorCode::What(ret));
    }

    zvec::ailego::LoggerBroker::SetLevel(log_level);
    zvec::ailego::LoggerBroker::Register(logger);
    return Status::OK();
  }

  //! Unregister the logger installed in the LoggerBroker
  static void Shutdown() {
    zvec::ailego::LoggerBroker::Unregister();
  }
};

}  // namespace zvec


================================================
FILE: src/db/common/profiler.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <memory>
#include <vector>
#include <zvec/ailego/encoding/json.h>
#include <zvec/ailego/logger/logger.h>
#include <zvec/ailego/utility/time_helper.h>
#include "error_code.h"

namespace zvec {

//! Profiler collecting all the latency and other information during query
class Profiler {
 public:
  using Ptr = std::shared_ptr<Profiler>;

 private:
  //! Stage object
  struct Stage {
    //! Constructor
    explicit Stage(ailego::JsonObject *node) : node_(node) {}
    //! Stage node, which stored in JsonTree held by Profiler
    ailego::JsonObject *node_{nullptr};
    //! Stage latency, started when creating Stage object
    ailego::ElapsedTime latency_;
  };

 public:
  //! Constructor
  explicit Profiler(bool enable = false) : enable_(enable) {
    if (enabled()) {
      root_.assign(ailego::JsonObject());
    }
  }

  //! Check enabled
  bool enabled() const {
    return (enabled_debug() || enabled_trace());
  }

  bool enabled_debug() const {
    return enable_;
  }

  bool enabled_trace() const {
    return !trace_id_.empty();
  }

  //! Start profiler
  void start() {
    if (enabled() && path_.empty()) {
      path_.emplace_back(Stage(&root_.as_object()));
    }
  }

  //! Stop profiler
  void stop() {
    if (enabled()) {
      if (path_.size() == 1) {
        // Root always held in path_[0]
        close_stage();
      } else {
        LOG_WARN("There are stages have not been closed, stages[%zu]",
                 path_.size());
        // Manually set latency to root, which should not be normal way
        root_["latency"] = path_.begin()->latency_.micro_seconds();
      }
    }
  }

  //! Open stage, start timer of stage
  int open_stage(const std::string &name) {
    if (enabled()) {
      if (path_.empty()) {
        LOG_ERROR("Profiler did not start yet. name[%s]", name.c_str());
        return PROXIMA_ZVEC_ERROR_CODE(RuntimeError);
      }
      if (name.empty()) {
        LOG_ERROR("Can't open stage with empty name");
        return PROXIMA_ZVEC_ERROR_CODE(RuntimeError);
      }
      ailego::JsonString key(name);
      ailego::JsonObject child;

      current_path()->set(key, child);  // add child
      path_.emplace_back(Stage(
          &((*current_path())[name.c_str()].as_object())));  // move to child
    }
    return 0;
  }

  //! Close stage and stop timer of stage(represent by stage.latency)
  int close_stage() {
    if (enabled()) {
      if (path_.empty()) {
        LOG_ERROR("No available stage can be closed");
        return PROXIMA_ZVEC_ERROR_CODE(RuntimeError);
      }
      ailego::JsonValue latency(current()->latency_.micro_seconds());
      current_path()->set("latency", latency);
      path_.pop_back();
    }
    return 0;
  }

  //! add value to profiler
  template <typename VALUE_TYPE>
  int add(const std::string &name, const VALUE_TYPE &v) {
    if (enabled()) {
      if (path_.empty()) {
        return PROXIMA_ZVEC_ERROR_CODE(RuntimeError);
      }

      ailego::JsonString key(name);
      ailego::JsonValue value(v);
      current_path()->set(key, value);
    }
    return 0;
  }

  //! Serialize profiler to string(Json Format)
  std::string as_json_string() const {
    return enabled() ? root_.as_json_string().as_stl_string()
                     : std::string("{}");
  }


  void set_trace_id(const std::string &trace_id) {
    trace_id_ = trace_id;
    if (enabled()) {
      root_.assign(ailego::JsonObject());
    }
  }


  const std::string &trace_id() const {
    return trace_id_;
  }

  const ailego::JsonValue &root() const {
    return root_;
  }

 private:
  Stage *current() {
    return path_.rbegin().operator->();
  }

  ailego::JsonObject *current_path() {
    return current()->node_;
  }

 private:
  //! enable flag
  bool enable_{false};

  std::string trace_id_{};
  //! root handler
  ailego::JsonValue root_;

  //! Depth-First paths
  std::vector<Stage> path_;
};

//! Helper for latency: measures the lifetime of the object and, on
//! destruction, adds the elapsed microseconds to the profiler under `name`.
class ScopedLatency {
 public:
  //! Constructor. `name` is stored as a pointer, not copied, so it must stay
  //! valid until this object is destroyed.
  explicit ScopedLatency(const char *name, Profiler::Ptr profiler)
      : name_(name), profiler_(std::move(profiler)) {}

  //! Destructor. Nothing is recorded when the profiler or the name is null.
  ~ScopedLatency() {
    if (profiler_ && name_ != nullptr) {
      profiler_->add(name_, latency_.micro_seconds());
    }
  }

 private:
  //! Name of latency
  const char *name_{nullptr};

  //! Timer handler
  ailego::ElapsedTime latency_;

  //! Profiler handler
  Profiler::Ptr profiler_;
};

}  // namespace zvec


================================================
FILE: src/db/common/rocbsdb_context.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#include <rocksdb/filter_policy.h>
#include <rocksdb/statistics.h>
#include <rocksdb/table.h>
#include <rocksdb/utilities/checkpoint.h>
#include <zvec/ailego/logger/logger.h>
#include "rocksdb_context.h"


namespace zvec {


// Creates a new RocksDB at `db_path` and opens it for writing.
// Fails with PermissionDenied when this context already holds a database,
// with the status of validate_and_set_db_path() when the path is rejected
// (the path must not exist yet), and with InternalError when RocksDB cannot
// open the database. On success the context is writable and writes issued
// with `write_opts_` skip the WAL.
Status RocksdbContext::create(
    const std::string &db_path,
    std::shared_ptr<rocksdb::MergeOperator> merge_op) {
  std::lock_guard<std::mutex> guard(mutex_);

  if (db_) {
    LOG_ERROR("RocksDB[%s] is already opened", db_path_.c_str());
    return Status::PermissionDenied();
  }

  Status path_status = validate_and_set_db_path(db_path, false);
  if (!path_status.ok()) {
    return path_status;
  }

  create_opts_.create_if_missing = true;
  prepare_options(merge_op);

  // Open RocksDB
  rocksdb::DB *raw_db = nullptr;
  rocksdb::Status open_status =
      rocksdb::DB::Open(create_opts_, db_path, &raw_db);
  if (!open_status.ok()) {
    LOG_ERROR("Failed to create RocksDB[%s], code[%d], reason[%s]",
              db_path.c_str(), open_status.code(),
              open_status.ToString().c_str());
    return Status::InternalError();
  }

  // Hand ownership of the opened database to the context.
  db_.reset(raw_db);
  read_only_ = false;
  write_opts_.disableWAL = true;
  LOG_DEBUG("Created RocksDB[%s]", db_path.c_str());
  return Status::OK();
}


// Creates a new RocksDB at `db_path` together with the column families named
// in `column_names`, and opens it for writing.
// `cf_handles_` receives one handle per entry of `column_names`, in order;
// the default column family handle is appended last when it is not listed.
// Fails with PermissionDenied when a database is already held, with the
// status of validate_and_set_db_path() when the path is rejected, and with
// InternalError when opening the database or creating a column family fails
// (in the latter case the handles are released and the database is closed).
Status RocksdbContext::create(
    const std::string &db_path, const std::vector<std::string> &column_names,
    std::shared_ptr<rocksdb::MergeOperator> merge_op) {
  std::lock_guard<std::mutex> guard(mutex_);

  if (db_) {
    LOG_ERROR("RocksDB[%s] is already opened", db_path_.c_str());
    return Status::PermissionDenied();
  }

  Status path_status = validate_and_set_db_path(db_path, false);
  if (!path_status.ok()) {
    return path_status;
  }

  create_opts_.create_if_missing = true;
  prepare_options(merge_op);

  // Open RocksDB
  rocksdb::DB *raw_db = nullptr;
  rocksdb::Status rs = rocksdb::DB::Open(create_opts_, db_path, &raw_db);
  if (!rs.ok()) {
    LOG_ERROR("Failed to create RocksDB[%s], code[%d], reason[%s]",
              db_path.c_str(), rs.code(), rs.ToString().c_str());
    return Status::InternalError();
  }
  db_.reset(raw_db);

  // Create column families; every new family uses options derived from the
  // database options.
  const rocksdb::ColumnFamilyOptions cf_options(create_opts_);
  bool default_listed = false;
  for (const auto &column_name : column_names) {
    if (column_name != rocksdb::kDefaultColumnFamilyName) {
      rocksdb::ColumnFamilyHandle *new_handle{nullptr};
      rs = raw_db->CreateColumnFamily(cf_options, column_name, &new_handle);
      if (!rs.ok()) {
        LOG_ERROR(
            "Failed to create cf[%s] in RocksDB[%s], code[%d], reason[%s]",
            column_name.c_str(), db_path.c_str(), rs.code(),
            rs.ToString().c_str());
        // Roll back: drop the handles collected so far, then the database.
        delete_cf_handles();
        raw_db->Close();
        db_.reset();
        return Status::InternalError();
      }
      cf_handles_.push_back(new_handle);
    } else {
      // The default family already exists; reuse its handle.
      cf_handles_.push_back(raw_db->DefaultColumnFamily());
      default_listed = true;
    }
  }
  if (!default_listed) {
    cf_handles_.push_back(raw_db->DefaultColumnFamily());
  }

  read_only_ = false;
  write_opts_.disableWAL = true;
  LOG_DEBUG("Created RocksDB[%s]", db_path.c_str());
  return Status::OK();
}


// Opens the existing RocksDB at `db_path`, read-only when `read_only` is set.
// Fails with PermissionDenied when a database is already held, with the
// status of validate_and_set_db_path() when the path is rejected (it must
// exist and be a directory), and with InternalError when RocksDB cannot open
// the database.
Status RocksdbContext::open(const std::string &db_path, bool read_only,
                            std::shared_ptr<rocksdb::MergeOperator> merge_op) {
  std::lock_guard<std::mutex> guard(mutex_);

  if (db_) {
    LOG_ERROR("RocksDB[%s] is already opened", db_path_.c_str());
    return Status::PermissionDenied();
  }

  Status path_status = validate_and_set_db_path(db_path, true);
  if (!path_status.ok()) {
    return path_status;
  }

  create_opts_.create_if_missing = false;
  prepare_options(merge_op);

  // Open RocksDB in the requested mode
  rocksdb::DB *raw_db = nullptr;
  rocksdb::Status open_status =
      read_only ? rocksdb::DB::OpenForReadOnly(create_opts_, db_path, &raw_db)
                : rocksdb::DB::Open(create_opts_, db_path, &raw_db);
  if (!open_status.ok()) {
    LOG_ERROR("Failed to open RocksDB[%s], code[%d], reason[%s]",
              db_path.c_str(), open_status.code(),
              open_status.ToString().c_str());
    return Status::InternalError();
  }

  db_.reset(raw_db);
  read_only_ = read_only;
  write_opts_.disableWAL = true;
  LOG_DEBUG("Opened RocksDB[%s]", db_path.c_str());
  return Status::OK();
}


// Opens the existing RocksDB at `db_path` with column families.
//
// Which column families are opened:
//   - `column_names` empty: every family found in the database;
//   - read-only with names: exactly the requested families, plus the default
//     family when it was not requested;
//   - writable with names: every family found in the database.
// Every requested name must exist in the database, otherwise InvalidArgument
// is returned. Other failures mirror the single-family open(): PermissionDenied
// when a database is already held, the status of validate_and_set_db_path()
// for a rejected path, InternalError when listing or opening fails.
Status RocksdbContext::open(const std::string &db_path,
                            const std::vector<std::string> &column_names,
                            bool read_only,
                            std::shared_ptr<rocksdb::MergeOperator> merge_op) {
  std::lock_guard<std::mutex> guard(mutex_);

  if (db_) {
    LOG_ERROR("RocksDB[%s] is already opened", db_path_.c_str());
    return Status::PermissionDenied();
  }

  Status path_status = validate_and_set_db_path(db_path, true);
  if (!path_status.ok()) {
    return path_status;
  }

  create_opts_.create_if_missing = false;
  prepare_options(merge_op);

  // Discover the column families stored in the database
  std::vector<std::string> existing_cf_names;
  rocksdb::Status rs = rocksdb::DB::ListColumnFamilies(create_opts_, db_path,
                                                       &existing_cf_names);
  if (!rs.ok()) {
    LOG_ERROR("Failed to list cf in RocksDB[%s], code[%d], reason[%s]",
              db_path.c_str(), rs.code(), rs.ToString().c_str());
    return Status::InternalError();
  }

  // Validate the requested names (nothing to do when none were given)
  bool default_requested = false;
  for (const auto &column_name : column_names) {
    const bool known =
        std::find(existing_cf_names.begin(), existing_cf_names.end(),
                  column_name) != existing_cf_names.end();
    if (!known) {
      LOG_ERROR("Column family[%s] does not exist in RocksDB[%s]",
                column_name.c_str(), db_path.c_str());
      return Status::InvalidArgument();
    }
    if (column_name == rocksdb::kDefaultColumnFamilyName) {
      default_requested = true;
    }
  }

  // Only a read-only open may be restricted to the requested families;
  // Rocksdb must be opened with all column families in write mode.
  const bool open_subset = read_only && !column_names.empty();
  const std::vector<std::string> &names_to_open =
      open_subset ? column_names : existing_cf_names;

  const rocksdb::ColumnFamilyOptions cf_options(create_opts_);
  std::vector<rocksdb::ColumnFamilyDescriptor> cf_descriptors;
  for (const auto &column_name : names_to_open) {
    cf_descriptors.emplace_back(column_name, cf_options);
  }
  if (open_subset && !default_requested) {
    cf_descriptors.emplace_back(rocksdb::kDefaultColumnFamilyName, cf_options);
  }

  // Open RocksDB
  rocksdb::DB *raw_db = nullptr;
  rs = read_only
           ? rocksdb::DB::OpenForReadOnly(create_opts_, db_path, cf_descriptors,
                                          &cf_handles_, &raw_db)
           : rocksdb::DB::Open(create_opts_, db_path, cf_descriptors,
                               &cf_handles_, &raw_db);
  if (!rs.ok()) {
    LOG_ERROR("Failed to open RocksDB[%s], code[%d], reason[%s]",
              db_path.c_str(), rs.code(), rs.ToString().c_str());
    return Status::InternalError();
  }

  db_.reset(raw_db);
  read_only_ = read_only;
  write_opts_.disableWAL = true;
  LOG_DEBUG("Opened RocksDB[%s]", db_path.c_str());
  return Status::OK();
}


// Checks `db_path` against the expectation in `should_exist` and, when the
// check passes, stores it in `db_path_`.
// Returns InvalidArgument for an empty path, for an existing path when none
// is expected, and for an existing path that is not a directory; NotFound
// when the path is expected to exist but does not; OK otherwise.
Status RocksdbContext::validate_and_set_db_path(const std::string &db_path,
                                                bool should_exist) {
  if (db_path.empty()) {
    LOG_ERROR("RocksDB path cannot be empty");
    return Status::InvalidArgument();
  }

  const bool path_exists = FILE::IsExist(db_path);

  if (path_exists && !should_exist) {
    LOG_ERROR("RocksDB path[%s] already exists", db_path.c_str());
    return Status::InvalidArgument();
  }
  if (path_exists && !FILE::IsDirectory(db_path)) {
    LOG_ERROR("RocksDB path[%s] is not a directory", db_path.c_str());
    return Status::InvalidArgument();
  }
  if (!path_exists && should_exist) {
    LOG_ERROR("RocksDB path[%s] does not exist", db_path.c_str());
    return Status::NotFound();
  }

  db_path_ = db_path;
  return Status::OK();
}


// Populates `create_opts_` with the tuning used for every database this
// context creates or opens.
//
// `merge_op` is optional: when non-null it is installed as the merge operator
// and two merge-related limits are set as well.
//
// NOTE: statement order matters. The two Optimize/Increase helpers run first
// and individual fields are assigned afterwards, so the explicit assignments
// below take precedence over whatever the helpers configured.
void RocksdbContext::prepare_options(
    std::shared_ptr<rocksdb::MergeOperator> merge_op) {
  // Increase parallelism with the helper's default thread count
  // (typically 16)
  create_opts_.IncreaseParallelism();

  // Optimize for level-based compaction style with default setting
  create_opts_.OptimizeLevelStyleCompaction();

  // TODO: enable compression?

  // Setting this to 1 means that when a memtable is full, it will be flushed
  // to disk immediately rather than being merged with other memtables
  create_opts_.min_write_buffer_number_to_merge = 1;

  // Set the block size for the arena memory allocator to 64KB, which controls
  // how much memory is allocated at a time for internal operations
  create_opts_.arena_block_size = 1024 * 64;

  // Do not create LOG.old when reopen
  create_opts_.keep_log_file_num = 1;

  // Warnings and errors only
  create_opts_.info_log_level = rocksdb::WARN_LEVEL;

  rocksdb::BlockBasedTableOptions table_options;

  // Turn on bloom filters (10 bits per key; second argument disables the
  // block-based filter builder)
  table_options.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, false));

  // Merge operator: only configured when the caller supplied one
  if (merge_op) {
    create_opts_.merge_operator = merge_op;
    create_opts_.max_successive_merges = 100;
    // 8 MiB write buffer
    create_opts_.write_buffer_size = 8 << 20;
  }

  // No explicit block cache is supplied here; presumably RocksDB then falls
  // back to its own default cache -- NOTE(review): confirm against the
  // RocksDB version in use
  table_options.block_cache = nullptr;

  auto table_factory = NewBlockBasedTableFactory(table_options);
  create_opts_.table_factory.reset(table_factory);

  // Enable statistics
  create_opts_.statistics = rocksdb::CreateDBStatistics();

  // Disable external write buffer manager, let RocksDB manage it
  create_opts_.write_buffer_manager = nullptr;

  // Reduce preallocation size for manifest file to 512KB to save disk space
  create_opts_.manifest_preallocation_size = 512 * 1024;

  // Disable direct reads (use buffered I/O instead)
  create_opts_.use_direct_reads = false;
}


// Closes the database held by this context.
//
// In write mode all data is flushed first; a flush failure aborts the close
// and its status is returned unchanged. Column family handles are released
// before the database itself is closed. `db_` is only reset when the
// underlying Close() call succeeds.
Status RocksdbContext::close() {
  std::lock_guard<std::mutex> guard(mutex_);

  if (!db_) {
    LOG_ERROR("RocksDB[%s] is not opened", db_path_.c_str());
    return Status::InternalError();
  }

  if (!read_only_) {
    auto flush_status = flush_unlocked();
    if (!flush_status.ok()) {
      LOG_ERROR("Failed to close RocksDB[%s] due to flush failure",
                db_path_.c_str());
      return flush_status;
    }
  }

  // Handles have to be gone before the DB is closed.
  delete_cf_handles();

  auto close_status = db_->Close();
  if (!close_status.ok()) {
    LOG_ERROR("Failed to close RocksDB[%s], code[%d], reason[%s]",
              db_path_.c_str(), close_status.code(),
              close_status.ToString().c_str());
    return Status::InternalError();
  }

  LOG_DEBUG("Closed RocksDB[%s]", db_path_.c_str());
  db_.reset();
  return Status::OK();
}


// Flushes every tracked column family and then the database's default column
// family. The caller must already hold `mutex_`.
//
// Returns:
//   - InternalError if the database is not opened or any flush fails;
//   - PermissionDenied if the database was opened read-only;
//   - OK otherwise.
Status RocksdbContext::flush_unlocked() {
  // Checked first so that an unopened context neither dereferences a null
  // `db_` nor consults `read_only_` before open() has assigned it.
  if (db_ == nullptr) {
    LOG_ERROR("RocksDB[%s] is not opened", db_path_.c_str());
    return Status::InternalError();
  }

  if (read_only_) {
    LOG_ERROR("Cannot flush RocksDB[%s] in read-only mode", db_path_.c_str());
    return Status::PermissionDenied();
  }

  for (const auto &cf : cf_handles_) {
    if (auto s = db_->Flush(flush_opts_, cf); !s.ok()) {
      LOG_ERROR("Failed to flush cf[%s] of RocksDB[%s], code[%d], reason[%s]",
                cf->GetName().c_str(), db_path_.c_str(), s.code(),
                s.ToString().c_str());
      return Status::InternalError();
    }
  }

  // The handle-less overload targets the default column family.
  if (auto s = db_->Flush(flush_opts_); s.ok()) {
    LOG_DEBUG("Flushed RocksDB[%s]", db_path_.c_str());
    return Status::OK();
  } else {
    LOG_ERROR("Failed to flush Rocksdb[%s], code[%d], reason[%s]",
              db_path_.c_str(), s.code(), s.ToString().c_str());
    return Status::InternalError();
  }
}


// Thread-safe entry point for flushing: takes `mutex_` and delegates to
// flush_unlocked(), whose status is returned as is.
Status RocksdbContext::flush() {
  std::lock_guard<std::mutex> guard(mutex_);
  return flush_unlocked();
}


// Creates a checkpoint of the opened database in `checkpoint_dir`.
//
// Returns InternalError if the database is not opened, if the checkpoint
// helper object cannot be created, or if writing the checkpoint fails;
// OK otherwise.
Status RocksdbContext::create_checkpoint(const std::string &checkpoint_dir) {
  std::lock_guard<std::mutex> lock(mutex_);

  // A checkpoint object built on a null DB would crash on first use.
  if (db_ == nullptr) {
    LOG_ERROR("RocksDB[%s] is not opened", db_path_.c_str());
    return Status::InternalError();
  }

  rocksdb::Checkpoint *raw_cp{nullptr};
  if (auto s = rocksdb::Checkpoint::Create(db_.get(), &raw_cp); !s.ok()) {
    LOG_ERROR(
        "Failed to create a checkpoint object of Rocksdb[%s], code[%d], "
        "reason[%s]",
        db_path_.c_str(), s.code(), s.ToString().c_str());
    return Status::InternalError();
  }

  // Own the helper object so that every exit path releases it.
  std::unique_ptr<rocksdb::Checkpoint> cp(raw_cp);

  if (auto s = cp->CreateCheckpoint(checkpoint_dir); s.ok()) {
    LOG_DEBUG("Created a checkpoint of Rocksdb[%s] to [%s]", db_path_.c_str(),
              checkpoint_dir.c_str());
    return Status::OK();
  } else {
    LOG_ERROR(
        "Failed to create a checkpoint of Rocksdb[%s], code[%d], reason[%s]",
        db_path_.c_str(), s.code(), s.ToString().c_str());
    return Status::InternalError();
  }
}


// Looks up the tracked handle whose column family name equals `cf_name`.
// Returns nullptr when no such handle is tracked. The returned pointer stays
// owned by this context.
rocksdb::ColumnFamilyHandle *RocksdbContext::get_cf(
    const std::string &cf_name) {
  std::lock_guard<std::mutex> guard(mutex_);
  const auto match =
      std::find_if(cf_handles_.begin(), cf_handles_.end(),
                   [&cf_name](rocksdb::ColumnFamilyHandle *handle) {
                     return handle->GetName() == cf_name;
                   });
  return match != cf_handles_.end() ? *match : nullptr;
}


// Creates a new column family named `cf_name`, using `create_opts_` as its
// options, and starts tracking its handle.
//
// Returns InvalidArgument for the default column family name or for a name
// that is already tracked, InternalError if RocksDB rejects the creation,
// OK otherwise.
Status RocksdbContext::create_cf(const std::string &cf_name) {
  std::lock_guard<std::mutex> guard(mutex_);

  if (cf_name == rocksdb::kDefaultColumnFamilyName) {
    LOG_ERROR("Forbidden to create default cf in RocksDB[%s]",
              db_path_.c_str());
    return Status::InvalidArgument();
  }

  const bool already_tracked =
      std::any_of(cf_handles_.begin(), cf_handles_.end(),
                  [&cf_name](rocksdb::ColumnFamilyHandle *handle) {
                    return handle->GetName() == cf_name;
                  });
  if (already_tracked) {
    LOG_ERROR("Column family[%s] already exists in RocksDB[%s]",
              cf_name.c_str(), db_path_.c_str());
    return Status::InvalidArgument();
  }

  rocksdb::ColumnFamilyHandle *new_handle{nullptr};
  auto create_status = db_->CreateColumnFamily(
      rocksdb::ColumnFamilyOptions(create_opts_), cf_name, &new_handle);
  if (!create_status.ok()) {
    LOG_ERROR("Failed to create cf[%s] in RocksDB[%s], code[%d], reason[%s]",
              cf_name.c_str(), db_path_.c_str(), create_status.code(),
              create_status.ToString().c_str());
    return Status::InternalError();
  }

  cf_handles_.push_back(new_handle);
  LOG_DEBUG("Created cf[%s] in RocksDB[%s]", cf_name.c_str(),
            db_path_.c_str());
  return Status::OK();
}


// Drops the column family named `cf_name` and releases its handle.
//
// Returns InvalidArgument for the default column family, InternalError if
// RocksDB fails to drop the family, and OK otherwise. A name that is not
// tracked is not an error: a warning is logged and OK is returned.
Status RocksdbContext::drop_cf(const std::string &cf_name) {
  std::lock_guard<std::mutex> lock(mutex_);

  if (cf_name == rocksdb::kDefaultColumnFamilyName) {
    LOG_ERROR("Forbidden to drop default cf in RocksDB[%s]", db_path_.c_str());
    return Status::InvalidArgument();
  }

  auto it = std::find_if(cf_handles_.begin(), cf_handles_.end(),
                         [&cf_name](rocksdb::ColumnFamilyHandle *handle) {
                           return handle->GetName() == cf_name;
                         });
  if (it == cf_handles_.end()) {
    LOG_WARN("Failed to find column family[%s] in RocksDB[%s]", cf_name.c_str(),
             db_path_.c_str());
    return Status::OK();
  }

  rocksdb::ColumnFamilyHandle *handle = *it;
  auto s = db_->DropColumnFamily(handle);
  if (!s.ok()) {
    LOG_ERROR("Failed to drop cf[%s] in RocksDB[%s], code[%d], reason[%s]",
              cf_name.c_str(), db_path_.c_str(), s.code(),
              s.ToString().c_str());
    return Status::InternalError();
  }

  // Stop tracking the handle, then release it through the DB (the same way
  // delete_cf_handles() does) instead of deleting it directly.
  cf_handles_.erase(it);
  if (auto ds = db_->DestroyColumnFamilyHandle(handle); !ds.ok()) {
    LOG_WARN(
        "Failed to destroy handle of cf[%s] in RocksDB[%s], code[%d], "
        "reason[%s]",
        cf_name.c_str(), db_path_.c_str(), ds.code(), ds.ToString().c_str());
  }

  LOG_DEBUG("Dropped cf[%s] in RocksDB[%s]", cf_name.c_str(),
            db_path_.c_str());
  return Status::OK();
}


// Empties the column family named `cf_name` by dropping it and creating a new
// one with the same name and the options of the dropped family. On success
// the new handle takes the old handle's slot in `cf_handles_`.
//
// Returns InvalidArgument for the default column family or an untracked name,
// InternalError if the drop or the re-creation fails, OK otherwise. If the
// re-creation fails the family no longer exists and is no longer tracked.
Status RocksdbContext::reset_cf(const std::string &cf_name) {
  std::lock_guard<std::mutex> lock(mutex_);

  if (cf_name == rocksdb::kDefaultColumnFamilyName) {
    LOG_ERROR("Forbidden to reset default cf in RocksDB[%s]", db_path_.c_str());
    return Status::InvalidArgument();
  }

  auto it = std::find_if(cf_handles_.begin(), cf_handles_.end(),
                         [&cf_name](rocksdb::ColumnFamilyHandle *handle) {
                           return handle->GetName() == cf_name;
                         });
  if (it == cf_handles_.end()) {
    LOG_ERROR("Column family[%s] does not exist in RocksDB[%s]",
              cf_name.c_str(), db_path_.c_str());
    return Status::InvalidArgument();
  }

  rocksdb::ColumnFamilyHandle *old_handle = *it;

  // Capture the options before the family disappears.
  auto options = db_->GetOptions(old_handle);
  auto s = db_->DropColumnFamily(old_handle);
  if (!s.ok()) {
    LOG_ERROR("Failed to drop cf[%s] in RocksDB[%s], code[%d], reason[%s]",
              cf_name.c_str(), db_path_.c_str(), s.code(),
              s.ToString().c_str());
    return Status::InternalError();
  }

  // Release the old handle through the DB (as delete_cf_handles() does)
  // rather than deleting it directly.
  if (auto ds = db_->DestroyColumnFamilyHandle(old_handle); !ds.ok()) {
    LOG_WARN(
        "Failed to destroy handle of cf[%s] in RocksDB[%s], code[%d], "
        "reason[%s]",
        cf_name.c_str(), db_path_.c_str(), ds.code(), ds.ToString().c_str());
  }

  rocksdb::ColumnFamilyHandle *new_cf_handle{nullptr};
  s = db_->CreateColumnFamily(options, cf_name, &new_cf_handle);
  if (!s.ok()) {
    // The slot still holds the released handle; remove it so that no later
    // lookup or close() touches a dangling pointer. `it` is still valid
    // because the vector has not been modified since the lookup.
    cf_handles_.erase(it);
    LOG_ERROR("Failed to create cf[%s] in RocksDB[%s], code[%d], reason[%s]",
              cf_name.c_str(), db_path_.c_str(), s.code(),
              s.ToString().c_str());
    return Status::InternalError();
  }

  *it = new_cf_handle;
  LOG_DEBUG("Reset cf[%s] in RocksDB[%s]", cf_name.c_str(), db_path_.c_str());
  return Status::OK();
}


void RocksdbContext::delete_cf_handles() {
  for (auto cf : cf_handles_) {
    db_->DestroyColumnFamilyHandle(cf);
  }
  cf_handles_.clear();
}


// Runs a full-range compaction on every tracked column family and then on
// the database via the handle-less overload.
//
// A failure on an individual column family is only logged; the returned
// status reflects the final, handle-less compaction alone.
Status RocksdbContext::compact() {
  std::lock_guard<std::mutex> guard(mutex_);

  for (rocksdb::ColumnFamilyHandle *handle : cf_handles_) {
    auto cf_status =
        db_->CompactRange(compact_range_opts_, handle, nullptr, nullptr);
    if (cf_status.ok()) {
      continue;
    }
    LOG_ERROR("Failed to compact cf[%s] in RocksDB[%s], code[%d], reason[%s]",
              handle->GetName().c_str(), db_path_.c_str(), cf_status.code(),
              cf_status.ToString().c_str());
  }

  auto db_status = db_->CompactRange(compact_range_opts_, nullptr, nullptr);
  if (!db_status.ok()) {
    LOG_ERROR("Failed to compact RocksDB[%s], code[%d], reason[%s]",
              db_path_.c_str(), db_status.code(),
              db_status.ToString().c_str());
    return Status::InternalError();
  }

  LOG_DEBUG("Compacted RocksDB[%s]", db_path_.c_str());
  return Status::OK();
}


// Returns the value of the "rocksdb.live-sst-files-size" property, or 0 when
// the property cannot be read.
size_t RocksdbContext::sst_file_size() {
  uint64_t live_sst_bytes = 0;
  const bool available =
      db_->GetIntProperty("rocksdb.live-sst-files-size", &live_sst_bytes);
  if (!available) {
    return 0;
  }
  return live_sst_bytes;
}


// Returns the value of the "rocksdb.estimate-num-keys" property, or 0 when
// the property cannot be read.
size_t RocksdbContext::count() {
  uint64_t estimated_keys = 0;
  const bool available =
      db_->GetIntProperty("rocksdb.estimate-num-keys", &estimated_keys);
  if (!available) {
    return 0;
  }
  return estimated_keys;
}


}  // namespace zvec

================================================
FILE: src/db/common/rocksdb_context.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#pragma once


#include <rocksdb/db.h>
#include <zvec/ailego/io/file.h>
#include <zvec/db/status.h>


namespace zvec {


// A very thin wrapper around RocksDB
// A very thin wrapper around RocksDB.
//
// The context owns the database object and the column family handles it
// hands out. The member functions that take `mutex_` are create_checkpoint(),
// get_cf(), create_cf(), drop_cf(), reset_cf(), compact(), flush() and
// close(); sst_file_size() and count() do not take it. The data members are
// public, so code that uses `db_` directly bypasses the mutex.
struct RocksdbContext {
 public:
  // The underlying database; null until create()/open() succeeds and again
  // after a successful close().
  std::unique_ptr<rocksdb::DB> db_{nullptr};
  // Filesystem path recorded by validate_and_set_db_path().
  std::string db_path_;
  // Whether the database was opened read-only. Initialized so that calls made
  // before open() never read an indeterminate value.
  bool read_only_{false};
  // Handles of the column families tracked by this context (owned).
  std::vector<rocksdb::ColumnFamilyHandle *> cf_handles_;
  // Options prepared by prepare_options(); also used for new column families.
  rocksdb::Options create_opts_;
  rocksdb::WriteOptions write_opts_;
  rocksdb::ReadOptions read_opts_;
  rocksdb::FlushOptions flush_opts_;
  rocksdb::CompactRangeOptions compact_range_opts_;
  // Serializes the member functions listed in the struct comment.
  std::mutex mutex_;


 public:
  // Create a Rocksdb instance at `db_path`, optionally with a merge operator
  Status create(const std::string &db_path,
                std::shared_ptr<rocksdb::MergeOperator> merge_op = nullptr);


  // Create a Rocksdb instance at `db_path` with the given column names
  // (presumably one column family per name -- see the implementation)
  Status create(const std::string &db_path,
                const std::vector<std::string> &column_names,
                std::shared_ptr<rocksdb::MergeOperator> merge_op = nullptr);


  // Open an existing Rocksdb instance, read-only when `read_only` is true
  Status open(const std::string &db_path, bool read_only = false,
              std::shared_ptr<rocksdb::MergeOperator> merge_op = nullptr);


  // Open an existing Rocksdb instance with the given column names,
  // read-only when `read_only` is true
  Status open(const std::string &db_path,
              const std::vector<std::string> &column_names,
              bool read_only = false,
              std::shared_ptr<rocksdb::MergeOperator> merge_op = nullptr);


  // Close the database; data is flushed first unless opened read-only
  Status close();


  // Flush data of all tracked column families; refused in read-only mode
  Status flush();


  // Create a checkpoint of the database in `checkpoint_dir`
  Status create_checkpoint(const std::string &checkpoint_dir);


  // Get the handle of a column family by name; nullptr if not tracked.
  // The handle remains owned by this context.
  rocksdb::ColumnFamilyHandle *get_cf(const std::string &cf_name);


  // Create a column family; the default column family name is rejected
  Status create_cf(const std::string &cf_name);


  // Drop a column family; the default column family name is rejected
  Status drop_cf(const std::string &cf_name);


  // Reset a column family by dropping and re-creating it under the same name
  Status reset_cf(const std::string &cf_name);


  // Compact every tracked column family over its full key range
  Status compact();


  // Get the size of the live SST files (0 if the property is unavailable)
  size_t sst_file_size();


  // Get the estimated number of keys in the database
  // (0 if the property is unavailable)
  size_t count();


 private:
  using FILE = ailego::File;


  // Check `db_path` against `should_exist` and store it in `db_path_`
  Status validate_and_set_db_path(const std::string &db_path,
                                  bool should_exist);

  // Fill `create_opts_`; installs `merge_op` when it is non-null
  void prepare_options(std::shared_ptr<rocksdb::MergeOperator> merge_op);

  // flush() without taking `mutex_`; the caller must hold it
  Status flush_unlocked();

  // Release all tracked column family handles and clear `cf_handles_`
  void delete_cf_handles();
};


}  // namespace zvec

================================================
FILE: src/db/common/status.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <unordered_map>
#include <zvec/ailego/utility/string_helper.h>
#include <zvec/db/status.h>

namespace zvec {

// Maps a status code to its fixed, human-readable default message.
// Codes without an entry in the table yield "Unknown status code".
const char *GetDefaultMessage(StatusCode code) {
  struct Entry {
    StatusCode code;
    const char *text;
  };
  static const Entry kEntries[] = {
      {StatusCode::OK, "OK"},
      {StatusCode::NOT_FOUND, "Not found"},
      {StatusCode::ALREADY_EXISTS, "Already exists"},
      {StatusCode::INVALID_ARGUMENT, "Invalid argument"},
      {StatusCode::PERMISSION_DENIED, "Permission denied"},
      {StatusCode::FAILED_PRECONDITION, "Failed precondition"},
      {StatusCode::RESOURCE_EXHAUSTED, "Resource exhausted"},
      {StatusCode::UNAVAILABLE, "Unavailable"},
      {StatusCode::INTERNAL_ERROR, "Internal error"},
      {StatusCode::NOT_SUPPORTED, "Not supported"},
      {StatusCode::UNKNOWN, "Unknown error"}};

  // The table is tiny, so a linear scan (first match wins) is sufficient.
  for (const Entry &entry : kEntries) {
    if (entry.code == code) {
      return entry.text;
    }
  }
  return "Unknown status code";
}

// Implementation of operator<<
// Streams a Status: "OK" for success, otherwise
// "Status(<default message of the code>, <status message>)".
std::ostream &operator<<(std::ostream &os, const Status &s) {
  if (s.ok()) {
    return os << "OK";
  }
  os << "Status(";
  os << GetDefaultMessage(s.code());
  os << ", ";
  os << s.message();
  os << ")";
  return os;
}

// Implementation of comparison
// Two statuses are equal when their codes match and, for non-OK codes, their
// messages match as well; the message of an OK status is ignored.
bool Status::operator==(const Status &other) const noexcept {
  const bool same_code = (code_ == other.code_);
  return same_code && (code_ == StatusCode::OK || msg_ == other.msg_);
}

}  // namespace zvec

================================================
FILE: src/db/common/typedef.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <zvec/ailego/logger/logger.h>
#include "error_code.h"

// 64-bit unsigned index type shared across the db layer.
using idx_t = uint64_t;

// Deletes the copy constructor and copy assignment operator of `TypeName`.
// Intended to be placed inside the class definition. The expansion already
// ends with a semicolon.
#define PROXIMA_DISALLOW_COPY_AND_ASSIGN(TypeName) \
  TypeName(const TypeName &) = delete;             \
  TypeName &operator=(const TypeName &) = delete;


// Suffix appended to the format string by the CLOG_* macros.
#define COLLECTION_FORMAT " collection[%s] "

// CLOG_*: collection-scoped logging. Each macro forwards to the matching
// LOG_* macro with COLLECTION_FORMAT appended to `format` and
// collection_name().c_str() passed as the last argument, so a
// collection_name() callable must be visible where the macro is expanded.
// `format` must be a string literal because it is concatenated at compile
// time.
#define CLOG_DEBUG(format, ...) \
  LOG_DEBUG(format COLLECTION_FORMAT, ##__VA_ARGS__, collection_name().c_str())

#define CLOG_INFO(format, ...) \
  LOG_INFO(format COLLECTION_FORMAT, ##__VA_ARGS__, collection_name().c_str())

#define CLOG_WARN(format, ...) \
  LOG_WARN(format COLLECTION_FORMAT, ##__VA_ARGS__, collection_name().c_str())

#define CLOG_ERROR(format, ...) \
  LOG_ERROR(format COLLECTION_FORMAT, ##__VA_ARGS__, collection_name().c_str())

#define CLOG_FATAL(format, ...) \
  LOG_FATAL(format COLLECTION_FORMAT, ##__VA_ARGS__, collection_name().c_str())

// Error log that appends " errno[%s] " with the textual description of the
// current `errno` (via std::strerror).
#define ELOG_ERROR(format, ...) \
  LOG_ERROR(format " errno[%s] ", ##__VA_ARGS__, std::strerror(errno))

// Suffix appended to the format string by the WLOG_* macros.
#define WAL_FORMAT " wal_path_[%s] "

// WLOG_*: WAL-scoped logging. Each macro forwards to the matching LOG_* macro
// with WAL_FORMAT appended to `format` and wal_path_.c_str() passed as the
// last argument, so a `wal_path_` member or variable must be visible where
// the macro is expanded.
#define WLOG_DEBUG(format, ...) \
  LOG_DEBUG(format WAL_FORMAT, ##__VA_ARGS__, wal_path_.c_str())

#define WLOG_INFO(format, ...) \
  LOG_INFO(format WAL_FORMAT, ##__VA_ARGS__, wal_path_.c_str())


#define WLOG_WARN(format, ...) \
  LOG_WARN(format WAL_FORMAT, ##__VA_ARGS__, wal_path_.c_str())

#define WLOG_ERROR(format, ...) \
  LOG_ERROR(format WAL_FORMAT, ##__VA_ARGS__, wal_path_.c_str())

#define WLOG_FATAL(format, ...) \
  LOG_FATAL(format WAL_FORMAT, ##__VA_ARGS__, wal_path_.c_str())

// Suffix appended to the format string by the SLOG_* macros.
#define SEGMENT_FORMAT " segment[%zu] collection[%s] "

// SLOG_*: segment-scoped logging. Each macro forwards to the matching LOG_*
// macro with SEGMENT_FORMAT appended to `format`, followed by segment_id()
// (cast to size_t to match %zu) and collection_name().c_str(). Both
// segment_id() and collection_name() must be visible where the macro is
// expanded.
#define SLOG_DEBUG(format, ...)                                         \
  LOG_DEBUG(format SEGMENT_FORMAT, ##__VA_ARGS__, (size_t)segment_id(), \
            collection_name().c_str())

#define SLOG_INFO(format, ...)                                         \
  LOG_INFO(format SEGMENT_FORMAT, ##__VA_ARGS__, (size_t)segment_id(), \
           collection_name().c_str())

#define SLOG_WARN(format, ...)                                         \
  LOG_WARN(format SEGMENT_FORMAT, ##__VA_ARGS__, (size_t)segment_id(), \
           collection_name().c_str())

#define SLOG_ERROR(format, ...)                                         \
  LOG_ERROR(format SEGMENT_FORMAT, ##__VA_ARGS__, (size_t)segment_id(), \
            collection_name().c_str())

#define SLOG_FATAL(format, ...)                                         \
  LOG_FATAL(format SEGMENT_FORMAT, ##__VA_ARGS__, (size_t)segment_id(), \
            collection_name().c_str())

// Suffix appended to the format string by the LLOG_* macros.
#define COLUMN_FORMAT " column[%s] segment[%zu] collection[%s] "

// LLOG_*: column-scoped logging. Each macro forwards to the matching LOG_*
// macro with COLUMN_FORMAT appended to `format`, followed by
// column_name().c_str(), segment_id() (cast to size_t to match %zu) and
// collection_name().c_str(). All three accessors must be visible where the
// macro is expanded.
#define LLOG_DEBUG(format, ...)                                         \
  LOG_DEBUG(format COLUMN_FORMAT, ##__VA_ARGS__, column_name().c_str(), \
            (size_t)segment_id(), collection_name().c_str())

#define LLOG_INFO(format, ...)                                         \
  LOG_INFO(format COLUMN_FORMAT, ##__VA_ARGS__, column_name().c_str(), \
           (size_t)segment_id(), collection_name().c_str())

#define LLOG_WARN(format, ...)                                         \
  LOG_WARN(format COLUMN_FORMAT, ##__VA_ARGS__, column_name().c_str(), \
           (size_t)segment_id(), collection_name().c_str())

#define LLOG_ERROR(format, ...)                                         \
  LOG_ERROR(format COLUMN_FORMAT, ##__VA_ARGS__, column_name().c_str(), \
            (size_t)segment_id(), collection_name().c_str())

#define LLOG_FATAL(format, ...)                                         \
  LOG_FATAL(format COLUMN_FORMAT, ##__VA_ARGS__, column_name().c_str(), \
            (size_t)segment_id(), collection_name().c_str())

// Early-return helpers. Each macro expands to a bare `if (...) { ... }`
// statement (no trailing semicolon required) that returns from the enclosing
// function when the first argument differs from the expected value.
//
// The arguments are parenthesized inside the comparison so that expression
// arguments (e.g. `a | b`) compare as a whole. Arguments may be evaluated
// more than once, so they must not have side effects.

// Logs and returns PROXIMA_ZVEC_ERROR_CODE(StatusError) when
// `status != expect`.
#define CHECK_STATUS(status, expect)                                         \
  if ((status) != (expect)) {                                                \
    LOG_ERROR("Check status failed. status[%d] expect[%d]", status, expect); \
    return PROXIMA_ZVEC_ERROR_CODE(StatusError);                             \
  }

// Same check for void functions that report through a `done` closure: sets
// the error code on `done` and returns.
#define CHECK_STATUS_CLOSURE(status, expect)                                 \
  if ((status) != (expect)) {                                                \
    LOG_ERROR("Check status failed. status[%d] expect[%d]", status, expect); \
    done->set_code(PROXIMA_ZVEC_ERROR_CODE(StatusError));                    \
    return;                                                                  \
  }

// Returns `ret` when it differs from `expect_ret`.
#define CHECK_RETURN(ret, expect_ret) \
  if ((ret) != (expect_ret)) {        \
    return ret;                       \
  }

// As CHECK_RETURN, logging with LOG_ERROR first.
#define CHECK_RETURN_WITH_LOG(ret, expect_ret, format, ...) \
  if ((ret) != (expect_ret)) {                              \
    LOG_ERROR(format, ##__VA_ARGS__);                       \
    return ret;                                             \
  }

// As CHECK_RETURN, logging with CLOG_ERROR first.
#define CHECK_RETURN_WITH_CLOG(ret, expect_ret, format, ...) \
  if ((ret) != (expect_ret)) {                               \
    CLOG_ERROR(format, ##__VA_ARGS__);                       \
    return ret;                                              \
  }

// As CHECK_RETURN, logging with SLOG_ERROR first.
#define CHECK_RETURN_WITH_SLOG(ret, expect_ret, format, ...) \
  if ((ret) != (expect_ret)) {                               \
    SLOG_ERROR(format, ##__VA_ARGS__);                       \
    return ret;                                              \
  }

// As CHECK_RETURN, logging with LLOG_ERROR first.
#define CHECK_RETURN_WITH_LLOG(ret, expect_ret, format, ...) \
  if ((ret) != (expect_ret)) {                               \
    LLOG_ERROR(format, ##__VA_ARGS__);                       \
    return ret;                                              \
  }

// Status-returning guard macros. Each expands to a bare `if (...) { ... }`
// statement. Macro arguments are parenthesized where they are tested so that
// expression arguments are evaluated as a whole; they may be evaluated more
// than once and must not have side effects.

// Returns InvalidArgument when `status != expect`. Reads `schema_->name()`
// from the expansion scope for the log message.
#define CHECK_DESTROY_RETURN_STATUS(status, expect)                     \
  if ((status) != (expect)) {                                           \
    LOG_ERROR("Collection[%s] is already destroyed.",                   \
              schema_->name().c_str());                                 \
    return Status::InvalidArgument("collection is already destroyed."); \
  }

// As CHECK_DESTROY_RETURN_STATUS, for functions returning an expected-style
// result: the Status is wrapped with tl::make_unexpected.
#define CHECK_DESTROY_RETURN_STATUS_EXPECTED(status, expect)          \
  if ((status) != (expect)) {                                         \
    LOG_ERROR("Collection[%s] is already destroyed.",                 \
              schema_->name().c_str());                               \
    return tl::make_unexpected(                                       \
        Status::InvalidArgument("collection is already destroyed.")); \
  }

// Returns `status` from the enclosing function when it is not ok().
#define CHECK_RETURN_STATUS(status) \
  if (!(status).ok()) {             \
    return status;                  \
  }

// As CHECK_RETURN_STATUS, wrapping the status with tl::make_unexpected.
#define CHECK_RETURN_STATUS_EXPECTED(status) \
  if (!(status).ok()) {                      \
    return tl::make_unexpected(status);      \
  }

// Read-only guards: return InvalidArgument("<type> is opened in read-only
// mode") when `options_.read_only_` (from the expansion scope) is true.
#define CHECK_COLLECTION_READONLY_RETURN_STATUS \
  CHECK_READONLY_RETURN_STATUS(Collection)

#define CHECK_SEGMENT_READONLY_RETURN_STATUS \
  CHECK_READONLY_RETURN_STATUS(Segment)

#define CHECK_READONLY_RETURN_STATUS(type)                \
  if (options_.read_only_) {                              \
    return Status::InvalidArgument(#type                  \
                                   " is "                 \
                                   "opened in read-only " \
                                   "mode");               \
  }

// Read-only guards for functions returning an expected-style result.
#define CHECK_COLLECTION_READONLY_RETURN_STATUS_EXPECTED \
  CHECK_READONLY_RETURN_STATUS_EXPECTED(Collection)

#define CHECK_SEGMENT_READONLY_RETURN_STATUS_EXPECTED \
  CHECK_READONLY_RETURN_STATUS_EXPECTED(Segment)

#define CHECK_READONLY_RETURN_STATUS_EXPECTED(type)                           \
  if (options_.read_only_) {                                                  \
    return tl::make_unexpected(Status::InvalidArgument(#type                  \
                                                       " is "                 \
                                                       "opened in read-only " \
                                                       "mode"));              \
  }


================================================
FILE: src/db/common/utils.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "utils.h"

namespace zvec {

// Returns the indentation for nesting depth `level`: two spaces per level.
// A zero or negative level yields an empty string (a negative value would
// otherwise convert to an enormous unsigned count and throw).
std::string indent(int level) {
  if (level <= 0) {
    return std::string();
  }
  // Widen before multiplying so that a large level cannot overflow `int`.
  return std::string(static_cast<std::string::size_type>(level) * 2, ' ');
}

}  // namespace zvec

================================================
FILE: src/db/common/utils.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <string>

namespace zvec {

// Returns the whitespace prefix for nesting depth `level`
// (two spaces per level).
std::string indent(int level);

}  // namespace zvec

================================================
FILE: src/db/index/CMakeLists.txt
================================================
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

# Static library `zvec_index`, built from the sources matched by the glob
# patterns under SRCS (paths relative to this directory).
# `cc_library` and its keywords (NAME/STATIC/STRICT/SRCS/LIBS/INCS/VERSION)
# are project helpers, presumably provided by cmake/bazel.cmake included
# above -- see that file for their exact semantics.
cc_library(
    NAME zvec_index STATIC STRICT
    SRCS *.cc segment/*.cc column/vector_column/*.cc column/inverted_column/*.cc storage/*.cc storage/wal/*.cc common/*.cc
    LIBS zvec_common
         zvec_proto
         rocksdb
         core_interface
         Arrow::arrow_static
         Arrow::arrow_compute
         Arrow::arrow_dataset
    INCS .  ${PROJECT_ROOT_DIR}/src
    VERSION "${PROXIMA_ZVEC_VERSION}"
  )


================================================
FILE: src/db/index/column/column_indexer.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>

namespace zvec {
// Empty type that only provides a shared-pointer alias.
// NOTE(review): presumably a common tag/base for the concrete column
// indexers -- confirm against the classes that use it.
class ColumnIndexer {
 public:
  // Shared-ownership pointer to a ColumnIndexer.
  using Ptr = std::shared_ptr<ColumnIndexer>;
};

}  // namespace zvec

================================================
FILE: src/db/index/column/common/index_results.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <memory>
#include "db/common/typedef.h"
#include "db/index/column/vector_column/vector_column_params.h"

namespace zvec {

// Abstract result set produced by an index lookup. Concrete result types
// implement count() and create_iterator(); callers walk the results through
// the nested Iterator interface.
class IndexResults {
 public:
  using Ptr = std::shared_ptr<IndexResults>;

  // Forward cursor over the entries of a result set. Typical use:
  //   for (auto it = results->create_iterator(); it->valid(); it->next()) ...
  class Iterator {
   public:
    virtual ~Iterator() = default;

    // Document id of the current entry.
    virtual idx_t doc_id() const = 0;

    // Score of the current entry.
    virtual float score() const = 0;

    // Advances to the next entry.
    virtual void next() = 0;

    // Whether the iterator currently points at an entry.
    virtual bool valid() const = 0;

    // Group id of the current entry; the default implementation returns a
    // reference to a shared empty string.
    virtual const std::string &group_id() const {
      return kEmpty;
    }

    // Vector data of the current entry; the default implementation returns a
    // default-constructed VectorData.
    virtual const vector_column_params::VectorData vector() const {
      return vector_column_params::VectorData{};
    }

    // Returns the sparse flag stored on this iterator (false by default).
    bool is_sparse() const {
      return is_sparse_;
    }
    // Stores the sparse flag; always returns true.
    bool set_is_sparse(bool is_sparse) {
      is_sparse_ = is_sparse;
      return true;
    }

   private:
    bool is_sparse_{false};
    // Backing storage for the default group_id() result.
    inline static const std::string kEmpty{""};
  };
  using IteratorUPtr = std::unique_ptr<IndexResults::Iterator>;

 public:
  virtual ~IndexResults() = default;

  // Number of entries in the result set.
  virtual size_t count() const = 0;

  // Creates a new iterator over the result set; the caller owns it.
  virtual IteratorUPtr create_iterator() = 0;
};


}  // namespace zvec


================================================
FILE: src/db/index/column/inverted_column/inverted_codec.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#pragma once


#include <string>
#include <roaring/roaring.h>
#include <zvec/ailego/logger/logger.h>
#include <zvec/db/status.h>
#include <zvec/db/type.h>
#include "db/common/constants.h"


namespace zvec {


/*
 *
 * This class provides encoding/decoding functionality for inverted index data.
 *
 *
 * RocksDB stores key-value pairs as byte strings. Therefore, all data types
 * must be converted to a string format for storage.
 *
 * To ensure correct lexicographic sorting of numeric values (integers and
 * floating-point numbers), we need to transform them so that their byte
 * representation sorts in the same order as their numerical values.
 *
 *
 * The codec also manages storage of mapped document IDs, using either list or
 * bitmap structures depending on the data size for optimal storage efficiency.
 *
 */
class InvertedIndexCodec {
 public:
  // Encodes a raw term so that the byte-wise order of the result matches the
  // order of the typed values.
  //
  //   term  Raw bytes of the value (numeric types are converted to big-endian
  //         by convert_to_big_endian()), or the text itself for other types.
  //   type  Element data type selecting the transformation.
  //
  // Returns the encoded key. Types without a dedicated case, and empty terms,
  // are returned unchanged.
  static std::string Encode(const std::string &term, DataType type) {
    std::string result = term;
    // Nothing to transform. This also keeps the sign-bit flips below from
    // writing to result[0] of an empty string.
    if (result.empty()) {
      return result;
    }

    switch (type) {
      case DataType::INT32:
      case DataType::INT64:
        convert_to_big_endian(result.data(), result.size());
        // For signed, reverse sign bit, set positive to 1, negative to 0
        result[0] ^= 0x80;
        break;
      case DataType::UINT32:
      case DataType::UINT64:
        convert_to_big_endian(result.data(), result.size());
        break;
      case DataType::FLOAT:
      case DataType::DOUBLE:
        convert_to_big_endian(result.data(), result.size());
        // If float is negative, negate each byte; else reverse sign bit
        if ((result[0] & 0x80) != 0) {
          for (char &byte : result) {
            byte = ~byte;
          }
        } else {
          result[0] ^= 0x80;
        }
        break;
      default:
        break;
    }
    return result;
  }


  // Encodes a raw term, given as a view, so that the byte-wise order of the
  // result matches the order of the typed values.
  //
  //   term  Raw bytes of the value (numeric types are converted to big-endian
  //         by convert_to_big_endian()), or the text itself for other types.
  //   type  Element data type selecting the transformation.
  //
  // Returns the encoded key as an owning string. Types without a dedicated
  // case, and empty terms, are returned unchanged.
  static std::string Encode(const std::string_view &term, DataType type) {
    std::string result(term);
    // Nothing to transform. This also keeps the sign-bit flips below from
    // writing to result[0] of an empty string.
    if (result.empty()) {
      return result;
    }

    switch (type) {
      case DataType::INT32:
      case DataType::INT64:
        convert_to_big_endian(result.data(), result.size());
        // For signed, reverse sign bit, set positive to 1, negative to 0
        result[0] ^= 0x80;
        break;
      case DataType::UINT32:
      case DataType::UINT64:
        convert_to_big_endian(result.data(), result.size());
        break;
      case DataType::FLOAT:
      case DataType::DOUBLE:
        convert_to_big_endian(result.data(), result.size());
        // If float is negative, negate each byte; else reverse sign bit
        if ((result[0] & 0x80) != 0) {
          for (char &byte : result) {
            byte = ~byte;
          }
        } else {
          result[0] ^= 0x80;
        }
        break;
      default:
        break;
    }
    return result;
  }


  // Encodes a boolean as its textual key: "true" or "false".
  static std::string Encode(bool value) {
    return value ? "true" : "false";
  }


  // Returns a copy of the term with its bytes in reverse order.
  static std::string Encode_Reversed(const std::string &term) {
    return std::string(term.rbegin(), term.rend());
  }


  // Format of range key:
  // [range_begin_key][separator_byte][range_end_key][range_begin_key_size]
  //
  // Splits a range key into its begin and end keys without copying: the output
  // pointers refer into the caller's buffer. The trailing size field is a
  // uint64_t read in host byte order.
  //
  // Nothing is validated here: the caller must pass a well-formed key, i.e.
  // range_key_size >= range_begin_key_size + 1 + sizeof(uint64_t).
  static void Decode_Range_Key(const char *range_key_ptr, size_t range_key_size,
                               char **range_begin_pos,
                               size_t *range_begin_key_size,
                               char **range_end_pos,
                               size_t *range_end_key_size) {
    // The size field sits at an arbitrary offset inside the key, so copy it out
    // instead of dereferencing a possibly misaligned uint64_t pointer.
    uint64_t begin_key_size = 0;
    std::memcpy(&begin_key_size,
                range_key_ptr + (range_key_size - sizeof(uint64_t)),
                sizeof(uint64_t));

    *range_begin_key_size = static_cast<size_t>(begin_key_size);
    *range_begin_pos = const_cast<char *>(range_key_ptr);

    // What remains after the begin key, the separator byte and the size field
    *range_end_key_size =
        range_key_size - sizeof(uint64_t) - (*range_begin_key_size) - 1;
    *range_end_pos =
        const_cast<char *>(range_key_ptr) + (*range_begin_key_size) + 1;
  }


  // Three-way comparison of two byte strings: the shared prefix is compared
  // with memcmp and, on a tie, the shorter string orders first.
  // Return negative number if s1 < s2, positive number if s1 > s2, 0 if equal
  static int CMP(const char *s1, size_t s1_len, const char *s2, size_t s2_len) {
    const size_t common_len = std::min(s1_len, s2_len);
    const int prefix_order = memcmp(s1, s2, common_len);
    if (prefix_order != 0) {
      return prefix_order;
    }
    if (s1_len == s2_len) {
      return 0;
    }
    return s1_len < s2_len ? -1 : +1;
  }


  // Returns true when the first prefix_len bytes of value equal prefix. A value
  // shorter than the prefix never matches.
  static bool Has_Prefix(const char *value, size_t value_len,
                         const char *prefix, size_t prefix_len) {
    return value_len >= prefix_len && memcmp(value, prefix, prefix_len) == 0;
  }


  // Serializes the document ids held in bitmap into out, replacing its content.
  // Up to INVERT_ID_LIST_SIZE_THRESHOLD ids are written as a plain id list;
  // larger sets are written as a serialized bitmap.
  //
  // Returns InvalidArgument for a null bitmap or output pointer and
  // InternalError for an empty bitmap.
  static Status Serialize(roaring_bitmap_t *bitmap, std::string *out) {
    if (bitmap == nullptr) {
      LOG_ERROR("Invalid bitmap pointer");
      return Status::InvalidArgument();
    }
    if (out == nullptr) {
      LOG_ERROR("Invalid output pointer");
      return Status::InvalidArgument();
    }
    out->clear();

    const uint64_t cardinality = roaring_bitmap_get_cardinality(bitmap);
    if (cardinality == 0) {
      LOG_ERROR("Bitmap is empty");
      return Status::InternalError();
    }
    if (cardinality <= INVERT_ID_LIST_SIZE_THRESHOLD) {
      return serialize_docid_list(bitmap, out);
    }
    return serialize_bitmap(bitmap, out);
  }


  // Rebuilds a bitmap from data produced by Serialize().
  //
  // The first byte is a header: 0 means the rest is a portable serialized
  // bitmap; any other value is the number of uint32_t document ids that follow.
  //
  //   data    Serialized bytes.
  //   size    Number of bytes in data.
  //   bitmap  Receives the newly created bitmap on success.
  //
  // Returns InvalidArgument for null/empty input or a null output pointer, and
  // InternalError when the payload is malformed or allocation fails.
  static Status Deserialize(const char *data, size_t size,
                            roaring_bitmap_t **bitmap) {
    if (!data || size == 0) {
      LOG_ERROR("Input data is invalid");
      return Status::InvalidArgument();
    }
    if (!bitmap) {
      LOG_ERROR("Invalid bitmap pointer");
      return Status::InvalidArgument();
    }

    unsigned char header = data[0];
    if (header > INVERT_ID_LIST_SIZE_THRESHOLD) {
      LOG_ERROR("Invalid header found in inverted data");
      return Status::InternalError();
    }

    if (header == 0) {  // This is a bitmap
      *bitmap = roaring_bitmap_portable_deserialize_safe(data + 1, size - 1);
      if (*bitmap) {
        return Status::OK();
      } else {
        LOG_ERROR("Failed to deserialize bitmap");
        return Status::InternalError();
      }
    }

    // This is an id list
    const size_t id_count = header;
    if ((size - 1) != id_count * sizeof(uint32_t)) {
      LOG_ERROR("Failed to deserialize docid_list");
      return Status::InternalError();
    }
    *bitmap = roaring_bitmap_create();
    if (*bitmap == nullptr) {
      LOG_ERROR("Failed to create bitmap");
      return Status::InternalError();
    }
    // The ids start one byte into the buffer, so they are not guaranteed to be
    // aligned for uint32_t; copy each one out instead of casting the pointer.
    const char *cursor = data + 1;
    for (size_t i = 0; i < id_count; ++i, cursor += sizeof(uint32_t)) {
      uint32_t doc_id = 0;
      std::memcpy(&doc_id, cursor, sizeof(doc_id));
      roaring_bitmap_add(*bitmap, doc_id);
    }
    return Status::OK();
  }


  // Unions the document ids stored in serialized data into bitmap.
  //
  //   data    Bytes produced by Serialize().
  //   size    Number of bytes in data.
  //   lazy    When true, bitmap payloads are merged with
  //           roaring_bitmap_lazy_or_inplace.
  //   bitmap  Target bitmap, modified in place.
  //
  // Returns InvalidArgument for null/empty input or a null target bitmap, and
  // InternalError when the payload is malformed.
  static Status Merge_OR(const char *data, size_t size, bool lazy,
                         roaring_bitmap_t *bitmap) {
    if (!data || size == 0) {
      LOG_ERROR("Input data is invalid");
      return Status::InvalidArgument();
    }
    if (!bitmap) {
      LOG_ERROR("Invalid bitmap pointer");
      return Status::InvalidArgument();
    }

    unsigned char header = data[0];
    if (header > INVERT_ID_LIST_SIZE_THRESHOLD) {
      LOG_ERROR("Invalid header found in inverted data");
      return Status::InternalError();
    }

    if (header == 0) {  // This is a bitmap
      // NOTE(review): the frozen variant is given no length, so the payload is
      // trusted to be a complete serialized bitmap.
      auto bitmap_other = roaring_bitmap_portable_deserialize_frozen(data + 1);
      if (!bitmap_other) {
        LOG_ERROR("Failed to deserialize bitmap");
        return Status::InternalError();
      }
      if (lazy) {
        roaring_bitmap_lazy_or_inplace(bitmap, bitmap_other, true);
      } else {
        roaring_bitmap_or_inplace(bitmap, bitmap_other);
      }
      roaring_bitmap_free(bitmap_other);
      return Status::OK();
    }

    // This is an id list
    const size_t id_count = header;
    if ((size - 1) != id_count * sizeof(uint32_t)) {
      LOG_ERROR("Failed to deserialize docid_list");
      return Status::InternalError();
    }
    // The ids start one byte into the buffer, so they are not guaranteed to be
    // aligned for uint32_t; copy each one out instead of casting the pointer.
    const char *cursor = data + 1;
    for (size_t i = 0; i < id_count; ++i, cursor += sizeof(uint32_t)) {
      uint32_t doc_id = 0;
      std::memcpy(&doc_id, cursor, sizeof(doc_id));
      roaring_bitmap_add(bitmap, doc_id);
    }
    return Status::OK();
  }


  // Intersects bitmap with the document ids stored in serialized data.
  //
  //   data    Bytes produced by Serialize().
  //   size    Number of bytes in data.
  //   bitmap  Target bitmap, modified in place.
  //
  // Returns InvalidArgument for null/empty input or a null target bitmap, and
  // InternalError when the payload is malformed.
  static Status Merge_AND(const char *data, size_t size,
                          roaring_bitmap_t *bitmap) {
    if (!data || size == 0) {
      LOG_ERROR("Input data is invalid");
      return Status::InvalidArgument();
    }
    if (!bitmap) {
      LOG_ERROR("Invalid bitmap pointer");
      return Status::InvalidArgument();
    }

    unsigned char header = data[0];
    if (header > INVERT_ID_LIST_SIZE_THRESHOLD) {
      LOG_ERROR("Invalid header found in inverted data");
      return Status::InternalError();
    }

    if (header == 0) {  // This is a bitmap
      // NOTE(review): the frozen variant is given no length, so the payload is
      // trusted to be a complete serialized bitmap.
      auto bitmap_other = roaring_bitmap_portable_deserialize_frozen(data + 1);
      if (!bitmap_other) {
        LOG_ERROR("Failed to deserialize bitmap");
        return Status::InternalError();
      }
      roaring_bitmap_and_inplace(bitmap, bitmap_other);
      roaring_bitmap_free(bitmap_other);
      return Status::OK();
    }

    // This is an id list
    const size_t id_count = header;
    if ((size - 1) != id_count * sizeof(uint32_t)) {
      LOG_ERROR("Failed to deserialize docid_list");
      return Status::InternalError();
    }

    // Keep only the listed ids that bitmap already contains. Survivors are
    // staged in a fixed buffer rather than a 32-bit mask, so the result stays
    // correct for any list length the one-byte header can express (<= 255).
    uint32_t retained[256];
    size_t retained_count = 0;
    // The ids start one byte into the buffer, so they are not guaranteed to be
    // aligned for uint32_t; copy each one out instead of casting the pointer.
    const char *cursor = data + 1;
    for (size_t i = 0; i < id_count; ++i, cursor += sizeof(uint32_t)) {
      uint32_t doc_id = 0;
      std::memcpy(&doc_id, cursor, sizeof(doc_id));
      if (roaring_bitmap_contains(bitmap, doc_id)) {
        retained[retained_count++] = doc_id;
      }
    }

    roaring_bitmap_clear(bitmap);
    for (size_t i = 0; i < retained_count; ++i) {
      roaring_bitmap_add(bitmap, retained[i]);
    }
    return Status::OK();
  }


 private:
  // Reverses the size bytes at in, in place, on little-endian hosts so that the
  // most significant byte comes first. Does nothing on big-endian hosts.
  static void convert_to_big_endian(char *in, size_t size) {
    // Probe with a value exactly two bytes wide: on a big-endian host its first
    // byte in memory is the high byte (0x12). Storing the same constant in a
    // wider integer would put a zero byte first there, so the probe would never
    // recognise a big-endian host.
    static const bool isBigEndianSystem = []() {
      const uint16_t probe = 0x1243;
      return *reinterpret_cast<const unsigned char *>(&probe) == 0x12;
    }();

    if (isBigEndianSystem) {
      return;
    }

    for (size_t head = 0, tail = size; head < size / 2; ++head) {
      --tail;
      std::swap(in[head], in[tail]);
    }
  }


  // Writes bitmap into out as one type byte (0, marking a bitmap payload)
  // followed by the portable serialized form. Returns InternalError when the
  // number of bytes written differs from the size reported by the library.
  static Status serialize_bitmap(const roaring_bitmap_t *bitmap,
                                 std::string *out) {
    const size_t expected_size = roaring_bitmap_portable_size_in_bytes(bitmap);
    out->resize(1 + expected_size);

    char *buffer = &(*out)[0];
    // Set the first byte with value 0, indicating the data is a bitmap
    buffer[0] = static_cast<char>(0);
    const size_t written_size =
        roaring_bitmap_portable_serialize(bitmap, buffer + 1);
    if (written_size != expected_size) {
      LOG_ERROR("Failed to serialize bitmap");
      return Status::InternalError();
    }
    return Status::OK();
  }


  // Appends the ids in bitmap to out as a plain list: one byte holding the id
  // count, followed by each id as a raw uint32_t in ascending iteration order.
  // The count is narrowed to a single byte, so the caller must only use this
  // for small sets (Serialize() gates it on INVERT_ID_LIST_SIZE_THRESHOLD).
  //
  // Returns InternalError when the bitmap iterator cannot be created.
  static Status serialize_docid_list(const roaring_bitmap_t *bitmap,
                                     std::string *out) {
    // Create the iterator before touching out so that a failure leaves the
    // output unchanged.
    auto iter = roaring_create_iterator(bitmap);
    if (!iter) {
      LOG_ERROR("Failed to create bitmap iterator");
      return Status::InternalError();
    }

    auto doc_count = roaring_bitmap_get_cardinality(bitmap);
    out->reserve(1 + doc_count * sizeof(uint32_t));
    // Adds a single byte at the beginning indicating the count of document IDs
    out->append(1, static_cast<unsigned char>(doc_count));

    while (iter->has_value) {
      out->append(reinterpret_cast<const char *>(&(iter->current_value)),
                  sizeof(uint32_t));
      roaring_advance_uint32_iterator(iter);
    }
    roaring_free_uint32_iterator(iter);
    return Status::OK();
  }
};


}  // namespace zvec

================================================
FILE: src/db/index/column/inverted_column/inverted_column_indexer.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#pragma once


#include <zvec/ailego/pattern/expected.hpp>
#include <zvec/db/schema.h>
#include "db/common/concurrent_roaring_bitmap.h"
#include "db/common/rocksdb_context.h"
#include "inverted_codec.h"
#include "inverted_doc_range.h"
#include "inverted_search_result.h"


namespace zvec {


//
// An inverted column indexer manages document-term indexing in two states:
// 1. Streaming:
//      - allows insertion of new terms and indexing of document-term pairs
// 2. Sealed:
//      - read-only, no further writes permitted
//      - statistical index generated for optimized search performance
//
//
// InvertedColumnIndexer requires document IDs to be sequential integers
// starting from 0 without any gaps.
// While documents can be inserted in any order, the complete sequence from 0 to
// max id must be present before sealing the index.
//
// Multiple inverted column indexers share the same RocksDB instance but each
// indexer uses its own RocksDB column families within that shared RocksDB
// instance. This design allows for efficient resource utilization while
// maintaining data separation between different columns.
//
class InvertedColumnIndexer {
 public:
  using Ptr = std::shared_ptr<InvertedColumnIndexer>;


  static Ptr CreateAndOpen(const std::string &collection_name,
                           const FieldSchema &field, RocksdbContext &context,
                           bool read_only = false);


  virtual ~InvertedColumnIndexer();


 protected:
  // Captures the collection name, the field schema and a reference to the
  // RocksDB context; path_ is copied from context.db_path_. The body is empty,
  // so no storage is opened here.
  explicit InvertedColumnIndexer(const std::string &collection_name,
                                 const FieldSchema &field,
                                 RocksdbContext &context, bool read_only)
      : collection_name_(collection_name),
        field_(field),
        path_(context.db_path_),
        ctx_(context),
        read_only_(read_only) {};

  InvertedColumnIndexer(const InvertedColumnIndexer &) = delete;
  InvertedColumnIndexer(InvertedColumnIndexer &&) = delete;
  InvertedColumnIndexer &operator=(const InvertedColumnIndexer &) = delete;
  InvertedColumnIndexer &operator=(InvertedColumnIndexer &&) = delete;


  // TODO: this constructor exists for unit tests; remove it.
  // It binds only the RocksDB context and leaves every other member at its
  // default value.
  InvertedColumnIndexer(RocksdbContext &ctx) : ctx_(ctx) {}


 public:
  /*!
   * \brief Search for documents matching the given value and operation
   * \param value The value to compare against (e.g., "5", "10")
   * \param op The comparison operation (e.g., EQ, GT, LT)
   * \return Pointer to search results containing matching documents
   */
  virtual InvertedSearchResult::Ptr search(const std::string &value,
                                           CompareOp op) const;


  /*!
   * \brief Search for documents matching multiple values
   * \param values List of values to compare against (e.g., {"5", "10", "15"})
   * \param op The comparison operation to apply (e.g., CONTAIN_ANY)
   * \return Pointer to search results containing matching documents
   */
  virtual InvertedSearchResult::Ptr multi_search(
      const std::vector<std::string> &values, CompareOp op) const;


  /*!
   * \brief Search for documents matching array length
   * \param len The array length value to compare against
   * \param op The comparison operation to apply (e.g., EQ)
   * \return Pointer to search results containing matching documents
   */
  virtual InvertedSearchResult::Ptr search_array_len(uint32_t len,
                                                     CompareOp op) const;


  /*!
   * \brief Search for documents that have null values
   * \return Pointer to search results containing matching documents
   */
  virtual InvertedSearchResult::Ptr search_null() const;


  /*!
   * \brief Search for documents that have non-null values
   * \return Pointer to search results containing matching documents
   */
  virtual InvertedSearchResult::Ptr search_non_null() const;


  /*!
   * \brief Evaluate the ratio of matching documents compared to total documents
   * \param value The value to compare against (e.g., "5", "10")
   * \param op The comparison operation (e.g., EQ, GT, LT)
   * \param total_size Pointer to store the total number of documents
   * \param range_size Pointer to store the number of matching documents
   * \return Status indicating success or failure of the evaluation
   */
  virtual Status evaluate_ratio(const std::string &value, CompareOp op,
                                uint64_t *total_size,
                                uint64_t *range_size) const;


  /*!
   * \brief Insert a document-term pair into the inverted index
   * \param id The document ID to insert
   * \param value The string-encoded representation of the value to index. This
   *              parameter may contain either a single value or an array of
   *              values depending on the field type. The underlying data type
   *              might differ from std::string - the string serves as a generic
   *              serialization buffer for the actual typed data.
   * \return Status indicating success or failure of the insert operation
   */
  Status insert(uint32_t id, const std::string &value);


  /*!
   * \brief Insert a document with multiple strings into the inverted index
   * \param id The document ID to insert
   * \param values Multiple string values to index
   * \return Status indicating success or failure of the insert operation
   */
  Status insert(uint32_t id, const std::vector<std::string> &values);


  /*!
   * \brief Insert a document with a boolean value into the inverted index
   * \param id The document ID to insert
   * \param value The boolean value to index
   * \return Status indicating success or failure of the insert operation
   */
  Status insert(uint32_t id, bool value);


  /*!
   * \brief Insert a document with multiple booleans into the inverted index
   * \param id The document ID to insert
   * \param values Multiple boolean values to index
   * \return Status indicating success or failure of the insert operation
   */
  Status insert(uint32_t id, const std::vector<bool> &values);


  /*!
   * \brief Insert a document with null value into the inverted index
   * \param id The document ID to insert
   * \return Status indicating success or failure of the insert operation
   */
  Status insert_null(uint32_t id);


  /*!
   * \brief Serialize special values, e.g., null-value bitmap and max doc id
   * \return Status indicating success or failure of the serialization
   */
  Status flush_special_values();


  /*!
   * \brief Seal the index and generate statistical indexes
   * \return Status indicating success or failure of the operation
   */
  Status seal();


  /*!
   * \brief Check if the index is sealed
   * \return True if the index is sealed, false otherwise
   */
  inline bool is_sealed() const {
    return sealed_;
  }


  /*!
   * \brief Drop the index storage
   * \return Status indicating success or failure of the operation
   */
  Status drop_storage();


  /*!
   * \brief Get the name of the corresponding collection
   * \return The name of the corresponding collection
   */
  inline const std::string &collection_name() const {
    return collection_name_;
  }


  /*!
   * \brief Build a human-readable identifier of this indexer
   * \return A string of the form
   *         "InvertedColumnIndexer[collection:<name>|field:<field>|path:'<path>']"
   */
  inline const std::string ID() const {
    std::string id{"InvertedColumnIndexer[collection:"};
    id += collection_name_;
    id += "|field:";
    id += field_.name();
    id += "|path:'";
    id += path_;
    id += "']";
    return id;
  }


 private:
  using Slice = rocksdb::Slice;
  using PinnableSlice = rocksdb::PinnableSlice;


  Status open();

  // Encodes a single term with InvertedIndexCodec according to this field's
  // element data type.
  inline std::string encode(const std::string &term) const {
    return InvertedIndexCodec::Encode(term, field_.element_data_type());
  }

  // Splits a packed buffer of fixed-size elements into individual terms and
  // encodes each one. The element size comes from the field schema; a size of 0
  // yields an empty result, and trailing bytes that do not fill a whole element
  // are ignored.
  inline std::vector<std::string> encode_array(const std::string &terms) const {
    std::vector<std::string> encoded{};
    const size_t elem_size = field_.element_data_size();
    if (elem_size == 0) {
      return encoded;
    }

    const size_t elem_count = terms.size() / elem_size;
    encoded.reserve(elem_count);
    const char *cursor = terms.data();
    for (size_t remaining = elem_count; remaining > 0; --remaining) {
      encoded.emplace_back(InvertedIndexCodec::Encode(
          std::string_view(cursor, elem_size), field_.element_data_type()));
      cursor += elem_size;
    }
    return encoded;
  }

  inline std::vector<std::string> encode(
      const std::vector<std::string> &terms) const {
    std::vector<std::string> result;
    result.reserve(terms.size());
    for (auto &term : terms) {
      result.emplace_back(encode(term));
    }
    return result;
  }

  inline std::string encode(bool value) {
    return InvertedIndexCodec::Encode(value);
  }

  // Returns the term with its bytes reversed, as produced by
  // InvertedIndexCodec::Encode_Reversed.
  inline std::string encode_reversed(const std::string &term) const {
    return InvertedIndexCodec::Encode_Reversed(term);
  }

  // Three-way byte comparison of two keys; forwards to InvertedIndexCodec::CMP.
  // Negative if s1 < s2, positive if s1 > s2, 0 if equal.
  inline int cmp(const char *s1, size_t s1_len, const char *s2,
                 size_t s2_len) const {
    return InvertedIndexCodec::CMP(s1, s1_len, s2, s2_len);
  }

  // Returns true when s1 orders before s2; with include_eq set, equal keys also
  // count as a match.
  inline bool cmp_lt(const char *s1, size_t s1_len, const char *s2,
                     size_t s2_len, bool include_eq) const {
    const int order = InvertedIndexCodec::CMP(s1, s1_len, s2, s2_len);
    if (include_eq) {
      return order <= 0;
    }
    return order < 0;
  }

  // Returns true when value starts with prefix; forwards to
  // InvertedIndexCodec::Has_Prefix.
  inline bool has_prefix(const char *value, size_t value_len,
                         const char *prefix, size_t prefix_len) const {
    return InvertedIndexCodec::Has_Prefix(value, value_len, prefix, prefix_len);
  }

  // Raises max_id_ to id when id is larger than the current value; never
  // lowers it.
  inline void update_max_id(uint32_t id) {
    uint32_t observed = max_id_.load();
    while (observed < id) {
      // A failed exchange refreshes observed with the value currently stored,
      // so the loop condition is re-evaluated against it.
      if (max_id_.compare_exchange_weak(observed, id)) {
        break;
      }
    }
  }

  inline Status estimate_range_ratio(const std::string &term, CompareOp op,
                                     uint64_t *total_count,
                                     uint64_t *matching_count) const;

  inline bool range_covers_most_values(const std::string &term,
                                       CompareOp op) const;

  inline roaring_bitmap_t *flip_bitmap(roaring_bitmap_t *bitmap) const;

  Result<roaring_bitmap_t *> get_bitmap_eq(const std::string &term) const;

  Result<roaring_bitmap_t *> get_bitmap_contain(
      const std::vector<std::string> &terms, bool is_any) const;

  Result<roaring_bitmap_t *> get_bitmap_ne(const std::string &term) const;

  Result<roaring_bitmap_t *> get_bitmap_not_contain(
      const std::vector<std::string> &terms, bool is_any) const;

  Result<roaring_bitmap_t *> get_bitmap_lt(const std::string &term,
                                           bool include_eq) const;

  Result<roaring_bitmap_t *> get_bitmap_gt(const std::string &term,
                                           bool include_eq) const;

  Result<roaring_bitmap_t *> get_bitmap_array_len_eq(uint32_t len) const;

  Result<roaring_bitmap_t *> get_bitmap_array_len_ne(uint32_t len) const;

  Result<roaring_bitmap_t *> get_bitmap_array_len_lt(uint32_t len,
                                                     bool include_eq) const;

  Result<roaring_bitmap_t *> get_bitmap_array_len_gt(uint32_t len,
                                                     bool include_eq) const;

  Result<roaring_bitmap_t *> get_bitmap_like(std::string term) const;

  Result<roaring_bitmap_t *> get_bitmap_prefix(const std::string &term) const;

  Result<roaring_bitmap_t *> get_bitmap_suffix(const std::string &term) const;

  Result<roaring_bitmap_t *> get_bitmap_null() const;

  Result<roaring_bitmap_t *> get_bitmap_non_null() const;

  rocksdb::Status index_array_len(uint32_t id, uint32_t len);

  Status generate_statistical_indexes();


 private:
  // Name of the column family holding the inverted index for terms:
  // the field name followed by INVERT_SUFFIX_TERMS.
  inline std::string cf_name_terms() const {
    return field_.name() + INVERT_SUFFIX_TERMS;
  };

  // Name of the column family holding the inverted index for reversed terms.
  inline std::string cf_name_reversed_terms() const {
    return field_.name() + INVERT_SUFFIX_REVERSED_TERMS;
  };

  // Name of the column family holding the inverted index for array length.
  inline std::string cf_name_array_len() const {
    return field_.name() + INVERT_SUFFIX_ARRAY_LEN;
  };

  // Name of the column family holding the range index.
  inline std::string cf_name_ranges() const {
    return field_.name() + INVERT_SUFFIX_RANGES;
  };

  // Name of the CDF column family. Unlike the names above, it is not prefixed
  // with the field name.
  inline std::string cf_name_cdf() const {
    return INVERT_CDF;
  };

  // Key of this field's max id special value.
  inline std::string key_max_id() const {
    return field_.name() + INVERT_KEY_MAX_ID;
  };

  // Key of this field's null-value bitmap special value.
  inline std::string key_null() const {
    return field_.name() + INVERT_KEY_NULL;
  };

  // Key of this field's is_sealed special value.
  inline std::string key_sealed() const {
    return field_.name() + INVERT_KEY_SEALED;
  };

  // Range optimization is allowed for every field that is neither an array
  // type nor of data type BOOL.
  inline bool allow_range_optimization(const FieldSchema &field) const {
    return !field.is_array_type() && field.data_type() != DataType::BOOL;
  }

  // Extended wildcard support is allowed only for fields of data type STRING.
  inline bool allow_extended_wildcard(const FieldSchema &field) const {
    return field.data_type() == DataType::STRING;
  }


 private:
  const std::string collection_name_{};
  const FieldSchema field_{};
  const std::string path_{};


  // Column families:
  // 1. cf_terms_:              Inverted index for terms
  // 2. cf_reversed_terms_:     Inverted index for reversed terms
  // 3. cf_array_len_:          Inverted index for array length
  // 4. cf_ranges_:             Range index
  // 5. cf_cdf_:                Cumulative distribution function
  // 6. default cf:             Stores special values
  //                              - null-value bitmap
  //                              - max id
  //                              - is_sealed
  //
  // Some column families are optional and may be nullptr.
  // For example, cf_ranges_ is nullptr when the indexer is not sealed (range
  // index not yet generated) or when range optimization is explicitly disabled.
  RocksdbContext &ctx_;
  rocksdb::ColumnFamilyHandle *cf_terms_{nullptr};
  rocksdb::ColumnFamilyHandle *cf_reversed_terms_{nullptr};
  rocksdb::ColumnFamilyHandle *cf_array_len_{nullptr};
  rocksdb::ColumnFamilyHandle *cf_ranges_{nullptr};
  rocksdb::ColumnFamilyHandle *cf_cdf_{nullptr};


  bool read_only_{false};
  bool sealed_{false};
  bool enable_range_optimization_{false};
  bool enable_extended_wildcard_{false};
  std::atomic<uint32_t> max_id_{0};
  ConcurrentRoaringBitmap32 null_bitmap_{};
  SegmentDocRangeStat::Ptr doc_range_stat_{nullptr};
};


};  // namespace zvec

================================================
FILE: src/db/index/column/inverted_column/inverted_column_indexer_search.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#include <optional>
#include <ailego/pattern/defer.h>
#include <zvec/ailego/pattern/expected.hpp>
#include "inverted_codec.h"
#include "inverted_column_indexer.h"


namespace zvec {


// Looks up the entry stored for term in cf_terms_ and deserializes it into a
// bitmap.
//
// Returns nullptr when the term has no entry, the deserialized bitmap on
// success (it is not freed here), and InternalError when the lookup or the
// deserialization fails.
Result<roaring_bitmap_t *> InvertedColumnIndexer::get_bitmap_eq(
    const std::string &term) const {
  PinnableSlice stored;
  auto get_status = ctx_.db_->Get(ctx_.read_opts_, cf_terms_, term, &stored);
  if (get_status.code() == rocksdb::Status::kNotFound) {
    return nullptr;
  }
  if (!get_status.ok()) {
    LOG_ERROR(
        "Failed to retrieve data for term[%s] from %s, code[%d], reason[%s]",
        term.c_str(), ID().c_str(), get_status.code(),
        get_status.ToString().c_str());
    return tl::make_unexpected(Status::InternalError());
  }

  roaring_bitmap_t *decoded{nullptr};
  Status decode_status =
      InvertedIndexCodec::Deserialize(stored.data(), stored.size(), &decoded);
  if (!decode_status.ok()) {
    LOG_ERROR(
        "Failed to deserialize bitmap for term[%s] from %s, bitmap size[%zu]",
        term.c_str(), ID().c_str(), stored.size());
    return tl::make_unexpected(Status::InternalError());
  }
  return decoded;
}


// Computes the documents that contain any (is_any == true) or all
// (is_any == false) of the given terms. All entries are fetched from cf_terms_
// with a single MultiGet and then folded into one bitmap.
//
// Returns:
//   - the merged bitmap on success (it is not freed here);
//   - nullptr when nothing matches: no term was found (contain_any) or at
//     least one term is missing (contain_all);
//   - InvalidArgument for an empty term list, or the failing status when a
//     lookup, deserialization or merge fails.
Result<roaring_bitmap_t *> InvertedColumnIndexer::get_bitmap_contain(
    const std::vector<std::string> &terms, bool is_any) const {
  if (terms.empty()) {
    LOG_ERROR("Terms is empty");
    return tl::make_unexpected(Status::InvalidArgument());
  }

  // Shall we sort the terms here? Does it make any difference in performance?
  std::vector<Slice> slice_terms(terms.begin(), terms.end());
  std::vector<PinnableSlice> bitmap_slices;
  bitmap_slices.resize(terms.size());
  std::vector<rocksdb::Status> statuses;
  statuses.resize(terms.size());
  ctx_.db_->MultiGet(ctx_.read_opts_, cf_terms_, slice_terms.size(),
                     slice_terms.data(), bitmap_slices.data(), statuses.data(),
                     false);

  roaring_bitmap_t *bitmap{nullptr};
  // s doubles as the cleanup flag: whenever it is not OK at scope exit, the
  // deferred block frees the partially built bitmap.
  Status s = Status::OK();
  AILEGO_DEFER([&]() {
    if (!s.ok() && bitmap) {
      roaring_bitmap_free(bitmap);
    }
  });

  // Folds the entry at index i into the accumulator: the first entry seen
  // creates the bitmap, later ones are OR-ed (contain_any) or AND-ed
  // (contain_all) into it. The outcome is reported through s.
  auto init_or_merge_at_i = [&](size_t i) {
    if (bitmap == nullptr) {
      s = InvertedIndexCodec::Deserialize(bitmap_slices[i].data(),
                                          bitmap_slices[i].size(), &bitmap);
      if (!s.ok()) {
        LOG_ERROR("Failed to deserialize bitmap for term[%s] from %s",
                  terms[i].c_str(), ID().c_str());
      }
      return;
    }

    if (is_any) {
      // Lazy OR; the bitmap is repaired once after the loop below
      s = InvertedIndexCodec::Merge_OR(bitmap_slices[i].data(),
                                       bitmap_slices[i].size(), true, bitmap);
    } else {
      s = InvertedIndexCodec::Merge_AND(bitmap_slices[i].data(),
                                        bitmap_slices[i].size(), bitmap);
    }
    if (!s.ok()) {
      LOG_ERROR("Failed to merge bitmap for term[%s] from %s", terms[i].c_str(),
                ID().c_str());
    }
  };

  for (size_t i = 0; i < terms.size(); i++) {
    if (statuses[i].ok()) {
      init_or_merge_at_i(i);
      if (!s.ok()) {
        return tl::make_unexpected(s);
      }
    } else if (statuses[i].code() == rocksdb::Status::kNotFound) {
      if (!is_any) {  // For contain_all, if any term is not found, return empty
        // s is set only so that the deferred cleanup releases bitmap; the
        // caller receives an empty (nullptr) result, not an error.
        s = Status::NotFound();
        return nullptr;
      }
    } else {
      LOG_ERROR(
          "Failed to retrieve data for term[%s] from %s, code[%d], reason[%s]",
          terms[i].c_str(), ID().c_str(), statuses[i].code(),
          statuses[i].ToString().c_str());
      s = Status::InternalError();
      return tl::make_unexpected(s);
    }
  }

  // Finish the lazy ORs performed above
  if (is_any && bitmap) {
    roaring_bitmap_repair_after_lazy(bitmap);
  }
  return bitmap;
}


// Collects the documents whose indexed value differs from term.
//   - Sealed: fetch the bitmap stored for term and pass it to flip_bitmap().
//   - Not sealed: scan cf_terms_ and union the entries of every other term.
//
// Returns the resulting bitmap (it is not freed here) or the failing status
// when a lookup, merge or the scan itself fails.
Result<roaring_bitmap_t *> InvertedColumnIndexer::get_bitmap_ne(
    const std::string &term) const {
  if (sealed_) {
    auto ret = get_bitmap_eq(term);
    if (ret) {
      // NOTE(review): ret.value() is nullptr when the term has no entry;
      // flip_bitmap() is assumed to handle that - confirm.
      ret = flip_bitmap(ret.value());
    } else {
      LOG_ERROR("Failed to retrieve bitmap for term[%s] from %s", term.c_str(),
                ID().c_str());
    }
    return ret;
  }

  roaring_bitmap_t *bitmap = roaring_bitmap_create();
  if (!bitmap) {
    LOG_ERROR("Failed to create bitmap");
    return tl::make_unexpected(Status::InternalError());
  }
  auto iter = ctx_.db_->NewIterator(ctx_.read_opts_, cf_terms_);
  AILEGO_DEFER([&]() { delete iter; });

  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    if (iter->key() == term) {
      continue;
    }
    Status s = InvertedIndexCodec::Merge_OR(iter->value().data(),
                                            iter->value().size(), true, bitmap);
    if (!s.ok()) {
      roaring_bitmap_free(bitmap);
      LOG_ERROR("Failed to merge bitmap from %s", ID().c_str());
      return tl::make_unexpected(s);
    }
  }

  // Valid() also turns false when the scan stops on an error, so check the
  // iterator status before treating the union as complete.
  auto iter_status = iter->status();
  if (!iter_status.ok()) {
    roaring_bitmap_free(bitmap);
    LOG_ERROR("Failed to iterate terms from %s, code[%d], reason[%s]",
              ID().c_str(), iter_status.code(), iter_status.ToString().c_str());
    return tl::make_unexpected(Status::InternalError());
  }

  roaring_bitmap_repair_after_lazy(bitmap);
  return bitmap;
}


// Computes the documents with a non-null value that do NOT contain any
// (is_any == true) or all (is_any == false) of the given terms:
//
//   result = flip(get_bitmap_contain(terms, is_any)) AND non_null
//
// The non-null set is the flipped null bitmap when sealed, otherwise the union
// of every entry in cf_terms_.
//
// Returns the resulting bitmap (it is not freed here), InvalidArgument for an
// empty term list, or the failing status of any intermediate step.
Result<roaring_bitmap_t *> InvertedColumnIndexer::get_bitmap_not_contain(
    const std::vector<std::string> &terms, bool is_any) const {
  if (terms.empty()) {
    LOG_ERROR("Terms is empty");
    return tl::make_unexpected(Status::InvalidArgument());
  }

  // Exclusive end of the document id range. Widen before adding one so that
  // the end cannot wrap around in 32 bits.
  const uint64_t range_end = static_cast<uint64_t>(max_id_.load()) + 1;

  roaring_bitmap_t *non_null_bitmap{nullptr};
  AILEGO_DEFER([&]() {
    if (non_null_bitmap) {
      roaring_bitmap_free(non_null_bitmap);
    }
  });

  if (sealed_) {
    non_null_bitmap = null_bitmap_.copy();
    if (!non_null_bitmap) {
      LOG_ERROR("Failed to create bitmap");
      return tl::make_unexpected(Status::InternalError());
    }
    roaring_bitmap_flip_inplace(non_null_bitmap, 0, range_end);
  } else {
    non_null_bitmap = roaring_bitmap_create();
    if (!non_null_bitmap) {
      LOG_ERROR("Failed to create bitmap");
      return tl::make_unexpected(Status::InternalError());
    }
    auto iter = ctx_.db_->NewIterator(ctx_.read_opts_, cf_terms_);
    AILEGO_DEFER([&]() { delete iter; });
    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
      Status s = InvertedIndexCodec::Merge_OR(
          iter->value().data(), iter->value().size(), true, non_null_bitmap);
      if (!s.ok()) {
        LOG_ERROR("Failed to merge bitmap from %s", ID().c_str());
        return tl::make_unexpected(s);
      }
    }
    // Valid() also turns false when the scan stops on an error, so check the
    // iterator status before treating the union as complete.
    auto iter_status = iter->status();
    if (!iter_status.ok()) {
      LOG_ERROR("Failed to iterate terms from %s, code[%d], reason[%s]",
                ID().c_str(), iter_status.code(),
                iter_status.ToString().c_str());
      return tl::make_unexpected(Status::InternalError());
    }
    roaring_bitmap_repair_after_lazy(non_null_bitmap);
  }

  auto ret = get_bitmap_contain(terms, is_any);
  if (!ret) {
    LOG_ERROR("Failed to retrieve bitmap[%s] from %s, term size[%zu]",
              is_any ? "contain_any" : "contain_all", ID().c_str(),
              terms.size());
    return ret;
  }

  if (ret.value() == nullptr) {
    // Nothing matched: start from an empty set so that the flip below selects
    // the whole id range.
    roaring_bitmap_t *empty_bitmap = roaring_bitmap_create();
    if (!empty_bitmap) {
      LOG_ERROR("Failed to create bitmap");
      return tl::make_unexpected(Status::InternalError());
    }
    ret = empty_bitmap;
  }

  roaring_bitmap_flip_inplace(ret.value(), 0, range_end);
  roaring_bitmap_and_inplace(ret.value(), non_null_bitmap);
  return ret;
}


// Builds the bitmap of documents whose term compares less than `term`
// (or less than or equal to it when `include_eq` is true).
//
// - BOOL fields are rejected with InternalError.
// - When range_covers_most_values() reports that the range matches most
//   values, the complementary get_bitmap_gt() query is evaluated instead and
//   its result is flipped via flip_bitmap().
// - Otherwise, on a sealed indexer with a ranges column family, the
//   pre-aggregated range bitmaps whose end boundary satisfies the comparison
//   are merged first, and the point iterator is repositioned at the begin key
//   of the first range that does not. The remaining matches are merged from
//   individual term entries.
//
// On any failure after allocation the bitmap is released by the deferred
// cleanup, which keys off the value of `s`.
Result<roaring_bitmap_t *> InvertedColumnIndexer::get_bitmap_lt(
    const std::string &term, bool include_eq) const {
  if (field_.element_data_type() == DataType::BOOL) {
    LOG_ERROR("Bool type is not supported for range query");
    return tl::make_unexpected(Status::InternalError());
  }

  // For range queries that match most values, it's more efficient to compute
  // the result by getting the complement and flipping bits.
  if (range_covers_most_values(term, CompareOp::LT)) {
    auto ret = get_bitmap_gt(term, !include_eq);
    if (ret) {
      ret = flip_bitmap(ret.value());
    } else {
      LOG_ERROR("Failed to retrieve range bitmap for term[%s] from %s",
                term.c_str(), ID().c_str());
    }
    return ret;
  }

  Status s = Status::OK();
  roaring_bitmap_t *bitmap = roaring_bitmap_create();
  if (!bitmap) {
    LOG_ERROR("Failed to create bitmap");
    return tl::make_unexpected(Status::InternalError());
  }
  // Frees the partially built bitmap on every error path; error paths must
  // therefore leave a non-OK status in `s` before returning.
  AILEGO_DEFER([&]() {
    if (!s.ok()) {
      roaring_bitmap_free(bitmap);
    }
  });

  rocksdb::Iterator *iter_point, *iter_range;
  iter_point = ctx_.db_->NewIterator(ctx_.read_opts_, cf_terms_);
  iter_point->SeekToFirst();
  AILEGO_DEFER([&]() { delete iter_point; });
  if (sealed_ && cf_ranges_) {
    iter_range = ctx_.db_->NewIterator(ctx_.read_opts_, cf_ranges_);
    iter_range->SeekToFirst();
  } else {
    iter_range = nullptr;
  }
  AILEGO_DEFER([&]() {
    if (iter_range) {
      delete iter_range;
    }
  });

  bool lt;  // True if the current range or term is "less than" the search term

  // Process pre-aggregated range entries and merge matching bitmaps
  if (iter_range && iter_range->Valid()) {
    std::optional<std::string> point_seek_start_pos;
    // 1. Merge ranges where the end boundary is less than the search term
    while (iter_range->Valid()) {
      char *range_begin, *range_end;
      size_t range_begin_key_size, range_end_key_size;
      InvertedIndexCodec::Decode_Range_Key(
          iter_range->key().data(), iter_range->key().size(), &range_begin,
          &range_begin_key_size, &range_end, &range_end_key_size);
      lt = cmp_lt(range_end, range_end_key_size, term.data(), term.length(),
                  include_eq);
      if (!lt) {
        // This range straddles (or lies beyond) the search term; its members
        // have to be examined one by one starting from its begin key.
        point_seek_start_pos.emplace(range_begin, range_begin_key_size);
        break;
      }
      s = InvertedIndexCodec::Merge_OR(
          iter_range->value().data(), iter_range->value().size(), true, bitmap);
      if (!s.ok()) {
        LOG_ERROR("Failed to merge range bitmap from %s", ID().c_str());
        return tl::make_unexpected(s);
      }
      iter_range->Next();
    }
    // 2. Change the start position of the point iterator
    if (point_seek_start_pos) {
      iter_point->Seek(*point_seek_start_pos);
      if (iter_point->Valid() && iter_point->key() != *point_seek_start_pos) {
        // The key's buffer is not NUL-terminated, so it must be copied into a
        // std::string before being formatted with %s.
        LOG_ERROR(
            "Failed to initialize the point iterator, seek_pos[%s], "
            "first_key_found[%s], term[%s]",
            (*point_seek_start_pos).c_str(),
            iter_point->key().ToString().c_str(), term.c_str());
        s = Status::InternalError();
        return tl::make_unexpected(s);
      }
    } else {
      // Every range satisfied the comparison, so move the point iterator to
      // the last term entry.
      iter_point->SeekToLast();
    }
  }

  // Process individual point entries
  while (iter_point->Valid()) {
    lt = cmp_lt(iter_point->key().data(), iter_point->key().size(), term.data(),
                term.size(), include_eq);
    if (!lt) {
      break;
    }
    s = InvertedIndexCodec::Merge_OR(iter_point->value().data(),
                                     iter_point->value().size(), true, bitmap);
    if (!s.ok()) {
      LOG_ERROR("Failed to merge range bitmap from %s", ID().c_str());
      return tl::make_unexpected(s);
    }
    iter_point->Next();
  }

  roaring_bitmap_repair_after_lazy(bitmap);
  return bitmap;
}


// Builds the bitmap of documents whose term compares greater than `term`
// (or greater than or equal to it when `include_eq` is true).
//
// BOOL fields are rejected with InternalError. When
// range_covers_most_values() reports that the range matches most values, the
// complementary get_bitmap_lt() query is evaluated and flipped instead.
// Otherwise pre-aggregated range bitmaps (sealed indexers with a ranges
// column family only) are merged first, and individual term entries are
// merged up to the begin key of the first merged range.
Result<roaring_bitmap_t *> InvertedColumnIndexer::get_bitmap_gt(
    const std::string &term, bool include_eq) const {
  if (field_.element_data_type() == DataType::BOOL) {
    LOG_ERROR("Bool type is not supported for range query");
    return tl::make_unexpected(Status::InternalError());
  }

  // For range queries that match most values, it's more efficient to compute
  // the result by getting the complement and flipping bits.
  if (range_covers_most_values(term, CompareOp::GT)) {
    auto ret = get_bitmap_lt(term, !include_eq);
    if (ret) {
      ret = flip_bitmap(ret.value());
    } else {
      LOG_ERROR("Failed to retrieve range bitmap for term[%s] from %s",
                term.c_str(), ID().c_str());
    }
    return ret;
  }

  Status s = Status::OK();
  roaring_bitmap_t *bitmap = roaring_bitmap_create();
  if (!bitmap) {
    LOG_ERROR("Failed to create bitmap");
    return tl::make_unexpected(Status::InternalError());
  }
  // Frees the partially built bitmap whenever the function exits with a
  // non-OK status stored in `s`.
  AILEGO_DEFER([&]() {
    if (!s.ok()) {
      roaring_bitmap_free(bitmap);
    }
  });

  rocksdb::Iterator *iter_point, *iter_range;
  iter_point = ctx_.db_->NewIterator(ctx_.read_opts_, cf_terms_);
  AILEGO_DEFER([&]() { delete iter_point; });
  // Range entries are only consulted on a sealed indexer that has a ranges
  // column family.
  if (sealed_ && cf_ranges_) {
    iter_range = ctx_.db_->NewIterator(ctx_.read_opts_, cf_ranges_);
  } else {
    iter_range = nullptr;
  }
  AILEGO_DEFER([&]() {
    if (iter_range) {
      delete iter_range;
    }
  });

  // Begin key of the first range that gets merged wholesale; the point scan
  // below stops once it reaches this key. Left empty when no range is merged.
  std::optional<std::string> point_seek_end_pos;

  // Process pre-aggregated range entries and merge matching bitmaps
  if (iter_range) {
    // 1. Seek to the first range entry that is greater than the search term
    iter_range->Seek(term);
    if (iter_range->Valid()) {
      char *range_begin, *range_end;
      size_t range_begin_key_size, range_end_key_size;
      InvertedIndexCodec::Decode_Range_Key(
          iter_range->key().data(), iter_range->key().size(), &range_begin,
          &range_begin_key_size, &range_end, &range_end_key_size);
      int ret =
          cmp(range_begin, range_begin_key_size, term.data(), term.size());
      if (ret == 0 && !include_eq) {
        // The range starts exactly at the search term, which must be excluded
        // for a strict comparison: skip this range and let the point scan
        // cover its entries instead.
        iter_range->Next();
        if (iter_range->Valid()) {
          InvertedIndexCodec::Decode_Range_Key(
              iter_range->key().data(), iter_range->key().size(), &range_begin,
              &range_begin_key_size, &range_end, &range_end_key_size);
          point_seek_end_pos.emplace(range_begin, range_begin_key_size);
        }
      } else {
        point_seek_end_pos.emplace(range_begin, range_begin_key_size);
      }
    }
    // 2. Merge ranges where the begin boundary is greater than the search term
    while (iter_range->Valid()) {
      s = InvertedIndexCodec::Merge_OR(
          iter_range->value().data(), iter_range->value().size(), true, bitmap);
      if (!s.ok()) {
        LOG_ERROR("Failed to merge range bitmap from %s", ID().c_str());
        return tl::make_unexpected(s);
      }
      iter_range->Next();
    }
  }

  // Process individual point entries
  iter_point->Seek(term);
  if (!include_eq) {
    // Strict comparison: step over an entry that equals the search term.
    if (iter_point->Valid() && iter_point->key() == term) {
      iter_point->Next();
    }
  }
  while (iter_point->Valid()) {
    // Stop at the first key already covered by the merged range bitmaps.
    if (point_seek_end_pos &&
        cmp(iter_point->key().data(), iter_point->key().size(),
            (*point_seek_end_pos).data(), (*point_seek_end_pos).size()) >= 0) {
      break;
    }
    s = InvertedIndexCodec::Merge_OR(iter_point->value().data(),
                                     iter_point->value().size(), true, bitmap);
    if (!s.ok()) {
      LOG_ERROR("Failed to merge range bitmap from %s", ID().c_str());
      return tl::make_unexpected(s);
    }
    iter_point->Next();
  }

  roaring_bitmap_repair_after_lazy(bitmap);
  return bitmap;
}


// Fetches the bitmap stored in the array-length column family under the
// encoded form of `len`.
//
// Returns nullptr when no entry exists for that length, the deserialized
// bitmap on success, and InternalError when the lookup or the deserialization
// fails.
Result<roaring_bitmap_t *> InvertedColumnIndexer::get_bitmap_array_len_eq(
    uint32_t len) const {
  const std::string len_key = InvertedIndexCodec::Encode(
      std::string(reinterpret_cast<const char *>(&len), sizeof(uint32_t)),
      DataType::UINT32);

  PinnableSlice stored;
  auto lookup =
      ctx_.db_->Get(ctx_.read_opts_, cf_array_len_, len_key, &stored);
  if (!lookup.ok() && lookup.code() == rocksdb::Status::kNotFound) {
    // No document has an array of this length.
    return nullptr;
  }
  if (!lookup.ok()) {
    LOG_ERROR(
        "Failed to retrieve data for len[%u] from %s, code[%d], reason[%s]",
        len, ID().c_str(), lookup.code(), lookup.ToString().c_str());
    return tl::make_unexpected(Status::InternalError());
  }

  roaring_bitmap_t *decoded{nullptr};
  Status decode_status =
      InvertedIndexCodec::Deserialize(stored.data(), stored.size(), &decoded);
  if (!decode_status.ok()) {
    LOG_ERROR(
        "Failed to deserialize bitmap for len[%u] from %s, bitmap size[%zu]",
        len, ID().c_str(), stored.size());
    return tl::make_unexpected(Status::InternalError());
  }
  return decoded;
}


// Unions the bitmaps of every entry in the array-length column family whose
// key differs from the encoded form of `len`.
//
// Returns InternalError if the result bitmap cannot be allocated, or the
// merge error if any entry fails to merge (the partial bitmap is freed).
Result<roaring_bitmap_t *> InvertedColumnIndexer::get_bitmap_array_len_ne(
    uint32_t len) const {
  const std::string excluded_key = InvertedIndexCodec::Encode(
      std::string(reinterpret_cast<const char *>(&len), sizeof(uint32_t)),
      DataType::UINT32);

  roaring_bitmap_t *merged = roaring_bitmap_create();
  if (merged == nullptr) {
    LOG_ERROR("Failed to create bitmap");
    return tl::make_unexpected(Status::InternalError());
  }

  auto cursor = ctx_.db_->NewIterator(ctx_.read_opts_, cf_array_len_);
  AILEGO_DEFER([&]() { delete cursor; });

  for (cursor->SeekToFirst(); cursor->Valid(); cursor->Next()) {
    // Skip the single entry that matches the excluded length.
    if (cursor->key() == excluded_key) {
      continue;
    }
    Status merge_status = InvertedIndexCodec::Merge_OR(
        cursor->value().data(), cursor->value().size(), true, merged);
    if (!merge_status.ok()) {
      roaring_bitmap_free(merged);
      LOG_ERROR("Failed to merge bitmap from %s", ID().c_str());
      return tl::make_unexpected(merge_status);
    }
  }

  roaring_bitmap_repair_after_lazy(merged);
  return merged;
}


// Unions the bitmaps of the array-length entries, scanned from the first key,
// for as long as cmp_lt() reports the key as less than the encoded `len`
// (`include_eq` is forwarded to cmp_lt()).
//
// Returns InternalError if the result bitmap cannot be allocated, or the
// merge error if any entry fails to merge (the partial bitmap is freed).
Result<roaring_bitmap_t *> InvertedColumnIndexer::get_bitmap_array_len_lt(
    uint32_t len, bool include_eq) const {
  const std::string bound_key = InvertedIndexCodec::Encode(
      std::string(reinterpret_cast<const char *>(&len), sizeof(uint32_t)),
      DataType::UINT32);

  roaring_bitmap_t *merged = roaring_bitmap_create();
  if (merged == nullptr) {
    LOG_ERROR("Failed to create bitmap");
    return tl::make_unexpected(Status::InternalError());
  }

  auto cursor = ctx_.db_->NewIterator(ctx_.read_opts_, cf_array_len_);
  AILEGO_DEFER([&]() { delete cursor; });

  for (cursor->SeekToFirst(); cursor->Valid(); cursor->Next()) {
    const bool below_bound =
        cmp_lt(cursor->key().data(), cursor->key().size(), bound_key.data(),
               bound_key.size(), include_eq);
    if (!below_bound) {
      // The scan stops at the first key that fails the comparison.
      break;
    }
    Status merge_status = InvertedIndexCodec::Merge_OR(
        cursor->value().data(), cursor->value().size(), true, merged);
    if (!merge_status.ok()) {
      roaring_bitmap_free(merged);
      LOG_ERROR("Failed to merge bitmap from %s", ID().c_str());
      return tl::make_unexpected(merge_status);
    }
  }

  roaring_bitmap_repair_after_lazy(merged);
  return merged;
}


// Unions the bitmaps of every array-length entry from the encoded `len`
// onwards. When `include_eq` is false, an entry whose key equals the encoded
// `len` is skipped.
//
// Returns InternalError if the result bitmap cannot be allocated, or the
// merge error if any entry fails to merge (the partial bitmap is freed).
Result<roaring_bitmap_t *> InvertedColumnIndexer::get_bitmap_array_len_gt(
    uint32_t len, bool include_eq) const {
  const std::string bound_key = InvertedIndexCodec::Encode(
      std::string(reinterpret_cast<const char *>(&len), sizeof(uint32_t)),
      DataType::UINT32);

  roaring_bitmap_t *merged = roaring_bitmap_create();
  if (merged == nullptr) {
    LOG_ERROR("Failed to create bitmap");
    return tl::make_unexpected(Status::InternalError());
  }

  auto cursor = ctx_.db_->NewIterator(ctx_.read_opts_, cf_array_len_);
  AILEGO_DEFER([&]() { delete cursor; });

  cursor->Seek(bound_key);
  const bool skip_exact_match =
      !include_eq && cursor->Valid() && cursor->key() == bound_key;
  if (skip_exact_match) {
    cursor->Next();
  }

  for (; cursor->Valid(); cursor->Next()) {
    Status merge_status = InvertedIndexCodec::Merge_OR(
        cursor->value().data(), cursor->value().size(), true, merged);
    if (!merge_status.ok()) {
      roaring_bitmap_free(merged);
      LOG_ERROR("Failed to merge bitmap from %s", ID().c_str());
      return tl::make_unexpected(merge_status);
    }
  }

  roaring_bitmap_repair_after_lazy(merged);
  return merged;
}


// Evaluates a LIKE pattern that contains at most one unescaped '%' wildcard.
//
// `\%` and `\_` (in fact any backslash-escaped character) are unescaped in
// place; an escaped '%' is treated as a literal and does not count as a
// wildcard. After unescaping, the pattern is dispatched as follows:
//   "%"            -> all non-null documents
//   no '%'         -> equality lookup
//   "%suffix"      -> suffix match on "suffix"
//   "prefix%"      -> prefix match on "prefix"
//   "prefix%suffix"-> intersection of the prefix and suffix matches
// Patterns with more than one unescaped '%' yield InvalidArgument.
Result<roaring_bitmap_t *> InvertedColumnIndexer::get_bitmap_like(
    std::string term) const {
  // convert to `is not null` if `%` is the only character
  if (term == "%") {
    return get_bitmap_non_null();
  }
  size_t percent_loc = std::string::npos;
  size_t size = 0;
  int percent_count = 0;
  // unescape \% and \_, detect % location
  for (size_t i = 0; i < term.size(); i++) {
    if (term[i] == '\\') {
      i++;
      if (i < term.size()) {
        term[size++] = term[i];
      }
      continue;
    }
    if (term[i] == '%') {
      // Position is recorded in the unescaped (compacted) string.
      percent_loc = size;
      percent_count += 1;
    }
    term[size++] = term[i];
  }
  term.resize(size);
  // convert to `=` filter if no percent
  if (percent_count == 0) {
    return get_bitmap_eq(term);
  } else if (percent_count != 1) {
    return tl::make_unexpected(Status::InvalidArgument(
        "like should have exactly one percent, unescaped:", term));
  }

  if (percent_loc == 0) {
    // Leading wildcard: the suffix to match is everything after the '%'.
    return get_bitmap_suffix(term.substr(1));
  } else if (percent_loc == size - 1) {
    // Trailing wildcard: the prefix to match is everything before the '%'.
    return get_bitmap_prefix(term.substr(0, percent_loc));
  }

  // Wildcard in the middle: split around the '%' (exclusive on both sides).
  std::string prefix = term.substr(0, percent_loc);
  std::string suffix = term.substr(percent_loc + 1);
  auto prefix_bitmap = get_bitmap_prefix(prefix);
  if (!prefix_bitmap.has_value()) {
    return tl::make_unexpected(
        Status::InternalError("Get bitmap prefix failed, unescaped:", term));
  }
  auto suffix_bitmap = get_bitmap_suffix(suffix);
  if (!suffix_bitmap.has_value()) {
    // The prefix bitmap has already been allocated; release it so the error
    // path does not leak.
    if (prefix_bitmap.value() != nullptr) {
      roaring_bitmap_free(prefix_bitmap.value());
    }
    return tl::make_unexpected(
        Status::InternalError("Get bitmap suffix failed, unescaped:", term));
  }
  auto *result = prefix_bitmap.value();
  roaring_bitmap_and_inplace(result, suffix_bitmap.value());
  roaring_bitmap_free(suffix_bitmap.value());
  return result;
}


// Unions the bitmaps of all entries in the terms column family whose key has
// `term` as a prefix, scanning forward from the first key >= `term`.
//
// Returns InternalError if the result bitmap cannot be allocated, or the
// merge error if any entry fails to merge (the partial bitmap is freed).
Result<roaring_bitmap_t *> InvertedColumnIndexer::get_bitmap_prefix(
    const std::string &term) const {
  auto cursor = ctx_.db_->NewIterator(ctx_.read_opts_, cf_terms_);
  AILEGO_DEFER([&]() { delete cursor; });

  roaring_bitmap_t *merged = roaring_bitmap_create();
  if (merged == nullptr) {
    LOG_ERROR("Failed to create bitmap");
    return tl::make_unexpected(Status::InternalError());
  }

  for (cursor->Seek(term); cursor->Valid(); cursor->Next()) {
    const bool key_matches = has_prefix(
        cursor->key().data(), cursor->key().size(), term.data(), term.size());
    if (!key_matches) {
      // The scan ends at the first key that no longer carries the prefix.
      break;
    }
    Status merge_status = InvertedIndexCodec::Merge_OR(
        cursor->value().data(), cursor->value().size(), true, merged);
    if (!merge_status.ok()) {
      roaring_bitmap_free(merged);
      LOG_ERROR("Failed to merge range bitmap from %s", ID().c_str());
      return tl::make_unexpected(merge_status);
    }
  }

  roaring_bitmap_repair_after_lazy(merged);
  return merged;
}


// Unions the bitmaps of all entries in the reversed-terms column family whose
// key has encode_reversed(`term`) as a prefix.
//
// Returns PermissionDenied when the indexer has no reversed-terms column
// family, InternalError if the result bitmap cannot be allocated, or the
// merge error if any entry fails to merge (the partial bitmap is freed).
Result<roaring_bitmap_t *> InvertedColumnIndexer::get_bitmap_suffix(
    const std::string &term) const {
  if (cf_reversed_terms_ == nullptr) {
    LOG_ERROR("%s doesn't support suffix matching", ID().c_str());
    return tl::make_unexpected(Status::PermissionDenied());
  }

  auto cursor = ctx_.db_->NewIterator(ctx_.read_opts_, cf_reversed_terms_);
  AILEGO_DEFER([&]() { delete cursor; });

  roaring_bitmap_t *merged = roaring_bitmap_create();
  if (merged == nullptr) {
    LOG_ERROR("Failed to create bitmap");
    return tl::make_unexpected(Status::InternalError());
  }

  auto lookup_key = encode_reversed(term);
  for (cursor->Seek(lookup_key); cursor->Valid(); cursor->Next()) {
    const bool key_matches =
        has_prefix(cursor->key().data(), cursor->key().size(),
                   lookup_key.data(), lookup_key.size());
    if (!key_matches) {
      // The scan ends at the first key that no longer carries the prefix.
      break;
    }
    Status merge_status = InvertedIndexCodec::Merge_OR(
        cursor->value().data(), cursor->value().size(), true, merged);
    if (!merge_status.ok()) {
      roaring_bitmap_free(merged);
      LOG_ERROR("Failed to merge range bitmap from %s", ID().c_str());
      return tl::make_unexpected(merge_status);
    }
  }

  roaring_bitmap_repair_after_lazy(merged);
  return merged;
}


// Returns a fresh copy of the indexer's null bitmap.
Result<roaring_bitmap_t *> InvertedColumnIndexer::get_bitmap_null() const {
  roaring_bitmap_t *null_copy = null_bitmap_.copy();
  return null_copy;
}


// Builds the bitmap of documents that have a non-null value.
//
// Sealed indexer: a copy of the null bitmap flipped over [0, max_id_ + 1).
// Unsealed indexer: the union of every bitmap in the terms column family;
// returns InternalError if the bitmap cannot be allocated, or the merge error
// if an entry fails to merge (the partial bitmap is freed).
Result<roaring_bitmap_t *> InvertedColumnIndexer::get_bitmap_non_null() const {
  if (sealed_) {
    roaring_bitmap_t *flipped = null_bitmap_.copy();
    roaring_bitmap_flip_inplace(flipped, 0, max_id_ + 1);
    return flipped;
  }

  auto cursor = ctx_.db_->NewIterator(ctx_.read_opts_, cf_terms_);
  AILEGO_DEFER([&]() { delete cursor; });

  roaring_bitmap_t *merged = roaring_bitmap_create();
  if (merged == nullptr) {
    LOG_ERROR("Failed to create bitmap");
    return tl::make_unexpected(Status::InternalError());
  }

  for (cursor->SeekToFirst(); cursor->Valid(); cursor->Next()) {
    Status merge_status = InvertedIndexCodec::Merge_OR(
        cursor->value().data(), cursor->value().size(), true, merged);
    if (!merge_status.ok()) {
      roaring_bitmap_free(merged);
      LOG_ERROR("Failed to merge bitmap from %s", ID().c_str());
      return tl::make_unexpected(merge_status);
    }
  }

  roaring_bitmap_repair_after_lazy(merged);
  return merged;
}


// Runs a single-value comparison against a non-array field.
//
// `value` is encoded with encode() and dispatched to the bitmap getter that
// corresponds to `op`. Returns a search result that takes ownership of the
// produced bitmap, or nullptr when the field is an array type, the operator is
// not supported here, or the underlying lookup fails.
InvertedSearchResult::Ptr InvertedColumnIndexer::search(
    const std::string &value, CompareOp op) const {
  if (field_.is_array_type()) {
    LOG_ERROR("%s: array type doesn't support single value search",
              ID().c_str());
    return nullptr;
  }

  std::string encoded = encode(value);
  Result<roaring_bitmap_t *> lookup;

  switch (op) {
    case CompareOp::EQ:
      lookup = get_bitmap_eq(encoded);
      break;
    case CompareOp::NE:
      lookup = get_bitmap_ne(encoded);
      break;
    case CompareOp::LT:
      lookup = get_bitmap_lt(encoded, false);
      break;
    case CompareOp::LE:
      lookup = get_bitmap_lt(encoded, true);
      break;
    case CompareOp::GT:
      lookup = get_bitmap_gt(encoded, false);
      break;
    case CompareOp::GE:
      lookup = get_bitmap_gt(encoded, true);
      break;
    case CompareOp::LIKE:
      // get_bitmap_like takes its pattern by value and rewrites it in place.
      lookup = get_bitmap_like(std::move(encoded));
      break;
    case CompareOp::HAS_PREFIX:
      lookup = get_bitmap_prefix(encoded);
      break;
    case CompareOp::HAS_SUFFIX:
      lookup = get_bitmap_suffix(encoded);
      break;
    default:
      LOG_ERROR("%s: unsupported operator[%u]", ID().c_str(),
                static_cast<uint32_t>(op));
      return nullptr;
  }

  if (!lookup) {
    LOG_ERROR("%s: failed to search, code[%d]", ID().c_str(),
              static_cast<int>(lookup.error().code()));
    return nullptr;
  }

  auto outcome = std::make_shared<InvertedSearchResult>();
  outcome->set_and_own_bitmap(lookup.value());
  return outcome;
}


// Runs a multi-value containment query.
//
// `values` are encoded with encode() and dispatched to get_bitmap_contain()
// or get_bitmap_not_contain() according to `op`. Returns a search result that
// takes ownership of the produced bitmap, or nullptr when the operator is not
// one of the four CONTAIN variants or the underlying lookup fails.
InvertedSearchResult::Ptr InvertedColumnIndexer::multi_search(
    const std::vector<std::string> &values, CompareOp op) const {
  auto encoded = encode(values);
  Result<roaring_bitmap_t *> lookup;

  switch (op) {
    case CompareOp::CONTAIN_ANY:
      lookup = get_bitmap_contain(encoded, true);
      break;
    case CompareOp::CONTAIN_ALL:
      lookup = get_bitmap_contain(encoded, false);
      break;
    case CompareOp::NOT_CONTAIN_ANY:
      lookup = get_bitmap_not_contain(encoded, true);
      break;
    case CompareOp::NOT_CONTAIN_ALL:
      lookup = get_bitmap_not_contain(encoded, false);
      break;
    default:
      LOG_ERROR("%s: unsupported operator[%u]", ID().c_str(),
                static_cast<uint32_t>(op));
      return nullptr;
  }

  if (!lookup) {
    LOG_ERROR("%s: failed to search, code[%d]", ID().c_str(),
              static_cast<int>(lookup.error().code()));
    return nullptr;
  }

  auto outcome = std::make_shared<InvertedSearchResult>();
  outcome->set_and_own_bitmap(lookup.value());
  return outcome;
}


// Runs a comparison on the array length of an array-typed field.
//
// `op` must be one of EQ, NE, LT, LE, GT or GE; it selects the matching
// get_bitmap_array_len_* helper. Returns a search result that takes ownership
// of the produced bitmap, or nullptr when the field is not an array type, the
// operator is unsupported, or the underlying lookup fails.
InvertedSearchResult::Ptr InvertedColumnIndexer::search_array_len(
    uint32_t len, CompareOp op) const {
  if (!field_.is_array_type()) {
    LOG_ERROR("%s: non-array type doesn't support array length search",
              ID().c_str());
    return nullptr;
  }

  auto search_res = std::make_shared<InvertedSearchResult>();
  Result<roaring_bitmap_t *> bitmap_res;

  switch (op) {
    case CompareOp::EQ: {
      bitmap_res = get_bitmap_array_len_eq(len);
      break;
    }
    case CompareOp::NE: {
      bitmap_res = get_bitmap_array_len_ne(len);
      break;
    }
    case CompareOp::LT: {
      bitmap_res = get_bitmap_array_len_lt(len, false);
      break;
    }
    case CompareOp::LE: {
      bitmap_res = get_bitmap_array_len_lt(len, true);
      break;
    }
    case CompareOp::GT: {
      bitmap_res = get_bitmap_array_len_gt(len, false);
      break;
    }
    case CompareOp::GE: {
      bitmap_res = get_bitmap_array_len_gt(len, true);
      break;
    }
    default:
      LOG_ERROR("%s: unsupported operator[%u]", ID().c_str(),
                static_cast<uint32_t>(op));
      return nullptr;
  }

  if (bitmap_res) {
    search_res->set_and_own_bitmap(bitmap_res.value());
    return search_res;
  } else {
    LOG_ERROR("%s: failed to search, code[%d]", ID().c_str(),
              static_cast<int>(bitmap_res.error().code()));
    return nullptr;
  }
}


// Returns a search result owning the bitmap produced by get_bitmap_null(),
// or nullptr (after logging the error code) if that lookup fails.
InvertedSearchResult::Ptr InvertedColumnIndexer::search_null() const {
  auto outcome = std::make_shared<InvertedSearchResult>();
  auto lookup = get_bitmap_null();
  if (!lookup) {
    LOG_ERROR("%s: failed to search, code[%d]", ID().c_str(),
              static_cast<int>(lookup.error().code()));
    return nullptr;
  }
  outcome->set_and_own_bitmap(lookup.value());
  return outcome;
}


// Returns a search result owning the bitmap produced by
// get_bitmap_non_null(), or nullptr (after logging the error code) if that
// lookup fails.
InvertedSearchResult::Ptr InvertedColumnIndexer::search_non_null() const {
  auto outcome = std::make_shared<InvertedSearchResult>();
  auto lookup = get_bitmap_non_null();
  if (!lookup) {
    LOG_ERROR("%s: failed to search, code[%d]", ID().c_str(),
              static_cast<int>(lookup.error().code()));
    return nullptr;
  }
  outcome->set_and_own_bitmap(lookup.value());
  return outcome;
}


// Reports, through `total_size` and `range_size`, the statistics that
// doc_range_stat_ computes for `value` under operator `op`.
//
// Array-typed fields are rejected with PermissionDenied. When the indexer is
// not sealed or has no range statistics, the outputs are set to
// range_size = 0 and total_size = 1. `value` is encoded with encode() before
// being handed to the statistics object.
Status InvertedColumnIndexer::evaluate_ratio(const std::string &value,
                                             CompareOp op, uint64_t *total_size,
                                             uint64_t *range_size) const {
  if (field_.is_array_type()) {
    LOG_ERROR("%s: array type doesn't support ratio evaluation", ID().c_str());
    return Status::PermissionDenied();
  }

  const bool stats_available = sealed_ && doc_range_stat_;
  if (!stats_available) {
    *range_size = 0;
    *total_size = 1;
    return Status::OK();
  }

  const std::string encoded = encode(value);
  doc_range_stat_->evaluate_ratio(encoded, op, total_size, range_size);
  return Status::OK();
}


// Same as evaluate_ratio(), but takes an already-encoded `term` and also
// rejects BOOL fields.
//
// Returns PermissionDenied for array or BOOL fields. When the indexer is not
// sealed or has no range statistics, the outputs are set to
// matching_count = 0 and total_count = 1.
inline Status InvertedColumnIndexer::estimate_range_ratio(
    const std::string &term, CompareOp op, uint64_t *total_count,
    uint64_t *matching_count) const {
  const bool unsupported_type =
      field_.is_array_type() || field_.element_data_type() == DataType::BOOL;
  if (unsupported_type) {
    LOG_ERROR("%s: type[%d] doesn't support range ratio estimation",
              ID().c_str(), static_cast<int>(field_.data_type()));
    return Status::PermissionDenied();
  }

  const bool stats_available = sealed_ && doc_range_stat_;
  if (!stats_available) {
    *matching_count = 0;
    *total_count = 1;
    return Status::OK();
  }

  doc_range_stat_->evaluate_ratio(term, op, total_count, matching_count);
  return Status::OK();
}


// Tells whether the range query (`term`, `op`) is estimated to match more
// than 70% of the values, in which case callers compute the complement and
// flip it instead.
//
// Always false for unsealed indexers, when the estimation fails, or when the
// estimated total count is zero.
inline bool InvertedColumnIndexer::range_covers_most_values(
    const std::string &term, CompareOp op) const {
  constexpr float kMostValuesThreshold = 0.7;

  // Estimation is only available for sealed indexes as they have the cumulative
  // distribution index.
  if (!sealed_) {
    return false;
  }

  uint64_t total{0};
  uint64_t matching{0};
  const auto estimate = estimate_range_ratio(term, op, &total, &matching);
  if (!estimate.ok()) {
    return false;
  }
  if (total == 0) {
    return false;
  }
  return (1.0f * matching / total) > kMostValuesThreshold;
}


// Complements `bitmap` over the id range [0, max_id_ + 1) while keeping null
// documents out of the result.
//
// The null bitmap is OR-ed into `bitmap` in place before the flip, so ids in
// the null bitmap end up cleared. A nullptr `bitmap` is treated as empty: a
// copy of the null bitmap is flipped instead. Returns the flipped bitmap
// (the same pointer as `bitmap` when it was non-null).
inline roaring_bitmap_t *InvertedColumnIndexer::flip_bitmap(
    roaring_bitmap_t *bitmap) const {
  roaring_bitmap_t *target = bitmap;
  if (target != nullptr) {
    roaring_bitmap_or_inplace(target, null_bitmap_.bitmap());
  } else {
    target = null_bitmap_.copy();
  }
  roaring_bitmap_flip_inplace(target, 0, max_id_ + 1);
  return target;
}


}  // namespace zvec

================================================
FILE: src/db/index/column/inverted_column/inverted_column_indexer_util.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#include <ailego/pattern/defer.h>
#include "inverted_column_indexer.h"


namespace zvec {


// Logs that this indexer has been closed; no other work is done here.
InvertedColumnIndexer::~InvertedColumnIndexer() {
  const auto &indexer_id = ID();
  LOG_INFO("Closed %s", indexer_id.c_str());
}


// Opens the indexer against the RocksDB context it was constructed with.
//
// Steps, in order:
//   1. Verify that the field uses an INVERT index and derive the
//      range-optimization / extended-wildcard switches from its parameters.
//   2. Resolve the column family handles required by the field type and the
//      enabled features (terms, array length, ranges, cdf, reversed terms).
//   3. Load the optional persisted state: document range statistics, max id,
//      null bitmap and the sealed marker. A missing key is not an error.
//
// Returns InvalidArgument when the field is not an inverted index,
// InternalError when a column family is missing or a read/parse fails, and
// the deserialization status when the null bitmap cannot be decoded.
Status InvertedColumnIndexer::open() {
  if (field_.index_type() != IndexType::INVERT) {
    return Status::InvalidArgument();
  }
  auto params =
      std::dynamic_pointer_cast<InvertIndexParams>(field_.index_params());
  // The cast yields nullptr when the field carries no (or a different kind
  // of) index parameters; treat that as "optional features disabled" instead
  // of dereferencing a null pointer.
  const bool has_params = (params != nullptr);
  enable_range_optimization_ = allow_range_optimization(field_) &&
                               has_params &&
                               params->enable_range_optimization();
  enable_extended_wildcard_ = allow_extended_wildcard(field_) && has_params &&
                              params->enable_extended_wildcard();

  rocksdb::Status s;
  std::string value{};

  cf_terms_ = ctx_.get_cf(cf_name_terms());
  if (!cf_terms_) {
    LOG_ERROR("Failed to get cf_terms for %s", ID().c_str());
    return Status::InternalError();
  }

  if (field_.is_array_type()) {
    cf_array_len_ = ctx_.get_cf(cf_name_array_len());
    if (!cf_array_len_) {
      LOG_ERROR("Failed to get cf_array_len for %s", ID().c_str());
      return Status::InternalError();
    }
  }

  if (enable_range_optimization_) {
    cf_ranges_ = ctx_.get_cf(cf_name_ranges());
    if (!cf_ranges_) {
      LOG_ERROR("Failed to get cf_ranges for %s", ID().c_str());
      return Status::InternalError();
    }
    cf_cdf_ = ctx_.get_cf(cf_name_cdf());
    if (!cf_cdf_) {
      LOG_ERROR("Failed to get cf_cdf for %s", ID().c_str());
      return Status::InternalError();
    }
    // The range statistics are stored under the field name; they are optional
    // and simply stay unset when the key does not exist.
    s = ctx_.db_->Get(ctx_.read_opts_, cf_cdf_, field_.name(), &value);
    if (s.ok()) {
      doc_range_stat_ = SegmentDocRangeStat::Create(value);
      if (!doc_range_stat_) {
        LOG_ERROR("Failed to create doc range stats from %s", ID().c_str());
        return Status::InternalError();
      }
    } else if (s.code() != rocksdb::Status::kNotFound) {
      LOG_ERROR("Failed to retrieve cdf from %s", ID().c_str());
      return Status::InternalError();
    }
  }

  if (enable_extended_wildcard_) {
    cf_reversed_terms_ = ctx_.get_cf(cf_name_reversed_terms());
    if (!cf_reversed_terms_) {
      LOG_ERROR("Failed to get cf_reversed_terms for %s", ID().c_str());
      return Status::InternalError();
    }
  }

  // Get max id if exists
  s = ctx_.db_->Get(ctx_.read_opts_, key_max_id(), &value);
  if (s.ok()) {
    try {
      max_id_ = std::stoul(value);
    } catch (const std::exception &e) {
      LOG_ERROR("Failed to parse max id from %s for %s, exception[%s]",
                value.c_str(), ID().c_str(), e.what());
      return Status::InternalError();
    }
  } else if (s.code() != rocksdb::Status::kNotFound) {
    LOG_ERROR("Failed to retrieve max id from %s", ID().c_str());
    return Status::InternalError();
  }

  // Get null bitmap if exists
  s = ctx_.db_->Get(ctx_.read_opts_, key_null(), &value);
  if (s.ok()) {
    if (auto status = null_bitmap_.deserialize(value); !status.ok()) {
      LOG_ERROR("Failed to deserialize null bitmap from %s", ID().c_str());
      return status;
    }
  } else if (s.code() != rocksdb::Status::kNotFound) {
    LOG_ERROR("Failed to retrieve null bitmap from %s", ID().c_str());
    return Status::InternalError();
  }

  // Get indexer state
  s = ctx_.db_->Get(ctx_.read_opts_, key_sealed(), &value);
  if (s.ok()) {
    // Presence of the sealed marker also forces the indexer into read-only
    // mode.
    sealed_ = true;
    read_only_ = true;
  } else if (s.code() == rocksdb::Status::kNotFound) {
    sealed_ = false;
  } else {
    LOG_ERROR("Failed to retrieve indexer state from %s", ID().c_str());
    return Status::InternalError();
  }

  LOG_INFO("Opened %s", ID().c_str());
  return Status::OK();
}


// Constructs an indexer for `field` and opens it immediately.
//
// Returns the shared indexer when open() succeeds and nullptr otherwise; in
// the failure case the freshly created instance is destroyed with the local
// shared pointer.
InvertedColumnIndexer::Ptr InvertedColumnIndexer::CreateAndOpen(
    const std::string &collection_name, const FieldSchema &field,
    RocksdbContext &context, bool read_only) {
  std::shared_ptr<InvertedColumnIndexer> created(
      new InvertedColumnIndexer(collection_name, field, context, read_only));
  if (!created->open().ok()) {
    return nullptr;
  }
  return created;
}


// Removes everything this indexer persisted: its column families (terms,
// plus array length / ranges / reversed terms when applicable), its cdf
// entry, and the max-id, null-bitmap and sealed keys.
//
// Stops at the first failure and returns that status (InternalError for
// failed key deletions). A deferred hook logs the overall outcome based on
// the final value of `outcome`.
Status InvertedColumnIndexer::drop_storage() {
  Status outcome = Status::OK();
  AILEGO_DEFER([&]() {
    if (!outcome.ok()) {
      LOG_ERROR("Failed to drop storage of %s", ID().c_str());
    } else {
      LOG_INFO("Dropped storage of %s", ID().c_str());
    }
  });

  outcome = ctx_.drop_cf(cf_name_terms());
  if (!outcome.ok()) {
    return outcome;
  }

  if (field_.is_array_type()) {
    outcome = ctx_.drop_cf(cf_name_array_len());
    if (!outcome.ok()) {
      return outcome;
    }
  }

  if (enable_range_optimization_) {
    outcome = ctx_.drop_cf(cf_name_ranges());
    if (!outcome.ok()) {
      return outcome;
    }
    rocksdb::Status cdf_delete =
        ctx_.db_->Delete(ctx_.write_opts_, cf_cdf_, field_.name());
    if (!cdf_delete.ok()) {
      LOG_ERROR("Failed to delete cdf of %s", ID().c_str());
      outcome = Status::InternalError();
      return outcome;
    }
  }

  if (enable_extended_wildcard_) {
    outcome = ctx_.drop_cf(cf_name_reversed_terms());
    if (!outcome.ok()) {
      return outcome;
    }
  }

  rocksdb::Status max_id_delete =
      ctx_.db_->Delete(ctx_.write_opts_, key_max_id());
  if (!max_id_delete.ok()) {
    LOG_ERROR("Failed to delete max_id of %s", ID().c_str());
    outcome = Status::InternalError();
    return outcome;
  }

  rocksdb::Status null_delete = ctx_.db_->Delete(ctx_.write_opts_, key_null());
  if (!null_delete.ok()) {
    LOG_ERROR("Failed to delete null bitmap of %s", ID().c_str());
    outcome = Status::InternalError();
    return outcome;
  }

  rocksdb::Status sealed_delete =
      ctx_.db_->Delete(ctx_.write_opts_, key_sealed());
  if (!sealed_delete.ok()) {
    LOG_ERROR("Failed to delete indexer state of %s", ID().c_str());
    outcome = Status::InternalError();
    return outcome;
  }

  return outcome;
}


}  // namespace zvec

================================================
FILE: src/db/index/column/inverted_column/inverted_column_indexer_write.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#include <ailego/pattern/defer.h>
#include <zvec/ailego/encoding/json.h>
#include "inverted_codec.h"
#include "inverted_column_indexer.h"


namespace zvec {


// Indexes `value` under document `id` in the terms column family.
//
// Array-typed fields: `value` is expanded by encode_array() into one encoded
// term per element; the terms are sorted, merged in a single write batch, and
// the element count is then recorded through index_array_len().
// Other fields: the encoded value is merged directly and, when the reversed
// terms column family is available, its reversed encoding is merged as well.
//
// Returns PermissionDenied when the indexer is read-only, InternalError when
// any RocksDB operation fails (the failure is logged on exit), OK otherwise.
Status InvertedColumnIndexer::insert(uint32_t id, const std::string &value) {
  if (read_only_) {
    return Status::PermissionDenied();
  }

  // Merge operand: one byte with value 1 followed by the raw bytes of `id`.
  std::string encoded_id = std::string{1}.append(
      reinterpret_cast<const char *>(&id), sizeof(uint32_t));

  rocksdb::Status s;
  // Single logging point: runs on every exit path and reports the last
  // RocksDB status if it was a failure.
  AILEGO_DEFER([&]() {
    if (!s.ok()) {
      LOG_ERROR("Failed to insert terms of id[%u] to %s, code[%d], reason[%s]",
                id, ID().c_str(), s.code(), s.ToString().c_str());
    }
  });

  if (field_.is_array_type()) {
    std::vector<std::string> encoded_values = encode_array(value);
    std::sort(encoded_values.begin(), encoded_values.end());
    rocksdb::WriteBatch write_batch;
    // Bind by const reference: the terms are only read, so copying each
    // string per iteration is unnecessary.
    for (const auto &encoded_value : encoded_values) {
      s = write_batch.Merge(cf_terms_, encoded_value, encoded_id);
      if (!s.ok()) {
        return Status::InternalError();
      }
    }
    if (s = ctx_.db_->Write(ctx_.write_opts_, &write_batch); !s.ok()) {
      return Status::InternalError();
    }
    if (s = index_array_len(id, encoded_values.size()); !s.ok()) {
      return Status::InternalError();
    }
  } else {
    std::string encoded_value = encode(value);
    s = ctx_.db_->Merge(ctx_.write_opts_, cf_terms_, encoded_value, encoded_id);
    if (!s.ok()) {
      return Status::InternalError();
    }
    if (cf_reversed_terms_) {
      s = ctx_.db_->Merge(ctx_.write_opts_, cf_reversed_terms_,
                          encode_reversed(value), encoded_id);
      if (!s.ok()) {
        return Status::InternalError();
      }
    }
  }

  update_max_id(id);
  return Status::OK();
}


// Indexes an array of string values under document `id`.
//
// The element count is recorded first through index_array_len(); the encoded
// values are then sorted and merged into the terms column family in a single
// write batch.
//
// Returns PermissionDenied when the indexer is read-only, InternalError when
// any RocksDB operation fails (the failure is logged on exit), OK otherwise.
Status InvertedColumnIndexer::insert(uint32_t id,
                                     const std::vector<std::string> &values) {
  if (read_only_) {
    return Status::PermissionDenied();
  }

  // Merge operand: one byte with value 1 followed by the raw bytes of `id`.
  std::string encoded_id = std::string{1}.append(
      reinterpret_cast<const char *>(&id), sizeof(uint32_t));
  auto encoded_values = encode(values);

  rocksdb::Status s;
  // Single logging point: runs on every exit path and reports the last
  // RocksDB status if it was a failure.
  AILEGO_DEFER([&]() {
    if (!s.ok()) {
      LOG_ERROR("Failed to insert terms of id[%u] to %s, code[%d], reason[%s]",
                id, ID().c_str(), s.code(), s.ToString().c_str());
    }
  });

  if (s = index_array_len(id, encoded_values.size()); !s.ok()) {
    return Status::InternalError();
  }

  std::sort(encoded_values.begin(), encoded_values.end());
  rocksdb::WriteBatch write_batch;
  // Bind by const reference: the terms are only read, so copying each element
  // per iteration is unnecessary.
  for (const auto &encoded_value : encoded_values) {
    s = write_batch.Merge(cf_terms_, encoded_value, encoded_id);
    if (!s.ok()) {
      return Status::InternalError();
    }
  }
  s = ctx_.db_->Write(ctx_.write_opts_, &write_batch);
  if (s.ok()) {
    update_max_id(id);
    return Status::OK();
  } else {
    return Status::InternalError();
  }
}


// Indexes a single boolean value under document `id`.
//
// Returns PermissionDenied when the indexer is read-only, InternalError when
// the RocksDB merge fails (logged), OK otherwise.
Status InvertedColumnIndexer::insert(uint32_t id, bool value) {
  if (read_only_) {
    return Status::PermissionDenied();
  }

  // Merge operand: one byte with value 1 followed by the raw bytes of `id`.
  std::string id_operand = std::string{1}.append(
      reinterpret_cast<const char *>(&id), sizeof(uint32_t));
  std::string term = encode(value);

  auto merge_status =
      ctx_.db_->Merge(ctx_.write_opts_, cf_terms_, term, id_operand);
  if (!merge_status.ok()) {
    LOG_ERROR("Failed to insert terms of id[%u] to %s, code[%d], reason[%s]",
              id, ID().c_str(), merge_status.code(),
              merge_status.ToString().c_str());
    return Status::InternalError();
  }

  // Only a successful write advances the tracked maximum id.
  update_max_id(id);
  return Status::OK();
}


// Indexes an array of booleans under document `id`.
//
// The array length is recorded through index_array_len(); afterwards at most
// two terms are merged (one for `true`, one for `false`), depending on which
// values occur in `values`.
//
// Returns PermissionDenied when the indexer is read-only, InternalError when
// any RocksDB operation fails (logged), OK otherwise.
Status InvertedColumnIndexer::insert(uint32_t id,
                                     const std::vector<bool> &values) {
  if (read_only_) {
    return Status::PermissionDenied();
  }

  // Merge operand: one byte with value 1 followed by the raw bytes of `id`.
  std::string encoded_id = std::string{1}.append(
      reinterpret_cast<const char *>(&id), sizeof(uint32_t));

  rocksdb::Status rs;
  if (rs = index_array_len(id, values.size()); !rs.ok()) {
    LOG_ERROR("Failed to index array length for %s", ID().c_str());
    return Status::InternalError();
  }

  bool has_true = false;
  bool has_false = false;
  for (bool value : values) {
    if (value) {
      has_true = true;
    } else {
      has_false = true;
    }
    if (has_true && has_false) {
      break;  // Both terms are already required; nothing more to learn.
    }
  }

  // `rs` is OK at this point. Each batch operation is checked so that a
  // failed Merge is reported instead of being silently dropped, matching the
  // other insert() overloads.
  rocksdb::WriteBatch write_batch;
  if (has_true) {
    rs = write_batch.Merge(cf_terms_, encode(true), encoded_id);
  }
  if (rs.ok() && has_false) {
    rs = write_batch.Merge(cf_terms_, encode(false), encoded_id);
  }
  if (rs.ok()) {
    rs = ctx_.db_->Write(ctx_.write_opts_, &write_batch);
  }
  if (rs.ok()) {
    update_max_id(id);
    return Status::OK();
  } else {
    LOG_ERROR("Failed to insert terms of id[%u] to %s, code[%d], reason[%s]",
              id, ID().c_str(), rs.code(), rs.ToString().c_str());
    return Status::InternalError();
  }
}


// Records document `id` in the in-memory null bitmap and advances the tracked
// maximum id. Returns PermissionDenied when the indexer is read-only.
Status InvertedColumnIndexer::insert_null(uint32_t id) {
  if (!read_only_) {
    null_bitmap_.add(id);
    update_max_id(id);
    return Status::OK();
  }
  return Status::PermissionDenied();
}


// Writes the indexer's special values to RocksDB: the serialized null bitmap
// (only when it is non-empty) under key_null(), and the decimal string form of
// max_id_ under key_max_id().
//
// Returns PermissionDenied when the indexer is read-only, InternalError on a
// serialization or write failure (logged), OK otherwise.
Status InvertedColumnIndexer::flush_special_values() {
  if (read_only_) {
    return Status::PermissionDenied();
  }

  std::string serialized_nulls;
  if (null_bitmap_.cardinality() != 0) {
    if (!null_bitmap_.serialize(&serialized_nulls).ok()) {
      LOG_ERROR("Failed to serialize null bitmap");
      return Status::InternalError();
    }
    auto null_put =
        ctx_.db_->Put(ctx_.write_opts_, key_null(), serialized_nulls);
    if (!null_put.ok()) {
      LOG_ERROR("Failed to insert null bitmap to %s, code[%d], reason[%s]",
                ID().c_str(), null_put.code(), null_put.ToString().c_str());
      return Status::InternalError();
    }
  }

  auto max_id_put =
      ctx_.db_->Put(ctx_.write_opts_, key_max_id(), std::to_string(max_id_));
  if (!max_id_put.ok()) {
    LOG_ERROR("Failed to insert max_id to %s, code[%d], reason[%s]",
              ID().c_str(), max_id_put.code(), max_id_put.ToString().c_str());
    return Status::InternalError();
  }

  LOG_DEBUG("Special values flushed to %s", ID().c_str());
  return Status::OK();
}


// Merges document `id` into the array-length column family under the key
// obtained by encoding `len` as a UINT32.
//
// Returns rocksdb::Status::NotSupported (and logs) when this indexer has no
// array-length column family; otherwise returns the status of the merge.
rocksdb::Status InvertedColumnIndexer::index_array_len(uint32_t id,
                                                       uint32_t len) {
  if (!cf_array_len_) {
    LOG_ERROR("%s doesn't support array length index", ID().c_str());
    return rocksdb::Status::NotSupported();
  }

  // Key: the codec's encoding of the raw bytes of `len`.
  std::string len_key = InvertedIndexCodec::Encode(
      std::string(reinterpret_cast<const char *>(&len), sizeof(uint32_t)),
      DataType::UINT32);
  // Operand: one byte with value 1 followed by the raw bytes of `id`.
  std::string id_operand = std::string{1}.append(
      reinterpret_cast<const char *>(&id), sizeof(uint32_t));

  return ctx_.db_->Merge(ctx_.write_opts_, cf_array_len_, len_key, id_operand);
}


// Rebuilds the statistical indexes of this column by scanning every term in
// the terms column family once:
//   * Range index: consecutive terms are grouped into slots; each slot stores
//     the OR of its terms' bitmaps in the ranges column family, keyed by
//     <begin key> '\0' <end key> <size of begin key as uint64_t>.
//   * CDF: a JSON histogram of (term key, running document count) samples
//     plus the total document count, stored in the CDF column family under
//     the field name and loaded into doc_range_stat_.
//
// Returns PermissionDenied when the indexer is read-only or range
// optimization is disabled, InternalError on any failure (logged), OK
// otherwise.
Status InvertedColumnIndexer::generate_statistical_indexes() {
  if (read_only_) {
    return Status::PermissionDenied();
  }
  if (!enable_range_optimization_) {
    return Status::PermissionDenied();
  }

  if (!ctx_.reset_cf(cf_name_ranges()).ok()) {
    // Reset the range index in case it is corrupted
    LOG_ERROR("Failed to reset range index");
    return Status::InternalError();
  }
  cf_ranges_ = ctx_.get_cf(cf_name_ranges());
  if (!cf_ranges_) {
    LOG_ERROR("Failed to get column families for %s", ID().c_str());
    return Status::InternalError();
  }

  // TODO: make them configurable
  const uint32_t num_range_slot = 1000;
  const uint32_t num_cdf_slot = 100;

  // Integer division: when there are fewer ids than slots the threshold is 0,
  // so every term closes its own slot. The finalize step below discards a
  // range index that ends up with at least as many slots as terms.
  const uint32_t num_doc_per_range_slot = (max_id_ + 1) / num_range_slot;
  const uint32_t num_doc_per_cdf_slot = (max_id_ + 1) / num_cdf_slot;

  // Iterator for terms in the inverted index
  auto iter_term = ctx_.db_->NewIterator(ctx_.read_opts_, cf_terms_);
  iter_term->SeekToFirst();
  AILEGO_DEFER([&]() { delete iter_term; });

  size_t doc_count = 0, term_count = 0;
  Status s;
  rocksdb::Status rs;

  // Range tracking variables
  std::string range_begin_key{""}, range_end_key{""};
  size_t range_slot_doc_count{0};
  size_t num_range_slot_created = 0;
  roaring_bitmap_t *bitmap_range = roaring_bitmap_create();
  if (bitmap_range == nullptr) {
    LOG_ERROR("Failed to create bitmap");
    return Status::InternalError();
  }
  AILEGO_DEFER([&]() { roaring_bitmap_free(bitmap_range); });

  // Function to create a range slot
  auto create_range_slot = [&]() -> Status {
    std::string range_key = range_begin_key;
    range_key.append(1, '\0');  // Separator byte
    range_key.append(range_end_key.data(), range_end_key.size());
    // Trailing size of the begin key, appended as raw uint64_t bytes.
    uint64_t range_key_begin_size = range_begin_key.size();
    range_key.append(reinterpret_cast<const char *>(&range_key_begin_size),
                     sizeof(uint64_t));
    std::string range_value_str;
    s = InvertedIndexCodec::Serialize(bitmap_range, &range_value_str);
    if (!s.ok()) {
      LOG_ERROR("Failed to serialize bitmap");
      return Status::InternalError();
    }
    rs =
        ctx_.db_->Put(ctx_.write_opts_, cf_ranges_, range_key, range_value_str);
    if (!rs.ok()) {
      LOG_ERROR("Failed to put range slot: %s", rs.ToString().c_str());
      return Status::InternalError();
    }
    num_range_slot_created++;
    return Status::OK();
  };

  // CDF tracking variables
  ailego::JsonArray cdf_json_array;
  size_t cdf_slot_doc_count = 0;

  // Function to create a CDF slot. It samples the term the iterator currently
  // points at together with the running document count, so the iterator must
  // be valid when this is called.
  auto create_cdf_slot = [&]() {
    ailego::JsonObject json_obj;
    json_obj.set(ailego::JsonString("key").encode(),
                 ailego::JsonString(iter_term->key().ToString()).encode());
    json_obj.set(ailego::JsonString("doc_count").encode(),
                 ailego::JsonValue(doc_count));
    cdf_json_array.push(json_obj);
  };

  // Is the current slot initialized?
  bool range_slot_initialized{false}, cdf_slot_initialized{false};


  // Scan
  roaring_bitmap_t *bitmap_cur{nullptr};
  AILEGO_DEFER([&]() {
    if (bitmap_cur) {
      roaring_bitmap_free(bitmap_cur);
    }
  });

  while (iter_term->Valid()) {
    term_count++;
    s = InvertedIndexCodec::Deserialize(iter_term->value().data(),
                                        iter_term->value().size(), &bitmap_cur);
    if (!s.ok()) {
      LOG_ERROR("Failed to deserialize bitmap for term[%s] from %s",
                iter_term->key().ToString().c_str(), ID().c_str());
      return Status::InternalError();
    }
    // The count of documents for the current term
    auto term_doc_count = roaring_bitmap_get_cardinality(bitmap_cur);
    doc_count += term_doc_count;

    // Range
    if (!range_slot_initialized) {
      range_slot_initialized = true;
      range_slot_doc_count = 0;
      range_begin_key = iter_term->key().ToString();
      roaring_bitmap_clear(bitmap_range);
    }
    range_end_key = iter_term->key().ToString();
    range_slot_doc_count += term_doc_count;
    roaring_bitmap_or_inplace(bitmap_range, bitmap_cur);
    if (range_slot_doc_count >= num_doc_per_range_slot) {
      if (create_range_slot().ok()) {
        range_slot_initialized = false;
      } else {
        return Status::InternalError();
      }
    }

    // CDF
    if (!cdf_slot_initialized) {
      cdf_slot_initialized = true;
      cdf_slot_doc_count = 0;
    }
    cdf_slot_doc_count += term_doc_count;
    if (cdf_slot_doc_count >= num_doc_per_cdf_slot) {
      create_cdf_slot();
      cdf_slot_initialized = false;
    }

    roaring_bitmap_free(bitmap_cur);
    bitmap_cur = nullptr;
    iter_term->Next();
  }

  // Valid() also turns false when the iterator hits an error, so distinguish
  // "end of data" from a failed scan before persisting partial statistics.
  if (rs = iter_term->status(); !rs.ok()) {
    LOG_ERROR("Failed to scan terms of %s, code[%d], reason[%s]", ID().c_str(),
              rs.code(), rs.ToString().c_str());
    return Status::InternalError();
  }


  // Finalize
  if (range_slot_initialized) {
    if (!create_range_slot().ok()) {
      return Status::InternalError();
    }
  }
  if (num_range_slot_created >= term_count) {
    // A range index with as many slots as terms brings no benefit; drop it.
    LOG_DEBUG(
        "Drop range index in %s, range_slot_count[%zu] vs term_count[%zu].",
        ID().c_str(), num_range_slot_created, term_count);
    if (!ctx_.reset_cf(cf_name_ranges()).ok()) {
      LOG_ERROR("Failed to drop range index");
      return Status::InternalError();
    }
    cf_ranges_ = ctx_.get_cf(cf_name_ranges());
    if (!cf_ranges_) {
      LOG_ERROR("Failed to get cf_ranges for %s", ID().c_str());
      return Status::InternalError();
    }
  }

  if (cdf_slot_initialized) {
    // The scan left the iterator exhausted; reposition it on the last term so
    // that the trailing CDF sample can read its key.
    iter_term->SeekToLast();
    if (!iter_term->Valid()) {
      LOG_ERROR("Failed to seek to the last term of %s", ID().c_str());
      return Status::InternalError();
    }
    create_cdf_slot();
  }
  ailego::JsonObject cdf_json_obj;
  cdf_json_obj.set("field_value_histogram", cdf_json_array);
  cdf_json_obj.set("total_doc_count", ailego::JsonValue(doc_count));
  ailego::JsonValue cdf_json(std::move(cdf_json_obj));
  // Serialize once; the same text is both persisted and parsed back below.
  const std::string cdf_json_str = cdf_json.as_json_string().as_stl_string();
  rs = ctx_.db_->Put(ctx_.write_opts_, cf_cdf_, field_.name(), cdf_json_str);
  if (!rs.ok()) {
    LOG_ERROR("Failed to insert CDF of field[%s] to %s, code[%d], reason[%s]",
              field_.name().c_str(), ID().c_str(), rs.code(),
              rs.ToString().c_str());
    return Status::InternalError();
  }

  doc_range_stat_ = SegmentDocRangeStat::Create(cdf_json_str);
  if (!doc_range_stat_) {
    LOG_ERROR("Failed to create doc range stats from %s", ID().c_str());
    return Status::InternalError();
  }

  LOG_INFO("Generated statistical indexes in %s", ID().c_str());
  return Status::OK();
}


// Seals this column indexer: flushes the special values, regenerates the
// statistical indexes when range optimization is enabled, then writes the
// "sealed" marker under key_sealed(). On success the indexer is marked sealed
// and becomes read-only.
//
// Returns PermissionDenied when already read-only, the failing step's status
// for the first two steps, and InternalError when the marker cannot be
// written.
Status InvertedColumnIndexer::seal() {
  if (read_only_) {
    return Status::PermissionDenied();
  }

  if (Status flushed = flush_special_values(); !flushed.ok()) {
    LOG_ERROR("Failed to flush special values to %s", ID().c_str());
    return flushed;
  }

  if (enable_range_optimization_) {
    if (Status generated = generate_statistical_indexes(); !generated.ok()) {
      LOG_ERROR("Failed to generate statistical indexes in %s", ID().c_str());
      return generated;
    }
  }

  auto marker_put = ctx_.db_->Put(ctx_.write_opts_, key_sealed(), "sealed");
  if (!marker_put.ok()) {
    LOG_ERROR("Failed to seal %s", ID().c_str());
    return Status::InternalError();
  }

  sealed_ = true;
  read_only_ = true;
  return Status::OK();
}


}  // namespace zvec


================================================
FILE: src/db/index/column/inverted_column/inverted_doc_range.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#pragma once


#include <memory>
#include <string>
#include <vector>
#include <zvec/ailego/encoding/json/mod_json_plus.h>
#include <zvec/ailego/logger/logger.h>
#include <zvec/db/type.h>


namespace zvec {


// One histogram sample: a term key together with a document count.
struct DocRange {
  std::string key_{""};
  size_t doc_count_{0};

  DocRange() {}

  // `count` is taken as size_t, the same type as doc_count_, so that counts
  // of 2^31 or more are stored unchanged instead of being narrowed through
  // int first.
  DocRange(const std::string &key, size_t count)
      : key_(key), doc_count_(count) {}

  // Orders a sample against a bare key; lets std::lower_bound search a sorted
  // sequence of DocRange by key.
  bool operator<(const std::string &key) const {
    return key_ < key;
  }
};


class SegmentDocRangeStat {
 public:
  using Ptr = std::shared_ptr<SegmentDocRangeStat>;


  SegmentDocRangeStat(std::vector<DocRange> &&doc_ranges,
                      uint64_t total_doc_count)
      : doc_ranges_(std::move(doc_ranges)), total_doc_count_(total_doc_count) {
    std::sort(
        doc_ranges_.begin(), doc_ranges_.end(),
        [](const DocRange &a, const DocRange &b) { return a.key_ < b.key_; });
  }


  static Ptr Create(const std::string &stat_json_str) {
    ailego::JsonValue stat_json_value;
    ailego::JsonParser parser;
    if (!parser.parse(stat_json_str.c_str(), &stat_json_value)) {
      LOG_ERROR("Failed to parse json string");
      return nullptr;
    }

    ailego::JsonObject stat_json_obj = stat_json_value.as_object();
    ailego::JsonArray stat_json_array;
    if (!stat_json_obj.get("field_value_histogram", &stat_json_array)) {
      LOG_ERROR("Failed to get histogram");
      return nullptr;
    }

    ailego::JsonValue stat_total_doc_count;
    if (!stat_json_obj.get("total_doc_count", &stat_total_doc_count)) {
      LOG_ERROR("Failed to get total doc count");
      return nullptr;
    }

    std::vector<DocRange> doc_ranges;
    for (auto it = stat_json_array.begin(); it != stat_json_array.end(); ++it) {
      ailego::JsonString stat_key;
      ailego::JsonValue stat_doc_count;
      if (!it->as_object().get("key", &stat_key)) {
        LOG_ERROR("Failed to get key");
        return nullptr;
      }
      if (!it->as_object().get("doc_count", &stat_doc_count)) {
        LOG_ERROR("Failed to get doc count");
        return nullptr;
      }
      doc_ranges.emplace_back(stat_key.decode().as_stl_string(),
                              stat_doc_count.as_integer());
    }

    return std::make_shared<SegmentDocRangeStat>(
        std::move(doc_ranges), stat_total_doc_count.as_integer());
  }


  void evaluate_ratio(const std::string &value, CompareOp op,
                      uint64_t *total_size, uint64_t *range_size) const {
    if (doc_ranges_.size() == 0) {
      *range_size = 0;
      *total_size = total_doc_count_;
    }

    // Is greater than?
    bool is_gt = (op == CompareOp::GT) || (op == CompareOp::GE);

    auto it = std::lower_bound(doc_ranges_.begin(), doc_ranges_.end(), value);

    if (it == doc_ranges_.end()) {
      *range_size = is_gt ? 0 : total_doc_count_;
    } else {
      *range_size = is_gt ? total_doc_count_ - it->doc_count_ : it->doc_count_;
    }
    *total_size = total_doc_count_;
  }


 private:
  std::vector<DocRange> doc_ranges_;
  uint64_t total_doc_count_;
};


}  // namespace zvec

================================================
FILE: src/db/index/column/inverted_column/inverted_indexer.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#include "inverted_indexer.h"
#include <zvec/ailego/encoding/json.h>
#include "inverted_rocksdb_merger.h"


namespace zvec {


// Opens (or creates) the RocksDB instance in working_dir_ and one
// InvertedColumnIndexer per configured field.
//
// The column family list is derived from the fields: a terms family for every
// field, an array-length family for array fields, and the ranges /
// reversed-terms families when the field type allows them and the index
// params enable them. The shared CDF family is always added.
//
// Returns InvalidArgument when a field is not an inverted field or the path
// exists but is not a directory, NotFound when the path is missing and
// `create_dir_if_missing` is false, the RocksDB context's status when
// open/create fails, InternalError when a column indexer cannot be created,
// and OK otherwise.
Status InvertedIndexer::open(bool create_dir_if_missing, bool read_only) {
  std::vector<std::string> cf_names{};  // Column families
  for (const auto &field : fields_) {
    if (field.index_type() != IndexType::INVERT) {
      LOG_ERROR("Field[%s] is not an inverted field", field.name().c_str());
      return Status::InvalidArgument();
    }
    // The cast yields nullptr when the params are not InvertIndexParams; the
    // optional features are then treated as disabled instead of dereferencing
    // a null pointer.
    auto params =
        std::dynamic_pointer_cast<InvertIndexParams>(field.index_params());
    cf_names.emplace_back(field.name() + INVERT_SUFFIX_TERMS);
    if (field.is_array_type()) {
      cf_names.emplace_back(field.name() + INVERT_SUFFIX_ARRAY_LEN);
    }
    if (allow_range_optimization(field) && params &&
        params->enable_range_optimization()) {
      cf_names.emplace_back(field.name() + INVERT_SUFFIX_RANGES);
    }
    if (allow_extended_wildcard(field) && params &&
        params->enable_extended_wildcard()) {
      cf_names.emplace_back(field.name() + INVERT_SUFFIX_REVERSED_TERMS);
    }
  }
  cf_names.emplace_back(INVERT_CDF);

  Status s;
  if (FILE::IsExist(working_dir_)) {
    if (!FILE::IsDirectory(working_dir_)) {
      LOG_ERROR("InvertedIndexer path[%s] is not a directory",
                working_dir_.c_str());
      return Status::InvalidArgument();
    }
    s = rocksdb_context_.open(working_dir_, cf_names, read_only,
                              std::make_shared<InvertedRocksdbValueMerger>());
  } else {
    if (!create_dir_if_missing) {
      LOG_ERROR("InvertedIndexer path[%s] does not exist",
                working_dir_.c_str());
      return Status::NotFound();
    }
    s = rocksdb_context_.create(working_dir_, cf_names,
                                std::make_shared<InvertedRocksdbValueMerger>());
  }

  if (!s.ok()) {
    LOG_ERROR("Failed to open %s", ID().c_str());
    return s;
  }

  for (const auto &field : fields_) {
    auto column_indexer = InvertedColumnIndexer::CreateAndOpen(
        collection_name_, field, rocksdb_context_, read_only);
    if (column_indexer == nullptr) {
      LOG_ERROR("Failed to create InvertedColumnIndexer[%s]",
                field.name().c_str());
      return Status::InternalError();
    }
    indexers_.emplace(field.name(), std::move(column_indexer));
  }

  LOG_INFO("Opened %s", ID().c_str());
  return s;
}


// Flushes the special values of every column indexer that is not sealed, then
// flushes the RocksDB context.
//
// Returns InternalError as soon as one column fails to flush; otherwise
// returns the status of the RocksDB flush.
Status InvertedIndexer::flush() {
  for (auto &name_and_indexer : indexers_) {
    auto &column = name_and_indexer.second;
    // Sealed columns are skipped; the short-circuit keeps them untouched.
    if (!column->is_sealed() && !column->flush_special_values().ok()) {
      LOG_ERROR("Failed to flush %s", column->ID().c_str());
      return Status::InternalError();
    }
  }

  auto flush_status = rocksdb_context_.flush();
  if (!flush_status.ok()) {
    LOG_ERROR("Failed to flush %s", ID().c_str());
  } else {
    LOG_INFO("Flushed %s", ID().c_str());
  }
  return flush_status;
}


// Flushes the indexer and then creates a RocksDB checkpoint in
// `snapshot_dir`.
//
// Returns the flush status when flushing fails; otherwise returns the status
// of the checkpoint creation.
Status InvertedIndexer::create_snapshot(const std::string &snapshot_dir) {
  Status status = flush();
  if (!status.ok()) {
    LOG_ERROR("Failed to flush %s during creating a snapshot", ID().c_str());
    return status;
  }

  status = rocksdb_context_.create_checkpoint(snapshot_dir);
  if (!status.ok()) {
    LOG_ERROR("Failed to create snapshot[%s] of %s", snapshot_dir.c_str(),
              ID().c_str());
  } else {
    LOG_INFO("Created snapshot[%s] of %s", snapshot_dir.c_str(), ID().c_str());
  }
  return status;
}


// Seals every column indexer that is not sealed yet, flushes the indexer and
// compacts the underlying RocksDB instance.
//
// Returns the status of the first step that fails: a column's seal(), the
// flush, or the compaction. A column seal failure is returned immediately (as
// flush() does for a failed column flush) instead of being overwritten by the
// later flush status, which would report OK for an indexer that still holds
// an unsealed column.
Status InvertedIndexer::seal() {
  Status s;
  for (const auto &[_, indexer] : indexers_) {
    if (indexer->is_sealed()) {
      continue;
    }
    if (s = indexer->seal(); !s.ok()) {
      LOG_ERROR("Failed to seal %s", indexer->ID().c_str());
      return s;
    }
  }

  if (s = flush(); !s.ok()) {
    LOG_ERROR("Failed to flush %s during sealing", ID().c_str());
    return s;
  }

  if (s = rocksdb_context_.compact(); s.ok()) {
    LOG_INFO("Sealed %s", ID().c_str());
  } else {
    LOG_ERROR("Failed to compact %s during sealing", ID().c_str());
  }
  return s;
}


// Adds a column indexer for `field` to an already opened indexer.
//
// Creates the column families the field needs (terms, plus array-length /
// ranges / reversed-terms when applicable), opens an InvertedColumnIndexer on
// them and registers it. If any step fails, the column families created so
// far are dropped again by the deferred cleanup.
//
// Returns InvalidArgument when `field` is not an inverted field or a field
// with the same name already exists, the failing create_cf() status, or
// InternalError when the column indexer cannot be opened.
Status InvertedIndexer::create_column_indexer(const FieldSchema &field) {
  if (field.index_type() != IndexType::INVERT) {
    return Status::InvalidArgument();
  }
  auto it = std::find_if(fields_.begin(), fields_.end(),
                         [&field](const FieldSchema &cur_field) {
                           return cur_field.name() == field.name();
                         });
  if (it != fields_.end()) {
    LOG_ERROR("InvertedColumnIndexer[%s] already exists in %s",
              field.name().c_str(), ID().c_str());
    return Status::InvalidArgument();
  }
  // The cast yields nullptr when the params are not InvertIndexParams; the
  // optional features are then treated as disabled instead of dereferencing
  // a null pointer.
  auto params =
      std::dynamic_pointer_cast<InvertIndexParams>(field.index_params());

  Status s;
  bool cf_terms_created{false};
  bool cf_array_len_created{false};
  bool cf_ranges_created{false};
  bool cf_reversed_terms_created{false};
  // Runs on every exit path below: logs the outcome and, on failure, drops
  // whichever column families were created before the failing step.
  AILEGO_DEFER([&]() {
    if (s.ok()) {
      LOG_INFO("Created a new InvertedColumnIndexer[%s] in %s",
               field.name().c_str(), ID().c_str());
    } else {
      if (cf_terms_created) {
        rocksdb_context_.drop_cf(field.name() + INVERT_SUFFIX_TERMS);
      }
      if (cf_array_len_created) {
        rocksdb_context_.drop_cf(field.name() + INVERT_SUFFIX_ARRAY_LEN);
      }
      if (cf_ranges_created) {
        rocksdb_context_.drop_cf(field.name() + INVERT_SUFFIX_RANGES);
      }
      if (cf_reversed_terms_created) {
        rocksdb_context_.drop_cf(field.name() + INVERT_SUFFIX_REVERSED_TERMS);
      }
      LOG_ERROR("Failed to create InvertedColumnIndexer[%s] in %s",
                field.name().c_str(), ID().c_str());
    }
  });

  s = rocksdb_context_.create_cf(field.name() + INVERT_SUFFIX_TERMS);
  if (s.ok()) {
    cf_terms_created = true;
  } else {
    return s;
  }
  if (field.is_array_type()) {
    s = rocksdb_context_.create_cf(field.name() + INVERT_SUFFIX_ARRAY_LEN);
    if (s.ok()) {
      cf_array_len_created = true;
    } else {
      return s;
    }
  }
  if (allow_range_optimization(field) && params &&
      params->enable_range_optimization()) {
    s = rocksdb_context_.create_cf(field.name() + INVERT_SUFFIX_RANGES);
    if (s.ok()) {
      cf_ranges_created = true;
    } else {
      return s;
    }
  }
  if (allow_extended_wildcard(field) && params &&
      params->enable_extended_wildcard()) {
    s = rocksdb_context_.create_cf(field.name() + INVERT_SUFFIX_REVERSED_TERMS);
    if (s.ok()) {
      cf_reversed_terms_created = true;
    } else {
      return s;
    }
  }

  auto column_indexer = InvertedColumnIndexer::CreateAndOpen(
      collection_name_, field, rocksdb_context_);
  if (column_indexer) {
    fields_.emplace_back(field);
    indexers_.emplace(field.name(), std::move(column_indexer));
    s = Status::OK();
  } else {
    s = Status::InternalError();
  }
  return s;
}


// Removes the column indexer named `field_name`: drops its storage and
// unregisters both its schema and its indexer.
//
// Returns NotFound when neither a schema nor an indexer exists for the name,
// InternalError when only one of the two exists, the drop_storage() status
// when dropping fails, and OK otherwise.
Status InvertedIndexer::remove_column_indexer(const std::string &field_name) {
  auto schema_it = std::find_if(fields_.begin(), fields_.end(),
                                [&field_name](FieldSchema &candidate) {
                                  return candidate.name() == field_name;
                                });
  auto target = (*this)[field_name];

  const bool schema_missing = (schema_it == fields_.end());
  const bool indexer_missing = !target;

  if (schema_missing && indexer_missing) {
    LOG_ERROR("InvertedColumnIndexer[%s] doesn't exists in %s",
              field_name.c_str(), ID().c_str());
    return Status::NotFound();
  }
  // Exactly one of the two bookkeeping entries exists.
  if (schema_missing || indexer_missing) {
    LOG_ERROR("%s is in corrupted state", ID().c_str());
    return Status::InternalError();
  }

  auto drop_status = target->drop_storage();
  if (!drop_status.ok()) {
    LOG_ERROR("Failed to remove InvertedColumnIndexer[%s] in %s",
              field_name.c_str(), ID().c_str());
    return drop_status;
  }

  fields_.erase(schema_it);
  indexers_.erase(field_name);
  LOG_INFO("Removed InvertedColumnIndexer[%s] in %s", field_name.c_str(),
           ID().c_str());
  return Status::OK();
}


}  // namespace zvec

================================================
FILE: src/db/index/column/inverted_column/inverted_indexer.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#pragma once


#include <rocksdb/utilities/checkpoint.h>
#include "inverted_column_indexer.h"


namespace zvec {


// Owns one RocksDB context and the per-field InvertedColumnIndexer instances
// built on top of it for a single working directory.
class InvertedIndexer {
 public:
  using Ptr = std::shared_ptr<InvertedIndexer>;


  // Stores the configuration only; nothing is opened until open() runs (see
  // CreateAndOpen()).
  explicit InvertedIndexer(const std::string &collection_name,
                           const std::string &working_dir,
                           const std::vector<FieldSchema> &fields)
      : collection_name_(collection_name),
        working_dir_(working_dir),
        fields_(fields) {}


  // Closes the RocksDB context.
  virtual ~InvertedIndexer() {
    rocksdb_context_.close();
    LOG_INFO("Closed %s", ID().c_str());
  }


  // Constructs an indexer and opens it. Returns nullptr when opening fails.
  static Ptr CreateAndOpen(const std::string &collection_name,
                           const std::string &working_dir,
                           const bool create_dir_if_missing,
                           const std::vector<FieldSchema> &fields,
                           bool read_only) {
    Ptr created =
        std::make_shared<InvertedIndexer>(collection_name, working_dir, fields);
    if (!created->open(create_dir_if_missing, read_only).ok()) {
      return nullptr;
    }
    return created;
  }


  // Looks up the column indexer registered for `field_name`; nullptr when
  // there is none.
  InvertedColumnIndexer::Ptr operator[](const std::string &field_name) {
    auto found = indexers_.find(field_name);
    if (found == indexers_.end()) {
      return nullptr;
    }
    return found->second;
  }


  Status flush();

  Status create_snapshot(const std::string &snapshot_dir);

  Status seal();

  Status create_column_indexer(const FieldSchema &field);

  Status remove_column_indexer(const std::string &field_name);

  inline std::string collection() const {
    return collection_name_;
  }

  inline std::string working_dir() const {
    return working_dir_;
  }

  // Human-readable identifier used in log messages.
  inline const std::string ID() const {
    return "InvertedIndexer[collection:" + collection_name_ + "|path:'" +
           working_dir_ + "']";
  }


 private:
  using FILE = ailego::File;

  Status open(bool create_dir_if_missing, bool read_only);

  // Range optimization applies to every field that is neither an array nor a
  // BOOL.
  inline bool allow_range_optimization(const FieldSchema &field) const {
    return !field.is_array_type() && field.data_type() != DataType::BOOL;
  }

  // Extended wildcard applies to STRING fields only.
  inline bool allow_extended_wildcard(const FieldSchema &field) const {
    return field.data_type() == DataType::STRING;
  }


 private:
  const std::string collection_name_;
  const std::string working_dir_;
  std::vector<FieldSchema> fields_;

  std::unordered_map<std::string, InvertedColumnIndexer::Ptr> indexers_;
  RocksdbContext rocksdb_context_{};
};


}  // namespace zvec

================================================
FILE: src/db/index/column/inverted_column/inverted_rocksdb_merger.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#pragma once


#include <ailego/pattern/defer.h>
#include <rocksdb/merge_operator.h>
#include "inverted_codec.h"


namespace zvec {


// RocksDB merge operator that combines inverted-index values by OR-ing them
// into a roaring bitmap and storing the serialized result.
class InvertedRocksdbValueMerger : public rocksdb::MergeOperator {
 public:
  // Folds the existing value (if any) and all pending operands into one
  // serialized bitmap. Returns false (and logs) when deserialization, merging
  // or serialization fails.
  virtual bool FullMergeV2(const MergeOperationInput &merge_in,
                           MergeOperationOutput *merge_out) const override {
    const auto &operands = merge_in.operand_list;
    const auto *existing = merge_in.existing_value;

    // Single operand and no stored value: the operand is stored as-is.
    if (existing == nullptr && operands.size() == 1) {
      merge_out->new_value.assign(operands[0].data(), operands[0].size());
      return true;
    }

    merge_out->new_value.clear();

    // Seed the accumulator from the stored value, or start from an empty
    // bitmap when there is none.
    roaring_bitmap_t *merged{nullptr};
    if (existing == nullptr) {
      merged = roaring_bitmap_create();
      if (!merged) {
        LOG_ERROR("Failed to create bitmap");
        return false;
      }
    } else {
      Status decoded = InvertedIndexCodec::Deserialize(
          existing->data(), existing->size(), &merged);
      if (!decoded.ok()) {
        LOG_ERROR("Failed to deserialize existing value");
        return false;
      }
    }
    AILEGO_DEFER([&]() { roaring_bitmap_free(merged); });

    // NOTE(review): the `true` flag presumably selects a lazy OR, given the
    // roaring_bitmap_repair_after_lazy() call that follows - confirm against
    // InvertedIndexCodec::Merge_OR.
    for (const rocksdb::Slice &operand : operands) {
      Status ored = InvertedIndexCodec::Merge_OR(operand.data(),
                                                 operand.size(), true, merged);
      if (!ored.ok()) {
        LOG_ERROR("Failed to merge bitmap");
        return false;
      }
    }
    roaring_bitmap_repair_after_lazy(merged);

    Status encoded =
        InvertedIndexCodec::Serialize(merged, &(merge_out->new_value));
    if (!encoded.ok()) {
      LOG_ERROR("Failed to serialize bitmap");
      return false;
    }
    return true;
  }


  // Combines two operands into one: deserializes the left operand, ORs the
  // right operand into it and serializes the result into `new_value`. Returns
  // false (and logs) when any of these steps fails.
  virtual bool PartialMerge(const rocksdb::Slice & /*key*/,
                            const rocksdb::Slice &left_operand,
                            const rocksdb::Slice &right_operand,
                            std::string *new_value,
                            rocksdb::Logger * /*logger*/) const override {
    roaring_bitmap_t *combined{nullptr};
    auto status = InvertedIndexCodec::Deserialize(
        left_operand.data(), left_operand.size(), &combined);
    if (!status.ok()) {
      LOG_ERROR("Failed to deserialize existing value");
      return false;
    }
    AILEGO_DEFER([&]() { roaring_bitmap_free(combined); });

    status = InvertedIndexCodec::Merge_OR(right_operand.data(),
                                          right_operand.size(), false, combined);
    if (!status.ok()) {
      LOG_ERROR("Failed to merge bitmap");
      return false;
    }

    status = InvertedIndexCodec::Serialize(combined, new_value);
    if (!status.ok()) {
      LOG_ERROR("Failed to serialize bitmap");
      return false;
    }
    return true;
  }


  const char *Name() const override {
    return "InvertedRocksdbValueMerger";
  }
};


}  // namespace zvec

================================================
FILE: src/db/index/column/inverted_column/inverted_search_result.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#pragma once


#include <roaring/roaring.h>
#include "db/common/constants.h"
#include "db/index/column/common/index_results.h"


namespace zvec {


/**
 * Result of an inverted-index lookup, held as a roaring bitmap of 32-bit ids.
 *
 * The object owns its bitmap and frees it on destruction. A null bitmap is
 * treated as an empty result by every accessor. Filters and iterators keep
 * the result alive through shared_from_this(), so an instance must already be
 * managed by a std::shared_ptr when make_filter() or create_iterator() is
 * called.
 */
class InvertedSearchResult
    : public IndexResults,
      public std::enable_shared_from_this<InvertedSearchResult> {
 public:
  using Ptr = std::shared_ptr<InvertedSearchResult>;


  /**
   * IndexFilter view of a result: an id is filtered out unless it is a
   * member of the underlying bitmap.
   */
  class Filter : public IndexFilter {
   public:
    explicit Filter(std::shared_ptr<const InvertedSearchResult> result)
        : result_(std::move(result)) {}

    bool is_filtered(uint64_t id) const override {
      // The bitmap stores 32-bit values, so an id that does not fit can never
      // be a member. Without this check the implicit narrowing performed when
      // calling contains() would wrap the id and could match another document.
      if (id != static_cast<uint32_t>(id)) {
        return true;
      }
      return !result_->contains(static_cast<uint32_t>(id));
    }

   private:
    const std::shared_ptr<const InvertedSearchResult> result_{};
  };


  /** Returns a filter backed by this result, or nullptr when no bitmap is set. */
  const IndexFilter::Ptr make_filter() const {
    return bitmap_ ? std::make_shared<Filter>(shared_from_this()) : nullptr;
  }


  /** Creates an empty result (no bitmap). */
  explicit InvertedSearchResult() {}


  /** Takes ownership of `bitmap`; it is freed by this object. */
  explicit InvertedSearchResult(roaring_bitmap_t *bitmap) : bitmap_(bitmap) {}


  ~InvertedSearchResult() {
    destroy_bitmap();
  }


  // The bitmap is a uniquely owned raw resource, so copying and moving are
  // disabled.
  InvertedSearchResult(const InvertedSearchResult &) = delete;
  InvertedSearchResult(InvertedSearchResult &&) = delete;
  InvertedSearchResult &operator=(const InvertedSearchResult &) = delete;
  InvertedSearchResult &operator=(InvertedSearchResult &&) = delete;


  /** Returns true when `id` is a member of the bitmap; false without one. */
  bool contains(uint32_t id) const {
    if (bitmap_) {
      return roaring_bitmap_contains(bitmap_, id);
    } else {
      return false;
    }
  }


  /** Number of ids in the result; 0 when no bitmap is set. */
  size_t count() const override {
    if (bitmap_) {
      return roaring_bitmap_get_cardinality(bitmap_);
    } else {
      return 0;
    }
  }


  /**
   * Iterator over the ids of a result. It holds a shared reference to the
   * result so the bitmap stays alive while the iterator exists.
   */
  class InvertedIndexIterator : public Iterator {
   public:
    explicit InvertedIndexIterator(
        std::shared_ptr<const InvertedSearchResult> result)
        : result_(std::move(result)) {
      if (result_->bitmap_) {
        iter_ = roaring_create_iterator(result_->bitmap_);
      }
    }

    virtual ~InvertedIndexIterator() {
      if (iter_) {
        roaring_free_uint32_iterator(iter_);
      }
    }

    // Current id, or INVALID_DOC_ID when there is no iterator or it is
    // exhausted.
    virtual idx_t doc_id() const {
      if (!iter_) {
        return INVALID_DOC_ID;
      }
      if (iter_->has_value) {
        return iter_->current_value;
      } else {
        return INVALID_DOC_ID;
      }
    }

    // This iterator does not carry scores; it always reports 0.
    virtual float score() const {
      return 0.0f;
    }

    // Advances to the next id; a no-op once the iterator is exhausted.
    virtual void next() {
      if (iter_ && iter_->has_value) {
        roaring_advance_uint32_iterator(iter_);
      }
    }

    virtual bool valid() const {
      return iter_ ? iter_->has_value : false;
    }

   private:
    const std::shared_ptr<const InvertedSearchResult> result_{};
    roaring_uint32_iterator_t *iter_{nullptr};
  };


  IteratorUPtr create_iterator() override {
    return std::make_unique<InvertedIndexIterator>(shared_from_this());
  }


  /**
   * Appends every id of the result to `*ids`, in iteration order.
   *
   * Logs an error and returns when `ids` is null or the bitmap iterator
   * cannot be created; does nothing when no bitmap is set.
   */
  void extract_ids(std::vector<uint32_t> *ids) const {
    if (!ids) {
      LOG_ERROR("Failed to extract ids: ids pointer is null");
      return;
    }
    if (!bitmap_) {
      return;
    }

    // Ids are appended, so keep room for what the caller already stored.
    ids->reserve(ids->size() + static_cast<size_t>(count()));
    roaring_uint32_iterator_t *iter = roaring_create_iterator(bitmap_);
    if (!iter) {
      // The iterator is heap allocated and creation may fail.
      LOG_ERROR("Failed to extract ids: cannot create bitmap iterator");
      return;
    }
    while (iter->has_value) {
      ids->push_back(iter->current_value);
      roaring_advance_uint32_iterator(iter);
    }
    roaring_free_uint32_iterator(iter);
  }


  /** Frees the current bitmap (if any) and takes ownership of `bitmap`. */
  void set_and_own_bitmap(roaring_bitmap_t *bitmap) {
    destroy_bitmap();
    bitmap_ = bitmap;
  }


  /** Frees the bitmap and resets the result to empty. */
  void destroy_bitmap() {
    if (bitmap_) {
      roaring_bitmap_free(bitmap_);
      bitmap_ = nullptr;
    }
  }


  /**
   * Intersects this result with `other` in place. When either side has no
   * bitmap the intersection is empty, so this result's bitmap is dropped.
   */
  void AND(const InvertedSearchResult &other) {
    if (!bitmap_ || !other.bitmap_) {
      destroy_bitmap();
    } else {
      roaring_bitmap_and_inplace(bitmap_, other.bitmap_);
    }
  }


  /**
   * Unions `other` into this result in place. When this result has no bitmap
   * it receives a copy of `other`'s; a missing bitmap in `other` is a no-op.
   */
  void OR(const InvertedSearchResult &other) {
    if (!other.bitmap_) {
      return;
    }
    if (!bitmap_) {
      bitmap_ = roaring_bitmap_copy(other.bitmap_);
      return;
    }
    roaring_bitmap_or_inplace(bitmap_, other.bitmap_);
  }


 private:
  roaring_bitmap_t *bitmap_{nullptr};  // owned; nullptr means empty result
};


}  // namespace zvec

================================================
FILE: src/db/index/column/vector_column/combined_vector_column_indexer.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "combined_vector_column_indexer.h"
#include <algorithm>
#include <cstdint>

namespace zvec {

// Builds a combined indexer over per-block indexers.
//
// When the segment has a writing forward block, a copy of its meta is
// appended to `blocks_`, retyped as a (quantized) vector index block. The
// starting offset of every indexer's block is then precomputed from the
// block document counts.
//
// NOTE(review): the offset loop indexes `blocks_` by indexer position, so it
// assumes there are at least as many blocks as indexers - confirm with the
// callers.
CombinedVectorColumnIndexer::CombinedVectorColumnIndexer(
    const std::vector<VectorColumnIndexer::Ptr> &indexers,
    const std::vector<VectorColumnIndexer::Ptr> &normal_indexers,
    const FieldSchema &field_schema, const SegmentMeta &segment_meta,
    std::vector<BlockMeta> blocks, MetricType metric_type, bool is_quantized)
    : field_schema_(field_schema),
      indexers_(indexers),
      normal_indexers_(normal_indexers),
      blocks_(std::move(blocks)),
      metric_type_(metric_type),
      is_quantized_(is_quantized) {
  if (segment_meta.has_writing_forward_block()) {
    BlockMeta writing_block = segment_meta.writing_forward_block().value();
    writing_block.set_type(is_quantized_ ? BlockType::VECTOR_INDEX_QUANTIZE
                                         : BlockType::VECTOR_INDEX);
    blocks_.push_back(std::move(writing_block));
  }

  // Running sum of document counts: the offset of block k is the number of
  // documents held by blocks 0..k-1.
  int running_offset = 0;
  const size_t indexer_count = indexers_.size();
  for (size_t idx = 0; idx < indexer_count; ++idx) {
    block_offsets_.push_back(running_offset);
    running_offset += blocks_[idx].doc_count_;
  }

  min_doc_id_ = segment_meta.min_doc_id();
}

// Searches every block indexer and merges the per-block hits into one
// segment-level result.
//
// For each block the query is rewritten to block-local terms (filter ids and
// brute-force pks are shifted by the block offset), the block indexer is
// queried, and the returned keys are shifted back to segment-local ids. The
// merged hits are ordered by score (descending for IP, ascending otherwise)
// and truncated to `query_params.topk`.
//
// Returns an error when the refiner is requested but `normal_indexers_` does
// not match `indexers_` in size, when the query params do not have the
// concrete type announced by their index type, when a block search fails, or
// when a block returns a result that is not a VectorIndexResults.
Result<IndexResults::Ptr> CombinedVectorColumnIndexer::Search(
    const vector_column_params::VectorData &vector_data,
    const vector_column_params::QueryParams &query_params) {
  core::IndexDocumentList doc_list;
  std::vector<std::string> reverted_vector_list;
  std::vector<std::string> reverted_sparse_values_list;

  // query_params.bf_pks is segment level, here we need to convert it to block
  // level
  const bool has_bf_pks = !query_params.bf_pks.empty();
  std::vector<std::vector<uint64_t>> block_bf_pks(indexers_.size());

  if (has_bf_pks) {
    // dispatch pks to the block whose id range contains them
    for (auto &pk : query_params.bf_pks[0]) {
      for (size_t i = 0; i < block_offsets_.size(); ++i) {
        if (pk >= block_offsets_[i] &&
            pk < block_offsets_[i] + blocks_[i].doc_count_) {
          block_bf_pks[i].push_back(
              static_cast<uint64_t>(pk - block_offsets_[i]));
          break;
        }
      }
    }
  }

  auto q_params = query_params.query_params;
  for (size_t i = 0; i < indexers_.size(); ++i) {
    if (has_bf_pks && block_bf_pks[i].empty()) {
      LOG_DEBUG(
          "query_params has bf_pks, but block_bf_pks[%zu] is empty, just skip "
          "this indexer",
          i);
      continue;
    }
    zvec::Result<zvec::IndexResults::Ptr> result{nullptr};
    float scale_factor{};
    bool need_refine{false};
    if (q_params && q_params->is_using_refiner()) {
      if (normal_indexers_.size() != indexers_.size()) {
        return tl::make_unexpected(Status::InvalidArgument(
            "normal indexers size[", normal_indexers_.size(),
            "] not match indexers size[", indexers_.size(), "]"));
      }
      // query_params of HNSW doesn't have scale_factor
      if (q_params->type() == IndexType::FLAT) {
        auto flat_params = std::dynamic_pointer_cast<FlatQueryParams>(q_params);
        if (!flat_params) {
          // The declared type and the concrete class disagree; dereferencing
          // the failed cast would crash.
          return tl::make_unexpected(Status::InvalidArgument(
              "query params of type FLAT are not FlatQueryParams"));
        }
        scale_factor = flat_params->scale_factor();
      } else if (q_params->type() == IndexType::IVF) {
        auto ivf_params = std::dynamic_pointer_cast<IVFQueryParams>(q_params);
        if (!ivf_params) {
          return tl::make_unexpected(Status::InvalidArgument(
              "query params of type IVF are not IVFQueryParams"));
        }
        scale_factor = ivf_params->scale_factor();
      }
      need_refine = true;
    }

    // The wrapper only needs to be used when the block does not start at 0;
    // it must outlive the Search call below, hence the loop-scope local.
    const IndexFilter *filter{nullptr};
    auto per_block_filter =
        BlockOffsetFilter{query_params.filter, block_offsets_[i]};
    if (query_params.filter) {
      if (block_offsets_[i] > 0) {
        filter = &per_block_filter;
      } else {
        filter = query_params.filter;
      }
    }

    vector_column_params::QueryParams modified_query_params{
        query_params.data_type,
        query_params.dimension,
        query_params.topk,
        filter,
        query_params.fetch_vector,
        query_params.query_params,
        query_params.group_by
            ? std::make_unique<vector_column_params::GroupByParams>(
                  query_params.group_by->group_topk,
                  query_params.group_by->group_count,
                  query_params.group_by->group_by)
            : nullptr,
        {},
        need_refine ? std::shared_ptr<vector_column_params::RefinerParam>(
                          new vector_column_params::RefinerParam{
                              scale_factor, normal_indexers_[i]})
                    : nullptr,
        query_params.extra_params};

    if (has_bf_pks) {
      // The block-local pk list is not read again, so hand it over.
      modified_query_params.bf_pks.emplace_back(std::move(block_bf_pks[i]));
    }

    result = indexers_[i]->Search(vector_data, modified_query_params);
    if (!result) {
      return tl::make_unexpected(result.error());
    }

    auto index_results = result.value();
    VectorIndexResults *vector_index_results =
        dynamic_cast<VectorIndexResults *>(index_results.get());
    if (vector_index_results == nullptr) {
      return tl::make_unexpected(Status::InternalError(
          "block indexer returned a result that is not VectorIndexResults"));
    }

    // Shift block-local keys back to segment-local ids.
    const auto &sub_docs = vector_index_results->docs();
    for (size_t j = 0; j < sub_docs.size(); ++j) {
      auto doc = sub_docs[j];
      doc.set_key(block_offsets_[i] + sub_docs[j].key());
      doc_list.emplace_back(std::move(doc));
    }

    auto &&temp_vector_list = vector_index_results->reverted_vector_list();
    reverted_vector_list.insert(
        reverted_vector_list.end(),
        std::make_move_iterator(temp_vector_list.begin()),
        std::make_move_iterator(temp_vector_list.end()));

    auto &&temp_sparse_list =
        vector_index_results->reverted_sparse_values_list();
    reverted_sparse_values_list.insert(
        reverted_sparse_values_list.end(),
        std::make_move_iterator(temp_sparse_list.begin()),
        std::make_move_iterator(temp_sparse_list.end()));
  }

  if (doc_list.empty()) {
    // return empty result
    return std::make_unique<VectorIndexResults>(
        field_schema_.is_sparse_vector(), std::move(doc_list),
        std::move(reverted_vector_list),
        std::move(reverted_sparse_values_list));
  }

  // Sort a permutation rather than the documents so the same order can be
  // applied to the side lists.
  std::vector<size_t> indices(doc_list.size());
  std::iota(indices.begin(), indices.end(), 0);

  // Only IP ranks larger scores first; L2, COSINE and every other metric
  // rank smaller scores first.
  const bool larger_score_first = metric_type_ == MetricType::IP;
  std::sort(indices.begin(), indices.end(),
            [&doc_list, larger_score_first](size_t lhs, size_t rhs) {
              return larger_score_first
                         ? doc_list[lhs].score() > doc_list[rhs].score()
                         : doc_list[lhs].score() < doc_list[rhs].score();
            });

  // doc_list
  std::vector<core::IndexDocument> sorted_doc_list(doc_list.size());
  for (size_t i = 0; i < indices.size(); ++i) {
    sorted_doc_list[i] = std::move(doc_list[indices[i]]);
  }
  doc_list = std::move(sorted_doc_list);

  // Applies the score order to a per-document side list. Both the source and
  // the destination index are bounded by the list size, so a list that is
  // shorter than doc_list can no longer be written out of range.
  auto apply_order = [&indices](std::vector<std::string> *list) {
    if (list->empty()) {
      return;
    }
    std::vector<std::string> sorted(list->size());
    const size_t limit = std::min(indices.size(), sorted.size());
    for (size_t i = 0; i < limit; ++i) {
      if (indices[i] < list->size()) {
        sorted[i] = std::move((*list)[indices[i]]);
      }
    }
    *list = std::move(sorted);
  };
  apply_order(&reverted_vector_list);
  apply_order(&reverted_sparse_values_list);

  // truncate to topk
  if (doc_list.size() > query_params.topk) doc_list.resize(query_params.topk);
  if (reverted_vector_list.size() > query_params.topk)
    reverted_vector_list.resize(query_params.topk);
  if (reverted_sparse_values_list.size() > query_params.topk)
    reverted_sparse_values_list.resize(query_params.topk);

  return std::make_unique<VectorIndexResults>(
      field_schema_.is_sparse_vector(), std::move(doc_list),
      std::move(reverted_vector_list), std::move(reverted_sparse_values_list));
}

// Fetches the vector of a document addressed by its segment-local id.
//
// The blocks are walked in order to find the one whose id range contains
// `segment_doc_id`; the id is then rebased to that block and forwarded to the
// block's indexer.
//
// Returns NotFound when no block contains the id or when the matching block
// has no indexer.
Result<vector_column_params::VectorDataBuffer>
CombinedVectorColumnIndexer::Fetch(uint32_t segment_doc_id) const {
  // 64-bit bounds so the running sum cannot wrap around the 32-bit id space.
  uint64_t block_begin = 0;
  for (size_t i = 0; i < blocks_.size(); ++i) {
    const uint64_t block_end = block_begin + blocks_[i].doc_count_;
    if (block_begin <= segment_doc_id && segment_doc_id < block_end) {
      // blocks_ and indexers_ are separate containers; do not index past the
      // indexers or call through a missing one.
      if (i >= indexers_.size() || indexers_[i] == nullptr) {
        LOG_ERROR("No indexer for block[%zu] of doc_id[%u]", i,
                  segment_doc_id);
        return tl::make_unexpected(Status::NotFound(
            "No indexer for block of doc_id:", segment_doc_id));
      }
      const auto block_doc_id =
          static_cast<uint32_t>(segment_doc_id - block_begin);
      return indexers_[i]->Fetch(block_doc_id);
    }
    block_begin = block_end;
  }

  LOG_ERROR("Can't find block for doc_id[%u]", segment_doc_id);
  return tl::make_unexpected(
      Status::NotFound("Can't find block for doc_id:", segment_doc_id));
}

}  // namespace zvec

================================================
FILE: src/db/index/column/vector_column/combined_vector_column_indexer.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <memory>
#include <vector>
#include "db/index/common/index_filter.h"
#include "vector_column_indexer.h"
#include "vector_column_params.h"

namespace zvec {

/**
 * Presents several per-block vector indexers as one segment-level indexer.
 *
 * Each block has its own local document id space. The combined indexer keeps
 * the starting offset of every block so that segment-local ids can be
 * translated to block-local ids and back.
 */
class CombinedVectorColumnIndexer {
 public:
  using Ptr = std::shared_ptr<CombinedVectorColumnIndexer>;

  /**
   * @param indexers         indexers that are searched, one per block
   * @param normal_indexers  indexers handed to the refiner; Search() requires
   *                         the same count as `indexers` when refining
   * @param field_schema     schema of the indexed vector field
   * @param segment_meta     source of the writing forward block (if any) and
   *                         of the minimum document id
   * @param blocks           metadata of the persisted blocks
   * @param metric_type      metric used to order merged search hits
   * @param is_quantized     whether the writing block is typed as a
   *                         quantized vector index block
   */
  explicit CombinedVectorColumnIndexer(
      const std::vector<VectorColumnIndexer::Ptr> &indexers,
      const std::vector<VectorColumnIndexer::Ptr> &normal_indexers,
      const FieldSchema &field_schema, const SegmentMeta &segment_meta,
      std::vector<BlockMeta> blocks, MetricType metric_type,
      bool is_quantized = false);

  virtual ~CombinedVectorColumnIndexer() = default;

  /** Searches all blocks and returns the merged, score-ordered top-k hits. */
  virtual Result<IndexResults::Ptr> Search(
      const vector_column_params::VectorData &vector_data,
      const vector_column_params::QueryParams &query_params);

  // doc_id is segment local id
  virtual Result<vector_column_params::VectorDataBuffer> Fetch(
      uint32_t segment_doc_id) const;


 protected:
  /**
   * A filter wrapper that applies an offset to document IDs before
   * delegating to an inner filter.
   *
   * This is used when multiple blocks with different ID offsets are stored.
   * Each block has its own local ID space, and this filter translates
   * block-level IDs to segment-level IDs before checking the inner filter.
   *
   * The inner filter is not owned and must outlive the wrapper. A null inner
   * filter filters nothing.
   */
  class BlockOffsetFilter : public IndexFilter {
   public:
    BlockOffsetFilter(const IndexFilter *inner_filter, uint64_t offset)
        : inner_filter_(inner_filter), offset_(offset) {}

    bool is_filtered(uint64_t id) const override {
      // The wrapper can be built around a null filter (Search() constructs it
      // unconditionally), so never dereference without checking.
      return inner_filter_ != nullptr &&
             inner_filter_->is_filtered(id + offset_);
    }

   private:
    const IndexFilter *inner_filter_;  // not owned; may be null
    uint64_t offset_;                  // added to block-local ids
  };

  // for ut
  CombinedVectorColumnIndexer() = default;


 private:
  FieldSchema field_schema_;
  std::vector<VectorColumnIndexer::Ptr> indexers_;
  std::vector<VectorColumnIndexer::Ptr> normal_indexers_;
  std::vector<BlockMeta> blocks_;
  // Starting segment-local id of each indexer's block.
  std::vector<uint32_t> block_offsets_;
  MetricType metric_type_{MetricType::UNDEFINED};
  bool is_quantized_{false};
  uint64_t min_doc_id_{0};
};

}  // namespace zvec

================================================
FILE: src/db/index/column/vector_column/engine_helper.hpp
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <memory>
#include <zvec/ailego/pattern/expected.hpp>
#include <zvec/core/interface/index.h>
#include <zvec/core/interface/index_param_builders.h>
#include <zvec/db/doc.h>
#include <zvec/db/query_params.h>
#include <zvec/db/status.h>
#include "zvec/db/index_params.h"
#include "zvec/db/type.h"
#include "vector_column_params.h"


namespace zvec {
// TODO: rename file extension
class ProximaEngineHelper {
 public:
  // Converts an engine vector buffer into the db-level buffer type.
  //
  // `is_sparse` selects which alternative is read from
  // `vector_data_buffer.vector_buffer` via std::get.
  //
  // The parameter is a const rvalue reference, so the selected alternative is
  // copied into a local first; only that local copy is moved from.
  static Result<vector_column_params::VectorDataBuffer>
  move_from_engine_vector_buffer(
      const core_interface::VectorDataBuffer &&vector_data_buffer,
      bool is_sparse) {
    if (!is_sparse) {
      auto dense_copy = std::get<core_interface::DenseVectorBuffer>(
          vector_data_buffer.vector_buffer);
      return vector_column_params::VectorDataBuffer{
          vector_column_params::DenseVectorBuffer{
              std::move(dense_copy.data)}};
    }

    auto sparse_copy = std::get<core_interface::SparseVectorBuffer>(
        vector_data_buffer.vector_buffer);
    return vector_column_params::VectorDataBuffer{
        vector_column_params::SparseVectorBuffer{
            std::move(sparse_copy.indices), std::move(sparse_copy.values)}};
  }

  // Converts an engine vector view into the db-level vector view.
  //
  // `is_sparse` selects which alternative is read from `vector_data.vector`
  // via std::get; the count/indices/values (sparse) or data (dense) fields
  // are passed through unchanged.
  static Result<vector_column_params::VectorData> convert_from_engine_vector(
      const core_interface::VectorData &vector_data, bool is_sparse) {
    if (!is_sparse) {
      const auto &dense =
          std::get<core_interface::DenseVector>(vector_data.vector);
      return vector_column_params::VectorData{
          vector_column_params::DenseVector{dense.data}};
    }

    const auto &sparse =
        std::get<core_interface::SparseVector>(vector_data.vector);
    return vector_column_params::VectorData{vector_column_params::SparseVector{
        sparse.count, sparse.indices, sparse.values}};
  }

  // Converts a db-level vector view into the engine vector view.
  //
  // `is_sparse` selects which alternative is read from `vector_data.vector`
  // via std::get. The engine structs take non-const pointers, so constness is
  // cast away from the db-level pointers.
  static Result<core_interface::VectorData> convert_to_engine_vector(
      const vector_column_params::VectorData &vector_data, bool is_sparse) {
    if (!is_sparse) {
      const auto &dense =
          std::get<vector_column_params::DenseVector>(vector_data.vector);
      return core_interface::VectorData{
          core_interface::DenseVector{const_cast<void *>(dense.data)}};
    }

    const auto &sparse =
        std::get<vector_column_params::SparseVector>(vector_data.vector);
    return core_interface::VectorData{core_interface::SparseVector{
        sparse.count, const_cast<void *>(sparse.indices),
        const_cast<void *>(sparse.values)}};
  }

  // Wraps a db-level filter in an engine filter.
  //
  // An engine filter object is always returned. When `filter` is null nothing
  // is installed on it; otherwise it forwards each id to
  // `filter->is_filtered`. The callback captures the raw pointer, so `filter`
  // has to stay alive for as long as the returned object is used.
  static std::shared_ptr<core_interface::IndexFilter> convert_to_engine_filter(
      const IndexFilter *filter) {
    auto wrapped = std::make_shared<core_interface::IndexFilter>();
    if (filter == nullptr) {
      return wrapped;
    }
    wrapped->set([filter](uint64_t id) { return filter->is_filtered(id); });
    return wrapped;
  }

 private:
  // Creates an engine query param of the requested type and fills the fields
  // shared by all index types: topk, fetch_vector, filter, and - when present
  // on the db side - radius, is_linear and the refiner settings.
  //
  // Returns InvalidArgument when a refiner is requested without a reference
  // indexer; previously that case dereferenced a null pointer.
  template <typename EngineQueryParamType>
  static Result<std::unique_ptr<EngineQueryParamType>>
  _build_common_query_param(
      const vector_column_params::QueryParams &db_query_params) {
    auto engine_query_param = std::make_unique<EngineQueryParamType>();
    engine_query_param->topk = db_query_params.topk;
    engine_query_param->fetch_vector = db_query_params.fetch_vector;

    engine_query_param->filter =
        convert_to_engine_filter(db_query_params.filter);

    if (db_query_params.query_params) {
      engine_query_param->radius = db_query_params.query_params->radius();
      engine_query_param->is_linear = db_query_params.query_params->is_linear();
    }
    if (db_query_params.refiner_param) {
      const auto &refiner = *db_query_params.refiner_param;
      if (!refiner.reference_indexer) {
        return tl::make_unexpected(Status::InvalidArgument(
            "refiner param has no reference indexer"));
      }
      engine_query_param->refiner_param =
          std::make_shared<core_interface::RefinerParam>(
              core_interface::RefinerParam{
                  .scale_factor_ = refiner.scale_factor_,
                  .reference_index = refiner.reference_indexer->index});
    }

    return engine_query_param;
  }

 public:
  // Builds the engine query param matching the index type of `field_schema`.
  //
  // The common fields are filled for every type; HNSW and HNSW_RABITQ also
  // copy `ef` into `ef_search`, and IVF copies `nprobe`, when db-level query
  // params are supplied.
  //
  // Returns InvalidArgument when the schema has no index params, when the
  // index type is not supported, when the common part cannot be built, or
  // when the supplied query params are not of the class that belongs to the
  // index type (this used to dereference a failed dynamic_cast).
  static Result<std::unique_ptr<core_interface::BaseIndexQueryParam>>
  convert_to_engine_query_param(
      const FieldSchema &field_schema,
      const vector_column_params::QueryParams &query_params) {
    if (!field_schema.index_params()) {
      return tl::make_unexpected(Status::InvalidArgument("nullptr"));
    }
    switch (field_schema.index_params()->type()) {
      case IndexType::FLAT: {
        // FLAT sets no index-specific field beyond the common ones.
        auto flat_query_param_result =
            _build_common_query_param<core_interface::FlatQueryParam>(
                query_params);
        if (!flat_query_param_result.has_value()) {
          return tl::make_unexpected(Status::InvalidArgument(
              "failed to build query param: " +
              flat_query_param_result.error().message()));
        }
        return std::move(flat_query_param_result.value());
      }

      case IndexType::HNSW: {
        auto hnsw_query_param_result =
            _build_common_query_param<core_interface::HNSWQueryParam>(
                query_params);
        if (!hnsw_query_param_result.has_value()) {
          return tl::make_unexpected(Status::InvalidArgument(
              "failed to build query param: " +
              hnsw_query_param_result.error().message()));
        }
        auto &hnsw_query_param = hnsw_query_param_result.value();
        if (query_params.query_params) {
          auto db_hnsw_query_params = dynamic_cast<const HnswQueryParams *>(
              query_params.query_params.get());
          if (db_hnsw_query_params == nullptr) {
            return tl::make_unexpected(Status::InvalidArgument(
                "query params are not HnswQueryParams"));
          }
          hnsw_query_param->ef_search = db_hnsw_query_params->ef();
        }
        return std::move(hnsw_query_param);
      }

      case IndexType::HNSW_RABITQ: {
        auto hnsw_query_param_result =
            _build_common_query_param<core_interface::HNSWRabitqQueryParam>(
                query_params);
        if (!hnsw_query_param_result.has_value()) {
          return tl::make_unexpected(Status::InvalidArgument(
              "failed to build query param: " +
              hnsw_query_param_result.error().message()));
        }
        auto &hnsw_query_param = hnsw_query_param_result.value();
        if (query_params.query_params) {
          auto db_hnsw_rabitq_query_params =
              dynamic_cast<const HnswRabitqQueryParams *>(
                  query_params.query_params.get());
          if (db_hnsw_rabitq_query_params == nullptr) {
            return tl::make_unexpected(Status::InvalidArgument(
                "query params are not HnswRabitqQueryParams"));
          }
          hnsw_query_param->ef_search = db_hnsw_rabitq_query_params->ef();
        }
        return std::move(hnsw_query_param);
      }

      case IndexType::IVF: {
        auto ivf_query_param_result =
            _build_common_query_param<core_interface::IVFQueryParam>(
                query_params);
        if (!ivf_query_param_result.has_value()) {
          return tl::make_unexpected(Status::InvalidArgument(
              "failed to build query param: " +
              ivf_query_param_result.error().message()));
        }
        auto &ivf_query_param = ivf_query_param_result.value();
        if (query_params.query_params) {
          auto db_ivf_query_params = dynamic_cast<const IVFQueryParams *>(
              query_params.query_params.get());
          if (db_ivf_query_params == nullptr) {
            return tl::make_unexpected(Status::InvalidArgument(
                "query params are not IVFQueryParams"));
          }
          ivf_query_param->nprobe = db_ivf_query_params->nprobe();
        }
        return std::move(ivf_query_param);
      }
      default:
        return tl::make_unexpected(Status::InvalidArgument("not supported"));
    }
  }

  // Maps a db-level metric type onto the engine metric type.
  // Returns InvalidArgument for any metric without an engine counterpart.
  static Result<core_interface::MetricType> convert_to_engine_metric_type(
      MetricType metric_type) {
    using EngineMetric = core_interface::MetricType;

    if (metric_type == MetricType::MIPSL2) {
      return EngineMetric::kMIPSL2sq;
    }
    if (metric_type == MetricType::IP) {
      return EngineMetric::kInnerProduct;
    }
    if (metric_type == MetricType::L2) {
      return EngineMetric::kL2sq;
    }
    if (metric_type == MetricType::COSINE) {
      return EngineMetric::kCosine;
    }
    return tl::make_unexpected(
        Status::InvalidArgument("unsupported metric type"));
  }

  // Maps a db-level quantize type onto the engine quantizer type.
  // UNDEFINED maps to kNone (no quantizer); any type without an engine
  // counterpart yields InvalidArgument.
  static Result<core_interface::QuantizerType> convert_to_engine_quantize_type(
      QuantizeType quantize_type) {
    using EngineQuantizer = core_interface::QuantizerType;

    if (quantize_type == QuantizeType::UNDEFINED) {
      return EngineQuantizer::kNone;
    }
    if (quantize_type == QuantizeType::FP16) {
      return EngineQuantizer::kFP16;
    }
    if (quantize_type == QuantizeType::INT8) {
      return EngineQuantizer::kInt8;
    }
    if (quantize_type == QuantizeType::INT4) {
      return EngineQuantizer::kInt4;
    }
    if (quantize_type == QuantizeType::RABITQ) {
      return EngineQuantizer::kRabitq;
    }
    return tl::make_unexpected(
        Status::InvalidArgument("unsupported quantize type"));
  }

  // Maps a db-level vector data type onto the engine element type.
  // Dense and sparse variants share the same element type; any other data
  // type yields InvalidArgument.
  static Result<core_interface::DataType> convert_to_engine_data_type(
      DataType data_type) {
    const bool is_fp32 = data_type == DataType::VECTOR_FP32 ||
                         data_type == DataType::SPARSE_VECTOR_FP32;
    if (is_fp32) {
      return core_interface::DataType::DT_FP32;
    }

    const bool is_fp16 = data_type == DataType::VECTOR_FP16 ||
                         data_type == DataType::SPARSE_VECTOR_FP16;
    if (is_fp16) {
      return core_interface::DataType::DT_FP16;
    }

    if (data_type == DataType::VECTOR_INT8) {
      return core_interface::DataType::DT_INT8;
    }

    return tl::make_unexpected(
        Status::InvalidArgument("unsupported data type"));
  }

 private:
  // Creates an index param builder of the requested type and applies the
  // settings shared by all index types: id-map usage, sparsity, dimension,
  // data type, metric type and quantizer.
  //
  // Returns InvalidArgument when the schema's index params are not of
  // `DBIndexParamType` ("bad_cast") or when the data, metric or quantize type
  // has no engine counterpart.
  template <typename DBIndexParamType, typename IndexParamBuilderType>
  static Result<std::shared_ptr<IndexParamBuilderType>>
  _build_common_index_param(const FieldSchema &field_schema) {
    const auto *typed_params = dynamic_cast<const DBIndexParamType *>(
        field_schema.index_params().get());
    if (typed_params == nullptr) {
      return tl::make_unexpected(Status::InvalidArgument("bad_cast"));
    }

    auto builder = std::make_shared<IndexParamBuilderType>();

    // db will ensure the id is consecutive
    builder->WithUseIDMap(false);

    builder->WithIsSparse(field_schema.is_sparse_vector())
        .WithDimension(field_schema.dimension());

    auto engine_data_type =
        convert_to_engine_data_type(field_schema.data_type());
    if (!engine_data_type.has_value()) {
      return tl::make_unexpected(
          Status::InvalidArgument("unsupported data type"));
    }
    builder->WithDataType(engine_data_type.value());

    auto engine_metric_type =
        convert_to_engine_metric_type(typed_params->metric_type());
    if (!engine_metric_type.has_value()) {
      return tl::make_unexpected(
          Status::InvalidArgument("unsupported metric type"));
    }
    builder->WithMetricType(engine_metric_type.value());

    auto engine_quantize_type =
        convert_to_engine_quantize_type(typed_params->quantize_type());
    if (!engine_quantize_type.has_value()) {
      return tl::make_unexpected(
          Status::InvalidArgument("unsupported quantize type"));
    }
    builder->WithQuantizerParam(
        core_interface::QuantizerParam(engine_quantize_type.value()));

    return builder;
  }

 public:
  // Builds the engine index param for the index type declared in
  // `field_schema`.
  //
  // Every supported type starts from the common builder settings; HNSW,
  // HNSW_RABITQ and IVF then add their type-specific settings before the
  // builder is asked to build.
  //
  // Returns InvalidArgument when the schema has no index params, when the
  // common builder cannot be set up, or when the index type is not supported.
  static Result<core_interface::BaseIndexParam::Pointer>
  convert_to_engine_index_param(const FieldSchema &field_schema) {
    if (!field_schema.index_params()) {
      return tl::make_unexpected(
          Status::InvalidArgument("field_schema.index_params nullptr"));
    }

    switch (field_schema.index_params()->type()) {
      case IndexType::FLAT: {
        auto built =
            _build_common_index_param<FlatIndexParams,
                                      core_interface::FlatIndexParamBuilder>(
                field_schema);
        if (!built.has_value()) {
          return tl::make_unexpected(Status::InvalidArgument(
              "failed to build index param: " + built.error().message()));
        }
        return built.value()->Build();
      }

      case IndexType::HNSW: {
        auto built =
            _build_common_index_param<HnswIndexParams,
                                      core_interface::HNSWIndexParamBuilder>(
                field_schema);
        if (!built.has_value()) {
          return tl::make_unexpected(Status::InvalidArgument(
              "failed to build index param: " + built.error().message()));
        }
        auto builder = built.value();

        auto hnsw_params = dynamic_cast<const HnswIndexParams *>(
            field_schema.index_params().get());
        builder->WithM(hnsw_params->m());
        builder->WithEFConstruction(hnsw_params->ef_construction());

        return builder->Build();
      }

      case IndexType::HNSW_RABITQ: {
        auto built = _build_common_index_param<
            HnswRabitqIndexParams, core_interface::HNSWRabitqIndexParamBuilder>(
            field_schema);
        if (!built.has_value()) {
          return tl::make_unexpected(Status::InvalidArgument(
              "failed to build index param: " + built.error().message()));
        }
        auto builder = built.value();

        auto rabitq_params = dynamic_cast<const HnswRabitqIndexParams *>(
            field_schema.index_params().get());
        builder->WithM(rabitq_params->m());
        builder->WithEFConstruction(rabitq_params->ef_construction());
        builder->WithTotalBits(rabitq_params->total_bits());
        builder->WithNumClusters(rabitq_params->num_clusters());
        builder->WithSampleCount(rabitq_params->sample_count());
        builder->WithProvider(rabitq_params->raw_vector_provider());
        builder->WithReformer(rabitq_params->rabitq_reformer());

        return builder->Build();
      }

      case IndexType::IVF: {
        auto built = _build_common_index_param<
            IVFIndexParams, core_interface::IVFIndexParamBuilder>(field_schema);
        if (!built.has_value()) {
          return tl::make_unexpected(Status::InvalidArgument(
              "failed to build index param: " + built.error().message()));
        }
        auto builder = built.value();

        auto ivf_params = dynamic_cast<const IVFIndexParams *>(
            field_schema.index_params().get());
        builder->WithNList(ivf_params->n_list());
        builder->WithNiters(ivf_params->n_iters());
        builder->WithUseSoar(ivf_params->use_soar());

        return builder->Build();
      }

      default:
        return tl::make_unexpected(Status::InvalidArgument("not supported"));
    }
  }
};
};  // namespace zvec

================================================
FILE: src/db/index/column/vector_column/vector_column_indexer.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "vector_column_indexer.h"
#include <zvec/ailego/pattern/expected.hpp>
#include <zvec/core/interface/index_factory.h>
#include <zvec/db/status.h>
#include "engine_helper.hpp"


namespace zvec {

// Opens the engine index backing this column.
//
// Returns InvalidArgument when an index handle is already attached, or when
// engine_name_ is anything other than "proxima"; otherwise forwards
// read_options to CreateProximaIndex() and returns its status.
Status VectorColumnIndexer::Open(
    const vector_column_params::ReadOptions &read_options) {
  if (index != nullptr) {
    return Status::InvalidArgument("Index already opened");
  }

  // "proxima" is the only engine this indexer knows how to create.
  if (engine_name_ != "proxima") {
    return Status::InvalidArgument("Engine name not supported");
  }
  return CreateProximaIndex(read_options);
}

// Builds the engine index described by field_schema_ and opens it on
// index_file_path().
//
// read_options.use_mmap selects the kMMAP storage type, otherwise kBufferPool
// is used; create_new and read_only are forwarded to the engine unchanged.
//
// Returns:
//   InvalidArgument - the schema could not be converted to engine parameters.
//   InternalError   - the engine failed to create or open the index.
//
// On any failure the `index` handle is left null, so a later Open() can be
// retried instead of being rejected with "Index already opened".
Status VectorColumnIndexer::CreateProximaIndex(
    const vector_column_params::ReadOptions &read_options) {
  auto index_param_result =
      ProximaEngineHelper::convert_to_engine_index_param(field_schema_);
  if (!index_param_result.has_value()) {
    return Status::InvalidArgument(index_param_result.error().message());
  }
  auto &index_param = index_param_result.value();

  index = core_interface::IndexFactory::CreateAndInitIndex(*index_param);
  if (index == nullptr) {
    return Status::InternalError("Failed to create index");
  }

  auto storage_type =
      read_options.use_mmap
          ? core_interface::StorageOptions::StorageType::kMMAP
          : core_interface::StorageOptions::StorageType::kBufferPool;

  if (0 != index->Open(this->index_file_path(),
                       {storage_type, read_options.create_new,
                        read_options.read_only})) {
    // Drop the half-initialised handle: keeping it would make this indexer
    // look opened to Open(), doc_count() and the other null checks.
    index.reset();
    return Status::InternalError("Failed to open index");
  }

  return Status::OK();
}

// Asks the engine index to flush.
// Returns InvalidArgument when no index is open and InternalError when the
// engine reports a non-zero result.
Status VectorColumnIndexer::Flush() {
  if (index == nullptr) {
    return Status::InvalidArgument("Index not opened");
  }

  const auto flush_ret = index->Flush();
  if (flush_ret != 0) {
    return Status::InternalError("Failed to flush index");
  }
  return Status::OK();
}


// Closes the engine index and releases the handle.
// Returns InvalidArgument when no index is open. When the engine reports a
// non-zero result, InternalError is returned and the handle is kept.
Status VectorColumnIndexer::Close() {
  if (index == nullptr) {
    return Status::InvalidArgument("Index not opened");
  }

  const auto close_ret = index->Close();
  if (close_ret != 0) {
    return Status::InternalError("Failed to close index");
  }

  // Only a successfully closed index is detached from this indexer.
  index.reset();
  return Status::OK();
}

// Closes the index and then removes index_file_path_ from disk.
// Returns InvalidArgument when no index is open, and InternalError when
// either the close or the file removal fails (the file is not touched if the
// close fails).
Status VectorColumnIndexer::Destroy() {
  if (index == nullptr) {
    return Status::InvalidArgument("Index not opened");
  }

  // The index must be closed before its backing file is deleted.
  if (Close() != Status::OK()) {
    return Status::InternalError("Failed to close index");
  }

  const bool removed = ailego::File::RemovePath(index_file_path_);
  if (!removed) {
    return Status::InternalError("Failed to remove index file");
  }
  return Status::OK();
}

// Merges the engine indexes of `indexers` into this index.
//
// Indexers whose file path equals this indexer's path are skipped. The
// filter is converted with ProximaEngineHelper::convert_to_engine_filter and
// merge_options.write_concurrency / merge_options.pool are handed to the
// engine.
//
// Returns OK immediately for an empty `indexers` list, InvalidArgument when
// no index is open or the filter conversion yields null, and InternalError
// when the engine merge reports a non-zero result.
Status VectorColumnIndexer::Merge(
    const std::vector<VectorColumnIndexer::Ptr> &indexers,
    const IndexFilter::Ptr &filter,
    const vector_column_params::MergeOptions &merge_options) {
  if (index == nullptr) {
    return Status::InvalidArgument("Index not opened");
  }
  if (indexers.empty()) {
    return Status::OK();
  }

  // Collect the engine handles of every source that is not this indexer.
  std::vector<core_interface::Index::Pointer> sources;
  const std::string own_path = this->index_file_path();
  for (const auto &candidate : indexers) {
    if (candidate->index_file_path() != own_path) {
      sources.push_back(candidate->index);
    }
  }

  auto engine_filter =
      ProximaEngineHelper::convert_to_engine_filter(filter.get());
  if (engine_filter == nullptr) {
    return Status::InvalidArgument("Failed to convert filter");
  }

  const auto merge_ret =
      index->Merge(sources, *engine_filter,
                   {merge_options.write_concurrency, merge_options.pool});
  if (merge_ret != 0) {
    return Status::InternalError("Failed to merge index");
  }
  return Status::OK();
}

// Adds one vector to the index under `doc_id`.
//
// vector_data is converted to the engine representation according to
// is_sparse_ before being handed to the engine.
//
// Returns InvalidArgument when no index is open or the vector cannot be
// converted, and InternalError when the engine rejects the vector.
Status VectorColumnIndexer::Insert(
    const vector_column_params::VectorData &vector_data, uint32_t doc_id) {
  if (index == nullptr) {
    return Status::InvalidArgument("Index not opened");
  }

  auto engine_vector_data =
      ProximaEngineHelper::convert_to_engine_vector(vector_data, is_sparse_);
  // The conversion result must be checked before .value() is touched;
  // accessing an empty result would abort or throw instead of reporting a
  // Status to the caller.
  if (!engine_vector_data.has_value()) {
    return Status::InvalidArgument("Failed to convert vector data");
  }
  if (0 != index->Add(engine_vector_data.value(), doc_id)) {
    return Status::InternalError("Failed to add vector to index");
  }
  return Status::OK();
}

// Fetches the stored vector for `doc_id` as an owning buffer.
//
// Returns InvalidArgument when no index is open, and InternalError when the
// engine fetch fails or the fetched buffer cannot be converted to a
// vector_column_params::VectorDataBuffer.
Result<vector_column_params::VectorDataBuffer> VectorColumnIndexer::Fetch(
    uint32_t doc_id) const {
  if (index == nullptr) {
    return tl::make_unexpected(Status::InvalidArgument("Index not opened"));
  }

  auto vector_data_buffer = core_interface::VectorDataBuffer();

  if (0 != index->Fetch(doc_id, &vector_data_buffer)) {
    return tl::make_unexpected(
        Status::InternalError("Failed to fetch vector from index"));
  }

  auto converted = ProximaEngineHelper::move_from_engine_vector_buffer(
      std::move(vector_data_buffer), is_sparse_);
  // Check before unwrapping so a failed conversion surfaces as a Status
  // rather than an abort/throw from .value().
  if (!converted.has_value()) {
    return tl::make_unexpected(
        Status::InternalError("Failed to convert fetched vector"));
  }
  return std::move(converted.value());
}

// Runs a vector search against the open index.
//
// vector_data is converted to the engine representation according to
// is_sparse_, and query_params is converted with
// ProximaEngineHelper::convert_to_engine_query_param. At most one bf_pks
// list is supported; it is copied into the engine query parameter.
//
// Returns InvalidArgument when no index is open, the query vector cannot be
// converted, or more than one bf_pks list is supplied; the query-parameter
// conversion error is propagated as is; InternalError is returned when the
// engine search fails. On success the result wraps the engine's document
// list and reverted vector/sparse-value lists.
Result<IndexResults::Ptr> VectorColumnIndexer::Search(
    const vector_column_params::VectorData &vector_data,
    const vector_column_params::QueryParams &query_params) {
  if (index == nullptr) {
    return tl::make_unexpected(Status::InvalidArgument("Index not opened"));
  }

  auto engine_vector_data =
      ProximaEngineHelper::convert_to_engine_vector(vector_data, is_sparse_);
  // Check before unwrapping so a failed conversion surfaces as a Status
  // rather than an abort/throw from .value().
  if (!engine_vector_data.has_value()) {
    return tl::make_unexpected(
        Status::InvalidArgument("Failed to convert vector data"));
  }

  auto engine_query_param_result =
      ProximaEngineHelper::convert_to_engine_query_param(field_schema_,
                                                         query_params);
  if (!engine_query_param_result.has_value()) {
    return tl::make_unexpected(engine_query_param_result.error());
  }
  auto &engine_query_param = engine_query_param_result.value();

  if (query_params.bf_pks.size() > 1) {
    LOG_ERROR("bf_pks size > 1 is not supported");
    return tl::make_unexpected(
        Status::InvalidArgument("bf_pks size > 1 is not supported"));
  } else if (query_params.bf_pks.size() == 1) {
    // query_params is const, so the list is copied (a std::move here could
    // never have moved anything).
    engine_query_param->bf_pks =
        std::make_shared<std::vector<uint64_t>>(query_params.bf_pks[0]);
  } else {
    engine_query_param->bf_pks = nullptr;
  }

  core_interface::SearchResult search_result;
  if (0 != index->Search(engine_vector_data.value(),
                         std::move(engine_query_param), &search_result)) {
    return tl::make_unexpected(
        Status::InternalError("Failed to search vector"));
  }

  auto result = std::make_shared<VectorIndexResults>(
      is_sparse_, std::move(search_result.doc_list_),
      std::move(search_result.reverted_vector_list_),
      std::move(search_result.reverted_sparse_values_list_));
  return result;
}

}  // namespace zvec


================================================
FILE: src/db/index/column/vector_column/vector_column_indexer.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <utility>
#include <variant>
#include <ailego/parallel/lock.h>
#include <zvec/ailego/pattern/expected.hpp>
#include <zvec/ailego/utility/string_helper.h>
#include <zvec/core/interface/index.h>
#include <zvec/core/interface/index_param.h>
#include <zvec/db/schema.h>
#include <zvec/db/status.h>
#include "db/common/constants.h"
#include "db/common/typedef.h"
#include "db/index/column/common/index_results.h"
#include "db/index/common/meta.h"
#include "zvec/core/framework/index_provider.h"
#include "vector_column_params.h"
#include "vector_index_results.h"

namespace zvec {

class ProximaEngineHelper;

// Wraps one engine vector index that lives in a single index file.
//
// The indexer starts without an engine handle; Open() creates one and
// Close()/Destroy() release it. Every operation that needs the handle
// reports an error (or a sentinel, see doc_count()) while it is absent.
class VectorColumnIndexer {
 public:
  using Ptr = std::shared_ptr<VectorColumnIndexer>;
  PROXIMA_DISALLOW_COPY_AND_ASSIGN(VectorColumnIndexer);

  // index_file_path: file backing the index.
  // field_schema:    schema of the vector column; decides dense vs sparse.
  // engine_name:     engine to use, "proxima" by default.
  VectorColumnIndexer(const std::string &index_file_path,
                      const FieldSchema &field_schema,
                      const std::string &engine_name = "proxima")
      : field_schema_(field_schema),
        index_file_path_(index_file_path),
        engine_name_(engine_name) {
    // assert(field_schema.is_dense_vector() ||
    // field_schema.is_sparse_vector());
    is_sparse_ = field_schema.is_sparse_vector();
  }

  virtual ~VectorColumnIndexer() = default;

 public:
  // Creates the engine index and opens it on index_file_path().
  Status Open(const vector_column_params::ReadOptions &read_options);

  // Flushes the engine index.
  Status Flush();

  // Closes the engine index and releases the handle.
  // NOTE(review): an earlier comment stated that Close() also flushes; this
  // class does not call Flush() itself, so that would have to happen inside
  // the engine's Close() - confirm.
  Status Close();

  // Destroy will call Close() and remove index file
  Status Destroy();


  // If HNSWIndexer.merge([FlatIndexer1, FlatIndexer2])
  // then the merged indexer is a HNSWIndexer
  Status Merge(const std::vector<VectorColumnIndexer::Ptr> &indexers,
               const IndexFilter::Ptr &filter = nullptr,
               const vector_column_params::MergeOptions &merge_options = {});
  // TODO: should we use this function? or a Reducer?
  //  TODO: static reduce, optimize; iterator/scan


  //! Insert vector
  Status Insert(const vector_column_params::VectorData &vector_data,
                uint32_t doc_id);
  // TODO: batch insert

  // Searches the index with the given query vector and parameters.
  virtual Result<IndexResults::Ptr> Search(
      const vector_column_params::VectorData &vector_data,
      const vector_column_params::QueryParams &query_params);
  // Result<std::vector<IndexResults::Ptr>> BatchSearch(
  //     const VectorDataset &vector_data,
  //     const  vector_column_params::QueryParams &query_params);

  // Returns the stored vector for doc_id as an owning buffer.
  Result<vector_column_params::VectorDataBuffer> Fetch(uint32_t doc_id) const;
  // Result<VectorDataset> BatchFetch(const std::vector<uint32_t> &doc_ids)
  // const;

  // Returns a provider created by the engine index, or nullptr when the
  // index has not been opened (previously this dereferenced a null handle).
  core::IndexProvider::Pointer create_index_provider() const {
    if (index == nullptr) {
      return nullptr;
    }
    return index->create_index_provider();
  }

 public:
  // Path of the file backing this index.
  std::string index_file_path() const {
    return index_file_path_;
  }

  // Number of documents reported by the engine index.
  // Returns static_cast<size_t>(-1), i.e. the maximum size_t value, when the
  // index is not opened.
  size_t doc_count() const {
    if (index == nullptr) {
      return static_cast<size_t>(-1);
    }
    return index->GetDocCount();
  }

  // for ut
 protected:
  VectorColumnIndexer() = default;

 private:
  // protected:
  //  virtual bool init_proxima_params() = 0;

  // proxima or other engine index param like VSAGE
  // build proxima index
  Status CreateProximaIndex(
      const vector_column_params::ReadOptions &read_options);

 protected:
  friend ProximaEngineHelper;
  core_interface::Index::Pointer index;  // null until Open() succeeds
  FieldSchema field_schema_{};
  std::string index_file_path_{};


  std::string engine_name_ = "proxima";
  bool is_sparse_{false};  // TODO: eliminate the dynamic flag and make it
                           // static/template/separate class
};


}  // namespace zvec


================================================
FILE: src/db/index/column/vector_column/vector_column_params.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <functional>
#include <memory>
#include <variant>
#include <vector>
#include <zvec/ailego/container/params.h>
#include <zvec/ailego/parallel/thread_pool.h>
#include <zvec/core/interface/index_param.h>
#include <zvec/db/query_params.h>
#include <zvec/db/type.h>
#include "db/index/common/index_filter.h"

namespace zvec {
class VectorColumnIndexer;

namespace vector_column_params {
// Non-owning view of a dense vector.
// `data` points at the raw elements; the element type and length are not
// carried here. Defaults to nullptr so a default-constructed view never
// holds an indeterminate pointer.
struct DenseVector {
  const void *data{nullptr};
};

// Non-owning view of a sparse vector: `count` entries described by two
// parallel arrays. Members default to an empty view so a default-constructed
// instance never holds indeterminate values.
struct SparseVector {
  uint32_t count{0};
  const void *indices{nullptr};  // uint32
  const void *values{nullptr};   // FP16/FP32
};

// Non-owning vector argument: holds either a DenseVector or a SparseVector
// view. A default-constructed instance holds the DenseVector alternative.
struct VectorData {
  std::variant<DenseVector, SparseVector> vector{};
};


// VectorData with memory management
// Owning storage for a dense vector; the raw bytes live in a std::string so
// their lifetime is managed automatically.
struct DenseVectorBuffer {
  std::string data{};  // raw vector bytes
};

// Owning storage for a sparse vector: two byte strings holding the packed
// indices and the packed values.
struct SparseVectorBuffer {
  using IndexType = uint32_t;

  std::string indices{};  // packed IndexType entries
  std::string values{};   // packed value entries

  // Number of whole IndexType entries stored in `indices`.
  uint32_t count() const {
    const auto index_bytes = indices.size();
    return index_bytes / sizeof(IndexType);
  }
};

// Owning counterpart of VectorData: holds either a DenseVectorBuffer or a
// SparseVectorBuffer.
struct VectorDataBuffer {
  std::variant<DenseVectorBuffer, SparseVectorBuffer> vector_buffer{};
};


// Options used when opening a vector column index.
struct ReadOptions {
  bool use_mmap = true;     // true: mmap storage, false: buffer-pool storage
  bool create_new = false;  // forwarded to the engine when opening
  bool read_only = false;   // forwarded to the engine when opening
};

// Options handed to the engine when merging indexes.
struct MergeOptions {
  uint32_t write_concurrency = 1;      // passed through to the engine merge
  ailego::ThreadPool *pool = nullptr;  // optional, non-owning thread pool
};

// Parameters for a grouped search.
struct GroupByParams {
  // Stores the two limits and takes ownership of the grouping callback.
  GroupByParams(uint32_t group_topk, uint32_t group_count,
                std::function<std::string(uint64_t key)> group_by)
      : group_topk(group_topk),
        group_count(group_count),
        group_by(std::move(group_by)) {}

  uint32_t group_topk = 0;
  uint32_t group_count = 0;
  // Maps a uint64_t key to the string that identifies its group.
  std::function<std::string(uint64_t key)> group_by;
};

struct RefinerParam {
  float scale_factor_{10};
  std::shared_ptr<VectorColumnIndexer> reference_indexer{nullptr};
};

// This is an internal version, while QueryParams in doc.h is an interface ver
struct QueryParams {
  DataType data_type{DataType::UNDEFINED};
  uint32_t dimension{0U};
  uint32_t topk{0U};
  mutable const IndexFilter *filter{nullptr};
  bool fetch_vector{false};
  zvec::QueryParams::Ptr query_params;
  std::unique_ptr<GroupByParams> group_by;
  // TODO: 1. should be uint32? 2. if no batch mode, change to optional<vector>
  std::vector<std::vector<uint64_t>> bf_pks{};

  std::shared_ptr<RefinerParam> refiner_param{nullptr};

  ailego::Params extra_params{};
};
}  // namespace vector_column_params
}  // namespace zvec


================================================
FILE: src/db/index/column/vector_column/vector_index_results.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <cstddef>
#include <memory>
#include <zvec/core/framework/index_document.h>
#include "db/common/typedef.h"
#include "db/index/column/common/index_results.h"

// TODO: eliminate aitheta2 dependency for decoupling

namespace zvec {

// Flat (ungrouped) search results: one document list plus optional lists of
// reverted dense vectors / reverted sparse values that, when non-empty, take
// precedence over the data stored in the documents.
class VectorIndexResults : public IndexResults {
 public:
  // Cursor over the documents of a VectorIndexResults. It keeps a raw
  // pointer to the result set and does not own it.
  class VectorIterator : public IndexResults::Iterator {
   public:
    VectorIterator(const VectorIndexResults *rs) : rs_(rs) {}

    VectorIterator(const VectorIndexResults *rs, uint32_t index)
        : rs_(rs), index_(index) {}

   public:
    // Key of the current document.
    idx_t doc_id() const override {
      const auto &current = rs_->document(index_);
      return current.key();
    }

    // Score of the current document.
    float score() const override {
      const auto &current = rs_->document(index_);
      return current.score();
    }

    // Moves to the following document.
    void next() override {
      ++index_;
    }

    // True while the cursor is inside the document list.
    bool valid() const override {
      return index_ < rs_->count();
    }

    // Non-owning view of the current document's vector; sparse or dense
    // depending on is_sparse().
    const vector_column_params::VectorData vector() const override {
      if (!is_sparse()) {
        return vector_column_params::VectorData{
            vector_column_params::DenseVector{dense_vector()}};
      }
      return vector_column_params::VectorData{
          vector_column_params::SparseVector{sparse_count(),
                                             sparse_indices().data(),
                                             sparse_values().data()}};
    }

   private:
    // Reverted vector when the reverted list is populated, otherwise the
    // vector stored in the document.
    const void *dense_vector() const {
      const auto &reverted = rs_->reverted_vector_list_;
      if (reverted.empty()) {
        return rs_->document(index_).vector();
      }
      return reverted[index_].data();
    }

    uint32_t sparse_count() const {
      return rs_->document(index_).sparse_doc().sparse_count();
    }

    const std::string &sparse_indices() const {
      return rs_->document(index_).sparse_doc().sparse_indices();
    }

    // Reverted sparse values when that list is populated, otherwise the
    // values stored in the document.
    const std::string &sparse_values() const {
      const auto &reverted = rs_->reverted_sparse_values_list_;
      if (reverted.empty()) {
        return rs_->document(index_).sparse_doc().sparse_values();
      }
      return reverted[index_];
    }

   private:
    const VectorIndexResults *rs_{nullptr};
    uint32_t index_{0U};
  };

  friend class VectorIterator;

 public:
  // Takes ownership of the document list and both reverted lists.
  VectorIndexResults(bool is_sparse, core::IndexDocumentList &&doc_list,
                     std::vector<std::string> &&reverted_vector_list,
                     std::vector<std::string> &&reverted_sparse_values_list)
      : is_sparse_(is_sparse),
        docs_(std::move(doc_list)),
        reverted_vector_list_(std::move(reverted_vector_list)),
        reverted_sparse_values_list_(std::move(reverted_sparse_values_list)) {}

 public:
  // Creates a cursor positioned on the first document and tags it with this
  // result set's sparse flag.
  IndexResults::IteratorUPtr create_iterator() override {
    auto cursor = std::make_unique<VectorIterator>(this);
    cursor->set_is_sparse(is_sparse_);
    return cursor;
  }

  // Number of documents in the result set.
  size_t count() const override {
    return docs_.size();
  }

 public:  // unique method
  core::IndexDocumentList &docs() {
    return docs_;
  }

  std::vector<std::string> &reverted_vector_list() {
    return reverted_vector_list_;
  }

  std::vector<std::string> &reverted_sparse_values_list() {
    return reverted_sparse_values_list_;
  }


 private:
  const core::IndexDocument &document(size_t index) const {
    return docs_[index];
  }

 private:
  bool is_sparse_;
  core::IndexDocumentList docs_{};
  std::vector<std::string> reverted_vector_list_{};
  std::vector<std::string> reverted_sparse_values_list_{};
};

// Grouped search results: a list of groups, each carrying its own document
// list, plus optional per-group lists of reverted dense vectors / reverted
// sparse values that, when non-empty, take precedence over the data stored
// in the documents.
class GroupVectorIndexResults : public IndexResults {
 public:
  // Cursor that walks every document of every group in order. It keeps a raw
  // pointer to the result set and does not own it.
  //
  // NOTE(review): unlike VectorIndexResults::create_iterator(), nothing here
  // calls set_is_sparse(), so is_sparse() reports whatever the base iterator
  // defaults to - confirm this is intended for sparse grouped results.
  class GroupVectorIterator : public IndexResults::Iterator {
   public:
    GroupVectorIterator(const GroupVectorIndexResults *rs) : rs_(rs) {
      // Start on the first document that actually exists, so doc_id()/score()
      // never index into an empty leading group.
      skip_exhausted_groups();
    }

   public:
    // Key of the current document.
    idx_t doc_id() const override {
      return rs_->document(group_index_, doc_index_).key();
    }

    // Score of the current document.
    float score() const override {
      return rs_->document(group_index_, doc_index_).score();
    }

    // Moves to the next document, crossing into the following non-empty
    // group when the current one is exhausted. Calling it on an exhausted
    // cursor is a no-op.
    void next() override {
      if (!valid()) {
        return;
      }
      ++doc_index_;
      skip_exhausted_groups();
    }

    // True while the cursor points inside the group list.
    bool valid() const override {
      return group_index_ < rs_->groups_.size();
    }

    // Non-owning view of the current document's vector; sparse or dense
    // depending on is_sparse().
    const vector_column_params::VectorData vector() const override {
      if (is_sparse()) {
        return vector_column_params::VectorData{
            vector_column_params::SparseVector{sparse_count(),
                                               sparse_indices().data(),
                                               sparse_values().data()}};
      }
      return vector_column_params::VectorData{
          vector_column_params::DenseVector{dense_vector()}};
    }

   private:
    // Advances group_index_ past every group that has no document at
    // doc_index_ (finished or empty groups), resetting doc_index_ each time.
    void skip_exhausted_groups() {
      if (rs_ == nullptr) {
        return;
      }
      while (group_index_ < rs_->groups_.size() &&
             doc_index_ >= rs_->groups_[group_index_].docs().size()) {
        ++group_index_;
        doc_index_ = 0;
      }
    }

    // Reverted vector when the reverted list is populated, otherwise the
    // vector stored in the document.
    const void *dense_vector() const {
      if (!rs_->reverted_vector_list_.empty()) {
        return rs_->reverted_vector_list_[group_index_][doc_index_].data();
      }
      return rs_->document(group_index_, doc_index_).vector();
    }

    uint32_t sparse_count() const {
      return rs_->document(group_index_, doc_index_)
          .sparse_doc()
          .sparse_count();
    }

    const std::string &sparse_indices() const {
      return rs_->document(group_index_, doc_index_)
          .sparse_doc()
          .sparse_indices();
    }

    // Reverted sparse values when that list is populated, otherwise the
    // values stored in the document.
    const std::string &sparse_values() const {
      if (!rs_->reverted_sparse_values_list_.empty()) {
        return rs_->reverted_sparse_values_list_[group_index_][doc_index_];
      }
      return rs_->document(group_index_, doc_index_)
          .sparse_doc()
          .sparse_values();
    }

    // Identifier of the group the cursor is currently in.
    const std::string &group_id() const override {
      return rs_->groups_[group_index_].group_id();
    }

   private:
    const GroupVectorIndexResults *rs_{nullptr};
    uint32_t group_index_{0U};
    uint32_t doc_index_{0U};
  };

  friend class GroupVectorIterator;

 public:
  GroupVectorIndexResults(core::IndexGroupDocumentList &&group_list)
      : groups_(std::move(group_list)) {
    init_count();
  }

  GroupVectorIndexResults(
      core::IndexGroupDocumentList &&group_list,
      std::vector<std::vector<std::string>> &&reverted_vector_list)
      : groups_(std::move(group_list)),
        reverted_vector_list_(std::move(reverted_vector_list)) {
    init_count();
  }

  GroupVectorIndexResults(
      core::IndexGroupDocumentList &&group_list,
      std::vector<std::vector<std::string>> &&reverted_vector_list,
      std::vector<std::vector<std::string>> &&reverted_sparse_values_list)
      : groups_(std::move(group_list)),
        reverted_vector_list_(std::move(reverted_vector_list)),
        reverted_sparse_values_list_(std::move(reverted_sparse_values_list)) {
    // This overload used to skip the count initialisation, which left
    // count() at 0 regardless of how many documents were supplied.
    init_count();
  }

 public:
  // Creates a cursor positioned on the first available document.
  IndexResults::IteratorUPtr create_iterator() override {
    return std::unique_ptr<GroupVectorIterator>(new GroupVectorIterator(this));
  }


  // Total number of documents across all groups, computed at construction.
  size_t count() const override {
    return count_;
  }

 public:  // unique method
  core::IndexGroupDocumentList &groups() {
    return groups_;
  }

 private:
  const core::IndexDocument &document(size_t group_index,
                                      size_t doc_index) const {
    return groups_[group_index].docs()[doc_index];
  }

  // Sums the document counts of all groups into count_.
  void init_count() {
    count_ = 0;
    for (const auto &group : groups_) {
      count_ += group.docs().size();
    }
  }

 private:
  core::IndexGroupDocumentList groups_{};
  std::vector<std::vector<std::string>> reverted_vector_list_{};
  std::vector<std::vector<std::string>> reverted_sparse_values_list_{};
  size_t count_{0};
};


}  // namespace zvec


================================================
FILE: src/db/index/common/delete_store.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once


#include <memory>
#include <string>
#include "db/common/concurrent_roaring_bitmap.h"
#include "index_filter.h"


namespace zvec {


// Records which doc ids of a collection have been deleted, backed by a
// ConcurrentRoaringBitmap64 that can be loaded from / flushed to a file.
//
// make_filter() calls shared_from_this(), so an instance that hands out
// filters must be owned by a std::shared_ptr (as CreateAndLoad() and clone()
// arrange).
class DeleteStore : public std::enable_shared_from_this<DeleteStore> {
 public:
  using Ptr = std::shared_ptr<DeleteStore>;

  explicit DeleteStore(std::string collection_name)
      : collection_name_(std::move(collection_name)) {}

  ~DeleteStore() {
    LOG_INFO("Closed delete store");
  }

  // Creates a store and loads its bitmap from file_path.
  // Returns nullptr when file_path is empty or loading fails.
  static Ptr CreateAndLoad(std::string collection_name,
                           const std::string &file_path) {
    if (file_path.empty()) {
      LOG_ERROR("File path is empty");
      return nullptr;
    }
    DeleteStore::Ptr ptr =
        std::make_shared<DeleteStore>(std::move(collection_name));
    if (ptr->load(file_path).ok()) {
      return ptr;
    } else {
      return nullptr;
    }
  }


 private:
  DeleteStore(const DeleteStore &) = delete;
  DeleteStore &operator=(const DeleteStore &) = delete;
  DeleteStore &operator=(DeleteStore &&) = delete;


 public:
  // IndexFilter that filters out every id marked deleted in the store. It
  // shares ownership of the store, keeping it alive while the filter exists.
  class Filter : public IndexFilter {
   public:
    explicit Filter(std::shared_ptr<const DeleteStore> delete_store)
        : delete_store_(std::move(delete_store)) {}

    bool is_filtered(uint64_t id) const override {
      return delete_store_->is_deleted(id);
    }

   private:
    const std::shared_ptr<const DeleteStore> delete_store_;
  };

  // Deserializes the bitmap from file_path and refreshes the empty flag.
  // On failure the error is logged and the empty flag is left untouched.
  Status load(const std::string &file_path) {
    Status status = bitmap_.deserialize(file_path);
    if (status.ok()) {
      empty_ = bitmap_.cardinality() == 0;
      // %zu with an explicit size_t cast keeps the format portable; %lu is
      // wrong wherever unsigned long is narrower than the cardinality type.
      LOG_INFO("Opened delete store, count[%zu]",
               static_cast<size_t>(bitmap_.cardinality()));
    } else {
      LOG_ERROR("Failed to load delete store from file[%s]", file_path.c_str());
    }
    return status;
  }

  // Serializes the bitmap to file_path; on success the modified flag is
  // cleared.
  Status flush(const std::string &file_path) {
    Status status = bitmap_.serialize(file_path, true);
    if (status.ok()) {
      LOG_DEBUG("Flushed delete store to file[%s]", file_path.c_str());
      modified_since_last_flush_ = false;
    } else {
      LOG_ERROR("Failed to flush delete store to file[%s]", file_path.c_str());
    }
    return status;
  }

  // Marks doc_id as deleted and flags the store as non-empty and modified.
  void mark_deleted(uint64_t doc_id) {
    bitmap_.add(doc_id);
    empty_ = false;
    modified_since_last_flush_ = true;
  }

  bool is_deleted(uint64_t doc_id) const {
    return bitmap_.contains(doc_id);
  }

  // Returns a filter over this store, or nullptr when nothing is deleted.
  std::shared_ptr<IndexFilter> make_filter() const {
    return empty_ ? nullptr : std::make_shared<Filter>(shared_from_this());
  }

  size_t storage_size_in_bytes() const {
    return bitmap_.storage_size_in_bytes();
  }

  // Number of ids marked deleted.
  size_t count() const {
    return bitmap_.cardinality();
  }

  // Number of deleted ids within the given doc id range, as computed by the
  // bitmap's range_cardinality().
  size_t range_count(uint64_t min_doc_id, uint64_t max_doc_id) const {
    return bitmap_.range_cardinality(min_doc_id, max_doc_id);
  }

  const std::string &collection_name() const {
    return collection_name_;
  }

  bool modified_since_last_flush() const {
    return modified_since_last_flush_;
  }

  // Returns an independent copy of this store (same collection name and
  // bitmap contents) whose modified flag starts cleared.
  Ptr clone() const {
    auto ptr = std::make_shared<DeleteStore>(collection_name_);
    ptr->bitmap_ = bitmap_;
    ptr->empty_ = bitmap_.cardinality() == 0;
    ptr->modified_since_last_flush_ = false;
    return ptr;
  }

  bool empty() const {
    return empty_;
  }

 private:
  using FILE = ailego::File;

  const std::string collection_name_{};
  ConcurrentRoaringBitmap64 bitmap_{};
  bool empty_{true};
  bool modified_since_last_flush_{false};
};


}  // namespace zvec

================================================
FILE: src/db/index/common/doc.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <regex>
#include <stdexcept>
#include <zvec/db/doc.h>
#include "db/common/constants.h"
#include "db/index/common/type_helper.h"

// IS_BIG_ENDIAN is 1 when the compiler defines __BYTE_ORDER__ as
// __ORDER_BIG_ENDIAN__, and 0 otherwise (including when __BYTE_ORDER__ is
// not defined at all).
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define IS_BIG_ENDIAN 1
#else
#define IS_BIG_ENDIAN 0
#endif


namespace zvec {

// One-byte type tag written in front of a serialized Doc value (see
// Doc::serialize_value). The numeric values are part of the serialized
// format, so existing entries must keep their numbers.
enum ValueType : uint8_t {
  TYPE_EMPTY = 0,
  TYPE_BOOL = 1,
  TYPE_INT32 = 2,
  TYPE_UINT32 = 3,
  TYPE_INT64 = 4,
  TYPE_UINT64 = 5,
  TYPE_FLOAT = 6,
  TYPE_DOUBLE = 7,
  TYPE_STRING = 8,
  TYPE_VECTOR_BOOL = 9,
  TYPE_VECTOR_INT8 = 10,
  TYPE_VECTOR_INT16 = 11,
  TYPE_VECTOR_INT32 = 12,
  TYPE_VECTOR_INT64 = 13,
  TYPE_VECTOR_UINT32 = 14,
  TYPE_VECTOR_UINT64 = 15,
  TYPE_VECTOR_FLOAT16 = 16,
  TYPE_VECTOR_FLOAT = 17,
  TYPE_VECTOR_DOUBLE = 18,
  TYPE_VECTOR_STRING = 19,
  TYPE_VECTOR_PAIR_INT_FLOAT = 20,
  TYPE_VECTOR_PAIR_INT_FLOAT16 = 21,
};

// Returns a display name for the alternative currently held by `value`.
// For std::vector of int32/int64/uint32/uint64 the name depends on
// `is_vector`: "VECTOR_*" when true, "ARRAY_*" when false. Every other
// alternative maps to a fixed name; anything unrecognised yields
// "unknown type".
std::string get_value_type_name(const Doc::Value &value, bool is_vector) {
  // Chooses between the vector-flavoured and array-flavoured spelling.
  const auto pick = [is_vector](const char *vector_name,
                                const char *array_name) -> std::string {
    return is_vector ? vector_name : array_name;
  };

  return std::visit(
      [&](const auto &held) -> std::string {
        using Held = std::decay_t<decltype(held)>;
        using SparseFp32 =
            std::pair<std::vector<uint32_t>, std::vector<float>>;
        using SparseFp16 =
            std::pair<std::vector<uint32_t>, std::vector<float16_t>>;

        // Scalars.
        if constexpr (std::is_same_v<Held, std::monostate>) {
          return "EMPTY";
        } else if constexpr (std::is_same_v<Held, bool>) {
          return "BOOL";
        } else if constexpr (std::is_same_v<Held, int32_t>) {
          return "INT32";
        } else if constexpr (std::is_same_v<Held, uint32_t>) {
          return "UINT32";
        } else if constexpr (std::is_same_v<Held, int64_t>) {
          return "INT64";
        } else if constexpr (std::is_same_v<Held, uint64_t>) {
          return "UINT64";
        } else if constexpr (std::is_same_v<Held, float>) {
          return "FLOAT";
        } else if constexpr (std::is_same_v<Held, double>) {
          return "DOUBLE";
        } else if constexpr (std::is_same_v<Held, std::string>) {
          return "STRING";
        }
        // Sequences.
        else if constexpr (std::is_same_v<Held, std::vector<bool>>) {
          return "ARRAY_BOOL";
        } else if constexpr (std::is_same_v<Held, std::vector<int8_t>>) {
          return "VECTOR_INT8";
        } else if constexpr (std::is_same_v<Held, std::vector<int16_t>>) {
          return "VECTOR_INT16";
        } else if constexpr (std::is_same_v<Held, std::vector<int32_t>>) {
          return pick("VECTOR_INT32", "ARRAY_INT32");
        } else if constexpr (std::is_same_v<Held, std::vector<int64_t>>) {
          return pick("VECTOR_INT64", "ARRAY_INT64");
        } else if constexpr (std::is_same_v<Held, std::vector<uint32_t>>) {
          return pick("VECTOR_UINT32", "ARRAY_UINT32");
        } else if constexpr (std::is_same_v<Held, std::vector<uint64_t>>) {
          return pick("VECTOR_UINT64", "ARRAY_UINT64");
        } else if constexpr (std::is_same_v<Held, std::vector<float16_t>>) {
          return "VECTOR_FP16";
        } else if constexpr (std::is_same_v<Held, std::vector<float>>) {
          return "VECTOR_FP32";
        } else if constexpr (std::is_same_v<Held, std::vector<double>>) {
          return "VECTOR_FP64";
        } else if constexpr (std::is_same_v<Held, std::vector<std::string>>) {
          return "ARRAY_STRING";
        }
        // Sparse (indices, values) pairs.
        else if constexpr (std::is_same_v<Held, SparseFp32>) {
          return "SPARSE_VECTOR_FP32";
        } else if constexpr (std::is_same_v<Held, SparseFp16>) {
          return "SPARSE_VECTOR_FP16";
        } else {
          return "unknown type";
        }
      },
      value);
}

// Returns `value` with the order of its bytes reversed.
//
// The swap operates on the object representation (via memcpy), so it is
// correct for every fixed-size scalar: integers of any width, float, double
// and 16-bit float types alike. Swapping through integer builtins would
// first convert a floating-point value numerically and corrupt it, and
// shifting signed integers or casting between unrelated pointer types is
// not well defined.
template <typename T>
T byte_swap(T value) {
  unsigned char bytes[sizeof(T)];
  std::memcpy(bytes, &value, sizeof(T));

  // Reverse in place; a single-byte type is left untouched.
  for (size_t lo = 0, hi = sizeof(T) - 1; lo < hi; ++lo, --hi) {
    const unsigned char tmp = bytes[lo];
    bytes[lo] = bytes[hi];
    bytes[hi] = tmp;
  }

  T swapped = value;
  std::memcpy(&swapped, bytes, sizeof(T));
  return swapped;
}

// Appends the sizeof(T) bytes of `value` to the end of `buffer`.
// On big-endian hosts the bytes are reversed first, so the buffer always
// holds the same byte layout regardless of the host's byte order.
template <typename T>
void write_value_to_buffer(std::vector<uint8_t> &buffer, const T &value) {
  const T encoded = (IS_BIG_ENDIAN) ? byte_swap<T>(value) : value;
  const auto *first = reinterpret_cast<const uint8_t *>(&encoded);
  const auto *last = first + sizeof(T);
  buffer.insert(buffer.end(), first, last);
}

// Reads one T from `data` and advances `data` by sizeof(T) bytes.
// On big-endian hosts the bytes are reversed after loading, mirroring the
// swap applied by write_value_to_buffer.
// No bounds checking is done: the caller must guarantee that at least
// sizeof(T) readable bytes are available at `data`.
template <typename T>
T read_value_from_buffer(const uint8_t *&data) {
  T decoded;
  std::memcpy(&decoded, data, sizeof(T));
  data += sizeof(T);
  return (IS_BIG_ENDIAN) ? byte_swap<T>(decoded) : decoded;
}


// Appends `size` raw bytes starting at `src` to the end of `buffer`.
// The bytes are copied verbatim; no byte-order conversion is applied.
void Doc::write_to_buffer(std::vector<uint8_t> &buffer, const void *src,
                          size_t size) {
  const auto *first = static_cast<const uint8_t *>(src);
  const auto *last = first + size;
  buffer.insert(buffer.end(), first, last);
}

// Copies `size` raw bytes from `data` into `dest`, then advances `data` past
// the bytes that were consumed. No byte-order conversion and no bounds
// checking are performed.
void Doc::read_from_buffer(const uint8_t *&data, void *dest, size_t size) {
  const uint8_t *const source = data;
  std::memcpy(dest, source, size);
  data = source + size;
}

void Doc::serialize_value(std::vector<uint8_t> &buffer, const Value &value) {
  std::visit(
      [&buffer](const auto &v) {
        using T = std::decay_t<decltype(v)>;

        if constexpr (std::is_same_v<T, std::monostate>) {
          uint8_t type = TYPE_EMPTY;
          write_to_buffer(buffer, &type, sizeof(type));
        } else if constexpr (std::is_same_v<T, bool>) {
          uint8_t type = TYPE_BOOL;
          write_to_buffer(buffer, &type, sizeof(type));
          write_to_buffer(buffer, &v, sizeof(v));
        } else if constexpr (std::is_same_v<T, int32_t>) {
          uint8_t type = TYPE_INT32;
          write_to_buffer(buffer, &type, sizeof(type));
          write_value_to_buffer<int32_t>(buffer, v);
        } else if constexpr (std::is_same_v<T, int64_t>) {
          uint8_t type = TYPE_INT64;
          write_to_buffer(buffer, &type, sizeof(type));
          write_value_to_buffer<int64_t>(buffer, v);
        } else if constexpr (std::is_same_v<T, uint32_t>) {
          uint8_t type = TYPE_UINT32;
          write_to_buffer(buffer, &type, sizeof(type));
          write_value_to_buffer<uint32_t>(buffer, v);
        } else if constexpr (std::is_same_v<T, uint64_t>) {
          uint8_t type = TYPE_UINT64;
          write_to_buffer(buffer, &type, sizeof(type));
          write_value_to_buffer<uint64_t>(buffer, v);
        } else if constexpr (std::is_same_v<T, float>) {
          uint8_t type = TYPE_FLOAT;
          write_to_buffer(buffer, &type, sizeof(type));
          write_value_to_buffer<float>(buffer, v);
        } else if constexpr (std::is_same_v<T, double>) {
          uint8_t type = TYPE_DOUBLE;
          write_to_buffer(buffer, &type, sizeof(type));
          write_value_to_buffer<double>(buffer, v);
        } else if constexpr (std::is_same_v<T, std::string>) {
          uint8_t type = TYPE_STRING;
          write_to_buffer(buffer, &type, sizeof(type));
          uint32_t len = static_cast<uint32_t>(v.size());
          write_value_to_buffer<uint32_t>(buffer, len);
          write_to_buffer(buffer, v.data(), len);
        } else if constexpr (std::is_same_v<T, std::vector<bool>>) {
          uint8_t type = TYPE_VECTOR_BOOL;
          write_to_buffer(buffer, &type, sizeof(type));
          uint32_t len = static_cast<uint32_t>(v.size());
          write_value_to_buffer<uint32_t>(buffer, len);
          for (bool b : v) {
            write_to_buffer(buffer, &b, sizeof(b));
          }
        } else if constexpr (std::is_same_v<T, std::vector<int8_t>>) {
          uint8_t type = TYPE_VECTOR_INT8;
          write_to_buffer(buffer, &type, sizeof(type));
          uint32_t len = static_cast<uint32_t>(v.size());
          write_value_to_buffer<uint32_t>(buffer, len);
          write_to_buffer(buffer, v.data(), len * sizeof(int8_t));
        } else if constexpr (std::is_same_v<T, std::vector<int16_t>>) {
          uint8_t type = TYPE_VECTOR_INT16;
          write_to_buffer(buffer, &type, sizeof(type));
          uint32_t len = static_cast<uint32_t>(v.size());
          write_value_to_buffer<uint32_t>(buffer, len);
          if (IS_BIG_ENDIAN) {
            for (const auto &val : v) {
              int16_t swapped = byte_swap<int16_t>(val);
              write_to_buffer(buffer, &swapped, sizeof(swapped));
            }
          } else {
            write_to_buffer(buffer, v.data(), len * sizeof(int16_t));
          }
        } else if constexpr (std::is_same_v<T, std::vector<int32_t>>) {
          uint8_t type = TYPE_VECTOR_INT32;
          write_to_buffer(buffer, &type, sizeof(type));
          uint32_t len = static_cast<uint32_t>(v.size());
          write_value_to_buffer<uint32_t>(buffer, len);
          if (IS_BIG_ENDIAN) {
            for (const auto &val : v) {
              int32_t swapped = byte_swap<int32_t>(val);
              write_to_buffer(buffer, &swapped, sizeof(swapped));
            }
          } else {
            write_to_buffer(buffer, v.data(), len * sizeof(int32_t));
          }
        } else if constexpr (std::is_same_v<T, std::vector<int64_t>>) {
          uint8_t type = TYPE_VECTOR_INT64;
          write_to_buffer(buffer, &type, sizeof(type));
          uint32_t len = static_cast<uint32_t>(v.size());
          write_value_to_buffer<uint32_t>(buffer, len);
          if (IS_BIG_ENDIAN) {
            for (const auto &val : v) {
              int64_t swapped = byte_swap<int64_t>(val);
              write_to_buffer(buffer, &swapped, sizeof(swapped));
            }
          } else {
            write_to_buffer(buffer, v.data(), len * sizeof(int64_t));
          }
        } else if constexpr (std::is_same_v<T, std::vector<uint32_t>>) {
          uint8_t type = TYPE_VECTOR_UINT32;
          write_to_buffer(buffer, &type, sizeof(type));
          uint32_t len = static_cast<uint32_t>(v.size());
          write_value_to_buffer<uint32_t>(buffer, len);
          if (IS_BIG_ENDIAN) {
            for (const auto &val : v) {
              uint32_t swapped = byte_swap<uint32_t>(val);
              write_to_buffer(buffer, &swapped, sizeof(swapped));
            }
          } else {
            write_to_buffer(buffer, v.data(), len * sizeof(uint32_t));
          }
        } else if constexpr (std::is_same_v<T, std::vector<uint64_t>>) {
          uint8_t type = TYPE_VECTOR_UINT64;
          write_to_buffer(buffer, &type, sizeof(type));
          uint32_t len = static_cast<uint32_t>(v.size());
          write_value_to_buffer<uint32_t>(buffer, len);
          if (IS_BIG_ENDIAN) {
            for (const auto &val : v) {
              uint64_t swapped = byte_swap<uint64_t>(val);
              write_to_buffer(buffer, &swapped, sizeof(swapped));
            }
          } else {
            write_to_buffer(buffer, v.data(), len * sizeof(uint64_t));
          }
        } else if constexpr (std::is_same_v<T, std::vector<float>>) {
          uint8_t type = TYPE_VECTOR_FLOAT;
          write_to_buffer(buffer, &type, sizeof(type));
          uint32_t len = static_cast<uint32_t>(v.size());
          write_value_to_buffer<uint32_t>(buffer, len);
          if (IS_BIG_ENDIAN) {
            for (const auto &val : v) {
              float swapped = byte_swap<float>(val);
              write_to_buffer(buffer, &swapped, sizeof(swapped));
            }
          } else {
            write_to_buffer(buffer, v.data(), len * sizeof(float));
          }
        } else if constexpr (std::is_same_v<T, std::vector<double>>) {
          uint8_t type = TYPE_VECTOR_DOUBLE;
          write_to_buffer(buffer, &type, sizeof(type));
          uint32_t len = static_cast<uint32_t>(v.size());
          write_value_to_buffer<uint32_t>(buffer, len);
          if (IS_BIG_ENDIAN) {
            for (const auto &val : v) {
              double swapped = byte_swap<double>(val);
              write_to_buffer(buffer, &swapped, sizeof(swapped));
            }
          } else {
            write_to_buffer(buffer, v.data(), len * sizeof(double));
          }
        } else if constexpr (std::is_same_v<T, std::vector<float16_t>>) {
          uint8_t type = TYPE_VECTOR_FLOAT16;
          write_to_buffer(buffer, &type, sizeof(type));
          uint32_t len = static_cast<uint32_t>(v.size());
          write_value_to_buffer<uint32_t>(buffer, len);
          if (IS_BIG_ENDIAN) {
            for (const auto &val : v) {
              float16_t swapped = byte_swap<float16_t>(val);
              write_to_buffer(buffer, &swapped, sizeof(swapped));
            }
          } else {
            write_to_buffer(buffer, v.data(), len * sizeof(float16_t));
          }
        } else if constexpr (std::is_same_v<T, std::vector<std::string>>) {
          uint8_t type = TYPE_VECTOR_STRING;
          write_to_buffer(buffer, &type, sizeof(type));
          uint32_t len = static_cast<uint32_t>(v.size());
          write_value_to_buffer<uint32_t>(buffer, len);
          for (const auto &s : v) {
            uint32_t str_len = static_cast<uint32_t>(s.size());
            write_value_to_buffer<uint32_t>(buffer, str_len);
            write_to_buffer(buffer, s.data(), str_len);
          }
        } else if constexpr (std::is_same_v<T, std::pair<std::vector<uint32_t>,
                                                         std::vector<float>>>) {
          uint8_t type = TYPE_VECTOR_PAIR_INT_FLOAT;
          write_to_buffer(buffer, &type, sizeof(type));
          uint32_t len = static_cast<uint32_t>(v.first.size());
          write_value_to_buffer<uint32_t>(buffer, len);
          if (IS_BIG_ENDIAN) {
            for (const auto &p : v.first) {
              uint32_t swapped = byte_swap<uint32_t>(p);
              write_to_buffer(buffer, &swapped, sizeof(swapped));
            }
          } else {
            for (const auto &p : v.first) {
              write_to_buffer(buffer, &p, sizeof(p));
            }
          }
          len = static_cast<uint32_t>(v.second.size());
          write_value_to_buffer<uint32_t>(buffer, len);
          if (IS_BIG_ENDIAN) {
            for (const auto &p : v.second) {
              float swapped = byte_swap<float>(p);
              write_to_buffer(buffer, &swapped, sizeof(swapped));
            }
          } else {
            for (const auto &p : v.second) {
              write_to_buffer(buffer, &p, sizeof(p));
            }
          }
        } else if constexpr (std::is_same_v<
                                 T, std::pair<std::vector<uint32_t>,
                                              std::vector<float16_t>>>) {
          uint8_t type = TYPE_VECTOR_PAIR_INT_FLOAT16;
          write_to_buffer(buffer, &type, sizeof(type));
          uint32_t len = static_cast<uint32_t>(v.first.size());
          write_value_to_buffer<uint32_t>(buffer, len);
          if (IS_BIG_ENDIAN) {
            for (const auto &p : v.first) {
              uint32_t swapped = byte_swap<uint32_t>(p);
              write_to_buffer(buffer, &swapped, sizeof(swapped));
            }
          } else {
            for (const auto &p : v.first) {
              write_to_buffer(buffer, &p, sizeof(p));
            }
          }
          len = static_cast<uint32_t>(v.second.size());
          write_value_to_buffer<uint32_t>(buffer, len);
          if (IS_BIG_ENDIAN) {
            for (const auto &p : v.second) {
              float16_t swapped = byte_swap<float16_t>(p);
              write_to_buffer(buffer, &swapped, sizeof(swapped));
            }
          } else {
            for (const auto &p : v.second) {
              write_to_buffer(buffer, &p, sizeof(p));
            }
          }
        }
      },
      value);
}


Doc::Value Doc::deserialize_value(const uint8_t *&data) {
  uint8_t type;
  read_from_buffer(data, &type, sizeof(type));

  switch (type) {
    case TYPE_EMPTY: {
      return std::monostate{};
    }
    case TYPE_BOOL: {
      bool v;
      read_from_buffer(data, &v, sizeof(v));
      return v;
    }
    case TYPE_INT32: {
      return read_value_from_buffer<int32_t>(data);
    }
    case TYPE_INT64: {
      return read_value_from_buffer<int64_t>(data);
    }
    case TYPE_UINT32: {
      return read_value_from_buffer<uint32_t>(data);
    }
    case TYPE_UINT64: {
      return read_value_from_buffer<uint64_t>(data);
    }
    case TYPE_FLOAT: {
      return read_value_from_buffer<float>(data);
    }
    case TYPE_DOUBLE: {
      return read_value_from_buffer<double>(data);
    }
    case TYPE_STRING: {
      uint32_t len = read_value_from_buffer<uint32_t>(data);
      std::string v(reinterpret_cast<const char *>(data), len);
      data += len;
      return v;
    }
    case TYPE_VECTOR_BOOL: {
      uint32_t len = read_value_from_buffer<uint32_t>(data);
      std::vector<bool> v;
      v.reserve(len);
      for (uint32_t i = 0; i < len; ++i) {
        bool b;
        read_from_buffer(data, &b, sizeof(b));
        v.push_back(b);
      }
      return v;
    }
    case TYPE_VECTOR_INT8: {
      uint32_t len = read_value_from_buffer<uint32_t>(data);
      std::vector<int8_t> v(len);
      read_from_buffer(data, v.data(), len * sizeof(int8_t));
      return v;
    }
    case TYPE_VECTOR_INT16: {
      uint32_t len = read_value_from_buffer<uint32_t>(data);
      std::vector<int16_t> v(len);
      if (IS_BIG_ENDIAN) {
        for (uint32_t i = 0; i < len; ++i) {
          v[i] = byte_swap<int16_t>(read_value_from_buffer<int16_t>(data));
        }
      } else {
        read_from_buffer(data, v.data(), len * sizeof(int16_t));
      }
      return v;
    }
    case TYPE_VECTOR_INT32: {
      uint32_t len = read_value_from_buffer<uint32_t>(data);
      std::vector<int32_t> v(len);
      if (IS_BIG_ENDIAN) {
        for (uint32_t i = 0; i < len; ++i) {
          v[i] = byte_swap<int32_t>(read_value_from_buffer<int32_t>(data));
        }
      } else {
        read_from_buffer(data, v.data(), len * sizeof(int32_t));
      }
      return v;
    }
    case TYPE_VECTOR_INT64: {
      uint32_t len = read_value_from_buffer<uint32_t>(data);
      std::vector<int64_t> v(len);
      if (IS_BIG_ENDIAN) {
        for (uint32_t i = 0; i < len; ++i) {
          v[i] = byte_swap<int64_t>(read_value_from_buffer<int64_t>(data));
        }
      } else {
        read_from_buffer(data, v.data(), len * sizeof(int64_t));
      }
      return v;
    }
    case TYPE_VECTOR_UINT32: {
      uint32_t len = read_value_from_buffer<uint32_t>(data);
      std::vector<uint32_t> v(len);
      if (IS_BIG_ENDIAN) {
        for (uint32_t i = 0; i < len; ++i) {
          v[i] = byte_swap<uint32_t>(read_value_from_buffer<uint32_t>(data));
        }
      } else {
        read_from_buffer(data, v.data(), len * sizeof(uint32_t));
      }
      return v;
    }
    case TYPE_VECTOR_UINT64: {
      uint32_t len = read_value_from_buffer<uint32_t>(data);
      std::vector<uint64_t> v(len);
      if (IS_BIG_ENDIAN) {
        for (uint32_t i = 0; i < len; ++i) {
          v[i] = byte_swap<uint64_t>(read_value_from_buffer<uint64_t>(data));
        }
      } else {
        read_from_buffer(data, v.data(), len * sizeof(uint64_t));
      }
      return v;
    }
    case TYPE_VECTOR_FLOAT: {
      uint32_t len = read_value_from_buffer<uint32_t>(data);
      std::vector<float> v(len);
      if (IS_BIG_ENDIAN) {
        for (uint32_t i = 0; i < len; ++i) {
          v[i] = byte_swap<float>(read_value_from_buffer<float>(data));
        }
      } else {
        read_from_buffer(data, v.data(), len * sizeof(float));
      }
      return v;
    }
    case TYPE_VECTOR_DOUBLE: {
      uint32_t len = read_value_from_buffer<uint32_t>(data);
      std::vector<double> v(len);
      if (IS_BIG_ENDIAN) {
        for (uint32_t i = 0; i < len; ++i) {
          v[i] = byte_swap<double>(read_value_from_buffer<double>(data));
        }
      } else {
        read_from_buffer(data, v.data(), len * sizeof(double));
      }
      return v;
    }
    case TYPE_VECTOR_FLOAT16: {
      uint32_t len = read_value_from_buffer<uint32_t>(data);
      std::vector<float16_t> v(len);
      if (IS_BIG_ENDIAN) {
        for (uint32_t i = 0; i < len; ++i) {
          v[i] = byte_swap<float16_t>(read_value_from_buffer<float16_t>(data));
        }
      } else {
        read_from_buffer(data, v.data(), len * sizeof(float16_t));
      }
      return v;
    }
    case TYPE_VECTOR_STRING: {
      uint32_t len = read_value_from_buffer<uint32_t>(data);
      std::vector<std::string> v;
      v.reserve(len);
      for (uint32_t i = 0; i < len; ++i) {
        uint32_t str_len = read_value_from_buffer<uint32_t>(data);
        std::string s(reinterpret_cast<const char *>(data), str_len);
        data += str_len;
        v.push_back(s);
      }
      return v;
    }
    case TYPE_VECTOR_PAIR_INT_FLOAT: {
      uint32_t len = read_value_from_buffer<uint32_t>(data);
      std::pair<std::vector<uint32_t>, std::vector<float>> v;
      v.first.reserve(len);
      if (IS_BIG_ENDIAN) {
        for (uint32_t i = 0; i < len; ++i) {
          v.first.push_back(
              byte_swap<uint32_t>(read_value_from_buffer<uint32_t>(data)));
        }
      } else {
        for (uint32_t i = 0; i < len; ++i) {
          uint32_t first;
          read_from_buffer(data, &first, sizeof(first));
          v.first.push_back(first);
        }
      }
      len = read_value_from_buffer<uint32_t>(data);
      v.second.reserve(len);
      if (IS_BIG_ENDIAN) {
        for (uint32_t i = 0; i < len; ++i) {
          v.second.push_back(
              byte_swap<float>(read_value_from_buffer<float>(data)));
        }
      } else {
        for (uint32_t i = 0; i < len; ++i) {
          float second;
          read_from_buffer(data, &second, sizeof(second));
          v.second.push_back(second);
        }
      }
      return v;
    }
    case TYPE_VECTOR_PAIR_INT_FLOAT16: {
      uint32_t len = read_value_from_buffer<uint32_t>(data);
      std::pair<std::vector<uint32_t>, std::vector<float16_t>> v;
      v.first.reserve(len);
      if (IS_BIG_ENDIAN) {
        for (uint32_t i = 0; i < len; ++i) {
          v.first.push_back(
              byte_swap<uint32_t>(read_value_from_buffer<uint32_t>(data)));
        }
      } else {
        for (uint32_t i = 0; i < len; ++i) {
          uint32_t first;
          read_from_buffer(data, &first, sizeof(first));
          v.first.push_back(first);
        }
      }
      len = read_value_from_buffer<uint32_t>(data);
      v.second.reserve(len);
      if (IS_BIG_ENDIAN) {
        for (uint32_t i = 0; i < len; ++i) {
          v.second.push_back(
              byte_swap<float16_t>(read_value_from_buffer<float16_t>(data)));
        }
      } else {
        for (uint32_t i = 0; i < len; ++i) {
          float16_t second;
          read_from_buffer(data, &second, sizeof(second));
          v.second.push_back(second);
        }
      }
      return v;
    }

    default:
      throw std::runtime_error("Unknown value type: " + std::to_string(type));
  }
}

std::vector<uint8_t> Doc::serialize() const {
  std::vector<uint8_t> buffer;
  uint32_t pk_len = static_cast<uint32_t>(pk_.size());
  write_to_buffer(buffer, &pk_len, sizeof(pk_len));
  write_to_buffer(buffer, pk_.data(), pk_len);

  write_to_buffer(buffer, &score_, sizeof(score_));
  write_to_buffer(buffer, &doc_id_, sizeof(doc_id_));
  write_to_buffer(buffer, &op_, sizeof(op_));

  uint32_t field_count = static_cast<uint32_t>(fields_.size());
  write_to_buffer(buffer, &field_count, sizeof(field_count));

  for (const auto &[field_name, value] : fields_) {
    uint32_t name_len = static_cast<uint32_t>(field_name.size());
    write_to_buffer(buffer, &name_len, sizeof(name_len));
    write_to_buffer(buffer, field_name.data(), name_len);

    serialize_value(buffer, value);
  }

  return buffer;
}

// Rebuilds a document from the byte layout produced by Doc::serialize:
//   uint32 pk length, pk bytes, score, doc id, operator (raw bytes),
//   uint32 field count, then per field: uint32 name length, name bytes,
//   encoded value.
//
// The size argument is not consulted, so no bounds checking is performed;
// the caller must pass a complete, well-formed buffer.
// Propagates std::runtime_error from deserialize_value when a field carries
// an unknown type tag.
Doc::Ptr Doc::deserialize(const uint8_t *data, size_t /*size*/) {
  const uint8_t *ptr = data;
  Doc::Ptr doc = std::make_shared<Doc>();

  const uint32_t pk_len = read_value_from_buffer<uint32_t>(ptr);
  std::string pk(reinterpret_cast<const char *>(ptr), pk_len);
  ptr += pk_len;
  doc->set_pk(pk);

  const float score = read_value_from_buffer<float>(ptr);
  doc->set_score(score);

  const uint64_t doc_id = read_value_from_buffer<uint64_t>(ptr);
  doc->set_doc_id(doc_id);

  Operator op;
  read_from_buffer(ptr, &op, sizeof(op));
  doc->set_operator(op);

  const uint32_t field_count = read_value_from_buffer<uint32_t>(ptr);

  for (uint32_t i = 0; i < field_count; ++i) {
    const uint32_t name_len = read_value_from_buffer<uint32_t>(ptr);
    std::string field_name(reinterpret_cast<const char *>(ptr), name_len);
    ptr += name_len;

    Doc::Value value = deserialize_value(ptr);
    // Move the key and the decoded value into the map: values may hold large
    // vectors, and neither local is used again in this iteration.
    doc->fields_[std::move(field_name)] = std::move(value);
  }

  return doc;
}

// Checks this document against `schema`.
//
// Validation steps, in order:
//   1. `schema` must be non-null (otherwise InternalError).
//   2. The primary key must be non-empty and match DOC_PK_REGEX.
//   3. Every field present in the document must exist in the schema.
//   4. For every schema field:
//        - a missing field is accepted only if the field is nullable or
//          `is_update` is true;
//        - an empty (monostate) value is accepted only if the field is
//          nullable;
//        - the stored C++ type must match the schema's data type;
//        - sparse vectors must have equally sized index/value arrays and at
//          most kSparseMaxDimSize entries;
//        - dense vectors must have exactly the schema's dimension.
//
// Returns Status::OK() on success, otherwise the first failure encountered
// (InvalidArgument for all document-level problems).
Status Doc::validate(const CollectionSchema::Ptr &schema,
                     bool is_update) const {
  if (!schema) {
    return Status::InternalError("doc validate failed: schema is null");
  }

  if (pk_.empty()) {
    return Status::InvalidArgument("doc validate failed: doc_id is not set");
  }

  if (!std::regex_match(pk_, DOC_PK_REGEX)) {
    return Status::InvalidArgument("doc validate failed: doc_id[", pk_,
                                   "] cannot pass the regex verification");
  }

  // check doc fields match schema
  for (const auto &entry : fields_) {
    const auto &name = entry.first;
    if (!schema->has_field(name)) {
      return Status::InvalidArgument("doc validate failed: field[", name,
                                     "] does not exist in collection's schema");
    }
  }

  const auto &fields = schema->fields();
  for (auto const &field_schema : fields) {
    // Bind by const reference: avoids a string copy when name() returns a
    // reference and is still valid (lifetime-extended) when it returns by
    // value.
    const auto &field_name = field_schema->name();
    const auto field_pair = fields_.find(field_name);
    if (field_pair == fields_.end()) {
      if (field_schema->nullable() || is_update) {
        continue;
      }
      return Status::InvalidArgument(
          "doc validate failed: field[", field_name,
          "] is configured not nullable, but doc does not contain this field");
    }
    if (std::holds_alternative<std::monostate>(field_pair->second)) {
      if (field_schema->nullable()) {
        continue;
      }
      return Status::InvalidArgument(
          "doc validate failed: field[", field_name,
          "] is configured not nullable, but doc's field value is empty");
    }

    const Value &field_value = field_pair->second;
    const DataType expected_type = field_schema->data_type();
    bool type_match = true;
    uint32_t value_dimension = 0;

    switch (expected_type) {
      case DataType::BINARY:
      case DataType::STRING:
        type_match = std::holds_alternative<std::string>(field_value);
        break;
      case DataType::BOOL:
        type_match = std::holds_alternative<bool>(field_value);
        break;
      case DataType::INT32:
        type_match = std::holds_alternative<int32_t>(field_value);
        break;
      case DataType::UINT32:
        type_match = std::holds_alternative<uint32_t>(field_value);
        break;
      case DataType::INT64:
        type_match = std::holds_alternative<int64_t>(field_value);
        break;
      case DataType::UINT64:
        type_match = std::holds_alternative<uint64_t>(field_value);
        break;
      case DataType::FLOAT:
        type_match = std::holds_alternative<float>(field_value);
        break;
      case DataType::DOUBLE:
        type_match = std::holds_alternative<double>(field_value);
        break;
      case DataType::ARRAY_BINARY:
      case DataType::ARRAY_STRING:
        type_match =
            std::holds_alternative<std::vector<std::string>>(field_value);
        break;
      case DataType::ARRAY_BOOL:
        type_match = std::holds_alternative<std::vector<bool>>(field_value);
        break;
      case DataType::ARRAY_INT32:
        type_match = std::holds_alternative<std::vector<int32_t>>(field_value);
        break;
      case DataType::ARRAY_INT64:
        type_match = std::holds_alternative<std::vector<int64_t>>(field_value);
        break;
      case DataType::ARRAY_UINT32:
        type_match = std::holds_alternative<std::vector<uint32_t>>(field_value);
        break;
      case DataType::ARRAY_UINT64:
        type_match = std::holds_alternative<std::vector<uint64_t>>(field_value);
        break;
      case DataType::ARRAY_FLOAT:
        type_match = std::holds_alternative<std::vector<float>>(field_value);
        break;
      case DataType::ARRAY_DOUBLE:
        type_match = std::holds_alternative<std::vector<double>>(field_value);
        break;
      case DataType::VECTOR_BINARY32: {
        const auto *vec = std::get_if<std::vector<uint32_t>>(&field_value);
        type_match = (vec != nullptr);
        if (type_match) {
          value_dimension = vec->size();
        }
        break;
      }
      case DataType::VECTOR_BINARY64: {
        const auto *vec = std::get_if<std::vector<uint64_t>>(&field_value);
        type_match = (vec != nullptr);
        if (type_match) {
          value_dimension = vec->size();
        }
        break;
      }
      case DataType::VECTOR_FP16: {
        const auto *vec = std::get_if<std::vector<float16_t>>(&field_value);
        type_match = (vec != nullptr);
        if (type_match) {
          value_dimension = vec->size();
        }
        break;
      }
      case DataType::VECTOR_FP32: {
        const auto *vec = std::get_if<std::vector<float>>(&field_value);
        type_match = (vec != nullptr);
        if (type_match) {
          value_dimension = vec->size();
        }
        break;
      }
      case DataType::VECTOR_FP64: {
        const auto *vec = std::get_if<std::vector<double>>(&field_value);
        type_match = (vec != nullptr);
        if (type_match) {
          value_dimension = vec->size();
        }
        break;
      }
      // case DataType::VECTOR_INT4:
      //   type_match =
      //   std::holds_alternative<std::vector<int8_t>>(field_value); break;
      case DataType::VECTOR_INT8: {
        const auto *vec = std::get_if<std::vector<int8_t>>(&field_value);
        type_match = (vec != nullptr);
        if (type_match) {
          value_dimension = vec->size();
        }
        break;
      }
      case DataType::VECTOR_INT16: {
        const auto *vec = std::get_if<std::vector<int16_t>>(&field_value);
        type_match = (vec != nullptr);
        if (type_match) {
          value_dimension = vec->size();
        }
        break;
      }
      case DataType::SPARSE_VECTOR_FP16: {
        const auto *sparse = std::get_if<
            std::pair<std::vector<uint32_t>, std::vector<float16_t>>>(
            &field_value);
        type_match = (sparse != nullptr);
        if (type_match) {
          // Bind by reference: only the sizes are inspected, so copying both
          // vectors would be wasted work.
          const auto &[sparse_indices, sparse_values] = *sparse;
          if (sparse_values.size() != sparse_indices.size()) {
            return Status::InvalidArgument(
                "doc validate failed: field[", field_name,
                "]'s sparse vector indices and values size not match");
          }
          if (sparse_indices.size() > kSparseMaxDimSize) {
            return Status::InvalidArgument(
                "doc validate failed: vector[", field_name,
                "], the number of sparse indices exceeds the maximum limit ",
                kSparseMaxDimSize);
          }
        }
        break;
      }
      case DataType::SPARSE_VECTOR_FP32: {
        const auto *sparse =
            std::get_if<std::pair<std::vector<uint32_t>, std::vector<float>>>(
                &field_value);
        type_match = (sparse != nullptr);
        if (type_match) {
          const auto &[sparse_indices, sparse_values] = *sparse;
          if (sparse_values.size() != sparse_indices.size()) {
            return Status::InvalidArgument(
                "doc validate failed: field[", field_name,
                "]'s sparse vector indices and values size not match");
          }
          if (sparse_indices.size() > kSparseMaxDimSize) {
            return Status::InvalidArgument(
                "doc validate failed: vector[", field_name,
                "], the number of sparse indices exceeds the maximum limit ",
                kSparseMaxDimSize);
          }
        }
        break;
      }
      default:
        return Status::InvalidArgument("doc validate failed: field[",
                                       field_name,
                                       "]'s value type is not supported");
    }

    if (!type_match) {
      return Status::InvalidArgument(
          "doc validate failed: field[", field_name,
          "]'s value type mismatch, it should be ",
          DataTypeCodeBook::AsString(expected_type), ", but got type: ",
          get_value_type_name(field_value, field_schema->is_vector_field()));
    }
    if (field_schema->is_dense_vector()) {
      if (value_dimension != field_schema->dimension()) {
        return Status::InvalidArgument(
            "doc validate failed: field[", field_name,
            "]'s dimension mismatch, it should be ", field_schema->dimension(),
            ", but got dimension: ", value_dimension);
      }
    }
  }
  return Status::OK();
}

// Estimates the number of bytes used by this document.
//
// The estimate is the sum of:
//   - sizeof(Doc) itself,
//   - the capacity of the primary-key string,
//   - one pointer per hash-map bucket of the field map,
//   - for every field: the capacity of its name plus the heap storage owned
//     by its value.
//
// Per-value accounting:
//   - empty and scalar values: 0 (they live inside the variant itself),
//   - string: its capacity,
//   - vector<bool>: size() * sizeof(bool),
//   - numeric vectors: capacity() * element size,
//   - vector<string>: capacity() * sizeof(std::string) plus the capacity of
//     every contained string,
//   - sparse vectors: the sum of both component vectors' capacities times
//     their element sizes.
//
// Dispatch is by the value's type (std::visit) rather than by numeric
// variant index, so the accounting stays correct if the order of
// alternatives in Value ever changes.
size_t Doc::memory_usage() const {
  // Base size of the object itself
  size_t usage = sizeof(Doc);

  // Memory used by the pk_ string
  usage += pk_.capacity();

  // Memory used by the fields_ hash map bucket array
  usage += fields_.bucket_count() *
           sizeof(std::unordered_map<std::string, Value>::value_type *);

  for (const auto &[key, value] : fields_) {
    // Memory for the key (string)
    usage += key.capacity();

    // Memory for the value (based on the type actually stored)
    usage += std::visit(
        [](const auto &v) -> size_t {
          using T = std::decay_t<decltype(v)>;

          constexpr bool is_numeric_vector =
              std::is_same_v<T, std::vector<int8_t>> ||
              std::is_same_v<T, std::vector<int16_t>> ||
              std::is_same_v<T, std::vector<int32_t>> ||
              std::is_same_v<T, std::vector<int64_t>> ||
              std::is_same_v<T, std::vector<uint32_t>> ||
              std::is_same_v<T, std::vector<uint64_t>> ||
              std::is_same_v<T, std::vector<float16_t>> ||
              std::is_same_v<T, std::vector<float>> ||
              std::is_same_v<T, std::vector<double>>;

          if constexpr (std::is_same_v<T, std::string>) {
            return v.capacity();
          } else if constexpr (std::is_same_v<T, std::vector<bool>>) {
            return v.size() * sizeof(bool);
          } else if constexpr (is_numeric_vector) {
            return v.capacity() * sizeof(typename T::value_type);
          } else if constexpr (std::is_same_v<T, std::vector<std::string>>) {
            size_t vec_usage = v.capacity() * sizeof(std::string);
            for (const auto &str : v) {
              vec_usage += str.capacity();
            }
            return vec_usage;
          } else if constexpr (std::is_same_v<
                                   T, std::pair<std::vector<uint32_t>,
                                                std::vector<float>>>) {
            return v.first.capacity() * sizeof(uint32_t) +
                   v.second.capacity() * sizeof(float);
          } else if constexpr (std::is_same_v<
                                   T, std::pair<std::vector<uint32_t>,
                                                std::vector<float16_t>>>) {
            return v.first.capacity() * sizeof(uint32_t) +
                   v.second.capacity() * sizeof(float16_t);
          } else {
            // std::monostate and scalar types: stored inline in the variant,
            // no additional memory.
            return 0;
          }
        },
        value);
  }

  return usage;
}

// Formats a vector as "[e0, e1, ...]".
//
// Each element is streamed through unary plus so that narrow integer types
// (int8_t and friends) are promoted and printed as numbers rather than as
// characters. An empty vector yields "[]".
template <typename T>
std::string vec_to_string(const std::vector<T> &v) {
  std::ostringstream out;
  out << "[";
  bool need_separator = false;
  for (auto it = v.begin(); it != v.end(); ++it) {
    if (need_separator) {
      out << ", ";
    }
    need_separator = true;
    out << +(*it);  // unary plus: promote so chars print as integers
  }
  out << "]";
  return out.str();
}

// Combines several callables into one object whose call operator is the
// overload set of all of them; used below to build std::visit visitors out
// of lambdas.
template <class... Callables>
struct overloaded : Callables... {
  using Callables::operator()...;
};
// Deduction guide so that `overloaded{f, g, ...}` deduces the callable types.
template <class... Callables>
overloaded(Callables...) -> overloaded<Callables...>;

std::string Doc::to_detail_string() const {
  std::stringstream oss;
  oss << "[op:" << (uint32_t)op_ << ", doc_id: " << doc_id_
      << ", score: " << score_ << ", pk: " << pk_
      << ", fields: " << fields_.size() << "]";
  oss << "{";
  bool first_field = true;
  for (const auto &[key, val] : fields_) {
    if (!first_field) oss << ", ";
    first_field = false;

    oss << "\"" << key << "\": ";

    std::visit(
        overloaded{
            [&](std::monostate) { oss << "null"; },
            [&](bool b) { oss << (b ? "true" : "false"); },
            [&](int32_t i) { oss << i; },
            [&](uint32_t u) { oss << u; },
            [&](int64_t i) { oss << i; },
            [&](uint64_t u) { oss << u; },
            [&](float f) { oss << f; },
            [&](double d) { oss << d; },
            [&](const std::string &s) { oss << "\"" << s << "\""; },
            [&](const std::vector<bool> &vb) { oss << vec_to_string(vb); },
            [&](const std::vector<int32_t> &v) { oss << vec_to_string(v); },
            [&](const std::vector<int8_t> &v) { oss << vec_to_string(v); },
            [&](const std::vector<int16_t> &v) { oss << vec_to_string(v); },
            [&](const std::vector<uint32_t> &v) { oss << vec_to_string(v); },
            [&](const std::vector<int64_t> &v) { oss << vec_to_string(v); },
            [&](const std::vector<uint64_t> &v) { oss << vec_to_string(v); },
            [&](const std::vector<float> &v) { oss << vec_to_string(v); },
            [&](const std::vector<double> &v) { oss << vec_to_string(v); },
            [&](const std::vector<std::string> &v) {
              oss << "[";
              for (size_t i = 0; i < v.size(); ++i) {
                if (i > 0) oss << ", ";
                oss << "\"" << v[i] << "\"";
              }
              oss << "]";
            },
            [&](const std::vector<float16_t> &v) {
              oss << "[";
              for (size_t i = 0; i < v.size(); ++i) {
                if (i > 0) oss << ", ";
                oss << static_cast<float>(v[i]);  // print in float
              }
              oss << "]";
            },
            [&](const std::pair<std::vector<uint32_t>, std::vector<float>> &p) {
              oss << "{first:" << vec_to_string(p.first)
                  << ", second:" << vec_to_string(p.second) << "}";
            },
            [&](const std::pair<std::vector<uint32_t>, std::vector<float16_t>>
                    &p) {
              oss << "{first:" << vec_to_string(p.first) << ", second:[";
              for (size_t i = 0; i < p.second.size(); ++i) {
                if (i > 0) oss << ", ";
                oss << static_cast<float>(p.second[i]);
              }
              oss << "]}";
            }},
        val);
  }
  oss << "}";
  return oss.str();
}

// Binary visitor used by Doc::operator== to compare two field values.
//
// - Values of different alternative types are never equal.
// - Floating point scalars and dense floating point vectors are compared
//   with an absolute tolerance (1e-6 for float, 1e-9 for double, 1e-3 for
//   float16 widened to float).
// - Every other type (including the sparse index/value pairs) falls back to
//   operator==.
//
// NaN never compares equal to anything, and values that are exactly equal
// (including same-signed infinities) always compare equal. Previously the
// vector overloads tested `fabs(diff) >= eps`, which is false for NaN and so
// treated a NaN element as equal to any value.
struct Doc::ValueEqual {
  // Mismatched alternative types.
  template <typename T, typename U>
  bool operator()(const T &, const U &) const {
    return false;
  }

  // Same type with no special handling: plain equality.
  template <typename T>
  bool operator()(const T &a, const T &b) const {
    return a == b;
  }

  bool operator()(float a, float b) const {
    return nearly_equal(a, b, 1e-6f);
  }

  bool operator()(double a, double b) const {
    return nearly_equal(a, b, 1e-9);
  }

  bool operator()(const std::vector<float16_t> &a,
                  const std::vector<float16_t> &b) const {
    if (a.size() != b.size()) return false;
    for (size_t i = 0; i < a.size(); ++i) {
      // float16 is widened to float; the looser tolerance reflects its
      // reduced precision.
      if (!nearly_equal(static_cast<float>(a[i]), static_cast<float>(b[i]),
                        1e-3f)) {
        return false;
      }
    }
    return true;
  }

  bool operator()(const std::vector<float> &a,
                  const std::vector<float> &b) const {
    if (a.size() != b.size()) return false;
    for (size_t i = 0; i < a.size(); ++i) {
      if (!nearly_equal(a[i], b[i], 1e-6f)) return false;
    }
    return true;
  }

  bool operator()(const std::vector<double> &a,
                  const std::vector<double> &b) const {
    if (a.size() != b.size()) return false;
    for (size_t i = 0; i < a.size(); ++i) {
      if (!nearly_equal(a[i], b[i], 1e-9)) return false;
    }
    return true;
  }

 private:
  // True when `a` and `b` are exactly equal or differ by less than `eps`.
  // The exact-equality test makes equal infinities match (their difference
  // is NaN); any comparison involving NaN is false, so NaN never matches.
  template <typename F>
  static bool nearly_equal(F a, F b, F eps) {
    return a == b || std::fabs(a - b) < eps;
  }
};

// Two documents are equal when they have the same primary key and the same
// set of fields, with each field holding the same variant alternative and a
// value that ValueEqual accepts. Only pk_ and fields_ take part in the
// comparison.
bool Doc::operator==(const Doc &other) const {
  if (pk_ != other.pk_) {
    return false;
  }
  if (fields_.size() != other.fields_.size()) {
    return false;
  }

  // Sizes match, so it is enough to find every one of our fields in `other`.
  for (const auto &[name, lhs] : fields_) {
    const auto match = other.fields_.find(name);
    if (match == other.fields_.end()) {
      return false;
    }

    const auto &rhs = match->second;
    // Different alternatives can never be equal; skip the visitor.
    if (lhs.index() != rhs.index()) {
      return false;
    }
    if (!std::visit(ValueEqual{}, lhs, rhs)) {
      return false;
    }
  }

  return true;
}

// Validates this query against the schema of the targeted vector field.
//
// @param schema  Schema of the field named by field_name_, or nullptr when
//                the collection has no such field.
// @return OK when the query is acceptable; InvalidArgument for an oversized
//         topk / output field list, an unknown field, a byte-size mismatch
//         with the schema dimension, too many sparse indices, or a
//         non-vector field; NotSupported for dense vector types that cannot
//         be queried.
Status VectorQuery::validate(const FieldSchema *schema) const {
  if (static_cast<uint32_t>(topk_) > kMaxQueryTopk) {
    return Status::InvalidArgument("query validate failed: topk[", topk_,
                                   "] is too large, max is ", kMaxQueryTopk);
  }
  if (output_fields_.has_value() &&
      output_fields_->size() > kMaxOutputFieldSize) {
    return Status::InvalidArgument(
        "query validate failed: output_fields is too large, max is ",
        kMaxOutputFieldSize);
  }

  if (schema == nullptr) {
    // Without a schema the query is only acceptable if it carries no vector
    // payload at all (neither dense bytes nor sparse indices).
    const bool has_vector_payload =
        !(query_vector_.empty() && query_sparse_indices_.empty());
    if (has_vector_payload) {
      return Status::InvalidArgument("query validate failed:  vector_field[",
                                     field_name_,
                                     "] not defined in the collection schema");
    }
    return Status::OK();
  }

  if (!schema->is_dense_vector()) {
    if (!schema->is_sparse_vector()) {
      return Status::InvalidArgument(
          "query validate failed: field is not vector");
    }
    // Sparse: query_sparse_indices_ is compared in bytes, hence the
    // sizeof(uint32_t) factor on the limit.
    if (query_sparse_indices_.size() > kSparseMaxDimSize * sizeof(uint32_t)) {
      return Status::InvalidArgument(
          "query validate failed: the number of sparse indices exceeds the "
          "maximum limit ",
          kSparseMaxDimSize);
    }
    return Status::OK();
  }

  // Dense: the raw query buffer must hold exactly `dimension` elements of
  // the schema's element type.
  const auto dim = schema->dimension();
  size_t element_size = 0;
  switch (schema->data_type()) {
    case DataType::VECTOR_FP16:
      element_size = sizeof(float16_t);
      break;
    case DataType::VECTOR_FP32:
      element_size = sizeof(float);
      break;
    case DataType::VECTOR_FP64:
      element_size = sizeof(double);
      break;
    case DataType::VECTOR_INT8:
      element_size = sizeof(int8_t);
      break;
    case DataType::VECTOR_INT16:
    case DataType::VECTOR_INT4:
    case DataType::VECTOR_BINARY32:
    case DataType::VECTOR_BINARY64:
      return Status::NotSupported(
          "query validate failed: unsupported dense vector type");
    default:
      return Status::InvalidArgument(
          "query validate failed: field is not dense vector");
  }
  if (dim * element_size != query_vector_.size()) {
    return Status::InvalidArgument(
        "query validate failed: dimension is invalid");
  }
  return Status::OK();
}

}  // namespace zvec


================================================
FILE: src/db/index/common/id_map.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "id_map.h"
#include <cstring>
#include <zvec/ailego/logger/logger.h>
#include "db/common/constants.h"


namespace zvec {


// Opens the RocksDB-backed id map rooted at `working_dir`.
//
// @param working_dir        Directory that holds (or will hold) the store.
// @param create_if_missing  When the path does not exist, create a new store
//                           instead of failing with NotFound.
// @param read_only          Forwarded to the store when opening an existing
//                           directory; not used on the create path.
// @return InternalError if already opened, InvalidArgument if the path
//         exists but is not a directory, NotFound if it is missing and
//         creation was not requested, otherwise the store's own status.
Status IDMap::open(const std::string &working_dir, bool create_if_missing,
                   bool read_only) {
  if (opened_) {
    LOG_ERROR("IDMap is already opened");
    return Status::InternalError();
  }

  const bool path_exists = FILE::IsExist(working_dir);
  if (path_exists && !FILE::IsDirectory(working_dir)) {
    LOG_ERROR("IDMap path[%s] is not a directory", working_dir.c_str());
    return Status::InvalidArgument();
  }
  if (!path_exists && !create_if_missing) {
    LOG_ERROR("IDMap path[%s] does not exist", working_dir.c_str());
    return Status::NotFound();
  }

  Status result = path_exists ? rocksdb_context_.open(working_dir, read_only)
                              : rocksdb_context_.create(working_dir);
  if (!result.ok()) {
    LOG_ERROR("Failed to open IDMap[%s]", working_dir.c_str());
    return result;
  }

  LOG_INFO("Opened IDMap[%s]", working_dir.c_str());
  working_dir_ = working_dir;
  opened_ = true;
  return result;
}


// Convenience factory: constructs an IDMap for `collection_name` and opens
// it with the given options. Returns nullptr when open() fails (the failure
// itself is logged by open()).
IDMap::Ptr IDMap::CreateAndOpen(const std::string &collection_name,
                                const std::string &working_dir,
                                bool create_if_missing, bool read_only) {
  IDMap::Ptr instance = std::make_shared<IDMap>(collection_name);
  const Status open_status =
      instance->open(working_dir, create_if_missing, read_only);
  if (!open_status.ok()) {
    return nullptr;
  }
  return instance;
}


// Closes the underlying store.
//
// Closing a map that is not open is a no-op that returns OK. On a
// successful close the map is marked as not opened, so that the destructor
// does not close the store a second time, flush()/upsert() report an error
// instead of touching a closed store, and the instance can be opened again.
// If the store fails to close, the opened flag is left set and the failing
// status is returned.
Status IDMap::close() {
  if (!opened_) {
    return Status::OK();
  }

  Status status = rocksdb_context_.close();
  if (status.ok()) {
    opened_ = false;
    LOG_INFO("Closed IDMap[%s]", working_dir_.c_str());
  } else {
    LOG_ERROR("Failed to close IDMap[%s]", working_dir_.c_str());
  }
  return status;
}


// Flushes the underlying store and logs the outcome.
// Returns InternalError when the map is not open, otherwise the status
// reported by the store.
Status IDMap::flush() {
  if (!opened_) {
    return Status::InternalError();
  }

  auto flush_status = rocksdb_context_.flush();
  if (!flush_status.ok()) {
    LOG_ERROR("Failed to flush IDMap[%s]", working_dir_.c_str());
    return flush_status;
  }
  LOG_INFO("Flushed IDMap[%s]", working_dir_.c_str());
  return flush_status;
}


// Stores (or overwrites) the mapping key -> doc_id.
//
// The doc id is written as its raw 8-byte in-memory representation.
// Returns InternalError when the map is not open or the write fails.
Status IDMap::upsert(const std::string &key, uint64_t doc_id) {
  if (!opened_) {
    return Status::InternalError();
  }

  const rocksdb::Slice payload(reinterpret_cast<const char *>(&doc_id),
                               sizeof(uint64_t));
  const auto put_status =
      rocksdb_context_.db_->Put(rocksdb_context_.write_opts_, key, payload);
  if (!put_status.ok()) {
    LOG_ERROR("Failed to put [%s, %zu] into IDMap[%s], code[%d], reason[%s]",
              key.c_str(), static_cast<size_t>(doc_id), working_dir_.c_str(),
              put_status.code(), put_status.ToString().c_str());
    return Status::InternalError();
  }
  return Status::OK();
}


void IDMap::remove(const std::string &key) {
  rocksdb_context_.db_->Delete(rocksdb_context_.write_opts_, key);
}


bool IDMap::has(const std::string &key, uint64_t *doc_id) const {
  std::string value;
  auto s = rocksdb_context_.db_->Get(rocksdb_context_.read_opts_, key, &value);
  if (s.ok()) {
    if (doc_id) {
      *doc_id = *(uint64_t *)(value.data());
    }
    return true;
  } else {
    if (doc_id) {
      *doc_id = INVALID_DOC_ID;
    }
    return false;
  }
}


// Looks up many keys in one batched call.
//
// @param keys     Keys to look up; must not be empty.
// @param doc_ids  Output, resized to keys.size(). Entry i receives the doc
//                 id stored for keys[i], or INVALID_DOC_ID when that key is
//                 not present.
// @return InvalidArgument when `doc_ids` is null or `keys` is empty (the
//         output is cleared in the latter case); InternalError when the map
//         is not open, any lookup fails for a reason other than "not
//         found", or a stored value is too short to hold a doc id;
//         otherwise OK.
//
// Values are decoded with memcpy after a length check: the pinned buffers
// carry no alignment guarantee and must not be read past their end.
Status IDMap::multi_get(const std::vector<std::string> &keys,
                        std::vector<uint64_t> *doc_ids) const {
  if (doc_ids == nullptr) {
    return Status::InvalidArgument();
  }
  if (keys.empty()) {
    doc_ids->clear();
    return Status::InvalidArgument();
  }
  if (!opened_) {
    return Status::InternalError();
  }

  std::vector<rocksdb::Slice> slice_keys(keys.begin(), keys.end());
  std::vector<rocksdb::PinnableSlice> pinnable_values(keys.size());
  std::vector<rocksdb::Status> statuses(keys.size());

  auto db = rocksdb_context_.db_.get();

  db->MultiGet(rocksdb_context_.read_opts_, db->DefaultColumnFamily(),
               slice_keys.size(), slice_keys.data(), pinnable_values.data(),
               statuses.data(), false);

  doc_ids->resize(keys.size());
  for (size_t i = 0; i < keys.size(); i++) {
    if (statuses[i].ok()) {
      if (pinnable_values[i].size() < sizeof(uint64_t)) {
        LOG_ERROR("Invalid value size[%zu] for key[%s] in IDMap[%s]",
                  pinnable_values[i].size(), keys[i].c_str(),
                  working_dir_.c_str());
        return Status::InternalError();
      }
      std::memcpy(&(*doc_ids)[i], pinnable_values[i].data(),
                  sizeof(uint64_t));
    } else if (statuses[i].code() == rocksdb::Status::kNotFound) {
      (*doc_ids)[i] = INVALID_DOC_ID;
    } else {
      LOG_ERROR("Failed to get key[%s] from IDMap[%s], code[%d], reason[%s]",
                keys[i].c_str(), working_dir_.c_str(), statuses[i].code(),
                statuses[i].ToString().c_str());
      return Status::InternalError();
    }
  }

  return Status::OK();
}


// Reports the SST file size of the underlying store, as returned by
// RocksdbContext::sst_file_size().
size_t IDMap::storage_size_in_bytes() {
  const size_t sst_bytes = rocksdb_context_.sst_file_size();
  return sst_bytes;
}


// Reports the number of entries, as returned by RocksdbContext::count().
size_t IDMap::count() {
  const size_t entry_count = rocksdb_context_.count();
  return entry_count;
}


}  // namespace zvec


================================================
FILE: src/db/index/common/id_map.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include <string>
#include <vector>
#include <zvec/ailego/io/file.h>
#include <zvec/db/status.h>
#include "db/common/rocksdb_context.h"


namespace zvec {


// Persistent mapping from a string key to a 64-bit document id, stored in a
// RocksDB instance (via RocksdbContext) that lives in a working directory.
//
// An instance is non-copyable. It must be opened with open() (or created
// through CreateAndOpen()) before use; the destructor closes it if it is
// still marked as opened.
class IDMap {
 public:
  using Ptr = std::shared_ptr<IDMap>;

  // Records the owning collection's name; does not touch the file system.
  explicit IDMap(std::string collection_name)
      : collection_name_(std::move(collection_name)) {};

  // Closes the store if it is still marked as opened.
  ~IDMap() {
    if (opened_) {
      close();
    }
  }

  // Constructs an IDMap and opens it; returns nullptr if open() fails.
  static Ptr CreateAndOpen(const std::string &collection_name,
                           const std::string &working_dir,
                           bool create_if_missing, bool read_only);


 private:
  // Copying and move-assignment are disabled.
  IDMap(const IDMap &) = delete;
  IDMap &operator=(const IDMap &) = delete;
  IDMap &operator=(IDMap &&) = delete;


 public:
  // Opens the store at `working_dir`. If the path does not exist it is
  // created only when `create_if_missing` is set; `read_only` applies when
  // opening an existing directory. Fails if the map is already opened.
  Status open(const std::string &working_dir, bool create_if_missing,
              bool read_only);

  // Closes the store; returns OK without doing anything if not opened.
  Status close();

  // NOTE(review): the definition is not part of id_map.cc as shown here;
  // presumably writes a snapshot of the store into `snapshot_dir` — confirm.
  Status create_snapshot(const std::string &snapshot_dir);

  // Flushes the store; InternalError if not opened.
  Status flush();

  // Inserts or overwrites the doc id stored for `key`.
  Status upsert(const std::string &key, uint64_t doc_id);

  // Deletes the entry for `key`.
  void remove(const std::string &key);

  // Returns whether `key` is present. When `doc_id` is non-null it receives
  // the stored id, or INVALID_DOC_ID when the lookup does not succeed.
  bool has(const std::string &key, uint64_t *doc_id = nullptr) const;

  // Batched lookup: `doc_ids` is resized to keys.size(); keys that are not
  // found map to INVALID_DOC_ID. An empty `keys` yields InvalidArgument.
  Status multi_get(const std::vector<std::string> &keys,
                   std::vector<uint64_t> *doc_ids) const;

  // SST file size reported by the underlying RocksdbContext.
  size_t storage_size_in_bytes();

  // Entry count reported by the underlying RocksdbContext.
  size_t count();


  // Name of the collection this map belongs to (fixed at construction).
  const std::string &collection_name() const {
    return collection_name_;
  }

  // Directory passed to the last successful open(); empty before that.
  const std::string &working_dir() const {
    return working_dir_;
  }


 private:
  // Shorthand for the file-system helper. Inside this class the name FILE
  // refers to ailego::File, not the C stdio FILE type.
  using FILE = ailego::File;


  const std::string collection_name_{};
  std::string working_dir_{};

  RocksdbContext rocksdb_context_{};
  // Set by a successful open(); guards several operations in id_map.cc.
  bool opened_{false};
};


}  // namespace zvec

================================================
FILE: src/db/index/common/index_filter.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <cstdint>
#include <functional>
#include <memory>


namespace zvec {


/**
 * Abstract predicate over document ids, used to exclude documents.
 *
 * Implementations are shared through IndexFilter::Ptr; the type is
 * non-copyable.
 */
class IndexFilter {
 public:
  using Ptr = std::shared_ptr<IndexFilter>;

  IndexFilter() = default;

  // Non-copyable.
  IndexFilter(const IndexFilter &) = delete;
  IndexFilter &operator=(const IndexFilter &) = delete;

  virtual ~IndexFilter() = default;

  /**
   * Decide whether a document is filtered out.
   * @param id The document ID
   * @return true if the document is filtered (should be excluded),
   *         false if it is not filtered (should be included)
   */
  virtual bool is_filtered(uint64_t id) const = 0;
};

/**
 * IndexFilter backed by an arbitrary callable, so a filter can be written
 * as a lambda instead of a dedicated subclass.
 */
class EasyIndexFilter : public IndexFilter {
 public:
  using FilterFunction = std::function<bool(uint64_t)>;

  /**
   * Wrap a filter function.
   * @param filter_func Callable that receives a document id and returns
   *                    true when the document should be filtered (excluded)
   */
  explicit EasyIndexFilter(FilterFunction filter_func)
      : filter_func_(std::move(filter_func)) {}

  /**
   * Build a shared filter from a lambda expression or function.
   * @param filter_func Callable that receives a document id and returns
   *                    true when the document should be filtered (excluded)
   * @return The new filter as an IndexFilter::Ptr
   */
  static IndexFilter::Ptr Create(FilterFunction filter_func) {
    IndexFilter::Ptr filter =
        std::make_shared<EasyIndexFilter>(std::move(filter_func));
    return filter;
  }

  /**
   * Evaluate the wrapped function for one document.
   * @param id The document ID
   * @return true if the document should be filtered (excluded),
   *         false if it should be kept (included)
   */
  bool is_filtered(uint64_t id) const override {
    return filter_func_(id);
  }

 private:
  FilterFunction filter_func_;
};


}  // namespace zvec

================================================
FILE: src/db/index/common/index_params.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <sstream>
#include <zvec/db/index_params.h>
#include "type_helper.h"

namespace zvec {

// Describes the inverted-index options as
// "InvertIndexParams{enable_range_optimization:<bool>, enable_extended_wildcard:<bool>}".
std::string InvertIndexParams::to_string() const {
  const char *range_opt_text = enable_range_optimization_ ? "true" : "false";
  const char *wildcard_text = enable_extended_wildcard_ ? "true" : "false";

  std::string text = "InvertIndexParams{";
  text += "enable_range_optimization:";
  text += range_opt_text;
  text += ", enable_extended_wildcard:";
  text += wildcard_text;
  text += "}";
  return text;
}

// Builds the common prefix of a vector index description:
// "<class_name>{metric:<metric>,quantize:<quantize>".
// The closing brace is intentionally not written here; the returned text is
// left open so more content can be appended after it.
std::string VectorIndexParams::vector_index_params_to_string(
    const std::string &class_name, MetricType metric_type,
    QuantizeType quantize_type) const {
  std::ostringstream text;
  text << class_name << "{";
  text << "metric:" << MetricTypeCodeBook::AsString(metric_type);
  text << ",quantize:" << QuantizeTypeCodeBook::AsString(quantize_type);
  return text.str();
}

}  // namespace zvec

================================================
FILE: src/db/index/common/meta.h
================================================

// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <algorithm>
#include <cstdint>
#include <memory>
#include <optional>
#include <set>
#include <sstream>
#include <vector>
#include "db/common/utils.h"
#include "db/index/common/type_helper.h"

namespace zvec {

using SegmentID = uint32_t;
using BlockID = uint32_t;

// Metadata describing one storage block of a segment: its id and type, the
// doc id range and document count it covers, and the columns it holds.
class BlockMeta {
 public:
  using Ptr = std::shared_ptr<BlockMeta>;

 public:
  BlockMeta() = default;

  // Fully specified block.
  BlockMeta(uint32_t id, BlockType type, uint64_t min_doc_id,
            uint64_t max_doc_id, uint32_t doc_count,
            const std::vector<std::string> &columns)
      : id_(id),
        type_(type),
        min_doc_id_(min_doc_id),
        max_doc_id_(max_doc_id),
        doc_count_(doc_count),
        columns_(columns) {}

  // Block with a doc id range only; doc count is 0 and there are no columns.
  BlockMeta(uint32_t id, BlockType type, uint64_t min_doc_id,
            uint64_t max_doc_id)
      : BlockMeta(id, type, min_doc_id, max_doc_id, 0, {}) {}

  // --- Plain accessors -----------------------------------------------------

  uint32_t id() const {
    return id_;
  }

  void set_id(uint32_t id) {
    id_ = id;
  }

  BlockType type() const {
    return type_;
  }

  void set_type(BlockType type) {
    type_ = type;
  }

  uint64_t min_doc_id() const {
    return min_doc_id_;
  }

  void set_min_doc_id(uint64_t min_doc_id) {
    min_doc_id_ = min_doc_id;
  }

  uint64_t max_doc_id() const {
    return max_doc_id_;
  }

  void set_max_doc_id(uint64_t max_doc_id) {
    max_doc_id_ = max_doc_id;
  }

  uint32_t doc_count() const {
    return doc_count_;
  }

  void set_doc_count(uint32_t doc_count) {
    doc_count_ = doc_count;
  }

  // --- Column list ---------------------------------------------------------

  const std::vector<std::string> &columns() const {
    return columns_;
  }

  void set_columns(const std::vector<std::string> &columns) {
    columns_ = columns;
  }

  // Appends `column`; duplicates are not checked.
  void add_column(const std::string &column) {
    columns_.push_back(column);
  }

  // Removes every occurrence of `column`.
  void del_column(const std::string &column) {
    const auto new_end =
        std::remove(columns_.begin(), columns_.end(), column);
    columns_.erase(new_end, columns_.end());
  }

  bool contain_column(const std::string &column) const {
    return std::any_of(
        columns_.begin(), columns_.end(),
        [&column](const std::string &name) { return name == column; });
  }

 public:
  // Member-wise equality over all fields.
  bool operator==(const BlockMeta &other) const {
    if (id_ != other.id_ || type_ != other.type_) {
      return false;
    }
    if (min_doc_id_ != other.min_doc_id_ ||
        max_doc_id_ != other.max_doc_id_) {
      return false;
    }
    return doc_count_ == other.doc_count_ && columns_ == other.columns_;
  }

  // Compact single-line description.
  std::string to_string() const {
    std::ostringstream out;
    out << "BlockMeta{"
        << "id:" << id_ << ",type:" << BlockTypeCodeBook::AsString(type_)
        << ",min_doc_id:" << min_doc_id_ << ",max_doc_id:" << max_doc_id_
        << ",doc_count:" << doc_count_ << ",columns:[";

    bool need_comma = false;
    for (const auto &name : columns_) {
      if (need_comma) out << ",";
      need_comma = true;
      out << "'" << name << "'";
    }

    out << "]}";
    return out.str();
  }

  // Multi-line description; `indent_level` is the nesting depth passed to
  // indent() for the outer braces, with members one level deeper.
  std::string to_string_formatted(int indent_level = 0) const {
    const int member_level = indent_level + 1;
    const int item_level = indent_level + 2;

    std::ostringstream out;
    out << indent(indent_level) << "BlockMeta{\n";
    out << indent(member_level) << "id: " << id_ << ",\n";
    out << indent(member_level)
        << "type: " << BlockTypeCodeBook::AsString(type_) << ",\n";
    out << indent(member_level) << "min_doc_id: " << min_doc_id_ << ",\n";
    out << indent(member_level) << "max_doc_id: " << max_doc_id_ << ",\n";
    out << indent(member_level) << "doc_count: " << doc_count_ << ",\n";
    out << indent(member_level) << "columns: [";

    if (!columns_.empty()) {
      out << "\n";
      const size_t total = columns_.size();
      for (size_t pos = 0; pos < total; ++pos) {
        out << indent(item_level) << "'" << columns_[pos] << "'";
        // Comma after every entry except the last one.
        if (pos + 1 < total) {
          out << ",";
        }
        out << "\n";
      }
      out << indent(member_level);
    }

    out << "]\n" << indent(indent_level) << "}";
    return out.str();
  }

 public:
  uint32_t id_{0};
  BlockType type_{BlockType::UNDEFINED};
  uint64_t min_doc_id_{0};
  uint64_t max_doc_id_{0};
  uint32_t doc_count_{0};
  std::vector<std::string> columns_{};
};

class SegmentMeta {
 public:
  using Ptr = std::shared_ptr<SegmentMeta>;

 public:
  SegmentMeta() {};

  explicit SegmentMeta(SegmentID id) : id_(id) {}

  void set_id(SegmentID id) {
    id_ = id;
  }

  SegmentID id() const {
    return id_;
  }

  void add_persisted_block(const BlockMeta &block) {
    persisted_blocks_.push_back(block);
  }

  void set_persisted_blocks(const std::vector<BlockMeta> &blocks) {
    persisted_blocks_ = blocks;
  }

  bool remove_block(BlockID block_id) {
    auto it = std::remove_if(
        persisted_blocks_.begin(), persisted_blocks_.end(),
        [block_id](const BlockMeta &block) { return block.id() == block_id; });
    bool found = (it != persisted_blocks_.end());
    persisted_blocks_.erase(it, persisted_blocks_.end());
    return found;
  }

  void remove_vector_persisted_block(const std::string &column, bool quantize) {
    std::vector<BlockMeta> new_persisted_blocks;
    for (auto &b : persisted_blocks_) {
      if (quantize) {
        if (!(b.type() == BlockType::VECTOR_INDEX_QUANTIZE &&
              b.contain_column(column))) {
          new_persisted_blocks.push_back(b);
        }
      } else {
        if (!(b.type() == BlockType::VECTOR_INDEX &&
              b.contain_column(column))) {
          new_persisted_blocks.push_back(b);
        }
      }
    }
    persisted_blocks_ = new_persisted_blocks;
  }

  void remove_vector_persisted_block(const std::string &column) {
    std::vector<BlockMeta> new_persisted_blocks;
    for (auto &b : persisted_blocks_) {
      if (!b.contain_column(column)) {
        new_persisted_blocks.push_back(b);
      }
    }
    persisted_blocks_ = new_persisted_blocks;
  }

  void remove_scalar_index_block() {
    std::vector<BlockMeta> new_persisted_blocks;
    for (auto &b : persisted_blocks_) {
      if (b.type() != BlockType::SCALAR_INDEX) {
        new_persisted_blocks.push_back(b);
      }
    }
    persisted_blocks_ = new_persisted_blocks;
  }

  void set_writing_forward_block(const BlockMeta &writing_forward_block) {
    writing_forward_block_ = writing_forward_block;
  }

  void remove_writing_forward_block() {
    writing_forward_block_ = std::nullopt;
  }

  void update_max_doc_id(uint64_t max_doc_id) {
    if (writing_forward_block_.has_value()) {
      writing_forward_block_->set_max_doc_id(max_doc_id);
    }
  }

  uint64_t min_doc_id() const {
    if (persisted_blocks_.empty()) {
      if (writing_forward_block_.has_value()) {
        return writing_forward_block_->min_doc_id();
      }
      return 0;
    }
    uint64_t min_doc_id{std::numeric_limits<uint64_t>::max()};
    for (const auto &block : persisted_blocks_) {
      if (block.type() == BlockType::SCALAR) {
        min_doc_id = std::min(min_doc_id, block.min_doc_id());
      }
    }
    if (min_doc_id == std::numeric_limits<uint64_t>::max() &&
        writing_forward_block_.has_value()) {
      min_doc_id = writing_forward_block_->min_doc_id();
    }
    return min_doc_id;
  }

  uint64_t max_doc_id() const {
    if (writing_forward_block_.has_value() &&
        writing_forward_block_->doc_count_ != 0) {
      return writing_forward_block_->max_doc_id();
    }
    uint64_t max_doc_id{0};
    for (const auto &block : persisted_blocks_) {
      if (block.type() == BlockType::SCALAR) {
        max_doc_id = std::max(max_doc_id, block.max_doc_id());
      }
    }
    return max_doc_id;
  }

  uint32_t doc_count() const {
    uint32_t count{0};
    if (writing_forward_block_.has_value()) {
      count = writing_forward_block_->doc_count();
    }
    for (const auto &block : persisted_blocks_) {
      if (block.type() == BlockType::SCALAR) {
        count += block.doc_count();
      }
    }
    return count;
  }

  std::vector<BlockMeta> &persisted_blocks() {
    return persisted_blocks_;
  }

  const std::vector<BlockMeta> &persisted_blocks() const {
    return persisted_blocks_;
  }

  std::optional<BlockMeta> &writing_forward_block() {
    return writing_forward_block_;
  }

  const std::optional<BlockMeta> &writing_forward_block() const {
    return writing_forward_block_;
  }

  bool has_writing_forward_block() const {
    return writing_forward_block_.has_value();
  }

  bool vector_indexed(const std::string &field) const {
    return indexed_vector_fields_.count(field) > 0;
  }

  void add_indexed_vector_field(const std::string &field) {
    indexed_vector_fields_.insert(field);
  }

  std::set<std::string> indexed_vector_fields() const {
    return indexed_vector_fields_;
  }

  void set_indexed_vector_fields(const std::set<std::string> &fields) {
    indexed_vector_fields_ = fields;
  }

 public:
  bool operator==(const SegmentMeta &other) const {
    return id_ == other.id_ && persisted_blocks_ == other.persisted_blocks_ &&
           writing_forward_block_ == other.writing_forward_block_ &&
           indexed_vector_fields_ == other.indexed_vector_fields_;
  }

  bool operator!=(const SegmentMeta &other) const {
    return !(*this == other);
  }

  // Add these methods to SegmentMeta class in meta.h

  std::string to_string() const {
    std::ostringstream oss;
    oss << "SegmentMeta{"
        << "id:" << id_ << ",persisted_blocks:[";

    for (size_t i = 0; i < persisted_blocks_.size(); ++i) {
      if (i > 0) oss << ",";
      oss << persisted_blocks_[i].to_string();
    }

    oss << "],writing_forward_block:";
    if (writing_forward_block_.has_value()) {
      oss << writing_forward_block_->to_string();
    } else {
      oss << "null";
    }

    oss << ",indexed_vector_fields:[";

    size_t i = 0;
    for (const auto &field : indexed_vector_fields_) {
      if (i > 0) oss << ",";
      oss << "'" << field << "'";
      ++i;
    }

    oss << "]}";
    return oss.str();
  }

  std::string to_string_formatted(int indent_level = 0) const {
    std::ostringstream oss;
    oss << indent(indent_level) << "SegmentMeta{\n"
        << indent(indent_level + 1) << "id: " << id_ << ",\n"
        << indent(indent_level + 1) << "persisted_blocks: [\n";

    for (size_t i = 0; i < persisted_blocks_.size(); ++i) {
      oss << persisted_blocks_[i].to_string_formatted(indent_level + 2);
      if (i < persisted_blocks_.size() - 1) {
        oss << ",";
      }
      oss << "\n";
    }

    oss << "\n"
        << indent(indent_level + 1) << "],\n"
        << indent(indent_level + 1) << "writing_forward_block: ";

    if (writing_forward_block_.has_value()) {
      oss << "\n"
          << writing_forward_block_->to_string_formatted(indent_level + 2)
          << "\n";
    } else {
      oss << "null\n";
    }

    oss << indent(indent_level + 1) << "indexed_vector_fields: [";

    size_t i = 0;
    for (const auto &field : indexed_vector_fields_) {
      if (i > 0) oss << ",";
      oss << "'" << field << "'";
      ++i;
    }

    oss << "]\n" << indent(indent_level) << "}";
    return oss.str();
  }

 private:
  SegmentID id_{0};
  std::vector<BlockMeta> persisted_blocks_;
  std::optional<BlockMeta> writing_forward_block_;
  std::set<std::string> indexed_vector_fields_;
};

}  // namespace zvec

================================================
FILE: src/db/index/common/proto_converter.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "proto_converter.h"

namespace zvec {

// Builds an HnswIndexParams from its protobuf form. Metric and quantize types
// are read from the embedded `base` message and translated through their
// code books; `m` and `ef_construction` are passed through unchanged.
HnswIndexParams::OPtr ProtoConverter::FromPb(
    const proto::HnswIndexParams &params_pb) {
  const auto &base_pb = params_pb.base();
  const auto metric = MetricTypeCodeBook::Get(base_pb.metric_type());
  const auto quantize = QuantizeTypeCodeBook::Get(base_pb.quantize_type());

  return std::make_shared<HnswIndexParams>(
      metric, params_pb.m(), params_pb.ef_construction(), quantize);
}

// Serializes HNSW index parameters into their protobuf message. `params` is
// dereferenced unconditionally, so it must not be null.
proto::HnswIndexParams ProtoConverter::ToPb(const HnswIndexParams *params) {
  proto::HnswIndexParams result;

  // Fields shared by vector indexes live in the nested `base` message.
  auto *base_pb = result.mutable_base();
  base_pb->set_metric_type(MetricTypeCodeBook::Get(params->metric_type()));
  base_pb->set_quantize_type(
      QuantizeTypeCodeBook::Get(params->quantize_type()));

  result.set_ef_construction(params->ef_construction());
  result.set_m(params->m());
  return result;
}

// HnswRabitqIndexParams
// Builds an HnswRabitqIndexParams from its protobuf form. Only the metric
// type is read from the `base` message; the quantize type stored there is not
// consumed by this conversion.
HnswRabitqIndexParams::OPtr ProtoConverter::FromPb(
    const proto::HnswRabitqIndexParams &params_pb) {
  const auto metric = MetricTypeCodeBook::Get(params_pb.base().metric_type());

  return std::make_shared<HnswRabitqIndexParams>(
      metric, params_pb.total_bits(), params_pb.num_clusters(), params_pb.m(),
      params_pb.ef_construction(), params_pb.sample_count());
}

// Serializes HNSW+RabitQ index parameters into their protobuf message.
// `params` is dereferenced unconditionally, so it must not be null.
proto::HnswRabitqIndexParams ProtoConverter::ToPb(
    const HnswRabitqIndexParams *params) {
  proto::HnswRabitqIndexParams result;

  // Fields shared by vector indexes live in the nested `base` message.
  auto *base_pb = result.mutable_base();
  base_pb->set_metric_type(MetricTypeCodeBook::Get(params->metric_type()));
  base_pb->set_quantize_type(
      QuantizeTypeCodeBook::Get(params->quantize_type()));

  // Graph construction settings.
  result.set_m(params->m());
  result.set_ef_construction(params->ef_construction());

  // RabitQ-specific settings.
  result.set_total_bits(params->total_bits());
  result.set_num_clusters(params->num_clusters());
  result.set_sample_count(params->sample_count());
  return result;
}

// FlatIndexParams
// Builds a FlatIndexParams from its protobuf form; both the metric and the
// quantize type come from the embedded `base` message.
FlatIndexParams::OPtr ProtoConverter::FromPb(
    const proto::FlatIndexParams &params_pb) {
  const auto &base_pb = params_pb.base();
  const auto metric = MetricTypeCodeBook::Get(base_pb.metric_type());
  const auto quantize = QuantizeTypeCodeBook::Get(base_pb.quantize_type());

  return std::make_shared<FlatIndexParams>(metric, quantize);
}

// Serializes flat index parameters into their protobuf message. `params` is
// dereferenced unconditionally, so it must not be null.
proto::FlatIndexParams ProtoConverter::ToPb(const FlatIndexParams *params) {
  proto::FlatIndexParams result;

  auto *base_pb = result.mutable_base();
  base_pb->set_metric_type(MetricTypeCodeBook::Get(params->metric_type()));
  base_pb->set_quantize_type(
      QuantizeTypeCodeBook::Get(params->quantize_type()));

  return result;
}

// IVFIndexParams
// Builds an IVFIndexParams from its protobuf form. Metric and quantize types
// are read from the `base` message; `n_list`, `n_iters` and `use_soar` are
// forwarded as stored.
IVFIndexParams::OPtr ProtoConverter::FromPb(
    const proto::IVFIndexParams &params_pb) {
  const auto &base_pb = params_pb.base();
  const auto metric = MetricTypeCodeBook::Get(base_pb.metric_type());
  const auto quantize = QuantizeTypeCodeBook::Get(base_pb.quantize_type());

  return std::make_shared<IVFIndexParams>(metric, params_pb.n_list(),
                                          params_pb.n_iters(),
                                          params_pb.use_soar(), quantize);
}

// Serializes IVF index parameters into their protobuf message. `params` is
// dereferenced unconditionally, so it must not be null.
proto::IVFIndexParams ProtoConverter::ToPb(const IVFIndexParams *params) {
  proto::IVFIndexParams result;

  auto *base_pb = result.mutable_base();
  base_pb->set_metric_type(MetricTypeCodeBook::Get(params->metric_type()));
  base_pb->set_quantize_type(
      QuantizeTypeCodeBook::Get(params->quantize_type()));

  result.set_n_list(params->n_list());
  result.set_n_iters(params->n_iters());
  result.set_use_soar(params->use_soar());
  return result;
}

// InvertIndexParams
// Builds an InvertIndexParams from its protobuf form; the only value carried
// over is the `enable_range_optimization` flag.
InvertIndexParams::OPtr ProtoConverter::FromPb(
    const proto::InvertIndexParams &params_pb) {
  return std::make_shared<InvertIndexParams>(
      params_pb.enable_range_optimization());
}

// Serializes inverted index parameters into their protobuf message. `params`
// is dereferenced unconditionally, so it must not be null.
proto::InvertIndexParams ProtoConverter::ToPb(const InvertIndexParams *params) {
  proto::InvertIndexParams result;
  const auto range_optimization = params->enable_range_optimization();
  result.set_enable_range_optimization(range_optimization);
  return result;
}

// FieldSchema
// Builds a FieldSchema from its protobuf form. Name, data type, dimension and
// nullability are always copied; index parameters are converted only when the
// message actually carries them.
FieldSchema::Ptr ProtoConverter::FromPb(const proto::FieldSchema &schema_pb) {
  auto field = std::make_shared<FieldSchema>();

  field->set_name(schema_pb.name());
  field->set_data_type(DataTypeCodeBook::Get(schema_pb.data_type()));
  field->set_dimension(schema_pb.dimension());
  field->set_nullable(schema_pb.nullable());

  if (!schema_pb.has_index_params()) {
    return field;
  }
  field->set_index_params(ProtoConverter::FromPb(schema_pb.index_params()));
  return field;
}
// Serializes a FieldSchema into its protobuf message. The `index_params`
// sub-message is populated only when the field has index parameters attached.
proto::FieldSchema ProtoConverter::ToPb(const FieldSchema &schema) {
  proto::FieldSchema field_pb;

  field_pb.set_name(schema.name());
  field_pb.set_data_type(DataTypeCodeBook::Get(schema.data_type()));
  field_pb.set_dimension(schema.dimension());
  field_pb.set_nullable(schema.nullable());

  if (auto params = schema.index_params(); params) {
    field_pb.mutable_index_params()->MergeFrom(
        ProtoConverter::ToPb(params.get()));
  }
  return field_pb;
}

// CollectionSchema
// Builds a CollectionSchema from its protobuf form: the collection name, every
// field in stored order, and the per-segment document limit. The status
// returned by add_field() is not inspected here.
CollectionSchema::Ptr ProtoConverter::FromPb(
    const proto::CollectionSchema &schema_pb) {
  CollectionSchema::Ptr collection = std::make_shared<CollectionSchema>();

  collection->set_name(schema_pb.name());

  for (auto &field_pb : schema_pb.fields()) {
    FieldSchema::Ptr field = ProtoConverter::FromPb(field_pb);
    collection->add_field(field);
  }

  collection->set_max_doc_count_per_segment(
      schema_pb.max_doc_count_per_segment());
  return collection;
}

// Serializes a CollectionSchema into its protobuf message: name, one `fields`
// entry per field returned by schema.fields(), and the per-segment document
// limit.
proto::CollectionSchema ProtoConverter::ToPb(const CollectionSchema &schema) {
  proto::CollectionSchema collection_pb;
  collection_pb.set_name(schema.name());

  for (auto &field : schema.fields()) {
    collection_pb.add_fields()->MergeFrom(ProtoConverter::ToPb(*field));
  }

  collection_pb.set_max_doc_count_per_segment(
      schema.max_doc_count_per_segment());
  return collection_pb;
}

// Converts the generic IndexParams protobuf wrapper into the concrete index
// parameter object for whichever variant is set. Variants are probed in the
// order hnsw, invert, ivf, flat, hnsw_rabitq; nullptr is returned when none of
// them is present.
IndexParams::Ptr ProtoConverter::FromPb(const proto::IndexParams &params_pb) {
  if (params_pb.has_hnsw()) {
    return ProtoConverter::FromPb(params_pb.hnsw());
  }
  if (params_pb.has_invert()) {
    return ProtoConverter::FromPb(params_pb.invert());
  }
  if (params_pb.has_ivf()) {
    return ProtoConverter::FromPb(params_pb.ivf());
  }
  if (params_pb.has_flat()) {
    return ProtoConverter::FromPb(params_pb.flat());
  }
  if (params_pb.has_hnsw_rabitq()) {
    return ProtoConverter::FromPb(params_pb.hnsw_rabitq());
  }
  return nullptr;
}

// BlockMeta
// Builds a BlockMeta from its protobuf form: id, type, doc-id range, document
// count, and every column name in stored order.
BlockMeta::Ptr ProtoConverter::FromPb(const proto::BlockMeta &meta_pb) {
  auto block = std::make_shared<BlockMeta>();

  block->set_id(meta_pb.block_id());
  block->set_type(BlockTypeCodeBook::Get(meta_pb.block_type()));
  block->set_min_doc_id(meta_pb.min_doc_id());
  block->set_max_doc_id(meta_pb.max_doc_id());
  block->set_doc_count(meta_pb.doc_count());

  for (auto &column_name : meta_pb.columns()) {
    block->add_column(column_name);
  }
  return block;
}

// Serializes a polymorphic IndexParams into the generic protobuf wrapper.
//
// The concrete variant is chosen from params->type(); the object is then
// down-cast with dynamic_cast and, if the cast succeeds, written into the
// matching sub-message. When the type is not one of the handled values, or the
// dynamic type does not match the reported type, the returned message has no
// variant set.
//
// `params` is dereferenced unconditionally, so it must not be null.
proto::IndexParams ProtoConverter::ToPb(const IndexParams *params) {
  proto::IndexParams params_pb;

  switch (params->type()) {
    case IndexType::INVERT: {
      auto invert_params = dynamic_cast<const InvertIndexParams *>(params);
      if (invert_params) {
        params_pb.mutable_invert()->CopyFrom(
            ProtoConverter::ToPb(invert_params));
      }
      break;
    }
    case IndexType::HNSW: {
      auto hnsw_params = dynamic_cast<const HnswIndexParams *>(params);
      if (hnsw_params) {
        params_pb.mutable_hnsw()->CopyFrom(ProtoConverter::ToPb(hnsw_params));
      }
      break;
    }
    case IndexType::IVF: {
      auto ivf_params = dynamic_cast<const IVFIndexParams *>(params);
      if (ivf_params) {
        params_pb.mutable_ivf()->CopyFrom(ProtoConverter::ToPb(ivf_params));
      }
      break;
    }
    case IndexType::FLAT: {
      auto flat_params = dynamic_cast<const FlatIndexParams *>(params);
      if (flat_params) {
        params_pb.mutable_flat()->CopyFrom(ProtoConverter::ToPb(flat_params));
      }
      break;
    }
    case IndexType::HNSW_RABITQ: {
      auto hnsw_rabitq_params =
          dynamic_cast<const HnswRabitqIndexParams *>(params);
      if (hnsw_rabitq_params) {
        params_pb.mutable_hnsw_rabitq()->CopyFrom(
            ProtoConverter::ToPb(hnsw_rabitq_params));
      }
      // Explicit break: this case previously fell through into `default`,
      // which only worked because `default` does nothing.
      break;
    }
    default:
      break;
  }

  return params_pb;
}

// Serializes a BlockMeta into its protobuf message: id, type, doc-id range,
// document count, and every column name in order.
proto::BlockMeta ProtoConverter::ToPb(const BlockMeta &meta) {
  proto::BlockMeta block_pb;

  block_pb.set_block_id(meta.id());
  block_pb.set_block_type(BlockTypeCodeBook::Get(meta.type()));
  block_pb.set_min_doc_id(meta.min_doc_id());
  block_pb.set_max_doc_id(meta.max_doc_id());
  block_pb.set_doc_count(meta.doc_count());

  for (auto &column_name : meta.columns()) {
    block_pb.add_columns(column_name);
  }
  return block_pb;
}

// SegmentMeta
// Builds a SegmentMeta from its protobuf form: the segment id, every persisted
// block in stored order, the writing forward block when present, and the
// names of the indexed vector fields.
SegmentMeta::Ptr ProtoConverter::FromPb(const proto::SegmentMeta &meta_pb) {
  auto meta = std::make_shared<SegmentMeta>(meta_pb.segment_id());

  // Bind the repeated fields by const reference; taking them with plain
  // `auto` would deep-copy every element just to iterate once.
  const auto &persisted_blocks = meta_pb.persisted_blocks();
  for (const auto &persisted_block_pb : persisted_blocks) {
    BlockMeta::Ptr persisted_block = ProtoConverter::FromPb(persisted_block_pb);
    meta->add_persisted_block(*persisted_block);
  }

  if (meta_pb.has_writing_forward_block()) {
    meta->set_writing_forward_block(
        *ProtoConverter::FromPb(meta_pb.writing_forward_block()));
  }

  const auto &indexed_vector_fields = meta_pb.indexed_vector_fields();
  for (const auto &indexed_vector_field : indexed_vector_fields) {
    meta->add_indexed_vector_field(indexed_vector_field);
  }

  return meta;
}

// Serializes a SegmentMeta into its protobuf message: the segment id, one
// `persisted_blocks` entry per block, the writing forward block when the
// segment has one, and the indexed vector field names.
proto::SegmentMeta ProtoConverter::ToPb(const SegmentMeta &meta) {
  proto::SegmentMeta meta_pb;
  meta_pb.set_segment_id(meta.id());

  // `const auto &` avoids an extra container copy when the accessor returns a
  // reference, and still extends the lifetime of the temporary when it
  // returns by value.
  const auto &persisted_blocks = meta.persisted_blocks();
  for (const auto &persisted_block : persisted_blocks) {
    meta_pb.add_persisted_blocks()->MergeFrom(
        ProtoConverter::ToPb(persisted_block));
  }

  if (meta.has_writing_forward_block()) {
    meta_pb.mutable_writing_forward_block()->MergeFrom(
        ProtoConverter::ToPb(meta.writing_forward_block().value()));
  }

  const auto &indexed_vector_fields = meta.indexed_vector_fields();
  for (const auto &field : indexed_vector_fields) {
    meta_pb.add_indexed_vector_fields(field);
  }

  return meta_pb;
}

}  // namespace zvec

================================================
FILE: src/db/index/common/proto_converter.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <zvec/db/index_params.h>
#include <zvec/db/schema.h>
#include "db/index/common/meta.h"

namespace zvec {

// Stateless collection of static conversion helpers between in-memory schema /
// index-parameter / metadata objects and their protobuf messages.
//
// Each supported type has a pair of overloads:
//   - FromPb(const proto::X &) builds the in-memory object and returns it
//     through that type's smart-pointer alias (Ptr / OPtr).
//   - ToPb(...) produces the protobuf message by value. Index-parameter
//     overloads take a raw pointer; schema and meta overloads take a const
//     reference.
struct ProtoConverter {
  // HnswIndexParams
  static HnswIndexParams::OPtr FromPb(const proto::HnswIndexParams &params_pb);

  static proto::HnswIndexParams ToPb(const HnswIndexParams *params);

  // HnswRabitqIndexParams
  static HnswRabitqIndexParams::OPtr FromPb(
      const proto::HnswRabitqIndexParams &params_pb);
  static proto::HnswRabitqIndexParams ToPb(const HnswRabitqIndexParams *params);

  // FlatIndexParams
  static FlatIndexParams::OPtr FromPb(const proto::FlatIndexParams &params_pb);
  static proto::FlatIndexParams ToPb(const FlatIndexParams *params);

  // IVFIndexParams
  static IVFIndexParams::OPtr FromPb(const proto::IVFIndexParams &params_pb);
  static proto::IVFIndexParams ToPb(const IVFIndexParams *params);

  // InvertIndexParams
  static InvertIndexParams::OPtr FromPb(
      const proto::InvertIndexParams &params_pb);
  static proto::InvertIndexParams ToPb(const InvertIndexParams *params);

  // IndexParams: generic wrapper that holds one of the concrete index
  // parameter variants declared above.
  static IndexParams::Ptr FromPb(const proto::IndexParams &params_pb);
  static proto::IndexParams ToPb(const IndexParams *params);

  // FieldSchema
  static FieldSchema::Ptr FromPb(const proto::FieldSchema &field_pb);
  static proto::FieldSchema ToPb(const FieldSchema &field);

  // CollectionSchema
  static CollectionSchema::Ptr FromPb(const proto::CollectionSchema &schema_pb);
  static proto::CollectionSchema ToPb(const CollectionSchema &schema);

  // BlockMeta
  static BlockMeta::Ptr FromPb(const proto::BlockMeta &meta_pb);
  static proto::BlockMeta ToPb(const BlockMeta &meta);

  // SegmentMeta
  static SegmentMeta::Ptr FromPb(const proto::SegmentMeta &meta_pb);
  static proto::SegmentMeta ToPb(const SegmentMeta &meta);
};

}  // namespace zvec

================================================
FILE: src/db/index/common/schema.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <regex>
#include <set>
#include <unordered_map>
#include <unordered_set>
#include <zvec/db/index_params.h>
#include <zvec/db/schema.h>
#include <zvec/db/status.h>
#include <zvec/db/type.h>
#include "ailego/internal/cpu_features.h"
#include "db/common/constants.h"
#include "db/common/typedef.h"
#include "db/common/utils.h"
#include "db/index/common/type_helper.h"

namespace zvec {

// Compile-time mirror of the RABITQ_COMPILED_AVX512 build macro; 0 when the
// macro is not defined. FieldSchema::validate() compares it against the
// runtime AVX512F CPU flag.
#if defined(RABITQ_COMPILED_AVX512)
constexpr const int kRabitqCompiledAvx512 = RABITQ_COMPILED_AVX512;
#else
constexpr const int kRabitqCompiledAvx512 = 0;
#endif

// Quantize types accepted for each vector data type. A data type with no entry
// here is rejected by FieldSchema::validate() whenever a quantize type other
// than UNDEFINED is requested.
std::unordered_map<DataType, std::set<QuantizeType>> quantize_type_map = {
    {DataType::VECTOR_FP32,
     {QuantizeType::FP16, QuantizeType::INT4, QuantizeType::INT8,
      QuantizeType::RABITQ}},
    // {DataType::VECTOR_FP64, {QuantizeType::FP16}},
    {DataType::SPARSE_VECTOR_FP32, {QuantizeType::FP16}},
};

// Data types FieldSchema::validate() accepts for dense vector fields.
std::unordered_set<DataType> support_dense_vector_type = {
    DataType::VECTOR_FP32,
    DataType::VECTOR_FP16,
    DataType::VECTOR_INT8,
};

// Data types FieldSchema::validate() accepts for sparse vector fields.
std::unordered_set<DataType> support_sparse_vector_type = {
    DataType::SPARSE_VECTOR_FP32,
    DataType::SPARSE_VECTOR_FP16,
};

// Index types FieldSchema::validate() accepts on dense vector fields.
std::unordered_set<IndexType> support_dense_vector_index = {
    IndexType::FLAT, IndexType::HNSW, IndexType::HNSW_RABITQ, IndexType::IVF};

// Index types FieldSchema::validate() accepts on sparse vector fields.
std::unordered_set<IndexType> support_sparse_vector_index = {IndexType::FLAT,
                                                             IndexType::HNSW};

// Validates this field definition and returns the first problem found.
//
// Checks run in this order:
//   1. data type is defined, name is non-empty and matches FIELD_NAME_REGEX;
//   2. for vector fields: dense dimension range, supported data type, and,
//      when index params are attached, supported index type, that the params
//      really are VectorIndexParams, sparse metric (IP only), HNSW_RABITQ
//      constraints (dimension, FP32, metric, platform / CPU features),
//      quantize type compatibility, IVF+IP and COSINE data type limits;
//   3. for scalar fields: index params must not be a vector index type.
//
// Returns Status::OK() when every check passes, otherwise InvalidArgument or
// NotSupported with a message naming the failing rule.
Status FieldSchema::validate() const {
  if (data_type_ == DataType::UNDEFINED) {
    return Status::InvalidArgument("schema validate failed: field[", name_,
                                   "]'s data_type is not defined");
  }
  if (name_.empty()) {
    return Status::InvalidArgument("schema validate failed: field[", name_,
                                   "]'s name is empty");
  }
  if (!std::regex_match(name_, FIELD_NAME_REGEX)) {
    return Status::InvalidArgument(
        "schema validate failed: field[", name_,
        "]'s name cannot pass the regex verification");
  }
  if (is_vector_field()) {
    auto is_sparse = is_sparse_vector();
    if (!is_sparse && (dimension_ == 0 || dimension() > kMaxDenseDimSize)) {
      return Status::InvalidArgument("schema validate failed: field[", name_,
                                     "]'s dimension must be in (0,20000]");
    }

    if (!is_sparse) {
      if (support_dense_vector_type.find(data_type_) ==
          support_dense_vector_type.end()) {
        // NOTE(review): the message mentions FP32 only, while
        // support_dense_vector_type also lists FP16 and INT8.
        return Status::InvalidArgument(
            "schema validate failed: dense_vector's data type only "
            "support FP32, "
            "but field[",
            name_, "]'s data type is ", DataTypeCodeBook::AsString(data_type_));
      }
    } else {
      if (support_sparse_vector_type.find(data_type_) ==
          support_sparse_vector_type.end()) {
        // NOTE(review): the message mentions FP32 only, while
        // support_sparse_vector_type also lists SPARSE_VECTOR_FP16.
        return Status::InvalidArgument(
            "schema validate failed: sparse_vector's data type only "
            "support FP32, "
            "but field[",
            name_, "]'s data type is ", DataTypeCodeBook::AsString(data_type_));
      }
    }

    if (index_params_) {
      auto vector_index_params =
          std::dynamic_pointer_cast<VectorIndexParams>(index_params_);

      // Reject unsupported index types first so that non-vector params (for
      // which the cast above yields null) keep their existing error message.
      if (is_sparse) {
        if (support_sparse_vector_index.find(index_params_->type()) ==
            support_sparse_vector_index.end()) {
          return Status::InvalidArgument(
              "schema validate failed: sparse_vector's index_params only "
              "support FLAT|HNSW index, "
              "but field[",
              name_, "]'s index_type is ",
              IndexTypeCodeBook::AsString(index_params_->type()));
        }
      } else {
        if (support_dense_vector_index.find(index_params_->type()) ==
            support_dense_vector_index.end()) {
          // NOTE(review): the message omits HNSW_RABITQ, which
          // support_dense_vector_index also accepts.
          return Status::InvalidArgument(
              "schema validate failed: dense_vector's index_params only "
              "support FLAT|HNSW|IVF index, but field[",
              name_, "]'s index_type is ",
              IndexTypeCodeBook::AsString(index_params_->type()));
        }
      }

      // Everything below dereferences vector_index_params. Guard against an
      // object whose reported type is a vector index but whose dynamic type
      // is not a VectorIndexParams, instead of dereferencing a null pointer.
      if (!vector_index_params) {
        return Status::InvalidArgument(
            "schema validate failed: field[", name_,
            "]'s index_params is not a vector index params");
      }

      if (is_sparse && vector_index_params->metric_type() != MetricType::IP) {
        return Status::InvalidArgument(
            "schema validate failed: sparse_vector's index_params only "
            "support IP metric, but "
            "field[",
            name_, "]'s metric is ",
            MetricTypeCodeBook::AsString(vector_index_params->metric_type()));
      }

      if (index_params_->type() == IndexType::HNSW_RABITQ) {
        if (dimension_ < kMinRabitqDimSize || dimension_ > kMaxRabitqDimSize) {
          return Status::InvalidArgument(
              "schema validate failed: HNSW_RABITQ index only support "
              "dimension in [",
              kMinRabitqDimSize, ", ", kMaxRabitqDimSize, "]");
        }
        if (data_type_ != DataType::VECTOR_FP32) {
          return Status::InvalidArgument(
              "schema validate failed: HNSW_RABITQ index only support FP32 "
              "data types");
        }
        auto metric_type = vector_index_params->metric_type();
        if (metric_type != MetricType::L2 && metric_type != MetricType::IP &&
            metric_type != MetricType::COSINE) {
          return Status::InvalidArgument(
              "schema validate failed: HNSW_RABITQ index only support "
              "L2/IP/COSINE metric");
        }
        // On builds without RabitQ support the function returns here and the
        // CPU feature checks below are never reached.
#if !RABITQ_SUPPORTED
        return Status::NotSupported(
            "RabitQ is not supported on this platform (Linux x86_64 only)");
#endif
        auto &flags = zvec::ailego::internal::CpuFeatures::static_flags_;
        if (!flags.AVX2 && !flags.AVX512F) {
          return Status::NotSupported(
              "RabitQ requires AVX2/AVX512F to be supported");
        }

        // A binary compiled with AVX512 RabitQ kernels cannot run on a CPU
        // that lacks AVX512F.
        if (kRabitqCompiledAvx512 && !flags.AVX512F) {
          return Status::NotSupported(
              "RabitQ compiled with AVX512F while runtime does not support");
        }
      }


      if (vector_index_params->quantize_type() != QuantizeType::UNDEFINED) {
        auto iter = quantize_type_map.find(data_type_);
        if (iter == quantize_type_map.end()) {
          // No quantize type at all is registered for this data type.
          return Status::InvalidArgument(
              "schema validate failed: ",
              is_sparse ? "sparse_vector" : "dense_vector",
              "'s index_params of ", DataTypeCodeBook::AsString(data_type_),
              " do not support quantize, but field[", name_,
              "]'s quantize_type is ",
              QuantizeTypeCodeBook::AsString(
                  vector_index_params->quantize_type()));
        } else {
          if (iter->second.find(vector_index_params->quantize_type()) ==
              iter->second.end()) {
            // The data type supports quantization, but not this variant.
            return Status::InvalidArgument(
                "schema validate failed: ",
                is_sparse ? "sparse_vector" : "dense_vector",
                "'s index_params of ", DataTypeCodeBook::AsString(data_type_),
                " support ", QuantizeTypeCodeBook::AsString(iter->second),
                " quantize, but field[", name_, "]'s quantize_type is ",
                QuantizeTypeCodeBook::AsString(
                    vector_index_params->quantize_type()));
          }
        }
      }
      if (index_params_->type() == IndexType::IVF &&
          vector_index_params->metric_type() == MetricType::IP) {
        if (data_type_ != DataType::VECTOR_FP16 &&
            data_type_ != DataType::VECTOR_FP32) {
          return Status::InvalidArgument(
              "schema validate failed: IVF index only support FP32/FP16 data "
              "types according to the IP metric");
        }
      }
      if (vector_index_params->metric_type() == MetricType::COSINE) {
        if (data_type_ != DataType::VECTOR_FP16 &&
            data_type_ != DataType::VECTOR_FP32) {
          return Status::InvalidArgument(
              "schema validate failed: cosine metric only supports FP32/FP16 "
              "data types, but field[",
              name_, "]'s data type is ",
              DataTypeCodeBook::AsString(data_type_));
        }
      }
    }
  } else {
    // Scalar field: index params, when present, must not be a vector index.
    if (index_params_) {
      if (index_params_->is_vector_index_type()) {
        return Status::InvalidArgument(
            "schema validate failed: scalar_field's index_params only support "
            "INVERT "
            "index, "
            "but field[",
            name_, "]'s index_type is ",
            IndexTypeCodeBook::AsString(index_params_->type()));
      }
    }
  }
  return Status::OK();
}

// Renders this field as a compact single-line string listing name, data type,
// nullability, dimension and index params ("null" when none are attached).
std::string FieldSchema::to_string() const {
  std::ostringstream out;

  out << "FieldSchema{";
  out << "name:'" << name_ << "'";
  out << ",data_type:" << DataTypeCodeBook::AsString(data_type_);
  out << ",nullable:" << (nullable_ ? "true" : "false");
  out << ",dimension:" << dimension_;

  out << ",index_params:";
  if (!index_params_) {
    out << "null";
  } else {
    out << index_params_->to_string();
  }

  out << "}";
  return out.str();
}

std::string FieldSchema::to_string_formatted(int indent_level) const {
  std::ostringstream oss;
  if (is_vector_field()) {
    oss << indent(indent_level) << "FieldSchema[vector]{\n";
  } else {
    oss << indent(indent_level) << "FieldSchema[scalar]{\n";
  }

  oss << indent(indent_level + 1) << "name: '" << name_ << "',\n"
      << indent(indent_level + 1)
      << "data_type: " << DataTypeCodeBook::AsString(data_type_) << ",\n";

  if (is_vector_field()) {
    if (is_dense_vector()) {
      oss << indent(indent_level + 1) << "dimension: " << dimension_ << ",\n";
    }
  } else {
    oss << indent(indent_level + 1)
        << "nullable: " << (nullable_ ? "true" : "false") << ",\n";
  }

  if (index_params_) {
    oss << indent(indent_level + 1)
        << "index_params: " << index_params_->to_string() << "\n";
  } else {
    oss << indent(indent_level + 1) << "index_params: null\n";
  }

  oss << indent(indent_level) << "}";
  return oss.str();
}

// Validates the collection definition and returns the first failure found:
// the collection name, the number of forward (non-vector) fields, the
// per-segment document limit, the number of vector fields, and finally every
// field's own validate().
Status CollectionSchema::validate() const {
  if (name_.empty()) {
    return Status::InvalidArgument("schema validate failed: name is empty");
  }

  const bool name_matches = std::regex_match(name_, COLLECTION_NAME_REGEX);
  if (!name_matches) {
    return Status::InvalidArgument(
        "schema validate failed: collection[", name_,
        "]'s name cannot pass the regex verification");
  }

  if (forward_fields().size() > kMaxScalarFieldSize) {
    return Status::InvalidArgument(
        "schema validate failed: collection[", name_,
        "]'s field size must <= ", kMaxScalarFieldSize);
  }

  if (max_doc_count_per_segment_ < MAX_DOC_COUNT_PER_SEGMENT_MIN_THRESHOLD) {
    return Status::InvalidArgument(
        "schema validate failed: max_doc_count_per_segment must >= ",
        MAX_DOC_COUNT_PER_SEGMENT_MIN_THRESHOLD);
  }

  // At least one vector field is required, and no more than the limit.
  auto vectors = vector_fields();
  if (vectors.empty()) {
    return Status::InvalidArgument(
        "schema validate failed: vector fields is empty");
  }
  if (vectors.size() > kMaxVectorFieldSize) {
    return Status::InvalidArgument(
        "schema validate failed: collection[", name_,
        "]'s vector field size must <= ", kMaxVectorFieldSize);
  }

  for (auto &entry : fields_) {
    auto field_status = entry->validate();
    CHECK_RETURN_STATUS(field_status);
  }
  return Status::OK();
}

std::string CollectionSchema::to_string() const {
  std::ostringstream oss;
  oss << "CollectionSchema{"
      << "name:'" << name_ << "'"
      << ",max_doc_count_per_segment:" << max_doc_count_per_segment_
      << ",fields:[";

  for (size_t i = 0; i < fields_.size(); ++i) {
    if (i > 0) oss << ",";
    oss << fields_[i]->to_string();
  }

  oss << "]}";
  return oss.str();
}


// Renders the collection as an indented multi-line string. Each field is
// printed two levels deeper than `indent_level`, one per line, with commas
// between (not after) entries.
std::string CollectionSchema::to_string_formatted(int indent_level) const {
  const int member_level = indent_level + 1;
  const int nested_level = indent_level + 2;

  std::ostringstream out;
  out << indent(indent_level) << "CollectionSchema{\n";
  out << indent(member_level) << "name: '" << name_ << "',\n";
  out << indent(member_level)
      << "max_doc_count_per_segment: " << max_doc_count_per_segment_ << ",\n";
  out << indent(member_level) << "fields: [\n";

  const size_t field_count = fields_.size();
  for (size_t pos = 0; pos < field_count; ++pos) {
    out << fields_[pos]->to_string_formatted(nested_level);
    out << (pos + 1 < field_count ? ",\n" : "\n");
  }

  out << indent(member_level) << "]\n";
  out << indent(indent_level) << "}";
  return out.str();
}

// Appends a field to the schema.
//
// Returns InvalidArgument when `column_schema` is null and AlreadyExists when
// a field with the same name is already registered. A vector field that
// arrives without index params has DefaultVectorIndexParams assigned to it
// (the passed-in object itself is modified) before it is stored.
Status CollectionSchema::add_field(FieldSchema::Ptr column_schema) {
  // A null schema would be dereferenced below; reject it explicitly.
  if (column_schema == nullptr) {
    return Status::InvalidArgument("field schema is null");
  }

  // Check if field already exists
  if (has_field(column_schema->name())) {
    return Status::AlreadyExists("field[", column_schema->name(),
                                 "] already exists in schema");
  }

  // Vector fields always carry index params; fall back to the default.
  if (column_schema->is_vector_field() &&
      column_schema->index_params() == nullptr) {
    column_schema->set_index_params(DefaultVectorIndexParams);
  }

  // Add field to list and map
  fields_.push_back(column_schema);
  fields_map_[column_schema->name()] = column_schema;

  return Status::OK();
}

// Replaces the field named `column_name` with `new_column_options`, which may
// carry a different name (rename).
//
// Returns NotFound when `column_name` is not registered, InvalidArgument when
// `new_column_options` is null, and AlreadyExists when the new name differs
// from `column_name` and is already taken. On success both the name map and
// the ordered field list point at `new_column_options`; the field keeps its
// position in the list.
Status CollectionSchema::alter_field(
    const std::string &column_name,
    const FieldSchema::Ptr &new_column_options) {
  // Check if field exists
  if (!has_field(column_name)) {
    return Status::NotFound("field[", column_name, "] not found in schema");
  }

  // A null replacement would be dereferenced below; reject it explicitly.
  // Checked after the existence test so NotFound keeps its precedence.
  if (new_column_options == nullptr) {
    return Status::InvalidArgument("new schema for field[", column_name,
                                   "] is null");
  }

  std::string new_column_name = new_column_options->name();

  // If renaming to an existing field name (and it's not the same field)
  if (new_column_name != column_name && has_field(new_column_name)) {
    return Status::AlreadyExists("field[", new_column_name,
                                 "] already exists in schema");
  }

  // Update map: remove old entry if name changed, add new entry
  if (new_column_name != column_name) {
    fields_map_.erase(column_name);
  }
  fields_map_[new_column_name] = new_column_options;

  // Update list
  for (auto &field : fields_) {
    if (field->name() == column_name) {
      field = new_column_options;
      break;
    }
  }

  return Status::OK();
}

// Removes the field named `column_name` from both the name map and the
// ordered field list. Returns NotFound when no such field is registered.
Status CollectionSchema::drop_field(const std::string &column_name) {
  // Check if field exists
  if (!has_field(column_name)) {
    return Status::NotFound("field[", column_name, "] not found in schema");
  }

  // Work on a private copy of the key. If the caller passed a reference to
  // the dropped field's own name string, releasing the map entry and
  // compacting the list below can destroy that field while the predicate is
  // still comparing against it.
  const std::string target_name = column_name;

  // Remove from map
  fields_map_.erase(target_name);

  // Remove from list
  fields_.erase(std::remove_if(fields_.begin(), fields_.end(),
                               [&target_name](const FieldSchema::Ptr &field) {
                                 return field->name() == target_name;
                               }),
                fields_.end());

  return Status::OK();
}

// Reports whether a field named `column` is registered in the name map.
bool CollectionSchema::has_field(const std::string &column) const {
  return fields_map_.count(column) > 0;
}

// Looks up a field by name. Returns a non-owning pointer to the field, or
// nullptr when no field with that name is registered.
const FieldSchema *CollectionSchema::get_field(
    const std::string &column) const {
  const auto entry = fields_map_.find(column);
  if (entry == fields_map_.end()) {
    return nullptr;
  }
  return entry->second.get();
}

// Mutable overload of get_field(): returns a non-owning pointer to the field
// named `column`, or nullptr when no such field is registered.
FieldSchema *CollectionSchema::get_field(const std::string &column) {
  const auto entry = fields_map_.find(column);
  if (entry == fields_map_.end()) {
    return nullptr;
  }
  return entry->second.get();
}

// Returns the field named `column` only when it exists and is not a vector
// field; otherwise nullptr.
const FieldSchema *CollectionSchema::get_forward_field(
    const std::string &column) const {
  const FieldSchema *candidate = get_field(column);
  if (candidate == nullptr || candidate->is_vector_field()) {
    return nullptr;
  }
  return candidate;
}

// Mutable overload: returns the field named `column` only when it exists and
// is not a vector field; otherwise nullptr.
FieldSchema *CollectionSchema::get_forward_field(const std::string &column) {
  FieldSchema *candidate = get_field(column);
  if (candidate == nullptr || candidate->is_vector_field()) {
    return nullptr;
  }
  return candidate;
}

// Returns the field named `column` only when it exists and is a vector field;
// otherwise nullptr.
const FieldSchema *CollectionSchema::get_vector_field(
    const std::string &column) const {
  const FieldSchema *candidate = get_field(column);
  if (candidate == nullptr || !candidate->is_vector_field()) {
    return nullptr;
  }
  return candidate;
}

// Mutable overload: returns the field named `column` only when it exists and
// is a vector field; otherwise nullptr.
FieldSchema *CollectionSchema::get_vector_field(const std::string &column) {
  FieldSchema *candidate = get_field(column);
  if (candidate == nullptr || !candidate->is_vector_field()) {
    return nullptr;
  }
  return candidate;
}

// Returns all fields in the order they are stored. The list is returned by
// value, so callers get their own copy of the pointer list.
FieldSchemaPtrList CollectionSchema::fields() const {
  return fields_;
}

// Returns every non-vector field, preserving the stored field order.
FieldSchemaPtrList CollectionSchema::forward_fields() const {
  FieldSchemaPtrList result;
  for (const auto &entry : fields_) {
    if (entry->is_vector_field()) {
      continue;
    }
    result.push_back(entry);
  }
  return result;
}

// Returns every non-vector field that has index params attached, preserving
// the stored field order.
FieldSchemaPtrList CollectionSchema::forward_fields_with_index() const {
  FieldSchemaPtrList result;
  for (const auto &entry : fields_) {
    if (entry->is_vector_field()) {
      continue;
    }
    if (entry->index_params() == nullptr) {
      continue;
    }
    result.push_back(entry);
  }
  return result;
}

std::vector<std::string> CollectionSchema::forward_field_names() const {
  std::vector<std::string> names;
  for (const auto &field : fields_) {
    if (!field->is_vector_field()) {
      names.push_back(field->name());
    }
  }
  return names;
}

std::vector<std::string> CollectionSchema::forward_field_names_with_index()
    const {
  std::vector<std::string> names;
  for (const auto &field : fields_) {
    if (!field->is_vector_field() && field->index_params() != nullptr) {
      names.push_back(field->name());
    }
  }
  return names;
}

std::vector<std::string> CollectionSchema::all_field_names() const {
  std::vector<std::string> names;
  for (const auto &field : fields_) {
    names.push_back(field->name());
  }
  return names;
}

// Returns every vector field, preserving the stored field order.
FieldSchemaPtrList CollectionSchema::vector_fields() const {
  FieldSchemaPtrList result;
  for (const auto &entry : fields_) {
    if (!entry->is_vector_field()) {
      continue;
    }
    result.push_back(entry);
  }
  return result;
}

// Returns the configured per-segment document limit. validate() rejects
// values below MAX_DOC_COUNT_PER_SEGMENT_MIN_THRESHOLD.
uint64_t CollectionSchema::max_doc_count_per_segment() const {
  return max_doc_count_per_segment_;
}

// Stores the per-segment document limit. The value is not range-checked here;
// the lower bound is enforced by validate().
void CollectionSchema::set_max_doc_count_per_segment(
    uint64_t max_doc_count_per_segment) {
  max_doc_count_per_segment_ = max_doc_count_per_segment;
}

// Attaches `index_params` to the field named `column`, replacing whatever
// index params it had. Returns NotFound when the field does not exist.
Status CollectionSchema::add_index(const std::string &column,
                                   const IndexParams::Ptr &index_params) {
  FieldSchema *target = get_field(column);
  if (target == nullptr) {
    return Status::NotFound("field[", column, "] not found in schema");
  }

  target->set_index_params(index_params);
  return Status::OK();
}

// Removes the index from the field named `column`. Vector fields are reset to
// DefaultVectorIndexParams rather than left without params; other fields have
// their index params cleared. Returns NotFound when the field does not exist.
Status CollectionSchema::drop_index(const std::string &column) {
  FieldSchema *target = get_field(column);
  if (target == nullptr) {
    return Status::NotFound("field[", column, "] not found in schema");
  }

  if (target->is_vector_field()) {
    target->set_index_params(DefaultVectorIndexParams);
  } else {
    target->set_index_params(nullptr);
  }
  return Status::OK();
}

// Reports whether the field named `column` has an index.
//  - unknown field: false
//  - non-vector field: true iff index params are set
//  - vector field: true iff index params are set and differ from
//    DefaultVectorIndexParams (the value drop_index() resets them to)
bool CollectionSchema::has_index(const std::string &column) const {
  auto target = get_field(column);
  if (!target) {
    return false;
  }

  if (!target->is_vector_field()) {
    return target->index_params() != nullptr;
  }

  return target->index_params() != nullptr &&
         *target->index_params() != DefaultVectorIndexParams;
}

}  // namespace zvec

================================================
FILE: src/db/index/common/stats.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <sstream>
#include <zvec/db/stats.h>
#include "db/common/utils.h"

namespace zvec {
// Renders the stats on a single line as
// "CollectionStats{doc_count:<n>,index_completeness:{k:v,k:v}}".
std::string CollectionStats::to_string() const {
  std::ostringstream out;
  out << "CollectionStats{doc_count:" << doc_count << ",index_completeness:{";

  // Empty before the first entry, "," before every following one.
  const char *separator = "";
  for (const auto &entry : index_completeness) {
    out << separator << entry.first << ":" << entry.second;
    separator = ",";
  }

  out << "}}";
  return out.str();
}

// Renders the stats as a multi-line block. The outer braces are prefixed with
// indent(indent_level), the two members with indent(indent_level + 1), and
// each index_completeness entry with indent(indent_level + 2).
std::string CollectionStats::to_string_formatted(int indent_level) const {
  std::ostringstream out;
  out << indent(indent_level) << "CollectionStats{\n";
  out << indent(indent_level + 1) << "doc_count: " << doc_count << ",\n";
  out << indent(indent_level + 1) << "index_completeness: {\n";

  bool wrote_entry = false;
  for (const auto &entry : index_completeness) {
    if (wrote_entry) {
      out << ",\n";
    }
    out << indent(indent_level + 2) << entry.first << ": " << entry.second;
    wrote_entry = true;
  }

  // Terminate the last entry line only when at least one entry was written.
  if (wrote_entry) {
    out << "\n";
  }
  out << indent(indent_level + 1) << "}\n";
  out << indent(indent_level) << "}";

  return out.str();
}

}  // namespace zvec

================================================
FILE: src/db/index/common/type_helper.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "type_helper.h"
#include <zvec/core/framework/index_meta.h>

namespace zvec {

// Maps a vector DataType onto the core::IndexMeta element data type.
// Dense and sparse vectors with the same element type share one result
// (e.g. VECTOR_FP32 and SPARSE_VECTOR_FP32 both map to DT_FP32). Every other
// DataType maps to DT_UNDEFINED.
core::IndexMeta::DataType DataTypeCodeBook::to_data_type(DataType type) {
  using Target = core::IndexMeta::DataType;

  switch (type) {
    case DataType::VECTOR_FP32:
    case DataType::SPARSE_VECTOR_FP32:
      return Target::DT_FP32;

    case DataType::VECTOR_FP16:
    case DataType::SPARSE_VECTOR_FP16:
      return Target::DT_FP16;

    case DataType::VECTOR_FP64:
      return Target::DT_FP64;
    case DataType::VECTOR_INT8:
      return Target::DT_INT8;
    case DataType::VECTOR_INT16:
      return Target::DT_INT16;
    case DataType::VECTOR_INT4:
      return Target::DT_INT4;
    case DataType::VECTOR_BINARY32:
      return Target::DT_BINARY32;
    case DataType::VECTOR_BINARY64:
      return Target::DT_BINARY64;

    default:
      return Target::DT_UNDEFINED;
  }
}

}  // namespace zvec

================================================
FILE: src/db/index/common/type_helper.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <zvec/core/framework/index_meta.h>
#include <zvec/db/type.h>
#include "proto/zvec.pb.h"

namespace zvec {

//! Index Type Codebook
struct IndexTypeCodeBook {
  //! Convert protobuf IndexType to C++ IndexType; values without a mapping
  //! yield IndexType::UNDEFINED
  static IndexType Get(proto::IndexType type) {
    IndexType converted = IndexType::UNDEFINED;
    switch (type) {
      case proto::IT_HNSW:
        converted = IndexType::HNSW;
        break;
      case proto::IT_HNSW_RABITQ:
        converted = IndexType::HNSW_RABITQ;
        break;
      case proto::IT_FLAT:
        converted = IndexType::FLAT;
        break;
      case proto::IT_IVF:
        converted = IndexType::IVF;
        break;
      case proto::IT_INVERT:
        converted = IndexType::INVERT;
        break;
      default:
        break;
    }
    return converted;
  }

  //! Convert C++ IndexType to protobuf IndexType; values without a mapping
  //! yield proto::IT_UNDEFINED
  static proto::IndexType Get(IndexType type) {
    proto::IndexType converted = proto::IT_UNDEFINED;
    switch (type) {
      case IndexType::HNSW:
        converted = proto::IT_HNSW;
        break;
      case IndexType::HNSW_RABITQ:
        converted = proto::IT_HNSW_RABITQ;
        break;
      case IndexType::FLAT:
        converted = proto::IT_FLAT;
        break;
      case IndexType::IVF:
        converted = proto::IT_IVF;
        break;
      case IndexType::INVERT:
        converted = proto::IT_INVERT;
        break;
      default:
        break;
    }
    return converted;
  }

  //! Convert C++ IndexType to its display name; values without a mapping
  //! yield "UNDEFINED"
  static std::string AsString(IndexType type) {
    const char *label = "UNDEFINED";
    switch (type) {
      case IndexType::HNSW:
        label = "HNSW";
        break;
      case IndexType::HNSW_RABITQ:
        label = "HNSW_RABITQ";
        break;
      case IndexType::FLAT:
        label = "FLAT";
        break;
      case IndexType::IVF:
        label = "IVF";
        break;
      case IndexType::INVERT:
        label = "INVERT";
        break;
      default:
        break;
    }
    return label;
  }
};

//! Data Type Codebook: conversions between proto::DataType, the C++ DataType
//! and display strings
struct DataTypeCodeBook {
  //! True when `type` lies in the inclusive proto enum range
  //! [DT_ARRAY_BINARY, DT_ARRAY_DOUBLE]
  static bool IsArrayType(proto::DataType type) {
    return type >= proto::DataType::DT_ARRAY_BINARY &&
           type <= proto::DataType::DT_ARRAY_DOUBLE;
  }

  //! Convert protobuf DataType to C++ DataType; values without a mapping
  //! yield DataType::UNDEFINED
  static DataType Get(proto::DataType type) {
    switch (type) {
      // scalar types
      case proto::DataType::DT_BINARY:
        return DataType::BINARY;
      case proto::DataType::DT_STRING:
        return DataType::STRING;
      case proto::DataType::DT_BOOL:
        return DataType::BOOL;
      case proto::DataType::DT_INT32:
        return DataType::INT32;
      case proto::DataType::DT_INT64:
        return DataType::INT64;
      case proto::DataType::DT_UINT32:
        return DataType::UINT32;
      case proto::DataType::DT_UINT64:
        return DataType::UINT64;
      case proto::DataType::DT_FLOAT:
        return DataType::FLOAT;
      case proto::DataType::DT_DOUBLE:
        return DataType::DOUBLE;

      // dense vector types
      case proto::DataType::DT_VECTOR_BINARY32:
        return DataType::VECTOR_BINARY32;
      case proto::DataType::DT_VECTOR_BINARY64:
        return DataType::VECTOR_BINARY64;
      case proto::DataType::DT_VECTOR_FP16:
        return DataType::VECTOR_FP16;
      case proto::DataType::DT_VECTOR_FP32:
        return DataType::VECTOR_FP32;
      case proto::DataType::DT_VECTOR_FP64:
        return DataType::VECTOR_FP64;
      case proto::DataType::DT_VECTOR_INT4:
        return DataType::VECTOR_INT4;
      case proto::DataType::DT_VECTOR_INT8:
        return DataType::VECTOR_INT8;
      case proto::DataType::DT_VECTOR_INT16:
        return DataType::VECTOR_INT16;

      // sparse vector types
      case proto::DataType::DT_SPARSE_VECTOR_FP16:
        return DataType::SPARSE_VECTOR_FP16;
      case proto::DataType::DT_SPARSE_VECTOR_FP32:
        return DataType::SPARSE_VECTOR_FP32;

      // array types
      case proto::DataType::DT_ARRAY_BINARY:
        return DataType::ARRAY_BINARY;
      case proto::DataType::DT_ARRAY_STRING:
        return DataType::ARRAY_STRING;
      case proto::DataType::DT_ARRAY_BOOL:
        return DataType::ARRAY_BOOL;
      case proto::DataType::DT_ARRAY_INT32:
        return DataType::ARRAY_INT32;
      case proto::DataType::DT_ARRAY_INT64:
        return DataType::ARRAY_INT64;
      case proto::DataType::DT_ARRAY_UINT32:
        return DataType::ARRAY_UINT32;
      case proto::DataType::DT_ARRAY_UINT64:
        return DataType::ARRAY_UINT64;
      case proto::DataType::DT_ARRAY_FLOAT:
        return DataType::ARRAY_FLOAT;
      case proto::DataType::DT_ARRAY_DOUBLE:
        return DataType::ARRAY_DOUBLE;

      default:
        return DataType::UNDEFINED;
    }
  }

  //! Convert C++ DataType to protobuf DataType; values without a mapping
  //! yield proto::DataType::DT_UNDEFINED
  static proto::DataType Get(const DataType type) {
    switch (type) {
      // scalar types
      case DataType::BINARY:
        return proto::DataType::DT_BINARY;
      case DataType::STRING:
        return proto::DataType::DT_STRING;
      case DataType::BOOL:
        return proto::DataType::DT_BOOL;
      case DataType::INT32:
        return proto::DataType::DT_INT32;
      case DataType::INT64:
        return proto::DataType::DT_INT64;
      case DataType::UINT32:
        return proto::DataType::DT_UINT32;
      case DataType::UINT64:
        return proto::DataType::DT_UINT64;
      case DataType::FLOAT:
        return proto::DataType::DT_FLOAT;
      case DataType::DOUBLE:
        return proto::DataType::DT_DOUBLE;

      // dense vector types
      case DataType::VECTOR_BINARY32:
        return proto::DataType::DT_VECTOR_BINARY32;
      case DataType::VECTOR_BINARY64:
        return proto::DataType::DT_VECTOR_BINARY64;
      case DataType::VECTOR_FP16:
        return proto::DataType::DT_VECTOR_FP16;
      case DataType::VECTOR_FP32:
        return proto::DataType::DT_VECTOR_FP32;
      case DataType::VECTOR_FP64:
        return proto::DataType::DT_VECTOR_FP64;
      case DataType::VECTOR_INT4:
        return proto::DataType::DT_VECTOR_INT4;
      case DataType::VECTOR_INT8:
        return proto::DataType::DT_VECTOR_INT8;
      case DataType::VECTOR_INT16:
        return proto::DataType::DT_VECTOR_INT16;

      // sparse vector types
      case DataType::SPARSE_VECTOR_FP16:
        return proto::DataType::DT_SPARSE_VECTOR_FP16;
      case DataType::SPARSE_VECTOR_FP32:
        return proto::DataType::DT_SPARSE_VECTOR_FP32;

      // array types
      case DataType::ARRAY_BINARY:
        return proto::DataType::DT_ARRAY_BINARY;
      case DataType::ARRAY_BOOL:
        return proto::DataType::DT_ARRAY_BOOL;
      case DataType::ARRAY_DOUBLE:
        return proto::DataType::DT_ARRAY_DOUBLE;
      case DataType::ARRAY_FLOAT:
        return proto::DataType::DT_ARRAY_FLOAT;
      case DataType::ARRAY_INT32:
        return proto::DataType::DT_ARRAY_INT32;
      case DataType::ARRAY_INT64:
        return proto::DataType::DT_ARRAY_INT64;
      case DataType::ARRAY_STRING:
        return proto::DataType::DT_ARRAY_STRING;
      case DataType::ARRAY_UINT32:
        return proto::DataType::DT_ARRAY_UINT32;
      case DataType::ARRAY_UINT64:
        return proto::DataType::DT_ARRAY_UINT64;

      default:
        return proto::DataType::DT_UNDEFINED;
    }
  }

  //! Convert C++ DataType to its display name. Unlike the other codebooks,
  //! values without a mapping yield an empty string, not "UNDEFINED"
  static std::string AsString(DataType type) {
    switch (type) {
      // scalar types
      case DataType::BINARY:
        return "BINARY";
      case DataType::STRING:
        return "STRING";
      case DataType::BOOL:
        return "BOOL";
      case DataType::INT32:
        return "INT32";
      case DataType::INT64:
        return "INT64";
      case DataType::UINT32:
        return "UINT32";
      case DataType::UINT64:
        return "UINT64";
      case DataType::FLOAT:
        return "FLOAT";
      case DataType::DOUBLE:
        return "DOUBLE";

      // dense vector types
      case DataType::VECTOR_BINARY32:
        return "VECTOR_BINARY32";
      case DataType::VECTOR_BINARY64:
        return "VECTOR_BINARY64";
      case DataType::VECTOR_FP16:
        return "VECTOR_FP16";
      case DataType::VECTOR_FP32:
        return "VECTOR_FP32";
      case DataType::VECTOR_FP64:
        return "VECTOR_FP64";
      case DataType::VECTOR_INT4:
        return "VECTOR_INT4";
      case DataType::VECTOR_INT8:
        return "VECTOR_INT8";
      case DataType::VECTOR_INT16:
        return "VECTOR_INT16";

      // sparse vector types
      case DataType::SPARSE_VECTOR_FP16:
        return "SPARSE_VECTOR_FP16";
      case DataType::SPARSE_VECTOR_FP32:
        return "SPARSE_VECTOR_FP32";

      // array types
      case DataType::ARRAY_BINARY:
        return "ARRAY_BINARY";
      case DataType::ARRAY_BOOL:
        return "ARRAY_BOOL";
      case DataType::ARRAY_DOUBLE:
        return "ARRAY_DOUBLE";
      case DataType::ARRAY_FLOAT:
        return "ARRAY_FLOAT";
      case DataType::ARRAY_INT32:
        return "ARRAY_INT32";
      case DataType::ARRAY_INT64:
        return "ARRAY_INT64";
      case DataType::ARRAY_STRING:
        return "ARRAY_STRING";
      case DataType::ARRAY_UINT32:
        return "ARRAY_UINT32";
      case DataType::ARRAY_UINT64:
        return "ARRAY_UINT64";

      default:
        return "";
    }
  }

  //! Map a vector DataType onto the core::IndexMeta element data type
  static core::IndexMeta::DataType to_data_type(DataType type);
};

//! Metric Type Codebook
struct MetricTypeCodeBook {
  //! Convert protobuf MetricType to C++ MetricType; values without a mapping
  //! yield MetricType::UNDEFINED
  static MetricType Get(proto::MetricType type) {
    MetricType converted = MetricType::UNDEFINED;
    switch (type) {
      case proto::MetricType::MT_IP:
        converted = MetricType::IP;
        break;
      case proto::MetricType::MT_L2:
        converted = MetricType::L2;
        break;
      case proto::MetricType::MT_COSINE:
        converted = MetricType::COSINE;
        break;
      default:
        break;
    }
    return converted;
  }

  //! Convert C++ MetricType to protobuf MetricType; values without a mapping
  //! yield proto::MetricType::MT_UNDEFINED
  static proto::MetricType Get(MetricType type) {
    proto::MetricType converted = proto::MetricType::MT_UNDEFINED;
    switch (type) {
      case MetricType::IP:
        converted = proto::MetricType::MT_IP;
        break;
      case MetricType::L2:
        converted = proto::MetricType::MT_L2;
        break;
      case MetricType::COSINE:
        converted = proto::MetricType::MT_COSINE;
        break;
      default:
        break;
    }
    return converted;
  }

  //! Convert C++ MetricType to its display name; values without a mapping
  //! yield "UNDEFINED"
  static std::string AsString(MetricType type) {
    const char *label = "UNDEFINED";
    switch (type) {
      case MetricType::IP:
        label = "IP";
        break;
      case MetricType::L2:
        label = "L2";
        break;
      case MetricType::COSINE:
        label = "COSINE";
        break;
      default:
        break;
    }
    return label;
  }
};

//! Quantize Type Codebook
struct QuantizeTypeCodeBook {
  //! Convert protobuf QuantizeType to C++ QuantizeType; values without a
  //! mapping yield QuantizeType::UNDEFINED
  static QuantizeType Get(proto::QuantizeType type) {
    switch (type) {
      case proto::QuantizeType::QT_FP16:
        return QuantizeType::FP16;
      case proto::QuantizeType::QT_INT4:
        return QuantizeType::INT4;
      case proto::QuantizeType::QT_INT8:
        return QuantizeType::INT8;
      case proto::QuantizeType::QT_RABITQ:
        return QuantizeType::RABITQ;
      default:
        return QuantizeType::UNDEFINED;
    }
  }

  //! Convert C++ QuantizeType to protobuf QuantizeType; values without a
  //! mapping yield proto::QuantizeType::QT_UNDEFINED
  static proto::QuantizeType Get(QuantizeType type) {
    switch (type) {
      case QuantizeType::FP16:
        return proto::QuantizeType::QT_FP16;
      case QuantizeType::INT4:
        return proto::QuantizeType::QT_INT4;
      case QuantizeType::INT8:
        return proto::QuantizeType::QT_INT8;
      case QuantizeType::RABITQ:
        return proto::QuantizeType::QT_RABITQ;
      default:
        return proto::QuantizeType::QT_UNDEFINED;
    }
  }

  //! Convert C++ QuantizeType to its display name; values without a mapping
  //! yield "UNDEFINED"
  static std::string AsString(QuantizeType type) {
    switch (type) {
      case QuantizeType::FP16:
        return "FP16";
      case QuantizeType::INT4:
        return "INT4";
      case QuantizeType::INT8:
        return "INT8";
      case QuantizeType::RABITQ:
        return "RABITQ";
      default:
        return "UNDEFINED";
    }
  }

  //! Join the display names of every element of `type` with ',' in set
  //! iteration order. An empty set yields an empty string.
  //!
  //! The separator is written before each element except the first, so no
  //! trailing comma has to be trimmed afterwards (trimming with
  //! `size() - 1` wraps around to npos for an empty set). The set is taken
  //! by const reference to avoid copying it on every call.
  static std::string AsString(const std::set<QuantizeType> &type) {
    std::string joined;
    for (const auto item : type) {
      if (!joined.empty()) {
        joined += ',';
      }
      joined += QuantizeTypeCodeBook::AsString(item);
    }
    return joined;
  }
};

//! Block Type Codebook
struct BlockTypeCodeBook {
  //! Convert protobuf BlockType to C++ BlockType; values without a mapping
  //! yield BlockType::UNDEFINED
  static BlockType Get(proto::BlockType type) {
    switch (type) {
      case proto::BlockType::BT_SCALAR:
        return BlockType::SCALAR;
      case proto::BlockType::BT_SCALAR_INDEX:
        return BlockType::SCALAR_INDEX;
      case proto::BlockType::BT_VECTOR_INDEX:
        return BlockType::VECTOR_INDEX;
      case proto::BlockType::BT_VECTOR_INDEX_QUANTIZE:
        return BlockType::VECTOR_INDEX_QUANTIZE;
      default:
        return BlockType::UNDEFINED;
    }
  }

  //! Convert C++ BlockType to protobuf BlockType; values without a mapping
  //! yield proto::BlockType::BT_UNDEFINED
  static proto::BlockType Get(BlockType type) {
    switch (type) {
      case BlockType::SCALAR:
        return proto::BlockType::BT_SCALAR;
      case BlockType::SCALAR_INDEX:
        return proto::BlockType::BT_SCALAR_INDEX;
      case BlockType::VECTOR_INDEX:
        return proto::BlockType::BT_VECTOR_INDEX;
      case BlockType::VECTOR_INDEX_QUANTIZE:
        return proto::BlockType::BT_VECTOR_INDEX_QUANTIZE;
      default:
        return proto::BlockType::BT_UNDEFINED;
    }
  }

  //! Convert C++ BlockType to its display name; values without a mapping
  //! yield "UNDEFINED"
  static std::string AsString(BlockType type) {
    const char *label = "UNDEFINED";
    switch (type) {
      case BlockType::SCALAR:
        label = "SCALAR";
        break;
      case BlockType::SCALAR_INDEX:
        label = "SCALAR_INDEX";
        break;
      case BlockType::VECTOR_INDEX:
        label = "VECTOR_INDEX";
        break;
      case BlockType::VECTOR_INDEX_QUANTIZE:
        label = "VECTOR_INDEX_QUANTIZE";
        break;
      default:
        break;
    }
    return label;
  }
};

}  // namespace zvec

================================================
FILE: src/db/index/common/version_manager.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "version_manager.h"
#include <cerrno>
#include <cstdint>
#include <cstring>
#include <filesystem>
#include <fstream>
#include <mutex>
#include <regex>
#include <string>
#include <proto/zvec.pb.h>
#include <zvec/ailego/logger/logger.h>
#include <zvec/ailego/pattern/expected.hpp>
#include <zvec/ailego/utility/string_helper.h>
#include <zvec/db/status.h>
#include "db/common/file_helper.h"
#include "db/common/typedef.h"
#include "db/index/common/proto_converter.h"
#include "db/index/common/type_helper.h"

namespace zvec {

// Reads the serialized proto::Manifest stored at `path` and copies its
// contents (schema, mmap flag, persisted/writing segment metas, path suffixes
// and next segment id) into `*version`.
//
// Returns InternalError when the file cannot be opened or parsed, OK
// otherwise. `version` is dereferenced without a null check. The status
// returned by add_persisted_segment_meta() is not inspected.
Status Version::Load(const std::string &path, Version *version) {
  std::ifstream input(path, std::ios::binary);
  if (!input.is_open()) {
    LOG_ERROR("Failed to open file: %s", path.c_str());
    return Status::InternalError("Failed to open file");
  }

  proto::Manifest manifest;
  const bool parsed = manifest.ParseFromIstream(&input);
  if (!parsed) {
    LOG_ERROR("Failed to parse manifest from file: %s", path.c_str());
    return Status::InternalError("Failed to parse manifest");
  }

  // Schema and mmap flag.
  CollectionSchema::Ptr loaded_schema =
      ProtoConverter::FromPb(manifest.schema());
  version->set_schema(*loaded_schema);
  version->set_enable_mmap(manifest.enable_mmap());

  // Persisted segments, in manifest order.
  const int persisted_count = manifest.persisted_segment_metas_size();
  for (int idx = 0; idx != persisted_count; ++idx) {
    SegmentMeta::Ptr persisted =
        ProtoConverter::FromPb(manifest.persisted_segment_metas(idx));
    version->add_persisted_segment_meta(persisted);
  }

  // The writing segment is optional in the manifest.
  if (manifest.has_writing_segment_meta()) {
    SegmentMeta::Ptr writing =
        ProtoConverter::FromPb(manifest.writing_segment_meta());
    version->reset_writing_segment_meta(writing);
  }

  version->set_id_map_path_suffix(manifest.id_map_path_suffix());
  version->set_delete_snapshot_path_suffix(
      manifest.delete_snapshot_path_suffix());
  version->set_next_segment_id(manifest.next_segment_id());

  return Status::OK();
}

// Serializes `version` as a proto::Manifest and writes it to `path`
// (the file is created or truncated).
//
// Returns InternalError when the file cannot be opened, when serialization
// fails, or when the buffered bytes cannot be flushed to the file; OK
// otherwise.
Status Version::Save(const std::string &path, const Version &version) {
  std::ofstream ofs(path, std::ios::binary);
  if (!ofs.is_open()) {
    LOG_ERROR("Failed to open file: %s, err: %s", path.c_str(),
              strerror(errno));
    // Status factories concatenate their arguments (see the NotFound calls in
    // this file); a printf-style "%s" would end up verbatim in the message.
    return Status::InternalError("Failed to open file: ", path);
  }

  proto::Manifest manifest;

  // set schema
  auto schema = ProtoConverter::ToPb(version.schema());
  manifest.mutable_schema()->Swap(&schema);

  manifest.set_enable_mmap(version.enable_mmap());

  // set segments meta
  for (auto &meta : version.persisted_segment_metas()) {
    auto meta_pb = ProtoConverter::ToPb(*meta);
    manifest.add_persisted_segment_metas()->Swap(&meta_pb);
  }

  // The writing segment meta is optional.
  if (version.writing_segment_meta()) {
    auto meta_pb = ProtoConverter::ToPb(*version.writing_segment_meta());
    manifest.mutable_writing_segment_meta()->Swap(&meta_pb);
  }

  manifest.set_id_map_path_suffix(version.id_map_path_suffix());
  manifest.set_delete_snapshot_path_suffix(
      version.delete_snapshot_path_suffix());
  manifest.set_next_segment_id(version.next_segment_id());

  if (!manifest.SerializeToOstream(&ofs)) {
    LOG_ERROR("Failed to serialize manifest to file: %s", path.c_str());
    return Status::InternalError("Failed to serialize manifest to file");
  }

  // Bytes may still sit in the stream buffer; a write error raised while the
  // destructor flushes would go unnoticed. Flush explicitly so that a failed
  // write is reported instead of returning OK for an incomplete manifest.
  ofs.flush();
  if (!ofs.good()) {
    LOG_ERROR("Failed to flush manifest to file: %s", path.c_str());
    return Status::InternalError("Failed to flush manifest to file: ", path);
  }

  return Status::OK();
}

// Renders the version on a single line. A missing schema or writing segment
// meta is printed as "null"; persisted segment metas are comma-separated in
// map iteration order.
std::string Version::to_string() const {
  std::ostringstream out;

  out << "Version{schema:";
  if (schema_) {
    out << schema_->to_string();
  } else {
    out << "null";
  }

  out << ",persisted_segment_metas:[";
  const char *separator = "";
  for (const auto &entry : persisted_segment_metas_map_) {
    out << separator << entry.second->to_string();
    separator = ",";
  }
  out << "],writing_segment_meta:";

  if (writing_segment_meta_) {
    out << writing_segment_meta_->to_string();
  } else {
    out << "null";
  }

  out << ",id_map_path_suffix:" << id_map_path_suffix_;
  out << ",delete_snapshot_path_suffix:" << delete_snapshot_path_suffix_;
  out << ",next_segment_id:" << next_segment_id_;
  out << ",enable_mmap:" << enable_mmap_ << "}";
  return out.str();
}

// Renders the version as a multi-line block. Members are prefixed with
// indent(indent_level + 1); nested schema / segment metas are rendered with
// their own to_string_formatted(indent_level + 2). A missing schema or
// writing segment meta is printed as "null".
std::string Version::to_string_formatted(int indent_level) const {
  std::ostringstream out;

  out << indent(indent_level) << "Version{\n";
  out << indent(indent_level + 1) << "schema: ";
  if (schema_) {
    out << "\n" << schema_->to_string_formatted(indent_level + 2) << "\n";
  } else {
    out << "null\n";
  }

  out << indent(indent_level + 1) << "persisted_segment_metas: [\n";

  // Every entry but the last is followed by a comma; each ends its line.
  size_t remaining = persisted_segment_metas_map_.size();
  for (const auto &entry : persisted_segment_metas_map_) {
    out << entry.second->to_string_formatted(indent_level + 2);
    --remaining;
    if (remaining != 0) {
      out << ",";
    }
    out << "\n";
  }

  out << "\n";
  out << indent(indent_level + 1) << "],\n";
  out << indent(indent_level + 1) << "writing_segment_meta: ";

  if (writing_segment_meta_) {
    out << "\n"
        << writing_segment_meta_->to_string_formatted(indent_level + 2) << "\n";
  } else {
    out << "null\n";
  }

  out << indent(indent_level + 1)
      << "id_map_path_suffix: " << id_map_path_suffix_ << ",\n";
  out << indent(indent_level + 1)
      << "delete_snapshot_path_suffix: " << delete_snapshot_path_suffix_
      << ",\n";
  out << indent(indent_level + 1) << "next_segment_id: " << next_segment_id_
      << "\n";
  out << indent(indent_level + 1) << "enable_mmap: " << enable_mmap_ << "\n";
  out << indent(indent_level) << "}";
  return out.str();
}

// Rebuilds a VersionManager from the manifest files found in directory
// `path`.
//
// Regular files named "<prefix>.<digits>" are considered, where <prefix> is
// GetFileName(FileID::MANIFEST_FILE). The file with the largest numeric
// suffix is loaded via Version::Load, and the returned manager is constructed
// with version id `max_id + 1`.
//
// Errors:
//  - NotFound        `path` does not exist, or holds no manifest file
//  - InvalidArgument `path` is not a directory
//  - any non-OK status from Version::Load
Result<VersionManager::Ptr> VersionManager::Recovery(const std::string &path) {
  namespace fs = std::filesystem;
  if (!fs::exists(path)) {
    LOG_ERROR("VersionManager::Recovery: path %s does not exist", path.c_str());
    return tl::make_unexpected(
        Status::NotFound("path ", path, " does not exist"));
  }
  if (!fs::is_directory(path)) {
    LOG_ERROR("VersionManager::Recovery: path %s is not a directory",
              path.c_str());
    // Keep the space after "path" so the message does not read
    // "path/foo is not a directory" (matches the NotFound message above).
    return tl::make_unexpected(
        Status::InvalidArgument("path ", path, " is not a directory"));
  }

  std::string prefix = GetFileName(FileID::MANIFEST_FILE);
  std::string manifest_pattern = "^" + prefix + R"(\.(\d+)$)";
  std::regex regex(manifest_pattern);
  std::smatch match;

  // UINT64_MAX doubles as the "no manifest seen yet" sentinel.
  uint64_t max_id = UINT64_MAX;
  std::string version_path;

  for (const auto &entry : fs::directory_iterator(path)) {
    if (entry.is_regular_file()) {
      std::string filename = entry.path().filename().string();
      if (std::regex_match(filename, match, regex)) {
        uint64_t id = std::stoull(match[1].str());
        // Accept the first match unconditionally, then keep the largest id.
        if (id > max_id || max_id == UINT64_MAX) {
          max_id = id;
          version_path = entry.path().string();
        }
      }
    }
  }

  if (max_id == UINT64_MAX) {
    LOG_ERROR("Failed to find the version file in collction_path(%s)",
              path.c_str());
    return tl::make_unexpected(
        Status::NotFound("Failed to find the version file"));
  }

  Version version;
  auto s = Version::Load(version_path, &version);
  CHECK_RETURN_STATUS_EXPECTED(s);

  // The next flush() writes manifest id max_id + 1.
  VersionManager::Ptr manager =
      VersionManager::Ptr(new VersionManager(path, version, max_id + 1));

  return manager;
}

// Creates a manager for `path` whose current version starts as a copy of
// `initial_version`. The constructor's version_id argument is left at its
// declared default. Nothing is written to disk here.
Result<VersionManager::Ptr> VersionManager::Create(
    const std::string &path, const Version &initial_version) {
  return VersionManager::Ptr(new VersionManager(path, initial_version));
}

// Stores the collection directory `path`, a copy of `initial_version` as the
// current version, and `version_id` as the id that flush() will use for the
// next manifest file it writes.
VersionManager::VersionManager(const std::string &path,
                               const Version &initial_version,
                               uint64_t version_id)
    : path_(path), current_version_(initial_version), version_id_(version_id) {}

// Returns a copy of the current version; the copy is taken while mtx_ is
// held.
Version VersionManager::get_current_version() {
  std::lock_guard guard(mtx_);
  Version snapshot = current_version_;
  return snapshot;
}

// Replaces the in-memory current version with a copy of `version` while
// holding mtx_. Nothing is persisted here. Always returns OK.
Status VersionManager::apply(const Version &version) {
  {
    std::lock_guard guard(mtx_);
    current_version_ = version;
  }
  return Status::OK();
}

// Forwards `meta` to the current version's reset_writing_segment_meta() while
// holding mtx_. Always returns OK.
Status VersionManager::reset_writing_segment_meta(SegmentMeta::Ptr meta) {
  {
    std::lock_guard guard(mtx_);
    current_version_.reset_writing_segment_meta(meta);
  }
  return Status::OK();
}

// Adds `meta` to the current version's persisted segment metas while holding
// mtx_, and returns the status reported by the version.
Status VersionManager::add_persisted_segment_meta(SegmentMeta::Ptr meta) {
  std::lock_guard guard(mtx_);
  Status outcome = current_version_.add_persisted_segment_meta(meta);
  return outcome;
}

// Removes the persisted segment meta with the given id from the current
// version while holding mtx_, and returns the status reported by the version.
Status VersionManager::remove_persisted_segment_meta(SegmentID id) {
  std::lock_guard guard(mtx_);
  Status outcome = current_version_.remove_persisted_segment_meta(id);
  return outcome;
}

// Persists the current version as a new manifest file and then removes the
// manifest written under the previous version id.
//
// The whole operation runs under mtx_. version_id_ is advanced before the
// save result is checked, so it is incremented even when Version::Save
// fails. The old manifest is removed only after a successful save.
Status VersionManager::flush() {
  std::lock_guard guard(mtx_);

  // Path of the manifest written under the previous id; stays empty when
  // version_id_ is 0.
  std::string stale_path;
  if (version_id_ != 0) {
    stale_path =
        FileHelper::MakeFilePath(path_, FileID::MANIFEST_FILE, version_id_ - 1);
  }

  const std::string next_path =
      FileHelper::MakeFilePath(path_, FileID::MANIFEST_FILE, version_id_++);
  auto status = Version::Save(next_path, current_version_);
  CHECK_RETURN_STATUS(status);

  if (!stale_path.empty()) {
    FileHelper::RemoveFile(stale_path);
  }

  return Status::OK();
}


}  // namespace zvec

================================================
FILE: src/db/index/common/version_manager.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <algorithm>
#include <cstdint>
#include <memory>
#include <mutex>
#include <unordered_map>
#include <vector>
#include <zvec/db/schema.h>
#include <zvec/db/status.h>
#include "db/index/common/meta.h"

namespace zvec {

// In-memory description of a collection version: the collection schema, the
// set of persisted segment metas (keyed by segment id), the meta of the
// segment currently being written, and a few bookkeeping counters.
//
// The class itself performs no locking.
class Version {
 public:
  using Ptr = std::shared_ptr<Version>;

  Version() = default;

  // Reads a version from `path` into `*version`.
  static Status Load(const std::string &path, Version *version);

  // Writes `version` to `path`.
  static Status Save(const std::string &path, const Version &version);

 public:
  // Stores a private copy of `schema`.
  void set_schema(const CollectionSchema &schema) {
    schema_ = std::make_shared<CollectionSchema>(schema);
  }

  // Returns the stored schema.
  // Precondition: set_schema() (or an equivalent initialization of schema_)
  // has happened; the pointer is dereferenced unconditionally.
  const CollectionSchema &schema() const {
    return *schema_;
  }

  void set_enable_mmap(bool enable_mmap) {
    enable_mmap_ = enable_mmap;
  }

  bool enable_mmap() const {
    return enable_mmap_;
  }

  // Adds a new persisted segment meta.
  // Returns InvalidArgument when `meta` is null or a meta with the same id is
  // already registered; the existing entry is left untouched in that case.
  Status add_persisted_segment_meta(const SegmentMeta::Ptr &meta) {
    if (meta == nullptr) {
      return Status::InvalidArgument("Segment meta is null");
    }
    // emplace() does not overwrite, so a single lookup covers both the
    // duplicate check and the insertion.
    const bool inserted =
        persisted_segment_metas_map_.emplace(meta->id(), meta).second;
    if (!inserted) {
      return Status::InvalidArgument("Segment meta already exists");
    }
    return Status::OK();
  }

  // Removes the persisted segment meta with id `segment_id`.
  // Returns NotFound when no such meta is registered.
  Status remove_persisted_segment_meta(SegmentID segment_id) {
    auto iter = persisted_segment_metas_map_.find(segment_id);
    if (iter == persisted_segment_metas_map_.end()) {
      return Status::NotFound("Segment meta not found");
    }
    persisted_segment_metas_map_.erase(iter);
    return Status::OK();
  }

  // Replaces an already registered persisted segment meta with a deep copy of
  // `meta`. Returns InvalidArgument for a null `meta` and NotFound when the id
  // is not registered.
  Status update_persisted_segment_meta(SegmentMeta::Ptr meta) {
    if (meta == nullptr) {
      return Status::InvalidArgument("Segment meta is null");
    }
    auto iter = persisted_segment_metas_map_.find(meta->id());
    if (iter == persisted_segment_metas_map_.end()) {
      return Status::NotFound("Segment meta not found");
    }
    iter->second = std::make_shared<SegmentMeta>(*meta);
    return Status::OK();
  }

  // Inserts or overwrites every meta in `metas`, keyed by its id. Entries that
  // are already registered under other ids are kept.
  // Precondition: no element of `metas` is null.
  void set_persisted_segment_metas(const std::vector<SegmentMeta::Ptr> &metas) {
    for (const auto &meta : metas) {
      persisted_segment_metas_map_[meta->id()] = meta;
    }
  }

  // Returns all persisted segment metas ordered by ascending min_doc_id().
  std::vector<SegmentMeta::Ptr> persisted_segment_metas() const {
    std::vector<SegmentMeta::Ptr> segment_metas;
    segment_metas.reserve(persisted_segment_metas_map_.size());
    for (const auto &entry : persisted_segment_metas_map_) {
      segment_metas.push_back(entry.second);
    }

    std::sort(segment_metas.begin(), segment_metas.end(),
              [](const SegmentMeta::Ptr &lhs, const SegmentMeta::Ptr &rhs) {
                return lhs->min_doc_id() < rhs->min_doc_id();
              });

    return segment_metas;
  }

  void reset_writing_segment_meta(SegmentMeta::Ptr segment_meta) {
    writing_segment_meta_ = segment_meta;
  }

  SegmentMeta::Ptr writing_segment_meta() const {
    return writing_segment_meta_;
  }

  void set_id_map_path_suffix(uint32_t suffix) {
    id_map_path_suffix_ = suffix;
  }

  uint32_t id_map_path_suffix() const {
    return id_map_path_suffix_;
  }

  void set_delete_snapshot_path_suffix(uint32_t suffix) {
    delete_snapshot_path_suffix_ = suffix;
  }

  uint32_t delete_snapshot_path_suffix() const {
    return delete_snapshot_path_suffix_;
  }

  void set_next_segment_id(SegmentID id) {
    next_segment_id_ = id;
  }

  SegmentID next_segment_id() const {
    return next_segment_id_;
  }

 public:
  // Two versions compare equal when their schemas are equal (or both unset)
  // and they hold equal persisted segment metas under the same ids. The
  // writing segment meta, the mmap flag and the counters are not compared.
  bool operator==(const Version &other) const {
    // Guard the dereferences below: a default-constructed Version has no
    // schema yet.
    const bool has_schema = (schema_ != nullptr);
    const bool other_has_schema = (other.schema_ != nullptr);
    if (has_schema != other_has_schema) {
      return false;
    }
    if (has_schema && *schema_ != *other.schema_) {
      return false;
    }

    if (persisted_segment_metas_map_.size() !=
        other.persisted_segment_metas_map_.size()) {
      return false;
    }

    for (const auto &item : persisted_segment_metas_map_) {
      auto it = other.persisted_segment_metas_map_.find(item.first);
      if (it == other.persisted_segment_metas_map_.end() ||
          *item.second != *it->second) {
        return false;
      }
    }

    return true;
  }

  std::string to_string() const;

  std::string to_string_formatted(int indent_level = 0) const;

 private:
  CollectionSchema::Ptr schema_;
  // Explicitly initialized so that a default-constructed (and then copied or
  // saved) Version never reads an indeterminate value.
  bool enable_mmap_{false};

  std::unordered_map<SegmentID, SegmentMeta::Ptr> persisted_segment_metas_map_;

  SegmentMeta::Ptr writing_segment_meta_;

  uint32_t id_map_path_suffix_{0};
  uint32_t delete_snapshot_path_suffix_{0};

  SegmentID next_segment_id_{0};
};

// Wrapper of Current Version
// Owns the current Version of a collection together with the directory path
// and the running version id used when the version is flushed.
// The inline accessors below serialize on mtx_ before touching the version.
class VersionManager {
 public:
  using Ptr = std::shared_ptr<VersionManager>;

  // Factory: builds a manager from state found under `path`.
  static Result<VersionManager::Ptr> Recovery(const std::string &path);

  // Factory: builds a manager for `path` starting from `initial_version`.
  static Result<VersionManager::Ptr> Create(const std::string &path,
                                            const Version &initial_version);

 private:
  // Instances are obtained through the static factories above.
  VersionManager(const std::string &path, const Version &initial_version,
                 uint64_t version_id = 0);

 public:
  // Returns the current version by value.
  Version get_current_version();

  // overwrite the current version
  Status apply(const Version &version);

  Status reset_writing_segment_meta(SegmentMeta::Ptr meta);

  Status add_persisted_segment_meta(SegmentMeta::Ptr meta);

  Status remove_persisted_segment_meta(SegmentID id);

  Status flush();

  // Locked setter for the id-map path suffix of the current version.
  void set_id_map_path_suffix(uint32_t suffix) {
    std::lock_guard<std::mutex> guard(mtx_);
    current_version_.set_id_map_path_suffix(suffix);
  }

  // Locked setter for the delete-snapshot path suffix of the current version.
  void set_delete_snapshot_path_suffix(uint32_t suffix) {
    std::lock_guard<std::mutex> guard(mtx_);
    current_version_.set_delete_snapshot_path_suffix(suffix);
  }

  // Locked getter for the delete-snapshot path suffix of the current version.
  uint32_t delete_snapshot_path_suffix() const {
    std::lock_guard<std::mutex> guard(mtx_);
    return current_version_.delete_snapshot_path_suffix();
  }

  // Locked setter for the next segment id of the current version.
  void set_next_segment_id(SegmentID id) {
    std::lock_guard<std::mutex> guard(mtx_);
    current_version_.set_next_segment_id(id);
  }

  // Locked setter for the mmap flag of the current version.
  void set_enable_mmap(bool enable_mmap) {
    std::lock_guard<std::mutex> guard(mtx_);
    current_version_.set_enable_mmap(enable_mmap);
  }

 private:
  const std::string path_;
  Version current_version_;
  // mutable so that const accessors can lock it.
  mutable std::mutex mtx_;

  uint64_t version_id_ = 0;
};

}  // namespace zvec


================================================
FILE: src/db/index/segment/column_merging_reader.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "column_merging_reader.h"
#include <iostream>
#include <arrow/array.h>
#include <arrow/result.h>
#include <arrow/status.h>
#include <arrow/table.h>
#include "db/index/storage/store_helper.h"

namespace zvec {

// Factory helper: constructs a shared ColumnMergingReader that produces
// batches with `target_schema`, taking ownership of `input_readers`.
std::shared_ptr<ColumnMergingReader> ColumnMergingReader::Make(
    const std::shared_ptr<arrow::Schema> &target_schema,
    std::vector<std::shared_ptr<arrow::ipc::RecordBatchReader>>
        &&input_readers) {
  auto merged_reader = std::make_shared<ColumnMergingReader>(
      target_schema, std::move(input_readers));
  return merged_reader;
}

// Stores the target schema, takes ownership of the input readers and prepares
// one (initially empty) batch slot per input reader.
ColumnMergingReader::ColumnMergingReader(
    const std::shared_ptr<arrow::Schema> &target_schema,
    std::vector<std::shared_ptr<arrow::ipc::RecordBatchReader>> &&input_readers)
    : target_schema_(target_schema), input_readers_(std::move(input_readers)) {
  // One null slot per reader; the slots are filled by ReadNext().
  current_batches_.assign(input_readers_.size(), nullptr);
}

// Returns the target schema passed at construction; every batch produced by
// ReadNext() is created with this schema.
std::shared_ptr<arrow::Schema> ColumnMergingReader::schema() const {
  return target_schema_;
}

// Produces the next merged record batch.
//
// One batch is pulled from every input reader; for each field of the target
// schema the column is taken from the first batch that contains a field with
// that name, and the columns are assembled into a batch with the target
// schema.
//
// On return `*out` is the merged batch, or nullptr once every input reader is
// exhausted (subsequent calls keep returning nullptr).
//
// Zero-row batches are legal in the middle of an Arrow stream, so an empty
// round is skipped and the next round is read instead of being treated as
// end-of-stream (which would silently drop all following rows).
//
// Errors:
//  - any error reported by an input reader is returned as is;
//  - Invalid if an input reader is null, if the non-null batches of a round
//    disagree on their row count, or if a target field is not present in any
//    of the batches of the round.
arrow::Status ColumnMergingReader::ReadNext(
    std::shared_ptr<arrow::RecordBatch> *out) {
  *out = nullptr;

  while (has_more_) {
    // Read next batch from each input reader
    for (size_t i = 0; i < input_readers_.size(); ++i) {
      if (!input_readers_[i]) {
        return arrow::Status::Invalid("Input reader is null");
      }
      arrow::Status status = input_readers_[i]->ReadNext(&current_batches_[i]);
      if (!status.ok()) {
        return status;
      }
    }

    // Determine the row count of this round and verify that all non-null
    // batches agree on it.
    bool any_batch = false;
    int64_t expected_rows = 0;
    for (const auto &batch : current_batches_) {
      if (!batch) {
        continue;
      }
      if (!any_batch) {
        any_batch = true;
        expected_rows = batch->num_rows();
      } else if (expected_rows != batch->num_rows()) {
        return arrow::Status::Invalid(
            "Input readers have inconsistent row counts");
      }
    }

    // All readers reached EOF
    if (!any_batch) {
      has_more_ = false;
      return arrow::Status::OK();
    }

    // Empty round: nothing to emit, but the stream is not finished yet.
    if (expected_rows <= 0) {
      for (auto &batch : current_batches_) {
        batch.reset();
      }
      continue;
    }

    // Build each column
    std::vector<std::shared_ptr<arrow::Array>> columns;
    columns.reserve(target_schema_->num_fields());

    for (int i = 0; i < target_schema_->num_fields(); ++i) {
      const auto &field = target_schema_->field(i);
      std::shared_ptr<arrow::Array> col_array = nullptr;

      // Try to find this column from any batch
      for (const auto &batch : current_batches_) {
        if (!batch) continue;
        int col_idx = batch->schema()->GetFieldIndex(field->name());
        if (col_idx != -1) {
          col_array = batch->column(col_idx);
          break;
        }
      }

      if (!col_array) {
        return arrow::Status::Invalid(
            "Failed to find column in any input reader: ", field->name());
      }

      columns.push_back(std::move(col_array));
    }

    // Construct final batch
    *out = arrow::RecordBatch::Make(target_schema_, expected_rows,
                                    std::move(columns));
    if (!*out) {
      return arrow::Status::Invalid("Failed to create merged record batch");
    }

    // Clear current batches, prepare for next read
    for (auto &batch : current_batches_) {
      batch.reset();
    }

    return arrow::Status::OK();
  }

  return arrow::Status::OK();
}

}  // namespace zvec

================================================
FILE: src/db/index/segment/column_merging_reader.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include <vector>
#include <arrow/api.h>
#include <arrow/ipc/reader.h>

namespace zvec {

// RecordBatchReader that reads several input readers in lock step and merges
// their columns, by field name, into batches with a single target schema.
class ColumnMergingReader : public arrow::RecordBatchReader {
 public:
  // Convenience factory; forwards to the constructor and returns the reader
  // wrapped in a shared_ptr.
  static std::shared_ptr<ColumnMergingReader> Make(
      const std::shared_ptr<arrow::Schema> &target_schema,
      std::vector<std::shared_ptr<arrow::ipc::RecordBatchReader>>
          &&input_readers);

  // `target_schema` is the schema of the produced batches; `input_readers`
  // is moved into the new object.
  explicit ColumnMergingReader(
      const std::shared_ptr<arrow::Schema> &target_schema,
      std::vector<std::shared_ptr<arrow::ipc::RecordBatchReader>>
          &&input_readers);

  ~ColumnMergingReader() override = default;  // LCOV_EXCL_LINE

  // Returns the target schema.
  std::shared_ptr<arrow::Schema> schema() const override;

  // Reads one batch from every input reader and emits the merged batch;
  // `*out` is nullptr at end of stream.
  arrow::Status ReadNext(std::shared_ptr<arrow::RecordBatch> *out) override;

 private:
  std::shared_ptr<arrow::Schema> target_schema_;
  std::vector<std::shared_ptr<arrow::ipc::RecordBatchReader>> input_readers_;

  // One slot per input reader holding the batch read in the current round.
  std::vector<std::shared_ptr<arrow::RecordBatch>> current_batches_;
  // Cleared once the stream is considered finished; ReadNext() then returns
  // nullptr without touching the input readers.
  bool has_more_ = true;
};

}  // namespace zvec


================================================
FILE: src/db/index/segment/segment.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "segment.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <filesystem>
#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>
#include <ailego/parallel/multi_thread_list.h>
#include <ailego/pattern/defer.h>
#include <arrow/dataset/dataset.h>
#include <arrow/dataset/scanner.h>
#include <arrow/ipc/reader.h>
#include <arrow/table.h>
#include <arrow/util/iterator.h>
#include <zvec/ailego/logger/logger.h>
#include <zvec/ailego/parallel/thread_pool.h>
#include <zvec/ailego/parallel/thread_queue.h>
#include <zvec/db/config.h>
#include <zvec/db/doc.h>
#include <zvec/db/index_params.h>
#include <zvec/db/schema.h>
#include <zvec/db/status.h>
#include <zvec/db/type.h>
#if RABITQ_SUPPORTED
#include "core/algorithm/hnsw_rabitq/rabitq_params.h"
#endif
#include "db/common/constants.h"
#include "db/common/file_helper.h"
#include "db/common/global_resource.h"
#include "db/common/typedef.h"
#include "db/index/column/inverted_column/inverted_indexer.h"
#include "db/index/column/vector_column/engine_helper.hpp"
#include "db/index/column/vector_column/vector_column_indexer.h"
#include "db/index/column/vector_column/vector_column_params.h"
#include "db/index/common/index_filter.h"
#include "db/index/common/meta.h"
#include "db/index/segment/segment_helper.h"
#include "db/index/storage/base_forward_store.h"
#include "db/index/storage/bufferpool_forward_store.h"
#include "db/index/storage/memory_forward_store.h"
#include "db/index/storage/mmap_forward_store.h"
#include "db/index/storage/store_helper.h"
#include "db/index/storage/wal/wal_file.h"
#include "zvec/ailego/container/params.h"
#include "zvec/core/framework/index_factory.h"
#include "zvec/core/framework/index_meta.h"
#include "zvec/core/framework/index_provider.h"
#include "zvec/core/framework/index_reformer.h"
#include "column_merging_reader.h"
#include "sql_expr_parser.h"

namespace zvec {

// Initializes Arrow compute exactly once per process, no matter how often or
// from how many threads this function is called. A failed initialization is
// logged and the process is aborted.
void global_init() {
  static std::once_flag init_flag;

  const auto init_arrow_compute = []() {
    auto status = arrow::compute::Initialize();
    if (status.ok()) {
      return;
    }
    LOG_ERROR("arrow compute init failed: [%s]", status.ToString().c_str());
    abort();
  };

  // run once
  std::call_once(init_flag, init_arrow_compute);
}

// Concrete Segment implementation.
//
// Holds the forward stores, the scalar (inverted) indexer and the vector
// indexers of one segment, together with the segment meta, the shared id map
// and delete store, and the version manager. Derives from
// enable_shared_from_this because Open()/Create() hand a shared_ptr to this
// object to the index filter.
class SegmentImpl : public Segment,
                    public std::enable_shared_from_this<SegmentImpl> {
 public:
  using Ptr = std::shared_ptr<SegmentImpl>;

  // IndexFilter that reports a document as filtered when the delete store
  // marks its global doc id as deleted. The segment is referenced through a
  // weak_ptr, so the filter does not keep the segment alive.
  class SegmentIndexFilter : public IndexFilter {
   public:
    SegmentIndexFilter(const DeleteStore::Ptr &delete_store,
                       SegmentImpl::Ptr impl)
        : delete_store_(delete_store), impl_(impl) {}

    bool is_filtered(uint64_t id) const override;

   private:
    DeleteStore::Ptr delete_store_;
    std::weak_ptr<SegmentImpl> impl_;
  };

  // Copies `schema` and `segment_meta` into privately owned objects, keeps
  // shared references to the id map, delete store and version manager, and
  // derives the segment directory path from `path` and the segment id.
  SegmentImpl(const std::string &path, const CollectionSchema &schema,
              const SegmentMeta &segment_meta, const IDMap::Ptr &id_map,
              const DeleteStore::Ptr &delete_store,
              const VersionManager::Ptr &version_manager)
      : path_(path),
        collection_schema_(std::make_shared<CollectionSchema>(schema)),
        segment_meta_(std::make_shared<SegmentMeta>(segment_meta)),
        version_manager_(version_manager),
        id_map_(id_map),
        delete_store_(delete_store) {
    seg_path_ = FileHelper::MakeSegmentPath(path_, segment_meta.id());
  }

  // Closes the segment and, if need_destroyed_ is set, additionally runs
  // cleanup().
  virtual ~SegmentImpl() {
    close();
    if (need_destroyed_) {
      cleanup();
    }
  }

  SegmentID id() const override;

  SegmentMeta::Ptr meta() const override;

  uint64_t doc_count(const IndexFilter::Ptr filter = nullptr) override;

  // Document write / read operations.
  Status Insert(Doc &doc) override;

  Status Update(Doc &doc) override;

  Status Upsert(Doc &doc) override;

  Status Delete(const std::string &pk) override;

  Status Delete(uint64_t g_doc_id) override;

  Doc::Ptr Fetch(uint64_t g_doc_id) override;

  // Indexer accessors, looked up by field name.
  CombinedVectorColumnIndexer::Ptr get_combined_vector_indexer(
      const std::string &field_name) const override;

  CombinedVectorColumnIndexer::Ptr get_quant_combined_vector_indexer(
      const std::string &field_name) const override;

  VectorColumnIndexer::Ptr get_memory_vector_indexer(
      const std::string &field_name);

  VectorColumnIndexer::Ptr get_memory_quant_vector_indexer(
      const std::string &field_name);

  std::vector<VectorColumnIndexer::Ptr> get_vector_indexer(
      const std::string &field_name) const override;

  virtual std::vector<VectorColumnIndexer::Ptr> get_quant_vector_indexer(
      const std::string &field_name) const override;

  InvertedColumnIndexer::Ptr get_scalar_indexer(
      const std::string &field_name) const override;

  const IndexFilter::Ptr get_filter() override;

  // Vector index maintenance. The create/drop variants report their results
  // through the out-parameters (new segment meta and indexer maps).
  Status create_all_vector_index(
      int concurrency, SegmentMeta::Ptr *new_segment_meta,
      std::unordered_map<std::string, VectorColumnIndexer::Ptr>
          *vector_indexers,
      std::unordered_map<std::string, VectorColumnIndexer::Ptr>
          *quant_vector_indexers) override;

  Status create_vector_index(
      const std::string &column, const IndexParams::Ptr &index_params,
      int concurrency, SegmentMeta::Ptr *new_segment_meta,
      std::unordered_map<std::string, VectorColumnIndexer::Ptr>
          *vector_indexers,
      std::unordered_map<std::string, VectorColumnIndexer::Ptr>
          *quant_vector_indexers) override;

  Status drop_vector_index(
      const std::string &column, SegmentMeta::Ptr *new_segment_meta,
      std::unordered_map<std::string, VectorColumnIndexer::Ptr>
          *vector_indexers) override;

  Status reload_vector_index(
      const CollectionSchema &schema, const SegmentMeta::Ptr &new_segment_meta,
      const std::unordered_map<std::string, VectorColumnIndexer::Ptr>
          &vector_indexers,
      const std::unordered_map<std::string, VectorColumnIndexer::Ptr>
          &quant_vector_indexers) override;

  bool vector_index_ready(const std::string &column,
                          const IndexParams::Ptr &index_params) const override;

  bool all_vector_index_ready() const override;

  // Scalar index maintenance; same out-parameter convention as above.
  Status create_scalar_index(const std::vector<std::string> &columns,
                             const IndexParams::Ptr &index_params,
                             SegmentMeta::Ptr *new_segment_meta,
                             InvertedIndexer::Ptr *new_scalar_indexer) override;

  Status drop_scalar_index(const std::vector<std::string> &columns,
                           SegmentMeta::Ptr *new_segment_meta,
                           InvertedIndexer::Ptr *new_scalar_indexer) override;

  Status reload_scalar_index(
      const CollectionSchema &schema, const SegmentMeta::Ptr &segment_meta,
      const InvertedIndexer::Ptr &scalar_indexer) override;

  Status dump() override;

  Status flush() override;

  Status destroy() override;

  // Columnar read access.
  TablePtr fetch(const std::vector<std::string> &columns,
                 const std::vector<int> &indices) const override;

  ExecBatchPtr fetch(const std::vector<std::string> &columns,
                     int indice) const override;

  RecordBatchReaderPtr scan(
      const std::vector<std::string> &columns) const override;

  // Column-level schema changes.
  Status add_column(FieldSchema::Ptr column_schema,
                    const std::string &expression,
                    const AddColumnOptions &options) override;

  Status alter_column(const std::string &column_name,
                      const FieldSchema::Ptr &new_column_schema,
                      const AlterColumnOptions &options) override;

  Status drop_column(const std::string &column_name) override;

 public:
  // Opens an existing segment described by the segment meta.
  Status Open(const SegmentOptions &options);
  // Initializes a brand-new segment whose first doc id is `min_doc_id`.
  Status Create(const SegmentOptions &options, uint64_t min_doc_id);

 private:
  Status close();
  Status cleanup();
  bool ready_for_dump_block();

  // Helper functions for Open()
  Status load_persist_scalar_blocks();
  Status load_scalar_index_blocks(bool create = false);
  Status load_vector_index_blocks();
  Status init_memory_components();
  Status finish_memory_components();

  void fresh_persist_block_offset();
  void calculate_block_offsets();
  int find_persist_block_id(BlockType type, int segment_doc_id,
                            const std::string &col_name = "",
                            int *out_offset_idx = nullptr) const;
  const std::vector<int> &get_persist_block_offsets(
      BlockType type, const std::string &col_name = "") const;
  const std::vector<BlockMeta> &get_persist_block_metas(
      BlockType type, const std::string &col_name = "") const;

  VectorColumnIndexer::Ptr create_vector_indexer(const std::string &field_name,
                                                 const FieldSchema &field,
                                                 BlockID block_id,
                                                 bool is_quantized = false);

  Result<VectorColumnIndexer::Ptr> merge_vector_indexer(
      const std::string &index_file_path, const std::string &column,
      const FieldSchema &field, int concurrency);

  // Helper functions for Insert/Update/Upsert/Delete
  template <typename ValueType>
  Status InsertScalar(InvertedColumnIndexer::Ptr &indexer, const Doc &doc,
                      const FieldSchema::Ptr &field);
  template <typename ValueType>
  Status InsertVector(VectorColumnIndexer::Ptr &indexer, const Doc &doc,
                      const FieldSchema::Ptr &field);
  Status ConvertVectorDataBufferToDocField(
      const FieldSchema::Ptr &field,
      const vector_column_params::VectorDataBuffer &buf, Doc *doc);

  Status insert_scalar_indexer(Doc &doc);
  Status insert_vector_indexer(Doc &doc);
  Status internal_insert(Doc &doc);
  Status internal_update(Doc &doc);
  Status internal_upsert(Doc &doc);
  Status internal_delete(const Doc &doc);

  // Recovery / write-ahead-log helpers.
  Status recover();
  Status open_wal_file();
  Status append_wal(const Doc &doc);
  Status update_version(uint32_t delete_snapshot_path_suffix);

  Result<uint64_t> get_global_doc_id(uint32_t local_id) const;

  BlockID allocate_block_id();

  bool validate(const std::vector<std::string> &columns) const;

  Status reopen_invert_indexer(bool read_only = false);

  Status insert_array_to_invert_indexer(
      const FieldSchema::Ptr &schema,
      const std::shared_ptr<arrow::ChunkedArray> &data,
      InvertedColumnIndexer::Ptr *column_indexer);

  TablePtr fetch_normal(const std::vector<std::string> &columns,
                        const std::shared_ptr<arrow::Schema> &result_schema,
                        const std::vector<int> &indices) const;

  // For performance tuning
  TablePtr fetch_perf(const std::vector<std::string> &columns,
                      const std::shared_ptr<arrow::Schema> &result_schema,
                      const std::vector<int> &indices) const;

  void fresh_persist_chunked_array();

 private:
  // scalar forward (uses segment-local doc ID)
  MemForwardStore::Ptr memory_store_;
  std::vector<BaseForwardStore::Ptr> persist_stores_;

  // scalar index (uses segment-local doc ID)
  InvertedIndexer::Ptr invert_indexers_;

  // vector index (uses block-local doc ID, each indexer starts from 0)
  std::unordered_map<std::string, VectorColumnIndexer::Ptr>
      memory_vector_indexers_;

  std::unordered_map<std::string, BlockID> memory_vector_block_ids_;

  std::unordered_map<std::string, VectorColumnIndexer::Ptr>
      quant_memory_vector_indexers_;

  std::unordered_map<std::string, BlockID> quant_memory_vector_block_ids_;

  std::unordered_map<std::string, std::vector<VectorColumnIndexer::Ptr>>
      vector_indexers_;

  std::unordered_map<std::string, std::vector<VectorColumnIndexer::Ptr>>
      quant_vector_indexers_;

  // index filter
  IndexFilter::Ptr filter_;

  // path_ is the path passed to the constructor; seg_path_ is derived from
  // it and the segment id via FileHelper::MakeSegmentPath.
  std::string path_;
  std::string seg_path_;
  CollectionSchema::Ptr collection_schema_;
  SegmentMeta::Ptr segment_meta_;
  VersionManager::Ptr version_manager_;
  SegmentOptions options_;

  IDMap::Ptr id_map_;
  DeleteStore::Ptr delete_store_;

  // Maps segment-local doc ID (array index) to global doc ID (stored value)
  std::vector<uint64_t> doc_ids_;

  // Per-BlockType tables (indexed by the BlockType value). Each slot is
  // either a plain vector or a map from name to vector.
  // NOTE(review): presumably the map alternative is keyed by column name for
  // the per-column block types -- confirm against calculate_block_offsets().
  std::array<std::variant<std::vector<int>,
                          std::unordered_map<std::string, std::vector<int>>>,
             static_cast<size_t>(BlockType::VECTOR_INDEX_QUANTIZE) + 1>
      persist_block_offsets_;
  std::array<
      std::variant<std::vector<BlockMeta>,
                   std::unordered_map<std::string, std::vector<BlockMeta>>>,
      static_cast<size_t>(BlockType::VECTOR_INDEX_QUANTIZE) + 1>
      persist_block_metas_;

  // Allocators for the next doc id and the next block id.
  std::atomic<uint64_t> doc_id_allocator_{0};
  std::atomic<BlockID> block_id_allocator_{0};

  // wal
  WalFilePtr wal_file_{nullptr};

  bool sealed_{false};

  mutable std::mutex seg_mtx_;

  // segment column lock
  mutable std::shared_mutex seg_col_mtx_;

  // When set, the destructor runs cleanup() after close().
  bool need_destroyed_{false};

  // For performance tuning
  std::vector<std::shared_ptr<arrow::ChunkedArray>> persist_chunk_arrays_;
  std::vector<uint64_t> chunk_offsets_;
  std::unordered_map<std::string, int> col_idx_map_;
  bool use_fetch_perf_{false};

  // Inner classes
  class CombinedRecordBatchReader;
};

// RecordBatchReader built on top of a list of readers and a column
// projection for one segment. It holds a shared reference to the segment, so
// the segment stays alive while the reader exists.
class SegmentImpl::CombinedRecordBatchReader : public arrow::RecordBatchReader {
 public:
  CombinedRecordBatchReader(
      std::shared_ptr<const SegmentImpl> segment,
      std::vector<std::shared_ptr<arrow::RecordBatchReader>> readers,
      const std::vector<std::string> &columns);

  ~CombinedRecordBatchReader();

  std::shared_ptr<arrow::Schema> schema() const override;

  arrow::Status ReadNext(std::shared_ptr<arrow::RecordBatch> *batch) override;

 private:
  std::shared_ptr<const SegmentImpl> segment_;
  std::vector<std::shared_ptr<arrow::RecordBatchReader>> readers_;
  std::vector<uint64_t> offsets_;
  std::shared_ptr<arrow::Schema> projected_schema_;
  bool need_local_doc_id_ = false;
  // NOTE(review): the next two members have no in-class initializer;
  // presumably the constructor (defined elsewhere) sets them -- confirm.
  size_t current_reader_index_;
  size_t local_doc_id_;
  // -1 until a column index has been assigned.
  int local_doc_id_col_index_ = -1;
};

////////////////////////////////////////////////////////////////////////////////////
// SegmentImpl implementation
////////////////////////////////////////////////////////////////////////////////////

bool SegmentImpl::SegmentIndexFilter::is_filtered(uint64_t id) const {
  auto impl = impl_.lock();
  if (!impl) return false;
  auto result = impl->get_global_doc_id(id);
  if (!result.has_value()) {
    return false;
  }
  uint64_t doc_id = result.value();
  if (delete_store_ && delete_store_->is_deleted(doc_id)) {
    return true;
  }
  return false;
}

// Opens an existing segment.
//
// Steps, in order: take over `options` (the mmap flag is overridden by the
// current version), install the index filter, load the persisted forward
// blocks, the scalar indexes and the vector indexes, set up the doc-id and
// block-id allocators, recover the writing block if there is one, and finally
// refresh the persisted block offsets and chunked arrays.
//
// Returns InternalError when the segment has no writing block although it is
// not opened read-only; any failing load/recover status is returned as is.
Status SegmentImpl::Open(const SegmentOptions &options) {
  options_ = options;
  options_.enable_mmap_ = version_manager_->get_current_version().enable_mmap();

  filter_ =
      std::make_shared<SegmentIndexFilter>(delete_store_, shared_from_this());

  // load persist forward blocks
  Status status = load_persist_scalar_blocks();
  CHECK_RETURN_STATUS(status);

  // load scalar indexes
  status = load_scalar_index_blocks();
  CHECK_RETURN_STATUS(status);

  // load vector indexes
  status = load_vector_index_blocks();
  CHECK_RETURN_STATUS(status);

  // Smallest block id that is larger than `highest` and than the id of every
  // persisted block.
  const auto next_free_block_id = [this](BlockID highest) {
    for (const auto &block : segment_meta_->persisted_blocks()) {
      highest = std::max(highest, block.id());
    }
    return highest + 1;
  };

  auto writing_block = segment_meta_->writing_forward_block();
  if (writing_block.has_value()) {
    // init doc_id_allocator and block_id_allocator
    doc_id_allocator_ = writing_block.value().min_doc_id();
    block_id_allocator_ = next_free_block_id(writing_block.value().id());

    // recover writing block
    status = recover();
    CHECK_RETURN_STATUS(status);
  } else {
    if (!options_.read_only_) {
      return Status::InternalError(
          "No writing block found when in writing mode.");
    }
    // Update block_id_allocator_
    block_id_allocator_.store(next_free_block_id(0));
  }

  fresh_persist_block_offset();

  fresh_persist_chunked_array();

  return Status::OK();
}

// Initializes a new, empty segment.
//
// Installs the index filter, registers an empty writing forward block
// (columns: GLOBAL_DOC_ID, USER_ID and every forward field of the schema)
// whose doc-id range starts at `min_doc_id`, marks every vector field with a
// FLAT index as indexed, creates the scalar index blocks and seeds the doc-id
// allocator with `min_doc_id`.
Status SegmentImpl::Create(const SegmentOptions &options, uint64_t min_doc_id) {
  options_ = options;
  filter_ =
      std::make_shared<SegmentIndexFilter>(delete_store_, shared_from_this());

  // init memory forward block
  auto block_id = allocate_block_id();
  std::vector<std::string> columns{GLOBAL_DOC_ID, USER_ID};
  const std::vector<std::string> forward_names =
      collection_schema_->forward_field_names();
  columns.insert(columns.end(), forward_names.begin(), forward_names.end());

  segment_meta_->set_writing_forward_block(
      {block_id, BlockType::SCALAR, min_doc_id, min_doc_id, 0, columns});

  // FLAT vector fields are recorded as indexed right away.
  for (const auto &field : collection_schema_->vector_fields()) {
    if (field->index_params()->type() != IndexType::FLAT) {
      continue;
    }
    segment_meta_->add_indexed_vector_field(field->name());
  }

  Status status = load_scalar_index_blocks(true);
  CHECK_RETURN_STATUS(status);

  doc_id_allocator_.store(min_doc_id);

  return Status::OK();
}

// Flushes the segment and releases its indexers.
//
// The scalar indexer is dropped, and every persisted and in-memory vector
// indexer is closed and removed from its map. Teardown always runs to
// completion; the status of the initial flush() is returned afterwards so a
// flush failure is no longer silently discarded.
Status SegmentImpl::close() {
  Status flush_status = flush();

  invert_indexers_.reset();

  // Iterate by reference: copying the map entries would copy a std::string
  // and bump shared_ptr reference counts on every iteration.
  for (const auto &entry : vector_indexers_) {
    for (const auto &indexer : entry.second) {
      if (indexer) {
        indexer->Close();
      }
    }
  }
  vector_indexers_.clear();

  for (const auto &entry : memory_vector_indexers_) {
    if (entry.second) {
      entry.second->Close();
    }
  }
  memory_vector_indexers_.clear();

  return flush_status;
}

// Returns the segment id recorded in the segment meta.
SegmentID SegmentImpl::id() const {
  return segment_meta_->id();
}

// Returns the shared pointer to this segment's meta (not a copy of the meta
// object itself).
SegmentMeta::Ptr SegmentImpl::meta() const {
  return segment_meta_;
}

// Returns the number of entries in doc_ids_, minus those for which `filter`
// (if given) reports is_filtered().
// NOTE(review): the filter is queried with the values stored in doc_ids_
// (global doc ids); confirm that callers pass a filter keyed by global ids.
uint64_t SegmentImpl::doc_count(const IndexFilter::Ptr filter) {
  const uint64_t total = doc_ids_.size();
  if (!filter) {
    return total;
  }

  const auto filtered =
      std::count_if(doc_ids_.begin(), doc_ids_.end(),
                    [&filter](const uint64_t &doc_id) {
                      return filter->is_filtered(doc_id);
                    });
  return total - static_cast<uint64_t>(filtered);
}

// Type trait: is_vector<T>::value is true exactly when T is a std::vector
// specialization (with any element type and allocator).
template <typename>
struct is_vector : std::false_type {};

template <typename Element, typename Allocator>
struct is_vector<std::vector<Element, Allocator>> : std::true_type {};

// Inserts the value of `field` taken from `doc` into the inverted `indexer`
// under the next segment-local doc id (the current size of doc_ids_).
//
// A missing value is recorded through insert_null(). vector<bool>,
// vector<string>, string and bool values are handed to the indexer directly;
// every other vector or scalar type is passed as a std::string holding the
// raw bytes of the value.
template <typename ValueType>
Status SegmentImpl::InsertScalar(InvertedColumnIndexer::Ptr &indexer,
                                 const Doc &doc,
                                 const FieldSchema::Ptr &field) {
  auto value = doc.get<ValueType>(field->name());
  auto segment_doc_id = doc_ids_.size();

  if (!value.has_value()) {
    return indexer->insert_null(segment_doc_id);
  }

  if constexpr (std::is_same_v<ValueType, std::vector<bool>> ||
                std::is_same_v<ValueType, std::vector<std::string>>) {
    return indexer->insert(segment_doc_id, value.value());
  } else if constexpr (is_vector<ValueType>::value) {
    // Remaining vector types: serialize the contiguous element storage.
    using Element = typename ValueType::value_type;
    const auto &elements = value.value();
    std::string raw_bytes(reinterpret_cast<const char *>(elements.data()),
                          elements.size() * sizeof(Element));
    return indexer->insert(segment_doc_id, raw_bytes);
  } else if constexpr (std::is_same_v<ValueType, std::string> ||
                       std::is_same_v<ValueType, bool>) {
    const ValueType &scalar = value.value();
    return indexer->insert(segment_doc_id, scalar);
  } else {
    // Remaining scalar types: serialize the object representation.
    const ValueType &scalar = value.value();
    std::string raw_bytes(reinterpret_cast<const char *>(&scalar),
                          sizeof(ValueType));
    return indexer->insert(segment_doc_id, raw_bytes);
  }
}

// Inserts the vector stored in `field` of `doc` into `indexer`.
//
// Sparse values (a pair of index / value vectors) are wrapped in a
// SparseVector that points at the pair's storage; every other ValueType is
// wrapped in a DenseVector pointing at the container's data(). The insert is
// keyed by the doc_count_ of the segment's current writing forward block.
//
// When the field is absent or null only a warning is logged and OK is
// returned without touching the indexer.
template <typename ValueType>
Status SegmentImpl::InsertVector(VectorColumnIndexer::Ptr &indexer,
                                 const Doc &doc,
                                 const FieldSchema::Ptr &field) {
  using SparseFp16 = std::pair<std::vector<uint32_t>, std::vector<float16_t>>;
  using SparseFp32 = std::pair<std::vector<uint32_t>, std::vector<float>>;

  auto payload = doc.get<ValueType>(field->name());
  if (!payload.has_value()) {
    LOG_WARN("Field %s not found or is null for doc: %s", field->name().c_str(),
             doc.to_detail_string().c_str());
    return Status::OK();
  }

  vector_column_params::VectorData vector_data;
  if constexpr (std::is_same_v<ValueType, SparseFp16> ||
                std::is_same_v<ValueType, SparseFp32>) {
    const auto &indices = payload.value().first;
    const auto &weights = payload.value().second;
    // The vector data struct carries untyped mutable pointers, so constness
    // is stripped here exactly as the C-style casts used to do.
    vector_data.vector = vector_column_params::SparseVector{
        static_cast<uint32_t>(indices.size()),
        const_cast<void *>(static_cast<const void *>(indices.data())),
        const_cast<void *>(static_cast<const void *>(weights.data()))};
  } else {
    vector_data.vector =
        vector_column_params::DenseVector{.data = payload.value().data()};
  }

  auto &writing_block = segment_meta_->writing_forward_block().value();
  return indexer->Insert(vector_data, writing_block.doc_count_);
}

// Writes every INVERT-indexed forward field of `doc` into its scalar
// (inverted) indexer.
//
// Fields whose index type is not INVERT are skipped. For the others the
// insert is dispatched on the field data type to the matching
// InsertScalar<> instantiation. Returns InternalError when a field has no
// indexer or an unsupported data type, and otherwise the first failing
// status reported by InsertScalar (which is also logged).
Status SegmentImpl::insert_scalar_indexer(Doc &doc) {
  for (const auto &field : collection_schema_->forward_fields()) {
    if (field->index_type() != IndexType::INVERT) {
      continue;
    }

    auto indexer = get_scalar_indexer(field->name());
    if (!indexer) {
      return Status::InternalError("Field ", field->name(), " indexer is null");
    }

    const auto data_type = field->data_type();
    Status status;
    switch (data_type) {
      // Binary and string payloads share the std::string representation.
      case DataType::BINARY:
      case DataType::STRING:
        status = InsertScalar<std::string>(indexer, doc, field);
        break;
      case DataType::BOOL:
        status = InsertScalar<bool>(indexer, doc, field);
        break;
      case DataType::INT32:
        status = InsertScalar<int32_t>(indexer, doc, field);
        break;
      case DataType::INT64:
        status = InsertScalar<int64_t>(indexer, doc, field);
        break;
      case DataType::UINT32:
        status = InsertScalar<uint32_t>(indexer, doc, field);
        break;
      case DataType::UINT64:
        status = InsertScalar<uint64_t>(indexer, doc, field);
        break;
      case DataType::FLOAT:
        status = InsertScalar<float>(indexer, doc, field);
        break;
      case DataType::DOUBLE:
        status = InsertScalar<double>(indexer, doc, field);
        break;
      case DataType::ARRAY_BINARY:
      case DataType::ARRAY_STRING:
        status = InsertScalar<std::vector<std::string>>(indexer, doc, field);
        break;
      case DataType::ARRAY_BOOL:
        status = InsertScalar<std::vector<bool>>(indexer, doc, field);
        break;
      case DataType::ARRAY_INT32:
        status = InsertScalar<std::vector<int32_t>>(indexer, doc, field);
        break;
      case DataType::ARRAY_INT64:
        status = InsertScalar<std::vector<int64_t>>(indexer, doc, field);
        break;
      case DataType::ARRAY_UINT32:
        status = InsertScalar<std::vector<uint32_t>>(indexer, doc, field);
        break;
      case DataType::ARRAY_UINT64:
        status = InsertScalar<std::vector<uint64_t>>(indexer, doc, field);
        break;
      case DataType::ARRAY_FLOAT:
        status = InsertScalar<std::vector<float>>(indexer, doc, field);
        break;
      case DataType::ARRAY_DOUBLE:
        status = InsertScalar<std::vector<double>>(indexer, doc, field);
        break;
      default:
        status = Status::InternalError("unsupport data type ",
                                       DataTypeCodeBook::AsString(data_type));
    }

    if (!status.ok()) {
      LOG_ERROR("insert scalar failed[%s]", status.message().c_str());
      return status;
    }
  }
  return Status::OK();
}

// Feeds the vector fields of `doc` into the in-memory vector indexers.
//
// For every vector field of the collection schema the regular in-memory
// indexer is used, plus the quantized in-memory indexer when the field's
// index params request a quantize type other than UNDEFINED. The insert is
// dispatched on the field data type to the matching InsertVector<>
// instantiation.
//
// Returns the first non-OK status: InternalError when an indexer or the
// vector index params are missing, InvalidArgument for an unsupported data
// type, or whatever InsertVector reports.
Status SegmentImpl::insert_vector_indexer(Doc &doc) {
  for (const auto &field : collection_schema_->vector_fields()) {
    std::vector<VectorColumnIndexer::Ptr> indexers;
    auto m_indexer = get_memory_vector_indexer(field->name());
    if (!m_indexer) {
      LOG_ERROR("vector indexer not found for field %s", field->name().c_str());
      return Status::InternalError("vector indexer not found for field: ",
                                   field->name());
    }
    indexers.push_back(m_indexer);

    auto vector_index_params =
        std::dynamic_pointer_cast<VectorIndexParams>(field->index_params());
    // The cast yields null when the field carries no (or a non-vector) index
    // params object; report that instead of dereferencing a null pointer.
    if (!vector_index_params) {
      LOG_ERROR("vector index params not found for field %s",
                field->name().c_str());
      return Status::InternalError(
          "vector index params not found for field: ", field->name());
    }
    if (vector_index_params->quantize_type() != QuantizeType::UNDEFINED) {
      m_indexer = get_memory_quant_vector_indexer(field->name());
      if (!m_indexer) {
        LOG_ERROR("quant vector indexer not found for field %s",
                  field->name().c_str());
        return Status::InternalError(
            "quant vector indexer not found for field: ", field->name());
      }
      indexers.push_back(m_indexer);
    }

    const auto data_type = field->data_type();
    // Iterate by reference: InsertVector takes the handle by reference and a
    // per-iteration shared-pointer copy would serve no purpose.
    for (auto &indexer : indexers) {
      Status status;
      switch (data_type) {
        case DataType::VECTOR_BINARY32:
          status = InsertVector<std::vector<uint32_t>>(indexer, doc, field);
          break;
        case DataType::VECTOR_BINARY64:
          status = InsertVector<std::vector<uint64_t>>(indexer, doc, field);
          break;
        case DataType::VECTOR_FP16:
          status = InsertVector<std::vector<float16_t>>(indexer, doc, field);
          break;
        case DataType::VECTOR_FP32:
          status = InsertVector<std::vector<float>>(indexer, doc, field);
          break;
        case DataType::VECTOR_FP64:
          status = InsertVector<std::vector<double>>(indexer, doc, field);
          break;
        // case DataType::VECTOR_INT4:
        //   status = InsertVector<std::vector<int8_t>>(indexer, doc, field);
        //   break;
        case DataType::VECTOR_INT8:
          status = InsertVector<std::vector<int8_t>>(indexer, doc, field);
          break;
        case DataType::VECTOR_INT16:
          status = InsertVector<std::vector<int16_t>>(indexer, doc, field);
          break;
        case DataType::SPARSE_VECTOR_FP16:
          status = InsertVector<
              std::pair<std::vector<uint32_t>, std::vector<float16_t>>>(
              indexer, doc, field);
          break;
        case DataType::SPARSE_VECTOR_FP32:
          status = InsertVector<
              std::pair<std::vector<uint32_t>, std::vector<float>>>(indexer,
                                                                    doc, field);
          break;
        default:
          // Trailing space keeps the type name separated from the prefix,
          // matching the message built by insert_scalar_indexer.
          status = Status::InvalidArgument(
              "unsupport data type ", DataTypeCodeBook::AsString(data_type));
      }
      if (!status.ok()) {
        LOG_ERROR("insert vector failed[%s]", status.message().c_str());
        return status;
      }
    }
  }
  return Status::OK();
}

// Writes `doc` into the in-memory components of the segment.
//
// Order of operations:
//   1. allocate a new global doc id and store it on `doc`;
//   2. flush() first when ready_for_dump_block() says the current block
//      should be dumped;
//   3. lazily create the memory components when memory_store_ is unset;
//   4. upsert pk -> doc id in the id map;
//   5. append the document to the forward store;
//   6. update scalar and vector indexers;
//   7. bump the writing block's max_doc_id_ / doc_count_ and append the id
//      to doc_ids_.
//
// Any failure in steps 2-5 is returned immediately. For the indexers an
// ALREADY_EXISTS status is tolerated; every other error is returned.
Status SegmentImpl::internal_insert(Doc &doc) {
  uint64_t g_doc_id = doc_id_allocator_.fetch_add(1);
  doc.set_doc_id(g_doc_id);

  if (ready_for_dump_block()) {
    auto s = flush();
    CHECK_RETURN_STATUS(s);
  }

  // init writing memory components
  if (!memory_store_) {
    auto s = init_memory_components();
    CHECK_RETURN_STATUS(s);
  }

  // write idmap
  auto s = id_map_->upsert(doc.pk(), g_doc_id);
  CHECK_RETURN_STATUS(s);

  // write forward
  s = memory_store_->insert(doc);
  CHECK_RETURN_STATUS(s);

  // write scalar index
  s = insert_scalar_indexer(doc);
  if (!s.ok() && s.code() != StatusCode::ALREADY_EXISTS) {
    return s;
  }
  // write vector index
  // Compare by status code, like the scalar path above, so the outcome does
  // not depend on how whole-Status comparison treats the message text.
  s = insert_vector_indexer(doc);
  if (!s.ok() && s.code() != StatusCode::ALREADY_EXISTS) {
    return s;
  }

  auto &mem_block = segment_meta_->writing_forward_block().value();
  mem_block.max_doc_id_ = g_doc_id;
  mem_block.doc_count_ = mem_block.doc_count_ + 1;

  doc_ids_.push_back(g_doc_id);

  return Status::OK();
}

// Applies an update: the doc id currently stored on `doc` is marked deleted,
// then the document is written again through internal_insert(), which
// assigns it a fresh doc id.
Status SegmentImpl::internal_update(Doc &doc) {
  const auto stale_id = doc.doc_id();
  delete_store_->mark_deleted(stale_id);

  return internal_insert(doc);
}

// Applies an upsert: when the primary key is already present in the id map
// its current doc id is marked deleted; the document is then inserted via
// internal_insert() in either case.
Status SegmentImpl::internal_upsert(Doc &doc) {
  uint64_t existing_id;
  if (id_map_->has(doc.pk(), &existing_id)) {
    delete_store_->mark_deleted(existing_id);
  }

  return internal_insert(doc);
}

// Applies a delete: marks the document's doc id as deleted in the delete
// store and removes its primary key from the id map. Always returns OK.
Status SegmentImpl::internal_delete(const Doc &doc) {
  const auto target_id = doc.doc_id();
  delete_store_->mark_deleted(target_id);

  id_map_->remove(doc.pk());

  return Status::OK();
}

// Inserts a new document while holding seg_mtx_.
//
// Returns AlreadyExists when an id map exists and already contains the
// document's primary key. Otherwise the document is tagged as INSERT,
// appended to the WAL, and then written through internal_insert().
Status SegmentImpl::Insert(Doc &doc) {
  std::lock_guard guard(seg_mtx_);

  const bool duplicate_pk = id_map_ && id_map_->has(doc.pk());
  if (duplicate_pk) {
    return Status::AlreadyExists("insert failed: doc_id[", doc.pk(),
                                 "] already exists in collection");
  }

  doc.set_operator(Operator::INSERT);

  // The WAL record is written before any in-memory state is changed.
  auto s = append_wal(doc);
  CHECK_RETURN_STATUS(s);

  return internal_insert(doc);
}

// Updates an existing document while holding seg_mtx_.
//
// Returns NotFound when the primary key is not in the id map. Otherwise the
// currently mapped doc id is stored on `doc`, the document is tagged as
// UPDATE, appended to the WAL, and applied through internal_update().
Status SegmentImpl::Update(Doc &doc) {
  std::lock_guard guard(seg_mtx_);

  uint64_t current_id;
  const bool known_pk = id_map_->has(doc.pk(), &current_id);
  if (!known_pk) {
    return Status::NotFound("update failed: doc_id[", doc.pk(),
                            "] not found in collection");
  }

  doc.set_doc_id(current_id);
  doc.set_operator(Operator::UPDATE);

  // The WAL record is written before any in-memory state is changed.
  auto s = append_wal(doc);
  CHECK_RETURN_STATUS(s);

  return internal_update(doc);
}

// Inserts or replaces a document while holding seg_mtx_: the document is
// tagged as UPSERT, appended to the WAL, and applied through
// internal_upsert().
Status SegmentImpl::Upsert(Doc &doc) {
  std::lock_guard guard(seg_mtx_);

  doc.set_operator(Operator::UPSERT);

  // The WAL record is written before any in-memory state is changed.
  auto s = append_wal(doc);
  CHECK_RETURN_STATUS(s);

  return internal_upsert(doc);
}

// Deletes the document identified by primary key `pk` while holding
// seg_mtx_.
//
// Returns NotFound when the key is not in the id map or when its doc id is
// already marked deleted. Otherwise a DELETE record carrying the key and
// doc id is appended to the WAL and applied through internal_delete().
Status SegmentImpl::Delete(const std::string &pk) {
  std::lock_guard guard(seg_mtx_);

  uint64_t mapped_id;
  const bool known_pk = id_map_->has(pk, &mapped_id);
  if (!known_pk) {
    return Status::NotFound("primary key: ", pk, " not found");
  }
  if (delete_store_->is_deleted(mapped_id)) {
    return Status::NotFound("primary key: ", pk, " g_doc_id: ", mapped_id,
                            " already deleted");
  }

  Doc tombstone;
  tombstone.set_pk(pk);
  tombstone.set_doc_id(mapped_id);
  tombstone.set_operator(Operator::DELETE);

  // The WAL record is written before any in-memory state is changed.
  auto s = append_wal(tombstone);
  CHECK_RETURN_STATUS(s);

  return internal_delete(tombstone);
}

// Deletes the document identified by global doc id `g_doc_id` while holding
// seg_mtx_.
//
// Note: there is no way here to verify that g_doc_id refers to a real
// document; the only check is that it is not already marked deleted, in
// which case NotFound is returned.
//
// The DELETE record built here carries no primary key, so internal_delete()
// is handed a Doc whose pk was never set.
Status SegmentImpl::Delete(uint64_t g_doc_id) {
  std::lock_guard guard(seg_mtx_);

  const bool already_gone = delete_store_->is_deleted(g_doc_id);
  if (already_gone) {
    return Status::NotFound("g_doc_id:", g_doc_id, " already deleted");
  }

  Doc tombstone;
  tombstone.set_doc_id(g_doc_id);
  tombstone.set_operator(Operator::DELETE);

  // The WAL record is written before any in-memory state is changed.
  auto s = append_wal(tombstone);
  CHECK_RETURN_STATUS(s);

  return internal_delete(tombstone);
}

// Reinterprets the raw bytes of a dense vector buffer as elements of type T
// and stores them on `doc` under the field's name.
//
// The element count is buffer.data.size() / sizeof(T) using integer
// division, so trailing bytes that do not form a whole element are ignored.
// Always returns OK.
template <typename T>
Status DenseVectorDataConverter(
    const FieldSchema::Ptr &field,
    const vector_column_params::DenseVectorBuffer &buffer, Doc *doc) {
  const size_t element_count = buffer.data.size() / sizeof(T);
  const T *first = reinterpret_cast<const T *>(buffer.data.data());
  const T *last = first + element_count;

  std::vector<T> elements(first, last);
  doc->set(field->name(), elements);
  return Status::OK();
}

// Reinterprets the raw index / value bytes of a sparse vector buffer as
// IndexType / ValueType elements and stores them on `doc`, under the field's
// name, as a pair of vectors (indices first, values second).
//
// Each element count is the byte size divided by the element size using
// integer division. Always returns OK.
template <typename IndexType, typename ValueType>
Status SparseVectorDataConverter(
    const FieldSchema::Ptr &field,
    const vector_column_params::SparseVectorBuffer &buffer, Doc *doc) {
  const size_t index_count = buffer.indices.size() / sizeof(IndexType);
  const IndexType *index_begin =
      reinterpret_cast<const IndexType *>(buffer.indices.data());

  const size_t value_count = buffer.values.size() / sizeof(ValueType);
  const ValueType *value_begin =
      reinterpret_cast<const ValueType *>(buffer.values.data());

  std::pair<std::vector<IndexType>, std::vector<ValueType>> sparse_entry(
      std::vector<IndexType>(index_begin, index_begin + index_count),
      std::vector<ValueType>(value_begin, value_begin + value_count));
  doc->set(field->name(), sparse_entry);
  return Status::OK();
}


// Copies a fetched vector buffer into `doc` under the name of `field`.
//
// The alternative held by buf.vector_buffer (dense or sparse) selects the
// converter family and the field data type selects the element type.
// Returns InvalidArgument when the data type does not match the buffer kind
// or when the buffer holds neither alternative; otherwise returns the
// converter's status.
Status SegmentImpl::ConvertVectorDataBufferToDocField(
    const FieldSchema::Ptr &field,
    const vector_column_params::VectorDataBuffer &buf, Doc *doc) {
  if (const auto *dense =
          std::get_if<vector_column_params::DenseVectorBuffer>(
              &buf.vector_buffer)) {
    switch (field->data_type()) {
      case DataType::VECTOR_BINARY32:
        return DenseVectorDataConverter<uint32_t>(field, *dense, doc);
      case DataType::VECTOR_BINARY64:
        return DenseVectorDataConverter<uint64_t>(field, *dense, doc);
      case DataType::VECTOR_FP16:
        return DenseVectorDataConverter<float16_t>(field, *dense, doc);
      case DataType::VECTOR_FP32:
        return DenseVectorDataConverter<float>(field, *dense, doc);
      case DataType::VECTOR_FP64:
        return DenseVectorDataConverter<double>(field, *dense, doc);
      // case DataType::VECTOR_INT4:
      //   return DenseVectorDataConverter<int8_t>(field, *dense, doc);
      case DataType::VECTOR_INT8:
        return DenseVectorDataConverter<int8_t>(field, *dense, doc);
      case DataType::VECTOR_INT16:
        return DenseVectorDataConverter<int16_t>(field, *dense, doc);
      default:
        return Status::InvalidArgument(
            "Unsupported dense vector element type: ", field->data_type());
    }
  }

  if (const auto *sparse =
          std::get_if<vector_column_params::SparseVectorBuffer>(
              &buf.vector_buffer)) {
    switch (field->data_type()) {
      case DataType::SPARSE_VECTOR_FP16:
        return SparseVectorDataConverter<uint32_t, float16_t>(field, *sparse,
                                                              doc);
      case DataType::SPARSE_VECTOR_FP32:
        return SparseVectorDataConverter<uint32_t, float>(field, *sparse, doc);
      default:
        return Status::InvalidArgument(
            "Unsupported sparse vector element type: ", field->data_type());
    }
  }

  return Status::InvalidArgument("Unsupported vector buffer type");
}


// Fetches the document stored under global doc id `g_doc_id`.
//
// Steps, all performed while holding seg_mtx_:
//   1. Reject ids above segment_meta_->max_doc_id().
//   2. Locate the id in doc_ids_ with std::lower_bound; its position becomes
//      the segment-local row (segment_doc_id).
//   3. Read GLOBAL_DOC_ID, USER_ID and every forward field for that row via
//      fetch() and copy the valid scalars / lists into a new Doc.
//   4. For each vector field, read the vector from the persisted block that
//      find_persist_block_id() reports, or otherwise from the in-memory
//      indexer when a writing forward block exists.
//
// Returns nullptr when the id is out of range or unknown, when the schema
// conversion or the forward fetch fails, or when a persisted vector fetch
// fails. Failures in the in-memory vector path and vector conversion errors
// are only logged; the Doc is still returned without that field.
//
// NOTE(review): std::lower_bound requires doc_ids_ to be sorted ascending —
// presumably guaranteed by ids being appended from a monotonically
// increasing allocator; confirm.
Doc::Ptr SegmentImpl::Fetch(uint64_t g_doc_id) {
  std::lock_guard lock(seg_mtx_);

  if (g_doc_id > segment_meta_->max_doc_id()) {
    LOG_ERROR("g_doc_id[%zu] not exist in segment[%d] ", (size_t)g_doc_id,
              id());
    return nullptr;
  }

  // Translate the global id into its position within doc_ids_.
  int segment_doc_id = 0;
  auto it = std::lower_bound(doc_ids_.begin(), doc_ids_.end(), g_doc_id);
  if (it != doc_ids_.end() && *it == g_doc_id) {
    segment_doc_id = static_cast<int>(std::distance(doc_ids_.begin(), it));
  } else {
    // NOTE(review): front()/back() are evaluated even when doc_ids_ is
    // empty, which is undefined behavior for an empty container — confirm
    // this path cannot be reached with an empty doc_ids_.
    LOG_ERROR(
        "g_doc_id[%zu] not found in doc_ids_[%zu], min_doc_id[%zu] "
        "max_doc_id[%zu], meta[%s]",
        (size_t)g_doc_id, doc_ids_.size(), (size_t)doc_ids_.front(),
        (size_t)doc_ids_.back(), segment_meta_->to_string_formatted().c_str());
    return nullptr;
  }

  // Column order matters below: index 0 is the global doc id, index 1 the
  // primary key, and the forward fields follow in schema order.
  std::vector<std::string> forward_columns;
  forward_columns.push_back(GLOBAL_DOC_ID);
  forward_columns.push_back(USER_ID);
  for (const auto &field : collection_schema_->forward_fields()) {
    forward_columns.push_back(field->name());
  }

  // Build result schema
  std::vector<std::shared_ptr<arrow::Field>> fields;
  for (size_t i = 0; i < forward_columns.size(); ++i) {
    const auto &col = forward_columns[i];
    if (col == GLOBAL_DOC_ID) {
      fields.push_back(arrow::field(GLOBAL_DOC_ID, arrow::uint64()));
    } else if (col == USER_ID) {
      fields.push_back(arrow::field(USER_ID, arrow::utf8()));
    } else {
      auto *field = collection_schema_->get_field(col);
      std::shared_ptr<arrow::Field> arrow_field;
      auto status = ConvertFieldSchemaToArrowField(field, &arrow_field);
      if (!status.ok()) {
        LOG_ERROR("Convert field schema failed: %s",
                  field->to_string().c_str());
        return nullptr;
      }
      fields.push_back(std::move(arrow_field));
    }
  }
  auto result_schema = std::make_shared<arrow::Schema>(fields);

  // fetch forward columns
  auto exec_batch = fetch(forward_columns, segment_doc_id);
  if (!exec_batch) {
    LOG_ERROR("Fetch failed, doc_id: %zu", (size_t)g_doc_id);
    return nullptr;
  }
  // Exactly one row is expected for a single-row fetch.
  if (exec_batch->length != 1) {
    LOG_ERROR("Fetch failed, doc_id: %zu, num_rows: %zu != 1", (size_t)g_doc_id,
              (size_t)exec_batch->length);
    return nullptr;
  }

  // The batch must contain one value per requested column.
  if (exec_batch->num_values() != (int)forward_columns.size()) {
    LOG_ERROR("table column size error, expect %zu, actual %d",
              forward_columns.size(), exec_batch->num_values());
    return nullptr;
  }

  auto doc = std::make_shared<Doc>();

  // column 0 is the global doc id
  // NOTE(review): the result schema above declares this column as
  // arrow::uint64(), yet the scalar is cast to Int64Scalar. Being a
  // static_pointer_cast, the result is null only when scalar() itself is
  // null, so the else branch does not detect a type mismatch — confirm the
  // intended scalar type.
  if (auto doc_id_scalar = std::static_pointer_cast<arrow::Int64Scalar>(
          (*exec_batch)[0].scalar())) {
    doc->set_doc_id(doc_id_scalar->value);
  } else {
    LOG_ERROR("Global doc id scalar is not of int64 type");
    return nullptr;
  }

  // column 1 is the uid(pk)
  if (auto str_scalar = std::dynamic_pointer_cast<arrow::StringScalar>(
          (*exec_batch)[1].scalar())) {
    doc->set_pk(std::string(str_scalar->view()));
  } else {
    LOG_ERROR("Primary key scalar is not of string type");
    return nullptr;
  }

  // other forward columns
  for (int col_idx = 2; col_idx < exec_batch->num_values(); ++col_idx) {
    auto column_name = forward_columns[col_idx];
    auto column = result_schema->GetFieldByName(column_name);
    auto &column_scalar = (*exec_batch)[col_idx].scalar();
    // Null / invalid scalars are skipped, leaving the field unset on the Doc.
    if (column_scalar == nullptr || column_scalar->is_valid == false) {
      continue;
    }
    // NOTE(review): the dynamic_pointer_cast results in the scalar cases
    // below are dereferenced without a null check; this relies on the scalar
    // type always matching the schema type — confirm.
    switch (column->type()->id()) {
      case arrow::Type::STRING: {
        auto str_scalar =
            std::dynamic_pointer_cast<arrow::StringScalar>(column_scalar);
        doc->set(column_name, std::string(str_scalar->view()));
        break;
      }
      case arrow::Type::INT32: {
        auto int32_scalar =
            std::dynamic_pointer_cast<arrow::Int32Scalar>(column_scalar);
        doc->set(column_name, int32_scalar->value);
        break;
      }
      case arrow::Type::INT64: {
        auto int64_scalar =
            std::dynamic_pointer_cast<arrow::Int64Scalar>(column_scalar);
        doc->set(column_name, int64_scalar->value);
        break;
      }
      case arrow::Type::UINT32: {
        auto uint32_scalar =
            std::dynamic_pointer_cast<arrow::UInt32Scalar>(column_scalar);
        doc->set(column_name, uint32_scalar->value);
        break;
      }
      case arrow::Type::UINT64: {
        auto uint64_scalar =
            std::dynamic_pointer_cast<arrow::UInt64Scalar>(column_scalar);
        doc->set(column_name, uint64_scalar->value);
        break;
      }
      case arrow::Type::DOUBLE: {
        auto double_scalar =
            std::dynamic_pointer_cast<arrow::DoubleScalar>(column_scalar);
        doc->set(column_name, double_scalar->value);
        break;
      }
      case arrow::Type::FLOAT: {
        auto float_scalar =
            std::dynamic_pointer_cast<arrow::FloatScalar>(column_scalar);
        doc->set(column_name, float_scalar->value);
        break;
      }
      case arrow::Type::BOOL: {
        auto bool_scalar =
            std::dynamic_pointer_cast<arrow::BooleanScalar>(column_scalar);
        doc->set(column_name, bool_scalar->value);
        break;
      }
      case arrow::Type::BINARY: {
        auto binary_scalar =
            std::dynamic_pointer_cast<arrow::BinaryScalar>(column_scalar);
        doc->set(column_name, std::string(binary_scalar->view()));
        break;
      }
      case arrow::Type::LIST: {
        // List columns: copy the valid elements of the list's value array
        // into a std::vector of the matching element type. Invalid (null)
        // elements are dropped, so the resulting vector may be shorter than
        // the stored list.
        auto list_scalar =
            std::dynamic_pointer_cast<arrow::ListScalar>(column_scalar);
        if (list_scalar && list_scalar->value) {
          auto list_type =
              std::dynamic_pointer_cast<arrow::ListType>(column->type());
          if (list_type) {
            auto value_type = list_type->value_type();
            switch (value_type->id()) {
              case arrow::Type::BOOL: {
                std::vector<bool> values;
                auto array = std::dynamic_pointer_cast<arrow::BooleanArray>(
                    list_scalar->value);
                if (array) {
                  values.reserve(array->length());
                  for (int64_t i = 0; i < array->length(); ++i) {
                    if (array->IsValid(i)) {
                      values.push_back(array->Value(i));
                    } else {
                      // Only the BOOL case logs dropped elements; the other
                      // element types skip them silently.
                      LOG_ERROR(
                          "Invalid arrow::boolean array value at index %zu",
                          (size_t)i);
                      continue;
                    }
                  }
                  doc->set(column_name, values);
                }
                break;
              }
              case arrow::Type::INT32: {
                std::vector<int32_t> values;
                auto array = std::dynamic_pointer_cast<arrow::Int32Array>(
                    list_scalar->value);
                if (array) {
                  values.reserve(array->length());
                  for (int64_t i = 0; i < array->length(); ++i) {
                    if (array->IsValid(i)) {
                      values.push_back(array->Value(i));
                    }
                  }
                  doc->set(column_name, values);
                }
                break;
              }
              case arrow::Type::INT64: {
                std::vector<int64_t> values;
                auto array = std::dynamic_pointer_cast<arrow::Int64Array>(
                    list_scalar->value);
                if (array) {
                  values.reserve(array->length());
                  for (int64_t i = 0; i < array->length(); ++i) {
                    if (array->IsValid(i)) {
                      values.push_back(array->Value(i));
                    }
                  }
                  doc->set(column_name, values);
                }
                break;
              }
              case arrow::Type::UINT32: {
                std::vector<uint32_t> values;
                auto array = std::dynamic_pointer_cast<arrow::UInt32Array>(
                    list_scalar->value);
                if (array) {
                  values.reserve(array->length());
                  for (int64_t i = 0; i < array->length(); ++i) {
                    if (array->IsValid(i)) {
                      values.push_back(array->Value(i));
                    }
                  }
                  doc->set(column_name, values);
                }
                break;
              }
              case arrow::Type::UINT64: {
                std::vector<uint64_t> values;
                auto array = std::dynamic_pointer_cast<arrow::UInt64Array>(
                    list_scalar->value);
                if (array) {
                  values.reserve(array->length());
                  for (int64_t i = 0; i < array->length(); ++i) {
                    if (array->IsValid(i)) {
                      values.push_back(array->Value(i));
                    }
                  }
                  doc->set(column_name, values);
                }
                break;
              }
              case arrow::Type::FLOAT: {
                std::vector<float> values;
                auto array = std::dynamic_pointer_cast<arrow::FloatArray>(
                    list_scalar->value);
                if (array) {
                  values.reserve(array->length());
                  for (int64_t i = 0; i < array->length(); ++i) {
                    if (array->IsValid(i)) {
                      values.push_back(array->Value(i));
                    }
                  }
                  doc->set(column_name, values);
                }
                break;
              }
              case arrow::Type::DOUBLE: {
                std::vector<double> values;
                auto array = std::dynamic_pointer_cast<arrow::DoubleArray>(
                    list_scalar->value);
                if (array) {
                  values.reserve(array->length());
                  for (int64_t i = 0; i < array->length(); ++i) {
                    if (array->IsValid(i)) {
                      values.push_back(array->Value(i));
                    }
                  }
                  doc->set(column_name, values);
                }
                break;
              }
              case arrow::Type::STRING: {
                std::vector<std::string> values;
                auto array = std::dynamic_pointer_cast<arrow::StringArray>(
                    list_scalar->value);
                if (array) {
                  values.reserve(array->length());
                  for (int64_t i = 0; i < array->length(); ++i) {
                    if (array->IsValid(i)) {
                      values.push_back(array->GetString(i));
                    }
                  }
                  doc->set(column_name, values);
                }
                break;
              }
              case arrow::Type::BINARY: {
                std::vector<std::string> values;
                auto array = std::dynamic_pointer_cast<arrow::BinaryArray>(
                    list_scalar->value);
                if (array) {
                  values.reserve(array->length());
                  for (int64_t i = 0; i < array->length(); ++i) {
                    if (array->IsValid(i)) {
                      values.push_back(array->GetString(i));
                    }
                  }
                  doc->set(column_name, values);
                }
                break;
              }
              default:
                LOG_WARN("Unsupported list element type: %s",
                         value_type->ToString().c_str());
                break;
            }
          }
        }
        break;
      }
      default:
        // Unsupported column types are logged and the field is left unset.
        LOG_ERROR("Unsupported type: %s", column_name.c_str());
        break;
    }
  }

  // fetch vector
  for (const auto &field : collection_schema_->vector_fields()) {
    int block_idx = find_persist_block_id(BlockType::VECTOR_INDEX,
                                          segment_doc_id, field->name());
    if (block_idx != -1) {
      // The row lives in a persisted vector block: convert the segment-local
      // row into a row relative to the start of that block.
      const auto &block_offsets =
          get_persist_block_offsets(BlockType::VECTOR_INDEX, field->name());
      auto block_offset = block_offsets[block_idx];
      auto local_row = segment_doc_id - block_offset;

      auto column_name = field->name();
      auto iter = vector_indexers_.find(column_name);
      if (iter != vector_indexers_.end()) {
        const auto &vector_indexers = iter->second;
        if (block_idx >= (int)vector_indexers.size()) {
          LOG_ERROR("block_idx[%d] out of range[%lu]", block_idx,
                    vector_indexers.size());
          continue;
        }
        auto vector_indexer = vector_indexers[block_idx];
        auto fetch_result = vector_indexer->Fetch(local_row);
        if (!fetch_result) {
          // Unlike the in-memory path below, a failed persisted fetch aborts
          // the whole call.
          LOG_ERROR(
              "vector indexer fetch failed, local_row: %d, block_idx: %d, "
              "segment_doc_id: %d",
              local_row, block_idx, segment_doc_id);
          return nullptr;
        }
        const auto &vector_buffer = fetch_result.value();
        auto status =
            ConvertVectorDataBufferToDocField(field, vector_buffer, doc.get());
        if (!status.ok()) {
          LOG_ERROR("convert vector data buffer to doc field failed %s",
                    status.message().c_str());
        }
      }

    } else {
      if (segment_meta_->has_writing_forward_block()) {
        // No persisted block covers the row: read it from the in-memory
        // indexer. Its rows start right after the last persisted block, or
        // at 0 when there is no persisted block.
        const auto &p_block_offsets =
            get_persist_block_offsets(BlockType::VECTOR_INDEX, field->name());
        const auto &p_block_metas =
            get_persist_block_metas(BlockType::VECTOR_INDEX, field->name());
        auto mem_block_offset =
            p_block_offsets.empty()
                ? 0
                : p_block_offsets.back() + p_block_metas.back().doc_count_;
        int local_row = segment_doc_id - mem_block_offset;
        auto column_name = field->name();
        auto iter = memory_vector_indexers_.find(column_name);
        if (iter != memory_vector_indexers_.end()) {
          auto vector_indexer = iter->second;
          auto fetch_result = vector_indexer->Fetch(local_row);
          if (!fetch_result.has_value()) {
            LOG_ERROR(
                "vector indexer fetch failed, column: %s, doc_count: %lu, "
                "mem_block_offset: %d, local_row: %d",
                field->name().c_str(), vector_indexer->doc_count(),
                mem_block_offset, local_row);
            continue;
          }
          const auto &vector_buffer = fetch_result.value();
          auto status = ConvertVectorDataBufferToDocField(field, vector_buffer,
                                                          doc.get());
          if (!status.ok()) {
            LOG_ERROR("convert vector data buffer to doc field failed %s",
                      status.message().c_str());
          }
        }
      } else {
        LOG_ERROR("Can't find vector block for g_doc_id: %zu",
                  (size_t)g_doc_id);
      }
    }
  }

  return doc;
}

// Builds a combined view over all vector indexers of `field_name`: the
// persisted indexers from vector_indexers_ followed by the in-memory indexer
// from memory_vector_indexers_ when one exists. The same list is passed both
// as the search indexers and as the "normal" (non-quantized) indexers.
//
// Returns nullptr (after logging) when the field is unknown to the
// collection schema or does not carry vector index params.
CombinedVectorColumnIndexer::Ptr SegmentImpl::get_combined_vector_indexer(
    const std::string &field_name) const {
  auto field = collection_schema_->get_field(field_name);
  if (!field) {
    LOG_ERROR("field %s not found in collection schema", field_name.c_str());
    return nullptr;
  }
  auto vector_index_params =
      std::dynamic_pointer_cast<VectorIndexParams>(field->index_params());
  // The cast yields null for a field without vector index params; bail out
  // instead of dereferencing it.
  if (!vector_index_params) {
    LOG_ERROR("vector index params not found for field %s",
              field_name.c_str());
    return nullptr;
  }
  MetricType metric_type = vector_index_params->metric_type();

  std::vector<VectorColumnIndexer::Ptr> indexers;
  auto iter = vector_indexers_.find(field_name);
  if (iter != vector_indexers_.end()) {
    indexers = iter->second;
  }
  auto m_iter = memory_vector_indexers_.find(field_name);
  if (m_iter != memory_vector_indexers_.end()) {
    indexers.push_back(m_iter->second);
  }

  auto blocks = get_persist_block_metas(BlockType::VECTOR_INDEX, field_name);

  auto normal_indexers = indexers;
  return std::make_shared<CombinedVectorColumnIndexer>(
      indexers, normal_indexers, *field, *segment_meta_, std::move(blocks),
      metric_type);
}

// Builds a combined view over the quantized vector indexers of
// `field_name`. The search indexers are the persisted quantized indexers
// followed by the in-memory quantized indexer; the "normal" list is built
// the same way from the non-quantized maps. Block metas come from the
// VECTOR_INDEX_QUANTIZE blocks and the trailing constructor flag is `true`.
//
// Returns nullptr (after logging) when the field is unknown to the
// collection schema or does not carry vector index params.
CombinedVectorColumnIndexer::Ptr SegmentImpl::get_quant_combined_vector_indexer(
    const std::string &field_name) const {
  auto field = collection_schema_->get_field(field_name);
  if (!field) {
    LOG_ERROR("field %s not found in collection schema", field_name.c_str());
    return nullptr;
  }
  auto vector_index_params =
      std::dynamic_pointer_cast<VectorIndexParams>(field->index_params());
  // The cast yields null for a field without vector index params; bail out
  // instead of dereferencing it.
  if (!vector_index_params) {
    LOG_ERROR("vector index params not found for field %s",
              field_name.c_str());
    return nullptr;
  }
  MetricType metric_type = vector_index_params->metric_type();

  std::vector<VectorColumnIndexer::Ptr> indexers;
  auto iter = quant_vector_indexers_.find(field_name);
  if (iter != quant_vector_indexers_.end()) {
    indexers = iter->second;
  }
  auto m_iter = quant_memory_vector_indexers_.find(field_name);
  if (m_iter != quant_memory_vector_indexers_.end()) {
    indexers.push_back(m_iter->second);
  }

  std::vector<VectorColumnIndexer::Ptr> normal_indexers;
  iter = vector_indexers_.find(field_name);
  if (iter != vector_indexers_.end()) {
    normal_indexers = iter->second;
  }
  m_iter = memory_vector_indexers_.find(field_name);
  if (m_iter != memory_vector_indexers_.end()) {
    normal_indexers.push_back(m_iter->second);
  }

  auto blocks =
      get_persist_block_metas(BlockType::VECTOR_INDEX_QUANTIZE, field_name);

  return std::make_shared<CombinedVectorColumnIndexer>(
      indexers, normal_indexers, *field, *segment_meta_, std::move(blocks),
      metric_type, true);
}

// Looks up the in-memory vector indexer registered for `field_name`.
// Returns nullptr when memory_vector_indexers_ has no entry for the field.
VectorColumnIndexer::Ptr SegmentImpl::get_memory_vector_indexer(
    const std::string &field_name) {
  const auto found = memory_vector_indexers_.find(field_name);
  if (found == memory_vector_indexers_.end()) {
    return nullptr;
  }
  return found->second;
}

// Looks up the in-memory quantized vector indexer registered for
// `field_name`. Returns nullptr when quant_memory_vector_indexers_ has no
// entry for the field.
VectorColumnIndexer::Ptr SegmentImpl::get_memory_quant_vector_indexer(
    const std::string &field_name) {
  const auto found = quant_memory_vector_indexers_.find(field_name);
  if (found == quant_memory_vector_indexers_.end()) {
    return nullptr;
  }
  return found->second;
}

// Returns a copy of the persisted vector indexer list stored for
// `field_name`, or an empty vector when vector_indexers_ has no entry for
// the field.
std::vector<VectorColumnIndexer::Ptr> SegmentImpl::get_vector_indexer(
    const std::string &field_name) const {
  const auto found = vector_indexers_.find(field_name);
  if (found == vector_indexers_.end()) {
    return std::vector<VectorColumnIndexer::Ptr>();
  }
  return found->second;
}

// Returns a copy of the persisted quantized vector indexer list stored for
// `field_name`, or an empty vector when quant_vector_indexers_ has no entry
// for the field.
std::vector<VectorColumnIndexer::Ptr> SegmentImpl::get_quant_vector_indexer(
    const std::string &field_name) const {
  auto iter = quant_vector_indexers_.find(field_name);
  if (iter != quant_vector_indexers_.end()) {
    return iter->second;
  }
  return std::vector<VectorColumnIndexer::Ptr>();
}

// Returns the inverted (scalar) column indexer for `field_name`, obtained
// by indexing invert_indexers_ with the field name. Returns nullptr when
// invert_indexers_ is not set.
InvertedColumnIndexer::Ptr SegmentImpl::get_scalar_indexer(
    const std::string &field_name) const {
  if (!invert_indexers_) {
    return nullptr;
  }
  return (*invert_indexers_)[field_name];
}

// Returns the segment's filter, or nullptr when the delete store reports
// itself empty.
const IndexFilter::Ptr SegmentImpl::get_filter() {
  if (delete_store_->empty()) {
    return nullptr;
  }
  return filter_;
}

// Builds the vector index of every vector field in the collection schema.
//
// Works on a copy of the current segment meta (with the writing forward block
// removed), delegates each field to create_vector_index(), and on success
// publishes the updated meta through `segment_meta`. Newly built indexers are
// reported through `vector_indexers` / `quant_vector_indexers`.
// Returns the first non-OK status produced by create_vector_index().
Status SegmentImpl::create_all_vector_index(
    int concurrency, SegmentMeta::Ptr *segment_meta,
    std::unordered_map<std::string, VectorColumnIndexer::Ptr> *vector_indexers,
    std::unordered_map<std::string, VectorColumnIndexer::Ptr>
        *quant_vector_indexers) {
  const auto &schema_vector_fields = collection_schema_->vector_fields();

  auto updated_meta = std::make_shared<SegmentMeta>(*segment_meta_);
  updated_meta->remove_writing_forward_block();

  std::set<std::string> indexed_names;
  for (const auto &vector_field : schema_vector_fields) {
    auto s = create_vector_index(vector_field->name(),
                                 vector_field->index_params(), concurrency,
                                 &updated_meta, vector_indexers,
                                 quant_vector_indexers);
    CHECK_RETURN_STATUS(s);
    indexed_names.insert(vector_field->name());
  }

  // Every vector field was processed; record the full set in one go.
  updated_meta->set_indexed_vector_fields(indexed_names);
  *segment_meta = updated_meta;

  return Status::OK();
}

// Creates a new vector indexer at `index_file_path` for `field`, merges the
// segment's existing indexers of `column` into it (applying filter_), and
// flushes it.
//
// `concurrency == 0` selects the global optimize thread pool and its
// configured thread count; any other value is used directly as the write
// concurrency. Returns the new indexer, or the failing status.
Result<VectorColumnIndexer::Ptr> SegmentImpl::merge_vector_indexer(
    const std::string &index_file_path, const std::string &column,
    const FieldSchema &field, int concurrency) {
  VectorColumnIndexer::Ptr merged =
      std::make_shared<VectorColumnIndexer>(index_file_path, field);

  vector_column_params::ReadOptions read_options{options_.enable_mmap_, true};
  auto s = merged->Open(read_options);
  CHECK_RETURN_STATUS_EXPECTED(s);

  // Snapshot of the indexers currently registered for this column.
  std::vector<VectorColumnIndexer::Ptr> sources = vector_indexers_[column];

  vector_column_params::MergeOptions merge_options;
  if (concurrency != 0) {
    merge_options.write_concurrency = concurrency;
  } else {
    merge_options.pool = GlobalResource::Instance().optimize_thread_pool();
    merge_options.write_concurrency =
        GlobalConfig::Instance().optimize_thread_count();
  }

  s = merged->Merge(sources, filter_, merge_options);
  CHECK_RETURN_STATUS_EXPECTED(s);

  s = merged->Flush();
  CHECK_RETURN_STATUS_EXPECTED(s);

  return merged;
}

// Builds the vector index of `column` according to `index_params`.
//
// Parameters:
//   column                 vector field to index.
//   index_params           requested index parameters (cast to
//                          VectorIndexParams below).
//   concurrency            forwarded to merge_vector_indexer().
//   segment_meta           in/out. When it points to nullptr, a copy of the
//                          current segment meta (minus the writing forward
//                          block) is created; otherwise the given meta is
//                          updated in place. On success it points to the
//                          updated meta.
//   vector_indexers        out. Receives the newly built non-quantized
//                          indexer for `column`, if one was built.
//   quant_vector_indexers  out. Receives the newly built quantized indexer
//                          for `column`, if one was built.
//
// Returns OK on success (including the "already indexed with the same params"
// no-op), otherwise the first failing status.
Status SegmentImpl::create_vector_index(
    const std::string &column, const IndexParams::Ptr &index_params,
    int concurrency, SegmentMeta::Ptr *segment_meta,
    std::unordered_map<std::string, VectorColumnIndexer::Ptr> *vector_indexers,
    std::unordered_map<std::string, VectorColumnIndexer::Ptr>
        *quant_vector_indexers) {
  auto field = collection_schema_->get_vector_field(column);
  SegmentMeta::Ptr new_segment_meta;
  if (*segment_meta == nullptr) {
    // Caller did not supply a meta to extend: start from the current one.
    new_segment_meta = std::make_shared<SegmentMeta>(*segment_meta_);
    new_segment_meta->remove_writing_forward_block();
  } else {
    new_segment_meta = *segment_meta;
  }

  if (segment_meta_->vector_indexed(column) &&
      *field->index_params() == *index_params) {
    // if segment is already indexed and index params are same, skip create
    *segment_meta = new_segment_meta;
    return Status::OK();
  }
  new_segment_meta->add_indexed_vector_field(column);

  // NOTE(review): the cast result is dereferenced without a null check;
  // presumably callers only pass VectorIndexParams here -- confirm.
  auto vector_index_params =
      std::dynamic_pointer_cast<VectorIndexParams>(index_params);

  if (vector_index_params->quantize_type() == QuantizeType::UNDEFINED) {
    // No quantization requested: build a single index file with the new
    // params and replace the column's persisted vector blocks with it.
    auto block_id = allocate_block_id();

    auto field_with_new_index_params = std::make_shared<FieldSchema>(*field);
    field_with_new_index_params->set_index_params(index_params);

    std::string index_file_path = FileHelper::MakeVectorIndexPath(
        path_, column, segment_meta_->id(), block_id);
    auto vector_indexer = merge_vector_indexer(
        index_file_path, column, *field_with_new_index_params, concurrency);
    if (!vector_indexer.has_value()) {
      return vector_indexer.error();
    }

    // insert() keeps an already-present entry for `column` untouched.
    vector_indexers->insert({column, vector_indexer.value()});

    new_segment_meta->remove_vector_persisted_block(column);
    BlockMeta block;
    block.set_id(block_id);
    block.set_type(BlockType::VECTOR_INDEX);
    block.set_columns({column});
    // NOTE(review): this branch derives the doc range from doc_ids_, while the
    // quantized branches below use meta(); confirm the difference is intended.
    block.set_min_doc_id(doc_ids_.front());
    block.set_max_doc_id(doc_ids_.back());
    block.set_doc_count(doc_ids_.size());
    new_segment_meta->add_persisted_block(block);

    *segment_meta = new_segment_meta;

  } else {
    // Quantization requested.
    auto original_index_params =
        std::dynamic_pointer_cast<VectorIndexParams>(field->index_params());

    // Source of raw vectors; only consumed by the RabitQ path below.
    core::IndexProvider::Pointer raw_vector_provider;

    // The existing non-quantized index is reused only when the metric is
    // unchanged and the column currently has exactly one indexer. Otherwise a
    // new non-quantized index is built first with the default params for the
    // requested metric.
    if (!(vector_index_params->metric_type() ==
              original_index_params->metric_type() &&
          vector_indexers_[column].size() == 1)) {
      BlockID block_id = allocate_block_id();

      auto field_with_flat = std::make_shared<FieldSchema>(*field);
      field_with_flat->set_index_params(
          MakeDefaultVectorIndexParams(vector_index_params->metric_type()));

      std::string index_file_path = FileHelper::MakeVectorIndexPath(
          path_, column, segment_meta_->id(), block_id);
      auto vector_indexer = merge_vector_indexer(index_file_path, column,
                                                 *field_with_flat, concurrency);
      if (!vector_indexer.has_value()) {
        return vector_indexer.error();
      }

      vector_indexers->insert({column, vector_indexer.value()});

      // Second argument `false`: presumably targets the non-quantized block
      // (the quantized paths below pass `true`) -- confirm against
      // SegmentMeta::remove_vector_persisted_block.
      new_segment_meta->remove_vector_persisted_block(column, false);
      BlockMeta block;
      block.set_id(block_id);
      block.set_type(BlockType::VECTOR_INDEX);
      block.set_columns({column});
      block.set_min_doc_id(meta()->min_doc_id());
      block.set_max_doc_id(meta()->max_doc_id());
      block.set_doc_count(meta()->doc_count());
      new_segment_meta->add_persisted_block(block);
      if (vector_index_params->quantize_type() == QuantizeType::RABITQ) {
        raw_vector_provider = vector_indexer.value()->create_index_provider();
      }
    } else {
      // Reuse the single existing indexer as the raw vector source.
      raw_vector_provider =
          vector_indexers_[column][0]->create_index_provider();
    }

    if (vector_index_params->quantize_type() != QuantizeType::RABITQ) {
      // Non-RabitQ quantization: build the quantized index directly from the
      // requested params.
      auto quant_block_id = allocate_block_id();
      auto field_with_new_index_params = std::make_shared<FieldSchema>(*field);
      field_with_new_index_params->set_index_params(index_params);

      std::string index_file_path = FileHelper::MakeQuantizeVectorIndexPath(
          path_, column, segment_meta_->id(), quant_block_id);
      auto vector_indexer = merge_vector_indexer(
          index_file_path, column, *field_with_new_index_params, concurrency);
      if (!vector_indexer.has_value()) {
        return vector_indexer.error();
      }

      quant_vector_indexers->insert({column, vector_indexer.value()});

      new_segment_meta->remove_vector_persisted_block(column, true);
      BlockMeta block;
      block.set_id(quant_block_id);
      block.set_type(BlockType::VECTOR_INDEX_QUANTIZE);
      block.set_columns({column});
      block.set_min_doc_id(meta()->min_doc_id());
      block.set_max_doc_id(meta()->max_doc_id());
      block.set_doc_count(meta()->doc_count());
      new_segment_meta->add_persisted_block(block);
    } else {
#if !RABITQ_SUPPORTED
      return Status::NotSupported(
          "RabitQ is not supported on this platform (Linux x86_64 only)");
#else
      // rabitq
      // Work on a clone so the caller's params are not mutated when the
      // reformer and raw vector provider are attached below.
      auto rabitq_params = std::dynamic_pointer_cast<HnswRabitqIndexParams>(
          vector_index_params->clone());
      if (!rabitq_params) {
        return Status::InternalError("Expect HnswRabitqIndexParams");
      }
      // train rabitq converter
      auto converter = core::IndexFactory::CreateConverter("RabitqConverter");
      if (!converter) {
        return Status::NotSupported("RabitqConverter not found");
      }
      core::IndexMeta index_meta;
      index_meta.set_meta(
          ProximaEngineHelper::convert_to_engine_data_type(field->data_type())
              .value(),
          // use field dimension
          field->dimension());
      index_meta.set_metric(
          core_interface::Index::get_metric_name(
              ProximaEngineHelper::convert_to_engine_metric_type(
                  vector_index_params->metric_type())
                  .value(),
              false),
          0, ailego::Params{});
      ailego::Params converter_params;
      converter_params.set(core::PARAM_RABITQ_TOTAL_BITS,
                           rabitq_params->total_bits());
      converter_params.set(core::PARAM_RABITQ_NUM_CLUSTERS,
                           rabitq_params->num_clusters());
      converter_params.set(core::PARAM_RABITQ_SAMPLE_COUNT,
                           rabitq_params->sample_count());
      if (int ret = converter->init(index_meta, converter_params); ret != 0) {
        return Status::InternalError("Failed to init rabitq converter:", ret);
      }
      if (int ret = converter->train(raw_vector_provider); ret != 0) {
        return Status::InternalError("Failed to train rabitq converter:", ret);
      }
      core::IndexReformer::Pointer reformer;
      if (int ret = converter->to_reformer(&reformer); ret != 0) {
        return Status::InternalError("Failed to to get rabitq reformer:", ret);
      }
      rabitq_params->set_rabitq_reformer(reformer);
      rabitq_params->set_raw_vector_provider(raw_vector_provider);

      // Build the quantized index using the params that now carry the trained
      // reformer and the raw vector provider.
      auto quant_block_id = allocate_block_id();
      auto field_with_new_index_params = std::make_shared<FieldSchema>(*field);
      field_with_new_index_params->set_index_params(rabitq_params);

      std::string index_file_path = FileHelper::MakeQuantizeVectorIndexPath(
          path_, column, segment_meta_->id(), quant_block_id);
      auto vector_indexer = merge_vector_indexer(
          index_file_path, column, *field_with_new_index_params, concurrency);
      if (!vector_indexer.has_value()) {
        return vector_indexer.error();
      }

      quant_vector_indexers->insert({column, vector_indexer.value()});

      new_segment_meta->remove_vector_persisted_block(column, true);
      BlockMeta block;
      block.set_id(quant_block_id);
      block.set_type(BlockType::VECTOR_INDEX_QUANTIZE);
      block.set_columns({column});
      block.set_min_doc_id(meta()->min_doc_id());
      block.set_max_doc_id(meta()->max_doc_id());
      block.set_doc_count(meta()->doc_count());
      new_segment_meta->add_persisted_block(block);
#endif
    }

    *segment_meta = new_segment_meta;
  }

  return Status::OK();
}

// Drops the custom vector index of `column` by rebuilding it with
// DefaultVectorIndexParams.
//
// When the field already uses the default params, only the meta copy (writing
// forward block removed, column marked indexed) is returned. Otherwise the
// existing indexers are merged without a filter into a new default index
// file, which is reported through `vector_indexers`, and the persisted block
// list in the returned meta is updated accordingly.
Status SegmentImpl::drop_vector_index(
    const std::string &column, SegmentMeta::Ptr *segment_meta,
    std::unordered_map<std::string, VectorColumnIndexer::Ptr>
        *vector_indexers) {
  auto field = collection_schema_->get_vector_field(column);

  auto updated_meta = std::make_shared<SegmentMeta>(*segment_meta_);
  updated_meta->remove_writing_forward_block();
  updated_meta->add_indexed_vector_field(column);

  // Nothing to rebuild when the field is already on the default index.
  if (*field->index_params() == DefaultVectorIndexParams) {
    *segment_meta = updated_meta;
    return Status::OK();
  }

  auto current_params =
      std::dynamic_pointer_cast<VectorIndexParams>(field->index_params());

  auto block_id = allocate_block_id();

  auto default_field = std::make_shared<FieldSchema>(*field);
  default_field->set_index_params(DefaultVectorIndexParams);

  std::string index_file_path = FileHelper::MakeVectorIndexPath(
      path_, column, segment_meta_->id(), block_id);

  auto rebuilt_indexer =
      std::make_shared<VectorColumnIndexer>(index_file_path, *default_field);
  vector_column_params::ReadOptions read_options{options_.enable_mmap_, true};

  auto s = rebuilt_indexer->Open(read_options);
  CHECK_RETURN_STATUS(s);
  s = rebuilt_indexer->Merge(vector_indexers_[column], nullptr);
  CHECK_RETURN_STATUS(s);
  s = rebuilt_indexer->Flush();
  CHECK_RETURN_STATUS(s);

  (*vector_indexers)[column] = rebuilt_indexer;

  // The flag passed on is true when the dropped index was a quantized one.
  const bool was_quantized =
      current_params->quantize_type() != QuantizeType::UNDEFINED;
  updated_meta->remove_vector_persisted_block(column, was_quantized);

  BlockMeta rebuilt_block;
  rebuilt_block.set_id(block_id);
  rebuilt_block.set_type(BlockType::VECTOR_INDEX);
  rebuilt_block.set_columns({column});
  rebuilt_block.set_min_doc_id(meta()->min_doc_id());
  rebuilt_block.set_max_doc_id(meta()->max_doc_id());
  rebuilt_block.set_doc_count(meta()->doc_count());
  updated_meta->add_persisted_block(rebuilt_block);

  *segment_meta = updated_meta;

  return Status::OK();
}

// Switches the segment over to a new schema / segment meta and swaps in the
// freshly built vector indexers.
//
// For every vector field in `schema`:
//   * if `vector_indexers` holds a replacement, the segment's current
//     indexers for that field are destroyed and replaced by it;
//   * if the field is not quantized, any quantized indexers the segment still
//     holds for it are destroyed and dropped;
//   * if the field is quantized and `quant_vector_indexers` holds a
//     replacement, the segment's current quantized indexers are destroyed and
//     replaced by it.
// Returns the first failing Destroy() status, otherwise OK.
Status SegmentImpl::reload_vector_index(
    const CollectionSchema &schema, const SegmentMeta::Ptr &new_segment_meta,
    const std::unordered_map<std::string, VectorColumnIndexer::Ptr>
        &vector_indexers,
    const std::unordered_map<std::string, VectorColumnIndexer::Ptr>
        &quant_vector_indexers) {
  collection_schema_ = std::make_shared<CollectionSchema>(schema);
  segment_meta_ = new_segment_meta;
  fresh_persist_block_offset();

  // Destroys every indexer of the given (copied) list, stopping at the first
  // failure.
  auto destroy_all =
      [](std::vector<VectorColumnIndexer::Ptr> stale) -> Status {
    for (auto indexer : stale) {
      auto s = indexer->Destroy();
      CHECK_RETURN_STATUS(s);
    }
    return Status::OK();
  };

  auto vector_fields = schema.vector_fields();

  for (auto field : vector_fields) {
    auto vector_index_params =
        std::dynamic_pointer_cast<VectorIndexParams>(field->index_params());
    const bool quantized =
        vector_index_params->quantize_type() != QuantizeType::UNDEFINED;

    // Non-quantized indexer replacement is identical for both kinds of field.
    auto replacement = vector_indexers.find(field->name());
    if (replacement != vector_indexers.end()) {
      auto s = destroy_all(vector_indexers_[field->name()]);
      CHECK_RETURN_STATUS(s);
      vector_indexers_[field->name()] = {replacement->second};
    }

    if (!quantized) {
      // Field no longer has a quantized index: drop what the segment holds.
      auto held = quant_vector_indexers_.find(field->name());
      if (held != quant_vector_indexers_.end()) {
        auto s = destroy_all(held->second);
        CHECK_RETURN_STATUS(s);
        quant_vector_indexers_.erase(held);
      }
    } else {
      // Field is quantized: swap in the new quantized indexer if provided.
      auto quant_replacement = quant_vector_indexers.find(field->name());
      if (quant_replacement != quant_vector_indexers.end()) {
        auto s = destroy_all(quant_vector_indexers_[field->name()]);
        CHECK_RETURN_STATUS(s);
        quant_vector_indexers_[field->name()] = {quant_replacement->second};
      }
    }
  }

  return Status::OK();
}

// Returns true when `column` is marked as indexed in the segment meta and the
// schema's index params for that field equal `index_params`.
bool SegmentImpl::vector_index_ready(
    const std::string &column, const IndexParams::Ptr &index_params) const {
  auto field = collection_schema_->get_vector_field(column);
  if (!segment_meta_->vector_indexed(column)) {
    return false;
  }
  return *field->index_params() == *index_params;
}

bool SegmentImpl::all_vector_index_ready() const {
  for (const auto &field : collection_schema_->vector_fields()) {
    if (!segment_meta_->vector_indexed(field->name())) {
      return false;
    }
  }
  return true;
}

// Builds inverted (scalar) indexes for `columns` using `index_params`.
//
// Columns that are unknown or are vector fields are rejected with
// InvalidArgument. Columns whose current index params already equal
// `index_params` are skipped; if nothing is left to index, only a copy of the
// current segment meta is returned through `segment_meta` and
// `scalar_indexer` is left untouched.
//
// Otherwise a new inverted indexer is created in a freshly allocated block
// (as a snapshot of the existing one when the segment already has an inverted
// indexer), the segment data is scanned and fed into it, it is sealed, and
// the result is published through `segment_meta` / `scalar_indexer`.
Status SegmentImpl::create_scalar_index(const std::vector<std::string> &columns,
                                        const IndexParams::Ptr &index_params,
                                        SegmentMeta::Ptr *segment_meta,
                                        InvertedIndexer::Ptr *scalar_indexer) {
  // validate
  std::vector<FieldSchema> fields;
  std::vector<std::string> field_names;

  for (const auto &column : columns) {
    auto field = collection_schema_->get_field(column);
    if (!field || field->is_vector_field()) {
      return Status::InvalidArgument("Invalid column name");
    }

    if (field->index_params() != nullptr &&
        *field->index_params() == *index_params) {
      // if already indexed, just skip it
      continue;
    }

    // Copy the schema entry by value and attach the requested index params;
    // no heap allocation is needed for this temporary.
    FieldSchema new_field(*field);
    new_field.set_index_params(index_params);

    field_names.push_back(new_field.name());
    fields.push_back(std::move(new_field));
  }

  auto new_segment_meta = std::make_shared<SegmentMeta>(*segment_meta_);
  if (fields.empty()) {
    *segment_meta = new_segment_meta;
    return Status::OK();
  }

  new_segment_meta->remove_scalar_index_block();

  // create scalar indexer
  // clone original indexer
  auto block_id = allocate_block_id();
  std::string new_invert_index_path =
      FileHelper::MakeInvertIndexPath(path_, id(), block_id);

  Status s;
  InvertedIndexer::Ptr new_scalar_indexer{nullptr};
  if (invert_indexers_) {
    // Start from a snapshot of the existing indexer so already indexed
    // columns are carried over.
    s = invert_indexers_->create_snapshot(new_invert_index_path);
    CHECK_RETURN_STATUS(s);

    auto inverted_fields_ptr = collection_schema_->forward_fields_with_index();
    std::vector<FieldSchema> inverted_fields;
    std::vector<std::string> inverted_field_names;
    for (const auto &field : inverted_fields_ptr) {
      inverted_fields.push_back(*field);
      inverted_field_names.push_back(field->name());
    }

    new_scalar_indexer = InvertedIndexer::CreateAndOpen(
        collection_schema_->name(), new_invert_index_path, false,
        inverted_fields, false);
    if (!new_scalar_indexer) {
      LOG_ERROR("Failed to create scalar indexer");
      return Status::InternalError("Failed to create scalar indexer");
    }
    for (const auto &field : fields) {
      // A column that is already indexed (with different params) must have
      // its old column indexer removed before the new one is created.
      if (std::find(inverted_field_names.begin(), inverted_field_names.end(),
                    field.name()) != inverted_field_names.end()) {
        s = new_scalar_indexer->remove_column_indexer(field.name());
        CHECK_RETURN_STATUS(s);
      }
      s = new_scalar_indexer->create_column_indexer(field);
      CHECK_RETURN_STATUS(s);
    }
  } else {
    new_scalar_indexer = InvertedIndexer::CreateAndOpen(
        collection_schema_->name(), new_invert_index_path, true, fields, false);
    if (!new_scalar_indexer) {
      LOG_ERROR("Failed to create scalar indexer");
      return Status::InternalError("Failed to create scalar indexer");
    }
  }

  // insert scalar indexer
  auto reader = scan(columns);
  if (reader == nullptr) {
    return Status::InternalError("Failed to create reader");
  }

  // Number of rows consumed so far; passed to ReduceScalarIndex for each
  // batch.
  int accu_doc_count = 0;
  while (true) {
    auto batch = reader->Next();
    if (!batch.ok()) {
      return Status::InternalError("reader next failed: ",
                                   batch.status().message());
    }

    auto batch_value = batch.ValueOrDie();

    // A null batch marks the end of the scan.
    if (!batch_value) {
      break;
    }

    s = SegmentHelper::ReduceScalarIndex(new_scalar_indexer, batch_value,
                                         accu_doc_count);
    if (!s.ok()) {
      LOG_ERROR("Reduce Scalar Index failed, err: %s", s.message().c_str());
    }
    CHECK_RETURN_STATUS(s);

    accu_doc_count += batch_value->num_rows();
  }

  s = new_scalar_indexer->seal();
  CHECK_RETURN_STATUS(s);

  BlockMeta block;
  block.set_id(block_id);
  block.set_type(BlockType::SCALAR_INDEX);
  block.set_columns(field_names);
  new_segment_meta->add_persisted_block(block);

  *segment_meta = new_segment_meta;
  *scalar_indexer = new_scalar_indexer;

  return Status::OK();
}

// Drops the inverted (scalar) index of `columns`.
//
// Columns that are unknown or are vector fields are rejected with
// InvalidArgument. The returned meta is a copy of the current one with the
// scalar index block removed. If no inverted-indexed forward field remains,
// `*scalar_indexer` is set to nullptr. Otherwise a snapshot of the current
// inverted indexer is taken into a freshly allocated block, the dropped
// columns are removed from it, it is sealed, and the result is published
// through `segment_meta` / `scalar_indexer`.
Status SegmentImpl::drop_scalar_index(const std::vector<std::string> &columns,
                                      SegmentMeta::Ptr *segment_meta,
                                      InvertedIndexer::Ptr *scalar_indexer) {
  // validate
  for (const auto &column : columns) {
    auto field = collection_schema_->get_field(column);
    if (!field || field->is_vector_field()) {
      return Status::InvalidArgument(
          "Invalid column name to drop scalar index");
    }
  }

  // fields / field_names: inverted-indexed fields that are kept.
  // drop_fields:          inverted-indexed fields that are being dropped.
  // invert_fields:        all currently inverted-indexed fields (kept and
  //                       dropped), needed to open the snapshot.
  std::vector<FieldSchema> fields;
  std::vector<FieldSchema> drop_fields;
  std::vector<FieldSchema> invert_fields;
  std::vector<std::string> field_names;
  for (const auto &field : collection_schema_->forward_fields()) {
    if (field->index_type() == IndexType::INVERT) {
      invert_fields.push_back(*field);
      if (std::find(columns.begin(), columns.end(), field->name()) !=
          columns.end()) {
        drop_fields.push_back(*field);
        continue;
      }
      fields.push_back(*field);
      field_names.push_back(field->name());
    }
  }

  auto new_segment_meta = std::make_shared<SegmentMeta>(*segment_meta_);
  new_segment_meta->remove_scalar_index_block();

  if (fields.empty()) {
    *segment_meta = new_segment_meta;
    *scalar_indexer = nullptr;
    return Status::OK();
  }

  // The snapshot below needs an existing inverted indexer; fail cleanly
  // instead of dereferencing a null pointer when the segment has none.
  if (!invert_indexers_) {
    LOG_ERROR("Scalar indexer is not initialized");
    return Status::InternalError("Scalar indexer is not initialized");
  }

  // clone original indexer
  auto block_id = allocate_block_id();
  std::string new_invert_index_path =
      FileHelper::MakeInvertIndexPath(path_, id(), block_id);
  auto s = invert_indexers_->create_snapshot(new_invert_index_path);
  CHECK_RETURN_STATUS(s);

  auto new_scalar_indexer = InvertedIndexer::CreateAndOpen(
      collection_schema_->name(), new_invert_index_path, false, invert_fields,
      options_.read_only_);
  if (!new_scalar_indexer) {
    LOG_ERROR("Failed to create scalar indexer");
    return Status::InternalError("Failed to create scalar indexer");
  }
  for (const auto &field : drop_fields) {
    s = new_scalar_indexer->remove_column_indexer(field.name());
    CHECK_RETURN_STATUS(s);
  }

  s = new_scalar_indexer->seal();
  CHECK_RETURN_STATUS(s);

  BlockMeta block;
  block.set_id(block_id);
  block.set_type(BlockType::SCALAR_INDEX);
  block.set_columns(field_names);

  new_segment_meta->add_persisted_block(block);

  *segment_meta = new_segment_meta;
  *scalar_indexer = new_scalar_indexer;

  return Status::OK();
}

// Switches the segment over to a new schema / segment meta and, when
// `scalar_indexer` is non-null, installs it as the segment's inverted
// indexer. The working directory of a previously installed indexer is removed
// after the swap.
Status SegmentImpl::reload_scalar_index(
    const CollectionSchema &schema, const SegmentMeta::Ptr &segment_meta,
    const InvertedIndexer::Ptr &scalar_indexer) {
  collection_schema_ = std::make_shared<CollectionSchema>(schema);
  segment_meta_ = segment_meta;

  if (!scalar_indexer) {
    // no need to reload inverted indexer
    return Status::OK();
  }

  fresh_persist_block_offset();

  if (!invert_indexers_) {
    // First inverted indexer of this segment: nothing to clean up.
    invert_indexers_ = scalar_indexer;
    return Status::OK();
  }

  // Remember where the outgoing indexer lives before replacing it.
  auto stale_dir = invert_indexers_->working_dir();
  invert_indexers_ = scalar_indexer;
  FileHelper::RemoveDirectory(stale_dir);

  return Status::OK();
}

// Flushes the segment, seals its inverted indexer (if any) and marks the
// segment as sealed. A segment can only be dumped once; later calls return
// NotSupported.
Status SegmentImpl::dump() {
  if (sealed_) {
    return Status::NotSupported("Segment has been dumped.");
  }

  Status s = flush();
  CHECK_RETURN_STATUS(s);

  if (invert_indexers_) {
    s = invert_indexers_->seal();
    CHECK_RETURN_STATUS(s);
  }

  // Only reached when flushing and sealing both succeeded.
  sealed_ = true;
  return Status::OK();
}

// Persists the segment's pending writes.
//
// Sequence: flush the WAL, the memory store, the inverted indexer, the
// in-memory (quantized) vector indexers and the id map; write a new delete
// snapshot if deletes changed; fold the memory components into the segment
// meta and open a new writing forward block; update the version; finally
// remove the WAL file and the superseded delete snapshot.
//
// Returns OK immediately when there is no WAL file or it holds no record.
// Any failing step aborts the flush and returns its status.
Status SegmentImpl::flush() {
  // Macro defined elsewhere; presumably returns an error status for read-only
  // segments -- confirm against its definition.
  CHECK_SEGMENT_READONLY_RETURN_STATUS;

  // Nothing was written since the last flush.
  if (wal_file_ == nullptr || !wal_file_->has_record()) {
    return Status::OK();
  }

  // wal_file_ is known to be non-null here because of the early return above.
  if (wal_file_) {
    if (wal_file_->flush() != 0) {
      return Status::InternalError("Failed to flush wal");
    }
  }

  Status s;

  if (memory_store_) {
    s = memory_store_->flush();
    CHECK_RETURN_STATUS(s);
  }

  // flush scalar indexer
  if (invert_indexers_) {
    s = invert_indexers_->flush();
    CHECK_RETURN_STATUS(s);
  }

  // flush vector indexer
  for (const auto &indexer : memory_vector_indexers_) {
    if (indexer.second) {
      s = indexer.second->Flush();
      CHECK_RETURN_STATUS(s);
    }
  }

  // flush quant vector indexer
  for (const auto &indexer : quant_memory_vector_indexers_) {
    if (indexer.second) {
      s = indexer.second->Flush();
      CHECK_RETURN_STATUS(s);
    }
  }

  if (id_map_) {
    s = id_map_->flush();
    CHECK_RETURN_STATUS(s);
  }

  // Copy of the current writing forward block, taken before it is replaced
  // below. NOTE(review): value() is called without checking that a writing
  // forward block exists -- confirm this is guaranteed here.
  auto block = segment_meta_->writing_forward_block().value();

  // UINT32_MAX means "no new delete snapshot written" / "no old snapshot to
  // remove" respectively.
  uint32_t delete_snapshot_path_suffix = UINT32_MAX;
  uint32_t delete_snapshot_path_suffix_current = UINT32_MAX;
  if (delete_store_) {
    if (delete_store_->modified_since_last_flush()) {
      // Write the delete store to a new file (current suffix + 1) and remember
      // the current suffix so the old file can be removed at the end.
      delete_snapshot_path_suffix_current =
          version_manager_->delete_snapshot_path_suffix();
      delete_snapshot_path_suffix =
          version_manager_->delete_snapshot_path_suffix() + 1;
      std::string delete_store_path = FileHelper::MakeFilePath(
          path_, FileID::DELETE_FILE, delete_snapshot_path_suffix);
      s = delete_store_->flush(delete_store_path);
      CHECK_RETURN_STATUS(s);
    }
  }

  if (memory_store_) {
    // update segment meta with memory components
    s = finish_memory_components();
    CHECK_RETURN_STATUS(s);

    // set a new mem block
    // The new block gets a fresh id, reuses the previous block's columns and
    // is initialized with max_doc_id_ + 1 of the previous block (presumably
    // its starting doc id -- confirm against the BlockMeta constructor).
    auto block_id = allocate_block_id();
    segment_meta_->set_writing_forward_block({block_id, BlockType::SCALAR,
                                              block.max_doc_id_ + 1, 0, 0,
                                              block.columns_});
  }

  // update version and flush
  s = update_version(delete_snapshot_path_suffix);
  CHECK_RETURN_STATUS(s);

  // clear wal file
  if (wal_file_) {
    auto ret = wal_file_->remove();
    if (ret != 0) {
      LOG_ERROR("Remove wal file failed.");
      return Status::InternalError("Remove wal file failed");
    }
    wal_file_.reset();
  }

  // Remove the delete snapshot that was superseded by the one written above;
  // done only after the version update succeeded.
  if (delete_snapshot_path_suffix_current != UINT32_MAX) {
    std::string delete_store_path = FileHelper::MakeFilePath(
        path_, FileID::DELETE_FILE, delete_snapshot_path_suffix_current);
    FileHelper::RemoveFile(delete_store_path);
  }

  return Status::OK();
}

// Marks the segment as needing destruction. Only the flag is set here.
// Returns InvalidArgument when the segment was already marked.
Status SegmentImpl::destroy() {
  if (!need_destroyed_) {
    need_destroyed_ = true;
    return Status::OK();
  }
  return Status::InvalidArgument("Segment has been marked need destroyed");
}

// Removes the segment's directory from disk. The result of the removal is
// not inspected; the function always returns OK.
Status SegmentImpl::cleanup() {
  auto segment_dir = FileHelper::MakeSegmentPath(path_, segment_meta_->id());
  FileHelper::RemoveDirectory(segment_dir);

  return Status::OK();
}

// Checks that `columns` is non-empty and that every entry is either one of
// the built-in columns (LOCAL_ROW_ID, GLOBAL_DOC_ID, USER_ID) or a forward
// field known to the collection schema. Logs and returns false on the first
// violation.
bool SegmentImpl::validate(const std::vector<std::string> &columns) const {
  if (columns.empty()) {
    LOG_ERROR("Empty columns");
    return false;
  }

  for (const auto &name : columns) {
    const bool is_builtin =
        name == LOCAL_ROW_ID || name == GLOBAL_DOC_ID || name == USER_ID;
    if (!is_builtin &&
        collection_schema_->get_forward_field(name) == nullptr) {
      LOG_ERROR("Validate failed. unknown column: %s", name.c_str());
      return false;
    }
  }

  return true;
}

// Builds a table containing the rows at `indices` for the requested
// `columns`, reading from the segment's cached chunked arrays.
//
// Each index is mapped to a (chunk, offset-in-chunk) pair via chunk_offsets_.
// A LOCAL_ROW_ID column is synthesized from `indices` themselves instead of
// being read from storage. Returns nullptr (after logging) when an index lies
// before the first chunk offset or when building an array fails.
TablePtr SegmentImpl::fetch_perf(
    const std::vector<std::string> &columns,
    const std::shared_ptr<arrow::Schema> &result_schema,
    const std::vector<int> &indices) const {
  const size_t column_count = columns.size();

  // Source array per requested column; the LOCAL_ROW_ID slot stays null.
  std::vector<std::shared_ptr<arrow::ChunkedArray>> sources(column_count);
  bool wants_row_id = false;
  size_t row_id_slot = 0;
  for (size_t c = 0; c < column_count; ++c) {
    if (columns[c] == LOCAL_ROW_ID) {
      wants_row_id = true;
      row_id_slot = c;
    } else {
      sources[c] = persist_chunk_arrays_[col_idx_map_.at(columns[c])];
    }
  }

  std::vector<std::shared_ptr<arrow::Array>> result_arrays(column_count);

  // Translate every requested row into (chunk index, index within chunk).
  std::vector<std::pair<int64_t, int64_t>> chunk_positions;
  for (const auto &target_index : indices) {
    auto upper = std::upper_bound(chunk_offsets_.begin(), chunk_offsets_.end(),
                                  target_index);
    if (upper == chunk_offsets_.begin()) {
      LOG_ERROR("Target index %d is out of bounds", target_index);
      return nullptr;
    }
    int chunk_index =
        static_cast<int>(std::distance(chunk_offsets_.begin(), upper) - 1);
    int64_t index_in_chunk = target_index - chunk_offsets_[chunk_index];
    chunk_positions.emplace_back(chunk_index, index_in_chunk);
  }

  // Gather the stored columns.
  for (size_t c = 0; c < column_count; ++c) {
    if (columns[c] == LOCAL_ROW_ID) {
      continue;
    }
    std::shared_ptr<arrow::Array> gathered;
    auto status =
        BuildArrayFromIndicesWithType(sources[c], chunk_positions, &gathered);
    if (!status.ok()) {
      LOG_ERROR("BuildArrayFromIndices failed: %s", status.ToString().c_str());
      return nullptr;
    }
    result_arrays[c] = gathered;
  }

  // Synthesize the LOCAL_ROW_ID column from the requested indices.
  if (wants_row_id) {
    const std::vector<uint64_t> row_ids(indices.begin(), indices.end());

    arrow::UInt64Builder builder;
    auto s = builder.AppendValues(row_ids);
    if (!s.ok()) {
      LOG_ERROR("Failed to append values to builder: %s", s.message().c_str());
      return nullptr;
    }
    std::shared_ptr<arrow::Array> row_id_array;
    s = builder.Finish(&row_id_array);
    if (!s.ok()) {
      LOG_ERROR("Failed to finish builder: %s", s.message().c_str());
      return nullptr;
    }
    result_arrays[row_id_slot] = row_id_array;
  }

  return arrow::Table::Make(result_schema, result_arrays,
                            static_cast<int64_t>(indices.size()));
}

TablePtr SegmentImpl::fetch_normal(
    const std::vector<std::string> &columns,
    const std::shared_ptr<arrow::Schema> &result_schema,
    const std::vector<int> &indices) const {
  // Store scalars per column: column_index -> (output_row, scalar)
  std::vector<std::vector<std::pair<int, std::shared_ptr<arrow::Scalar>>>>
      column_results(columns.size());

  // Collect local_doc_id values if needed
  std::vector<std::pair<int, uint64_t>> local_doc_id_values;

  // Group fetch requests by block: block_index -> {column -> [(output_row,
  // local_row)]}
  //   block_index >= 0: persisted store
  //   block_index == -1: memory store
  std::map<int, std::map<std::string, std::vector<std::pair<int, int>>>>
      block_request_map;

  std::shared_lock<std::shared_mutex> lock(seg_col_mtx_);

  const auto &block_offsets = get_persist_block_offsets(BlockType::SCALAR);
  const auto &block_metas = get_persist_block_metas(BlockType::SCALAR);

  // Phase 1: Map each (doc_id, column) to its block and local row
  for (int output_row = 0; output_row < static_cast<int>(indices.size());
       ++output_row) {
    int doc_id = indices[output_row];

    for (size_t col_index = 0; col_index < columns.size(); ++col_index) {
      const std::string &col = columns[col_index];
      if (col == LOCAL_ROW_ID) {
        local_doc_id_values.emplace_back(output_row, doc_id);
        continue;
      }
      int offset_idx = -1;
      int block_index =
          find_persist_block_id(BlockType::SCALAR, doc_id, col, &offset_idx);

      int local_row = -1;
      if (block_index != -1 && offset_idx > -1 &&
          offset_idx < static_cast<int>(block_offsets.size())) {
        local_row = doc_id - block_offsets[offset_idx];
        block_request_map[block_index][col].emplace_back(output_row, local_row);
        continue;
      }

      // Check memory store
      if (segment_meta_->has_writing_forward_block()) {
        int mem_offset =
            block_offsets.empty()
                ? 0
                : block_offsets.back() + block_metas.back().doc_count_;
        const auto &mem_block = segment_meta_->writing_forward_block().value();

        if (mem_offset <= doc_id &&
            doc_id < mem_offset + static_cast<int>(mem_block.doc_count_)) {
          local_row = doc_id - mem_offset;
          block_request_map[-1][col].emplace_back(output_row, local_row);
          continue;
        }
      }

      LOG_ERROR("Document ID %d not found in segment %d", doc_id, meta()->id());
      return nullptr;
    }
  }

  // Phase 2: Execute batched fetch per block
  for (const auto &[block_index, col_to_rows] : block_request_map) {
    std::vector<std::string> fetch_columns;
    std::vector<int> fetch_local_rows;
    std::vector<std::pair<int, int>>
        output_to_result_index;  // (output_row, result_pos)

    fetch_columns.reserve(col_to_rows.size());
    for (const auto &kv : col_to_rows) {
      fetch_columns.push_back(kv.first);
    }

    // all column has same output size, here just take first column
    for (const auto &[output_row, local_row] :
         col_to_rows.at(fetch_columns[0])) {
      fetch_local_rows.push_back(local_row);
      output_to_result_index.emplace_back(
          output_row, static_cast<int>(fetch_local_rows.size() - 1));
    }

    std::shared_ptr<arrow::Table> block_table;
    if (block_index >= 0 &&
        block_index < static_cast<int>(persist_stores_.size())) {
      block_table =
          persist_stores_[block_index]->fetch(fetch_columns, fetch_local_rows);
    } else if (block_index == -1 && memory_store_) {
      block_table = memory_store_->fetch(fetch_columns, fetch_local_rows);
    }

    if (!block_table || block_table->num_rows() == 0) {
      continue;
    }

    // Fill results
    for (size_t i = 0; i < fetch_columns.size(); ++i) {
      const std::string &col = fetch_columns[i];
      auto col_it = std::find(columns.begin(), columns.end(), col);
      if (col_it == columns.end()) continue;
      size_t col_index = std::distance(columns.begin(), col_it);

      auto chunked_array = block_table->column(i)->chunks();
      auto flat_array_res =
          arrow::Concatenate(chunked_array, arrow::default_memory_pool());
      if (!flat_array_res.ok()) {
        LOG_ERROR("Concatenate failed: %s",
                  flat_array_res.status().message().c_str());
        return nullptr;
      }
      auto flat_array = flat_array_res.ValueOrDie();

      for (size_t j = 0; j < fetch_local_rows.size(); ++j) {
        auto scalar_result = flat_array->GetScalar(j);
        if (!scalar_result.ok()) continue;
        int output_row = output_to_result_index[j].first;
        column_results[col_index].emplace_back(
            output_row, std::move(scalar_result.ValueOrDie()));
      }
    }
  }

  // Phase 3: Construct result arrays
  std::vector<std::shared_ptr<arrow::Array>> result_arrays(columns.size());

  bool need_local_doc_id = false;
  size_t local_doc_id_col_index = -1;

  for (size_t col_index = 0; col_index < columns.size(); ++col_index) {
    const std::string &col = columns[col_index];
    if (col == LOCAL_ROW_ID) {
      need_local_doc_id = true;
      local_doc_id_col_index = col_index;
      continue;
    }

    auto &result_vec = column_results[col_index];
    std::sort(result_vec.begin(), result_vec.end());

    std::vector<std::shared_ptr<arrow::Scalar>> ordered_scalars;
    for (int i = 0; i < static_cast<int>(indices.size()); ++i) {
      auto it = std::find_if(
          result_vec.begin(), result_vec.end(),
          [i](const std::pair<int, std::shared_ptr<arrow::Scalar>> &p) {
            return p.first == i;
          });
      if (it != result_vec.end()) {
        ordered_scalars.push_back(it->second);
      } else {
        auto field = result_schema->GetFieldByName(col);
        ordered_scalars.push_back(
            arrow::MakeNullScalar(field ? field->type() : arrow::null()));
      }
    }

    auto status = ConvertScalarVectorToArrayByType(ordered_scalars,
                                                   &result_arrays[col_index]);
    if (!status.ok()) {
      LOG_ERROR("Failed to convert scalars to array for column '%s': %s",
                col.c_str(), status.message().c_str());
      return nullptr;
    }
  }

  // Add LOCAL_ROW_ID array if requested
  if (need_local_doc_id) {
    std::sort(local_doc_id_values.begin(), local_doc_id_values.end());
    std::vector<uint64_t> values;
    values.reserve(local_doc_id_values.size());
    for (const auto &[row, id] : local_doc_id_values) {
      values.push_back(id);
    }

    arrow::UInt64Builder builder;
    auto s = builder.AppendValues(values);
    if (!s.ok()) {
      LOG_ERROR("Failed to append values to builder: %s", s.message().c_str());
      return nullptr;
    }
    std::shared_ptr<arrow::Array> array;
    s = builder.Finish(&array);
    if (!s.ok()) {
      LOG_ERROR("Failed to finish builder: %s", s.message().c_str());
      return nullptr;
    }
    result_arrays[local_doc_id_col_index] = std::move(array);
  }

  // Wrap arrays into ChunkedArray and build final table
  std::vector<std::shared_ptr<arrow::ChunkedArray>> result_columns;
  result_columns.reserve(result_arrays.size());
  for (const auto &arr : result_arrays) {
    result_columns.push_back(std::make_shared<arrow::ChunkedArray>(arr));
  }

  return arrow::Table::Make(result_schema, result_columns,
                            static_cast<int64_t>(indices.size()));
}

// Fetches the given columns for a batch of segment-local document ids and
// returns them as an Arrow table.
//
// @param columns  Column names to fetch. LOCAL_ROW_ID, GLOBAL_DOC_ID and
//                 USER_ID get fixed Arrow types; every other name is looked
//                 up in the collection schema.
// @param indices  Document ids to fetch; an empty list yields an empty table
//                 that still carries the full result schema.
// @return The result table, or nullptr when validation fails, a field cannot
//         be resolved/converted, or the segment contains no documents.
TablePtr SegmentImpl::fetch(const std::vector<std::string> &columns,
                            const std::vector<int> &indices) const {
  if (!validate(columns)) {
    return nullptr;
  }

  // Build result schema
  std::vector<std::shared_ptr<arrow::Field>> fields;
  fields.reserve(columns.size());

  for (const auto &col : columns) {
    if (col == LOCAL_ROW_ID) {
      fields.push_back(arrow::field(LOCAL_ROW_ID, arrow::uint64()));
    } else if (col == GLOBAL_DOC_ID) {
      fields.push_back(arrow::field(GLOBAL_DOC_ID, arrow::uint64()));
    } else if (col == USER_ID) {
      fields.push_back(arrow::field(USER_ID, arrow::utf8()));
    } else {
      auto *field = collection_schema_->get_field(col);
      // Guard before use: the error path below dereferences `field`.
      if (field == nullptr) {
        LOG_ERROR("Field %s not found in collection schema", col.c_str());
        return nullptr;
      }
      std::shared_ptr<arrow::Field> arrow_field;
      auto status = ConvertFieldSchemaToArrowField(field, &arrow_field);
      if (!status.ok()) {
        LOG_ERROR("Convert field schema failed: %s",
                  field->to_string().c_str());
        return nullptr;
      }
      fields.push_back(std::move(arrow_field));
    }
  }

  auto result_schema = std::make_shared<arrow::Schema>(fields);

  // Early return for empty indices
  if (indices.empty()) {
    arrow::ArrayVector empty_arrays;
    empty_arrays.reserve(fields.size());
    for (const auto &field : fields) {
      // Report the failure to the caller instead of aborting the process.
      auto empty_result = arrow::MakeEmptyArray(field->type());
      if (!empty_result.ok()) {
        LOG_ERROR("Make empty array failed: %s",
                  empty_result.status().message().c_str());
        return nullptr;
      }
      empty_arrays.push_back(std::move(empty_result).ValueOrDie());
    }
    return arrow::Table::Make(result_schema, empty_arrays, 0);
  }

  if (segment_meta_->doc_count() == 0) {
    LOG_ERROR("Segment has no rows");
    return nullptr;
  }

  // Dispatch to the configured fetch implementation.
  if (use_fetch_perf_) {
    return fetch_perf(columns, result_schema, indices);
  }
  return fetch_normal(columns, result_schema, indices);
}

// Fetches the given columns for a single document and returns them as an
// ExecBatch holding one scalar per column.
//
// Fast path: when the first persisted scalar block that contains any of the
// requested columns contains all of them, the row is read straight from the
// matching persisted store (or from the memory store for rows that are still
// being written). Otherwise the request is delegated to the batched table
// fetch and the single result row is repacked into scalars.
//
// @param columns  Column names to fetch; must not be empty.
// @param doc_id   Segment-local document id.
// @return The batch, or nullptr when `columns` is empty, the document cannot
//         be located, or the fetched row cannot be converted.
ExecBatchPtr SegmentImpl::fetch(const std::vector<std::string> &columns,
                                int doc_id) const {
  if (columns.empty()) {
    LOG_ERROR("Empty columns");
    return nullptr;
  }

  std::shared_lock<std::shared_mutex> lock(seg_col_mtx_);

  const auto &block_offsets = get_persist_block_offsets(BlockType::SCALAR);
  const auto &block_metas = get_persist_block_metas(BlockType::SCALAR);

  // Inspect the first block that holds at least one requested column: the
  // fast path applies only if that block holds every requested column.
  bool is_in_single_persist_store = false;
  for (const auto &block : block_metas) {
    const auto present = static_cast<size_t>(
        std::count_if(columns.begin(), columns.end(),
                      [&block](const std::string &column) {
                        return block.contain_column(column);
                      }));
    if (present == 0) {
      // None of the query columns are in this block; continue to the next one
      continue;
    }
    // Either all columns are here (fast path) or they span multiple blocks
    // (slow path); in both cases stop searching.
    is_in_single_persist_store = (present == columns.size());
    break;
  }

  if (is_in_single_persist_store) {
    int offset_idx = -1;
    int block_index = find_persist_block_id(BlockType::SCALAR, doc_id,
                                            columns[0], &offset_idx);
    // Bounds-check the store index as well, matching the batched fetch path.
    if (block_index >= 0 &&
        block_index < static_cast<int>(persist_stores_.size()) &&
        offset_idx > -1 &&
        offset_idx < static_cast<int>(block_offsets.size())) {
      int local_row = doc_id - block_offsets[offset_idx];
      return persist_stores_[block_index]->fetch(columns, local_row);
    }

    // Check memory store
    if (memory_store_ && segment_meta_->has_writing_forward_block()) {
      int mem_offset =
          block_offsets.empty()
              ? 0
              : block_offsets.back() + block_metas.back().doc_count_;
      const auto &mem_block = segment_meta_->writing_forward_block().value();

      if (mem_offset <= doc_id &&
          doc_id < mem_offset + static_cast<int>(mem_block.doc_count_)) {
        int local_row = doc_id - mem_offset;
        return memory_store_->fetch(columns, local_row);
      }
    }
  } else {
    // NOTE(review): this nested call runs while the shared lock above is still
    // held. If the batched fetch also locks seg_col_mtx_, that is a recursive
    // shared lock on std::shared_mutex - confirm against fetch_perf/normal.
    auto table = fetch(columns, std::vector<int>{doc_id});
    if (table && table->num_rows() > 0) {
      std::vector<arrow::Datum> datums;
      datums.reserve(static_cast<size_t>(table->num_columns()));
      for (const auto &col : table->columns()) {
        // Locate the first non-empty chunk instead of assuming chunk(0) has
        // a row; an empty column is reported rather than crashing.
        std::shared_ptr<arrow::Array> first_chunk;
        if (col) {
          for (const auto &chunk : col->chunks()) {
            if (chunk && chunk->length() > 0) {
              first_chunk = chunk;
              break;
            }
          }
        }
        if (!first_chunk) {
          LOG_ERROR("Fetched column is empty for document ID %d", doc_id);
          return nullptr;
        }

        auto scalar_result = first_chunk->GetScalar(0);
        if (!scalar_result.ok()) {
          LOG_ERROR("Get scalar failed: %s",
                    scalar_result.status().message().c_str());
          return nullptr;
        }
        datums.emplace_back(std::move(scalar_result).ValueOrDie());
      }

      arrow::Result<arrow::compute::ExecBatch> exec_batch_result =
          arrow::compute::ExecBatch::Make(std::move(datums),
                                          table->num_rows());

      if (exec_batch_result.ok()) {
        return std::make_shared<arrow::compute::ExecBatch>(
            std::move(exec_batch_result).ValueOrDie());
      }
    }
  }

  LOG_ERROR("Document ID %d not found in persist segment", doc_id);
  return nullptr;
}

// Creates a reader that streams the requested columns over the whole segment.
//
// Readers of blocks that share the same (min_doc_id, max_doc_id) range are
// merged column-wise; the merged readers are then chained in ascending range
// order (std::map ordering) by a CombinedRecordBatchReader.
//
// @param columns  Column names to scan.
// @return The combined reader, or nullptr when validation fails or a field
//         schema cannot be converted to an Arrow field.
RecordBatchReaderPtr SegmentImpl::scan(
    const std::vector<std::string> &columns) const {
  if (!validate(columns)) {
    return nullptr;
  }

  std::shared_lock<std::shared_mutex> lock(seg_col_mtx_);

  using ReaderList =
      std::vector<std::shared_ptr<arrow::ipc::RecordBatchReader>>;
  std::map<std::pair<int64_t, int64_t>, ReaderList> readers_by_range;

  // Persisted blocks: one reader per block that owns a requested column.
  const std::vector<BlockMeta> &persisted =
      get_persist_block_metas(BlockType::SCALAR);
  for (size_t idx = 0; idx < persisted.size() && idx < persist_stores_.size();
       ++idx) {
    const auto &block_meta = persisted[idx];

    std::vector<std::string> wanted;
    std::copy_if(columns.begin(), columns.end(), std::back_inserter(wanted),
                 [&block_meta](const std::string &name) {
                   return block_meta.contain_column(name);
                 });
    if (wanted.empty()) {
      continue;
    }

    auto block_reader = persist_stores_[idx]->scan(wanted);
    if (!block_reader) {
      continue;
    }

    auto range =
        std::make_pair(block_meta.min_doc_id(), block_meta.max_doc_id());
    readers_by_range[range].push_back(std::move(block_reader));
  }

  // Rows that are still buffered in the memory store.
  if (memory_store_ && memory_store_->num_rows() > 0) {
    if (auto mem_reader = memory_store_->scan(columns); mem_reader) {
      const auto &writing = segment_meta_->writing_forward_block().value();
      auto range = std::make_pair(writing.min_doc_id(), writing.max_doc_id());
      readers_by_range[range].push_back(std::move(mem_reader));
    }
  }

  // Target schema of the merged readers. LOCAL_ROW_ID is left out here; the
  // combined reader synthesizes that column itself.
  std::vector<std::shared_ptr<arrow::Field>> schema_fields;
  for (const auto &name : columns) {
    if (name == LOCAL_ROW_ID) {
      continue;
    }
    if (name == GLOBAL_DOC_ID) {
      schema_fields.push_back(
          arrow::field(GLOBAL_DOC_ID, arrow::uint64(), false));
      continue;
    }
    if (name == USER_ID) {
      schema_fields.push_back(arrow::field(USER_ID, arrow::utf8(), false));
      continue;
    }

    auto *field = collection_schema_->get_field(name);
    std::shared_ptr<arrow::Field> converted;
    auto convert_status = ConvertFieldSchemaToArrowField(field, &converted);
    if (!convert_status.ok()) {
      LOG_ERROR("convert field schema: %s to arrow field failed",
                field->to_string().c_str());
      return nullptr;
    }
    schema_fields.push_back(converted);
  }
  auto target_schema = std::make_shared<arrow::Schema>(schema_fields);

  // Merge each doc-id range into a single reader; ranges whose merge fails
  // are dropped.
  ReaderList range_readers;
  for (auto &[doc_range, group] : block_groups_placeholder(readers_by_range)) {
    auto merged = ColumnMergingReader::Make(target_schema, std::move(group));
    if (merged) {
      range_readers.push_back(std::move(merged));
    }
  }

  return std::make_shared<CombinedRecordBatchReader>(
      shared_from_this(), std::move(range_readers), columns);
}


////////////////////////////////////////////////////////////////////////////////////
// CombinedRecordBatchReader implementation
////////////////////////////////////////////////////////////////////////////////////

// Builds a reader that chains `readers` one after another and, when
// LOCAL_ROW_ID is among `columns`, inserts a generated row-id column.
//
// @param segment  Segment the readers belong to; stored in segment_.
// @param readers  Readers to drain in order.
// @param columns  Requested column names, in output order.
//
// With an empty reader list nothing besides the member initializers runs, so
// the projected schema and the offset table are not populated.
SegmentImpl::CombinedRecordBatchReader::CombinedRecordBatchReader(
    std::shared_ptr<const SegmentImpl> segment,
    std::vector<std::shared_ptr<arrow::RecordBatchReader>> readers,
    const std::vector<std::string> &columns)
    : segment_(segment),
      readers_(std::move(readers)),
      current_reader_index_(0),
      local_doc_id_(0) {
  if (readers_.empty()) {
    return;
  }

  // Project the first reader's schema onto the requested columns; names the
  // schema does not know are silently left out.
  const auto source_schema = readers_[0]->schema();
  std::vector<std::shared_ptr<arrow::Field>> projected;
  size_t position = 0;
  for (const auto &name : columns) {
    if (name == LOCAL_ROW_ID) {
      projected.push_back(arrow::field(LOCAL_ROW_ID, arrow::uint64(), false));
      need_local_doc_id_ = true;
      local_doc_id_col_index_ = static_cast<int>(position);
    } else if (auto known = source_schema->GetFieldByName(name); known) {
      projected.push_back(known);
    }
    ++position;
  }
  projected_schema_ = arrow::schema(projected);

  // Record the first doc id of every persisted SCALAR block, followed by the
  // one of the block that is currently being written (if any).
  auto meta_ptr = segment_->meta();
  for (const auto &persisted : meta_ptr->persisted_blocks()) {
    if (persisted.type() == BlockType::SCALAR) {
      offsets_.push_back(persisted.min_doc_id_);
    }
  }
  if (meta_ptr->has_writing_forward_block()) {
    const auto &writing = meta_ptr->writing_forward_block().value();
    offsets_.push_back(writing.min_doc_id_);
  }
}

// Nothing to release by hand: every member cleans up after itself.
SegmentImpl::CombinedRecordBatchReader::~CombinedRecordBatchReader() = default;

// Returns the projected schema assembled by the constructor. The constructor
// only assigns it when at least one reader was supplied.
std::shared_ptr<arrow::Schema> SegmentImpl::CombinedRecordBatchReader::schema()
    const {
  return projected_schema_;
}

// Reads the next batch, draining the underlying readers one after another.
//
// When LOCAL_ROW_ID was requested, a uint64 column of consecutive row ids is
// inserted at the requested position of every returned batch.
//
// @param batch  Receives the next batch, or nullptr once all readers are
//               exhausted (and on error).
// @return OK on success or end of stream; otherwise the error reported by the
//         underlying reader, the id builder, or the column insertion.
arrow::Status SegmentImpl::CombinedRecordBatchReader::ReadNext(
    std::shared_ptr<arrow::RecordBatch> *batch) {
  *batch = nullptr;
  while (current_reader_index_ < readers_.size()) {
    auto status = readers_[current_reader_index_]->ReadNext(batch);
    if (!status.ok()) {
      return status;
    }

    if (*batch) {
      if (need_local_doc_id_) {
        auto num_rows = (*batch)->num_rows();
        arrow::UInt64Builder builder;
        ARROW_RETURN_NOT_OK(builder.Reserve(num_rows));

        for (int64_t i = 0; i < num_rows; ++i) {
          builder.UnsafeAppend(local_doc_id_++);
        }
        std::shared_ptr<arrow::Array> local_id_array;
        ARROW_RETURN_NOT_OK(builder.Finish(&local_id_array));

        auto with_row_id =
            (*batch)->AddColumn(local_doc_id_col_index_,
                                projected_schema_->GetFieldByName(LOCAL_ROW_ID),
                                std::move(local_id_array));
        // Propagate the failure: handing out the batch without the row-id
        // column would silently contradict schema().
        if (!with_row_id.ok()) {
          *batch = nullptr;
          return with_row_id.status();
        }
        *batch = std::move(with_row_id).ValueOrDie();
      }
      return arrow::Status::OK();
    }

    // Current reader is exhausted: advance and restart the row-id counter at
    // the recorded offset of the next reader.
    // NOTE(review): offsets_ holds one entry per SCALAR block, whereas
    // readers_ holds one entry per merged doc-id range - confirm that both
    // are indexed consistently. The bounds check only prevents an
    // out-of-range read.
    current_reader_index_++;
    if (current_reader_index_ < readers_.size() &&
        current_reader_index_ < offsets_.size()) {
      local_doc_id_ = offsets_[current_reader_index_];
    }
  }

  *batch = nullptr;
  return arrow::Status::OK();
}

// True when a memory store exists and reports itself as full; false when the
// segment has no memory store.
bool SegmentImpl::ready_for_dump_block() {
  return memory_store_ && memory_store_->is_full();
}

// Feeds every element of one Arrow chunk into an inverted column indexer.
//
// Non-null values are handed to the indexer as their raw in-memory bytes
// (sizeof(ValueType) bytes); null slots are registered via insert_null().
//
// @tparam ArrayType  Concrete Arrow array type expected for `chunk`.
// @tparam ValueType  C++ value type stored in that array.
// @param column_indexer  Indexer that receives the values.
// @param chunk           Chunk to index.
// @param doc_count       In/out running document id; incremented once per
//                        processed element so that consecutive chunks
//                        continue the numbering.
// @return OK on success; an error when the chunk is not an `ArrayType` or
//         when the indexer rejects an insert.
template <typename ArrayType, typename ValueType>
Status ProcessChunkData(InvertedColumnIndexer::Ptr *column_indexer,
                        const std::shared_ptr<arrow::Array> &chunk,
                        int64_t &doc_count) {
  auto typed_array = std::dynamic_pointer_cast<ArrayType>(chunk);
  if (!typed_array) {
    // Previously this was skipped silently, which left the index empty and
    // doc_count unadvanced while still reporting success.
    LOG_ERROR("Unexpected arrow array type for chunk starting at doc %zu",
              static_cast<size_t>(doc_count));
    return Status::InternalError(
        "Unexpected arrow array type while building invert index");
  }

  const int64_t length = typed_array->length();
  for (int64_t i = 0; i < length; ++i, ++doc_count) {
    if (typed_array->IsNull(i)) {
      auto status = (*column_indexer)->insert_null(doc_count);
      if (!status.ok()) {
        LOG_ERROR("Failed to insert null value to indexer for doc %zu: %s",
                  static_cast<size_t>(doc_count), status.message().c_str());
        return status;
      }
    } else {
      ValueType value = typed_array->Value(i);
      std::string value_str(reinterpret_cast<const char *>(&value),
                            sizeof(ValueType));
      auto status = (*column_indexer)->insert(doc_count, value_str);
      if (!status.ok()) {
        LOG_ERROR("Failed to insert numeric value to indexer for doc %zu: %s",
                  static_cast<size_t>(doc_count), status.message().c_str());
        return status;
      }
    }
  }
  return Status::OK();
}

// Indexes a whole column into `column_indexer`, chunk by chunk.
//
// Document ids are assigned consecutively from 0 across all chunks. Only the
// numeric types INT32/INT64/UINT32/UINT64/FLOAT/DOUBLE are indexed; for any
// other type a warning is logged for each chunk and nothing is inserted.
//
// @param column_schema   Schema of the column; selects the typed code path.
// @param new_column      Column data to index.
// @param column_indexer  Indexer receiving the values.
// @return OK, or the first error reported while indexing a chunk.
Status SegmentImpl::insert_array_to_invert_indexer(
    const FieldSchema::Ptr &column_schema,
    const std::shared_ptr<arrow::ChunkedArray> &new_column,
    InvertedColumnIndexer::Ptr *column_indexer) {
  int64_t next_doc_id = 0;

  // Routes one chunk to the ProcessChunkData instantiation for its type.
  auto index_chunk =
      [&](const std::shared_ptr<arrow::Array> &chunk) -> Status {
    switch (column_schema->data_type()) {
      case DataType::INT32:
        return ProcessChunkData<arrow::Int32Array, int32_t>(
            column_indexer, chunk, next_doc_id);
      case DataType::INT64:
        return ProcessChunkData<arrow::Int64Array, int64_t>(
            column_indexer, chunk, next_doc_id);
      case DataType::UINT32:
        return ProcessChunkData<arrow::UInt32Array, uint32_t>(
            column_indexer, chunk, next_doc_id);
      case DataType::UINT64:
        return ProcessChunkData<arrow::UInt64Array, uint64_t>(
            column_indexer, chunk, next_doc_id);
      case DataType::FLOAT:
        return ProcessChunkData<arrow::FloatArray, float>(
            column_indexer, chunk, next_doc_id);
      case DataType::DOUBLE:
        return ProcessChunkData<arrow::DoubleArray, double>(
            column_indexer, chunk, next_doc_id);
      default:
        LOG_WARN(
            "Unsupported data type for indexing: %s",
            DataTypeCodeBook::AsString(column_schema->data_type()).c_str());
        return Status::OK();
    }
  };

  for (const auto &chunk : new_column->chunks()) {
    auto status = index_chunk(chunk);
    CHECK_RETURN_STATUS(status);
  }

  return Status::OK();
}


// Drops the current inverted indexer and opens it again for all forward
// fields of the collection schema that carry an index.
//
// The index path is derived from the id of the first SCALAR_INDEX block in
// the segment meta; block id 0 is used when no such block exists.
//
// @param read_only  Forwarded to InvertedIndexer::CreateAndOpen.
// @return OK, or InternalError when the indexer cannot be opened.
Status SegmentImpl::reopen_invert_indexer(bool read_only) {
  // build invert index path
  uint32_t block_id = 0;
  auto &persist_blocks = segment_meta_->persisted_blocks();
  for (auto &block : persist_blocks) {
    if (block.type() == BlockType::SCALAR_INDEX) {
      block_id = block.id();
      break;
    }
  }
  std::string invert_index_path =
      FileHelper::MakeInvertIndexPath(path_, id(), block_id);

  // build invert index fields
  auto inverted_fields_ptr = collection_schema_->forward_fields_with_index();
  std::vector<FieldSchema> inverted_fields;
  inverted_fields.reserve(inverted_fields_ptr.size());
  for (const auto &field : inverted_fields_ptr) {
    inverted_fields.push_back(*field);
  }

  // Release the old indexer before reopening the same path with the
  // requested read_only mode.
  invert_indexers_.reset();
  invert_indexers_ = InvertedIndexer::CreateAndOpen(collection_schema_->name(),
                                                    invert_index_path, false,
                                                    inverted_fields, read_only);
  if (!invert_indexers_) {
    LOG_ERROR("Failed to create scalar indexer");
    return Status::InternalError("Failed to create scalar indexer");
  }
  return Status::OK();
}

// Adds a new scalar column to a fully persisted segment.
//
// The column values are either all null (empty `expression`, nullable columns
// only) or computed by evaluating `expression` over the existing scalar
// blocks. The values are written as new forward blocks that follow the row
// partitioning of the existing blocks, optionally indexed, and finally
// registered in the segment meta and the collection schema.
//
// @param column_schema  Schema of the column to add.
// @param expression     Expression producing the values; empty means "all
//                       null".
// @return OK on success. NotSupported for segments that still have a memory
//         store or no scalar blocks, InvalidArgument for schema/expression
//         problems, InternalError for evaluation, I/O or indexer failures.
Status SegmentImpl::add_column(FieldSchema::Ptr column_schema,
                               const std::string &expression,
                               const AddColumnOptions & /*options*/) {
  if (memory_store_) {
    return Status::NotSupported(
        "Add column is not supported for segment with memory store");
  }

  global_init();

  std::vector<std::shared_ptr<arrow::Field>> fields;
  arrow::Status status =
      ConvertCollectionSchemaToArrowFields(collection_schema_, &fields);
  if (!status.ok()) {
    return Status::InvalidArgument(
        "ConvertCollectionSchemaToArrowFields failed:", status.message());
  }
  auto physic_schema = std::make_shared<arrow::Schema>(fields);

  auto &scalar_blocks = get_persist_block_metas(BlockType::SCALAR);
  if (scalar_blocks.empty()) {
    return Status::NotSupported(
        "Add column is not supported for empty scalar segment");
  }

  std::shared_ptr<arrow::Field> arrow_field;
  status = ConvertFieldSchemaToArrowField(column_schema.get(), &arrow_field);
  if (!status.ok()) {
    return Status::InvalidArgument("ConvertFieldSchemaToArrowField failed:",
                                   status.message());
  }

  // The blocks holding the first column of the first block describe how the
  // segment's rows are partitioned; the new column is written with the same
  // partitioning.
  const std::string &filter_column = scalar_blocks[0].columns()[0];
  std::vector<BlockMeta> filter_column_blocks;
  std::copy_if(scalar_blocks.begin(), scalar_blocks.end(),
               std::back_inserter(filter_column_blocks),
               [&filter_column](const BlockMeta &block) {
                 return block.contain_column(filter_column);
               });

  std::shared_ptr<arrow::ChunkedArray> new_column;
  auto expected_type = arrow_field->type();
  if (expression.empty()) {
    if (!column_schema->nullable()) {
      return Status::InvalidArgument(
          "Add column is not supported for non-nullable column");
    }
    // One null per row of the whole segment, not only of the first block, so
    // the column has the same length as in the expression branch below.
    int64_t total_rows = 0;
    for (const auto &block : filter_column_blocks) {
      total_rows += static_cast<int64_t>(block.doc_count_);
    }
    arrow::Result<std::shared_ptr<arrow::Array>> result =
        arrow::MakeArrayOfNull(expected_type, total_rows);
    if (!result.ok()) {
      return Status::InternalError("MakeArrayOfNull failed");
    }
    auto array = result.ValueOrDie();
    new_column = std::make_shared<arrow::ChunkedArray>(
        std::vector<std::shared_ptr<arrow::Array>>{array});

  } else {
    // Parse Simple sql expression
    auto p_result = ParseToExpression(expression, physic_schema);
    if (!p_result.ok()) {
      return Status::InvalidArgument("parse expression failed:",
                                     p_result.status().message());
    }
    auto expr = p_result.ValueOrDie();

    auto result = ReadBlocksAsDataset(scalar_blocks, path_, segment_meta_->id(),
                                      !options_.enable_mmap_);
    if (!result.ok()) {
      return Status::InternalError(result.status().message());
    }
    auto dataset = std::move(result).ValueOrDie();
    auto eval_result = EvaluateExpressionWithDataset(
        dataset, column_schema->name(), expr, expected_type);
    if (!eval_result.ok()) {
      return Status::InternalError("evaluate expression failed:",
                                   eval_result.status().message());
    }
    auto result_table = eval_result.ValueOrDie();
    if (result_table->num_columns() != 1) {
      return Status::InvalidArgument(
          "Expression result must have exactly one column");
    }
    new_column = result_table->column(0);
  }

  // write new column
  std::vector<BlockMeta> new_blocks;
  status = WriteColumnInBlocks(
      column_schema->name(), new_column, filter_column_blocks, path_,
      segment_meta_->id(), [this]() { return allocate_block_id(); },
      !options_.enable_mmap_, &new_blocks);
  if (!status.ok()) {
    return Status::InternalError(status.message());
  }

  // create persist scalar indexer
  if (column_schema->has_invert_index()) {
    if (invert_indexers_) {
      // An index block already exists: reopen it and add the new column.
      auto s = reopen_invert_indexer();
      CHECK_RETURN_STATUS(s);

      s = invert_indexers_->create_column_indexer(*column_schema);
      CHECK_RETURN_STATUS(s);

      // update segment meta
      auto &persist_blocks = segment_meta_->persisted_blocks();
      for (auto &block : persist_blocks) {
        if (block.type() == BlockType::SCALAR_INDEX) {
          block.add_column(column_schema->name());
          break;
        }
      }
    } else {
      // First indexed column of this segment: create a fresh index block.
      auto new_block_id = allocate_block_id();
      std::string new_invert_index_path =
          FileHelper::MakeInvertIndexPath(path_, id(), new_block_id);

      invert_indexers_ = InvertedIndexer::CreateAndOpen(
          collection_schema_->name(), new_invert_index_path, true,
          {*column_schema}, false);
      if (!invert_indexers_) {
        LOG_ERROR("Failed to create scalar indexer");
        return Status::InternalError("Failed to create scalar indexer");
      }

      // update segment meta
      BlockMeta block;
      block.set_id(new_block_id);
      block.set_type(BlockType::SCALAR_INDEX);
      block.set_doc_count(new_column->length());
      block.set_min_doc_id(doc_ids_.front());
      block.set_max_doc_id(doc_ids_.back());
      block.set_columns({column_schema->name()});

      segment_meta_->add_persisted_block(block);
    }

    auto column_indexer = (*invert_indexers_)[column_schema->name()];
    if (!column_indexer) {
      LOG_ERROR("Column indexer not found: %s", column_schema->name().c_str());
      return Status::InternalError("Column indexer not found");
    }
    auto s = insert_array_to_invert_indexer(column_schema, new_column,
                                            &column_indexer);
    CHECK_RETURN_STATUS(s);
    column_indexer->seal();
    invert_indexers_->flush();
  }

  // From here on the stores and meta used by readers are modified.
  std::unique_lock<std::shared_mutex> lock(seg_col_mtx_);
  // open a forward store for every newly written block and register it
  for (auto &block : new_blocks) {
    auto forward_path = FileHelper::MakeForwardBlockPath(
        path_, segment_meta_->id(), block.id_, !options_.enable_mmap_);

    BaseForwardStore::Ptr forward_store;
    if (options_.enable_mmap_) {
      forward_store = std::make_shared<MmapForwardStore>(forward_path);
    } else {
      forward_store = std::make_shared<BufferPoolForwardStore>(forward_path);
    }
    auto s = forward_store->Open();
    CHECK_RETURN_STATUS(s);
    persist_stores_.push_back(forward_store);
    segment_meta_->add_persisted_block(block);
  }

  // collection_schema append new field
  auto s = collection_schema_->add_field(column_schema);
  CHECK_RETURN_STATUS(s);

  fresh_persist_block_offset();

  fresh_persist_chunked_array();

  return Status::OK();
}


// Replaces an existing scalar column of a fully persisted segment with a
// column described by `new_column_schema`.
//
// The old values are read back, evaluated into the new column's Arrow type
// and written as new forward blocks. The inverted index is rebuilt when the
// new schema has one, or dropped when only the old schema had one. Finally
// the old column is removed from the block metadata (blocks that held only
// this column are deleted together with their files) and the collection
// schema is updated.
//
// @param column_name        Name of the existing column.
// @param new_column_schema  Schema the column should have afterwards.
// @return OK on success. NotSupported for segments that still have a memory
//         store or no scalar blocks, NotFound for an unknown column,
//         InvalidArgument/InternalError for conversion, evaluation, I/O or
//         indexer failures.
Status SegmentImpl::alter_column(const std::string &column_name,
                                 const FieldSchema::Ptr &new_column_schema,
                                 const AlterColumnOptions & /*options*/) {
  if (memory_store_) {
    return Status::NotSupported(
        "Alter column is not supported for segment with memory store");
  }

  global_init();

  auto old_field_schema = collection_schema_->get_forward_field(column_name);
  if (!old_field_schema) {
    return Status::NotFound("Column not found: " + column_name);
  }

  std::string new_column_name = new_column_schema->name();
  std::shared_ptr<arrow::Field> new_arrow_field;
  auto as =
      ConvertFieldSchemaToArrowField(new_column_schema.get(), &new_arrow_field);
  if (!as.ok()) {
    return Status::InternalError("ConvertFieldSchemaToArrowField failed: " +
                                 as.ToString());
  }

  auto &scalar_blocks = get_persist_block_metas(BlockType::SCALAR);
  if (scalar_blocks.empty()) {
    return Status::NotSupported(
        "Alter column is not supported for empty scalar segment");
  }

  // Only the blocks that actually store the old column need to be read.
  std::vector<BlockMeta> filter_column_blocks;
  for (const auto &block : scalar_blocks) {
    if (block.contain_column(column_name)) {
      filter_column_blocks.push_back(block);
    }
  }

  auto result = ReadBlocksAsDataset(
      filter_column_blocks, path_, segment_meta_->id(), !options_.enable_mmap_);
  if (!result.ok()) {
    return Status::InternalError(result.status().message());
  }
  auto dataset = std::move(result).ValueOrDie();

  // Evaluate a plain reference to the old column into the new Arrow type.
  arrow::Expression expr = arrow::compute::field_ref(old_field_schema->name());
  auto eval_result = EvaluateExpressionWithDataset(
      dataset, new_column_name, expr, new_arrow_field->type());
  if (!eval_result.ok()) {
    return Status::InternalError("evaluate expression failed:",
                                 eval_result.status().message());
  }
  auto result_table = eval_result.ValueOrDie();
  if (result_table->num_columns() != 1) {
    return Status::InvalidArgument(
        "Expression result must have exactly one column");
  }
  auto new_column = result_table->column(0);

  std::vector<BlockMeta> new_blocks;
  auto status = WriteColumnInBlocks(
      new_column_name, new_column, filter_column_blocks, path_,
      segment_meta_->id(), [this]() { return allocate_block_id(); },
      !options_.enable_mmap_, &new_blocks);
  if (!status.ok()) {
    return Status::InternalError(status.message());
  }

  if (new_column_schema->has_invert_index()) {
    if (invert_indexers_) {
      // Replace the column's indexer inside the existing index block.
      auto s = reopen_invert_indexer();
      CHECK_RETURN_STATUS(s);

      s = invert_indexers_->remove_column_indexer(column_name);
      CHECK_RETURN_STATUS(s);

      s = invert_indexers_->create_column_indexer(*new_column_schema);
      CHECK_RETURN_STATUS(s);

      // update segment meta
      auto &persist_blocks = segment_meta_->persisted_blocks();
      for (auto &block : persist_blocks) {
        if (block.type() == BlockType::SCALAR_INDEX) {
          block.del_column(old_field_schema->name());
          block.add_column(new_column_schema->name());
          break;
        }
      }
    } else {
      // First indexed column of this segment: create a fresh index block.
      auto new_block_id = allocate_block_id();
      std::string new_invert_index_path =
          FileHelper::MakeInvertIndexPath(path_, id(), new_block_id);

      invert_indexers_ = InvertedIndexer::CreateAndOpen(
          collection_schema_->name(), new_invert_index_path, true,
          {*new_column_schema}, false);
      if (!invert_indexers_) {
        LOG_ERROR("Failed to create scalar indexer");
        return Status::InternalError("Failed to create scalar indexer");
      }

      // update segment meta
      BlockMeta block;
      block.set_id(new_block_id);
      block.set_type(BlockType::SCALAR_INDEX);
      block.set_doc_count(new_column->length());
      block.set_min_doc_id(doc_ids_.front());
      block.set_max_doc_id(doc_ids_.back());
      block.set_columns({new_column_schema->name()});

      segment_meta_->add_persisted_block(block);
    }

    // insert data into new invert indexer
    auto column_indexer = (*invert_indexers_)[new_column_schema->name()];
    if (!column_indexer) {
      LOG_ERROR("Column indexer not found: %s",
                new_column_schema->name().c_str());
      return Status::InternalError("Column indexer not found");
    }
    auto s = insert_array_to_invert_indexer(new_column_schema, new_column,
                                            &column_indexer);
    CHECK_RETURN_STATUS(s);
    column_indexer->seal();
    invert_indexers_->flush();
  } else if (old_field_schema->has_invert_index()) {
    // drop old invert indexer
    auto s = reopen_invert_indexer();
    CHECK_RETURN_STATUS(s);

    s = invert_indexers_->remove_column_indexer(column_name);
    CHECK_RETURN_STATUS(s);

    auto &persist_blocks = segment_meta_->persisted_blocks();
    for (auto &block : persist_blocks) {
      if (block.type() == BlockType::SCALAR_INDEX) {
        block.del_column(old_field_schema->name());
        // The loop is left right after a removal, so `block` is not touched
        // again once the block list has changed.
        if (block.columns().empty()) {
          segment_meta_->remove_block(block.id());
        }
        break;
      }
    }
  }

  // From here on the stores and meta used by readers are modified.
  std::unique_lock<std::shared_mutex> lock(seg_col_mtx_);
  // update old block, remove column
  std::vector<BlockMeta> &persisted_blocks = segment_meta_->persisted_blocks();
  std::vector<int> will_del_block_idx;
  for (size_t idx = 0; idx < persisted_blocks.size(); idx++) {
    auto &block = persisted_blocks[idx];
    if (block.type() == BlockType::SCALAR) {
      if (block.contain_column(column_name)) {
        if (block.columns_.size() > 1) {
          block.del_column(column_name);
        } else {
          // The block holds only this column: drop the whole block.
          will_del_block_idx.push_back(idx);
        }
      }
    }
  }

  // delete single block (back to front so earlier indices stay valid)
  std::vector<int> will_del_block_ids;
  for (int i = static_cast<int>(will_del_block_idx.size()) - 1; i >= 0; i--) {
    int idx = will_del_block_idx[i];
    auto &block = persisted_blocks[idx];
    will_del_block_ids.push_back(block.id_);
    persisted_blocks.erase(persisted_blocks.begin() + idx);
  }

  // Drop the forward stores of the single-column blocks.
  // NOTE(review): presumably get_persist_block_metas() still reflects the
  // block layout from before the erase above and is index-aligned with
  // persist_stores_ - confirm.
  std::vector<int> will_del_local_block_idx;
  auto &local_blocks = get_persist_block_metas(BlockType::SCALAR);
  for (size_t idx = 0; idx < local_blocks.size(); idx++) {
    auto &block = local_blocks[idx];
    if (block.contain_column(column_name)) {
      if (block.columns_.size() == 1) {
        will_del_local_block_idx.push_back(idx);
      }
    }
  }

  for (int idx = static_cast<int>(will_del_local_block_idx.size()) - 1;
       idx >= 0; idx--) {
    int local_idx = will_del_local_block_idx[idx];
    persist_stores_.erase(persist_stores_.begin() + local_idx);
  }

  // NOTE(review): re-initializes the buffer manager in buffer-pool mode
  // before the files are removed; the reason is not visible here - confirm.
  if (!options_.enable_mmap_) {
    ailego::BufferManager::Instance().init(
        GlobalConfig::Instance().memory_limit_bytes(), 1);
  }

  // delete single column store file
  for (auto block_id : will_del_block_ids) {
    // delete forward store file
    std::string filepath = FileHelper::MakeForwardBlockPath(
        path_, meta()->id(), block_id, !options_.enable_mmap_);
    if (!FileHelper::RemoveFile(filepath)) {
      return Status::InternalError("remove ", filepath, " failed");
    } else {
      LOG_INFO("remove scalar store file: %s success", filepath.c_str());
    }
  }

  // open a forward store for every newly written block and register it
  for (auto &block : new_blocks) {
    auto forward_path = FileHelper::MakeForwardBlockPath(
        path_, segment_meta_->id(), block.id_, !options_.enable_mmap_);

    BaseForwardStore::Ptr forward_store;
    if (options_.enable_mmap_) {
      forward_store = std::make_shared<MmapForwardStore>(forward_path);
    } else {
      forward_store = std::make_shared<BufferPoolForwardStore>(forward_path);
    }
    auto s = forward_store->Open();
    CHECK_RETURN_STATUS(s);
    persist_stores_.push_back(forward_store);
    segment_meta_->add_persisted_block(block);
  }

  // replace the old field in the collection schema
  auto alter_status =
      collection_schema_->alter_field(column_name, new_column_schema);
  CHECK_RETURN_STATUS(alter_status);

  fresh_persist_block_offset();

  fresh_persist_chunked_array();

  return Status::OK();
}

// Drops the scalar column `column_name` from this segment.
//
// Only segments without an active memory store are handled. While holding an
// exclusive lock on seg_col_mtx_ the method:
//   1. removes the column from every persisted SCALAR block meta and collects
//      the blocks whose only column is the dropped one,
//   2. erases those block metas, erases the forward stores at the matching
//      positions of persist_stores_ and deletes the backing files,
//   3. removes the column's inverted indexer when the field has one and
//      updates the SCALAR_INDEX block meta,
//   4. drops the field from the collection schema and rebuilds the cached
//      block offsets / chunked arrays.
//
// Returns NotSupported when a memory store exists, InternalError when a store
// file cannot be removed, or the first failing status of the inverted-index
// and schema steps. Steps already performed are not rolled back on failure.
Status SegmentImpl::drop_column(const std::string &column_name) {
  if (memory_store_) {
    return Status::NotSupported(
        "Drop column is not supported for segment with memory store");
  }

  std::unique_lock<std::shared_mutex> lock(seg_col_mtx_);
  // update old block, remove column
  std::vector<BlockMeta> &persisted_blocks = segment_meta_->persisted_blocks();
  std::vector<int> will_del_block_idx;
  for (size_t idx = 0; idx < persisted_blocks.size(); idx++) {
    auto &block = persisted_blocks[idx];
    if (block.type() != BlockType::SCALAR ||
        !block.contain_column(column_name)) {
      continue;
    }
    if (block.columns_.size() > 1) {
      block.del_column(column_name);
    } else {
      // The block only stores the dropped column: remove the whole block.
      will_del_block_idx.push_back(static_cast<int>(idx));
    }
  }

  // delete single block; walk backwards so earlier indexes stay valid while
  // erasing
  std::vector<int> will_del_block_ids;
  for (int i = static_cast<int>(will_del_block_idx.size()) - 1; i >= 0; i--) {
    int idx = will_del_block_idx[i];
    auto &block = persisted_blocks[idx];
    will_del_block_ids.push_back(block.id_);
    persisted_blocks.erase(persisted_blocks.begin() + idx);
  }

  // The cached SCALAR metas have not been refreshed yet, so single-column
  // blocks are still visible here. Their positions are used to erase from
  // persist_stores_ (assumes both containers are index-aligned -- TODO
  // confirm).
  std::vector<int> will_del_local_block_idx;
  auto &local_blocks = get_persist_block_metas(BlockType::SCALAR);
  for (size_t idx = 0; idx < local_blocks.size(); idx++) {
    auto &block = local_blocks[idx];
    if (block.contain_column(column_name) && block.columns_.size() == 1) {
      will_del_local_block_idx.push_back(static_cast<int>(idx));
    }
  }

  for (int idx = static_cast<int>(will_del_local_block_idx.size()) - 1;
       idx >= 0; idx--) {
    int local_idx = will_del_local_block_idx[idx];
    persist_stores_.erase(persist_stores_.begin() + local_idx);
  }

  // NOTE(review): the buffer manager is re-initialised before the files are
  // removed in buffer-pool mode -- presumably to release cached file state;
  // confirm.
  if (!options_.enable_mmap_) {
    ailego::BufferManager::Instance().init(
        GlobalConfig::Instance().memory_limit_bytes(), 1);
  }

  // delete single column store file
  for (auto block_id : will_del_block_ids) {
    // delete forward store file
    std::string filepath = FileHelper::MakeForwardBlockPath(
        path_, meta()->id(), block_id, !options_.enable_mmap_);
    if (!FileHelper::RemoveFile(filepath)) {
      return Status::InternalError("remove ", filepath, " failed");
    } else {
      LOG_INFO("remove scalar store file: %s success", filepath.c_str());
    }
  }

  // An unknown column yields no schema entry; skip the index cleanup in that
  // case and let drop_field() below report the problem instead of
  // dereferencing a null schema.
  auto old_field_schema = collection_schema_->get_forward_field(column_name);
  if (old_field_schema && old_field_schema->has_invert_index()) {
    auto s = reopen_invert_indexer();
    CHECK_RETURN_STATUS(s);

    s = invert_indexers_->remove_column_indexer(old_field_schema->name());
    CHECK_RETURN_STATUS(s);
    invert_indexers_->flush();

    auto &persist_blocks = segment_meta_->persisted_blocks();
    for (auto &block : persist_blocks) {
      if (block.type() == BlockType::SCALAR_INDEX) {
        block.del_column(old_field_schema->name());
        if (block.columns_.empty()) {
          // remove block meta from segment meta; copy the id first because
          // `block` refers to the element that is being removed
          const auto empty_block_id = block.id_;
          segment_meta_->remove_block(empty_block_id);
        }
        // Only the first SCALAR_INDEX block is processed.
        break;
      }
    }
  }

  // collection_schema drop the field
  auto alter_status = collection_schema_->drop_field(column_name);
  CHECK_RETURN_STATUS(alter_status);

  fresh_persist_block_offset();

  fresh_persist_chunked_array();

  return Status::OK();
}

////////////////////////////////////////////////////////////////////////////////////
// Private methods implementation
////////////////////////////////////////////////////////////////////////////////////


void SegmentImpl::fresh_persist_block_offset() {
  // Clear
  for (size_t i = 0; i <= static_cast<size_t>(BlockType::VECTOR_INDEX_QUANTIZE);
       ++i) {
    if (std::holds_alternative<std::vector<int>>(persist_block_offsets_[i])) {
      std::get<std::vector<int>>(persist_block_offsets_[i]).clear();
    } else if (std::holds_alternative<
                   std::unordered_map<std::string, std::vector<int>>>(
                   persist_block_offsets_[i])) {
      std::get<std::unordered_map<std::string, std::vector<int>>>(
          persist_block_offsets_[i])
          .clear();
    }
    std::visit(
        [](auto &&arg) {
          using T = std::decay_t<decltype(arg)>;
          if constexpr (std::is_same_v<T, std::vector<BlockMeta>> ||
                        std::is_same_v<
                            T, std::unordered_map<std::string,
                                                  std::vector<BlockMeta>>>) {
            arg.clear();
          }
        },
        persist_block_metas_[i]);
  }

  for (const auto &block : segment_meta_->persisted_blocks()) {
    size_t type_index = static_cast<size_t>(block.type());
    if (block.type() == BlockType::SCALAR) {
      if (std::holds_alternative<std::vector<BlockMeta>>(
              persist_block_metas_[type_index])) {
        std::get<std::vector<BlockMeta>>(persist_block_metas_[type_index])
            .push_back(block);
      } else {
        persist_block_metas_[type_index] = std::vector<BlockMeta>{block};
        persist_block_offsets_[type_index] = std::vector<int>();
      }
    } else if (block.type() == BlockType::VECTOR_INDEX ||
               block.type() == BlockType::VECTOR_INDEX_QUANTIZE) {
      if (block.columns().size() == 1) {
        auto column_name = block.columns()[0];
        if (std::holds_alternative<
                std::unordered_map<std::string, std::vector<BlockMeta>>>(
                persist_block_metas_[type_index])) {
          auto block_map =
              std::get<std::unordered_map<std::string, std::vector<BlockMeta>>>(
                  persist_block_metas_[type_index]);
          auto iter = block_map.find(column_name);
          if (iter != block_map.end()) {
            auto &block_metas = iter->second;
            block_metas.push_back(block);
          } else {
            block_map.insert(
                std::make_pair(column_name, std::vector<BlockMeta>{block}));
            auto block_offsets_map =
                std::get<std::unordered_map<std::string, std::vector<int>>>(
                    persist_block_offsets_[type_index]);
            block_offsets_map.insert(
                std::make_pair(column_name, std::vector<int>()));
          }

          std::get<std::unordered_map<std::string, std::vector<BlockMeta>>>(
              persist_block_metas_[type_index])[column_name]
              .push_back(block);
        } else {
          std::unordered_map<std::string, std::vector<BlockMeta>> new_map;
          new_map[column_name].push_back(block);
          persist_block_metas_[type_index] = std::move(new_map);
          persist_block_offsets_[type_index] =
              std::unordered_map<std::string, std::vector<int>>();
        }
      } else {
        LOG_ERROR("Add block meta: %s failed, block.columns.size != 1",
                  block.to_string().c_str());
      }
    }
  }

  calculate_block_offsets();
}

void SegmentImpl::fresh_persist_chunked_array() {
  if (options_.enable_mmap_ && options_.read_only_) {
    persist_chunk_arrays_.clear();
    chunk_offsets_.clear();
    col_idx_map_.clear();
    use_fetch_perf_ = false;

    std::vector<std::vector<std::shared_ptr<arrow::ChunkedArray>>> chunk_arrays;
    auto fields = collection_schema_->forward_field_names();
    fields.insert(fields.begin(), USER_ID);
    fields.insert(fields.begin(), GLOBAL_DOC_ID);
    chunk_arrays.resize(fields.size());
    persist_chunk_arrays_.resize(fields.size());

    for (size_t i = 0; i < fields.size(); ++i) {
      col_idx_map_[fields[i]] = i;
    }

    auto &block_metas = get_persist_block_metas(BlockType::SCALAR);
    if (block_metas.empty()) {
      return;
    }

    for (size_t i = 0; i < block_metas.size(); ++i) {
      auto &block_meta = block_metas[i];
      const auto table = persist_stores_[i]->get_table();
      for (size_t j = 0; j < fields.size(); ++j) {
        if (block_meta.contain_column(fields[j])) {
          auto chunked_array = table->GetColumnByName(fields[j]);
          if (chunked_array) {
            chunk_arrays[j].push_back(chunked_array);
          }
        }
      }
    }

    for (size_t i = 0; i < fields.size(); ++i) {
      std::vector<std::shared_ptr<arrow::Array>> all_chunks;
      for (const auto &arr : chunk_arrays[i]) {
        for (int j = 0; j < arr->num_chunks(); ++j) {
          all_chunks.push_back(arr->chunk(j));
        }
      }
      persist_chunk_arrays_[i] =
          std::make_shared<arrow::ChunkedArray>(all_chunks);
    }

    auto &first_chunked_array = persist_chunk_arrays_[0];
    chunk_offsets_.reserve(first_chunked_array->num_chunks() + 1);
    chunk_offsets_.push_back(0);

    for (int chunk_idx = 0; chunk_idx < first_chunked_array->num_chunks();
         ++chunk_idx) {
      chunk_offsets_.push_back(chunk_offsets_.back() +
                               first_chunked_array->chunk(chunk_idx)->length());
    }

    if (persist_chunk_arrays_.size() > 0 && chunk_offsets_.size() > 0) {
      use_fetch_perf_ = true;
    }

    LOG_INFO(
        "fresh_persist_chunked_array persist_chunk_arrays[%zu] "
        "chunk_offset[%zu]",
        persist_chunk_arrays_.size(), chunk_offsets_.size());
  }
}

void SegmentImpl::calculate_block_offsets() {
  for (size_t type_index = 0;
       type_index <= static_cast<size_t>(BlockType::VECTOR_INDEX_QUANTIZE);
       ++type_index) {
    auto &block_offsets = persist_block_offsets_[type_index];
    int current_offset = 0;

    // Visit the appropriate container based on the variant type
    std::visit(
        [&current_offset, &block_offsets](auto &&blocks) {
          using T = std::decay_t<decltype(blocks)>;

          if constexpr (std::is_same_v<T, std::vector<BlockMeta>>) {
            // For SCALAR type - simple vector
            auto &offset_vector = std::get<std::vector<int>>(block_offsets);
            offset_vector.clear();
            offset_vector.reserve(blocks.size());
            if (!blocks.empty()) {
              auto &filter_col_name = blocks[0].columns()[0];
              for (const auto &block : blocks) {
                if (!block.contain_column(filter_col_name)) continue;
                offset_vector.push_back(current_offset);
                current_offset += static_cast<int>(block.doc_count_);
              }
            }
          } else if constexpr (std::is_same_v<T, std::unordered_map<
                                                     std::string,
                                                     std::vector<BlockMeta>>>) {
            // For other types - map with column names
            auto &offset_map =
                std::get<std::unordered_map<std::string, std::vector<int>>>(
                    block_offsets);
            offset_map.clear();

            for (const auto &[column_name, block_list] : blocks) {
              auto &column_offsets = offset_map[column_name];
              column_offsets.reserve(block_list.size());
              int column_offset = 0;

              for (const auto &block : block_list) {
                column_offsets.push_back(column_offset);
                column_offset += static_cast<int>(block.doc_count_);
              }
            }
          }
        },
        persist_block_metas_[type_index]);
  }
}

// Finds the cached persisted block of `type` whose doc range contains
// `segment_doc_id`.
//
// For flat block lists only blocks containing the filter column take part;
// the filter column is `col_name`, or the first column of the first block when
// `col_name` is empty, GLOBAL_DOC_ID or USER_ID. On a hit the index of the
// block inside the list is returned and, if `out_offset_idx` is non-null, it
// receives the rank of the block among the participating blocks.
//
// For per-column block lists the lists whose key equals `col_name` (or is
// empty) are searched and the index inside that list is returned;
// `out_offset_idx` is not written in this case.
//
// Returns -1 when no block matches.
int SegmentImpl::find_persist_block_id(BlockType type, int segment_doc_id,
                                       const std::string &col_name,
                                       int *out_offset_idx) const {
  using BlockMetaList = std::vector<BlockMeta>;
  using BlockMetaMap = std::unordered_map<std::string, BlockMetaList>;

  const auto locate = [&col_name, segment_doc_id,
                       out_offset_idx](const auto &metas) -> int {
    using Held = std::decay_t<decltype(metas)>;

    if constexpr (std::is_same_v<Held, BlockMetaList>) {
      if (metas.empty()) {
        return -1;
      }

      std::string filter_column = col_name;
      const bool use_reference_column =
          col_name.empty() || col_name == GLOBAL_DOC_ID || col_name == USER_ID;
      if (use_reference_column) {
        filter_column = metas[0].columns()[0];
      }

      int range_begin = 0;
      int rank = -1;
      for (size_t i = 0; i < metas.size(); ++i) {
        const auto &meta = metas[i];
        if (!meta.contain_column(filter_column)) {
          continue;
        }
        ++rank;
        const int range_end =
            range_begin + static_cast<int>(meta.doc_count_);
        if (range_begin <= segment_doc_id && segment_doc_id < range_end) {
          if (out_offset_idx) {
            *out_offset_idx = rank;
          }
          return static_cast<int>(i);
        }
        range_begin = range_end;
      }
    } else if constexpr (std::is_same_v<Held, BlockMetaMap>) {
      for (const auto &[column_name, column_metas] : metas) {
        const bool searchable = column_name.empty() || column_name == col_name;
        if (!searchable) {
          continue;
        }

        int range_begin = 0;
        for (size_t i = 0; i < column_metas.size(); ++i) {
          const int range_end =
              range_begin + static_cast<int>(column_metas[i].doc_count_);
          if (range_begin <= segment_doc_id && segment_doc_id < range_end) {
            return static_cast<int>(i);
          }
          range_begin = range_end;
        }
      }
    }

    return -1;
  };

  return std::visit(locate, persist_block_metas_[static_cast<size_t>(type)]);
}

// Returns the cached starting offsets of the persisted blocks of `type`.
//
// When the cache for `type` is a flat list it is returned as is and `col_name`
// is ignored. When it is keyed by column, the list stored under `col_name` is
// returned. In every other case a reference to a shared empty vector is
// returned.
const std::vector<int> &SegmentImpl::get_persist_block_offsets(
    BlockType type, const std::string &col_name) const {
  using OffsetList = std::vector<int>;
  using OffsetMap = std::unordered_map<std::string, OffsetList>;

  static const OffsetList kNoOffsets;

  const auto select = [&col_name](const auto &held) -> const OffsetList & {
    using Held = std::decay_t<decltype(held)>;

    if constexpr (std::is_same_v<Held, OffsetList>) {
      return held;
    } else if constexpr (std::is_same_v<Held, OffsetMap>) {
      const auto found = held.find(col_name);
      if (found == held.end()) {
        return kNoOffsets;
      }
      return found->second;
    } else {
      return kNoOffsets;
    }
  };

  return std::visit(select, persist_block_offsets_[static_cast<size_t>(type)]);
}

// Returns the cached metas of the persisted blocks of `type`.
//
// When the cache for `type` is a flat list it is returned as is and `col_name`
// is ignored. When it is keyed by column, the list stored under `col_name` is
// returned. In every other case a reference to a shared empty vector is
// returned.
const std::vector<BlockMeta> &SegmentImpl::get_persist_block_metas(
    BlockType type, const std::string &col_name) const {
  using BlockMetaList = std::vector<BlockMeta>;
  using BlockMetaMap = std::unordered_map<std::string, BlockMetaList>;

  static const BlockMetaList kNoMetas;

  const auto select = [&col_name](const auto &held) -> const BlockMetaList & {
    using Held = std::decay_t<decltype(held)>;

    if constexpr (std::is_same_v<Held, BlockMetaList>) {
      return held;
    } else if constexpr (std::is_same_v<Held, BlockMetaMap>) {
      const auto found = held.find(col_name);
      if (found == held.end()) {
        return kNoMetas;
      }
      return found->second;
    } else {
      return kNoMetas;
    }
  };

  return std::visit(select, persist_block_metas_[static_cast<size_t>(type)]);
}

// Opens a forward store for every persisted SCALAR block and appends it to
// persist_stores_. For blocks that contain GLOBAL_DOC_ID, that column is
// scanned and its values are appended to doc_ids_.
//
// Returns the failing status when a store cannot be opened, and InternalError
// when a batch cannot be read or the scanned column is not a UInt64 array.
Status SegmentImpl::load_persist_scalar_blocks() {
  doc_ids_.reserve(segment_meta_->doc_count());

  for (const auto &block : segment_meta_->persisted_blocks()) {
    if (block.type() != BlockType::SCALAR) {
      continue;
    }

    auto forward_path = FileHelper::MakeForwardBlockPath(
        path_, segment_meta_->id(), block.id_, !options_.enable_mmap_);

    // The store implementation follows the mmap option.
    BaseForwardStore::Ptr forward_store;
    if (options_.enable_mmap_) {
      forward_store = std::make_shared<MmapForwardStore>(forward_path);
    } else {
      forward_store = std::make_shared<BufferPoolForwardStore>(forward_path);
    }
    auto s = forward_store->Open();
    CHECK_RETURN_STATUS(s);
    persist_stores_.push_back(forward_store);

    // Blocks without the doc-id column contribute no ids.
    if (!block.contain_column(GLOBAL_DOC_ID)) {
      continue;
    }

    auto rb_reader = forward_store->scan({GLOBAL_DOC_ID});
    for (;;) {
      std::shared_ptr<arrow::RecordBatch> batch;
      auto status = rb_reader->ReadNext(&batch);
      if (!status.ok()) {
        LOG_ERROR("Read batch failed: %s", status.message().c_str());
        return Status::InternalError(status.message());
      }

      // A null batch marks the end of the stream.
      if (batch == nullptr) {
        break;
      }

      auto uint64_array =
          std::dynamic_pointer_cast<arrow::UInt64Array>(batch->column(0));
      if (!uint64_array) {
        LOG_ERROR("Failed to cast column to UInt64Array");
        return Status::InternalError("Array type mismatch");
      }

      auto *first_value = uint64_array->raw_values();
      doc_ids_.insert(doc_ids_.end(), first_value,
                      first_value + uint64_array->length());
    }
  }

  return Status::OK();
}

// Opens the inverted indexer covering all forward fields whose index type is
// INVERT.
//
// With `create` set, a new block id is allocated, the indexer is created at
// the matching path and a SCALAR_INDEX block listing the indexed fields is
// added to the segment meta. Otherwise the indexer is opened from the first
// persisted SCALAR_INDEX block.
//
// Returns OK right away when no field is inverted-indexed, InternalError when
// the indexer cannot be opened, and NotFound when no SCALAR_INDEX block is
// persisted and no indexer is already set.
Status SegmentImpl::load_scalar_index_blocks(bool create) {
  std::vector<FieldSchema> fields;
  std::vector<std::string> field_names;
  for (const auto &field : collection_schema_->forward_fields()) {
    if (field->index_type() != IndexType::INVERT) {
      continue;
    }
    fields.push_back(*field);
    field_names.push_back(field->name());
  }

  if (fields.empty()) {
    LOG_INFO("No scalar index found");
    return Status::OK();
  }

  if (!create) {
    // Open from the first persisted SCALAR_INDEX block.
    for (const auto &block : segment_meta_->persisted_blocks()) {
      if (block.type() != BlockType::SCALAR_INDEX) {
        continue;
      }
      auto block_id = block.id();
      auto invert_path = FileHelper::MakeInvertIndexPath(path_, id(), block_id);
      auto collection_name = collection_schema_->name();
      invert_indexers_ = InvertedIndexer::CreateAndOpen(
          collection_name, invert_path, false, fields, options_.read_only_);
      if (!invert_indexers_) {
        LOG_ERROR("Failed to open scalar indexer");
        return Status::InternalError("Failed to open scalar indexer");
      }
      return Status::OK();
    }

    if (invert_indexers_ == nullptr) {
      LOG_ERROR("No scalar index found");
      return Status::NotFound("No scalar index found");
    }
    return Status::OK();
  }

  // Create a brand-new index block.
  auto block_id = allocate_block_id();
  auto invert_path = FileHelper::MakeInvertIndexPath(path_, id(), block_id);
  auto collection_name = collection_schema_->name();
  invert_indexers_ = InvertedIndexer::CreateAndOpen(
      collection_name, invert_path, true, fields, options_.read_only_);
  if (!invert_indexers_) {
    LOG_ERROR("Failed to open scalar indexer");
    return Status::InternalError("Failed to open scalar indexer");
  }

  // scalar index block
  segment_meta_->add_persisted_block(
      BlockMeta{block_id, BlockType::SCALAR_INDEX, 0, 0, 0, field_names});

  return Status::OK();
}

// Opens a vector column indexer for every persisted VECTOR_INDEX and
// VECTOR_INDEX_QUANTIZE block and registers it under the block's column in
// vector_indexers_ or quant_vector_indexers_ respectively.
//
// The field schema handed to the indexer is a copy of the collection's vector
// field. Its index params are replaced by the default (or default quantized)
// params when the column is not reported as indexed by the segment meta; for
// plain VECTOR_INDEX blocks this also happens when the field declares a
// quantize type.
//
// Returns InternalError when a block has no column, the column is missing
// from the schema or its index params are not VectorIndexParams, and the
// failing status when an indexer cannot be opened.
Status SegmentImpl::load_vector_index_blocks() {
  for (const auto &block : segment_meta_->persisted_blocks()) {
    const bool is_quantized =
        block.type() == BlockType::VECTOR_INDEX_QUANTIZE;
    if (block.type() != BlockType::VECTOR_INDEX && !is_quantized) {
      continue;
    }

    // vector block only contained 1 column
    if (block.columns().empty()) {
      LOG_ERROR("Vector index block has no column: %s",
                block.to_string().c_str());
      return Status::InternalError("Vector index block has no column: ",
                                   block.to_string());
    }
    auto column = block.columns()[0];

    // Guard the schema lookup: the original dereferenced it unconditionally.
    auto field_schema = collection_schema_->get_vector_field(column);
    if (!field_schema) {
      LOG_ERROR("Vector field[%s] not found in schema", column.c_str());
      return Status::InternalError("Vector field not found in schema: ",
                                   column);
    }
    FieldSchema new_field_params = *field_schema;

    auto vector_index_params = std::dynamic_pointer_cast<VectorIndexParams>(
        new_field_params.index_params());
    if (!vector_index_params) {
      LOG_ERROR("Vector field[%s] has no vector index params", column.c_str());
      return Status::InternalError("Vector index params missing for field: ",
                                   column);
    }

    if (!is_quantized) {
      if (vector_index_params->quantize_type() != QuantizeType::UNDEFINED ||
          !segment_meta_->vector_indexed(column)) {
        new_field_params.set_index_params(
            MakeDefaultVectorIndexParams(vector_index_params->metric_type()));
      }
    } else if (!segment_meta_->vector_indexed(column)) {
      new_field_params.set_index_params(MakeDefaultQuantVectorIndexParams(
          vector_index_params->metric_type(),
          vector_index_params->quantize_type()));
    }

    std::string index_path;
    if (is_quantized) {
      index_path = FileHelper::MakeQuantizeVectorIndexPath(
          path_, column, segment_meta_->id(), block.id_);
    } else {
      index_path = FileHelper::MakeVectorIndexPath(
          path_, column, segment_meta_->id(), block.id_);
    }

    auto vector_indexer =
        std::make_shared<VectorColumnIndexer>(index_path, new_field_params);
    auto s = vector_indexer->Open(vector_column_params::ReadOptions{
        options_.enable_mmap_, false, true});
    CHECK_RETURN_STATUS(s);

    // operator[] creates the per-column list on first use.
    auto &column_indexers = is_quantized ? quant_vector_indexers_[column]
                                         : vector_indexers_[column];
    column_indexers.push_back(vector_indexer);
  }
  return Status::OK();
}

// Creates and opens a vector column indexer for `field_name` backed by the
// index file of `block_id`.
//
// The block id is recorded in quant_memory_vector_block_ids_ when
// `is_quantized` is set and in memory_vector_block_ids_ otherwise; the index
// file path is chosen accordingly. A file already present at that path is
// removed first (a warning is logged). Returns nullptr when the indexer
// cannot be opened.
VectorColumnIndexer::Ptr SegmentImpl::create_vector_indexer(
    const std::string &field_name, const FieldSchema &field, BlockID block_id,
    bool is_quantized) {
  std::string index_file_path;
  if (!is_quantized) {
    index_file_path = FileHelper::MakeVectorIndexPath(
        path_, field_name, segment_meta_->id(), block_id);
    memory_vector_block_ids_[field_name] = block_id;
  } else {
    index_file_path = FileHelper::MakeQuantizeVectorIndexPath(
        path_, field_name, segment_meta_->id(), block_id);
    quant_memory_vector_block_ids_[field_name] = block_id;
  }

  const bool stale_file_present = FileHelper::FileExists(index_file_path);
  if (stale_file_present) {
    LOG_WARN(
        "Index file[%s] already exists (possible crash residue); cleaning and "
        "overwriting.",
        index_file_path.c_str());
    FileHelper::RemoveFile(index_file_path);
  }

  auto indexer = std::make_shared<VectorColumnIndexer>(index_file_path, field);
  vector_column_params::ReadOptions options{true, true};
  auto open_status = indexer->Open(options);
  if (open_status.ok()) {
    return indexer;
  }

  LOG_ERROR("Failed to open vector indexer for field: %s, err: %s",
            field.to_string().c_str(), open_status.message().c_str());
  return nullptr;
}

// Creates the writable in-memory components of the segment: the memory
// forward store of the current writing block and, per vector field, a plain
// vector indexer plus a quantized one when the field declares a quantize
// type.
//
// A forward block file already present at the target path is removed first
// (a warning is logged). Returns the failing status when the memory store
// cannot be opened and InternalError when an indexer cannot be created.
Status SegmentImpl::init_memory_components() {
  // init memory block id
  auto &mem_block = segment_meta_->writing_forward_block().value();

  // create and open memory forward block
  auto mem_path = FileHelper::MakeForwardBlockPath(seg_path_, mem_block.id_,
                                                   !options_.enable_mmap_);
  if (FileHelper::FileExists(mem_path)) {
    LOG_WARN(
        "ForwardBlock file[%s] already exists (possible crash residue); "
        "cleaning and overwriting.",
        mem_path.c_str());
    FileHelper::RemoveFile(mem_path);
  }

  const auto store_format =
      options_.enable_mmap_ ? FileFormat::IPC : FileFormat::PARQUET;
  memory_store_ = std::make_shared<MemForwardStore>(
      collection_schema_, mem_path, store_format, options_.max_buffer_size_);
  auto s = memory_store_->Open();
  CHECK_RETURN_STATUS(s);

  // create and open memory vector indexer
  for (const auto &field : collection_schema_->vector_fields()) {
    auto index_params =
        std::dynamic_pointer_cast<VectorIndexParams>(field->index_params());
    const bool needs_quantized_indexer =
        index_params->quantize_type() != QuantizeType::UNDEFINED;

    // Every vector field gets a plain indexer with default params.
    FieldSchema plain_field(*field);
    plain_field.set_index_params(
        MakeDefaultVectorIndexParams(index_params->metric_type()));
    auto block_id = allocate_block_id();
    auto plain_indexer =
        create_vector_indexer(field->name(), plain_field, block_id);
    if (!plain_indexer) {
      return Status::InternalError("Create vector column indexer failed: ",
                                   field->name());
    }
    memory_vector_indexers_.insert({field->name(), plain_indexer});

    if (!needs_quantized_indexer) {
      continue;
    }

    // Quantized fields additionally get a quantized indexer in its own block.
    block_id = allocate_block_id();
    FieldSchema quant_field(*field);
    quant_field.set_index_params(MakeDefaultQuantVectorIndexParams(
        index_params->metric_type(), index_params->quantize_type()));
    auto quant_indexer =
        create_vector_indexer(field->name(), quant_field, block_id, true);
    if (!quant_indexer) {
      return Status::InternalError("Create vector column indexer failed: ",
                                   field->name());
    }
    quant_memory_vector_indexers_.insert({field->name(), quant_indexer});
  }

  return Status::OK();
}

// Replays the write-ahead log of the current writing block into the segment.
//
// The doc id allocator is reset to the block's minimum doc id. A missing WAL
// file or a WAL file that cannot be opened is not an error: the method
// returns OK without replaying. Each WAL record is deserialized and applied
// through internal_insert / internal_update / internal_upsert /
// internal_delete while seg_mtx_ is held. Records that cannot be deserialized
// or carry an unknown operator are logged and skipped. Afterwards the block's
// max_doc_id_ is advanced by the counters in slots 0..2.
//
// Returns InternalError only when the WAL cannot be prepared for reading.
Status SegmentImpl::recover() {
  // recover mem block meta
  auto &mem_block = segment_meta_->writing_forward_block().value();
  doc_id_allocator_.store(mem_block.min_doc_id());

  std::string wal_file_path =
      FileHelper::MakeWalPath(path_, segment_meta_->id(), mem_block.id_);
  if (!std::filesystem::exists(wal_file_path)) {
    LOG_INFO("Recover wal file not exists just return. path: %s",
             wal_file_path.c_str());
    return Status::OK();
  }

  WalFilePtr recover_wal_file;
  WalOptions wal_option;
  wal_option.create_new = false;
  if (WalFile::CreateAndOpen(wal_file_path, wal_option, &recover_wal_file) !=
      0) {
    LOG_WARN("Recover wal file failed. path: %s", wal_file_path.c_str());
    return Status::OK();
  }
  AILEGO_DEFER([&]() { recover_wal_file->close(); });

  // One counter per operator, indexed by the operator's numeric value.
  std::array<uint64_t, static_cast<size_t>(Operator::DELETE) + 1>
      recovered_doc_count{};
  uint64_t total_recovered_doc_count{0};

  int ret = recover_wal_file->prepare_for_read();
  if (ret != 0) {
    LOG_ERROR("Recover wal file failed. path: %s", wal_file_path.c_str());
    return Status::InternalError("Failed to prepare wal file: ", wal_file_path,
                                 " for read");
  }

  LOG_INFO("Recover start read wal [%s]", wal_file_path.c_str());

  std::lock_guard<std::mutex> lock(seg_mtx_);

  while (true) {
    std::string buf = recover_wal_file->next();
    if (buf.empty()) {
      LOG_INFO("Recover read wal finished");
      break;
    }
    total_recovered_doc_count++;
    auto doc = Doc::deserialize(reinterpret_cast<const uint8_t *>(buf.data()),
                                buf.size());
    if (doc == nullptr) {
      LOG_ERROR("Recover wal failed. doc deserialize failed at %zu",
                (size_t)total_recovered_doc_count);
      continue;
    }

    // NOTE(review): `status` is never assigned from the internal_* calls
    // below, so the failure branch after the switch cannot trigger. Left as
    // is because capturing the results would change the max_doc_id_
    // accounting -- confirm the intended behavior.
    Status status;
    switch (doc->get_operator()) {
      case Operator::INSERT: {
        internal_insert(*doc);
        break;
      }
      case Operator::UPDATE: {
        internal_update(*doc);
        break;
      }
      case Operator::UPSERT: {
        internal_upsert(*doc);
        break;
      }
      case Operator::DELETE: {
        internal_delete(*doc);
        break;
      }
      default:
        LOG_ERROR("Unknown operator type: %d", (int)doc->get_operator());
        // Skip the record: its operator value is not a valid index into
        // recovered_doc_count, so counting it would write out of bounds.
        continue;
    }

    if (!status.ok()) {
      LOG_ERROR("Recover wal failed. Operation %d failed at %zu: %s",
                static_cast<int>(doc->get_operator()),
                (size_t)total_recovered_doc_count, status.message().c_str());
      continue;
    }

    recovered_doc_count[static_cast<size_t>(doc->get_operator())]++;
  }

  // NOTE(review): the slot labels below assume the numeric order of the
  // Operator enum -- confirm against its declaration.
  const auto added_docs = recovered_doc_count[0] +  // INSERT
                          recovered_doc_count[1] +  // UPSERT
                          recovered_doc_count[2];   // UPDATE
  mem_block.max_doc_id_ += added_docs;

  LOG_INFO(
      "Recover from wal finished. total_recovered_doc_count[%zu] insert[%zu] "
      "upsert[%zu] update[%zu] delete[%zu] path[%s]",
      (size_t)total_recovered_doc_count,
      (size_t)recovered_doc_count[0],  // INSERT
      (size_t)recovered_doc_count[1],  // UPSERT
      (size_t)recovered_doc_count[2],  // UPDATE
      (size_t)recovered_doc_count[3],  // DELETE
      wal_file_path.c_str());

  return Status::OK();
}

// Opens the write-ahead log of the current writing block into wal_file_,
// creating the file when it does not exist yet.
//
// NOTE(review): a failed open is only logged; the method still returns OK, so
// callers cannot tell from the status whether the WAL was opened.
Status SegmentImpl::open_wal_file() {
  const auto writing_block_id =
      segment_meta_->writing_forward_block().value().id_;
  std::string wal_file_path =
      FileHelper::MakeWalPath(path_, segment_meta_->id(), writing_block_id);

  // Reuse an existing log, otherwise start a new one.
  WalOptions wal_option;
  wal_option.create_new = !std::filesystem::exists(wal_file_path);

  const bool opened =
      WalFile::CreateAndOpen(wal_file_path, wal_option, &wal_file_) == 0;
  if (opened) {
    LOG_INFO("Open wal file succ. path: %s", wal_file_path.c_str());
  } else {
    LOG_ERROR("Recover wal file failed. path: %s", wal_file_path.c_str());
  }
  return Status::OK();
}

// Serializes `doc` and appends it to the write-ahead log, opening the log on
// first use.
//
// Returns the status of open_wal_file() when that fails, and InternalError
// when the log is still unavailable afterwards or the append itself fails.
Status SegmentImpl::append_wal(const Doc &doc) {
  std::vector<uint8_t> buf = doc.serialize();

  if (!wal_file_) {
    auto s = open_wal_file();
    CHECK_RETURN_STATUS(s);
    // open_wal_file() reports OK even when the log could not be opened, so
    // re-check the handle instead of dereferencing a null pointer below.
    if (!wal_file_) {
      LOG_ERROR("Append wal failed. wal file is not opened");
      return Status::InternalError("Failed to open wal");
    }
  }

  auto ret = wal_file_->append(std::string(buf.begin(), buf.end()));
  if (ret != 0) {
    LOG_ERROR("Append wal failed. ret: %d", ret);
    return Status::InternalError("Failed to append wal");
  }

  return Status::OK();
}

// Converts the in-memory components of the writing block into persisted ones.
//
// The memory forward store is closed and released, the block's forward file
// is reopened as a persisted store and a matching block meta is added to the
// segment meta. Every in-memory (quantized) vector indexer is then appended
// to the persisted indexer list of its column together with a VECTOR_INDEX /
// VECTOR_INDEX_QUANTIZE block meta that reuses the doc range of the writing
// block. Finally the in-memory maps are cleared and the block caches are
// refreshed.
//
// Returns the failing status when the memory store cannot be closed or the
// persisted store cannot be opened.
Status SegmentImpl::finish_memory_components() {
  // Work on a snapshot of the writing block meta.
  const auto writing_block = segment_meta_->writing_forward_block().value();

  // close for loading persist block
  auto s = memory_store_->close();
  CHECK_RETURN_STATUS(s);
  memory_store_.reset();

  // load forward store
  auto persisted_path = FileHelper::MakeForwardBlockPath(
      path_, segment_meta_->id(), writing_block.id_, !options_.enable_mmap_);

  BaseForwardStore::Ptr persist_store;
  if (options_.enable_mmap_) {
    persist_store = std::make_shared<MmapForwardStore>(persisted_path);
  } else {
    persist_store = std::make_shared<BufferPoolForwardStore>(persisted_path);
  }
  s = persist_store->Open();
  CHECK_RETURN_STATUS(s);
  persist_stores_.push_back(persist_store);

  BlockMeta forward_meta{writing_block.id_,         writing_block.type_,
                         writing_block.min_doc_id_, writing_block.max_doc_id_,
                         writing_block.doc_count_,  writing_block.columns_};
  segment_meta_->add_persisted_block(forward_meta);

  // move indexer from memory to persist
  for (auto &[column_name, indexer] : memory_vector_indexers_) {
    auto block_id = memory_vector_block_ids_[column_name];
    BlockMeta index_meta =
        BlockMeta{block_id,
                  BlockType::VECTOR_INDEX,
                  writing_block.min_doc_id_,
                  writing_block.max_doc_id_,
                  writing_block.doc_count_,
                  {column_name}};
    // operator[] creates the per-column list on first use.
    vector_indexers_[column_name].push_back(indexer);
    segment_meta_->add_persisted_block(index_meta);
  }

  // move quant indexer from memory to persist
  for (auto &[column_name, indexer] : quant_memory_vector_indexers_) {
    auto block_id = quant_memory_vector_block_ids_[column_name];
    BlockMeta quant_meta(block_id, BlockType::VECTOR_INDEX_QUANTIZE,
                         writing_block.min_doc_id_, writing_block.max_doc_id_,
                         writing_block.doc_count_, {column_name});
    quant_vector_indexers_[column_name].push_back(indexer);
    segment_meta_->add_persisted_block(quant_meta);
  }

  // clear memory vector indexers
  memory_vector_indexers_.clear();
  quant_memory_vector_indexers_.clear();
  memory_vector_block_ids_.clear();
  quant_memory_vector_block_ids_.clear();

  fresh_persist_block_offset();
  return Status::OK();
}

// Publishes the current segment meta through the version manager and flushes
// it. Does nothing when no version manager is attached.
//
// `delete_snapshot_path_suffix` is forwarded to the version manager unless it
// equals UINT32_MAX. Returns the first failing status of the reset / flush
// calls.
Status SegmentImpl::update_version(uint32_t delete_snapshot_path_suffix) {
  if (!version_manager_) {
    return Status::OK();
  }

  const bool has_suffix = delete_snapshot_path_suffix != UINT32_MAX;
  if (has_suffix) {
    version_manager_->set_delete_snapshot_path_suffix(
        delete_snapshot_path_suffix);
  }

  auto reset_status =
      version_manager_->reset_writing_segment_meta(segment_meta_);
  CHECK_RETURN_STATUS(reset_status);

  auto flush_status = version_manager_->flush();
  CHECK_RETURN_STATUS(flush_status);

  return Status::OK();
}

// Hands out the next block id: returns the allocator's current value and
// advances the allocator by one.
BlockID SegmentImpl::allocate_block_id() {
  const BlockID allocated = block_id_allocator_.fetch_add(1);
  return allocated;
}

// Maps a segment-local doc position to the global doc id stored in doc_ids_.
// Holds seg_mtx_ for the lookup. Returns InvalidArgument when `local_id` is
// not a valid index into doc_ids_.
Result<uint64_t> SegmentImpl::get_global_doc_id(uint32_t local_id) const {
  std::lock_guard lock(seg_mtx_);
  const bool in_range = local_id < doc_ids_.size();
  if (in_range) {
    // global doc_id
    return doc_ids_[local_id];
  }
  return tl::make_unexpected(Status::InvalidArgument("local_id out of range"));
}


////////////////////////////////////////////////////////////////////////////////////
// Segment factory methods implementation
////////////////////////////////////////////////////////////////////////////////////

// Factory for a brand-new segment: builds a SegmentImpl with a fresh
// SegmentMeta for `segment_id`, creates the segment directory under `path`,
// and runs SegmentImpl::Create on it.
//
// Fails with InternalError if the segment directory already exists or cannot
// be created; a failing Create() status is propagated through
// CHECK_RETURN_STATUS_EXPECTED.
Result<Segment::Ptr> Segment::CreateAndOpen(
    const std::string &path, const CollectionSchema &schema,
    SegmentID segment_id, uint64_t min_doc_id, const IDMap::Ptr &id_map,
    const DeleteStore::Ptr &delete_store,
    const VersionManager::Ptr &version_manager, const SegmentOptions &options) {
  std::shared_ptr<SegmentImpl> segment(
      new SegmentImpl(path, schema, SegmentMeta(segment_id), id_map,
                      delete_store, version_manager));

  auto segment_path = FileHelper::MakeSegmentPath(path, segment_id);

  // A new segment must not reuse an existing directory.
  if (FileHelper::DirectoryExists(segment_path)) {
    return tl::make_unexpected(Status::InternalError(
        "Segment path is already exists: ", segment_path));
  }

  if (!FileHelper::CreateDirectory(segment_path)) {
    return tl::make_unexpected(Status::InternalError(
        "Create segment directory failed: ", segment_path));
  }

  auto create_status = segment->Create(options, min_doc_id);
  CHECK_RETURN_STATUS_EXPECTED(create_status);

  return segment;
}

// Factory for an existing segment: builds a SegmentImpl from `segment_meta`
// and runs SegmentImpl::Open on it.
//
// Fails with InternalError if the segment directory derived from `path` and
// the meta's id does not exist; a failing Open() status is propagated through
// CHECK_RETURN_STATUS_EXPECTED.
Result<Segment::Ptr> Segment::Open(const std::string &path,
                                   const CollectionSchema &schema,
                                   const SegmentMeta &segment_meta,
                                   const IDMap::Ptr &id_map,
                                   const DeleteStore::Ptr &delete_store,
                                   const VersionManager::Ptr &version_manager,
                                   const SegmentOptions &options) {
  std::shared_ptr<SegmentImpl> segment(new SegmentImpl(
      path, schema, segment_meta, id_map, delete_store, version_manager));

  auto segment_path = FileHelper::MakeSegmentPath(path, segment_meta.id());

  // The directory has to be there already; this factory never creates it.
  const bool path_exists = FileHelper::DirectoryExists(segment_path);
  if (!path_exists) {
    return tl::make_unexpected(
        Status::InternalError("Segment path is not exist: ", segment_path));
  }

  auto open_status = segment->Open(options);
  CHECK_RETURN_STATUS_EXPECTED(open_status);

  return segment;
}

}  // namespace zvec

================================================
FILE: src/db/index/segment/segment.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include <arrow/record_batch.h>
#include <zvec/ailego/pattern/expected.hpp>
#include <zvec/db/doc.h>
#include <zvec/db/index_params.h>
#include <zvec/db/options.h>
#include <zvec/db/schema.h>
#include <zvec/db/status.h>
#include "db/index/column/inverted_column/inverted_column_indexer.h"
#include "db/index/column/inverted_column/inverted_indexer.h"
#include "db/index/column/vector_column/combined_vector_column_indexer.h"
#include "db/index/column/vector_column/vector_column_indexer.h"
#include "db/index/common/delete_store.h"
#include "db/index/common/id_map.h"
#include "db/index/common/meta.h"
#include "db/index/common/version_manager.h"
#include "db/index/storage/base_forward_store.h"

namespace zvec {

class CombinedRecordBatchReader;

// Abstract interface of a segment. Instances are obtained through the static
// factories CreateAndOpen() / Open() and are handled via Segment::Ptr.
class Segment {
 public:
  using Ptr = std::shared_ptr<Segment>;

  // Polymorphic base: a virtual destructor makes destruction through a
  // Segment pointer/reference well-defined regardless of how the object is
  // owned.
  virtual ~Segment() = default;

  // Creates the on-disk directory for a new segment and opens it. Fails if
  // the segment directory already exists.
  static Result<Ptr> CreateAndOpen(const std::string &path,
                                   const CollectionSchema &schema,
                                   SegmentID segment_id, uint64_t min_doc_id,
                                   const IDMap::Ptr &id_map,
                                   const DeleteStore::Ptr &delete_store,
                                   const VersionManager::Ptr &version_manager,
                                   const SegmentOptions &options);

  // Opens an existing segment described by `segment_meta`. Fails if the
  // segment directory does not exist.
  static Result<Ptr> Open(const std::string &path,
                          const CollectionSchema &schema,
                          const SegmentMeta &segment_meta,
                          const IDMap::Ptr &id_map,
                          const DeleteStore::Ptr &delete_store,
                          const VersionManager::Ptr &version_manager,
                          const SegmentOptions &options);

  virtual SegmentID id() const = 0;

  virtual SegmentMeta::Ptr meta() const = 0;

  virtual uint64_t doc_count(const IndexFilter::Ptr filter = nullptr) = 0;

  // for collection
  virtual Status add_column(FieldSchema::Ptr column_schema,
                            const std::string &expression,
                            const AddColumnOptions &options) = 0;

  virtual Status alter_column(const std::string &column_name,
                              const FieldSchema::Ptr &new_column_schema,
                              const AlterColumnOptions &options) = 0;

  virtual Status drop_column(const std::string &column_name) = 0;

  virtual Status create_all_vector_index(
      int concurrency, SegmentMeta::Ptr *new_segmnet_meta,
      std::unordered_map<std::string, VectorColumnIndexer::Ptr>
          *vector_indexers,
      std::unordered_map<std::string, VectorColumnIndexer::Ptr>
          *quant_vector_indexers) = 0;

  // defined in segment.h because it needs to access the block_id generator
  virtual Status create_vector_index(
      const std::string &column, const IndexParams::Ptr &index_params,
      int concurrency, SegmentMeta::Ptr *new_segmnet_meta,
      std::unordered_map<std::string, VectorColumnIndexer::Ptr>
          *vector_indexers,
      std::unordered_map<std::string, VectorColumnIndexer::Ptr>
          *quant_vector_indexers) = 0;

  virtual Status drop_vector_index(
      const std::string &column, SegmentMeta::Ptr *new_segmnet_meta,
      std::unordered_map<std::string, VectorColumnIndexer::Ptr>
          *vector_indexers) = 0;

  virtual Status reload_vector_index(
      const CollectionSchema &schema, const SegmentMeta::Ptr &segment_meta,
      const std::unordered_map<std::string, VectorColumnIndexer::Ptr>
          &vector_indexers,
      const std::unordered_map<std::string, VectorColumnIndexer::Ptr>
          &quant_vector_indexers = {}) = 0;

  virtual bool vector_index_ready(
      const std::string &column,
      const IndexParams::Ptr &index_params) const = 0;

  virtual bool all_vector_index_ready() const = 0;

  // defined in segment.h because it needs to access the block_id generator
  virtual Status create_scalar_index(
      const std::vector<std::string> &columns,
      const IndexParams::Ptr &index_params, SegmentMeta::Ptr *new_segment_meta,
      InvertedIndexer::Ptr *new_scalar_indexer) = 0;

  // defined in segment.h because it needs to access the block_id generator
  virtual Status drop_scalar_index(
      const std::vector<std::string> &columns,
      SegmentMeta::Ptr *new_segment_meta,
      InvertedIndexer::Ptr *new_scalar_indexer) = 0;

  virtual Status reload_scalar_index(
      const CollectionSchema &schema, const SegmentMeta::Ptr &segment_meta,
      const InvertedIndexer::Ptr &scalar_indexer) = 0;

  virtual Status Insert(Doc &doc) = 0;

  virtual Status Upsert(Doc &doc) = 0;

  virtual Status Update(Doc &doc) = 0;

  virtual Status Delete(const std::string &pk) = 0;

  virtual Status Delete(uint64_t g_doc_id) = 0;

  virtual Doc::Ptr Fetch(uint64_t g_doc_id) = 0;

  // for sqlengine
  virtual TablePtr fetch(const std::vector<std::string> &columns,
                         const std::vector<int> &indices) const = 0;

  virtual ExecBatchPtr fetch(const std::vector<std::string> &columns,
                             int index) const = 0;

  // The caller should hold the segment shared_ptr, because the segment
  // manages the indexer's lifetime.
  virtual RecordBatchReaderPtr scan(
      const std::vector<std::string> &columns) const = 0;

  // The caller should hold the segment shared_ptr, because the segment
  // manages the indexer's lifetime.
  virtual CombinedVectorColumnIndexer::Ptr get_combined_vector_indexer(
      const std::string &field_name) const = 0;

  // The caller should hold the segment shared_ptr, because the segment
  // manages the indexer's lifetime.
  virtual CombinedVectorColumnIndexer::Ptr get_quant_combined_vector_indexer(
      const std::string &field_name) const = 0;

  // The caller should hold the segment shared_ptr, because the segment
  // manages the indexer's lifetime.
  virtual std::vector<VectorColumnIndexer::Ptr> get_vector_indexer(
      const std::string &field_name) const = 0;

  virtual std::vector<VectorColumnIndexer::Ptr> get_quant_vector_indexer(
      const std::string &field_name) const = 0;

  // The caller should hold the segment shared_ptr, because the segment
  // manages the indexer's lifetime.
  virtual InvertedColumnIndexer::Ptr get_scalar_indexer(
      const std::string &field_name) const = 0;

  virtual const IndexFilter::Ptr get_filter() = 0;

  // for others
  virtual Status flush() = 0;
  virtual Status dump() = 0;

  // only mark need_destroyed
  virtual Status destroy() = 0;
};

}  // namespace zvec

================================================
FILE: src/db/index/segment/segment_helper.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "segment_helper.h"
#include <cstdint>
#include <functional>
#include <memory>
#include <arrow/compute/api_vector.h>
#include <arrow/type_fwd.h>
#include <zvec/ailego/logger/logger.h>
#include <zvec/db/status.h>
#include <zvec/db/type.h>
#include "db/common/constants.h"
#include "db/common/file_helper.h"
#include "db/common/global_resource.h"
#include "db/common/typedef.h"
#include "db/index/column/inverted_column/inverted_indexer.h"
#include "db/index/column/vector_column/vector_column_indexer.h"
#include "db/index/common/index_filter.h"
#include "db/index/common/meta.h"
#include "db/index/storage/forward_writer.h"
#include "roaring.hh"

namespace zvec {

// Dispatches a segment task to the executor that matches the alternative
// currently stored in its task_info variant. Returns InvalidArgument when the
// variant holds none of the known task kinds.
Status SegmentHelper::Execute(SegmentTask::Ptr &task) {
  auto &info = task->task_info();

  if (auto *compact = std::get_if<CompactTask>(&info)) {
    return ExecuteCompactTask(*compact);
  }
  if (auto *create_vector = std::get_if<CreateVectorIndexTask>(&info)) {
    return ExecuteCreateVectorIndexTask(*create_vector);
  }
  if (auto *create_scalar = std::get_if<CreateScalarIndexTask>(&info)) {
    return ExecuteCreateScalarIndexTask(*create_scalar);
  }
  if (auto *drop_vector = std::get_if<DropVectorIndexTask>(&info)) {
    return ExecuteDropVectorIndexTask(*drop_vector);
  }
  if (auto *drop_scalar = std::get_if<DropScalarIndexTask>(&info)) {
    return ExecuteDropScalarIndexTask(*drop_scalar);
  }

  return Status::InvalidArgument("Unknown task type");
}

// IndexFilter backed by a roaring bitmap of row ids: an id is reported as
// filtered exactly when it is contained in the bitmap.
class RowIdFilter : public IndexFilter {
 public:
  // Takes ownership of `delete_row_id_bitmap`. A named rvalue-reference
  // parameter is an lvalue, so std::move is required to actually move the
  // bitmap into the member instead of copying it.
  explicit RowIdFilter(roaring::Roaring &&delete_row_id_bitmap)
      : delete_row_id_bitmap_(std::move(delete_row_id_bitmap)) {}

  // Returns true when `id` is present in the bitmap.
  bool is_filtered(uint64_t id) const override {
    return delete_row_id_bitmap_.contains(id);
  }

 private:
  roaring::Roaring delete_row_id_bitmap_;
};

// Compacts the task's input segments into one output segment written under a
// temporary segment directory.
//
// Steps:
//   1. create the temporary output directory;
//   2. ReduceScalar: rewrite forward data (and the inverted index, if any),
//      collecting the bitmap of dropped row ids and the doc-id range;
//   3. ReduceVectorIndex: merge vector indexes, skipping the dropped rows;
//   4. publish the resulting SegmentMeta in task.output_segment_meta_.
//
// If no document survives filtering, the temporary directory is removed, OK is
// returned, and task.output_segment_meta_ is left untouched.
Status SegmentHelper::ExecuteCompactTask(CompactTask &task) {
  // input (bound by reference where only read, to avoid copying containers)
  const auto &collection_path = task.collection_path_;
  auto schema = task.schema_;
  const auto &input_segments = task.input_segments_;
  auto filter = task.filter_;
  auto output_segment_id = task.output_segment_id_;

  auto columns = schema->forward_field_names();

  // make segment path
  auto output_segment_path =
      FileHelper::MakeTempSegmentPath(collection_path, output_segment_id);
  if (!FileHelper::CreateDirectory(output_segment_path)) {
    LOG_ERROR("Create directory failed: %s", output_segment_path.c_str());
    // Status messages are built by concatenating the arguments (see the other
    // Status::InternalError call sites), so no printf-style placeholder here.
    return Status::InternalError("Create directory failed: ",
                                 output_segment_path);
  }

  // Block ids of the output segment start at 0 and grow by one per block.
  std::function<BlockID()> block_id_generator =
      [block_id = BlockID{0}]() mutable { return block_id++; };

  // iterate every doc, build forward and invert indexer
  roaring::Roaring delete_row_id_bitmap;
  uint64_t min_doc_id{std::numeric_limits<uint64_t>::max()};
  uint64_t max_doc_id{0};
  uint32_t doc_count{0};
  std::vector<BlockMeta> block_metas;
  Status s = ReduceScalar(schema, input_segments, output_segment_path, columns,
                          filter, task.forward_use_parquet_, block_id_generator,
                          &delete_row_id_bitmap, &block_metas, &min_doc_id,
                          &max_doc_id, &doc_count);
  CHECK_RETURN_STATUS(s);

  if (doc_count == 0) {
    // Nothing survived: drop the temporary directory and report success.
    FileHelper::RemoveDirectory(output_segment_path);
    return Status::OK();
  }

  std::shared_ptr<RowIdFilter> row_id_filter =
      std::make_shared<RowIdFilter>(std::move(delete_row_id_bitmap));

  s = ReduceVectorIndex(schema, input_segments, output_segment_path,
                        row_id_filter, block_id_generator, min_doc_id,
                        max_doc_id, doc_count, task.concurrency_, &block_metas);
  CHECK_RETURN_STATUS(s);

  LOG_INFO("Compacted vector index");

  auto new_segment_meta = std::make_shared<SegmentMeta>();
  new_segment_meta->set_id(task.output_segment_id_);
  new_segment_meta->set_persisted_blocks(block_metas);
  // Every vector field of the schema is recorded as indexed in the new meta.
  std::set<std::string> indexed_vector_fields;
  for (auto &field : schema->vector_fields()) {
    indexed_vector_fields.emplace(field->name());
  }
  new_segment_meta->set_indexed_vector_fields(indexed_vector_fields);
  task.output_segment_meta_ = new_segment_meta;

  return Status::OK();
}

// Rewrites the scalar (forward) data of `input_segments` into a single forward
// block under `output_segment_path`, and rebuilds the inverted index for every
// non-vector field whose index type is INVERT.
//
// Each scanned batch is passed through FilterRecordBatch together with the
// running row offset across all input segments; surviving rows are appended
// to the forward writer and fed to the inverted indexer.
//
// Outputs:
//   delete_row_id_bitmap, min_doc_id, max_doc_id - filled by FilterRecordBatch
//   output_block_metas - receives the forward block meta and, if an inverted
//                        index was built, its block meta
//   doc_count          - number of rows that survived filtering
//
// When no row survives, returns OK with *doc_count == 0 and appends no block
// meta; the forward writer is not finalized in that case.
Status SegmentHelper::ReduceScalar(
    const CollectionSchema::Ptr schema,
    const std::vector<Segment::Ptr> &input_segments,
    const std::string &output_segment_path,
    const std::vector<std::string> &columns, const IndexFilter::Ptr &filter,
    bool forward_use_parquet, std::function<BlockID()> &block_id_generator,
    roaring::Roaring *delete_row_id_bitmap,
    std::vector<BlockMeta> *output_block_metas, uint64_t *min_doc_id,
    uint64_t *max_doc_id, uint32_t *doc_count) {
  // forward
  auto forward_block_id = block_id_generator();
  auto forward_path = FileHelper::MakeForwardBlockPath(
      output_segment_path, forward_block_id, forward_use_parquet);

  std::shared_ptr<ForwardWriter> forward_writer;
  if (forward_use_parquet) {
    forward_writer = ForwardWriter::CreateParquetWriter(forward_path);
  } else {
    forward_writer = ForwardWriter::CreateArrowIPCWriter(forward_path);
  }
  // Guard against a failed factory call before the writer is dereferenced
  // below (mirrors the null check on the inverted indexer).
  if (forward_writer == nullptr) {
    return Status::InternalError("Create forward writer failed: ",
                                 forward_path);
  }

  // invert index: collect the non-vector fields that carry an INVERT index
  auto all_fields = schema->fields();
  std::vector<FieldSchema> invert_fields;
  for (auto &field : all_fields) {
    if (!field->is_vector_field()) {
      if (field->index_params() &&
          field->index_params()->type() == IndexType::INVERT) {
        invert_fields.push_back(*field);
      }
    }
  }
  InvertedIndexer::Ptr invert_indexer;
  BlockID invert_block_id{0};
  if (invert_fields.size() > 0) {
    invert_block_id = block_id_generator();
    auto invert_path =
        FileHelper::MakeInvertIndexPath(output_segment_path, invert_block_id);
    invert_indexer = InvertedIndexer::CreateAndOpen(schema->name(), invert_path,
                                                    true, invert_fields, false);
    if (invert_indexer == nullptr) {
      return Status::InternalError("Open invert indexer failed");
    }
  }

  // Row position across all input segments, counted before filtering.
  uint32_t row_id_offset{0U};
  *doc_count = 0;

  // The doc-id and user-id columns are always carried over, followed by the
  // requested forward columns.
  std::vector<std::string> all_reduce_columns{GLOBAL_DOC_ID, USER_ID};
  for (auto &column : columns) {
    all_reduce_columns.push_back(column);
  }

  for (auto &segment : input_segments) {
    auto reader = segment->scan(all_reduce_columns);
    if (reader == nullptr) {
      return Status::InternalError("scan segment failed");
    }

    while (true) {
      auto batch = reader->Next();
      if (!batch.ok()) {
        return Status::InternalError("reader next failed: ",
                                     batch.status().message());
      }

      auto batch_value = batch.ValueOrDie();

      // A null batch marks the end of this segment's stream.
      if (!batch_value) {
        break;
      }

      if (batch_value->num_rows() == 0) continue;

      std::shared_ptr<arrow::RecordBatch> filtered_batch;
      auto as =
          FilterRecordBatch(batch_value, filter, row_id_offset, &filtered_batch,
                            delete_row_id_bitmap, min_doc_id, max_doc_id);
      if (!as.ok()) {
        return Status::InternalError("filter record batch failed: ",
                                     as.message());
      }

      // Advance by the unfiltered row count.
      row_id_offset += batch_value->num_rows();

      if (!filtered_batch || filtered_batch->num_rows() == 0) {
        continue;
      }

      // forward
      as = forward_writer->insert_batch(filtered_batch);
      if (!as.ok()) {
        return Status::InternalError("writer insert failed: ", as.message());
      }

      // invert index: ids in the new index are positions in the output
      // segment, i.e. the number of rows written so far plus the row index.
      if (invert_indexer) {
        auto s = ReduceScalarIndex(invert_indexer, filtered_batch, *doc_count);
        CHECK_RETURN_STATUS(s);
      }

      *doc_count += filtered_batch->num_rows();
    }
  }

  if (*doc_count == 0) {
    // no docs
    return Status::OK();
  }

  // flush forward
  auto as = forward_writer->finalize();
  if (!as.ok()) {
    return Status::InternalError("writer finalize failed: ", as.message());
  }

  BlockMeta forward_meta;
  forward_meta.set_id(forward_block_id);
  forward_meta.set_type(BlockType::SCALAR);
  forward_meta.set_min_doc_id(*min_doc_id);
  forward_meta.set_max_doc_id(*max_doc_id);
  forward_meta.set_doc_count(*doc_count);
  forward_meta.set_columns(all_reduce_columns);

  output_block_metas->push_back(forward_meta);

  if (invert_indexer) {
    auto s = invert_indexer->flush();
    CHECK_RETURN_STATUS(s);

    s = invert_indexer->seal();
    CHECK_RETURN_STATUS(s);

    // NOTE(review): only id and type are recorded for the inverted-index
    // block (no columns / doc-id range) - confirm this is what readers expect.
    BlockMeta meta;
    meta.set_id(invert_block_id);
    meta.set_type(BlockType::SCALAR_INDEX);

    output_block_metas->push_back(meta);
  }

  LOG_INFO("Compacted scalar and scalar index");

  return Status::OK();
}

namespace {

// Element reader that returns the array element as-is (used for booleans,
// which are indexed as bool values).
struct ReadPlainValue {
  template <typename ArrayT>
  auto operator()(const ArrayT &arr, int64_t idx) const {
    return arr.Value(idx);
  }
};

// Element reader that returns the raw in-memory bytes of a fixed-width
// numeric element as a std::string of sizeof(element) bytes.
struct ReadRawBytes {
  template <typename ArrayT>
  std::string operator()(const ArrayT &arr, int64_t idx) const {
    const auto value = arr.Value(idx);
    return std::string(reinterpret_cast<const char *>(&value), sizeof(value));
  }
};

// Element reader for string arrays.
struct ReadString {
  template <typename ArrayT>
  std::string operator()(const ArrayT &arr, int64_t idx) const {
    return arr.GetString(idx);
  }
};

// Indexes every row of a non-list column: null rows go through insert_null,
// all others through insert with the value produced by `read`. The id passed
// to the indexer is the row index within the batch plus `doc_id_offset`.
template <typename ArrowArrayT, typename IndexerPtr, typename Reader>
Status IndexScalarColumnValues(const IndexerPtr &indexer,
                               const std::shared_ptr<arrow::Array> &array,
                               uint32_t doc_id_offset, Reader read) {
  auto typed_array = std::static_pointer_cast<ArrowArrayT>(array);
  Status s;
  for (int64_t j = 0; j < typed_array->length(); ++j) {
    if (typed_array->IsNull(j)) {
      s = indexer->insert_null(j + doc_id_offset);
    } else {
      auto value = read(*typed_array, j);
      s = indexer->insert(j + doc_id_offset, value);
    }
    CHECK_RETURN_STATUS(s);
  }
  return Status::OK();
}

// Indexes one non-null list row: gathers the valid child elements in
// [start_offset, end_offset) into a vector<ElementT> (null elements are
// skipped) and inserts that vector under `doc_id`.
template <typename ArrowArrayT, typename ElementT, typename IndexerPtr,
          typename Reader>
Status IndexListRowValues(const IndexerPtr &indexer,
                          const std::shared_ptr<arrow::Array> &value_array,
                          int32_t start_offset, int32_t end_offset,
                          int64_t doc_id, Reader read) {
  std::vector<ElementT> values;
  auto typed = std::static_pointer_cast<ArrowArrayT>(value_array);
  for (int32_t k = start_offset; k < end_offset; ++k) {
    if (typed->IsValid(k)) {
      values.push_back(read(*typed, k));
    }
  }
  Status s;
  s = indexer->insert(doc_id, values);
  return s;
}

}  // namespace

// Feeds one record batch into the inverted indexer.
//
// For every batch column that has a column indexer in `invert_indexer`
// (columns without one are skipped), each row is inserted under the id
// `row index in batch + doc_id_offset`:
//   - BOOL values are inserted as bool;
//   - INT32/INT64/UINT32/UINT64/FLOAT/DOUBLE values are inserted as the raw
//     bytes of the value;
//   - STRING values are inserted as-is;
//   - LIST rows are inserted as a vector of the above encodings, with null
//     elements dropped;
//   - null rows are recorded through insert_null.
// Unsupported column or list-element types are logged and skipped.
//
// Returns the first non-OK status reported by the indexer, otherwise OK.
Status SegmentHelper::ReduceScalarIndex(
    InvertedIndexer::Ptr invert_indexer,
    const std::shared_ptr<arrow::RecordBatch> &batch, uint32_t doc_id_offset) {
  auto a_schema = batch->schema();
  int num_columns = batch->num_columns();

  const ReadPlainValue read_plain;
  const ReadRawBytes read_bytes;
  const ReadString read_string;

  for (int i = 0; i < num_columns; ++i) {
    auto field = a_schema->field(i);
    auto column_name = field->name();

    auto indexer = (*invert_indexer)[column_name];
    if (!indexer) {
      continue;
    }

    auto array = batch->column(i);
    auto type_id = field->type()->id();

    Status s;

    switch (type_id) {
      case arrow::Type::BOOL:
        s = IndexScalarColumnValues<arrow::BooleanArray>(
            indexer, array, doc_id_offset, read_plain);
        break;
      case arrow::Type::INT32:
        s = IndexScalarColumnValues<arrow::Int32Array>(
            indexer, array, doc_id_offset, read_bytes);
        break;
      case arrow::Type::INT64:
        s = IndexScalarColumnValues<arrow::Int64Array>(
            indexer, array, doc_id_offset, read_bytes);
        break;
      case arrow::Type::UINT32:
        s = IndexScalarColumnValues<arrow::UInt32Array>(
            indexer, array, doc_id_offset, read_bytes);
        break;
      case arrow::Type::UINT64:
        s = IndexScalarColumnValues<arrow::UInt64Array>(
            indexer, array, doc_id_offset, read_bytes);
        break;
      case arrow::Type::FLOAT:
        s = IndexScalarColumnValues<arrow::FloatArray>(
            indexer, array, doc_id_offset, read_bytes);
        break;
      case arrow::Type::DOUBLE:
        s = IndexScalarColumnValues<arrow::DoubleArray>(
            indexer, array, doc_id_offset, read_bytes);
        break;
      case arrow::Type::STRING:
        s = IndexScalarColumnValues<arrow::StringArray>(
            indexer, array, doc_id_offset, read_string);
        break;
      case arrow::Type::LIST: {
        auto list_array = std::static_pointer_cast<arrow::ListArray>(array);
        auto value_array = list_array->values();
        auto value_type_id = value_array->type()->id();

        auto offset_array = list_array->offsets();
        auto typed_offsets =
            std::static_pointer_cast<arrow::Int32Array>(offset_array);

        for (int64_t j = 0; j < list_array->length(); ++j) {
          const int64_t doc_id = j + doc_id_offset;

          if (list_array->IsNull(j)) {
            s = indexer->insert_null(doc_id);
            CHECK_RETURN_STATUS(s);
            continue;
          }

          // Child elements of row j live in [start_offset, end_offset).
          int32_t start_offset = typed_offsets->Value(j);
          int32_t end_offset = typed_offsets->Value(j + 1);

          switch (value_type_id) {
            case arrow::Type::BOOL:
              s = IndexListRowValues<arrow::BooleanArray, bool>(
                  indexer, value_array, start_offset, end_offset, doc_id,
                  read_plain);
              break;
            case arrow::Type::INT32:
              s = IndexListRowValues<arrow::Int32Array, std::string>(
                  indexer, value_array, start_offset, end_offset, doc_id,
                  read_bytes);
              break;
            case arrow::Type::INT64:
              s = IndexListRowValues<arrow::Int64Array, std::string>(
                  indexer, value_array, start_offset, end_offset, doc_id,
                  read_bytes);
              break;
            case arrow::Type::UINT32:
              s = IndexListRowValues<arrow::UInt32Array, std::string>(
                  indexer, value_array, start_offset, end_offset, doc_id,
                  read_bytes);
              break;
            case arrow::Type::UINT64:
              s = IndexListRowValues<arrow::UInt64Array, std::string>(
                  indexer, value_array, start_offset, end_offset, doc_id,
                  read_bytes);
              break;
            case arrow::Type::FLOAT:
              s = IndexListRowValues<arrow::FloatArray, std::string>(
                  indexer, value_array, start_offset, end_offset, doc_id,
                  read_bytes);
              break;
            case arrow::Type::DOUBLE:
              s = IndexListRowValues<arrow::DoubleArray, std::string>(
                  indexer, value_array, start_offset, end_offset, doc_id,
                  read_bytes);
              break;
            case arrow::Type::STRING:
              s = IndexListRowValues<arrow::StringArray, std::string>(
                  indexer, value_array, start_offset, end_offset, doc_id,
                  read_string);
              break;
            default:
              LOG_WARN(
                  "Warning: Unsupported nested type '%s' in List column '%s'",
                  value_array->type()->ToString().c_str(), column_name.c_str());
              continue;
          }
          CHECK_RETURN_STATUS(s);
        }
        break;
      }
      default:
        LOG_WARN("Warning: Unsupported column type '%s' for column '%s'",
                 field->type()->ToString().c_str(), column_name.c_str());
        continue;
    }
    CHECK_RETURN_STATUS(s);
  }

  return Status::OK();
}

// Merges the vector indexes of `input_segments` into newly created index
// files under `output_segment_path` and appends one BlockMeta per created
// index to `output_block_metas`.
//
// For every vector field of `schema`:
//   - quantize_type() == UNDEFINED: one VECTOR_INDEX block is built from the
//     segments' vector indexers.
//   - otherwise: a VECTOR_INDEX block is built with default index params for
//     the field's metric type, followed by a VECTOR_INDEX_QUANTIZE block
//     built from the segments' quantized indexers.
//
// `filter` is forwarded to every merge. `concurrency == 0` selects the global
// optimize thread pool; any other value is used as the merge write
// concurrency. `min_doc_id`, `max_doc_id` and `doc_count` are recorded on
// every emitted block meta. Returns the first non-OK status encountered.
Status SegmentHelper::ReduceVectorIndex(
    const CollectionSchema::Ptr schema,
    const std::vector<Segment::Ptr> &input_segments,
    const std::string &output_segment_path, const IndexFilter::Ptr &filter,
    std::function<BlockID()> &block_id_generator, uint64_t min_doc_id,
    uint64_t max_doc_id, uint32_t doc_count, int concurrency,
    std::vector<BlockMeta> *output_block_metas) {
  // Gathers the source indexers of `column` from every input segment.
  // merge_indexers should be ordered put, so segments are visited in order.
  auto collect_sources = [&input_segments](const std::string &column,
                                           bool quantized) {
    std::vector<VectorColumnIndexer::Ptr> sources;
    for (auto &input_segment : input_segments) {
      if (quantized) {
        auto found = input_segment->get_quant_vector_indexer(column);
        sources.insert(sources.end(), found.begin(), found.end());
      } else {
        auto found = input_segment->get_vector_indexer(column);
        sources.insert(sources.end(), found.begin(), found.end());
      }
    }
    return sources;
  };

  // Builds the merge options implied by `concurrency`.
  auto make_merge_options = [concurrency]() {
    vector_column_params::MergeOptions options;
    if (concurrency == 0) {
      options.pool = GlobalResource::Instance().optimize_thread_pool();
    } else {
      options.write_concurrency = concurrency;
    }
    return options;
  };

  // Records a block of `type` for `column`. The doc range and count are set
  // for every block so quantized and non-quantized outputs carry the same
  // metadata.
  auto append_block_meta = [&](BlockID block_id, BlockType type,
                               const std::string &column) {
    BlockMeta meta;
    meta.set_id(block_id);
    meta.set_type(type);
    meta.set_columns({column});
    meta.set_min_doc_id(min_doc_id);
    meta.set_max_doc_id(max_doc_id);
    meta.set_doc_count(doc_count);
    output_block_metas->push_back(meta);
  };

  Status s;

  auto vector_fields = schema->vector_fields();
  for (auto &field : vector_fields) {
    auto vector_index_params =
        std::dynamic_pointer_cast<VectorIndexParams>(field->index_params());
    if (!vector_index_params) {
      // The cast yields null when the field carries no (or a non-vector)
      // index params object; fail cleanly instead of dereferencing null.
      LOG_WARN("Warning: Vector field '%s' has no vector index params",
               field->name().c_str());
      return Status::InvalidArgument("Vector field has no vector index params");
    }

    const auto &column_name = field->name();
    auto vector_block_id = block_id_generator();
    auto merge_options = make_merge_options();

    if (vector_index_params->quantize_type() == QuantizeType::UNDEFINED) {
      auto vector_index_path = FileHelper::MakeVectorIndexPath(
          output_segment_path, column_name, vector_block_id);

      // only create original vector indexer
      auto vector_indexer =
          std::make_shared<VectorColumnIndexer>(vector_index_path, *field);
      s = vector_indexer->Open({true, true});
      CHECK_RETURN_STATUS(s);

      auto merge_indexers = collect_sources(column_name, false);
      s = vector_indexer->Merge(merge_indexers, filter, merge_options);
      CHECK_RETURN_STATUS(s);

      s = vector_indexer->Flush();
      CHECK_RETURN_STATUS(s);

      append_block_meta(vector_block_id, BlockType::VECTOR_INDEX, column_name);
    } else {
      // NOTE(review): the flat index is written through
      // MakeQuantizeVectorIndexPath although its block type is VECTOR_INDEX,
      // while the non-quantized branch uses MakeVectorIndexPath. Kept as-is;
      // confirm this is intentional.
      auto vector_index_path = FileHelper::MakeQuantizeVectorIndexPath(
          output_segment_path, column_name, vector_block_id);

      auto field_without_quantize = std::make_shared<FieldSchema>(*field);
      field_without_quantize->set_index_params(
          MakeDefaultVectorIndexParams(vector_index_params->metric_type()));

      // create flat index
      auto vector_indexer = std::make_shared<VectorColumnIndexer>(
          vector_index_path, *field_without_quantize);
      s = vector_indexer->Open({true, true});
      CHECK_RETURN_STATUS(s);

      auto merge_indexers = collect_sources(column_name, false);
      s = vector_indexer->Merge(merge_indexers, filter, merge_options);
      CHECK_RETURN_STATUS(s);

      s = vector_indexer->Flush();
      CHECK_RETURN_STATUS(s);

      append_block_meta(vector_block_id, BlockType::VECTOR_INDEX, column_name);

      // create quantize index
      auto vector_quan_block_id = block_id_generator();

      auto vector_quan_index_path = FileHelper::MakeQuantizeVectorIndexPath(
          output_segment_path, column_name, vector_quan_block_id);

      auto vector_indexer_quantize =
          std::make_shared<VectorColumnIndexer>(vector_quan_index_path, *field);
      s = vector_indexer_quantize->Open({true, true});
      CHECK_RETURN_STATUS(s);

      auto quant_merge_indexers = collect_sources(column_name, true);
      s = vector_indexer_quantize->Merge(quant_merge_indexers, filter,
                                         merge_options);
      CHECK_RETURN_STATUS(s);

      s = vector_indexer_quantize->Flush();
      CHECK_RETURN_STATUS(s);

      append_block_meta(vector_quan_block_id, BlockType::VECTOR_INDEX_QUANTIZE,
                        column_name);
    }
  }

  return Status::OK();
}

// Drops the rows of `batch` whose doc id (column 0, uint64) is rejected by
// `filter`, and widens [*min_doc_id, *max_doc_id] with the ids of the rows
// that are kept.
//
//   - `filter` null: `*filterd` is set to `batch` itself and every row counts
//     as kept.
//   - otherwise: rejected rows are recorded in `delete_row_id_bitmap` as
//     `row index + row_id_offset`; kept rows are gathered into a new batch
//     stored in `*filterd`. When no row is kept, `*filterd` is left
//     untouched.
//
// Returns an error when column 0 of a non-empty batch is missing or is not a
// uint64 column, or when an Arrow operation fails.
arrow::Status SegmentHelper::FilterRecordBatch(
    const std::shared_ptr<arrow::RecordBatch> &batch,
    const IndexFilter::Ptr filter, uint32_t row_id_offset,
    std::shared_ptr<arrow::RecordBatch> *filterd,
    roaring::Roaring *delete_row_id_bitmap, uint64_t *min_doc_id,
    uint64_t *max_doc_id) {
  const int64_t num_rows = batch->num_rows();

  // column 0 is doc_id. Resolve it once as a typed array instead of
  // materialising a scalar for every row; an empty batch needs no column.
  std::shared_ptr<arrow::UInt64Array> doc_ids;
  if (num_rows > 0) {
    if (batch->num_columns() == 0) {
      return arrow::Status::Invalid("Record batch has no doc_id column");
    }
    auto id_column = batch->column(0);
    if (id_column->type_id() != arrow::Type::UINT64) {
      return arrow::Status::Invalid("doc_id column must be uint64, got: ",
                                    id_column->type()->ToString());
    }
    doc_ids = std::static_pointer_cast<arrow::UInt64Array>(id_column);
  }

  // NOTE(review): a null doc id is read as 0, which presumably matches the
  // value carried by the null scalar of the former per-row GetScalar lookup;
  // doc ids are expected to be non-null - confirm.
  auto doc_id_at = [&doc_ids](int64_t row) -> uint64_t {
    return doc_ids->IsValid(row) ? doc_ids->Value(row) : 0;
  };

  if (!filter) {
    *filterd = batch;
    for (int64_t i = 0; i < num_rows; ++i) {
      const uint64_t doc_id = doc_id_at(i);
      *min_doc_id = std::min(*min_doc_id, doc_id);
      *max_doc_id = std::max(*max_doc_id, doc_id);
    }
    return arrow::Status::OK();
  }

  std::vector<uint64_t> selected_indices;
  selected_indices.reserve(static_cast<size_t>(num_rows));
  for (int64_t i = 0; i < num_rows; ++i) {
    const uint64_t doc_id = doc_id_at(i);
    if (!filter->is_filtered(doc_id)) {
      selected_indices.push_back(static_cast<uint64_t>(i));
      *min_doc_id = std::min(*min_doc_id, doc_id);
      *max_doc_id = std::max(*max_doc_id, doc_id);
    } else {
      delete_row_id_bitmap->add(static_cast<uint32_t>(i + row_id_offset));
    }
  }

  // Nothing survived: the output batch is intentionally left as the caller
  // passed it in.
  if (selected_indices.empty()) {
    return arrow::Status::OK();
  }

  arrow::UInt64Builder builder;
  ARROW_RETURN_NOT_OK(builder.AppendValues(selected_indices));
  std::shared_ptr<arrow::Array> selection_array;
  ARROW_RETURN_NOT_OK(builder.Finish(&selection_array));

  // Gather the surviving rows column by column.
  std::vector<std::shared_ptr<arrow::Array>> filtered_columns;
  filtered_columns.reserve(static_cast<size_t>(batch->num_columns()));
  for (int i = 0; i < batch->num_columns(); ++i) {
    arrow::Datum out;
    ARROW_ASSIGN_OR_RAISE(
        out, arrow::compute::Take(batch->column(i), selection_array));
    filtered_columns.push_back(out.make_array());
  }

  *filterd = arrow::RecordBatch::Make(
      batch->schema(), static_cast<int64_t>(selected_indices.size()),
      filtered_columns);

  return arrow::Status::OK();
}

// Runs a create-vector-index task on its input segment. An empty column name
// means "build the index for all vector columns"; otherwise only the named
// column is built with the task's index params. Results are written into the
// task's output fields.
Status SegmentHelper::ExecuteCreateVectorIndexTask(
    CreateVectorIndexTask &task) {
  const bool single_column = !task.column_to_build_vector_index_.empty();
  if (single_column) {
    return task.input_segment_->create_vector_index(
        task.column_to_build_vector_index_, task.index_params_,
        task.concurrency_, &task.output_segment_meta_,
        &task.output_vector_indexers_, &task.output_quant_vector_indexers_);
  }

  return task.input_segment_->create_all_vector_index(
      task.concurrency_, &task.output_segment_meta_,
      &task.output_vector_indexers_, &task.output_quant_vector_indexers_);
}

// Runs a create-scalar-index task: asks the input segment to index the
// task's columns with the task's index params, storing the resulting segment
// meta and indexer in the task's output fields.
Status SegmentHelper::ExecuteCreateScalarIndexTask(
    CreateScalarIndexTask &task) {
  auto &segment = task.input_segment_;
  auto *meta_out = &task.output_segment_meta_;
  auto *indexer_out = &task.output_scalar_indexer_;
  return segment->create_scalar_index(task.columns_to_build_scalar_index_,
                                      task.index_params_, meta_out,
                                      indexer_out);
}

// Runs a drop-vector-index task: asks the input segment to drop the vector
// index of the task's column, storing the resulting segment meta and vector
// indexers in the task's output fields.
Status SegmentHelper::ExecuteDropVectorIndexTask(DropVectorIndexTask &task) {
  auto &segment = task.input_segment_;
  auto *meta_out = &task.output_segment_meta_;
  auto *indexers_out = &task.output_vector_indexers_;
  return segment->drop_vector_index(task.column_to_drop_vector_index_,
                                    meta_out, indexers_out);
}

// Runs a drop-scalar-index task: asks the input segment to drop the scalar
// index of the task's columns, storing the resulting segment meta and scalar
// indexer in the task's output fields.
Status SegmentHelper::ExecuteDropScalarIndexTask(DropScalarIndexTask &task) {
  auto &segment = task.input_segment_;
  auto *meta_out = &task.output_segment_meta_;
  auto *indexer_out = &task.output_scalar_indexer_;
  return segment->drop_scalar_index(task.columns_to_drop_scalar_index_,
                                    meta_out, indexer_out);
}

}  // namespace zvec

================================================
FILE: src/db/index/segment/segment_helper.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <functional>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <variant>
#include <vector>
#include <arrow/record_batch.h>
#include <arrow/status.h>
#include <zvec/db/index_params.h>
#include "db/index/column/inverted_column/inverted_indexer.h"
#include "db/index/common/index_filter.h"
#include "db/index/common/meta.h"
#include "segment.h"

namespace zvec {

// Parameters and result slot of a compact task.
struct CompactTask {
  CompactTask(const std::string &collection_path,
              const CollectionSchema::Ptr &schema,
              const std::vector<Segment::Ptr> &input_segments,
              SegmentID output_segment_id, const IndexFilter::Ptr filter,
              bool forward_use_parquet, int concurrency)
      : collection_path_(collection_path),
        schema_(schema),
        input_segments_(input_segments),
        output_segment_id_(output_segment_id),
        // `filter` is const, so it can only be copied; spelling that as a
        // plain copy avoids a std::move that would silently degrade to one.
        filter_(filter),
        forward_use_parquet_(forward_use_parquet),
        concurrency_(concurrency) {}

  // input
  const std::string collection_path_;
  const CollectionSchema::Ptr schema_;
  const std::vector<Segment::Ptr>
      input_segments_;  // size must > 1 when filter is nullptr; size could = 1
                        // when filter is not nullptr
  SegmentID output_segment_id_;
  const IndexFilter::Ptr filter_;  // may be nullptr
  bool forward_use_parquet_;
  int concurrency_;

  // output
  SegmentMeta::Ptr output_segment_meta_;
};

struct CreateVectorIndexTask {
  CreateVectorIndexTask(const Segment::Ptr &input_segment,
                        const std::string &column_to_build_vector_index,
                        const IndexParams::Ptr &index_params, int concurrency)
      : input_segment_(input_segment),
        column_to_build_vector_index_(column_to_build_vector_index),
        index_params_(index_params),
        concurrency_(concurrency) {}

  Segment::Ptr input_segment_;
  std::string column_to_build_vector_index_;  // if empty means create index for
  // all vector columns
  IndexParams::Ptr index_params_;
  int concurrency_;

  // output
  SegmentMeta::Ptr output_segment_meta_;
  std::unordered_map<std::string, VectorColumnIndexer::Ptr>
      output_vector_indexers_;
  std::unordered_map<std::string, VectorColumnIndexer::Ptr>
      output_quant_vector_indexers_;
};

struct DropVectorIndexTask {
  DropVectorIndexTask(const Segment::Ptr &input_segment,
                      const std::string &column_to_drop_vector_index)
      : input_segment_(input_segment),
        column_to_drop_vector_index_(column_to_drop_vector_index) {}

  Segment::Ptr input_segment_;
  std::string column_to_drop_vector_index_;

  // output
  SegmentMeta::Ptr output_segment_meta_;
  std::unordered_map<std::string, VectorColumnIndexer::Ptr>
      output_vector_indexers_;
};

struct CreateScalarIndexTask {
  CreateScalarIndexTask(
      const Segment::Ptr &input_segment,
      const std::vector<std::string> &columns_to_build_scalar_index,
      const IndexParams::Ptr &index_params, int concurrency)
      : input_segment_(input_segment),
        columns_to_build_scalar_index_(columns_to_build_scalar_index),
        index_params_(index_params),
        concurrency_(concurrency) {}

  Segment::Ptr input_segment_;
  std::vector<std::string> columns_to_build_scalar_index_;
  IndexParams::Ptr index_params_;
  int concurrency_;

  // output
  SegmentMeta::Ptr output_segment_meta_;
  InvertedIndexer::Ptr output_scalar_indexer_;
};

// Parameters and result slots of a drop-scalar-index task.
struct DropScalarIndexTask {
  // Both arguments are taken by value as sink parameters and moved into the
  // members, so an rvalue argument is never copied.
  DropScalarIndexTask(Segment::Ptr input_segment,
                      std::vector<std::string> columns_to_drop_scalar_index)
      : input_segment_(std::move(input_segment)),
        columns_to_drop_scalar_index_(
            std::move(columns_to_drop_scalar_index)) {}

  // input
  Segment::Ptr input_segment_;
  std::vector<std::string> columns_to_drop_scalar_index_;

  // output
  SegmentMeta::Ptr output_segment_meta_;
  InvertedIndexer::Ptr output_scalar_indexer_;  // nullptr means no scalar index
};

// Type-erased container for one segment task. The concrete task (including
// its output fields) is stored by value in a variant and is reachable through
// task_info().
class SegmentTask {
 public:
  using Ptr = std::shared_ptr<SegmentTask>;

  using TaskInfo =
      std::variant<CompactTask, CreateVectorIndexTask, DropVectorIndexTask,
                   CreateScalarIndexTask, DropScalarIndexTask>;

  // Factory helpers: each wraps a copy of `task` in a shared SegmentTask.
  static Ptr CreateCompactTask(const CompactTask &task) {
    return std::make_shared<SegmentTask>(task);
  }

  // Misspelled historical name of CreateCompactTask(), kept so existing
  // callers continue to compile.
  static Ptr CreateComapctTask(const CompactTask &task) {
    return CreateCompactTask(task);
  }

  static Ptr CreateCreateVectorIndexTask(const CreateVectorIndexTask &task) {
    return std::make_shared<SegmentTask>(task);
  }

  static Ptr CreateDropVectorIndexTask(const DropVectorIndexTask &task) {
    return std::make_shared<SegmentTask>(task);
  }

  static Ptr CreateCreateScalarIndexTask(const CreateScalarIndexTask &task) {
    return std::make_shared<SegmentTask>(task);
  }

  static Ptr CreateDropScalarIndexTask(const DropScalarIndexTask &task) {
    return std::make_shared<SegmentTask>(task);
  }

 public:
  // Converting constructors, one per task alternative; each copies `task`.
  SegmentTask(const CompactTask &task) : task_info_(task) {}

  SegmentTask(const CreateVectorIndexTask &task) : task_info_(task) {}

  SegmentTask(const CreateScalarIndexTask &task) : task_info_(task) {}

  SegmentTask(const DropVectorIndexTask &task) : task_info_(task) {}

  SegmentTask(const DropScalarIndexTask &task) : task_info_(task) {}

  // Mutable access to the stored task.
  TaskInfo &task_info() {
    return task_info_;
  }

 private:
  TaskInfo task_info_;
};

// Static helpers that execute segment tasks and merge ("reduce") segment data.
class SegmentHelper {
 public:
  // Executes `task`; the concrete task kind is carried by task->task_info().
  static Status Execute(SegmentTask::Ptr &task);

 private:
  // One executor per task kind. Each receives the task by reference; the
  // index executors write their results into the task's output fields.
  static Status ExecuteCompactTask(CompactTask &task);

  static Status ExecuteCreateVectorIndexTask(CreateVectorIndexTask &task);

  static Status ExecuteCreateScalarIndexTask(CreateScalarIndexTask &task);

  static Status ExecuteDropVectorIndexTask(DropVectorIndexTask &task);

  static Status ExecuteDropScalarIndexTask(DropScalarIndexTask &task);

 public:
  // NOTE(review): presumably merges the scalar `columns` of `input_segments`
  // into `output_segment_path`, reporting the produced blocks and doc id
  // range through the output pointers - confirm against the definition.
  static Status ReduceScalar(const CollectionSchema::Ptr schema,
                             const std::vector<Segment::Ptr> &input_segments,
                             const std::string &output_segment_path,
                             const std::vector<std::string> &columns,
                             const IndexFilter::Ptr &filter,
                             bool forward_use_parquet,
                             std::function<BlockID()> &block_id_generator,
                             roaring::Roaring *delete_row_id_bitmap,
                             std::vector<BlockMeta> *output_block_metas,
                             uint64_t *min_doc_id, uint64_t *max_doc_id,
                             uint32_t *doc_count);

  // Inserts the values of `batch` into `indexer`; row j is inserted under
  // doc id `j + doc_id_offset`.
  static Status ReduceScalarIndex(
      InvertedIndexer::Ptr indexer,
      const std::shared_ptr<arrow::RecordBatch> &batch, uint32_t doc_id_offset);

  // Merges the vector indexes of `input_segments` for every vector field of
  // `schema` into new index files under `output_segment_path`, appending one
  // BlockMeta per created index to `output_block_metas`. `concurrency == 0`
  // uses the global optimize thread pool.
  static Status ReduceVectorIndex(
      const CollectionSchema::Ptr schema,
      const std::vector<Segment::Ptr> &input_segments,
      const std::string &output_segment_path, const IndexFilter::Ptr &filter,
      std::function<BlockID()> &block_id_generator, uint64_t min_doc_id,
      uint64_t max_doc_id, uint32_t doc_count, int concurrency,
      std::vector<BlockMeta> *output_block_metas);

  // Keeps the rows of `batch` whose doc id (column 0) is not rejected by
  // `filter`. Rejected rows are added to `delete_row_id_bitmap` as
  // `row index + row_id_offset`; the doc id range is widened with the kept
  // ids. With a null `filter`, `*filtered` is set to `batch` itself.
  static arrow::Status FilterRecordBatch(
      const std::shared_ptr<arrow::RecordBatch> &batch,
      const IndexFilter::Ptr filter, uint32_t row_id_offset,
      std::shared_ptr<arrow::RecordBatch> *filtered,
      roaring::Roaring *delete_row_id_bitmap, uint64_t *min_doc_id,
      uint64_t *max_doc_id);
};

}  // namespace zvec

================================================
FILE: src/db/index/segment/segment_manager.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// segment_manager.cc
#include "db/index/segment/segment_manager.h"
#include <algorithm>
#include <future>
#include <thread>
#include <vector>
#include <zvec/db/status.h>
#include "db/common/typedef.h"

namespace zvec {

// Registers `segment` under its id, replacing any segment already stored
// under that id. A null segment is rejected with InvalidArgument.
Status SegmentManager::add_segment(Segment::Ptr segment) {
  if (segment == nullptr) {
    return Status::InvalidArgument("Segment is null");
  }

  segments_map_.insert_or_assign(segment->id(), segment);
  return Status::OK();
}

// Unregisters the segment with `segment_id` without destroying it. Returns
// NotFound when no such segment is registered.
Status SegmentManager::remove_segment(SegmentID segment_id) {
  const auto erased_count = segments_map_.erase(segment_id);
  if (erased_count == 0) {
    return Status::NotFound("Segment not found");
  }
  return Status::OK();
}

// Destroys the segment with `segment_id` and then unregisters it. Returns
// NotFound when no such segment is registered; when destroy() fails its
// status is returned and the segment stays registered.
Status SegmentManager::destroy_segment(SegmentID segment_id) {
  auto entry = segments_map_.find(segment_id);
  const bool known = entry != segments_map_.end();
  if (!known) {
    return Status::NotFound("Segment not found");
  }

  auto destroy_status = entry->second->destroy();
  CHECK_RETURN_STATUS(destroy_status);

  segments_map_.erase(segment_id);
  return Status::OK();
}

std::vector<Segment::Ptr> SegmentManager::get_segments() const {
  std::vector<Segment::Ptr> segments;
  for (auto &pair : segments_map_) {
    segments.push_back(pair.second);
  }
  std::sort(segments.begin(), segments.end(),
            [](Segment::Ptr a, Segment::Ptr b) {
              return a->meta()->min_doc_id() < b->meta()->min_doc_id();
            });
  return segments;
}

std::vector<SegmentMeta::Ptr> SegmentManager::get_segments_meta() const {
  std::vector<SegmentMeta::Ptr> segments_meta;
  for (auto &pair : segments_map_) {
    segments_meta.push_back(pair.second->meta());
  }

  std::sort(segments_meta.begin(), segments_meta.end(),
            [](SegmentMeta::Ptr a, SegmentMeta::Ptr b) {
              return a->min_doc_id() < b->min_doc_id();
            });

  return segments_meta;
}

// Adds the column described by `column_schema` (populated from `expression`)
// to every registered segment.
//
// Segments are processed in batches of `concurrency` asynchronous tasks; a
// non-positive `concurrency` falls back to the hardware concurrency (at least
// 1). Every task of a batch is awaited before the batch result is evaluated,
// and the first failing status of the batch is returned; later batches are
// not started after a failure.
Status SegmentManager::add_column(const FieldSchema::Ptr &column_schema,
                                  const std::string &expression,
                                  int concurrency) {
  if (concurrency <= 0) {
    // hardware_concurrency() may legitimately report 0; a zero batch size
    // would never advance the loop below.
    concurrency =
        std::max(1, static_cast<int>(std::thread::hardware_concurrency()));
  }
  const size_t batch_size = static_cast<size_t>(concurrency);

  std::vector<Segment::Ptr> segments;
  segments.reserve(segments_map_.size());
  for (const auto &entry : segments_map_) {
    segments.push_back(entry.second);
  }

  for (size_t begin = 0; begin < segments.size(); begin += batch_size) {
    const size_t end = std::min(begin + batch_size, segments.size());

    std::vector<std::future<Status>> futures;
    futures.reserve(end - begin);
    for (size_t j = begin; j < end; ++j) {
      // Each task owns its segment pointer so it never refers to a local
      // container that could be torn down while the task is still running.
      futures.emplace_back(std::async(
          std::launch::async,
          [segment = segments[j], &column_schema, &expression,
           concurrency]() -> Status {
            return segment->add_column(column_schema, expression,
                                       AddColumnOptions{concurrency});
          }));
    }

    // Drain the whole batch before reporting, keeping the first failure.
    Status batch_status = Status::OK();
    for (auto &future : futures) {
      Status status = future.get();
      if (batch_status.ok() && !status.ok()) {
        batch_status = status;
      }
    }
    if (!batch_status.ok()) {
      return batch_status;
    }
  }

  return Status::OK();
}

// Alters column `column_name` to `new_column_schema` on every registered
// segment.
//
// Segments are processed in batches of `concurrency` asynchronous tasks; a
// non-positive `concurrency` falls back to the hardware concurrency (at least
// 1). Every task of a batch is awaited before the batch result is evaluated,
// and the first failing status of the batch is returned; later batches are
// not started after a failure.
Status SegmentManager::alter_column(const std::string &column_name,
                                    const FieldSchema::Ptr &new_column_schema,
                                    int concurrency) {
  if (concurrency <= 0) {
    // hardware_concurrency() may legitimately report 0; a zero batch size
    // would never advance the loop below.
    concurrency =
        std::max(1, static_cast<int>(std::thread::hardware_concurrency()));
  }
  const size_t batch_size = static_cast<size_t>(concurrency);

  std::vector<Segment::Ptr> segments;
  segments.reserve(segments_map_.size());
  for (const auto &entry : segments_map_) {
    segments.push_back(entry.second);
  }

  for (size_t begin = 0; begin < segments.size(); begin += batch_size) {
    const size_t end = std::min(begin + batch_size, segments.size());

    std::vector<std::future<Status>> futures;
    futures.reserve(end - begin);
    for (size_t j = begin; j < end; ++j) {
      // Each task owns its segment pointer so it never refers to a local
      // container that could be torn down while the task is still running.
      futures.emplace_back(std::async(
          std::launch::async,
          [segment = segments[j], &column_name, &new_column_schema,
           concurrency]() -> Status {
            return segment->alter_column(column_name, new_column_schema,
                                         AlterColumnOptions{concurrency});
          }));
    }

    // Drain the whole batch before reporting, keeping the first failure.
    Status batch_status = Status::OK();
    for (auto &future : futures) {
      Status status = future.get();
      if (batch_status.ok() && !status.ok()) {
        batch_status = status;
      }
    }
    if (!batch_status.ok()) {
      return batch_status;
    }
  }

  return Status::OK();
}

// Drops column `column_name` from every registered segment, one segment
// after another, stopping at the first failure.
Status SegmentManager::drop_column(const std::string &column_name) {
  for (auto &entry : segments_map_) {
    auto &segment = entry.second;
    auto drop_status = segment->drop_column(column_name);
    CHECK_RETURN_STATUS(drop_status);
  }

  return Status::OK();
}

}  // namespace zvec

================================================
FILE: src/db/index/segment/segment_manager.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <unordered_map>
#include <vector>
#include "segment.h"

namespace zvec {
// Registry of segments keyed by segment id, with helpers that apply column
// schema changes to every registered segment.
class SegmentManager {
 public:
  using Ptr = std::shared_ptr<SegmentManager>;

  SegmentManager() = default;
  ~SegmentManager() = default;

  // Number of registered segments.
  uint32_t segment_count() const {
    return static_cast<uint32_t>(segments_map_.size());
  }

  // Registers `segment` under its id; a null segment is rejected.
  Status add_segment(Segment::Ptr segment);

  // Unregisters the segment without destroying it.
  Status remove_segment(SegmentID segment_id);

  // Destroys the segment and then unregisters it.
  Status destroy_segment(SegmentID segment_id);

  // All registered segments, ordered by ascending min doc id.
  std::vector<Segment::Ptr> get_segments() const;

  // The meta of all registered segments, ordered by ascending min doc id.
  std::vector<SegmentMeta::Ptr> get_segments_meta() const;

  // Adds a column to every segment; `concurrency` <= 0 selects the hardware
  // concurrency.
  Status add_column(const FieldSchema::Ptr &column_schema,
                    const std::string &expression, int concurrency);

  // Alters a column on every segment; `concurrency` <= 0 selects the
  // hardware concurrency.
  Status alter_column(const std::string &column_name,
                      const FieldSchema::Ptr &new_column_schema,
                      int concurrency);

  // Drops a column from every segment.
  Status drop_column(const std::string &column_name);

 private:
  std::unordered_map<SegmentID, Segment::Ptr> segments_map_;
};
}  // namespace zvec

================================================
FILE: src/db/index/segment/sql_expr_parser.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "sql_expr_parser.h"
#include <cctype>
#include <string>
#include <arrow/result.h>
#include <arrow/status.h>
#include <arrow/type.h>

namespace zvec {

// Returns true when `type` is an Arrow integer or floating-point type.
bool IsNumericType(const std::shared_ptr<arrow::DataType> &type) {
  const auto type_id = type->id();
  if (arrow::is_integer(type_id)) {
    return true;
  }
  return arrow::is_floating(type_id);
}

using arrow::compute::call;
using arrow::compute::Expression;
using arrow::compute::field_ref;
using arrow::compute::literal;

// Recursive-descent parser that turns an arithmetic expression string into an
// arrow::compute::Expression.
//
// Grammar (whitespace is allowed between tokens):
//   expression := term   (('+' | '-') term)*
//   term       := factor (('*' | '/') factor)*
//   factor     := '(' expression ')' | '-' factor | '+' factor
//               | number | column
//   number     := digits ['.' digits] [('e' | 'E') ['+' | '-'] digits]
//   column     := (letter | '_') (letter | digit | '_')*
//
// Columns must exist in the schema and be numeric. Binary operators map to
// the "add", "subtract", "multiply" and "divide" functions, unary minus to
// "negate".
class Parser {
 public:
  Parser(const std::string &expr, const std::shared_ptr<arrow::Schema> &schema)
      : expr_(expr), pos_(0), schema_(schema) {}

  // Parses the whole input; trailing non-whitespace characters are an error.
  arrow::Result<Expression> Parse() {
    SkipWhitespace();
    ARROW_ASSIGN_OR_RAISE(auto e, ParseExpression());
    SkipWhitespace();
    if (!AtEnd()) {
      return arrow::Status::Invalid("Unexpected character at position ", pos_,
                                    ": ", expr_[pos_]);
    }
    return e;
  }

 private:
  std::string expr_;
  size_t pos_;
  std::shared_ptr<arrow::Schema> schema_;

  // The <cctype> classifiers are only defined for values representable as
  // unsigned char (or EOF); a plain char may be negative for non-ASCII input,
  // so every call goes through these wrappers.
  static bool IsSpace(char c) {
    return std::isspace(static_cast<unsigned char>(c)) != 0;
  }
  static bool IsDigit(char c) {
    return std::isdigit(static_cast<unsigned char>(c)) != 0;
  }
  static bool IsAlpha(char c) {
    return std::isalpha(static_cast<unsigned char>(c)) != 0;
  }
  static bool IsAlnum(char c) {
    return std::isalnum(static_cast<unsigned char>(c)) != 0;
  }

  bool AtEnd() const {
    return pos_ >= expr_.size();
  }

  void SkipWhitespace() {
    while (!AtEnd() && IsSpace(expr_[pos_])) {
      ++pos_;
    }
  }

  // expression := term (('+' | '-') term)*   (left-associative)
  arrow::Result<Expression> ParseExpression() {
    SkipWhitespace();
    ARROW_ASSIGN_OR_RAISE(auto left, ParseTerm());
    SkipWhitespace();
    while (!AtEnd() && (expr_[pos_] == '+' || expr_[pos_] == '-')) {
      char op = expr_[pos_++];
      SkipWhitespace();
      ARROW_ASSIGN_OR_RAISE(auto right, ParseTerm());
      SkipWhitespace();
      auto func = (op == '+') ? "add" : "subtract";
      left = call(std::string(func), {left, right});
    }

    return left;
  }

  // term := factor (('*' | '/') factor)*   (left-associative)
  arrow::Result<Expression> ParseTerm() {
    SkipWhitespace();
    ARROW_ASSIGN_OR_RAISE(auto left, ParseFactor());
    SkipWhitespace();

    while (!AtEnd() && (expr_[pos_] == '*' || expr_[pos_] == '/')) {
      char op = expr_[pos_++];
      SkipWhitespace();
      ARROW_ASSIGN_OR_RAISE(auto right, ParseFactor());
      SkipWhitespace();
      auto func = (op == '*') ? "multiply" : "divide";
      left = call(std::string(func), {left, right});
    }

    return left;
  }

  // factor := '(' expression ')' | '-' factor | '+' factor | number | column
  arrow::Result<Expression> ParseFactor() {
    SkipWhitespace();

    if (AtEnd()) {
      return arrow::Status::Invalid("Unexpected end of expression.");
    }

    char c = expr_[pos_];

    // Parenthetical expression
    if (c == '(') {
      ++pos_;
      SkipWhitespace();
      ARROW_ASSIGN_OR_RAISE(auto inner, ParseExpression());
      SkipWhitespace();
      if (AtEnd() || expr_[pos_] != ')') {
        return arrow::Status::Invalid("Mismatched parentheses.");
      }
      ++pos_;
      SkipWhitespace();
      return inner;
    }

    // Unary minus operator
    if (c == '-') {
      ++pos_;  // Skip the minus sign
      SkipWhitespace();
      ARROW_ASSIGN_OR_RAISE(auto operand, ParseFactor());
      return call("negate", {operand});
    }

    // Unary plus operator (no-op)
    if (c == '+') {
      ++pos_;  // Skip the plus sign
      SkipWhitespace();
      return ParseFactor();
    }

    // Numeric literal (integer or floating point)
    if (IsDigit(c)) {
      return ParseNumber();
    }

    // Column name (starts with letter or _)
    if (IsAlpha(c) || c == '_') {
      return ParseColumnName();
    }

    return arrow::Status::Invalid("Unexpected character: '", std::string(1, c),
                                  "'");
  }

  // Parses a numeric literal starting at a digit. A literal without '.' and
  // exponent becomes an int64 literal (or a double when it does not fit);
  // everything else becomes a double literal.
  arrow::Result<Expression> ParseNumber() {
    const size_t start = pos_;
    bool has_dot = false;
    bool has_exponent = false;

    while (!AtEnd()) {
      char c = expr_[pos_];
      if (IsDigit(c)) {
        ++pos_;
      } else if (c == '.' && !has_dot && !has_exponent) {
        // A '.' after the exponent is not part of the number; accepting it
        // would make the conversion below silently drop the fraction.
        has_dot = true;
        ++pos_;
      } else if ((c == 'e' || c == 'E') && !has_exponent) {
        has_exponent = true;
        ++pos_;
        if (!AtEnd() && (expr_[pos_] == '+' || expr_[pos_] == '-')) {
          ++pos_;
        }
        // The exponent marker must be followed by at least one digit.
        if (AtEnd() || !IsDigit(expr_[pos_])) {
          return arrow::Status::Invalid("Invalid float: ",
                                        expr_.substr(start, pos_ - start));
        }
      } else {
        break;
      }
    }

    std::string num_str = expr_.substr(start, pos_ - start);
    const bool integer_form = !has_dot && !has_exponent;

    if (integer_form) {
      try {
        int64_t value = std::stoll(num_str);
        return literal(value);
      } catch (...) {
        // Out of range for a 64-bit integer: fall back to double below.
      }
    }

    try {
      double value = std::stod(num_str);
      return literal(value);
    } catch (...) {
      // Not convertible: reported right below.
    }
    return arrow::Status::Invalid(
        integer_form ? "Invalid integer: " : "Invalid float: ", num_str);
  }

  // Parses an identifier and resolves it against the schema; the column must
  // exist and be numeric.
  arrow::Result<Expression> ParseColumnName() {
    const size_t start = pos_;
    while (!AtEnd()) {
      char c = expr_[pos_];
      if (IsAlnum(c) || c == '_') {
        ++pos_;
      } else {
        break;
      }
    }
    std::string name = expr_.substr(start, pos_ - start);

    auto field = schema_->GetFieldByName(name);
    if (!field) {
      return arrow::Status::Invalid("Column not found in schema: ", name);
    } else if (!IsNumericType(field->type())) {
      return arrow::Status::Invalid("Column is not numeric: ", name);
    }

    return field_ref(name);
  }
};

// Validates that `expr` has one of the supported arithmetic shapes and
// returns it unchanged when it does:
//   (0) a numeric literal,
//   (1) a reference to a numeric column of `schema`,
//   (2) add / subtract / multiply / divide between one numeric column and one
//       numeric literal (in either order),
//   (3) negate of a numeric column or a numeric literal.
// Any other expression yields an Invalid status describing the problem.
arrow::Result<Expression> CheckSupportedArithmeticExpression(
    const Expression &expr, const arrow::Schema &schema) {
  // Checks that `ref` names an existing numeric column of `schema`.
  // `non_numeric_message` prefixes the error for a non-numeric column.
  const auto check_numeric_column =
      [&schema](const auto &ref,
                const char *non_numeric_message) -> arrow::Status {
    // name() is null for references that are not a plain column name.
    const std::string *column_name = ref.name();
    if (column_name == nullptr) {
      return arrow::Status::Invalid(
          "Only plain column name references are supported, got: ",
          ref.ToString());
    }
    auto field = schema.GetFieldByName(*column_name);
    if (!field) {
      return arrow::Status::Invalid("Field not found: ", *column_name);
    }
    if (!IsNumericType(field->type())) {
      return arrow::Status::Invalid(non_numeric_message, field->ToString());
    }
    return arrow::Status::OK();
  };

  // True when the literal carries a numeric type.
  const auto is_numeric_literal = [](const auto &value) {
    const auto &type = value.type();
    return type != nullptr && IsNumericType(type);
  };

  // Case 0: Literal, must be numeric type
  if (const auto *lit = expr.literal()) {
    if (!is_numeric_literal(*lit)) {
      return arrow::Status::Invalid("Only numeric literals are allowed, got: ",
                                    lit->ToString());
    }
    return expr;
  }

  // Case 1: Single column reference (e.g., col)
  if (const auto *ref = expr.field_ref()) {
    ARROW_RETURN_NOT_OK(check_numeric_column(
        *ref, "Only numeric columns are allowed, but got: "));
    return expr;
  }

  // Function calls (binary arithmetic, unary negate)
  if (const auto *fn = expr.call()) {
    const auto &func_name = fn->function_name;

    // Case 2: Binary arithmetic operations (e.g., col + 1)
    if (func_name == "add" || func_name == "subtract" ||
        func_name == "multiply" || func_name == "divide") {
      if (fn->arguments.size() != 2) {
        return arrow::Status::Invalid("Expected two arguments for '", func_name,
                                      "'");
      }

      const auto &left = fn->arguments[0];
      const auto &right = fn->arguments[1];

      // One operand must be a column, the other a literal.
      const auto *column_ref = left.field_ref();
      const auto *operand_literal = right.literal();
      if (column_ref == nullptr || operand_literal == nullptr) {
        column_ref = right.field_ref();
        operand_literal = left.literal();
      }
      if (column_ref == nullptr || operand_literal == nullptr) {
        return arrow::Status::Invalid(
            "Only support binary operation between a column and a literal, "
            "got: ",
            expr.ToString());
      }

      ARROW_RETURN_NOT_OK(
          check_numeric_column(*column_ref, "Column is not numeric: "));
      if (!is_numeric_literal(*operand_literal)) {
        return arrow::Status::Invalid(
            "Only numeric literals are allowed, got: ",
            operand_literal->ToString());
      }
      return expr;
    }

    // Case 3: Unary operators (e.g., -col)
    if (func_name == "negate") {
      if (fn->arguments.size() != 1) {
        return arrow::Status::Invalid("negate expects one argument");
      }
      const auto &arg = fn->arguments[0];

      if (const auto *ref = arg.field_ref()) {
        ARROW_RETURN_NOT_OK(
            check_numeric_column(*ref, "Cannot negate non-numeric column: "));
        return expr;
      }
      if (const auto *lit = arg.literal()) {
        // Allow negation of literals
        if (!is_numeric_literal(*lit)) {
          return arrow::Status::Invalid("Cannot negate non-numeric literal: ",
                                        lit->ToString());
        }
        return expr;
      }
      return arrow::Status::Invalid(
          "Only support negation of a column or numeric literal, got: ",
          arg.ToString());
    }

    // Unsupported functions
    return arrow::Status::Invalid("Unsupported function in expression: ",
                                  func_name);
  }

  // Fallback error: unsupported expression form
  return arrow::Status::Invalid(
      "Only support: (1) single numeric column or literal, (2) column +|-|*|/ "
      "literal, (3) -column. Got: ",
      expr.ToString());
}

// Public interface function
/// Parses `sql_expr` against `schema` and returns whatever Parser::Parse()
/// produces: the resulting Expression on success, or the parser's error.
arrow::Result<Expression> ParseToExpression(
    const std::string &sql_expr, const std::shared_ptr<arrow::Schema> &schema) {
  // The parser is only needed for this single call, so use a temporary.
  return Parser(sql_expr, schema).Parse();
}

}  // namespace zvec

================================================
FILE: src/db/index/segment/sql_expr_parser.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include <string>
#include <arrow/compute/api.h>
#include <arrow/status.h>

namespace zvec {

/// Parses a textual expression into an Arrow compute Expression.
///
/// \param sql_expr The expression text to parse.
/// \param schema   The Arrow schema handed to the parser together with the
///                 expression text.
/// \return The parsed Expression, or the error status reported by the parser.
arrow::Result<arrow::compute::Expression> ParseToExpression(
    const std::string &sql_expr, const std::shared_ptr<arrow::Schema> &schema);

/// Validates that `expr` has one of the supported arithmetic shapes.
///
/// Per the implementation's own error message, the accepted forms are:
/// (1) a single numeric column or literal, (2) `column +/-/*/% literal`, and
/// (3) a negated column; negation of a numeric literal is accepted as well.
/// Column references are looked up by name in `schema` and must be numeric.
///
/// \param expr   The expression to validate.
/// \param schema The schema used to resolve and type-check column references.
/// \return `expr` itself when it is supported, otherwise
///         arrow::Status::Invalid describing the unsupported construct.
arrow::Result<arrow::compute::Expression> CheckSupportedArithmeticExpression(
    const arrow::compute::Expression &expr, const arrow::Schema &schema);

}  // namespace zvec


================================================
FILE: src/db/index/storage/arrow_ipc_writer.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "arrow_ipc_writer.h"
#include <cstdint>
#include <iostream>
#include <arrow/compute/api_vector.h>

namespace zvec {

ArrowIpcWriter::ArrowIpcWriter(const std::string &filepath,
                               int64_t max_rows_per_batch)
    : filepath_(filepath),
      max_rows_per_batch_(max_rows_per_batch),
      finalized_(false) {}

// Finalizes the file automatically if the caller opened a writer but never
// called finalize().
ArrowIpcWriter::~ArrowIpcWriter() {
  // Nothing to flush once finalized, or when no writer was ever opened.
  if (finalized_ || !writer_) {
    return;
  }

  const arrow::Status close_status = finalize();
  if (close_status.ok()) {
    return;
  }

  // A destructor has no way to report failure, so the error is only logged.
  std::cerr << "Auto-finalize failed: " << close_status.ToString()
            << std::endl;
}

// Drains `reader` and appends every non-empty batch to the IPC file.
//
// The first successful call opens `filepath_` and creates the IPC writer with
// the reader's schema; later calls must supply an equal schema. Batches larger
// than `max_rows_per_batch_` (when that cap is positive) are written as
// consecutive slices of at most that many rows. `filter`, when non-null, is
// forwarded to write_batch() for every batch or slice.
//
// Returns Invalid for a null reader, a null schema, a schema mismatch, or an
// insert attempted after finalize(); otherwise propagates any Arrow error.
arrow::Status ArrowIpcWriter::insert(
    std::shared_ptr<arrow::RecordBatchReader> reader,
    const IndexFilter::Ptr &filter) {
  if (!reader) {
    return arrow::Status::Invalid("RecordBatchReader is null");
  }

  auto incoming_schema = reader->schema();
  if (!incoming_schema) {
    return arrow::Status::Invalid("Reader schema is null");
  }

  if (!writer_) {
    // After finalize() the writer is gone for good. Re-opening the path here
    // would truncate the finished file, and the new writer would never be
    // closed because finalize() and the destructor both see finalized_ set.
    if (finalized_) {
      return arrow::Status::Invalid(
          "Cannot insert: writer has already been finalized");
    }

    schema_ = incoming_schema;

    ARROW_ASSIGN_OR_RAISE(sink_, arrow::io::FileOutputStream::Open(filepath_));
    ARROW_ASSIGN_OR_RAISE(writer_,
                          arrow::ipc::MakeFileWriter(sink_.get(), schema_));
  } else if (!schema_->Equals(incoming_schema)) {
    return arrow::Status::Invalid("Schema mismatch in Insert()");
  }

  std::shared_ptr<arrow::RecordBatch> batch;
  while (true) {
    ARROW_ASSIGN_OR_RAISE(batch, reader->Next());
    if (!batch) break;  // a null batch marks the end of the stream
    if (batch->num_rows() == 0) continue;

    const int64_t total_rows = batch->num_rows();
    if (max_rows_per_batch_ > 0 && total_rows > max_rows_per_batch_) {
      // Split oversized batches into zero-copy slices of bounded size.
      int64_t offset = 0;
      while (offset < total_rows) {
        const int64_t length =
            std::min(max_rows_per_batch_, total_rows - offset);
        auto slice = batch->Slice(offset, length);
        ARROW_RETURN_NOT_OK(write_batch(*slice, filter));
        offset += length;
      }
    } else {
      ARROW_RETURN_NOT_OK(write_batch(*batch, filter));
    }
  }

  return arrow::Status::OK();
}

// Appends a single record batch to the IPC file.
//
// An empty batch is accepted and ignored. The first non-empty batch opens
// `filepath_` and creates the IPC writer with the batch's schema; later
// batches must carry an equal schema. A batch larger than
// `max_rows_per_batch_` (when that cap is positive) is written as consecutive
// slices of at most that many rows. `filter`, when non-null, is forwarded to
// write_batch() for the batch or each slice.
//
// Returns Invalid for a null batch, a null schema, a schema mismatch, or an
// insert attempted after finalize(); otherwise propagates any Arrow error.
arrow::Status ArrowIpcWriter::insert_batch(
    std::shared_ptr<arrow::RecordBatch> batch, const IndexFilter::Ptr &filter) {
  if (!batch) {
    return arrow::Status::Invalid("RecordBatch is null");
  }

  if (batch->num_rows() == 0) {
    return arrow::Status::OK();
  }

  auto incoming_schema = batch->schema();
  if (!incoming_schema) {
    return arrow::Status::Invalid("Reader schema is null");
  }

  if (!writer_) {
    // After finalize() the writer is gone for good. Re-opening the path here
    // would truncate the finished file, and the new writer would never be
    // closed because finalize() and the destructor both see finalized_ set.
    if (finalized_) {
      return arrow::Status::Invalid(
          "Cannot insert: writer has already been finalized");
    }

    schema_ = incoming_schema;

    ARROW_ASSIGN_OR_RAISE(sink_, arrow::io::FileOutputStream::Open(filepath_));
    ARROW_ASSIGN_OR_RAISE(writer_,
                          arrow::ipc::MakeFileWriter(sink_.get(), schema_));
  } else if (!schema_->Equals(incoming_schema)) {
    return arrow::Status::Invalid("Schema mismatch in Insert()");
  }

  const int64_t total_rows = batch->num_rows();
  if (max_rows_per_batch_ > 0 && total_rows > max_rows_per_batch_) {
    // Split oversized batches into zero-copy slices of bounded size.
    int64_t offset = 0;
    while (offset < total_rows) {
      const int64_t length = std::min(max_rows_per_batch_, total_rows - offset);
      auto slice = batch->Slice(offset, length);
      ARROW_RETURN_NOT_OK(write_batch(*slice, filter));
      offset += length;
    }
  } else {
    ARROW_RETURN_NOT_OK(write_batch(*batch, filter));
  }

  return arrow::Status::OK();
}

// Writes `batch` through the IPC writer, optionally restricted by `filter`.
//
// Without a filter the batch is written as-is. With a filter, only the rows
// for which filter->is_filtered(row) returns true are gathered (via
// arrow::compute::Take) into a new batch; if no row qualifies nothing is
// written.
//
// NOTE(review): `row` is the position inside this batch/slice, not a position
// in the overall stream, and rows are *kept* when is_filtered() is true.
// Confirm both against the IndexFilter contract and the callers.
arrow::Status ArrowIpcWriter::write_batch(const arrow::RecordBatch &batch,
                                          const IndexFilter::Ptr &filter) {
  if (!filter) {
    return writer_->WriteRecordBatch(batch);
  }

  // Collect the batch-local positions of the rows to keep.
  std::vector<int64_t> kept_rows;
  for (int64_t row = 0; row < batch.num_rows(); ++row) {
    if (!filter->is_filtered(row)) {
      continue;
    }
    kept_rows.push_back(row);
  }

  if (kept_rows.empty()) {
    return arrow::Status::OK();
  }

  // Materialize the positions as an Int64 array usable as Take indices.
  std::shared_ptr<arrow::Array> take_indices;
  arrow::Int64Builder index_builder;
  ARROW_RETURN_NOT_OK(index_builder.AppendValues(kept_rows));
  ARROW_RETURN_NOT_OK(index_builder.Finish(&take_indices));

  // Gather the kept rows column by column.
  std::vector<std::shared_ptr<arrow::Array>> kept_columns;
  for (int col = 0; col < batch.num_columns(); ++col) {
    arrow::Datum taken;
    ARROW_ASSIGN_OR_RAISE(
        taken, arrow::compute::Take(batch.column(col), take_indices));
    kept_columns.push_back(taken.make_array());
  }

  const auto kept_row_count = static_cast<int64_t>(kept_rows.size());
  auto kept_batch =
      arrow::RecordBatch::Make(batch.schema(), kept_row_count, kept_columns);

  return writer_->WriteRecordBatch(*kept_batch);
}

// Closes the IPC writer and then the underlying file stream.
//
// Calling it again after a successful finalize is a no-op that returns OK.
// Returns Invalid when no writer was ever opened, and propagates the first
// Close() error otherwise (in which case the writer is not marked finalized).
arrow::Status ArrowIpcWriter::finalize() {
  if (finalized_) {
    return arrow::Status::OK();
  }

  if (writer_ == nullptr) {
    return arrow::Status::Invalid("No data written, cannot finalize");
  }

  // The writer must be closed before the stream it writes into.
  ARROW_RETURN_NOT_OK(writer_->Close());
  writer_.reset();

  ARROW_RETURN_NOT_OK(sink_->Close());
  sink_.reset();

  finalized_ = true;
  return arrow::Status::OK();
}

}  // namespace zvec

================================================
FILE: src/db/index/storage/arrow_ipc_writer.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// arrow_ipc_writer.h
#pragma once

#include <memory>
#include <string>
#include <arrow/api.h>
#include <arrow/io/api.h>
#include <arrow/ipc/writer.h>
#include "db/index/common/index_filter.h"
#include "forward_writer.h"

namespace zvec {

/// ForwardWriter implementation that writes record batches to a file through
/// an Arrow IPC file writer.
///
/// The output file is opened lazily by the first insert that needs it, and its
/// schema is taken from that first input. The destructor calls finalize() if a
/// writer was opened but never finalized.
class ArrowIpcWriter : public ForwardWriter {
 public:
  /// \param filepath           Path of the file to create.
  /// \param max_rows_per_batch When positive, input batches with more rows are
  ///                           written as slices of at most this many rows.
  ///                           Zero (the default) or a negative value disables
  ///                           splitting.
  explicit ArrowIpcWriter(const std::string &filepath,
                          int64_t max_rows_per_batch = 0);
  /// Finalizes the file if a writer is open and finalize() was not called;
  /// a failure is reported on std::cerr only.
  ~ArrowIpcWriter();

  /// Reads `reader` to the end and writes every non-empty batch.
  /// \param reader Source of batches; must be non-null with a non-null schema
  ///               that equals the schema of any earlier insert.
  /// \param filter Optional row filter forwarded to write_batch().
  /// \return OK on success, Invalid on bad input, or an Arrow I/O error.
  arrow::Status insert(std::shared_ptr<arrow::RecordBatchReader> reader,
                       const IndexFilter::Ptr &filter = nullptr) override;

  /// Writes a single batch; an empty batch is accepted and ignored.
  /// \param batch  Batch to write; must be non-null, and its schema must equal
  ///               the schema of any earlier insert.
  /// \param filter Optional row filter forwarded to write_batch().
  /// \return OK on success, Invalid on bad input, or an Arrow I/O error.
  arrow::Status insert_batch(std::shared_ptr<arrow::RecordBatch> batch,
                             const IndexFilter::Ptr &filter = nullptr) override;

  /// Closes the IPC writer and the file stream. Returns OK if already
  /// finalized and Invalid if no writer was ever opened.
  arrow::Status finalize() override;

 private:
  /// Writes one batch or slice. With a non-null filter only the rows for which
  /// filter->is_filtered(i) is true are written, where i is the row position
  /// within `batch`; nothing is written when no row qualifies.
  arrow::Status write_batch(const arrow::RecordBatch &batch,
                            const IndexFilter::Ptr &filter);

 private:
  /// Destination path passed to arrow::io::FileOutputStream::Open.
  std::string filepath_;
  /// Row cap per written batch; values <= 0 mean "no cap".
  int64_t max_rows_per_batch_;

  /// Output stream; null until the first insert opens it and after finalize().
  std::shared_ptr<arrow::io::FileOutputStream> sink_;
  /// IPC writer over sink_; null until created and after finalize().
  std::shared_ptr<arrow::ipc::RecordBatchWriter> writer_;
  /// Schema adopted from the first insert; later inserts are checked against
  /// it.
  std::shared_ptr<arrow::Schema> schema_;
  /// Set once finalize() has completed successfully.
  bool finalized_;
};

}  // namespace zvec

================================================
FILE: src/db/index/storage/base_forward_store.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <memory>
#include <string>
#include <vector>
#include <arrow/compute/api.h>
#include <arrow/datum.h>
#include <arrow/table_builder.h>
#include <arrow/util/async_generator.h>
#include <zvec/db/status.h>

// Shorthand for the Arrow compute namespace.
// NOTE: this alias and the type aliases below are declared at global scope, so
// they are visible in every translation unit that includes this header.
namespace cp = arrow::compute;

// Short names for the Arrow table / record-batch types used by the forward
// store interfaces.
using Table = arrow::Table;
using RecordBatch = arrow::RecordBatch;
using RecordBatchReader = arrow::RecordBatchReader;
using RecordBatchBuilder = arrow::RecordBatchBuilder;
// Shared-pointer aliases for the types above, plus arrow::compute::ExecBatch.
using TablePtr = std::shared_ptr<Table>;
using ExecBatchPtr = std::shared_ptr<arrow::compute::ExecBatch>;
using RecordBatchPtr = std::shared_ptr<RecordBatch>;
using RecordBatchReaderPtr = std::shared_ptr<RecordBatchReader>;
using RecordBatchBuilderPtr = std::shared_ptr<RecordBatchBuilder>;

namespace zvec {

/// Abstract interface for a forward store: a columnar data source that can be
/// opened, read by row index, scanned, and queried for its physical schema.
class BaseForwardStore {
 public:
  using Ptr = std::shared_ptr<BaseForwardStore>;

  /// Virtual destructor so that implementations can be destroyed safely
  /// through a pointer to this interface.
  virtual ~BaseForwardStore() = default;

  /// Prepares the store for reading.
  /// \return Status describing success or the reason for failure.
  virtual Status Open() = 0;

  /// Fetches the given columns for a set of rows.
  /// \param columns Names of the columns to return.
  /// \param indices Row indices to fetch.
  /// \return A table with the requested data.
  virtual TablePtr fetch(const std::vector<std::string> &columns,
                         const std::vector<int> &indices) = 0;

  /// Fetches the given columns for a single row.
  /// \param columns Names of the columns to return.
  /// \param index   Row index to fetch.
  /// \return An ExecBatch with the requested data.
  virtual ExecBatchPtr fetch(const std::vector<std::string> &columns,
                             int index) = 0;

  /// Streams the given columns over the whole store.
  /// \param columns Names of the columns to scan.
  /// \return A RecordBatchReader over the requested columns.
  virtual RecordBatchReaderPtr scan(
      const std::vector<std::string> &columns) = 0;

  /// \return The Arrow schema describing the stored data.
  virtual const std::shared_ptr<arrow::Schema> physic_schema() const = 0;

  /// \return The store's data as an Arrow table, as defined by the
  ///         implementation.
  virtual TablePtr get_table() = 0;
};

}  // namespace zvec

================================================
FILE: src/db/index/storage/bufferpool_forward_store.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "bufferpool_forward_store.h"
#include <arrow/acero/exec_plan.h>
#include <arrow/compute/api.h>
#include <arrow/filesystem/api.h>
#include <arrow/ipc/reader.h>
#include <arrow/ipc/writer.h>
#include <arrow/result.h>
#include <arrow/status.h>
#include <parquet/arrow/reader.h>
#include <zvec/ailego/buffer/buffer_manager.h>
#include <zvec/ailego/logger/logger.h>
#include "db/index/storage/store_helper.h"
#include "lazy_record_batch_reader.h"


namespace zvec {

BufferPoolForwardStore::BufferPoolForwardStore(const std::string &uri)
    : file_path_(uri) {}

// Opens the file named at construction and loads its Parquet metadata.
// Only the Parquet format is supported; every failure is reported as
// Status::InternalError.
Status BufferPoolForwardStore::Open() {
  // Keep a copy of the original URI: file_path_ is also passed as an output
  // argument and may be rewritten by CreateRandomAccessFileByUri.
  std::string uri = file_path_;
  auto open_status = CreateRandomAccessFileByUri(uri, &file_, &file_path_);
  if (!open_status.ok()) {
    return Status::InternalError("Failed to create random access uri: ", uri,
                                 " : ", open_status.ToString());
  }

  if (InferFileFormat(file_path_) != FileFormat::PARQUET) {
    return Status::InternalError("Unsupported format, file: ", file_path_);
  }

  const arrow::Status parquet_status = OpenParquet(file_);
  if (!parquet_status.ok()) {
    return Status::InternalError("Failed to open parquet file: ", file_path_,
                                 " : ", parquet_status.ToString());
  }
  return Status::OK();
}
// Creates the Parquet reader over `file` and caches the file's metadata: total
// row count, row-group count, per-row-group row counts and starting offsets,
// and the Arrow schema.
//
// \param file The RandomAccessFile to read from.
// \return OK on success; IOError if the Parquet reader cannot be opened, or
//         the Arrow status of the failing step.
arrow::Status BufferPoolForwardStore::OpenParquet(
    const std::shared_ptr<arrow::io::RandomAccessFile> &file) {
  // ParquetFileReader::Open reports failures (e.g. a truncated or non-Parquet
  // file) by throwing; convert that into a Status so it cannot escape from a
  // Status-returning function.
  std::unique_ptr<parquet::ParquetFileReader> parquet_file_reader;
  try {
    parquet_file_reader = parquet::ParquetFileReader::Open(file);
  } catch (const std::exception &e) {
    return arrow::Status::IOError("Failed to open parquet file reader: ",
                                  e.what());
  }

  ARROW_RETURN_NOT_OK(parquet::arrow::FileReader::Make(
      arrow::default_memory_pool(), std::move(parquet_file_reader),
      &parquet_reader_));

  auto parquet_metadata = parquet_reader_->parquet_reader()->metadata();
  num_rows_ = parquet_metadata->num_rows();
  num_row_groups_ = parquet_metadata->num_row_groups();

  // Rebuild the row-group tables from scratch so that a repeated Open() does
  // not append a second copy of the offsets.
  const int row_group_count = parquet_metadata->num_row_groups();
  row_group_row_nums_.clear();
  row_group_offsets_.clear();
  row_group_row_nums_.reserve(static_cast<size_t>(row_group_count));
  row_group_offsets_.reserve(static_cast<size_t>(row_group_count));

  // offsets[i] is the global index of the first row of row group i.
  int64_t offset = 0;
  for (int rg = 0; rg < row_group_count; ++rg) {
    auto row_group_metadata = parquet_metadata->RowGroup(rg);
    const int64_t num_rows_in_group = row_group_metadata->num_rows();
    row_group_row_nums_.push_back(num_rows_in_group);
    row_group_offsets_.push_back(offset);
    offset += num_rows_in_group;
  }

  ARROW_RETURN_NOT_OK(parquet_reader_->GetSchema(&physic_schema_));

  LOG_INFO("Opened Parquet with %lld rows, %d cols, %d row groups",
           static_cast<long long>(num_rows_), physic_schema_->num_fields(),
           parquet_metadata->num_row_groups());

  return arrow::Status::OK();
}


// Checks a column list before it is used for fetch/scan.
// Returns false (and logs) when the list is empty or names a column that is
// neither LOCAL_ROW_ID nor a field of the physical schema.
bool BufferPoolForwardStore::validate(
    const std::vector<std::string> &columns) const {
  if (columns.empty()) {
    LOG_ERROR("Empty columns");
    return false;
  }
  // TODO : for persist segment, after add new column, this check is not
  // correct.
  for (const std::string &name : columns) {
    // LOCAL_ROW_ID is accepted without a schema lookup.
    const bool is_row_id = (name == LOCAL_ROW_ID);
    if (!is_row_id && physic_schema_->GetFieldIndex(name) == -1) {
      LOG_ERROR("Validate failed. unknown column: %s", name.c_str());
      return false;
    }
  }
  return true;
}

// Maps a global row index to the id of the row group containing it: the last
// row group whose starting offset is <= row. Returns 0 when no offset
// qualifies (including when no row groups are recorded).
int BufferPoolForwardStore::FindRowGroupForRow(int64_t row) {
  const auto first = row_group_offsets_.begin();
  // First row group that starts strictly after `row`.
  const auto past_row = std::upper_bound(first, row_group_offsets_.end(), row);
  if (past_row != first) {
    return static_cast<int>(std::distance(first, past_row) - 1);
  }
  return 0;
}

// Returns the global index of the first row of row group `rg_id`, or -1 (after
// logging) when `rg_id` is outside the recorded row groups.
int64_t BufferPoolForwardStore::GetRowGroupOffset(int rg_id) {
  const bool in_range =
      rg_id >= 0 && rg_id < static_cast<int>(row_group_offsets_.size());
  if (in_range) {
    return row_group_offsets_[rg_id];
  }

  LOG_ERROR("Invalid row group id: %d, max: %zu", rg_id,
            row_group_offsets_.size());
  return -1;
}


// Fetches the requested columns for the given global row indices.
//
// The result has one row per entry of `indices`, in the same order, and one
// column per entry of `columns`, in the same order. A column named
// LOCAL_ROW_ID is synthesized as a uint64 column holding the requested row
// index; every other column is read from the buffer pool, one
// (column, row group) buffer at a time.
//
// \param columns Column names to fetch; must pass validate().
// \param indices Global row indices, each in [0, num_rows_).
// \return The assembled table, an empty table when `indices` is empty, or
//         nullptr on any failure (the reason is logged).
TablePtr BufferPoolForwardStore::fetch(const std::vector<std::string> &columns,
                                       const std::vector<int> &indices) {
  if (!validate(columns)) {
    return nullptr;
  }

  if (indices.empty()) {
    arrow::ArrayVector empty_arrays;
    auto fields = SelectFields(physic_schema_, columns);
    for (const auto &field : fields) {
      empty_arrays.push_back(arrow::MakeEmptyArray(field->type()).ValueOrDie());
    }
    return arrow::Table::Make(std::make_shared<arrow::Schema>(fields),
                              empty_arrays, 0);
  }

  const size_t num_output_rows = indices.size();

  // Split the request into physical columns (read from parquet) and the
  // synthetic LOCAL_ROW_ID column, remembering each one's output position.
  bool need_local_doc_id = false;
  std::vector<int> col_indices;
  std::vector<int> data_column_positions;
  std::vector<std::shared_ptr<arrow::Field>> all_fields(columns.size());

  for (size_t i = 0; i < columns.size(); ++i) {
    if (columns[i] == LOCAL_ROW_ID) {
      need_local_doc_id = true;
      all_fields[i] = arrow::field(LOCAL_ROW_ID, arrow::uint64());
    } else {
      int idx = physic_schema_->GetFieldIndex(columns[i]);
      if (idx == -1) {
        LOG_ERROR("Unknown column: %s", columns[i].c_str());
        return nullptr;
      }
      col_indices.push_back(idx);
      data_column_positions.push_back(static_cast<int>(i));
      all_fields[i] = physic_schema_->GetFieldByName(columns[i]);
    }
  }

  // Group the requested rows by row group so that each (column, row group)
  // buffer is pinned once. Each entry is (output_row, row_within_row_group).
  std::unordered_map<int, std::vector<std::pair<size_t, int64_t>>> rg_to_local;
  std::vector<uint64_t> local_doc_ids;  // indexed by output row
  if (need_local_doc_id) {
    local_doc_ids.reserve(num_output_rows);
  }

  for (size_t output_row = 0; output_row < num_output_rows; ++output_row) {
    const int global_row = indices[output_row];
    if (global_row < 0 || global_row >= num_rows_) {
      LOG_ERROR("Invalid row index: %d, max: %lld", global_row,
                static_cast<long long>(num_rows_));
      return nullptr;
    }
    const int rg_id = FindRowGroupForRow(global_row);
    const int64_t offset = GetRowGroupOffset(rg_id);
    if (offset == -1) {
      LOG_ERROR("Failed to get row group offset for row: %d", global_row);
      return nullptr;
    }
    rg_to_local[rg_id].emplace_back(output_row, global_row - offset);

    if (need_local_doc_id) {
      local_doc_ids.push_back(static_cast<uint64_t>(global_row));
    }
  }

  // One slot per (physical column, output row). Scalars are stored directly
  // at their output position, so no sorting is needed afterwards.
  std::vector<std::vector<std::shared_ptr<arrow::Scalar>>> column_scalars(
      col_indices.size(),
      std::vector<std::shared_ptr<arrow::Scalar>>(num_output_rows));

  auto &buf_mgr = ailego::BufferManager::Instance();
  for (const auto &[rg_id, pairs] : rg_to_local) {
    for (size_t i = 0; i < col_indices.size(); ++i) {
      int col_idx = col_indices[i];
      auto buffer_id = ailego::BufferID::ParquetID(file_path_, col_idx, rg_id);
      auto buffer_handle = buf_mgr.acquire(buffer_id);
      auto col_chunked_array = buffer_handle.pin_parquet_data();

      if (!col_chunked_array) {
        LOG_ERROR(
            "Failed to pin parquet data for file: %s, column: %d, row_group: "
            "%d",
            file_path_.c_str(), col_idx, rg_id);
        return nullptr;
      }

      // Rows were requested from this row group, so an empty column chunk
      // cannot be skipped: that would leave this column shorter than the
      // table's row count.
      if (col_chunked_array->num_chunks() == 0) {
        LOG_ERROR(
            "No chunks in chunked array for file: %s, column: %d, row_group: "
            "%d",
            file_path_.c_str(), col_idx, rg_id);
        return nullptr;
      }

      auto &dst = column_scalars[i];
      for (const auto &[target_row, local_idx] : pairs) {
        if (local_idx >= col_chunked_array->length()) {
          LOG_ERROR("Local index %ld out of bounds for array length %zu",
                    static_cast<long>(local_idx),
                    static_cast<size_t>(col_chunked_array->length()));
          return nullptr;
        }
        auto scalar_result = col_chunked_array->GetScalar(local_idx);
        if (!scalar_result.ok()) {
          LOG_ERROR("Failed to get scalar for row %zu status: %s",
                    static_cast<size_t>(local_idx),
                    scalar_result.status().ToString().c_str());
          // ValueOrDie() on a failed result would abort the process.
          return nullptr;
        }
        dst[target_row] = std::move(scalar_result.ValueOrDie());
      }
    }
  }

  // Turn each physical column's scalars into an array at its output position.
  std::vector<std::shared_ptr<arrow::Array>> result_arrays(columns.size());
  for (size_t i = 0; i < column_scalars.size(); ++i) {
    std::shared_ptr<arrow::Array> arr;
    auto status = ConvertScalarVectorToArrayByType(column_scalars[i], &arr);
    if (!status.ok()) {
      LOG_ERROR("ConvertScalarVectorToArrayByType failed: %s",
                status.message().c_str());
      return nullptr;
    }

    int position = data_column_positions[i];
    result_arrays[position] = std::move(arr);
  }

  if (need_local_doc_id) {
    // Build a UInt64Array without a null bitmap straight from the row ids.
    const size_t num_bytes = local_doc_ids.size() * sizeof(uint64_t);
    auto buffer_result =
        arrow::AllocateBuffer(num_bytes, arrow::default_memory_pool());
    if (!buffer_result.ok()) return nullptr;
    auto buffer = std::move(buffer_result.ValueOrDie());
    std::memcpy(buffer->mutable_data(), local_doc_ids.data(), num_bytes);

    std::vector<std::shared_ptr<arrow::Buffer>> buffers;
    buffers.push_back(nullptr);  // no null bitmap
    buffers.push_back(std::shared_ptr<arrow::Buffer>(buffer.release()));

    auto data = arrow::ArrayData::Make(
        arrow::uint64(), static_cast<int64_t>(local_doc_ids.size()),
        std::move(buffers), /*null_count=*/0);

    for (size_t i = 0; i < columns.size(); ++i) {
      if (columns[i] == LOCAL_ROW_ID) {
        result_arrays[i] = std::make_shared<arrow::UInt64Array>(data);
      }
    }
  }

  std::vector<std::shared_ptr<arrow::ChunkedArray>> result_columns;
  result_columns.reserve(result_arrays.size());
  for (auto &arr : result_arrays) {
    result_columns.emplace_back(std::make_shared<arrow::ChunkedArray>(arr));
  }

  auto out_schema = std::make_shared<arrow::Schema>(all_fields);
  return arrow::Table::Make(out_schema, result_columns,
                            static_cast<int64_t>(num_output_rows));
}

// Fetches the requested columns for a single global row.
//
// \param columns Column names to fetch; must pass validate() and exist in the
//                physical schema.
// \param index   Global row index in [0, num_rows_).
// \return An ExecBatch of length 1 holding one scalar per requested column, in
//         the order of `columns`, or nullptr on any failure (logged where a
//         reason is available).
//
// NOTE(review): LOCAL_ROW_ID passes validate() but is looked up in the
// physical schema here, so requesting it fails unless the schema has such a
// field. The table overload of fetch() synthesizes it instead; confirm which
// behavior is intended.
ExecBatchPtr BufferPoolForwardStore::fetch(
    const std::vector<std::string> &columns, int index) {
  if (!validate(columns) || index < 0 || index >= num_rows_) {
    return nullptr;
  }

  std::vector<int> col_indices;
  col_indices.reserve(columns.size());
  for (const auto &col : columns) {
    int idx = physic_schema_->GetFieldIndex(col);
    if (idx == -1) {
      LOG_ERROR("Unknown column: %s", col.c_str());
      return nullptr;
    }
    col_indices.push_back(idx);
  }

  const int rg_id = FindRowGroupForRow(index);
  const int64_t offset = GetRowGroupOffset(rg_id);
  if (offset == -1) {
    LOG_ERROR("Failed to get row group offset for row: %d", index);
    return nullptr;
  }
  // Position of the row inside its row group.
  const int64_t local_idx = index - offset;

  std::vector<arrow::Datum> scalars;
  scalars.reserve(col_indices.size());
  auto &buf_mgr = ailego::BufferManager::Instance();
  for (const int col_idx : col_indices) {
    auto buffer_id = ailego::BufferID::ParquetID(file_path_, col_idx, rg_id);
    auto buffer_handle = buf_mgr.acquire(buffer_id);
    auto col_chunked_array = buffer_handle.pin_parquet_data();

    if (!col_chunked_array) {
      LOG_ERROR(
          "Failed to pin parquet data for file: %s, column: %d, row_group: "
          "%d",
          file_path_.c_str(), col_idx, rg_id);
      return nullptr;
    }

    // Skipping an empty column would yield a batch with fewer values than
    // requested columns, so treat it as a failure.
    if (col_chunked_array->num_chunks() == 0) {
      LOG_ERROR(
          "No chunks in chunked array for file: %s, column: %d, row_group: "
          "%d",
          file_path_.c_str(), col_idx, rg_id);
      return nullptr;
    }

    // Read the scalar straight from the chunked array; concatenating every
    // chunk first would copy the whole column just to read one value.
    auto scalar_result = col_chunked_array->GetScalar(local_idx);
    if (!scalar_result.ok()) {
      LOG_ERROR("Failed to get scalar for row %zu status: %s",
                static_cast<size_t>(index),
                scalar_result.status().ToString().c_str());
      // ValueOrDie() on a failed result would abort the process.
      return nullptr;
    }

    scalars.emplace_back(std::move(scalar_result.ValueOrDie()));
  }

  return std::make_shared<arrow::ExecBatch>(std::move(scalars), 1);
}

// Creates a streaming reader over the requested columns.
//
// A dedicated Parquet reader is opened on the already-open file for each scan
// and handed to a ParquetRecordBatchReader.
//
// \param columns Column names to scan; must pass validate().
// \return The reader, or nullptr on failure (the reason is logged).
RecordBatchReaderPtr BufferPoolForwardStore::scan(
    const std::vector<std::string> &columns) {
  if (!validate(columns)) {
    return nullptr;
  }

  // ParquetFileReader::Open reports failures by throwing; this function
  // signals errors by returning nullptr, so convert the exception here.
  std::unique_ptr<parquet::ParquetFileReader> parquet_file_reader;
  try {
    parquet_file_reader = parquet::ParquetFileReader::Open(file_);
  } catch (const std::exception &e) {
    LOG_ERROR("Failed to open parquet file reader for %s: %s",
              file_path_.c_str(), e.what());
    return nullptr;
  }

  // Create a new parquet reader for scanning
  std::unique_ptr<parquet::arrow::FileReader> parquet_reader;
  auto status = parquet::arrow::FileReader::Make(arrow::default_memory_pool(),
                                                 std::move(parquet_file_reader),
                                                 &parquet_reader);
  if (!status.ok()) {
    LOG_ERROR("Failed to create parquet reader: %s", status.message().c_str());
    return nullptr;
  }

  return std::make_shared<ParquetRecordBatchReader>(parquet_reader, columns,
                                                    physic_schema_, file_path_);
}

}  // namespace zvec

================================================
FILE: src/db/index/storage/bufferpool_forward_store.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <memory>
#include <string>
#include <vector>
#include <arrow/api.h>
#include <arrow/compute/api.h>
#include <arrow/filesystem/filesystem.h>
#include <arrow/io/api.h>
#include <arrow/ipc/reader.h>
#include <arrow/util/async_generator.h>
#include <parquet/arrow/reader.h>
#include <zvec/ailego/buffer/buffer_manager.h>
#include <zvec/db/status.h>
#include "base_forward_store.h"

namespace zvec {

/// BufferPoolForwardStore implements a forward store that uses a buffer pool
/// to efficiently manage data access from parquet files.
///
/// Row fetches read column data per (column, row group) through
/// ailego::BufferManager; scans open a separate parquet reader on the file.
class BufferPoolForwardStore
    : public BaseForwardStore,
      public std::enable_shared_from_this<BufferPoolForwardStore> {
 public:
  /// Pointer type for BufferPoolForwardStore instances
  using Ptr = std::shared_ptr<BufferPoolForwardStore>;

  /// Constructor that initializes the store with a file URI
  /// \param uri The URI of the file to be accessed
  explicit BufferPoolForwardStore(const std::string &uri);

  virtual ~BufferPoolForwardStore() = default;

  /// Open the file given at construction and load its parquet metadata
  /// (row counts, row-group layout and schema)
  /// \return OK on success; InternalError if the file cannot be opened, is not
  /// a parquet file, or its metadata cannot be read
  Status Open() override;

  /// Fetch specific columns and row indices from the data source
  /// \param columns The list of column names to fetch; LOCAL_ROW_ID yields a
  /// uint64 column holding the requested row indices
  /// \param indices The list of row indices to fetch
  /// \return A table containing the requested data (rows in the order of
  /// `indices`, columns in the order of `columns`) or nullptr on failure
  TablePtr fetch(const std::vector<std::string> &columns,
                 const std::vector<int> &indices) override;

  /// Fetch specific columns of a single row from the data source
  /// \param columns The list of column names to fetch
  /// \param index The row index to fetch
  /// \return An ExecBatch of length 1 containing the requested data or nullptr
  /// on failure
  ExecBatchPtr fetch(const std::vector<std::string> &columns,
                     int index) override;

  /// Scan specified columns from the data source
  /// \param columns The list of column names to scan
  /// \return A RecordBatchReader for streaming the data or nullptr on failure
  RecordBatchReaderPtr scan(const std::vector<std::string> &columns) override;

  /// Get the physical schema of the file
  /// \return A shared pointer to the arrow schema representing the physical
  /// structure of the data
  const std::shared_ptr<arrow::Schema> physic_schema() const override {
    return physic_schema_;
  }

  /// This store does not expose its data as an in-memory table
  /// \return Always nullptr
  TablePtr get_table() override {
    return nullptr;
  }

 private:
  /// Validate that the requested columns exist in the schema
  /// \param columns The list of column names to validate
  /// \return true if the list is non-empty and every column is either
  /// LOCAL_ROW_ID or a field of the physical schema, false otherwise
  bool validate(const std::vector<std::string> &columns) const;

  /// Open a parquet file and initialize metadata
  /// \param file The RandomAccessFile to read from
  /// \return arrow::Status indicating success or failure
  arrow::Status OpenParquet(
      const std::shared_ptr<arrow::io::RandomAccessFile> &file);

  /// Find which row group contains a given row
  /// \param row The row index to locate
  /// \return The row group ID containing the row (0 when no row group offset
  /// is <= row)
  int FindRowGroupForRow(int64_t row);

  /// Get the row offset for a given row group
  /// \param rg_id The row group ID
  /// \return The row offset of the row group, or -1 on error
  int64_t GetRowGroupOffset(int rg_id);

 private:
  /// Physical schema of the file
  std::shared_ptr<arrow::Schema> physic_schema_;

  /// Total number of rows in the file
  int64_t num_rows_ = 0;

  /// Path to the file (initialized from the constructor URI; also passed to
  /// CreateRandomAccessFileByUri as an output argument during Open())
  std::string file_path_;

  // Parquet-specific members
  /// The RandomAccessFile for reading data
  std::shared_ptr<arrow::io::RandomAccessFile> file_;

  /// The parquet file reader
  std::unique_ptr<parquet::arrow::FileReader> parquet_reader_;

  /// Number of row groups in the file
  int64_t num_row_groups_ = 0;

  /// Offsets of each row group (global index of the row group's first row)
  std::vector<int64_t> row_group_offsets_;

  /// Number of rows in each row group
  std::vector<int64_t> row_group_row_nums_;

  /// Buffer manager for caching data
  /// NOTE(review): not referenced by the member functions in
  /// bufferpool_forward_store.cc, which use ailego::BufferManager::Instance()
  /// directly; confirm whether this member is still needed.
  std::shared_ptr<ailego::BufferManager> buffer_manager_;
};

}  // namespace zvec

================================================
FILE: src/db/index/storage/chunked_file_writer.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "chunked_file_writer.h"
#include <fstream>
#include <arrow/ipc/writer.h>
#include <parquet/arrow/writer.h>
#include <parquet/exception.h>
#include <zvec/ailego/logger/logger.h>


namespace zvec {

/// ChunkedFileWriter implementation that writes through an Arrow IPC file
/// writer.
class IpcChunkedWriter : public ChunkedFileWriter {
 public:
  /// Takes ownership of an already-open output stream and the IPC writer
  /// created over it. Prefer Make(), which builds both.
  IpcChunkedWriter(std::shared_ptr<arrow::Schema> schema,
                   std::shared_ptr<arrow::io::FileOutputStream> out_file,
                   std::shared_ptr<arrow::ipc::RecordBatchWriter> writer)
      : ChunkedFileWriter(std::move(schema)),
        out_file_(std::move(out_file)),
        writer_(std::move(writer)) {}

  /// Opens `path` for writing and creates an IPC file writer for `schema`.
  /// \return The ready-to-use writer, or the Arrow error of the failing step.
  static arrow::Result<std::unique_ptr<IpcChunkedWriter>> Make(
      const std::string &path, const std::shared_ptr<arrow::Schema> &schema) {
    ARROW_ASSIGN_OR_RAISE(auto sink, arrow::io::FileOutputStream::Open(path));

    std::shared_ptr<arrow::ipc::RecordBatchWriter> ipc_writer;
    ARROW_ASSIGN_OR_RAISE(ipc_writer, arrow::ipc::MakeFileWriter(sink, schema));

    return std::make_unique<IpcChunkedWriter>(schema, std::move(sink),
                                              std::move(ipc_writer));
  }

  /// Appends one record batch.
  arrow::Status Write(const arrow::RecordBatch &batch) override {
    return writer_->WriteRecordBatch(batch);
  }

  /// Appends a whole table.
  arrow::Status Write(const arrow::Table &table) override {
    return writer_->WriteTable(table);
  }

  /// Closes the IPC writer first, then the underlying file. If closing the
  /// writer fails, that error is returned and the file is not closed here.
  arrow::Status Close() override {
    ARROW_RETURN_NOT_OK(writer_->Close());
    return out_file_->Close();
  }

 private:
  /// Stream the IPC writer writes into.
  std::shared_ptr<arrow::io::FileOutputStream> out_file_;
  /// IPC file writer bound to out_file_.
  std::shared_ptr<arrow::ipc::RecordBatchWriter> writer_;
};


/// ChunkedFileWriter implementation that emits Parquet files.
class ParquetChunkedWriter : public ChunkedFileWriter {
 public:
  /// Opens `path` as an output stream and attaches a Parquet writer built
  /// with default writer properties. Arrow/Parquet failures are propagated
  /// through the returned Result.
  static arrow::Result<std::unique_ptr<ParquetChunkedWriter>> Make(
      const std::string &path, const std::shared_ptr<arrow::Schema> &schema) {
    ARROW_ASSIGN_OR_RAISE(auto sink, arrow::io::FileOutputStream::Open(path));

    // No option is customised: the builder's defaults are used verbatim.
    const auto default_props = parquet::WriterProperties::Builder().build();

    ARROW_ASSIGN_OR_RAISE(
        std::shared_ptr<parquet::arrow::FileWriter> parquet_writer,
        parquet::arrow::FileWriter::Open(*schema, arrow::default_memory_pool(),
                                         sink, default_props));

    return std::make_unique<ParquetChunkedWriter>(schema, std::move(sink),
                                                  std::move(parquet_writer));
  }

  /// Takes shared ownership of an already opened stream and its writer.
  ParquetChunkedWriter(std::shared_ptr<arrow::Schema> schema,
                       std::shared_ptr<arrow::io::FileOutputStream> out_file,
                       std::shared_ptr<parquet::arrow::FileWriter> writer)
      : ChunkedFileWriter(std::move(schema)),
        out_file_(std::move(out_file)),
        writer_(std::move(writer)) {}

  /// Forwards a single record batch to the Parquet writer.
  arrow::Status Write(const arrow::RecordBatch &batch) override {
    return writer_->WriteRecordBatch(batch);
  }

  /// Forwards a whole table to the Parquet writer.
  arrow::Status Write(const arrow::Table &table) override {
    return writer_->WriteTable(table);
  }

  /// Closes the Parquet writer first; the stream is closed only when that
  /// succeeded, otherwise the writer's error is returned as is.
  arrow::Status Close() override {
    const arrow::Status writer_status = writer_->Close();
    if (!writer_status.ok()) {
      return writer_status;
    }
    return out_file_->Close();
  }

 private:
  std::shared_ptr<arrow::io::FileOutputStream> out_file_;
  std::shared_ptr<parquet::arrow::FileWriter> writer_;
};


/// Creates the writer matching `format` for `file_path`.
/// Returns nullptr (after logging the reason) when the format is not
/// supported or the underlying writer could not be created.
std::unique_ptr<ChunkedFileWriter> ChunkedFileWriter::Open(
    const std::string &file_path, const std::shared_ptr<arrow::Schema> &schema,
    FileFormat format) {
  if (format == FileFormat::IPC) {
    auto ipc_writer = IpcChunkedWriter::Make(file_path, schema);
    if (ipc_writer.ok()) {
      return std::move(ipc_writer).ValueUnsafe();
    }
    LOG_ERROR("Failed to open IPC writer: %s",
              ipc_writer.status().ToString().c_str());
    return nullptr;
  }

  if (format == FileFormat::PARQUET) {
    auto parquet_writer = ParquetChunkedWriter::Make(file_path, schema);
    if (parquet_writer.ok()) {
      return std::move(parquet_writer).ValueUnsafe();
    }
    LOG_ERROR("Failed to open Parquet writer: %s",
              parquet_writer.status().ToString().c_str());
    return nullptr;
  }

  // Every other enumerator is rejected.
  LOG_ERROR("Unsupported format");
  return nullptr;
}

}  // namespace zvec


================================================
FILE: src/db/index/storage/chunked_file_writer.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include <string>
#include <arrow/api.h>
#include <arrow/io/api.h>
#include <parquet/arrow/writer.h>
#include <zvec/db/type.h>

namespace zvec {

/// Abstract sink that writes Arrow record batches / tables to a single file.
/// Concrete writers are obtained through Open(); the constructor is protected.
class ChunkedFileWriter {
 public:
  using Ptr = std::unique_ptr<ChunkedFileWriter>;

  /// Creates a writer for `file_path` using the given `schema` and `format`.
  /// The implementation in chunked_file_writer.cc returns nullptr when the
  /// format is unsupported or the underlying writer cannot be opened.
  static std::unique_ptr<ChunkedFileWriter> Open(
      const std::string &file_path,
      const std::shared_ptr<arrow::Schema> &schema, FileFormat format);

  /// Writes one record batch to the file.
  virtual arrow::Status Write(const arrow::RecordBatch &batch) = 0;

  /// Writes a whole table to the file.
  virtual arrow::Status Write(const arrow::Table &table) = 0;

  /// Finishes the file. NOTE(review): the visible implementations do not
  /// guard against Close() being called twice — confirm before relying on it.
  virtual arrow::Status Close() = 0;

  virtual ~ChunkedFileWriter() = default;  // LCOV_EXCL_BR_LINE

 protected:
  /// Stores the schema for use by derived writers.
  explicit ChunkedFileWriter(std::shared_ptr<arrow::Schema> schema)
      : schema_(std::move(schema)) {}

  // Schema handed to Open(); kept for derived classes.
  std::shared_ptr<arrow::Schema> schema_;
};

}  // namespace zvec


================================================
FILE: src/db/index/storage/forward_writer.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// forward_writer.cc
#include "forward_writer.h"
#include "arrow_ipc_writer.h"
#include "parquet_writer.h"

namespace zvec {

std::unique_ptr<ForwardWriter> ForwardWriter::CreateArrowIPCWriter(
    const std::string &filepath, int64_t max_rows_per_batch) {
  return std::make_unique<ArrowIpcWriter>(filepath, max_rows_per_batch);
}

std::unique_ptr<ForwardWriter> ForwardWriter::CreateParquetWriter(
    const std::string &filepath, int64_t max_rows_per_batch) {
  return std::make_unique<ParquetWriter>(filepath, max_rows_per_batch);
}

}  // namespace zvec

================================================
FILE: src/db/index/storage/forward_writer.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <memory>
#include <string>
#include <arrow/api.h>
#include "db/index/common/index_filter.h"

namespace zvec {

/// Abstract interface for writing forward (column) data from Arrow record
/// batches to a file. Instances are created through the static factories.
class ForwardWriter {
 public:
  virtual ~ForwardWriter() = default;

  // Factory methods
  /// Creates a writer backed by ArrowIpcWriter (see forward_writer.cc).
  /// NOTE(review): the meaning of max_rows_per_batch == 0 is decided by the
  /// concrete writer and is not visible here — confirm.
  static std::unique_ptr<ForwardWriter> CreateArrowIPCWriter(
      const std::string &filepath, int64_t max_rows_per_batch = 0);

  /// Creates a writer backed by ParquetWriter (see forward_writer.cc).
  /// NOTE(review): the meaning of max_rows_per_batch == 0 is decided by the
  /// concrete writer and is not visible here — confirm.
  static std::unique_ptr<ForwardWriter> CreateParquetWriter(
      const std::string &filepath, int64_t max_rows_per_batch = 0);

  // Interface methods
  /// Consumes batches from `reader`. `filter` is optional (defaults to
  /// nullptr); presumably it selects which rows are skipped — verify against
  /// the concrete writers.
  virtual arrow::Status insert(std::shared_ptr<arrow::RecordBatchReader> reader,
                               const IndexFilter::Ptr &filter = nullptr) = 0;

  /// Consumes a single record batch, with the same optional `filter`.
  virtual arrow::Status insert_batch(
      std::shared_ptr<arrow::RecordBatch> batch,
      const IndexFilter::Ptr &filter = nullptr) = 0;

  /// Completes the output. Presumably must be called once after the last
  /// insert — TODO confirm with the implementations.
  virtual arrow::Status finalize() = 0;
};

}  // namespace zvec

================================================
FILE: src/db/index/storage/lazy_record_batch_reader.h
================================================

// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <arrow/ipc/reader.h>
#include <parquet/arrow/reader.h>
#include <zvec/ailego/buffer/buffer_manager.h>
#include "db/common/constants.h"


namespace zvec {

/// RecordBatchReader over an Arrow IPC file that yields, batch by batch, only
/// the requested columns. Requested names that are not part of `schema` are
/// silently dropped from the projection.
class IPCRecordBatchReader : public arrow::RecordBatchReader {
 public:
  /// \param reader  Opened IPC file reader (shared ownership).
  /// \param columns Names of the columns to expose, in output order.
  /// \param schema  Full schema used to resolve column names to positions.
  IPCRecordBatchReader(
      std::shared_ptr<arrow::ipc::RecordBatchFileReader> reader,
      const std::vector<std::string> &columns,
      std::shared_ptr<arrow::Schema> schema)
      : reader_(std::move(reader)),
        schema_(std::move(schema)),
        columns_(columns) {
    std::vector<std::shared_ptr<arrow::Field>> projected_fields;
    for (const std::string &name : columns) {
      const int position = schema_->GetFieldIndex(name);
      if (position == -1) {
        continue;  // unknown column: not part of the projection
      }
      projected_fields.push_back(schema_->field(position));
      col_indices_.push_back(position);
    }
    projected_schema_ = arrow::schema(projected_fields);
    num_record_batches_ = reader_->num_record_batches();
  }

  /// Schema of the batches produced by ReadNext() (projected columns only).
  std::shared_ptr<arrow::Schema> schema() const override {
    return projected_schema_;
  }

  /// Reads the next batch and projects it; sets `*batch` to nullptr once all
  /// batches of the file have been returned.
  arrow::Status ReadNext(std::shared_ptr<arrow::RecordBatch> *batch) override {
    if (current_batch_ < num_record_batches_) {
      ARROW_ASSIGN_OR_RAISE(auto source_batch,
                            reader_->ReadRecordBatch(current_batch_));
      ++current_batch_;

      std::vector<std::shared_ptr<arrow::Array>> selected_columns;
      for (const int position : col_indices_) {
        selected_columns.push_back(source_batch->column(position));
      }

      *batch = arrow::RecordBatch::Make(
          projected_schema_, source_batch->num_rows(), selected_columns);
      return arrow::Status::OK();
    }

    // End of stream.
    *batch = nullptr;
    return arrow::Status::OK();
  }

 private:
  std::shared_ptr<arrow::ipc::RecordBatchFileReader> reader_;
  std::shared_ptr<arrow::Schema> schema_;
  std::shared_ptr<arrow::Schema> projected_schema_;
  std::vector<std::string> columns_;
  std::vector<int> col_indices_;
  int current_batch_ = 0;
  int num_record_batches_ = 0;
};


/// RecordBatchReader over a Parquet file that yields one record batch per row
/// group, restricted to the requested columns. Requested names that are not
/// part of `schema` are silently dropped from the projection.
///
/// Column data is obtained either through ailego::BufferManager
/// (`with_cache == true`) or by reading the row group directly from the file.
class ParquetRecordBatchReader : public arrow::RecordBatchReader {
 public:
  /// \param reader     Parquet file reader; ownership is MOVED out of the
  ///                   caller's unique_ptr even though it is taken by lvalue
  ///                   reference, so the caller's pointer is null afterwards.
  /// \param columns    Names of the columns to expose, in output order.
  /// \param schema     Full schema used to resolve column names to positions.
  /// \param file_path  Path used to build the buffer-manager keys.
  /// \param with_cache Whether column chunks are fetched via the buffer
  ///                   manager instead of being read from the file directly.
  ParquetRecordBatchReader(std::unique_ptr<parquet::arrow::FileReader> &reader,
                           const std::vector<std::string> &columns,
                           std::shared_ptr<arrow::Schema> schema,
                           const std::string &file_path, bool with_cache = true)
      : reader_(std::move(reader)),
        schema_(std::move(schema)),
        columns_(columns),
        file_path_(file_path),
        with_cache_(with_cache) {
    std::vector<std::shared_ptr<arrow::Field>> fields;
    for (const auto &col : columns) {
      int index = schema_->GetFieldIndex(col);
      if (index != -1) {
        fields.push_back(schema_->field(index));
        col_indices_.push_back(index);
      }
    }
    projected_schema_ = arrow::schema(fields);

    // Record the row count and the starting row offset of every row group.
    auto parquet_metadata = reader_->parquet_reader()->metadata();
    total_rows_ = parquet_metadata->num_rows();
    num_row_groups_ = parquet_metadata->num_row_groups();
    int64_t offset = 0;
    for (int64_t rg = 0; rg < num_row_groups_; ++rg) {
      auto row_group_metadata = parquet_metadata->RowGroup(rg);
      int64_t num_rows_in_group = row_group_metadata->num_rows();
      row_group_row_nums_.push_back(num_rows_in_group);
      row_group_offsets_.push_back(offset);
      offset += num_rows_in_group;
    }
  }

  /// Schema of the batches produced by ReadNext() (projected columns only).
  std::shared_ptr<arrow::Schema> schema() const override {
    return projected_schema_;
  }

  /// Produces the next row group as a single record batch.
  ///
  /// At end of stream `*batch` is set to nullptr and OK is returned, which is
  /// how arrow::RecordBatchReader consumers detect termination. An error
  /// status is returned when a column cannot be loaded or concatenated; in
  /// that case the reader does not advance.
  arrow::Status ReadNext(std::shared_ptr<arrow::RecordBatch> *batch) override {
    if (current_row_group_ >= num_row_groups_) {
      // Must be reset explicitly: callers commonly reuse the same pointer
      // across iterations and loop until it becomes null.
      *batch = nullptr;
      return arrow::Status::OK();
    }

    int64_t rg_id = current_row_group_;
    int64_t num_rows_in_rg = row_group_row_nums_[rg_id];

    std::vector<std::shared_ptr<arrow::Array>> chunks(col_indices_.size());
    if (with_cache_) {
      auto &buf_mgr = ailego::BufferManager::Instance();
      for (size_t col_idx = 0; col_idx < col_indices_.size(); ++col_idx) {
        auto buffer_id = ailego::BufferID::ParquetID(
            file_path_, col_indices_[col_idx], rg_id);
        auto buffer_handle = buf_mgr.acquire(buffer_id);
        auto col_chunked_array = buffer_handle.pin_parquet_data();
        if (!col_chunked_array) {
          // A missing column would leave a null entry in `chunks`, which
          // RecordBatch::Make cannot handle; report it instead.
          return arrow::Status::IOError(
              "failed to load parquet column ", col_indices_[col_idx],
              " of row group ", rg_id, " from ", file_path_);
        }
        // A row group column may come back in several chunks; a record batch
        // needs exactly one contiguous array per column.
        ARROW_ASSIGN_OR_RAISE(chunks[col_idx],
                              arrow::Concatenate(col_chunked_array->chunks(),
                                                 arrow::default_memory_pool()));
      }
    } else {
      std::shared_ptr<arrow::Table> rg_table;
      ARROW_RETURN_NOT_OK(
          reader_->RowGroup(rg_id)->ReadTable(col_indices_, &rg_table));
      for (size_t i = 0; i < col_indices_.size(); ++i) {
        ARROW_ASSIGN_OR_RAISE(chunks[i],
                              arrow::Concatenate(rg_table->column(i)->chunks(),
                                                 arrow::default_memory_pool()));
      }
    }

    *batch =
        arrow::RecordBatch::Make(projected_schema_, num_rows_in_rg, chunks);
    current_row_group_++;
    return arrow::Status::OK();
  }

 private:
  std::unique_ptr<parquet::arrow::FileReader> reader_;
  std::shared_ptr<arrow::Schema> schema_;
  std::shared_ptr<arrow::Schema> projected_schema_;
  std::vector<std::string> columns_;
  std::vector<int> col_indices_;
  std::string file_path_;

  int64_t current_row_group_ = 0;
  int64_t num_row_groups_ = 0;
  int64_t total_rows_ = 0;
  std::vector<int64_t> row_group_offsets_;
  std::vector<int64_t> row_group_row_nums_;
  bool with_cache_;
};


}  // namespace zvec


================================================
FILE: src/db/index/storage/memory_forward_store.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "memory_forward_store.h"
#include <memory>
#include <string>
#include <vector>
#include <ailego/pattern/defer.h>
#include <arrow/api.h>
#include <arrow/builder.h>
#include <arrow/compute/api.h>
#include <arrow/io/file.h>
#include <arrow/ipc/writer.h>
#include <arrow/record_batch.h>
#include <arrow/table.h>
#include <arrow/util/async_generator.h>
#include <zvec/ailego/logger/logger.h>
#include "db/common/constants.h"
#include "db/index/storage/base_forward_store.h"


namespace zvec {

/// Stores the configuration only; no file is touched until Open().
/// The in-memory document cache is converted to a record batch once it holds
/// 1/100 of `max_buffer_size` bytes (see insert()).
MemForwardStore::MemForwardStore(
    const std::shared_ptr<CollectionSchema> &collection_schema,
    const std::string &path, const FileFormat format,
    const uint32_t max_buffer_size)
    : schema_(collection_schema),
      path_(path),
      format_(format),
      max_cache_size_(max_buffer_size / 100),
      max_buffer_size_(max_buffer_size) {
  // Pre-size the document cache so the first inserts do not reallocate.
  constexpr size_t kInitialCacheCapacity = 128;
  cache_.reserve(kInitialCacheCapacity);
}

/// Builds the physical Arrow schema from the collection schema and opens the
/// file writer for `path_`.
/// \return Status::OK() on success; an InternalError when the schema cannot
///         be converted or the file writer cannot be created.
Status MemForwardStore::Open() {
  arrow::FieldVector fields;
  auto status = ConvertCollectionSchemaToArrowFields(schema_, &fields);
  if (!status.ok()) {
    return Status::InternalError("convert schema to arrow fields failed ",
                                 status.ToString());
  }
  physic_schema_ = arrow::schema(fields);

  // Initialize file writer. ChunkedFileWriter::Open signals failure with a
  // null pointer; surface it here instead of crashing on the first flush().
  writer_ = ChunkedFileWriter::Open(path_, physic_schema_, format_);
  if (!writer_) {
    return Status::InternalError("failed to open chunked file writer for ",
                                 path_);
  }
  return Status::OK();
}

/// Creates a RecordBatchBuilder for the physical schema on the default memory
/// pool. Returns nullptr (after logging) when Arrow cannot create it.
RecordBatchBuilderPtr MemForwardStore::createBuilder() {
  auto made = arrow::RecordBatchBuilder::Make(physic_schema_,
                                              arrow::default_memory_pool());
  if (made.ok()) {
    return std::move(made.ValueOrDie());
  }

  LOG_ERROR("failed to create RecordBatchBuilder: %s",
            made.status().ToString().c_str());
  return nullptr;
}


/// Checks a column request: the list must be non-empty and every name must be
/// either LOCAL_ROW_ID or a field of the physical schema. The first problem
/// found is logged and makes the function return false.
bool MemForwardStore::validate(const std::vector<std::string> &columns) const {
  if (columns.empty()) {
    LOG_ERROR("empty columns");
    return false;
  }

  for (const std::string &name : columns) {
    const bool is_known =
        (name == LOCAL_ROW_ID) || physic_schema_->GetFieldIndex(name) != -1;
    if (!is_known) {
      LOG_ERROR("validate failed. unknown column: %s", name.c_str());
      return false;
    }
  }
  return true;
}


// Appends every cached document to the column builders of `rb_builder`.
// Note: this only converts the documents; it does NOT clear cache_.
//
// Column layout expected by this function: field 0 is the global doc id
// (UInt64), field 1 is the user id / primary key (String), all remaining
// fields are filled through AppendFieldValueToBuilder.
//
// Returns Invalid when `rb_builder` is null or its first two fields do not
// have the expected builder types; otherwise the first append error, or OK.
arrow::Status MemForwardStore::convertToBuilder(
    RecordBatchBuilderPtr &rb_builder) {
  if (!rb_builder) {
    // createBuilder() reports failure with nullptr.
    return arrow::Status::Invalid("record batch builder is null");
  }
  if (cache_.empty()) {
    return arrow::Status::OK();
  }
  if (rb_builder->num_fields() < 2) {
    return arrow::Status::Invalid(
        "record batch builder must have at least the doc id and pk fields");
  }

  // Builder pointers stay valid for the lifetime of rb_builder, so they are
  // looked up (and type-checked) once instead of once per document.
  auto *gid_builder =
      dynamic_cast<arrow::UInt64Builder *>(rb_builder->GetField(0));
  auto *uid_builder =
      dynamic_cast<arrow::StringBuilder *>(rb_builder->GetField(1));
  if (gid_builder == nullptr || uid_builder == nullptr) {
    return arrow::Status::Invalid(
        "unexpected builder type for the doc id or pk field");
  }

  const auto &fields = physic_schema_->fields();
  for (const auto &doc : cache_) {
    // global doc_id
    ARROW_RETURN_NOT_OK(gid_builder->Append(doc.doc_id()));

    // user id(pk)
    ARROW_RETURN_NOT_OK(uid_builder->Append(doc.pk()));

    // other fields
    for (size_t idx = 2; idx < fields.size(); ++idx) {
      const auto &field = fields[idx];
      auto *builder = rb_builder->GetField(static_cast<int>(idx));
      ARROW_RETURN_NOT_OK(AppendFieldValueToBuilder(doc, field, builder));
    }
  }
  return arrow::Status::OK();
}

/// Appends `doc` to the in-memory cache. Once the cached bytes reach
/// max_cache_size_, the whole cache is converted into one record batch that
/// is kept in batches_ (nothing is written to disk here; see flush()).
/// \return Status::OK() on success; an InternalError when the cache could not
///         be converted. On error the document stays in the cache.
Status MemForwardStore::insert(const Doc &doc) {
  std::lock_guard lock(cache_mtx_);
  cache_.emplace_back(doc);
  num_rows_++;
  auto doc_bytes = doc.memory_usage();
  total_cache_bytes_ = total_cache_bytes_ + static_cast<uint32_t>(doc_bytes);
  if (total_cache_bytes_ < max_cache_size_) {
    return Status::OK();
  }

  // Cache reached its size limit: turn it into a record batch.
  auto rb_builder = createBuilder();
  if (!rb_builder) {
    // createBuilder() already logged the Arrow error.
    return Status::InternalError("failed to create RecordBatchBuilder for ",
                                 path_);
  }
  auto status = convertToBuilder(rb_builder);
  if (!status.ok()) {
    return Status::InternalError("convertToBuilder error: ", status.ToString());
  }
  auto result = rb_builder->Flush(false);
  if (!result.ok()) {
    return Status::InternalError("flush error: ", result.status().ToString());
  }
  auto batch = result.ValueOrDie();
  int64_t rb_size = MemorySize(*batch);
  batches_.push_back(batch);

  // Move the accounted bytes from the cache counter to the batch counter.
  total_rb_bytes_ = total_rb_bytes_ + static_cast<uint32_t>(rb_size);
  cache_.clear();
  total_cache_bytes_ = 0;

  return Status::OK();
}

/// Converts the current document cache into a record batch without clearing
/// the cache. An empty cache yields a batch with zero rows.
/// \return The batch, or an error status when the builder cannot be created
///         or a document cannot be appended.
arrow::Result<RecordBatchPtr> MemForwardStore::convertToRecordBatch() {
  auto rb_builder = createBuilder();
  if (!rb_builder) {
    // createBuilder() signals failure with nullptr (details already logged).
    return arrow::Status::Invalid("failed to create RecordBatchBuilder");
  }
  ARROW_RETURN_NOT_OK(convertToBuilder(rb_builder));
  ARROW_ASSIGN_OR_RAISE(auto batch, rb_builder->Flush(false));
  return batch;
}

/// Materialises the store (finished batches plus the current cache) as one
/// table, optionally restricted to some rows and columns.
///
/// \param columns Column names to keep. LOCAL_ROW_ID and names unknown to the
///                schema are skipped; if nothing remains (or `columns` is
///                empty) all columns are returned.
/// \param indices Row positions to take, in output order. Empty means "all
///                rows". Ignored when the store holds no rows at all.
/// \return The resulting table, or the first Arrow error encountered.
arrow::Result<TablePtr> MemForwardStore::convertToTable(
    const std::vector<std::string> &columns, const std::vector<int> &indices) {
  std::shared_ptr<arrow::RecordBatch> batch;
  ARROW_ASSIGN_OR_RAISE(batch, convertToRecordBatch());
  std::vector<std::shared_ptr<arrow::RecordBatch>> all_batches = batches_;
  if (batch->num_rows() > 0) {
    all_batches.push_back(batch);
  }

  std::shared_ptr<arrow::Table> table;
  if (all_batches.empty()) {
    // No data yet: start from an empty table. A real memory pool is passed
    // because array construction allocates through it.
    ARROW_ASSIGN_OR_RAISE(
        table,
        arrow::Table::MakeEmpty(physic_schema_, arrow::default_memory_pool()));
  } else {
    // Combine all batches into a single table
    ARROW_ASSIGN_OR_RAISE(table, arrow::Table::FromRecordBatches(all_batches));

    if (!indices.empty()) {
      // Filter rows by indices if provided
      std::shared_ptr<arrow::Array> index_array;
      arrow::Int32Builder builder;
      ARROW_RETURN_NOT_OK(builder.AppendValues(indices));
      ARROW_RETURN_NOT_OK(builder.Finish(&index_array));

      arrow::Datum input_datum(table);
      arrow::Datum index_datum(index_array);

      arrow::compute::ExecContext ctx;
      arrow::Datum result_datum;
      ARROW_ASSIGN_OR_RAISE(
          result_datum,
          arrow::compute::Take(input_datum, index_datum,
                               arrow::compute::TakeOptions::Defaults(), &ctx));
      table = result_datum.table();
    }
  }

  if (!columns.empty()) {
    // Select only specified columns. This runs for the empty table as well so
    // callers always see the same projected schema.
    std::vector<int> column_indices;
    for (const auto &column_name : columns) {
      if (column_name == LOCAL_ROW_ID) continue;
      int index = table->schema()->GetFieldIndex(column_name);
      if (index != -1) {
        column_indices.push_back(index);
      }
    }

    if (!column_indices.empty()) {
      ARROW_ASSIGN_OR_RAISE(table, table->SelectColumns(column_indices));
    }
  }
  return table;
}

/// Writes all not-yet-persisted data to the file writer.
///
/// The current cache is first converted into a record batch and appended to
/// batches_. Then every batch at position >= flushed_batches_ is written;
/// consecutive small batches are merged so that a written batch holds at most
/// kMaxRecordBatchNumRows rows, while a batch that is already that large is
/// written on its own. Batches stay in batches_ after being written.
///
/// \return Status::OK() on success (including "nothing to do"); an
///         InternalError when the writer is missing or a conversion, merge or
///         write fails. Batches written before the failure remain recorded in
///         flushed_batches_ and are not written again.
Status MemForwardStore::flush() {
  std::lock_guard lock(cache_mtx_);

  if (cache_.empty() && batches_.empty()) {
    return Status::OK();
  }

  if (!writer_) {
    // Open() failed / was never called, or the store is already closed.
    return Status::InternalError("failed to flush, writer is not open: ",
                                 path_);
  }

  auto result = convertToRecordBatch();
  if (!result.ok()) {
    return Status::InternalError("failed to convert cache to RecordBatch: ",
                                 result.status().ToString());
  }

  auto cache_batch = result.ValueOrDie();
  if (cache_batch->num_rows() > 0) {
    batches_.push_back(cache_batch);
    cache_.clear();
    // Same bookkeeping as insert(): the cached documents now live in a record
    // batch, so their bytes move from the cache counter to the batch counter.
    total_rb_bytes_ =
        total_rb_bytes_ + static_cast<uint32_t>(MemorySize(*cache_batch));
    total_cache_bytes_ = 0;
  }

  bool has_incr = false;
  size_t start_index = flushed_batches_;

  while (start_index < batches_.size()) {
    // Collect the next run of batches [start_index, end_index) to write as
    // one record batch.
    std::vector<std::shared_ptr<arrow::RecordBatch>> batches_to_merge;
    int64_t total_rows = 0;
    size_t end_index = start_index;

    while (end_index < batches_.size()) {
      auto &current_batch = batches_[end_index];
      int64_t current_rows = current_batch->num_rows();

      if (current_rows >= kMaxRecordBatchNumRows) {
        // An oversized batch is never merged: take it alone if the run is
        // still empty, otherwise leave it for the next round.
        if (batches_to_merge.empty()) {
          batches_to_merge.push_back(current_batch);
          end_index++;
        }
        break;
      }

      if (!batches_to_merge.empty() &&
          total_rows + current_rows > kMaxRecordBatchNumRows) {
        break;
      }

      batches_to_merge.push_back(current_batch);
      total_rows += current_rows;
      end_index++;
    }

    if (batches_to_merge.empty()) {
      break;
    }

    std::shared_ptr<arrow::RecordBatch> batch_to_write;
    if (batches_to_merge.size() == 1) {
      batch_to_write = batches_to_merge[0];
    } else {
      std::shared_ptr<arrow::Table> table;
      auto merge_status =
          arrow::Table::FromRecordBatches(batches_to_merge).Value(&table);
      if (!merge_status.ok()) {
        return Status::InternalError("failed to merge batches: ",
                                     merge_status.ToString());
      }

      auto combined = table->CombineChunksToBatch();
      if (!combined.ok()) {
        return Status::InternalError("failed to combine chunks: ",
                                     combined.status().ToString());
      }
      batch_to_write = combined.ValueOrDie();
    }

    auto write_status = writer_->Write(*batch_to_write);
    if (!write_status.ok()) {
      return Status::InternalError("failed to write RecordBatch to file: ",
                                   write_status.ToString());
    }

    flushed_batches_ = end_index;
    has_incr = true;
    start_index = end_index;
  }

  if (has_incr) {
    // Explicit cast so the argument always matches the %u conversion.
    LOG_INFO("successfully flushed %u batches to %s",
             static_cast<unsigned int>(flushed_batches_), path_.c_str());
  }
  return Status::OK();
}

/// Flushes pending data, closes the file writer and drops all in-memory data.
///
/// Cleanup always runs to completion. A failure to close the writer is only
/// logged (best effort), but a failure to persist pending data is returned to
/// the caller because it means documents were lost.
/// \return Status::OK(), or the error that prevented the final flush.
Status MemForwardStore::close() {
  Status flush_status = Status::OK();
  if (!cache_.empty() || !batches_.empty()) {
    if (writer_) {
      flush_status = flush();
    } else {
      // Nothing to write to: Open() failed or was never called.
      flush_status = Status::InternalError(
          "cannot flush pending data, writer is not open: ", path_);
    }
    if (!flush_status.ok()) {
      LOG_ERROR("failed to flush pending data to %s on close", path_.c_str());
    }
  }
  if (writer_) {
    auto status = writer_->Close();
    if (!status.ok()) {
      LOG_WARN("failed to close writer: %s", status.ToString().c_str());
    }
    writer_.reset();
  }
  batches_.clear();
  cache_.clear();
  return flush_status;
}

/// Returns all rows currently held by the store (finished batches plus the
/// cache) as one table with the full physical schema.
/// \return The table; nullptr when the store is empty or when the data could
///         not be converted (the Arrow error is logged in that case).
TablePtr MemForwardStore::get_table() {
  std::lock_guard lock(cache_mtx_);

  auto batch_result = convertToRecordBatch();
  if (!batch_result.ok()) {
    LOG_ERROR("failed to convert cache to RecordBatch: %s",
              batch_result.status().ToString().c_str());
    return nullptr;
  }
  std::shared_ptr<arrow::RecordBatch> batch = batch_result.ValueOrDie();

  std::vector<std::shared_ptr<arrow::RecordBatch>> all_batches = batches_;
  if (batch->num_rows() > 0) {
    all_batches.push_back(batch);
  }

  if (all_batches.empty()) {
    return nullptr;
  }

  auto table_result = arrow::Table::FromRecordBatches(all_batches);
  if (!table_result.ok()) {
    LOG_ERROR("failed to convert to table: %s",
              table_result.status().ToString().c_str());
    return nullptr;
  }
  return table_result.ValueOrDie();
}

/// Fetches the given rows as a table whose columns follow the order of
/// `columns`. LOCAL_ROW_ID is a virtual uint64 column holding the requested
/// row index itself.
/// \param columns Requested column names (validated first).
/// \param indices Row positions to fetch; empty yields a zero-row table.
/// \return The table, or nullptr on validation / conversion failure.
TablePtr MemForwardStore::fetch(const std::vector<std::string> &columns,
                                const std::vector<int> &indices) {
  std::lock_guard lock(cache_mtx_);

  if (!validate(columns)) {
    return nullptr;
  }

  // No rows requested: answer with empty arrays of the selected fields.
  if (indices.empty()) {
    auto selected_fields = SelectFields(physic_schema_, columns);
    arrow::ArrayVector no_rows;
    for (const auto &selected : selected_fields) {
      no_rows.push_back(arrow::MakeEmptyArray(selected->type()).ValueOrDie());
    }
    return arrow::Table::Make(std::make_shared<arrow::Schema>(selected_fields),
                              no_rows, 0);
  }

  // Separate physical columns from the virtual LOCAL_ROW_ID column and
  // remember at which output positions the latter was requested.
  const size_t column_count = columns.size();
  std::vector<bool> is_local_row_id(column_count, false);
  std::vector<std::string> data_columns;
  bool need_local_doc_id = false;
  for (size_t pos = 0; pos < column_count; ++pos) {
    if (columns[pos] == LOCAL_ROW_ID) {
      is_local_row_id[pos] = true;
      need_local_doc_id = true;
      continue;
    }
    data_columns.push_back(columns[pos]);
  }

  auto converted = convertToTable(data_columns, indices);
  if (!converted.ok()) {
    LOG_ERROR("failed to convert to table: %s",
              converted.status().ToString().c_str());
    return nullptr;
  }

  auto data_table = converted.ValueOrDie();
  if (!need_local_doc_id) {
    return data_table;
  }

  // Build the virtual row-id column from the requested indices.
  const std::vector<uint64_t> row_ids(indices.begin(), indices.end());
  arrow::UInt64Builder rowid_builder;
  auto append_status = rowid_builder.AppendValues(row_ids);
  if (!append_status.ok()) {
    LOG_ERROR("failed to append rowid values: %s",
              append_status.ToString().c_str());
    return nullptr;
  }

  std::shared_ptr<arrow::Array> rowid_array;
  auto finish_status = rowid_builder.Finish(&rowid_array);
  if (!finish_status.ok()) {
    LOG_ERROR("failed to finish rowid array: %s",
              finish_status.ToString().c_str());
    return nullptr;
  }
  auto rowid_chunked = std::make_shared<arrow::ChunkedArray>(rowid_array);

  // Assemble the output in the caller's column order: physical columns are
  // consumed from data_table one after another, row-id slots get the
  // synthetic column.
  std::vector<std::shared_ptr<arrow::ChunkedArray>> result_columns(
      column_count);
  std::vector<std::shared_ptr<arrow::Field>> result_fields(column_count);
  size_t next_data_column = 0;
  for (size_t pos = 0; pos < column_count; ++pos) {
    if (is_local_row_id[pos]) {
      result_columns[pos] = rowid_chunked;
      result_fields[pos] = arrow::field(LOCAL_ROW_ID, arrow::uint64());
    } else {
      result_columns[pos] = data_table->column(next_data_column);
      result_fields[pos] = data_table->schema()->field(next_data_column);
      ++next_data_column;
    }
  }

  return arrow::Table::Make(arrow::schema(result_fields), result_columns,
                            data_table->num_rows());
}

/// Fetches a single row as an ExecBatch holding one scalar per requested
/// column, in the order of `columns`. LOCAL_ROW_ID is a virtual column and is
/// answered with `index` itself as a uint64 scalar, matching the table
/// overload of fetch().
/// \param columns Requested column names (validated first).
/// \param index   Row position to fetch.
/// \return The batch, or nullptr when validation fails, the row does not
///         exist, or a value cannot be extracted (the reason is logged).
ExecBatchPtr MemForwardStore::fetch(const std::vector<std::string> &columns,
                                    int index) {
  std::lock_guard lock(cache_mtx_);

  if (!validate(columns)) {
    return nullptr;
  }

  auto result = convertToTable(columns, std::vector<int>{index});
  if (!result.ok()) {
    LOG_ERROR("failed to convert to table: %s",
              result.status().ToString().c_str());
    return nullptr;
  }

  auto table = result.ValueOrDie();
  if (table->num_rows() < 1) {
    // Happens when the store holds no rows at all.
    LOG_ERROR("failed to fetch row %d: no such row", index);
    return nullptr;
  }

  // Extract scalars
  std::vector<arrow::Datum> scalars;
  scalars.reserve(columns.size());
  for (const auto &column : columns) {
    if (column == LOCAL_ROW_ID) {
      // Not a physical column: validate() accepts it but the table cannot
      // contain it, so it is synthesised from the requested index.
      std::shared_ptr<arrow::Scalar> row_id =
          std::make_shared<arrow::UInt64Scalar>(static_cast<uint64_t>(index));
      scalars.emplace_back(std::move(row_id));
      continue;
    }

    const auto array = table->GetColumnByName(column);
    if (!array) {
      LOG_ERROR("failed to get column %s from table", column.c_str());
      return nullptr;
    }
    auto scalar_result = array->GetScalar(0);
    if (!scalar_result.ok()) {
      LOG_ERROR("failed to get column %s scalar from array: %s", column.c_str(),
                scalar_result.status().ToString().c_str());
      return nullptr;
    }
    scalars.emplace_back(std::move(scalar_result.ValueOrDie()));
  }

  return std::make_shared<arrow::ExecBatch>(std::move(scalars), 1);
}

/// Returns a reader over all rows of the store, restricted to `columns`.
/// The data is materialised as a table at call time.
/// \return The reader, or nullptr when validation or conversion fails.
RecordBatchReaderPtr MemForwardStore::scan(
    const std::vector<std::string> &columns) {
  std::lock_guard lock(cache_mtx_);

  if (!validate(columns)) {
    return nullptr;
  }

  // An empty index list means "all rows".
  auto projected = convertToTable(columns, {});
  if (projected.ok()) {
    return std::make_shared<arrow::TableBatchReader>(projected.ValueOrDie());
  }

  LOG_ERROR("failed to convert to table: %s",
            projected.status().ToString().c_str());
  return nullptr;
}

}  // namespace zvec

================================================
FILE: src/db/index/storage/memory_forward_store.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <iostream>
#include <memory>
#include <mutex>
#include <string>
#include <vector>
#include <arrow/compute/api.h>
#include <arrow/table_builder.h>
#include <arrow/util/async_generator.h>
#include <zvec/db/doc.h>
#include <zvec/db/status.h>
#include "base_forward_store.h"
#include "chunked_file_writer.h"
#include "store_helper.h"

namespace zvec {

/// MemForwardStore implements a forward store that keeps data in memory
/// and can flush data to disk when needed.
///
/// Only the declarations are visible here; behavioural notes below are
/// limited to what the inline bodies in this header show.
class MemForwardStore : public BaseForwardStore {
 public:
  /// Pointer type for MemForwardStore instances
  using Ptr = std::shared_ptr<MemForwardStore>;

  /// Constructor that initializes the store with collection schema and settings
  /// \param collection_schema The schema for the collection
  /// \param path The path where data will be stored
  /// \param format The file format for persistence
  /// \param max_buffer_size Size limit in bytes, 100 MiB by default
  ///        (presumably stored in max_buffer_size_, which is_full() compares
  ///        against -- the constructor body is defined elsewhere; confirm)
  MemForwardStore(const std::shared_ptr<CollectionSchema> &collection_schema,
                  const std::string &path, const FileFormat format,
                  const uint32_t max_buffer_size = 100 * 1024 * 1024);

  /// Calls close() on destruction; the Status returned by close() is
  /// discarded here.
  virtual ~MemForwardStore() {
    close();
  }

  /// Check if the store is full
  /// \return true once total_bytes() has reached or exceeded max_buffer_size_
  bool is_full() {
    return total_bytes() >= max_buffer_size_;
  }

  /// Open the store
  /// \return A Status describing the outcome
  Status Open() override;

  /// Insert a document into the store
  /// \param doc The document to insert
  /// \return A Status describing the outcome
  Status insert(const Doc &doc);

  /// Flush cached data to disk
  /// \return A Status describing the outcome
  Status flush();

  /// Close the store and flush any remaining data
  /// \return A Status describing the outcome
  Status close();

 public:
  /// Get the path of the store
  /// \return A copy of the path where data is stored
  const std::string path() const {
    return path_;
  }

  /// Get the total bytes of the store
  /// \return total_cache_bytes_ + total_rb_bytes_. The sum is computed in
  ///         uint32_t, so it wraps if the two counters together exceed 4 GiB.
  uint32_t total_bytes() const {
    return total_cache_bytes_ + total_rb_bytes_;
  }

  /// Get the total number of rows in the store
  /// \return The current value of num_rows_
  uint32_t num_rows() const {
    return num_rows_;
  }

 public:
  /// Fetch specific columns and row indices from the data source
  /// \param columns The list of column names to fetch
  /// \param indices The list of row indices to fetch
  /// \return A table containing the requested data or nullptr on failure
  TablePtr fetch(const std::vector<std::string> &columns,
                 const std::vector<int> &indices) override;

  /// Fetch specific columns for a single row from the data source
  /// \param columns The list of column names to fetch
  /// \param index The row index to fetch
  /// \return An ExecBatch containing the requested data or nullptr on failure
  ExecBatchPtr fetch(const std::vector<std::string> &columns,
                     int index) override;

  /// Scan specified columns from the data source
  /// \param columns The list of column names to scan
  /// \return A RecordBatchReader for streaming the data or nullptr on failure
  RecordBatchReaderPtr scan(const std::vector<std::string> &columns) override;

  /// Get the physical schema of the store
  /// \return A shared pointer to the arrow schema representing the physical
  /// structure of the data
  const std::shared_ptr<arrow::Schema> physic_schema() const override {
    return physic_schema_;
  }

  /// Get the store contents as a table (defined out of line)
  TablePtr get_table() override;

 private:
  /// Create a RecordBatchBuilder for this store
  /// \return A new RecordBatchBuilder
  RecordBatchBuilderPtr createBuilder();

  /// Convert internal data to a RecordBatch
  /// \return A Result containing the RecordBatch or an error status
  arrow::Result<RecordBatchPtr> convertToRecordBatch();

  /// Convert internal data to a Table
  /// \param columns The list of column names to include
  /// \param indices The list of row indices to include
  /// \return A Result containing the Table or an error status
  arrow::Result<TablePtr> convertToTable(
      const std::vector<std::string> &columns, const std::vector<int> &indices);

  /// Convert internal data to a RecordBatchBuilder
  /// \param builder The builder to populate
  /// \return arrow::Status indicating success or failure
  arrow::Status convertToBuilder(RecordBatchBuilderPtr &builder);

  /// Validate that the requested columns exist in the schema
  /// \param columns The list of column names to validate
  /// \return true if all columns are valid, false otherwise
  bool validate(const std::vector<std::string> &columns) const;

 private:
  /// Mutex to protect cache access
  std::mutex cache_mtx_;

  /// Cache of documents waiting to be flushed
  std::vector<Doc> cache_;

  /// Batches of data that have been flushed
  std::vector<RecordBatchPtr> batches_;

  /// Collection schema
  std::shared_ptr<CollectionSchema> schema_;

  /// Physical schema
  std::shared_ptr<arrow::Schema> physic_schema_;

  /// Total RecordBatch bytes in the store
  uint32_t total_rb_bytes_{0};

  /// Total cache doc bytes
  uint32_t total_cache_bytes_{0};

  /// Total number of rows in the store
  uint32_t num_rows_{0};

  /// Path where data is stored
  std::string path_;

  /// File format for persistence
  FileFormat format_;

  /// Number of batches that have been flushed
  uint32_t flushed_batches_{0};

  /// Writer for chunked files
  ChunkedFileWriter::Ptr writer_;


  /// Maximum size of cache in bytes, default 1 MiB (1048576)
  uint32_t max_cache_size_{1048576};

  /// Maximum size of the buffer in bytes, default 100 MiB (104857600);
  /// this is the threshold is_full() compares total_bytes() against
  uint32_t max_buffer_size_{104857600};
};

}  // namespace zvec

================================================
FILE: src/db/index/storage/mmap_forward_store.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mmap_forward_store.h"
#include <memory>
#include <arrow/acero/options.h>
#include <arrow/compute/api.h>
#include <arrow/datum.h>
#include <arrow/filesystem/localfs.h>
#include <zvec/ailego/logger/logger.h>
#include "db/index/storage/base_forward_store.h"
#include "lazy_record_batch_reader.h"


namespace zvec {

MmapForwardStore::MmapForwardStore(const std::string &uri) : file_path_(uri) {}

// Opens the file named by file_path_ and loads the format-specific metadata.
//
// Returns InvalidArgument when the random-access file cannot be created or
// the format inferred from the resolved path is neither Parquet nor IPC, and
// InternalError when the format-specific open step fails.
Status MmapForwardStore::Open() {
  // Keep a copy of the original URI: the helper below is handed &file_path_
  // and may overwrite it, while the URI is still needed for log messages.
  std::string uri = file_path_;
  auto status = CreateRandomAccessFileByUri(uri, &file_, &file_path_);
  if (!status.ok()) {
    LOG_ERROR("Failed to create random access uri: %s : %s", uri.c_str(),
              status.ToString().c_str());
    return Status::InvalidArgument(status.ToString());
  }

  format_ = InferFileFormat(file_path_);

  if (format_ == FileFormat::PARQUET) {
    status = OpenParquet(file_);
    if (status.ok()) {
      return Status::OK();
    }
    LOG_ERROR("Failed to open parquet file: %s : %s", file_path_.c_str(),
              status.ToString().c_str());
    return Status::InternalError(status.ToString());
  }

  if (format_ == FileFormat::IPC) {
    status = OpenIPC(file_);
    if (status.ok()) {
      return Status::OK();
    }
    LOG_ERROR("Failed to open ipc file: %s : %s", file_path_.c_str(),
              status.ToString().c_str());
    return Status::InternalError(status.ToString());
  }

  LOG_ERROR("Unknown file format: %s", uri.c_str());
  return Status::InvalidArgument("Unknown file format: ", uri);
}

// Creates the Parquet reader for `file` and caches the file-level metadata:
// total row count, row-group count, the first global row of every row group
// (row_group_offsets_), the row count of every row group
// (row_group_row_nums_) and the Arrow schema (physic_schema_).
//
// \param file Random-access handle of an already opened Parquet file.
// \return OK on success; the failing Arrow status, or IOError when the
//         Parquet library reports the failure by throwing.
arrow::Status MmapForwardStore::OpenParquet(
    const std::shared_ptr<arrow::io::RandomAccessFile> &file) {
  // parquet::ParquetFileReader::Open and the metadata accessors signal
  // failures (e.g. a truncated or non-Parquet file) by throwing
  // ParquetException. This function's contract is an arrow::Status, so the
  // exception is converted here instead of escaping through Open().
  try {
    auto parquet_file_reader = parquet::ParquetFileReader::Open(file);
    ARROW_RETURN_NOT_OK(parquet::arrow::FileReader::Make(
        arrow::default_memory_pool(), std::move(parquet_file_reader),
        &parquet_reader_));

    auto parquet_metadata = parquet_reader_->parquet_reader()->metadata();
    num_rows_ = parquet_metadata->num_rows();
    num_row_groups_ = parquet_metadata->num_row_groups();

    // Initialize row group offsets and row counts
    int64_t offset = 0;
    for (int64_t rg = 0; rg < num_row_groups_; ++rg) {
      auto row_group_metadata = parquet_metadata->RowGroup(rg);
      int64_t num_rows_in_group = row_group_metadata->num_rows();
      row_group_row_nums_.push_back(num_rows_in_group);
      row_group_offsets_.push_back(offset);
      offset += num_rows_in_group;
    }

    ARROW_RETURN_NOT_OK(parquet_reader_->GetSchema(&physic_schema_));

    LOG_INFO("Opened Parquet with %lld rows, %d cols, %d row groups",
             static_cast<long long>(num_rows_), physic_schema_->num_fields(),
             parquet_metadata->num_row_groups());
  } catch (const parquet::ParquetException &e) {
    return arrow::Status::IOError("Failed to open parquet file: ", e.what());
  }

  return arrow::Status::OK();
}

// Opens an Arrow IPC file, loads it into table_ and builds the chunk lookup
// structures used by FindTargetChunk().
//
// For every chunk of the first column the inclusive global row range is
// appended to chunk_index_map_. While scanning, the function also decides
// whether a row's chunk can be found by plain division
// (is_fixed_batch_size_): that holds only when every chunk has the length of
// the first one, with the final chunk allowed to be shorter.
//
// \param file Random-access handle of an already opened IPC file.
// \return OK on success, otherwise the failing Arrow status.
arrow::Status MmapForwardStore::OpenIPC(
    const std::shared_ptr<arrow::io::RandomAccessFile> &file) {
  // Propagate failures as a Status. The previous PARQUET_ASSIGN_OR_THROW
  // raised an exception out of a function whose contract is arrow::Status.
  ARROW_ASSIGN_OR_RAISE(ipc_file_reader_,
                        arrow::ipc::RecordBatchFileReader::Open(file));
  ARROW_ASSIGN_OR_RAISE(table_, ipc_file_reader_->ToTable());

  if (table_->num_columns() == 0) {
    return arrow::Status::Invalid("IPC file has no columns");
  }

  auto chunked_array = table_->column(0);
  const int last_chunk = chunked_array->num_chunks() - 1;
  for (int i = 0; i < chunked_array->num_chunks(); ++i) {
    auto chunk = chunked_array->chunk(i);

    if (chunk->length() == 0) {
      // arrow::Status factories concatenate their arguments; they do not
      // interpret printf-style placeholders.
      return arrow::Status::Invalid("Encountered empty chunk at index ", i);
    }

    chunk_index_map_.emplace_back(num_rows_, num_rows_ + chunk->length() - 1);
    num_rows_ += chunk->length();

    // Check if all chunks have the same size except possibly the last one.
    // A last chunk that is LONGER than the others must also disable the
    // division shortcut: its trailing rows would otherwise map to a chunk
    // index past the end and be reported as not found.
    if (fixed_batch_size_ == -1) {
      fixed_batch_size_ = chunk->length();
    } else if (fixed_batch_size_ != chunk->length()) {
      if (i != last_chunk || chunk->length() > fixed_batch_size_) {
        is_fixed_batch_size_ = false;
      }
    }
  }

  physic_schema_ = ipc_file_reader_->schema();
  LOG_INFO(
      "Opened IPC with %lld rows, %d cols, %d chunks, is_fixed_batch_size[%d] "
      "fixed_batch_size[%lld] physic_schema: %s",
      static_cast<long long>(num_rows_), physic_schema_->num_fields(),
      chunked_array->num_chunks(), is_fixed_batch_size_,
      static_cast<long long>(fixed_batch_size_),
      physic_schema_->ToString().c_str());

  return arrow::Status::OK();
}

// Checks that a column request can be served by this store.
//
// An empty request is rejected. LOCAL_ROW_ID is always accepted; every other
// name must resolve to a field of physic_schema_. The first offending name is
// logged and makes the whole request invalid.
bool MmapForwardStore::validate(const std::vector<std::string> &columns) const {
  if (columns.empty()) {
    LOG_ERROR("Empty columns");
    return false;
  }

  for (const std::string &name : columns) {
    const bool known =
        name == LOCAL_ROW_ID || physic_schema_->GetFieldIndex(name) != -1;
    if (!known) {
      LOG_ERROR("Validate failed. unknown column: %s", name.c_str());
      return false;
    }
  }
  return true;
}

// Builds a streaming reader over the requested columns of the Parquet file.
//
// A dedicated parquet::arrow::FileReader is created on top of file_ for each
// scan and handed to a ParquetRecordBatchReader.
//
// \param columns Names of the columns to scan.
// \return The reader, or nullptr when the Parquet reader cannot be created.
RecordBatchReaderPtr MmapForwardStore::ScanParquet(
    const std::vector<std::string> &columns) {
  // Create a new parquet reader for scanning
  std::unique_ptr<parquet::arrow::FileReader> parquet_reader;
  try {
    // ParquetFileReader::Open reports failures by throwing; translate that
    // into the nullptr result the callers of scan() already handle.
    auto parquet_file_reader = parquet::ParquetFileReader::Open(file_);
    auto status = parquet::arrow::FileReader::Make(
        arrow::default_memory_pool(), std::move(parquet_file_reader),
        &parquet_reader);
    if (!status.ok()) {
      LOG_ERROR("Failed to create parquet reader: %s",
                status.message().c_str());
      return nullptr;
    }
  } catch (const parquet::ParquetException &e) {
    LOG_ERROR("Failed to create parquet reader: %s", e.what());
    return nullptr;
  }

  auto rb_reader = std::make_shared<ParquetRecordBatchReader>(
      parquet_reader, columns, physic_schema_, file_path_, false);
  return rb_reader;
}

// Builds a streaming reader over the requested columns of the loaded IPC
// table.
//
// Names that do not resolve to a field of physic_schema_ are skipped rather
// than reported, so the resulting reader may expose fewer columns than were
// asked for. Returns nullptr when the column projection fails.
RecordBatchReaderPtr MmapForwardStore::ScanIPC(
    const std::vector<std::string> &columns) {
  std::vector<int> selected;
  for (const std::string &name : columns) {
    const int field_pos = physic_schema_->GetFieldIndex(name);
    if (field_pos != -1) {
      selected.push_back(field_pos);
    }
  }

  auto projection = table_->SelectColumns(selected);
  if (projection.ok()) {
    auto projected_table = std::move(projection).ValueOrDie();
    return std::make_shared<arrow::TableBatchReader>(projected_table);
  }

  LOG_ERROR("Failed to select columns: %s",
            projection.status().message().c_str());
  return nullptr;
}

// Random-access fetch of several rows from the Parquet file.
//
// The rows are returned in the order given by `indices` and the columns in
// the order given by `columns`. LOCAL_ROW_ID is a synthetic uint64 column
// holding the requested global row index; every other name must be a field
// of physic_schema_.
//
// Rows are grouped by row group so each row group is read once. Every
// extracted scalar is written straight into its output slot, so no sorting
// is needed afterwards.
//
// \param columns Column names to fetch (may contain LOCAL_ROW_ID).
// \param indices Global row indices to fetch.
// \return The resulting table, or nullptr on an unknown column, an
//         out-of-range row index or any read/conversion failure.
TablePtr MmapForwardStore::FetchParquet(const std::vector<std::string> &columns,
                                        const std::vector<int> &indices) {
  // Split the request into physical columns and the synthetic row-id column,
  // remembering where each physical column belongs in the output.
  bool need_local_doc_id = false;
  std::vector<int> col_indices;
  std::vector<int> data_column_positions;

  for (size_t i = 0; i < columns.size(); ++i) {
    if (columns[i] == LOCAL_ROW_ID) {
      need_local_doc_id = true;
    } else {
      int idx = physic_schema_->GetFieldIndex(columns[i]);
      if (idx == -1) return nullptr;
      col_indices.push_back(idx);
      data_column_positions.push_back(static_cast<int>(i));
    }
  }

  // Group by row group, but keep track of original output position
  std::unordered_map<int, std::vector<std::pair<size_t, int64_t>>> rg_to_local;
  for (size_t output_row = 0; output_row < indices.size(); ++output_row) {
    const int global_row = indices[output_row];
    if (global_row < 0 || global_row >= num_rows_) return nullptr;
    const int rg_id = FindRowGroupForRow(global_row);
    const int64_t local_in_rg = global_row - GetRowGroupOffset(rg_id);
    rg_to_local[rg_id].emplace_back(output_row, local_in_rg);
  }

  // scalars[c][r] is the value of physical column c for output row r. Every
  // output row belongs to exactly one row group, so all slots get filled.
  std::vector<std::vector<std::shared_ptr<arrow::Scalar>>> scalars(
      col_indices.size(),
      std::vector<std::shared_ptr<arrow::Scalar>>(indices.size()));

  // Read each row group and extract scalars at required positions
  for (const auto &[rg_id, pairs] : rg_to_local) {
    std::shared_ptr<arrow::Table> rg_table;
    auto status =
        parquet_reader_->RowGroup(rg_id)->ReadTable(col_indices, &rg_table);
    if (!status.ok()) {
      LOG_ERROR("Failed to read row group %d", rg_id);
      return nullptr;
    }

    // The loops below index the row-group columns by position; refuse to
    // continue if the reader returned a different number of columns than
    // requested (e.g. because a column was requested twice).
    if (static_cast<size_t>(rg_table->num_columns()) != col_indices.size()) {
      LOG_ERROR("Row group %d returned %d columns, expected %zu", rg_id,
                rg_table->num_columns(), col_indices.size());
      return nullptr;
    }

    // Concatenate chunks for faster random access
    std::vector<std::shared_ptr<arrow::Array>> flat_columns;
    flat_columns.reserve(col_indices.size());
    for (const auto &col : rg_table->columns()) {
      auto flat_result =
          arrow::Concatenate(col->chunks(), arrow::default_memory_pool());
      if (!flat_result.ok()) {
        LOG_ERROR("Failed to concatenate chunks for rg {%d} status:%s", rg_id,
                  flat_result.status().message().c_str());
        return nullptr;
      }
      flat_columns.push_back(flat_result.ValueOrDie());
    }

    // Extract scalars for this RG
    for (size_t c = 0; c < col_indices.size(); ++c) {
      const auto &array = flat_columns[c];

      for (const auto &[output_row, local_idx] : pairs) {
        auto scalar_result = array->GetScalar(local_idx);
        if (!scalar_result.ok()) {
          LOG_ERROR("Failed to get scalar for row %zu status: %s",
                    static_cast<size_t>(local_idx),
                    scalar_result.status().ToString().c_str());
          // Calling ValueOrDie() on a failed result aborts the process, so
          // report the failure to the caller instead.
          return nullptr;
        }
        scalars[c][output_row] = std::move(scalar_result).ValueOrDie();
      }
    }
  }

  std::vector<std::shared_ptr<arrow::Array>> result_arrays(columns.size());

  for (size_t c = 0; c < scalars.size(); ++c) {
    std::shared_ptr<arrow::Array> arr;
    auto status = ConvertScalarVectorToArrayByType(scalars[c], &arr);
    if (!status.ok()) {
      LOG_ERROR("ConvertScalarVectorToArrayByType failed: %s",
                status.message().c_str());
      return nullptr;
    }

    result_arrays[data_column_positions[c]] = std::move(arr);
  }

  if (need_local_doc_id) {
    // The row-id column is simply the requested indices in request order;
    // they were range-checked above, so none of them is negative.
    std::vector<uint64_t> values(indices.begin(), indices.end());

    // Create UInt64Array
    auto buffer_result = arrow::AllocateBuffer(values.size() * sizeof(uint64_t),
                                               arrow::default_memory_pool());
    if (!buffer_result.ok()) return nullptr;
    auto buffer = std::move(buffer_result.ValueOrDie());
    if (!values.empty()) {
      std::memcpy(buffer->mutable_data(), values.data(),
                  values.size() * sizeof(uint64_t));
    }

    std::vector<std::shared_ptr<arrow::Buffer>> buffers;
    buffers.push_back(nullptr);  // no null bitmap
    buffers.push_back(std::shared_ptr<arrow::Buffer>(std::move(buffer)));

    auto data = arrow::ArrayData::Make(arrow::uint64(),
                                       static_cast<int64_t>(values.size()),
                                       std::move(buffers), /*null_count=*/0);

    for (size_t i = 0; i < columns.size(); ++i) {
      if (columns[i] == LOCAL_ROW_ID) {
        result_arrays[i] = std::make_shared<arrow::UInt64Array>(data);
      }
    }
  }

  std::vector<std::shared_ptr<arrow::Field>> selected_fields;
  selected_fields.reserve(columns.size());
  for (const auto &col : columns) {
    if (col == LOCAL_ROW_ID) {
      selected_fields.push_back(arrow::field(LOCAL_ROW_ID, arrow::uint64()));
    } else {
      selected_fields.push_back(physic_schema_->GetFieldByName(col));
    }
  }

  auto out_schema = std::make_shared<arrow::Schema>(selected_fields);

  std::vector<std::shared_ptr<arrow::ChunkedArray>> chunks;
  chunks.reserve(result_arrays.size());
  for (auto &arr : result_arrays) {
    chunks.emplace_back(std::make_shared<arrow::ChunkedArray>(arr));
  }

  return arrow::Table::Make(out_schema, chunks,
                            static_cast<int64_t>(indices.size()));
}

// Random-access fetch of a single row from the Parquet file.
//
// Reads the row group containing `index` restricted to the requested
// columns and packs one scalar per column into an ExecBatch of length 1.
// The caller is expected to have range-checked `index` (fetch() does).
//
// \param columns Column names to fetch; each must be a field of
//                physic_schema_ (LOCAL_ROW_ID is not supported here).
// \param index   Global row index.
// \return The batch, or nullptr on an unknown column or read failure.
ExecBatchPtr MmapForwardStore::FetchParquet(
    const std::vector<std::string> &columns, int index) {
  std::vector<int> col_indices;
  col_indices.reserve(columns.size());
  for (const auto &col : columns) {
    int idx = physic_schema_->GetFieldIndex(col);
    if (idx == -1) return nullptr;
    col_indices.push_back(idx);
  }

  int rg_id = FindRowGroupForRow(index);
  const int64_t local_in_rg = index - GetRowGroupOffset(rg_id);

  std::shared_ptr<arrow::Table> rg_table;
  auto status =
      parquet_reader_->RowGroup(rg_id)->ReadTable(col_indices, &rg_table);
  if (!status.ok()) {
    LOG_ERROR("Failed to read row group %d", rg_id);
    return nullptr;
  }

  // Extract scalars
  std::vector<arrow::Datum> scalars;
  scalars.reserve(columns.size());
  for (const auto &column : columns) {
    const auto array = rg_table->GetColumnByName(column);
    if (!array) {
      LOG_ERROR("column %s missing from row group %d", column.c_str(), rg_id);
      return nullptr;
    }
    auto scalar_result = array->GetScalar(local_in_rg);
    if (!scalar_result.ok()) {
      // ValueOrDie() on a failed result would abort the process; surface the
      // error the same way the in-memory store does.
      LOG_ERROR("failed to get column %s scalar from array: %s", column.c_str(),
                scalar_result.status().ToString().c_str());
      return nullptr;
    }
    scalars.emplace_back(std::move(scalar_result).ValueOrDie());
  }

  return std::make_shared<arrow::ExecBatch>(std::move(scalars), 1);
}

// Random-access fetch of several rows from the loaded IPC table.
//
// Each requested row is first translated into a (chunk, offset) pair through
// FindTargetChunk(). The output columns are then assembled in request order:
// LOCAL_ROW_ID becomes a non-nullable uint64 column holding the requested
// indices, any other name is gathered from table_ with
// BuildArrayFromIndicesWithType(). Returns nullptr on any failure.
TablePtr MmapForwardStore::FetchIPC(const std::vector<std::string> &columns,
                                    const std::vector<int> &indices) {
  const int chunk_count = table_->column(0)->num_chunks();

  // (chunk index, offset inside that chunk) for every requested row.
  std::vector<std::pair<int64_t, int64_t>> positions;
  for (const int row : indices) {
    int chunk_no = -1;
    int64_t chunk_offset = -1;
    if (!FindTargetChunk(row, chunk_count, &chunk_no, &chunk_offset)) {
      LOG_ERROR("Failed to find target chunk for index %d", row);
      return nullptr;
    }
    positions.emplace_back(chunk_no, chunk_offset);
  }

  std::vector<std::shared_ptr<arrow::ChunkedArray>> out_columns;
  std::vector<std::shared_ptr<arrow::Field>> out_fields;

  for (const std::string &name : columns) {
    std::shared_ptr<arrow::Array> values;

    if (name != LOCAL_ROW_ID) {
      auto source = table_->GetColumnByName(name);
      auto gather_status =
          BuildArrayFromIndicesWithType(source, positions, &values);
      if (!gather_status.ok()) {
        LOG_ERROR("BuildArrayFromIndices failed: %s",
                  gather_status.ToString().c_str());
        return nullptr;
      }
      out_columns.push_back(std::make_shared<arrow::ChunkedArray>(values));
      out_fields.push_back(physic_schema_->GetFieldByName(name));
      continue;
    }

    // Synthetic row-id column: the requested indices themselves.
    arrow::UInt64Builder id_builder;
    std::vector<uint64_t> row_ids(indices.begin(), indices.end());
    auto id_status = id_builder.AppendValues(row_ids);
    if (!id_status.ok()) {
      LOG_ERROR("Failed to append values to UInt64Builder: %s",
                id_status.ToString().c_str());
      return nullptr;
    }

    id_status = id_builder.Finish(&values);
    if (!id_status.ok()) {
      LOG_ERROR("Failed to finish UInt64Builder: %s",
                id_status.ToString().c_str());
      return nullptr;
    }

    out_columns.push_back(std::make_shared<arrow::ChunkedArray>(values));
    out_fields.push_back(arrow::field(LOCAL_ROW_ID, arrow::uint64(), false));
  }

  auto out_schema = std::make_shared<arrow::Schema>(out_fields);
  return arrow::Table::Make(out_schema, out_columns, indices.size());
}

// Random-access fetch of a single row from the loaded IPC table.
//
// Packs one scalar per requested column into an ExecBatch of length 1.
//
// \param columns Column names to fetch; each must be a column of table_.
//                LOCAL_ROW_ID is not materialised by this overload.
// \param index   Global row index (range-checked by fetch()).
// \return The batch, or nullptr when a column is unknown or a scalar cannot
//         be extracted.
ExecBatchPtr MmapForwardStore::FetchIPC(const std::vector<std::string> &columns,
                                        int index) {
  // Extract scalars
  std::vector<arrow::Datum> scalars;
  scalars.reserve(columns.size());
  for (size_t col_idx = 0; col_idx < columns.size(); ++col_idx) {
    //! NOTICE: LOCAL_ROW_ID is not expected here. validate() lets it through
    //! though, and it has no field index, so guard the lookup instead of
    //! indexing the table with -1 (the Parquet path rejects it the same way).
    int field_index = table_->schema()->GetFieldIndex(columns[col_idx]);
    if (field_index < 0) {
      LOG_ERROR("Unknown column %s for row %d", columns[col_idx].c_str(),
                index);
      return nullptr;
    }
    auto chunked_array = table_->column(field_index);
    auto scalar_result = chunked_array->GetScalar(index);
    if (scalar_result.ok()) {
      scalars.push_back(scalar_result.ValueOrDie());
    } else {
      LOG_ERROR("Get scalar failed for column %zu, row %d: %s", col_idx, index,
                scalar_result.status().ToString().c_str());
      return nullptr;
    }
  }

  return std::make_shared<arrow::ExecBatch>(std::move(scalars), 1);
}

// Maps a global row index to the id of the row group that holds it.
//
// row_group_offsets_ stores the first global row of every row group in
// ascending order, so the owning group is the last one whose offset is not
// greater than `row`. Returns 0 when no offset is <= row.
int MmapForwardStore::FindRowGroupForRow(int64_t row) {
  const auto first = row_group_offsets_.begin();
  const auto past_owner = std::upper_bound(first, row_group_offsets_.end(), row);
  if (past_owner == first) {
    return 0;
  }
  const auto groups_up_to_owner = std::distance(first, past_owner);
  return static_cast<int>(groups_up_to_owner - 1);
}

// Returns the global index of the first row stored in row group `rg_id`.
// `rg_id` is used as a plain index into row_group_offsets_ and is not
// bounds-checked, so it must be a valid row-group id.
int64_t MmapForwardStore::GetRowGroupOffset(int rg_id) {
  const int64_t first_row_of_group = row_group_offsets_[rg_id];
  return first_row_of_group;
}

// Locates the chunk of the IPC table that contains global row `target_index`.
//
// When the chunks are known to share one batch size the chunk is computed by
// division; otherwise chunk_index_map_ (inclusive [first, last] row ranges
// per chunk) is binary-searched.
//
// \param target_index       Global row index to locate.
// \param num_chunks         Number of chunks in the table column.
// \param target_chunk_index Receives the index of the containing chunk.
// \param offset_in_chunk    Receives the row offset inside that chunk.
// \return true when the row was located; the outputs are only written then.
bool MmapForwardStore::FindTargetChunk(int target_index, int num_chunks,
                                       int *target_chunk_index,
                                       int64_t *offset_in_chunk) {
  const bool row_exists = target_index >= 0 && target_index < num_rows_;
  if (!row_exists) {
    return false;
  }

  const bool use_division = is_fixed_batch_size_ && fixed_batch_size_ > 0;
  if (use_division) {
    // direct calculation
    const int quotient = static_cast<int>(target_index / fixed_batch_size_);
    if (quotient < 0 || quotient >= num_chunks) {
      return false;
    }
    *target_chunk_index = quotient;
    *offset_in_chunk = target_index % fixed_batch_size_;
    return true;
  }

  // binary search over the inclusive row ranges
  int lo = 0;
  int hi = num_chunks - 1;
  while (lo <= hi) {
    const int probe = lo + (hi - lo) / 2;
    const auto &span = chunk_index_map_[probe];

    if (target_index < span.first) {
      hi = probe - 1;
      continue;
    }
    if (target_index > span.second) {
      lo = probe + 1;
      continue;
    }

    *target_chunk_index = probe;
    *offset_in_chunk = target_index - span.first;
    return true;
  }

  return false;
}

// Fetches the given rows of the given columns as a table.
//
// Invalid column requests yield nullptr. A non-empty index list is forwarded
// to the Parquet or IPC implementation depending on the detected format; an
// empty index list produces a zero-row table whose schema is built from
// SelectFields(physic_schema_, columns).
TablePtr MmapForwardStore::fetch(const std::vector<std::string> &columns,
                                 const std::vector<int> &indices) {
  if (!validate(columns)) {
    return nullptr;
  }

  if (!indices.empty()) {
    return format_ == FileFormat::PARQUET ? FetchParquet(columns, indices)
                                          : FetchIPC(columns, indices);
  }

  // Nothing requested: return an empty table with the right column types.
  auto fields = SelectFields(physic_schema_, columns);
  arrow::ArrayVector no_rows;
  for (const auto &field : fields) {
    no_rows.push_back(arrow::MakeEmptyArray(field->type()).ValueOrDie());
  }
  auto empty_schema = std::make_shared<arrow::Schema>(fields);
  return arrow::Table::Make(empty_schema, no_rows, 0);
}

// Fetches a single row of the given columns as an ExecBatch.
//
// Returns nullptr when the column request is invalid or `index` lies outside
// [0, num_rows_); otherwise dispatches on the detected file format.
ExecBatchPtr MmapForwardStore::fetch(const std::vector<std::string> &columns,
                                     int index) {
  if (!validate(columns)) {
    return nullptr;
  }

  const bool row_exists = index >= 0 && index < num_rows_;
  if (!row_exists) {
    LOG_ERROR("Invalid global row: %d", index);
    return nullptr;
  }

  return format_ == FileFormat::PARQUET ? FetchParquet(columns, index)
                                        : FetchIPC(columns, index);
}

// Streams the given columns of the whole file as record batches.
//
// Returns nullptr for an invalid column request; otherwise dispatches on the
// detected file format.
RecordBatchReaderPtr MmapForwardStore::scan(
    const std::vector<std::string> &columns) {
  if (!validate(columns)) {
    return nullptr;
  }

  return format_ == FileFormat::PARQUET ? ScanParquet(columns)
                                        : ScanIPC(columns);
}

}  // namespace zvec

================================================
FILE: src/db/index/storage/mmap_forward_store.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include <arrow/api.h>
#include <arrow/chunked_array.h>
#include <arrow/compute/api.h>
#include <arrow/dataset/api.h>
#include <arrow/filesystem/api.h>
#include <arrow/io/file.h>
#include <arrow/ipc/reader.h>
#include <arrow/result.h>
#include <arrow/status.h>
#include <arrow/table.h>
#include <arrow/util/async_generator.h>
#include <parquet/arrow/reader.h>
#include <parquet/column_reader.h>
#include <parquet/exception.h>
#include <zvec/db/status.h>
#include "base_forward_store.h"
#include "store_helper.h"

namespace zvec {

/// MmapForwardStore implements a read-only forward store over a single
/// Parquet or Arrow IPC file.
///
/// The file is opened through CreateRandomAccessFileByUri() and the format is
/// inferred from the resolved path. NOTE(review): the class name suggests the
/// file is memory mapped; that depends on the helper and is not visible here.
class MmapForwardStore : public BaseForwardStore {
 public:
  /// Pointer type for MmapForwardStore instances
  using Ptr = std::shared_ptr<MmapForwardStore>;

  /// Constructor that only records the file URI; call Open() before use.
  /// \param uri The URI of the file to be accessed
  MmapForwardStore(const std::string &uri);
  virtual ~MmapForwardStore() {}

  /// Open the file, detect its format and load the format-specific metadata
  /// \return OK on success; InvalidArgument when the file cannot be opened or
  ///         its format is unknown; InternalError when reading the Parquet
  ///         or IPC metadata fails
  Status Open() override;

  /// Fetch specific columns and row indices from the data source
  /// \param columns The list of column names to fetch
  /// \param indices The list of row indices to fetch; an empty list yields a
  ///        zero-row table
  /// \return A table containing the requested data or nullptr on failure
  TablePtr fetch(const std::vector<std::string> &columns,
                 const std::vector<int> &indices) override;

  /// Fetch specific columns and a single row index from the data source
  /// \param columns The list of column names to fetch
  /// \param index The row index to fetch, in [0, number of rows)
  /// \return An ExecBatch containing the requested data or nullptr on failure
  ExecBatchPtr fetch(const std::vector<std::string> &columns,
                     int index) override;

  /// Scan specified columns from the data source
  /// \param columns The list of column names to scan
  /// \return A RecordBatchReader for streaming the data or nullptr on failure
  RecordBatchReaderPtr scan(const std::vector<std::string> &columns) override;

  /// Get the physical schema of the file
  /// \return A shared pointer to the arrow schema representing the physical
  /// structure of the data
  const std::shared_ptr<arrow::Schema> physic_schema() const override {
    return physic_schema_;
  }

  /// Get the fully loaded table
  /// \return table_, which the accompanying .cc file assigns only when an
  ///         IPC file is opened; it stays null for Parquet files
  TablePtr get_table() override {
    return table_;
  }

 private:
  /// Validate that the requested columns exist in the schema
  /// \param columns The list of column names to validate
  /// \return true if the list is non-empty and every name is either
  ///         LOCAL_ROW_ID or a field of the physical schema, false otherwise
  bool validate(const std::vector<std::string> &columns) const;

 private:
  /// Open a parquet file and initialize metadata
  /// \param file The RandomAccessFile to read from
  /// \return arrow::Status indicating success or failure
  arrow::Status OpenParquet(
      const std::shared_ptr<arrow::io::RandomAccessFile> &file);

  /// Open an IPC file and initialize metadata
  /// \param file The RandomAccessFile to read from
  /// \return arrow::Status indicating success or failure
  arrow::Status OpenIPC(
      const std::shared_ptr<arrow::io::RandomAccessFile> &file);

  /// Fetch data from a parquet file
  /// \param columns The list of column names to fetch
  /// \param indices The list of row indices to fetch
  /// \return A table containing the requested data or nullptr on failure
  TablePtr FetchParquet(const std::vector<std::string> &columns,
                        const std::vector<int> &indices);

  /// Fetch specific columns and a single row index from parquet file
  /// \param columns The list of column names to fetch
  /// \param index The row index to fetch
  /// \return An ExecBatch containing the requested data or nullptr on failure
  ExecBatchPtr FetchParquet(const std::vector<std::string> &columns, int index);

  /// Fetch data from an IPC file
  /// \param columns The list of column names to fetch
  /// \param indices The list of row indices to fetch
  /// \return A table containing the requested data or nullptr on failure
  TablePtr FetchIPC(const std::vector<std::string> &columns,
                    const std::vector<int> &indices);

  /// Fetch specific columns and a single row index from IPC file
  /// \param columns The list of column names to fetch
  /// \param index The row index to fetch
  /// \return An ExecBatch containing the requested data or nullptr on failure
  ExecBatchPtr FetchIPC(const std::vector<std::string> &columns, int index);

  /// Scan data from a parquet file
  /// \param columns The list of column names to scan
  /// \return A RecordBatchReader for streaming the data or nullptr on failure
  RecordBatchReaderPtr ScanParquet(const std::vector<std::string> &columns);

  /// Scan data from an IPC file
  /// \param columns The list of column names to scan
  /// \return A RecordBatchReader for streaming the data or nullptr on failure
  RecordBatchReaderPtr ScanIPC(const std::vector<std::string> &columns);

  /// Find which row group contains a given row
  /// \param row The row index to locate
  /// \return The row group ID containing the row
  int FindRowGroupForRow(int64_t row);

  /// Get the row offset for a given row group
  /// \param rg_id The row group ID; it must be valid, because the definition
  ///        indexes row_group_offsets_ without a bounds check
  /// \return The global index of the first row of the row group
  int64_t GetRowGroupOffset(int rg_id);

  /// Find the chunk that contains a target row index, either by direct
  /// division (when the chunks share one batch size) or by binary search
  /// over chunk_index_map_
  /// \param target_index The row index to locate
  /// \param num_chunks The total number of chunks in the array
  /// \param target_chunk_index Output parameter for the index of the chunk
  /// containing the target
  /// \param offset_in_chunk Output parameter for the offset within the found
  /// chunk
  /// \return true if the target chunk was found, false otherwise
  bool FindTargetChunk(int target_index, int num_chunks,
                       int *target_chunk_index, int64_t *offset_in_chunk);

 private:
  /// Format of the file being accessed; has no default value and is assigned
  /// by Open()
  FileFormat format_;

  /// Physical schema of the file
  std::shared_ptr<arrow::Schema> physic_schema_;

  /// Total number of rows in the file
  int64_t num_rows_{0};

  /// Path to the file; initialised with the constructor's URI and passed to
  /// CreateRandomAccessFileByUri() as an output argument by Open()
  std::string file_path_;

  /// The RandomAccessFile for reading data (used for both formats)
  std::shared_ptr<arrow::io::RandomAccessFile> file_;

  // Parquet-specific members
  /// The parquet file reader
  std::unique_ptr<parquet::arrow::FileReader> parquet_reader_;

  /// Number of row groups in the file
  int64_t num_row_groups_{0};

  /// Global index of the first row of each row group, in ascending order
  std::vector<int64_t> row_group_offsets_;

  /// Number of rows in each row group
  std::vector<int64_t> row_group_row_nums_;

  // IPC-specific members
  /// The IPC file reader
  std::shared_ptr<arrow::ipc::RecordBatchFileReader> ipc_file_reader_;

  /// The whole IPC file loaded as a table
  std::shared_ptr<arrow::Table> table_;

  /// Inclusive [first row, last row] global range covered by each chunk of
  /// the table's first column
  std::vector<std::pair<int64_t, int64_t>> chunk_index_map_;

  // For performance tuning
  /// Whether FindTargetChunk() may locate a chunk by division instead of
  /// binary search; decided by OpenIPC()
  bool is_fixed_batch_size_{true};
  /// Length of the first chunk seen by OpenIPC(); -1 until then
  int64_t fixed_batch_size_{-1};
};

}  // namespace zvec

================================================
FILE: src/db/index/storage/parquet_writer.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "parquet_writer.h"
#include <cstdint>
#include <iostream>
#include <arrow/compute/api_vector.h>

namespace zvec {

ParquetWriter::ParquetWriter(const std::string &filepath,
                             int64_t max_rows_per_group)
    : filepath_(filepath), max_rows_per_group_(max_rows_per_group) {}

/// Finalizes the file automatically if the caller never did so.
/// A destructor cannot return a status, so a failure is only reported on
/// stderr.
ParquetWriter::~ParquetWriter() {
  // Nothing to do when already finalized or when no writer was ever opened.
  if (finalized_ || !writer_) {
    return;
  }

  const arrow::Status close_status = finalize();
  if (!close_status.ok()) {
    std::cerr << "Auto-finalize failed: " << close_status.ToString()
              << std::endl;
  }
}

/// Drains `reader` and writes every non-empty batch to the Parquet file.
///
/// The output stream and Parquet writer are created on the first call, using
/// the reader's schema. Batches holding more than `max_rows_per_group_` rows
/// (when that limit is positive) are written as consecutive slices of at most
/// that many rows.
///
/// \param reader Source of record batches; must not be null
/// \param filter Optional row filter forwarded to write_batch()
/// \return OK on success, Invalid for a null reader/schema, or the first
/// error reported by the reader, the file stream or the Parquet writer
arrow::Status ParquetWriter::insert(
    std::shared_ptr<arrow::RecordBatchReader> reader,
    const IndexFilter::Ptr &filter) {
  if (reader == nullptr) {
    return arrow::Status::Invalid("RecordBatchReader is null");
  }

  // Lazily open the destination file the first time data arrives.
  if (writer_ == nullptr) {
    const auto reader_schema = reader->schema();
    if (reader_schema == nullptr) {
      return arrow::Status::Invalid("Reader schema is null");
    }

    ARROW_ASSIGN_OR_RAISE(outfile_,
                          arrow::io::FileOutputStream::Open(filepath_));

    parquet::WriterProperties::Builder props_builder;
    const std::shared_ptr<parquet::WriterProperties> writer_props =
        props_builder.build();

    ARROW_ASSIGN_OR_RAISE(
        writer_, parquet::arrow::FileWriter::Open(*reader_schema,
                                                  arrow::default_memory_pool(),
                                                  outfile_, writer_props));
  }

  for (;;) {
    ARROW_ASSIGN_OR_RAISE(auto current, reader->Next());
    if (current == nullptr) {
      // A null batch marks the end of the stream.
      break;
    }

    const int64_t total_rows = current->num_rows();
    if (total_rows == 0) {
      continue;
    }

    const bool needs_split =
        max_rows_per_group_ > 0 && total_rows > max_rows_per_group_;
    if (!needs_split) {
      ARROW_RETURN_NOT_OK(write_batch(*current, filter));
      continue;
    }

    // Oversized batch: hand it over in slices of at most
    // max_rows_per_group_ rows.
    for (int64_t start = 0; start < total_rows;) {
      const int64_t count = std::min(max_rows_per_group_, total_rows - start);
      const auto piece = current->Slice(start, count);
      ARROW_RETURN_NOT_OK(write_batch(*piece, filter));
      start += count;
    }
  }

  return arrow::Status::OK();
}

/// Writes a single record batch to the Parquet file.
///
/// An empty batch is accepted and ignored without opening the file. The
/// output stream and Parquet writer are created on the first non-empty batch,
/// using that batch's schema. A batch with more than `max_rows_per_group_`
/// rows (when that limit is positive) is written as consecutive slices of at
/// most that many rows.
///
/// \param batch The batch to write; must not be null
/// \param filter Optional row filter forwarded to write_batch()
/// \return OK on success, Invalid for a null batch, or the first error
/// reported by the file stream or the Parquet writer
arrow::Status ParquetWriter::insert_batch(
    std::shared_ptr<arrow::RecordBatch> batch, const IndexFilter::Ptr &filter) {
  if (batch == nullptr) {
    return arrow::Status::Invalid("RecordBatch is null");
  }

  const int64_t total_rows = batch->num_rows();
  if (total_rows == 0) {
    return arrow::Status::OK();
  }

  // Lazily open the destination file the first time data arrives.
  if (writer_ == nullptr) {
    const auto batch_schema = batch->schema();

    ARROW_ASSIGN_OR_RAISE(outfile_,
                          arrow::io::FileOutputStream::Open(filepath_));

    parquet::WriterProperties::Builder props_builder;
    const std::shared_ptr<parquet::WriterProperties> writer_props =
        props_builder.build();

    ARROW_ASSIGN_OR_RAISE(
        writer_, parquet::arrow::FileWriter::Open(*batch_schema,
                                                  arrow::default_memory_pool(),
                                                  outfile_, writer_props));
  }

  const bool needs_split =
      max_rows_per_group_ > 0 && total_rows > max_rows_per_group_;
  if (!needs_split) {
    ARROW_RETURN_NOT_OK(write_batch(*batch, filter));
    return arrow::Status::OK();
  }

  // Oversized batch: hand it over in slices of at most max_rows_per_group_
  // rows.
  for (int64_t start = 0; start < total_rows;) {
    const int64_t count = std::min(max_rows_per_group_, total_rows - start);
    const auto piece = batch->Slice(start, count);
    ARROW_RETURN_NOT_OK(write_batch(*piece, filter));
    start += count;
  }

  return arrow::Status::OK();
}

/// Writes `batch` through the Parquet writer, optionally keeping only the
/// rows selected by `filter`.
///
/// Without a filter the batch is written unchanged. With a filter, only rows
/// for which `filter->is_filtered(i)` returns true are written; if no row
/// qualifies nothing is written and OK is returned.
///
/// NOTE(review): `i` is the row position inside `batch` (which may itself be
/// a slice of a larger batch), not a file-wide row number -- confirm that the
/// filter implementation expects batch-local indices.
///
/// \param batch The batch (or slice) to write
/// \param filter Optional row filter; may be null
/// \return Status of the underlying builder, Take or write operations
arrow::Status ParquetWriter::write_batch(const arrow::RecordBatch &batch,
                                         const IndexFilter::Ptr &filter) {
  if (filter == nullptr) {
    return writer_->WriteRecordBatch(batch);
  }

  // Gather the positions of the rows that must be kept.
  std::vector<int64_t> kept_rows;
  const int64_t row_count = batch.num_rows();
  for (int64_t row = 0; row < row_count; ++row) {
    if (filter->is_filtered(row)) {
      kept_rows.push_back(row);
    }
  }
  if (kept_rows.empty()) {
    return arrow::Status::OK();
  }

  // Turn the positions into an Arrow index array usable by Take.
  std::shared_ptr<arrow::Array> take_indices;
  {
    arrow::Int64Builder index_builder;
    ARROW_RETURN_NOT_OK(index_builder.AppendValues(kept_rows));
    ARROW_RETURN_NOT_OK(index_builder.Finish(&take_indices));
  }

  // Apply the same selection to every column.
  std::vector<std::shared_ptr<arrow::Array>> kept_columns;
  const int column_count = batch.num_columns();
  for (int col = 0; col < column_count; ++col) {
    ARROW_ASSIGN_OR_RAISE(
        arrow::Datum taken,
        arrow::compute::Take(batch.column(col), take_indices));
    kept_columns.push_back(taken.make_array());
  }

  const auto reduced_batch = arrow::RecordBatch::Make(
      batch.schema(), static_cast<int64_t>(kept_rows.size()), kept_columns);

  return writer_->WriteRecordBatch(*reduced_batch);
}

/// Closes the Parquet writer and then the underlying output stream.
///
/// Calling it again after a successful finalize is a no-op. If no writer was
/// ever opened (the writer is created lazily by insert()/insert_batch()),
/// Invalid is returned. When closing the writer or the stream fails, the
/// error is returned and the object is not marked as finalized.
///
/// \return OK on success, otherwise the first failure encountered
arrow::Status ParquetWriter::finalize() {
  if (finalized_) {
    return arrow::Status::OK();
  }

  if (writer_ == nullptr) {
    return arrow::Status::Invalid(
        "No data written, cannot finalize empty file");
  }

  // The Parquet writer is closed before the stream it writes into.
  ARROW_RETURN_NOT_OK(writer_->Close());
  writer_.reset();

  ARROW_RETURN_NOT_OK(outfile_->Close());
  outfile_.reset();

  finalized_ = true;
  return arrow::Status::OK();
}

}  // namespace zvec

================================================
FILE: src/db/index/storage/parquet_writer.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <memory>
#include <string>
#include <arrow/api.h>
#include <arrow/io/api.h>
#include <arrow/status.h>
#include <parquet/arrow/writer.h>
#include "db/index/common/index_filter.h"
#include "forward_writer.h"

namespace zvec {

/// ForwardWriter implementation that writes Arrow record batches to a single
/// Parquet file.
///
/// The destination file is opened lazily by the first insert()/insert_batch()
/// call that carries data, and is closed by finalize() (or by the destructor
/// if finalize() was never called).
class ParquetWriter : public ForwardWriter {
 public:
  /// \param filepath Path of the Parquet file to create
  /// \param max_rows_per_group When positive, incoming batches with more rows
  /// than this are handed to the Parquet writer in slices of at most this
  /// many rows; 0 (the default) or a negative value disables the splitting
  explicit ParquetWriter(const std::string &filepath,
                         int64_t max_rows_per_group = 0);

  /// Calls finalize() if a writer is open and finalize() has not succeeded
  /// yet; a failure is reported on stderr.
  ~ParquetWriter();

  /// Reads `reader` until it is exhausted and writes every non-empty batch.
  /// \param reader Source of record batches; a null reader yields Invalid
  /// \param filter Optional filter; when set, only rows for which
  /// is_filtered() returns true are written
  arrow::Status insert(std::shared_ptr<arrow::RecordBatchReader> reader,
                       const IndexFilter::Ptr &filter = nullptr) override;

  /// Writes a single batch; an empty batch is ignored, a null batch yields
  /// Invalid.
  /// \param batch The batch to write
  /// \param filter Optional filter; when set, only rows for which
  /// is_filtered() returns true are written
  arrow::Status insert_batch(std::shared_ptr<arrow::RecordBatch> batch,
                             const IndexFilter::Ptr &filter = nullptr) override;

  /// Closes the Parquet writer and the output stream. Returns Invalid when
  /// nothing was ever written; repeated calls after success are no-ops.
  arrow::Status finalize() override;

 private:
  /// Writes one batch (or slice), applying `filter` row by row when it is
  /// non-null.
  arrow::Status write_batch(const arrow::RecordBatch &batch,
                            const IndexFilter::Ptr &filter);

 private:
  /// Destination path of the Parquet file
  std::string filepath_;
  /// Maximum number of rows passed to the Parquet writer per write call;
  /// values <= 0 disable splitting
  int64_t max_rows_per_group_ = 0;

  /// Output stream for the file; null until the first write and after
  /// finalize()
  std::shared_ptr<arrow::io::FileOutputStream> outfile_;
  /// Parquet writer bound to outfile_; null until the first write and after
  /// finalize()
  std::unique_ptr<parquet::arrow::FileWriter> writer_;
  /// Set once finalize() has completed successfully
  bool finalized_ = false;
};

}  // namespace zvec

================================================
FILE: src/db/index/storage/store_helper.h
================================================

// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <algorithm>
#include <filesystem>
#include <iostream>
#include <memory>
#include <string>
#include <arrow/api.h>
#include <arrow/compute/api.h>
#include <arrow/dataset/api.h>
#include <arrow/filesystem/api.h>
#include <arrow/io/file.h>
#include <arrow/ipc/reader.h>
#include <arrow/result.h>
#include <arrow/status.h>
#include <parquet/arrow/reader.h>
#include <zvec/db/doc.h>
#include <zvec/db/schema.h>
#include "db/common/constants.h"
#include "db/common/file_helper.h"
#include "db/index/common/meta.h"
#include "chunked_file_writer.h"


namespace zvec {

/// Infers the storage format of a file from its extension.
///
/// The comparison is case-insensitive. ".parquet" maps to PARQUET;
/// ".feather", ".ipc" and ".arrow" map to IPC; anything else (including a
/// path without extension) maps to UNKNOWN.
///
/// \param file_path Path whose extension is inspected
/// \return The inferred FileFormat
inline FileFormat InferFileFormat(const std::string &file_path) {
  // Use the explicit string() accessor: the implicit conversion from
  // std::filesystem::path only yields std::string on platforms whose native
  // path character type is char.
  std::string ext = std::filesystem::path(file_path).extension().string();

  // tolower() has undefined behaviour for negative values other than EOF, so
  // each byte is routed through unsigned char before the call.
  std::transform(ext.begin(), ext.end(), ext.begin(), [](unsigned char ch) {
    return static_cast<char>(::tolower(ch));
  });

  if (ext == ".parquet") {
    return FileFormat::PARQUET;
  }
  if (ext == ".feather" || ext == ".ipc" || ext == ".arrow") {
    return FileFormat::IPC;
  }
  return FileFormat::UNKNOWN;
}

/// Builds the Arrow field that corresponds to a zvec field schema.
///
/// Scalar data types map to the matching Arrow primitive/binary/utf8 type and
/// ARRAY_* data types map to an Arrow list of that element type. The Arrow
/// field takes its name and nullability from `field`.
///
/// \param field The field schema to convert
/// \param out Receives the Arrow field; left untouched on error
/// \return OK on success, Invalid for a data type without a mapping
inline arrow::Status ConvertFieldSchemaToArrowField(
    const FieldSchema *field, std::shared_ptr<arrow::Field> *out) {
  // First resolve the Arrow type, then assemble the field in one place.
  std::shared_ptr<arrow::DataType> arrow_type;
  switch (field->data_type()) {
    case DataType::BINARY:
      arrow_type = arrow::binary();
      break;
    case DataType::STRING:
      arrow_type = arrow::utf8();
      break;
    case DataType::BOOL:
      arrow_type = arrow::boolean();
      break;
    case DataType::INT32:
      arrow_type = arrow::int32();
      break;
    case DataType::INT64:
      arrow_type = arrow::int64();
      break;
    case DataType::UINT32:
      arrow_type = arrow::uint32();
      break;
    case DataType::UINT64:
      arrow_type = arrow::uint64();
      break;
    case DataType::FLOAT:
      arrow_type = arrow::float32();
      break;
    case DataType::DOUBLE:
      arrow_type = arrow::float64();
      break;
    case DataType::ARRAY_BINARY:
      arrow_type = arrow::list(arrow::binary());
      break;
    case DataType::ARRAY_STRING:
      arrow_type = arrow::list(arrow::utf8());
      break;
    case DataType::ARRAY_BOOL:
      arrow_type = arrow::list(arrow::boolean());
      break;
    case DataType::ARRAY_INT32:
      arrow_type = arrow::list(arrow::int32());
      break;
    case DataType::ARRAY_INT64:
      arrow_type = arrow::list(arrow::int64());
      break;
    case DataType::ARRAY_UINT32:
      arrow_type = arrow::list(arrow::uint32());
      break;
    case DataType::ARRAY_UINT64:
      arrow_type = arrow::list(arrow::uint64());
      break;
    case DataType::ARRAY_FLOAT:
      arrow_type = arrow::list(arrow::float32());
      break;
    case DataType::ARRAY_DOUBLE:
      arrow_type = arrow::list(arrow::float64());
      break;
    default:
      return arrow::Status::Invalid(
          "Unsupported data type ",
          DataTypeCodeBook::AsString(field->data_type()));
  }

  *out = arrow::field(field->name(), arrow_type, field->nullable());
  return arrow::Status::OK();
}

/// Produces the Arrow field list for a collection schema.
///
/// The result starts with two non-nullable system columns -- GLOBAL_DOC_ID
/// (uint64) and USER_ID (utf8) -- followed by one Arrow field per forward
/// field of the schema, in the order returned by forward_fields().
///
/// \param schema The collection schema to convert
/// \param out Receives the field list; only assigned on success
/// \return OK, or the error of the first forward field that cannot be
/// converted
inline arrow::Status ConvertCollectionSchemaToArrowFields(
    const CollectionSchema::Ptr &schema, arrow::FieldVector *out) {
  arrow::FieldVector converted;

  // System columns always come first.
  converted.push_back(arrow::field(GLOBAL_DOC_ID, arrow::uint64(), false));
  converted.push_back(arrow::field(USER_ID, arrow::utf8(), false));

  for (auto &forward_field : schema->forward_fields()) {
    std::shared_ptr<arrow::Field> converted_field;
    ARROW_RETURN_NOT_OK(
        ConvertFieldSchemaToArrowField(forward_field.get(), &converted_field));
    converted.push_back(converted_field);
  }

  *out = std::move(converted);
  return arrow::Status::OK();
}

/// Builds an Arrow array from a sequence of scalars of one concrete type.
///
/// A nullptr entry or an invalid scalar becomes a null slot; every other
/// entry is appended through its `value` member.
///
/// Every non-null, valid scalar must actually be a `ScalarType`: the
/// reference dynamic_cast throws std::bad_cast otherwise.
///
/// \tparam BuilderType Arrow builder used to assemble the array
/// \tparam ScalarType Concrete scalar class whose `value` is appended
/// \param ordered_scalars Scalars in output order
/// \param out Receives the finished array; only assigned on success
/// \return OK, or the first error reported by the builder
template <typename BuilderType, typename ScalarType>
inline arrow::Status ConvertScalarVectorToArray(
    std::vector<std::shared_ptr<arrow::Scalar>> ordered_scalars,
    std::shared_ptr<arrow::Array> *out) {
  std::shared_ptr<arrow::Array> arr;
  BuilderType builder;
  for (const auto &scalar : ordered_scalars) {
    if (scalar == nullptr || scalar->is_valid == false) {
      ARROW_RETURN_NOT_OK(builder.AppendNull());
      continue;
    }
    // Propagate append failures (e.g. allocation errors) instead of silently
    // dropping the value, which would yield an array shorter than the input.
    ARROW_RETURN_NOT_OK(
        builder.Append(dynamic_cast<const ScalarType &>(*scalar).value));
  }
  ARROW_RETURN_NOT_OK(builder.Finish(&arr));
  *out = arr;
  return arrow::Status::OK();
}

/// Specialization for string scalars: builds a StringArray by copying the
/// bytes of each scalar's value buffer.
///
/// A nullptr entry or an invalid scalar becomes a null slot. A valid scalar
/// whose value buffer is missing is stored as an empty string, matching the
/// handling in the BinaryScalar specialization, instead of dereferencing a
/// null buffer.
///
/// Every non-null, valid scalar must be an arrow::StringScalar: the reference
/// dynamic_cast throws std::bad_cast otherwise.
///
/// \param ordered_scalars Scalars in output order
/// \param out Receives the finished array; only assigned on success
/// \return OK, or the first error reported by the builder
template <>
inline arrow::Status
ConvertScalarVectorToArray<arrow::StringBuilder, arrow::StringScalar>(
    std::vector<std::shared_ptr<arrow::Scalar>> ordered_scalars,
    std::shared_ptr<arrow::Array> *out) {
  std::shared_ptr<arrow::Array> arr;
  arrow::StringBuilder builder;
  for (const auto &scalar : ordered_scalars) {
    if (scalar == nullptr || scalar->is_valid == false) {
      ARROW_RETURN_NOT_OK(builder.AppendNull());
      continue;
    }
    const auto &str = dynamic_cast<const arrow::StringScalar &>(*scalar).value;
    if (str == nullptr) {
      // Valid scalar without a backing buffer: store an empty string.
      ARROW_RETURN_NOT_OK(builder.AppendEmptyValue());
      continue;
    }
    ARROW_RETURN_NOT_OK(
        builder.Append(str->data(), static_cast<int>(str->size())));
  }
  ARROW_RETURN_NOT_OK(builder.Finish(&arr));
  *out = arr;
  return arrow::Status::OK();
}

/// Specialization for binary scalars: builds a BinaryArray by copying the
/// bytes of each scalar's value buffer.
///
/// A nullptr entry or an invalid scalar becomes a null slot; a valid scalar
/// without a value buffer becomes an empty value.
///
/// Every non-null, valid scalar must be an arrow::BinaryScalar: the reference
/// dynamic_cast throws std::bad_cast otherwise.
///
/// \param ordered_scalars Scalars in output order
/// \param out Receives the finished array; only assigned on success
/// \return OK, or the first error reported by the builder
template <>
inline arrow::Status
ConvertScalarVectorToArray<arrow::BinaryBuilder, arrow::BinaryScalar>(
    std::vector<std::shared_ptr<arrow::Scalar>> ordered_scalars,
    std::shared_ptr<arrow::Array> *out) {
  arrow::BinaryBuilder binary_builder;

  for (const auto &entry : ordered_scalars) {
    const bool is_null_slot = (entry == nullptr) || !entry->is_valid;
    if (is_null_slot) {
      ARROW_RETURN_NOT_OK(binary_builder.AppendNull());
      continue;
    }

    const auto &typed = dynamic_cast<const arrow::BinaryScalar &>(*entry);
    if (!typed.value) {
      // Valid scalar without a backing buffer: store an empty value.
      ARROW_RETURN_NOT_OK(binary_builder.AppendEmptyValue());
      continue;
    }

    ARROW_RETURN_NOT_OK(binary_builder.Append(
        typed.value->data(), static_cast<int>(typed.value->size())));
  }

  std::shared_ptr<arrow::Array> finished;
  ARROW_RETURN_NOT_OK(binary_builder.Finish(&finished));
  *out = finished;
  return arrow::Status::OK();
}

/// Builds an Arrow array from a sequence of scalars, choosing the builder
/// from the scalars' runtime type.
///
/// The element type is taken from the first non-null scalar, so leading
/// nullptr entries (which become null slots) no longer cause a null
/// dereference. Supported types are BINARY, BOOL, INT32, UINT32, INT64,
/// UINT64, FLOAT, DOUBLE, STRING and LIST.
///
/// \param ordered_scalars Scalars in output order; nullptr or invalid entries
/// become null slots
/// \param out Receives the finished array; only assigned on success
/// \return OK on success; Invalid when the vector is empty, contains only
/// nullptr entries, or the first non-null scalar has no type; TypeError for
/// inconsistent LIST scalars; NotImplemented for unsupported types; otherwise
/// the first builder error
inline arrow::Status ConvertScalarVectorToArrayByType(
    std::vector<std::shared_ptr<arrow::Scalar>> ordered_scalars,
    std::shared_ptr<arrow::Array> *out) {
  if (ordered_scalars.empty()) {
    return arrow::Status::Invalid("Cannot convert empty vector to array");
  }

  // The per-type converters accept nullptr entries, so the type must not be
  // read blindly from element 0: use the first entry that actually exists.
  const arrow::Scalar *first_present = nullptr;
  for (const auto &scalar : ordered_scalars) {
    if (scalar != nullptr) {
      first_present = scalar.get();
      break;
    }
  }
  if (first_present == nullptr) {
    return arrow::Status::Invalid(
        "Cannot infer array type: all scalars are null");
  }

  const std::shared_ptr<arrow::DataType> type = first_present->type;
  if (type == nullptr) {
    return arrow::Status::Invalid("Cannot convert empty vector to array");
  }

  // The typed converters take the vector by value; it is not used again after
  // the call, so it is moved rather than copied.
  switch (type->id()) {
    case arrow::Type::BINARY:
      return ConvertScalarVectorToArray<arrow::BinaryBuilder,
                                        arrow::BinaryScalar>(
          std::move(ordered_scalars), out);
    case arrow::Type::BOOL:
      return ConvertScalarVectorToArray<arrow::BooleanBuilder,
                                        arrow::BooleanScalar>(
          std::move(ordered_scalars), out);
    case arrow::Type::INT32:
      return ConvertScalarVectorToArray<arrow::Int32Builder,
                                        arrow::Int32Scalar>(
          std::move(ordered_scalars), out);
    case arrow::Type::UINT32:
      return ConvertScalarVectorToArray<arrow::UInt32Builder,
                                        arrow::UInt32Scalar>(
          std::move(ordered_scalars), out);
    case arrow::Type::INT64:
      return ConvertScalarVectorToArray<arrow::Int64Builder,
                                        arrow::Int64Scalar>(
          std::move(ordered_scalars), out);
    case arrow::Type::UINT64:
      return ConvertScalarVectorToArray<arrow::UInt64Builder,
                                        arrow::UInt64Scalar>(
          std::move(ordered_scalars), out);
    case arrow::Type::FLOAT:
      return ConvertScalarVectorToArray<arrow::FloatBuilder,
                                        arrow::FloatScalar>(
          std::move(ordered_scalars), out);
    case arrow::Type::DOUBLE:
      return ConvertScalarVectorToArray<arrow::DoubleBuilder,
                                        arrow::DoubleScalar>(
          std::move(ordered_scalars), out);
    case arrow::Type::STRING:
      return ConvertScalarVectorToArray<arrow::StringBuilder,
                                        arrow::StringScalar>(
          std::move(ordered_scalars), out);
    case arrow::Type::LIST: {
      auto list_type = std::dynamic_pointer_cast<arrow::ListType>(type);
      if (!list_type) {
        return arrow::Status::TypeError("Expected ListType for LIST scalar");
      }

      std::unique_ptr<arrow::ArrayBuilder> value_builder;
      ARROW_RETURN_NOT_OK(arrow::MakeBuilder(arrow::default_memory_pool(),
                                             list_type->value_type(),
                                             &value_builder));

      arrow::ListBuilder builder(arrow::default_memory_pool(),
                                 std::move(value_builder), list_type);

      for (const auto &scalar : ordered_scalars) {
        if (scalar == nullptr || scalar->is_valid == false) {
          ARROW_RETURN_NOT_OK(builder.AppendNull());
          continue;
        }

        auto list_scalar = std::dynamic_pointer_cast<arrow::ListScalar>(scalar);
        if (!list_scalar) {
          return arrow::Status::TypeError("Expected ListScalar for LIST type");
        }

        // Open a new list slot, then copy all elements of this scalar's
        // backing array into the child builder.
        ARROW_RETURN_NOT_OK(builder.Append());
        auto value_builder_ptr = builder.value_builder();
        ARROW_RETURN_NOT_OK(value_builder_ptr->AppendArraySlice(
            *list_scalar->value->data(), 0, list_scalar->value->length()));
      }

      std::shared_ptr<arrow::Array> arr;
      ARROW_RETURN_NOT_OK(builder.Finish(&arr));
      *out = arr;
      return arrow::Status::OK();
    }
    default:
      // TODO: support the remaining Arrow types.
      return arrow::Status::NotImplemented("Unsupported type");
  }
}

/// Appends the value of one document field to a typed Arrow builder.
///
/// The field is looked up in `doc` by `field->name()`. When the lookup
/// yields no value a null slot is appended instead.
///
/// \tparam ArrowBuilderType Concrete builder type
/// \tparam ValueType Type requested from the document
/// \param builder Destination builder
/// \param doc Document to read from
/// \param field Arrow field naming the document field
/// \return Status of the builder's Append/AppendNull call
template <typename ArrowBuilderType, typename ValueType>
inline arrow::Status AppendValue(ArrowBuilderType *builder, const Doc &doc,
                                 std::shared_ptr<arrow::Field> field) {
  auto maybe_value = doc.get<ValueType>(field->name());
  if (maybe_value.has_value()) {
    return builder->Append(maybe_value.value());
  }
  return builder->AppendNull();
}

/// Downcasts `builder` to `ArrowBuilderType` and appends the value of `field`
/// read from `doc` (a null slot when the document has no value).
/// Returns TypeError when `builder` is not an `ArrowBuilderType`.
template <typename ArrowBuilderType, typename ValueType>
inline arrow::Status AppendTypedFieldValue(
    const Doc &doc, const std::shared_ptr<arrow::Field> &field,
    arrow::ArrayBuilder *builder) {
  auto *typed_builder = dynamic_cast<ArrowBuilderType *>(builder);
  if (typed_builder == nullptr) {
    return arrow::Status::TypeError(
        "builder does not match field type, field: ", field->name());
  }
  return AppendValue<ArrowBuilderType, ValueType>(typed_builder, doc, field);
}

/// Appends one list value of `field` read from `doc` to `list_builder`: a
/// null slot when the document has no value, otherwise a new list holding
/// every element. Returns TypeError when the list's child builder is not an
/// `ElementBuilderType`.
template <typename ElementBuilderType, typename ElementType>
inline arrow::Status AppendListFieldValue(
    const Doc &doc, const std::shared_ptr<arrow::Field> &field,
    arrow::ListBuilder *list_builder) {
  auto value = doc.get<std::vector<ElementType>>(field->name());
  if (!value.has_value()) {
    return list_builder->AppendNull();
  }

  auto *element_builder =
      dynamic_cast<ElementBuilderType *>(list_builder->value_builder());
  if (element_builder == nullptr) {
    return arrow::Status::TypeError(
        "list value builder does not match element type, field: ",
        field->name());
  }

  // Open the list slot first, then fill it through the child builder.
  ARROW_RETURN_NOT_OK(list_builder->Append());
  const auto &list_value = value.value();
  for (const auto &item : list_value) {
    ARROW_RETURN_NOT_OK(element_builder->Append(item));
  }
  return arrow::Status::OK();
}

/// Appends the value that `doc` holds for `field` to `builder`.
///
/// The Arrow type of `field` selects both the concrete builder class that
/// `builder` must be and the C++ type requested from the document. Supported
/// field types are STRING, INT32, INT64, UINT32, UINT64, DOUBLE, FLOAT, BOOL,
/// BINARY and LIST of any of those element types. A missing document value is
/// appended as null.
///
/// \param doc Document to read the value from
/// \param field Arrow field describing the column
/// \param builder Builder of the column; its dynamic type must match `field`
/// \return OK on success; TypeError when `builder` (or a list's child
/// builder) does not match the field type; NotImplemented for unsupported
/// types; otherwise the error of the underlying builder
inline arrow::Status AppendFieldValueToBuilder(
    const Doc &doc, const std::shared_ptr<arrow::Field> &field,
    arrow::ArrayBuilder *builder) {
  auto type = field->type()->id();
  switch (type) {
    case arrow::Type::STRING:
      return AppendTypedFieldValue<arrow::StringBuilder, std::string>(
          doc, field, builder);
    case arrow::Type::INT32:
      return AppendTypedFieldValue<arrow::Int32Builder, int32_t>(doc, field,
                                                                 builder);
    case arrow::Type::INT64:
      return AppendTypedFieldValue<arrow::Int64Builder, int64_t>(doc, field,
                                                                 builder);
    case arrow::Type::UINT32:
      return AppendTypedFieldValue<arrow::UInt32Builder, uint32_t>(doc, field,
                                                                   builder);
    case arrow::Type::UINT64:
      return AppendTypedFieldValue<arrow::UInt64Builder, uint64_t>(doc, field,
                                                                   builder);
    case arrow::Type::DOUBLE:
      return AppendTypedFieldValue<arrow::DoubleBuilder, double>(doc, field,
                                                                 builder);
    case arrow::Type::FLOAT:
      return AppendTypedFieldValue<arrow::FloatBuilder, float>(doc, field,
                                                               builder);
    case arrow::Type::BOOL:
      return AppendTypedFieldValue<arrow::BooleanBuilder, bool>(doc, field,
                                                                builder);
    case arrow::Type::BINARY:
      return AppendTypedFieldValue<arrow::BinaryBuilder, std::string>(
          doc, field, builder);
    case arrow::Type::LIST: {
      auto list_type =
          std::dynamic_pointer_cast<arrow::ListType>(field->type());
      if (!list_type) {
        return arrow::Status::TypeError("Field type is not ListType");
      }

      auto *list_builder = dynamic_cast<arrow::ListBuilder *>(builder);
      if (list_builder == nullptr) {
        return arrow::Status::TypeError(
            "builder does not match field type, field: ", field->name());
      }

      auto value_type = list_type->value_type()->id();
      switch (value_type) {
        case arrow::Type::BINARY:
          return AppendListFieldValue<arrow::BinaryBuilder, std::string>(
              doc, field, list_builder);
        case arrow::Type::BOOL:
          return AppendListFieldValue<arrow::BooleanBuilder, bool>(
              doc, field, list_builder);
        case arrow::Type::INT32:
          return AppendListFieldValue<arrow::Int32Builder, int32_t>(
              doc, field, list_builder);
        case arrow::Type::INT64:
          return AppendListFieldValue<arrow::Int64Builder, int64_t>(
              doc, field, list_builder);
        case arrow::Type::UINT32:
          return AppendListFieldValue<arrow::UInt32Builder, uint32_t>(
              doc, field, list_builder);
        case arrow::Type::UINT64:
          return AppendListFieldValue<arrow::UInt64Builder, uint64_t>(
              doc, field, list_builder);
        case arrow::Type::FLOAT:
          return AppendListFieldValue<arrow::FloatBuilder, float>(
              doc, field, list_builder);
        case arrow::Type::DOUBLE:
          return AppendListFieldValue<arrow::DoubleBuilder, double>(
              doc, field, list_builder);
        case arrow::Type::STRING:
          return AppendListFieldValue<arrow::StringBuilder, std::string>(
              doc, field, list_builder);
        default:
          return arrow::Status::NotImplemented(
              "unsupported list element type: ", value_type);
      }
    }
    default:
      return arrow::Status::NotImplemented("unsupported type: ", type,
                                           ", field: ", field->name());
  }
}

/// Gathers selected elements of a chunked array into one contiguous array.
///
/// Each entry of `indices_in_chunked_array` is a (chunk index, offset within
/// that chunk) pair; the output holds the addressed elements in the same
/// order. Null elements are preserved as nulls. The chunk pointer is cached
/// between consecutive entries that address the same chunk.
///
/// Chunks are reinterpreted as `ArrowArrayType` with a static_cast and the
/// indices are not range-checked, so the caller must pass a matching array
/// type and valid indices.
///
/// \tparam ArrowArrayType Concrete array type of the chunks
/// \tparam BuilderType Builder producing the output array
/// \param chunked_array Source data
/// \param indices_in_chunked_array (chunk index, offset in chunk) pairs
/// \param out_array Receives the gathered array
/// \return OK, or the first error reported by the builder
template <typename ArrowArrayType, typename BuilderType>
inline arrow::Status BuildArrayFromIndices(
    const std::shared_ptr<arrow::ChunkedArray> &chunked_array,
    const std::vector<std::pair<int64_t, int64_t>> &indices_in_chunked_array,
    std::shared_ptr<arrow::Array> *out_array) {
  BuilderType builder;
  ARROW_RETURN_NOT_OK(
      builder.Reserve(static_cast<int64_t>(indices_in_chunked_array.size())));

  // When the whole column is null-free the per-element null test is skipped.
  const bool may_have_nulls = chunked_array->null_count() != 0;

  const ArrowArrayType *current_chunk = nullptr;
  int64_t current_chunk_index = -1;

  for (const auto &[chunk_index, offset_in_chunk] : indices_in_chunked_array) {
    if (chunk_index != current_chunk_index) {
      const auto &chunk = chunked_array->chunk(chunk_index);
      current_chunk = static_cast<const ArrowArrayType *>(chunk.get());
      current_chunk_index = chunk_index;
    }

    if (may_have_nulls && current_chunk->IsNull(offset_in_chunk)) {
      ARROW_RETURN_NOT_OK(builder.AppendNull());
    } else {
      ARROW_RETURN_NOT_OK(
          builder.Append(current_chunk->Value(offset_in_chunk)));
    }
  }

  return builder.Finish(out_array);
}

// String specialization of BuildArrayFromIndices.
//
// Works in two passes over the index list: the first sums the byte length of
// every selected non-null string so the builder's data buffer can be reserved
// once, the second appends the values (or nulls) in index order.
template <>
inline arrow::Status
BuildArrayFromIndices<arrow::StringArray, arrow::StringBuilder>(
    const std::shared_ptr<arrow::ChunkedArray> &chunked_array,
    const std::vector<std::pair<int64_t, int64_t>> &indices_in_chunked_array,
    std::shared_ptr<arrow::Array> *out_array) {
  arrow::StringBuilder builder;
  ARROW_RETURN_NOT_OK(
      builder.Reserve(static_cast<int64_t>(indices_in_chunked_array.size())));

  const bool all_valid = (chunked_array->null_count() == 0);

  const arrow::StringArray *current = nullptr;
  int64_t current_chunk_no = -1;
  // Points `current` at the requested chunk; a no-op while the chunk index
  // stays the same as in the previous call.
  const auto select_chunk = [&](int64_t chunk_no) {
    if (chunk_no == current_chunk_no) {
      return;
    }
    current = static_cast<const arrow::StringArray *>(
        chunked_array->chunk(chunk_no).get());
    current_chunk_no = chunk_no;
  };

  // Pass 1: total payload size of the selected non-null strings.
  int64_t total_bytes = 0;
  for (const auto &location : indices_in_chunked_array) {
    select_chunk(location.first);
    if (all_valid || !current->IsNull(location.second)) {
      total_bytes += current->Value(location.second).size();
    }
  }
  ARROW_RETURN_NOT_OK(builder.ReserveData(total_bytes));

  // Pass 2: copy the values. Resetting the cached index forces the first
  // chunk to be resolved again.
  current_chunk_no = -1;
  for (const auto &location : indices_in_chunked_array) {
    select_chunk(location.first);
    if (!all_valid && current->IsNull(location.second)) {
      ARROW_RETURN_NOT_OK(builder.AppendNull());
    } else {
      ARROW_RETURN_NOT_OK(builder.Append(current->Value(location.second)));
    }
  }

  return builder.Finish(out_array);
}

// List counterpart of BuildArrayFromIndices: gathers the list rows addressed
// by `indices_in_chunked_array` (chunk index, row index) into one new list
// array of type `list_type`, stored in `*out_array`.
//
// The element builder is created from `list_type->value_type()`; the elements
// of each selected valid row are copied as a slice of the chunk's values
// array, and invalid rows are appended as nulls.
inline arrow::Status BuildListArrayFromIndices(
    const std::shared_ptr<arrow::ChunkedArray> &chunked_array,
    const std::vector<std::pair<int64_t, int64_t>> &indices_in_chunked_array,
    const std::shared_ptr<arrow::ListType> &list_type,
    std::shared_ptr<arrow::Array> *out_array) {
  std::unique_ptr<arrow::ArrayBuilder> element_builder;
  ARROW_RETURN_NOT_OK(arrow::MakeBuilder(arrow::default_memory_pool(),
                                         list_type->value_type(),
                                         &element_builder));

  arrow::ListBuilder builder(arrow::default_memory_pool(),
                             std::move(element_builder), list_type);
  const auto row_count = static_cast<int64_t>(indices_in_chunked_array.size());
  ARROW_RETURN_NOT_OK(builder.Reserve(row_count));

  const arrow::ListArray *current = nullptr;
  int64_t current_chunk_no = -1;

  for (const auto &location : indices_in_chunked_array) {
    const int64_t chunk_no = location.first;
    const int64_t row = location.second;

    // Re-resolve the typed chunk pointer only when the chunk index changes.
    if (chunk_no != current_chunk_no) {
      current = static_cast<const arrow::ListArray *>(
          chunked_array->chunk(chunk_no).get());
      current_chunk_no = chunk_no;
    }

    if (!current->IsValid(row)) {
      ARROW_RETURN_NOT_OK(builder.AppendNull());
      continue;
    }

    const auto first_element = current->value_offset(row);
    const auto element_count = current->value_length(row);

    // Open a new list slot, then copy this row's elements into it.
    ARROW_RETURN_NOT_OK(builder.Append());
    ARROW_RETURN_NOT_OK(builder.value_builder()->AppendArraySlice(
        *current->values()->data(), first_element, element_count));
  }

  return builder.Finish(out_array);
}

// Runtime dispatcher for the BuildArrayFromIndices family: picks the typed
// gather routine that matches the column type of `chunked_array`.
//
// Handles BOOL, INT32, INT64, UINT32, UINT64, FLOAT, DOUBLE, STRING, BINARY
// and LIST columns; every other type yields Status::NotImplemented.
inline arrow::Status BuildArrayFromIndicesWithType(
    const std::shared_ptr<arrow::ChunkedArray> &chunked_array,
    const std::vector<std::pair<int64_t, int64_t>> &indices_in_table,
    std::shared_ptr<arrow::Array> *out_array) {
  const std::shared_ptr<arrow::DataType> column_type = chunked_array->type();

  switch (column_type->id()) {
    // Nested type: needs the concrete list type to build the element builder.
    case arrow::Type::LIST:
      return BuildListArrayFromIndices(
          chunked_array, indices_in_table,
          std::dynamic_pointer_cast<arrow::ListType>(column_type), out_array);

    // Fixed-width types.
    case arrow::Type::BOOL:
      return BuildArrayFromIndices<arrow::BooleanArray, arrow::BooleanBuilder>(
          chunked_array, indices_in_table, out_array);
    case arrow::Type::INT32:
      return BuildArrayFromIndices<arrow::Int32Array, arrow::Int32Builder>(
          chunked_array, indices_in_table, out_array);
    case arrow::Type::INT64:
      return BuildArrayFromIndices<arrow::Int64Array, arrow::Int64Builder>(
          chunked_array, indices_in_table, out_array);
    case arrow::Type::UINT32:
      return BuildArrayFromIndices<arrow::UInt32Array, arrow::UInt32Builder>(
          chunked_array, indices_in_table, out_array);
    case arrow::Type::UINT64:
      return BuildArrayFromIndices<arrow::UInt64Array, arrow::UInt64Builder>(
          chunked_array, indices_in_table, out_array);
    case arrow::Type::FLOAT:
      return BuildArrayFromIndices<arrow::FloatArray, arrow::FloatBuilder>(
          chunked_array, indices_in_table, out_array);
    case arrow::Type::DOUBLE:
      return BuildArrayFromIndices<arrow::DoubleArray, arrow::DoubleBuilder>(
          chunked_array, indices_in_table, out_array);

    // Variable-length types.
    case arrow::Type::STRING:
      return BuildArrayFromIndices<arrow::StringArray, arrow::StringBuilder>(
          chunked_array, indices_in_table, out_array);
    case arrow::Type::BINARY:
      return BuildArrayFromIndices<arrow::BinaryArray, arrow::BinaryBuilder>(
          chunked_array, indices_in_table, out_array);

    default:
      return arrow::Status::NotImplemented("Unsupported element type: ",
                                           column_type->name().c_str());
  }
}

// Opens `uri` for random-access reading.
//
// The URI is first handed to arrow::fs::FileSystemFromUri. When that fails,
// `uri` is treated as a plain local path (a leading "./" is stripped) and
// opened through a LocalFileSystem with use_mmap enabled.
//
// On success `*out_file` holds the opened file. `*out_file_path` receives the
// path that was passed to the filesystem; it is written even when opening the
// file fails, in which case the open error is returned.
inline arrow::Status CreateRandomAccessFileByUri(
    const std::string &uri,
    std::shared_ptr<arrow::io::RandomAccessFile> *out_file,
    std::string *out_file_path) {
  std::shared_ptr<arrow::fs::FileSystem> filesystem;
  std::string resolved_path;

  auto fs_result = arrow::fs::FileSystemFromUri(uri, &resolved_path);
  if (!fs_result.ok()) {
    // Not a URI Arrow understands: fall back to the local filesystem.
    arrow::fs::LocalFileSystemOptions local_options;
    local_options.use_mmap = true;
    filesystem = std::make_shared<arrow::fs::LocalFileSystem>(local_options);

    const bool has_dot_slash = (uri.compare(0, 2, "./") == 0);
    *out_file_path = has_dot_slash ? uri.substr(2) : uri;
  } else {
    filesystem = fs_result.ValueOrDie();
    *out_file_path = resolved_path;
  }

  auto open_result = filesystem->OpenInputFile(*out_file_path);
  if (open_result.ok()) {
    *out_file = open_result.ValueOrDie();
    return arrow::Status::OK();
  }
  return open_result.status();
}

// Maps `column_names` to fields, preserving the order of the names.
//
// LOCAL_ROW_ID is not looked up in `schema`; it always yields a synthetic
// uint64 field. Every other name is resolved through `schema`.
//
// The result always has exactly one entry per requested name. A name that
// `schema->GetFieldIndex()` cannot resolve (it returns -1) produces a nullptr
// entry; the previous code passed that -1 straight to `schema->field()`,
// which is an out-of-range access. Callers that may pass unknown column names
// must check the entries for nullptr.
inline std::vector<std::shared_ptr<arrow::Field>> SelectFields(
    const std::shared_ptr<arrow::Schema> &schema,
    const std::vector<std::string> &column_names) {
  std::vector<std::shared_ptr<arrow::Field>> fields;
  fields.reserve(column_names.size());

  for (const auto &name : column_names) {
    if (name == LOCAL_ROW_ID) {
      fields.push_back(arrow::field(LOCAL_ROW_ID, arrow::uint64()));
      continue;
    }

    const int field_index = schema->GetFieldIndex(name);
    if (field_index < 0) {
      // Unresolvable name: keep the slot so positions still line up with
      // `column_names`, but never index the schema with -1.
      fields.push_back(nullptr);
    } else {
      fields.push_back(schema->field(field_index));
    }
  }
  return fields;
}

// Returns the elements of `arr` at the given positions, in the order the
// positions are listed. The positions are first materialized as an Int32
// array and then handed to arrow::compute::Take.
inline arrow::Result<std::shared_ptr<arrow::Array>> SelectArrayByIndices(
    const std::shared_ptr<arrow::Array> &arr,
    const std::vector<int32_t> &indices) {
  std::shared_ptr<arrow::Array> take_positions;
  {
    arrow::Int32Builder position_builder;
    ARROW_RETURN_NOT_OK(position_builder.AppendValues(indices));
    ARROW_RETURN_NOT_OK(position_builder.Finish(&take_positions));
  }

  return arrow::compute::Take(*arr, *take_positions);
}

// Loads the given scalar forward blocks from the local filesystem and exposes
// them as one in-memory Arrow dataset.
//
// - Blocks with `doc_count_ == 0` or no columns are skipped.
// - Blocks that share the same `min_doc_id_` are merged column-wise into one
//   segment table (a later block replaces an earlier column of the same
//   name). The row count of a segment is the `doc_count_` of the first block
//   read for it, and its columns are ordered by column name.
// - The segment tables are concatenated in ascending `min_doc_id_` order.
//
// Returns Invalid when `scalar_blocks` is empty or no block contributed data,
// and IOError("Failed to read block <id>: ...") when a block file cannot be
// opened or decoded.
//
// Every Arrow Result is checked explicitly: `Result::ValueOrDie()` aborts the
// process on error instead of throwing, so it must not be used as the error
// path. The try/catch only remains for genuine C++ exceptions.
inline arrow::Result<std::shared_ptr<arrow::dataset::Dataset>>
ReadBlocksAsDataset(const std::vector<BlockMeta> &scalar_blocks,
                    const std::string &base_path, uint32_t collection_id,
                    bool use_parquet) {
  auto fs = std::make_shared<arrow::fs::LocalFileSystem>();
  auto pool = arrow::default_memory_pool();

  if (scalar_blocks.empty()) {
    return arrow::Status::Invalid("No block metadata provided");
  }

  using ColData = std::pair<std::shared_ptr<arrow::Field>,
                            std::shared_ptr<arrow::ChunkedArray>>;
  using ColumnMap = std::map<std::string, ColData>;
  // Both maps are keyed by the segment's first doc id. std::map iterates in
  // ascending key order, which is the order the segments are concatenated in.
  std::map<uint64_t, ColumnMap> segments;
  std::map<uint64_t, uint32_t> segment_doc_count;

  for (const auto &block : scalar_blocks) {
    if (block.doc_count_ == 0 || block.columns_.empty()) continue;

    uint64_t start_row = block.min_doc_id_;
    uint32_t expected_count = block.doc_count_;

    std::string filepath = FileHelper::MakeForwardBlockPath(
        base_path, collection_id, block.id_, use_parquet);

    // Adds the block id to an Arrow failure.
    const auto read_error = [&block](const arrow::Status &cause) {
      return arrow::Status::IOError("Failed to read block ",
                                    std::to_string(block.id_), ": ",
                                    cause.ToString());
    };

    try {
      auto file_info_result = fs->GetFileInfo(filepath);
      if (!file_info_result.ok()) {
        return read_error(file_info_result.status());
      }

      auto file_result =
          fs->OpenInputFile(file_info_result.ValueOrDie().path());
      if (!file_result.ok()) {
        return read_error(file_result.status());
      }
      auto file = file_result.ValueOrDie();

      std::shared_ptr<arrow::Table> table;

      if (use_parquet) {
        auto reader_result = parquet::arrow::OpenFile(file, pool);
        if (!reader_result.ok()) {
          return read_error(reader_result.status());
        }
        std::unique_ptr<parquet::arrow::FileReader> reader =
            std::move(reader_result).ValueOrDie();
        ARROW_RETURN_NOT_OK(reader->ReadTable(&table));
      } else {
        auto reader_result = arrow::ipc::RecordBatchFileReader::Open(file);
        if (!reader_result.ok()) {
          return read_error(reader_result.status());
        }
        auto reader = reader_result.ValueOrDie();

        std::vector<std::shared_ptr<arrow::RecordBatch>> batches;
        for (int i = 0; i < reader->num_record_batches(); ++i) {
          auto batch_result = reader->ReadRecordBatch(i);
          if (!batch_result.ok()) {
            return read_error(batch_result.status());
          }
          batches.push_back(batch_result.ValueOrDie());
        }

        auto table_result = arrow::Table::FromRecordBatches(batches);
        if (!table_result.ok()) {
          return read_error(table_result.status());
        }
        table = table_result.ValueOrDie();
      }

      // The first block seen for a start row creates the segment and fixes
      // its row count; later blocks only contribute columns.
      auto slot = segments.emplace(start_row, ColumnMap{});
      if (slot.second) {
        segment_doc_count[start_row] = expected_count;
      }

      ColumnMap &col_map = slot.first->second;
      for (int i = 0; i < table->num_columns(); ++i) {
        const auto &field = table->schema()->field(i);
        col_map[field->name()] = {field, table->column(i)};
      }
    } catch (const std::exception &e) {
      return arrow::Status::IOError("Failed to read block ",
                                    std::to_string(block.id_), ": ", e.what());
    }
  }

  if (segments.empty()) {
    return arrow::Status::Invalid("No valid data blocks found");
  }

  std::vector<std::shared_ptr<arrow::Table>> segment_tables;
  segment_tables.reserve(segments.size());
  for (const auto &segment : segments) {
    const uint32_t count = segment_doc_count[segment.first];

    std::vector<std::shared_ptr<arrow::Field>> fields;
    std::vector<std::shared_ptr<arrow::ChunkedArray>> columns;
    fields.reserve(segment.second.size());
    columns.reserve(segment.second.size());

    for (const auto &kv : segment.second) {
      fields.push_back(kv.second.first);
      columns.push_back(kv.second.second);
    }

    auto schema = std::make_shared<arrow::Schema>(fields);
    std::shared_ptr<arrow::Table> table =
        arrow::Table::Make(schema, columns, count);
    if (!table) {
      return arrow::Status::Invalid(
          "Failed to create table from schema and columns");
    }
    segment_tables.push_back(table);
  }

  ARROW_ASSIGN_OR_RAISE(auto final_table,
                        arrow::ConcatenateTables(segment_tables));
  auto dataset = std::make_shared<arrow::dataset::InMemoryDataset>(final_table);
  return dataset;
}

// Evaluates `expr` over every row of `dataset` and returns a table whose only
// column, named `new_column_name`, holds the result cast to `expected_type`.
//
// The cast allows integer overflow and float truncation. Every failure along
// the scan pipeline is reported as Status::Invalid.
inline arrow::Result<std::shared_ptr<arrow::Table>>
EvaluateExpressionWithDataset(
    const std::shared_ptr<arrow::dataset::Dataset> &dataset,
    const std::string &new_column_name, const arrow::compute::Expression &expr,
    const std::shared_ptr<arrow::DataType> &expected_type) {
  // Step 1: obtain a scanner builder for the dataset.
  auto builder_result = dataset->NewScan();
  if (!builder_result.ok()) {
    return arrow::Status::Invalid("Failed to create scanner builder");
  }
  auto scanner_builder = std::move(builder_result.ValueOrDie());

  // Step 2: project cast(expr) as the single output column.
  arrow::compute::CastOptions lenient_cast;
  lenient_cast.to_type = expected_type;
  lenient_cast.allow_int_overflow = true;
  lenient_cast.allow_float_truncate = true;
  arrow::Expression cast_expr = call("cast", {expr}, lenient_cast);

  const auto project_status =
      scanner_builder->Project({cast_expr}, {new_column_name});
  if (!project_status.ok()) {
    return arrow::Status::Invalid("Failed to project expression: ",
                                  project_status.ToString());
  }

  // Step 3: build the scanner.
  auto finish_result = scanner_builder->Finish();
  if (!finish_result.ok()) {
    return arrow::Status::Invalid("Failed to finish scanner builder: ",
                                  finish_result.status().ToString());
  }
  auto scanner = std::move(finish_result.ValueOrDie());

  // Step 4: run the scan and materialize the result.
  auto table_result = scanner->ToTable();
  if (!table_result.ok()) {
    return arrow::Status::Invalid("Failed to convert scanner to table: ",
                                  table_result.status().ToString());
  }
  auto evaluated_table = std::move(table_result.ValueOrDie());
  return evaluated_table;
}

// Writes `data` as a series of single-column block files, one per entry of
// `blocks`.
//
// `data` is cut into consecutive slices whose lengths are the `doc_count_`
// values of `blocks`, in order. Each slice is written to the path produced by
// FileHelper::MakeForwardBlockPath for a block id obtained from
// `allocate_block_id`, as Parquet or IPC depending on `use_parquet`. For each
// file a SCALAR BlockMeta carrying the source block's min/max doc id and doc
// count, with `column_name` as its only column, is appended to `*out`.
//
// Returns the first write/close error; entries already appended to `*out`
// stay there.
//
// NOTE(review): the object returned by ChunkedFileWriter::Open is used
// without a check -- confirm Open cannot hand back an empty writer.
inline arrow::Status WriteColumnInBlocks(
    const std::string &column_name,
    const std::shared_ptr<arrow::ChunkedArray> &data,
    const std::vector<BlockMeta> &blocks, const std::string &base_path,
    uint32_t segment_id, std::function<BlockID()> allocate_block_id,
    bool use_parquet, std::vector<BlockMeta> *out) {
  // 64-bit running row offset: ChunkedArray::Slice takes int64_t, and the sum
  // of the per-block doc counts can exceed what a 32-bit int can hold.
  int64_t offset = 0;
  for (const auto &block : blocks) {
    const auto row_count = static_cast<int64_t>(block.doc_count_);

    auto slice = data->Slice(offset, row_count);
    auto field = arrow::field(column_name, slice->type());
    // One schema object serves both the table and the writer.
    auto physic_schema = arrow::schema({field});
    auto table = arrow::Table::Make(physic_schema, {slice});

    BlockID block_id = allocate_block_id();
    std::string path = FileHelper::MakeForwardBlockPath(base_path, segment_id,
                                                        block_id, use_parquet);
    auto writer = ChunkedFileWriter::Open(
        path, physic_schema,
        use_parquet ? FileFormat::PARQUET : FileFormat::IPC);
    ARROW_RETURN_NOT_OK(writer->Write(*table));
    ARROW_RETURN_NOT_OK(writer->Close());

    BlockMeta new_block(block_id, BlockType::SCALAR, block.min_doc_id_,
                        block.max_doc_id_, block.doc_count_, {column_name});

    out->push_back(new_block);

    offset += row_count;
  }
  return arrow::Status::OK();
}

inline int64_t MemorySize(const arrow::RecordBatch &batch) {
  int64_t total = 0;
  for (int i = 0; i < batch.num_columns(); ++i) {
    const auto &array = batch.column(i);
    const auto &data = array->data();
    for (const auto &buffer : data->buffers) {
      if (buffer) {
        total += buffer->size();
      }
    }
  }
  return total;
}

}  // namespace zvec


================================================
FILE: src/db/index/storage/wal/local_wal_file.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "local_wal_file.h"
#include <unistd.h>
#include <zvec/ailego/hash/crc32c.h>
#include <zvec/ailego/logger/logger.h>
#include "db/common/error_code.h"
#include "db/common/file_helper.h"
#include "db/common/typedef.h"

// Largest record content length, in bytes, that read_record() accepts:
// 4194304 = 4 * 1024 * 1024.
#define MAX_RECORD_SIZE 4194304  // 4 MiB

namespace zvec {

int LocalWalFile::append(std::string &&data) {
  WalRecord record;
  record.length_ = data.size();
  record.crc_ = ailego::Crc32c::Hash(
      reinterpret_cast<const void *>(data.data()), record.length_, 0);
  record.content_ = std::forward<std::string>(data);

  if (write_record(record) < 0) {
    WLOG_ERROR("Wal write record error. record.length_[%zu]",
               (size_t)record.length_);
    return -1;
  }
  // if max_docs_wal_flush_ is 0, no need flush
  if (max_docs_wal_flush_ != 0 && docs_count_ >= max_docs_wal_flush_) {
    if (!file_.flush()) {
      WLOG_ERROR("Wal flush error. docs_count_[%zu] max_docs_wal_flush_[%zu]",
                 (size_t)docs_count_, (size_t)max_docs_wal_flush_);
    }
    docs_count_ = 0;
  }
  return 0;
}

//! Read the next record and return its content.
//! Return an empty string at end of file, on a read error, or when the
//! stored crc does not match the crc computed over the content.
std::string LocalWalFile::next() {
  WalRecord record;
  if (read_record(record) <= 0) {
    // end of file or read error
    return std::string();
  }

  const uint32_t computed_crc = ailego::Crc32c::Hash(
      reinterpret_cast<const void *>(record.content_.data()), record.length_,
      0);
  if (computed_crc != record.crc_) {
    WLOG_ERROR(
        "Wal next error. record.length_[%zu] crc_[%zu] != tmp_crc[%zu]",
        (size_t)record.length_, (size_t)record.crc_, (size_t)computed_crc);
    return std::string();
  }

  // content_ is a member of a local, so it has to be moved out explicitly.
  return std::move(record.content_);
}

int LocalWalFile::open(const WalOptions &wal_option) {
  CHECK_STATUS(opened_, false);
  if (wal_option.create_new) {
    if (FileHelper::FileExists(wal_path_)) {
      WLOG_ERROR("Wal open error. file already exist create_new[%d]",
                 wal_option.create_new);
      return -1;
    }

    if (!file_.create(wal_path_, false)) {
      WLOG_ERROR("Wal create error. create_new[%d]", wal_option.create_new);
      return -1;
    }

    // write wal header
    int write_size = file_.write((const void *)&header_, sizeof(header_));
    if (write_size != sizeof(header_)) {
      WLOG_ERROR("Wal write header error. create_new[%d]",
                 wal_option.create_new);
      return -1;
    }

  } else {
    if (!FileHelper::FileExists(wal_path_)) {
      WLOG_ERROR("Wal open error. file is not exist create_new[%d]",
                 wal_option.create_new);
      return -1;
    }

    if (!file_.open(wal_path_.c_str(), false)) {
      WLOG_ERROR("Wal open error. create_new[%d]", wal_option.create_new);
      return -1;
    }

    // open default for write
    file_.seek(0, ailego::File::Origin::End);
  }

  max_docs_wal_flush_ = wal_option.max_docs_wal_flush;
  opened_ = true;

  WLOG_INFO("Wal open success. create_new[%d]", wal_option.create_new);
  return 0;
}

//! Close the underlying file and mark the wal as not opened.
//! Return 0 if success. CHECK_STATUS (defined elsewhere) guards against
//! closing a wal that is not opened.
int LocalWalFile::close() {
  CHECK_STATUS(opened_, true);

  file_.close();
  opened_ = false;

  WLOG_INFO("Wal close success");
  return 0;
}

//! Close the wal if it is opened, then delete its file when it exists.
//! Always return 0; the result of the file removal is not inspected.
int LocalWalFile::remove() {
  if (opened_) {
    close();
  }

  if (!FileHelper::FileExists(wal_path_)) {
    return 0;
  }

  FileHelper::RemoveFile(wal_path_);
  WLOG_INFO("Wal remove success.");
  return 0;
}

//! Flush the underlying file.
//! Return 0 if success or -1 if the flush failed.
int LocalWalFile::flush() {
  CHECK_STATUS(opened_, true);

  if (file_.flush()) {
    return 0;
  }

  WLOG_ERROR("Wal flush error.");
  return -1;
}

//! Rewind to the start of the file and read the wal header into header_,
//! leaving the read position just behind the header.
//! Return 0 if success or -1 if the seek fails, the header cannot be read
//! completely, or its wal_version is not 0.
int LocalWalFile::prepare_for_read() {
  CHECK_STATUS(opened_, true);

  if (!file_.seek(0, ailego::File::Origin::Begin)) {
    return -1;
  }

  const int header_bytes =
      file_.read(static_cast<void *>(&header_), sizeof(header_));
  if (header_bytes != sizeof(header_)) {
    WLOG_ERROR("Wal read header error.");
    return -1;
  }

  // Version 0 is the only layout this reader accepts.
  if (header_.wal_version != 0UL) {
    WLOG_ERROR("Wal version not support error.");
    return -1;
  }
  return 0;
}

//! Write one record as three consecutive pieces: the length field
//! (LENGTH_SIZE bytes), the crc field (CRC_SIZE bytes) and the content
//! (record.length_ bytes). file_mutex_ is held for the whole record.
//! Return 1 if success or -1 if write error
int LocalWalFile::write_record(WalRecord &record) {
  CHECK_STATUS(opened_, true);

  std::lock_guard<std::mutex> lock(file_mutex_);

  int written =
      file_.write(static_cast<const void *>(&record.length_), LENGTH_SIZE);
  if (written != LENGTH_SIZE) {
    WLOG_ERROR("Wal write error. record.length_ error write_size[%d]",
               written);
    return -1;
  }

  written = file_.write(static_cast<const void *>(&record.crc_), CRC_SIZE);
  if (written != CRC_SIZE) {
    WLOG_ERROR("Wal write error. record.crc_ error write_size[%d]", written);
    return -1;
  }

  written = file_.write(static_cast<const void *>(record.content_.data()),
                        record.length_);
  if (written != (int)record.length_) {
    WLOG_ERROR("Wal write error. record.content_ error write_size[%d]",
               written);
    return -1;
  }

  // write one record success
  docs_count_++;
  return 1;
}

//! Read one record at the current file position into `record`: the length
//! field, the crc field, then length_ bytes of content. The crc is only
//! read here, not verified.
//! A stored length of 0 or above MAX_RECORD_SIZE is treated as a read error.
//! Return 1 if success or 0 if eof or -1 if read error
int LocalWalFile::read_record(WalRecord &record) {
  CHECK_STATUS(opened_, true);

  int read_size =
      file_.read(reinterpret_cast<void *>(&record.length_), LENGTH_SIZE);
  if (read_size == 0) {
    WLOG_INFO("Wal read finished. end of file");
    return 0;
  }
  if (read_size != LENGTH_SIZE) {
    WLOG_ERROR("Wal read error. record.length_ error read_size[%d]",
               read_size);
    return -1;
  }

  read_size = file_.read(reinterpret_cast<void *>(&record.crc_), CRC_SIZE);
  if (read_size != CRC_SIZE) {
    WLOG_ERROR("Wal read error. record.crc_ error read_size[%d]", read_size);
    return -1;
  }

  // resize may crash if record.length_ very large. length_ is unsigned, so
  // the lower bound is a plain zero test. The log reports the offending
  // length rather than the size of the preceding crc read.
  if (record.length_ == 0 || record.length_ > MAX_RECORD_SIZE) {
    WLOG_ERROR("Wal read error. record.length_ value error length[%zu]",
               (size_t)record.length_);
    return -1;
  }

  record.content_.resize(record.length_);
  // length_ > 0 here, so content_[0] is a valid element to read into.
  read_size =
      file_.read(static_cast<void *>(&record.content_[0]), record.length_);
  if (read_size != (int)record.length_) {
    WLOG_ERROR("Wal read error. record.content_ error read_size[%d]",
               read_size);
    return -1;
  }

  return 1;  // read one record success
}

};  // namespace zvec

================================================
FILE: src/db/index/storage/wal/local_wal_file.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <atomic>
#include <condition_variable>
#include <deque>
#include <fstream>
#include <mutex>
#include <thread>
#include <unordered_map>
#include <zvec/ailego/io/file.h>
#include "wal_file.h"

namespace zvec {

/*
 * Wal Header info
 *
 * Fixed-size block stored at the very beginning of a wal file; LocalWalFile
 * writes and reads it as raw bytes. Every member is value-initialized so a
 * freshly constructed header never carries indeterminate bytes to disk.
 */
struct WalHeader {
  // Format version of the wal file.
  uint64_t wal_version{0U};
  // Padding up to 64 bytes; always zero in a new header.
  uint64_t reserved_[7]{};
};

static_assert(sizeof(WalHeader) % 64 == 0,
              "Wal Header must be aligned with 64 bytes");

// One wal record as handled in memory: a length field, a crc field and the
// payload. The numeric members start at zero so a default-constructed record
// never holds indeterminate values.
class WalRecord {
 public:
  // Number of payload bytes.
  uint32_t length_{0};
  // Checksum stored alongside the payload.
  uint32_t crc_{0};
  // Payload bytes.
  std::string content_;
};

// WalFile implementation that keeps the log in a single local file accessed
// through ailego::File.
//
// The file starts with a WalHeader and is followed by records. Each record is
// written as a LENGTH_SIZE-byte length, a CRC_SIZE-byte crc32c of the content
// and then the content bytes.
class LocalWalFile : public WalFile {
 public:
  // Not copyable: the object owns the open file.
  LocalWalFile(const LocalWalFile &) = delete;
  LocalWalFile &operator=(const LocalWalFile &) = delete;

  //! Constructor. Only remembers the path; the file is not touched until
  //! open() is called.
  LocalWalFile(const std::string &wal_path) : wal_path_(wal_path) {}

  //! Destructor. Closes the wal if it is still opened.
  ~LocalWalFile() {
    if (opened_) {
      close();
    }
  }

 public:
  //! Append one record holding `data`
  int append(std::string &&data) override;
  //! Rewind to the start of the file and read the header, so that next()
  //! starts at the first record
  int prepare_for_read() override;
  //! Return the content of the next record, or an empty string at end of
  //! file or on error
  std::string next() override;

 public:
  //! Create or open the file according to `wal_option`
  int open(const WalOptions &wal_option) override;

  //! Close the file
  int close() override;

  //! Flush the file
  int flush() override;

  //! Close the wal if needed and delete its file
  int remove() override;

  //! True when the file is larger than the header
  bool has_record() override {
    return file_.size() > sizeof(header_);
  }

 private:
  //! Write length, crc and content of `record`
  int write_record(WalRecord &record);
  //! Read length, crc and content of the next record into `record`
  int read_record(WalRecord &record);

 private:
  ailego::File file_;
  // Sizes in bytes of the on-disk length and crc fields of a record.
  const static int32_t LENGTH_SIZE{4};
  const static int32_t CRC_SIZE{4};

 private:
  // Path of the wal file.
  std::string wal_path_{};
  // Held by write_record() while a record is written.
  std::mutex file_mutex_;
  // Copied from WalOptions::max_docs_wal_flush in open(); 0 means append()
  // never flushes on its own.
  uint32_t max_docs_wal_flush_{0};
  // Records written since append() last reset the counter.
  std::atomic<uint64_t> docs_count_{0UL};
  // Header written by open() for a new file and read by prepare_for_read().
  WalHeader header_;

  // True between a successful open() and close().
  bool opened_{false};
};


};  // namespace zvec


================================================
FILE: src/db/index/storage/wal/wal_file.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "wal_file.h"
#include "local_wal_file.h"

namespace zvec {

// Builds a LocalWalFile for `wal_path`. The file itself is not opened.
WalFilePtr WalFile::Create(const std::string &wal_path) {
  WalFilePtr wal = std::make_shared<LocalWalFile>(wal_path);
  return wal;
}

int WalFile::CreateAndOpen(const std::string &wal_path,
                           const WalOptions &wal_options,
                           WalFilePtr *wal_file) {
  *wal_file = std::make_shared<LocalWalFile>(wal_path);

  return (*wal_file)->open(wal_options);
}


};  // namespace zvec

================================================
FILE: src/db/index/storage/wal/wal_file.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <memory>
#include <string>
#include <vector>


namespace zvec {

class WalFile;

using WalFilePtr = std::shared_ptr<WalFile>;

// Settings passed to WalFile::open().
struct WalOptions {
  // Flush threshold in appended records. LocalWalFile::append() flushes once
  // this many records were written since the last flush; 0 turns that off.
  uint32_t max_docs_wal_flush = 0;
  // true: open() creates the file and fails if it already exists.
  // false: open() opens the file and fails if it does not exist.
  bool create_new = false;
};

// Abstract interface of a write-ahead-log file. Instances are obtained
// through the static factories below.
class WalFile {
 public:
  //! Constructor
  WalFile() = default;

  //! Destructor
  virtual ~WalFile() = default;  // LCOV_EXCL_LINE

  //! Create an instance
  static WalFilePtr Create(const std::string &wal_path);

  //! Create an instance and open it
  static int CreateAndOpen(const std::string &wal_path,
                           const WalOptions &wal_options, WalFilePtr *wal_file);

  // ---- record access ----

  //! Append one record holding `data`
  virtual int append(std::string &&data) = 0;

  //! Get ready for reading records with next()
  virtual int prepare_for_read() = 0;

  //! Return the next record
  virtual std::string next() = 0;

  // ---- file lifecycle ----

  //! Open and initialize WalFile
  virtual int open(const WalOptions &wal_options) = 0;

  //! Close WalFile
  virtual int close() = 0;

  //! Remove wal disk file
  virtual int remove() = 0;

  //! Flush WalFile's memory to disk file
  virtual int flush() = 0;

  //! Whether the wal holds at least one record
  virtual bool has_record() = 0;
};

};  // namespace zvec

================================================
FILE: src/db/proto/zvec.proto
================================================
syntax = "proto3";

package zvec.proto;

option cc_enable_arenas = true;

// The Go package name, refers to
// https://developers.google.com/protocol-buffers/docs/reference/go-generated#package
option go_package = "proxima/zvec/proto";

/*! Types of Data
 *
 * Used by FieldSchema.data_type. The tags are grouped by range: 1-9 for the
 * plain value types, 20-27 for DT_VECTOR_*, 30-31 for DT_SPARSE_VECTOR_* and
 * 40-48 for DT_ARRAY_*. The DT_ARRAY_* entries follow the same order as the
 * plain types, shifted by 39. Tags are part of the serialized format and must
 * not be renumbered.
 */
enum DataType {
  // proto3 requires the first enum value to be zero.
  DT_UNDEFINED = 0;

  DT_BINARY = 1;
  DT_STRING = 2;
  DT_BOOL = 3;
  DT_INT32 = 4;
  DT_INT64 = 5;
  DT_UINT32 = 6;
  DT_UINT64 = 7;
  DT_FLOAT = 8;
  DT_DOUBLE = 9;

  // Dense vectors, one entry per element type.
  DT_VECTOR_BINARY32 = 20;
  DT_VECTOR_BINARY64 = 21;
  DT_VECTOR_FP16 = 22;
  DT_VECTOR_FP32 = 23;
  DT_VECTOR_FP64 = 24;
  DT_VECTOR_INT4 = 25;
  DT_VECTOR_INT8 = 26;
  DT_VECTOR_INT16 = 27;

  // Sparse vectors.
  DT_SPARSE_VECTOR_FP16 = 30;
  DT_SPARSE_VECTOR_FP32 = 31;

  // ARRAY
  DT_ARRAY_BINARY = 40;
  DT_ARRAY_STRING = 41;
  DT_ARRAY_BOOL = 42;
  DT_ARRAY_INT32 = 43;
  DT_ARRAY_INT64 = 44;
  DT_ARRAY_UINT32 = 45;
  DT_ARRAY_UINT64 = 46;
  DT_ARRAY_FLOAT = 47;
  DT_ARRAY_DOUBLE = 48;
};

// Kind of index. Tags 1-4 are the Proxima indexes named in the comments
// below; tag 10 is the invert index. Each defined kind has a matching
// parameter message in the IndexParams oneof.
enum IndexType {
  // Undefined
  IT_UNDEFINED = 0;
  // Proxima HNSW Index
  IT_HNSW = 1;
  // Proxima IVF Index
  IT_IVF = 2;
  // Proxima FLAT Index
  IT_FLAT = 3;
  // Proxima HNSW RABITQ Index
  IT_HNSW_RABITQ = 4;
  // Invert Index
  IT_INVERT = 10;
};

// Quantization setting carried in BaseIndexParams.quantize_type.
// QT_UNDEFINED is the proto3 default; presumably it means "no quantization"
// -- confirm against the code that reads it.
enum QuantizeType {
  QT_UNDEFINED = 0;
  QT_FP16 = 1;
  QT_INT8 = 2;
  QT_INT4 = 3;
  QT_RABITQ = 4;
};

// Metric setting carried in BaseIndexParams.metric_type. Going by the names,
// the values stand for L2 distance, inner product and cosine -- confirm
// against the code that reads it.
enum MetricType {
  MT_UNDEFINED = 0;
  MT_L2 = 1;
  MT_IP = 2;
  MT_COSINE = 3;
};

// Parameters of an invert index (the `invert` case of IndexParams).
message InvertIndexParams {
  // Switch for the range optimization; its effect is defined by the invert
  // index implementation, not by this file.
  bool enable_range_optimization = 1;
};

// Settings shared by the HNSW, HNSW RABITQ, FLAT and IVF parameter messages,
// each of which embeds this message as its `base` field.
message BaseIndexParams {
  MetricType metric_type = 1;
  QuantizeType quantize_type = 2;
};

// Parameters of an HNSW index (the `hnsw` case of IndexParams).
// `m` and `ef_construction` presumably are the usual HNSW build parameters
// (graph degree and construction-time candidate list size) -- confirm against
// the index implementation.
message HnswIndexParams {
  BaseIndexParams base = 1;
  int32 m = 2;
  int32 ef_construction = 3;
}

// Parameters of an HNSW RABITQ index (the `hnsw_rabitq` case of IndexParams).
// Fields 1-3 mirror HnswIndexParams; fields 4-6 are additional to it. Their
// exact meaning is defined by the index implementation, not by this file.
message HnswRabitqIndexParams {
  BaseIndexParams base = 1;
  int32 m = 2;
  int32 ef_construction = 3;
  int32 total_bits = 4;
  int32 num_clusters = 5;
  int32 sample_count = 6;
}

// Parameters of a FLAT index (the `flat` case of IndexParams); it has no
// settings beyond the shared base ones.
message FlatIndexParams {
  BaseIndexParams base = 1;
}

// Parameters of an IVF index (the `ivf` case of IndexParams).
// `n_list` and `n_iters` presumably are the number of inverted lists and the
// number of clustering iterations -- confirm against the index
// implementation. The effect of `use_soar` is likewise defined there.
message IVFIndexParams {
  BaseIndexParams base = 1;
  int32 n_list = 2;
  int32 n_iters = 3;
  bool use_soar = 4;
}

// Index configuration of a field (see FieldSchema.index_params). At most one
// of the cases is set; which one is set identifies the kind of index.
message IndexParams {
  oneof params {
    InvertIndexParams invert = 1;
    HnswIndexParams hnsw = 2;
    FlatIndexParams flat = 3;
    IVFIndexParams ivf = 4;
    HnswRabitqIndexParams hnsw_rabitq = 5;
  };
};

// Description of one field of a collection.
message FieldSchema {
  string name = 1;
  DataType data_type = 2;
  // Presumably the vector dimension for vector data types -- confirm how it
  // is treated for the other types.
  uint32 dimension = 3;
  bool nullable = 4;
  IndexParams index_params = 5;
};

// Schema of a collection: its name, its fields and a per-segment document
// limit.
message CollectionSchema {
  string name = 1;
  repeated FieldSchema fields = 2;
  uint64 max_doc_count_per_segment = 3;
};

// Kind of data stored in a block (see BlockMeta.block_type, where it is used
// to derive the filename prefix).
enum BlockType {
  BT_UNDEFINED = 0;
  BT_SCALAR = 1;
  BT_SCALAR_INDEX = 2;
  BT_VECTOR_INDEX = 3;
  BT_VECTOR_INDEX_QUANTIZE = 4;
};

// Metadata of one block: its id and type, the document id range and count it
// covers, and the columns stored in it.
message BlockMeta {
  uint32 block_id = 1;
  BlockType block_type = 2;  // for getting filename prefix
  uint64 min_doc_id = 3;
  uint64 max_doc_id = 4;
  uint64 doc_count = 5;
  repeated string columns = 6;  // columns contained in this block
};

// message AlterColumnMeta {
//   string old_column_name = 1;
//   FieldSchema new_schema = 2;
// };

// Metadata of one segment: the blocks already persisted for it, the forward
// block currently being written, and the vector fields that have an index.
message SegmentMeta {
  uint32 segment_id = 1;
  // scalar data, vector data and vector index
  repeated BlockMeta persisted_blocks = 2;

  BlockMeta writing_forward_block = 3;

  // if indexed, index_params can be retrieved from schema
  // if not indexed, index_params is default index_params(flat)
  repeated string indexed_vector_fields = 4;
  // Field number 10 is used by the disabled declaration below.
  // repeated AlterColumnMeta alter_columns = 10;
};

// Top-level metadata record: the schema plus the metadata of the persisted
// segments and of the segment currently being written.
message Manifest {
  uint32 version = 1;

  CollectionSchema schema = 2;

  bool enable_mmap = 3;

  repeated SegmentMeta persisted_segment_metas = 4;

  SegmentMeta writing_segment_meta = 5;

  // Numeric suffixes; presumably appended to the id-map and delete-snapshot
  // file paths -- confirm against the code that builds those paths.
  uint32 id_map_path_suffix = 6;
  uint32 delete_snapshot_path_suffix = 7;

  uint32 next_segment_id = 8;
};

================================================
FILE: src/db/sqlengine/CMakeLists.txt
================================================
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

# SQL engine static library (zvec_sqlengine).
# Sources are picked up by glob from this directory and from the common,
# antlr/gen, parser, analyzer and planner subdirectories. The target links
# against zvec_index, zvec_common, antlr4 and Arrow's acero library, and adds
# this directory and ${PROJECT_ROOT_DIR}/src as include directories.
# cc_library is presumably provided by the cmake/bazel.cmake include above.
cc_library(
    NAME zvec_sqlengine STATIC STRICT
    SRCS *.cc common/*.cc antlr/gen/*.cc parser/*.cc analyzer/*.cc planner/*.cc planner/ops/*.cc planner/physical_rules/*.cc
    LIBS zvec_index
         zvec_common
         antlr4
         Arrow::arrow_acero
    INCS . ${PROJECT_ROOT_DIR}/src
    VERSION "${PROXIMA_ZVEC_VERSION}"
  )


================================================
FILE: src/db/sqlengine/analyzer/query_analyzer.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "query_analyzer.h"
#include <cstddef>
#include <zvec/ailego/logger/logger.h>
#include <zvec/ailego/pattern/expected.hpp>
#include <zvec/ailego/utility/string_helper.h>
#include <zvec/core/framework/index_meta.h>
#include <zvec/db/config.h>
#include <zvec/db/status.h>
#include <zvec/db/type.h>
#include "db/common/constants.h"
#include "db/common/error_code.h"
#include "db/index/common/type_helper.h"
#include "db/sqlengine/analyzer/query_node.h"
#include "db/sqlengine/common/util.h"
#include "db/sqlengine/parser/select_info.h"
#include "query_info_helper.h"
#include "simple_rewriter.h"

namespace zvec::sqlengine {

// Translation table from parser node ops (NodeOp) to analyzer node ops
// (QueryNodeOp). An op that is absent from this table is reported as
// QueryNodeOp::Q_NONE by nodeop_2_query_nodeop().
const std::map<NodeOp, QueryNodeOp> QueryAnalyzer::opMap_ = {
    // logical connectives
    {NodeOp::T_AND, QueryNodeOp::Q_AND},
    {NodeOp::T_OR, QueryNodeOp::Q_OR},

    // comparisons and pattern / membership tests
    {NodeOp::T_EQ, QueryNodeOp::Q_EQ},
    {NodeOp::T_NE, QueryNodeOp::Q_NE},
    {NodeOp::T_GT, QueryNodeOp::Q_GT},
    {NodeOp::T_GE, QueryNodeOp::Q_GE},
    {NodeOp::T_LT, QueryNodeOp::Q_LT},
    {NodeOp::T_LE, QueryNodeOp::Q_LE},
    {NodeOp::T_LIKE, QueryNodeOp::Q_LIKE},
    {NodeOp::T_IN, QueryNodeOp::Q_IN},
    {NodeOp::T_CONTAIN_ALL, QueryNodeOp::Q_CONTAIN_ALL},
    {NodeOp::T_CONTAIN_ANY, QueryNodeOp::Q_CONTAIN_ANY},
    {NodeOp::T_IS_NULL, QueryNodeOp::Q_IS_NULL},
    {NodeOp::T_IS_NOT_NULL, QueryNodeOp::Q_IS_NOT_NULL},

    // arithmetic
    {NodeOp::T_PLUS, QueryNodeOp::Q_PLUS},
    {NodeOp::T_MINUS, QueryNodeOp::Q_MINUS},
    {NodeOp::T_MUL, QueryNodeOp::Q_MUL},
    {NodeOp::T_DIV, QueryNodeOp::Q_DIV},

    // function calls and identifiers
    {NodeOp::T_FUNCTION_CALL, QueryNodeOp::Q_FUNCTION_CALL},
    {NodeOp::T_ID, QueryNodeOp::Q_ID},

    // composite values
    {NodeOp::T_RANGE_VALUE, QueryNodeOp::Q_RANGE_VALUE},
    {NodeOp::T_LIST_VALUE, QueryNodeOp::Q_LIST_VALUE},
    {NodeOp::T_VECTOR_MATRIX_VALUE, QueryNodeOp::Q_VECTOR_MATRIX_VALUE},

    // scalar constants
    {NodeOp::T_INT_VALUE, QueryNodeOp::Q_INT_VALUE},
    {NodeOp::T_FLOAT_VALUE, QueryNodeOp::Q_FLOAT_VALUE},
    {NodeOp::T_STRING_VALUE, QueryNodeOp::Q_STRING_VALUE},
    {NodeOp::T_BOOL_VALUE, QueryNodeOp::Q_BOOL_VALUE},
    {NodeOp::T_NULL_VALUE, QueryNodeOp::Q_NULL_VALUE},
};

// Analyze a parsed SQL statement against the collection schema and produce the
// QueryInfo used by later stages.
//
// Steps, in order:
//   1. copy sql_info into a fresh QueryInfo (sql_info itself is not modified);
//   2. resolve every selected field against the schema;
//   3. if a search condition exists: rewrite it, validate it, split it into
//      vector / invert / forward-filter parts and record forward filter meta;
//   4. resolve and validate ORDER BY fields;
//   5. validate GROUP BY (requires a vector query, no ORDER BY, and a
//      non-array, non-vector forward field).
//
// @param schema    schema of the collection being queried.
// @param sql_info  parsed statement; must not be null.
// @return the populated QueryInfo, or an unexpected Status describing the first
//         validation failure.
Result<QueryInfo::Ptr> QueryAnalyzer::analyze(const CollectionSchema &schema,
                                              SQLInfo::Ptr sql_info) {
  // Guard against a null statement instead of dereferencing it below.
  if (sql_info == nullptr) {
    return tl::make_unexpected(Status::InvalidArgument("sql_info is null"));
  }

  // create query_info from sql_info. The purpose:
  // 1. Keep module isolated
  // 2. Everything in sql_info should be read-only, any potential changes
  // should apply to query_info. Especially for changes about syntax
  // optimization applied to query_info.
  // 3. add more necessary information and more analyzing
  // result to QueryInfo, so as to ease plan and execution.
  auto query_info_ret = create_queryinfo_from_sqlinfo(schema, *sql_info);
  if (!query_info_ret) {
    return query_info_ret;
  }
  auto query_info = std::move(query_info_ret.value());

  // select list check
  for (auto &query_field_info : query_info->query_fields()) {
    const std::string &field_name = query_field_info->field_name();
    auto forward_field = schema.get_field(field_name);
    if (!forward_field) {
      return tl::make_unexpected(
          Status::InvalidArgument(field_name, " not defined in schema"));
    }

    // set forward field info as reference
    query_field_info->set_field_schema_ptr(forward_field);

    // add forward field info
    query_info->add_select_item_schema_ptr(field_name, forward_field);
  }

  // condition check & decide index/filter condition
  if (query_info->search_cond() != nullptr) {
    // rewrite query by rule
    SimpleRewriter rewriter;
    rewriter.rewrite(query_info.get());

    SearchCondCheckWalker search_cond_check_walker(schema);
    search_cond_check_walker.traverse_cond_node(query_info->search_cond());
    if (!search_cond_check_walker.err_msg().empty()) {
      return tl::make_unexpected(
          Status::NotSupported(search_cond_check_walker.err_msg()));
    }

    size_t num_of_filters = search_cond_check_walker.filter_rels().size() +
                            search_cond_check_walker.invert_rels().size();
    if (num_of_filters > kMaxNumOfFilters) {
      return tl::make_unexpected(
          Status::NotSupported("max number of filters is "
                               "limited to 4096"));
    }

    auto st = decide_filter_index_cond(schema, search_cond_check_walker,
                                       query_info.get());
    if (!st.ok()) {
      // Propagate the original status so the caller sees the real cause
      // (e.g. an invalid vector condition) rather than a generic error.
      return tl::make_unexpected(st);
    }
    // add forward filter meta according to final result
    auto status = set_forward_filter_meta(schema, query_info.get(),
                                          query_info->filter_cond().get());
    if (!status.ok()) {
      return tl::make_unexpected(status);
    }

    // for special feature: post filtering, move filters to post filters
    if (query_info->vector_cond_info() &&
        query_info->vector_cond_info()->post_filter_topk() > 0) {
      query_info->set_post_invert_cond(query_info->invert_cond());
      query_info->set_invert_cond(nullptr);
      query_info->set_post_filter_cond(query_info->filter_cond());
      query_info->set_filter_cond(nullptr);
      LOG_DEBUG("post filter is applied. %u",
                query_info->vector_cond_info()->post_filter_topk());
    }
  }

  // orderby list check
  for (auto &query_orderby_info : query_info->query_orderbys()) {
    const std::string &field_name = query_orderby_info->field_name();
    auto forward_field = schema.get_forward_field(field_name);

    if (forward_field == nullptr) {
      return tl::make_unexpected(
          Status::InvalidArgument(field_name, " not defined in schema"));
    }

    if (forward_field->is_array_type()) {
      return tl::make_unexpected(Status::InvalidArgument(
          "order by fields should not be array data type"));
    }

    // set forward field info as reference
    query_orderby_info->set_field_schema_ptr(forward_field);

    // add forward field info
    query_info->add_orderby_item_schema_ptr(field_name, forward_field);
  }

  // group by check
  if (const auto &group = query_info->group_by(); group != nullptr) {
    if (!query_info->vector_cond_info()) {
      return tl::make_unexpected(
          Status::InvalidArgument("group by should has vector query"));
    }
    if (!query_info->query_orderbys().empty()) {
      return tl::make_unexpected(
          Status::InvalidArgument("group by not "
                                  "support order by forward"));
    }
    auto forward_field = schema.get_forward_field(group->group_by_field);
    if (!forward_field) {
      // Leading space keeps the field name and the message apart, matching the
      // select-list and order-by errors above.
      return tl::make_unexpected(Status::InvalidArgument(
          group->group_by_field, " not defined in schema"));
    }
    if (forward_field->is_array_type()) {
      return tl::make_unexpected(
          Status::InvalidArgument("group by fields "
                                  "should not be array data type"));
    }
    if (forward_field->is_vector_field()) {
      return tl::make_unexpected(
          Status::InvalidArgument("group by fields "
                                  "should not be vector data type"));
    }
    query_info->set_group_by_schema_ptr(forward_field);
  }
  return query_info;
}

// Walk the forward-filter condition tree and, for every relational leaf, mark
// it as a forward condition and register the schema of the field it filters on
// in query_info.
//
// Logic nodes are traversed recursively (left first, then right). For a
// relational leaf the filtered field is taken from the left operand, which must
// be either an identifier or a call to the array-length function on a field.
//
// @param schema       collection schema used to resolve the field.
// @param query_info   receives the forward filter field schema.
// @param filter_cond  root of the (sub)tree to process; null is a no-op.
// @return OK on success; NotSupported for an unsupported left operand or
//         function; InvalidArgument when the operand or field is missing.
Status QueryAnalyzer::set_forward_filter_meta(const CollectionSchema &schema,
                                              QueryInfo *query_info,
                                              QueryNode *filter_cond) {
  if (filter_cond == nullptr) {
    return Status::OK();
  }

  if (filter_cond->type() == QueryNode::QueryNodeType::LOGIC_EXPR) {
    QueryNode *left_child = filter_cond->left().get();
    QueryNode *right_child = filter_cond->right().get();
    if (left_child != nullptr) {
      auto ret = set_forward_filter_meta(schema, query_info, left_child);
      if (!ret.ok()) {
        return ret;
      }
    }
    if (right_child != nullptr) {
      return set_forward_filter_meta(schema, query_info, right_child);
    }
    return Status::OK();
  }

  // Downcast within the QueryNode hierarchy; static_cast is the appropriate
  // named cast here (reinterpret_cast does not adjust for base offsets).
  QueryRelNode *query_rel_node = static_cast<QueryRelNode *>(filter_cond);
  query_rel_node->set_forward();
  std::string forward_field_name;
  auto *left_node = query_rel_node->left_node();
  if (left_node == nullptr) {
    return Status::InvalidArgument("filter condition has no left operand");
  }
  if (left_node->op() == QueryNodeOp::Q_ID) {
    forward_field_name = left_node->text();
  } else if (left_node->op() == QueryNodeOp::Q_FUNCTION_CALL) {
    const QueryFuncNode *func_node =
        dynamic_cast<const QueryFuncNode *>(left_node);
    if (func_node == nullptr) {
      // The op says "function call" but the node is of another class.
      return Status::InternalError("function call node has unexpected type");
    }
    const auto &arguments = func_node->arguments();
    auto func_name = func_node->get_func_name();
    if (func_name == kFuncArrayLength) {
      if (arguments.empty() || arguments[0] == nullptr) {
        return Status::InvalidArgument("function ", func_name,
                                       " requires a field argument");
      }
      forward_field_name = arguments[0]->text();
    } else {
      return Status::NotSupported("function ", func_name, " is not supported");
    }
  } else {
    return Status::NotSupported("left node ", left_node->op(),
                                " is not supported");
  }
  auto forward_field = schema.get_forward_field(forward_field_name);
  if (forward_field == nullptr) {
    return Status::InvalidArgument(forward_field_name, " not found in schema");
  }
  if (forward_field->has_invert_index()) {
    // invert condition to forward condition
    QueryNode *right_node =
        std::dynamic_pointer_cast<QueryNode>(query_rel_node->right()).get();
    // Revert numeric buf to numeric text
    QueryInfoHelper::constant_node_data_buf_2_text(
        forward_field->element_data_type(), forward_field->is_array_type(),
        right_node);
  }

  // forward_field is guaranteed non-null here (checked above).
  query_info->add_forward_filter_schema_ptr(forward_field_name, forward_field);
  return Status::OK();
}

// Split the search condition of query_info into its vector, invert and
// forward-filter parts, based on the relations collected by
// search_cond_check_walker. On success the search condition of query_info is
// cleared; whatever was not detached as vector/invert condition becomes the
// filter condition, provided the walker reported at least one filter relation.
Status QueryAnalyzer::decide_filter_index_cond(
    const CollectionSchema &schema,
    const SearchCondCheckWalker &search_cond_check_walker,
    QueryInfo *query_info) {
  QueryRelNode *const vector_rel = search_cond_check_walker.vector_rel();
  const bool has_vector_rel = (vector_rel != nullptr);
  const uint32_t vector_count = has_vector_rel ? 1 : 0;
  const uint32_t invert_count =
      static_cast<uint32_t>(search_cond_check_walker.invert_rels().size());
  const uint32_t filter_count =
      static_cast<uint32_t>(search_cond_check_walker.filter_rels().size());

  LOG_DEBUG("vector_rel_size[%u] invert[%u] filter[%u]", vector_count,
            invert_count, filter_count);

  // If every invert relation lives under a single sub-tree, detach that
  // sub-tree from the search condition and use it as the invert condition.
  if (invert_count > 0) {
    if (QueryNode *invert_subroot =
            get_invert_subroot(query_info->search_cond().get());
        invert_subroot != nullptr) {
      LOG_DEBUG(
          "all invert conds are under one sub-root, invert query applied. "
          "[%s]",
          invert_subroot->text().c_str());
      query_info->set_invert_cond(
          invert_subroot->detach_from_search_cond(query_info));
    }
  }

  // Validate the vector relation, convert it and detach it from the tree.
  if (has_vector_rel) {
    if (vector_rel->or_ancestor()) {
      return Status::InvalidArgument(
          "vector condition must NOT be OR ancestor.");
    }
    std::shared_ptr<QueryInfo::QueryVectorCondInfo> converted;
    if (Status st = check_and_convert_vector(schema, vector_rel, &converted);
        !st.ok()) {
      return st;
    }
    vector_rel->detach_from_search_cond(query_info);
    query_info->set_vector_cond_info(std::move(converted));
  }

  // Nothing left in the search condition: done.
  if (query_info->search_cond() == nullptr) {
    return Status::OK();
  }

  // The remainder is the forward filter, but only when filter relations exist.
  if (filter_count != 0) {
    query_info->set_filter_cond(query_info->search_cond());
  }
  // Everything has been handed to vector/invert/forward, so the search
  // condition itself is cleared.
  query_info->set_search_cond(nullptr);
  return Status::OK();
}

// Ask QueryInfoHelper::find_subroot_by_rule for the sub-root of search_cond
// selected by the "relation is an invert relation" rule, and return the
// subroot it reports (whatever SubRootResult::subroot holds afterwards).
QueryNode *QueryAnalyzer::get_invert_subroot(QueryNode *search_cond) {
  std::function<bool(QueryRelNode *)> is_invert_rel =
      [](QueryRelNode *candidate) { return candidate->is_invert(); };

  SubRootResult found;
  QueryInfoHelper::find_subroot_by_rule(search_cond, is_invert_rel, &found);
  return found.subroot;
}

// Build a QueryInfo from a parsed SELECT statement.
//
// Copies the search condition tree, the select list (expanding `*` to all
// non-vector forward fields and, when requested, appending vector fields), the
// doc-id flag, the ORDER BY list, the limit (DEFAULT_TOPN when the statement's
// limit is not positive) and the GROUP BY clause.
//
// Returns NotSupported for non-SELECT statements and InternalError when the
// statement carries no SelectInfo or the condition tree cannot be copied.
Result<QueryInfo::Ptr> QueryAnalyzer::create_queryinfo_from_sqlinfo(
    const CollectionSchema &schema, const SQLInfo &sql_info) {
  QueryInfo::Ptr result = std::make_shared<QueryInfo>();

  if (sql_info.type() != SQLInfo::SQLType::SELECT) {
    return tl::make_unexpected(Status::NotSupported("only select is supported"));
  }

  SelectInfo::Ptr select_info =
      std::dynamic_pointer_cast<SelectInfo>(sql_info.base_info());
  if (select_info == nullptr) {
    return tl::make_unexpected(Status::InternalError("select_info is null"));
  }

  // Copy the condition tree; failures are reported through copy_error.
  std::string copy_error;
  result->set_search_cond(
      create_querynode_from_node(select_info->search_cond(), 0, &copy_error));
  if (!copy_error.empty()) {
    return tl::make_unexpected(Status::InternalError(
        "create querynode from node failed: ", copy_error));
  }

  // Adds a plain field (no alias, no function) to the select list.
  const auto add_plain_field = [&result](const std::string &name) {
    result->add_query_field(
        std::make_shared<QueryFieldInfo>(name, "", "", "", false));
  };

  // Select list.
  for (const auto &elem : select_info->selected_elems()) {
    if (elem->is_empty()) {
      continue;  // leave query_field to be null
    }

    if (!elem->is_asterisk()) {
      result->add_query_field(std::make_shared<QueryFieldInfo>(
          elem->field_name(), elem->alias(), elem->func_name(),
          elem->func_param(), elem->is_func_param_asterisk()));
      continue;
    }

    // `*` expands to every forward field that is not a vector field.
    result->set_asterisk(true);
    for (const auto &forward_field : schema.forward_fields()) {
      if (zvec::FieldSchema::is_vector_field(
              forward_field->element_data_type())) {
        continue;
      }
      add_plain_field(forward_field->name());
    }
  }

  // Optionally append the vector fields that are not selected yet.
  if (select_info->include_vector()) {
    result->set_include_vector(true);
    for (const auto &vector_field : schema.vector_fields()) {
      if (result->exists_in_query_fields(vector_field->name())) {
        continue;
      }
      add_plain_field(vector_field->name());
    }
  }
  result->set_include_doc_id(select_info->is_include_doc_id());

  // ORDER BY list.
  for (const auto &orderby_elem : select_info->orderby_elems()) {
    result->add_query_orderby(std::make_shared<QueryOrderbyInfo>(
        orderby_elem->field_name(), orderby_elem->is_desc()));
  }

  // topN: use the statement's limit when positive, the default otherwise.
  if (select_info->limit() > 0) {
    result->set_query_topn(select_info->limit());
  } else {
    result->set_query_topn(DEFAULT_TOPN);
  }

  // GROUP BY clause.
  result->set_group_by(select_info->group_by());

  return result;
}

// Recursively copy a parser Node tree into an analyzer QueryNode tree.
//
// The concrete QueryNode subclass is chosen from the node type/op: relational
// expressions, constants, identifiers, vector matrices, function calls and
// value lists get their dedicated classes; every other op becomes a plain
// QueryNode. The op is translated through nodeop_2_query_nodeop().
//
// @param node   parser node to copy; null yields null without an error.
// @param level  level to assign to the created node; left/right children and
//               function name/arguments get level + 1.
// @param err    receives an error description on failure (must not be null).
// @return the new node, or nullptr on failure (with *err set).
//
// NOTE(review): list-value elements are created at the list node's own level,
// unlike function arguments (level + 1) - kept as is, confirm it is intended.
QueryNode::Ptr QueryAnalyzer::create_querynode_from_node(const Node::Ptr &node,
                                                         uint32_t level,
                                                         std::string *err) {
  if (node == nullptr) {
    return nullptr;
  }

  const auto node_op = node->op();

  // Failure path for a node whose concrete class does not match its op: the
  // downcast below would otherwise yield a null pointer that gets dereferenced.
  const auto class_mismatch = [err, node_op]() -> QueryNode::Ptr {
    *err = "node op is not handled. " + Node::type_to_str(node_op);
    return nullptr;
  };

  QueryNode::Ptr query_node;

  // copy subclass object according to node op
  if (node->type() == Node::NodeType::REL_EXPR) {
    // REL_EXPR include T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_LIKE, T_IN,
    // T_CONTAIN_ALL, T_CONTAIN_ANY, T_IS_NULL, T_IS_NOT_NULL
    // use type == REL_EXPR to simplify
    query_node = std::make_shared<QueryRelNode>();
  } else if (node_op == NodeOp::T_INT_VALUE ||
             node_op == NodeOp::T_FLOAT_VALUE ||
             node_op == NodeOp::T_STRING_VALUE ||
             node_op == NodeOp::T_NULL_VALUE ||
             node_op == NodeOp::T_BOOL_VALUE) {
    ConstantNode::Ptr constant_node =
        std::dynamic_pointer_cast<ConstantNode>(node);
    if (constant_node == nullptr) {
      return class_mismatch();
    }
    query_node = std::make_shared<QueryConstantNode>(constant_node->value());
  } else if (node_op == NodeOp::T_ID) {
    IDNode::Ptr id_node = std::dynamic_pointer_cast<IDNode>(node);
    if (id_node == nullptr) {
      return class_mismatch();
    }
    query_node = std::make_shared<QueryIDNode>(id_node->value());
  } else if (node_op == NodeOp::T_VECTOR_MATRIX_VALUE) {
    VectorMatrixNode::Ptr vector_node =
        std::dynamic_pointer_cast<VectorMatrixNode>(node);
    if (vector_node == nullptr) {
      return class_mismatch();
    }
    query_node =
        std::make_shared<QueryVectorMatrixNode>(std::move(vector_node));
  } else if (node_op == NodeOp::T_FUNCTION_CALL) {
    FuncNode::Ptr func_node = std::dynamic_pointer_cast<FuncNode>(node);
    if (func_node == nullptr) {
      return class_mismatch();
    }
    QueryFuncNode::Ptr query_func_node = std::make_shared<QueryFuncNode>();
    query_func_node->set_func_name_node(create_querynode_from_node(
        func_node->get_func_name_node(), level + 1, err));
    // Iterate by reference: no shared_ptr copy per argument.
    for (const auto &argument : func_node->arguments()) {
      query_func_node->add_argument(
          create_querynode_from_node(argument, level + 1, err));
    }
    query_node = std::move(query_func_node);
  } else if (node_op == NodeOp::T_LIST_VALUE) {
    InValueExprListNode::Ptr in_value_expr_list_node =
        std::dynamic_pointer_cast<InValueExprListNode>(node);
    if (in_value_expr_list_node == nullptr) {
      return class_mismatch();
    }
    QueryListNode::Ptr query_in_value_expr_node =
        std::make_shared<QueryListNode>();

    for (const auto &in_value_expr :
         in_value_expr_list_node->in_value_expr_list()) {
      query_in_value_expr_node->add_value_expr(
          create_querynode_from_node(in_value_expr, level, err));
    }
    query_in_value_expr_node->set_exclude(in_value_expr_list_node->exclude());
    query_node = std::move(query_in_value_expr_node);
  } else { /* others are normal Node */
    query_node = std::make_shared<QueryNode>();
  }

  // copy nodeOp
  const QueryNodeOp query_node_op = nodeop_2_query_nodeop(node_op);
  if (query_node_op == QueryNodeOp::Q_NONE) {
    *err = "cannot find query node op " + Node::type_to_str(node_op);
    return nullptr;
  }
  query_node->set_op(query_node_op);

  // this node sits at `level`; its children one level deeper
  query_node->set_level(level);
  const uint32_t child_level = level + 1;

  // copy left & right
  if (node->left() != nullptr) {
    query_node->set_left(
        create_querynode_from_node(node->left(), child_level, err));
  }
  if (node->right() != nullptr) {
    query_node->set_right(
        create_querynode_from_node(node->right(), child_level, err));
  }

  return query_node;
}

// Translate a parser op into the analyzer op via opMap_.
// Returns QueryNodeOp::Q_NONE when the op has no entry in the table.
QueryNodeOp QueryAnalyzer::nodeop_2_query_nodeop(NodeOp op) {
  const auto found = opMap_.find(op);
  return (found != opMap_.end()) ? found->second : QueryNodeOp::Q_NONE;
}

// Validate a vector relation (`<vector field> <op> <vector value>`) and convert
// it into a QueryVectorCondInfo.
//
// The left operand names the vector field, which must exist in the schema; the
// right operand must be a vector-matrix value node, from which the matrix,
// sparse indices/values and query params are taken.
//
// @param schema          collection schema used to resolve the vector field.
// @param query_rel_node  the vector relation to convert.
// @param vector_cond     receives the converted condition on success.
// @return OK on success, InvalidArgument otherwise.
Status QueryAnalyzer::check_and_convert_vector(
    const CollectionSchema &schema, const QueryRelNode *query_rel_node,
    std::shared_ptr<QueryInfo::QueryVectorCondInfo> *vector_cond) {
  const QueryNode::Ptr &vector_field_node = query_rel_node->left();
  if (vector_field_node == nullptr) {
    return Status::InvalidArgument("vector condition has no field operand");
  }
  const auto &vector_field_name = vector_field_node->text();

  auto vector_meta = schema.get_vector_field(vector_field_name);
  if (vector_meta == nullptr) {
    return Status::InvalidArgument("vector field not found:",
                                   vector_field_name);
  }

  const QueryNode::Ptr &vector_value_node = query_rel_node->right();
  if (vector_value_node == nullptr) {
    return Status::InvalidArgument("vector condition has no value operand");
  }

  // for pb request: only the `vector = [,,,]` matrix form is accepted
  if (vector_value_node->op() != QueryNodeOp::Q_VECTOR_MATRIX_VALUE) {
    return Status::InvalidArgument("invalid vector value node. op[",
                                   vector_value_node->op_name(), "], text[",
                                   vector_value_node->text(), "]");
  }
  const QueryVectorMatrixNode::Ptr vector_node =
      std::dynamic_pointer_cast<QueryVectorMatrixNode>(vector_value_node);
  if (vector_node == nullptr) {
    // The op claims a vector matrix but the node is of another class.
    return Status::InvalidArgument("invalid vector value node. op[",
                                   vector_value_node->op_name(), "], text[",
                                   vector_value_node->text(), "]");
  }

  // we only have vector matrix, other info is not available
  std::string vector_term = vector_node->matrix();
  std::string vector_sparse_indices = vector_node->sparse_indices();
  std::string vector_sparse_values = vector_node->sparse_values();
  QueryParams::Ptr query_params = vector_node->query_params();

  auto core_data_type =
      DataTypeCodeBook::to_data_type(vector_meta->data_type());
  if (core_data_type == core::IndexMeta::DataType::DT_UNDEFINED) {
    return Status::InvalidArgument("invalid data type:",
                                   (int)vector_meta->data_type());
  }

  uint32_t dimension = vector_meta->dimension();
  *vector_cond = std::make_shared<QueryInfo::QueryVectorCondInfo>(
      vector_meta, vector_term, core_data_type, dimension,
      std::move(vector_sparse_indices), std::move(vector_sparse_values),
      std::move(query_params));
  return Status::OK();
}


}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/analyzer/query_analyzer.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <map>
#include <memory>
#include <string>
#include <zvec/db/status.h>
#include "db/sqlengine/parser/sql_info.h"
#include "query_info.h"
#include "query_node_walker.h"

namespace zvec::sqlengine {

// Semantic analyzer of the SQL engine: turns a parsed statement (SQLInfo) into
// a schema-checked QueryInfo.
class QueryAnalyzer {
 public:
  QueryAnalyzer() = default;

  // Analyze sql_info against schema. Returns the resulting QueryInfo, or an
  // unexpected Status describing why the statement was rejected.
  Result<QueryInfo::Ptr> analyze(const CollectionSchema &schema,
                                 SQLInfo::Ptr sql_info);

  // NOTE(review): err_msg() and err_code() are declared here but have no
  // definition in query_analyzer.cc; confirm they are defined elsewhere or
  // remove them (analyze() reports errors through its Result).
  const std::string &err_msg();
  int err_code();

 private:
  // Copy a parsed SELECT statement into a fresh QueryInfo.
  Result<QueryInfo::Ptr> create_queryinfo_from_sqlinfo(
      const CollectionSchema &schema, const SQLInfo &sql_info);
  // Recursively copy a parser Node tree into a QueryNode tree; on failure
  // returns nullptr and describes the problem in *err.
  QueryNode::Ptr create_querynode_from_node(const Node::Ptr &node,
                                            uint32_t level, std::string *err);
  // Map a parser op to the analyzer op (Q_NONE when unmapped).
  QueryNodeOp nodeop_2_query_nodeop(NodeOp op);
  // Split the search condition into vector / invert / filter conditions.
  Status decide_filter_index_cond(
      const CollectionSchema &schema,
      const SearchCondCheckWalker &search_cond_check_walker,
      QueryInfo *query_info);
  // Find the sub-root selected by the invert-relation rule.
  QueryNode *get_invert_subroot(QueryNode *node);
  // Validate a vector relation and convert it to a QueryVectorCondInfo.
  Status check_and_convert_vector(
      const CollectionSchema &schema, const QueryRelNode *query_rel_node,
      std::shared_ptr<QueryInfo::QueryVectorCondInfo> *vector_cond);

  // Mark relational leaves of filter_cond as forward conditions and register
  // the schema of the fields they filter on.
  Status set_forward_filter_meta(const CollectionSchema &schema,
                                 QueryInfo *query_info, QueryNode *filter_cond);

 private:
  // Parser op -> analyzer op translation table (defined in the .cc file).
  static const std::map<NodeOp, QueryNodeOp> opMap_;
  // constexpr static members are implicitly inline in C++17, so odr-using
  // them (e.g. binding to a const reference) no longer needs an out-of-class
  // definition.
  // topN used when the statement does not specify a positive limit.
  static constexpr int DEFAULT_TOPN = 20;
  // Upper bound on the number of filter + invert relations in one query.
  static constexpr size_t kMaxNumOfFilters = 4096;
};

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/analyzer/query_field_info.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "query_field_info.h"

namespace zvec::sqlengine {

// Render the select item as text:
//   - function call: "<func>(*)" or "<func>(<param>)" (the alias is not shown);
//   - plain field:   "<field>" or "<field> as <alias>".
std::string QueryFieldInfo::to_string() const {
  if (!is_func_call()) {
    std::string text = field_name_;
    if (!alias_.empty()) {
      text.append(" as ").append(alias_);
    }
    return text;
  }

  if (is_func_param_asterisk()) {
    return func_name_ + "(*)";
  }
  return func_name_ + "(" + func_param_ + ")";
}

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/analyzer/query_field_info.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include <string>
#include <zvec/db/schema.h>

namespace zvec::sqlengine {

// Describes one item of a query's select list: either a plain field (with an
// optional alias) or a function call (name plus parameter or `*`), together
// with a non-owning pointer to the field's schema once it has been resolved.
class QueryFieldInfo {
 public:
  using Ptr = std::shared_ptr<QueryFieldInfo>;

  QueryFieldInfo() = default;

  // @param m_field_name           name of the selected field.
  // @param m_alias                alias, empty when none.
  // @param m_func_name            function name, empty for a plain field.
  // @param m_func_param           function parameter text.
  // @param m_func_param_asterisk  true when the function parameter is `*`.
  QueryFieldInfo(const std::string &m_field_name, const std::string &m_alias,
                 const std::string &m_func_name,
                 const std::string &m_func_param, bool m_func_param_asterisk)
      : field_name_(m_field_name),
        alias_(m_alias),
        func_name_(m_func_name),
        func_param_(m_func_param),
        func_param_asterisk_(m_func_param_asterisk) {}

  // No user-declared destructor: the class owns no raw resource, so the
  // implicit special members (including moves) are correct.

  void set_field_name(const std::string &value) {
    field_name_ = value;
  }

  const std::string &field_name() const {
    return field_name_;
  }

  void set_alias(const std::string &value) {
    alias_ = value;
  }
  const std::string &alias() const {
    return alias_;
  }

  const std::string &func_name() const {
    return func_name_;
  }

  void set_func_name(const std::string &value) {
    func_name_ = value;
  }

  const std::string &func_param() const {
    return func_param_;
  }

  void set_func_param(const std::string &value) {
    func_param_ = value;
  }

  // True when this item is a function call, i.e. a function name is set.
  bool is_func_call() const {
    return (!func_name_.empty());
  }

  void set_func_param_asterisk(bool value) {
    func_param_asterisk_ = value;
  }
  bool is_func_param_asterisk() const {
    return func_param_asterisk_;
  }

  // The schema pointer is stored as-is and is not owned by this object.
  void set_field_schema_ptr(const zvec::FieldSchema *field_schema_ptr) {
    field_schema_ptr_ = field_schema_ptr;
  }
  // Returns the resolved schema, or nullptr if none has been set yet.
  const zvec::FieldSchema *field_schema_ptr() const {
    return field_schema_ptr_;
  }


  // Human-readable rendering of this select item (defined in the .cc file).
  std::string to_string() const;

 private:
  std::string field_name_;
  std::string alias_;

  std::string func_name_;
  std::string func_param_;
  bool func_param_asterisk_{false};

  // Initialized to nullptr so reading it before set_field_schema_ptr() is
  // well-defined.
  const zvec::FieldSchema *field_schema_ptr_{nullptr};
};

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/analyzer/query_info.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "query_info.h"
#include <zvec/ailego/utility/float_helper.h>
#include <zvec/ailego/utility/string_helper.h>
#include "db/common/constants.h"

namespace zvec::sqlengine {

// Produce a multi-line, human-readable dump of this query: select fields,
// order-by items, fetched field names, group-by, topN, and the search, vector,
// filter and invert conditions. Empty lists are omitted; the four condition
// headers are always printed.
std::string QueryInfo::to_string() const {
  std::string out = "Query Info: {\n";

  if (!query_fields_.empty()) {
    out += "query_fields: ";
    bool is_first = true;
    for (const auto &field : query_fields_) {
      if (!is_first) {
        out += ", ";
      }
      is_first = false;
      out += field->to_string();
    }
    out += "\n";
  }

  if (!query_orderbys_.empty()) {
    out += "query_orderbys: ";
    bool is_first = true;
    for (const auto &orderby : query_orderbys_) {
      if (!is_first) {
        out += ", ";
      }
      is_first = false;
      out += orderby->to_string();
    }
    out += "\n";
  }

  if (!all_fetched_schema_schemas_.empty()) {
    out += "all_fetched_field_schemas: ";
    bool is_first = true;
    for (const auto &entry : all_fetched_schema_schemas_) {
      if (!is_first) {
        out += ", ";
      }
      is_first = false;
      out += entry.first;
    }
    out += "\n";
  }

  if (group_by_ != nullptr) {
    out.append("group_by: ").append(group_by_->to_string()).append("\n");
  }

  out.append("query_topn: ").append(std::to_string(query_topn_)).append(" \n");

  out += "search_cond:\n";
  if (search_cond_ != nullptr) {
    out.append(search_cond_->text()).append("\n");
  }

  out += "vector_cond:\n";
  if (vector_cond_info_ != nullptr) {
    ailego::StringHelper::Append(
        &out, vector_cond_info_->vector_field_name(), "=", "feature(",
        vector_cond_info_->batch() > 1 ? "matrix[[...],...]" : "vector[...]",
        ", ", vector_cond_info_->data_type(), ",", vector_cond_info_->batch(),
        ")(FEATURE",
        vector_cond_info_->vector_sparse_indices().empty() ? ""
                                                           : "_WITH_SPARSE",
        ")\n");
  }

  out += "filter_cond:\n";
  if (filter_cond_ != nullptr) {
    out.append(filter_cond_->text()).append("\n");
  }

  out += "invert_cond:\n";
  if (invert_cond_ != nullptr) {
    out.append(invert_cond_->text()).append("\n");
  }

  out += "}";
  return out;
}

// True when the invert condition or the filter condition carries a predicate
// result that is present and false.
bool QueryInfo::is_filter_unsatisfiable() const {
  // A condition counts only if it exists, has a predicate result, and that
  // result is false.
  const auto evaluates_to_false = [](const auto &cond) -> bool {
    return cond && cond->predictate_result().has_value() &&
           !cond->predictate_result().value();
  };
  return evaluates_to_false(invert_cond_) || evaluates_to_false(filter_cond_);
}

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/analyzer/query_info.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include <zvec/ailego/logger/logger.h>
#include <zvec/core/framework/index_meta.h>
#include <zvec/db/schema.h>
#include "db/common/constants.h"
#include "db/sqlengine/common/group_by.h"
#include "query_field_info.h"
#include "query_node.h"
#include "query_orderby_info.h"

namespace zvec::sqlengine {

// Pairs a field name with a non-owning pointer to that field's schema.
struct FieldAndSchema {
  // Takes the name by value and moves it into place; the schema pointer is
  // stored as given.
  FieldAndSchema(std::string name, const FieldSchema *schema_ptr)
      : field_name{std::move(name)}, field_schema_ptr{schema_ptr} {}

  // Name of the field.
  std::string field_name;
  // Schema of the field; not owned by this struct.
  const FieldSchema *field_schema_ptr;
};

// Analyzed description of a single query. It stores the condition trees
// (search / invert / filter plus their post-filter counterparts), the optional
// vector condition, the requested top-n, the select / order-by / group-by
// items, and the bookkeeping lists of field names and schemas that are filled
// in while those items are registered.
class QueryInfo {
 public:
  using Ptr = std::shared_ptr<QueryInfo>;

  // Parameters of the vector part of a query: the schema of the vector field,
  // the query vector payload, its data type and dimension, the optional sparse
  // representation and the per-query parameters.
  class QueryVectorCondInfo {
   public:
    using Ptr = std::shared_ptr<QueryVectorCondInfo>;

    // `vector_schema` is dereferenced here and by vector_field_name(), so it
    // must not be null. When the schema's index params are VectorIndexParams
    // with metric type IP, the result ordering is flagged as reversed.
    QueryVectorCondInfo(const FieldSchema *vector_schema,
                        const std::string &vector_term,
                        core::IndexMeta::DataType core_data_type, int dimension,
                        std::string vector_sparse_indices,
                        std::string vector_sparse_values,
                        QueryParams::Ptr query_params)
        : vector_schema_(vector_schema),
          vector_term_(vector_term),
          data_type_(core_data_type),
          dimension_(dimension),
          vector_sparse_indices_(std::move(vector_sparse_indices)),
          vector_sparse_values_(std::move(vector_sparse_values)),
          query_params_(std::move(query_params)) {
      auto *vector_params = dynamic_cast<VectorIndexParams *>(
          vector_schema_->index_params().get());
      if (vector_params && vector_params->metric_type() == MetricType::IP) {
        reverse_sort_ = true;
      }
    }

   public:
    // Name of the vector field, taken from its schema.
    std::string vector_field_name() const {
      return vector_schema_->name();
    }

    const FieldSchema *vector_schema() const {
      return vector_schema_;
    }

    const std::string &vector_term() const {
      return vector_term_;
    }

    core::IndexMeta::DataType data_type() const {
      return data_type_;
    }

    uint32_t dimension() const {
      return dimension_;
    }

    // Always 0 in this implementation.
    uint32_t post_filter_topk() const {
      return 0;
    }

    // Always 1 in this implementation.
    int batch() const {
      return 1;
    }

    // Number of uint32_t-sized entries in the sparse-indices buffer.
    uint32_t sparse_count() const {
      return static_cast<uint32_t>(vector_sparse_indices_.size() /
                                   sizeof(uint32_t));
    }

    const std::string &vector_sparse_indices() const {
      return vector_sparse_indices_;
    }

    const std::string &vector_sparse_values() const {
      return vector_sparse_values_;
    }

    // True when the constructor detected the IP metric.
    bool is_reverse_sort() const {
      return reverse_sort_;
    }

    const QueryParams::Ptr &query_params() const {
      return query_params_;
    }

   private:
    const FieldSchema *vector_schema_{nullptr};
    std::string vector_term_{""};
    core::IndexMeta::DataType data_type_;
    uint32_t dimension_{0};
    std::string vector_sparse_indices_{""};
    std::string vector_sparse_values_{""};
    QueryParams::Ptr query_params_;
    bool reverse_sort_{false};
  };

 public:
  QueryInfo() = default;
  ~QueryInfo() = default;

  // ---- condition trees ----------------------------------------------------

  void set_search_cond(QueryNode::Ptr value) {
    search_cond_ = std::move(value);
  }

  QueryNode::Ptr search_cond() const {
    return search_cond_;
  }

  void set_invert_cond(QueryNode::Ptr value) {
    invert_cond_ = std::move(value);
  }

  QueryNode::Ptr invert_cond() const {
    return invert_cond_;
  }

  void set_filter_cond(QueryNode::Ptr value) {
    filter_cond_ = std::move(value);
  }

  QueryNode::Ptr filter_cond() const {
    return filter_cond_;
  }

  void set_vector_cond_info(QueryVectorCondInfo::Ptr value) {
    vector_cond_info_ = std::move(value);
  }

  const QueryVectorCondInfo::Ptr &vector_cond_info() const {
    return vector_cond_info_;
  }

  // ---- top-n, select list and order-by list -------------------------------

  void set_query_topn(uint32_t value) {
    query_topn_ = value;
  }

  uint32_t query_topn() const {
    return query_topn_;
  }

  const std::vector<QueryFieldInfo::Ptr> &query_fields() const {
    return query_fields_;
  }

  // Takes ownership of the caller's pointer; it is moved, not copied, into
  // the list.
  void add_query_field(QueryFieldInfo::Ptr &&query_field_info) {
    query_fields_.emplace_back(std::move(query_field_info));
  }

  const std::vector<QueryOrderbyInfo::Ptr> &query_orderbys() const {
    return query_orderbys_;
  }

  // Takes ownership of the caller's pointer; it is moved, not copied, into
  // the list.
  void add_query_orderby(QueryOrderbyInfo::Ptr &&query_orderby_info) {
    query_orderbys_.emplace_back(std::move(query_orderby_info));
  }

  // ---- schema bookkeeping -------------------------------------------------

  // Registers one select item. The field is recorded as fetched, then filed
  // either under the selected vector fields (non-null schema with a vector
  // data type) or under the selected scalar field names (everything else,
  // including a null schema), and finally appended to the select-item list.
  void add_select_item_schema_ptr(
      std::string field, const zvec::FieldSchema *select_item_schema_ptr) {
    const bool is_vector_field =
        select_item_schema_ptr != nullptr &&
        FieldSchema::is_vector_field(select_item_schema_ptr->data_type());
    add_fetched_schema(field, select_item_schema_ptr);
    if (is_vector_field) {
      selected_vector_fields_.emplace_back(field, select_item_schema_ptr);
    } else {
      selectd_scalar_field_names_.emplace_back(field);
    }
    select_item_schema_ptrs_.emplace_back(std::move(field),
                                          select_item_schema_ptr);
  }

  const std::vector<FieldAndSchema> &select_item_schema_ptrs() const {
    return select_item_schema_ptrs_;
  }

  // Registers a field used by the forward filter. The field is recorded as
  // fetched and its name is appended to the forward-filter list once; the
  // companion set suppresses duplicates.
  void add_forward_filter_schema_ptr(
      std::string field, const zvec::FieldSchema *forward_filter_schema_ptr) {
    add_fetched_schema(field, forward_filter_schema_ptr);
    if (forward_filter_field_names_set_.emplace(field).second) {
      forward_filter_field_names_.emplace_back(std::move(field));
    }
  }

  // Registers one order-by item: recorded as fetched and appended to the
  // order-by item list.
  void add_orderby_item_schema_ptr(
      std::string field, const zvec::FieldSchema *orderby_item_schema_ptr) {
    add_fetched_schema(field, orderby_item_schema_ptr);
    orderby_item_schema_ptrs_.emplace_back(std::move(field),
                                           orderby_item_schema_ptr);
  }

  const std::vector<FieldAndSchema> &orderby_item_schema_ptrs() const {
    return orderby_item_schema_ptrs_;
  }

  // Records `field` in the fetched-schema map; an existing entry for the same
  // name is kept unchanged. On the first registration of a field whose schema
  // is non-null and not a vector type, the schema's name is appended to the
  // list of fetched scalar field names. A null schema is stored in the map but
  // contributes no scalar name (it has no schema to read a name from).
  void add_fetched_schema(std::string field,
                          const zvec::FieldSchema *other_item_schema_ptr) {
    auto res = all_fetched_schema_schemas_.emplace(std::move(field),
                                                   other_item_schema_ptr);
    if (res.second && other_item_schema_ptr != nullptr &&
        !FieldSchema::is_vector_field(other_item_schema_ptr->data_type())) {
      all_fetched_scalar_field_names_.emplace_back(
          other_item_schema_ptr->name());
    }
  }

  const std::unordered_map<std::string, const FieldSchema *> &
  all_fetched_schemas() const {
    return all_fetched_schema_schemas_;
  }

  bool is_field_fetched(const std::string &field) const {
    return all_fetched_schema_schemas_.count(field) > 0;
  }

  const std::vector<std::string> &get_selected_scalar_field_names() const {
    return selectd_scalar_field_names_;
  }

  const std::vector<std::string> &get_all_fetched_scalar_field_names() const {
    return all_fetched_scalar_field_names_;
  }

  const std::vector<std::string> &get_forward_filter_field_names() const {
    return forward_filter_field_names_;
  }

  // Linear search of the select list for an entry with the given field name.
  bool exists_in_query_fields(const std::string &field_name) const {
    for (const auto &query_field_info : query_fields_) {
      if (field_name == query_field_info->field_name()) {
        return true;
      }
    }
    return false;
  }

  // ---- post-filter conditions ---------------------------------------------

  void set_post_invert_cond(const QueryNode::Ptr &value) {
    post_invert_cond_ = value;
  }

  const QueryNode::Ptr &post_invert_cond() const {
    return post_invert_cond_;
  }

  void set_post_filter_cond(const QueryNode::Ptr &value) {
    post_filter_cond_ = value;
  }

  const QueryNode::Ptr &post_filter_cond() const {
    return post_filter_cond_;
  }

  // ---- output flags -------------------------------------------------------

  void set_asterisk(bool value) {
    asterisk_ = value;
  }

  bool is_asterisk() const {
    return asterisk_;
  }

  void set_include_vector(bool value) {
    include_vector_ = value;
  }

  bool is_include_vector() const {
    return include_vector_;
  }

  // Enabling the flag also appends GLOBAL_DOC_ID to both the selected and the
  // all-fetched scalar field name lists (on every call that passes true).
  void set_include_doc_id(bool value) {
    include_doc_id_ = value;
    if (include_doc_id_) {
      selectd_scalar_field_names_.emplace_back(GLOBAL_DOC_ID);
      all_fetched_scalar_field_names_.emplace_back(GLOBAL_DOC_ID);
    }
  }

  bool is_include_doc_id() const {
    return include_doc_id_;
  }

  const std::vector<FieldAndSchema> &selected_vector_fields() const {
    return selected_vector_fields_;
  }

  // ---- group by -----------------------------------------------------------

  void set_group_by(GroupBy::Ptr group_by) {
    group_by_ = std::move(group_by);
  }
  const GroupBy::Ptr &group_by() const {
    return group_by_;
  }

  void set_group_by_schema_ptr(const FieldSchema *group_by_schema_ptr) {
    group_by_schema_ptr_ = group_by_schema_ptr;
  }
  const FieldSchema *group_by_schema_ptr() const {
    return group_by_schema_ptr_;
  }

  std::string to_string() const;

  bool is_filter_unsatisfiable() const;

 private:
  QueryNode::Ptr search_cond_{nullptr};

  QueryNode::Ptr invert_cond_{nullptr};
  QueryNode::Ptr filter_cond_{nullptr};

  QueryVectorCondInfo::Ptr vector_cond_info_{nullptr};

  // these two are for post filtering only
  QueryNode::Ptr post_invert_cond_{nullptr};
  QueryNode::Ptr post_filter_cond_{nullptr};

  uint32_t query_topn_{0};
  std::vector<QueryFieldInfo::Ptr> query_fields_{};
  std::vector<QueryOrderbyInfo::Ptr> query_orderbys_{};

  GroupBy::Ptr group_by_{};

  // from analyzing
  std::unordered_set<std::string> forward_filter_field_names_set_{};
  std::vector<std::string> forward_filter_field_names_{};
  // USER_ID is a system-needed field, so it is always present
  std::vector<std::string> selectd_scalar_field_names_{USER_ID};
  std::vector<FieldAndSchema> select_item_schema_ptrs_{};
  std::vector<FieldAndSchema> orderby_item_schema_ptrs_{};
  // all fetched field schemas from forward, including user select fields and
  // system needed fields
  std::unordered_map<std::string, const FieldSchema *>
      all_fetched_schema_schemas_{};
  std::vector<std::string> all_fetched_scalar_field_names_{USER_ID,
                                                           LOCAL_ROW_ID};

  bool asterisk_{false};
  bool include_vector_{false};
  bool include_doc_id_{false};
  std::vector<FieldAndSchema> selected_vector_fields_{};
  const FieldSchema *group_by_schema_ptr_{};
};

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/analyzer/query_info_helper.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "query_info_helper.h"
#include <cstring>
#include <memory>
#include <zvec/ailego/logger/logger.h>
#include <zvec/ailego/utility/string_helper.h>

namespace zvec::sqlengine {


bool QueryInfoHelper::text_2_data_buf(const std::string &text,
                                      zvec::DataType data_type,
                                      std::string *data_buf) {
  if (data_type == zvec::DataType::INT32) {
    int32_t int32_val;
    if (!ailego::StringHelper::ToInt32(text, &int32_val)) {
      return false;
    }
    data_buf->assign((const char *)&int32_val, sizeof(int32_t));
    return true;
  }

  if (data_type == zvec::DataType::UINT32) {
    uint32_t uint32_val;
    if (!ailego::StringHelper::ToUint32(text, &uint32_val)) {
      return false;
    }
    data_buf->assign((const char *)&uint32_val, sizeof(uint32_t));
    return true;
  }

  if (data_type == zvec::DataType::INT64) {
    int64_t int64_val;
    if (!ailego::StringHelper::ToInt64(text, &int64_val)) {
      return false;
    }
    data_buf->assign((const char *)&int64_val, sizeof(int64_t));
    return true;
  }

  if (data_type == zvec::DataType::UINT64) {
    uint64_t uint64_val;
    if (!ailego::StringHelper::ToUint64(text, &uint64_val)) {
      return false;
    }
    data_buf->assign((const char *)&uint64_val, sizeof(uint64_t));
    return true;
  }

  if (data_type == zvec::DataType::FLOAT) {
    float float_val;
    if (!ailego::StringHelper::ToFloat(text, &float_val)) {
      return false;
    }
    data_buf->assign((const char *)&float_val, sizeof(float));
    return true;
  }

  if (data_type == zvec::DataType::DOUBLE) {
    double double_val;
    if (!ailego::StringHelper::ToDouble(text, &double_val)) {
      return false;
    }
    data_buf->assign((const char *)&double_val, sizeof(double));
    return true;
  }

  return false;
}

namespace {

// Copies sizeof(T) bytes from the front of `data_buf` into a T and renders it
// with ailego::StringHelper::ToString. Fails, leaving `*text` untouched, when
// the buffer holds fewer than sizeof(T) bytes.
template <typename T>
bool decode_numeric_as_text(const std::string &data_buf, std::string *text) {
  if (data_buf.size() < sizeof(T)) {
    return false;
  }
  T value;
  // memcpy rather than casting data() to T*: the string's storage carries no
  // alignment guarantee for T, and dereferencing such a cast pointer violates
  // the aliasing rules.
  std::memcpy(&value, data_buf.data(), sizeof(T));
  *text = ailego::StringHelper::ToString(value);
  return true;
}

}  // namespace

// Inverse of text_2_data_buf: interprets the leading bytes of `data_buf` as
// the raw in-memory representation of a number of type `data_type` and writes
// its textual form to `*text`.
// Returns false, leaving `*text` untouched, when `data_type` is not one of
// INT32 / UINT32 / INT64 / UINT64 / FLOAT / DOUBLE or when `data_buf` is
// shorter than the size of that type. Bytes beyond the value are ignored.
bool QueryInfoHelper::data_buf_2_text(const std::string &data_buf,
                                      zvec::DataType data_type,
                                      std::string *text) {
  if (data_type == zvec::DataType::INT32) {
    return decode_numeric_as_text<int32_t>(data_buf, text);
  }

  if (data_type == zvec::DataType::UINT32) {
    return decode_numeric_as_text<uint32_t>(data_buf, text);
  }

  if (data_type == zvec::DataType::INT64) {
    return decode_numeric_as_text<int64_t>(data_buf, text);
  }

  if (data_type == zvec::DataType::UINT64) {
    return decode_numeric_as_text<uint64_t>(data_buf, text);
  }

  if (data_type == zvec::DataType::FLOAT) {
    return decode_numeric_as_text<float>(data_buf, text);
  }

  if (data_type == zvec::DataType::DOUBLE) {
    return decode_numeric_as_text<double>(data_buf, text);
  }

  return false;
}

// Converts the text of a constant node from its binary numeric encoding back
// to readable text, in place. With `is_array_type` set, `node` is expected to
// be a QueryListNode and every element of its value list is converted;
// otherwise `node` itself is converted. A node whose text cannot be decoded by
// data_buf_2_text keeps its current text.
// A null `node`, or an `is_array_type` node that is not a QueryListNode, is
// ignored.
void QueryInfoHelper::constant_node_data_buf_2_text(DataType data_type,
                                                    bool is_array_type,
                                                    QueryNode *node) {
  if (node == nullptr) {
    return;
  }

  // Replaces the target's text only when decoding succeeds.
  const auto rewrite_text = [data_type](const auto &target) {
    if (std::string numeric_text{""};
        data_buf_2_text(target->text(), data_type, &numeric_text)) {
      target->set_text(std::move(numeric_text));
    }
  };

  if (is_array_type) {  // node->op() == QueryNodeOp::Q_LIST_VALUE
    // Checked downcast: QueryNode is polymorphic, so a reinterpret_cast would
    // neither verify the dynamic type nor be a well-defined downcast.
    auto *list_node = dynamic_cast<QueryListNode *>(node);
    if (list_node == nullptr) {
      return;
    }
    for (auto &child_node : list_node->value_expr_list()) {
      rewrite_text(child_node);
    }
    return;
  }

  rewrite_text(node);
}


// Recursive walk over the condition tree rooted at `node`.
//  - `rule` is evaluated on every REL_EXPR node; the outcome is stored on that
//    node via set_rule_result().
//  - Any other node is treated as a binary node and succeeds only when both of
//    its children succeed.
//  - A successful node that has no OR ancestor is offered to `subroot_result`;
//    this fixed "!or_ancestor" rule is shared by all current rules.
//  - `*num_of_child` receives the number of REL_EXPR nodes under `node`.
// Returns whether `node` succeeded.
bool QueryInfoHelper::traverse_node_by_rule(
    QueryNode *node, const std::function<bool(QueryRelNode *node)> &rule,
    SubRootResult *subroot_result, int32_t *num_of_child) {
  if (node->type() != QueryNode::QueryNodeType::REL_EXPR) {
    QueryNode *lhs = node->left().get();
    QueryNode *rhs = node->right().get();
    int32_t lhs_leaf_count = 0;
    int32_t rhs_leaf_count = 0;

    // Both subtrees are always visited; a failed left side does not
    // short-circuit the right side.
    const bool lhs_ok =
        traverse_node_by_rule(lhs, rule, subroot_result, &lhs_leaf_count);
    const bool rhs_ok =
        traverse_node_by_rule(rhs, rule, subroot_result, &rhs_leaf_count);

    *num_of_child = lhs_leaf_count + rhs_leaf_count;

    const bool both_ok = lhs_ok && rhs_ok;
    if (both_ok && !node->or_ancestor()) {
      subroot_result->set_result(node, *num_of_child);
    }
    return both_ok;
  }

  // Relational node: one leaf, judged directly by the rule.
  QueryRelNode *leaf = dynamic_cast<QueryRelNode *>(node);
  leaf->set_rule_result(false);  // clear previous if any
  *num_of_child = 1;

  const bool accepted = rule(leaf);
  if (!accepted) {
    return false;
  }
  if (!node->or_ancestor()) {
    subroot_result->set_result(leaf, *num_of_child);
  }
  leaf->set_rule_result(true);
  return true;
}

// Entry point for the rule search: walks the tree under `root` with
// traverse_node_by_rule and leaves the chosen subroot in `subroot_result`.
// The leaf count and the success flag of the root itself are discarded.
void QueryInfoHelper::find_subroot_by_rule(
    QueryNode *root, const std::function<bool(QueryRelNode *node)> &rule,
    SubRootResult *subroot_result) {
  int32_t discarded_leaf_count{0};
  static_cast<void>(
      traverse_node_by_rule(root, rule, subroot_result, &discarded_leaf_count));
}

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/analyzer/query_info_helper.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include "query_info.h"

namespace zvec::sqlengine {

struct SubRootResult {
  QueryNode *subroot;
  int32_t num_of_child;

  SubRootResult() : subroot(nullptr), num_of_child(0) {}

  SubRootResult(QueryNode *node, int32_t num)
      : subroot(node), num_of_child(num) {}

  void set_result(QueryNode *node, int32_t num) {
    if (subroot == nullptr || num_of_child < num) {
      subroot = node;
      num_of_child = num;
    }
  }
};

// Stateless helpers used while analyzing a query: conversion between textual
// and raw binary numeric constants, and rule-driven search for a subtree root
// inside a condition tree.
class QueryInfoHelper {
 public:
  // Parses `text` as a number of `data_type` and stores the value's raw bytes
  // in `*data_buf`. Returns false for unsupported types or unparsable text.
  static bool text_2_data_buf(const std::string &text, zvec::DataType data_type,
                              std::string *data_buf);
  // Reverse conversion: renders the numeric value encoded in `data_buf` as
  // text. Returns false for unsupported types.
  static bool data_buf_2_text(const std::string &data_buf,
                              zvec::DataType data_type, std::string *text);
  // Applies data_buf_2_text in place to `node`, or to every element of its
  // value list when `is_array_type` is set.
  static void constant_node_data_buf_2_text(DataType data_type,
                                            bool is_array_type,
                                            QueryNode *node);

  // Runs traverse_node_by_rule from `root` and leaves the selected subroot in
  // `subroot_result`.
  static void find_subroot_by_rule(
      QueryNode *root, const std::function<bool(QueryRelNode *node)> &rule,
      SubRootResult *subroot_result);

  // Recursive worker: evaluates `rule` on relational nodes, combines the
  // results of both children for other nodes, and reports the number of
  // relational nodes under `node` through `num_of_child`.
  static bool traverse_node_by_rule(
      QueryNode *node, const std::function<bool(QueryRelNode *node)> &rule,
      SubRootResult *subroot_result, int32_t *num_of_child);
};

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/analyzer/query_node.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "query_node.h"
#include <assert.h>
#include <sstream>
#include <zvec/ailego/logger/logger.h>
#include "query_info.h"

namespace zvec::sqlengine {

void QueryNode::set_type_by_op() {
  QueryNodeType node_type = QueryNodeType::NO_TYPE;
  switch (op()) {
    case QueryNodeOp::Q_AND:
    case QueryNodeOp::Q_OR:
      node_type = QueryNodeType::LOGIC_EXPR;
      break;

    case QueryNodeOp::Q_EQ:
    case QueryNodeOp::Q_NE:
    case QueryNodeOp::Q_GT:
    case QueryNodeOp::Q_GE:
    case QueryNodeOp::Q_LT:
    case QueryNodeOp::Q_LE:
    case QueryNodeOp::Q_LIKE:
    case QueryNodeOp::Q_IN:
    case QueryNodeOp::Q_CONTAIN_ANY:
    case QueryNodeOp::Q_CONTAIN_ALL:
    case QueryNodeOp::Q_IS_NULL:
    case QueryNodeOp::Q_IS_NOT_NULL:
      node_type = QueryNodeType::REL_EXPR;
      break;

    case QueryNodeOp::Q_PLUS:
    case QueryNodeOp::Q_MINUS:
    case QueryNodeOp::Q_MUL:
    case QueryNodeOp::Q_DIV:
      node_type = QueryNodeType::ARITH_EXPR;
      break;

    case QueryNodeOp::Q_FUNCTION_CALL:
      node_type = QueryNodeType::FUNC;
      break;

    case QueryNodeOp::Q_RANGE_VALUE:
    case QueryNodeOp::Q_LIST_VALUE:
    case QueryNodeOp::Q_VECTOR_MATRIX_VALUE:
    case QueryNodeOp::Q_INT_VALUE:
    case QueryNodeOp::Q_FLOAT_VALUE:
    case QueryNodeOp::Q_STRING_VALUE:
    case QueryNodeOp::Q_BOOL_VALUE:
    case QueryNodeOp::Q_NULL_VALUE:
      node_type = QueryNodeType::CONST;
      break;
    case QueryNodeOp::Q_ID:
      node_type = QueryNodeType::ID;
      break;
    default:
      break;
  }

  type_ = node_type;
}

// Unlinks this node from its parent's left or right slot and returns the
// owning pointer that the parent held.
// Returns nullptr without touching the tree when the node has no parent, or
// when the parent references this node in neither slot (for example after an
// earlier detach). The right slot is therefore only cleared when it really
// holds this node. This node's own parent link is left unchanged.
QueryNode::Ptr QueryNode::detach_from_parent() {
  if (parent_ == nullptr) {
    return nullptr;
  }

  if (parent_->left().get() == this) {
    QueryNode::Ptr tmp = parent_->left();
    parent_->set_left(nullptr);
    return tmp;
  }

  if (parent_->right().get() == this) {
    QueryNode::Ptr tmp = parent_->right();
    parent_->set_right(nullptr);
    return tmp;
  }

  // Stale parent link: the parent no longer references this node.
  return nullptr;
}

// Puts `new_node_ptr` into the parent's slot currently occupied by this node.
// The new node's parent link is set to this node's parent, this node's parent
// link is cleared, and the owning pointer to this node is returned.
// Returns nullptr and changes nothing when `new_node_ptr` is null, when this
// node has no parent, or when the parent references this node in neither
// slot.
QueryNode::Ptr QueryNode::replace_from_parent(QueryNode::Ptr new_node_ptr) {
  if (new_node_ptr == nullptr || parent_ == nullptr) {
    return nullptr;
  }

  if (parent_->left().get() == this) {
    new_node_ptr->set_parent(parent_);
    // `tmp` keeps this node alive while the parent's slot is overwritten.
    QueryNode::Ptr tmp = parent_->left();
    parent_->set_left(std::move(new_node_ptr));
    tmp->set_parent(nullptr);
    return tmp;
  }

  if (parent_->right().get() == this) {
    new_node_ptr->set_parent(parent_);
    // `tmp` keeps this node alive while the parent's slot is overwritten.
    QueryNode::Ptr tmp = parent_->right();
    parent_->set_right(std::move(new_node_ptr));
    tmp->set_parent(nullptr);
    return tmp;
  }

  // Stale parent link: the parent no longer references this node.
  return nullptr;
}

// Replaces this node inside the search condition tree. A node with a parent
// is swapped within that parent; a parentless node is treated as the root, so
// the replacement becomes the query's search condition and the previous
// search condition is returned.
QueryNode::Ptr QueryNode::replace_from_search_cond(QueryNode::Ptr new_node_ptr,
                                                   QueryInfo *query_info) {
  if (parent_ != nullptr) {
    return replace_from_parent(std::move(new_node_ptr));
  }

  // Root case: the replacement inherits the (null) parent link.
  new_node_ptr->set_parent(parent_);
  QueryNode::Ptr previous_root = query_info->search_cond();
  query_info->set_search_cond(std::move(new_node_ptr));
  return previous_root;
}

// Removes this node from the search condition tree. A node with a parent is
// detached from that parent; a parentless node is treated as the root, so the
// query's search condition is cleared and its previous value returned.
QueryNode::Ptr QueryNode::detach_from_search_cond(QueryInfo *query_info) {
  if (parent_ != nullptr) {
    return detach_from_parent();
  }

  QueryNode::Ptr previous_root = query_info->search_cond();
  query_info->set_search_cond(nullptr);
  return previous_root;
}

// Removes this node from the invert condition tree. A node with a parent is
// detached from that parent; a parentless node is treated as the root, so the
// query's invert condition is cleared and its previous value returned.
QueryNode::Ptr QueryNode::detach_from_invert_cond(QueryInfo *query_info) {
  if (parent_ != nullptr) {
    return detach_from_parent();
  }

  QueryNode::Ptr previous_root = query_info->invert_cond();
  query_info->set_invert_cond(nullptr);
  return previous_root;
}

// Renders this node as text.
//  - LOGIC_EXPR: "(<left>) <op> (<right>)"
//  - REL_EXPR:   "<left><op><right>"
//  - any other type: empty string (subclasses provide their own text()).
// A missing child is rendered as "nullptr" in both forms, via left_text() and
// right_text(), instead of being dereferenced.
std::string QueryNode::text() const {
  std::stringstream stream;
  switch (type_) {
    case QueryNodeType::LOGIC_EXPR:
      stream << "(" << left_text() << ") " << op_name() << " (" << right_text()
             << ")";
      break;
    case QueryNodeType::REL_EXPR:
      stream << left_text() << op_name() << right_text();
      break;
    default:
      break;
  }

  return stream.str();
}

// Base-class fallback for node matching: a generic node does not support it,
// so this logs an error naming the operator and reports no match. QueryIDNode
// and QueryFuncNode override this.
bool QueryNode::is_matched(const QueryNode &) const {
  LOG_ERROR("Not implementated. op[%s]", op_name().c_str());
  return false;
}

//========================================================================

// Forwards to the text of the wrapped parser-side VectorMatrixNode; node_ is
// dereferenced without a null check.
std::string QueryVectorMatrixNode::text() const {
  return node_->text();
}

//========================================================================

// Creates a constant node holding a copy of `m_value`.
QueryConstantNode::QueryConstantNode(const std::string &m_value)
    : value_(m_value) {}

// Returns a copy of the stored constant.
std::string QueryConstantNode::value() {
  return value_;
}

// The textual form of a constant node is the stored constant itself.
std::string QueryConstantNode::text() const {
  return value_;
}

// Replaces the stored constant with `new_val`.
void QueryConstantNode::set_text(std::string new_val) {
  value_ = std::move(new_val);
}

//========================================================================

// Creates an identifier node holding a copy of `m_value`.
QueryIDNode::QueryIDNode(const std::string &m_value) : value_(m_value) {}

// Replaces the stored identifier.
void QueryIDNode::set_value(const std::string &m_value) {
  value_ = m_value;
}

// Returns a copy of the stored identifier.
std::string QueryIDNode::value() {
  return value_;
}

// The textual form of an identifier node is the identifier itself.
std::string QueryIDNode::text() const {
  return value_;
}

bool QueryIDNode::is_matched(const QueryNode &other) const {
  if (other.op() != op()) {
    return false;
  }
  auto &other_id_node = dynamic_cast<const QueryIDNode &>(other);
  return value_ == other_id_node.value_;
}

//========================================================================

// Every function node carries the Q_FUNCTION_CALL operator.
QueryFuncNode::QueryFuncNode() {
  set_op(QueryNodeOp::Q_FUNCTION_CALL);
}

// Stores the node holding the function name and classifies the call: the name
// "feature" yields FEATURE, any other name NON_FEATURE.
void QueryFuncNode::set_func_name_node(QueryNode::Ptr func_name_node) {
  func_name_node_ = std::move(func_name_node);
  const bool names_feature = func_name_node_->text() == "feature";
  func_type_ =
      names_feature ? QueryFuncType::FEATURE : QueryFuncType::NON_FEATURE;
}

// Node holding the function name (null until set_func_name_node is called).
const QueryNode::Ptr &QueryFuncNode::get_func_name_node() const {
  return func_name_node_;
}

// Appends one argument, preserving call order.
void QueryFuncNode::add_argument(QueryNode::Ptr argument_node) {
  arguments_.emplace_back(std::move(argument_node));
}

// Arguments in the order they were added.
const std::vector<QueryNode::Ptr> &QueryFuncNode::arguments() const {
  return arguments_;
}

// Renders the call as "<name>(<arg1>, <arg2>, ...)".
// Arguments are visited by const reference so that no shared_ptr is copied
// (and no reference count touched) per iteration.
std::string QueryFuncNode::text() const {
  std::stringstream stream;
  stream << func_name_node_->text();
  stream << "(";

  bool first = true;
  for (const auto &argument : arguments_) {
    if (!first) {
      stream << ", ";
    }
    stream << argument->text();
    first = false;
  }
  stream << ")";
  return stream.str();
}

// Two function nodes match only when they carry the same operator, their
// name nodes match, and both are the zero-argument id() function.
bool QueryFuncNode::is_matched(const QueryNode &other) const {
  if (other.op() != op()) {
    return false;
  }

  const auto &rhs = dynamic_cast<const QueryFuncNode &>(other);
  const bool same_name = func_name_node_->is_matched(*rhs.func_name_node_);

  // only id() function with zero arguments is considered matched
  return same_name && arguments_.empty() && rhs.arguments_.empty() &&
         func_name_node_->text() == "id";
}


//========================================================================

// Nothing to set up beyond the members' own default initialization.
QueryRelNode::QueryRelNode() = default;

// Sets the relation category of this node.
void QueryRelNode::set_rel_type(RelType value) {
  rel_type_ = value;
}

// Returns the relation category of this node.
QueryRelNode::RelType QueryRelNode::rel_type() {
  return rel_type_;
}

// Base-class rendering followed by a tag for the relation category and, when
// the node sits below an OR, the "(OR_A)" marker.
std::string QueryRelNode::text() const {
  std::string rendered = QueryNode::text();

  switch (rel_type_) {
    case RelType::NO_TYPE:
      rendered += "(NO_REL_TYPE)";
      break;
    case RelType::FEATURE:
      rendered += "(FEATURE)";
      break;
    case RelType::INVERT:
      rendered += "(INVERT)";
      break;
    case RelType::FORWARD:
      rendered += "(FORWARD)";
      break;
    default:
      break;
  }

  if (or_ancestor()) {
    rendered += "(OR_A)";
  }
  return rendered;
}

//========================================================================

// Renders the list as "(<v1>, <v2>, ...)", prefixed with "NOT " when the
// list is an exclusion list.
// Elements are visited by const reference so that no element is copied per
// iteration.
std::string QueryListNode::text() const {
  std::stringstream stream;
  if (exclude_) {
    stream << "NOT ";
  }

  stream << "(";

  bool first = true;
  for (const auto &value_expr : value_expr_list_) {
    if (!first) {
      stream << ", ";
    }
    stream << value_expr->text();
    first = false;
  }
  stream << ")";
  return stream.str();
}

std::vector<std::string> QueryListNode::to_value_list() {
  std::vector<std::string> value_list;
  for (auto &value_expr : value_expr_list_) {
    value_list.emplace_back(value_expr->text());
  }

  return value_list;
}


}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/analyzer/query_node.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include <optional>
#include <vector>
#include <zvec/db/query_params.h>
#include "db/sqlengine/common/generic_node.h"
#include "db/sqlengine/parser/node.h"

namespace zvec::sqlengine {

// Operator / kind tag carried by every query node.
// NOTE: QueryNode::type_to_str() indexes its name table by the numeric value
// of these enumerators, so the declaration order here must stay in step with
// that table.
enum class QueryNodeOp {
  Q_NONE,
  // logical connectives
  Q_AND,
  Q_OR,
  // relational predicates
  Q_EQ,
  Q_NE,
  Q_GT,
  Q_GE,
  Q_LT,
  Q_LE,
  Q_LIKE,
  Q_IN,
  Q_CONTAIN_ALL,
  Q_CONTAIN_ANY,
  // arithmetic
  Q_PLUS,
  Q_MINUS,
  Q_MUL,
  Q_DIV,
  Q_FUNCTION_CALL,
  // literal values and identifiers
  Q_RANGE_VALUE,
  Q_LIST_VALUE,
  Q_VECTOR_MATRIX_VALUE,
  Q_INT_VALUE,
  Q_FLOAT_VALUE,
  Q_STRING_VALUE,
  Q_NULL_VALUE,
  Q_ID,
  Q_BOOL_VALUE,
  // null tests (classified as relational predicates by set_type_by_op)
  Q_IS_NULL,
  Q_IS_NOT_NULL,
};

class QueryInfo;
// Node of an analyzed query condition tree. On top of the generic binary-tree
// links inherited from Generic_Node it carries a coarse category (type_)
// derived from the operator, a level, an "has an OR ancestor" flag and an
// optional, already-evaluated predicate result.
class QueryNode : public Generic_Node<QueryNodeOp, QueryNode> {
 public:
  using Ptr = std::shared_ptr<QueryNode>;

  // Display name of an operator. The table is indexed by the numeric value of
  // the enumerator; the static_assert ties the table length to the last
  // enumerator so the two cannot drift apart unnoticed. A value outside the
  // table yields "UNKNOWN" instead of reading past the array.
  static inline std::string type_to_str(QueryNodeOp c) {
    static const std::string names[] = {"NONE",
                                        "and",
                                        "or",
                                        "=",
                                        "!=",
                                        ">",
                                        ">=",
                                        "<",
                                        "<=",
                                        " LIKE ",
                                        " in ",
                                        " contain_all ",
                                        " contain_any ",
                                        "+",
                                        "-",
                                        "*",
                                        "/",
                                        "FUNCTION_CALL",
                                        "RANGE_VALUE",
                                        "LIST_VALUE",
                                        "VECTOR_MATRIX_VALUE",
                                        "INT_VALUE",
                                        "FLOAT_VALUE",
                                        "STRING_VALUE",
                                        "NULL_VALUE",
                                        "ID",
                                        "BOOL_VALUE",
                                        " IS_NULL ",
                                        " IS_NOT_NULL "};
    constexpr std::size_t kNameCount = sizeof(names) / sizeof(names[0]);
    static_assert(
        kNameCount == static_cast<std::size_t>(QueryNodeOp::Q_IS_NOT_NULL) + 1,
        "type_to_str name table is out of sync with QueryNodeOp");

    const auto index = static_cast<std::size_t>(c);
    if (index >= kNameCount) {
      return "UNKNOWN";
    }
    return names[index];
  }

  // Coarse category of a node, derived from its operator by set_type_by_op().
  enum class QueryNodeType {
    NO_TYPE,
    LOGIC_EXPR,
    REL_EXPR,
    ARITH_EXPR,
    FUNC,
    CONST,
    ID
  };

 public:
  QueryNode() : Generic_Node(QueryNodeOp::Q_NONE) {}
  // Goes through set_op() so that type_ is derived from the operator as well.
  QueryNode(QueryNodeOp m_op) : Generic_Node(m_op) {
    set_op(m_op);
  }
  ~QueryNode() override = default;

  // Text of the left child, or the literal "nullptr" when there is none.
  std::string left_text() const {
    if (left_ == nullptr) {
      return "nullptr";
    }
    return left_->text();
  }
  // Text of the right child, or the literal "nullptr" when there is none.
  std::string right_text() const {
    if (right_ == nullptr) {
      return "nullptr";
    }
    return right_->text();
  }


  virtual bool is_matched(const QueryNode &other) const;

  // Stores the operator and re-derives the node category from it.
  void set_op(QueryNodeOp value) override {
    Generic_Node<QueryNodeOp, QueryNode>::set_op(value);
    set_type_by_op();
  }

  std::string op_name() const {
    return type_to_str(op_);
  }

  QueryNode::QueryNodeType type() const {
    return type_;
  }

  void set_level(uint32_t value) {
    level_ = value;
  }
  uint32_t level() const {
    return level_;
  }

  void set_or_ancestor(bool val = true) {
    or_ancestor_ = val;
  }

  bool or_ancestor() const {
    return or_ancestor_;
  }

  QueryNode::Ptr detach_from_parent();

  QueryNode::Ptr replace_from_parent(QueryNode::Ptr new_query_node);

  QueryNode::Ptr replace_from_search_cond(QueryNode::Ptr new_query_node,
                                          QueryInfo *query_info);

  QueryNode::Ptr detach_from_search_cond(QueryInfo *query_info_ptr);

  QueryNode::Ptr detach_from_invert_cond(QueryInfo *query_info_ptr);

  std::string text() const override;

  virtual void set_text(std::string /*new_val*/) {
    /* for QueryConstantNode only */
    return;
  }

  // Empty while the predicate has not been evaluated; otherwise its result.
  std::optional<bool> predictate_result() const {
    return predictate_result_;
  }
  void set_predictate_result(bool result) {
    predictate_result_ = result;
  }

 protected:
  void set_type_by_op();

 protected:
  QueryNodeType type_{QueryNodeType::NO_TYPE};

 private:
  uint32_t level_{0};
  bool or_ancestor_{false};
  // evaluation result of predication, maybe true, false or unknown
  std::optional<bool> predictate_result_{std::nullopt};
};

// Query-tree wrapper around a parser-side VectorMatrixNode. The wrapped node
// is held as a shared pointer to const and every accessor forwards to it
// without a null check, so the node passed to the constructor must not be
// null if the accessors are used.
class QueryVectorMatrixNode : public QueryNode {
 public:
  using Ptr = std::shared_ptr<QueryVectorMatrixNode>;

  // Takes shared ownership of `node`.
  QueryVectorMatrixNode(std::shared_ptr<VectorMatrixNode> node)
      : node_(std::move(node)) {}

  // Text of the wrapped node.
  std::string text() const override;

  // matrix() of the wrapped node.
  const std::string &matrix() const {
    return node_->matrix();
  }

  // sparse_indices() of the wrapped node.
  const std::string &sparse_indices() const {
    return node_->sparse_indices();
  }

  // sparse_values() of the wrapped node.
  const std::string &sparse_values() const {
    return node_->sparse_values();
  }

  // query_params() of the wrapped node.
  const QueryParams::Ptr &query_params() const {
    return node_->query_params();
  }

 private:
  std::shared_ptr<const VectorMatrixNode> node_{nullptr};
};

// Query-tree node holding a constant as a string. All members are defined
// out of line.
class QueryConstantNode : public QueryNode {
 public:
  using Ptr = std::shared_ptr<QueryConstantNode>;

  // NOTE(review): presumably stores `m_value` in value_ -- confirm against
  // the definition.
  QueryConstantNode(const std::string &m_value);

  // Returns a std::string by value (presumably a copy of value_).
  std::string value();
  // Textual rendering of the constant.
  std::string text() const override;

  // Overrides QueryNode::set_text(), whose base version is a no-op.
  // NOTE(review): presumably replaces value_ with `new_val` -- confirm.
  void set_text(std::string new_val) override;

 private:
  std::string value_;
};

// Query-tree node holding an identifier as a string. All members are
// defined out of line.
class QueryIDNode : public QueryNode {
 public:
  using Ptr = std::shared_ptr<QueryIDNode>;

  // NOTE(review): presumably stores `m_value` in value_ -- confirm against
  // the definition.
  QueryIDNode(const std::string &m_value);

  // NOTE(review): presumably replaces value_ with `m_value` -- confirm.
  void set_value(const std::string &m_value);

  // Returns a std::string by value (presumably a copy of value_).
  std::string value();
  // Textual rendering of the identifier.
  std::string text() const override;

  // Overrides the base-class matching predicate; semantics are defined out
  // of line.
  bool is_matched(const QueryNode &other) const override;

 private:
  std::string value_;
};

// Query-tree node for a function call: a node carrying the function name
// plus an ordered list of argument nodes.
class QueryFuncNode : public QueryNode {
  // Function category; func_type_ defaults to FEATURE and this class
  // declares no setter for it.
  enum class QueryFuncType { FEATURE = 0, NON_FEATURE = 1 };

 public:
  using Ptr = std::shared_ptr<QueryFuncNode>;

  QueryFuncNode();
  virtual ~QueryFuncNode() = default;

  // Function-name node accessors; defined out of line.
  void set_func_name_node(QueryNode::Ptr func_name_node);
  const QueryNode::Ptr &get_func_name_node() const;

  // Returns the text of the function-name node. The node pointer starts out
  // null, so an empty string is returned until a name node has been set
  // instead of dereferencing a null pointer.
  std::string get_func_name() const {
    if (func_name_node_ == nullptr) {
      return std::string();
    }
    return func_name_node_->text();
  }

  // Argument list accessors; defined out of line.
  void add_argument(QueryNode::Ptr argument_node);
  const std::vector<QueryNode::Ptr> &arguments() const;

  std::string text() const override;

  // True when the function category is FEATURE. Const-qualified because it
  // only reads state, so it is also usable through a const node.
  bool is_feature_func() const {
    return func_type_ == QueryFuncType::FEATURE;
  }

  bool is_matched(const QueryNode &other) const override;

 private:
  QueryNode::Ptr func_name_node_{nullptr};
  std::vector<QueryNode::Ptr> arguments_{};
  QueryFuncType func_type_{QueryFuncType::FEATURE};
};

// Query-tree node for a relational expression. Besides the operands
// inherited from QueryNode it records which kind of filter the relation was
// classified as, plus an intermediate rule-evaluation flag.
class QueryRelNode : public QueryNode {
 public:
  using Ptr = std::shared_ptr<QueryRelNode>;

  // Classification of the relation; NO_TYPE until one of the setters runs.
  enum class RelType { NO_TYPE, FEATURE, INVERT, FORWARD };

  QueryRelNode();

  // Classification predicates.
  bool is_feature() const {
    return rel_type_ == RelType::FEATURE;
  }
  bool is_invert() const {
    return rel_type_ == RelType::INVERT;
  }
  bool is_forward() const {
    return rel_type_ == RelType::FORWARD;
  }

  // Classification setters; set_vector() selects RelType::FEATURE.
  void set_vector() {
    rel_type_ = RelType::FEATURE;
  }
  void set_invert() {
    rel_type_ = RelType::INVERT;
  }
  void set_forward() {
    rel_type_ = RelType::FORWARD;
  }

  // Generic accessors for the classification; defined out of line.
  void set_rel_type(RelType value);
  RelType rel_type();

  std::string text() const override;

  // Returns the intermediate rule result (false by default). Const-qualified
  // because it only reads state, matching the is_*() getters above.
  bool rule_result() const {
    return rule_result_;
  }

  // Stores the intermediate rule result.
  void set_rule_result(bool result) {
    rule_result_ = result;
  }

 private:
  RelType rel_type_{RelType::NO_TYPE};
  // Intermediate result used while evaluating rules.
  bool rule_result_{false};
};

class QueryListNode : public QueryNode {
 public:
  using Ptr = std::shared_ptr<QueryListNode>;

  QueryListNode() {
    set_op(QueryNodeOp::Q_LIST_VALUE);
  }

  void add_value_expr(QueryNode::Ptr value_expr) {
    value_expr_list_.emplace_back(std::move(value_expr));
  }

  const std::vector<QueryNode::Ptr> &value_expr_list() const {
    return value_expr_list_;
  }

  bool exclude() const {
    return exclude_;
  }

  void set_exclude(bool val) {
    exclude_ = val;
  }

  std::string text() const override;

  std::vector<std::string> to_value_list();

 private:
  std::vector<QueryNode::Ptr> value_expr_list_{};
  bool exclude_{false};
};

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/analyzer/query_node_walker.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "query_node_walker.h"
#include <cstddef>
#include <zvec/ailego/logger/logger.h>
#include <zvec/ailego/pattern/expected.hpp>
#include <zvec/ailego/utility/float_helper.h>
#include <zvec/ailego/utility/string_helper.h>
#include <zvec/db/index_params.h>
#include <zvec/db/type.h>
#include "db/common/constants.h"
#include "db/index/common/type_helper.h"
#include "db/sqlengine/analyzer/query_node.h"
#include "db/sqlengine/common/util.h"
#include "query_info_helper.h"

namespace zvec::sqlengine {

// Numeric types are INT32, INT64, UINT32, UINT64, FLOAT and DOUBLE. The test
// is a single range comparison rather than six equality checks, which relies
// on those enumerators being contiguous from INT32 to DOUBLE in
// zvec::DataType.
inline bool is_numeric_type(zvec::DataType data_type) {
  const bool not_below_first = data_type >= zvec::DataType::INT32;
  const bool not_above_last = data_type <= zvec::DataType::DOUBLE;
  return not_below_first && not_above_last;
}

// Binds the walker to `table_ptr`. The schema is kept by reference, so it
// must outlive the walker.
SearchCondCheckWalker::SearchCondCheckWalker(const CollectionSchema &table_ptr)
    : table_ptr_(table_ptr) {}

// Pre-order walk over a condition tree: the node itself is checked with
// access(), then its left child, then its right child.
//
// `or_ancestor` tells whether some ancestor of `query_node` is an OR node;
// once a Q_OR node has been visited the flag stays set for its whole subtree.
//
// Returns BREAK when `query_node` is null or as soon as any visited node
// reports BREAK; returns CONTINUE otherwise. Missing children are skipped.
ControlOp SearchCondCheckWalker::traverse_cond_node(
    const QueryNode::Ptr &query_node, bool or_ancestor) {
  if (query_node == nullptr) {
    return ControlOp::BREAK;
  }
  if (access(query_node, or_ancestor) == ControlOp::BREAK) {
    // the check failed, stop the whole traversal
    return ControlOp::BREAK;
  }

  const bool below_or =
      or_ancestor || query_node->op() == QueryNodeOp::Q_OR;

  // true when the child is absent or its subtree was walked without a BREAK
  const auto subtree_ok = [this, below_or](const QueryNode::Ptr &child) {
    if (child == nullptr) {
      return true;
    }
    return traverse_cond_node(child, below_or) != ControlOp::BREAK;
  };

  if (!subtree_ok(query_node->left())) {
    return ControlOp::BREAK;
  }
  if (!subtree_ok(query_node->right())) {
    return ControlOp::BREAK;
  }
  return ControlOp::CONTINUE;
}

// Validates a single node of the condition tree and, when it is a relation
// expression, classifies it as a vector, invert-index or forward filter.
//
// Nodes that are not REL_EXPR are only tagged with the OR-ancestor flag and
// then skipped. For a relation expression the checks run in this order:
//   1. the left side must be a field name (Q_ID) or a function call; a
//      function call is handed over to left_op_func_check();
//   2. the right side must be a constant or a function (func_check());
//   3. IN requires a list value with at most 20000 entries;
//   4. a vector field accepts EQ only, and only one vector relation is
//      allowed per condition tree;
//   5. any other field must be known to the schema as a forward field;
//   6. IS NULL / IS NOT NULL and LIKE are routed by dedicated logic;
//   7. a field with index params becomes an invert filter when its element
//      type is string, numeric or bool; otherwise the relation falls back to
//      the forward-filter checks.
//
// Returns CONTINUE when the node is acceptable. Returns BREAK on a
// violation; every BREAK produced directly in this function sets err_msg_
// first.
ControlOp SearchCondCheckWalker::access(const QueryNode::Ptr &query_node,
                                        bool or_ancestor) {
  // Propagate the OR-ancestor flag to every kind of node, not only to
  // relation nodes; it is mainly needed by the logic node
  // invert_subroot_node_.
  if (or_ancestor) {
    query_node->set_or_ancestor();
  }

  if (query_node->type() != QueryNode::QueryNodeType::REL_EXPR) {
    return ControlOp::CONTINUE;
  }

  // The const reference extends the lifetime of the temporary shared_ptr
  // returned by the cast.
  const QueryRelNode::Ptr &query_rel_node =
      std::dynamic_pointer_cast<QueryRelNode>(query_node);

  // NOTE(review): both operands are dereferenced below without a null check.
  const QueryNode::Ptr &left = query_rel_node->left();
  const QueryNode::Ptr &right = query_rel_node->right();

  // left side must be single field name or function
  if (left->op() != QueryNodeOp::Q_ID &&
      left->op() != QueryNodeOp::Q_FUNCTION_CALL) {
    err_msg_ =
        "left side in relation expr must be single field name or function "
        "call. " +
        query_node->text();
    return ControlOp::BREAK;
  }

  // A function call on the left side is validated (and classified) entirely
  // by left_op_func_check(); none of the checks below apply to it.
  if (left->op() == QueryNodeOp::Q_FUNCTION_CALL) {
    if (!left_op_func_check(query_rel_node)) {
      return ControlOp::BREAK;
    }
    return ControlOp::CONTINUE;
  }

  // right side supports a constant value or a function only
  if (right->type() != QueryNode::QueryNodeType::CONST &&
      right->type() != QueryNode::QueryNodeType::FUNC) {
    err_msg_ =
        "right side in relation expr support constant value or function "
        "only. " +
        query_node->text();
    return ControlOp::BREAK;
  }

  // Function check
  if (right->type() == QueryNode::QueryNodeType::FUNC) {
    if (func_check(right) != 0) {
      return ControlOp::BREAK;
    }
  }

  // IN check: IN only works with a list value of bounded size
  if (query_node->op() == QueryNodeOp::Q_IN) {
    if (right->op() != QueryNodeOp::Q_LIST_VALUE) {
      err_msg_ =
          "In rel expr only works with list value. " + query_node->text();
      return ControlOp::BREAK;
    }
    QueryListNode::Ptr list_node =
        std::dynamic_pointer_cast<QueryListNode>(right);
    if (list_node->value_expr_list().size() > 20000) {
      err_msg_ = "In rel expr only support list size no more than 20000 " +
                 query_node->text();
      return ControlOp::BREAK;
    }
  }

  std::string field_name = left->text();

  const zvec::FieldSchema *vector_field =
      table_ptr_.get_vector_field(field_name);

  // check vector index cond
  if (vector_field != nullptr) {
    // vector supports eq only.
    if (query_node->op() != QueryNodeOp::Q_EQ) {
      err_msg_ = ailego::StringHelper::Concat("vector field only support EQ. ",
                                              query_rel_node->text());
      return ControlOp::BREAK;
    }
    // more than one vector query check.
    if (vector_rel_ != NULL) {
      err_msg_ = ailego::StringHelper::Concat(
          "more than one vector search is not supported. ", vector_rel_->text(),
          " ", query_rel_node->text());
      return ControlOp::BREAK;
    }
    // Remember the relation as a raw, non-owning pointer.
    vector_rel_ = query_rel_node.get();
    query_rel_node->set_vector();
    // arrive here, it is a index condition.
    return ControlOp::CONTINUE;
  }

  const zvec::FieldSchema *forward_field =
      table_ptr_.get_forward_field(field_name);
  // field must have schema
  if (!forward_field) {
    err_msg_ = ailego::StringHelper::Concat("field not found in table schema: ",
                                            query_rel_node->text());
    return ControlOp::BREAK;
  }

  // An empty right-hand text is only allowed for string fields and for
  // IS NULL / IS NOT NULL.
  if (right->text().empty() &&
      (forward_field->element_data_type() != DataType::STRING &&
       query_node->op() != QueryNodeOp::Q_IS_NULL &&
       query_node->op() != QueryNodeOp::Q_IS_NOT_NULL)) {
    err_msg_ = ailego::StringHelper::Concat(
        "right side in relation expr is empty: ", query_node->text());
    return ControlOp::BREAK;
  }

  // Null checks need no value validation: an indexed field is served by the
  // invert filter, any other field by the forward filter.
  if (query_node->op() == QueryNodeOp::Q_IS_NULL ||
      query_node->op() == QueryNodeOp::Q_IS_NOT_NULL) {
    if (forward_field->index_params() != nullptr) {
      add_invert_filter(query_rel_node.get());
    } else {
      add_forward_filter(query_rel_node.get(), field_name);
    }
    return ControlOp::CONTINUE;
  }

  // LIKE check; check_like() also decides between invert and forward
  if (query_node->op() == QueryNodeOp::Q_LIKE) {
    if (!check_like(*forward_field, query_rel_node.get())) {
      return ControlOp::BREAK;
    }
    return ControlOp::CONTINUE;
  }


  // invert index analysis, if field exists on both forward and index,
  // as long as the cond conform to index cond criteria,
  // it is regarded as index cond, not forward cond.
  if (forward_field->index_params() != nullptr) {
    // Array fields / contain_* operators are fully handled by the helper; an
    // empty optional means "not applicable, keep checking".
    if (const auto ret = check_array_and_contain_compatible(
            query_rel_node, forward_field, true);
        ret != std::nullopt) {
      return ret.value();
    }
    // data type of index only support string, numeric and vector, and:
    // string supports all op,
    const auto field_data_type = forward_field->element_data_type();
    const bool is_string_field = field_data_type == zvec::DataType::STRING;
    // numeric supports all op except like, ( as well as bool )
    // NOTE(review): Q_LIKE has already returned above, so the op test below
    // looks like it is always true at this point.
    const bool is_numeric_field_without_like =
        query_node->op() != QueryNodeOp::Q_LIKE &&
        (is_numeric_type(field_data_type) ||
         field_data_type == zvec::DataType::BOOL);

    // if not satisfy, fall back to forward analysis
    if (is_string_field || is_numeric_field_without_like) {
      // Besides validating, this rewrites the text of numeric value nodes.
      if (!check_and_convert_value_type(field_data_type, right)) {
        err_msg_ = ailego::StringHelper::Concat(
            "field type and value type not match in relation expr. ",
            query_rel_node->text());
        return ControlOp::BREAK;
      }

      // bool op check
      if (field_data_type == zvec::DataType::BOOL) {
        if (query_node->op() != QueryNodeOp::Q_EQ &&
            query_node->op() != QueryNodeOp::Q_NE) {
          err_msg_ = "bool type only support EQ and NQ";
          return ControlOp::BREAK;
        }
      }

      add_invert_filter(query_rel_node.get());
      // arrive here, it is a index condition.
      return ControlOp::CONTINUE;
    }
  }

  // Compared with the index-field checks above, the forward-field checks:
  // 1. additionally accept the BINARY type
  // 2. validate `like op only on str` in the body instead of in the `if`
  //    condition
  // 3. use field_type_vs_value_type() instead of
  //    check_and_convert_value_type(), i.e. the value node is validated but
  //    its text is not rewritten
  if (forward_field != nullptr) {
    if (const auto ret = check_array_and_contain_compatible(
            query_rel_node, forward_field, false);
        ret != std::nullopt) {
      return ret.value();
    }
    // data type of forward only support binary, string, bool, int and float
    if (forward_field->element_data_type() == zvec::DataType::BINARY ||
        forward_field->element_data_type() == zvec::DataType::STRING ||
        forward_field->element_data_type() == zvec::DataType::BOOL ||
        is_numeric_type(forward_field->element_data_type())) {
      if (!field_type_vs_value_type(forward_field->element_data_type(),
                                    right)) {
        err_msg_ = ailego::StringHelper::Concat(
            "forward field type and value type not match in relation expr. ",
            query_rel_node->text());
        return ControlOp::BREAK;
      }

      // bool op check
      if (forward_field->element_data_type() == zvec::DataType::BOOL) {
        if (query_node->op() != QueryNodeOp::Q_EQ &&
            query_node->op() != QueryNodeOp::Q_NE) {
          err_msg_ = "bool type only support EQ and NQ";
          return ControlOp::BREAK;
        }
      }

      // like only works on string
      if (query_node->op() == QueryNodeOp::Q_LIKE &&
          forward_field->element_data_type() != zvec::DataType::STRING) {
        err_msg_ = "operator LIKE only works on string";
        return ControlOp::BREAK;
      }

      add_forward_filter(query_rel_node.get(), field_name);
      // arrive here, it is a forward.
      return ControlOp::CONTINUE;
    } else {
      err_msg_ = ailego::StringHelper::Concat(
          "unsupported data type in relation expr: ", query_rel_node->text());
      return ControlOp::BREAK;
    }
  } else {
    // NOTE(review): this schema-free branch looks unreachable, because a
    // null forward_field already returned BREAK with "field not found in
    // table schema" above.
    if (right->op() == QueryNodeOp::Q_VECTOR_MATRIX_VALUE) {
      err_msg_ = ailego::StringHelper::Concat(
          "vector vector not supported for schema free field in relation "
          "expr: ",
          query_rel_node->text());
      return ControlOp::BREAK;
    }
    if (right->type() != QueryNode::QueryNodeType::CONST) {
      err_msg_ = ailego::StringHelper::Concat(
          "only support const for schema free field in relation expr: ",
          query_rel_node->text());
      return ControlOp::BREAK;
    }
    add_forward_filter(query_rel_node.get(), field_name);
    // treat as schema free field forward
    return ControlOp::CONTINUE;
  }
}

// Validates a function node used on the right side of a relation.
//
// Only the function named by kFeature is accepted, and it must carry between
// one and four arguments. The arguments themselves are not inspected here;
// that happens later, while the vector is transformed.
//
// Returns 0 on success, or -1 with err_msg_ set.
int SearchCondCheckWalker::func_check(const QueryNode::Ptr &func_node) {
  const QueryFuncNode::Ptr func =
      std::dynamic_pointer_cast<QueryFuncNode>(func_node);

  // the function must be the feature function
  const std::string name = func->get_func_name_node()->text();
  if (name != kFeature) {
    err_msg_ = "Function is not supported. " + name;
    return -1;
  }

  const size_t arg_count = func->arguments().size();
  const bool arg_count_ok = arg_count >= 1 && arg_count <= 4;
  if (!arg_count_ok) {
    err_msg_ = "vector function has wrong number of arguments. ";
    return -1;
  }
  return 0;
}

// Validates a relation whose left side is the array_length function and
// registers it as a filter.
//
// Requirements, checked in this order:
//   - exactly one argument, which must be a field name (Q_ID);
//   - the field exists in the schema and is an array type;
//   - the relation operator is an arithmetic comparison;
//   - the right side is an integer literal.
// A field with index params is registered as an invert filter, after the
// right side has been converted as a UINT32 value; any other field is
// registered as a forward filter.
//
// Returns an empty expected on success, or the error message on failure.
tl::expected<void, std::string> SearchCondCheckWalker::array_length_func_check(
    const QueryFuncNode::Ptr &func_node_ptr,
    const QueryRelNode::Ptr &query_node) {
  const auto &args = func_node_ptr->arguments();
  if (args.size() != 1) {
    return tl::make_unexpected(
        "array_length function should have only one argument. ");
  }

  const auto &field_arg = args.front();
  if (field_arg->op() != QueryNodeOp::Q_ID) {
    return tl::make_unexpected(
        "array_length function argument must be a field name, got " +
        field_arg->op_name());
  }

  const auto *field_schema = table_ptr_.get_field(field_arg->text());
  if (field_schema == nullptr) {
    return tl::make_unexpected(
        "array_length argument not found in schema, with " +
        field_arg->text());
  }
  if (!field_schema->is_array_type()) {
    return tl::make_unexpected(
        "array_length only support array, got " +
        DataTypeCodeBook::AsString(field_schema->data_type()));
  }

  if (!is_arithematic_compare_op(query_node->op())) {
    return tl::make_unexpected(
        "array_length only support arithematic "
        "compare op, got " +
        query_node->op_name());
  }

  // the compared value has to be an integer literal
  auto &length_value = query_node->right();
  if (length_value->op() != QueryNodeOp::Q_INT_VALUE) {
    return tl::make_unexpected(
        "array_length right side only support integer, got " +
        length_value->op_name());
  }

  const bool has_index = field_schema->index_params() != nullptr;
  if (!has_index) {
    add_forward_filter(query_node.get(), field_arg->text());
    return {};
  }

  if (!check_and_convert_value_type(DataType::UINT32, length_value)) {
    return tl::make_unexpected(
        "array_length right side only support integer, got " +
        length_value->op_name());
  }
  add_invert_filter(query_node.get());
  return {};
}

// True for the six comparison operators: ==, !=, >, >=, < and <=.
bool SearchCondCheckWalker::is_arithematic_compare_op(QueryNodeOp op) {
  switch (op) {
    case QueryNodeOp::Q_EQ:
    case QueryNodeOp::Q_NE:
    case QueryNodeOp::Q_GT:
    case QueryNodeOp::Q_GE:
    case QueryNodeOp::Q_LT:
    case QueryNodeOp::Q_LE:
      return true;
    default:
      return false;
  }
}

// Validates a relation whose left side is a function call.
//
// kFuncArrayLength is the only function accepted on the left side; its
// detailed checks live in array_length_func_check().
//
// Returns true on success, or false with err_msg_ set.
bool SearchCondCheckWalker::left_op_func_check(
    const QueryRelNode::Ptr &query_node) {
  const QueryFuncNode::Ptr func =
      std::dynamic_pointer_cast<QueryFuncNode>(query_node->left());
  const std::string name = func->get_func_name_node()->text();

  if (name != kFuncArrayLength) {
    err_msg_ = "Function is not supported. " + name;
    return false;
  }

  const tl::expected<void, std::string> outcome =
      array_length_func_check(func, query_node);
  if (outcome.has_value()) {
    return true;
  }
  err_msg_ = outcome.error();
  return false;
}

// Validates a LIKE relation and registers it as an invert or a forward
// filter.
//
// The pattern must be a string literal. The relation is served by the
// forward filter when
//   - the field has no InvertIndexParams, or
//   - the pattern holds more than one unescaped '%' or any unescaped '_', or
//   - extended wildcards are disabled and the pattern does not end with its
//     single unescaped '%'.
// Otherwise it is served by the invert filter. A backslash escapes the
// character following it, which is then not counted as a wildcard.
//
// Returns false (with err_msg_ set) only when the pattern is not a string
// literal; returns true once the relation has been registered.
bool SearchCondCheckWalker::check_like(const zvec::FieldSchema &field,
                                       QueryRelNode *query_node) {
  auto *like_value_node = query_node->right_node();
  if (like_value_node->op() != QueryNodeOp::Q_STRING_VALUE) {
    err_msg_ = "like phrase only support string now.";
    return false;
  }
  std::string field_name = query_node->left_node()->text();
  const InvertIndexParams *param =
      dynamic_cast<InvertIndexParams *>(field.index_params().get());
  if (param == nullptr) {
    add_forward_filter(query_node, std::move(field_name));
    return true;
  }

  const std::string text = like_value_node->text();
  int percent_count = 0;
  int underscore_count = 0;
  size_t percent_loc = std::string::npos;
  for (size_t i = 0; i < text.size(); i++) {
    const char c = text[i];
    if (c == '\\') {
      // just ignore next character
      i++;
      continue;
    }
    if (c == '%') {
      percent_count++;
      percent_loc = i;
    } else if (c == '_') {
      underscore_count++;
    }
  }
  // invert support at most one '%', not support '_'
  if (percent_count > 1 || underscore_count > 0) {
    add_forward_filter(query_node, std::move(field_name));
    return true;
  }

  // Only a '%' that was actually found can be "at the end". Comparing
  // percent_loc with text.size() - 1 directly would wrap around for an empty
  // pattern and match npos, sending `LIKE ''` to the invert filter by
  // accident while every other wildcard-free pattern goes forward.
  const bool ends_with_percent =
      percent_count == 1 && percent_loc + 1 == text.size();

  // invert only support % at the end if extended wildcard is not enabled
  if (param->enable_extended_wildcard() || ends_with_percent) {
    add_invert_filter(query_node);
  } else {
    add_forward_filter(query_node, std::move(field_name));
  }
  return true;
}

// Checks that the literal held by `node` is acceptable for a forward field
// of type `data_type`. The node itself is never modified.
//
//   BINARY / STRING      -> string literal
//   BOOL                 -> bool literal
//   (U)INT32 / (U)INT64  -> integer literal
//   FLOAT / DOUBLE       -> float or integer literal
//   vector field types   -> vector matrix literal, or a feature function
// A list literal is delegated to field_type_vs_list_value_type().
bool SearchCondCheckWalker::field_type_vs_value_type(
    zvec::DataType data_type, const QueryNode::Ptr &node) {
  const QueryNodeOp literal_op = node->op();
  if (literal_op == QueryNodeOp::Q_LIST_VALUE) {
    return field_type_vs_list_value_type(data_type, node);
  }

  switch (data_type) {
    case zvec::DataType::BINARY:
    case zvec::DataType::STRING:
      if (literal_op != QueryNodeOp::Q_STRING_VALUE) {
        return false;
      }
      break;
    case zvec::DataType::BOOL:
      if (literal_op != QueryNodeOp::Q_BOOL_VALUE) {
        return false;
      }
      break;
    case zvec::DataType::INT32:
    case zvec::DataType::INT64:
    case zvec::DataType::UINT32:
    case zvec::DataType::UINT64:
      if (literal_op != QueryNodeOp::Q_INT_VALUE) {
        return false;
      }
      break;
    case zvec::DataType::FLOAT:
    case zvec::DataType::DOUBLE:
      // an integer literal is accepted for a floating point field as well
      if (literal_op != QueryNodeOp::Q_FLOAT_VALUE &&
          literal_op != QueryNodeOp::Q_INT_VALUE) {
        return false;
      }
      break;
    default:
      break;
  }

  if (!zvec::FieldSchema::is_vector_field(data_type)) {
    return true;
  }
  if (literal_op == QueryNodeOp::Q_VECTOR_MATRIX_VALUE) {
    return true;
  }
  if (literal_op != QueryNodeOp::Q_FUNCTION_CALL) {
    return false;
  }
  // a function is only acceptable when it is a feature function
  const QueryFuncNode::Ptr func =
      std::dynamic_pointer_cast<QueryFuncNode>(node);
  return func->is_feature_func();
}

// Forward-field check for a list literal. Lists are only allowed for
// string, numeric and bool fields, and every element has to pass the
// single-value check field_type_vs_value_type().
bool SearchCondCheckWalker::field_type_vs_list_value_type(
    zvec::DataType data_type, const QueryNode::Ptr &node) {
  const bool list_capable = data_type == zvec::DataType::STRING ||
                            is_numeric_type(data_type) ||
                            data_type == zvec::DataType::BOOL;
  if (!list_capable) {
    return false;
  }

  const QueryListNode::Ptr values =
      std::dynamic_pointer_cast<QueryListNode>(node);
  for (auto &element : values->value_expr_list()) {
    // stop at the first element of the wrong type
    if (!field_type_vs_value_type(data_type, element)) {
      return false;
    }
  }
  return true;
}

// Invert-index counterpart of field_type_vs_value_type(). Differences:
// 1. BINARY gets no type check here, for the invert index doesn't support it
// 2. for a numeric field the text of the value node is replaced by the
//    buffer that QueryInfoHelper::text_2_data_buf() produces for it
//
// Returns false when the literal kind does not fit `data_type` or when the
// numeric conversion fails. A list literal is delegated to
// check_and_convert_list_value_type().
bool SearchCondCheckWalker::check_and_convert_value_type(
    zvec::DataType data_type, const QueryNode::Ptr &node) {
  const QueryNodeOp literal_op = node->op();
  if (literal_op == QueryNodeOp::Q_LIST_VALUE) {
    return check_and_convert_list_value_type(data_type, node);
  }

  switch (data_type) {
    case zvec::DataType::STRING:
      if (literal_op != QueryNodeOp::Q_STRING_VALUE) {
        return false;
      }
      break;
    case zvec::DataType::BOOL:
      if (literal_op != QueryNodeOp::Q_BOOL_VALUE) {
        return false;
      }
      break;
    case zvec::DataType::INT32:
    case zvec::DataType::INT64:
    case zvec::DataType::UINT32:
    case zvec::DataType::UINT64:
      if (literal_op != QueryNodeOp::Q_INT_VALUE) {
        return false;
      }
      break;
    case zvec::DataType::FLOAT:
    case zvec::DataType::DOUBLE:
      // an integer literal is accepted for a floating point field as well
      if (literal_op != QueryNodeOp::Q_FLOAT_VALUE &&
          literal_op != QueryNodeOp::Q_INT_VALUE) {
        return false;
      }
      break;
    default:
      break;
  }

  if (zvec::FieldSchema::is_vector_field(data_type)) {
    if (literal_op == QueryNodeOp::Q_FUNCTION_CALL) {
      // a function is only acceptable when it is a feature function
      const QueryFuncNode::Ptr func =
          std::dynamic_pointer_cast<QueryFuncNode>(node);
      if (!func->is_feature_func()) {
        return false;
      }
    } else if (literal_op != QueryNodeOp::Q_VECTOR_MATRIX_VALUE) {
      return false;
    }
  }

  if (!is_numeric_type(data_type)) {
    return true;
  }

  // numeric value: convert the literal and store the result in the node
  std::string converted;
  const bool converted_ok =
      QueryInfoHelper::text_2_data_buf(node->text(), data_type, &converted);
  if (!converted_ok) {
    return false;
  }
  node->set_text(std::move(converted));
  return true;
}

// Invert-index check for a list literal. Lists are only allowed for string,
// numeric and bool fields; every element is run through
// check_and_convert_value_type(), which also converts numeric elements in
// place. Elements before a failing one stay converted.
bool SearchCondCheckWalker::check_and_convert_list_value_type(
    zvec::DataType data_type, const QueryNode::Ptr &node) {
  const bool list_capable = data_type == zvec::DataType::STRING ||
                            is_numeric_type(data_type) ||
                            data_type == DataType::BOOL;
  if (!list_capable) {
    return false;
  }

  const QueryListNode::Ptr values =
      std::dynamic_pointer_cast<QueryListNode>(node);
  for (auto &element : values->value_expr_list()) {
    // stop at the first element that cannot be checked and converted
    if (!check_and_convert_value_type(data_type, element)) {
      return false;
    }
  }
  return true;
}

// Rules for contain_* operators and array data types:
// 1. contain_* operators work on array fields only, and array fields accept
//    contain_* operators only
// 2. the right hand value must be a list
// 3. the list holds at most MAX_ARRAY_FIELD_LEN values
// 4. the list values must match the element type of the field,
//    e.g. array_int32 containing a list of int64 is invalid
// 5. as for `in`, only string, numeric and bool lists are allowed
// 6. (same as for other fields) if the field exists on both forward and
//    index, the invert index has the higher priority
//
// Returns an empty optional when neither an array field nor a contain_*
// operator is involved, so the caller goes on with its own checks. Otherwise
// returns BREAK (with err_msg_ set) on a violation, or CONTINUE after the
// relation has been registered as an invert or forward filter, depending on
// `is_invert_field`.
std::optional<ControlOp>
SearchCondCheckWalker::check_array_and_contain_compatible(
    const QueryRelNode::Ptr &query_rel_node, const FieldSchema *field,
    bool is_invert_field) {
  const QueryNode::Ptr &right = query_rel_node->right();

  const QueryNodeOp rel_op = query_rel_node->op();
  const bool uses_contain = rel_op == QueryNodeOp::Q_CONTAIN_ALL ||
                            rel_op == QueryNodeOp::Q_CONTAIN_ANY;
  const bool array_field = field->is_array_type();

  // nothing to check here
  if (!array_field && !uses_contain) {
    return std::nullopt;
  }

  // rule 1: the two flags must agree
  if (array_field != uses_contain) {
    err_msg_ = ailego::StringHelper::Concat(
        "Contain_* rel expr only works with array data type and "
        "array data type only works with contain_* op. filter: ",
        query_rel_node->text());
    return ControlOp::BREAK;
  }

  // rule 2
  if (right->op() != QueryNodeOp::Q_LIST_VALUE) {
    err_msg_ = ailego::StringHelper::Concat(
        "Contain_* rel expr only works with list value. filter: ",
        query_rel_node->text());
    return ControlOp::BREAK;
  }

  // rule 3
  const QueryListNode::Ptr values =
      std::dynamic_pointer_cast<QueryListNode>(right);
  if (values->value_expr_list().size() > MAX_ARRAY_FIELD_LEN) {
    err_msg_ = ailego::StringHelper::Concat(
        "Contain_* rel expr only support list size no more than ",
        ailego::StringHelper::ToString(MAX_ARRAY_FIELD_LEN), ": ",
        query_rel_node->text());
    return ControlOp::BREAK;
  }

  // rules 4 and 5: both value checkers hand a list over to their list
  // variant, which restricts the field type and checks every element. The
  // invert variant additionally converts numeric elements.
  bool values_match = false;
  if (is_invert_field) {
    values_match =
        check_and_convert_value_type(field->element_data_type(), right);
  } else {
    values_match = field_type_vs_value_type(field->element_data_type(), right);
  }
  if (!values_match) {
    err_msg_ = ailego::StringHelper::Concat(
        "field type and value type not match in relation expr. ",
        query_rel_node->text());
    return ControlOp::BREAK;
  }

  // all checks passed, register the relation
  if (is_invert_field) {
    add_invert_filter(query_rel_node.get());
  } else {
    add_forward_filter(query_rel_node.get(), query_rel_node->left()->text());
  }
  return ControlOp::CONTINUE;
}

// Registers `query_rel_node` as a forward filter: its field name and the
// relation are appended to their lists, then the relation is marked as
// FORWARD. The pointer is stored as-is, without taking ownership.
void SearchCondCheckWalker::add_forward_filter(QueryRelNode *query_rel_node,
                                               std::string forward_field_name) {
  forward_filter_field_names_.push_back(std::move(forward_field_name));
  filter_rels_.emplace_back(query_rel_node);
  query_rel_node->set_forward();
}

// Registers `query_rel_node` as an invert filter: the relation is appended
// to invert_rels_ and marked as INVERT. The pointer is stored as-is, without
// taking ownership.
void SearchCondCheckWalker::add_invert_filter(QueryRelNode *query_rel_node) {
  invert_rels_.emplace_back(query_rel_node);
  query_rel_node->set_invert();
}

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/analyzer/query_node_walker.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <optional>
#include <string>
#include <vector>
#include <zvec/ailego/pattern/expected.hpp>
#include <zvec/db/type.h>
#include "db/sqlengine/analyzer/query_node.h"
#include "query_info.h"

namespace zvec::sqlengine {

// Outcome of visiting a node during a condition-tree walk: CONTINUE keeps
// the traversal going, BREAK stops it.
enum class ControlOp { CONTINUE, BREAK };

// Walks a search-condition tree, validates every relation expression against
// the collection schema and sorts the relations into three groups: the
// single vector relation, invert-index filters and forward filters.
//
// All QueryRelNode pointers handed out are raw, non-owning pointers into the
// tree that was traversed, and the schema is kept by reference; both must
// outlive the walker and its results.
class SearchCondCheckWalker {
 public:
  SearchCondCheckWalker(const zvec::CollectionSchema &table_ptr);

  // Visits `query_node` and its subtree. Returns BREAK when the node is null
  // or a check failed (see err_msg()), CONTINUE otherwise. `or_ancestor`
  // tells whether the node already sits below an OR node.
  ControlOp traverse_cond_node(const QueryNode::Ptr &query_node,
                               bool or_ancestor = false);


  // Field names of the forward filters, in registration order.
  const std::vector<std::string> &forward_filter_field_names() const {
    return forward_filter_field_names_;
  }

  // The vector relation that was found, or nullptr when there is none.
  QueryRelNode *vector_rel() const {
    return vector_rel_;
  }

  // Relations classified as invert-index filters.
  const std::vector<QueryRelNode *> &invert_rels() const {
    return invert_rels_;
  }

  // Relations classified as forward filters.
  const std::vector<QueryRelNode *> &filter_rels() const {
    return filter_rels_;
  }

  // Copy of the last recorded error message; empty when nothing failed.
  const std::string err_msg() const {
    return err_msg_;
  }

 private:
  // Checks and classifies a single node.
  ControlOp access(const QueryNode::Ptr &query_node, bool or_ancestor);

  // Array / contain_* compatibility check. An empty optional means the check
  // does not apply to the relation.
  std::optional<ControlOp> check_array_and_contain_compatible(
      const QueryRelNode::Ptr &query_rel_node, const FieldSchema *field,
      bool is_invert_field);

  // Function checks: right-side function, left-side function, and the
  // array_length function in particular.
  int func_check(const QueryNode::Ptr &func_node);
  bool left_op_func_check(const QueryRelNode::Ptr &query_node);
  tl::expected<void, std::string> array_length_func_check(
      const QueryFuncNode::Ptr &func_node, const QueryRelNode::Ptr &query_node);
  bool is_arithematic_compare_op(QueryNodeOp op);
  bool check_like(const zvec::FieldSchema &field, QueryRelNode *query_node);

  // Value type checks for forward fields; the value node is left untouched.
  bool field_type_vs_value_type(zvec::DataType data_type,
                                const QueryNode::Ptr &node);

  bool field_type_vs_list_value_type(zvec::DataType data_type,
                                     const QueryNode::Ptr &node);

  // Value type checks for invert-index fields; the text of numeric value
  // nodes is rewritten.
  bool check_and_convert_value_type(zvec::DataType data_type,
                                    const QueryNode::Ptr &node);

  bool check_and_convert_list_value_type(zvec::DataType data_type,
                                         const QueryNode::Ptr &node);

  // Registration of a classified relation.
  void add_forward_filter(QueryRelNode *query_rel_node,
                          std::string forward_field_name);
  void add_invert_filter(QueryRelNode *query_rel_node);

 private:
  std::string err_msg_;
  const CollectionSchema &table_ptr_;
  std::vector<std::string> forward_filter_field_names_{};

  QueryRelNode *vector_rel_{nullptr};
  std::vector<QueryRelNode *> filter_rels_{};
  std::vector<QueryRelNode *> invert_rels_{};

  // Name of the only function accepted on the right side of a relation.
  static inline const std::string kFeature = "feature";
};

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/analyzer/query_orderby_info.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "query_orderby_info.h"

namespace zvec::sqlengine {

//! Default constructor. The field-schema pointer is explicitly cleared so that
//! field_schema_ptr() returns nullptr (instead of an indeterminate value)
//! until set_field_schema_ptr() is called.
QueryOrderbyInfo::QueryOrderbyInfo() : field_schema_ptr_(nullptr) {}

//! Construct an order-by element for `m_field_name`.
//!
//! \param m_field_name  name of the field to sort on
//! \param m_desc        true for descending order, false for ascending
//!
//! The field-schema pointer starts out as nullptr; it is only filled in by
//! set_field_schema_ptr().
QueryOrderbyInfo::QueryOrderbyInfo(const std::string &m_field_name, bool m_desc)
    : field_name_(m_field_name), desc_(m_desc), field_schema_ptr_(nullptr) {}


//! Render this element as "<field name> DESC" or "<field name> ASC".
std::string QueryOrderbyInfo::to_string() const {
  const char *direction = desc_ ? "DESC" : "ASC";
  return field_name_ + " " + direction;
}

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/analyzer/query_orderby_info.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include <string>
#include <zvec/db/schema.h>

namespace zvec::sqlengine {

//! One ORDER BY element: the field to sort on, the sort direction and an
//! optional pointer to the schema of that field.
class QueryOrderbyInfo {
 public:
  using Ptr = std::shared_ptr<QueryOrderbyInfo>;

  QueryOrderbyInfo();
  QueryOrderbyInfo(const std::string &m_field_name, bool m_desc);
  ~QueryOrderbyInfo() = default;

  //! Set the name of the field to sort on.
  void set_field_name(const std::string &value) {
    field_name_ = value;
  }

  //! Name of the field to sort on.
  const std::string &field_name() const {
    return field_name_;
  }

  //! Switch to descending order (there is no way back to ascending).
  void set_desc() {
    desc_ = true;
  }
  //! True when the sort direction is descending.
  bool is_desc() const {
    return desc_;
  }

  //! Remember the schema of the sort field. The pointer is stored as is; this
  //! class never deletes it.
  void set_field_schema_ptr(const zvec::FieldSchema *field_schema_ptr) {
    field_schema_ptr_ = field_schema_ptr;
  }
  //! Schema of the sort field, or nullptr when none has been set.
  const zvec::FieldSchema *field_schema_ptr() const {
    return field_schema_ptr_;
  }

  //! Textual form: "<field name> ASC" or "<field name> DESC".
  std::string to_string() const;

 private:
  std::string field_name_{""};
  bool desc_{false};

  // Initialized to nullptr so that reading it before set_field_schema_ptr()
  // is well defined.
  const zvec::FieldSchema *field_schema_ptr_{nullptr};
};

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/analyzer/simple_rewriter.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "simple_rewriter.h"
#include <array>
#include <memory>
#include <vector>
#include "db/sqlengine/analyzer/query_node.h"

namespace zvec::sqlengine {

//! Run every rewrite rule on the search condition of `query_info`. When at
//! least one rule changed the tree, simplify it and log the filter text before
//! and after the rewrite.
//!
//! \param query_info  query whose search condition is rewritten in place; a
//!                    null pointer or a missing search condition is a no-op
void SimpleRewriter::rewrite(QueryInfo *query_info) {
  if (query_info == nullptr) {
    return;
  }
  auto query_node = query_info->search_cond();
  if (query_node == nullptr) {
    return;
  }
  // Captured up front so the log can show the filter as it was before any
  // rule touched it.
  std::string before_rewrite = query_node->text();

  EqualOrRewriteRule equal_or_rule;
  ContainRewriteRule contain_rule;
  std::array<RewriteRule *, 2> rewrite_rules{
      &equal_or_rule,
      &contain_rule,
  };
  bool rewrited = false;
  for (auto &rule : rewrite_rules) {
    // The rule call comes first on purpose: every rule must run even after an
    // earlier one already reported a change.
    rewrited = rule->rewrite(query_node) || rewrited;
  }
  if (!rewrited) {
    return;
  }

  simplify_tree(query_node, query_info);

  // simplify_tree() may detach or replace nodes of the search condition, so
  // re-read it and do not assume it is still present.
  std::string after_rewrite;
  auto rewritten_cond = query_info->search_cond();
  if (rewritten_cond != nullptr) {
    after_rewrite = rewritten_cond->text();
  }
  LOG_INFO("Rewrite filter. before[%s] after[%s]", before_rewrite.c_str(),
           after_rewrite.c_str());
}

//! Post-order clean-up of LOGIC_EXPR nodes that lost children during a
//! rewrite: a logic node with a single remaining child is replaced by that
//! child, a logic node with no child at all is detached.
void SimpleRewriter::simplify_tree(QueryNode::Ptr query_node,
                                   QueryInfo *query_info) {
  const bool is_logic_node =
      query_node != nullptr &&
      query_node->type() == QueryNode::QueryNodeType::LOGIC_EXPR;
  if (!is_logic_node) {
    return;
  }

  // Children first, so this node is judged on their already simplified state.
  simplify_tree(query_node->left(), query_info);
  simplify_tree(query_node->right(), query_info);

  const bool has_left = query_node->left() != nullptr;
  const bool has_right = query_node->right() != nullptr;
  if (has_left && has_right) {
    return;  // still a proper binary node
  }

  if (has_left) {
    query_node->replace_from_search_cond(query_node->left(), query_info);
  } else if (has_right) {
    query_node->replace_from_search_cond(query_node->right(), query_info);
  } else {
    query_node->detach_from_search_cond(query_info);
  }
}

//! Entry point of the rule: walk the whole tree and report whether any
//! predicates were merged into a Q_IN node.
bool EqualOrRewriteRule::rewrite(QueryNode::Ptr query_node) {
  // The root has no parent, hence it is not nested under an OR.
  constexpr bool kParentIsOr = false;
  rewrite_impl(kParentIsOr, std::move(query_node));
  return rewrited_;
}

//! Walk the filter tree and fold Q_EQ (or Q_NE) predicates on the same left
//! operand that are joined by OR into a single Q_IN node.
//!
//! \param is_or       whether the direct parent of `query_node` is a Q_OR node
//! \param query_node  subtree to process; nullptr is ignored
//!
//! `cur_` holds the current merge candidate: the last Q_EQ/Q_NE leaf seen, or
//! the Q_IN node it has already been turned into. `rewrited_` is set when a
//! new Q_IN node is created.
//!
//! NOTE(review): Q_NE predicates are folded the same way, into a list marked
//! `exclude`. Whether `a != x OR a != y` is equivalent to that excluded list
//! depends on QueryListNode semantics that are not visible here; confirm.
void EqualOrRewriteRule::rewrite_impl(bool is_or, QueryNode::Ptr query_node) {
  if (query_node == nullptr) {
    return;
  }
  if (query_node->type() == QueryNode::QueryNodeType::LOGIC_EXPR) {
    bool is_cur_or = query_node->op() == QueryNodeOp::Q_OR;
    // A merge candidate is only valid inside one chain of directly nested OR
    // nodes. This node is a chain boundary when it is not an OR itself, or
    // when it is an OR whose parent is not an OR (it starts a new chain).
    // The candidate is dropped both when entering and when leaving such a
    // boundary; otherwise a leaf from one chain could be merged into another,
    // e.g. `(x OR a=1) AND (a=2 OR y)` would wrongly move `a=2` into the left
    // branch.
    const bool chain_boundary = !is_cur_or || !is_or;
    if (chain_boundary) {
      cur_ = nullptr;
    }
    rewrite_impl(is_cur_or, query_node->left());
    rewrite_impl(is_cur_or, query_node->right());
    if (chain_boundary) {
      cur_ = nullptr;
    }
    return;
  }
  // Leaves that are not direct operands of an OR are never merged.
  if (!is_or) {
    return;
  }
  if (query_node->op() == QueryNodeOp::Q_EQ ||
      query_node->op() == QueryNodeOp::Q_NE) {
    bool is_ne = query_node->op() == QueryNodeOp::Q_NE;
    if (cur_ == nullptr || !cur_->left()->is_matched(*query_node->left())) {
      // No candidate yet, or it is about a different left operand: start over
      // with this leaf.
      cur_ = query_node;
    } else {
      if (cur_->op() == QueryNodeOp::Q_IN) {
        // The candidate is a Q_IN node built by this rule; extend its list
        // when the polarity (EQ vs NE) agrees.
        QueryListNode::Ptr list =
            std::dynamic_pointer_cast<QueryListNode>(cur_->right());
        if (is_ne == list->exclude()) {
          list->add_value_expr(query_node->right());
          // detach from parent
          query_node->detach_from_parent();
        } else {
          cur_ = query_node;
        }
      } else {  // EQ || NE
        if (query_node->op() == cur_->op()) {
          // create in node
          QueryListNode::Ptr list = std::make_shared<QueryListNode>();
          list->add_value_expr(cur_->right());
          list->add_value_expr(query_node->right());
          list->set_exclude(is_ne);
          auto in_node = std::make_shared<QueryRelNode>();
          in_node->set_left(cur_->left());
          in_node->set_right(std::move(list));
          in_node->set_op(QueryNodeOp::Q_IN);
          // detach from parent
          query_node->detach_from_parent();
          // The new Q_IN node takes the place of the first predicate.
          cur_->replace_from_parent(in_node);
          cur_ = std::move(in_node);
          rewrited_ = true;
        } else {
          // Same operand but mixed EQ/NE: not mergeable, restart from here.
          cur_ = query_node;
        }
      }
    }
  }
}

//! Null-safe accessor for a node's predicate result: a missing node yields an
//! empty optional, otherwise whatever the node itself reports.
std::optional<bool> get_predicate_result(const QueryNode *ptr) {
  if (ptr != nullptr) {
    return ptr->predictate_result();
  }
  return std::nullopt;
}

//! Rewrite CONTAIN_ALL / CONTAIN_ANY predicates that have an empty value list.
//!
//! Leaves:
//!  - `not contain_all ()` and `contain_any ()` get a predicate result of
//!    false;
//!  - `contain_all ()` and `not contain_any ()` become `is not null`.
//! Logic nodes propagate a false predicate result upwards (AND) or drop the
//! false operand (OR).
//!
//! \param query_node  subtree to process; nullptr is ignored
//! \return true when something in the subtree was rewritten
bool ContainRewriteRule::rewrite(QueryNode::Ptr query_node) {
  if (query_node == nullptr) {
    return false;
  }
  if (query_node->type() == QueryNode::QueryNodeType::LOGIC_EXPR) {
    // Both subtrees must always be visited. Combining the two calls with `||`
    // directly would skip the right subtree whenever the left one reported a
    // change.
    const bool left_rewrited = rewrite(query_node->left());
    const bool right_rewrited = rewrite(query_node->right());
    bool rewrited = left_rewrited || right_rewrited;
    auto left_result = get_predicate_result(query_node->left().get());
    auto right_result = get_predicate_result(query_node->right().get());
    // ContainRewrite can only generate false predict result value
    if (left_result.has_value() || right_result.has_value()) {
      if (query_node->op() == QueryNodeOp::Q_AND) {
        // One false operand makes the whole conjunction false.
        query_node->set_predictate_result(false);
      } else if (query_node->op() == QueryNodeOp::Q_OR) {
        // if left is false
        if (left_result.has_value()) {
          // if right is null or false
          if (right_result.has_value() || query_node->right() == nullptr) {
            query_node->set_predictate_result(false);
          } else {  // if right is not null and not false
            query_node->left()->detach_from_parent();
          }
        } else {
          if (right_result.has_value()) {
            if (query_node->left() == nullptr) {
              // set predict result to false if left is null
              query_node->set_predictate_result(false);
            } else {
              // detach right if left is not null and not false
              query_node->right()->detach_from_parent();
            }
          }
        }
      }
    }
    return rewrited;
  }
  auto op = query_node->op();
  if (op != QueryNodeOp::Q_CONTAIN_ALL && op != QueryNodeOp::Q_CONTAIN_ANY) {
    return false;
  }
  auto list_node =
      std::dynamic_pointer_cast<QueryListNode>(query_node->right());
  // Leave the node alone when the right operand is not a list node, or when
  // the list is not empty: only the empty-list form is rewritten.
  if (list_node == nullptr || !list_node->value_expr_list().empty()) {
    return false;
  }
  if ((list_node->exclude() && op == QueryNodeOp::Q_CONTAIN_ALL) ||
      (!list_node->exclude() && op == QueryNodeOp::Q_CONTAIN_ANY)) {
    // `not contain_all ()` evaluates to false
    // `contain_any ()` evaluates to false
    query_node->set_predictate_result(false);
    return true;
  }
  // `contain_all()` or `not contain_any()` rewrite to `is not null`
  query_node->set_op(QueryNodeOp::Q_IS_NOT_NULL);
  auto right = std::make_shared<QueryConstantNode>("");
  right->set_op(QueryNodeOp::Q_NULL_VALUE);
  query_node->set_right(std::move(right));
  return true;
}


}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/analyzer/simple_rewriter.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "db/sqlengine/analyzer/query_info.h"

namespace zvec::sqlengine {

//! Driver that applies the rewrite rules declared below (EqualOrRewriteRule,
//! ContainRewriteRule) to the search condition of a query.
class SimpleRewriter {
 public:
  SimpleRewriter() = default;

  //! Rewrite query_info->search_cond and simplify tree
  void rewrite(QueryInfo *query_info);

 private:
  //! Collapse logic nodes under `query_node` that were left with fewer than
  //! two children by a rewrite.
  void simplify_tree(QueryNode::Ptr query_node, QueryInfo *query_info);
};

//! Interface of a single filter rewrite rule.
class RewriteRule {
 public:
  RewriteRule() = default;
  //! Virtual so that a rule can be destroyed safely through a RewriteRule
  //! pointer.
  virtual ~RewriteRule() = default;

  //! Rewrite filter, return whether successfully rewrited.
  virtual bool rewrite(QueryNode::Ptr query_node) = 0;

 protected:
  //! Set by implementations once they have changed the tree.
  bool rewrited_{false};
};

//! EqualOrRewriteRule folds Q_EQ / Q_NE predicates on the same left operand
//! that are joined by OR into one Q_IN node.
class EqualOrRewriteRule : public RewriteRule {
 public:
  EqualOrRewriteRule() = default;

  bool rewrite(QueryNode::Ptr query_node) override;

 private:
  //! Recursive worker; `is_or` tells whether the parent of `query_node` is a
  //! Q_OR node.
  void rewrite_impl(bool is_or, QueryNode::Ptr query_node);

 private:
  //! Current merge candidate while walking the tree (nullptr when none).
  QueryNode::Ptr cur_;
};

// ContainRewriteRule rewrites contain_all/any () with an empty value list:
// the predicate either gets a false predicate result or is turned into
// `is not null`.
class ContainRewriteRule : public RewriteRule {
 public:
  ContainRewriteRule() = default;

  //! Returns true when a predicate in the subtree was rewritten.
  bool rewrite(QueryNode::Ptr query_node) override;
};

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/antlr/SQLLexer.g4
================================================
lexer grammar SQLLexer;

channels{COMMENTS}

// Keyword tokens, matched exactly as written (upper case). They are declared
// ahead of REGULAR_ID, the catch-all identifier rule at the end of this file.
OR:                           'OR';
AND:                          'AND';
NOT:                          'NOT';
IN:                           'IN';
CONTAIN_ALL:                  'CONTAIN_ALL';
CONTAIN_ANY:                  'CONTAIN_ANY';
BETWEEN:                      'BETWEEN';
LIKE:                         'LIKE';
WHERE:						            'WHERE';
SELECT:						            'SELECT';
FROM:                         'FROM';
AS:							              'AS';
BY:							              'BY';
ORDER:						            'ORDER';
ASC:						              'ASC';
DESC:						              'DESC';
LIMIT:						            'LIMIT';
TRUE_V:                         'TRUE';
FALSE_V:                        'FALSE';
IS:                           'IS';
NULL_V:                         'NULL';

// Numeric literals. `fragment` only applies to the rule directly after it:
// UNSIGNED_INTEGER is a fragment, INTEGER is a real token.
fragment
UNSIGNED_INTEGER: UNSIGNED_INTEGER_FRAGMENT;
// Optionally signed run of digits.
INTEGER: MINUS_SIGN? UNSIGNED_INTEGER;

// Mantissa, optional exponent introduced by 'E', optional 'D' / 'F' suffix.
fragment
APPROXIMATE_NUM_LIT: FLOAT_FRAGMENT ('E' ('+'|'-')? (FLOAT_FRAGMENT | UNSIGNED_INTEGER_FRAGMENT))? ('D' | 'F')?;
// NOTE(review): the '.' in FLOAT_FRAGMENT is optional, so a plain digit run
// also matches FLOAT; INTEGER is declared first and presumably wins that tie.
FLOAT: MINUS_SIGN? APPROXIMATE_NUM_LIT;

// Quoted strings. The quotes are part of the match; a backslash escapes the
// character that follows it.
SQUOTA_STRING: '\'' (~('\'' | '\\') | '\\'. )* '\'';
DQUOTA_STRING: '"' (~('"' | '\\') | '\\'. )* '"';


// Punctuation and operator tokens.
DOT: '.';
LP: '(';
RP: ')';
LMP: '[';
RMP: ']';
ASTERISK: '*';
PLUS_SIGN: '+';
MINUS_SIGN: '-';
COMMA: ',';
SOLIDUS: '/';
MOD: '%';
AT_SIGN: '@';
ASSIGN_OP: ':=';
SHARP_SIGN: '#';

COLON: ':';
SEMI: ';';
LE_OP: '<=';
GE_OP: '>=';
NE_OP: '!=';
CARET_OP: '^';
TILDE_OP: '~';
L_OP: '<';
G_OP: '>';
E_OP: '=';
CONCAT_OP: '||';
UNDERSCORE: '_';

// Whitespace between tokens is dropped.
SPACES: [ \t\r\n]+ -> skip;

// Letters usable in identifiers.
// NOTE(review): only upper-case A-Z is listed; presumably the input is
// upper-cased before it reaches the lexer. Confirm against the caller.
fragment
SIMPLE_LETTER
    : [A-Z]
    ;

// One or more decimal digits.
fragment
UNSIGNED_INTEGER_FRAGMENT: [0-9]+ ;

// Digits with an optional '.' before the last digit group, e.g. `12.5`, `.5`
// or just `12`.
fragment
FLOAT_FRAGMENT
    : UNSIGNED_INTEGER* '.'? UNSIGNED_INTEGER+
    ;


// A whole bracketed vector literal as ONE token: '[' followed by any mix of
// minus signs, numbers, commas and whitespace, then ']'.
VECTOR
    : LMP (MINUS_SIGN|UNSIGNED_INTEGER_FRAGMENT|FLOAT_FRAGMENT|','| SPACES)+ RMP
    ;

// Comments are routed to the COMMENTS channel instead of the default one.
// `-- ...` runs to the end of the line (or of the input); `/* ... */` is
// matched non-greedily.
SINGLE_LINE_COMMENT: '--' ~('\r' | '\n')* (NEWLINE | EOF)   -> channel(COMMENTS);
MULTI_LINE_COMMENT: '/*' .*? '*/'                           -> channel(COMMENTS);

fragment
NEWLINE: '\r'? '\n';

// Catch-all identifier: letters, '_', '-' and digits in any order. Declared
// last so that the more specific token rules above come first.
REGULAR_ID: (SIMPLE_LETTER | '_' | '-' | [0-9])+;


================================================
FILE: src/db/sqlengine/antlr/SQLParser.g4
================================================
parser grammar SQLParser;

options {tokenVocab=SQLLexer;}

// Matches one or more tokens of any kind except SEMI.
swallow_to_semi
    : ~SEMI+
    ;

// Entry rule for full statements: one or more statements, each optionally
// followed by '/' or ';', up to the end of the input.
compilation_unit
    : (unit_statement (SOLIDUS | SEMI)?)+ EOF
    ;

// Entry rule for a stand-alone filter expression (no SELECT around it).
logic_expr_unit
    : logic_expr EOF
    ;

// Only query (DQL) statements are supported.
unit_statement
    : dql_statement
    ;

where_clause
    : WHERE logic_expr
    ;

// Boolean filter expression. The AND alternative is listed before the OR
// alternative; in an ANTLR 4 left-recursive rule the earlier alternative has
// the higher precedence, so AND binds tighter than OR.
logic_expr
    : relation_expr
    | logic_expr AND logic_expr
    | logic_expr OR logic_expr
    | enclosed_expr
    ;

// Parenthesized sub-expression.
enclosed_expr
    : LP logic_expr RP
    ;

// A single predicate: comparison, LIKE, [NOT] IN (...),
// [NOT] CONTAIN_ALL / CONTAIN_ANY (...) with an optional value list,
// IS [NOT] NULL, or a function call compared with a value.
relation_expr
    : identifier rel_oper value_expr
    | identifier LIKE value_expr
    | identifier NOT? IN LP in_value_expr_list RP
     //LMP'[' RMP']' only used in vector representation
    | identifier NOT? (CONTAIN_ALL | CONTAIN_ANY) LP in_value_expr_list? RP
    | identifier IS NOT? NULL_V
    | function_call rel_oper value_expr
    ;

// Comparison operators: =, !=, <, >, <=, >=.
rel_oper
    : E_OP
    | ne_op
    | L_OP
    | G_OP
    | le_op
    | ge_op
    ;

// Right-hand side of a comparison: a literal or a function call.
value_expr
    : constant
    | function_call
    ;

// Comma-separated, non-empty list used by IN / CONTAIN_ALL / CONTAIN_ANY.
in_value_expr_list
    : in_value_expr (COMMA in_value_expr)*
    ;

// List elements are numbers, strings or booleans (no vectors, no functions).
in_value_expr
    : constant_num_and_str
    | bool_value
    ;

// Any literal: number, quoted string, vector / matrix, or boolean.
constant
    : numeric
    | quoted_string
    | vector_expr
    | bool_value
    ;

constant_num_and_str
    : numeric
    | quoted_string
    ;

// A bracketed, comma-separated list of VECTOR tokens, e.g. `[[1,2],[3,4]]`.
matrix
    : LMP VECTOR (COMMA VECTOR)* RMP
    ;

vector_expr
    : VECTOR
    | matrix
    ;

// Function arguments may be values or field identifiers.
function_value_expr
    : value_expr
    | identifier
    ;

// name(arg, arg, ...) with an optional argument list.
function_call
    : identifier LP (function_value_expr (COMMA function_value_expr)*)? RP
    ;

dql_statement
    : select_statement
    ;

// SELECT ... FROM ... [WHERE ...] [ORDER BY ...] [LIMIT n]
select_statement
    : SELECT selected_elements from_clause where_clause? order_by_clause? limit_clause?
    ;

selected_elements
    : selected_element (COMMA selected_element)*
    ;

// Either `*` or a field with an optional alias.
// NOTE(review): AS is optional both here and inside field_alias, so the
// grammar as written also accepts `field AS AS alias`; confirm this is
// intended.
selected_element
    : ASTERISK
    | field_name AS? field_alias?
    ;

from_clause
    : FROM tableview_name
    ;

order_by_clause
    : ORDER BY order_by_element (COMMA order_by_element)*
    ;

// Sort direction is optional.
order_by_element
    : field_name (ASC | DESC)?
    ;

limit_clause
    : LIMIT int_value
    ;


// $>

/********* schema objects names *********/


// Names of schema objects; all of them are plain identifiers.
tableview_name
    : identifier
    ;

field_name
    : identifier
    ;

table_alias
    : identifier
    ;

field_alias
    : AS? identifier
    ;

// Literal building blocks.
numeric
    : int_value
    | float_value
    ;

int_value
    : INTEGER
    ;

float_value
    : FLOAT
    ;

// Single- or double-quoted string token.
quoted_string
    : SQUOTA_STRING
	| DQUOTA_STRING
    ;
bool_value
	: TRUE_V
	| FALSE_V
	;

identifier
    : regular_id
    ;

ne_op
    : NE_OP
    ;

// `>=` as one token, or `>` followed by a separate `=` token.
ge_op
    : GE_OP
    | G_OP E_OP
    ;

// `<=` as one token, or `<` followed by a separate `=` token.
le_op
    : LE_OP
    | L_OP E_OP
    ;

// An identifier is a REGULAR_ID token or one of the keywords listed here,
// which lets those keywords be used as names.
regular_id
    : REGULAR_ID
	| OR
	| AND
	| NOT
	| IN
	| BETWEEN
	| LIKE
	| WHERE
	| SELECT
	| AS
	| BY
	| ORDER
	| ASC
	| DESC
	| LIMIT
	;


================================================
FILE: src/db/sqlengine/antlr/gen/SQLLexer.cc
================================================

// Generated from SQLLexer.g4 by ANTLR 4.8


#include "SQLLexer.h"


using namespace antlr4;

using namespace antlr4;

// Builds a lexer over `input` and allocates the ATN simulator that drives it,
// wired to the class-wide ATN, DFA table and prediction-context cache.
SQLLexer::SQLLexer(CharStream *input) : Lexer(input) {
  _interpreter = new atn::LexerATNSimulator(this, _atn, _decisionToDFA,
                                            _sharedContextCache);
}

// Releases the simulator allocated by the constructor.
SQLLexer::~SQLLexer() {
  delete _interpreter;
}

// Name of the grammar file this lexer was generated from.
std::string SQLLexer::getGrammarFileName() const {
  return "SQLLexer.g4";
}

// The accessors below expose the static tables defined further down in this
// file.
const std::vector<std::string> &SQLLexer::getRuleNames() const {
  return _ruleNames;
}

const std::vector<std::string> &SQLLexer::getChannelNames() const {
  return _channelNames;
}

const std::vector<std::string> &SQLLexer::getModeNames() const {
  return _modeNames;
}

const std::vector<std::string> &SQLLexer::getTokenNames() const {
  return _tokenNames;
}

dfa::Vocabulary &SQLLexer::getVocabulary() const {
  return _vocabulary;
}

// Returns the serialized ATN by value, i.e. as a copy of the static vector.
const std::vector<uint16_t> SQLLexer::getSerializedATN() const {
  return _serializedATN;
}

const atn::ATN &SQLLexer::getATN() const {
  return _atn;
}


// Static vars and initialization.
// Class-wide state handed to the ATN simulator of every SQLLexer instance.
std::vector<dfa::DFA> SQLLexer::_decisionToDFA;
atn::PredictionContextCache SQLLexer::_sharedContextCache;

// We own the ATN which in turn owns the ATN states.
atn::ATN SQLLexer::_atn;
// Filled in by SQLLexer::Initializer.
std::vector<uint16_t> SQLLexer::_serializedATN;

// Names of all lexer rules in grammar order. Unlike _symbolicNames this list
// also contains the fragment rules (UNSIGNED_INTEGER, APPROXIMATE_NUM_LIT,
// SIMPLE_LETTER, UNSIGNED_INTEGER_FRAGMENT, FLOAT_FRAGMENT, NEWLINE).
std::vector<std::string> SQLLexer::_ruleNames = {"OR",
                                                 "AND",
                                                 "NOT",
                                                 "IN",
                                                 "CONTAIN_ALL",
                                                 "CONTAIN_ANY",
                                                 "BETWEEN",
                                                 "LIKE",
                                                 "WHERE",
                                                 "SELECT",
                                                 "FROM",
                                                 "AS",
                                                 "BY",
                                                 "ORDER",
                                                 "ASC",
                                                 "DESC",
                                                 "LIMIT",
                                                 "TRUE_V",
                                                 "FALSE_V",
                                                 "IS",
                                                 "NULL_V",
                                                 "UNSIGNED_INTEGER",
                                                 "INTEGER",
                                                 "APPROXIMATE_NUM_LIT",
                                                 "FLOAT",
                                                 "SQUOTA_STRING",
                                                 "DQUOTA_STRING",
                                                 "DOT",
                                                 "LP",
                                                 "RP",
                                                 "LMP",
                                                 "RMP",
                                                 "ASTERISK",
                                                 "PLUS_SIGN",
                                                 "MINUS_SIGN",
                                                 "COMMA",
                                                 "SOLIDUS",
                                                 "MOD",
                                                 "AT_SIGN",
                                                 "ASSIGN_OP",
                                                 "SHARP_SIGN",
                                                 "COLON",
                                                 "SEMI",
                                                 "LE_OP",
                                                 "GE_OP",
                                                 "NE_OP",
                                                 "CARET_OP",
                                                 "TILDE_OP",
                                                 "L_OP",
                                                 "G_OP",
                                                 "E_OP",
                                                 "CONCAT_OP",
                                                 "UNDERSCORE",
                                                 "SPACES",
                                                 "SIMPLE_LETTER",
                                                 "UNSIGNED_INTEGER_FRAGMENT",
                                                 "FLOAT_FRAGMENT",
                                                 "VECTOR",
                                                 "SINGLE_LINE_COMMENT",
                                                 "MULTI_LINE_COMMENT",
                                                 "NEWLINE",
                                                 "REGULAR_ID"};

// Token channels: the first two entries plus COMMENTS, the custom channel
// declared by `channels{COMMENTS}` in SQLLexer.g4.
std::vector<std::string> SQLLexer::_channelNames = {"DEFAULT_TOKEN_CHANNEL",
                                                    "HIDDEN", "COMMENTS"};

// The grammar defines no extra lexer modes.
std::vector<std::string> SQLLexer::_modeNames = {"DEFAULT_MODE"};

// Literal spelling of each token, in the same order as _symbolicNames. Entries
// are empty where the token has no fixed literal (the leading placeholder and
// INTEGER, FLOAT, SQUOTA_STRING, DQUOTA_STRING); the list ends at UNDERSCORE.
std::vector<std::string> SQLLexer::_literalNames = {"",
                                                    "'OR'",
                                                    "'AND'",
                                                    "'NOT'",
                                                    "'IN'",
                                                    "'CONTAIN_ALL'",
                                                    "'CONTAIN_ANY'",
                                                    "'BETWEEN'",
                                                    "'LIKE'",
                                                    "'WHERE'",
                                                    "'SELECT'",
                                                    "'FROM'",
                                                    "'AS'",
                                                    "'BY'",
                                                    "'ORDER'",
                                                    "'ASC'",
                                                    "'DESC'",
                                                    "'LIMIT'",
                                                    "'TRUE'",
                                                    "'FALSE'",
                                                    "'IS'",
                                                    "'NULL'",
                                                    "",
                                                    "",
                                                    "",
                                                    "",
                                                    "'.'",
                                                    "'('",
                                                    "')'",
                                                    "'['",
                                                    "']'",
                                                    "'*'",
                                                    "'+'",
                                                    "'-'",
                                                    "','",
                                                    "'/'",
                                                    "'%'",
                                                    "'@'",
                                                    "':='",
                                                    "'#'",
                                                    "':'",
                                                    "';'",
                                                    "'<='",
                                                    "'>='",
                                                    "'!='",
                                                    "'^'",
                                                    "'~'",
                                                    "'<'",
                                                    "'>'",
                                                    "'='",
                                                    "'||'",
                                                    "'_'"};

// Symbolic name of each token (non-fragment lexer rule) in grammar order; the
// first entry is an empty placeholder.
std::vector<std::string> SQLLexer::_symbolicNames = {"",
                                                     "OR",
                                                     "AND",
                                                     "NOT",
                                                     "IN",
                                                     "CONTAIN_ALL",
                                                     "CONTAIN_ANY",
                                                     "BETWEEN",
                                                     "LIKE",
                                                     "WHERE",
                                                     "SELECT",
                                                     "FROM",
                                                     "AS",
                                                     "BY",
                                                     "ORDER",
                                                     "ASC",
                                                     "DESC",
                                                     "LIMIT",
                                                     "TRUE_V",
                                                     "FALSE_V",
                                                     "IS",
                                                     "NULL_V",
                                                     "INTEGER",
                                                     "FLOAT",
                                                     "SQUOTA_STRING",
                                                     "DQUOTA_STRING",
                                                     "DOT",
                                                     "LP",
                                                     "RP",
                                                     "LMP",
                                                     "RMP",
                                                     "ASTERISK",
                                                     "PLUS_SIGN",
                                                     "MINUS_SIGN",
                                                     "COMMA",
                                                     "SOLIDUS",
                                                     "MOD",
                                                     "AT_SIGN",
                                                     "ASSIGN_OP",
                                                     "SHARP_SIGN",
                                                     "COLON",
                                                     "SEMI",
                                                     "LE_OP",
                                                     "GE_OP",
                                                     "NE_OP",
                                                     "CARET_OP",
                                                     "TILDE_OP",
                                                     "L_OP",
                                                     "G_OP",
                                                     "E_OP",
                                                     "CONCAT_OP",
                                                     "UNDERSCORE",
                                                     "SPACES",
                                                     "VECTOR",
                                                     "SINGLE_LINE_COMMENT",
                                                     "MULTI_LINE_COMMENT",
                                                     "REGULAR_ID"};

// Vocabulary built from the literal and symbolic name tables above.
dfa::Vocabulary SQLLexer::_vocabulary(_literalNames, _symbolicNames);

// Populated by SQLLexer::Initializer with one display name per token.
std::vector<std::string> SQLLexer::_tokenNames;

SQLLexer::Initializer::Initializer() {
  // This code could be in a static initializer lambda, but VS doesn't allow
  // access to private class members from there.
  for (size_t i = 0; i < _symbolicNames.size(); ++i) {
    std::string name = _vocabulary.getLiteralName(i);
    if (name.empty()) {
      name = _vocabulary.getSymbolicName(i);
    }

    if (name.empty()) {
      _tokenNames.push_back("<INVALID>");
    } else {
      _tokenNames.push_back(name);
    }
  }

  _serializedATN = {
      0x3,   0x608b, 0xa72a, 0x8133, 0xb9ed, 0x417c, 0x3be7, 0x7786, 0x5964,
      0x2,   0x3a,   0x1ab,  0x8,    0x1,    0x4,    0x2,    0x9,    0x2,
      0x4,   0x3,    0x9,    0x3,    0x4,    0x4,    0x9,    0x4,    0x4,
      0x5,   0x9,    0x5,    0x4,    0x6,    0x9,    0x6,    0x4,    0x7,
      0x9,   0x7,    0x4,    0x8,    0x9,    0x8,    0x4,    0x9,    0x9,
      0x9,   0x4,    0xa,    0x9,    0xa,    0x4,    0xb,    0x9,    0xb,
      0x4,   0xc,    0x9,    0xc,    0x4,    0xd,    0x9,    0xd,    0x4,
      0xe,   0x9,    0xe,    0x4,    0xf,    0x9,    0xf,    0x4,    0x10,
      0x9,   0x10,   0x4,    0x11,   0x9,    0x11,   0x4,    0x12,   0x9,
      0x12,  0x4,    0x13,   0x9,    0x13,   0x4,    0x14,   0x9,    0x14,
      0x4,   0x15,   0x9,    0x15,   0x4,    0x16,   0x9,    0x16,   0x4,
      0x17,  0x9,    0x17,   0x4,    0x18,   0x9,    0x18,   0x4,    0x19,
      0x9,   0x19,   0x4,    0x1a,   0x9,    0x1a,   0x4,    0x1b,   0x9,
      0x1b,  0x4,    0x1c,   0x9,    0x1c,   0x4,    0x1d,   0x9,    0x1d,
      0x4,   0x1e,   0x9,    0x1e,   0x4,    0x1f,   0x9,    0x1f,   0x4,
      0x20,  0x9,    0x20,   0x4,    0x21,   0x9,    0x21,   0x4,    0x22,
      0x9,   0x22,   0x4,    0x23,   0x9,    0x23,   0x4,    0x24,   0x9,
      0x24,  0x4,    0x25,   0x9,    0x25,   0x4,    0x26,   0x9,    0x26,
      0x4,   0x27,   0x9,    0x27,   0x4,    0x28,   0x9,    0x28,   0x4,
      0x29,  0x9,    0x29,   0x4,    0x2a,   0x9,    0x2a,   0x4,    0x2b,
      0x9,   0x2b,   0x4,    0x2c,   0x9,    0x2c,   0x4,    0x2d,   0x9,
      0x2d,  0x4,    0x2e,   0x9,    0x2e,   0x4,    0x2f,   0x9,    0x2f,
      0x4,   0x30,   0x9,    0x30,   0x4,    0x31,   0x9,    0x31,   0x4,
      0x32,  0x9,    0x32,   0x4,    0x33,   0x9,    0x33,   0x4,    0x34,
      0x9,   0x34,   0x4,    0x35,   0x9,    0x35,   0x4,    0x36,   0x9,
      0x36,  0x4,    0x37,   0x9,    0x37,   0x4,    0x38,   0x9,    0x38,
      0x4,   0x39,   0x9,    0x39,   0x4,    0x3a,   0x9,    0x3a,   0x4,
      0x3b,  0x9,    0x3b,   0x4,    0x3c,   0x9,    0x3c,   0x4,    0x3d,
      0x9,   0x3d,   0x4,    0x3e,   0x9,    0x3e,   0x4,    0x3f,   0x9,
      0x3f,  0x3,    0x2,    0x3,    0x2,    0x3,    0x2,    0x3,    0x3,
      0x3,   0x3,    0x3,    0x3,    0x3,    0x3,    0x3,    0x4,    0x3,
      0x4,   0x3,    0x4,    0x3,    0x4,    0x3,    0x5,    0x3,    0x5,
      0x3,   0x5,    0x3,    0x6,    0x3,    0x6,    0x3,    0x6,    0x3,
      0x6,   0x3,    0x6,    0x3,    0x6,    0x3,    0x6,    0x3,    0x6,
      0x3,   0x6,    0x3,    0x6,    0x3,    0x6,    0x3,    0x6,    0x3,
      0x7,   0x3,    0x7,    0x3,    0x7,    0x3,    0x7,    0x3,    0x7,
      0x3,   0x7,    0x3,    0x7,    0x3,    0x7,    0x3,    0x7,    0x3,
      0x7,   0x3,    0x7,    0x3,    0x7,    0x3,    0x8,    0x3,    0x8,
      0x3,   0x8,    0x3,    0x8,    0x3,    0x8,    0x3,    0x8,    0x3,
      0x8,   0x3,    0x8,    0x3,    0x9,    0x3,    0x9,    0x3,    0x9,
      0x3,   0x9,    0x3,    0x9,    0x3,    0xa,    0x3,    0xa,    0x3,
      0xa,   0x3,    0xa,    0x3,    0xa,    0x3,    0xa,    0x3,    0xb,
      0x3,   0xb,    0x3,    0xb,    0x3,    0xb,    0x3,    0xb,    0x3,
      0xb,   0x3,    0xb,    0x3,    0xc,    0x3,    0xc,    0x3,    0xc,
      0x3,   0xc,    0x3,    0xc,    0x3,    0xd,    0x3,    0xd,    0x3,
      0xd,   0x3,    0xe,    0x3,    0xe,    0x3,    0xe,    0x3,    0xf,
      0x3,   0xf,    0x3,    0xf,    0x3,    0xf,    0x3,    0xf,    0x3,
      0xf,   0x3,    0x10,   0x3,    0x10,   0x3,    0x10,   0x3,    0x10,
      0x3,   0x11,   0x3,    0x11,   0x3,    0x11,   0x3,    0x11,   0x3,
      0x11,  0x3,    0x12,   0x3,    0x12,   0x3,    0x12,   0x3,    0x12,
      0x3,   0x12,   0x3,    0x12,   0x3,    0x13,   0x3,    0x13,   0x3,
      0x13,  0x3,    0x13,   0x3,    0x13,   0x3,    0x14,   0x3,    0x14,
      0x3,   0x14,   0x3,    0x14,   0x3,    0x14,   0x3,    0x14,   0x3,
      0x15,  0x3,    0x15,   0x3,    0x15,   0x3,    0x16,   0x3,    0x16,
      0x3,   0x16,   0x3,    0x16,   0x3,    0x16,   0x3,    0x17,   0x3,
      0x17,  0x3,    0x18,   0x5,    0x18,   0xf6,   0xa,    0x18,   0x3,
      0x18,  0x3,    0x18,   0x3,    0x19,   0x3,    0x19,   0x3,    0x19,
      0x5,   0x19,   0xfd,   0xa,    0x19,   0x3,    0x19,   0x3,    0x19,
      0x5,   0x19,   0x101,  0xa,    0x19,   0x5,    0x19,   0x103,  0xa,
      0x19,  0x3,    0x19,   0x5,    0x19,   0x106,  0xa,    0x19,   0x3,
      0x1a,  0x5,    0x1a,   0x109,  0xa,    0x1a,   0x3,    0x1a,   0x3,
      0x1a,  0x3,    0x1b,   0x3,    0x1b,   0x3,    0x1b,   0x3,    0x1b,
      0x7,   0x1b,   0x111,  0xa,    0x1b,   0xc,    0x1b,   0xe,    0x1b,
      0x114, 0xb,    0x1b,   0x3,    0x1b,   0x3,    0x1b,   0x3,    0x1c,
      0x3,   0x1c,   0x3,    0x1c,   0x3,    0x1c,   0x7,    0x1c,   0x11c,
      0xa,   0x1c,   0xc,    0x1c,   0xe,    0x1c,   0x11f,  0xb,    0x1c,
      0x3,   0x1c,   0x3,    0x1c,   0x3,    0x1d,   0x3,    0x1d,   0x3,
      0x1e,  0x3,    0x1e,   0x3,    0x1f,   0x3,    0x1f,   0x3,    0x20,
      0x3,   0x20,   0x3,    0x21,   0x3,    0x21,   0x3,    0x22,   0x3,
      0x22,  0x3,    0x23,   0x3,    0x23,   0x3,    0x24,   0x3,    0x24,
      0x3,   0x25,   0x3,    0x25,   0x3,    0x26,   0x3,    0x26,   0x3,
      0x27,  0x3,    0x27,   0x3,    0x28,   0x3,    0x28,   0x3,    0x29,
      0x3,   0x29,   0x3,    0x29,   0x3,    0x2a,   0x3,    0x2a,   0x3,
      0x2b,  0x3,    0x2b,   0x3,    0x2c,   0x3,    0x2c,   0x3,    0x2d,
      0x3,   0x2d,   0x3,    0x2d,   0x3,    0x2e,   0x3,    0x2e,   0x3,
      0x2e,  0x3,    0x2f,   0x3,    0x2f,   0x3,    0x2f,   0x3,    0x30,
      0x3,   0x30,   0x3,    0x31,   0x3,    0x31,   0x3,    0x32,   0x3,
      0x32,  0x3,    0x33,   0x3,    0x33,   0x3,    0x34,   0x3,    0x34,
      0x3,   0x35,   0x3,    0x35,   0x3,    0x35,   0x3,    0x36,   0x3,
      0x36,  0x3,    0x37,   0x6,    0x37,   0x15d,  0xa,    0x37,   0xd,
      0x37,  0xe,    0x37,   0x15e,  0x3,    0x37,   0x3,    0x37,   0x3,
      0x38,  0x3,    0x38,   0x3,    0x39,   0x6,    0x39,   0x166,  0xa,
      0x39,  0xd,    0x39,   0xe,    0x39,   0x167,  0x3,    0x3a,   0x7,
      0x3a,  0x16b,  0xa,    0x3a,   0xc,    0x3a,   0xe,    0x3a,   0x16e,
      0xb,   0x3a,   0x3,    0x3a,   0x5,    0x3a,   0x171,  0xa,    0x3a,
      0x3,   0x3a,   0x6,    0x3a,   0x174,  0xa,    0x3a,   0xd,    0x3a,
      0xe,   0x3a,   0x175,  0x3,    0x3b,   0x3,    0x3b,   0x3,    0x3b,
      0x3,   0x3b,   0x3,    0x3b,   0x3,    0x3b,   0x6,    0x3b,   0x17e,
      0xa,   0x3b,   0xd,    0x3b,   0xe,    0x3b,   0x17f,  0x3,    0x3b,
      0x3,   0x3b,   0x3,    0x3c,   0x3,    0x3c,   0x3,    0x3c,   0x3,
      0x3c,  0x7,    0x3c,   0x188,  0xa,    0x3c,   0xc,    0x3c,   0xe,
      0x3c,  0x18b,  0xb,    0x3c,   0x3,    0x3c,   0x3,    0x3c,   0x5,
      0x3c,  0x18f,  0xa,    0x3c,   0x3,    0x3c,   0x3,    0x3c,   0x3,
      0x3d,  0x3,    0x3d,   0x3,    0x3d,   0x3,    0x3d,   0x7,    0x3d,
      0x197, 0xa,    0x3d,   0xc,    0x3d,   0xe,    0x3d,   0x19a,  0xb,
      0x3d,  0x3,    0x3d,   0x3,    0x3d,   0x3,    0x3d,   0x3,    0x3d,
      0x3,   0x3d,   0x3,    0x3e,   0x5,    0x3e,   0x1a2,  0xa,    0x3e,
      0x3,   0x3e,   0x3,    0x3e,   0x3,    0x3f,   0x3,    0x3f,   0x6,
      0x3f,  0x1a8,  0xa,    0x3f,   0xd,    0x3f,   0xe,    0x3f,   0x1a9,
      0x3,   0x198,  0x2,    0x40,   0x3,    0x3,    0x5,    0x4,    0x7,
      0x5,   0x9,    0x6,    0xb,    0x7,    0xd,    0x8,    0xf,    0x9,
      0x11,  0xa,    0x13,   0xb,    0x15,   0xc,    0x17,   0xd,    0x19,
      0xe,   0x1b,   0xf,    0x1d,   0x10,   0x1f,   0x11,   0x21,   0x12,
      0x23,  0x13,   0x25,   0x14,   0x27,   0x15,   0x29,   0x16,   0x2b,
      0x17,  0x2d,   0x2,    0x2f,   0x18,   0x31,   0x2,    0x33,   0x19,
      0x35,  0x1a,   0x37,   0x1b,   0x39,   0x1c,   0x3b,   0x1d,   0x3d,
      0x1e,  0x3f,   0x1f,   0x41,   0x20,   0x43,   0x21,   0x45,   0x22,
      0x47,  0x23,   0x49,   0x24,   0x4b,   0x25,   0x4d,   0x26,   0x4f,
      0x27,  0x51,   0x28,   0x53,   0x29,   0x55,   0x2a,   0x57,   0x2b,
      0x59,  0x2c,   0x5b,   0x2d,   0x5d,   0x2e,   0x5f,   0x2f,   0x61,
      0x30,  0x63,   0x31,   0x65,   0x32,   0x67,   0x33,   0x69,   0x34,
      0x6b,  0x35,   0x6d,   0x36,   0x6f,   0x2,    0x71,   0x2,    0x73,
      0x2,   0x75,   0x37,   0x77,   0x38,   0x79,   0x39,   0x7b,   0x2,
      0x7d,  0x3a,   0x3,    0x2,    0xb,    0x4,    0x2,    0x2d,   0x2d,
      0x2f,  0x2f,   0x4,    0x2,    0x46,   0x46,   0x48,   0x48,   0x4,
      0x2,   0x29,   0x29,   0x5e,   0x5e,   0x4,    0x2,    0x24,   0x24,
      0x5e,  0x5e,   0x5,    0x2,    0xb,    0xc,    0xf,    0xf,    0x22,
      0x22,  0x3,    0x2,    0x43,   0x5c,   0x3,    0x2,    0x32,   0x3b,
      0x4,   0x2,    0xc,    0xc,    0xf,    0xf,    0x5,    0x2,    0x2f,
      0x2f,  0x32,   0x3b,   0x61,   0x61,   0x2,    0x1be,  0x2,    0x3,
      0x3,   0x2,    0x2,    0x2,    0x2,    0x5,    0x3,    0x2,    0x2,
      0x2,   0x2,    0x7,    0x3,    0x2,    0x2,    0x2,    0x2,    0x9,
      0x3,   0x2,    0x2,    0x2,    0x2,    0xb,    0x3,    0x2,    0x2,
      0x2,   0x2,    0xd,    0x3,    0x2,    0x2,    0x2,    0x2,    0xf,
      0x3,   0x2,    0x2,    0x2,    0x2,    0x11,   0x3,    0x2,    0x2,
      0x2,   0x2,    0x13,   0x3,    0x2,    0x2,    0x2,    0x2,    0x15,
      0x3,   0x2,    0x2,    0x2,    0x2,    0x17,   0x3,    0x2,    0x2,
      0x2,   0x2,    0x19,   0x3,    0x2,    0x2,    0x2,    0x2,    0x1b,
      0x3,   0x2,    0x2,    0x2,    0x2,    0x1d,   0x3,    0x2,    0x2,
      0x2,   0x2,    0x1f,   0x3,    0x2,    0x2,    0x2,    0x2,    0x21,
      0x3,   0x2,    0x2,    0x2,    0x2,    0x23,   0x3,    0x2,    0x2,
      0x2,   0x2,    0x25,   0x3,    0x2,    0x2,    0x2,    0x2,    0x27,
      0x3,   0x2,    0x2,    0x2,    0x2,    0x29,   0x3,    0x2,    0x2,
      0x2,   0x2,    0x2b,   0x3,    0x2,    0x2,    0x2,    0x2,    0x2f,
      0x3,   0x2,    0x2,    0x2,    0x2,    0x33,   0x3,    0x2,    0x2,
      0x2,   0x2,    0x35,   0x3,    0x2,    0x2,    0x2,    0x2,    0x37,
      0x3,   0x2,    0x2,    0x2,    0x2,    0x39,   0x3,    0x2,    0x2,
      0x2,   0x2,    0x3b,   0x3,    0x2,    0x2,    0x2,    0x2,    0x3d,
      0x3,   0x2,    0x2,    0x2,    0x2,    0x3f,   0x3,    0x2,    0x2,
      0x2,   0x2,    0x41,   0x3,    0x2,    0x2,    0x2,    0x2,    0x43,
      0x3,   0x2,    0x2,    0x2,    0x2,    0x45,   0x3,    0x2,    0x2,
      0x2,   0x2,    0x47,   0x3,    0x2,    0x2,    0x2,    0x2,    0x49,
      0x3,   0x2,    0x2,    0x2,    0x2,    0x4b,   0x3,    0x2,    0x2,
      0x2,   0x2,    0x4d,   0x3,    0x2,    0x2,    0x2,    0x2,    0x4f,
      0x3,   0x2,    0x2,    0x2,    0x2,    0x51,   0x3,    0x2,    0x2,
      0x2,   0x2,    0x53,   0x3,    0x2,    0x2,    0x2,    0x2,    0x55,
      0x3,   0x2,    0x2,    0x2,    0x2,    0x57,   0x3,    0x2,    0x2,
      0x2,   0x2,    0x59,   0x3,    0x2,    0x2,    0x2,    0x2,    0x5b,
      0x3,   0x2,    0x2,    0x2,    0x2,    0x5d,   0x3,    0x2,    0x2,
      0x2,   0x2,    0x5f,   0x3,    0x2,    0x2,    0x2,    0x2,    0x61,
      0x3,   0x2,    0x2,    0x2,    0x2,    0x63,   0x3,    0x2,    0x2,
      0x2,   0x2,    0x65,   0x3,    0x2,    0x2,    0x2,    0x2,    0x67,
      0x3,   0x2,    0x2,    0x2,    0x2,    0x69,   0x3,    0x2,    0x2,
      0x2,   0x2,    0x6b,   0x3,    0x2,    0x2,    0x2,    0x2,    0x6d,
      0x3,   0x2,    0x2,    0x2,    0x2,    0x75,   0x3,    0x2,    0x2,
      0x2,   0x2,    0x77,   0x3,    0x2,    0x2,    0x2,    0x2,    0x79,
      0x3,   0x2,    0x2,    0x2,    0x2,    0x7d,   0x3,    0x2,    0x2,
      0x2,   0x3,    0x7f,   0x3,    0x2,    0x2,    0x2,    0x5,    0x82,
      0x3,   0x2,    0x2,    0x2,    0x7,    0x86,   0x3,    0x2,    0x2,
      0x2,   0x9,    0x8a,   0x3,    0x2,    0x2,    0x2,    0xb,    0x8d,
      0x3,   0x2,    0x2,    0x2,    0xd,    0x99,   0x3,    0x2,    0x2,
      0x2,   0xf,    0xa5,   0x3,    0x2,    0x2,    0x2,    0x11,   0xad,
      0x3,   0x2,    0x2,    0x2,    0x13,   0xb2,   0x3,    0x2,    0x2,
      0x2,   0x15,   0xb8,   0x3,    0x2,    0x2,    0x2,    0x17,   0xbf,
      0x3,   0x2,    0x2,    0x2,    0x19,   0xc4,   0x3,    0x2,    0x2,
      0x2,   0x1b,   0xc7,   0x3,    0x2,    0x2,    0x2,    0x1d,   0xca,
      0x3,   0x2,    0x2,    0x2,    0x1f,   0xd0,   0x3,    0x2,    0x2,
      0x2,   0x21,   0xd4,   0x3,    0x2,    0x2,    0x2,    0x23,   0xd9,
      0x3,   0x2,    0x2,    0x2,    0x25,   0xdf,   0x3,    0x2,    0x2,
      0x2,   0x27,   0xe4,   0x3,    0x2,    0x2,    0x2,    0x29,   0xea,
      0x3,   0x2,    0x2,    0x2,    0x2b,   0xed,   0x3,    0x2,    0x2,
      0x2,   0x2d,   0xf2,   0x3,    0x2,    0x2,    0x2,    0x2f,   0xf5,
      0x3,   0x2,    0x2,    0x2,    0x31,   0xf9,   0x3,    0x2,    0x2,
      0x2,   0x33,   0x108,  0x3,    0x2,    0x2,    0x2,    0x35,   0x10c,
      0x3,   0x2,    0x2,    0x2,    0x37,   0x117,  0x3,    0x2,    0x2,
      0x2,   0x39,   0x122,  0x3,    0x2,    0x2,    0x2,    0x3b,   0x124,
      0x3,   0x2,    0x2,    0x2,    0x3d,   0x126,  0x3,    0x2,    0x2,
      0x2,   0x3f,   0x128,  0x3,    0x2,    0x2,    0x2,    0x41,   0x12a,
      0x3,   0x2,    0x2,    0x2,    0x43,   0x12c,  0x3,    0x2,    0x2,
      0x2,   0x45,   0x12e,  0x3,    0x2,    0x2,    0x2,    0x47,   0x130,
      0x3,   0x2,    0x2,    0x2,    0x49,   0x132,  0x3,    0x2,    0x2,
      0x2,   0x4b,   0x134,  0x3,    0x2,    0x2,    0x2,    0x4d,   0x136,
      0x3,   0x2,    0x2,    0x2,    0x4f,   0x138,  0x3,    0x2,    0x2,
      0x2,   0x51,   0x13a,  0x3,    0x2,    0x2,    0x2,    0x53,   0x13d,
      0x3,   0x2,    0x2,    0x2,    0x55,   0x13f,  0x3,    0x2,    0x2,
      0x2,   0x57,   0x141,  0x3,    0x2,    0x2,    0x2,    0x59,   0x143,
      0x3,   0x2,    0x2,    0x2,    0x5b,   0x146,  0x3,    0x2,    0x2,
      0x2,   0x5d,   0x149,  0x3,    0x2,    0x2,    0x2,    0x5f,   0x14c,
      0x3,   0x2,    0x2,    0x2,    0x61,   0x14e,  0x3,    0x2,    0x2,
      0x2,   0x63,   0x150,  0x3,    0x2,    0x2,    0x2,    0x65,   0x152,
      0x3,   0x2,    0x2,    0x2,    0x67,   0x154,  0x3,    0x2,    0x2,
      0x2,   0x69,   0x156,  0x3,    0x2,    0x2,    0x2,    0x6b,   0x159,
      0x3,   0x2,    0x2,    0x2,    0x6d,   0x15c,  0x3,    0x2,    0x2,
      0x2,   0x6f,   0x162,  0x3,    0x2,    0x2,    0x2,    0x71,   0x165,
      0x3,   0x2,    0x2,    0x2,    0x73,   0x16c,  0x3,    0x2,    0x2,
      0x2,   0x75,   0x177,  0x3,    0x2,    0x2,    0x2,    0x77,   0x183,
      0x3,   0x2,    0x2,    0x2,    0x79,   0x192,  0x3,    0x2,    0x2,
      0x2,   0x7b,   0x1a1,  0x3,    0x2,    0x2,    0x2,    0x7d,   0x1a7,
      0x3,   0x2,    0x2,    0x2,    0x7f,   0x80,   0x7,    0x51,   0x2,
      0x2,   0x80,   0x81,   0x7,    0x54,   0x2,    0x2,    0x81,   0x4,
      0x3,   0x2,    0x2,    0x2,    0x82,   0x83,   0x7,    0x43,   0x2,
      0x2,   0x83,   0x84,   0x7,    0x50,   0x2,    0x2,    0x84,   0x85,
      0x7,   0x46,   0x2,    0x2,    0x85,   0x6,    0x3,    0x2,    0x2,
      0x2,   0x86,   0x87,   0x7,    0x50,   0x2,    0x2,    0x87,   0x88,
      0x7,   0x51,   0x2,    0x2,    0x88,   0x89,   0x7,    0x56,   0x2,
      0x2,   0x89,   0x8,    0x3,    0x2,    0x2,    0x2,    0x8a,   0x8b,
      0x7,   0x4b,   0x2,    0x2,    0x8b,   0x8c,   0x7,    0x50,   0x2,
      0x2,   0x8c,   0xa,    0x3,    0x2,    0x2,    0x2,    0x8d,   0x8e,
      0x7,   0x45,   0x2,    0x2,    0x8e,   0x8f,   0x7,    0x51,   0x2,
      0x2,   0x8f,   0x90,   0x7,    0x50,   0x2,    0x2,    0x90,   0x91,
      0x7,   0x56,   0x2,    0x2,    0x91,   0x92,   0x7,    0x43,   0x2,
      0x2,   0x92,   0x93,   0x7,    0x4b,   0x2,    0x2,    0x93,   0x94,
      0x7,   0x50,   0x2,    0x2,    0x94,   0x95,   0x7,    0x61,   0x2,
      0x2,   0x95,   0x96,   0x7,    0x43,   0x2,    0x2,    0x96,   0x97,
      0x7,   0x4e,   0x2,    0x2,    0x97,   0x98,   0x7,    0x4e,   0x2,
      0x2,   0x98,   0xc,    0x3,    0x2,    0x2,    0x2,    0x99,   0x9a,
      0x7,   0x45,   0x2,    0x2,    0x9a,   0x9b,   0x7,    0x51,   0x2,
      0x2,   0x9b,   0x9c,   0x7,    0x50,   0x2,    0x2,    0x9c,   0x9d,
      0x7,   0x56,   0x2,    0x2,    0x9d,   0x9e,   0x7,    0x43,   0x2,
      0x2,   0x9e,   0x9f,   0x7,    0x4b,   0x2,    0x2,    0x9f,   0xa0,
      0x7,   0x50,   0x2,    0x2,    0xa0,   0xa1,   0x7,    0x61,   0x2,
      0x2,   0xa1,   0xa2,   0x7,    0x43,   0x2,    0x2,    0xa2,   0xa3,
      0x7,   0x50,   0x2,    0x2,    0xa3,   0xa4,   0x7,    0x5b,   0x2,
      0x2,   0xa4,   0xe,    0x3,    0x2,    0x2,    0x2,    0xa5,   0xa6,
      0x7,   0x44,   0x2,    0x2,    0xa6,   0xa7,   0x7,    0x47,   0x2,
      0x2,   0xa7,   0xa8,   0x7,    0x56,   0x2,    0x2,    0xa8,   0xa9,
      0x7,   0x59,   0x2,    0x2,    0xa9,   0xaa,   0x7,    0x47,   0x2,
      0x2,   0xaa,   0xab,   0x7,    0x47,   0x2,    0x2,    0xab,   0xac,
      0x7,   0x50,   0x2,    0x2,    0xac,   0x10,   0x3,    0x2,    0x2,
      0x2,   0xad,   0xae,   0x7,    0x4e,   0x2,    0x2,    0xae,   0xaf,
      0x7,   0x4b,   0x2,    0x2,    0xaf,   0xb0,   0x7,    0x4d,   0x2,
      0x2,   0xb0,   0xb1,   0x7,    0x47,   0x2,    0x2,    0xb1,   0x12,
      0x3,   0x2,    0x2,    0x2,    0xb2,   0xb3,   0x7,    0x59,   0x2,
      0x2,   0xb3,   0xb4,   0x7,    0x4a,   0x2,    0x2,    0xb4,   0xb5,
      0x7,   0x47,   0x2,    0x2,    0xb5,   0xb6,   0x7,    0x54,   0x2,
      0x2,   0xb6,   0xb7,   0x7,    0x47,   0x2,    0x2,    0xb7,   0x14,
      0x3,   0x2,    0x2,    0x2,    0xb8,   0xb9,   0x7,    0x55,   0x2,
      0x2,   0xb9,   0xba,   0x7,    0x47,   0x2,    0x2,    0xba,   0xbb,
      0x7,   0x4e,   0x2,    0x2,    0xbb,   0xbc,   0x7,    0x47,   0x2,
      0x2,   0xbc,   0xbd,   0x7,    0x45,   0x2,    0x2,    0xbd,   0xbe,
      0x7,   0x56,   0x2,    0x2,    0xbe,   0x16,   0x3,    0x2,    0x2,
      0x2,   0xbf,   0xc0,   0x7,    0x48,   0x2,    0x2,    0xc0,   0xc1,
      0x7,   0x54,   0x2,    0x2,    0xc1,   0xc2,   0x7,    0x51,   0x2,
      0x2,   0xc2,   0xc3,   0x7,    0x4f,   0x2,    0x2,    0xc3,   0x18,
      0x3,   0x2,    0x2,    0x2,    0xc4,   0xc5,   0x7,    0x43,   0x2,
      0x2,   0xc5,   0xc6,   0x7,    0x55,   0x2,    0x2,    0xc6,   0x1a,
      0x3,   0x2,    0x2,    0x2,    0xc7,   0xc8,   0x7,    0x44,   0x2,
      0x2,   0xc8,   0xc9,   0x7,    0x5b,   0x2,    0x2,    0xc9,   0x1c,
      0x3,   0x2,    0x2,    0x2,    0xca,   0xcb,   0x7,    0x51,   0x2,
      0x2,   0xcb,   0xcc,   0x7,    0x54,   0x2,    0x2,    0xcc,   0xcd,
      0x7,   0x46,   0x2,    0x2,    0xcd,   0xce,   0x7,    0x47,   0x2,
      0x2,   0xce,   0xcf,   0x7,    0x54,   0x2,    0x2,    0xcf,   0x1e,
      0x3,   0x2,    0x2,    0x2,    0xd0,   0xd1,   0x7,    0x43,   0x2,
      0x2,   0xd1,   0xd2,   0x7,    0x55,   0x2,    0x2,    0xd2,   0xd3,
      0x7,   0x45,   0x2,    0x2,    0xd3,   0x20,   0x3,    0x2,    0x2,
      0x2,   0xd4,   0xd5,   0x7,    0x46,   0x2,    0x2,    0xd5,   0xd6,
      0x7,   0x47,   0x2,    0x2,    0xd6,   0xd7,   0x7,    0x55,   0x2,
      0x2,   0xd7,   0xd8,   0x7,    0x45,   0x2,    0x2,    0xd8,   0x22,
      0x3,   0x2,    0x2,    0x2,    0xd9,   0xda,   0x7,    0x4e,   0x2,
      0x2,   0xda,   0xdb,   0x7,    0x4b,   0x2,    0x2,    0xdb,   0xdc,
      0x7,   0x4f,   0x2,    0x2,    0xdc,   0xdd,   0x7,    0x4b,   0x2,
      0x2,   0xdd,   0xde,   0x7,    0x56,   0x2,    0x2,    0xde,   0x24,
      0x3,   0x2,    0x2,    0x2,    0xdf,   0xe0,   0x7,    0x56,   0x2,
      0x2,   0xe0,   0xe1,   0x7,    0x54,   0x2,    0x2,    0xe1,   0xe2,
      0x7,   0x57,   0x2,    0x2,    0xe2,   0xe3,   0x7,    0x47,   0x2,
      0x2,   0xe3,   0x26,   0x3,    0x2,    0x2,    0x2,    0xe4,   0xe5,
      0x7,   0x48,   0x2,    0x2,    0xe5,   0xe6,   0x7,    0x43,   0x2,
      0x2,   0xe6,   0xe7,   0x7,    0x4e,   0x2,    0x2,    0xe7,   0xe8,
      0x7,   0x55,   0x2,    0x2,    0xe8,   0xe9,   0x7,    0x47,   0x2,
      0x2,   0xe9,   0x28,   0x3,    0x2,    0x2,    0x2,    0xea,   0xeb,
      0x7,   0x4b,   0x2,    0x2,    0xeb,   0xec,   0x7,    0x55,   0x2,
      0x2,   0xec,   0x2a,   0x3,    0x2,    0x2,    0x2,    0xed,   0xee,
      0x7,   0x50,   0x2,    0x2,    0xee,   0xef,   0x7,    0x57,   0x2,
      0x2,   0xef,   0xf0,   0x7,    0x4e,   0x2,    0x2,    0xf0,   0xf1,
      0x7,   0x4e,   0x2,    0x2,    0xf1,   0x2c,   0x3,    0x2,    0x2,
      0x2,   0xf2,   0xf3,   0x5,    0x71,   0x39,   0x2,    0xf3,   0x2e,
      0x3,   0x2,    0x2,    0x2,    0xf4,   0xf6,   0x5,    0x47,   0x24,
      0x2,   0xf5,   0xf4,   0x3,    0x2,    0x2,    0x2,    0xf5,   0xf6,
      0x3,   0x2,    0x2,    0x2,    0xf6,   0xf7,   0x3,    0x2,    0x2,
      0x2,   0xf7,   0xf8,   0x5,    0x2d,   0x17,   0x2,    0xf8,   0x30,
      0x3,   0x2,    0x2,    0x2,    0xf9,   0x102,  0x5,    0x73,   0x3a,
      0x2,   0xfa,   0xfc,   0x7,    0x47,   0x2,    0x2,    0xfb,   0xfd,
      0x9,   0x2,    0x2,    0x2,    0xfc,   0xfb,   0x3,    0x2,    0x2,
      0x2,   0xfc,   0xfd,   0x3,    0x2,    0x2,    0x2,    0xfd,   0x100,
      0x3,   0x2,    0x2,    0x2,    0xfe,   0x101,  0x5,    0x73,   0x3a,
      0x2,   0xff,   0x101,  0x5,    0x71,   0x39,   0x2,    0x100,  0xfe,
      0x3,   0x2,    0x2,    0x2,    0x100,  0xff,   0x3,    0x2,    0x2,
      0x2,   0x101,  0x103,  0x3,    0x2,    0x2,    0x2,    0x102,  0xfa,
      0x3,   0x2,    0x2,    0x2,    0x102,  0x103,  0x3,    0x2,    0x2,
      0x2,   0x103,  0x105,  0x3,    0x2,    0x2,    0x2,    0x104,  0x106,
      0x9,   0x3,    0x2,    0x2,    0x105,  0x104,  0x3,    0x2,    0x2,
      0x2,   0x105,  0x106,  0x3,    0x2,    0x2,    0x2,    0x106,  0x32,
      0x3,   0x2,    0x2,    0x2,    0x107,  0x109,  0x5,    0x47,   0x24,
      0x2,   0x108,  0x107,  0x3,    0x2,    0x2,    0x2,    0x108,  0x109,
      0x3,   0x2,    0x2,    0x2,    0x109,  0x10a,  0x3,    0x2,    0x2,
      0x2,   0x10a,  0x10b,  0x5,    0x31,   0x19,   0x2,    0x10b,  0x34,
      0x3,   0x2,    0x2,    0x2,    0x10c,  0x112,  0x7,    0x29,   0x2,
      0x2,   0x10d,  0x111,  0xa,    0x4,    0x2,    0x2,    0x10e,  0x10f,
      0x7,   0x5e,   0x2,    0x2,    0x10f,  0x111,  0xb,    0x2,    0x2,
      0x2,   0x110,  0x10d,  0x3,    0x2,    0x2,    0x2,    0x110,  0x10e,
      0x3,   0x2,    0x2,    0x2,    0x111,  0x114,  0x3,    0x2,    0x2,
      0x2,   0x112,  0x110,  0x3,    0x2,    0x2,    0x2,    0x112,  0x113,
      0x3,   0x2,    0x2,    0x2,    0x113,  0x115,  0x3,    0x2,    0x2,
      0x2,   0x114,  0x112,  0x3,    0x2,    0x2,    0x2,    0x115,  0x116,
      0x7,   0x29,   0x2,    0x2,    0x116,  0x36,   0x3,    0x2,    0x2,
      0x2,   0x117,  0x11d,  0x7,    0x24,   0x2,    0x2,    0x118,  0x11c,
      0xa,   0x5,    0x2,    0x2,    0x119,  0x11a,  0x7,    0x5e,   0x2,
      0x2,   0x11a,  0x11c,  0xb,    0x2,    0x2,    0x2,    0x11b,  0x118,
      0x3,   0x2,    0x2,    0x2,    0x11b,  0x119,  0x3,    0x2,    0x2,
      0x2,   0x11c,  0x11f,  0x3,    0x2,    0x2,    0x2,    0x11d,  0x11b,
      0x3,   0x2,    0x2,    0x2,    0x11d,  0x11e,  0x3,    0x2,    0x2,
      0x2,   0x11e,  0x120,  0x3,    0x2,    0x2,    0x2,    0x11f,  0x11d,
      0x3,   0x2,    0x2,    0x2,    0x120,  0x121,  0x7,    0x24,   0x2,
      0x2,   0x121,  0x38,   0x3,    0x2,    0x2,    0x2,    0x122,  0x123,
      0x7,   0x30,   0x2,    0x2,    0x123,  0x3a,   0x3,    0x2,    0x2,
      0x2,   0x124,  0x125,  0x7,    0x2a,   0x2,    0x2,    0x125,  0x3c,
      0x3,   0x2,    0x2,    0x2,    0x126,  0x127,  0x7,    0x2b,   0x2,
      0x2,   0x127,  0x3e,   0x3,    0x2,    0x2,    0x2,    0x128,  0x129,
      0x7,   0x5d,   0x2,    0x2,    0x129,  0x40,   0x3,    0x2,    0x2,
      0x2,   0x12a,  0x12b,  0x7,    0x5f,   0x2,    0x2,    0x12b,  0x42,
      0x3,   0x2,    0x2,    0x2,    0x12c,  0x12d,  0x7,    0x2c,   0x2,
      0x2,   0x12d,  0x44,   0x3,    0x2,    0x2,    0x2,    0x12e,  0x12f,
      0x7,   0x2d,   0x2,    0x2,    0x12f,  0x46,   0x3,    0x2,    0x2,
      0x2,   0x130,  0x131,  0x7,    0x2f,   0x2,    0x2,    0x131,  0x48,
      0x3,   0x2,    0x2,    0x2,    0x132,  0x133,  0x7,    0x2e,   0x2,
      0x2,   0x133,  0x4a,   0x3,    0x2,    0x2,    0x2,    0x134,  0x135,
      0x7,   0x31,   0x2,    0x2,    0x135,  0x4c,   0x3,    0x2,    0x2,
      0x2,   0x136,  0x137,  0x7,    0x27,   0x2,    0x2,    0x137,  0x4e,
      0x3,   0x2,    0x2,    0x2,    0x138,  0x139,  0x7,    0x42,   0x2,
      0x2,   0x139,  0x50,   0x3,    0x2,    0x2,    0x2,    0x13a,  0x13b,
      0x7,   0x3c,   0x2,    0x2,    0x13b,  0x13c,  0x7,    0x3f,   0x2,
      0x2,   0x13c,  0x52,   0x3,    0x2,    0x2,    0x2,    0x13d,  0x13e,
      0x7,   0x25,   0x2,    0x2,    0x13e,  0x54,   0x3,    0x2,    0x2,
      0x2,   0x13f,  0x140,  0x7,    0x3c,   0x2,    0x2,    0x140,  0x56,
      0x3,   0x2,    0x2,    0x2,    0x141,  0x142,  0x7,    0x3d,   0x2,
      0x2,   0x142,  0x58,   0x3,    0x2,    0x2,    0x2,    0x143,  0x144,
      0x7,   0x3e,   0x2,    0x2,    0x144,  0x145,  0x7,    0x3f,   0x2,
      0x2,   0x145,  0x5a,   0x3,    0x2,    0x2,    0x2,    0x146,  0x147,
      0x7,   0x40,   0x2,    0x2,    0x147,  0x148,  0x7,    0x3f,   0x2,
      0x2,   0x148,  0x5c,   0x3,    0x2,    0x2,    0x2,    0x149,  0x14a,
      0x7,   0x23,   0x2,    0x2,    0x14a,  0x14b,  0x7,    0x3f,   0x2,
      0x2,   0x14b,  0x5e,   0x3,    0x2,    0x2,    0x2,    0x14c,  0x14d,
      0x7,   0x60,   0x2,    0x2,    0x14d,  0x60,   0x3,    0x2,    0x2,
      0x2,   0x14e,  0x14f,  0x7,    0x80,   0x2,    0x2,    0x14f,  0x62,
      0x3,   0x2,    0x2,    0x2,    0x150,  0x151,  0x7,    0x3e,   0x2,
      0x2,   0x151,  0x64,   0x3,    0x2,    0x2,    0x2,    0x152,  0x153,
      0x7,   0x40,   0x2,    0x2,    0x153,  0x66,   0x3,    0x2,    0x2,
      0x2,   0x154,  0x155,  0x7,    0x3f,   0x2,    0x2,    0x155,  0x68,
      0x3,   0x2,    0x2,    0x2,    0x156,  0x157,  0x7,    0x7e,   0x2,
      0x2,   0x157,  0x158,  0x7,    0x7e,   0x2,    0x2,    0x158,  0x6a,
      0x3,   0x2,    0x2,    0x2,    0x159,  0x15a,  0x7,    0x61,   0x2,
      0x2,   0x15a,  0x6c,   0x3,    0x2,    0x2,    0x2,    0x15b,  0x15d,
      0x9,   0x6,    0x2,    0x2,    0x15c,  0x15b,  0x3,    0x2,    0x2,
      0x2,   0x15d,  0x15e,  0x3,    0x2,    0x2,    0x2,    0x15e,  0x15c,
      0x3,   0x2,    0x2,    0x2,    0x15e,  0x15f,  0x3,    0x2,    0x2,
      0x2,   0x15f,  0x160,  0x3,    0x2,    0x2,    0x2,    0x160,  0x161,
      0x8,   0x37,   0x2,    0x2,    0x161,  0x6e,   0x3,    0x2,    0x2,
      0x2,   0x162,  0x163,  0x9,    0x7,    0x2,    0x2,    0x163,  0x70,
      0x3,   0x2,    0x2,    0x2,    0x164,  0x166,  0x9,    0x8,    0x2,
      0x2,   0x165,  0x164,  0x3,    0x2,    0x2,    0x2,    0x166,  0x167,
      0x3,   0x2,    0x2,    0x2,    0x167,  0x165,  0x3,    0x2,    0x2,
      0x2,   0x167,  0x168,  0x3,    0x2,    0x2,    0x2,    0x168,  0x72,
      0x3,   0x2,    0x2,    0x2,    0x169,  0x16b,  0x5,    0x2d,   0x17,
      0x2,   0x16a,  0x169,  0x3,    0x2,    0x2,    0x2,    0x16b,  0x16e,
      0x3,   0x2,    0x2,    0x2,    0x16c,  0x16a,  0x3,    0x2,    0x2,
      0x2,   0x16c,  0x16d,  0x3,    0x2,    0x2,    0x2,    0x16d,  0x170,
      0x3,   0x2,    0x2,    0x2,    0x16e,  0x16c,  0x3,    0x2,    0x2,
      0x2,   0x16f,  0x171,  0x7,    0x30,   0x2,    0x2,    0x170,  0x16f,
      0x3,   0x2,    0x2,    0x2,    0x170,  0x171,  0x3,    0x2,    0x2,
      0x2,   0x171,  0x173,  0x3,    0x2,    0x2,    0x2,    0x172,  0x174,
      0x5,   0x2d,   0x17,   0x2,    0x173,  0x172,  0x3,    0x2,    0x2,
      0x2,   0x174,  0x175,  0x3,    0x2,    0x2,    0x2,    0x175,  0x173,
      0x3,   0x2,    0x2,    0x2,    0x175,  0x176,  0x3,    0x2,    0x2,
      0x2,   0x176,  0x74,   0x3,    0x2,    0x2,    0x2,    0x177,  0x17d,
      0x5,   0x3f,   0x20,   0x2,    0x178,  0x17e,  0x5,    0x47,   0x24,
      0x2,   0x179,  0x17e,  0x5,    0x71,   0x39,   0x2,    0x17a,  0x17e,
      0x5,   0x73,   0x3a,   0x2,    0x17b,  0x17e,  0x7,    0x2e,   0x2,
      0x2,   0x17c,  0x17e,  0x5,    0x6d,   0x37,   0x2,    0x17d,  0x178,
      0x3,   0x2,    0x2,    0x2,    0x17d,  0x179,  0x3,    0x2,    0x2,
      0x2,   0x17d,  0x17a,  0x3,    0x2,    0x2,    0x2,    0x17d,  0x17b,
      0x3,   0x2,    0x2,    0x2,    0x17d,  0x17c,  0x3,    0x2,    0x2,
      0x2,   0x17e,  0x17f,  0x3,    0x2,    0x2,    0x2,    0x17f,  0x17d,
      0x3,   0x2,    0x2,    0x2,    0x17f,  0x180,  0x3,    0x2,    0x2,
      0x2,   0x180,  0x181,  0x3,    0x2,    0x2,    0x2,    0x181,  0x182,
      0x5,   0x41,   0x21,   0x2,    0x182,  0x76,   0x3,    0x2,    0x2,
      0x2,   0x183,  0x184,  0x7,    0x2f,   0x2,    0x2,    0x184,  0x185,
      0x7,   0x2f,   0x2,    0x2,    0x185,  0x189,  0x3,    0x2,    0x2,
      0x2,   0x186,  0x188,  0xa,    0x9,    0x2,    0x2,    0x187,  0x186,
      0x3,   0x2,    0x2,    0x2,    0x188,  0x18b,  0x3,    0x2,    0x2,
      0x2,   0x189,  0x187,  0x3,    0x2,    0x2,    0x2,    0x189,  0x18a,
      0x3,   0x2,    0x2,    0x2,    0x18a,  0x18e,  0x3,    0x2,    0x2,
      0x2,   0x18b,  0x189,  0x3,    0x2,    0x2,    0x2,    0x18c,  0x18f,
      0x5,   0x7b,   0x3e,   0x2,    0x18d,  0x18f,  0x7,    0x2,    0x2,
      0x3,   0x18e,  0x18c,  0x3,    0x2,    0x2,    0x2,    0x18e,  0x18d,
      0x3,   0x2,    0x2,    0x2,    0x18f,  0x190,  0x3,    0x2,    0x2,
      0x2,   0x190,  0x191,  0x8,    0x3c,   0x3,    0x2,    0x191,  0x78,
      0x3,   0x2,    0x2,    0x2,    0x192,  0x193,  0x7,    0x31,   0x2,
      0x2,   0x193,  0x194,  0x7,    0x2c,   0x2,    0x2,    0x194,  0x198,
      0x3,   0x2,    0x2,    0x2,    0x195,  0x197,  0xb,    0x2,    0x2,
      0x2,   0x196,  0x195,  0x3,    0x2,    0x2,    0x2,    0x197,  0x19a,
      0x3,   0x2,    0x2,    0x2,    0x198,  0x199,  0x3,    0x2,    0x2,
      0x2,   0x198,  0x196,  0x3,    0x2,    0x2,    0x2,    0x199,  0x19b,
      0x3,   0x2,    0x2,    0x2,    0x19a,  0x198,  0x3,    0x2,    0x2,
      0x2,   0x19b,  0x19c,  0x7,    0x2c,   0x2,    0x2,    0x19c,  0x19d,
      0x7,   0x31,   0x2,    0x2,    0x19d,  0x19e,  0x3,    0x2,    0x2,
      0x2,   0x19e,  0x19f,  0x8,    0x3d,   0x3,    0x2,    0x19f,  0x7a,
      0x3,   0x2,    0x2,    0x2,    0x1a0,  0x1a2,  0x7,    0xf,    0x2,
      0x2,   0x1a1,  0x1a0,  0x3,    0x2,    0x2,    0x2,    0x1a1,  0x1a2,
      0x3,   0x2,    0x2,    0x2,    0x1a2,  0x1a3,  0x3,    0x2,    0x2,
      0x2,   0x1a3,  0x1a4,  0x7,    0xc,    0x2,    0x2,    0x1a4,  0x7c,
      0x3,   0x2,    0x2,    0x2,    0x1a5,  0x1a8,  0x5,    0x6f,   0x38,
      0x2,   0x1a6,  0x1a8,  0x9,    0xa,    0x2,    0x2,    0x1a7,  0x1a5,
      0x3,   0x2,    0x2,    0x2,    0x1a7,  0x1a6,  0x3,    0x2,    0x2,
      0x2,   0x1a8,  0x1a9,  0x3,    0x2,    0x2,    0x2,    0x1a9,  0x1a7,
      0x3,   0x2,    0x2,    0x2,    0x1a9,  0x1aa,  0x3,    0x2,    0x2,
      0x2,   0x1aa,  0x7e,   0x3,    0x2,    0x2,    0x2,    0x1a,   0x2,
      0xf5,  0xfc,   0x100,  0x102,  0x105,  0x108,  0x110,  0x112,  0x11b,
      0x11d, 0x15e,  0x167,  0x16c,  0x170,  0x175,  0x17d,  0x17f,  0x189,
      0x18e, 0x198,  0x1a1,  0x1a7,  0x1a9,  0x4,    0x8,    0x2,    0x2,
      0x2,   0x4,    0x2,
  };

  atn::ATNDeserializer deserializer;
  _atn = deserializer.deserialize(_serializedATN);

  size_t count = _atn.getNumberOfDecisions();
  _decisionToDFA.reserve(count);
  for (size_t i = 0; i < count; i++) {
    _decisionToDFA.emplace_back(_atn.getDecisionState(i), i);
  }
}

// Out-of-class definition of the static `_init` member declared in SQLLexer.h.
// Constructing this object during static initialization invokes
// SQLLexer::Initializer::Initializer().
// NOTE(review): presumably that constructor is the function ending just above,
// which deserializes _serializedATN into _atn and fills _decisionToDFA --
// confirm against the full file.
SQLLexer::Initializer SQLLexer::_init;


================================================
FILE: src/db/sqlengine/antlr/gen/SQLLexer.h
================================================

// Generated from SQLLexer.g4 by ANTLR 4.8

#pragma once


#include "antlr4-runtime.h"


namespace antlr4 {


/// Lexer generated from SQLLexer.g4 by ANTLR 4.8.
///
/// Exposes the grammar's token-type and channel constants and overrides the
/// metadata accessors inherited from the antlr4::Lexer hierarchy. The grammar
/// tables are static members, i.e. shared by all SQLLexer instances.
class SQLLexer : public antlr4::Lexer {
 public:
  /// Token types. Each value equals the position of the token in the
  /// "token symbolic names" list of SQLLexer.interp (index 0 is unused).
  // clang-format off
  enum {
    // Keywords (tokens with a fixed word as literal name).
    OR = 1, AND = 2, NOT = 3, IN = 4,
    CONTAIN_ALL = 5, CONTAIN_ANY = 6,
    BETWEEN = 7, LIKE = 8, WHERE = 9, SELECT = 10, FROM = 11,
    AS = 12, BY = 13, ORDER = 14, ASC = 15, DESC = 16, LIMIT = 17,
    TRUE_V = 18, FALSE_V = 19, IS = 20, NULL_V = 21,

    // Numeric and string literals.
    INTEGER = 22, FLOAT = 23,
    SQUOTA_STRING = 24, DQUOTA_STRING = 25,

    // Punctuation and operators.
    DOT = 26, LP = 27, RP = 28, LMP = 29, RMP = 30,
    ASTERISK = 31, PLUS_SIGN = 32, MINUS_SIGN = 33, COMMA = 34,
    SOLIDUS = 35, MOD = 36, AT_SIGN = 37, ASSIGN_OP = 38,
    SHARP_SIGN = 39, COLON = 40, SEMI = 41,
    LE_OP = 42, GE_OP = 43, NE_OP = 44,
    CARET_OP = 45, TILDE_OP = 46,
    L_OP = 47, G_OP = 48, E_OP = 49,
    CONCAT_OP = 50, UNDERSCORE = 51,

    // Remaining tokens without a fixed literal name.
    SPACES = 52, VECTOR = 53,
    SINGLE_LINE_COMMENT = 54, MULTI_LINE_COMMENT = 55,
    REGULAR_ID = 56
  };
  // clang-format on

  /// Additional token channel declared by the grammar.
  enum { COMMENTS = 2 };

  /// Creates a lexer that reads characters from `input`.
  SQLLexer(antlr4::CharStream *input);
  ~SQLLexer();

  // Grammar metadata accessors.
  std::string getGrammarFileName() const override;
  const std::vector<std::string> &getRuleNames() const override;
  const std::vector<std::string> &getChannelNames() const override;
  const std::vector<std::string> &getModeNames() const override;

  // Deprecated: use getVocabulary() instead.
  const std::vector<std::string> &getTokenNames() const override;
  antlr4::dfa::Vocabulary &getVocabulary() const override;

  // Access to the serialized and the deserialized ATN of this grammar.
  const std::vector<uint16_t> getSerializedATN() const override;
  const antlr4::atn::ATN &getATN() const override;

 private:
  // Prediction state.
  static std::vector<antlr4::dfa::DFA> _decisionToDFA;
  static antlr4::atn::PredictionContextCache _sharedContextCache;

  // Name tables returned by the accessors above.
  static std::vector<std::string> _ruleNames;
  static std::vector<std::string> _tokenNames;
  static std::vector<std::string> _channelNames;
  static std::vector<std::string> _modeNames;

  // Vocabulary data.
  static std::vector<std::string> _literalNames;
  static std::vector<std::string> _symbolicNames;
  static antlr4::dfa::Vocabulary _vocabulary;

  // ATN in deserialized and serialized form.
  static antlr4::atn::ATN _atn;
  static std::vector<uint16_t> _serializedATN;

  // The grammar declares no individual action functions and no semantic
  // predicate functions, so none are generated here.

  /// Helper whose single static instance `_init` is constructed once at
  /// static-initialization time.
  /// NOTE(review): its constructor appears to populate the static tables
  /// above (see SQLLexer.cpp) -- confirm.
  struct Initializer {
    Initializer();
  };
  static Initializer _init;
};

}  // namespace antlr4


================================================
FILE: src/db/sqlengine/antlr/gen/SQLLexer.interp
================================================
token literal names:
null
'OR'
'AND'
'NOT'
'IN'
'CONTAIN_ALL'
'CONTAIN_ANY'
'BETWEEN'
'LIKE'
'WHERE'
'SELECT'
'FROM'
'AS'
'BY'
'ORDER'
'ASC'
'DESC'
'LIMIT'
'TRUE'
'FALSE'
'IS'
'NULL'
null
null
null
null
'.'
'('
')'
'['
']'
'*'
'+'
'-'
','
'/'
'%'
'@'
':='
'#'
':'
';'
'<='
'>='
'!='
'^'
'~'
'<'
'>'
'='
'||'
'_'
null
null
null
null
null

token symbolic names:
null
OR
AND
NOT
IN
CONTAIN_ALL
CONTAIN_ANY
BETWEEN
LIKE
WHERE
SELECT
FROM
AS
BY
ORDER
ASC
DESC
LIMIT
TRUE_V
FALSE_V
IS
NULL_V
INTEGER
FLOAT
SQUOTA_STRING
DQUOTA_STRING
DOT
LP
RP
LMP
RMP
ASTERISK
PLUS_SIGN
MINUS_SIGN
COMMA
SOLIDUS
MOD
AT_SIGN
ASSIGN_OP
SHARP_SIGN
COLON
SEMI
LE_OP
GE_OP
NE_OP
CARET_OP
TILDE_OP
L_OP
G_OP
E_OP
CONCAT_OP
UNDERSCORE
SPACES
VECTOR
SINGLE_LINE_COMMENT
MULTI_LINE_COMMENT
REGULAR_ID

rule names:
OR
AND
NOT
IN
CONTAIN_ALL
CONTAIN_ANY
BETWEEN
LIKE
WHERE
SELECT
FROM
AS
BY
ORDER
ASC
DESC
LIMIT
TRUE_V
FALSE_V
IS
NULL_V
UNSIGNED_INTEGER
INTEGER
APPROXIMATE_NUM_LIT
FLOAT
SQUOTA_STRING
DQUOTA_STRING
DOT
LP
RP
LMP
RMP
ASTERISK
PLUS_SIGN
MINUS_SIGN
COMMA
SOLIDUS
MOD
AT_SIGN
ASSIGN_OP
SHARP_SIGN
COLON
SEMI
LE_OP
GE_OP
NE_OP
CARET_OP
TILDE_OP
L_OP
G_OP
E_OP
CONCAT_OP
UNDERSCORE
SPACES
SIMPLE_LETTER
UNSIGNED_INTEGER_FRAGMENT
FLOAT_FRAGMENT
VECTOR
SINGLE_LINE_COMMENT
MULTI_LINE_COMMENT
NEWLINE
REGULAR_ID

channel names:
DEFAULT_TOKEN_CHANNEL
HIDDEN
null
null
COMMENTS

mode names:
DEFAULT_MODE

atn:
[3, 24715, 42794, 33075, 47597, 16764, 15335, 30598, 22884, 2, 58, 427, 8, 1, 4, 2, 9, 2, 4, 3, 9, 3, 4, 4, 9, 4, 4, 5, 9, 5, 4, 6, 9, 6, 4, 7, 9, 7, 4, 8, 9, 8, 4, 9, 9, 9, 4, 10, 9, 10, 4, 11, 9, 11, 4, 12, 9, 12, 4, 13, 9, 13, 4, 14, 9, 14, 4, 15, 9, 15, 4, 16, 9, 16, 4, 17, 9, 17, 4, 18, 9, 18, 4, 19, 9, 19, 4, 20, 9, 20, 4, 21, 9, 21, 4, 22, 9, 22, 4, 23, 9, 23, 4, 24, 9, 24, 4, 25, 9, 25, 4, 26, 9, 26, 4, 27, 9, 27, 4, 28, 9, 28, 4, 29, 9, 29, 4, 30, 9, 30, 4, 31, 9, 31, 4, 32, 9, 32, 4, 33, 9, 33, 4, 34, 9, 34, 4, 35, 9, 35, 4, 36, 9, 36, 4, 37, 9, 37, 4, 38, 9, 38, 4, 39, 9, 39, 4, 40, 9, 40, 4, 41, 9, 41, 4, 42, 9, 42, 4, 43, 9, 43, 4, 44, 9, 44, 4, 45, 9, 45, 4, 46, 9, 46, 4, 47, 9, 47, 4, 48, 9, 48, 4, 49, 9, 49, 4, 50, 9, 50, 4, 51, 9, 51, 4, 52, 9, 52, 4, 53, 9, 53, 4, 54, 9, 54, 4, 55, 9, 55, 4, 56, 9, 56, 4, 57, 9, 57, 4, 58, 9, 58, 4, 59, 9, 59, 4, 60, 9, 60, 4, 61, 9, 61, 4, 62, 9, 62, 4, 63, 9, 63, 3, 2, 3, 2, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 4, 3, 4, 3, 4, 3, 5, 3, 5, 3, 5, 3, 6, 3, 6, 3, 6, 3, 6, 3, 6, 3, 6, 3, 6, 3, 6, 3, 6, 3, 6, 3, 6, 3, 6, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 8, 3, 8, 3, 8, 3, 8, 3, 8, 3, 8, 3, 8, 3, 8, 3, 9, 3, 9, 3, 9, 3, 9, 3, 9, 3, 10, 3, 10, 3, 10, 3, 10, 3, 10, 3, 10, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 3, 11, 3, 12, 3, 12, 3, 12, 3, 12, 3, 12, 3, 13, 3, 13, 3, 13, 3, 14, 3, 14, 3, 14, 3, 15, 3, 15, 3, 15, 3, 15, 3, 15, 3, 15, 3, 16, 3, 16, 3, 16, 3, 16, 3, 17, 3, 17, 3, 17, 3, 17, 3, 17, 3, 18, 3, 18, 3, 18, 3, 18, 3, 18, 3, 18, 3, 19, 3, 19, 3, 19, 3, 19, 3, 19, 3, 20, 3, 20, 3, 20, 3, 20, 3, 20, 3, 20, 3, 21, 3, 21, 3, 21, 3, 22, 3, 22, 3, 22, 3, 22, 3, 22, 3, 23, 3, 23, 3, 24, 5, 24, 246, 10, 24, 3, 24, 3, 24, 3, 25, 3, 25, 3, 25, 5, 25, 253, 10, 25, 3, 25, 3, 25, 5, 25, 257, 10, 25, 5, 25, 259, 10, 25, 3, 25, 5, 25, 262, 10, 25, 3, 26, 5, 26, 265, 10, 26, 3, 26, 3, 26, 3, 27, 3, 27, 3, 27, 3, 27, 7, 27, 273, 10, 27, 12, 27, 14, 27, 276, 11, 27, 3, 27, 3, 27, 3, 28, 
3, 28, 3, 28, 3, 28, 7, 28, 284, 10, 28, 12, 28, 14, 28, 287, 11, 28, 3, 28, 3, 28, 3, 29, 3, 29, 3, 30, 3, 30, 3, 31, 3, 31, 3, 32, 3, 32, 3, 33, 3, 33, 3, 34, 3, 34, 3, 35, 3, 35, 3, 36, 3, 36, 3, 37, 3, 37, 3, 38, 3, 38, 3, 39, 3, 39, 3, 40, 3, 40, 3, 41, 3, 41, 3, 41, 3, 42, 3, 42, 3, 43, 3, 43, 3, 44, 3, 44, 3, 45, 3, 45, 3, 45, 3, 46, 3, 46, 3, 46, 3, 47, 3, 47, 3, 47, 3, 48, 3, 48, 3, 49, 3, 49, 3, 50, 3, 50, 3, 51, 3, 51, 3, 52, 3, 52, 3, 53, 3, 53, 3, 53, 3, 54, 3, 54, 3, 55, 6, 55, 349, 10, 55, 13, 55, 14, 55, 350, 3, 55, 3, 55, 3, 56, 3, 56, 3, 57, 6, 57, 358, 10, 57, 13, 57, 14, 57, 359, 3, 58, 7, 58, 363, 10, 58, 12, 58, 14, 58, 366, 11, 58, 3, 58, 5, 58, 369, 10, 58, 3, 58, 6, 58, 372, 10, 58, 13, 58, 14, 58, 373, 3, 59, 3, 59, 3, 59, 3, 59, 3, 59, 3, 59, 6, 59, 382, 10, 59, 13, 59, 14, 59, 383, 3, 59, 3, 59, 3, 60, 3, 60, 3, 60, 3, 60, 7, 60, 392, 10, 60, 12, 60, 14, 60, 395, 11, 60, 3, 60, 3, 60, 5, 60, 399, 10, 60, 3, 60, 3, 60, 3, 61, 3, 61, 3, 61, 3, 61, 7, 61, 407, 10, 61, 12, 61, 14, 61, 410, 11, 61, 3, 61, 3, 61, 3, 61, 3, 61, 3, 61, 3, 62, 5, 62, 418, 10, 62, 3, 62, 3, 62, 3, 63, 3, 63, 6, 63, 424, 10, 63, 13, 63, 14, 63, 425, 3, 408, 2, 64, 3, 3, 5, 4, 7, 5, 9, 6, 11, 7, 13, 8, 15, 9, 17, 10, 19, 11, 21, 12, 23, 13, 25, 14, 27, 15, 29, 16, 31, 17, 33, 18, 35, 19, 37, 20, 39, 21, 41, 22, 43, 23, 45, 2, 47, 24, 49, 2, 51, 25, 53, 26, 55, 27, 57, 28, 59, 29, 61, 30, 63, 31, 65, 32, 67, 33, 69, 34, 71, 35, 73, 36, 75, 37, 77, 38, 79, 39, 81, 40, 83, 41, 85, 42, 87, 43, 89, 44, 91, 45, 93, 46, 95, 47, 97, 48, 99, 49, 101, 50, 103, 51, 105, 52, 107, 53, 109, 54, 111, 2, 113, 2, 115, 2, 117, 55, 119, 56, 121, 57, 123, 2, 125, 58, 3, 2, 11, 4, 2, 45, 45, 47, 47, 4, 2, 70, 70, 72, 72, 4, 2, 41, 41, 94, 94, 4, 2, 36, 36, 94, 94, 5, 2, 11, 12, 15, 15, 34, 34, 3, 2, 67, 92, 3, 2, 50, 59, 4, 2, 12, 12, 15, 15, 5, 2, 47, 47, 50, 59, 97, 97, 2, 446, 2, 3, 3, 2, 2, 2, 2, 5, 3, 2, 2, 2, 2, 7, 3, 2, 2, 2, 2, 9, 3, 2, 2, 2, 2, 11, 3, 2, 2, 2, 2, 13, 3, 2, 2, 
2, 2, 15, 3, 2, 2, 2, 2, 17, 3, 2, 2, 2, 2, 19, 3, 2, 2, 2, 2, 21, 3, 2, 2, 2, 2, 23, 3, 2, 2, 2, 2, 25, 3, 2, 2, 2, 2, 27, 3, 2, 2, 2, 2, 29, 3, 2, 2, 2, 2, 31, 3, 2, 2, 2, 2, 33, 3, 2, 2, 2, 2, 35, 3, 2, 2, 2, 2, 37, 3, 2, 2, 2, 2, 39, 3, 2, 2, 2, 2, 41, 3, 2, 2, 2, 2, 43, 3, 2, 2, 2, 2, 47, 3, 2, 2, 2, 2, 51, 3, 2, 2, 2, 2, 53, 3, 2, 2, 2, 2, 55, 3, 2, 2, 2, 2, 57, 3, 2, 2, 2, 2, 59, 3, 2, 2, 2, 2, 61, 3, 2, 2, 2, 2, 63, 3, 2, 2, 2, 2, 65, 3, 2, 2, 2, 2, 67, 3, 2, 2, 2, 2, 69, 3, 2, 2, 2, 2, 71, 3, 2, 2, 2, 2, 73, 3, 2, 2, 2, 2, 75, 3, 2, 2, 2, 2, 77, 3, 2, 2, 2, 2, 79, 3, 2, 2, 2, 2, 81, 3, 2, 2, 2, 2, 83, 3, 2, 2, 2, 2, 85, 3, 2, 2, 2, 2, 87, 3, 2, 2, 2, 2, 89, 3, 2, 2, 2, 2, 91, 3, 2, 2, 2, 2, 93, 3, 2, 2, 2, 2, 95, 3, 2, 2, 2, 2, 97, 3, 2, 2, 2, 2, 99, 3, 2, 2, 2, 2, 101, 3, 2, 2, 2, 2, 103, 3, 2, 2, 2, 2, 105, 3, 2, 2, 2, 2, 107, 3, 2, 2, 2, 2, 109, 3, 2, 2, 2, 2, 117, 3, 2, 2, 2, 2, 119, 3, 2, 2, 2, 2, 121, 3, 2, 2, 2, 2, 125, 3, 2, 2, 2, 3, 127, 3, 2, 2, 2, 5, 130, 3, 2, 2, 2, 7, 134, 3, 2, 2, 2, 9, 138, 3, 2, 2, 2, 11, 141, 3, 2, 2, 2, 13, 153, 3, 2, 2, 2, 15, 165, 3, 2, 2, 2, 17, 173, 3, 2, 2, 2, 19, 178, 3, 2, 2, 2, 21, 184, 3, 2, 2, 2, 23, 191, 3, 2, 2, 2, 25, 196, 3, 2, 2, 2, 27, 199, 3, 2, 2, 2, 29, 202, 3, 2, 2, 2, 31, 208, 3, 2, 2, 2, 33, 212, 3, 2, 2, 2, 35, 217, 3, 2, 2, 2, 37, 223, 3, 2, 2, 2, 39, 228, 3, 2, 2, 2, 41, 234, 3, 2, 2, 2, 43, 237, 3, 2, 2, 2, 45, 242, 3, 2, 2, 2, 47, 245, 3, 2, 2, 2, 49, 249, 3, 2, 2, 2, 51, 264, 3, 2, 2, 2, 53, 268, 3, 2, 2, 2, 55, 279, 3, 2, 2, 2, 57, 290, 3, 2, 2, 2, 59, 292, 3, 2, 2, 2, 61, 294, 3, 2, 2, 2, 63, 296, 3, 2, 2, 2, 65, 298, 3, 2, 2, 2, 67, 300, 3, 2, 2, 2, 69, 302, 3, 2, 2, 2, 71, 304, 3, 2, 2, 2, 73, 306, 3, 2, 2, 2, 75, 308, 3, 2, 2, 2, 77, 310, 3, 2, 2, 2, 79, 312, 3, 2, 2, 2, 81, 314, 3, 2, 2, 2, 83, 317, 3, 2, 2, 2, 85, 319, 3, 2, 2, 2, 87, 321, 3, 2, 2, 2, 89, 323, 3, 2, 2, 2, 91, 326, 3, 2, 2, 2, 93, 329, 3, 2, 2, 2, 95, 332, 3, 2, 2, 2, 97, 334, 3, 2, 2, 2, 99, 336, 3, 2, 2, 2, 101, 338, 3, 
2, 2, 2, 103, 340, 3, 2, 2, 2, 105, 342, 3, 2, 2, 2, 107, 345, 3, 2, 2, 2, 109, 348, 3, 2, 2, 2, 111, 354, 3, 2, 2, 2, 113, 357, 3, 2, 2, 2, 115, 364, 3, 2, 2, 2, 117, 375, 3, 2, 2, 2, 119, 387, 3, 2, 2, 2, 121, 402, 3, 2, 2, 2, 123, 417, 3, 2, 2, 2, 125, 423, 3, 2, 2, 2, 127, 128, 7, 81, 2, 2, 128, 129, 7, 84, 2, 2, 129, 4, 3, 2, 2, 2, 130, 131, 7, 67, 2, 2, 131, 132, 7, 80, 2, 2, 132, 133, 7, 70, 2, 2, 133, 6, 3, 2, 2, 2, 134, 135, 7, 80, 2, 2, 135, 136, 7, 81, 2, 2, 136, 137, 7, 86, 2, 2, 137, 8, 3, 2, 2, 2, 138, 139, 7, 75, 2, 2, 139, 140, 7, 80, 2, 2, 140, 10, 3, 2, 2, 2, 141, 142, 7, 69, 2, 2, 142, 143, 7, 81, 2, 2, 143, 144, 7, 80, 2, 2, 144, 145, 7, 86, 2, 2, 145, 146, 7, 67, 2, 2, 146, 147, 7, 75, 2, 2, 147, 148, 7, 80, 2, 2, 148, 149, 7, 97, 2, 2, 149, 150, 7, 67, 2, 2, 150, 151, 7, 78, 2, 2, 151, 152, 7, 78, 2, 2, 152, 12, 3, 2, 2, 2, 153, 154, 7, 69, 2, 2, 154, 155, 7, 81, 2, 2, 155, 156, 7, 80, 2, 2, 156, 157, 7, 86, 2, 2, 157, 158, 7, 67, 2, 2, 158, 159, 7, 75, 2, 2, 159, 160, 7, 80, 2, 2, 160, 161, 7, 97, 2, 2, 161, 162, 7, 67, 2, 2, 162, 163, 7, 80, 2, 2, 163, 164, 7, 91, 2, 2, 164, 14, 3, 2, 2, 2, 165, 166, 7, 68, 2, 2, 166, 167, 7, 71, 2, 2, 167, 168, 7, 86, 2, 2, 168, 169, 7, 89, 2, 2, 169, 170, 7, 71, 2, 2, 170, 171, 7, 71, 2, 2, 171, 172, 7, 80, 2, 2, 172, 16, 3, 2, 2, 2, 173, 174, 7, 78, 2, 2, 174, 175, 7, 75, 2, 2, 175, 176, 7, 77, 2, 2, 176, 177, 7, 71, 2, 2, 177, 18, 3, 2, 2, 2, 178, 179, 7, 89, 2, 2, 179, 180, 7, 74, 2, 2, 180, 181, 7, 71, 2, 2, 181, 182, 7, 84, 2, 2, 182, 183, 7, 71, 2, 2, 183, 20, 3, 2, 2, 2, 184, 185, 7, 85, 2, 2, 185, 186, 7, 71, 2, 2, 186, 187, 7, 78, 2, 2, 187, 188, 7, 71, 2, 2, 188, 189, 7, 69, 2, 2, 189, 190, 7, 86, 2, 2, 190, 22, 3, 2, 2, 2, 191, 192, 7, 72, 2, 2, 192, 193, 7, 84, 2, 2, 193, 194, 7, 81, 2, 2, 194, 195, 7, 79, 2, 2, 195, 24, 3, 2, 2, 2, 196, 197, 7, 67, 2, 2, 197, 198, 7, 85, 2, 2, 198, 26, 3, 2, 2, 2, 199, 200, 7, 68, 2, 2, 200, 201, 7, 91, 2, 2, 201, 28, 3, 2, 2, 2, 202, 203, 7, 81, 2, 2, 203, 
204, 7, 84, 2, 2, 204, 205, 7, 70, 2, 2, 205, 206, 7, 71, 2, 2, 206, 207, 7, 84, 2, 2, 207, 30, 3, 2, 2, 2, 208, 209, 7, 67, 2, 2, 209, 210, 7, 85, 2, 2, 210, 211, 7, 69, 2, 2, 211, 32, 3, 2, 2, 2, 212, 213, 7, 70, 2, 2, 213, 214, 7, 71, 2, 2, 214, 215, 7, 85, 2, 2, 215, 216, 7, 69, 2, 2, 216, 34, 3, 2, 2, 2, 217, 218, 7, 78, 2, 2, 218, 219, 7, 75, 2, 2, 219, 220, 7, 79, 2, 2, 220, 221, 7, 75, 2, 2, 221, 222, 7, 86, 2, 2, 222, 36, 3, 2, 2, 2, 223, 224, 7, 86, 2, 2, 224, 225, 7, 84, 2, 2, 225, 226, 7, 87, 2, 2, 226, 227, 7, 71, 2, 2, 227, 38, 3, 2, 2, 2, 228, 229, 7, 72, 2, 2, 229, 230, 7, 67, 2, 2, 230, 231, 7, 78, 2, 2, 231, 232, 7, 85, 2, 2, 232, 233, 7, 71, 2, 2, 233, 40, 3, 2, 2, 2, 234, 235, 7, 75, 2, 2, 235, 236, 7, 85, 2, 2, 236, 42, 3, 2, 2, 2, 237, 238, 7, 80, 2, 2, 238, 239, 7, 87, 2, 2, 239, 240, 7, 78, 2, 2, 240, 241, 7, 78, 2, 2, 241, 44, 3, 2, 2, 2, 242, 243, 5, 113, 57, 2, 243, 46, 3, 2, 2, 2, 244, 246, 5, 71, 36, 2, 245, 244, 3, 2, 2, 2, 245, 246, 3, 2, 2, 2, 246, 247, 3, 2, 2, 2, 247, 248, 5, 45, 23, 2, 248, 48, 3, 2, 2, 2, 249, 258, 5, 115, 58, 2, 250, 252, 7, 71, 2, 2, 251, 253, 9, 2, 2, 2, 252, 251, 3, 2, 2, 2, 252, 253, 3, 2, 2, 2, 253, 256, 3, 2, 2, 2, 254, 257, 5, 115, 58, 2, 255, 257, 5, 113, 57, 2, 256, 254, 3, 2, 2, 2, 256, 255, 3, 2, 2, 2, 257, 259, 3, 2, 2, 2, 258, 250, 3, 2, 2, 2, 258, 259, 3, 2, 2, 2, 259, 261, 3, 2, 2, 2, 260, 262, 9, 3, 2, 2, 261, 260, 3, 2, 2, 2, 261, 262, 3, 2, 2, 2, 262, 50, 3, 2, 2, 2, 263, 265, 5, 71, 36, 2, 264, 263, 3, 2, 2, 2, 264, 265, 3, 2, 2, 2, 265, 266, 3, 2, 2, 2, 266, 267, 5, 49, 25, 2, 267, 52, 3, 2, 2, 2, 268, 274, 7, 41, 2, 2, 269, 273, 10, 4, 2, 2, 270, 271, 7, 94, 2, 2, 271, 273, 11, 2, 2, 2, 272, 269, 3, 2, 2, 2, 272, 270, 3, 2, 2, 2, 273, 276, 3, 2, 2, 2, 274, 272, 3, 2, 2, 2, 274, 275, 3, 2, 2, 2, 275, 277, 3, 2, 2, 2, 276, 274, 3, 2, 2, 2, 277, 278, 7, 41, 2, 2, 278, 54, 3, 2, 2, 2, 279, 285, 7, 36, 2, 2, 280, 284, 10, 5, 2, 2, 281, 282, 7, 94, 2, 2, 282, 284, 11, 2, 2, 2, 283, 280, 3, 2, 2, 
2, 283, 281, 3, 2, 2, 2, 284, 287, 3, 2, 2, 2, 285, 283, 3, 2, 2, 2, 285, 286, 3, 2, 2, 2, 286, 288, 3, 2, 2, 2, 287, 285, 3, 2, 2, 2, 288, 289, 7, 36, 2, 2, 289, 56, 3, 2, 2, 2, 290, 291, 7, 48, 2, 2, 291, 58, 3, 2, 2, 2, 292, 293, 7, 42, 2, 2, 293, 60, 3, 2, 2, 2, 294, 295, 7, 43, 2, 2, 295, 62, 3, 2, 2, 2, 296, 297, 7, 93, 2, 2, 297, 64, 3, 2, 2, 2, 298, 299, 7, 95, 2, 2, 299, 66, 3, 2, 2, 2, 300, 301, 7, 44, 2, 2, 301, 68, 3, 2, 2, 2, 302, 303, 7, 45, 2, 2, 303, 70, 3, 2, 2, 2, 304, 305, 7, 47, 2, 2, 305, 72, 3, 2, 2, 2, 306, 307, 7, 46, 2, 2, 307, 74, 3, 2, 2, 2, 308, 309, 7, 49, 2, 2, 309, 76, 3, 2, 2, 2, 310, 311, 7, 39, 2, 2, 311, 78, 3, 2, 2, 2, 312, 313, 7, 66, 2, 2, 313, 80, 3, 2, 2, 2, 314, 315, 7, 60, 2, 2, 315, 316, 7, 63, 2, 2, 316, 82, 3, 2, 2, 2, 317, 318, 7, 37, 2, 2, 318, 84, 3, 2, 2, 2, 319, 320, 7, 60, 2, 2, 320, 86, 3, 2, 2, 2, 321, 322, 7, 61, 2, 2, 322, 88, 3, 2, 2, 2, 323, 324, 7, 62, 2, 2, 324, 325, 7, 63, 2, 2, 325, 90, 3, 2, 2, 2, 326, 327, 7, 64, 2, 2, 327, 328, 7, 63, 2, 2, 328, 92, 3, 2, 2, 2, 329, 330, 7, 35, 2, 2, 330, 331, 7, 63, 2, 2, 331, 94, 3, 2, 2, 2, 332, 333, 7, 96, 2, 2, 333, 96, 3, 2, 2, 2, 334, 335, 7, 128, 2, 2, 335, 98, 3, 2, 2, 2, 336, 337, 7, 62, 2, 2, 337, 100, 3, 2, 2, 2, 338, 339, 7, 64, 2, 2, 339, 102, 3, 2, 2, 2, 340, 341, 7, 63, 2, 2, 341, 104, 3, 2, 2, 2, 342, 343, 7, 126, 2, 2, 343, 344, 7, 126, 2, 2, 344, 106, 3, 2, 2, 2, 345, 346, 7, 97, 2, 2, 346, 108, 3, 2, 2, 2, 347, 349, 9, 6, 2, 2, 348, 347, 3, 2, 2, 2, 349, 350, 3, 2, 2, 2, 350, 348, 3, 2, 2, 2, 350, 351, 3, 2, 2, 2, 351, 352, 3, 2, 2, 2, 352, 353, 8, 55, 2, 2, 353, 110, 3, 2, 2, 2, 354, 355, 9, 7, 2, 2, 355, 112, 3, 2, 2, 2, 356, 358, 9, 8, 2, 2, 357, 356, 3, 2, 2, 2, 358, 359, 3, 2, 2, 2, 359, 357, 3, 2, 2, 2, 359, 360, 3, 2, 2, 2, 360, 114, 3, 2, 2, 2, 361, 363, 5, 45, 23, 2, 362, 361, 3, 2, 2, 2, 363, 366, 3, 2, 2, 2, 364, 362, 3, 2, 2, 2, 364, 365, 3, 2, 2, 2, 365, 368, 3, 2, 2, 2, 366, 364, 3, 2, 2, 2, 367, 369, 7, 48, 2, 2, 368, 367, 3, 2, 2, 2, 
368, 369, 3, 2, 2, 2, 369, 371, 3, 2, 2, 2, 370, 372, 5, 45, 23, 2, 371, 370, 3, 2, 2, 2, 372, 373, 3, 2, 2, 2, 373, 371, 3, 2, 2, 2, 373, 374, 3, 2, 2, 2, 374, 116, 3, 2, 2, 2, 375, 381, 5, 63, 32, 2, 376, 382, 5, 71, 36, 2, 377, 382, 5, 113, 57, 2, 378, 382, 5, 115, 58, 2, 379, 382, 7, 46, 2, 2, 380, 382, 5, 109, 55, 2, 381, 376, 3, 2, 2, 2, 381, 377, 3, 2, 2, 2, 381, 378, 3, 2, 2, 2, 381, 379, 3, 2, 2, 2, 381, 380, 3, 2, 2, 2, 382, 383, 3, 2, 2, 2, 383, 381, 3, 2, 2, 2, 383, 384, 3, 2, 2, 2, 384, 385, 3, 2, 2, 2, 385, 386, 5, 65, 33, 2, 386, 118, 3, 2, 2, 2, 387, 388, 7, 47, 2, 2, 388, 389, 7, 47, 2, 2, 389, 393, 3, 2, 2, 2, 390, 392, 10, 9, 2, 2, 391, 390, 3, 2, 2, 2, 392, 395, 3, 2, 2, 2, 393, 391, 3, 2, 2, 2, 393, 394, 3, 2, 2, 2, 394, 398, 3, 2, 2, 2, 395, 393, 3, 2, 2, 2, 396, 399, 5, 123, 62, 2, 397, 399, 7, 2, 2, 3, 398, 396, 3, 2, 2, 2, 398, 397, 3, 2, 2, 2, 399, 400, 3, 2, 2, 2, 400, 401, 8, 60, 3, 2, 401, 120, 3, 2, 2, 2, 402, 403, 7, 49, 2, 2, 403, 404, 7, 44, 2, 2, 404, 408, 3, 2, 2, 2, 405, 407, 11, 2, 2, 2, 406, 405, 3, 2, 2, 2, 407, 410, 3, 2, 2, 2, 408, 409, 3, 2, 2, 2, 408, 406, 3, 2, 2, 2, 409, 411, 3, 2, 2, 2, 410, 408, 3, 2, 2, 2, 411, 412, 7, 44, 2, 2, 412, 413, 7, 49, 2, 2, 413, 414, 3, 2, 2, 2, 414, 415, 8, 61, 3, 2, 415, 122, 3, 2, 2, 2, 416, 418, 7, 15, 2, 2, 417, 416, 3, 2, 2, 2, 417, 418, 3, 2, 2, 2, 418, 419, 3, 2, 2, 2, 419, 420, 7, 12, 2, 2, 420, 124, 3, 2, 2, 2, 421, 424, 5, 111, 56, 2, 422, 424, 9, 10, 2, 2, 423, 421, 3, 2, 2, 2, 423, 422, 3, 2, 2, 2, 424, 425, 3, 2, 2, 2, 425, 423, 3, 2, 2, 2, 425, 426, 3, 2, 2, 2, 426, 126, 3, 2, 2, 2, 26, 2, 245, 252, 256, 258, 261, 264, 272, 274, 283, 285, 350, 359, 364, 368, 373, 381, 383, 393, 398, 408, 417, 423, 425, 4, 8, 2, 2, 2, 4, 2]

================================================
FILE: src/db/sqlengine/antlr/gen/SQLLexer.tokens
================================================
OR=1
AND=2
NOT=3
IN=4
CONTAIN_ALL=5
CONTAIN_ANY=6
BETWEEN=7
LIKE=8
WHERE=9
SELECT=10
FROM=11
AS=12
BY=13
ORDER=14
ASC=15
DESC=16
LIMIT=17
TRUE_V=18
FALSE_V=19
IS=20
NULL_V=21
INTEGER=22
FLOAT=23
SQUOTA_STRING=24
DQUOTA_STRING=25
DOT=26
LP=27
RP=28
LMP=29
RMP=30
ASTERISK=31
PLUS_SIGN=32
MINUS_SIGN=33
COMMA=34
SOLIDUS=35
MOD=36
AT_SIGN=37
ASSIGN_OP=38
SHARP_SIGN=39
COLON=40
SEMI=41
LE_OP=42
GE_OP=43
NE_OP=44
CARET_OP=45
TILDE_OP=46
L_OP=47
G_OP=48
E_OP=49
CONCAT_OP=50
UNDERSCORE=51
SPACES=52
VECTOR=53
SINGLE_LINE_COMMENT=54
MULTI_LINE_COMMENT=55
REGULAR_ID=56
'OR'=1
'AND'=2
'NOT'=3
'IN'=4
'CONTAIN_ALL'=5
'CONTAIN_ANY'=6
'BETWEEN'=7
'LIKE'=8
'WHERE'=9
'SELECT'=10
'FROM'=11
'AS'=12
'BY'=13
'ORDER'=14
'ASC'=15
'DESC'=16
'LIMIT'=17
'TRUE'=18
'FALSE'=19
'IS'=20
'NULL'=21
'.'=26
'('=27
')'=28
'['=29
']'=30
'*'=31
'+'=32
'-'=33
','=34
'/'=35
'%'=36
'@'=37
':='=38
'#'=39
':'=40
';'=41
'<='=42
'>='=43
'!='=44
'^'=45
'~'=46
'<'=47
'>'=48
'='=49
'||'=50
'_'=51


================================================
FILE: src/db/sqlengine/antlr/gen/SQLParser.cc
================================================

// Generated from SQLParser.g4 by ANTLR 4.8


#include "SQLParser.h"
#include "SQLParserListener.h"


using namespace antlrcpp;
using namespace antlr4;
using namespace antlr4;

// Builds a parser over `input` and installs a freshly allocated ATN simulator
// wired to the parser's shared ATN, DFA table and prediction-context cache.
SQLParser::SQLParser(TokenStream *input) : Parser(input) {
  auto *simulator = new atn::ParserATNSimulator(this, _atn, _decisionToDFA,
                                                _sharedContextCache);
  _interpreter = simulator;
}

// Deletes the simulator object held in _interpreter (the constructor assigns
// it from a `new` expression).
SQLParser::~SQLParser() {
  delete _interpreter;
}

// Returns the name of the grammar file this parser was generated from.
std::string SQLParser::getGrammarFileName() const {
  std::string grammarFile("SQLParser.g4");
  return grammarFile;
}

// Exposes the parser's rule-name table by const reference (no copy).
const std::vector<std::string> &SQLParser::getRuleNames() const {
  const std::vector<std::string> &names = _ruleNames;
  return names;
}

// Returns a reference to the parser's vocabulary object.
dfa::Vocabulary &SQLParser::getVocabulary() const {
  dfa::Vocabulary &vocab = _vocabulary;
  return vocab;
}


//----------------- Swallow_to_semiContext
//------------------------------------------------------------------

// Constructs the context node for the swallow_to_semi rule, forwarding the
// parent context and invoking state to the ParserRuleContext base.
SQLParser::Swallow_to_semiContext::Swallow_to_semiContext(
    ParserRuleContext *parent_ctx, size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {}

// Collects every SEMI terminal recorded under this context.
std::vector<tree::TerminalNode *> SQLParser::Swallow_to_semiContext::SEMI() {
  std::vector<tree::TerminalNode *> semiNodes = getTokens(SQLParser::SEMI);
  return semiNodes;
}

// Looks up the SEMI terminal at index `i` under this context.
tree::TerminalNode *SQLParser::Swallow_to_semiContext::SEMI(size_t i) {
  tree::TerminalNode *semiNode = getToken(SQLParser::SEMI, i);
  return semiNode;
}


// Identifies this context as belonging to the swallow_to_semi rule.
size_t SQLParser::Swallow_to_semiContext::getRuleIndex() const {
  const size_t ruleIndex = SQLParser::RuleSwallow_to_semi;
  return ruleIndex;
}

// Forwards the enter event to enterSwallow_to_semi() when `listener` is a
// SQLParserListener; any other listener type is ignored.
void SQLParser::Swallow_to_semiContext::enterRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->enterSwallow_to_semi(this);
  }
}

// Forwards the exit event to exitSwallow_to_semi() when `listener` is a
// SQLParserListener; any other listener type is ignored.
void SQLParser::Swallow_to_semiContext::exitRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->exitSwallow_to_semi(this);
  }
}

// Parses the swallow_to_semi rule: consumes one or more tokens, none of which
// may be SEMI or EOF, and returns the context created for the rule.
//
// Recognition errors are reported through _errHandler, stored on the returned
// context's `exception` member, and then handed to _errHandler->recover().
SQLParser::Swallow_to_semiContext *SQLParser::swallow_to_semi() {
  Swallow_to_semiContext *_localctx =
      _tracker.createInstance<Swallow_to_semiContext>(_ctx, getState());
  enterRule(_localctx, 0, SQLParser::RuleSwallow_to_semi);
  size_t _la = 0;

  // NOTE(review): onExit presumably invokes exitRule() when it goes out of
  // scope (antlrcpp::finally) - confirm against the runtime.
  auto onExit = finally([=] { exitRule(); });
  try {
    enterOuterAlt(_localctx, 1);
    setState(81);
    _errHandler->sync(this);
    _la = _input->LA(1);
    // One-or-more loop. Each iteration accepts a single token unless the
    // lookahead is 0, EOF or SEMI, in which case inline recovery is attempted.
    // The loop condition tests the next lookahead against a 64-bit set: the
    // `(_la & ~0x3fULL) == 0` guard ensures _la < 64 before shifting, and the
    // set lists the token types OR..REGULAR_ID with SEMI left out.
    do {
      setState(80);
      _la = _input->LA(1);
      if (_la == 0 || _la == Token::EOF || (_la == SQLParser::SEMI)) {
        _errHandler->recoverInline(this);
      } else {
        _errHandler->reportMatch(this);
        consume();
      }
      setState(83);
      _errHandler->sync(this);
      _la = _input->LA(1);
    } while (
        (((_la & ~0x3fULL) == 0) &&
         ((1ULL << _la) &
          ((1ULL << SQLParser::OR) | (1ULL << SQLParser::AND) |
           (1ULL << SQLParser::NOT) | (1ULL << SQLParser::IN) |
           (1ULL << SQLParser::CONTAIN_ALL) | (1ULL << SQLParser::CONTAIN_ANY) |
           (1ULL << SQLParser::BETWEEN) | (1ULL << SQLParser::LIKE) |
           (1ULL << SQLParser::WHERE) | (1ULL << SQLParser::SELECT) |
           (1ULL << SQLParser::FROM) | (1ULL << SQLParser::AS) |
           (1ULL << SQLParser::BY) | (1ULL << SQLParser::ORDER) |
           (1ULL << SQLParser::ASC) | (1ULL << SQLParser::DESC) |
           (1ULL << SQLParser::LIMIT) | (1ULL << SQLParser::TRUE_V) |
           (1ULL << SQLParser::FALSE_V) | (1ULL << SQLParser::IS) |
           (1ULL << SQLParser::NULL_V) | (1ULL << SQLParser::INTEGER) |
           (1ULL << SQLParser::FLOAT) | (1ULL << SQLParser::SQUOTA_STRING) |
           (1ULL << SQLParser::DQUOTA_STRING) | (1ULL << SQLParser::DOT) |
           (1ULL << SQLParser::LP) | (1ULL << SQLParser::RP) |
           (1ULL << SQLParser::LMP) | (1ULL << SQLParser::RMP) |
           (1ULL << SQLParser::ASTERISK) | (1ULL << SQLParser::PLUS_SIGN) |
           (1ULL << SQLParser::MINUS_SIGN) | (1ULL << SQLParser::COMMA) |
           (1ULL << SQLParser::SOLIDUS) | (1ULL << SQLParser::MOD) |
           (1ULL << SQLParser::AT_SIGN) | (1ULL << SQLParser::ASSIGN_OP) |
           (1ULL << SQLParser::SHARP_SIGN) | (1ULL << SQLParser::COLON) |
           (1ULL << SQLParser::LE_OP) | (1ULL << SQLParser::GE_OP) |
           (1ULL << SQLParser::NE_OP) | (1ULL << SQLParser::CARET_OP) |
           (1ULL << SQLParser::TILDE_OP) | (1ULL << SQLParser::L_OP) |
           (1ULL << SQLParser::G_OP) | (1ULL << SQLParser::E_OP) |
           (1ULL << SQLParser::CONCAT_OP) | (1ULL << SQLParser::UNDERSCORE) |
           (1ULL << SQLParser::SPACES) | (1ULL << SQLParser::VECTOR) |
           (1ULL << SQLParser::SINGLE_LINE_COMMENT) |
           (1ULL << SQLParser::MULTI_LINE_COMMENT) |
           (1ULL << SQLParser::REGULAR_ID))) != 0));

  } catch (RecognitionException &e) {
    _errHandler->reportError(this, e);
    _localctx->exception = std::current_exception();
    _errHandler->recover(this, _localctx->exception);
  }

  return _localctx;
}

//----------------- Compilation_unitContext
//------------------------------------------------------------------

// Constructs the context node for the compilation_unit rule, forwarding the
// parent context and invoking state to the ParserRuleContext base.
SQLParser::Compilation_unitContext::Compilation_unitContext(
    ParserRuleContext *parent_ctx, size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {}

// Looks up the first EOF terminal under this context.
tree::TerminalNode *SQLParser::Compilation_unitContext::EOF() {
  tree::TerminalNode *eofNode = getToken(SQLParser::EOF, 0);
  return eofNode;
}

std::vector<SQLParser::Unit_statementContext *>
SQLParser::Compilation_unitContext::unit_statement() {
  return getRuleContexts<SQLParser::Unit_statementContext>();
}

// Looks up the unit_statement child context at index `i`.
SQLParser::Unit_statementContext *
SQLParser::Compilation_unitContext::unit_statement(size_t i) {
  SQLParser::Unit_statementContext *statement =
      getRuleContext<SQLParser::Unit_statementContext>(i);
  return statement;
}

// Collects every SOLIDUS terminal recorded under this context.
std::vector<tree::TerminalNode *>
SQLParser::Compilation_unitContext::SOLIDUS() {
  std::vector<tree::TerminalNode *> solidusNodes =
      getTokens(SQLParser::SOLIDUS);
  return solidusNodes;
}

// Looks up the SOLIDUS terminal at index `i` under this context.
tree::TerminalNode *SQLParser::Compilation_unitContext::SOLIDUS(size_t i) {
  tree::TerminalNode *solidusNode = getToken(SQLParser::SOLIDUS, i);
  return solidusNode;
}

// Collects every SEMI terminal recorded under this context.
std::vector<tree::TerminalNode *> SQLParser::Compilation_unitContext::SEMI() {
  std::vector<tree::TerminalNode *> semiNodes = getTokens(SQLParser::SEMI);
  return semiNodes;
}

// Looks up the SEMI terminal at index `i` under this context.
tree::TerminalNode *SQLParser::Compilation_unitContext::SEMI(size_t i) {
  tree::TerminalNode *semiNode = getToken(SQLParser::SEMI, i);
  return semiNode;
}


// Identifies this context as belonging to the compilation_unit rule.
size_t SQLParser::Compilation_unitContext::getRuleIndex() const {
  const size_t ruleIndex = SQLParser::RuleCompilation_unit;
  return ruleIndex;
}

// Forwards the enter event to enterCompilation_unit() when `listener` is a
// SQLParserListener; any other listener type is ignored.
void SQLParser::Compilation_unitContext::enterRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->enterCompilation_unit(this);
  }
}

// Forwards the exit event to exitCompilation_unit() when `listener` is a
// SQLParserListener; any other listener type is ignored.
void SQLParser::Compilation_unitContext::exitRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->exitCompilation_unit(this);
  }
}

// Parses the compilation_unit rule: one or more unit_statement invocations,
// each optionally followed by a single SOLIDUS or SEMI token, and finally EOF.
// Returns the context created for the rule.
//
// Recognition errors are reported through _errHandler, stored on the returned
// context's `exception` member, and then handed to _errHandler->recover().
SQLParser::Compilation_unitContext *SQLParser::compilation_unit() {
  Compilation_unitContext *_localctx =
      _tracker.createInstance<Compilation_unitContext>(_ctx, getState());
  enterRule(_localctx, 2, SQLParser::RuleCompilation_unit);
  size_t _la = 0;

  // NOTE(review): onExit presumably invokes exitRule() when it goes out of
  // scope (antlrcpp::finally) - confirm against the runtime.
  auto onExit = finally([=] { exitRule(); });
  try {
    enterOuterAlt(_localctx, 1);
    setState(89);
    _errHandler->sync(this);
    _la = _input->LA(1);
    // One-or-more loop: keeps parsing statements while the next token is
    // SELECT.
    do {
      setState(85);
      unit_statement();
      setState(87);
      _errHandler->sync(this);

      // Optional statement terminator: consume it only if the lookahead is
      // SOLIDUS or SEMI.
      _la = _input->LA(1);
      if (_la == SQLParser::SOLIDUS

          || _la == SQLParser::SEMI) {
        setState(86);
        _la = _input->LA(1);
        if (!(_la == SQLParser::SOLIDUS

              || _la == SQLParser::SEMI)) {
          _errHandler->recoverInline(this);
        } else {
          _errHandler->reportMatch(this);
          consume();
        }
      }
      setState(91);
      _errHandler->sync(this);
      _la = _input->LA(1);
    } while (_la == SQLParser::SELECT);
    setState(93);
    match(SQLParser::EOF);

  } catch (RecognitionException &e) {
    _errHandler->reportError(this, e);
    _localctx->exception = std::current_exception();
    _errHandler->recover(this, _localctx->exception);
  }

  return _localctx;
}

//----------------- Logic_expr_unitContext
//------------------------------------------------------------------

// Constructs the context node for the logic_expr_unit rule, forwarding the
// parent context and invoking state to the ParserRuleContext base.
SQLParser::Logic_expr_unitContext::Logic_expr_unitContext(
    ParserRuleContext *parent_ctx, size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {}

// Looks up the first logic_expr child context of this node.
SQLParser::Logic_exprContext *SQLParser::Logic_expr_unitContext::logic_expr() {
  SQLParser::Logic_exprContext *expr =
      getRuleContext<SQLParser::Logic_exprContext>(0);
  return expr;
}

// Looks up the first EOF terminal under this context.
tree::TerminalNode *SQLParser::Logic_expr_unitContext::EOF() {
  tree::TerminalNode *eofNode = getToken(SQLParser::EOF, 0);
  return eofNode;
}


// Identifies this context as belonging to the logic_expr_unit rule.
size_t SQLParser::Logic_expr_unitContext::getRuleIndex() const {
  const size_t ruleIndex = SQLParser::RuleLogic_expr_unit;
  return ruleIndex;
}

// Forwards the enter event to enterLogic_expr_unit() when `listener` is a
// SQLParserListener; any other listener type is ignored.
void SQLParser::Logic_expr_unitContext::enterRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->enterLogic_expr_unit(this);
  }
}

// Forwards the exit event to exitLogic_expr_unit() when `listener` is a
// SQLParserListener; any other listener type is ignored.
void SQLParser::Logic_expr_unitContext::exitRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->exitLogic_expr_unit(this);
  }
}

// Parses the logic_expr_unit rule: a logic_expr (entered at precedence 0)
// followed by EOF. Returns the context created for the rule.
//
// Recognition errors are reported through _errHandler, stored on the returned
// context's `exception` member, and then handed to _errHandler->recover().
SQLParser::Logic_expr_unitContext *SQLParser::logic_expr_unit() {
  auto *ruleCtx =
      _tracker.createInstance<Logic_expr_unitContext>(_ctx, getState());
  enterRule(ruleCtx, 4, SQLParser::RuleLogic_expr_unit);

  auto exitGuard = finally([this] { exitRule(); });
  try {
    enterOuterAlt(ruleCtx, 1);

    setState(95);
    logic_expr(0);

    setState(96);
    match(SQLParser::EOF);
  } catch (RecognitionException &error) {
    _errHandler->reportError(this, error);
    ruleCtx->exception = std::current_exception();
    _errHandler->recover(this, ruleCtx->exception);
  }

  return ruleCtx;
}

//----------------- Unit_statementContext
//------------------------------------------------------------------

// Constructs the context node for the unit_statement rule, forwarding the
// parent context and invoking state to the ParserRuleContext base.
SQLParser::Unit_statementContext::Unit_statementContext(
    ParserRuleContext *parent_ctx, size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {}

// Looks up the first dql_statement child context of this node.
SQLParser::Dql_statementContext *
SQLParser::Unit_statementContext::dql_statement() {
  SQLParser::Dql_statementContext *statement =
      getRuleContext<SQLParser::Dql_statementContext>(0);
  return statement;
}


// Identifies this context as belonging to the unit_statement rule.
size_t SQLParser::Unit_statementContext::getRuleIndex() const {
  const size_t ruleIndex = SQLParser::RuleUnit_statement;
  return ruleIndex;
}

// Forwards the enter event to enterUnit_statement() when `listener` is a
// SQLParserListener; any other listener type is ignored.
void SQLParser::Unit_statementContext::enterRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->enterUnit_statement(this);
  }
}

// Forwards the exit event to exitUnit_statement() when `listener` is a
// SQLParserListener; any other listener type is ignored.
void SQLParser::Unit_statementContext::exitRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->exitUnit_statement(this);
  }
}

// Parses the unit_statement rule, which consists of a single dql_statement.
// Returns the context created for the rule.
//
// Recognition errors are reported through _errHandler, stored on the returned
// context's `exception` member, and then handed to _errHandler->recover().
SQLParser::Unit_statementContext *SQLParser::unit_statement() {
  auto *ruleCtx =
      _tracker.createInstance<Unit_statementContext>(_ctx, getState());
  enterRule(ruleCtx, 6, SQLParser::RuleUnit_statement);

  auto exitGuard = finally([this] { exitRule(); });
  try {
    enterOuterAlt(ruleCtx, 1);

    setState(98);
    dql_statement();
  } catch (RecognitionException &error) {
    _errHandler->reportError(this, error);
    ruleCtx->exception = std::current_exception();
    _errHandler->recover(this, ruleCtx->exception);
  }

  return ruleCtx;
}

//----------------- Where_clauseContext
//------------------------------------------------------------------

// Constructs the context node for the where_clause rule, forwarding the
// parent context and invoking state to the ParserRuleContext base.
SQLParser::Where_clauseContext::Where_clauseContext(
    ParserRuleContext *parent_ctx, size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {}

// Looks up the first WHERE terminal under this context.
tree::TerminalNode *SQLParser::Where_clauseContext::WHERE() {
  tree::TerminalNode *whereNode = getToken(SQLParser::WHERE, 0);
  return whereNode;
}

// Looks up the first logic_expr child context of this node.
SQLParser::Logic_exprContext *SQLParser::Where_clauseContext::logic_expr() {
  SQLParser::Logic_exprContext *expr =
      getRuleContext<SQLParser::Logic_exprContext>(0);
  return expr;
}


// Identifies this context as belonging to the where_clause rule.
size_t SQLParser::Where_clauseContext::getRuleIndex() const {
  const size_t ruleIndex = SQLParser::RuleWhere_clause;
  return ruleIndex;
}

// Forwards the enter event to enterWhere_clause() when `listener` is a
// SQLParserListener; any other listener type is ignored.
void SQLParser::Where_clauseContext::enterRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->enterWhere_clause(this);
  }
}

// Forwards the exit event to exitWhere_clause() when `listener` is a
// SQLParserListener; any other listener type is ignored.
void SQLParser::Where_clauseContext::exitRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->exitWhere_clause(this);
  }
}

// Parses the where_clause rule: the WHERE token followed by a logic_expr
// (entered at precedence 0). Returns the context created for the rule.
//
// Recognition errors are reported through _errHandler, stored on the returned
// context's `exception` member, and then handed to _errHandler->recover().
SQLParser::Where_clauseContext *SQLParser::where_clause() {
  auto *ruleCtx =
      _tracker.createInstance<Where_clauseContext>(_ctx, getState());
  enterRule(ruleCtx, 8, SQLParser::RuleWhere_clause);

  auto exitGuard = finally([this] { exitRule(); });
  try {
    enterOuterAlt(ruleCtx, 1);

    setState(100);
    match(SQLParser::WHERE);

    setState(101);
    logic_expr(0);
  } catch (RecognitionException &error) {
    _errHandler->reportError(this, error);
    ruleCtx->exception = std::current_exception();
    _errHandler->recover(this, ruleCtx->exception);
  }

  return ruleCtx;
}

//----------------- Logic_exprContext
//------------------------------------------------------------------

// Constructs the context node for the logic_expr rule, forwarding the parent
// context and invoking state to the ParserRuleContext base.
SQLParser::Logic_exprContext::Logic_exprContext(ParserRuleContext *parent_ctx,
                                                size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {}

// Looks up the first relation_expr child context of this node.
SQLParser::Relation_exprContext *SQLParser::Logic_exprContext::relation_expr() {
  SQLParser::Relation_exprContext *relation =
      getRuleContext<SQLParser::Relation_exprContext>(0);
  return relation;
}

// Looks up the first enclosed_expr child context of this node.
SQLParser::Enclosed_exprContext *SQLParser::Logic_exprContext::enclosed_expr() {
  SQLParser::Enclosed_exprContext *enclosed =
      getRuleContext<SQLParser::Enclosed_exprContext>(0);
  return enclosed;
}

std::vector<SQLParser::Logic_exprContext *>
SQLParser::Logic_exprContext::logic_expr() {
  return getRuleContexts<SQLParser::Logic_exprContext>();
}

// Looks up the logic_expr child context at index `i`.
SQLParser::Logic_exprContext *SQLParser::Logic_exprContext::logic_expr(
    size_t i) {
  SQLParser::Logic_exprContext *operand =
      getRuleContext<SQLParser::Logic_exprContext>(i);
  return operand;
}

// Looks up the first AND terminal under this context.
tree::TerminalNode *SQLParser::Logic_exprContext::AND() {
  tree::TerminalNode *andNode = getToken(SQLParser::AND, 0);
  return andNode;
}

// Looks up the first OR terminal under this context.
tree::TerminalNode *SQLParser::Logic_exprContext::OR() {
  tree::TerminalNode *orNode = getToken(SQLParser::OR, 0);
  return orNode;
}


// Identifies this context as belonging to the logic_expr rule.
size_t SQLParser::Logic_exprContext::getRuleIndex() const {
  const size_t ruleIndex = SQLParser::RuleLogic_expr;
  return ruleIndex;
}

// Forwards the enter event to enterLogic_expr() when `listener` is a
// SQLParserListener; any other listener type is ignored.
void SQLParser::Logic_exprContext::enterRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->enterLogic_expr(this);
  }
}

// Forwards the exit event to exitLogic_expr() when `listener` is a
// SQLParserListener; any other listener type is ignored.
void SQLParser::Logic_exprContext::exitRule(tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->exitLogic_expr(this);
  }
}


// Convenience entry point: parses a logic_expr starting at precedence 0.
SQLParser::Logic_exprContext *SQLParser::logic_expr() {
  constexpr int kStartPrecedence = 0;
  return logic_expr(kStartPrecedence);
}

// Parses the left-recursive logic_expr rule at the given `precedence`.
//
// A primary operand is parsed first (relation_expr, or enclosed_expr when the
// lookahead is LP). Afterwards, as long as prediction selects it, the
// expression is extended with `AND logic_expr` or `OR logic_expr` suffixes,
// each guarded by a precedence predicate (3 for AND, 2 for OR).
//
// Returns the context for the parsed expression. Recognition errors are
// reported through _errHandler, stored on that context's `exception` member,
// and then handed to _errHandler->recover().
SQLParser::Logic_exprContext *SQLParser::logic_expr(int precedence) {
  ParserRuleContext *parentContext = _ctx;
  size_t parentState = getState();
  SQLParser::Logic_exprContext *_localctx =
      _tracker.createInstance<Logic_exprContext>(_ctx, parentState);
  SQLParser::Logic_exprContext *previousContext = _localctx;
  (void)previousContext;  // Silence compiler, in case the context is not used
                          // by generated code.
  size_t startState = 10;
  enterRecursionRule(_localctx, 10, SQLParser::RuleLogic_expr, precedence);


  // NOTE(review): onExit presumably calls unrollRecursionContexts() with the
  // saved parent context when it goes out of scope - confirm against the
  // runtime's antlrcpp::finally.
  auto onExit = finally([=] { unrollRecursionContexts(parentContext); });
  try {
    size_t alt;
    enterOuterAlt(_localctx, 1);
    setState(106);
    _errHandler->sync(this);
    // Primary operand: pick the sub-rule from the next token's type.
    switch (_input->LA(1)) {
      case SQLParser::OR:
      case SQLParser::AND:
      case SQLParser::NOT:
      case SQLParser::IN:
      case SQLParser::BETWEEN:
      case SQLParser::LIKE:
      case SQLParser::WHERE:
      case SQLParser::SELECT:
      case SQLParser::AS:
      case SQLParser::BY:
      case SQLParser::ORDER:
      case SQLParser::ASC:
      case SQLParser::DESC:
      case SQLParser::LIMIT:
      case SQLParser::REGULAR_ID: {
        setState(104);
        relation_expr();
        break;
      }

      case SQLParser::LP: {
        setState(105);
        enclosed_expr();
        break;
      }

      default:
        throw NoViableAltException(this);
    }
    _ctx->stop = _input->LT(-1);
    setState(116);
    _errHandler->sync(this);
    // Decision 5 chooses between extending the expression (alt 1) and
    // leaving the loop (alt 2 or an invalid alternative).
    alt = getInterpreter<atn::ParserATNSimulator>()->adaptivePredict(_input, 5,
                                                                     _ctx);
    while (alt != 2 && alt != atn::ATN::INVALID_ALT_NUMBER) {
      if (alt == 1) {
        if (!_parseListeners.empty()) triggerExitRuleEvent();
        previousContext = _localctx;
        setState(114);
        _errHandler->sync(this);
        // Decision 4 selects which binary operator follows.
        switch (getInterpreter<atn::ParserATNSimulator>()->adaptivePredict(
            _input, 4, _ctx)) {
          case 1: {
            // `<expr> AND <expr>`: right operand parsed at precedence 4.
            _localctx = _tracker.createInstance<Logic_exprContext>(
                parentContext, parentState);
            pushNewRecursionContext(_localctx, startState, RuleLogic_expr);
            setState(108);

            if (!(precpred(_ctx, 3)))
              throw FailedPredicateException(this, "precpred(_ctx, 3)");
            setState(109);
            match(SQLParser::AND);
            setState(110);
            logic_expr(4);
            break;
          }

          case 2: {
            // `<expr> OR <expr>`: right operand parsed at precedence 3.
            _localctx = _tracker.createInstance<Logic_exprContext>(
                parentContext, parentState);
            pushNewRecursionContext(_localctx, startState, RuleLogic_expr);
            setState(111);

            if (!(precpred(_ctx, 2)))
              throw FailedPredicateException(this, "precpred(_ctx, 2)");
            setState(112);
            match(SQLParser::OR);
            setState(113);
            logic_expr(3);
            break;
          }
        }
      }
      setState(118);
      _errHandler->sync(this);
      alt = getInterpreter<atn::ParserATNSimulator>()->adaptivePredict(_input,
                                                                       5, _ctx);
    }
  } catch (RecognitionException &e) {
    _errHandler->reportError(this, e);
    _localctx->exception = std::current_exception();
    _errHandler->recover(this, _localctx->exception);
  }
  return _localctx;
}

//----------------- Enclosed_exprContext
//------------------------------------------------------------------

// Constructs an enclosed_expr context; both arguments are forwarded unchanged
// to the ParserRuleContext base constructor.
SQLParser::Enclosed_exprContext::Enclosed_exprContext(
    ParserRuleContext *parent_ctx, size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {
  // Nothing beyond base-class construction is performed here.
}

// Looks up the LP terminal at index 0 on this context via getToken().
tree::TerminalNode *SQLParser::Enclosed_exprContext::LP() {
  auto *const terminal = getToken(SQLParser::LP, 0);
  return terminal;
}

// Looks up the Logic_exprContext child at index 0 via getRuleContext().
SQLParser::Logic_exprContext *SQLParser::Enclosed_exprContext::logic_expr() {
  auto *const ruleNode = getRuleContext<SQLParser::Logic_exprContext>(0);
  return ruleNode;
}

// Looks up the RP terminal at index 0 on this context via getToken().
tree::TerminalNode *SQLParser::Enclosed_exprContext::RP() {
  auto *const terminal = getToken(SQLParser::RP, 0);
  return terminal;
}


// Reports the rule index constant associated with enclosed_expr.
size_t SQLParser::Enclosed_exprContext::getRuleIndex() const {
  const size_t ruleIdx = SQLParser::RuleEnclosed_expr;
  return ruleIdx;
}

// Forwards the "enter" event to `listener` when it is a SQLParserListener;
// any other listener type is ignored.
void SQLParser::Enclosed_exprContext::enterRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->enterEnclosed_expr(this);
  }
}

// Forwards the "exit" event to `listener` when it is a SQLParserListener;
// any other listener type is ignored.
void SQLParser::Enclosed_exprContext::exitRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->exitEnclosed_expr(this);
  }
}

// Parses the enclosed_expr rule. In order, it matches an LP token, invokes
// logic_expr with argument 0, and matches an RP token.
//
// A RecognitionException raised while matching is reported to the error
// handler, recorded on the context's `exception` field, and handed to the
// handler's recover(). The context created for this invocation is returned
// in either case.
SQLParser::Enclosed_exprContext *SQLParser::enclosed_expr() {
  Enclosed_exprContext *_localctx =
      _tracker.createInstance<Enclosed_exprContext>(_ctx, getState());
  enterRule(_localctx, 12, SQLParser::RuleEnclosed_expr);

  // NOTE(review): `finally` is defined outside this chunk; presumably it runs
  // exitRule() when this scope is left — confirm.
  auto onExit = finally([=] { exitRule(); });
  try {
    enterOuterAlt(_localctx, 1);
    setState(119);
    match(SQLParser::LP);
    setState(120);
    logic_expr(0);
    setState(121);
    match(SQLParser::RP);

  } catch (RecognitionException &e) {
    _errHandler->reportError(this, e);
    _localctx->exception = std::current_exception();
    _errHandler->recover(this, _localctx->exception);
  }

  return _localctx;
}

//----------------- Relation_exprContext
//------------------------------------------------------------------

// Constructs a relation_expr context; both arguments are forwarded unchanged
// to the ParserRuleContext base constructor.
SQLParser::Relation_exprContext::Relation_exprContext(
    ParserRuleContext *parent_ctx, size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {
  // Nothing beyond base-class construction is performed here.
}

// Looks up the IdentifierContext child at index 0 via getRuleContext().
SQLParser::IdentifierContext *SQLParser::Relation_exprContext::identifier() {
  auto *const ruleNode = getRuleContext<SQLParser::IdentifierContext>(0);
  return ruleNode;
}

// Looks up the Rel_operContext child at index 0 via getRuleContext().
SQLParser::Rel_operContext *SQLParser::Relation_exprContext::rel_oper() {
  auto *const ruleNode = getRuleContext<SQLParser::Rel_operContext>(0);
  return ruleNode;
}

// Looks up the Value_exprContext child at index 0 via getRuleContext().
SQLParser::Value_exprContext *SQLParser::Relation_exprContext::value_expr() {
  auto *const ruleNode = getRuleContext<SQLParser::Value_exprContext>(0);
  return ruleNode;
}

// Looks up the LIKE terminal at index 0 on this context via getToken().
tree::TerminalNode *SQLParser::Relation_exprContext::LIKE() {
  auto *const terminal = getToken(SQLParser::LIKE, 0);
  return terminal;
}

// Looks up the IN terminal at index 0 on this context via getToken().
tree::TerminalNode *SQLParser::Relation_exprContext::IN() {
  auto *const terminal = getToken(SQLParser::IN, 0);
  return terminal;
}

// Looks up the LP terminal at index 0 on this context via getToken().
tree::TerminalNode *SQLParser::Relation_exprContext::LP() {
  auto *const terminal = getToken(SQLParser::LP, 0);
  return terminal;
}

// Looks up the In_value_expr_listContext child at index 0 via
// getRuleContext().
SQLParser::In_value_expr_listContext *
SQLParser::Relation_exprContext::in_value_expr_list() {
  auto *const ruleNode =
      getRuleContext<SQLParser::In_value_expr_listContext>(0);
  return ruleNode;
}

// Looks up the RP terminal at index 0 on this context via getToken().
tree::TerminalNode *SQLParser::Relation_exprContext::RP() {
  auto *const terminal = getToken(SQLParser::RP, 0);
  return terminal;
}

// Looks up the NOT terminal at index 0 on this context via getToken().
tree::TerminalNode *SQLParser::Relation_exprContext::NOT() {
  auto *const terminal = getToken(SQLParser::NOT, 0);
  return terminal;
}

// Looks up the CONTAIN_ALL terminal at index 0 on this context via
// getToken().
tree::TerminalNode *SQLParser::Relation_exprContext::CONTAIN_ALL() {
  auto *const terminal = getToken(SQLParser::CONTAIN_ALL, 0);
  return terminal;
}

// Looks up the CONTAIN_ANY terminal at index 0 on this context via
// getToken().
tree::TerminalNode *SQLParser::Relation_exprContext::CONTAIN_ANY() {
  auto *const terminal = getToken(SQLParser::CONTAIN_ANY, 0);
  return terminal;
}

// Looks up the IS terminal at index 0 on this context via getToken().
tree::TerminalNode *SQLParser::Relation_exprContext::IS() {
  auto *const terminal = getToken(SQLParser::IS, 0);
  return terminal;
}

// Looks up the NULL_V terminal at index 0 on this context via getToken().
tree::TerminalNode *SQLParser::Relation_exprContext::NULL_V() {
  auto *const terminal = getToken(SQLParser::NULL_V, 0);
  return terminal;
}

// Looks up the Function_callContext child at index 0 via getRuleContext().
SQLParser::Function_callContext *
SQLParser::Relation_exprContext::function_call() {
  auto *const ruleNode = getRuleContext<SQLParser::Function_callContext>(0);
  return ruleNode;
}


// Reports the rule index constant associated with relation_expr.
size_t SQLParser::Relation_exprContext::getRuleIndex() const {
  const size_t ruleIdx = SQLParser::RuleRelation_expr;
  return ruleIdx;
}

// Forwards the "enter" event to `listener` when it is a SQLParserListener;
// any other listener type is ignored.
void SQLParser::Relation_exprContext::enterRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->enterRelation_expr(this);
  }
}

// Forwards the "exit" event to `listener` when it is a SQLParserListener;
// any other listener type is ignored.
void SQLParser::Relation_exprContext::exitRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->exitRelation_expr(this);
  }
}

// Parses the relation_expr rule. The alternative is chosen by
// adaptivePredict() on decision 10; the six alternatives, as matched below:
//   1. identifier rel_oper value_expr
//   2. identifier LIKE value_expr
//   3. identifier [NOT] IN LP in_value_expr_list RP
//   4. identifier [NOT] (CONTAIN_ALL | CONTAIN_ANY) LP [in_value_expr_list] RP
//   5. identifier IS [NOT] NULL_V
//   6. function_call rel_oper value_expr
//
// A RecognitionException raised while matching is reported to the error
// handler, recorded on the context's `exception` field, and handed to the
// handler's recover(). The context created for this invocation is returned
// in either case.
SQLParser::Relation_exprContext *SQLParser::relation_expr() {
  Relation_exprContext *_localctx =
      _tracker.createInstance<Relation_exprContext>(_ctx, getState());
  enterRule(_localctx, 14, SQLParser::RuleRelation_expr);
  // Holds the type of the next lookahead token for the optional sub-rules.
  size_t _la = 0;

  // NOTE(review): `finally` is defined outside this chunk; presumably it runs
  // exitRule() when this scope is left — confirm.
  auto onExit = finally([=] { exitRule(); });
  try {
    setState(162);
    _errHandler->sync(this);
    switch (getInterpreter<atn::ParserATNSimulator>()->adaptivePredict(
        _input, 10, _ctx)) {
      // Alternative 1: identifier rel_oper value_expr
      case 1: {
        enterOuterAlt(_localctx, 1);
        setState(123);
        identifier();
        setState(124);
        rel_oper();
        setState(125);
        value_expr();
        break;
      }

      // Alternative 2: identifier LIKE value_expr
      case 2: {
        enterOuterAlt(_localctx, 2);
        setState(127);
        identifier();
        setState(128);
        match(SQLParser::LIKE);
        setState(129);
        value_expr();
        break;
      }

      // Alternative 3: identifier [NOT] IN LP in_value_expr_list RP
      case 3: {
        enterOuterAlt(_localctx, 3);
        setState(131);
        identifier();
        setState(133);
        _errHandler->sync(this);

        // Optional NOT: consumed only when it is the next token.
        _la = _input->LA(1);
        if (_la == SQLParser::NOT) {
          setState(132);
          match(SQLParser::NOT);
        }
        setState(135);
        match(SQLParser::IN);
        setState(136);
        match(SQLParser::LP);
        setState(137);
        in_value_expr_list();
        setState(138);
        match(SQLParser::RP);
        break;
      }

      // Alternative 4: identifier [NOT] (CONTAIN_ALL | CONTAIN_ANY)
      //                LP [in_value_expr_list] RP
      case 4: {
        enterOuterAlt(_localctx, 4);
        setState(140);
        identifier();
        setState(142);
        _errHandler->sync(this);

        // Optional NOT: consumed only when it is the next token.
        _la = _input->LA(1);
        if (_la == SQLParser::NOT) {
          setState(141);
          match(SQLParser::NOT);
        }
        setState(144);
        // Token-set match: either CONTAIN_ALL or CONTAIN_ANY is consumed;
        // anything else goes through the error handler's inline recovery.
        _la = _input->LA(1);
        if (!(_la == SQLParser::CONTAIN_ALL

              || _la == SQLParser::CONTAIN_ANY)) {
          _errHandler->recoverInline(this);
        } else {
          _errHandler->reportMatch(this);
          consume();
        }
        setState(145);
        match(SQLParser::LP);
        setState(147);
        _errHandler->sync(this);

        // The value list is optional here: it is parsed only when the next
        // token is TRUE_V, FALSE_V, INTEGER, FLOAT, SQUOTA_STRING or
        // DQUOTA_STRING. The `(_la & ~0x3fULL) == 0` guard restricts the
        // bitmask test to token types below 64 so the shift stays in range.
        _la = _input->LA(1);
        if ((((_la & ~0x3fULL) == 0) &&
             ((1ULL << _la) &
              ((1ULL << SQLParser::TRUE_V) | (1ULL << SQLParser::FALSE_V) |
               (1ULL << SQLParser::INTEGER) | (1ULL << SQLParser::FLOAT) |
               (1ULL << SQLParser::SQUOTA_STRING) |
               (1ULL << SQLParser::DQUOTA_STRING))) != 0)) {
          setState(146);
          in_value_expr_list();
        }
        setState(149);
        match(SQLParser::RP);
        break;
      }

      // Alternative 5: identifier IS [NOT] NULL_V
      case 5: {
        enterOuterAlt(_localctx, 5);
        setState(151);
        identifier();
        setState(152);
        match(SQLParser::IS);
        setState(154);
        _errHandler->sync(this);

        // Optional NOT: consumed only when it is the next token.
        _la = _input->LA(1);
        if (_la == SQLParser::NOT) {
          setState(153);
          match(SQLParser::NOT);
        }
        setState(156);
        match(SQLParser::NULL_V);
        break;
      }

      // Alternative 6: function_call rel_oper value_expr
      case 6: {
        enterOuterAlt(_localctx, 6);
        setState(158);
        function_call();
        setState(159);
        rel_oper();
        setState(160);
        value_expr();
        break;
      }
    }

  } catch (RecognitionException &e) {
    _errHandler->reportError(this, e);
    _localctx->exception = std::current_exception();
    _errHandler->recover(this, _localctx->exception);
  }

  return _localctx;
}

//----------------- Rel_operContext
//------------------------------------------------------------------

// Constructs a rel_oper context; both arguments are forwarded unchanged to
// the ParserRuleContext base constructor.
SQLParser::Rel_operContext::Rel_operContext(ParserRuleContext *parent_ctx,
                                            size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {
  // Nothing beyond base-class construction is performed here.
}

// Looks up the E_OP terminal at index 0 on this context via getToken().
tree::TerminalNode *SQLParser::Rel_operContext::E_OP() {
  auto *const terminal = getToken(SQLParser::E_OP, 0);
  return terminal;
}

// Looks up the Ne_opContext child at index 0 via getRuleContext().
SQLParser::Ne_opContext *SQLParser::Rel_operContext::ne_op() {
  auto *const ruleNode = getRuleContext<SQLParser::Ne_opContext>(0);
  return ruleNode;
}

// Looks up the L_OP terminal at index 0 on this context via getToken().
tree::TerminalNode *SQLParser::Rel_operContext::L_OP() {
  auto *const terminal = getToken(SQLParser::L_OP, 0);
  return terminal;
}

// Looks up the G_OP terminal at index 0 on this context via getToken().
tree::TerminalNode *SQLParser::Rel_operContext::G_OP() {
  auto *const terminal = getToken(SQLParser::G_OP, 0);
  return terminal;
}

// Looks up the Le_opContext child at index 0 via getRuleContext().
SQLParser::Le_opContext *SQLParser::Rel_operContext::le_op() {
  auto *const ruleNode = getRuleContext<SQLParser::Le_opContext>(0);
  return ruleNode;
}

// Looks up the Ge_opContext child at index 0 via getRuleContext().
SQLParser::Ge_opContext *SQLParser::Rel_operContext::ge_op() {
  auto *const ruleNode = getRuleContext<SQLParser::Ge_opContext>(0);
  return ruleNode;
}


// Reports the rule index constant associated with rel_oper.
size_t SQLParser::Rel_operContext::getRuleIndex() const {
  const size_t ruleIdx = SQLParser::RuleRel_oper;
  return ruleIdx;
}

// Forwards the "enter" event to `listener` when it is a SQLParserListener;
// any other listener type is ignored.
void SQLParser::Rel_operContext::enterRule(tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->enterRel_oper(this);
  }
}

// Forwards the "exit" event to `listener` when it is a SQLParserListener;
// any other listener type is ignored.
void SQLParser::Rel_operContext::exitRule(tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->exitRel_oper(this);
  }
}

// Parses the rel_oper rule. The alternative is chosen by adaptivePredict()
// on decision 11; the six alternatives, as matched below:
//   1. E_OP token      2. ne_op rule      3. L_OP token
//   4. G_OP token      5. le_op rule      6. ge_op rule
//
// A RecognitionException raised while matching is reported to the error
// handler, recorded on the context's `exception` field, and handed to the
// handler's recover(). The context created for this invocation is returned
// in either case.
SQLParser::Rel_operContext *SQLParser::rel_oper() {
  Rel_operContext *_localctx =
      _tracker.createInstance<Rel_operContext>(_ctx, getState());
  enterRule(_localctx, 16, SQLParser::RuleRel_oper);

  // NOTE(review): `finally` is defined outside this chunk; presumably it runs
  // exitRule() when this scope is left — confirm.
  auto onExit = finally([=] { exitRule(); });
  try {
    setState(170);
    _errHandler->sync(this);
    switch (getInterpreter<atn::ParserATNSimulator>()->adaptivePredict(
        _input, 11, _ctx)) {
      // Alternative 1: E_OP
      case 1: {
        enterOuterAlt(_localctx, 1);
        setState(164);
        match(SQLParser::E_OP);
        break;
      }

      // Alternative 2: ne_op
      case 2: {
        enterOuterAlt(_localctx, 2);
        setState(165);
        ne_op();
        break;
      }

      // Alternative 3: L_OP
      case 3: {
        enterOuterAlt(_localctx, 3);
        setState(166);
        match(SQLParser::L_OP);
        break;
      }

      // Alternative 4: G_OP
      case 4: {
        enterOuterAlt(_localctx, 4);
        setState(167);
        match(SQLParser::G_OP);
        break;
      }

      // Alternative 5: le_op
      case 5: {
        enterOuterAlt(_localctx, 5);
        setState(168);
        le_op();
        break;
      }

      // Alternative 6: ge_op
      case 6: {
        enterOuterAlt(_localctx, 6);
        setState(169);
        ge_op();
        break;
      }
    }

  } catch (RecognitionException &e) {
    _errHandler->reportError(this, e);
    _localctx->exception = std::current_exception();
    _errHandler->recover(this, _localctx->exception);
  }

  return _localctx;
}

//----------------- Value_exprContext
//------------------------------------------------------------------

// Constructs a value_expr context; both arguments are forwarded unchanged to
// the ParserRuleContext base constructor.
SQLParser::Value_exprContext::Value_exprContext(ParserRuleContext *parent_ctx,
                                                size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {
  // Nothing beyond base-class construction is performed here.
}

// Looks up the ConstantContext child at index 0 via getRuleContext().
SQLParser::ConstantContext *SQLParser::Value_exprContext::constant() {
  auto *const ruleNode = getRuleContext<SQLParser::ConstantContext>(0);
  return ruleNode;
}

// Looks up the Function_callContext child at index 0 via getRuleContext().
SQLParser::Function_callContext *SQLParser::Value_exprContext::function_call() {
  auto *const ruleNode = getRuleContext<SQLParser::Function_callContext>(0);
  return ruleNode;
}


// Reports the rule index constant associated with value_expr.
size_t SQLParser::Value_exprContext::getRuleIndex() const {
  const size_t ruleIdx = SQLParser::RuleValue_expr;
  return ruleIdx;
}

// Forwards the "enter" event to `listener` when it is a SQLParserListener;
// any other listener type is ignored.
void SQLParser::Value_exprContext::enterRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->enterValue_expr(this);
  }
}

// Forwards the "exit" event to `listener` when it is a SQLParserListener;
// any other listener type is ignored.
void SQLParser::Value_exprContext::exitRule(tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->exitValue_expr(this);
  }
}

// Parses the value_expr rule, choosing the alternative from the type of the
// next lookahead token:
//   1. constant       — for TRUE_V, FALSE_V, INTEGER, FLOAT, SQUOTA_STRING,
//                       DQUOTA_STRING, LMP or VECTOR
//   2. function_call  — for the keyword tokens listed in the second case
//                       group, or REGULAR_ID
// Any other token type throws NoViableAltException, which is caught below.
//
// A RecognitionException raised while matching is reported to the error
// handler, recorded on the context's `exception` field, and handed to the
// handler's recover(). The context created for this invocation is returned
// in either case.
SQLParser::Value_exprContext *SQLParser::value_expr() {
  Value_exprContext *_localctx =
      _tracker.createInstance<Value_exprContext>(_ctx, getState());
  enterRule(_localctx, 18, SQLParser::RuleValue_expr);

  // NOTE(review): `finally` is defined outside this chunk; presumably it runs
  // exitRule() when this scope is left — confirm.
  auto onExit = finally([=] { exitRule(); });
  try {
    setState(174);
    _errHandler->sync(this);
    switch (_input->LA(1)) {
      // Alternative 1: constant
      case SQLParser::TRUE_V:
      case SQLParser::FALSE_V:
      case SQLParser::INTEGER:
      case SQLParser::FLOAT:
      case SQLParser::SQUOTA_STRING:
      case SQLParser::DQUOTA_STRING:
      case SQLParser::LMP:
      case SQLParser::VECTOR: {
        enterOuterAlt(_localctx, 1);
        setState(172);
        constant();
        break;
      }

      // Alternative 2: function_call
      case SQLParser::OR:
      case SQLParser::AND:
      case SQLParser::NOT:
      case SQLParser::IN:
      case SQLParser::BETWEEN:
      case SQLParser::LIKE:
      case SQLParser::WHERE:
      case SQLParser::SELECT:
      case SQLParser::AS:
      case SQLParser::BY:
      case SQLParser::ORDER:
      case SQLParser::ASC:
      case SQLParser::DESC:
      case SQLParser::LIMIT:
      case SQLParser::REGULAR_ID: {
        enterOuterAlt(_localctx, 2);
        setState(173);
        function_call();
        break;
      }

      default:
        throw NoViableAltException(this);
    }

  } catch (RecognitionException &e) {
    _errHandler->reportError(this, e);
    _localctx->exception = std::current_exception();
    _errHandler->recover(this, _localctx->exception);
  }

  return _localctx;
}

//----------------- In_value_expr_listContext
//------------------------------------------------------------------

// Constructs an in_value_expr_list context; both arguments are forwarded
// unchanged to the ParserRuleContext base constructor.
SQLParser::In_value_expr_listContext::In_value_expr_listContext(
    ParserRuleContext *parent_ctx, size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {
  // Nothing beyond base-class construction is performed here.
}

// Collects the In_value_exprContext children of this context via
// getRuleContexts().
std::vector<SQLParser::In_value_exprContext *>
SQLParser::In_value_expr_listContext::in_value_expr() {
  auto ruleNodes = getRuleContexts<SQLParser::In_value_exprContext>();
  return ruleNodes;
}

// Looks up the In_value_exprContext child at index `i` via getRuleContext().
SQLParser::In_value_exprContext *
SQLParser::In_value_expr_listContext::in_value_expr(size_t i) {
  auto *const ruleNode = getRuleContext<SQLParser::In_value_exprContext>(i);
  return ruleNode;
}

// Collects the COMMA terminals of this context via getTokens().
std::vector<tree::TerminalNode *>
SQLParser::In_value_expr_listContext::COMMA() {
  auto terminals = getTokens(SQLParser::COMMA);
  return terminals;
}

// Looks up the COMMA terminal at index `i` on this context via getToken().
tree::TerminalNode *SQLParser::In_value_expr_listContext::COMMA(size_t i) {
  auto *const terminal = getToken(SQLParser::COMMA, i);
  return terminal;
}


// Reports the rule index constant associated with in_value_expr_list.
size_t SQLParser::In_value_expr_listContext::getRuleIndex() const {
  const size_t ruleIdx = SQLParser::RuleIn_value_expr_list;
  return ruleIdx;
}

// Forwards the "enter" event to `listener` when it is a SQLParserListener;
// any other listener type is ignored.
void SQLParser::In_value_expr_listContext::enterRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->enterIn_value_expr_list(this);
  }
}

// Forwards the "exit" event to `listener` when it is a SQLParserListener;
// any other listener type is ignored.
void SQLParser::In_value_expr_listContext::exitRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->exitIn_value_expr_list(this);
  }
}

// Parses the in_value_expr_list rule: one in_value_expr followed by zero or
// more repetitions of (COMMA in_value_expr).
//
// A RecognitionException raised while matching is reported to the error
// handler, recorded on the context's `exception` field, and handed to the
// handler's recover(). The context created for this invocation is returned
// in either case.
SQLParser::In_value_expr_listContext *SQLParser::in_value_expr_list() {
  In_value_expr_listContext *_localctx =
      _tracker.createInstance<In_value_expr_listContext>(_ctx, getState());
  enterRule(_localctx, 20, SQLParser::RuleIn_value_expr_list);
  // Holds the type of the next lookahead token for the repetition test.
  size_t _la = 0;

  // NOTE(review): `finally` is defined outside this chunk; presumably it runs
  // exitRule() when this scope is left — confirm.
  auto onExit = finally([=] { exitRule(); });
  try {
    enterOuterAlt(_localctx, 1);
    setState(176);
    in_value_expr();
    setState(181);
    _errHandler->sync(this);
    _la = _input->LA(1);
    // Keep consuming ", in_value_expr" while the next token is a COMMA.
    while (_la == SQLParser::COMMA) {
      setState(177);
      match(SQLParser::COMMA);
      setState(178);
      in_value_expr();
      setState(183);
      _errHandler->sync(this);
      _la = _input->LA(1);
    }

  } catch (RecognitionException &e) {
    _errHandler->reportError(this, e);
    _localctx->exception = std::current_exception();
    _errHandler->recover(this, _localctx->exception);
  }

  return _localctx;
}

//----------------- In_value_exprContext
//------------------------------------------------------------------

// Constructs an in_value_expr context; both arguments are forwarded unchanged
// to the ParserRuleContext base constructor.
SQLParser::In_value_exprContext::In_value_exprContext(
    ParserRuleContext *parent_ctx, size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {
  // Nothing beyond base-class construction is performed here.
}

// Looks up the Constant_num_and_strContext child at index 0 via
// getRuleContext().
SQLParser::Constant_num_and_strContext *
SQLParser::In_value_exprContext::constant_num_and_str() {
  auto *const ruleNode =
      getRuleContext<SQLParser::Constant_num_and_strContext>(0);
  return ruleNode;
}

// Looks up the Bool_valueContext child at index 0 via getRuleContext().
SQLParser::Bool_valueContext *SQLParser::In_value_exprContext::bool_value() {
  auto *const ruleNode = getRuleContext<SQLParser::Bool_valueContext>(0);
  return ruleNode;
}


// Reports the rule index constant associated with in_value_expr.
size_t SQLParser::In_value_exprContext::getRuleIndex() const {
  const size_t ruleIdx = SQLParser::RuleIn_value_expr;
  return ruleIdx;
}

// Forwards the "enter" event to `listener` when it is a SQLParserListener;
// any other listener type is ignored.
void SQLParser::In_value_exprContext::enterRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->enterIn_value_expr(this);
  }
}

// Forwards the "exit" event to `listener` when it is a SQLParserListener;
// any other listener type is ignored.
void SQLParser::In_value_exprContext::exitRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->exitIn_value_expr(this);
  }
}

// Parses the in_value_expr rule, choosing the alternative from the type of
// the next lookahead token:
//   1. constant_num_and_str — for INTEGER, FLOAT, SQUOTA_STRING or
//                             DQUOTA_STRING
//   2. bool_value           — for TRUE_V or FALSE_V
// Any other token type throws NoViableAltException, which is caught below.
//
// A RecognitionException raised while matching is reported to the error
// handler, recorded on the context's `exception` field, and handed to the
// handler's recover(). The context created for this invocation is returned
// in either case.
SQLParser::In_value_exprContext *SQLParser::in_value_expr() {
  In_value_exprContext *_localctx =
      _tracker.createInstance<In_value_exprContext>(_ctx, getState());
  enterRule(_localctx, 22, SQLParser::RuleIn_value_expr);

  // NOTE(review): `finally` is defined outside this chunk; presumably it runs
  // exitRule() when this scope is left — confirm.
  auto onExit = finally([=] { exitRule(); });
  try {
    setState(186);
    _errHandler->sync(this);
    switch (_input->LA(1)) {
      // Alternative 1: constant_num_and_str
      case SQLParser::INTEGER:
      case SQLParser::FLOAT:
      case SQLParser::SQUOTA_STRING:
      case SQLParser::DQUOTA_STRING: {
        enterOuterAlt(_localctx, 1);
        setState(184);
        constant_num_and_str();
        break;
      }

      // Alternative 2: bool_value
      case SQLParser::TRUE_V:
      case SQLParser::FALSE_V: {
        enterOuterAlt(_localctx, 2);
        setState(185);
        bool_value();
        break;
      }

      default:
        throw NoViableAltException(this);
    }

  } catch (RecognitionException &e) {
    _errHandler->reportError(this, e);
    _localctx->exception = std::current_exception();
    _errHandler->recover(this, _localctx->exception);
  }

  return _localctx;
}

//----------------- ConstantContext
//------------------------------------------------------------------

// Constructs a constant context; both arguments are forwarded unchanged to
// the ParserRuleContext base constructor.
SQLParser::ConstantContext::ConstantContext(ParserRuleContext *parent_ctx,
                                            size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {
  // Nothing beyond base-class construction is performed here.
}

// Looks up the NumericContext child at index 0 via getRuleContext().
SQLParser::NumericContext *SQLParser::ConstantContext::numeric() {
  auto *const ruleNode = getRuleContext<SQLParser::NumericContext>(0);
  return ruleNode;
}

// Looks up the Quoted_stringContext child at index 0 via getRuleContext().
SQLParser::Quoted_stringContext *SQLParser::ConstantContext::quoted_string() {
  auto *const ruleNode = getRuleContext<SQLParser::Quoted_stringContext>(0);
  return ruleNode;
}

// Looks up the Vector_exprContext child at index 0 via getRuleContext().
SQLParser::Vector_exprContext *SQLParser::ConstantContext::vector_expr() {
  auto *const ruleNode = getRuleContext<SQLParser::Vector_exprContext>(0);
  return ruleNode;
}

// Looks up the Bool_valueContext child at index 0 via getRuleContext().
SQLParser::Bool_valueContext *SQLParser::ConstantContext::bool_value() {
  auto *const ruleNode = getRuleContext<SQLParser::Bool_valueContext>(0);
  return ruleNode;
}


// Reports the rule index constant associated with constant.
size_t SQLParser::ConstantContext::getRuleIndex() const {
  const size_t ruleIdx = SQLParser::RuleConstant;
  return ruleIdx;
}

// Forwards the "enter" event to `listener` when it is a SQLParserListener;
// any other listener type is ignored.
void SQLParser::ConstantContext::enterRule(tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->enterConstant(this);
  }
}

// Forwards the "exit" event to `listener` when it is a SQLParserListener;
// any other listener type is ignored.
void SQLParser::ConstantContext::exitRule(tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->exitConstant(this);
  }
}

// Parses the constant rule, choosing the alternative from the type of the
// next lookahead token:
//   1. numeric        — for INTEGER or FLOAT
//   2. quoted_string  — for SQUOTA_STRING or DQUOTA_STRING
//   3. vector_expr    — for LMP or VECTOR
//   4. bool_value     — for TRUE_V or FALSE_V
// Any other token type throws NoViableAltException, which is caught below.
//
// A RecognitionException raised while matching is reported to the error
// handler, recorded on the context's `exception` field, and handed to the
// handler's recover(). The context created for this invocation is returned
// in either case.
SQLParser::ConstantContext *SQLParser::constant() {
  ConstantContext *_localctx =
      _tracker.createInstance<ConstantContext>(_ctx, getState());
  enterRule(_localctx, 24, SQLParser::RuleConstant);

  // NOTE(review): `finally` is defined outside this chunk; presumably it runs
  // exitRule() when this scope is left — confirm.
  auto onExit = finally([=] { exitRule(); });
  try {
    setState(192);
    _errHandler->sync(this);
    switch (_input->LA(1)) {
      // Alternative 1: numeric
      case SQLParser::INTEGER:
      case SQLParser::FLOAT: {
        enterOuterAlt(_localctx, 1);
        setState(188);
        numeric();
        break;
      }

      // Alternative 2: quoted_string
      case SQLParser::SQUOTA_STRING:
      case SQLParser::DQUOTA_STRING: {
        enterOuterAlt(_localctx, 2);
        setState(189);
        quoted_string();
        break;
      }

      // Alternative 3: vector_expr
      case SQLParser::LMP:
      case SQLParser::VECTOR: {
        enterOuterAlt(_localctx, 3);
        setState(190);
        vector_expr();
        break;
      }

      // Alternative 4: bool_value
      case SQLParser::TRUE_V:
      case SQLParser::FALSE_V: {
        enterOuterAlt(_localctx, 4);
        setState(191);
        bool_value();
        break;
      }

      default:
        throw NoViableAltException(this);
    }

  } catch (RecognitionException &e) {
    _errHandler->reportError(this, e);
    _localctx->exception = std::current_exception();
    _errHandler->recover(this, _localctx->exception);
  }

  return _localctx;
}

//----------------- Constant_num_and_strContext
//------------------------------------------------------------------

// Constructs a constant_num_and_str context; both arguments are forwarded
// unchanged to the ParserRuleContext base constructor.
SQLParser::Constant_num_and_strContext::Constant_num_and_strContext(
    ParserRuleContext *parent_ctx, size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {
  // Nothing beyond base-class construction is performed here.
}

// Looks up the NumericContext child at index 0 via getRuleContext().
SQLParser::NumericContext *SQLParser::Constant_num_and_strContext::numeric() {
  auto *const ruleNode = getRuleContext<SQLParser::NumericContext>(0);
  return ruleNode;
}

// Looks up the Quoted_stringContext child at index 0 via getRuleContext().
SQLParser::Quoted_stringContext *
SQLParser::Constant_num_and_strContext::quoted_string() {
  auto *const ruleNode = getRuleContext<SQLParser::Quoted_stringContext>(0);
  return ruleNode;
}


// Reports the rule index constant associated with constant_num_and_str.
size_t SQLParser::Constant_num_and_strContext::getRuleIndex() const {
  const size_t ruleIdx = SQLParser::RuleConstant_num_and_str;
  return ruleIdx;
}

// Forwards the "enter" event to `listener` when it is a SQLParserListener;
// any other listener type is ignored.
void SQLParser::Constant_num_and_strContext::enterRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->enterConstant_num_and_str(this);
  }
}

// Forwards the "exit" event to `listener` when it is a SQLParserListener;
// any other listener type is ignored.
void SQLParser::Constant_num_and_strContext::exitRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->exitConstant_num_and_str(this);
  }
}

// Parses the constant_num_and_str rule, choosing the alternative from the
// type of the next lookahead token:
//   1. numeric        — for INTEGER or FLOAT
//   2. quoted_string  — for SQUOTA_STRING or DQUOTA_STRING
// Any other token type throws NoViableAltException, which is caught below.
//
// A RecognitionException raised while matching is reported to the error
// handler, recorded on the context's `exception` field, and handed to the
// handler's recover(). The context created for this invocation is returned
// in either case.
SQLParser::Constant_num_and_strContext *SQLParser::constant_num_and_str() {
  Constant_num_and_strContext *_localctx =
      _tracker.createInstance<Constant_num_and_strContext>(_ctx, getState());
  enterRule(_localctx, 26, SQLParser::RuleConstant_num_and_str);

  // NOTE(review): `finally` is defined outside this chunk; presumably it runs
  // exitRule() when this scope is left — confirm.
  auto onExit = finally([=] { exitRule(); });
  try {
    setState(196);
    _errHandler->sync(this);
    switch (_input->LA(1)) {
      // Alternative 1: numeric
      case SQLParser::INTEGER:
      case SQLParser::FLOAT: {
        enterOuterAlt(_localctx, 1);
        setState(194);
        numeric();
        break;
      }

      // Alternative 2: quoted_string
      case SQLParser::SQUOTA_STRING:
      case SQLParser::DQUOTA_STRING: {
        enterOuterAlt(_localctx, 2);
        setState(195);
        quoted_string();
        break;
      }

      default:
        throw NoViableAltException(this);
    }

  } catch (RecognitionException &e) {
    _errHandler->reportError(this, e);
    _localctx->exception = std::current_exception();
    _errHandler->recover(this, _localctx->exception);
  }

  return _localctx;
}

//----------------- MatrixContext
//------------------------------------------------------------------

// Constructs a matrix context; both arguments are forwarded unchanged to the
// ParserRuleContext base constructor.
SQLParser::MatrixContext::MatrixContext(ParserRuleContext *parent_ctx,
                                        size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {
  // Nothing beyond base-class construction is performed here.
}

// Looks up the LMP terminal at index 0 on this context via getToken().
tree::TerminalNode *SQLParser::MatrixContext::LMP() {
  auto *const terminal = getToken(SQLParser::LMP, 0);
  return terminal;
}

// Collects the VECTOR terminals of this context via getTokens().
std::vector<tree::TerminalNode *> SQLParser::MatrixContext::VECTOR() {
  auto terminals = getTokens(SQLParser::VECTOR);
  return terminals;
}

// Looks up the VECTOR terminal at index `i` on this context via getToken().
tree::TerminalNode *SQLParser::MatrixContext::VECTOR(size_t i) {
  auto *const terminal = getToken(SQLParser::VECTOR, i);
  return terminal;
}

// Looks up the RMP terminal at index 0 on this context via getToken().
tree::TerminalNode *SQLParser::MatrixContext::RMP() {
  auto *const terminal = getToken(SQLParser::RMP, 0);
  return terminal;
}

// Collects the COMMA terminals of this context via getTokens().
std::vector<tree::TerminalNode *> SQLParser::MatrixContext::COMMA() {
  auto terminals = getTokens(SQLParser::COMMA);
  return terminals;
}

// Looks up the COMMA terminal at index `i` on this context via getToken().
tree::TerminalNode *SQLParser::MatrixContext::COMMA(size_t i) {
  auto *const terminal = getToken(SQLParser::COMMA, i);
  return terminal;
}


// Reports the rule index constant associated with matrix.
size_t SQLParser::MatrixContext::getRuleIndex() const {
  const size_t ruleIdx = SQLParser::RuleMatrix;
  return ruleIdx;
}

// Forwards the "enter" event to `listener` when it is a SQLParserListener;
// any other listener type is ignored.
void SQLParser::MatrixContext::enterRule(tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->enterMatrix(this);
  }
}

// Forwards the "exit" event to `listener` when it is a SQLParserListener;
// any other listener type is ignored.
void SQLParser::MatrixContext::exitRule(tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->exitMatrix(this);
  }
}

// Parses the matrix rule: an LMP token, a VECTOR token, zero or more
// repetitions of (COMMA VECTOR), and finally an RMP token.
//
// A RecognitionException raised while matching is reported to the error
// handler, recorded on the context's `exception` field, and handed to the
// handler's recover(). The context created for this invocation is returned
// in either case.
SQLParser::MatrixContext *SQLParser::matrix() {
  MatrixContext *_localctx =
      _tracker.createInstance<MatrixContext>(_ctx, getState());
  enterRule(_localctx, 28, SQLParser::RuleMatrix);
  // Holds the type of the next lookahead token for the repetition test.
  size_t _la = 0;

  // NOTE(review): `finally` is defined outside this chunk; presumably it runs
  // exitRule() when this scope is left — confirm.
  auto onExit = finally([=] { exitRule(); });
  try {
    enterOuterAlt(_localctx, 1);
    setState(198);
    match(SQLParser::LMP);
    setState(199);
    match(SQLParser::VECTOR);
    setState(204);
    _errHandler->sync(this);
    _la = _input->LA(1);
    // Keep consuming ", VECTOR" while the next token is a COMMA.
    while (_la == SQLParser::COMMA) {
      setState(200);
      match(SQLParser::COMMA);
      setState(201);
      match(SQLParser::VECTOR);
      setState(206);
      _errHandler->sync(this);
      _la = _input->LA(1);
    }
    setState(207);
    match(SQLParser::RMP);

  } catch (RecognitionException &e) {
    _errHandler->reportError(this, e);
    _localctx->exception = std::current_exception();
    _errHandler->recover(this, _localctx->exception);
  }

  return _localctx;
}

//----------------- Vector_exprContext
//------------------------------------------------------------------

// Constructs a vector_expr context; both arguments are forwarded unchanged to
// the ParserRuleContext base constructor.
SQLParser::Vector_exprContext::Vector_exprContext(ParserRuleContext *parent_ctx,
                                                  size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {
  // Nothing beyond base-class construction is performed here.
}

// Looks up the VECTOR terminal at index 0 on this context via getToken().
tree::TerminalNode *SQLParser::Vector_exprContext::VECTOR() {
  auto *const terminal = getToken(SQLParser::VECTOR, 0);
  return terminal;
}

// Looks up the MatrixContext child at index 0 via getRuleContext().
SQLParser::MatrixContext *SQLParser::Vector_exprContext::matrix() {
  auto *const ruleNode = getRuleContext<SQLParser::MatrixContext>(0);
  return ruleNode;
}


// Reports the rule index constant associated with vector_expr.
size_t SQLParser::Vector_exprContext::getRuleIndex() const {
  const size_t ruleIdx = SQLParser::RuleVector_expr;
  return ruleIdx;
}

// Forwards the "enter" event to `listener` when it is a SQLParserListener;
// any other listener type is ignored.
void SQLParser::Vector_exprContext::enterRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->enterVector_expr(this);
  }
}

// Forwards the "exit" event to `listener` when it is a SQLParserListener;
// any other listener type is ignored.
void SQLParser::Vector_exprContext::exitRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->exitVector_expr(this);
  }
}

// Parses the vector_expr rule, choosing the alternative from the type of the
// next lookahead token:
//   1. a single VECTOR token — when the next token is VECTOR
//   2. the matrix rule       — when the next token is LMP
// Any other token type throws NoViableAltException, which is caught below.
//
// A RecognitionException raised while matching is reported to the error
// handler, recorded on the context's `exception` field, and handed to the
// handler's recover(). The context created for this invocation is returned
// in either case.
SQLParser::Vector_exprContext *SQLParser::vector_expr() {
  Vector_exprContext *_localctx =
      _tracker.createInstance<Vector_exprContext>(_ctx, getState());
  enterRule(_localctx, 30, SQLParser::RuleVector_expr);

  // NOTE(review): `finally` is defined outside this chunk; presumably it runs
  // exitRule() when this scope is left — confirm.
  auto onExit = finally([=] { exitRule(); });
  try {
    setState(211);
    _errHandler->sync(this);
    switch (_input->LA(1)) {
      // Alternative 1: VECTOR
      case SQLParser::VECTOR: {
        enterOuterAlt(_localctx, 1);
        setState(209);
        match(SQLParser::VECTOR);
        break;
      }

      // Alternative 2: matrix
      case SQLParser::LMP: {
        enterOuterAlt(_localctx, 2);
        setState(210);
        matrix();
        break;
      }

      default:
        throw NoViableAltException(this);
    }

  } catch (RecognitionException &e) {
    _errHandler->reportError(this, e);
    _localctx->exception = std::current_exception();
    _errHandler->recover(this, _localctx->exception);
  }

  return _localctx;
}

//----------------- Function_value_exprContext
//------------------------------------------------------------------

// Constructs a function_value_expr context; both arguments are forwarded
// unchanged to the ParserRuleContext base constructor.
SQLParser::Function_value_exprContext::Function_value_exprContext(
    ParserRuleContext *parent_ctx, size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {
  // Nothing beyond base-class construction is performed here.
}

// Looks up the Value_exprContext child at index 0 via getRuleContext().
SQLParser::Value_exprContext *
SQLParser::Function_value_exprContext::value_expr() {
  auto *const ruleNode = getRuleContext<SQLParser::Value_exprContext>(0);
  return ruleNode;
}

// Looks up the IdentifierContext child at index 0 via getRuleContext().
SQLParser::IdentifierContext *
SQLParser::Function_value_exprContext::identifier() {
  auto *const ruleNode = getRuleContext<SQLParser::IdentifierContext>(0);
  return ruleNode;
}


// Reports the rule index constant associated with function_value_expr.
size_t SQLParser::Function_value_exprContext::getRuleIndex() const {
  const size_t ruleIdx = SQLParser::RuleFunction_value_expr;
  return ruleIdx;
}

// Forwards the "enter" event to `listener` when it is a SQLParserListener;
// any other listener type is ignored.
void SQLParser::Function_value_exprContext::enterRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->enterFunction_value_expr(this);
  }
}

// Forwards the "exit" event to `listener` when it is a SQLParserListener;
// any other listener type is ignored.
void SQLParser::Function_value_exprContext::exitRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->exitFunction_value_expr(this);
  }
}

// Parses the function_value_expr rule. The alternative is chosen by
// adaptivePredict() on decision 19:
//   1. value_expr
//   2. identifier
//
// A RecognitionException raised while matching is reported to the error
// handler, recorded on the context's `exception` field, and handed to the
// handler's recover(). The context created for this invocation is returned
// in either case.
SQLParser::Function_value_exprContext *SQLParser::function_value_expr() {
  Function_value_exprContext *_localctx =
      _tracker.createInstance<Function_value_exprContext>(_ctx, getState());
  enterRule(_localctx, 32, SQLParser::RuleFunction_value_expr);

  // NOTE(review): `finally` is defined outside this chunk; presumably it runs
  // exitRule() when this scope is left — confirm.
  auto onExit = finally([=] { exitRule(); });
  try {
    setState(215);
    _errHandler->sync(this);
    switch (getInterpreter<atn::ParserATNSimulator>()->adaptivePredict(
        _input, 19, _ctx)) {
      // Alternative 1: value_expr
      case 1: {
        enterOuterAlt(_localctx, 1);
        setState(213);
        value_expr();
        break;
      }

      // Alternative 2: identifier
      case 2: {
        enterOuterAlt(_localctx, 2);
        setState(214);
        identifier();
        break;
      }
    }

  } catch (RecognitionException &e) {
    _errHandler->reportError(this, e);
    _localctx->exception = std::current_exception();
    _errHandler->recover(this, _localctx->exception);
  }

  return _localctx;
}

//----------------- Function_callContext
//------------------------------------------------------------------

// Constructs a function_call context; both arguments are forwarded unchanged
// to the ParserRuleContext base constructor.
SQLParser::Function_callContext::Function_callContext(
    ParserRuleContext *parent_ctx, size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {
  // Nothing beyond base-class construction is performed here.
}

// Looks up the IdentifierContext child at index 0 via getRuleContext().
SQLParser::IdentifierContext *SQLParser::Function_callContext::identifier() {
  auto *const ruleNode = getRuleContext<SQLParser::IdentifierContext>(0);
  return ruleNode;
}

// Looks up the LP terminal at index 0 on this context via getToken().
tree::TerminalNode *SQLParser::Function_callContext::LP() {
  auto *const terminal = getToken(SQLParser::LP, 0);
  return terminal;
}

// Looks up the RP terminal at index 0 on this context via getToken().
tree::TerminalNode *SQLParser::Function_callContext::RP() {
  auto *const terminal = getToken(SQLParser::RP, 0);
  return terminal;
}

// Collects the Function_value_exprContext children of this context via
// getRuleContexts().
std::vector<SQLParser::Function_value_exprContext *>
SQLParser::Function_callContext::function_value_expr() {
  auto ruleNodes = getRuleContexts<SQLParser::Function_value_exprContext>();
  return ruleNodes;
}

// Looks up the Function_value_exprContext child at index `i` via
// getRuleContext().
SQLParser::Function_value_exprContext *
SQLParser::Function_callContext::function_value_expr(size_t i) {
  auto *const ruleNode =
      getRuleContext<SQLParser::Function_value_exprContext>(i);
  return ruleNode;
}

// Collects the COMMA terminals of this context via getTokens().
std::vector<tree::TerminalNode *> SQLParser::Function_callContext::COMMA() {
  auto terminals = getTokens(SQLParser::COMMA);
  return terminals;
}

// Looks up the COMMA terminal at index `i` on this context via getToken().
tree::TerminalNode *SQLParser::Function_callContext::COMMA(size_t i) {
  auto *const terminal = getToken(SQLParser::COMMA, i);
  return terminal;
}


// Reports the rule index constant associated with function_call.
size_t SQLParser::Function_callContext::getRuleIndex() const {
  const size_t ruleIdx = SQLParser::RuleFunction_call;
  return ruleIdx;
}

// Forwards the "enter" event to `listener` when it is a SQLParserListener;
// any other listener type is ignored.
void SQLParser::Function_callContext::enterRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->enterFunction_call(this);
  }
}

void SQLParser::Function_callContext::exitRule(
    tree::ParseTreeListener *listener) {
  auto parserListener = dynamic_cast<SQLParserListener *>(listener);
  if (parserListener != nullptr) parserListener->exitFunction_call(this);
}

// Parses the function_call rule:
//   identifier LP ( function_value_expr ( COMMA function_value_expr )* )? RP
// Returns the context node created for this invocation. A
// RecognitionException is reported, stored on the node and passed to the
// error handler for recovery instead of propagating to the caller.
SQLParser::Function_callContext *SQLParser::function_call() {
  Function_callContext *ruleCtx =
      _tracker.createInstance<Function_callContext>(_ctx, getState());
  enterRule(ruleCtx, 34, SQLParser::RuleFunction_call);

  // Token types that open the optional argument list.
  const unsigned long long argumentStartMask =
      (1ULL << SQLParser::OR) | (1ULL << SQLParser::AND) |
      (1ULL << SQLParser::NOT) | (1ULL << SQLParser::IN) |
      (1ULL << SQLParser::BETWEEN) | (1ULL << SQLParser::LIKE) |
      (1ULL << SQLParser::WHERE) | (1ULL << SQLParser::SELECT) |
      (1ULL << SQLParser::AS) | (1ULL << SQLParser::BY) |
      (1ULL << SQLParser::ORDER) | (1ULL << SQLParser::ASC) |
      (1ULL << SQLParser::DESC) | (1ULL << SQLParser::LIMIT) |
      (1ULL << SQLParser::TRUE_V) | (1ULL << SQLParser::FALSE_V) |
      (1ULL << SQLParser::INTEGER) | (1ULL << SQLParser::FLOAT) |
      (1ULL << SQLParser::SQUOTA_STRING) |
      (1ULL << SQLParser::DQUOTA_STRING) | (1ULL << SQLParser::LMP) |
      (1ULL << SQLParser::VECTOR) | (1ULL << SQLParser::REGULAR_ID);

  // exitRule() is handed to `finally` so it is tied to this scope's lifetime.
  auto exitGuard = finally([this] { exitRule(); });
  try {
    enterOuterAlt(ruleCtx, 1);
    setState(217);
    identifier();
    setState(218);
    match(SQLParser::LP);
    setState(227);
    _errHandler->sync(this);

    size_t lookahead = _input->LA(1);
    // The mask covers token types 0..63 only; the first test short-circuits
    // so that larger values are never used as a shift count.
    const bool hasArguments =
        ((lookahead & ~0x3fULL) == 0) &&
        (((1ULL << lookahead) & argumentStartMask) != 0);
    if (hasArguments) {
      setState(219);
      function_value_expr();
      setState(224);
      _errHandler->sync(this);
      // Each further argument must be introduced by a COMMA.
      for (lookahead = _input->LA(1); lookahead == SQLParser::COMMA;
           lookahead = _input->LA(1)) {
        setState(220);
        match(SQLParser::COMMA);
        setState(221);
        function_value_expr();
        setState(226);
        _errHandler->sync(this);
      }
    }
    setState(229);
    match(SQLParser::RP);
  } catch (RecognitionException &error) {
    _errHandler->reportError(this, error);
    ruleCtx->exception = std::current_exception();
    _errHandler->recover(this, ruleCtx->exception);
  }

  return ruleCtx;
}

//----------------- Dql_statementContext
//------------------------------------------------------------------

// Creates the parse-tree node for one dql_statement match; both arguments go
// straight to the ParserRuleContext base.
SQLParser::Dql_statementContext::Dql_statementContext(
    ParserRuleContext *parent_ctx, size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {}

// Child accessor: the Select_statementContext at index 0.
SQLParser::Select_statementContext *
SQLParser::Dql_statementContext::select_statement() {
  auto *child = getRuleContext<SQLParser::Select_statementContext>(0);
  return child;
}


// Identifies this node as belonging to the dql_statement rule.
size_t SQLParser::Dql_statementContext::getRuleIndex() const {
  const size_t ruleIndex = SQLParser::RuleDql_statement;
  return ruleIndex;
}

// Notifies the listener on entry, provided it is a SQLParserListener.
void SQLParser::Dql_statementContext::enterRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->enterDql_statement(this);
  }
}

// Notifies the listener on exit, provided it is a SQLParserListener.
void SQLParser::Dql_statementContext::exitRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->exitDql_statement(this);
  }
}

// Parses the dql_statement rule, which consists of a single select_statement.
// Returns the context node created for this invocation; recognition errors
// are reported, recorded on the node and recovered from here.
SQLParser::Dql_statementContext *SQLParser::dql_statement() {
  Dql_statementContext *ruleCtx =
      _tracker.createInstance<Dql_statementContext>(_ctx, getState());
  enterRule(ruleCtx, 36, SQLParser::RuleDql_statement);

  // exitRule() is handed to `finally` so it is tied to this scope's lifetime.
  auto exitGuard = finally([this] { exitRule(); });
  try {
    enterOuterAlt(ruleCtx, 1);
    setState(231);
    select_statement();
  } catch (RecognitionException &error) {
    _errHandler->reportError(this, error);
    ruleCtx->exception = std::current_exception();
    _errHandler->recover(this, ruleCtx->exception);
  }

  return ruleCtx;
}

//----------------- Select_statementContext
//------------------------------------------------------------------

// Creates the parse-tree node for one select_statement match; both arguments
// go straight to the ParserRuleContext base.
SQLParser::Select_statementContext::Select_statementContext(
    ParserRuleContext *parent_ctx, size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {}

// Token accessor: the SELECT terminal at index 0.
tree::TerminalNode *SQLParser::Select_statementContext::SELECT() {
  tree::TerminalNode *token = getToken(SQLParser::SELECT, 0);
  return token;
}

// Child accessor: the Selected_elementsContext at index 0.
SQLParser::Selected_elementsContext *
SQLParser::Select_statementContext::selected_elements() {
  auto *child = getRuleContext<SQLParser::Selected_elementsContext>(0);
  return child;
}

// Child accessor: the From_clauseContext at index 0.
SQLParser::From_clauseContext *
SQLParser::Select_statementContext::from_clause() {
  auto *child = getRuleContext<SQLParser::From_clauseContext>(0);
  return child;
}

// Child accessor: the Where_clauseContext at index 0.
SQLParser::Where_clauseContext *
SQLParser::Select_statementContext::where_clause() {
  auto *child = getRuleContext<SQLParser::Where_clauseContext>(0);
  return child;
}

// Child accessor: the Order_by_clauseContext at index 0.
SQLParser::Order_by_clauseContext *
SQLParser::Select_statementContext::order_by_clause() {
  auto *child = getRuleContext<SQLParser::Order_by_clauseContext>(0);
  return child;
}

// Child accessor: the Limit_clauseContext at index 0.
SQLParser::Limit_clauseContext *
SQLParser::Select_statementContext::limit_clause() {
  auto *child = getRuleContext<SQLParser::Limit_clauseContext>(0);
  return child;
}


// Identifies this node as belonging to the select_statement rule.
size_t SQLParser::Select_statementContext::getRuleIndex() const {
  const size_t ruleIndex = SQLParser::RuleSelect_statement;
  return ruleIndex;
}

// Notifies the listener on entry, provided it is a SQLParserListener.
void SQLParser::Select_statementContext::enterRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->enterSelect_statement(this);
  }
}

// Notifies the listener on exit, provided it is a SQLParserListener.
void SQLParser::Select_statementContext::exitRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->exitSelect_statement(this);
  }
}

// Parses the select_statement rule:
//   SELECT selected_elements from_clause where_clause? order_by_clause?
//   limit_clause?
// Each optional clause is chosen by looking at the next token only. Returns
// the context node created for this invocation; recognition errors are
// reported, recorded on the node and recovered from here.
SQLParser::Select_statementContext *SQLParser::select_statement() {
  Select_statementContext *ruleCtx =
      _tracker.createInstance<Select_statementContext>(_ctx, getState());
  enterRule(ruleCtx, 38, SQLParser::RuleSelect_statement);

  // exitRule() is handed to `finally` so it is tied to this scope's lifetime.
  auto exitGuard = finally([this] { exitRule(); });
  try {
    enterOuterAlt(ruleCtx, 1);
    setState(233);
    match(SQLParser::SELECT);
    setState(234);
    selected_elements();
    setState(235);
    from_clause();

    // Optional WHERE clause.
    setState(237);
    _errHandler->sync(this);
    if (_input->LA(1) == SQLParser::WHERE) {
      setState(236);
      where_clause();
    }

    // Optional ORDER BY clause.
    setState(240);
    _errHandler->sync(this);
    if (_input->LA(1) == SQLParser::ORDER) {
      setState(239);
      order_by_clause();
    }

    // Optional LIMIT clause.
    setState(243);
    _errHandler->sync(this);
    if (_input->LA(1) == SQLParser::LIMIT) {
      setState(242);
      limit_clause();
    }
  } catch (RecognitionException &error) {
    _errHandler->reportError(this, error);
    ruleCtx->exception = std::current_exception();
    _errHandler->recover(this, ruleCtx->exception);
  }

  return ruleCtx;
}

//----------------- Selected_elementsContext
//------------------------------------------------------------------

SQLParser::Selected_elementsContext::Selected_elementsContext(
    ParserRuleContext *parent_ctx, size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {}

std::vector<SQLParser::Selected_elementContext *>
SQLParser::Selected_elementsContext::selected_element() {
  return getRuleContexts<SQLParser::Selected_elementContext>();
}

SQLParser::Selected_elementContext *
SQLParser::Selected_elementsContext::selected_element(size_t i) {
  return getRuleContext<SQLParser::Selected_elementContext>(i);
}

std::vector<tree::TerminalNode *> SQLParser::Selected_elementsContext::COMMA() {
  return getTokens(SQLParser::COMMA);
}

tree::TerminalNode *SQLParser::Selected_elementsContext::COMMA(size_t i) {
  return getToken(SQLParser::COMMA, i);
}


size_t SQLParser::Selected_elementsContext::getRuleIndex() const {
  return SQLParser::RuleSelected_elements;
}

void SQLParser::Selected_elementsContext::enterRule(
    tree::ParseTreeListener *listener) {
  auto parserListener = dynamic_cast<SQLParserListener *>(listener);
  if (parserListener != nullptr) parserListener->enterSelected_elements(this);
}

void SQLParser::Selected_elementsContext::exitRule(
    tree::ParseTreeListener *listener) {
  auto parserListener = dynamic_cast<SQLParserListener *>(listener);
  if (parserListener != nullptr) parserListener->exitSelected_elements(this);
}

// Parses the selected_elements rule:
//   selected_element ( COMMA selected_element )*
// Returns the context node created for this invocation; recognition errors
// are reported, recorded on the node and recovered from here.
SQLParser::Selected_elementsContext *SQLParser::selected_elements() {
  Selected_elementsContext *ruleCtx =
      _tracker.createInstance<Selected_elementsContext>(_ctx, getState());
  enterRule(ruleCtx, 40, SQLParser::RuleSelected_elements);

  // exitRule() is handed to `finally` so it is tied to this scope's lifetime.
  auto exitGuard = finally([this] { exitRule(); });
  try {
    enterOuterAlt(ruleCtx, 1);
    setState(245);
    selected_element();
    setState(250);
    _errHandler->sync(this);
    // Keep consuming ", selected_element" while the next token is a COMMA.
    for (size_t lookahead = _input->LA(1); lookahead == SQLParser::COMMA;
         lookahead = _input->LA(1)) {
      setState(246);
      match(SQLParser::COMMA);
      setState(247);
      selected_element();
      setState(252);
      _errHandler->sync(this);
    }
  } catch (RecognitionException &error) {
    _errHandler->reportError(this, error);
    ruleCtx->exception = std::current_exception();
    _errHandler->recover(this, ruleCtx->exception);
  }

  return ruleCtx;
}

//----------------- Selected_elementContext
//------------------------------------------------------------------

// Creates the parse-tree node for one selected_element match; both arguments
// go straight to the ParserRuleContext base.
SQLParser::Selected_elementContext::Selected_elementContext(
    ParserRuleContext *parent_ctx, size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {}

// Token accessor: the ASTERISK terminal at index 0.
tree::TerminalNode *SQLParser::Selected_elementContext::ASTERISK() {
  tree::TerminalNode *token = getToken(SQLParser::ASTERISK, 0);
  return token;
}

// Child accessor: the Field_nameContext at index 0.
SQLParser::Field_nameContext *SQLParser::Selected_elementContext::field_name() {
  auto *child = getRuleContext<SQLParser::Field_nameContext>(0);
  return child;
}

// Token accessor: the AS terminal at index 0.
tree::TerminalNode *SQLParser::Selected_elementContext::AS() {
  tree::TerminalNode *token = getToken(SQLParser::AS, 0);
  return token;
}

// Child accessor: the Field_aliasContext at index 0.
SQLParser::Field_aliasContext *
SQLParser::Selected_elementContext::field_alias() {
  auto *child = getRuleContext<SQLParser::Field_aliasContext>(0);
  return child;
}


// Identifies this node as belonging to the selected_element rule.
size_t SQLParser::Selected_elementContext::getRuleIndex() const {
  const size_t ruleIndex = SQLParser::RuleSelected_element;
  return ruleIndex;
}

// Notifies the listener on entry, provided it is a SQLParserListener.
void SQLParser::Selected_elementContext::enterRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->enterSelected_element(this);
  }
}

// Notifies the listener on exit, provided it is a SQLParserListener.
void SQLParser::Selected_elementContext::exitRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->exitSelected_element(this);
  }
}

// Parses the selected_element rule. Two alternatives are selected on the next
// token type:
//   1. a lone ASTERISK token;
//   2. field_name, then an optional AS token, then an optional field_alias.
// Any other token type raises NoViableAltException, which is handled by the
// catch block below. Returns the context node created for this invocation.
// NOTE(review): SQLParser::field_alias() also accepts its own optional AS, so
// an AS may be consumed either here or inside field_alias(); which one wins is
// decided by the adaptivePredict call below.
SQLParser::Selected_elementContext *SQLParser::selected_element() {
  Selected_elementContext *_localctx =
      _tracker.createInstance<Selected_elementContext>(_ctx, getState());
  enterRule(_localctx, 42, SQLParser::RuleSelected_element);
  size_t _la = 0;

  // exitRule() is handed to `finally` so it is tied to this scope's lifetime.
  auto onExit = finally([=] { exitRule(); });
  try {
    setState(261);
    _errHandler->sync(this);
    switch (_input->LA(1)) {
      // Alternative 1: '*'.
      case SQLParser::ASTERISK: {
        enterOuterAlt(_localctx, 1);
        setState(253);
        match(SQLParser::ASTERISK);
        break;
      }

      // Alternative 2: any of these keyword token types or REGULAR_ID starts
      // a field_name.
      case SQLParser::OR:
      case SQLParser::AND:
      case SQLParser::NOT:
      case SQLParser::IN:
      case SQLParser::BETWEEN:
      case SQLParser::LIKE:
      case SQLParser::WHERE:
      case SQLParser::SELECT:
      case SQLParser::AS:
      case SQLParser::BY:
      case SQLParser::ORDER:
      case SQLParser::ASC:
      case SQLParser::DESC:
      case SQLParser::LIMIT:
      case SQLParser::REGULAR_ID: {
        enterOuterAlt(_localctx, 2);
        setState(254);
        field_name();
        setState(256);
        _errHandler->sync(this);

        // Consume AS only when prediction returns 1; the second argument (26)
        // is presumably the generated decision number -- confirm against the
        // grammar's ATN.
        switch (getInterpreter<atn::ParserATNSimulator>()->adaptivePredict(
            _input, 26, _ctx)) {
          case 1: {
            setState(255);
            match(SQLParser::AS);
            break;
          }
        }
        setState(259);
        _errHandler->sync(this);

        // Parse field_alias only if the next token type is below 64 and is
        // one of the types in the mask; the range test comes first so the
        // shift count is never 64 or more.
        _la = _input->LA(1);
        if ((((_la & ~0x3fULL) == 0) &&
             ((1ULL << _la) &
              ((1ULL << SQLParser::OR) | (1ULL << SQLParser::AND) |
               (1ULL << SQLParser::NOT) | (1ULL << SQLParser::IN) |
               (1ULL << SQLParser::BETWEEN) | (1ULL << SQLParser::LIKE) |
               (1ULL << SQLParser::WHERE) | (1ULL << SQLParser::SELECT) |
               (1ULL << SQLParser::AS) | (1ULL << SQLParser::BY) |
               (1ULL << SQLParser::ORDER) | (1ULL << SQLParser::ASC) |
               (1ULL << SQLParser::DESC) | (1ULL << SQLParser::LIMIT) |
               (1ULL << SQLParser::REGULAR_ID))) != 0)) {
          setState(258);
          field_alias();
        }
        break;
      }

      default:
        throw NoViableAltException(this);
    }

  } catch (RecognitionException &e) {
    // Report the error, remember it on the node, then let the error handler
    // attempt recovery; the exception does not leave this function.
    _errHandler->reportError(this, e);
    _localctx->exception = std::current_exception();
    _errHandler->recover(this, _localctx->exception);
  }

  return _localctx;
}

//----------------- From_clauseContext
//------------------------------------------------------------------

// Creates the parse-tree node for one from_clause match; both arguments go
// straight to the ParserRuleContext base.
SQLParser::From_clauseContext::From_clauseContext(ParserRuleContext *parent_ctx,
                                                  size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {}

// Token accessor: the FROM terminal at index 0.
tree::TerminalNode *SQLParser::From_clauseContext::FROM() {
  tree::TerminalNode *token = getToken(SQLParser::FROM, 0);
  return token;
}

// Child accessor: the Tableview_nameContext at index 0.
SQLParser::Tableview_nameContext *
SQLParser::From_clauseContext::tableview_name() {
  auto *child = getRuleContext<SQLParser::Tableview_nameContext>(0);
  return child;
}


// Identifies this node as belonging to the from_clause rule.
size_t SQLParser::From_clauseContext::getRuleIndex() const {
  const size_t ruleIndex = SQLParser::RuleFrom_clause;
  return ruleIndex;
}

// Notifies the listener on entry, provided it is a SQLParserListener.
void SQLParser::From_clauseContext::enterRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->enterFrom_clause(this);
  }
}

// Notifies the listener on exit, provided it is a SQLParserListener.
void SQLParser::From_clauseContext::exitRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->exitFrom_clause(this);
  }
}

// Parses the from_clause rule: FROM tableview_name.
// Returns the context node created for this invocation; recognition errors
// are reported, recorded on the node and recovered from here.
SQLParser::From_clauseContext *SQLParser::from_clause() {
  From_clauseContext *ruleCtx =
      _tracker.createInstance<From_clauseContext>(_ctx, getState());
  enterRule(ruleCtx, 44, SQLParser::RuleFrom_clause);

  // exitRule() is handed to `finally` so it is tied to this scope's lifetime.
  auto exitGuard = finally([this] { exitRule(); });
  try {
    enterOuterAlt(ruleCtx, 1);
    setState(263);
    match(SQLParser::FROM);
    setState(264);
    tableview_name();
  } catch (RecognitionException &error) {
    _errHandler->reportError(this, error);
    ruleCtx->exception = std::current_exception();
    _errHandler->recover(this, ruleCtx->exception);
  }

  return ruleCtx;
}

//----------------- Order_by_clauseContext
//------------------------------------------------------------------

SQLParser::Order_by_clauseContext::Order_by_clauseContext(
    ParserRuleContext *parent_ctx, size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {}

tree::TerminalNode *SQLParser::Order_by_clauseContext::ORDER() {
  return getToken(SQLParser::ORDER, 0);
}

tree::TerminalNode *SQLParser::Order_by_clauseContext::BY() {
  return getToken(SQLParser::BY, 0);
}

std::vector<SQLParser::Order_by_elementContext *>
SQLParser::Order_by_clauseContext::order_by_element() {
  return getRuleContexts<SQLParser::Order_by_elementContext>();
}

SQLParser::Order_by_elementContext *
SQLParser::Order_by_clauseContext::order_by_element(size_t i) {
  return getRuleContext<SQLParser::Order_by_elementContext>(i);
}

std::vector<tree::TerminalNode *> SQLParser::Order_by_clauseContext::COMMA() {
  return getTokens(SQLParser::COMMA);
}

tree::TerminalNode *SQLParser::Order_by_clauseContext::COMMA(size_t i) {
  return getToken(SQLParser::COMMA, i);
}


size_t SQLParser::Order_by_clauseContext::getRuleIndex() const {
  return SQLParser::RuleOrder_by_clause;
}

void SQLParser::Order_by_clauseContext::enterRule(
    tree::ParseTreeListener *listener) {
  auto parserListener = dynamic_cast<SQLParserListener *>(listener);
  if (parserListener != nullptr) parserListener->enterOrder_by_clause(this);
}

void SQLParser::Order_by_clauseContext::exitRule(
    tree::ParseTreeListener *listener) {
  auto parserListener = dynamic_cast<SQLParserListener *>(listener);
  if (parserListener != nullptr) parserListener->exitOrder_by_clause(this);
}

// Parses the order_by_clause rule:
//   ORDER BY order_by_element ( COMMA order_by_element )*
// Returns the context node created for this invocation; recognition errors
// are reported, recorded on the node and recovered from here.
SQLParser::Order_by_clauseContext *SQLParser::order_by_clause() {
  Order_by_clauseContext *ruleCtx =
      _tracker.createInstance<Order_by_clauseContext>(_ctx, getState());
  enterRule(ruleCtx, 46, SQLParser::RuleOrder_by_clause);

  // exitRule() is handed to `finally` so it is tied to this scope's lifetime.
  auto exitGuard = finally([this] { exitRule(); });
  try {
    enterOuterAlt(ruleCtx, 1);
    setState(266);
    match(SQLParser::ORDER);
    setState(267);
    match(SQLParser::BY);
    setState(268);
    order_by_element();
    setState(273);
    _errHandler->sync(this);
    // Keep consuming ", order_by_element" while the next token is a COMMA.
    for (size_t lookahead = _input->LA(1); lookahead == SQLParser::COMMA;
         lookahead = _input->LA(1)) {
      setState(269);
      match(SQLParser::COMMA);
      setState(270);
      order_by_element();
      setState(275);
      _errHandler->sync(this);
    }
  } catch (RecognitionException &error) {
    _errHandler->reportError(this, error);
    ruleCtx->exception = std::current_exception();
    _errHandler->recover(this, ruleCtx->exception);
  }

  return ruleCtx;
}

//----------------- Order_by_elementContext
//------------------------------------------------------------------

// Creates the parse-tree node for one order_by_element match; both arguments
// go straight to the ParserRuleContext base.
SQLParser::Order_by_elementContext::Order_by_elementContext(
    ParserRuleContext *parent_ctx, size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {}

// Child accessor: the Field_nameContext at index 0.
SQLParser::Field_nameContext *SQLParser::Order_by_elementContext::field_name() {
  auto *child = getRuleContext<SQLParser::Field_nameContext>(0);
  return child;
}

// Token accessor: the ASC terminal at index 0.
tree::TerminalNode *SQLParser::Order_by_elementContext::ASC() {
  tree::TerminalNode *token = getToken(SQLParser::ASC, 0);
  return token;
}

// Token accessor: the DESC terminal at index 0.
tree::TerminalNode *SQLParser::Order_by_elementContext::DESC() {
  tree::TerminalNode *token = getToken(SQLParser::DESC, 0);
  return token;
}


// Identifies this node as belonging to the order_by_element rule.
size_t SQLParser::Order_by_elementContext::getRuleIndex() const {
  const size_t ruleIndex = SQLParser::RuleOrder_by_element;
  return ruleIndex;
}

// Notifies the listener on entry, provided it is a SQLParserListener.
void SQLParser::Order_by_elementContext::enterRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->enterOrder_by_element(this);
  }
}

// Notifies the listener on exit, provided it is a SQLParserListener.
void SQLParser::Order_by_elementContext::exitRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->exitOrder_by_element(this);
  }
}

// Parses the order_by_element rule: field_name optionally followed by one
// ASC or DESC token. Returns the context node created for this invocation;
// recognition errors are reported, recorded on the node and recovered from
// inside this function.
SQLParser::Order_by_elementContext *SQLParser::order_by_element() {
  Order_by_elementContext *_localctx =
      _tracker.createInstance<Order_by_elementContext>(_ctx, getState());
  enterRule(_localctx, 48, SQLParser::RuleOrder_by_element);
  size_t _la = 0;

  // exitRule() is handed to `finally` so it is tied to this scope's lifetime.
  auto onExit = finally([=] { exitRule(); });
  try {
    enterOuterAlt(_localctx, 1);
    setState(276);
    field_name();
    setState(278);
    _errHandler->sync(this);

    // Optional direction: only entered when the next token is ASC or DESC.
    _la = _input->LA(1);
    if (_la == SQLParser::ASC

        || _la == SQLParser::DESC) {
      setState(277);
      // The look-ahead is read and tested a second time before consuming.
      // NOTE(review): given the enclosing condition, the recoverInline branch
      // looks unreachable -- presumably an artifact of the generator's
      // token-set match template; confirm before simplifying.
      _la = _input->LA(1);
      if (!(_la == SQLParser::ASC

            || _la == SQLParser::DESC)) {
        _errHandler->recoverInline(this);
      } else {
        _errHandler->reportMatch(this);
        consume();
      }
    }

  } catch (RecognitionException &e) {
    // Report the error, remember it on the node, then let the error handler
    // attempt recovery; the exception does not leave this function.
    _errHandler->reportError(this, e);
    _localctx->exception = std::current_exception();
    _errHandler->recover(this, _localctx->exception);
  }

  return _localctx;
}

//----------------- Limit_clauseContext
//------------------------------------------------------------------

// Creates the parse-tree node for one limit_clause match; both arguments go
// straight to the ParserRuleContext base.
SQLParser::Limit_clauseContext::Limit_clauseContext(
    ParserRuleContext *parent_ctx, size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {}

// Token accessor: the LIMIT terminal at index 0.
tree::TerminalNode *SQLParser::Limit_clauseContext::LIMIT() {
  tree::TerminalNode *token = getToken(SQLParser::LIMIT, 0);
  return token;
}

// Child accessor: the Int_valueContext at index 0.
SQLParser::Int_valueContext *SQLParser::Limit_clauseContext::int_value() {
  auto *child = getRuleContext<SQLParser::Int_valueContext>(0);
  return child;
}


// Identifies this node as belonging to the limit_clause rule.
size_t SQLParser::Limit_clauseContext::getRuleIndex() const {
  const size_t ruleIndex = SQLParser::RuleLimit_clause;
  return ruleIndex;
}

// Notifies the listener on entry, provided it is a SQLParserListener.
void SQLParser::Limit_clauseContext::enterRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->enterLimit_clause(this);
  }
}

// Notifies the listener on exit, provided it is a SQLParserListener.
void SQLParser::Limit_clauseContext::exitRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->exitLimit_clause(this);
  }
}

// Parses the limit_clause rule: LIMIT int_value.
// Returns the context node created for this invocation; recognition errors
// are reported, recorded on the node and recovered from here.
SQLParser::Limit_clauseContext *SQLParser::limit_clause() {
  Limit_clauseContext *ruleCtx =
      _tracker.createInstance<Limit_clauseContext>(_ctx, getState());
  enterRule(ruleCtx, 50, SQLParser::RuleLimit_clause);

  // exitRule() is handed to `finally` so it is tied to this scope's lifetime.
  auto exitGuard = finally([this] { exitRule(); });
  try {
    enterOuterAlt(ruleCtx, 1);
    setState(280);
    match(SQLParser::LIMIT);
    setState(281);
    int_value();
  } catch (RecognitionException &error) {
    _errHandler->reportError(this, error);
    ruleCtx->exception = std::current_exception();
    _errHandler->recover(this, ruleCtx->exception);
  }

  return ruleCtx;
}

//----------------- Tableview_nameContext
//------------------------------------------------------------------

// Creates the parse-tree node for one tableview_name match; both arguments go
// straight to the ParserRuleContext base.
SQLParser::Tableview_nameContext::Tableview_nameContext(
    ParserRuleContext *parent_ctx, size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {}

// Child accessor: the IdentifierContext at index 0.
SQLParser::IdentifierContext *SQLParser::Tableview_nameContext::identifier() {
  auto *child = getRuleContext<SQLParser::IdentifierContext>(0);
  return child;
}


// Identifies this node as belonging to the tableview_name rule.
size_t SQLParser::Tableview_nameContext::getRuleIndex() const {
  const size_t ruleIndex = SQLParser::RuleTableview_name;
  return ruleIndex;
}

// Notifies the listener on entry, provided it is a SQLParserListener.
void SQLParser::Tableview_nameContext::enterRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->enterTableview_name(this);
  }
}

// Notifies the listener on exit, provided it is a SQLParserListener.
void SQLParser::Tableview_nameContext::exitRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->exitTableview_name(this);
  }
}

// Parses the tableview_name rule, which consists of a single identifier.
// Returns the context node created for this invocation; recognition errors
// are reported, recorded on the node and recovered from here.
SQLParser::Tableview_nameContext *SQLParser::tableview_name() {
  Tableview_nameContext *ruleCtx =
      _tracker.createInstance<Tableview_nameContext>(_ctx, getState());
  enterRule(ruleCtx, 52, SQLParser::RuleTableview_name);

  // exitRule() is handed to `finally` so it is tied to this scope's lifetime.
  auto exitGuard = finally([this] { exitRule(); });
  try {
    enterOuterAlt(ruleCtx, 1);
    setState(283);
    identifier();
  } catch (RecognitionException &error) {
    _errHandler->reportError(this, error);
    ruleCtx->exception = std::current_exception();
    _errHandler->recover(this, ruleCtx->exception);
  }

  return ruleCtx;
}

//----------------- Field_nameContext
//------------------------------------------------------------------

// Creates the parse-tree node for one field_name match; both arguments go
// straight to the ParserRuleContext base.
SQLParser::Field_nameContext::Field_nameContext(ParserRuleContext *parent_ctx,
                                                size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {}

// Child accessor: the IdentifierContext at index 0.
SQLParser::IdentifierContext *SQLParser::Field_nameContext::identifier() {
  auto *child = getRuleContext<SQLParser::IdentifierContext>(0);
  return child;
}


// Identifies this node as belonging to the field_name rule.
size_t SQLParser::Field_nameContext::getRuleIndex() const {
  const size_t ruleIndex = SQLParser::RuleField_name;
  return ruleIndex;
}

// Notifies the listener on entry, provided it is a SQLParserListener.
void SQLParser::Field_nameContext::enterRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->enterField_name(this);
  }
}

// Notifies the listener on exit, provided it is a SQLParserListener.
void SQLParser::Field_nameContext::exitRule(tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->exitField_name(this);
  }
}

// Parses the field_name rule, which consists of a single identifier.
// Returns the context node created for this invocation; recognition errors
// are reported, recorded on the node and recovered from here.
SQLParser::Field_nameContext *SQLParser::field_name() {
  Field_nameContext *ruleCtx =
      _tracker.createInstance<Field_nameContext>(_ctx, getState());
  enterRule(ruleCtx, 54, SQLParser::RuleField_name);

  // exitRule() is handed to `finally` so it is tied to this scope's lifetime.
  auto exitGuard = finally([this] { exitRule(); });
  try {
    enterOuterAlt(ruleCtx, 1);
    setState(285);
    identifier();
  } catch (RecognitionException &error) {
    _errHandler->reportError(this, error);
    ruleCtx->exception = std::current_exception();
    _errHandler->recover(this, ruleCtx->exception);
  }

  return ruleCtx;
}

//----------------- Table_aliasContext
//------------------------------------------------------------------

// Creates the parse-tree node for one table_alias match; both arguments go
// straight to the ParserRuleContext base.
SQLParser::Table_aliasContext::Table_aliasContext(ParserRuleContext *parent_ctx,
                                                  size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {}

// Child accessor: the IdentifierContext at index 0.
SQLParser::IdentifierContext *SQLParser::Table_aliasContext::identifier() {
  auto *child = getRuleContext<SQLParser::IdentifierContext>(0);
  return child;
}


// Identifies this node as belonging to the table_alias rule.
size_t SQLParser::Table_aliasContext::getRuleIndex() const {
  const size_t ruleIndex = SQLParser::RuleTable_alias;
  return ruleIndex;
}

// Notifies the listener on entry, provided it is a SQLParserListener.
void SQLParser::Table_aliasContext::enterRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->enterTable_alias(this);
  }
}

// Notifies the listener on exit, provided it is a SQLParserListener.
void SQLParser::Table_aliasContext::exitRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->exitTable_alias(this);
  }
}

// Parses the table_alias rule, which consists of a single identifier.
// Returns the context node created for this invocation; recognition errors
// are reported, recorded on the node and recovered from here.
SQLParser::Table_aliasContext *SQLParser::table_alias() {
  Table_aliasContext *ruleCtx =
      _tracker.createInstance<Table_aliasContext>(_ctx, getState());
  enterRule(ruleCtx, 56, SQLParser::RuleTable_alias);

  // exitRule() is handed to `finally` so it is tied to this scope's lifetime.
  auto exitGuard = finally([this] { exitRule(); });
  try {
    enterOuterAlt(ruleCtx, 1);
    setState(287);
    identifier();
  } catch (RecognitionException &error) {
    _errHandler->reportError(this, error);
    ruleCtx->exception = std::current_exception();
    _errHandler->recover(this, ruleCtx->exception);
  }

  return ruleCtx;
}

//----------------- Field_aliasContext
//------------------------------------------------------------------

// Creates the parse-tree node for one field_alias match; both arguments go
// straight to the ParserRuleContext base.
SQLParser::Field_aliasContext::Field_aliasContext(ParserRuleContext *parent_ctx,
                                                  size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {}

// Child accessor: the IdentifierContext at index 0.
SQLParser::IdentifierContext *SQLParser::Field_aliasContext::identifier() {
  auto *child = getRuleContext<SQLParser::IdentifierContext>(0);
  return child;
}

// Token accessor: the AS terminal at index 0.
tree::TerminalNode *SQLParser::Field_aliasContext::AS() {
  tree::TerminalNode *token = getToken(SQLParser::AS, 0);
  return token;
}


// Identifies this node as belonging to the field_alias rule.
size_t SQLParser::Field_aliasContext::getRuleIndex() const {
  const size_t ruleIndex = SQLParser::RuleField_alias;
  return ruleIndex;
}

// Notifies the listener on entry, provided it is a SQLParserListener.
void SQLParser::Field_aliasContext::enterRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->enterField_alias(this);
  }
}

// Notifies the listener on exit, provided it is a SQLParserListener.
void SQLParser::Field_aliasContext::exitRule(
    tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->exitField_alias(this);
  }
}

// Parses the field_alias rule: an optional AS token followed by identifier.
// Whether AS is consumed is decided by adaptivePredict. Returns the context
// node created for this invocation; recognition errors are reported,
// recorded on the node and recovered from here.
SQLParser::Field_aliasContext *SQLParser::field_alias() {
  Field_aliasContext *ruleCtx =
      _tracker.createInstance<Field_aliasContext>(_ctx, getState());
  enterRule(ruleCtx, 58, SQLParser::RuleField_alias);

  // exitRule() is handed to `finally` so it is tied to this scope's lifetime.
  auto exitGuard = finally([this] { exitRule(); });
  try {
    enterOuterAlt(ruleCtx, 1);
    setState(290);
    _errHandler->sync(this);

    // Prediction result 1 selects the branch that consumes AS; any other
    // result skips it.
    const auto prediction =
        getInterpreter<atn::ParserATNSimulator>()->adaptivePredict(_input, 31,
                                                                   _ctx);
    if (prediction == 1) {
      setState(289);
      match(SQLParser::AS);
    }
    setState(292);
    identifier();
  } catch (RecognitionException &error) {
    _errHandler->reportError(this, error);
    ruleCtx->exception = std::current_exception();
    _errHandler->recover(this, ruleCtx->exception);
  }

  return ruleCtx;
}

//----------------- NumericContext
//------------------------------------------------------------------

// Creates the parse-tree node for one numeric match; both arguments go
// straight to the ParserRuleContext base.
SQLParser::NumericContext::NumericContext(ParserRuleContext *parent_ctx,
                                          size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {}

// Child accessor: the Int_valueContext at index 0.
SQLParser::Int_valueContext *SQLParser::NumericContext::int_value() {
  auto *child = getRuleContext<SQLParser::Int_valueContext>(0);
  return child;
}

// Child accessor: the Float_valueContext at index 0.
SQLParser::Float_valueContext *SQLParser::NumericContext::float_value() {
  auto *child = getRuleContext<SQLParser::Float_valueContext>(0);
  return child;
}


// Identifies this node as belonging to the numeric rule.
size_t SQLParser::NumericContext::getRuleIndex() const {
  const size_t ruleIndex = SQLParser::RuleNumeric;
  return ruleIndex;
}

// Notifies the listener on entry, provided it is a SQLParserListener.
void SQLParser::NumericContext::enterRule(tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->enterNumeric(this);
  }
}

// Notifies the listener on exit, provided it is a SQLParserListener.
void SQLParser::NumericContext::exitRule(tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->exitNumeric(this);
  }
}

// Parses the numeric rule: int_value when the next token is INTEGER,
// float_value when it is FLOAT. Any other token raises NoViableAltException,
// which is handled by the catch block. Returns the context node created for
// this invocation.
SQLParser::NumericContext *SQLParser::numeric() {
  NumericContext *ruleCtx =
      _tracker.createInstance<NumericContext>(_ctx, getState());
  enterRule(ruleCtx, 60, SQLParser::RuleNumeric);

  // exitRule() is handed to `finally` so it is tied to this scope's lifetime.
  auto exitGuard = finally([this] { exitRule(); });
  try {
    setState(296);
    _errHandler->sync(this);

    const size_t nextToken = _input->LA(1);
    if (nextToken == SQLParser::INTEGER) {
      // Alternative 1: integer literal.
      enterOuterAlt(ruleCtx, 1);
      setState(294);
      int_value();
    } else if (nextToken == SQLParser::FLOAT) {
      // Alternative 2: floating-point literal.
      enterOuterAlt(ruleCtx, 2);
      setState(295);
      float_value();
    } else {
      throw NoViableAltException(this);
    }
  } catch (RecognitionException &error) {
    _errHandler->reportError(this, error);
    ruleCtx->exception = std::current_exception();
    _errHandler->recover(this, ruleCtx->exception);
  }

  return ruleCtx;
}

//----------------- Int_valueContext
//------------------------------------------------------------------

// Creates the parse-tree node for one int_value match; both arguments go
// straight to the ParserRuleContext base.
SQLParser::Int_valueContext::Int_valueContext(ParserRuleContext *parent_ctx,
                                              size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {}

// Token accessor: the INTEGER terminal at index 0.
tree::TerminalNode *SQLParser::Int_valueContext::INTEGER() {
  tree::TerminalNode *token = getToken(SQLParser::INTEGER, 0);
  return token;
}


// Identifies this node as belonging to the int_value rule.
size_t SQLParser::Int_valueContext::getRuleIndex() const {
  const size_t ruleIndex = SQLParser::RuleInt_value;
  return ruleIndex;
}

// Notifies the listener on entry, provided it is a SQLParserListener.
void SQLParser::Int_valueContext::enterRule(tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->enterInt_value(this);
  }
}

// Notifies the listener on exit, provided it is a SQLParserListener.
void SQLParser::Int_valueContext::exitRule(tree::ParseTreeListener *listener) {
  if (auto *sqlListener = dynamic_cast<SQLParserListener *>(listener)) {
    sqlListener->exitInt_value(this);
  }
}

// Parses the int_value rule, which matches a single INTEGER token.
// Returns the context node created for this invocation; recognition errors
// are reported, recorded on the node and recovered from here.
SQLParser::Int_valueContext *SQLParser::int_value() {
  Int_valueContext *ruleCtx =
      _tracker.createInstance<Int_valueContext>(_ctx, getState());
  enterRule(ruleCtx, 62, SQLParser::RuleInt_value);

  // exitRule() is handed to `finally` so it is tied to this scope's lifetime.
  auto exitGuard = finally([this] { exitRule(); });
  try {
    enterOuterAlt(ruleCtx, 1);
    setState(298);
    match(SQLParser::INTEGER);
  } catch (RecognitionException &error) {
    _errHandler->reportError(this, error);
    ruleCtx->exception = std::current_exception();
    _errHandler->recover(this, ruleCtx->exception);
  }

  return ruleCtx;
}

//----------------- Float_valueContext
//------------------------------------------------------------------

SQLParser::Float_valueContext::Float_valueContext(ParserRuleContext *parent_ctx,
                                                  size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {}

tree::TerminalNode *SQLParser::Float_valueContext::FLOAT() {
  return getToken(SQLParser::FLOAT, 0);
}


size_t SQLParser::Float_valueContext::getRuleIndex() const {
  return SQLParser::RuleFloat_value;
}

// Listener hook: calls enterFloat_value() when `listener` is a
// SQLParserListener; any other listener (or a null one) is ignored.
void SQLParser::Float_valueContext::enterRule(
    tree::ParseTreeListener *listener) {
  if (auto *sql_listener = dynamic_cast<SQLParserListener *>(listener)) {
    sql_listener->enterFloat_value(this);
  }
}

// Listener hook: calls exitFloat_value() when `listener` is a
// SQLParserListener; any other listener (or a null one) is ignored.
void SQLParser::Float_valueContext::exitRule(
    tree::ParseTreeListener *listener) {
  if (auto *sql_listener = dynamic_cast<SQLParserListener *>(listener)) {
    sql_listener->exitFloat_value(this);
  }
}

// Parses the `float_value` rule: a single FLOAT token.
//
// A RecognitionException thrown while matching is caught here: it is reported
// to the error handler, stored on the rule context, and passed to the
// handler's recover().
//
// Returns the context created for this rule invocation.
SQLParser::Float_valueContext *SQLParser::float_value() {
  Float_valueContext *_localctx =
      _tracker.createInstance<Float_valueContext>(_ctx, getState());
  enterRule(_localctx, 64, SQLParser::RuleFloat_value);

  // `this` is captured explicitly: implicit capture of `this` through [=] is
  // deprecated since C++20. finally() presumably yields a guard that runs the
  // lambda when `onExit` goes out of scope -- confirm against the runtime.
  auto onExit = finally([this] { exitRule(); });
  try {
    enterOuterAlt(_localctx, 1);
    setState(300);
    match(SQLParser::FLOAT);

  } catch (RecognitionException &e) {
    _errHandler->reportError(this, e);
    _localctx->exception = std::current_exception();
    _errHandler->recover(this, _localctx->exception);
  }

  return _localctx;
}

//----------------- Quoted_stringContext
//------------------------------------------------------------------

// Constructs a Quoted_stringContext by forwarding the parent context and the
// invoking state to the ParserRuleContext base. No other state is set here.
SQLParser::Quoted_stringContext::Quoted_stringContext(
    ParserRuleContext *parent_ctx, size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {}

// Returns whatever getToken() yields for token type SQUOTA_STRING at index 0
// within this context.
tree::TerminalNode *SQLParser::Quoted_stringContext::SQUOTA_STRING() {
  tree::TerminalNode *const squote_node =
      getToken(SQLParser::SQUOTA_STRING, 0);
  return squote_node;
}

// Returns whatever getToken() yields for token type DQUOTA_STRING at index 0
// within this context.
tree::TerminalNode *SQLParser::Quoted_stringContext::DQUOTA_STRING() {
  tree::TerminalNode *const dquote_node =
      getToken(SQLParser::DQUOTA_STRING, 0);
  return dquote_node;
}


// Identifies the grammar rule this context belongs to (RuleQuoted_string).
size_t SQLParser::Quoted_stringContext::getRuleIndex() const {
  const size_t rule_index = SQLParser::RuleQuoted_string;
  return rule_index;
}

// Listener hook: calls enterQuoted_string() when `listener` is a
// SQLParserListener; any other listener (or a null one) is ignored.
void SQLParser::Quoted_stringContext::enterRule(
    tree::ParseTreeListener *listener) {
  if (auto *sql_listener = dynamic_cast<SQLParserListener *>(listener)) {
    sql_listener->enterQuoted_string(this);
  }
}

// Listener hook: calls exitQuoted_string() when `listener` is a
// SQLParserListener; any other listener (or a null one) is ignored.
void SQLParser::Quoted_stringContext::exitRule(
    tree::ParseTreeListener *listener) {
  if (auto *sql_listener = dynamic_cast<SQLParserListener *>(listener)) {
    sql_listener->exitQuoted_string(this);
  }
}

// Parses the `quoted_string` rule: one SQUOTA_STRING or DQUOTA_STRING token.
//
// If the lookahead token is neither, the error handler's recoverInline() is
// invoked; otherwise the match is reported and the token consumed. A
// RecognitionException thrown along the way is caught here: it is reported to
// the error handler, stored on the rule context, and passed to recover().
//
// Returns the context created for this rule invocation.
SQLParser::Quoted_stringContext *SQLParser::quoted_string() {
  Quoted_stringContext *_localctx =
      _tracker.createInstance<Quoted_stringContext>(_ctx, getState());
  enterRule(_localctx, 66, SQLParser::RuleQuoted_string);

  // `this` is captured explicitly: implicit capture of `this` through [=] is
  // deprecated since C++20. finally() presumably yields a guard that runs the
  // lambda when `onExit` goes out of scope -- confirm against the runtime.
  auto onExit = finally([this] { exitRule(); });
  try {
    enterOuterAlt(_localctx, 1);
    setState(302);
    // Type of the next unconsumed token.
    const size_t _la = _input->LA(1);
    if (!(_la == SQLParser::SQUOTA_STRING ||
          _la == SQLParser::DQUOTA_STRING)) {
      _errHandler->recoverInline(this);
    } else {
      _errHandler->reportMatch(this);
      consume();
    }

  } catch (RecognitionException &e) {
    _errHandler->reportError(this, e);
    _localctx->exception = std::current_exception();
    _errHandler->recover(this, _localctx->exception);
  }

  return _localctx;
}

//----------------- Bool_valueContext
//------------------------------------------------------------------

// Constructs a Bool_valueContext by forwarding the parent context and the
// invoking state to the ParserRuleContext base. No other state is set here.
SQLParser::Bool_valueContext::Bool_valueContext(ParserRuleContext *parent_ctx,
                                                size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {}

// Returns whatever getToken() yields for token type TRUE_V at index 0 within
// this context.
tree::TerminalNode *SQLParser::Bool_valueContext::TRUE_V() {
  tree::TerminalNode *const true_node = getToken(SQLParser::TRUE_V, 0);
  return true_node;
}

// Returns whatever getToken() yields for token type FALSE_V at index 0 within
// this context.
tree::TerminalNode *SQLParser::Bool_valueContext::FALSE_V() {
  tree::TerminalNode *const false_node = getToken(SQLParser::FALSE_V, 0);
  return false_node;
}


// Identifies the grammar rule this context belongs to (RuleBool_value).
size_t SQLParser::Bool_valueContext::getRuleIndex() const {
  const size_t rule_index = SQLParser::RuleBool_value;
  return rule_index;
}

// Listener hook: calls enterBool_value() when `listener` is a
// SQLParserListener; any other listener (or a null one) is ignored.
void SQLParser::Bool_valueContext::enterRule(
    tree::ParseTreeListener *listener) {
  if (auto *sql_listener = dynamic_cast<SQLParserListener *>(listener)) {
    sql_listener->enterBool_value(this);
  }
}

// Listener hook: calls exitBool_value() when `listener` is a
// SQLParserListener; any other listener (or a null one) is ignored.
void SQLParser::Bool_valueContext::exitRule(tree::ParseTreeListener *listener) {
  if (auto *sql_listener = dynamic_cast<SQLParserListener *>(listener)) {
    sql_listener->exitBool_value(this);
  }
}

// Parses the `bool_value` rule: one TRUE_V or FALSE_V token.
//
// If the lookahead token is neither, the error handler's recoverInline() is
// invoked; otherwise the match is reported and the token consumed. A
// RecognitionException thrown along the way is caught here: it is reported to
// the error handler, stored on the rule context, and passed to recover().
//
// Returns the context created for this rule invocation.
SQLParser::Bool_valueContext *SQLParser::bool_value() {
  Bool_valueContext *_localctx =
      _tracker.createInstance<Bool_valueContext>(_ctx, getState());
  enterRule(_localctx, 68, SQLParser::RuleBool_value);

  // `this` is captured explicitly: implicit capture of `this` through [=] is
  // deprecated since C++20. finally() presumably yields a guard that runs the
  // lambda when `onExit` goes out of scope -- confirm against the runtime.
  auto onExit = finally([this] { exitRule(); });
  try {
    enterOuterAlt(_localctx, 1);
    setState(304);
    // Type of the next unconsumed token.
    const size_t _la = _input->LA(1);
    if (!(_la == SQLParser::TRUE_V || _la == SQLParser::FALSE_V)) {
      _errHandler->recoverInline(this);
    } else {
      _errHandler->reportMatch(this);
      consume();
    }

  } catch (RecognitionException &e) {
    _errHandler->reportError(this, e);
    _localctx->exception = std::current_exception();
    _errHandler->recover(this, _localctx->exception);
  }

  return _localctx;
}

//----------------- IdentifierContext
//------------------------------------------------------------------

// Constructs an IdentifierContext by forwarding the parent context and the
// invoking state to the ParserRuleContext base. No other state is set here.
SQLParser::IdentifierContext::IdentifierContext(ParserRuleContext *parent_ctx,
                                                size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {}

// Returns whatever getRuleContext() yields for a Regular_idContext child at
// index 0 of this context.
SQLParser::Regular_idContext *SQLParser::IdentifierContext::regular_id() {
  SQLParser::Regular_idContext *const child =
      getRuleContext<SQLParser::Regular_idContext>(0);
  return child;
}


// Identifies the grammar rule this context belongs to (RuleIdentifier).
size_t SQLParser::IdentifierContext::getRuleIndex() const {
  const size_t rule_index = SQLParser::RuleIdentifier;
  return rule_index;
}

// Listener hook: calls enterIdentifier() when `listener` is a
// SQLParserListener; any other listener (or a null one) is ignored.
void SQLParser::IdentifierContext::enterRule(
    tree::ParseTreeListener *listener) {
  if (auto *sql_listener = dynamic_cast<SQLParserListener *>(listener)) {
    sql_listener->enterIdentifier(this);
  }
}

// Listener hook: calls exitIdentifier() when `listener` is a
// SQLParserListener; any other listener (or a null one) is ignored.
void SQLParser::IdentifierContext::exitRule(tree::ParseTreeListener *listener) {
  if (auto *sql_listener = dynamic_cast<SQLParserListener *>(listener)) {
    sql_listener->exitIdentifier(this);
  }
}

// Parses the `identifier` rule, which consists of a single `regular_id`
// sub-rule invocation.
//
// A RecognitionException thrown while parsing is caught here: it is reported
// to the error handler, stored on the rule context, and passed to the
// handler's recover().
//
// Returns the context created for this rule invocation.
SQLParser::IdentifierContext *SQLParser::identifier() {
  IdentifierContext *_localctx =
      _tracker.createInstance<IdentifierContext>(_ctx, getState());
  enterRule(_localctx, 70, SQLParser::RuleIdentifier);

  // `this` is captured explicitly: implicit capture of `this` through [=] is
  // deprecated since C++20. finally() presumably yields a guard that runs the
  // lambda when `onExit` goes out of scope -- confirm against the runtime.
  auto onExit = finally([this] { exitRule(); });
  try {
    enterOuterAlt(_localctx, 1);
    setState(306);
    regular_id();

  } catch (RecognitionException &e) {
    _errHandler->reportError(this, e);
    _localctx->exception = std::current_exception();
    _errHandler->recover(this, _localctx->exception);
  }

  return _localctx;
}

//----------------- Ne_opContext
//------------------------------------------------------------------

// Constructs a Ne_opContext by forwarding the parent context and the invoking
// state to the ParserRuleContext base. No other state is set here.
SQLParser::Ne_opContext::Ne_opContext(ParserRuleContext *parent_ctx,
                                      size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {}

// Returns whatever getToken() yields for token type NE_OP at index 0 within
// this context.
tree::TerminalNode *SQLParser::Ne_opContext::NE_OP() {
  tree::TerminalNode *const ne_node = getToken(SQLParser::NE_OP, 0);
  return ne_node;
}


// Identifies the grammar rule this context belongs to (RuleNe_op).
size_t SQLParser::Ne_opContext::getRuleIndex() const {
  const size_t rule_index = SQLParser::RuleNe_op;
  return rule_index;
}

// Listener hook: calls enterNe_op() when `listener` is a SQLParserListener;
// any other listener (or a null one) is ignored.
void SQLParser::Ne_opContext::enterRule(tree::ParseTreeListener *listener) {
  if (auto *sql_listener = dynamic_cast<SQLParserListener *>(listener)) {
    sql_listener->enterNe_op(this);
  }
}

// Listener hook: calls exitNe_op() when `listener` is a SQLParserListener;
// any other listener (or a null one) is ignored.
void SQLParser::Ne_opContext::exitRule(tree::ParseTreeListener *listener) {
  if (auto *sql_listener = dynamic_cast<SQLParserListener *>(listener)) {
    sql_listener->exitNe_op(this);
  }
}

// Parses the `ne_op` rule: a single NE_OP token.
//
// A RecognitionException thrown while matching is caught here: it is reported
// to the error handler, stored on the rule context, and passed to the
// handler's recover().
//
// Returns the context created for this rule invocation.
SQLParser::Ne_opContext *SQLParser::ne_op() {
  Ne_opContext *_localctx =
      _tracker.createInstance<Ne_opContext>(_ctx, getState());
  enterRule(_localctx, 72, SQLParser::RuleNe_op);

  // `this` is captured explicitly: implicit capture of `this` through [=] is
  // deprecated since C++20. finally() presumably yields a guard that runs the
  // lambda when `onExit` goes out of scope -- confirm against the runtime.
  auto onExit = finally([this] { exitRule(); });
  try {
    enterOuterAlt(_localctx, 1);
    setState(308);
    match(SQLParser::NE_OP);

  } catch (RecognitionException &e) {
    _errHandler->reportError(this, e);
    _localctx->exception = std::current_exception();
    _errHandler->recover(this, _localctx->exception);
  }

  return _localctx;
}

//----------------- Ge_opContext
//------------------------------------------------------------------

// Constructs a Ge_opContext by forwarding the parent context and the invoking
// state to the ParserRuleContext base. No other state is set here.
SQLParser::Ge_opContext::Ge_opContext(ParserRuleContext *parent_ctx,
                                      size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {}

// Returns whatever getToken() yields for token type GE_OP at index 0 within
// this context.
tree::TerminalNode *SQLParser::Ge_opContext::GE_OP() {
  tree::TerminalNode *const ge_node = getToken(SQLParser::GE_OP, 0);
  return ge_node;
}

// Returns whatever getToken() yields for token type G_OP at index 0 within
// this context.
tree::TerminalNode *SQLParser::Ge_opContext::G_OP() {
  tree::TerminalNode *const gt_node = getToken(SQLParser::G_OP, 0);
  return gt_node;
}

// Returns whatever getToken() yields for token type E_OP at index 0 within
// this context.
tree::TerminalNode *SQLParser::Ge_opContext::E_OP() {
  tree::TerminalNode *const eq_node = getToken(SQLParser::E_OP, 0);
  return eq_node;
}


// Identifies the grammar rule this context belongs to (RuleGe_op).
size_t SQLParser::Ge_opContext::getRuleIndex() const {
  const size_t rule_index = SQLParser::RuleGe_op;
  return rule_index;
}

// Listener hook: calls enterGe_op() when `listener` is a SQLParserListener;
// any other listener (or a null one) is ignored.
void SQLParser::Ge_opContext::enterRule(tree::ParseTreeListener *listener) {
  if (auto *sql_listener = dynamic_cast<SQLParserListener *>(listener)) {
    sql_listener->enterGe_op(this);
  }
}

// Listener hook: calls exitGe_op() when `listener` is a SQLParserListener;
// any other listener (or a null one) is ignored.
void SQLParser::Ge_opContext::exitRule(tree::ParseTreeListener *listener) {
  if (auto *sql_listener = dynamic_cast<SQLParserListener *>(listener)) {
    sql_listener->exitGe_op(this);
  }
}

// Parses the `ge_op` rule, which has two alternatives chosen by one token of
// lookahead:
//   1. a single GE_OP token, or
//   2. a G_OP token followed by an E_OP token.
// Any other lookahead raises NoViableAltException.
//
// A RecognitionException thrown while parsing is caught here: it is reported
// to the error handler, stored on the rule context, and passed to the
// handler's recover().
//
// Returns the context created for this rule invocation.
SQLParser::Ge_opContext *SQLParser::ge_op() {
  Ge_opContext *_localctx =
      _tracker.createInstance<Ge_opContext>(_ctx, getState());
  enterRule(_localctx, 74, SQLParser::RuleGe_op);

  // `this` is captured explicitly: implicit capture of `this` through [=] is
  // deprecated since C++20. finally() presumably yields a guard that runs the
  // lambda when `onExit` goes out of scope -- confirm against the runtime.
  auto onExit = finally([this] { exitRule(); });
  try {
    setState(313);
    _errHandler->sync(this);
    switch (_input->LA(1)) {
      case SQLParser::GE_OP: {
        enterOuterAlt(_localctx, 1);
        setState(310);
        match(SQLParser::GE_OP);
        break;
      }

      case SQLParser::G_OP: {
        enterOuterAlt(_localctx, 2);
        setState(311);
        match(SQLParser::G_OP);
        setState(312);
        match(SQLParser::E_OP);
        break;
      }

      default:
        throw NoViableAltException(this);
    }

  } catch (RecognitionException &e) {
    _errHandler->reportError(this, e);
    _localctx->exception = std::current_exception();
    _errHandler->recover(this, _localctx->exception);
  }

  return _localctx;
}

//----------------- Le_opContext
//------------------------------------------------------------------

// Constructs a Le_opContext by forwarding the parent context and the invoking
// state to the ParserRuleContext base. No other state is set here.
SQLParser::Le_opContext::Le_opContext(ParserRuleContext *parent_ctx,
                                      size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {}

// Returns whatever getToken() yields for token type LE_OP at index 0 within
// this context.
tree::TerminalNode *SQLParser::Le_opContext::LE_OP() {
  tree::TerminalNode *const le_node = getToken(SQLParser::LE_OP, 0);
  return le_node;
}

// Returns whatever getToken() yields for token type L_OP at index 0 within
// this context.
tree::TerminalNode *SQLParser::Le_opContext::L_OP() {
  tree::TerminalNode *const lt_node = getToken(SQLParser::L_OP, 0);
  return lt_node;
}

// Returns whatever getToken() yields for token type E_OP at index 0 within
// this context.
tree::TerminalNode *SQLParser::Le_opContext::E_OP() {
  tree::TerminalNode *const eq_node = getToken(SQLParser::E_OP, 0);
  return eq_node;
}


// Identifies the grammar rule this context belongs to (RuleLe_op).
size_t SQLParser::Le_opContext::getRuleIndex() const {
  const size_t rule_index = SQLParser::RuleLe_op;
  return rule_index;
}

// Listener hook: calls enterLe_op() when `listener` is a SQLParserListener;
// any other listener (or a null one) is ignored.
void SQLParser::Le_opContext::enterRule(tree::ParseTreeListener *listener) {
  if (auto *sql_listener = dynamic_cast<SQLParserListener *>(listener)) {
    sql_listener->enterLe_op(this);
  }
}

// Listener hook: calls exitLe_op() when `listener` is a SQLParserListener;
// any other listener (or a null one) is ignored.
void SQLParser::Le_opContext::exitRule(tree::ParseTreeListener *listener) {
  if (auto *sql_listener = dynamic_cast<SQLParserListener *>(listener)) {
    sql_listener->exitLe_op(this);
  }
}

// Parses the `le_op` rule, which has two alternatives chosen by one token of
// lookahead:
//   1. a single LE_OP token, or
//   2. an L_OP token followed by an E_OP token.
// Any other lookahead raises NoViableAltException.
//
// A RecognitionException thrown while parsing is caught here: it is reported
// to the error handler, stored on the rule context, and passed to the
// handler's recover().
//
// Returns the context created for this rule invocation.
SQLParser::Le_opContext *SQLParser::le_op() {
  Le_opContext *_localctx =
      _tracker.createInstance<Le_opContext>(_ctx, getState());
  enterRule(_localctx, 76, SQLParser::RuleLe_op);

  // `this` is captured explicitly: implicit capture of `this` through [=] is
  // deprecated since C++20. finally() presumably yields a guard that runs the
  // lambda when `onExit` goes out of scope -- confirm against the runtime.
  auto onExit = finally([this] { exitRule(); });
  try {
    setState(318);
    _errHandler->sync(this);
    switch (_input->LA(1)) {
      case SQLParser::LE_OP: {
        enterOuterAlt(_localctx, 1);
        setState(315);
        match(SQLParser::LE_OP);
        break;
      }

      case SQLParser::L_OP: {
        enterOuterAlt(_localctx, 2);
        setState(316);
        match(SQLParser::L_OP);
        setState(317);
        match(SQLParser::E_OP);
        break;
      }

      default:
        throw NoViableAltException(this);
    }

  } catch (RecognitionException &e) {
    _errHandler->reportError(this, e);
    _localctx->exception = std::current_exception();
    _errHandler->recover(this, _localctx->exception);
  }

  return _localctx;
}

//----------------- Regular_idContext
//------------------------------------------------------------------

// Constructs a Regular_idContext by forwarding the parent context and the
// invoking state to the ParserRuleContext base. No other state is set here.
SQLParser::Regular_idContext::Regular_idContext(ParserRuleContext *parent_ctx,
                                                size_t invoking_state)
    : ParserRuleContext(parent_ctx, invoking_state) {}

// Returns whatever getToken() yields for token type REGULAR_ID at index 0
// within this context.
tree::TerminalNode *SQLParser::Regular_idContext::REGULAR_ID() {
  tree::TerminalNode *const token_node = getToken(SQLParser::REGULAR_ID, 0);
  return token_node;
}

// Returns whatever getToken() yields for token type OR at index 0 within this
// context.
tree::TerminalNode *SQLParser::Regular_idContext::OR() {
  tree::TerminalNode *const token_node = getToken(SQLParser::OR, 0);
  return token_node;
}

// Returns whatever getToken() yields for token type AND at index 0 within
// this context.
tree::TerminalNode *SQLParser::Regular_idContext::AND() {
  tree::TerminalNode *const token_node = getToken(SQLParser::AND, 0);
  return token_node;
}

// Returns whatever getToken() yields for token type NOT at index 0 within
// this context.
tree::TerminalNode *SQLParser::Regular_idContext::NOT() {
  tree::TerminalNode *const token_node = getToken(SQLParser::NOT, 0);
  return token_node;
}

// Returns whatever getToken() yields for token type IN at index 0 within this
// context.
tree::TerminalNode *SQLParser::Regular_idContext::IN() {
  tree::TerminalNode *const token_node = getToken(SQLParser::IN, 0);
  return token_node;
}

// Returns whatever getToken() yields for token type BETWEEN at index 0 within
// this context.
tree::TerminalNode *SQLParser::Regular_idContext::BETWEEN() {
  tree::TerminalNode *const token_node = getToken(SQLParser::BETWEEN, 0);
  return token_node;
}

// Returns whatever getToken() yields for token type LIKE at index 0 within
// this context.
tree::TerminalNode *SQLParser::Regular_idContext::LIKE() {
  tree::TerminalNode *const token_node = getToken(SQLParser::LIKE, 0);
  return token_node;
}

// Returns whatever getToken() yields for token type WHERE at index 0 within
// this context.
tree::TerminalNode *SQLParser::Regular_idContext::WHERE() {
  tree::TerminalNode *const token_node = getToken(SQLParser::WHERE, 0);
  return token_node;
}

// Returns whatever getToken() yields for token type SELECT at index 0 within
// this context.
tree::TerminalNode *SQLParser::Regular_idContext::SELECT() {
  tree::TerminalNode *const token_node = getToken(SQLParser::SELECT, 0);
  return token_node;
}

// Returns whatever getToken() yields for token type AS at index 0 within this
// context.
tree::TerminalNode *SQLParser::Regular_idContext::AS() {
  tree::TerminalNode *const token_node = getToken(SQLParser::AS, 0);
  return token_node;
}

// Returns whatever getToken() yields for token type BY at index 0 within this
// context.
tree::TerminalNode *SQLParser::Regular_idContext::BY() {
  tree::TerminalNode *const token_node = getToken(SQLParser::BY, 0);
  return token_node;
}

// Returns whatever getToken() yields for token type ORDER at index 0 within
// this context.
tree::TerminalNode *SQLParser::Regular_idContext::ORDER() {
  tree::TerminalNode *const token_node = getToken(SQLParser::ORDER, 0);
  return token_node;
}

// Returns whatever getToken() yields for token type ASC at index 0 within
// this context.
tree::TerminalNode *SQLParser::Regular_idContext::ASC() {
  tree::TerminalNode *const token_node = getToken(SQLParser::ASC, 0);
  return token_node;
}

// Returns whatever getToken() yields for token type DESC at index 0 within
// this context.
tree::TerminalNode *SQLParser::Regular_idContext::DESC() {
  tree::TerminalNode *const token_node = getToken(SQLParser::DESC, 0);
  return token_node;
}

// Returns whatever getToken() yields for token type LIMIT at index 0 within
// this context.
tree::TerminalNode *SQLParser::Regular_idContext::LIMIT() {
  tree::TerminalNode *const token_node = getToken(SQLParser::LIMIT, 0);
  return token_node;
}


// Identifies the grammar rule this context belongs to (RuleRegular_id).
size_t SQLParser::Regular_idContext::getRuleIndex() const {
  const size_t rule_index = SQLParser::RuleRegular_id;
  return rule_index;
}

// Listener hook: calls enterRegular_id() when `listener` is a
// SQLParserListener; any other listener (or a null one) is ignored.
void SQLParser::Regular_idContext::enterRule(
    tree::ParseTreeListener *listener) {
  if (auto *sql_listener = dynamic_cast<SQLParserListener *>(listener)) {
    sql_listener->enterRegular_id(this);
  }
}

// Listener hook: calls exitRegular_id() when `listener` is a
// SQLParserListener; any other listener (or a null one) is ignored.
void SQLParser::Regular_idContext::exitRule(tree::ParseTreeListener *listener) {
  if (auto *sql_listener = dynamic_cast<SQLParserListener *>(listener)) {
    sql_listener->exitRegular_id(this);
  }
}

// Parses the `regular_id` rule: exactly one token that is either REGULAR_ID
// or one of the keywords OR, AND, NOT, IN, BETWEEN, LIKE, WHERE, SELECT, AS,
// BY, ORDER, ASC, DESC, LIMIT.
//
// If the lookahead token is not in that set, the error handler's
// recoverInline() is invoked; otherwise the match is reported and the token
// consumed. A RecognitionException thrown along the way is caught here: it is
// reported to the error handler, stored on the rule context, and passed to
// recover().
//
// Returns the context created for this rule invocation.
SQLParser::Regular_idContext *SQLParser::regular_id() {
  Regular_idContext *_localctx =
      _tracker.createInstance<Regular_idContext>(_ctx, getState());
  enterRule(_localctx, 78, SQLParser::RuleRegular_id);

  // `this` is captured explicitly: implicit capture of `this` through [=] is
  // deprecated since C++20. finally() presumably yields a guard that runs the
  // lambda when `onExit` goes out of scope -- confirm against the runtime.
  auto onExit = finally([this] { exitRule(); });
  try {
    enterOuterAlt(_localctx, 1);
    setState(320);
    // Type of the next unconsumed token.
    const size_t _la = _input->LA(1);
    // Set membership via a 64-bit mask. The `(_la & ~0x3fULL) == 0` guard
    // rejects any value >= 64 before it is used as a shift count, so the
    // shift `1ULL << _la` is only evaluated for in-range values.
    if (!((((_la & ~0x3fULL) == 0) &&
           ((1ULL << _la) &
            ((1ULL << SQLParser::OR) | (1ULL << SQLParser::AND) |
             (1ULL << SQLParser::NOT) | (1ULL << SQLParser::IN) |
             (1ULL << SQLParser::BETWEEN) | (1ULL << SQLParser::LIKE) |
             (1ULL << SQLParser::WHERE) | (1ULL << SQLParser::SELECT) |
             (1ULL << SQLParser::AS) | (1ULL << SQLParser::BY) |
             (1ULL << SQLParser::ORDER) | (1ULL << SQLParser::ASC) |
             (1ULL << SQLParser::DESC) | (1ULL << SQLParser::LIMIT) |
             (1ULL << SQLParser::REGULAR_ID))) != 0))) {
      _errHandler->recoverInline(this);
    } else {
      _errHandler->reportMatch(this);
      consume();
    }

  } catch (RecognitionException &e) {
    _errHandler->reportError(this, e);
    _localctx->exception = std::current_exception();
    _errHandler->recover(this, _localctx->exception);
  }

  return _localctx;
}

// Dispatches a semantic-predicate query to the per-rule handler.
//
// Only rule index 5 has a handler (logic_exprSempred); for every other rule
// index the predicate is reported as satisfied (true).
bool SQLParser::sempred(RuleContext *context, size_t ruleIndex,
                        size_t predicateIndex) {
  if (ruleIndex == 5) {
    Logic_exprContext *const logic_ctx =
        dynamic_cast<Logic_exprContext *>(context);
    return logic_exprSempred(logic_ctx, predicateIndex);
  }
  return true;
}

// Evaluates the predicates attached to the logic_expr rule.
//
// Predicate 0 asks precpred() about precedence 3 and predicate 1 about
// precedence 2, both against the current context; any other predicate index
// yields true. The context argument is unused.
bool SQLParser::logic_exprSempred(Logic_exprContext * /*_localctx*/,
                                  size_t predicateIndex) {
  if (predicateIndex == 0) {
    return precpred(_ctx, 3);
  }
  if (predicateIndex == 1) {
    return precpred(_ctx, 2);
  }
  return true;
}

// Static vars and initialization.
// Out-of-line definitions of SQLParser's static data members. All four are
// default-constructed here.
std::vector<dfa::DFA> SQLParser::_decisionToDFA;
atn::PredictionContextCache SQLParser::_sharedContextCache;

// We own the ATN which in turn owns the ATN states.
atn::ATN SQLParser::_atn;
// Starts out empty; the SQLParser::Initializer constructor assigns the
// serialized ATN words to it.
std::vector<uint16_t> SQLParser::_serializedATN;

// Names of the grammar rules. The order appears to follow the rule index
// (e.g. entry 5 is "logic_expr", the rule sempred() dispatches on for
// ruleIndex 5), so entries must not be reordered.
std::vector<std::string> SQLParser::_ruleNames = {"swallow_to_semi",
                                                  "compilation_unit",
                                                  "logic_expr_unit",
                                                  "unit_statement",
                                                  "where_clause",
                                                  "logic_expr",
                                                  "enclosed_expr",
                                                  "relation_expr",
                                                  "rel_oper",
                                                  "value_expr",
                                                  "in_value_expr_list",
                                                  "in_value_expr",
                                                  "constant",
                                                  "constant_num_and_str",
                                                  "matrix",
                                                  "vector_expr",
                                                  "function_value_expr",
                                                  "function_call",
                                                  "dql_statement",
                                                  "select_statement",
                                                  "selected_elements",
                                                  "selected_element",
                                                  "from_clause",
                                                  "order_by_clause",
                                                  "order_by_element",
                                                  "limit_clause",
                                                  "tableview_name",
                                                  "field_name",
                                                  "table_alias",
                                                  "field_alias",
                                                  "numeric",
                                                  "int_value",
                                                  "float_value",
                                                  "quoted_string",
                                                  "bool_value",
                                                  "identifier",
                                                  "ne_op",
                                                  "ge_op",
                                                  "le_op",
                                                  "regular_id"};

// Quoted literal spelling for each token, positioned in parallel with
// _symbolicNames (e.g. entry 1 is "'OR'" here and "OR" there). An empty
// string marks a slot with no literal spelling: slot 0 and the four slots
// that line up with INTEGER, FLOAT, SQUOTA_STRING and DQUOTA_STRING. The
// table is shorter than _symbolicNames; the trailing symbolic-only tokens
// have no entry here. Passed to _vocabulary together with _symbolicNames.
std::vector<std::string> SQLParser::_literalNames = {"",
                                                     "'OR'",
                                                     "'AND'",
                                                     "'NOT'",
                                                     "'IN'",
                                                     "'CONTAIN_ALL'",
                                                     "'CONTAIN_ANY'",
                                                     "'BETWEEN'",
                                                     "'LIKE'",
                                                     "'WHERE'",
                                                     "'SELECT'",
                                                     "'FROM'",
                                                     "'AS'",
                                                     "'BY'",
                                                     "'ORDER'",
                                                     "'ASC'",
                                                     "'DESC'",
                                                     "'LIMIT'",
                                                     "'TRUE'",
                                                     "'FALSE'",
                                                     "'IS'",
                                                     "'NULL'",
                                                     "",
                                                     "",
                                                     "",
                                                     "",
                                                     "'.'",
                                                     "'('",
                                                     "')'",
                                                     "'['",
                                                     "']'",
                                                     "'*'",
                                                     "'+'",
                                                     "'-'",
                                                     "','",
                                                     "'/'",
                                                     "'%'",
                                                     "'@'",
                                                     "':='",
                                                     "'#'",
                                                     "':'",
                                                     "';'",
                                                     "'<='",
                                                     "'>='",
                                                     "'!='",
                                                     "'^'",
                                                     "'~'",
                                                     "'<'",
                                                     "'>'",
                                                     "'='",
                                                     "'||'",
                                                     "'_'"};

// Symbolic name for each token, positioned in parallel with _literalNames;
// slot 0 is empty. The SQLParser::Initializer constructor iterates over this
// table's size when it builds _tokenNames, and the table is passed to
// _vocabulary together with _literalNames. Entries must not be reordered.
std::vector<std::string> SQLParser::_symbolicNames = {"",
                                                      "OR",
                                                      "AND",
                                                      "NOT",
                                                      "IN",
                                                      "CONTAIN_ALL",
                                                      "CONTAIN_ANY",
                                                      "BETWEEN",
                                                      "LIKE",
                                                      "WHERE",
                                                      "SELECT",
                                                      "FROM",
                                                      "AS",
                                                      "BY",
                                                      "ORDER",
                                                      "ASC",
                                                      "DESC",
                                                      "LIMIT",
                                                      "TRUE_V",
                                                      "FALSE_V",
                                                      "IS",
                                                      "NULL_V",
                                                      "INTEGER",
                                                      "FLOAT",
                                                      "SQUOTA_STRING",
                                                      "DQUOTA_STRING",
                                                      "DOT",
                                                      "LP",
                                                      "RP",
                                                      "LMP",
                                                      "RMP",
                                                      "ASTERISK",
                                                      "PLUS_SIGN",
                                                      "MINUS_SIGN",
                                                      "COMMA",
                                                      "SOLIDUS",
                                                      "MOD",
                                                      "AT_SIGN",
                                                      "ASSIGN_OP",
                                                      "SHARP_SIGN",
                                                      "COLON",
                                                      "SEMI",
                                                      "LE_OP",
                                                      "GE_OP",
                                                      "NE_OP",
                                                      "CARET_OP",
                                                      "TILDE_OP",
                                                      "L_OP",
                                                      "G_OP",
                                                      "E_OP",
                                                      "CONCAT_OP",
                                                      "UNDERSCORE",
                                                      "SPACES",
                                                      "VECTOR",
                                                      "SINGLE_LINE_COMMENT",
                                                      "MULTI_LINE_COMMENT",
                                                      "REGULAR_ID"};

// Vocabulary built from the literal and symbolic name tables. This definition
// must stay below both tables: it reads them during its own initialization,
// and statics in one translation unit are initialized in definition order.
dfa::Vocabulary SQLParser::_vocabulary(_literalNames, _symbolicNames);

// Display name per token. Empty here; the SQLParser::Initializer constructor
// appends one entry per _symbolicNames slot, using the literal name when
// there is one, else the symbolic name, else "<INVALID>".
std::vector<std::string> SQLParser::_tokenNames;

SQLParser::Initializer::Initializer() {
  for (size_t i = 0; i < _symbolicNames.size(); ++i) {
    std::string name = _vocabulary.getLiteralName(i);
    if (name.empty()) {
      name = _vocabulary.getSymbolicName(i);
    }

    if (name.empty()) {
      _tokenNames.push_back("<INVALID>");
    } else {
      _tokenNames.push_back(name);
    }
  }

  _serializedATN = {
      0x3,   0x608b, 0xa72a, 0x8133, 0xb9ed, 0x417c, 0x3be7, 0x7786, 0x5964,
      0x3,   0x3a,   0x145,  0x4,    0x2,    0x9,    0x2,    0x4,    0x3,
      0x9,   0x3,    0x4,    0x4,    0x9,    0x4,    0x4,    0x5,    0x9,
      0x5,   0x4,    0x6,    0x9,    0x6,    0x4,    0x7,    0x9,    0x7,
      0x4,   0x8,    0x9,    0x8,    0x4,    0x9,    0x9,    0x9,    0x4,
      0xa,   0x9,    0xa,    0x4,    0xb,    0x9,    0xb,    0x4,    0xc,
      0x9,   0xc,    0x4,    0xd,    0x9,    0xd,    0x4,    0xe,    0x9,
      0xe,   0x4,    0xf,    0x9,    0xf,    0x4,    0x10,   0x9,    0x10,
      0x4,   0x11,   0x9,    0x11,   0x4,    0x12,   0x9,    0x12,   0x4,
      0x13,  0x9,    0x13,   0x4,    0x14,   0x9,    0x14,   0x4,    0x15,
      0x9,   0x15,   0x4,    0x16,   0x9,    0x16,   0x4,    0x17,   0x9,
      0x17,  0x4,    0x18,   0x9,    0x18,   0x4,    0x19,   0x9,    0x19,
      0x4,   0x1a,   0x9,    0x1a,   0x4,    0x1b,   0x9,    0x1b,   0x4,
      0x1c,  0x9,    0x1c,   0x4,    0x1d,   0x9,    0x1d,   0x4,    0x1e,
      0x9,   0x1e,   0x4,    0x1f,   0x9,    0x1f,   0x4,    0x20,   0x9,
      0x20,  0x4,    0x21,   0x9,    0x21,   0x4,    0x22,   0x9,    0x22,
      0x4,   0x23,   0x9,    0x23,   0x4,    0x24,   0x9,    0x24,   0x4,
      0x25,  0x9,    0x25,   0x4,    0x26,   0x9,    0x26,   0x4,    0x27,
      0x9,   0x27,   0x4,    0x28,   0x9,    0x28,   0x4,    0x29,   0x9,
      0x29,  0x3,    0x2,    0x6,    0x2,    0x54,   0xa,    0x2,    0xd,
      0x2,   0xe,    0x2,    0x55,   0x3,    0x3,    0x3,    0x3,    0x5,
      0x3,   0x5a,   0xa,    0x3,    0x6,    0x3,    0x5c,   0xa,    0x3,
      0xd,   0x3,    0xe,    0x3,    0x5d,   0x3,    0x3,    0x3,    0x3,
      0x3,   0x4,    0x3,    0x4,    0x3,    0x4,    0x3,    0x5,    0x3,
      0x5,   0x3,    0x6,    0x3,    0x6,    0x3,    0x6,    0x3,    0x7,
      0x3,   0x7,    0x3,    0x7,    0x5,    0x7,    0x6d,   0xa,    0x7,
      0x3,   0x7,    0x3,    0x7,    0x3,    0x7,    0x3,    0x7,    0x3,
      0x7,   0x3,    0x7,    0x7,    0x7,    0x75,   0xa,    0x7,    0xc,
      0x7,   0xe,    0x7,    0x78,   0xb,    0x7,    0x3,    0x8,    0x3,
      0x8,   0x3,    0x8,    0x3,    0x8,    0x3,    0x9,    0x3,    0x9,
      0x3,   0x9,    0x3,    0x9,    0x3,    0x9,    0x3,    0x9,    0x3,
      0x9,   0x3,    0x9,    0x3,    0x9,    0x3,    0x9,    0x5,    0x9,
      0x88,  0xa,    0x9,    0x3,    0x9,    0x3,    0x9,    0x3,    0x9,
      0x3,   0x9,    0x3,    0x9,    0x3,    0x9,    0x3,    0x9,    0x5,
      0x9,   0x91,   0xa,    0x9,    0x3,    0x9,    0x3,    0x9,    0x3,
      0x9,   0x5,    0x9,    0x96,   0xa,    0x9,    0x3,    0x9,    0x3,
      0x9,   0x3,    0x9,    0x3,    0x9,    0x3,    0x9,    0x5,    0x9,
      0x9d,  0xa,    0x9,    0x3,    0x9,    0x3,    0x9,    0x3,    0x9,
      0x3,   0x9,    0x3,    0x9,    0x3,    0x9,    0x5,    0x9,    0xa5,
      0xa,   0x9,    0x3,    0xa,    0x3,    0xa,    0x3,    0xa,    0x3,
      0xa,   0x3,    0xa,    0x3,    0xa,    0x5,    0xa,    0xad,   0xa,
      0xa,   0x3,    0xb,    0x3,    0xb,    0x5,    0xb,    0xb1,   0xa,
      0xb,   0x3,    0xc,    0x3,    0xc,    0x3,    0xc,    0x7,    0xc,
      0xb6,  0xa,    0xc,    0xc,    0xc,    0xe,    0xc,    0xb9,   0xb,
      0xc,   0x3,    0xd,    0x3,    0xd,    0x5,    0xd,    0xbd,   0xa,
      0xd,   0x3,    0xe,    0x3,    0xe,    0x3,    0xe,    0x3,    0xe,
      0x5,   0xe,    0xc3,   0xa,    0xe,    0x3,    0xf,    0x3,    0xf,
      0x5,   0xf,    0xc7,   0xa,    0xf,    0x3,    0x10,   0x3,    0x10,
      0x3,   0x10,   0x3,    0x10,   0x7,    0x10,   0xcd,   0xa,    0x10,
      0xc,   0x10,   0xe,    0x10,   0xd0,   0xb,    0x10,   0x3,    0x10,
      0x3,   0x10,   0x3,    0x11,   0x3,    0x11,   0x5,    0x11,   0xd6,
      0xa,   0x11,   0x3,    0x12,   0x3,    0x12,   0x5,    0x12,   0xda,
      0xa,   0x12,   0x3,    0x13,   0x3,    0x13,   0x3,    0x13,   0x3,
      0x13,  0x3,    0x13,   0x7,    0x13,   0xe1,   0xa,    0x13,   0xc,
      0x13,  0xe,    0x13,   0xe4,   0xb,    0x13,   0x5,    0x13,   0xe6,
      0xa,   0x13,   0x3,    0x13,   0x3,    0x13,   0x3,    0x14,   0x3,
      0x14,  0x3,    0x15,   0x3,    0x15,   0x3,    0x15,   0x3,    0x15,
      0x5,   0x15,   0xf0,   0xa,    0x15,   0x3,    0x15,   0x5,    0x15,
      0xf3,  0xa,    0x15,   0x3,    0x15,   0x5,    0x15,   0xf6,   0xa,
      0x15,  0x3,    0x16,   0x3,    0x16,   0x3,    0x16,   0x7,    0x16,
      0xfb,  0xa,    0x16,   0xc,    0x16,   0xe,    0x16,   0xfe,   0xb,
      0x16,  0x3,    0x17,   0x3,    0x17,   0x3,    0x17,   0x5,    0x17,
      0x103, 0xa,    0x17,   0x3,    0x17,   0x5,    0x17,   0x106,  0xa,
      0x17,  0x5,    0x17,   0x108,  0xa,    0x17,   0x3,    0x18,   0x3,
      0x18,  0x3,    0x18,   0x3,    0x19,   0x3,    0x19,   0x3,    0x19,
      0x3,   0x19,   0x3,    0x19,   0x7,    0x19,   0x112,  0xa,    0x19,
      0xc,   0x19,   0xe,    0x19,   0x115,  0xb,    0x19,   0x3,    0x1a,
      0x3,   0x1a,   0x5,    0x1a,   0x119,  0xa,    0x1a,   0x3,    0x1b,
      0x3,   0x1b,   0x3,    0x1b,   0x3,    0x1c,   0x3,    0x1c,   0x3,
      0x1d,  0x3,    0x1d,   0x3,    0x1e,   0x3,    0x1e,   0x3,    0x1f,
      0x5,   0x1f,   0x125,  0xa,    0x1f,   0x3,    0x1f,   0x3,    0x1f,
      0x3,   0x20,   0x3,    0x20,   0x5,    0x20,   0x12b,  0xa,    0x20,
      0x3,   0x21,   0x3,    0x21,   0x3,    0x22,   0x3,    0x22,   0x3,
      0x23,  0x3,    0x23,   0x3,    0x24,   0x3,    0x24,   0x3,    0x25,
      0x3,   0x25,   0x3,    0x26,   0x3,    0x26,   0x3,    0x27,   0x3,
      0x27,  0x3,    0x27,   0x5,    0x27,   0x13c,  0xa,    0x27,   0x3,
      0x28,  0x3,    0x28,   0x3,    0x28,   0x5,    0x28,   0x141,  0xa,
      0x28,  0x3,    0x29,   0x3,    0x29,   0x3,    0x29,   0x2,    0x3,
      0xc,   0x2a,   0x2,    0x4,    0x6,    0x8,    0xa,    0xc,    0xe,
      0x10,  0x12,   0x14,   0x16,   0x18,   0x1a,   0x1c,   0x1e,   0x20,
      0x22,  0x24,   0x26,   0x28,   0x2a,   0x2c,   0x2e,   0x30,   0x32,
      0x34,  0x36,   0x38,   0x3a,   0x3c,   0x3e,   0x40,   0x42,   0x44,
      0x46,  0x48,   0x4a,   0x4c,   0x4e,   0x50,   0x2,    0x9,    0x3,
      0x2,   0x2b,   0x2b,   0x4,    0x2,    0x25,   0x25,   0x2b,   0x2b,
      0x3,   0x2,    0x7,    0x8,    0x3,    0x2,    0x11,   0x12,   0x3,
      0x2,   0x1a,   0x1b,   0x3,    0x2,    0x14,   0x15,   0x6,    0x2,
      0x3,   0x6,    0x9,    0xc,    0xe,    0x13,   0x3a,   0x3a,   0x2,
      0x149, 0x2,    0x53,   0x3,    0x2,    0x2,    0x2,    0x4,    0x5b,
      0x3,   0x2,    0x2,    0x2,    0x6,    0x61,   0x3,    0x2,    0x2,
      0x2,   0x8,    0x64,   0x3,    0x2,    0x2,    0x2,    0xa,    0x66,
      0x3,   0x2,    0x2,    0x2,    0xc,    0x6c,   0x3,    0x2,    0x2,
      0x2,   0xe,    0x79,   0x3,    0x2,    0x2,    0x2,    0x10,   0xa4,
      0x3,   0x2,    0x2,    0x2,    0x12,   0xac,   0x3,    0x2,    0x2,
      0x2,   0x14,   0xb0,   0x3,    0x2,    0x2,    0x2,    0x16,   0xb2,
      0x3,   0x2,    0x2,    0x2,    0x18,   0xbc,   0x3,    0x2,    0x2,
      0x2,   0x1a,   0xc2,   0x3,    0x2,    0x2,    0x2,    0x1c,   0xc6,
      0x3,   0x2,    0x2,    0x2,    0x1e,   0xc8,   0x3,    0x2,    0x2,
      0x2,   0x20,   0xd5,   0x3,    0x2,    0x2,    0x2,    0x22,   0xd9,
      0x3,   0x2,    0x2,    0x2,    0x24,   0xdb,   0x3,    0x2,    0x2,
      0x2,   0x26,   0xe9,   0x3,    0x2,    0x2,    0x2,    0x28,   0xeb,
      0x3,   0x2,    0x2,    0x2,    0x2a,   0xf7,   0x3,    0x2,    0x2,
      0x2,   0x2c,   0x107,  0x3,    0x2,    0x2,    0x2,    0x2e,   0x109,
      0x3,   0x2,    0x2,    0x2,    0x30,   0x10c,  0x3,    0x2,    0x2,
      0x2,   0x32,   0x116,  0x3,    0x2,    0x2,    0x2,    0x34,   0x11a,
      0x3,   0x2,    0x2,    0x2,    0x36,   0x11d,  0x3,    0x2,    0x2,
      0x2,   0x38,   0x11f,  0x3,    0x2,    0x2,    0x2,    0x3a,   0x121,
      0x3,   0x2,    0x2,    0x2,    0x3c,   0x124,  0x3,    0x2,    0x2,
      0x2,   0x3e,   0x12a,  0x3,    0x2,    0x2,    0x2,    0x40,   0x12c,
      0x3,   0x2,    0x2,    0x2,    0x42,   0x12e,  0x3,    0x2,    0x2,
      0x2,   0x44,   0x130,  0x3,    0x2,    0x2,    0x2,    0x46,   0x132,
      0x3,   0x2,    0x2,    0x2,    0x48,   0x134,  0x3,    0x2,    0x2,
      0x2,   0x4a,   0x136,  0x3,    0x2,    0x2,    0x2,    0x4c,   0x13b,
      0x3,   0x2,    0x2,    0x2,    0x4e,   0x140,  0x3,    0x2,    0x2,
      0x2,   0x50,   0x142,  0x3,    0x2,    0x2,    0x2,    0x52,   0x54,
      0xa,   0x2,    0x2,    0x2,    0x53,   0x52,   0x3,    0x2,    0x2,
      0x2,   0x54,   0x55,   0x3,    0x2,    0x2,    0x2,    0x55,   0x53,
      0x3,   0x2,    0x2,    0x2,    0x55,   0x56,   0x3,    0x2,    0x2,
      0x2,   0x56,   0x3,    0x3,    0x2,    0x2,    0x2,    0x57,   0x59,
      0x5,   0x8,    0x5,    0x2,    0x58,   0x5a,   0x9,    0x3,    0x2,
      0x2,   0x59,   0x58,   0x3,    0x2,    0x2,    0x2,    0x59,   0x5a,
      0x3,   0x2,    0x2,    0x2,    0x5a,   0x5c,   0x3,    0x2,    0x2,
      0x2,   0x5b,   0x57,   0x3,    0x2,    0x2,    0x2,    0x5c,   0x5d,
      0x3,   0x2,    0x2,    0x2,    0x5d,   0x5b,   0x3,    0x2,    0x2,
      0x2,   0x5d,   0x5e,   0x3,    0x2,    0x2,    0x2,    0x5e,   0x5f,
      0x3,   0x2,    0x2,    0x2,    0x5f,   0x60,   0x7,    0x2,    0x2,
      0x3,   0x60,   0x5,    0x3,    0x2,    0x2,    0x2,    0x61,   0x62,
      0x5,   0xc,    0x7,    0x2,    0x62,   0x63,   0x7,    0x2,    0x2,
      0x3,   0x63,   0x7,    0x3,    0x2,    0x2,    0x2,    0x64,   0x65,
      0x5,   0x26,   0x14,   0x2,    0x65,   0x9,    0x3,    0x2,    0x2,
      0x2,   0x66,   0x67,   0x7,    0xb,    0x2,    0x2,    0x67,   0x68,
      0x5,   0xc,    0x7,    0x2,    0x68,   0xb,    0x3,    0x2,    0x2,
      0x2,   0x69,   0x6a,   0x8,    0x7,    0x1,    0x2,    0x6a,   0x6d,
      0x5,   0x10,   0x9,    0x2,    0x6b,   0x6d,   0x5,    0xe,    0x8,
      0x2,   0x6c,   0x69,   0x3,    0x2,    0x2,    0x2,    0x6c,   0x6b,
      0x3,   0x2,    0x2,    0x2,    0x6d,   0x76,   0x3,    0x2,    0x2,
      0x2,   0x6e,   0x6f,   0xc,    0x5,    0x2,    0x2,    0x6f,   0x70,
      0x7,   0x4,    0x2,    0x2,    0x70,   0x75,   0x5,    0xc,    0x7,
      0x6,   0x71,   0x72,   0xc,    0x4,    0x2,    0x2,    0x72,   0x73,
      0x7,   0x3,    0x2,    0x2,    0x73,   0x75,   0x5,    0xc,    0x7,
      0x5,   0x74,   0x6e,   0x3,    0x2,    0x2,    0x2,    0x74,   0x71,
      0x3,   0x2,    0x2,    0x2,    0x75,   0x78,   0x3,    0x2,    0x2,
      0x2,   0x76,   0x74,   0x3,    0x2,    0x2,    0x2,    0x76,   0x77,
      0x3,   0x2,    0x2,    0x2,    0x77,   0xd,    0x3,    0x2,    0x2,
      0x2,   0x78,   0x76,   0x3,    0x2,    0x2,    0x2,    0x79,   0x7a,
      0x7,   0x1d,   0x2,    0x2,    0x7a,   0x7b,   0x5,    0xc,    0x7,
      0x2,   0x7b,   0x7c,   0x7,    0x1e,   0x2,    0x2,    0x7c,   0xf,
      0x3,   0x2,    0x2,    0x2,    0x7d,   0x7e,   0x5,    0x48,   0x25,
      0x2,   0x7e,   0x7f,   0x5,    0x12,   0xa,    0x2,    0x7f,   0x80,
      0x5,   0x14,   0xb,    0x2,    0x80,   0xa5,   0x3,    0x2,    0x2,
      0x2,   0x81,   0x82,   0x5,    0x48,   0x25,   0x2,    0x82,   0x83,
      0x7,   0xa,    0x2,    0x2,    0x83,   0x84,   0x5,    0x14,   0xb,
      0x2,   0x84,   0xa5,   0x3,    0x2,    0x2,    0x2,    0x85,   0x87,
      0x5,   0x48,   0x25,   0x2,    0x86,   0x88,   0x7,    0x5,    0x2,
      0x2,   0x87,   0x86,   0x3,    0x2,    0x2,    0x2,    0x87,   0x88,
      0x3,   0x2,    0x2,    0x2,    0x88,   0x89,   0x3,    0x2,    0x2,
      0x2,   0x89,   0x8a,   0x7,    0x6,    0x2,    0x2,    0x8a,   0x8b,
      0x7,   0x1d,   0x2,    0x2,    0x8b,   0x8c,   0x5,    0x16,   0xc,
      0x2,   0x8c,   0x8d,   0x7,    0x1e,   0x2,    0x2,    0x8d,   0xa5,
      0x3,   0x2,    0x2,    0x2,    0x8e,   0x90,   0x5,    0x48,   0x25,
      0x2,   0x8f,   0x91,   0x7,    0x5,    0x2,    0x2,    0x90,   0x8f,
      0x3,   0x2,    0x2,    0x2,    0x90,   0x91,   0x3,    0x2,    0x2,
      0x2,   0x91,   0x92,   0x3,    0x2,    0x2,    0x2,    0x92,   0x93,
      0x9,   0x4,    0x2,    0x2,    0x93,   0x95,   0x7,    0x1d,   0x2,
      0x2,   0x94,   0x96,   0x5,    0x16,   0xc,    0x2,    0x95,   0x94,
      0x3,   0x2,    0x2,    0x2,    0x95,   0x96,   0x3,    0x2,    0x2,
      0x2,   0x96,   0x97,   0x3,    0x2,    0x2,    0x2,    0x97,   0x98,
      0x7,   0x1e,   0x2,    0x2,    0x98,   0xa5,   0x3,    0x2,    0x2,
      0x2,   0x99,   0x9a,   0x5,    0x48,   0x25,   0x2,    0x9a,   0x9c,
      0x7,   0x16,   0x2,    0x2,    0x9b,   0x9d,   0x7,    0x5,    0x2,
      0x2,   0x9c,   0x9b,   0x3,    0x2,    0x2,    0x2,    0x9c,   0x9d,
      0x3,   0x2,    0x2,    0x2,    0x9d,   0x9e,   0x3,    0x2,    0x2,
      0x2,   0x9e,   0x9f,   0x7,    0x17,   0x2,    0x2,    0x9f,   0xa5,
      0x3,   0x2,    0x2,    0x2,    0xa0,   0xa1,   0x5,    0x24,   0x13,
      0x2,   0xa1,   0xa2,   0x5,    0x12,   0xa,    0x2,    0xa2,   0xa3,
      0x5,   0x14,   0xb,    0x2,    0xa3,   0xa5,   0x3,    0x2,    0x2,
      0x2,   0xa4,   0x7d,   0x3,    0x2,    0x2,    0x2,    0xa4,   0x81,
      0x3,   0x2,    0x2,    0x2,    0xa4,   0x85,   0x3,    0x2,    0x2,
      0x2,   0xa4,   0x8e,   0x3,    0x2,    0x2,    0x2,    0xa4,   0x99,
      0x3,   0x2,    0x2,    0x2,    0xa4,   0xa0,   0x3,    0x2,    0x2,
      0x2,   0xa5,   0x11,   0x3,    0x2,    0x2,    0x2,    0xa6,   0xad,
      0x7,   0x33,   0x2,    0x2,    0xa7,   0xad,   0x5,    0x4a,   0x26,
      0x2,   0xa8,   0xad,   0x7,    0x31,   0x2,    0x2,    0xa9,   0xad,
      0x7,   0x32,   0x2,    0x2,    0xaa,   0xad,   0x5,    0x4e,   0x28,
      0x2,   0xab,   0xad,   0x5,    0x4c,   0x27,   0x2,    0xac,   0xa6,
      0x3,   0x2,    0x2,    0x2,    0xac,   0xa7,   0x3,    0x2,    0x2,
      0x2,   0xac,   0xa8,   0x3,    0x2,    0x2,    0x2,    0xac,   0xa9,
      0x3,   0x2,    0x2,    0x2,    0xac,   0xaa,   0x3,    0x2,    0x2,
      0x2,   0xac,   0xab,   0x3,    0x2,    0x2,    0x2,    0xad,   0x13,
      0x3,   0x2,    0x2,    0x2,    0xae,   0xb1,   0x5,    0x1a,   0xe,
      0x2,   0xaf,   0xb1,   0x5,    0x24,   0x13,   0x2,    0xb0,   0xae,
      0x3,   0x2,    0x2,    0x2,    0xb0,   0xaf,   0x3,    0x2,    0x2,
      0x2,   0xb1,   0x15,   0x3,    0x2,    0x2,    0x2,    0xb2,   0xb7,
      0x5,   0x18,   0xd,    0x2,    0xb3,   0xb4,   0x7,    0x24,   0x2,
      0x2,   0xb4,   0xb6,   0x5,    0x18,   0xd,    0x2,    0xb5,   0xb3,
      0x3,   0x2,    0x2,    0x2,    0xb6,   0xb9,   0x3,    0x2,    0x2,
      0x2,   0xb7,   0xb5,   0x3,    0x2,    0x2,    0x2,    0xb7,   0xb8,
      0x3,   0x2,    0x2,    0x2,    0xb8,   0x17,   0x3,    0x2,    0x2,
      0x2,   0xb9,   0xb7,   0x3,    0x2,    0x2,    0x2,    0xba,   0xbd,
      0x5,   0x1c,   0xf,    0x2,    0xbb,   0xbd,   0x5,    0x46,   0x24,
      0x2,   0xbc,   0xba,   0x3,    0x2,    0x2,    0x2,    0xbc,   0xbb,
      0x3,   0x2,    0x2,    0x2,    0xbd,   0x19,   0x3,    0x2,    0x2,
      0x2,   0xbe,   0xc3,   0x5,    0x3e,   0x20,   0x2,    0xbf,   0xc3,
      0x5,   0x44,   0x23,   0x2,    0xc0,   0xc3,   0x5,    0x20,   0x11,
      0x2,   0xc1,   0xc3,   0x5,    0x46,   0x24,   0x2,    0xc2,   0xbe,
      0x3,   0x2,    0x2,    0x2,    0xc2,   0xbf,   0x3,    0x2,    0x2,
      0x2,   0xc2,   0xc0,   0x3,    0x2,    0x2,    0x2,    0xc2,   0xc1,
      0x3,   0x2,    0x2,    0x2,    0xc3,   0x1b,   0x3,    0x2,    0x2,
      0x2,   0xc4,   0xc7,   0x5,    0x3e,   0x20,   0x2,    0xc5,   0xc7,
      0x5,   0x44,   0x23,   0x2,    0xc6,   0xc4,   0x3,    0x2,    0x2,
      0x2,   0xc6,   0xc5,   0x3,    0x2,    0x2,    0x2,    0xc7,   0x1d,
      0x3,   0x2,    0x2,    0x2,    0xc8,   0xc9,   0x7,    0x1f,   0x2,
      0x2,   0xc9,   0xce,   0x7,    0x37,   0x2,    0x2,    0xca,   0xcb,
      0x7,   0x24,   0x2,    0x2,    0xcb,   0xcd,   0x7,    0x37,   0x2,
      0x2,   0xcc,   0xca,   0x3,    0x2,    0x2,    0x2,    0xcd,   0xd0,
      0x3,   0x2,    0x2,    0x2,    0xce,   0xcc,   0x3,    0x2,    0x2,
      0x2,   0xce,   0xcf,   0x3,    0x2,    0x2,    0x2,    0xcf,   0xd1,
      0x3,   0x2,    0x2,    0x2,    0xd0,   0xce,   0x3,    0x2,    0x2,
      0x2,   0xd1,   0xd2,   0x7,    0x20,   0x2,    0x2,    0xd2,   0x1f,
      0x3,   0x2,    0x2,    0x2,    0xd3,   0xd6,   0x7,    0x37,   0x2,
      0x2,   0xd4,   0xd6,   0x5,    0x1e,   0x10,   0x2,    0xd5,   0xd3,
      0x3,   0x2,    0x2,    0x2,    0xd5,   0xd4,   0x3,    0x2,    0x2,
      0x2,   0xd6,   0x21,   0x3,    0x2,    0x2,    0x2,    0xd7,   0xda,
      0x5,   0x14,   0xb,    0x2,    0xd8,   0xda,   0x5,    0x48,   0x25,
      0x2,   0xd9,   0xd7,   0x3,    0x2,    0x2,    0x2,    0xd9,   0xd8,
      0x3,   0x2,    0x2,    0x2,    0xda,   0x23,   0x3,    0x2,    0x2,
      0x2,   0xdb,   0xdc,   0x5,    0x48,   0x25,   0x2,    0xdc,   0xe5,
      0x7,   0x1d,   0x2,    0x2,    0xdd,   0xe2,   0x5,    0x22,   0x12,
      0x2,   0xde,   0xdf,   0x7,    0x24,   0x2,    0x2,    0xdf,   0xe1,
      0x5,   0x22,   0x12,   0x2,    0xe0,   0xde,   0x3,    0x2,    0x2,
      0x2,   0xe1,   0xe4,   0x3,    0x2,    0x2,    0x2,    0xe2,   0xe0,
      0x3,   0x2,    0x2,    0x2,    0xe2,   0xe3,   0x3,    0x2,    0x2,
      0x2,   0xe3,   0xe6,   0x3,    0x2,    0x2,    0x2,    0xe4,   0xe2,
      0x3,   0x2,    0x2,    0x2,    0xe5,   0xdd,   0x3,    0x2,    0x2,
      0x2,   0xe5,   0xe6,   0x3,    0x2,    0x2,    0x2,    0xe6,   0xe7,
      0x3,   0x2,    0x2,    0x2,    0xe7,   0xe8,   0x7,    0x1e,   0x2,
      0x2,   0xe8,   0x25,   0x3,    0x2,    0x2,    0x2,    0xe9,   0xea,
      0x5,   0x28,   0x15,   0x2,    0xea,   0x27,   0x3,    0x2,    0x2,
      0x2,   0xeb,   0xec,   0x7,    0xc,    0x2,    0x2,    0xec,   0xed,
      0x5,   0x2a,   0x16,   0x2,    0xed,   0xef,   0x5,    0x2e,   0x18,
      0x2,   0xee,   0xf0,   0x5,    0xa,    0x6,    0x2,    0xef,   0xee,
      0x3,   0x2,    0x2,    0x2,    0xef,   0xf0,   0x3,    0x2,    0x2,
      0x2,   0xf0,   0xf2,   0x3,    0x2,    0x2,    0x2,    0xf1,   0xf3,
      0x5,   0x30,   0x19,   0x2,    0xf2,   0xf1,   0x3,    0x2,    0x2,
      0x2,   0xf2,   0xf3,   0x3,    0x2,    0x2,    0x2,    0xf3,   0xf5,
      0x3,   0x2,    0x2,    0x2,    0xf4,   0xf6,   0x5,    0x34,   0x1b,
      0x2,   0xf5,   0xf4,   0x3,    0x2,    0x2,    0x2,    0xf5,   0xf6,
      0x3,   0x2,    0x2,    0x2,    0xf6,   0x29,   0x3,    0x2,    0x2,
      0x2,   0xf7,   0xfc,   0x5,    0x2c,   0x17,   0x2,    0xf8,   0xf9,
      0x7,   0x24,   0x2,    0x2,    0xf9,   0xfb,   0x5,    0x2c,   0x17,
      0x2,   0xfa,   0xf8,   0x3,    0x2,    0x2,    0x2,    0xfb,   0xfe,
      0x3,   0x2,    0x2,    0x2,    0xfc,   0xfa,   0x3,    0x2,    0x2,
      0x2,   0xfc,   0xfd,   0x3,    0x2,    0x2,    0x2,    0xfd,   0x2b,
      0x3,   0x2,    0x2,    0x2,    0xfe,   0xfc,   0x3,    0x2,    0x2,
      0x2,   0xff,   0x108,  0x7,    0x21,   0x2,    0x2,    0x100,  0x102,
      0x5,   0x38,   0x1d,   0x2,    0x101,  0x103,  0x7,    0xe,    0x2,
      0x2,   0x102,  0x101,  0x3,    0x2,    0x2,    0x2,    0x102,  0x103,
      0x3,   0x2,    0x2,    0x2,    0x103,  0x105,  0x3,    0x2,    0x2,
      0x2,   0x104,  0x106,  0x5,    0x3c,   0x1f,   0x2,    0x105,  0x104,
      0x3,   0x2,    0x2,    0x2,    0x105,  0x106,  0x3,    0x2,    0x2,
      0x2,   0x106,  0x108,  0x3,    0x2,    0x2,    0x2,    0x107,  0xff,
      0x3,   0x2,    0x2,    0x2,    0x107,  0x100,  0x3,    0x2,    0x2,
      0x2,   0x108,  0x2d,   0x3,    0x2,    0x2,    0x2,    0x109,  0x10a,
      0x7,   0xd,    0x2,    0x2,    0x10a,  0x10b,  0x5,    0x36,   0x1c,
      0x2,   0x10b,  0x2f,   0x3,    0x2,    0x2,    0x2,    0x10c,  0x10d,
      0x7,   0x10,   0x2,    0x2,    0x10d,  0x10e,  0x7,    0xf,    0x2,
      0x2,   0x10e,  0x113,  0x5,    0x32,   0x1a,   0x2,    0x10f,  0x110,
      0x7,   0x24,   0x2,    0x2,    0x110,  0x112,  0x5,    0x32,   0x1a,
      0x2,   0x111,  0x10f,  0x3,    0x2,    0x2,    0x2,    0x112,  0x115,
      0x3,   0x2,    0x2,    0x2,    0x113,  0x111,  0x3,    0x2,    0x2,
      0x2,   0x113,  0x114,  0x3,    0x2,    0x2,    0x2,    0x114,  0x31,
      0x3,   0x2,    0x2,    0x2,    0x115,  0x113,  0x3,    0x2,    0x2,
      0x2,   0x116,  0x118,  0x5,    0x38,   0x1d,   0x2,    0x117,  0x119,
      0x9,   0x5,    0x2,    0x2,    0x118,  0x117,  0x3,    0x2,    0x2,
      0x2,   0x118,  0x119,  0x3,    0x2,    0x2,    0x2,    0x119,  0x33,
      0x3,   0x2,    0x2,    0x2,    0x11a,  0x11b,  0x7,    0x13,   0x2,
      0x2,   0x11b,  0x11c,  0x5,    0x40,   0x21,   0x2,    0x11c,  0x35,
      0x3,   0x2,    0x2,    0x2,    0x11d,  0x11e,  0x5,    0x48,   0x25,
      0x2,   0x11e,  0x37,   0x3,    0x2,    0x2,    0x2,    0x11f,  0x120,
      0x5,   0x48,   0x25,   0x2,    0x120,  0x39,   0x3,    0x2,    0x2,
      0x2,   0x121,  0x122,  0x5,    0x48,   0x25,   0x2,    0x122,  0x3b,
      0x3,   0x2,    0x2,    0x2,    0x123,  0x125,  0x7,    0xe,    0x2,
      0x2,   0x124,  0x123,  0x3,    0x2,    0x2,    0x2,    0x124,  0x125,
      0x3,   0x2,    0x2,    0x2,    0x125,  0x126,  0x3,    0x2,    0x2,
      0x2,   0x126,  0x127,  0x5,    0x48,   0x25,   0x2,    0x127,  0x3d,
      0x3,   0x2,    0x2,    0x2,    0x128,  0x12b,  0x5,    0x40,   0x21,
      0x2,   0x129,  0x12b,  0x5,    0x42,   0x22,   0x2,    0x12a,  0x128,
      0x3,   0x2,    0x2,    0x2,    0x12a,  0x129,  0x3,    0x2,    0x2,
      0x2,   0x12b,  0x3f,   0x3,    0x2,    0x2,    0x2,    0x12c,  0x12d,
      0x7,   0x18,   0x2,    0x2,    0x12d,  0x41,   0x3,    0x2,    0x2,
      0x2,   0x12e,  0x12f,  0x7,    0x19,   0x2,    0x2,    0x12f,  0x43,
      0x3,   0x2,    0x2,    0x2,    0x130,  0x131,  0x9,    0x6,    0x2,
      0x2,   0x131,  0x45,   0x3,    0x2,    0x2,    0x2,    0x132,  0x133,
      0x9,   0x7,    0x2,    0x2,    0x133,  0x47,   0x3,    0x2,    0x2,
      0x2,   0x134,  0x135,  0x5,    0x50,   0x29,   0x2,    0x135,  0x49,
      0x3,   0x2,    0x2,    0x2,    0x136,  0x137,  0x7,    0x2e,   0x2,
      0x2,   0x137,  0x4b,   0x3,    0x2,    0x2,    0x2,    0x138,  0x13c,
      0x7,   0x2d,   0x2,    0x2,    0x139,  0x13a,  0x7,    0x32,   0x2,
      0x2,   0x13a,  0x13c,  0x7,    0x33,   0x2,    0x2,    0x13b,  0x138,
      0x3,   0x2,    0x2,    0x2,    0x13b,  0x139,  0x3,    0x2,    0x2,
      0x2,   0x13c,  0x4d,   0x3,    0x2,    0x2,    0x2,    0x13d,  0x141,
      0x7,   0x2c,   0x2,    0x2,    0x13e,  0x13f,  0x7,    0x31,   0x2,
      0x2,   0x13f,  0x141,  0x7,    0x33,   0x2,    0x2,    0x140,  0x13d,
      0x3,   0x2,    0x2,    0x2,    0x140,  0x13e,  0x3,    0x2,    0x2,
      0x2,   0x141,  0x4f,   0x3,    0x2,    0x2,    0x2,    0x142,  0x143,
      0x9,   0x8,    0x2,    0x2,    0x143,  0x51,   0x3,    0x2,    0x2,
      0x2,   0x25,   0x55,   0x59,   0x5d,   0x6c,   0x74,   0x76,   0x87,
      0x90,  0x95,   0x9c,   0xa4,   0xac,   0xb0,   0xb7,   0xbc,   0xc2,
      0xc6,  0xce,   0xd5,   0xd9,   0xe2,   0xe5,   0xef,   0xf2,   0xf5,
      0xfc,  0x102,  0x105,  0x107,  0x113,  0x118,  0x124,  0x12a,  0x13b,
      0x140,
  };

  atn::ATNDeserializer deserializer;
  _atn = deserializer.deserialize(_serializedATN);

  size_t count = _atn.getNumberOfDecisions();
  _decisionToDFA.reserve(count);
  for (size_t i = 0; i < count; i++) {
    _decisionToDFA.emplace_back(_atn.getDecisionState(i), i);
  }
}

// Out-of-class definition of the static `_init` member. Because it is an
// object of class type with static storage duration, its constructor runs
// during static initialization, before the parser is first used.
SQLParser::Initializer SQLParser::_init;


================================================
FILE: src/db/sqlengine/antlr/gen/SQLParser.h
================================================

// Generated from SQLParser.g4 by ANTLR 4.8

#pragma once


#include "antlr4-runtime.h"


namespace antlr4 {


class SQLParser : public antlr4::Parser {
 public:
  // Token type constants, numbered consecutively from OR = 1 through
  // REGULAR_ID = 56. The rule context classes declared further down expose
  // accessors with the same names (e.g. SEMI(), COMMA(), LP()) that return
  // terminal nodes.
  enum {
    OR = 1,
    AND = 2,
    NOT = 3,
    IN = 4,
    CONTAIN_ALL = 5,
    CONTAIN_ANY = 6,
    BETWEEN = 7,
    LIKE = 8,
    WHERE = 9,
    SELECT = 10,
    FROM = 11,
    AS = 12,
    BY = 13,
    ORDER = 14,
    ASC = 15,
    DESC = 16,
    LIMIT = 17,
    TRUE_V = 18,
    FALSE_V = 19,
    IS = 20,
    NULL_V = 21,
    INTEGER = 22,
    FLOAT = 23,
    SQUOTA_STRING = 24,
    DQUOTA_STRING = 25,
    DOT = 26,
    LP = 27,
    RP = 28,
    LMP = 29,
    RMP = 30,
    ASTERISK = 31,
    PLUS_SIGN = 32,
    MINUS_SIGN = 33,
    COMMA = 34,
    SOLIDUS = 35,
    MOD = 36,
    AT_SIGN = 37,
    ASSIGN_OP = 38,
    SHARP_SIGN = 39,
    COLON = 40,
    SEMI = 41,
    LE_OP = 42,
    GE_OP = 43,
    NE_OP = 44,
    CARET_OP = 45,
    TILDE_OP = 46,
    L_OP = 47,
    G_OP = 48,
    E_OP = 49,
    CONCAT_OP = 50,
    UNDERSCORE = 51,
    SPACES = 52,
    VECTOR = 53,
    SINGLE_LINE_COMMENT = 54,
    MULTI_LINE_COMMENT = 55,
    REGULAR_ID = 56
  };

  // Rule index constants, numbered 0 through 39. Each Rule<Name> entry has a
  // matching <Name>Context class, and the entries appear in the same order as
  // the forward declarations of those classes below.
  // NOTE(review): presumably each context's getRuleIndex() returns its
  // constant from this enum; the implementation is not in this header.
  enum {
    RuleSwallow_to_semi = 0,
    RuleCompilation_unit = 1,
    RuleLogic_expr_unit = 2,
    RuleUnit_statement = 3,
    RuleWhere_clause = 4,
    RuleLogic_expr = 5,
    RuleEnclosed_expr = 6,
    RuleRelation_expr = 7,
    RuleRel_oper = 8,
    RuleValue_expr = 9,
    RuleIn_value_expr_list = 10,
    RuleIn_value_expr = 11,
    RuleConstant = 12,
    RuleConstant_num_and_str = 13,
    RuleMatrix = 14,
    RuleVector_expr = 15,
    RuleFunction_value_expr = 16,
    RuleFunction_call = 17,
    RuleDql_statement = 18,
    RuleSelect_statement = 19,
    RuleSelected_elements = 20,
    RuleSelected_element = 21,
    RuleFrom_clause = 22,
    RuleOrder_by_clause = 23,
    RuleOrder_by_element = 24,
    RuleLimit_clause = 25,
    RuleTableview_name = 26,
    RuleField_name = 27,
    RuleTable_alias = 28,
    RuleField_alias = 29,
    RuleNumeric = 30,
    RuleInt_value = 31,
    RuleFloat_value = 32,
    RuleQuoted_string = 33,
    RuleBool_value = 34,
    RuleIdentifier = 35,
    RuleNe_op = 36,
    RuleGe_op = 37,
    RuleLe_op = 38,
    RuleRegular_id = 39
  };

  // Constructor takes the token stream to parse; both the constructor and
  // the destructor are only declared here.
  SQLParser(antlr4::TokenStream *input);
  ~SQLParser();

  // Overrides of the base parser's metadata accessors.
  std::string getGrammarFileName() const override;

  // Hands back the `_atn` member.
  const antlr4::atn::ATN &getATN() const override {
    return _atn;
  }

  // Hands back the `_tokenNames` member.
  // deprecated: use vocabulary instead.
  const std::vector<std::string> &getTokenNames() const override {
    return _tokenNames;
  }

  const std::vector<std::string> &getRuleNames() const override;
  antlr4::dfa::Vocabulary &getVocabulary() const override;


  class Swallow_to_semiContext;
  class Compilation_unitContext;
  class Logic_expr_unitContext;
  class Unit_statementContext;
  class Where_clauseContext;
  class Logic_exprContext;
  class Enclosed_exprContext;
  class Relation_exprContext;
  class Rel_operContext;
  class Value_exprContext;
  class In_value_expr_listContext;
  class In_value_exprContext;
  class ConstantContext;
  class Constant_num_and_strContext;
  class MatrixContext;
  class Vector_exprContext;
  class Function_value_exprContext;
  class Function_callContext;
  class Dql_statementContext;
  class Select_statementContext;
  class Selected_elementsContext;
  class Selected_elementContext;
  class From_clauseContext;
  class Order_by_clauseContext;
  class Order_by_elementContext;
  class Limit_clauseContext;
  class Tableview_nameContext;
  class Field_nameContext;
  class Table_aliasContext;
  class Field_aliasContext;
  class NumericContext;
  class Int_valueContext;
  class Float_valueContext;
  class Quoted_stringContext;
  class Bool_valueContext;
  class IdentifierContext;
  class Ne_opContext;
  class Ge_opContext;
  class Le_opContext;
  class Regular_idContext;

  // Parse-tree context for the `swallow_to_semi` rule.
  class Swallow_to_semiContext : public antlr4::ParserRuleContext {
   public:
    Swallow_to_semiContext(antlr4::ParserRuleContext *parent_ctx,
                           size_t invoking_state);
    size_t getRuleIndex() const override;

    // SEMI terminal accessors: vector form and indexed form.
    std::vector<antlr4::tree::TerminalNode *> SEMI();
    antlr4::tree::TerminalNode *SEMI(size_t i);

    // Hooks taking a ParseTreeListener.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method paired with Swallow_to_semiContext.
  Swallow_to_semiContext *swallow_to_semi();

  // Parse-tree context for the `compilation_unit` rule.
  class Compilation_unitContext : public antlr4::ParserRuleContext {
   public:
    Compilation_unitContext(antlr4::ParserRuleContext *parent_ctx,
                            size_t invoking_state);
    size_t getRuleIndex() const override;

    // End-of-input terminal.
    antlr4::tree::TerminalNode *EOF();

    // unit_statement sub-contexts: vector form and indexed form.
    std::vector<Unit_statementContext *> unit_statement();
    Unit_statementContext *unit_statement(size_t i);

    // SOLIDUS and SEMI terminals, each in vector form and indexed form.
    std::vector<antlr4::tree::TerminalNode *> SOLIDUS();
    antlr4::tree::TerminalNode *SOLIDUS(size_t i);
    std::vector<antlr4::tree::TerminalNode *> SEMI();
    antlr4::tree::TerminalNode *SEMI(size_t i);

    // Hooks taking a ParseTreeListener.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method paired with Compilation_unitContext.
  Compilation_unitContext *compilation_unit();

  // Parse-tree context for the `logic_expr_unit` rule; exposes a logic_expr
  // child and an EOF terminal.
  class Logic_expr_unitContext : public antlr4::ParserRuleContext {
   public:
    Logic_expr_unitContext(antlr4::ParserRuleContext *parent_ctx,
                           size_t invoking_state);
    size_t getRuleIndex() const override;

    // Child accessors.
    Logic_exprContext *logic_expr();
    antlr4::tree::TerminalNode *EOF();

    // Hooks taking a ParseTreeListener.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method paired with Logic_expr_unitContext.
  Logic_expr_unitContext *logic_expr_unit();

  // Parse-tree context for the `unit_statement` rule; its only child
  // accessor is dql_statement().
  class Unit_statementContext : public antlr4::ParserRuleContext {
   public:
    Unit_statementContext(antlr4::ParserRuleContext *parent_ctx,
                          size_t invoking_state);
    size_t getRuleIndex() const override;

    // Child accessor.
    Dql_statementContext *dql_statement();

    // Hooks taking a ParseTreeListener.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method paired with Unit_statementContext.
  Unit_statementContext *unit_statement();

  // Parse-tree context for the `where_clause` rule; exposes the WHERE
  // terminal and a logic_expr child.
  class Where_clauseContext : public antlr4::ParserRuleContext {
   public:
    Where_clauseContext(antlr4::ParserRuleContext *parent_ctx,
                        size_t invoking_state);
    size_t getRuleIndex() const override;

    // Child accessors.
    antlr4::tree::TerminalNode *WHERE();
    Logic_exprContext *logic_expr();

    // Hooks taking a ParseTreeListener.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method paired with Where_clauseContext.
  Where_clauseContext *where_clause();

  // Parse-tree context for the `logic_expr` rule. The node can hold further
  // logic_expr children of its own, next to AND / OR terminals, a
  // relation_expr child and an enclosed_expr child.
  class Logic_exprContext : public antlr4::ParserRuleContext {
   public:
    Logic_exprContext(antlr4::ParserRuleContext *parent_ctx,
                      size_t invoking_state);
    size_t getRuleIndex() const override;

    // Non-recursive children.
    Relation_exprContext *relation_expr();
    Enclosed_exprContext *enclosed_expr();

    // Nested logic_expr children: vector form and indexed form.
    std::vector<Logic_exprContext *> logic_expr();
    Logic_exprContext *logic_expr(size_t i);

    // Connective terminals.
    antlr4::tree::TerminalNode *AND();
    antlr4::tree::TerminalNode *OR();

    // Hooks taking a ParseTreeListener.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule methods paired with Logic_exprContext: a no-argument form and an
  // overload that takes a precedence value.
  Logic_exprContext *logic_expr();
  Logic_exprContext *logic_expr(int precedence);
  // Parse-tree context for the `enclosed_expr` rule; exposes LP, a
  // logic_expr child and RP.
  class Enclosed_exprContext : public antlr4::ParserRuleContext {
   public:
    Enclosed_exprContext(antlr4::ParserRuleContext *parent_ctx,
                         size_t invoking_state);
    size_t getRuleIndex() const override;

    // Child accessors.
    antlr4::tree::TerminalNode *LP();
    Logic_exprContext *logic_expr();
    antlr4::tree::TerminalNode *RP();

    // Hooks taking a ParseTreeListener.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method paired with Enclosed_exprContext.
  Enclosed_exprContext *enclosed_expr();

  // Parse-tree context for the `relation_expr` rule. It declares accessors
  // for an identifier or function_call child, a rel_oper / value_expr pair,
  // and the LIKE, IN, NOT, CONTAIN_ALL, CONTAIN_ANY, IS and NULL_V
  // terminals together with a parenthesised in_value_expr_list.
  class Relation_exprContext : public antlr4::ParserRuleContext {
   public:
    Relation_exprContext(antlr4::ParserRuleContext *parent_ctx,
                         size_t invoking_state);
    size_t getRuleIndex() const override;

    // Child accessors, sub-rule contexts and terminals interleaved.
    IdentifierContext *identifier();
    Rel_operContext *rel_oper();
    Value_exprContext *value_expr();
    antlr4::tree::TerminalNode *LIKE();
    antlr4::tree::TerminalNode *IN();
    antlr4::tree::TerminalNode *LP();
    In_value_expr_listContext *in_value_expr_list();
    antlr4::tree::TerminalNode *RP();
    antlr4::tree::TerminalNode *NOT();
    antlr4::tree::TerminalNode *CONTAIN_ALL();
    antlr4::tree::TerminalNode *CONTAIN_ANY();
    antlr4::tree::TerminalNode *IS();
    antlr4::tree::TerminalNode *NULL_V();
    Function_callContext *function_call();

    // Hooks taking a ParseTreeListener.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method paired with Relation_exprContext.
  Relation_exprContext *relation_expr();

  // Parse-tree context for the `rel_oper` rule. Operator children are
  // reachable as terminals (E_OP, L_OP, G_OP) or as sub-rule contexts
  // (ne_op, le_op, ge_op).
  class Rel_operContext : public antlr4::ParserRuleContext {
   public:
    Rel_operContext(antlr4::ParserRuleContext *parent_ctx,
                    size_t invoking_state);
    size_t getRuleIndex() const override;

    // Operator accessors.
    antlr4::tree::TerminalNode *E_OP();
    Ne_opContext *ne_op();
    antlr4::tree::TerminalNode *L_OP();
    antlr4::tree::TerminalNode *G_OP();
    Le_opContext *le_op();
    Ge_opContext *ge_op();

    // Hooks taking a ParseTreeListener.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method paired with Rel_operContext.
  Rel_operContext *rel_oper();

  // Parse-tree context for the `value_expr` rule; exposes a constant child
  // and a function_call child.
  class Value_exprContext : public antlr4::ParserRuleContext {
   public:
    Value_exprContext(antlr4::ParserRuleContext *parent_ctx,
                      size_t invoking_state);
    size_t getRuleIndex() const override;

    // Child accessors.
    ConstantContext *constant();
    Function_callContext *function_call();

    // Hooks taking a ParseTreeListener.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method paired with Value_exprContext.
  Value_exprContext *value_expr();

  // Parse-tree context for the `in_value_expr_list` rule; exposes
  // in_value_expr children and COMMA terminals.
  class In_value_expr_listContext : public antlr4::ParserRuleContext {
   public:
    In_value_expr_listContext(antlr4::ParserRuleContext *parent_ctx,
                              size_t invoking_state);
    size_t getRuleIndex() const override;

    // in_value_expr sub-contexts: vector form and indexed form.
    std::vector<In_value_exprContext *> in_value_expr();
    In_value_exprContext *in_value_expr(size_t i);

    // COMMA terminals: vector form and indexed form.
    std::vector<antlr4::tree::TerminalNode *> COMMA();
    antlr4::tree::TerminalNode *COMMA(size_t i);

    // Hooks taking a ParseTreeListener.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method paired with In_value_expr_listContext.
  In_value_expr_listContext *in_value_expr_list();

  // Parse-tree context for the `in_value_expr` rule; exposes a
  // constant_num_and_str child and a bool_value child.
  class In_value_exprContext : public antlr4::ParserRuleContext {
   public:
    In_value_exprContext(antlr4::ParserRuleContext *parent_ctx,
                         size_t invoking_state);
    size_t getRuleIndex() const override;

    // Child accessors.
    Constant_num_and_strContext *constant_num_and_str();
    Bool_valueContext *bool_value();

    // Hooks taking a ParseTreeListener.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method paired with In_value_exprContext.
  In_value_exprContext *in_value_expr();

  // Parse-tree context for the `constant` rule; exposes numeric,
  // quoted_string, vector_expr and bool_value children.
  class ConstantContext : public antlr4::ParserRuleContext {
   public:
    ConstantContext(antlr4::ParserRuleContext *parent_ctx,
                    size_t invoking_state);
    size_t getRuleIndex() const override;

    // Child accessors.
    NumericContext *numeric();
    Quoted_stringContext *quoted_string();
    Vector_exprContext *vector_expr();
    Bool_valueContext *bool_value();

    // Hooks taking a ParseTreeListener.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method paired with ConstantContext.
  ConstantContext *constant();

  // Parse-tree context for the `constant_num_and_str` rule; exposes a
  // numeric child and a quoted_string child.
  class Constant_num_and_strContext : public antlr4::ParserRuleContext {
   public:
    Constant_num_and_strContext(antlr4::ParserRuleContext *parent_ctx,
                                size_t invoking_state);
    size_t getRuleIndex() const override;

    // Child accessors.
    NumericContext *numeric();
    Quoted_stringContext *quoted_string();

    // Hooks taking a ParseTreeListener.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method paired with Constant_num_and_strContext.
  Constant_num_and_strContext *constant_num_and_str();

  // Parse-tree context for the `matrix` rule; exposes LMP / RMP terminals
  // plus VECTOR and COMMA terminals.
  class MatrixContext : public antlr4::ParserRuleContext {
   public:
    MatrixContext(antlr4::ParserRuleContext *parent_ctx, size_t invoking_state);
    size_t getRuleIndex() const override;

    // Opening LMP terminal.
    antlr4::tree::TerminalNode *LMP();

    // VECTOR terminals: vector form and indexed form.
    std::vector<antlr4::tree::TerminalNode *> VECTOR();
    antlr4::tree::TerminalNode *VECTOR(size_t i);

    // Closing RMP terminal.
    antlr4::tree::TerminalNode *RMP();

    // COMMA terminals: vector form and indexed form.
    std::vector<antlr4::tree::TerminalNode *> COMMA();
    antlr4::tree::TerminalNode *COMMA(size_t i);

    // Hooks taking a ParseTreeListener.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method paired with MatrixContext.
  MatrixContext *matrix();

  // Parse-tree context for the `vector_expr` rule; exposes a VECTOR
  // terminal and a matrix child.
  class Vector_exprContext : public antlr4::ParserRuleContext {
   public:
    Vector_exprContext(antlr4::ParserRuleContext *parent_ctx,
                       size_t invoking_state);
    size_t getRuleIndex() const override;

    // Child accessors.
    antlr4::tree::TerminalNode *VECTOR();
    MatrixContext *matrix();

    // Hooks taking a ParseTreeListener.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method paired with Vector_exprContext.
  Vector_exprContext *vector_expr();

  // Parse-tree context for the `function_value_expr` rule; exposes a
  // value_expr child and an identifier child.
  class Function_value_exprContext : public antlr4::ParserRuleContext {
   public:
    Function_value_exprContext(antlr4::ParserRuleContext *parent_ctx,
                               size_t invoking_state);
    size_t getRuleIndex() const override;

    // Child accessors.
    Value_exprContext *value_expr();
    IdentifierContext *identifier();

    // Hooks taking a ParseTreeListener.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method paired with Function_value_exprContext.
  Function_value_exprContext *function_value_expr();

  // Context node type for the `function_call` grammar rule.
  class Function_callContext : public antlr4::ParserRuleContext {
   public:
    Function_callContext(antlr4::ParserRuleContext *parent_ctx,
                         size_t invoking_state);
    size_t getRuleIndex() const override;

    // Accessors for the rule's parts. Arguments and the COMMA tokens come in
    // a collection form and an indexed (`i`) form.
    IdentifierContext *identifier();
    antlr4::tree::TerminalNode *LP();  // '('
    antlr4::tree::TerminalNode *RP();  // ')'
    std::vector<Function_value_exprContext *> function_value_expr();
    Function_value_exprContext *function_value_expr(size_t i);
    std::vector<antlr4::tree::TerminalNode *> COMMA();  // ','
    antlr4::tree::TerminalNode *COMMA(size_t i);

    // Listener entry/exit hooks; bodies are defined out of line.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method for `function_call`; returns a pointer to its context type.
  Function_callContext *function_call();

  // Context node type for the `dql_statement` grammar rule.
  class Dql_statementContext : public antlr4::ParserRuleContext {
   public:
    Dql_statementContext(antlr4::ParserRuleContext *parent_ctx,
                         size_t invoking_state);
    size_t getRuleIndex() const override;

    // Sub-rule accessor.
    Select_statementContext *select_statement();

    // Listener entry/exit hooks; bodies are defined out of line.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method for `dql_statement`; returns a pointer to its context type.
  Dql_statementContext *dql_statement();

  // Context node type for the `select_statement` grammar rule.
  class Select_statementContext : public antlr4::ParserRuleContext {
   public:
    Select_statementContext(antlr4::ParserRuleContext *parent_ctx,
                            size_t invoking_state);
    size_t getRuleIndex() const override;

    // Keyword token accessor.
    antlr4::tree::TerminalNode *SELECT();

    // One accessor per clause sub-rule of the statement.
    Selected_elementsContext *selected_elements();
    From_clauseContext *from_clause();
    Where_clauseContext *where_clause();
    Order_by_clauseContext *order_by_clause();
    Limit_clauseContext *limit_clause();

    // Listener entry/exit hooks; bodies are defined out of line.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method for `select_statement`; returns a pointer to its context type.
  Select_statementContext *select_statement();

  // Context node type for the `selected_elements` grammar rule.
  class Selected_elementsContext : public antlr4::ParserRuleContext {
   public:
    Selected_elementsContext(antlr4::ParserRuleContext *parent_ctx,
                             size_t invoking_state);
    size_t getRuleIndex() const override;

    // Elements and their COMMA tokens, each in a collection form and an
    // indexed (`i`) form.
    std::vector<Selected_elementContext *> selected_element();
    Selected_elementContext *selected_element(size_t i);
    std::vector<antlr4::tree::TerminalNode *> COMMA();  // ','
    antlr4::tree::TerminalNode *COMMA(size_t i);

    // Listener entry/exit hooks; bodies are defined out of line.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method for `selected_elements`; returns a pointer to its context
  // type.
  Selected_elementsContext *selected_elements();

  // Context node type for the `selected_element` grammar rule.
  class Selected_elementContext : public antlr4::ParserRuleContext {
   public:
    Selected_elementContext(antlr4::ParserRuleContext *parent_ctx,
                            size_t invoking_state);
    size_t getRuleIndex() const override;

    // Token and sub-rule accessors for the element's parts.
    antlr4::tree::TerminalNode *ASTERISK();  // '*'
    Field_nameContext *field_name();
    antlr4::tree::TerminalNode *AS();
    Field_aliasContext *field_alias();

    // Listener entry/exit hooks; bodies are defined out of line.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method for `selected_element`; returns a pointer to its context type.
  Selected_elementContext *selected_element();

  // Context node type for the `from_clause` grammar rule.
  class From_clauseContext : public antlr4::ParserRuleContext {
   public:
    From_clauseContext(antlr4::ParserRuleContext *parent_ctx,
                       size_t invoking_state);
    size_t getRuleIndex() const override;

    // FROM keyword token and the table/view name sub-rule.
    antlr4::tree::TerminalNode *FROM();
    Tableview_nameContext *tableview_name();

    // Listener entry/exit hooks; bodies are defined out of line.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method for `from_clause`; returns a pointer to its context type.
  From_clauseContext *from_clause();

  // Context node type for the `order_by_clause` grammar rule.
  class Order_by_clauseContext : public antlr4::ParserRuleContext {
   public:
    Order_by_clauseContext(antlr4::ParserRuleContext *parent_ctx,
                           size_t invoking_state);
    size_t getRuleIndex() const override;

    // Keyword token accessors.
    antlr4::tree::TerminalNode *ORDER();
    antlr4::tree::TerminalNode *BY();

    // Ordering elements and their COMMA tokens, each in a collection form and
    // an indexed (`i`) form.
    std::vector<Order_by_elementContext *> order_by_element();
    Order_by_elementContext *order_by_element(size_t i);
    std::vector<antlr4::tree::TerminalNode *> COMMA();  // ','
    antlr4::tree::TerminalNode *COMMA(size_t i);

    // Listener entry/exit hooks; bodies are defined out of line.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method for `order_by_clause`; returns a pointer to its context type.
  Order_by_clauseContext *order_by_clause();

  // Context node type for the `order_by_element` grammar rule.
  class Order_by_elementContext : public antlr4::ParserRuleContext {
   public:
    Order_by_elementContext(antlr4::ParserRuleContext *parent_ctx,
                            size_t invoking_state);
    size_t getRuleIndex() const override;

    // Field sub-rule plus the ASC / DESC keyword tokens.
    Field_nameContext *field_name();
    antlr4::tree::TerminalNode *ASC();
    antlr4::tree::TerminalNode *DESC();

    // Listener entry/exit hooks; bodies are defined out of line.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method for `order_by_element`; returns a pointer to its context type.
  Order_by_elementContext *order_by_element();

  // Context node type for the `limit_clause` grammar rule.
  class Limit_clauseContext : public antlr4::ParserRuleContext {
   public:
    Limit_clauseContext(antlr4::ParserRuleContext *parent_ctx,
                        size_t invoking_state);
    size_t getRuleIndex() const override;

    // LIMIT keyword token and the integer-value sub-rule.
    antlr4::tree::TerminalNode *LIMIT();
    Int_valueContext *int_value();

    // Listener entry/exit hooks; bodies are defined out of line.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method for `limit_clause`; returns a pointer to its context type.
  Limit_clauseContext *limit_clause();

  // Context node type for the `tableview_name` grammar rule.
  class Tableview_nameContext : public antlr4::ParserRuleContext {
   public:
    Tableview_nameContext(antlr4::ParserRuleContext *parent_ctx,
                          size_t invoking_state);
    size_t getRuleIndex() const override;

    // Sub-rule accessor.
    IdentifierContext *identifier();

    // Listener entry/exit hooks; bodies are defined out of line.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method for `tableview_name`; returns a pointer to its context type.
  Tableview_nameContext *tableview_name();

  // Context node type for the `field_name` grammar rule.
  class Field_nameContext : public antlr4::ParserRuleContext {
   public:
    Field_nameContext(antlr4::ParserRuleContext *parent_ctx,
                      size_t invoking_state);
    size_t getRuleIndex() const override;

    // Sub-rule accessor.
    IdentifierContext *identifier();

    // Listener entry/exit hooks; bodies are defined out of line.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method for `field_name`; returns a pointer to its context type.
  Field_nameContext *field_name();

  // Context node type for the `table_alias` grammar rule.
  class Table_aliasContext : public antlr4::ParserRuleContext {
   public:
    Table_aliasContext(antlr4::ParserRuleContext *parent_ctx,
                       size_t invoking_state);
    size_t getRuleIndex() const override;

    // Sub-rule accessor.
    IdentifierContext *identifier();

    // Listener entry/exit hooks; bodies are defined out of line.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method for `table_alias`; returns a pointer to its context type.
  Table_aliasContext *table_alias();

  // Context node type for the `field_alias` grammar rule.
  class Field_aliasContext : public antlr4::ParserRuleContext {
   public:
    Field_aliasContext(antlr4::ParserRuleContext *parent_ctx,
                       size_t invoking_state);
    size_t getRuleIndex() const override;

    // Alias identifier sub-rule and the AS keyword token.
    IdentifierContext *identifier();
    antlr4::tree::TerminalNode *AS();

    // Listener entry/exit hooks; bodies are defined out of line.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method for `field_alias`; returns a pointer to its context type.
  Field_aliasContext *field_alias();

  // Context node type for the `numeric` grammar rule.
  class NumericContext : public antlr4::ParserRuleContext {
   public:
    NumericContext(antlr4::ParserRuleContext *parent_ctx,
                   size_t invoking_state);
    size_t getRuleIndex() const override;

    // Sub-rule accessors for the integer and floating-point forms.
    Int_valueContext *int_value();
    Float_valueContext *float_value();

    // Listener entry/exit hooks; bodies are defined out of line.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method for `numeric`; returns a pointer to its context type.
  NumericContext *numeric();

  // Context node type for the `int_value` grammar rule.
  class Int_valueContext : public antlr4::ParserRuleContext {
   public:
    Int_valueContext(antlr4::ParserRuleContext *parent_ctx,
                     size_t invoking_state);
    size_t getRuleIndex() const override;

    // Token accessor.
    antlr4::tree::TerminalNode *INTEGER();

    // Listener entry/exit hooks; bodies are defined out of line.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method for `int_value`; returns a pointer to its context type.
  Int_valueContext *int_value();

  // Context node type for the `float_value` grammar rule.
  class Float_valueContext : public antlr4::ParserRuleContext {
   public:
    Float_valueContext(antlr4::ParserRuleContext *parent_ctx,
                       size_t invoking_state);
    size_t getRuleIndex() const override;

    // Token accessor.
    antlr4::tree::TerminalNode *FLOAT();

    // Listener entry/exit hooks; bodies are defined out of line.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method for `float_value`; returns a pointer to its context type.
  Float_valueContext *float_value();

  // Context node type for the `quoted_string` grammar rule.
  class Quoted_stringContext : public antlr4::ParserRuleContext {
   public:
    Quoted_stringContext(antlr4::ParserRuleContext *parent_ctx,
                         size_t invoking_state);
    size_t getRuleIndex() const override;

    // Token accessors for the two string token kinds.
    antlr4::tree::TerminalNode *SQUOTA_STRING();
    antlr4::tree::TerminalNode *DQUOTA_STRING();

    // Listener entry/exit hooks; bodies are defined out of line.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method for `quoted_string`; returns a pointer to its context type.
  Quoted_stringContext *quoted_string();

  // Context node type for the `bool_value` grammar rule.
  class Bool_valueContext : public antlr4::ParserRuleContext {
   public:
    Bool_valueContext(antlr4::ParserRuleContext *parent_ctx,
                      size_t invoking_state);
    size_t getRuleIndex() const override;

    // Token accessors for the boolean literals.
    antlr4::tree::TerminalNode *TRUE_V();   // 'TRUE'
    antlr4::tree::TerminalNode *FALSE_V();  // 'FALSE'

    // Listener entry/exit hooks; bodies are defined out of line.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method for `bool_value`; returns a pointer to its context type.
  Bool_valueContext *bool_value();

  // Context node type for the `identifier` grammar rule.
  class IdentifierContext : public antlr4::ParserRuleContext {
   public:
    IdentifierContext(antlr4::ParserRuleContext *parent_ctx,
                      size_t invoking_state);
    size_t getRuleIndex() const override;

    // Sub-rule accessor.
    Regular_idContext *regular_id();

    // Listener entry/exit hooks; bodies are defined out of line.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method for `identifier`; returns a pointer to its context type.
  IdentifierContext *identifier();

  // Context node type for the `ne_op` grammar rule.
  class Ne_opContext : public antlr4::ParserRuleContext {
   public:
    Ne_opContext(antlr4::ParserRuleContext *parent_ctx, size_t invoking_state);
    size_t getRuleIndex() const override;

    // Token accessor.
    antlr4::tree::TerminalNode *NE_OP();  // '!='

    // Listener entry/exit hooks; bodies are defined out of line.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method for `ne_op`; returns a pointer to its context type.
  Ne_opContext *ne_op();

  // Context node type for the `ge_op` grammar rule.
  class Ge_opContext : public antlr4::ParserRuleContext {
   public:
    Ge_opContext(antlr4::ParserRuleContext *parent_ctx, size_t invoking_state);
    size_t getRuleIndex() const override;

    // Token accessors.
    antlr4::tree::TerminalNode *GE_OP();  // '>='
    antlr4::tree::TerminalNode *G_OP();   // '>'
    antlr4::tree::TerminalNode *E_OP();   // '='

    // Listener entry/exit hooks; bodies are defined out of line.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method for `ge_op`; returns a pointer to its context type.
  Ge_opContext *ge_op();

  // Context node type for the `le_op` grammar rule.
  class Le_opContext : public antlr4::ParserRuleContext {
   public:
    Le_opContext(antlr4::ParserRuleContext *parent_ctx, size_t invoking_state);
    size_t getRuleIndex() const override;

    // Token accessors.
    antlr4::tree::TerminalNode *LE_OP();  // '<='
    antlr4::tree::TerminalNode *L_OP();   // '<'
    antlr4::tree::TerminalNode *E_OP();   // '='

    // Listener entry/exit hooks; bodies are defined out of line.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method for `le_op`; returns a pointer to its context type.
  Le_opContext *le_op();

  // Context node type for the `regular_id` grammar rule.
  class Regular_idContext : public antlr4::ParserRuleContext {
   public:
    Regular_idContext(antlr4::ParserRuleContext *parent_ctx,
                      size_t invoking_state);
    size_t getRuleIndex() const override;

    // Plain identifier token.
    antlr4::tree::TerminalNode *REGULAR_ID();

    // Keyword tokens that this rule also exposes.
    // NOTE(review): presumably listed so these keywords stay usable as
    // identifiers — confirm against SQLParser.g4.
    antlr4::tree::TerminalNode *OR();
    antlr4::tree::TerminalNode *AND();
    antlr4::tree::TerminalNode *NOT();
    antlr4::tree::TerminalNode *IN();
    antlr4::tree::TerminalNode *BETWEEN();
    antlr4::tree::TerminalNode *LIKE();
    antlr4::tree::TerminalNode *WHERE();
    antlr4::tree::TerminalNode *SELECT();
    antlr4::tree::TerminalNode *AS();
    antlr4::tree::TerminalNode *BY();
    antlr4::tree::TerminalNode *ORDER();
    antlr4::tree::TerminalNode *ASC();
    antlr4::tree::TerminalNode *DESC();
    antlr4::tree::TerminalNode *LIMIT();

    // Listener entry/exit hooks; bodies are defined out of line.
    void enterRule(antlr4::tree::ParseTreeListener *listener) override;
    void exitRule(antlr4::tree::ParseTreeListener *listener) override;
  };

  // Rule method for `regular_id`; returns a pointer to its context type.
  Regular_idContext *regular_id();


  // Semantic-predicate hook overriding the antlr4 base-class declaration.
  // NOTE(review): presumably dispatches on `ruleIndex` to the per-rule helper
  // declared below — confirm in the implementation file.
  bool sempred(antlr4::RuleContext *_localctx, size_t ruleIndex,
               size_t predicateIndex) override;

  // Predicate helper for the `logic_expr` rule, selected by `predicateIndex`.
  bool logic_exprSempred(Logic_exprContext * /*_localctx*/,
                         size_t predicateIndex);

 private:
  // Static (class-wide) parser tables; declared here and defined out of line.
  static std::vector<antlr4::dfa::DFA> _decisionToDFA;
  static antlr4::atn::PredictionContextCache _sharedContextCache;
  static std::vector<std::string> _ruleNames;
  static std::vector<std::string> _tokenNames;

  // Token naming tables and the vocabulary object, followed by the ATN and its
  // serialized form stored as 16-bit words.
  static std::vector<std::string> _literalNames;
  static std::vector<std::string> _symbolicNames;
  static antlr4::dfa::Vocabulary _vocabulary;
  static antlr4::atn::ATN _atn;
  static std::vector<uint16_t> _serializedATN;


  // Helper type with a single static instance `_init`.
  // NOTE(review): presumably its constructor fills the static tables above
  // during static initialization — confirm in the implementation file.
  struct Initializer {
    Initializer();
  };
  static Initializer _init;
};

}  // namespace antlr4


================================================
FILE: src/db/sqlengine/antlr/gen/SQLParser.interp
================================================
token literal names:
null
'OR'
'AND'
'NOT'
'IN'
'CONTAIN_ALL'
'CONTAIN_ANY'
'BETWEEN'
'LIKE'
'WHERE'
'SELECT'
'FROM'
'AS'
'BY'
'ORDER'
'ASC'
'DESC'
'LIMIT'
'TRUE'
'FALSE'
'IS'
'NULL'
null
null
null
null
'.'
'('
')'
'['
']'
'*'
'+'
'-'
','
'/'
'%'
'@'
':='
'#'
':'
';'
'<='
'>='
'!='
'^'
'~'
'<'
'>'
'='
'||'
'_'
null
null
null
null
null

token symbolic names:
null
OR
AND
NOT
IN
CONTAIN_ALL
CONTAIN_ANY
BETWEEN
LIKE
WHERE
SELECT
FROM
AS
BY
ORDER
ASC
DESC
LIMIT
TRUE_V
FALSE_V
IS
NULL_V
INTEGER
FLOAT
SQUOTA_STRING
DQUOTA_STRING
DOT
LP
RP
LMP
RMP
ASTERISK
PLUS_SIGN
MINUS_SIGN
COMMA
SOLIDUS
MOD
AT_SIGN
ASSIGN_OP
SHARP_SIGN
COLON
SEMI
LE_OP
GE_OP
NE_OP
CARET_OP
TILDE_OP
L_OP
G_OP
E_OP
CONCAT_OP
UNDERSCORE
SPACES
VECTOR
SINGLE_LINE_COMMENT
MULTI_LINE_COMMENT
REGULAR_ID

rule names:
swallow_to_semi
compilation_unit
logic_expr_unit
unit_statement
where_clause
logic_expr
enclosed_expr
relation_expr
rel_oper
value_expr
in_value_expr_list
in_value_expr
constant
constant_num_and_str
matrix
vector_expr
function_value_expr
function_call
dql_statement
select_statement
selected_elements
selected_element
from_clause
order_by_clause
order_by_element
limit_clause
tableview_name
field_name
table_alias
field_alias
numeric
int_value
float_value
quoted_string
bool_value
identifier
ne_op
ge_op
le_op
regular_id


atn:
[3, 24715, 42794, 33075, 47597, 16764, 15335, 30598, 22884, 3, 58, 325, 4, 2, 9, 2, 4, 3, 9, 3, 4, 4, 9, 4, 4, 5, 9, 5, 4, 6, 9, 6, 4, 7, 9, 7, 4, 8, 9, 8, 4, 9, 9, 9, 4, 10, 9, 10, 4, 11, 9, 11, 4, 12, 9, 12, 4, 13, 9, 13, 4, 14, 9, 14, 4, 15, 9, 15, 4, 16, 9, 16, 4, 17, 9, 17, 4, 18, 9, 18, 4, 19, 9, 19, 4, 20, 9, 20, 4, 21, 9, 21, 4, 22, 9, 22, 4, 23, 9, 23, 4, 24, 9, 24, 4, 25, 9, 25, 4, 26, 9, 26, 4, 27, 9, 27, 4, 28, 9, 28, 4, 29, 9, 29, 4, 30, 9, 30, 4, 31, 9, 31, 4, 32, 9, 32, 4, 33, 9, 33, 4, 34, 9, 34, 4, 35, 9, 35, 4, 36, 9, 36, 4, 37, 9, 37, 4, 38, 9, 38, 4, 39, 9, 39, 4, 40, 9, 40, 4, 41, 9, 41, 3, 2, 6, 2, 84, 10, 2, 13, 2, 14, 2, 85, 3, 3, 3, 3, 5, 3, 90, 10, 3, 6, 3, 92, 10, 3, 13, 3, 14, 3, 93, 3, 3, 3, 3, 3, 4, 3, 4, 3, 4, 3, 5, 3, 5, 3, 6, 3, 6, 3, 6, 3, 7, 3, 7, 3, 7, 5, 7, 109, 10, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 7, 7, 117, 10, 7, 12, 7, 14, 7, 120, 11, 7, 3, 8, 3, 8, 3, 8, 3, 8, 3, 9, 3, 9, 3, 9, 3, 9, 3, 9, 3, 9, 3, 9, 3, 9, 3, 9, 3, 9, 5, 9, 136, 10, 9, 3, 9, 3, 9, 3, 9, 3, 9, 3, 9, 3, 9, 3, 9, 5, 9, 145, 10, 9, 3, 9, 3, 9, 3, 9, 5, 9, 150, 10, 9, 3, 9, 3, 9, 3, 9, 3, 9, 3, 9, 5, 9, 157, 10, 9, 3, 9, 3, 9, 3, 9, 3, 9, 3, 9, 3, 9, 5, 9, 165, 10, 9, 3, 10, 3, 10, 3, 10, 3, 10, 3, 10, 3, 10, 5, 10, 173, 10, 10, 3, 11, 3, 11, 5, 11, 177, 10, 11, 3, 12, 3, 12, 3, 12, 7, 12, 182, 10, 12, 12, 12, 14, 12, 185, 11, 12, 3, 13, 3, 13, 5, 13, 189, 10, 13, 3, 14, 3, 14, 3, 14, 3, 14, 5, 14, 195, 10, 14, 3, 15, 3, 15, 5, 15, 199, 10, 15, 3, 16, 3, 16, 3, 16, 3, 16, 7, 16, 205, 10, 16, 12, 16, 14, 16, 208, 11, 16, 3, 16, 3, 16, 3, 17, 3, 17, 5, 17, 214, 10, 17, 3, 18, 3, 18, 5, 18, 218, 10, 18, 3, 19, 3, 19, 3, 19, 3, 19, 3, 19, 7, 19, 225, 10, 19, 12, 19, 14, 19, 228, 11, 19, 5, 19, 230, 10, 19, 3, 19, 3, 19, 3, 20, 3, 20, 3, 21, 3, 21, 3, 21, 3, 21, 5, 21, 240, 10, 21, 3, 21, 5, 21, 243, 10, 21, 3, 21, 5, 21, 246, 10, 21, 3, 22, 3, 22, 3, 22, 7, 22, 251, 10, 22, 12, 22, 14, 22, 254, 11, 22, 3, 23, 3, 23, 3, 23, 5, 23, 259, 10, 23, 3, 23, 5, 23, 
262, 10, 23, 5, 23, 264, 10, 23, 3, 24, 3, 24, 3, 24, 3, 25, 3, 25, 3, 25, 3, 25, 3, 25, 7, 25, 274, 10, 25, 12, 25, 14, 25, 277, 11, 25, 3, 26, 3, 26, 5, 26, 281, 10, 26, 3, 27, 3, 27, 3, 27, 3, 28, 3, 28, 3, 29, 3, 29, 3, 30, 3, 30, 3, 31, 5, 31, 293, 10, 31, 3, 31, 3, 31, 3, 32, 3, 32, 5, 32, 299, 10, 32, 3, 33, 3, 33, 3, 34, 3, 34, 3, 35, 3, 35, 3, 36, 3, 36, 3, 37, 3, 37, 3, 38, 3, 38, 3, 39, 3, 39, 3, 39, 5, 39, 316, 10, 39, 3, 40, 3, 40, 3, 40, 5, 40, 321, 10, 40, 3, 41, 3, 41, 3, 41, 2, 3, 12, 42, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 2, 9, 3, 2, 43, 43, 4, 2, 37, 37, 43, 43, 3, 2, 7, 8, 3, 2, 17, 18, 3, 2, 26, 27, 3, 2, 20, 21, 6, 2, 3, 6, 9, 12, 14, 19, 58, 58, 2, 329, 2, 83, 3, 2, 2, 2, 4, 91, 3, 2, 2, 2, 6, 97, 3, 2, 2, 2, 8, 100, 3, 2, 2, 2, 10, 102, 3, 2, 2, 2, 12, 108, 3, 2, 2, 2, 14, 121, 3, 2, 2, 2, 16, 164, 3, 2, 2, 2, 18, 172, 3, 2, 2, 2, 20, 176, 3, 2, 2, 2, 22, 178, 3, 2, 2, 2, 24, 188, 3, 2, 2, 2, 26, 194, 3, 2, 2, 2, 28, 198, 3, 2, 2, 2, 30, 200, 3, 2, 2, 2, 32, 213, 3, 2, 2, 2, 34, 217, 3, 2, 2, 2, 36, 219, 3, 2, 2, 2, 38, 233, 3, 2, 2, 2, 40, 235, 3, 2, 2, 2, 42, 247, 3, 2, 2, 2, 44, 263, 3, 2, 2, 2, 46, 265, 3, 2, 2, 2, 48, 268, 3, 2, 2, 2, 50, 278, 3, 2, 2, 2, 52, 282, 3, 2, 2, 2, 54, 285, 3, 2, 2, 2, 56, 287, 3, 2, 2, 2, 58, 289, 3, 2, 2, 2, 60, 292, 3, 2, 2, 2, 62, 298, 3, 2, 2, 2, 64, 300, 3, 2, 2, 2, 66, 302, 3, 2, 2, 2, 68, 304, 3, 2, 2, 2, 70, 306, 3, 2, 2, 2, 72, 308, 3, 2, 2, 2, 74, 310, 3, 2, 2, 2, 76, 315, 3, 2, 2, 2, 78, 320, 3, 2, 2, 2, 80, 322, 3, 2, 2, 2, 82, 84, 10, 2, 2, 2, 83, 82, 3, 2, 2, 2, 84, 85, 3, 2, 2, 2, 85, 83, 3, 2, 2, 2, 85, 86, 3, 2, 2, 2, 86, 3, 3, 2, 2, 2, 87, 89, 5, 8, 5, 2, 88, 90, 9, 3, 2, 2, 89, 88, 3, 2, 2, 2, 89, 90, 3, 2, 2, 2, 90, 92, 3, 2, 2, 2, 91, 87, 3, 2, 2, 2, 92, 93, 3, 2, 2, 2, 93, 91, 3, 2, 2, 2, 93, 94, 3, 2, 2, 2, 94, 95, 3, 2, 2, 2, 95, 96, 7, 2, 2, 3, 96, 5, 3, 2, 2, 2, 
97, 98, 5, 12, 7, 2, 98, 99, 7, 2, 2, 3, 99, 7, 3, 2, 2, 2, 100, 101, 5, 38, 20, 2, 101, 9, 3, 2, 2, 2, 102, 103, 7, 11, 2, 2, 103, 104, 5, 12, 7, 2, 104, 11, 3, 2, 2, 2, 105, 106, 8, 7, 1, 2, 106, 109, 5, 16, 9, 2, 107, 109, 5, 14, 8, 2, 108, 105, 3, 2, 2, 2, 108, 107, 3, 2, 2, 2, 109, 118, 3, 2, 2, 2, 110, 111, 12, 5, 2, 2, 111, 112, 7, 4, 2, 2, 112, 117, 5, 12, 7, 6, 113, 114, 12, 4, 2, 2, 114, 115, 7, 3, 2, 2, 115, 117, 5, 12, 7, 5, 116, 110, 3, 2, 2, 2, 116, 113, 3, 2, 2, 2, 117, 120, 3, 2, 2, 2, 118, 116, 3, 2, 2, 2, 118, 119, 3, 2, 2, 2, 119, 13, 3, 2, 2, 2, 120, 118, 3, 2, 2, 2, 121, 122, 7, 29, 2, 2, 122, 123, 5, 12, 7, 2, 123, 124, 7, 30, 2, 2, 124, 15, 3, 2, 2, 2, 125, 126, 5, 72, 37, 2, 126, 127, 5, 18, 10, 2, 127, 128, 5, 20, 11, 2, 128, 165, 3, 2, 2, 2, 129, 130, 5, 72, 37, 2, 130, 131, 7, 10, 2, 2, 131, 132, 5, 20, 11, 2, 132, 165, 3, 2, 2, 2, 133, 135, 5, 72, 37, 2, 134, 136, 7, 5, 2, 2, 135, 134, 3, 2, 2, 2, 135, 136, 3, 2, 2, 2, 136, 137, 3, 2, 2, 2, 137, 138, 7, 6, 2, 2, 138, 139, 7, 29, 2, 2, 139, 140, 5, 22, 12, 2, 140, 141, 7, 30, 2, 2, 141, 165, 3, 2, 2, 2, 142, 144, 5, 72, 37, 2, 143, 145, 7, 5, 2, 2, 144, 143, 3, 2, 2, 2, 144, 145, 3, 2, 2, 2, 145, 146, 3, 2, 2, 2, 146, 147, 9, 4, 2, 2, 147, 149, 7, 29, 2, 2, 148, 150, 5, 22, 12, 2, 149, 148, 3, 2, 2, 2, 149, 150, 3, 2, 2, 2, 150, 151, 3, 2, 2, 2, 151, 152, 7, 30, 2, 2, 152, 165, 3, 2, 2, 2, 153, 154, 5, 72, 37, 2, 154, 156, 7, 22, 2, 2, 155, 157, 7, 5, 2, 2, 156, 155, 3, 2, 2, 2, 156, 157, 3, 2, 2, 2, 157, 158, 3, 2, 2, 2, 158, 159, 7, 23, 2, 2, 159, 165, 3, 2, 2, 2, 160, 161, 5, 36, 19, 2, 161, 162, 5, 18, 10, 2, 162, 163, 5, 20, 11, 2, 163, 165, 3, 2, 2, 2, 164, 125, 3, 2, 2, 2, 164, 129, 3, 2, 2, 2, 164, 133, 3, 2, 2, 2, 164, 142, 3, 2, 2, 2, 164, 153, 3, 2, 2, 2, 164, 160, 3, 2, 2, 2, 165, 17, 3, 2, 2, 2, 166, 173, 7, 51, 2, 2, 167, 173, 5, 74, 38, 2, 168, 173, 7, 49, 2, 2, 169, 173, 7, 50, 2, 2, 170, 173, 5, 78, 40, 2, 171, 173, 5, 76, 39, 2, 172, 166, 3, 2, 2, 2, 172, 167, 3, 2, 2, 
2, 172, 168, 3, 2, 2, 2, 172, 169, 3, 2, 2, 2, 172, 170, 3, 2, 2, 2, 172, 171, 3, 2, 2, 2, 173, 19, 3, 2, 2, 2, 174, 177, 5, 26, 14, 2, 175, 177, 5, 36, 19, 2, 176, 174, 3, 2, 2, 2, 176, 175, 3, 2, 2, 2, 177, 21, 3, 2, 2, 2, 178, 183, 5, 24, 13, 2, 179, 180, 7, 36, 2, 2, 180, 182, 5, 24, 13, 2, 181, 179, 3, 2, 2, 2, 182, 185, 3, 2, 2, 2, 183, 181, 3, 2, 2, 2, 183, 184, 3, 2, 2, 2, 184, 23, 3, 2, 2, 2, 185, 183, 3, 2, 2, 2, 186, 189, 5, 28, 15, 2, 187, 189, 5, 70, 36, 2, 188, 186, 3, 2, 2, 2, 188, 187, 3, 2, 2, 2, 189, 25, 3, 2, 2, 2, 190, 195, 5, 62, 32, 2, 191, 195, 5, 68, 35, 2, 192, 195, 5, 32, 17, 2, 193, 195, 5, 70, 36, 2, 194, 190, 3, 2, 2, 2, 194, 191, 3, 2, 2, 2, 194, 192, 3, 2, 2, 2, 194, 193, 3, 2, 2, 2, 195, 27, 3, 2, 2, 2, 196, 199, 5, 62, 32, 2, 197, 199, 5, 68, 35, 2, 198, 196, 3, 2, 2, 2, 198, 197, 3, 2, 2, 2, 199, 29, 3, 2, 2, 2, 200, 201, 7, 31, 2, 2, 201, 206, 7, 55, 2, 2, 202, 203, 7, 36, 2, 2, 203, 205, 7, 55, 2, 2, 204, 202, 3, 2, 2, 2, 205, 208, 3, 2, 2, 2, 206, 204, 3, 2, 2, 2, 206, 207, 3, 2, 2, 2, 207, 209, 3, 2, 2, 2, 208, 206, 3, 2, 2, 2, 209, 210, 7, 32, 2, 2, 210, 31, 3, 2, 2, 2, 211, 214, 7, 55, 2, 2, 212, 214, 5, 30, 16, 2, 213, 211, 3, 2, 2, 2, 213, 212, 3, 2, 2, 2, 214, 33, 3, 2, 2, 2, 215, 218, 5, 20, 11, 2, 216, 218, 5, 72, 37, 2, 217, 215, 3, 2, 2, 2, 217, 216, 3, 2, 2, 2, 218, 35, 3, 2, 2, 2, 219, 220, 5, 72, 37, 2, 220, 229, 7, 29, 2, 2, 221, 226, 5, 34, 18, 2, 222, 223, 7, 36, 2, 2, 223, 225, 5, 34, 18, 2, 224, 222, 3, 2, 2, 2, 225, 228, 3, 2, 2, 2, 226, 224, 3, 2, 2, 2, 226, 227, 3, 2, 2, 2, 227, 230, 3, 2, 2, 2, 228, 226, 3, 2, 2, 2, 229, 221, 3, 2, 2, 2, 229, 230, 3, 2, 2, 2, 230, 231, 3, 2, 2, 2, 231, 232, 7, 30, 2, 2, 232, 37, 3, 2, 2, 2, 233, 234, 5, 40, 21, 2, 234, 39, 3, 2, 2, 2, 235, 236, 7, 12, 2, 2, 236, 237, 5, 42, 22, 2, 237, 239, 5, 46, 24, 2, 238, 240, 5, 10, 6, 2, 239, 238, 3, 2, 2, 2, 239, 240, 3, 2, 2, 2, 240, 242, 3, 2, 2, 2, 241, 243, 5, 48, 25, 2, 242, 241, 3, 2, 2, 2, 242, 243, 3, 2, 2, 2, 243, 245, 3, 2, 
2, 2, 244, 246, 5, 52, 27, 2, 245, 244, 3, 2, 2, 2, 245, 246, 3, 2, 2, 2, 246, 41, 3, 2, 2, 2, 247, 252, 5, 44, 23, 2, 248, 249, 7, 36, 2, 2, 249, 251, 5, 44, 23, 2, 250, 248, 3, 2, 2, 2, 251, 254, 3, 2, 2, 2, 252, 250, 3, 2, 2, 2, 252, 253, 3, 2, 2, 2, 253, 43, 3, 2, 2, 2, 254, 252, 3, 2, 2, 2, 255, 264, 7, 33, 2, 2, 256, 258, 5, 56, 29, 2, 257, 259, 7, 14, 2, 2, 258, 257, 3, 2, 2, 2, 258, 259, 3, 2, 2, 2, 259, 261, 3, 2, 2, 2, 260, 262, 5, 60, 31, 2, 261, 260, 3, 2, 2, 2, 261, 262, 3, 2, 2, 2, 262, 264, 3, 2, 2, 2, 263, 255, 3, 2, 2, 2, 263, 256, 3, 2, 2, 2, 264, 45, 3, 2, 2, 2, 265, 266, 7, 13, 2, 2, 266, 267, 5, 54, 28, 2, 267, 47, 3, 2, 2, 2, 268, 269, 7, 16, 2, 2, 269, 270, 7, 15, 2, 2, 270, 275, 5, 50, 26, 2, 271, 272, 7, 36, 2, 2, 272, 274, 5, 50, 26, 2, 273, 271, 3, 2, 2, 2, 274, 277, 3, 2, 2, 2, 275, 273, 3, 2, 2, 2, 275, 276, 3, 2, 2, 2, 276, 49, 3, 2, 2, 2, 277, 275, 3, 2, 2, 2, 278, 280, 5, 56, 29, 2, 279, 281, 9, 5, 2, 2, 280, 279, 3, 2, 2, 2, 280, 281, 3, 2, 2, 2, 281, 51, 3, 2, 2, 2, 282, 283, 7, 19, 2, 2, 283, 284, 5, 64, 33, 2, 284, 53, 3, 2, 2, 2, 285, 286, 5, 72, 37, 2, 286, 55, 3, 2, 2, 2, 287, 288, 5, 72, 37, 2, 288, 57, 3, 2, 2, 2, 289, 290, 5, 72, 37, 2, 290, 59, 3, 2, 2, 2, 291, 293, 7, 14, 2, 2, 292, 291, 3, 2, 2, 2, 292, 293, 3, 2, 2, 2, 293, 294, 3, 2, 2, 2, 294, 295, 5, 72, 37, 2, 295, 61, 3, 2, 2, 2, 296, 299, 5, 64, 33, 2, 297, 299, 5, 66, 34, 2, 298, 296, 3, 2, 2, 2, 298, 297, 3, 2, 2, 2, 299, 63, 3, 2, 2, 2, 300, 301, 7, 24, 2, 2, 301, 65, 3, 2, 2, 2, 302, 303, 7, 25, 2, 2, 303, 67, 3, 2, 2, 2, 304, 305, 9, 6, 2, 2, 305, 69, 3, 2, 2, 2, 306, 307, 9, 7, 2, 2, 307, 71, 3, 2, 2, 2, 308, 309, 5, 80, 41, 2, 309, 73, 3, 2, 2, 2, 310, 311, 7, 46, 2, 2, 311, 75, 3, 2, 2, 2, 312, 316, 7, 45, 2, 2, 313, 314, 7, 50, 2, 2, 314, 316, 7, 51, 2, 2, 315, 312, 3, 2, 2, 2, 315, 313, 3, 2, 2, 2, 316, 77, 3, 2, 2, 2, 317, 321, 7, 44, 2, 2, 318, 319, 7, 49, 2, 2, 319, 321, 7, 51, 2, 2, 320, 317, 3, 2, 2, 2, 320, 318, 3, 2, 2, 2, 321, 79, 3, 2, 2, 2, 
322, 323, 9, 8, 2, 2, 323, 81, 3, 2, 2, 2, 37, 85, 89, 93, 108, 116, 118, 135, 144, 149, 156, 164, 172, 176, 183, 188, 194, 198, 206, 213, 217, 226, 229, 239, 242, 245, 252, 258, 261, 263, 275, 280, 292, 298, 315, 320]

================================================
FILE: src/db/sqlengine/antlr/gen/SQLParser.tokens
================================================
OR=1
AND=2
NOT=3
IN=4
CONTAIN_ALL=5
CONTAIN_ANY=6
BETWEEN=7
LIKE=8
WHERE=9
SELECT=10
FROM=11
AS=12
BY=13
ORDER=14
ASC=15
DESC=16
LIMIT=17
TRUE_V=18
FALSE_V=19
IS=20
NULL_V=21
INTEGER=22
FLOAT=23
SQUOTA_STRING=24
DQUOTA_STRING=25
DOT=26
LP=27
RP=28
LMP=29
RMP=30
ASTERISK=31
PLUS_SIGN=32
MINUS_SIGN=33
COMMA=34
SOLIDUS=35
MOD=36
AT_SIGN=37
ASSIGN_OP=38
SHARP_SIGN=39
COLON=40
SEMI=41
LE_OP=42
GE_OP=43
NE_OP=44
CARET_OP=45
TILDE_OP=46
L_OP=47
G_OP=48
E_OP=49
CONCAT_OP=50
UNDERSCORE=51
SPACES=52
VECTOR=53
SINGLE_LINE_COMMENT=54
MULTI_LINE_COMMENT=55
REGULAR_ID=56
'OR'=1
'AND'=2
'NOT'=3
'IN'=4
'CONTAIN_ALL'=5
'CONTAIN_ANY'=6
'BETWEEN'=7
'LIKE'=8
'WHERE'=9
'SELECT'=10
'FROM'=11
'AS'=12
'BY'=13
'ORDER'=14
'ASC'=15
'DESC'=16
'LIMIT'=17
'TRUE'=18
'FALSE'=19
'IS'=20
'NULL'=21
'.'=26
'('=27
')'=28
'['=29
']'=30
'*'=31
'+'=32
'-'=33
','=34
'/'=35
'%'=36
'@'=37
':='=38
'#'=39
':'=40
';'=41
'<='=42
'>='=43
'!='=44
'^'=45
'~'=46
'<'=47
'>'=48
'='=49
'||'=50
'_'=51


================================================
FILE: src/db/sqlengine/antlr/gen/SQLParserBaseListener.cc
================================================

// Generated from SQLParser.g4 by ANTLR 4.8


#include "SQLParserBaseListener.h"


using namespace antlr4;


================================================
FILE: src/db/sqlengine/antlr/gen/SQLParserBaseListener.h
================================================

// Generated from SQLParser.g4 by ANTLR 4.8

#pragma once


#include "SQLParserListener.h"
#include "antlr4-runtime.h"


namespace antlr4 {

/**
 * No-op implementation of SQLParserListener.
 *
 * Every callback has an empty body, so a concrete listener can derive from
 * this class and override only the enter/exit hooks it cares about.
 */
class SQLParserBaseListener : public SQLParserListener {
 public:
  // --- Per-rule enter/exit callbacks (all intentionally empty) ---

  void enterSwallow_to_semi(
      SQLParser::Swallow_to_semiContext * /*ctx*/) override {}
  void exitSwallow_to_semi(
      SQLParser::Swallow_to_semiContext * /*ctx*/) override {}

  void enterCompilation_unit(
      SQLParser::Compilation_unitContext * /*ctx*/) override {}
  void exitCompilation_unit(
      SQLParser::Compilation_unitContext * /*ctx*/) override {}

  void enterLogic_expr_unit(
      SQLParser::Logic_expr_unitContext * /*ctx*/) override {}
  void exitLogic_expr_unit(
      SQLParser::Logic_expr_unitContext * /*ctx*/) override {}

  void enterUnit_statement(
      SQLParser::Unit_statementContext * /*ctx*/) override {}
  void exitUnit_statement(
      SQLParser::Unit_statementContext * /*ctx*/) override {}

  void enterWhere_clause(SQLParser::Where_clauseContext * /*ctx*/) override {}
  void exitWhere_clause(SQLParser::Where_clauseContext * /*ctx*/) override {}

  void enterLogic_expr(SQLParser::Logic_exprContext * /*ctx*/) override {}
  void exitLogic_expr(SQLParser::Logic_exprContext * /*ctx*/) override {}

  void enterEnclosed_expr(
      SQLParser::Enclosed_exprContext * /*ctx*/) override {}
  void exitEnclosed_expr(
      SQLParser::Enclosed_exprContext * /*ctx*/) override {}

  void enterRelation_expr(
      SQLParser::Relation_exprContext * /*ctx*/) override {}
  void exitRelation_expr(
      SQLParser::Relation_exprContext * /*ctx*/) override {}

  void enterRel_oper(SQLParser::Rel_operContext * /*ctx*/) override {}
  void exitRel_oper(SQLParser::Rel_operContext * /*ctx*/) override {}

  void enterValue_expr(SQLParser::Value_exprContext * /*ctx*/) override {}
  void exitValue_expr(SQLParser::Value_exprContext * /*ctx*/) override {}

  void enterIn_value_expr_list(
      SQLParser::In_value_expr_listContext * /*ctx*/) override {}
  void exitIn_value_expr_list(
      SQLParser::In_value_expr_listContext * /*ctx*/) override {}

  void enterIn_value_expr(
      SQLParser::In_value_exprContext * /*ctx*/) override {}
  void exitIn_value_expr(
      SQLParser::In_value_exprContext * /*ctx*/) override {}

  void enterConstant(SQLParser::ConstantContext * /*ctx*/) override {}
  void exitConstant(SQLParser::ConstantContext * /*ctx*/) override {}

  void enterConstant_num_and_str(
      SQLParser::Constant_num_and_strContext * /*ctx*/) override {}
  void exitConstant_num_and_str(
      SQLParser::Constant_num_and_strContext * /*ctx*/) override {}

  void enterMatrix(SQLParser::MatrixContext * /*ctx*/) override {}
  void exitMatrix(SQLParser::MatrixContext * /*ctx*/) override {}

  void enterVector_expr(SQLParser::Vector_exprContext * /*ctx*/) override {}
  void exitVector_expr(SQLParser::Vector_exprContext * /*ctx*/) override {}

  void enterFunction_value_expr(
      SQLParser::Function_value_exprContext * /*ctx*/) override {}
  void exitFunction_value_expr(
      SQLParser::Function_value_exprContext * /*ctx*/) override {}

  void enterFunction_call(
      SQLParser::Function_callContext * /*ctx*/) override {}
  void exitFunction_call(
      SQLParser::Function_callContext * /*ctx*/) override {}

  void enterDql_statement(
      SQLParser::Dql_statementContext * /*ctx*/) override {}
  void exitDql_statement(
      SQLParser::Dql_statementContext * /*ctx*/) override {}

  void enterSelect_statement(
      SQLParser::Select_statementContext * /*ctx*/) override {}
  void exitSelect_statement(
      SQLParser::Select_statementContext * /*ctx*/) override {}

  void enterSelected_elements(
      SQLParser::Selected_elementsContext * /*ctx*/) override {}
  void exitSelected_elements(
      SQLParser::Selected_elementsContext * /*ctx*/) override {}

  void enterSelected_element(
      SQLParser::Selected_elementContext * /*ctx*/) override {}
  void exitSelected_element(
      SQLParser::Selected_elementContext * /*ctx*/) override {}

  void enterFrom_clause(SQLParser::From_clauseContext * /*ctx*/) override {}
  void exitFrom_clause(SQLParser::From_clauseContext * /*ctx*/) override {}

  void enterOrder_by_clause(
      SQLParser::Order_by_clauseContext * /*ctx*/) override {}
  void exitOrder_by_clause(
      SQLParser::Order_by_clauseContext * /*ctx*/) override {}

  void enterOrder_by_element(
      SQLParser::Order_by_elementContext * /*ctx*/) override {}
  void exitOrder_by_element(
      SQLParser::Order_by_elementContext * /*ctx*/) override {}

  void enterLimit_clause(SQLParser::Limit_clauseContext * /*ctx*/) override {}
  void exitLimit_clause(SQLParser::Limit_clauseContext * /*ctx*/) override {}

  void enterTableview_name(
      SQLParser::Tableview_nameContext * /*ctx*/) override {}
  void exitTableview_name(
      SQLParser::Tableview_nameContext * /*ctx*/) override {}

  void enterField_name(SQLParser::Field_nameContext * /*ctx*/) override {}
  void exitField_name(SQLParser::Field_nameContext * /*ctx*/) override {}

  void enterTable_alias(SQLParser::Table_aliasContext * /*ctx*/) override {}
  void exitTable_alias(SQLParser::Table_aliasContext * /*ctx*/) override {}

  void enterField_alias(SQLParser::Field_aliasContext * /*ctx*/) override {}
  void exitField_alias(SQLParser::Field_aliasContext * /*ctx*/) override {}

  void enterNumeric(SQLParser::NumericContext * /*ctx*/) override {}
  void exitNumeric(SQLParser::NumericContext * /*ctx*/) override {}

  void enterInt_value(SQLParser::Int_valueContext * /*ctx*/) override {}
  void exitInt_value(SQLParser::Int_valueContext * /*ctx*/) override {}

  void enterFloat_value(SQLParser::Float_valueContext * /*ctx*/) override {}
  void exitFloat_value(SQLParser::Float_valueContext * /*ctx*/) override {}

  void enterQuoted_string(
      SQLParser::Quoted_stringContext * /*ctx*/) override {}
  void exitQuoted_string(
      SQLParser::Quoted_stringContext * /*ctx*/) override {}

  void enterBool_value(SQLParser::Bool_valueContext * /*ctx*/) override {}
  void exitBool_value(SQLParser::Bool_valueContext * /*ctx*/) override {}

  void enterIdentifier(SQLParser::IdentifierContext * /*ctx*/) override {}
  void exitIdentifier(SQLParser::IdentifierContext * /*ctx*/) override {}

  void enterNe_op(SQLParser::Ne_opContext * /*ctx*/) override {}
  void exitNe_op(SQLParser::Ne_opContext * /*ctx*/) override {}

  void enterGe_op(SQLParser::Ge_opContext * /*ctx*/) override {}
  void exitGe_op(SQLParser::Ge_opContext * /*ctx*/) override {}

  void enterLe_op(SQLParser::Le_opContext * /*ctx*/) override {}
  void exitLe_op(SQLParser::Le_opContext * /*ctx*/) override {}

  void enterRegular_id(SQLParser::Regular_idContext * /*ctx*/) override {}
  void exitRegular_id(SQLParser::Regular_idContext * /*ctx*/) override {}

  // --- Generic tree-walk callbacks (also empty) ---

  void enterEveryRule(antlr4::ParserRuleContext * /*ctx*/) override {}
  void exitEveryRule(antlr4::ParserRuleContext * /*ctx*/) override {}
  void visitTerminal(antlr4::tree::TerminalNode * /*node*/) override {}
  void visitErrorNode(antlr4::tree::ErrorNode * /*node*/) override {}
};

}  // namespace antlr4


================================================
FILE: src/db/sqlengine/antlr/gen/SQLParserListener.cc
================================================

// Generated from SQLParser.g4 by ANTLR 4.8


#include "SQLParserListener.h"


using namespace antlr4;


================================================
FILE: src/db/sqlengine/antlr/gen/SQLParserListener.h
================================================

// Generated from SQLParser.g4 by ANTLR 4.8

#pragma once


#include "SQLParser.h"
#include "antlr4-runtime.h"


namespace antlr4 {

/**
 * This interface defines an abstract listener for a parse tree produced by
 * SQLParser.
 *
 * Every method is pure virtual and comes as an enter/exit pair taking the
 * matching SQLParser::*Context pointer. This file is generated from
 * SQLParser.g4 by ANTLR 4.8, so manual edits are lost when the parser is
 * regenerated.
 */
class SQLParserListener : public antlr4::tree::ParseTreeListener {
 public:
  virtual void enterSwallow_to_semi(SQLParser::Swallow_to_semiContext *ctx) = 0;
  virtual void exitSwallow_to_semi(SQLParser::Swallow_to_semiContext *ctx) = 0;

  virtual void enterCompilation_unit(
      SQLParser::Compilation_unitContext *ctx) = 0;
  virtual void exitCompilation_unit(
      SQLParser::Compilation_unitContext *ctx) = 0;

  virtual void enterLogic_expr_unit(SQLParser::Logic_expr_unitContext *ctx) = 0;
  virtual void exitLogic_expr_unit(SQLParser::Logic_expr_unitContext *ctx) = 0;

  virtual void enterUnit_statement(SQLParser::Unit_statementContext *ctx) = 0;
  virtual void exitUnit_statement(SQLParser::Unit_statementContext *ctx) = 0;

  virtual void enterWhere_clause(SQLParser::Where_clauseContext *ctx) = 0;
  virtual void exitWhere_clause(SQLParser::Where_clauseContext *ctx) = 0;

  virtual void enterLogic_expr(SQLParser::Logic_exprContext *ctx) = 0;
  virtual void exitLogic_expr(SQLParser::Logic_exprContext *ctx) = 0;

  virtual void enterEnclosed_expr(SQLParser::Enclosed_exprContext *ctx) = 0;
  virtual void exitEnclosed_expr(SQLParser::Enclosed_exprContext *ctx) = 0;

  virtual void enterRelation_expr(SQLParser::Relation_exprContext *ctx) = 0;
  virtual void exitRelation_expr(SQLParser::Relation_exprContext *ctx) = 0;

  virtual void enterRel_oper(SQLParser::Rel_operContext *ctx) = 0;
  virtual void exitRel_oper(SQLParser::Rel_operContext *ctx) = 0;

  virtual void enterValue_expr(SQLParser::Value_exprContext *ctx) = 0;
  virtual void exitValue_expr(SQLParser::Value_exprContext *ctx) = 0;

  virtual void enterIn_value_expr_list(
      SQLParser::In_value_expr_listContext *ctx) = 0;
  virtual void exitIn_value_expr_list(
      SQLParser::In_value_expr_listContext *ctx) = 0;

  virtual void enterIn_value_expr(SQLParser::In_value_exprContext *ctx) = 0;
  virtual void exitIn_value_expr(SQLParser::In_value_exprContext *ctx) = 0;

  virtual void enterConstant(SQLParser::ConstantContext *ctx) = 0;
  virtual void exitConstant(SQLParser::ConstantContext *ctx) = 0;

  virtual void enterConstant_num_and_str(
      SQLParser::Constant_num_and_strContext *ctx) = 0;
  virtual void exitConstant_num_and_str(
      SQLParser::Constant_num_and_strContext *ctx) = 0;

  virtual void enterMatrix(SQLParser::MatrixContext *ctx) = 0;
  virtual void exitMatrix(SQLParser::MatrixContext *ctx) = 0;

  virtual void enterVector_expr(SQLParser::Vector_exprContext *ctx) = 0;
  virtual void exitVector_expr(SQLParser::Vector_exprContext *ctx) = 0;

  virtual void enterFunction_value_expr(
      SQLParser::Function_value_exprContext *ctx) = 0;
  virtual void exitFunction_value_expr(
      SQLParser::Function_value_exprContext *ctx) = 0;

  virtual void enterFunction_call(SQLParser::Function_callContext *ctx) = 0;
  virtual void exitFunction_call(SQLParser::Function_callContext *ctx) = 0;

  virtual void enterDql_statement(SQLParser::Dql_statementContext *ctx) = 0;
  virtual void exitDql_statement(SQLParser::Dql_statementContext *ctx) = 0;

  virtual void enterSelect_statement(
      SQLParser::Select_statementContext *ctx) = 0;
  virtual void exitSelect_statement(
      SQLParser::Select_statementContext *ctx) = 0;

  virtual void enterSelected_elements(
      SQLParser::Selected_elementsContext *ctx) = 0;
  virtual void exitSelected_elements(
      SQLParser::Selected_elementsContext *ctx) = 0;

  virtual void enterSelected_element(
      SQLParser::Selected_elementContext *ctx) = 0;
  virtual void exitSelected_element(
      SQLParser::Selected_elementContext *ctx) = 0;

  virtual void enterFrom_clause(SQLParser::From_clauseContext *ctx) = 0;
  virtual void exitFrom_clause(SQLParser::From_clauseContext *ctx) = 0;

  virtual void enterOrder_by_clause(SQLParser::Order_by_clauseContext *ctx) = 0;
  virtual void exitOrder_by_clause(SQLParser::Order_by_clauseContext *ctx) = 0;

  virtual void enterOrder_by_element(
      SQLParser::Order_by_elementContext *ctx) = 0;
  virtual void exitOrder_by_element(
      SQLParser::Order_by_elementContext *ctx) = 0;

  virtual void enterLimit_clause(SQLParser::Limit_clauseContext *ctx) = 0;
  virtual void exitLimit_clause(SQLParser::Limit_clauseContext *ctx) = 0;

  virtual void enterTableview_name(SQLParser::Tableview_nameContext *ctx) = 0;
  virtual void exitTableview_name(SQLParser::Tableview_nameContext *ctx) = 0;

  virtual void enterField_name(SQLParser::Field_nameContext *ctx) = 0;
  virtual void exitField_name(SQLParser::Field_nameContext *ctx) = 0;

  virtual void enterTable_alias(SQLParser::Table_aliasContext *ctx) = 0;
  virtual void exitTable_alias(SQLParser::Table_aliasContext *ctx) = 0;

  virtual void enterField_alias(SQLParser::Field_aliasContext *ctx) = 0;
  virtual void exitField_alias(SQLParser::Field_aliasContext *ctx) = 0;

  virtual void enterNumeric(SQLParser::NumericContext *ctx) = 0;
  virtual void exitNumeric(SQLParser::NumericContext *ctx) = 0;

  virtual void enterInt_value(SQLParser::Int_valueContext *ctx) = 0;
  virtual void exitInt_value(SQLParser::Int_valueContext *ctx) = 0;

  virtual void enterFloat_value(SQLParser::Float_valueContext *ctx) = 0;
  virtual void exitFloat_value(SQLParser::Float_valueContext *ctx) = 0;

  virtual void enterQuoted_string(SQLParser::Quoted_stringContext *ctx) = 0;
  virtual void exitQuoted_string(SQLParser::Quoted_stringContext *ctx) = 0;

  virtual void enterBool_value(SQLParser::Bool_valueContext *ctx) = 0;
  virtual void exitBool_value(SQLParser::Bool_valueContext *ctx) = 0;

  virtual void enterIdentifier(SQLParser::IdentifierContext *ctx) = 0;
  virtual void exitIdentifier(SQLParser::IdentifierContext *ctx) = 0;

  virtual void enterNe_op(SQLParser::Ne_opContext *ctx) = 0;
  virtual void exitNe_op(SQLParser::Ne_opContext *ctx) = 0;

  virtual void enterGe_op(SQLParser::Ge_opContext *ctx) = 0;
  virtual void exitGe_op(SQLParser::Ge_opContext *ctx) = 0;

  virtual void enterLe_op(SQLParser::Le_opContext *ctx) = 0;
  virtual void exitLe_op(SQLParser::Le_opContext *ctx) = 0;

  virtual void enterRegular_id(SQLParser::Regular_idContext *ctx) = 0;
  virtual void exitRegular_id(SQLParser::Regular_idContext *ctx) = 0;
};

}  // namespace antlr4


================================================
FILE: src/db/sqlengine/antlr/gen_parser.sh
================================================
#!/bin/sh
#****************************************************************#
# ScriptName: gen_parser.sh
# Author: fancy.lf
# Function: generate the ANTLR SQL lexer/parser C++ sources into ./gen.
#           Paths are relative, so run it from the directory that holds
#           SQLLexer.g4 and SQLParser.g4.
#***************************************************************#

# Stop at the first failing command; otherwise a failed ANTLR run would be
# hidden behind the exit status of the sed step below.
set -e

java -jar ../../../../../thirdparty/antlr/antlr-4.8-complete.jar -Dlanguage=Cpp -package antlr4 SQLLexer.g4 SQLParser.g4 -o gen
# Drop the u8 prefix from string literals in the generated .cc files
# (in-place edit; relies on sed supporting -i without a suffix and \b).
sed -i 's/\bu8"/"/g' gen/*.cc


================================================
FILE: src/db/sqlengine/common/generic_node.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>

namespace zvec::sqlengine {

/**
 * Base class for binary expression-tree nodes.
 *
 * @tparam NodeOp  operator/tag type stored in every node.
 * @tparam Node    concrete node type held by the child pointers. set_left()
 *                 and set_right() call `set_parent(this)` on the child, so
 *                 Node must expose set_parent(Generic_Node *) - in practice
 *                 Node derives from Generic_Node<NodeOp, Node>.
 *
 * Children are held through shared_ptr; the parent link is a plain pointer
 * that this class never deletes and never resets when the parent goes away,
 * so callers must not follow parent() after the parent has been destroyed.
 */
template <typename NodeOp, typename Node>
class Generic_Node {
 public:
  using Ptr = std::shared_ptr<Node>;

  Generic_Node(NodeOp m_op);
  virtual ~Generic_Node() = default;

  // Install a child (may be null). A non-null child gets its parent link
  // pointed at this node.
  void set_left(Ptr m_left);
  void set_right(Ptr m_right);

  // Child access as shared_ptr references.
  const Ptr &left() const {
    return left_;
  }
  const Ptr &right() const {
    return right_;
  }

  // Child access as raw pointers (null when the child is absent).
  Node *left_node() const {
    return left_.get();
  }
  Node *right_node() const {
    return right_.get();
  }

  void set_parent(Generic_Node *m_parent);
  // Const-qualified so the parent can also be read through a const node.
  Generic_Node *parent() const;

  virtual NodeOp op() const {
    return op_;
  }
  virtual void set_op(NodeOp value) {
    op_ = value;
  }

  // Textual rendering of the subtree; supplied by the concrete node type.
  virtual std::string text() const = 0;

 protected:
  NodeOp op_;
  Ptr left_{nullptr};
  Ptr right_{nullptr};
  Generic_Node *parent_{nullptr};  // not owned
};

template <typename NodeOp, typename Node>
Generic_Node<NodeOp, Node>::Generic_Node(NodeOp m_op) : op_(m_op) {}

template <typename NodeOp, typename Node>
void Generic_Node<NodeOp, Node>::set_left(Ptr m_left) {
  left_ = std::move(m_left);
  if (left_ != nullptr) {
    left_->set_parent(this);
  }
}

template <typename NodeOp, typename Node>
void Generic_Node<NodeOp, Node>::set_right(Ptr m_right) {
  right_ = std::move(m_right);
  if (right_ != nullptr) {
    right_->set_parent(this);
  }
}

template <typename NodeOp, typename Node>
void Generic_Node<NodeOp, Node>::set_parent(Generic_Node<NodeOp, Node> *value) {
  parent_ = value;
}

template <typename NodeOp, typename Node>
Generic_Node<NodeOp, Node> *Generic_Node<NodeOp, Node>::parent() const {
  return parent_;
}

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/common/group_by.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include <string>
#include <zvec/ailego/utility/string_helper.h>
#include <zvec/db/doc.h>

namespace zvec::sqlengine {

// Plain value object carrying a group-by field name and two numeric settings
// (group_topk, group_count).
struct GroupBy {
  using Ptr = std::shared_ptr<GroupBy>;

  GroupBy() = default;

  // The constructor parameters intentionally share the member names; inside
  // the initializer list the argument of each initializer refers to the
  // parameter.
  GroupBy(std::string group_by_field, uint32_t group_topk, uint32_t group_count)
      : group_by_field(std::move(group_by_field)),
        group_topk(group_topk),
        group_count(group_count) {}

  // Renders "field[<name>] topk[<topk>] count[<count>]".
  // Const-qualified so it can be called on const objects and references.
  std::string to_string() const {
    return ailego::StringHelper::Concat("field[", group_by_field, "] topk[",
                                        group_topk, "] count[", group_count,
                                        "]");
  }

  std::string group_by_field;
  uint32_t group_topk{0};
  uint32_t group_count{0};
};


}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/common/util.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "util.h"
#include <sys/time.h>
#include <algorithm>
#include <cctype>
#include <iostream>
#include <string>
#include <arrow/type.h>
#include <arrow/type_fwd.h>
#include <zvec/ailego/encoding/json.h>
#include <zvec/ailego/utility/string_helper.h>

namespace zvec::sqlengine {

// Removes exactly one leading and one trailing byte from `str`, and only when
// BOTH the first and the last byte equal `c`. Otherwise (or when `str` has
// fewer than two bytes) the input is returned unchanged.
//
// A find_first_not_of / find_last_not_of based trim was used here before; it
// stripped every repeated `c` at either end, which is not wanted.
//
// NOTE(review): the previous comment also mentioned removing one byte per side
// "as c = 0"; the code has never special-cased c == 0 and still does not.
std::string Util::trim_one_both_side(const std::string &str, unsigned char c) {
  const std::string::size_type len = str.size();
  if (len < 2) {
    return str;
  }

  // Compare as unsigned char: a plain `char` holding a byte >= 0x80 is
  // negative where char is signed and would never equal `c` after promotion.
  const unsigned char first = static_cast<unsigned char>(str.front());
  const unsigned char last = static_cast<unsigned char>(str.back());
  if (first == c && last == c) {
    return str.substr(1, len - 2);
  }

  return str;
}

// Replaces every occurrence of `src` inside `*str` with `dst`, scanning left
// to right. Text inserted by a replacement is not rescanned.
//
// @param src  substring to look for; an empty `src` is a no-op (searching for
//             an empty needle matches at every position and would never end).
// @param dst  replacement text.
// @param str  string edited in place; a null pointer is ignored.
void Util::string_replace(const std::string &src, const std::string &dst,
                          std::string *str) {
  if (str == nullptr || src.empty()) {
    return;
  }

  const std::string::size_type src_len = src.size();
  const std::string::size_type dst_len = dst.size();

  // After each replacement resume the search just past the inserted text.
  for (std::string::size_type pos = str->find(src); pos != std::string::npos;
       pos = str->find(src, pos + dst_len)) {
    str->replace(pos, src_len, dst);
  }
}

// Post-parse clean-up of SQL text: turns backslash-escaped quote characters
// back into plain quotes and returns the result as a new string.
std::string Util::normalize(const std::string &sql) {
  std::string unescaped(sql);

  // rule 1: \" becomes "
  Util::string_replace("\\\"", "\"", &unescaped);
  // rule 2: \' becomes '
  Util::string_replace("\\\'", "\'", &unescaped);

  return unescaped;
}

// Returns a schema equal to `schema` with one extra field (`name`, `type`)
// added at the end. The result of AddField is unwrapped with
// MoveValueUnsafe(), i.e. its status is not inspected here.
std::shared_ptr<arrow::Schema> Util::append_field(
    const arrow::Schema &schema, const std::string &name,
    std::shared_ptr<arrow::DataType> type) {
  // Inserting at index == num_fields() places the new field last.
  const int tail_index = schema.num_fields();
  auto extended = schema.AddField(tail_index, arrow::field(name, type));
  return extended.MoveValueUnsafe();
}

std::shared_ptr<arrow::DataType> Util::sparse_type() {
  return arrow::struct_(arrow::FieldVector{
      arrow::field("index", arrow::binary()),
      arrow::field("value", arrow::binary()),
  });
}

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/common/util.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include <string>
#include <arrow/api.h>
#include <arrow/record_batch.h>

namespace zvec::sqlengine {

// Fixed "_zvec_"-prefixed name strings.
// NOTE(review): presumably reserved column names used by the SQL engine -
// confirm against the users of these constants.
static constexpr const char *kFieldScore = "_zvec_score";
static constexpr const char *kFieldVector = "_zvec_vector";
static constexpr const char *kFieldSparseIndices = "_zvec_sindices";
static constexpr const char *kFieldSparseValues = "_zvec_svalues";
static constexpr const char *kFieldIsValid = "_zvec_is_valid";
static constexpr const char *kFieldGroupId = "_zvec_group_id";

// Fixed identifier strings.
// NOTE(review): these look like names of engine-internal functions - confirm.
static inline const std::string kCheckNotFiltered = "check_not_filtered";
static inline const std::string kFetchVector = "fetch_vector";
static inline const std::string kFetchSparseVector = "fetch_sparse_vector";
static inline const std::string kContainAll = "contain_all";
static inline const std::string kContainAny = "contain_any";

static inline const std::string kFuncArrayLength = "array_length";

// Stringifies its argument exactly as written (preprocessor # operator).
#define enum_to_string(x) #x

// Stateless collection of static helpers used by the SQL engine.
class Util {
 public:
  // Strips one leading and one trailing `c` from `str`, only when both ends
  // match; otherwise returns `str` unchanged.
  static std::string trim_one_both_side(const std::string &str,
                                        unsigned char c);
  // Replaces every occurrence of `strsrc` in `*str` with `strdst`, in place.
  static void string_replace(const std::string &strsrc,
                             const std::string &strdst, std::string *str);
  // Returns a copy of `sql` with \" and \' un-escaped to " and '.
  static std::string normalize(const std::string &sql);

  // Returns a copy of `schema` with the field (`name`, `type`) appended last.
  static std::shared_ptr<arrow::Schema> append_field(
      const arrow::Schema &schema, const std::string &name,
      std::shared_ptr<arrow::DataType> type);

  // Arrow struct type made of two binary fields, "index" and "value".
  static std::shared_ptr<arrow::DataType> sparse_type();
};

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/parser/base_info.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <iostream>
#include <memory>

namespace zvec::sqlengine {

// Abstract base for parsed-statement info objects: stores the table name the
// statement refers to and an error message slot.
class BaseInfo {
 public:
  using Ptr = std::shared_ptr<BaseInfo>;

  // @param value  table name to store.
  BaseInfo(const std::string &value) : table_name_(value) {}

  virtual ~BaseInfo() = default;

  // Member-wise copies. The former hand-written versions copied only the
  // table name and silently dropped the error message.
  BaseInfo(const BaseInfo &info) = default;
  BaseInfo &operator=(const BaseInfo &info) = default;

  // Returns a copy of the table name.
  std::string table_name() const {
    return table_name_;
  }

  // Base implementation performs no checks and always reports success.
  bool validate() const {
    return true;
  }

  const std::string &err_msg() const {
    return err_msg_;
  }

  void set_err_msg(const std::string &value) {
    err_msg_ = value;
  }

  // Human-readable rendering; supplied by the concrete info type.
  virtual std::string to_string() = 0;

 private:
  std::string table_name_{""};
  std::string err_msg_{""};
};

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/parser/case_changing_charstream.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <iostream>
#include "CharStream.h"

namespace zvec::sqlengine {

using namespace antlr4;

// CharStream decorator that reports look-ahead symbols in a single case while
// leaving the underlying text untouched: only LA() is altered, every other
// call (including getText()) is forwarded verbatim to the wrapped stream.
class CaseChangingCharStream : public CharStream {
 public:
  // Wraps `m_stream`, forcing the symbols returned by LA() to upper or lower
  // case.
  // @param m_stream  the stream to wrap; it is not owned (never deleted here)
  //                  and must outlive this object.
  // @param m_upper   if true force each symbol to upper case, otherwise force
  //                  it to lower case.
  CaseChangingCharStream(CharStream *m_stream, bool m_upper)
      : stream_(m_stream), upper_(m_upper) {}

  std::string getText(const misc::Interval &interval) {
    return stream_->getText(interval);
  }

  void consume() {
    stream_->consume();
  }

  // Look-ahead with case folding applied.
  size_t LA(ssize_t i) {
    const size_t c = stream_->LA(i);
    // toupper()/tolower() are only defined for values representable as
    // unsigned char (or EOF). Return 0 and everything above 0xFF untouched:
    // that covers wide code points and very large sentinel values such as an
    // end-of-input marker, instead of feeding them to the C functions.
    if (c == 0 || c > 0xFF) {
      return c;
    }
    const int ch = static_cast<int>(c);
    return static_cast<size_t>(upper_ ? toupper(ch) : tolower(ch));
  }

  ssize_t mark() {
    return stream_->mark();
  }

  void release(ssize_t marker) {
    stream_->release(marker);
  }

  size_t index() {
    return stream_->index();
  }

  void seek(size_t m_index) {
    stream_->seek(m_index);
  }

  size_t size() {
    return stream_->size();
  }

  std::string getSourceName() const {
    return stream_->getSourceName();
  }

  std::string toString() const {
    return stream_->toString();
  }

 private:
  CharStream *stream_;  // wrapped stream, not owned
  bool upper_ = true;
};

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/parser/error_verbose_listener.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <string>
#include "BaseErrorListener.h"

namespace zvec::sqlengine {

using namespace antlr4;

// Evaluates `x` in a void context so that an otherwise unused variable or
// parameter does not trigger an unused warning.
#define UNUSED(x) (void)x

// Error listener that remembers the most recent syntax error as text.
//
// Inherits publicly from BaseErrorListener: with the `class` default
// (private) the object could not be converted to its listener base outside
// this class without a C-style cast.
class ErrorVerboseListener : public BaseErrorListener {
 public:
  ErrorVerboseListener() = default;
  ~ErrorVerboseListener() = default;

  // Records "<line> <charPositionInLine> <msg>", replacing any previously
  // stored message. The recognizer, offending token and exception arguments
  // are not used.
  void syntaxError(Recognizer * /*recognizer*/, Token * /*offendingSymbol*/,
                   size_t line, size_t charPositionInLine,
                   const std::string &msg, std::exception_ptr /*e*/) {
    err_msg_ = std::to_string(line) + " " + std::to_string(charPositionInLine) +
               " " + msg;
  }

  // Last recorded message; empty if no syntax error has been reported.
  const std::string &err_msg() const {
    return err_msg_;
  }

 private:
  std::string err_msg_;
};

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/parser/node.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "node.h"
#include <assert.h>
#include <sstream>
#include "db/sqlengine/common/util.h"

namespace zvec::sqlengine {

// Default node: op is T_NONE and the node type stays at its NO_TYPE default.
Node::Node() : Generic_Node(NodeOp::T_NONE) {}

// Node for a given op. The base constructor stores the op; the node type is
// then derived from it.
Node::Node(NodeOp m_op) : Generic_Node(m_op) {
  set_type_by_op();
}

// Stores the op through the base class and re-derives the node type from it.
void Node::set_op(NodeOp value) {
  Generic_Node::set_op(value);
  set_type_by_op();
}

// Display name of the current op, as produced by type_to_str().
std::string Node::op_name() const {
  return Node::type_to_str(op_);
}

// Node category last computed by set_type_by_op().
Node::NodeType Node::type() {
  return this->type_;
}

void Node::set_type_by_op() {
  NodeType node_type = NodeType::NO_TYPE;
  switch (op()) {
    case NodeOp::T_AND:
    case NodeOp::T_OR:
      node_type = NodeType::LOGIC_EXPR;
      break;

    case NodeOp::T_EQ:
    case NodeOp::T_NE:
    case NodeOp::T_GT:
    case NodeOp::T_GE:
    case NodeOp::T_LT:
    case NodeOp::T_LE:
    case NodeOp::T_LIKE:
    case NodeOp::T_IN:
    case NodeOp::T_CONTAIN_ALL:
    case NodeOp::T_CONTAIN_ANY:
    case NodeOp::T_IS_NULL:
    case NodeOp::T_IS_NOT_NULL:
      node_type = NodeType::REL_EXPR;
      break;

    case NodeOp::T_PLUS:
    case NodeOp::T_MINUS:
    case NodeOp::T_MUL:
    case NodeOp::T_DIV:
      node_type = NodeType::ARITH_EXPR;
      break;

    case NodeOp::T_FUNCTION_CALL:
      node_type = NodeType::FUNC;
      break;

    case NodeOp::T_RANGE_VALUE:
    case NodeOp::T_LIST_VALUE:
    case NodeOp::T_VECTOR_MATRIX_VALUE:
    case NodeOp::T_INT_VALUE:
    case NodeOp::T_FLOAT_VALUE:
    case NodeOp::T_STRING_VALUE:
    case NodeOp::T_BOOL_VALUE:
    case NodeOp::T_NULL_VALUE:
      node_type = NodeType::CONST;
      break;
    case NodeOp::T_ID:
      node_type = NodeType::ID;
      break;
    default:
      break;
  }

  type_ = node_type;
}

std::string Node::text() const {
  std::stringstream stream;
  switch (type_) {
    case NodeType::LOGIC_EXPR:
      stream << "(" << left()->text() << ") " << op_name() << " ("
             << right()->text() << ")";
      break;
    case NodeType::REL_EXPR:
      stream << left()->text() << op_name() << right()->text();
      break;
    default:
      break;
  }

  return stream.str();
}

std::string Node::to_string() {
  return text();
}

//========================================================================

// Range node with both bounds exclusive (the member defaults).
RangeNode::RangeNode() : Node(NodeOp::T_RANGE_VALUE) {}

// Range node with explicit inclusiveness of the lower / upper bound.
RangeNode::RangeNode(bool m_min_equal, bool m_max_equal)
    : Node(NodeOp::T_RANGE_VALUE),
      min_equal_(m_min_equal),
      max_equal_(m_max_equal) {}

void RangeNode::set_min_equal(bool value) {
  min_equal_ = value;
}

void RangeNode::set_max_equal(bool value) {
  max_equal_ = value;
}

bool RangeNode::min_equal() {
  return min_equal_;
}
bool RangeNode::max_equal() {
  return max_equal_;
}

// Renders the range as "<open><left>~<right><close>", where <open> is "[" for
// an inclusive lower bound and "(" otherwise, and <close> is "]" for an
// inclusive upper bound and ")" otherwise. A missing (null) bound is printed
// as an empty string instead of being dereferenced.
std::string RangeNode::text() const {
  std::string out(min_equal_ ? "[" : "(");
  if (left() != nullptr) {
    out += left()->text();
  }
  out += "~";
  if (right() != nullptr) {
    out += right()->text();
  }
  out += (max_equal_ ? "]" : ")");
  return out;
}

// Stores an additional op tag on the range node.
// NOTE(review): presumably the op of the expression the range was built
// from - confirm with the callers.
void RangeNode::set_child_op(NodeOp value) {
  this->child_op_ = value;
}

// Returns the tag stored by set_child_op() (T_NONE until one is set).
NodeOp RangeNode::child_op() {
  return this->child_op_;
}

//========================================================================

ConstantNode::ConstantNode(const std::string &m_value) {
  value_ = m_value;
}

void ConstantNode::set_value(const std::string &m_value) {
  value_ = m_value;
}
const std::string &ConstantNode::value() {
  return value_;
}

std::string ConstantNode::text() const {
  return value_;
}

//========================================================================

IDNode::IDNode(const std::string &m_value) {
  value_ = m_value;
  set_op(NodeOp::T_ID);
}

void IDNode::set_value(const std::string &m_value) {
  value_ = m_value;
}
const std::string &IDNode::value() {
  return value_;
}

std::string IDNode::text() const {
  return value_;
}

//========================================================================

FuncNode::FuncNode() : Node(NodeOp::T_FUNCTION_CALL) {}

void FuncNode::set_func_name_node(Node::Ptr func_name_node) {
  func_name_node_ = std::move(func_name_node);
}

const Node::Ptr &FuncNode::get_func_name_node() {
  return func_name_node_;
}

void FuncNode::add_argument(Node::Ptr argument_node) {
  arguments_.emplace_back(std::move(argument_node));
}

const std::vector<Node::Ptr> &FuncNode::arguments() {
  return arguments_;
}

// Renders the call as "<name>(<arg0>, <arg1>, ...)".
// A name node that has not been set yet, or a null argument, contributes an
// empty string instead of being dereferenced.
std::string FuncNode::text() const {
  std::stringstream stream;
  if (func_name_node_ != nullptr) {
    stream << func_name_node_->text();
  }
  stream << "(";

  bool first = true;
  // Iterate by const reference: copying each shared_ptr would cost an atomic
  // reference-count update per argument for no benefit.
  for (const auto &argument : arguments_) {
    if (!first) {
      stream << ", ";
    }
    first = false;
    if (argument != nullptr) {
      stream << argument->text();
    }
  }
  stream << ")";
  return stream.str();
}


//========================================================================

// Renders the value list as "(<v0>, <v1>, ...)", prefixed with "NOT " when
// the exclude flag is set. A null entry contributes an empty string instead
// of being dereferenced.
std::string InValueExprListNode::text() const {
  std::stringstream stream;
  if (exclude_) {
    stream << "NOT ";
  }

  stream << "(";

  bool first = true;
  // Iterate by const reference to avoid a shared_ptr copy per element.
  for (const auto &in_value_expr : in_value_expr_list_) {
    if (!first) {
      stream << ", ";
    }
    first = false;
    if (in_value_expr != nullptr) {
      stream << in_value_expr->text();
    }
  }
  stream << ")";
  return stream.str();
}

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/parser/node.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include <string>
#include <vector>
#include <zvec/db/query_params.h>
#include "db/sqlengine/common/generic_node.h"

namespace zvec::sqlengine {

// Operator / value tag carried by every expression-tree node.
//
// Node::type_to_str() indexes a name table with the numeric value of these
// enumerators, so their order (and count) must stay in step with that table
// whenever an entry is added, removed or moved.
enum class NodeOp {
  T_NONE,
  T_NOT,
  T_AND,
  T_OR,
  T_EQ,
  T_NE,
  T_GT,
  T_GE,
  T_LT,
  T_LE,
  T_BETWEEN,
  T_LIKE,
  T_IN,
  T_CONTAIN_ALL,
  T_CONTAIN_ANY,
  T_IS_NULL,
  T_IS_NOT_NULL,
  T_PLUS,
  T_MINUS,
  T_MUL,
  T_DIV,
  T_FUNCTION_CALL,
  T_RANGE_VALUE,
  T_LIST_VALUE,
  T_VECTOR_MATRIX_VALUE,
  T_INT_VALUE,
  T_FLOAT_VALUE,
  T_STRING_VALUE,
  T_NULL_VALUE,
  T_ID,
  T_BOOL_VALUE
};

// Concrete expression-tree node: a Generic_Node keyed by NodeOp that also
// tracks a coarse NodeType category derived from the op.
class Node : public Generic_Node<NodeOp, Node> {
 public:
  using Ptr = std::shared_ptr<Node>;

  // Returns the display name of `c`.
  //
  // The table holds exactly one entry per NodeOp enumerator, in declaration
  // order. It used to contain a stale extra "VECTOR_FEATURES_VALUE" entry,
  // which shifted every name from T_INT_VALUE onward by one slot; the
  // static_assert below keeps table and enum from drifting apart again.
  // Values outside the enum range yield "UNKNOWN" instead of reading past
  // the table.
  static inline std::string type_to_str(NodeOp c) {
    static const char *const names[] = {"NONE",
                                        "!",
                                        "and",
                                        "or",
                                        "=",
                                        "!=",
                                        ">",
                                        ">=",
                                        "<",
                                        "<=",
                                        "BETWEEN",
                                        " LIKE ",
                                        " IN ",
                                        " CONTAIN_ALL ",
                                        " CONTAIN_ANY ",
                                        "IS_NULL",
                                        "IS_NOT_NULL",
                                        "+",
                                        "-",
                                        "*",
                                        "/",
                                        "FUNCTION_CALL",
                                        "RANGE_VALUE",
                                        "LIST_VALUE",
                                        "VECTOR_MATRIX_VALUE",
                                        "INT_VALUE",
                                        "FLOAT_VALUE",
                                        "STRING_VALUE",
                                        "NULL_VALUE",
                                        "ID",
                                        "BOOL_VALUE"};
    constexpr std::size_t kNameCount = sizeof(names) / sizeof(names[0]);
    static_assert(
        kNameCount == static_cast<std::size_t>(NodeOp::T_BOOL_VALUE) + 1,
        "names[] must have exactly one entry per NodeOp enumerator");

    const std::size_t index = static_cast<std::size_t>(c);
    return index < kNameCount ? names[index] : "UNKNOWN";
  }

  // Coarse category of a node, derived from its op by set_type_by_op().
  enum class NodeType {
    NO_TYPE,
    LOGIC_EXPR,
    REL_EXPR,
    ENCLOSED_ARITH_EXPR,
    ARITH_EXPR,
    FUNC,
    CONST,
    ID
  };

 public:
  Node();
  Node(NodeOp op);
  ~Node() override = default;

  // Sets the op and refreshes the derived NodeType.
  void set_op(NodeOp op) override;
  // Display name of the current op (see type_to_str()).
  std::string op_name() const;

  NodeType type();

  virtual std::string text() const override;
  // Same output as text().
  std::string to_string();

 private:
  void set_type_by_op();

 private:
  // NOTE(review): declared here but not defined in node.cc; it appears to be
  // unused - confirm before removing.
  static const std::string node_op_names[];

 private:
  NodeType type_{NodeType::NO_TYPE};
};

// Value node describing a range between its left and right child. text()
// prints it as "[l~r]" / "(l~r)" depending on the inclusive flags.
class RangeNode : public Node {
 public:
  using Ptr = std::shared_ptr<RangeNode>;

  // Both constructors set the op to T_RANGE_VALUE; the default one leaves
  // both bounds exclusive.
  RangeNode();
  RangeNode(bool m_min_equal, bool m_max_equal);
  virtual ~RangeNode() = default;

  // Inclusiveness of the lower (min) and upper (max) bound.
  void set_min_equal(bool value);
  void set_max_equal(bool value);

  bool min_equal();
  bool max_equal();

  std::string text() const override;
  // Extra op tag stored alongside the range (T_NONE until set).
  // NOTE(review): presumably the op of the originating expression - confirm.
  void set_child_op(NodeOp value);
  NodeOp child_op();

 private:
  bool min_equal_{false}, max_equal_{false};
  NodeOp child_op_{NodeOp::T_NONE};
};

class VectorMatrixNode : public Node {
 public:
  using Ptr = std::shared_ptr<VectorMatrixNode>;

  VectorMatrixNode(std::string matrix, std::string sparse_indices,
                   std::string sparse_values, QueryParams::Ptr query_params)
      : matrix_(std::move(matrix)),
        sparse_indices_(std::move(sparse_indices)),
        sparse_values_(std::move(sparse_values)),
        query_params_(std::move(query_params)) {
    set_op(NodeOp::T_VECTOR_MATRIX_VALUE);
  }

  const std::string &matrix() const {
    return matrix_;
  }

  const std::string &sparse_indices() const {
    return sparse_indices_;
  }

  const std::string &sparse_values() const {
    return sparse_values_;
  }

  const QueryParams::Ptr &query_params() const {
    return query_params_;
  }

  std::string text() const override {
    // do not distinguish between matrix and vector
    static std::string txt = "[...]";
    return txt;
  }

 private:
  std::string matrix_;
  std::string sparse_indices_;
  std::string sparse_values_;
  QueryParams::Ptr query_params_;
};

// Node subclass that carries a constant as a string value.
class ConstantNode : public Node {
 public:
  using Ptr = std::shared_ptr<ConstantNode>;

  // Constructs the node from the constant's string value.
  ConstantNode(const std::string &m_value);

  // Accessors for the stored string value.
  void set_value(const std::string &m_value);
  const std::string &value();

  // Textual rendering of this node (overrides Node::text()).
  std::string text() const override;

 private:
  std::string value_{""};
};

// Node subclass that carries an identifier as a string value.
// NOTE(review): presumably a field/column name — confirm against the parser.
class IDNode : public Node {
 public:
  using Ptr = std::shared_ptr<IDNode>;

  // Constructs the node from the identifier string.
  IDNode(const std::string &m_value);

  // Accessors for the stored identifier string.
  void set_value(const std::string &m_value);
  const std::string &value();

  // Textual rendering of this node (overrides Node::text()).
  std::string text() const override;

 private:
  std::string value_{""};
};

// Node subclass describing a function call: a node for the function name
// plus an ordered list of argument nodes.
class FuncNode : public Node {
 public:
  using Ptr = std::shared_ptr<FuncNode>;

  FuncNode();
  virtual ~FuncNode() = default;

  // Accessors for the node that names the function (nullptr by default).
  void set_func_name_node(Node::Ptr func_name_node);
  const Node::Ptr &get_func_name_node();

  // Adds one argument node / returns the argument nodes collected so far.
  void add_argument(Node::Ptr argument_node);
  const std::vector<Node::Ptr> &arguments();

  // Textual rendering of this node (overrides Node::text()).
  std::string text() const override;

 private:
  Node::Ptr func_name_node_{nullptr};
  std::vector<Node::Ptr> arguments_{};
};

// Node subclass holding a list of value expressions together with an
// `exclude` flag. The node's operator is NodeOp::T_LIST_VALUE.
// NOTE(review): presumably the right-hand side of an IN / NOT IN predicate,
// with `exclude` marking the negated form — confirm against the parser.
class InValueExprListNode : public Node {
 public:
  using Ptr = std::shared_ptr<InValueExprListNode>;

  InValueExprListNode() : Node(NodeOp::T_LIST_VALUE) {}

  // Appends one value expression to the end of the list.
  void add_in_value_expr(Node::Ptr in_value_expr) {
    in_value_expr_list_.emplace_back(std::move(in_value_expr));
  }

  // Returns the value expressions in insertion order.
  const std::vector<Node::Ptr> &in_value_expr_list() {
    return in_value_expr_list_;
  }

  // Returns the exclude flag (false by default).
  bool exclude() {
    return exclude_;
  }

  // Sets the exclude flag.
  void set_exclude(bool val) {
    exclude_ = val;
  }

  // Textual rendering of this node (overrides Node::text()).
  std::string text() const override;

 private:
  std::vector<Node::Ptr> in_value_expr_list_{};
  bool exclude_{false};
};


}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/parser/orderby_elem_info.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include <string>
#include "node.h"

namespace zvec::sqlengine {

// Describes a single ORDER BY element: the field to sort on and the sort
// direction (ascending unless set_desc() has been called).
class OrderByElemInfo {
 public:
  using Ptr = std::shared_ptr<OrderByElemInfo>;

  OrderByElemInfo() = default;

  // Returns the name of the field to sort on (empty by default).
  const std::string &field_name() const {
    return field_name_;
  }

  // Sets the name of the field to sort on.
  void set_field_name(const std::string &value) {
    field_name_ = value;
  }

  // Switches the element to descending order; there is no way to switch back.
  void set_desc() {
    desc_ = true;
  }

  // Returns true when the element sorts in descending order.
  bool is_desc() const {
    return desc_;
  }

  // Renders the element as "<field> ASC" or "<field> DESC".
  std::string to_string() const {
    return field_name_ + " " + (desc_ ? "DESC" : "ASC");
  }

 private:
  std::string field_name_{""};
  bool desc_{false};
};

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/parser/query_parser.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "query_parser.h"

namespace zvec::sqlengine {

// Parses `query` with a freshly constructed ZVecSQLParser.
// Returns the parsed SQLInfo on success. On failure the parser's error
// message is stored in err_msg_ and a null pointer is returned.
SQLInfo::Ptr QueryParser::parse(const std::string &query) {
  ZVecSQLParser sql_parser;

  SQLInfo::Ptr parsed = sql_parser.parse(query);
  if (parsed != nullptr) {
    return parsed;
  }

  // Keep the failure reason available through err_msg().
  err_msg_ = sql_parser.err_msg();
  return nullptr;
}

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/parser/query_parser.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "sql_info.h"
#include "zvec_sql_parser.h"

namespace zvec::sqlengine {

// Thin front end for SQL parsing that keeps the error message of the most
// recent failed parse.
class QueryParser {
 public:
  // Parses `query`; returns a null pointer on failure (see err_msg()).
  SQLInfo::Ptr parse(const std::string &query);

  // Error message recorded by the last failed parse(); empty initially.
  const std::string &err_msg() {
    return err_msg_;
  }

 private:
  std::string err_msg_{""};
};

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/parser/select_info.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "select_info.h"

namespace zvec::sqlengine {

// Constructs an empty SELECT description; the table name is forwarded to
// the BaseInfo base class.
SelectInfo::SelectInfo(const std::string &m_table_name)
    : BaseInfo(m_table_name) {}

// Nothing to release explicitly; members clean up after themselves.
SelectInfo::~SelectInfo() {}

// Copy constructor.
// Selected elements, order-by elements and the search-condition tree are
// deep-copied so the new object can be modified independently of `info`.
// The group-by description is shared (pointer copy), and the limit and the
// include_vector / include_doc_id flags are copied by value so that no
// query option is lost in the copy.
SelectInfo::SelectInfo(const SelectInfo &info) : BaseInfo(info) {
  selected_elems_.reserve(info.selected_elems_.size());
  for (const auto &elem : info.selected_elems_) {
    add_selected_elem(std::make_shared<SelectedElemInfo>(*elem));
  }

  orderby_elems_.reserve(info.orderby_elems_.size());
  for (const auto &elem : info.orderby_elems_) {
    add_order_by_elem(std::make_shared<OrderByElemInfo>(*elem));
  }

  search_cond_ = copy_node(info.search_cond_);

  group_by_ = info.group_by_;
  limit_ = info.limit_;
  include_vector_ = info.include_vector_;
  include_doc_id_ = info.include_doc_id_;
}

// Copy assignment.
// Replaces the whole state of this object with a copy of `info`:
//  - self-assignment is a no-op (iterating a vector while appending to the
//    very same vector would invalidate the iterators);
//  - the BaseInfo part is assigned as well;
//  - previously held elements are dropped instead of being appended to;
//  - elements and the search-condition tree are deep-copied, the group-by
//    description is shared, and limit / flags are copied by value.
SelectInfo &SelectInfo::operator=(const SelectInfo &info) {
  if (this == &info) {
    return *this;
  }

  BaseInfo::operator=(info);

  selected_elems_.clear();
  selected_elems_.reserve(info.selected_elems_.size());
  for (const auto &elem : info.selected_elems_) {
    add_selected_elem(std::make_shared<SelectedElemInfo>(*elem));
  }

  orderby_elems_.clear();
  orderby_elems_.reserve(info.orderby_elems_.size());
  for (const auto &elem : info.orderby_elems_) {
    add_order_by_elem(std::make_shared<OrderByElemInfo>(*elem));
  }

  search_cond_ = copy_node(info.search_cond_);

  group_by_ = info.group_by_;
  limit_ = info.limit_;
  include_vector_ = info.include_vector_;
  include_doc_id_ = info.include_doc_id_;

  return *this;
}

// Recursively deep-copies the expression tree rooted at `node`.
//
// The concrete node class is selected from the node's operator and then
// confirmed with a dynamic cast, so that subclass payload (constant value,
// identifier, vector data, function name/arguments, IN-list values and
// exclude flag, range flags and child operator) is carried over to the copy.
// A node whose dynamic type does not match its operator is copied as a plain
// Node instead of dereferencing a failed cast.
//
// Returns nullptr when `node` is nullptr.
Node::Ptr SelectInfo::copy_node(const Node::Ptr &node) {
  if (node == nullptr) {
    return nullptr;
  }

  const auto op = node->op();
  Node::Ptr new_node = nullptr;

  if (op == NodeOp::T_INT_VALUE || op == NodeOp::T_FLOAT_VALUE ||
      op == NodeOp::T_STRING_VALUE || op == NodeOp::T_NULL_VALUE ||
      op == NodeOp::T_BOOL_VALUE) {
    if (auto constant_node = std::dynamic_pointer_cast<ConstantNode>(node)) {
      new_node = std::make_shared<ConstantNode>(constant_node->value());
    }
  } else if (op == NodeOp::T_ID) {
    if (auto id_node = std::dynamic_pointer_cast<IDNode>(node)) {
      new_node = std::make_shared<IDNode>(id_node->value());
    }
  } else if (op == NodeOp::T_VECTOR_MATRIX_VALUE) {
    if (auto vector_node = std::dynamic_pointer_cast<VectorMatrixNode>(node)) {
      // The query parameters are shared with the source node, not cloned.
      new_node = std::make_shared<VectorMatrixNode>(
          vector_node->matrix(), vector_node->sparse_indices(),
          vector_node->sparse_values(), vector_node->query_params());
    }
  } else if (op == NodeOp::T_FUNCTION_CALL) {
    if (auto func_node = std::dynamic_pointer_cast<FuncNode>(node)) {
      auto new_func_node = std::make_shared<FuncNode>();
      new_func_node->set_func_name_node(
          copy_node(func_node->get_func_name_node()));
      for (const auto &argument : func_node->arguments()) {
        new_func_node->add_argument(copy_node(argument));
      }
      new_node = std::move(new_func_node);
    }
  } else if (op == NodeOp::T_LIST_VALUE) {
    if (auto list_node = std::dynamic_pointer_cast<InValueExprListNode>(node)) {
      auto new_list_node = std::make_shared<InValueExprListNode>();
      for (const auto &value_expr : list_node->in_value_expr_list()) {
        new_list_node->add_in_value_expr(copy_node(value_expr));
      }
      new_list_node->set_exclude(list_node->exclude());
      new_node = std::move(new_list_node);
    }
  } else if (auto range_node = std::dynamic_pointer_cast<RangeNode>(node)) {
    auto new_range_node = std::make_shared<RangeNode>(range_node->min_equal(),
                                                      range_node->max_equal());
    new_range_node->set_child_op(range_node->child_op());
    new_node = std::move(new_range_node);
  }

  // Everything else (and any operator/type mismatch) is a plain Node.
  if (new_node == nullptr) {
    new_node = std::make_shared<Node>();
  }

  // The operator is always taken from the source node, overriding whatever
  // the constructor of the copy has set.
  new_node->set_op(op);

  // Copy the left and right subtrees.
  if (node->left() != nullptr) {
    new_node->set_left(copy_node(node->left()));
  }
  if (node->right() != nullptr) {
    new_node->set_right(copy_node(node->right()));
  }

  return new_node;
}

// Appends one selected element; the pointer is moved into the list.
void SelectInfo::add_selected_elem(SelectedElemInfo::Ptr selected_elem_info) {
  selected_elems_.push_back(std::move(selected_elem_info));
}

// Appends one order-by element; the pointer is moved into the list.
void SelectInfo::add_order_by_elem(OrderByElemInfo::Ptr orderby_elem_info) {
  orderby_elems_.push_back(std::move(orderby_elem_info));
}

// Sets the result limit. The member defaults to -1, and to_string() omits
// the limit line for that value.
void SelectInfo::set_limit(int value) {
  limit_ = value;
}

// Replaces the root of the search-condition tree (may be nullptr).
void SelectInfo::set_search_cond(Node::Ptr cond) {
  search_cond_ = std::move(cond);
}

// Returns the selected elements in insertion order.
const std::vector<SelectedElemInfo::Ptr> &SelectInfo::selected_elems() {
  return selected_elems_;
}
// Returns the order-by elements in insertion order.
const std::vector<OrderByElemInfo::Ptr> &SelectInfo::orderby_elems() {
  return orderby_elems_;
}

// Returns the stored result limit (-1 unless set_limit() was called).
int SelectInfo::limit() {
  return limit_;
}

// Read-only access to the root of the search-condition tree (may be nullptr).
const Node::Ptr &SelectInfo::search_cond() const {
  return search_cond_;
}

// Mutable access to the root pointer of the search-condition tree; callers
// may re-seat it through the returned reference.
Node::Ptr &SelectInfo::mutable_search_cond() {
  return search_cond_;
}

// Builds a multi-line, human-readable description of the SELECT statement.
// The table line is always emitted; every other section appears only when
// the corresponding piece of information is present.
std::string SelectInfo::to_string() {
  // Joins the to_string() of every element with ", ".
  const auto join_elems = [](const auto &elems) {
    std::string joined;
    bool first = true;
    for (const auto &elem : elems) {
      if (!first) {
        joined += ", ";
      }
      first = false;
      joined += elem->to_string();
    }
    return joined;
  };

  std::string out;

  out += "table: " + table_name();
  out += "\n";

  if (!selected_elems_.empty()) {
    out += "SelectedElems: ";
    out += join_elems(selected_elems_);
    out += "\n";
  }

  if (include_vector_) {
    out += "Include Vector: true";
    out += "\n";
  }

  if (search_cond_ != nullptr) {
    out += "Search Condition: ";
    out += search_cond_->text();
    out += "\n";
  }

  if (!orderby_elems_.empty()) {
    out += "Orderby Elems: ";
    out += join_elems(orderby_elems_);
    out += "\n";
  }

  // -1 is the "no limit set" default and is not printed.
  if (limit_ != -1) {
    out += "limit: " + std::to_string(limit_) + " ";
    out += "\n";
  }

  return out;
}

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/parser/select_info.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <map>
#include <memory>
#include <vector>
#include "db/sqlengine/common/group_by.h"
#include "base_info.h"
#include "node.h"
#include "orderby_elem_info.h"
#include "selected_elem_info.h"

namespace zvec::sqlengine {

// Parsed description of a SELECT statement: selected elements, order-by
// elements, the search-condition tree, an optional group-by description,
// a result limit and two include flags.
class SelectInfo : public BaseInfo {
 public:
  using Ptr = std::shared_ptr<SelectInfo>;

  // Constructs an empty description for the given table name.
  SelectInfo(const std::string &m_table_name);
  ~SelectInfo();

  // Copy operations are user-defined (see select_info.cc).
  SelectInfo(const SelectInfo &info);
  SelectInfo &operator=(const SelectInfo &info);

  // Read accessors.
  const std::vector<SelectedElemInfo::Ptr> &selected_elems();
  const std::vector<OrderByElemInfo::Ptr> &orderby_elems();
  int limit();
  const Node::Ptr &search_cond() const;
  // Returns a mutable reference to the root pointer of the condition tree.
  Node::Ptr &mutable_search_cond();

  // Mutators.
  void add_selected_elem(SelectedElemInfo::Ptr selected_elem_info);
  void add_order_by_elem(OrderByElemInfo::Ptr orderby_elem_info);
  void set_limit(int value);
  void set_search_cond(Node::Ptr cond);

  // Accessors for the include_vector flag (false by default).
  void set_include_vector(bool value) {
    include_vector_ = value;
  }

  bool include_vector() {
    return include_vector_;
  }

  // Accessors for the include_doc_id flag (false by default).
  void set_include_doc_id(bool value) {
    include_doc_id_ = value;
  }

  bool is_include_doc_id() {
    return include_doc_id_;
  }

  // Accessors for the group-by description (empty pointer by default).
  void set_group_by(GroupBy::Ptr group_by) {
    group_by_ = std::move(group_by);
  }
  const GroupBy::Ptr &group_by() const {
    return group_by_;
  }

  // Multi-line, human-readable rendering of the statement.
  std::string to_string();

 private:
  // Recursively copies an expression tree; returns nullptr for nullptr.
  Node::Ptr copy_node(const Node::Ptr &node);

 private:
  std::vector<SelectedElemInfo::Ptr> selected_elems_{};
  std::vector<OrderByElemInfo::Ptr> orderby_elems_{};
  Node::Ptr search_cond_{nullptr};
  GroupBy::Ptr group_by_{};
  // -1 is the default; to_string() does not print the limit for that value.
  int limit_{-1};
  bool include_vector_{false};
  bool include_doc_id_{false};
};

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/parser/selected_elem_info.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "selected_elem_info.h"

namespace zvec::sqlengine {

// Renders the selected element as text:
//   "*"                       for an asterisk element,
//   "<func>(*)"/"<func>(<p>)" for a function call,
//   "<field>[ as <alias>]"    for a plain field.
std::string SelectedElemInfo::to_string() const {
  if (is_asterisk()) {
    return "*";
  }

  if (is_func_call()) {
    const std::string param =
        is_func_param_asterisk() ? std::string("*") : func_param_;
    return func_name_ + "(" + param + ")";
  }

  if (alias().empty()) {
    return field_name_;
  }
  return field_name_ + " as " + alias();
}

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/parser/selected_elem_info.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include <string>
#include "node.h"

namespace zvec::sqlengine {

// Describes one element of a SELECT list. An element is an asterisk, an
// "empty" marker, a function call (name plus parameter) or a plain field
// with an optional alias.
class SelectedElemInfo {
 public:
  using Ptr = std::shared_ptr<SelectedElemInfo>;

  // Accessors for the asterisk flag (false by default).
  void set_asterisk(const bool value) {
    asterisk_ = value;
  }

  bool is_asterisk() const {
    return asterisk_;
  }

  // Accessors for the empty flag (false by default).
  void set_empty(const bool value) {
    empty_ = value;
  }

  bool is_empty() const {
    return empty_;
  }

  // Accessors for the field name.
  void set_field_name(const std::string &value) {
    field_name_ = value;
  }

  const std::string &field_name() const {
    return field_name_;
  }

  // Accessors for the alias; to_string() prints it only when non-empty.
  void set_alias(const std::string &value) {
    alias_ = value;
  }

  const std::string &alias() const {
    return alias_;
  }

  const std::string &func_name() const {
    return func_name_;
  }

  // Stores the function name. A non-empty name also marks the element as a
  // function call; an empty name leaves the flag untouched, so a flag that
  // was set earlier is not cleared.
  void set_func_name(const std::string &value) {
    func_name_ = value;
    if (!value.empty()) {
      func_call_ = true;
    }
  }

  // Accessors for the function parameter text.
  const std::string &func_param() const {
    return func_param_;
  }

  void set_func_param(const std::string &value) {
    func_param_ = value;
  }

  // True once a non-empty function name has been set.
  bool is_func_call() const {
    return func_call_;
  }

  // Accessors for the flag saying the function parameter is "*".
  void set_func_param_asterisk(bool value) {
    func_param_asterisk_ = value;
  }
  bool is_func_param_asterisk() const {
    return func_param_asterisk_;
  }

  // Textual rendering of the element (defined in selected_elem_info.cc).
  std::string to_string() const;

 private:
  bool asterisk_{false};
  bool empty_{false};

  std::string field_name_{""};
  std::string alias_{""};

  std::string func_name_{""};
  bool func_call_{false};
  std::string func_param_{""};
  bool func_param_asterisk_{false};
};

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/parser/sql_info.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "sql_info.h"
#include <memory>
#include "db/sqlengine/common/util.h"
#include "select_info.h"


namespace zvec::sqlengine {

// Constructs an SQLInfo of the given statement type that refers to
// `m_base_info` as its statement-specific payload.
SQLInfo::SQLInfo(SQLType m_type, BaseInfo::Ptr m_base_info)
    : type_(m_type), base_info_(std::move(m_base_info)) {}

// Nothing to release explicitly; base_info_ is a smart pointer.
SQLInfo::~SQLInfo() {}

// Copy constructor.
// Only SELECT statements carry a payload in the copy: their SelectInfo is
// deep-copied. For every other type, and when the source holds no
// SelectInfo (null or a different BaseInfo subclass), base_info_ stays
// nullptr instead of dereferencing a failed cast.
SQLInfo::SQLInfo(const SQLInfo &info) : type_(info.type_) {
  if (type_ == SQLType::SELECT) {
    const auto source_select =
        std::dynamic_pointer_cast<SelectInfo>(info.base_info_);
    if (source_select != nullptr) {
      base_info_ = std::make_shared<SelectInfo>(*source_select);
    }
  }
}

// Copy assignment.
// Mirrors the copy constructor: a SELECT payload is deep-copied, anything
// else results in a null base_info_. The copy is built before any member is
// overwritten, which keeps self-assignment safe, and a source without a
// SelectInfo no longer dereferences a failed cast.
SQLInfo &SQLInfo::operator=(const SQLInfo &info) {
  BaseInfo::Ptr copied_base = nullptr;
  if (info.type_ == SQLType::SELECT) {
    const auto source_select =
        std::dynamic_pointer_cast<SelectInfo>(info.base_info_);
    if (source_select != nullptr) {
      copied_base = std::make_shared<SelectInfo>(*source_select);
    }
  }

  type_ = info.type_;
  base_info_ = std::move(copied_base);

  return *this;
}

// Replaces the statement payload; the pointer is moved into the member.
void SQLInfo::set_base_info(BaseInfo::Ptr value) {
  base_info_ = std::move(value);
}

// Returns the statement payload; may be nullptr (the copy operations store
// nullptr for statement types other than SELECT).
const BaseInfo::Ptr &SQLInfo::base_info() const {
  return base_info_;
}

// Sets the statement type; the payload is left untouched.
void SQLInfo::set_type(SQLType value) {
  type_ = value;
}

// Returns the statement type.
SQLInfo::SQLType SQLInfo::type() const {
  return type_;
}

// Returns the statement type as text, via type_to_str().
std::string SQLInfo::type_name() const {
  return type_to_str(type_);
}

// Builds a human-readable dump of the statement: its type followed by the
// payload's own to_string(). The payload section is left empty when
// base_info_ is nullptr (which the copy operations produce for non-SELECT
// statements), so logging such an object cannot dereference a null pointer.
std::string SQLInfo::to_string() {
  std::string str = "SQL Info: {\n";
  str += "Type: " + type_name();
  str += "\n";
  str += "Info:";
  str += "\n";
  if (base_info_ != nullptr) {
    str += base_info_->to_string();
  }
  str += "}";
  return str;
}

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/parser/sql_info.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include "base_info.h"

namespace zvec::sqlengine {

// Result of parsing one SQL statement: the statement type plus a pointer to
// the statement-specific description (BaseInfo).
class SQLInfo {
 public:
  using Ptr = std::shared_ptr<SQLInfo>;

  // Kind of statement. The enumerator order must match the name table in
  // type_to_str(), which is indexed by the enumerator's integer value.
  enum class SQLType {
    NONE,
    INSERT,
    UPSERT,
    UPDATE,
    DELETE,
    CREATE,
    DROP,
    SELECT,
    SHOW_TABLES
  };
  // Maps a statement type to its upper-case name. The table is indexed
  // directly with the enum value and performs no range check.
  static inline std::string type_to_str(SQLType c) {
    static std::string names[] = {"NONE",   "INSERT", "UPSERT",
                                  "UPDATE", "DELETE", "CREATE",
                                  "DROP",   "SELECT", "SHOW_TABLES"};
    return names[static_cast<int>(c)];
  }

 public:
  SQLInfo(SQLType type, BaseInfo::Ptr m_base_info);
  ~SQLInfo();

  // Copy operations are user-defined (see sql_info.cc).
  SQLInfo(const SQLInfo &info);
  SQLInfo &operator=(const SQLInfo &info);

  void set_base_info(BaseInfo::Ptr value);
  void set_type(SQLType value);

  SQLType type() const;
  // Statement type as text (see type_to_str()).
  std::string type_name() const;
  const BaseInfo::Ptr &base_info() const;

  // Human-readable dump of type and payload.
  std::string to_string();

 private:
  SQLType type_{SQLType::NONE};
  BaseInfo::Ptr base_info_;
};

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/parser/sql_info_helper.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "sql_info_helper.h"
#include <stdint.h>
#include <memory>
#include <zvec/ailego/utility/string_helper.h>
#include <zvec/db/doc.h>
#include "db/sqlengine/common/group_by.h"
#include "db/sqlengine/common/util.h"
#include "db/sqlengine/parser/node.h"
#include "select_info.h"

namespace zvec::sqlengine {

using namespace zvec;

// Builds the relational expression `<field> = <vector payload>` for a
// vector query: a T_EQ node whose left child is the field identifier and
// whose right child carries the dense/sparse query data and the query
// parameters. The error-message argument is unused.
Node::Ptr handle_vector(const VectorQuery &request, std::string * /*err_msg*/) {
  Node::Ptr equals_node = std::make_shared<Node>(NodeOp::T_EQ);

  auto field_node = std::make_shared<IDNode>(request.field_name_);
  equals_node->set_left(std::move(field_node));

  auto vector_node = std::make_shared<VectorMatrixNode>(
      request.query_vector_, request.query_sparse_indices_,
      request.query_sparse_values_, request.query_params_);
  equals_node->set_right(std::move(vector_node));

  return equals_node;
}

void handle_query_field(const VectorQuery *query, SelectInfo *selected_info) {
  if (!query->output_fields_.has_value()) {
    SelectedElemInfo::Ptr selected_elem_info =
        std::make_shared<SelectedElemInfo>();
    selected_elem_info->set_asterisk(true);
    selected_info->add_selected_elem(std::move(selected_elem_info));
  } else if (query->output_fields_->empty()) {
    // select no field if output_fields is specified with empty vector
    SelectedElemInfo::Ptr selected_elem_info =
        std::make_shared<SelectedElemInfo>();
    selected_elem_info->set_empty(true);
    selected_info->add_selected_elem(std::move(selected_elem_info));
  } else {
    for (const auto &field : *query->output_fields_) {
      SelectedElemInfo::Ptr selected_elem_info =
          std::make_shared<SelectedElemInfo>();
      if (field == "*") {
        selected_elem_info->set_asterisk(true);
      } else {
        selected_elem_info->set_field_name(field);
      }
      selected_info->add_selected_elem(std::move(selected_elem_info));
    }
  }
}

bool SQLInfoHelper::MessageToSQLInfo(const VectorQuery *query,
                                     Node::Ptr filter_node,
                                     std::shared_ptr<GroupBy> group_by,
                                     sqlengine::SQLInfo::Ptr *sql_info,
                                     std::string *err_msg) {
  Node::Ptr index_params_node_ptr = nullptr;
  if (!query->query_vector_.empty() || !query->query_sparse_indices_.empty()) {
    index_params_node_ptr = handle_vector(*query, err_msg);
    if (index_params_node_ptr == nullptr) {
      return false;
    }
  }

  Node::Ptr cond_expr = nullptr;
  if (index_params_node_ptr && filter_node) {
    cond_expr = std::make_shared<Node>(NodeOp::T_AND);
    cond_expr->set_left(index_params_node_ptr);
    cond_expr->set_right(filter_node);
  } else if (index_params_node_ptr) {
    cond_expr = index_params_node_ptr;
  } else if (filter_node) {
    cond_expr = filter_node;
  }

  SelectInfo::Ptr select_info = std::make_shared<SelectInfo>("");
  handle_query_field(query, select_info.get());
  select_info->set_search_cond(cond_expr);

  uint32_t topk = query->topk_;
  select_info->set_limit(topk);
  select_info->set_include_vector(query->include_vector_);
  select_info->set_include_doc_id(query->include_doc_id_);

  select_info->set_group_by(std::move(group_by));
  //
  // for (int i = 0; i < query->order_by_fields_size(); ++i) {
  //   auto orderby_elem_info = std::make_shared<OrderByElemInfo>();
  //   orderby_elem_info->set_field_name(query->order_by_fields(i).field());
  //   if (query->order_by_fields(i).desc()) {
  //     orderby_elem_info->set_desc();
  //   }
  //   select_info->add_order_by_elem(std::move(orderby_elem_info));
  // }

  *sql_info = std::make_shared<SQLInfo>(SQLInfo::SQLType::SELECT, select_info);
  return true;
}

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/parser/sql_info_helper.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <zvec/db/doc.h>
#include "db/sqlengine/common/group_by.h"
#include "db/sqlengine/parser/node.h"
#include "db/sqlengine/parser/sql_info.h"

namespace zvec::sqlengine {

// Stateless helper that builds an SQLInfo from a structured query object
// instead of from SQL text.
class SQLInfoHelper {
 public:
  //! Converts a VectorQuery into a SELECT SQLInfo.
  //! `filter_node` and `group_by` are optional (may be null). The result is
  //! written to `*sql_info`; the return value reports success.
  static bool MessageToSQLInfo(const VectorQuery *query, Node::Ptr filter_node,
                               std::shared_ptr<GroupBy> group_by,
                               sqlengine::SQLInfo::Ptr *sql_info,
                               std::string *err_msg);
};

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/parser/zvec_cached_sql_parser.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "zvec_cached_sql_parser.h"
#include <exception>
#include <typeinfo>
#include <zvec/ailego/logger/logger.h>
#include <zvec/ailego/utility/string_helper.h>
#include "atn/ParserATNSimulator.h"
#include "db/sqlengine/antlr/gen/SQLLexer.h"
#include "db/sqlengine/antlr/gen/SQLParser.h"
#include "db/sqlengine/common/util.h"
#include "case_changing_charstream.h"
#include "error_verbose_listener.h"
#include "node.h"
#include "select_info.h"
#include "selected_elem_info.h"

using namespace antlr4;
using namespace tree;
using namespace atn;

namespace zvec::sqlengine {

// Definitions of the class-level (static) state of ZVecCachedSQLParser.
// Being static, these are shared by every parser instance.

// Parsed statements keyed by the query text with its bracketed vector part
// removed (see get_from_cache()).
std::unordered_map<std::string, SQLInfo::Ptr>
    ZVecCachedSQLParser::sql_info_map_{};
// Cache of filter trees keyed by string.
// NOTE(review): presumably keyed by the filter text — confirm where it is
// populated.
std::unordered_map<std::string, Node::Ptr> ZVecCachedSQLParser::filter_map_;
// Cache hit / miss counters; both are reset when the statement cache is
// cleared in put_into_cache().
uint32_t ZVecCachedSQLParser::Hit{0};
uint32_t ZVecCachedSQLParser::Miss{0};

// Stores the entry count at which put_into_cache() clears the statement
// cache before inserting.
ZVecCachedSQLParser::ZVecCachedSQLParser(uint32_t cache_count)
    : cache_count_(cache_count) {}

// Nothing to release here; the static caches are not cleared on destruction.
ZVecCachedSQLParser::~ZVecCachedSQLParser() {}

// Parses `query`, serving it from the statement cache when possible.
//
// A cache lookup is attempted first. On a miss the real parser is invoked
// and a successful result is cached under the key computed by
// get_from_cache(). When no key could be derived (the query has no
// bracketed vector part) the result is returned without being cached: such
// an entry could never be looked up again, and inserting it would wipe the
// whole cache whenever the cache is full.
//
// Returns nullptr on a parse failure, with the parser's message stored in
// err_msg_.
SQLInfo::Ptr ZVecCachedSQLParser::parse(const std::string &query,
                                        bool need_formatted_tree) {
  std::string query_cache_key;
  SQLInfo::Ptr cached_sql_info = get_from_cache(query, &query_cache_key);
  if (cached_sql_info != nullptr) {
    return cached_sql_info;
  }

  SQLInfo::Ptr new_sql_info = real_parser_.parse(query, need_formatted_tree);
  if (new_sql_info == nullptr) {
    // A statement that failed to parse is never cached.
    err_msg_ = real_parser_.err_msg();
    return nullptr;
  }

  if (!query_cache_key.empty()) {
    put_into_cache(query_cache_key, new_sql_info);
  }

  return new_sql_info;
}

// Inserts a parsed statement into the cache under `query_cache_key`.
// When the cache has reached cache_count_ entries it is emptied first and
// the hit/miss counters are reset. An already present key keeps its old
// value, because emplace does not overwrite.
void ZVecCachedSQLParser::put_into_cache(const std::string &query_cache_key,
                                         const SQLInfo::Ptr &new_sql_info) {
  {
    // Exclusive lock, held only while the map is modified.
    std::unique_lock guard(shared_mutex_);
    const bool cache_full = sql_info_map_.size() >= cache_count_;
    if (cache_full) {
      // No per-entry eviction: start over with an empty cache.
      sql_info_map_.clear();
      Miss = 0;
      Hit = 0;
    }
    sql_info_map_.emplace(query_cache_key, new_sql_info);
  }

  LOG_DEBUG("cache emplaced. [%s] [%s] ", query_cache_key.c_str(),
            new_sql_info->to_string().c_str());
}

// Tries to answer `query` from the statement cache.
//
// The span from the first '[' to the last ']' of the query is treated as
// the vector literal. The cache key is the query with that span removed and
// is written to *query_cache_key. On a hit the cached SQLInfo is copied,
// the vector literal is parsed and spliced into the copy, and the copy is
// returned.
//
// Returns nullptr when the query has no well-formed bracket span (the key is
// then left untouched), on a cache miss, when the vector literal cannot be
// parsed, or when it cannot be spliced into the cached statement.
SQLInfo::Ptr ZVecCachedSQLParser::get_from_cache(const std::string &query,
                                                 std::string *query_cache_key) {
  // Locate the outermost bracket span.
  const std::string::size_type left_pos = query.find('[');
  if (left_pos == std::string::npos) {
    return nullptr;
  }
  const std::string::size_type right_pos = query.rfind(']');
  if (right_pos == std::string::npos || right_pos < left_pos) {
    // No closing bracket, or it precedes the opening one: there is no
    // vector span to cut out. Without this guard the span length below
    // would wrap around.
    return nullptr;
  }

  // key = text before '[' + text after ']'
  query_cache_key->assign(query, 0, left_pos);
  query_cache_key->append(query, right_pos + 1, std::string::npos);
  std::string vector_text = query.substr(left_pos, right_pos - left_pos + 1);

  SQLInfo::Ptr copied_sql_info = nullptr;
  {  // Shared lock only while the map is read and the entry is copied.
    std::shared_lock guard(shared_mutex_);
    auto iter = sql_info_map_.find(*query_cache_key);
    if (iter == sql_info_map_.end()) {
      // NOTE(review): Miss (here) and Hit (below) are plain uint32_t
      // counters updated without exclusive locking; concurrent callers can
      // race on them. Confirm whether exact statistics are required.
      ++Miss;
      LOG_DEBUG("cache miss. key: [%s]", query_cache_key->c_str());
      return nullptr;
    }

    // Work on a private copy so the cached entry itself is not modified.
    copied_sql_info = std::make_shared<SQLInfo>(*iter->second);
  }

  // Parse the vector part of the query.
  Node::Ptr vector_node = parse_vector_text(&vector_text);
  if (vector_node == nullptr) {
    LOG_DEBUG("wrong vector format: [%s]", vector_text.c_str());
    return nullptr;
  }
  // Splice the freshly parsed vector into the copied statement.
  if (replace_vector_node(copied_sql_info, vector_node) != 0) {
    LOG_WARN("replace_vector_node failed. [%s][%s]", query.c_str(),
             vector_text.c_str());
    return nullptr;
  }

  ++Hit;
  LOG_DEBUG("cache hit. key: [%s] sql_info: [%s]", query_cache_key->c_str(),
            copied_sql_info->to_string().c_str());
  return copied_sql_info;
}

/// Replaces the vector-matrix node inside the search condition of
/// `cached_sql_info` with `vector_node`.
///
/// @return 0 when a node was replaced; -1 when the info does not wrap a
///         SelectInfo, has no search condition, or no node was replaced.
int ZVecCachedSQLParser::replace_vector_node(SQLInfo::Ptr cached_sql_info,
                                             Node::Ptr vector_node) {
  // The entry must wrap a SelectInfo to have a search condition at all.
  auto select_info =
      std::dynamic_pointer_cast<SelectInfo>(cached_sql_info->base_info());
  if (!select_info) {
    LOG_WARN("wrong select_info in cache. [%s]",
             cached_sql_info->to_string().c_str());
    return -1;
  }

  Node::Ptr cond_root = select_info->mutable_search_cond();
  if (!cond_root) {
    LOG_WARN("wrong search_cond in cache. [%s]",
             cached_sql_info->to_string().c_str());
    return -1;
  }

  // traverse_to_replace() raises replace_flag_ once it has swapped a node;
  // a clean traversal that replaced nothing still counts as a failure.
  replace_flag_ = false;
  const bool traversed_ok = traverse_to_replace(cond_root, vector_node) == 0;
  if (!traversed_ok || !replace_flag_) {
    LOG_WARN("replace search_cond in cache failed. [%s]",
             cached_sql_info->to_string().c_str());
    return -1;
  }

  return 0;
}

/// Walks the tree rooted at `ptr` (left subtree first, then right) and swaps
/// the first T_VECTOR_MATRIX_VALUE node found for `vector_node`, setting
/// replace_flag_ on success.
///
/// @return 0 when the walk completed (whether or not something was replaced),
///         -1 when the vector node has no usable parent link.
int ZVecCachedSQLParser::traverse_to_replace(Node::Ptr ptr,
                                             Node::Ptr vector_node) {
  if (ptr->op() != NodeOp::T_VECTOR_MATRIX_VALUE) {
    // Interior node: descend, stopping as soon as a replacement happened.
    if (ptr->left() != nullptr) {
      if (traverse_to_replace(ptr->left(), vector_node) != 0) {
        return -1;
      }
      if (replace_flag_) {
        return 0;
      }
    }
    if (ptr->right() != nullptr &&
        traverse_to_replace(ptr->right(), vector_node) != 0) {
      return -1;
    }
    return 0;
  }

  // Found the vector node: rewire whichever parent slot points at it.
  Node *owner = dynamic_cast<Node *>(ptr->parent());
  if (owner == nullptr) {
    LOG_WARN("wrong parent node in cache. [%s]", ptr->to_string().c_str());
    return -1;
  }

  if (owner->left() == ptr) {
    owner->set_left(vector_node);
  } else if (owner->right() == ptr) {
    owner->set_right(vector_node);
  } else {
    // Parent link exists but neither child slot refers back to this node.
    LOG_WARN("wrong node in cache. [%s]", ptr->to_string().c_str());
    return -1;
  }
  replace_flag_ = true;
  return 0;
}

/// Parses a filter expression, serving it from filter_map_ when the exact
/// filter text has been parsed before.
///
/// On a miss the filter is parsed by real_parser_, its error message and
/// formatted tree are copied into this object, and a successful result is
/// stored in the cache. The cache is emptied (and the hit/miss counters
/// reset) once it holds cache_count_ entries.
///
/// @return The parsed (possibly cached) tree, or nullptr when parsing failed.
Node::Ptr ZVecCachedSQLParser::parse_filter(const std::string &filter,
                                            bool need_formatted_tree) {
  {
    std::shared_lock guard(shared_mutex_);
    const auto cached = filter_map_.find(filter);
    // NOTE(review): Hit/Miss are plain uint32_t updated under a shared
    // (reader) lock; confirm concurrent readers cannot race on them.
    if (cached == filter_map_.end()) {
      ++Miss;
    } else {
      ++Hit;
      return cached->second;
    }
  }

  // Miss: delegate to the uncached parser and mirror its diagnostics.
  auto parsed = real_parser_.parse_filter(filter, need_formatted_tree);
  err_msg_ = real_parser_.err_msg();
  formatted_tree_ = real_parser_.formatted_tree();

  if (parsed != nullptr) {
    std::unique_lock guard(shared_mutex_);
    const bool cache_full = filter_map_.size() >= cache_count_;
    if (cache_full) {
      // No per-entry eviction: drop everything and start counting afresh.
      filter_map_.clear();
      Miss = 0;
      Hit = 0;
    }
    filter_map_.emplace(filter, parsed);
  }
  return parsed;
}

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/parser/zvec_cached_sql_parser.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <shared_mutex>
#include <unordered_map>
#include "select_info.h"
#include "zvec_sql_parser.h"

namespace zvec::sqlengine {

// ZVecParser implementation that keeps process-wide caches of parse results
// and falls back to an embedded ZVecSQLParser for filters it has not seen.
class ZVecCachedSQLParser : public ZVecParser {
 public:
  // cache_count: entry limit checked before inserting into the filter cache.
  ZVecCachedSQLParser(uint32_t cache_count);
  ~ZVecCachedSQLParser();

  SQLInfo::Ptr parse(const std::string &query,
                     bool need_formatted_tree = false) override;

  // Returns the cached tree for `filter` when present; otherwise parses it
  // with real_parser_ and caches a successful result.
  Node::Ptr parse_filter(const std::string &filter,
                         bool need_formatted_tree = false) override;

 private:
  // Stores sql_info in sql_info_map_ under query_cache_key.
  void put_into_cache(const std::string &query_cache_key,
                      const SQLInfo::Ptr &sql_info);
  // Derives the cache key from `query` (query text with the bracketed vector
  // literal removed), writes it to *query_cache_key and, on a hit, returns a
  // copy of the cached SQLInfo with the query's vector patched in.
  SQLInfo::Ptr get_from_cache(const std::string &query,
                              std::string *query_cache_key);

  // Replaces the vector node in the search condition of cached_sql_info.
  // Returns 0 on success, -1 on failure.
  int replace_vector_node(SQLInfo::Ptr cached_sql_info, Node::Ptr vector_node);
  // Recursive helper of replace_vector_node; sets replace_flag_ when it has
  // swapped a node. Returns 0 on a clean walk, -1 on a malformed tree.
  int traverse_to_replace(Node::Ptr ptr, Node::Ptr vector_node);

 private:
  // Static state shared by every instance; accessed under shared_mutex_.
  // Key: query text without its vector literal.
  static std::unordered_map<std::string, SQLInfo::Ptr> sql_info_map_;
  // Key: full filter text.
  static std::unordered_map<std::string, Node::Ptr> filter_map_;
  // Cache hit / miss counters; reset when the filter cache is cleared.
  static uint32_t Hit;
  static uint32_t Miss;
  inline static std::shared_mutex shared_mutex_;

 private:
  // Set by traverse_to_replace once a vector node has been replaced.
  bool replace_flag_{false};
  // Uncached parser used on cache misses.
  ZVecSQLParser real_parser_;
  // Entry limit compared against the cache size before inserting.
  uint32_t cache_count_{0};
};

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/parser/zvec_parser.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "zvec_parser.h"
#include <float.h>
#include <stdint.h>
#include <exception>
#include <fstream>
#include <iostream>
#include <limits>
#include <typeinfo>
#include <zvec/ailego/encoding/json.h>
#include <zvec/ailego/logger/logger.h>
#include <zvec/ailego/utility/string_helper.h>
#include "db/sqlengine/common/util.h"
#include "tree/ParseTree.h"
#include "zvec_cached_sql_parser.h"
#include "zvec_sql_parser.h"

using namespace antlr4;
using namespace tree;

namespace zvec::sqlengine {

// Returns a reference to err_msg_, which the parse routines fill in when they
// fail. The reference stays tied to this parser object.
const std::string &ZVecParser::err_msg() {
  return err_msg_;
}

// Removes surrounding quote characters from `value` in place -- first single
// quotes, then double quotes -- and also returns the result by value.
// (Exact stripping rules are those of Util::trim_one_both_side.)
std::string ZVecParser::trim(std::string &value) {
  for (const char quote : {'\'', '\"'}) {
    value = Util::trim_one_both_side(value, quote);
  }
  return value;
}

// Returns a reference to formatted_tree_, the indented parse-tree dump that
// the parse routines store when they are called with need_formatted_tree.
const std::string &ZVecParser::formatted_tree() {
  return formatted_tree_;
}

/// Renders a parse tree as indented text, one line per parenthesis.
///
/// The tree's LISP-style string form is cut at every '(' and ')'. Each piece
/// is followed by a newline and two spaces per current nesting level. Text
/// after the last parenthesis is not emitted.
///
/// @param tree    ParseTree* passed as void*.
/// @param parser  Parser object passed as void*; only forwarded to
///                toStringTree().
/// @return The formatted text, or "" when either pointer is null.
std::string ZVecParser::to_formatted_string_tree(void *tree, void *parser) {
  if (tree == nullptr || parser == nullptr) {
    return "";
  }

  ParseTree *parse_tree = reinterpret_cast<ParseTree *>(tree);
  // NOTE(review): the callers in this code base pass the address of an antlr
  // SQLParser, yet the pointer is reinterpreted as ZVecParser*. Confirm which
  // toStringTree overload this selects and that it is the intended one.
  ZVecParser *se_parser = reinterpret_cast<ZVecParser *>(parser);

  std::string tree_text = parse_tree->toStringTree(se_parser);

  int pos = 0, pos1 = 0, pos2 = 0, start = 0;
  int i = 0, num = 0;  // num: current nesting depth (may go negative)
  const std::string DELIMITER = "  ";
  const std::string LINE = "\n";
  int lastPos1 = 0;  // 1 when the previous cut was made at a '('

  std::string out;

  while (true) {
    pos1 = static_cast<int>(tree_text.find_first_of('(', start));
    pos2 = static_cast<int>(tree_text.find_first_of(')', start));

    // Skip the very first '(' so the root does not yield an empty line.
    if (pos1 == 0) {
      start = pos + 1;
      continue;
    }

    // No parenthesis left: done.
    if (pos1 < 0 && pos2 < 0) {
      break;
    }

    std::string formatted;
    // Re-attach the '(' consumed by the previous cut.
    if (lastPos1 == 1) {
      formatted += "(";
    }

    // Take '(' when it comes first, or when it is the only parenthesis left.
    // The latter case used to fall into the ')' branch with pos2 == -1, which
    // reset `start` to 0 and looped forever on unbalanced text.
    if (pos1 >= 0 && (pos2 < 0 || pos1 < pos2)) {
      pos = pos1;
      formatted += tree_text.substr(start, static_cast<size_t>(pos1 - start));
      num++;
    } else {
      pos = pos2;
      formatted +=
          tree_text.substr(start, static_cast<size_t>(pos2 - start)) + ")";
      num--;
    }

    formatted += LINE;
    for (i = 0; i < num; i++) {
      formatted += DELIMITER;
    }

    start = pos + 1;
    lastPos1 = (pos == pos1) ? 1 : 0;

    out += formatted;
  }

  return out;
}

// Writes `formatted` followed by a newline to `file_name`. Stream errors
// (e.g. the file cannot be opened) are not reported.
void ZVecParser::save_to_file(const std::string &file_name,
                              const std::string &formatted) {
  std::ofstream sink(file_name);
  sink << formatted << std::endl;
  sink.close();
}

// Creates a parser with the default cache size.
ZVecParser::Ptr ZVecParser::create() {
  // TODO: support config
  // auto &config = zvec::Config::Instance();
  // int32_t cache_count = config.get_sql_info_cache_count();
  return create(DEFAULT_CACHE_COUNT);
}

// Factory for parsers.
//  - cache_count < 0: caching disabled, returns a plain ZVecSQLParser.
//  - otherwise: returns a ZVecCachedSQLParser whose cache size is cache_count
//    raised to at least DEFAULT_CACHE_COUNT.
ZVecParser::Ptr ZVecParser::create(int cache_count) {
  if (cache_count < 0) {
    LOG_DEBUG("ZVecCachedSQLParser disabled.");
    return std::make_shared<ZVecSQLParser>();
  }

  // Unset or small values are rounded up to the default.
  if (cache_count < DEFAULT_CACHE_COUNT) {
    cache_count = DEFAULT_CACHE_COUNT;
  }
  LOG_DEBUG("ZVecCachedSQLParser enabled. effective cache_count %d",
            cache_count);
  return std::make_shared<ZVecCachedSQLParser>(cache_count);
}

// Wraps the textual vector literal in a VectorMatrixNode.
// The string is moved out of *vector_text, so the caller's string is left in
// a moved-from state afterwards. The text itself is not validated here.
Node::Ptr ZVecParser::parse_vector_text(std::string *vector_text) {
  return std::make_shared<VectorMatrixNode>(std::move(*vector_text), "", "",
                                            nullptr);
}

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/parser/zvec_parser.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "node.h"
#include "sql_info.h"

namespace zvec::sqlengine {

// Opaque handle used to pass parse-tree nodes between parser helpers.
using VoidPtr = void *;

// Abstract base of the SQL / filter parsers. Also hosts the factory functions
// and the shared error-message / formatted-tree state.
class ZVecParser {
 public:
  using Ptr = std::shared_ptr<ZVecParser>;

  ZVecParser() = default;
  virtual ~ZVecParser() {};

  // Parses a full SQL statement; nullptr on failure (see err_msg()).
  virtual SQLInfo::Ptr parse(const std::string &query,
                             bool formatted_tree = false) = 0;

  // Parses a stand-alone filter expression; nullptr on failure.
  virtual Node::Ptr parse_filter(const std::string &filter,
                                 bool need_formatted_tree = false) = 0;


 protected:
  // Strips surrounding quotes from `value` in place and returns the result.
  std::string trim(std::string &value);
  // Renders a parse tree (passed as void*) as indented text.
  virtual std::string to_formatted_string_tree(void *tree, void *parser);
  // Writes `formatted` plus a trailing newline to `file_name`.
  virtual void save_to_file(const std::string &file_name,
                            const std::string &formatted);

 public:
  // Accessors for err_msg_ / formatted_tree_.
  virtual const std::string &err_msg();
  virtual const std::string &formatted_tree();

  // Builds a vector node from its text; moves from *vector_text.
  Node::Ptr parse_vector_text(std::string *vector_text);

 public:
  // Creates a parser with the default cache size.
  static ZVecParser::Ptr create();
  // Negative cache_count disables caching; other values are raised to at
  // least DEFAULT_CACHE_COUNT.
  static ZVecParser::Ptr create(int cache_count);
  const static int32_t DEFAULT_CACHE_COUNT{100};

 protected:
  // Error text recorded by the parse routines on failure.
  std::string err_msg_{""};
  // Indented parse-tree dump, filled when a formatted tree is requested.
  std::string formatted_tree_{""};
};

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/parser/zvec_sql_parser.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "zvec_sql_parser.h"
#include <exception>
#include <memory>
#include <zvec/ailego/logger/logger.h>
#include "atn/ParserATNSimulator.h"
#include "db/sqlengine/antlr/gen/SQLLexer.h"
#include "db/sqlengine/antlr/gen/SQLParser.h"
#include "db/sqlengine/common/util.h"
#include "case_changing_charstream.h"
#include "error_verbose_listener.h"
#include "node.h"
#include "select_info.h"
#include "selected_elem_info.h"

using namespace antlr4;
using namespace tree;
using namespace atn;

namespace zvec::sqlengine {

/// Parses a complete SQL statement into an SQLInfo.
///
/// @param query                SQL text.
/// @param need_formatted_tree  When true, formatted_tree_ is filled with an
///                             indented dump of the parse tree.
/// @return The SQLInfo, or nullptr on lexer/syntax errors, on a statement
///         that sql_info() rejects, or when an exception is thrown. Lexer,
///         syntax and exception failures store a description in err_msg_.
SQLInfo::Ptr ZVecSQLParser::parse(const std::string &query,
                                  bool need_formatted_tree) {
  try {
    // Lexer/parser pipeline over the query text.
    ANTLRInputStream input(query);
    CaseChangingCharStream in(&input, true);

    SQLLexer lexer(&in);

    CommonTokenStream tokens(&lexer);

    SQLParser parser(&tokens);

    // Replace the default listeners with ones that collect error text.
    ErrorVerboseListener lexer_error_listener;
    lexer.removeErrorListeners();
    lexer.addErrorListener((ANTLRErrorListener *)&lexer_error_listener);
    ErrorVerboseListener parser_error_listener;
    parser.removeErrorListeners();
    parser.addErrorListener((ANTLRErrorListener *)&parser_error_listener);

    // NOTE(review): the retry below assumes the first attempt ran in SLL
    // mode, but this function never selects SLL itself; confirm it is
    // configured elsewhere.
    ParseTree *tree = parser.compilation_unit();

    if (lexer.getNumberOfSyntaxErrors() > 0 ||
        parser.getNumberOfSyntaxErrors() > 0) {
      // First attempt failed: rewind and retry with full LL prediction.
      LOG_INFO("SLL failed. using LL");
      tokens.reset();
      parser.reset();
      parser.getInterpreter<ParserATNSimulator>()->setPredictionMode(
          PredictionMode::LL);
      tree = parser.compilation_unit();
    }

    if (lexer.getNumberOfSyntaxErrors() > 0) {
      err_msg_ = "lexer error [" + lexer_error_listener.err_msg() + "]";
      return nullptr;
    }
    if (parser.getNumberOfSyntaxErrors() > 0) {
      err_msg_ = "syntax error [" + parser_error_listener.err_msg() + "]";
      return nullptr;
    }

    if (need_formatted_tree) {
      formatted_tree_ = to_formatted_string_tree(tree, &parser);
    }

    // Convert the ANTLR tree into the engine's own representation.
    return sql_info(tree);
  } catch (const std::exception &e) {
    // Caught by const reference, matching parse_filter(); covers exceptions
    // from ANTLR as well as from the tree conversion (e.g. std::stoi).
    err_msg_ = "parse error [" + std::string(e.what()) + "]";
    return nullptr;
  }
}

/// Converts the root of a parsed compilation unit into an SQLInfo.
///
/// @param tree  ParseTree* (a Compilation_unitContext) passed as void*.
/// @return The SQLInfo, or nullptr when the tree is missing/empty, the
///         statement type is not supported, the statement could not be
///         converted, or validation fails. The missing-tree and validation
///         failures store a message in err_msg_.
SQLInfo::Ptr ZVecSQLParser::sql_info(VoidPtr tree) {
  ParseTree *parseTree = reinterpret_cast<ParseTree *>(tree);
  if (parseTree == nullptr) {
    err_msg_ = "parse error [null tree]";
    return nullptr;
  }
  SQLParser::Compilation_unitContext *compilation_unit_node =
      (SQLParser::Compilation_unitContext *)parseTree;
  // children[0] is read below; guard against an empty compilation unit
  // instead of indexing out of bounds.
  if (compilation_unit_node->children.empty() ||
      compilation_unit_node->children[0] == nullptr) {
    err_msg_ = "parse error [null tree]";
    return nullptr;
  }
  SQLParser::Unit_statementContext *unit_statement_node =
      (SQLParser::Unit_statementContext *)compilation_unit_node->children[0];

  SQLInfo::SQLType sqlType = sql_type(unit_statement_node);
  if (sqlType == SQLInfo::SQLType::NONE) {
    return nullptr;
  }

  BaseInfo::Ptr baseInfo = nullptr;
  switch (sqlType) {
    case SQLInfo::SQLType::SELECT:
      // sql_type() already verified both accessors are non-null for SELECT.
      baseInfo =
          select_info(unit_statement_node->dql_statement()->select_statement());
      break;
    default:
      break;
  }

  if (baseInfo == nullptr) {
    return nullptr;
  }

  if (baseInfo->validate() == false) {
    err_msg_ = baseInfo->err_msg();
    return nullptr;
  }

  return std::make_shared<SQLInfo>(sqlType, baseInfo);
}

// Classifies a unit statement (Unit_statementContext* passed as void*).
// Returns SELECT when it holds a DQL statement with a select statement,
// NONE otherwise.
SQLInfo::SQLType ZVecSQLParser::sql_type(VoidPtr node) {
  auto *unit_stmt = reinterpret_cast<SQLParser::Unit_statementContext *>(node);
  auto *dql_stmt =
      (SQLParser::Dql_statementContext *)unit_stmt->dql_statement();

  const bool is_select =
      dql_stmt != nullptr && dql_stmt->select_statement() != nullptr;
  return is_select ? SQLInfo::SQLType::SELECT : SQLInfo::SQLType::NONE;
}

// Builds a SelectInfo from a Select_statementContext* (passed as void*):
// table name, selected elements, optional WHERE condition, ORDER BY elements
// and LIMIT. Returns nullptr when the WHERE condition cannot be converted.
// std::stoi on the LIMIT text may throw; callers are expected to catch.
SelectInfo::Ptr ZVecSQLParser::select_info(VoidPtr node) {
  auto *stmt = reinterpret_cast<SQLParser::Select_statementContext *>(node);

  SQLParser::Selected_elementsContext *elements_ctx = stmt->selected_elements();
  SQLParser::From_clauseContext *from_ctx = stmt->from_clause();
  SQLParser::Where_clauseContext *where_ctx = stmt->where_clause();
  SQLParser::Order_by_clauseContext *order_ctx = stmt->order_by_clause();
  SQLParser::Limit_clauseContext *limit_ctx = stmt->limit_clause();

  // FROM: the table name stays empty when no table/view name is present.
  std::string table_name;
  if (from_ctx->tableview_name() != nullptr) {
    table_name = from_ctx->tableview_name()->getText();
  }
  SelectInfo::Ptr result = std::make_shared<SelectInfo>(table_name);

  // SELECT list: each element is either `field [alias]` or `*`.
  for (auto element_ctx : elements_ctx->selected_element()) {
    SelectedElemInfo::Ptr elem = std::make_shared<SelectedElemInfo>();

    if (element_ctx->field_name() == nullptr) {
      if (element_ctx->ASTERISK()) {
        elem->set_asterisk(true);
      }
    } else {
      elem->set_field_name(element_ctx->field_name()->getText());
      if (element_ctx->field_alias() != nullptr) {
        elem->set_alias(element_ctx->field_alias()->getText());
      }
    }

    result->add_selected_elem(std::move(elem));
  }

  // WHERE: a condition that fails to convert aborts the whole statement.
  if (where_ctx) {
    Node::Ptr condition = handle_logic_expr_node(where_ctx->logic_expr());
    if (condition == nullptr) {
      return nullptr;
    }
    result->set_search_cond(std::move(condition));
  }

  // ORDER BY: ascending unless DESC is given.
  if (order_ctx != nullptr) {
    for (auto order_elem_ctx : order_ctx->order_by_element()) {
      auto order_elem = std::make_shared<OrderByElemInfo>();
      order_elem->set_field_name(order_elem_ctx->field_name()->getText());
      if (order_elem_ctx->DESC()) {
        order_elem->set_desc();
      }
      result->add_order_by_elem(std::move(order_elem));
    }
  }

  // LIMIT
  if (limit_ctx != nullptr) {
    result->set_limit(std::stoi(limit_ctx->int_value()->getText()));
  }

  return result;
}

/// Converts a Logic_exprContext* (passed as void*) into a Node tree.
///
/// OR / AND become binary nodes over their two operands, a parenthesised
/// expression is unwrapped, and a relation expression is delegated to
/// handle_rel_expr_node().
///
/// @return The converted subtree, or nullptr when `node` is null, the
///         expression has an unexpected shape, or any operand fails to
///         convert. A failed operand is never attached as a null child.
Node::Ptr ZVecSQLParser::handle_logic_expr_node(VoidPtr node) {
  SQLParser::Logic_exprContext *logicExprNode =
      reinterpret_cast<SQLParser::Logic_exprContext *>(node);
  if (logicExprNode == nullptr) {
    return nullptr;
  }

  const bool is_or = logicExprNode->OR() != nullptr;
  if (is_or || logicExprNode->AND() != nullptr) {
    const std::vector<SQLParser::Logic_exprContext *> operands =
        logicExprNode->logic_expr();
    if (operands.size() < 2) {
      return nullptr;
    }

    // Convert both operands first so a failure in either one propagates.
    Node::Ptr lhs = handle_logic_expr_node(operands[0]);
    if (lhs == nullptr) {
      return nullptr;
    }
    Node::Ptr rhs = handle_logic_expr_node(operands[1]);
    if (rhs == nullptr) {
      return nullptr;
    }

    // OR takes precedence when both tokens are reported, as before.
    Node::Ptr combined =
        std::make_shared<Node>(is_or ? NodeOp::T_OR : NodeOp::T_AND);
    combined->set_left(std::move(lhs));
    combined->set_right(std::move(rhs));
    return combined;
  }

  if (logicExprNode->enclosed_expr() != nullptr) {
    // Parentheses carry no node of their own; the inner subtree stands in.
    return handle_logic_expr_node(logicExprNode->enclosed_expr()->logic_expr());
  }
  if (logicExprNode->relation_expr() != nullptr) {
    return handle_rel_expr_node(logicExprNode->relation_expr());
  }

  return nullptr;
}

// Converts the left-hand side of a relation expression, which is either an
// identifier or a function call. Anything else records an error in err_msg_
// and yields nullptr.
Node::Ptr ZVecSQLParser::handle_rel_expr_left_node(VoidPtr node) {
  auto *rel_ctx = reinterpret_cast<SQLParser::Relation_exprContext *>(node);

  if (rel_ctx->identifier() != nullptr) {
    return handle_id_node(rel_ctx->identifier());
  }
  if (rel_ctx->function_call() != nullptr) {
    return handle_function_call_node(rel_ctx->function_call());
  }

  err_msg_ =
      "Parse failed. Unexpected rel expr left node." + rel_ctx->getText();
  return nullptr;
}

/// Converts a Relation_exprContext* (passed as void*) into a binary Node.
///
/// Supported forms:
///  - comparison (=, !=, <, >, <=, >=) and LIKE: right side is a value;
///  - IN / CONTAIN_ALL / CONTAIN_ANY: right side is a value list, with the
///    optional NOT recorded as the list's "exclude" flag;
///  - IS [NOT] NULL: right side is an empty constant tagged T_NULL_VALUE.
///
/// @return The relation node, or nullptr when the form is not recognised or
///         either side fails to convert. The left operand is checked as
///         well, so its failure is no longer attached as a null child.
Node::Ptr ZVecSQLParser::handle_rel_expr_node(VoidPtr node) {
  SQLParser::Relation_exprContext *relationExprNode =
      reinterpret_cast<SQLParser::Relation_exprContext *>(node);

  // How the right-hand operand has to be built for the detected form.
  enum class RightKind { kValue, kInList, kNull };

  NodeOp nodeOp = NodeOp::T_NONE;
  RightKind right_kind = RightKind::kValue;

  if (relationExprNode->rel_oper() != nullptr) {
    SQLParser::Rel_operContext *op = relationExprNode->rel_oper();
    if (op->E_OP()) {
      nodeOp = NodeOp::T_EQ;
    } else if (op->ne_op()) {
      nodeOp = NodeOp::T_NE;
    } else if (op->L_OP()) {
      nodeOp = NodeOp::T_LT;
    } else if (op->G_OP()) {
      nodeOp = NodeOp::T_GT;
    } else if (op->le_op()) {
      nodeOp = NodeOp::T_LE;
    } else if (op->ge_op()) {
      nodeOp = NodeOp::T_GE;
    }
  } else if (relationExprNode->LIKE() != nullptr) {
    nodeOp = NodeOp::T_LIKE;
  } else if (relationExprNode->IN() != nullptr ||
             relationExprNode->CONTAIN_ALL() != nullptr ||
             relationExprNode->CONTAIN_ANY() != nullptr) {
    if (relationExprNode->CONTAIN_ALL() != nullptr) {
      nodeOp = NodeOp::T_CONTAIN_ALL;
    } else if (relationExprNode->CONTAIN_ANY() != nullptr) {
      nodeOp = NodeOp::T_CONTAIN_ANY;
    } else {
      nodeOp = NodeOp::T_IN;
    }
    right_kind = RightKind::kInList;
  } else if (relationExprNode->NULL_V() != nullptr) {
    nodeOp = (relationExprNode->NOT() != nullptr) ? NodeOp::T_IS_NOT_NULL
                                                  : NodeOp::T_IS_NULL;
    right_kind = RightKind::kNull;
  } else {
    return nullptr;
  }

  // Left operand first (as before); it sets err_msg_ itself on failure.
  Node::Ptr left = handle_rel_expr_left_node(relationExprNode);
  if (left == nullptr) {
    return nullptr;
  }

  Node::Ptr right = nullptr;
  switch (right_kind) {
    case RightKind::kValue:
      right = handle_value_expr_node(relationExprNode->value_expr());
      break;
    case RightKind::kInList:
      right =
          handle_in_value_expr_list_node(relationExprNode->in_value_expr_list(),
                                         relationExprNode->NOT() != nullptr);
      break;
    case RightKind::kNull: {
      auto null_value = std::make_shared<ConstantNode>("");
      null_value->set_op(NodeOp::T_NULL_VALUE);
      right = std::move(null_value);
      break;
    }
  }
  if (right == nullptr) {
    return nullptr;
  }

  Node::Ptr relationalExpr = std::make_shared<Node>(nodeOp);
  relationalExpr->set_left(std::move(left));
  relationalExpr->set_right(std::move(right));
  return relationalExpr;
}

// Converts a Value_exprContext* (passed as void*): a constant or a function
// call. Returns nullptr for any other content.
Node::Ptr ZVecSQLParser::handle_value_expr_node(VoidPtr node) {
  auto *value_ctx = reinterpret_cast<SQLParser::Value_exprContext *>(node);

  if (value_ctx->constant() != nullptr) {
    return handle_const_node(value_ctx->constant());
  }
  if (value_ctx->function_call() != nullptr) {
    return handle_function_call_node(value_ctx->function_call());
  }
  return nullptr;
}

// Converts one function argument (Function_value_exprContext* passed as
// void*): a value expression or an identifier. Returns nullptr otherwise.
Node::Ptr ZVecSQLParser::handle_function_value_expr_node(VoidPtr node) {
  auto *arg_ctx =
      reinterpret_cast<SQLParser::Function_value_exprContext *>(node);

  if (arg_ctx->value_expr() != nullptr) {
    return handle_value_expr_node(arg_ctx->value_expr());
  }
  if (arg_ctx->identifier() != nullptr) {
    return handle_id_node(arg_ctx->identifier());
  }
  return nullptr;
}

// Converts one element of an IN-style value list (In_value_exprContext*
// passed as void*): a number/string constant or a boolean. Returns nullptr
// otherwise.
Node::Ptr ZVecSQLParser::handle_in_value_expr_node(VoidPtr node) {
  auto *item_ctx = reinterpret_cast<SQLParser::In_value_exprContext *>(node);

  if (item_ctx->constant_num_and_str() != nullptr) {
    return handle_const_num_and_str_node(item_ctx->constant_num_and_str());
  }
  if (item_ctx->bool_value() != nullptr) {
    return handle_bool_value_node(item_ctx->bool_value());
  }
  return nullptr;
}

// Builds a T_BOOL_VALUE constant whose text is normalised to "true" or
// "false", depending on whether the TRUE_V token is present.
Node::Ptr ZVecSQLParser::handle_bool_value_node(
    antlr4::SQLParser::Bool_valueContext *node) {
  const char *normalized = "false";
  if (node->TRUE_V()) {
    normalized = "true";
  }
  auto bool_node = std::make_shared<ConstantNode>(normalized);
  bool_node->set_op(NodeOp::T_BOOL_VALUE);
  return bool_node;
}

// Converts an In_value_expr_listContext* (passed as void*) into an
// InValueExprListNode carrying the `exclude` flag. A null context yields an
// empty list; an element that fails to convert yields nullptr.
Node::Ptr ZVecSQLParser::handle_in_value_expr_list_node(VoidPtr node,
                                                        bool exclude) {
  auto *list_ctx =
      reinterpret_cast<SQLParser::In_value_expr_listContext *>(node);

  InValueExprListNode::Ptr list_node = std::make_shared<InValueExprListNode>();
  list_node->set_exclude(exclude);

  if (list_ctx != nullptr) {
    auto item_ctxs = list_ctx->in_value_expr();
    for (auto item_ctx : item_ctxs) {
      Node::Ptr item = handle_in_value_expr_node(item_ctx);
      if (item == nullptr) {
        return nullptr;
      }
      list_node->add_in_value_expr(std::move(item));
    }
  }

  return list_node;
}

// Converts a Function_callContext* (passed as void*) into a FuncNode: the
// identifier becomes the function-name node and every argument is converted
// in order. Returns nullptr as soon as one argument fails to convert.
Node::Ptr ZVecSQLParser::handle_function_call_node(VoidPtr node) {
  auto *call_ctx = reinterpret_cast<SQLParser::Function_callContext *>(node);

  FuncNode::Ptr func = std::make_shared<FuncNode>();
  func->set_func_name_node(handle_id_node(call_ctx->identifier()));

  auto arg_ctxs = call_ctx->function_value_expr();
  for (auto arg_ctx : arg_ctxs) {
    Node::Ptr arg = handle_function_value_expr_node(arg_ctx);
    if (arg == nullptr) {
      return nullptr;
    }
    func->add_argument(std::move(arg));
  }

  return func;
}

// Converts a ConstantContext* (passed as void*) into a constant node:
// numeric (int or float), quoted string (unquoted and normalised), vector
// literal, or boolean. Returns nullptr for an unrecognised constant or when
// the vector literal cannot be converted (err_msg_ is set in that case).
Node::Ptr ZVecSQLParser::handle_const_node(VoidPtr node) {
  auto *const_ctx = reinterpret_cast<SQLParser::ConstantContext *>(node);

  if (const_ctx->numeric()) {
    Node::Ptr number =
        std::make_shared<ConstantNode>(const_ctx->numeric()->getText());
    if (const_ctx->numeric()->int_value()) {
      number->set_op(NodeOp::T_INT_VALUE);
    } else if (const_ctx->numeric()->float_value()) {
      number->set_op(NodeOp::T_FLOAT_VALUE);
    }
    return number;
  }

  if (const_ctx->quoted_string()) {
    std::string text = const_ctx->quoted_string()->getText();
    trim(text);  // strips the surrounding quotes in place
    text = Util::normalize(text);
    Node::Ptr str_node = std::make_shared<ConstantNode>(text);
    str_node->set_op(NodeOp::T_STRING_VALUE);
    return str_node;
  }

  if (const_ctx->vector_expr()) {
    Node::Ptr vector_node = handle_vector_expr_node(const_ctx->vector_expr());
    if (vector_node == nullptr) {
      err_msg_ = "Parse failed. vector format error." +
                 const_ctx->vector_expr()->getText();
      LOG_ERROR("Parse failed. vector format error. [%s]",
                const_ctx->vector_expr()->getText().c_str());
      return nullptr;
    }
    return vector_node;
  }

  if (const_ctx->bool_value()) {
    return handle_bool_value_node(const_ctx->bool_value());
  }

  return nullptr;
}

// Converts a Constant_num_and_strContext* (passed as void*) into a constant
// node: numeric (int or float) or quoted string (unquoted and normalised).
// Returns nullptr for anything else.
Node::Ptr ZVecSQLParser::handle_const_num_and_str_node(VoidPtr node) {
  auto *const_ctx =
      reinterpret_cast<SQLParser::Constant_num_and_strContext *>(node);

  if (const_ctx->numeric()) {
    Node::Ptr number =
        std::make_shared<ConstantNode>(const_ctx->numeric()->getText());
    if (const_ctx->numeric()->int_value()) {
      number->set_op(NodeOp::T_INT_VALUE);
    } else if (const_ctx->numeric()->float_value()) {
      number->set_op(NodeOp::T_FLOAT_VALUE);
    }
    return number;
  }

  if (const_ctx->quoted_string()) {
    std::string text = const_ctx->quoted_string()->getText();
    trim(text);  // strips the surrounding quotes in place
    text = Util::normalize(text);
    Node::Ptr str_node = std::make_shared<ConstantNode>(text);
    str_node->set_op(NodeOp::T_STRING_VALUE);
    return str_node;
  }

  return nullptr;
}

// Converts a Vector_exprContext* (passed as void*) by handing its source
// text to parse_vector_text().
Node::Ptr ZVecSQLParser::handle_vector_expr_node(VoidPtr node) {
  auto *vector_ctx = reinterpret_cast<SQLParser::Vector_exprContext *>(node);
  std::string literal = vector_ctx->getText();
  return parse_vector_text(&literal);
}

// Converts an IdentifierContext* (passed as void*) into an IDNode tagged
// T_ID that holds the identifier's text.
Node::Ptr ZVecSQLParser::handle_id_node(VoidPtr node) {
  auto *id_ctx = reinterpret_cast<SQLParser::IdentifierContext *>(node);
  Node::Ptr id_node = std::make_shared<IDNode>(id_ctx->getText());
  id_node->set_op(NodeOp::T_ID);
  return id_node;
}

// Parses a stand-alone filter (logic expression) into a Node tree.
//
// need_formatted_tree: when true, formatted_tree_ receives an indented dump
// of the parse tree.
// Returns nullptr on lexer/syntax errors, on an empty tree, when the
// expression cannot be converted, or when an exception is thrown; all but
// the conversion failure store a description in err_msg_.
Node::Ptr ZVecSQLParser::parse_filter(const std::string &filter,
                                      bool need_formatted_tree) {
  try {
    // Lexer/parser pipeline over the filter text.
    ANTLRInputStream raw_input(filter);
    CaseChangingCharStream char_stream(&raw_input, true);
    SQLLexer lexer(&char_stream);
    CommonTokenStream token_stream(&lexer);
    SQLParser parser(&token_stream);

    // Swap the default listeners for ones that collect the error text.
    ErrorVerboseListener lexer_errors;
    ErrorVerboseListener parser_errors;
    lexer.removeErrorListeners();
    lexer.addErrorListener((ANTLRErrorListener *)&lexer_errors);
    parser.removeErrorListeners();
    parser.addErrorListener((ANTLRErrorListener *)&parser_errors);

    ParseTree *root = parser.logic_expr_unit();

    const bool first_attempt_failed = lexer.getNumberOfSyntaxErrors() > 0 ||
                                      parser.getNumberOfSyntaxErrors() > 0;
    if (first_attempt_failed) {
      // Rewind and retry with full LL prediction.
      LOG_INFO("SLL failed. using LL");
      token_stream.reset();
      parser.reset();
      parser.getInterpreter<ParserATNSimulator>()->setPredictionMode(
          PredictionMode::LL);
      root = parser.logic_expr_unit();
    }

    if (lexer.getNumberOfSyntaxErrors() > 0) {
      err_msg_ = "lexer error [" + lexer_errors.err_msg() + "]";
      return nullptr;
    }
    if (parser.getNumberOfSyntaxErrors() > 0) {
      err_msg_ = "syntax error [" + parser_errors.err_msg() + "]";
      return nullptr;
    }

    if (need_formatted_tree) {
      formatted_tree_ = to_formatted_string_tree(root, &parser);
    }

    // The unit must wrap an actual logic expression.
    auto *unit_ctx = dynamic_cast<SQLParser::Logic_expr_unitContext *>(root);
    if (unit_ctx == nullptr || unit_ctx->logic_expr() == nullptr) {
      err_msg_ = "parse error [null tree]";
      return nullptr;
    }

    return handle_logic_expr_node(unit_ctx->logic_expr());
  } catch (const std::exception &e) {
    err_msg_ = "parse error [" + std::string(e.what()) + "]";
    return nullptr;
  }
}

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/parser/zvec_sql_parser.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "db/sqlengine/antlr/gen/SQLParser.h"
#include "select_info.h"
#include "zvec_parser.h"

namespace zvec::sqlengine {

//! ZVecParser implementation that works on the generated SQLParser grammar
//! (see the SQLParser.h include above).
class ZVecSQLParser : public ZVecParser {
 public:
  ZVecSQLParser() = default;

  //! Parse a full query string into an SQLInfo.
  //! \param query                 query text to parse
  //! \param need_formatted_tree   request a formatted dump of the parse tree
  //!                              (NOTE(review): storage of the dump is done
  //!                              in the .cc file — confirm where it is kept)
  SQLInfo::Ptr parse(const std::string &query,
                     bool need_formatted_tree = false) override;

  //! Parse a standalone filter expression into a Node tree.
  //! \param filter                filter text to parse
  //! \param need_formatted_tree   request a formatted dump of the parse tree
  Node::Ptr parse_filter(const std::string &filter,
                         bool need_formatted_tree = false) override;

 private:
  // Helpers that take an opaque parse-tree pointer (VoidPtr). Judging by
  // their names they build SQLInfo / SelectInfo from the tree — the bodies
  // live in the .cc file, TODO confirm details there.
  SQLInfo::Ptr sql_info(VoidPtr tree);

  SQLInfo::SQLType sql_type(VoidPtr node);
  SelectInfo::Ptr select_info(VoidPtr node);

  // One handler per grammar construct; each returns the Node built for the
  // given parse-tree node (presumably nullptr on failure — verify in the
  // implementation).
  Node::Ptr handle_logic_expr_node(VoidPtr node);
  Node::Ptr handle_rel_expr_node(VoidPtr node);
  Node::Ptr handle_rel_expr_left_node(VoidPtr node);
  Node::Ptr handle_value_expr_node(VoidPtr node);
  Node::Ptr handle_function_value_expr_node(VoidPtr node);
  Node::Ptr handle_in_value_expr_node(VoidPtr node);
  Node::Ptr handle_in_value_expr_list_node(VoidPtr node, bool exclude);
  Node::Ptr handle_id_node(VoidPtr node);
  Node::Ptr handle_const_node(VoidPtr node);
  Node::Ptr handle_const_num_and_str_node(VoidPtr node);
  // Unlike its siblings this handler takes the typed grammar context.
  Node::Ptr handle_bool_value_node(antlr4::SQLParser::Bool_valueContext *node);
  Node::Ptr handle_vector_expr_node(VoidPtr node);
  Node::Ptr handle_function_call_node(VoidPtr node);
};

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/planner/doc_filter.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "db/sqlengine/planner/doc_filter.h"
#include <optional>
#include <arrow/acero/exec_plan.h>
#include <arrow/table.h>
#include <zvec/ailego/logger/logger.h>
#include <zvec/db/config.h>
#include "db/sqlengine/planner/invert_search.h"

namespace zvec::sqlengine {

//! Evaluate the configured filter inputs so that is_filtered() can be used.
//!
//! Up to three independent stages run, each only when its input is set:
//!  - invert_cond_: executed through InvertSearch; the result is stored in
//!    invert_result_ and converted into invert_filter_.
//!  - forward_plan_: executed as an acero plan; its single boolean column is
//!    stored in forward_bitmap_.
//!  - forward_filter_expr_: bound in place against the schema of the forward
//!    filter fields so that it can be evaluated later.
//!
//! \return Status::OK() on success, otherwise Status::InternalError for the
//!         first stage that failed.
Status DocFilter::compute_filter() {
  if (invert_cond_) {
    InvertSearch search(segment_.get());
    auto invert_res = search.exec_invert_cond_tree(invert_cond_.get());
    if (!invert_res) {
      return Status::InternalError("Execute invert search failed: ",
                                   invert_res.error().message());
    }
    invert_result_ = invert_res.value();
    invert_filter_ = invert_result_->make_filter();
  }

  if (forward_plan_) {
    auto forward_result = arrow::acero::DeclarationToTable(*forward_plan_);
    if (!forward_result.ok()) {
      return Status::InternalError("Execute filter bitmap failed: ",
                                   forward_result.status().ToString());
    }
    // has only one column with boolean type
    auto &forward_table = forward_result.ValueUnsafe();
    // Compare by type id rather than by shared_ptr identity: two boolean
    // DataType instances are equal types even if they are distinct objects.
    if (forward_table->num_columns() != 1 ||
        forward_table->column(0)->type()->id() != arrow::Type::BOOL) {
      return Status::InternalError("Filter bitmap is not boolean type");
    }
    forward_bitmap_ = forward_table->column(0);
  }

  if (forward_filter_expr_) {
    // get schema to bind to Expression (no rows are requested)
    auto table = segment_->fetch(query_info_->get_forward_filter_field_names(),
                                 std::vector<int>{});
    if (!table) {
      return Status::InternalError("Fetch forward failed");
    }
    auto bind_res = forward_filter_expr_->Bind(*table->schema());
    if (!bind_res.ok()) {
      return Status::InternalError("Bind forward filter expression failed: ",
                                   bind_res.status().ToString());
    }
    // Replace the unbound expression with its bound form.
    *forward_filter_expr_ = bind_res.MoveValueUnsafe();
  }
  return Status::OK();
}

//! \return true when no delete filter, invert filter, forward plan or
//!         forward filter expression is present.
bool DocFilter::empty() const {
  const bool has_any_filter = delete_filter_ || invert_filter_ ||
                              forward_plan_ || forward_filter_expr_;
  return !has_any_filter;
}

bool DocFilter::is_filtered(uint64_t id) const {
  if (delete_filter_ && delete_filter_->is_filtered(id)) {
    return true;
  }
  if (invert_filter_ && invert_filter_->is_filtered(id)) {
    return true;
  }
  auto forward_bit = get_forward_bit(id);
  if (!forward_bit) {
    return false;
  }
  // revert to return false if forward filter is matched
  return !forward_bit.value();
}

std::optional<bool> DocFilter::get_forward_bit(uint64_t id) const {
  if (forward_filter_expr_) {
    return is_matched_by_forward_filter(id);
  }
  if (!forward_bitmap_) {
    return std::nullopt;
  }
  uint64_t rows_seen = 0;
  for (int c = 0; c < forward_bitmap_->num_chunks(); c++) {
    const auto &arr = forward_bitmap_->chunk(c);
    if (id < rows_seen + arr->length()) {
      auto *bool_array = static_cast<arrow::BooleanArray *>(arr.get());
      return (*bool_array)[id - rows_seen];
    }
    rows_seen += arr->length();
  }
  LOG_ERROR("ID is out or range: id[%zu] count[%zu]", (size_t)id,
            (size_t)rows_seen);
  return std::nullopt;
}

std::optional<std::vector<uint64_t>> DocFilter::get_bf_by_keys_and_update() {
  auto meta = segment_->meta();
  if (!meta) {
    return std::nullopt;
  }
  // TODO: support forward
  if (!invert_result_) {
    return std::nullopt;
  }
  size_t doc_count = meta->doc_count();
  float brute_force_by_keys_ratio =
      GlobalConfig::Instance().brute_force_by_keys_ratio();
  uint64_t bf_by_keys_threshold = meta->doc_count() * brute_force_by_keys_ratio;

  // decide to use brute force by keys or not
  if (size_t match_count = invert_result_->count();
      match_count <= bf_by_keys_threshold) {
    std::vector<uint32_t> ids;
    invert_result_->extract_ids(&ids);
    invert_filter_.reset();
    invert_result_.reset();
    LOG_INFO("Use brute force by keys, doc_count[%zu] invert_result_count[%zu]",
             doc_count, match_count);
    return std::vector<uint64_t>(ids.begin(), ids.end());
  } else {
    LOG_DEBUG(
        "Not use brute force by keys, doc_count[%zu] invert_result_count[%zu]",
        doc_count, match_count);
  }
  return std::nullopt;
}

std::optional<bool> DocFilter::is_matched_by_forward_filter(uint64_t id) const {
  auto exec_batch =
      segment_->fetch(query_info_->get_forward_filter_field_names(), id);
  if (!exec_batch) {
    LOG_ERROR("Fetch forward failed, id[%zu]", (size_t)id);
    return std::nullopt;
  }
  auto maybe_result = arrow::compute::ExecuteScalarExpression(
      *forward_filter_expr_, *exec_batch);
  if (!maybe_result.ok()) {
    LOG_ERROR("Execute scalar expression failed, id[%zu] err[%s]", (size_t)id,
              maybe_result.status().ToString().c_str());
    return std::nullopt;
  }
  arrow::Datum datum = maybe_result.MoveValueUnsafe();
  if (datum.is_scalar()) {
    return datum.scalar_as<arrow::BooleanScalar>().value;
  }
  LOG_ERROR("Datum is not scalar, id[%zu] type[%s]", (size_t)id,
            datum.type()->ToString().c_str());
  return std::nullopt;
}


}  // namespace zvec::sqlengine

================================================
FILE: src/db/sqlengine/planner/doc_filter.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include <arrow/acero/api.h>
#include <arrow/chunked_array.h>
#include <zvec/db/status.h>
#include "db/index/column/inverted_column/inverted_search_result.h"
#include "db/index/common/index_filter.h"
#include "db/index/segment/segment.h"
#include "db/sqlengine/analyzer/query_info.h"
#include "db/sqlengine/analyzer/query_node.h"

namespace zvec::sqlengine {

//! IndexFilter that combines the segment's own filter, an inverted-index
//! condition and a forward (scalar) filter into a single is_filtered() test.
//! compute_filter() must be run to produce the invert filter / forward bitmap
//! that is_filtered() consults.
class DocFilter : public IndexFilter {
 public:
  using Ptr = std::shared_ptr<DocFilter>;

  //! \param segment         segment to filter; also the source of the delete
  //!                        filter (segment->get_filter())
  //! \param query_info      query description; supplies the invert condition
  //! \param forward_plan    optional acero plan producing the forward bitmap
  //! \param forward_filter  optional expression evaluated per document
  //!
  //! The initializers for delete_filter_ and invert_cond_ read the members
  //! segment_ / query_info_ (the parameters have been moved from), so they
  //! rely on the member declaration order below.
  DocFilter(Segment::Ptr segment, QueryInfo::Ptr query_info,
            std::unique_ptr<arrow::acero::Declaration> forward_plan,
            std::unique_ptr<arrow::compute::Expression> forward_filter)
      : segment_(std::move(segment)),
        query_info_(std::move(query_info)),
        delete_filter_(segment_->get_filter()),
        invert_cond_(query_info_->invert_cond()),
        forward_plan_(std::move(forward_plan)),
        forward_filter_expr_(std::move(forward_filter)) {}

  //! Execute the invert condition / forward plan and bind the forward filter
  //! expression; returns a non-OK status if any of these steps fails.
  Status compute_filter();

  //! \return true when document `id` must be excluded from results.
  bool is_filtered(uint64_t id) const override;

  //! get brute force by keys and clear `invert_filter_` if suitable
  std::optional<std::vector<uint64_t>> get_bf_by_keys_and_update();

  //! \return true when no filter of any kind is configured.
  bool empty() const;

 private:
  //! Forward verdict for `id`; std::nullopt when none is available.
  std::optional<bool> get_forward_bit(uint64_t id) const;
  //! Evaluate forward_filter_expr_ for `id`; std::nullopt on failure.
  std::optional<bool> is_matched_by_forward_filter(uint64_t id) const;

 private:
  Segment::Ptr segment_;
  QueryInfo::Ptr query_info_;
  // filter obtained from the segment at construction time
  IndexFilter::Ptr delete_filter_;
  // root of the inverted-index condition tree; may be null
  QueryNode::Ptr invert_cond_;
  // either forward_plan_ or forward_filter_expr_ is set
  std::unique_ptr<arrow::acero::Declaration> forward_plan_;
  std::unique_ptr<arrow::compute::Expression> forward_filter_expr_;

  // outputs of compute_filter() for the invert condition
  InvertedSearchResult::Ptr invert_result_;
  IndexFilter::Ptr invert_filter_{nullptr};

  // output of compute_filter() for forward_plan_: one boolean column
  std::shared_ptr<arrow::ChunkedArray> forward_bitmap_;
};

}  // namespace zvec::sqlengine

================================================
FILE: src/db/sqlengine/planner/invert_recall_node.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "db/sqlengine/planner/invert_recall_node.h"
#include <arrow/api.h>
#include <zvec/ailego/logger/logger.h>
#include "db/sqlengine/planner/invert_search.h"

namespace cp = arrow::compute;

namespace zvec::sqlengine {

//! Build an async generator that yields the documents matched by the invert
//! condition, batch by batch.
//!
//! The inverted search runs on the first call. Every call then gathers up to
//! batch_size_ document ids that pass seg_filter_, fetches fetched_columns_
//! for them and returns the rows as one ExecBatch. std::nullopt signals the
//! end of the stream; failures are reported as a finished future holding an
//! error status.
arrow::AsyncGenerator<std::optional<cp::ExecBatch>> InvertRecallNode::gen() {
  using BatchFuture = arrow::Future<std::optional<cp::ExecBatch>>;

  auto shared_state = std::make_shared<State>();
  return [self = shared_from_this(),
          shared_state = std::move(shared_state)]() -> BatchFuture {
    State &st = *shared_state;

    // First call: run the inverted search and open an iterator on its result.
    if (!st.iter_) {
      auto searched = self->prepare();
      if (!searched) {
        return BatchFuture::MakeFinished(arrow::Status::ExecutionError(
            "prepare invert failed:", searched.error().c_str()));
      }
      st.invert_result_ = searched.value();
      st.iter_ = st.invert_result_->create_iterator();
    }

    if (!st.iter_->valid()) {
      // return nullopt to indicate end
      return BatchFuture::MakeFinished(std::nullopt);
    }

    // collect a batch; filtered documents do not count towards batch_size_
    std::vector<int> row_ids;
    row_ids.reserve(self->batch_size_);
    int collected = 0;
    while (st.iter_->valid() && collected < self->batch_size_) {
      const auto doc_id = st.iter_->doc_id();
      const bool dropped =
          self->seg_filter_ && self->seg_filter_->is_filtered(doc_id);
      if (!dropped) {
        ++collected;
        row_ids.push_back(doc_id);
      }
      st.iter_->next();
    }
    if (row_ids.empty()) {
      // return nullopt to indicate end
      return BatchFuture::MakeFinished(std::nullopt);
    }

    auto fetched = self->segment_->fetch(self->fetched_columns_, row_ids);
    if (!fetched) {
      return BatchFuture::MakeFinished(
          arrow::Status::UnknownError("fetch table failed"));
    }
    auto combined = fetched->CombineChunksToBatch();
    if (!combined.ok()) {
      return BatchFuture::MakeFinished(arrow::Status::ExecutionError(
          "combine chunks to batch failed:", combined.status().ToString()));
    }
    cp::ExecBatch result_batch(*combined.ValueUnsafe());
    return BatchFuture::MakeFinished(std::move(result_batch));
  };
}

//! Run the query's invert condition tree against this node's segment.
//! \return the inverted search result, or the error reported by InvertSearch.
Result<InvertedSearchResult::Ptr> InvertRecallNode::prepare() {
  InvertSearch searcher(segment_.get());
  // Hold the condition for the duration of the call.
  const auto cond_root = query_info_->invert_cond();
  return searcher.exec_invert_cond_tree(cond_root.get());
}

}  // namespace zvec::sqlengine

================================================
FILE: src/db/sqlengine/planner/invert_recall_node.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include <arrow/acero/api.h>
#include <arrow/api.h>
#include "db/index/column/common/index_results.h"
#include "db/index/segment/segment.h"
#include "db/sqlengine/analyzer/query_info.h"

namespace zvec::sqlengine {

//! Source of record batches for documents recalled through the inverted
//! index. gen() captures shared_from_this(), so instances must be owned by a
//! std::shared_ptr before gen() is called.
class InvertRecallNode : public std::enable_shared_from_this<InvertRecallNode> {
 public:
  //! \param segment     segment to search and fetch from
  //! \param query_info  query description (invert condition, fetched fields)
  //! \param batch_size  maximum number of documents per generated batch
  //!
  //! The initializers read the members segment_ / query_info_ (the parameters
  //! have been moved from), so they rely on the declaration order below.
  InvertRecallNode(Segment::Ptr segment, QueryInfo::Ptr query_info,
                   int batch_size)
      : segment_(std::move(segment)),
        query_info_(std::move(query_info)),
        // need fetch for forward filter, order by
        fetched_columns_(query_info_->get_all_fetched_scalar_field_names()),
        seg_filter_(segment_->get_filter()),
        batch_size_(batch_size) {
    // Fetch with an empty index list: only the resulting schema is used.
    auto table = segment_->fetch(fetched_columns_, std::vector<int>{});
    // fetch() is treated as nullable elsewhere; do not dereference a failed
    // fetch here. schema_ then stays null.
    if (table) {
      schema_ = table->schema();
    }
  }

  //! get schema; null when the schema fetch in the constructor failed
  std::shared_ptr<arrow::Schema> schema() const {
    return schema_;
  }

  //! Create the async generator producing the recalled batches.
  arrow::AsyncGenerator<std::optional<cp::ExecBatch>> gen();

 private:
  //! Execute the invert condition of query_info_ on segment_.
  Result<InvertedSearchResult::Ptr> prepare();

 private:
  //! Per-generator iteration state, shared between successive calls.
  struct State {
    InvertedSearchResult::Ptr invert_result_;
    IndexResults::IteratorUPtr iter_;
  };

  Segment::Ptr segment_;
  QueryInfo::Ptr query_info_;
  // NOTE(review): reference member — this assumes
  // get_all_fetched_scalar_field_names() returns a reference to storage kept
  // alive by query_info_; confirm against QueryInfo.
  const std::vector<std::string> &fetched_columns_;
  IndexFilter::Ptr seg_filter_;
  int batch_size_;
  std::shared_ptr<arrow::Schema> schema_;
};

}  // namespace zvec::sqlengine

================================================
FILE: src/db/sqlengine/planner/invert_search.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "invert_search.h"
#include <cstring>
#include <zvec/ailego/logger/logger.h>
#include <zvec/db/type.h>
#include "db/sqlengine/analyzer/query_node.h"
#include "db/sqlengine/common/util.h"

namespace zvec::sqlengine {

// Translation table from query-tree operators to the comparison operators
// understood by the inverted index. Operators that are absent from this table
// are reported as CompareOp::NONE by query_nodeop2search_op().
const std::unordered_map<QueryNodeOp, CompareOp> kOpMap_ = {
    {QueryNodeOp::Q_EQ, CompareOp::EQ},
    {QueryNodeOp::Q_NE, CompareOp::NE},
    {QueryNodeOp::Q_LT, CompareOp::LT},
    {QueryNodeOp::Q_LE, CompareOp::LE},
    {QueryNodeOp::Q_GT, CompareOp::GT},
    {QueryNodeOp::Q_GE, CompareOp::GE},
    {QueryNodeOp::Q_LIKE, CompareOp::LIKE},
    // IN shares the CONTAIN_ANY search operator
    {QueryNodeOp::Q_IN, CompareOp::CONTAIN_ANY},
    {QueryNodeOp::Q_CONTAIN_ALL, CompareOp::CONTAIN_ALL},
    {QueryNodeOp::Q_CONTAIN_ANY, CompareOp::CONTAIN_ANY},
    {QueryNodeOp::Q_IS_NULL, CompareOp::IS_NULL},
    {QueryNodeOp::Q_IS_NOT_NULL, CompareOp::IS_NOT_NULL},
};

//! Recursively evaluate an invert condition tree.
//!
//! LOGIC_EXPR nodes evaluate their children and merge the right result into
//! the left one (AND for Q_AND, OR for every other operator); a logic node
//! with a single child yields that child's result. REL_EXPR nodes are
//! delegated to exec_invert_cond_node().
//!
//! \param query_node  root of the (sub)tree; a null pointer is rejected
//! \return the merged search result, or an InvalidArgument status for a null
//!         node, a logic node without children, an unexpected node type, or
//!         any failure reported by a child.
Result<InvertedSearchResult::Ptr> InvertSearch::exec_invert_cond_tree(
    const QueryNode *query_node) {
  if (query_node == nullptr) {
    return tl::make_unexpected(Status::InvalidArgument(
        "exec_invert_cond_tree, query node is null."));
  }

  if (query_node->type() == QueryNode::QueryNodeType::LOGIC_EXPR) {
    if (query_node->left() != nullptr) {
      auto left_res = exec_invert_cond_tree(query_node->left_node());
      if (!left_res) {
        return left_res;
      }
      if (query_node->right() == nullptr) {
        return left_res;
      }
      auto right_res = exec_invert_cond_tree(query_node->right_node());
      if (!right_res) {
        return right_res;
      }
      // The merge is done in place on the left result.
      if (query_node->op() == QueryNodeOp::Q_AND) {
        left_res.value()->AND(*right_res.value());
      } else {
        left_res.value()->OR(*right_res.value());
      }
      return left_res;
    }
    if (query_node->right() != nullptr) {
      return exec_invert_cond_tree(query_node->right_node());
    }
    return tl::make_unexpected(Status::InvalidArgument(
        "exec_invert_cond_tree, logic expr has no left or right node."));
  }

  if (query_node->type() == QueryNode::QueryNodeType::REL_EXPR) {
    return exec_invert_cond_node(query_node);
  }

  return tl::make_unexpected(Status::InvalidArgument(
      "exec_invert_cond_tree unexpected type:", query_node->op_name()));
}

//! Map a query-tree operator to its inverted-index comparison operator.
//! \return the mapped operator, or CompareOp::NONE when `op` has no mapping.
CompareOp InvertSearch::query_nodeop2search_op(QueryNodeOp op) {
  const auto found = kOpMap_.find(op);
  return found != kOpMap_.end() ? found->second : CompareOp::NONE;
}

//! Execute one relational condition against the inverted index.
//!
//! The left side names the indexed field, either directly (Q_ID) or through
//! the array-length function applied to a field. The right side carries the
//! search term(s). Depending on the operator this dispatches to the scalar
//! indexer's search_null / search_non_null / multi_search / search /
//! search_array_len.
//!
//! \param invert_cond  REL_EXPR node to execute (must not be null)
//! \return the search result, or an InvalidArgument status when the node is
//!         malformed, the field has no scalar indexer, or the search fails.
Result<InvertedSearchResult::Ptr> InvertSearch::exec_invert_cond_node(
    const QueryNode *invert_cond) {
  auto term_node = invert_cond->right();

  // get search oper
  CompareOp oper = query_nodeop2search_op(invert_cond->op());
  if (oper == CompareOp::NONE) {
    return tl::make_unexpected(Status::InvalidArgument(
        "do_invert_scan, get search operator failed. op:",
        invert_cond->op_name()));
  }

  auto *left_node = invert_cond->left_node();
  if (left_node == nullptr) {
    return tl::make_unexpected(Status::InvalidArgument(
        "do_invert_scan, missing left node. op:", invert_cond->op_name()));
  }

  bool is_array_length = false;
  std::string invert_field_name;
  if (left_node->op() == QueryNodeOp::Q_ID) {
    invert_field_name = left_node->text();
  } else if (left_node->op() == QueryNodeOp::Q_FUNCTION_CALL) {
    const QueryFuncNode *func_node =
        dynamic_cast<const QueryFuncNode *>(left_node);
    if (func_node == nullptr) {
      return tl::make_unexpected(Status::InvalidArgument(
          "do_invert_scan, function call node has unexpected type. op:",
          left_node->op_name()));
    }
    const auto &func_name = func_node->get_func_name();
    const auto &arguments = func_node->arguments();
    if (func_name == kFuncArrayLength) {
      if (arguments.empty() || arguments[0] == nullptr) {
        return tl::make_unexpected(Status::InvalidArgument(
            "do_invert_scan, function call has no argument. func:",
            func_name.c_str()));
      }
      invert_field_name = arguments[0]->text();
      is_array_length = true;
    } else {
      return tl::make_unexpected(Status::InvalidArgument(
          "do_invert_scan, unsupported function call. func:",
          func_name.c_str()));
    }
  } else {
    return tl::make_unexpected(Status::InvalidArgument(
        "do_invert_scan, unsupported left node. op:", left_node->op_name()));
  }

  // get field reader
  auto invert_reader = segment_->get_scalar_indexer(invert_field_name);
  if (invert_reader == nullptr) {
    return tl::make_unexpected(Status::InvalidArgument(
        "do_invert_scan, get invert column reader failed. field:",
        invert_field_name.c_str()));
  }

  // Null tests need no right-hand term.
  if (oper == CompareOp::IS_NULL) {
    auto invert_res = invert_reader->search_null();
    if (!invert_res) {
      return tl::make_unexpected(
          Status::InvalidArgument("invert column reader search null failed."));
    }
    return invert_res;
  }
  if (oper == CompareOp::IS_NOT_NULL) {
    auto invert_res = invert_reader->search_non_null();
    if (!invert_res) {
      return tl::make_unexpected(Status::InvalidArgument(
          "invert column reader search not null failed."));
    }
    return invert_res;
  }

  // Every remaining operator reads the right-hand term.
  if (term_node == nullptr) {
    return tl::make_unexpected(Status::InvalidArgument(
        "do_invert_scan, missing right node. op:", invert_cond->op_name()));
  }

  if (oper == CompareOp::CONTAIN_ALL || oper == CompareOp::CONTAIN_ANY) {
    // NOTE: IN is handled as CONTAIN_ANY
    QueryListNode::Ptr list_node =
        std::dynamic_pointer_cast<QueryListNode>(term_node);
    if (list_node == nullptr) {
      return tl::make_unexpected(Status::InvalidArgument(
          "do_invert_scan, right node is not a value list. op:",
          invert_cond->op_name()));
    }
    // An excluding list switches to the negated operator.
    if (list_node->exclude()) {
      oper = oper == CompareOp::CONTAIN_ALL ? CompareOp::NOT_CONTAIN_ALL
                                            : CompareOp::NOT_CONTAIN_ANY;
    }
    auto invert_res =
        invert_reader->multi_search(list_node->to_value_list(), oper);
    if (!invert_res) {
      return tl::make_unexpected(Status::InvalidArgument(
          "invert column reader multi_search failed. op:", int(oper)));
    }
    return invert_res;
  }

  auto invert_term = term_node->text();
  if (!is_array_length) {
    auto invert_res = invert_reader->search(invert_term, oper);
    if (!invert_res) {
      return tl::make_unexpected(Status::InvalidArgument(
          "invert column reader search failed. term:", invert_term.c_str(),
          " op:", invert_cond->op_name().c_str()));
    }
    return invert_res;
  }

  // The array length is read from the leading bytes of the term. Copy them
  // out instead of casting the buffer, which has no alignment guarantee, and
  // refuse terms that are too short to hold the value.
  uint32_t len = 0;
  if (invert_term.size() < sizeof(len)) {
    return tl::make_unexpected(Status::InvalidArgument(
        "do_invert_scan, invalid array length term. size:",
        static_cast<int>(invert_term.size())));
  }
  std::memcpy(&len, invert_term.data(), sizeof(len));
  auto invert_res = invert_reader->search_array_len(len, oper);
  if (!invert_res) {
    return tl::make_unexpected(Status::InvalidArgument(
        "invert column reader search failed. term:", invert_term.c_str(),
        " op:", invert_cond->op_name().c_str()));
  }
  return invert_res;
}


}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/planner/invert_search.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <zvec/ailego/pattern/expected.hpp>
#include "db/index/segment/segment.h"
#include "db/sqlengine/analyzer/query_node.h"

namespace zvec::sqlengine {

//! Executes invert (inverted-index) condition trees against one segment.
class InvertSearch {
 public:
  using Ptr = std::shared_ptr<InvertSearch>;

  //! \param segment  segment to search; stored as a raw, non-owning pointer,
  //!                 so it must stay valid while this object is used
  InvertSearch(zvec::Segment *segment) : segment_(segment) {}

  //! Evaluate the condition tree rooted at `invert_cond`.
  //! \return the search result, or an error status
  Result<InvertedSearchResult::Ptr> exec_invert_cond_tree(
      const QueryNode *invert_cond);

  //! Translate a query-tree operator into the index comparison operator.
  static CompareOp query_nodeop2search_op(QueryNodeOp op);

 private:
  //! Evaluate a single relational condition node.
  Result<InvertedSearchResult::Ptr> exec_invert_cond_node(
      const QueryNode *invert_cond);

 private:
  // not owned
  zvec::Segment *segment_;
};

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/planner/op_register.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License

#include "db/sqlengine/planner/op_register.h"
#include <arrow/status.h>
#include "db/sqlengine/planner/ops/check_not_filtered_op.h"
#include "db/sqlengine/planner/ops/contain_op.h"
#include "db/sqlengine/planner/ops/fetch_vector_op.h"

namespace zvec::sqlengine {

//! Register the custom sqlengine operators (CheckNotFilteredOp, FetchVectorOp
//! and ContainOp), in that order.
//! \return the first non-OK status reported by a registration, otherwise OK.
arrow::Status OpRegister::register_ops() {
  // ARROW_RETURN_NOT_OK returns from this function as soon as one fails.
  ARROW_RETURN_NOT_OK(CheckNotFilteredOp::register_op());
  ARROW_RETURN_NOT_OK(FetchVectorOp::register_op());
  ARROW_RETURN_NOT_OK(ContainOp::register_op());
  return arrow::Status::OK();
}

}  // namespace zvec::sqlengine

================================================
FILE: src/db/sqlengine/planner/op_register.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License

#pragma once

#include <arrow/status.h>

namespace zvec::sqlengine {

//! Entry point for registering the sqlengine's custom operators.
class OpRegister {
 public:
  //! Register all custom operators; returns the first failure, if any.
  static arrow::Status register_ops();
};

}  // namespace zvec::sqlengine

================================================
FILE: src/db/sqlengine/planner/ops/check_not_filtered_op.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License

#include "db/sqlengine/planner/ops/check_not_filtered_op.h"
#include <arrow/type_fwd.h>
#include "db/sqlengine/common/util.h"

namespace zvec::sqlengine {

//! Kernel body: for every uint64 row id in the input column, emit whether the
//! id passes the filter (true == not filtered).
//!
//! \param ctx    kernel context; its state carries the IndexFilter to apply
//! \param batch  input span; the first value is read as an array of uint64
//! \param out    receives the resulting boolean array data
//! \return ExecutionError when no filter is available, or any error reported
//!         while building the output array.
arrow::Status CheckNotFilteredOp::CheckNotFilteredFunction(
    cp::KernelContext *ctx, const cp::ExecSpan &batch, cp::ExecResult *out) {
  auto *state = static_cast<CheckNotFilteredState *>(ctx->state());
  // A missing state is reported the same way as a missing filter.
  auto *filter = state != nullptr ? state->args.filter.get() : nullptr;
  if (filter == nullptr) {
    return arrow::Status::ExecutionError("filter is null");
  }

  // Buffer 1 of the input array is read as the uint64 row ids.
  const auto row_span = batch[0].array.GetSpan<uint64_t>(1, batch.length);
  arrow::BooleanBuilder builder(ctx->memory_pool());
  ARROW_RETURN_NOT_OK(builder.Reserve(batch.length));
  // batch.length is 64-bit; use a matching counter to avoid truncation.
  for (int64_t i = 0; i < batch.length; ++i) {
    builder.UnsafeAppend(!filter->is_filtered(row_span[i]));
  }
  std::shared_ptr<arrow::Array> result_array;
  ARROW_RETURN_NOT_OK(builder.Finish(&result_array));

  out->value = result_array->data();
  return arrow::Status::OK();
}

//! Kernel init hook: create the kernel state from the function options.
//! `args.options` is reinterpreted as CheckNotFilteredOp::Options; a null
//! pointer yields a state without a filter.
arrow::Result<std::unique_ptr<arrow::compute::KernelState>>
CheckNotFilteredOp::InitExprValue(arrow::compute::KernelContext *,
                                  const arrow::compute::KernelInitArgs &args) {
  const auto *opts =
      static_cast<const CheckNotFilteredOp::Options *>(args.options);
  return std::make_unique<CheckNotFilteredOp::CheckNotFilteredState>(opts);
}


//! Register the unary scalar function kCheckNotFiltered (uint64 -> boolean)
//! in Arrow's function registry.
//! \return the first error from AddKernel / AddFunction, otherwise OK.
arrow::Status CheckNotFilteredOp::register_op() {
  // Default options handed to the function by pointer, hence static storage.
  static Options default_options = Options::Defaults();

  cp::ScalarKernel not_filtered_kernel({arrow::uint64()}, arrow::boolean(),
                                       CheckNotFilteredFunction,
                                       InitExprValue);
  // The kernel builds its own output array.
  not_filtered_kernel.mem_allocation = cp::MemAllocation::NO_PREALLOCATE;
  not_filtered_kernel.null_handling = cp::NullHandling::COMPUTED_NO_PREALLOCATE;

  auto scalar_func = std::make_shared<cp::ScalarFunction>(
      kCheckNotFiltered, cp::Arity::Unary(), func_doc, &default_options, false);
  ARROW_RETURN_NOT_OK(scalar_func->AddKernel(std::move(not_filtered_kernel)));

  auto function_registry = cp::GetFunctionRegistry();
  ARROW_RETURN_NOT_OK(function_registry->AddFunction(std::move(scalar_func)));

  return arrow::Status::OK();
}

}  // namespace zvec::sqlengine

================================================
FILE: src/db/sqlengine/planner/ops/check_not_filtered_op.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License

#pragma once

#include <arrow/api.h>
#include <arrow/compute/api.h>
#include "db/index/common/index_filter.h"


namespace zvec::sqlengine {

namespace cp = arrow::compute;

//! Arrow compute scalar function that maps segment row ids to a boolean
//! telling whether the row is NOT rejected by an IndexFilter. The filter is
//! supplied through the function options.
class CheckNotFilteredOp {
 public:
  //! FunctionOptionsType describing Options to Arrow (name, string form,
  //! comparison and copy).
  class CheckNotFilteredOptionsType
      : public arrow::compute::FunctionOptionsType {
   public:
    //! Process-wide singleton instance.
    static const arrow::compute::FunctionOptionsType *GetInstance() {
      static std::unique_ptr<arrow::compute::FunctionOptionsType> instance(
          new CheckNotFilteredOptionsType());
      return instance.get();
    }

    const char *type_name() const override {
      return "CheckNotFiltered";
    }

    //! The string form is just the type name; the filter is not rendered.
    std::string Stringify(
        const arrow::compute::FunctionOptions & /*options*/) const override {
      return type_name();
    }

    //! Two option sets are equal when they hold the same filter object
    //! (pointer comparison). Both arguments are assumed to be Options.
    bool Compare(const arrow::compute::FunctionOptions &options,
                 const arrow::compute::FunctionOptions &other) const override {
      const auto &lop = static_cast<const Options &>(options);
      const auto &rop = static_cast<const Options &>(other);
      return lop.args.filter == rop.args.filter;
    }

    //! The copy shares the same filter object with the original.
    std::unique_ptr<arrow::compute::FunctionOptions> Copy(
        const arrow::compute::FunctionOptions &options) const override {
      const auto &opts = static_cast<const Options &>(options);
      return std::make_unique<Options>(opts.args.filter);
    }
  };

  //! Arguments carried from the options into the kernel state.
  struct Args {
    IndexFilter::Ptr filter;
  };

  //! Function options holding the filter to apply.
  class Options : public cp::FunctionOptions {
   public:
    //! Options without a filter.
    Options() : Options(nullptr) {}

    Options(IndexFilter::Ptr filter)
        : cp::FunctionOptions(CheckNotFilteredOptionsType::GetInstance()),
          args{std::move(filter)} {}

    // NOTE(review): differs from CheckNotFilteredOptionsType::type_name() and
    // is not referenced anywhere in this header — confirm whether it is used.
    static inline constexpr char const kTypeName[] =
        "CheckNotFilteredFunctionOptions";

    static Options Defaults() {
      return Options();
    }

    //! Returns a copy of the arguments.
    Args get_args() const {
      return args;
    }

    Args args;
  };

  //! Kernel state: a copy of the option arguments (empty if no options).
  struct CheckNotFilteredState : public arrow::compute::KernelState {
    Args args;

    explicit CheckNotFilteredState(const Options *o) {
      if (o) {
        args = o->get_args();
      }
    }
  };


  //! Kernel exec function; see the .cc file for the implementation.
  static arrow::Status CheckNotFilteredFunction(cp::KernelContext *ctx,
                                                const cp::ExecSpan &batch,
                                                cp::ExecResult *out);

  //! Documentation attached to the registered function.
  static inline const cp::FunctionDoc func_doc{
      "check if the segment row id is not filtered",
      "returns not_filtered(x)",
      {"segment_row_id"},
      "Options"};

  //! Register the function in Arrow's function registry.
  static arrow::Status register_op();

  //! Kernel init function: builds CheckNotFilteredState from the options.
  static arrow::Result<std::unique_ptr<arrow::compute::KernelState>>
  InitExprValue(arrow::compute::KernelContext *,
                const arrow::compute::KernelInitArgs &args);
};

}  // namespace zvec::sqlengine

================================================
FILE: src/db/sqlengine/planner/ops/contain_op.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License

#include "db/sqlengine/planner/ops/contain_op.h"
#include <memory>
#include <arrow/api.h>
#include <zvec/db/type.h>
#include "db/sqlengine/common/util.h"

namespace zvec::sqlengine {

// Matching mode for `match_value`: every value-set element must occur in the
// list (kContainAll), or at least one must (kContainAny).
enum class ContainType { kContainAll, kContainAny };

// Tests one list row against a value set.
//
// The row is the slice [offset, offset + length) of `value_array`; the set is
// every element of `value_set_array`. Both arrays are downcast to
// `ArrowArrayType` without a runtime check, so the caller must guarantee the
// concrete type. String/binary arrays are compared through `GetView`, all
// other types through `Value`. Validity bitmaps are not consulted here.
//
// Returns:
//   kContainAll - true iff every set element equals some row element
//                 (true for an empty set).
//   kContainAny - true iff some set element equals some row element
//                 (false for an empty set or an empty row).
template <typename ArrowArrayType, ContainType contain_type>
bool match_value(const arrow::Array *value_array, int64_t offset,
                 int64_t length, const arrow::Array *value_set_array) {
  const auto *row_values = static_cast<const ArrowArrayType *>(value_array);
  const auto *set_values =
      static_cast<const ArrowArrayType *>(value_set_array);

  // Arrow lengths are 64-bit; use 64-bit indices so long arrays can neither
  // truncate nor overflow the loop counters.
  const int64_t set_length = set_values->length();
  for (int64_t j = 0; j < set_length; ++j) {
    bool found = false;
    for (int64_t i = 0; i < length && !found; ++i) {
      if constexpr (std::is_same_v<ArrowArrayType, arrow::StringArray> ||
                    std::is_same_v<ArrowArrayType, arrow::LargeStringArray> ||
                    std::is_same_v<ArrowArrayType, arrow::BinaryArray> ||
                    std::is_same_v<ArrowArrayType, arrow::LargeBinaryArray>) {
        found = row_values->GetView(offset + i) == set_values->GetView(j);
      } else {
        found = row_values->Value(offset + i) == set_values->Value(j);
      }
    }

    // The mode is a template parameter, so resolve it at compile time.
    if constexpr (contain_type == ContainType::kContainAll) {
      if (!found) {
        return false;  // one missing element disproves "all"
      }
    } else {
      if (found) {
        return true;  // one hit proves "any"
      }
    }
  }
  return contain_type == ContainType::kContainAll;
}

// Scalar kernel body for contain_all / contain_any.
//
// Reads the value set and element data type from the kernel state, checks
// that the single input is a list whose element type equals the value-set
// type, then emits one boolean per row: null for a null list, otherwise the
// result of `match_value` for that row's slice.
//
// Returns an ExecutionError when the value set is missing, the input is not a
// list, the element types differ, or the configured data type is unsupported.
//
// NOTE(review): `state->args.data_type` selects the concrete array type for
// an unchecked downcast; it is assumed to agree with the value-set type -
// confirm at the call site that builds the options.
template <ContainType contain_type>
arrow::Status ContainFunction(cp::KernelContext *ctx, const cp::ExecSpan &batch,
                              cp::ExecResult *out) {
  auto *state = static_cast<ContainOp::ContainState *>(ctx->state());
  const auto &value_set = state->args.value_set;
  if (value_set == nullptr) {
    return arrow::Status::ExecutionError("value_set is null");
  }

  const auto &input_array = batch[0].array;
  if (batch[0].type()->id() != arrow::Type::LIST) {
    return arrow::Status::ExecutionError("batch type is not list");
  }
  if (!input_array.type->field(0)->type()->Equals(value_set->type())) {
    return arrow::Status::ExecutionError(
        "value_set type is not equal to batch type");
  }
  const auto list_array =
      std::dynamic_pointer_cast<arrow::ListArray>(input_array.ToArray());
  if (list_array == nullptr) {
    // Defensive: the type id was checked above, but never dereference a
    // failed downcast.
    return arrow::Status::ExecutionError("batch array is not a list array");
  }

  // The builder only lives for this call, so keep it on the stack.
  arrow::BooleanBuilder builder(ctx->memory_pool());
  ARROW_RETURN_NOT_OK(builder.Reserve(batch.length));

  const arrow::Array *list_values = list_array->values().get();
  const arrow::Array *wanted = value_set.get();
  // batch.length is 64-bit; index with the same width.
  for (int64_t row = 0; row < batch.length; ++row) {
    // a whole list may be null for a doc
    if (list_array->IsNull(row)) {
      ARROW_RETURN_NOT_OK(builder.AppendNull());
      continue;
    }
    const auto length = list_array->value_length(row);
    const auto offset = list_array->value_offset(row);
    bool match = false;
    switch (state->args.data_type) {
      case DataType::ARRAY_INT32:
        match = match_value<arrow::Int32Array, contain_type>(
            list_values, offset, length, wanted);
        break;

      case DataType::ARRAY_UINT32:
        match = match_value<arrow::UInt32Array, contain_type>(
            list_values, offset, length, wanted);
        break;

      case DataType::ARRAY_INT64:
        match = match_value<arrow::Int64Array, contain_type>(
            list_values, offset, length, wanted);
        break;

      case DataType::ARRAY_UINT64:
        match = match_value<arrow::UInt64Array, contain_type>(
            list_values, offset, length, wanted);
        break;

      case DataType::ARRAY_FLOAT:
        match = match_value<arrow::FloatArray, contain_type>(
            list_values, offset, length, wanted);
        break;

      case DataType::ARRAY_DOUBLE:
        match = match_value<arrow::DoubleArray, contain_type>(
            list_values, offset, length, wanted);
        break;

      case DataType::ARRAY_STRING:
        match = match_value<arrow::StringArray, contain_type>(
            list_values, offset, length, wanted);
        break;

      case DataType::ARRAY_BOOL:
        match = match_value<arrow::BooleanArray, contain_type>(
            list_values, offset, length, wanted);
        break;

      default:
        return arrow::Status::ExecutionError("unsupported data type");
    }
    ARROW_RETURN_NOT_OK(builder.Append(match));
  }

  std::shared_ptr<arrow::Array> result_array;
  ARROW_RETURN_NOT_OK(builder.Finish(&result_array));

  out->value = result_array->data();
  return arrow::Status::OK();
}

// Kernel init hook: wraps the (possibly null) function options in a
// ContainState. A null options pointer yields a state with default arguments.
arrow::Result<std::unique_ptr<arrow::compute::KernelState>>
ContainOp::InitExprValue(arrow::compute::KernelContext *,
                         const arrow::compute::KernelInitArgs &args) {
  const auto *options = static_cast<const ContainOp::Options *>(args.options);
  return std::make_unique<ContainOp::ContainState>(options);
}


// Registers the contain-all and contain-any scalar functions with the Arrow
// compute registry. Each function gets one kernel per supported list element
// type; registration stops at the first failure and returns that status.
arrow::Status ContainOp::register_op() {
  static Options options = Options::Defaults();

  // Builds one unary scalar function named `name` whose kernels all run
  // `exec`, then adds it to the global registry.
  const auto add_function = [](const auto &name,
                               cp::ArrayKernelExec exec) -> arrow::Status {
    auto func = std::make_shared<cp::ScalarFunction>(
        name, cp::Arity::Unary(), func_doc, &options, false);

    const std::shared_ptr<arrow::DataType> element_types[] = {
        arrow::int32(),   arrow::uint32(),  arrow::int64(), arrow::uint64(),
        arrow::float32(), arrow::float64(), arrow::utf8(),  arrow::boolean()};
    for (const auto &element_type : element_types) {
      cp::ScalarKernel kernel({arrow::list(element_type)}, arrow::boolean(),
                              exec, InitExprValue);
      kernel.mem_allocation = cp::MemAllocation::NO_PREALLOCATE;
      kernel.null_handling = cp::NullHandling::INTERSECTION;
      ARROW_RETURN_NOT_OK(func->AddKernel(std::move(kernel)));
    }

    auto registry = cp::GetFunctionRegistry();
    return registry->AddFunction(std::move(func));
  };

  ARROW_RETURN_NOT_OK(
      add_function(kContainAll, ContainFunction<ContainType::kContainAll>));
  ARROW_RETURN_NOT_OK(
      add_function(kContainAny, ContainFunction<ContainType::kContainAny>));

  return arrow::Status::OK();
}

}  // namespace zvec::sqlengine

================================================
FILE: src/db/sqlengine/planner/ops/contain_op.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License

#pragma once

#include <memory>
#include <arrow/api.h>
#include <arrow/compute/api.h>
#include <zvec/db/type.h>


namespace zvec::sqlengine {

namespace cp = arrow::compute;

// Arrow compute plumbing for the contain_all / contain_any operators:
// the options type descriptor, the options object, the per-kernel state and
// the registration entry points.
class ContainOp {
 public:
  // Type descriptor Arrow uses to name, compare and copy `Options`.
  class ContainOptionsType : public arrow::compute::FunctionOptionsType {
   public:
    // Returns the process-wide descriptor instance (created on first use).
    static const arrow::compute::FunctionOptionsType *GetInstance() {
      static std::unique_ptr<arrow::compute::FunctionOptionsType> instance(
          new ContainOptionsType());
      return instance.get();
    }

    const char *type_name() const override {
      return "Contain";
    }

    // The string form is just the type name; option contents are not printed.
    std::string Stringify(
        const arrow::compute::FunctionOptions & /*options*/) const override {
      return type_name();
    }

    // Two options are equal when their data types match and their value sets
    // are either both absent or compare equal via `arrow::Array::Equals`.
    // Both arguments are downcast to `Options` without a runtime check.
    bool Compare(const arrow::compute::FunctionOptions &options,
                 const arrow::compute::FunctionOptions &other) const override {
      const auto &lhs = static_cast<const Options &>(options);
      const auto &rhs = static_cast<const Options &>(other);
      if (lhs.args.data_type != rhs.args.data_type) {
        return false;
      }
      const arrow::Array *left_value = lhs.args.value_set.get();
      const arrow::Array *right_value = rhs.args.value_set.get();
      if (left_value == nullptr || right_value == nullptr) {
        // Equal only when both sides are missing.
        return left_value == right_value;
      }
      return left_value->Equals(*right_value);
    }

    // Clones `options` into a new `Options` sharing the same value set.
    std::unique_ptr<arrow::compute::FunctionOptions> Copy(
        const arrow::compute::FunctionOptions &options) const override {
      const auto &opts = static_cast<const Options &>(options);
      return std::make_unique<Options>(opts.args);
    }
  };

  // Arguments of the contain functions.
  struct Args {
    // Values to look for; null means "not configured".
    std::shared_ptr<arrow::Array> value_set;
    // Array data type of the column being tested. Value-initialized so a
    // default-constructed Args (e.g. inside a ContainState built from null
    // options) never holds an indeterminate enum value.
    DataType data_type{};
  };

  // Function options carrying `Args`.
  class Options : public cp::FunctionOptions {
   public:
    // Builds options with no value set.
    Options() : Options(Args{}) {}

    // Builds options around `args`, which is moved into the member.
    Options(Args args)
        : cp::FunctionOptions(ContainOptionsType::GetInstance()),
          args(std::move(args)) {}

    static inline constexpr char const kTypeName[] = "ContainFunctionOptions";

    // Returns default-constructed options.
    static Options Defaults() {
      return Options();
    }

    // Returns a copy of the stored arguments.
    Args get_args() const {
      return args;
    }

    Args args;
  };

  // Kernel state holding a copy of the options' arguments.
  struct ContainState : public arrow::compute::KernelState {
    Args args;

    // `o` may be null; `args` then keeps its defaults (no value set).
    explicit ContainState(const Options *o) {
      if (o) {
        args = o->get_args();
      }
    }
  };


  // Arrow function documentation shared by both contain functions: summary,
  // description, argument names and options class name.
  static inline const cp::FunctionDoc func_doc{
      "check if contain_all/any",
      "returns contain_all/any(x)",
      {"value_set"},
      "Options",
  };

  // Registers the contain functions with the Arrow compute registry.
  static arrow::Status register_op();

  // Kernel init hook: builds a ContainState from `args.options`.
  static arrow::Result<std::unique_ptr<arrow::compute::KernelState>>
  InitExprValue(arrow::compute::KernelContext *,
                const arrow::compute::KernelInitArgs &args);
};

}  // namespace zvec::sqlengine

================================================
FILE: src/db/sqlengine/planner/ops/fetch_vector_op.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License

#include "db/sqlengine/planner/ops/fetch_vector_op.h"
#include <arrow/status.h>
#include "db/index/column/vector_column/combined_vector_column_indexer.h"
#include "db/sqlengine/common/util.h"

namespace zvec::sqlengine {

// Returns an InternalError (prefixed with `msg`) from the enclosing function
// when the Arrow status expression `status` is not OK.
//
// `status` is evaluated exactly once and stored, so a failing builder call is
// not re-executed just to format its error text. The do/while wrapper makes
// the macro behave as a single statement; call sites end it with `;`.
#define CHECK_ARROW_STATUS(msg, status)                               \
  do {                                                                \
    const ::arrow::Status checked_arrow_status_ = (status);           \
    if (!checked_arrow_status_.ok()) {                                \
      return tl::make_unexpected(                                     \
          Status::InternalError(msg, checked_arrow_status_.ToString())); \
    }                                                                 \
  } while (0)

// Fetches the dense vector of every row id in [begin, end) from `indexer` and
// packs the raw buffers into an Arrow binary array, one entry per row. A row
// whose buffer is empty becomes a null entry. The first fetch or builder
// failure aborts and is returned as the error.
template <typename Iter>
Result<std::shared_ptr<arrow::Array>> fetch_dense_vector_helper(
    const CombinedVectorColumnIndexer *indexer, Iter begin, Iter end) {
  const size_t row_count = std::distance(begin, end);
  arrow::BinaryBuilder builder;
  CHECK_ARROW_STATUS("Reserve vector builder failed:",
                     builder.Reserve(row_count));

  for (Iter row = begin; row != end; ++row) {
    auto fetched = indexer->Fetch(*row);
    if (!fetched) {
      return tl::make_unexpected(fetched.error());
    }
    const auto &bytes = std::get<vector_column_params::DenseVectorBuffer>(
                            fetched.value().vector_buffer)
                            .data;
    if (!bytes.empty()) {
      CHECK_ARROW_STATUS("Append vector failed:", builder.Append(bytes));
    } else {
      CHECK_ARROW_STATUS("Append null failed:", builder.AppendNull());
    }
  }

  auto finished = builder.Finish();
  if (finished.ok()) {
    return finished.MoveValueUnsafe();
  }
  return tl::make_unexpected(Status::InternalError(
      "finish vector builder failed:", finished.status().ToString()));
}

// Looks up the combined vector indexer for `vector_name` in `segment` and
// fetches the dense vectors of `rows` as an Arrow array. Fails with
// InvalidArgument when the segment has no such indexer.
Result<std::shared_ptr<arrow::Array>> FetchVectorOp::fetch_dense_vector(
    const Segment &segment, const std::string &vector_name,
    const std::vector<int> &rows) {
  auto indexer = segment.get_combined_vector_indexer(vector_name);
  if (indexer) {
    return fetch_dense_vector_helper(indexer.get(), rows.begin(), rows.end());
  }
  return tl::make_unexpected(
      Status::InvalidArgument("vector index not found:", vector_name));
}


// Fetches the sparse vector of every row id in [begin, end) from `indexer`
// and packs them into an Arrow struct array built from `Util::sparse_type()`:
// field 0 receives the indices buffer, field 1 the values buffer. A row whose
// indices buffer is empty becomes a null struct entry. The first fetch or
// builder failure aborts and is returned as the error.
template <typename Iter>
Result<std::shared_ptr<arrow::Array>> fetch_sparse_vector_helper(
    const CombinedVectorColumnIndexer *indexer, Iter begin, Iter end) {
  const size_t count = std::distance(begin, end);

  auto array_builder = arrow::MakeBuilder(Util::sparse_type());
  if (!array_builder.ok()) {
    return tl::make_unexpected(Status::InternalError(
        "make builder failed:", array_builder.status().ToString()));
  }

  // Ownership stays with the base-class pointer, so nothing leaks if any of
  // the downcasts below fails.
  std::unique_ptr<arrow::ArrayBuilder> owned_builder =
      array_builder.MoveValueUnsafe();
  auto *sparse_builder =
      dynamic_cast<arrow::StructBuilder *>(owned_builder.get());
  if (sparse_builder == nullptr || sparse_builder->num_fields() < 2) {
    return tl::make_unexpected(Status::InternalError(
        "make builder failed:",
        std::string("sparse type did not yield a two-field struct builder")));
  }
  auto *sparse_index_builder =
      dynamic_cast<arrow::BinaryBuilder *>(sparse_builder->field_builder(0));
  auto *sparse_value_builder =
      dynamic_cast<arrow::BinaryBuilder *>(sparse_builder->field_builder(1));
  if (sparse_index_builder == nullptr || sparse_value_builder == nullptr) {
    return tl::make_unexpected(Status::InternalError(
        "make builder failed:",
        std::string("sparse type fields are not binary builders")));
  }

  CHECK_ARROW_STATUS("Reserve failed:", sparse_builder->Reserve(count));
  CHECK_ARROW_STATUS("Reserve failed:", sparse_index_builder->Reserve(count));
  CHECK_ARROW_STATUS("Reserve failed:", sparse_value_builder->Reserve(count));
  for (auto i = begin; i != end; ++i) {
    auto vector_res = indexer->Fetch(*i);
    if (!vector_res) {
      return tl::make_unexpected(vector_res.error());
    }
    const auto &data = std::get<vector_column_params::SparseVectorBuffer>(
        vector_res.value().vector_buffer);
    if (data.indices.empty()) {
      // will auto append to sub builder
      CHECK_ARROW_STATUS("Append failed", sparse_builder->AppendNull());
    } else {
      CHECK_ARROW_STATUS("Append failed", sparse_builder->Append(true));
      CHECK_ARROW_STATUS("Append failed",
                         sparse_index_builder->Append(data.indices));
      CHECK_ARROW_STATUS("Append failed",
                         sparse_value_builder->Append(data.values));
    }
  }

  auto vector_array_ret = sparse_builder->Finish();
  if (!vector_array_ret.ok()) {
    return tl::make_unexpected(Status::InternalError(
        "finish vector builder failed:", vector_array_ret.status().ToString()));
  }
  return vector_array_ret.MoveValueUnsafe();
}

// Looks up the combined vector indexer for `vector_name` in `segment` and
// fetches the sparse vectors of `rows` as an Arrow array. Fails with
// InvalidArgument when the segment has no such indexer.
Result<std::shared_ptr<arrow::Array>> FetchVectorOp::fetch_sparse_vector(
    const Segment &segment, const std::string &vector_name,
    const std::vector<int> &rows) {
  auto indexer = segment.get_combined_vector_indexer(vector_name);
  if (indexer) {
    return fetch_sparse_vector_helper(indexer.get(), rows.begin(), rows.end());
  }
  return tl::make_unexpected(
      Status::InvalidArgument("vector index not found:", vector_name));
}

// Ignores the source options and returns a brand-new, default-constructed
// FetchVectorFunctionOptions.
std::unique_ptr<cp::FunctionOptions> FetchVectorOp::FunctionOptionsType::Copy(
    const cp::FunctionOptions &) const {
  std::unique_ptr<cp::FunctionOptions> fresh =
      std::make_unique<FetchVectorFunctionOptions>();
  return fresh;
}

// Scalar kernel body: reads the input batch as uint64 row ids and fetches the
// matching dense or sparse vectors (per `is_dense` in the kernel state) from
// the state's indexer. Returns an ExecutionError when the indexer is missing
// or the fetch fails.
arrow::Status FetchVectorOp::FetchVectorFunction(cp::KernelContext *ctx,
                                                 const cp::ExecSpan &batch,
                                                 cp::ExecResult *out) {
  auto *state = static_cast<FetchVectorState *>(ctx->state());
  const auto *indexer = state->args.indexer.get();
  if (indexer == nullptr) {
    return arrow::Status::ExecutionError("indexer is null");
  }

  // Buffer 1 of the input array holds the row id values.
  auto row_span = batch[0].array.GetSpan<uint64_t>(1, batch.length);
  Result<std::shared_ptr<arrow::Array>> fetched =
      state->args.is_dense
          ? fetch_dense_vector_helper(indexer, row_span.begin(),
                                      row_span.end())
          : fetch_sparse_vector_helper(indexer, row_span.begin(),
                                       row_span.end());
  if (!fetched) {
    return arrow::Status::ExecutionError("fetch vector failed:",
                                         fetched.error().c_str());
  }

  out->value = std::move(fetched.value()->data());
  return arrow::Status::OK();
}

// Kernel init hook: wraps the (possibly null) function options in a
// FetchVectorState. A null options pointer yields a state with default
// arguments.
arrow::Result<std::unique_ptr<arrow::compute::KernelState>>
FetchVectorOp::InitExprValue(arrow::compute::KernelContext *,
                             const arrow::compute::KernelInitArgs &args) {
  const auto *options =
      static_cast<const FetchVectorOp::Options *>(args.options);
  return std::make_unique<FetchVectorOp::FetchVectorState>(options);
}


// Registers "fetch_vector" (binary output) and "fetch_sparse_vector"
// (sparse struct output) with the Arrow compute registry. Both take a uint64
// row id and share one kernel body. Registration stops at the first failure.
arrow::Status FetchVectorOp::register_op() {
  static Options options = Options::Defaults();

  // Builds a unary scalar function `name` with a single uint64 kernel that
  // produces `out_type`, then adds it to the global registry.
  const auto add_function =
      [](const std::string &name,
         const std::shared_ptr<arrow::DataType> &out_type) -> arrow::Status {
    auto func = std::make_shared<cp::ScalarFunction>(name, cp::Arity::Unary(),
                                                     func_doc, &options, false);
    cp::ScalarKernel kernel({arrow::uint64()}, out_type, FetchVectorFunction,
                            InitExprValue);

    kernel.mem_allocation = cp::MemAllocation::NO_PREALLOCATE;
    kernel.null_handling = cp::NullHandling::COMPUTED_NO_PREALLOCATE;

    ARROW_RETURN_NOT_OK(func->AddKernel(std::move(kernel)));

    auto registry = cp::GetFunctionRegistry();
    return registry->AddFunction(std::move(func));
  };

  ARROW_RETURN_NOT_OK(add_function("fetch_vector", arrow::binary()));
  ARROW_RETURN_NOT_OK(
      add_function("fetch_sparse_vector", Util::sparse_type()));

  return arrow::Status::OK();
}


}  // namespace zvec::sqlengine

================================================
FILE: src/db/sqlengine/planner/ops/fetch_vector_op.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License

#pragma once

#include <arrow/api.h>
#include <zvec/db/status.h>
#include "db/index/column/vector_column/combined_vector_column_indexer.h"
#include "db/index/segment/segment.h"

namespace zvec::sqlengine {

namespace cp = arrow::compute;

// Generic kernel state that simply stores a copy of the function options.
template <typename OptionsType>
struct OptionsWrapper : public cp::KernelState {
  explicit OptionsWrapper(OptionsType options) : options(std::move(options)) {}

  // Kernel init hook: copies `args.options` (downcast to OptionsType) into a
  // new wrapper, or returns Invalid when no options were supplied.
  static arrow::Result<std::unique_ptr<KernelState>> Init(
      cp::KernelContext * /*ctx*/, const cp::KernelInitArgs &args) {
    const auto *typed_options = static_cast<const OptionsType *>(args.options);
    if (typed_options == nullptr) {
      return arrow::Status::Invalid(
          "Attempted to initialize KernelState from null FunctionOptions");
    }
    return std::make_unique<OptionsWrapper>(*typed_options);
  }

  // Returns the options stored in `state`, which must be an OptionsWrapper.
  static const OptionsType &Get(const KernelState &state) {
    const auto &wrapper =
        ::arrow::internal::checked_cast<const OptionsWrapper &>(state);
    return wrapper.options;
  }

  // Convenience overload reading the state from the kernel context.
  static const OptionsType &Get(cp::KernelContext *ctx) {
    return Get(*ctx->state());
  }

  OptionsType options;
};

// Fetches stored vectors by segment row id, both as direct helpers and as
// Arrow compute functions registered through `register_op()`.
class FetchVectorOp {
 public:
  // Fetches the dense vectors of `rows` from the indexer named `vector_name`
  // in `segment` and returns them as an Arrow array.
  static Result<std::shared_ptr<arrow::Array>> fetch_dense_vector(
      const Segment &segment, const std::string &vector_name,
      const std::vector<int> &rows);

  // Same as `fetch_dense_vector`, for sparse vectors.
  static Result<std::shared_ptr<arrow::Array>> fetch_sparse_vector(
      const Segment &segment, const std::string &vector_name,
      const std::vector<int> &rows);

  // Registers the fetch functions with the Arrow compute registry.
  static arrow::Status register_op();

  // Type descriptor for `FetchVectorFunctionOptions`. All members are private
  // (the class has no access specifier); they are reached through the
  // `cp::FunctionOptionsType` virtual interface.
  class FunctionOptionsType : public cp::FunctionOptionsType {
    const char *type_name() const override {
      return "FetchVectorFunctionOptionsType";
    }

    std::string Stringify(const cp::FunctionOptions &) const override {
      return "FetchVectorFunctionOptionsType";
    }

    // Options of this type never compare equal.
    bool Compare(const cp::FunctionOptions &,
                 const cp::FunctionOptions &) const override {
      return false;
    }

    std::unique_ptr<cp::FunctionOptions> Copy(
        const cp::FunctionOptions &) const override;
    // optional: support for serialization
    // Result<std::shared_ptr<Buffer>> Serialize(const FunctionOptions&) const
    // override; Result<std::unique_ptr<FunctionOptions>> Deserialize(const
    // Buffer&) const override;
  };

  // Returns the single shared `FunctionOptionsType` descriptor.
  static cp::FunctionOptionsType *GetFetchVectorFunctionOptionsType() {
    static FunctionOptionsType options_type;
    return &options_type;
  }

  // Payload-free options bound to `FunctionOptionsType`.
  class FetchVectorFunctionOptions : public cp::FunctionOptions {
   public:
    FetchVectorFunctionOptions()
        : cp::FunctionOptions(GetFetchVectorFunctionOptionsType()) {}
  };

  // Type descriptor Arrow uses to name, compare and copy `Options`.
  class FetchVectorOptionsType : public arrow::compute::FunctionOptionsType {
   public:
    // Returns the process-wide descriptor instance (created on first use).
    static const arrow::compute::FunctionOptionsType *GetInstance() {
      static std::unique_ptr<arrow::compute::FunctionOptionsType> instance(
          new FetchVectorOptionsType());
      return instance.get();
    }

    const char *type_name() const override {
      return "FetchVector";
    }

    // The string form is just the type name; option contents are not printed.
    std::string Stringify(
        const arrow::compute::FunctionOptions & /*options*/) const override {
      return type_name();
    }

    // Equal when both options have the same density flag and refer to the
    // same indexer handle. Both arguments are downcast to `Options` without a
    // runtime check.
    bool Compare(const arrow::compute::FunctionOptions &options,
                 const arrow::compute::FunctionOptions &other) const override {
      const auto &lop = static_cast<const Options &>(options);
      const auto &rop = static_cast<const Options &>(other);
      return lop.args.is_dense == rop.args.is_dense &&
             lop.args.indexer == rop.args.indexer;
    }

    // Clones `options` into a new `Options` with the same indexer and flag.
    std::unique_ptr<arrow::compute::FunctionOptions> Copy(
        const arrow::compute::FunctionOptions &options) const override {
      const auto &opts = static_cast<const Options &>(options);
      return std::make_unique<Options>(opts.args.indexer, opts.args.is_dense);
    }
  };

  // Arguments of the fetch functions.
  struct Args {
    // Indexer to read vectors from; null means "not configured".
    CombinedVectorColumnIndexer::Ptr indexer;
    // True to fetch dense vectors, false for sparse ones.
    bool is_dense{true};
  };

  // Function options carrying `Args`.
  class Options : public cp::FunctionOptions {
   public:
    // Builds dense-mode options without an indexer.
    Options() : Options(nullptr, true) {}

    // Builds options around `indexer`. The handle is taken by value and moved
    // into `args` so no extra copy of it is made.
    Options(CombinedVectorColumnIndexer::Ptr indexer, bool is_dense)
        : cp::FunctionOptions(FetchVectorOptionsType::GetInstance()),
          args{std::move(indexer), is_dense} {}

    static inline constexpr char const kTypeName[] =
        "FetchVectorFunctionOptions";

    // Returns default-constructed options.
    static Options Defaults() {
      return Options();
    }

    // Returns a copy of the stored arguments.
    Args get_args() const {
      return args;
    }

    Args args;
  };

  // Kernel state holding a copy of the options' arguments.
  struct FetchVectorState : public arrow::compute::KernelState {
    Args args;

    // `o` may be null; `args` then keeps its defaults (no indexer, dense).
    explicit FetchVectorState(const Options *o) {
      if (o) {
        args = o->get_args();
      }
    }
  };


  // Kernel exec entry point shared by the dense and sparse functions.
  static arrow::Status FetchVectorFunction(cp::KernelContext *ctx,
                                           const cp::ExecSpan &batch,
                                           cp::ExecResult *out);

  // Arrow function documentation: summary, description, argument names and
  // options class name.
  static inline const cp::FunctionDoc func_doc{
      "fetch dense or sparse vector",
      "returns fetch_vector(x)",
      {"segment_row_id"},
      "Options",
  };

  // Kernel init hook: builds a FetchVectorState from `args.options`.
  static arrow::Result<std::unique_ptr<arrow::compute::KernelState>>
  InitExprValue(arrow::compute::KernelContext *,
                const arrow::compute::KernelInitArgs &args);
};

}  // namespace zvec::sqlengine

================================================
FILE: src/db/sqlengine/planner/optimizer.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "optimizer.h"
#include <zvec/ailego/logger/logger.h>
#include <zvec/db/config.h>
#include <zvec/db/type.h>
#include "db/sqlengine/analyzer/query_info_helper.h"
#include "db/sqlengine/common/util.h"
#include "db/sqlengine/planner/invert_search.h"

namespace zvec::sqlengine {


// Factory: builds an InvertCondOptimizer bound to `collection_schema` and
// returns it through the base optimizer handle.
Optimizer::Ptr InvertCondOptimizer::CreateInvertCondOptimizer(
    CollectionSchema *collection_schema) {
  auto optimizer = std::make_shared<InvertCondOptimizer>(collection_schema);
  return optimizer;
}

// Returns true when the invert condition should be converted to a forward
// condition, i.e. when the share of index entries it selects reaches the
// configured invert-to-forward scan ratio. Returns false for a null node, for
// LIKE / IN / CONTAIN_ANY / CONTAIN_ALL / EQ / NE conditions, and whenever the
// ratio cannot be evaluated.
bool InvertCondOptimizer::ratio_rule(Segment *segment,
                                     QueryRelNode *invert_cond) {
  if (invert_cond == nullptr) {
    return false;
  }

  // These operators are never subject to the ratio rule.
  const auto node_op = invert_cond->op();
  const bool exempt_from_rule = node_op == QueryNodeOp::Q_LIKE ||
                                node_op == QueryNodeOp::Q_IN ||
                                node_op == QueryNodeOp::Q_CONTAIN_ANY ||
                                node_op == QueryNodeOp::Q_CONTAIN_ALL ||
                                node_op == QueryNodeOp::Q_EQ ||
                                node_op == QueryNodeOp::Q_NE;
  if (exempt_from_rule) {
    return false;
  }

  const std::string column_name = invert_cond->left()->text();
  auto invert_column_reader = segment->get_scalar_indexer(column_name);
  if (invert_column_reader == nullptr) {
    LOG_ERROR("Get invert column reader failed. invert_cond [%s]",
              invert_cond->text().c_str());
    return false;
  }

  CompareOp oper = InvertSearch::query_nodeop2search_op(invert_cond->op());
  if (oper == CompareOp::NONE) {
    LOG_ERROR("Optimizer get search operator failed. invert_cond [%s]",
              invert_cond->text().c_str());
    return false;
  }

  std::string invert_term = invert_cond->right()->text();

  const float scan_ratio_threshold =
      GlobalConfig::Instance().invert_to_forward_scan_ratio();

  uint64_t total_size = 0;
  uint64_t range_size = 0;
  const Status status = invert_column_reader->evaluate_ratio(
      invert_term, oper, &total_size, &range_size);
  if (!status.ok()) {
    LOG_WARN("Optimizer evaluate failed. invert_cond [%s] err[%s]",
             invert_cond->text().c_str(), status.c_str());
    return false;
  }

  // An empty index yields ratio 0.
  const float ratio =
      total_size > 0 ? static_cast<float>(static_cast<double>(range_size) /
                                          static_cast<double>(total_size))
                     : 0.0f;

  if (ratio < scan_ratio_threshold) {
    return false;
  }

  LOG_DEBUG(
      "Optimizer evaluate result reach threshold. "
      "invert_cond [%s] total_size[%zu] range_size[%zu] ratio[%f]",
      invert_cond->text().c_str(), static_cast<size_t>(total_size),
      static_cast<size_t>(range_size), ratio);

  return true;
}

// Returns true when the ratio rule returns false, meaning the condition does
// not need to be optimized and stays an invert condition.
bool InvertCondOptimizer::invert_rule(Segment *segment,
                                      QueryRelNode *invert_cond) {
  const bool convert_to_forward = ratio_rule(segment, invert_cond);
  return !convert_to_forward;
}

// Walks the condition tree rooted at `invert_cond` and turns every relational
// node into a forward condition: the node is flagged as forward, its column is
// added to the query's forward filter schema, and its right-hand value is
// rewritten as text when the buffer-to-text conversion succeeds. Non-relational
// nodes are traversed left subtree first, then right.
void InvertCondOptimizer::convert_invert_cond_to_forward(
    QueryInfo *query_info, QueryNode *invert_cond) {
  if (invert_cond == nullptr) {
    return;
  }

  // Not a leaf condition: recurse into both children.
  if (invert_cond->type() != QueryNode::QueryNodeType::REL_EXPR) {
    convert_invert_cond_to_forward(query_info, invert_cond->left().get());
    convert_invert_cond_to_forward(query_info, invert_cond->right().get());
    return;
  }

  QueryRelNode *rel_node = reinterpret_cast<QueryRelNode *>(invert_cond);
  const QueryNode::Ptr &lhs = rel_node->left();
  const QueryNode::Ptr &rhs = rel_node->right();
  const std::string column_name = lhs->text();

  rel_node->set_forward();

  // 1. Register the column as a forward filter field.
  // NOTE(review): forward_field is dereferenced without a null check -
  // confirm get_forward_field cannot return null for an invert column.
  auto forward_field = collection_schema_->get_forward_field(column_name);
  // Array invert fields are currently never converted to forward, so only the
  // element data type is needed.
  const DataType data_type = forward_field->element_data_type();
  query_info->add_forward_filter_schema_ptr(column_name,
                                            std::move(forward_field));

  // 2. Turn the numeric buffer back into numeric text.
  std::string numeric_text;
  const bool converted =
      QueryInfoHelper::data_buf_2_text(rhs->text(), data_type, &numeric_text);
  if (converted) {
    rhs->set_text(numeric_text);
  }
}


// Scans the tree rooted at `invert_cond`, skipping the subtree rooted at
// `invert_subroot` (if any), and sets `*rest_has_invert` to true when a
// relational node outside that subtree has a truthy rule result. The flag is
// only ever set, never cleared, and the right subtree is not visited once the
// left one has set it.
void InvertCondOptimizer::check_node_except_subroot(QueryNode *invert_cond,
                                                    QueryNode *invert_subroot,
                                                    bool *rest_has_invert) {
  // Nothing to inspect for an empty node or for the excluded subroot itself.
  // (A non-null node can only equal invert_subroot when the latter is set.)
  if (invert_cond == nullptr || invert_cond == invert_subroot) {
    return;
  }

  if (invert_cond->type() == QueryNode::QueryNodeType::REL_EXPR) {
    QueryRelNode *rel_node = reinterpret_cast<QueryRelNode *>(invert_cond);
    if (rel_node->rule_result()) {
      *rest_has_invert = true;
    }
    return;
  }

  check_node_except_subroot(invert_cond->left().get(), invert_subroot,
                            rest_has_invert);
  if (!*rest_has_invert) {
    check_node_except_subroot(invert_cond->right().get(), invert_subroot,
                              rest_has_invert);
  }
}

// Rewrites `query_info` according to the subroot selected by the optimize
// rule and reports whether anything was changed.
//
// Three situations are distinguished:
//   1. `invert_subroot` is the whole invert condition: nothing to do.
//   2. `invert_subroot` is null: the whole invert condition is moved to the
//      forward filter, provided no relational node in it still passes the
//      rule.
//   3. `invert_subroot` is a strict part of the invert condition: the subroot
//      stays as the invert condition and the remainder is moved to the
//      forward filter, provided no node outside the subroot passes the rule.
//
// Returns true when `query_info` was modified, false otherwise.
bool InvertCondOptimizer::apply_optimize_result(QueryInfo *query_info,
                                                QueryNode *invert_subroot) {
  // Installs `cond` as the forward filter, AND-ing it with the existing
  // filter (existing filter on the left) when there already is one.
  const auto merge_into_forward = [query_info](QueryNode::Ptr cond) {
    if (query_info->filter_cond() == nullptr) {
      query_info->set_filter_cond(std::move(cond));
      return;
    }
    QueryNode::Ptr and_node = std::make_shared<QueryNode>();
    and_node->set_op(QueryNodeOp::Q_AND);
    and_node->set_left(query_info->filter_cond());
    and_node->set_right(std::move(cond));
    query_info->set_filter_cond(std::move(and_node));
  };

  // True when a relational node outside `excluded` still passes the rule.
  const auto invert_left_outside = [this, query_info](QueryNode *excluded) {
    bool rest_has_invert = false;
    check_node_except_subroot(query_info->invert_cond().get(), excluded,
                              &rest_has_invert);
    return rest_has_invert;
  };

  // case 1. the subroot is the invert condition itself
  if (invert_subroot == query_info->invert_cond().get()) {
    LOG_DEBUG("No need to move to forward, invert conds are all eligable. ");
    return false;
  }

  // case 2. no subroot was found
  if (invert_subroot == nullptr) {
    // A null subroot covers two scenarios:
    //   a) every invert condition should become a forward condition;
    //   b) only some should, but converting them makes the remaining ones
    //      unusable as invert conditions (e.g. "A or B": B can no longer be
    //      an invert condition once A is converted).
    // Only scenario a) is optimized; scenario b) is detected by the same
    // check used in case 3 and left untouched.
    if (invert_left_outside(nullptr)) {
      LOG_DEBUG(
          "invert_subroot is not found, but failed in "
          "check_node_except_subroot");
      return false;
    }

    QueryNode::Ptr whole_cond = query_info->invert_cond();
    query_info->set_invert_cond(nullptr);

    // Convert the nodes first, then hand the tree over to the forward filter.
    convert_invert_cond_to_forward(query_info, whole_cond.get());
    merge_into_forward(std::move(whole_cond));

    LOG_DEBUG("All invert conds moved to forward cond. forward conds [%s]",
              query_info->filter_cond()->text().c_str());
    return true;
  }

  // case 3. the subroot is a strict part of the invert condition
  LOG_DEBUG(
      "find invert_subroot in invert cond. "
      "invert cond [%s] and invert_subroot [%s]. ",
      query_info->invert_cond()->text().c_str(),
      invert_subroot->text().c_str());

  // Nodes outside the subroot that still qualify as invert conditions must
  // not be turned into forward conditions, so give up in that case.
  if (invert_left_outside(invert_subroot)) {
    LOG_DEBUG("invert_subroot failed in check_node_except_subroot");
    return false;
  }

  // Cut the subroot out; what remains in the invert condition is the part
  // that has to move to the forward filter.
  QueryNode::Ptr kept_subroot =
      invert_subroot->detach_from_invert_cond(query_info);
  QueryNode::Ptr remainder = query_info->invert_cond();

  convert_invert_cond_to_forward(query_info, remainder.get());
  merge_into_forward(std::move(remainder));

  // The detached subroot becomes the new invert condition.
  query_info->set_invert_cond(std::move(kept_subroot));

  LOG_DEBUG("Optimized. forward cond [%s], invert cond [%s]. ",
            query_info->filter_cond()->text().c_str(),
            query_info->invert_cond()->text().c_str());

  return true;
}

// Entry point of the optimizer for one segment.
//
// Finds the subroot of the invert condition selected by invert_rule() and
// lets apply_optimize_result() rewrite `query_info` accordingly. Returns
// false when there is no invert condition or nothing was changed.
bool InvertCondOptimizer::optimize(Segment *segment, QueryInfo *query_info) {
  // TODO: check if support optimize for mutable
  auto invert_root = query_info->invert_cond();
  if (invert_root == nullptr) {
    return false;
  }

  // Locate the invert subroot after applying the rule to every relational
  // node, i.e. the part of the invert condition that is still eligible.
  SubRootResult invert_subroot;
  std::function<bool(QueryRelNode * node)> rule =
      [this, segment](QueryRelNode *rel_node) {
        return invert_rule(segment, rel_node);
      };
  QueryInfoHelper::find_subroot_by_rule(invert_root.get(), rule,
                                        &invert_subroot);

  return apply_optimize_result(query_info, invert_subroot.subroot);
}

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/planner/optimizer.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "db/index/segment/segment.h"
#include "db/sqlengine/analyzer/query_info.h"

namespace zvec::sqlengine {

// Interface for plan optimizers that may rewrite a segment's QueryInfo in
// place before the physical plan is built.
class Optimizer {
 public:
  using Ptr = std::shared_ptr<Optimizer>;

  // Virtual so that implementations are destroyed correctly when they are
  // deleted through an Optimizer pointer.
  virtual ~Optimizer() = default;

  // Attempts to optimize `query_info` for `segment`.
  // Returns true when `query_info` was changed, false when it was left as is.
  virtual bool optimize(Segment *segment, QueryInfo *query_info) = 0;
};

// Optimizer that decides which parts of a query's invert condition should
// instead be evaluated as forward (filter) conditions.
class InvertCondOptimizer : public Optimizer {
 public:
  // Factory returning the optimizer behind the generic Optimizer::Ptr handle.
  // Defined out of line.
  static Optimizer::Ptr CreateInvertCondOptimizer(
      CollectionSchema *collection_schema);

  // Stores `collection_schema` as a raw pointer; the defaulted destructor
  // below never deletes it.
  explicit InvertCondOptimizer(CollectionSchema *collection_schema)
      : collection_schema_(collection_schema) {}

  virtual ~InvertCondOptimizer() = default;

  // See Optimizer::optimize().
  bool optimize(Segment *segment, QueryInfo *query_info) override;

 protected:
  // Rule evaluated for a relational node of the invert condition. Virtual and
  // protected, so subclasses can replace it.
  virtual bool invert_rule(Segment *segment, QueryRelNode *invert_cond);

 private:
  // NOTE(review): presumably the ratio-based check behind invert_rule(); its
  // implementation is not part of this header - confirm in optimizer.cc.
  bool ratio_rule(Segment *segment, QueryRelNode *invert_cond);

  // Rewrites `query_info` for the chosen `invert_subroot` (may be null) and
  // returns whether anything was changed.
  bool apply_optimize_result(QueryInfo *query_info, QueryNode *invert_subroot);

  // Recursively converts the relational nodes under `invert_cond` into
  // forward conditions.
  void convert_invert_cond_to_forward(QueryInfo *query_info,
                                      QueryNode *invert_cond);

  // Sets `*rest_has_invert` when a relational node outside `invert_subroot`
  // has a true rule result.
  void check_node_except_subroot(QueryNode *invert_cond,
                                 QueryNode *invert_subroot,
                                 bool *rest_has_invert);

  CollectionSchema *collection_schema_{nullptr};
};

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/planner/plan_info.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "plan_info.h"
#include <arrow/api.h>
#include <zvec/ailego/logger/logger.h>
#include <zvec/ailego/pattern/expected.hpp>
#include "db/common/error_code.h"

namespace zvec::sqlengine {

// Runs the stored Acero declaration and exposes its output as a record batch
// reader. An Arrow failure is reported as an InternalError status that
// carries the Arrow status text.
Result<std::unique_ptr<arrow::RecordBatchReader>>
PlanInfo::execute_to_reader() {
  auto reader_result = arrow::acero::DeclarationToReader(plan_);
  if (reader_result.ok()) {
    return reader_result.MoveValueUnsafe();
  }
  return tl::make_unexpected(Status::InternalError(
      "execute plan_info failed: ", reader_result.status().ToString()));
}

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/planner/plan_info.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <list>
#include <memory>
#include <string>
#include <arrow/acero/api.h>
#include <arrow/acero/exec_plan.h>
#include <zvec/db/status.h>

namespace zvec::sqlengine {

// Pairs an Acero plan declaration with the Arrow schema of its output.
class PlanInfo {
 public:
  using Ptr = std::shared_ptr<PlanInfo>;

  // Takes both arguments by value and moves them into the members.
  PlanInfo(arrow::acero::Declaration plan,
           std::shared_ptr<arrow::Schema> schema)
      : plan_(std::move(plan)), schema_(std::move(schema)) {}

  // Returns a copy of the shared pointer to the output schema.
  std::shared_ptr<arrow::Schema> schema() const { return schema_; }

  // Executes the plan and returns a reader over its result, or an error
  // status. Defined in plan_info.cc.
  Result<std::unique_ptr<arrow::RecordBatchReader>> execute_to_reader();

 private:
  arrow::acero::Declaration plan_;
  std::shared_ptr<arrow::Schema> schema_;
};

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/planner/query_planner.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "query_planner.h"
#include <memory>
#include <utility>
#include <vector>
#include <arrow/acero/api.h>
#include <arrow/compute/api.h>
#include <zvec/ailego/logger/logger.h>
#include <zvec/ailego/parallel/thread_pool.h>
#include <zvec/db/schema.h>
#include <zvec/db/status.h>
#include <zvec/db/type.h>
#include "db/common/constants.h"
#include "db/common/global_resource.h"
#include "db/sqlengine/analyzer/query_info.h"
#include "db/sqlengine/analyzer/query_node.h"
#include "db/sqlengine/common/util.h"
#include "db/sqlengine/planner/invert_recall_node.h"
#include "db/sqlengine/planner/ops/check_not_filtered_op.h"
#include "db/sqlengine/planner/ops/contain_op.h"
#include "db/sqlengine/planner/ops/fetch_vector_op.h"
#include "db/sqlengine/planner/plan_info.h"
#include "db/sqlengine/planner/segment_node.h"
#include "db/sqlengine/planner/vector_recall_node.h"
#include "optimizer.h"

namespace zvec::sqlengine {

namespace cp = ::arrow::compute;
namespace ac = ::arrow::acero;

QueryPlanner::QueryPlanner(CollectionSchema *schema) : schema_(schema) {}

// Parses the text of `node` into a value of numeric type T using the matching
// ailego::StringHelper conversion.
//
// Supported T: int32_t, int64_t, uint32_t, uint64_t, float, double. Any other
// type is rejected at compile time.
//
// The result of the StringHelper call is not inspected here, so `value` is
// value-initialised: should the helper leave its output untouched on a failed
// parse, the function returns zero instead of an indeterminate value.
template <typename T>
auto convert_node_to_value(const QueryNode::Ptr &node) {
  const std::string &str = node->text();
  T value{};
  if constexpr (std::is_same_v<T, int32_t>) {
    ailego::StringHelper::ToInt32(str, &value);
  } else if constexpr (std::is_same_v<T, int64_t>) {
    ailego::StringHelper::ToInt64(str, &value);
  } else if constexpr (std::is_same_v<T, uint32_t>) {
    ailego::StringHelper::ToUint32(str, &value);
  } else if constexpr (std::is_same_v<T, uint64_t>) {
    ailego::StringHelper::ToUint64(str, &value);
  } else if constexpr (std::is_same_v<T, float>) {
    ailego::StringHelper::ToFloat(str, &value);
  } else if constexpr (std::is_same_v<T, double>) {
    ailego::StringHelper::ToDouble(str, &value);
  } else {
    // Dependent false condition: only fires when this branch is instantiated.
    static_assert(!std::is_same_v<T, T>, "Unsupported type for conversion");
  }
  return value;
}


// Builds an Arrow array of the numeric `ArrowType` from the text of each node
// in `input`, converting every element with convert_node_to_value().
// Any builder failure is propagated as the returned Arrow status.
template <typename ArrowType>
arrow::Result<std::shared_ptr<arrow::Array>> to_arrow_array(
    const std::vector<QueryNode::Ptr> &input) {
  using ValueType = typename ArrowType::c_type;
  using Builder = typename arrow::TypeTraits<ArrowType>::BuilderType;

  Builder array_builder;
  ARROW_RETURN_NOT_OK(array_builder.Reserve(input.size()));

  for (const auto &value_node : input) {
    ARROW_RETURN_NOT_OK(
        array_builder.Append(convert_node_to_value<ValueType>(value_node)));
  }

  std::shared_ptr<arrow::Array> result;
  ARROW_RETURN_NOT_OK(array_builder.Finish(&result));
  return result;
}

// Builds an Arrow string array holding the text of each node in `input`, in
// order. Any builder failure is propagated as the returned Arrow status.
arrow::Result<std::shared_ptr<arrow::Array>> to_arrow_string_array(
    const std::vector<QueryNode::Ptr> &input) {
  arrow::StringBuilder string_builder;
  ARROW_RETURN_NOT_OK(string_builder.Reserve(input.size()));

  for (const auto &value_node : input) {
    ARROW_RETURN_NOT_OK(string_builder.Append(value_node->text()));
  }

  std::shared_ptr<arrow::Array> result;
  ARROW_RETURN_NOT_OK(string_builder.Finish(&result));
  return result;
}

// Builds an Arrow boolean array from `input`: an element is true exactly when
// its node text equals "true". Any builder failure is propagated as the
// returned Arrow status.
arrow::Result<std::shared_ptr<arrow::Array>> to_arrow_bool_array(
    const std::vector<QueryNode::Ptr> &input) {
  arrow::BooleanBuilder bool_builder;
  ARROW_RETURN_NOT_OK(bool_builder.Reserve(input.size()));

  for (const auto &value_node : input) {
    // The text is expected to be normalized to "true" or "false" already.
    const bool is_true = value_node->text() == "true";
    ARROW_RETURN_NOT_OK(bool_builder.Append(is_true));
  }

  std::shared_ptr<arrow::Array> result;
  ARROW_RETURN_NOT_OK(bool_builder.Finish(&result));
  return result;
}

// Converts the value list of `list_node` into an Arrow array whose element
// type matches `data_type`.
//
// Handles the 32/64-bit signed and unsigned integers, FLOAT, DOUBLE, STRING
// and BOOL. Every other data type is logged and reported as an Invalid
// Arrow status.
arrow::Result<std::shared_ptr<arrow::Array>> create_array_from_list_node(
    DataType data_type, const QueryListNode *list_node) {
  const auto &values = list_node->value_expr_list();

  switch (data_type) {
    // Numeric element types share the templated builder.
    case DataType::INT32:
      return to_arrow_array<arrow::Int32Type>(values);
    case DataType::UINT32:
      return to_arrow_array<arrow::UInt32Type>(values);
    case DataType::INT64:
      return to_arrow_array<arrow::Int64Type>(values);
    case DataType::UINT64:
      return to_arrow_array<arrow::UInt64Type>(values);
    case DataType::FLOAT:
      return to_arrow_array<arrow::FloatType>(values);
    case DataType::DOUBLE:
      return to_arrow_array<arrow::DoubleType>(values);

    // Text and boolean values have dedicated builders.
    case DataType::STRING:
      return to_arrow_string_array(values);
    case DataType::BOOL:
      return to_arrow_bool_array(values);

    default:
      LOG_ERROR("Unsupported data type for list node. %d",
                static_cast<int>(data_type));
      return arrow::Status::Invalid("Unsupported data type for list node.");
  }
}

// Translates one relational condition node into an Arrow compute expression.
//
// Left operand:
//   - Q_ID: a column reference; its data type comes from the collection
//     schema's forward field of the same name.
//   - Q_FUNCTION_CALL: only the array-length function is accepted; it maps to
//     "list_value_length" on its first argument and is treated as UINT32.
// Operators:
//   - IS NULL / IS NOT NULL use only the left operand.
//   - IN / CONTAIN_ALL / CONTAIN_ANY expect the right operand to be a list
//     node; the list is materialized as an Arrow array and the result is
//     negated when the list is flagged as excluded.
//   - LIKE is supported for STRING columns only.
//   - =, !=, >, <, >=, <= compare against a literal parsed from the right
//     operand's text according to the column data type.
//
// Returns the expression, or an InvalidArgument status when the node cannot
// be translated.
Result<cp::Expression> QueryPlanner::create_filter_node(
    const QueryNode *query_node) {
  const QueryNode *left = query_node->left_node();
  const QueryNode *right = query_node->right_node();

  arrow::Expression left_exp;
  DataType data_type;
  if (left->op() == QueryNodeOp::Q_ID) {
    auto field_schema = schema_->get_forward_field(left->text());
    // Guard against a failed lookup instead of dereferencing the result
    // blindly.
    if (!field_schema) {
      LOG_ERROR("Forward field not found. expr[%s]",
                query_node->text().c_str());
      return tl::make_unexpected(
          Status::InvalidArgument("forward field not found", left->text()));
    }
    left_exp = cp::field_ref(left->text());
    data_type = field_schema->data_type();
  } else if (left->op() == QueryNodeOp::Q_FUNCTION_CALL) {
    const QueryFuncNode *func_node = dynamic_cast<const QueryFuncNode *>(left);
    if (func_node == nullptr) {
      LOG_ERROR("Left operand is not a function node. expr[%s]",
                query_node->text().c_str());
      return tl::make_unexpected(
          Status::InvalidArgument("unexpected function call", left->text()));
    }
    const auto &func_name = func_node->get_func_name_node()->text();
    const auto &arguments = func_node->arguments();
    if (func_name == kFuncArrayLength) {
      if (arguments.empty()) {
        LOG_ERROR("Function call without argument. expr[%s]",
                  query_node->text().c_str());
        return tl::make_unexpected(
            Status::InvalidArgument("missing function argument", func_name));
      }
      left_exp =
          cp::call("list_value_length", {cp::field_ref(arguments[0]->text())});
      // assume array_length argument is uint32
      data_type = DataType::UINT32;
    } else {
      return tl::make_unexpected(
          Status::InvalidArgument("unexpected function call", func_name));
    }
  } else {
    LOG_ERROR("Unexpected left op. expr[%s]", query_node->text().c_str());
    return tl::make_unexpected(
        Status::InvalidArgument("unexpected left op", left->text()));
  }

  // Null checks need no right operand, so they are answered before `right`
  // is touched at all.
  const auto op = query_node->op();
  if (op == QueryNodeOp::Q_IS_NULL) {
    return cp::is_null(std::move(left_exp));
  }
  if (op == QueryNodeOp::Q_IS_NOT_NULL) {
    return cp::is_valid(std::move(left_exp));
  }

  // TODO: check invalid filter
  if (op == QueryNodeOp::Q_IN || op == QueryNodeOp::Q_CONTAIN_ALL ||
      op == QueryNodeOp::Q_CONTAIN_ANY) {
    const QueryListNode *list_node = dynamic_cast<const QueryListNode *>(right);
    if (list_node == nullptr) {
      LOG_ERROR("Right operand is not a value list. expr[%s]",
                query_node->text().c_str());
      return tl::make_unexpected(
          Status::InvalidArgument("expected value list", query_node->text()));
    }
    auto array_res = create_array_from_list_node(
        FieldSchema::get_element_data_type(data_type), list_node);
    if (!array_res.ok()) {
      return tl::make_unexpected(Status::InvalidArgument(
          "create array failed", array_res.status().ToString()));
    }
    if (op == QueryNodeOp::Q_IN) {
      auto in_filter = cp::call(
          "is_in", {std::move(left_exp)},
          std::make_shared<cp::SetLookupOptions>(array_res.MoveValueUnsafe()));
      if (list_node->exclude()) {
        return cp::not_(std::move(in_filter));
      }
      return in_filter;
    }
    auto contain_filter =
        cp::call(op == QueryNodeOp::Q_CONTAIN_ALL ? kContainAll : kContainAny,
                 {std::move(left_exp)},
                 std::make_shared<ContainOp::Options>(
                     ContainOp::Args{array_res.MoveValueUnsafe(), data_type}));
    if (list_node->exclude()) {
      return cp::not_(std::move(contain_filter));
    }
    return contain_filter;
  }

  // Every remaining operator compares against a literal on the right.
  if (right == nullptr) {
    LOG_ERROR("Missing right operand. expr[%s]", query_node->text().c_str());
    return tl::make_unexpected(
        Status::InvalidArgument("missing right operand", query_node->text()));
  }
  const std::string &filter_value = right->text();

  // Numeric literals are zero-initialised because the conversion results are
  // not checked here; this keeps the literal well defined even if a helper
  // does not write its output on failure.
  cp::Expression right_exp;
  switch (data_type) {
    case DataType::STRING: {
      if (op == sqlengine::QueryNodeOp::Q_LIKE) {
        return cp::call("match_like", {std::move(left_exp)},
                        cp::MatchSubstringOptions(filter_value));
      }
      right_exp = cp::literal(filter_value);
      break;
    }
    case DataType::INT32: {
      int32_t int32_value{0};
      ailego::StringHelper::ToInt32(filter_value, &int32_value);
      right_exp = cp::literal(int32_value);
      break;
    }
    case DataType::UINT32: {
      uint32_t uint32_value{0};
      ailego::StringHelper::ToUint32(filter_value, &uint32_value);
      right_exp = cp::literal(uint32_value);
      break;
    }
    case DataType::INT64: {
      int64_t int64_value{0};
      ailego::StringHelper::ToInt64(filter_value, &int64_value);
      right_exp = cp::literal(int64_value);
      break;
    }
    case DataType::UINT64: {
      uint64_t uint64_value{0};
      ailego::StringHelper::ToUint64(filter_value, &uint64_value);
      right_exp = cp::literal(uint64_value);
      break;
    }
    case DataType::FLOAT: {
      float float_value{0.0F};
      ailego::StringHelper::ToFloat(filter_value, &float_value);
      right_exp = cp::literal(float_value);
      break;
    }
    case DataType::DOUBLE: {
      double double_value{0.0};
      ailego::StringHelper::ToDouble(filter_value, &double_value);
      right_exp = cp::literal(double_value);
      break;
    }
    case DataType::BOOL: {
      // Case-insensitive match of "true"/"false". The character is widened
      // through unsigned char because tolower() is undefined for negative
      // values other than EOF.
      std::string lower_filter_value;
      lower_filter_value.resize(filter_value.size());
      std::transform(filter_value.begin(), filter_value.end(),
                     lower_filter_value.begin(), [](char c) {
                       return static_cast<char>(
                           ::tolower(static_cast<unsigned char>(c)));
                     });
      bool bool_value = false;
      if (lower_filter_value == "true") {
        bool_value = true;
      } else if (lower_filter_value == "false") {
        bool_value = false;
      } else {
        LOG_ERROR("Unrecognized bool value: %s", filter_value.c_str());
        return tl::make_unexpected(
            Status::InvalidArgument("unexpected bool value", filter_value));
      }
      right_exp = cp::literal(bool_value);
      break;
    }
    default: {
      LOG_ERROR("filter to data type is not supported.");
      return tl::make_unexpected(Status::InvalidArgument(
          "filter to data type is not supported", data_type));
    }
  }

  switch (op) {
    case sqlengine::QueryNodeOp::Q_EQ:
      return cp::equal(std::move(left_exp), std::move(right_exp));
    case sqlengine::QueryNodeOp::Q_NE:
      return cp::not_equal(std::move(left_exp), std::move(right_exp));
    case sqlengine::QueryNodeOp::Q_GT:
      return cp::greater(std::move(left_exp), std::move(right_exp));
    case sqlengine::QueryNodeOp::Q_LT:
      return cp::less(std::move(left_exp), std::move(right_exp));
    case sqlengine::QueryNodeOp::Q_GE:
      return cp::greater_equal(std::move(left_exp), std::move(right_exp));
    case sqlengine::QueryNodeOp::Q_LE:
      return cp::less_equal(std::move(left_exp), std::move(right_exp));
    // NOTE: Q_LIKE already handled above
    default:
      break;
  }
  return tl::make_unexpected(Status::InvalidArgument("unexpected op", op));
}

// Recursively converts a filter condition tree into an Arrow compute
// expression.
//
//   - a null node yields the literal `true`;
//   - a relational node is delegated to create_filter_node();
//   - a logic node combines its two parsed children with AND or OR, and the
//     first child error (left before right) is returned unchanged.
// Anything else is reported as InvalidArgument.
Result<cp::Expression> QueryPlanner::parse_filter(const QueryNode *query_node) {
  if (query_node == nullptr) {
    return cp::literal(true);
  }

  const auto node_type = query_node->type();
  if (node_type == QueryNode::QueryNodeType::REL_EXPR) {
    return create_filter_node(query_node);
  }

  if (node_type == QueryNode::QueryNodeType::LOGIC_EXPR) {
    auto lhs = parse_filter(query_node->left_node());
    if (!lhs) {
      return lhs;
    }
    auto rhs = parse_filter(query_node->right_node());
    if (!rhs) {
      return rhs;
    }

    switch (query_node->op()) {
      case QueryNodeOp::Q_AND:
        return cp::and_(std::move(lhs.value()), std::move(rhs.value()));
      case QueryNodeOp::Q_OR:
        return cp::or_(std::move(lhs.value()), std::move(rhs.value()));
      default:
        // Other logic operators fall through to the generic error below.
        break;
    }
  }

  return tl::make_unexpected(
      Status::InvalidArgument("unexpected ", query_node->text()));
}


// Public entry point for planning a query over `segments`.
//
// A logical-plan stage (building a logical plan from the query info and
// optimizing it) is not in place yet, so the physical plan is produced
// directly from the query infos.
Result<PlanInfo::Ptr> QueryPlanner::make_plan(
    const std::vector<Segment::Ptr> &segments, const std::string &trace_id,
    std::vector<sqlengine::QueryInfo::Ptr> *query_infos) {
  auto physical_plan = make_physical_plan(segments, trace_id, query_infos);
  return physical_plan;
}

// Builds the physical plan for a query over `segments`.
//
// `query_infos` must hold one query info per segment (same index). For every
// segment its own query info is optimized, its forward filter is parsed and a
// vector / invert / forward scan plan is created from that same query info.
// Each query info is moved into its segment plan, so the entries of
// `*query_infos` are left empty afterwards.
//
// With a single segment that segment's plan is returned directly. Otherwise
// the segment plans are wrapped in a SegmentNode source, ordered by score when
// the query has a vector condition, and limited to topn unless the query
// groups by a field.
//
// Returns InvalidArgument when there are no segments, when `query_infos` does
// not cover every segment, or when a filter cannot be parsed; scan-building
// errors are returned unchanged.
Result<PlanInfo::Ptr> QueryPlanner::make_physical_plan(
    const std::vector<Segment::Ptr> &segments, const std::string & /*trace_id*/,
    std::vector<sqlengine::QueryInfo::Ptr> *query_infos) {
  const std::string &table_name = schema_->name();
  if (segments.empty()) {
    LOG_ERROR("Segment not found [%s]", table_name.c_str());
    return tl::make_unexpected(
        Status::InvalidArgument("segment not found:", table_name));
  }
  // The loop below indexes query_infos by segment index, so fewer entries
  // than segments would be an out-of-bounds access.
  if (query_infos == nullptr || query_infos->size() < segments.size()) {
    LOG_ERROR("Query info missing for segments [%s]", table_name.c_str());
    return tl::make_unexpected(
        Status::InvalidArgument("query info not found:", table_name));
  }

  // Plan-wide settings are captured from the first query info up front: the
  // per-segment entries are moved into their plans inside the loop and must
  // not be read through this pointer afterwards.
  QueryInfo *first_query_info = (*query_infos)[0].get();
  LOG_DEBUG("Making plan for collection[%s] query_info[%s]", table_name.c_str(),
            first_query_info->to_string().c_str());
  int topn = first_query_info->query_topn();
  auto vector_cond = first_query_info->vector_cond_info();
  bool has_group_by = first_query_info->group_by() != nullptr;

  // optimize plan by instrument query info condition, eg adjust invert cond
  Optimizer::Ptr optimizer =
      InvertCondOptimizer::CreateInvertCondOptimizer(schema_);
  const size_t num_segments = segments.size();
  std::vector<PlanInfo::Ptr> segment_plans(num_segments);
  for (size_t idx = 0; idx < num_segments; ++idx) {
    const auto &segment = segments[idx];
    auto &segment_query_info = (*query_infos)[idx];
    bool only_invert_before_opt =
        segment_query_info->invert_cond() != nullptr &&
        segment_query_info->filter_cond() == nullptr;
    if (optimizer) {
      // Optimize by change query info if needed.
      if (!optimizer->optimize(segment.get(), segment_query_info.get())) {
        LOG_DEBUG(
            "Not optimized. collection[%s] segment[%zu] "
            "segment_query_info[%s]",
            table_name.c_str(), static_cast<size_t>(segment->id()),
            segment_query_info->to_string().c_str());
      } else {
        LOG_DEBUG(
            "Optimized. collection[%s] segment[%zu] segment_query_info[%s]",
            table_name.c_str(), static_cast<size_t>(segment->id()),
            segment_query_info->to_string().c_str());
      }
    }
    bool only_forward_after_opt =
        segment_query_info->invert_cond() == nullptr &&
        segment_query_info->filter_cond() != nullptr;
    // if only invert cond before opt and only forward cond after opt,
    // single stage search should be performed as large ratio of docs match
    // with filter
    bool single_stage_search = only_invert_before_opt && only_forward_after_opt;

    // The filter and the scan type are taken from this segment's own query
    // info: the optimizer may have rewritten it differently from the other
    // segments' query infos.
    std::unique_ptr<arrow::compute::Expression> forward_filter;
    if (segment_query_info->filter_cond()) {
      auto filter = parse_filter(segment_query_info->filter_cond().get());
      if (!filter) {
        LOG_ERROR("Parse filter failed: %s", filter.error().c_str());
        return tl::make_unexpected(filter.error());
      }
      forward_filter =
          std::make_unique<cp::Expression>(std::move(filter.value()));
    }

    Result<PlanInfo::Ptr> seg_plan;
    if (segment_query_info->vector_cond_info()) {
      seg_plan = vector_scan(segment, std::move(segment_query_info),
                             std::move(forward_filter), single_stage_search);
    } else if (segment_query_info->invert_cond()) {
      seg_plan = invert_scan(segment, std::move(segment_query_info),
                             std::move(forward_filter));
    } else {
      seg_plan = forward_scan(segment, std::move(segment_query_info),
                              std::move(forward_filter));
    }
    if (!seg_plan) {
      LOG_ERROR("Make plan failed: %s", seg_plan.error().c_str());
      return seg_plan;
    }
    if (num_segments == 1) {
      return seg_plan;
    }
    segment_plans[idx] = std::move(seg_plan.value());
  }

  // multi segment logic
  ailego::ThreadPool *pool = GlobalResource::Instance().query_thread_pool();
  auto recall_node =
      std::make_shared<SegmentNode>(std::move(segment_plans), pool);
  auto source_node_options =
      arrow::acero::SourceNodeOptions{recall_node->schema(), recall_node->gen(),
                                      arrow::compute::Ordering::Implicit()};
  ac::Declaration node{"source", source_node_options};

  if (vector_cond) {
    node = ac::Declaration{"order_by",
                           {std::move(node)},
                           ac::OrderByNodeOptions{cp::Ordering{{cp::SortKey{
                               kFieldScore, vector_cond->is_reverse_sort()
                                                ? cp::SortOrder::Descending
                                                : cp::SortOrder::Ascending}}}}};
  }

  // group by need to collect all docs
  if (!has_group_by) {
    node = ac::Declaration{
        "fetch", {std::move(node)}, ac::FetchNodeOptions{0, topn}};
  }
  return std::make_shared<PlanInfo>(std::move(node), recall_node->schema());
}

// Builds the plan for a segment that is read by scanning its forward
// (scalar) data.
//
// Pipeline: segment scan of the fetched scalar fields -> optional filter
// built from the segment's own filter -> optional forward filter -> optional
// projection that appends the selected vector fields -> fetch of the first
// query_topn rows.
//
// Returns InvalidArgument when a selected vector field has no combined
// vector indexer in the segment.
Result<PlanInfo::Ptr> QueryPlanner::forward_scan(
    Segment::Ptr seg, QueryInfo::Ptr query_info,
    std::unique_ptr<arrow::compute::Expression> forward_filter) {
  auto batch_reader =
      seg->scan(query_info->get_all_fetched_scalar_field_names());
  auto output_schema = batch_reader->schema();
  ac::Declaration plan{
      "record_batch_reader_source",
      ac::RecordBatchReaderSourceNodeOptions{std::move(batch_reader)}};

  // Apply the segment's own filter, if it has one, keyed by local row id.
  auto segment_filter = seg->get_filter();
  if (segment_filter) {
    cp::Expression not_filtered_expr =
        cp::call(kCheckNotFiltered, {cp::field_ref(LOCAL_ROW_ID)},
                 std::make_shared<CheckNotFilteredOp::Options>(segment_filter));
    plan = ac::Declaration{"filter",
                           {std::move(plan)},
                           ac::FilterNodeOptions(std::move(not_filtered_expr))};
  }

  // Then the user's forward condition.
  if (forward_filter) {
    plan = ac::Declaration{"filter",
                           {std::move(plan)},
                           ac::FilterNodeOptions(std::move(*forward_filter))};
  }

  if (query_info->is_include_vector()) {
    // Project every scalar column through unchanged and append one computed
    // column per selected vector field.
    std::vector<std::string> column_names =
        query_info->get_all_fetched_scalar_field_names();
    std::vector<cp::Expression> column_exprs;
    for (const auto &scalar_name : column_names) {
      column_exprs.emplace_back(cp::field_ref(scalar_name));
    }

    for (const auto &vector_field : query_info->selected_vector_fields()) {
      auto indexer = seg->get_combined_vector_indexer(vector_field.field_name);
      if (!indexer) {
        return tl::make_unexpected(Status::InvalidArgument(
            "vector indexer not found:", vector_field.field_name));
      }

      const bool is_dense = vector_field.field_schema_ptr->is_dense_vector();
      column_exprs.emplace_back(cp::call(
          is_dense ? "fetch_vector" : "fetch_sparse_vector",
          {cp::field_ref(LOCAL_ROW_ID)},
          std::make_shared<FetchVectorOp::Options>(indexer, is_dense)));
      if (is_dense) {
        output_schema = Util::append_field(
            *output_schema, vector_field.field_name, arrow::binary());
      } else {
        output_schema = Util::append_field(
            *output_schema, vector_field.field_name, Util::sparse_type());
      }
      column_names.emplace_back(vector_field.field_name);
    }

    plan = ac::Declaration{"project",
                           {std::move(plan)},
                           ac::ProjectNodeOptions{std::move(column_exprs),
                                                  std::move(column_names)}};
  }

  plan = ac::Declaration{"fetch",
                         {std::move(plan)},
                         ac::FetchNodeOptions{0, query_info->query_topn()}};
  return std::make_shared<PlanInfo>(std::move(plan), std::move(output_schema));
}

// Builds the plan for a segment that is searched through its vector index.
//
// When a forward filter exists and single-stage search is off, the filter is
// turned into a separate Acero plan (scan of the filter fields projected to
// the kFieldIsValid column) and handed to the DocFilter; the expression
// itself is then dropped. Otherwise the expression, if any, is passed to the
// DocFilter directly. The VectorRecallNode is used as the plan source and the
// result is limited to topn unless the query groups by a field.
Result<PlanInfo::Ptr> QueryPlanner::vector_scan(
    Segment::Ptr seg, QueryInfo::Ptr query_info,
    std::unique_ptr<arrow::compute::Expression> forward_filter,
    bool single_stage_search) {
  std::unique_ptr<ac::Declaration> filter_plan;
  const bool prefilter_with_plan = forward_filter && !single_stage_search;
  if (prefilter_with_plan) {
    ac::RecordBatchReaderSourceNodeOptions reader_source{
        seg->scan(query_info->get_forward_filter_field_names())};
    filter_plan = std::make_unique<ac::Declaration>(ac::Declaration::Sequence({
        {"record_batch_reader_source", std::move(reader_source)},
        {
            "project",
            ac::ProjectNodeOptions{{std::move(*forward_filter)},
                                   {kFieldIsValid}},
        },
    }));
    // The expression now lives inside the plan; do not pass it on twice.
    forward_filter.reset();
  }

  auto doc_filter = std::make_shared<DocFilter>(
      seg, query_info, std::move(filter_plan), std::move(forward_filter));

  // Read everything needed from query_info before it is moved away below.
  int topn = query_info->query_topn();
  int batch_size = get_batch_size(*query_info, false);
  auto recall_node = std::make_shared<VectorRecallNode>(
      std::move(seg), std::move(query_info), std::move(doc_filter), batch_size,
      single_stage_search);

  auto source_options =
      arrow::acero::SourceNodeOptions{recall_node->schema(), recall_node->gen(),
                                      arrow::compute::Ordering::Implicit()};
  ac::Declaration plan{"source", source_options};

  // A group-by query has to see every document, so no row limit is added.
  const bool limit_rows = !recall_node->query_info()->group_by();
  if (limit_rows) {
    plan = ac::Declaration{
        "fetch", {std::move(plan)}, ac::FetchNodeOptions{0, topn}};
  }
  return std::make_shared<PlanInfo>(std::move(plan), recall_node->schema());
}

// Builds the plan for a segment whose rows are recalled by an
// InvertRecallNode.
//
// Pipeline: InvertRecallNode source -> optional forward filter -> optional
// projection that appends the selected vector fields -> fetch of the first
// topn rows. The batch size depends on whether a forward filter follows.
//
// Returns InvalidArgument when a selected vector field has no combined
// vector indexer in the segment.
Result<PlanInfo::Ptr> QueryPlanner::invert_scan(
    Segment::Ptr seg, QueryInfo::Ptr query_info,
    std::unique_ptr<arrow::compute::Expression> forward_filter) {
  auto topn = query_info->query_topn();
  int batch_size = get_batch_size(*query_info, forward_filter != nullptr);
  auto recall_node =
      std::make_shared<InvertRecallNode>(seg, query_info, batch_size);

  auto source_options =
      arrow::acero::SourceNodeOptions{recall_node->schema(), recall_node->gen(),
                                      arrow::compute::Ordering::Implicit()};
  ac::Declaration plan{"source", source_options};

  // Remaining forward conditions are evaluated on the recalled rows.
  if (forward_filter) {
    plan = ac::Declaration{"filter",
                           {std::move(plan)},
                           ac::FilterNodeOptions(std::move(*forward_filter))};
  }

  auto output_schema = recall_node->schema();
  if (query_info->is_include_vector()) {
    // Project every scalar column through unchanged and append one computed
    // column per selected vector field.
    std::vector<std::string> column_names =
        query_info->get_all_fetched_scalar_field_names();
    std::vector<cp::Expression> column_exprs;
    for (const auto &scalar_name : column_names) {
      column_exprs.emplace_back(cp::field_ref(scalar_name));
    }

    for (const auto &vector_field : query_info->selected_vector_fields()) {
      auto indexer = seg->get_combined_vector_indexer(vector_field.field_name);
      if (!indexer) {
        return tl::make_unexpected(Status::InvalidArgument(
            "vector indexer not found:", vector_field.field_name));
      }

      const bool is_dense = vector_field.field_schema_ptr->is_dense_vector();
      column_exprs.emplace_back(cp::call(
          is_dense ? "fetch_vector" : "fetch_sparse_vector",
          {cp::field_ref(LOCAL_ROW_ID)},
          std::make_shared<FetchVectorOp::Options>(indexer, is_dense)));
      if (is_dense) {
        output_schema = Util::append_field(
            *output_schema, vector_field.field_name, arrow::binary());
      } else {
        output_schema = Util::append_field(
            *output_schema, vector_field.field_name, Util::sparse_type());
      }
      column_names.emplace_back(vector_field.field_name);
    }

    plan = ac::Declaration{"project",
                           {std::move(plan)},
                           ac::ProjectNodeOptions{std::move(column_exprs),
                                                  std::move(column_names)}};
  }

  plan = ac::Declaration{
      "fetch", {std::move(plan)}, ac::FetchNodeOptions{0, topn}};
  return std::make_shared<PlanInfo>(std::move(plan), std::move(output_schema));
}

// Picks the number of rows a recall/scan node should emit per batch.
//
// The cap of 32 * 1024 rows follows the Acero batch-size guidance:
// https://arrow.apache.org/docs/developers/cpp/acero.html#batch-size
//
// @param info              analyzed query (order-by list and topn are read)
// @param has_later_filter  whether a filter is applied after this node
// @return the cap itself when the query has an order-by or a later filter,
//         otherwise min(topn, cap)
int QueryPlanner::get_batch_size(const QueryInfo &info, bool has_later_filter) {
  constexpr auto kMaxBatchRows = 32U * 1024;

  // Only when nothing downstream reorders or drops rows can the batch be
  // shrunk to topn.
  const bool cap_at_topn = info.query_orderbys().empty() && !has_later_filter;
  if (cap_at_topn) {
    return std::min(info.query_topn(), kMaxBatchRows);
  }
  return static_cast<int>(kMaxBatchRows);
}

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/planner/query_planner.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include <string>
#include <arrow/acero/exec_plan.h>
#include <arrow/compute/expression.h>
#include <zvec/ailego/pattern/expected.hpp>
#include <zvec/db/status.h>
#include "db/index/segment/segment.h"
#include "db/sqlengine/analyzer/query_info.h"
#include "plan_info.h"

namespace zvec::sqlengine {

// Turns analyzed queries (QueryInfo) into executable plans (PlanInfo) over a
// set of segments.
class QueryPlanner {
 public:
  // `schema` is a raw pointer; see schema_ below.
  QueryPlanner(CollectionSchema *schema);

  // Entry point: builds one plan covering `segments`.
  // `query_infos` is passed by pointer (mutable); callers in this code base
  // provide one entry per segment.
  // Returns the plan, or an error Status wrapped in the Result.
  Result<PlanInfo::Ptr> make_plan(
      const std::vector<Segment::Ptr> &segments, const std::string &trace_id,
      std::vector<sqlengine::QueryInfo::Ptr> *query_infos);


 private:
  // Plan builder for regular queries; same parameters as make_plan().
  // NOTE(review): presumably make_plan() dispatches between this and the
  // group-by variant below -- confirm in query_planner.cc.
  Result<PlanInfo::Ptr> make_physical_plan(
      const std::vector<Segment::Ptr> &segments, const std::string &trace_id,
      std::vector<sqlengine::QueryInfo::Ptr> *query_infos);

  // Plan builder for group-by queries; same parameters as make_plan().
  Result<PlanInfo::Ptr> make_group_by_physical_plan(
      const std::vector<Segment::Ptr> &segments, const std::string &trace_id,
      std::vector<sqlengine::QueryInfo::Ptr> *query_infos);

 private:
  // Converts a query filter tree into an Arrow compute expression.
  Result<cp::Expression> parse_filter(const QueryNode *node);

  // Builds the Arrow compute expression for a single filter node.
  Result<cp::Expression> create_filter_node(const QueryNode *node);

  // Per-segment scan builders. Each receives the segment, its query info and
  // an optional (nullable unique_ptr) forward filter expression.
  Result<PlanInfo::Ptr> vector_scan(
      Segment::Ptr seg, QueryInfo::Ptr query_info,
      std::unique_ptr<arrow::compute::Expression> forward_filter,
      bool single_stage_search);
  Result<PlanInfo::Ptr> invert_scan(
      Segment::Ptr seg, QueryInfo::Ptr query_info,
      std::unique_ptr<arrow::compute::Expression> forward_filter);
  Result<PlanInfo::Ptr> forward_scan(
      Segment::Ptr seg, QueryInfo::Ptr query_info,
      std::unique_ptr<arrow::compute::Expression> forward_filter);

  // Returns 32 * 1024 when the query has an order-by or `has_later_filter` is
  // set; otherwise min(topn, 32 * 1024).
  static int get_batch_size(const QueryInfo &info, bool has_later_filter);

 private:
  // Raw pointer to the collection schema. SQLEngineImpl passes
  // `collection.get()` from a shared_ptr it keeps alive for the duration of
  // the call, so the planner does not own this object.
  CollectionSchema *schema_{nullptr};
};

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/planner/segment_node.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "db/sqlengine/planner/segment_node.h"
#include <memory>
#include <optional>
#include <arrow/record_batch.h>
#include <zvec/ailego/logger/logger.h>
#include <zvec/ailego/parallel/thread_pool.h>

namespace zvec::sqlengine {

namespace cp = arrow::compute;

// Returns an Arrow async generator that streams the batches of every
// per-segment plan.
//
// The closure keeps this node alive through shared_from_this(), so the node
// must be owned by a std::shared_ptr. On the first invocation all segment
// plans are executed via prepare(); afterwards each invocation yields the next
// batch, draining readers from the back of `readers_` to the front. An empty
// optional signals end-of-stream; failures are reported as
// arrow::Status::ExecutionError.
arrow::AsyncGenerator<std::optional<arrow::compute::ExecBatch>>
SegmentNode::gen() {
  return [self = shared_from_this()]()
             -> arrow::Future<std::optional<arrow::compute::ExecBatch>> {
    using BatchFuture = arrow::Future<std::optional<cp::ExecBatch>>;

    if (!self->prepared_.exchange(true)) {
      auto status = self->prepare();
      if (!status.ok()) {
        // `prepared_` is already set, so prepare() will not run again. Drop
        // the partially filled (possibly null) reader slots so that a later
        // poll ends the stream instead of dereferencing a null reader.
        self->readers_.clear();
        return BatchFuture::MakeFinished(arrow::Status::ExecutionError(
            "prepare segment node failed:", status.c_str()));
      }
    }
    // process backward
    std::shared_ptr<arrow::RecordBatch> batch;
    while (!self->readers_.empty()) {
      auto &back = self->readers_.back();
      auto status = back->ReadNext(&batch);
      if (!status.ok()) {
        return BatchFuture::MakeFinished(arrow::Status::ExecutionError(
            "read next batch failed:", status.ToString()));
      }
      if (batch == nullptr) {
        // This reader is exhausted; move on to the previous one.
        LOG_DEBUG("batch finished: %p", back.get());
        self->readers_.pop_back();
        continue;
      }
      // Dumping whole batch contents is diagnostic output only, so it is
      // logged at debug level like the other per-batch traces.
      LOG_DEBUG("Segment batch: %p %s", back.get(), batch->ToString().c_str());
      cp::ExecBatch exec_batch(*batch);
      return BatchFuture::MakeFinished(std::move(exec_batch));
    }
    // An empty optional tells the consumer that the stream has ended.
    return BatchFuture::MakeFinished(std::nullopt);
  };
}

// Executes every segment plan on the thread pool and stores the resulting
// readers in `readers_` (same index as the plan).
//
// Blocks until all tasks have finished. Returns the error of the first plan
// (in index order) that failed; readers of earlier plans have already been
// stored at that point. Returns Status::OK() when every plan succeeded.
Status SegmentNode::prepare() {
  const size_t plan_count = segment_plans_.size();
  std::vector<Result<std::unique_ptr<arrow::RecordBatchReader>>> outcomes;
  outcomes.resize(plan_count);

  // Fan out: one task per plan, each writing only to its own slot.
  auto group = thread_pool_->make_group();
  for (size_t idx = 0; idx < plan_count; ++idx) {
    group->execute([this, &outcomes, idx]() {
      outcomes[idx] = segment_plans_[idx]->execute_to_reader();
    });
  }
  group->wait_finish();

  // Fan in: hand the readers over, stopping at the first failure.
  for (size_t idx = 0; idx < plan_count; ++idx) {
    auto &outcome = outcomes[idx];
    if (outcome) {
      readers_[idx] = std::move(outcome.value());
      continue;
    }
    return outcome.error();
  }
  return Status::OK();
}


}  // namespace zvec::sqlengine

================================================
FILE: src/db/sqlengine/planner/segment_node.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <atomic>
#include <memory>
#include <optional>
#include <arrow/acero/api.h>
#include <arrow/api.h>
#include <arrow/util/async_generator.h>
#include <zvec/ailego/parallel/thread_pool.h>
#include <zvec/db/status.h>
#include "db/sqlengine/planner/plan_info.h"

namespace zvec::sqlengine {

// Source node that merges the output of several per-segment plans into one
// stream of Arrow ExecBatches.
//
// gen() calls shared_from_this(), so instances must be owned by a
// std::shared_ptr.
class SegmentNode : public std::enable_shared_from_this<SegmentNode> {
 public:
  // `segment_plans` are the per-segment plans to execute.
  // `thread_pool` is a raw pointer that this class stores but never deletes.
  // `readers_` is sized from the member `segment_plans_`, which is valid
  // because `segment_plans_` is declared (and therefore initialized) first.
  SegmentNode(std::vector<PlanInfo::Ptr> segment_plans,
              ailego::ThreadPool *thread_pool)
      : segment_plans_(std::move(segment_plans)),
        thread_pool_(thread_pool),
        readers_(segment_plans_.size()) {}

  //! get schema
  // Returns the schema of the first plan. `segment_plans_` is indexed without
  // an emptiness check, so at least one plan must have been supplied.
  std::shared_ptr<arrow::Schema> schema() const {
    return segment_plans_[0]->schema();
  }

  // Returns a generator that runs prepare() on its first invocation and then
  // yields batches until all readers are exhausted.
  arrow::AsyncGenerator<std::optional<arrow::compute::ExecBatch>> gen();

 private:
  // Executes all plans on the thread pool and fills `readers_`.
  Status prepare();

 private:
  std::vector<PlanInfo::Ptr> segment_plans_;
  ailego::ThreadPool *thread_pool_;

  // One slot per plan; null until prepare() succeeds for that plan. gen()
  // pops a reader once it is exhausted.
  std::vector<std::unique_ptr<arrow::RecordBatchReader>> readers_;
  // Set by the first generator invocation so that prepare() runs only once.
  std::atomic_bool prepared_{false};
};

}  // namespace zvec::sqlengine

================================================
FILE: src/db/sqlengine/planner/vector_recall_node.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "db/sqlengine/planner/vector_recall_node.h"
#include <cstdint>
#include <memory>
#include <string>
#include <arrow/array/builder_binary.h>
#include <arrow/result.h>
#include <zvec/ailego/logger/logger.h>
#include <zvec/ailego/pattern/expected.hpp>
#include <zvec/core/framework/index_meta.h>
#include <zvec/db/index_params.h>
#include <zvec/db/schema.h>
#include <zvec/db/type.h>
#include "db/index/column/vector_column/vector_column_params.h"
#include "db/sqlengine/common/util.h"
#include "db/sqlengine/planner/ops/fetch_vector_op.h"

namespace zvec::sqlengine {

// Builds a vector recall node for one segment and precomputes its output
// schema.
//
// @param segment              segment to search and fetch scalar columns from
// @param query_info           analyzed query
// @param doc_filter           document filter applied during the search
// @param batch_size           maximum number of rows per emitted batch
// @param single_stage_search  selects which scalar columns are fetched (see
//                             the initializer comment below)
//
// The schema is: the fetched scalar columns, then the score column, then one
// column per selected vector field (only when vectors are included), then the
// group id column (only for group-by queries). collect_batch() appends columns
// in the same order.
VectorRecallNode::VectorRecallNode(Segment::Ptr segment,
                                   QueryInfo::Ptr query_info,
                                   DocFilter::Ptr doc_filter, int batch_size,
                                   bool single_stage_search)
    : segment_(std::move(segment)),
      query_info_(std::move(query_info)),
      // sink parameter: move like the other handles instead of copying
      doc_filter_(std::move(doc_filter)),
      batch_size_(batch_size),
      // need fetch filter fields if single stage search, otherwise only fetch
      // selected scalar fields, as forward filter is already performed and
      // order by only support vector score.
      // Uses the member query_info_ (the parameter has been moved from).
      fetched_columns_(single_stage_search
                           ? query_info_->get_all_fetched_scalar_field_names()
                           : query_info_->get_selected_scalar_field_names()) {
  // Fetch with an empty index list only to learn the column schema.
  // NOTE(review): the fetch() result is dereferenced without a null check
  // here, whereas collect_batch() treats a null table as an error -- confirm
  // fetch() cannot fail for an empty index list.
  auto table = segment_->fetch(fetched_columns_, std::vector<int>{});
  schema_ = table->schema();
  schema_ = Util::append_field(*schema_, kFieldScore, arrow::float32());
  if (query_info_->is_include_vector()) {
    for (const auto &field : query_info_->selected_vector_fields()) {
      if (field.field_schema_ptr->is_dense_vector()) {
        schema_ =
            Util::append_field(*schema_, field.field_name, arrow::binary());
      } else {
        schema_ =
            Util::append_field(*schema_, field.field_name, Util::sparse_type());
      }
    }
  }
  if (query_info_->group_by()) {
    schema_ = Util::append_field(*schema_, kFieldGroupId, arrow::utf8());
  }
}

// Returns an Arrow async generator over the recall results of this node.
//
// Each generator owns its own State, which in turn keeps this node alive via
// shared_from_this() (so the node must be owned by a std::shared_ptr). The
// vector search runs lazily on the first invocation; every invocation then
// yields up to `batch_size_` rows, and an empty optional once the result
// iterator is exhausted. Failures surface as arrow::Status::ExecutionError.
arrow::AsyncGenerator<std::optional<cp::ExecBatch>> VectorRecallNode::gen() {
  using BatchFuture = arrow::Future<std::optional<cp::ExecBatch>>;

  return [ctx = std::make_shared<State>(shared_from_this())]() mutable
         -> BatchFuture {
    // Lazy search: no iterator yet means the search has not succeeded so far.
    if (!ctx->iter_) {
      auto search = ctx->self_->prepare();
      if (!search) {
        return BatchFuture::MakeFinished(arrow::Status::ExecutionError(
            "prepare vector failed:", search.error().c_str()));
      }
      ctx->vector_result_ = search.value();
      ctx->iter_ = ctx->vector_result_->create_iterator();
    }

    // Nothing left to emit: finish the stream.
    if (!ctx->iter_->valid()) {
      return BatchFuture::MakeFinished(std::nullopt);
    }

    auto collected = ctx->collect_batch();
    if (!collected.ok()) {
      return BatchFuture::MakeFinished(arrow::Status::ExecutionError(
          "collect batch failed:", collected.status().ToString()));
    }
    cp::ExecBatch exec_batch(*collected.ValueOrDie());
    return BatchFuture::MakeFinished(std::move(exec_batch));
  };
}

// Renders element 0 of `array` as the textual group id.
//
// @param schema  field schema whose data_type() selects how `array` is read
// @param array   Arrow array holding the value at index 0; its concrete type
//                must match schema->data_type()
// @return "" for a null value or an unsupported data type (the latter is also
//         logged); otherwise the string itself for STRING, "true"/"false" for
//         BOOL, and std::to_string() of the value for numeric types (note that
//         std::to_string formats floating point values with six decimals).
std::string decode_group_id_from_forward(const FieldSchema *schema,
                                         const arrow::Array &array) {
  if (array.IsNull(0)) {
    return "";
  }

  // Shared formatting for all numeric array types.
  const auto first_value_text = [](const auto &typed) {
    return std::to_string(typed.Value(0));
  };

  switch (schema->data_type()) {
    case DataType::INT32:
      return first_value_text(static_cast<const arrow::Int32Array &>(array));
    case DataType::UINT32:
      return first_value_text(static_cast<const arrow::UInt32Array &>(array));
    case DataType::INT64:
      return first_value_text(static_cast<const arrow::Int64Array &>(array));
    case DataType::UINT64:
      return first_value_text(static_cast<const arrow::UInt64Array &>(array));
    case DataType::FLOAT:
      return first_value_text(static_cast<const arrow::FloatArray &>(array));
    case DataType::DOUBLE:
      return first_value_text(static_cast<const arrow::DoubleArray &>(array));
    case DataType::STRING:
      return static_cast<const arrow::StringArray &>(array).GetString(0);
    case DataType::BOOL: {
      const bool flag =
          static_cast<const arrow::BooleanArray &>(array).Value(0);
      return flag ? "true" : "false";
    }
    default:
      LOG_ERROR("Unsupported data type: %d",
                static_cast<int>(schema->data_type()));
      return "";
  }
}

// Runs the vector search for this node's segment.
//
// Steps, in order:
//   1. compute the document filter;
//   2. pick the vector indexer: the quantized one when the vector field's
//      index params specify a quantize type other than UNDEFINED, the plain
//      one otherwise;
//   3. fill the query parameters (topk, data type, dimension, brute-force
//      keys, filter, optional group-by callback);
//   4. build the dense or sparse query vector and call Search().
//
// @return the search results, or an error Status when the filter cannot be
//         computed, the indexer is missing, or the search itself fails.
Result<IndexResults::Ptr> VectorRecallNode::prepare() {
  auto filter_status = doc_filter_->compute_filter();
  if (!filter_status.ok()) {
    return tl::make_unexpected(filter_status);
  }
  auto &vector_cond = query_info_->vector_cond_info();
  CombinedVectorColumnIndexer::Ptr vector_indexer;
  if (auto *vector_params = dynamic_cast<const VectorIndexParams *>(
          vector_cond->vector_schema()->index_params().get());
      vector_params == nullptr ||
      vector_params->quantize_type() == QuantizeType::UNDEFINED) {
    vector_indexer = segment_->get_combined_vector_indexer(
        vector_cond->vector_field_name());
  } else {
    vector_indexer = segment_->get_quant_combined_vector_indexer(
        vector_cond->vector_field_name());
  }
  if (!vector_indexer) {
    return tl::make_unexpected(Status::InvalidArgument(
        "vector index not found:", vector_cond->vector_field_name()));
  }
  vector_column_params::QueryParams query_params;
  query_params.topk = query_info_->query_topn();
  query_params.data_type = vector_cond->vector_schema()->data_type();
  query_params.dimension = vector_cond->dimension();
  query_params.query_params = vector_cond->query_params();
  auto brute_force_keys = doc_filter_->get_bf_by_keys_and_update();
  if (brute_force_keys) {
    query_params.bf_pks.emplace_back(std::move(brute_force_keys.value()));
  }
  // set filter after brute force check
  query_params.filter = doc_filter_->empty() ? nullptr : doc_filter_.get();
  if (const auto &group_by = query_info_->group_by(); group_by) {
    // The callback is stored in query_params and invoked by Search() below,
    // i.e. after `group_by` has gone out of scope. Capture the field name by
    // value so the callback never depends on that if-scoped reference.
    auto group_fun = [this, group_field = group_by->group_by_field](
                         uint64_t row_id) -> std::string {
      auto table = segment_->fetch({group_field},
                                   std::vector<int>{static_cast<int>(row_id)});
      if (!table) {
        LOG_ERROR("Fetch group by field failed: field[%s] row_id[%zu]",
                  group_field.c_str(), static_cast<size_t>(row_id));
        return {};
      }
      if (table->num_rows() != 1) {
        LOG_ERROR(
            "Fetch group by field failed: field[%s] row_id[%zu] rows[%zu]",
            group_field.c_str(), static_cast<size_t>(row_id),
            static_cast<size_t>(table->num_rows()));
        return {};
      }
      // A null group value maps to the empty group id.
      if (table->column(0)->chunk(0)->IsNull(0)) {
        return {};
      }
      return decode_group_id_from_forward(query_info_->group_by_schema_ptr(),
                                          *table->column(0)->chunk(0));
    };
    query_params.group_by =
        std::make_unique<vector_column_params::GroupByParams>(
            group_by->group_topk, group_by->group_count, std::move(group_fun));
  }

  vector_column_params::VectorData vector_data;
  if (vector_cond->vector_schema()->is_dense_vector()) {
    vector_data.vector =
        vector_column_params::DenseVector{vector_cond->vector_term().data()};
  } else {
    vector_data.vector = vector_column_params::SparseVector{
        vector_cond->sparse_count(),
        vector_cond->vector_sparse_indices().data(),
        vector_cond->vector_sparse_values().data()};
  }

  auto vector_ret = vector_indexer->Search(vector_data, query_params);
  if (!vector_ret) {
    return tl::make_unexpected(vector_ret.error());
  }
  return vector_ret;
}

// Pulls up to `batch_size_` hits from the result iterator and materializes
// them as one record batch.
//
// Column order matches the schema built in the constructor: fetched scalar
// columns, score, selected vector fields (only when vectors are included),
// group id (only for group-by queries).
//
// @return the batch, or an Arrow error status when appending to a builder,
//         fetching columns/vectors or assembling the batch fails.
arrow::Result<std::shared_ptr<arrow::RecordBatch>>
VectorRecallNode::State::collect_batch() {
  auto &info = *self_->query_info_;
  const bool grouped = static_cast<bool>(info.group_by());
  const int limit = self_->batch_size_;

  // Drain the iterator: row ids, scores and (optionally) group ids.
  std::vector<int> row_ids;
  row_ids.reserve(limit);
  arrow::FloatBuilder score_builder;
  arrow::StringBuilder group_builder;
  int taken = 0;
  while (iter_->valid() && taken < limit) {
    row_ids.push_back(iter_->doc_id());
    ARROW_RETURN_NOT_OK(score_builder.Append(iter_->score()));
    if (grouped) {
      ARROW_RETURN_NOT_OK(group_builder.Append(iter_->group_id()));
    }
    ++taken;
    iter_->next();
  }

  // Scalar columns for the collected rows.
  auto table = self_->segment_->fetch(self_->fetched_columns_, row_ids);
  if (!table) {
    return arrow::Status::ExecutionError("fetch table failed");
  }
  auto combined = table->CombineChunksToBatch();
  if (!combined.ok()) {
    return arrow::Status::ExecutionError("combine chunks to batch failed:",
                                         combined.status().ToString());
  }
  auto scores = score_builder.Finish();
  if (!scores.ok()) {
    return arrow::Status::ExecutionError("finish builder failed:",
                                         scores.status().ToString());
  }

  // Score column goes right after the scalar columns.
  auto record_batch = combined.MoveValueUnsafe();
  ARROW_ASSIGN_OR_RAISE(
      record_batch,
      record_batch->AddColumn(record_batch->num_columns(), kFieldScore,
                              scores.MoveValueUnsafe()));

  // Optional vector columns, one per selected vector field.
  if (info.is_include_vector()) {
    for (auto &field : info.selected_vector_fields()) {
      const bool dense = field.field_schema_ptr->is_dense_vector();
      Result<std::shared_ptr<arrow::Array>> vectors;
      if (dense) {
        vectors = FetchVectorOp::fetch_dense_vector(*self_->segment_,
                                                    field.field_name, row_ids);
      } else {
        vectors = FetchVectorOp::fetch_sparse_vector(
            *self_->segment_, field.field_name, row_ids);
      }
      if (!vectors) {
        return arrow::Status::ExecutionError("fetch vector failed:",
                                             vectors.error().c_str());
      }
      ARROW_ASSIGN_OR_RAISE(
          record_batch,
          record_batch->AddColumn(record_batch->num_columns(), field.field_name,
                                  std::move(vectors.value())));
    }
  }

  // Optional group id column comes last.
  if (grouped) {
    auto group_ids = group_builder.Finish();
    if (!group_ids.ok()) {
      return arrow::Status::ExecutionError("finish group id builder failed:",
                                           group_ids.status().ToString());
    }
    ARROW_ASSIGN_OR_RAISE(
        record_batch,
        record_batch->AddColumn(record_batch->num_columns(), kFieldGroupId,
                                group_ids.MoveValueUnsafe()));
  }

  LOG_DEBUG("Record batch: %s", record_batch->ToString().c_str());
  return record_batch;
}

}  // namespace zvec::sqlengine

================================================
FILE: src/db/sqlengine/planner/vector_recall_node.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include <arrow/acero/api.h>
#include <arrow/api.h>
#include <zvec/db/status.h>
#include "db/index/column/common/index_results.h"
#include "db/index/segment/segment.h"
#include "db/sqlengine/analyzer/query_info.h"
#include "db/sqlengine/planner/doc_filter.h"

namespace zvec::sqlengine {

// Source node that runs a vector search on one segment and streams the hits
// (scalar columns, score, optional vectors, optional group id) as Arrow
// ExecBatches.
//
// gen() calls shared_from_this(), so instances must be owned by a
// std::shared_ptr.
class VectorRecallNode : public std::enable_shared_from_this<VectorRecallNode> {
 public:
  using Ptr = std::shared_ptr<VectorRecallNode>;
  // `batch_size` bounds the number of rows per emitted batch.
  // `single_stage_search` selects whether all fetched scalar fields or only
  // the selected ones are read from the segment.
  VectorRecallNode(Segment::Ptr segment, QueryInfo::Ptr query_info,
                   DocFilter::Ptr doc_filter, int batch_size,
                   bool single_stage_search);

  //! get schema
  // Output schema computed once in the constructor.
  std::shared_ptr<arrow::Schema> schema() const {
    return schema_;
  }

  // Returns a generator that performs the search on first use and then
  // yields batches until the result iterator is exhausted.
  arrow::AsyncGenerator<std::optional<cp::ExecBatch>> gen();

  const QueryInfo::Ptr &query_info() const {
    return query_info_;
  }

 private:
  // Computes the filter, selects the indexer and runs the vector search.
  Result<IndexResults::Ptr> prepare();

 private:
  // Per-generator iteration state; holds a strong reference to the node.
  struct State {
    State(VectorRecallNode::Ptr self) : self_(std::move(self)) {}

    // Materializes the next batch of at most `batch_size_` rows.
    arrow::Result<std::shared_ptr<arrow::RecordBatch>> collect_batch();

    VectorRecallNode::Ptr self_;
    // Search results; kept alongside the iterator created from them.
    IndexResults::Ptr vector_result_;
    // Null until the search has run successfully.
    IndexResults::IteratorUPtr iter_;
  };

  Segment::Ptr segment_;
  QueryInfo::Ptr query_info_;
  DocFilter::Ptr doc_filter_;
  int batch_size_;
  // Reference member bound in the constructor to the result of a
  // query_info_ getter. It must stay declared after query_info_, because the
  // initializer reads that member.
  // NOTE(review): presumably the getters return references to vectors owned
  // by *query_info_; if either returns by value this reference would dangle
  // -- confirm against QueryInfo.
  const std::vector<std::string> &fetched_columns_;
  std::shared_ptr<arrow::Schema> schema_;
};

}  // namespace zvec::sqlengine

================================================
FILE: src/db/sqlengine/sqlengine.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "db/sqlengine/sqlengine_impl.h"


namespace zvec::sqlengine {

// Factory for the engine: always instantiates SQLEngineImpl and hands it back
// through the SQLEngine interface pointer. `profiler` is forwarded to the
// implementation's constructor.
SQLEngine::Ptr SQLEngine::create(zvec::Profiler::Ptr profiler) {
  SQLEngine::Ptr engine =
      std::make_shared<SQLEngineImpl>(std::move(profiler));
  return engine;
}

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/sqlengine.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <zvec/db/doc.h>
#include <zvec/db/status.h>
#include "db/common/profiler.h"
#include "db/index/segment/segment.h"

namespace zvec::sqlengine {

// Abstract interface of the query engine. Instances are obtained through
// create(); the destructor is defined out of line in sqlengine_impl.cc.
class SQLEngine {
 public:
  using Ptr = std::shared_ptr<SQLEngine>;
  virtual ~SQLEngine();

  // Runs a vector query against `segments` of `collection`.
  // Returns the matching documents, or an error Status.
  virtual Result<DocPtrList> execute(
      CollectionSchema::Ptr collection, const VectorQuery &query,
      const std::vector<Segment::Ptr> &segments) = 0;

  // Runs a group-by vector query against `segments` of `collection`.
  // Returns the grouped results, or an error Status.
  virtual Result<GroupResults> execute_group_by(
      CollectionSchema::Ptr collection,
      const GroupByVectorQuery &group_by_query,
      const std::vector<Segment::Ptr> &segments) = 0;

 public:
  // Creates the default implementation (SQLEngineImpl) using `profiler`.
  static SQLEngine::Ptr create(zvec::Profiler::Ptr profiler);
};

}  // namespace zvec::sqlengine


================================================
FILE: src/db/sqlengine/sqlengine_impl.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License

#include "db/sqlengine/sqlengine_impl.h"
#include <unordered_map>
#include <zvec/ailego/logger/logger.h>
#include <zvec/db/doc.h>
#include <zvec/db/type.h>
#include "db/common/constants.h"
#include "db/sqlengine/analyzer/query_analyzer.h"
#include "db/sqlengine/parser/sql_info_helper.h"
#include "db/sqlengine/parser/zvec_parser.h"
#include "db/sqlengine/planner/op_register.h"
#include "db/sqlengine/planner/query_planner.h"

namespace zvec::sqlengine {

void global_init() {
  static std::once_flag once;
  // run once
  std::call_once(once, []() {
    auto status = arrow::compute::Initialize();
    if (!status.ok()) {
      LOG_ERROR("arrow compute init failed: [%s]", status.ToString().c_str());
      abort();
    }
    status = OpRegister::register_ops();
    if (!status.ok()) {
      LOG_ERROR("arrow compute register op failed: [%s]",
                status.ToString().c_str());
      abort();
    }
  });
}

// Out-of-line definition of the virtual destructor declared in sqlengine.h.
SQLEngine::~SQLEngine() = default;

// Stores the profiler; the member functions of this class use it to open and
// close named stages around parsing, analysis and planning.
SQLEngineImpl::SQLEngineImpl(zvec::Profiler::Ptr profiler)
    : profiler_(std::move(profiler)) {}

// Executes a vector query over `segments`.
//
// @param collection  schema of the collection being queried
// @param query       the vector query
// @param segments    segments to search
// @return an empty list when there are no segments or the filter can never
//         match; the parse error when the request is invalid; an
//         InternalError when planning/execution fails; otherwise the
//         documents produced by fill_result().
Result<DocPtrList> SQLEngineImpl::execute(
    CollectionSchema::Ptr collection, const VectorQuery &query,
    const std::vector<Segment::Ptr> &segments) {
  if (segments.empty()) {
    return DocPtrList{};
  }

  auto parsed = parse_request(collection, query, nullptr);
  if (!parsed) {
    return tl::make_unexpected(parsed.error());
  }
  const QueryInfo::Ptr &info = parsed.value();

  // A filter that cannot match anything short-circuits to an empty result.
  if (info->is_filter_unsatisfiable()) {
    LOG_WARN("filter is unsatisfiable: %s", info->to_string().c_str());
    return {};
  }

  const auto &select_schemas = info->select_item_schema_ptrs();
  // Every segment starts out with the same analyzed query.
  std::vector<QueryInfo::Ptr> per_segment_infos(segments.size(), info);
  auto reader = search_by_query_info(collection, segments, &per_segment_infos);
  if (reader) {
    return fill_result(select_schemas, reader.value().get());
  }
  return tl::make_unexpected(
      Status::InternalError("Execute plan failed: ", reader.error().c_str()));
}

// Derives the plain VectorQuery that underlies a group-by query: every shared
// field is copied over and topk_ is set to 0.
VectorQuery from_group_by(const GroupByVectorQuery &gq) {
  VectorQuery plain;
  plain.topk_ = 0;

  // What to search: target field and the dense / sparse query payload.
  plain.field_name_ = gq.field_name_;
  plain.query_vector_ = gq.query_vector_;
  plain.query_sparse_indices_ = gq.query_sparse_indices_;
  plain.query_sparse_values_ = gq.query_sparse_values_;

  // How to search and what to return.
  plain.query_params_ = gq.query_params_;
  plain.filter_ = gq.filter_;
  plain.output_fields_ = gq.output_fields_;
  plain.include_vector_ = gq.include_vector_;
  return plain;
}

// Executes a group-by vector query over `segments`.
//
// @param collection      schema of the collection being queried
// @param group_by_query  the group-by query
// @param segments        segments to search
// @return empty results when there are no segments or the filter can never
//         match; the parse error when the request is invalid; an
//         InternalError when planning/execution fails; otherwise the groups
//         produced by fill_group_by_result().
Result<GroupResults> SQLEngineImpl::execute_group_by(
    CollectionSchema::Ptr collection, const GroupByVectorQuery &group_by_query,
    const std::vector<Segment::Ptr> &segments) {
  if (segments.empty()) {
    return GroupResults{};
  }

  // Split the request into the underlying vector query and the grouping spec.
  const VectorQuery base_query = from_group_by(group_by_query);
  auto group_spec = std::make_shared<GroupBy>(
      group_by_query.group_by_field_name_, group_by_query.group_topk_,
      group_by_query.group_count_);

  auto parsed = parse_request(collection, base_query, std::move(group_spec));
  if (!parsed) {
    return tl::make_unexpected(parsed.error());
  }
  const QueryInfo::Ptr &info = parsed.value();

  // A filter that cannot match anything short-circuits to an empty result.
  if (info->is_filter_unsatisfiable()) {
    LOG_WARN("filter is unsatisfiable: %s", info->to_string().c_str());
    return {};
  }

  // Every segment starts out with the same analyzed query.
  std::vector<QueryInfo::Ptr> per_segment_infos(segments.size(), info);
  auto reader = search_by_query_info(collection, segments, &per_segment_infos);
  if (reader) {
    return fill_group_by_result(*info, reader.value().get());
  }
  return tl::make_unexpected(
      Status::InternalError("Execute plan failed: ", reader.error().c_str()));
}

// Runs the analyzer over `sql_info` inside the "analyze stage" profiler stage.
//
// @param schema    collection schema the query is analyzed against
// @param sql_info  parsed request
// @return the analyzed query, or InvalidArgument carrying the analyzer error.
Result<QueryInfo::Ptr> SQLEngineImpl::parse_sql_info(
    const CollectionSchema &schema, const SQLInfo::Ptr &sql_info) {
  profiler_->open_stage("analyze stage");
  QueryAnalyzer analyzer;
  auto query_info = analyzer.analyze(schema, sql_info);
  // Close the stage on both outcomes so that a failed analysis does not leave
  // it open.
  profiler_->close_stage();
  if (!query_info) {
    return tl::make_unexpected(Status::InvalidArgument(
        "Analyze sql info failed:", query_info.error().c_str()));
  }
  LOG_DEBUG("query_info: [%s]", query_info.value()->to_string().c_str());
  return query_info.value();
}

// Converts a request into an analyzed QueryInfo.
//
// Inside the "message_to_sqlinfo" profiler stage the filter string (if any) is
// parsed, the optional group-by spec is validated and the request is turned
// into a SQLInfo; the result is then passed to parse_sql_info().
//
// @param collection  schema of the collection being queried
// @param request     the vector query
// @param group_by    optional grouping spec (null for plain queries)
// @return the analyzed query, or InvalidArgument when the filter does not
//         parse, the group-by spec has an empty field / zero count / zero
//         topk, or the conversion reports an error message.
Result<QueryInfo::Ptr> SQLEngineImpl::parse_request(
    CollectionSchema::Ptr collection, const VectorQuery &request,
    std::shared_ptr<GroupBy> group_by) {
  profiler_->open_stage("message_to_sqlinfo");
  sqlengine::SQLInfo::Ptr sql_info;
  std::string err_msg;
  Node::Ptr filter_node;
  if (!request.filter_.empty()) {
    ZVecParser::Ptr parser = ZVecParser::create();
    filter_node = parser->parse_filter(request.filter_);
    if (filter_node == nullptr) {
      LOG_ERROR("parse filter failed. reason:[%s] filter:[%s]",
                parser->err_msg().c_str(), request.filter_.c_str());
      // Do not leave the stage open on the error path.
      profiler_->close_stage();
      return tl::make_unexpected(
          Status::InvalidArgument("Invalid filter:", parser->err_msg()));
    }
  }
  if (group_by) {
    const auto &group = *group_by;
    if (group.group_by_field.empty() || group.group_count == 0 ||
        group.group_topk == 0) {
      // Do not leave the stage open on the error path.
      profiler_->close_stage();
      return tl::make_unexpected(Status::InvalidArgument(
          "Invalid group by request: group_by: ", group.group_by_field,
          " group_count: ", group.group_count,
          " group_topk: ", group.group_topk));
    }
  }

  sqlengine::SQLInfoHelper::MessageToSQLInfo(&request, std::move(filter_node),
                                             std::move(group_by), &sql_info,
                                             &err_msg);
  profiler_->close_stage();
  // The conversion reports failure through `err_msg`.
  if (!err_msg.empty()) {
    LOG_ERROR("QueryAgent, message to sql info failed, err_msg: %s",
              err_msg.c_str());
    return tl::make_unexpected(
        Status::InvalidArgument("To sql info failed:", err_msg));
  }
  LOG_DEBUG("Sql info is %s", sql_info->to_string().c_str());
  return parse_sql_info(*collection, std::move(sql_info));
}

// Plans the query for `segments` and starts executing the plan.
//
// global_init() is invoked first so that Arrow compute and the custom
// operators are registered. Planning happens inside the "plan stage" profiler
// stage.
//
// @param collection   schema handed to the planner
// @param segments     segments to search
// @param query_infos  analyzed queries passed on to the planner
// @return a reader over the plan's output, or the planner's / executor's
//         error.
Result<std::unique_ptr<arrow::RecordBatchReader>>
SQLEngineImpl::search_by_query_info(
    CollectionSchema::Ptr collection, const std::vector<Segment::Ptr> &segments,
    std::vector<sqlengine::QueryInfo::Ptr> *query_infos) {
  global_init();

  profiler_->open_stage("plan stage");
  QueryPlanner planner(collection.get());
  auto plan_info =
      planner.make_plan(segments, profiler_->trace_id(), query_infos);
  // Close the stage on both outcomes so that a failed plan does not leave it
  // open.
  profiler_->close_stage();
  if (!plan_info) {
    LOG_ERROR("plan query_info failed: [%s]", plan_info.error().c_str());
    return tl::make_unexpected(plan_info.error());
  }
  // LOG_DEBUG("plan_info: [%s]", plan_info->to_string().c_str());
  return plan_info.value()->execute_to_reader();
}

// Declares a local variable `res` holding the column named `field_name` of the
// variable `record_batch`, which must be in scope where the macro is expanded.
// When the column does not exist, the enclosing function returns
// Status::InternalError, so the macro is only usable in functions whose return
// type can be built from a Status.
#define GET_FIELD_FROM_RECORD_BATCH(res, field_name)                         \
  auto res = record_batch.GetColumnByName(field_name);                       \
  if (!res) {                                                                \
    return Status::InternalError("Get column by name failed: ", field_name); \
  }

// Copies `size` elements of type T out of a raw byte buffer.
//
// The bytes are copied with memcpy, so `data` does not need to be aligned for
// T.
//
// @param data  start of the buffer; must hold at least size * sizeof(T) bytes
//              (may be null when `size` is 0)
// @param size  number of elements (not bytes) to copy
// @return a vector with `size` elements
template <typename T>
std::vector<T> to_vector(const char *data, size_t size) {
  std::vector<T> vec(size);
  // memcpy requires valid pointers even for a zero-length copy, and both
  // `data` and `vec.data()` may be null for an empty input.
  if (size != 0) {
    memcpy(vec.data(), data, size * sizeof(T));
  }
  return vec;
}

//! Copy a sparse-vector struct column into the docs starting at `doc_it`.
//!
//! Child 0 of the struct is read as the packed uint32_t indices and child 1 as
//! the packed VectorType values. Null rows are skipped (the doc is left
//! untouched); a row whose index count differs from its value count aborts
//! with InvalidArgument.
template <typename VectorType>
Status fill_doc_sparse_vector(const arrow::StructArray *typed_arr,
                              const std::string &field_name,
                              DocPtrList::iterator doc_it) {
  const auto *index_col =
      static_cast<const arrow::BinaryArray *>(typed_arr->field(0).get());
  const auto *value_col =
      static_cast<const arrow::BinaryArray *>(typed_arr->field(1).get());
  const bool may_have_null = typed_arr->null_count() > 0;

  for (int64_t row = 0; row < typed_arr->length(); ++row, ++doc_it) {
    if (!may_have_null || !typed_arr->IsNull(row)) {
      const auto index_bytes = index_col->GetView(row);
      const auto value_bytes = value_col->GetView(row);
      const uint32_t count = index_bytes.size() / sizeof(uint32_t);
      if (count != value_bytes.size() / sizeof(VectorType)) {
        return Status::InvalidArgument(
            "Dimension not match:", count, " vs ",
            value_bytes.size() / sizeof(VectorType));
      }
      auto sparse_indices = to_vector<uint32_t>(index_bytes.data(), count);
      auto sparse_values = to_vector<VectorType>(value_bytes.data(), count);
      (*doc_it)->set(field_name, std::make_pair(std::move(sparse_indices),
                                                std::move(sparse_values)));
    }
  }
  return Status::OK();
}

//! Copy a dense-vector binary column into the docs starting at `doc_it`.
//!
//! Each non-null row is reinterpreted as `dimension` elements of VectorType.
//! A row whose byte length does not correspond to `dimension` elements aborts
//! with InvalidArgument; null rows leave the doc untouched.
template <typename VectorType>
Status fill_doc_vector(const arrow::BinaryArray *typed_arr,
                       const std::string &field_name, int dimension,
                       DocPtrList::iterator doc_it) {
  const bool has_null = typed_arr->null_count() != 0;
  for (int64_t row = 0; row < typed_arr->length(); ++row, ++doc_it) {
    if (has_null && typed_arr->IsNull(row)) {
      continue;
    }
    const auto bytes = typed_arr->GetView(row);
    const size_t stored_dim = bytes.size() / sizeof(VectorType);
    if (static_cast<size_t>(dimension) != stored_dim) {
      return Status::InvalidArgument("Dimension not match:", dimension, " vs ",
                                     stored_dim);
    }
    const auto *first = reinterpret_cast<const VectorType *>(bytes.data());
    (*doc_it)->set(field_name,
                   std::vector<VectorType>(first, first + dimension));
  }
  return Status::OK();
}

//! Copy a scalar column into the docs starting at `doc_it`.
//!
//! `arr` is treated as an ArrowArrayType. String and binary array types are
//! read with GetString(); every other type with Value(). Null rows leave the
//! corresponding doc untouched.
template <typename ArrowArrayType>
Status fill_doc_field(const arrow::Array *arr, const std::string &field_name,
                      DocPtrList::iterator doc_it) {
  constexpr bool kReadAsString =
      std::is_same_v<ArrowArrayType, arrow::StringArray> ||
      std::is_same_v<ArrowArrayType, arrow::LargeStringArray> ||
      std::is_same_v<ArrowArrayType, arrow::BinaryArray> ||
      std::is_same_v<ArrowArrayType, arrow::LargeBinaryArray>;

  const auto *column = static_cast<const ArrowArrayType *>(arr);
  const bool has_null = column->null_count() != 0;
  for (int64_t row = 0; row < column->length(); ++row, ++doc_it) {
    if (has_null && column->IsNull(row)) {
      continue;
    }
    if constexpr (kReadAsString) {
      (*doc_it)->set(field_name, column->GetString(row));
    } else {
      (*doc_it)->set(field_name, column->Value(row));
    }
  }
  return Status::OK();
}

//! Copy a list column into the docs starting at `doc_it`.
//!
//! `arr` is treated as an arrow::ListArray whose child values array must be an
//! ArrowArrayType. Each non-null row becomes a std::vector<ElementType> holding
//! that row's elements; null rows leave the doc untouched.
//!
//! \return InternalError when the child values array is not an ArrowArrayType,
//!         OK otherwise
template <typename ArrowArrayType, typename ElementType>
Status fill_doc_array_field(const arrow::Array *arr,
                            const std::string &field_name,
                            DocPtrList::iterator doc_it) {
  const auto *list_arr = static_cast<const arrow::ListArray *>(arr);
  const auto *typed_arr =
      dynamic_cast<const ArrowArrayType *>(list_arr->values().get());
  // dynamic_cast yields nullptr on a type mismatch; report it instead of
  // dereferencing a null pointer in the element loop below.
  if (typed_arr == nullptr) {
    return Status::InternalError(
        "Unexpected element array type for list field: ", field_name);
  }
  const bool has_null = list_arr->null_count() > 0;
  for (int64_t i = 0; i < list_arr->length(); ++i, ++doc_it) {
    if (has_null && list_arr->IsNull(i)) {
      continue;
    }
    // Rows are slices [offset, offset + length) of the shared values array.
    const int64_t offset = list_arr->value_offset(i);
    const int64_t length = list_arr->value_length(i);
    std::vector<ElementType> vec(length);
    for (int64_t j = 0; j < length; ++j) {
      vec[j] = typed_arr->Value(offset + j);
    }
    (*doc_it)->set(field_name, std::move(vec));
  }
  return Status::OK();
}

//! Route one column to the filler that matches the field's declared data type.
//!
//! \param chunk         column data for the field
//! \param field_schema  supplies the field name, data type and (for dense
//!                      vectors) the dimension
//! \param doc_it        first doc to fill; one doc per row
//! \return the selected filler's status, or InvalidArgument for a data type
//!         that has no filler
Status fill_doc_field(const std::shared_ptr<arrow::Array> &chunk,
                      const FieldSchema &field_schema,
                      DocPtrList::iterator doc_it) {
  const arrow::Array *column = chunk.get();
  const auto &name = field_schema.name();

  // The downcasts are only valid for the matching data types, so they are
  // performed lazily inside the branch that needs them.
  const auto as_binary = [column] {
    return static_cast<const arrow::BinaryArray *>(column);
  };
  const auto as_struct = [column] {
    return static_cast<const arrow::StructArray *>(column);
  };

  switch (field_schema.data_type()) {
    // ---- scalar columns ----
    case DataType::INT32:
      return fill_doc_field<arrow::Int32Array>(column, name, doc_it);
    case DataType::UINT32:
      return fill_doc_field<arrow::UInt32Array>(column, name, doc_it);
    case DataType::INT64:
      return fill_doc_field<arrow::Int64Array>(column, name, doc_it);
    case DataType::UINT64:
      return fill_doc_field<arrow::UInt64Array>(column, name, doc_it);
    case DataType::FLOAT:
      return fill_doc_field<arrow::FloatArray>(column, name, doc_it);
    case DataType::DOUBLE:
      return fill_doc_field<arrow::DoubleArray>(column, name, doc_it);
    case DataType::BOOL:
      return fill_doc_field<arrow::BooleanArray>(column, name, doc_it);
    case DataType::BINARY:
      return fill_doc_field<arrow::BinaryArray>(column, name, doc_it);
    case DataType::STRING:
      return fill_doc_field<arrow::StringArray>(column, name, doc_it);

    // ---- list columns ----
    case DataType::ARRAY_INT32:
      return fill_doc_array_field<arrow::Int32Array, int32_t>(column, name,
                                                              doc_it);
    case DataType::ARRAY_INT64:
      return fill_doc_array_field<arrow::Int64Array, int64_t>(column, name,
                                                              doc_it);
    case DataType::ARRAY_UINT32:
      return fill_doc_array_field<arrow::UInt32Array, uint32_t>(column, name,
                                                                doc_it);
    case DataType::ARRAY_UINT64:
      return fill_doc_array_field<arrow::UInt64Array, uint64_t>(column, name,
                                                                doc_it);
    case DataType::ARRAY_FLOAT:
      return fill_doc_array_field<arrow::FloatArray, float>(column, name,
                                                            doc_it);
    case DataType::ARRAY_DOUBLE:
      return fill_doc_array_field<arrow::DoubleArray, double>(column, name,
                                                              doc_it);
    case DataType::ARRAY_STRING:
      return fill_doc_array_field<arrow::StringArray, std::string>(column, name,
                                                                   doc_it);
    case DataType::ARRAY_BINARY:
      return fill_doc_array_field<arrow::BinaryArray, std::string>(column, name,
                                                                   doc_it);
    case DataType::ARRAY_BOOL:
      return fill_doc_array_field<arrow::BooleanArray, bool>(column, name,
                                                             doc_it);

    // ---- dense vectors (stored as binary blobs) ----
    case DataType::VECTOR_FP32:
      return fill_doc_vector<float>(as_binary(), name,
                                    field_schema.dimension(), doc_it);
    case DataType::VECTOR_FP64:
      return fill_doc_vector<double>(as_binary(), name,
                                     field_schema.dimension(), doc_it);
    case DataType::VECTOR_FP16:
      return fill_doc_vector<float16_t>(as_binary(), name,
                                        field_schema.dimension(), doc_it);
    case DataType::VECTOR_INT16:
      return fill_doc_vector<int16_t>(as_binary(), name,
                                      field_schema.dimension(), doc_it);
    case DataType::VECTOR_INT8:
      return fill_doc_vector<int8_t>(as_binary(), name,
                                     field_schema.dimension(), doc_it);
    // For the binary vector types the schema dimension is divided by the
    // element size before being compared against the stored element count.
    case DataType::VECTOR_BINARY32:
      return fill_doc_vector<uint32_t>(
          as_binary(), name, field_schema.dimension() / sizeof(uint32_t),
          doc_it);
    case DataType::VECTOR_BINARY64:
      return fill_doc_vector<uint64_t>(
          as_binary(), name, field_schema.dimension() / sizeof(uint64_t),
          doc_it);

    // ---- sparse vectors (stored as structs) ----
    case DataType::SPARSE_VECTOR_FP32:
      return fill_doc_sparse_vector<float>(as_struct(), name, doc_it);
    case DataType::SPARSE_VECTOR_FP16:
      return fill_doc_sparse_vector<float16_t>(as_struct(), name, doc_it);

    default:
      return Status::InvalidArgument("Datatype not supported:",
                                     field_schema.data_type());
  }
  return Status::OK();
}

//! Assign the doc id of every row to the docs starting at `doc_it`.
//! The column is read as UInt64 and no null check is performed.
void fill_doc_id(const std::shared_ptr<arrow::Array> &doc_id_array,
                 DocPtrList::iterator doc_it) {
  const auto *ids =
      static_cast<const arrow::UInt64Array *>(doc_id_array.get());
  const int64_t rows = ids->length();
  for (int64_t row = 0; row < rows; ++row) {
    // doc_id is non-null
    (*doc_it)->set_doc_id(ids->Value(row));
    ++doc_it;
  }
}

//! Assign the score of every row to the docs starting at `doc_it`.
//! Despite its name, `doc_id_array` holds the score column here: it is read as
//! a Float array and no null check is performed.
void fill_doc_score(const std::shared_ptr<arrow::Array> &doc_id_array,
                    DocPtrList::iterator doc_it) {
  const auto *scores =
      static_cast<const arrow::FloatArray *>(doc_id_array.get());
  const int64_t rows = scores->length();
  for (int64_t row = 0; row < rows; ++row) {
    // doc_score is non-null
    (*doc_it)->set_score(scores->Value(row));
    ++doc_it;
  }
}

//! Assign the primary key of every row to the docs starting at `doc_it`.
//! The column is read as a String array and no null check is performed.
void fill_user_id(const std::shared_ptr<arrow::Array> &user_id_array,
                  DocPtrList::iterator doc_it) {
  const auto *keys =
      static_cast<const arrow::StringArray *>(user_id_array.get());
  const int64_t rows = keys->length();
  for (int64_t row = 0; row < rows; ++row) {
    // user_id is non-null
    (*doc_it)->set_pk(keys->GetString(row));
    ++doc_it;
  }
}

//! Populate one doc per row of `record_batch`, starting at `doc_it`.
//!
//! The USER_ID column is mandatory; GLOBAL_DOC_ID and the score column are
//! applied only when present. Every entry of `output_fields` must exist as a
//! column, otherwise InternalError is returned. The first failing field
//! conversion is returned as-is.
Status record_batch_to_doc_list(
    const std::vector<FieldAndSchema> &output_fields,
    const arrow::RecordBatch &record_batch, DocPtrList::iterator doc_it) {
  // Mandatory primary-key column.
  auto pk_column = record_batch.GetColumnByName(USER_ID);
  if (!pk_column) {
    return Status::InternalError("Get column by name failed: ", USER_ID);
  }
  fill_user_id(pk_column, doc_it);

  // Optional columns.
  auto id_column = record_batch.GetColumnByName(GLOBAL_DOC_ID);
  if (id_column != nullptr) {
    fill_doc_id(id_column, doc_it);
  }
  auto score_column = record_batch.GetColumnByName(kFieldScore);
  if (score_column != nullptr) {
    fill_doc_score(score_column, doc_it);
  }

  // Requested output fields.
  for (auto &[name, schema] : output_fields) {
    auto column = record_batch.GetColumnByName(name);
    if (!column) {
      return Status::InternalError("Get column by name failed: ", name);
    }
    auto status = fill_doc_field(column, *schema, doc_it);
    if (!status.ok()) {
      return status;
    }
  }

  // Dump the filled docs only when debug logging is enabled.
  if (ailego::LoggerBroker::IsLevelEnabled(ailego::Logger::LEVEL_DEBUG)) {
    auto cursor = doc_it;
    for (int64_t row = 0; row < record_batch.num_rows(); ++row, ++cursor) {
      LOG_DEBUG("Doc: %s", (*cursor)->to_detail_string().c_str());
    }
  }
  return Status::OK();
}

//! Drain `reader` and convert every row of every batch into a Doc.
//!
//! \param output_fields  fields to copy into each doc
//! \param reader         batch source; read until it yields a null batch
//! \return all docs in read order, or the first read/conversion error
Result<DocPtrList> SQLEngineImpl::fill_result(
    const std::vector<FieldAndSchema> &output_fields,
    arrow::RecordBatchReader *reader) {
  DocPtrList docs;
  std::shared_ptr<RecordBatch> batch;
  for (;;) {
    const auto read_status = reader->ReadNext(&batch);
    if (!read_status.ok()) {
      return tl::make_unexpected(Status::InternalError(
          "Read record batch failed: ", read_status.ToString()));
    }
    if (batch == nullptr) {
      // A null batch marks the end of the stream.
      break;
    }

    // Append one fresh Doc per row, then fill the new tail in place.
    const size_t first_new = docs.size();
    docs.resize(first_new + batch->num_rows());
    for (auto slot = docs.begin() + first_new; slot != docs.end(); ++slot) {
      *slot = std::make_shared<Doc>();
    }

    auto status =
        record_batch_to_doc_list(output_fields, *batch, docs.begin() + first_new);
    if (!status.ok()) {
      return tl::make_unexpected(status);
    }
  }
  return docs;
}


//! Drain `reader`, bucket the rows by their group id and return the groups.
//!
//! Per group at most `group_count` docs are kept (first come, first kept);
//! rows with a null group id are ignored. Groups are ordered by the score of
//! their first doc (descending when the vector condition asks for reverse
//! sort, ascending otherwise) and at most `group_topk` groups are returned.
//!
//! \param query_info  supplies the output fields, the group-by limits and the
//!                    sort direction
//! \param reader      batch source; read until it yields a null batch
//! \return the grouped docs, or the first read/conversion error
Result<GroupResults> SQLEngineImpl::fill_group_by_result(
    const QueryInfo &query_info, arrow::RecordBatchReader *reader) {
  const std::vector<FieldAndSchema> &output_fields =
      query_info.select_item_schema_ptrs();
  const uint32_t group_count = query_info.group_by()->group_count;
  const uint32_t group_topk = query_info.group_by()->group_topk;
  std::shared_ptr<RecordBatch> record_batch;
  std::unordered_map<std::string, std::vector<Doc>> group_to_docs;
  while (true) {
    auto read_res = reader->ReadNext(&record_batch);
    if (!read_res.ok()) {
      return tl::make_unexpected(Status::InternalError(
          "Read record batch failed: ", read_res.ToString()));
    }
    if (record_batch == nullptr) {
      break;
    }
    const int64_t num_rows = record_batch->num_rows();
    DocPtrList docs(num_rows);
    for (auto &doc : docs) {
      doc = std::make_shared<Doc>();
    }
    auto status =
        record_batch_to_doc_list(output_fields, *record_batch, docs.begin());
    if (!status.ok()) {
      return tl::make_unexpected(status);
    }
    auto group_id_array = record_batch->GetColumnByName(kFieldGroupId);
    if (!group_id_array) {
      return tl::make_unexpected(
          Status::InternalError("Get group_id_array failed"));
    }
    const auto *typed_arr =
        static_cast<const arrow::StringArray *>(group_id_array.get());
    for (int64_t i = 0; i < num_rows; ++i) {
      if (typed_arr->IsNull(i)) {
        continue;
      }
      // docs already order by score, so the first `group_count` rows seen for
      // a group are the ones to keep.
      auto &group_docs = group_to_docs[typed_arr->GetString(i)];
      if (group_docs.size() < group_count) {
        group_docs.push_back(std::move(*docs[i]));
      }
    }
  }

  GroupResults group_results;
  for (auto &kv : group_to_docs) {
    // A group can only be empty when group_count is 0; drop it so that the
    // comparator below never reads docs_[0] of an empty vector.
    if (kv.second.empty()) {
      continue;
    }
    // The map key is const, so it is copied; the docs are moved out.
    group_results.emplace_back(GroupResult{kv.first, std::move(kv.second)});
  }

  // The comparator is only needed (and the vector condition only consulted)
  // when there is more than one group to order.
  if (group_results.size() > 1) {
    const bool reverse_sort = query_info.vector_cond_info()->is_reverse_sort();
    std::sort(group_results.begin(), group_results.end(),
              [reverse_sort](const GroupResult &a, const GroupResult &b) {
                if (reverse_sort) {
                  return a.docs_[0].score() > b.docs_[0].score();
                }
                return a.docs_[0].score() < b.docs_[0].score();
              });
  }
  if (group_results.size() > group_topk) {
    group_results.resize(group_topk);
  }

  // Building the detail strings is wasted work unless debug logging is on.
  if (ailego::LoggerBroker::IsLevelEnabled(ailego::Logger::LEVEL_DEBUG)) {
    for (auto &group_result : group_results) {
      LOG_DEBUG("Group: %s", group_result.group_by_value_.c_str());
      for (auto &doc : group_result.docs_) {
        LOG_DEBUG("\tDoc: %s", doc.to_detail_string().c_str());
      }
    }
  }
  return group_results;
}

}  // namespace zvec::sqlengine

================================================
FILE: src/db/sqlengine/sqlengine_impl.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License

#pragma once

#include <memory.h>
#include <memory>
#include <vector>
#include <arrow/api.h>
#include <zvec/db/doc.h>
#include <zvec/db/schema.h>
#include "analyzer/query_info.h"
#include "common/group_by.h"
#include "db/sqlengine/common/util.h"
#include "db/sqlengine/parser/sql_info.h"
#include "db/sqlengine/sqlengine.h"

namespace zvec::sqlengine {

//! Concrete SQLEngine. Member functions are declared here and defined in the
//! corresponding source file.
class SQLEngineImpl : public SQLEngine {
 public:
  //! \param profiler  stored in profiler_
  SQLEngineImpl(zvec::Profiler::Ptr profiler);

  //! Turn a VectorQuery (plus an optional group-by description) into a
  //! QueryInfo for the given collection schema.
  Result<QueryInfo::Ptr> parse_request(CollectionSchema::Ptr collection,
                                       const VectorQuery &request,
                                       std::shared_ptr<GroupBy> group_by);

  //! Perform search with given query_info, segments and index filter.
  //! Returns a reader over the resulting record batches.
  Result<std::unique_ptr<arrow::RecordBatchReader>> search_by_query_info(
      CollectionSchema::Ptr collection,
      const std::vector<Segment::Ptr> &segments,
      std::vector<sqlengine::QueryInfo::Ptr> *query_infos);

  //! Run `query` against `segments` and return the matching docs.
  Result<DocPtrList> execute(
      CollectionSchema::Ptr collection, const VectorQuery &query,
      const std::vector<Segment::Ptr> &segments) override;

  //! Run a group-by query against `segments` and return the grouped docs.
  Result<GroupResults> execute_group_by(
      CollectionSchema::Ptr collection,
      const GroupByVectorQuery &group_by_query,
      const std::vector<Segment::Ptr> &segments) override;

  //! Accessor for execution_time_info_.
  //! NOTE(review): not const-qualified, so it cannot be called on a const
  //! engine.
  const std::string &execution_time_info() {
    return execution_time_info_;
  }

 private:
  //! Drain `reader` and convert its rows into docs carrying `output_fields`.
  Result<DocPtrList> fill_result(
      const std::vector<FieldAndSchema> &output_fields,
      arrow::RecordBatchReader *reader);

  //! Build a QueryInfo from an already parsed SQLInfo.
  Result<QueryInfo::Ptr> parse_sql_info(const CollectionSchema &schema,
                                        const SQLInfo::Ptr &sql_info);

  //! Drain `reader` and bucket its rows into group results.
  Result<GroupResults> fill_group_by_result(const QueryInfo &query_info,
                                            arrow::RecordBatchReader *reader);

 private:
  //! Profiler handed in at construction.
  zvec::Profiler::Ptr profiler_;
  //! Value returned by execution_time_info(); empty by default.
  std::string execution_time_info_{};
};

}  // namespace zvec::sqlengine

================================================
FILE: src/include/zvec/ailego/buffer/buffer_manager.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#pragma once


#include <sys/stat.h>
#include <chrono>
#include <cstdint>
#include <filesystem>
#include <memory>
#include <string>
#include <system_error>
#include <vector>
#include <zvec/ailego/io/file.h>
#include <zvec/ailego/pattern/singleton.h>

namespace arrow {
class ChunkedArray;
class Array;
class DataType;
class Scalar;
template <typename T>
class Result;
class Status;
class Buffer;
}  // namespace arrow

namespace zvec {


namespace ailego {


struct BufferID;
class BufferManager;
class BufferHandle;


// Identifies one cacheable region of a backing file: either a
// (column, row_group) pair of a parquet file or an (offset, length) range of
// a vector file, together with the file's name, inode number and last
// modification stamp.
struct BufferID {
  // Position inside a parquet file.
  struct ParquetPos {
    int column;
    int row_group;
  };
  // Position inside a vector file.
  struct VectorPos {
    uint32_t offset;
    uint32_t length;
  };
  // Only the member matching `type` is meaningful.
  union Position {
    explicit Position() = default;
    ParquetPos forward;
    VectorPos vector;
  };
  enum TYPE {
    PARQUET = 1,
    VECTOR = 2,
    UNKNOWN = 0,
  };


  // Returns the raw tick count of the file's last write time, or 0 when the
  // time cannot be read (e.g. the file does not exist). The non-throwing
  // overload is used so that a file disappearing at the wrong moment cannot
  // raise std::filesystem::filesystem_error out of the factories below.
  // NOTE(review): despite the name, the tick period of file_time_type is
  // implementation-defined - confirm it is nanoseconds on all targets.
  static std::uint64_t getLastModifiedNs(const std::filesystem::path &p) {
    std::error_code ec;
    const auto ftime = std::filesystem::last_write_time(p, ec);
    if (ec) {
      return 0;
    }
    return static_cast<std::uint64_t>(ftime.time_since_epoch().count());
  }

  // Builds the id of a parquet (column, row_group) chunk of `file_name`.
  // file_id and mtime stay 0 when the file cannot be stat'ed.
  static BufferID ParquetID(const std::string &file_name, int column,
                            int row_group) {
    BufferID buffer_id{};
    buffer_id.type = TYPE::PARQUET;
    buffer_id.file_name = file_name;
    buffer_id.pos.forward.column = column;
    buffer_id.pos.forward.row_group = row_group;
    fill_file_identity(buffer_id);
    return buffer_id;
  }

  // Builds the id of the [offset, offset + length) range of `file_name`.
  // file_id and mtime stay 0 when the file cannot be stat'ed.
  static BufferID VectorID(const std::string &file_name, uint32_t offset,
                           uint32_t length) {
    BufferID buffer_id{};
    buffer_id.type = TYPE::VECTOR;
    buffer_id.file_name = file_name;
    fill_file_identity(buffer_id);
    buffer_id.pos.vector.offset = offset;
    buffer_id.pos.vector.length = length;
    return buffer_id;
  }

  explicit BufferID() = default;

  // Type of the file backing this buffer
  TYPE type{UNKNOWN};

  // Name of the file backing this buffer
  std::string file_name{};

  // Unique file id (the inode number reported by stat)
  uint64_t file_id{};

  // Last modification stamp as returned by getLastModifiedNs()
  long mtime{};

  // To identify which part of the backing file should be loaded into the buffer
  Position pos{};


  // Get the forward ID
  const inline struct ParquetPos &parquet() const {
    return pos.forward;
  }


  // Get the vector ID
  const inline struct VectorPos &vector() const {
    return pos.vector;
  }


  // Get debug string
  const std::string to_string() const {
    std::string msg{"Buffer["};
    if (type == TYPE::PARQUET) {
      msg += "parquet: " + file_name + "[" + std::to_string(file_id) + "]" +
             ", column: " + std::to_string(parquet().column) +
             ", row_group: " + std::to_string(parquet().row_group);
    } else if (type == TYPE::VECTOR) {
      msg += "vector: " + file_name + "[" + std::to_string(file_id) + "]" +
             ", offset: " + std::to_string(vector().offset);
    } else {
      msg += "unknown";
    }
    msg += ", mtime: " + std::to_string(mtime);
    msg += "]";
    return msg;
  }

  // Shared by both factories: records the inode number and modification stamp
  // of buffer_id.file_name. Leaves both fields untouched when stat() fails.
  static void fill_file_identity(BufferID &buffer_id) {
    struct stat file_stat;
    if (stat(buffer_id.file_name.c_str(), &file_stat) != 0) {
      return;
    }
    // Only the inode number is kept; the device id is not recorded.
    buffer_id.file_id = file_stat.st_ino;
    buffer_id.mtime = static_cast<long>(
        getLastModifiedNs(std::filesystem::path(buffer_id.file_name)));
  }
};


// Thread-safe LRU buffer implementation.
// Accessed as a singleton (see Singleton<BufferManager>); all member functions
// are defined out of line.
class BufferManager : public Singleton<BufferManager> {
  // BufferHandle needs access to the private BufferContext / BufferPool types.
  friend BufferHandle;

 public:
  // Configure the manager.
  // NOTE(review): presumably `limit` is the memory budget in bytes and
  // `num_shards` the number of pools in pools_ - confirm against the
  // implementation.
  void init(uint64_t limit, uint32_t num_shards = 1);

  // Obtain a handle for the buffer identified by `buffer_id`.
  // Takes the id by non-const reference.
  BufferHandle acquire(BufferID &buffer_id);

  // Same as acquire(), but returns the handle through a unique_ptr.
  std::unique_ptr<BufferHandle> acquire_ptr(BufferID &buffer_id);

  // NOTE(review): presumably the number of bytes currently held across all
  // pools - confirm against the implementation.
  uint64_t total_size_in_bytes() const;

  ~BufferManager();

 private:
  // Per-buffer bookkeeping; defined in the implementation file.
  struct BufferContext;

  // One shard of the cache; defined in the implementation file.
  class BufferPool;

  // Custom deleter for Arrow buffer that automatically notifies us when the
  // buffer is no longer referenced by Arrow
  struct ArrowBufferDeleter {
    explicit ArrowBufferDeleter(BufferContext *c);
    BufferContext *context;
    // Only reduces the reference count but does not actually release the
    // buffer, since the buffer memory is managed by the BufferManager.
    void operator()(arrow::Buffer *);
  };

  // Raw pointers to the pools.
  std::vector<BufferPool *> pools_;
};


// Move-only handle to one buffer managed by BufferManager.
// NOTE(review): the move operations are defaulted while the class holds raw
// pointers and declares a destructor, so a moved-from handle keeps its
// context_/pool_ values - confirm that ~BufferHandle() is safe to run on both
// the source and the target of a move.
class BufferHandle {
 public:
  typedef std::unique_ptr<BufferHandle> Pointer;

  // A default-constructed handle refers to no context.
  explicit BufferHandle(BufferManager::BufferContext *context = nullptr);
  BufferHandle(const BufferHandle &) = delete;
  BufferHandle(BufferHandle &&) = default;
  BufferHandle &operator=(const BufferHandle &) = delete;
  BufferHandle &operator=(BufferHandle &&) = default;


  ~BufferHandle();


  // Pin parquet data in memory by allocating arrow buffers of appropriate size
  // and reading data from the backing file.
  // The lifecycle of the allocated memory is automatically managed through
  // shared pointers. The buffers are guaranteed to be held until they are not
  // referenced.
  // Returns a pointer to the loaded ChunkedArray in Arrow format.
  std::shared_ptr<arrow::ChunkedArray> pin_parquet_data();


  // Pin vector data in memory by allocating a buffer of appropriate size and
  // loading data from the backing file.
  // The memory is guaranteed to be held until unpin_vector_data() is called.
  // The caller must call unpin_vector_data() to release the memory when it is
  // no longer needed.
  // Returns a raw memory address.
  void *pin_vector_data();


  // Reduce the reference count for this vector buffer.
  // Returns true if this was the last reference.
  // When reference count is zero, the buffer is moved to the eviction list and
  // becomes eligible for removal under memory pressure.
  bool unpin_vector_data();


  // Get the current reference count.
  uint32_t references() const;


  // Get the buffer size.
  uint32_t size() const;


 private:
  using BufferContext = BufferManager::BufferContext;
  using BufferPool = BufferManager::BufferPool;

  // Non-owning raw pointers into the BufferManager's bookkeeping.
  BufferContext *context_;
  BufferPool *pool_;
};


}  // namespace ailego


}  // namespace zvec


================================================
FILE: src/include/zvec/ailego/buffer/buffer_pool.h
================================================
#pragma once

#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <atomic>
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <limits>
#include <map>
#include <memory>
#include <mutex>
#include <queue>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <zvec/ailego/internal/platform.h>
#include "concurrentqueue.h"

namespace zvec {
namespace ailego {

using block_id_t = size_t;
using version_t = size_t;

class LPMap;

// Eviction bookkeeping built on lock-free concurrent queues of
// (block id, version) pairs. All member functions are defined out of line.
class LRUCache {
 public:
  // A queued block: its id plus the version it had when it was enqueued.
  typedef std::pair<block_id_t, version_t> BlockType;
  typedef moodycamel::ConcurrentQueue<BlockType> ConcurrentQueue;

  // Stores the block size and sets up the queues.
  // NOTE(review): return-value convention (0 on success?) is not visible
  // here - confirm against the implementation.
  int init(size_t block_size);

  // Tries to take one block off the queues into `item`; the bool reports
  // whether a block was obtained.
  bool evict_single_block(BlockType &item);

  // Enqueues `block`; `block_type` presumably selects the queue - TODO
  // confirm.
  bool add_single_block(const LPMap *lp_map, const BlockType &block,
                        int block_type);

  // NOTE(review): presumably drops queued entries that `lp_map` reports as
  // dead (see LPMap::isDeadBlock) - confirm against the implementation.
  void clear_dead_node(const LPMap *lp_map);

 private:
  // NOTE(review): the name looks like a typo for CACHE_QUEUE_NUM.
  constexpr static size_t CATCH_QUEUE_NUM = 3;
  size_t block_size_{0};
  std::vector<ConcurrentQueue> queues_;
  // Kept on its own cache line (64-byte alignment).
  alignas(64) std::atomic<size_t> evict_queue_insertions_{0};
};

// Table of per-block entries (reference count, load counter, buffer pointer)
// indexed by block id, plus the LRUCache used for eviction bookkeeping.
// Owns the entries_ array and releases it in the destructor.
class LPMap {
  // One slot per block. The two counters are each aligned to 64 bytes so that
  // they live on separate cache lines.
  struct Entry {
    alignas(64) std::atomic<int> ref_count;
    alignas(64) std::atomic<version_t> load_count;
    char *buffer;
  };

 public:
  // Starts empty; init() must provide the entry table.
  LPMap() : entry_num_(0), entries_(nullptr) {}
  ~LPMap() {
    delete[] entries_;
  }

  // Sets up the table for `entry_num` blocks (defined out of line).
  void init(size_t entry_num);

  // The following operations are defined out of line.
  // NOTE(review): exact reference-counting semantics are not visible in this
  // header - confirm against the implementation before relying on them.
  char *acquire_block(block_id_t block_id, bool lru_mode);

  void release_block(block_id_t block_id);

  // Returns a buffer pointer that callers free when it is non-null (see
  // ~VecBufferPool).
  char *evict_block(block_id_t block_id);

  char *set_block_acquired(block_id_t block_id, char *buffer);

  void recycle(moodycamel::ConcurrentQueue<char *> &free_buffers);

  // Number of entries in the table.
  size_t entry_num() const {
    return entry_num_;
  }

  // True when the version recorded in `block` no longer matches the entry's
  // current load counter. `block.first` is used as an index into entries_
  // without a bounds check.
  inline bool isDeadBlock(LRUCache::BlockType block) const {
    Entry &entry = entries_[block.first];
    return block.second != entry.load_count.load();
  }

 private:
  size_t entry_num_{0};
  Entry *entries_{nullptr};
  LRUCache cache_;
};

class VecBufferPoolHandle;

// Block buffer pool backed by a single file descriptor.
// Buffers handed back to the pool wait in free_buffers_; buffers attached to
// blocks are tracked by lp_map_. The destructor releases both sets and closes
// the descriptor.
class VecBufferPool {
 public:
  typedef std::shared_ptr<VecBufferPool> Pointer;

  VecBufferPool(const std::string &filename);
  ~VecBufferPool() {
    // First drain the spare buffers that are not attached to any block.
    for (char *spare = nullptr; free_buffers_.try_dequeue(spare);) {
      ailego_free(spare);
    }
    // Then detach and free whatever buffers the block map still holds.
    for (size_t block = 0; block < lp_map_.entry_num(); ++block) {
      if (char *resident = lp_map_.evict_block(block); resident) {
        ailego_free(resident);
      }
    }
    close(fd_);
  }

  // Set up the pool (defined out of line).
  int init(size_t pool_capacity, size_t block_size, size_t segment_count);

  // Create a handle bound to this pool (defined out of line).
  VecBufferPoolHandle get_handle();

  // Obtain the buffer for `block_id` (defined out of line).
  char *acquire_buffer(block_id_t block_id, size_t offset, size_t size,
                       int retry = 0);

  // Read `length` bytes of metadata at `offset` into `buffer` (defined out of
  // line).
  int get_meta(size_t offset, size_t length, char *buffer);

  // Value of file_size_.
  size_t file_size() const {
    return file_size_;
  }

  // Value of no_lru_mode_.
  bool no_lru_mode() {
    return no_lru_mode_;
  }

 private:
  int fd_;
  size_t file_size_;
  size_t pool_capacity_;
  bool no_lru_mode_;

 public:
  // Publicly accessible block map.
  LPMap lp_map_;

 private:
  std::vector<std::unique_ptr<std::mutex>> mutex_vec_;
  moodycamel::ConcurrentQueue<char *> free_buffers_;
};

// Lightweight, non-owning view onto a VecBufferPool. The handle only stores a
// reference, so the pool must outlive every handle created from it.
class VecBufferPoolHandle {
 public:
  VecBufferPoolHandle(VecBufferPool &pool) : pool_(pool) {}
  // Moving just rebinds to the same pool; it cannot throw, and saying so lets
  // containers move handles instead of rejecting or copying them.
  VecBufferPoolHandle(VecBufferPoolHandle &&other) noexcept
      : pool_(other.pool_) {}

  ~VecBufferPoolHandle() = default;

  typedef std::shared_ptr<VecBufferPoolHandle> Pointer;

  // Obtain the buffer of block `block_id` (defined out of line).
  char *get_block(size_t offset, size_t size, size_t block_id);

  // Read `length` bytes of metadata at `offset` into `buffer` (defined out of
  // line).
  int get_meta(size_t offset, size_t length, char *buffer);

  // Drop / take one reference on block `block_id` (defined out of line).
  void release_one(block_id_t block_id);

  void acquire_one(block_id_t block_id);

 private:
  VecBufferPool &pool_;
};

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/include/zvec/ailego/buffer/concurrentqueue.h
================================================
// Provides a C++11 implementation of a multi-producer, multi-consumer lock-free
// queue. An overview, including benchmark results, is provided here:
//     http://moodycamel.com/blog/2014/a-fast-general-purpose-lock-free-queue-for-c++
// The full design is also described in excruciating detail at:
//    http://moodycamel.com/blog/2014/detailed-design-of-a-lock-free-queue

// Simplified BSD license:
// Copyright (c) 2013-2020, Cameron Desrochers.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// - Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
// - Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

// Also dual-licensed under the Boost Software License (see LICENSE.md)

#pragma once

#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
// Disable -Wconversion warnings (spuriously triggered when Traits::size_t and
// Traits::index_t are set to < 32 bits, causing integer promotion, causing
// warnings upon assigning any computed values)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wconversion"

#ifdef MCDBGQ_USE_RELACY
#pragma GCC diagnostic ignored "-Wint-to-pointer-cast"
#endif
#endif

#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17)
// VS2019 with /W4 warns about constant conditional expressions but unless
// /std=c++17 or higher does not support `if constexpr`, so we have no choice
// but to simply disable the warning
#pragma warning(push)
#pragma warning(disable : 4127)  // conditional expression is constant
#endif

#if defined(__APPLE__)
#include "TargetConditionals.h"
#endif

#ifdef MCDBGQ_USE_RELACY
#include "relacy/relacy_std.hpp"
#include "relacy_shims.h"
// We only use malloc/free anyway, and the delete macro messes up `= delete`
// method declarations. We'll override the default trait malloc ourselves
// without a macro.
#undef new
#undef delete
#undef malloc
#undef free
#else
#include <atomic>  // Requires C++11. Sorry VS2010.
#include <cassert>
#endif
#include <algorithm>
#include <array>
#include <climits>  // for CHAR_BIT
#include <cstddef>  // for max_align_t
#include <cstdint>
#include <cstdlib>
#include <limits>
#include <mutex>  // used for thread exit synchronization
#include <thread>  // partly for __WINPTHREADS_VERSION if on MinGW-w64 w/ POSIX threading
#include <type_traits>
#include <utility>

// Platform-specific definitions of a numeric thread ID type and an invalid
// value
namespace moodycamel {
namespace details {
// Default thread-id converter: the numeric and hash types are the thread id
// type itself, and prehash() returns its argument unchanged.
template <typename thread_id_t>
struct thread_id_converter {
  typedef thread_id_t thread_id_numeric_size_t;
  typedef thread_id_t thread_id_hash_t;
  static thread_id_hash_t prehash(thread_id_t const &x) {
    return x;
  }
};
}  // namespace details
}  // namespace moodycamel
#if defined(MCDBGQ_USE_RELACY)
namespace moodycamel {
namespace details {
// Relacy build: thread IDs are 32-bit values obtained from Relacy.
typedef std::uint32_t thread_id_t;
// Two distinct sentinel values that are treated as "not a thread".
static const thread_id_t invalid_thread_id = 0xFFFFFFFFU;
static const thread_id_t invalid_thread_id2 = 0xFFFFFFFEU;
// Returns the calling thread's ID, i.e. whatever rl::thread_index() reports.
static inline thread_id_t thread_id() {
  return rl::thread_index();
}
}  // namespace details
}  // namespace moodycamel
#elif defined(_WIN32) || defined(__WINDOWS__) || defined(__WIN32__)
// No sense pulling in windows.h in a header, we'll manually declare the
// function we use and rely on backwards-compatibility for this not to break
extern "C"
    __declspec(dllimport) unsigned long __stdcall GetCurrentThreadId(void);
namespace moodycamel {
namespace details {
// GetCurrentThreadId() returns unsigned long; the cast to a 32-bit ID below is
// only lossless if that type really is 32 bits wide.
static_assert(sizeof(unsigned long) == sizeof(std::uint32_t),
              "Expected size of unsigned long to be 32 bits on Windows");
// Windows build: thread IDs are the 32-bit values reported by the OS.
typedef std::uint32_t thread_id_t;
// First sentinel meaning "not a thread".
static const thread_id_t invalid_thread_id =
    0;  // See http://blogs.msdn.com/b/oldnewthing/archive/2004/02/23/78395.aspx
// Second, distinct sentinel meaning "not a thread".
static const thread_id_t invalid_thread_id2 =
    0xFFFFFFFFU;  // Not technically guaranteed to be invalid, but is never used
                  // in practice. Note that all Win32 thread IDs are presently
                  // multiples of 4.
// Returns the calling thread's ID as reported by ::GetCurrentThreadId().
static inline thread_id_t thread_id() {
  return static_cast<thread_id_t>(::GetCurrentThreadId());
}
}  // namespace details
}  // namespace moodycamel
#elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || \
    (defined(__APPLE__) && TARGET_OS_IPHONE) || defined(__MVS__) ||  \
    defined(MOODYCAMEL_NO_THREAD_LOCAL)
namespace moodycamel {
namespace details {
// The converter specialization for this branch reinterprets or hashes the ID
// through a 32- or 64-bit integer, so only those two sizes are supported.
static_assert(sizeof(std::thread::id) == 4 || sizeof(std::thread::id) == 8,
              "std::thread::id is expected to be either 4 or 8 bytes");

// In this branch the thread ID is the standard library's opaque
// std::thread::id rather than a plain integer.
typedef std::thread::id thread_id_t;
static const thread_id_t invalid_thread_id;  // Default ctor creates invalid ID

// Note we don't define a invalid_thread_id2 since std::thread::id doesn't have
// one; it's only used if MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is defined
// anyway, which it won't be.
// Returns the calling thread's ID via std::this_thread::get_id().
static inline thread_id_t thread_id() {
  return std::this_thread::get_id();
}

// Maps a size in bytes to the fixed-width unsigned integer of that size.
// Only 4 and 8 are specialized; any other size selects the empty primary
// template, which has no numeric_t member.
template <std::size_t Bytes>
struct thread_id_size {};

// 4 bytes -> 32-bit unsigned integer.
template <>
struct thread_id_size<4> {
  typedef std::uint32_t numeric_t;
};

// 8 bytes -> 64-bit unsigned integer.
template <>
struct thread_id_size<8> {
  typedef std::uint64_t numeric_t;
};

// Converter for the std::thread::id-based thread_id_t of this branch.
// On Apple platforms the raw bytes of the ID are read as an integer of the
// same size; everywhere else the ID goes through std::hash<std::thread::id>.
template <>
struct thread_id_converter<thread_id_t> {
  // Unsigned integer type with exactly the size of thread_id_t.
  typedef thread_id_size<sizeof(thread_id_t)>::numeric_t
      thread_id_numeric_size_t;
#ifdef __APPLE__
  typedef thread_id_numeric_size_t thread_id_hash_t;
#else
  typedef std::size_t thread_id_hash_t;
#endif

  // Produces the integer that is subsequently fed to the bit-mixing hash.
  static thread_id_hash_t prehash(thread_id_t const &id) {
#ifdef __APPLE__
    return *reinterpret_cast<thread_id_hash_t const *>(&id);
#else
    return std::hash<std::thread::id>()(id);
#endif
  }
};
}
}
#else
// Use a nice trick from this answer: http://stackoverflow.com/a/8438730/21475
// In order to get a numeric thread ID in a platform-independent way, we use a
// thread-local static variable's address as a thread identifier :-)
// Pick the thread-local storage keyword understood by the current compiler.
#if defined(__GNUC__) || defined(__INTEL_COMPILER)
#define MOODYCAMEL_THREADLOCAL __thread
#elif defined(_MSC_VER)
#define MOODYCAMEL_THREADLOCAL __declspec(thread)
#else
// Assume C++11 compliant compiler
#define MOODYCAMEL_THREADLOCAL thread_local
#endif
namespace moodycamel {
namespace details {
// A thread is identified by the address of one of its thread-local variables.
typedef std::uintptr_t thread_id_t;

// Address can't be nullptr, so 0 never names a real thread.
static const thread_id_t invalid_thread_id = 0;

// Member accesses off a null pointer are also generally invalid. Plus it's
// not aligned.
static const thread_id_t invalid_thread_id2 = 1;

// Returns the calling thread's ID: the numeric address of a function-local
// thread-local int, which is the same on every call from the same thread.
inline thread_id_t thread_id() {
  static MOODYCAMEL_THREADLOCAL int tlsAnchor;
  int *const anchorAddress = &tlsAnchor;
  return reinterpret_cast<thread_id_t>(anchorAddress);
}
}  // namespace details
}  // namespace moodycamel
#endif

// Constexpr if
// MOODYCAMEL_CONSTEXPR_IF expands to `if constexpr` when the compiler reports
// C++17 or later (MSVC via _HAS_CXX17, others via __cplusplus), and to a plain
// `if` otherwise. MOODYCAMEL_MAYBE_UNUSED expands to [[maybe_unused]] under
// the same condition and to nothing otherwise. Both can be pre-defined by the
// user, in which case this block is skipped entirely.
#ifndef MOODYCAMEL_CONSTEXPR_IF
#if (defined(_MSC_VER) && defined(_HAS_CXX17) && _HAS_CXX17) || \
    __cplusplus > 201402L
#define MOODYCAMEL_CONSTEXPR_IF if constexpr
#define MOODYCAMEL_MAYBE_UNUSED [[maybe_unused]]
#else
#define MOODYCAMEL_CONSTEXPR_IF if
#define MOODYCAMEL_MAYBE_UNUSED
#endif
#endif

// Exceptions
// Unless the user has already defined MOODYCAMEL_EXCEPTIONS_ENABLED, define it
// when MSVC reports _CPPUNWIND, when a GNU-compatible compiler reports
// __EXCEPTIONS, or when the compiler is neither of the two.
#ifndef MOODYCAMEL_EXCEPTIONS_ENABLED
#if (defined(_MSC_VER) && defined(_CPPUNWIND)) ||   \
    (defined(__GNUC__) && defined(__EXCEPTIONS)) || \
    (!defined(_MSC_VER) && !defined(__GNUC__))
#define MOODYCAMEL_EXCEPTIONS_ENABLED
#endif
#endif
// With exceptions enabled the macros map directly onto try/catch/throw.
// Without them, MOODYCAMEL_TRY becomes an always-true `if`, MOODYCAMEL_CATCH
// becomes its never-taken `else if (false)` branch, and the throw macros
// expand to nothing.
#ifdef MOODYCAMEL_EXCEPTIONS_ENABLED
#define MOODYCAMEL_TRY try
#define MOODYCAMEL_CATCH(...) catch (__VA_ARGS__)
#define MOODYCAMEL_RETHROW throw
#define MOODYCAMEL_THROW(expr) throw(expr)
#else
#define MOODYCAMEL_TRY MOODYCAMEL_CONSTEXPR_IF(true)
#define MOODYCAMEL_CATCH(...) else MOODYCAMEL_CONSTEXPR_IF(false)
#define MOODYCAMEL_RETHROW
#define MOODYCAMEL_THROW(expr)
#endif

// noexcept support.
//   MOODYCAMEL_NOEXCEPT                  - the noexcept specifier, if any
//   MOODYCAMEL_NOEXCEPT_CTOR(t, v, e)    - whether constructing a `t` from a
//                                          `v` via expression `e` cannot throw
//   MOODYCAMEL_NOEXCEPT_ASSIGN(t, v, e)  - likewise for assignment
// Four configurations follow: exceptions disabled (everything is nothrow),
// two older MSVC versions that approximate the answer with type traits, and
// the default that simply uses the noexcept operator on `e`.
#ifndef MOODYCAMEL_NOEXCEPT
#if !defined(MOODYCAMEL_EXCEPTIONS_ENABLED)
#define MOODYCAMEL_NOEXCEPT
#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) true
#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) true
#elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1800
// VS2012's std::is_nothrow_[move_]constructible is broken and returns true when
// it shouldn't :-( We have to assume *all* non-trivial constructors may throw
// on VS2012!
#define MOODYCAMEL_NOEXCEPT _NOEXCEPT
#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr)    \
  (std::is_rvalue_reference<valueType>::value &&           \
           std::is_move_constructible<type>::value         \
       ? std::is_trivially_move_constructible<type>::value \
       : std::is_trivially_copy_constructible<type>::value)
#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr)      \
  ((std::is_rvalue_reference<valueType>::value &&              \
            std::is_move_assignable<type>::value               \
        ? std::is_trivially_move_assignable<type>::value ||    \
              std::is_nothrow_move_assignable<type>::value     \
        : std::is_trivially_copy_assignable<type>::value ||    \
              std::is_nothrow_copy_assignable<type>::value) && \
   MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr))
#elif defined(_MSC_VER) && defined(_NOEXCEPT) && _MSC_VER < 1900
// MSVC versions below 1900 that are not caught by the branch above: decide
// via the trivially-/nothrow- type traits rather than the noexcept operator.
#define MOODYCAMEL_NOEXCEPT _NOEXCEPT
#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr)       \
  (std::is_rvalue_reference<valueType>::value &&              \
           std::is_move_constructible<type>::value            \
       ? std::is_trivially_move_constructible<type>::value || \
             std::is_nothrow_move_constructible<type>::value  \
       : std::is_trivially_copy_constructible<type>::value || \
             std::is_nothrow_copy_constructible<type>::value)
#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr)      \
  ((std::is_rvalue_reference<valueType>::value &&              \
            std::is_move_assignable<type>::value               \
        ? std::is_trivially_move_assignable<type>::value ||    \
              std::is_nothrow_move_assignable<type>::value     \
        : std::is_trivially_copy_assignable<type>::value ||    \
              std::is_nothrow_copy_assignable<type>::value) && \
   MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr))
#else
// Default: rely on the language's noexcept specifier and operator.
#define MOODYCAMEL_NOEXCEPT noexcept
#define MOODYCAMEL_NOEXCEPT_CTOR(type, valueType, expr) noexcept(expr)
#define MOODYCAMEL_NOEXCEPT_ASSIGN(type, valueType, expr) noexcept(expr)
#endif
#endif

// Decide whether C++11 `thread_local` may be relied upon. The macro is always
// defined under Relacy; otherwise it is defined only when none of the known
// problematic compilers/platforms listed below is detected. Users can
// pre-define it to skip this detection.
#ifndef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
#ifdef MCDBGQ_USE_RELACY
#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
#else
// VS2013 doesn't support `thread_local`, and MinGW-w64 w/ POSIX threading has a
// crippling bug: http://sourceforge.net/p/mingw-w64/bugs/445. g++ <=4.7 doesn't
// support thread_local either. Finally, iOS/ARM doesn't have support for it
// either, and g++/ARM allows it to compile but it's unconfirmed to actually
// work
#if (!defined(_MSC_VER) || _MSC_VER >= 1900) &&                        \
    (!defined(__MINGW32__) && !defined(__MINGW64__) ||                 \
     !defined(__WINPTHREADS_VERSION)) &&                               \
    (!defined(__GNUC__) || __GNUC__ > 4 ||                             \
     (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) &&                        \
    (!defined(__APPLE__) || !TARGET_OS_IPHONE) && !defined(__arm__) && \
    !defined(_M_ARM) && !defined(__aarch64__) && !defined(__MVS__)
// Assume `thread_local` is fully supported in all other C++11
// compilers/platforms
#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED  // tentatively enabled for now;
                                                 // years ago several users
                                                 // reported having problems
                                                 // with it on
#endif
#endif
#endif

// VS2012 doesn't support deleted functions.
// In this case, we declare the function normally but don't define it. A link
// error will be generated if the function is called.
// On every other compiler the macro expands to `= delete`. It is appended to
// a declaration in place of a function body, and can be pre-defined by the
// user to override this choice.
#ifndef MOODYCAMEL_DELETE_FUNCTION
#if defined(_MSC_VER) && _MSC_VER < 1800
#define MOODYCAMEL_DELETE_FUNCTION
#else
#define MOODYCAMEL_DELETE_FUNCTION = delete
#endif
#endif

namespace moodycamel {
namespace details {
// Alignment helpers:
//   MOODYCAMEL_ALIGNAS(n)              - alignment specifier
//   MOODYCAMEL_ALIGNOF(obj)            - alignment query
//   MOODYCAMEL_ALIGNED_TYPE_LIKE(T, o) - type T carrying the alignment of `o`
// The whole block is skipped if the user has already defined
// MOODYCAMEL_ALIGNAS.
#ifndef MOODYCAMEL_ALIGNAS
// VS2013 doesn't support alignas or alignof, and align() requires a constant
// literal
#if defined(_MSC_VER) && _MSC_VER <= 1800
#define MOODYCAMEL_ALIGNAS(alignment) __declspec(align(alignment))
#define MOODYCAMEL_ALIGNOF(obj) __alignof(obj)
#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) \
  typename details::Vs2013Aligned<std::alignment_of<obj>::value, T>::type
// Because align() needs a literal, each supported alignment (powers of two
// from 1 to 256) gets its own specialization. Any other value selects the
// empty primary template, which has no `type` member.
template <int Align, typename T>
struct Vs2013Aligned {};  // default, unsupported alignment
template <typename T>
struct Vs2013Aligned<1, T> {
  typedef __declspec(align(1)) T type;
};
template <typename T>
struct Vs2013Aligned<2, T> {
  typedef __declspec(align(2)) T type;
};
template <typename T>
struct Vs2013Aligned<4, T> {
  typedef __declspec(align(4)) T type;
};
template <typename T>
struct Vs2013Aligned<8, T> {
  typedef __declspec(align(8)) T type;
};
template <typename T>
struct Vs2013Aligned<16, T> {
  typedef __declspec(align(16)) T type;
};
template <typename T>
struct Vs2013Aligned<32, T> {
  typedef __declspec(align(32)) T type;
};
template <typename T>
struct Vs2013Aligned<64, T> {
  typedef __declspec(align(64)) T type;
};
template <typename T>
struct Vs2013Aligned<128, T> {
  typedef __declspec(align(128)) T type;
};
template <typename T>
struct Vs2013Aligned<256, T> {
  typedef __declspec(align(256)) T type;
};
#else
// Trivial wrapper whose nested `type` is T itself; used by
// MOODYCAMEL_ALIGNED_TYPE_LIKE below to name T after the alignas specifier.
template <typename T>
struct identity {
  typedef T type;
};
#define MOODYCAMEL_ALIGNAS(alignment) alignas(alignment)
#define MOODYCAMEL_ALIGNOF(obj) alignof(obj)
#define MOODYCAMEL_ALIGNED_TYPE_LIKE(T, obj) \
  alignas(alignof(obj)) typename details::identity<T>::type
#endif
#endif
}  // namespace details
}  // namespace moodycamel


// TSAN can falsely report races in lock-free code. To enable TSAN to be used
// from projects that use this one, we can apply per-function compile-time
// suppression. See
// https://clang.llvm.org/docs/ThreadSanitizer.html#has-feature-thread-sanitizer
//
// MOODYCAMEL_NO_TSAN is empty by default and is redefined to the
// no_sanitize("thread") attribute only when the compiler provides
// __has_feature and reports the thread_sanitizer feature as active.
#define MOODYCAMEL_NO_TSAN
#if defined(__has_feature)
#if __has_feature(thread_sanitizer)
#undef MOODYCAMEL_NO_TSAN
#define MOODYCAMEL_NO_TSAN __attribute__((no_sanitize("thread")))
#endif  // __has_feature(thread_sanitizer)
#endif  // defined(__has_feature)

// Compiler-specific likely/unlikely hints
namespace moodycamel {
namespace details {
// Branch-prediction hints. Both functions return their argument unchanged;
// on GNU-compatible compilers the value is additionally routed through
// __builtin_expect. The names are parenthesized so that a function-like macro
// called `likely`/`unlikely` cannot expand here.

// Hints that `x` is expected to be true.
static inline bool(likely)(bool x) {
#if defined(__GNUC__)
  return __builtin_expect((x), true);
#else
  return x;
#endif
}

// Hints that `x` is expected to be false.
static inline bool(unlikely)(bool x) {
#if defined(__GNUC__)
  return __builtin_expect((x), false);
#else
  return x;
#endif
}
}  // namespace details
}  // namespace moodycamel

#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
#include "internal/concurrentqueue_internal_debug.h"
#endif

namespace moodycamel {
namespace details {
// Compile-time maximum value of an integral type T, usable where a constant
// expression is required.
//
// Unsigned T: all bits set, obtained by converting -1 to T.
// Signed T:   2^(bits-1) - 1. This is computed as (2^(bits-2) - 1) * 2 + 1 so
//             that no intermediate result exceeds the maximum. The direct
//             form (1 << (bits-1)) - 1 overflows for types at least as wide
//             as int, which is not a valid constant expression and therefore
//             fails to compile.
template <typename T>
struct const_numeric_max {
  static_assert(std::is_integral<T>::value,
                "const_numeric_max can only be used with integers");
  static const T value =
      std::numeric_limits<T>::is_signed
          ? static_cast<T>(
                ((static_cast<T>(1) << (sizeof(T) * CHAR_BIT - 2)) -
                 static_cast<T>(1)) *
                    2 +
                1)
          : static_cast<T>(-1);
};

// std_max_align_t names the platform's max_align_t, taken from whichever
// namespace the standard library in use exposes it in.
#if defined(__GLIBCXX__)
typedef ::max_align_t
    std_max_align_t;  // libstdc++ forgot to add it to std:: for a while
#else
typedef std::max_align_t std_max_align_t;  // Others (e.g. MSVC) insist it can
                                           // *only* be accessed via std::
#endif

// Some platforms have incorrectly set max_align_t to a type with <8 bytes
// alignment even while supporting 8-byte aligned scalar values (*cough* 32-bit
// iOS). Work around this with our own union. See issue #64.
// The union's alignment is the strictest of the platform max_align_t,
// long long and void*.
typedef union {
  std_max_align_t x;
  long long y;
  void *z;
} max_align_t;
}  // namespace details

// Default traits for the ConcurrentQueue. To change some of the
// traits without re-implementing all of them, inherit from this
// struct and shadow the declarations you wish to be different;
// since the traits are used as a template type parameter, the
// shadowed declarations will be used where defined, and the defaults
// otherwise.
struct ConcurrentQueueDefaultTraits {
  // General-purpose size type. std::size_t is strongly recommended.
  typedef std::size_t size_t;

  // The type used for the enqueue and dequeue indices. Must be at least as
  // large as size_t. Should be significantly larger than the number of elements
  // you expect to hold at once, especially if you have a high turnover rate;
  // for example, on 32-bit x86, if you expect to have over a hundred million
  // elements or pump several million elements through your queue in a very
  // short space of time, using a 32-bit type *may* trigger a race condition.
  // A 64-bit int type is recommended in that case, and in practice will
  // prevent a race condition no matter the usage of the queue. Note that
  // whether the queue is lock-free with a 64-bit int type depends on whether
  // std::atomic<std::uint64_t> is lock-free, which is platform-specific.
  typedef std::size_t index_t;

  // Internally, all elements are enqueued and dequeued from multi-element
  // blocks; this is the smallest controllable unit. If you expect few elements
  // but many producers, a smaller block size should be favoured. For few
  // producers and/or many elements, a larger block size is preferred. A sane
  // default is provided. Must be a power of 2.
  static const size_t BLOCK_SIZE = 32;

  // For explicit producers (i.e. when using a producer token), the block is
  // checked for being empty by iterating through a list of flags, one per
  // element. For large block sizes, this is too inefficient, and switching to
  // an atomic counter-based approach is faster. The switch is made for block
  // sizes strictly larger than this threshold.
  static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = 32;

  // How many full blocks can be expected for a single explicit producer? This
  // should reflect that number's maximum for optimal performance. Must be a
  // power of 2.
  static const size_t EXPLICIT_INITIAL_INDEX_SIZE = 32;

  // How many full blocks can be expected for a single implicit producer? This
  // should reflect that number's maximum for optimal performance. Must be a
  // power of 2.
  static const size_t IMPLICIT_INITIAL_INDEX_SIZE = 32;

  // The initial size of the hash table mapping thread IDs to implicit
  // producers. Note that the hash is resized every time it becomes half full.
  // Must be a power of two, and either 0 or at least 1. If 0, implicit
  // production (using the enqueue methods without an explicit producer token)
  // is disabled.
  static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = 32;

  // Controls the number of items that an explicit consumer (i.e. one with a
  // token) must consume before it causes all consumers to rotate and move on to
  // the next internal queue.
  static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE =
      256;

  // The maximum number of elements (inclusive) that can be enqueued to a
  // sub-queue. Enqueue operations that would cause this limit to be surpassed
  // will fail. Note that this limit is enforced at the block level (for
  // performance reasons), i.e. it's rounded up to the nearest block size.
  static const size_t MAX_SUBQUEUE_SIZE =
      details::const_numeric_max<size_t>::value;

  // The number of times to spin before sleeping when waiting on a semaphore.
  // Recommended values are on the order of 1000-10000 unless the number of
  // consumer threads exceeds the number of idle cores (in which case try
  // 0-100). Only affects instances of the BlockingConcurrentQueue.
  static const int MAX_SEMA_SPINS = 10000;

  // Whether to recycle dynamically-allocated blocks into an internal free list
  // or not. If false, only pre-allocated blocks (controlled by the constructor
  // arguments) will be recycled, and all others will be `free`d back to the
  // heap. Note that blocks consumed by explicit producers are only freed on
  // destruction of the queue (not following destruction of the token)
  // regardless of this trait.
  static const bool RECYCLE_ALLOCATED_BLOCKS = false;


#ifndef MCDBGQ_USE_RELACY
  // Memory allocation can be customized if needed.
  // malloc should return nullptr on failure, and handle alignment like
  // std::malloc.
#if defined(malloc) || defined(free)
  // Gah, this is 2015, stop defining macros that break standard code already!
  // Work around malloc/free being special macros:
  // The WORKAROUND_* helpers call whatever the macros expand to, while the
  // parenthesized names below declare members literally called malloc/free
  // without triggering a function-like macro expansion.
  static inline void *WORKAROUND_malloc(size_t size) {
    return malloc(size);
  }
  static inline void WORKAROUND_free(void *ptr) {
    return free(ptr);
  }
  static inline void *(malloc)(size_t size) {
    return WORKAROUND_malloc(size);
  }
  static inline void(free)(void *ptr) {
    return WORKAROUND_free(ptr);
  }
#else
  // Default allocator hooks: forward straight to std::malloc / std::free.
  static inline void *malloc(size_t size) {
    return std::malloc(size);
  }
  static inline void free(void *ptr) {
    return std::free(ptr);
  }
#endif
#else
  // Debug versions when running under the Relacy race detector (ignore
  // these in user code)
  static inline void *malloc(size_t size) {
    return rl::rl_malloc(size, $);
  }
  static inline void free(void *ptr) {
    return rl::rl_free(ptr, $);
  }
#endif
};


// When producing or consuming many elements, the most efficient way is to:
//    1) Use one of the bulk-operation methods of the queue with a token
//    2) Failing that, use the bulk-operation methods without a token
//    3) Failing that, create a token and use that with the single-item methods
//    4) Failing that, use the single-parameter methods of the queue
// Having said that, don't create tokens willy-nilly -- ideally there should be
// a maximum of one token per thread (of each kind).
struct ProducerToken;
struct ConsumerToken;

template <typename T, typename Traits>
class ConcurrentQueue;
template <typename T, typename Traits>
class BlockingConcurrentQueue;
class ConcurrentQueueTests;


namespace details {
// Producer state that does not depend on the queue's element type, so that
// tokens can refer to a producer without knowing T.
struct ConcurrentQueueProducerTypelessBase {
  // Link to another producer base; null when there is none.
  ConcurrentQueueProducerTypelessBase *next;
  // Atomic flag, initially false.
  std::atomic<bool> inactive;
  // Back-pointer to a ProducerToken; null when there is none.
  ProducerToken *token;

  // Starts out unlinked, active, and without a token.
  ConcurrentQueueProducerTypelessBase()
      : next(nullptr),
        inactive(false),
        token(nullptr) {}
};

// Bit-mixing hash used on thread IDs. The primary template handles 32-bit
// values; the `true` specialization handles 64-bit values.
template <bool use32>
struct _hash_32_or_64 {
  static inline std::uint32_t hash(std::uint32_t h) {
    // MurmurHash3 finalizer -- see
    // https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
    // Since the thread ID is already unique, all we really want to do is
    // propagate that uniqueness evenly across all the bits, so that we can use
    // a subset of the bits while reducing collisions significantly
    h = (h ^ (h >> 16)) * 0x85ebca6b;
    h = (h ^ (h >> 13)) * 0xc2b2ae35;
    return h ^ (h >> 16);
  }
};

// 64-bit variant of the same finalizer (different shifts and multipliers).
template <>
struct _hash_32_or_64<true> {
  static inline std::uint64_t hash(std::uint64_t h) {
    h = (h ^ (h >> 33)) * 0xff51afd7ed558ccd;
    h = (h ^ (h >> 33)) * 0xc4ceb9fe1a85ec53;
    return h ^ (h >> 33);
  }
};

// Selects the hash by operand size in bytes: the 64-bit version for anything
// wider than 4 bytes, the 32-bit version otherwise.
template <std::size_t size>
struct hash_32_or_64 : public _hash_32_or_64<(size > 4)> {};

// Hashes a thread ID to a size_t. The ID is first passed through the
// platform's thread_id_converter::prehash, and the result is then mixed by
// the 32- or 64-bit hash selected from the size of the converter's hash type.
static inline size_t hash_thread_id(thread_id_t id) {
  static_assert(
      sizeof(thread_id_t) <= 8,
      "Expected a platform where thread IDs are at most 64-bit values");
  typedef thread_id_converter<thread_id_t> converter_t;
  typedef hash_32_or_64<sizeof(converter_t::thread_id_hash_t)> mixer_t;
  return static_cast<size_t>(mixer_t::hash(converter_t::prehash(id)));
}

// Wrap-around "less than" for unsigned counters: returns true when the
// modular difference a - b is strictly greater than half of T's range
// (2^(bits-1)). Equal values, and values exactly half the range apart,
// compare false.
template <typename T>
static inline bool circular_less_than(T a, T b) {
  static_assert(
      std::is_integral<T>::value && !std::numeric_limits<T>::is_signed,
      "circular_less_than is intended to be used only with unsigned integer "
      "types");
  // Note: extra parens around rhs of operator<< is MSVC bug:
  // https://developercommunity2.visualstudio.com/t/C4554-triggers-when-both-lhs-and-rhs-is/10034931
  //       silencing the bug requires #pragma warning(disable: 4554) around the
  //       calling code and has no effect when done here.
  const T halfRange = static_cast<T>(
      static_cast<T>(1) << (static_cast<T>(sizeof(T) * CHAR_BIT - 1)));
  const T distance = static_cast<T>(a - b);
  return distance > halfRange;
}

// Returns the first address at or after `ptr` that is suitably aligned for an
// object of type U. An already-aligned pointer is returned unchanged.
template <typename U>
static inline char *align_for(char *ptr) {
  const std::size_t alignment = std::alignment_of<U>::value;
  const std::uintptr_t misalignment =
      reinterpret_cast<std::uintptr_t>(ptr) % alignment;
  // The outer modulo turns a padding of `alignment` (aligned input) into 0.
  const std::uintptr_t padding = (alignment - misalignment) % alignment;
  return ptr + padding;
}

// Rounds x up to the next power of two; a power of two is returned unchanged.
// An input of 0, or one above the largest representable power of two, wraps
// around to 0.
template <typename T>
static inline T ceil_to_pow_2(T x) {
  static_assert(
      std::is_integral<T>::value && !std::numeric_limits<T>::is_signed,
      "ceil_to_pow_2 is intended to be used only with unsigned integer types");

  // Adapted from
  // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
  // Subtract one, smear the highest set bit into every lower position by
  // OR-ing with right shifts of 1, 2, 4, ... up to half the bit width, then
  // add one back.
  --x;
  for (std::size_t shift = 1; shift < (sizeof(T) << 3); shift <<= 1) {
    x |= x >> shift;
  }
  ++x;
  return x;
}

// Exchanges the values held by two atomics using relaxed loads and stores.
// The exchange as a whole is not atomic: it is a sequence of two loads
// followed by two stores.
template <typename T>
static inline void swap_relaxed(std::atomic<T> &left, std::atomic<T> &right) {
  const T oldLeft = left.load(std::memory_order_relaxed);
  const T oldRight = right.load(std::memory_order_relaxed);
  left.store(oldRight, std::memory_order_relaxed);
  right.store(oldLeft, std::memory_order_relaxed);
}

// Returns its argument as a const lvalue reference, so that an expression
// built from the result cannot move from the argument.
template <typename T>
static inline T const &nomove(T const &value) {
  return value;
}

// Conditional counterpart of nomove. With Enable == true (primary template)
// the argument comes back as a const lvalue reference, which prevents moving
// from it.
template <bool Enable>
struct nomove_if {
  template <typename T>
  static inline T const &eval(T const &value) {
    return value;
  }
};

// With Enable == false the argument is perfectly forwarded, so an rvalue
// stays an rvalue and may be moved from.
template <>
struct nomove_if<false> {
  template <typename U>
  static inline U &&eval(U &&value) {
    return std::forward<U>(value);
  }
};

// Dereferences an iterator inside a function marked MOODYCAMEL_NOEXCEPT and
// returns exactly what `*iter` yields.
template <typename It>
static inline auto deref_noexcept(It &iter) MOODYCAMEL_NOEXCEPT
    -> decltype(*iter) {
  return *iter;
}

// Portable is_trivially_destructible trait. Clang, non-GNU compilers and
// GCC >= 4.8 use std::is_trivially_destructible; older GCC falls back to
// std::has_trivial_destructor.
#if defined(__clang__) || !defined(__GNUC__) || __GNUC__ > 4 || \
    (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
template <typename T>
struct is_trivially_destructible : std::is_trivially_destructible<T> {};
#else
template <typename T>
struct is_trivially_destructible : std::has_trivial_destructor<T> {};
#endif

#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
#ifdef MCDBGQ_USE_RELACY
typedef RelacyThreadExitListener ThreadExitListener;
typedef RelacyThreadExitNotifier ThreadExitNotifier;
#else
class ThreadExitNotifier;

// One registration for a thread-exit notification. The owner fills in
// `callback` and `userData`; ThreadExitNotifier manages `next` and `chain`.
struct ThreadExitListener {
  // Signature of the function invoked with `userData` when the thread exits.
  typedef void (*callback_t)(void *);
  callback_t callback;
  void *userData;

  ThreadExitListener *next;   // reserved for use by the ThreadExitNotifier
  ThreadExitNotifier *chain;  // reserved for use by the ThreadExitNotifier
};

// Per-thread list of ThreadExitListeners. Each thread owns one thread_local
// instance; when that instance is destroyed, every listener still in its list
// has its callback invoked. All list manipulation is serialized by a single
// process-wide mutex.
class ThreadExitNotifier {
 public:
  // Adds `listener` to the front of the calling thread's list and records
  // that list in listener->chain.
  static void subscribe(ThreadExitListener *listener) {
    // Obtain the thread-local instance before taking the lock.
    auto &tlsInst = instance();
    std::lock_guard<std::mutex> guard(mutex());
    listener->next = tlsInst.tail;
    listener->chain = &tlsInst;
    tlsInst.tail = listener;
  }

  // Removes `listener` from the list it was subscribed to. Does nothing if
  // its chain pointer has already been cleared.
  static void unsubscribe(ThreadExitListener *listener) {
    std::lock_guard<std::mutex> guard(mutex());
    if (!listener->chain) {
      return;  // race with ~ThreadExitNotifier
    }
    auto &tlsInst = *listener->chain;
    listener->chain = nullptr;
    // `prev` always points at the link that refers to the node under
    // inspection, so unlinking is a single pointer overwrite.
    ThreadExitListener **prev = &tlsInst.tail;
    for (auto ptr = tlsInst.tail; ptr != nullptr; ptr = ptr->next) {
      if (ptr == listener) {
        *prev = ptr->next;
        break;
      }
      prev = &ptr->next;
    }
  }

 private:
  // Only instance() creates notifiers; copying is disabled.
  ThreadExitNotifier() : tail(nullptr) {}
  ThreadExitNotifier(ThreadExitNotifier const &) MOODYCAMEL_DELETE_FUNCTION;
  ThreadExitNotifier &operator=(ThreadExitNotifier const &)
      MOODYCAMEL_DELETE_FUNCTION;

  ~ThreadExitNotifier() {
    // This thread is about to exit, let everyone know!
    assert(this == &instance() &&
           "If this assert fails, you likely have a buggy compiler! Change the "
           "preprocessor conditions such that "
           "MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED is no longer defined.");
    std::lock_guard<std::mutex> guard(mutex());
    for (auto ptr = tail; ptr != nullptr; ptr = ptr->next) {
      // Clear the chain first so a later unsubscribe() returns early instead
      // of touching this (soon to be destroyed) notifier.
      ptr->chain = nullptr;
      ptr->callback(ptr->userData);
    }
  }

  // Thread-local
  static inline ThreadExitNotifier &instance() {
    static thread_local ThreadExitNotifier notifier;
    return notifier;
  }

  // The mutex shared by all threads' notifiers.
  static inline std::mutex &mutex() {
    // Must be static because the ThreadExitNotifier could be destroyed while
    // unsubscribe is called
    static std::mutex mutex;
    return mutex;
  }

 private:
  // Head of the singly linked listener list (most recent subscription first).
  ThreadExitListener *tail;
};
#endif
#endif

// Compile-time lookup of the standard ATOMIC_*_LOCK_FREE macro for a signed
// integer type. Types without a specialization yield 0.
template <typename T>
struct static_is_lock_free_num {
  enum { value = 0 };
};
template <>
struct static_is_lock_free_num<signed char> {
  enum { value = ATOMIC_CHAR_LOCK_FREE };
};
template <>
struct static_is_lock_free_num<short> {
  enum { value = ATOMIC_SHORT_LOCK_FREE };
};
template <>
struct static_is_lock_free_num<int> {
  enum { value = ATOMIC_INT_LOCK_FREE };
};
template <>
struct static_is_lock_free_num<long> {
  enum { value = ATOMIC_LONG_LOCK_FREE };
};
template <>
struct static_is_lock_free_num<long long> {
  enum { value = ATOMIC_LLONG_LOCK_FREE };
};
// General entry point: maps T to its signed counterpart via std::make_signed
// and looks that up in the table above.
template <typename T>
struct static_is_lock_free
    : static_is_lock_free_num<typename std::make_signed<T>::type> {};
// bool is handled separately and uses ATOMIC_BOOL_LOCK_FREE.
template <>
struct static_is_lock_free<bool> {
  enum { value = ATOMIC_BOOL_LOCK_FREE };
};
// Any pointer type uses ATOMIC_POINTER_LOCK_FREE.
template <typename U>
struct static_is_lock_free<U *> {
  enum { value = ATOMIC_POINTER_LOCK_FREE };
};
}  // namespace details


// Move-only handle referring to one producer of a queue. The producer keeps a
// back-pointer to the token currently referring to it, which is updated
// whenever the token is moved or swapped.
struct ProducerToken {
  // Construction from a queue; both constructors are only declared here.
  template <typename T, typename Traits>
  explicit ProducerToken(ConcurrentQueue<T, Traits> &queue);

  template <typename T, typename Traits>
  explicit ProducerToken(BlockingConcurrentQueue<T, Traits> &queue);

  // Takes over `other`'s producer and leaves `other` without one.
  ProducerToken(ProducerToken &&other) MOODYCAMEL_NOEXCEPT
      : producer(other.producer) {
    other.producer = nullptr;
    if (nullptr == producer) {
      return;
    }
    // Re-point the producer's back-reference at the new owner.
    producer->token = this;
  }

  // Move assignment is implemented as a swap, so `other` ends up holding
  // whatever this token held before.
  inline ProducerToken &operator=(ProducerToken &&other) MOODYCAMEL_NOEXCEPT {
    this->swap(other);
    return *this;
  }

  // Exchanges the producers of two tokens and fixes up both back-pointers.
  void swap(ProducerToken &other) MOODYCAMEL_NOEXCEPT {
    details::ConcurrentQueueProducerTypelessBase *const previous = producer;
    producer = other.producer;
    other.producer = previous;

    if (nullptr != producer) {
      producer->token = this;
    }
    if (nullptr != other.producer) {
      other.producer->token = &other;
    }
  }

  // A token is always valid unless:
  //     1) Memory allocation failed during construction
  //     2) It was moved via the move constructor
  //        (Note: assignment does a swap, leaving both potentially valid)
  //     3) The associated queue was destroyed
  // Note that if valid() returns true, that only indicates
  // that the token is valid for use with a specific queue,
  // but not which one; that's up to the user to track.
  inline bool valid() const {
    return nullptr != producer;
  }

  // Detaches from the producer (if any) and flags it as inactive.
  ~ProducerToken() {
    if (nullptr == producer) {
      return;
    }
    producer->token = nullptr;
    producer->inactive.store(true, std::memory_order_release);
  }

  // Disable copying and assignment
  ProducerToken(ProducerToken const &) MOODYCAMEL_DELETE_FUNCTION;
  ProducerToken &operator=(ProducerToken const &) MOODYCAMEL_DELETE_FUNCTION;

 private:
  template <typename T, typename Traits>
  friend class ConcurrentQueue;
  friend class ConcurrentQueueTests;

 protected:
  // The producer this token refers to; null when the token is not valid.
  details::ConcurrentQueueProducerTypelessBase *producer;
};


// Move-only bundle of per-consumer bookkeeping. The state is private and is
// accessed by ConcurrentQueue (a friend); this struct itself only copies and
// swaps it.
struct ConsumerToken {
  // Construction from a queue; both constructors are only declared here.
  template <typename T, typename Traits>
  explicit ConsumerToken(ConcurrentQueue<T, Traits> &q);

  template <typename T, typename Traits>
  explicit ConsumerToken(BlockingConcurrentQueue<T, Traits> &q);

  // Copies every field from `other`; `other` itself is left unmodified.
  ConsumerToken(ConsumerToken &&other) MOODYCAMEL_NOEXCEPT
      : initialOffset(other.initialOffset),
        lastKnownGlobalOffset(other.lastKnownGlobalOffset),
        itemsConsumedFromCurrent(other.itemsConsumedFromCurrent),
        currentProducer(other.currentProducer),
        desiredProducer(other.desiredProducer) {}

  // Move assignment is implemented as a swap, so `other` ends up holding
  // whatever this token held before.
  inline ConsumerToken &operator=(ConsumerToken &&other) MOODYCAMEL_NOEXCEPT {
    swap(other);
    return *this;
  }

  // Exchanges all fields of the two tokens, member by member.
  void swap(ConsumerToken &other) MOODYCAMEL_NOEXCEPT {
    std::swap(initialOffset, other.initialOffset);
    std::swap(lastKnownGlobalOffset, other.lastKnownGlobalOffset);
    std::swap(itemsConsumedFromCurrent, other.itemsConsumedFromCurrent);
    std::swap(currentProducer, other.currentProducer);
    std::swap(desiredProducer, other.desiredProducer);
  }

  // Disable copying and assignment
  ConsumerToken(ConsumerToken const &) MOODYCAMEL_DELETE_FUNCTION;
  ConsumerToken &operator=(ConsumerToken const &) MOODYCAMEL_DELETE_FUNCTION;

 private:
  template <typename T, typename Traits>
  friend class ConcurrentQueue;
  friend class ConcurrentQueueTests;

 private:  // but shared with ConcurrentQueue
  // NOTE(review): the exact meaning of these fields is defined by the queue
  // code that reads and writes them, which is not visible here.
  std::uint32_t initialOffset;
  std::uint32_t lastKnownGlobalOffset;
  std::uint32_t itemsConsumedFromCurrent;
  details::ConcurrentQueueProducerTypelessBase *currentProducer;
  details::ConcurrentQueueProducerTypelessBase *desiredProducer;
};

// Need to forward-declare this swap because it's in a namespace.
// See
// http://stackoverflow.com/questions/4492062/why-does-a-c-friend-class-need-a-forward-declaration-only-in-other-namespaces
template <typename T, typename Traits>
inline void swap(typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP &a,
                 typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP &b)
    MOODYCAMEL_NOEXCEPT;


template <typename T, typename Traits = ConcurrentQueueDefaultTraits>
class ConcurrentQueue {
 public:
  typedef ::moodycamel::ProducerToken producer_token_t;
  typedef ::moodycamel::ConsumerToken consumer_token_t;

  typedef typename Traits::index_t index_t;
  typedef typename Traits::size_t size_t;

  static const size_t BLOCK_SIZE = static_cast<size_t>(Traits::BLOCK_SIZE);
  static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD =
      static_cast<size_t>(Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD);
  static const size_t EXPLICIT_INITIAL_INDEX_SIZE =
      static_cast<size_t>(Traits::EXPLICIT_INITIAL_INDEX_SIZE);
  static const size_t IMPLICIT_INITIAL_INDEX_SIZE =
      static_cast<size_t>(Traits::IMPLICIT_INITIAL_INDEX_SIZE);
  static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE =
      static_cast<size_t>(Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE);
  static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE =
      static_cast<std::uint32_t>(
          Traits::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE);
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4307)  // + integral constant overflow (that's what
                                 // the ternary expression is for!)
#pragma warning(disable : 4309)  // static_cast: Truncation of constant value
#endif
  // Traits::MAX_SUBQUEUE_SIZE rounded up to a whole multiple of BLOCK_SIZE.
  // When the distance between Traits::MAX_SUBQUEUE_SIZE and
  // details::const_numeric_max<size_t>::value is smaller than BLOCK_SIZE,
  // rounding up could overflow, so const_numeric_max<size_t>::value is used
  // instead.
  static const size_t MAX_SUBQUEUE_SIZE =
      (details::const_numeric_max<size_t>::value -
           static_cast<size_t>(Traits::MAX_SUBQUEUE_SIZE) <
       BLOCK_SIZE)
          ? details::const_numeric_max<size_t>::value
          : ((static_cast<size_t>(Traits::MAX_SUBQUEUE_SIZE) +
              (BLOCK_SIZE - 1)) /
             BLOCK_SIZE * BLOCK_SIZE);
#ifdef _MSC_VER
#pragma warning(pop)
#endif

  // Compile-time validation of the Traits configuration.

  // Both integer types must be unsigned integral types, and index_t must be
  // at least as wide as size_t.
  static_assert(!std::numeric_limits<size_t>::is_signed &&
                    std::is_integral<size_t>::value,
                "Traits::size_t must be an unsigned integral type");
  static_assert(!std::numeric_limits<index_t>::is_signed &&
                    std::is_integral<index_t>::value,
                "Traits::index_t must be an unsigned integral type");
  static_assert(sizeof(index_t) >= sizeof(size_t),
                "Traits::index_t must be at least as wide as Traits::size_t");
  // The `x & (x - 1)` test below is zero exactly when x has a single bit set,
  // i.e. when x is a power of two.
  static_assert((BLOCK_SIZE > 1) && !(BLOCK_SIZE & (BLOCK_SIZE - 1)),
                "Traits::BLOCK_SIZE must be a power of 2 (and at least 2)");
  static_assert((EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD > 1) &&
                    !(EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD &
                      (EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD - 1)),
                "Traits::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD must be a "
                "power of 2 (and greater than 1)");
  static_assert((EXPLICIT_INITIAL_INDEX_SIZE > 1) &&
                    !(EXPLICIT_INITIAL_INDEX_SIZE &
                      (EXPLICIT_INITIAL_INDEX_SIZE - 1)),
                "Traits::EXPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and "
                "greater than 1)");
  static_assert((IMPLICIT_INITIAL_INDEX_SIZE > 1) &&
                    !(IMPLICIT_INITIAL_INDEX_SIZE &
                      (IMPLICIT_INITIAL_INDEX_SIZE - 1)),
                "Traits::IMPLICIT_INITIAL_INDEX_SIZE must be a power of 2 (and "
                "greater than 1)");
  // A hash size of 0 is allowed; it disables implicit enqueueing.
  static_assert(
      (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) ||
          !(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE &
            (INITIAL_IMPLICIT_PRODUCER_HASH_SIZE - 1)),
      "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be a power of 2");
  // NOTE(review): size_t is asserted unsigned above, so this condition
  // (== 0 or >= 1) holds for every value; it acts as documentation only.
  static_assert(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0 ||
                    INITIAL_IMPLICIT_PRODUCER_HASH_SIZE >= 1,
                "Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE must be at least "
                "1 (or 0 to disable implicit enqueueing)");

 public:
  // Constructs a queue with room for at least `capacity` elements.
  //
  // The number of elements that can actually be inserted without a further
  // allocation depends on the producer count and the block size: blocks are
  // never shared between producers, so if `capacity` equals the block size
  // only one block is pre-allocated and only one producer can enqueue without
  // allocating.
  //
  // Not thread-safe: the caller must make sure construction has completed
  // (and its memory effects are visible, e.g. via a memory barrier) before
  // any other thread touches the queue.
  explicit ConcurrentQueue(size_t capacity = 32 * BLOCK_SIZE)
      : producerListTail(nullptr),
        producerCount(0),
        initialBlockPoolIndex(0),
        nextExplicitConsumerId(0),
        globalExplicitConsumerOffset(0) {
    implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
    populate_initial_implicit_producer_hash();

    // Round the requested capacity up to a whole number of blocks.
    size_t wholeBlocks = capacity / BLOCK_SIZE;
    if ((capacity & (BLOCK_SIZE - 1)) != 0) {
      ++wholeBlocks;
    }
    populate_initial_block_list(wholeBlocks);

#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
    // Keep a fully-typed list of each kind of producer so they can be
    // inspected from the root queue object in a debugger (the alternative
    // needs casts that the debugger's expression evaluator can't compile).
    explicitProducers.store(nullptr, std::memory_order_relaxed);
    implicitProducers.store(nullptr, std::memory_order_relaxed);
#endif
  }

  // Computes the number of blocks to pre-allocate from the minimum number of
  // elements that should be available at any given time (`minCapacity`) and
  // the maximum number of concurrent explicit and implicit producers, then
  // pre-allocates that many blocks.
  //
  // The block count is
  //   (ceil(minCapacity / BLOCK_SIZE) - 1) * (maxExplicitProducers + 1)
  //     + 2 * (maxExplicitProducers + maxImplicitProducers)
  // where the first factor is treated as 0 when minCapacity is 0. Without
  // that guard the unsigned subtraction wraps around, and with no producers
  // the requested block count becomes the largest size_t value.
  //
  // Like the other constructor, this is not thread-safe.
  ConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers,
                  size_t maxImplicitProducers)
      : producerListTail(nullptr),
        producerCount(0),
        initialBlockPoolIndex(0),
        nextExplicitConsumerId(0),
        globalExplicitConsumerOffset(0) {
    implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
    populate_initial_implicit_producer_hash();

    // Blocks needed to hold minCapacity elements, rounded up. BLOCK_SIZE is a
    // power of two, so the mask is the remainder; this form never adds to
    // minCapacity and therefore cannot overflow.
    size_t capacityBlocks = minCapacity / BLOCK_SIZE +
                            ((minCapacity & (BLOCK_SIZE - 1)) == 0 ? 0 : 1);
    // Guard the "- 1" so that a zero minCapacity does not wrap around.
    size_t extraBlocks = capacityBlocks == 0 ? 0 : capacityBlocks - 1;
    size_t blocks = extraBlocks * (maxExplicitProducers + 1) +
                    2 * (maxExplicitProducers + maxImplicitProducers);
    populate_initial_block_list(blocks);

#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
    explicitProducers.store(nullptr, std::memory_order_relaxed);
    implicitProducers.store(nullptr, std::memory_order_relaxed);
#endif
  }

  // Destroys the queue. Not thread-safe: the queue must not be accessed
  // concurrently while it is being destroyed, and it is up to the user to
  // synchronize that.
  ~ConcurrentQueue() {
    // Producers: detach any token still pointing at a producer, then destroy
    // the producer. The successor is read before the node is destroyed.
    for (auto prod = producerListTail.load(std::memory_order_relaxed);
         prod != nullptr;) {
      auto following = prod->next_prod();
      if (prod->token != nullptr) {
        prod->token->producer = nullptr;
      }
      destroy(prod);
      prod = following;
    }

    // Implicit producer hash tables: walk the chain through `prev`. The table
    // whose `prev` is null is part of this object and was not allocated
    // dynamically, so it is skipped.
    MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE != 0) {
      for (auto table = implicitProducerHash.load(std::memory_order_relaxed);
           table != nullptr;) {
        auto older = table->prev;
        if (older != nullptr) {
          for (size_t slot = 0; slot != table->capacity; ++slot) {
            table->entries[slot].~ImplicitProducerKVP();
          }
          table->~ImplicitProducerHash();
          (Traits::free)(table);
        }
        table = older;
      }
    }

    // Global free list: only blocks flagged as dynamically allocated are
    // destroyed individually.
    for (auto blk = freeList.head_unsafe(); blk != nullptr;) {
      auto following = blk->freeListNext.load(std::memory_order_relaxed);
      if (blk->dynamicallyAllocated) {
        destroy(blk);
      }
      blk = following;
    }

    // Initial block pool, released as one array.
    destroy_array(initialBlockPool, initialBlockPoolSize);
  }

  // Copy construction and copy assignment are disabled (declared with
  // MOODYCAMEL_DELETE_FUNCTION); moving is supported instead.
  ConcurrentQueue(ConcurrentQueue const &) MOODYCAMEL_DELETE_FUNCTION;
  ConcurrentQueue &operator=(ConcurrentQueue const &)
      MOODYCAMEL_DELETE_FUNCTION;

  // Move constructor. *Not* thread-safe: nobody may use either queue while
  // the move is in progress, and the memory effects of the move must be
  // propagated to other threads before they use the destination.
  // Tokens created for the source queue remain valid but may only be used
  // with the destination queue afterwards (semantically they move along with
  // the queue). The source is left as an empty queue.
  ConcurrentQueue(ConcurrentQueue &&other) MOODYCAMEL_NOEXCEPT
      : producerListTail(
            other.producerListTail.load(std::memory_order_relaxed)),
        producerCount(other.producerCount.load(std::memory_order_relaxed)),
        initialBlockPoolIndex(
            other.initialBlockPoolIndex.load(std::memory_order_relaxed)),
        initialBlockPool(other.initialBlockPool),
        initialBlockPoolSize(other.initialBlockPoolSize),
        freeList(std::move(other.freeList)),
        nextExplicitConsumerId(
            other.nextExplicitConsumerId.load(std::memory_order_relaxed)),
        globalExplicitConsumerOffset(other.globalExplicitConsumerOffset.load(
            std::memory_order_relaxed)) {
    // Set up a fresh implicit-producer hash here, then trade it for other's.
    implicitProducerHashResizeInProgress.clear(std::memory_order_relaxed);
    populate_initial_implicit_producer_hash();
    swap_implicit_producer_hashes(other);

    // Reset everything that was copied out of `other` in the initializer list.
    other.producerListTail.store(nullptr, std::memory_order_relaxed);
    other.producerCount.store(0, std::memory_order_relaxed);
    other.initialBlockPoolIndex.store(0, std::memory_order_relaxed);
    other.initialBlockPool = nullptr;
    other.initialBlockPoolSize = 0;
    other.nextExplicitConsumerId.store(0, std::memory_order_relaxed);
    other.globalExplicitConsumerOffset.store(0, std::memory_order_relaxed);

#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
    // Take over the debug-only typed producer lists as well.
    auto movedExplicit =
        other.explicitProducers.load(std::memory_order_relaxed);
    explicitProducers.store(movedExplicit, std::memory_order_relaxed);
    other.explicitProducers.store(nullptr, std::memory_order_relaxed);

    auto movedImplicit =
        other.implicitProducers.load(std::memory_order_relaxed);
    implicitProducers.store(movedImplicit, std::memory_order_relaxed);
    other.implicitProducers.store(nullptr, std::memory_order_relaxed);
#endif

    reown_producers();
  }

  // Move assignment, implemented as a swap of the two queues' state: after
  // the call `other` holds what this queue held before. Not thread-safe.
  inline ConcurrentQueue &operator=(ConcurrentQueue &&other)
      MOODYCAMEL_NOEXCEPT {
    swap_internal(other);
    return *this;
  }

  // Swaps this queue's state with the other's. Not thread-safe.
  // Swapping two queues does not invalidate their tokens, however
  // the tokens that were created for one queue must be used with
  // only the swapped queue (i.e. the tokens are tied to the
  // queue's movable state, not the object itself).
  // Delegates to swap_internal and discards the reference it returns.
  inline void swap(ConcurrentQueue &other) MOODYCAMEL_NOEXCEPT {
    swap_internal(other);
  }

 private:
  // Exchanges the complete state of this queue with `other` and returns
  // *this. Swapping a queue with itself is a no-op. Atomic members are
  // exchanged with details::swap_relaxed, so this is not thread-safe.
  ConcurrentQueue &swap_internal(ConcurrentQueue &other) {
    if (this != &other) {
      details::swap_relaxed(producerListTail, other.producerListTail);
      details::swap_relaxed(producerCount, other.producerCount);
      details::swap_relaxed(initialBlockPoolIndex,
                            other.initialBlockPoolIndex);
      std::swap(initialBlockPool, other.initialBlockPool);
      std::swap(initialBlockPoolSize, other.initialBlockPoolSize);
      freeList.swap(other.freeList);
      details::swap_relaxed(nextExplicitConsumerId,
                            other.nextExplicitConsumerId);
      details::swap_relaxed(globalExplicitConsumerOffset,
                            other.globalExplicitConsumerOffset);

      swap_implicit_producer_hashes(other);

      // The producer lists have changed hands; run the re-owning step on
      // both queues.
      reown_producers();
      other.reown_producers();

#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
      details::swap_relaxed(explicitProducers, other.explicitProducers);
      details::swap_relaxed(implicitProducers, other.implicitProducers);
#endif
    }
    return *this;
  }

 public:
  // Enqueues a copy of `item` through the implicit producer path, allocating
  // memory if required. Thread-safe.
  // Returns false only if memory allocation fails, if implicit production is
  // disabled (Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0), or if
  // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed.
  inline bool enqueue(T const &item) {
    MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) {
      return false;
    } else {
      return inner_enqueue<CanAlloc>(item);
    }
  }

  // Enqueues `item` through the implicit producer path, moving it if
  // possible and allocating memory if required. Thread-safe.
  // Returns false only if memory allocation fails, if implicit production is
  // disabled (Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0), or if
  // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed.
  inline bool enqueue(T &&item) {
    MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) {
      return false;
    } else {
      return inner_enqueue<CanAlloc>(std::move(item));
    }
  }

  // Enqueues a single item (by copying it) using an explicit producer token.
  // Allocates memory if required. Only fails if memory allocation fails (or
  // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
  // Thread-safe.
  // Forwards to the token overload of inner_enqueue in CanAlloc mode.
  inline bool enqueue(producer_token_t const &token, T const &item) {
    return inner_enqueue<CanAlloc>(token, item);
  }

  // Enqueues a single item (by moving it, if possible) using an explicit
  // producer token. Allocates memory if required. Only fails if memory
  // allocation fails (or Traits::MAX_SUBQUEUE_SIZE has been defined and would
  // be surpassed). Thread-safe.
  // Forwards to the token overload of inner_enqueue in CanAlloc mode.
  inline bool enqueue(producer_token_t const &token, T &&item) {
    return inner_enqueue<CanAlloc>(token, std::move(item));
  }

  // Enqueues several items.
  // Allocates memory if required. Only fails if memory allocation fails (or
  // implicit production is disabled because
  // Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, or
  // Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed). Note:
  // Use std::make_move_iterator if the elements should be moved instead of
  // copied. Thread-safe.
  template <typename It>
  bool enqueue_bulk(It itemFirst, size_t count) {
    MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0)
    return false;
    else return inner_enqueue_bulk<CanAlloc>(itemFirst, count);
  }

  // Enqueues several items using an explicit producer token.
  // Allocates memory if required. Only fails if memory allocation fails
  // (or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
  // Note: Use std::make_move_iterator if the elements should be moved
  // instead of copied.
  // Thread-safe.
  // Forwards to the token overload of inner_enqueue_bulk in CanAlloc mode.
  template <typename It>
  bool enqueue_bulk(producer_token_t const &token, It itemFirst, size_t count) {
    return inner_enqueue_bulk<CanAlloc>(token, itemFirst, count);
  }

  // Enqueues a copy of `item` through the implicit producer path without
  // allocating memory. Thread-safe.
  // Returns false if there is not enough room to enqueue, or if implicit
  // production is disabled (Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0).
  inline bool try_enqueue(T const &item) {
    MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) {
      return false;
    } else {
      return inner_enqueue<CannotAlloc>(item);
    }
  }

  // Enqueues `item` through the implicit producer path, moving it if
  // possible, without allocating memory (except for the one-time implicit
  // producer). Thread-safe.
  // Returns false if there is not enough room to enqueue, or if implicit
  // production is disabled (Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0).
  inline bool try_enqueue(T &&item) {
    MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) {
      return false;
    } else {
      return inner_enqueue<CannotAlloc>(std::move(item));
    }
  }

  // Enqueues a single item (by copying it) using an explicit producer token.
  // Does not allocate memory. Fails if not enough room to enqueue.
  // Thread-safe.
  // Forwards to the token overload of inner_enqueue in CannotAlloc mode.
  inline bool try_enqueue(producer_token_t const &token, T const &item) {
    return inner_enqueue<CannotAlloc>(token, item);
  }

  // Enqueues a single item (by moving it, if possible) using an explicit
  // producer token. Does not allocate memory. Fails if not enough room to
  // enqueue. Thread-safe.
  // Forwards to the token overload of inner_enqueue in CannotAlloc mode.
  inline bool try_enqueue(producer_token_t const &token, T &&item) {
    return inner_enqueue<CannotAlloc>(token, std::move(item));
  }

  // Enqueues several items.
  // Does not allocate memory (except for one-time implicit producer).
  // Fails if not enough room to enqueue (or implicit production is
  // disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0).
  // Note: Use std::make_move_iterator if the elements should be moved
  // instead of copied.
  // Thread-safe.
  template <typename It>
  bool try_enqueue_bulk(It itemFirst, size_t count) {
    MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0)
    return false;
    else return inner_enqueue_bulk<CannotAlloc>(itemFirst, count);
  }

  // Enqueues several items using an explicit producer token.
  // Does not allocate memory. Fails if not enough room to enqueue.
  // Note: Use std::make_move_iterator if the elements should be moved
  // instead of copied.
  // Thread-safe.
  // Forwards to the token overload of inner_enqueue_bulk in CannotAlloc mode.
  template <typename It>
  bool try_enqueue_bulk(producer_token_t const &token, It itemFirst,
                        size_t count) {
    return inner_enqueue_bulk<CannotAlloc>(token, itemFirst, count);
  }


  // Attempts to dequeue from the queue.
  // Returns false if all producer streams appeared empty at the time they
  // were checked (so, the queue is likely but not guaranteed to be empty).
  // Never allocates. Thread-safe.
  template <typename U>
  bool try_dequeue(U &item) {
    // Instead of simply trying each producer in turn (which could cause
    // needless contention on the first producer), we score them heuristically.
    size_t nonEmptyCount = 0;
    ProducerBase *best = nullptr;
    size_t bestSize = 0;
    for (auto ptr = producerListTail.load(std::memory_order_acquire);
         nonEmptyCount < 3 && ptr != nullptr; ptr = ptr->next_prod()) {
      auto size = ptr->size_approx();
      if (size > 0) {
        if (size > bestSize) {
          bestSize = size;
          best = ptr;
        }
        ++nonEmptyCount;
      }
    }

    // If there was at least one non-empty queue but it appears empty at the
    // time we try to dequeue from it, we need to make sure every queue's been
    // tried
    if (nonEmptyCount > 0) {
      if ((details::likely)(best->dequeue(item))) {
        return true;
      }
      for (auto ptr = producerListTail.load(std::memory_order_acquire);
           ptr != nullptr; ptr = ptr->next_prod()) {
        if (ptr != best && ptr->dequeue(item)) {
          return true;
        }
      }
    }
    return false;
  }

  // Attempts to dequeue one element into `item`, trying the producers
  // strictly in list order.
  // Returns false if every producer stream appeared empty at the time it was
  // checked (so the queue is likely, but not guaranteed, to be empty).
  // Unlike try_dequeue(item), no attempt is made to reduce contention by
  // interleaving the order in which producer streams are dequeued from. This
  // can lower overall throughput under contention but gives more predictable
  // results with a single consumer thread; it is mostly useful for internal
  // unit tests. Never allocates. Thread-safe.
  template <typename U>
  bool try_dequeue_non_interleaved(U &item) {
    auto producer = producerListTail.load(std::memory_order_acquire);
    while (producer != nullptr) {
      if (producer->dequeue(item)) {
        return true;
      }
      producer = producer->next_prod();
    }
    return false;
  }

  // Attempts to dequeue from the queue using an explicit consumer token.
  // Returns false if all producer streams appeared empty at the time they
  // were checked (so, the queue is likely but not guaranteed to be empty).
  // Never allocates. Thread-safe.
  template <typename U>
  bool try_dequeue(consumer_token_t &token, U &item) {
    // The idea is roughly as follows:
    //  - Each time a consumer has taken
    //    EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE items from one
    //    producer, it makes everyone rotate (increases the global offset).
    //    This means the highest-efficiency consumer dictates the rotation
    //    speed of everyone else, more or less.
    //  - If you see that the global offset has changed, you must reset your
    //    consumption counter and move to your designated place.
    //  - If there are no items where you're supposed to be, keep moving until
    //    you find a producer with some items.
    //  - If the global offset has not changed but you've run out of items to
    //    consume, move over from your current position until you find a
    //    producer with something in it.

    // First use of the token, or the global offset moved since it was last
    // observed: recompute which producer this token should be reading from.
    if (token.desiredProducer == nullptr ||
        token.lastKnownGlobalOffset !=
            globalExplicitConsumerOffset.load(std::memory_order_relaxed)) {
      if (!update_current_producer_after_rotation(token)) {
        return false;
      }
    }

    // If there was at least one non-empty queue but it appears empty at the
    // time we try to dequeue from it, we need to make sure every queue's been
    // tried
    if (static_cast<ProducerBase *>(token.currentProducer)->dequeue(item)) {
      // Quota reached exactly: trigger a rotation for all token consumers.
      if (++token.itemsConsumedFromCurrent ==
          EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) {
        globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed);
      }
      return true;
    }

    // The current producer yielded nothing: walk the remaining producers,
    // wrapping from the end of the list back to the tail, until we are back
    // at the current producer.
    auto tail = producerListTail.load(std::memory_order_acquire);
    auto ptr = static_cast<ProducerBase *>(token.currentProducer)->next_prod();
    if (ptr == nullptr) {
      ptr = tail;
    }
    while (ptr != static_cast<ProducerBase *>(token.currentProducer)) {
      if (ptr->dequeue(item)) {
        // Stick with the producer that had something and restart the count.
        token.currentProducer = ptr;
        token.itemsConsumedFromCurrent = 1;
        return true;
      }
      ptr = ptr->next_prod();
      if (ptr == nullptr) {
        ptr = tail;
      }
    }
    return false;
  }

  // Attempts to dequeue up to `max` elements, writing them through
  // `itemFirst`, and returns how many were actually dequeued.
  // Returns 0 if every producer stream appeared empty at the time it was
  // checked (so the queue is likely, but not guaranteed, to be empty).
  // Never allocates. Thread-safe.
  template <typename It>
  size_t try_dequeue_bulk(It itemFirst, size_t max) {
    size_t total = 0;
    auto producer = producerListTail.load(std::memory_order_acquire);
    while (producer != nullptr) {
      // Ask each producer only for what is still missing.
      total += producer->dequeue_bulk(itemFirst, max - total);
      if (total == max) {
        break;
      }
      producer = producer->next_prod();
    }
    return total;
  }

  // Attempts to dequeue several elements from the queue using an explicit
  // consumer token. Returns the number of items actually dequeued. Returns 0 if
  // all producer streams appeared empty at the time they were checked (so, the
  // queue is likely but not guaranteed to be empty). Never allocates.
  // Thread-safe.
  template <typename It>
  size_t try_dequeue_bulk(consumer_token_t &token, It itemFirst, size_t max) {
    // First use of the token, or the global offset moved since it was last
    // observed: recompute which producer this token should be reading from.
    if (token.desiredProducer == nullptr ||
        token.lastKnownGlobalOffset !=
            globalExplicitConsumerOffset.load(std::memory_order_relaxed)) {
      if (!update_current_producer_after_rotation(token)) {
        return 0;
      }
    }

    // Fast path: the current producer alone satisfies the whole request.
    size_t count = static_cast<ProducerBase *>(token.currentProducer)
                       ->dequeue_bulk(itemFirst, max);
    if (count == max) {
      // Quota reached or exceeded: trigger a rotation for all token consumers.
      if ((token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(max)) >=
          EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE) {
        globalExplicitConsumerOffset.fetch_add(1, std::memory_order_relaxed);
      }
      return max;
    }
    // Partial result: from here on `max` is the number of items still wanted.
    token.itemsConsumedFromCurrent += static_cast<std::uint32_t>(count);
    max -= count;

    // Walk the remaining producers, wrapping from the end of the list back to
    // the tail, until we are back at the token's current producer.
    auto tail = producerListTail.load(std::memory_order_acquire);
    auto ptr = static_cast<ProducerBase *>(token.currentProducer)->next_prod();
    if (ptr == nullptr) {
      ptr = tail;
    }
    while (ptr != static_cast<ProducerBase *>(token.currentProducer)) {
      auto dequeued = ptr->dequeue_bulk(itemFirst, max);
      count += dequeued;
      if (dequeued != 0) {
        // Switch to the producer that had something and restart the count.
        token.currentProducer = ptr;
        token.itemsConsumedFromCurrent = static_cast<std::uint32_t>(dequeued);
      }
      if (dequeued == max) {
        break;
      }
      max -= dequeued;
      ptr = ptr->next_prod();
      if (ptr == nullptr) {
        ptr = tail;
      }
    }
    return count;
  }


  // Attempts to dequeue from a specific producer's inner queue.
  // If you happen to know which producer you want to dequeue from, this
  // is significantly faster than using the general-case try_dequeue methods.
  // Returns false if the producer's queue appeared empty at the time it
  // was checked (so, the queue is likely but not guaranteed to be empty).
  // Never allocates. Thread-safe.
  // NOTE(review): `producer.producer` is cast and dereferenced without a null
  // check here, so the token presumably must still refer to a live producer
  // -- confirm against ProducerToken.
  template <typename U>
  inline bool try_dequeue_from_producer(producer_token_t const &producer,
                                        U &item) {
    return static_cast<ExplicitProducer *>(producer.producer)->dequeue(item);
  }

  // Attempts to dequeue several elements from a specific producer's inner
  // queue. Returns the number of items actually dequeued. If you happen to know
  // which producer you want to dequeue from, this is significantly faster than
  // using the general-case try_dequeue methods. Returns 0 if the producer's
  // queue appeared empty at the time it was checked (so, the queue is likely
  // but not guaranteed to be empty). Never allocates. Thread-safe.
  // NOTE(review): `producer.producer` is cast and dereferenced without a null
  // check here, so the token presumably must still refer to a live producer
  // -- confirm against ProducerToken.
  template <typename It>
  inline size_t try_dequeue_bulk_from_producer(producer_token_t const &producer,
                                               It itemFirst, size_t max) {
    return static_cast<ExplicitProducer *>(producer.producer)
        ->dequeue_bulk(itemFirst, max);
  }


  // Returns an estimate of the total number of elements currently in the
  // queue, obtained by summing size_approx() over every producer.
  // The estimate is only accurate if the queue has completely stabilized
  // before the call (i.e. all enqueue and dequeue operations have completed
  // and their memory effects are visible on the calling thread, and no
  // further operations start while this method runs). Thread-safe.
  size_t size_approx() const {
    size_t total = 0;
    auto producer = producerListTail.load(std::memory_order_acquire);
    while (producer != nullptr) {
      total += producer->size_approx();
      producer = producer->next_prod();
    }
    return total;
  }


  // Returns true if the underlying atomic variables used by
  // the queue are lock-free (they should be on most platforms).
  // Thread-safe.
  // Evaluated at compile time: details::static_is_lock_free<X>::value must
  // equal 2 for bool, size_t, std::uint32_t, index_t, void* and the numeric
  // thread-id type.
  static constexpr bool is_lock_free() {
    return details::static_is_lock_free<bool>::value == 2 &&
           details::static_is_lock_free<size_t>::value == 2 &&
           details::static_is_lock_free<std::uint32_t>::value == 2 &&
           details::static_is_lock_free<index_t>::value == 2 &&
           details::static_is_lock_free<void *>::value == 2 &&
           details::static_is_lock_free<typename details::thread_id_converter<
               details::thread_id_t>::thread_id_numeric_size_t>::value == 2;
  }


 private:
  // The token types, both producer kinds and the unit-test class are granted
  // access to the queue's private members.
  friend struct ProducerToken;
  friend struct ConsumerToken;
  struct ExplicitProducer;
  friend struct ExplicitProducer;
  struct ImplicitProducer;
  friend struct ImplicitProducer;
  friend class ConcurrentQueueTests;

  // Template argument that selects between the allocating entry points
  // (enqueue*, which pass CanAlloc) and the non-allocating ones
  // (try_enqueue*, which pass CannotAlloc).
  enum AllocationMode { CanAlloc, CannotAlloc };


  ///////////////////////////////
  // Queue methods
  ///////////////////////////////

  // Enqueues `element` on the explicit producer referenced by `token`,
  // perfectly forwarding it to ExplicitProducer::enqueue<canAlloc>.
  // The call is written with a qualified name.
  // NOTE(review): `token.producer` is cast and dereferenced without a null
  // check -- presumably the token must be valid; confirm with callers.
  template <AllocationMode canAlloc, typename U>
  inline bool inner_enqueue(producer_token_t const &token, U &&element) {
    return static_cast<ExplicitProducer *>(token.producer)
        ->ConcurrentQueue::ExplicitProducer::template enqueue<canAlloc>(
            std::forward<U>(element));
  }

  // Enqueues `element` on the implicit producer returned by
  // get_or_add_implicit_producer(), perfectly forwarding it to
  // ImplicitProducer::enqueue<canAlloc>. Returns false if no implicit
  // producer could be obtained.
  template <AllocationMode canAlloc, typename U>
  inline bool inner_enqueue(U &&element) {
    auto producer = get_or_add_implicit_producer();
    if (producer == nullptr) {
      return false;
    }
    return producer
        ->ConcurrentQueue::ImplicitProducer::template enqueue<canAlloc>(
            std::forward<U>(element));
  }

  // Enqueues `count` items starting at `itemFirst` on the explicit producer
  // referenced by `token`, via ExplicitProducer::enqueue_bulk<canAlloc>.
  // NOTE(review): `token.producer` is cast and dereferenced without a null
  // check -- presumably the token must be valid; confirm with callers.
  template <AllocationMode canAlloc, typename It>
  inline bool inner_enqueue_bulk(producer_token_t const &token, It itemFirst,
                                 size_t count) {
    return static_cast<ExplicitProducer *>(token.producer)
        ->ConcurrentQueue::ExplicitProducer::template enqueue_bulk<canAlloc>(
            itemFirst, count);
  }

  // Enqueues `count` items starting at `itemFirst` on the implicit producer
  // returned by get_or_add_implicit_producer(), via
  // ImplicitProducer::enqueue_bulk<canAlloc>. Returns false if no implicit
  // producer could be obtained.
  template <AllocationMode canAlloc, typename It>
  inline bool inner_enqueue_bulk(It itemFirst, size_t count) {
    auto producer = get_or_add_implicit_producer();
    if (producer == nullptr) {
      return false;
    }
    return producer
        ->ConcurrentQueue::ImplicitProducer::template enqueue_bulk<canAlloc>(
            itemFirst, count);
  }

  // Repositions a consumer token after the global consumer offset has changed
  // (or on the token's first use). On success, sets token.desiredProducer and
  // token.currentProducer to the producer the token should now read from,
  // records the observed global offset in token.lastKnownGlobalOffset, resets
  // token.itemsConsumedFromCurrent to 0 and returns true. Returns false only
  // when the token has never been positioned and the producer list is empty.
  inline bool update_current_producer_after_rotation(consumer_token_t &token) {
    // Ah, there's been a rotation, figure out where we should be!
    auto tail = producerListTail.load(std::memory_order_acquire);
    if (token.desiredProducer == nullptr && tail == nullptr) {
      return false;
    }
    auto prodCount = producerCount.load(std::memory_order_relaxed);
    auto globalOffset =
        globalExplicitConsumerOffset.load(std::memory_order_relaxed);
    if ((details::unlikely)(token.desiredProducer == nullptr)) {
      // Aha, first time we're dequeueing anything.
      // Figure out our local position
      // Note: offset is from start, not end, but we're traversing from end --
      // subtract from count first
      std::uint32_t offset = prodCount - 1 - (token.initialOffset % prodCount);
      token.desiredProducer = tail;
      // Step `offset` producers along the list, wrapping back to the tail when
      // the end of the list is reached.
      for (std::uint32_t i = 0; i != offset; ++i) {
        token.desiredProducer =
            static_cast<ProducerBase *>(token.desiredProducer)->next_prod();
        if (token.desiredProducer == nullptr) {
          token.desiredProducer = tail;
        }
      }
    }

    // Number of rotations this token has missed, reduced modulo the producer
    // count so the walk below covers less than one full lap of the list.
    std::uint32_t delta = globalOffset - token.lastKnownGlobalOffset;
    if (delta >= prodCount) {
      delta = delta % prodCount;
    }
    for (std::uint32_t i = 0; i != delta; ++i) {
      token.desiredProducer =
          static_cast<ProducerBase *>(token.desiredProducer)->next_prod();
      if (token.desiredProducer == nullptr) {
        token.desiredProducer = tail;
      }
    }

    token.lastKnownGlobalOffset = globalOffset;
    token.currentProducer = token.desiredProducer;
    token.itemsConsumedFromCurrent = 0;
    return true;
  }


  ///////////////////////////
  // Free list
  ///////////////////////////

  // Per-node bookkeeping for the lock-free free list: a 32-bit word of
  // reference/flag bits and the link to the next free node. A new node
  // starts with a zero refs word and no successor.
  template <typename N>
  struct FreeListNode {
    FreeListNode() : freeListRefs{0}, freeListNext{nullptr} {}

    // Reference count and flag bits, manipulated atomically by FreeList.
    std::atomic<std::uint32_t> freeListRefs;
    // Next node on the free list, or nullptr.
    std::atomic<N *> freeListNext;
  };

  // A simple CAS-based lock-free free list. Not the fastest thing in the world
  // under heavy contention, but simple and correct (assuming nodes are never
  // freed until after the free list is destroyed), and fairly speedy under low
  // contention.
  template <typename N>  // N must inherit FreeListNode or have the same fields
                         // (and initialization of them)
  struct FreeList {
    // Constructs an empty free list (null head pointer).
    FreeList() : freeListHead(nullptr) {}
    // Move constructor: takes over other's head pointer and leaves other with
    // a null head. Only relaxed atomic accesses are used.
    FreeList(FreeList &&other)
        : freeListHead(other.freeListHead.load(std::memory_order_relaxed)) {
      other.freeListHead.store(nullptr, std::memory_order_relaxed);
    }
    // Exchanges the head pointers of the two lists via details::swap_relaxed.
    void swap(FreeList &other) {
      details::swap_relaxed(freeListHead, other.freeListHead);
    }

    // Copy construction and copy assignment are disabled (declared with
    // MOODYCAMEL_DELETE_FUNCTION).
    FreeList(FreeList const &) MOODYCAMEL_DELETE_FUNCTION;
    FreeList &operator=(FreeList const &) MOODYCAMEL_DELETE_FUNCTION;

    // Hands `node` back to the free list. The SHOULD_BE_ON_FREELIST flag is
    // added to the node's refs word; the node is only linked in right away
    // (via add_knowing_refcount_is_zero) when the previous value of that word
    // was 0. When MCDBGQ_NOLOCKFREE_FREELIST is defined, the call is
    // additionally wrapped in a debug::DebugLock on `mutex`.
    inline void add(N *node) {
#ifdef MCDBGQ_NOLOCKFREE_FREELIST
      debug::DebugLock lock(mutex);
#endif
      // We know that the should-be-on-freelist bit is 0 at this point, so it's
      // safe to set it using a fetch_add
      if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST,
                                       std::memory_order_acq_rel) == 0) {
        // Oh look! We were the last ones referencing this node, and we know
        // we want to add it to the free list, so let's do it!
        add_knowing_refcount_is_zero(node);
      }
    }

    inline N *try_get() {
#ifdef MCDBGQ_NOLOCKFREE_FREELIST
      debug::DebugLock lock(mutex);
#endif
      auto head = freeListHead.load(std::memory_order_acquire);
      while (head != nullptr) {
        auto prevHead = head;
        auto refs = head->freeListRefs.load(std::memory_order_relaxed);
        if ((refs & REFS_MASK) == 0 ||
            !head->freeListRefs.compare_exchange_strong(
                refs, refs + 1, std::memory_order_acquire)) {
          head = freeListHead.load(std::memory_order_acquire);
          continue;
        }

        // Good, reference count has been incremented (it wasn't at zero), which
        // means we can read the next and not worry about it changing between
        // now and the time we do the CAS
        auto next = head->freeListNext.load(std::memory_order_relaxed);
        if (freeListHead.compare_exchange_strong(head, next,
                                                 std::memory_order_acquire,
                                                 std::memory_order_relaxed)) {
          // Yay, got the node. This means it was on the list, which means
          // shouldBeOnFreeList must be false no matter the refcount (because
          // nobody else knows it's been taken off yet, it can't have been put
          // back on).
          assert((head->freeListRefs.load(std::memory_order_relaxed) &
                  SHOULD_BE_ON_FREELIST) == 0);

          // Decrease refcount twice, once for our ref, and once for the list's
          // ref
          head->freeListRefs.fetch_sub(2, std::memory_order_release);
          return head;
        }

        // OK, the head must have changed on us, but we still need to decrease
        // the refcount we increased. Note that we don't need to release any
        // memory effects, but we do need to ensure that the reference count
        // decrement happens-after the CAS on the head.
        refs = prevHead->freeListRefs.fetch_sub(1, std::memory_order_acq_rel);
        if (refs == SHOULD_BE_ON_FREELIST + 1) {
          add_knowing_refcount_is_zero(prevHead);
        }
      }

      return nullptr;
    }

    // Useful for traversing the list when there's no contention (e.g. to
    // destroy remaining nodes)
    N *head_unsafe() const {
      return freeListHead.load(std::memory_order_relaxed);
    }

   private:
    inline void add_knowing_refcount_is_zero(N *node) {
      // Since the refcount is zero, and nobody can increase it once it's zero
      // (except us, and we run only one copy of this method per node at a time,
      // i.e. the single thread case), then we know we can safely change the
      // next pointer of the node; however, once the refcount is back above
      // zero, then other threads could increase it (happens under heavy
      // contention, when the refcount goes to zero in between a load and a
      // refcount increment of a node in try_get, then back up to something
      // non-zero, then the refcount increment is done by the other thread) --
      // so, if the CAS to add the node to the actual list fails, decrease the
      // refcount and leave the add operation to the next thread who puts the
      // refcount back at zero (which could be us, hence the loop).
      auto head = freeListHead.load(std::memory_order_relaxed);
      while (true) {
        node->freeListNext.store(head, std::memory_order_relaxed);
        node->freeListRefs.store(1, std::memory_order_release);
        if (!freeListHead.compare_exchange_strong(head, node,
                                                  std::memory_order_release,
                                                  std::memory_order_relaxed)) {
          // Hmm, the add failed, but we can only try again when the refcount
          // goes back to zero
          if (node->freeListRefs.fetch_add(SHOULD_BE_ON_FREELIST - 1,
                                           std::memory_order_acq_rel) == 1) {
            continue;
          }
        }
        return;
      }
    }

   private:
    // Implemented like a stack, but where node order doesn't matter (nodes are
    // inserted out of order under contention)
    std::atomic<N *> freeListHead;

    static const std::uint32_t REFS_MASK = 0x7FFFFFFF;
    static const std::uint32_t SHOULD_BE_ON_FREELIST = 0x80000000;

#ifdef MCDBGQ_NOLOCKFREE_FREELIST
    debug::DebugMutex mutex;
#endif
  };


  ///////////////////////////
  // Block
  ///////////////////////////

  // Compile-time tag passed as a template argument to the Block emptiness
  // helpers (is_empty, set_empty, ...) to tell them which kind of producer is
  // operating on the block.
  enum InnerQueueContext {
    implicit_context = 0,
    explicit_context = 1
  };

  // A fixed-capacity chunk of storage for BLOCK_SIZE elements of type T,
  // together with the bookkeeping needed to tell when every element in it has
  // been dequeued. Emptiness is tracked in one of two ways, selected at
  // compile time by each helper below:
  //   * explicit context with BLOCK_SIZE <=
  //     EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD: one atomic flag per slot
  //     (emptyFlags);
  //   * otherwise: a single atomic counter (elementsCompletelyDequeued).
  // The struct also carries the freeListRefs/freeListNext fields that
  // FreeList requires of its nodes.
  struct Block {
    // A new block has no successor, a zero dequeue counter, zeroed free list
    // bookkeeping and is marked as dynamically allocated.
    Block()
        : next(nullptr),
          elementsCompletelyDequeued(0),
          freeListRefs(0),
          freeListNext(nullptr),
          dynamicallyAllocated(true) {
#ifdef MCDBGQ_TRACKMEM
      owner = nullptr;
#endif
    }

    // Returns true if every slot of the block has been marked empty. On a
    // true result an acquire fence is issued so the caller also sees the
    // memory effects that preceded the marking.
    template <InnerQueueContext context>
    inline bool is_empty() const {
      MOODYCAMEL_CONSTEXPR_IF(context == explicit_context &&
                              BLOCK_SIZE <=
                                  EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
        // Check flags
        for (size_t i = 0; i < BLOCK_SIZE; ++i) {
          if (!emptyFlags[i].load(std::memory_order_relaxed)) {
            return false;
          }
        }

        // Aha, empty; make sure we have all other memory effects that happened
        // before the empty flags were set
        std::atomic_thread_fence(std::memory_order_acquire);
        return true;
      }
      else {
        // Check counter
        if (elementsCompletelyDequeued.load(std::memory_order_relaxed) ==
            BLOCK_SIZE) {
          std::atomic_thread_fence(std::memory_order_acquire);
          return true;
        }
        assert(elementsCompletelyDequeued.load(std::memory_order_relaxed) <=
               BLOCK_SIZE);
        return false;
      }
    }

    // Marks the slot for index `i` as empty.
    // Returns true if the block is now empty (does not apply in explicit
    // context)
    template <InnerQueueContext context>
    inline bool set_empty(MOODYCAMEL_MAYBE_UNUSED index_t i) {
      MOODYCAMEL_CONSTEXPR_IF(context == explicit_context &&
                              BLOCK_SIZE <=
                                  EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
        // Set flag
        // Note the flags are stored in reverse slot order: slot s maps to
        // emptyFlags[BLOCK_SIZE - 1 - s].
        assert(!emptyFlags[BLOCK_SIZE - 1 -
                           static_cast<size_t>(
                               i & static_cast<index_t>(BLOCK_SIZE - 1))]
                    .load(std::memory_order_relaxed));
        emptyFlags[BLOCK_SIZE - 1 -
                   static_cast<size_t>(i &
                                       static_cast<index_t>(BLOCK_SIZE - 1))]
            .store(true, std::memory_order_release);
        return false;
      }
      else {
        // Increment counter
        auto prevVal =
            elementsCompletelyDequeued.fetch_add(1, std::memory_order_acq_rel);
        assert(prevVal < BLOCK_SIZE);
        return prevVal == BLOCK_SIZE - 1;
      }
    }

    // Sets multiple contiguous item statuses to 'empty' (assumes no wrapping
    // and count > 0). Returns true if the block is now empty (does not apply in
    // explicit context).
    template <InnerQueueContext context>
    inline bool set_many_empty(MOODYCAMEL_MAYBE_UNUSED index_t i,
                               size_t count) {
      MOODYCAMEL_CONSTEXPR_IF(context == explicit_context &&
                              BLOCK_SIZE <=
                                  EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
        // Set flags
        // A single release fence up front stands in for a release store on
        // each flag; the stores themselves are relaxed.
        std::atomic_thread_fence(std::memory_order_release);
        // Translate `i` into the lowest flag index of the run (flags are
        // stored in reverse slot order, so the run ends at the flag for the
        // first slot).
        i = BLOCK_SIZE - 1 -
            static_cast<size_t>(i & static_cast<index_t>(BLOCK_SIZE - 1)) -
            count + 1;
        for (size_t j = 0; j != count; ++j) {
          assert(!emptyFlags[i + j].load(std::memory_order_relaxed));
          emptyFlags[i + j].store(true, std::memory_order_relaxed);
        }
        return false;
      }
      else {
        // Increment counter
        auto prevVal = elementsCompletelyDequeued.fetch_add(
            count, std::memory_order_acq_rel);
        assert(prevVal + count <= BLOCK_SIZE);
        return prevVal + count == BLOCK_SIZE;
      }
    }

    // Marks the whole block as empty using relaxed stores only (no
    // synchronization is provided by this call).
    template <InnerQueueContext context>
    inline void set_all_empty() {
      MOODYCAMEL_CONSTEXPR_IF(context == explicit_context &&
                              BLOCK_SIZE <=
                                  EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
        // Set all flags
        for (size_t i = 0; i != BLOCK_SIZE; ++i) {
          emptyFlags[i].store(true, std::memory_order_relaxed);
        }
      }
      else {
        // Set the counter to its "everything dequeued" value
        elementsCompletelyDequeued.store(BLOCK_SIZE, std::memory_order_relaxed);
      }
    }

    // Marks the whole block as non-empty again (all flags false / counter
    // zero) using relaxed stores only.
    template <InnerQueueContext context>
    inline void reset_empty() {
      MOODYCAMEL_CONSTEXPR_IF(context == explicit_context &&
                              BLOCK_SIZE <=
                                  EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD) {
        // Reset flags
        for (size_t i = 0; i != BLOCK_SIZE; ++i) {
          emptyFlags[i].store(false, std::memory_order_relaxed);
        }
      }
      else {
        // Reset counter
        elementsCompletelyDequeued.store(0, std::memory_order_relaxed);
      }
    }

    // Returns a pointer to the storage slot for queue index `idx`. Only the
    // bits selected by (BLOCK_SIZE - 1) are used, so any global index can be
    // passed (this masking presumes BLOCK_SIZE is a power of two -- enforced
    // elsewhere, not visible here). The slot may or may not currently hold a
    // constructed T.
    inline T *operator[](index_t idx) MOODYCAMEL_NOEXCEPT {
      return static_cast<T *>(static_cast<void *>(elements)) +
             static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE - 1));
    }
    // Const overload of the slot accessor above.
    inline T const *operator[](index_t idx) const MOODYCAMEL_NOEXCEPT {
      return static_cast<T const *>(static_cast<void const *>(elements)) +
             static_cast<size_t>(idx & static_cast<index_t>(BLOCK_SIZE - 1));
    }

   private:
    static_assert(std::alignment_of<T>::value <= sizeof(T),
                  "The queue does not support types with an alignment greater "
                  "than their size at this time");
    // Raw, suitably aligned storage for BLOCK_SIZE objects of type T; elements
    // are constructed and destroyed in place by the producers.
    MOODYCAMEL_ALIGNED_TYPE_LIKE(char[sizeof(T) * BLOCK_SIZE], T) elements;

   public:
    // Next block in the owning producer's chain of blocks.
    Block *next;
    // Counter-based emptiness tracking (see the struct comment).
    std::atomic<size_t> elementsCompletelyDequeued;
    // Flag-based emptiness tracking; collapses to a single unused entry when
    // BLOCK_SIZE exceeds the threshold.
    std::atomic<bool> emptyFlags
        [BLOCK_SIZE <= EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD ? BLOCK_SIZE : 1];

   public:
    // Fields required by FreeList<Block>.
    std::atomic<std::uint32_t> freeListRefs;
    std::atomic<Block *> freeListNext;
    bool dynamicallyAllocated;  // Perhaps a better name for this would be
                                // 'isNotPartOfInitialBlockPool'

#ifdef MCDBGQ_TRACKMEM
    // Producer that currently owns the block (memory tracking builds only).
    void *owner;
#endif
  };
  static_assert(std::alignment_of<Block>::value >= std::alignment_of<T>::value,
                "Internal error: Blocks must be at least as aligned as the "
                "type they are wrapping");


#ifdef MCDBGQ_TRACKMEM
 public:
  struct MemStats;

 private:
#endif

  ///////////////////////////
  // Producer base
  ///////////////////////////

  // State and dispatch logic shared by both producer flavours. The concrete
  // type is recorded in `isExplicit`, and the dequeue entry points forward to
  // the matching derived class through a static_cast.
  struct ProducerBase : public details::ConcurrentQueueProducerTypelessBase {
    // Begins with zeroed indices/counters and no tail block; remembers the
    // producer flavour and the queue that owns this producer.
    ProducerBase(ConcurrentQueue *parent_, bool isExplicit_)
        : tailIndex(0),
          headIndex(0),
          dequeueOptimisticCount(0),
          dequeueOvercommit(0),
          tailBlock(nullptr),
          isExplicit(isExplicit_),
          parent(parent_) {}

    virtual ~ProducerBase() {}

    // Forwards a single-element dequeue to the concrete producer type and
    // returns its result.
    template <typename U>
    inline bool dequeue(U &element) {
      if (!isExplicit) {
        return static_cast<ImplicitProducer *>(this)->dequeue(element);
      }
      return static_cast<ExplicitProducer *>(this)->dequeue(element);
    }

    // Forwards a bulk dequeue of up to `max` items to the concrete producer
    // type and returns its result.
    template <typename It>
    inline size_t dequeue_bulk(It &itemFirst, size_t max) {
      if (!isExplicit) {
        return static_cast<ImplicitProducer *>(this)->dequeue_bulk(itemFirst,
                                                                   max);
      }
      return static_cast<ExplicitProducer *>(this)->dequeue_bulk(itemFirst,
                                                                 max);
    }

    // The inherited `next` link, viewed as a ProducerBase.
    inline ProducerBase *next_prod() const {
      auto *following = static_cast<ProducerBase *>(next);
      return following;
    }

    // Approximate element count: tail minus head as seen by two relaxed
    // loads, or zero when head is not (circularly) behind tail.
    inline size_t size_approx() const {
      const auto tailSnapshot = tailIndex.load(std::memory_order_relaxed);
      const auto headSnapshot = headIndex.load(std::memory_order_relaxed);
      if (!details::circular_less_than(headSnapshot, tailSnapshot)) {
        return 0;
      }
      return static_cast<size_t>(tailSnapshot - headSnapshot);
    }

    // Relaxed snapshot of the tail index.
    inline index_t getTail() const {
      const index_t tailSnapshot = tailIndex.load(std::memory_order_relaxed);
      return tailSnapshot;
    }

   protected:
    std::atomic<index_t> tailIndex;  // Where to enqueue to next
    std::atomic<index_t> headIndex;  // Where to dequeue from next

    // Counters used by the derived classes' dequeue protocol.
    std::atomic<index_t> dequeueOptimisticCount;
    std::atomic<index_t> dequeueOvercommit;

    // Block currently being filled; null until the first block is acquired.
    Block *tailBlock;

   public:
    bool isExplicit;          // Selects the static_cast target above
    ConcurrentQueue *parent;  // Queue this producer belongs to

   protected:
#ifdef MCDBGQ_TRACKMEM
    friend struct MemStats;
#endif
  };


  ///////////////////////////
  // Explicit queue
  ///////////////////////////

  struct ExplicitProducer : public ProducerBase {
    // Builds an explicit producer attached to `parent_` and allocates its
    // first block index. The starting size is the larger of half of
    // EXPLICIT_INITIAL_INDEX_SIZE and half of the parent's initial block pool
    // size rounded up to a power of two.
    explicit ExplicitProducer(ConcurrentQueue *parent_)
        : ProducerBase(parent_, true),
          blockIndex(nullptr),
          pr_blockIndexSlotsUsed(0),
          pr_blockIndexSize(EXPLICIT_INITIAL_INDEX_SIZE >> 1),
          pr_blockIndexFront(0),
          pr_blockIndexEntries(nullptr),
          pr_blockIndexRaw(nullptr) {
      const size_t sizeFromPool =
          details::ceil_to_pow_2(parent_->initialBlockPoolSize) >> 1;
      if (pr_blockIndexSize < sizeFromPool) {
        pr_blockIndexSize = sizeFromPool;
      }

      // new_block_index creates an index with double the number of current
      // entries, which is why the size above was halved first (yielding
      // EXPLICIT_INITIAL_INDEX_SIZE in the default case).
      new_block_index(0);
    }

    // Tears the producer down in three steps:
    //   1. runs ~T() on every element that was enqueued but never dequeued;
    //   2. hands every block in this producer's circular block list back to
    //      the parent queue via add_block_to_free_list;
    //   3. destroys and frees the chain of block index allocations.
    ~ExplicitProducer() {
      // Destruct any elements not yet dequeued.
      // Since we're in the destructor, we can assume all elements
      // are either completely dequeued or completely not (no halfways).
      if (this->tailBlock !=
          nullptr) {  // Note this means there must be a block index too
        // First find the block that's partially dequeued, if any
        Block *halfDequeuedBlock = nullptr;
        if ((this->headIndex.load(std::memory_order_relaxed) &
             static_cast<index_t>(BLOCK_SIZE - 1)) != 0) {
          // The head's not on a block boundary, meaning a block somewhere is
          // partially dequeued (or the head block is the tail block and was
          // fully dequeued, but the head/tail are still not on a boundary)
          // Start at the oldest index entry still in use and advance until the
          // entry whose block range reaches the head index.
          size_t i = (pr_blockIndexFront - pr_blockIndexSlotsUsed) &
                     (pr_blockIndexSize - 1);
          while (details::circular_less_than<index_t>(
              pr_blockIndexEntries[i].base + BLOCK_SIZE,
              this->headIndex.load(std::memory_order_relaxed))) {
            i = (i + 1) & (pr_blockIndexSize - 1);
          }
          assert(details::circular_less_than<index_t>(
              pr_blockIndexEntries[i].base,
              this->headIndex.load(std::memory_order_relaxed)));
          halfDequeuedBlock = pr_blockIndexEntries[i].block;
        }

        // Start at the head block (note the first line in the loop gives us the
        // head from the tail on the first iteration)
        auto block = this->tailBlock;
        do {
          block = block->next;
          // Fully dequeued blocks hold no live elements; nothing to destroy.
          if (block->ConcurrentQueue::Block::template is_empty<
                  explicit_context>()) {
            continue;
          }

          size_t i = 0;  // Offset into block
          if (block == halfDequeuedBlock) {
            // Skip the slots that have already been dequeued.
            i = static_cast<size_t>(
                this->headIndex.load(std::memory_order_relaxed) &
                static_cast<index_t>(BLOCK_SIZE - 1));
          }

          // Walk through all the items in the block; if this is the tail block,
          // we need to stop when we reach the tail index
          // (A tail index that sits exactly on a block boundary means the tail
          // block is completely filled, hence BLOCK_SIZE rather than 0.)
          auto lastValidIndex =
              (this->tailIndex.load(std::memory_order_relaxed) &
               static_cast<index_t>(BLOCK_SIZE - 1)) == 0
                  ? BLOCK_SIZE
                  : static_cast<size_t>(
                        this->tailIndex.load(std::memory_order_relaxed) &
                        static_cast<index_t>(BLOCK_SIZE - 1));
          while (i != BLOCK_SIZE &&
                 (block != this->tailBlock || i != lastValidIndex)) {
            (*block)[i++]->~T();
          }
        } while (block != this->tailBlock);
      }

      // Destroy all blocks that we own
      if (this->tailBlock != nullptr) {
        auto block = this->tailBlock;
        do {
          // Read the successor before giving the block away.
          auto nextBlock = block->next;
          this->parent->add_block_to_free_list(block);
          block = nextBlock;
        } while (block != this->tailBlock);
      }

      // Destroy the block indices
      // (Index allocations are chained through `prev`; walk the chain from the
      // most recent allocation back to the first, freeing each one.)
      auto header = static_cast<BlockIndexHeader *>(pr_blockIndexRaw);
      while (header != nullptr) {
        auto prev = static_cast<BlockIndexHeader *>(header->prev);
        header->~BlockIndexHeader();
        (Traits::free)(header);
        header = prev;
      }
    }

    // Enqueues a single element at the tail of this producer's sub-queue.
    //
    // Template parameters:
    //   allocMode - when CannotAlloc, growing the block index is refused;
    //               the mode is also forwarded to requisition_block.
    //   U         - type of the value forwarded to T's constructor.
    //
    // Returns true once the element has been constructed in place and the new
    // tail index has been published with a release store. Returns false,
    // leaving the queue unchanged, when a new block is needed but
    //   * the head/tail distance or MAX_SUBQUEUE_SIZE leaves no room for it,
    //   * the block index is full (or missing) and cannot be grown, or
    //   * requisition_block yields no block.
    //
    // If T's constructor may throw and does throw while the first slot of a
    // new block is being filled, the tail block and slot count are rolled back
    // and the exception is rethrown.
    template <AllocationMode allocMode, typename U>
    inline bool enqueue(U &&element) {
      index_t currentTailIndex =
          this->tailIndex.load(std::memory_order_relaxed);
      index_t newTailIndex = 1 + currentTailIndex;
      if ((currentTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {
        // We reached the end of a block, start a new one
        // (Remember the current state so it can be restored if the element's
        // constructor throws below.)
        auto startBlock = this->tailBlock;
        auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed;
        if (this->tailBlock != nullptr &&
            this->tailBlock->next->ConcurrentQueue::Block::template is_empty<
                explicit_context>()) {
          // We can re-use the block ahead of us, it's empty!
          this->tailBlock = this->tailBlock->next;
          this->tailBlock->ConcurrentQueue::Block::template reset_empty<
              explicit_context>();

          // We'll put the block on the block index (guaranteed to be room since
          // we're conceptually removing the last block from it first -- except
          // instead of removing then adding, we can just overwrite). Note that
          // there must be a valid block index here, since even if allocation
          // failed in the ctor, it would have been re-attempted when adding the
          // first block to the queue; since there is such a block, a block
          // index must have been successfully allocated.
        } else {
          // Whatever head value we see here is >= the last value we saw here
          // (relatively), and <= its current value. Since we have the most
          // recent tail, the head must be
          // <= to it.
          auto head = this->headIndex.load(std::memory_order_relaxed);
          assert(!details::circular_less_than<index_t>(currentTailIndex, head));
          if (!details::circular_less_than<index_t>(
                  head, currentTailIndex + BLOCK_SIZE) ||
              (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value &&
               (MAX_SUBQUEUE_SIZE == 0 ||
                MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) {
            // We can't enqueue in another block because there's not enough
            // leeway -- the tail could surpass the head by the time the block
            // fills up! (Or we'll exceed the size limit, if the second part of
            // the condition was true.)
            return false;
          }
          // We're going to need a new block; check that the block index has
          // room
          if (pr_blockIndexRaw == nullptr ||
              pr_blockIndexSlotsUsed == pr_blockIndexSize) {
            // Hmm, the circular block index is already full -- we'll need
            // to allocate a new index. Note pr_blockIndexRaw can only be
            // nullptr if the initial allocation failed in the constructor.

            MOODYCAMEL_CONSTEXPR_IF(allocMode == CannotAlloc) {
              return false;
            }
            else if (!new_block_index(pr_blockIndexSlotsUsed)) {
              return false;
            }
          }

          // Insert a new block in the circular linked list
          auto newBlock =
              this->parent
                  ->ConcurrentQueue::template requisition_block<allocMode>();
          if (newBlock == nullptr) {
            return false;
          }
#ifdef MCDBGQ_TRACKMEM
          newBlock->owner = this;
#endif
          newBlock->ConcurrentQueue::Block::template reset_empty<
              explicit_context>();
          if (this->tailBlock == nullptr) {
            // Very first block: it forms a one-element ring.
            newBlock->next = newBlock;
          } else {
            // Splice the new block in right after the current tail block.
            newBlock->next = this->tailBlock->next;
            this->tailBlock->next = newBlock;
          }
          this->tailBlock = newBlock;
          ++pr_blockIndexSlotsUsed;
        }

        MOODYCAMEL_CONSTEXPR_IF(!MOODYCAMEL_NOEXCEPT_CTOR(
            T, U,
            new (static_cast<T *>(nullptr)) T(std::forward<U>(element)))) {
          // The constructor may throw. We want the element not to appear in the
          // queue in that case (without corrupting the queue):
          // so construct it *before* the block is published in the block
          // index.
          MOODYCAMEL_TRY {
            new ((*this->tailBlock)[currentTailIndex])
                T(std::forward<U>(element));
          }
          MOODYCAMEL_CATCH(...) {
            // Revert change to the current block, but leave the new block
            // available for next time
            pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
            this->tailBlock =
                startBlock == nullptr ? this->tailBlock : startBlock;
            MOODYCAMEL_RETHROW;
          }
        }
        else {
          // Non-throwing constructor: the saved state is not needed.
          (void)startBlock;
          (void)originalBlockIndexSlotsUsed;
        }

        // Add block to block index
        auto &entry = blockIndex.load(std::memory_order_relaxed)
                          ->entries[pr_blockIndexFront];
        entry.base = currentTailIndex;
        entry.block = this->tailBlock;
        // Publish the entry: the release store on `front` pairs with the
        // acquire load of `front` in dequeue.
        blockIndex.load(std::memory_order_relaxed)
            ->front.store(pr_blockIndexFront, std::memory_order_release);
        pr_blockIndexFront = (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);

        MOODYCAMEL_CONSTEXPR_IF(!MOODYCAMEL_NOEXCEPT_CTOR(
            T, U,
            new (static_cast<T *>(nullptr)) T(std::forward<U>(element)))) {
          // The element was already constructed inside the try block above;
          // only the tail remains to be published.
          this->tailIndex.store(newTailIndex, std::memory_order_release);
          return true;
        }
      }

      // Enqueue
      new ((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element));

      // Make the element visible to consumers.
      this->tailIndex.store(newTailIndex, std::memory_order_release);
      return true;
    }

    // Attempts to dequeue one element from this producer's sub-queue into
    // `element` by move assignment.
    //
    // Returns true if an element was moved out; its slot has then been
    // destroyed and marked empty in its block. Returns false if no element
    // could be claimed; in that case `element` is left untouched.
    //
    // If the move assignment can throw, a scope guard still destroys the slot
    // and marks it empty before the exception propagates.
    template <typename U>
    bool dequeue(U &element) {
      auto tail = this->tailIndex.load(std::memory_order_relaxed);
      auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);
      if (details::circular_less_than<index_t>(
              this->dequeueOptimisticCount.load(std::memory_order_relaxed) -
                  overcommit,
              tail)) {
        // Might be something to dequeue, let's give it a try

        // Note that this if is purely for performance purposes in the common
        // case when the queue is empty and the values are eventually consistent
        // -- we may enter here spuriously.

        // Note that whatever the values of overcommit and tail are, they are
        // not going to change (unless we change them) and must be the same
        // value at this point (inside the if) as when the if condition was
        // evaluated.

        // We insert an acquire fence here to synchronize-with the release upon
        // incrementing dequeueOvercommit below. This ensures that whatever the
        // value we got loaded into overcommit, the load of
        // dequeueOptimisticCount in the fetch_add below will result in a value
        // at least as recent as that (and therefore at least as large). Note
        // that I believe a compiler (signal) fence here would be sufficient due
        // to the nature of fetch_add (all read-modify-write operations are
        // guaranteed to work on the latest value in the modification order),
        // but unfortunately that can't be shown to be correct using only the
        // C++11 standard. See
        // http://stackoverflow.com/questions/18223161/what-are-the-c11-memory-ordering-guarantees-in-this-corner-case
        std::atomic_thread_fence(std::memory_order_acquire);

        // Increment optimistic counter, then check if it went over the boundary
        auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(
            1, std::memory_order_relaxed);

        // Note that since dequeueOvercommit must be <= dequeueOptimisticCount
        // (because dequeueOvercommit is only ever incremented after
        // dequeueOptimisticCount -- this is enforced in the `else` block
        // below), and since we now have a version of dequeueOptimisticCount
        // that is at least as recent as overcommit (due to the release upon
        // incrementing dequeueOvercommit and the acquire above that
        // synchronizes with it), overcommit <= myDequeueCount. However, we
        // can't assert this since both dequeueOptimisticCount and
        // dequeueOvercommit may (independently) overflow; in such a case,
        // though, the logic still holds since the difference between the two is
        // maintained.

        // Note that we reload tail here in case it changed; it will be the same
        // value as before or greater, since this load is sequenced after
        // (happens after) the earlier load above. This is supported by
        // read-read coherency (as defined in the standard), explained here:
        // http://en.cppreference.com/w/cpp/atomic/memory_order
        tail = this->tailIndex.load(std::memory_order_acquire);
        if ((details::likely)(details::circular_less_than<index_t>(
                myDequeueCount - overcommit, tail))) {
          // Guaranteed to be at least one element to dequeue!

          // Get the index. Note that since there's guaranteed to be at least
          // one element, this will never exceed tail. We need to do an
          // acquire-release fence here since it's possible that whatever
          // condition got us to this point was for an earlier enqueued element
          // (that we already see the memory effects for), but that by the time
          // we increment somebody else has incremented it, and we need to see
          // the memory effects for *that* element, which in such a case are
          // necessarily visible on the thread that incremented it in the first
          // place with the more current condition (they must have acquired a
          // tail that is at least as recent).
          auto index = this->headIndex.fetch_add(1, std::memory_order_acq_rel);


          // Determine which block the element is in

          auto localBlockIndex = blockIndex.load(std::memory_order_acquire);
          auto localBlockIndexHead =
              localBlockIndex->front.load(std::memory_order_acquire);

          // We need to be careful here about subtracting and dividing because
          // of index wrap-around. When an index wraps, we need to preserve the
          // sign of the offset when dividing it by the block size (in order to
          // get a correct signed block count offset in all cases):
          auto headBase = localBlockIndex->entries[localBlockIndexHead].base;
          auto blockBaseIndex = index & ~static_cast<index_t>(BLOCK_SIZE - 1);
          // `offset` is the (possibly negative, then wrapped to size_t) number
          // of blocks between the most recently published entry and the block
          // holding `index`; the mask below brings it back into range.
          auto offset = static_cast<size_t>(
              static_cast<typename std::make_signed<index_t>::type>(
                  blockBaseIndex - headBase) /
              static_cast<typename std::make_signed<index_t>::type>(
                  BLOCK_SIZE));
          auto block = localBlockIndex
                           ->entries[(localBlockIndexHead + offset) &
                                     (localBlockIndex->size - 1)]
                           .block;

          // Dequeue
          auto &el = *((*block)[index]);
          if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T &&, element = std::move(el))) {
            // Make sure the element is still fully dequeued and destroyed even
            // if the assignment throws
            struct Guard {
              Block *block;
              index_t index;

              // Runs on both normal exit and exception unwinding.
              ~Guard() {
                (*block)[index]->~T();
                block->ConcurrentQueue::Block::template set_empty<
                    explicit_context>(index);
              }
            } guard = {block, index};

            element = std::move(el);  // NOLINT
          } else {
            // Assignment cannot throw: move out, destroy the slot and mark it
            // empty directly.
            element = std::move(el);  // NOLINT
            el.~T();                  // NOLINT
            block->ConcurrentQueue::Block::template set_empty<explicit_context>(
                index);
          }

          return true;
        } else {
          // Wasn't anything to dequeue after all; make the effective dequeue
          // count eventually consistent
          this->dequeueOvercommit.fetch_add(
              1, std::memory_order_release);  // Release so that the fetch_add
                                              // on dequeueOptimisticCount is
                                              // guaranteed to happen before
                                              // this write
        }
      }

      return false;
    }

    template <AllocationMode allocMode, typename It>
    bool MOODYCAMEL_NO_TSAN enqueue_bulk(It itemFirst, size_t count) {
      // First, we need to make sure we have enough room to enqueue all of the
      // elements; this means pre-allocating blocks and putting them in the
      // block index (but only if all the allocations succeeded).
      index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed);
      auto startBlock = this->tailBlock;
      auto originalBlockIndexFront = pr_blockIndexFront;
      auto originalBlockIndexSlotsUsed = pr_blockIndexSlotsUsed;

      Block *firstAllocatedBlock = nullptr;

      // Figure out how many blocks we'll need to allocate, and do so
      size_t blockBaseDiff =
          ((startTailIndex + count - 1) &
           ~static_cast<index_t>(BLOCK_SIZE - 1)) -
          ((startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1));
      index_t currentTailIndex =
          (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
      if (blockBaseDiff > 0) {
        // Allocate as many blocks as possible from ahead
        while (blockBaseDiff > 0 && this->tailBlock != nullptr &&
               this->tailBlock->next != firstAllocatedBlock &&
               this->tailBlock->next->ConcurrentQueue::Block::template is_empty<
                   explicit_context>()) {
          blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE);
          currentTailIndex += static_cast<index_t>(BLOCK_SIZE);

          this->tailBlock = this->tailBlock->next;
          firstAllocatedBlock = firstAllocatedBlock == nullptr
                                    ? this->tailBlock
                                    : firstAllocatedBlock;

          auto &entry = blockIndex.load(std::memory_order_relaxed)
                            ->entries[pr_blockIndexFront];
          entry.base = currentTailIndex;
          entry.block = this->tailBlock;
          pr_blockIndexFront =
              (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);
        }

        // Now allocate as many blocks as necessary from the block pool
        while (blockBaseDiff > 0) {
          blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE);
          currentTailIndex += static_cast<index_t>(BLOCK_SIZE);

          auto head = this->headIndex.load(std::memory_order_relaxed);
          assert(!details::circular_less_than<index_t>(currentTailIndex, head));
          bool full =
              !details::circular_less_than<index_t>(
                  head, currentTailIndex + BLOCK_SIZE) ||
              (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value &&
               (MAX_SUBQUEUE_SIZE == 0 ||
                MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head));
          if (pr_blockIndexRaw == nullptr ||
              pr_blockIndexSlotsUsed == pr_blockIndexSize || full) {
            MOODYCAMEL_CONSTEXPR_IF(allocMode == CannotAlloc) {
              // Failed to allocate, undo changes (but keep injected blocks)
              pr_blockIndexFront = originalBlockIndexFront;
              pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
              this->tailBlock =
                  startBlock == nullptr ? firstAllocatedBlock : startBlock;
              return false;
            }
            else if (full || !new_block_index(originalBlockIndexSlotsUsed)) {
              // Failed to allocate, undo changes (but keep injected blocks)
              pr_blockIndexFront = originalBlockIndexFront;
              pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
              this->tailBlock =
                  startBlock == nullptr ? firstAllocatedBlock : startBlock;
              return false;
            }

            // pr_blockIndexFront is updated inside new_block_index, so we need
            // to update our fallback value too (since we keep the new index
            // even if we later fail)
            originalBlockIndexFront = originalBlockIndexSlotsUsed;
          }

          // Insert a new block in the circular linked list
          auto newBlock =
              this->parent
                  ->ConcurrentQueue::template requisition_block<allocMode>();
          if (newBlock == nullptr) {
            pr_blockIndexFront = originalBlockIndexFront;
            pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
            this->tailBlock =
                startBlock == nullptr ? firstAllocatedBlock : startBlock;
            return false;
          }

#ifdef MCDBGQ_TRACKMEM
          newBlock->owner = this;
#endif
          newBlock->ConcurrentQueue::Block::template set_all_empty<
              explicit_context>();
          if (this->tailBlock == nullptr) {
            newBlock->next = newBlock;
          } else {
            newBlock->next = this->tailBlock->next;
            this->tailBlock->next = newBlock;
          }
          this->tailBlock = newBlock;
          firstAllocatedBlock = firstAllocatedBlock == nullptr
                                    ? this->tailBlock
                                    : firstAllocatedBlock;

          ++pr_blockIndexSlotsUsed;

          auto &entry = blockIndex.load(std::memory_order_relaxed)
                            ->entries[pr_blockIndexFront];
          entry.base = currentTailIndex;
          entry.block = this->tailBlock;
          pr_blockIndexFront =
              (pr_blockIndexFront + 1) & (pr_blockIndexSize - 1);
        }

        // Excellent, all allocations succeeded. Reset each block's emptiness
        // before we fill them up, and publish the new block index front
        auto block = firstAllocatedBlock;
        while (true) {
          block->ConcurrentQueue::Block::template reset_empty<
              explicit_context>();
          if (block == this->tailBlock) {
            break;
          }
          block = block->next;
        }

        MOODYCAMEL_CONSTEXPR_IF(MOODYCAMEL_NOEXCEPT_CTOR(
            T, decltype(*itemFirst),
            new (static_cast<T *>(nullptr))
                T(details::deref_noexcept(itemFirst)))) {
          blockIndex.load(std::memory_order_relaxed)
              ->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1),
                            std::memory_order_release);
        }
      }

      // Enqueue, one block at a time
      index_t newTailIndex = startTailIndex + static_cast<index_t>(count);
      currentTailIndex = startTailIndex;
      auto endBlock = this->tailBlock;
      this->tailBlock = startBlock;
      assert((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 ||
             firstAllocatedBlock != nullptr || count == 0);
      if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 &&
          firstAllocatedBlock != nullptr) {
        this->tailBlock = firstAllocatedBlock;
      }
      while (true) {
        index_t stopIndex =
            (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) +
            static_cast<index_t>(BLOCK_SIZE);
        if (details::circular_less_than<index_t>(newTailIndex, stopIndex)) {
          stopIndex = newTailIndex;
        }
        MOODYCAMEL_CONSTEXPR_IF(MOODYCAMEL_NOEXCEPT_CTOR(
            T, decltype(*itemFirst),
            new (static_cast<T *>(nullptr))
                T(details::deref_noexcept(itemFirst)))) {
          while (currentTailIndex != stopIndex) {
            new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++);
          }
        }
        else {
          MOODYCAMEL_TRY {
            while (currentTailIndex != stopIndex) {
              // Must use copy constructor even if move constructor is available
              // because we may have to revert if there's an exception.
              // Sorry about the horrible templated next line, but it was the
              // only way to disable moving *at compile time*, which is
              // important because a type may only define a (noexcept) move
              // constructor, and so calls to the cctor will not compile, even
              // if they are in an if branch that will never be executed
              new ((*this->tailBlock)[currentTailIndex]) T(
                  details::nomove_if<!MOODYCAMEL_NOEXCEPT_CTOR(
                      T, decltype(*itemFirst),
                      new (static_cast<T *>(nullptr)) T(details::deref_noexcept(
                          itemFirst)))>::eval(*itemFirst));
              ++currentTailIndex;
              ++itemFirst;
            }
          }
          MOODYCAMEL_CATCH(...) {
            // Oh dear, an exception's been thrown -- destroy the elements that
            // were enqueued so far and revert the entire bulk operation (we'll
            // keep any allocated blocks in our linked list for later, though).
            auto constructedStopIndex = currentTailIndex;
            auto lastBlockEnqueued = this->tailBlock;

            pr_blockIndexFront = originalBlockIndexFront;
            pr_blockIndexSlotsUsed = originalBlockIndexSlotsUsed;
            this->tailBlock =
                startBlock == nullptr ? firstAllocatedBlock : startBlock;

            if (!details::is_trivially_destructible<T>::value) {
              auto block = startBlock;
              if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) ==
                  0) {
                block = firstAllocatedBlock;
              }
              currentTailIndex = startTailIndex;
              while (true) {
                stopIndex =
                    (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) +
                    static_cast<index_t>(BLOCK_SIZE);
                if (details::circular_less_than<index_t>(constructedStopIndex,
                                                         stopIndex)) {
                  stopIndex = constructedStopIndex;
                }
                while (currentTailIndex != stopIndex) {
                  (*block)[currentTailIndex++]->~T();
                }
                if (block == lastBlockEnqueued) {
                  break;
                }
                block = block->next;
              }
            }
            MOODYCAMEL_RETHROW;
          }
        }

        if (this->tailBlock == endBlock) {
          assert(currentTailIndex == newTailIndex);
          break;
        }
        this->tailBlock = this->tailBlock->next;
      }

      MOODYCAMEL_CONSTEXPR_IF(!MOODYCAMEL_NOEXCEPT_CTOR(
          T, decltype(*itemFirst),
          new (static_cast<T *>(nullptr))
              T(details::deref_noexcept(itemFirst)))) {
        if (firstAllocatedBlock != nullptr)
          blockIndex.load(std::memory_order_relaxed)
              ->front.store((pr_blockIndexFront - 1) & (pr_blockIndexSize - 1),
                            std::memory_order_release);
      }

      this->tailIndex.store(newTailIndex, std::memory_order_release);
      return true;
    }

    // Bulk-dequeues up to `max` elements from this producer's sub-queue.
    //
    // Each dequeued element is move-assigned through `itemFirst` (which is
    // advanced once per element) and then destroyed in place. Returns the
    // number of elements actually dequeued; 0 means nothing was claimed.
    //
    // If a move-assignment throws, the range already claimed from headIndex
    // is not given back: the remaining claimed elements are destroyed, their
    // slots are marked empty, and the exception is rethrown.
    template <typename It>
    size_t dequeue_bulk(It &itemFirst, size_t max) {
      // Optimistic estimate of how many elements are available, derived from
      // the tail and the (overcommit-corrected) optimistic dequeue count
      auto tail = this->tailIndex.load(std::memory_order_relaxed);
      auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);
      auto desiredCount = static_cast<size_t>(
          tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) -
                  overcommit));
      if (details::circular_less_than<size_t>(0, desiredCount)) {
        desiredCount = desiredCount < max ? desiredCount : max;
        std::atomic_thread_fence(std::memory_order_acquire);

        // Reserve desiredCount elements, then re-read the tail to find out how
        // many of them really exist
        auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(
            desiredCount, std::memory_order_relaxed);

        tail = this->tailIndex.load(std::memory_order_acquire);
        auto actualCount =
            static_cast<size_t>(tail - (myDequeueCount - overcommit));
        if (details::circular_less_than<size_t>(0, actualCount)) {
          actualCount = desiredCount < actualCount ? desiredCount : actualCount;
          if (actualCount < desiredCount) {
            // Reserved more than is available; record the excess as overcommit
            this->dequeueOvercommit.fetch_add(desiredCount - actualCount,
                                              std::memory_order_release);
          }

          // Get the first index. Note that since there's guaranteed to be at
          // least actualCount elements, this will never exceed tail.
          auto firstIndex =
              this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel);

          // Determine which block the first element is in
          auto localBlockIndex = blockIndex.load(std::memory_order_acquire);
          auto localBlockIndexHead =
              localBlockIndex->front.load(std::memory_order_acquire);

          // The slot is found relative to the front entry: the signed distance
          // between block base indices, in units of BLOCK_SIZE, is added to
          // the front position and wrapped with the index size mask
          auto headBase = localBlockIndex->entries[localBlockIndexHead].base;
          auto firstBlockBaseIndex =
              firstIndex & ~static_cast<index_t>(BLOCK_SIZE - 1);
          auto offset = static_cast<size_t>(
              static_cast<typename std::make_signed<index_t>::type>(
                  firstBlockBaseIndex - headBase) /
              static_cast<typename std::make_signed<index_t>::type>(
                  BLOCK_SIZE));
          auto indexIndex =
              (localBlockIndexHead + offset) & (localBlockIndex->size - 1);

          // Iterate the blocks and dequeue
          auto index = firstIndex;
          do {
            auto firstIndexInBlock = index;
            // endIndex is the end of the current block, clamped to the end of
            // the claimed range
            index_t endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) +
                               static_cast<index_t>(BLOCK_SIZE);
            endIndex =
                details::circular_less_than<index_t>(
                    firstIndex + static_cast<index_t>(actualCount), endIndex)
                    ? firstIndex + static_cast<index_t>(actualCount)
                    : endIndex;
            auto block = localBlockIndex->entries[indexIndex].block;
            if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T &&,
                                           details::deref_noexcept(itemFirst) =
                                               std::move((*(*block)[index])))) {
              // Assignment cannot throw: plain move-out-and-destroy loop
              while (index != endIndex) {
                auto &el = *((*block)[index]);
                *itemFirst++ = std::move(el);
                el.~T();
                ++index;
              }
            } else {
              MOODYCAMEL_TRY {
                while (index != endIndex) {
                  auto &el = *((*block)[index]);
                  *itemFirst = std::move(el);
                  ++itemFirst;
                  el.~T();
                  ++index;
                }
              }
              MOODYCAMEL_CATCH(...) {
                // It's too late to revert the dequeue, but we can make sure
                // that all the dequeued objects are properly destroyed and the
                // block index (and empty count) are properly updated before we
                // propagate the exception
                do {
                  block = localBlockIndex->entries[indexIndex].block;
                  while (index != endIndex) {
                    (*block)[index++]->~T();
                  }
                  block->ConcurrentQueue::Block::template set_many_empty<
                      explicit_context>(
                      firstIndexInBlock,
                      static_cast<size_t>(endIndex - firstIndexInBlock));
                  indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1);

                  // Recompute the per-block bounds for the next block of the
                  // claimed range
                  firstIndexInBlock = index;
                  endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) +
                             static_cast<index_t>(BLOCK_SIZE);
                  endIndex =
                      details::circular_less_than<index_t>(
                          firstIndex + static_cast<index_t>(actualCount),
                          endIndex)
                          ? firstIndex + static_cast<index_t>(actualCount)
                          : endIndex;
                } while (index != firstIndex + actualCount);

                MOODYCAMEL_RETHROW;
              }
            }
            // Mark the slots consumed from this block as empty and advance to
            // the next slot of the block index
            block->ConcurrentQueue::Block::template set_many_empty<
                explicit_context>(
                firstIndexInBlock,
                static_cast<size_t>(endIndex - firstIndexInBlock));
            indexIndex = (indexIndex + 1) & (localBlockIndex->size - 1);
          } while (index != firstIndex + actualCount);

          return actualCount;
        } else {
          // Wasn't anything to dequeue after all; make the effective dequeue
          // count eventually consistent
          this->dequeueOvercommit.fetch_add(desiredCount,
                                            std::memory_order_release);
        }
      }

      return 0;
    }

   private:
    // One slot of the explicit producer's block index: pairs a block with the
    // tail index recorded for it when the slot was filled (enqueue_bulk writes
    // `base = currentTailIndex` and `block = tailBlock`).
    struct BlockIndexEntry {
      index_t base;  // Index value recorded for the block when it was inserted
      Block *block;  // The block registered in this slot
    };

    // Header placed at the start of each block index allocation made by
    // new_block_index(); the header that is current is published through the
    // atomic `blockIndex` pointer.
    struct BlockIndexHeader {
      // Number of entries in `entries`; `size - 1` is used as a wrap-around
      // mask by the code that walks the index
      size_t size;
      std::atomic<size_t>
          front;  // Current slot (not next, like pr_blockIndexFront)
      // Entry array, located (aligned) after the header in the same allocation
      BlockIndexEntry *entries;
      // Raw pointer to the allocation this index replaced, kept so it can be
      // freed later
      void *prev;
    };


    // Grows the producer's block index to twice its current capacity.
    //
    // A fresh allocation holding a BlockIndexHeader followed by the (aligned)
    // entry array is obtained through Traits::malloc; the entries currently in
    // use are copied, in order, to the start of the new array, and the new
    // header is then published through `blockIndex` with release semantics.
    // The superseded allocation is chained via `prev` rather than freed here.
    //
    // numberOfFilledSlotsToExpose: number of leading slots the published
    //     `front` should cover; `front` is stored as this value minus one.
    //
    // Returns false (after shifting pr_blockIndexSize back down) when the
    // allocation fails, true otherwise.
    bool new_block_index(size_t numberOfFilledSlotsToExpose) {
      // Wrap-around mask of the index that is about to be replaced
      const auto oldSlotMask = pr_blockIndexSize - 1;

      // Double the capacity and allocate header + alignment slack + entries
      pr_blockIndexSize <<= 1;
      const size_t bytesNeeded =
          sizeof(BlockIndexHeader) +
          std::alignment_of<BlockIndexEntry>::value - 1 +
          sizeof(BlockIndexEntry) * pr_blockIndexSize;
      auto rawStorage = static_cast<char *>((Traits::malloc)(bytesNeeded));
      if (rawStorage == nullptr) {
        // Undo the doubling so that a later call can retry
        pr_blockIndexSize >>= 1;
        return false;
      }

      // The entry array lives right after the header, suitably aligned
      auto newEntries = reinterpret_cast<BlockIndexEntry *>(
          details::align_for<BlockIndexEntry>(rawStorage +
                                              sizeof(BlockIndexHeader)));

      // Carry over the slots that are in use, oldest first, compacting them to
      // the beginning of the new array
      size_t copiedCount = 0;
      if (pr_blockIndexSlotsUsed != 0) {
        auto src = (pr_blockIndexFront - pr_blockIndexSlotsUsed) & oldSlotMask;
        do {
          newEntries[copiedCount] = pr_blockIndexEntries[src];
          ++copiedCount;
          src = (src + 1) & oldSlotMask;
        } while (src != pr_blockIndexFront);
      }

      // Fill in the new header; the previous raw allocation is remembered in
      // `prev` so that it can be released later
      auto newHeader = new (rawStorage) BlockIndexHeader;
      newHeader->entries = newEntries;
      newHeader->prev = pr_blockIndexRaw;
      newHeader->size = pr_blockIndexSize;
      newHeader->front.store(numberOfFilledSlotsToExpose - 1,
                             std::memory_order_relaxed);

      // Switch the producer-side bookkeeping over, then publish the header
      pr_blockIndexRaw = rawStorage;
      pr_blockIndexEntries = newEntries;
      pr_blockIndexFront = copiedCount;
      blockIndex.store(newHeader, std::memory_order_release);

      return true;
    }

   private:
    std::atomic<BlockIndexHeader *> blockIndex;

    // To be used by producer only -- consumer must use the ones referenced
    // by blockIndex
    size_t pr_blockIndexSlotsUsed;
    size_t pr_blockIndexSize;
    size_t pr_blockIndexFront;  // Next slot (not current)
    BlockIndexEntry *pr_blockIndexEntries;
    void *pr_blockIndexRaw;

#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
   public:
    ExplicitProducer *nextExplicitProducer;

   private:
#endif

#ifdef MCDBGQ_TRACKMEM
    friend struct MemStats;
#endif
  };


  //////////////////////////////////
  // Implicit queue
  //////////////////////////////////

  struct ImplicitProducer : public ProducerBase {
    // Constructs an implicit producer for the queue `parent_`.
    //
    // ProducerBase is initialised with `false` as its second argument
    // (presumably the "is explicit" flag -- confirm against ProducerBase), the
    // capacity for the next block index starts at IMPLICIT_INITIAL_INDEX_SIZE
    // and `blockIndex` starts out null. new_block_index() is then invoked
    // immediately; its result is not checked here.
    ImplicitProducer(ConcurrentQueue *parent_)
        : ProducerBase(parent_, false),
          nextBlockIndexCapacity(IMPLICIT_INITIAL_INDEX_SIZE),
          blockIndex(nullptr) {
      new_block_index();
    }

    // Tears down the producer: destroys every element still stored between
    // headIndex and tailIndex, hands the blocks that held them to the parent's
    // free list, destroys the entries of the current block index and frees the
    // whole chain of block index allocations.
    ~ImplicitProducer() {
      // Note that since we're in the destructor we can assume that all
      // enqueue/dequeue operations completed already; this means that all
      // undequeued elements are placed contiguously across contiguous blocks,
      // and that only the first and last remaining blocks can be only partially
      // empty (all other remaining blocks must be completely full).

#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
      // Unregister ourselves for thread termination notification
      if (!this->inactive.load(std::memory_order_relaxed)) {
        details::ThreadExitNotifier::unsubscribe(&threadExitListener);
      }
#endif

      // Destroy all remaining elements!
      auto tail = this->tailIndex.load(std::memory_order_relaxed);
      auto index = this->headIndex.load(std::memory_order_relaxed);
      Block *block = nullptr;
      assert(index == tail || details::circular_less_than(index, tail));
      bool forceFreeLastBlock =
          index != tail;  // If we enter the loop, then the last (tail) block
                          // will not be freed
      while (index != tail) {
        // Look up a block on the first iteration and again whenever the index
        // crosses into a new block; the block just finished goes to the free
        // list
        if ((index & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 ||
            block == nullptr) {
          if (block != nullptr) {
            // Free the old block
            this->parent->add_block_to_free_list(block);
          }

          block = get_block_index_entry_for_index(index)->value.load(
              std::memory_order_relaxed);
        }

        ((*block)[index])->~T();
        ++index;
      }
      // Even if the queue is empty, there's still one block that's not on the
      // free list (unless the head index reached the end of it, in which case
      // the tail will be poised to create a new block).
      if (this->tailBlock != nullptr &&
          (forceFreeLastBlock ||
           (tail & static_cast<index_t>(BLOCK_SIZE - 1)) != 0)) {
        this->parent->add_block_to_free_list(this->tailBlock);
      }

      // Destroy block index
      auto localBlockIndex = blockIndex.load(std::memory_order_relaxed);
      if (localBlockIndex != nullptr) {
        // Entries are destroyed through the current header only; the headers
        // themselves are then destroyed and freed one by one along `prev`
        for (size_t i = 0; i != localBlockIndex->capacity; ++i) {
          localBlockIndex->index[i]->~BlockIndexEntry();
        }
        do {
          auto prev = localBlockIndex->prev;
          localBlockIndex->~BlockIndexHeader();
          (Traits::free)(localBlockIndex);
          localBlockIndex = prev;
        } while (localBlockIndex != nullptr);
      }
    }

    // Enqueues a single element at the tail of this producer's sub-queue.
    //
    // When the current tail index sits on a block boundary, a new block is
    // needed first: the call returns false if the sub-queue is considered full
    // (index wrap-around check or MAX_SUBQUEUE_SIZE limit), if no block index
    // entry can be inserted, or if no block can be requisitioned under
    // `allocMode`. Otherwise the element is constructed in place from
    // `element` and the new tail index is published with release semantics.
    //
    // If constructing T may throw and a new block was obtained, construction
    // happens before the block is published; on an exception the index entry
    // and the block are rolled back and the exception is rethrown.
    //
    // Returns true once the element has been enqueued.
    template <AllocationMode allocMode, typename U>
    inline bool enqueue(U &&element) {
      index_t currentTailIndex =
          this->tailIndex.load(std::memory_order_relaxed);
      index_t newTailIndex = 1 + currentTailIndex;
      if ((currentTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0) {
        // We reached the end of a block, start a new one
        auto head = this->headIndex.load(std::memory_order_relaxed);
        assert(!details::circular_less_than<index_t>(currentTailIndex, head));
        // Refuse when adding another block would break the circular index
        // comparison or exceed MAX_SUBQUEUE_SIZE
        if (!details::circular_less_than<index_t>(
                head, currentTailIndex + BLOCK_SIZE) ||
            (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value &&
             (MAX_SUBQUEUE_SIZE == 0 ||
              MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head))) {
          return false;
        }
#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
        debug::DebugLock lock(mutex);
#endif
        // Find out where we'll be inserting this block in the block index
        BlockIndexEntry *idxEntry;
        if (!insert_block_index_entry<allocMode>(idxEntry, currentTailIndex)) {
          return false;
        }

        // Get ahold of a new block
        auto newBlock =
            this->parent
                ->ConcurrentQueue::template requisition_block<allocMode>();
        if (newBlock == nullptr) {
          // No block available: undo the index insertion made above
          rewind_block_index_tail();
          idxEntry->value.store(nullptr, std::memory_order_relaxed);
          return false;
        }
#ifdef MCDBGQ_TRACKMEM
        newBlock->owner = this;
#endif
        newBlock
            ->ConcurrentQueue::Block::template reset_empty<implicit_context>();

        MOODYCAMEL_CONSTEXPR_IF(!MOODYCAMEL_NOEXCEPT_CTOR(
            T, U,
            new (static_cast<T *>(nullptr)) T(std::forward<U>(element)))) {
          // May throw, try to insert now before we publish the fact that we
          // have this new block
          MOODYCAMEL_TRY {
            new ((*newBlock)[currentTailIndex]) T(std::forward<U>(element));
          }
          MOODYCAMEL_CATCH(...) {
            // Roll back the index entry and return the unused block
            rewind_block_index_tail();
            idxEntry->value.store(nullptr, std::memory_order_relaxed);
            this->parent->add_block_to_free_list(newBlock);
            MOODYCAMEL_RETHROW;
          }
        }

        // Insert the new block into the index
        idxEntry->value.store(newBlock, std::memory_order_relaxed);

        this->tailBlock = newBlock;

        MOODYCAMEL_CONSTEXPR_IF(!MOODYCAMEL_NOEXCEPT_CTOR(
            T, U,
            new (static_cast<T *>(nullptr)) T(std::forward<U>(element)))) {
          // The element was already constructed in the try block above, so
          // only the tail index remains to be published
          this->tailIndex.store(newTailIndex, std::memory_order_release);
          return true;
        }
      }

      // Enqueue
      new ((*this->tailBlock)[currentTailIndex]) T(std::forward<U>(element));

      this->tailIndex.store(newTailIndex, std::memory_order_release);
      return true;
    }

    // Attempts to dequeue one element from this producer's sub-queue into
    // `element`.
    //
    // The element is move-assigned into `element` and then destroyed in place.
    // When marking its slot empty reports that the whole block is now empty,
    // the block's index entry is cleared and the block is handed back to the
    // parent's free list.
    //
    // If the move-assignment may throw, a local guard object performs the
    // destruction and block bookkeeping from its destructor, so that they also
    // happen when the assignment throws.
    //
    // Returns true if an element was dequeued, false if none was available.
    template <typename U>
    bool dequeue(U &element) {
      // See ExplicitProducer::dequeue for rationale and explanation
      index_t tail = this->tailIndex.load(std::memory_order_relaxed);
      index_t overcommit =
          this->dequeueOvercommit.load(std::memory_order_relaxed);
      if (details::circular_less_than<index_t>(
              this->dequeueOptimisticCount.load(std::memory_order_relaxed) -
                  overcommit,
              tail)) {
        std::atomic_thread_fence(std::memory_order_acquire);

        // Optimistically reserve one element, then re-check against the tail
        index_t myDequeueCount = this->dequeueOptimisticCount.fetch_add(
            1, std::memory_order_relaxed);
        tail = this->tailIndex.load(std::memory_order_acquire);
        if ((details::likely)(details::circular_less_than<index_t>(
                myDequeueCount - overcommit, tail))) {
          index_t index =
              this->headIndex.fetch_add(1, std::memory_order_acq_rel);

          // Determine which block the element is in
          auto entry = get_block_index_entry_for_index(index);

          // Dequeue
          auto block = entry->value.load(std::memory_order_relaxed);
          auto &el = *((*block)[index]);

          if (!MOODYCAMEL_NOEXCEPT_ASSIGN(T, T &&, element = std::move(el))) {
#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
            // Note: Acquiring the mutex with every dequeue instead of only when
            // a block is released is very sub-optimal, but it is, after all,
            // purely debug code.
            // This is the producer's own debug mutex, the same one the other
            // debug locks in this struct take. It is declared before the guard
            // so that the guard's destructor runs while the lock is still held.
            debug::DebugLock lock(mutex);
#endif
            // Destroys the element and updates the block bookkeeping on scope
            // exit, whether the assignment below returns or throws
            struct Guard {
              Block *block;
              index_t index;
              BlockIndexEntry *entry;
              ConcurrentQueue *parent;

              ~Guard() {
                (*block)[index]->~T();
                if (block->ConcurrentQueue::Block::template set_empty<
                        implicit_context>(index)) {
                  entry->value.store(nullptr, std::memory_order_relaxed);
                  parent->add_block_to_free_list(block);
                }
              }
            } guard = {block, index, entry, this->parent};

            element = std::move(el);  // NOLINT
          } else {
            element = std::move(el);  // NOLINT
            el.~T();                  // NOLINT

            if (block->ConcurrentQueue::Block::template set_empty<
                    implicit_context>(index)) {
              {
#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
                debug::DebugLock lock(mutex);
#endif
                // Add the block back into the global free pool (and remove from
                // block index)
                entry->value.store(nullptr, std::memory_order_relaxed);
              }
              this->parent->add_block_to_free_list(
                  block);  // releases the above store
            }
          }

          return true;
        } else {
          // The reservation turned out to be unfounded; compensate for it so
          // that the effective dequeue count stays consistent
          this->dequeueOvercommit.fetch_add(1, std::memory_order_release);
        }
      }

      return false;
    }

#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4706)  // assignment within conditional expression
#endif
    // Enqueues `count` elements, read through `itemFirst`, as one operation.
    //
    // Phase 1 pre-allocates every block needed beyond the current tail block
    // and registers each one in the block index. If the sub-queue is
    // considered full, an index entry cannot be inserted, or a block cannot be
    // requisitioned under `allocMode`, everything added by this call is rolled
    // back and false is returned.
    //
    // Phase 2 constructs the elements block by block. If a constructor
    // throws, the elements constructed so far are destroyed, the blocks and
    // index entries added in phase 1 are reverted, and the exception is
    // rethrown.
    //
    // On success the new tail index is published with release semantics and
    // true is returned.
    template <AllocationMode allocMode, typename It>
    bool enqueue_bulk(It itemFirst, size_t count) {
      // First, we need to make sure we have enough room to enqueue all of the
      // elements; this means pre-allocating blocks and putting them in the
      // block index (but only if all the allocations succeeded).

      // Note that the tailBlock we start off with may not be owned by us any
      // more; this happens if it was filled up exactly to the top (setting
      // tailIndex to the first index of the next block which is not yet
      // allocated), then dequeued completely (putting it on the free list)
      // before we enqueue again.

      index_t startTailIndex = this->tailIndex.load(std::memory_order_relaxed);
      auto startBlock = this->tailBlock;
      Block *firstAllocatedBlock = nullptr;
      auto endBlock = this->tailBlock;

      // Figure out how many blocks we'll need to allocate, and do so
      // (blockBaseDiff is the difference between two block-aligned indices:
      // the base of the block that will hold the last new element and the base
      // of the block holding the element just before startTailIndex; the loop
      // below consumes it one BLOCK_SIZE at a time)
      size_t blockBaseDiff =
          ((startTailIndex + count - 1) &
           ~static_cast<index_t>(BLOCK_SIZE - 1)) -
          ((startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1));
      index_t currentTailIndex =
          (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
      if (blockBaseDiff > 0) {
#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
        debug::DebugLock lock(mutex);
#endif
        do {
          blockBaseDiff -= static_cast<index_t>(BLOCK_SIZE);
          currentTailIndex += static_cast<index_t>(BLOCK_SIZE);

          // Find out where we'll be inserting this block in the block index
          BlockIndexEntry *idxEntry =
              nullptr;  // initialization here unnecessary but compiler can't
                        // always tell
          Block *newBlock;
          bool indexInserted = false;
          auto head = this->headIndex.load(std::memory_order_relaxed);
          assert(!details::circular_less_than<index_t>(currentTailIndex, head));
          bool full =
              !details::circular_less_than<index_t>(
                  head, currentTailIndex + BLOCK_SIZE) ||
              (MAX_SUBQUEUE_SIZE != details::const_numeric_max<size_t>::value &&
               (MAX_SUBQUEUE_SIZE == 0 ||
                MAX_SUBQUEUE_SIZE - BLOCK_SIZE < currentTailIndex - head));

          // The condition short-circuits: the index entry is only inserted if
          // there is room, and a block is only requisitioned once the index
          // entry has been inserted
          if (full ||
              !(indexInserted = insert_block_index_entry<allocMode>(
                    idxEntry, currentTailIndex)) ||
              (newBlock =
                   this->parent->ConcurrentQueue::template requisition_block<
                       allocMode>()) == nullptr) {
            // Index allocation or block allocation failed; revert any other
            // allocations and index insertions done so far for this operation
            if (indexInserted) {
              rewind_block_index_tail();
              idxEntry->value.store(nullptr, std::memory_order_relaxed);
            }
            // Walk the chain of blocks allocated by earlier iterations,
            // clearing the index entry of each one
            currentTailIndex =
                (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
            for (auto block = firstAllocatedBlock; block != nullptr;
                 block = block->next) {
              currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
              idxEntry = get_block_index_entry_for_index(currentTailIndex);
              idxEntry->value.store(nullptr, std::memory_order_relaxed);
              rewind_block_index_tail();
            }
            this->parent->add_blocks_to_free_list(firstAllocatedBlock);
            this->tailBlock = startBlock;

            return false;
          }

#ifdef MCDBGQ_TRACKMEM
          newBlock->owner = this;
#endif
          newBlock->ConcurrentQueue::Block::template reset_empty<
              implicit_context>();
          newBlock->next = nullptr;

          // Insert the new block into the index
          idxEntry->value.store(newBlock, std::memory_order_relaxed);

          // Store the chain of blocks so that we can undo if later allocations
          // fail, and so that we can find the blocks when we do the actual
          // enqueueing
          if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 ||
              firstAllocatedBlock != nullptr) {
            assert(this->tailBlock != nullptr);
            this->tailBlock->next = newBlock;
          }
          this->tailBlock = newBlock;
          endBlock = newBlock;
          firstAllocatedBlock =
              firstAllocatedBlock == nullptr ? newBlock : firstAllocatedBlock;
        } while (blockBaseDiff > 0);
      }

      // Enqueue, one block at a time
      index_t newTailIndex = startTailIndex + static_cast<index_t>(count);
      currentTailIndex = startTailIndex;
      this->tailBlock = startBlock;
      assert((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) != 0 ||
             firstAllocatedBlock != nullptr || count == 0);
      // When the tail sits exactly on a block boundary, the first element goes
      // into the first freshly allocated block rather than the old tail block
      if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) == 0 &&
          firstAllocatedBlock != nullptr) {
        this->tailBlock = firstAllocatedBlock;
      }
      while (true) {
        // stopIndex is the end of the current block, clamped to newTailIndex
        index_t stopIndex =
            (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) +
            static_cast<index_t>(BLOCK_SIZE);
        if (details::circular_less_than<index_t>(newTailIndex, stopIndex)) {
          stopIndex = newTailIndex;
        }
        MOODYCAMEL_CONSTEXPR_IF(MOODYCAMEL_NOEXCEPT_CTOR(
            T, decltype(*itemFirst),
            new (static_cast<T *>(nullptr))
                T(details::deref_noexcept(itemFirst)))) {
          // Construction cannot throw: plain construct-in-place loop
          while (currentTailIndex != stopIndex) {
            new ((*this->tailBlock)[currentTailIndex++]) T(*itemFirst++);
          }
        }
        else {
          MOODYCAMEL_TRY {
            while (currentTailIndex != stopIndex) {
              new ((*this->tailBlock)[currentTailIndex]) T(
                  details::nomove_if<!MOODYCAMEL_NOEXCEPT_CTOR(
                      T, decltype(*itemFirst),
                      new (static_cast<T *>(nullptr)) T(details::deref_noexcept(
                          itemFirst)))>::eval(*itemFirst));
              ++currentTailIndex;
              ++itemFirst;
            }
          }
          MOODYCAMEL_CATCH(...) {
            // A constructor threw: remember how far construction got, then
            // undo the whole bulk operation
            auto constructedStopIndex = currentTailIndex;
            auto lastBlockEnqueued = this->tailBlock;

            if (!details::is_trivially_destructible<T>::value) {
              // Destroy every element constructed so far, walking the blocks
              // from the first one written to the one that was being filled
              auto block = startBlock;
              if ((startTailIndex & static_cast<index_t>(BLOCK_SIZE - 1)) ==
                  0) {
                block = firstAllocatedBlock;
              }
              currentTailIndex = startTailIndex;
              while (true) {
                stopIndex =
                    (currentTailIndex & ~static_cast<index_t>(BLOCK_SIZE - 1)) +
                    static_cast<index_t>(BLOCK_SIZE);
                if (details::circular_less_than<index_t>(constructedStopIndex,
                                                         stopIndex)) {
                  stopIndex = constructedStopIndex;
                }
                while (currentTailIndex != stopIndex) {
                  (*block)[currentTailIndex++]->~T();
                }
                if (block == lastBlockEnqueued) {
                  break;
                }
                block = block->next;
              }
            }

            // Clear the index entries of the blocks allocated by this call and
            // return those blocks to the parent's free list
            currentTailIndex =
                (startTailIndex - 1) & ~static_cast<index_t>(BLOCK_SIZE - 1);
            for (auto block = firstAllocatedBlock; block != nullptr;
                 block = block->next) {
              currentTailIndex += static_cast<index_t>(BLOCK_SIZE);
              auto idxEntry = get_block_index_entry_for_index(currentTailIndex);
              idxEntry->value.store(nullptr, std::memory_order_relaxed);
              rewind_block_index_tail();
            }
            this->parent->add_blocks_to_free_list(firstAllocatedBlock);
            this->tailBlock = startBlock;
            MOODYCAMEL_RETHROW;
          }
        }

        if (this->tailBlock == endBlock) {
          assert(currentTailIndex == newTailIndex);
          break;
        }
        this->tailBlock = this->tailBlock->next;
      }
      // Publish the enqueued elements
      this->tailIndex.store(newTailIndex, std::memory_order_release);
      return true;
    }
#ifdef _MSC_VER
#pragma warning(pop)
#endif

    // Dequeues up to `max` elements from this producer in a single operation.
    //
    // Each dequeued element is move-assigned to `*itemFirst`, after which the
    // iterator is advanced; `itemFirst` is taken by reference, so the caller
    // observes the advanced iterator. The source element is destroyed right
    // after it has been moved out.
    //
    // Returns the number of elements actually dequeued, or 0 when nothing could
    // be claimed. If a move assignment throws, the remaining claimed elements
    // are destroyed, their blocks are marked empty (and recycled when fully
    // empty), and the exception is rethrown.
    template <typename It>
    size_t dequeue_bulk(It &itemFirst, size_t max) {
      // Optimistic estimate of what is available: the tail minus the net number
      // of dequeues already claimed (optimistic count less the overcommit).
      auto tail = this->tailIndex.load(std::memory_order_relaxed);
      auto overcommit = this->dequeueOvercommit.load(std::memory_order_relaxed);
      auto desiredCount = static_cast<size_t>(
          tail - (this->dequeueOptimisticCount.load(std::memory_order_relaxed) -
                  overcommit));
      if (details::circular_less_than<size_t>(0, desiredCount)) {
        // Never ask for more than the caller can accept.
        desiredCount = desiredCount < max ? desiredCount : max;
        std::atomic_thread_fence(std::memory_order_acquire);

        // Claim `desiredCount` elements up front; the claim is corrected below
        // via dequeueOvercommit if fewer turn out to be available.
        auto myDequeueCount = this->dequeueOptimisticCount.fetch_add(
            desiredCount, std::memory_order_relaxed);

        // Re-read the tail and recompute how many elements this claim really
        // covers, using the pre-increment count returned by the fetch_add.
        tail = this->tailIndex.load(std::memory_order_acquire);
        auto actualCount =
            static_cast<size_t>(tail - (myDequeueCount - overcommit));
        if (details::circular_less_than<size_t>(0, actualCount)) {
          actualCount = desiredCount < actualCount ? desiredCount : actualCount;
          if (actualCount < desiredCount) {
            // Give back the part of the claim that could not be satisfied.
            this->dequeueOvercommit.fetch_add(desiredCount - actualCount,
                                              std::memory_order_release);
          }

          // Get the first index. Note that since there's guaranteed to be at
          // least actualCount elements, this will never exceed tail.
          auto firstIndex =
              this->headIndex.fetch_add(actualCount, std::memory_order_acq_rel);

          // Iterate the blocks and dequeue
          auto index = firstIndex;
          BlockIndexHeader *localBlockIndex;
          auto indexIndex =
              get_block_index_index_for_index(index, localBlockIndex);
          do {
            // Work on one block per iteration: [blockStartIndex, endIndex),
            // where endIndex is the end of the current block or the end of the
            // claimed range, whichever comes first.
            auto blockStartIndex = index;
            index_t endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) +
                               static_cast<index_t>(BLOCK_SIZE);
            endIndex =
                details::circular_less_than<index_t>(
                    firstIndex + static_cast<index_t>(actualCount), endIndex)
                    ? firstIndex + static_cast<index_t>(actualCount)
                    : endIndex;

            auto entry = localBlockIndex->index[indexIndex];
            auto block = entry->value.load(std::memory_order_relaxed);
            // Select the loop variant depending on whether move-assigning a T
            // into the output iterator is noexcept.
            if (MOODYCAMEL_NOEXCEPT_ASSIGN(T, T &&,
                                           details::deref_noexcept(itemFirst) =
                                               std::move((*(*block)[index])))) {
              // Non-throwing path: move out, destroy, advance.
              while (index != endIndex) {
                auto &el = *((*block)[index]);
                *itemFirst++ = std::move(el);
                el.~T();
                ++index;
              }
            } else {
              // Potentially-throwing path: the iterator and index are only
              // advanced after the assignment has succeeded, so on an exception
              // `index` still refers to the element whose move failed.
              MOODYCAMEL_TRY {
                while (index != endIndex) {
                  auto &el = *((*block)[index]);
                  *itemFirst = std::move(el);
                  ++itemFirst;
                  el.~T();
                  ++index;
                }
              }
              MOODYCAMEL_CATCH(...) {
                // The claimed elements are already removed from the queue's
                // point of view (headIndex was advanced), so destroy everything
                // that remains in the claimed range, block by block, and
                // release the blocks before propagating the exception.
                do {
                  entry = localBlockIndex->index[indexIndex];
                  block = entry->value.load(std::memory_order_relaxed);
                  while (index != endIndex) {
                    (*block)[index++]->~T();
                  }

                  // If this made the whole block empty, detach it from the
                  // block index and hand it back to the parent's free list.
                  if (block->ConcurrentQueue::Block::template set_many_empty<
                          implicit_context>(
                          blockStartIndex,
                          static_cast<size_t>(endIndex - blockStartIndex))) {
#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
                    debug::DebugLock lock(mutex);
#endif
                    entry->value.store(nullptr, std::memory_order_relaxed);
                    this->parent->add_block_to_free_list(block);
                  }
                  indexIndex =
                      (indexIndex + 1) & (localBlockIndex->capacity - 1);

                  // Set up the range for the next block in the claimed span.
                  blockStartIndex = index;
                  endIndex = (index & ~static_cast<index_t>(BLOCK_SIZE - 1)) +
                             static_cast<index_t>(BLOCK_SIZE);
                  endIndex =
                      details::circular_less_than<index_t>(
                          firstIndex + static_cast<index_t>(actualCount),
                          endIndex)
                          ? firstIndex + static_cast<index_t>(actualCount)
                          : endIndex;
                } while (index != firstIndex + actualCount);

                MOODYCAMEL_RETHROW;
              }
            }
            // Mark the slots just consumed as empty; a true result means the
            // block can be recycled.
            if (block->ConcurrentQueue::Block::template set_many_empty<
                    implicit_context>(
                    blockStartIndex,
                    static_cast<size_t>(endIndex - blockStartIndex))) {
              {
#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
                debug::DebugLock lock(mutex);
#endif
                // Note that the set_many_empty above did a release, meaning
                // that anybody who acquires the block we're about to free can
                // use it safely since our writes (and reads!) will have
                // happened-before then.
                entry->value.store(nullptr, std::memory_order_relaxed);
              }
              this->parent->add_block_to_free_list(
                  block);  // releases the above store
            }
            // Advance to the next slot of the (circular) block index.
            indexIndex = (indexIndex + 1) & (localBlockIndex->capacity - 1);
          } while (index != firstIndex + actualCount);

          return actualCount;
        } else {
          // Nothing was actually available: undo the whole optimistic claim.
          this->dequeueOvercommit.fetch_add(desiredCount,
                                            std::memory_order_release);
        }
      }

      return 0;
    }

   private:
    // The block size must be > 1, so any number with the low bit set is an
    // invalid block base index
    static const index_t INVALID_BLOCK_BASE = 1;

    // One slot of the implicit producer's block index: associates a block's
    // base element index (`key`) with the block itself (`value`). A key of
    // INVALID_BLOCK_BASE or a null value marks the slot as reusable (see
    // insert_block_index_entry).
    struct BlockIndexEntry {
      std::atomic<index_t> key;
      std::atomic<Block *> value;
    };

    // Header of one generation of the block index. Headers are chained through
    // `prev`, from the newest generation back to older ones.
    struct BlockIndexHeader {
      // Number of slots in `index`. Used as a mask (capacity - 1) throughout,
      // so it is presumably always a power of two.
      size_t capacity;
      // Position in `index` of the most recently inserted slot.
      std::atomic<size_t> tail;
      // Entries allocated together with this header (see new_block_index).
      BlockIndexEntry *entries;
      // Circular array of `capacity` entry pointers; may point at entries that
      // belong to previous generations.
      BlockIndexEntry **index;
      // Previous (smaller) generation, or nullptr for the first one.
      BlockIndexHeader *prev;
    };

    // Reserves the next slot of the block index for a block whose first element
    // has index `blockStartIndex`.
    //
    // On success, `idxEntry` points at the reserved slot, the slot's key has
    // been set to `blockStartIndex`, the index tail has been advanced to that
    // slot, and true is returned. This function does not write the slot's
    // `value`; presumably the caller stores the block pointer afterwards.
    //
    // Returns false when there is no block index at all, or when the current
    // index is full and either `allocMode` is CannotAlloc or allocating a larger
    // index fails. Note that `idxEntry` may have been overwritten even when
    // false is returned.
    template <AllocationMode allocMode>
    inline bool insert_block_index_entry(BlockIndexEntry *&idxEntry,
                                         index_t blockStartIndex) {
      auto localBlockIndex =
          blockIndex.load(std::memory_order_relaxed);  // We're the only writer
                                                       // thread, relaxed is OK
      if (localBlockIndex == nullptr) {
        return false;  // this can happen if new_block_index failed in the
                       // constructor
      }
      // Candidate slot: the one right after the current tail, wrapping around.
      size_t newTail =
          (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) &
          (localBlockIndex->capacity - 1);
      idxEntry = localBlockIndex->index[newTail];
      // The slot is free if it was never used (invalid key) or if its block has
      // since been released (null value).
      if (idxEntry->key.load(std::memory_order_relaxed) == INVALID_BLOCK_BASE ||
          idxEntry->value.load(std::memory_order_relaxed) == nullptr) {
        // Write the key first, then publish the new tail with release ordering.
        idxEntry->key.store(blockStartIndex, std::memory_order_relaxed);
        localBlockIndex->tail.store(newTail, std::memory_order_release);
        return true;
      }

      // No room in the old block index, try to allocate another one!
      MOODYCAMEL_CONSTEXPR_IF(allocMode == CannotAlloc) {
        return false;
      }
      else if (!new_block_index()) {
        return false;
      }
      else {
        // new_block_index() installed a larger index; repeat the reservation on
        // it. The slot after its tail is one of the freshly created entries.
        localBlockIndex = blockIndex.load(std::memory_order_relaxed);
        newTail = (localBlockIndex->tail.load(std::memory_order_relaxed) + 1) &
                  (localBlockIndex->capacity - 1);
        idxEntry = localBlockIndex->index[newTail];
        assert(idxEntry->key.load(std::memory_order_relaxed) ==
               INVALID_BLOCK_BASE);
        idxEntry->key.store(blockStartIndex, std::memory_order_relaxed);
        localBlockIndex->tail.store(newTail, std::memory_order_release);
        return true;
      }
    }

    inline void rewind_block_index_tail() {
      auto localBlockIndex = blockIndex.load(std::memory_order_relaxed);
      localBlockIndex->tail.store(
          (localBlockIndex->tail.load(std::memory_order_relaxed) - 1) &
              (localBlockIndex->capacity - 1),
          std::memory_order_relaxed);
    }

    // Returns the block index entry that covers the element at `index`.
    inline BlockIndexEntry *get_block_index_entry_for_index(
        index_t index) const {
      BlockIndexHeader *header = nullptr;
      const auto slot = get_block_index_index_for_index(index, header);
      return header->index[slot];
    }

    // Computes the position, within the current block index, of the slot for
    // the block that contains the element at `index`.
    //
    // `localBlockIndex` is an output parameter: it receives the block index
    // header that the returned position refers to.
    //
    // The position is derived arithmetically from the tail slot: the distance
    // (in blocks) between the requested block's base and the tail slot's key is
    // added to the tail position, modulo the capacity.
    inline size_t get_block_index_index_for_index(
        index_t index, BlockIndexHeader *&localBlockIndex) const {
#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
      debug::DebugLock lock(mutex);
#endif
      // Round down to the base index of the containing block.
      index &= ~static_cast<index_t>(BLOCK_SIZE - 1);
      localBlockIndex = blockIndex.load(std::memory_order_acquire);
      auto tail = localBlockIndex->tail.load(std::memory_order_acquire);
      auto tailBase =
          localBlockIndex->index[tail]->key.load(std::memory_order_relaxed);
      assert(tailBase != INVALID_BLOCK_BASE);
      // Note: Must use division instead of shift because the index may wrap
      // around, causing a negative offset, whose negativity we want to preserve
      auto offset = static_cast<size_t>(
          static_cast<typename std::make_signed<index_t>::type>(index -
                                                                tailBase) /
          static_cast<typename std::make_signed<index_t>::type>(BLOCK_SIZE));
      // A negative offset converted to size_t still yields the right slot once
      // masked with capacity - 1.
      size_t idx = (tail + offset) & (localBlockIndex->capacity - 1);
      // The slot found must describe exactly the requested block, and that
      // block must still be attached.
      assert(localBlockIndex->index[idx]->key.load(std::memory_order_relaxed) ==
                 index &&
             localBlockIndex->index[idx]->value.load(
                 std::memory_order_relaxed) != nullptr);
      return idx;
    }

    // Allocates and installs a new block index generation with
    // `nextBlockIndexCapacity` slots, then doubles `nextBlockIndexCapacity` for
    // the following generation.
    //
    // Existing entries are not copied: the new pointer array refers to the
    // previous generation's entries and to the entries created here. The
    // previous header is kept reachable through `prev`.
    //
    // Returns false (leaving the current index untouched) if the allocation
    // fails, true otherwise.
    bool new_block_index() {
      auto prev = blockIndex.load(std::memory_order_relaxed);
      size_t prevCapacity = prev == nullptr ? 0 : prev->capacity;
      // Number of *new* entries to create: a full set for the first generation,
      // otherwise as many as the previous generation already has.
      auto entryCount = prev == nullptr ? nextBlockIndexCapacity : prevCapacity;
      // One raw allocation holds the header, the new entries and the pointer
      // array; the extra (alignment - 1) bytes leave room to align each part.
      auto raw = static_cast<char *>((Traits::malloc)(
          sizeof(BlockIndexHeader) + std::alignment_of<BlockIndexEntry>::value -
          1 + sizeof(BlockIndexEntry) * entryCount +
          std::alignment_of<BlockIndexEntry *>::value - 1 +
          sizeof(BlockIndexEntry *) * nextBlockIndexCapacity));
      if (raw == nullptr) {
        return false;
      }

      auto header = new (raw) BlockIndexHeader;
      auto entries = reinterpret_cast<BlockIndexEntry *>(
          details::align_for<BlockIndexEntry>(raw + sizeof(BlockIndexHeader)));
      auto index = reinterpret_cast<BlockIndexEntry **>(
          details::align_for<BlockIndexEntry *>(
              reinterpret_cast<char *>(entries) +
              sizeof(BlockIndexEntry) * entryCount));
      if (prev != nullptr) {
        // Copy the previous generation's entry pointers into the front of the
        // new array, starting with the slot after the old tail and ending with
        // the old tail itself, so the old tail lands at position
        // prevCapacity - 1.
        auto prevTail = prev->tail.load(std::memory_order_relaxed);
        auto prevPos = prevTail;
        size_t i = 0;
        do {
          prevPos = (prevPos + 1) & (prev->capacity - 1);
          index[i++] = prev->index[prevPos];
        } while (prevPos != prevTail);
        assert(i == prevCapacity);
      }
      // Construct the new entries as unused and append them after the carried
      // over pointers.
      for (size_t i = 0; i != entryCount; ++i) {
        new (entries + i) BlockIndexEntry;
        entries[i].key.store(INVALID_BLOCK_BASE, std::memory_order_relaxed);
        index[prevCapacity + i] = entries + i;
      }
      header->prev = prev;
      header->entries = entries;
      header->index = index;
      header->capacity = nextBlockIndexCapacity;
      // The tail points at the last carried-over slot; with no previous
      // generation this wraps to capacity - 1, so the first insertion uses
      // slot 0.
      header->tail.store((prevCapacity - 1) & (nextBlockIndexCapacity - 1),
                         std::memory_order_relaxed);

      // Publish the fully initialised header.
      blockIndex.store(header, std::memory_order_release);

      nextBlockIndexCapacity <<= 1;

      return true;
    }

   private:
    size_t nextBlockIndexCapacity;
    std::atomic<BlockIndexHeader *> blockIndex;

#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
   public:
    details::ThreadExitListener threadExitListener;

   private:
#endif

#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
   public:
    ImplicitProducer *nextImplicitProducer;

   private:
#endif

#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODBLOCKINDEX
    mutable debug::DebugMutex mutex;
#endif
#ifdef MCDBGQ_TRACKMEM
    friend struct MemStats;
#endif
  };


  //////////////////////////////////
  // Block pool manipulation
  //////////////////////////////////

  // Creates the initial pool of `blockCount` blocks. When `blockCount` is zero,
  // or when the pool cannot be allocated, the pool is left empty (null pointer,
  // size zero). Every pooled block is flagged as not dynamically allocated.
  void populate_initial_block_list(size_t blockCount) {
    initialBlockPoolSize = blockCount;
    initialBlockPool =
        (blockCount != 0) ? create_array<Block>(blockCount) : nullptr;
    if (initialBlockPool == nullptr) {
      // Either nothing was requested or the allocation failed.
      initialBlockPoolSize = 0;
      return;
    }

    size_t remaining = initialBlockPoolSize;
    for (auto pooled = initialBlockPool; remaining != 0;
         ++pooled, --remaining) {
      pooled->dynamicallyAllocated = false;
    }
  }

  // Hands out the next unused block of the initial pool, or nullptr once the
  // pool is exhausted.
  inline Block *try_get_block_from_initial_pool() {
    // Pre-check with a plain load so the counter is not bumped on every call
    // after the pool has run out.
    if (initialBlockPoolIndex.load(std::memory_order_relaxed) <
        initialBlockPoolSize) {
      const auto claimed =
          initialBlockPoolIndex.fetch_add(1, std::memory_order_relaxed);
      // Another thread may have taken the last block between the load and the
      // fetch_add, so the claimed position has to be validated again.
      if (claimed < initialBlockPoolSize) {
        return initialBlockPool + claimed;
      }
    }
    return nullptr;
  }

  // Releases a single block: it is put on the free list for reuse, unless it
  // was dynamically allocated and the traits disable recycling of such blocks,
  // in which case it is destroyed.
  inline void add_block_to_free_list(Block *block) {
#ifdef MCDBGQ_TRACKMEM
    block->owner = nullptr;
#endif
    const bool keepForReuse =
        Traits::RECYCLE_ALLOCATED_BLOCKS || !block->dynamicallyAllocated;
    if (keepForReuse) {
      freeList.add(block);
      return;
    }
    destroy(block);
  }

  // Releases every block of the chain that starts at `block` and is linked
  // through `next`, one block at a time.
  inline void add_blocks_to_free_list(Block *block) {
    while (block != nullptr) {
      auto current = block;
      // Read the link before releasing the block, which may destroy it.
      block = current->next;
      add_block_to_free_list(current);
    }
  }

  // Forwards to the free list's try_get() and returns its result (nullptr is
  // treated by callers as "no block available").
  inline Block *try_get_block_from_free_list() {
    return freeList.try_get();
  }

  // Obtains a block for a producer. Sources are tried in this order: the
  // initial pool, then the free list, and finally -- only when `canAlloc` is
  // CanAlloc -- a fresh allocation. Returns nullptr when no source could
  // provide a block.
  template <AllocationMode canAlloc>
  Block *requisition_block() {
    if (auto pooled = try_get_block_from_initial_pool()) {
      return pooled;
    }

    if (auto recycled = try_get_block_from_free_list()) {
      return recycled;
    }

    MOODYCAMEL_CONSTEXPR_IF(canAlloc == CanAlloc) {
      return create<Block>();
    }
    else {
      return nullptr;
    }
  }


#ifdef MCDBGQ_TRACKMEM
 public:
  // Snapshot of the queue's memory usage, available only when MCDBGQ_TRACKMEM
  // is defined. Instances are produced by getFor(), which is private and
  // reachable through the befriended ConcurrentQueue.
  struct MemStats {
    // Blocks counted anywhere: on the free list, attached to producers, or
    // still unused in the initial pool.
    size_t allocatedBlocks;
    // Blocks considered to be holding elements.
    size_t usedBlocks;
    // Blocks on the free list plus unused blocks of the initial pool.
    size_t freeBlocks;
    // Blocks found in explicit producers' block rings.
    size_t ownedBlocksExplicit;
    // Blocks referenced by implicit producers' block indices.
    size_t ownedBlocksImplicit;
    // Number of implicit / explicit producers in the producer list.
    size_t implicitProducers;
    size_t explicitProducers;
    // Result of size_approx() at the time of the snapshot.
    size_t elementsEnqueued;
    // sizeof(Block) multiplied by allocatedBlocks.
    size_t blockClassBytes;
    // Size of the queue object plus the size of every producer object.
    size_t queueClassBytes;
    // Bytes attributed to implicit / explicit producers' block indices.
    size_t implicitBlockIndexBytes;
    size_t explicitBlockIndexBytes;

    friend class ConcurrentQueue;

   private:
    // Walks the free list, the producer list and the initial pool of `q` and
    // accumulates the counters above. Reads are mostly relaxed and no locking
    // is done here.
    static MemStats getFor(ConcurrentQueue *q) {
      MemStats stats = {0};

      stats.elementsEnqueued = q->size_approx();

      // Every block on the free list is both allocated and free.
      auto block = q->freeList.head_unsafe();
      while (block != nullptr) {
        ++stats.allocatedBlocks;
        ++stats.freeBlocks;
        block = block->freeListNext.load(std::memory_order_relaxed);
      }

      for (auto ptr = q->producerListTail.load(std::memory_order_acquire);
           ptr != nullptr; ptr = ptr->next_prod()) {
        bool implicit = dynamic_cast<ImplicitProducer *>(ptr) != nullptr;
        stats.implicitProducers += implicit ? 1 : 0;
        stats.explicitProducers += implicit ? 0 : 1;

        if (implicit) {
          auto prod = static_cast<ImplicitProducer *>(ptr);
          stats.queueClassBytes += sizeof(ImplicitProducer);
          auto head = prod->headIndex.load(std::memory_order_relaxed);
          auto tail = prod->tailIndex.load(std::memory_order_relaxed);
          auto hash = prod->blockIndex.load(std::memory_order_relaxed);
          if (hash != nullptr) {
            // Count the slots of the current index generation that have a valid
            // key and a block attached.
            for (size_t i = 0; i != hash->capacity; ++i) {
              if (hash->index[i]->key.load(std::memory_order_relaxed) !=
                      ImplicitProducer::INVALID_BLOCK_BASE &&
                  hash->index[i]->value.load(std::memory_order_relaxed) !=
                      nullptr) {
                ++stats.allocatedBlocks;
                ++stats.ownedBlocksImplicit;
              }
            }
            // Entries are accounted once, using the current capacity ...
            stats.implicitBlockIndexBytes +=
                hash->capacity *
                sizeof(typename ImplicitProducer::BlockIndexEntry);
            // ... while the header and pointer array are accounted for every
            // generation in the chain.
            for (; hash != nullptr; hash = hash->prev) {
              stats.implicitBlockIndexBytes +=
                  sizeof(typename ImplicitProducer::BlockIndexHeader) +
                  hash->capacity *
                      sizeof(typename ImplicitProducer::BlockIndexEntry *);
            }
          }
          // One used block per BLOCK_SIZE step between head and tail.
          for (; details::circular_less_than<index_t>(head, tail);
               head += BLOCK_SIZE) {
            // auto block = prod->get_block_index_entry_for_index(head);
            ++stats.usedBlocks;
          }
        } else {
          auto prod = static_cast<ExplicitProducer *>(ptr);
          stats.queueClassBytes += sizeof(ExplicitProducer);
          auto tailBlock = prod->tailBlock;
          bool wasNonEmpty = false;
          if (tailBlock != nullptr) {
            // Walk the producer's block ring exactly once, starting and ending
            // at the tail block.
            auto block = tailBlock;
            do {
              ++stats.allocatedBlocks;
              if (!block->ConcurrentQueue::Block::template is_empty<
                      explicit_context>() ||
                  wasNonEmpty) {
                ++stats.usedBlocks;
                wasNonEmpty = wasNonEmpty || block != tailBlock;
              }
              ++stats.ownedBlocksExplicit;
              block = block->next;
            } while (block != tailBlock);
          }
          // Sum the header and entry storage of every index generation.
          auto index = prod->blockIndex.load(std::memory_order_relaxed);
          while (index != nullptr) {
            stats.explicitBlockIndexBytes +=
                sizeof(typename ExplicitProducer::BlockIndexHeader) +
                index->size *
                    sizeof(typename ExplicitProducer::BlockIndexEntry);
            index = static_cast<typename ExplicitProducer::BlockIndexHeader *>(
                index->prev);
          }
        }
      }

      // Blocks of the initial pool that have not been handed out yet; the
      // pool index can exceed the pool size, hence the clamp to zero.
      auto freeOnInitialPool =
          q->initialBlockPoolIndex.load(std::memory_order_relaxed) >=
                  q->initialBlockPoolSize
              ? 0
              : q->initialBlockPoolSize -
                    q->initialBlockPoolIndex.load(std::memory_order_relaxed);
      stats.allocatedBlocks += freeOnInitialPool;
      stats.freeBlocks += freeOnInitialPool;

      stats.blockClassBytes = sizeof(Block) * stats.allocatedBlocks;
      stats.queueClassBytes += sizeof(ConcurrentQueue);

      return stats;
    }
  };

  // For debugging only. Not thread-safe.
  // Returns a MemStats snapshot computed for this queue by MemStats::getFor().
  MemStats getMemStats() {
    return MemStats::getFor(this);
  }

 private:
  friend struct MemStats;
#endif


  //////////////////////////////////
  // Producer list manipulation
  //////////////////////////////////

  // Returns a producer of the requested kind (explicit or implicit). An
  // inactive producer already present in the producer list is reactivated and
  // returned when one can be claimed; otherwise a new producer is created and
  // appended to the list. Returns nullptr if a new producer is needed but
  // cannot be created.
  ProducerBase *recycle_or_create_producer(bool isExplicit) {
#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
    debug::DebugLock lock(implicitProdMutex);
#endif
    // First pass: look for an inactive producer of the right kind to revive.
    auto candidate = producerListTail.load(std::memory_order_acquire);
    for (; candidate != nullptr; candidate = candidate->next_prod()) {
      if (!candidate->inactive.load(std::memory_order_relaxed) ||
          candidate->isExplicit != isExplicit) {
        continue;
      }

      // Claim it atomically; another thread may be racing for the same one.
      bool expected = true;
      const bool claimed = candidate->inactive.compare_exchange_strong(
          expected, /* desired */ false, std::memory_order_acquire,
          std::memory_order_relaxed);
      if (claimed) {
        return candidate;
      }
    }

    // Nothing to recycle: build a fresh producer and register it.
    ProducerBase *fresh = nullptr;
    if (isExplicit) {
      fresh = static_cast<ProducerBase *>(create<ExplicitProducer>(this));
    } else {
      fresh = create<ImplicitProducer>(this);
    }
    return add_producer(fresh);
  }

  // Registers `producer` with this queue: bumps the producer count and pushes
  // the producer onto the lock-free producer list (and, in internal-debug
  // builds, onto the matching explicit/implicit debug list). A null `producer`
  // is passed through unchanged so that allocation failures propagate to the
  // caller. Returns `producer`.
  ProducerBase *add_producer(ProducerBase *producer) {
    if (producer == nullptr) {
      // The producer could not be allocated; report that to the caller.
      return nullptr;
    }

    producerCount.fetch_add(1, std::memory_order_relaxed);

    // Push onto the lock-free list. On a failed exchange `observedTail` is
    // refreshed with the current tail, so the link is simply rewritten.
    auto observedTail = producerListTail.load(std::memory_order_relaxed);
    producer->next = observedTail;
    while (!producerListTail.compare_exchange_weak(
        observedTail, producer, std::memory_order_release,
        std::memory_order_relaxed)) {
      producer->next = observedTail;
    }

#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
    if (producer->isExplicit) {
      auto explicitProd = static_cast<ExplicitProducer *>(producer);
      auto observedExplicit = explicitProducers.load(std::memory_order_relaxed);
      explicitProd->nextExplicitProducer = observedExplicit;
      while (!explicitProducers.compare_exchange_weak(
          observedExplicit, explicitProd, std::memory_order_release,
          std::memory_order_relaxed)) {
        explicitProd->nextExplicitProducer = observedExplicit;
      }
    } else {
      auto implicitProd = static_cast<ImplicitProducer *>(producer);
      auto observedImplicit = implicitProducers.load(std::memory_order_relaxed);
      implicitProd->nextImplicitProducer = observedImplicit;
      while (!implicitProducers.compare_exchange_weak(
          observedImplicit, implicitProd, std::memory_order_release,
          std::memory_order_relaxed)) {
        implicitProd->nextImplicitProducer = observedImplicit;
      }
    }
#endif

    return producer;
  }

  // Points every producer in the producer list back at this queue. Needed
  // after a move or swap, because the producers taken over from the other
  // queue still reference it as their parent.
  void reown_producers() {
    auto current = producerListTail.load(std::memory_order_relaxed);
    while (current != nullptr) {
      current->parent = this;
      current = current->next_prod();
    }
  }


  //////////////////////////////////
  // Implicit producer hash
  //////////////////////////////////

  // Key/value slot of the implicit producer hash: a thread id mapped to the
  // implicit producer that belongs to that thread.
  struct ImplicitProducerKVP {
    std::atomic<details::thread_id_t> key;
    ImplicitProducer *value;  // Plain (non-atomic) pointer: it is only read by
                              // the thread that wrote it in the first place

    ImplicitProducerKVP() : value(nullptr) {}

    // Move construction copies both key and value; `other` is left unchanged.
    ImplicitProducerKVP(ImplicitProducerKVP &&other) MOODYCAMEL_NOEXCEPT
        : value(other.value) {
      const auto sourceKey = other.key.load(std::memory_order_relaxed);
      key.store(sourceKey, std::memory_order_relaxed);
    }

    // Move assignment is implemented as a swap with `other`.
    inline ImplicitProducerKVP &operator=(ImplicitProducerKVP &&other)
        MOODYCAMEL_NOEXCEPT {
      swap(other);
      return *this;
    }

    // Exchanges key and value with `other`; swapping with itself is a no-op.
    inline void swap(ImplicitProducerKVP &other) MOODYCAMEL_NOEXCEPT {
      if (this == &other) {
        return;
      }
      details::swap_relaxed(key, other.key);
      std::swap(value, other.value);
    }
  };

  template <typename XT, typename XTraits>
  friend void moodycamel::swap(
      typename ConcurrentQueue<XT, XTraits>::ImplicitProducerKVP &,
      typename ConcurrentQueue<XT, XTraits>::ImplicitProducerKVP &)
      MOODYCAMEL_NOEXCEPT;

  // One generation of the implicit producer hash table. Larger generations are
  // chained to the ones they supersede through `prev`.
  struct ImplicitProducerHash {
    // Number of slots in `entries`. Used as a mask (capacity - 1) when probing,
    // so it is presumably always a power of two.
    size_t capacity;
    // Slot array of `capacity` elements.
    ImplicitProducerKVP *entries;
    // Previous (smaller) table, or nullptr for the initial one.
    ImplicitProducerHash *prev;
  };

  // Sets up the embedded initial implicit producer hash table: resets the
  // entry count, marks every slot as empty and installs the table as the
  // current hash. Does nothing when the configured initial size is zero.
  inline void populate_initial_implicit_producer_hash() {
    MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) {
      return;
    }
    else {
      implicitProducerHashCount.store(0, std::memory_order_relaxed);

      // Every slot starts out with the "no thread" key.
      for (size_t slot = 0; slot != INITIAL_IMPLICIT_PRODUCER_HASH_SIZE;
           ++slot) {
        initialImplicitProducerHashEntries[slot].key.store(
            details::invalid_thread_id, std::memory_order_relaxed);
      }

      initialImplicitProducerHash.capacity =
          INITIAL_IMPLICIT_PRODUCER_HASH_SIZE;
      initialImplicitProducerHash.entries =
          &initialImplicitProducerHashEntries[0];
      initialImplicitProducerHash.prev = nullptr;

      // Install the table only after it has been fully initialised.
      implicitProducerHash.store(&initialImplicitProducerHash,
                                 std::memory_order_relaxed);
    }
  }

  // Exchanges the implicit producer hash state of this queue and `other`.
  //
  // Each queue embeds its own initial table, so after swapping the contents and
  // the current-table pointers, every pointer that refers to the *other*
  // queue's embedded table has to be redirected to the local one. Does nothing
  // when the configured initial hash size is zero.
  void swap_implicit_producer_hashes(ConcurrentQueue &other) {
    MOODYCAMEL_CONSTEXPR_IF(INITIAL_IMPLICIT_PRODUCER_HASH_SIZE == 0) {
      return;
    }
    else {
      // Swap (assumes our implicit producer hash is initialized)
      initialImplicitProducerHashEntries.swap(
          other.initialImplicitProducerHashEntries);
      // Re-point each embedded table at its own (now swapped) entry storage.
      initialImplicitProducerHash.entries =
          &initialImplicitProducerHashEntries[0];
      other.initialImplicitProducerHash.entries =
          &other.initialImplicitProducerHashEntries[0];

      details::swap_relaxed(implicitProducerHashCount,
                            other.implicitProducerHashCount);

      details::swap_relaxed(implicitProducerHash, other.implicitProducerHash);
      // Fix up this queue: if the current table is the other queue's embedded
      // table, use our own embedded table instead ...
      if (implicitProducerHash.load(std::memory_order_relaxed) ==
          &other.initialImplicitProducerHash) {
        implicitProducerHash.store(&initialImplicitProducerHash,
                                   std::memory_order_relaxed);
      } else {
        // ... otherwise walk the chain to the table whose predecessor is the
        // other queue's embedded table and relink it to ours.
        ImplicitProducerHash *hash;
        for (hash = implicitProducerHash.load(std::memory_order_relaxed);
             hash->prev != &other.initialImplicitProducerHash;
             hash = hash->prev) {
          continue;
        }
        hash->prev = &initialImplicitProducerHash;
      }
      // Same fix-up, mirrored, for the other queue.
      if (other.implicitProducerHash.load(std::memory_order_relaxed) ==
          &initialImplicitProducerHash) {
        other.implicitProducerHash.store(&other.initialImplicitProducerHash,
                                         std::memory_order_relaxed);
      } else {
        ImplicitProducerHash *hash;
        for (hash = other.implicitProducerHash.load(std::memory_order_relaxed);
             hash->prev != &initialImplicitProducerHash; hash = hash->prev) {
          continue;
        }
        hash->prev = &other.initialImplicitProducerHash;
      }
    }
  }

  // Returns the implicit producer associated with the calling thread, creating
  // and registering one on first use.
  //
  // The lookup probes the current hash table and then every previous table in
  // the chain. If nothing is found, a slot is reserved, the table is grown when
  // it is at least half full, and a recycled or newly created producer is
  // inserted under the calling thread's id.
  //
  // Only fails (returns nullptr) if memory allocation fails
  ImplicitProducer *get_or_add_implicit_producer() {
    // Note that since the data is essentially thread-local (key is thread ID),
    // there's a reduced need for fences (memory ordering is already consistent
    // for any individual thread), except for the current table itself.

    // Start by looking for the thread ID in the current and all previous hash
    // tables. If it's not found, it must not be in there yet, since this same
    // thread would have added it previously to one of the tables that we
    // traversed.

    // Code and algorithm adapted from
    // http://preshing.com/20130605/the-worlds-simplest-lock-free-hash-table

#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
    debug::DebugLock lock(implicitProdMutex);
#endif

    auto id = details::thread_id();
    auto hashedId = details::hash_thread_id(id);

    auto mainHash = implicitProducerHash.load(std::memory_order_acquire);
    assert(
        mainHash !=
        nullptr);  // silence clang-tidy and MSVC warnings (hash cannot be null)
    for (auto hash = mainHash; hash != nullptr; hash = hash->prev) {
      // Look for the id in this hash
      auto index = hashedId;
      while (true) {  // Not an infinite loop because at least one slot is free
                      // in the hash table
        index &= hash->capacity - 1u;

        auto probedKey =
            hash->entries[index].key.load(std::memory_order_relaxed);
        if (probedKey == id) {
          // Found it! If we had to search several hashes deep, though, we
          // should lazily add it to the current main hash table to avoid the
          // extended search next time. Note there's guaranteed to be room in
          // the current hash table since every subsequent table implicitly
          // reserves space for all previous tables (there's only one
          // implicitProducerHashCount).
          auto value = hash->entries[index].value;
          if (hash != mainHash) {
            // Linear probing in the main table: claim the first slot that is
            // empty (or, with thread-local support, marked reusable).
            index = hashedId;
            while (true) {
              index &= mainHash->capacity - 1u;
              auto empty = details::invalid_thread_id;
#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
              auto reusable = details::invalid_thread_id2;
              if (mainHash->entries[index].key.compare_exchange_strong(
                      empty, id, std::memory_order_seq_cst,
                      std::memory_order_relaxed) ||
                  mainHash->entries[index].key.compare_exchange_strong(
                      reusable, id, std::memory_order_seq_cst,
                      std::memory_order_relaxed)) {
#else
              if (mainHash->entries[index].key.compare_exchange_strong(
                      empty, id, std::memory_order_seq_cst,
                      std::memory_order_relaxed)) {
#endif
                mainHash->entries[index].value = value;
                break;
              }
              ++index;
            }
          }

          return value;
        }
        if (probedKey == details::invalid_thread_id) {
          break;  // Not in this hash table
        }
        ++index;
      }
    }

    // Insert!
    // Reserve a slot by bumping the shared count first; every failure path
    // below gives the reservation back with a matching fetch_sub.
    auto newCount =
        1 + implicitProducerHashCount.fetch_add(1, std::memory_order_relaxed);
    while (true) {
      // Grow the table once it is at least half full, provided no other thread
      // currently holds the resize flag.
      // NOLINTNEXTLINE(clang-analyzer-core.NullDereference)
      if (newCount >= (mainHash->capacity >> 1) &&
          !implicitProducerHashResizeInProgress.test_and_set(
              std::memory_order_acquire)) {
        // We've acquired the resize lock, try to allocate a bigger hash table.
        // Note the acquire fence synchronizes with the release fence at the end
        // of this block, and hence when we reload implicitProducerHash it must
        // be the most recent version (it only gets changed within this locked
        // block).
        mainHash = implicitProducerHash.load(std::memory_order_acquire);
        if (newCount >= (mainHash->capacity >> 1)) {
          // Double the capacity until the new count fits in less than half of
          // it.
          size_t newCapacity = mainHash->capacity << 1;
          while (newCount >= (newCapacity >> 1)) {
            newCapacity <<= 1;
          }
          // Header and slots share one allocation; the extra (alignment - 1)
          // bytes leave room to align the slot array.
          auto raw = static_cast<char *>(
              (Traits::malloc)(sizeof(ImplicitProducerHash) +
                               std::alignment_of<ImplicitProducerKVP>::value -
                               1 + sizeof(ImplicitProducerKVP) * newCapacity));
          if (raw == nullptr) {
            // Allocation failed
            implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed);
            implicitProducerHashResizeInProgress.clear(
                std::memory_order_relaxed);
            return nullptr;
          }

          auto newHash = new (raw) ImplicitProducerHash;
          newHash->capacity = static_cast<size_t>(newCapacity);
          newHash->entries = reinterpret_cast<ImplicitProducerKVP *>(
              details::align_for<ImplicitProducerKVP>(
                  raw + sizeof(ImplicitProducerHash)));
          for (size_t i = 0; i != newCapacity; ++i) {
            new (newHash->entries + i) ImplicitProducerKVP;
            newHash->entries[i].key.store(details::invalid_thread_id,
                                          std::memory_order_relaxed);
          }
          // The old table stays reachable through `prev`; its entries are not
          // copied here but migrated lazily by the lookup above.
          newHash->prev = mainHash;
          implicitProducerHash.store(newHash, std::memory_order_release);
          implicitProducerHashResizeInProgress.clear(std::memory_order_release);
          mainHash = newHash;
        } else {
          // Somebody else already installed a big enough table in the meantime.
          implicitProducerHashResizeInProgress.clear(std::memory_order_release);
        }
      }

      // If it's < three-quarters full, add to the old one anyway so that we
      // don't have to wait for the next table to finish being allocated by
      // another thread (and if we just finished allocating above, the condition
      // will always be true)
      if (newCount < (mainHash->capacity >> 1) + (mainHash->capacity >> 2)) {
        auto producer =
            static_cast<ImplicitProducer *>(recycle_or_create_producer(false));
        if (producer == nullptr) {
          implicitProducerHashCount.fetch_sub(1, std::memory_order_relaxed);
          return nullptr;
        }

#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
        // Register a thread-exit callback for this producer.
        producer->threadExitListener.callback =
            &ConcurrentQueue::implicit_producer_thread_exited_callback;
        producer->threadExitListener.userData = producer;
        details::ThreadExitNotifier::subscribe(&producer->threadExitListener);
#endif

        // Linear probing for a slot to claim for this thread's id.
        auto index = hashedId;
        while (true) {
          index &= mainHash->capacity - 1u;
          auto empty = details::invalid_thread_id;
#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
          auto reusable = details::invalid_thread_id2;
          if (mainHash->entries[index].key.compare_exchange_strong(
                  reusable, id, std::memory_order_seq_cst,
                  std::memory_order_relaxed)) {
            implicitProducerHashCount.fetch_sub(
                1,
                std::memory_order_relaxed);  // already counted as a used slot
            mainHash->entries[index].value = producer;
            break;
          }
#endif
          if (mainHash->entries[index].key.compare_exchange_strong(
                  empty, id, std::memory_order_seq_cst,
                  std::memory_order_relaxed)) {
            mainHash->entries[index].value = producer;
            break;
          }
          ++index;
        }
        return producer;
      }

      // Hmm, the old hash is quite full and somebody else is busy allocating a
      // new one. We need to wait for the allocating thread to finish (if it
      // succeeds, we add, if not, we try to allocate ourselves).
      mainHash = implicitProducerHash.load(std::memory_order_acquire);
    }
  }

#ifdef MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED
  // Detaches the calling (exiting) thread from the implicit-producer hash and
  // then flags `producer` as inactive. Invoked through
  // implicit_producer_thread_exited_callback.
  //
  // Every hash table reachable through the `prev` chain is probed for the
  // calling thread's id; a matching key is replaced with
  // details::invalid_thread_id2.
  void implicit_producer_thread_exited(ImplicitProducer *producer) {
    // Remove from hash
#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
    debug::DebugLock lock(implicitProdMutex);
#endif
    auto hash = implicitProducerHash.load(std::memory_order_acquire);
    assert(hash != nullptr);  // The thread exit listener is only registered if
                              // we were added to a hash in the first place
    auto id = details::thread_id();
    auto hashedId = details::hash_thread_id(id);
    details::thread_id_t probedKey;

    // We need to traverse all the hashes just in case other threads aren't on
    // the current one yet and are trying to add an entry thinking there's a
    // free slot (because they reused a producer)
    for (; hash != nullptr; hash = hash->prev) {
      auto index = hashedId;
      do {
        // The mask keeps the probe index inside the table (this relies on
        // capacity being a power of two).
        index &= hash->capacity - 1u;
        // compare_exchange_strong overwrites its `expected` argument when it
        // fails, so the expected key is reset to our id before every attempt;
        // after a failure probedKey holds the key actually found in the slot.
        probedKey = id;
        if (hash->entries[index].key.compare_exchange_strong(
                probedKey, details::invalid_thread_id2,
                std::memory_order_seq_cst, std::memory_order_relaxed)) {
          break;
        }
        ++index;
      } while (
          probedKey !=
          details::invalid_thread_id);  // Can happen if the hash has changed
                                        // but we weren't put back in it yet, or
                                        // if we weren't added to this hash in
                                        // the first place
    }

    // Mark the queue as being recyclable
    producer->inactive.store(true, std::memory_order_release);
  }

  static void implicit_producer_thread_exited_callback(void *userData) {
    auto producer = static_cast<ImplicitProducer *>(userData);
    auto queue = producer->parent;
    queue->implicit_producer_thread_exited(producer);
  }
#endif

  //////////////////////////////////
  // Utility functions
  //////////////////////////////////

  // Allocates `size` bytes aligned for TAlign using Traits::malloc.
  //
  // When TAlign needs no more than max_align_t alignment the allocation is
  // returned directly. Otherwise the request is padded, the result is aligned
  // by hand, and the pointer originally returned by Traits::malloc is stored
  // in the slot immediately before the aligned address so that aligned_free
  // can recover it. Returns nullptr when the underlying allocation fails.
  template <typename TAlign>
  static inline void *aligned_malloc(size_t size) {
    MOODYCAMEL_CONSTEXPR_IF(std::alignment_of<TAlign>::value <=
                            std::alignment_of<details::max_align_t>::value)
    return (Traits::malloc)(size);
    else {
      const size_t padding = std::alignment_of<TAlign>::value;
      void *const base = (Traits::malloc)(size + padding - 1 + sizeof(void *));
      if (base == nullptr) return nullptr;
      char *const aligned = details::align_for<TAlign>(
          reinterpret_cast<char *>(base) + sizeof(void *));
      // Remember the real allocation right in front of the aligned block.
      reinterpret_cast<void **>(aligned)[-1] = base;
      return aligned;
    }
  }

  // Releases memory obtained from aligned_malloc<TAlign>.
  //
  // For ordinarily aligned types the pointer goes straight to Traits::free.
  // For over-aligned types the pointer stored just before `ptr` is freed
  // instead; a null `ptr` results in Traits::free(nullptr).
  template <typename TAlign>
  static inline void aligned_free(void *ptr) {
    MOODYCAMEL_CONSTEXPR_IF(std::alignment_of<TAlign>::value <=
                            std::alignment_of<details::max_align_t>::value)
    return (Traits::free)(ptr);
    else {
      void *base = nullptr;
      if (ptr) {
        base = reinterpret_cast<void **>(ptr)[-1];
      }
      (Traits::free)(base);
    }
  }

  // Allocates storage for `count` objects of type U with aligned_malloc and
  // value-initializes each of them in place, front to back.
  // Returns nullptr if the allocation fails. `count` must be non-zero.
  template <typename U>
  static inline U *create_array(size_t count) {
    assert(count > 0);
    void *const storage = aligned_malloc<U>(sizeof(U) * count);
    if (storage == nullptr) return nullptr;

    U *const first = static_cast<U *>(storage);
    size_t built = 0;
    while (built != count) {
      new (first + built) U();
      ++built;
    }
    return first;
  }

  // Destroys the `count` objects of an array made by create_array, last
  // element first, then hands the storage to aligned_free. A null `p` skips
  // the destruction step but is still passed to aligned_free.
  template <typename U>
  static inline void destroy_array(U *p, size_t count) {
    if (p != nullptr) {
      assert(count > 0);
      size_t remaining = count;
      while (remaining != 0) {
        --remaining;
        (p + remaining)->~U();
      }
    }
    aligned_free<U>(p);
  }

  // Allocates storage for one U with aligned_malloc and default-initializes
  // it in place. Returns nullptr if the allocation fails.
  template <typename U>
  static inline U *create() {
    void *const storage = aligned_malloc<U>(sizeof(U));
    if (storage == nullptr) return nullptr;
    return new (storage) U;
  }

  // Allocates storage for one U with aligned_malloc and constructs it in
  // place from the perfectly forwarded argument `a1`.
  // Returns nullptr if the allocation fails.
  template <typename U, typename A1>
  static inline U *create(A1 &&a1) {
    void *const storage = aligned_malloc<U>(sizeof(U));
    if (storage == nullptr) return nullptr;
    return new (storage) U(std::forward<A1>(a1));
  }

  // Counterpart of create(): runs the destructor of `*p` when `p` is not
  // null, then passes `p` (null or not) to aligned_free.
  template <typename U>
  static inline void destroy(U *p) {
    if (p) {
      p->~U();
    }
    aligned_free<U>(p);
  }

 private:
  // NOTE(review): the two members below are not used in the visible part of
  // the file; judging by their names they track the producer list and the
  // number of producers - confirm against the rest of the class.
  std::atomic<ProducerBase *> producerListTail;
  std::atomic<std::uint32_t> producerCount;

  // NOTE(review): presumably a pre-allocated pool of blocks plus the index of
  // the next unused one - confirm against the constructor.
  std::atomic<size_t> initialBlockPoolIndex;
  Block *initialBlockPool;
  size_t initialBlockPoolSize;

  // Free list of blocks; a debug implementation is substituted when
  // MCDBGQ_USEDEBUGFREELIST is defined.
#ifndef MCDBGQ_USEDEBUGFREELIST
  FreeList<Block> freeList;
#else
  debug::DebugFreeList<Block> freeList;
#endif

  // Newest implicit-producer hash table. A replacement table is published
  // here with a release store and keeps a `prev` link to the table it
  // replaced; readers load it with acquire ordering.
  std::atomic<ImplicitProducerHash *> implicitProducerHash;
  std::atomic<size_t>
      implicitProducerHashCount;  // Number of slots logically used
  // NOTE(review): presumably the statically sized first hash table and its
  // entry storage - they are set up outside the visible part of the file.
  ImplicitProducerHash initialImplicitProducerHash;
  std::array<ImplicitProducerKVP, INITIAL_IMPLICIT_PRODUCER_HASH_SIZE>
      initialImplicitProducerHashEntries;
  // Cleared whenever a hash resize attempt ends, whether a new table was
  // published, the allocation failed, or no resize turned out to be needed.
  std::atomic_flag implicitProducerHashResizeInProgress;

  // Each ConsumerToken constructor does fetch_add(1) on this counter and
  // stores the previous value as the token's initialOffset.
  std::atomic<std::uint32_t> nextExplicitConsumerId;
  std::atomic<std::uint32_t> globalExplicitConsumerOffset;

  // Debug-only mutex; implicit_producer_thread_exited locks it when
  // MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH is defined.
#ifdef MCDBGQ_NOLOCKFREE_IMPLICITPRODHASH
  debug::DebugMutex implicitProdMutex;
#endif

  // Extra producer lists that exist only in MOODYCAMEL_QUEUE_INTERNAL_DEBUG
  // builds.
#ifdef MOODYCAMEL_QUEUE_INTERNAL_DEBUG
  std::atomic<ExplicitProducer *> explicitProducers;
  std::atomic<ImplicitProducer *> implicitProducers;
#endif
};


// Binds a new token to a producer obtained from
// `queue.recycle_or_create_producer(true)`. When no producer could be
// obtained the token keeps a null producer; otherwise the producer is pointed
// back at this token.
template <typename T, typename Traits>
ProducerToken::ProducerToken(ConcurrentQueue<T, Traits> &queue)
    : producer(queue.recycle_or_create_producer(true)) {
  if (producer == nullptr) {
    return;
  }
  producer->token = this;
}

// Same as the ConcurrentQueue overload, but for a BlockingConcurrentQueue:
// the queue reference is reinterpreted as the ConcurrentQueue it is treated
// as here before a producer is requested from it.
template <typename T, typename Traits>
ProducerToken::ProducerToken(BlockingConcurrentQueue<T, Traits> &queue)
    : producer(reinterpret_cast<ConcurrentQueue<T, Traits> *>(&queue)
                   ->recycle_or_create_producer(true)) {
  if (producer == nullptr) {
    return;
  }
  producer->token = this;
}

// Creates a consumer token for `queue`. The token starts without a current
// or desired producer, takes the next value of the queue's
// nextExplicitConsumerId counter as its initial offset, and marks its last
// known global offset with the all-ones sentinel.
template <typename T, typename Traits>
ConsumerToken::ConsumerToken(ConcurrentQueue<T, Traits> &queue)
    : itemsConsumedFromCurrent(0),
      currentProducer(nullptr),
      desiredProducer(nullptr) {
  lastKnownGlobalOffset = static_cast<std::uint32_t>(-1);
  initialOffset =
      queue.nextExplicitConsumerId.fetch_add(1, std::memory_order_release);
}

// BlockingConcurrentQueue flavour of the consumer token constructor: the
// queue reference is reinterpreted as a ConcurrentQueue so that its
// nextExplicitConsumerId counter can supply the token's initial offset.
template <typename T, typename Traits>
ConsumerToken::ConsumerToken(BlockingConcurrentQueue<T, Traits> &queue)
    : itemsConsumedFromCurrent(0),
      currentProducer(nullptr),
      desiredProducer(nullptr) {
  lastKnownGlobalOffset = static_cast<std::uint32_t>(-1);
  ConcurrentQueue<T, Traits> *const base =
      reinterpret_cast<ConcurrentQueue<T, Traits> *>(&queue);
  initialOffset =
      base->nextExplicitConsumerId.fetch_add(1, std::memory_order_release);
}

// Free swap() overloads. Each one simply forwards to the member swap() of its
// first argument.

// Swaps two queues.
template <typename T, typename Traits>
inline void swap(ConcurrentQueue<T, Traits> &a,
                 ConcurrentQueue<T, Traits> &b) MOODYCAMEL_NOEXCEPT {
  a.swap(b);
}

// Swaps two producer tokens.
inline void swap(ProducerToken &a, ProducerToken &b) MOODYCAMEL_NOEXCEPT {
  a.swap(b);
}

// Swaps two consumer tokens.
inline void swap(ConsumerToken &a, ConsumerToken &b) MOODYCAMEL_NOEXCEPT {
  a.swap(b);
}

// Swaps two implicit-producer hash entries.
// NOTE(review): T and Traits appear only inside a nested-name-specifier here,
// which is a non-deduced context, so this overload can only be selected when
// the template arguments are given explicitly.
template <typename T, typename Traits>
inline void swap(typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP &a,
                 typename ConcurrentQueue<T, Traits>::ImplicitProducerKVP &b)
    MOODYCAMEL_NOEXCEPT {
  a.swap(b);
}

}  // namespace moodycamel

#if defined(_MSC_VER) && (!defined(_HAS_CXX17) || !_HAS_CXX17)
#pragma warning(pop)
#endif

#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
#pragma GCC diagnostic pop
#endif

================================================
FILE: src/include/zvec/ailego/container/blob.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <algorithm>
#include <cstring>
#include <string>
#include <zvec/ailego/internal/platform.h>

namespace zvec {
namespace ailego {

/*! AiLego Blob Wrap
 *
 *  Non-owning view over a raw byte range. The wrapper stores only a pointer
 *  and a length; it never allocates or releases the memory it refers to, so
 *  the caller must keep that memory alive while the blob is in use.
 */
class BlobWrap {
 public:
  //! Constructor (empty, unmounted blob)
  BlobWrap(void) : buffer_(nullptr), size_(0u) {}

  //! Constructor (refers to the same bytes as `rhs`)
  BlobWrap(const BlobWrap &rhs) : buffer_(rhs.buffer_), size_(rhs.size_) {}

  //! Constructor (takes over the view of `rhs`, which becomes unmounted)
  BlobWrap(BlobWrap &&rhs) noexcept : buffer_(rhs.buffer_), size_(rhs.size_) {
    rhs.buffer_ = nullptr;
    rhs.size_ = 0u;
  }

  //! Constructor (wraps `len` bytes starting at `buf`)
  BlobWrap(const void *buf, size_t len)
      : buffer_(const_cast<void *>(buf)), size_(len) {}

  //! Constructor (wraps the character data of a string)
  BlobWrap(const std::string &buf)
      : buffer_(const_cast<char *>(buf.data())), size_(buf.size()) {}

  //! Destructor
  ~BlobWrap(void) {}

  //! Assignment
  BlobWrap &operator=(const BlobWrap &rhs) {
    buffer_ = rhs.buffer_;
    size_ = rhs.size_;
    return *this;
  }

  //! Assignment (`rhs` becomes unmounted; self-assignment is a no-op)
  BlobWrap &operator=(BlobWrap &&rhs) noexcept {
    // Without this guard a self move would clear the blob it just "moved".
    if (this != &rhs) {
      buffer_ = rhs.buffer_;
      size_ = rhs.size_;
      rhs.buffer_ = nullptr;
      rhs.size_ = 0u;
    }
    return *this;
  }

  //! Test if the blob is valid (non-null buffer and non-zero size)
  bool is_valid(void) const {
    return (buffer_ && size_);
  }

  //! Mount a buffer as blob
  void mount(void *buf, size_t len) {
    buffer_ = buf;
    size_ = len;
  }

  //! Mount a string as blob
  void mount(std::string &buf) {
    buffer_ = const_cast<char *>(buf.data());
    size_ = buf.size();
  }

  //! Umount the buffer of blob
  void umount(void) {
    buffer_ = nullptr;
    size_ = 0u;
  }

  //! Retrieve buffer of blob
  void *buffer(void) {
    return buffer_;
  }

  //! Retrieve buffer of blob
  const void *buffer(void) const {
    return buffer_;
  }

  //! Retrieve size of blob
  size_t size(void) const {
    return size_;
  }

  //! Copy a buffer into blob (at most `size()` bytes; ranges may overlap)
  void copy(const void *buf, size_t len) {
    this->transfer(buf, len);
  }

  //! Copy a blob to blob (at most `size()` bytes; ranges may overlap)
  void copy(const BlobWrap &rhs) {
    this->transfer(rhs.buffer_, rhs.size_);
  }

  //! Copy a string to blob (at most `size()` bytes)
  void copy(const std::string &str) {
    this->transfer(str.data(), str.size());
  }

  //! Zero the buffer of blob (no-op on an invalid blob)
  void zero(void) {
    // memset with a null destination is undefined even for a zero length.
    if (this->is_valid()) {
      std::memset(buffer_, 0, size_);
    }
  }

 private:
  //! Copy `min(size(), len)` bytes from `src` into the mounted buffer
  void transfer(const void *src, size_t len) {
    const size_t count = std::min(size_, len);
    // Passing a null pointer to the mem* functions is undefined behaviour
    // even when the length is zero, so an unmounted blob or an empty source
    // is skipped.
    if (count == 0u || buffer_ == nullptr || src == nullptr) {
      return;
    }
    // Blobs are plain views, so source and destination may alias the same
    // storage; memmove is well defined for overlapping ranges.
    std::memmove(buffer_, src, count);
  }

  void *buffer_;
  size_t size_;
};

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/include/zvec/ailego/container/cube.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <algorithm>
#include <string>
#include <typeinfo>
#include <zvec/ailego/utility/type_helper.h>

namespace zvec {
namespace ailego {
namespace internal {

/*! Cube Policy
 *
 *  Type-erased set of operations on a value that is reached through a single
 *  `void *` slot. Concrete policies decide whether the value lives inside the
 *  slot itself or on the heap with the slot pointing at it.
 */
struct CubePolicy {
  //! Destructor
  virtual ~CubePolicy(void) {}

  //! Assign `src` to `dst` (copy-construct from the object at `src`)
  virtual void assign(const void *src, void **dst) = 0;

  //! Cleanup value
  virtual void cleanup(void **val) = 0;

  //! Clone value of `src` to `dst` (both are slots)
  virtual void clone(void *const *src, void **dst) = 0;

  //! Move `src` to `dst` (move-construct from the object at `src`)
  virtual void move(void *src, void **dst) = 0;

  //! Retrieve size
  virtual size_t size(void) const = 0;

  //! Retrieve type information
  virtual const std::type_info &type(void) const = 0;

  //! Retrieve value
  virtual void *value(void **src) = 0;

  //! Retrieve value
  virtual const void *value(void *const *src) const = 0;
};

/*! Small Cube Policy
 *
 *  Keeps the value directly inside the bytes of the `void *` slot.
 */
template <typename T>
struct SmallCubePolicy : public CubePolicy {
  //! Assign `src` to `dst`
  void assign(const void *src, void **dst) override {
    new (dst) T(*reinterpret_cast<const T *>(src));
  }

  //! Cleanup value (runs the destructor in place)
  void cleanup(void **val) override {
    reinterpret_cast<T *>(val)->~T();
  }

  //! Clone value of `src` to `dst`
  void clone(void *const *src, void **dst) override {
    new (dst) T(*reinterpret_cast<const T *>(src));
  }

  //! Move `src` to `dst`
  void move(void *src, void **dst) override {
    new (dst) T(std::move(*reinterpret_cast<T *>(src)));
  }

  //! Retrieve size
  size_t size(void) const override {
    return sizeof(T);
  }

  //! Retrieve type information
  const std::type_info &type(void) const override {
    return typeid(T);
  }

  //! Retrieve value (the slot itself is the storage)
  void *value(void **src) override {
    return reinterpret_cast<void *>(src);
  }

  //! Retrieve value (the slot itself is the storage)
  const void *value(void *const *src) const override {
    return reinterpret_cast<const void *>(src);
  }
};

/*! Large Cube Policy
 *
 *  Keeps the value in a heap object created with `new`; the slot holds the
 *  pointer to it.
 */
template <typename T>
struct LargeCubePolicy : public CubePolicy {
  //! Assign `src` to `dst`
  void assign(const void *src, void **dst) override {
    *dst = new T(*reinterpret_cast<const T *>(src));
  }

  //! Cleanup value (deletes the heap object)
  void cleanup(void **val) override {
    delete (reinterpret_cast<T *>(*val));
  }

  //! Clone value of `src` to `dst`
  void clone(void *const *src, void **dst) override {
    *dst = new T(**reinterpret_cast<T *const *>(src));
  }

  //! Move `src` to `dst`
  void move(void *src, void **dst) override {
    *dst = new T(std::move(*reinterpret_cast<T *>(src)));
  }

  //! Retrieve size
  size_t size(void) const override {
    return sizeof(T);
  }

  //! Retrieve type information
  const std::type_info &type(void) const override {
    return typeid(T);
  }

  //! Retrieve value (the slot points at the heap object)
  void *value(void **src) override {
    return *src;
  }

  //! Retrieve value (the slot points at the heap object)
  const void *value(void *const *src) const override {
    return *src;
  }
};

/*! Policy Selector
 *
 *  Default choice: store the value on the heap.
 */
template <typename T, typename = void>
struct PolicySelector {
  typedef LargeCubePolicy<T> Type;
};

/*! Policy Selector
 *
 *  Types that fit into a `void *` slot, both in size and in alignment, are
 *  stored inside the slot. The alignment test matters because the value is
 *  placement-constructed directly in storage that is only aligned for a
 *  pointer.
 */
template <typename T>
struct PolicySelector<
    T, typename std::enable_if<sizeof(T) <= sizeof(void *) &&
                               alignof(T) <= alignof(void *)>::type> {
  typedef SmallCubePolicy<T> Type;
};

}  // namespace internal

/*! Cube class
 *
 *  Type-erased holder of a single value. A Cube consists of a pointer to a
 *  static policy object, which knows the stored type, and one `void *` slot
 *  that the policy uses as storage. An empty Cube uses the policy of
 *  `EmptyPolicy`.
 *
 *  Every `assign` overload (and therefore every assignment operator) first
 *  builds the new value in a temporary Cube and only then replaces the
 *  current content. This keeps assignment correct when the argument refers
 *  to the value that is currently stored, including self copy-assignment,
 *  and leaves the Cube untouched if constructing the new value throws.
 */
class Cube {
 public:
  //! Constructor (empty Cube)
  Cube(void) : policy_(Cube::Policy<Cube::EmptyPolicy>()), object_(nullptr) {}

  //! Constructor (stores a copy of `rhs`)
  template <typename T>
  Cube(const T &rhs) : policy_(Cube::Policy<T>()), object_(nullptr) {
    policy_->assign(&rhs, &object_);
  }

  //! Constructor (stores `rhs` by moving from it; rvalues only)
  template <typename T, typename = typename std::enable_if<
                            !std::is_same<Cube &, T>::value &&
                            !std::is_same<T &, T>::value>::type>
  Cube(T &&rhs) : policy_(Cube::Policy<T>()), object_(nullptr) {
    policy_->move(&rhs, &object_);
  }

  //! Constructor (stores the C string as a std::string)
  Cube(const char *str)
      : policy_(Cube::Policy<std::string>()), object_(nullptr) {
    std::string rhs(str);
    policy_->move(&rhs, &object_);
  }

  //! Constructor (stores the C string as a std::string)
  Cube(char str[]) : policy_(Cube::Policy<std::string>()), object_(nullptr) {
    std::string rhs(str);
    policy_->move(&rhs, &object_);
  }

  //! Constructor (clones the value held by `rhs`)
  Cube(const Cube &rhs) : policy_(rhs.policy_), object_(nullptr) {
    policy_->clone(&rhs.object_, &object_);
  }

  //! Constructor (takes over policy and slot of `rhs`, leaving it empty)
  Cube(Cube &&rhs) : policy_(rhs.policy_), object_(rhs.object_) {
    rhs.policy_ = Cube::Policy<Cube::EmptyPolicy>();
    rhs.object_ = nullptr;
  }

  //! Destructor
  ~Cube(void) {
    policy_->cleanup(&object_);
  }

  //! Assignment
  template <typename T>
  Cube &operator=(const T &rhs) {
    this->assign(rhs);
    return *this;
  }

  //! Assignment
  template <typename T, typename = typename std::enable_if<
                            !std::is_same<Cube &, T>::value &&
                            !std::is_same<T &, T>::value>::type>
  Cube &operator=(T &&rhs) {
    this->assign(std::forward<T>(rhs));
    return *this;
  }

  //! Assignment
  Cube &operator=(const Cube &rhs) {
    this->assign(rhs);
    return *this;
  }

  //! Assignment
  Cube &operator=(Cube &&rhs) {
    this->assign(std::forward<Cube>(rhs));
    return *this;
  }

  //! Assignment
  Cube &operator=(const char *str) {
    this->assign(str);
    return *this;
  }

  //! Assignment
  Cube &operator=(char str[]) {
    this->assign(str);
    return *this;
  }

  //! Retrieve object in original type (throws std::bad_cast on mismatch)
  template <typename T>
  operator T &() {
    return this->cast<T>();
  }

  //! Retrieve object in original type (throws std::bad_cast on mismatch)
  template <typename T>
  operator const T &() const {
    return this->cast<T>();
  }

  //! Assign content (stores a copy of `rhs`)
  template <typename T>
  void assign(const T &rhs) {
    // `rhs` may be a reference to the value currently stored in this Cube,
    // so the copy must exist before the old value is destroyed.
    Cube staged(rhs);
    this->assign(std::move(staged));
  }

  //! Assign content (stores `rhs` by moving from it; rvalues only)
  template <typename T, typename = typename std::enable_if<
                            !std::is_same<Cube &, T>::value &&
                            !std::is_same<T &, T>::value>::type>
  void assign(T &&rhs) {
    // Move out of `rhs` before the old value is destroyed, in case `rhs`
    // refers to that old value.
    Cube staged(std::move(rhs));
    this->assign(std::move(staged));
  }

  //! Assign content from another Cube (self-assignment is a no-op)
  void assign(const Cube &rhs) {
    if (this != &rhs) {
      // Clone first: if cloning throws, this Cube keeps its old value.
      Cube staged(rhs);
      this->assign(std::move(staged));
    }
  }

  //! Assign content from another Cube (`rhs` is left empty)
  void assign(Cube &&rhs) {
    if (this != &rhs) {
      policy_->cleanup(&object_);
      policy_ = rhs.policy_;
      object_ = rhs.object_;
      rhs.policy_ = Cube::Policy<Cube::EmptyPolicy>();
      rhs.object_ = nullptr;
    }
  }

  //! Assign content (stores the C string as a std::string)
  void assign(const char *str) {
    // `str` may point into the std::string that is stored right now.
    Cube staged(str);
    this->assign(std::move(staged));
  }

  //! Assign content (stores the C string as a std::string)
  void assign(char str[]) {
    // `str` may point into the std::string that is stored right now.
    Cube staged(str);
    this->assign(std::move(staged));
  }

  //! Swap the content with another Cube
  Cube &swap(Cube &rhs) {
    std::swap(policy_, rhs.policy_);
    std::swap(object_, rhs.object_);
    return *this;
  }

  //! Cast to the original type (throws std::bad_cast if the policy differs)
  template <typename T>
  T &cast(void) {
    if (policy_ != Cube::Policy<T>()) {
      throw std::bad_cast();
    }
    return *reinterpret_cast<T *>(policy_->value(&object_));
  }

  //! Cast to the original type (throws std::bad_cast if the policy differs)
  template <typename T>
  const T &cast(void) const {
    if (policy_ != Cube::Policy<T>()) {
      throw std::bad_cast();
    }
    return *reinterpret_cast<const T *>(policy_->value(&object_));
  }

  //! Cast to the original type (unsafe, no type check)
  template <typename T>
  T &unsafe_cast(void) {
    return *reinterpret_cast<T *>(policy_->value(&object_));
  }

  //! Cast to the original type (unsafe, no type check)
  template <typename T>
  const T &unsafe_cast(void) const {
    return *reinterpret_cast<const T *>(policy_->value(&object_));
  }

  //! Test if the Cube is empty
  bool empty(void) const {
    return (policy_ == Cube::Policy<Cube::EmptyPolicy>());
  }

  //! Reset Cube allocated memory (the Cube becomes empty)
  void reset(void) {
    policy_->cleanup(&object_);
    policy_ = Cube::Policy<Cube::EmptyPolicy>();
    object_ = nullptr;
  }

  //! Test if the Cube is compatible with another one
  //! (same policy object, or policies reporting the same type_info)
  bool compatible(const Cube &rhs) const {
    return (policy_ == rhs.policy_ || policy_->type() == rhs.policy_->type());
  }

  //! Test if the Cube is compatible with type `T`
  //! (same policy object, or policies reporting the same type_info)
  template <typename T>
  bool compatible(void) const {
    return (policy_ == Cube::Policy<T>() ||
            policy_->type() == Cube::Policy<T>()->type());
  }

  //! Retrieve size (0 for an empty Cube)
  size_t size(void) const {
    return (!this->empty() ? policy_->size() : 0u);
  }

  //! Retrieve type information (typeid(void) for an empty Cube)
  const std::type_info &type(void) const {
    return (!this->empty() ? policy_->type() : typeid(void));
  }

 protected:
  /*! Empty Policy
   */
  struct EmptyPolicy {};

  //! Make a static policy object (one instance per type `T`)
  template <typename T>
  static internal::CubePolicy *MakePolicy(void) {
    static typename internal::PolicySelector<T>::Type policy;
    return (&policy);
  }

  //! Retrieve a static policy object for the underlying type of `T`
  template <typename T>
  static internal::CubePolicy *Policy(void) {
    return MakePolicy<typename UnderlyingType<T>::type>();
  }

 private:
  //! Members
  internal::CubePolicy *policy_;
  void *object_;
};

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/include/zvec/ailego/container/heap.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <algorithm>
#include <functional>
#include <limits>
#include <utility>
#include <vector>

namespace zvec {
namespace ailego {

/*! Heap Adapter
 *
 *  Keeps the elements of `TBase` (a random access container, std::vector by
 *  default) arranged as a binary heap ordered by `TCompare`, with an optional
 *  upper bound on the number of elements.
 *
 *  As with std::push_heap, the front element is the one that compares
 *  greatest (the largest element for std::less). Once the heap is full, a new
 *  element is accepted only if it compares before the front element, which it
 *  then replaces; a limited heap therefore retains the `limit()` elements
 *  that compare smallest.
 *
 *  The base container is inherited publicly. Changes made through it are not
 *  tracked; call update() afterwards to restore the heap order and the limit.
 */
template <typename T, typename TCompare = std::less<T>,
          typename TBase = std::vector<T>>
class Heap : public TBase {
 public:
  //! Constructor (unlimited heap)
  Heap(void)
      : TBase(), limit_(std::numeric_limits<size_t>::max()), compare_() {}

  //! Constructor
  //! \param max   maximum number of elements (values below 1 are raised to 1)
  //! \param args  arguments forwarded to the comparer's constructor
  template <typename... Args>
  Heap(size_t max, Args &&...args)
      : TBase(),
        limit_(std::max<size_t>(max, 1u)),
        compare_(std::forward<Args>(args)...) {
    TBase::reserve(limit_);
  }

  //! Constructor
  Heap(const Heap &rhs)
      : TBase(rhs), limit_(rhs.limit_), compare_(rhs.compare_) {}

  //! Constructor
  //! (only the TBase part of `rhs` is moved by the base initializer, so its
  //! limit_ and compare_ members are still intact when read afterwards)
  Heap(Heap &&rhs)
      : TBase(std::move(rhs)),
        limit_(rhs.limit_),
        compare_(std::move(rhs.compare_)) {}

  //! Constructor (copies a container and heapifies it; unlimited)
  Heap(const TBase &rhs)
      : TBase(rhs), limit_(std::numeric_limits<size_t>::max()), compare_() {
    std::make_heap(TBase::begin(), TBase::end(), compare_);
  }

  //! Constructor (takes over a container and heapifies it; unlimited)
  Heap(TBase &&rhs)
      : TBase(std::move(rhs)),
        limit_(std::numeric_limits<size_t>::max()),
        compare_() {
    std::make_heap(TBase::begin(), TBase::end(), compare_);
  }

  //! Assignment
  Heap &operator=(const Heap &rhs) {
    TBase::operator=(static_cast<const TBase &>(rhs));
    limit_ = rhs.limit_;
    compare_ = rhs.compare_;
    return *this;
  }

  //! Assignment
  Heap &operator=(Heap &&rhs) {
    TBase::operator=(std::move(static_cast<TBase &&>(rhs)));
    limit_ = rhs.limit_;
    compare_ = std::move(rhs.compare_);
    return *this;
  }

  //! Exchange the content
  void swap(Heap &rhs) {
    TBase::swap(static_cast<TBase &>(rhs));
    std::swap(limit_, rhs.limit_);
    std::swap(compare_, rhs.compare_);
  }

  //! Pop the front element (no-op on an empty heap)
  void pop(void) {
    const size_t count = TBase::size();
    if (count == 0) {
      // pop_back() on an empty container is undefined behaviour.
      return;
    }
    if (count > 1) {
      // Re-insert the last element at the root of the range that excludes
      // it; afterwards the last slot is moved-from and can be dropped.
      auto last = TBase::end() - 1;
      this->replace_heap(TBase::begin(), last, std::move(*last));
    }
    TBase::pop_back();
  }

  //! Insert a new element into the heap (constructed from `args`)
  template <class... TArgs>
  void emplace(TArgs &&...args) {
    if (this->full()) {
      typename std::remove_reference<T>::type val(std::forward<TArgs>(args)...);

      auto first = TBase::begin();
      if (compare_(val, *first)) {
        this->replace_heap(first, TBase::end(), std::move(val));
      }
    } else {
      TBase::emplace_back(std::forward<TArgs>(args)...);
      std::push_heap(TBase::begin(), TBase::end(), compare_);
    }
  }

  //! Insert a new element into the heap
  void push(const T &val) {
    if (this->full()) {
      auto first = TBase::begin();
      if (compare_(val, *first)) {
        this->replace_heap(first, TBase::end(), val);
      }
    } else {
      TBase::push_back(val);
      std::push_heap(TBase::begin(), TBase::end(), compare_);
    }
  }

  //! Insert a new element into the heap
  void push(T &&val) {
    if (this->full()) {
      auto first = TBase::begin();
      if (compare_(val, *first)) {
        this->replace_heap(first, TBase::end(), std::move(val));
      }
    } else {
      TBase::push_back(std::move(val));
      std::push_heap(TBase::begin(), TBase::end(), compare_);
    }
  }

  //! Retrieve the limit of heap
  size_t limit(void) const {
    return limit_;
  }

  //! Limit the heap with max size (values below 1 are raised to 1).
  //! Elements beyond the new limit are not removed here; call update().
  void limit(size_t max) {
    limit_ = std::max<size_t>(max, 1u);
    TBase::reserve(limit_);
  }

  //! Unlimit the size of heap
  void unlimit(void) {
    limit_ = std::numeric_limits<size_t>::max();
  }

  //! Check whether the heap is full
  bool full(void) const {
    // `>=` rather than `==`: the size can exceed the limit after limit() was
    // lowered or after elements were added through the base container. Such
    // a heap must still count as full, otherwise it would keep growing.
    return (TBase::size() >= limit_);
  }

  //! Update the heap (re-heapify, then pop until the limit is respected)
  void update(void) {
    std::make_heap(TBase::begin(), TBase::end(), compare_);
    while (limit_ < TBase::size()) {
      this->pop();
    }
  }

  //! Sort the elements in the heap
  //! (the result is ordered by the comparer and is in general no longer a
  //! heap; call update() before pushing or popping again)
  void sort(void) {
    std::sort(TBase::begin(), TBase::end(), compare_);
  }

 protected:
  //! Replace the top element of heap
  //! Sifts `val` down from the root of [first, last): children are pulled up
  //! while `val` compares before the greater child, then `val` is stored in
  //! the remaining hole.
  template <typename TRandomIterator, typename TValue>
  void replace_heap(TRandomIterator first, TRandomIterator last, TValue &&val) {
    using _DistanceType =
        typename std::iterator_traits<TRandomIterator>::difference_type;

    _DistanceType hole = 0;
    _DistanceType count = _DistanceType(last - first);

    if (count > 1) {
      _DistanceType child = (hole << 1) + 1;

      while (child < count) {
        _DistanceType right_child = child + 1;

        // Descend along the greater of the two children.
        if (right_child < count &&
            compare_(*(first + child), *(first + right_child))) {
          child = right_child;
        }
        if (!compare_(val, *(first + child))) {
          break;
        }
        *(first + hole) = std::move(*(first + child));
        hole = child;
        child = (hole << 1) + 1;
      }
    }
    *(first + hole) = std::forward<TValue>(val);
  }

 private:
  size_t limit_;
  TCompare compare_;
};

/*! Key Value Heap Comparer
 *
 *  Orders (key, value) pairs by their value alone; the key never takes part
 *  in the comparison.
 */
template <typename TKey, typename TValue, typename TCompare = std::less<TValue>>
struct KeyValueHeapComparer {
  //! Function call: applies `TCompare` to the `second` members of the pairs
  bool operator()(const std::pair<TKey, TValue> &lhs,
                  const std::pair<TKey, TValue> &rhs) const {
    const TValue &left_value = lhs.second;
    const TValue &right_value = rhs.second;
    return compare_(left_value, right_value);
  }

 private:
  TCompare compare_;
};

/*! Key Value Heap
 *
 *  Heap whose elements are std::pair<TKey, TValue> and whose ordering is
 *  delegated to KeyValueHeapComparer with the same template arguments.
 */
template <typename TKey, typename TValue, typename TCompare = std::less<TValue>>
using KeyValueHeap =
    Heap<std::pair<TKey, TValue>, KeyValueHeapComparer<TKey, TValue, TCompare>>;

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/include/zvec/ailego/container/hypercube.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <map>
#include <string>
#include <zvec/ailego/container/cube.h>

namespace zvec {
namespace ailego {

/*! Hypercube
 */
class Hypercube {
 public:
  //! Constructor
  Hypercube(void) : cubes_() {}

  //! Constructor
  Hypercube(const Hypercube &rhs) : cubes_(rhs.cubes_) {}

  //! Constructor
  Hypercube(Hypercube &&rhs) : cubes_() {
    cubes_.swap(rhs.cubes_);
  }

  //! Destructor
  ~Hypercube(void) {}

  //! Assignment
  Hypercube &operator=(const Hypercube &rhs) {
    cubes_ = rhs.cubes_;
    return *this;
  }

  //! Assignment
  Hypercube &operator=(Hypercube &&rhs) {
    cubes_ = std::move(rhs.cubes_);
    return *this;
  }

  //! Overloaded operator []
  Cube &operator[](const std::string &key) {
    return cubes_[key];
  }

  //! Overloaded operator []
  Cube &operator[](std::string &&key) {
    return cubes_[std::forward<std::string>(key)];
  }

  //! Test if the element is exist
  bool has(const std::string &key) const {
    return (cubes_.find(key) != cubes_.end());
  }

  //! Test if the hyper cube is empty
  bool empty(void) const {
    return cubes_.empty();
  }

  //! Insert a key-value pair into map
  bool insert(const std::string &key, Cube &&val) {
    return cubes_.emplace(key, std::forward<Cube>(val)).second;
  }

  //! Insert a key-value pair into map
  bool insert(std::string &&key, Cube &&val) {
    return cubes_
        .emplace(std::forward<std::string>(key), std::forward<Cube>(val))
        .second;
  }

  //! Insert a key-value pair into map
  template <typename T>
  bool insert(const std::string &key, T &&val) {
    return cubes_.emplace(key, Cube(std::forward<T>(val))).second;
  }

  //! Insert a key-value pair into map
  template <typename T>
  bool insert(std::string &&key, T &&val) {
    return cubes_
        .emplace(std::forward<std::string>(key), Cube(std::forward<T>(val)))
        .second;
  }

  //! Insert or assign a key-value pair to map
  void insert_or_assign(const std::string &key, Cube &&val) {
    auto it = cubes_.lower_bound(key);
    if (it != cubes_.end() && it->first == key) {
      it->second = std::forward<Cube>(val);
    } else {
      cubes_.emplace_hint(it, key, std::forward<Cube>(val));
    }
  }

  //! Insert or assign a key-value pair to map
  void insert_or_assign(std::string &&key, Cube &&val) {
    auto it = cubes_.lower_bound(key);
    if (it != cubes_.end() && it->first == key) {
      it->second = std::forward<Cube>(val);
    } else {
      cubes_.emplace_hint(it, std::forward<std::string>(key),
                          std::forward<Cube>(val));
    }
  }

  //! Insert or assign a key-value pair to map
  template <typename T>
  void insert_or_assign(const std::string &key, T &&val) {
    auto it = cubes_.lower_bound(key);
    if (it != cubes_.end() && it->first == key) {
      it->second = Cube(std::forward<T>(val));
    } else {
      cubes_.emplace_hint(it, key, Cube(std::forward<T>(val)));
    }
  }

  //! Insert or assign a key-value pair to map
  template <typename T>
  void insert_or_assign(std::string &&key, T &&val) {
    auto it = cubes_.lower_bound(key);
    if (it != cubes_.end() && it->first == key) {
      it->second = Cube(std::forward<T>(val));
    } else {
      cubes_.emplace_hint(it, std::forward<std::string>(key),
                          Cube(std::forward<T>(val)));
    }
  }

  //! Clear the map
  void clear(void) {
    cubes_.clear();
  }

  //! Swap the map
  void swap(Hypercube &rhs) {
    cubes_.swap(rhs.cubes_);
  }

  //! Erase the pair via a key
  bool erase(const std::string &key) {
    auto iter = cubes_.find(key);
    if (iter != cubes_.end()) {
      cubes_.erase(iter);
      return true;
    }
    return false;
  }

  //! Retrieve the value via a key
  bool get(const std::string &key, Cube *out) const {
    auto iter = cubes_.find(key);
    if (iter != cubes_.end()) {
      *out = iter->second;
      return true;
    }
    return false;
  }

  //! Retrieve the value via a key
  Cube *get(const std::string &key) {
    auto iter = cubes_.find(key);
    if (iter != cubes_.end()) {
      return &iter->second;
    }
    return nullptr;
  }

  //! Retrieve the value via a key
  const Cube *get(const std::string &key) const {
    auto iter = cubes_.find(key);
    if (iter != cubes_.end()) {
      return &iter->second;
    }
    return nullptr;
  }

  //! Retrieve the value via a key
  template <typename T>
  bool get(const std::string &key, T *out) const {
    auto iter = cubes_.find(key);
    if (iter != cubes_.end()) {
      if (iter->second.compatible<T>()) {
        *out = iter->second.unsafe_cast<T>();
        return true;
      }
    }
    return false;
  }

  //! Retrieve the value via a key
  template <typename T>
  T &get(const std::string &key, T &def) {
    auto iter = cubes_.find(key);
    if (iter != cubes_.end()) {
      if (iter->second.compatible<T>()) {
        return iter->second.unsafe_cast<T>();
      }
    }
    return def;
  }

  //! Retrieve the value via a key
  template <typename T>
  const T &get(const std::string &key, const T &def) const {
    auto iter = cubes_.find(key);
    if (iter != cubes_.end()) {
      if (iter->second.compatible<T>()) {
        return iter->second.unsafe_cast<T>();
      }
    }
    return def;
  }

  //! Merge another hyper cube
  void merge(const Hypercube &rhs) {
    for (const auto &it : rhs.cubes_) {
      auto iter = cubes_.find(it.first);
      if (iter != cubes_.end()) {
        iter->second = it.second;
      } else {
        cubes_.emplace(it.first, it.second);
      }
    }
  }

  //! Merge another hyper cube
  void merge(Hypercube &&rhs) {
    for (auto &it : rhs.cubes_) {
      auto iter = cubes_.find(it.first);
      if (iter != cubes_.end()) {
        iter->second = std::move(it.second);
      } else {
        cubes_.emplace(std::move(it.first), std::move(it.second));
      }
    }
  }

  //! Retrieve the cubes
  const std::map<std::string, Cube> &cubes(void) const {
    return cubes_;
  }

  //! Retrieve the cubes
  std::map<std::string, Cube> *mutable_cubes(void) {
    return &cubes_;
  }

 private:
  std::map<std::string, Cube> cubes_;
};

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/include/zvec/ailego/container/params.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <zvec/ailego/container/hypercube.h>

namespace zvec {
namespace ailego {

//! Statement macros used by the Params::get() overloads below.
//!
//! Each macro tests whether `cube` (a pointer to a Cube) currently holds a
//! value of the given type. On a match it stores a converted copy through
//! `out` and returns true from the ENCLOSING function; on a mismatch it does
//! nothing and execution continues with the next statement.
//!
//! The bodies are wrapped in do { } while (0) so that every expansion is
//! exactly one statement and cannot capture a following `else`.

//! Trying compatible with T: static_cast the held T to the pointee type of out
#define _TRYING_COMPATIBLE(cube, T, out)                                \
  do {                                                                  \
    if (cube->compatible<T>())                                          \
      return (*out = static_cast<                                       \
                  typename std::remove_pointer<decltype(out)>::type>(   \
                  cube->unsafe_cast<T>()),                              \
              true);                                                    \
  } while (0)

//! Trying compatible with T (Boolean): any non-zero held value becomes true
#define _TRYING_COMPATIBLE_BOOL(cube, T, out)                           \
  do {                                                                  \
    if (cube->compatible<T>())                                          \
      return (*out = !!cube->unsafe_cast<T>(), true);                   \
  } while (0)

//! Trying compatible with T (String): format the held T with std::to_string
#define _TRYING_COMPATIBLE_STRING(cube, T, out)                         \
  do {                                                                  \
    if (cube->compatible<T>())                                          \
      return (out->assign(std::to_string(cube->unsafe_cast<T>())),      \
              true);                                                    \
  } while (0)

//! Trying convert string: parse a held std::string with Params::StringCast.
//! Only usable where the type of `out` is not a dependent type.
#define _TRYING_CONVERT_STRING(cube, out)                               \
  do {                                                                  \
    if (cube->compatible<std::string>())                                \
      return (*out = Params::StringCast<                                \
                  std::remove_pointer<decltype(out)>::type>(            \
                  cube->unsafe_cast<std::string>()),                    \
              true);                                                    \
  } while (0)

/*! Index Params
 */
class Params {
 public:
  //! Constructor
  Params(void) : hypercube_() {}

  //! Constructor
  Params(const Params &rhs) : hypercube_(rhs.hypercube_) {}

  //! Constructor
  Params(Params &&rhs) : hypercube_() {
    hypercube_.swap(rhs.hypercube_);
  }

  //! Destructor
  ~Params(void) {}

  //! Assignment
  Params &operator=(const Params &rhs) {
    hypercube_ = rhs.hypercube_;
    return *this;
  }

  //! Assignment
  Params &operator=(Params &&rhs) {
    hypercube_.swap(rhs.hypercube_);
    return *this;
  }

  //! Overloaded operator []
  ailego::Cube &operator[](const std::string &key) {
    return hypercube_[key];
  }

  //! Overloaded operator []
  ailego::Cube &operator[](std::string &&key) {
    return hypercube_[std::move(key)];
  }

  //! Test if the element is exist
  bool has(const std::string &key) const {
    return hypercube_.has(key);
  }

  //! Test if the map is empty
  bool empty(void) const {
    return hypercube_.empty();
  }

  //! Clear the map
  void clear(void) {
    hypercube_.clear();
  }

  //! Erase the pair via a key
  bool erase(const std::string &key) {
    return hypercube_.erase(key);
  }

  //! Merge another index params
  void merge(const Params &rhs) {
    hypercube_.merge(rhs.hypercube_);
  }

  //! Merge another index params
  void merge(Params &&rhs) {
    hypercube_.merge(std::move(rhs.hypercube_));
  }

  //! Set the value of key in T
  template <typename T>
  bool insert(const std::string &key, T &&val) {
    return hypercube_.insert<T>(key, std::forward<T>(val));
  }

  //! Set the value of key in T
  template <typename T>
  bool insert(std::string &&key, T &&val) {
    return hypercube_.insert<T>(std::forward<std::string>(key),
                                std::forward<T>(val));
  }

  //! Set the value of key in T
  template <typename T>
  void set(const std::string &key, T &&val) {
    hypercube_.insert_or_assign<T>(key, std::forward<T>(val));
  }

  //! Set the value of key in T
  template <typename T>
  void set(std::string &&key, T &&val) {
    hypercube_.insert_or_assign<T>(std::forward<std::string>(key),
                                   std::forward<T>(val));
  }

  //! Retrieve the value in boolean
  bool get(const std::string &key, bool *out) const {
    const ailego::Cube *cube = hypercube_.get(key);
    if (cube) {
      _TRYING_COMPATIBLE(cube, bool, out);
      _TRYING_COMPATIBLE_BOOL(cube, char, out);
      _TRYING_COMPATIBLE_BOOL(cube, unsigned char, out);
      _TRYING_COMPATIBLE_BOOL(cube, signed char, out);
      _TRYING_COMPATIBLE_BOOL(cube, short int, out);
      _TRYING_COMPATIBLE_BOOL(cube, unsigned short int, out);
      _TRYING_COMPATIBLE_BOOL(cube, int, out);
      _TRYING_COMPATIBLE_BOOL(cube, unsigned int, out);
      _TRYING_COMPATIBLE_BOOL(cube, long int, out);
      _TRYING_COMPATIBLE_BOOL(cube, unsigned long int, out);
      _TRYING_COMPATIBLE_BOOL(cube, long long int, out);
      _TRYING_COMPATIBLE_BOOL(cube, unsigned long long int, out);
      _TRYING_COMPATIBLE_BOOL(cube, float, out);
      _TRYING_COMPATIBLE_BOOL(cube, double, out);
      _TRYING_COMPATIBLE_BOOL(cube, long double, out);
      _TRYING_CONVERT_STRING(cube, out);
    }
    return false;
  }

  //! Retrieve the value in 'char'
  bool get(const std::string &key, char *out) const {
    const ailego::Cube *cube = hypercube_.get(key);
    if (cube) {
      _TRYING_COMPATIBLE(cube, char, out);
      _TRYING_COMPATIBLE(cube, unsigned char, out);
      _TRYING_COMPATIBLE(cube, signed char, out);
      _TRYING_COMPATIBLE(cube, bool, out);
      _TRYING_COMPATIBLE(cube, short int, out);
      _TRYING_COMPATIBLE(cube, unsigned short int, out);
      _TRYING_COMPATIBLE(cube, int, out);
      _TRYING_COMPATIBLE(cube, unsigned int, out);
      _TRYING_COMPATIBLE(cube, long int, out);
      _TRYING_COMPATIBLE(cube, unsigned long int, out);
      _TRYING_COMPATIBLE(cube, long long int, out);
      _TRYING_COMPATIBLE(cube, unsigned long long int, out);
      _TRYING_COMPATIBLE(cube, float, out);
      _TRYING_COMPATIBLE(cube, double, out);
      _TRYING_COMPATIBLE(cube, long double, out);
      _TRYING_CONVERT_STRING(cube, out);
    }
    return false;
  }

  //! Retrieve the value in 'unsigned char'
  bool get(const std::string &key, unsigned char *out) const {
    const ailego::Cube *cube = hypercube_.get(key);
    if (cube) {
      _TRYING_COMPATIBLE(cube, unsigned char, out);
      _TRYING_COMPATIBLE(cube, char, out);
      _TRYING_COMPATIBLE(cube, signed char, out);
      _TRYING_COMPATIBLE(cube, bool, out);
      _TRYING_COMPATIBLE(cube, unsigned short int, out);
      _TRYING_COMPATIBLE(cube, short int, out);
      _TRYING_COMPATIBLE(cube, unsigned int, out);
      _TRYING_COMPATIBLE(cube, int, out);
      _TRYING_COMPATIBLE(cube, unsigned long int, out);
      _TRYING_COMPATIBLE(cube, long int, out);
      _TRYING_COMPATIBLE(cube, unsigned long long int, out);
      _TRYING_COMPATIBLE(cube, long long int, out);
      _TRYING_COMPATIBLE(cube, float, out);
      _TRYING_COMPATIBLE(cube, double, out);
      _TRYING_COMPATIBLE(cube, long double, out);
      _TRYING_CONVERT_STRING(cube, out);
    }
    return false;
  }

  //! Retrieve the value in 'signed char'
  bool get(const std::string &key, signed char *out) const {
    const ailego::Cube *cube = hypercube_.get(key);
    if (cube) {
      _TRYING_COMPATIBLE(cube, signed char, out);
      _TRYING_COMPATIBLE(cube, char, out);
      _TRYING_COMPATIBLE(cube, unsigned char, out);
      _TRYING_COMPATIBLE(cube, bool, out);
      _TRYING_COMPATIBLE(cube, short int, out);
      _TRYING_COMPATIBLE(cube, unsigned short int, out);
      _TRYING_COMPATIBLE(cube, int, out);
      _TRYING_COMPATIBLE(cube, unsigned int, out);
      _TRYING_COMPATIBLE(cube, long int, out);
      _TRYING_COMPATIBLE(cube, unsigned long int, out);
      _TRYING_COMPATIBLE(cube, long long int, out);
      _TRYING_COMPATIBLE(cube, unsigned long long int, out);
      _TRYING_COMPATIBLE(cube, float, out);
      _TRYING_COMPATIBLE(cube, double, out);
      _TRYING_COMPATIBLE(cube, long double, out);
      _TRYING_CONVERT_STRING(cube, out);
    }
    return false;
  }

  //! Retrieve the value in 'short int'
  bool get(const std::string &key, short int *out) const {
    const ailego::Cube *cube = hypercube_.get(key);
    if (cube) {
      _TRYING_COMPATIBLE(cube, short int, out);
      _TRYING_COMPATIBLE(cube, unsigned short int, out);
      _TRYING_COMPATIBLE(cube, char, out);
      _TRYING_COMPATIBLE(cube, unsigned char, out);
      _TRYING_COMPATIBLE(cube, signed char, out);
      _TRYING_COMPATIBLE(cube, bool, out);
      _TRYING_COMPATIBLE(cube, int, out);
      _TRYING_COMPATIBLE(cube, unsigned int, out);
      _TRYING_COMPATIBLE(cube, long int, out);
      _TRYING_COMPATIBLE(cube, unsigned long int, out);
      _TRYING_COMPATIBLE(cube, long long int, out);
      _TRYING_COMPATIBLE(cube, unsigned long long int, out);
      _TRYING_COMPATIBLE(cube, float, out);
      _TRYING_COMPATIBLE(cube, double, out);
      _TRYING_COMPATIBLE(cube, long double, out);
      _TRYING_CONVERT_STRING(cube, out);
    }
    return false;
  }

  //! Retrieve the value in 'unsigned short int'
  bool get(const std::string &key, unsigned short int *out) const {
    const ailego::Cube *cube = hypercube_.get(key);
    if (cube) {
      _TRYING_COMPATIBLE(cube, unsigned short int, out);
      _TRYING_COMPATIBLE(cube, short int, out);
      _TRYING_COMPATIBLE(cube, unsigned char, out);
      _TRYING_COMPATIBLE(cube, char, out);
      _TRYING_COMPATIBLE(cube, signed char, out);
      _TRYING_COMPATIBLE(cube, bool, out);
      _TRYING_COMPATIBLE(cube, unsigned int, out);
      _TRYING_COMPATIBLE(cube, int, out);
      _TRYING_COMPATIBLE(cube, unsigned long int, out);
      _TRYING_COMPATIBLE(cube, long int, out);
      _TRYING_COMPATIBLE(cube, unsigned long long int, out);
      _TRYING_COMPATIBLE(cube, long long int, out);
      _TRYING_COMPATIBLE(cube, float, out);
      _TRYING_COMPATIBLE(cube, double, out);
      _TRYING_COMPATIBLE(cube, long double, out);
      _TRYING_CONVERT_STRING(cube, out);
    }
    return false;
  }

  //! Retrieve the value in 'int'
  bool get(const std::string &key, int *out) const {
    const ailego::Cube *cube = hypercube_.get(key);
    if (cube) {
      _TRYING_COMPATIBLE(cube, int, out);
      _TRYING_COMPATIBLE(cube, unsigned int, out);
      _TRYING_COMPATIBLE(cube, short int, out);
      _TRYING_COMPATIBLE(cube, unsigned short int, out);
      _TRYING_COMPATIBLE(cube, char, out);
      _TRYING_COMPATIBLE(cube, unsigned char, out);
      _TRYING_COMPATIBLE(cube, signed char, out);
      _TRYING_COMPATIBLE(cube, bool, out);
      _TRYING_COMPATIBLE(cube, long int, out);
      _TRYING_COMPATIBLE(cube, unsigned long int, out);
      _TRYING_COMPATIBLE(cube, long long int, out);
      _TRYING_COMPATIBLE(cube, unsigned long long int, out);
      _TRYING_COMPATIBLE(cube, float, out);
      _TRYING_COMPATIBLE(cube, double, out);
      _TRYING_COMPATIBLE(cube, long double, out);
      _TRYING_CONVERT_STRING(cube, out);
    }
    return false;
  }

  //! Retrieve the value in 'unsigned int'
  bool get(const std::string &key, unsigned int *out) const {
    const ailego::Cube *cube = hypercube_.get(key);
    if (cube) {
      _TRYING_COMPATIBLE(cube, unsigned int, out);
      _TRYING_COMPATIBLE(cube, int, out);
      _TRYING_COMPATIBLE(cube, unsigned short int, out);
      _TRYING_COMPATIBLE(cube, short int, out);
      _TRYING_COMPATIBLE(cube, unsigned char, out);
      _TRYING_COMPATIBLE(cube, char, out);
      _TRYING_COMPATIBLE(cube, signed char, out);
      _TRYING_COMPATIBLE(cube, bool, out);
      _TRYING_COMPATIBLE(cube, unsigned long int, out);
      _TRYING_COMPATIBLE(cube, long int, out);
      _TRYING_COMPATIBLE(cube, unsigned long long int, out);
      _TRYING_COMPATIBLE(cube, long long int, out);
      _TRYING_COMPATIBLE(cube, float, out);
      _TRYING_COMPATIBLE(cube, double, out);
      _TRYING_COMPATIBLE(cube, long double, out);
      _TRYING_CONVERT_STRING(cube, out);
    }
    return false;
  }

  //! Retrieve the value in 'long int'
  bool get(const std::string &key, long int *out) const {
    const ailego::Cube *cube = hypercube_.get(key);
    if (cube) {
      _TRYING_COMPATIBLE(cube, long int, out);
      _TRYING_COMPATIBLE(cube, unsigned long int, out);
      _TRYING_COMPATIBLE(cube, int, out);
      _TRYING_COMPATIBLE(cube, unsigned int, out);
      _TRYING_COMPATIBLE(cube, short int, out);
      _TRYING_COMPATIBLE(cube, unsigned short int, out);
      _TRYING_COMPATIBLE(cube, char, out);
      _TRYING_COMPATIBLE(cube, unsigned char, out);
      _TRYING_COMPATIBLE(cube, signed char, out);
      _TRYING_COMPATIBLE(cube, bool, out);
      _TRYING_COMPATIBLE(cube, long long int, out);
      _TRYING_COMPATIBLE(cube, unsigned long long int, out);
      _TRYING_COMPATIBLE(cube, float, out);
      _TRYING_COMPATIBLE(cube, double, out);
      _TRYING_COMPATIBLE(cube, long double, out);
      _TRYING_CONVERT_STRING(cube, out);
    }
    return false;
  }

  //! Retrieve the value in 'unsigned long int'
  bool get(const std::string &key, unsigned long int *out) const {
    const ailego::Cube *cube = hypercube_.get(key);
    if (cube) {
      _TRYING_COMPATIBLE(cube, unsigned long int, out);
      _TRYING_COMPATIBLE(cube, long int, out);
      _TRYING_COMPATIBLE(cube, unsigned int, out);
      _TRYING_COMPATIBLE(cube, int, out);
      _TRYING_COMPATIBLE(cube, unsigned short int, out);
      _TRYING_COMPATIBLE(cube, short int, out);
      _TRYING_COMPATIBLE(cube, unsigned char, out);
      _TRYING_COMPATIBLE(cube, char, out);
      _TRYING_COMPATIBLE(cube, signed char, out);
      _TRYING_COMPATIBLE(cube, bool, out);
      _TRYING_COMPATIBLE(cube, unsigned long long int, out);
      _TRYING_COMPATIBLE(cube, long long int, out);
      _TRYING_COMPATIBLE(cube, float, out);
      _TRYING_COMPATIBLE(cube, double, out);
      _TRYING_COMPATIBLE(cube, long double, out);
      _TRYING_CONVERT_STRING(cube, out);
    }
    return false;
  }

  //! Retrieve the value in 'long long int'
  bool get(const std::string &key, long long int *out) const {
    const ailego::Cube *cube = hypercube_.get(key);
    if (cube) {
      _TRYING_COMPATIBLE(cube, long long int, out);
      _TRYING_COMPATIBLE(cube, unsigned long long int, out);
      _TRYING_COMPATIBLE(cube, long int, out);
      _TRYING_COMPATIBLE(cube, unsigned long int, out);
      _TRYING_COMPATIBLE(cube, int, out);
      _TRYING_COMPATIBLE(cube, unsigned int, out);
      _TRYING_COMPATIBLE(cube, short int, out);
      _TRYING_COMPATIBLE(cube, unsigned short int, out);
      _TRYING_COMPATIBLE(cube, char, out);
      _TRYING_COMPATIBLE(cube, unsigned char, out);
      _TRYING_COMPATIBLE(cube, signed char, out);
      _TRYING_COMPATIBLE(cube, bool, out);
      _TRYING_COMPATIBLE(cube, float, out);
      _TRYING_COMPATIBLE(cube, double, out);
      _TRYING_COMPATIBLE(cube, long double, out);
      _TRYING_CONVERT_STRING(cube, out);
    }
    return false;
  }

  //! Retrieve the value in 'unsigned long long int'
  bool get(const std::string &key, unsigned long long int *out) const {
    const ailego::Cube *cube = hypercube_.get(key);
    if (cube) {
      _TRYING_COMPATIBLE(cube, unsigned long long int, out);
      _TRYING_COMPATIBLE(cube, long long int, out);
      _TRYING_COMPATIBLE(cube, unsigned long int, out);
      _TRYING_COMPATIBLE(cube, long int, out);
      _TRYING_COMPATIBLE(cube, unsigned int, out);
      _TRYING_COMPATIBLE(cube, int, out);
      _TRYING_COMPATIBLE(cube, unsigned short int, out);
      _TRYING_COMPATIBLE(cube, short int, out);
      _TRYING_COMPATIBLE(cube, unsigned char, out);
      _TRYING_COMPATIBLE(cube, char, out);
      _TRYING_COMPATIBLE(cube, signed char, out);
      _TRYING_COMPATIBLE(cube, bool, out);
      _TRYING_COMPATIBLE(cube, float, out);
      _TRYING_COMPATIBLE(cube, double, out);
      _TRYING_COMPATIBLE(cube, long double, out);
      _TRYING_CONVERT_STRING(cube, out);
    }
    return false;
  }

  //! Retrieve the value in 'float'
  bool get(const std::string &key, float *out) const {
    const ailego::Cube *cube = hypercube_.get(key);
    if (cube) {
      _TRYING_COMPATIBLE(cube, float, out);
      _TRYING_COMPATIBLE(cube, double, out);
      _TRYING_COMPATIBLE(cube, long double, out);
      _TRYING_COMPATIBLE(cube, long long int, out);
      _TRYING_COMPATIBLE(cube, unsigned long long int, out);
      _TRYING_COMPATIBLE(cube, long int, out);
      _TRYING_COMPATIBLE(cube, unsigned long int, out);
      _TRYING_COMPATIBLE(cube, int, out);
      _TRYING_COMPATIBLE(cube, unsigned int, out);
      _TRYING_COMPATIBLE(cube, short int, out);
      _TRYING_COMPATIBLE(cube, unsigned short int, out);
      _TRYING_COMPATIBLE(cube, char, out);
      _TRYING_COMPATIBLE(cube, unsigned char, out);
      _TRYING_COMPATIBLE(cube, signed char, out);
      _TRYING_COMPATIBLE(cube, bool, out);
      _TRYING_CONVERT_STRING(cube, out);
    }
    return false;
  }

  //! Retrieve the value in 'double'
  bool get(const std::string &key, double *out) const {
    const ailego::Cube *cube = hypercube_.get(key);
    if (cube) {
      _TRYING_COMPATIBLE(cube, double, out);
      _TRYING_COMPATIBLE(cube, float, out);
      _TRYING_COMPATIBLE(cube, long double, out);
      _TRYING_COMPATIBLE(cube, long long int, out);
      _TRYING_COMPATIBLE(cube, unsigned long long int, out);
      _TRYING_COMPATIBLE(cube, long int, out);
      _TRYING_COMPATIBLE(cube, unsigned long int, out);
      _TRYING_COMPATIBLE(cube, int, out);
      _TRYING_COMPATIBLE(cube, unsigned int, out);
      _TRYING_COMPATIBLE(cube, short int, out);
      _TRYING_COMPATIBLE(cube, unsigned short int, out);
      _TRYING_COMPATIBLE(cube, char, out);
      _TRYING_COMPATIBLE(cube, unsigned char, out);
      _TRYING_COMPATIBLE(cube, signed char, out);
      _TRYING_COMPATIBLE(cube, bool, out);
      _TRYING_CONVERT_STRING(cube, out);
    }
    return false;
  }

  //! Retrieve the value in 'long double'
  bool get(const std::string &key, long double *out) const {
    const ailego::Cube *cube = hypercube_.get(key);
    if (cube) {
      _TRYING_COMPATIBLE(cube, long double, out);
      _TRYING_COMPATIBLE(cube, double, out);
      _TRYING_COMPATIBLE(cube, float, out);
      _TRYING_COMPATIBLE(cube, long long int, out);
      _TRYING_COMPATIBLE(cube, unsigned long long int, out);
      _TRYING_COMPATIBLE(cube, long int, out);
      _TRYING_COMPATIBLE(cube, unsigned long int, out);
      _TRYING_COMPATIBLE(cube, int, out);
      _TRYING_COMPATIBLE(cube, unsigned int, out);
      _TRYING_COMPATIBLE(cube, short int, out);
      _TRYING_COMPATIBLE(cube, unsigned short int, out);
      _TRYING_COMPATIBLE(cube, char, out);
      _TRYING_COMPATIBLE(cube, unsigned char, out);
      _TRYING_COMPATIBLE(cube, signed char, out);
      _TRYING_COMPATIBLE(cube, bool, out);
      _TRYING_CONVERT_STRING(cube, out);
    }
    return false;
  }

  //! Retrieve the value in string
  bool get(const std::string &key, std::string *out) const {
    const ailego::Cube *cube = hypercube_.get(key);
    if (cube) {
      _TRYING_COMPATIBLE(cube, std::string, out);
      _TRYING_COMPATIBLE_STRING(cube, bool, out);
      _TRYING_COMPATIBLE_STRING(cube, char, out);
      _TRYING_COMPATIBLE_STRING(cube, unsigned char, out);
      _TRYING_COMPATIBLE_STRING(cube, signed char, out);
      _TRYING_COMPATIBLE_STRING(cube, short int, out);
      _TRYING_COMPATIBLE_STRING(cube, unsigned short int, out);
      _TRYING_COMPATIBLE_STRING(cube, int, out);
      _TRYING_COMPATIBLE_STRING(cube, unsigned int, out);
      _TRYING_COMPATIBLE_STRING(cube, long int, out);
      _TRYING_COMPATIBLE_STRING(cube, unsigned long int, out);
      _TRYING_COMPATIBLE_STRING(cube, long long int, out);
      _TRYING_COMPATIBLE_STRING(cube, unsigned long long int, out);
      _TRYING_COMPATIBLE_STRING(cube, float, out);
      _TRYING_COMPATIBLE_STRING(cube, double, out);
      _TRYING_COMPATIBLE_STRING(cube, long double, out);
    }
    return false;
  }

  //! Retrieve the value in T
  template <typename T>
  bool get(const std::string &key, T *out) const {
    const ailego::Cube *cube = hypercube_.get(key);
    if (cube) {
      _TRYING_COMPATIBLE(cube, T, out);
    }
    return false;
  }

  //! Retrieve the value in boolean
  bool get_as_bool(const std::string &key) const {
    bool result = false;
    this->get(key, &result);
    return result;
  }

  //! Retrieve the value in int8
  int8_t get_as_int8(const std::string &key) const {
    int8_t result = 0;
    this->get(key, &result);
    return result;
  }

  //! Retrieve the value in int16
  int16_t get_as_int16(const std::string &key) const {
    int16_t result = 0;
    this->get(key, &result);
    return result;
  }

  //! Retrieve the value in int32
  int32_t get_as_int32(const std::string &key) const {
    int32_t result = 0;
    this->get(key, &result);
    return result;
  }

  //! Retrieve the value in int64
  int64_t get_as_int64(const std::string &key) const {
    int64_t result = 0;
    this->get(key, &result);
    return result;
  }

  //! Retrieve the value in uint8
  uint8_t get_as_uint8(const std::string &key) const {
    uint8_t result = 0;
    this->get(key, &result);
    return result;
  }

  //! Retrieve the value in uint16
  uint16_t get_as_uint16(const std::string &key) const {
    uint16_t result = 0;
    this->get(key, &result);
    return result;
  }

  //! Retrieve the value in uint32
  uint32_t get_as_uint32(const std::string &key) const {
    uint32_t result = 0;
    this->get(key, &result);
    return result;
  }

  //! Retrieve the value in uint64
  uint64_t get_as_uint64(const std::string &key) const {
    uint64_t result = 0;
    this->get(key, &result);
    return result;
  }

  //! Retrieve the value in float
  float get_as_float(const std::string &key) const {
    float result = 0.0f;
    this->get(key, &result);
    return result;
  }

  //! Retrieve the value in double
  double get_as_double(const std::string &key) const {
    double result = 0.0f;
    this->get(key, &result);
    return result;
  }

  //! Retrieve the value in string
  std::string get_as_string(const std::string &key) const {
    std::string result;
    this->get(key, &result);
    return result;
  }

  //! Retrieve the debug string
  std::string debug_string(void) const {
    std::string str;
    SerializeToBuffer(*this, &str);
    return str;
  }

  //! Retrieve the map of parameters
  const ailego::Hypercube &hypercube(void) const {
    return hypercube_;
  }

  //! Retrieve the map of parameters
  ailego::Hypercube *mutable_hypercube(void) {
    return &hypercube_;
  }

  //! Parse parameters from buffer (Json format)
  static bool ParseFromBuffer(const std::string &buf, Params *params);

  //! Parse parameters from OS environment
  static void ParseFromEnvironment(Params *params);

  //! Serialize parameters into buffer
  static void SerializeToBuffer(const Params &params, std::string *buf);

 protected:
  //! Convert string type to another type
  template <typename T>
  static auto StringCast(const std::string &str) ->
      typename std::enable_if<std::is_same<T, float>::value, T>::type {
    return std::strtof(str.c_str(), nullptr);
  }

  //! Convert string type to another type
  template <typename T>
  static auto StringCast(const std::string &str) ->
      typename std::enable_if<std::is_same<T, double>::value, T>::type {
    return std::strtod(str.c_str(), nullptr);
  }

  //! Convert string type to another type
  template <typename T>
  static auto StringCast(const std::string &str) ->
      typename std::enable_if<std::is_same<T, long double>::value, T>::type {
    return std::strtold(str.c_str(), nullptr);
  }

  //! Convert string type to another type
  template <typename T>
  static auto StringCast(const std::string &str) ->
      typename std::enable_if<std::is_same<T, char>::value, T>::type {
    return static_cast<char>(std::strtol(str.c_str(), nullptr, 0));
  }

  //! Convert string type to another type
  template <typename T>
  static auto StringCast(const std::string &str) ->
      typename std::enable_if<std::is_same<T, signed char>::value, T>::type {
    return static_cast<signed char>(std::strtol(str.c_str(), nullptr, 0));
  }

  //! Convert string type to another type
  template <typename T>
  static auto StringCast(const std::string &str) ->
      typename std::enable_if<std::is_same<T, unsigned char>::value, T>::type {
    return static_cast<unsigned char>(std::strtoul(str.c_str(), nullptr, 0));
  }

  //! Convert string type to another type
  template <typename T>
  static auto StringCast(const std::string &str) ->
      typename std::enable_if<std::is_same<T, short int>::value, T>::type {
    return static_cast<short int>(std::strtol(str.c_str(), nullptr, 0));
  }

  //! Convert string type to another type
  template <typename T>
  static auto StringCast(const std::string &str) ->
      typename std::enable_if<std::is_same<T, int>::value, T>::type {
    return static_cast<int>(std::strtol(str.c_str(), nullptr, 0));
  }

  //! Convert string type to another type
  template <typename T>
  static auto StringCast(const std::string &str) ->
      typename std::enable_if<std::is_same<T, long int>::value, T>::type {
    return static_cast<long int>(std::strtol(str.c_str(), nullptr, 0));
  }

  //! Convert string type to another type
  template <typename T>
  static auto StringCast(const std::string &str) ->
      typename std::enable_if<std::is_same<T, long long int>::value, T>::type {
    return static_cast<long long int>(std::strtoll(str.c_str(), nullptr, 0));
  }

  //! Convert string type to another type
  template <typename T>
  static auto StringCast(const std::string &str) ->
      typename std::enable_if<std::is_same<T, unsigned short int>::value,
                              T>::type {
    return static_cast<unsigned short int>(
        std::strtoul(str.c_str(), nullptr, 0));
  }

  //! Convert string type to another type
  template <typename T>
  static auto StringCast(const std::string &str) ->
      typename std::enable_if<std::is_same<T, unsigned int>::value, T>::type {
    return static_cast<unsigned int>(std::strtoul(str.c_str(), nullptr, 0));
  }

  //! Convert string type to another type
  template <typename T>
  static auto StringCast(const std::string &str) ->
      typename std::enable_if<std::is_same<T, unsigned long int>::value,
                              T>::type {
    return static_cast<unsigned long int>(
        std::strtoul(str.c_str(), nullptr, 0));
  }

  //! Convert string type to another type
  template <typename T>
  static auto StringCast(const std::string &str) ->
      typename std::enable_if<std::is_same<T, unsigned long long int>::value,
                              T>::type {
    return static_cast<unsigned long long int>(
        std::strtoull(str.c_str(), nullptr, 0));
  }

  //! Convert string type to another type
  template <typename T>
  static auto StringCast(const std::string &str) ->
      typename std::enable_if<std::is_same<T, bool>::value, T>::type {
    if (str.empty()) {
      return false;
    }
    char c = str[0];
    if (c == 'Y' || c == 'T' || c == 'y' || c == 't') {
      return true;
    }
    return !!std::strtof(str.c_str(), nullptr);
  }

 private:
  ailego::Hypercube hypercube_;
};

//! The helper macros are private to this header: remove each of them so they
//! do not leak into files that include it.
#undef _TRYING_COMPATIBLE
#undef _TRYING_COMPATIBLE_BOOL
#undef _TRYING_COMPATIBLE_STRING
#undef _TRYING_CONVERT_STRING

}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/include/zvec/ailego/container/vector.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <initializer_list>
#include <stdexcept>
#include <string>
#include <zvec/ailego/utility/type_helper.h>

namespace zvec {
namespace ailego {

/*! Fixed Vector
 *
 *  A thin wrapper around a built-in array `T[N]`. The element count is fixed
 *  at compile time and is exposed both as `MAX_SIZE` and through `size()`.
 *  Element access is not range-checked.
 */
template <typename T, size_t N>
class FixedVector {
 public:
  //! Compile-time element count (always equal to N)
  enum { MAX_SIZE = N };

  //! Constructor: brace-initializes the array from the given values, in
  //! order. Elements without a corresponding value are value-initialized.
  template <typename... U>
  FixedVector(U... vals) : data_{vals...} {}

  //! Mutable access to the i-th element
  T &operator[](size_t i) {
    return *(data_ + i);
  }

  //! Read-only access to the i-th element
  constexpr const T &operator[](size_t i) const {
    return *(data_ + i);
  }

  //! Pointer to the first element
  T *data(void) {
    return &data_[0];
  }

  //! Pointer to the first element (read-only)
  const T *data(void) const {
    return &data_[0];
  }

  //! Number of elements in the vector (always N)
  constexpr size_t size(void) const {
    return N;
  }

  //! Reinterpret a plain array as a vector without copying it.
  //! NOTE(review): presumably relies on FixedVector having the same layout
  //! as T[N] - confirm before using with non-trivial T.
  static FixedVector *Cast(T arr[N]) {
    return reinterpret_cast<FixedVector *>(arr);
  }

  //! Reinterpret a plain read-only array as a vector without copying it
  static const FixedVector *Cast(const T arr[N]) {
    return reinterpret_cast<const FixedVector *>(arr);
  }

 private:
  //! Element storage
  T data_[N];
};

/*! Numerical Vector Adapter
 *
 *  Exposes the byte storage of `TBase` (std::string by default) as a
 *  contiguous sequence of `T` values. Sizes, capacities and indices on this
 *  interface count elements, while the underlying `TBase` counts bytes.
 *  Element access (`operator[]`, `at`, `front`, `back`) is never
 *  bounds-checked.
 */
template <typename T, typename TBase = std::string,
          typename =
              typename std::enable_if<IsTriviallyCopyable<T>::value>::type>
class NumericalVector : public TBase {
 public:
  typedef typename std::remove_cv<T>::type ValueType;
  typedef ValueType *iterator;
  typedef const ValueType *const_iterator;

  //! Constructor (empty vector)
  NumericalVector(void) : TBase() {}

  //! Constructor (`dim` elements, filled by TBase::resize)
  explicit NumericalVector(size_t dim) : TBase() {
    this->resize(dim);
  }

  //! Constructor (`dim` copies of `val`)
  NumericalVector(size_t dim, const ValueType &val) : TBase() {
    this->resize(dim, val);
  }

  //! Constructor (copy)
  NumericalVector(const NumericalVector &rhs) : TBase(rhs) {}

  //! Constructor (move)
  //! The explicit cast makes the base move constructor an exact match.
  NumericalVector(NumericalVector &&rhs) : TBase(static_cast<TBase &&>(rhs)) {}

  //! Constructor (copy of a raw buffer)
  //! Throws std::length_error if the byte length is not a multiple of
  //! sizeof(T).
  NumericalVector(const TBase &rhs) : TBase(rhs) {
    if (TBase::size() % sizeof(T) != 0) {
      throw std::length_error("Unmatched length");
    }
  }

  //! Constructor (takes over a raw buffer)
  //! Throws std::length_error if the byte length is not a multiple of
  //! sizeof(T).
  NumericalVector(TBase &&rhs) : TBase(std::move(rhs)) {
    if (TBase::size() % sizeof(T) != 0) {
      throw std::length_error("Unmatched length");
    }
  }

  //! Constructor (elements of an initializer list)
  NumericalVector(std::initializer_list<ValueType> il) : TBase() {
    this->append(il);
  }

  //! Assignment
  NumericalVector &operator=(const NumericalVector &rhs) {
    TBase::operator=(static_cast<const TBase &>(rhs));
    return *this;
  }

  //! Assignment
  NumericalVector &operator=(NumericalVector &&rhs) {
    TBase::operator=(std::move(static_cast<TBase &&>(rhs)));
    return *this;
  }

  //! Assignment from a raw buffer
  //! Unlike the converting constructors, the byte length is not validated.
  NumericalVector &operator=(const TBase &rhs) {
    TBase::operator=(rhs);
    return *this;
  }

  //! Assignment from a raw buffer
  //! Unlike the converting constructors, the byte length is not validated.
  NumericalVector &operator=(TBase &&rhs) {
    TBase::operator=(std::move(rhs));
    return *this;
  }

  //! Overloaded operator [] (no bounds check)
  ValueType &operator[](size_t i) {
    return *(this->data() + i);
  }

  //! Overloaded operator [] (no bounds check)
  const ValueType &operator[](size_t i) const {
    return *(this->data() + i);
  }

  //! Appends a copy of value
  NumericalVector &append(const ValueType &val) {
    TBase::append(reinterpret_cast<const char *>(&val), sizeof(ValueType));
    return *this;
  }

  //! Appends a copy of every element of the list
  void append(std::initializer_list<ValueType> il) {
    if (il.size() != 0) {
      // The list is backed by a contiguous array, so a single byte-wise
      // append copies all of its elements.
      TBase::append(reinterpret_cast<const char *>(il.begin()),
                    il.size() * sizeof(ValueType));
    }
  }

  //! Assign content to vector (`len` elements read from `vec`)
  void assign(const ValueType *vec, size_t len) {
    TBase::assign(reinterpret_cast<const char *>(vec), len * sizeof(ValueType));
  }

  //! Assign content to vector (`n` copies of `val`)
  void assign(size_t n, const ValueType &val) {
    // Copy first: `val` may refer to an element of this vector, and
    // clearing or resizing the buffer would overwrite or invalidate it.
    const ValueType fill = val;
    this->clear();
    this->resize(n, fill);
  }

  //! Assign content to vector (elements of an initializer list)
  void assign(std::initializer_list<ValueType> il) {
    this->clear();
    this->append(il);
  }

  //! Retrieve element (no bounds check, unlike the standard containers)
  ValueType &at(size_t i) {
    return *(this->data() + i);
  }

  //! Retrieve element (no bounds check, unlike the standard containers)
  const ValueType &at(size_t i) const {
    return *(this->data() + i);
  }

  //! Access last element
  //! Goes through the element iterators: the reverse iterators inherited
  //! from TBase walk bytes, not elements.
  ValueType &back(void) {
    return *(this->end() - 1);
  }

  //! Access last element
  const ValueType &back(void) const {
    return *(this->end() - 1);
  }

  //! Retrieve iterator to beginning
  iterator begin(void) {
    return this->data();
  }

  //! Retrieve iterator to beginning
  const_iterator begin(void) const {
    return this->data();
  }

  //! Retrieve size of allocated storage (in elements)
  size_t capacity(void) const {
    return (TBase::capacity() / sizeof(ValueType));
  }

  //! Clear the vector
  void clear(void) {
    TBase::clear();
  }

  //! Retrieve pointer of data
  ValueType *data(void) {
    return reinterpret_cast<ValueType *>(&(TBase::operator[](0)));
  }

  //! Retrieve pointer of data
  const ValueType *data(void) const {
    return reinterpret_cast<const ValueType *>(TBase::data());
  }

  //! Test if vector is empty
  bool empty(void) const {
    return TBase::empty();
  }

  //! An iterator to the past-the-end
  iterator end(void) {
    return (this->data() + this->size());
  }

  //! An iterator to the past-the-end
  const_iterator end(void) const {
    return (this->data() + this->size());
  }

  //! Access first element
  ValueType &front(void) {
    return *(this->begin());
  }

  //! Access first element
  const ValueType &front(void) const {
    return *(this->begin());
  }

  //! Request a change in capacity (in elements)
  void reserve(size_t n) {
    TBase::reserve(n * sizeof(ValueType));
  }

  //! Resize the vector to a length of n elements
  void resize(size_t n) {
    TBase::resize(n * sizeof(ValueType));
  }

  //! Resize the vector to a length of n elements
  //! Elements added beyond the previous size are set to `val`.
  void resize(size_t n, const ValueType &val) {
    // Copy first: `val` may refer to an element of this vector, and the
    // resize below can reallocate the storage it lives in.
    const ValueType fill = val;
    const size_t count = this->size();

    TBase::resize(n * sizeof(ValueType));
    ValueType *arr = this->data();
    for (size_t i = count; i < n; ++i) {
      arr[i] = fill;
    }
  }

  //! Retrieve dimension of vector (number of elements)
  size_t size(void) const {
    return (TBase::size() / sizeof(ValueType));
  }

  //! Retrieve dimension of vector (number of elements)
  size_t dimension(void) const {
    return (TBase::size() / sizeof(ValueType));
  }

  //! Retrieve size of vector in bytes
  size_t bytes(void) const {
    return TBase::size();
  }

  //! Swap vector values
  void swap(NumericalVector &vec) {
    TBase::swap(static_cast<TBase &>(vec));
  }
};

/*! Nibble Vector Adapter
 *
 *  Stores 4-bit integers, two per byte, in the byte buffer of `TBase`
 *  (std::string by default). Even indices occupy the low nibble of a byte
 *  and odd indices the high nibble. The buffer is sized in multiples of
 *  sizeof(T) bytes, and size() reports two elements per stored byte.
 */
template <typename T, typename TBase = std::string,
          typename = typename std::enable_if<std::is_integral<T>::value>::type>
class NibbleVector : public TBase {
 public:
  //! Element type seen by callers and its unsigned counterpart
  using ValueType = typename std::remove_cv<T>::type;
  using StoreType = typename std::make_unsigned<ValueType>::type;

  //! Read-only cursor over the elements of a Nibble Vector
  class const_iterator {
   public:
    //! Constructor (detached cursor at index 0)
    const_iterator() : i_(0), owner_(nullptr) {}

    //! Constructor (cursor over `owner` at index `i`)
    const_iterator(const NibbleVector *owner, size_t i)
        : i_(i), owner_(owner) {}

    //! Equality (positions only; the owner is not compared)
    bool operator==(const const_iterator &rhs) const {
      return !(i_ != rhs.i_);
    }

    //! Inequality (positions only; the owner is not compared)
    bool operator!=(const const_iterator &rhs) const {
      return !(i_ == rhs.i_);
    }

    //! Increment (Prefix)
    const_iterator &operator++() {
      i_ += 1;
      return *this;
    }

    //! Increment (Suffix)
    const_iterator operator++(int) {
      const_iterator previous(*this);
      i_ += 1;
      return previous;
    }

    //! Decrement (Prefix)
    const_iterator &operator--() {
      i_ -= 1;
      return *this;
    }

    //! Decrement (Suffix)
    const_iterator operator--(int) {
      const_iterator previous(*this);
      i_ -= 1;
      return previous;
    }

    //! Advance by `offset` elements
    const_iterator &operator+=(size_t offset) {
      i_ = i_ + offset;
      return *this;
    }

    //! Step back by `offset` elements
    const_iterator &operator-=(size_t offset) {
      i_ = i_ - offset;
      return *this;
    }

    //! Indirection: value of the element under the cursor
    ValueType operator*() const {
      return owner_->at(i_);
    }

   private:
    size_t i_;
    const NibbleVector *owner_;
  };

  //! Constructor (empty vector)
  NibbleVector() : TBase() {}

  //! Constructor (room for `dim` elements)
  explicit NibbleVector(size_t dim) : TBase() {
    this->resize(dim);
  }

  //! Constructor (room for `dim` elements, each set to `val`)
  NibbleVector(size_t dim, ValueType val) : TBase() {
    this->resize(dim, val);
  }

  //! Constructor (copy)
  NibbleVector(const NibbleVector &rhs) : TBase(rhs) {}

  //! Constructor (move)
  NibbleVector(NibbleVector &&rhs) : TBase(static_cast<TBase &&>(rhs)) {}

  //! Constructor (copy of a raw buffer)
  NibbleVector(const TBase &rhs) : TBase(rhs) {}

  //! Constructor (takes over a raw buffer)
  NibbleVector(TBase &&rhs) : TBase(std::move(rhs)) {}

  //! Constructor (elements of an initializer list)
  NibbleVector(std::initializer_list<ValueType> il) : TBase() {
    this->resize(il.size());

    uint8_t *bytes = reinterpret_cast<uint8_t *>(&(TBase::operator[](0)));
    size_t pos = 0;
    for (const ValueType item : il) {
      pack_nibble(bytes, pos, item);
      pos += 1;
    }
  }

  //! Assignment
  NibbleVector &operator=(const NibbleVector &rhs) {
    const TBase &base = rhs;
    TBase::operator=(base);
    return *this;
  }

  //! Assignment
  NibbleVector &operator=(NibbleVector &&rhs) {
    TBase::operator=(static_cast<TBase &&>(rhs));
    return *this;
  }

  //! Assignment from a raw buffer
  NibbleVector &operator=(const TBase &rhs) {
    TBase::operator=(rhs);
    return *this;
  }

  //! Assignment from a raw buffer
  NibbleVector &operator=(TBase &&rhs) {
    TBase::operator=(std::move(rhs));
    return *this;
  }

  //! Overloaded operator [] (returns the element by value)
  ValueType operator[](size_t i) const {
    return this->template element<ValueType>(i);
  }

  //! Appends one byte holding `lo` in the low and `hi` in the high nibble
  NibbleVector &append(ValueType lo, ValueType hi) {
    const uint8_t low = static_cast<uint8_t>(lo & 0xf);
    const uint8_t high = static_cast<uint8_t>(hi & 0xf);
    TBase::push_back((high << 4) | low);
    return *this;
  }

  //! Appends every element of the list after the current size()
  void append(std::initializer_list<ValueType> il) {
    size_t pos = this->size();
    this->resize(pos + il.size());

    uint8_t *bytes = reinterpret_cast<uint8_t *>(&(TBase::operator[](0)));
    for (const ValueType item : il) {
      pack_nibble(bytes, pos, item);
      pos += 1;
    }
  }

  //! Assign content to vector (`len` elements read from `vec`)
  void assign(const ValueType *vec, size_t len) {
    this->clear();
    this->resize(len);

    uint8_t *bytes = reinterpret_cast<uint8_t *>(&(TBase::operator[](0)));
    size_t pos = 0;
    while (pos != len) {
      pack_nibble(bytes, pos, vec[pos]);
      ++pos;
    }
  }

  //! Assign content to vector (`n` copies of `val`)
  void assign(size_t n, ValueType val) {
    this->clear();
    this->resize(n, val);
  }

  //! Assign content to vector (elements of an initializer list)
  void assign(std::initializer_list<ValueType> il) {
    this->clear();
    this->resize(il.size());

    uint8_t *bytes = reinterpret_cast<uint8_t *>(&(TBase::operator[](0)));
    size_t pos = 0;
    for (const ValueType item : il) {
      pack_nibble(bytes, pos, item);
      pos += 1;
    }
  }

  //! Overwrite the i-th element, leaving its byte neighbour untouched
  void set(size_t i, ValueType val) {
    uint8_t *slot = reinterpret_cast<uint8_t *>(&(TBase::operator[](i >> 1)));
    const uint8_t nib = static_cast<uint8_t>(val & 0xf);
    if ((i & 1) == 0) {
      *slot = (*slot & 0xf0) | nib;
    } else {
      *slot = (*slot & 0x0f) | (nib << 4);
    }
  }

  //! Retrieve element (no bounds check)
  ValueType at(size_t i) const {
    return this->template element<ValueType>(i);
  }

  //! Access last element
  ValueType back() const {
    const size_t last = this->size() - 1;
    return this->at(last);
  }

  //! Retrieve iterator to beginning
  const_iterator begin() const {
    return const_iterator(this, 0);
  }

  //! Retrieve size of allocated storage (two elements per byte)
  size_t capacity() const {
    return (TBase::capacity() << 1);
  }

  //! Clear the vector
  void clear() {
    TBase::clear();
  }

  //! Retrieve pointer of data
  StoreType *data() {
    return reinterpret_cast<StoreType *>(&(TBase::operator[](0)));
  }

  //! Retrieve pointer of data
  const StoreType *data() const {
    return reinterpret_cast<const StoreType *>(TBase::data());
  }

  //! Test if vector is empty
  bool empty() const {
    return TBase::empty();
  }

  //! An iterator to the past-the-end
  const_iterator end() const {
    return const_iterator(this, this->size());
  }

  //! Access first element
  ValueType front() const {
    return this->at(0);
  }

  //! Request a change in capacity (for n elements)
  void reserve(size_t n) {
    TBase::reserve(storage_bytes(n));
  }

  //! Resize the vector to hold n elements
  void resize(size_t n) {
    TBase::resize(storage_bytes(n));
  }

  //! Resize the vector to hold n elements; new bytes carry `val` in both
  //! nibbles
  void resize(size_t n, ValueType val) {
    const uint8_t nib = static_cast<uint8_t>(val & 0xf);
    TBase::resize(storage_bytes(n), (nib << 4) | nib);
  }

  //! Retrieve dimension of vector (two elements per stored byte)
  size_t size() const {
    return (TBase::size() << 1);
  }

  //! Retrieve dimension of vector (two elements per stored byte)
  size_t dimension() const {
    return (TBase::size() << 1);
  }

  //! Retrieve size of vector in bytes
  size_t bytes() const {
    return TBase::size();
  }

  //! Swap vector values
  void swap(NibbleVector &vec) {
    TBase &other = vec;
    TBase::swap(other);
  }

 protected:
  //! Retrieve element (Signed integral): the nibble is sign-extended
  template <typename U>
  auto element(size_t i) const ->
      typename std::enable_if<std::is_signed<U>::value, U>::type {
    const uint8_t *arr = reinterpret_cast<const uint8_t *>(TBase::data());
    const uint8_t byte = arr[i >> 1];
    // Bring the wanted nibble into the top four bits, then let the
    // arithmetic right shift carry its sign down.
    const int8_t top = static_cast<int8_t>((i & 1) ? byte : (byte << 4));
    return (top >> 4);
  }

  //! Retrieve element (Unsigned integral): the nibble is zero-extended
  template <typename U>
  auto element(size_t i) const ->
      typename std::enable_if<std::is_unsigned<U>::value, U>::type {
    const uint8_t *arr = reinterpret_cast<const uint8_t *>(TBase::data());
    const uint8_t byte = arr[i >> 1];
    return ((i & 1) ? (byte >> 4) : (byte & 0xf));
  }

 private:
  //! OR the low four bits of `val` into slot `pos` of `bytes`; the slot
  //! must be zero beforehand for the result to equal `val`
  static void pack_nibble(uint8_t *bytes, size_t pos, ValueType val) {
    const unsigned shift = (pos & 1) ? 4u : 0u;
    bytes[pos >> 1] |=
        static_cast<uint8_t>(static_cast<uint8_t>(val & 0xf) << shift);
  }

  //! Bytes needed for n elements, rounded up to a multiple of
  //! sizeof(ValueType)
  static size_t storage_bytes(size_t n) {
    const size_t per_unit = sizeof(ValueType) << 1;
    return (n + per_unit - 1) / per_unit * sizeof(ValueType);
  }
};

/*! Binary Vector Adapter
 *
 *  Packs boolean elements one per bit into the byte storage of `TBase`
 *  (std::string by default). Bit i lives in byte (i >> 3) at bit position
 *  (i & 7). resize()/reserve() round the storage up to a multiple of
 *  sizeof(T) bytes, and size()/dimension() report eight elements per stored
 *  byte, i.e. the requested length rounded up. Element access is never
 *  bounds-checked.
 */
template <typename T, typename TBase = std::string,
          typename = typename std::enable_if<std::is_integral<T>::value>::type>
class BinaryVector : public TBase {
 public:
  //! Type of value
  using ValueType = typename std::remove_cv<T>::type;

  //! const_iterator of Binary Vector
  class const_iterator {
   public:
    //! Constructor (detached iterator at bit 0)
    const_iterator(void) : i_(0), arr_(nullptr) {}

    //! Constructor (iterator over the bits of `buf`, positioned at bit `i`)
    const_iterator(const void *buf, size_t i)
        : i_(i), arr_(reinterpret_cast<const uint8_t *>(buf)) {}

    //! Equality (compares bit positions only, not the buffers)
    bool operator==(const const_iterator &rhs) const {
      return (i_ == rhs.i_);
    }

    //! No equality (compares bit positions only, not the buffers)
    bool operator!=(const const_iterator &rhs) const {
      return (i_ != rhs.i_);
    }

    //! Increment (Prefix)
    const_iterator &operator++() {
      ++i_;
      return *this;
    }

    //! Increment (Suffix)
    const_iterator operator++(int) {
      const_iterator tmp = *this;
      ++i_;
      return tmp;
    }

    //! Decrement (Prefix)
    const_iterator &operator--() {
      --i_;
      return *this;
    }

    //! Decrement (Suffix)
    const_iterator operator--(int) {
      const_iterator tmp = *this;
      --i_;
      return tmp;
    }

    //! operator "+="
    const_iterator &operator+=(size_t offset) {
      i_ += offset;
      return *this;
    }

    //! operator "-="
    const_iterator &operator-=(size_t offset) {
      i_ -= offset;
      return *this;
    }

    //! Indirection (eg. *iter): state of the bit under the iterator
    bool operator*() const {
      return ((arr_[i_ >> 3] & (1u << (i_ & 7))) != 0);
    }

   private:
    size_t i_;
    const uint8_t *arr_;
  };

  //! Constructor (empty vector)
  BinaryVector(void) : TBase() {}

  //! Constructor (room for `dim` bits)
  explicit BinaryVector(size_t dim) : TBase() {
    this->resize(dim);
  }

  //! Constructor (room for `dim` bits, every stored bit set to `val`)
  BinaryVector(size_t dim, bool val) : TBase() {
    this->resize(dim, val);
  }

  //! Constructor (copy)
  BinaryVector(const BinaryVector &rhs) : TBase(rhs) {}

  //! Constructor (move)
  //! The cast makes the base move constructor an exact match. Passing the
  //! derived object itself can instead pick a converting constructor
  //! template of the base (std::string's string_view-convertible overload
  //! on some standard libraries) and copy the buffer.
  BinaryVector(BinaryVector &&rhs) : TBase(static_cast<TBase &&>(rhs)) {}

  //! Constructor (copy of a raw buffer)
  //! Throws std::length_error if the byte length is not a multiple of
  //! sizeof(T).
  BinaryVector(const TBase &rhs) : TBase(rhs) {
    if (TBase::size() % sizeof(T) != 0) {
      throw std::length_error("Unmatched length");
    }
  }

  //! Constructor (takes over a raw buffer)
  //! Throws std::length_error if the byte length is not a multiple of
  //! sizeof(T).
  BinaryVector(TBase &&rhs) : TBase(std::move(rhs)) {
    if (TBase::size() % sizeof(T) != 0) {
      throw std::length_error("Unmatched length");
    }
  }

  //! Constructor (bits of an initializer list)
  BinaryVector(std::initializer_list<bool> il) : TBase() {
    this->resize(il.size());

    size_t index = 0;
    uint8_t *arr = reinterpret_cast<uint8_t *>(&(TBase::operator[](0)));

    for (auto val : il) {
      if (val) {
        arr[index >> 3] |= (uint8_t)(1u << (index & 7));
      }
      ++index;
    }
  }

  //! Assignment
  BinaryVector &operator=(const BinaryVector &rhs) {
    TBase::operator=(static_cast<const TBase &>(rhs));
    return *this;
  }

  //! Assignment
  BinaryVector &operator=(BinaryVector &&rhs) {
    TBase::operator=(std::move(static_cast<TBase &&>(rhs)));
    return *this;
  }

  //! Assignment from a raw buffer
  //! Unlike the converting constructors, the byte length is not validated.
  BinaryVector &operator=(const TBase &rhs) {
    TBase::operator=(rhs);
    return *this;
  }

  //! Assignment from a raw buffer
  //! Unlike the converting constructors, the byte length is not validated.
  BinaryVector &operator=(TBase &&rhs) {
    TBase::operator=(std::move(rhs));
    return *this;
  }

  //! Overloaded operator [] (state of bit i, returned by value)
  bool operator[](size_t i) const {
    const uint8_t *arr = reinterpret_cast<const uint8_t *>(TBase::data());
    return ((arr[i >> 3] & (1u << (i & 7))) != 0);
  }

  //! Assign content to vector (`len` bits read from `vec`)
  void assign(const bool *vec, size_t len) {
    this->clear();
    this->resize(len);

    // The storage was just zero-filled, so only the true bits need setting.
    uint8_t *arr = reinterpret_cast<uint8_t *>(&(TBase::operator[](0)));
    for (size_t i = 0; i < len; ++i) {
      bool val = vec[i];
      if (val) {
        arr[i >> 3] |= (1u << (i & 7));
      }
    }
  }

  //! Assign content to vector (`n` bits, every stored bit set to `val`)
  void assign(size_t n, bool val) {
    this->clear();
    this->resize(n, val);
  }

  //! Assign content to vector (bits of an initializer list)
  void assign(std::initializer_list<bool> il) {
    this->clear();
    this->resize(il.size());

    size_t index = 0;
    uint8_t *arr = reinterpret_cast<uint8_t *>(&(TBase::operator[](0)));
    for (auto val : il) {
      if (val) {
        arr[index >> 3] |= (uint8_t)(1u << (index & 7));
      }
      ++index;
    }
  }

  //! Retrieve element (state of bit i)
  bool at(size_t i) const {
    const uint8_t *arr = reinterpret_cast<const uint8_t *>(TBase::data());
    return ((arr[i >> 3] & (1u << (i & 7))) != 0);
  }

  //! Set a bit
  void set(size_t i) {
    uint8_t *arr = reinterpret_cast<uint8_t *>(&(TBase::operator[](0)));
    arr[i >> 3] |= (uint8_t)(1u << (i & 7));
  }

  //! Reset a bit
  void reset(size_t i) {
    uint8_t *arr = reinterpret_cast<uint8_t *>(&(TBase::operator[](0)));
    arr[i >> 3] &= (uint8_t)(~(1u << (i & 7)));
  }

  //! Toggle a bit
  void flip(size_t i) {
    uint8_t *arr = reinterpret_cast<uint8_t *>(&(TBase::operator[](0)));
    arr[i >> 3] ^= (uint8_t)(1u << (i & 7));
  }

  //! Access last element (the last stored bit, at index size() - 1)
  bool back(void) const {
    return this->at(this->size() - 1);
  }

  //! Retrieve const_iterator to beginning
  const_iterator begin(void) const {
    return const_iterator(this->data(), 0);
  }

  //! Retrieve size of allocated storage (eight bits per byte)
  size_t capacity(void) const {
    return (TBase::capacity() << 3);
  }

  //! Clear the vector
  void clear(void) {
    TBase::clear();
  }

  //! Retrieve pointer of data
  ValueType *data(void) {
    return reinterpret_cast<ValueType *>(&(TBase::operator[](0)));
  }

  //! Retrieve pointer of data
  const ValueType *data(void) const {
    return reinterpret_cast<const ValueType *>(TBase::data());
  }

  //! Test if vector is empty
  bool empty(void) const {
    return TBase::empty();
  }

  //! An const_iterator to the past-the-end
  const_iterator end(void) const {
    return const_iterator(this->data(), this->size());
  }

  //! Access first element
  bool front(void) const {
    return this->at(0);
  }

  //! Request a change in capacity (for n bits, rounded up to whole
  //! ValueType units)
  void reserve(size_t n) {
    TBase::reserve((n + (sizeof(ValueType) << 3) - 1) /
                   (sizeof(ValueType) << 3) * sizeof(ValueType));
  }

  //! Resize the vector to hold n bits (rounded up to whole ValueType units)
  void resize(size_t n) {
    TBase::resize((n + (sizeof(ValueType) << 3) - 1) /
                  (sizeof(ValueType) << 3) * sizeof(ValueType));
  }

  //! Resize the vector to hold n bits (rounded up to whole ValueType units)
  //! Every bit of a newly added byte is set to `val`, padding bits included.
  void resize(size_t n, bool val) {
    TBase::resize((n + (sizeof(ValueType) << 3) - 1) /
                      (sizeof(ValueType) << 3) * sizeof(ValueType),
                  val ? 0xffu : 0u);
  }

  //! Retrieve dimension of vector (eight bits per stored byte)
  size_t size(void) const {
    return (TBase::size() << 3);
  }

  //! Retrieve dimension of vector (eight bits per stored byte)
  size_t dimension(void) const {
    return (TBase::size() << 3);
  }

  //! Retrieve size of vector in bytes
  size_t bytes(void) const {
    return TBase::size();
  }

  //! Swap vector values
  void swap(BinaryVector &vec) {
    TBase::swap(static_cast<TBase &>(vec));
  }
};

/*! Hybrid Vector Adapter
 *
 *  A NumericalVector (the dense part) that additionally carries a sparse
 *  part: a count plus two byte buffers holding uint32_t indices and
 *  ValueType values.
 */
template <typename T, typename TBase = std::string,
          typename =
              typename std::enable_if<IsTriviallyCopyable<T>::value>::type>
class HybridVector : public NumericalVector<T, TBase> {
 public:
  typedef typename std::remove_cv<T>::type ValueType;
  typedef ValueType *iterator;
  typedef const ValueType *const_iterator;

  //! Constructor (empty dense and sparse parts)
  HybridVector(void) = default;

  //! Constructor (dense part of `dim` elements, empty sparse part)
  explicit HybridVector(size_t dim) : NumericalVector<T, TBase>(dim) {}

  //! Constructor (copy)
  HybridVector(const HybridVector &rhs)
      : NumericalVector<T, TBase>(rhs),
        sparse_count_(rhs.sparse_count_),
        sparse_indices_(rhs.sparse_indices_),
        sparse_data_(rhs.sparse_data_) {}

  //! Constructor (move)
  //! The sparse count is copied; `rhs` keeps its own count afterwards.
  HybridVector(HybridVector &&rhs)
      : NumericalVector<T, TBase>(
            static_cast<NumericalVector<T, TBase> &&>(rhs)),
        sparse_count_(rhs.sparse_count_),
        sparse_indices_(std::move(rhs.sparse_indices_)),
        sparse_data_(std::move(rhs.sparse_data_)) {}

  //! Assignment
  HybridVector &operator=(const HybridVector &rhs) {
    NumericalVector<T, TBase>::operator=(
        static_cast<const NumericalVector<T, TBase> &>(rhs));
    sparse_count_ = rhs.sparse_count_;
    sparse_indices_ = rhs.sparse_indices_;
    sparse_data_ = rhs.sparse_data_;

    return *this;
  }

  //! Assignment
  HybridVector &operator=(HybridVector &&rhs) {
    NumericalVector<T, TBase>::operator=(
        std::move(static_cast<NumericalVector<T, TBase> &&>(rhs)));
    sparse_count_ = rhs.sparse_count_;
    sparse_indices_ = std::move(rhs.sparse_indices_);
    sparse_data_ = std::move(rhs.sparse_data_);

    return *this;
  }

  //! Retrieve the sparse element count recorded by add_sparses()
  size_t sparse_count() const {
    return sparse_count_;
  }

  //! Retrieve the sparse index buffer viewed as uint32_t values
  const uint32_t *sparse_indices() const {
    return reinterpret_cast<const uint32_t *>(sparse_indices_.data());
  }

  //! Retrieve the sparse value buffer viewed as ValueType values
  const ValueType *sparse_data(void) const {
    return reinterpret_cast<const ValueType *>(sparse_data_.data());
  }

  //! Resize both sparse buffers to hold n entries
  //! The recorded sparse count is left unchanged.
  void resize_for_sparse(size_t n) {
    sparse_indices_.resize(n * sizeof(uint32_t));
    sparse_data_.resize(n * sizeof(ValueType));
  }

  //! Replace the sparse part with copies of `indexes` and `values`
  //! The sparse count is taken from `indexes`. Always returns 0.
  //! NOTE(review): `values` is not checked to have the same number of
  //! elements as `indexes` -- confirm that callers guarantee it.
  int add_sparses(const NumericalVector<uint32_t> &indexes,
                  const NumericalVector<ValueType> &values) {
    sparse_count_ = indexes.size();

    sparse_indices_ = static_cast<const std::string &>(indexes);
    sparse_data_ = static_cast<const std::string &>(values);

    return 0;
  }

 private:
  //! Data Member
  //! Zero until add_sparses() records a count, so sparse_count() is well
  //! defined on a freshly constructed vector.
  size_t sparse_count_{0};
  std::string sparse_indices_;
  std::string sparse_data_;
};

/*! Sparse Vector Adapter
 *
 *  Holds a sparse vector as a count plus two byte buffers: one with
 *  uint32_t indices and one with ValueType values.
 */
template <typename T>
class SparseVector {
 public:
  typedef typename std::remove_cv<T>::type ValueType;
  typedef ValueType *iterator;
  typedef const ValueType *const_iterator;

  //! Constructor (empty vector with a sparse count of zero)
  SparseVector(void) = default;

  //! Retrieve the sparse element count recorded by add_sparses()
  size_t sparse_count() const {
    return sparse_count_;
  }

  //! Retrieve the index buffer viewed as uint32_t values
  const uint32_t *sparse_indices() const {
    return reinterpret_cast<const uint32_t *>(sparse_indices_.data());
  }

  //! Retrieve the value buffer viewed as ValueType values
  const ValueType *sparse_data(void) const {
    return reinterpret_cast<const ValueType *>(sparse_data_.data());
  }

  //! Resize both buffers to hold n entries
  //! The recorded sparse count is left unchanged.
  void resize_for_sparse(size_t n) {
    sparse_indices_.resize(n * sizeof(uint32_t));
    sparse_data_.resize(n * sizeof(ValueType));
  }

  //! Replace the content with copies of `indexes` and `values`
  //! The sparse count is taken from `indexes`. Always returns 0.
  //! NOTE(review): `values` is not checked to have the same number of
  //! elements as `indexes` -- confirm that callers guarantee it.
  int add_sparses(const NumericalVector<uint32_t> &indexes,
                  const NumericalVector<ValueType> &values) {
    sparse_count_ = indexes.size();

    sparse_indices_ = static_cast<const std::string &>(indexes);
    sparse_data_ = static_cast<const std::string &>(values);

    return 0;
  }

 private:
  //! Data Member
  //! Zero until add_sparses() records a count, so sparse_count() is well
  //! defined on a default-initialized vector.
  size_t sparse_count_{0};
  std::string sparse_indices_;
  std::string sparse_data_;
};

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/include/zvec/ailego/encoding/json/mod_json.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <math.h>
#include <stdbool.h>
#include <stdint.h>

#if !defined(__cplusplus) && defined(_MSC_VER)
#if !defined(inline)
#define inline __inline
#endif
#endif

#if defined(__cplusplus)
extern "C" {
#endif

#define MOD_JSON_FALSE (false)
#define MOD_JSON_TRUE (true)
#define MOD_JSON_INFINITY (INFINITY)

/*! JSON Type
 *
 *  Kind of data held by a JSON value. `struct mod_json_value` stores one of
 *  these in its `type` field through the `mod_json_type_t` typedef.
 */
enum mod_json_type {
  mod_json_type_null = 0,
  mod_json_type_boolean = 1,
  mod_json_type_integer = 2,
  mod_json_type_float = 3,
  mod_json_type_string = 4,
  mod_json_type_array = 5,
  mod_json_type_object = 6
};

/*! JSON Token State
 *
 *  States are numbered consecutively from 0.
 *  NOTE(review): the names suggest positions of a tokenizer inside arrays
 *  and objects, and `mod_json_state_max` looks like a count sentinel rather
 *  than a real state -- confirm against the tokenizer implementation.
 */
enum mod_json_state {
  mod_json_state_null = 0,
  mod_json_state_start = 1,
  mod_json_state_finish = 2,
  mod_json_state_array_start = 3,
  mod_json_state_array_half = 4,
  mod_json_state_array_finish = 5,
  mod_json_state_object_start = 6,
  mod_json_state_object_half1 = 7,
  mod_json_state_object_half2 = 8,
  mod_json_state_object_finish = 9,
  mod_json_state_max = 10
};

/*! JSON Token Error Code
 *
 *  Codes are numbered consecutively from 0.
 *  NOTE(review): `mod_json_error_null` (0) presumably means "no error";
 *  the conditions behind the other codes are not visible in this header --
 *  confirm against the tokenizer implementation.
 */
enum mod_json_error {
  mod_json_error_null = 0,
  mod_json_error_invalid = 1,
  mod_json_error_state = 2,
  mod_json_error_empty = 3,
  mod_json_error_break = 4,
  mod_json_error_depth = 5,
  mod_json_error_trunc = 6,
  mod_json_error_start = 7,
  mod_json_error_array = 8,
  mod_json_error_object = 9,
  mod_json_error_key = 10,
  mod_json_error_value = 11,
  mod_json_error_quote = 12
};

/*! JSON Token Event
 *
 *  Events are numbered consecutively from 0.
 *  NOTE(review): presumably these identify what the tokenizer reports to a
 *  `mod_json_event_proc` callback (a field name, the start of an object or
 *  array, or a scalar) -- confirm against the tokenizer implementation.
 */
enum mod_json_event {
  mod_json_event_null = 0,
  mod_json_event_field = 1,
  mod_json_event_object = 2,
  mod_json_event_array = 3,
  mod_json_event_boolean = 4,
  mod_json_event_integer = 5,
  mod_json_event_float = 6,
  mod_json_event_string = 7
};

/* Scalar aliases used throughout the API. */
typedef unsigned int mod_json_size_t;
typedef int mod_json_ssize_t;
typedef bool mod_json_boolean_t;
typedef char mod_json_char_t;
typedef const char mod_json_cchar_t;
typedef unsigned char mod_json_uchar_t;
typedef long long mod_json_integer_t;
typedef double mod_json_float_t;
typedef void mod_json_void_t;
/* `_t` names for the enums, union and structs of this API, so that the
 * declarations below can refer to them without the enum/union/struct tag. */
typedef enum mod_json_type mod_json_type_t;
typedef union mod_json_any mod_json_any_t;
typedef struct mod_json_value mod_json_value_t;
typedef struct mod_json_string mod_json_string_t;
typedef struct mod_json_array mod_json_array_t;
typedef struct mod_json_object mod_json_object_t;
typedef struct mod_json_pair mod_json_pair_t;
typedef struct mod_json_option mod_json_option_t;
typedef enum mod_json_state mod_json_state_t;
typedef enum mod_json_error mod_json_error_t;
typedef enum mod_json_event mod_json_event_t;
typedef struct mod_json_token mod_json_token_t;

/*! Callback function when parsing JSON
 *
 *  Receives the token being processed, an untyped pointer and a length.
 *  NOTE(review): presumably `val`/`len` describe the payload of the current
 *  event and the return value tells the parser whether to continue --
 *  neither is visible in this header; confirm against the tokenizer.
 */
typedef int (*mod_json_event_proc)(mod_json_token_t *tok, mod_json_void_t *val,
                                   mod_json_size_t len);

/*! JSON Any
 *
 *  Payload of a JSON value: either a pointer to an object, array or string,
 *  or an inline float, boolean or integer. All members share one storage.
 *  NOTE(review): the active member is presumably selected by the `type`
 *  field of the enclosing `struct mod_json_value` -- confirm.
 */
union mod_json_any {
  mod_json_object_t *c_obj;
  mod_json_array_t *c_arr;
  mod_json_string_t *c_str;
  mod_json_float_t c_float;
  mod_json_boolean_t c_bool;
  mod_json_integer_t c_int;
};

/*! JSON Value
 *
 *  A typed value: `type` holds a `mod_json_type` constant and `data` the
 *  payload.
 *  NOTE(review): `refer` is presumably a reference count -- confirm against
 *  the implementation.
 */
struct mod_json_value {
  mod_json_ssize_t refer;
  mod_json_type_t type;
  mod_json_any_t data;
};

/*! JSON String
 *
 *  NOTE(review): `refer` is presumably a reference count, `first`/`last`
 *  look like the bounds of the characters in use and `size` like the
 *  allocated capacity -- none of this is visible in this header; confirm
 *  against the implementation.
 */
struct mod_json_string {
  mod_json_ssize_t refer;
  mod_json_size_t size;
  mod_json_char_t *first;
  mod_json_char_t *last;
};

/*! JSON Array
 *
 *  Elements are stored as pointers to JSON values.
 *  NOTE(review): `refer` is presumably a reference count, `first`/`last`
 *  look like the bounds of the slots in use and `size` like the allocated
 *  capacity -- confirm against the implementation.
 */
struct mod_json_array {
  mod_json_ssize_t refer;
  mod_json_size_t size;
  mod_json_value_t **first;
  mod_json_value_t **last;
};

/*! JSON Pair
 *
 *  One key/value entry: a pointer to the key string and a pointer to the
 *  value. `struct mod_json_object` refers to its entries through pointers
 *  to this type.
 */
struct mod_json_pair {
  mod_json_string_t *key;
  mod_json_value_t *val;
};

/*! JSON Object
 *
 *  Entries are stored as `mod_json_pair` records.
 *  NOTE(review): `refer` is presumably a reference count, `first`/`last`
 *  look like the bounds of the pairs in use and `size` like the allocated
 *  capacity -- confirm against the implementation.
 */
struct mod_json_object {
  mod_json_ssize_t refer;
  mod_json_size_t size;
  mod_json_pair_t *first;
  mod_json_pair_t *last;
};

#define MOD_JSON_COMMENT 0x0001  /* Enable comments */
#define MOD_JSON_UNSTRICT 0x0002 /* Enable loose JSON string */
#define MOD_JSON_SIMPLE 0x0004   /* Enable simple format */
#define MOD_JSON_SQUOTE 0x0008   /* Enable single quotes support */

/*! JSON Option
 *
 *  NOTE(review): `options` is presumably a bitwise OR of the MOD_JSON_*
 *  flags defined above, and the two depth fields presumably limit nesting
 *  of objects and arrays -- confirm against the parser.
 */
struct mod_json_option {
  mod_json_size_t options;
  mod_json_size_t object_depth;
  mod_json_size_t array_depth;
};

/**
 *  \brief           Create and set a JSON null value
 *  \return          Null indicates failure.
 */
mod_json_value_t *mod_json_value_set_null(void);

/**
 *  \brief           Create and set a JSON object value
 *  \param obj       The value to be assigned
 *  \return          Null indicates failure.
 */
mod_json_value_t *mod_json_value_set_object(mod_json_object_t *obj);

/**
 *  \brief           Create and set a JSON array value
 *  \param arr       The value to be assigned
 *  \return          Null indicates failure.
 */
mod_json_value_t *mod_json_value_set_array(mod_json_array_t *arr);

/**
 *  \brief           Create and set a JSON string value
 *  \param str       The value to be assigned
 *  \return          Null indicates failure.
 */
mod_json_value_t *mod_json_value_set_string(mod_json_string_t *str);

/**
 *  \brief           Create and set a JSON string buffer
 *  \param buf       The pointer of string buffer
 *  \param len       The length of string buffer
 *  \return          Null indicates failure.
 */
mod_json_value_t *mod_json_value_set_buffer(mod_json_cchar_t *buf,
                                            mod_json_size_t len);

/**
 *  \brief           Create and set a JSON integer value
 *  \param num       The value to be assigned
 *  \return          Null indicates failure.
 */
mod_json_value_t *mod_json_value_set_integer(mod_json_integer_t num);

/**
 *  \brief           Create and set a JSON float value
 *  \param dbl       The value to be assigned
 *  \return          Null indicates failure.
 */
mod_json_value_t *mod_json_value_set_float(mod_json_float_t dbl);

/**
 *  \brief           Create and set a JSON boolean value
 *  \param bol       The value to be assigned
 *  \return          Null indicates failure.
 */
mod_json_value_t *mod_json_value_set_boolean(mod_json_boolean_t bol);

/**
 *  \brief           Assign a JSON value as null
 *  \param val       The pointer of value
 */
void mod_json_value_assign_null(mod_json_value_t *val);

/**
 *  \brief           Assign a JSON value as an object
 *  \param val       The pointer of value
 *  \param obj       The value to be assigned
 */
void mod_json_value_assign_object(mod_json_value_t *val,
                                  mod_json_object_t *obj);

/**
 *  \brief           Assign a JSON value as an array
 *  \param val       The pointer of value
 *  \param arr       The value to be assigned
 */
void mod_json_value_assign_array(mod_json_value_t *val, mod_json_array_t *arr);

/**
 *  \brief           Assign a JSON value as a string
 *  \param val       The pointer of value
 *  \param str       The value to be assigned
 */
void mod_json_value_assign_string(mod_json_value_t *val,
                                  mod_json_string_t *str);

/**
 *  \brief           Assign a JSON value as an integer
 *  \param val       The pointer of value
 *  \param num       The value to be assigned
 */
void mod_json_value_assign_integer(mod_json_value_t *val,
                                   mod_json_integer_t num);

/**
 *  \brief           Assign a JSON value as a float
 *  \param val       The pointer of value
 *  \param dbl       The value to be assigned
 */
void mod_json_value_assign_float(mod_json_value_t *val, mod_json_float_t dbl);

/**
 *  \brief           Assign a JSON value as a boolean
 *  \param val       The pointer of value
 *  \param bol       The value to be assigned
 */
void mod_json_value_assign_boolean(mod_json_value_t *val,
                                   mod_json_boolean_t bol);

/**
 *  \brief           Assign a new JSON value
 *  \param dst       The pointer of destination value (can't be null)
 *  \param src       The pointer of source value (can be null)
 */
void mod_json_value_assign(mod_json_value_t *dst, mod_json_value_t *src);

/**
 *  \brief           Merge a JSON value into another one
 *  \param dst       The pointer of destination value (can't be null)
 *  \param src       The pointer of source value (can be null)
 *  \return          0 indicates success, -1 indicates failure.
 */
int mod_json_value_merge(mod_json_value_t *dst, mod_json_value_t *src);

/**
 *  \brief           Retrieve the object of a JSON value
 *  \param val       The pointer of value
 *  \return          Null indicates unmatched type or empty.
 */
mod_json_object_t *mod_json_value_object(mod_json_value_t *val);

/**
 *  \brief           Retrieve the array of a JSON value
 *  \param val       The pointer of value
 *  \return          Null indicates unmatched type or empty.
 */
mod_json_array_t *mod_json_value_array(mod_json_value_t *val);

/**
 *  \brief           Retrieve the string of a JSON value
 *  \param val       The pointer of value
 *  \return          Null indicates unmatched type or empty.
 */
mod_json_string_t *mod_json_value_string(mod_json_value_t *val);

/**
 *  \brief           Retrieve the c-string of a JSON value
 *  \param val       The pointer of value
 *  \return          Null indicates unmatched type or empty.
 */
mod_json_cchar_t *mod_json_value_cstring(mod_json_value_t *val);

/**
 *  \brief           Retrieve the float of a JSON value
 *  \param val       The pointer of value
 *  \return          A value of another type is converted to float
 *                   when possible; if nothing can be done, zero is
 *                   returned by default.
 */
mod_json_float_t mod_json_value_float(mod_json_value_t *val);

/**
 *  \brief           Retrieve the boolean of a JSON value
 *  \param val       The pointer of value
 *  \return          True if a string, object or array is not empty,
 *                   or if a number (integer or float) is not equal
 *                   to zero.
 */
mod_json_boolean_t mod_json_value_boolean(mod_json_value_t *val);

/**
 *  \brief           Retrieve the integer of a JSON value
 *  \param val       The pointer of value
 *  \return          A value of another type is converted to integer
 *                   when possible; if nothing can be done, zero is
 *                   returned by default.
 */
mod_json_integer_t mod_json_value_integer(mod_json_value_t *val);

/**
 *  \brief           Clone a JSON value
 *  \param val       The pointer of value
 *  \return          Null indicates failure.
 */
mod_json_value_t *mod_json_value_clone(mod_json_value_t *val);

/**
 *  \brief           Check whether two JSON values are equal
 *  \param lhs       The pointer of left value
 *  \param rhs       The pointer of right value
 *  \return          1 indicates true, 0 indicates false.
 */
mod_json_boolean_t mod_json_value_is_equal(mod_json_value_t *lhs,
                                           mod_json_value_t *rhs);

/**
 *  \brief           Unset or destroy a JSON value
 *  \param val       The pointer of value
 */
void mod_json_value_unset(mod_json_value_t *val);

/**
 *  \brief           Take one more reference on a JSON value
 *  \param val       The pointer of value (dereferenced, must not be null)
 *  \return          The same pointer that was passed in
 */
static inline mod_json_value_t *mod_json_value_get(mod_json_value_t *val) {
  val->refer += 1;
  return val;
}

/**
 *  \brief           Drop one reference from a JSON value
 *  \param val       The pointer of value (dereferenced, must not be null)
 *  \return          The refer-counter after it has been decreased
 */
static inline mod_json_ssize_t mod_json_value_put(mod_json_value_t *val) {
  val->refer -= 1;
  return val->refer;
}

/**
 *  \brief           Read the refer-counter of a JSON value
 *  \param val       The pointer of value (can be null)
 *  \return          The current refer-counter, or -1 for a null pointer
 */
static inline mod_json_ssize_t mod_json_value_refer(mod_json_value_t *val) {
  if (!val) {
    return -1;
  }
  return val->refer;
}

/**
 *  \brief           Mark a JSON value as leaked
 *  \param val       The pointer of value (dereferenced, must not be null)
 */
static inline void mod_json_value_set_leaked(mod_json_value_t *val) {
  /* A refer-counter of zero is what mod_json_value_is_leaked() reports. */
  val->refer = 0;
}

/**
 *  \brief           Check whether a JSON value is marked as leaked
 *  \param val       The pointer of value (dereferenced, must not be null)
 *  \return          1 if the refer-counter is zero or negative, 0 otherwise
 */
static inline mod_json_boolean_t mod_json_value_is_leaked(
    mod_json_value_t *val) {
  return !(val->refer > 0);
}

/**
 *  \brief           Check whether a JSON value has more than one reference
 *  \param val       The pointer of value (dereferenced, must not be null)
 *  \return          1 if the refer-counter is greater than one, 0 otherwise
 */
static inline mod_json_boolean_t mod_json_value_is_shared(
    mod_json_value_t *val) {
  return (1 < val->refer);
}

/**
 *  \brief           Obtain a usable JSON value (new reference or clone)
 *  \param val       The pointer of value (dereferenced, must not be null)
 *  \return          The value itself with one more reference, or a clone
 *                   when it is leaked. Null indicates failure.
 */
static inline mod_json_value_t *mod_json_value_grab(mod_json_value_t *val) {
  /* Leaked values are cloned; all others just gain a reference. */
  if (mod_json_value_is_leaked(val)) {
    return mod_json_value_clone(val);
  }
  return mod_json_value_get(val);
}

/**
 *  \brief           Read the type code of a JSON value
 *  \param val       The pointer of value (dereferenced, must not be null)
 *  \return          The code of type stored in the value
 */
static inline mod_json_type_t mod_json_value_type(mod_json_value_t *val) {
  mod_json_type_t kind = val->type;
  return kind;
}

/**
 *  \brief           Check whether a JSON value is null
 *  \param val       The pointer of value (can be null)
 *  \return          1 for a null pointer or a value of null type,
 *                   0 otherwise
 */
static inline mod_json_boolean_t mod_json_value_is_null(mod_json_value_t *val) {
  /* A missing value counts as null. */
  if (!val) {
    return MOD_JSON_TRUE;
  }
  return (val->type == mod_json_type_null);
}

/**
 *  \brief           Check whether a JSON value holds an array
 *  \param val       The pointer of value (can be null)
 *  \return          1 if the type is array, 0 otherwise (including null
 *                   pointer)
 */
static inline mod_json_boolean_t mod_json_value_is_array(
    mod_json_value_t *val) {
  if (!val) {
    return MOD_JSON_FALSE;
  }
  return (val->type == mod_json_type_array);
}

/**
 *  \brief           Check whether a JSON value holds an object
 *  \param val       The pointer of value (can be null)
 *  \return          1 if the type is object, 0 otherwise (including null
 *                   pointer)
 */
static inline mod_json_boolean_t mod_json_value_is_object(
    mod_json_value_t *val) {
  if (!val) {
    return MOD_JSON_FALSE;
  }
  return (val->type == mod_json_type_object);
}

/**
 *  \brief           Check whether a JSON value holds a string
 *  \param val       The pointer of value (can be null)
 *  \return          1 if the type is string, 0 otherwise (including null
 *                   pointer)
 */
static inline mod_json_boolean_t mod_json_value_is_string(
    mod_json_value_t *val) {
  if (!val) {
    return MOD_JSON_FALSE;
  }
  return (val->type == mod_json_type_string);
}

/**
 *  \brief           Check whether a JSON value holds a float
 *  \param val       The pointer of value (can be null)
 *  \return          1 if the type is float, 0 otherwise (including null
 *                   pointer)
 */
static inline mod_json_boolean_t mod_json_value_is_float(
    mod_json_value_t *val) {
  if (!val) {
    return MOD_JSON_FALSE;
  }
  return (val->type == mod_json_type_float);
}

/**
 *  \brief           Check whether a JSON value holds a boolean
 *  \param val       The pointer of value (can be null)
 *  \return          1 if the type is boolean, 0 otherwise (including null
 *                   pointer)
 */
static inline mod_json_boolean_t mod_json_value_is_boolean(
    mod_json_value_t *val) {
  if (!val) {
    return MOD_JSON_FALSE;
  }
  return (val->type == mod_json_type_boolean);
}

/**
 *  \brief           Check whether a JSON value holds an integer
 *  \param val       The pointer of value (can be null)
 *  \return          1 if the type is integer, 0 otherwise (including null
 *                   pointer)
 */
static inline mod_json_boolean_t mod_json_value_is_integer(
    mod_json_value_t *val) {
  if (!val) {
    return MOD_JSON_FALSE;
  }
  return (val->type == mod_json_type_integer);
}

/**
 *  \brief           Request a change in capacity of a JSON string
 *  \param str       The pointer of string
 *  \param n         The requested size of capacity
 *  \return          0 indicates success, -1 indicates failure.
 */
int mod_json_string_reserve(mod_json_string_t *str, mod_json_size_t n);

/**
 *  \brief           Create a JSON string from a c-string
 *  \param cstr      The pointer of c-string
 *  \param len       The length of c-string
 *  \return          The new string, Null indicates failure.
 */
mod_json_string_t *mod_json_string_set(mod_json_cchar_t *cstr,
                                       mod_json_size_t len);

/**
 *  \brief           Assign new content to a JSON string
 *  \param str       The pointer of string
 *  \param cstr      The pointer of c-string
 *  \param len       The length of c-string
 *  \return          0 indicates success, -1 indicates failure.
 */
int mod_json_string_assign(mod_json_string_t *str, mod_json_cchar_t *cstr,
                           mod_json_size_t len);

/**
 *  \brief           Make a copy of a JSON string
 *  \param str       The pointer of string (can be null)
 *  \return          A string created by mod_json_string_set() from the
 *                   bytes between first and last. Null indicates a null
 *                   input or failure.
 */
static inline mod_json_string_t *mod_json_string_clone(mod_json_string_t *str) {
  mod_json_size_t len;

  if (!str) {
    return (mod_json_string_t *)0;
  }
  len = (mod_json_size_t)(str->last - str->first);
  return mod_json_string_set(str->first, len);
}

/**
 *  \brief           Unset or destroy a JSON string
 *  \param str       The pointer of string
 */
void mod_json_string_unset(mod_json_string_t *str);

/**
 *  \brief           Reset a JSON string
 *  \param str       The pointer of string
 */
void mod_json_string_reset(mod_json_string_t *str);

/**
 *  \brief           Append a c-string to a JSON string
 *  \param str       The pointer of string
 *  \param cstr      The pointer of c-string
 *  \param len       The length of c-string
 *  \return          0 indicates success, -1 indicates failure.
 */
int mod_json_string_append(mod_json_string_t *str, mod_json_cchar_t *cstr,
                           mod_json_size_t len);

/**
 *  \brief           Append a copy of a JSON string to another one
 *  \param str       The main string
 *  \param val       The appended string
 *  \return          0 indicates success, -1 indicates failure.
 */
int mod_json_string_add(mod_json_string_t *str, mod_json_string_t *val);

/**
 *  \brief           Retrieve the hash of a JSON string
 *  \param str       The pointer of string
 *  \return          The value of the hash
 */
mod_json_size_t mod_json_string_hash(mod_json_string_t *str);

/**
 *  \brief           Compare two JSON strings (case sensitive)
 *  \param str1      The first string
 *  \param str2      The second string
 *  \return          0 indicates equal.
 */
int mod_json_string_compare(mod_json_string_t *str1, mod_json_string_t *str2);

/**
 *  \brief           Convert a JSON string to an integer
 *  \param str       The pointer of string
 *  \return          If nothing can be done, zero is returned by default.
 */
mod_json_integer_t mod_json_string_integer(mod_json_string_t *str);

/**
 *  \brief           Convert a JSON string to a float
 *  \param str       The pointer of string
 *  \return          If nothing can be done, zero is returned by default.
 */
mod_json_float_t mod_json_string_float(mod_json_string_t *str);

/**
 *  \brief           Encode a JSON string
 *  \param src       The pointer of source string
 *  \return          The encoded string, Null indicates failure.
 */
mod_json_string_t *mod_json_string_encode(mod_json_string_t *src);

/**
 *  \brief           Decode a JSON string
 *  \param src       The pointer of source string
 *  \return          The decoded string, Null indicates failure.
 */
mod_json_string_t *mod_json_string_decode(mod_json_string_t *src);

/**
 *  \brief           Take one more reference on a JSON string
 *  \param str       The pointer of string (dereferenced, must not be null)
 *  \return          The same pointer that was passed in
 */
static inline mod_json_string_t *mod_json_string_get(mod_json_string_t *str) {
  str->refer += 1;
  return str;
}

/**
 *  \brief           Drop one reference from a JSON string
 *  \param str       The pointer of string (dereferenced, must not be null)
 *  \return          The refer-counter after it has been decreased
 */
static inline mod_json_ssize_t mod_json_string_put(mod_json_string_t *str) {
  str->refer -= 1;
  return str->refer;
}

/**
 *  \brief           Read the refer-counter of a JSON string
 *  \param str       The pointer of string (can be null)
 *  \return          The current refer-counter, or -1 for a null pointer
 */
static inline mod_json_ssize_t mod_json_string_refer(mod_json_string_t *str) {
  if (!str) {
    return -1;
  }
  return str->refer;
}

/**
 *  \brief           Mark a JSON string as leaked
 *  \param str       The pointer of string (dereferenced, must not be null)
 */
static inline void mod_json_string_set_leaked(mod_json_string_t *str) {
  /* A refer-counter of zero is what mod_json_string_is_leaked() reports. */
  str->refer = 0;
}

/**
 *  \brief           Check whether a JSON string is marked as leaked
 *  \param str       The pointer of string (dereferenced, must not be null)
 *  \return          1 if the refer-counter is zero or negative, 0 otherwise
 */
static inline mod_json_boolean_t mod_json_string_is_leaked(
    mod_json_string_t *str) {
  return !(str->refer > 0);
}

/**
 *  \brief           Check whether a JSON string has more than one reference
 *  \param str       The pointer of string (dereferenced, must not be null)
 *  \return          1 if the refer-counter is greater than one, 0 otherwise
 */
static inline mod_json_boolean_t mod_json_string_is_shared(
    mod_json_string_t *str) {
  return (1 < str->refer);
}

/**
 *  \brief           Obtain a usable JSON string (new reference or clone)
 *  \param str       The pointer of string (dereferenced, must not be null)
 *  \return          The string itself with one more reference, or a clone
 *                   when it is leaked. Null indicates failure.
 */
static inline mod_json_string_t *mod_json_string_grab(mod_json_string_t *str) {
  /* Leaked strings are cloned; all others just gain a reference. */
  if (mod_json_string_is_leaked(str)) {
    return mod_json_string_clone(str);
  }
  return mod_json_string_get(str);
}

/**
 *  \brief           Access the content of a JSON string as a c-string
 *  \param str       The pointer of string (can be null)
 *  \return          The pointer of the first character, or null for a
 *                   null string pointer
 */
static inline mod_json_cchar_t *mod_json_string_cstr(mod_json_string_t *str) {
  if (!str) {
    return (mod_json_cchar_t *)0;
  }
  return str->first;
}

/**
 *  \brief           Access the writable data of a JSON string
 *  \param str       The pointer of string (can be null)
 *  \return          The pointer of the first character, or null for a
 *                   null string pointer
 */
static inline mod_json_char_t *mod_json_string_data(mod_json_string_t *str) {
  if (!str) {
    return (mod_json_char_t *)0;
  }
  return str->first;
}

/**
 *  \brief           Read the capacity of a JSON string
 *  \param str       The pointer of string (can be null)
 *  \return          The stored size minus one, or 0 for a null pointer
 */
static inline mod_json_size_t mod_json_string_capacity(mod_json_string_t *str) {
  if (!str) {
    return 0;
  }
  /* NOTE(review): one unit is presumably kept for a terminator - confirm. */
  return (str->size - 1);
}

/**
 *  \brief           Read the length of a JSON string
 *  \param str       The pointer of string (can be null)
 *  \return          The distance between first and last, or 0 for a
 *                   null pointer
 */
static inline mod_json_size_t mod_json_string_length(mod_json_string_t *str) {
  if (!str) {
    return 0;
  }
  return (mod_json_size_t)(str->last - str->first);
}

/**
 *  \brief           Check whether a JSON string has no content
 *  \param str       The pointer of string (can be null)
 *  \return          1 if the length is zero (including null pointer),
 *                   0 indicates non-empty
 */
static inline mod_json_boolean_t mod_json_string_empty(mod_json_string_t *str) {
  return (0 == mod_json_string_length(str));
}

/**
 *  \brief           Create a JSON array
 *  \param size      The initialized size of array
 *  \return          The new array, Null indicates failure.
 */
mod_json_array_t *mod_json_array_set(mod_json_size_t size);

/**
 *  \brief           Clone a JSON array
 *  \param arr       The pointer of array
 *  \return          The cloned array, Null indicates failure.
 */
mod_json_array_t *mod_json_array_clone(mod_json_array_t *arr);

/**
 *  \brief           Check whether two JSON arrays are equal
 *  \param lhs       The pointer of left array
 *  \param rhs       The pointer of right array
 *  \return          1 indicates true, 0 indicates false.
 */
mod_json_boolean_t mod_json_array_is_equal(mod_json_array_t *lhs,
                                           mod_json_array_t *rhs);

/**
 *  \brief           Unset or destroy a JSON array
 *  \param arr       The pointer of array
 */
void mod_json_array_unset(mod_json_array_t *arr);

/**
 *  \brief           Reset a JSON array
 *  \param arr       The pointer of array
 */
void mod_json_array_reset(mod_json_array_t *arr);

/**
 *  \brief           Create a JSON array with default parameters
 *  \return          The array returned by mod_json_array_set() for an
 *                   initialized size of zero. Null indicates failure.
 */
static inline mod_json_array_t *mod_json_array_set_default(void) {
  mod_json_array_t *arr = mod_json_array_set(0);
  return arr;
}

/**
 *  \brief           Take one more reference on a JSON array
 *  \param arr       The pointer of array (dereferenced, must not be null)
 *  \return          The same pointer that was passed in
 */
static inline mod_json_array_t *mod_json_array_get(mod_json_array_t *arr) {
  arr->refer += 1;
  return arr;
}

/**
 *  \brief           Drop one reference from a JSON array
 *  \param arr       The pointer of array (dereferenced, must not be null)
 *  \return          The refer-counter after it has been decreased
 */
static inline mod_json_ssize_t mod_json_array_put(mod_json_array_t *arr) {
  arr->refer -= 1;
  return arr->refer;
}

/**
 *  \brief           Read the refer-counter of a JSON array
 *  \param arr       The pointer of array (can be null)
 *  \return          The current refer-counter, or -1 for a null pointer
 */
static inline mod_json_ssize_t mod_json_array_refer(mod_json_array_t *arr) {
  if (!arr) {
    return -1;
  }
  return arr->refer;
}

/**
 *  \brief           Mark a JSON array as leaked
 *  \param arr       The pointer of array (dereferenced, must not be null)
 */
static inline void mod_json_array_set_leaked(mod_json_array_t *arr) {
  /* A refer-counter of zero is what mod_json_array_is_leaked() reports. */
  arr->refer = 0;
}

/**
 *  \brief           Check whether a JSON array is marked as leaked
 *  \param arr       The pointer of array (dereferenced, must not be null)
 *  \return          1 if the refer-counter is zero or negative, 0 otherwise
 */
static inline mod_json_boolean_t mod_json_array_is_leaked(
    mod_json_array_t *arr) {
  return !(arr->refer > 0);
}

/**
 *  \brief           Check whether a JSON array has more than one reference
 *  \param arr       The pointer of array (dereferenced, must not be null)
 *  \return          1 if the refer-counter is greater than one, 0 otherwise
 */
static inline mod_json_boolean_t mod_json_array_is_shared(
    mod_json_array_t *arr) {
  return (1 < arr->refer);
}

/**
 *  \brief           Obtain a usable JSON array (new reference or clone)
 *  \param arr       The pointer of array (dereferenced, must not be null)
 *  \return          The array itself with one more reference, or a clone
 *                   when it is leaked. Null indicates failure.
 */
static inline mod_json_array_t *mod_json_array_grab(mod_json_array_t *arr) {
  /* Leaked arrays are cloned; all others just gain a reference. */
  if (mod_json_array_is_leaked(arr)) {
    return mod_json_array_clone(arr);
  }
  return mod_json_array_get(arr);
}

/**
 *  \brief           Count the elements stored in a JSON array
 *  \param arr       The pointer of array (can be null)
 *  \return          The distance between first and last, or 0 for a
 *                   null pointer
 */
static inline mod_json_size_t mod_json_array_count(mod_json_array_t *arr) {
  if (!arr) {
    return 0;
  }
  return (mod_json_size_t)(arr->last - arr->first);
}

/**
 *  \brief           Read the capacity of a JSON array
 *  \param arr       The pointer of array (can be null)
 *  \return          The stored size of the array, or 0 for a null pointer
 */
static inline mod_json_size_t mod_json_array_capacity(mod_json_array_t *arr) {
  if (!arr) {
    return 0;
  }
  return arr->size;
}

/**
 *  \brief           Check whether a JSON array has no elements
 *  \param arr       The pointer of array (can be null)
 *  \return          1 if the count is zero (including null pointer),
 *                   0 indicates non-empty
 */
static inline mod_json_boolean_t mod_json_array_empty(mod_json_array_t *arr) {
  return (0 == mod_json_array_count(arr));
}

/**
 *  \brief           Get the forward-iteration start of a JSON array
 *  \param arr       The pointer of array (dereferenced, must not be null)
 *  \return          The pointer of the first element slot
 */
static inline mod_json_value_t **mod_json_array_begin(mod_json_array_t *arr) {
  mod_json_value_t **head = arr->first;
  return head;
}

/**
 *  \brief           Get the reverse-iteration start of a JSON array
 *  \param arr       The pointer of array (dereferenced, must not be null)
 *  \return          The pointer one slot before the end, i.e. last - 1
 */
static inline mod_json_value_t **mod_json_array_rbegin(mod_json_array_t *arr) {
  mod_json_value_t **tail = arr->last - 1;
  return tail;
}

/**
 *  \brief           Get the forward-iteration end of a JSON array
 *  \param arr       The pointer of array (dereferenced, must not be null)
 *  \return          The stored last pointer of the array
 */
static inline mod_json_value_t **mod_json_array_end(mod_json_array_t *arr) {
  mod_json_value_t **stop = arr->last;
  return stop;
}

/**
 *  \brief           Get the reverse-iteration end of a JSON array
 *  \param arr       The pointer of array (dereferenced, must not be null)
 *  \return          The pointer one slot before the first element, i.e.
 *                   first - 1. It is a sentinel for comparison only.
 */
static inline mod_json_value_t **mod_json_array_rend(mod_json_array_t *arr) {
  /* NOTE(review): a pointer before the start of the storage is not */
  /* guaranteed to be valid by the C standard - confirm this is intended. */
  mod_json_value_t **stop = arr->first - 1;
  return stop;
}

/**
 *  \brief           Request a change in capacity of a JSON array
 *  \param arr       The pointer of array
 *  \param n         The requested size of capacity
 *  \return          0 indicates success, -1 indicates failure.
 */
int mod_json_array_reserve(mod_json_array_t *arr, mod_json_size_t n);

/**
 *  \brief           Reverse the order of the elements in an array
 *  \param arr       The pointer of array
 */
void mod_json_array_reverse(mod_json_array_t *arr);

/**
 *  \brief           Push a value into a JSON array
 *  \param arr       The pointer of array
 *  \param val       The pointer of value
 *  \return          0 indicates success, -1 indicates failure.
 */
int mod_json_array_push(mod_json_array_t *arr, mod_json_value_t *val);

/**
 *  \brief           Pop the last element from a JSON array
 *  \param arr       The pointer of array
 */
void mod_json_array_pop(mod_json_array_t *arr);

/**
 *  \brief           Remove the first element of a JSON array
 *  \param arr       The pointer of array
 */
void mod_json_array_shift(mod_json_array_t *arr);

/**
 *  \brief           Retrieve a value in a JSON array
 *  \param arr       The pointer of array
 *  \param id        The index (starting from zero)
 *  \return          Null indicates that no value was found.
 */
mod_json_value_t *mod_json_array_at(mod_json_array_t *arr, mod_json_size_t id);

/**
 *  \brief           Merge a JSON array into another one
 *  \param dst       The pointer of destination array (can't be null)
 *  \param src       The pointer of source array (can't be null)
 *  \return          0 indicates success, -1 indicates failure.
 */
int mod_json_array_merge(mod_json_array_t *dst, mod_json_array_t *src);

/**
 *  \brief           Resize a JSON array so that it contains n elements
 *  \param arr       The pointer of array
 *  \param n         The new size, expressed in number of elements
 *  \param val       The pointer of value assigned (can be null)
 *  \return          0 indicates success, -1 indicates failure.
 */
int mod_json_array_resize(mod_json_array_t *arr, mod_json_size_t n,
                          mod_json_value_t *val);

/**
 *  \brief           Access the key of a JSON pair
 *  \param pair      The pointer of pair (dereferenced, must not be null)
 *  \return          The key string stored in the pair
 */
static inline mod_json_string_t *mod_json_pair_key(mod_json_pair_t *pair) {
  mod_json_string_t *key = pair->key;
  return key;
}

/**
 *  \brief           Access the value of a JSON pair
 *  \param pair      The pointer of pair (dereferenced, must not be null)
 *  \return          The value stored in the pair
 */
static inline mod_json_value_t *mod_json_pair_value(mod_json_pair_t *pair) {
  mod_json_value_t *val = pair->val;
  return val;
}

/**
 *  \brief           Create a JSON object
 *  \param size      The initialized size of object
 *  \return          The new object, Null indicates failure.
 */
mod_json_object_t *mod_json_object_set(mod_json_size_t size);

/**
 *  \brief           Clone a JSON object
 *  \param obj       The pointer of object
 *  \return          The cloned object, Null indicates failure.
 */
mod_json_object_t *mod_json_object_clone(mod_json_object_t *obj);

/**
 *  \brief           Check whether two JSON objects are equal
 *  \param lhs       The pointer of left object
 *  \param rhs       The pointer of right object
 *  \return          1 indicates true, 0 indicates false.
 */
mod_json_boolean_t mod_json_object_is_equal(mod_json_object_t *lhs,
                                            mod_json_object_t *rhs);

/**
 *  \brief           Unset or destroy a JSON object
 *  \param obj       The pointer of object
 */
void mod_json_object_unset(mod_json_object_t *obj);

/**
 *  \brief           Reset a JSON object
 *  \param obj       The pointer of object
 */
void mod_json_object_reset(mod_json_object_t *obj);

/**
 *  \brief           Create a JSON object with default parameters
 *  \return          The object returned by mod_json_object_set() for an
 *                   initialized size of zero. Null indicates failure.
 */
static inline mod_json_object_t *mod_json_object_set_default(void) {
  mod_json_object_t *obj = mod_json_object_set(0);
  return obj;
}

/**
 *  \brief           Take one more reference on a JSON object
 *  \param obj       The pointer of object (dereferenced, must not be null)
 *  \return          The same pointer that was passed in
 */
static inline mod_json_object_t *mod_json_object_get(mod_json_object_t *obj) {
  obj->refer += 1;
  return obj;
}

/**
 *  \brief           Drop one reference from a JSON object
 *  \param obj       The pointer of object (dereferenced, must not be null)
 *  \return          The refer-counter after it has been decreased
 */
static inline mod_json_ssize_t mod_json_object_put(mod_json_object_t *obj) {
  obj->refer -= 1;
  return obj->refer;
}

/**
 *  \brief           Read the refer-counter of a JSON object
 *  \param obj       The pointer of object (can be null)
 *  \return          The current refer-counter, or -1 for a null pointer
 */
static inline mod_json_ssize_t mod_json_object_refer(mod_json_object_t *obj) {
  if (!obj) {
    return -1;
  }
  return obj->refer;
}

/**
 *  \brief           Mark a JSON object as leaked
 *  \param obj       The pointer of object (dereferenced, must not be null)
 */
static inline void mod_json_object_set_leaked(mod_json_object_t *obj) {
  /* A refer-counter of zero is what mod_json_object_is_leaked() reports. */
  obj->refer = 0;
}

/**
 *  \brief           Check whether a JSON object is marked as leaked
 *  \param obj       The pointer of object (dereferenced, must not be null)
 *  \return          1 if the refer-counter is zero or negative, 0 otherwise
 */
static inline mod_json_boolean_t mod_json_object_is_leaked(
    mod_json_object_t *obj) {
  return !(obj->refer > 0);
}

/**
 *  \brief           Check whether a JSON object has more than one reference
 *  \param obj       The pointer of object (dereferenced, must not be null)
 *  \return          1 if the refer-counter is greater than one, 0 otherwise
 */
static inline mod_json_boolean_t mod_json_object_is_shared(
    mod_json_object_t *obj) {
  return (1 < obj->refer);
}

/**
 *  \brief           Obtain a usable JSON object (new reference or clone)
 *  \param obj       The pointer of object (dereferenced, must not be null)
 *  \return          The object itself with one more reference, or a clone
 *                   when it is leaked. Null indicates failure.
 */
static inline mod_json_object_t *mod_json_object_grab(mod_json_object_t *obj) {
  /* Leaked objects are cloned; all others just gain a reference. */
  if (mod_json_object_is_leaked(obj)) {
    return mod_json_object_clone(obj);
  }
  return mod_json_object_get(obj);
}

/**
 *  \brief           Count the pairs stored in a JSON object
 *  \param obj       The pointer of object (can be null)
 *  \return          The distance between first and last, or 0 for a
 *                   null pointer
 */
static inline mod_json_size_t mod_json_object_count(mod_json_object_t *obj) {
  if (!obj) {
    return 0;
  }
  return (mod_json_size_t)(obj->last - obj->first);
}

/**
 *  \brief           Check whether a JSON object has no pairs
 *  \param obj       The pointer of object (can be null)
 *  \return          1 if the count is zero (including null pointer),
 *                   0 indicates non-empty
 */
static inline mod_json_boolean_t mod_json_object_empty(mod_json_object_t *obj) {
  return (0 == mod_json_object_count(obj));
}

/**
 *  \brief           Get the forward-iteration start of a JSON object
 *  \param obj       The pointer of object (dereferenced, must not be null)
 *  \return          The pointer of the first pair
 */
static inline mod_json_pair_t *mod_json_object_begin(mod_json_object_t *obj) {
  mod_json_pair_t *head = obj->first;
  return head;
}

/**
 *  \brief           Get the reverse-iteration start of a JSON object
 *  \param obj       The pointer of object (dereferenced, must not be null)
 *  \return          The pointer one pair before the end, i.e. last - 1
 */
static inline mod_json_pair_t *mod_json_object_rbegin(mod_json_object_t *obj) {
  mod_json_pair_t *tail = obj->last - 1;
  return tail;
}

/**
 *  \brief           Get the forward-iteration end of a JSON object
 *  \param obj       The pointer of object (dereferenced, must not be null)
 *  \return          The stored last pointer of the object
 */
static inline mod_json_pair_t *mod_json_object_end(mod_json_object_t *obj) {
  mod_json_pair_t *stop = obj->last;
  return stop;
}

/**
 *  \brief           Get the reverse-iteration end of a JSON object
 *  \param obj       The pointer of object (dereferenced, must not be null)
 *  \return          The pointer one pair before the first pair, i.e.
 *                   first - 1. It is a sentinel for comparison only.
 */
static inline mod_json_pair_t *mod_json_object_rend(mod_json_object_t *obj) {
  /* NOTE(review): a pointer before the start of the storage is not */
  /* guaranteed to be valid by the C standard - confirm this is intended. */
  mod_json_pair_t *stop = obj->first - 1;
  return stop;
}

/**
 *  \brief           Insert a pair into a JSON object
 *  \param obj       The pointer of object
 *  \param key       The string of key
 *  \param val       The pointer of value
 *  \return          The pair inserted, Null indicates failure.
 */
mod_json_pair_t *mod_json_object_insert(mod_json_object_t *obj,
                                        mod_json_string_t *key,
                                        mod_json_value_t *val);

/**
 *  \brief           Assign a pair into a JSON object
 *  \param obj       The pointer of object
 *  \param key       The string of key
 *  \param val       The pointer of value
 *  \return          The pair assigned, Null indicates failure.
 */
mod_json_pair_t *mod_json_object_assign(mod_json_object_t *obj,
                                        mod_json_string_t *key,
                                        mod_json_value_t *val);

/**
 *  \brief           Touch a pair in a JSON object
 *  \param obj       The pointer of object
 *  \param key       The c-string of key
 *  \return          The pointer of pair. NOTE(review): Null presumably
 *                   indicates failure - confirm against the implementation.
 */
mod_json_pair_t *mod_json_object_touch(mod_json_object_t *obj,
                                       mod_json_cchar_t *key);

/**
 *  \brief           Erase a pair from a JSON object
 *  \param obj       The pointer of object
 *  \param key       The c-string of key
 */
void mod_json_object_erase(mod_json_object_t *obj, mod_json_cchar_t *key);

/**
 *  \brief           Get a value in a JSON object
 *  \param obj       The pointer of object
 *  \param key       The c-string of key
 *  \return          The value of the key, Null indicates failure.
 */
mod_json_value_t *mod_json_object_at(mod_json_object_t *obj,
                                     mod_json_cchar_t *key);

/**
 *  \brief           Find a pair in a JSON object
 *  \param obj       The pointer of object
 *  \param key       The c-string of key
 *  \return          The pair of the key, Null indicates failure.
 */
mod_json_pair_t *mod_json_object_find(mod_json_object_t *obj,
                                      mod_json_cchar_t *key);

/**
 *  \brief           Merge a JSON object into another one
 *  \param dst       The pointer of destination object (can't be null)
 *  \param src       The pointer of source object (can't be null)
 *  \return          0 indicates success, -1 indicates failure.
 */
int mod_json_object_merge(mod_json_object_t *dst, mod_json_object_t *src);

/**
 *  \brief           Create a JSON token
 *  \param opt       The options of parser
 *  \return          The pointer of token, Null indicates failure.
 */
mod_json_token_t *mod_json_token_create(mod_json_option_t *opt);

/**
 *  \brief           Destroy a JSON token
 *  \param tok       The pointer of token
 */
void mod_json_token_destroy(mod_json_token_t *tok);

/**
 *  \brief           Parse a c-string with a JSON token (status only)
 *  \param tok       The pointer of token
 *  \param cstr      The pointer of c-string
 *  \return          0 indicates success, -1 indicates failure.
 */
int mod_json_token_parse(mod_json_token_t *tok, mod_json_cchar_t *cstr);

/**
 *  \brief           Retrieve the error of a JSON token
 *  \param tok       The pointer of token
 *  \return          The code of error
 */
mod_json_error_t mod_json_token_error(mod_json_token_t *tok);

/**
 *  \brief           Retrieve the error context of a JSON token
 *  \param tok       The pointer of token
 *  \return          The pointer of context, null indicates no error
 */
mod_json_cchar_t *mod_json_token_context(mod_json_token_t *tok);

/**
 *  \brief           Retrieve the state of a JSON token
 *  \param tok       The pointer of token
 *  \return          The value of state
 */
mod_json_state_t mod_json_token_state(mod_json_token_t *tok);

/**
 *  \brief           Retrieve the object depth of a JSON token
 *  \param tok       The pointer of token
 *  \return          The value of object depth
 */
mod_json_size_t mod_json_token_object_depth(mod_json_token_t *tok);

/**
 *  \brief           Retrieve the array depth of a JSON token
 *  \param tok       The pointer of token
 *  \return          The value of array depth
 */
mod_json_size_t mod_json_token_array_depth(mod_json_token_t *tok);

/**
 *  \brief           Retrieve the max object depth of a JSON token
 *  \param tok       The pointer of token
 *  \return          The value of max object depth
 */
mod_json_size_t mod_json_token_max_object_depth(mod_json_token_t *tok);

/**
 *  \brief           Retrieve the max array depth of a JSON token
 *  \param tok       The pointer of token
 *  \return          The value of max array depth
 */
mod_json_size_t mod_json_token_max_array_depth(mod_json_token_t *tok);

/**
 *  \brief           Retrieve the depth of a JSON token
 *  \param tok       The pointer of token
 *  \return          The value of depth
 */
mod_json_size_t mod_json_token_depth(mod_json_token_t *tok);

/**
 *  \brief           Retrieve the max depth of a JSON token
 *  \param tok       The pointer of token
 *  \return          The value of max depth
 */
mod_json_size_t mod_json_token_max_depth(mod_json_token_t *tok);

/**
 *  \brief           Retrieve the parameter of a JSON token
 *  \param tok       The pointer of token
 *  \return          The value of parameter
 */
mod_json_void_t *mod_json_token_param(mod_json_token_t *tok);

/**
 *  \brief           Set the parameter of a JSON token
 *  \param tok       The pointer of token
 *  \param param     The value of parameter
 */
void mod_json_token_set_param(mod_json_token_t *tok, mod_json_void_t *param);

/**
 *  \brief           Register the callback function of a JSON token
 *  \param tok       The pointer of token
 *  \param proc      The pointer of callback function
 */
void mod_json_token_set_event(mod_json_token_t *tok, mod_json_event_proc proc);

/**
 *  \brief           Retrieve the event code of a JSON token
 *  \param tok       The pointer of token
 *  \return          The code of event
 */
mod_json_event_t mod_json_token_event(mod_json_token_t *tok);

/**
 *  \brief           Parse a c-string with a JSON token
 *  \param tok       The pointer of token
 *  \param cstr      The pointer of c-string
 *  \return          The pointer of value, Null indicates failure.
 */
mod_json_value_t *mod_json_parse(mod_json_token_t *tok, mod_json_cchar_t *cstr);

/**
 *  \brief           Parse a c-string simply
 *  \param cstr      The pointer of c-string
 *  \param opts      The options of parser
 *  \return          The pointer of value, Null indicates failure.
 */
mod_json_value_t *mod_json_parse_simply(mod_json_cchar_t *cstr,
                                        mod_json_size_t opts);

/**
 *  \brief           Dump a JSON value in string
 *  \param val       The pointer of value
 *  \return          Null indicates failure.
 */
mod_json_string_t *mod_json_dump(mod_json_value_t *val);

#if defined(__cplusplus)
} /* extern "C" */
#endif


================================================
FILE: src/include/zvec/ailego/encoding/json/mod_json_plus.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cfloat>
#include <cstring>
#include <stdexcept>
#include <string>
#include "mod_json.h"

namespace zvec {
namespace ailego {

/*! JSON String
 *
 *  C++ wrapper around a `mod_json_string_t` handle. Copying acquires the
 *  handle through `mod_json_string_grab()`, destruction releases it through
 *  `mod_json_string_unset()`, and every mutating member first calls
 *  `copy_on_write()` so that a handle reported as shared is cloned before it
 *  is modified. A default-constructed object holds no handle at all
 *  (`is_valid()` returns false).
 */
class JsonString {
 public:
  typedef mod_json_size_t size_type;
  typedef mod_json_ssize_t ssize_type;
  typedef mod_json_float_t float_type;
  typedef mod_json_integer_t integer_type;

  //! Constructor: the string holds no handle
  JsonString(void) : str_(0) {}

  //! Copy constructor: acquires the handle of rhs (if it has one)
  JsonString(const JsonString &rhs) : str_(0) {
    if (rhs.str_) {
      str_ = mod_json_string_grab(rhs.str_);
    }
  }

#if __cplusplus >= 201103L
  //! Move constructor: takes over the handle of rhs, leaving rhs without one
  JsonString(JsonString &&rhs) noexcept : str_(rhs.str_) {
    rhs.str_ = 0;
  }
#endif

  //! Constructor from a c-style string (a null pointer yields no handle)
  JsonString(const char *cstr) {
    str_ = cstr ? mod_json_string_set(cstr, (mod_json_size_t)std::strlen(cstr))
                : 0;
  }

  //! Constructor from a buffer of len bytes
  JsonString(const char *cstr, size_type len) {
    str_ = mod_json_string_set(cstr, len);
  }

  //! Constructor from a STL-style string
  JsonString(const std::string &str) {
    str_ = mod_json_string_set(str.c_str(), (mod_json_size_t)str.size());
  }

  //! Destructor: releases the handle
  ~JsonString(void) {
    mod_json_string_unset(str_);
  }

  //! Assign new contents to the string, replacing its current content
  JsonString &operator=(const JsonString &rhs) {
    this->assign(rhs);
    return *this;
  }

#if __cplusplus >= 201103L
  //! Assign new contents to the string, replacing its current content
  JsonString &operator=(JsonString &&rhs) noexcept {
    this->assign(std::move(rhs));
    return *this;
  }
#endif

  //! Assign new contents to the string, replacing its current content
  JsonString &operator=(const char *cstr) {
    this->assign(cstr);
    return *this;
  }

  //! Assign new contents to the string, replacing its current content
  JsonString &operator=(const std::string &rhs) {
    this->assign(rhs);
    return *this;
  }

  //! Append a JSON string
  JsonString &operator+=(const JsonString &str) {
    this->append(str);
    return *this;
  }

  //! Append a c-style string
  JsonString &operator+=(const char *cstr) {
    this->append(cstr);
    return *this;
  }

  //! Append a character to string
  JsonString &operator+=(char c) {
    this->append(c);
    return *this;
  }

  //! Equality
  bool operator==(const JsonString &rhs) const {
    return (mod_json_string_compare(str_, rhs.str_) == 0);
  }

  //! Inequality
  bool operator!=(const JsonString &rhs) const {
    return !(*this == rhs);
  }

  //! Retrieve a writable reference to the character at index n.
  //! The index is not range-checked (see at()); throws std::runtime_error
  //! when the string cannot be made writable.
  char &operator[](size_type n) {
    if (!copy_and_leak()) {
      throw std::runtime_error("JsonString::operator[]");
    }
    return *(str_->first + n);
  }

  //! Retrieve the character at index n.
  //! Neither the index nor the handle is checked here.
  const char &operator[](size_type n) const {
    return *(str_->first + n);
  }

  //! Retrieve true if the string holds a handle
  bool is_valid(void) const {
    return (str_ != (mod_json_string_t *)0);
  }

  //! Retrieve true if the string is empty
  bool empty(void) const {
    return mod_json_string_empty(str_);
  }

  //! Assign a JSON string.
  //! The handle of rhs is acquired before the current one is released, and
  //! self-assignment is a no-op; releasing first could hand a handle that
  //! was just released to mod_json_string_grab().
  void assign(const JsonString &rhs) {
    if (this == &rhs) {
      return;
    }
    mod_json_string_t *grabbed = rhs.str_ ? mod_json_string_grab(rhs.str_) : 0;
    mod_json_string_unset(str_);
    str_ = grabbed;
  }

#if __cplusplus >= 201103L
  //! Assign a JSON string by taking over the handle of rhs.
  //! rhs is detached before the previous handle is released, which makes
  //! self-move a no-op instead of dropping the content.
  void assign(JsonString &&rhs) noexcept {
    mod_json_string_t *taken = rhs.str_;
    rhs.str_ = 0;
    mod_json_string_t *previous = str_;
    str_ = taken;
    mod_json_string_unset(previous);
  }
#endif

  //! Assign a c-style string (a null pointer leaves the string untouched).
  //! Throws std::runtime_error on failure.
  void assign(const char *cstr) {
    if (cstr) {
      if (!copy_on_write() ||
          mod_json_string_assign(str_, cstr,
                                 (mod_json_size_t)std::strlen(cstr)) != 0) {
        throw std::runtime_error("JsonString::assign");
      }
    }
  }

  //! Assign a buffer of len bytes; throws std::runtime_error on failure
  void assign(const char *cstr, size_type len) {
    if (!copy_on_write() || mod_json_string_assign(str_, cstr, len) != 0) {
      throw std::runtime_error("JsonString::assign");
    }
  }

  //! Assign a STL-style string; throws std::runtime_error on failure
  void assign(const std::string &str) {
    if (!copy_on_write() ||
        mod_json_string_assign(str_, str.c_str(),
                               (mod_json_size_t)str.size()) != 0) {
      throw std::runtime_error("JsonString::assign");
    }
  }

  //! Append a JSON string (a string without handle appends nothing).
  //! Throws std::runtime_error on failure.
  void append(const JsonString &str) {
    if (str.str_) {
      if (!copy_on_write() || mod_json_string_add(str_, str.str_) != 0) {
        throw std::runtime_error("JsonString::append");
      }
    }
  }

  //! Append a c-style string (a null pointer appends nothing).
  //! Throws std::runtime_error on failure.
  void append(const char *cstr) {
    if (cstr) {
      if (!copy_on_write() ||
          mod_json_string_append(str_, cstr,
                                 (mod_json_size_t)std::strlen(cstr)) != 0) {
        throw std::runtime_error("JsonString::append");
      }
    }
  }

  //! Append a buffer of len bytes; throws std::runtime_error on failure
  void append(const char *cstr, size_type len) {
    if (!copy_on_write() || mod_json_string_append(str_, cstr, len) != 0) {
      throw std::runtime_error("JsonString::append");
    }
  }

  //! Append a STL-style string; throws std::runtime_error on failure
  void append(const std::string &str) {
    if (!copy_on_write() ||
        mod_json_string_append(str_, str.c_str(),
                               (mod_json_size_t)str.size()) != 0) {
      throw std::runtime_error("JsonString::append");
    }
  }

  //! Append a character to string; throws std::runtime_error on failure
  void append(char c) {
    if (!copy_on_write() || mod_json_string_append(str_, &c, 1) != 0) {
      throw std::runtime_error("JsonString::append");
    }
  }

  //! Retrieve a writable reference to the character at index n.
  //! Throws std::out_of_range when n >= size() and std::runtime_error when
  //! the string cannot be made writable.
  char &at(size_type n) {
    if (this->size() <= n) {
      throw std::out_of_range("JsonString::at");
    }
    if (!copy_and_leak()) {
      throw std::runtime_error("JsonString::at");
    }
    return *(str_->first + n);
  }

  //! Retrieve the character at index n.
  //! Throws std::out_of_range when n >= size().
  const char &at(size_type n) const {
    if (this->size() <= n) {
      throw std::out_of_range("JsonString::at");
    }
    return *(str_->first + n);
  }

  //! Request a change in capacity; throws std::runtime_error on failure
  void reserve(size_type n) {
    if (!copy_on_write() || mod_json_string_reserve(str_, n) != 0) {
      throw std::runtime_error("JsonString::reserve");
    }
  }

  //! Clear the JSON string: releases the handle and holds none afterwards
  void clear(void) {
    mod_json_string_unset(str_);
    str_ = 0;
  }

  //! Exchange the content with another JSON string
  void swap(JsonString &rhs) {
    mod_json_string_t *str = str_;
    str_ = rhs.str_;
    rhs.str_ = str;
  }

  //! Retrieve the data pointer
  char *data(void) {
    return mod_json_string_data(str_);
  }

  //! Retrieve the data pointer
  const char *data(void) const {
    return mod_json_string_data(str_);
  }

  //! Retrieve HASH of a JSON string
  size_type hash(void) const {
    return mod_json_string_hash(str_);
  }

  //! Compare two JSON strings (case sensitive)
  int compare(const JsonString &rhs) const {
    return mod_json_string_compare(str_, rhs.str_);
  }

  //! Compare with a c-style string (case sensitive).
  //! A null side orders before a non-null side; two null sides are equal.
  int compare(const char *cstr) const {
    const char *self = this->c_str();
    if (self && cstr) {
      return std::strcmp(self, cstr);
    }

    // particular case
    if (!self && cstr) {
      return -1;
    } else if (self && !cstr) {
      return 1;
    }
    return 0;
  }

  //! Encode a JSON string (result of mod_json_string_encode())
  JsonString encode(void) const {
    JsonString ret;
    ret.str_ = mod_json_string_encode(str_);
    return ret;
  }

  //! Decode a JSON string (result of mod_json_string_decode())
  JsonString decode(void) const {
    JsonString ret;
    ret.str_ = mod_json_string_decode(str_);
    return ret;
  }

  //! Retrieve the capacity of string
  size_type capacity(void) const {
    return mod_json_string_capacity(str_);
  }

  //! Retrieve the length of string
  size_type size(void) const {
    return mod_json_string_length(str_);
  }

  //! Retrieve the length of string
  size_type length(void) const {
    return mod_json_string_length(str_);
  }

  //! Retrieve refer-counter of string
  ssize_type refer(void) const {
    return mod_json_string_refer(str_);
  }

  //! Retrieve the c-style string
  const char *c_str(void) const {
    return mod_json_string_cstr(str_);
  }

  //! Convert string to float
  float_type as_float(void) const {
    return mod_json_string_float(str_);
  }

  //! Convert string to integer
  integer_type as_integer(void) const {
    return mod_json_string_integer(str_);
  }

  //! Retrieve string as a STL string (empty when this string is empty)
  std::string as_stl_string(void) const {
    if (!this->empty()) {
      return std::string(this->data(), this->size());
    }
    return std::string();
  }

 protected:
  //! Prepare the string for writing.
  //! A shared handle is given up and replaced by a clone; when there is no
  //! handle yet, an empty string is created. Returns false when no usable
  //! handle could be obtained.
  bool copy_on_write(void) {
    if (str_) {
      if (mod_json_string_is_shared(str_)) {
        mod_json_string_put(str_);
        str_ = mod_json_string_clone(str_);
      }
    } else {
      str_ = mod_json_string_set("", 0);
    }
    return (str_ != 0);
  }

  //! Prepare the string for writing and mark it as leaked, because a
  //! reference into its storage is about to be handed out
  bool copy_and_leak(void) {
    if (copy_on_write()) {
      mod_json_string_set_leaked(str_);
      return true;
    }
    return false;
  }

 private:
  mod_json_string_t *str_;
};

class JsonArray;
class JsonObject;

/*! JSON Value
 */
class JsonValue {
 public:
  typedef mod_json_size_t size_type;
  typedef mod_json_ssize_t ssize_type;
  typedef mod_json_float_t float_type;
  typedef mod_json_integer_t integer_type;

  //! Constructor
  JsonValue(void) : val_(0) {}

  //! Constructor
  explicit JsonValue(const bool &val) {
    val_ = mod_json_value_set_boolean((mod_json_boolean_t)val);
  }

  //! Constructor
  explicit JsonValue(const signed char &val) {
    val_ = mod_json_value_set_integer((mod_json_integer_t)val);
  }

  //! Constructor
  explicit JsonValue(const char &val) {
    val_ = mod_json_value_set_integer((mod_json_integer_t)val);
  }

  //! Constructor
  explicit JsonValue(const short int &val) {
    val_ = mod_json_value_set_integer((mod_json_integer_t)val);
  }

  //! Constructor
  explicit JsonValue(const int &val) {
    val_ = mod_json_value_set_integer((mod_json_integer_t)val);
  }

  //! Constructor
  explicit JsonValue(const long int &val) {
    val_ = mod_json_value_set_integer((mod_json_integer_t)val);
  }

  //! Constructor
  explicit JsonValue(const long long int &val) {
    val_ = mod_json_value_set_integer((mod_json_integer_t)val);
  }

  //! Constructor
  explicit JsonValue(const float &val) {
    val_ = mod_json_value_set_float((mod_json_float_t)val);
  }

  //! Constructor
  explicit JsonValue(const double &val) {
    val_ = mod_json_value_set_float((mod_json_float_t)val);
  }

  //! Constructor
  explicit JsonValue(const long double &val) {
    val_ = mod_json_value_set_float((mod_json_float_t)val);
  }

  //! Constructor
  explicit JsonValue(const unsigned char &val) {
    val_ = mod_json_value_set_integer((mod_json_integer_t)val);
  }

  //! Constructor
  explicit JsonValue(const unsigned short int &val) {
    val_ = mod_json_value_set_integer((mod_json_integer_t)val);
  }

  //! Constructor
  explicit JsonValue(const unsigned int &val) {
    val_ = mod_json_value_set_integer((mod_json_integer_t)val);
  }

  //! Constructor
  explicit JsonValue(const unsigned long int &val) {
    val_ = mod_json_value_set_integer((mod_json_integer_t)val);
  }

  //! Constructor
  explicit JsonValue(const unsigned long long int &val) {
    val_ = mod_json_value_set_integer((mod_json_integer_t)val);
  }

  //! Constructor
  JsonValue(const JsonString &val) {
    val_ = mod_json_value_set_string(*(mod_json_string_t **)&val);
  }

  //! Constructor
  JsonValue(const char *val) {
    val_ = mod_json_value_set_buffer(
        val, val ? (mod_json_size_t)std::strlen(val) : 0);
  }

  //! Constructor
  JsonValue(const char *val, size_type len) {
    val_ = mod_json_value_set_buffer(val, len);
  }

  //! Constructor
  JsonValue(const std::string &val) {
    val_ = mod_json_value_set_buffer(val.data(), (mod_json_size_t)val.size());
  }

  //! Constructor
  JsonValue(const JsonArray &val) {
    val_ = mod_json_value_set_array(*(mod_json_array_t **)&val);
  }

  //! Constructor
  JsonValue(const JsonObject &val) {
    val_ = mod_json_value_set_object(*(mod_json_object_t **)&val);
  }

  //! Constructor
  JsonValue(const JsonValue &rhs) : val_(0) {
    if (rhs.val_) {
      val_ = mod_json_value_grab(rhs.val_);
    }
  }

#if __cplusplus >= 201103L
  //! Constructor
  JsonValue(JsonValue &&rhs) : val_(rhs.val_) {
    rhs.val_ = 0;
  }
#endif

  //! Destructor
  ~JsonValue(void) {
    mod_json_value_unset(val_);
  }

  //! Assign new contents to the value, replacing its current content
  JsonValue &operator=(const JsonValue &rhs) {
    this->assign(rhs);
    return *this;
  }

#if __cplusplus >= 201103L
  //! Assign new contents to the value, replacing its current content
  JsonValue &operator=(JsonValue &&rhs) {
    this->assign(std::move(rhs));
    return *this;
  }
#endif

  //! Assign new contents to the value, replacing its current content
  JsonValue &operator=(const bool &val) {
    this->assign(val);
    return *this;
  }

  //! Assign new contents to the value, replacing its current content
  JsonValue &operator=(const signed char &val) {
    this->assign(val);
    return *this;
  }

  //! Assign new contents to the value, replacing its current content
  JsonValue &operator=(const char &val) {
    this->assign(val);
    return *this;
  }

  //! Assign new contents to the value, replacing its current content
  JsonValue &operator=(const short int &val) {
    this->assign(val);
    return *this;
  }

  //! Assign new contents to the value, replacing its current content
  JsonValue &operator=(const int &val) {
    this->assign(val);
    return *this;
  }

  //! Assign new contents to the value, replacing its current content
  JsonValue &operator=(const long int &val) {
    this->assign(val);
    return *this;
  }

  //! Assign new contents to the value, replacing its current content
  JsonValue &operator=(const long long int &val) {
    this->assign(val);
    return *this;
  }

  //! Assign new contents to the value, replacing its current content
  JsonValue &operator=(const float &val) {
    this->assign(val);
    return *this;
  }

  //! Assign new contents to the value, replacing its current content
  JsonValue &operator=(const double &val) {
    this->assign(val);
    return *this;
  }

  //! Assign new contents to the value, replacing its current content
  JsonValue &operator=(const long double &val) {
    this->assign(val);
    return *this;
  }

  //! Assign new contents to the value, replacing its current content
  JsonValue &operator=(const unsigned char &val) {
    this->assign(val);
    return *this;
  }

  //! Assign new contents to the value, replacing its current content
  JsonValue &operator=(const unsigned short int &val) {
    this->assign(val);
    return *this;
  }

  //! Assign new contents to the value, replacing its current content
  JsonValue &operator=(const unsigned int &val) {
    this->assign(val);
    return *this;
  }

  //! Assign new contents to the value, replacing its current content
  JsonValue &operator=(const unsigned long int &val) {
    this->assign(val);
    return *this;
  }

  //! Assign new contents to the value, replacing its current content
  JsonValue &operator=(const unsigned long long int &val) {
    this->assign(val);
    return *this;
  }

  //! Assign new contents to the value, replacing its current content
  JsonValue &operator=(const JsonString &val) {
    this->assign(val);
    return *this;
  }

  //! Assign new contents to the value, replacing its current content
  JsonValue &operator=(const char *val) {
    this->assign(val);
    return *this;
  }

  //! Assign new contents to the value, replacing its current content
  JsonValue &operator=(const std::string &val) {
    this->assign(val);
    return *this;
  }

  //! Assign new contents to the value, replacing its current content
  JsonValue &operator=(const JsonArray &arr) {
    this->assign(arr);
    return *this;
  }

  //! Assign new contents to the value, replacing its current content
  JsonValue &operator=(const JsonObject &obj) {
    this->assign(obj);
    return *this;
  }

  //! Equality
  bool operator==(const JsonValue &rhs) const {
    return mod_json_value_is_equal(val_, rhs.val_);
  }

  //! No equality
  bool operator!=(const JsonValue &rhs) const {
    return !(*this == rhs);
  }

  //! Treat self value as object by force, retrieving value of a key
  JsonValue &operator[](const char *key) {
    return this->get_value(key);
  }

  //! Retrieve a reference of value by a key
  JsonValue operator[](const char *key) const {
    return this->get_value(key);
  }

  //! Treat self value as object by force, retrieving value of a key
  JsonValue &operator[](const JsonString &key) {
    return this->get_value(key.c_str());
  }

  //! Retrieve a reference of value by a key
  JsonValue operator[](const JsonString &key) const {
    return this->get_value(key.c_str());
  }

  //! Treat self value as object by force, retrieving value of a key
  JsonValue &operator[](const std::string &key) {
    return this->get_value(key.c_str());
  }

  //! Retrieve a reference of value by a key
  JsonValue operator[](const std::string &key) const {
    return this->get_value(key.c_str());
  }

  //! Treat self value as array by force, retrieving value at index n
  JsonValue &operator[](size_type n) {
    return this->get_value(n);
  }

  //! Retrieve a reference of value at index n
  JsonValue operator[](size_type n) const {
    return this->get_value(n);
  }

  //! Retrieve non-zero if the value is valid
  bool is_valid(void) const {
    return (val_ != (mod_json_value_t *)0);
  }

  //! Retrieve non-zero if the value is a object
  bool is_object(void) const {
    return mod_json_value_is_object(val_);
  }

  //! Retrieve non-zero if the value is an array
  bool is_array(void) const {
    return mod_json_value_is_array(val_);
  }

  //! Retrieve non-zero if the value is a string
  bool is_string(void) const {
    return mod_json_value_is_string(val_);
  }

  //! Retrieve non-zero if the value is null
  bool is_null(void) const {
    return mod_json_value_is_null(val_);
  }

  //! Retrieve non-zero if the value is a float
  bool is_float(void) const {
    return mod_json_value_is_float(val_);
  }

  //! Retrieve non-zero if the value is an integer
  bool is_integer(void) const {
    return mod_json_value_is_integer(val_);
  }

  //! Retrieve non-zero if the value is a boolean
  bool is_boolean(void) const {
    return mod_json_value_is_boolean(val_);
  }

  //! Assign new contents to the value, replacing its current content
  void assign(const JsonValue &rhs) {
    mod_json_value_unset(val_);
    val_ = rhs.val_ ? mod_json_value_grab(rhs.val_) : 0;
  }

#if __cplusplus >= 201103L
  //! Assign new contents to the value, replacing its current content
  void assign(JsonValue &&rhs) {
    mod_json_value_unset(val_);
    val_ = rhs.val_;
    rhs.val_ = 0;
  }
#endif

  //! Assign new contents to the value, replacing its current content
  void assign(const bool &val) {
    if (!copy_on_write()) {
      throw std::runtime_error("JsonValue::assign");
    }
    mod_json_value_assign_boolean(val_, (mod_json_boolean_t)val);
  }

  //! Assign new contents to the value, replacing its current content
  void assign(const signed char &val) {
    if (!copy_on_write()) {
      throw std::runtime_error("JsonValue::assign");
    }
    mod_json_value_assign_integer(val_, (mod_json_integer_t)val);
  }

  //! Assign new contents to the value, replacing its current content
  void assign(const char &val) {
    if (!copy_on_write()) {
      throw std::runtime_error("JsonValue::assign");
    }
    mod_json_value_assign_integer(val_, (mod_json_integer_t)val);
  }

  //! Assign new contents to the value, replacing its current content
  void assign(const short int &val) {
    if (!copy_on_write()) {
      throw std::runtime_error("JsonValue::assign");
    }
    mod_json_value_assign_integer(val_, (mod_json_integer_t)val);
  }

  //! Assign new contents to the value, replacing its current content
  void assign(const int &val) {
    if (!copy_on_write()) {
      throw std::runtime_error("JsonValue::assign");
    }
    mod_json_value_assign_integer(val_, (mod_json_integer_t)val);
  }

  //! Assign new contents to the value, replacing its current content
  void assign(const long int &val) {
    if (!copy_on_write()) {
      throw std::runtime_error("JsonValue::assign");
    }
    mod_json_value_assign_integer(val_, (mod_json_integer_t)val);
  }

  //! Assign new contents to the value, replacing its current content
  void assign(const long long int &val) {
    if (!copy_on_write()) {
      throw std::runtime_error("JsonValue::assign");
    }
    mod_json_value_assign_integer(val_, (mod_json_integer_t)val);
  }

  //! Assign new contents to the value, replacing its current content
  void assign(const float &val) {
    if (!copy_on_write()) {
      throw std::runtime_error("JsonValue::assign");
    }
    mod_json_value_assign_float(val_, (mod_json_float_t)val);
  }

  //! Assign new contents to the value, replacing its current content
  void assign(const double &val) {
    if (!copy_on_write()) {
      throw std::runtime_error("JsonValue::assign");
    }
    mod_json_value_assign_float(val_, (mod_json_float_t)val);
  }

  //! Assign new contents to the value, replacing its current content
  void assign(const long double &val) {
    if (!copy_on_write()) {
      throw std::runtime_error("JsonValue::assign");
    }
    mod_json_value_assign_float(val_, (mod_json_float_t)val);
  }

  //! Assign new contents to the value, replacing its current content
  void assign(const unsigned char &val) {
    if (!copy_on_write()) {
      throw std::runtime_error("JsonValue::assign");
    }
    mod_json_value_assign_integer(val_, (mod_json_integer_t)val);
  }

  //! Assign new contents to the value, replacing its current content
  void assign(const unsigned short int &val) {
    if (!copy_on_write()) {
      throw std::runtime_error("JsonValue::assign");
    }
    mod_json_value_assign_integer(val_, (mod_json_integer_t)val);
  }

  //! Assign new contents to the value, replacing its current content
  void assign(const unsigned int &val) {
    if (!copy_on_write()) {
      throw std::runtime_error("JsonValue::assign");
    }
    mod_json_value_assign_integer(val_, (mod_json_integer_t)val);
  }

  //! Assign new contents to the value, replacing its current content
  void assign(const unsigned long int &val) {
    if (!copy_on_write()) {
      throw std::runtime_error("JsonValue::assign");
    }
    mod_json_value_assign_integer(val_, (mod_json_integer_t)val);
  }

  //! Assign new contents to the value, replacing its current content
  void assign(const unsigned long long int &val) {
    if (!copy_on_write()) {
      throw std::runtime_error("JsonValue::assign");
    }
    mod_json_value_assign_integer(val_, (mod_json_integer_t)val);
  }

  //! Assign new contents to the value, replacing its current content
  void assign(const JsonString &val) {
    if (!copy_on_write()) {
      throw std::runtime_error("JsonValue::assign");
    }
    mod_json_value_assign_string(val_, *(mod_json_string_t **)&val);
  }

  //! Assign new contents to the value, replacing its current content
  void assign(const char *val) {
    JsonString str(val);
    if (!str.is_valid() || !copy_on_write()) {
      throw std::runtime_error("JsonValue::assign");
    }
    mod_json_value_assign_string(val_, *(mod_json_string_t **)&str);
  }

  //! Assign new contents to the value, replacing its current content
  void assign(const char *val, size_type len) {
    JsonString str(val, len);
    if (!str.is_valid() || !copy_on_write()) {
      throw std::runtime_error("JsonValue::assign");
    }
    mod_json_value_assign_string(val_, *(mod_json_string_t **)&str);
  }

  //! Assign new contents to the value, replacing its current content
  void assign(const std::string &val) {
    JsonString str(val);
    if (!str.is_valid() || !copy_on_write()) {
      throw std::runtime_error("JsonValue::assign");
    }
    mod_json_value_assign_string(val_, *(mod_json_string_t **)&str);
  }

  //! Assign new contents to the value, replacing its current content
  void assign(const JsonArray &arr);

  //! Assign new contents to the value, replacing its current content
  void assign(const JsonObject &obj);

  //! Retrieve refer-counter of JSON value
  ssize_type refer(void) const {
    return mod_json_value_refer(val_);
  }

  //! Retrieve value as JSON format string
  JsonString as_json_string(void) const {
    mod_json_string_t *tmp = mod_json_dump(val_);
    JsonString ret = *reinterpret_cast<JsonString *>(&tmp);
    if (tmp) {
      mod_json_string_unset(tmp);
    }
    return ret;
  }

  //! Retrieve value as a STL string
  std::string as_stl_string(void) const {
    if (is_string()) {
      return to_string().as_stl_string();
    }
    return std::string();
  }

  //! Retrieve value as JSON string
  const JsonString &as_string(void) const {
    if (!is_string()) {
      throw std::logic_error("JsonValue::as_string");
    }
    return to_string();
  }

  //! Retrieve value as c-style string
  const char *as_c_string(void) const {
    return mod_json_value_cstring(val_);
  }

  //! Retrieve value as JSON string
  JsonString &as_string(void) {
    if (!is_string()) {
      throw std::logic_error("JsonValue::as_string");
    }
    if (!copy_and_leak()) {
      throw std::runtime_error("JsonValue::as_string");
    }
    return to_string();
  }

  //! Retrieve value as JSON array
  const JsonArray &as_array(void) const {
    if (!is_array()) {
      throw std::logic_error("JsonValue::as_array");
    }
    return to_array();
  }

  //! Retrieve value as JSON array
  JsonArray &as_array(void) {
    if (!is_array()) {
      throw std::logic_error("JsonValue::as_array");
    }
    if (!copy_and_leak()) {
      throw std::runtime_error("JsonValue::as_array");
    }
    return to_array();
  }

  //! Retrieve value as JSON object
  const JsonObject &as_object(void) const {
    if (!is_object()) {
      throw std::logic_error("JsonValue::as_object");
    }
    return to_object();
  }

  //! Retrieve value as JSON object
  JsonObject &as_object(void) {
    if (!is_object()) {
      throw std::logic_error("JsonValue::as_object");
    }
    if (!copy_and_leak()) {
      throw std::runtime_error("JsonValue::as_object");
    }
    return to_object();
  }

  //! Retrieve value as float
  float_type as_float(void) const {
    return mod_json_value_float(val_);
  }

  //! Retrieve value as integer
  integer_type as_integer(void) const {
    return mod_json_value_integer(val_);
  }

  //! Retrieve value as boolean
  bool as_bool(void) const {
    return mod_json_value_boolean(val_);
  }

  //! Exchange the content with another JSON value
  void swap(JsonValue &rhs) {
    mod_json_value_t *val = val_;
    val_ = rhs.val_;
    rhs.val_ = val;
  }

  //! Merge another JSON value
  void merge(const JsonValue &rhs) {
    if (!copy_on_write()) {
      throw std::runtime_error("JsonValue::merge");
    }
    mod_json_value_merge(val_, rhs.val_);
  }

  //! Parse a sting as a JSON value
  bool parse(const char *str) {
    mod_json_token_t *tok = mod_json_token_create(NULL);

    if (tok) {
      mod_json_value_t *jval = mod_json_parse(tok, str);

      mod_json_token_destroy(tok);
      if (jval) {
        *this = *reinterpret_cast<JsonValue *>(&jval);
        mod_json_value_unset(jval);
        return is_valid();
      }
    }
    return false;
  }

  //! Parse a sting as a JSON value
  bool parse(const JsonString &str) {
    return this->parse(str.c_str());
  }

  //! Parse a sting as a JSON value
  bool parse(const std::string &str) {
    return this->parse(str.c_str());
  }

 protected:
  //! Clone the value for writing
  bool copy_on_write(void) {
    if (val_) {
      if (mod_json_value_is_shared(val_)) {
        mod_json_value_put(val_);
        val_ = mod_json_value_clone(val_);
      }
    } else {
      val_ = mod_json_value_set_null();
    }
    return (val_ != 0);
  }

  //! Clone the value and leak it
  bool copy_and_leak(void) {
    if (copy_on_write()) {
      mod_json_value_set_leaked(val_);
      return true;
    }
    return false;
  }

  //! Convert value to JSON object
  JsonObject &to_object(void);

  //! Convert value to JSON object
  const JsonObject &to_object(void) const;

  //! Convert value to JSON array
  JsonArray &to_array(void);

  //! Convert value to JSON array
  const JsonArray &to_array(void) const;

  //! Convert value to JSON string
  JsonString &to_string(void);

  //! Convert value to JSON string
  const JsonString &to_string(void) const;

  //! Treat self value as object by force, retrieving value of a key
  JsonValue &get_value(const char *key);

  //! Retrieve a reference of value by a key
  JsonValue get_value(const char *key) const;

  //! Treat self value as array by force, retrieving value at index n
  JsonValue &get_value(size_type n);

  //! Retrieve a reference of value at index n
  JsonValue get_value(size_type n) const;

  //! Set the new array to the value, replacing its current content
  void set_value(const JsonArray &val);

  //! Set the new object to the value, replacing its current content
  void set_value(const JsonObject &val);

 private:
  mod_json_value_t *val_;
};

/*! JSON Array
 *
 *  Handle holding a pointer to a `mod_json_array_t`. Copies obtain their
 *  pointer through mod_json_array_grab(); members that modify the array call
 *  copy_on_write() first, which clones the array whenever
 *  mod_json_array_is_shared() reports it as shared. A default constructed
 *  handle holds no array at all (see is_valid()).
 */
class JsonArray {
 public:
  typedef mod_json_size_t size_type;
  typedef mod_json_ssize_t ssize_type;

  class iterator;
  class const_iterator;
  class reverse_iterator;
  class const_reverse_iterator;

  /*! Const iterator of JSON Array
   *
   *  Walks the array's `mod_json_value_t *` slots with pointer arithmetic
   *  and exposes each slot as a `const JsonValue` by reinterpreting the slot
   *  address.
   */
  class const_iterator {
   public:
    //! Constructor (holds a null slot pointer)
    const_iterator(void) : iter_(0) {}

    //! Equality: both iterators point at the same slot
    bool operator==(const const_iterator &rhs) const {
      return (iter_ == rhs.iter_);
    }

    //! No equality
    bool operator!=(const const_iterator &rhs) const {
      return (iter_ != rhs.iter_);
    }

    //! Increment (Prefix)
    const_iterator &operator++() {
      ++iter_;
      return *this;
    }

    //! Increment (Suffix), returns the position before the step
    const_iterator operator++(int) {
      const_iterator tmp = *this;
      ++iter_;
      return tmp;
    }

    //! Decrement (Prefix)
    const_iterator &operator--() {
      --iter_;
      return *this;
    }

    //! Decrement (Suffix), returns the position before the step
    const_iterator operator--(int) {
      const_iterator tmp = *this;
      --iter_;
      return tmp;
    }

    //! Indirection (eg. *iter)
    const JsonValue &operator*() const {
      return *reinterpret_cast<const JsonValue *>(iter_);
    }

    //! Structure dereference (eg. iter->)
    const JsonValue *operator->() const {
      return reinterpret_cast<const JsonValue *>(iter_);
    }

    //! Retrieve as const reverse iterator (same slot, reversed direction)
    operator const_reverse_iterator() const {
      return const_reverse_iterator(iter_);
    }

   protected:
    friend class JsonArray;
    friend class JsonArray::iterator;
    friend class JsonArray::reverse_iterator;
    friend class JsonArray::const_reverse_iterator;

    //! Constructor for friends
    const_iterator(mod_json_value_t *const *iter) : iter_(iter) {}

   private:
    mod_json_value_t *const *iter_;
  };

  /*! iterator of JSON Array
   *
   *  Mutable counterpart of const_iterator: walks the array's
   *  `mod_json_value_t *` slots and exposes each one as a `JsonValue`.
   */
  class iterator {
   public:
    //! Constructor (holds a null slot pointer)
    iterator(void) : iter_(0) {}

    //! Equality: both iterators point at the same slot
    bool operator==(const iterator &rhs) const {
      return (iter_ == rhs.iter_);
    }

    //! No equality
    bool operator!=(const iterator &rhs) const {
      return (iter_ != rhs.iter_);
    }

    //! Increment (Prefix)
    iterator &operator++() {
      ++iter_;
      return *this;
    }

    //! Increment (Suffix), returns the position before the step
    iterator operator++(int) {
      iterator tmp = *this;
      ++iter_;
      return tmp;
    }

    //! Decrement (Prefix)
    iterator &operator--() {
      --iter_;
      return *this;
    }

    //! Decrement (Suffix), returns the position before the step
    iterator operator--(int) {
      iterator tmp = *this;
      --iter_;
      return tmp;
    }

    //! Indirection (eg. *iter)
    JsonValue &operator*() const {
      return *reinterpret_cast<JsonValue *>(iter_);
    }

    //! Structure dereference (eg. iter->)
    JsonValue *operator->() const {
      return reinterpret_cast<JsonValue *>(iter_);
    }

    //! Retrieve as const iterator
    operator const_iterator() const {
      return const_iterator(iter_);
    }

    //! Retrieve as reverse iterator (same slot, reversed direction)
    operator reverse_iterator() const {
      return reverse_iterator(iter_);
    }

    //! Retrieve as const reverse iterator (same slot, reversed direction)
    operator const_reverse_iterator() const {
      return const_reverse_iterator(iter_);
    }

   protected:
    friend class JsonArray;
    friend class JsonArray::reverse_iterator;

    //! Constructor for friends
    iterator(mod_json_value_t **iter) : iter_(iter) {}

   private:
    mod_json_value_t **iter_;
  };

  /*! Const Reverse iterator of JSON Array
   *
   *  Same representation as const_iterator, but incrementing moves the slot
   *  pointer backwards and decrementing moves it forwards.
   */
  class const_reverse_iterator {
   public:
    //! Constructor (holds a null slot pointer)
    const_reverse_iterator(void) : iter_(0) {}

    //! Equality: both iterators point at the same slot
    bool operator==(const const_reverse_iterator &rhs) const {
      return (iter_ == rhs.iter_);
    }

    //! No equality
    bool operator!=(const const_reverse_iterator &rhs) const {
      return (iter_ != rhs.iter_);
    }

    //! Increment (Prefix), steps the slot pointer backwards
    const_reverse_iterator &operator++() {
      --iter_;
      return *this;
    }

    //! Increment (Suffix), returns the position before the step
    const_reverse_iterator operator++(int) {
      const_reverse_iterator tmp = *this;
      --iter_;
      return tmp;
    }

    //! Decrement (Prefix), steps the slot pointer forwards
    const_reverse_iterator &operator--() {
      ++iter_;
      return *this;
    }

    //! Decrement (Suffix), returns the position before the step
    const_reverse_iterator operator--(int) {
      const_reverse_iterator tmp = *this;
      ++iter_;
      return tmp;
    }

    //! Indirection (eg. *iter)
    const JsonValue &operator*() const {
      return *reinterpret_cast<const JsonValue *>(iter_);
    }

    //! Structure dereference (eg. iter->)
    const JsonValue *operator->() const {
      return reinterpret_cast<const JsonValue *>(iter_);
    }

    //! Retrieve as const iterator (same slot, forward direction)
    operator const_iterator() const {
      return const_iterator(iter_);
    }

   protected:
    friend class JsonArray;
    friend class JsonArray::iterator;
    friend class JsonArray::const_iterator;
    friend class JsonArray::reverse_iterator;

    //! Constructor for friends
    const_reverse_iterator(mod_json_value_t *const *iter) : iter_(iter) {}

   private:
    mod_json_value_t *const *iter_;
  };

  /*! Reverse iterator of JSON Array
   *
   *  Same representation as iterator, but incrementing moves the slot
   *  pointer backwards and decrementing moves it forwards.
   */
  class reverse_iterator {
   public:
    //! Constructor (holds a null slot pointer)
    reverse_iterator(void) : iter_(0) {}

    //! Equality: both iterators point at the same slot
    bool operator==(const reverse_iterator &rhs) const {
      return (iter_ == rhs.iter_);
    }

    //! No equality
    bool operator!=(const reverse_iterator &rhs) const {
      return (iter_ != rhs.iter_);
    }

    //! Increment (Prefix), steps the slot pointer backwards
    reverse_iterator &operator++() {
      --iter_;
      return *this;
    }

    //! Increment (Suffix), returns the position before the step
    reverse_iterator operator++(int) {
      reverse_iterator tmp = *this;
      --iter_;
      return tmp;
    }

    //! Decrement (Prefix), steps the slot pointer forwards
    reverse_iterator &operator--() {
      ++iter_;
      return *this;
    }

    //! Decrement (Suffix), returns the position before the step
    reverse_iterator operator--(int) {
      reverse_iterator tmp = *this;
      ++iter_;
      return tmp;
    }

    //! Indirection (eg. *iter)
    JsonValue &operator*() const {
      return *reinterpret_cast<JsonValue *>(iter_);
    }

    //! Structure dereference (eg. iter->)
    JsonValue *operator->() const {
      return reinterpret_cast<JsonValue *>(iter_);
    }

    //! Retrieve as iterator (same slot, forward direction)
    operator iterator() const {
      return iterator(iter_);
    }

    //! Retrieve as const iterator (same slot, forward direction)
    operator const_iterator() const {
      return const_iterator(iter_);
    }

    //! Retrieve as const reverse iterator
    operator const_reverse_iterator() const {
      return const_reverse_iterator(iter_);
    }

   protected:
    friend class JsonArray;
    friend class JsonArray::iterator;

    //! Constructor for friends
    reverse_iterator(mod_json_value_t **iter) : iter_(iter) {}

   private:
    mod_json_value_t **iter_;
  };

  //! Constructor (holds no array)
  JsonArray(void) : arr_(0) {}

  //! Copy constructor: grabs the array held by rhs, if any
  JsonArray(const JsonArray &rhs) : arr_(0) {
    if (rhs.arr_) {
      arr_ = mod_json_array_grab(rhs.arr_);
    }
  }

#if __cplusplus >= 201103L
  //! Move constructor: takes over the array of rhs and leaves rhs empty
  JsonArray(JsonArray &&rhs) : arr_(rhs.arr_) {
    rhs.arr_ = 0;
  }
#endif

  //! Destructor
  ~JsonArray(void) {
    mod_json_array_unset(arr_);
  }

  //! Assign new contents to the array, replacing its current content
  JsonArray &operator=(const JsonArray &rhs) {
    this->assign(rhs);
    return *this;
  }

#if __cplusplus >= 201103L
  //! Assign new contents to the array, replacing its current content
  JsonArray &operator=(JsonArray &&rhs) {
    this->assign(std::move(rhs));
    return *this;
  }
#endif

  //! Equality, as decided by mod_json_array_is_equal()
  bool operator==(const JsonArray &rhs) const {
    return mod_json_array_is_equal(arr_, rhs.arr_);
  }

  //! No equality
  bool operator!=(const JsonArray &rhs) const {
    return !(*this == rhs);
  }

  //! Retrieve the value at index n, if no one exists, throw an exception.
  //! Forwards to at(), so the same exceptions apply.
  JsonValue &operator[](size_type n) {
    return this->at(n);
  }

  //! Retrieve the value at index n, if no one exists, return a null value.
  JsonValue operator[](size_type n) const {
    return ((n < this->size()) ? this->get_value(n) : JsonValue());
  }

  //! Retrieve non-zero if the array is valid (an array is held)
  bool is_valid(void) const {
    return (arr_ != (mod_json_array_t *)0);
  }

  //! Retrieve non-zero if the array is empty
  bool empty(void) const {
    return mod_json_array_empty(arr_);
  }

  //! Retrieve the size of JSON array
  size_type size(void) const {
    return mod_json_array_count(arr_);
  }

  //! Retrieve the capacity of JSON array
  size_type capacity(void) const {
    return mod_json_array_capacity(arr_);
  }

  //! Retrieve refer-counter of JSON array
  ssize_type refer(void) const {
    return mod_json_array_refer(arr_);
  }

  //! Assign new contents to the array, replacing its current content.
  //! The array of rhs is grabbed before the current one is released, so the
  //! call stays well defined when rhs is *this or when rhs is only kept
  //! alive by the array being replaced.
  void assign(const JsonArray &rhs) {
    mod_json_array_t *next = rhs.arr_ ? mod_json_array_grab(rhs.arr_) : 0;
    mod_json_array_unset(arr_);
    arr_ = next;
  }

#if __cplusplus >= 201103L
  //! Assign new contents to the array, replacing its current content.
  //! rhs is emptied before the current array is released, so moving an
  //! object onto itself keeps its content and rhs is never read after the
  //! release.
  void assign(JsonArray &&rhs) {
    mod_json_array_t *taken = rhs.arr_;
    rhs.arr_ = 0;
    mod_json_array_unset(arr_);
    arr_ = taken;
  }
#endif

  //! Request a change in capacity.
  //! Throws std::runtime_error if the array cannot be made writable or the
  //! reservation fails.
  void reserve(size_type n) {
    if (!copy_on_write() || mod_json_array_reserve(arr_, n) != 0) {
      throw std::runtime_error("JsonArray::reserve");
    }
  }

  //! Reverse the order of the elements (nothing happens without an array)
  void reverse(void) {
    if (arr_ && copy_on_write()) {
      mod_json_array_reverse(arr_);
    }
  }

  //! Push a value to array.
  //! Throws std::runtime_error if the array cannot be made writable or the
  //! push fails.
  void push(const JsonValue &val) {
    // Work on a private copy of the handle, taken before copy_on_write()
    // may replace the underlying array.
    JsonValue tmp(val);

    if (!copy_on_write() ||
        mod_json_array_push(arr_, *((mod_json_value_t **)&tmp)) != 0) {
      throw std::runtime_error("JsonArray::push");
    }
  }

  //! Pop the last element from array (nothing happens without an array).
  //! Throws std::runtime_error if the array cannot be made writable.
  void pop(void) {
    if (arr_) {
      if (!copy_on_write()) {
        throw std::runtime_error("JsonArray::pop");
      }
      mod_json_array_pop(arr_);
    }
  }

  //! Remove the first element of array (nothing happens without an array).
  //! Throws std::runtime_error if the array cannot be made writable.
  void shift(void) {
    if (arr_) {
      if (!copy_on_write()) {
        throw std::runtime_error("JsonArray::shift");
      }
      mod_json_array_shift(arr_);
    }
  }

  //! Retrieve the value at index n.
  //! Throws std::out_of_range if n is not below size(), and
  //! std::runtime_error if the array cannot be made writable.
  JsonValue &at(size_type n) {
    if (this->size() <= n) {
      throw std::out_of_range("JsonArray::at");
    }
    if (!copy_and_leak()) {
      throw std::runtime_error("JsonArray::at");
    }
    return this->get_value(n);
  }

  //! Retrieve the value at index n.
  //! Throws std::out_of_range if n is not below size().
  const JsonValue &at(size_type n) const {
    if (this->size() <= n) {
      throw std::out_of_range("JsonArray::at");
    }
    return this->get_value(n);
  }

  //! Retrieve a reference to the first element.
  //! Throws std::out_of_range if the array has no elements, and
  //! std::runtime_error if the array cannot be made writable.
  JsonValue &front(void) {
    if (this->size() <= 0) {
      throw std::out_of_range("JsonArray::front");
    }
    if (!copy_and_leak()) {
      throw std::runtime_error("JsonArray::front");
    }
    return this->get_value(0);
  }

  //! Retrieve a reference to the first element.
  //! Throws std::out_of_range if the array has no elements.
  const JsonValue &front(void) const {
    if (this->size() <= 0) {
      throw std::out_of_range("JsonArray::front");
    }
    return this->get_value(0);
  }

  //! Retrieve a reference to the last element.
  //! Throws std::out_of_range if the array has no elements, and
  //! std::runtime_error if the array cannot be made writable.
  JsonValue &back(void) {
    if (this->size() <= 0) {
      throw std::out_of_range("JsonArray::back");
    }
    if (!copy_and_leak()) {
      throw std::runtime_error("JsonArray::back");
    }
    return this->get_value(this->size() - 1);
  }

  //! Retrieve a reference to the last element.
  //! Throws std::out_of_range if the array has no elements.
  const JsonValue &back(void) const {
    if (this->size() <= 0) {
      throw std::out_of_range("JsonArray::back");
    }
    return this->get_value(this->size() - 1);
  }

  //! Clear the JSON array (the handle holds no array afterwards)
  void clear(void) {
    mod_json_array_unset(arr_);
    arr_ = 0;
  }

  //! Exchange the content with another JSON array
  void swap(JsonArray &rhs) {
    mod_json_array_t *arr = arr_;
    arr_ = rhs.arr_;
    rhs.arr_ = arr;
  }

  //! Merge another JSON array.
  //! Throws std::runtime_error if the array cannot be made writable.
  void merge(const JsonArray &rhs) {
    if (!copy_on_write()) {
      throw std::runtime_error("JsonArray::merge");
    }
    mod_json_array_merge(arr_, rhs.arr_);
  }

  //! Resize a JSON array so that it contains n elements.
  //! Throws std::runtime_error if the array cannot be made writable or the
  //! resize fails.
  void resize(size_type n, const JsonValue &val = JsonValue()) {
    if (!copy_on_write() ||
        mod_json_array_resize(arr_, n, *((mod_json_value_t **)&val)) != 0) {
      throw std::runtime_error("JsonArray::resize");
    }
  }

  //! Retrieve an iterator pointing to the first element.
  //! Returns a default constructed iterator if the array cannot be made
  //! writable.
  iterator begin(void) {
    if (copy_and_leak()) {
      return iterator(mod_json_array_begin(arr_));
    }
    return iterator();
  }

  //! Retrieve a const iterator pointing to the first element.
  //! Returns a default constructed iterator when no array is held.
  const_iterator begin(void) const {
    if (arr_) {
      return const_iterator(mod_json_array_begin(arr_));
    }
    return const_iterator();
  }

  //! Retrieve a const iterator pointing to the first element.
  //! Returns a default constructed iterator when no array is held.
  const_iterator cbegin(void) const {
    if (arr_) {
      return const_iterator(mod_json_array_begin(arr_));
    }
    return const_iterator();
  }

  //! Retrieve a reverse iterator pointing to the last element.
  //! Returns a default constructed iterator if the array cannot be made
  //! writable.
  reverse_iterator rbegin(void) {
    if (copy_and_leak()) {
      return reverse_iterator(mod_json_array_rbegin(arr_));
    }
    return reverse_iterator();
  }

  //! Retrieve a const reverse iterator pointing to the last element.
  //! Returns a default constructed iterator when no array is held.
  const_reverse_iterator rbegin(void) const {
    if (arr_) {
      return const_reverse_iterator(mod_json_array_rbegin(arr_));
    }
    return const_reverse_iterator();
  }

  //! Retrieve a const reverse iterator pointing to the last element.
  //! Returns a default constructed iterator when no array is held.
  const_reverse_iterator crbegin(void) const {
    if (arr_) {
      return const_reverse_iterator(mod_json_array_rbegin(arr_));
    }
    return const_reverse_iterator();
  }

  //! Retrieve an iterator pointing to the past-the-end element.
  //! Returns a default constructed iterator if the array cannot be made
  //! writable.
  iterator end(void) {
    if (copy_and_leak()) {
      return iterator(mod_json_array_end(arr_));
    }
    return iterator();
  }

  //! Retrieve a const iterator pointing to the past-the-end element.
  //! Returns a default constructed iterator when no array is held.
  const_iterator end(void) const {
    if (arr_) {
      return const_iterator(mod_json_array_end(arr_));
    }
    return const_iterator();
  }

  //! Retrieve a const iterator pointing to the past-the-end element.
  //! Returns a default constructed iterator when no array is held.
  const_iterator cend(void) const {
    if (arr_) {
      return const_iterator(mod_json_array_end(arr_));
    }
    return const_iterator();
  }

  //! Retrieve a reverse iterator pointing to the past-the-end element of
  //! the reversed sequence.
  //! Returns a default constructed iterator if the array cannot be made
  //! writable.
  reverse_iterator rend(void) {
    if (copy_and_leak()) {
      return reverse_iterator(mod_json_array_rend(arr_));
    }
    return reverse_iterator();
  }

  //! Retrieve a const reverse iterator pointing to the past-the-end element
  //! of the reversed sequence.
  //! Returns a default constructed iterator when no array is held.
  const_reverse_iterator rend(void) const {
    if (arr_) {
      return const_reverse_iterator(mod_json_array_rend(arr_));
    }
    return const_reverse_iterator();
  }

  //! Retrieve a const reverse iterator pointing to the past-the-end element
  //! of the reversed sequence.
  //! Returns a default constructed iterator when no array is held.
  const_reverse_iterator crend(void) const {
    if (arr_) {
      return const_reverse_iterator(mod_json_array_rend(arr_));
    }
    return const_reverse_iterator();
  }

 protected:
  //! Clone the array for writing.
  //! Without an array a default one is created; an array reported as shared
  //! is released and replaced by a clone. Returns false when no array could
  //! be obtained.
  bool copy_on_write(void) {
    if (arr_) {
      if (mod_json_array_is_shared(arr_)) {
        mod_json_array_put(arr_);
        arr_ = mod_json_array_clone(arr_);
      }
    } else {
      arr_ = mod_json_array_set_default();
    }
    return (arr_ != 0);
  }

  //! Clone the array and leak it: runs copy_on_write() and, on success,
  //! flags the array through mod_json_array_set_leaked().
  bool copy_and_leak(void) {
    if (copy_on_write()) {
      mod_json_array_set_leaked(arr_);
      return true;
    }
    return false;
  }

  //! Retrieve the value at index n.
  //! No bounds or null check is done here; callers check size() first.
  JsonValue &get_value(size_type n) {
    return *reinterpret_cast<JsonValue *>(arr_->first + n);
  }

  //! Retrieve the value at index n.
  //! No bounds or null check is done here; callers check size() first.
  const JsonValue &get_value(size_type n) const {
    return *reinterpret_cast<JsonValue *>(arr_->first + n);
  }

 private:
  mod_json_array_t *arr_;
};

/*! JSON Pair
 *
 *  Lightweight view over a `mod_json_pair_t`. It only stores the pointer;
 *  key() and value() reinterpret the pair's `key` and `val` members as
 *  JsonString and JsonValue, and dereference the pointer without checking
 *  it, so call is_valid() first when the pair may be empty.
 */
class JsonPair {
 public:
  //! Construct a pair that refers to nothing
  JsonPair(void) : pair_((mod_json_pair_t *)0) {}

  //! Retrieve non-zero if the pair refers to an underlying entry
  bool is_valid(void) const {
    return (pair_ != 0);
  }

  //! Retrieve the key of the pair
  const JsonString &key(void) const {
    JsonString *const name = reinterpret_cast<JsonString *>(&pair_->key);
    return *name;
  }

  //! Retrieve the value of the pair (modifiable)
  JsonValue &value(void) {
    JsonValue *const held = reinterpret_cast<JsonValue *>(&pair_->val);
    return *held;
  }

  //! Retrieve the value of the pair (read-only)
  const JsonValue &value(void) const {
    JsonValue *const held = reinterpret_cast<JsonValue *>(&pair_->val);
    return *held;
  }

 protected:
  friend class JsonObject;

  //! Constructor for friends: wrap an existing pair pointer
  JsonPair(mod_json_pair_t *pair) : pair_(pair) {}

  //! Constructor for friends: refer to the same pair as rhs
  JsonPair(const JsonPair &rhs) : pair_(rhs.pair_) {}

 private:
  mod_json_pair_t *pair_;
};

/*! JSON Object
 */
class JsonObject {
 public:
  typedef mod_json_size_t size_type;
  typedef mod_json_ssize_t ssize_type;

  class iterator;
  class const_iterator;
  class reverse_iterator;
  class const_reverse_iterator;

  /*! Const iterator of JSON Object
   *
   *  Holds a pointer to a `const mod_json_pair_t` and moves it with pointer
   *  arithmetic. Dereferencing reinterprets the stored pointer member itself
   *  as a `const JsonPair`.
   */
  class const_iterator {
   public:
    //! Constructor (holds a null pair pointer)
    const_iterator(void) : iter_(0) {}

    //! Equality: both iterators hold the same pair pointer
    bool operator==(const const_iterator &rhs) const {
      return (rhs.iter_ == iter_);
    }

    //! No equality
    bool operator!=(const const_iterator &rhs) const {
      return !(*this == rhs);
    }

    //! Increment (Prefix)
    const_iterator &operator++() {
      iter_ += 1;
      return *this;
    }

    //! Increment (Suffix), returns the position before the step
    const_iterator operator++(int) {
      const_iterator before(*this);
      ++(*this);
      return before;
    }

    //! Decrement (Prefix)
    const_iterator &operator--() {
      iter_ -= 1;
      return *this;
    }

    //! Decrement (Suffix), returns the position before the step
    const_iterator operator--(int) {
      const_iterator before(*this);
      --(*this);
      return before;
    }

    //! Indirection (eg. *iter)
    const JsonPair &operator*() const {
      return *(this->operator->());
    }

    //! Structure dereference (eg. iter->)
    const JsonPair *operator->() const {
      return reinterpret_cast<const JsonPair *>(&iter_);
    }

    //! Retrieve as const reverse iterator (same pair, reversed direction)
    operator const_reverse_iterator() const {
      const_reverse_iterator reversed(iter_);
      return reversed;
    }

   protected:
    friend class JsonObject;
    friend class JsonObject::iterator;
    friend class JsonObject::reverse_iterator;
    friend class JsonObject::const_reverse_iterator;

    //! Constructor for friends
    const_iterator(const mod_json_pair_t *iter) : iter_(iter) {}

   private:
    const mod_json_pair_t *iter_;
  };

  /*! iterator of JSON Object
   *
   *  Holds a pointer to a `mod_json_pair_t` and moves it with pointer
   *  arithmetic. Dereferencing reinterprets the stored pointer member itself
   *  as a `JsonPair`.
   */
  class iterator {
   public:
    //! Constructor (holds a null pair pointer)
    iterator(void) : iter_(0) {}

    //! Equality: both iterators hold the same pair pointer
    bool operator==(const iterator &rhs) const {
      return (rhs.iter_ == iter_);
    }

    //! No equality
    bool operator!=(const iterator &rhs) const {
      return !(*this == rhs);
    }

    //! Increment (Prefix)
    iterator &operator++() {
      iter_ += 1;
      return *this;
    }

    //! Increment (Suffix), returns the position before the step
    iterator operator++(int) {
      iterator before(*this);
      ++(*this);
      return before;
    }

    //! Decrement (Prefix)
    iterator &operator--() {
      iter_ -= 1;
      return *this;
    }

    //! Decrement (Suffix), returns the position before the step
    iterator operator--(int) {
      iterator before(*this);
      --(*this);
      return before;
    }

    //! Indirection (eg. *iter)
    JsonPair &operator*() const {
      return *(this->operator->());
    }

    //! Structure dereference (eg. iter->)
    JsonPair *operator->() const {
      return reinterpret_cast<JsonPair *>((mod_json_pair_t **)&iter_);
    }

    //! Retrieve as const iterator
    operator const_iterator() const {
      const_iterator readonly(iter_);
      return readonly;
    }

    //! Retrieve as reverse iterator (same pair, reversed direction)
    operator reverse_iterator() const {
      reverse_iterator reversed(iter_);
      return reversed;
    }

    //! Retrieve as const reverse iterator (same pair, reversed direction)
    operator const_reverse_iterator() const {
      const_reverse_iterator reversed(iter_);
      return reversed;
    }

   protected:
    friend class JsonObject;
    friend class JsonObject::reverse_iterator;

    //! Constructor for friends
    iterator(mod_json_pair_t *iter) : iter_(iter) {}

   private:
    mod_json_pair_t *iter_;
  };

  /*! Const Reverse iterator of JSON Object
   *
   *  Same representation as const_iterator, but incrementing moves the pair
   *  pointer backwards and decrementing moves it forwards.
   */
  class const_reverse_iterator {
   public:
    //! Constructor (holds a null pair pointer)
    const_reverse_iterator(void) : iter_(0) {}

    //! Equality: both iterators hold the same pair pointer
    bool operator==(const const_reverse_iterator &rhs) const {
      return (rhs.iter_ == iter_);
    }

    //! No equality
    bool operator!=(const const_reverse_iterator &rhs) const {
      return !(*this == rhs);
    }

    //! Increment (Prefix), steps the pair pointer backwards
    const_reverse_iterator &operator++() {
      iter_ -= 1;
      return *this;
    }

    //! Increment (Suffix), returns the position before the step
    const_reverse_iterator operator++(int) {
      const_reverse_iterator before(*this);
      ++(*this);
      return before;
    }

    //! Decrement (Prefix), steps the pair pointer forwards
    const_reverse_iterator &operator--() {
      iter_ += 1;
      return *this;
    }

    //! Decrement (Suffix), returns the position before the step
    const_reverse_iterator operator--(int) {
      const_reverse_iterator before(*this);
      --(*this);
      return before;
    }

    //! Indirection (eg. *iter)
    const JsonPair &operator*() const {
      return *(this->operator->());
    }

    //! Structure dereference (eg. iter->)
    const JsonPair *operator->() const {
      return reinterpret_cast<const JsonPair *>(&iter_);
    }

    //! Retrieve as const iterator (same pair, forward direction)
    operator const_iterator() const {
      const_iterator forward(iter_);
      return forward;
    }

   protected:
    friend class JsonObject;
    friend class JsonObject::iterator;
    friend class JsonObject::const_iterator;
    friend class JsonObject::reverse_iterator;

    //! Constructor for friends
    const_reverse_iterator(const mod_json_pair_t *iter) : iter_(iter) {}

   private:
    const mod_json_pair_t *iter_;
  };

  /*! iterator of JSON Object
   */
  class reverse_iterator {
   public:
    //! Constructor
    reverse_iterator(void) : iter_(0) {}

    //! Equality
    bool operator==(const reverse_iterator &rhs) const {
      return (iter_ == rhs.iter_);
    }

    //! No equality
    bool operator!=(const reverse_iterator &rhs) const {
      return (iter_ != rhs.iter_);
    }

    //! Increment (Prefix)
    reverse_iterator &operator++() {
      --iter_;
      return *this;
    }

    //! Increment (Suffix)
    reverse_iterator operator++(int) {
      reverse_iterator tmp = *this;
      --iter_;
      return tmp;
    }

    //! Decrement (Prefix)
    reverse_iterator &operator--() {
      ++iter_;
      return *this;
    }

    //! Decrement (Suffix)
    reverse_iterator operator--(int) {
      reverse_iterator tmp = *this;
      ++iter_;
      return tmp;
    }

    //! Indirection (eg. *iter)
    JsonPair &operator*() const {
      return *reinterpret_cast<JsonPair *>((mod_json_pair_t **)&iter_);
    }

    //! Structure dereference (eg. iter->)
    JsonPair *operator->() const {
      return reinterpret_cast<JsonPair *>((mod_json_pair_t **)&iter_);
    }

    //! Retrieve as iterator
    operator iterator() const {
      return iterator(iter_);
    }

    //! Retrieve as const iterator
    operator const_iterator() const {
      return const_iterator(iter_);
    }

    //! Retrieve as const reverse iterator
    operator const_reverse_iterator() const {
      return const_reverse_iterator(iter_);
    }

   protected:
    friend class JsonObject;
    friend class JsonArray::iterator;

    //! Constructor for friends
    reverse_iterator(mod_json_pair_t *iter) : iter_(iter) {}

   private:
    mod_json_pair_t *iter_;
  };

  //! Constructor
  JsonObject(void) : obj_(0) {}

  //! Constructor
  JsonObject(const JsonObject &rhs) : obj_(0) {
    if (rhs.obj_) {
      obj_ = mod_json_object_grab(rhs.obj_);
    }
  }

#if __cplusplus >= 201103L
  //! Constructor
  JsonObject(JsonObject &&rhs) : obj_(rhs.obj_) {
    rhs.obj_ = 0;
  }
#endif

  //! Destructor
  ~JsonObject(void) {
    mod_json_object_unset(obj_);
  }

  //! Copy assignment: delegates to assign() and returns *this
  JsonObject &operator=(const JsonObject &rhs) {
    assign(rhs);
    return *this;
  }

#if __cplusplus >= 201103L
  //! Move assignment: delegates to the rvalue overload of assign() and
  //! returns *this
  JsonObject &operator=(JsonObject &&rhs) {
    assign(std::move(rhs));
    return *this;
  }
#endif

  //! Equality, as decided by mod_json_object_is_equal() on both objects
  bool operator==(const JsonObject &rhs) const {
    const bool same = mod_json_object_is_equal(obj_, rhs.obj_);
    return same;
  }

  //! No equality (negation of operator==)
  bool operator!=(const JsonObject &rhs) const {
    return !(this->operator==(rhs));
  }

  //! Retrieve the value of a key, if no one exists, create a new one.
  //! Throws std::invalid_argument for a null key and std::runtime_error if
  //! the object cannot be made writable or no pair could be obtained.
  JsonValue &operator[](const char *key) {
    if (key == 0) {
      throw std::invalid_argument("JsonObject::operator[]");
    }

    const bool writable = copy_and_leak();
    if (!writable) {
      throw std::runtime_error("JsonObject::operator[]");
    }

    JsonPair slot(mod_json_object_touch(obj_, key));
    if (slot.is_valid()) {
      return slot.value();
    }
    throw std::runtime_error("JsonObject::operator[]");
  }

  //! Retrieve the value of a key, if no one exists, return a null value.
  //! Throws std::invalid_argument for a null key. The object itself is not
  //! modified; the result is returned by value.
  JsonValue operator[](const char *key) const {
    if (key == 0) {
      throw std::invalid_argument("JsonObject::operator[]");
    }

    JsonPair found(mod_json_object_find(obj_, key));
    if (found.is_valid()) {
      return found.value();
    }
    return JsonValue();
  }

  //! Retrieve the value of a key, if no one exists, create a new one.
  //! Forwards the key's C string to the `const char *` overload.
  JsonValue &operator[](const JsonString &key) {
    return this->operator[](key.c_str());
  }

  //! Retrieve the value of a key, if no one exists, return a null value.
  //! Forwards the key's C string to the const `const char *` overload.
  JsonValue operator[](const JsonString &key) const {
    return this->operator[](key.c_str());
  }

  //! Retrieve non-zero if the object is valid (an object is held)
  bool is_valid(void) const {
    return (obj_ != 0);
  }

  //! Retrieve non-zero if the object is empty, as reported by
  //! mod_json_object_empty()
  bool empty(void) const {
    const bool none = mod_json_object_empty(obj_);
    return none;
  }

  //! Retrieve the size of JSON object, as reported by
  //! mod_json_object_count()
  size_type size(void) const {
    const size_type count = mod_json_object_count(obj_);
    return count;
  }

  //! Retrieve refer-counter of JSON object, as reported by
  //! mod_json_object_refer()
  ssize_type refer(void) const {
    const ssize_type count = mod_json_object_refer(obj_);
    return count;
  }

  //! Assign new contents to the object, replacing its current content
  void assign(const JsonObject &rhs) {
    mod_json_object_unset(obj_);
    obj_ = rhs.obj_ ? mod_json_object_grab(rhs.obj_) : 0;
  }

#if __cplusplus >= 201103L
  //! Assign new contents to the object, replacing its current content.
  //! rhs is emptied before the current object is released, so moving an
  //! object onto itself keeps its content and rhs is never read after the
  //! release.
  void assign(JsonObject &&rhs) {
    mod_json_object_t *taken = rhs.obj_;
    rhs.obj_ = 0;
    mod_json_object_unset(obj_);
    obj_ = taken;
  }
#endif

  //! Clear the JSON object
  void clear(void) {
    mod_json_object_unset(obj_);
    obj_ = 0;
  }

  //! Set the value of a key
  bool set(const JsonString &key, const JsonValue &val) {
    JsonValue tmp(val);
    if (!copy_on_write()) {
      throw std::runtime_error("JsonObject::set");
    }
    return (mod_json_object_insert(obj_, *(mod_json_string_t **)&key,
                                   *(mod_json_value_t **)&tmp) !=
            (mod_json_pair_t *)0);
  }

  //! Retrieve the value of a key
  bool get(const char *key, JsonValue *val) const {
    const JsonPair pair(mod_json_object_find(obj_, key));
    if (!pair.is_valid()) {
      return false;
    }
    *val = pair.value();
    return true;
  }

  //! Retrieve the value of a key
  bool get(const char *key, JsonString *val) const {
    const JsonPair pair(mod_json_object_find(obj_, key));
    if (!pair.is_valid() || !pair.value().is_string()) {
      return false;
    }
    *val = pair.value().as_string();
    return true;
  }

  //! Retrieve the value of a key
  bool get(const char *key, std::string *val) const {
    const JsonPair pair(mod_json_object_find(obj_, key));
    if (!pair.is_valid() || !pair.value().is_string()) {
      return false;
    }
    *val = pair.value().as_stl_string();
    return true;
  }

  //! Retrieve the value of a key
  bool get(const char *key, JsonArray *val) const {
    const JsonPair pair(mod_json_object_find(obj_, key));
    if (!pair.is_valid() || !pair.value().is_array()) {
      return false;
    }
    *val = pair.value().as_array();
    return true;
  }

  //! Retrieve the value of a key
  bool get(const char *key, JsonObject *val) const {
    const JsonPair pair(mod_json_object_find(obj_, key));
    if (!pair.is_valid() || !pair.value().is_object()) {
      return false;
    }
    *val = pair.value().as_object();
    return true;
  }

  //! Retrieve the value of a key
  bool get(const char *key, bool *val) const {
    const JsonPair pair(mod_json_object_find(obj_, key));
    if (!pair.is_valid()) {
      return false;
    }
    *val = pair.value().as_bool();
    return true;
  }

  //! Retrieve the value of a key
  bool get(const char *key, signed char *val) const {
    const JsonPair pair(mod_json_object_find(obj_, key));
    if (!pair.is_valid()) {
      return false;
    }
    *val = static_cast<signed char>(pair.value().as_integer());
    return true;
  }

  //! Retrieve the value of a key
  bool get(const char *key, char *val) const {
    const JsonPair pair(mod_json_object_find(obj_, key));
    if (!pair.is_valid()) {
      return false;
    }
    *val = static_cast<char>(pair.value().as_integer());
    return true;
  }

  //! Retrieve the value of a key
  bool get(const char *key, short int *val) const {
    const JsonPair pair(mod_json_object_find(obj_, key));
    if (!pair.is_valid()) {
      return false;
    }
    *val = static_cast<short int>(pair.value().as_integer());
    return true;
  }

  //! Retrieve the value of a key
  bool get(const char *key, int *val) const {
    const JsonPair pair(mod_json_object_find(obj_, key));
    if (!pair.is_valid()) {
      return false;
    }
    *val = static_cast<int>(pair.value().as_integer());
    return true;
  }

  //! Retrieve the value of a key
  bool get(const char *key, long int *val) const {
    const JsonPair pair(mod_json_object_find(obj_, key));
    if (!pair.is_valid()) {
      return false;
    }
    *val = static_cast<long int>(pair.value().as_integer());
    return true;
  }

  //! Retrieve the value of a key
  bool get(const char *key, long long int *val) const {
    const JsonPair pair(mod_json_object_find(obj_, key));
    if (!pair.is_valid()) {
      return false;
    }
    *val = static_cast<long long int>(pair.value().as_integer());
    return true;
  }

  //! Retrieve the value of a key
  bool get(const char *key, unsigned char *val) const {
    const JsonPair pair(mod_json_object_find(obj_, key));
    if (!pair.is_valid()) {
      return false;
    }
    *val = static_cast<unsigned char>(pair.value().as_integer());
    return true;
  }

  //! Retrieve the value of a key
  bool get(const char *key, unsigned short int *val) const {
    const JsonPair pair(mod_json_object_find(obj_, key));
    if (!pair.is_valid()) {
      return false;
    }
    *val = static_cast<unsigned short int>(pair.value().as_integer());
    return true;
  }

  //! Retrieve the value of a key
  bool get(const char *key, unsigned int *val) const {
    const JsonPair pair(mod_json_object_find(obj_, key));
    if (!pair.is_valid()) {
      return false;
    }
    *val = static_cast<unsigned int>(pair.value().as_integer());
    return true;
  }

  //! Retrieve the value of a key
  bool get(const char *key, unsigned long int *val) const {
    const JsonPair pair(mod_json_object_find(obj_, key));
    if (!pair.is_valid()) {
      return false;
    }
    *val = static_cast<unsigned long int>(pair.value().as_integer());
    return true;
  }

  //! Retrieve the value of a key
  bool get(const char *key, unsigned long long int *val) const {
    const JsonPair pair(mod_json_object_find(obj_, key));
    if (!pair.is_valid()) {
      return false;
    }
    *val = static_cast<unsigned long long int>(pair.value().as_integer());
    return true;
  }

  //! Retrieve the value of a key
  bool get(const char *key, float *val) const {
    const JsonPair pair(mod_json_object_find(obj_, key));
    if (!pair.is_valid()) {
      return false;
    }
    *val = static_cast<float>(pair.value().as_float());
    return true;
  }

  //! Retrieve the value of a key
  bool get(const char *key, double *val) const {
    const JsonPair pair(mod_json_object_find(obj_, key));
    if (!pair.is_valid()) {
      return false;
    }
    *val = static_cast<double>(pair.value().as_float());
    return true;
  }

  //! Retrieve the value of a key
  bool get(const char *key, long double *val) const {
    const JsonPair pair(mod_json_object_find(obj_, key));
    if (!pair.is_valid()) {
      return false;
    }
    *val = static_cast<long double>(pair.value().as_float());
    return true;
  }

  //! Same as the `const char *` overloads, with the key given as a
  //! JsonString; the overload is chosen by the type of `val`
  template <typename T>
  bool get(const JsonString &key, T *val) const {
    const char *const name = key.c_str();
    return get(name, val);
  }

  //! Same as the `const char *` overloads, with the key given as a
  //! std::string; the overload is chosen by the type of `val`
  template <typename T>
  bool get(const std::string &key, T *val) const {
    const char *const name = key.c_str();
    return get(name, val);
  }

  //! Erase the pair stored under `key`. Does nothing when the object handle
  //! or `key` is null; throws std::runtime_error when copy_on_write() fails.
  void unset(const char *key) {
    if (!obj_ || !key) {
      return;
    }
    if (!copy_on_write()) {
      throw std::runtime_error("JsonObject::unset");
    }
    mod_json_object_erase(obj_, key);
  }

  //! Report whether a lookup of `key` in this object yields a pair
  bool has(const char *key) const {
    if (mod_json_object_find(obj_, key) == (mod_json_pair_t *)0) {
      return false;
    }
    return true;
  }

  //! Exchange the underlying object handles of this wrapper and `rhs`
  void swap(JsonObject &rhs) {
    mod_json_object_t *const mine = obj_;
    obj_ = rhs.obj_;
    rhs.obj_ = mine;
  }

  //! Merge the content of `rhs` into this object.
  //! Throws std::runtime_error when copy_on_write() fails.
  void merge(const JsonObject &rhs) {
    const bool writable = copy_on_write();
    if (!writable) {
      throw std::runtime_error("JsonObject::merge");
    }
    mod_json_object_merge(obj_, rhs.obj_);
  }

  //! Mutable iterator to the first element; a default-constructed iterator
  //! is returned when copy_and_leak() fails
  iterator begin(void) {
    return copy_and_leak() ? iterator(mod_json_object_begin(obj_))
                           : iterator();
  }

  //! Const iterator to the first element; a default-constructed iterator is
  //! returned when there is no underlying object
  const_iterator begin(void) const {
    return obj_ ? const_iterator(mod_json_object_begin(obj_))
                : const_iterator();
  }

  //! Const iterator to the first element; a default-constructed iterator is
  //! returned when there is no underlying object
  const_iterator cbegin(void) const {
    return obj_ ? const_iterator(mod_json_object_begin(obj_))
                : const_iterator();
  }

  //! Mutable reverse iterator to the last element; a default-constructed
  //! iterator is returned when copy_and_leak() fails
  reverse_iterator rbegin(void) {
    return copy_and_leak() ? reverse_iterator(mod_json_object_rbegin(obj_))
                           : reverse_iterator();
  }

  //! Const reverse iterator to the last element; a default-constructed
  //! iterator is returned when there is no underlying object
  const_reverse_iterator rbegin(void) const {
    return obj_ ? const_reverse_iterator(mod_json_object_rbegin(obj_))
                : const_reverse_iterator();
  }

  //! Const reverse iterator to the last element; a default-constructed
  //! iterator is returned when there is no underlying object
  const_reverse_iterator crbegin(void) const {
    return obj_ ? const_reverse_iterator(mod_json_object_rbegin(obj_))
                : const_reverse_iterator();
  }

  //! Mutable past-the-end iterator; a default-constructed iterator is
  //! returned when copy_and_leak() fails
  iterator end(void) {
    return copy_and_leak() ? iterator(mod_json_object_end(obj_)) : iterator();
  }

  //! Const past-the-end iterator; a default-constructed iterator is returned
  //! when there is no underlying object
  const_iterator end(void) const {
    return obj_ ? const_iterator(mod_json_object_end(obj_))
                : const_iterator();
  }

  //! Const past-the-end iterator; a default-constructed iterator is returned
  //! when there is no underlying object
  const_iterator cend(void) const {
    return obj_ ? const_iterator(mod_json_object_end(obj_))
                : const_iterator();
  }

  //! Mutable reverse iterator marking the reverse past-the-end position; a
  //! default-constructed iterator is returned when copy_and_leak() fails
  reverse_iterator rend(void) {
    return copy_and_leak() ? reverse_iterator(mod_json_object_rend(obj_))
                           : reverse_iterator();
  }

  //! Const reverse iterator marking the reverse past-the-end position; a
  //! default-constructed iterator is returned when there is no underlying
  //! object
  const_reverse_iterator rend(void) const {
    return obj_ ? const_reverse_iterator(mod_json_object_rend(obj_))
                : const_reverse_iterator();
  }

  //! Const reverse iterator marking the reverse past-the-end position; a
  //! default-constructed iterator is returned when there is no underlying
  //! object
  const_reverse_iterator crend(void) const {
    return obj_ ? const_reverse_iterator(mod_json_object_rend(obj_))
                : const_reverse_iterator();
  }

 protected:
  //! Prepare the underlying object for modification.
  //! With no object yet, a default one is requested; with an object that
  //! mod_json_object_is_shared() reports as shared, this handle is put back
  //! and replaced by a clone. Returns false when no object is available
  //! afterwards.
  bool copy_on_write(void) {
    if (!obj_) {
      obj_ = mod_json_object_set_default();
    } else if (mod_json_object_is_shared(obj_)) {
      // Order matters: release our handle first, then clone from it
      mod_json_object_put(obj_);
      obj_ = mod_json_object_clone(obj_);
    }
    return (obj_ != 0);
  }

  //! Run copy_on_write() and, on success, flag the object as leaked via
  //! mod_json_object_set_leaked(). Returns whether copy_on_write() succeeded.
  bool copy_and_leak(void) {
    if (!copy_on_write()) {
      return false;
    }
    mod_json_object_set_leaked(obj_);
    return true;
  }

 private:
  mod_json_object_t *obj_;
};

//! Assign new contents to the value, replacing its current content
inline void JsonValue::assign(const JsonArray &arr) {
  this->set_value(arr);
}

//! Assign new contents to the value, replacing its current content
inline void JsonValue::assign(const JsonObject &obj) {
  this->set_value(obj);
}

//! View the object slot of the underlying value as a JsonObject (no type
//! check is performed here)
inline JsonObject &JsonValue::to_object(void) {
  JsonObject *const wrapper =
      reinterpret_cast<JsonObject *>(&val_->data.c_obj);
  return *wrapper;
}

//! View the object slot of the underlying value as a const JsonObject (no
//! type check is performed here)
inline const JsonObject &JsonValue::to_object(void) const {
  JsonObject *const wrapper =
      reinterpret_cast<JsonObject *>(&val_->data.c_obj);
  return *wrapper;
}

//! View the array slot of the underlying value as a JsonArray (no type
//! check is performed here)
inline JsonArray &JsonValue::to_array(void) {
  JsonArray *const wrapper = reinterpret_cast<JsonArray *>(&val_->data.c_arr);
  return *wrapper;
}

//! View the array slot of the underlying value as a const JsonArray (no
//! type check is performed here)
inline const JsonArray &JsonValue::to_array(void) const {
  JsonArray *const wrapper = reinterpret_cast<JsonArray *>(&val_->data.c_arr);
  return *wrapper;
}

//! View the string slot of the underlying value as a JsonString (no type
//! check is performed here)
inline JsonString &JsonValue::to_string(void) {
  JsonString *const wrapper =
      reinterpret_cast<JsonString *>(&val_->data.c_str);
  return *wrapper;
}

//! View the string slot of the underlying value as a const JsonString (no
//! type check is performed here)
inline const JsonString &JsonValue::to_string(void) const {
  JsonString *const wrapper =
      reinterpret_cast<JsonString *>(&val_->data.c_str);
  return *wrapper;
}

//! Access the member stored under `key`, forcing this value to be an
//! object: a non-object value is first overwritten with an empty JsonObject.
//! Throws std::runtime_error when copy_and_leak() fails.
inline JsonValue &JsonValue::get_value(const char *key) {
  const bool already_object = is_object();
  if (!already_object) {
    *this = JsonObject();
  }
  if (copy_and_leak()) {
    return (to_object())[key];
  }
  throw std::runtime_error("JsonValue::get_value");
}

//! Return, by value, the member stored under `key`; a default-constructed
//! JsonValue is returned when this value is not an object
inline JsonValue JsonValue::get_value(const char *key) const {
  if (is_object()) {
    return (to_object())[key];
  }
  return JsonValue();
}

//! Access the element at index `n` of an array value.
//! Throws std::logic_error when this value is not an array, and
//! std::runtime_error when copy_and_leak() fails.
inline JsonValue &JsonValue::get_value(size_type n) {
  if (!is_array()) {
    throw std::logic_error("JsonValue::get_value");
  }
  if (copy_and_leak()) {
    return (to_array())[n];
  }
  throw std::runtime_error("JsonValue::get_value");
}

//! Return, by value, the element at index `n`; a default-constructed
//! JsonValue is returned when this value is not an array
inline JsonValue JsonValue::get_value(size_type n) const {
  if (is_array()) {
    return (to_array())[n];
  }
  return JsonValue();
}

//! Store the array `val` in this value, replacing the current content.
//! Throws std::runtime_error when copy_on_write() fails.
inline void JsonValue::set_value(const JsonArray &val) {
  const bool writable = copy_on_write();
  if (!writable) {
    throw std::runtime_error("JsonValue::set_value");
  }
  // The wrapper is read as a bare mod_json_array_t* handle
  mod_json_value_assign_array(val_, *(mod_json_array_t **)&val);
}

//! Store the object `val` in this value, replacing the current content.
//! Throws std::runtime_error when copy_on_write() fails.
inline void JsonValue::set_value(const JsonObject &val) {
  const bool writable = copy_on_write();
  if (!writable) {
    throw std::runtime_error("JsonValue::set_value");
  }
  // The wrapper is read as a bare mod_json_object_t* handle
  mod_json_value_assign_object(val_, *(mod_json_object_t **)&val);
}

/*! JSON Parser
 */
class JsonParser {
 public:
  typedef mod_json_size_t size_type;

  //! Constructor
  JsonParser(void)
      : state_(mod_json_state_null), error_(mod_json_error_null), context_(0) {
    option_.options = 0;
    option_.object_depth = 0;
    option_.array_depth = 0;
  }

  //! Destructor
  ~JsonParser(void) {}

  //! Set the max object depth
  void set_object_depth(size_type depth) {
    option_.object_depth = depth;
  }

  //! Set the max array depth
  void set_array_depth(size_type depth) {
    option_.array_depth = depth;
  }

  //! Enable/Disable comments
  void set_comment(bool enable = true) {
    if (enable) {
      option_.options |= MOD_JSON_COMMENT;
    } else {
      option_.options &= ~MOD_JSON_COMMENT;
    }
  }

  //! Enable/Disable loose strings
  void set_unstrict(bool enable = true) {
    if (enable) {
      option_.options |= MOD_JSON_UNSTRICT;
    } else {
      option_.options &= ~MOD_JSON_UNSTRICT;
    }
  }

  //! Enable/Disable simple format
  void set_simple(bool enable = true) {
    if (enable) {
      option_.options |= MOD_JSON_SIMPLE;
    } else {
      option_.options &= ~MOD_JSON_SIMPLE;
    }
  }

  //! Enable/Disable single quotes support
  void set_squote(bool enable = true) {
    if (enable) {
      option_.options |= MOD_JSON_SQUOTE;
    } else {
      option_.options &= ~MOD_JSON_SQUOTE;
    }
  }

  //! Convert a sting to a JSON value
  bool parse(const char *str, JsonValue *out) {
    mod_json_token_t *tok;

    state_ = mod_json_state_null;
    error_ = mod_json_error_null;
    context_ = str;

    tok = mod_json_token_create(&option_);
    if (tok) {
      mod_json_value_t *jval;

      jval = mod_json_parse(tok, str);

      /* save information of token */
      state_ = mod_json_token_state(tok);
      error_ = mod_json_token_error(tok);
      context_ = mod_json_token_context(tok);
      mod_json_token_destroy(tok);

      if (jval) {
        *out = *reinterpret_cast<JsonValue *>(&jval);
        mod_json_value_unset(jval);

        return out->is_valid();
      }
    }
    return false;
  }

  //! Retrieve the error code of parser
  int error(void) const {
    return (int)error_;
  }

  //! Retrieve the state code of parser
  int state(void) const {
    return (int)state_;
  }

  //! Retrieve the context of parser
  const char *context(void) const {
    return context_;
  }

 private:
  mod_json_option_t option_;
  mod_json_state_t state_;
  mod_json_error_t error_;
  mod_json_cchar_t *context_;
};

/*! JSON Dumper
 */
class JsonDumper {
 public:
  //! Constructor
  JsonDumper(void) : str_() {}

  //! Destructor
  ~JsonDumper(void) {}

  //! Dump a JSON value to string
  bool dump(const JsonValue &val) {
    mod_json_string_t *str;

    str = mod_json_dump(*((mod_json_value_t **)&val));
    str_ = *reinterpret_cast<JsonString *>(&str);
    if (str) {
      mod_json_string_unset(str);
      return true;
    }
    return false;
  }

  //! Retrieve result of dumper
  JsonString &result(void) {
    return str_;
  }

  //! Retrieve result of dumper
  const JsonString &result(void) const {
    return str_;
  }

 private:
  JsonString str_;
};

//! Equality of a JsonString and a C string: true when both pointers are
//! identical (including both null) or both are non-null with equal text
static inline bool operator==(const ailego::JsonString &lhs, const char *rhs) {
  const char *const text = lhs.c_str();
  if (text == rhs) {
    return true;
  }
  if (!text || !rhs) {
    return false;
  }
  return (std::strcmp(text, rhs) == 0);
}

//! Equality of a C string and a JsonString (mirror of the overload above)
static inline bool operator==(const char *lhs, const ailego::JsonString &rhs) {
  return (rhs == lhs);
}

//! Equality of a JsonString and a std::string: sizes must match and, for
//! non-empty strings, both data pointers must be non-null with equal bytes
static inline bool operator==(const ailego::JsonString &lhs,
                              const std::string &rhs) {
  const std::size_t length = lhs.size();
  if (length != rhs.size()) {
    return false;
  }
  if (length == 0) {
    return true;
  }

  const char *const left = lhs.data();
  const char *const right = rhs.data();
  if (!left || !right) {
    return false;
  }
  return (std::memcmp(left, right, length) == 0);
}

//! Equality of a std::string and a JsonString (mirror of the overload above)
static inline bool operator==(const std::string &lhs, const JsonString &rhs) {
  return (rhs == lhs);
}

//! True when `rhs` is a string value whose string equals `lhs`
static inline bool operator==(const JsonString &lhs, const JsonValue &rhs) {
  return rhs.is_string() && (lhs == rhs.as_string());
}

//! True when `lhs` is a string value whose string equals `rhs`
static inline bool operator==(const JsonValue &lhs, const JsonString &rhs) {
  return lhs.is_string() && (lhs.as_string() == rhs);
}

//! True when `rhs` is an array value whose array equals `lhs`
static inline bool operator==(const JsonArray &lhs, const JsonValue &rhs) {
  return rhs.is_array() && (lhs == rhs.as_array());
}

//! True when `lhs` is an array value whose array equals `rhs`
static inline bool operator==(const JsonValue &lhs, const JsonArray &rhs) {
  return lhs.is_array() && (lhs.as_array() == rhs);
}

//! True when `rhs` is an object value whose object equals `lhs`
static inline bool operator==(const JsonObject &lhs, const JsonValue &rhs) {
  return rhs.is_object() && (lhs == rhs.as_object());
}

//! True when `lhs` is an object value whose object equals `rhs`
static inline bool operator==(const JsonValue &lhs, const JsonObject &rhs) {
  return lhs.is_object() && (lhs.as_object() == rhs);
}

//! True when `lhs` is a boolean value equal to `rhs`
static inline bool operator==(const JsonValue &lhs, const bool &rhs) {
  return lhs.is_boolean() && (lhs.as_bool() == rhs);
}

//! True when `rhs` is a boolean value equal to `lhs`
static inline bool operator==(const bool &lhs, const JsonValue &rhs) {
  return rhs.is_boolean() && (lhs == rhs.as_bool());
}

//! True when `lhs` is an integer value equal to `rhs` converted to
//! integer_type
static inline bool operator==(const JsonValue &lhs, const signed char &rhs) {
  return lhs.is_integer() &&
         (lhs.as_integer() == static_cast<JsonValue::integer_type>(rhs));
}

//! True when `rhs` is an integer value equal to `lhs` converted to
//! integer_type
static inline bool operator==(const signed char &lhs, const JsonValue &rhs) {
  return rhs.is_integer() &&
         (static_cast<JsonValue::integer_type>(lhs) == rhs.as_integer());
}

//! True when `lhs` is an integer value equal to `rhs` converted to
//! integer_type
static inline bool operator==(const JsonValue &lhs, const char &rhs) {
  return lhs.is_integer() &&
         (lhs.as_integer() == static_cast<JsonValue::integer_type>(rhs));
}

//! True when `rhs` is an integer value equal to `lhs` converted to
//! integer_type
static inline bool operator==(const char &lhs, const JsonValue &rhs) {
  return rhs.is_integer() &&
         (static_cast<JsonValue::integer_type>(lhs) == rhs.as_integer());
}

//! True when `lhs` is an integer value equal to `rhs` converted to
//! integer_type
static inline bool operator==(const JsonValue &lhs, const short int &rhs) {
  return lhs.is_integer() &&
         (lhs.as_integer() == static_cast<JsonValue::integer_type>(rhs));
}

//! True when `rhs` is an integer value equal to `lhs` converted to
//! integer_type
static inline bool operator==(const short int &lhs, const JsonValue &rhs) {
  return rhs.is_integer() &&
         (static_cast<JsonValue::integer_type>(lhs) == rhs.as_integer());
}

//! True when `lhs` is an integer value equal to `rhs` converted to
//! integer_type
static inline bool operator==(const JsonValue &lhs, const int &rhs) {
  return lhs.is_integer() &&
         (lhs.as_integer() == static_cast<JsonValue::integer_type>(rhs));
}

//! True when `rhs` is an integer value equal to `lhs` converted to
//! integer_type
static inline bool operator==(const int &lhs, const JsonValue &rhs) {
  return rhs.is_integer() &&
         (static_cast<JsonValue::integer_type>(lhs) == rhs.as_integer());
}

//! True when `lhs` is an integer value equal to `rhs` converted to
//! integer_type
static inline bool operator==(const JsonValue &lhs, const long int &rhs) {
  return lhs.is_integer() &&
         (lhs.as_integer() == static_cast<JsonValue::integer_type>(rhs));
}

//! True when `rhs` is an integer value equal to `lhs` converted to
//! integer_type
static inline bool operator==(const long int &lhs, const JsonValue &rhs) {
  return rhs.is_integer() &&
         (static_cast<JsonValue::integer_type>(lhs) == rhs.as_integer());
}

//! True when `lhs` is an integer value equal to `rhs` converted to
//! integer_type
static inline bool operator==(const JsonValue &lhs, const long long int &rhs) {
  return lhs.is_integer() &&
         (lhs.as_integer() == static_cast<JsonValue::integer_type>(rhs));
}

//! True when `rhs` is an integer value equal to `lhs` converted to
//! integer_type
static inline bool operator==(const long long int &lhs, const JsonValue &rhs) {
  return rhs.is_integer() &&
         (static_cast<JsonValue::integer_type>(lhs) == rhs.as_integer());
}

//! True when `lhs` is a float value whose difference from `rhs` lies
//! strictly inside (-DBL_EPSILON, DBL_EPSILON)
static inline bool operator==(const JsonValue &lhs, const float &rhs) {
  if (!lhs.is_float()) {
    return false;
  }
  const double delta = static_cast<double>(lhs.as_float() - rhs);
  return ((delta < DBL_EPSILON) && (delta > -DBL_EPSILON));
}

//! True when `rhs` is a float value whose difference from `lhs` lies
//! strictly inside (-DBL_EPSILON, DBL_EPSILON)
static inline bool operator==(const float &lhs, const JsonValue &rhs) {
  if (!rhs.is_float()) {
    return false;
  }
  const double delta = static_cast<double>(rhs.as_float() - lhs);
  return ((delta < DBL_EPSILON) && (delta > -DBL_EPSILON));
}

//! True when `lhs` is a float value whose difference from `rhs` lies
//! strictly inside (-DBL_EPSILON, DBL_EPSILON)
static inline bool operator==(const JsonValue &lhs, const double &rhs) {
  if (!lhs.is_float()) {
    return false;
  }
  const double delta = static_cast<double>(lhs.as_float() - rhs);
  return ((delta < DBL_EPSILON) && (delta > -DBL_EPSILON));
}

//! True when `rhs` is a float value whose difference from `lhs` lies
//! strictly inside (-DBL_EPSILON, DBL_EPSILON)
static inline bool operator==(const double &lhs, const JsonValue &rhs) {
  if (!rhs.is_float()) {
    return false;
  }
  const double delta = static_cast<double>(rhs.as_float() - lhs);
  return ((delta < DBL_EPSILON) && (delta > -DBL_EPSILON));
}

//! True when `lhs` is a float value whose difference from `rhs`, narrowed
//! to double, lies strictly inside (-DBL_EPSILON, DBL_EPSILON)
static inline bool operator==(const JsonValue &lhs, const long double &rhs) {
  if (!lhs.is_float()) {
    return false;
  }
  const double delta = static_cast<double>(lhs.as_float() - rhs);
  return ((delta < DBL_EPSILON) && (delta > -DBL_EPSILON));
}

//! True when `rhs` is a float value whose difference from `lhs`, narrowed
//! to double, lies strictly inside (-DBL_EPSILON, DBL_EPSILON)
static inline bool operator==(const long double &lhs, const JsonValue &rhs) {
  if (!rhs.is_float()) {
    return false;
  }
  const double delta = static_cast<double>(rhs.as_float() - lhs);
  return ((delta < DBL_EPSILON) && (delta > -DBL_EPSILON));
}

//! True when `lhs` is an integer value equal to `rhs` converted to
//! integer_type
static inline bool operator==(const JsonValue &lhs, const unsigned char &rhs) {
  return lhs.is_integer() &&
         (lhs.as_integer() == static_cast<JsonValue::integer_type>(rhs));
}

//! True when `rhs` is an integer value equal to `lhs` converted to
//! integer_type
static inline bool operator==(const unsigned char &lhs, const JsonValue &rhs) {
  return rhs.is_integer() &&
         (static_cast<JsonValue::integer_type>(lhs) == rhs.as_integer());
}

//! True when `lhs` is an integer value equal to `rhs` converted to
//! integer_type
static inline bool operator==(const JsonValue &lhs,
                              const unsigned short int &rhs) {
  return lhs.is_integer() &&
         (lhs.as_integer() == static_cast<JsonValue::integer_type>(rhs));
}

//! True when `rhs` is an integer value equal to `lhs` converted to
//! integer_type
static inline bool operator==(const unsigned short int &lhs,
                              const JsonValue &rhs) {
  return rhs.is_integer() &&
         (static_cast<JsonValue::integer_type>(lhs) == rhs.as_integer());
}

//! True when `lhs` is an integer value equal to `rhs` converted to
//! integer_type
static inline bool operator==(const JsonValue &lhs, const unsigned int &rhs) {
  return lhs.is_integer() &&
         (lhs.as_integer() == static_cast<JsonValue::integer_type>(rhs));
}

//! True when `rhs` is an integer value equal to `lhs` converted to
//! integer_type
static inline bool operator==(const unsigned int &lhs, const JsonValue &rhs) {
  return rhs.is_integer() &&
         (static_cast<JsonValue::integer_type>(lhs) == rhs.as_integer());
}

//! True when `lhs` is an integer value equal to `rhs` converted to
//! integer_type
static inline bool operator==(const JsonValue &lhs,
                              const unsigned long int &rhs) {
  return lhs.is_integer() &&
         (lhs.as_integer() == static_cast<JsonValue::integer_type>(rhs));
}

//! True when `rhs` is an integer value equal to `lhs` converted to
//! integer_type
static inline bool operator==(const unsigned long int &lhs,
                              const JsonValue &rhs) {
  return rhs.is_integer() &&
         (static_cast<JsonValue::integer_type>(lhs) == rhs.as_integer());
}

//! True when `lhs` is an integer value equal to `rhs` converted to
//! integer_type
static inline bool operator==(const JsonValue &lhs,
                              const unsigned long long int &rhs) {
  return lhs.is_integer() &&
         (lhs.as_integer() == static_cast<JsonValue::integer_type>(rhs));
}

//! True when `rhs` is an integer value equal to `lhs` converted to
//! integer_type
static inline bool operator==(const unsigned long long int &lhs,
                              const JsonValue &rhs) {
  return rhs.is_integer() &&
         (static_cast<JsonValue::integer_type>(lhs) == rhs.as_integer());
}

//! True when `lhs` is a string value whose string equals the C string `rhs`
static inline bool operator==(const JsonValue &lhs, const char *rhs) {
  return lhs.is_string() && (lhs.as_string() == rhs);
}

//! True when `rhs` is a string value whose string equals the C string `lhs`
static inline bool operator==(const char *lhs, const JsonValue &rhs) {
  return rhs.is_string() && (lhs == rhs.as_string());
}

//! True when `lhs` is a string value whose string equals `rhs`
static inline bool operator==(const JsonValue &lhs, const std::string &rhs) {
  return lhs.is_string() && (lhs.as_string() == rhs);
}

//! True when `rhs` is a string value whose string equals `lhs`
static inline bool operator==(const std::string &lhs, const JsonValue &rhs) {
  return rhs.is_string() && (lhs == rhs.as_string());
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const JsonString &lhs, const char *rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const char *lhs, const JsonString &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const JsonString &lhs, const std::string &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const std::string &lhs, const JsonString &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const JsonString &lhs, const JsonValue &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const JsonValue &lhs, const JsonString &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const JsonArray &lhs, const JsonValue &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const JsonValue &lhs, const JsonArray &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const JsonObject &lhs, const JsonValue &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const JsonValue &lhs, const JsonObject &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const JsonValue &lhs, const bool &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const bool &lhs, const JsonValue &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const JsonValue &lhs, const signed char &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const signed char &lhs, const JsonValue &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const JsonValue &lhs, const char &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const char &lhs, const JsonValue &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const JsonValue &lhs, const short int &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const short int &lhs, const JsonValue &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const JsonValue &lhs, const int &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const int &lhs, const JsonValue &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const JsonValue &lhs, const long int &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const long int &lhs, const JsonValue &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const JsonValue &lhs, const long long int &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const long long int &lhs, const JsonValue &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const JsonValue &lhs, const float &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const float &lhs, const JsonValue &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const JsonValue &lhs, const double &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const double &lhs, const JsonValue &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const JsonValue &lhs, const long double &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const long double &lhs, const JsonValue &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const JsonValue &lhs, const unsigned char &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const unsigned char &lhs, const JsonValue &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const JsonValue &lhs,
                              const unsigned short int &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const unsigned short int &lhs,
                              const JsonValue &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const JsonValue &lhs, const unsigned int &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const unsigned int &lhs, const JsonValue &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const JsonValue &lhs,
                              const unsigned long int &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const unsigned long int &lhs,
                              const JsonValue &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const JsonValue &lhs,
                              const unsigned long long int &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const unsigned long long int &lhs,
                              const JsonValue &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const JsonValue &lhs, const char *rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const char *lhs, const JsonValue &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const JsonValue &lhs, const std::string &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

//! Inequality: negation of the matching operator==
static inline bool operator!=(const std::string &lhs, const JsonValue &rhs) {
  const bool same = (lhs == rhs);
  return !same;
}

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/include/zvec/ailego/encoding/json.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <zvec/ailego/encoding/json/mod_json_plus.h>


================================================
FILE: src/include/zvec/ailego/hash/crc32c.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <zvec/ailego/internal/platform.h>

namespace zvec {
namespace ailego {

/*! CRC32C hash helpers
 */
struct Crc32c {
  //! Compute the CRC32C checksum of `len` bytes at `data`; `crc` is passed
  //! through to the out-of-line implementation (declared here only)
  static uint32_t Hash(const void *data, size_t len, uint32_t crc);

  //! Convenience overload: same as Hash(data, len, 0u)
  static inline uint32_t Hash(const void *data, size_t len) {
    const uint32_t initial = 0u;
    return Hash(data, len, initial);
  }
};

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/include/zvec/ailego/hash/jump_hash.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstdint>
namespace zvec {
namespace ailego {

//! Jump consistent hash algorithm (https://arxiv.org/pdf/1406.2294.pdf)
//! Maps `key` to a bucket index in [0, num_buckets).
//! Returns -1 when `num_buckets` is not positive, as in the published
//! algorithm, so an invalid bucket count can never look like a valid bucket.
static inline int32_t JumpHash(uint64_t key, int32_t num_buckets) {
  // b starts at -1 so that a loop that never runs reports "no bucket"
  int64_t b = -1, j = 0;
  while (j < num_buckets) {
    b = j;
    // Advance the 64-bit linear congruential generator seeded by the key
    key = key * 2862933555777941757ULL + 1;
    // Next jump target, derived from the top 31 bits of the generator state
    j = (int64_t)(double(b + 1) *
                  (double(1LL << 31) / double((key >> 33) + 1)));
  }
  return (int32_t)b;
}

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/include/zvec/ailego/internal/platform.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#if defined(_WIN32) || defined(_WIN64)
#include <sdkddkver.h>
#endif

#include <sys/types.h>
#include <limits.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#if defined(_MSC_VER)
#include <intrin.h>
#else
#include <strings.h>
#include <unistd.h>
#if defined(__x86_64__) || defined(__i386)
#include <x86intrin.h>
#endif
#if defined(__ARM_NEON)
#include <arm_neon.h>
#endif
#if defined(__ARM_FEATURE_CRC32)
#include <arm_acle.h>
#endif
#endif

#if defined(__cplusplus)
extern "C" {
#endif

//! Debug switch: AILEGO_DEBUG is defined whenever NDEBUG is not defined
#ifndef NDEBUG
#define AILEGO_DEBUG
#endif

//! Provide the GCC-style __SSE*__ feature macros when compiling with MSVC,
//! derived from _M_IX86_FP / _M_AMD64 / _M_X64
#if defined(_MSC_VER)
#if (_M_IX86_FP == 2 || defined(_M_AMD64) || defined(_M_X64))
#define __SSE__ 1
#define __SSE2__ 1
//! SSE3 through SSE4.2 are only advertised for _MSC_VER >= 1500
#if _MSC_VER >= 1500
#define __SSE3__ 1
#define __SSSE3__ 1
#define __SSE4_1__ 1
#define __SSE4_2__ 1
#endif
#elif _M_IX86_FP == 1
//! _M_IX86_FP == 1: only __SSE__ is defined
#define __SSE__ 1
#endif
#endif  // _MSC_VER

//! Word-size selection: defines AILEGO_M64 or AILEGO_M32.
//! On Windows the choice follows _WIN64.
#if defined(_WIN32) || defined(_WIN64)
#if defined(_WIN64)
#define AILEGO_M64
#else
#define AILEGO_M32
#endif
#endif

//! With GCC-compatible compilers only x86_64, aarch64 and ppc64 select
//! AILEGO_M64; every other target gets AILEGO_M32
#if defined(__GNUC__)
#if defined(__x86_64__) || defined(__aarch64__) || defined(__ppc64__)
#define AILEGO_M64
#else
#define AILEGO_M32
#endif
#endif

//! Portable alignment and deprecation attributes.
//! AILEGO_ALIGNED(x) and AILEGO_DEPRECATED map to __declspec on MSVC, to
//! __attribute__ on GCC-compatible compilers, and expand to nothing
//! elsewhere. Skipped entirely when AILEGO_ALIGNED is already defined.
#ifndef AILEGO_ALIGNED
#if defined(_MSC_VER)
#define AILEGO_ALIGNED(x) __declspec(align(x))
#define AILEGO_DEPRECATED __declspec(deprecated)
#elif defined(__GNUC__)
#define AILEGO_ALIGNED(x) __attribute__((aligned(x)))
#define AILEGO_DEPRECATED __attribute__((deprecated))
#else
#define AILEGO_ALIGNED(x)
#define AILEGO_DEPRECATED
#endif
#endif

//! Map 'inline' to '__inline' when MSVC compiles this header as C (not
//! C++), unless 'inline' is already defined as a macro
#if defined(_MSC_VER) && !defined(__cplusplus)
#if !defined(inline)
#define inline __inline
#endif
#endif

//! Provide 'ssize_t' on MSVC as an alias of intptr_t
#if defined(_MSC_VER)
typedef intptr_t ssize_t;
#endif

#if defined(_MSC_VER)
//! Number of trailing 0-bits in x, obtained from _BitScanForward
static inline int ailego_ctz32(uint32_t x) {
  unsigned long index = 0;
  _BitScanForward(&index, x);
  return (int)index;
}

//! Number of leading 0-bits in x: 31 minus the index reported by
//! _BitScanReverse
static inline int ailego_clz32(uint32_t x) {
  unsigned long index = 0;
  _BitScanReverse(&index, x);
  return (31 - (int)index);
}

#if defined(AILEGO_M64)
//! Count the trailing zero bits of a 64-bit value (64-bit MSVC).
//! NOTE(review): per the MSVC documentation the index is undefined when
//! x == 0, so callers should not pass zero.
static inline int ailego_ctz64(uint64_t x) {
  unsigned long lowest_set = 0;
  (void)_BitScanForward64(&lowest_set, x);
  return (int)lowest_set;
}

//! Count the leading zero bits of a 64-bit value (64-bit MSVC).
//! NOTE(review): per the MSVC documentation the index is undefined when
//! x == 0, so callers should not pass zero.
static inline int ailego_clz64(uint64_t x) {
  unsigned long highest_set = 0;
  (void)_BitScanReverse64(&highest_set, x);
  // The index counts from bit 0, leading zeros count from bit 63
  return 63 - (int)highest_set;
}
#else
//! Returns the number of trailing 0-bits in x (32-bit MSVC fallback).
//! The 64-bit value is scanned as two 32-bit halves, low half first.
//! As with __builtin_ctzll, x == 0 has no meaningful result; 0 is returned
//! in that case.
static inline int ailego_ctz64(uint64_t x) {
  unsigned long r = 0;
  // The *return value* of _BitScanForward says whether a set bit was found.
  // The index alone cannot be used for that: index 0 is a valid answer
  // (bit 0 set) and must not be confused with "nothing found".
  if (_BitScanForward(&r, (unsigned long)x)) {
    return (int)r;
  }
  // Low half is empty: the answer (if any) lives in the high half
  if (_BitScanForward(&r, (unsigned long)(x >> 32))) {
    return (int)r + 32;
  }
  return 0;
}

//! Returns the number of leading 0-bits in x (32-bit MSVC fallback).
//! The 64-bit value is scanned as two 32-bit halves, high half first.
//! As with __builtin_clzll, x == 0 has no meaningful result; 63 is returned
//! in that case.
static inline int ailego_clz64(uint64_t x) {
  unsigned long r = 0;
  // The *return value* of _BitScanReverse says whether a set bit was found.
  // The index alone cannot be used for that: index 0 is a valid answer
  // (only bit 0 of the half is set) and must not be treated as "empty".
  if (_BitScanReverse(&r, (unsigned long)(x >> 32))) {
    return (31 - (int)r);
  }
  // High half is empty: the answer (if any) lives in the low half
  if (_BitScanReverse(&r, (unsigned long)x)) {
    return (63 - (int)r);
  }
  return 63;
}
#endif  // AILEGO_M64

//! Counts the number of one bits
#define ailego_popcount32(x) (__popcnt(x))
#define ailego_popcount64(x) (__popcnt64(x))
#define ailego_likely(x) (x)
#define ailego_unlikely(x) (x)
#ifdef __SSE__
#define ailego_prefetch(p) _mm_prefetch((p), 0)
#else
#define ailego_prefetch(p) ((void)(p))
#endif
#else  // !_MSC_VER
//! Bit-scan helpers mapped onto the GCC/Clang builtins. The ctz/clz
//! builtins are undefined for a zero argument.
#define ailego_ctz32(x) (__builtin_ctz(x))
#define ailego_ctz64(x) (__builtin_ctzll(x))
#define ailego_clz32(x) (__builtin_clz(x))
#define ailego_clz64(x) (__builtin_clzll(x))
//! Counts the number of one bits
#define ailego_popcount32(x) (__builtin_popcount(x))
//! The 'long long' builtin is required here: 'long' is only 32 bits wide on
//! ILP32 targets, where the 'l' variant would drop the upper half.
#define ailego_popcount64(x) (__builtin_popcountll(x))
//! Branch prediction hints
#define ailego_likely(x) (__builtin_expect(!!(x), 1))
#define ailego_unlikely(x) (__builtin_expect(!!(x), 0))
//! Prefetch the cache line containing p
#define ailego_prefetch(p) (__builtin_prefetch((p)))
#endif  // _MSC_VER

#if defined(AILEGO_M64)
#define ailego_ctz ailego_ctz64
#define ailego_clz ailego_clz64
#define ailego_popcount ailego_popcount64
#else
#define ailego_ctz ailego_ctz32
#define ailego_clz ailego_clz32
#define ailego_popcount ailego_popcount32
#endif  // AILEGO_M64

#if defined(__arm__) || defined(__aarch64__)
// ARMv7 Architecture Reference Manual (for YIELD)
// ARM Compiler toolchain Compiler Reference (for __yield() intrinsic)
#if defined(__CC_ARM)
#define ailego_yield() __yield()
#else
#define ailego_yield() __asm__ __volatile__("yield")
#endif  // __CC_ARM
#elif defined(__SSE2__)
#define ailego_yield() _mm_pause()
#else
#define ailego_yield() ((void)0)
#endif  // __arm__ || __aarch64__

#if defined(_MSC_VER)
#define ailego_aligned_malloc(SIZE, ALIGN) \
  _aligned_malloc((size_t)(SIZE), (ALIGN))
#define ailego_aligned_free _aligned_free
#else  // !_MSC_VER
#if defined(_ISOC11_SOURCE)
#define ailego_aligned_malloc(SIZE, ALIGN) \
  aligned_alloc((ALIGN), (size_t)(SIZE))
#else  // !_ISOC11_SOURCE
#define ailego_aligned_malloc(SIZE, ALIGN) \
  ailego_posix_malloc((size_t)(SIZE), (ALIGN))
#endif  // _ISOC11_SOURCE
#define ailego_aligned_free free
#endif  // _MSC_VER

#if !defined(__SANITIZE_ADDRESS__)
#if defined(__has_feature)
#if __has_feature(address_sanitizer)
#define __SANITIZE_ADDRESS__ 1
#endif  // address_sanitizer
#endif  // __has_feature
#endif  // !__SANITIZE_ADDRESS__

#if !defined(__SANITIZE_ADDRESS__)
#if !defined(ailego_malloc)
#if defined(__AVX512F__)
#define ailego_malloc(SIZE) ailego_aligned_malloc((SIZE), 64)
#elif defined(__AVX__)
#define ailego_malloc(SIZE) ailego_aligned_malloc((SIZE), 32)
#elif defined(__SSE__)
#define ailego_malloc(SIZE) ailego_aligned_malloc((SIZE), 16)
#elif defined(__ARM_NEON)
#define ailego_malloc(SIZE) ailego_aligned_malloc((SIZE), 16)
#endif
#endif  // !ailego_malloc
#if (defined(__SSE__) || defined(__ARM_NEON)) && !defined(ailego_free)
#define ailego_free ailego_aligned_free
#endif
#endif  // !__SANITIZE_ADDRESS__

#ifndef ailego_malloc
#define ailego_malloc(SIZE) malloc((size_t)(SIZE))
#endif
#ifndef ailego_free
#define ailego_free free
#endif

#ifndef ailego_offsetof
#define ailego_offsetof(TYPE, MEMBER) ((size_t) & ((TYPE *)0)->MEMBER)
#endif

#ifndef ailego_align
#define ailego_align(SIZE, BOUND) (((SIZE) + ((BOUND) - 1)) & ~((BOUND) - 1))
#endif

#ifndef ailego_align8
#define ailego_align8(SIZE) ailego_align(SIZE, 8)
#endif

#ifndef ailego_min
#define ailego_min(A, B) (((A) < (B)) ? (A) : (B))
#endif

#ifndef ailego_max
#define ailego_max(A, B) (((A) > (B)) ? (A) : (B))
#endif

#ifndef ailego_malloc_object
#define ailego_malloc_object(TYPE) ((TYPE *)ailego_malloc(sizeof(TYPE)))
#endif
#ifndef ailego_malloc_array
//! Allocate uninitialized storage for SIZE elements of TYPE via
//! ailego_malloc. SIZE is parenthesized so that expression arguments such
//! as `n + 1` are multiplied as a whole by sizeof(TYPE).
#define ailego_malloc_array(TYPE, SIZE) \
  ((TYPE *)ailego_malloc((SIZE) * sizeof(TYPE)))
#endif

#ifndef ailego_minus_if_ne_zero
#define ailego_minus_if_ne_zero(COND) \
  if (ailego_unlikely((COND) != 0)) return (-1)
#endif

#ifndef ailego_zero_if_ne_zero
#define ailego_zero_if_ne_zero(COND) \
  if (ailego_unlikely((COND) != 0)) return (0)
#endif

#ifndef ailego_null_if_ne_zero
#define ailego_null_if_ne_zero(COND) \
  if (ailego_unlikely((COND) != 0)) return (NULL)
#endif

#ifndef ailego_false_if_ne_zero
#define ailego_false_if_ne_zero(COND) \
  if (ailego_unlikely((COND) != 0)) return (false)
#endif

#ifndef ailego_return_if_ne_zero
#define ailego_return_if_ne_zero(COND) \
  if (ailego_unlikely((COND) != 0)) return
#endif

#ifndef ailego_break_if_ne_zero
#define ailego_break_if_ne_zero(COND) \
  if (ailego_unlikely((COND) != 0)) break
#endif

#ifndef ailego_continue_if_ne_zero
#define ailego_continue_if_ne_zero(COND) \
  if (ailego_unlikely((COND) != 0)) continue
#endif

#ifndef ailego_do_if_ne_zero
#define ailego_do_if_ne_zero(COND) if (ailego_unlikely((COND) != 0))
#endif

#ifndef ailego_minus_if_lt_zero
#define ailego_minus_if_lt_zero(COND) \
  if (ailego_unlikely((COND) < 0)) return (-1)
#endif

#ifndef ailego_zero_if_lt_zero
#define ailego_zero_if_lt_zero(COND) \
  if (ailego_unlikely((COND) < 0)) return (0)
#endif

#ifndef ailego_null_if_lt_zero
#define ailego_null_if_lt_zero(COND) \
  if (ailego_unlikely((COND) < 0)) return (NULL)
#endif

#ifndef ailego_false_if_lt_zero
#define ailego_false_if_lt_zero(COND) \
  if (ailego_unlikely((COND) < 0)) return (false)
#endif

#ifndef ailego_return_if_lt_zero
#define ailego_return_if_lt_zero(COND) \
  if (ailego_unlikely((COND) < 0)) return
#endif

#ifndef ailego_break_if_lt_zero
#define ailego_break_if_lt_zero(COND) \
  if (ailego_unlikely((COND) < 0)) break
#endif

#ifndef ailego_continue_if_lt_zero
#define ailego_continue_if_lt_zero(COND) \
  if (ailego_unlikely((COND) < 0)) continue
#endif

#ifndef ailego_do_if_lt_zero
#define ailego_do_if_lt_zero(COND) if (ailego_unlikely((COND) < 0))
#endif

#ifndef ailego_minus_if_false
#define ailego_minus_if_false(COND) \
  if (ailego_unlikely(!(COND))) return (-1)
#endif

#ifndef ailego_zero_if_false
#define ailego_zero_if_false(COND) \
  if (ailego_unlikely(!(COND))) return (0)
#endif

#ifndef ailego_null_if_false
#define ailego_null_if_false(COND) \
  if (ailego_unlikely(!(COND))) return (NULL)
#endif

#ifndef ailego_false_if_false
#define ailego_false_if_false(COND) \
  if (ailego_unlikely(!(COND))) return (false)
#endif

#ifndef ailego_return_if_false
#define ailego_return_if_false(COND) \
  if (ailego_unlikely(!(COND))) return
#endif

#ifndef ailego_break_if_false
#define ailego_break_if_false(COND) \
  if (ailego_unlikely(!(COND))) break
#endif

#ifndef ailego_continue_if_false
#define ailego_continue_if_false(COND) \
  if (ailego_unlikely(!(COND))) continue
#endif

#ifndef ailego_do_if_false
#define ailego_do_if_false(COND) if (ailego_unlikely(!(COND)))
#endif

#ifndef ailego_compile_assert
#define ailego_compile_assert(COND, MSG) \
  typedef char Static_Assertion_##MSG[(!!(COND)) * 2 - 1]
#endif

#ifndef ailego_static_assert3
#define ailego_static_assert3(COND, LINE) \
  ailego_compile_assert(COND, At_Line_##LINE)
#endif

#ifndef ailego_static_assert2
#define ailego_static_assert2(COND, LINE) ailego_static_assert3(COND, LINE)
#endif

#ifndef ailego_static_assert
#define ailego_static_assert(COND) ailego_static_assert2(COND, __LINE__)
#endif

//! Report and abort if an assertion fails
#ifndef ailego_assert_abort
#define ailego_assert_abort(COND, MSG)                                         \
  (void)(ailego_likely(COND) || (ailego_assert_report(__FILE__, __FUNCTION__,  \
                                                      __LINE__, #COND, (MSG)), \
                                 abort(), 0))
#endif

#ifdef AILEGO_DEBUG
#ifndef ailego_assert
#define ailego_assert(COND) ailego_assert_abort(COND, "")
#endif
#ifndef ailego_assert_with
#define ailego_assert_with(COND, MSG) ailego_assert_abort(COND, MSG)
#endif
#else  // !AILEGO_DEBUG
#ifndef ailego_assert
#define ailego_assert(COND) ((void)0)
#endif
#ifndef ailego_assert_with
#define ailego_assert_with(COND, MSG) ((void)0)
#endif
#endif  // AILEGO_DEBUG

#ifndef ailego_check
#define ailego_check(COND) ailego_assert_abort(COND, "")
#endif
#ifndef ailego_check_with
#define ailego_check_with(COND, MSG) ailego_assert_abort(COND, MSG)
#endif

#ifndef _MSC_VER
//! Allocates memory on a specified alignment boundary
static inline void *ailego_posix_malloc(size_t size, size_t align) {
  void *ptr;
  ailego_null_if_ne_zero(posix_memalign(&ptr, align, size));
  return ptr;
}
#endif

//! Print a failed-assertion diagnostic to stderr.
//! The message names the failed condition text, the function, the file and
//! line, followed by the caller-supplied message.
static inline void ailego_assert_report(const char *file, const char *func,
                                        int line, const char *cond,
                                        const char *msg) {
  (void)fprintf(stderr,
                "Assertion failed: (%s) in %s(), %s line %d. %s\n",
                cond, func, file, line, msg);
}

// Converting a negative float directly to an unsigned integer is undefined
// behavior (on ARM the result is zero), so the value is converted to a
// signed 32-bit integer first and then narrowed to uint8_t.
// Plain C casts are used because this header is also meant to be usable
// from C translation units, where static_cast is not available.
static inline uint8_t static_cast_from_float_to_uint8(float data) {
  return (uint8_t)((int32_t)data);
}

// Converts a float to uint16_t through a signed 32-bit integer, so that
// negative inputs wrap instead of hitting the undefined float-to-unsigned
// conversion. Plain C casts keep this header usable from C translation
// units, where static_cast is not available.
static inline uint16_t static_cast_from_float_to_uint16(float data) {
  return (uint16_t)((int32_t)data);
}

#ifdef __cplusplus
} /* extern "C" */
#endif


================================================
FILE: src/include/zvec/ailego/io/file.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <zvec/ailego/utility/file_helper.h>

namespace zvec {
namespace ailego {

/*! File Utility
 *
 *  Move-only owner of a native OS file handle. The destructor closes the
 *  handle; the static helpers forward to FileHelper or to the memory
 *  mapping primitives declared below.
 */
class File {
 public:
  //! Native Handle in OS
  typedef FileHelper::NativeHandle NativeHandle;

  //! Invalid Handle
  static constexpr NativeHandle InvalidHandle = (NativeHandle)(-1);

  //! Specifies the position in a file to use for seeking.
  enum struct Origin { Begin = 0, Current = 1, End = 2 };

  //! Options of memory mapping (bit flags, may be OR-ed together)
  enum {
    MMAP_READONLY = 1,
    MMAP_SHARED = 2,
    MMAP_LOCKED = 4,
    MMAP_WARMUP = 8,
    MMAP_POPULATE = 16,
    MMAP_HUGE_PAGE = 32,
  };

  //! Constructor: starts without an open handle
  File(void) : native_handle_(File::InvalidHandle), read_only_(false) {}

  //! Move constructor: takes over the handle of rhs and leaves rhs invalid
  File(File &&rhs)
      : native_handle_(rhs.native_handle_), read_only_(rhs.read_only_) {
    rhs.read_only_ = false;
    rhs.native_handle_ = File::InvalidHandle;
  }

  //! Destructor
  ~File(void) {
    this->close();
  }

  //! Move assignment: releases the handle currently held, then takes over
  //! the handle of rhs and leaves rhs invalid
  File &operator=(File &&rhs) {
    if (this != &rhs) {
      // Close our own handle first; simply overwriting it would leak it.
      // close() is also what the destructor calls on a handle-less object,
      // so it is expected to tolerate an invalid handle.
      this->close();
      read_only_ = rhs.read_only_;
      native_handle_ = rhs.native_handle_;
      rhs.read_only_ = false;
      rhs.native_handle_ = File::InvalidHandle;
    }
    return *this;
  }

  //! Test if the file is valid
  bool is_valid(void) const {
    return (native_handle_ != File::InvalidHandle);
  }

  //! Retrieve non-zero if the file was opened read only
  bool read_only(void) const {
    return read_only_;
  }

  //! Retrieve native handle
  NativeHandle native_handle(void) const {
    return native_handle_;
  }

  //! Create a local file
  bool create(const char *path, size_t size, bool direct);

  //! Open a local file
  bool open(const char *path, bool rdonly, bool direct);

  //! Close the local file
  void close(void);

  //! Reset the file
  void reset(void);

  //! Write data into the file
  size_t write(const void *data, size_t len);

  //! Write data into the file at the given offset
  size_t write(ssize_t off, const void *data, size_t len);

  //! Read data from the file
  size_t read(void *buf, size_t len);

  //! Read data from the file at the given offset
  size_t read(ssize_t off, void *buf, size_t len);

  //! Synchronize memory with physical storage
  bool flush(void);

  //! Sets the current position of the file to the given value
  bool seek(ssize_t off, Origin origin);

  //! Truncate the file to a specified length
  bool truncate(size_t len);

  //! Retrieve size of file
  size_t size(void) const;

  //! Retrieve offset of file
  ssize_t offset(void) const;

  //! Create a local file (direct = false)
  bool create(const char *path, size_t len) {
    return this->create(path, len, false);
  }

  //! Create a local file
  bool create(const std::string &path, size_t len, bool direct) {
    return this->create(path.c_str(), len, direct);
  }

  //! Create a local file (direct = false)
  bool create(const std::string &path, size_t len) {
    return this->create(path.c_str(), len);
  }

  //! Open a local file (direct = false)
  bool open(const char *path, bool rdonly) {
    return this->open(path, rdonly, false);
  }

  //! Open a local file
  bool open(const std::string &path, bool rdonly, bool direct) {
    return this->open(path.c_str(), rdonly, direct);
  }

  //! Open a local file (direct = false)
  bool open(const std::string &path, bool rdonly) {
    return this->open(path.c_str(), rdonly);
  }

  //! Map a region of file into memory; MMAP_READONLY is forced when the
  //! file itself is read only
  void *map(ssize_t off, size_t len, int opts) {
    if (read_only_) {
      opts |= File::MMAP_READONLY;
    }
    return File::MemoryMap(native_handle_, off, len, opts);
  }

  //! Map a region of file into memory
  static void *MemoryMap(NativeHandle handle, ssize_t off, size_t len,
                         int opts);

  //! Map an anonymous region into memory
  static void *MemoryMap(size_t len, int opts);

  //! Remap the region into memory
  static void *MemoryRemap(void *oldptr, size_t oldsize, void *newptr,
                           size_t newsize);

  //! Unmap a mapping region
  static void MemoryUnmap(void *addr, size_t len);

  //! Synchronize a memory map
  static bool MemoryFlush(void *addr, size_t len);

  //! Lock the memory region into RAM
  static bool MemoryLock(void *addr, size_t len);

  //! Unlock the memory region in RAM
  static bool MemoryUnlock(void *addr, size_t len);

  //! Warm up a memory region
  static void MemoryWarmup(void *addr, size_t len);

  //! Delete a name and possibly the file it refers to
  static bool Delete(const char *path) {
    return FileHelper::DeleteFile(path);
  }

  //! Delete a name and possibly the file it refers to
  static bool Delete(const std::string &path) {
    return FileHelper::DeleteFile(path.c_str());
  }

  //! Change the name or location of a file
  static bool Rename(const char *oldpath, const char *newpath) {
    return FileHelper::RenameFile(oldpath, newpath);
  }

  //! Change the name or location of a file
  static bool Rename(const std::string &oldpath, const std::string &newpath) {
    return FileHelper::RenameFile(oldpath.c_str(), newpath.c_str());
  }

  //! Retrieve the base name from a path
  static const char *BaseName(const char *path) {
    return FileHelper::BaseName(path);
  }

  //! Retrieve the base name from a path
  static const char *BaseName(const std::string &path) {
    return BaseName(path.c_str());
  }

  //! Make directories' path
  static bool MakePath(const char *path) {
    return FileHelper::MakePath(path);
  }

  //! Make directories' path
  static bool MakePath(const std::string &path) {
    return FileHelper::MakePath(path.c_str());
  }

  //! Remove a file or a directory (includes files & subdirectories)
  static bool RemovePath(const char *path) {
    return FileHelper::RemovePath(path);
  }

  //! Remove a file or a directory (includes files & subdirectories)
  static bool RemovePath(const std::string &path) {
    return FileHelper::RemovePath(path.c_str());
  }

  //! Remove a directory (includes files & subdirectories)
  static bool RemoveDirectory(const char *path) {
    return FileHelper::RemoveDirectory(path);
  }

  //! Remove a directory (includes files & subdirectories)
  static bool RemoveDirectory(const std::string &path) {
    return FileHelper::RemoveDirectory(path.c_str());
  }

  //! Retrieve non-zero if the path exists
  static bool IsExist(const char *path) {
    return FileHelper::IsExist(path);
  }

  //! Retrieve non-zero if the path exists
  static bool IsExist(const std::string &path) {
    return FileHelper::IsExist(path.c_str());
  }

  //! Retrieve non-zero if the path is a regular file
  static bool IsRegular(const char *path) {
    return FileHelper::IsRegular(path);
  }

  //! Retrieve non-zero if the path is a regular file
  static bool IsRegular(const std::string &path) {
    return FileHelper::IsRegular(path.c_str());
  }

  //! Retrieve non-zero if the path is a directory
  static bool IsDirectory(const char *path) {
    return FileHelper::IsDirectory(path);
  }

  //! Retrieve non-zero if the path is a directory
  static bool IsDirectory(const std::string &path) {
    return FileHelper::IsDirectory(path.c_str());
  }

  //! Retrieve non-zero if the path is a symbolic link
  static bool IsSymbolicLink(const char *path) {
    return FileHelper::IsSymbolicLink(path);
  }

  //! Retrieve non-zero if the path is a symbolic link
  static bool IsSymbolicLink(const std::string &path) {
    return FileHelper::IsSymbolicLink(path.c_str());
  }

  //! Retrieve non-zero if two paths are pointing to the same file
  static bool IsSame(const char *path1, const char *path2) {
    return FileHelper::IsSame(path1, path2);
  }

  //! Retrieve non-zero if two paths are pointing to the same file
  static bool IsSame(const std::string &path1, const std::string &path2) {
    return FileHelper::IsSame(path1.c_str(), path2.c_str());
  }

 private:
  //! Disable them
  File(const File &) = delete;
  File &operator=(const File &) = delete;

  //! Members
  NativeHandle native_handle_;
  bool read_only_;
};

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/include/zvec/ailego/io/mmap_file.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <zvec/ailego/internal/platform.h>
#include <zvec/ailego/io/file.h>

namespace zvec {
namespace ailego {

/*! Memory Mapping File
 *
 *  Move-only owner of a memory mapped region of a file. The region is
 *  unmapped on close() and in the destructor. Sequential read/write calls
 *  advance an internal offset; the positional overloads do not touch it.
 *  All transfers are clamped to the size of the mapped region.
 */
class MMapFile {
 public:
  //! Constructor: starts without a mapped region
  MMapFile(void)
      : read_only_(false), region_(nullptr), region_size_(0), offset_(0) {}

  //! Move constructor: takes over the mapping of rhs and leaves rhs empty
  MMapFile(MMapFile &&rhs)
      : read_only_(rhs.read_only_),
        region_(rhs.region_),
        region_size_(rhs.region_size_),
        offset_(rhs.offset_) {
    rhs.read_only_ = false;
    rhs.region_ = nullptr;
    rhs.region_size_ = 0;
    rhs.offset_ = 0;
  }

  //! Destructor
  ~MMapFile(void) {
    this->close();
  }

  //! Move assignment: unmaps the region currently held, then takes over the
  //! mapping of rhs and leaves rhs empty
  MMapFile &operator=(MMapFile &&rhs) {
    if (this != &rhs) {
      // Unmap our own region first; simply overwriting the pointer would
      // leak the mapping.
      this->close();
      read_only_ = rhs.read_only_;
      region_ = rhs.region_;
      region_size_ = rhs.region_size_;
      offset_ = rhs.offset_;
      rhs.read_only_ = false;
      rhs.region_ = nullptr;
      rhs.region_size_ = 0;
      rhs.offset_ = 0;
    }
    return *this;
  }

  //! Test if the file is valid
  bool is_valid(void) const {
    return (region_ != nullptr);
  }

  //! Retrieve non-zero if memory region is read only
  bool read_only(void) const {
    return read_only_;
  }

  //! Create a file of `len` bytes and map it (shared, writable).
  //! Fails if a region is already mapped or path is null.
  bool create(const char *path, size_t len) {
    ailego_false_if_false(!region_ && path);

    File file;
    ailego_false_if_false(file.create(path, len));

    region_ = File::MemoryMap(file.native_handle(), 0, len, File::MMAP_SHARED);
    ailego_false_if_false(region_);

    read_only_ = false;
    region_size_ = len;
    return true;
  }

  //! Create a memory mapping file
  bool create(const std::string &path, size_t len) {
    return this->create(path.c_str(), len);
  }

  //! Open an existing file and map its whole content.
  //! Fails if a region is already mapped or path is null.
  bool open(const char *path, bool rdonly, bool shared) {
    ailego_false_if_false(!region_ && path);

    File file;
    ailego_false_if_false(file.open(path, rdonly, false));

    size_t len = file.size();
    int opts = 0;
    if (rdonly) {
      opts |= File::MMAP_READONLY;
    }
    if (shared) {
      opts |= File::MMAP_SHARED;
    }
    region_ = File::MemoryMap(file.native_handle(), 0, len, opts);
    ailego_false_if_false(region_);

    read_only_ = rdonly;
    region_size_ = len;
    return true;
  }

  //! Open a memory mapping file
  bool open(const std::string &path, bool rdonly, bool shared) {
    return this->open(path.c_str(), rdonly, shared);
  }

  //! Open a memory mapping file (shared = false)
  bool open(const char *path, bool rdonly) {
    return this->open(path, rdonly, false);
  }

  //! Open a memory mapping file (shared = false)
  bool open(const std::string &path, bool rdonly) {
    return this->open(path, rdonly, false);
  }

  //! Close the memory mapping file
  void close(void) {
    File::MemoryUnmap(region_, region_size_);
    region_ = nullptr;
    region_size_ = 0;
    offset_ = 0;
  }

  //! Synchronize memory with physical storage
  bool flush(void) {
    return File::MemoryFlush(region_, region_size_);
  }

  //! Lock the memory region into RAM
  bool lock(void) {
    return File::MemoryLock(region_, region_size_);
  }

  //! Unlock the memory region in RAM
  bool unlock(void) {
    return File::MemoryUnlock(region_, region_size_);
  }

  //! Warm up the memory region
  void warmup(void) {
    File::MemoryWarmup(region_, region_size_);
  }

  //! Reset the sequential offset to the beginning of the region
  void reset(void) {
    offset_ = 0;
  }

  //! Write data at the current offset and advance it.
  //! Returns the number of bytes actually written (clamped to the region).
  size_t write(const void *data, size_t len) {
    size_t off = offset_;
    len = this->clamp_range(&off, len);
    memcpy((uint8_t *)region_ + off, data, len);
    offset_ = off + len;
    return len;
  }

  //! Write data at the given offset (the sequential offset is untouched).
  //! Returns the number of bytes actually written (clamped to the region).
  size_t write(size_t off, const void *data, size_t len) {
    len = this->clamp_range(&off, len);
    memcpy((uint8_t *)region_ + off, data, len);
    return len;
  }

  //! Read data at the current offset (Zero-copy) and advance it.
  //! *data points into the mapped region; returns the readable length.
  size_t read(const void **data, size_t len) {
    size_t off = offset_;
    len = this->clamp_range(&off, len);
    *data = (uint8_t *)region_ + off;
    offset_ = off + len;
    return len;
  }

  //! Read data at the given offset (Zero-copy).
  //! *data points into the mapped region; returns the readable length.
  size_t read(size_t off, const void **data, size_t len) {
    len = this->clamp_range(&off, len);
    *data = (uint8_t *)region_ + off;
    return len;
  }

  //! Copy data from the current offset into `data` and advance the offset.
  //! Returns the number of bytes actually copied (clamped to the region).
  size_t read(void *data, size_t len) {
    size_t off = offset_;
    len = this->clamp_range(&off, len);
    memcpy(data, (uint8_t *)region_ + off, len);
    offset_ = off + len;
    return len;
  }

  //! Copy data from the given offset into `data`.
  //! Returns the number of bytes actually copied (clamped to the region).
  size_t read(size_t off, void *data, size_t len) {
    len = this->clamp_range(&off, len);
    memcpy(data, (uint8_t *)region_ + off, len);
    return len;
  }

  //! Retrieve memory region of file
  void *region(void) const {
    return region_;
  }

  //! Retrieve region size of file
  size_t size(void) const {
    return region_size_;
  }

  //! Retrieve offset of file
  size_t offset(void) const {
    return offset_;
  }

 private:
  //! Disable them
  MMapFile(const MMapFile &) = delete;
  MMapFile &operator=(const MMapFile &) = delete;

  //! Clamp the range [*off, *off + len) to the mapped region.
  //! *off is limited to the region size and the usable length is returned.
  //! The comparison is done by subtraction, so a huge `len` cannot wrap
  //! around the way `off + len` could.
  size_t clamp_range(size_t *off, size_t len) const {
    if (*off > region_size_) {
      *off = region_size_;
    }
    const size_t available = region_size_ - *off;
    return (len < available) ? len : available;
  }

  //! Members
  bool read_only_;
  void *region_;
  size_t region_size_;
  size_t offset_;
};

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/include/zvec/ailego/logger/logger.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstdarg>
#include <memory>
#include <zvec/ailego/container/params.h>
#include <zvec/ailego/pattern/factory.h>

//! Register Index Logger
#define FACTORY_REGISTER_LOGGER_ALIAS(__NAME__, __IMPL__, ...)      \
  AILEGO_FACTORY_REGISTER(__NAME__, zvec::ailego::Logger, __IMPL__, \
                          ##__VA_ARGS__)

//! Register Index Logger
#define FACTORY_REGISTER_LOGGER(__IMPL__, ...) \
  FACTORY_REGISTER_LOGGER_ALIAS(__IMPL__, __IMPL__, ##__VA_ARGS__)

#define PROXIMA_LOG_IMPL(level, format, ...)                             \
  do {                                                                   \
    if (zvec::ailego::LoggerBroker::IsLevelEnabled(level)) {             \
      zvec::ailego::LoggerBroker::Log(level, __FILE__, __LINE__, format, \
                                      ##__VA_ARGS__);                    \
    }                                                                    \
  } while (0)

//! Log Debug Message
#ifndef LOG_DEBUG
#define LOG_DEBUG(format, ...) \
  PROXIMA_LOG_IMPL(zvec::ailego::Logger::LEVEL_DEBUG, format, ##__VA_ARGS__)
#endif

//! Log Information Message
#ifndef LOG_INFO
#define LOG_INFO(format, ...) \
  PROXIMA_LOG_IMPL(zvec::ailego::Logger::LEVEL_INFO, format, ##__VA_ARGS__)
#endif

//! Log Warn Message
#ifndef LOG_WARN
#define LOG_WARN(format, ...) \
  PROXIMA_LOG_IMPL(zvec::ailego::Logger::LEVEL_WARN, format, ##__VA_ARGS__)
#endif

//! Log Error Message
#ifndef LOG_ERROR
#define LOG_ERROR(format, ...) \
  PROXIMA_LOG_IMPL(zvec::ailego::Logger::LEVEL_ERROR, format, ##__VA_ARGS__)
#endif

//! Log Fatal Message
#ifndef LOG_FATAL
#define LOG_FATAL(format, ...) \
  PROXIMA_LOG_IMPL(zvec::ailego::Logger::LEVEL_FATAL, format, ##__VA_ARGS__)
#endif

namespace zvec {
namespace ailego {

/*! Index Logger
 *
 *  Abstract logging sink. Implementations are registered with LoggerBroker
 *  and receive already filtered messages through log().
 */
struct Logger {
  //! Index Logger Pointer
  typedef std::shared_ptr<Logger> Pointer;

  static const int LEVEL_DEBUG;
  static const int LEVEL_INFO;
  static const int LEVEL_WARN;
  static const int LEVEL_ERROR;
  static const int LEVEL_FATAL;

  //! Retrieve string of level.
  //! Returns an empty string for a level outside the table, including
  //! negative values (which would otherwise index before the array).
  static const char *LevelString(int level) {
    static const char *info[] = {"DEBUG", " INFO", " WARN", "ERROR", "FATAL"};
    if (level >= 0 && level < (int)(sizeof(info) / sizeof(info[0]))) {
      return info[level];
    }
    return "";
  }

  //! Retrieve symbol of level.
  //! Returns a blank for a level outside the table, including negative
  //! values (which would otherwise index before the array).
  static char LevelSymbol(int level) {
    static const char info[5] = {'D', 'I', 'W', 'E', 'F'};
    if (level >= 0 && level < (int)(sizeof(info) / sizeof(info[0]))) {
      return info[level];
    }
    return ' ';
  }

  //! Destructor
  virtual ~Logger(void) {}

  //! Initialize Logger
  virtual int init(const Params &params) = 0;

  //! Cleanup Logger
  virtual int cleanup(void) = 0;

  //! Log Message (printf-style format with a va_list of arguments)
  virtual void log(int level, const char *file, int line, const char *format,
                   va_list args) = 0;
};

/*! Index Logger Broker
 *
 *  Static front end that holds the active Logger and the minimum level,
 *  and forwards printf-style messages to that logger.
 */
class LoggerBroker {
 public:
  //! Install a logger and hand back the one that was installed before
  static Logger::Pointer Register(Logger::Pointer logger) {
    // After the swap `logger` holds the previous logger
    logger_.swap(logger);
    return logger;
  }

  //! Install a logger and initialize it with params.
  //! Returns whatever the new logger's init() returns.
  static int Register(Logger::Pointer logger, const ailego::Params &params) {
    //! Cleanup the previous, before initializing the new one
    if (logger_ != nullptr) {
      logger_->cleanup();
    }
    logger_ = std::move(logger);
    return logger_->init(params);
  }

  //! Drop the active logger
  static void Unregister(void) {
    logger_.reset();
  }

  //! Set the minimum level that gets logged
  static void SetLevel(int level) {
    logger_level_ = level;
  }

  //! Check if log level is enabled: the level must not be below the
  //! configured minimum and a logger must be registered
  static bool IsLevelEnabled(int level) {
    if (level < logger_level_) {
      return false;
    }
    return static_cast<bool>(logger_);
  }

  //! Forward a printf-style message to the active logger when enabled
  __attribute__((format(printf, 4, 5))) static void Log(
      int level, const char *file, int line, const char *format, ...) {
    if (!IsLevelEnabled(level)) {
      return;
    }
    va_list args;
    va_start(args, format);
    logger_->log(level, file, line, format, args);
    va_end(args);
  }

 private:
  //! Disable them
  LoggerBroker(void) = delete;
  LoggerBroker(const LoggerBroker &) = delete;
  LoggerBroker(LoggerBroker &&) = delete;

  //! Members
  static int logger_level_;
  static Logger::Pointer logger_;
};

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/include/zvec/ailego/math_batch/utils.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstddef>
namespace zvec::ailego::DistanceBatch {

//! Signature of a callback that receives a query buffer and its dimension
//! and returns nothing.
using DistanceBatchQueryPreprocessFunc = void (*)(void *query, size_t dim);

}  // namespace zvec::ailego::DistanceBatch

================================================
FILE: src/include/zvec/ailego/parallel/thread_pool.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <atomic>
#include <condition_variable>
#include <mutex>
#include <queue>
#include <thread>
#include <utility>
#include <vector>
#include <zvec/ailego/pattern/closure.h>

namespace zvec {
namespace ailego {

/*! Thread Pool
 *
 *  Keeps a vector of worker threads (`pool_`) and one shared task queue
 *  (`queue_`, guarded by `queue_mutex_`). The sized constructor, the worker
 *  loop, task picking and processor binding are only declared here; their
 *  definitions live outside this header.
 */
class ThreadPool {
 public:
  /*! Thread Pool Task Group
   *
   *  Counts the pending and active tasks that were submitted through it, so
   *  a caller can wait for that subset of the pool's work. Every enqueue path
   *  calls shared_from_this(), so a group has to be owned by a
   *  std::shared_ptr (ThreadPool::make_group() creates one that way).
   */
  class TaskGroup : public std::enable_shared_from_this<TaskGroup> {
   public:
    using Pointer = std::shared_ptr<TaskGroup>;

    //! Constructor (`pool` is kept as a raw, non-owning pointer)
    TaskGroup(ThreadPool *pool) : pool_(pool) {}

    //! Push a task to the pool queue; no worker is woken
    void enqueue(const ClosureHandler &handle) {
      pool_->enqueue(handle, this->shared_from_this(), nullptr);
    }

    //! Push a task to the pool queue (move); no worker is woken
    void enqueue(ClosureHandler &&handle) {
      pool_->enqueue(std::move(handle), this->shared_from_this(), nullptr);
    }

    //! Submit a task to the queue (forwards to enqueue_and_wake)
    void submit(ClosureHandler &&handle) {
      this->enqueue_and_wake(std::move(handle));
    }

    //! Push a task to the pool queue and wake one worker
    void enqueue_and_wake(const ClosureHandler &handle) {
      pool_->enqueue_and_wake(handle, this->shared_from_this(), nullptr);
    }

    //! Push a task to the pool queue (move) and wake one worker
    void enqueue_and_wake(ClosureHandler &&handle) {
      pool_->enqueue_and_wake(std::move(handle), this->shared_from_this(),
                              nullptr);
    }

    //! Wrap the arguments in a closure, run it in the pool and block the
    //! caller until that single task signals completion
    template <typename... TArgs>
    void execute_and_wait(TArgs &&...args) {
      // The control block lives on this stack frame; see TaskControl::notify
      // for why the completion flag is published under its mutex.
      ThreadPool::TaskControl ctrl;
      pool_->enqueue_and_wake(Closure::New(std::forward<TArgs>(args)...),
                              this->shared_from_this(), &ctrl);
      ctrl.wait();
    }

    //! Wrap the arguments in a closure and run it in the pool (no waiting)
    template <typename... TArgs>
    void execute(TArgs &&...args) {
      this->enqueue_and_wake(Closure::New(std::forward<TArgs>(args)...));
    }

    //! Wait until all tasks in group finished
    //! (returns immediately when nothing is pending or active)
    void wait_finish(void) {
      std::unique_lock<std::mutex> lock(mutex_);
      cond_.wait(lock, [this]() { return this->is_finished(); });
    }

    //! Check if the group is finished (no active and no pending task)
    bool is_finished(void) const {
      return (active_count_ == 0 && pending_count_ == 0);
    }

    //! Retrieve count of pending tasks in group
    size_t pending_count(void) const {
      return pending_count_.load(std::memory_order_relaxed);
    }

    //! Retrieve count of active tasks in group
    size_t active_count(void) const {
      return active_count_.load(std::memory_order_relaxed);
    }

   protected:
    friend class ThreadPool;

    //! Mark a task enqueued
    void mark_task_enqueued(void) {
      ++pending_count_;
    }

    //! Mark a task actived: move one task from "pending" to "active".
    //! Both counters change under `mutex_`, and active is raised first, so
    //! a waiter never observes a transient "finished" state in between.
    void mark_task_actived(void) {
      std::lock_guard<std::mutex> lock(mutex_);
      ++active_count_;
      --pending_count_;
    }

    //! Notify a task finished; wakes the waiters once the group is idle
    void notify(void) {
      std::lock_guard<std::mutex> lock(mutex_);
      if (--active_count_ == 0 && pending_count_ == 0) {
        cond_.notify_all();
      }
    }

   private:
    //! Members
    ThreadPool *pool_{nullptr};
    std::atomic_uint active_count_{0};
    std::atomic_uint pending_count_{0};
    std::mutex mutex_{};
    std::condition_variable cond_{};
  };

  //! Constructor (defined out of line).
  //! NOTE(review): `size` is presumably the number of worker threads and
  //! `binding` whether they get bound to processors -- confirm in the
  //! implementation file.
  explicit ThreadPool(uint32_t size, bool binding);

  //! Constructor: sizes the pool by hardware_concurrency(), at least 1
  explicit ThreadPool(bool binding)
      : ThreadPool{std::max(std::thread::hardware_concurrency(), 1u), binding} {
  }

  //! Constructor: same as ThreadPool(false)
  ThreadPool(void) : ThreadPool{false} {}

  //! Destructor: requests a stop, then joins every joinable thread
  ~ThreadPool(void) {
    this->stop();

    // Join all threads
    for (std::thread &thread : pool_) {
      if (thread.joinable()) {
        thread.join();
      }
    }
  }

  //! Retrieve thread count in pool
  size_t count(void) const {
    return pool_.size();
  }

  //! Stop all threads
  void stop(void) {
    // Set stop flag as true, then wake all threads. The notification is
    // issued while holding the queue mutex.
    stopping_ = true;
    std::lock_guard<std::mutex> lock(queue_mutex_);
    work_cond_.notify_all();
  }

  //! Push a task to the queue; no worker is woken
  void enqueue(const ClosureHandler &handle) {
    this->enqueue(handle, nullptr);
  }

  //! Push a task to the queue (move); no worker is woken
  void enqueue(ClosureHandler &&handle) {
    this->enqueue(std::move(handle), nullptr);
  }

  //! Push a task to the queue and wake one worker
  void enqueue_and_wake(const ClosureHandler &handle) {
    this->enqueue_and_wake(handle, nullptr);
  }

  //! Push a task to the queue (move) and wake one worker
  void enqueue_and_wake(ClosureHandler &&handle) {
    this->enqueue_and_wake(std::move(handle), nullptr);
  }

  //! Wrap the arguments in a closure, run it in the pool and block the
  //! caller until that single task signals completion
  template <typename... TArgs>
  void execute_and_wait(TArgs &&...args) {
    // The control block lives on this stack frame; see TaskControl::notify
    // for why the completion flag is published under its mutex.
    ThreadPool::TaskControl ctrl;
    this->enqueue_and_wake(Closure::New(std::forward<TArgs>(args)...), &ctrl);
    ctrl.wait();
  }

  //! Wrap the arguments in a closure and run it in the pool (no waiting)
  template <typename... TArgs>
  void execute(TArgs &&...args) {
    this->enqueue_and_wake(Closure::New(std::forward<TArgs>(args)...));
  }

  //! Wake any one thread
  void wake_any(void) {
    std::lock_guard<std::mutex> lock(queue_mutex_);
    work_cond_.notify_one();
  }

  //! Wake all threads
  void wake_all(void) {
    std::lock_guard<std::mutex> lock(queue_mutex_);
    work_cond_.notify_all();
  }

  //! Wait until all threads finished processing
  void wait_finish(void) {
    std::unique_lock<std::mutex> lock(wait_mutex_);
    finished_cond_.wait(lock, [this]() { return this->is_finished(); });
  }

  //! Wait until all threads stopped processing
  void wait_stop(void) {
    std::unique_lock<std::mutex> lock(wait_mutex_);
    stopped_cond_.wait(lock, [this]() { return this->is_stopped(); });
  }

  //! Make a task group bound to this pool (owned by a shared pointer)
  TaskGroup::Pointer make_group(void) {
    return std::make_shared<TaskGroup>(this);
  }

  //! Check if the pool is finished (no active and no pending task)
  bool is_finished(void) const {
    return (active_count_ == 0 && pending_count_ == 0);
  }

  //! Check if the pool is stopped (worker count is zero)
  bool is_stopped(void) const {
    return (worker_count_ == 0);
  }

  //! Retrieve count of worker in pool
  size_t worker_count(void) const {
    return worker_count_.load(std::memory_order_relaxed);
  }

  //! Retrieve count of pending tasks in pool
  size_t pending_count(void) const {
    return pending_count_.load(std::memory_order_relaxed);
  }

  //! Retrieve count of active tasks in pool
  size_t active_count(void) const {
    return active_count_.load(std::memory_order_relaxed);
  }

  //! Get the thread index via thread id (-1 when the id is not in the pool)
  int indexof(const std::thread::id &thread_id) const {
    for (size_t i = 0; i < pool_.size(); ++i) {
      if (pool_[i].get_id() == thread_id) {
        return static_cast<int>(i);
      }
    }
    return -1;
  }

  //! Get the current work thread index (-1 when called from a foreign thread)
  int indexof_this(void) const {
    return this->indexof(std::this_thread::get_id());
  }

  //! Bind threads to processors (defined out of line)
  void bind(void);

  //! Unbind threads of processors (defined out of line)
  void unbind(void);

 protected:
  /*! Thread task control
   *
   *  One-shot completion signal for a single task. execute_and_wait()
   *  creates it on the caller's stack and destroys it as soon as wait()
   *  returns.
   */
  class TaskControl {
   public:
    //! Notify task finished
    void notify(void) {
      // Publish the flag while holding the mutex. If it were set before the
      // lock was taken, the waiter could see `finished_ == true`, leave
      // wait() and destroy this stack-allocated object while this thread is
      // still about to lock `mutex_` and signal `cond_`.
      std::lock_guard<std::mutex> lock(mutex_);
      finished_ = true;
      cond_.notify_one();
    }

    //! Wait until task finished
    void wait(void) {
      std::unique_lock<std::mutex> lock(mutex_);
      cond_.wait(lock, [this]() { return finished_.load(); });
    }

   private:
    std::atomic_bool finished_{false};
    std::mutex mutex_{};
    std::condition_variable cond_{};
  };

  /*! Thread task
   *
   *  Queue entry: the closure to run plus an optional group (shared
   *  ownership) and an optional completion control (raw, non-owning).
   */
  struct Task {
    // Constructor
    Task(const ClosureHandler &h, TaskControl *c) : handle(h), control(c) {}

    // Constructor
    Task(ClosureHandler &&h, TaskControl *c)
        : handle(std::move(h)), control(c) {}

    // Constructor
    Task(const ClosureHandler &h, TaskGroup::Pointer &&g, TaskControl *c)
        : handle(h), group(std::move(g)), control(c) {}

    // Constructor
    Task(ClosureHandler &&h, TaskGroup::Pointer &&g, TaskControl *c)
        : handle(std::move(h)), group(std::move(g)), control(c) {}

    // Constructor
    Task(void) {}

    //! Members
    ClosureHandler handle{};
    TaskGroup::Pointer group{nullptr};
    TaskControl *control{nullptr};
  };

  //! Thread worker callback (defined out of line)
  void worker(void);

  //! Pick a task from queue (defined out of line)
  bool picking(Task *task);

  //! Push a task to the queue; empty handles are ignored
  template <typename T>
  void enqueue(T &&handle, TaskControl *ctrl) {
    if (handle) {
      std::lock_guard<std::mutex> lock(queue_mutex_);
      ++pending_count_;
      queue_.emplace(std::forward<T>(handle), ctrl);
    }
  }

  //! Push a task to the queue with group; empty handles are ignored
  template <typename T>
  void enqueue(T &&handle, TaskGroup::Pointer &&group, TaskControl *ctrl) {
    if (handle) {
      std::lock_guard<std::mutex> lock(queue_mutex_);
      ++pending_count_;
      // Count the task in the group before ownership of `group` moves into
      // the queued Task.
      group->mark_task_enqueued();
      queue_.emplace(std::forward<T>(handle), std::move(group), ctrl);
    }
  }

  //! Push a task to the queue and wake one worker; empty handles are ignored
  template <typename T>
  void enqueue_and_wake(T &&handle, TaskControl *ctrl) {
    if (handle) {
      std::lock_guard<std::mutex> lock(queue_mutex_);
      ++pending_count_;
      queue_.emplace(std::forward<T>(handle), ctrl);
      work_cond_.notify_one();
    }
  }

  //! Push a task to the queue with group and wake one worker; empty handles
  //! are ignored
  template <typename T>
  void enqueue_and_wake(T &&handle, TaskGroup::Pointer &&group,
                        TaskControl *ctrl) {
    if (handle) {
      std::lock_guard<std::mutex> lock(queue_mutex_);
      ++pending_count_;
      // Count the task in the group before ownership of `group` moves into
      // the queued Task.
      group->mark_task_enqueued();
      queue_.emplace(std::forward<T>(handle), std::move(group), ctrl);
      work_cond_.notify_one();
    }
  }

 private:
  //! Disable them
  ThreadPool(const ThreadPool &) = delete;
  ThreadPool(ThreadPool &&) = delete;
  ThreadPool &operator=(const ThreadPool &) = delete;

  //! Members
  std::queue<Task> queue_{};
  std::atomic_bool stopping_{false};
  std::atomic_uint worker_count_{0};
  std::atomic_uint active_count_{0};
  std::atomic_uint pending_count_{0};
  std::mutex queue_mutex_{};
  std::mutex wait_mutex_{};
  std::condition_variable work_cond_{};
  std::condition_variable finished_cond_{};
  std::condition_variable stopped_cond_{};
  std::vector<std::thread> pool_{};
};

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/include/zvec/ailego/parallel/thread_queue.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <atomic>
#include <condition_variable>
#include <mutex>
#include <queue>
#include <thread>
#include <utility>
#include <vector>
#include <zvec/ailego/hash/jump_hash.h>
#include <zvec/ailego/pattern/closure.h>

namespace zvec {
namespace ailego {

/*! Thread Queue (One Thread One Queue)
 *
 *  Owns a fixed set of ThreadWorker objects, each with its own thread and
 *  its own pair of queues. Keyed submissions are routed to the worker at
 *  index `JumpHash(key, count())`.
 */
class ThreadQueue {
 public:
  /*! Thread Worker (One Thread One Worker)
   *
   *  The thread is started by the `thread_` member initializer, i.e. as part
   *  of construction. High-priority tasks are picked before normal ones.
   */
  class ThreadWorker {
   public:
    //! Constructor (`owner` is kept as a raw, non-owning pointer)
    ThreadWorker(ThreadQueue *owner) : owner_(owner) {}

    //! Destructor: joins the worker thread. It does not request a stop
    //! itself, so stop() must have been called (ThreadQueue's destructor
    //! does this) or the join never returns.
    ~ThreadWorker(void) {
      // Join the current thread
      if (thread_.joinable()) {
        thread_.join();
      }
    }

    //! Push a task to the normal queue; the thread is not woken
    template <typename T>
    void enqueue(T &&handle) {
      std::lock_guard<std::mutex> lock(mutex_);
      queue_.emplace(std::forward<T>(handle));
    }

    //! Push a task to the normal queue and wake the thread
    template <typename T>
    void enqueue_and_wake(T &&handle) {
      std::lock_guard<std::mutex> lock(mutex_);
      queue_.emplace(std::forward<T>(handle));
      cond_.notify_one();
    }

    //! Execute a function as a task (normal priority)
    template <typename... TArgs>
    void execute(TArgs &&...args) {
      this->enqueue_and_wake(Closure::New(std::forward<TArgs>(args)...));
    }

    //! Push a task to the queue with high priority; the thread is not woken
    template <typename T>
    void enqueue_high_priority(T &&handle) {
      std::lock_guard<std::mutex> lock(mutex_);
      queue_high_priority_.emplace(std::forward<T>(handle));
    }

    //! Push a task to the queue with high priority and wake
    template <typename T>
    void enqueue_high_priority_and_wake(T &&handle) {
      std::lock_guard<std::mutex> lock(mutex_);
      queue_high_priority_.emplace(std::forward<T>(handle));
      cond_.notify_one();
    }

    //! Execute a task in high priority
    template <typename... TArgs>
    void execute_high_priority(TArgs &&...args) {
      // Must go through the high-priority queue; routing this through
      // enqueue_and_wake() would silently demote the task to normal priority.
      this->enqueue_high_priority_and_wake(
          Closure::New(std::forward<TArgs>(args)...));
    }

    //! Wake the thread
    void wake(void) {
      std::lock_guard<std::mutex> lock(mutex_);
      cond_.notify_one();
    }

    //! Notify thread stopped
    void stop(void) {
      // Set stop flag as true, then wake the thread
      stopping_ = true;
      std::lock_guard<std::mutex> lock(mutex_);
      cond_.notify_one();
    }

   protected:
    //! Thread worker callback: registers with the owner, runs tasks until
    //! picking() reports a stop, then unregisters
    void worker(void) {
      owner_->mark_worker_started();

      ClosureHandler task;
      while (this->picking(&task)) {
        // Run the task
        if (task) {
          task->run();
          task = nullptr;
        }
      }
      owner_->mark_worker_stopped();
    }

    //! Pick a task from queue. Blocks until a task is available or a stop
    //! was requested; returns false on stop even if tasks are still queued.
    bool picking(ClosureHandler *task) {
      std::unique_lock<std::mutex> latch(mutex_);
      cond_.wait(latch, [this]() {
        return (queue_.size() > 0 || queue_high_priority_.size() > 0 ||
                stopping_);
      });
      if (stopping_) {
        return false;
      }

      // High-priority tasks always win over normal ones
      if (!queue_high_priority_.empty()) {
        *task = std::move(queue_high_priority_.front());
        queue_high_priority_.pop();
      } else {
        *task = std::move(queue_.front());
        queue_.pop();
      }

      return true;
    }

   private:
    //! Disable them
    ThreadWorker(void) = delete;
    ThreadWorker(ThreadWorker &&) = delete;
    ThreadWorker(const ThreadWorker &) = delete;
    ThreadWorker &operator=(const ThreadWorker &) = delete;

    //! Members (`thread_` is declared last so that everything the worker
    //! callback touches is initialized before the thread starts)
    ThreadQueue *owner_{nullptr};
    std::queue<ClosureHandler> queue_{};
    std::queue<ClosureHandler> queue_high_priority_{};
    std::atomic_bool stopping_{false};
    std::mutex mutex_{};
    std::condition_variable cond_{};
    std::thread thread_{&ThreadWorker::worker, this};
  };

  //! Constructor: one worker per hardware thread, at least 1
  ThreadQueue(void)
      : ThreadQueue{std::max(std::thread::hardware_concurrency(), 1u)} {}

  //! Constructor: creates `size` workers
  explicit ThreadQueue(uint32_t size) {
    // Reserve up front so emplace_back cannot reallocate (and throw) after a
    // worker has already been allocated with new.
    threads_.reserve(size);
    for (uint32_t i = 0u; i < size; ++i) {
      threads_.emplace_back(new ThreadWorker(this));
    }
  }

  //! Destructor: stops every worker, then deletes (and thereby joins) them
  ~ThreadQueue(void) {
    this->stop();
    // Cleanup threads
    for (ThreadWorker *worker : threads_) {
      delete worker;
    }
  }

  //! operator [] (no bounds check)
  ThreadWorker &operator[](size_t i) {
    return *(threads_[i]);
  }

  //! Stop the thread
  void stop(void) {
    // Stop all workers
    for (ThreadWorker *worker : threads_) {
      worker->stop();
    }
  }

  //! Wake all worker threads
  void wake(void) {
    for (ThreadWorker *worker : threads_) {
      worker->wake();
    }
  }

  //! Wait until all threads stopped processing
  void wait_stop(void) {
    std::unique_lock<std::mutex> lock(wait_mutex_);
    stopped_cond_.wait(lock, [this]() { return this->is_stopped(); });
  }

  //! Check if the pool is stopped (no worker is currently registered)
  bool is_stopped(void) const {
    return (worker_count_ == 0);
  }

  //! Retrieve count of worker in queue
  size_t worker_count(void) const {
    return worker_count_.load(std::memory_order_relaxed);
  }

  //! Retrieve thread count in queue
  size_t count(void) const {
    return threads_.size();
  }

  //! Push a task to the queue of the worker selected by `key`
  template <typename T>
  void enqueue(uint64_t key, T &&handle) {
    threads_[JumpHash(key, static_cast<int32_t>(threads_.size()))]->enqueue(
        std::forward<T>(handle));
  }

  //! Push a task to the queue of the worker selected by `key` and wake it
  template <typename T>
  void enqueue_and_wake(uint64_t key, T &&handle) {
    threads_[JumpHash(key, static_cast<int32_t>(threads_.size()))]
        ->enqueue_and_wake(std::forward<T>(handle));
  }

  //! Execute a function as a task in pool
  template <typename... TArgs>
  void execute(uint64_t key, TArgs &&...args) {
    this->enqueue_and_wake(key, Closure::New(std::forward<TArgs>(args)...));
  }

  //! Push a task to the queue with high priority
  template <typename T>
  void enqueue_high_priority(uint64_t key, T &&handle) {
    threads_[JumpHash(key, static_cast<int32_t>(threads_.size()))]
        ->enqueue_high_priority(std::forward<T>(handle));
  }

  //! Push a task to the queue with high priority and wake
  template <typename T>
  void enqueue_high_priority_and_wake(uint64_t key, T &&handle) {
    threads_[JumpHash(key, static_cast<int32_t>(threads_.size()))]
        ->enqueue_high_priority_and_wake(std::forward<T>(handle));
  }

  //! Execute a function as a task in pool with high priority
  template <typename... TArgs>
  void execute_high_priority(uint64_t key, TArgs &&...args) {
    this->enqueue_high_priority_and_wake(
        key, Closure::New(std::forward<TArgs>(args)...));
  }

 protected:
  //! Mark a worker started
  void mark_worker_started(void) {
    ++worker_count_;
  }

  //! Mark a worker stopped; the last one wakes the wait_stop() callers
  void mark_worker_stopped(void) {
    // Decrease count of workers
    std::lock_guard<std::mutex> lock(wait_mutex_);
    if (--worker_count_ == 0) {
      stopped_cond_.notify_all();
    }
  }

 private:
  //! Disable them
  ThreadQueue(const ThreadQueue &) = delete;
  ThreadQueue(ThreadQueue &&) = delete;
  ThreadQueue &operator=(const ThreadQueue &) = delete;

  //! Members (`threads_` owns its workers; they are deleted in the
  //! destructor)
  std::atomic_uint worker_count_{0};
  std::mutex wait_mutex_{};
  std::condition_variable stopped_cond_{};
  std::vector<ThreadWorker *> threads_{};
};

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/include/zvec/ailego/pattern/closure.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include <tuple>
#include <type_traits>

namespace zvec {
namespace ailego {

/*! Callback Validator (forward declaration)
 *
 *  `CallbackValidator<F>::Value` is non-zero when `F` is one of the callable
 *  forms handled below: a function, a function pointer, a member function
 *  pointer, or a class type whose `operator()` can be named unambiguously.
 */
template <typename TFunc>
struct CallbackValidator;

/*! Callback Validator (pointer to free function): always valid
 */
template <typename TResult, typename... TArgs>
struct CallbackValidator<TResult (*)(TArgs...)> {
  enum { Value = true };
};

/*! Callback Validator (function type): answers like its pointer form
 */
template <typename TResult, typename... TArgs>
struct CallbackValidator<TResult(TArgs...)>
    : CallbackValidator<TResult (*)(TArgs...)> {};

/*! Callback Validator (pointer to unqualified member function)
 */
template <typename TClass, typename TResult, typename... TArgs>
struct CallbackValidator<TResult (TClass::*)(TArgs...)>
    : CallbackValidator<TResult (*)(TArgs...)> {};

/*! Callback Validator (pointer to const member function)
 */
template <typename TClass, typename TResult, typename... TArgs>
struct CallbackValidator<TResult (TClass::*)(TArgs...) const>
    : CallbackValidator<TResult (*)(TArgs...)> {};

/*! Callback Validator (pointer to volatile member function)
 */
template <typename TClass, typename TResult, typename... TArgs>
struct CallbackValidator<TResult (TClass::*)(TArgs...) volatile>
    : CallbackValidator<TResult (*)(TArgs...)> {};

/*! Callback Validator (pointer to const volatile member function)
 */
template <typename TClass, typename TResult, typename... TArgs>
struct CallbackValidator<TResult (TClass::*)(TArgs...) const volatile>
    : CallbackValidator<TResult (*)(TArgs...)> {};

/*! Callback Validator (everything else)
 *
 *  Detects a call operator by overload resolution: the first `Validate`
 *  only takes part when `&T::operator()` is a well-formed expression, and
 *  the two overloads return references to types of different size.
 */
template <typename TFunc>
struct CallbackValidator {
 protected:
  using FalseType = long;
  using TrueType = char;

  //! Chosen when the candidate exposes a nameable operator()
  template <typename TCandidate>
  static TrueType &Validate(decltype(&TCandidate::operator()));

  //! Fallback for every other type
  template <typename TCandidate>
  static FalseType &Validate(...);

 public:
  enum { Value = (sizeof(TrueType) == sizeof(Validate<TFunc>(nullptr))) };
};

/*! Callback Validator (lvalue reference): validates the referred type
 */
template <typename TFunc>
struct CallbackValidator<TFunc &> : CallbackValidator<TFunc> {};

/*! Callback Validator (rvalue reference): validates the referred type
 */
template <typename TFunc>
struct CallbackValidator<TFunc &&> : CallbackValidator<TFunc> {};

/*! Callback Traits (forward declaration)
 *
 *  Describes a callable: the type stored for it (`Type`), its result type,
 *  a tuple type able to hold decayed copies of its arguments, the declared
 *  type of each parameter and the parameter count.
 */
template <typename TFunc>
struct CallbackTraits;

/*! Callback Traits (pointer to free function): the base description that
 *  all other forms inherit from
 */
template <typename TResult, typename... TArgs>
struct CallbackTraits<TResult (*)(TArgs...)> {
  using Type = TResult (*)(TArgs...);
  using ResultType = TResult;
  using TupleType = std::tuple<std::decay_t<TArgs>...>;

  //! Declared (non-decayed) type of the N-th parameter
  template <size_t N>
  struct Parameter {
    using Type = std::tuple_element_t<N, std::tuple<TArgs...>>;
  };

  //! Number of parameters
  enum { Arity = sizeof...(TArgs) };
};

/*! Callback Traits (function type): stored as a function pointer
 */
template <typename TResult, typename... TArgs>
struct CallbackTraits<TResult(TArgs...)>
    : CallbackTraits<TResult (*)(TArgs...)> {
  using Type = TResult (*)(TArgs...);
};

/*! Callback Traits (pointer to unqualified member function)
 */
template <typename TClass, typename TResult, typename... TArgs>
struct CallbackTraits<TResult (TClass::*)(TArgs...)>
    : CallbackTraits<TResult (*)(TArgs...)> {
  using Type = TResult (TClass::*)(TArgs...);
};

/*! Callback Traits (pointer to const member function)
 */
template <typename TClass, typename TResult, typename... TArgs>
struct CallbackTraits<TResult (TClass::*)(TArgs...) const>
    : CallbackTraits<TResult (*)(TArgs...)> {
  using Type = TResult (TClass::*)(TArgs...) const;
};

/*! Callback Traits (pointer to volatile member function)
 */
template <typename TClass, typename TResult, typename... TArgs>
struct CallbackTraits<TResult (TClass::*)(TArgs...) volatile>
    : CallbackTraits<TResult (*)(TArgs...)> {
  using Type = TResult (TClass::*)(TArgs...) volatile;
};

/*! Callback Traits (pointer to const volatile member function)
 */
template <typename TClass, typename TResult, typename... TArgs>
struct CallbackTraits<TResult (TClass::*)(TArgs...) const volatile>
    : CallbackTraits<TResult (*)(TArgs...)> {
  using Type = TResult (TClass::*)(TArgs...) const volatile;
};

/*! Callback Traits (class type with a call operator): the signature comes
 *  from `&TFunc::operator()`, the stored type is the class itself
 */
template <typename TFunc>
struct CallbackTraits : CallbackTraits<decltype(&TFunc::operator())> {
  using Type = TFunc;
};

/*! Callback Traits (lvalue reference): same as the referred type
 */
template <typename TFunc>
struct CallbackTraits<TFunc &> : CallbackTraits<TFunc> {};

/*! Callback Traits (rvalue reference): same as the referred type
 */
template <typename TFunc>
struct CallbackTraits<TFunc &&> : CallbackTraits<TFunc> {};

/*! Callback Functor
 *
 *  Invokes a stored callable with arguments taken from a tuple. Every tuple
 *  element is forwarded as the declared type of the matching parameter, so
 *  an element feeding a by-value parameter is passed as an rvalue.
 */
template <typename TFunc>
struct CallbackFunctor {
  using Traits = CallbackTraits<TFunc>;
  using Type = typename Traits::Type;
  using ResultType = typename Traits::ResultType;
  using TupleType = typename Traits::TupleType;

  //! Tuple Index: compile-time list of tuple positions
  template <size_t...>
  struct TupleIndex {};

  //! Tuple Index Maker: prepends N-1 to the list and recurses towards 0
  template <size_t N, size_t... Positions>
  struct TupleIndexMaker : TupleIndexMaker<N - 1, N - 1, Positions...> {};

  //! Tuple Index Maker (end of recursion): exposes the finished list
  template <size_t... Positions>
  struct TupleIndexMaker<0, Positions...> {
    using Type = TupleIndex<Positions...>;
  };

  //! Call `impl` with the tuple elements at the given positions
  template <size_t... Positions>
  static ResultType Run(Type &impl, TupleType &tuple,
                        TupleIndex<Positions...>) {
    return impl(
        std::forward<typename Traits::template Parameter<Positions>::Type>(
            std::get<Positions>(tuple))...);
  }

  //! Call member function `impl` on `obj` with the tuple elements at the
  //! given positions
  template <typename T, size_t... Positions>
  static ResultType Run(T *obj, Type &impl, TupleType &tuple,
                        TupleIndex<Positions...>) {
    return (obj->*impl)(
        std::forward<typename Traits::template Parameter<Positions>::Type>(
            std::get<Positions>(tuple))...);
  }

  //! Call `impl` with every element of the tuple, in order
  static ResultType Run(Type &impl, TupleType &tuple) {
    using AllPositions = typename TupleIndexMaker<Traits::Arity>::Type;
    return Run(impl, tuple, AllPositions());
  }

  //! Call member function `impl` on `obj` with every element of the tuple,
  //! in order
  template <typename T>
  static ResultType Run(T *obj, Type &impl, TupleType &tuple) {
    using AllPositions = typename TupleIndexMaker<Traits::Arity>::Type;
    return Run(obj, impl, tuple, AllPositions());
  }
};

/*! Callback Object
 *
 *  `Type` is `T` with any reference stripped (cv-qualifiers are kept).
 */
template <typename T>
struct CallbackObject {
  using Type = std::remove_reference_t<T>;
};

/*! Callback (declaration)
 *
 *  `R` is the result type of the wrapped callable. `Callback<void>` is the
 *  common base that can only be run; other `Callback<R>` additionally allow
 *  fetching the result.
 */
template <typename R>
class Callback;

/*! Callback (void)
 *
 *  Abstract, result-less view of a bound callable. It also hosts the static
 *  `New` factories, which are only declared here and defined further down
 *  in this header.
 */
template <>
class Callback<void> {
 public:
  //! Shared-ownership handle to a callback
  using Pointer = std::shared_ptr<Callback<void>>;

  //! Destructor
  virtual ~Callback(void) {}

  //! Function call (same as run())
  void operator()(void) {
    this->run();
  }

  //! Run the callback function
  virtual void run(void) = 0;

  //! Create callback closure (member function pointer).
  //! `obj` is the object the member function is invoked on; `args` are
  //! forwarded to the created implementation object.
  template <typename T, typename R, typename... TParams, typename... TArgs>
  static typename Callback<R>::Pointer New(T *obj, R (T::*impl)(TParams...),
                                           TArgs &&...args);

  //! Create callback closure (const member function pointer)
  template <typename T, typename R, typename... TParams, typename... TArgs>
  static typename Callback<R>::Pointer New(const T *obj,
                                           R (T::*impl)(TParams...) const,
                                           TArgs &&...args);

  //! Create callback closure (volatile member function pointer)
  template <typename T, typename R, typename... TParams, typename... TArgs>
  static typename Callback<R>::Pointer New(volatile T *obj,
                                           R (T::*impl)(TParams...) volatile,
                                           TArgs &&...args);

  //! Create callback closure (const volatile member function pointer)
  template <typename T, typename R, typename... TParams, typename... TArgs>
  static typename Callback<R>::Pointer New(const volatile T *obj,
                                           R (T::*impl)(TParams...)
                                               const volatile,
                                           TArgs &&...args);

  //! Create callback closure (function).
  //! Only participates in overload resolution when
  //! `CallbackValidator<TFunc>::Value` is true; the returned handle's
  //! result type is `CallbackTraits<TFunc>::ResultType`.
  template <
      typename TFunc, typename... TArgs,
      typename = typename std::enable_if<CallbackValidator<TFunc>::Value>::type>
  static typename Callback<typename CallbackTraits<TFunc>::ResultType>::Pointer
  New(TFunc &&impl, TArgs &&...args);
};

/*! Callback
 *
 *  Callback with a result of type `R`. It can be run like the void base
 *  (the result is not handed back) or run with a pointer that receives the
 *  result.
 */
template <typename R>
class Callback : public Callback<void> {
 public:
  //! Shared-ownership handle to a callback yielding `R`
  using Pointer = std::shared_ptr<Callback>;

  // Keep the base-class run() visible next to run(R *)
  using Callback<void>::run;

  //! Function call without fetching the result
  void operator()() {
    run();
  }

  //! Function call with return
  void operator()(R *r) {
    run(r);
  }

  //! Run the callback function, passing `r` on to the implementation
  virtual void run(R *) = 0;

 protected:
  //! Constructor (only derived classes may create instances)
  Callback() {}
};

/*! Callback Implementation
 *
 *  General case: a member function `impl` bound to an object pointer, with
 *  a result of type `R`. The object is held as a raw pointer; nothing in
 *  this class keeps it alive.
 */
template <typename T, typename R, typename TFunc>
class CallbackImpl : public Callback<R> {
 public:
  using Object = CallbackObject<T>;
  using Functor = CallbackFunctor<TFunc>;

  //! Constructor (copies `impl`; `args` initialize the argument tuple)
  template <typename... TArgs>
  CallbackImpl(typename Object::Type *obj, const typename Functor::Type &impl,
               TArgs &&...args)
      : obj_(obj), impl_(impl), tuple_(std::forward<TArgs>(args)...) {}

  //! Constructor (moves `impl`; `args` initialize the argument tuple)
  template <typename... TArgs>
  CallbackImpl(typename Object::Type *obj, typename Functor::Type &&impl,
               TArgs &&...args)
      : obj_(obj),
        impl_(std::move(impl)),
        tuple_(std::forward<TArgs>(args)...) {}

  //! Run the callback function; the result is discarded
  void run(void) override {
    Functor::Run(obj_, impl_, tuple_);
  }

  //! Run the callback function and store the result in `*r`
  //! (`r` is dereferenced unconditionally)
  void run(R *r) override {
    *r = Functor::Run(obj_, impl_, tuple_);
  }

 protected:
  //! Disable them
  CallbackImpl(void) = delete;
  CallbackImpl(const CallbackImpl &) = delete;
  CallbackImpl(CallbackImpl &&) = delete;
  CallbackImpl &operator=(const CallbackImpl &) = delete;

 private:
  typename Object::Type *obj_;         // target object (not owned)
  typename Functor::Type impl_;        // stored member function pointer
  typename Functor::TupleType tuple_;  // stored arguments (decayed types)
};

/*! Callback Implementation
 *
 *  Specialization for a member function bound to an object pointer whose
 *  result type is void; only the result-less run() exists. The object is
 *  held as a raw pointer; nothing in this class keeps it alive.
 */
template <typename T, typename TFunc>
class CallbackImpl<T, void, TFunc> : public Callback<void> {
 public:
  using Object = CallbackObject<T>;
  using Functor = CallbackFunctor<TFunc>;

  //! Constructor (copies `impl`; `args` initialize the argument tuple)
  template <typename... TArgs>
  CallbackImpl(typename Object::Type *obj, const typename Functor::Type &impl,
               TArgs &&...args)
      : obj_(obj), impl_(impl), tuple_(std::forward<TArgs>(args)...) {}

  //! Constructor (moves `impl`; `args` initialize the argument tuple)
  template <typename... TArgs>
  CallbackImpl(typename Object::Type *obj, typename Functor::Type &&impl,
               TArgs &&...args)
      : obj_(obj),
        impl_(std::move(impl)),
        tuple_(std::forward<TArgs>(args)...) {}

  //! Run the callback function
  void run(void) override {
    Functor::Run(obj_, impl_, tuple_);
  }

 protected:
  //! Disable them
  CallbackImpl(void) = delete;
  CallbackImpl(const CallbackImpl &) = delete;
  CallbackImpl(CallbackImpl &&) = delete;
  CallbackImpl &operator=(const CallbackImpl &) = delete;

 private:
  typename Object::Type *obj_;         // target object (not owned)
  typename Functor::Type impl_;        // stored member function pointer
  typename Functor::TupleType tuple_;  // stored arguments (decayed types)
};

/*! Callback Implementation
 *
 *  Specialization without a bound object (first template argument is void):
 *  wraps a free function or function object with a result of type `R`.
 */
template <typename R, typename TFunc>
class CallbackImpl<void, R, TFunc> : public Callback<R> {
 public:
  using Functor = CallbackFunctor<TFunc>;

  //! Constructor (copies `impl`; `args` initialize the argument tuple)
  template <typename... TArgs>
  CallbackImpl(const typename Functor::Type &impl, TArgs &&...args)
      : impl_(impl), tuple_(std::forward<TArgs>(args)...) {}

  //! Constructor (moves `impl`; `args` initialize the argument tuple)
  template <typename... TArgs>
  CallbackImpl(typename Functor::Type &&impl, TArgs &&...args)
      : impl_(std::move(impl)), tuple_(std::forward<TArgs>(args)...) {}

  //! Run the callback function; the result is discarded
  void run(void) override {
    Functor::Run(impl_, tuple_);
  }

  //! Run the callback function and store the result in `*r`
  //! (`r` is dereferenced unconditionally)
  void run(R *r) override {
    *r = Functor::Run(impl_, tuple_);
  }

 protected:
  //! Disable them
  CallbackImpl(void) = delete;
  CallbackImpl(const CallbackImpl &) = delete;
  CallbackImpl(CallbackImpl &&) = delete;
  CallbackImpl &operator=(const CallbackImpl &) = delete;

 private:
  typename Functor::Type impl_;        // stored callable
  typename Functor::TupleType tuple_;  // stored arguments (decayed types)
};

/*! Callback Implementation
 *
 *  Specialization without a bound object and with a void result: wraps a
 *  free function or function object; only the result-less run() exists.
 */
template <typename TFunc>
class CallbackImpl<void, void, TFunc> : public Callback<void> {
 public:
  using Functor = CallbackFunctor<TFunc>;

  //! Constructor (copies `impl`; `args` initialize the argument tuple)
  template <typename... TArgs>
  CallbackImpl(const typename Functor::Type &impl, TArgs &&...args)
      : impl_(impl), tuple_(std::forward<TArgs>(args)...) {}

  //! Constructor (moves `impl`; `args` initialize the argument tuple)
  template <typename... TArgs>
  CallbackImpl(typename Functor::Type &&impl, TArgs &&...args)
      : impl_(std::move(impl)), tuple_(std::forward<TArgs>(args)...) {}

  //! Run the callback function
  void run(void) override {
    Functor::Run(impl_, tuple_);
  }

 protected:
  //! Disable them
  CallbackImpl(void) = delete;
  CallbackImpl(const CallbackImpl &) = delete;
  CallbackImpl(CallbackImpl &&) = delete;
  CallbackImpl &operator=(const CallbackImpl &) = delete;

 private:
  typename Functor::Type impl_;        // stored callable
  typename Functor::TupleType tuple_;  // stored arguments (decayed types)
};

//! Create callback closure (member function pointer): binds `obj`, `impl`
//! and the forwarded arguments into a shared implementation object
template <typename T, typename R, typename... TParams, typename... TArgs>
typename Callback<R>::Pointer Callback<void>::New(T *obj,
                                                  R (T::*impl)(TParams...),
                                                  TArgs &&...args) {
  using Impl = CallbackImpl<T, R, decltype(impl)>;
  return std::make_shared<Impl>(obj, impl, std::forward<TArgs>(args)...);
}

//! Create callback closure (const-qualified member function pointer).
//! Same as the non-const overload, but the closure is instantiated for
//! `const T` and takes a pointer to a const object.
template <typename T, typename R, typename... TParams, typename... TArgs>
typename Callback<R>::Pointer Callback<void>::New(const T *obj,
                                                  R (T::*impl)(TParams...)
                                                      const,
                                                  TArgs &&...args) {
  using Impl = CallbackImpl<const T, R, decltype(impl)>;
  return std::make_shared<Impl>(obj, impl, std::forward<TArgs>(args)...);
}

//! Create callback closure (volatile-qualified member function pointer).
//! The closure is instantiated for `volatile T` and takes a pointer to a
//! volatile object.
template <typename T, typename R, typename... TParams, typename... TArgs>
typename Callback<R>::Pointer Callback<void>::New(
    volatile T *obj, R (T::*impl)(TParams...) volatile, TArgs &&...args) {
  using Impl = CallbackImpl<volatile T, R, decltype(impl)>;
  return std::make_shared<Impl>(obj, impl, std::forward<TArgs>(args)...);
}

//! Create callback closure (const volatile-qualified member function
//! pointer). The closure is instantiated for `const volatile T` and takes a
//! pointer to a const volatile object.
template <typename T, typename R, typename... TParams, typename... TArgs>
typename Callback<R>::Pointer Callback<void>::New(const volatile T *obj,
                                                  R (T::*impl)(TParams...)
                                                      const volatile,
                                                  TArgs &&...args) {
  using Impl = CallbackImpl<const volatile T, R, decltype(impl)>;
  return std::make_shared<Impl>(obj, impl, std::forward<TArgs>(args)...);
}

//! Create callback closure (function).
//! The result type comes from CallbackTraits<TFunc>; the callable and the
//! arguments are perfectly forwarded to the CallbackImpl constructor.
template <typename TFunc, typename... TArgs, typename>
typename Callback<typename CallbackTraits<TFunc>::ResultType>::Pointer
Callback<void>::New(TFunc &&impl, TArgs &&...args) {
  using Result = typename CallbackTraits<TFunc>::ResultType;
  using Impl = CallbackImpl<void, Result, decltype(impl)>;
  return std::make_shared<Impl>(std::forward<TFunc>(impl),
                                std::forward<TArgs>(args)...);
}

//! Callback Handler: shorthand for the `Pointer` type exposed by Callback<R>
template <typename R>
using CallbackHandler = typename Callback<R>::Pointer;

//! Closure: shorthand for Callback<void>
using Closure = Callback<void>;

//! Closure Handler: shorthand for the `Pointer` type exposed by Closure
using ClosureHandler = Closure::Pointer;

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/include/zvec/ailego/pattern/expected.hpp
================================================
///
// expected - An implementation of std::expected with extensions
// Written in 2017 by Sy Brand (tartanllama@gmail.com, @TartanLlama)
//
// Documentation available at http://tl.tartanllama.xyz/
//
// To the extent possible under law, the author(s) have dedicated all
// copyright and related and neighboring rights to this software to the
// public domain worldwide. This software is distributed without any warranty.
//
// You should have received a copy of the CC0 Public Domain Dedication
// along with this software. If not, see
// <http://creativecommons.org/publicdomain/zero/1.0/>.
///

#ifndef TL_EXPECTED_HPP
#define TL_EXPECTED_HPP

#define TL_EXPECTED_VERSION_MAJOR 1
#define TL_EXPECTED_VERSION_MINOR 2
#define TL_EXPECTED_VERSION_PATCH 0

#include <exception>
#include <functional>
#include <type_traits>
#include <utility>

// Exception support is detected through the compiler-provided macros
// __EXCEPTIONS and _CPPUNWIND.
#if defined(__EXCEPTIONS) || defined(_CPPUNWIND)
#define TL_EXPECTED_EXCEPTIONS_ENABLED
#endif

// _MSC_VER == 1900 is flagged as MSVC 2015; for that compiler
// TL_EXPECTED_MSVC2015_CONSTEXPR expands to nothing, otherwise to `constexpr`.
#if (defined(_MSC_VER) && _MSC_VER == 1900)
#define TL_EXPECTED_MSVC2015
#define TL_EXPECTED_MSVC2015_CONSTEXPR
#else
#define TL_EXPECTED_MSVC2015_CONSTEXPR constexpr
#endif

// GCC 4.x up to 4.9 (clang excluded, since it also defines __GNUC__).
#if (defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ <= 9 && \
     !defined(__clang__))
#define TL_EXPECTED_GCC49
#endif

// GCC 5.0 - 5.4 (clang excluded).
#if (defined(__GNUC__) && __GNUC__ == 5 && __GNUC_MINOR__ <= 4 && \
     !defined(__clang__))
#define TL_EXPECTED_GCC54
#endif

// GCC 5.0 - 5.5 (clang excluded).
#if (defined(__GNUC__) && __GNUC__ == 5 && __GNUC_MINOR__ <= 5 && \
     !defined(__clang__))
#define TL_EXPECTED_GCC55
#endif

// TL_CPLUSPLUS has to be defined before the language-level test below.
// Without it the identifier is unknown to the preprocessor, evaluates to 0
// inside `#if`, and TL_ASSERT would silently expand to nothing on every
// compiler. The same definition is repeated verbatim further down in this
// header; a token-identical macro redefinition is permitted.
#ifdef _MSVC_LANG
#define TL_CPLUSPLUS _MSVC_LANG
#else
#define TL_CPLUSPLUS __cplusplus
#endif

// TL_ASSERT(x): user-overridable assertion hook. Unless the includer has
// already defined it, it maps to assert(x) when the language level is above
// C++11 and the compiler is not GCC <= 4.9; otherwise it expands to nothing.
#if !defined(TL_ASSERT)
// can't have assert in constexpr in C++11 and GCC 4.9 has a compiler bug
#if (TL_CPLUSPLUS > 201103L) && !defined(TL_EXPECTED_GCC49)
#include <cassert>
#define TL_ASSERT(x) assert(x)
#else
#define TL_ASSERT(x)
#endif
#endif

// Selects the traits behind the TL_EXPECTED_IS_TRIVIALLY_* macros. Three
// configurations are handled: GCC <= 4.9, other GCC versions below 8, and
// everything else.
#if (defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ <= 9 && \
     !defined(__clang__))
// GCC < 5 doesn't support overloading on const&& for member functions

#define TL_EXPECTED_NO_CONSTRR
// GCC < 5 doesn't support some standard C++11 type traits
#define TL_EXPECTED_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T) \
  std::has_trivial_copy_constructor<T>
#define TL_EXPECTED_IS_TRIVIALLY_COPY_ASSIGNABLE(T) \
  std::has_trivial_copy_assign<T>

// This one will be different for GCC 5.7 if it's ever supported
#define TL_EXPECTED_IS_TRIVIALLY_DESTRUCTIBLE(T) \
  std::is_trivially_destructible<T>

// GCC 5 < v < 8 has a bug in is_trivially_copy_constructible which breaks
// std::vector for non-copyable types
#elif (defined(__GNUC__) && __GNUC__ < 8 && !defined(__clang__))
// The *_MUTEX macro keeps the helper trait below from being defined twice.
#ifndef TL_GCC_LESS_8_TRIVIALLY_COPY_CONSTRUCTIBLE_MUTEX
#define TL_GCC_LESS_8_TRIVIALLY_COPY_CONSTRUCTIBLE_MUTEX
namespace tl {
namespace detail {
// Forwards to the standard trait, except for std::vector (see below).
template <class T>
struct is_trivially_copy_constructible
    : std::is_trivially_copy_constructible<T> {};
// Only when libstdc++'s <vector> has already been included: report
// std::vector as not trivially copy constructible.
#ifdef _GLIBCXX_VECTOR
template <class T, class A>
struct is_trivially_copy_constructible<std::vector<T, A>> : std::false_type {};
#endif
}  // namespace detail
}  // namespace tl
#endif

#define TL_EXPECTED_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T) \
  tl::detail::is_trivially_copy_constructible<T>
#define TL_EXPECTED_IS_TRIVIALLY_COPY_ASSIGNABLE(T) \
  std::is_trivially_copy_assignable<T>
#define TL_EXPECTED_IS_TRIVIALLY_DESTRUCTIBLE(T) \
  std::is_trivially_destructible<T>
#else
// Any other compiler: use the standard traits directly.
#define TL_EXPECTED_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T) \
  std::is_trivially_copy_constructible<T>
#define TL_EXPECTED_IS_TRIVIALLY_COPY_ASSIGNABLE(T) \
  std::is_trivially_copy_assignable<T>
#define TL_EXPECTED_IS_TRIVIALLY_DESTRUCTIBLE(T) \
  std::is_trivially_destructible<T>
#endif

// TL_CPLUSPLUS: the language level, taken from _MSVC_LANG when that macro
// exists and from __cplusplus otherwise.
#ifdef _MSVC_LANG
#define TL_CPLUSPLUS _MSVC_LANG
#else
#define TL_CPLUSPLUS __cplusplus
#endif

// Defined for any language level above C++11.
#if TL_CPLUSPLUS > 201103L
#define TL_EXPECTED_CXX14
#endif

// `constexpr`, except on GCC <= 4.9 where it expands to nothing.
#ifdef TL_EXPECTED_GCC49
#define TL_EXPECTED_GCC49_CONSTEXPR
#else
#define TL_EXPECTED_GCC49_CONSTEXPR constexpr
#endif

// `constexpr`, except for exactly C++11, MSVC 2015 and GCC <= 4.9 where it
// expands to nothing.
#if (TL_CPLUSPLUS == 201103L || defined(TL_EXPECTED_MSVC2015) || \
     defined(TL_EXPECTED_GCC49))
#define TL_EXPECTED_11_CONSTEXPR
#else
#define TL_EXPECTED_11_CONSTEXPR constexpr
#endif

// `[[nodiscard]]` from C++17 onwards, nothing before that.
#if TL_CPLUSPLUS >= 201703L
#define TL_EXPECTED_NODISCARD [[nodiscard]]
#else
#define TL_EXPECTED_NODISCARD
#endif

namespace tl {
template <class T, class E>
class TL_EXPECTED_NODISCARD expected;

// The *_MUTEX macro guards these definitions so they are emitted only once.
// NOTE(review): presumably shared with other tl:: headers that define the
// same names — confirm.
#ifndef TL_MONOSTATE_INPLACE_MUTEX
#define TL_MONOSTATE_INPLACE_MUTEX
// Empty class type.
class monostate {};

// Tag type selecting in-place construction; its default constructor is
// explicit, so it cannot be created from `{}` by copy-list-initialization.
struct in_place_t {
  explicit in_place_t() = default;
};
// Tag object to pass where an `in_place_t` parameter is expected.
static constexpr in_place_t in_place{};
#endif

// Holder for a single object of type `E`. All constructors are explicit and
// the held object is reachable through `value()`, whose overloads preserve
// the constness and value category of the `unexpected` they are called on.
template <class E>
class unexpected {
 public:
  static_assert(!std::is_same<E, void>::value, "E must not be void");

  unexpected() = delete;

  // Holds a copy of `err`.
  constexpr explicit unexpected(const E &err) : m_val(err) {}

  // Holds an `E` move-constructed from `err`.
  constexpr explicit unexpected(E &&err) : m_val(std::move(err)) {}

  // Builds the held `E` from `ctor_args`; only available when `E` is
  // constructible from them.
  template <class... Args, typename std::enable_if<std::is_constructible<
                               E, Args &&...>::value>::type * = nullptr>
  constexpr explicit unexpected(Args &&...ctor_args)
      : m_val(std::forward<Args>(ctor_args)...) {}

  // Builds the held `E` from an initializer list followed by `ctor_args`;
  // only available when `E` is constructible from them.
  template <
      class U, class... Args,
      typename std::enable_if<std::is_constructible<
          E, std::initializer_list<U> &, Args &&...>::value>::type * = nullptr>
  constexpr explicit unexpected(std::initializer_list<U> init,
                                Args &&...ctor_args)
      : m_val(init, std::forward<Args>(ctor_args)...) {}

  // Access to the held object.
  constexpr const E &value() const & {
    return m_val;
  }
  TL_EXPECTED_11_CONSTEXPR E &value() & {
    return m_val;
  }
  TL_EXPECTED_11_CONSTEXPR E &&value() && {
    return std::move(m_val);
  }
  constexpr const E &&value() const && {
    return std::move(m_val);
  }

 private:
  E m_val;
};

// Deduction guide (when the compiler supports them): `unexpected(e)` deduces
// `unexpected<E>` from the argument type.
#ifdef __cpp_deduction_guides
template <class E>
unexpected(E) -> unexpected<E>;
#endif

// Relational operators for `unexpected<E>`: each one applies the same
// operator to the two held values.
template <class E>
constexpr bool operator==(const unexpected<E> &a, const unexpected<E> &b) {
  return a.value() == b.value();
}
template <class E>
constexpr bool operator!=(const unexpected<E> &a, const unexpected<E> &b) {
  return a.value() != b.value();
}
template <class E>
constexpr bool operator<(const unexpected<E> &a, const unexpected<E> &b) {
  return a.value() < b.value();
}
template <class E>
constexpr bool operator<=(const unexpected<E> &a, const unexpected<E> &b) {
  return a.value() <= b.value();
}
template <class E>
constexpr bool operator>(const unexpected<E> &a, const unexpected<E> &b) {
  return a.value() > b.value();
}
template <class E>
constexpr bool operator>=(const unexpected<E> &a, const unexpected<E> &b) {
  return a.value() >= b.value();
}

// Wraps `e` in an `unexpected` whose error type is the decayed type of `E`;
// `e` is perfectly forwarded into the wrapper's constructor.
template <class E>
unexpected<typename std::decay<E>::type> make_unexpected(E &&e) {
  using wrapper_type = unexpected<typename std::decay<E>::type>;
  return wrapper_type(std::forward<E>(e));
}

// Tag type used by the constructors below that take `unexpect_t` as their
// first parameter to request construction of the error alternative.
struct unexpect_t {
  unexpect_t() = default;
};
// Tag object to pass where an `unexpect_t` parameter is expected.
static constexpr unexpect_t unexpect{};

namespace detail {
// Throws `e` (perfectly forwarded) when exceptions are enabled. When they are
// disabled the function body is marked unreachable with the compiler-specific
// intrinsic, so reaching it is undefined behaviour. Never returns.
template <typename E>
[[noreturn]] TL_EXPECTED_11_CONSTEXPR void throw_exception(E &&e) {
#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED
  throw std::forward<E>(e);
#else
  // Silence the unused-parameter warning.
  (void)e;
#ifdef _MSC_VER
  __assume(0);
#else
  __builtin_unreachable();
#endif
#endif
}

#ifndef TL_TRAITS_MUTEX
#define TL_TRAITS_MUTEX
// Shorthands mirroring the C++14 `_t` alias templates, usable from C++11.
template <class Type>
using remove_const_t = typename std::remove_const<Type>::type;
template <class Type>
using remove_reference_t = typename std::remove_reference<Type>::type;
template <class Type>
using decay_t = typename std::decay<Type>::type;
template <bool Enabled, class Type = void>
using enable_if_t = typename std::enable_if<Enabled, Type>::type;
template <bool Cond, class IfTrue, class IfFalse>
using conditional_t = typename std::conditional<Cond, IfTrue, IfFalse>::type;

// Stand-in for C++17 std::conjunction.
// Empty pack: true.
template <class...>
struct conjunction : std::true_type {};
// Single trait: the trait itself.
template <class Only>
struct conjunction<Only> : Only {};
// Several traits: the first one whose `value` is false, otherwise the
// conjunction of the remaining ones.
template <class Head, class... Tail>
struct conjunction<Head, Tail...>
    : std::conditional<static_cast<bool>(Head::value), conjunction<Tail...>,
                       Head>::type {};

#if defined(_LIBCPP_VERSION) && __cplusplus == 201103L
#define TL_TRAITS_LIBCXX_MEM_FN_WORKAROUND
#endif

// In C++11 mode, there's an issue in libc++'s std::mem_fn
// which results in a hard-error when using it in a noexcept expression
// in some cases. This is a check to workaround the common failing case.
#ifdef TL_TRAITS_LIBCXX_MEM_FN_WORKAROUND
// True for pointers to member functions that are not const-qualified: the
// unqualified and volatile forms, each with no, `&` or `&&` ref-qualifier.
// Everything else is false.
template <class Candidate>
struct is_pointer_to_non_const_member_func : std::false_type {};
template <class Class, class Result, class... Params>
struct is_pointer_to_non_const_member_func<Result (Class::*)(Params...)>
    : std::true_type {};
template <class Class, class Result, class... Params>
struct is_pointer_to_non_const_member_func<Result (Class::*)(Params...) &>
    : std::true_type {};
template <class Class, class Result, class... Params>
struct is_pointer_to_non_const_member_func<Result (Class::*)(Params...) &&>
    : std::true_type {};
template <class Class, class Result, class... Params>
struct is_pointer_to_non_const_member_func<Result (Class::*)(Params...)
                                               volatile> : std::true_type {};
template <class Class, class Result, class... Params>
struct is_pointer_to_non_const_member_func<Result (Class::*)(Params...)
                                               volatile &> : std::true_type {};
template <class Class, class Result, class... Params>
struct is_pointer_to_non_const_member_func<Result (Class::*)(Params...)
                                               volatile &&> : std::true_type {
};

// True for a const-qualified type and for an lvalue reference to a
// const-qualified type; false otherwise.
template <class Candidate>
struct is_const_or_const_ref : std::false_type {};
template <class Referred>
struct is_const_or_const_ref<const Referred &> : std::true_type {};
template <class Unqualified>
struct is_const_or_const_ref<const Unqualified> : std::true_type {};
#endif

// std::invoke from C++17
// https://stackoverflow.com/questions/38288042/c11-14-invoke-workaround
//
// Overload for member pointers: selected when the decayed `Fn` is a member
// pointer; the call goes through std::mem_fn. The trailing `int = 0`
// parameter makes this template's signature distinct from the overload below.
// Under TL_TRAITS_LIBCXX_MEM_FN_WORKAROUND the overload is additionally
// removed when `Fn` is a non-const member function pointer and the argument
// is const.
// NOTE(review): is_const_or_const_ref takes a single type, so expanding
// `Args...` into it looks well-formed only for exactly one argument — confirm
// that this is intended.
template <
    typename Fn, typename... Args,
#ifdef TL_TRAITS_LIBCXX_MEM_FN_WORKAROUND
    typename = enable_if_t<!(is_pointer_to_non_const_member_func<Fn>::value &&
                             is_const_or_const_ref<Args...>::value)>,
#endif
    typename = enable_if_t<std::is_member_pointer<decay_t<Fn>>::value>, int = 0>
constexpr auto invoke(Fn &&f, Args &&...args) noexcept(
    noexcept(std::mem_fn(f)(std::forward<Args>(args)...)))
    -> decltype(std::mem_fn(f)(std::forward<Args>(args)...)) {
  return std::mem_fn(f)(std::forward<Args>(args)...);
}

// Overload for everything that is not a member pointer: `f` is perfectly
// forwarded and called directly. The noexcept specification and the return
// type are those of the call expression.
template <typename Fn, typename... Args,
          typename = enable_if_t<!std::is_member_pointer<decay_t<Fn>>::value>>
constexpr auto invoke(Fn &&f, Args &&...args) noexcept(
    noexcept(std::forward<Fn>(f)(std::forward<Args>(args)...)))
    -> decltype(std::forward<Fn>(f)(std::forward<Args>(args)...)) {
  return std::forward<Fn>(f)(std::forward<Args>(args)...);
}

// Stand-in for C++17 std::invoke_result, built on detail::invoke.
// The primary template is only declared; the partial specialization below
// exists (and provides `type`) when `detail::invoke(Fn, ArgTypes...)` is a
// valid expression.
template <class Fn, class, class... ArgTypes>
struct invoke_result_impl;

template <class Fn, class... ArgTypes>
struct invoke_result_impl<
    Fn,
    decltype(detail::invoke(std::declval<Fn>(), std::declval<ArgTypes>()...),
             void()),
    ArgTypes...> {
  using type = decltype(detail::invoke(std::declval<Fn>(),
                                       std::declval<ArgTypes>()...));
};

// Trait form: `invoke_result<Fn, ArgTypes...>::type`.
template <class Fn, class... ArgTypes>
using invoke_result = invoke_result_impl<Fn, void, ArgTypes...>;

// Alias form yielding the result type directly.
template <class Fn, class... ArgTypes>
using invoke_result_t = typename invoke_result<Fn, ArgTypes...>::type;

#if defined(_MSC_VER) && _MSC_VER <= 1900
// TODO make a version which works with MSVC 2015
// Until then, both traits unconditionally report true on this compiler.
template <class T, class U = T>
struct is_swappable : std::true_type {};

template <class T, class U = T>
struct is_nothrow_swappable : std::true_type {};
#else
// https://stackoverflow.com/questions/26744589/what-is-a-proper-way-to-implement-is-swappable-to-test-for-the-swappable-concept
namespace swap_adl_tests {
// Marker return type. The `swap` declarations below have the same signatures
// as std::swap, so when an unqualified `swap` call resolves to them (and
// therefore yields `tag`) the call would otherwise have gone to std::swap.
struct tag {};

template <class Obj>
tag swap(Obj &, Obj &);
template <class Elem, std::size_t Len>
tag swap(Elem (&lhs)[Len], Elem (&rhs)[Len]);

// can_swap<T, U>(0): yields std::true_type when an unqualified
// `swap(T&, U&)` is well-formed, std::false_type otherwise. The true overload
// is noexcept exactly when that swap call is.
template <class, class>
std::false_type can_swap(...) noexcept(false);
template <class T, class U,
          class = decltype(swap(std::declval<T &>(), std::declval<U &>()))>
std::true_type can_swap(int) noexcept(
    noexcept(swap(std::declval<T &>(), std::declval<U &>())));

// uses_std<T, U>(0): yields a true type when the unqualified swap call
// returns `tag`, i.e. resolves to the declarations above.
template <class, class>
std::false_type uses_std(...);
template <class T, class U>
std::is_same<decltype(swap(std::declval<T &>(), std::declval<U &>())), tag>
uses_std(int);

// Whether std::swap would be noexcept for T: nothrow move construction and
// nothrow move assignment are both required.
template <class T>
struct is_std_swap_noexcept
    : std::integral_constant<
          bool, std::is_nothrow_move_constructible<T>::value &&
                    std::is_nothrow_move_assignable<T>::value> {};

// Arrays defer to their element type.
template <class Elem, std::size_t Len>
struct is_std_swap_noexcept<Elem[Len]> : is_std_swap_noexcept<Elem> {};

// Whether the swap found for (T&, U&) is noexcept, read off can_swap.
template <class T, class U>
struct is_adl_swap_noexcept
    : std::integral_constant<bool, noexcept(can_swap<T, U>(0))> {};
}  // namespace swap_adl_tests

// True when an unqualified `swap(T&, U&)` is well-formed and, if that call
// would end up in std::swap, `T` is also move constructible and move
// assignable.
template <class T, class U = T>
struct is_swappable
    : std::integral_constant<
          bool,
          decltype(detail::swap_adl_tests::can_swap<T, U>(0))::value &&
              (!decltype(detail::swap_adl_tests::uses_std<T, U>(0))::value ||
               (std::is_move_constructible<T>::value &&
                std::is_move_assignable<T>::value))> {};

// Arrays: same test, except that the std::swap case defers to the
// swappability of the element type.
template <class Elem, std::size_t Len>
struct is_swappable<Elem[Len], Elem[Len]>
    : std::integral_constant<
          bool,
          decltype(detail::swap_adl_tests::can_swap<Elem[Len], Elem[Len]>(
              0))::value &&
              (!decltype(detail::swap_adl_tests::uses_std<Elem[Len], Elem[Len]>(
                   0))::value ||
               is_swappable<Elem, Elem>::value)> {};

// True when (T, U) is swappable and the swap that would be used cannot throw:
// the std::swap criterion applies when the call resolves to std::swap, the
// noexcept-ness of the found swap otherwise.
template <class T, class U = T>
struct is_nothrow_swappable
    : std::integral_constant<
          bool,
          is_swappable<T, U>::value &&
              (decltype(detail::swap_adl_tests::uses_std<T, U>(0))::value
                   ? detail::swap_adl_tests::is_std_swap_noexcept<T>::value
                   : detail::swap_adl_tests::is_adl_swap_noexcept<T, U>::value)> {
};
#endif
#endif

// Trait for checking if a type is a tl::expected
template <class T>
struct is_expected_impl : std::false_type {};
template <class T, class E>
struct is_expected_impl<expected<T, E>> : std::true_type {};
template <class T>
using is_expected = is_expected_impl<decay_t<T>>;

// SFINAE helper: well-formed only when `T` is constructible from `U &&` and
// the decayed `U` is none of `in_place_t`, `expected<T, E>` or
// `unexpected<E>`.
template <class T, class E, class U>
using expected_enable_forward_value = detail::enable_if_t<
    std::is_constructible<T, U &&>::value &&
    !std::is_same<detail::decay_t<U>, in_place_t>::value &&
    !std::is_same<expected<T, E>, detail::decay_t<U>>::value &&
    !std::is_same<unexpected<E>, detail::decay_t<U>>::value>;

// SFINAE helper: well-formed only when `T` is constructible from `UR`, `E` is
// constructible from `GR`, and `T` is neither constructible nor convertible
// from an `expected<U, G>` of any value category or constness.
template <class T, class E, class U, class G, class UR, class GR>
using expected_enable_from_other = detail::enable_if_t<
    std::is_constructible<T, UR>::value &&
    std::is_constructible<E, GR>::value &&
    !std::is_constructible<T, expected<U, G> &>::value &&
    !std::is_constructible<T, expected<U, G> &&>::value &&
    !std::is_constructible<T, const expected<U, G> &>::value &&
    !std::is_constructible<T, const expected<U, G> &&>::value &&
    !std::is_convertible<expected<U, G> &, T>::value &&
    !std::is_convertible<expected<U, G> &&, T>::value &&
    !std::is_convertible<const expected<U, G> &, T>::value &&
    !std::is_convertible<const expected<U, G> &&, T>::value>;

// `std::true_type` when `Value` is void, otherwise the supplied `Trait`.
template <class Value, class Trait>
using is_void_or = typename std::conditional<std::is_void<Value>::value,
                                             std::true_type, Trait>::type;

// The four traits below treat void as satisfying the property and defer to
// the corresponding standard trait for every other type.
template <class Value>
using is_copy_constructible_or_void =
    is_void_or<Value, std::is_copy_constructible<Value>>;

template <class Value>
using is_move_constructible_or_void =
    is_void_or<Value, std::is_move_constructible<Value>>;

template <class Value>
using is_copy_assignable_or_void =
    is_void_or<Value, std::is_copy_assignable<Value>>;

template <class Value>
using is_move_assignable_or_void =
    is_void_or<Value, std::is_move_assignable<Value>>;

}  // namespace detail

namespace detail {
// Tag type selecting the storage constructors that build neither the value
// nor the error alternative.
struct no_init_t {};
// Tag object to pass where a `no_init_t` parameter is expected.
static constexpr no_init_t no_init{};

// Implements the storage of the values, and ensures that the destructor is
// trivial if it can be.
//
// This specialization is for where neither `T` or `E` is trivially
// destructible, so the destructors must be called on destruction of the
// `expected`
template <class T, class E, bool = std::is_trivially_destructible<T>::value,
          bool = std::is_trivially_destructible<E>::value>
struct expected_storage_base {
  // Default state: holds a value initialized from `T{}`.
  constexpr expected_storage_base() : m_val(T{}), m_has_val(true) {}
  // Activates only the `m_no_init` placeholder; neither `m_val` nor
  // `m_unexpect` is constructed and `m_has_val` is false.
  constexpr expected_storage_base(no_init_t) : m_no_init(), m_has_val(false) {}

  // Constructs the value in place from `args`.
  template <class... Args,
            detail::enable_if_t<std::is_constructible<T, Args &&...>::value> * =
                nullptr>
  constexpr expected_storage_base(in_place_t, Args &&...args)
      : m_val(std::forward<Args>(args)...), m_has_val(true) {}

  // Constructs the value in place from an initializer list plus `args`.
  template <class U, class... Args,
            detail::enable_if_t<std::is_constructible<
                T, std::initializer_list<U> &, Args &&...>::value> * = nullptr>
  constexpr expected_storage_base(in_place_t, std::initializer_list<U> il,
                                  Args &&...args)
      : m_val(il, std::forward<Args>(args)...), m_has_val(true) {}
  // Constructs the error in place from `args`.
  template <class... Args,
            detail::enable_if_t<std::is_constructible<E, Args &&...>::value> * =
                nullptr>
  constexpr explicit expected_storage_base(unexpect_t, Args &&...args)
      : m_unexpect(std::forward<Args>(args)...), m_has_val(false) {}

  // Constructs the error in place from an initializer list plus `args`.
  template <class U, class... Args,
            detail::enable_if_t<std::is_constructible<
                E, std::initializer_list<U> &, Args &&...>::value> * = nullptr>
  constexpr explicit expected_storage_base(unexpect_t,
                                           std::initializer_list<U> il,
                                           Args &&...args)
      : m_unexpect(il, std::forward<Args>(args)...), m_has_val(false) {}

  // Destroys whichever alternative `m_has_val` says is stored.
  ~expected_storage_base() {
    if (m_has_val) {
      m_val.~T();
    } else {
      m_unexpect.~unexpected<E>();
    }
  }
  // Value, error and placeholder share the same storage.
  union {
    T m_val;
    unexpected<E> m_unexpect;
    char m_no_init;
  };
  // True when `m_val` is the stored alternative.
  bool m_has_val;
};

// This specialization is for when both `T` and `E` are trivially-destructible,
// so the destructor of the `expected` can be trivial.
template <class T, class E>
struct expected_storage_base<T, E, true, true> {
  // Default state: holds a value initialized from `T{}`.
  constexpr expected_storage_base() : m_val(T{}), m_has_val(true) {}
  // Activates only the `m_no_init` placeholder; `m_has_val` is false.
  constexpr expected_storage_base(no_init_t) : m_no_init(), m_has_val(false) {}

  // Constructs the value in place from `args`.
  template <class... Args,
            detail::enable_if_t<std::is_constructible<T, Args &&...>::value> * =
                nullptr>
  constexpr expected_storage_base(in_place_t, Args &&...args)
      : m_val(std::forward<Args>(args)...), m_has_val(true) {}

  // Constructs the value in place from an initializer list plus `args`.
  template <class U, class... Args,
            detail::enable_if_t<std::is_constructible<
                T, std::initializer_list<U> &, Args &&...>::value> * = nullptr>
  constexpr expected_storage_base(in_place_t, std::initializer_list<U> il,
                                  Args &&...args)
      : m_val(il, std::forward<Args>(args)...), m_has_val(true) {}
  // Constructs the error in place from `args`.
  template <class... Args,
            detail::enable_if_t<std::is_constructible<E, Args &&...>::value> * =
                nullptr>
  constexpr explicit expected_storage_base(unexpect_t, Args &&...args)
      : m_unexpect(std::forward<Args>(args)...), m_has_val(false) {}

  // Constructs the error in place from an initializer list plus `args`.
  template <class U, class... Args,
            detail::enable_if_t<std::is_constructible<
                E, std::initializer_list<U> &, Args &&...>::value> * = nullptr>
  constexpr explicit expected_storage_base(unexpect_t,
                                           std::initializer_list<U> il,
                                           Args &&...args)
      : m_unexpect(il, std::forward<Args>(args)...), m_has_val(false) {}

  // Copy/move operations and the destructor are all compiler-generated.
  expected_storage_base(const expected_storage_base &) = default;
  expected_storage_base(expected_storage_base &&) = default;
  expected_storage_base &operator=(const expected_storage_base &) = default;
  expected_storage_base &operator=(expected_storage_base &&) = default;
  ~expected_storage_base() = default;
  // Value, error and placeholder share the same storage.
  union {
    T m_val;
    unexpected<E> m_unexpect;
    char m_no_init;
  };
  // True when `m_val` is the stored alternative.
  bool m_has_val;
};

// `T` is trivially destructible, `E` is not: only the error alternative needs
// an explicit destructor call.
template <class T, class E>
struct expected_storage_base<T, E, true, false> {
  // Default state: holds a value initialized from `T{}`.
  constexpr expected_storage_base() : m_val(T{}), m_has_val(true) {}
  // Activates only the `m_no_init` placeholder; `m_has_val` is false.
  // Not constexpr on MSVC 2015 (see TL_EXPECTED_MSVC2015_CONSTEXPR).
  TL_EXPECTED_MSVC2015_CONSTEXPR expected_storage_base(no_init_t)
      : m_no_init(), m_has_val(false) {}

  // Constructs the value in place from `args`.
  template <class... Args,
            detail::enable_if_t<std::is_constructible<T, Args &&...>::value> * =
                nullptr>
  constexpr expected_storage_base(in_place_t, Args &&...args)
      : m_val(std::forward<Args>(args)...), m_has_val(true) {}

  // Constructs the value in place from an initializer list plus `args`.
  template <class U, class... Args,
            detail::enable_if_t<std::is_constructible<
                T, std::initializer_list<U> &, Args &&...>::value> * = nullptr>
  constexpr expected_storage_base(in_place_t, std::initializer_list<U> il,
                                  Args &&...args)
      : m_val(il, std::forward<Args>(args)...), m_has_val(true) {}
  // Constructs the error in place from `args`.
  template <class... Args,
            detail::enable_if_t<std::is_constructible<E, Args &&...>::value> * =
                nullptr>
  constexpr explicit expected_storage_base(unexpect_t, Args &&...args)
      : m_unexpect(std::forward<Args>(args)...), m_has_val(false) {}

  // Constructs the error in place from an initializer list plus `args`.
  template <class U, class... Args,
            detail::enable_if_t<std::is_constructible<
                E, std::initializer_list<U> &, Args &&...>::value> * = nullptr>
  constexpr explicit expected_storage_base(unexpect_t,
                                           std::initializer_list<U> il,
                                           Args &&...args)
      : m_unexpect(il, std::forward<Args>(args)...), m_has_val(false) {}

  expected_storage_base(const expected_storage_base &) = default;
  expected_storage_base(expected_storage_base &&) = default;
  expected_storage_base &operator=(const expected_storage_base &) = default;
  expected_storage_base &operator=(expected_storage_base &&) = default;
  // Destroys the error when it is the stored alternative; the value needs no
  // destructor call.
  ~expected_storage_base() {
    if (!m_has_val) {
      m_unexpect.~unexpected<E>();
    }
  }

  // Value, error and placeholder share the same storage.
  union {
    T m_val;
    unexpected<E> m_unexpect;
    char m_no_init;
  };
  // True when `m_val` is the stored alternative.
  bool m_has_val;
};

// `E` is trivially destructible, `T` is not: only the value alternative needs
// an explicit destructor call.
template <class T, class E>
struct expected_storage_base<T, E, false, true> {
  // Default state: holds a value initialized from `T{}`.
  constexpr expected_storage_base() : m_val(T{}), m_has_val(true) {}
  // Activates only the `m_no_init` placeholder; `m_has_val` is false.
  constexpr expected_storage_base(no_init_t) : m_no_init(), m_has_val(false) {}

  // Constructs the value in place from `args`.
  template <class... Args,
            detail::enable_if_t<std::is_constructible<T, Args &&...>::value> * =
                nullptr>
  constexpr expected_storage_base(in_place_t, Args &&...args)
      : m_val(std::forward<Args>(args)...), m_has_val(true) {}

  // Constructs the value in place from an initializer list plus `args`.
  template <class U, class... Args,
            detail::enable_if_t<std::is_constructible<
                T, std::initializer_list<U> &, Args &&...>::value> * = nullptr>
  constexpr expected_storage_base(in_place_t, std::initializer_list<U> il,
                                  Args &&...args)
      : m_val(il, std::forward<Args>(args)...), m_has_val(true) {}
  // Constructs the error in place from `args`.
  template <class... Args,
            detail::enable_if_t<std::is_constructible<E, Args &&...>::value> * =
                nullptr>
  constexpr explicit expected_storage_base(unexpect_t, Args &&...args)
      : m_unexpect(std::forward<Args>(args)...), m_has_val(false) {}

  // Constructs the error in place from an initializer list plus `args`.
  template <class U, class... Args,
            detail::enable_if_t<std::is_constructible<
                E, std::initializer_list<U> &, Args &&...>::value> * = nullptr>
  constexpr explicit expected_storage_base(unexpect_t,
                                           std::initializer_list<U> il,
                                           Args &&...args)
      : m_unexpect(il, std::forward<Args>(args)...), m_has_val(false) {}

  expected_storage_base(const expected_storage_base &) = default;
  expected_storage_base(expected_storage_base &&) = default;
  expected_storage_base &operator=(const expected_storage_base &) = default;
  expected_storage_base &operator=(expected_storage_base &&) = default;
  // Destroys the value when it is the stored alternative; the error needs no
  // destructor call.
  ~expected_storage_base() {
    if (m_has_val) {
      m_val.~T();
    }
  }
  // Value, error and placeholder share the same storage.
  union {
    T m_val;
    unexpected<E> m_unexpect;
    char m_no_init;
  };
  // True when `m_val` is the stored alternative.
  bool m_has_val;
};

// `T` is `void`, `E` is trivially-destructible
// There is no value to store; an empty `dummy` object stands in for it inside
// the union.
template <class E>
struct expected_storage_base<void, E, false, true> {
// NOTE(review): `__GNUC__ <= 5` is also true when __GNUC__ is not defined at
// all (an unknown identifier evaluates to 0 in `#if`), so compilers that do
// not define it take the no-constexpr branch as well — confirm intended.
#if __GNUC__ <= 5
// no constexpr for GCC 4/5 bug
#else
  TL_EXPECTED_MSVC2015_CONSTEXPR
#endif
  // Default state: "has value"; no union member is initialized.
  expected_storage_base() : m_has_val(true) {}

  // Activates the `m_val` placeholder and marks the object as not holding a
  // value.
  constexpr expected_storage_base(no_init_t) : m_val(), m_has_val(false) {}

  // In-place "value" construction: only the flag is set.
  constexpr expected_storage_base(in_place_t) : m_has_val(true) {}

  // Constructs the error in place from `args`.
  template <class... Args,
            detail::enable_if_t<std::is_constructible<E, Args &&...>::value> * =
                nullptr>
  constexpr explicit expected_storage_base(unexpect_t, Args &&...args)
      : m_unexpect(std::forward<Args>(args)...), m_has_val(false) {}

  // Constructs the error in place from an initializer list plus `args`.
  template <class U, class... Args,
            detail::enable_if_t<std::is_constructible<
                E, std::initializer_list<U> &, Args &&...>::value> * = nullptr>
  constexpr explicit expected_storage_base(unexpect_t,
                                           std::initializer_list<U> il,
                                           Args &&...args)
      : m_unexpect(il, std::forward<Args>(args)...), m_has_val(false) {}

  // Copy/move operations and the destructor are all compiler-generated.
  expected_storage_base(const expected_storage_base &) = default;
  expected_storage_base(expected_storage_base &&) = default;
  expected_storage_base &operator=(const expected_storage_base &) = default;
  expected_storage_base &operator=(expected_storage_base &&) = default;
  ~expected_storage_base() = default;
  // Empty placeholder type used in place of a value.
  struct dummy {};
  union {
    unexpected<E> m_unexpect;
    dummy m_val;
  };
  // True when the object is in the "has value" state.
  bool m_has_val;
};

// `T` is `void`, `E` is not trivially-destructible
// There is no value to store; a `char` placeholder stands in for it inside
// the union, and the error must be destroyed explicitly.
template <class E>
struct expected_storage_base<void, E, false, false> {
  // Default state: "has value"; only the placeholder is initialized.
  constexpr expected_storage_base() : m_dummy(), m_has_val(true) {}
  // Placeholder initialized, object marked as not holding a value.
  constexpr expected_storage_base(no_init_t) : m_dummy(), m_has_val(false) {}

  // In-place "value" construction: placeholder initialized, flag set.
  constexpr expected_storage_base(in_place_t) : m_dummy(), m_has_val(true) {}

  // Constructs the error in place from `args`.
  template <class... Args,
            detail::enable_if_t<std::is_constructible<E, Args &&...>::value> * =
                nullptr>
  constexpr explicit expected_storage_base(unexpect_t, Args &&...args)
      : m_unexpect(std::forward<Args>(args)...), m_has_val(false) {}

  // Constructs the error in place from an initializer list plus `args`.
  template <class U, class... Args,
            detail::enable_if_t<std::is_constructible<
                E, std::initializer_list<U> &, Args &&...>::value> * = nullptr>
  constexpr explicit expected_storage_base(unexpect_t,
                                           std::initializer_list<U> il,
                                           Args &&...args)
      : m_unexpect(il, std::forward<Args>(args)...), m_has_val(false) {}

  expected_storage_base(const expected_storage_base &) = default;
  expected_storage_base(expected_storage_base &&) = default;
  expected_storage_base &operator=(const expected_storage_base &) = default;
  expected_storage_base &operator=(expected_storage_base &&) = default;
  // Destroys the error when the object is not in the "has value" state.
  ~expected_storage_base() {
    if (!m_has_val) {
      m_unexpect.~unexpected<E>();
    }
  }

  // Error and placeholder share the same storage.
  union {
    unexpected<E> m_unexpect;
    char m_dummy;
  };
  // True when the object is in the "has value" state.
  bool m_has_val;
};

// This base class provides some handy member functions which can be used in
// further derived classes
template <class T, class E>
struct expected_operations_base : expected_storage_base<T, E> {
  using expected_storage_base<T, E>::expected_storage_base;

  // Placement-constructs a `T` from `args` in the `m_val` slot and marks the
  // object as holding a value. Whatever the storage held before is not
  // destroyed here.
  template <class... Args>
  void construct(Args &&...args) noexcept {
    auto *value_slot = std::addressof(this->m_val);
    new (value_slot) T(std::forward<Args>(args)...);
    this->m_has_val = true;
  }

  // Placement-constructs a `T` in the `m_val` slot from `rhs.get()`, with
  // `rhs` perfectly forwarded, and marks the object as holding a value.
  template <class Rhs>
  void construct_with(Rhs &&rhs) noexcept {
    auto *value_slot = std::addressof(this->m_val);
    new (value_slot) T(std::forward<Rhs>(rhs).get());
    this->m_has_val = true;
  }

  // Placement-constructs an `unexpected<E>` from `args` in the `m_unexpect`
  // slot and marks the object as not holding a value.
  template <class... Args>
  void construct_error(Args &&...args) noexcept {
    auto *error_slot = std::addressof(this->m_unexpect);
    new (error_slot) unexpected<E>(std::forward<Args>(args)...);
    this->m_has_val = false;
  }

#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED

  // The assign overloads pick the cheapest assignment strategy that still
  // aims at the strong exception guarantee. The delicate case is the one
  // where `rhs` holds a value while `*this` holds an error.
  //
  // This overload covers a `T` that is nothrow copy constructible: the error
  // is destroyed and the value is copy-constructed straight into place.
  template <class U = T,
            detail::enable_if_t<std::is_nothrow_copy_constructible<U>::value>
                * = nullptr>
  void assign(const expected_operations_base &rhs) noexcept {
    const bool error_becomes_value = rhs.m_has_val && !this->m_has_val;
    if (!error_becomes_value) {
      assign_common(rhs);
      return;
    }
    geterr().~unexpected<E>();
    construct(rhs.get());
  }

  // This overload handles the case where we can attempt to create a copy of
  // `T`, then no-throw move it into place if the copy was successful.
  //
  // The copy is made before the stored error is touched, so a throwing copy
  // leaves `*this` unchanged. The function is deliberately not `noexcept`:
  // it is only selected when `T`'s copy constructor may throw, and with a
  // `noexcept` specification that exception would end in std::terminate
  // instead of reaching the caller.
  template <class U = T,
            detail::enable_if_t<!std::is_nothrow_copy_constructible<U>::value &&
                                std::is_nothrow_move_constructible<U>::value>
                * = nullptr>
  void assign(const expected_operations_base &rhs) {
    if (!this->m_has_val && rhs.m_has_val) {
      // May throw; nothing has been modified yet.
      T tmp = rhs.get();
      // From here on nothing throws: destroy the error, move the copy in.
      geterr().~unexpected<E>();
      construct(std::move(tmp));
    } else {
      assign_common(rhs);
    }
  }

  // Copy-assign overload for the worst case: neither copying nor moving `T`
  // is known to be non-throwing. For the error -> value transition the stored
  // error is first moved into a temporary and destroyed, then `T` is
  // copy-constructed into the storage. If that construction throws, the saved
  // error is constructed back into the storage before the exception is
  // rethrown, so *this ends up in the error state again.
  // Every other combination of states is handled by assign_common().
  template <class U = T,
            detail::enable_if_t<!std::is_nothrow_copy_constructible<U>::value &&
                                !std::is_nothrow_move_constructible<U>::value>
                * = nullptr>
  void assign(const expected_operations_base &rhs) {
    if (!this->m_has_val && rhs.m_has_val) {
      auto tmp = std::move(geterr());
      geterr().~unexpected<E>();

#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED
      try {
        construct(rhs.get());
      } catch (...) {
        // The old unexpected<E> was destroyed above, so there is no live
        // object to assign to: re-create it with placement construction.
        construct_error(std::move(tmp));
        throw;
      }
#else
      construct(rhs.get());
#endif
    } else {
      assign_common(rhs);
    }
  }

  // Move-assign counterpart of the copy overloads, selected when `T` is
  // nothrow move constructible. When *this holds an error and rhs holds a
  // value, the stored error is destroyed and `T` is move-constructed directly
  // into the storage. Every other combination of states is handled by
  // assign_common().
  template <class U = T,
            detail::enable_if_t<std::is_nothrow_move_constructible<U>::value>
                * = nullptr>
  void assign(expected_operations_base &&rhs) noexcept {
    const bool error_becomes_value = rhs.m_has_val && !this->m_has_val;
    if (!error_becomes_value) {
      assign_common(std::move(rhs));
      return;
    }

    geterr().~unexpected<E>();
    construct(std::move(rhs).get());
  }

  // Move-assign overload selected when moving `T` may throw. For the
  // error -> value transition the stored error is first moved into a temporary
  // and destroyed, then `T` is move-constructed into the storage. If that
  // construction throws, the saved error is constructed back into the storage
  // before the exception is rethrown, so *this ends up in the error state
  // again. Every other combination of states is handled by assign_common().
  template <class U = T,
            detail::enable_if_t<!std::is_nothrow_move_constructible<U>::value>
                * = nullptr>
  void assign(expected_operations_base &&rhs) {
    if (!this->m_has_val && rhs.m_has_val) {
      auto tmp = std::move(geterr());
      geterr().~unexpected<E>();
#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED
      try {
        construct(std::move(rhs).get());
      } catch (...) {
        // The old unexpected<E> was destroyed above, so there is no live
        // object to assign to: re-create it with placement construction.
        construct_error(std::move(tmp));
        throw;
      }
#else
      construct(std::move(rhs).get());
#endif
    } else {
      assign_common(std::move(rhs));
    }
  }

#else

  // Exceptions-disabled copy assignment: no rollback path is needed, so the
  // error -> value transition simply destroys the stored error and
  // copy-constructs the value in place. Everything else is handled by
  // assign_common().
  void assign(const expected_operations_base &rhs) noexcept {
    const bool error_becomes_value = rhs.m_has_val && !this->m_has_val;
    if (!error_becomes_value) {
      assign_common(rhs);
      return;
    }

    geterr().~unexpected<E>();
    construct(rhs.get());
  }

  // Exceptions-disabled move assignment: the error -> value transition
  // destroys the stored error and move-constructs the value in place.
  // Everything else is handled by assign_common().
  void assign(expected_operations_base &&rhs) noexcept {
    const bool error_becomes_value = rhs.m_has_val && !this->m_has_val;
    if (!error_becomes_value) {
      assign_common(std::move(rhs));
      return;
    }

    geterr().~unexpected<E>();
    construct(std::move(rhs).get());
  }

#endif

  // Shared tail of copy and move assignment. Handles every state combination
  // except "*this holds an error, rhs holds a value" (the assign() overloads
  // deal with that one themselves):
  //   value <- value : assign the stored value
  //   value <- error : destroy the value, then construct the error
  //   error <- error : assign the stored error
  //   error <- value : nothing is done here
  // `Rhs` is forwarded so the same code serves copy and move.
  template <class Rhs>
  void assign_common(Rhs &&rhs) {
    if (this->m_has_val && rhs.m_has_val) {
      get() = std::forward<Rhs>(rhs).get();
      return;
    }

    if (this->m_has_val) {
      destroy_val();
      construct_error(std::forward<Rhs>(rhs).geterr());
      return;
    }

    if (!rhs.m_has_val) {
      geterr() = std::forward<Rhs>(rhs).geterr();
    }
  }

  // Returns the m_has_val flag: true when the value alternative is active,
  // false when the unexpected<E> alternative is active.
  bool has_value() const {
    return this->m_has_val;
  }

  // Accessors for the stored value, one overload per constness / value
  // category of *this; rvalue overloads return an rvalue reference so the
  // value can be moved out. m_has_val is not checked here, so callers are
  // expected to have established that a value is stored.
  TL_EXPECTED_11_CONSTEXPR T &get() & {
    return this->m_val;
  }
  constexpr const T &get() const & {
    return this->m_val;
  }
  TL_EXPECTED_11_CONSTEXPR T &&get() && {
    return std::move(this->m_val);
  }
#ifndef TL_EXPECTED_NO_CONSTRR
  // The const&& overload is compiled out when TL_EXPECTED_NO_CONSTRR is
  // defined.
  constexpr const T &&get() const && {
    return std::move(this->m_val);
  }
#endif

  // Accessors for the stored unexpected<E>, one overload per constness /
  // value category of *this; rvalue overloads return an rvalue reference so
  // the error can be moved out. m_has_val is not checked here, so callers are
  // expected to have established that an error is stored.
  TL_EXPECTED_11_CONSTEXPR unexpected<E> &geterr() & {
    return this->m_unexpect;
  }
  constexpr const unexpected<E> &geterr() const & {
    return this->m_unexpect;
  }
  TL_EXPECTED_11_CONSTEXPR unexpected<E> &&geterr() && {
    return std::move(this->m_unexpect);
  }
#ifndef TL_EXPECTED_NO_CONSTRR
  // The const&& overload is compiled out when TL_EXPECTED_NO_CONSTRR is
  // defined.
  constexpr const unexpected<E> &&geterr() const && {
    return std::move(this->m_unexpect);
  }
#endif

  // Runs the destructor of the stored value in place. The m_has_val flag is
  // left unchanged; the caller is responsible for constructing a replacement
  // (see assign_common, which follows this with construct_error).
  TL_EXPECTED_11_CONSTEXPR void destroy_val() {
    get().~T();
  }
};

// Operations layer for `expected<void, E>`. It offers the same helper names
// as the generic operations base so that the layers derived from it do not
// need to care whether `T` is void. There is no value object to manage here:
// only the m_has_val flag and the unexpected<E> storage are touched.
template <class E>
struct expected_operations_base<void, E> : expected_storage_base<void, E> {
  using expected_storage_base<void, E>::expected_storage_base;

  // Enters the value state. Nothing is constructed because `T` is void.
  template <class... Args>
  void construct() noexcept {
    this->m_has_val = true;
  }

  // Same effect as construct(). The argument is accepted and ignored so that
  // code written for a non-void `T` compiles unchanged.
  template <class Rhs>
  void construct_with(Rhs &&) noexcept {
    this->m_has_val = true;
  }

  // Placement-constructs the unexpected<E> from `args` and enters the error
  // state.
  template <class... Args>
  void construct_error(Args &&...args) noexcept {
    new (std::addressof(this->m_unexpect))
        unexpected<E>(std::forward<Args>(args)...);
    this->m_has_val = false;
  }

  // Shared copy/move assignment, dispatching on the state of both sides.
  template <class Rhs>
  void assign(Rhs &&rhs) noexcept {
    if (this->m_has_val) {
      // value <- value needs no work; value <- error builds the error.
      if (!rhs.m_has_val) {
        construct_error(std::forward<Rhs>(rhs).geterr());
      }
      return;
    }

    if (!rhs.m_has_val) {
      // error <- error: assign the stored unexpected<E>.
      geterr() = std::forward<Rhs>(rhs).geterr();
      return;
    }

    // error <- value: drop the stored error, then flag the value state.
    geterr().~unexpected<E>();
    construct();
  }

  // True when the (void) value alternative is active.
  bool has_value() const {
    return this->m_has_val;
  }

  // Accessors for the stored unexpected<E>, one per constness / value
  // category of *this.
  TL_EXPECTED_11_CONSTEXPR unexpected<E> &geterr() & {
    return this->m_unexpect;
  }
  constexpr const unexpected<E> &geterr() const & {
    return this->m_unexpect;
  }
  TL_EXPECTED_11_CONSTEXPR unexpected<E> &&geterr() && {
    return std::move(this->m_unexpect);
  }
#ifndef TL_EXPECTED_NO_CONSTRR
  constexpr const unexpected<E> &&geterr() const && {
    return std::move(this->m_unexpect);
  }
#endif

  // Present only for interface parity with the non-void base.
  TL_EXPECTED_11_CONSTEXPR void destroy_val() {
    // no-op
  }
};

// expected_copy_base decides whether the copy constructor stays trivial.
// This is the primary template. It declares no copy constructor of its own,
// so copying is whatever the compiler generates for the base. It is used
// whenever the <false, true> specialization does not match, i.e. when the
// first flag is true (T is void or trivially copy constructible, and E is
// trivially copy constructible) or when the second flag is false (T or E is
// not copy constructible at all).
template <class T, class E,
          bool = is_void_or<T, TL_EXPECTED_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(
                                   T)>::value &&
                 TL_EXPECTED_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(E)::value,
          bool = (is_copy_constructible_or_void<T>::value &&
                  std::is_copy_constructible<E>::value)>
struct expected_copy_base : expected_operations_base<T, E> {
  using expected_operations_base<T, E>::expected_operations_base;
};

// Specialization used when copying is possible but not trivial for `T` or
// `E`. It supplies a hand-written copy constructor that starts from
// uninitialized storage (no_init) and then builds whichever alternative the
// source holds. All other special members stay defaulted.
template <class T, class E>
struct expected_copy_base<T, E, false, true> : expected_operations_base<T, E> {
  using expected_operations_base<T, E>::expected_operations_base;

  expected_copy_base() = default;

  expected_copy_base(const expected_copy_base &rhs)
      : expected_operations_base<T, E>(no_init) {
    if (!rhs.has_value()) {
      // Source holds an error: copy-construct the unexpected<E>.
      this->construct_error(rhs.geterr());
      return;
    }
    // Source holds a value: build ours from it.
    this->construct_with(rhs);
  }

  expected_copy_base(expected_copy_base &&rhs) = default;
  expected_copy_base &operator=(const expected_copy_base &rhs) = default;
  expected_copy_base &operator=(expected_copy_base &&rhs) = default;
};

// This class manages conditionally having a trivial move constructor
// Unfortunately there's no way to achieve this in GCC < 5 AFAIK, since it
// doesn't implement an analogue to std::is_trivially_move_constructible. We
// have to make do with a non-trivial move constructor even if T is trivially
// move constructible
#ifndef TL_EXPECTED_GCC49
// Primary template: used when the flag is true, i.e. T is void or trivially
// move constructible and E is trivially move constructible. It declares no
// move constructor of its own, so the compiler-generated one is used.
template <class T, class E,
          bool =
              is_void_or<T, std::is_trivially_move_constructible<T>>::value &&
              std::is_trivially_move_constructible<E>::value>
struct expected_move_base : expected_copy_base<T, E> {
  using expected_copy_base<T, E>::expected_copy_base;
};
#else
// TL_EXPECTED_GCC49 build: the flag defaults to false, so only the
// <T, E, false> specialization with the hand-written move constructor is
// ever selected.
template <class T, class E, bool = false>
struct expected_move_base;
#endif
// Specialization used when the move constructor cannot be trivial. It
// supplies a hand-written move constructor that starts from uninitialized
// storage (no_init) and then builds whichever alternative the source holds,
// moving from it. All other special members stay defaulted.
template <class T, class E>
struct expected_move_base<T, E, false> : expected_copy_base<T, E> {
  using expected_copy_base<T, E>::expected_copy_base;

  expected_move_base() = default;
  expected_move_base(const expected_move_base &rhs) = default;

  expected_move_base(expected_move_base &&rhs) noexcept(
      std::is_nothrow_move_constructible<T>::value)
      : expected_copy_base<T, E>(no_init) {
    if (!rhs.has_value()) {
      // Source holds an error: move-construct the unexpected<E>.
      this->construct_error(std::move(rhs.geterr()));
      return;
    }
    // Source holds a value: build ours by moving from it.
    this->construct_with(std::move(rhs));
  }

  expected_move_base &operator=(const expected_move_base &rhs) = default;
  expected_move_base &operator=(expected_move_base &&rhs) = default;
};

// expected_copy_assign_base decides whether copy assignment stays trivial.
// This is the primary template. It declares no copy assignment operator of
// its own. It is used whenever the <false, true> specialization does not
// match, i.e. when the first flag is true (T is void or trivially copy
// assignable, copy constructible and destructible, and E is all three as
// well) or when the second flag is false (T or E lacks copy construction or
// copy assignment).
template <
    class T, class E,
    bool =
        is_void_or<
            T, conjunction<TL_EXPECTED_IS_TRIVIALLY_COPY_ASSIGNABLE(T),
                           TL_EXPECTED_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T),
                           TL_EXPECTED_IS_TRIVIALLY_DESTRUCTIBLE(T)>>::value &&
        TL_EXPECTED_IS_TRIVIALLY_COPY_ASSIGNABLE(E)::value &&
        TL_EXPECTED_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(E)::value &&
        TL_EXPECTED_IS_TRIVIALLY_DESTRUCTIBLE(E)::value,
    bool = (is_copy_constructible_or_void<T>::value &&
            std::is_copy_constructible<E>::value &&
            is_copy_assignable_or_void<T>::value &&
            std::is_copy_assignable<E>::value)>
struct expected_copy_assign_base : expected_move_base<T, E> {
  using expected_move_base<T, E>::expected_move_base;
};

// Specialization used when copy assignment is possible but not trivial.
// Only the copy assignment operator is user-provided; it forwards to the
// assign() helper inherited from the operations base. All other special
// members stay defaulted.
template <class T, class E>
struct expected_copy_assign_base<T, E, false, true> : expected_move_base<T, E> {
  using expected_move_base<T, E>::expected_move_base;

  expected_copy_assign_base() = default;
  expected_copy_assign_base(const expected_copy_assign_base &rhs) = default;

  expected_copy_assign_base(expected_copy_assign_base &&rhs) = default;
  // Copy assignment: delegates the state-dependent work to assign().
  expected_copy_assign_base &operator=(const expected_copy_assign_base &rhs) {
    this->assign(rhs);
    return *this;
  }
  expected_copy_assign_base &operator=(expected_copy_assign_base &&rhs) =
      default;
};

// This class manages conditionally having a trivial move assignment operator
// Unfortunately there's no way to achieve this in GCC < 5 AFAIK, since it
// doesn't implement an analogue to std::is_trivially_move_assignable. We have
// to make do with a non-trivial move assignment operator even if T is trivially
// move assignable
#ifndef TL_EXPECTED_GCC49
// Primary template: used when the flag is true, i.e. T is void or trivially
// destructible, move constructible and move assignable, and E is all three
// as well. It declares no move assignment operator of its own.
template <
    class T, class E,
    bool = is_void_or<
               T, conjunction<std::is_trivially_destructible<T>,
                              std::is_trivially_move_constructible<T>,
                              std::is_trivially_move_assignable<T>>>::value &&
           std::is_trivially_destructible<E>::value &&
           std::is_trivially_move_constructible<E>::value &&
           std::is_trivially_move_assignable<E>::value>
struct expected_move_assign_base : expected_copy_assign_base<T, E> {
  using expected_copy_assign_base<T, E>::expected_copy_assign_base;
};
#else
// TL_EXPECTED_GCC49 build: the flag defaults to false, so only the
// <T, E, false> specialization with the hand-written move assignment is ever
// selected.
template <class T, class E, bool = false>
struct expected_move_assign_base;
#endif

// Specialization used when move assignment cannot be trivial. Only the move
// assignment operator is user-provided; it forwards to the assign() helper
// inherited from the operations base. All other special members stay
// defaulted.
template <class T, class E>
struct expected_move_assign_base<T, E, false>
    : expected_copy_assign_base<T, E> {
  using expected_copy_assign_base<T, E>::expected_copy_assign_base;

  expected_move_assign_base() = default;
  expected_move_assign_base(const expected_move_assign_base &rhs) = default;

  expected_move_assign_base(expected_move_assign_base &&rhs) = default;

  expected_move_assign_base &operator=(const expected_move_assign_base &rhs) =
      default;

  // Move assignment: delegates the state-dependent work to assign(). Declared
  // noexcept only when T is both nothrow move constructible and nothrow move
  // assignable.
  expected_move_assign_base &operator=(
      expected_move_assign_base
          &&rhs) noexcept(std::is_nothrow_move_constructible<T>::value &&
                          std::is_nothrow_move_assignable<T>::value) {
    this->assign(std::move(rhs));
    return *this;
  }
};

// expected_delete_ctor_base is an empty mix-in whose specializations delete
// the copy and/or move constructor. EnableCopy is true when T is void or copy
// constructible and E is copy constructible; EnableMove is the same for move
// construction.
// Primary template (EnableCopy and EnableMove both true): everything is
// defaulted, so nothing is disabled.
template <class T, class E,
          bool EnableCopy = (is_copy_constructible_or_void<T>::value &&
                             std::is_copy_constructible<E>::value),
          bool EnableMove = (is_move_constructible_or_void<T>::value &&
                             std::is_move_constructible<E>::value)>
struct expected_delete_ctor_base {
  expected_delete_ctor_base() = default;
  expected_delete_ctor_base(const expected_delete_ctor_base &) = default;
  expected_delete_ctor_base(expected_delete_ctor_base &&) noexcept = default;
  expected_delete_ctor_base &operator=(const expected_delete_ctor_base &) =
      default;
  expected_delete_ctor_base &operator=(expected_delete_ctor_base &&) noexcept =
      default;
};

// Copy construction enabled, move construction disabled: only the move
// constructor is deleted.
template <class T, class E>
struct expected_delete_ctor_base<T, E, true, false> {
  expected_delete_ctor_base() = default;
  expected_delete_ctor_base(const expected_delete_ctor_base &) = default;
  expected_delete_ctor_base(expected_delete_ctor_base &&) noexcept = delete;
  expected_delete_ctor_base &operator=(const expected_delete_ctor_base &) =
      default;
  expected_delete_ctor_base &operator=(expected_delete_ctor_base &&) noexcept =
      default;
};

// Copy construction disabled, move construction enabled: only the copy
// constructor is deleted.
template <class T, class E>
struct expected_delete_ctor_base<T, E, false, true> {
  expected_delete_ctor_base() = default;
  expected_delete_ctor_base(const expected_delete_ctor_base &) = delete;
  expected_delete_ctor_base(expected_delete_ctor_base &&) noexcept = default;
  expected_delete_ctor_base &operator=(const expected_delete_ctor_base &) =
      default;
  expected_delete_ctor_base &operator=(expected_delete_ctor_base &&) noexcept =
      default;
};

// Neither copy nor move construction enabled: both constructors are deleted.
// The assignment operators stay defaulted; they are governed separately by
// expected_delete_assign_base.
template <class T, class E>
struct expected_delete_ctor_base<T, E, false, false> {
  expected_delete_ctor_base() = default;
  expected_delete_ctor_base(const expected_delete_ctor_base &) = delete;
  expected_delete_ctor_base(expected_delete_ctor_base &&) noexcept = delete;
  expected_delete_ctor_base &operator=(const expected_delete_ctor_base &) =
      default;
  expected_delete_ctor_base &operator=(expected_delete_ctor_base &&) noexcept =
      default;
};

// expected_delete_assign_base is an empty mix-in whose specializations delete
// the copy and/or move assignment operator. EnableCopy is true when T (or
// void) and E are both copy constructible and copy assignable; EnableMove is
// the same for move construction and move assignment.
// Primary template (EnableCopy and EnableMove both true): everything is
// defaulted, so nothing is disabled.
template <class T, class E,
          bool EnableCopy = (is_copy_constructible_or_void<T>::value &&
                             std::is_copy_constructible<E>::value &&
                             is_copy_assignable_or_void<T>::value &&
                             std::is_copy_assignable<E>::value),
          bool EnableMove = (is_move_constructible_or_void<T>::value &&
                             std::is_move_constructible<E>::value &&
                             is_move_assignable_or_void<T>::value &&
                             std::is_move_assignable<E>::value)>
struct expected_delete_assign_base {
  expected_delete_assign_base() = default;
  expected_delete_assign_base(const expected_delete_assign_base &) = default;
  expected_delete_assign_base(expected_delete_assign_base &&) noexcept =
      default;
  expected_delete_assign_base &operator=(const expected_delete_assign_base &) =
      default;
  expected_delete_assign_base &operator=(
      expected_delete_assign_base &&) noexcept = default;
};

// Copy assignment enabled, move assignment disabled: only the move
// assignment operator is deleted.
template <class T, class E>
struct expected_delete_assign_base<T, E, true, false> {
  expected_delete_assign_base() = default;
  expected_delete_assign_base(const expected_delete_assign_base &) = default;
  expected_delete_assign_base(expected_delete_assign_base &&) noexcept =
      default;
  expected_delete_assign_base &operator=(const expected_delete_assign_base &) =
      default;
  expected_delete_assign_base &operator=(
      expected_delete_assign_base &&) noexcept = delete;
};

// Copy assignment disabled, move assignment enabled: only the copy
// assignment operator is deleted.
template <class T, class E>
struct expected_delete_assign_base<T, E, false, true> {
  expected_delete_assign_base() = default;
  expected_delete_assign_base(const expected_delete_assign_base &) = default;
  expected_delete_assign_base(expected_delete_assign_base &&) noexcept =
      default;
  expected_delete_assign_base &operator=(const expected_delete_assign_base &) =
      delete;
  expected_delete_assign_base &operator=(
      expected_delete_assign_base &&) noexcept = default;
};

// Neither copy nor move assignment enabled: both assignment operators are
// deleted. The constructors stay defaulted; they are governed separately by
// expected_delete_ctor_base.
template <class T, class E>
struct expected_delete_assign_base<T, E, false, false> {
  expected_delete_assign_base() = default;
  expected_delete_assign_base(const expected_delete_assign_base &) = default;
  expected_delete_assign_base(expected_delete_assign_base &&) noexcept =
      default;
  expected_delete_assign_base &operator=(const expected_delete_assign_base &) =
      delete;
  expected_delete_assign_base &operator=(
      expected_delete_assign_base &&) noexcept = delete;
};

// Tag type accepted by expected_default_ctor_base's explicit constructor.
// It gives `expected`'s own constructors a way to initialize that base even
// in the specialization whose default constructor is deleted. The tag's
// constructor is explicit, so it has to be spelled out as
// `default_constructor_tag{}` rather than `{}`.
struct default_constructor_tag {
  explicit constexpr default_constructor_tag() = default;
};

// expected_default_ctor_base is an empty mix-in that gives `expected` a
// deleted default constructor when T cannot be default constructed.
// This is the primary template, used when Enable is true, i.e. T is default
// constructible or void: the default constructor is available.
template <class T, class E,
          bool Enable =
              std::is_default_constructible<T>::value || std::is_void<T>::value>
struct expected_default_ctor_base {
  constexpr expected_default_ctor_base() noexcept = default;
  constexpr expected_default_ctor_base(
      expected_default_ctor_base const &) noexcept = default;
  constexpr expected_default_ctor_base(expected_default_ctor_base &&) noexcept =
      default;
  expected_default_ctor_base &operator=(
      expected_default_ctor_base const &) noexcept = default;
  expected_default_ctor_base &operator=(
      expected_default_ctor_base &&) noexcept = default;

  // Tag constructor used by `expected`'s non-default constructors.
  constexpr explicit expected_default_ctor_base(default_constructor_tag) {}
};

// Specialization used when Enable is false, i.e. T is neither default
// constructible nor void: the default constructor is deleted, which in turn
// removes the default constructor of any class deriving from this one.
template <class T, class E>
struct expected_default_ctor_base<T, E, false> {
  constexpr expected_default_ctor_base() noexcept = delete;
  constexpr expected_default_ctor_base(
      expected_default_ctor_base const &) noexcept = default;
  constexpr expected_default_ctor_base(expected_default_ctor_base &&) noexcept =
      default;
  expected_default_ctor_base &operator=(
      expected_default_ctor_base const &) noexcept = default;
  expected_default_ctor_base &operator=(
      expected_default_ctor_base &&) noexcept = default;

  // Tag constructor: the only way to initialize this base, used by
  // `expected`'s non-default constructors.
  constexpr explicit expected_default_ctor_base(default_constructor_tag) {}
};
}  // namespace detail

/// Exception type that carries an error value of type `E`.
/// what() always returns the fixed text "Bad expected access"; the payload is
/// reached through the error() overloads.
template <class E>
class bad_expected_access : public std::exception {
 public:
  /// Takes the error by value and moves it into the exception object.
  explicit bad_expected_access(E e) : m_val(std::move(e)) {}

  /// Fixed diagnostic string; independent of the stored error.
  const char *what() const noexcept override {
    return "Bad expected access";
  }

  /// Access to the stored error, one overload per constness / value category
  /// of the exception object. The rvalue overloads allow moving the error out.
  E &error() & {
    return m_val;
  }
  const E &error() const & {
    return m_val;
  }
  E &&error() && {
    return std::move(m_val);
  }
  const E &&error() const && {
    return std::move(m_val);
  }

 private:
  E m_val;  // the error supplied at construction
};

/// An `expected<T, E>` object is an object that contains the storage for
/// another object and manages the lifetime of this contained object `T`.
/// Alternatively it could contain the storage for another unexpected object
/// `E`. The contained object may not be initialized after the expected object
/// has been initialized, and may not be destroyed before the expected object
/// has been destroyed. The initialization state of the contained object is
/// tracked by the expected object.
template <class T, class E>
class TL_EXPECTED_NODISCARD expected
    : private detail::expected_move_assign_base<T, E>,
      private detail::expected_delete_ctor_base<T, E>,
      private detail::expected_delete_assign_base<T, E>,
      private detail::expected_default_ctor_base<T, E> {
  static_assert(!std::is_reference<T>::value, "T must not be a reference");
  static_assert(!std::is_same<T, std::remove_cv<in_place_t>::type>::value,
                "T must not be in_place_t");
  static_assert(!std::is_same<T, std::remove_cv<unexpect_t>::type>::value,
                "T must not be unexpect_t");
  static_assert(
      !std::is_same<T, typename std::remove_cv<unexpected<E>>::type>::value,
      "T must not be unexpected<E>");
  static_assert(!std::is_reference<E>::value, "E must not be a reference");

  // Address of the value storage (m_val), obtained with std::addressof.
  // No check is made that a value is currently stored.
  T *valptr() {
    return std::addressof(this->m_val);
  }
  const T *valptr() const {
    return std::addressof(this->m_val);
  }
  // Address of the error storage (m_unexpect), obtained with std::addressof.
  // No check is made that an error is currently stored.
  unexpected<E> *errptr() {
    return std::addressof(this->m_unexpect);
  }
  const unexpected<E> *errptr() const {
    return std::addressof(this->m_unexpect);
  }

  // Unchecked reference accessors for the two alternatives. val() is a
  // template constrained on `U = T` being non-void, so it does not exist for
  // expected<void, E>; err() is always available. Neither checks which
  // alternative is active.
  template <class U = T,
            detail::enable_if_t<!std::is_void<U>::value> * = nullptr>
  TL_EXPECTED_11_CONSTEXPR U &val() {
    return this->m_val;
  }
  TL_EXPECTED_11_CONSTEXPR unexpected<E> &err() {
    return this->m_unexpect;
  }

  // Const counterparts of the accessors above.
  template <class U = T,
            detail::enable_if_t<!std::is_void<U>::value> * = nullptr>
  constexpr const U &val() const {
    return this->m_val;
  }
  constexpr const unexpected<E> &err() const {
    return this->m_unexpect;
  }

  using impl_base = detail::expected_move_assign_base<T, E>;
  using ctor_base = detail::expected_default_ctor_base<T, E>;

 public:
  typedef T value_type;
  typedef E error_type;
  typedef unexpected<E> unexpected_type;

#if defined(TL_EXPECTED_CXX14) && !defined(TL_EXPECTED_GCC49) && \
    !defined(TL_EXPECTED_GCC54) && !defined(TL_EXPECTED_GCC55)
  /// and_then(f): every overload forwards *this (with its constness and value
  /// category preserved) together with `f` to and_then_impl, which is defined
  /// elsewhere in this file, and returns its result.
  /// In this branch the return type is deduced with `auto`.
  template <class F>
  TL_EXPECTED_11_CONSTEXPR auto and_then(F &&f) & {
    return and_then_impl(*this, std::forward<F>(f));
  }
  template <class F>
  TL_EXPECTED_11_CONSTEXPR auto and_then(F &&f) && {
    return and_then_impl(std::move(*this), std::forward<F>(f));
  }
  template <class F>
  constexpr auto and_then(F &&f) const & {
    return and_then_impl(*this, std::forward<F>(f));
  }

#ifndef TL_EXPECTED_NO_CONSTRR
  template <class F>
  constexpr auto and_then(F &&f) const && {
    return and_then_impl(std::move(*this), std::forward<F>(f));
  }
#endif

#else
  /// Same overload set as the branch above, with the return type spelled out
  /// as a trailing `decltype` of the and_then_impl call instead of deduced.
  template <class F>
  TL_EXPECTED_11_CONSTEXPR auto and_then(
      F &&f) & -> decltype(and_then_impl(std::declval<expected &>(),
                                         std::forward<F>(f))) {
    return and_then_impl(*this, std::forward<F>(f));
  }
  template <class F>
  TL_EXPECTED_11_CONSTEXPR auto and_then(
      F &&f) && -> decltype(and_then_impl(std::declval<expected &&>(),
                                          std::forward<F>(f))) {
    return and_then_impl(std::move(*this), std::forward<F>(f));
  }
  template <class F>
  constexpr auto and_then(
      F &&f) const & -> decltype(and_then_impl(std::declval<expected const &>(),
                                               std::forward<F>(f))) {
    return and_then_impl(*this, std::forward<F>(f));
  }

#ifndef TL_EXPECTED_NO_CONSTRR
  template <class F>
  constexpr auto and_then(F &&f)
      const && -> decltype(and_then_impl(std::declval<expected const &&>(),
                                         std::forward<F>(f))) {
    return and_then_impl(std::move(*this), std::forward<F>(f));
  }
#endif
#endif

#if defined(TL_EXPECTED_CXX14) && !defined(TL_EXPECTED_GCC49) && \
    !defined(TL_EXPECTED_GCC54) && !defined(TL_EXPECTED_GCC55)
  /// map(f): every overload forwards *this (with its constness and value
  /// category preserved) together with `f` to expected_map_impl, which is
  /// defined elsewhere in this file, and returns its result.
  /// In this branch the return type is deduced with `auto`.
  template <class F>
  TL_EXPECTED_11_CONSTEXPR auto map(F &&f) & {
    return expected_map_impl(*this, std::forward<F>(f));
  }
  template <class F>
  TL_EXPECTED_11_CONSTEXPR auto map(F &&f) && {
    return expected_map_impl(std::move(*this), std::forward<F>(f));
  }
  template <class F>
  constexpr auto map(F &&f) const & {
    return expected_map_impl(*this, std::forward<F>(f));
  }
  template <class F>
  constexpr auto map(F &&f) const && {
    return expected_map_impl(std::move(*this), std::forward<F>(f));
  }
#else
  /// Same overload set as the branch above, with the return type spelled out
  /// as a `decltype` of the expected_map_impl call instead of deduced.
  template <class F>
  TL_EXPECTED_11_CONSTEXPR decltype(expected_map_impl(
      std::declval<expected &>(), std::declval<F &&>()))
  map(F &&f) & {
    return expected_map_impl(*this, std::forward<F>(f));
  }
  template <class F>
  TL_EXPECTED_11_CONSTEXPR decltype(expected_map_impl(std::declval<expected>(),
                                                      std::declval<F &&>()))
  map(F &&f) && {
    return expected_map_impl(std::move(*this), std::forward<F>(f));
  }
  template <class F>
  constexpr decltype(expected_map_impl(std::declval<const expected &>(),
                                       std::declval<F &&>()))
  map(F &&f) const & {
    return expected_map_impl(*this, std::forward<F>(f));
  }

#ifndef TL_EXPECTED_NO_CONSTRR
  template <class F>
  constexpr decltype(expected_map_impl(std::declval<const expected &&>(),
                                       std::declval<F &&>()))
  map(F &&f) const && {
    return expected_map_impl(std::move(*this), std::forward<F>(f));
  }
#endif
#endif

#if defined(TL_EXPECTED_CXX14) && !defined(TL_EXPECTED_GCC49) && \
    !defined(TL_EXPECTED_GCC54) && !defined(TL_EXPECTED_GCC55)
  /// transform(f): same implementation as map() under a second name. Every
  /// overload forwards *this (with its constness and value category
  /// preserved) together with `f` to expected_map_impl and returns its result.
  /// In this branch the return type is deduced with `auto`.
  template <class F>
  TL_EXPECTED_11_CONSTEXPR auto transform(F &&f) & {
    return expected_map_impl(*this, std::forward<F>(f));
  }
  template <class F>
  TL_EXPECTED_11_CONSTEXPR auto transform(F &&f) && {
    return expected_map_impl(std::move(*this), std::forward<F>(f));
  }
  template <class F>
  constexpr auto transform(F &&f) const & {
    return expected_map_impl(*this, std::forward<F>(f));
  }
  template <class F>
  constexpr auto transform(F &&f) const && {
    return expected_map_impl(std::move(*this), std::forward<F>(f));
  }
#else
  /// Same overload set as the branch above, with the return type spelled out
  /// as a `decltype` of the expected_map_impl call instead of deduced.
  template <class F>
  TL_EXPECTED_11_CONSTEXPR decltype(expected_map_impl(
      std::declval<expected &>(), std::declval<F &&>()))
  transform(F &&f) & {
    return expected_map_impl(*this, std::forward<F>(f));
  }
  template <class F>
  TL_EXPECTED_11_CONSTEXPR decltype(expected_map_impl(std::declval<expected>(),
                                                      std::declval<F &&>()))
  transform(F &&f) && {
    return expected_map_impl(std::move(*this), std::forward<F>(f));
  }
  template <class F>
  constexpr decltype(expected_map_impl(std::declval<const expected &>(),
                                       std::declval<F &&>()))
  transform(F &&f) const & {
    return expected_map_impl(*this, std::forward<F>(f));
  }

#ifndef TL_EXPECTED_NO_CONSTRR
  template <class F>
  constexpr decltype(expected_map_impl(std::declval<const expected &&>(),
                                       std::declval<F &&>()))
  transform(F &&f) const && {
    return expected_map_impl(std::move(*this), std::forward<F>(f));
  }
#endif
#endif

#if defined(TL_EXPECTED_CXX14) && !defined(TL_EXPECTED_GCC49) && \
    !defined(TL_EXPECTED_GCC54) && !defined(TL_EXPECTED_GCC55)
  /// map_error(f): every overload forwards *this (with its constness and
  /// value category preserved) together with `f` to map_error_impl, which is
  /// defined elsewhere in this file, and returns its result.
  /// In this branch the return type is deduced with `auto`.
  template <class F>
  TL_EXPECTED_11_CONSTEXPR auto map_error(F &&f) & {
    return map_error_impl(*this, std::forward<F>(f));
  }
  template <class F>
  TL_EXPECTED_11_CONSTEXPR auto map_error(F &&f) && {
    return map_error_impl(std::move(*this), std::forward<F>(f));
  }
  template <class F>
  constexpr auto map_error(F &&f) const & {
    return map_error_impl(*this, std::forward<F>(f));
  }
  template <class F>
  constexpr auto map_error(F &&f) const && {
    return map_error_impl(std::move(*this), std::forward<F>(f));
  }
#else
  /// Same overload set as the branch above, with the return type spelled out
  /// as a `decltype` of the map_error_impl call instead of deduced.
  template <class F>
  TL_EXPECTED_11_CONSTEXPR decltype(map_error_impl(std::declval<expected &>(),
                                                   std::declval<F &&>()))
  map_error(F &&f) & {
    return map_error_impl(*this, std::forward<F>(f));
  }
  template <class F>
  TL_EXPECTED_11_CONSTEXPR decltype(map_error_impl(std::declval<expected &&>(),
                                                   std::declval<F &&>()))
  map_error(F &&f) && {
    return map_error_impl(std::move(*this), std::forward<F>(f));
  }
  template <class F>
  constexpr decltype(map_error_impl(std::declval<const expected &>(),
                                    std::declval<F &&>()))
  map_error(F &&f) const & {
    return map_error_impl(*this, std::forward<F>(f));
  }

#ifndef TL_EXPECTED_NO_CONSTRR
  template <class F>
  constexpr decltype(map_error_impl(std::declval<const expected &&>(),
                                    std::declval<F &&>()))
  map_error(F &&f) const && {
    return map_error_impl(std::move(*this), std::forward<F>(f));
  }
#endif
#endif
#if defined(TL_EXPECTED_CXX14) && !defined(TL_EXPECTED_GCC49) && \
    !defined(TL_EXPECTED_GCC54) && !defined(TL_EXPECTED_GCC55)
  /// transform_error(f): same implementation as map_error() under a second
  /// name. Every overload forwards *this (with its constness and value
  /// category preserved) together with `f` to map_error_impl and returns its
  /// result. In this branch the return type is deduced with `auto`.
  template <class F>
  TL_EXPECTED_11_CONSTEXPR auto transform_error(F &&f) & {
    return map_error_impl(*this, std::forward<F>(f));
  }
  template <class F>
  TL_EXPECTED_11_CONSTEXPR auto transform_error(F &&f) && {
    return map_error_impl(std::move(*this), std::forward<F>(f));
  }
  template <class F>
  constexpr auto transform_error(F &&f) const & {
    return map_error_impl(*this, std::forward<F>(f));
  }
  template <class F>
  constexpr auto transform_error(F &&f) const && {
    return map_error_impl(std::move(*this), std::forward<F>(f));
  }
#else
  /// Same overload set as the branch above, with the return type spelled out
  /// as a `decltype` of the map_error_impl call instead of deduced.
  template <class F>
  TL_EXPECTED_11_CONSTEXPR decltype(map_error_impl(std::declval<expected &>(),
                                                   std::declval<F &&>()))
  transform_error(F &&f) & {
    return map_error_impl(*this, std::forward<F>(f));
  }
  template <class F>
  TL_EXPECTED_11_CONSTEXPR decltype(map_error_impl(std::declval<expected &&>(),
                                                   std::declval<F &&>()))
  transform_error(F &&f) && {
    return map_error_impl(std::move(*this), std::forward<F>(f));
  }
  template <class F>
  constexpr decltype(map_error_impl(std::declval<const expected &>(),
                                    std::declval<F &&>()))
  transform_error(F &&f) const & {
    return map_error_impl(*this, std::forward<F>(f));
  }

#ifndef TL_EXPECTED_NO_CONSTRR
  template <class F>
  constexpr decltype(map_error_impl(std::declval<const expected &&>(),
                                    std::declval<F &&>()))
  transform_error(F &&f) const && {
    return map_error_impl(std::move(*this), std::forward<F>(f));
  }
#endif
#endif
  /// or_else(f): every overload forwards *this (with its constness and value
  /// category preserved) together with `f` to or_else_impl, which is defined
  /// elsewhere in this file. Unlike the other combinators here, the declared
  /// return type is always `expected` itself, so the helper's result is
  /// converted to this type.
  template <class F>
  expected TL_EXPECTED_11_CONSTEXPR or_else(F &&f) & {
    return or_else_impl(*this, std::forward<F>(f));
  }

  template <class F>
  expected TL_EXPECTED_11_CONSTEXPR or_else(F &&f) && {
    return or_else_impl(std::move(*this), std::forward<F>(f));
  }

  template <class F>
  expected constexpr or_else(F &&f) const & {
    return or_else_impl(*this, std::forward<F>(f));
  }

#ifndef TL_EXPECTED_NO_CONSTRR
  // The const&& overload is compiled out when TL_EXPECTED_NO_CONSTRR is
  // defined.
  template <class F>
  expected constexpr or_else(F &&f) const && {
    return or_else_impl(std::move(*this), std::forward<F>(f));
  }
#endif
  // Default construction, copy/move construction and copy/move assignment are
  // all defaulted here, so their behaviour comes from the base classes and
  // members of this class.
  constexpr expected() = default;
  constexpr expected(const expected &rhs) = default;
  constexpr expected(expected &&rhs) = default;
  expected &operator=(const expected &rhs) = default;
  expected &operator=(expected &&rhs) = default;

  // In-place value constructor. Participates in overload resolution only when
  // T is constructible from Args&&...; the arguments are forwarded to impl_base
  // together with the in_place tag.
  template <class... Args,
            detail::enable_if_t<std::is_constructible<T, Args &&...>::value> * =
                nullptr>
  constexpr expected(in_place_t, Args &&...args)
      : impl_base(in_place, std::forward<Args>(args)...),
        ctor_base(detail::default_constructor_tag{}) {}

  // In-place value constructor taking an initializer list plus extra
  // arguments. Enabled only when T is constructible from
  // (std::initializer_list<U> &, Args&&...).
  template <class U, class... Args,
            detail::enable_if_t<std::is_constructible<
                T, std::initializer_list<U> &, Args &&...>::value> * = nullptr>
  constexpr expected(in_place_t, std::initializer_list<U> il, Args &&...args)
      : impl_base(in_place, il, std::forward<Args>(args)...),
        ctor_base(detail::default_constructor_tag{}) {}

  // Explicit constructor from `const unexpected<G> &`. Selected when E can be
  // constructed from `const G &` but `const G &` is not implicitly convertible
  // to E. The wrapped error value is passed to impl_base with the unexpect tag.
  template <class G = E,
            detail::enable_if_t<std::is_constructible<E, const G &>::value> * =
                nullptr,
            detail::enable_if_t<!std::is_convertible<const G &, E>::value> * =
                nullptr>
  explicit constexpr expected(const unexpected<G> &e)
      : impl_base(unexpect, e.value()),
        ctor_base(detail::default_constructor_tag{}) {}

  // Implicit constructor from `const unexpected<G> &`. Selected when
  // `const G &` is implicitly convertible to E (the counterpart of the explicit
  // overload, which covers the non-convertible case).
  template <
      class G = E,
      detail::enable_if_t<std::is_constructible<E, const G &>::value> * =
          nullptr,
      detail::enable_if_t<std::is_convertible<const G &, E>::value> * = nullptr>
  constexpr expected(unexpected<G> const &e)
      : impl_base(unexpect, e.value()),
        ctor_base(detail::default_constructor_tag{}) {}

  // Explicit constructor from `unexpected<G> &&`. Selected when E is
  // constructible from G&& but G&& is not implicitly convertible to E. The
  // error value is moved out of `e`; noexcept mirrors whether E is nothrow
  // constructible from G&&.
  template <
      class G = E,
      detail::enable_if_t<std::is_constructible<E, G &&>::value> * = nullptr,
      detail::enable_if_t<!std::is_convertible<G &&, E>::value> * = nullptr>
  explicit constexpr expected(unexpected<G> &&e) noexcept(
      std::is_nothrow_constructible<E, G &&>::value)
      : impl_base(unexpect, std::move(e.value())),
        ctor_base(detail::default_constructor_tag{}) {}

  // Implicit constructor from `unexpected<G> &&`. Selected when G&& is
  // implicitly convertible to E. The error value is moved out of `e`; noexcept
  // mirrors whether E is nothrow constructible from G&&.
  template <
      class G = E,
      detail::enable_if_t<std::is_constructible<E, G &&>::value> * = nullptr,
      detail::enable_if_t<std::is_convertible<G &&, E>::value> * = nullptr>
  constexpr expected(unexpected<G> &&e) noexcept(
      std::is_nothrow_constructible<E, G &&>::value)
      : impl_base(unexpect, std::move(e.value())),
        ctor_base(detail::default_constructor_tag{}) {}

  // In-place error constructor. Enabled only when E is constructible from
  // Args&&...; the arguments are forwarded to impl_base with the unexpect tag.
  template <class... Args,
            detail::enable_if_t<std::is_constructible<E, Args &&...>::value> * =
                nullptr>
  constexpr explicit expected(unexpect_t, Args &&...args)
      : impl_base(unexpect, std::forward<Args>(args)...),
        ctor_base(detail::default_constructor_tag{}) {}

  // In-place error constructor taking an initializer list plus extra
  // arguments. Enabled only when E is constructible from
  // (std::initializer_list<U> &, Args&&...).
  template <class U, class... Args,
            detail::enable_if_t<std::is_constructible<
                E, std::initializer_list<U> &, Args &&...>::value> * = nullptr>
  constexpr explicit expected(unexpect_t, std::initializer_list<U> il,
                              Args &&...args)
      : impl_base(unexpect, il, std::forward<Args>(args)...),
        ctor_base(detail::default_constructor_tag{}) {}

  // Explicit converting copy constructor from expected<U, G>. Chosen when at
  // least one of `U const & -> T` / `G const & -> E` is not an implicit
  // conversion. Copies the value when rhs holds one, otherwise copies the
  // error.
  // NOTE(review): the exact conditions imposed by
  // detail::expected_enable_from_other are defined outside this view.
  template <class U, class G,
            detail::enable_if_t<!(std::is_convertible<U const &, T>::value &&
                                  std::is_convertible<G const &, E>::value)> * =
                nullptr,
            detail::expected_enable_from_other<T, E, U, G, const U &, const G &>
                * = nullptr>
  explicit TL_EXPECTED_11_CONSTEXPR expected(const expected<U, G> &rhs)
      : ctor_base(detail::default_constructor_tag{}) {
    if (rhs.has_value()) {
      this->construct(*rhs);
    } else {
      this->construct_error(rhs.error());
    }
  }

  // Implicit converting copy constructor from expected<U, G>. Chosen when both
  // `U const & -> T` and `G const & -> E` are implicit conversions. Copies the
  // value when rhs holds one, otherwise copies the error.
  template <class U, class G,
            detail::enable_if_t<(std::is_convertible<U const &, T>::value &&
                                 std::is_convertible<G const &, E>::value)> * =
                nullptr,
            detail::expected_enable_from_other<T, E, U, G, const U &, const G &>
                * = nullptr>
  TL_EXPECTED_11_CONSTEXPR expected(const expected<U, G> &rhs)
      : ctor_base(detail::default_constructor_tag{}) {
    if (rhs.has_value()) {
      this->construct(*rhs);
    } else {
      this->construct_error(rhs.error());
    }
  }

  // Explicit converting move constructor from expected<U, G>. Chosen when at
  // least one of `U&& -> T` / `G&& -> E` is not an implicit conversion. Moves
  // the value out of rhs when it holds one, otherwise moves the error.
  template <
      class U, class G,
      detail::enable_if_t<!(std::is_convertible<U &&, T>::value &&
                            std::is_convertible<G &&, E>::value)> * = nullptr,
      detail::expected_enable_from_other<T, E, U, G, U &&, G &&> * = nullptr>
  explicit TL_EXPECTED_11_CONSTEXPR expected(expected<U, G> &&rhs)
      : ctor_base(detail::default_constructor_tag{}) {
    if (rhs.has_value()) {
      this->construct(std::move(*rhs));
    } else {
      this->construct_error(std::move(rhs.error()));
    }
  }

  // Implicit converting move constructor from expected<U, G>. Chosen when both
  // `U&& -> T` and `G&& -> E` are implicit conversions. Moves the value out of
  // rhs when it holds one, otherwise moves the error.
  template <
      class U, class G,
      detail::enable_if_t<(std::is_convertible<U &&, T>::value &&
                           std::is_convertible<G &&, E>::value)> * = nullptr,
      detail::expected_enable_from_other<T, E, U, G, U &&, G &&> * = nullptr>
  TL_EXPECTED_11_CONSTEXPR expected(expected<U, G> &&rhs)
      : ctor_base(detail::default_constructor_tag{}) {
    if (rhs.has_value()) {
      this->construct(std::move(*rhs));
    } else {
      this->construct_error(std::move(rhs.error()));
    }
  }

  // Explicit value constructor from an arbitrary U&&. Chosen when U&& is not
  // implicitly convertible to T. Delegates to the in_place constructor.
  // NOTE(review): detail::expected_enable_forward_value is defined outside
  // this view; its exact exclusions are not visible here.
  template <
      class U = T,
      detail::enable_if_t<!std::is_convertible<U &&, T>::value> * = nullptr,
      detail::expected_enable_forward_value<T, E, U> * = nullptr>
  explicit TL_EXPECTED_MSVC2015_CONSTEXPR expected(U &&v)
      : expected(in_place, std::forward<U>(v)) {}

  // Implicit value constructor from an arbitrary U&&. Chosen when U&& is
  // implicitly convertible to T. Delegates to the in_place constructor.
  template <
      class U = T,
      detail::enable_if_t<std::is_convertible<U &&, T>::value> * = nullptr,
      detail::expected_enable_forward_value<T, E, U> * = nullptr>
  TL_EXPECTED_MSVC2015_CONSTEXPR expected(U &&v)
      : expected(in_place, std::forward<U>(v)) {}

  // Value assignment for the case where building T from U&& cannot throw.
  //
  // When an error is currently stored it is destroyed first and T is then
  // constructed in the same storage; no rollback is needed because the
  // construction is known not to throw. When a value is already stored it is
  // simply assigned to.
  template <
      class U = T, class G = T,
      detail::enable_if_t<std::is_nothrow_constructible<T, U &&>::value> * =
          nullptr,
      detail::enable_if_t<!std::is_void<G>::value> * = nullptr,
      detail::enable_if_t<
          (!std::is_same<expected<T, E>, detail::decay_t<U>>::value &&
           !detail::conjunction<std::is_scalar<T>,
                                std::is_same<T, detail::decay_t<U>>>::value &&
           std::is_constructible<T, U>::value &&
           std::is_assignable<G &, U>::value &&
           std::is_nothrow_move_constructible<E>::value)> * = nullptr>
  expected &operator=(U &&v) {
    if (!has_value()) {
      // Switch from the error alternative to the value alternative.
      err().~unexpected<E>();
      ::new (valptr()) T(std::forward<U>(v));
      this->m_has_val = true;
      return *this;
    }

    val() = std::forward<U>(v);
    return *this;
  }

  // Value assignment for the case where building T from U&& may throw.
  //
  // When a value is already stored it is assigned to directly. Otherwise the
  // stored error is first moved into a temporary and destroyed, and T is
  // constructed in its place. If that construction throws (and exceptions are
  // enabled) the error is re-created from the temporary so the object is left
  // holding its original error, and the exception is rethrown.
  template <
      class U = T, class G = T,
      detail::enable_if_t<!std::is_nothrow_constructible<T, U &&>::value> * =
          nullptr,
      detail::enable_if_t<!std::is_void<U>::value> * = nullptr,
      detail::enable_if_t<
          (!std::is_same<expected<T, E>, detail::decay_t<U>>::value &&
           !detail::conjunction<std::is_scalar<T>,
                                std::is_same<T, detail::decay_t<U>>>::value &&
           std::is_constructible<T, U>::value &&
           std::is_assignable<G &, U>::value &&
           std::is_nothrow_move_constructible<E>::value)> * = nullptr>
  expected &operator=(U &&v) {
    if (has_value()) {
      val() = std::forward<U>(v);
    } else {
      auto tmp = std::move(err());
      err().~unexpected<E>();

#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED
      try {
        ::new (valptr()) T(std::forward<U>(v));
        this->m_has_val = true;
      } catch (...) {
        // The error object was destroyed above, so it must be re-created with
        // placement new; assigning to it would operate on a dead object.
        ::new (errptr()) unexpected<E>(std::move(tmp));
        throw;
      }
#else
      ::new (valptr()) T(std::forward<U>(v));
      this->m_has_val = true;
#endif
    }

    return *this;
  }

  // Assigns an error from `const unexpected<G> &`.
  //
  // If a value is currently stored it is destroyed and the error is
  // copy-constructed in its place; if an error is already stored it is
  // overwritten by assignment.
  template <class G = E,
            detail::enable_if_t<std::is_nothrow_copy_constructible<G>::value &&
                                std::is_assignable<G &, G>::value> * = nullptr>
  expected &operator=(const unexpected<G> &rhs) {
    if (has_value()) {
      // Switch from the value alternative to the error alternative.
      this->destroy_val();
      ::new (errptr()) unexpected<E>(rhs);
      this->m_has_val = false;
      return *this;
    }

    err() = rhs;
    return *this;
  }

  // Assigns an error from `unexpected<G> &&`.
  //
  // If a value is currently stored it is destroyed and the error is
  // move-constructed in its place; if an error is already stored it is
  // overwritten by move assignment.
  template <class G = E,
            detail::enable_if_t<std::is_nothrow_move_constructible<G>::value &&
                                std::is_move_assignable<G>::value> * = nullptr>
  expected &operator=(unexpected<G> &&rhs) noexcept {
    if (has_value()) {
      // Switch from the value alternative to the error alternative.
      this->destroy_val();
      ::new (errptr()) unexpected<E>(std::move(rhs));
      this->m_has_val = false;
      return *this;
    }

    err() = std::move(rhs);
    return *this;
  }

  // Replaces the current contents with a T built in place from `args`, for
  // the case where that construction cannot throw.
  //
  // Whatever is stored (value or error) is destroyed first; when it was an
  // error the has-value flag is flipped before the new T is constructed.
  template <class... Args, detail::enable_if_t<std::is_nothrow_constructible<
                               T, Args &&...>::value> * = nullptr>
  void emplace(Args &&...args) {
    const bool held_error = !has_value();
    if (held_error) {
      err().~unexpected<E>();
      this->m_has_val = true;
    } else {
      val().~T();
    }

    ::new (valptr()) T(std::forward<Args>(args)...);
  }

  // Replaces the current contents with a T built in place from `args`, for
  // the case where that construction may throw.
  //
  // With a value stored, the old value is destroyed and the new one is
  // constructed in the same storage. With an error stored, the error is moved
  // into a temporary and destroyed before T is constructed; if construction
  // throws (and exceptions are enabled) the error is re-created from the
  // temporary and the exception is rethrown.
  template <class... Args, detail::enable_if_t<!std::is_nothrow_constructible<
                               T, Args &&...>::value> * = nullptr>
  void emplace(Args &&...args) {
    if (has_value()) {
      val().~T();
      ::new (valptr()) T(std::forward<Args>(args)...);
    } else {
      auto tmp = std::move(err());
      err().~unexpected<E>();

#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED
      try {
        ::new (valptr()) T(std::forward<Args>(args)...);
        this->m_has_val = true;
      } catch (...) {
        // The error object was destroyed above, so it must be re-created with
        // placement new; assigning to it would operate on a dead object.
        ::new (errptr()) unexpected<E>(std::move(tmp));
        throw;
      }
#else
      ::new (valptr()) T(std::forward<Args>(args)...);
      this->m_has_val = true;
#endif
    }
  }

  template <class U, class... Args,
            detail::enable_if_t<std::is_nothrow_constructible<
                T, std::initializer_list<U> &, Args &&...>::value> * = nullptr>
  void emplace(std::initializer_list<U> il, Args &&...args) {
    if (has_value()) {
      T t(il, std::forward<Args>(args)...);
      val() = std::move(t);
    } else {
      err().~unexpected<E>();
      ::new (valptr()) T(il, std::forward<Args>(args)...);
      this->m_has_val = true;
    }
  }

  // Initializer-list emplace for the case where constructing T from
  // (il, args...) may throw.
  //
  // With a value stored, a fresh T is built and move-assigned over it. With an
  // error stored, the error is moved into a temporary and destroyed before T
  // is constructed; if construction throws (and exceptions are enabled) the
  // error is re-created from the temporary and the exception is rethrown.
  template <class U, class... Args,
            detail::enable_if_t<!std::is_nothrow_constructible<
                T, std::initializer_list<U> &, Args &&...>::value> * = nullptr>
  void emplace(std::initializer_list<U> il, Args &&...args) {
    if (has_value()) {
      T t(il, std::forward<Args>(args)...);
      val() = std::move(t);
    } else {
      auto tmp = std::move(err());
      err().~unexpected<E>();

#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED
      try {
        ::new (valptr()) T(il, std::forward<Args>(args)...);
        this->m_has_val = true;
      } catch (...) {
        // The error object was destroyed above, so it must be re-created with
        // placement new; assigning to it would operate on a dead object.
        ::new (errptr()) unexpected<E>(std::move(tmp));
        throw;
      }
#else
      ::new (valptr()) T(il, std::forward<Args>(args)...);
      this->m_has_val = true;
#endif
    }
  }

 private:
  // Tag types used to pick an overload of the swap helpers below. Each pair
  // names the true/false outcome of a type trait: is_void<T>,
  // is_nothrow_move_constructible<T> and is_nothrow_move_constructible<E>.
  using t_is_void = std::true_type;
  using t_is_not_void = std::false_type;
  using t_is_nothrow_move_constructible = std::true_type;
  using move_constructing_t_can_throw = std::false_type;
  using e_is_nothrow_move_constructible = std::true_type;
  using move_constructing_e_can_throw = std::false_type;

  // Swap helper for two value-holding objects when T is void: there is no
  // stored value to exchange.
  void swap_where_both_have_value(expected & /*rhs*/, t_is_void) noexcept {
    // swapping void is a no-op
  }

  // Swap helper for two value-holding objects when T is not void: exchanges
  // the stored values. The unqualified call after `using std::swap` lets a
  // swap found by argument-dependent lookup be chosen for T.
  void swap_where_both_have_value(expected &rhs, t_is_not_void) {
    using std::swap;
    swap(val(), rhs.val());
  }

  // Swap helper for T = void, reached from swap() when *this has a value and
  // rhs holds an error. The error is move-constructed into *this, the
  // moved-from error in rhs is destroyed, and the has-value flags are
  // exchanged.
  void swap_where_only_one_has_value(expected &rhs, t_is_void) noexcept(
      std::is_nothrow_move_constructible<E>::value) {
    ::new (errptr()) unexpected_type(std::move(rhs.err()));
    rhs.err().~unexpected_type();
    std::swap(this->m_has_val, rhs.m_has_val);
  }

  // Swap helper for non-void T when exactly one side holds a value. Picks the
  // concrete strategy by tag dispatch on whether T and E are nothrow move
  // constructible.
  void swap_where_only_one_has_value(expected &rhs, t_is_not_void) {
    using value_move_tag =
        typename std::is_nothrow_move_constructible<T>::type;
    using error_move_tag =
        typename std::is_nothrow_move_constructible<E>::type;

    swap_where_only_one_has_value_and_t_is_not_void(rhs, value_move_tag{},
                                                    error_move_tag{});
  }

  // Swap strategy when both T and E are nothrow move constructible; reached
  // from swap() with *this holding a value and rhs holding an error.
  // Since nothing here can throw, no rollback is required.
  void swap_where_only_one_has_value_and_t_is_not_void(
      expected &rhs, t_is_nothrow_move_constructible,
      e_is_nothrow_move_constructible) noexcept {
    // Park our value in a local so its storage can be reused for the error.
    auto temp = std::move(val());
    val().~T();
    // Move rhs's error into our storage, then release rhs's copy.
    ::new (errptr()) unexpected_type(std::move(rhs.err()));
    rhs.err().~unexpected_type();
    // Give rhs the parked value and exchange the has-value flags.
    ::new (rhs.valptr()) T(std::move(temp));
    std::swap(this->m_has_val, rhs.m_has_val);
  }

  // Swap strategy when T is nothrow move constructible but moving E may
  // throw; reached from swap() with *this holding a value and rhs holding an
  // error.
  //
  // Our value is parked in a local and destroyed, then rhs's error is
  // move-constructed into our storage. If that throws (and exceptions are
  // enabled) our value is re-created from the parked copy, which cannot
  // throw, and the exception is rethrown with both objects unchanged in kind.
  void swap_where_only_one_has_value_and_t_is_not_void(
      expected &rhs, t_is_nothrow_move_constructible,
      move_constructing_e_can_throw) {
    auto temp = std::move(val());
    val().~T();
#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED
    try {
      ::new (errptr()) unexpected_type(std::move(rhs.err()));
      rhs.err().~unexpected_type();
      ::new (rhs.valptr()) T(std::move(temp));
      std::swap(this->m_has_val, rhs.m_has_val);
    } catch (...) {
      // Our value was destroyed above, so it must be re-created with
      // placement new; assigning to it would operate on a dead object.
      ::new (valptr()) T(std::move(temp));
      throw;
    }
#else
    ::new (errptr()) unexpected_type(std::move(rhs.err()));
    rhs.err().~unexpected_type();
    ::new (rhs.valptr()) T(std::move(temp));
    std::swap(this->m_has_val, rhs.m_has_val);
#endif
  }

  // Swap strategy when E is nothrow move constructible but moving T may
  // throw; reached from swap() with *this holding a value and rhs holding an
  // error.
  //
  // rhs's error is parked in a local and destroyed, then our value is
  // move-constructed into rhs's storage. If that throws (and exceptions are
  // enabled) rhs's error is re-created from the parked copy, which cannot
  // throw, and the exception is rethrown.
  void swap_where_only_one_has_value_and_t_is_not_void(
      expected &rhs, move_constructing_t_can_throw,
      e_is_nothrow_move_constructible) {
    auto temp = std::move(rhs.err());
    rhs.err().~unexpected_type();
#ifdef TL_EXPECTED_EXCEPTIONS_ENABLED
    try {
      ::new (rhs.valptr()) T(std::move(val()));
      val().~T();
      ::new (errptr()) unexpected_type(std::move(temp));
      std::swap(this->m_has_val, rhs.m_has_val);
    } catch (...) {
      // rhs's error was destroyed above, so it must be re-created with
      // placement new; assigning to it would operate on a dead object.
      ::new (rhs.errptr()) unexpected_type(std::move(temp));
      throw;
    }
#else
    ::new (rhs.valptr()) T(std::move(val()));
    val().~T();
    ::new (errptr()) unexpected_type(std::move(temp));
    std::swap(this->m_has_val, rhs.m_has_val);
#endif
  }

 public:
  // Exchanges the contents of *this and rhs.
  //
  // Available only when both T and E are swappable and at least one of them
  // is nothrow move constructible. The four state combinations are handled
  // separately:
  //   value / value  -> swap the values (no-op for void T)
  //   value / error  -> one of the swap_where_only_one_has_value helpers
  //   error / value  -> the same case with the roles reversed
  //   error / error  -> swap the errors
  template <class OT = T, class OE = E>
  detail::enable_if_t<detail::is_swappable<OT>::value &&
                      detail::is_swappable<OE>::value &&
                      (std::is_nothrow_move_constructible<OT>::value ||
                       std::is_nothrow_move_constructible<OE>::value)>
  swap(expected &rhs) noexcept(std::is_nothrow_move_constructible<T>::value &&
                               detail::is_nothrow_swappable<T>::value &&
                               std::is_nothrow_move_constructible<E>::value &&
                               detail::is_nothrow_swappable<E>::value) {
    const bool lhs_has_value = has_value();
    const bool rhs_has_value = rhs.has_value();

    if (lhs_has_value && rhs_has_value) {
      swap_where_both_have_value(rhs, typename std::is_void<T>::type{});
      return;
    }

    if (lhs_has_value) {
      // Only *this holds a value.
      swap_where_only_one_has_value(rhs, typename std::is_void<T>::type{});
      return;
    }

    if (rhs_has_value) {
      // Only rhs holds a value: reuse the case above with roles reversed.
      rhs.swap(*this);
      return;
    }

    // Neither side holds a value.
    using std::swap;
    swap(err(), rhs.err());
  }

  // Member access to the stored value (const). TL_ASSERT checks that a value
  // is held; the returned pointer comes from valptr().
  constexpr const T *operator->() const {
    TL_ASSERT(has_value());
    return valptr();
  }
  // Member access to the stored value (non-const). TL_ASSERT checks that a
  // value is held; the returned pointer comes from valptr().
  TL_EXPECTED_11_CONSTEXPR T *operator->() {
    TL_ASSERT(has_value());
    return valptr();
  }

  // Dereference, const-lvalue overload: returns a const reference to the
  // stored value. Disabled when T is void. TL_ASSERT checks that a value is
  // held.
  template <class U = T,
            detail::enable_if_t<!std::is_void<U>::value> * = nullptr>
  constexpr const U &operator*() const & {
    TL_ASSERT(has_value());
    return val();
  }
  // Dereference, lvalue overload: returns a mutable reference to the stored
  // value. Disabled when T is void. TL_ASSERT checks that a value is held.
  template <class U = T,
            detail::enable_if_t<!std::is_void<U>::value> * = nullptr>
  TL_EXPECTED_11_CONSTEXPR U &operator*() & {
    TL_ASSERT(has_value());
    return val();
  }
  // Dereference, const-rvalue overload: returns the stored value as
  // `const U &&`. Disabled when T is void. TL_ASSERT checks that a value is
  // held.
  template <class U = T,
            detail::enable_if_t<!std::is_void<U>::value> * = nullptr>
  constexpr const U &&operator*() const && {
    TL_ASSERT(has_value());
    return std::move(val());
  }
  // Dereference, rvalue overload: returns the stored value as `U &&` so the
  // caller may move from it. Disabled when T is void. TL_ASSERT checks that a
  // value is held.
  template <class U = T,
            detail::enable_if_t<!std::is_void<U>::value> * = nullptr>
  TL_EXPECTED_11_CONSTEXPR U &&operator*() && {
    TL_ASSERT(has_value());
    return std::move(val());
  }

  // Returns the m_has_val flag: true when a value is held, false when an
  // error is held.
  constexpr bool has_value() const noexcept {
    return this->m_has_val;
  }
  // Explicit conversion to bool; yields the same flag as has_value().
  constexpr explicit operator bool() const noexcept {
    return this->m_has_val;
  }

  // Checked access, const-lvalue overload. When no value is held,
  // detail::throw_exception is called with a bad_expected_access<E> built from
  // the stored error; otherwise a const reference to the value is returned.
  // Disabled when T is void.
  template <class U = T,
            detail::enable_if_t<!std::is_void<U>::value> * = nullptr>
  TL_EXPECTED_11_CONSTEXPR const U &value() const & {
    if (!has_value())
      detail::throw_exception(bad_expected_access<E>(err().value()));
    return val();
  }
  // Checked access, lvalue overload. When no value is held,
  // detail::throw_exception is called with a bad_expected_access<E> built from
  // the stored error; otherwise a mutable reference to the value is returned.
  // Disabled when T is void.
  template <class U = T,
            detail::enable_if_t<!std::is_void<U>::value> * = nullptr>
  TL_EXPECTED_11_CONSTEXPR U &value() & {
    if (!has_value())
      detail::throw_exception(bad_expected_access<E>(err().value()));
    return val();
  }
  // Checked access, const-rvalue overload. When no value is held,
  // detail::throw_exception is called with a bad_expected_access<E> built from
  // the stored error (accessed through an rvalue); otherwise the value is
  // returned as `const U &&`. Disabled when T is void.
  template <class U = T,
            detail::enable_if_t<!std::is_void<U>::value> * = nullptr>
  TL_EXPECTED_11_CONSTEXPR const U &&value() const && {
    if (!has_value())
      detail::throw_exception(bad_expected_access<E>(std::move(err()).value()));
    return std::move(val());
  }
  // Checked access, rvalue overload. When no value is held,
  // detail::throw_exception is called with a bad_expected_access<E> built from
  // the stored error (accessed through an rvalue); otherwise the value is
  // returned as `U &&`. Disabled when T is void.
  template <class U = T,
            detail::enable_if_t<!std::is_void<U>::value> * = nullptr>
  TL_EXPECTED_11_CONSTEXPR U &&value() && {
    if (!has_value())
      detail::throw_exception(bad_expected_access<E>(std::move(err()).value()));
    return std::move(val());
  }

  // Access to the stored error, const-lvalue overload. TL_ASSERT checks that
  // no value is held.
  constexpr const E &error() const & {
    TL_ASSERT(!has_value());
    return err().value();
  }
  // Access to the stored error, lvalue overload (mutable reference).
  // TL_ASSERT checks that no value is held.
  TL_EXPECTED_11_CONSTEXPR E &error() & {
    TL_ASSERT(!has_value());
    return err().value();
  }
  // Access to the stored error, const-rvalue overload; returns `const E &&`.
  // TL_ASSERT checks that no value is held.
  constexpr const E &&error() const && {
    TL_ASSERT(!has_value());
    return std::move(err().value());
  }
  // Access to the stored error, rvalue overload; returns `E &&` so the caller
  // may move from it. TL_ASSERT checks that no value is held.
  TL_EXPECTED_11_CONSTEXPR E &&error() && {
    TL_ASSERT(!has_value());
    return std::move(err().value());
  }

  // Returns a copy of the stored value when one is held, otherwise `v`
  // converted to T. The static_assert requires T to be copy constructible and
  // U&& to be convertible to T.
  template <class U>
  constexpr T value_or(U &&v) const & {
    static_assert(std::is_copy_constructible<T>::value &&
                      std::is_convertible<U &&, T>::value,
                  "T must be copy-constructible and convertible to from U&&");
    return bool(*this) ? **this : static_cast<T>(std::forward<U>(v));
  }
  // Rvalue overload: returns the stored value moved out of *this when one is
  // held, otherwise `v` converted to T. The static_assert requires T to be
  // move constructible and U&& to be convertible to T.
  template <class U>
  TL_EXPECTED_11_CONSTEXPR T value_or(U &&v) && {
    static_assert(std::is_move_constructible<T>::value &&
                      std::is_convertible<U &&, T>::value,
                  "T must be move-constructible and convertible to from U&&");
    return bool(*this) ? std::move(**this) : static_cast<T>(std::forward<U>(v));
  }
};

namespace detail {
// Helper aliases for the *_impl functions below. `Exp` may be a reference or
// cv-qualified expected type; it is decayed before its nested types are read.
//   exp_t<Exp>      - the value_type of the expected
//   err_t<Exp>      - the error_type of the expected
//   ret_t<Exp, Ret> - an expected with value type Ret and Exp's error type
template <class Exp>
using exp_t = typename detail::decay_t<Exp>::value_type;
template <class Exp>
using err_t = typename detail::decay_t<Exp>::error_type;
template <class Exp, class Ret>
using ret_t = expected<Ret, err_t<Exp>>;

#ifdef TL_EXPECTED_CXX14
// and_then for a non-void value type (deduced return type).
// When `exp` holds a value, `f` is invoked with it and its result is returned;
// otherwise a `Ret` holding exp's error is returned. The static_assert
// requires `f` to return an expected type.
template <class Exp, class F,
          detail::enable_if_t<!std::is_void<exp_t<Exp>>::value> * = nullptr,
          class Ret = decltype(detail::invoke(std::declval<F>(),
                                              *std::declval<Exp>()))>
constexpr auto and_then_impl(Exp &&exp, F &&f) {
  static_assert(detail::is_expected<Ret>::value, "F must return an expected");

  return exp.has_value()
             ? detail::invoke(std::forward<F>(f), *std::forward<Exp>(exp))
             : Ret(unexpect, std::forward<Exp>(exp).error());
}

// and_then for a void value type (deduced return type).
// When `exp` is in the value state, `f` is invoked with no arguments and its
// result is returned; otherwise a `Ret` holding exp's error is returned.
template <class Exp, class F,
          detail::enable_if_t<std::is_void<exp_t<Exp>>::value> * = nullptr,
          class Ret = decltype(detail::invoke(std::declval<F>()))>
constexpr auto and_then_impl(Exp &&exp, F &&f) {
  static_assert(detail::is_expected<Ret>::value, "F must return an expected");

  return exp.has_value() ? detail::invoke(std::forward<F>(f))
                         : Ret(unexpect, std::forward<Exp>(exp).error());
}
#else
// Declared but not defined or used in the code visible here.
// NOTE(review): looks like a leftover type-inspection/debugging aid — confirm
// before removing.
template <class>
struct TC;
// and_then for a non-void value type (trailing return type variant).
// When `exp` holds a value, `f` is invoked with it and its result is returned;
// otherwise a `Ret` holding exp's error is returned. The static_assert
// requires `f` to return an expected type.
template <class Exp, class F,
          class Ret = decltype(detail::invoke(std::declval<F>(),
                                              *std::declval<Exp>())),
          detail::enable_if_t<!std::is_void<exp_t<Exp>>::value> * = nullptr>
auto and_then_impl(Exp &&exp, F &&f) -> Ret {
  static_assert(detail::is_expected<Ret>::value, "F must return an expected");

  return exp.has_value()
             ? detail::invoke(std::forward<F>(f), *std::forward<Exp>(exp))
             : Ret(unexpect, std::forward<Exp>(exp).error());
}

// and_then for a void value type (trailing return type variant).
// When `exp` is in the value state, `f` is invoked with no arguments and its
// result is returned; otherwise a `Ret` holding exp's error is returned.
template <class Exp, class F,
          class Ret = decltype(detail::invoke(std::declval<F>())),
          detail::enable_if_t<std::is_void<exp_t<Exp>>::value> * = nullptr>
constexpr auto and_then_impl(Exp &&exp, F &&f) -> Ret {
  static_assert(detail::is_expected<Ret>::value, "F must return an expected");

  return exp.has_value() ? detail::invoke(std::forward<F>(f))
                         : Ret(unexpect, std::forward<Exp>(exp).error());
}
#endif

#ifdef TL_EXPECTED_CXX14
// map for a non-void value type and a callable returning non-void (deduced
// return type). The result is an expected whose value type is the decayed
// return type of `f` and whose error type is unchanged: it holds f(value)
// when `exp` has a value, otherwise exp's error.
template <class Exp, class F,
          detail::enable_if_t<!std::is_void<exp_t<Exp>>::value> * = nullptr,
          class Ret = decltype(detail::invoke(std::declval<F>(),
                                              *std::declval<Exp>())),
          detail::enable_if_t<!std::is_void<Ret>::value> * = nullptr>
constexpr auto expected_map_impl(Exp &&exp, F &&f) {
  using result = ret_t<Exp, detail::decay_t<Ret>>;
  return exp.has_value() ? result(detail::invoke(std::forward<F>(f),
                                                 *std::forward<Exp>(exp)))
                         : result(unexpect, std::forward<Exp>(exp).error());
}

template <class Exp, class F,
          detail::enable_if_t<!std::is_void<exp_t<Exp>>::value> * = nullptr,
          class Ret = decltype(detail::invoke(std::declval<F>(),
                                              *std::declval<Exp>())),
          detail::enable_if_t<std::is_void<Ret>::value> * = nullptr>
auto expected_map_impl(Exp &&exp, F &&f) {
  using result = expected<void, err_t<Exp>>;
  if (exp.has_value()) {
    detail::invoke(std::forward<F>(f), *std::forward<Exp>(exp));
    return result();
  }

  return result(unexpect, std::forward<Exp>(exp).error());
}

// map for a void value type and a callable returning non-void (deduced return
// type). `f` is invoked with no arguments when `exp` is in the value state and
// its result becomes the new value; otherwise exp's error is carried over.
template <class Exp, class F,
          detail::enable_if_t<std::is_void<exp_t<Exp>>::value> * = nullptr,
          class Ret = decltype(detail::invoke(std::declval<F>())),
          detail::enable_if_t<!std::is_void<Ret>::value> * = nullptr>
constexpr auto expected_map_impl(Exp &&exp, F &&f) {
  using result = ret_t<Exp, detail::decay_t<Ret>>;
  return exp.has_value() ? result(detail::invoke(std::forward<F>(f)))
                         : result(unexpect, std::forward<Exp>(exp).error());
}

template <class Exp, class F,
          detail::enable_if_t<std::is_void<exp_t<Exp>>::value> * = nullptr,
          class Ret = decltype(detail::invoke(std::declval<F>())),
          detail::enable_if_t<std::is_void<Ret>::value> * = nullptr>
auto expected_map_impl(Exp &&exp, F &&f) {
  using result = expected<void, err_t<Exp>>;
  if (exp.has_value()) {
    detail::invoke(std::forward<F>(f));
    return result();
  }

  return result(unexpect, std::forward<Exp>(exp).error());
}
#else
// map for a non-void value type and a callable returning non-void (trailing
// return type variant). The result holds f(value) when `exp` has a value,
// otherwise exp's error; the value type is the decayed return type of `f`.
template <class Exp, class F,
          detail::enable_if_t<!std::is_void<exp_t<Exp>>::value> * = nullptr,
          class Ret = decltype(detail::invoke(std::declval<F>(),
                                              *std::declval<Exp>())),
          detail::enable_if_t<!std::is_void<Ret>::value> * = nullptr>

constexpr auto expected_map_impl(Exp &&exp,
                                 F &&f) -> ret_t<Exp, detail::decay_t<Ret>> {
  using result = ret_t<Exp, detail::decay_t<Ret>>;

  return exp.has_value() ? result(detail::invoke(std::forward<F>(f),
                                                 *std::forward<Exp>(exp)))
                         : result(unexpect, std::forward<Exp>(exp).error());
}

// map for a non-void value type and a callable returning void (trailing
// return type variant). `f` is called for its side effects only; the result
// is an expected<void, E> that is in the value state when `exp` had a value
// and carries exp's error otherwise.
template <class Exp, class F,
          detail::enable_if_t<!std::is_void<exp_t<Exp>>::value> * = nullptr,
          class Ret = decltype(detail::invoke(std::declval<F>(),
                                              *std::declval<Exp>())),
          detail::enable_if_t<std::is_void<Ret>::value> * = nullptr>
auto expected_map_impl(Exp &&exp, F &&f) -> expected<void, err_t<Exp>> {
  if (!exp.has_value()) {
    return unexpected<err_t<Exp>>(std::forward<Exp>(exp).error());
  }

  detail::invoke(std::forward<F>(f), *std::forward<Exp>(exp));
  return {};
}

// map for a void value type and a callable returning non-void (trailing
// return type variant). `f` is invoked with no arguments when `exp` is in the
// value state and its result becomes the new value; otherwise exp's error is
// carried over.
template <class Exp, class F,
          detail::enable_if_t<std::is_void<exp_t<Exp>>::value> * = nullptr,
          class Ret = decltype(detail::invoke(std::declval<F>())),
          detail::enable_if_t<!std::is_void<Ret>::value> * = nullptr>

constexpr auto expected_map_impl(Exp &&exp,
                                 F &&f) -> ret_t<Exp, detail::decay_t<Ret>> {
  using result = ret_t<Exp, detail::decay_t<Ret>>;

  return exp.has_value() ? result(detail::invoke(std::forward<F>(f)))
                         : result(unexpect, std::forward<Exp>(exp).error());
}

// map for a void value type and a callable returning void (trailing return
// type variant). `f` is invoked with no arguments, for its side effects only,
// when `exp` is in the value state; otherwise exp's error is carried over.
template <class Exp, class F,
          detail::enable_if_t<std::is_void<exp_t<Exp>>::value> * = nullptr,
          class Ret = decltype(detail::invoke(std::declval<F>())),
          detail::enable_if_t<std::is_void<Ret>::value> * = nullptr>
auto expected_map_impl(Exp &&exp, F &&f) -> expected<void, err_t<Exp>> {
  if (!exp.has_value()) {
    return unexpected<err_t<Exp>>(std::forward<Exp>(exp).error());
  }

  detail::invoke(std::forward<F>(f));
  return {};
}
#endif

#if defined(TL_EXPECTED_CXX14) && !defined(TL_EXPECTED_GCC49) && \
    !defined(TL_EXPECTED_GCC54) && !defined(TL_EXPECTED_GCC55)
// map_error for a non-void value type and a callable returning non-void
// (deduced return type). The value, if present, is carried over unchanged;
// otherwise the new error is f(error). The error type of the result is the
// decayed return type of `f`.
template <class Exp, class F,
          detail::enable_if_t<!std::is_void<exp_t<Exp>>::value> * = nullptr,
          class Ret = decltype(detail::invoke(std::declval<F>(),
                                              std::declval<Exp>().error())),
          detail::enable_if_t<!std::is_void<Ret>::value> * = nullptr>
constexpr auto map_error_impl(Exp &&exp, F &&f) {
  using result = expected<exp_t<Exp>, detail::decay_t<Ret>>;
  return exp.has_value()
             ? result(*std::forward<Exp>(exp))
             : result(unexpect, detail::invoke(std::forward<F>(f),
                                               std::forward<Exp>(exp).error()));
}
template <class Exp, class F,
          detail::enable_if_t<!std::is_void<exp_t<Exp>>::value> * = nullptr,
          class Ret = decltype(detail::invoke(std::declval<F>(),
                                              std::declval<Exp>().error())),
          detail::enable_if_t<std::is_void<Ret>::value> * = nullptr>
auto map_error_impl(Exp &&exp, F &&f) {
  using result = expected<exp_t<Exp>, monostate>;
  if (exp.has_value()) {
    return result(*std::forward<Exp>(exp));
  }

  detail::invoke(std::forward<F>(f), std::forward<Exp>(exp).error());
  return result(unexpect, monostate{});
}
// map_error for a void value type and a callable returning non-void (deduced
// return type). A value-state `exp` yields a default-constructed result;
// otherwise the new error is f(error).
template <class Exp, class F,
          detail::enable_if_t<std::is_void<exp_t<Exp>>::value> * = nullptr,
          class Ret = decltype(detail::invoke(std::declval<F>(),
                                              std::declval<Exp>().error())),
          detail::enable_if_t<!std::is_void<Ret>::value> * = nullptr>
constexpr auto map_error_impl(Exp &&exp, F &&f) {
  using result = expected<exp_t<Exp>, detail::decay_t<Ret>>;
  return exp.has_value()
             ? result()
             : result(unexpect, detail::invoke(std::forward<F>(f),
                                               std::forward<Exp>(exp).error()));
}
template <class Exp, class F,
          detail::enable_if_t<std::is_void<exp_t<Exp>>::value> * = nullptr,
          class Ret = decltype(detail::invoke(std::declval<F>(),
                                              std::declval<Exp>().error())),
          detail::enable_if_t<std::is_void<Ret>::value> * = nullptr>
auto map_error_impl(Exp &&exp, F &&f) {
  using result = expected<exp_t<Exp>, monostate>;
  if (exp.has_value()) {
    return result();
  }

  detail::invoke(std::forward<F>(f), std::forward<Exp>(exp).error());
  return result(unexpect, monostate{});
}
#else
// map_error for a non-void value type and a callable returning non-void
// (trailing return type variant). The value, if present, is carried over
// unchanged; otherwise the new error is f(error).
template <class Exp, class F,
          detail::enable_if_t<!std::is_void<exp_t<Exp>>::value> * = nullptr,
          class Ret = decltype(detail::invoke(std::declval<F>(),
                                              std::declval<Exp>().error())),
          detail::enable_if_t<!std::is_void<Ret>::value> * = nullptr>
constexpr auto map_error_impl(Exp &&exp, F &&f)
    -> expected<exp_t<Exp>, detail::decay_t<Ret>> {
  using result = expected<exp_t<Exp>, detail::decay_t<Ret>>;

  return exp.has_value()
             ? result(*std::forward<Exp>(exp))
             : result(unexpect, detail::invoke(std::forward<F>(f),
                                               std::forward<Exp>(exp).error()));
}

// map_error for a non-void value type and a callable returning void (trailing
// return type variant). The value, if present, is carried over unchanged;
// otherwise `f` is called with the error for its side effects and the result
// holds a monostate error.
template <class Exp, class F,
          detail::enable_if_t<!std::is_void<exp_t<Exp>>::value> * = nullptr,
          class Ret = decltype(detail::invoke(std::declval<F>(),
                                              std::declval<Exp>().error())),
          detail::enable_if_t<std::is_void<Ret>::value> * = nullptr>
auto map_error_impl(Exp &&exp, F &&f) -> expected<exp_t<Exp>, monostate> {
  using mapped = expected<exp_t<Exp>, monostate>;

  if (!exp.has_value()) {
    detail::invoke(std::forward<F>(f), std::forward<Exp>(exp).error());
    return mapped(unexpect, monostate{});
  }

  return mapped(*std::forward<Exp>(exp));
}

// map_error for a void value type and a callable returning non-void (trailing
// return type variant). A value-state `exp` yields a default-constructed
// result; otherwise the new error is f(error).
template <class Exp, class F,
          detail::enable_if_t<std::is_void<exp_t<Exp>>::value> * = nullptr,
          class Ret = decltype(detail::invoke(std::declval<F>(),
                                              std::declval<Exp>().error())),
          detail::enable_if_t<!std::is_void<Ret>::value> * = nullptr>
constexpr auto map_error_impl(Exp &&exp, F &&f)
    -> expected<exp_t<Exp>, detail::decay_t<Ret>> {
  using result = expected<exp_t<Exp>, detail::decay_t<Ret>>;

  return exp.has_value()
             ? result()
             : result(unexpect, detail::invoke(std::forward<F>(f),
                                               std::forward<Exp>(exp).error()));
}

// map_error for a void value type and a callable returning void (trailing
// return type variant). A value-state `exp` yields a default-constructed
// result; otherwise `f` is called with the error for its side effects and the
// result holds a monostate error.
template <class Exp, class F,
          detail::enable_if_t<std::is_void<exp_t<Exp>>::value> * = nullptr,
          class Ret = decltype(detail::invoke(std::declval<F>(),
                                              std::declval<Exp>().error())),
          detail::enable_if_t<std::is_void<Ret>::value> * = nullptr>
auto map_error_impl(Exp &&exp, F &&f) -> expected<exp_t<Exp>, monostate> {
  using mapped = expected<exp_t<Exp>, monostate>;

  if (!exp.has_value()) {
    detail::invoke(std::forward<F>(f), std::forward<Exp>(exp).error());
    return mapped(unexpect, monostate{});
  }

  return mapped();
}
#endif

#ifdef TL_EXPECTED_CXX14
// or_else for a callable returning non-void (deduced return type).
// When `exp` holds a value it is forwarded through; otherwise the result of
// f(error) is returned. The static_assert requires `f` to return an expected
// type.
template <class Exp, class F,
          class Ret = decltype(detail::invoke(std::declval<F>(),
                                              std::declval<Exp>().error())),
          detail::enable_if_t<!std::is_void<Ret>::value> * = nullptr>
constexpr auto or_else_impl(Exp &&exp, F &&f) {
  static_assert(detail::is_expected<Ret>::value, "F must return an expected");
  return exp.has_value() ? std::forward<Exp>(exp)
                         : detail::invoke(std::forward<F>(f),
                                          std::forward<Exp>(exp).error());
}

// or_else for a callable returning void. When `exp` holds an error, `f` is
// called with that error for its side effects; in either state the (forwarded)
// `exp` itself is what gets returned.
template <class Exp, class F,
          class Ret = decltype(detail::invoke(std::declval<F>(),
                                              std::declval<Exp>().error())),
          detail::enable_if_t<std::is_void<Ret>::value> * = nullptr>
detail::decay_t<Exp> or_else_impl(Exp &&exp, F &&f) {
  if (exp.has_value()) {
    return std::forward<Exp>(exp);
  }

  detail::invoke(std::forward<F>(f), std::forward<Exp>(exp).error());
  return std::forward<Exp>(exp);
}
#else
// or_else for a callable returning non-void (trailing return type variant).
// When `exp` holds a value it is forwarded through (converted to Ret);
// otherwise the result of f(error) is returned. The static_assert requires `f`
// to return an expected type.
template <class Exp, class F,
          class Ret = decltype(detail::invoke(std::declval<F>(),
                                              std::declval<Exp>().error())),
          detail::enable_if_t<!std::is_void<Ret>::value> * = nullptr>
auto or_else_impl(Exp &&exp, F &&f) -> Ret {
  static_assert(detail::is_expected<Ret>::value, "F must return an expected");
  return exp.has_value() ? std::forward<Exp>(exp)
                         : detail::invoke(std::forward<F>(f),
                                          std::forward<Exp>(exp).error());
}

// or_else implementation (fallback branch) for callables that return void
// when invoked with the error.
// When `exp` holds a value it is forwarded as the result and `f` is not
// called. Otherwise `f` is invoked with the forwarded error and `exp` itself
// is then forwarded as the result (comma expression below).
// NOTE(review): on the error path `exp` is forwarded twice, once to reach
// error() and once as the result — confirm `f` never consumes the error of an
// rvalue `exp`.
template <class Exp, class F,
          class Ret = decltype(detail::invoke(std::declval<F>(),
                                              std::declval<Exp>().error())),
          detail::enable_if_t<std::is_void<Ret>::value> * = nullptr>
detail::decay_t<Exp> or_else_impl(Exp &&exp, F &&f) {
  return exp.has_value() ? std::forward<Exp>(exp)
                         : (detail::invoke(std::forward<F>(f),
                                           std::forward<Exp>(exp).error()),
                            std::forward<Exp>(exp));
}
#endif
}  // namespace detail

// Two expected objects are equal when they are in the same state and their
// active members (both values, or both errors) compare equal.
template <class T, class E, class U, class F>
constexpr bool operator==(const expected<T, E> &lhs,
                          const expected<U, F> &rhs) {
  return (lhs.has_value() == rhs.has_value())
             ? (lhs.has_value() ? *lhs == *rhs : lhs.error() == rhs.error())
             : false;
}
// Two expected objects differ when their states differ or their active
// members compare unequal.
template <class T, class E, class U, class F>
constexpr bool operator!=(const expected<T, E> &lhs,
                          const expected<U, F> &rhs) {
  return (lhs.has_value() == rhs.has_value())
             ? (lhs.has_value() ? *lhs != *rhs : lhs.error() != rhs.error())
             : true;
}
// expected<void, ...>: there is no value to compare, so two objects that both
// hold a value are always equal; otherwise the errors decide.
template <class E, class F>
constexpr bool operator==(const expected<void, E> &lhs,
                          const expected<void, F> &rhs) {
  return (lhs.has_value() == rhs.has_value())
             ? (lhs.has_value() ? true : lhs.error() == rhs.error())
             : false;
}
// expected<void, ...>: objects differ when their states differ or, when both
// hold an error, the errors compare unequal.
template <class E, class F>
constexpr bool operator!=(const expected<void, E> &lhs,
                          const expected<void, F> &rhs) {
  return (lhs.has_value() == rhs.has_value())
             ? (lhs.has_value() ? false : lhs.error() != rhs.error())
             : true;
}

// Comparison of an expected with a plain value: an expected that holds an
// error never equals a value; otherwise the contained value is compared.
template <class T, class E, class U>
constexpr bool operator==(const expected<T, E> &x, const U &v) {
  return !x.has_value() ? false : *x == v;
}
// Same as above with the operands swapped at the call site; the contained
// value still appears on the left of the comparison.
template <class T, class E, class U>
constexpr bool operator==(const U &v, const expected<T, E> &x) {
  return !x.has_value() ? false : *x == v;
}
// An expected that holds an error always differs from a plain value.
template <class T, class E, class U>
constexpr bool operator!=(const expected<T, E> &x, const U &v) {
  return !x.has_value() ? true : *x != v;
}
// Same as above with the operands swapped at the call site.
template <class T, class E, class U>
constexpr bool operator!=(const U &v, const expected<T, E> &x) {
  return !x.has_value() ? true : *x != v;
}

// Comparison of an expected with an unexpected: they are equal only when the
// expected holds an error that compares equal to the wrapped error value.
template <class T, class E>
constexpr bool operator==(const expected<T, E> &x, const unexpected<E> &e) {
  return !x.has_value() ? x.error() == e.value() : false;
}
// Same as above with the operands swapped at the call site.
template <class T, class E>
constexpr bool operator==(const unexpected<E> &e, const expected<T, E> &x) {
  return !x.has_value() ? x.error() == e.value() : false;
}
// An expected that holds a value always differs from an unexpected;
// otherwise the errors decide.
template <class T, class E>
constexpr bool operator!=(const expected<T, E> &x, const unexpected<E> &e) {
  return !x.has_value() ? x.error() != e.value() : true;
}
// Same as above with the operands swapped at the call site.
template <class T, class E>
constexpr bool operator!=(const unexpected<E> &e, const expected<T, E> &x) {
  return !x.has_value() ? x.error() != e.value() : true;
}

// Non-member swap for expected; it simply forwards to lhs.swap(rhs).
// The overload is only enabled when the condition below holds: T is void or
// move constructible, T and E are swappable (detail::is_swappable), and E is
// move constructible.
// It is noexcept exactly when the member swap it calls is noexcept.
template <class T, class E,
          detail::enable_if_t<(std::is_void<T>::value ||
                               std::is_move_constructible<T>::value) &&
                              detail::is_swappable<T>::value &&
                              std::is_move_constructible<E>::value &&
                              detail::is_swappable<E>::value> * = nullptr>
void swap(expected<T, E> &lhs,
          expected<T, E> &rhs) noexcept(noexcept(lhs.swap(rhs))) {
  lhs.swap(rhs);
}
}  // namespace tl

#endif


================================================
FILE: src/include/zvec/ailego/pattern/factory.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstring>
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <vector>

namespace zvec {
namespace ailego {

/*! Factory
 *
 *  Registry that maps a C-string key to a creator function producing a new
 *  object whose type derives from `TBase`. There is one registry per `TBase`,
 *  held in a function-local static (see Instance()).
 *
 *  Keys are stored as raw `const char *` pointers and compared with
 *  std::strcmp, so every key must stay valid for as long as the registry is
 *  used (string literals are the intended use).
 */
template <typename TBase>
class Factory {
 public:
  /*! Factory Register
   *
   *  Constructing a Register object adds a creator for `TImpl` to the
   *  registry of `TBase`. Only available when `TImpl` derives from `TBase`.
   */
  template <typename TImpl, typename = typename std::enable_if<
                                std::is_base_of<TBase, TImpl>::value>::type>
  class Register {
   public:
    //! Register `TImpl` under `key`; instances are default-constructed
    Register(const char *key) {
      Factory::Instance()->set(key, [] { return Register::Construct(); });
    }

    //! Register `TImpl` under `key`; instances are constructed from copies of
    //! `args`, which are taken when the Register object is created
    template <typename... TArgs>
    Register(const char *key, TArgs &&...args) {
      // Keep decayed copies: a tuple of `TArgs...` would hold a reference for
      // every lvalue argument, and that reference could dangle by the time
      // the creator runs.
      std::tuple<typename std::decay<TArgs>::type...> tuple(
          std::forward<TArgs>(args)...);

      Factory::Instance()->set(key, [tuple] {
        return Register::Construct(
            tuple, typename TupleIndexMaker<sizeof...(TArgs)>::Type());
      });
    }

   protected:
    //! Tuple Index Maker: recursively builds the index pack 0 .. N-1
    template <size_t N, size_t... I>
    struct TupleIndexMaker : TupleIndexMaker<N - 1, N - 1, I...> {};

    //! Tuple Index: carrier type for a pack of tuple indices
    template <size_t...>
    struct TupleIndex {};

    //! Tuple Index Maker (end of recursion)
    template <size_t... I>
    struct TupleIndexMaker<0, I...> {
      typedef TupleIndex<I...> Type;
    };

    //! Construct an object from the stored arguments.
    //! Returns nullptr when the allocation fails (nothrow new).
    template <typename... TArgs, size_t... I>
    static TImpl *Construct(const std::tuple<TArgs...> &tuple,
                            TupleIndex<I...>) {
      return new (std::nothrow) TImpl(std::get<I>(tuple)...);
    }

    //! Construct a default-initialized object.
    //! Returns nullptr when the allocation fails (nothrow new).
    static TImpl *Construct(void) {
      return new (std::nothrow) TImpl();
    }
  };

  //! Produce an instance (c_ptr); the caller owns the returned object.
  //! Returns nullptr when `key` is not registered.
  static TBase *Make(const char *key) {
    return Factory::Instance()->produce(key);
  }

  //! Produce an instance (shared_ptr); empty when `key` is not registered
  static std::shared_ptr<TBase> MakeShared(const char *key) {
    return std::shared_ptr<TBase>(Factory::Make(key));
  }

  //! Produce an instance (unique_ptr); empty when `key` is not registered
  static std::unique_ptr<TBase> MakeUnique(const char *key) {
    return std::unique_ptr<TBase>(Factory::Make(key));
  }

  //! Test whether a class is registered under `key`
  static bool Has(const char *key) {
    return Factory::Instance()->has(key);
  }

  //! Retrieve the keys of all registered classes (in strcmp order)
  static std::vector<std::string> Classes(void) {
    return Factory::Instance()->classes();
  }

 protected:
  //! Constructor
  Factory(void) : map_() {}

  //! Retrieve the singleton factory
  static Factory *Instance(void) {
    static Factory factory;
    return (&factory);
  }

  //! Insert a creator for `key`, replacing any creator already stored
  template <typename TFunc>
  void set(const char *key, TFunc &&func) {
    map_[key] = std::forward<TFunc>(func);
  }

  //! Produce an instance, or nullptr when `key` is unknown
  TBase *produce(const char *key) const {
    auto iter = map_.find(key);
    if (iter != map_.end()) {
      return iter->second();
    }
    return nullptr;
  }

  //! Test whether a creator is stored for `key`
  bool has(const char *key) const {
    return (map_.find(key) != map_.end());
  }

  //! Retrieve the keys of all stored creators
  std::vector<std::string> classes(void) const {
    std::vector<std::string> vec;
    vec.reserve(map_.size());
    for (const auto &it : map_) {
      vec.push_back(std::string(it.first));
    }
    return vec;
  }

 private:
  //! Disable them
  Factory(const Factory &) = delete;
  Factory(Factory &&) = delete;
  Factory &operator=(const Factory &) = delete;

  /*! Key Comparer: orders keys by their characters, not by pointer value
   */
  struct KeyComparer {
    bool operator()(const char *lhs, const char *rhs) const {
      return (std::strcmp(lhs, rhs) < 0);
    }
  };

  //! Don't use a temporary buffer as key storage: only the pointer is kept,
  //! so the key must live in a static buffer (e.g. a string literal).
  std::map<const char *, std::function<TBase *()>, KeyComparer> map_;
};

//! Factory Register
//! Defines a file-local static Register object named
//! __ailegoFactoryRegister_<NAME> that registers __IMPL__ in the factory of
//! __BASE__ under the key "<NAME>". Any additional arguments are passed on to
//! the Register constructor. The factory is named with its fully qualified
//! name so that the macro does not depend on the namespace it is expanded in.
#define AILEGO_FACTORY_REGISTER(__NAME__, __BASE__, __IMPL__, ...) \
  static ::zvec::ailego::Factory<__BASE__>::Register<__IMPL__>     \
      __ailegoFactoryRegister_##__NAME__(#__NAME__, ##__VA_ARGS__)

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/include/zvec/ailego/pattern/singleton.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <type_traits>

namespace zvec {
namespace ailego {

/*! Singleton (C++11)
 *
 *  Gives access to one function-local static object of type `T` (with any
 *  reference qualifier removed).
 */
template <typename T>
class Singleton {
 public:
  typedef typename std::remove_reference<T>::type ObjectType;

  //! Access the shared object; it is created on the first call
  static ObjectType &Instance() noexcept(
      std::is_nothrow_constructible<ObjectType>::value) {
    // A function-local static is initialized only once, and since C++11 that
    // initialization is thread-safe.
    static ObjectType instance;
    return instance;
  }

 protected:
  //! Constructor (kept accessible for derived classes)
  Singleton() {}

 private:
  //! Neither copyable nor movable
  Singleton(const Singleton &) = delete;
  Singleton(Singleton &&) = delete;
  Singleton &operator=(const Singleton &) = delete;
  Singleton &operator=(Singleton &&) = delete;
};

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/include/zvec/ailego/string/string_concat_helper.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstdlib>
#include <vector>
#include <zvec/ailego/string/string_view.h>

namespace zvec {
namespace ailego {
namespace internal {

//! Helper class to convert integer and float types to string, facilitating
//! string concatenation memory allocation.
class Alphameric {
 public:
  //! Deals with int, int8_t, int16_t, int32_t, bool, short, signed char, non
  //! class enum
  Alphameric(int n)
      : view_(buffer_, snprintf(buffer_, sizeof(buffer_), "%d", n)) {}

  //! Deals with unsigned int, uint8_t, uint16_t, uint32_t, unsigned short,
  //! unsigned char
  Alphameric(unsigned int n)
      : view_(buffer_, snprintf(buffer_, sizeof(buffer_), "%u", n)) {}

  //! Deals with long, int32_t, int64_t
  Alphameric(long n)
      : view_(buffer_, snprintf(buffer_, sizeof(buffer_), "%ld", n)) {}

  //! Deals with unsigned long, uint32_t, uint64_t
  Alphameric(unsigned long n)
      : view_(buffer_, snprintf(buffer_, sizeof(buffer_), "%lu", n)) {}

  //! Deals with long long, int64_t
  Alphameric(long long n)
      : view_(buffer_, snprintf(buffer_, sizeof(buffer_), "%lld", n)) {}

  //! Deals with unsigned long long, uint64_t
  Alphameric(unsigned long long n)
      : view_(buffer_, snprintf(buffer_, sizeof(buffer_), "%llu", n)) {}

  //! Deals with float, with 6 precision digit the same as std::to_string
  Alphameric(float f)
      : view_(buffer_, snprintf(buffer_, sizeof(buffer_), "%g", f)) {}

  //! Deals with double, with 6 precision digit the same as std::to_string
  Alphameric(double f)
      : view_(buffer_, snprintf(buffer_, sizeof(buffer_), "%g", f)) {}

  //! Deals with long double, with 6 precision digit the same as std::to_string
  Alphameric(long double f)
      : view_(buffer_, snprintf(buffer_, sizeof(buffer_), "%Lg", f)) {}

  //! Deals with const char*
  Alphameric(const char *s) : view_(s) {}

  //! Deals with std::string
  Alphameric(const std::string &s) : view_(s) {}

  //! Deals with StringView
  Alphameric(StringView s) : view_(s) {}

  // Use string literals ":" instead of character literals ':'.
  Alphameric(char c) = delete;
  Alphameric(const Alphameric &) = delete;
  Alphameric &operator=(const Alphameric &) = delete;

  //! Deals with enum class with non int underlying type
  template <typename T,
            typename = typename std::enable_if<
                std::is_enum<T>{} && !std::is_convertible<T, int>{}>::type>
  Alphameric(T e)
      : Alphameric(static_cast<typename std::underlying_type<T>::type>(e)) {}

  //! Deals with std::vector<bool> subscript reference
  template <typename T,
            typename std::enable_if<
                std::is_class<T>::value &&
                (std::is_same<T, std::vector<bool>::reference>::value ||
                 std::is_same<T, std::vector<bool>::const_reference>::value)>::
                type * = nullptr>
  Alphameric(T e) : Alphameric(static_cast<bool>(e)) {}

  //! string size
  size_t size() const {
    return view_.size();
  }

  //! string data
  const char *data() const {
    return view_.data();
  }

  //! string view
  StringView view() const {
    return view_;
  }

 private:
  static constexpr int kBufferSize = 32;
  char buffer_[kBufferSize];
  StringView view_;
};

}  // namespace internal
}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/include/zvec/ailego/string/string_view.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstring>
#include <string>

namespace zvec {
namespace ailego {

//! StringView is a lightweight, read-only window onto character data held
//! elsewhere: a `std::string`, a double-quoted string literal, a character
//! array, or another `StringView`.
//!
//! A `StringView` does *not* own the characters it refers to, and they cannot
//! be modified through the view.
class StringView {
 public:
  //! Create an empty view (null data pointer, size zero)
  StringView() = default;

  //! View a NUL-terminated string; a null pointer gives a view of size zero
  StringView(const char *str) : data_(str) {
    if (str != nullptr) {
      size_ = strlen(str);
    }
  }

  //! View the `len` characters starting at `str`
  StringView(const char *str, size_t len) : data_(str), size_(len) {}

  //! View the contents of a std::string
  StringView(const std::string &str) : data_(str.data()), size_(str.size()) {}

  //! Pointer to the first viewed character
  const char *data() const {
    return data_;
  }

  //! Number of viewed characters
  size_t size() const {
    return size_;
  }

  //! True when the view covers no characters
  bool empty() const {
    return size_ == 0;
  }

 private:
  const char *data_{nullptr};
  size_t size_{0};
};

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/include/zvec/ailego/utility/file_helper.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstring>
#include <string>

namespace zvec {
namespace ailego {

/*! File Helper Module
 *
 *  Collection of static file-system helpers. Most of them are only declared
 *  here; FileSize() and BaseName() are implemented inline.
 */
struct FileHelper {
#if defined(_WIN32) || defined(_WIN64)
  //! Native Handle in Windows
  using NativeHandle = void *;
#else
  //! Native Handle in POSIX
  using NativeHandle = int;
#endif

  //! Handle value used to denote "no handle"
  static constexpr NativeHandle InvalidHandle = (NativeHandle)(-1);

  //! Retrieve the path of self process
  static bool GetSelfPath(std::string *path);

  //! Retrieve the final path for the specified file
  static bool GetFilePath(NativeHandle handle, std::string *path);

  //! Retrieve current working directory
  static bool GetWorkingDirectory(std::string *path);

  //! Get the size of a file
  static bool GetFileSize(const char *path, size_t *psz);

  //! Delete a name and possibly the file it refers to
  static bool DeleteFile(const char *path);

  //! Change the name or location of a file
  static bool RenameFile(const char *oldpath, const char *newpath);

  //! Make directories' path
  static bool MakePath(const char *path);

  //! Remove a file or a directory (includes files & subdirectories)
  static bool RemovePath(const char *path);

  //! Remove a directory (includes files & subdirectories)
  static bool RemoveDirectory(const char *path);

  //! Retrieve non-zero if the path exists
  static bool IsExist(const char *path);

  //! Retrieve non-zero if the path is a regular file
  static bool IsRegular(const char *path);

  //! Retrieve non-zero if the path is a directory
  static bool IsDirectory(const char *path);

  //! Retrieve non-zero if the path is a symbolic link
  static bool IsSymbolicLink(const char *path);

  //! Retrieve non-zero if two paths are pointing to the same file
  static bool IsSame(const char *path1, const char *path2);

  //! Convenience form of GetFileSize() that returns the size directly.
  //! The status reported by GetFileSize() is ignored; the result starts out
  //! as 0.
  static size_t FileSize(const char *path) {
    size_t bytes = 0;
    GetFileSize(path, &bytes);
    return bytes;
  }

  //! Retrieve the part of `path` after its last separator.
  //! The last '/' is used when there is one; only when the path contains no
  //! '/' at all is the last '\\' used. Without any separator the whole path
  //! is returned. The result points into `path`.
  static const char *BaseName(const char *path) {
    const char *separator = std::strrchr(path, '/');
    if (separator == nullptr) {
      separator = std::strrchr(path, '\\');
    }
    if (separator == nullptr) {
      return path;
    }
    return separator + 1;
  }
};

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/include/zvec/ailego/utility/float_helper.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstddef>
#include <cstdint>
#include <cstring>

namespace zvec {
namespace ailego {

/*! Float Helper
 *
 *  Conversions between FP16 values, carried as uint16_t, and FP32 values.
 *  Only the two scalar overloads that take a normalization factor are
 *  implemented inline; the others are declared here.
 */
struct FloatHelper {
  //! Convert FP16 to FP32
  static float ToFP32(uint16_t val);

  //! Convert FP16 to FP32 (array)
  static void ToFP32(const uint16_t *arr, size_t size, float *out);

  //! Convert FP16 to FP32 with normalization (array)
  static void ToFP32(const uint16_t *arr, size_t size, float norm, float *out);

  //! Convert FP32 to FP16
  static uint16_t ToFP16(float val);

  //! Convert FP32 to FP16 (array)
  static void ToFP16(const float *arr, size_t size, uint16_t *out);

  //! Convert FP32 to FP16 with normalization (array)
  static void ToFP16(const float *arr, size_t size, float norm, uint16_t *out);

  //! Convert FP16 to FP32, then divide the converted value by `norm`
  static inline float ToFP32(uint16_t val, float norm) {
    const float widened = FloatHelper::ToFP32(val);
    return widened / norm;
  }

  //! Divide `val` by `norm`, then convert the quotient to FP16
  static inline uint16_t ToFP16(float val, float norm) {
    const float scaled = val / norm;
    return FloatHelper::ToFP16(scaled);
  }
};

#if !defined(__aarch64__)
/*! Half-Precision Floating Point
 *
 *  Software variant (used when __aarch64__ is not defined): the value is kept
 *  as a 16-bit pattern and every arithmetic or comparison operation goes
 *  through FloatHelper's FP16 <-> FP32 conversions.
 */
class Float16 {
 public:
  //! Constructor (bit pattern 0)
  Float16(void) : value_(0) {}

  //! Constructor from FP32
  Float16(float val) : value_(FloatHelper::ToFP16(val)) {}

  //! Constructor from double (narrowed to FP32 first)
  Float16(double val) : value_(FloatHelper::ToFP16(static_cast<float>(val))) {}

  //! Assignment
  Float16 &operator=(float val) {
    this->value_ = FloatHelper::ToFP16(val);
    return *this;
  }

  //! Addition assignment (computed in FP32, stored back as FP16)
  Float16 &operator+=(float val) {
    this->value_ = FloatHelper::ToFP16(FloatHelper::ToFP32(this->value_) + val);
    return *this;
  }

  //! Subtraction assignment (computed in FP32, stored back as FP16)
  Float16 &operator-=(float val) {
    this->value_ = FloatHelper::ToFP16(FloatHelper::ToFP32(this->value_) - val);
    return *this;
  }

  //! Multiplication assignment (computed in FP32, stored back as FP16)
  Float16 &operator*=(float val) {
    this->value_ = FloatHelper::ToFP16(FloatHelper::ToFP32(this->value_) * val);
    return *this;
  }

  //! Division assignment (computed in FP32, stored back as FP16)
  Float16 &operator/=(float val) {
    this->value_ = FloatHelper::ToFP16(FloatHelper::ToFP32(this->value_) / val);
    return *this;
  }

  //! Retrieve value in FP32
  operator float() const {
    return FloatHelper::ToFP32(this->value_);
  }

  //! Equal operator.
  //! Compares the converted FP32 values, exactly like the relational
  //! operators below and like the __fp16 based variant of this class, instead
  //! of comparing the raw bit patterns.
  bool operator==(const Float16 &rhs) const {
    return FloatHelper::ToFP32(this->value_) == FloatHelper::ToFP32(rhs.value_);
  }

  //! Not equal operator (compares the converted FP32 values)
  bool operator!=(const Float16 &rhs) const {
    return FloatHelper::ToFP32(this->value_) != FloatHelper::ToFP32(rhs.value_);
  }

  //! Less than operator
  bool operator<(const Float16 &rhs) const {
    return FloatHelper::ToFP32(this->value_) < FloatHelper::ToFP32(rhs.value_);
  }

  //! Less than or equal operator
  bool operator<=(const Float16 &rhs) const {
    return FloatHelper::ToFP32(this->value_) <= FloatHelper::ToFP32(rhs.value_);
  }

  //! Greater than operator
  bool operator>(const Float16 &rhs) const {
    return FloatHelper::ToFP32(this->value_) > FloatHelper::ToFP32(rhs.value_);
  }

  //! Greater than or equal operator
  bool operator>=(const Float16 &rhs) const {
    return FloatHelper::ToFP32(this->value_) >= FloatHelper::ToFP32(rhs.value_);
  }

  //! Calculate the absolute value by clearing the top bit of the pattern
  static inline Float16 Absolute(const Float16 &x) {
    Float16 abs;
    abs.value_ = static_cast<uint16_t>(x.value_ & 0x7fff);
    return abs;
  }

 private:
  //! Raw 16-bit pattern as produced by FloatHelper::ToFP16
  uint16_t value_;
};
#else
/*! Half-Precision Floating Point
 *
 *  Variant used when __aarch64__ is defined: the value is stored in the
 *  compiler's `__fp16` type and the operators act on it directly.
 */
class Float16 {
 public:
  //! Constructor (value 0)
  Float16(void) : value_(0) {}

  //! Constructor
  Float16(__fp16 val) : value_(val) {}

  //! Assignment
  Float16 &operator=(__fp16 val) {
    this->value_ = val;
    return *this;
  }

  //! Addition assignment
  Float16 &operator+=(__fp16 val) {
    this->value_ = this->value_ + val;
    return *this;
  }

  //! Subtraction assignment
  Float16 &operator-=(__fp16 val) {
    this->value_ = this->value_ - val;
    return *this;
  }

  //! Multiplication assignment
  Float16 &operator*=(__fp16 val) {
    this->value_ = this->value_ * val;
    return *this;
  }

  //! Division assignment
  Float16 &operator/=(__fp16 val) {
    this->value_ = this->value_ / val;
    return *this;
  }

  //! Retrieve value in FP16
  operator __fp16() const {
    return this->value_;
  }

  //! Equal operator
  bool operator==(const Float16 &rhs) const {
    return this->value_ == rhs.value_;
  }

  //! Not equal operator
  bool operator!=(const Float16 &rhs) const {
    return this->value_ != rhs.value_;
  }

  //! Less than operator
  bool operator<(const Float16 &rhs) const {
    return this->value_ < rhs.value_;
  }

  //! Less than or equal operator
  bool operator<=(const Float16 &rhs) const {
    return this->value_ <= rhs.value_;
  }

  //! Greater than operator
  bool operator>(const Float16 &rhs) const {
    return this->value_ > rhs.value_;
  }

  //! Greater than or equal operator
  bool operator>=(const Float16 &rhs) const {
    return this->value_ >= rhs.value_;
  }

  //! Calculate the absolute value by clearing the top bit of the pattern.
  //! The bits are moved with std::memcpy: accessing the `__fp16` object
  //! through a `uint16_t` pointer would violate the aliasing rules.
  static inline Float16 Absolute(const Float16 &x) {
    uint16_t bits = 0;
    std::memcpy(&bits, &x.value_, sizeof(bits));
    bits = static_cast<uint16_t>(bits & 0x7fff);

    Float16 abs;
    std::memcpy(&abs.value_, &bits, sizeof(bits));
    return abs;
  }

 private:
  __fp16 value_;
};
#endif

// Check size of Float16: whichever variant was selected above must occupy
// exactly two bytes. Note that the condition tests sizeof(), not alignment.
static_assert(sizeof(Float16) == 2, "Float16 must be aligned with 2 bytes");

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/include/zvec/ailego/utility/string_helper.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstdint>
#include <string>
#include <vector>
#include <zvec/ailego/string/string_concat_helper.h>
#include <zvec/ailego/utility/string_helper_impl.h>

namespace zvec {
namespace ailego {

/*! String Helper
 */
struct StringHelper {
  //! Return true if the `ref` starts with the given prefix
  static bool StartsWith(const std::string &ref, const std::string &prefix);

  //! Return true if the `ref` ends with the given suffix
  static bool EndsWith(const std::string &ref, const std::string &suffix);

  //! Split a string into a vector of T.
  //! All overloads below forward unchanged to details::SplitImpl; they only
  //! differ in the delimiter type (single char, C string, std::string) and in
  //! whether a `skip_empty` flag is passed along.
  //! NOTE: delim better NOT contain valid symbol for T,
  //!       i.e. digits + - for integers,
  //!            digits + - E e . for floating numbers
  //!       otherwise there will be performance overhead.
  //! NOTE(review): whether `out` is cleared first and what exactly
  //! `skip_empty` drops is decided by details::SplitImpl — confirm there.
  template <typename T>
  static void Split(const std::string &str, char delim, std::vector<T> *out) {
    return details::SplitImpl<char, T>(str, delim, out);
  }
  //! Split with a C-string delimiter
  template <typename T>
  static void Split(const std::string &str, const char *delim,
                    std::vector<T> *out) {
    return details::SplitImpl<const char *, T>(str, delim, out);
  }
  //! Split with a std::string delimiter
  template <typename T>
  static void Split(const std::string &str, const std::string &delim,
                    std::vector<T> *out) {
    return details::SplitImpl<const std::string &, T>(str, delim, out);
  }

  //! Split with a single-character delimiter and an explicit `skip_empty`
  template <typename T>
  static void Split(const std::string &str, char delim, std::vector<T> *out,
                    bool skip_empty) {
    return details::SplitImpl<char, T>(str, delim, out, skip_empty);
  }
  //! Split with a C-string delimiter and an explicit `skip_empty`
  template <typename T>
  static void Split(const std::string &str, const char *delim,
                    std::vector<T> *out, bool skip_empty) {
    return details::SplitImpl<const char *, T>(str, delim, out, skip_empty);
  }
  //! Split with a std::string delimiter and an explicit `skip_empty`
  template <typename T>
  static void Split(const std::string &str, const std::string &delim,
                    std::vector<T> *out, bool skip_empty) {
    return details::SplitImpl<const std::string &, T>(str, delim, out,
                                                      skip_empty);
  }

  // Trim from start (in place)
  static void LeftTrim(std::string &str);

  // Trim from end (in place)
  static void RightTrim(std::string &str);

  // Trim from both ends (in place)
  static void Trim(std::string &str);

  // Trim from start (copying)
  static std::string CopyLeftTrim(std::string str);

  // Trim from end (copying)
  static std::string CopyRightTrim(std::string str);

  // Trim from both ends (copying)
  static std::string CopyTrim(std::string str);

  //! Compare ignore case
  static bool CompareIgnoreCase(const std::string &a, const std::string &b);

  //! Convert string to floating-point number (double).
  //! Returns true only when at least one character was converted and the
  //! conversion stopped at the terminating NUL; an empty string is rejected.
  //! `*val` always receives the value returned by std::strtod.
  static bool ToDouble(const std::string &str, double *val) {
    const char *begin = str.c_str();
    char *endptr = nullptr;
    *val = std::strtod(begin, &endptr);
    // endptr == begin means that no conversion was performed
    return (endptr && endptr != begin && *endptr == '\0');
  }

  //! Convert string to floating-point number (float).
  //! Same success rule as ToDouble(): something must have been converted and
  //! the whole string must have been consumed.
  static bool ToFloat(const std::string &str, float *val) {
    const char *begin = str.c_str();
    char *endptr = nullptr;
    *val = std::strtof(begin, &endptr);
    return (endptr && endptr != begin && *endptr == '\0');
  }

  //! Convert string to integer number (int8_t).
  //! The base is auto-detected (base argument 0). Returns true only when at
  //! least one character was converted and the whole string was consumed.
  //! The parsed value is narrowed with a cast; its range is not validated.
  static bool ToInt8(const std::string &str, int8_t *val) {
    const char *begin = str.c_str();
    char *endptr = nullptr;
    *val = static_cast<int8_t>(std::strtol(begin, &endptr, 0));
    return (endptr && endptr != begin && *endptr == '\0');
  }

  //! Convert string to integer number (int16_t).
  //! Same rules as ToInt8(); the range of the parsed value is not validated.
  static bool ToInt16(const std::string &str, int16_t *val) {
    const char *begin = str.c_str();
    char *endptr = nullptr;
    *val = static_cast<int16_t>(std::strtol(begin, &endptr, 0));
    return (endptr && endptr != begin && *endptr == '\0');
  }

  //! Convert string to integer number (int32_t).
  //! Same rules as ToInt8(); the range of the parsed value is not validated.
  static bool ToInt32(const std::string &str, int32_t *val) {
    const char *begin = str.c_str();
    char *endptr = nullptr;
    *val = static_cast<int32_t>(std::strtol(begin, &endptr, 0));
    return (endptr && endptr != begin && *endptr == '\0');
  }

  //! Convert string to integer number (int64_t).
  //! Same rules as ToInt8(), parsed with std::strtoll.
  static bool ToInt64(const std::string &str, int64_t *val) {
    const char *begin = str.c_str();
    char *endptr = nullptr;
    *val = static_cast<int64_t>(std::strtoll(begin, &endptr, 0));
    return (endptr && endptr != begin && *endptr == '\0');
  }

  //! Convert string to unsigned integer number (uint8_t).
  //! The base is auto-detected (base argument 0). Returns true only when at
  //! least one character was converted and the whole string was consumed.
  //! The parsed value is narrowed with a cast; its range is not validated.
  static bool ToUint8(const std::string &str, uint8_t *val) {
    const char *begin = str.c_str();
    char *endptr = nullptr;
    *val = static_cast<uint8_t>(std::strtoul(begin, &endptr, 0));
    return (endptr && endptr != begin && *endptr == '\0');
  }

  //! Convert string to unsigned integer number (uint16_t).
  //! Same rules as ToUint8(); the range of the parsed value is not validated.
  static bool ToUint16(const std::string &str, uint16_t *val) {
    const char *begin = str.c_str();
    char *endptr = nullptr;
    *val = static_cast<uint16_t>(std::strtoul(begin, &endptr, 0));
    return (endptr && endptr != begin && *endptr == '\0');
  }

  //! Convert string to unsigned integer number (uint32_t).
  //! Same rules as ToUint8(); the range of the parsed value is not validated.
  static bool ToUint32(const std::string &str, uint32_t *val) {
    const char *begin = str.c_str();
    char *endptr = nullptr;
    *val = static_cast<uint32_t>(std::strtoul(begin, &endptr, 0));
    return (endptr && endptr != begin && *endptr == '\0');
  }

  //! Convert string to unsigned integer number (uint64_t).
  //! Same rules as ToUint8(), parsed with std::strtoull.
  static bool ToUint64(const std::string &str, uint64_t *val) {
    const char *begin = str.c_str();
    char *endptr = nullptr;
    *val = static_cast<uint64_t>(std::strtoull(begin, &endptr, 0));
    return (endptr && endptr != begin && *endptr == '\0');
  }

  //! Convert floating-point number to string (double).
  //! Like every ToString overload below, this simply returns
  //! std::to_string(val), so the text format is whatever std::to_string
  //! produces for the argument.
  static std::string ToString(double val) {
    return std::to_string(val);
  }

  //! Convert floating-point number to string (float)
  static std::string ToString(float val) {
    return std::to_string(val);
  }

  //! Convert integer number to string (int8_t)
  static std::string ToString(int8_t val) {
    return std::to_string(val);
  }

  //! Convert integer number to string (int16_t)
  static std::string ToString(int16_t val) {
    return std::to_string(val);
  }

  //! Convert integer number to string (int32_t)
  static std::string ToString(int32_t val) {
    return std::to_string(val);
  }

  //! Convert integer number to string (int64_t)
  static std::string ToString(int64_t val) {
    return std::to_string(val);
  }

  //! Convert unsigned integer number to string (uint8_t)
  static std::string ToString(uint8_t val) {
    return std::to_string(val);
  }

  //! Convert unsigned integer number to string (uint16_t)
  static std::string ToString(uint16_t val) {
    return std::to_string(val);
  }

  //! Convert unsigned integer number to string (uint32_t)
  static std::string ToString(uint32_t val) {
    return std::to_string(val);
  }

  //! Convert unsigned integer number to string (uint64_t)
  static std::string ToString(uint64_t val) {
    return std::to_string(val);
  }

  //! Concatenate an arbitrary number of std::string, C-string, integer and
  //! floating point arguments into a new string. Every overload builds an
  //! empty result and forwards to Append, which is documented as performing
  //! a single memory allocation.
  //! E.g. auto s = Concat("foo", 123, std::string("bar"), 3.14159);
  //!
  //! Do not feed the destination back in as an argument; use Append instead
  //! of any of the following:
  //! str = Concat(str, ...);
  //! str.append(Concat(str, ...));
  //! str += Concat(str, ...);
  //!
  //! NOTE: char literal(e.g. ':') is not allowed,
  //! use string literal(e.g. ":") instead.

  //! Zero-argument form: returns an empty string.
  static std::string Concat() {
    return {};
  }
  //! One to four arguments: declared here, defined inline after the class.
  static std::string Concat(const internal::Alphameric &a);
  static std::string Concat(const internal::Alphameric &a,
                            const internal::Alphameric &b);
  static std::string Concat(const internal::Alphameric &a,
                            const internal::Alphameric &b,
                            const internal::Alphameric &c);
  static std::string Concat(const internal::Alphameric &a,
                            const internal::Alphameric &b,
                            const internal::Alphameric &c,
                            const internal::Alphameric &d);
  // Support 5 or more arguments: the first five are taken as Alphameric, the
  // rest are forwarded untouched to the variadic Append overload.
  template <typename... T>
  static std::string Concat(const internal::Alphameric &a,
                            const internal::Alphameric &b,
                            const internal::Alphameric &c,
                            const internal::Alphameric &d,
                            const internal::Alphameric &e, const T &...args) {
    std::string result;
    Append(&result, a, b, c, d, e, args...);
    return result;
  }

  //! Append an arbitrary number of std::string, C-string, integer and
  //! floating point arguments to an existing string; documented as using one
  //! memory allocation (the work is done by AppendViews, defined elsewhere).
  //! E.g. Append(&str, "foo", 123, std::string("bar"), 3.14159);
  //!
  //! WARNING: Append requires that none of the arguments be a reference to
  //! destination str.
  //!
  //! Do not do the following
  //! std::string s = "foo";
  //! Append(&s, s);
  //!
  //! NOTE: char literal(e.g. ':') is not allowed,
  //! use string literal(e.g. ":") instead.

  //! Zero-argument form: nothing to append, the destination is untouched.
  static void Append(std::string *) {}
  //! One to four arguments: declared here, defined outside this view.
  static void Append(std::string *str, const internal::Alphameric &a);
  static void Append(std::string *str, const internal::Alphameric &a,
                     const internal::Alphameric &b);
  static void Append(std::string *str, const internal::Alphameric &a,
                     const internal::Alphameric &b,
                     const internal::Alphameric &c);
  static void Append(std::string *str, const internal::Alphameric &a,
                     const internal::Alphameric &b,
                     const internal::Alphameric &c,
                     const internal::Alphameric &d);
  // Support 5 or more arguments: every argument is viewed through an
  // Alphameric (the extra ones are converted with static_cast) and the
  // resulting views are handed to AppendViews in a single call.
  template <typename... T>
  static void Append(std::string *str, const internal::Alphameric &a,
                     const internal::Alphameric &b,
                     const internal::Alphameric &c,
                     const internal::Alphameric &d,
                     const internal::Alphameric &e, const T &...args) {
    AppendViews(str,
                {a.view(), b.view(), c.view(), d.view(), e.view(),
                 static_cast<const internal::Alphameric &>(args).view()...});
  }

  //! Append list of StringView to str.
  //! Declared here only; the definition is not part of this header section.
  static void AppendViews(std::string *str,
                          std::initializer_list<StringView> views);
};

//! Concat with one argument: append it to a fresh string and return that.
inline std::string StringHelper::Concat(const internal::Alphameric &a) {
  std::string joined;
  StringHelper::Append(&joined, a);
  return joined;
}

//! Concat with two arguments.
inline std::string StringHelper::Concat(const internal::Alphameric &a,
                                        const internal::Alphameric &b) {
  std::string joined;
  StringHelper::Append(&joined, a, b);
  return joined;
}

//! Concat with three arguments.
inline std::string StringHelper::Concat(const internal::Alphameric &a,
                                        const internal::Alphameric &b,
                                        const internal::Alphameric &c) {
  std::string joined;
  StringHelper::Append(&joined, a, b, c);
  return joined;
}

//! Concat with four arguments.
inline std::string StringHelper::Concat(const internal::Alphameric &a,
                                        const internal::Alphameric &b,
                                        const internal::Alphameric &c,
                                        const internal::Alphameric &d) {
  std::string joined;
  StringHelper::Append(&joined, a, b, c, d);
  return joined;
}

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/include/zvec/ailego/utility/string_helper_impl.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <string>
#include <type_traits>
#include <vector>

namespace zvec {
namespace ailego {
namespace details {

//! Convert string to integers or floating point numbers
//! \tparam T       One of the fixed-width integer types, float or double;
//!                 anything else is rejected at compile time.
//! \param begin    NUL-terminated text to parse. Integer parsing uses base 0,
//!                 so decimal, "0" octal and "0x" hexadecimal are accepted.
//! \param endptr   Forwarded to the C conversion routine, which stores the
//!                 position where parsing stopped.
//! \return The parsed value narrowed to T with static_cast (no range check).
template <typename T>
static T CStringToType(const char *begin, char **endptr) {
  static_assert(
      std::is_same<T, int32_t>::value || std::is_same<T, int16_t>::value ||
          std::is_same<T, int8_t>::value || std::is_same<T, int64_t>::value ||
          std::is_same<T, uint64_t>::value ||
          std::is_same<T, uint32_t>::value ||
          std::is_same<T, uint16_t>::value || std::is_same<T, uint8_t>::value ||
          std::is_same<T, float>::value || std::is_same<T, double>::value,
      "type not supported");
  // The conversion routines are std::-qualified so that the header only
  // relies on what <cstdlib> guarantees. Every branch is well-formed for
  // every T (plain static_cast), so an ordinary `if` chain suffices.
  if (std::is_same<T, int32_t>::value || std::is_same<T, int16_t>::value ||
      std::is_same<T, int8_t>::value) {
    return static_cast<T>(std::strtol(begin, endptr, 0));
  } else if (std::is_same<T, int64_t>::value) {
    return static_cast<T>(std::strtoll(begin, endptr, 0));
  } else if (std::is_same<T, uint32_t>::value ||
             std::is_same<T, uint16_t>::value ||
             std::is_same<T, uint8_t>::value) {
    return static_cast<T>(std::strtoul(begin, endptr, 0));
  } else if (std::is_same<T, uint64_t>::value) {
    return static_cast<T>(std::strtoull(begin, endptr, 0));
  } else if (std::is_same<T, float>::value) {
    return static_cast<T>(std::strtof(begin, endptr));
  } else {
    return static_cast<T>(std::strtod(begin, endptr));
  }
}

//! Functor converting the character range [begin, end) to T.
//! The range itself is not NUL-terminated at `end`, so the first attempt
//! parses straight from `begin`. Only if that parse ran past `end` (the text
//! following the range also looked like part of a number) is the range copied
//! into a std::string and parsed again, which costs an extra allocation.
template <typename T>
struct StringToType {
  T operator()(const char *begin, const char *end) {
    char *stop = nullptr;
    const T direct = CStringToType<T>(begin, &stop);
    if (!(stop > end)) {
      // Parsing stayed inside the range: the direct result is valid.
      return direct;
    }
    // Parsing consumed characters beyond `end`; retry on an isolated,
    // NUL-terminated copy of the range.
    const std::string token(begin, end);
    return CStringToType<T>(token.c_str(), &stop);
  }
};

//! Specialization for std::string: the range is copied verbatim.
template <>
struct StringToType<std::string> {
  std::string operator()(const char *begin, const char *end) {
    return std::string(begin, end);
  }
};

//! Functor returning the length of a delimiter of type T.
//! The primary template only declares the call operator; the supported
//! delimiter types are provided by the specializations below.
template <typename T>
struct DelimiterLen {
  size_t operator()(T delimiter);
};

//! A single character delimiter always has length 1.
template <>
struct DelimiterLen<char> {
  size_t operator()(char) {
    return 1;
  }
};

//! C-string delimiter: its strlen, or 0 for a null pointer.
template <>
struct DelimiterLen<const char *> {
  size_t operator()(const char *delimiter) {
    if (delimiter != nullptr) {
      return std::strlen(delimiter);
    }
    return 0;
  }
};

//! std::string delimiter: its character count.
template <>
struct DelimiterLen<const std::string &> {
  size_t operator()(const std::string &delimiter) {
    return delimiter.length();
  }
};

//! Split implementation.
template <typename D, typename T,
          typename = typename std::enable_if<
              std::is_same<char, D>::value ||
                  std::is_same<const std::string &, D>::value ||
                  std::is_same<const char *, D>::value,
              D>::type>
static void SplitImpl(const std::string &str, D delim, std::vector<T> *out) {
  StringToType<T> func;
  out->clear();

  auto s = str.data();
  size_t delimiter_len = DelimiterLen<D>()(delim);
  if (delimiter_len != 0) {
    size_t a = 0, b = str.find(delim);
    while (b != std::string::npos) {
      out->push_back(func(s + a, s + b));
      a = b + delimiter_len;
      b = str.find(delim, a);
    }
    out->push_back(func(s + a, s + str.length()));
  } else {
    out->push_back(func(s + 0, s + str.length()));
  }
}

//! Split implementation.
template <typename D, typename T,
          typename = typename std::enable_if<
              std::is_same<char, D>::value ||
                  std::is_same<const std::string &, D>::value ||
                  std::is_same<const char *, D>::value,
              D>::type>
static void SplitImpl(const std::string &str, D delim, std::vector<T> *out,
                      bool skip_empty) {
  StringToType<T> func;
  out->clear();

  auto s = str.data();
  size_t delimiter_len = DelimiterLen<D>()(delim);
  if (delimiter_len != 0) {
    size_t a = 0, b = str.find(delim);
    while (b != std::string::npos) {
      if (!skip_empty || b - a > 0) {
        out->push_back(func(s + a, s + b));
      }

      a = b + delimiter_len;
      b = str.find(delim, a);
    }
    if (!skip_empty || str.length() - a > 0) {
      out->push_back(func(s + a, s + str.length()));
    }
  } else {
    if (!skip_empty || str.length() > 0) {
      out->push_back(func(s + 0, s + str.length()));
    }
  }
}
}  // namespace details
}  // namespace ailego
}  // namespace zvec

================================================
FILE: src/include/zvec/ailego/utility/time_helper.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <string>
#include <zvec/ailego/internal/platform.h>

namespace zvec {
namespace ailego {

/*! Monotime
 *  Static accessors for a monotonic clock reading, offered in four units.
 *  Only declarations appear in this header; the definitions live elsewhere,
 *  so the clock's origin is not specified here.
 */
struct Monotime {
  //! Retrieve monotonic time in nanoseconds
  static uint64_t NanoSeconds(void);

  //! Retrieve monotonic time in microseconds
  static uint64_t MicroSeconds(void);

  //! Retrieve monotonic time in milliseconds
  static uint64_t MilliSeconds(void);

  //! Retrieve monotonic time in seconds
  static uint64_t Seconds(void);
};

/*! Realtime
 *  Static accessors for the system (wall-clock) time plus helpers that
 *  format a timestamp as local or GMT text. The four-argument and
 *  format-taking functions are only declared here; the inline overloads
 *  below supply the default format "%Y-%m-%d %H:%M:%S".
 */
struct Realtime {
  //! Retrieve system time in nanoseconds
  static uint64_t NanoSeconds(void);

  //! Retrieve system time in microseconds
  static uint64_t MicroSeconds(void);

  //! Retrieve system time in milliseconds
  static uint64_t MilliSeconds(void);

  //! Retrieve system time in seconds
  static uint64_t Seconds(void);

  //! Retrieve a timestamp as a specific local time format
  //! (return value presumably is the number of characters written --
  //! confirm against the implementation)
  static size_t Localtime(uint64_t stamp, const char *format, char *buf,
                          size_t len);

  //! Retrieve a timestamp as a specific GMT time format
  static size_t Gmtime(uint64_t stamp, const char *format, char *buf,
                       size_t len);

  //! Retrieve local time in string
  static size_t Localtime(const char *format, char *buf, size_t len);

  //! Retrieve GMT time in string
  static size_t Gmtime(const char *format, char *buf, size_t len);

  //! Retrieve local time in string, using the default format
  static size_t Localtime(char *buf, size_t len) {
    return Localtime("%Y-%m-%d %H:%M:%S", buf, len);
  }

  //! Retrieve GMT time in string, using the default format
  static size_t Gmtime(char *buf, size_t len) {
    return Gmtime("%Y-%m-%d %H:%M:%S", buf, len);
  }

  //! Retrieve local time in string
  static std::string Localtime(void) {
    // Zero-initialized so that a formatter that fails without writing
    // anything yields an empty string instead of reading indeterminate
    // stack bytes.
    char str[32] = {};
    Localtime(str, sizeof(str));
    return std::string(str);
  }

  //! Retrieve GMT time in string
  static std::string Gmtime(void) {
    char str[32] = {};  // see Localtime(void): never read uninitialized data
    Gmtime(str, sizeof(str));
    return std::string(str);
  }

  //! Retrieve a timestamp as local time, using the default format
  static size_t Localtime(uint64_t stamp, char *buf, size_t len) {
    return Localtime(stamp, "%Y-%m-%d %H:%M:%S", buf, len);
  }

  //! Retrieve a timestamp as GMT time, using the default format
  static size_t Gmtime(uint64_t stamp, char *buf, size_t len) {
    return Gmtime(stamp, "%Y-%m-%d %H:%M:%S", buf, len);
  }

  //! Retrieve a timestamp as local time text, using the default format
  static std::string Localtime(uint64_t stamp) {
    // An arbitrary stamp may not be convertible; keep the buffer defined.
    char str[32] = {};
    Localtime(stamp, str, sizeof(str));
    return std::string(str);
  }

  //! Retrieve a timestamp as GMT time text, using the default format
  static std::string Gmtime(uint64_t stamp) {
    char str[32] = {};
    Gmtime(stamp, str, sizeof(str));
    return std::string(str);
  }
};

/*! Thread-specific CPU time
 *  Static accessors for a CPU-time reading, offered in four units. Only
 *  declarations appear here; the definitions live outside this header.
 */
struct CPUtime {
  //! Retrieve CPU time in nanoseconds
  static uint64_t NanoSeconds(void);

  //! Retrieve CPU time in microseconds
  static uint64_t MicroSeconds(void);

  //! Retrieve CPU time in milliseconds
  static uint64_t MilliSeconds(void);

  //! Retrieve CPU time in seconds
  static uint64_t Seconds(void);
};

/*! Elapsed Time
 *  Stopwatch over Monotime: remembers a starting stamp in nanoseconds and
 *  reports how far Monotime::NanoSeconds() has advanced since then.
 */
class ElapsedTime {
 public:
  //! Constructor: start measuring from now
  ElapsedTime() : stamp_(Monotime::NanoSeconds()) {}

  //! Retrieve the elapsed time in nanoseconds
  uint64_t nano_seconds() const {
    const uint64_t now = Monotime::NanoSeconds();
    return now - stamp_;
  }

  //! Retrieve the elapsed time in microseconds
  uint64_t micro_seconds() const {
    return nano_seconds() / 1000u;
  }

  //! Retrieve the elapsed time in milliseconds
  uint64_t milli_seconds() const {
    return nano_seconds() / 1000000u;
  }

  //! Retrieve the elapsed time in seconds
  uint64_t seconds() const {
    return nano_seconds() / 1000000000u;
  }

  //! Restart the measurement from now
  void reset() {
    stamp_ = Monotime::NanoSeconds();
  }

 private:
  //! Monotime reading (nanoseconds) taken at construction or last reset
  uint64_t stamp_;
};

/*! Elapsed CPU Time
 *  Stopwatch over CPUtime: remembers a starting stamp in nanoseconds and
 *  reports how far CPUtime::NanoSeconds() has advanced since then.
 */
class ElapsedCPUTime {
 public:
  //! Constructor: start measuring from now
  ElapsedCPUTime() : stamp_(CPUtime::NanoSeconds()) {}

  //! Retrieve the elapsed time in nanoseconds
  uint64_t nano_seconds() const {
    const uint64_t now = CPUtime::NanoSeconds();
    return now - stamp_;
  }

  //! Retrieve the elapsed time in microseconds
  uint64_t micro_seconds() const {
    return nano_seconds() / 1000u;
  }

  //! Retrieve the elapsed time in milliseconds
  uint64_t milli_seconds() const {
    return nano_seconds() / 1000000u;
  }

  //! Retrieve the elapsed time in seconds
  uint64_t seconds() const {
    return nano_seconds() / 1000000000u;
  }

  //! Restart the measurement from now
  void reset() {
    stamp_ = CPUtime::NanoSeconds();
  }

 private:
  //! CPUtime reading (nanoseconds) taken at construction or last reset
  uint64_t stamp_;
};

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/include/zvec/ailego/utility/type_helper.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <functional>
#include <type_traits>
#include <zvec/ailego/utility/float_helper.h>

namespace zvec {
namespace ailego {

//! True for every std::is_arithmetic type and additionally for Float16
template <typename T>
struct IsArithmetic
    : std::integral_constant<bool, (std::is_same<Float16, T>::value ||
                                    std::is_arithmetic<T>::value)> {};

//! True for every std::is_signed type and additionally for Float16
template <typename T>
struct IsSignedArithmetic
    : std::integral_constant<bool, (std::is_same<Float16, T>::value ||
                                    std::is_signed<T>::value)> {};

//! Determines if a type is an unsigned arithmetic type; this is exactly
//! std::is_unsigned (Float16 gets no special treatment here)
template <typename T>
struct IsUnsignedArithmetic : std::is_unsigned<T>::type {};

//! True for every std::is_floating_point type and additionally for Float16
template <typename T>
struct IsFloatingPoint
    : std::integral_constant<bool, (std::is_same<Float16, T>::value ||
                                    std::is_floating_point<T>::value)> {};

//! Alias for the "trivially copyable" trait.
//! GCC >= 5, MSVC and clang get std::is_trivially_copyable; any other
//! compiler falls back to std::has_trivial_copy_constructor.
#if __GNUC__ >= 5 || defined(_MSC_VER) || defined(__clang__)
template <typename T>
using IsTriviallyCopyable = std::is_trivially_copyable<T>;
#else
template <typename T>
using IsTriviallyCopyable = std::has_trivial_copy_constructor<T>;
#endif

#if __cplusplus >= 201703L  // C++17

//! Determines if a type can be invoked with the specified argument types
//! (C++17 and later: plain alias of std::is_invocable)
template <typename TFunc, typename... TArgs>
using IsInvocable = std::is_invocable<TFunc, TArgs...>;

//! Determines if a type can be invoked with the specified argument types
//! and its result converted to R (alias of std::is_invocable_r)
template <typename R, typename TFunc, typename... TArgs>
using IsInvocableWithResult = std::is_invocable_r<R, TFunc, TArgs...>;

#else
//! Determines if a type can be invoked with the specified argument types
//! Pre-C++17 substitute: tests whether std::function<void(TArgs...)> can be
//! constructed from a std::reference_wrapper of the (reference-stripped)
//! callable type.
template <typename TFunc, typename... TArgs>
struct IsInvocable
    : std::is_constructible<std::function<void(TArgs...)>,
                            std::reference_wrapper<
                                typename std::remove_reference<TFunc>::type> > {
};

//! Determines if a type can be invoked with the specified argument types
//! Pre-C++17 substitute: same technique as IsInvocable, but against
//! std::function<R(TArgs...)> so the result type takes part in the check.
template <typename R, typename TFunc, typename... TArgs>
struct IsInvocableWithResult
    : std::is_constructible<std::function<R(TArgs...)>,
                            std::reference_wrapper<
                                typename std::remove_reference<TFunc>::type> > {
};
#endif

//! underlying_type that is also usable on non-enum types (e.g. inside
//! std::conditional): for a non-enum T, `type` is T with cv-qualifiers removed
template <typename T, bool = std::is_enum<T>::value>
struct UnderlyingType {
  using type = typename std::remove_cv<T>::type;
};

//! Enum case: `type` is the enum's std::underlying_type
template <typename T>
struct UnderlyingType<T, true> {
  using type = typename std::underlying_type<T>::type;
};

#if __cplusplus >= 201703L  // C++17

//! Variadic logical AND metafunction (alias of std::conjunction)
template <typename... TConds>
using Conjunction = std::conjunction<TConds...>;

//! Variadic logical OR metafunction (alias of std::disjunction)
template <typename... TConds>
using Disjunction = std::disjunction<TConds...>;

#else
//! Variadic logical AND metafunction
//! Base case (empty pack): true.
template <typename... TConds>
struct Conjunction : std::true_type {};

//! Variadic logical AND metafunction
//! Recursive case: false as soon as the first condition is false, otherwise
//! the conjunction of the remaining conditions.
template <typename TCond, typename... TConds>
struct Conjunction<TCond, TConds...>
    : std::conditional<TCond::value, Conjunction<TConds...>,
                       std::false_type>::type {};

//! Variadic logical OR metafunction
//! Base case (empty pack): false.
template <typename... TConds>
struct Disjunction : std::false_type {};

//! Variadic logical OR metafunction
//! Recursive case: true as soon as the first condition is true, otherwise
//! the disjunction of the remaining conditions.
template <typename TCond, typename... TConds>
struct Disjunction<TCond, TConds...>
    : std::conditional<TCond::value, std::true_type,
                       Disjunction<TConds...> >::type {};
#endif

}  // namespace ailego
}  // namespace zvec


================================================
FILE: src/include/zvec/core/framework/index_builder.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <zvec/core/framework/index_helper.h>
#include <zvec/core/framework/index_holder.h>
#include <zvec/core/framework/index_meta.h>
#include <zvec/core/framework/index_runner.h>

namespace zvec {
namespace core {

//! Index builder interface, layered on IndexRunner, together with static
//! helpers that chain the train / build / dump steps.
class IndexBuilder : public IndexRunner {
 public:
  using Pointer = std::shared_ptr<IndexBuilder>;

  //! Destructor
  virtual ~IndexBuilder() {}

  //! Initialize the builder
  //! The default implementation ignores its arguments and reports
  //! IndexError_NotImplemented.
  virtual int init(const IndexMeta & /*meta*/,
                   const ailego::Params & /*params*/) {
    return IndexError_NotImplemented;
  }

  //! Train and build the index
  //! The holder is wrapped by IndexHelper::MakeTwoPassHolder so that it can
  //! be handed to both train() and build(). Returns the first non-zero code,
  //! or the result of build().
  static int TrainAndBuild(const IndexBuilder::Pointer &builder,
                           IndexHolder::Pointer holder) {
    auto two_pass_holder = IndexHelper::MakeTwoPassHolder(std::move(holder));
    const int train_status = builder->train(two_pass_holder);
    if (train_status != 0) {
      return train_status;
    }
    const int build_status = builder->build(std::move(two_pass_holder));
    return build_status;
  }

  //! Train, build and dump the index
  //! Dumping is attempted only when training and building returned 0.
  static int TrainBuildAndDump(const IndexBuilder::Pointer &builder,
                               IndexHolder::Pointer holder,
                               const IndexDumper::Pointer &dumper) {
    const int status = IndexBuilder::TrainAndBuild(builder, std::move(holder));
    if (status != 0) {
      return status;
    }
    const int dump_status = builder->dump(dumper);
    return dump_status;
  }
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/include/zvec/core/framework/index_bundle.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <map>
#include <memory>
#include <string>
#include <zvec/ailego/container/blob.h>
#include <zvec/ailego/io/file.h>
#include <zvec/ailego/io/mmap_file.h>

namespace zvec {
namespace core {

/*! Index Bundle
 *  Abstract read-only view of a set of named buffers: each std::string key
 *  maps to an ailego::BlobWrap. Concrete bundles decide where the bytes live.
 */
struct IndexBundle {
  //! Index Bundle Pointer
  typedef std::shared_ptr<IndexBundle> Pointer;

  //! Destructor
  virtual ~IndexBundle(void) {}

  //! Retrieve index buffer via key
  virtual ailego::BlobWrap get(const std::string &key) const = 0;

  //! Test whether the key exists
  virtual bool has(const std::string &key) const = 0;

  //! Retrieve all entries as a key -> blob map
  virtual std::map<std::string, ailego::BlobWrap> all(void) const = 0;

  //! Retrieve the count of indexes
  virtual size_t count(void) const = 0;
};

/*! Trivial Index Bundle
 */
class TrivialIndexBundle : public IndexBundle {
 public:
  //! Trivial Index Bundle Pointer
  typedef std::shared_ptr<TrivialIndexBundle> Pointer;

  //! Retrieve index buffer via key
  virtual ailego::BlobWrap get(const std::string &key) const {
    auto iter = map_.find(key);
    if (iter != map_.end()) {
      return iter->second;
    }
    return ailego::BlobWrap();
  }

  //! Test if the key is exist
  virtual bool has(const std::string &key) const {
    return (map_.find(key) != map_.end());
  }

  //! Retrieve all
  virtual std::map<std::string, ailego::BlobWrap> all(void) const {
    return map_;
  }

  //! Retrieve the count of indexes
  virtual size_t count(void) const {
    return map_.size();
  }

  //! Set an index buffer in bundle
  void set(const std::string &key, const ailego::BlobWrap &blob) {
    map_[key] = blob;
  }

  //! Set an index buffer in bundle
  void set(std::string &&key, const ailego::BlobWrap &blob) {
    map_[std::move(key)] = blob;
  }

  //! Set an index buffer in bundle
  void set(const std::string &key, const void *buf, size_t len) {
    map_[key] = ailego::BlobWrap(buf, len);
  }

  //! Set an index buffer in bundle
  void set(std::string &&key, const void *buf, size_t len) {
    map_[std::move(key)] = ailego::BlobWrap(buf, len);
  }

 private:
  std::map<std::string, ailego::BlobWrap> map_;
};

/*! Memory Index Bundle
 */
class MemoryIndexBundle : public IndexBundle {
 public:
  //! Memory Index Bundle Pointer
  typedef std::shared_ptr<MemoryIndexBundle> Pointer;

  //! Retrieve index buffer via key
  virtual ailego::BlobWrap get(const std::string &key) const {
    auto iter = map_.find(key);
    if (iter != map_.end()) {
      return ailego::BlobWrap(iter->second.data(), iter->second.size());
    }
    return ailego::BlobWrap();
  }

  //! Test if the key is exist
  virtual bool has(const std::string &key) const {
    return (map_.find(key) != map_.end());
  }

  //! Retrieve all
  virtual std::map<std::string, ailego::BlobWrap> all(void) const {
    std::map<std::string, ailego::BlobWrap> result;
    for (const auto &it : map_) {
      result.emplace(it.first,
                     ailego::BlobWrap(it.second.data(), it.second.size()));
    }
    return result;
  }

  //! Retrieve the count of indexes
  virtual size_t count(void) const {
    return map_.size();
  }

  //! Set an index buffer in bundle
  void set(const std::string &key, const std::string &buf) {
    map_[key] = buf;
  }

  //! Set an index buffer in bundle
  void set(std::string &&key, const std::string &buf) {
    map_[std::move(key)] = buf;
  }

  //! Set an index buffer in bundle
  void set(const std::string &key, std::string &&buf) {
    map_[key] = std::move(buf);
  }

  //! Set an index buffer in bundle
  void set(std::string &&key, std::string &&buf) {
    map_[std::move(key)] = std::move(buf);
  }

  //! Set an index buffer in bundle
  void set(const std::string &key, const void *buf, size_t len) {
    map_[key].assign(reinterpret_cast<const char *>(buf), len);
  }

  //! Set an index buffer in bundle
  void set(std::string &&key, const void *buf, size_t len) {
    map_[std::move(key)].assign(reinterpret_cast<const char *>(buf), len);
  }

 private:
  std::map<std::string, std::string> map_;
};

/*! MMap File Index Bundle
 */
class MMapFileIndexBundle : public IndexBundle {
 public:
  //! Memory Index Bundle Pointer
  typedef std::shared_ptr<MMapFileIndexBundle> Pointer;

  //! Retrieve index buffer via key
  virtual ailego::BlobWrap get(const std::string &key) const {
    auto iter = map_.find(key);
    if (iter != map_.end()) {
      return ailego::BlobWrap(iter->second.region(), iter->second.size());
    }
    return ailego::BlobWrap();
  }

  //! Test if the key is exist
  virtual bool has(const std::string &key) const {
    return (map_.find(key) != map_.end());
  }

  //! Retrieve all
  virtual std::map<std::string, ailego::BlobWrap> all(void) const {
    std::map<std::string, ailego::BlobWrap> result;
    for (const auto &it : map_) {
      result.emplace(it.first,
                     ailego::BlobWrap(it.second.region(), it.second.size()));
    }
    return result;
  }

  //! Retrieve the count of indexes
  virtual size_t count(void) const {
    return map_.size();
  }

  //! Create a memory mapping file in bundle
  bool create(const std::string &prefix, const std::string &key, size_t len) {
    ailego::MMapFile file;
    if (!file.create(prefix + '/' + key, len)) {
      return false;
    }
    map_[key] = std::move(file);
    return true;
  }

  //! Create a memory mapping file in bundle
  bool create(const std::string &prefix, std::string &&key, size_t len) {
    ailego::MMapFile file;
    if (!file.create(prefix + '/' + key, len)) {
      return false;
    }
    map_[std::move(key)] = std::move(file);
    return true;
  }

  //! Create a memory mapping file in bundle
  bool create(const std::string &path, size_t len) {
    ailego::MMapFile file;
    if (!file.create(path, len)) {
      return false;
    }
    map_[ailego::File::BaseName(path)] = std::move(file);
    return true;
  }

  //! Open a memory mapping file in bundle
  bool open(const std::string &prefix, const std::string &key, bool rdonly) {
    ailego::MMapFile file;
    if (!file.open(prefix + '/' + key, rdonly)) {
      return false;
    }
    map_[key] = std::move(file);
    return true;
  }

  //! Open a memory mapping file in bundle
  bool open(const std::string &prefix, std::string &&key, bool rdonly) {
    ailego::MMapFile file;
    if (!file.open(prefix + '/' + key, rdonly)) {
      return false;
    }
    map_[std::move(key)] = std::move(file);
    return true;
  }

  //! Open a memory mapping file in bundle
  bool open(const std::string &path, bool rdonly) {
    ailego::MMapFile file;
    if (!file.open(path, rdonly)) {
      return false;
    }
    map_[ailego::File::BaseName(path)] = std::move(file);
    return true;
  }

 private:
  std::map<std::string, ailego::MMapFile> map_;
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/include/zvec/core/framework/index_cluster.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <zvec/ailego/container/params.h>
#include <zvec/ailego/container/vector.h>
#include <zvec/ailego/parallel/thread_pool.h>
#include <zvec/core/framework/index_bundle.h>
#include <zvec/core/framework/index_features.h>
#include <zvec/core/framework/index_meta.h>
#include <zvec/core/framework/index_module.h>
#include <zvec/core/framework/index_threads.h>

namespace zvec {
namespace core {

/*! Index Cluster
 */
struct IndexCluster : public IndexModule {
 public:
  //! Index Cluster Pointer
  typedef std::shared_ptr<IndexCluster> Pointer;

  /*! Index Cluster Centroid
   */
  class Centroid {
   public:
    //! Constructor
    Centroid(void)
        : buffer_(), score_(0.0), follows_(0), similars_(), subitems_() {}

    //! Constructor
    Centroid(const void *feat, size_t bytes)
        : buffer_(std::string(reinterpret_cast<const char *>(feat), bytes)),
          score_(0.0),
          follows_(0),
          similars_(),
          subitems_() {}

    //! Constructor
    Centroid(const Centroid &rhs)
        : buffer_(rhs.buffer_),
          score_(rhs.score_),
          follows_(rhs.follows_),
          similars_(rhs.similars_),
          subitems_(rhs.subitems_) {}

    //! Constructor
    Centroid(Centroid &&rhs)
        : buffer_(std::move(rhs.buffer_)),
          score_(rhs.score_),
          follows_(rhs.follows_),
          similars_(std::move(rhs.similars_)),
          subitems_(std::move(rhs.subitems_)) {}

    //! Assignment
    Centroid &operator=(const Centroid &rhs) {
      buffer_ = rhs.buffer_;
      score_ = rhs.score_;
      follows_ = rhs.follows_;
      similars_ = rhs.similars_;
      subitems_ = rhs.subitems_;
      return *this;
    }

    //! Assignment
    Centroid &operator=(Centroid &&rhs) {
      buffer_ = std::move(rhs.buffer_);
      score_ = rhs.score_;
      follows_ = rhs.follows_;
      similars_ = std::move(rhs.similars_);
      subitems_ = std::move(rhs.subitems_);
      return *this;
    }

    //! Less than
    bool operator<(const Centroid &rhs) const {
      return (this->score_ < rhs.score_);
    }

    //! Test if matchs the meta
    bool is_matched(const IndexMeta &meta) const {
      if (buffer_.size() != meta.element_size()) {
        return false;
      }
      for (const auto &it : subitems_) {
        if (!it.is_matched(meta)) {
          return false;
        }
      }
      return true;
    }

    //! Set feature of centroid
    void set_feature(const void *feat, size_t bytes) {
      buffer_.assign(std::string(reinterpret_cast<const char *>(feat), bytes));
    }

    //! Set feature of centroid
    template <typename T>
    void set_feature(const ailego::NumericalVector<T> &feat) {
      buffer_.assign(feat);
    }

    //! Set feature of centroid
    template <typename T>
    void set_feature(ailego::NumericalVector<T> &&feat) {
      buffer_.assign(std::forward<ailego::NumericalVector<T>>(feat));
    }

    //! Set score of centroid
    void set_score(double val) {
      score_ = val;
    }

    //! Set follows of centroid
    void set_follows(size_t count) {
      follows_ = count;
    }

    //! Set similars of centroid
    void set_similars(const std::vector<const void *> &feats) {
      similars_ = feats;
    }

    //! Set similars of centroid
    void set_similars(std::vector<const void *> &&feats) {
      similars_ = std::move(feats);
    }

    //! Set subitems of centroid
    void set_subitems(const std::vector<Centroid> &cents) {
      subitems_ = cents;
    }

    //! Set subitems of centroid
    void set_subitems(std::vector<Centroid> &&cents) {
      subitems_ = std::move(cents);
    }

    //! Retrieve feature buffer
    std::string *mutable_buffer(void) {
      return &buffer_;
    }

    //! Retrieve feature buffer
    const std::string &buffer(void) const {
      return buffer_;
    }

    //! Retrieve feature vector
    template <typename T>
    ailego::NumericalVector<T> *mutable_vector(void) {
      return static_cast<ailego::NumericalVector<T> *>(&buffer_);
    }

    //! Retrieve feature vector
    template <typename T>
    const ailego::NumericalVector<T> &vector(void) const {
      return static_cast<const ailego::NumericalVector<T> &>(buffer_);
    }

    //! Retrieve feature pointer
    const void *feature(void) const {
      return buffer_.data();
    }

    //! Retrieve size of centroid in bytes
    size_t size(void) const {
      return buffer_.size();
    }

    //! Retrieve score of centroid
    double score(void) const {
      return score_;
    }

    //! Retrieve follows' count of centroid
    size_t follows(void) const {
      return follows_;
    }

    //! Retrieve similars of centroid
    const std::vector<const void *> &similars(void) const {
      return similars_;
    }

    //! Retrieve similars of centroid
    std::vector<const void *> *mutable_similars(void) {
      return &similars_;
    }

    //! Retrieve the sub centroids
    const std::vector<Centroid> &subitems(void) const {
      return subitems_;
    }

    //! Retrieve the sub centroids
    std::vector<Centroid> *mutable_subitems(void) {
      return &subitems_;
    }

    //! Retrieve the count of subitems (includes children's children)
    size_t subcount(void) const {
      size_t total = subitems_.size();
      for (const auto &it : subitems_) {
        total += it.subcount();
      }
      return total;
    }

   private:
    //! Members
    std::string buffer_;
    double score_;
    size_t follows_;
    std::vector<const void *> similars_;
    std::vector<Centroid> subitems_;
  };

  //! Index Cluster Centroid List
  typedef std::vector<Centroid> CentroidList;

  //! Destructor
  virtual ~IndexCluster(void) {}

  //! Deserialize centroids from bundle
  static int Deserialize(const IndexMeta &meta, IndexBundle::Pointer bundle,
                         CentroidList *cents);

  //! Serialize centroids into bundle
  static int Serialize(const IndexMeta &meta, const CentroidList &cents,
                       IndexBundle::Pointer *out);

  //! Initialize Cluster
  virtual int init(const IndexMeta &meta, const ailego::Params &params) = 0;

  //! Cleanup Cluster
  virtual int cleanup(void) = 0;

  //! Reset Cluster
  virtual int reset(void) = 0;

  //! Update Cluster
  virtual int update(const ailego::Params &params) = 0;

  //! Suggest dividing to K clusters
  virtual void suggest(uint32_t k) = 0;

  //! Mount features
  virtual int mount(IndexFeatures::Pointer feats) = 0;

  //! Cluster
  virtual int cluster(CentroidList &cents) {
    return this->cluster(nullptr, cents);
  }

  //! Cluster
  virtual int cluster(IndexThreads::Pointer threads, CentroidList &cents) = 0;

  //! Classify
  virtual int classify(CentroidList &cents) {
    return this->classify(nullptr, cents);
  }

  //! Classify
  virtual int classify(IndexThreads::Pointer threads, CentroidList &cents) = 0;

  //! Label
  virtual int label(const CentroidList &cents, std::vector<uint32_t> *out) {
    return this->label(nullptr, cents, out);
  }

  //! Label
  virtual int label(IndexThreads::Pointer threads, const CentroidList &cents,
                    std::vector<uint32_t> *out) = 0;
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/include/zvec/core/framework/index_context.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include <zvec/ailego/container/params.h>
#include <zvec/core/framework/index_document.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_filter.h>
#include <zvec/core/framework/index_groupby.h>
#include <zvec/core/framework/index_metric.h>
#include <zvec/core/framework/index_stats.h>

namespace zvec {
namespace core {

/*! Profiler
 *
 *  Accumulates a running total per label and renders the totals as text.
 */
struct Profiler {
  Profiler() = default;
  ~Profiler() = default;

  //! Add `time` to the total kept under `name` (a new label starts at zero)
  void add(const std::string &name, double time) {
    auto slot = timings.emplace(name, 0.0).first;
    slot->second += time;
  }

  //! Render one "<name>: <total> s" line per label, framed by two rule lines
  std::string display() const {
    const char rule[] = "================================================\n";

    std::string report(rule);
    for (const auto &entry : timings) {
      report += entry.first;
      report += ": ";
      report += std::to_string(entry.second);
      report += " s\n";
    }
    report += rule;

    return report;
  }

  //! Accumulated totals, keyed (and therefore displayed) in label order
  std::map<std::string, double> timings;
};

/*! Index Context
 *
 *  Base class of a search context. It owns the filter, the group-by
 *  callback, the RNN threshold and a profiler; result storage and top-k
 *  handling are left to subclasses.
 */
class IndexContext {
 public:
  //! Index Context Pointer
  using Pointer = std::unique_ptr<IndexContext>;

  //! Index Context UPointer
  using UPointer = std::unique_ptr<IndexContext>;

  /*! Index Context Stats
   *
   *  IndexStats extended with two counters: documents filtered and
   *  documents whose distance was calculated.
   */
  class Stats : public IndexStats {
   public:
    //! Set count of documents filtered
    void set_filtered_count(size_t count) {
      filtered_count_ = count;
    }

    //! Set count of documents dist calced
    void set_dist_calced_count(size_t count) {
      dist_calced_count_ = count;
    }

    //! Retrieve count of documents filtered
    size_t filtered_count() const {
      return filtered_count_;
    }

    //! Retrieve count of documents dist-calced
    size_t dist_calced_count() const {
      return dist_calced_count_;
    }

    //! Retrieve count of documents filtered (mutable)
    size_t *mutable_filtered_count() {
      return &filtered_count_;
    }

    //! Retrieve count of documents dist-calced (mutable)
    size_t *mutable_dist_calced_count() {
      return &dist_calced_count_;
    }

    //! Clear the inherited attributes and zero both counters
    void clear() {
      this->clear_attributes();

      filtered_count_ = 0u;
      dist_calced_count_ = 0u;
    }

   private:
    //! Members
    size_t filtered_count_{0u};
    size_t dist_calced_count_{0u};
  };

  //! Constructor
  IndexContext() {}

  //! Constructor (takes the metric consulted by set_threshold())
  IndexContext(IndexMetric::Pointer index_metric)
      : index_metric_(std::move(index_metric)) {}

  //! Destructor
  virtual ~IndexContext() {}

  //! Set topk of search result
  virtual void set_topk(uint32_t topk) = 0;

  //! Retrieve topk of search result (0 in this base class)
  virtual uint32_t topk() const {
    return 0;
  }

  //! Set group parameters (no-op in this base class)
  virtual void set_group_params(uint32_t /*group_num*/,
                                uint32_t /*group_topk*/) {}

  //! Set brute force threshold (no-op in this base class)
  virtual void set_bruteforce_threshold(uint32_t /*bruteforce_threshold*/) {}

  //! Set mode of debug (no-op in this base class)
  virtual void set_debug_mode(bool /*enable*/) {}

  //! Set fetch vector (no-op in this base class)
  virtual void set_fetch_vector(bool /*enable*/) {}

  //! Retrieve search result
  virtual const IndexDocumentList &result() const = 0;

  //! Retrieve search result with index (the index is ignored here)
  virtual const IndexDocumentList &result(size_t /*index*/) const {
    return this->result();
  }

  //! Retrieve mutable result with index
  virtual IndexDocumentList *mutable_result(size_t idx) = 0;

  //! Retrieve search group result (a shared empty list in this base class)
  virtual const IndexGroupDocumentList &group_result() const {
    // to make it compile
    static const IndexGroupDocumentList empty_list{};
    return empty_list;
  }

  //! Retrieve search group result with index (the index is ignored here)
  virtual const IndexGroupDocumentList &group_result(size_t /*idx*/) const {
    return this->group_result();
  }

  //! Update the parameters of context (not implemented in this base class)
  virtual int update(const ailego::Params & /*params*/) {
    return IndexError_NotImplemented;
  }

  //! Retrieve mode of debug
  virtual bool debug_mode() const {
    return false;
  }

  //! Retrieve debug information
  virtual std::string debug_string() const {
    return std::string();
  }

  //! Retrieve magic number
  virtual uint32_t magic() const {
    return 0;
  }

  //! Retrieve search filter
  const IndexFilter &filter() const {
    return filter_;
  }

  //! Retrieve fetch vector
  virtual bool fetch_vector() const {
    return false;
  }

  //! Reset context (no-op in this base class)
  virtual void reset() {}

  //! Set the filter of context
  template <typename T>
  void set_filter(T &&func) {
    filter_.set(std::forward<T>(func));
  }

  //! Reset the filter of context
  void reset_filter() {
    filter_.reset();
  }

  //! Retrieve search groupby
  const IndexGroupBy &group_by() const {
    return group_by_;
  }

  //! Set the groupby of context
  template <typename T>
  void set_group_by(T &&func) {
    group_by_.set(std::forward<T>(func));
  }

  //! Reset the groupby of context
  void reset_group_by() {
    group_by_.reset();
  }

  //! Set threshold for RNN
  //  When a metric is attached and reports support_normalize(), the value is
  //  first passed through the metric's denormalize() before being stored.
  void set_threshold(float val) {
    const bool needs_denormalize =
        index_metric_ && index_metric_->support_normalize();
    if (needs_denormalize) {
      index_metric_->denormalize(&val);
    }

    threshold_ = val;
  }

  //! Retrieve value of threshold for RNN
  float threshold() const {
    return threshold_;
  }

  //! Reset value of threshold for RNN (back to the largest float)
  void reset_threshold() {
    threshold_ = std::numeric_limits<float>::max();
  }

  //! Generate a global magic number
  static uint32_t GenerateMagic();

  //! Profiler
  Profiler &profiler() {
    return profiler_;
  }

 private:
  //! Members
  IndexFilter filter_{};
  IndexGroupBy group_by_{};
  float threshold_{std::numeric_limits<float>::max()};


  Profiler profiler_{};

 protected:
  //! Metric consulted by set_threshold(); may be null
  IndexMetric::Pointer index_metric_{nullptr};
};

}  // namespace core
}  // namespace zvec

================================================
FILE: src/include/zvec/core/framework/index_converter.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <atomic>
#include <zvec/core/framework/index_dumper.h>
#include <zvec/core/framework/index_holder.h>
#include <zvec/core/framework/index_meta.h>
#include <zvec/core/framework/index_stats.h>
#include "zvec/core/framework/index_reformer.h"

namespace zvec {
namespace core {

/*! Index Converter
 *
 *  Interface of a module that can be trained on a holder, transform a
 *  holder, and dump itself through an IndexDumper. The train/transform
 *  entry points report "not implemented" unless a subclass overrides them.
 */
class IndexConverter : public IndexModule {
 public:
  //! Index Converter Pointer
  using Pointer = std::shared_ptr<IndexConverter>;

  /*! Index Converter Stats
   *
   *  IndexStats extended with atomic counters and time costs for the
   *  train / transform / dump phases.
   */
  class Stats : public IndexStats {
   public:
    Stats() {}

    //! Copy constructor, implemented through the copy assignment below
    Stats(const Stats &stats) {
      this->operator=(stats);
    }

    //! Copy assignment: each atomic member is loaded from `stats` and stored
    //  here. NOTE(review): the IndexStats base part is not copied by this
    //  operator; confirm that is intended.
    Stats &operator=(const Stats &stats) {
      trained_count_.store(stats.trained_count_.load());
      transformed_count_.store(stats.transformed_count_.load());
      dumped_size_.store(stats.dumped_size_.load());
      discarded_count_.store(stats.discarded_count_.load());
      trained_costtime_.store(stats.trained_costtime_.load());
      transformed_costtime_.store(stats.transformed_costtime_.load());
      dumped_costtime_.store(stats.dumped_costtime_.load());
      return *this;
    }

    //! Set count of documents trained
    void set_trained_count(size_t count) {
      trained_count_.store(count);
    }

    //! Set count of documents transformed
    void set_transformed_count(size_t count) {
      transformed_count_.store(count);
    }

    //! Set size of documents dumped
    void set_dumped_size(size_t size) {
      dumped_size_.store(size);
    }

    //! Set count of documents discarded
    void set_discarded_count(size_t count) {
      discarded_count_.store(count);
    }

    //! Set time cost of documents trained
    void set_trained_costtime(uint64_t cost) {
      trained_costtime_.store(cost);
    }

    //! Set time cost of documents transformed
    void set_transformed_costtime(uint64_t cost) {
      transformed_costtime_.store(cost);
    }

    //! Set time cost of documents dumped
    void set_dumped_costtime(uint64_t cost) {
      dumped_costtime_.store(cost);
    }

    //! Retrieve count of documents trained
    size_t trained_count() const {
      return trained_count_.load();
    }

    //! Retrieve count of documents transformed
    size_t transformed_count() const {
      return transformed_count_.load();
    }

    //! Retrieve size of documents dumped
    size_t dumped_size() const {
      return dumped_size_.load();
    }

    //! Retrieve count of documents discarded
    size_t discarded_count() const {
      return discarded_count_.load();
    }

    //! Retrieve time cost of documents trained
    uint64_t trained_costtime() const {
      return trained_costtime_.load();
    }

    //! Retrieve time cost of documents transformed
    uint64_t transformed_costtime() const {
      return transformed_costtime_.load();
    }

    //! Retrieve time cost of documents dumped
    uint64_t dumped_costtime() const {
      return dumped_costtime_.load();
    }

    //! Retrieve count of documents trained (mutable)
    std::atomic<size_t> *mutable_trained_count() {
      return &trained_count_;
    }

    //! Retrieve count of documents transformed (mutable)
    std::atomic<size_t> *mutable_transformed_count() {
      return &transformed_count_;
    }

    //! Retrieve size of documents dumped (mutable)
    std::atomic<size_t> *mutable_dumped_size() {
      return &dumped_size_;
    }

    //! Retrieve count of documents discarded (mutable)
    std::atomic<size_t> *mutable_discarded_count() {
      return &discarded_count_;
    }

    //! Retrieve time cost of documents trained (mutable)
    std::atomic<uint64_t> *mutable_trained_costtime() {
      return &trained_costtime_;
    }

    //! Retrieve time cost of documents transformed (mutable)
    std::atomic<uint64_t> *mutable_transformed_costtime() {
      return &transformed_costtime_;
    }

    //! Retrieve time cost of documents dumped (mutable)
    std::atomic<uint64_t> *mutable_dumped_costtime() {
      return &dumped_costtime_;
    }

   private:
    //! Members
    std::atomic<size_t> trained_count_{0u};
    std::atomic<size_t> transformed_count_{0u};
    std::atomic<size_t> dumped_size_{0u};
    std::atomic<size_t> discarded_count_{0u};
    std::atomic<uint64_t> trained_costtime_{0u};
    std::atomic<uint64_t> transformed_costtime_{0u};
    std::atomic<uint64_t> dumped_costtime_{0u};
  };

  //! Destructor
  virtual ~IndexConverter() {}

  //! Initialize Converter
  virtual int init(const IndexMeta &meta, const ailego::Params &params) = 0;

  //! Cleanup Converter
  virtual int cleanup() = 0;

  //! Train the data (dense holder; not implemented by default)
  virtual int train(IndexHolder::Pointer) {
    return IndexError_NotImplemented;
  }

  //! Train the data (sparse holder; not implemented by default)
  virtual int train(IndexSparseHolder::Pointer) {
    return IndexError_NotImplemented;
  }

  //! Transform the data (dense holder; not implemented by default)
  virtual int transform(IndexHolder::Pointer) {
    return IndexError_NotImplemented;
  }

  //! Transform the data (sparse holder; not implemented by default)
  virtual int transform(IndexSparseHolder::Pointer) {
    return IndexError_NotImplemented;
  }

  //! Dump index into storage
  virtual int dump(const IndexDumper::Pointer &dumper) = 0;

  //! Retrieve statistics
  virtual const Stats &stats() const = 0;

  //! Retrieve a holder as result (null by default)
  virtual IndexHolder::Pointer result() const {
    return nullptr;
  }

  //! Retrieve a sparse holder as result (null by default)
  virtual IndexSparseHolder::Pointer sparse_result() const {
    return nullptr;
  }

  //! Retrieve Index Meta
  virtual const IndexMeta &meta() const = 0;

  //! Train and transform the index
  static int TrainAndTransform(const IndexConverter::Pointer &converter,
                               IndexHolder::Pointer holder);

  //! Train, transform and dump the index
  static int TrainTransformAndDump(const IndexConverter::Pointer &converter,
                                   IndexHolder::Pointer holder,
                                   const IndexDumper::Pointer &dumper);

  //! Convert to reformer (not implemented by default)
  virtual int to_reformer(IndexReformer::Pointer *) {
    return IndexError_NotImplemented;
  }
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/include/zvec/core/framework/index_document.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <cstdint>
#include <string>
#include <zvec/ailego/container/heap.h>
#include <zvec/core/framework/index_storage.h>

namespace zvec {
namespace core {

/*! Index Sparse Document
 *
 *  Plain holder for a sparse count and two byte strings (indices and
 *  values). The class only stores them; it does not interpret their layout.
 */
class IndexSparseDocument {
 public:
  //! Constructor
  IndexSparseDocument() = default;

  //! Retrieve sparse count
  uint32_t sparse_count() const {
    return sparse_count_;
  }

  //! Retrieve sparse indices
  const std::string &sparse_indices() const {
    return sparse_indices_;
  }

  //! Retrieve sparse values
  const std::string &sparse_values() const {
    return sparse_values_;
  }

  //! Retrieve mutable sparse count
  uint32_t *mutable_sparse_count() {
    return &sparse_count_;
  }

  //! Retrieve mutable sparse indices
  std::string *mutable_sparse_indices() {
    return &sparse_indices_;
  }

  //! Retrieve mutable sparse values
  std::string *mutable_sparse_values() {
    return &sparse_values_;
  }

 private:
  uint32_t sparse_count_ = 0;
  std::string sparse_indices_;
  std::string sparse_values_;
};

/*! Index Document
 *
 *  One search hit: primary key, score, index id, an optional pointer to the
 *  vector data and an optional sparse part. When constructed from a
 *  MemoryBlock, the document keeps a copy of that block and `vector()`
 *  points at the block's data.
 */
class IndexDocument {
 public:
  //! Constructor
  IndexDocument() = default;

  //! Constructor
  IndexDocument(uint64_t k, float v) : key_(k), score_(v) {}

  //! Constructor
  IndexDocument(uint64_t k, float v, uint32_t i)
      : key_(k), score_(v), index_(i) {}

  //! Constructor (the vector pointer is stored as-is, not owned)
  IndexDocument(uint64_t k, float v, uint32_t i, const void *vector)
      : key_(k), score_(v), index_(i), vector_(vector) {}

  //! Constructor (keeps `vec_block` and exposes its data as the vector)
  IndexDocument(uint64_t k, float v, uint32_t i,
                IndexStorage::MemoryBlock vec_block)
      : key_(k), score_(v), index_(i), vec_mem_block_(vec_block) {
    vector_ = vec_mem_block_.data();
    has_vec_mem_block_ = true;
  }

  //! Constructor
  IndexDocument(uint64_t k, float v, uint32_t i, const void *vector,
                IndexSparseDocument sparse_doc)
      : key_(k),
        score_(v),
        index_(i),
        vector_(vector),
        sparse_doc_(std::move(sparse_doc)) {}

  //! Constructor (keeps `vec_block` and exposes its data as the vector)
  IndexDocument(uint64_t k, float v, uint32_t i,
                IndexStorage::MemoryBlock vec_block,
                IndexSparseDocument sparse_doc)
      : key_(k),
        score_(v),
        index_(i),
        vec_mem_block_(vec_block),
        sparse_doc_(std::move(sparse_doc)) {
    has_vec_mem_block_ = true;
    vector_ = vec_mem_block_.data();
  }

  //! Copy constructor
  IndexDocument(const IndexDocument &rhs)
      : key_(rhs.key_),
        score_(rhs.score_),
        index_(rhs.index_),
        vector_(rhs.vector_),
        sparse_doc_{rhs.sparse_doc_} {
    if (rhs.has_vec_mem_block_) {
      vec_mem_block_ = rhs.vec_mem_block_;
      has_vec_mem_block_ = true;
    }
  }

  //! Copy assignment
  IndexDocument &operator=(const IndexDocument &rhs) {
    if (this != &rhs) {
      key_ = rhs.key_;
      score_ = rhs.score_;
      index_ = rhs.index_;
      vector_ = rhs.vector_;
      // Keep the block and its flag in step with `rhs`. If only this object
      // held a block, assigning rhs's (never-set, default-state) block drops
      // the stale one instead of leaving it attached with the flag still set.
      if (rhs.has_vec_mem_block_ || has_vec_mem_block_) {
        vec_mem_block_ = rhs.vec_mem_block_;
      }
      has_vec_mem_block_ = rhs.has_vec_mem_block_;
      sparse_doc_ = rhs.sparse_doc_;
    }
    return *this;
  }

  //! Less than (compares scores only)
  bool operator<(const IndexDocument &rhs) const {
    return (this->score_ < rhs.score_);
  }

  //! Greater than (compares scores only)
  bool operator>(const IndexDocument &rhs) const {
    return (this->score_ > rhs.score_);
  }

  //! Retrieve primary key
  uint64_t key(void) const {
    return key_;
  }

  //! Retrieve score value
  float score(void) const {
    return score_;
  }

  //! Retrieve index id
  uint32_t index(void) const {
    return index_;
  }

  //! Retrieve vector pointer (may be null)
  const void *vector() const {
    return vector_;
  }

  //! Retrieve sparse document
  const IndexSparseDocument &sparse_doc() const {
    return sparse_doc_;
  }

  //! Retrieve mutable primary key
  uint64_t *mutable_key(void) {
    return &key_;
  }

  //! Retrieve mutable score value
  float *mutable_score(void) {
    return &score_;
  }

  //! Retrieve mutable index id
  uint32_t *mutable_index(void) {
    return &index_;
  }

  //! Set primary key
  void set_key(uint64_t val) {
    key_ = val;
  }

  //! Set score value
  void set_score(float val) {
    score_ = val;
  }

  //! Set index id
  void set_index(uint32_t val) {
    index_ = val;
  }

 private:
  //! Data members
  uint64_t key_{0u};
  float score_{0.0f};
  uint32_t index_{0u};
  const void *vector_{nullptr};
  //! True only while vec_mem_block_ holds a block supplied by a caller
  bool has_vec_mem_block_{false};
  mutable IndexStorage::MemoryBlock vec_mem_block_{};
  IndexSparseDocument sparse_doc_{};
};

/*! Index Document Heap
 *
 *  ailego::Heap of IndexDocument with an additional score threshold: the
 *  emplace() overloads declared here only forward documents whose score is
 *  less than or equal to the threshold.
 */
class IndexDocumentHeap : public ailego::Heap<IndexDocument> {
 public:
  //! Constructor
  IndexDocumentHeap() : ailego::Heap<IndexDocument>() {}

  //! Constructor (`max` is forwarded to the base heap)
  IndexDocumentHeap(size_t max) : ailego::Heap<IndexDocument>(max) {}

  //! Constructor (`max` is forwarded to the base heap, `val` is the threshold)
  IndexDocumentHeap(size_t max, float val)
      : ailego::Heap<IndexDocument>(max), threshold_(val) {}

  //! Copy constructor
  IndexDocumentHeap(const IndexDocumentHeap &rhs)
      : ailego::Heap<IndexDocument>(rhs), threshold_(rhs.threshold_) {}

  //! Move constructor
  //  Only the base part of `rhs` is moved; its threshold is a plain float
  //  and is read afterwards.
  IndexDocumentHeap(IndexDocumentHeap &&rhs)
      : ailego::Heap<IndexDocument>(std::move(rhs)),
        threshold_(rhs.threshold_) {}

  //! Constructor (from a document vector; threshold keeps its default)
  IndexDocumentHeap(const std::vector<IndexDocument> &rhs)
      : ailego::Heap<IndexDocument>(rhs) {}

  //! Constructor (from a document vector; threshold keeps its default)
  IndexDocumentHeap(std::vector<IndexDocument> &&rhs)
      : ailego::Heap<IndexDocument>(std::move(rhs)) {}

  //! Insert a document into the heap
  void emplace(uint64_t key, float score) {
    // Written as a negated `<=` so that a NaN score is rejected as well.
    if (!(score <= threshold_)) {
      return;
    }
    ailego::Heap<IndexDocument>::emplace(key, score);
  }

  //! Insert a document into the heap
  void emplace(uint64_t key, float score, uint32_t index) {
    // Written as a negated `<=` so that a NaN score is rejected as well.
    if (!(score <= threshold_)) {
      return;
    }
    ailego::Heap<IndexDocument>::emplace(key, score, index);
  }

  //! Set threshold for RNN
  void set_threshold(float val) {
    threshold_ = val;
  }

  //! Retrieve value of threshold for RNN
  float threshold() const {
    return threshold_;
  }

 private:
  //! members
  float threshold_{std::numeric_limits<float>::max()};
};

class GroupIndexDocument {
 public:
  //! Constructor (empty group id, no documents)
  GroupIndexDocument() = default;

  //! Constructor (copies both the group id and the documents)
  GroupIndexDocument(const std::string &group_id,
                     const std::vector<IndexDocument> &docs)
      : group_id_(group_id), docs_(docs) {}

  //! Retrieve group id
  const std::string &group_id() const {
    return group_id_;
  }

  //! Retrieve mutable group id
  std::string *mutable_group_id() {
    return &group_id_;
  }

  //! Set group id
  void set_group_id(const std::string &group_id) {
    group_id_ = group_id;
  }

  //! Retrieve documents of the group
  const std::vector<IndexDocument> &docs() const {
    return docs_;
  }

  //! Retrieve mutable documents of the group
  std::vector<IndexDocument> *mutable_docs() {
    return &docs_;
  }

 private:
  std::string group_id_;
  std::vector<IndexDocument> docs_;
};

/*! Index Document List
 *  Flat sequence of IndexDocument entries.
 */
using IndexDocumentList = std::vector<IndexDocument>;
//! Index Group Document List: sequence of GroupIndexDocument entries, each
//! carrying a group id together with its own documents.
using IndexGroupDocumentList = std::vector<GroupIndexDocument>;

}  // namespace core
}  // namespace zvec


================================================
FILE: src/include/zvec/core/framework/index_dumper.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <zvec/ailego/container/params.h>
#include <zvec/core/framework/index_module.h>
#include <zvec/core/framework/index_packer.h>

namespace zvec {
namespace core {

/*! Index Dumper
 *
 *  Interface of a sink that index data is written to: a target is created,
 *  raw bytes are written, segment metadata is appended, and the target is
 *  closed.
 */
class IndexDumper : public IndexModule {
 public:
  //! Index Dumper Pointer
  using Pointer = std::shared_ptr<IndexDumper>;

  //! Destructor
  virtual ~IndexDumper() {}

  //! Initialize dumper
  virtual int init(const ailego::Params &params) = 0;

  //! Cleanup dumper
  virtual int cleanup() = 0;

  //! Create a file for dumping
  virtual int create(const std::string &path) = 0;

  //! Close file
  virtual int close() = 0;

  //! Append a segment meta into table
  virtual int append(const std::string &id, size_t data_size,
                     size_t padding_size, uint32_t crc) = 0;

  //! Write data to the storage (returns a byte count as size_t)
  virtual size_t write(const void *data, size_t len) = 0;

  //! Retrieve magic number of index
  virtual uint32_t magic() const = 0;
};

/*! Index Segment Dumper
 *
 *  Dumper that packs everything written to it with an IndexPacker and
 *  forwards the packed bytes to a wrapped dumper. On close, the total number
 *  of forwarded bytes is appended to the wrapped dumper as one segment named
 *  by the segment id.
 */
class IndexSegmentDumper : public IndexDumper {
 public:
  //! Index Segment Dumper Pointer
  using Pointer = std::shared_ptr<IndexSegmentDumper>;

  //! Constructor
  IndexSegmentDumper(IndexDumper::Pointer dumper, std::string segid)
      : segment_id_(std::move(segid)), dumper_(std::move(dumper)) {}

  //! Destructor (closes the segment; the result of the close is discarded)
  virtual ~IndexSegmentDumper() {
    this->close_index();
  }

  //! Initialize dumper (nothing to do)
  int init(const ailego::Params &) override {
    return 0;
  }

  //! Cleanup dumper (nothing to do)
  int cleanup() override {
    return 0;
  }

  //! Create a file for dumping
  //  Refused with IndexError_NoReady once bytes have been forwarded for the
  //  current segment. The segment id is replaced only after the packer has
  //  been set up successfully.
  int create(const std::string &segid) override {
    if (dumped_size_ != 0) {
      return IndexError_NoReady;
    }

    auto sink = [this](const void *buf, size_t size) {
      return this->write_to_dumper(buf, size);
    };
    if (!packer_.setup(sink)) {
      return IndexError_WriteData;
    }
    segment_id_ = segid;
    return 0;
  }

  //! Close file
  int close() override {
    return this->close_index();
  }

  //! Append a segment meta into table (recorded locally until close)
  int append(const std::string &id, size_t data_size, size_t padding_size,
             uint32_t crc) override {
    stab_.emplace_back(id, data_size, padding_size, crc);
    return 0;
  }

  //! Write data to the storage
  //  If nothing has been forwarded yet, the packer is set up first; a failed
  //  setup makes the call return 0.
  size_t write(const void *data, size_t len) override {
    auto sink = [this](const void *buf, size_t size) {
      return this->write_to_dumper(buf, size);
    };

    if (dumped_size_ == 0) {
      if (!packer_.setup(sink)) {
        return 0;
      }
    }
    return packer_.pack(sink, data, len);
  }

  //! Retrieve magic number of index
  uint32_t magic() const override {
    return packer_.magic();
  }

 protected:
  //! Write data to dumper and add the written length to the running total
  size_t write_to_dumper(const void *data, size_t len) {
    const size_t written = dumper_->write(data, len);
    dumped_size_ += written;
    return written;
  }

  //! Close index file
  //  Does nothing when no bytes were forwarded. Otherwise the packer is
  //  finished with the recorded segment table, the table is cleared, and the
  //  segment is appended to the wrapped dumper before the total is zeroed.
  int close_index() {
    if (dumped_size_ == 0) {
      return 0;
    }

    auto sink = [this](const void *buf, size_t size) {
      return this->write_to_dumper(buf, size);
    };
    if (!packer_.finish(sink, stab_)) {
      return IndexError_WriteData;
    }
    stab_.clear();

    const int status = dumper_->append(segment_id_, dumped_size_, 0, 0);
    dumped_size_ = 0u;
    return status;
  }

 private:
  size_t dumped_size_{0};
  std::string segment_id_{};
  IndexDumper::Pointer dumper_{};
  IndexPacker packer_{};
  std::vector<IndexPacker::SegmentMeta> stab_{};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/include/zvec/core/framework/index_error.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <map>
#include <zvec/ailego/pattern/expected.hpp>

namespace zvec {
namespace core {

/*! Error
 *
 *  ErrorCode is only forward-declared in this header; its definition is not
 *  visible here.
 */

class ErrorCode;

//! Either a value of type T or an ErrorCode
template <typename T>
using Result = tl::expected<T, ErrorCode>;

//! Error-side wrapper used to build a failed Result.
//! NOTE: the template parameter T is not used by the alias, so every
//! result_unexpect<T> names the same type, tl::unexpected<ErrorCode>.
template <typename T>
using result_unexpect = tl::unexpected<ErrorCode>;

/*! Index Error
 *
 *  Singleton registry that maps integer error values to their descriptions.
 *  Every IndexError::Code adds itself to the registry when constructed.
 */
class IndexError {
 public:
  /*! Index Error Code
   *
   *  Holds the negated value passed to the constructor together with a
   *  description pointer (the pointer is stored, the text is not copied).
   */
  class Code {
   public:
    //! Constructor: stores `-val` and registers this code in the registry
    Code(int val, const char *str) : value_(-val), desc_(str) {
      IndexError::Instance()->emplace(this);
    }

    //! Implicit conversion to the (negated) integer value
    operator int() const {
      return value_;
    }

    //! Retrieve the (negated) integer value of the code
    int value() const {
      return value_;
    }

    //! Retrieve the description of the code
    const char *desc() const {
      return desc_;
    }

   private:
    int value_;
    const char *desc_;
  };

  //! Look up the description registered for `val`; "" if none is registered
  static const char *What(int val) {
    return IndexError::Instance()->what(val);
  }

 protected:
  //! Constructor
  IndexError() : map_() {}

  //! Register a code under its value; an already registered value is kept
  void emplace(const IndexError::Code *code) {
    map_.emplace(code->value(), code);
  }

  //! Look up the description of `val`; "" if the value is unknown
  const char *what(int val) const {
    const auto found = map_.find(val);
    return (found == map_.end()) ? "" : found->second->desc();
  }

  //! Retrieve the singleton (function-local static, created on first use)
  static IndexError *Instance() {
    static IndexError registry;
    return &registry;
  }

 private:
  //! Disable them
  IndexError(const IndexError &) = delete;
  IndexError(IndexError &&) = delete;
  IndexError &operator=(const IndexError &) = delete;

  //! Error code map: value -> registered code (pointers are not owned)
  std::map<int, const IndexError::Code *> map_;
};

//! Index Error Code Define
//! Defines the constant `IndexError_<NAME>` as an IndexError::Code built from
//! (VAL, DESC); the Code constructor stores the negated value, so the constant
//! converts to -VAL. A second reference named `_IndexErrorCode_<VAL>_Register`
//! is bound to it; since that name is pasted from __VAL__, the argument must
//! be a token that can end an identifier, and defining two codes with the
//! same value token in one scope is a redefinition error.
//! The expansion has no trailing semicolon; the caller supplies it.
#define INDEX_ERROR_CODE_DEFINE(__NAME__, __VAL__, __DESC__)           \
  const IndexError::Code IndexError_##__NAME__((__VAL__), (__DESC__)); \
  const IndexError::Code &_IndexErrorCode_##__VAL__##_Register(        \
      IndexError_##__NAME__)

//! Index Error Code Declare
//! Declares `IndexError_<NAME>` as an extern constant so it can be referenced
//! from translation units other than the one that defines it.
#define INDEX_ERROR_CODE_DECLARE(__NAME__) \
  extern const IndexError::Code IndexError_##__NAME__

//! Built-in error codes
INDEX_ERROR_CODE_DECLARE(Success);      // Success
INDEX_ERROR_CODE_DECLARE(Runtime);      // Runtime error
INDEX_ERROR_CODE_DECLARE(Logic);        // Logic error
INDEX_ERROR_CODE_DECLARE(Type);         // Type error
INDEX_ERROR_CODE_DECLARE(System);       // System call error
INDEX_ERROR_CODE_DECLARE(Cast);         // Cast error
INDEX_ERROR_CODE_DECLARE(IO);           // IO error
INDEX_ERROR_CODE_DECLARE(AuthExpired);  // Auth expired error

INDEX_ERROR_CODE_DECLARE(NotImplemented);  // Not implemented
INDEX_ERROR_CODE_DECLARE(Unsupported);     // Unsupported
INDEX_ERROR_CODE_DECLARE(Denied);          // Permission denied
INDEX_ERROR_CODE_DECLARE(Canceled);        // Operation canceled
INDEX_ERROR_CODE_DECLARE(Overflow);        // Overflow
INDEX_ERROR_CODE_DECLARE(Underflow);       // Underflow
INDEX_ERROR_CODE_DECLARE(OutOfRange);      // Out of range
INDEX_ERROR_CODE_DECLARE(NoBuffer);        // No buffer space available
INDEX_ERROR_CODE_DECLARE(NoMemory);        // Not enough space
INDEX_ERROR_CODE_DECLARE(NoParamFound);    // No parameter found
INDEX_ERROR_CODE_DECLARE(NoReady);         // Not ready
INDEX_ERROR_CODE_DECLARE(NoExist);         // Does not exist
INDEX_ERROR_CODE_DECLARE(Exist);           // Already exists
INDEX_ERROR_CODE_DECLARE(Mismatch);        // Mismatch
INDEX_ERROR_CODE_DECLARE(Duplicate);       // Duplicate
INDEX_ERROR_CODE_DECLARE(Uninitialized);   // Uninitialized

INDEX_ERROR_CODE_DECLARE(InvalidArgument);  // Invalid argument
INDEX_ERROR_CODE_DECLARE(InvalidFormat);    // Invalid format
INDEX_ERROR_CODE_DECLARE(InvalidLength);    // Invalid length
INDEX_ERROR_CODE_DECLARE(InvalidChecksum);  // Invalid checksum
INDEX_ERROR_CODE_DECLARE(InvalidValue);     // Invalid value

INDEX_ERROR_CODE_DECLARE(CreateDirectory);  // Create directory error
INDEX_ERROR_CODE_DECLARE(OpenDirectory);    // Open directory error
INDEX_ERROR_CODE_DECLARE(Serialize);        // Serialize error
INDEX_ERROR_CODE_DECLARE(Deserialize);      // Deserialize error
INDEX_ERROR_CODE_DECLARE(CreateFile);       // Create file error
INDEX_ERROR_CODE_DECLARE(OpenFile);         // Open file error
INDEX_ERROR_CODE_DECLARE(SeekFile);         // Seek file error
INDEX_ERROR_CODE_DECLARE(CloseFile);        // Close file error
INDEX_ERROR_CODE_DECLARE(TruncateFile);     // Truncate file error
INDEX_ERROR_CODE_DECLARE(MMapFile);         // MMap file error
INDEX_ERROR_CODE_DECLARE(FlushFile);        // Flush file error
INDEX_ERROR_CODE_DECLARE(WriteData);        // Write data error
INDEX_ERROR_CODE_DECLARE(ReadData);         // Read data error

INDEX_ERROR_CODE_DECLARE(PackIndex);      // Pack index error
INDEX_ERROR_CODE_DECLARE(UnpackIndex);    // Unpack index error
INDEX_ERROR_CODE_DECLARE(IndexLoaded);    // Index loaded
INDEX_ERROR_CODE_DECLARE(NoIndexLoaded);  // No index loaded
INDEX_ERROR_CODE_DECLARE(NoTrained);      // Not trained
INDEX_ERROR_CODE_DECLARE(IndexFull);      // Index full

}  // namespace core
}  // namespace zvec

================================================
FILE: src/include/zvec/core/framework/index_factory.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <zvec/ailego/pattern/factory.h>
#include <zvec/core/framework/index_builder.h>
#include <zvec/core/framework/index_cluster.h>
#include <zvec/core/framework/index_converter.h>
#include <zvec/core/framework/index_dumper.h>
#include <zvec/core/framework/index_logger.h>
#include <zvec/core/framework/index_metric.h>
#include <zvec/core/framework/index_reducer.h>
#include <zvec/core/framework/index_refiner.h>
#include <zvec/core/framework/index_reformer.h>
#include <zvec/core/framework/index_searcher.h>
#include <zvec/core/framework/index_storage.h>
#include <zvec/core/framework/index_streamer.h>
#include <zvec/core/framework/index_trainer.h>

namespace zvec {
namespace core {

/*! Index Factory
 *
 *  Name-based access to the index component classes. Each component family
 *  offers three static functions: Create<X>(name) returns a Pointer to a new
 *  instance, Has<X>(name) tests whether `name` is known, and All<X>s() lists
 *  the known names. Containers are the exception: only HasContainer() and
 *  AllContainers() are declared, there is no CreateContainer().
 *  Only declarations are visible here; what Create<X>() returns for an
 *  unknown name is decided by the implementation.
 */
struct IndexFactory {
  //! Create an index metric by name
  static IndexMetric::Pointer CreateMetric(const std::string &name);

  //! Test if the metric exists
  static bool HasMetric(const std::string &name);

  //! Retrieve all metric classes
  static std::vector<std::string> AllMetrics(void);

  //! Create an index logger by name
  static IndexLogger::Pointer CreateLogger(const std::string &name);

  //! Test if the logger exists
  static bool HasLogger(const std::string &name);

  //! Retrieve all logger classes
  static std::vector<std::string> AllLoggers(void);

  //! Create an index dumper by name
  static IndexDumper::Pointer CreateDumper(const std::string &name);

  //! Test if the dumper exists
  static bool HasDumper(const std::string &name);

  //! Retrieve all dumper classes
  static std::vector<std::string> AllDumpers(void);

  //! Test if the container exists
  static bool HasContainer(const std::string &name);

  //! Retrieve all container classes
  static std::vector<std::string> AllContainers(void);

  //! Create an index storage by name
  static IndexStorage::Pointer CreateStorage(const std::string &name);

  //! Test if the storage exists
  static bool HasStorage(const std::string &name);

  //! Retrieve all storage classes
  static std::vector<std::string> AllStorages(void);

  //! Create an index converter by name
  static IndexConverter::Pointer CreateConverter(const std::string &name);

  //! Test if the converter exists
  static bool HasConverter(const std::string &name);

  //! Retrieve all converter classes
  static std::vector<std::string> AllConverters(void);

  //! Create an index reformer by name
  static IndexReformer::Pointer CreateReformer(const std::string &name);

  //! Test if the reformer exists
  static bool HasReformer(const std::string &name);

  //! Retrieve all reformer classes
  static std::vector<std::string> AllReformers(void);

  //! Create an index trainer by name
  static IndexTrainer::Pointer CreateTrainer(const std::string &name);

  //! Test if the trainer exists
  static bool HasTrainer(const std::string &name);

  //! Retrieve all trainer classes
  static std::vector<std::string> AllTrainers(void);

  //! Create an index builder by name
  static IndexBuilder::Pointer CreateBuilder(const std::string &name);

  //! Test if the builder exists
  static bool HasBuilder(const std::string &name);

  //! Retrieve all builder classes
  static std::vector<std::string> AllBuilders(void);

  //! Create an index searcher by name
  static IndexSearcher::Pointer CreateSearcher(const std::string &name);

  //! Test if the searcher exists
  static bool HasSearcher(const std::string &name);

  //! Retrieve all searcher classes
  static std::vector<std::string> AllSearchers(void);

  //! Create an index streamer by name
  static IndexStreamer::Pointer CreateStreamer(const std::string &name);

  //! Test if the streamer exists
  static bool HasStreamer(const std::string &name);

  //! Retrieve all streamer classes
  static std::vector<std::string> AllStreamers(void);

  //! Create an index reducer by name
  static IndexReducer::Pointer CreateReducer(const std::string &name);

  //! Test if the reducer exists
  static bool HasReducer(const std::string &name);

  //! Retrieve all reducer classes
  static std::vector<std::string> AllReducers(void);

  //! Create an index cluster by name
  static IndexCluster::Pointer CreateCluster(const std::string &name);

  //! Test if the cluster exists
  static bool HasCluster(const std::string &name);

  //! Retrieve all cluster classes
  static std::vector<std::string> AllClusters(void);

  //! Create an index streamer reducer by name
  static IndexStreamerReducer::Pointer CreateStreamerReducer(
      const std::string &name);

  //! Test if the streamer reducer exists
  static bool HasStreamerReducer(const std::string &name);

  //! Retrieve all streamer reducer classes
  static std::vector<std::string> AllStreamerReducers(void);

  //! Create an index refiner by name
  static IndexRefiner::Pointer CreateRefiner(const std::string &name);

  //! Test if the refiner exists
  static bool HasRefiner(const std::string &name);

  //! Retrieve all refiner classes
  static std::vector<std::string> AllRefiners(void);
};

//! Registration macros.
//! Every component family has two forms:
//!   *_ALIAS(__NAME__, __IMPL__, ...) expands to AILEGO_FACTORY_REGISTER with
//!     the family's interface type, registering __IMPL__ under __NAME__;
//!   the form without _ALIAS passes __IMPL__ as the name as well.
//! Any extra arguments are forwarded unchanged via ##__VA_ARGS__.

//! Register an IndexMetric implementation under the alias __NAME__
#define INDEX_FACTORY_REGISTER_METRIC_ALIAS(__NAME__, __IMPL__, ...) \
  AILEGO_FACTORY_REGISTER(__NAME__, IndexMetric, __IMPL__, ##__VA_ARGS__)

//! Register an IndexMetric implementation under its own name
#define INDEX_FACTORY_REGISTER_METRIC(__IMPL__, ...) \
  INDEX_FACTORY_REGISTER_METRIC_ALIAS(__IMPL__, __IMPL__, ##__VA_ARGS__)

//! Register an IndexLogger implementation under the alias __NAME__
#define INDEX_FACTORY_REGISTER_LOGGER_ALIAS(__NAME__, __IMPL__, ...) \
  AILEGO_FACTORY_REGISTER(__NAME__, IndexLogger, __IMPL__, ##__VA_ARGS__)

//! Register an IndexLogger implementation under its own name
#define INDEX_FACTORY_REGISTER_LOGGER(__IMPL__, ...) \
  INDEX_FACTORY_REGISTER_LOGGER_ALIAS(__IMPL__, __IMPL__, ##__VA_ARGS__)

//! Register an IndexDumper implementation under the alias __NAME__
#define INDEX_FACTORY_REGISTER_DUMPER_ALIAS(__NAME__, __IMPL__, ...) \
  AILEGO_FACTORY_REGISTER(__NAME__, IndexDumper, __IMPL__, ##__VA_ARGS__)

//! Register an IndexDumper implementation under its own name
#define INDEX_FACTORY_REGISTER_DUMPER(__IMPL__, ...) \
  INDEX_FACTORY_REGISTER_DUMPER_ALIAS(__IMPL__, __IMPL__, ##__VA_ARGS__)

//! Register an IndexStorage implementation under the alias __NAME__
#define INDEX_FACTORY_REGISTER_STORAGE_ALIAS(__NAME__, __IMPL__, ...) \
  AILEGO_FACTORY_REGISTER(__NAME__, IndexStorage, __IMPL__, ##__VA_ARGS__)

//! Register an IndexStorage implementation under its own name
#define INDEX_FACTORY_REGISTER_STORAGE(__IMPL__, ...) \
  INDEX_FACTORY_REGISTER_STORAGE_ALIAS(__IMPL__, __IMPL__, ##__VA_ARGS__)

//! Register an IndexConverter implementation under the alias __NAME__
#define INDEX_FACTORY_REGISTER_CONVERTER_ALIAS(__NAME__, __IMPL__, ...) \
  AILEGO_FACTORY_REGISTER(__NAME__, IndexConverter, __IMPL__, ##__VA_ARGS__)

//! Register an IndexConverter implementation under its own name
#define INDEX_FACTORY_REGISTER_CONVERTER(__IMPL__, ...) \
  INDEX_FACTORY_REGISTER_CONVERTER_ALIAS(__IMPL__, __IMPL__, ##__VA_ARGS__)

//! Register an IndexReformer implementation under the alias __NAME__
#define INDEX_FACTORY_REGISTER_REFORMER_ALIAS(__NAME__, __IMPL__, ...) \
  AILEGO_FACTORY_REGISTER(__NAME__, IndexReformer, __IMPL__, ##__VA_ARGS__)

//! Register an IndexReformer implementation under its own name
#define INDEX_FACTORY_REGISTER_REFORMER(__IMPL__, ...) \
  INDEX_FACTORY_REGISTER_REFORMER_ALIAS(__IMPL__, __IMPL__, ##__VA_ARGS__)

//! Register an IndexTrainer implementation under the alias __NAME__
#define INDEX_FACTORY_REGISTER_TRAINER_ALIAS(__NAME__, __IMPL__, ...) \
  AILEGO_FACTORY_REGISTER(__NAME__, IndexTrainer, __IMPL__, ##__VA_ARGS__)

//! Register an IndexTrainer implementation under its own name
#define INDEX_FACTORY_REGISTER_TRAINER(__IMPL__, ...) \
  INDEX_FACTORY_REGISTER_TRAINER_ALIAS(__IMPL__, __IMPL__, ##__VA_ARGS__)

//! Register an IndexBuilder implementation under the alias __NAME__
#define INDEX_FACTORY_REGISTER_BUILDER_ALIAS(__NAME__, __IMPL__, ...) \
  AILEGO_FACTORY_REGISTER(__NAME__, IndexBuilder, __IMPL__, ##__VA_ARGS__)

//! Register an IndexBuilder implementation under its own name
#define INDEX_FACTORY_REGISTER_BUILDER(__IMPL__, ...) \
  INDEX_FACTORY_REGISTER_BUILDER_ALIAS(__IMPL__, __IMPL__, ##__VA_ARGS__)

//! Register an IndexSearcher implementation under the alias __NAME__
#define INDEX_FACTORY_REGISTER_SEARCHER_ALIAS(__NAME__, __IMPL__, ...) \
  AILEGO_FACTORY_REGISTER(__NAME__, IndexSearcher, __IMPL__, ##__VA_ARGS__)

//! Register an IndexSearcher implementation under its own name
#define INDEX_FACTORY_REGISTER_SEARCHER(__IMPL__, ...) \
  INDEX_FACTORY_REGISTER_SEARCHER_ALIAS(__IMPL__, __IMPL__, ##__VA_ARGS__)

//! Register an IndexStreamer implementation under the alias __NAME__
#define INDEX_FACTORY_REGISTER_STREAMER_ALIAS(__NAME__, __IMPL__, ...) \
  AILEGO_FACTORY_REGISTER(__NAME__, IndexStreamer, __IMPL__, ##__VA_ARGS__)

//! Register an IndexStreamer implementation under its own name
#define INDEX_FACTORY_REGISTER_STREAMER(__IMPL__, ...) \
  INDEX_FACTORY_REGISTER_STREAMER_ALIAS(__IMPL__, __IMPL__, ##__VA_ARGS__)

//! Register an IndexReducer implementation under the alias __NAME__
#define INDEX_FACTORY_REGISTER_REDUCER_ALIAS(__NAME__, __IMPL__, ...) \
  AILEGO_FACTORY_REGISTER(__NAME__, IndexReducer, __IMPL__, ##__VA_ARGS__)

//! Register an IndexReducer implementation under its own name
#define INDEX_FACTORY_REGISTER_REDUCER(__IMPL__, ...) \
  INDEX_FACTORY_REGISTER_REDUCER_ALIAS(__IMPL__, __IMPL__, ##__VA_ARGS__)

//! Register an IndexStreamerReducer implementation under the alias __NAME__
#define INDEX_FACTORY_REGISTER_STREAMER_REDUCER_ALIAS(__NAME__, __IMPL__, ...) \
  AILEGO_FACTORY_REGISTER(__NAME__, IndexStreamerReducer, __IMPL__,            \
                          ##__VA_ARGS__)

//! Register an IndexStreamerReducer implementation under its own name
#define INDEX_FACTORY_REGISTER_STREAMER_REDUCER(__IMPL__, ...)      \
  INDEX_FACTORY_REGISTER_STREAMER_REDUCER_ALIAS(__IMPL__, __IMPL__, \
                                                ##__VA_ARGS__)

//! Register an IndexCluster implementation under the alias __NAME__
#define INDEX_FACTORY_REGISTER_CLUSTER_ALIAS(__NAME__, __IMPL__, ...) \
  AILEGO_FACTORY_REGISTER(__NAME__, IndexCluster, __IMPL__, ##__VA_ARGS__)

//! Register an IndexCluster implementation under its own name
#define INDEX_FACTORY_REGISTER_CLUSTER(__IMPL__, ...) \
  INDEX_FACTORY_REGISTER_CLUSTER_ALIAS(__IMPL__, __IMPL__, ##__VA_ARGS__)

//! Register an IndexRefiner implementation under the alias __NAME__
#define INDEX_FACTORY_REGISTER_REFINER_ALIAS(__NAME__, __IMPL__, ...) \
  AILEGO_FACTORY_REGISTER(__NAME__, IndexRefiner, __IMPL__, ##__VA_ARGS__)

//! Register an IndexRefiner implementation under its own name
#define INDEX_FACTORY_REGISTER_REFINER(__IMPL__, ...) \
  INDEX_FACTORY_REGISTER_REFINER_ALIAS(__IMPL__, __IMPL__, ##__VA_ARGS__)

}  // namespace core
}  // namespace zvec


================================================
FILE: src/include/zvec/core/framework/index_features.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <cstring>
#include <memory>
#include <random>
#include <string>
#include <vector>
#include <zvec/core/framework/index_meta.h>

namespace zvec {
namespace core {

/*! Index Features
 *
 *  Abstract read-only view over a set of features that all share one data
 *  type, dimension and element size. Implementations decide how the features
 *  are stored.
 */
struct IndexFeatures {
  //! Index Features Pointer
  using Pointer = std::shared_ptr<IndexFeatures>;

  //! Destructor
  virtual ~IndexFeatures() = default;

  //! Retrieve the i-th feature
  virtual const void *element(size_t i) const = 0;

  //! Retrieve the number of features
  virtual size_t count() const = 0;

  //! Retrieve the dimension of a feature
  virtual size_t dimension() const = 0;

  //! Retrieve the data type of a feature
  virtual IndexMeta::DataType data_type() const = 0;

  //! Test if all features live in one contiguous buffer (default: no)
  virtual bool is_compacted() const {
    return false;
  }

  //! Retrieve the contiguous buffer (default: none, i.e. nullptr)
  virtual const void *data() const {
    return nullptr;
  }

  //! Retrieve the size of one feature; the default derives it from the data
  //! type and the dimension via IndexMeta::ElementSizeof()
  virtual size_t element_size() const {
    return IndexMeta::ElementSizeof(this->data_type(), this->dimension());
  }

  //! Operator []: same as element(i)
  const void *operator[](size_t i) const {
    return this->element(i);
  }

  //! Test if data type, dimension and element size all equal those of `meta`
  bool is_matched(const IndexMeta &meta) const {
    if (meta.data_type() != this->data_type()) {
      return false;
    }
    if (meta.dimension() != this->dimension()) {
      return false;
    }
    return (meta.element_size() == this->element_size());
  }
};

/*! Coherent Index Features
 */
class CoherentIndexFeatures : public IndexFeatures {
 public:
  //! Coherent Index Features Pointer
  typedef std::shared_ptr<CoherentIndexFeatures> Pointer;

  //! Constructor
  CoherentIndexFeatures(void)
      : features_buffer_(nullptr),
        features_count_(0),
        feature_size_(0),
        feature_dimension_(0),
        data_type_(IndexMeta::DataType::DT_UNDEFINED) {}

  //! Constructor
  CoherentIndexFeatures(const IndexMeta &meta)
      : features_buffer_(nullptr),
        features_count_(0),
        feature_size_(meta.element_size()),
        feature_dimension_(meta.dimension()),
        data_type_(meta.data_type()) {}

  //! Constructor
  CoherentIndexFeatures(const IndexMeta &meta, const void *buf, size_t len)
      : features_buffer_(buf),
        features_count_(len / meta.element_size()),
        feature_size_(meta.element_size()),
        feature_dimension_(meta.dimension()),
        data_type_(meta.data_type()) {}

  //! Mount features
  void mount(const IndexMeta &meta, const void *buf, size_t len) {
    features_buffer_ = buf;
    data_type_ = meta.data_type();
    feature_size_ = meta.element_size();
    feature_dimension_ = meta.dimension();
    features_count_ = len / feature_size_;
  }

  //! Mount features
  void mount(const void *buf, size_t len) {
    features_buffer_ = buf;
    features_count_ = len / feature_size_;
  }

  //! Retrieve count of elements
  virtual size_t count(void) const {
    return features_count_;
  }

  //! Retrieve dimension
  virtual size_t dimension(void) const {
    return feature_dimension_;
  }

  //! Retrieve feature via index
  virtual const void *element(size_t i) const {
    return (reinterpret_cast<const char *>(features_buffer_) +
            feature_size_ * i);
  }

  //! Retrieve type information
  virtual IndexMeta::DataType data_type(void) const {
    return data_type_;
  }

  //! Test if it is a compacted buffer
  virtual bool is_compacted(void) const {
    return true;
  }

  //! Retrieve pointer of compacted buffer
  virtual const void *data(void) const {
    return features_buffer_;
  }

  //! Retrieve size of feature
  virtual size_t element_size(void) const {
    return feature_size_;
  }

 private:
  const void *features_buffer_;
  size_t features_count_;
  size_t feature_size_;
  size_t feature_dimension_;
  IndexMeta::DataType data_type_;
};

/*! Flexible Index Features
 */
class FlexibleIndexFeatures : public IndexFeatures {
 public:
  //! Flexible Index Features Pointer
  typedef std::shared_ptr<FlexibleIndexFeatures> Pointer;

  //! Constructor
  FlexibleIndexFeatures(void)
      : features_(nullptr),
        features_count_(0),
        feature_size_(0),
        feature_dimension_(0),
        data_type_(IndexMeta::DataType::DT_UNDEFINED) {}

  //! Constructor
  FlexibleIndexFeatures(const IndexMeta &meta)
      : features_(nullptr),
        features_count_(0),
        feature_size_(meta.element_size()),
        feature_dimension_(meta.dimension()),
        data_type_(meta.data_type()) {}

  //! Constructor
  FlexibleIndexFeatures(const IndexMeta &meta, const void *const *feats,
                        size_t feats_count)
      : features_(feats),
        features_count_(feats_count),
        feature_size_(meta.element_size()),
        feature_dimension_(meta.dimension()),
        data_type_(meta.data_type()) {}

  //! Mount features
  void mount(const IndexMeta &meta, const void *const *feats,
             size_t feats_count) {
    features_ = feats;
    features_count_ = feats_count;
    data_type_ = meta.data_type();
    feature_size_ = meta.element_size();
    feature_dimension_ = meta.dimension();
  }

  //! Mount features
  void mount(const void *const *feats, size_t feats_count) {
    features_ = feats;
    features_count_ = feats_count;
  }

  //! Retrieve count of elements
  virtual size_t count(void) const {
    return features_count_;
  }

  //! Retrieve dimension
  virtual size_t dimension(void) const {
    return feature_dimension_;
  }

  //! Retrieve feature via index
  virtual const void *element(size_t i) const {
    return *(features_ + i);
  }

  //! Retrieve type information
  virtual IndexMeta::DataType data_type(void) const {
    return data_type_;
  }

  //! Retrieve size of feature
  virtual size_t element_size(void) const {
    return feature_size_;
  }

 private:
  const void *const *features_;
  size_t features_count_;
  size_t feature_size_;
  size_t feature_dimension_;
  IndexMeta::DataType data_type_;
};

/*! Gap Index Features
 */
class GapIndexFeatures : public IndexFeatures {
 public:
  //! Gap Index Features Pointer
  typedef std::shared_ptr<GapIndexFeatures> Pointer;

  //! Constructor
  GapIndexFeatures(const IndexMeta &meta)
      : features_(),
        bucket_limit_(0),
        features_count_(0),
        feature_size_(meta.element_size()),
        feature_dimension_(meta.dimension()),
        data_type_(meta.data_type()) {
    if (feature_size_ >= 1024 * 1024) {
      bucket_limit_ = 64u;
    } else {
      bucket_limit_ = (1024 * 1024 * 64) / feature_size_;
    }
  }

  //! Constructor
  GapIndexFeatures(const GapIndexFeatures &rhs)
      : features_(rhs.features_),
        bucket_limit_(rhs.bucket_limit_),
        features_count_(rhs.features_count_),
        feature_size_(rhs.feature_size_),
        feature_dimension_(rhs.feature_dimension_),
        data_type_(rhs.data_type_) {}

  //! Constructor
  GapIndexFeatures(GapIndexFeatures &&rhs)
      : features_(std::move(rhs.features_)),
        bucket_limit_(rhs.bucket_limit_),
        features_count_(rhs.features_count_),
        feature_size_(rhs.feature_size_),
        feature_dimension_(rhs.feature_dimension_),
        data_type_(rhs.data_type_) {}

  //! Assignment
  GapIndexFeatures &operator=(const GapIndexFeatures &rhs) {
    features_ = rhs.features_;
    bucket_limit_ = rhs.bucket_limit_;
    features_count_ = rhs.features_count_;
    feature_size_ = rhs.feature_size_;
    feature_dimension_ = rhs.feature_dimension_;
    data_type_ = rhs.data_type_;
    return *this;
  }

  //! Assignment
  GapIndexFeatures &operator=(GapIndexFeatures &&rhs) {
    features_ = std::move(rhs.features_);
    bucket_limit_ = rhs.bucket_limit_;
    features_count_ = rhs.features_count_;
    feature_size_ = rhs.feature_size_;
    feature_dimension_ = rhs.feature_dimension_;
    data_type_ = rhs.data_type_;
    return *this;
  }

  //! Append a feature
  void emplace(const void *feat) {
    if (features_count_ % bucket_limit_ == 0) {
      std::string bucket;
      bucket.reserve(bucket_limit_ * feature_size_);
      bucket.assign(reinterpret_cast<const char *>(feat), feature_size_);
      features_.push_back(std::move(bucket));
    } else {
      features_[features_count_ / bucket_limit_].append(
          reinterpret_cast<const char *>(feat), feature_size_);
    }
    ++features_count_;
  }

  //! Replace a feature
  void replace(size_t i, const void *feat) {
    std::memcpy(const_cast<char *>(features_[i / bucket_limit_].data()) +
                    feature_size_ * (i % bucket_limit_),
                feat, feature_size_);
  }

  //! Clear the features
  void clear(void) {
    features_.clear();
    features_count_ = 0;
  }

  //! Retrieve feature via index
  void *at(size_t i) {
    return (const_cast<char *>(features_[i / bucket_limit_].data()) +
            feature_size_ * (i % bucket_limit_));
  }

  //! Retrieve feature via index
  const void *at(size_t i) const {
    return (features_[i / bucket_limit_].data() +
            feature_size_ * (i % bucket_limit_));
  }

  //! Retrieve count of elements
  virtual size_t count(void) const {
    return features_count_;
  }

  //! Retrieve dimension
  virtual size_t dimension(void) const {
    return feature_dimension_;
  }

  //! Retrieve feature via index
  virtual const void *element(size_t i) const {
    return this->at(i);
  }

  //! Retrieve type information
  virtual IndexMeta::DataType data_type(void) const {
    return data_type_;
  }

  //! Test if it is a compacted buffer
  virtual bool is_compacted(void) const {
    return (features_.size() == 1u);
  }

  //! Retrieve pointer of compacted buffer
  virtual const void *data(void) const {
    return (features_.size() == 1u ? features_.front().data() : nullptr);
  }

  //! Retrieve size of feature
  virtual size_t element_size(void) const {
    return feature_size_;
  }

 private:
  //! Disable them
  GapIndexFeatures(void) = delete;

  //! Members
  std::vector<std::string> features_;
  size_t bucket_limit_;
  size_t features_count_;
  size_t feature_size_;
  size_t feature_dimension_;
  IndexMeta::DataType data_type_;
};

/*! Compact Index Features
 */
class CompactIndexFeatures : public IndexFeatures {
 public:
  //! Compact Index Features Pointer
  typedef std::shared_ptr<CompactIndexFeatures> Pointer;

  //! Constructor
  CompactIndexFeatures(const IndexMeta &meta)
      : features_(),
        feature_size_(meta.element_size()),
        feature_dimension_(meta.dimension()),
        data_type_(meta.data_type()) {}

  //! Constructor
  CompactIndexFeatures(const CompactIndexFeatures &rhs)
      : features_(rhs.features_),
        feature_size_(rhs.feature_size_),
        feature_dimension_(rhs.feature_dimension_),
        data_type_(rhs.data_type_) {}

  //! Constructor
  CompactIndexFeatures(CompactIndexFeatures &&rhs)
      : features_(std::move(rhs.features_)),
        feature_size_(rhs.feature_size_),
        feature_dimension_(rhs.feature_dimension_),
        data_type_(rhs.data_type_) {}

  //! Assignment
  CompactIndexFeatures &operator=(const CompactIndexFeatures &rhs) {
    features_ = rhs.features_;
    feature_size_ = rhs.feature_size_;
    feature_dimension_ = rhs.feature_dimension_;
    data_type_ = rhs.data_type_;
    return *this;
  }

  //! Assignment
  CompactIndexFeatures &operator=(CompactIndexFeatures &&rhs) {
    features_ = std::move(rhs.features_);
    feature_size_ = rhs.feature_size_;
    feature_dimension_ = rhs.feature_dimension_;
    data_type_ = rhs.data_type_;
    return *this;
  }

  //! Append a feature
  void emplace(const void *feat) {
    features_.append(reinterpret_cast<const char *>(feat), feature_size_);
  }

  //! Replace a feature
  void replace(size_t i, const void *feat) {
    std::memcpy(const_cast<char *>(features_.data()) + feature_size_ * i, feat,
                feature_size_);
  }

  //! Resize the container
  void resize(size_t n) {
    features_.resize(feature_size_ * n);
  }

  //! Reserve the container
  void reserve(size_t n) {
    features_.reserve(feature_size_ * n);
  }

  //! Clear the features
  void clear(void) {
    features_.clear();
  }

  //! Retrieve feature via index
  void *at(size_t i) {
    return (const_cast<char *>(features_.data()) + feature_size_ * i);
  }

  //! Retrieve feature via index
  const void *at(size_t i) const {
    return (features_.data() + feature_size_ * i);
  }

  //! Retrieve count of elements
  virtual size_t count(void) const {
    return (features_.size() / feature_size_);
  }

  //! Retrieve dimension
  virtual size_t dimension(void) const {
    return feature_dimension_;
  }

  //! Retrieve feature via index
  virtual const void *element(size_t i) const {
    return this->at(i);
  }

  //! Retrieve type information
  virtual IndexMeta::DataType data_type(void) const {
    return data_type_;
  }

  //! Test if it is a compacted buffer
  virtual bool is_compacted(void) const {
    return true;
  }

  //! Retrieve pointer of compacted buffer
  virtual const void *data(void) const {
    return features_.data();
  }

  //! Retrieve size of feature
  virtual size_t element_size(void) const {
    return feature_size_;
  }

 private:
  //! Disable them
  CompactIndexFeatures(void) = delete;

  //! Members
  std::string features_;
  size_t feature_size_;
  size_t feature_dimension_;
  IndexMeta::DataType data_type_;
};

/*! Sample Index Features
 *
 *  Wraps a feature container `TBase` and caps it at a fixed number of
 *  samples. Once the cap is reached, each further feature replaces a stored
 *  one only if a uniformly drawn index in [0, total()] falls below the cap.
 *  `TBase` must provide count(), emplace(), replace() and clear().
 *  The random engine is default-constructed and is neither copied nor moved
 *  together with the object.
 */
template <typename TBase>
class SampleIndexFeatures : public TBase {
 public:
  //! Sample Index Features Pointer
  using Pointer = std::shared_ptr<SampleIndexFeatures<TBase>>;

  //! Constructor: keep at most `cnt` samples (a cap of 0 is raised to 1)
  SampleIndexFeatures(const IndexMeta &meta, size_t cnt)
      : TBase(meta), samples_(cnt != 0 ? cnt : 1u), total_(0), mt_() {}

  //! Constructor: copies samples and counters, starts a fresh engine
  SampleIndexFeatures(const SampleIndexFeatures &rhs)
      : TBase(rhs), samples_(rhs.samples_), total_(rhs.total_), mt_() {}

  //! Constructor: moves the stored samples, copies the counters, starts a
  //! fresh engine
  SampleIndexFeatures(SampleIndexFeatures &&rhs)
      : TBase(std::move(rhs)),
        samples_(rhs.samples_),
        total_(rhs.total_),
        mt_() {}

  //! Assignment: copies samples and counters; the engine is left untouched
  SampleIndexFeatures &operator=(const SampleIndexFeatures &rhs) {
    const TBase &base = rhs;
    TBase::operator=(base);
    samples_ = rhs.samples_;
    total_ = rhs.total_;
    return *this;
  }

  //! Assignment: moves the stored samples, copies the counters; the engine
  //! is left untouched
  SampleIndexFeatures &operator=(SampleIndexFeatures &&rhs) {
    TBase::operator=(static_cast<TBase &&>(rhs));
    samples_ = rhs.samples_;
    total_ = rhs.total_;
    return *this;
  }

  //! Retrieve the sample cap
  size_t samples() const {
    return samples_;
  }

  //! Retrieve the number of features offered so far
  size_t total() const {
    return total_;
  }

  //! Offer a feature: stored directly while below the cap, afterwards it
  //! may replace a randomly chosen stored sample
  void emplace(const void *feat) {
    if (TBase::count() < samples_) {
      TBase::emplace(feat);
    } else {
      // Draw from the closed range [0, total_]; only a hit below the cap
      // replaces a stored sample.
      std::uniform_int_distribution<size_t> pick(0, total_);
      const size_t slot = pick(mt_);
      if (slot < samples_) {
        TBase::replace(slot, feat);
      }
    }
    ++total_;
  }

  //! Clear the stored samples and reset the offered-features counter
  void clear() {
    TBase::clear();
    total_ = 0;
  }

 private:
  //! Disable them
  SampleIndexFeatures() = delete;

  //! Members
  size_t samples_;
  size_t total_;
  std::mt19937 mt_;
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/include/zvec/core/framework/index_filter.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <functional>

namespace zvec {
namespace core {

/*! Index Filter
 */
class IndexFilter {
 public:
  //! Constructor (no filter function is installed)
  IndexFilter(void) = default;

  //! Copy constructor
  IndexFilter(const IndexFilter &rhs) = default;

  //! Move constructor
  //! Defaulted: the only member is a std::function, so the member-wise move
  //! is exactly what the previous hand-written std::forward cast performed.
  IndexFilter(IndexFilter &&rhs) = default;

  //! Copy assignment operator
  IndexFilter &operator=(const IndexFilter &rhs) = default;

  //! Move assignment operator
  IndexFilter &operator=(IndexFilter &&rhs) = default;

  //! Function call
  //! Returns the result of the installed function for `key`, or false when
  //! no function is installed.
  bool operator()(uint64_t key) const {
    return (filter_ ? filter_(key) : false);
  }

  //! Set the filter function
  //! `func` may be any callable assignable to std::function<bool(uint64_t)>.
  template <typename T>
  void set(T &&func) {
    filter_ = std::forward<T>(func);
  }

  //! Reset the filter function (the filter becomes invalid again)
  void reset(void) {
    filter_ = nullptr;
  }

  //! Test if a filter function is installed
  bool is_valid(void) const {
    return (!!filter_);
  }

 private:
  //! Members
  std::function<bool(uint64_t key)> filter_{};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/include/zvec/core/framework/index_flow.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <memory>
#include <zvec/core/framework/index_reformer.h>
#include <zvec/core/framework/index_searcher.h>
namespace zvec {
namespace core {

/*! Index Flow
 */
class IndexFlow {
 public:
  /*! Index Flow Context
   *
   *  Wraps a searcher context and forwards most calls to it. It also owns a
   *  string buffer exposed through mutable_features() / features().
   *  Every forwarding method dereferences the wrapped searcher context
   *  without a null check.
   */
  class Context {
   public:
    //! Index Flow Context Pointer
    typedef std::unique_ptr<Context> Pointer;

    //! Index Flow Context UPointer
    typedef std::unique_ptr<Context> UPointer;

    //! Retrieve searcher context
    IndexSearcher::Context::Pointer &searcher_context(void) {
      return searcher_context_;
    }

    //! Set topk of search result
    void set_topk(uint32_t topk) {
      searcher_context_->set_topk(topk);
    }

    //! Retrieve search results
    const IndexDocumentList &result(void) const {
      return searcher_context_->result();
    }

    //! Retrieve search result with index
    const IndexDocumentList &result(size_t index) const {
      return searcher_context_->result(index);
    }

    //! Set the filter of context
    template <typename T>
    void set_filter(T &&func) {
      searcher_context_->set_filter(std::forward<T>(func));
    }

    //! Reset the filter of context
    void reset_filter(void) {
      searcher_context_->reset_filter();
    }

    //! Set mode of debug
    void set_debug_mode(bool enable) {
      searcher_context_->set_debug_mode(enable);
    }

    //! Update the parameters of context
    int update(const ailego::Params &params) {
      return searcher_context_->update(params);
    }

    //! Retrieve debug information
    std::string debug_string(void) const {
      return searcher_context_->debug_string();
    }

    //! Retrieve magic number
    uint32_t magic(void) const {
      return searcher_context_->magic();
    }

    //! Retrieve mode of debug
    bool debug_mode(void) const {
      return searcher_context_->debug_mode();
    }

    //! Retrieve mutable features buffer
    std::string *mutable_features(void) {
      return &features_;
    }

    //! Retrieve features buffer
    const std::string &features(void) const {
      return features_;
    }

   protected:
    friend class IndexFlow;

    //! Constructor (takes ownership of the searcher context)
    Context(IndexSearcher::Context::Pointer &&ctx)
        : searcher_context_(std::move(ctx)) {}

   private:
    IndexSearcher::Context::Pointer searcher_context_{};
    std::string features_{};
  };

  //! Constructor
  IndexFlow(void) {}

  //! Move constructor
  //! Transfers every member, including the index meta, so that meta() on the
  //! new object keeps describing the components that were moved in.
  IndexFlow(IndexFlow &&rhs)
      : meta_(std::move(rhs.meta_)),
        storage_(std::move(rhs.storage_)),
        reformer_(std::move(rhs.reformer_)),
        searcher_(std::move(rhs.searcher_)),
        metric_(std::move(rhs.metric_)),
        user_reformer_(std::move(rhs.user_reformer_)),
        user_searcher_(std::move(rhs.user_searcher_)),
        user_metric_name_(std::move(rhs.user_metric_name_)),
        user_metric_params_(std::move(rhs.user_metric_params_)) {}

  //! Move assignment
  //! Transfers every member, including the index meta (see move constructor).
  IndexFlow &operator=(IndexFlow &&rhs) {
    meta_ = std::move(rhs.meta_);
    storage_ = std::move(rhs.storage_);
    reformer_ = std::move(rhs.reformer_);
    searcher_ = std::move(rhs.searcher_);
    metric_ = std::move(rhs.metric_);
    user_reformer_ = std::move(rhs.user_reformer_);
    user_searcher_ = std::move(rhs.user_searcher_);
    user_metric_name_ = std::move(rhs.user_metric_name_);
    user_metric_params_ = std::move(rhs.user_metric_params_);
    return *this;
  }

  //! Retrieve index meta
  const IndexMeta &meta(void) const {
    return meta_;
  }

  //! Retrieve index reformer
  const IndexReformer::Pointer &reformer(void) const {
    return reformer_;
  }

  //! Retrieve index searcher
  const IndexSearcher::Pointer &searcher(void) const {
    return searcher_;
  }

  //! Retrieve index metric
  const IndexMetric::Pointer &metric(void) const {
    return metric_;
  }

  //! Set the index storage (user)
  int set_storage(const std::string &name, const ailego::Params &params);

  //! Set the index reformer (user)
  int set_reformer(const std::string &name, const ailego::Params &params);

  //! Set the index searcher (user)
  int set_searcher(const std::string &name, const ailego::Params &params);

  //! Set the index searcher (user)
  int set_searcher(IndexSearcher::Pointer searcher);

  //! Set the index metric (user)
  int set_metric(const std::string &name, const ailego::Params &params);

  //! Load index
  int load(const std::string &path);

  //! Unload index
  int unload(void);

  //! Similarity brute force search
  int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,
                     Context::Pointer &context) const;

  //! Similarity search
  int search_impl(const void *query, const IndexQueryMeta &qmeta,
                  Context::Pointer &context) const;

  //! Similarity brute force search in batch
  int search_bf_impl(const void *query, const IndexQueryMeta &qmeta,
                     uint32_t count, Context::Pointer &context) const;

  //! Similarity search in batch
  int search_impl(const void *query, const IndexQueryMeta &qmeta,
                  uint32_t count, Context::Pointer &context) const;

  // The typed overloads below are selected by the explicit template argument
  // DT; each one only builds an IndexQueryMeta(DT, dim) and forwards to the
  // matching *_impl function.

  //! Similarity brute force search (FP16)
  template <IndexMeta::DataType DT,
            typename = typename std::enable_if<
                DT == IndexMeta::DataType::DT_FP16>::type>
  int search_bf(const ailego::Float16 *vec, size_t dim,
                Context::Pointer &context) const {
    return this->search_bf_impl(vec, IndexQueryMeta(DT, dim), context);
  }

  //! Similarity brute force search (FP32)
  template <IndexMeta::DataType DT,
            typename = typename std::enable_if<
                DT == IndexMeta::DataType::DT_FP32>::type>
  int search_bf(const float *vec, size_t dim, Context::Pointer &context) const {
    return this->search_bf_impl(vec, IndexQueryMeta(DT, dim), context);
  }

  //! Similarity brute force search (INT8)
  template <IndexMeta::DataType DT,
            typename = typename std::enable_if<
                DT == IndexMeta::DataType::DT_INT8>::type>
  int search_bf(const int8_t *vec, size_t dim,
                Context::Pointer &context) const {
    return this->search_bf_impl(vec, IndexQueryMeta(DT, dim), context);
  }

  //! Similarity brute force search (INT4)
  template <IndexMeta::DataType DT,
            typename = typename std::enable_if<
                DT == IndexMeta::DataType::DT_INT4>::type>
  int search_bf(const uint8_t *vec, size_t dim,
                Context::Pointer &context) const {
    return this->search_bf_impl(vec, IndexQueryMeta(DT, dim), context);
  }

  //! Similarity brute force search (BINARY32)
  template <IndexMeta::DataType DT,
            typename = typename std::enable_if<
                DT == IndexMeta::DataType::DT_BINARY32>::type>
  int search_bf(const uint32_t *vec, size_t dim,
                Context::Pointer &context) const {
    return this->search_bf_impl(vec, IndexQueryMeta(DT, dim), context);
  }

  //! Similarity brute force search in batch (FP16)
  template <IndexMeta::DataType DT,
            typename = typename std::enable_if<
                DT == IndexMeta::DataType::DT_FP16>::type>
  int search_bf(const ailego::Float16 *vec, size_t dim, size_t rows,
                Context::Pointer &context) const {
    return this->search_bf_impl(vec, IndexQueryMeta(DT, dim), rows, context);
  }

  //! Similarity brute force search in batch (FP32)
  template <IndexMeta::DataType DT,
            typename = typename std::enable_if<
                DT == IndexMeta::DataType::DT_FP32>::type>
  int search_bf(const float *vec, size_t dim, size_t rows,
                Context::Pointer &context) const {
    return this->search_bf_impl(vec, IndexQueryMeta(DT, dim), rows, context);
  }

  //! Similarity brute force search in batch (INT8)
  template <IndexMeta::DataType DT,
            typename = typename std::enable_if<
                DT == IndexMeta::DataType::DT_INT8>::type>
  int search_bf(const int8_t *vec, size_t dim, size_t rows,
                Context::Pointer &context) const {
    return this->search_bf_impl(vec, IndexQueryMeta(DT, dim), rows, context);
  }

  //! Similarity brute force search in batch (INT4)
  template <IndexMeta::DataType DT,
            typename = typename std::enable_if<
                DT == IndexMeta::DataType::DT_INT4>::type>
  int search_bf(const uint8_t *vec, size_t dim, size_t rows,
                Context::Pointer &context) const {
    return this->search_bf_impl(vec, IndexQueryMeta(DT, dim), rows, context);
  }

  //! Similarity brute force search in batch (BINARY32)
  template <IndexMeta::DataType DT,
            typename = typename std::enable_if<
                DT == IndexMeta::DataType::DT_BINARY32>::type>
  int search_bf(const uint32_t *vec, size_t dim, size_t rows,
                Context::Pointer &context) const {
    return this->search_bf_impl(vec, IndexQueryMeta(DT, dim), rows, context);
  }

  //! Similarity search (FP16)
  template <IndexMeta::DataType DT,
            typename = typename std::enable_if<
                DT == IndexMeta::DataType::DT_FP16>::type>
  int search(const ailego::Float16 *vec, size_t dim,
             Context::Pointer &context) const {
    return this->search_impl(vec, IndexQueryMeta(DT, dim), context);
  }

  //! Similarity search (FP32)
  template <IndexMeta::DataType DT,
            typename = typename std::enable_if<
                DT == IndexMeta::DataType::DT_FP32>::type>
  int search(const float *vec, size_t dim, Context::Pointer &context) const {
    return this->search_impl(vec, IndexQueryMeta(DT, dim), context);
  }

  //! Similarity search (INT8)
  template <IndexMeta::DataType DT,
            typename = typename std::enable_if<
                DT == IndexMeta::DataType::DT_INT8>::type>
  int search(const int8_t *vec, size_t dim, Context::Pointer &context) const {
    return this->search_impl(vec, IndexQueryMeta(DT, dim), context);
  }

  //! Similarity search (INT4)
  template <IndexMeta::DataType DT,
            typename = typename std::enable_if<
                DT == IndexMeta::DataType::DT_INT4>::type>
  int search(const uint8_t *vec, size_t dim, Context::Pointer &context) const {
    return this->search_impl(vec, IndexQueryMeta(DT, dim), context);
  }

  //! Similarity search (BINARY32)
  template <IndexMeta::DataType DT,
            typename = typename std::enable_if<
                DT == IndexMeta::DataType::DT_BINARY32>::type>
  int search(const uint32_t *vec, size_t dim, Context::Pointer &context) const {
    return this->search_impl(vec, IndexQueryMeta(DT, dim), context);
  }

  //! Similarity search in batch (FP16)
  template <IndexMeta::DataType DT,
            typename = typename std::enable_if<
                DT == IndexMeta::DataType::DT_FP16>::type>
  int search(const ailego::Float16 *vec, size_t dim, size_t rows,
             Context::Pointer &context) const {
    return this->search_impl(vec, IndexQueryMeta(DT, dim), rows, context);
  }

  //! Similarity search in batch (FP32)
  template <IndexMeta::DataType DT,
            typename = typename std::enable_if<
                DT == IndexMeta::DataType::DT_FP32>::type>
  int search(const float *vec, size_t dim, size_t rows,
             Context::Pointer &context) const {
    return this->search_impl(vec, IndexQueryMeta(DT, dim), rows, context);
  }

  //! Similarity search in batch (INT8)
  template <IndexMeta::DataType DT,
            typename = typename std::enable_if<
                DT == IndexMeta::DataType::DT_INT8>::type>
  int search(const int8_t *vec, size_t dim, size_t rows,
             Context::Pointer &context) const {
    return this->search_impl(vec, IndexQueryMeta(DT, dim), rows, context);
  }

  //! Similarity search in batch (INT4)
  template <IndexMeta::DataType DT,
            typename = typename std::enable_if<
                DT == IndexMeta::DataType::DT_INT4>::type>
  int search(const uint8_t *vec, size_t dim, size_t rows,
             Context::Pointer &context) const {
    return this->search_impl(vec, IndexQueryMeta(DT, dim), rows, context);
  }

  //! Similarity search in batch (BINARY32)
  template <IndexMeta::DataType DT,
            typename = typename std::enable_if<
                DT == IndexMeta::DataType::DT_BINARY32>::type>
  int search(const uint32_t *vec, size_t dim, size_t rows,
             Context::Pointer &context) const {
    return this->search_impl(vec, IndexQueryMeta(DT, dim), rows, context);
  }

  //! Create a flow context
  //! The searcher is dereferenced unconditionally, so a searcher must be
  //! present before this is called.
  Context::Pointer create_context(void) const {
    return Context::Pointer(new Context(searcher_->create_context()));
  }

 private:
  //! Disable them
  IndexFlow(const IndexFlow &) = delete;
  IndexFlow &operator=(const IndexFlow &) = delete;

  int load_internal();

  //! Members
  IndexMeta meta_{};
  IndexStorage::Pointer storage_{};
  IndexReformer::Pointer reformer_{};
  IndexSearcher::Pointer searcher_{};
  IndexMetric::Pointer metric_{};
  IndexReformer::Pointer user_reformer_{};
  IndexSearcher::Pointer user_searcher_{};
  std::string user_metric_name_{};
  ailego::Params user_metric_params_{};
};


/*! Index Sparse Flow
 */
class IndexSparseFlow {
 public:
  /*! Index Sparse Flow Context
   *
   *  Wraps a searcher context and forwards most calls to it. It also owns a
   *  string buffer exposed through mutable_features() / features().
   *  Every forwarding method dereferences the wrapped searcher context
   *  without a null check.
   */
  class Context {
   public:
    //! Index Sparse Flow Context Pointer
    typedef std::unique_ptr<Context> Pointer;

    //! Index Sparse Flow Context UPointer
    typedef std::unique_ptr<Context> UPointer;

    //! Retrieve searcher context
    IndexSearcher::Context::Pointer &searcher_context(void) {
      return searcher_context_;
    }

    //! Set topk of search result
    void set_topk(uint32_t topk) {
      searcher_context_->set_topk(topk);
    }

    //! Retrieve search results
    const IndexDocumentList &result(void) const {
      return searcher_context_->result();
    }

    //! Retrieve search result with index
    const IndexDocumentList &result(size_t index) const {
      return searcher_context_->result(index);
    }

    //! Set the filter of context
    template <typename T>
    void set_filter(T &&func) {
      searcher_context_->set_filter(std::forward<T>(func));
    }

    //! Reset the filter of context
    void reset_filter(void) {
      searcher_context_->reset_filter();
    }

    //! Set mode of debug
    void set_debug_mode(bool enable) {
      searcher_context_->set_debug_mode(enable);
    }

    //! Update the parameters of context
    int update(const ailego::Params &params) {
      return searcher_context_->update(params);
    }

    //! Retrieve debug information
    std::string debug_string(void) const {
      return searcher_context_->debug_string();
    }

    //! Retrieve magic number
    uint32_t magic(void) const {
      return searcher_context_->magic();
    }

    //! Retrieve mode of debug
    bool debug_mode(void) const {
      return searcher_context_->debug_mode();
    }

    //! Retrieve mutable features buffer
    std::string *mutable_features(void) {
      return &features_;
    }

    //! Retrieve features buffer
    const std::string &features(void) const {
      return features_;
    }

   protected:
    friend class IndexSparseFlow;

    //! Constructor (takes ownership of the searcher context)
    Context(IndexSearcher::Context::Pointer &&ctx)
        : searcher_context_(std::move(ctx)) {}

   private:
    IndexSearcher::Context::Pointer searcher_context_{};
    std::string features_{};
  };

  //! Constructor
  IndexSparseFlow(void) {}

  //! Move constructor
  //! Transfers every member, including the index meta, so that meta() on the
  //! new object keeps describing the components that were moved in.
  IndexSparseFlow(IndexSparseFlow &&rhs)
      : meta_(std::move(rhs.meta_)),
        storage_(std::move(rhs.storage_)),
        reformer_(std::move(rhs.reformer_)),
        searcher_(std::move(rhs.searcher_)),
        metric_(std::move(rhs.metric_)),
        user_reformer_(std::move(rhs.user_reformer_)),
        user_searcher_(std::move(rhs.user_searcher_)),
        user_metric_name_(std::move(rhs.user_metric_name_)),
        user_metric_params_(std::move(rhs.user_metric_params_)) {}

  //! Move assignment
  //! Transfers every member, including the index meta (see move constructor).
  IndexSparseFlow &operator=(IndexSparseFlow &&rhs) {
    meta_ = std::move(rhs.meta_);
    storage_ = std::move(rhs.storage_);
    reformer_ = std::move(rhs.reformer_);
    searcher_ = std::move(rhs.searcher_);
    metric_ = std::move(rhs.metric_);
    user_reformer_ = std::move(rhs.user_reformer_);
    user_searcher_ = std::move(rhs.user_searcher_);
    user_metric_name_ = std::move(rhs.user_metric_name_);
    user_metric_params_ = std::move(rhs.user_metric_params_);
    return *this;
  }

  //! Retrieve index sparse meta
  const IndexMeta &meta(void) const {
    return meta_;
  }

  //! Retrieve index reformer
  const IndexReformer::Pointer &reformer(void) const {
    return reformer_;
  }

  //! Retrieve index searcher
  const IndexSearcher::Pointer &searcher(void) const {
    return searcher_;
  }

  //! Retrieve index metric
  const IndexMetric::Pointer &metric(void) const {
    return metric_;
  }

  //! Set the index storage (user)
  int set_storage(const std::string &name, const ailego::Params &params);

  //! Set the index reformer (user)
  int set_reformer(const std::string &name, const ailego::Params &params);

  //! Set the index searcher (user)
  int set_searcher(const std::string &name, const ailego::Params &params);

  //! Set the index searcher (user)
  int set_searcher(IndexSearcher::Pointer searcher);

  //! Set the index metric (user)
  int set_metric(const std::string &name, const ailego::Params &params);

  //! Load index
  int load(const std::string &path);

  //! Unload index
  int unload(void);

  //! Similarity search with sparse inputs
  int search_impl(const uint32_t sparse_count, const uint32_t *sparse_indices,
                  const void *sparse_query, const IndexQueryMeta &qmeta,
                  Context::Pointer &context) const;

  //! Similarity search with sparse inputs in batch
  //! (`sparse_count` is a pointer here; presumably one entry per query --
  //! confirm against the implementation)
  int search_impl(const uint32_t *sparse_count, const uint32_t *sparse_indices,
                  const void *sparse_query, const IndexQueryMeta &qmeta,
                  uint32_t count, Context::Pointer &context) const;

  //! Similarity brute force search with sparse inputs
  int search_bf_impl(const uint32_t sparse_count,
                     const uint32_t *sparse_indices, const void *sparse_query,
                     const IndexQueryMeta &qmeta,
                     Context::Pointer &context) const;

  //! Similarity brute force search with sparse inputs in batch
  //! (`sparse_count` is a pointer here; presumably one entry per query --
  //! confirm against the implementation)
  int search_bf_impl(const uint32_t *sparse_count,
                     const uint32_t *sparse_indices, const void *sparse_query,
                     const IndexQueryMeta &qmeta, uint32_t count,
                     Context::Pointer &context) const;

  // The typed overloads below are selected by the explicit template argument
  // DT; each one only builds an IndexQueryMeta(DT) and forwards to the
  // matching *_impl function.

  //! Similarity brute force search (FP16)
  template <IndexMeta::DataType DT,
            typename = typename std::enable_if<
                DT == IndexMeta::DataType::DT_FP16>::type>
  int search_bf(const uint32_t sparse_count, const uint32_t *sparse_indices,
                const ailego::Float16 *sparse_query,
                Context::Pointer &context) const {
    return this->search_bf_impl(sparse_count, sparse_indices, sparse_query,
                                IndexQueryMeta(DT), context);
  }

  //! Similarity brute force search (FP32)
  template <IndexMeta::DataType DT,
            typename = typename std::enable_if<
                DT == IndexMeta::DataType::DT_FP32>::type>
  int search_bf(const uint32_t sparse_count, const uint32_t *sparse_indices,
                const float *sparse_query, Context::Pointer &context) const {
    return this->search_bf_impl(sparse_count, sparse_indices, sparse_query,
                                IndexQueryMeta(DT), context);
  }

  //! Similarity search (FP16)
  template <IndexMeta::DataType DT,
            typename = typename std::enable_if<
                DT == IndexMeta::DataType::DT_FP16>::type>
  int search(const uint32_t sparse_count, const uint32_t *sparse_indices,
             const ailego::Float16 *sparse_query,
             Context::Pointer &context) const {
    return this->search_impl(sparse_count, sparse_indices, sparse_query,
                             IndexQueryMeta(DT), context);
  }

  //! Similarity search (FP32)
  template <IndexMeta::DataType DT,
            typename = typename std::enable_if<
                DT == IndexMeta::DataType::DT_FP32>::type>
  int search(const uint32_t sparse_count, const uint32_t *sparse_indices,
             const float *sparse_query, Context::Pointer &context) const {
    return this->search_impl(sparse_count, sparse_indices, sparse_query,
                             IndexQueryMeta(DT), context);
  }

  //! Create a flow context
  //! The searcher is dereferenced unconditionally, so a searcher must be
  //! present before this is called.
  Context::Pointer create_context(void) const {
    return Context::Pointer(new Context(searcher_->create_context()));
  }

 private:
  //! Disable them
  IndexSparseFlow(const IndexSparseFlow &) = delete;
  IndexSparseFlow &operator=(const IndexSparseFlow &) = delete;

  int load_internal();

  //! Members
  IndexMeta meta_{};
  IndexStorage::Pointer storage_{};
  IndexReformer::Pointer reformer_{};
  IndexSearcher::Pointer searcher_{};
  IndexMetric::Pointer metric_{};
  IndexReformer::Pointer user_reformer_{};
  IndexSearcher::Pointer user_searcher_{};
  std::string user_metric_name_{};
  ailego::Params user_metric_params_{};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/include/zvec/core/framework/index_format.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <cstring>
#include <random>
#include <string>
#include <zvec/ailego/hash/crc32c.h>
#include <zvec/ailego/utility/time_helper.h>

namespace zvec {
namespace core {

/*! Index Format
 */
struct IndexFormat {
  /*! Version Number
   */
  enum { FORMAT_VERSION = 0x0002 };

  /*! Index Format Meta Header
   *
   *  Filled in by SetupMetaHeader(); `header_crc` is the CRC32C of the whole
   *  structure computed while `header_crc` itself is zero.
   */
  struct MetaHeader {
    uint32_t header_crc;
    uint16_t reserved1_;
    uint16_t version;
    uint32_t revision;
    uint32_t magic;
    uint16_t meta_header_size;
    uint16_t meta_footer_size;
    uint32_t meta_footer_offset;
    uint64_t content_offset;
    uint64_t setup_time;
    uint64_t reserved3_[3];
  };

  static_assert(sizeof(MetaHeader) % 32 == 0,
                "MetaHeader must be aligned with 32 bytes");

  /*! Index Format Meta Footer
   *
   *  `footer_crc` is the CRC32C of the whole structure computed while
   *  `footer_crc` itself is zero (see UpdateMetaFooter()).
   */
  struct MetaFooter {
    uint32_t footer_crc;
    uint32_t segments_meta_crc;
    uint32_t content_crc;
    uint32_t segment_count;
    // meta section size
    uint32_t segments_meta_size;
    uint32_t reserved1_;
    // segments' data section size
    uint64_t content_size;
    uint64_t content_padding_size;

    uint64_t check_point;
    uint64_t update_time;
    uint64_t reserved2_[7];
    uint64_t next_meta_header_offset;
    uint64_t total_size;
  };

  static_assert(sizeof(MetaFooter) % 32 == 0,
                "MetaFooter must be aligned with 32 bytes");

  /*! Index Format Segment Meta
   */
  struct SegmentMeta {
    uint32_t segment_id_offset;
    // used only by immutable segments, e.g., IndexMeta, or searcher
    uint32_t data_crc;
    uint64_t data_index;
    uint64_t data_size;
    uint64_t padding_size;
  };

  static_assert(sizeof(SegmentMeta) % 32 == 0,
                "SegmentMeta must be aligned with 32 bytes");

  /*! Index Format Segment Meta Buffer
   *
   *  Byte buffer laid out as `capacity` fixed-size SegmentMeta records
   *  followed by the NUL-terminated segment ids, appended in order.
   */
  class SegmentMetaBuffer {
   public:
    //! Constructor (reserves zero-filled room for `count` segment metas)
    SegmentMetaBuffer(uint32_t count) : capacity_(count) {
      buffer_.resize(sizeof(SegmentMeta) * capacity_);
    }

    //! Append a segment meta into buffer
    //! Fills the next free SegmentMeta record and appends `id` (up to its
    //! first NUL, terminator included) at the end of the buffer. The data
    //! index of the segment is the running sum of the sizes and paddings of
    //! the segments appended before it.
    //! Returns false when all records are used or the record would lie
    //! outside the buffer (possible only after a shrinking resize()).
    bool append(const std::string &id, size_t data_size, size_t padding_size,
                uint32_t data_crc) {
      if (count_ >= capacity_) {
        return false;
      }
      const size_t slot = sizeof(SegmentMeta) * count_;
      if (buffer_.size() < slot + sizeof(SegmentMeta)) {
        return false;
      }

      // Build the record locally and copy its bytes into the string storage:
      // the storage is a char array, so writing through a SegmentMeta
      // pointer into it would rely on alignment and aliasing that the
      // language does not guarantee.
      SegmentMeta meta;
      std::memset(&meta, 0, sizeof(meta));
      meta.segment_id_offset = static_cast<uint32_t>(buffer_.size());
      meta.data_crc = data_crc;
      meta.data_index = offset_;
      meta.data_size = data_size;
      meta.padding_size = padding_size;
      std::memcpy(&buffer_[slot], &meta, sizeof(meta));

      buffer_.append(id.c_str(), std::strlen(id.c_str()) + 1);
      count_ += 1;
      offset_ += data_size + padding_size;
      return true;
    }

    //! Resize the buffer
    void resize(size_t val) {
      buffer_.resize(val);
    }

    //! Retrieve pointer of data
    const void *data(void) const {
      return buffer_.data();
    }

    //! Retrieve size of data
    size_t size(void) const {
      return buffer_.size();
    }

    //! Retrieve crc of buffer
    uint32_t crc(void) const {
      return ailego::Crc32c::Hash(buffer_.data(), buffer_.size(), 0);
    }

   private:
    //! Disable them
    SegmentMetaBuffer(void) = delete;

    //! Members
    std::string buffer_{};   // SegmentMeta records followed by segment ids
    size_t offset_{0u};      // data index assigned to the next segment
    uint32_t capacity_{0u};  // number of SegmentMeta records reserved
    uint32_t count_{0u};     // number of records filled so far
  };

  //! Setup meta header structure
  //! Zeroes the header, fills in version, a random magic number, the
  //! header/footer sizes, the given offsets and the current time, then
  //! stores the checksum (computed while `header_crc` is still zero).
  static void SetupMetaHeader(MetaHeader *header, uint32_t footer_offset,
                              uint32_t content_offset) {
    std::memset(header, 0, sizeof(MetaHeader));
    header->version = IndexFormat::FORMAT_VERSION;
    header->revision = 0;
    header->magic = std::random_device()();
    header->meta_header_size = static_cast<uint16_t>(sizeof(MetaHeader));
    header->meta_footer_size = static_cast<uint16_t>(sizeof(MetaFooter));
    header->meta_footer_offset = footer_offset;
    header->content_offset = content_offset;
    header->setup_time = ailego::Realtime::Seconds();
    header->header_crc = ailego::Crc32c::Hash(header, sizeof(MetaHeader), 0);
  }

  //! Update meta header structure (recomputes the checksum only)
  static void UpdateMetaHeader(MetaHeader *header) {
    // The checksum field must be zero while the checksum is computed.
    header->header_crc = 0;
    header->header_crc = ailego::Crc32c::Hash(header, sizeof(MetaHeader), 0);
  }

  //! Setup meta footer structure (zeroes every field)
  static void SetupMetaFooter(MetaFooter *footer) {
    std::memset(footer, 0, sizeof(MetaFooter));
  }

  //! Update meta footer structure
  //! Stores `check_point` unless it is zero (a zero keeps the previous
  //! value), refreshes the update time and recomputes the checksum.
  static void UpdateMetaFooter(MetaFooter *footer, uint64_t check_point) {
    if (check_point != 0) {
      footer->check_point = check_point;
    }
    footer->update_time = ailego::Realtime::Seconds();
    // The checksum field must be zero while the checksum is computed.
    footer->footer_crc = 0;
    footer->footer_crc = ailego::Crc32c::Hash(footer, sizeof(MetaFooter), 0);
  }
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/include/zvec/core/framework/index_framework.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <zvec/core/framework/index_builder.h>
#include <zvec/core/framework/index_bundle.h>
#include <zvec/core/framework/index_cluster.h>
#include <zvec/core/framework/index_converter.h>
#include <zvec/core/framework/index_factory.h>
#include <zvec/core/framework/index_filter.h>
#include <zvec/core/framework/index_flow.h>
#include <zvec/core/framework/index_helper.h>
#include <zvec/core/framework/index_holder.h>
#include <zvec/core/framework/index_mapping.h>
#include <zvec/core/framework/index_memory.h>
#include <zvec/core/framework/index_meta.h>
#include <zvec/core/framework/index_plugin.h>
#include <zvec/core/framework/index_runner.h>
#include <zvec/core/framework/index_searcher.h>
#include <zvec/core/framework/index_trainer.h>


================================================
FILE: src/include/zvec/core/framework/index_groupby.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <functional>
#include <string>

namespace zvec {
namespace core {

/*! Index GroupBy
 */
class IndexGroupBy {
 public:
  //! Map a key to its group name
  //! Yields an empty string when no group-by function is installed.
  std::string operator()(uint64_t key) const {
    if (!group_by_) {
      return std::string("");
    }
    return group_by_(key);
  }

  //! Install the group-by function
  template <typename T>
  void set(T &&func) {
    group_by_ = std::forward<T>(func);
  }

  //! Remove the installed group-by function
  void reset(void) {
    group_by_ = nullptr;
  }

  //! Check whether a group-by function is installed
  bool is_valid(void) const {
    return static_cast<bool>(group_by_);
  }

 private:
  //! Members
  std::function<std::string(uint64_t key)> group_by_{};
};

}  // namespace core
}  // namespace zvec

================================================
FILE: src/include/zvec/core/framework/index_helper.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <zvec/core/framework/index_dumper.h>
#include <zvec/core/framework/index_holder.h>
#include <zvec/core/framework/index_meta.h>
#include <zvec/core/framework/index_storage.h>

namespace zvec {
namespace core {

/*! Index Helper
 *
 *  Static helpers for writing and reading IndexMeta and for wrapping
 *  holders. Each keyed operation has a companion overload that uses the
 *  key "IndexMeta".
 */
struct IndexHelper {
  //! Serialize meta information to dumper under the given key
  static int SerializeToDumper(const IndexMeta &mt, IndexDumper *dumper,
                               const std::string &key);

  //! Serialize meta information to dumper under the key "IndexMeta"
  static int SerializeToDumper(const IndexMeta &mt, IndexDumper *dumper) {
    const std::string default_key("IndexMeta");
    return SerializeToDumper(mt, dumper, default_key);
  }

  //! Serialize meta information to storage under the given key
  static int SerializeToStorage(const IndexMeta &mt, IndexStorage *storage,
                                const std::string &key);

  //! Serialize meta information to storage under the key "IndexMeta"
  static int SerializeToStorage(const IndexMeta &mt, IndexStorage *storage) {
    const std::string default_key("IndexMeta");
    return SerializeToStorage(mt, storage, default_key);
  }

  //! Deserialize meta information from storage using the given key
  static int DeserializeFromStorage(IndexStorage *storage,
                                    const std::string &key, IndexMeta *out);

  //! Deserialize meta information from storage using the key "IndexMeta"
  static int DeserializeFromStorage(IndexStorage *storage, IndexMeta *out) {
    const std::string default_key("IndexMeta");
    return DeserializeFromStorage(storage, default_key, out);
  }

  //! Create a proxy holder that can be traversed twice.
  static IndexHolder::Pointer MakeTwoPassHolder(IndexHolder::Pointer holder);
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/include/zvec/core/framework/index_holder.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstring>
#include <list>
#include <memory>
#include <vector>
#include <zvec/ailego/container/vector.h>
#include <zvec/ailego/internal/platform.h>
#include <zvec/core/framework/index_features.h>
#include <zvec/core/framework/index_meta.h>

namespace zvec {
namespace core {

/*! Index Holder
 *
 *  Abstract interface over a collection of keyed elements that is read
 *  through iterators obtained from create_iterator().
 */
struct IndexHolder {
  //! Index Holder Pointer
  using Pointer = std::shared_ptr<IndexHolder>;

  /*! Index Holder Iterator
   */
  struct Iterator {
    //! Index Holder Iterator Pointer
    using Pointer = std::unique_ptr<Iterator>;

    //! Destructor
    virtual ~Iterator() = default;

    //! Pointer to the data of the current element
    virtual const void *data() const = 0;

    //! Whether the iterator currently refers to an element
    virtual bool is_valid() const = 0;

    //! Primary key of the current element
    virtual uint64_t key() const = 0;

    //! Move on to the next element
    virtual void next() = 0;
  };

  //! Destructor
  virtual ~IndexHolder() = default;

  //! Retrieve count of elements in holder (-1 indicates unknown)
  virtual size_t count() const = 0;

  //! Retrieve dimension
  virtual size_t dimension() const = 0;

  //! Retrieve type information
  virtual IndexMeta::DataType data_type() const = 0;

  //! Retrieve element size in bytes
  virtual size_t element_size() const = 0;

  //! Retrieve if it can multi-pass
  virtual bool multipass() const = 0;

  //! Create a new iterator
  virtual Iterator::Pointer create_iterator() = 0;

  //! Test if data type, dimension and element size all agree with the meta
  bool is_matched(const IndexMeta &meta) const {
    if (this->data_type() != meta.data_type()) {
      return false;
    }
    if (this->dimension() != meta.dimension()) {
      return false;
    }
    return this->element_size() == meta.element_size();
  }
};

/*! Index Hybrid Holder
 */
struct IndexHybridHolder : public IndexHolder {
  //! Index Holder Pointer
  typedef std::shared_ptr<IndexHybridHolder> Pointer;

  /*! Index Holder Iterator
   */
  struct Iterator : public IndexHolder::Iterator {
    //! Index Holder Pointer
    typedef std::unique_ptr<Iterator> Pointer;

    //! Destructor
    virtual ~Iterator(void) {}

    //! Retrieve pointer of data
    virtual const void *data(void) const = 0;

    //! Test if the iterator is valid
    virtual bool is_valid(void) const = 0;

    //! Retrieve primary key
    virtual uint64_t key(void) const = 0;

    //! Retrieve sparse count
    virtual uint32_t sparse_count() const = 0;

    //! Retrieve sparse indicies
    virtual const uint32_t *sparse_indices() const = 0;

    //! Retrieve sparse data
    virtual const void *sparse_data() const = 0;

    //! Next iterator
    virtual void next(void) = 0;
  };

  //! Destructor
  virtual ~IndexHybridHolder(void) {}

  //! Retrieve sparse count summing up over all the docs
  virtual size_t total_sparse_count(void) const = 0;

  //! Create a new hybrid iterator
  virtual Iterator::Pointer create_hybrid_iterator(void) = 0;
};

/*! Index Sparse Holder
 *
 *  Abstract interface over a collection of keyed sparse elements that is
 *  read through iterators obtained from create_iterator().
 */
struct IndexSparseHolder {
  //! Index Sparse Holder Pointer
  using Pointer = std::shared_ptr<IndexSparseHolder>;

  /*! Index Sparse Holder Iterator
   */
  struct Iterator {
    //! Index Sparse Holder Iterator Pointer
    using Pointer = std::unique_ptr<Iterator>;

    //! Destructor
    virtual ~Iterator() = default;

    //! Whether the iterator currently refers to an element
    virtual bool is_valid() const = 0;

    //! Primary key of the current element
    virtual uint64_t key() const = 0;

    //! Retrieve sparse count
    virtual uint32_t sparse_count() const = 0;

    //! Retrieve sparse indices
    virtual const uint32_t *sparse_indices() const = 0;

    //! Retrieve sparse data
    virtual const void *sparse_data() const = 0;

    //! Move on to the next element
    virtual void next() = 0;
  };

  //! Destructor
  virtual ~IndexSparseHolder() = default;

  //! Retrieve count of elements in holder (-1 indicates unknown)
  virtual size_t count() const = 0;

  //! Retrieve type information
  virtual IndexMeta::DataType data_type() const = 0;

  //! Retrieve if it can multi-pass
  virtual bool multipass() const = 0;

  //! Create a new iterator
  virtual Iterator::Pointer create_iterator() = 0;

  //! Test if the data type agrees with the meta
  bool is_matched(const IndexMeta &meta) const {
    const bool same_type = (this->data_type() == meta.data_type());
    return same_type;
  }

  //! Retrieve sparse count summing up over all the docs for reserving space
  virtual size_t total_sparse_count() const = 0;
};

/*! One-Pass Numerical Index Holder
 *
 *  Stores (key, vector) pairs in a list. Advancing an iterator erases the
 *  element it leaves behind, so the content can be traversed only once.
 */
template <typename T>
class OnePassNumericalIndexHolder : public IndexHolder {
 public:
  /*! One-Pass Index Holder Iterator
   */
  class Iterator : public IndexHolder::Iterator {
   public:
    //! Index Holder Iterator Pointer
    using Pointer = std::unique_ptr<Iterator>;

    //! Constructor, positioned on the first stored element of the owner
    Iterator(OnePassNumericalIndexHolder *owner)
        : holder_(owner), features_iter_(owner->features_.begin()) {}

    //! Destructor
    virtual ~Iterator() = default;

    //! Pointer to the vector data of the current element
    const void *data() const override {
      return features_iter_->second.data();
    }

    //! Valid until the end of the owner's list has been reached
    bool is_valid() const override {
      return (features_iter_ != holder_->features_.end());
    }

    //! Primary key of the current element
    uint64_t key() const override {
      return features_iter_->first;
    }

    //! Step forward and erase the element that was just left behind
    void next() override {
      auto consumed = features_iter_;
      ++features_iter_;
      holder_->features_.erase(consumed);
    }

   private:
    //! Iterator type of the owner's element list
    using FeatureIterator = typename std::list<
        std::pair<uint64_t, ailego::NumericalVector<T>>>::iterator;

    OnePassNumericalIndexHolder *holder_{nullptr};
    FeatureIterator features_iter_{};
  };

  //! Constructor
  OnePassNumericalIndexHolder(size_t dim) : dimension_(dim) {}

  //! Number of elements currently stored
  size_t count() const override {
    return features_.size();
  }

  //! Retrieve dimension
  size_t dimension() const override {
    return dimension_;
  }

  //! Retrieve type information (undefined at this level)
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DataType::DT_UNDEFINED;
  }

  //! Retrieve element size in bytes
  size_t element_size() const override {
    return dimension_ * sizeof(T);
  }

  //! One-pass holders cannot be traversed repeatedly
  bool multipass() const override {
    return false;
  }

  //! Create a new iterator
  IndexHolder::Iterator::Pointer create_iterator() override {
    IndexHolder::Iterator::Pointer iter(
        new OnePassNumericalIndexHolder::Iterator(this));
    return iter;
  }

  //! Append a copy of the vector; rejected when its size is not the dimension
  bool emplace(uint64_t key, const ailego::NumericalVector<T> &vec) {
    const bool accepted = (vec.size() == dimension_);
    if (accepted) {
      features_.emplace_back(key, vec);
    }
    return accepted;
  }

  //! Append by moving the vector; rejected when its size is not the dimension
  bool emplace(uint64_t key, ailego::NumericalVector<T> &&vec) {
    if (vec.size() == dimension_) {
      features_.emplace_back(key, std::move(vec));
      return true;
    }
    return false;
  }

 private:
  //! Disable them
  OnePassNumericalIndexHolder() = delete;

  //! Members
  size_t dimension_{0};
  std::list<std::pair<uint64_t, ailego::NumericalVector<T>>> features_;
};

/*! Multi-Pass Numerical Index Holder
 *
 *  Stores (key, vector) pairs in a std::vector. Iterators only walk over the
 *  elements without removing them, so the content can be traversed
 *  repeatedly.
 */
template <typename T>
class MultiPassNumericalIndexHolder : public IndexHolder {
 public:
  /*! Multi-Pass Index Holder Iterator
   */
  class Iterator : public IndexHolder::Iterator {
   public:
    //! Index Holder Iterator Pointer
    using Pointer = std::unique_ptr<Iterator>;

    //! Constructor, positioned on the first stored element of the owner
    Iterator(MultiPassNumericalIndexHolder *owner)
        : holder_(owner), features_iter_(owner->features_.begin()) {}

    //! Destructor
    virtual ~Iterator() = default;

    //! Pointer to the vector data of the current element
    const void *data() const override {
      return features_iter_->second.data();
    }

    //! Valid until the end of the owner's storage has been reached
    bool is_valid() const override {
      return (features_iter_ != holder_->features_.end());
    }

    //! Primary key of the current element
    uint64_t key() const override {
      return features_iter_->first;
    }

    //! Step forward; the element left behind stays in the holder
    void next() override {
      ++features_iter_;
    }

   private:
    //! Iterator type of the owner's element storage
    using FeatureIterator = typename std::vector<
        std::pair<uint64_t, ailego::NumericalVector<T>>>::iterator;

    MultiPassNumericalIndexHolder *holder_{nullptr};
    FeatureIterator features_iter_{};
  };

  //! Constructor
  MultiPassNumericalIndexHolder(size_t dim) : dimension_(dim) {}

  //! Number of elements currently stored
  size_t count() const override {
    return features_.size();
  }

  //! Retrieve dimension
  size_t dimension() const override {
    return dimension_;
  }

  //! Retrieve type information (undefined at this level)
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DataType::DT_UNDEFINED;
  }

  //! Retrieve element size in bytes
  size_t element_size() const override {
    return dimension_ * sizeof(T);
  }

  //! Multi-pass holders can be traversed repeatedly
  bool multipass() const override {
    return true;
  }

  //! Create a new iterator
  IndexHolder::Iterator::Pointer create_iterator() override {
    IndexHolder::Iterator::Pointer iter(
        new MultiPassNumericalIndexHolder::Iterator(this));
    return iter;
  }

  //! Append a copy of the vector; rejected when its size is not the dimension
  bool emplace(uint64_t key, const ailego::NumericalVector<T> &vec) {
    const bool accepted = (vec.size() == dimension_);
    if (accepted) {
      features_.emplace_back(key, vec);
    }
    return accepted;
  }

  //! Append by moving the vector; rejected when its size is not the dimension
  bool emplace(uint64_t key, ailego::NumericalVector<T> &&vec) {
    if (vec.size() == dimension_) {
      features_.emplace_back(key, std::move(vec));
      return true;
    }
    return false;
  }

  //! Request a change in capacity
  void reserve(size_t size) {
    features_.reserve(size);
  }

  //! Get vector data pointer by index (nullptr when index is out of range)
  const void *get_vector_by_index(size_t index) const {
    if (index < features_.size()) {
      return features_[index].second.data();
    }
    return nullptr;
  }

 protected:
  //! Members
  size_t dimension_{0};
  std::vector<std::pair<uint64_t, ailego::NumericalVector<T>>> features_;

 private:
  //! Disable them
  MultiPassNumericalIndexHolder() = delete;
};

/*! One-Pass Binary Index Holder
 *
 *  Stores (key, binary vector) pairs in a list. Advancing an iterator erases
 *  the element it leaves behind, so the content can be traversed only once.
 */
template <typename T>
class OnePassBinaryIndexHolder : public IndexHolder {
 public:
  /*! One-Pass Index Holder Iterator
   */
  class Iterator : public IndexHolder::Iterator {
   public:
    //! Index Holder Iterator Pointer
    using Pointer = std::unique_ptr<Iterator>;

    //! Constructor, positioned on the first stored element of the owner
    Iterator(OnePassBinaryIndexHolder *owner)
        : holder_(owner), features_iter_(owner->features_.begin()) {}

    //! Destructor
    virtual ~Iterator() = default;

    //! Pointer to the vector data of the current element
    const void *data() const override {
      return features_iter_->second.data();
    }

    //! Valid until the end of the owner's list has been reached
    bool is_valid() const override {
      return (features_iter_ != holder_->features_.end());
    }

    //! Primary key of the current element
    uint64_t key() const override {
      return features_iter_->first;
    }

    //! Step forward and erase the element that was just left behind
    void next() override {
      auto consumed = features_iter_;
      ++features_iter_;
      holder_->features_.erase(consumed);
    }

   private:
    //! Iterator type of the owner's element list
    using FeatureIterator = typename std::list<
        std::pair<uint64_t, ailego::BinaryVector<T>>>::iterator;

    OnePassBinaryIndexHolder *holder_{nullptr};
    FeatureIterator features_iter_{};
  };

  //! Constructor
  OnePassBinaryIndexHolder(size_t dim) : dimension_(dim) {}

  //! Number of elements currently stored
  size_t count() const override {
    return features_.size();
  }

  //! Retrieve dimension
  size_t dimension() const override {
    return dimension_;
  }

  //! Retrieve type information (undefined at this level)
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DataType::DT_UNDEFINED;
  }

  //! Element size in bytes: the dimension rounded up to whole units of T,
  //! where each unit holds sizeof(T) * 8 bits
  size_t element_size() const override {
    const size_t bits_per_unit = sizeof(T) << 3;
    const size_t unit_count = (dimension_ + bits_per_unit - 1) / bits_per_unit;
    return unit_count * sizeof(T);
  }

  //! One-pass holders cannot be traversed repeatedly
  bool multipass() const override {
    return false;
  }

  //! Create a new iterator
  IndexHolder::Iterator::Pointer create_iterator() override {
    IndexHolder::Iterator::Pointer iter(
        new OnePassBinaryIndexHolder::Iterator(this));
    return iter;
  }

  //! Append a copy of the vector; rejected when its size is not the dimension
  bool emplace(uint64_t key, const ailego::BinaryVector<T> &vec) {
    const bool accepted = (vec.size() == dimension_);
    if (accepted) {
      features_.emplace_back(key, vec);
    }
    return accepted;
  }

  //! Append by moving the vector; rejected when its size is not the dimension
  bool emplace(uint64_t key, ailego::BinaryVector<T> &&vec) {
    if (vec.size() == dimension_) {
      features_.emplace_back(key, std::move(vec));
      return true;
    }
    return false;
  }

 private:
  //! Disable them
  OnePassBinaryIndexHolder() = delete;

  //! Members
  size_t dimension_{0};
  std::list<std::pair<uint64_t, ailego::BinaryVector<T>>> features_;
};

/*! Multi-Pass Binary Index Holder
 *
 *  Stores (key, binary vector) pairs in a std::vector. Iterators only walk
 *  over the elements without removing them, so the content can be traversed
 *  repeatedly.
 */
template <typename T>
class MultiPassBinaryIndexHolder : public IndexHolder {
 public:
  /*! Multi-Pass Index Holder Iterator
   */
  class Iterator : public IndexHolder::Iterator {
   public:
    //! Index Holder Iterator Pointer
    using Pointer = std::unique_ptr<Iterator>;

    //! Constructor, positioned on the first stored element of the owner
    Iterator(MultiPassBinaryIndexHolder *owner)
        : holder_(owner), features_iter_(owner->features_.begin()) {}

    //! Destructor
    virtual ~Iterator() = default;

    //! Pointer to the vector data of the current element
    const void *data() const override {
      return features_iter_->second.data();
    }

    //! Valid until the end of the owner's storage has been reached
    bool is_valid() const override {
      return (features_iter_ != holder_->features_.end());
    }

    //! Primary key of the current element
    uint64_t key() const override {
      return features_iter_->first;
    }

    //! Step forward; the element left behind stays in the holder
    void next() override {
      ++features_iter_;
    }

   private:
    //! Iterator type of the owner's element storage
    using FeatureIterator = typename std::vector<
        std::pair<uint64_t, ailego::BinaryVector<T>>>::iterator;

    MultiPassBinaryIndexHolder *holder_{nullptr};
    FeatureIterator features_iter_{};
  };

  //! Constructor
  MultiPassBinaryIndexHolder(size_t dim) : dimension_(dim) {}

  //! Number of elements currently stored
  size_t count() const override {
    return features_.size();
  }

  //! Retrieve dimension
  size_t dimension() const override {
    return dimension_;
  }

  //! Retrieve type information (undefined at this level)
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DataType::DT_UNDEFINED;
  }

  //! Element size in bytes: the dimension rounded up to whole units of T,
  //! where each unit holds sizeof(T) * 8 bits
  size_t element_size() const override {
    const size_t bits_per_unit = sizeof(T) << 3;
    const size_t unit_count = (dimension_ + bits_per_unit - 1) / bits_per_unit;
    return unit_count * sizeof(T);
  }

  //! Multi-pass holders can be traversed repeatedly
  bool multipass() const override {
    return true;
  }

  //! Create a new iterator
  IndexHolder::Iterator::Pointer create_iterator() override {
    IndexHolder::Iterator::Pointer iter(
        new MultiPassBinaryIndexHolder::Iterator(this));
    return iter;
  }

  //! Append a copy of the vector; rejected when its size is not the dimension
  bool emplace(uint64_t key, const ailego::BinaryVector<T> &vec) {
    const bool accepted = (vec.size() == dimension_);
    if (accepted) {
      features_.emplace_back(key, vec);
    }
    return accepted;
  }

  //! Append by moving the vector; rejected when its size is not the dimension
  bool emplace(uint64_t key, ailego::BinaryVector<T> &&vec) {
    if (vec.size() == dimension_) {
      features_.emplace_back(key, std::move(vec));
      return true;
    }
    return false;
  }

  //! Request a change in capacity
  void reserve(size_t size) {
    features_.reserve(size);
  }

  //! Get vector data pointer by index (nullptr when index is out of range)
  const void *get_vector_by_index(size_t index) const {
    if (index < features_.size()) {
      return features_[index].second.data();
    }
    return nullptr;
  }

 protected:
  //! Members
  size_t dimension_{0};
  std::vector<std::pair<uint64_t, ailego::BinaryVector<T>>> features_;

 private:
  //! Disable them
  MultiPassBinaryIndexHolder() = delete;
};

/*! One-Pass Index Hybrid Holder
 *
 *  Stores (key, hybrid vector) pairs in a list. Advancing an iterator erases
 *  the element it leaves behind, so the content can be traversed only once.
 *  The sparse counts of all emplaced vectors are accumulated and reported by
 *  total_sparse_count(); erasing elements does not decrease that total.
 */
template <typename T>
class OnePassIndexHybridHolderBase : public IndexHybridHolder {
 public:
  /*! One-Pass Index Holder Iterator
   */
  class Iterator : public IndexHybridHolder::Iterator {
   public:
    //! Index Holder Iterator Pointer
    typedef std::unique_ptr<Iterator> Pointer;

    //! Constructor, positioned on the first stored element of the owner
    Iterator(OnePassIndexHybridHolderBase *owner) : holder_(owner) {
      features_iter_ = holder_->features_.begin();
    }

    //! Destructor
    virtual ~Iterator(void) {}

    //! Retrieve pointer of data
    const void *data(void) const override {
      return features_iter_->second.data();
    }

    //! Test if the iterator is valid
    bool is_valid(void) const override {
      return (features_iter_ != holder_->features_.end());
    }

    //! Retrieve primary key
    uint64_t key(void) const override {
      return features_iter_->first;
    }

    //! Step forward and erase the element that was just left behind
    void next(void) override {
      holder_->features_.erase(features_iter_++);
    }

    //! Retrieve sparse count of the current element
    uint32_t sparse_count() const override {
      return features_iter_->second.sparse_count();
    }

    //! Retrieve sparse indices of the current element
    const uint32_t *sparse_indices() const override {
      return features_iter_->second.sparse_indices();
    }

    //! Retrieve sparse data of the current element
    const void *sparse_data() const override {
      return features_iter_->second.sparse_data();
    }

   private:
    OnePassIndexHybridHolderBase *holder_{nullptr};
    typename std::list<std::pair<uint64_t, ailego::HybridVector<T>>>::iterator
        features_iter_{};
  };

  //! Constructor
  OnePassIndexHybridHolderBase(size_t dim) : dimension_(dim) {}

  //! Number of elements currently stored
  size_t count(void) const override {
    return features_.size();
  }

  //! Retrieve dimension
  size_t dimension(void) const override {
    return dimension_;
  }

  //! Retrieve type information (undefined at this level)
  IndexMeta::DataType data_type(void) const override {
    return IndexMeta::DataType::DT_UNDEFINED;
  }

  //! Retrieve element size in bytes
  size_t element_size(void) const override {
    return dimension_ * sizeof(T);
  }

  //! One-pass holders cannot be traversed repeatedly
  bool multipass(void) const override {
    return false;
  }

  //! Create a new iterator
  IndexHolder::Iterator::Pointer create_iterator(void) override {
    return IndexHolder::Iterator::Pointer(
        new OnePassIndexHybridHolderBase::Iterator(this));
  }

  //! Create a new hybrid iterator
  IndexHybridHolder::Iterator::Pointer create_hybrid_iterator(void) override {
    return IndexHybridHolder::Iterator::Pointer(
        new OnePassIndexHybridHolderBase::Iterator(this));
  }

  //! Retrieve sparse count summing up over all the emplaced docs
  size_t total_sparse_count(void) const override {
    return total_sparse_count_;
  }

  //! Append a copy of the vector; rejected when its size is not the dimension
  bool emplace(uint64_t key, const ailego::HybridVector<T> &vec) {
    if (vec.size() != dimension_) {
      return false;
    }
    features_.emplace_back(key, vec);

    total_sparse_count_ += vec.sparse_count();

    return true;
  }

  //! Append by moving the vector; rejected when its size is not the dimension
  bool emplace(uint64_t key, ailego::HybridVector<T> &&vec) {
    if (vec.size() != dimension_) {
      return false;
    }
    // Read the sparse count before the vector is moved from; querying it
    // afterwards would inspect a moved-from object.
    const auto sparse_count = vec.sparse_count();
    features_.emplace_back(key, std::move(vec));

    total_sparse_count_ += sparse_count;

    return true;
  }

 private:
  //! Disable them
  OnePassIndexHybridHolderBase(void) = delete;

  //! Members
  size_t dimension_{0};
  std::list<std::pair<uint64_t, ailego::HybridVector<T>>> features_;
  size_t total_sparse_count_{0};
};

/*! Multi-Pass Index Hybrid Holder Base
 *
 *  Stores (key, hybrid vector) pairs in a std::vector. Iterators only walk
 *  over the elements without removing them, so the content can be traversed
 *  repeatedly. The sparse counts of all emplaced vectors are accumulated and
 *  reported by total_sparse_count().
 */
template <typename T>
class MultiPassIndexHybridHolderBase : public IndexHybridHolder {
 public:
  /*! Multi-Pass Index Holder Iterator
   */
  class Iterator : public IndexHybridHolder::Iterator {
   public:
    //! Index Holder Iterator Pointer
    typedef std::unique_ptr<Iterator> Pointer;

    //! Constructor, positioned on the first stored element of the owner
    Iterator(MultiPassIndexHybridHolderBase *owner) : holder_(owner) {
      features_iter_ = holder_->features_.begin();
    }

    //! Destructor
    virtual ~Iterator(void) {}

    //! Retrieve pointer of data
    const void *data(void) const override {
      return features_iter_->second.data();
    }

    //! Test if the iterator is valid
    bool is_valid(void) const override {
      return (features_iter_ != holder_->features_.end());
    }

    //! Retrieve primary key
    uint64_t key(void) const override {
      return features_iter_->first;
    }

    //! Step forward; the element left behind stays in the holder
    void next(void) override {
      ++features_iter_;
    }

    //! Retrieve sparse count of the current element
    uint32_t sparse_count() const override {
      return features_iter_->second.sparse_count();
    }

    //! Retrieve sparse indices of the current element
    const uint32_t *sparse_indices() const override {
      return features_iter_->second.sparse_indices();
    }

    //! Retrieve sparse data of the current element
    const void *sparse_data() const override {
      return features_iter_->second.sparse_data();
    }

   private:
    MultiPassIndexHybridHolderBase *holder_{nullptr};
    typename std::vector<std::pair<uint64_t, ailego::HybridVector<T>>>::iterator
        features_iter_{};
  };

  //! Constructor
  MultiPassIndexHybridHolderBase(size_t dim) : dimension_(dim) {}

  //! Number of elements currently stored
  size_t count(void) const override {
    return features_.size();
  }

  //! Retrieve dimension
  size_t dimension(void) const override {
    return dimension_;
  }

  //! Retrieve type information (undefined at this level)
  IndexMeta::DataType data_type(void) const override {
    return IndexMeta::DataType::DT_UNDEFINED;
  }

  //! Retrieve element size in bytes
  size_t element_size(void) const override {
    return dimension_ * sizeof(T);
  }

  //! Multi-pass holders can be traversed repeatedly
  bool multipass(void) const override {
    return true;
  }

  //! Create a new iterator
  IndexHolder::Iterator::Pointer create_iterator(void) override {
    return IndexHolder::Iterator::Pointer(
        new MultiPassIndexHybridHolderBase::Iterator(this));
  }

  //! Create a new hybrid iterator
  IndexHybridHolder::Iterator::Pointer create_hybrid_iterator(void) override {
    return IndexHybridHolder::Iterator::Pointer(
        new MultiPassIndexHybridHolderBase::Iterator(this));
  }

  //! Retrieve sparse count summing up over all the emplaced docs
  size_t total_sparse_count(void) const override {
    // Report the total maintained by emplace() rather than a constant zero.
    return total_sparse_count_;
  }

  //! Append a copy of the vector; rejected when its size is not the dimension
  bool emplace(uint64_t key, const ailego::HybridVector<T> &vec) {
    if (vec.size() != dimension_) {
      return false;
    }

    features_.emplace_back(key, vec);

    total_sparse_count_ += vec.sparse_count();

    return true;
  }

  //! Append by moving the vector; rejected when its size is not the dimension
  bool emplace(uint64_t key, ailego::HybridVector<T> &&vec) {
    if (vec.size() != dimension_) {
      return false;
    }
    // Read the sparse count before the vector is moved from; querying it
    // afterwards would inspect a moved-from object.
    const auto sparse_count = vec.sparse_count();
    features_.emplace_back(key, std::move(vec));

    total_sparse_count_ += sparse_count;

    return true;
  }

  //! Request a change in capacity
  void reserve(size_t size) {
    features_.reserve(size);
  }

 private:
  //! Disable them
  MultiPassIndexHybridHolderBase(void) = delete;

  //! Members
  size_t dimension_{0};
  std::vector<std::pair<uint64_t, ailego::HybridVector<T>>> features_;
  size_t total_sparse_count_{0};
};

/*! One-Pass Index Sparse Holder
 *
 *  Stores (key, sparse vector) pairs in a list. Advancing an iterator erases
 *  the element it leaves behind, so the content can be traversed only once.
 *  The sparse counts of all emplaced vectors are accumulated and reported by
 *  total_sparse_count(); erasing elements does not decrease that total.
 */
template <typename T>
class OnePassIndexSparseHolderBase : public IndexSparseHolder {
 public:
  /*! One-Pass Index Holder Iterator
   */
  class Iterator : public IndexSparseHolder::Iterator {
   public:
    //! Index Holder Iterator Pointer
    typedef std::unique_ptr<Iterator> Pointer;

    //! Constructor, positioned on the first stored element of the owner
    Iterator(OnePassIndexSparseHolderBase *owner) : holder_(owner) {
      features_iter_ = holder_->features_.begin();
    }

    //! Destructor
    virtual ~Iterator(void) {}

    //! Test if the iterator is valid
    bool is_valid(void) const override {
      return (features_iter_ != holder_->features_.end());
    }

    //! Retrieve primary key
    uint64_t key(void) const override {
      return features_iter_->first;
    }

    //! Step forward and erase the element that was just left behind
    void next(void) override {
      holder_->features_.erase(features_iter_++);
    }

    //! Retrieve sparse count of the current element
    uint32_t sparse_count() const override {
      return features_iter_->second.sparse_count();
    }

    //! Retrieve sparse indices of the current element
    const uint32_t *sparse_indices() const override {
      return features_iter_->second.sparse_indices();
    }

    //! Retrieve sparse data of the current element
    const void *sparse_data() const override {
      return features_iter_->second.sparse_data();
    }

   private:
    OnePassIndexSparseHolderBase *holder_{nullptr};
    typename std::list<std::pair<uint64_t, ailego::SparseVector<T>>>::iterator
        features_iter_{};
  };

  //! Constructor
  OnePassIndexSparseHolderBase() {}

  //! Number of elements currently stored
  size_t count(void) const override {
    return features_.size();
  }

  //! Retrieve type information (undefined at this level)
  IndexMeta::DataType data_type(void) const override {
    return IndexMeta::DataType::DT_UNDEFINED;
  }

  //! One-pass holders cannot be traversed repeatedly
  bool multipass(void) const override {
    return false;
  }

  //! Create a new iterator
  IndexSparseHolder::Iterator::Pointer create_iterator(void) override {
    return IndexSparseHolder::Iterator::Pointer(
        new OnePassIndexSparseHolderBase::Iterator(this));
  }

  //! Retrieve sparse count summing up over all the emplaced docs
  size_t total_sparse_count(void) const override {
    return total_sparse_count_;
  }

  //! Append a copy of the vector (always succeeds)
  bool emplace(uint64_t key, const ailego::SparseVector<T> &vec) {
    features_.emplace_back(key, vec);

    total_sparse_count_ += vec.sparse_count();

    return true;
  }

  //! Append by moving the vector (always succeeds)
  bool emplace(uint64_t key, ailego::SparseVector<T> &&vec) {
    // Read the sparse count before the vector is moved from; querying it
    // afterwards would inspect a moved-from object.
    const auto sparse_count = vec.sparse_count();
    features_.emplace_back(key, std::move(vec));

    total_sparse_count_ += sparse_count;

    return true;
  }

 private:
  //! Members
  std::list<std::pair<uint64_t, ailego::SparseVector<T>>> features_;
  size_t total_sparse_count_{0};
};

/*! Multi-Pass Index Sparse Holder Base
 *
 *  Stores (key, sparse vector) pairs in a std::vector. Iterators only walk
 *  over the elements without removing them, so the content can be traversed
 *  repeatedly. The sparse counts of all emplaced vectors are accumulated and
 *  reported by total_sparse_count().
 */
template <typename T>
class MultiPassIndexSparseHolderBase : public IndexSparseHolder {
 public:
  /*! Multi-Pass Index Holder Iterator
   */
  class Iterator : public IndexSparseHolder::Iterator {
   public:
    //! Index Holder Iterator Pointer
    typedef std::unique_ptr<Iterator> Pointer;

    //! Constructor, positioned on the first stored element of the owner
    Iterator(MultiPassIndexSparseHolderBase *owner) : holder_(owner) {
      features_iter_ = holder_->features_.begin();
    }

    //! Destructor
    virtual ~Iterator(void) {}

    //! Test if the iterator is valid
    bool is_valid(void) const override {
      return (features_iter_ != holder_->features_.end());
    }

    //! Retrieve primary key
    uint64_t key(void) const override {
      return features_iter_->first;
    }

    //! Step forward; the element left behind stays in the holder
    void next(void) override {
      ++features_iter_;
    }

    //! Retrieve sparse count of the current element
    uint32_t sparse_count() const override {
      return features_iter_->second.sparse_count();
    }

    //! Retrieve sparse indices of the current element
    const uint32_t *sparse_indices() const override {
      return features_iter_->second.sparse_indices();
    }

    //! Retrieve sparse data of the current element
    const void *sparse_data() const override {
      return features_iter_->second.sparse_data();
    }

   private:
    MultiPassIndexSparseHolderBase *holder_{nullptr};
    typename std::vector<std::pair<uint64_t, ailego::SparseVector<T>>>::iterator
        features_iter_{};
  };

  //! Constructor
  MultiPassIndexSparseHolderBase() {}

  //! Number of elements currently stored
  size_t count(void) const override {
    return features_.size();
  }

  //! Retrieve type information (undefined at this level)
  IndexMeta::DataType data_type(void) const override {
    return IndexMeta::DataType::DT_UNDEFINED;
  }

  //! Multi-pass holders can be traversed repeatedly
  bool multipass(void) const override {
    return true;
  }

  //! Create a new iterator
  IndexSparseHolder::Iterator::Pointer create_iterator(void) override {
    return IndexSparseHolder::Iterator::Pointer(
        new MultiPassIndexSparseHolderBase::Iterator(this));
  }

  //! Retrieve sparse count summing up over all the emplaced docs
  size_t total_sparse_count(void) const override {
    // Report the total maintained by emplace() rather than a constant zero.
    return total_sparse_count_;
  }

  //! Append a copy of the vector (always succeeds)
  bool emplace(uint64_t key, const ailego::SparseVector<T> &vec) {
    features_.emplace_back(key, vec);

    total_sparse_count_ += vec.sparse_count();

    return true;
  }

  //! Append by moving the vector (always succeeds)
  bool emplace(uint64_t key, ailego::SparseVector<T> &&vec) {
    // Read the sparse count before the vector is moved from; querying it
    // afterwards would inspect a moved-from object.
    const auto sparse_count = vec.sparse_count();
    features_.emplace_back(key, std::move(vec));

    total_sparse_count_ += sparse_count;

    return true;
  }

  //! Request a change in capacity
  void reserve(size_t size) {
    features_.reserve(size);
  }

 private:
  //! Members
  std::vector<std::pair<uint64_t, ailego::SparseVector<T>>> features_;
  size_t total_sparse_count_{0};
};

/*! One-Pass Index Holder
 *
 *  Primary template selected by an IndexMeta::DataType value. It is only
 *  declared here, without a definition; explicit specializations for
 *  individual data types follow.
 */
template <IndexMeta::DataType FT>
struct OnePassIndexHolder;

/*! One-Pass Index Holder (BINARY32)
 *
 *  One-pass binary holder over uint32_t units that reports DT_BINARY32.
 */
template <>
struct OnePassIndexHolder<IndexMeta::DataType::DT_BINARY32>
    : public OnePassBinaryIndexHolder<uint32_t> {
  //! Inherit the constructors of the base holder
  using OnePassBinaryIndexHolder<uint32_t>::OnePassBinaryIndexHolder;

  //! Data type stored by this holder
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DataType::DT_BINARY32;
  }
};

/*! One-Pass Index Holder specialized for DT_BINARY64
 */
template <>
struct OnePassIndexHolder<IndexMeta::DataType::DT_BINARY64>
    : OnePassBinaryIndexHolder<uint64_t> {
  //! Inherit the constructors of the base holder
  using OnePassBinaryIndexHolder::OnePassBinaryIndexHolder;

  //! Data type tag reported by this holder
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DT_BINARY64;
  }
};

/*! One-Pass Index Holder specialized for DT_FP16
 */
template <>
struct OnePassIndexHolder<IndexMeta::DataType::DT_FP16>
    : OnePassNumericalIndexHolder<ailego::Float16> {
  //! Inherit the constructors of the base holder
  using OnePassNumericalIndexHolder::OnePassNumericalIndexHolder;

  //! Data type tag reported by this holder
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DT_FP16;
  }
};

/*! One-Pass Index Holder specialized for DT_FP32
 */
template <>
struct OnePassIndexHolder<IndexMeta::DataType::DT_FP32>
    : OnePassNumericalIndexHolder<float> {
  //! Inherit the constructors of the base holder
  using OnePassNumericalIndexHolder::OnePassNumericalIndexHolder;

  //! Data type tag reported by this holder
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DT_FP32;
  }
};

/*! One-Pass Index Holder specialized for DT_FP64
 */
template <>
struct OnePassIndexHolder<IndexMeta::DataType::DT_FP64>
    : OnePassNumericalIndexHolder<double> {
  //! Inherit the constructors of the base holder
  using OnePassNumericalIndexHolder::OnePassNumericalIndexHolder;

  //! Data type tag reported by this holder
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DT_FP64;
  }
};

/*! One-Pass Index Holder specialized for DT_INT8
 */
template <>
struct OnePassIndexHolder<IndexMeta::DataType::DT_INT8>
    : OnePassNumericalIndexHolder<int8_t> {
  //! Inherit the constructors of the base holder
  using OnePassNumericalIndexHolder::OnePassNumericalIndexHolder;

  //! Data type tag reported by this holder
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DT_INT8;
  }
};

/*! One-Pass Index Holder specialized for DT_INT16
 */
template <>
struct OnePassIndexHolder<IndexMeta::DataType::DT_INT16>
    : OnePassNumericalIndexHolder<int16_t> {
  //! Inherit the constructors of the base holder
  using OnePassNumericalIndexHolder::OnePassNumericalIndexHolder;

  //! Data type tag reported by this holder
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DT_INT16;
  }
};

/*! Multi-Pass Index Holder
 */
template <IndexMeta::DataType FT>
struct MultiPassIndexHolder;

/*! Multi-Pass Index Holder specialized for DT_BINARY32
 */
template <>
struct MultiPassIndexHolder<IndexMeta::DataType::DT_BINARY32>
    : MultiPassBinaryIndexHolder<uint32_t> {
  //! Inherit the constructors of the base holder
  using MultiPassBinaryIndexHolder::MultiPassBinaryIndexHolder;

  //! Data type tag reported by this holder
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DT_BINARY32;
  }
};

/*! Multi-Pass Index Holder specialized for DT_BINARY64
 */
template <>
struct MultiPassIndexHolder<IndexMeta::DataType::DT_BINARY64>
    : MultiPassBinaryIndexHolder<uint64_t> {
  //! Inherit the constructors of the base holder
  using MultiPassBinaryIndexHolder::MultiPassBinaryIndexHolder;

  //! Data type tag reported by this holder
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DT_BINARY64;
  }
};

/*! Multi-Pass Index Holder specialized for DT_FP16
 */
template <>
struct MultiPassIndexHolder<IndexMeta::DataType::DT_FP16>
    : MultiPassNumericalIndexHolder<ailego::Float16> {
  //! Inherit the constructors of the base holder
  using MultiPassNumericalIndexHolder::MultiPassNumericalIndexHolder;

  //! Data type tag reported by this holder
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DT_FP16;
  }
};

/*! Multi-Pass Index Holder specialized for DT_FP32
 */
template <>
struct MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>
    : MultiPassNumericalIndexHolder<float> {
  //! Inherit the constructors of the base holder
  using MultiPassNumericalIndexHolder::MultiPassNumericalIndexHolder;

  //! Data type tag reported by this holder
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DT_FP32;
  }
};

/*! Multi-Pass Index Holder specialized for DT_FP64
 */
template <>
struct MultiPassIndexHolder<IndexMeta::DataType::DT_FP64>
    : MultiPassNumericalIndexHolder<double> {
  //! Inherit the constructors of the base holder
  using MultiPassNumericalIndexHolder::MultiPassNumericalIndexHolder;

  //! Data type tag reported by this holder
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DT_FP64;
  }
};

/*! Multi-Pass Index Holder specialized for DT_INT8
 */
template <>
struct MultiPassIndexHolder<IndexMeta::DataType::DT_INT8>
    : MultiPassNumericalIndexHolder<int8_t> {
  //! Inherit the constructors of the base holder
  using MultiPassNumericalIndexHolder::MultiPassNumericalIndexHolder;

  //! Data type tag reported by this holder
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DT_INT8;
  }
};

/*! Multi-Pass Index Holder specialized for DT_INT16
 */
template <>
struct MultiPassIndexHolder<IndexMeta::DataType::DT_INT16>
    : MultiPassNumericalIndexHolder<int16_t> {
  //! Inherit the constructors of the base holder
  using MultiPassNumericalIndexHolder::MultiPassNumericalIndexHolder;

  //! Data type tag reported by this holder
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DT_INT16;
  }
};

/*! One-Pass Index Hybrid Holder
 */
template <IndexMeta::DataType FT>
struct OnePassIndexHybridHolder;

/*! One-Pass Index Hybrid Holder specialized for DT_FP16
 */
template <>
struct OnePassIndexHybridHolder<IndexMeta::DataType::DT_FP16>
    : OnePassIndexHybridHolderBase<ailego::Float16> {
  //! Inherit the constructors of the base holder
  using OnePassIndexHybridHolderBase::OnePassIndexHybridHolderBase;

  //! Data type tag reported by this holder
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DT_FP16;
  }
};

/*! One-Pass Index Hybrid Holder specialized for DT_FP32
 */
template <>
struct OnePassIndexHybridHolder<IndexMeta::DataType::DT_FP32>
    : OnePassIndexHybridHolderBase<float> {
  //! Inherit the constructors of the base holder
  using OnePassIndexHybridHolderBase::OnePassIndexHybridHolderBase;

  //! Data type tag reported by this holder
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DT_FP32;
  }
};

/*! One-Pass Index Hybrid Holder specialized for DT_FP64
 */
template <>
struct OnePassIndexHybridHolder<IndexMeta::DataType::DT_FP64>
    : OnePassIndexHybridHolderBase<double> {
  //! Inherit the constructors of the base holder
  using OnePassIndexHybridHolderBase::OnePassIndexHybridHolderBase;

  //! Data type tag reported by this holder
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DT_FP64;
  }
};

/*! One-Pass Index Hybrid Holder specialized for DT_INT8
 */
template <>
struct OnePassIndexHybridHolder<IndexMeta::DataType::DT_INT8>
    : OnePassIndexHybridHolderBase<int8_t> {
  //! Inherit the constructors of the base holder
  using OnePassIndexHybridHolderBase::OnePassIndexHybridHolderBase;

  //! Data type tag reported by this holder
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DT_INT8;
  }
};

/*! One-Pass Index Hybrid Holder specialized for DT_INT16
 */
template <>
struct OnePassIndexHybridHolder<IndexMeta::DataType::DT_INT16>
    : OnePassIndexHybridHolderBase<int16_t> {
  //! Inherit the constructors of the base holder
  using OnePassIndexHybridHolderBase::OnePassIndexHybridHolderBase;

  //! Data type tag reported by this holder
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DT_INT16;
  }
};

/*! Multi-Pass Index Hybrid Holder
 */
template <IndexMeta::DataType FT>
struct MultiPassIndexHybridHolder;

/*! Multi-Pass Index Hybrid Holder specialized for DT_FP16
 */
template <>
struct MultiPassIndexHybridHolder<IndexMeta::DataType::DT_FP16>
    : MultiPassIndexHybridHolderBase<ailego::Float16> {
  //! Inherit the constructors of the base holder
  using MultiPassIndexHybridHolderBase::MultiPassIndexHybridHolderBase;

  //! Data type tag reported by this holder
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DT_FP16;
  }
};

/*! Multi-Pass Index Hybrid Holder specialized for DT_FP32
 */
template <>
struct MultiPassIndexHybridHolder<IndexMeta::DataType::DT_FP32>
    : MultiPassIndexHybridHolderBase<float> {
  //! Inherit the constructors of the base holder
  using MultiPassIndexHybridHolderBase::MultiPassIndexHybridHolderBase;

  //! Data type tag reported by this holder
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DT_FP32;
  }
};

/*! Multi-Pass Index Hybrid Holder specialized for DT_FP64
 */
template <>
struct MultiPassIndexHybridHolder<IndexMeta::DataType::DT_FP64>
    : MultiPassIndexHybridHolderBase<double> {
  //! Inherit the constructors of the base holder
  using MultiPassIndexHybridHolderBase::MultiPassIndexHybridHolderBase;

  //! Data type tag reported by this holder
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DT_FP64;
  }
};

/*! Multi-Pass Index Hybrid Holder specialized for DT_INT8
 */
template <>
struct MultiPassIndexHybridHolder<IndexMeta::DataType::DT_INT8>
    : MultiPassIndexHybridHolderBase<int8_t> {
  //! Inherit the constructors of the base holder
  using MultiPassIndexHybridHolderBase::MultiPassIndexHybridHolderBase;

  //! Data type tag reported by this holder
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DT_INT8;
  }
};

/*! Multi-Pass Index Hybrid Holder specialized for DT_INT16
 */
template <>
struct MultiPassIndexHybridHolder<IndexMeta::DataType::DT_INT16>
    : MultiPassIndexHybridHolderBase<int16_t> {
  //! Inherit the constructors of the base holder
  using MultiPassIndexHybridHolderBase::MultiPassIndexHybridHolderBase;

  //! Data type tag reported by this holder
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DT_INT16;
  }
};

/*! One-Pass Index Sparse Holder
 */
template <IndexMeta::DataType FT>
struct OnePassIndexSparseHolder;

/*! One-Pass Index Sparse Holder specialized for DT_FP16
 */
template <>
struct OnePassIndexSparseHolder<IndexMeta::DataType::DT_FP16>
    : OnePassIndexSparseHolderBase<ailego::Float16> {
  //! Inherit the constructors of the base holder
  using OnePassIndexSparseHolderBase::OnePassIndexSparseHolderBase;

  //! Data type tag reported by this holder
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DT_FP16;
  }
};

/*! One-Pass Index Sparse Holder specialized for DT_FP32
 */
template <>
struct OnePassIndexSparseHolder<IndexMeta::DataType::DT_FP32>
    : OnePassIndexSparseHolderBase<float> {
  //! Inherit the constructors of the base holder
  using OnePassIndexSparseHolderBase::OnePassIndexSparseHolderBase;

  //! Data type tag reported by this holder
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DT_FP32;
  }
};

/*! One-Pass Index Sparse Holder specialized for DT_FP64
 */
template <>
struct OnePassIndexSparseHolder<IndexMeta::DataType::DT_FP64>
    : OnePassIndexSparseHolderBase<double> {
  //! Inherit the constructors of the base holder
  using OnePassIndexSparseHolderBase::OnePassIndexSparseHolderBase;

  //! Data type tag reported by this holder
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DT_FP64;
  }
};

/*! One-Pass Index Sparse Holder specialized for DT_INT8
 */
template <>
struct OnePassIndexSparseHolder<IndexMeta::DataType::DT_INT8>
    : OnePassIndexSparseHolderBase<int8_t> {
  //! Inherit the constructors of the base holder
  using OnePassIndexSparseHolderBase::OnePassIndexSparseHolderBase;

  //! Data type tag reported by this holder
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DT_INT8;
  }
};

/*! One-Pass Index Sparse Holder specialized for DT_INT16
 */
template <>
struct OnePassIndexSparseHolder<IndexMeta::DataType::DT_INT16>
    : OnePassIndexSparseHolderBase<int16_t> {
  //! Inherit the constructors of the base holder
  using OnePassIndexSparseHolderBase::OnePassIndexSparseHolderBase;

  //! Data type tag reported by this holder
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DT_INT16;
  }
};

/*! Multi-Pass Index Sparse Holder
 */
template <IndexMeta::DataType FT>
struct MultiPassIndexSparseHolder;

/*! Multi-Pass Index Sparse Holder specialized for DT_FP16
 */
template <>
struct MultiPassIndexSparseHolder<IndexMeta::DataType::DT_FP16>
    : MultiPassIndexSparseHolderBase<ailego::Float16> {
  //! Inherit the constructors of the base holder
  using MultiPassIndexSparseHolderBase::MultiPassIndexSparseHolderBase;

  //! Data type tag reported by this holder
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DT_FP16;
  }
};

/*! Multi-Pass Index Sparse Holder specialized for DT_FP32
 */
template <>
struct MultiPassIndexSparseHolder<IndexMeta::DataType::DT_FP32>
    : MultiPassIndexSparseHolderBase<float> {
  //! Inherit the constructors of the base holder
  using MultiPassIndexSparseHolderBase::MultiPassIndexSparseHolderBase;

  //! Data type tag reported by this holder
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DT_FP32;
  }
};

/*! Multi-Pass Index Sparse Holder specialized for DT_FP64
 */
template <>
struct MultiPassIndexSparseHolder<IndexMeta::DataType::DT_FP64>
    : MultiPassIndexSparseHolderBase<double> {
  //! Inherit the constructors of the base holder
  using MultiPassIndexSparseHolderBase::MultiPassIndexSparseHolderBase;

  //! Data type tag reported by this holder
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DT_FP64;
  }
};

/*! Multi-Pass Index Sparse Holder specialized for DT_INT8
 */
template <>
struct MultiPassIndexSparseHolder<IndexMeta::DataType::DT_INT8>
    : MultiPassIndexSparseHolderBase<int8_t> {
  //! Inherit the constructors of the base holder
  using MultiPassIndexSparseHolderBase::MultiPassIndexSparseHolderBase;

  //! Data type tag reported by this holder
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DT_INT8;
  }
};

/*! Multi-Pass Index Sparse Holder specialized for DT_INT16
 */
template <>
struct MultiPassIndexSparseHolder<IndexMeta::DataType::DT_INT16>
    : MultiPassIndexSparseHolderBase<int16_t> {
  //! Inherit the constructors of the base holder
  using MultiPassIndexSparseHolderBase::MultiPassIndexSparseHolderBase;

  //! Data type tag reported by this holder
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DT_INT16;
  }
};

/*! Random Access Index Holder
 */
class RandomAccessIndexHolder : public IndexHolder {
 public:
  //! Index Holder Iterator Pointer
  typedef std::shared_ptr<RandomAccessIndexHolder> Pointer;

  /*! Random Access Index Holder Iterator
   */
  class Iterator : public IndexHolder::Iterator {
   public:
    //! Index Holder Iterator Pointer
    typedef std::unique_ptr<Iterator> Pointer;

    //! Constructor
    Iterator(RandomAccessIndexHolder *owner) : holder_(owner) {}

    //! Destructor
    virtual ~Iterator(void) {}

    //! Retrieve pointer of data
    virtual const void *data(void) const override {
      return holder_->element(id_);
    }

    //! Test if the iterator is valid
    virtual bool is_valid(void) const override {
      return id_ < holder_->count();
    }

    //! Retrieve primary key
    virtual uint64_t key(void) const override {
      return holder_->key(id_);
    }

    //! Next iterator
    virtual void next(void) override {
      ++id_;
    }

   private:
    //! Members
    RandomAccessIndexHolder *holder_{nullptr};
    uint32_t id_{0};
  };

  //! Constructor
  RandomAccessIndexHolder(const IndexMeta &meta)
      : features_(std::make_shared<CompactIndexFeatures>(meta)) {}

  //! Retrieve count of elements in holder (-1 indicates unknown)
  virtual size_t count(void) const override {
    return features_->count();
  }

  //! Retrieve dimension
  virtual size_t dimension(void) const override {
    return features_->dimension();
  }

  //! Retrieve type information
  virtual IndexMeta::DataType data_type(void) const override {
    return features_->data_type();
  }

  //! Retrieve element size in bytes
  virtual size_t element_size(void) const override {
    return features_->element_size();
  }

  //! Retrieve if it can multi-pass
  virtual bool multipass(void) const override {
    return true;
  }

  //! Create a new iterator
  virtual IndexHolder::Iterator::Pointer create_iterator(void) override {
    return IndexHolder::Iterator::Pointer(
        new RandomAccessIndexHolder::Iterator(this));
  }

  void reserve(size_t elems) {
    features_->reserve(elems);
    keys_.reserve(elems);
  }

  //! Append an element into holder
  void emplace(uint64_t pkey, const void *vec) {
    features_->emplace(vec);
    keys_.emplace_back(pkey);
  }

  //! Retrieve feature via local id
  const void *element(size_t id) const {
    return features_->element(id);
  }

  //! Retrieve key via local id
  uint64_t key(size_t id) const {
    ailego_assert_with(id < keys_.size(), "Index Overflow");
    return keys_[id];
  }

 private:
  //! Disable them
  RandomAccessIndexHolder(void) = delete;

  //! Members
  CompactIndexFeatures::Pointer features_{};
  std::vector<uint64_t> keys_{};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/include/zvec/core/framework/index_logger.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstdarg>
#include <memory>
#include <zvec/ailego/container/params.h>
#include <zvec/ailego/logger/logger.h>
#include "index_module.h"

//! Common implementation behind the LOG_* macros.
//! The level is tested with IndexLoggerBroker::IsLevelEnabled() before the
//! call to IndexLoggerBroker::Log(), so the format arguments are not
//! evaluated when the level is disabled. __FILE__ and __LINE__ of the call
//! site are forwarded. The do { } while (0) wrapper makes the expansion behave
//! as a single statement.
#define ZVEC_LOG_IMPL(level, format, ...)                                   \
  do {                                                                      \
    if (zvec::core::IndexLoggerBroker::IsLevelEnabled(level)) {             \
      zvec::core::IndexLoggerBroker::Log(level, __FILE__, __LINE__, format, \
                                         ##__VA_ARGS__);                    \
    }                                                                       \
  } while (0)

//! Log Debug Message (printf-style format; defined only if LOG_DEBUG is not
//! already defined)
#ifndef LOG_DEBUG
#define LOG_DEBUG(format, ...) \
  ZVEC_LOG_IMPL(zvec::core::IndexLogger::LEVEL_DEBUG, format, ##__VA_ARGS__)
#endif

//! Log Information Message (printf-style format; defined only if LOG_INFO is
//! not already defined)
#ifndef LOG_INFO
#define LOG_INFO(format, ...) \
  ZVEC_LOG_IMPL(zvec::core::IndexLogger::LEVEL_INFO, format, ##__VA_ARGS__)
#endif

//! Log Warn Message (printf-style format; defined only if LOG_WARN is not
//! already defined)
#ifndef LOG_WARN
#define LOG_WARN(format, ...) \
  ZVEC_LOG_IMPL(zvec::core::IndexLogger::LEVEL_WARN, format, ##__VA_ARGS__)
#endif

//! Log Error Message (printf-style format; defined only if LOG_ERROR is not
//! already defined)
#ifndef LOG_ERROR
#define LOG_ERROR(format, ...) \
  ZVEC_LOG_IMPL(zvec::core::IndexLogger::LEVEL_ERROR, format, ##__VA_ARGS__)
#endif

//! Log Fatal Message (printf-style format; defined only if LOG_FATAL is not
//! already defined)
#ifndef LOG_FATAL
#define LOG_FATAL(format, ...) \
  ZVEC_LOG_IMPL(zvec::core::IndexLogger::LEVEL_FATAL, format, ##__VA_ARGS__)
#endif

namespace zvec {
namespace core {

/*! Index Logger
 *
 *  Abstract logging sink. Concrete loggers implement init(), cleanup() and
 *  log(); the static helpers translate a numeric level into text.
 */
struct IndexLogger : public IndexModule {
  //! Index Logger Pointer
  typedef std::shared_ptr<IndexLogger> Pointer;

  //! Level constants (declared here, defined out of line)
  static const int LEVEL_DEBUG;
  static const int LEVEL_INFO;
  static const int LEVEL_WARN;
  static const int LEVEL_ERROR;
  static const int LEVEL_FATAL;

  //! Retrieve string of level
  //! Returns an empty string when `level` is outside the table, including
  //! negative values.
  static const char *LevelString(int level) {
    static const char *info[] = {"DEBUG", " INFO", " WARN", "ERROR", "FATAL"};
    const int table_size = static_cast<int>(sizeof(info) / sizeof(info[0]));
    // The level is used directly as an index, so both bounds must be checked
    if (level >= 0 && level < table_size) {
      return info[level];
    }
    return "";
  }

  //! Retrieve symbol of level
  //! Returns a blank when `level` is outside the table, including negative
  //! values.
  static char LevelSymbol(int level) {
    static const char info[5] = {'D', 'I', 'W', 'E', 'F'};
    const int table_size = static_cast<int>(sizeof(info) / sizeof(info[0]));
    // The level is used directly as an index, so both bounds must be checked
    if (level >= 0 && level < table_size) {
      return info[level];
    }
    return ' ';
  }

  //! Destructor
  virtual ~IndexLogger(void) {}

  //! Initialize Logger
  virtual int init(const zvec::ailego::Params &params) = 0;

  //! Cleanup Logger
  virtual int cleanup(void) = 0;

  //! Log Message
  //! `format` and `args` form a printf-style message; `file` and `line`
  //! identify the call site.
  virtual void log(int level, const char *file, int line, const char *format,
                   va_list args) = 0;
};

/*! Index Logger Broker
 *
 *  Static facade over the currently registered IndexLogger and the minimum
 *  level that gets forwarded to it.
 */
class IndexLoggerBroker {
 public:
  //! Install `logger` and hand back the one that was registered before
  static IndexLogger::Pointer Register(IndexLogger::Pointer logger) {
    // After the swap the parameter holds the previous logger
    logger_.swap(logger);
    return logger;
  }

  //! Install `logger` and initialize it with `params`
  //! Returns whatever the new logger's init() returns.
  static int Register(IndexLogger::Pointer logger,
                      const ailego::Params &params) {
    //! The previous logger is cleaned up before the new one is initialized
    if (logger_ != nullptr) {
      logger_->cleanup();
    }
    logger_ = std::move(logger);
    const int ret = logger_->init(params);
    return ret;
  }

  //! Drop the registered logger
  static void Unregister() {
    logger_.reset();
  }

  //! Set the minimum level that will be logged
  static void SetLevel(int level) {
    logger_level_ = level;
  }

  //! Check if a message of `level` would be logged
  //! True only when a logger is registered and `level` is not below the
  //! configured minimum.
  static bool IsLevelEnabled(int level) {
    if (!logger_) {
      return false;
    }
    return level >= logger_level_;
  }

  //! Forward a printf-style message to the registered logger
  __attribute__((format(printf, 4, 5))) static void Log(
      int level, const char *file, int line, const char *format, ...) {
    if (!IsLevelEnabled(level)) {
      return;
    }
    va_list args;
    va_start(args, format);
    logger_->log(level, file, line, format, args);
    va_end(args);
  }

 private:
  //! Static-only class: no instances
  IndexLoggerBroker() = delete;
  IndexLoggerBroker(const IndexLoggerBroker &) = delete;
  IndexLoggerBroker(IndexLoggerBroker &&) = delete;

  //! Members
  static int logger_level_;
  static IndexLogger::Pointer logger_;
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/include/zvec/core/framework/index_mapping.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <map>
#include <zvec/ailego/internal/platform.h>
#include <zvec/ailego/io/file.h>
#include <zvec/core/framework/index_format.h>

namespace zvec {
namespace core {

/*! Index Mapping
 */
class IndexMapping {
 public:
  /*! Index Mapping Segment
   */
  class Segment {
   public:
    //! Constructor
    Segment(void) {}

    //! Constructor
    Segment(IndexFormat::SegmentMeta *segmeta) : meta_(segmeta) {}

    //! Flush the segment
    bool flush(void) const {
      ailego_false_if_false(this->meta_ && this->data_);
      return ailego::File::MemoryFlush(
          this->data_, this->meta_->data_size + this->meta_->padding_size);
    }

    //! Retrieve mapping address of the segment
    void *data(void) const {
      return data_;
    }

    //! Retrieve pointer of SegmentMeta
    IndexFormat::SegmentMeta *meta(void) const {
      return meta_;
    }

    //! Retrieve dirty flag of the segment
    bool dirty(void) const {
      return dirty_;
    }

    //! Set the segment as dirty
    void set_dirty(void) const {
      dirty_ = true;
    }

   private:
    friend class IndexMapping;

    //! Set the mapping address of the segment
    void set_data(void *addr) {
      data_ = addr;
    }

    //! Clear the dirty flag
    void reset_dirty(void) const {
      dirty_ = false;
    }

   private:
    //! Members
    IndexFormat::SegmentMeta *meta_{nullptr};
    void *data_{nullptr};
    mutable bool dirty_{false};
  };

  struct SegmentInfo {
    Segment segment;
    uint64_t segment_header_start_offset;
    IndexFormat::MetaHeader *segment_header;
  };

  //! Constructor
  IndexMapping(void) {}

  //! Constructor
  IndexMapping(IndexMapping &&rhs)
      : segment_ids_offset_(rhs.segment_ids_offset_),
        segment_start_(rhs.segment_start_),
        header_(rhs.header_),
        footer_(rhs.footer_),
        segments_(std::move(rhs.segments_)),
        file_(std::move(rhs.file_)) {
    rhs.segment_ids_offset_ = 0;
    rhs.segment_start_ = nullptr;
    rhs.header_ = nullptr;
    rhs.footer_ = nullptr;
  }

  //! Assignment
  IndexMapping &operator=(IndexMapping &&rhs) {
    segment_ids_offset_ = rhs.segment_ids_offset_;
    segment_start_ = rhs.segment_start_;
    header_ = rhs.header_;
    footer_ = rhs.footer_;
    segments_ = std::move(rhs.segments_);
    file_ = std::move(rhs.file_);
    rhs.segment_ids_offset_ = 0;
    rhs.segment_start_ = nullptr;
    rhs.header_ = nullptr;
    rhs.footer_ = nullptr;
    return *this;
  }

  //! Open a index file
  int open(const std::string &path, bool cow, bool full_mode);

  //! Create a index file
  int create(const std::string &path, size_t segs_size);

  //! Close the index
  void close(void);

  //! Refresh meta information (checksum, update time, etc.)
  void refresh(uint64_t check_point);

  //! Append a segment into index
  int append(const std::string &id, size_t size);

  //! Map a segment by id
  Segment *map(const std::string &id, bool warmup, bool lock);

  //! Unmap a segment by id
  void unmap(const std::string &id);

  //! Unmap all segments
  void unmap_all(void);

  //! Flush the index mapping
  int flush(void);

  //! Test if the segment is exist
  bool has(const std::string &id) const {
    return (segments_.find(id) != segments_.end());
  }

  //! Retrieve count of segments
  size_t segment_count(void) const {
    return segments_.size();
  }

  //! Retrieve size of index mapping
  size_t index_size(void) const {
    return index_size_;
  }

  //! Retrieve magic number of index
  uint32_t magic(void) const {
    return (header_ ? header_->magic : 0);
  }

  //! Retrieve header information
  const IndexFormat::MetaHeader &header(void) const {
    return *header_;
  }

  //! Retrieve footer information
  const IndexFormat::MetaFooter &footer(void) const {
    return *footer_;
  }

  bool huge_page() const {
    return huge_page_;
  }

 protected:
  //! Initialize index file mapping
  int init_index_mapping(size_t len);

  bool Ishugetlbfs(const std::string &path) const;

  int init_meta_section();
  int init_hugepage_meta_section();

 private:
  //! Disable them
  IndexMapping(const IndexMapping &) = delete;
  IndexMapping &operator=(const IndexMapping &) = delete;

  //! Members
  uint32_t segment_ids_offset_{0};
  IndexFormat::SegmentMeta *segment_start_{nullptr};
  IndexFormat::MetaHeader *header_{nullptr};
  std::map<uint64_t, IndexFormat::MetaHeader *> header_addr_map_{};
  IndexFormat::MetaFooter *footer_{nullptr};
  std::map<std::string, SegmentInfo> segments_{};
  size_t index_size_{0u};
  ailego::File file_{};
  std::string path_;
  bool copy_on_write_{false};
  bool full_mode_{false};
  bool header_dirty_{false};
  bool huge_page_{false};
  size_t seg_meta_capacity_{0u};
  uint64_t current_header_start_offset_{0u};
};

}  // namespace core
}  // namespace zvec

================================================
FILE: src/include/zvec/core/framework/index_memory.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <iterator>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <utility>
#include <vector>

namespace zvec {
namespace core {

/*! Index Memory
 *
 *  A keyed pool of in-memory ropes. Every access to the pool takes the
 *  internal mutex; the ropes and blocks handed out are not themselves
 *  synchronized by this class.
 */
class IndexMemory {
 public:
  /*! Index Memory Block
   *
   *  A resizable byte buffer with bounds-clamped write/fetch/read.
   */
  class Block {
   public:
    //! Constructor: creates a buffer of `sz` zero bytes
    Block(size_t sz) : buffer_(sz) {}

    //! Constructor
    Block(const Block &rhs) : buffer_(rhs.buffer_) {}

    //! Constructor
    Block(Block &&rhs) noexcept : buffer_(std::move(rhs.buffer_)) {}

    //! Assignment
    Block &operator=(const Block &rhs) {
      buffer_ = rhs.buffer_;
      return *this;
    }

    //! Assignment
    Block &operator=(Block &&rhs) {
      buffer_ = std::move(rhs.buffer_);
      return *this;
    }

    //! Retrieve size of buffer
    size_t size(void) const {
      return buffer_.size();
    }

    //! Append data into the block (grows the buffer by `len` bytes)
    //! Returns `len`.
    size_t append(const void *data, size_t len) {
      const uint8_t *bytes = reinterpret_cast<const uint8_t *>(data);
      buffer_.insert(buffer_.end(), bytes, bytes + len);
      return len;
    }

    //! Write data into the block
    //! The region is clamped to the current buffer size (the buffer never
    //! grows here). Returns the number of bytes actually written.
    size_t write(size_t off, const void *data, size_t len) {
      clamp_region(&off, &len);
      std::copy(reinterpret_cast<const uint8_t *>(data),
                reinterpret_cast<const uint8_t *>(data) + len,
                buffer_.data() + off);
      return len;
    }

    //! Fetch data from the storage (with own buffer)
    //! Copies up to `len` bytes starting at offset `off` into `buf`.
    //! Returns the number of bytes actually copied.
    size_t fetch(size_t off, void *buf, size_t len) const {
      clamp_region(&off, &len);
      // Copy from the requested offset, not from the start of the buffer
      std::copy(buffer_.data() + off, buffer_.data() + off + len,
                reinterpret_cast<uint8_t *>(buf));
      return len;
    }

    //! Read data from the storage (Zero-copy)
    //! Stores a pointer into the internal buffer in `*data`. Returns the
    //! number of bytes available there, at most `len`.
    size_t read(size_t off, const void **data, size_t len) {
      clamp_region(&off, &len);
      *data = buffer_.data() + off;
      return len;
    }

   private:
    //! Clamp `*off` and `*len` so that [off, off + len) lies inside the
    //! buffer. Written without computing `off + len`, which could wrap.
    void clamp_region(size_t *off, size_t *len) const {
      const size_t region_size = buffer_.size();
      if (*off > region_size) {
        *off = region_size;
      }
      const size_t available = region_size - *off;
      if (*len > available) {
        *len = available;
      }
    }

    //! Members
    std::vector<uint8_t> buffer_{};
  };

  /*! Index Memory Rope
   *
   *  An ordered sequence of blocks.
   */
  class Rope {
   public:
    //! Index Memory Rope Pointer
    typedef std::shared_ptr<Rope> Pointer;

    //! Constructor
    Rope(void) {}

    //! Constructor
    Rope(const Rope &rhs) : blocks_(rhs.blocks_) {}

    //! Constructor
    Rope(Rope &&rhs) : blocks_(std::move(rhs.blocks_)) {}

    //! Assignment
    Rope &operator=(const Rope &rhs) {
      blocks_ = rhs.blocks_;
      return *this;
    }

    //! Assignment
    Rope &operator=(Rope &&rhs) {
      blocks_ = std::move(rhs.blocks_);
      return *this;
    }

    //! Retrieve the block at index n (unchecked)
    Block &operator[](size_t n) {
      return blocks_[n];
    }

    //! Retrieve the block at index n (unchecked)
    const Block &operator[](size_t n) const {
      return blocks_[n];
    }

    //! Retrieve count of blocks
    size_t count(void) const {
      return blocks_.size();
    }

    //! Retrieve memory size of rope (sum of all block sizes)
    size_t size(void) const {
      size_t sum = 0u;
      for (const auto &it : blocks_) {
        sum += it.size();
      }
      return sum;
    }

    //! Test if the rope is empty
    bool empty(void) const {
      return blocks_.empty();
    }

    //! Append a new memory block of `init_size` bytes
    //! The returned reference may be invalidated by a later append(),
    //! because blocks are stored in a std::vector.
    Block &append(size_t init_size) {
      blocks_.emplace_back(init_size);
      return blocks_.back();
    }

   private:
    //! Members
    std::vector<Block> blocks_{};
  };

  //! Constructor
  IndexMemory(void) {}

  //! Constructor (move): takes the pool of `rhs` under its lock
  IndexMemory(IndexMemory &&rhs) {
    std::lock_guard<std::mutex> latch(rhs.mutex_);
    pool_ = std::move(rhs.pool_);
  }

  //! Assignment (move)
  //! Self-assignment is a no-op. Both mutexes are taken with std::lock so
  //! that two threads assigning in opposite directions cannot deadlock.
  IndexMemory &operator=(IndexMemory &&rhs) {
    if (this != &rhs) {
      std::lock(mutex_, rhs.mutex_);
      std::lock_guard<std::mutex> latch1(mutex_, std::adopt_lock);
      std::lock_guard<std::mutex> latch2(rhs.mutex_, std::adopt_lock);
      pool_ = std::move(rhs.pool_);
    }
    return *this;
  }

  //! Retrieve the singleton memory
  static IndexMemory *Instance(void) {
    static IndexMemory mem;
    return (&mem);
  }

  //! Clear the memory
  void clear(void) {
    std::lock_guard<std::mutex> latch(mutex_);
    pool_.clear();
  }

  //! Test if the element is exist
  bool has(const std::string &key) const {
    std::lock_guard<std::mutex> latch(mutex_);
    return (pool_.find(key) != pool_.end());
  }

  //! Create or overwrite a new memory rope
  //! An existing entry under `key` is replaced by a fresh, empty rope.
  Rope::Pointer create(const std::string &key) {
    std::lock_guard<std::mutex> latch(mutex_);
    auto it = pool_.emplace(key, nullptr).first;
    it->second = std::make_shared<Rope>();
    return it->second;
  }

  //! Create or overwrite a new memory rope
  //! An existing entry under `key` is replaced by a fresh, empty rope.
  Rope::Pointer create(std::string &&key) {
    std::lock_guard<std::mutex> latch(mutex_);
    auto it = pool_.emplace(std::move(key), nullptr).first;
    it->second = std::make_shared<Rope>();
    return it->second;
  }

  //! Open a memory rope (read only)
  //! Returns nullptr when no rope is stored under `key`.
  Rope::Pointer open(const std::string &key) const {
    std::lock_guard<std::mutex> latch(mutex_);
    auto it = pool_.find(key);
    if (it == pool_.end()) {
      return nullptr;
    }
    return it->second;
  }

  //! Remove a memory rope (no-op when the key is absent)
  void remove(const std::string &key) {
    std::lock_guard<std::mutex> latch(mutex_);
    pool_.erase(key);
  }

 private:
  //! Disable them
  IndexMemory(const IndexMemory &) = delete;
  IndexMemory &operator=(const IndexMemory &) = delete;

  //! Members
  std::map<std::string, Rope::Pointer> pool_{};
  mutable std::mutex mutex_{};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/include/zvec/core/framework/index_meta.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <zvec/ailego/container/params.h>

namespace zvec {
namespace core {

/*! Index Meta
 */
class IndexMeta {
 public:
  /*! Meta Types
   *  Kind of feature described by the meta: dense or sparse.
   */
  enum MetaType { MT_UNDEFINED = 0, MT_DENSE = 1, MT_SPARSE = 2 };

  /*! Data Types
   *  The numeric values are used as indexes into the lookup tables of
   *  UnitSizeof() and AlignSizeof(), so they must stay in sync with them.
   */
  enum DataType {
    DT_UNDEFINED = 0,
    DT_FP16 = 1,
    DT_FP32 = 2,
    DT_FP64 = 3,
    DT_INT8 = 4,
    DT_INT16 = 5,
    DT_INT4 = 6,      // ElementSizeof() packs two dimensions per unit byte
    DT_BINARY32 = 7,  // ElementSizeof() packs eight dimensions per unit byte
    DT_BINARY64 = 8,  // ElementSizeof() packs eight dimensions per unit byte
  };

  /*! Major Orders
   *  Row-major or column-major layout of the features.
   */
  enum MajorOrder {
    MO_UNDEFINED = 0,
    MO_ROW = 1,
    MO_COLUMN = 2,
  };

  //! Constructor
  IndexMeta(void) {
    this->set_meta(DataType::DT_FP32, 128u);
    this->set_metric("SquaredEuclidean", 0, ailego::Params());
  }

  //! Constructor
  IndexMeta(DataType data_type, uint32_t dim) {
    meta_type_ = MT_DENSE;
    this->set_meta(data_type, dim);
    this->set_metric("SquaredEuclidean", 0, ailego::Params());
  }

  //! Constructor
  IndexMeta(MetaType meta_type, DataType data_type) {
    meta_type_ = meta_type;

    this->set_meta(data_type, 0);
    this->set_metric("SquaredEuclidean", 0, ailego::Params());
  }

  //! Constructor
  IndexMeta(const IndexMeta &rhs)
      : meta_type_{rhs.meta_type_},
        major_order_(rhs.major_order_),
        data_type_(rhs.data_type_),
        dimension_(rhs.dimension_),
        unit_size_(rhs.unit_size_),
        element_size_(rhs.element_size_),
        space_id_(rhs.space_id_),
        metric_revision_(rhs.metric_revision_),
        converter_revision_(rhs.converter_revision_),
        reformer_revision_(rhs.reformer_revision_),
        trainer_revision_(rhs.trainer_revision_),
        builder_revision_(rhs.builder_revision_),
        reducer_revision_(rhs.reducer_revision_),
        searcher_revision_(rhs.searcher_revision_),
        streamer_revision_(rhs.streamer_revision_),
        metric_name_(rhs.metric_name_),
        converter_name_(rhs.converter_name_),
        reformer_name_(rhs.reformer_name_),
        trainer_name_(rhs.trainer_name_),
        builder_name_(rhs.builder_name_),
        reducer_name_(rhs.reducer_name_),
        searcher_name_(rhs.searcher_name_),
        streamer_name_(rhs.streamer_name_),
        metric_params_(rhs.metric_params_),
        converter_params_(rhs.converter_params_),
        reformer_params_(rhs.reformer_params_),
        trainer_params_(rhs.trainer_params_),
        builder_params_(rhs.builder_params_),
        reducer_params_(rhs.reducer_params_),
        searcher_params_(rhs.searcher_params_),
        streamer_params_(rhs.streamer_params_),
        attributes_(rhs.attributes_) {}

  //! Move constructor: scalar fields are copied, names and params are moved
  //! out of `rhs`. Defaulted so that the compiler keeps the member list in
  //! sync automatically and deduces `noexcept` whenever every member allows
  //! it (a hand-written move constructor without `noexcept` never does).
  IndexMeta(IndexMeta &&rhs) = default;

  //! Copy assignment: member-wise copy of every field of `rhs`.
  //! (The previous hand-written version wrapped members of the const `rhs`
  //! in std::move, which silently degrades to a copy and misleads readers.)
  IndexMeta &operator=(const IndexMeta &rhs) = default;

  //! Move assignment: scalar fields are copied, names and params are moved
  //! out of `rhs`. Defaulted so that the compiler keeps the member list in
  //! sync automatically and deduces `noexcept` whenever every member allows
  //! it.
  IndexMeta &operator=(IndexMeta &&rhs) = default;

  //! Reset the meta to an empty description: dense meta type, undefined
  //! data type and major order, zero sizes and revisions, empty names and
  //! params. Note that this differs from a default-constructed meta, which
  //! describes FP32 features of 128 dimensions.
  void clear(void) {
    // Feature layout
    meta_type_ = MetaType::MT_DENSE;
    major_order_ = MajorOrder::MO_UNDEFINED;
    data_type_ = DataType::DT_UNDEFINED;
    dimension_ = unit_size_ = element_size_ = 0;
    space_id_ = 0;

    // Module revisions
    metric_revision_ = converter_revision_ = reformer_revision_ =
        trainer_revision_ = 0;
    builder_revision_ = reducer_revision_ = searcher_revision_ =
        streamer_revision_ = 0;

    // Metric
    metric_name_.clear();
    metric_params_.clear();

    // Converter
    converter_name_.clear();
    converter_params_.clear();

    // Reformer
    reformer_name_.clear();
    reformer_params_.clear();

    // Trainer
    trainer_name_.clear();
    trainer_params_.clear();

    // Builder
    builder_name_.clear();
    builder_params_.clear();

    // Reducer
    reducer_name_.clear();
    reducer_params_.clear();

    // Searcher
    searcher_name_.clear();
    searcher_params_.clear();

    // Streamer
    streamer_name_.clear();
    streamer_params_.clear();

    // Free-form attributes
    attributes_.clear();
  }

  //! Retrieve meta type (undefined, dense or sparse)
  MetaType meta_type(void) const {
    return meta_type_;
  }

  //! Retrieve major order information
  MajorOrder major_order(void) const {
    return major_order_;
  }

  //! Retrieve data type of the features
  DataType data_type(void) const {
    return data_type_;
  }

  //! Retrieve dimension
  uint32_t dimension(void) const {
    return dimension_;
  }

  //! Retrieve unit size in bytes
  uint32_t unit_size(void) const {
    return unit_size_;
  }

  //! Retrieve element size in bytes (see ElementSizeof)
  uint32_t element_size(void) const {
    return element_size_;
  }

  //! Retrieve space id
  uint64_t space_id(void) const {
    return space_id_;
  }

  //! Retrieve revision of metric (as passed to set_metric)
  uint32_t metric_revision(void) const {
    return metric_revision_;
  }

  //! Retrieve revision of converter (as passed to set_converter)
  uint32_t converter_revision(void) const {
    return converter_revision_;
  }

  //! Retrieve revision of reformer (as passed to set_reformer)
  uint32_t reformer_revision(void) const {
    return reformer_revision_;
  }

  //! Retrieve revision of trainer (as passed to set_trainer)
  uint32_t trainer_revision(void) const {
    return trainer_revision_;
  }

  //! Retrieve revision of builder (as passed to set_builder)
  uint32_t builder_revision(void) const {
    return builder_revision_;
  }

  //! Retrieve revision of searcher (as passed to set_searcher)
  uint32_t searcher_revision(void) const {
    return searcher_revision_;
  }

  //! Retrieve revision of reducer (as passed to set_reducer)
  uint32_t reducer_revision(void) const {
    return reducer_revision_;
  }

  //! Retrieve revision of streamer (as passed to set_streamer)
  uint32_t streamer_revision(void) const {
    return streamer_revision_;
  }

  //! Retrieve name of metric (reference stays valid while this meta lives
  //! and the name is not reassigned)
  const std::string &metric_name(void) const {
    return metric_name_;
  }

  //! Retrieve name of converter (empty unless set_converter was called)
  const std::string &converter_name(void) const {
    return converter_name_;
  }

  //! Retrieve name of reformer (empty unless set_reformer was called)
  const std::string &reformer_name(void) const {
    return reformer_name_;
  }

  //! Retrieve name of trainer (empty unless set_trainer was called)
  const std::string &trainer_name(void) const {
    return trainer_name_;
  }

  //! Retrieve name of builder (empty unless set_builder was called)
  const std::string &builder_name(void) const {
    return builder_name_;
  }

  //! Retrieve name of reducer (empty unless set_reducer was called)
  const std::string &reducer_name(void) const {
    return reducer_name_;
  }

  //! Retrieve name of searcher (empty unless set_searcher was called)
  const std::string &searcher_name(void) const {
    return searcher_name_;
  }

  //! Retrieve name of streamer (empty unless set_streamer was called)
  const std::string &streamer_name(void) const {
    return streamer_name_;
  }

  //! Retrieve metric params (as passed to set_metric)
  const ailego::Params &metric_params(void) const {
    return metric_params_;
  }

  //! Retrieve converter params (as passed to set_converter)
  const ailego::Params &converter_params(void) const {
    return converter_params_;
  }

  //! Retrieve reformer params (as passed to set_reformer)
  const ailego::Params &reformer_params(void) const {
    return reformer_params_;
  }

  //! Retrieve trainer params (as passed to set_trainer)
  const ailego::Params &trainer_params(void) const {
    return trainer_params_;
  }

  //! Retrieve builder params (as passed to set_builder)
  const ailego::Params &builder_params(void) const {
    return builder_params_;
  }

  //! Retrieve reducer params (as passed to set_reducer)
  const ailego::Params &reducer_params(void) const {
    return reducer_params_;
  }

  //! Retrieve searcher params (as passed to set_searcher)
  const ailego::Params &searcher_params(void) const {
    return searcher_params_;
  }

  //! Retrieve streamer params (as passed to set_streamer)
  const ailego::Params &streamer_params(void) const {
    return streamer_params_;
  }

  //! Retrieve attributes (read only)
  const ailego::Params &attributes(void) const {
    return attributes_;
  }

  //! Retrieve mutable attributes; the pointer refers to a member of this
  //! meta and is never null
  ailego::Params *mutable_attributes(void) {
    return &attributes_;
  }

  //! Set meta type (no other field is touched)
  void set_meta_type(MetaType meta_type) {
    meta_type_ = meta_type;
  }

  //! Set major order of features (no other field is touched)
  void set_major_order(MajorOrder major_order) {
    major_order_ = major_order;
  }

  //! Set dimension of feature and recompute the element size from the
  //! current data type and unit size
  void set_dimension(uint32_t dim) {
    dimension_ = dim;
    element_size_ = IndexMeta::ElementSizeof(data_type_, unit_size_, dim);
  }

  //! Set data type of feature. The unit size is derived from the data type
  //! and the element size is recomputed for the current dimension, so that
  //! element_size() never lags behind the data type (mirrors set_dimension).
  void set_data_type(DataType data_type) {
    data_type_ = data_type;
    unit_size_ = UnitSizeof(data_type);
    element_size_ = ElementSizeof(data_type, unit_size_, dimension_);
  }

  //! Set data type, unit size and dimension in one step; the element size
  //! is derived from the three of them
  void set_meta(DataType data_type, uint32_t unit, uint32_t dim) {
    element_size_ = ElementSizeof(data_type, unit, dim);
    unit_size_ = unit;
    dimension_ = dim;
    data_type_ = data_type;
  }

  //! Set data type and dimension, using the default unit size of the type
  void set_meta(DataType data_type, uint32_t dim) {
    const uint32_t unit = UnitSizeof(data_type);
    set_meta(data_type, unit, dim);
  }

  // The eight setters below share one shape: `name` and `params` are
  // perfectly forwarded (copied from lvalues, moved from rvalues) into the
  // matching members, and `rev` is stored as the module revision.

  //! Set information of metric (name, revision and params)
  template <typename TName, typename TParams>
  void set_metric(TName &&name, uint32_t rev, TParams &&params) {
    metric_name_ = std::forward<TName>(name);
    metric_revision_ = rev;
    metric_params_ = std::forward<TParams>(params);
  }

  //! Set information of converter (name, revision and params)
  template <typename TName, typename TParams>
  void set_converter(TName &&name, uint32_t rev, TParams &&params) {
    converter_name_ = std::forward<TName>(name);
    converter_revision_ = rev;
    converter_params_ = std::forward<TParams>(params);
  }

  //! Set information of reformer (name, revision and params)
  template <typename TName, typename TParams>
  void set_reformer(TName &&name, uint32_t rev, TParams &&params) {
    reformer_name_ = std::forward<TName>(name);
    reformer_revision_ = rev;
    reformer_params_ = std::forward<TParams>(params);
  }

  //! Set information of trainer (name, revision and params)
  template <typename TName, typename TParams>
  void set_trainer(TName &&name, uint32_t rev, TParams &&params) {
    trainer_name_ = std::forward<TName>(name);
    trainer_revision_ = rev;
    trainer_params_ = std::forward<TParams>(params);
  }

  //! Set information of builder (name, revision and params)
  template <typename TName, typename TParams>
  void set_builder(TName &&name, uint32_t rev, TParams &&params) {
    builder_name_ = std::forward<TName>(name);
    builder_revision_ = rev;
    builder_params_ = std::forward<TParams>(params);
  }

  //! Set information of reducer (name, revision and params)
  template <typename TName, typename TParams>
  void set_reducer(TName &&name, uint32_t rev, TParams &&params) {
    reducer_name_ = std::forward<TName>(name);
    reducer_revision_ = rev;
    reducer_params_ = std::forward<TParams>(params);
  }

  //! Set information of searcher (name, revision and params)
  template <typename TName, typename TParams>
  void set_searcher(TName &&name, uint32_t rev, TParams &&params) {
    searcher_name_ = std::forward<TName>(name);
    searcher_revision_ = rev;
    searcher_params_ = std::forward<TParams>(params);
  }

  //! Set information of streamer (name, revision and params)
  template <typename TName, typename TParams>
  void set_streamer(TName &&name, uint32_t rev, TParams &&params) {
    streamer_name_ = std::forward<TName>(name);
    streamer_revision_ = rev;
    streamer_params_ = std::forward<TParams>(params);
  }

  //! Serialize meta information into buffer
  void serialize(std::string *out) const;

  //! Deserialize meta information from buffer
  bool deserialize(const void *data, size_t len);

  //! Calculate unit size of feature, in bytes.
  //! Returns 0 for DT_UNDEFINED and for any value outside the known data
  //! types (consistent with ElementSizeof), instead of reading past the
  //! lookup table.
  static uint32_t UnitSizeof(DataType data_type) {
    static const uint32_t unit_size_table[] = {
        0u,                // DT_UNDEFINED
        sizeof(uint16_t),  // DT_FP16
        sizeof(float),     // DT_FP32
        sizeof(double),    // DT_FP64
        sizeof(int8_t),    // DT_INT8
        sizeof(int16_t),   // DT_INT16
        sizeof(uint8_t),   // DT_INT4
        sizeof(uint32_t),  // DT_BINARY32
        sizeof(uint64_t)   // DT_BINARY64
    };
    // A negative value wraps to a huge index and is rejected as well.
    const size_t index = static_cast<size_t>(data_type);
    if (index >= sizeof(unit_size_table) / sizeof(unit_size_table[0])) {
      return 0u;
    }
    return unit_size_table[index];
  }

  //! Calculate align size of feature, in bytes.
  //! Returns 0 for DT_UNDEFINED and for any value outside the known data
  //! types, instead of reading past the lookup table.
  static uint32_t AlignSizeof(DataType ft) {
    static const uint32_t align_size_table[] = {
        0u,                   // DT_UNDEFINED
        sizeof(uint16_t),     // DT_FP16
        sizeof(float),        // DT_FP32
        sizeof(double),       // DT_FP64
        sizeof(int8_t) * 4,   // DT_INT8
        sizeof(int16_t),      // DT_INT16
        sizeof(uint8_t) * 4,  // DT_INT4
        sizeof(uint32_t),     // DT_BINARY32
        sizeof(uint64_t)      // DT_BINARY64
    };
    // A negative value wraps to a huge index and is rejected as well.
    const size_t index = static_cast<size_t>(ft);
    if (index >= sizeof(align_size_table) / sizeof(align_size_table[0])) {
      return 0u;
    }
    return align_size_table[index];
  }

  //! Calculate element size of feature, in bytes.
  //!  - FP16/FP32/FP64/INT8/INT16: one unit per dimension.
  //!  - INT4: two dimensions per unit byte, rounded up to whole units.
  //!  - BINARY32/BINARY64: eight dimensions per unit byte, rounded up to
  //!    whole units.
  //! Returns 0 for DT_UNDEFINED, for unknown data types and for a zero unit.
  static uint32_t ElementSizeof(DataType data_type, uint32_t unit,
                                uint32_t dim) {
    // A zero unit would divide by zero in the packed cases below; every
    // other case already yields 0 for a zero unit.
    if (unit == 0u) {
      return 0;
    }
    switch (data_type) {
      case DataType::DT_UNDEFINED:
        return 0;
      case DataType::DT_FP16:
      case DataType::DT_FP32:
      case DataType::DT_FP64:
      case DataType::DT_INT8:
      case DataType::DT_INT16:
        return (dim * unit);
      case DataType::DT_INT4:
        return (dim + unit * 2 - 1) / (unit * 2) * unit;
      case DataType::DT_BINARY32:
      case DataType::DT_BINARY64:
        return (dim + unit * 8 - 1) / (unit * 8) * unit;
    }
    return 0;
  }

  //! Calculate element size of vector, in bytes, using the default unit
  //! size of the data type
  static uint32_t ElementSizeof(DataType data_type, uint32_t dim) {
    const uint32_t unit = UnitSizeof(data_type);
    return ElementSizeof(data_type, unit, dim);
  }

 private:
  MetaType meta_type_{MetaType::MT_DENSE};
  MajorOrder major_order_{MajorOrder::MO_UNDEFINED};
  DataType data_type_{DataType::DT_UNDEFINED};
  uint32_t dimension_{0};
  uint32_t unit_size_{0};
  uint32_t element_size_{0};
  uint64_t space_id_{0};
  uint32_t metric_revision_{0};
  uint32_t converter_revision_{0};
  uint32_t reformer_revision_{0};
  uint32_t trainer_revision_{0};
  uint32_t builder_revision_{0};
  uint32_t reducer_revision_{0};
  uint32_t searcher_revision_{0};
  uint32_t streamer_revision_{0};

  std::string metric_name_{};
  std::string converter_name_{};
  std::string reformer_name_{};
  std::string trainer_name_{};
  std::string builder_name_{};
  std::string reducer_name_{};
  std::string searcher_name_{};
  std::string streamer_name_{};

  ailego::Params metric_params_{};
  ailego::Params converter_params_{};
  ailego::Params reformer_params_{};
  ailego::Params trainer_params_{};
  ailego::Params builder_params_{};
  ailego::Params reducer_params_{};
  ailego::Params searcher_params_{};
  ailego::Params streamer_params_{};
  ailego::Params attributes_{};
};

/*! Index Query Meta
 */
class IndexQueryMeta {
 public:
  //! Constructor
  IndexQueryMeta(void) {}

  //! Constructor
  IndexQueryMeta(IndexMeta::MetaType meta_type, IndexMeta::DataType data_type,
                 uint32_t unit, uint32_t dim)
      : meta_type_(meta_type),
        data_type_(data_type),
        dimension_(dim),
        unit_size_(unit),
        element_size_(IndexMeta::ElementSizeof(data_type, unit, dim)) {}

  //! Constructor
  IndexQueryMeta(IndexMeta::DataType data_type, uint32_t dim)
      : IndexQueryMeta{IndexMeta::MetaType::MT_DENSE, data_type,
                       IndexMeta::UnitSizeof(data_type), dim} {}

  //! Constructor
  IndexQueryMeta(IndexMeta::DataType data_type)
      : IndexQueryMeta{IndexMeta::MetaType::MT_SPARSE, data_type,
                       IndexMeta::UnitSizeof(data_type), 0} {}

  //! Constructor
  IndexQueryMeta(IndexMeta::MetaType meta_type, IndexMeta::DataType data_type,
                 uint32_t dim = 0)
      : IndexQueryMeta{meta_type, data_type, IndexMeta::UnitSizeof(data_type),
                       dim} {}

  //! Retrieve meta type
  IndexMeta::MetaType meta_type(void) const {
    return meta_type_;
  }

  //! Retrieve data
  IndexMeta::DataType data_type(void) const {
    return data_type_;
  }

  //! Retrieve dimension of features
  uint32_t dimension(void) const {
    return dimension_;
  }

  //! Retrieve unit size of feature
  uint32_t unit_size(void) const {
    return unit_size_;
  }

  //! Retrieve element size of feature
  uint32_t element_size(void) const {
    return element_size_;
  }

  //! Set dimension of feature
  void set_dimension(uint32_t dim) {
    dimension_ = dim;
    element_size_ = IndexMeta::ElementSizeof(data_type_, unit_size_, dim);
  }

  //! Set meta type
  void set_meta_type(IndexMeta::MetaType meta_type) {
    meta_type_ = meta_type;
  }

  //! Set data type
  void set_data_type(IndexMeta::DataType data_type) {
    data_type_ = data_type;
  }

  //! Set meta information of feature
  void set_meta(IndexMeta::DataType data_type, uint32_t unit, uint32_t dim) {
    data_type_ = data_type;
    dimension_ = dim;
    unit_size_ = unit;
    element_size_ = IndexMeta::ElementSizeof(data_type, unit, dim);
  }

  //! Set meta information of feature
  void set_meta(IndexMeta::DataType data_type, uint32_t dim) {
    this->set_meta(data_type, IndexMeta::UnitSizeof(data_type), dim);
  }

 private:
  IndexMeta::MetaType meta_type_{IndexMeta::MetaType::MT_DENSE};
  IndexMeta::DataType data_type_{IndexMeta::DataType::DT_UNDEFINED};
  uint32_t dimension_{0};
  uint32_t unit_size_{0};
  uint32_t element_size_{0};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/include/zvec/core/framework/index_metric.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <memory>
#include <zvec/ailego/container/params.h>
#include <zvec/ailego/math_batch/utils.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_meta.h>
#include <zvec/core/framework/index_module.h>

namespace zvec {
namespace core {

/*! Index Metric
 *
 *  Interface of a distance measure. Implementations must provide init,
 *  cleanup, is_matched, params and query_metric; every other virtual has a
 *  neutral default (null function, no-op, or `false`).
 */
struct IndexMetric : public IndexModule {
  //! Shared pointer to a metric
  using Pointer = std::shared_ptr<IndexMetric>;

  //! Plain function computing the distance between `m` and `q` into `out`
  using MatrixDistanceHandle = void (*)(const void *m, const void *q,
                                        size_t dim, float *out);

  //! Callable counterpart of MatrixDistanceHandle
  using MatrixDistance =
      std::function<void(const void *m, const void *q, size_t dim, float *out)>;

  //! Plain function computing the distance between two sparse features
  using MatrixSparseDistanceHandle = void (*)(const void *m_sparse_data,
                                              const void *q_sparse_data,
                                              float *out);

  //! Callable counterpart of MatrixSparseDistanceHandle
  using MatrixSparseDistance = std::function<void(
      const void *m_sparse_data, const void *q_sparse_data, float *out)>;

  //! Plain function computing the distances between `num` features and one
  //! query
  using MatrixBatchDistanceHandle = void (*)(const void **m, const void *q,
                                             size_t num, size_t dim,
                                             float *out);

  //! Callable counterpart of MatrixBatchDistanceHandle
  using MatrixBatchDistance = std::function<void(
      const void **m, const void *q, size_t num, size_t dim, float *out)>;

  //! Destructor
  virtual ~IndexMetric(void) = default;

  //! Initialize the metric for the given index meta and params
  virtual int init(const IndexMeta &meta, const ailego::Params &params) = 0;

  //! Cleanup the metric
  virtual int cleanup(void) = 0;

  //! Test whether the metric matches the index meta
  virtual bool is_matched(const IndexMeta &meta) const = 0;

  //! Test whether the metric matches the index meta and the query meta
  virtual bool is_matched(const IndexMeta &meta,
                          const IndexQueryMeta &qmeta) const = 0;

  //! Retrieve distance function for query (null by default)
  virtual MatrixDistance distance(void) const {
    return nullptr;
  }

  //! Retrieve sparse distance function for query (null by default)
  virtual MatrixSparseDistance sparse_distance(void) const {
    return nullptr;
  }

  //! Retrieve batch distance function for query (null by default)
  virtual MatrixBatchDistance batch_distance(void) const {
    return nullptr;
  }

  //! Retrieve distance function for index features (null by default)
  virtual MatrixDistance distance_matrix(size_t /*m*/, size_t /*n*/) const {
    return nullptr;
  }

  //! Retrieve params of the metric
  virtual const ailego::Params &params(void) const = 0;

  //! Retrieve query metric object of this index metric
  virtual Pointer query_metric(void) const = 0;

  //! Normalize result (no-op by default)
  virtual void normalize(float * /*score*/) const {}

  //! Denormalize result (no-op by default)
  virtual void denormalize(float * /*score*/) const {}

  //! Retrieve if it supports normalization (not by default)
  virtual bool support_normalize(void) const {
    return false;
  }

  //! Train the metric (by default does nothing and returns 0)
  virtual int train(const void * /*vec*/, size_t /*dim*/) {
    return 0;
  }

  //! Retrieve if it supports training (not by default)
  virtual bool support_train(void) const {
    return false;
  }

  //! Compute the distance between feature and query through the function
  //! returned by distance(void); that function must not be null
  float distance(const void *m, const void *q, size_t dim) const {
    float result;
    const MatrixDistance compute = this->distance();
    compute(m, q, dim, &result);
    return result;
  }

  //! Query preprocessing hook of the batch distance implementation
  using DistanceBatchQueryPreprocessFunc =
      ailego::DistanceBatch::DistanceBatchQueryPreprocessFunc;

  //! Retrieve the query preprocessing function (null by default)
  virtual DistanceBatchQueryPreprocessFunc get_query_preprocess_func() const {
    return nullptr;
  }
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/include/zvec/core/framework/index_module.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <memory>
#include <string>

namespace zvec {
namespace core {

/*! Index Module
 *
 *  Common base of index components: carries a name and a revision that
 *  only derived classes and IndexFactory may change.
 */
class IndexModule {
 public:
  //! Shared pointer to a module
  using Pointer = std::shared_ptr<IndexModule>;

  //! Destructor
  virtual ~IndexModule(void) = default;

  //! Retrieve debug information (empty by default)
  virtual std::string debug_string(void) const {
    return {};
  }

  //! Retrieve name of module
  const std::string &name(void) const {
    return name_;
  }

  //! Retrieve revision of module
  uint32_t revision(void) const {
    return revision_;
  }

 protected:
  friend struct IndexFactory;

  //! Set name of module
  void set_name(const std::string &str) {
    name_.assign(str);
  }

  //! Set revision of module
  void set_revision(uint32_t val) {
    revision_ = val;
  }

 private:
  //! Members
  uint32_t revision_{0u};
  std::string name_{};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/include/zvec/core/framework/index_packer.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <zvec/ailego/internal/platform.h>
#include <zvec/ailego/utility/type_helper.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_format.h>
#include <zvec/core/framework/index_version.h>

namespace zvec {
namespace core {

/*! Index Packer
 */
class IndexPacker {
 public:
  /*! Index Packer Segment Meta
   *
   *  Value type describing one packed segment: its id, the size of its
   *  data, the size of the padding that follows it and the CRC of the data.
   */
  class SegmentMeta {
   public:
    //! Constructor (copies the segment id)
    SegmentMeta(const std::string &str, size_t dsz, size_t psz, uint32_t crc)
        : data_size_(dsz), padding_size_(psz), data_crc_(crc), id_(str) {}

    //! Constructor (takes over the segment id). `str` is a plain rvalue
    //! reference, not a forwarding reference, so std::move is the right
    //! cast here.
    SegmentMeta(std::string &&str, size_t dsz, size_t psz, uint32_t crc)
        : data_size_(dsz),
          padding_size_(psz),
          data_crc_(crc),
          id_(std::move(str)) {}

    // All members are value types, so the compiler-generated member-wise
    // operations are correct. Declaring the assignments as well keeps the
    // type assignable (a user-declared move constructor alone would delete
    // copy assignment).

    //! Copy constructor
    SegmentMeta(const SegmentMeta &rhs) = default;

    //! Move constructor
    SegmentMeta(SegmentMeta &&rhs) = default;

    //! Copy assignment
    SegmentMeta &operator=(const SegmentMeta &rhs) = default;

    //! Move assignment
    SegmentMeta &operator=(SegmentMeta &&rhs) = default;

    //! Retrieve id of segment
    const std::string &id(void) const {
      return id_;
    }

    //! Retrieve size of data
    size_t data_size(void) const {
      return data_size_;
    }

    //! Retrieve crc of data
    uint32_t data_crc(void) const {
      return data_crc_;
    }

    //! Retrieve size of padding
    size_t padding_size(void) const {
      return padding_size_;
    }

   private:
    size_t data_size_{0};
    size_t padding_size_{0};
    uint32_t data_crc_{0};
    std::string id_{};
  };

  //! Retrieve magic number of index (taken from the header written by
  //! setup(); zero before setup() succeeds and after reset())
  uint32_t magic(void) const {
    return magic_;
  }

  //! Reset the packer: clears the magic number and the running CRC and
  //! size of the packed data
  void reset(void) {
    magic_ = 0;
    data_crc_ = 0u;
    data_size_ = 0u;
  }

  //! Setup header of index package: builds the meta header, hands it to
  //! `write_data` and remembers its magic number. Returns false when the
  //! header was not written completely.
  template <typename TFunc>
  bool setup(TFunc write_data) {
    static_assert(ailego::IsInvocableWithResult<size_t, TFunc, const void *,
                                                size_t>::value,
                  "Invocable function type");
    // Negated footer size, wrapped into 32 bits.
    const uint32_t neg_footer_size =
        static_cast<uint32_t>(0 - sizeof(IndexFormat::MetaFooter));

    IndexFormat::MetaHeader header;
    IndexFormat::SetupMetaHeader(&header, neg_footer_size,
                                 sizeof(IndexFormat::MetaHeader));

    const bool written =
        (write_data(&header, sizeof(header)) == sizeof(header));
    if (written) {
      magic_ = header.magic;
    }
    return written;
  }

  //! Pack index data: hands `data` to `write_data` and folds the bytes that
  //! were actually written into the running CRC and size. Returns the
  //! number of bytes written.
  template <typename TFunc>
  size_t pack(TFunc write_data, const void *data, size_t len) {
    static_assert(ailego::IsInvocableWithResult<size_t, TFunc, const void *,
                                                size_t>::value,
                  "Invocable function type");
    const size_t written = write_data(data, len);
    if (written == 0u) {
      return written;
    }
    data_crc_ = ailego::Crc32c::Hash(data, written, data_crc_);
    data_size_ += written;
    return written;
  }

  //! Finish packing data.
  //! Writes, in this order: the version segment, the content padding, the
  //! segment table and the footer. `stab` must describe exactly the bytes
  //! packed so far; the version segment is appended to it as a side effect.
  //! Returns false if the sizes do not add up or any write is incomplete.
  template <typename TFunc>
  bool finish(TFunc write_data, std::vector<SegmentMeta> &stab) {
    static_assert(ailego::IsInvocableWithResult<size_t, TFunc, const void *,
                                                size_t>::value,
                  "Invocable function type");

    // The segment table must account for every byte passed through pack()
    size_t content_size = 0u;
    for (const auto &it : stab) {
      content_size += it.data_size() + it.padding_size();
    }

    if (content_size != data_size_) {
      return false;
    }

    // Appends the "IndexVersion" segment to the content and to `stab`
    if (!this->pack_version(write_data, stab)) {
      return false;
    }

    // Write the padding if need (aligns the content to 32 bytes; the
    // padding is not included in the content CRC or content size)
    size_t content_padding_size = ailego_align(data_size_, 32) - data_size_;
    if (content_padding_size) {
      std::string padding(content_padding_size, '\0');

      if (write_data(padding.data(), padding.size()) != padding.size()) {
        return false;
      }
    }

    // Prepare segment meta buffer (resized up to a multiple of 32 bytes)
    IndexFormat::SegmentMetaBuffer buffer(stab.size());
    for (const auto &it : stab) {
      buffer.append(it.id(), it.data_size(), it.padding_size(), it.data_crc());
    }
    buffer.resize(ailego_align(buffer.size(), 32));

    // Write segment table into file
    if (write_data(buffer.data(), buffer.size()) != buffer.size()) {
      return false;
    }

    // Update footer
    IndexFormat::MetaFooter footer;
    IndexFormat::SetupMetaFooter(&footer);
    footer.segments_meta_crc = buffer.crc();
    footer.content_crc = data_crc_;
    footer.segment_count = stab.size();
    footer.segments_meta_size = buffer.size();
    footer.content_size = data_size_;
    footer.content_padding_size = content_padding_size;
    // Total size covers header, content, content padding, segment table
    // and footer
    footer.total_size = footer.content_size + footer.content_padding_size +
                        footer.segments_meta_size +
                        sizeof(IndexFormat::MetaHeader) +
                        sizeof(IndexFormat::MetaFooter);
    IndexFormat::UpdateMetaFooter(&footer, 0);

    // Write footer into file
    if (write_data(&footer, sizeof(footer)) != sizeof(footer)) {
      return false;
    }
    return true;
  }

  //! Pack index version
  template <typename TFunc>
  bool pack_version(TFunc write_data, std::vector<SegmentMeta> &stab) {
    static_assert(ailego::IsInvocableWithResult<size_t, TFunc, const void *,
                                                size_t>::value,
                  "Invocable function type");
    std::string buffer(IndexVersion::Details());

    size_t data_size = buffer.size();
    uint32_t data_crc = ailego::Crc32c::Hash(buffer.data(), buffer.size(), 0);
    buffer.resize((data_size + 31u) & ~31u);

    if (write_data(buffer.data(), buffer.size()) != buffer.size()) {
      return false;
    }
    data_crc_ = ailego::Crc32c::Hash(buffer.data(), buffer.size(), data_crc_);
    data_size_ += buffer.size();
    stab.emplace_back(std::string("IndexVersion"), data_size,
                      buffer.size() - data_size, data_crc);
    return true;
  }

 private:
  uint32_t magic_{0u};
  uint32_t data_crc_{0u};
  size_t data_size_{0u};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/include/zvec/core/framework/index_plugin.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <string>
#include <vector>

namespace zvec {
namespace core {

/*! Index Plugin
 *  Move-only holder of an opaque library handle.
 */
class IndexPlugin {
 public:
  //! Construct a plugin without a handle
  IndexPlugin() : handle_{nullptr} {}

  //! Take over the handle of `plugin`, leaving `plugin` without one
  IndexPlugin(IndexPlugin &&plugin) : handle_{plugin.handle_} {
    plugin.handle_ = nullptr;
  }

  //! Construct and attempt to load `path`; the result of load() is discarded
  //! here, so callers query is_valid() afterwards
  explicit IndexPlugin(const std::string &path) : handle_{nullptr} {
    load(path);
  }

  //! Destructor (does not call unload())
  ~IndexPlugin() {}

  //! Check whether a handle is currently held
  bool is_valid() const {
    return handle_ != nullptr;
  }

  //! Access the raw handle (nullptr when none is held)
  void *handle() const {
    return handle_;
  }

  //! Load the library located at `path`
  bool load(const std::string &path);

  //! Load the library located at `path`, with an error string out-parameter
  bool load(const std::string &path, std::string *err);

  //! Unload the plugin
  void unload();

 private:
  //! Copying is not allowed
  IndexPlugin(const IndexPlugin &) = delete;
  IndexPlugin &operator=(const IndexPlugin &) = delete;

  //! Opaque library handle
  void *handle_;
};

/*! Index Plugin Broker
 *  Move-only collection of IndexPlugin objects.
 */
class IndexPluginBroker {
 public:
  //! Construct an empty broker
  IndexPluginBroker() : plugins_{} {}

  //! Take over the plugins held by `broker`
  IndexPluginBroker(IndexPluginBroker &&broker)
      : plugins_{std::move(broker.plugins_)} {}

  //! Destructor
  ~IndexPluginBroker() {}

  //! Hand an already constructed plugin to the broker
  bool emplace(IndexPlugin &&plugin);

  //! Build a plugin from a library path and hand it to the broker
  bool emplace(const std::string &path) {
    IndexPlugin plugin(path);
    return emplace(std::move(plugin));
  }

  //! Load a plugin from a library path (reporting through `err`) and hand it
  //! to the broker; nothing is handed over when loading fails
  bool emplace(const std::string &path, std::string *err) {
    IndexPlugin plugin;
    return plugin.load(path, err) && emplace(std::move(plugin));
  }

  //! Number of plugins currently held
  size_t count() const {
    return plugins_.size();
  }

 private:
  //! Copying is not allowed
  IndexPluginBroker(const IndexPluginBroker &) = delete;
  IndexPluginBroker &operator=(const IndexPluginBroker &) = delete;

  //! Plugins owned by this broker
  std::vector<IndexPlugin> plugins_;
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/include/zvec/core/framework/index_provider.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <unordered_map>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_holder.h>
#include <zvec/core/framework/index_storage.h>

namespace zvec {
namespace core {

/*! Index Provider
 *  An IndexHolder that reports itself as multi-pass and additionally offers
 *  lookup of a vector by primary key.
 */
struct IndexProvider : IndexHolder {
  //! Shared pointer alias
  using Pointer = std::shared_ptr<IndexProvider>;

  //! Destructor
  virtual ~IndexProvider() {}

  //! Providers always report multi-pass capability
  bool multipass() const override {
    return true;
  }

 public:  // Provider's unique method
  //! Look up a vector by primary key
  virtual const void *get_vector(const uint64_t key) const = 0;

  //! Look up a vector by primary key into a memory block; the default
  //! implementation reports IndexError_NotImplemented
  virtual int get_vector(const uint64_t /*key*/,
                         IndexStorage::MemoryBlock & /*block*/) const {
    return IndexError_NotImplemented;
  }

  //! Name of the owner class
  virtual const std::string &owner_class() const = 0;
};

/*! Index SparseProvider
 *  An IndexSparseHolder that reports itself as multi-pass and additionally
 *  offers lookup of a sparse vector by primary key.
 */
struct IndexSparseProvider : public IndexSparseHolder {
  //! Shared pointer alias
  using Pointer = std::shared_ptr<IndexSparseProvider>;

  //! Destructor
  virtual ~IndexSparseProvider() {}

  //! Providers always report multi-pass capability
  bool multipass() const override {
    return true;
  }

 public:  // Provider's unique method
  //! Look up a sparse vector by primary key; results are delivered through
  //! the count and the two buffer out-parameters
  virtual int get_sparse_vector(uint64_t key, uint32_t *sparse_count,
                                std::string *sparse_indices_buffer,
                                std::string *sparse_values_buffer) const = 0;

  //! Name of the owner class
  virtual const std::string &owner_class() const = 0;
};

/*! Multi-Pass Numerical Index Provider
 *  Wraps a MultiPassNumericalIndexHolder and keeps a key -> position map so
 *  that vectors can be fetched by primary key.
 */
template <typename T>
class MultiPassNumericalIndexProvider : public IndexProvider {
 public:
  //! Construct a provider for vectors of `dim` dimensions
  explicit MultiPassNumericalIndexProvider(size_t dim)
      : holder_(dim), owner_class_("MultiPassNumericalIndexProvider") {}

  //! Destructor
  virtual ~MultiPassNumericalIndexProvider() {}

  //! Number of elements in the wrapped holder
  size_t count() const override {
    return holder_.count();
  }

  //! Dimension of the wrapped holder
  size_t dimension() const override {
    return holder_.dimension();
  }

  //! Element size in bytes of the wrapped holder
  size_t element_size() const override {
    return holder_.element_size();
  }

  //! Create a new iterator over the wrapped holder
  IndexHolder::Iterator::Pointer create_iterator() override {
    return holder_.create_iterator();
  }

  //! Fetch the vector stored under `key`, or nullptr if the key is unknown
  const void *get_vector(const uint64_t key) const override {
    auto found = indice_map_.find(key);
    if (found != indice_map_.end()) {
      return holder_.get_vector_by_index(found->second);
    }
    return nullptr;
  }

  //! Fetch the vector stored under `key` into `block`; returns
  //! IndexError_NoExist if the key is unknown, 0 otherwise
  int get_vector(const uint64_t key,
                 IndexStorage::MemoryBlock &block) const override {
    if (const void *data = get_vector(key)) {
      block.reset(const_cast<void *>(data));
      return 0;
    }
    return IndexError_NoExist;
  }

  //! Name of the owner class
  const std::string &owner_class() const override {
    return owner_class_;
  }

  //! Append an element (copy); false if the holder rejects it
  bool emplace(uint64_t key, const ailego::NumericalVector<T> &vec) {
    if (holder_.emplace(key, vec)) {
      remember_last(key);
      return true;
    }
    return false;
  }

  //! Append an element (move); false if the holder rejects it
  bool emplace(uint64_t key, ailego::NumericalVector<T> &&vec) {
    if (holder_.emplace(key, std::move(vec))) {
      remember_last(key);
      return true;
    }
    return false;
  }

 private:
  //! Map `key` to the position of the most recently appended element
  void remember_last(uint64_t key) {
    indice_map_[key] = static_cast<int>(holder_.count() - 1);
  }

  //! Members
  MultiPassNumericalIndexHolder<T> holder_;
  std::unordered_map<uint64_t, int> indice_map_;
  std::string owner_class_;
};

/*! Multi-Pass Binary Index Provider
 *  Wraps a MultiPassBinaryIndexHolder and keeps a key -> position map so
 *  that vectors can be fetched by primary key.
 */
template <typename T>
class MultiPassBinaryIndexProvider : public IndexProvider {
 public:
  //! Construct a provider for vectors of `dim` dimensions
  explicit MultiPassBinaryIndexProvider(size_t dim)
      : holder_(dim), owner_class_("MultiPassBinaryIndexProvider") {}

  //! Destructor
  virtual ~MultiPassBinaryIndexProvider() {}

  //! Number of elements in the wrapped holder
  size_t count() const override {
    return holder_.count();
  }

  //! Dimension of the wrapped holder
  size_t dimension() const override {
    return holder_.dimension();
  }

  //! Element size in bytes of the wrapped holder
  size_t element_size() const override {
    return holder_.element_size();
  }

  //! Create a new iterator over the wrapped holder
  IndexHolder::Iterator::Pointer create_iterator() override {
    return holder_.create_iterator();
  }

  //! Fetch the vector stored under `key`, or nullptr if the key is unknown
  const void *get_vector(const uint64_t key) const override {
    auto found = indice_map_.find(key);
    if (found != indice_map_.end()) {
      return holder_.get_vector_by_index(found->second);
    }
    return nullptr;
  }

  //! Fetch the vector stored under `key` into `block`; returns
  //! IndexError_NoExist if the key is unknown, 0 otherwise
  int get_vector(const uint64_t key,
                 IndexStorage::MemoryBlock &block) const override {
    if (const void *data = get_vector(key)) {
      block.reset(const_cast<void *>(data));
      return 0;
    }
    return IndexError_NoExist;
  }

  //! Name of the owner class
  const std::string &owner_class() const override {
    return owner_class_;
  }

  //! Append an element (copy); false if the holder rejects it
  bool emplace(uint64_t key, const ailego::BinaryVector<T> &vec) {
    if (holder_.emplace(key, vec)) {
      remember_last(key);
      return true;
    }
    return false;
  }

  //! Append an element (move); false if the holder rejects it
  bool emplace(uint64_t key, ailego::BinaryVector<T> &&vec) {
    if (holder_.emplace(key, std::move(vec))) {
      remember_last(key);
      return true;
    }
    return false;
  }

 private:
  //! Map `key` to the position of the most recently appended element
  void remember_last(uint64_t key) {
    indice_map_[key] = static_cast<int>(holder_.count() - 1);
  }

  //! Members
  MultiPassBinaryIndexHolder<T> holder_;
  std::unordered_map<uint64_t, int> indice_map_;
  std::string owner_class_;
};

/*! Multi-Pass Index Provider
 *  Primary template, declared but not defined here. The explicit
 *  specializations that follow provide one definition per supported
 *  IndexMeta::DataType value.
 */
template <IndexMeta::DataType FT>
struct MultiPassIndexProvider;

/*! Multi-Pass Index Provider (BINARY32)
 *  MultiPassBinaryIndexProvider<uint32_t> tagged as DT_BINARY32.
 */
template <>
struct MultiPassIndexProvider<IndexMeta::DataType::DT_BINARY32>
    : MultiPassBinaryIndexProvider<uint32_t> {
  //! Inherit the base class constructors
  using MultiPassBinaryIndexProvider::MultiPassBinaryIndexProvider;

  //! Report the data type tag of this provider
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DataType::DT_BINARY32;
  }
};

/*! Multi-Pass Index Provider (BINARY64)
 *  MultiPassBinaryIndexProvider<uint64_t> tagged as DT_BINARY64.
 */
template <>
struct MultiPassIndexProvider<IndexMeta::DataType::DT_BINARY64>
    : MultiPassBinaryIndexProvider<uint64_t> {
  //! Inherit the base class constructors
  using MultiPassBinaryIndexProvider::MultiPassBinaryIndexProvider;

  //! Report the data type tag of this provider
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DataType::DT_BINARY64;
  }
};

/*! Multi-Pass Index Provider (FP16)
 *  MultiPassNumericalIndexProvider<ailego::Float16> tagged as DT_FP16.
 */
template <>
struct MultiPassIndexProvider<IndexMeta::DataType::DT_FP16>
    : MultiPassNumericalIndexProvider<ailego::Float16> {
  //! Inherit the base class constructors
  using MultiPassNumericalIndexProvider::MultiPassNumericalIndexProvider;

  //! Report the data type tag of this provider
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DataType::DT_FP16;
  }
};

/*! Multi-Pass Index Provider (FP32)
 *  MultiPassNumericalIndexProvider<float> tagged as DT_FP32.
 */
template <>
struct MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>
    : MultiPassNumericalIndexProvider<float> {
  //! Inherit the base class constructors
  using MultiPassNumericalIndexProvider::MultiPassNumericalIndexProvider;

  //! Report the data type tag of this provider
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DataType::DT_FP32;
  }
};

/*! Multi-Pass Index Provider (FP64)
 *  MultiPassNumericalIndexProvider<double> tagged as DT_FP64.
 */
template <>
struct MultiPassIndexProvider<IndexMeta::DataType::DT_FP64>
    : MultiPassNumericalIndexProvider<double> {
  //! Inherit the base class constructors
  using MultiPassNumericalIndexProvider::MultiPassNumericalIndexProvider;

  //! Report the data type tag of this provider
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DataType::DT_FP64;
  }
};

/*! Multi-Pass Index Provider (INT8)
 *  MultiPassNumericalIndexProvider<int8_t> tagged as DT_INT8.
 */
template <>
struct MultiPassIndexProvider<IndexMeta::DataType::DT_INT8>
    : MultiPassNumericalIndexProvider<int8_t> {
  //! Inherit the base class constructors
  using MultiPassNumericalIndexProvider::MultiPassNumericalIndexProvider;

  //! Report the data type tag of this provider
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DataType::DT_INT8;
  }
};

/*! Multi-Pass Index Provider (INT16)
 *  MultiPassNumericalIndexProvider<int16_t> tagged as DT_INT16.
 */
template <>
struct MultiPassIndexProvider<IndexMeta::DataType::DT_INT16>
    : MultiPassNumericalIndexProvider<int16_t> {
  //! Inherit the base class constructors
  using MultiPassNumericalIndexProvider::MultiPassNumericalIndexProvider;

  //! Report the data type tag of this provider
  IndexMeta::DataType data_type() const override {
    return IndexMeta::DataType::DT_INT16;
  }
};

/*! Convert IndexHolder to IndexProvider
 *  @param holder The IndexHolder to convert
 *  @return IndexProvider::Pointer
 */
inline IndexProvider::Pointer convert_holder_to_provider(
    const IndexHolder::Pointer &holder) {
  if (!holder) {
    return nullptr;
  }

  IndexMeta::DataType data_type = holder->data_type();
  size_t dimension = holder->dimension();

  switch (data_type) {
    case IndexMeta::DataType::DT_FP16: {
      auto provider = std::make_shared<
          MultiPassIndexProvider<IndexMeta::DataType::DT_FP16>>(dimension);
      auto iter = holder->create_iterator();
      while (iter->is_valid()) {
        uint64_t key = iter->key();
        const ailego::Float16 *data =
            static_cast<const ailego::Float16 *>(iter->data());
        ailego::NumericalVector<ailego::Float16> vec(dimension);
        std::memcpy(vec.data(), data, dimension * sizeof(ailego::Float16));
        provider->emplace(key, std::move(vec));
        iter->next();
      }
      return provider;
    }

    case IndexMeta::DataType::DT_FP32: {
      auto provider = std::make_shared<
          MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dimension);
      auto iter = holder->create_iterator();
      while (iter->is_valid()) {
        uint64_t key = iter->key();
        const float *data = static_cast<const float *>(iter->data());
        ailego::NumericalVector<float> vec(dimension);
        std::memcpy(vec.data(), data, dimension * sizeof(float));
        provider->emplace(key, std::move(vec));
        iter->next();
      }
      return provider;
    }

    case IndexMeta::DataType::DT_FP64: {
      auto provider = std::make_shared<
          MultiPassIndexProvider<IndexMeta::DataType::DT_FP64>>(dimension);
      auto iter = holder->create_iterator();
      while (iter->is_valid()) {
        uint64_t key = iter->key();
        const double *data = static_cast<const double *>(iter->data());
        ailego::NumericalVector<double> vec(dimension);
        std::memcpy(vec.data(), data, dimension * sizeof(double));
        provider->emplace(key, std::move(vec));
        iter->next();
      }
      return provider;
    }

    case IndexMeta::DataType::DT_INT8: {
      auto provider = std::make_shared<
          MultiPassIndexProvider<IndexMeta::DataType::DT_INT8>>(dimension);
      auto iter = holder->create_iterator();
      while (iter->is_valid()) {
        uint64_t key = iter->key();
        const int8_t *data = static_cast<const int8_t *>(iter->data());
        ailego::NumericalVector<int8_t> vec(dimension);
        std::memcpy(vec.data(), data, dimension * sizeof(int8_t));
        provider->emplace(key, std::move(vec));
        iter->next();
      }
      return provider;
    }

    case IndexMeta::DataType::DT_INT16: {
      auto provider = std::make_shared<
          MultiPassIndexProvider<IndexMeta::DataType::DT_INT16>>(dimension);
      auto iter = holder->create_iterator();
      while (iter->is_valid()) {
        uint64_t key = iter->key();
        const int16_t *data = static_cast<const int16_t *>(iter->data());
        ailego::NumericalVector<int16_t> vec(dimension);
        std::memcpy(vec.data(), data, dimension * sizeof(int16_t));
        provider->emplace(key, std::move(vec));
        iter->next();
      }
      return provider;
    }

    case IndexMeta::DataType::DT_BINARY32: {
      auto provider = std::make_shared<
          MultiPassIndexProvider<IndexMeta::DataType::DT_BINARY32>>(dimension);
      auto iter = holder->create_iterator();
      while (iter->is_valid()) {
        uint64_t key = iter->key();
        const uint32_t *data = static_cast<const uint32_t *>(iter->data());
        size_t binary_size = (dimension + 31) / 32;
        ailego::BinaryVector<uint32_t> vec(dimension);
        std::memcpy(vec.data(), data, binary_size * sizeof(uint32_t));
        provider->emplace(key, std::move(vec));
        iter->next();
      }
      return provider;
    }

    case IndexMeta::DataType::DT_BINARY64: {
      auto provider = std::make_shared<
          MultiPassIndexProvider<IndexMeta::DataType::DT_BINARY64>>(dimension);
      auto iter = holder->create_iterator();
      while (iter->is_valid()) {
        uint64_t key = iter->key();
        const uint64_t *data = static_cast<const uint64_t *>(iter->data());
        size_t binary_size = (dimension + 63) / 64;
        ailego::BinaryVector<uint64_t> vec(dimension);
        std::memcpy(vec.data(), data, binary_size * sizeof(uint64_t));
        provider->emplace(key, std::move(vec));
        iter->next();
      }
      return provider;
    }

    default:
      return nullptr;
  }
}

}  // namespace core
}  // namespace zvec


================================================
FILE: src/include/zvec/core/framework/index_reducer.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <zvec/ailego/parallel/thread_pool.h>
#include <zvec/core/framework/index_builder.h>
#include <zvec/core/framework/index_converter.h>
#include <zvec/core/framework/index_dumper.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_filter.h>
#include <zvec/core/framework/index_reformer.h>
#include <zvec/core/framework/index_stats.h>
#include <zvec/core/framework/index_streamer.h>

namespace zvec {
namespace core {

/*! Index Reducer Base
 *  Common interface of the reducer modules, together with their statistics
 *  type and the optional thread pool / stop flag they may be given.
 */
class IndexReducerBase : public IndexModule {
 public:
  //! Shared pointer alias
  using Pointer = std::shared_ptr<IndexReducerBase>;

  /*! Index Reducer Stats
   *  Atomic counters and timings of a reduce/dump run.
   */
  class Stats : public IndexStats {
   public:
    //! Construct with all counters at zero
    Stats() {}

    //! Copy the counters of `stats`.
    //! NOTE(review): the IndexStats base is default-constructed here and is
    //! not assigned by operator= — confirm this is intended.
    Stats(const Stats &stats) {
      *this = stats;
    }

    //! Assign each counter of `stats` individually (atomics are not
    //! copy-assignable as a whole)
    Stats &operator=(const Stats &stats) {
      loaded_count_.store(stats.loaded_count_.load());
      dumped_count_.store(stats.dumped_count_.load());
      filtered_count_.store(stats.filtered_count_.load());
      duplicated_count_.store(stats.duplicated_count_.load());
      reduced_costtime_.store(stats.reduced_costtime_.load());
      dumped_costtime_.store(stats.dumped_costtime_.load());
      return *this;
    }

    //! Set count of documents loaded
    void set_loaded_count(size_t count) {
      loaded_count_.store(count);
    }

    //! Set count of documents dumped
    void set_dumped_count(size_t count) {
      dumped_count_.store(count);
    }

    //! Set count of documents filtered
    void set_filtered_count(size_t count) {
      filtered_count_.store(count);
    }

    //! Set count of documents duplicated
    void set_duplicated_count(size_t count) {
      duplicated_count_.store(count);
    }

    //! Set time cost of documents reduced
    void set_reduced_costtime(uint64_t cost) {
      reduced_costtime_.store(cost);
    }

    //! Set time cost of documents dumped
    void set_dumped_costtime(uint64_t cost) {
      dumped_costtime_.store(cost);
    }

    //! Count of documents loaded
    size_t loaded_count() const {
      return loaded_count_.load();
    }

    //! Count of documents dumped
    size_t dumped_count() const {
      return dumped_count_.load();
    }

    //! Count of documents filtered
    size_t filtered_count() const {
      return filtered_count_.load();
    }

    //! Count of documents duplicated
    size_t duplicated_count() const {
      return duplicated_count_.load();
    }

    //! Time cost of documents reduced
    uint64_t reduced_costtime() const {
      return reduced_costtime_.load();
    }

    //! Time cost of documents dumped
    uint64_t dumped_costtime() const {
      return dumped_costtime_.load();
    }

    //! Direct access to the loaded counter
    std::atomic<size_t> *mutable_loaded_count() {
      return &loaded_count_;
    }

    //! Direct access to the dumped counter
    std::atomic<size_t> *mutable_dumped_count() {
      return &dumped_count_;
    }

    //! Direct access to the filtered counter
    std::atomic<size_t> *mutable_filtered_count() {
      return &filtered_count_;
    }

    //! Direct access to the duplicated counter
    std::atomic<size_t> *mutable_duplicated_count() {
      return &duplicated_count_;
    }

    //! Direct access to the reduce time cost
    std::atomic<uint64_t> *mutable_reduced_costtime() {
      return &reduced_costtime_;
    }

    //! Direct access to the dump time cost
    std::atomic<uint64_t> *mutable_dumped_costtime() {
      return &dumped_costtime_;
    }

   private:
    //! Counters and timings, all starting at zero
    std::atomic<size_t> loaded_count_{0u};
    std::atomic<size_t> dumped_count_{0u};
    std::atomic<size_t> filtered_count_{0u};
    std::atomic<size_t> duplicated_count_{0u};
    std::atomic<uint64_t> reduced_costtime_{0u};
    std::atomic<uint64_t> dumped_costtime_{0u};
  };

  //! Destructor
  virtual ~IndexReducerBase() = default;

  //! Initialize the reducer with parameters
  virtual int init(const ailego::Params &params) = 0;

  //! Clean up the reducer
  virtual int cleanup() = 0;

  //! Run the reduce operation with a filter
  virtual int reduce(const IndexFilter &filter) = 0;

  //! Dump the index through a dumper
  virtual int dump(const IndexDumper::Pointer &dumper) = 0;

  //! Access the statistics
  virtual const Stats &stats() const = 0;

  //! Remember the thread pool to use (stored as a raw, non-owning pointer)
  void set_thread_pool(ailego::ThreadPool *pool) {
    thread_pool_ = pool;
  }

  //! Remember the stop flag to observe (stored as a raw, non-owning pointer)
  void set_stop_flag(std::atomic<bool> *stop_flag) {
    stop_flag_ = stop_flag;
  }

 protected:
  //! Both pointers stay null until the matching setter is called
  ailego::ThreadPool *thread_pool_{nullptr};
  std::atomic<bool> *stop_flag_{nullptr};
};

/*! Index Reducer
 *  Distinct reducer type that adds nothing to IndexReducerBase beyond its
 *  own pointer alias.
 */
class IndexReducer : public IndexReducerBase {
 public:
  //! Shared pointer alias
  using Pointer = std::shared_ptr<IndexReducer>;

  //! Destructor
  virtual ~IndexReducer() = default;
};

/*! Index Sparse Reducer
 *  Distinct reducer type that adds nothing to IndexReducerBase beyond its
 *  own pointer alias.
 */
class IndexSparseReducer : public IndexReducerBase {
 public:
  //! Shared pointer alias
  using Pointer = std::shared_ptr<IndexSparseReducer>;

  //! Destructor
  virtual ~IndexSparseReducer() = default;
};

/*! Index Streamer Reducer
 *  Reducer interface extended with two streamer-related operations.
 */
class IndexStreamerReducer : public IndexReducerBase {
 public:
  //! Shared pointer alias
  using Pointer = std::shared_ptr<IndexStreamerReducer>;

  //! Hand over the builder, streamer and converter, plus an optional
  //! reformer and original query meta.
  //! The spelling "wiht" is part of the public name and is kept as is.
  virtual int set_target_streamer_wiht_info(
      const IndexBuilder::Pointer builder,
      const IndexStreamer::Pointer streamer,
      const IndexConverter::Pointer converter,
      const IndexReformer::Pointer reformer = nullptr,
      const IndexQueryMeta &original_query_meta = IndexQueryMeta()) = 0;

  //! Feed a streamer, given a reformer
  virtual int feed_streamer_with_reformer(
      IndexStreamer::Pointer streamer,
      const IndexReformer::Pointer reformer) = 0;

  //! Destructor
  virtual ~IndexStreamerReducer() = default;
};
}  // namespace core
}  // namespace zvec


================================================
FILE: src/include/zvec/core/framework/index_refiner.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include <zvec/ailego/container/heap.h>
#include <zvec/core/framework/index_context.h>
#include <zvec/core/framework/index_helper.h>
#include <zvec/core/framework/index_searcher.h>
#include <zvec/core/framework/index_streamer.h>

namespace zvec {
namespace core {

/*! Index Refiner
 *  Interface of a module that works on a pair of runners (base and refine);
 *  every add/search call takes one query for each of them.
 */
class IndexRefiner : public IndexModule {
 public:
  //! Shared pointer alias
  using Pointer = std::shared_ptr<IndexRefiner>;

  /*! Index Refiner Context
   *  An IndexContext that can be given a base context and a refine context.
   */
  struct Context : IndexContext {
    Context() = default;
    ~Context() = default;

    //! Attach the base and the refine contexts
    virtual int set_contexts(IndexRunner::Context::Pointer base_ctx,
                             IndexRunner::Context::Pointer refine_ctx) = 0;
  };

  //! Initialize the refiner with its base and refine runners
  virtual int init(IndexRunner::Pointer base_runner,
                   IndexRunner::Pointer refine_runner,
                   const ailego::Params &params) = 0;

  //! Clean up the refiner
  virtual int cleanup() = 0;

  //! Create a context
  virtual Context::Pointer create_context() const = 0;

  //! Add one vector (given as base and refine queries) under `key`
  virtual int add_impl(uint64_t key, const void *base_query,
                       const IndexQueryMeta &base_qmeta,
                       const void *refine_query,
                       const IndexQueryMeta &refine_qmeta,
                       Context::Pointer &context) = 0;

  //! Similarity search for a single query
  virtual int search_impl(const void *base_query,
                          const IndexQueryMeta &base_qmeta,
                          const void *refine_query,
                          const IndexQueryMeta &refine_qmeta,
                          Context::Pointer &context) const = 0;

  //! Similarity search for `count` queries
  virtual int search_impl(const void *base_query,
                          const IndexQueryMeta &base_qmeta,
                          const void *refine_query,
                          const IndexQueryMeta &refine_qmeta, uint32_t count,
                          Context::Pointer &context) const = 0;

  //! Brute force similarity search for a single query
  virtual int search_bf_impl(const void *base_query,
                             const IndexQueryMeta &base_qmeta,
                             const void *refine_query,
                             const IndexQueryMeta &refine_qmeta,
                             Context::Pointer &context) const = 0;

  //! Brute force similarity search for `count` queries
  virtual int search_bf_impl(const void *base_query,
                             const IndexQueryMeta &base_qmeta,
                             const void *refine_query,
                             const IndexQueryMeta &refine_qmeta, uint32_t count,
                             Context::Pointer &context) const = 0;
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/include/zvec/core/framework/index_reformer.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <zvec/core/framework/index_document.h>
#include <zvec/core/framework/index_meta.h>

namespace zvec {
namespace core {

/*! Index Reformer
 *  Interface for transforming queries, converting records, normalizing
 *  results and reverting vectors, in dense and sparse flavours. Apart from
 *  the lifecycle methods every operation has a default implementation.
 */
class IndexReformer : public IndexModule {
 public:
  //! Shared pointer alias
  using Pointer = std::shared_ptr<IndexReformer>;

  //! Destructor
  virtual ~IndexReformer() {}

  //! Initialize the reformer with parameters
  virtual int init(const ailego::Params &params) = 0;

  //! Clean up the reformer
  virtual int cleanup() = 0;

  //! Load index from container
  virtual int load(IndexStorage::Pointer cntr) = 0;

  //! Unload index
  virtual int unload() = 0;

  //! Transform a single query (not implemented by default)
  virtual int transform(const void * /*query*/,
                        const IndexQueryMeta & /*qmeta*/, std::string * /*out*/,
                        IndexQueryMeta * /*ometa*/) const {
    return IndexError_NotImplemented;
  }

  //! Transform `count` queries (not implemented by default)
  virtual int transform(const void * /*query*/,
                        const IndexQueryMeta & /*qmeta*/, uint32_t /*count*/,
                        std::string * /*out*/,
                        IndexQueryMeta * /*ometa*/) const {
    return IndexError_NotImplemented;
  }

  //! Convert a single record; forwards to the single-query transform()
  virtual int convert(const void *record, const IndexQueryMeta &rmeta,
                      std::string *out, IndexQueryMeta *ometa) const {
    return transform(record, rmeta, out, ometa);
  }

  //! Convert `count` records; forwards to the multi-query transform()
  virtual int convert(const void *records, const IndexQueryMeta &rmeta,
                      uint32_t count, std::string *out,
                      IndexQueryMeta *ometa) const {
    return transform(records, rmeta, count, out, ometa);
  }

  //! Normalize results (not implemented by default)
  virtual int normalize(const void * /*query*/,
                        const IndexQueryMeta & /*qmeta*/,
                        IndexDocumentList & /*result*/) const {
    return IndexError_NotImplemented;
  }

  //! Whether revert is needed (false by default)
  virtual bool need_revert() const {
    return false;
  }

  //! Revert a dense vector (not implemented by default)
  virtual int revert(const void * /*in*/, const IndexQueryMeta & /*qmeta*/,
                     std::string * /*out*/) const {
    return IndexError_NotImplemented;
  }

  //! Transform a single sparse query (not implemented by default)
  virtual int transform(uint32_t /*sparse_count*/,
                        const uint32_t * /*sparse_indices*/,
                        const void * /*sparse_query*/,
                        const IndexQueryMeta & /*qmeta*/, std::string * /*out*/,
                        IndexQueryMeta * /*ometa*/) const {
    return IndexError_NotImplemented;
  }

  //! Transform `count` sparse queries (not implemented by default)
  virtual int transform(const uint32_t * /*sparse_count*/,
                        const uint32_t * /*sparse_indices*/,
                        const void * /*sparse_query*/,
                        const IndexQueryMeta & /*qmeta*/, uint32_t /*count*/,
                        std::string * /*out*/,
                        IndexQueryMeta * /*ometa*/) const {
    return IndexError_NotImplemented;
  }

  //! Convert a single sparse record; forwards to the sparse transform()
  virtual int convert(uint32_t sparse_count, const uint32_t *sparse_indices,
                      const void *sparse_query, const IndexQueryMeta &qmeta,
                      std::string *out, IndexQueryMeta *ometa) const {
    return transform(sparse_count, sparse_indices, sparse_query, qmeta, out,
                     ometa);
  }

  //! Convert `count` sparse records; forwards to the sparse multi-query
  //! transform()
  virtual int convert(const uint32_t *sparse_count,
                      const uint32_t *sparse_indices, const void *sparse_query,
                      const IndexQueryMeta &qmeta, uint32_t count,
                      std::string *out, IndexQueryMeta *ometa) const {
    return transform(sparse_count, sparse_indices, sparse_query, qmeta, count,
                     out, ometa);
  }

  //! Revert a sparse vector; the default returns 0 and leaves the output
  //! untouched.
  //! NOTE(review): unlike the dense revert() default this does not return
  //! IndexError_NotImplemented — confirm the difference is intended.
  virtual int revert(const uint32_t /*sparse_count*/,
                     const uint32_t * /*sparse_indices*/,
                     const void * /*sparse_query*/,
                     const IndexQueryMeta & /*qmeta*/,
                     std::string * /*sparse_query_out*/) const {
    return 0;
  }
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/include/zvec/core/framework/index_runner.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <atomic>
#include <zvec/core/framework/index_context.h>
#include <zvec/core/framework/index_dumper.h>
#include <zvec/core/framework/index_meta.h>
#include <zvec/core/framework/index_metric.h>
#include <zvec/core/framework/index_module.h>
#include <zvec/core/framework/index_provider.h>
#include <zvec/core/framework/index_stats.h>
#include <zvec/core/framework/index_threads.h>
#include <zvec/core/framework/index_trainer.h>

namespace zvec {
namespace core {

/*! Index Runner
 *
 * Base interface of index runners (IndexSearcher, for example, derives from
 * it). Only `stats()` and `cleanup()` are pure virtual. Every other operation
 * has a default: convenience overloads forward to their more general sibling,
 * and the remaining defaults either report IndexError_NotImplemented or
 * return an empty pointer, so a derived class overrides only what it supports.
 */
class IndexRunner : public IndexModule {
 public:
  //! Index Runner Pointer
  typedef std::shared_ptr<IndexRunner> Pointer;

  /*! Index Runner Context
   */
  using Context = IndexContext;

  /*! Index Runner Provider
   */
  using Provider = IndexProvider;

  /*! Index Runner Sparse Provider
   */
  using SparseProvider = IndexSparseProvider;

  /*! Index Runner Stats
   *
   * Counters, sizes and timings of an index. All fields except the revision
   * id are std::atomic, which is why the copy operations are hand-written
   * (std::atomic is not copyable) and why `mutable_*()` accessors hand out
   * pointers to the atomics for in-place updates.
   */
  class Stats : public IndexStats {
   public:
    //! Constructor
    Stats() {}

    //! Copy constructor (delegates to the copy assignment below)
    Stats(const Stats &stats) {
      *this = stats;
    }

    //! Copy assignment: copies every field declared in this class, one
    //! atomic load/store pair per field (the copy as a whole is not atomic).
    // NOTE(review): the attributes held by the IndexStats base are not copied
    // here - confirm whether that is intentional.
    Stats &operator=(const Stats &stats) {
      this->revision_id_ = stats.revision_id_;

      this->trained_count_.store(stats.trained_count_.load());
      this->built_count_.store(stats.built_count_.load());
      this->dumped_count_.store(stats.dumped_count_.load());
      this->loaded_count_.store(stats.loaded_count_.load());
      this->added_count_.store(stats.added_count_.load());
      this->discarded_count_.store(stats.discarded_count_.load());
      this->updated_count_.store(stats.updated_count_.load());
      this->deleted_count_.store(stats.deleted_count_.load());

      this->index_size_.store(stats.index_size_.load());
      this->dumped_size_.store(stats.dumped_size_.load());

      this->check_point_.store(stats.check_point_.load());

      this->create_time_.store(stats.create_time_.load());
      this->update_time_.store(stats.update_time_.load());
      this->loaded_costtime_.store(stats.loaded_costtime_.load());
      this->trained_costtime_.store(stats.trained_costtime_.load());
      this->built_costtime_.store(stats.built_costtime_.load());
      this->dumped_costtime_.store(stats.dumped_costtime_.load());

      return *this;
    }

    //! Set revision id
    void set_revision_id(size_t rev) {
      revision_id_ = rev;
    }

    //! Set count of documents trained
    void set_trained_count(size_t count) {
      trained_count_ = count;
    }

    //! Set count of documents built
    void set_built_count(size_t count) {
      built_count_ = count;
    }

    //! Set count of documents dumped
    void set_dumped_count(size_t count) {
      dumped_count_ = count;
    }

    //! Set count of documents loaded
    void set_loaded_count(size_t count) {
      loaded_count_ = count;
    }

    //! Set count of documents added
    void set_added_count(size_t count) {
      added_count_ = count;
    }

    //! Set count of documents discarded
    void set_discarded_count(size_t count) {
      discarded_count_ = count;
    }

    //! Set count of documents updated
    void set_updated_count(size_t count) {
      updated_count_ = count;
    }

    //! Set count of documents deleted
    void set_deleted_count(size_t count) {
      deleted_count_ = count;
    }

    //! Set size of index
    void set_index_size(size_t count) {
      index_size_ = count;
    }

    //! Set size of index dumped
    void set_dumped_size(size_t count) {
      dumped_size_ = count;
    }

    //! Set check point of index
    void set_check_point(uint64_t val) {
      check_point_ = val;
    }

    //! Set create time of index
    void set_create_time(uint64_t val) {
      create_time_ = val;
    }

    //! Set update time of index
    void set_update_time(uint64_t val) {
      update_time_ = val;
    }

    //! Set loaded cost time of index
    void set_loaded_costtime(uint64_t val) {
      loaded_costtime_ = val;
    }

    //! Set trained cost time of index
    void set_trained_costtime(uint64_t val) {
      trained_costtime_ = val;
    }

    //! Set built cost time of index
    void set_built_costtime(uint64_t val) {
      built_costtime_ = val;
    }

    //! Set dumped cost time of index
    void set_dumped_costtime(uint64_t val) {
      dumped_costtime_ = val;
    }

    //! Retrieve revision id
    size_t revision_id(void) const {
      return revision_id_;
    }

    //! Retrieve count of documents trained
    size_t trained_count(void) const {
      return trained_count_;
    }

    //! Retrieve count of documents built
    size_t built_count(void) const {
      return built_count_;
    }

    //! Retrieve count of documents dumped
    size_t dumped_count(void) const {
      return dumped_count_;
    }

    //! Retrieve count of documents loaded
    size_t loaded_count(void) const {
      return loaded_count_;
    }

    //! Retrieve count of documents added
    size_t added_count(void) const {
      return added_count_;
    }

    //! Retrieve count of documents discarded
    size_t discarded_count(void) const {
      return discarded_count_;
    }

    //! Retrieve count of documents updated
    size_t updated_count(void) const {
      return updated_count_;
    }

    //! Retrieve count of documents deleted
    size_t deleted_count(void) const {
      return deleted_count_;
    }

    //! Retrieve size of index
    size_t index_size(void) const {
      return index_size_;
    }

    //! Retrieve size of index dumped
    size_t dumped_size(void) const {
      return dumped_size_;
    }

    //! Retrieve check point of index
    uint64_t check_point(void) const {
      return check_point_;
    }

    //! Retrieve create time of index
    uint64_t create_time(void) const {
      return create_time_;
    }

    //! Retrieve update time of index
    uint64_t update_time(void) const {
      return update_time_;
    }

    //! Retrieve loaded cost time of index
    uint64_t loaded_costtime(void) const {
      return loaded_costtime_;
    }

    //! Retrieve trained cost time of index
    uint64_t trained_costtime(void) const {
      return trained_costtime_;
    }

    //! Retrieve built cost time of index
    uint64_t built_costtime(void) const {
      return built_costtime_;
    }

    //! Retrieve dumped cost time of index
    uint64_t dumped_costtime(void) const {
      return dumped_costtime_;
    }

    //! Retrieve count of documents trained (mutable)
    std::atomic<size_t> *mutable_trained_count(void) {
      // Must point at the trained counter, matching trained_count() and
      // set_trained_count(); the loaded counter has its own accessor below.
      return &trained_count_;
    }

    //! Retrieve count of documents built (mutable)
    std::atomic<size_t> *mutable_built_count(void) {
      return &built_count_;
    }

    //! Retrieve count of documents dumped (mutable)
    std::atomic<size_t> *mutable_dumped_count(void) {
      return &dumped_count_;
    }

    //! Retrieve count of documents loaded (mutable)
    std::atomic<size_t> *mutable_loaded_count(void) {
      return &loaded_count_;
    }

    //! Retrieve count of documents added (mutable)
    std::atomic<size_t> *mutable_added_count(void) {
      return &added_count_;
    }

    //! Retrieve count of documents discarded (mutable)
    std::atomic<size_t> *mutable_discarded_count(void) {
      return &discarded_count_;
    }

    //! Retrieve count of documents updated (mutable)
    std::atomic<size_t> *mutable_updated_count(void) {
      return &updated_count_;
    }

    //! Retrieve count of documents deleted (mutable)
    std::atomic<size_t> *mutable_deleted_count(void) {
      return &deleted_count_;
    }

    //! Retrieve size of index (mutable)
    std::atomic<size_t> *mutable_index_size(void) {
      return &index_size_;
    }

    //! Retrieve size of index dumped (mutable)
    std::atomic<size_t> *mutable_dumped_size(void) {
      return &dumped_size_;
    }

    //! Retrieve check point of index (mutable)
    std::atomic<uint64_t> *mutable_check_point(void) {
      return &check_point_;
    }

    //! Retrieve create time of index (mutable)
    std::atomic<uint64_t> *mutable_create_time(void) {
      return &create_time_;
    }

    //! Retrieve update time of index (mutable)
    std::atomic<uint64_t> *mutable_update_time(void) {
      return &update_time_;
    }

    //! Retrieve loaded cost time of index (mutable)
    std::atomic<uint64_t> *mutable_loaded_costtime(void) {
      return &loaded_costtime_;
    }

    //! Retrieve trained cost time of index (mutable)
    std::atomic<uint64_t> *mutable_trained_costtime(void) {
      return &trained_costtime_;
    }

    //! Retrieve built cost time of index (mutable)
    std::atomic<uint64_t> *mutable_built_costtime(void) {
      return &built_costtime_;
    }

    //! Retrieve dumped cost time of index (mutable)
    std::atomic<uint64_t> *mutable_dumped_costtime(void) {
      return &dumped_costtime_;
    }

    //! Reset the base-class attributes and every field of this class to zero
    void clear() {
      this->clear_attributes();

      revision_id_ = 0u;

      trained_count_ = 0u;
      built_count_ = 0u;
      dumped_count_ = 0u;
      loaded_count_ = 0u;
      added_count_ = 0u;
      discarded_count_ = 0u;
      updated_count_ = 0u;
      deleted_count_ = 0u;

      index_size_ = 0u;
      dumped_size_ = 0u;
      check_point_ = 0u;

      create_time_ = 0u;
      update_time_ = 0u;
      loaded_costtime_ = 0u;
      trained_costtime_ = 0u;
      built_costtime_ = 0u;
      dumped_costtime_ = 0u;
    }

   private:
    //! Members
    size_t revision_id_{0u};

    std::atomic<size_t> trained_count_{0u};
    std::atomic<size_t> built_count_{0u};
    std::atomic<size_t> dumped_count_{0u};
    std::atomic<size_t> loaded_count_{0u};
    std::atomic<size_t> added_count_{0u};
    std::atomic<size_t> discarded_count_{0u};
    std::atomic<size_t> updated_count_{0u};
    std::atomic<size_t> deleted_count_{0u};

    std::atomic<size_t> index_size_{0u};
    std::atomic<size_t> dumped_size_{0u};
    std::atomic<uint64_t> check_point_{0u};

    std::atomic<uint64_t> create_time_{0u};
    std::atomic<uint64_t> update_time_{0u};
    std::atomic<uint64_t> loaded_costtime_{0u};
    std::atomic<uint64_t> trained_costtime_{0u};
    std::atomic<uint64_t> built_costtime_{0u};
    std::atomic<uint64_t> dumped_costtime_{0u};
  };

  //! Constructor
  IndexRunner() = default;

  //! Destructor
  virtual ~IndexRunner() = default;

  //! Retrieve statistics
  virtual const Stats &stats(void) const = 0;

  //! Cleanup the runner
  virtual int cleanup() = 0;

  //! Unload the runner (default: IndexError_NotImplemented)
  virtual int unload() {
    return IndexError_NotImplemented;
  }

  //! Print debug info (default: no-op)
  virtual void print_debug_info() {}

  //! Create a context (default: empty pointer)
  virtual Context::Pointer create_context(void) const {
    return Context::Pointer();
  }

  //! Create a provider (default: empty pointer)
  virtual Provider::Pointer create_provider(void) const {
    return Provider::Pointer();
  }

  //! Create a sparse provider (default: empty pointer)
  virtual SparseProvider::Pointer create_sparse_provider(void) const {
    return SparseProvider::Pointer();
  }

  //! Get vector by key (default: nullptr)
  virtual const void *get_vector(uint64_t /*key*/) const {
    return nullptr;
  }

  //! Get vector by key into a memory block
  //! (default: IndexError_NotImplemented)
  virtual int get_vector(const uint64_t /*key*/,
                         IndexStorage::MemoryBlock & /*block*/) const {
    return IndexError_NotImplemented;
  }

  //! Fetch vector by id (default: nullptr)
  virtual const void *get_vector_by_id(uint32_t /*id*/) const {
    return nullptr;
  }

  //! Fetch vector by id into a memory block
  //! (default: IndexError_NotImplemented)
  virtual int get_vector_by_id(const uint32_t /*id*/,
                               IndexStorage::MemoryBlock & /*block*/) const {
    return IndexError_NotImplemented;
  }

  //! Fetch vector by key into a memory block
  //! (default: IndexError_NotImplemented)
  virtual int get_vector_by_key(const uint64_t /*key*/,
                                IndexStorage::MemoryBlock & /*block*/) const {
    return IndexError_NotImplemented;
  }

  //! Get sparse vector by key (default: IndexError_NotImplemented)
  virtual int get_sparse_vector(uint64_t /*key*/, uint32_t * /*sparse_count*/,
                                std::string * /*sparse_indices_buffer*/,
                                std::string * /*sparse_values_buffer*/) const {
    return IndexError_NotImplemented;
  }

  //! Fetch sparse vector by id (default: IndexError_NotImplemented)
  virtual int get_sparse_vector_by_id(
      uint32_t /*id*/, uint32_t * /*sparse_count*/,
      std::string * /*sparse_indices_buffer*/,
      std::string * /*sparse_values_buffer*/) const {
    return IndexError_NotImplemented;
  }

  //! Add a vector into index (default: IndexError_NotImplemented)
  virtual int add_impl(uint64_t /*key*/, const void * /*query*/,
                       const IndexQueryMeta & /*qmeta*/,
                       Context::Pointer & /*context*/) {
    return IndexError_NotImplemented;
  }

  //! Add a vector with id into index (default: IndexError_NotImplemented)
  virtual int add_with_id_impl(uint32_t /*id*/, const void * /*query*/,
                               const IndexQueryMeta & /*qmeta*/,
                               Context::Pointer & /*context*/) {
    return IndexError_NotImplemented;
  }

  //! Similarity search (default: IndexError_NotImplemented)
  virtual int search_impl(const void * /*query*/,
                          const IndexQueryMeta & /*qmeta*/,
                          Context::Pointer & /*context*/) const {
    return IndexError_NotImplemented;
  }

  //! Similarity search taking an additional `count` argument
  //! (default: IndexError_NotImplemented)
  virtual int search_impl(const void * /*query*/,
                          const IndexQueryMeta & /*qmeta*/, uint32_t /*count*/,
                          Context::Pointer & /*context*/) const {
    return IndexError_NotImplemented;
  }

  //! Similarity search with sparse inputs and a `count` argument
  //! (default: IndexError_NotImplemented)
  virtual int search_impl(const uint32_t * /*sparse_count*/,
                          const uint32_t * /*sparse_indices*/,
                          const void * /*sparse_query*/,
                          const IndexQueryMeta & /*qmeta*/, uint32_t /*count*/,
                          Context::Pointer & /*context*/) const {
    return IndexError_NotImplemented;
  }

  //! Similarity search with sparse inputs
  //! (default: IndexError_NotImplemented)
  virtual int search_impl(const uint32_t /*sparse_count*/,
                          const uint32_t * /*sparse_indices*/,
                          const void * /*sparse_query*/,
                          const IndexQueryMeta & /*qmeta*/,
                          Context::Pointer & /*context*/) const {
    return IndexError_NotImplemented;
  }

  //! Similarity brute force search (default: IndexError_NotImplemented)
  virtual int search_bf_impl(const void * /*query*/,
                             const IndexQueryMeta & /*qmeta*/,
                             Context::Pointer & /*context*/) const {
    return IndexError_NotImplemented;
  }

  //! Similarity brute force search taking an additional `count` argument
  //! (default: IndexError_NotImplemented)
  virtual int search_bf_impl(const void * /*query*/,
                             const IndexQueryMeta & /*qmeta*/,
                             uint32_t /*count*/,
                             Context::Pointer & /*context*/) const {
    return IndexError_NotImplemented;
  }

  //! Add a vector into index with sparse inputs
  //! (default: IndexError_NotImplemented)
  virtual int add_impl(uint64_t /* pkey */, const uint32_t /* sparse_count*/,
                       const uint32_t * /* sparse_indices */,
                       const void * /* sparse_query */,
                       const IndexQueryMeta & /* qmeta */,
                       Context::Pointer & /* context */) {
    return IndexError_NotImplemented;
  }

  //! Add a vector with id into index with sparse inputs
  //! (default: IndexError_NotImplemented)
  virtual int add_with_id_impl(uint32_t /* id */,
                               const uint32_t /* sparse_count*/,
                               const uint32_t * /* sparse_indices */,
                               const void * /* sparse_query */,
                               const IndexQueryMeta & /* qmeta */,
                               Context::Pointer & /* context */) {
    return IndexError_NotImplemented;
  }

  //! Brute force search with sparse inputs and a `count` argument
  //! (default: IndexError_NotImplemented)
  virtual int search_bf_impl(const uint32_t * /*sparse_count*/,
                             const uint32_t * /*sparse_indices*/,
                             const void * /*sparse_query*/,
                             const IndexQueryMeta & /*qmeta*/,
                             uint32_t /*count*/,
                             Context::Pointer & /*context*/) const {
    return IndexError_NotImplemented;
  }

  //! Brute force search with sparse inputs
  //! (default: IndexError_NotImplemented)
  virtual int search_bf_impl(const uint32_t /*sparse_count*/,
                             const uint32_t * /*sparse_indices*/,
                             const void * /*sparse_query*/,
                             const IndexQueryMeta & /*qmeta*/,
                             Context::Pointer & /*context*/) const {
    return IndexError_NotImplemented;
  }

  //! Similarity brute force search by primary keys.
  //! Forwards to the overload below with a count of 1.
  virtual int search_bf_by_p_keys_impl(
      const void *query, const std::vector<std::vector<uint64_t>> &p_keys,
      const IndexQueryMeta &qmeta, Context::Pointer &context) const {
    return search_bf_by_p_keys_impl(query, p_keys, qmeta, 1, context);
  }

  //! Similarity brute force search by primary keys with a `count` argument
  //! (default: IndexError_NotImplemented)
  virtual int search_bf_by_p_keys_impl(
      const void * /*query*/,
      const std::vector<std::vector<uint64_t>> & /*p_keys*/,
      const IndexQueryMeta & /*qmeta*/, uint32_t /*count*/,
      Context::Pointer & /*context*/) const {
    return IndexError_NotImplemented;
  }

  //! Linear search by primary keys with sparse inputs
  //! (default: IndexError_NotImplemented)
  virtual int search_bf_by_p_keys_impl(
      const uint32_t /* sparse_count */, const uint32_t * /* sparse_indices */,
      const void * /* sparse_query */,
      const std::vector<std::vector<uint64_t>> & /* p_keys */,
      const IndexQueryMeta & /* qmeta */,
      Context::Pointer & /* context */) const {
    return IndexError_NotImplemented;
  }

  //! Linear search by primary keys with sparse inputs and a `count` argument
  //! (default: IndexError_NotImplemented)
  virtual int search_bf_by_p_keys_impl(
      const uint32_t * /* sparse_count */,
      const uint32_t * /* sparse_indices */, const void * /* sparse_query */,
      const std::vector<std::vector<uint64_t>> & /*p_keys */,
      const IndexQueryMeta & /* qmeta */, uint32_t /* count */,
      Context::Pointer & /* context */) const {
    return IndexError_NotImplemented;
  }

  //! Linear search by primary keys with dense and sparse inputs
  //! (default: IndexError_NotImplemented)
  virtual int search_bf_by_p_keys_impl(
      const void * /*dense_query*/, const uint32_t /*sparse_count*/,
      const uint32_t * /*sparse_indices*/, const void * /*sparse_query*/,
      const std::vector<std::vector<uint64_t>> & /*p_keys*/,
      const IndexQueryMeta & /*qmeta*/, Context::Pointer & /*context*/) const {
    return IndexError_NotImplemented;
  }

  //! Linear search by primary keys with dense and sparse inputs and a
  //! `count` argument (default: IndexError_NotImplemented)
  virtual int search_bf_by_p_keys_impl(
      const void * /*dense_query*/, const uint32_t * /*sparse_count*/,
      const uint32_t * /*sparse_indices*/, const void * /*sparse_query*/,
      const std::vector<std::vector<uint64_t>> & /*p_keys*/,
      const IndexQueryMeta & /*qmeta*/, uint32_t /*count*/,
      Context::Pointer & /*context*/) const {
    return IndexError_NotImplemented;
  }

  //! Update the vector in index (default: IndexError_NotImplemented)
  virtual int update_impl(uint64_t /*key*/, const void * /*query*/,
                          const IndexQueryMeta & /*qmeta*/,
                          Context::Pointer & /*context*/) {
    return IndexError_NotImplemented;
  }

  //! Delete the vector in index (default: IndexError_NotImplemented)
  virtual int remove_impl(uint64_t /*key*/, Context::Pointer & /*context*/) {
    return IndexError_NotImplemented;
  }

  //! Optimize the index (default: IndexError_NotImplemented)
  virtual int optimize_impl(IndexThreads::Pointer) {
    return IndexError_NotImplemented;
  }

  //! Delete the vector in index (non-virtual entry point for remove_impl)
  int remove(uint64_t key, Context::Pointer &context) {
    return this->remove_impl(key, context);
  }

  //! Optimize the index (non-virtual entry point for optimize_impl)
  int optimize(IndexThreads::Pointer threads) {
    return this->optimize_impl(threads);
  }

  //! Train the data; forwards to the threaded overload with null threads
  virtual int train(IndexHolder::Pointer holder) {
    return this->train(nullptr, std::move(holder));
  }

  //! Train the data (default: IndexError_NotImplemented)
  virtual int train(IndexThreads::Pointer /*threads*/,
                    IndexHolder::Pointer /*holder*/) {
    return IndexError_NotImplemented;
  }

  //! Train the data from a trainer (default: IndexError_NotImplemented)
  virtual int train(const IndexTrainer::Pointer & /*trainer*/) {
    return IndexError_NotImplemented;
  }

  //! Train the sparse data; forwards to the threaded overload with null
  //! threads
  virtual int train(IndexSparseHolder::Pointer holder) {
    return this->train(nullptr, std::move(holder));
  }

  //! Train the sparse data (default: IndexError_NotImplemented)
  virtual int train(IndexThreads::Pointer /*threads*/,
                    IndexSparseHolder::Pointer /*holder*/) {
    return IndexError_NotImplemented;
  }

  //! Build the index; forwards to the threaded overload with null threads
  virtual int build(IndexHolder::Pointer holder) {
    return this->build(nullptr, std::move(holder));
  }

  //! Build the index (default: IndexError_NotImplemented)
  virtual int build(IndexThreads::Pointer /*threads*/,
                    IndexHolder::Pointer /*holder*/) {
    return IndexError_NotImplemented;
  }

  //! Build the sparse index; forwards to the threaded overload with null
  //! threads
  virtual int build(IndexSparseHolder::Pointer holder) {
    return this->build(nullptr, std::move(holder));
  }

  //! Build the sparse index (default: IndexError_NotImplemented)
  virtual int build(IndexThreads::Pointer /*threads*/,
                    IndexSparseHolder::Pointer /*holder*/) {
    return IndexError_NotImplemented;
  }

  //! Build the index with indptr format; forwards to the threaded overload
  //! with null threads
  virtual int build(size_t count, const uint64_t *keys,
                    const uint64_t *sparse_indptr,
                    const uint32_t *sparse_indices, const void *sparse_data) {
    return this->build(nullptr, count, keys, sparse_indptr, sparse_indices,
                       sparse_data);
  }

  //! Build the index with indptr format and a query meta; forwards to the
  //! threaded overload with null threads
  virtual int build(const IndexQueryMeta &qmeta, size_t count,
                    const uint64_t *keys, const uint64_t *sparse_indptr,
                    const uint32_t *sparse_indices, const void *sparse_data) {
    return this->build(nullptr, qmeta, count, keys, sparse_indptr,
                       sparse_indices, sparse_data);
  }

  //! Build the index with indptr format (default: IndexError_NotImplemented)
  virtual int build(IndexThreads::Pointer /*threads*/, size_t /*count*/,
                    const uint64_t * /*keys*/,
                    const uint64_t * /*sparse_indptr*/,
                    const uint32_t * /*sparse_indices*/,
                    const void * /*sparse_data*/) {
    return IndexError_NotImplemented;
  }

  //! Build the index with indptr format and a query meta
  //! (default: IndexError_NotImplemented)
  virtual int build(IndexThreads::Pointer /*threads*/,
                    const IndexQueryMeta & /*qmeta*/, size_t /*count*/,
                    const uint64_t * /*keys*/,
                    const uint64_t * /*sparse_indptr*/,
                    const uint32_t * /*sparse_indices*/,
                    const void * /*sparse_data*/) {
    return IndexError_NotImplemented;
  }

  //! Dump index into storage (default: IndexError_NotImplemented)
  virtual int dump(const IndexDumper::Pointer & /*dumper*/) {
    return IndexError_NotImplemented;
  }
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/include/zvec/core/framework/index_searcher.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <zvec/core/framework/index_context.h>
#include <zvec/core/framework/index_meta.h>
#include <zvec/core/framework/index_metric.h>
#include <zvec/core/framework/index_module.h>
#include <zvec/core/framework/index_provider.h>
#include <zvec/core/framework/index_runner.h>
#include <zvec/core/framework/index_stats.h>

namespace zvec {
namespace core {

/*! Index Searcher
 */
class IndexSearcher : public IndexRunner {
 public:
  //! Index Searcher Pointer
  typedef std::shared_ptr<IndexSearcher> Pointer;

  //! Constructor
  IndexSearcher() = default;

  //! Destructor
  virtual ~IndexSearcher() = default;

  //! Initialize Searcher
  virtual int init(const ailego::Params & /*params*/) = 0;

  //! Retrieve meta of index
  virtual const IndexMeta &meta(void) const = 0;

  //! Retrieve params of index
  virtual const ailego::Params &params(void) const = 0;

  virtual int load(IndexStorage::Pointer /*container*/,
                   IndexMetric::Pointer /*metric*/) {
    return IndexError_NotImplemented;
  }
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/include/zvec/core/framework/index_segment_storage.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <zvec/ailego/container/params.h>
#include <zvec/core/framework/index_module.h>
#include <zvec/core/framework/index_storage.h>
#include <zvec/core/framework/index_unpacker.h>

namespace zvec {
namespace core {

/*! Index Segment Storage
 *
 * Storage view over a single parent segment. `open()` runs IndexUnpacker on
 * the parent's data to obtain a table of sub-segments; `get()` / `get_all()`
 * then hand out Segment objects that address their data relative to the
 * parent. The mutating operations (flush, append, write, resize) are not
 * supported by this storage.
 */
class IndexSegmentStorage : public IndexStorage {
 public:
  //! Index Segment Storage Pointer
  typedef std::shared_ptr<IndexSegmentStorage> Pointer;

  /*! Sub-segment of the parent segment
   *
   * Every read is translated by `data_offset_` and delegated to the parent.
   */
  class Segment : public IndexStorage::Segment,
                  public std::enable_shared_from_this<Segment> {
   public:
    //! Segment Pointer
    typedef std::shared_ptr<Segment> Pointer;

    //! Copying is disabled
    Segment(const Segment &rhs) = delete;

    //! Constructor: records the location described by `segment` and keeps a
    //! clone of `parent` (which must not be null) for the actual reads.
    Segment(const IndexStorage::Segment::Pointer &parent,
            const IndexUnpacker::SegmentMeta &segment)
        : data_offset_(segment.data_offset()),
          data_size_(segment.data_size()),
          padding_size_(segment.padding_size()),
          region_size_(segment.data_size() + segment.padding_size()),
          data_crc_(segment.data_crc()),
          parent_(parent->clone()) {}

    //! Destructor
    virtual ~Segment(void) {}

    //! Retrieve size of data
    size_t data_size(void) const override {
      return data_size_;
    }

    //! Retrieve crc of data
    uint32_t data_crc(void) const override {
      return data_crc_;
    }

    //! Retrieve size of padding
    size_t padding_size(void) const override {
      return padding_size_;
    }

    //! Retrieve capacity of the segment (data size plus padding size)
    size_t capacity(void) const override {
      return region_size_;
    }

    //! Fetch data from segment (with own buffer)
    size_t fetch(size_t offset, void *buf, size_t len) const override {
      return parent_->fetch(data_offset_ + offset, buf, len);
    }

    //! Read data from segment
    size_t read(size_t offset, const void **data, size_t len) override {
      return parent_->read(data_offset_ + offset, data, len);
    }

    //! Read data from segment into a memory block.
    //! Returns whatever the parent's read returns.
    size_t read(size_t offset, MemoryBlock &data, size_t len) override {
      // The parent reports the data location through an out-parameter, so it
      // must be given the address of a real pointer variable; handing it a
      // null `const void **` and dereferencing it would be undefined
      // behaviour.
      const void *ptr = nullptr;
      size_t ret = parent_->read(data_offset_ + offset, &ptr, len);
      data.reset(const_cast<void *>(ptr));
      return ret;
    }

    //! Read a batch of regions from segment.
    //! The offsets are shifted into the parent's coordinates for the call and
    //! restored afterwards, so the caller's `iovec` offsets are unchanged on
    //! return.
    bool read(SegmentData *iovec, size_t count) override {
      for (SegmentData *it = iovec, *end = iovec + count; it != end; ++it) {
        it->offset += data_offset_;
      }
      bool success = parent_->read(iovec, count);
      for (SegmentData *it = iovec, *end = iovec + count; it != end; ++it) {
        it->offset -= data_offset_;
      }
      return success;
    }

    //! Writing is not supported.
    // NOTE(review): the error code is returned through a size_t, so it is
    // converted to an unsigned value - confirm how callers detect it.
    size_t write(size_t, const void *, size_t) override {
      return IndexError_NotImplemented;
    }

    //! Resizing is not supported.
    // NOTE(review): same size_t conversion of the error code as in write().
    size_t resize(size_t) override {
      return IndexError_NotImplemented;
    }

    //! Updating the crc is a no-op
    void update_data_crc(uint32_t) override {
      return;
    }

    //! Clone the segment (returns a shared pointer to this same object)
    IndexStorage::Segment::Pointer clone(void) override {
      return shared_from_this();
    }

   private:
    size_t data_offset_{0u};
    size_t data_size_{0u};
    size_t padding_size_{0u};
    size_t region_size_{0u};
    uint32_t data_crc_{0u};
    IndexStorage::Segment::Pointer parent_{nullptr};
  };

  //! Constructor (takes over the given parent segment pointer)
  IndexSegmentStorage(IndexStorage::Segment::Pointer &&seg)
      : parent_(std::move(seg)) {}

  //! Constructor (shares the given parent segment pointer)
  IndexSegmentStorage(const IndexStorage::Segment::Pointer &seg)
      : parent_(seg) {}

  //! Destructor
  virtual ~IndexSegmentStorage(void) {}

  //! Initialize storage (nothing to do, returns 0)
  int init(const ailego::Params &) override {
    return 0;
  }

  //! Cleanup storage (nothing to do, returns 0)
  int cleanup(void) override {
    return 0;
  }

  //! Load the current segment, ignore path.
  //! Returns IndexError_NoReady without a parent segment,
  //! IndexError_UnpackIndex when unpacking fails, and 0 on success.
  int open(const std::string &, bool) override {
    if (!parent_) {
      LOG_ERROR("Failed to load an empty segment");
      return IndexError_NoReady;
    }

    // Reader callback handed to the unpacker; it reads from the parent.
    auto read_data = [this](size_t offset, const void **data, size_t len) {
      return this->parent_->read(offset, data, len);
    };

    IndexUnpacker unpacker;
    if (!unpacker.unpack(read_data, parent_->data_size(), false)) {
      LOG_ERROR("Failed to unpack segment data");
      return IndexError_UnpackIndex;
    }
    segments_ = std::move(*unpacker.mutable_segments());
    magic_ = unpacker.magic();
    return 0;
  }

  //! Retrieve a segment by id.
  //! Returns an empty pointer when there is no parent or the id is unknown.
  IndexStorage::Segment::Pointer get(const std::string &id, int) override {
    if (!parent_) {
      return IndexStorage::Segment::Pointer();
    }
    auto it = segments_.find(id);
    if (it == segments_.end()) {
      return IndexStorage::Segment::Pointer();
    }
    return std::make_shared<IndexSegmentStorage::Segment>(parent_, it->second);
  }

  //! Test if a segment exists
  bool has(const std::string &id) const override {
    return (segments_.find(id) != segments_.end());
  }

  //! Retrieve all segments (empty map when there is no parent)
  std::map<std::string, IndexStorage::Segment::Pointer> get_all(
      void) const override {
    std::map<std::string, IndexStorage::Segment::Pointer> result;
    if (parent_) {
      for (const auto &it : segments_) {
        result.emplace(it.first, std::make_shared<IndexSegmentStorage::Segment>(
                                     parent_, it.second));
      }
    }
    return result;
  }

  //! Unload all indexes: drops the parent and the segment table
  //! (the magic number is left as it was)
  int close(void) override {
    parent_ = nullptr;
    segments_.clear();
    return 0;
  }

  //! Retrieve magic number of index
  uint32_t magic(void) const override {
    return magic_;
  }

  //! Flushing is not supported
  int flush(void) override {
    return IndexError_NotImplemented;
  }

  //! Appending a segment is not supported
  int append(const std::string & /*id*/, size_t /*size*/) override {
    return IndexError_NotImplemented;
  }

  //! Refresh is a no-op
  void refresh(uint64_t) override {
    return;
  }

  //! Check point is always 0 for this storage
  uint64_t check_point(void) const override {
    return 0;
  }

 private:
  uint32_t magic_{0};
  std::map<std::string, IndexUnpacker::SegmentMeta> segments_{};
  IndexStorage::Segment::Pointer parent_{};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/include/zvec/core/framework/index_stats.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <zvec/ailego/container/params.h>

namespace zvec {
namespace core {

/*! Index Stats
 *  Thin wrapper around an ailego::Params container that stores named
 *  attributes. Every method forwards to the underlying container.
 */
class IndexStats {
 public:
  //! Test if an attribute with the given key exists
  bool has_attribute(const std::string &key) const {
    return attributes_.has(key);
  }

  //! Set the value of key as type T (forwards to ailego::Params::set)
  template <typename T>
  bool set_attribute(const std::string &key, T &&val) {
    return attributes_.set<T>(key, std::forward<T>(val));
  }

  //! Retrieve the attribute stored under key into *out
  //! (forwards to ailego::Params::get)
  template <typename T>
  bool get_attribute(const std::string &key, T *out) const {
    return attributes_.get<T>(key, out);
  }

  //! Erase the attribute stored under key
  bool erase_attribute(const std::string &key) {
    return attributes_.erase(key);
  }

  //! Clear all attributes
  void clear_attributes(void) {
    attributes_.clear();
  }

  //! Retrieve attributes (read-only)
  const ailego::Params &attributes(void) const {
    return attributes_;
  }

  //! Retrieve mutable attributes (pointer to the internal container)
  ailego::Params *mutable_attributes(void) {
    return &attributes_;
  }

 private:
  //! Members
  ailego::Params attributes_{};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/include/zvec/core/framework/index_storage.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <zvec/ailego/buffer/buffer_pool.h>
#include <zvec/ailego/container/params.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_module.h>

namespace zvec {
namespace core {

/*! Index Storage
 */
class IndexStorage : public IndexModule {
 public:
  //! Index Storage Pointer
  typedef std::shared_ptr<IndexStorage> Pointer;

  struct MemoryBlock {
    enum MemoryBlockType {
      MBT_UNKNOWN = 0,
      MBT_MMAP = 1,
      MBT_BUFFERPOOL = 2,
    };

    MemoryBlock() {}
    MemoryBlock(ailego::VecBufferPoolHandle *buffer_pool_handle,
                size_t block_id, void *data)
        : type_(MemoryBlockType::MBT_BUFFERPOOL) {
      buffer_pool_handle_ = buffer_pool_handle;
      buffer_block_id_ = block_id;
      data_ = data;
    }
    MemoryBlock(void *data) : type_(MemoryBlockType::MBT_MMAP), data_(data) {}

    MemoryBlock(const MemoryBlock &rhs) {
      switch (rhs.type_) {
        case MemoryBlockType::MBT_MMAP:
          this->reset(rhs.data_);
          break;
        case MemoryBlockType::MBT_BUFFERPOOL:
          this->reset(rhs.buffer_pool_handle_, rhs.buffer_block_id_, rhs.data_);
          buffer_pool_handle_->acquire_one(buffer_block_id_);
          break;
        default:
          break;
      }
    }

    MemoryBlock(MemoryBlock &&rhs) {
      switch (rhs.type_) {
        case MemoryBlockType::MBT_MMAP:
          this->reset(std::move(rhs.data_));
          break;
        case MemoryBlockType::MBT_BUFFERPOOL:
          this->reset(std::move(rhs.buffer_pool_handle_),
                      std::move(rhs.buffer_block_id_), std::move(rhs.data_));
          rhs.buffer_pool_handle_ = nullptr;
          rhs.type_ = MemoryBlockType::MBT_UNKNOWN;
          break;
        default:
          break;
      }
    }

    MemoryBlock &operator=(const MemoryBlock &rhs) {
      if (this != &rhs) {
        switch (rhs.type_) {
          case MemoryBlockType::MBT_MMAP:
            this->reset(rhs.data_);
            break;
          case MemoryBlockType::MBT_BUFFERPOOL:
            this->reset(rhs.buffer_pool_handle_, rhs.buffer_block_id_,
                        rhs.data_);
            buffer_pool_handle_->acquire_one(buffer_block_id_);
            break;
          default:
            break;
        }
      }
      return *this;
    }

    MemoryBlock &operator=(MemoryBlock &&rhs) {
      if (this != &rhs) {
        switch (rhs.type_) {
          case MemoryBlockType::MBT_MMAP:
            this->reset(std::move(rhs.data_));
            break;
          case MemoryBlockType::MBT_BUFFERPOOL:
            this->reset(std::move(rhs.buffer_pool_handle_),
                        std::move(rhs.buffer_block_id_), std::move(rhs.data_));
            rhs.buffer_pool_handle_ = nullptr;
            rhs.type_ = MemoryBlockType::MBT_UNKNOWN;
            break;
          default:
            break;
        }
      }
      return *this;
    }

    ~MemoryBlock() {
      switch (type_) {
        case MemoryBlockType::MBT_MMAP:
          break;
        case MemoryBlockType::MBT_BUFFERPOOL:
          if (buffer_pool_handle_) {
            buffer_pool_handle_->release_one(buffer_block_id_);
          }
          break;
        default:
          break;
      }
      data_ = nullptr;
    }

    const void *data() const {
      return data_;
    }

    void reset(ailego::VecBufferPoolHandle *buffer_pool_handle, size_t block_id,
               void *data) {
      if (type_ == MemoryBlockType::MBT_BUFFERPOOL) {
        buffer_pool_handle_->release_one(buffer_block_id_);
      }
      type_ = MemoryBlockType::MBT_BUFFERPOOL;
      buffer_pool_handle_ = buffer_pool_handle;
      buffer_block_id_ = block_id;
      data_ = data;
    }

    void reset(void *data) {
      if (type_ == MemoryBlockType::MBT_BUFFERPOOL) {
        buffer_pool_handle_->release_one(buffer_block_id_);
        buffer_pool_handle_ = nullptr;
      }
      type_ = MemoryBlockType::MBT_MMAP;
      data_ = data;
    }

    MemoryBlockType type_{MBT_UNKNOWN};
    void *data_{nullptr};
    mutable ailego::VecBufferPoolHandle *buffer_pool_handle_{nullptr};
    size_t buffer_block_id_{0};
  };

  struct SegmentData {
    //! Constructor
    SegmentData(void) : offset(0u), length(0u), data(nullptr) {}

    //! Constructor
    SegmentData(size_t off, size_t len)
        : offset(off), length(len), data(nullptr) {}

    //! Members
    size_t offset;
    size_t length;
    const void *data;
  };

  /*! Index Storage Segment
   */
  struct Segment {
    //! Index Storage Pointer
    typedef std::shared_ptr<Segment> Pointer;

    //! Destructor
    virtual ~Segment(void) {}

    //! Retrieve size of data
    virtual size_t data_size(void) const = 0;

    //! Retrieve crc of data
    virtual uint32_t data_crc(void) const = 0;

    //! Retrieve size of padding
    virtual size_t padding_size(void) const = 0;

    //! Retrieve capacity of segment
    virtual size_t capacity(void) const = 0;

    //! Fetch data from segment (with own buffer)
    virtual size_t fetch(size_t offset, void *buf, size_t len) const = 0;

    //! Read data from segment
    virtual size_t read(size_t offset, const void **data, size_t len) = 0;

    virtual size_t read(size_t offset, MemoryBlock &data, size_t len) = 0;

    virtual bool read(SegmentData *, size_t) {
      return false;
    }

    //! Write data into the storage with offset
    virtual size_t write(size_t offset, const void *data, size_t len) = 0;

    //! Resize size of data
    virtual size_t resize(size_t size) = 0;

    //! Update crc of data
    virtual void update_data_crc(uint32_t crc) = 0;

    //! Clone the segment
    virtual Pointer clone(void) = 0;
  };

  //! Destructor
  virtual ~IndexStorage(void) {}

  //! Initialize storage
  virtual int init(const ailego::Params &params) = 0;

  //! Cleanup storage
  virtual int cleanup(void) = 0;

  //! Open storage
  virtual int open(const std::string &path, bool create) = 0;

  //! Flush storage
  virtual int flush(void) = 0;

  //! Close storage
  virtual int close(void) = 0;

  //! Append a segment into storage
  virtual int append(const std::string &id, size_t size) = 0;

  //! Refresh meta information (checksum, update time, etc.)
  virtual void refresh(uint64_t check_point) = 0;

  //! Retrieve check point of storage
  virtual uint64_t check_point(void) const = 0;

  //! Retrieve a segment by id
  virtual Segment::Pointer get(const std::string &id, int level = -1) = 0;

  virtual std::map<std::string, Segment::Pointer> get_all(void) const {
    // LOG_ERROR("get_all() Not Implemented");
    std::map<std::string, Segment::Pointer> result;
    return result;
  }

  //! Test if it a segment exists
  virtual bool has(const std::string &id) const = 0;

  //! Retrieve magic number of index
  virtual uint32_t magic(void) const = 0;

  //! huge page
  virtual bool isHugePage(void) const {
    return false;
  }
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/include/zvec/core/framework/index_streamer.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <zvec/core/framework/index_context.h>
#include <zvec/core/framework/index_helper.h>
#include <zvec/core/framework/index_provider.h>
#include <zvec/core/framework/index_runner.h>
#include <zvec/core/framework/index_stats.h>

namespace zvec {
namespace core {

/*! Index Streamer
 *  Interface of an index runner that is opened from a storage and can be
 *  flushed and closed.
 */
class IndexStreamer : public IndexRunner {
 public:
  //! Index Streamer Pointer
  typedef std::shared_ptr<IndexStreamer> Pointer;

  //! Destructor
  virtual ~IndexStreamer(void) = default;

  //! Initialize the streamer. The default implementation ignores both
  //! arguments and returns IndexError_NotImplemented.
  virtual int init(const IndexMeta & /*meta*/,
                   const ailego::Params & /*params*/) {
    return IndexError_NotImplemented;
  }

  //! Open an index from storage
  virtual int open(IndexStorage::Pointer stg) = 0;

  //! Flush index
  virtual int flush(uint64_t check_point) = 0;

  //! Close index
  virtual int close(void) = 0;

  //! Retrieve meta of index
  virtual const IndexMeta &meta(void) const = 0;
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/include/zvec/core/framework/index_threads.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <thread>
#include <utility>
#include <zvec/ailego/parallel/thread_pool.h>
#include <zvec/ailego/pattern/closure.h>

namespace zvec {
namespace core {

/*! Index Threads
 *  Interface of a thread pool that maintains multiple thread resources to
 *  execute tasks concurrently
 */
class IndexThreads {
 public:
  using Pointer = std::shared_ptr<IndexThreads>;

  /*! Threads Task Group
   *  Manages a group of sub-tasks which can be seen as one big task,
   *  so we can wait until all sub-tasks are finished, or query their status
   */
  class TaskGroup {
   public:
    using Pointer = std::shared_ptr<TaskGroup>;

    //! Destructor
    virtual ~TaskGroup(void) {}

    //! Submit a task to be executed asynchronously
    virtual void submit(ailego::ClosureHandler &&task) = 0;

    //! Check if the group is finished
    virtual bool is_finished(void) const = 0;

    //! Wait until all tasks in the group are finished
    virtual void wait_finish(void) = 0;
  };

  //! Destructor
  virtual ~IndexThreads(void) {}

  //! Retrieve thread count in pool
  virtual size_t count(void) const = 0;

  //! Stop all threads
  virtual void stop(void) = 0;

  //! Submit a task to be executed asynchronously
  virtual void submit(ailego::ClosureHandler &&task) = 0;

  //! Make a task group
  virtual TaskGroup::Pointer make_group(void) = 0;

  //! Get the current work thread index
  virtual int indexof_this(void) const = 0;
};

/*! Single Queue Index Threads
 *  IndexThreads implementation on top of one ailego::ThreadPool. Submitting
 *  waits, polling once per millisecond, while kMaxQueueSize or more tasks are
 *  pending.
 */
class SingleQueueIndexThreads : public IndexThreads {
 public:
  /*! Single Queue Index Threads Task Group
   *  Adapter exposing an ailego::ThreadPool task group as a TaskGroup.
   */
  class SingleQueueTaskGroup : public TaskGroup {
   public:
    using Pointer = std::shared_ptr<SingleQueueTaskGroup>;

    //! Constructor (takes over the given pool task group)
    explicit SingleQueueTaskGroup(
        ailego::ThreadPool::TaskGroup::Pointer task_group)
        : task_group_(std::move(task_group)) {}

    //! Submit a task once fewer than kMaxQueueSize tasks are pending
    void submit(ailego::ClosureHandler &&task) override {
      for (;;) {
        if (task_group_->pending_count() < kMaxQueueSize) {
          break;
        }
        std::this_thread::sleep_for(std::chrono::milliseconds(1));
      }
      task_group_->enqueue_and_wake(std::move(task));
    }

    //! Tell whether every task of the group has finished
    bool is_finished(void) const override {
      return task_group_->is_finished();
    }

    //! Block until every task of the group has finished
    void wait_finish(void) override {
      task_group_->wait_finish();
    }

   private:
    //! Underlying pool task group
    ailego::ThreadPool::TaskGroup::Pointer task_group_{};
  };

  //! Constructor. A `size` of zero selects the hardware concurrency,
  //! with a minimum of one thread.
  SingleQueueIndexThreads(uint32_t size, bool binding)
      : pool_(
            size > 0 ? size : std::max(std::thread::hardware_concurrency(), 1u),
            binding) {}

  //! Constructor (thread count chosen automatically)
  explicit SingleQueueIndexThreads(bool binding)
      : SingleQueueIndexThreads(0, binding) {}

  //! Constructor (automatic thread count, binding disabled)
  SingleQueueIndexThreads(void) : SingleQueueIndexThreads(false) {}

  //! Destructor
  virtual ~SingleQueueIndexThreads(void) {}

  //! Number of threads in the pool
  size_t count(void) const override {
    return pool_.count();
  }

  //! Stop all threads of the pool
  void stop(void) override {
    pool_.stop();
  }

  //! Submit a task once fewer than kMaxQueueSize tasks are pending
  void submit(ailego::ClosureHandler &&task) override {
    for (;;) {
      if (pool_.pending_count() < kMaxQueueSize) {
        break;
      }
      std::this_thread::sleep_for(std::chrono::milliseconds(1));
    }
    pool_.enqueue_and_wake(std::move(task));
  }

  //! Create a task group bound to the pool
  TaskGroup::Pointer make_group(void) override {
    return std::make_shared<SingleQueueTaskGroup>(pool_.make_group());
  }

  //! Index of the calling worker thread, as reported by the pool
  int indexof_this(void) const override {
    return pool_.indexof_this();
  }

 private:
  //! Pending-task threshold at which submit() starts waiting
  static constexpr size_t kMaxQueueSize = 4096u;

  //! Disable them
  SingleQueueIndexThreads(const SingleQueueIndexThreads &) = delete;
  SingleQueueIndexThreads(SingleQueueIndexThreads &&) = delete;
  SingleQueueIndexThreads &operator=(const SingleQueueIndexThreads &) = delete;

  //! Members
  ailego::ThreadPool pool_{};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/include/zvec/core/framework/index_trainer.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <zvec/core/framework/index_bundle.h>
#include <zvec/core/framework/index_dumper.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_holder.h>
#include <zvec/core/framework/index_meta.h>
#include <zvec/core/framework/index_stats.h>
#include <zvec/core/framework/index_storage.h>
#include <zvec/core/framework/index_threads.h>

namespace zvec {
namespace core {

/*! Index Trainer
 *  Interface of a module that is trained on the data of an index holder and
 *  can be loaded from a storage and dumped through a dumper.
 */
class IndexTrainer : public IndexModule {
 public:
  //! Index Trainer Pointer
  typedef std::shared_ptr<IndexTrainer> Pointer;

  /*! Index Trainer Stats
   *  Plain counters on top of the generic IndexStats attributes.
   */
  class Stats : public IndexStats {
   public:
    //! Set count of documents trained
    void set_trained_count(size_t count) {
      trained_count_ = count;
    }

    //! Set count of documents discarded
    void set_discarded_count(size_t count) {
      discarded_count_ = count;
    }

    //! Set time cost of documents trained
    void set_trained_costtime(uint64_t cost) {
      trained_costtime_ = cost;
    }

    //! Retrieve count of documents trained
    size_t trained_count(void) const {
      return trained_count_;
    }

    //! Retrieve count of documents discarded
    size_t discarded_count(void) const {
      return discarded_count_;
    }

    //! Retrieve time cost of documents trained
    uint64_t trained_costtime(void) const {
      return trained_costtime_;
    }

   private:
    //! Members (all start at zero)
    size_t trained_count_{0u};
    size_t discarded_count_{0u};
    uint64_t trained_costtime_{0u};
  };

  //! Destructor
  virtual ~IndexTrainer(void) {}

  //! Initialize Trainer
  virtual int init(const IndexMeta &meta, const ailego::Params &params) = 0;

  //! Cleanup Trainer
  virtual int cleanup(void) = 0;

  //! Train the data (forwards to the two-argument overload with a null
  //! threads pointer)
  virtual int train(IndexHolder::Pointer holder) {
    return this->train(nullptr, holder);
  }

  //! Train the data with the given threads
  virtual int train(IndexThreads::Pointer threads,
                    IndexHolder::Pointer holder) = 0;

  //! Load index from storage
  virtual int load(IndexStorage::Pointer cntr) = 0;

  //! Dump index through the dumper
  virtual int dump(const IndexDumper::Pointer &dumper) = 0;

  //! Retrieve Index Meta
  virtual const IndexMeta &meta(void) const = 0;

  //! Retrieve statistics
  virtual const Stats &stats(void) const = 0;

  //! Retrieve the output indexes
  virtual IndexBundle::Pointer indexes(void) const = 0;
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/include/zvec/core/framework/index_unpacker.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <map>
#include <zvec/ailego/utility/type_helper.h>
#include <zvec/core/framework/index_error.h>
#include <zvec/core/framework/index_format.h>
#include <zvec/core/framework/index_logger.h>

namespace zvec {
namespace core {

/*! Index Unpacker
 */
class IndexUnpacker {
 public:
  /*! Index Unpacker Segment Meta
   */
  class SegmentMeta {
   public:
    //! Constructor
    SegmentMeta(size_t offset, size_t dsz, size_t psz, uint32_t crc)
        : data_offset_(offset),
          data_size_(dsz),
          padding_size_(psz),
          data_crc_(crc) {}

    //! Retrieve offset of data
    size_t data_offset(void) const {
      return data_offset_;
    }

    //! Retrieve size of data
    size_t data_size(void) const {
      return data_size_;
    }

    //! Retrieve crc of data
    uint32_t data_crc(void) const {
      return data_crc_;
    }

    //! Retrieve size of padding
    size_t padding_size(void) const {
      return padding_size_;
    }

   private:
    size_t data_offset_{0};
    size_t data_size_{0};
    size_t padding_size_{0};
    uint32_t data_crc_{0};
  };

  //! Reset the unpacker
  void reset(void) {
    segments_.clear();
  }

  //! Retrieve segments of index package
  const std::map<std::string, SegmentMeta> &segments(void) const {
    return segments_;
  }

  //! Retrieve magic number of index
  uint32_t magic(void) const {
    return header_.magic;
  }

  //! Retrieve header of index package
  const IndexFormat::MetaHeader &header(void) const {
    return header_;
  }

  //! Retrieve footer of index package
  const IndexFormat::MetaFooter &footer(void) const {
    return footer_;
  }

  //! Retrieve version information
  const std::string &version(void) const {
    return version_;
  }

  //! Retrieve mutable segments of index package
  std::map<std::string, SegmentMeta> *mutable_segments(void) {
    return &segments_;
  }

  //! Unpack index data
  template <typename TFunc>
  bool unpack(TFunc read_data, size_t total, bool checksum) {
    static_assert(ailego::IsInvocableWithResult<size_t, TFunc, size_t,
                                                const void **, size_t>::value,
                  "Invocable function type");

    while (true) {
      if (!this->unpack_header(read_data)) {
        LOG_ERROR("Failed to unpack index header");
        return false;
      }
      if (!this->unpack_footer(read_data, total)) {
        LOG_ERROR("Failed to unpack index footer");
        return false;
      }
      if (!this->unpack_segments(read_data, total)) {
        LOG_ERROR("Failed to unpack index segments' meta");
        return false;
      }
      if (checksum && !this->validate_checksum(read_data)) {
        LOG_ERROR("Failed to validate checksum of index content");
        return false;
      }
      if (footer_.next_meta_header_offset == 0) {
        break;
      }
      current_header_start_offset_ = footer_.next_meta_header_offset;
    }

    if (!this->unpack_version(read_data)) {
      LOG_ERROR("Failed to unpack index version");
      return false;
    }
    return true;
  }

  //! Unpack index header
  template <typename TFunc>
  bool unpack_header(TFunc read_data) {
    static_assert(ailego::IsInvocableWithResult<size_t, TFunc, size_t,
                                                const void **, size_t>::value,
                  "Invocable function type");
    const void *data = nullptr;
    if (read_data(current_header_start_offset_, &data, sizeof(header_)) !=
        sizeof(header_)) {
      return false;
    }

    memcpy(&header_, data, sizeof(header_));
    if (header_.meta_header_size != sizeof(header_)) {
      return false;
    }
    if (ailego::Crc32c::Hash(&header_, sizeof(header_), header_.header_crc) !=
        header_.header_crc) {
      return false;
    }
    return true;
  }

  //! Unpack index footer
  template <typename TFunc>
  bool unpack_footer(TFunc read_data, size_t total) {
    static_assert(ailego::IsInvocableWithResult<size_t, TFunc, size_t,
                                                const void **, size_t>::value,
                  "Invocable function type");
    if (header_.meta_footer_size != sizeof(footer_)) {
      return false;
    }

    size_t footer_offset = ((int32_t)header_.meta_footer_offset < 0)
                               ? total + (int32_t)header_.meta_footer_offset
                               : header_.meta_footer_offset;
    if (footer_offset + sizeof(footer_) > total) {
      return false;
    }

    const void *data = nullptr;
    if (read_data(current_header_start_offset_ + footer_offset, &data,
                  sizeof(footer_)) != sizeof(footer_)) {
      return false;
    }

    memcpy(&footer_, data, sizeof(footer_));
    if (footer_.content_size + footer_.content_padding_size +
            header_.content_offset >
        footer_.total_size) {
      return false;
    }
    if (ailego::Crc32c::Hash(&footer_, sizeof(footer_), footer_.footer_crc) !=
        footer_.footer_crc) {
      return false;
    }
    return true;
  }

  //! Unpack segments' meta
  template <typename TFunc>
  bool unpack_segments(TFunc read_data, size_t total) {
    static_assert(ailego::IsInvocableWithResult<size_t, TFunc, size_t,
                                                const void **, size_t>::value,
                  "Invocable function type");
    if (sizeof(IndexFormat::SegmentMeta) * footer_.segment_count >
        footer_.segments_meta_size) {
      return false;
    }
    size_t offset = ((int32_t)header_.meta_footer_offset < 0)
                        ? total + (int32_t)header_.meta_footer_offset
                        : header_.meta_footer_offset;
    if (offset < footer_.segments_meta_size || offset > total) {
      return false;
    }
    offset -= footer_.segments_meta_size;

    const void *data = nullptr;
    if (read_data(current_header_start_offset_ + offset, &data,
                  footer_.segments_meta_size) != footer_.segments_meta_size) {
      return false;
    }
    if (ailego::Crc32c::Hash(data, footer_.segments_meta_size, 0u) !=
        footer_.segments_meta_crc) {
      return false;
    }

    IndexFormat::SegmentMeta *seg = (IndexFormat::SegmentMeta *)data;
    for (size_t i = 0; i < footer_.segment_count; ++i, ++seg) {
      if (seg->segment_id_offset > footer_.segments_meta_size) {
        return false;
      }
      if (seg->data_index > footer_.content_size) {
        return false;
      }
      if (seg->data_index + seg->data_size > footer_.content_size) {
        return false;
      }
      segments_.emplace(
          std::string(reinterpret_cast<const char *>(data) +
                      seg->segment_id_offset),
          SegmentMeta(seg->data_index + header_.content_offset +
                          current_header_start_offset_,
                      seg->data_size, seg->padding_size, seg->data_crc));
    }
    return true;
  }

  //! Unpack index version
  template <typename TFunc>
  bool unpack_version(TFunc read_data) {
    static_assert(ailego::IsInvocableWithResult<size_t, TFunc, size_t,
                                                const void **, size_t>::value,
                  "Invocable function type");

    auto it = segments_.find("IndexVersion");
    if (it == segments_.end()) {
      return false;
    }

    const SegmentMeta &segment = it->second;
    const void *data = nullptr;

    if (read_data(0 + segment.data_offset(), &data, segment.data_size()) !=
        segment.data_size()) {
      return false;
    }
    if (segment.data_crc() != 0u &&
        ailego::Crc32c::Hash(data, segment.data_size(), 0u) !=
            segment.data_crc()) {
      return false;
    }
    version_.assign(reinterpret_cast<const char *>(data), segment.data_size());
    return true;
  }

  //! Validate checksum of content
  template <typename TFunc>
  bool validate_checksum(TFunc read_data) const {
    static_assert(ailego::IsInvocableWithResult<size_t, TFunc, size_t,
                                                const void **, size_t>::value,
                  "Invocable function type");
    if (footer_.content_crc == 0) {
      return true;
    }
    const size_t block_size = 4096u;
    const void *data = nullptr;
    uint32_t checksum = 0u;
    size_t total = footer_.content_size;
    size_t offset = sizeof(header_);

    while (total >= block_size) {
      if (read_data(current_header_start_offset_ + offset, &data, block_size) !=
          block_size) {
        return false;
      }
      checksum = ailego::Crc32c::Hash(data, block_size, checksum);
      total -= block_size;
      offset += block_size;
    }
    if (read_data(current_header_start_offset_ + offset, &data, total) !=
        total) {
      return false;
    }
    checksum = ailego::Crc32c::Hash(data, total, checksum);
    return (checksum == footer_.content_crc);
  }

 private:
  IndexFormat::MetaHeader header_{};
  IndexFormat::MetaFooter footer_{};
  std::string version_{};
  std::map<std::string, SegmentMeta> segments_{};
  uint64_t current_header_start_offset_{0u};
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/include/zvec/core/framework/index_version.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

namespace zvec {
namespace core {

/*! Index Version
 *  Static accessors for the version information; both functions are only
 *  declared here.
 */
struct IndexVersion {
  //! Retrieve the version number as a string
  static const char *String(void);

  //! Retrieve the detailed version information
  static const char *Details(void);
};

}  // namespace core
}  // namespace zvec


================================================
FILE: src/include/zvec/core/interface/constants.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstdint>

namespace zvec::core_interface {

// Default values. They are declared `inline constexpr` so that every
// translation unit including this header refers to the same entity, and so
// that all constants use the same specifiers.

// HNSW defaults (ef construction / neighbor count)
inline constexpr uint32_t kDefaultHnswEfConstruction = 500;
inline constexpr uint32_t kDefaultHnswNeighborCnt = 50;

// HNSW default (ef search)
inline constexpr uint32_t kDefaultHnswEfSearch = 300;

// RaBitQ defaults (total bits / number of clusters)
inline constexpr uint32_t kDefaultRabitqTotalBits = 7;
inline constexpr uint32_t kDefaultRabitqNumClusters = 16;


}  // namespace zvec::core_interface

================================================
FILE: src/include/zvec/core/interface/index.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstdint>
#include <memory>
#include <string>
#include <variant>
#include <vector>
#include <zvec/core/framework/index_context.h>
#include <zvec/core/framework/index_converter.h>
#include <zvec/core/framework/index_factory.h>
#include <zvec/core/framework/index_filter.h>
#include <zvec/core/framework/index_holder.h>
#include <zvec/core/framework/index_meta.h>
#include <zvec/core/framework/index_metric.h>
#include <zvec/core/framework/index_reducer.h>
#include <zvec/core/framework/index_reformer.h>
#include <zvec/core/framework/index_searcher.h>
#include <zvec/core/framework/index_storage.h>
#include <zvec/core/interface/index_param.h>
#include "zvec/core/framework/index_provider.h"

namespace zvec::core_interface {

class IndexFactory;

// View over a dense vector's raw element bytes. The struct holds only a raw
// pointer and has no destructor, so it never frees the pointed-to memory.
struct DenseVector {
  // Start of the element data. Defaults to null so that a default-constructed
  // view is never left holding an indeterminate pointer.
  const void *data{nullptr};
  // core::IndexQueryMeta meta;
  // DenseVector(void *data) : data(data) {
  //   meta.set_meta_type(core::IndexMeta::MetaType::MT_DENSE);
  // };
};

// View over a sparse vector stored as two parallel raw arrays (indices and
// values). The struct holds only raw pointers and never frees them.
struct SparseVector {
  // All members are default-initialised so that a default-constructed view is
  // a well-defined "empty" vector rather than holding indeterminate values.
  uint32_t count{0};
  const void *indices{nullptr};
  const void *values{nullptr};

  // Returns `indices` typed as an array of 32-bit unsigned integers.
  const uint32_t *get_indices() const {
    return static_cast<const uint32_t *>(indices);
  }

  // Returns `values` typed as `const T *`; with the default `T = void` the
  // untyped pointer is returned unchanged.
  template <typename T = void>
  const T *get_values() const {
    return static_cast<const T *>(values);
  }
};

// A single input vector: either a DenseVector or a SparseVector. Both
// alternatives only carry raw pointers to the underlying data.
struct VectorData {
  std::variant<DenseVector, SparseVector> vector;

  // DenseVector dense_vector;
  // SparseVector sparse_vector;
};

// Used to pass mutable vectors.
// Owning counterpart of DenseVector: the raw vector bytes live inside `data`.
struct DenseVectorBuffer {
  std::string data;  // use string to manage memory
};

// Owning counterpart of SparseVector: the index and value arrays are stored
// as raw bytes inside std::string members, which manage the memory.
struct SparseVectorBuffer {
  // Defaults to 0 so a freshly constructed (empty) buffer never reports an
  // indeterminate element count.
  uint32_t count{0};
  std::string indices;
  std::string values;

  // Returns the bytes of `indices` typed as 32-bit unsigned integers.
  uint32_t *get_indices() {
    return reinterpret_cast<uint32_t *>(indices.data());
  }

  // Returns the bytes of `values` typed as `T *` (untyped for `T = void`).
  template <typename T = void>
  T *get_values() {
    return reinterpret_cast<T *>(values.data());
  }
};

// Owning counterpart of VectorData: holds either a dense or a sparse buffer.
// Used as the output argument of Index::Fetch().
struct VectorDataBuffer {
  std::variant<DenseVectorBuffer, SparseVectorBuffer> vector_buffer;
};


// Output of Index::Search().
struct SearchResult {
  // Documents returned by the search.
  core::IndexDocumentList doc_list_;
  // use string to manage memory
  // NOTE(review): presumably filled only when the query asks for vectors to
  // be returned (BaseIndexQueryParam::fetch_vector) -- confirm in index.cc.
  std::vector<std::string> reverted_vector_list_{};
  std::vector<std::string> reverted_sparse_values_list_{};
};

// Common base of the vector index types declared in this header (FlatIndex,
// IVFIndex, HNSWIndex, HNSWRabitqIndex). It declares the shared
// open/add/fetch/search entry points and holds the framework objects they
// operate on; derived classes must provide CreateAndInitStreamer() and
// _prepare_for_search(). IndexFactory is a friend and can therefore reach the
// protected Init().
class Index {
 public:
  using Pointer = std::shared_ptr<Index>;
  virtual ~Index() = default;

  // static Index::Pointer Create(const BaseIndexParam &param); //IndexFactory
  virtual int Open(const std::string &file_path,
                   StorageOptions storage_options);
  int Close();
  int Flush();
  // virtual int Serialize(const std::string &file_path);
  // virtual int Deserialize(const std::string &file_path);

  // // TODO: use holder
  // virtual int Build() = 0;

  // Default training step: marks the index as trained and returns 0.
  // IVFIndex overrides this.
  virtual int Train() {
    is_trained_ = true;
    return 0;
  }

  // virtual int Dump(const std::string &file_path) = 0;
  virtual int Merge(const std::vector<Index::Pointer> &indexes,
                    const IndexFilter &filter,
                    const MergeOptions &options = {});
  // TODO: static reduce

  virtual int Add(const VectorData &vector, uint32_t doc_id);
  virtual int Fetch(const uint32_t doc_id,
                    VectorDataBuffer *vector_data_buffer);
  virtual int Search(const VectorData &query,
                     const BaseIndexQueryParam::Pointer &search_param,
                     SearchResult *result);

  // Returns a copy of the base parameters. Note that the copy is created as a
  // plain BaseIndexParam.
  virtual BaseIndexParam::Pointer GetParam() const {
    return std::make_shared<BaseIndexParam>(param_);
  }

  virtual bool IsTrained() const {
    return is_trained_;
  }

  // Returns the document count reported by the streamer's (sparse or dense)
  // provider. When there is no streamer, or no provider could be created, the
  // sentinel static_cast<uint32_t>(-1) is returned; this is the same value the
  // previous `return -1` produced through implicit conversion.
  uint32_t GetDocCount() const {
    constexpr uint32_t kInvalidDocCount = static_cast<uint32_t>(-1);
    if (streamer_ == nullptr) {
      return kInvalidDocCount;
    }
    if (is_sparse_) {
      auto sparse_provider = streamer_->create_sparse_provider();
      // Guard against a failed provider creation instead of dereferencing
      // a null pointer.
      if (sparse_provider == nullptr) {
        return kInvalidDocCount;
      }
      return sparse_provider->count();
    }
    auto dense_provider = streamer_->create_provider();
    if (dense_provider == nullptr) {
      return kInvalidDocCount;
    }
    return dense_provider->count();
  }

  // Exposes the underlying streamer (may be null).
  core::IndexStreamer::Pointer index_searcher() {
    return streamer_;
  }

  // Creates a provider from the streamer; returns null when there is no
  // streamer, mirroring the null check in GetDocCount().
  core::IndexProvider::Pointer create_index_provider() const {
    if (streamer_ == nullptr) {
      return nullptr;
    }
    return streamer_->create_provider();
  }

  static std::string get_metric_name(MetricType metric_type, bool is_sparse);

 protected:
  int _sparse_fetch(const uint32_t doc_id,
                    VectorDataBuffer *vector_data_buffer);
  virtual int _dense_fetch(const uint32_t doc_id,
                           VectorDataBuffer *vector_data_buffer);

  int _sparse_add(const VectorData &vector, const uint32_t doc_id,
                  core::IndexContext::Pointer &context);
  int _dense_add(const VectorData &vector, const uint32_t doc_id,
                 core::IndexContext::Pointer &context);
  int _sparse_search(const VectorData &query,
                     const BaseIndexQueryParam::Pointer &search_param,
                     SearchResult *result,
                     core::IndexContext::Pointer &context);
  int _dense_search(const VectorData &query,
                    const BaseIndexQueryParam::Pointer &search_param,
                    SearchResult *result, core::IndexContext::Pointer &context);
  // Index-type specific hook; every concrete index must implement it.
  virtual int _prepare_for_search(
      const VectorData &query, const BaseIndexQueryParam::Pointer &search_param,
      core::IndexContext::Pointer &context) = 0;
  virtual int _get_coarse_search_topk(
      const BaseIndexQueryParam::Pointer &search_param);

 protected:
  friend class IndexFactory;
  Index() = default;
  int Init(const BaseIndexParam &param);


 protected:
  int ParseMetricName(const BaseIndexParam &param);
  int CreateAndInitMetric(const BaseIndexParam &param);
  int CreateAndInitConverterReformer(const QuantizerParam &param,
                                     const BaseIndexParam &index_param);
  // Index-type specific hook; every concrete index must implement it.
  virtual int CreateAndInitStreamer(const BaseIndexParam &param) = 0;

 protected:
  bool init_context();
  core::IndexContext::Pointer &acquire_context();
  // Currently a no-op: the reset call below is disabled.
  void release_context() {
    // context_list_[get_context_index()]->reset();
  }

 protected:
  bool is_trained_{false};

  BaseIndexParam param_;
  ailego::Params proxima_index_params_{};
  core::IndexMeta proxima_index_meta_{};  // IndexQueryMeta + other index config
  core::IndexQueryMeta input_vector_meta_;     // input
  core::IndexQueryMeta streamer_vector_meta_;  // after reformer.convert()

  core::IndexBuilder::Pointer builder_{};
  core::IndexStreamer::Pointer streamer_{};
  core::IndexReformer::Pointer reformer_{};
  core::IndexConverter::Pointer converter_{};  // for build()
  core::IndexMetric::Pointer metric_{};        // to do normalization

  // Zero-initialised so it never holds an indeterminate value before the
  // implementation assigns it.
  size_t context_index_{0};
  core::IndexStorage::Pointer storage_{};

  bool is_open_{false};
  bool is_sparse_{false};
  bool is_huge_page_{false};
  bool is_read_only_{false};
};


class FlatIndex : public Index {
 public:
  FlatIndex() = default;
  // FlatIndex(const FlatIndexParam &param) : param_(param) {}
  // FlatIndex(FlatIndexParam &&param) : param(std::move(param)) {}


 protected:
  virtual int CreateAndInitStreamer(const BaseIndexParam &param) override;

  virtual int _prepare_for_search(
      const VectorData &query, const BaseIndexQueryParam::Pointer &search_param,
      core::IndexContext::Pointer &context) override;

 private:
  FlatIndexParam param_{};
};

class IVFIndex : public Index {
 public:
  IVFIndex() = default;

 protected:
  virtual int CreateAndInitStreamer(const BaseIndexParam &param) override;

  virtual int _prepare_for_search(
      const VectorData &query, const BaseIndexQueryParam::Pointer &search_param,
      core::IndexContext::Pointer &context) override;

  virtual int Add(const VectorData &vector, uint32_t doc_id) override;

  virtual int Train() override;

  virtual int Open(const std::string &file_path,
                   StorageOptions storage_options) override;

  virtual int _dense_fetch(const uint32_t doc_id,
                           VectorDataBuffer *vector_data_buffer) override;
  virtual int Merge(const std::vector<Index::Pointer> &indexes,
                    const IndexFilter &filter,
                    const MergeOptions &options) override;
  int GenerateHolder();

 private:
  IVFIndexParam param_{};
  std::mutex mutex_{};
  std::vector<std::pair<uint64_t, std::string>> doc_cache_;
  core::IndexHolder::Pointer holder_{};
  std::string file_path_;
};


class HNSWIndex : public Index {
 public:
  HNSWIndex() = default;

 protected:
  virtual int CreateAndInitStreamer(const BaseIndexParam &param) override;

  virtual int _prepare_for_search(
      const VectorData &query, const BaseIndexQueryParam::Pointer &search_param,
      core::IndexContext::Pointer &context) override;
  int _get_coarse_search_topk(
      const BaseIndexQueryParam::Pointer &search_param) override;


 private:
  HNSWIndexParam param_{};
};

class HNSWRabitqIndex : public Index {
 public:
  HNSWRabitqIndex() = default;

 protected:
  virtual int CreateAndInitStreamer(const BaseIndexParam &param) override;

  virtual int _prepare_for_search(
      const VectorData &query, const BaseIndexQueryParam::Pointer &search_param,
      core::IndexContext::Pointer &context) override;
  int _get_coarse_search_topk(
      const BaseIndexQueryParam::Pointer &search_param) override;

 private:
  HNSWRabitqIndexParam param_{};
};


}  // namespace zvec::core_interface


================================================
FILE: src/include/zvec/core/interface/index_factory.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <string>
#include <zvec/core/interface/index.h>
#include <zvec/core/interface/index_param.h>

namespace zvec::core_interface {

// Factory class for indexes. All members are static; the class also offers
// JSON (de)serialisation helpers for index and query parameters.
class IndexFactory {
 public:
  // Builds an index from `param`. Implementation is not visible in this
  // header; judging by the name it also initialises the index before
  // returning it.
  static Index::Pointer CreateAndInitIndex(const BaseIndexParam &param);

  // Parses `json_str` into an index parameter object.
  static BaseIndexParam::Pointer DeserializeIndexParamFromJson(
      const std::string &json_str);


  // Serialises a query parameter passed through its base-class reference.
  static std::string QueryParamSerializeToJson(
      const BaseIndexQueryParam &param);


  // Typed overload; participates in overload resolution only for types
  // derived from (or equal to) BaseIndexQueryParam.
  template <
      typename QueryParamType,
      std::enable_if_t<std::is_base_of_v<BaseIndexQueryParam, QueryParamType>,
                       bool> = true>
  static std::string QueryParamSerializeToJson(const QueryParamType &param,
                                               bool omit_empty_value = false);

  // Parses `json_str` into a `QueryParamType`; enabled only for types derived
  // from (or equal to) BaseIndexQueryParam.
  template <
      typename QueryParamType,
      std::enable_if_t<std::is_base_of_v<BaseIndexQueryParam, QueryParamType>,
                       bool> = true>
  static typename QueryParamType::Pointer QueryParamDeserializeFromJson(
      const std::string &json_str);

  // register() -- Index class should have a `create` interface
};


}  // namespace zvec::core_interface

================================================
FILE: src/include/zvec/core/interface/index_param.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstdint>
#include <memory>
#include <string>
#include <vector>
#include <zvec/ailego/encoding/json.h>
#include <zvec/ailego/parallel/thread_pool.h>
#include <zvec/core/framework/index_filter.h>
#include <zvec/core/framework/index_meta.h>
#include <zvec/core/interface/constants.h>
#include "zvec/core/framework/index_framework.h"

namespace zvec::core_interface {
// Upper bound of the valid range documented on BaseIndexParam::dimension.
// Being a macro, it is not scoped by the enclosing namespace.
#define MAX_DIMENSION 65536
// #define MAX_EF_CONSTRUCTION 65536
// #define MAX_EF_SEARCH 100

// Forward declarations for types referenced before their definition.
class IndexFactory;
class Index;
class BaseIndexParam;
class BaseIndexQueryParam;

// Options passed to Index::Open().
struct StorageOptions {
  // Kind of backing storage to use.
  enum class StorageType { kNone, kMMAP, kMemory, kBufferPool };

  StorageType type = StorageType::kNone;
  // NOTE(review): flag semantics are inferred from the names only; confirm
  // against Index::Open().
  bool create_new = false;
  bool read_only = false;
};

// Options passed to Index::Merge().
struct MergeOptions {
  uint32_t write_concurrency = 1;
  // Raw pointer to a thread pool; null by default. This struct has no
  // destructor, so it does not delete the pool.
  ailego::ThreadPool *pool = nullptr;
};

// Short aliases for framework (zvec::core) types used by this interface layer.
using IndexMeta = core::IndexMeta;
using IndexQueryMeta = core::IndexQueryMeta;
using DataType = core::IndexMeta::DataType;
using IndexFilter = core::IndexFilter;


// Defines the supported index types.
enum class IndexType {
  // to do: support factory's register, may change to
  // `static constexpr std::string_view`, which may incur str comp overhead
  kNone,
  kFlat,
  kIVF,  // it's actually a two-layer index
  kHNSW,
  kHNSWRabitq,
};

// Search method selector for IVF.
// NOTE(review): kBF presumably stands for brute force -- confirm.
enum class IVFSearchMethod { kBF, kHNSW };

// Distance/similarity metric selectable through BaseIndexParam::metric_type.
enum class MetricType {
  kNone,
  kL2sq,  // Euclidean
  kInnerProduct,
  kCosine,
  kMIPSL2sq  // spherical?
};

// Quantizer selectable through QuantizerParam::type.
enum class QuantizerType {
  kNone,
  kPQ,        // Product Quantization
  kQuickADC,  // TODO: +refiner ? // should be a type of index?
  kAQ,
  kFP16,
  kInt8,
  kInt4,
  kRabitq,
};

// Base for parameter types that can be converted to and from JSON text.
// Derived types implement the two protected JsonObject hooks; the public
// functions wrap them with string conversion and parsing.
struct SerializableBase {
  // This is a polymorphic base, so it needs a virtual destructor to make
  // destruction through a SerializableBase pointer well-defined. The copy and
  // move operations are defaulted explicitly so declaring the destructor does
  // not change how derived types are copied or moved.
  SerializableBase() = default;
  SerializableBase(const SerializableBase &) = default;
  SerializableBase &operator=(const SerializableBase &) = default;
  SerializableBase(SerializableBase &&) = default;
  SerializableBase &operator=(SerializableBase &&) = default;
  virtual ~SerializableBase() = default;

  // Serialises this object to a JSON string. `omit_empty_value` is forwarded
  // unchanged to SerializeToJsonObject().
  std::string SerializeToJson(bool omit_empty_value = false) const {
    return zvec::ailego::JsonValue(SerializeToJsonObject(omit_empty_value))
        .as_json_string()
        .as_stl_string();
  }

  // Parses `json_str` and loads this object from it. Returns false when the
  // text cannot be parsed, otherwise the result of
  // DeserializeFromJsonObject().
  // NOTE(review): the parsed value is passed on via as_object() without first
  // checking that the top-level JSON value is an object -- confirm how
  // ailego::JsonValue behaves for other value kinds.
  bool DeserializeFromJson(const std::string &json_str) {
    ailego::JsonValue json_value;
    if (!json_value.parse(json_str)) {
      return false;
    }
    return DeserializeFromJsonObject(json_value.as_object());
  }

 protected:
  virtual ailego::JsonObject SerializeToJsonObject(
      bool omit_empty_value = false) const = 0;
  virtual bool DeserializeFromJsonObject(
      const ailego::JsonObject &json_obj) = 0;
};

// TODO: maybe a base class for quantizer?
struct QuantizerParam : public SerializableBase {
  QuantizerType type = QuantizerType::kNone;
  int num_subquantizers = 8;  // M
  int num_bits = 8;           // bits per subquantizer

  // Constructors
  // QuantizerParam() = default;
  QuantizerParam(QuantizerType t = QuantizerType::kNone, int subquantizers = 8,
                 int bits = 8)
      : type(t), num_subquantizers(subquantizers), num_bits(bits) {}


 protected:
  friend class BaseIndexParam;
  virtual ailego::JsonObject SerializeToJsonObject(
      bool omit_empty_value = false) const override;

  virtual bool DeserializeFromJsonObject(
      const ailego::JsonObject &json_obj) override;
};

// Preprocessor kinds selectable through PreprocessorParam::type.
enum class PreprocessorType {
  kNone,
  kPCA,
  kOPQ,
};

struct PreprocessorParam {
  PreprocessorType type = PreprocessorType::kNone;

  // Constructors
  // PreprocessorParam() = default;
  explicit PreprocessorParam(PreprocessorType t = PreprocessorType::kNone)
      : type(t) {}
};

// Refinement settings attachable to a query through
// BaseIndexQueryParam::refiner_param.
struct RefinerParam {
  using Pointer = std::shared_ptr<RefinerParam>;

  float scale_factor_{0};
  // NOTE(review): presumably the index consulted during refinement -- confirm
  // in the search implementation.
  std::shared_ptr<Index> reference_index = nullptr;
};

// --- Query Parameters (can be passed to search methods) ---
// Abstract base of the per-query options; each index type derives its own
// parameter struct and implements Clone().
// NOTE(review): the meaning of the individual fields is only implied by their
// names here; confirm against the search implementation before relying on it.
class BaseIndexQueryParam {
 public:
  using Pointer = std::shared_ptr<BaseIndexQueryParam>;

  virtual ~BaseIndexQueryParam() = default;

  uint32_t topk = 10;
  bool fetch_vector = false;
  std::shared_ptr<IndexFilter> filter = nullptr;
  std::shared_ptr<std::vector<uint64_t>> bf_pks = nullptr;
  float radius = 0.0f;
  bool is_linear = false;
  RefinerParam::Pointer refiner_param = nullptr;

  // Returns a copy of this object with its dynamic type preserved.
  virtual Pointer Clone() const = 0;
};

// Query options for the flat index; adds nothing to the base fields.
struct FlatQueryParam : BaseIndexQueryParam {
  using Pointer = std::shared_ptr<FlatQueryParam>;

  // Copy-constructs a new FlatQueryParam from this object.
  BaseIndexQueryParam::Pointer Clone() const override {
    auto copy = std::make_shared<FlatQueryParam>(*this);
    return copy;
  }
};

// Query options for the HNSW index: the base fields plus `ef_search`.
struct HNSWQueryParam : BaseIndexQueryParam {
  using Pointer = std::shared_ptr<HNSWQueryParam>;

  uint32_t ef_search = kDefaultHnswEfSearch;

  // Copy-constructs a new HNSWQueryParam from this object.
  BaseIndexQueryParam::Pointer Clone() const override {
    auto copy = std::make_shared<HNSWQueryParam>(*this);
    return copy;
  }
};

// Query options for the HNSW + RaBitQ index: the base fields plus
// `ef_search`.
struct HNSWRabitqQueryParam : BaseIndexQueryParam {
  using Pointer = std::shared_ptr<HNSWRabitqQueryParam>;

  uint32_t ef_search = kDefaultHnswEfSearch;

  // Copy-constructs a new HNSWRabitqQueryParam from this object.
  BaseIndexQueryParam::Pointer Clone() const override {
    auto copy = std::make_shared<HNSWRabitqQueryParam>(*this);
    return copy;
  }
};

// Query options for the IVF index: the base fields, `nprobe`, and optional
// nested query parameters for the two index layers.
struct IVFQueryParam : BaseIndexQueryParam {
  int nprobe = 10;
  std::shared_ptr<BaseIndexQueryParam> l1QueryParam = nullptr;
  std::shared_ptr<BaseIndexQueryParam> l2QueryParam = nullptr;

  using Pointer = std::shared_ptr<IVFQueryParam>;

  // Deep copy: the nested layer parameters are cloned as well, so the result
  // does not share them with this object.
  BaseIndexQueryParam::Pointer Clone() const override {
    auto copy = std::make_shared<IVFQueryParam>(*this);
    // The copy constructor duplicated the pointers; replace each non-null one
    // with its own clone. Null pointers are already null in the copy.
    if (l1QueryParam) {
      copy->l1QueryParam = l1QueryParam->Clone();
    }
    if (l2QueryParam) {
      copy->l2QueryParam = l2QueryParam->Clone();
    }
    return copy;
  }
};

// --- Construction Parameters ---
// Parameters shared by every index type. Index-specific parameter structs
// derive from this class and add their own fields.
// template<typename IndexQueryParamType>
class BaseIndexParam : public SerializableBase {
 public:
  using Pointer = std::shared_ptr<BaseIndexParam>;

  // Explicit so that an IndexType is never implicitly converted into a
  // parameter object; all arguments are optional.
  explicit BaseIndexParam(IndexType type = IndexType::kNone,
                          MetricType metric = MetricType::kL2sq, int dim = 0,
                          int ver = 0)
      : index_type(type), metric_type(metric), dimension(dim), version(ver) {}

  virtual ~BaseIndexParam() = default;

  IndexType index_type = IndexType::kNone;
  MetricType metric_type = MetricType::kL2sq;
  int dimension = 0;  // [1, MAX_DIMENSION]
  int version = 0;    // for compatibility
  bool is_sparse = false;
  bool is_huge_page = false;
  DataType data_type = DataType::DT_UNDEFINED;
  bool use_id_map = true;

  // IndexMeta meta;
  ailego::Params params;

  // pipeline
  PreprocessorParam preprocess_param;
  QuantizerParam quantizer_param;

  BaseIndexQueryParam::Pointer default_query_param = nullptr;
  // virtual std::shared_ptr<BaseIndexQueryParam> GetDefaultQueryParam() const
  // {
  //   return std::make_shared<BaseIndexQueryParam>();
  // }
  //

 protected:
  // JSON hooks required by SerializableBase.
  virtual bool DeserializeFromJsonObject(
      const ailego::JsonObject &json_obj) override;
  virtual ailego::JsonObject SerializeToJsonObject(
      bool omit_empty_value = false) const override;
};

// Construction parameters of the flat index.
struct FlatIndexParam : BaseIndexParam {
  using Pointer = std::shared_ptr<FlatIndexParam>;

  // Tags the base with IndexType::kFlat; every other base field keeps its
  // default.
  FlatIndexParam() : BaseIndexParam{IndexType::kFlat} {}

  IndexMeta::MajorOrder major_order{IndexMeta::MajorOrder::MO_ROW};

 protected:
  // JSON hooks, extended for this type.
  bool DeserializeFromJsonObject(const ailego::JsonObject &json_obj) override;
  ailego::JsonObject SerializeToJsonObject(
      bool omit_empty_value = false) const override;
};

struct IVFIndexParam : public BaseIndexParam {
  using Pointer = std::shared_ptr<IVFIndexParam>;
  int nlist = 1024;
  int niters = 10;
  std::shared_ptr<BaseIndexParam> l1Index = nullptr;
  std::shared_ptr<BaseIndexParam> l2Index = nullptr;
  bool use_soar = false;

  // Constructors with delegation
  IVFIndexParam() : BaseIndexParam(IndexType::kIVF) {}

  IVFIndexParam(int nlist, int niters, std::shared_ptr<BaseIndexParam> l1Index,
                std::shared_ptr<BaseIndexParam> l2Index)
      : BaseIndexParam(IndexType::kIVF),
        nlist(nlist),
        niters(niters),
        l1Index(std::move(l1Index)),
        l2Index(std::move(l2Index)) {}

  IVFIndexParam(MetricType metric, int dim, int nlist, int niters,
                std::shared_ptr<BaseIndexParam> l1Index,
                std::shared_ptr<BaseIndexParam> l2Index)
      : BaseIndexParam(IndexType::kIVF, metric, dim),
        nlist(nlist),
        niters(niters),
        l1Index(std::move(l1Index)),
        l2Index(std::move(l2Index)) {}

  // query param:
  // topk of l1Index's param ==== IVFIndexQueryParam.nprobe
  // topk of l2Index's param ==== IVFIndexQueryParam.topK

  // IVFIndexParam.metric_type === l2Index's metric_type
  // IVFIndexParam.quantization === l2Index's quantization
};

struct HNSWIndexParam : public BaseIndexParam {
  using Pointer = std::shared_ptr<HNSWIndexParam>;
  int m = kDefaultHnswNeighborCnt;
  int ef_construction = kDefaultHnswEfConstruction;

  // Constructors with delegation
  HNSWIndexParam() : BaseIndexParam(IndexType::kHNSW) {}

  HNSWIndexParam(int m, int ef_construction)
      : BaseIndexParam(IndexType::kHNSW),
        m(m),
        ef_construction(ef_construction) {}

  HNSWIndexParam(MetricType metric, int dim, int m, int ef_construction)
      : BaseIndexParam(IndexType::kHNSW, metric, dim),
        m(m),
        ef_construction(ef_construction) {}

 protected:
  bool DeserializeFromJsonObject(const ailego::JsonObject &json_obj) override;
  ailego::JsonObject SerializeToJsonObject(
      bool omit_empty_value = false) const override;
};

struct HNSWRabitqIndexParam : public BaseIndexParam {
  using Pointer = std::shared_ptr<HNSWRabitqIndexParam>;

  // HNSW parameters
  int m = kDefaultHnswNeighborCnt;
  int ef_construction = kDefaultHnswEfConstruction;

  // Rabitq parameters
  int total_bits = kDefaultRabitqTotalBits;
  int num_clusters = kDefaultRabitqNumClusters;
  int sample_count = 0;
  core::IndexProvider::Pointer provider = nullptr;
  core::IndexReformer::Pointer reformer = nullptr;

  // Constructors with delegation
  HNSWRabitqIndexParam() : BaseIndexParam(IndexType::kHNSWRabitq) {}

  HNSWRabitqIndexParam(int m, int ef_construction)
      : BaseIndexParam(IndexType::kHNSWRabitq),
        m(m),
        ef_construction(ef_construction) {}

  HNSWRabitqIndexParam(MetricType metric, int dim, int m, int ef_construction)
      : BaseIndexParam(IndexType::kHNSWRabitq, metric, dim),
        m(m),
        ef_construction(ef_construction) {}

 protected:
  bool DeserializeFromJsonObject(const ailego::JsonObject &json_obj) override;
  ailego::JsonObject SerializeToJsonObject(
      bool omit_empty_value = false) const override;
};

}  // namespace zvec::core_interface

================================================
FILE: src/include/zvec/core/interface/index_param_builders.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstdint>
#include <memory>
#include <utility>
#include <vector>
#include <zvec/core/interface/index_param.h>
#include "zvec/core/framework/index_provider.h"
#include "zvec/core/framework/index_reformer.h"
#include "zvec/core/interface/index.h"

namespace zvec::core_interface {

// struct ConditionalIndexParam {
//     // predicate / rule / threshold
//     // candidate
// };


// Chaining-call (fluent) builder for index parameters.
//
// CRTP base: `ActualIndexParamBuilderType` is the concrete builder, so every
// setter can return a reference to the concrete type and calls to base and
// derived setters can be chained freely. `ActualIndexParamType` is the
// parameter struct being filled in; it is default-constructed up front and
// held through a shared_ptr.
template <typename ActualIndexParamBuilderType, typename ActualIndexParamType>
class BaseIndexParamBuilder {  //  : public
                               //  std::enable_shared_from_this<Resource>
 public:
  BaseIndexParamBuilder() : param(std::make_shared<ActualIndexParamType>()) {}
  virtual ~BaseIndexParamBuilder() = default;

  ActualIndexParamBuilderType &WithVersion(int version) {
    // `param` is a shared_ptr, so the field must be reached through `->`
    // (the former `param.version` did not compile once instantiated).
    param->version = version;
    return static_cast<ActualIndexParamBuilderType &>(*this);
  }
  ActualIndexParamBuilderType &WithIndexType(IndexType index_type) {
    param->index_type = index_type;
    return static_cast<ActualIndexParamBuilderType &>(*this);
  }
  ActualIndexParamBuilderType &WithMetricType(MetricType metric_type) {
    param->metric_type = metric_type;
    return static_cast<ActualIndexParamBuilderType &>(*this);
  }
  ActualIndexParamBuilderType &WithDimension(int dimension) {
    param->dimension = dimension;
    return static_cast<ActualIndexParamBuilderType &>(*this);
  }
  ActualIndexParamBuilderType &WithPreprocessParam(
      const PreprocessorParam &preprocess_param) {
    // BaseIndexParam::preprocess_param is stored by value, so copy the
    // argument directly (assigning a shared_ptr to it was ill-formed).
    param->preprocess_param = preprocess_param;
    return static_cast<ActualIndexParamBuilderType &>(*this);
  }
  ActualIndexParamBuilderType &WithQuantizerParam(
      const QuantizerParam &quantizer_param) {
    param->quantizer_param = quantizer_param;
    return static_cast<ActualIndexParamBuilderType &>(*this);
  }
  // ActualIndexParamBuilderType &WithRefinerParam(
  //     const RefinerParam &refiner_param) {
  //   param->refiner_param = refiner_param;
  //   return static_cast<ActualIndexParamBuilderType &>(*this);
  // }
  // ActualIndexParamBuilderType &WithDefaultQueryParam(
  //     const BaseIndexQueryParam &default_query_param) {
  //   param->default_query_param = default_query_param;
  //   return static_cast<ActualIndexParamBuilderType &>(*this);
  // }

  ActualIndexParamBuilderType &WithIsSparse(bool is_sparse) {
    param->is_sparse = is_sparse;
    return static_cast<ActualIndexParamBuilderType &>(*this);
  }
  ActualIndexParamBuilderType &WithDataType(DataType data_type) {
    param->data_type = data_type;
    return static_cast<ActualIndexParamBuilderType &>(*this);
  }

  ActualIndexParamBuilderType &WithUseIDMap(bool use_id_map) {
    param->use_id_map = use_id_map;
    return static_cast<ActualIndexParamBuilderType &>(*this);
  }

  // Returns the parameter object produced by the concrete builder.
  virtual std::shared_ptr<ActualIndexParamType> Build() = 0;

 protected:
  // The parameter object being configured; allocated by the constructor.
  std::shared_ptr<ActualIndexParamType> param;
};

// Builder for FlatIndexParam; adds no setters beyond the shared base ones.
class FlatIndexParamBuilder
    : public BaseIndexParamBuilder<FlatIndexParamBuilder, FlatIndexParam> {
 public:
  FlatIndexParamBuilder() = default;

  // Hands out the parameter object held by this builder (the same object,
  // not a copy).
  std::shared_ptr<FlatIndexParam> Build() override {
    return this->param;
  }
};

// Builder for IVFIndexParam: the shared base setters plus the IVF-specific
// fields.
class IVFIndexParamBuilder
    : public BaseIndexParamBuilder<IVFIndexParamBuilder, IVFIndexParam> {
 public:
  IVFIndexParamBuilder() = default;
  IVFIndexParamBuilder &WithNList(int nlist) {
    param->nlist = nlist;
    return *this;
  }
  IVFIndexParamBuilder &WithNiters(int niters) {
    param->niters = niters;
    return *this;
  }

  // Stores a copy of `l1Index`. The copy is created as a plain
  // BaseIndexParam, so fields that exist only in a derived parameter type are
  // not carried over; use the shared_ptr overload to keep the derived object.
  IVFIndexParamBuilder &WithL1Index(const BaseIndexParam &l1Index) {
    param->l1Index = std::make_shared<BaseIndexParam>(l1Index);
    return *this;
  }
  // Stores the given pointer as-is, preserving the dynamic type of the
  // pointed-to parameter object (e.g. the result of another builder's
  // Build()).
  IVFIndexParamBuilder &WithL1Index(std::shared_ptr<BaseIndexParam> l1Index) {
    param->l1Index = std::move(l1Index);
    return *this;
  }

  // Stores a copy of `l2Index`; see the note on WithL1Index about derived
  // fields.
  IVFIndexParamBuilder &WithL2Index(const BaseIndexParam &l2Index) {
    param->l2Index = std::make_shared<BaseIndexParam>(l2Index);
    return *this;
  }
  // Stores the given pointer as-is, preserving the dynamic type.
  IVFIndexParamBuilder &WithL2Index(std::shared_ptr<BaseIndexParam> l2Index) {
    param->l2Index = std::move(l2Index);
    return *this;
  }

  IVFIndexParamBuilder &WithUseSoar(bool use_soar) {
    param->use_soar = use_soar;
    return *this;
  }

  // Hands out the parameter object held by this builder (the same object,
  // not a copy).
  std::shared_ptr<IVFIndexParam> Build() override {
    return param;
  }
};

// Builder for HNSWIndexParam: the shared base setters plus `m` and
// `ef_construction`.
class HNSWIndexParamBuilder
    : public BaseIndexParamBuilder<HNSWIndexParamBuilder, HNSWIndexParam> {
 public:
  HNSWIndexParamBuilder() = default;

  HNSWIndexParamBuilder &WithM(int m) {
    this->param->m = m;
    return *this;
  }

  HNSWIndexParamBuilder &WithEFConstruction(int ef_construction) {
    this->param->ef_construction = ef_construction;
    return *this;
  }

  // Hands out the parameter object held by this builder (the same object,
  // not a copy).
  std::shared_ptr<HNSWIndexParam> Build() override {
    return this->param;
  }
};

// Builder for HNSWRabitqIndexParam: the shared base setters plus the HNSW and
// RaBitQ specific fields.
class HNSWRabitqIndexParamBuilder
    : public BaseIndexParamBuilder<HNSWRabitqIndexParamBuilder,
                                   HNSWRabitqIndexParam> {
 public:
  HNSWRabitqIndexParamBuilder() = default;

  // --- HNSW fields ---
  HNSWRabitqIndexParamBuilder &WithM(int m) {
    this->param->m = m;
    return *this;
  }

  HNSWRabitqIndexParamBuilder &WithEFConstruction(int ef_construction) {
    this->param->ef_construction = ef_construction;
    return *this;
  }

  // --- RaBitQ fields ---
  HNSWRabitqIndexParamBuilder &WithTotalBits(int total_bits) {
    this->param->total_bits = total_bits;
    return *this;
  }

  HNSWRabitqIndexParamBuilder &WithNumClusters(int num_clusters) {
    this->param->num_clusters = num_clusters;
    return *this;
  }

  HNSWRabitqIndexParamBuilder &WithSampleCount(int sample_count) {
    this->param->sample_count = sample_count;
    return *this;
  }

  // Takes the pointer by value and moves it into the parameter object.
  HNSWRabitqIndexParamBuilder &WithReformer(
      core::IndexReformer::Pointer reformer) {
    this->param->reformer = std::move(reformer);
    return *this;
  }

  // Takes the pointer by value and moves it into the parameter object.
  HNSWRabitqIndexParamBuilder &WithProvider(
      core::IndexProvider::Pointer provider) {
    this->param->provider = std::move(provider);
    return *this;
  }

  // Hands out the parameter object held by this builder (the same object,
  // not a copy).
  std::shared_ptr<HNSWRabitqIndexParam> Build() override {
    return this->param;
  }
};

//     class CompositeIndexParamBuilder : public
//     BaseIndexParamBuilder<CompositeIndexParamBuilder, CompositeIndexParam>
//     { public:
//         CompositeIndexParamBuilder() = default;
//         CompositeIndexParamBuilder &WithLayers(const
//         std::vector<std::shared_ptr<BaseIndexParam>> &layers) {
//             param.layers = layers;
//             return *this;
//         }
//         // with layer
//         CompositeIndexParamBuilder &WithLayer(const BaseIndexParam &layer)
//         {
//             param.layers.push_back(std::make_shared<BaseIndexParam>(layer));
//             return *this;
//         }

//         CompositeIndexParamBuilder &WithLayer(const BaseIndexParam &layer,
//                                               const BaseIndexQueryParam
//                                               &default_query_param) {
//             param.layers.push_back(std::make_shared<BaseIndexParam>(layer));
//             param.layers.back()->default_query_param =
//             std::make_shared<BaseIndexQueryParam>(default_query_param);
//             return *this;
//         }
//         std::shared_ptr<CompositeIndexParam> Build() { return
//         std::make_shared<CompositeIndexParam>(param); }

//     private:
//         CompositeIndexParam param;
//     };


// NOTE: the `#include <memory>` / `#include <vector>` directives that used to
// sit here were inside namespace zvec::core_interface. Standard headers must
// be included at file scope, which the include block at the top of this
// header takes care of.

// Fluent (chaining) builder base for query parameters.
//
// CRTP base: `T` is the query-parameter struct being filled in and `Derived`
// is the concrete builder, so each setter can return a reference to the
// concrete builder type.
template <typename T, typename Derived>
class BaseIndexQueryParamBuilder {
 public:
  // The parameter object under construction. It is a public member; derived
  // builders write their own fields into it and move it out in build().
  T m_param;

  // Fluent setters for BaseIndexQueryParam fields
  Derived &with_topk(int topk) {
    m_param.topk = topk;
    return static_cast<Derived &>(*this);
  }

  Derived &with_fetch_vector(bool fetch_vector) {
    m_param.fetch_vector = fetch_vector;
    return static_cast<Derived &>(*this);
  }

  Derived &with_filter(std::shared_ptr<IndexFilter> filter) {
    m_param.filter = std::move(filter);
    return static_cast<Derived &>(*this);
  }

  // Using a vector of uint64_t for the next one
  Derived &with_bf_pks(std::shared_ptr<std::vector<uint64_t>> bf_pks) {
    m_param.bf_pks = std::move(bf_pks);
    return static_cast<Derived &>(*this);
  }

  Derived &with_radius(float radius) {
    m_param.radius = radius;
    return static_cast<Derived &>(*this);
  }

  Derived &with_is_linear(bool is_linear) {
    m_param.is_linear = is_linear;
    return static_cast<Derived &>(*this);
  }

  Derived &with_refiner_param(RefinerParam::Pointer refiner_param) {
    m_param.refiner_param = std::move(refiner_param);
    return static_cast<Derived &>(*this);
  }
};

// Flat query builder: contributes no setters of its own, only build().
class FlatQueryParamBuilder
    : public BaseIndexQueryParamBuilder<FlatQueryParam, FlatQueryParamBuilder> {
 public:
  // Moves the accumulated settings into a freshly allocated FlatQueryParam;
  // `m_param` is left in a moved-from state afterwards.
  FlatQueryParam::Pointer build() {
    auto built = std::make_shared<FlatQueryParam>(std::move(m_param));
    return built;
  }
};

// Example Usage:
// FlatQueryParam::Pointer flat_config = FlatQueryParamBuilder()
//     .with_topk(20)
//     .with_fetch_vector(true)
//     .build();

// HNSW query builder: the base setters plus `ef_search`.
class HNSWQueryParamBuilder
    : public BaseIndexQueryParamBuilder<HNSWQueryParam, HNSWQueryParamBuilder> {
 public:
  HNSWQueryParamBuilder &with_ef_search(int ef_search) {
    this->m_param.ef_search = ef_search;
    return *this;
  }

  // Moves the accumulated settings into a freshly allocated HNSWQueryParam;
  // `m_param` is left in a moved-from state afterwards.
  HNSWQueryParam::Pointer build() {
    auto built = std::make_shared<HNSWQueryParam>(std::move(m_param));
    return built;
  }
};

// Example Usage:
// HNSWQueryParam::Pointer hnsw_config = HNSWQueryParamBuilder()
//     .with_topk(5)
//     .with_ef_search(128) // HNSW specific
//     .with_is_linear(false)
//     .build();

// IVF builder (adds specific fields: nprobe, l1QueryParam, l2QueryParam)
class IVFQueryParamBuilder
    : public BaseIndexQueryParamBuilder<IVFQueryParam, IVFQueryParamBuilder> {
 public:
  /// Stores `nprobe` in the parameter under construction; returns this
  /// builder so calls can be chained.
  IVFQueryParamBuilder &with_nprobe(int nprobe) {
    IVFQueryParamBuilder &self = *this;
    self.m_param.nprobe = nprobe;
    return self;
  }

  /// Sets the nested L1 query parameter. The argument is a pointer to the
  /// base query-parameter type, so any derived configuration object can be
  /// supplied.
  IVFQueryParamBuilder &with_l1_query_param(
      BaseIndexQueryParam::Pointer l1QueryParam) {
    IVFQueryParamBuilder &self = *this;
    self.m_param.l1QueryParam = std::move(l1QueryParam);
    return self;
  }

  /// Sets the nested L2 query parameter. As with the L1 setter, any derived
  /// configuration object can be supplied through the base pointer type.
  IVFQueryParamBuilder &with_l2_query_param(
      BaseIndexQueryParam::Pointer l2QueryParam) {
    IVFQueryParamBuilder &self = *this;
    self.m_param.l2QueryParam = std::move(l2QueryParam);
    return self;
  }

  /// Finalizes the builder: the accumulated parameter is moved into a newly
  /// allocated IVFQueryParam, leaving the builder's copy moved-from.
  IVFQueryParam::Pointer build() {
    IVFQueryParam::Pointer built =
        std::make_shared<IVFQueryParam>(std::move(this->m_param));
    return built;
  }
};

// HNSW-Rabitq builder (adds ef_search field)
class HNSWRabitqQueryParamBuilder
    : public BaseIndexQueryParamBuilder<HNSWRabitqQueryParam,
                                        HNSWRabitqQueryParamBuilder> {
 public:
  /// Stores `ef_search` in the parameter under construction; returns this
  /// builder so calls can be chained.
  HNSWRabitqQueryParamBuilder &with_ef_search(int ef_search) {
    HNSWRabitqQueryParamBuilder &self = *this;
    self.m_param.ef_search = ef_search;
    return self;
  }

  /// Finalizes the builder: the accumulated parameter is moved into a newly
  /// allocated HNSWRabitqQueryParam, leaving the builder's copy moved-from.
  HNSWRabitqQueryParam::Pointer build() {
    HNSWRabitqQueryParam::Pointer built =
        std::make_shared<HNSWRabitqQueryParam>(std::move(this->m_param));
    return built;
  }
};

// Example Usage (IVFQueryParamBuilder):
// // First, build the required nested params
// auto nested_hnsw = HNSWQueryParamBuilder().with_ef_search(64).build();
//
// // Then, build the IVF param
// IVFQueryParam::Pointer ivf_config = IVFQueryParamBuilder()
//     .with_topk(10)
//     .with_nprobe(50) // IVF specific
//     .with_l1_query_param(nested_hnsw) // Set a nested config object
//     .build();


namespace predefined {
// some predefined index param builders, e.g., SCANN
//
// SCANNIndexParamBuilder::Build() assembles a fixed, three-level index
// configuration out of the generic builders:
//   - outer IVF index: 40000 lists, SOAR enabled, QuickADC quantizer;
//   - its L1 index: an IVF index with inner-product metric, 700 lists and a
//     QuickADC quantizer;
//   - that index's own L1 index: a flat index with the L2sq metric.
// The commented-out calls below record default query / refiner settings that
// are currently not applied.
class SCANNIndexParamBuilder {
 public:
  // alias SCANNIIndexParam = xxxxx
  // Returns the assembled outer IVF index parameter object.
  std::shared_ptr<IVFIndexParam> Build() {
    // SCANN
    auto param_ptr =
        IVFIndexParamBuilder()
            .WithNList(40000)  //  10000000 -> 40000
            // Since one data point may correspond to 2 partitions, the 140
            // points may contain duplicates and need to be deduplicated
            // (keep one, taking the mean).
            .WithUseSoar(
                true)
            .WithQuantizerParam(QuantizerParam(QuantizerType::kQuickADC))
            // .WithDefaultQueryParam(
            //     IVFQueryParamBuilder().with_topk(140).with_nprobe(68).build())
            // .WithRefinerParam(RefinerParam{
            //     10,  // 140 -> 10
            //     nullptr,
            //     std::make_shared<QuantizerParam>(
            //         QuantizerParam{QuantizerType::kFP16}),
            // })
            // The nested builder result is dereferenced, so WithL1Index
            // receives the built object itself rather than the shared_ptr.
            .WithL1Index(*(
                IVFIndexParamBuilder()
                    .WithMetricType(
                        MetricType::kInnerProduct)  // Layer2  flat index
                    .WithNList(700)                 //  40000 -> 700
                    .WithQuantizerParam(
                        QuantizerParam{QuantizerType::kQuickADC})
                    // .WithDefaultQueryParam(IVFQueryParamBuilder()
                    //                            .with_topk(68)
                    //                            .with_nprobe(20)
                    //                            .build())
                    .WithL1Index(*(
                        FlatIndexParamBuilder()
                            .WithMetricType(MetricType::kL2sq)
                            // implicit :
                            // .WithDefaultQueryParam(FlatQueryParamBuilder().with_topk(20).build())
                            .Build()))
                    .Build()))
            .Build();

    return param_ptr;
  }
};

}  // namespace predefined
}  // namespace zvec::core_interface

================================================
FILE: src/include/zvec/db/collection.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <memory>
#include <string>
#include <vector>
#include <zvec/db/doc.h>
#include <zvec/db/options.h>
#include <zvec/db/stats.h>
#include <zvec/db/status.h>

namespace zvec {

/**
 * @brief Abstract interface of a collection.
 *
 * Instances are obtained as shared pointers through the static factories
 * CreateAndOpen() and Open(). Every other operation is a pure virtual member
 * that the concrete implementation (not visible in this header) provides.
 * Operations report failure through Status or Result<T> return values.
 */
class Collection {
 public:
  using Ptr = std::shared_ptr<Collection>;

  /**
   * @brief Create and open a collection.
   *
   * @param path The path to the collection.
   * @param schema The schema of the collection.
   * @param option The options of the collection.
   * @return The collection OR an error.
   */
  static Result<Ptr> CreateAndOpen(const std::string &path,
                                   const CollectionSchema &schema,
                                   const CollectionOptions &option);

  /**
   * @brief Open an existing collection.
   *
   * @param path The path to the collection.
   * @param option The options of the collection.
   * @return The collection OR an error.
   */
  static Result<Ptr> Open(const std::string &path,
                          const CollectionOptions &option);

  // Virtual so that implementations are destroyed correctly through a
  // Collection pointer; defined out of line.
  virtual ~Collection();

 public:
  // ---- Lifecycle and introspection ----

  // NOTE(review): presumably removes the collection's persisted data —
  // confirm against the implementation.
  virtual Status Destroy() = 0;

  // NOTE(review): presumably persists pending writes — confirm against the
  // implementation.
  virtual Status Flush() = 0;

  // Const accessors returning the collection's path, statistics, schema and
  // options respectively, each wrapped in a Result.
  virtual Result<std::string> Path() const = 0;

  virtual Result<CollectionStats> Stats() const = 0;

  virtual Result<CollectionSchema> Schema() const = 0;

  virtual Result<CollectionOptions> Options() const = 0;

 public:
  // ---- Schema / index management ----
  // Note: several of the members below declare default arguments on virtual
  // functions; default arguments are resolved from the static type of the
  // call, so overrides should repeat the same defaults.

  // Creates an index described by `index_params` on column `column_name`.
  // `options` defaults to CreateIndexOptions{0}.
  virtual Status CreateIndex(
      const std::string &column_name, const IndexParams::Ptr &index_params,
      const CreateIndexOptions &options = CreateIndexOptions{0}) = 0;

  // Drops the index on column `column_name`.
  virtual Status DropIndex(const std::string &column_name) = 0;

  // `options` defaults to OptimizeOptions{0}.
  virtual Status Optimize(const OptimizeOptions &options = OptimizeOptions{
                              0}) = 0;

  // Adds a column described by `column_schema`. `options` defaults to
  // AddColumnOptions{0}.
  // NOTE(review): `expression` presumably defines how values of the new
  // column are computed — confirm against the implementation.
  virtual Status AddColumn(const FieldSchema::Ptr &column_schema,
                           const std::string &expression,
                           const AddColumnOptions &options = AddColumnOptions{
                               0}) = 0;

  // Drops column `column_name`.
  virtual Status DropColumn(const std::string &column_name) = 0;

  // Alters column `column_name`. `new_column_schema` defaults to nullptr and
  // `options` to AlterColumnOptions{0}.
  // NOTE(review): `rename` is presumably the new column name — confirm.
  virtual Status AlterColumn(
      const std::string &column_name, const std::string &rename,
      const FieldSchema::Ptr &new_column_schema = nullptr,
      const AlterColumnOptions &options = AlterColumnOptions{0}) = 0;

  // ---- Document writes ----
  // Insert/Upsert/Update take the documents by non-const reference and return
  // WriteResults (a vector of Status).
  // NOTE(review): presumably one Status per input document, and the
  // documents may be modified by the call — confirm.
  virtual Result<WriteResults> Insert(std::vector<Doc> &docs) = 0;

  virtual Result<WriteResults> Upsert(std::vector<Doc> &docs) = 0;

  virtual Result<WriteResults> Update(std::vector<Doc> &docs) = 0;

  // Deletes by primary key strings.
  virtual Result<WriteResults> Delete(const std::vector<std::string> &pks) = 0;

  // Deletes using a filter expression string.
  virtual Status DeleteByFilter(const std::string &filter) = 0;

  // ---- Reads (const) ----

  // Runs a vector query and returns the matching documents as a list.
  virtual Result<DocPtrList> Query(const VectorQuery &query) const = 0;

  // Runs a group-by vector query and returns the grouped results.
  virtual Result<GroupResults> GroupByQuery(
      const GroupByVectorQuery &query) const = 0;

  // Fetches documents by primary key; the result is a map keyed by string.
  virtual Result<DocPtrMap> Fetch(
      const std::vector<std::string> &pks) const = 0;
};

}  // namespace zvec

================================================
FILE: src/include/zvec/db/config.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <atomic>
#include <cstdint>
#include <memory>
#include <zvec/ailego/pattern/singleton.h>
#include <zvec/db/status.h>

namespace zvec {

// Logging defaults shared by GlobalConfig and its log configuration structs.
//
// These are declared `inline` so that the whole program shares exactly one
// object per constant. A plain namespace-scope `const` in a header has
// internal linkage: every translation unit would get (and dynamically
// initialize) its own std::string copy, and inline functions / default
// arguments that odr-use them would refer to different objects in different
// translation units.
inline constexpr uint32_t MIN_LOG_FILE_SIZE = 128;
inline constexpr uint32_t DEFAULT_LOG_FILE_SIZE = 2048;
inline constexpr uint32_t DEFAULT_LOG_OVERDUE_DAYS = 7;
inline const std::string CONSOLE_LOG_TYPE_NAME = "ConsoleLogger";
inline const std::string FILE_LOG_TYPE_NAME = "AppendLogger";
inline const std::string DEFAULT_LOG_DIR = "./logs";
inline const std::string DEFAULT_LOG_BASENAME = "zvec.log";

class GlobalConfig : public ailego::Singleton<GlobalConfig> {
  friend class ailego::Singleton<GlobalConfig>;

 public:
  // Severity levels, ordered from most to least verbose.
  enum class LogLevel : uint8_t {
    DEBUG = 0,
    INFO,
    WARN,
    ERROR,
    FATAL,
  };

  // Common base of all logger configurations: carries the level and lets
  // each concrete configuration name its logger type.
  struct LogConfig {
    LogLevel level;

    LogConfig(LogLevel level) : level(level) {}
    virtual ~LogConfig() = default;
    virtual std::string GetLoggerType() const = 0;
  };

  // Configuration for logging to the console (level defaults to WARN).
  struct ConsoleLogConfig : LogConfig {
    ConsoleLogConfig(LogLevel level = LogLevel::WARN) : LogConfig(level) {}

    std::string GetLoggerType() const override {
      return CONSOLE_LOG_TYPE_NAME;
    }
  };

  // Configuration for logging to files.
  struct FileLogConfig : LogConfig {
    std::string dir;
    std::string basename;
    uint32_t file_size;  // MB
    uint32_t overdue_days;

    FileLogConfig(LogLevel level = LogLevel::WARN,
                  std::string dir = DEFAULT_LOG_DIR,
                  std::string basename = DEFAULT_LOG_BASENAME,
                  uint32_t file_size = DEFAULT_LOG_FILE_SIZE,
                  uint32_t overdue_days = DEFAULT_LOG_OVERDUE_DAYS)
        : LogConfig(level),
          dir(dir),
          basename(basename),
          file_size(file_size),
          overdue_days(overdue_days) {}

    std::string GetLoggerType() const override {
      return FILE_LOG_TYPE_NAME;
    }
  };

  // Plain bundle of every configurable value; its constructor is defined
  // out of line.
  struct ConfigData {
    uint64_t memory_limit_bytes;

    // log
    std::shared_ptr<LogConfig> log_config;

    // query
    uint32_t query_thread_count;
    float invert_to_forward_scan_ratio;
    float brute_force_by_keys_ratio;

    // optimize
    uint32_t optimize_thread_count;

    ConfigData();
  };

  // Initialize the configuration (can only be called once)
  Status Initialize(const ConfigData &config);

  Status Validate(const ConfigData &config) const;

  // ---- Read-only accessors ----
  uint64_t memory_limit_bytes() const noexcept;

  // The three accessors below dereference the stored log configuration
  // pointer without checking it for null.
  const LogConfig &log_config() const noexcept {
    return *config_.log_config;
  }

  std::string log_type() const noexcept {
    return config_.log_config->GetLoggerType();
  }

  LogLevel log_level() const noexcept {
    return config_.log_config->level;
  }

  // ---- File log specific accessors ----
  // When the active log configuration is not a FileLogConfig, the string
  // accessors return a reference to an empty string and the numeric ones
  // return 0.
  const std::string &log_dir() const noexcept {
    const FileLogConfig *file_cfg = file_log_config();
    static const std::string no_dir = "";
    if (file_cfg != nullptr) {
      return file_cfg->dir;
    }
    return no_dir;
  }

  const std::string &log_file_basename() const noexcept {
    const FileLogConfig *file_cfg = file_log_config();
    static const std::string no_basename = "";
    if (file_cfg != nullptr) {
      return file_cfg->basename;
    }
    return no_basename;
  }

  uint32_t log_file_size() const noexcept {
    if (const FileLogConfig *file_cfg = file_log_config()) {
      return file_cfg->file_size;
    }
    return 0;
  }

  uint32_t log_overdue_days() const noexcept {
    if (const FileLogConfig *file_cfg = file_log_config()) {
      return file_cfg->overdue_days;
    }
    return 0;
  }

  //! Query thread count
  uint32_t query_thread_count() const noexcept {
    return config_.query_thread_count;
  }

  //! Invert to forward scan ratio
  float invert_to_forward_scan_ratio() const noexcept {
    return config_.invert_to_forward_scan_ratio;
  }

  //! Brute force by keys ratio
  float brute_force_by_keys_ratio() const noexcept {
    return config_.brute_force_by_keys_ratio;
  }

  //! Optimize thread count
  uint32_t optimize_thread_count() const noexcept {
    return config_.optimize_thread_count;
  }

 private:
  // Returns the active log configuration viewed as a FileLogConfig, or
  // nullptr when it is of another type (or when no configuration is set).
  const FileLogConfig *file_log_config() const noexcept {
    return dynamic_cast<const FileLogConfig *>(config_.log_config.get());
  }

  // Configuration data
  ConfigData config_;

  // Atomic flag to ensure initialization happens only once
  std::atomic<bool> initialized_{false};
};

}  // namespace zvec

================================================
FILE: src/include/zvec/db/doc.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <memory>
#include <optional>
#include <sstream>
#include <string>
#include <unordered_map>
#include <variant>
#include <zvec/ailego/utility/float_helper.h>
#include <zvec/db/query_params.h>
#include <zvec/db/schema.h>
#include <zvec/db/status.h>
#include <zvec/db/type.h>

namespace zvec {

// Short alias for ailego::Float16; it is the element type of the float16_t
// vector alternatives of Doc::Value below.
using float16_t = ailego::Float16;

/**
 * @brief A document: primary key, score, doc id, write operator and a set of
 * named field values.
 *
 * Field values are stored in a map from field name to Doc::Value, a variant
 * whose index 0 (std::monostate) represents an explicit null. A field can
 * therefore be in one of three states: absent, present-but-null, or present
 * with a typed value.
 */
class Doc {
 public:
  // Every type a field may hold. The trailing numbers are the variant
  // indices.
  using Value = std::variant<
      std::monostate,  // 0 - represents null value
      bool, int32_t, uint32_t, int64_t, uint64_t, float, double,  // 1~7
      std::string,                                                // 8
      std::vector<bool>,                                          // 9
      std::vector<int8_t>,                                        // 10
      std::vector<int16_t>,                                       // 11
      std::vector<int32_t>,                                       // 12
      std::vector<int64_t>,                                       // 13
      std::vector<uint32_t>,                                      // 14
      std::vector<uint64_t>,                                      // 15
      std::vector<float16_t>,                                     // 16
      std::vector<float>,                                         // 17
      std::vector<double>,                                        // 18
      std::vector<std::string>,                                   // 19
      std::pair<std::vector<uint32_t>, std::vector<float>>,       // 20
      std::pair<std::vector<uint32_t>, std::vector<float16_t>>>;  // 21

  using Ptr = std::shared_ptr<Doc>;

  Doc() = default;
  ~Doc() = default;

  Doc(const Doc &) = default;
  Doc &operator=(const Doc &) = default;
  Doc(Doc &&) = default;
  Doc &operator=(Doc &&) = default;

 public:
  void set_pk(std::string pk) {
    pk_ = std::move(pk);
  }

  // Returns a copy of the primary key.
  std::string pk() const {
    return pk_;
  }

  void set_score(float score) {
    score_ = score;
  }

  float score() const {
    return score_;
  }

  void set_doc_id(uint64_t doc_id) {
    doc_id_ = doc_id;
  }

  uint64_t doc_id() const {
    return doc_id_;
  }

  // Returns the names of all fields (including null ones). The order follows
  // the iteration order of the underlying unordered_map.
  std::vector<std::string> field_names() const {
    std::vector<std::string> names;
    names.reserve(fields_.size());

    for (const auto &[name, _] : fields_) {
      names.emplace_back(name);
    }

    return names;
  }

  void set_operator(const Operator op) {
    op_ = op;
  }

  // Const-qualified so the operator can also be read from a const Doc.
  Operator get_operator() const {
    return op_;
  }

  // Set field value, replacing any previous value of that field.
  // T must be one of the alternatives of Value (checked at compile time).
  // Always returns true.
  template <typename T>
  bool set(const std::string &field_name, T value) {
    // TODO: support char*
    static_assert(is_valid_type_v<T>, "Unsupported type");
    fields_[field_name] = std::move(value);
    return true;
  }

  // Set field to null
  void set_null(const std::string &field_name) {
    fields_[field_name] = std::monostate{};
  }

  // Check if field exists
  bool has(const std::string &field_name) const {
    return fields_.find(field_name) != fields_.end();
  }

  // Check if field exists and is not null
  bool has_value(const std::string &field_name) const {
    auto it = fields_.find(field_name);
    if (it == fields_.end()) {
      return false;
    }
    return !std::holds_alternative<std::monostate>(it->second);
  }

  // Check if field is null
  bool is_null(const std::string &field_name) const {
    auto it = fields_.find(field_name);
    if (it == fields_.end()) {
      return false;  // Field does not exist is not equal to null
    }
    return std::holds_alternative<std::monostate>(it->second);
  }

  // Check if fields is empty
  bool is_empty() const {
    return fields_.empty();
  }

  // Field get status enumeration
  enum class FieldGetStatus {
    SUCCESS,       // Successfully got value
    NOT_FOUND,     // Field does not exist
    IS_NULL,       // Field exists but is null
    TYPE_MISMATCH  // Field exists but type mismatch
  };

  // Field get result template class: either a value (status SUCCESS) or one
  // of the non-success statuses.
  template <typename T>
  class FieldGetResult {
   public:
    // Constructor - success case
    explicit FieldGetResult(const T &value)
        : status_(FieldGetStatus::SUCCESS), value_(value) {}

    // Constructor - error case.
    // Throws std::invalid_argument when called with SUCCESS, because a
    // successful result must carry a value.
    explicit FieldGetResult(FieldGetStatus status) : status_(status) {
      if (status == FieldGetStatus::SUCCESS) {
        throw std::invalid_argument("Use value constructor for SUCCESS status");
      }
    }

    // Get status
    FieldGetStatus status() const {
      return status_;
    }

    // Get value (only available when successful).
    // Throws std::runtime_error otherwise.
    const T &value() const {
      if (status_ != FieldGetStatus::SUCCESS) {
        throw std::runtime_error("No value available");
      }
      return value_;
    }

    // Check if successful
    bool ok() const {
      return status_ == FieldGetStatus::SUCCESS;
    }

    // Convert to optional: the value on success, std::nullopt otherwise.
    operator std::optional<T>() const {
      if (status_ == FieldGetStatus::SUCCESS) {
        return value_;
      }
      return std::nullopt;
    }

   private:
    FieldGetStatus status_;
    // Value-initialized so that an error result of a scalar T never holds an
    // indeterminate value (copying such a result would otherwise be
    // undefined behaviour).
    T value_{};
  };


  // Get field value, distinguish between not found, null and type mismatch
  // cases
  template <typename T>
  FieldGetResult<T> get_field(const std::string &field_name) const {
    static_assert(is_valid_type_v<T>, "Unsupported type");

    auto it = fields_.find(field_name);
    if (it == fields_.end()) {
      return FieldGetResult<T>(FieldGetStatus::NOT_FOUND);
    }

    const Value &stored = it->second;
    if (std::holds_alternative<std::monostate>(stored)) {
      return FieldGetResult<T>(FieldGetStatus::IS_NULL);
    }

    // std::get_if reports a type mismatch through a null pointer, so the
    // mismatch path does not need to throw and catch an exception.
    if (const T *typed = std::get_if<T>(&stored)) {
      return FieldGetResult<T>(*typed);
    }
    return FieldGetResult<T>(FieldGetStatus::TYPE_MISMATCH);
  }

  // Convenience wrapper around get_field(): the value on success, and
  // std::nullopt for not-found, null and type-mismatch alike.
  template <typename T>
  std::optional<T> get(const std::string &field_name) const {
    auto result = get_field<T>(field_name);
    if (result.status() == FieldGetStatus::SUCCESS) {
      return result.value();
    }
    return std::nullopt;
  }

  // Removes the field if present; no effect otherwise.
  void remove(const std::string &field_name) {
    fields_.erase(field_name);
  }

  Status validate(const CollectionSchema::Ptr &schema,
                  bool is_update = false) const;

  size_t memory_usage() const;

  // Resets pk, score, doc id and fields.
  // NOTE(review): op_ is left untouched here — confirm this is intended.
  void clear() {
    pk_.clear();
    score_ = 0.0f;
    doc_id_ = 0;
    fields_.clear();
  }

  // Short one-line summary: operator (as an unsigned integer), doc id, score,
  // pk and the number of fields.
  std::string to_string() const {
    std::stringstream ss;
    ss << "[op:" << static_cast<uint32_t>(op_) << ", doc_id: " << doc_id_
       << ", score: " << score_ << ", pk: " << pk_
       << ", fields: " << fields_.size() << "]";
    return ss.str();
  }

  std::string to_detail_string() const;

  bool operator==(const Doc &other) const;

  bool operator!=(const Doc &other) const {
    return !(*this == other);
  }

 public:
  std::vector<uint8_t> serialize() const;

  static Doc::Ptr deserialize(const uint8_t *data, size_t size);
  static Doc::Ptr deserialize(const std::vector<uint8_t> &data) {
    return deserialize(data.data(), data.size());
  }

 public:
  // Overwrites pk, score, doc id and operator with those of `other`, then
  // copies every field of `other` into this document. Fields that exist only
  // in this document are kept; fields present in both take `other`'s value.
  void merge(const Doc &other) {
    pk_ = other.pk_;
    score_ = other.score_;
    doc_id_ = other.doc_id_;
    op_ = other.op_;
    for (const auto &[field_name, value] : other.fields_) {
      fields_[field_name] = value;
    }
  }

 private:
  static void serialize_value(std::vector<uint8_t> &buffer, const Value &value);

  static Value deserialize_value(const uint8_t *&data, uint8_t type);
  static Value deserialize_value(const uint8_t *&data);

  static void write_to_buffer(std::vector<uint8_t> &buffer, const void *src,
                              size_t size);

  static void read_from_buffer(const uint8_t *&data, void *dest, size_t size);

  struct ValueEqual;

 private:
  std::string pk_;
  float score_{0.0f};
  // Both members below are value-initialized so that a default-constructed
  // Doc never exposes indeterminate values through doc_id(), get_operator(),
  // to_string() or operator==.
  uint64_t doc_id_{0};
  Operator op_{};

  // True exactly for the alternatives of Value; listed in variant index
  // order.
  template <typename T>
  static constexpr bool is_valid_type_v =
      std::is_same_v<T, std::monostate> ||            // 0 - Added null support
      std::is_same_v<T, bool> ||                      // 1
      std::is_same_v<T, int32_t> ||                   // 2
      std::is_same_v<T, uint32_t> ||                  // 3
      std::is_same_v<T, int64_t> ||                   // 4
      std::is_same_v<T, uint64_t> ||                  // 5
      std::is_same_v<T, float> ||                     // 6
      std::is_same_v<T, double> ||                    // 7
      std::is_same_v<T, std::string> ||               // 8
      std::is_same_v<T, std::vector<bool>> ||         // 9
      std::is_same_v<T, std::vector<int8_t>> ||       // 10
      std::is_same_v<T, std::vector<int16_t>> ||      // 11
      std::is_same_v<T, std::vector<int32_t>> ||      // 12
      std::is_same_v<T, std::vector<int64_t>> ||      // 13
      std::is_same_v<T, std::vector<uint32_t>> ||     // 14
      std::is_same_v<T, std::vector<uint64_t>> ||     // 15
      std::is_same_v<T, std::vector<float16_t>> ||    // 16
      std::is_same_v<T, std::vector<float>> ||        // 17
      std::is_same_v<T, std::vector<double>> ||       // 18
      std::is_same_v<T, std::vector<std::string>> ||  // 19
      std::is_same_v<
          T, std::pair<std::vector<uint32_t>, std::vector<float>>> ||  // 20
      std::is_same_v<
          T, std::pair<std::vector<uint32_t>, std::vector<float16_t>>>;  // 21

  std::unordered_map<std::string, Value> fields_;
};

// Returns a type name for `value` as a string; defined out of line.
// NOTE(review): `is_vector` presumably selects vector-oriented naming —
// confirm against the implementation.
std::string get_value_type_name(const Doc::Value &value, bool is_vector);

// List of shared document pointers.
using DocPtrList = std::vector<Doc::Ptr>;

// Shared document pointers keyed by a string.
// NOTE(review): the key is presumably the primary key — confirm.
using DocPtrMap = std::unordered_map<std::string, Doc::Ptr>;

// A list of Status values.
// NOTE(review): presumably one entry per written document — confirm.
using WriteResults = std::vector<Status>;

// Describes a vector query against one field. This is an aggregate; members
// with a default member initializer have a defined value when the struct is
// default-constructed.
struct VectorQuery {
  // Zero-initialized so that a default-constructed query never carries an
  // indeterminate topk; callers are expected to set it explicitly.
  int topk_{0};
  std::string field_name_;
  std::string query_vector_;  // fp16, void *
  std::string query_sparse_indices_;
  std::string query_sparse_values_;
  std::string filter_;
  bool include_vector_{false};
  bool include_doc_id_{false};
  // select * by default, select no field if output_fields_ is empty, select
  // specific fields if output_fields_ is not empty
  std::optional<std::vector<std::string>> output_fields_;
  QueryParams::Ptr query_params_;

  // Validates this query against `schema`; defined out of line.
  Status validate(const FieldSchema *schema) const;
};

// Describes a vector query whose results are grouped by the value of
// `group_by_field_name_`. This is an aggregate with defaults for the flag and
// the two group sizes.
struct GroupByVectorQuery {
  std::string field_name_;
  std::string query_vector_;
  std::string query_sparse_indices_;
  std::string query_sparse_values_;
  std::string filter_;
  // Defaults to false, matching VectorQuery::include_vector_, so that a
  // default-constructed query never carries an indeterminate flag.
  bool include_vector_{false};
  // select * by default, select no field if output_fields_ is empty, select
  // specific fields if output_fields_ is not empty
  std::optional<std::vector<std::string>> output_fields_;
  std::string group_by_field_name_;
  uint32_t group_count_ = 2;
  uint32_t group_topk_ = 3;
  QueryParams::Ptr query_params_;
};

// One group of a group-by query result: a group value (as a string) and the
// documents held for it.
// NOTE(review): group_by_value_ is presumably the value of the group-by field
// shared by all docs_ — confirm against the query implementation.
struct GroupResult {
  std::string group_by_value_;
  std::vector<Doc> docs_;
};

using GroupResults = std::vector<GroupResult>;

}  // namespace zvec


================================================
FILE: src/include/zvec/db/index_params.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <memory>
#include <sstream>
#include <string>
#include <zvec/core/interface/constants.h>
#include <zvec/db/type.h>
#include "zvec/core/framework/index_provider.h"
#include "zvec/core/framework/index_reformer.h"

namespace zvec {

/*
 * Column index params
 */
class IndexParams {
 public:
  using Ptr = std::shared_ptr<IndexParams>;

  IndexParams(IndexType type) : type_(type) {}

  virtual ~IndexParams() = default;

  virtual Ptr clone() const = 0;

  virtual bool operator==(const IndexParams &other) const = 0;

  virtual std::string to_string() const = 0;

  virtual bool operator!=(const IndexParams &other) const {
    return !(*this == other);
  }

  bool is_vector_index_type() const {
    return type_ == IndexType::FLAT || type_ == IndexType::HNSW ||
           type_ == IndexType::HNSW_RABITQ || type_ == IndexType::IVF;
  }

  IndexType type() const {
    return type_;
  }

 protected:
  IndexType type_;
};

/*
 * Scalar: Invert index params
 */
class InvertIndexParams : public IndexParams {
 public:
  using OPtr = std::shared_ptr<InvertIndexParams>;

  // Range optimization is on by default; extended wildcard is off by default.
  InvertIndexParams(bool enable_range_optimization = true,
                    bool enable_extended_wildcard = false)
      : IndexParams(IndexType::INVERT),
        enable_range_optimization_{enable_range_optimization},
        enable_extended_wildcard_{enable_extended_wildcard} {}

  // Builds a new InvertIndexParams carrying the same two flags.
  Ptr clone() const override {
    Ptr copy = std::make_shared<InvertIndexParams>(enable_range_optimization_,
                                                   enable_extended_wildcard_);
    return copy;
  }

  std::string to_string() const override;

  // Equal when the index types match and both flags agree.
  bool operator==(const IndexParams &other) const override {
    if (other.type() != type()) {
      return false;
    }
    const auto &rhs = dynamic_cast<const InvertIndexParams &>(other);
    if (enable_range_optimization_ != rhs.enable_range_optimization_) {
      return false;
    }
    return enable_extended_wildcard_ == rhs.enable_extended_wildcard_;
  }

  bool enable_range_optimization() const {
    return enable_range_optimization_;
  }

  void set_enable_range_optimization(bool enable_range_optimization) {
    enable_range_optimization_ = enable_range_optimization;
  }

  bool enable_extended_wildcard() const {
    return enable_extended_wildcard_;
  }

  // Turns suffix and infix search on or off.
  // Prefix search stays enabled regardless of this setting.
  void set_enable_extended_wildcard(bool enable_extended_wildcard) {
    enable_extended_wildcard_ = enable_extended_wildcard;
  }

 private:
  bool enable_range_optimization_{false};
  bool enable_extended_wildcard_{false};
};

/*
 * Column index params
 */
class VectorIndexParams : public IndexParams {
 public:
  VectorIndexParams(IndexType type, MetricType metric_type,
                    QuantizeType quantize_type = QuantizeType::UNDEFINED)
      : IndexParams(type),
        metric_type_(metric_type),
        quantize_type_(quantize_type) {}

  virtual ~VectorIndexParams() = default;

  std::string vector_index_params_to_string(const std::string &class_name,
                                            MetricType metric_type,
                                            QuantizeType quantize_type) const;

  MetricType metric_type() const {
    return metric_type_;
  }

  void set_metric_type(MetricType metric_type) {
    metric_type_ = metric_type;
  }

  QuantizeType quantize_type() const {
    return quantize_type_;
  }

  void set_quantize_type(QuantizeType quantize_type) {
    quantize_type_ = quantize_type;
  }

 protected:
  MetricType metric_type_;
  QuantizeType quantize_type_;
};

/*
 * Vector: Hnsw index params
 */
class HnswIndexParams : public VectorIndexParams {
 public:
  HnswIndexParams(
      MetricType metric_type, int m = core_interface::kDefaultHnswNeighborCnt,
      int ef_construction = core_interface::kDefaultHnswEfConstruction,
      QuantizeType quantize_type = QuantizeType::UNDEFINED)
      : VectorIndexParams(IndexType::HNSW, metric_type, quantize_type),
        m_(m),
        ef_construction_(ef_construction) {}

  using OPtr = std::shared_ptr<HnswIndexParams>;

 public:
  Ptr clone() const override {
    return std::make_shared<HnswIndexParams>(metric_type_, m_, ef_construction_,
                                             quantize_type_);
  }

  std::string to_string() const override {
    auto base_str = vector_index_params_to_string("HnswIndexParams",
                                                  metric_type_, quantize_type_);
    std::ostringstream oss;
    oss << base_str << ",m:" << m_ << ",ef_construction:" << ef_construction_
        << "}";
    return oss.str();
  }

  bool operator==(const IndexParams &other) const override {
    return type() == other.type() &&
           metric_type() ==
               static_cast<const HnswIndexParams &>(other).metric_type() &&
           m_ == static_cast<const HnswIndexParams &>(other).m_ &&
           ef_construction_ ==
               static_cast<const HnswIndexParams &>(other).ef_construction_ &&
           quantize_type() ==
               static_cast<const HnswIndexParams &>(other).quantize_type();
  }

  void set_m(int m) {
    m_ = m;
  }
  int m() const {
    return m_;
  }
  void set_ef_construction(int ef_construction) {
    ef_construction_ = ef_construction;
  }
  int ef_construction() const {
    return ef_construction_;
  }

 protected:
  int m_;
  int ef_construction_;
};

/*
 * Vector: Hnsw + RaBitQ index params. The quantize type is always
 * QuantizeType::RABITQ (fixed by the constructor).
 */
class HnswRabitqIndexParams : public VectorIndexParams {
 public:
  using OPtr = std::shared_ptr<HnswRabitqIndexParams>;

  // Numeric settings default to the core_interface constants, except
  // `sample_count`, which defaults to 0.
  HnswRabitqIndexParams(
      MetricType metric_type,
      int total_bits = core_interface::kDefaultRabitqTotalBits,
      int num_clusters = core_interface::kDefaultRabitqNumClusters,
      int m = core_interface::kDefaultHnswNeighborCnt,
      int ef_construction = core_interface::kDefaultHnswEfConstruction,
      int sample_count = 0)
      : VectorIndexParams(IndexType::HNSW_RABITQ, metric_type,
                          QuantizeType::RABITQ),
        total_bits_(total_bits),
        num_clusters_(num_clusters),
        sample_count_(sample_count),
        m_(m),
        ef_construction_(ef_construction) {}

  // Copies the numeric settings into a new object and hands over the same
  // reformer / raw-vector-provider pointers (the pointees are not duplicated).
  Ptr clone() const override {
    auto copy = std::make_shared<HnswRabitqIndexParams>(
        metric_type_, total_bits_, num_clusters_, m_, ef_construction_,
        sample_count_);
    copy->set_rabitq_reformer(rabitq_reformer_);
    copy->set_raw_vector_provider(raw_vector_provider_);
    return copy;
  }

  // Renders the common prefix followed by the RaBitQ and HNSW fields.
  std::string to_string() const override {
    std::ostringstream out;
    out << vector_index_params_to_string("HnswRabitqIndexParams", metric_type_,
                                         quantize_type_);
    out << ",total_bits:" << total_bits_;
    out << ",num_clusters:" << num_clusters_;
    out << ",sample_count:" << sample_count_;
    out << ",m:" << m_;
    out << ",ef_construction:" << ef_construction_ << "}";
    return out.str();
  }

  // Compares the numeric settings only; the reformer and raw-vector-provider
  // pointers do not take part in equality.
  bool operator==(const IndexParams &other) const override {
    if (type() != other.type()) {
      return false;
    }
    const auto &rhs = dynamic_cast<const HnswRabitqIndexParams &>(other);
    const bool same_base = metric_type() == rhs.metric_type() &&
                           quantize_type_ == rhs.quantize_type_;
    const bool same_rabitq = total_bits_ == rhs.total_bits_ &&
                             num_clusters_ == rhs.num_clusters_ &&
                             sample_count_ == rhs.sample_count_;
    const bool same_hnsw =
        m_ == rhs.m_ && ef_construction_ == rhs.ef_construction_;
    return same_base && same_rabitq && same_hnsw;
  }

  // ---- HNSW settings ----
  int m() const {
    return m_;
  }
  void set_m(int m) {
    m_ = m;
  }

  int ef_construction() const {
    return ef_construction_;
  }
  void set_ef_construction(int ef_construction) {
    ef_construction_ = ef_construction;
  }

  // ---- RaBitQ settings ----
  int total_bits() const {
    return total_bits_;
  }
  void set_total_bits(int total_bits) {
    total_bits_ = total_bits;
  }

  int num_clusters() const {
    return num_clusters_;
  }
  void set_num_clusters(int num_clusters) {
    num_clusters_ = num_clusters;
  }

  int sample_count() const {
    return sample_count_;
  }
  void set_sample_count(int sample_count) {
    sample_count_ = sample_count;
  }

  // ---- attached collaborators ----
  core::IndexProvider::Pointer raw_vector_provider() const {
    return raw_vector_provider_;
  }
  void set_raw_vector_provider(
      core::IndexProvider::Pointer raw_vector_provider) {
    raw_vector_provider_ = std::move(raw_vector_provider);
  }

  core::IndexReformer::Pointer rabitq_reformer() const {
    return rabitq_reformer_;
  }
  void set_rabitq_reformer(core::IndexReformer::Pointer rabitq_reformer) {
    rabitq_reformer_ = std::move(rabitq_reformer);
  }

 private:
  int total_bits_;
  int num_clusters_;
  int sample_count_;
  int m_;
  int ef_construction_;
  core::IndexProvider::Pointer raw_vector_provider_;
  core::IndexReformer::Pointer rabitq_reformer_;
};

/*
 * Vector: Flat index params. Carries only the metric and quantize type
 * inherited from VectorIndexParams.
 */
class FlatIndexParams : public VectorIndexParams {
 public:
  using OPtr = std::shared_ptr<FlatIndexParams>;

  FlatIndexParams(MetricType metric_type,
                  QuantizeType quantize_type = QuantizeType::UNDEFINED)
      : VectorIndexParams(IndexType::FLAT, metric_type, quantize_type) {}

  // Returns a newly allocated FlatIndexParams with the same settings.
  Ptr clone() const override {
    return std::make_shared<FlatIndexParams>(metric_type_, quantize_type_);
  }

  // Renders the common vector-index prefix and closes the brace.
  std::string to_string() const override {
    std::ostringstream out;
    out << vector_index_params_to_string("FlatIndexParams", metric_type_,
                                         quantize_type_);
    out << "}";
    return out.str();
  }

  // Equal when the index type, metric and quantize type all match.
  bool operator==(const IndexParams &other) const override {
    if (type() != other.type()) {
      return false;
    }
    // Only reached once the type tags have been found equal.
    const auto &rhs = static_cast<const VectorIndexParams &>(other);
    return metric_type() == rhs.metric_type() &&
           quantize_type() == rhs.quantize_type();
  }
};

// Default vector index params: a FlatIndexParams built with MetricType::IP and
// the constructor's default quantize type (QuantizeType::UNDEFINED).
// As a namespace-scope `const` object defined in a header it has internal
// linkage, so each translation unit that includes this header gets its own
// copy.
const FlatIndexParams DefaultVectorIndexParams(MetricType::IP);

// Builds flat index params for `metric_type` with the default
// (QuantizeType::UNDEFINED) quantize type.
inline FlatIndexParams MakeDefaultVectorIndexParams(MetricType metric_type) {
  FlatIndexParams params(metric_type);
  return params;
}

// Builds flat index params for `metric_type` using the given `quantize_type`.
inline FlatIndexParams MakeDefaultQuantVectorIndexParams(
    MetricType metric_type, QuantizeType quantize_type) {
  FlatIndexParams params(metric_type, quantize_type);
  return params;
}

/*
 * Vector: IVF index params (n_list, n_iters, use_soar).
 */
class IVFIndexParams : public VectorIndexParams {
 public:
  using OPtr = std::shared_ptr<IVFIndexParams>;

  // Defaults: n_list = 1024, n_iters = 10, use_soar = false, no quantization.
  IVFIndexParams(MetricType metric_type, int n_list = 1024, int n_iters = 10,
                 bool use_soar = false,
                 QuantizeType quantize_type = QuantizeType::UNDEFINED)
      : VectorIndexParams(IndexType::IVF, metric_type, quantize_type),
        n_list_(n_list),
        n_iters_(n_iters),
        use_soar_(use_soar) {}

  // Returns a newly allocated IVFIndexParams with the same settings.
  Ptr clone() const override {
    return std::make_shared<IVFIndexParams>(metric_type_, n_list_, n_iters_,
                                            use_soar_, quantize_type_);
  }

  // Renders the common prefix followed by n_list and n_iters.
  // NOTE(review): use_soar_ is not part of the string although it takes part
  // in clone() and operator== -- confirm whether the omission is intentional.
  std::string to_string() const override {
    std::ostringstream out;
    out << vector_index_params_to_string("IVFIndexParams", metric_type_,
                                         quantize_type_);
    out << ",n_list:" << n_list_;
    out << ",n_iters:" << n_iters_ << "}";
    return out.str();
  }

  // Equal when index type, metric, n_list, n_iters, use_soar and quantize type
  // all match.
  bool operator==(const IndexParams &other) const override {
    if (type() != other.type()) {
      return false;
    }
    // Only reached once the type tags have been found equal.
    const auto &rhs = static_cast<const IVFIndexParams &>(other);
    if (metric_type() != rhs.metric_type()) {
      return false;
    }
    if (n_list_ != rhs.n_list_ || n_iters_ != rhs.n_iters_ ||
        use_soar_ != rhs.use_soar_) {
      return false;
    }
    return quantize_type() == rhs.quantize_type();
  }

  void set_n_list(int n_list) {
    n_list_ = n_list;
  }
  int n_list() const {
    return n_list_;
  }

  void set_n_iters(int n_iters) {
    n_iters_ = n_iters;
  }
  int n_iters() const {
    return n_iters_;
  }

  void set_use_soar(bool use_soar) {
    use_soar_ = use_soar;
  }
  bool use_soar() const {
    return use_soar_;
  }

 private:
  int n_list_;
  int n_iters_;
  bool use_soar_;
};

}  // namespace zvec

================================================
FILE: src/include/zvec/db/options.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <cstdint>

namespace zvec {

// Default for the max_buffer_size_ option fields below. The value is
// 64 * 1024 * 1024 = 67108864, i.e. 64M (an earlier comment said 128M, which
// did not match the expression). Unit is presumably bytes -- confirm.
const uint32_t DEFAULT_MAX_BUFFER_SIZE = 64 * 1024 * 1024;  // 64M

// Options supplied when opening/creating a collection.
struct CollectionOptions {
  bool read_only_{false};
  bool enable_mmap_{true};  // ignored when loading a collection
  uint32_t max_buffer_size_{
      DEFAULT_MAX_BUFFER_SIZE};  // ignored when read_only=true

  // Uses the in-class defaults above.
  CollectionOptions() = default;

  CollectionOptions(bool read_only, bool enable_mmap,
                    uint32_t max_buffer_size = DEFAULT_MAX_BUFFER_SIZE)
      : read_only_(read_only),
        enable_mmap_(enable_mmap),
        max_buffer_size_(max_buffer_size) {}

  // Member-wise comparison of all three options.
  bool operator==(const CollectionOptions &other) const {
    if (read_only_ != other.read_only_) {
      return false;
    }
    if (enable_mmap_ != other.enable_mmap_) {
      return false;
    }
    return max_buffer_size_ == other.max_buffer_size_;
  }

  bool operator!=(const CollectionOptions &other) const {
    return !operator==(other);
  }
};

// Options applied to a single segment.
//
// Both flags carry an explicit `false` default so that a default-initialized
// object (`SegmentOptions opts;`) never holds indeterminate bools. `false` is
// the value that `SegmentOptions{}` already produced for these members, so
// existing value-/aggregate-initializations behave exactly as before.
struct SegmentOptions {
  bool read_only_{false};
  bool enable_mmap_{false};
  uint32_t max_buffer_size_{DEFAULT_MAX_BUFFER_SIZE};
};

// Options for index creation.
struct CreateIndexOptions {
  // 0 (the default) means: use config.optimize_thread_pool.
  int concurrency_ = 0;
};

// Options for an optimize operation; concurrency_ defaults to 0.
struct OptimizeOptions {
  int concurrency_ = 0;
};

// Options for adding a column; concurrency_ defaults to 0.
struct AddColumnOptions {
  int concurrency_ = 0;
};

// Options for altering a column; concurrency_ defaults to 0.
struct AlterColumnOptions {
  int concurrency_ = 0;
};

}  // namespace zvec

================================================
FILE: src/include/zvec/db/query_params.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <memory>
#include <zvec/core/interface/constants.h>
#include <zvec/db/type.h>

namespace zvec {

/*
 * Query Index params
 */
class QueryParams {
 public:
  using Ptr = std::shared_ptr<QueryParams>;

  // Stores the index type; radius, is_linear and is_using_refiner start at
  // 0.0f / false / false.
  QueryParams(IndexType type) : type_(type) {}
  virtual ~QueryParams() = default;

  // ---- index type ----
  void set_type(IndexType type) {
    type_ = type;
  }
  IndexType type() const {
    return type_;
  }

  // ---- radius ----
  void set_radius(float radius) {
    radius_ = radius;
  }
  float radius() const {
    return radius_;
  }

  // ---- linear flag ----
  void set_is_linear(bool is_linear) {
    is_linear_ = is_linear;
  }
  bool is_linear() const {
    return is_linear_;
  }

  // ---- refiner flag ----
  void set_is_using_refiner(bool is_using_refiner) {
    is_using_refiner_ = is_using_refiner;
  }
  bool is_using_refiner() const {
    return is_using_refiner_;
  }

 private:
  IndexType type_;
  float radius_ = 0.0f;
  bool is_linear_ = false;
  bool is_using_refiner_ = false;
};

class HnswQueryParams : public QueryParams {
 public:
  HnswQueryParams(int ef = core_interface::kDefaultHnswEfSearch,
                  float radius = 0.0f, bool is_linear = false,
                  bool is_using_refiner = false)
      : QueryParams(IndexType::HNSW), ef_(ef) {
    set_radius(radius);
    set_is_linear(is_linear);
    set_is_using_refiner(is_using_refiner);
  }

  virtual ~HnswQueryParams() = default;

  int ef() const {
    return ef_;
  }

  void set_ef(int ef) {
    ef_ = ef;
  }

 private:
  int ef_;
};

class IVFQueryParams : public QueryParams {
 public:
  IVFQueryParams(int nprobe = 10, bool is_using_refiner = false,
                 float scale_factor = 10)
      : QueryParams(IndexType::IVF), nprobe_(nprobe) {
    set_is_using_refiner(is_using_refiner);
    set_scale_factor(scale_factor);
  }

  virtual ~IVFQueryParams() = default;

  int nprobe() const {
    return nprobe_;
  }

  void set_nprobe(int nprobe) {
    nprobe_ = nprobe;
  }

  float scale_factor() const {
    return scale_factor_;
  }

  void set_scale_factor(float scale_factor) {
    scale_factor_ = scale_factor;
  }

 private:
  int nprobe_;
  float scale_factor_{10};
};

class HnswRabitqQueryParams : public QueryParams {
 public:
  HnswRabitqQueryParams(int ef = core_interface::kDefaultHnswEfSearch,
                        float radius = 0.0f, bool is_linear = false,
                        bool is_using_refiner = false)
      : QueryParams(IndexType::HNSW_RABITQ), ef_(ef) {
    set_radius(radius);
    set_is_linear(is_linear);
    set_is_using_refiner(is_using_refiner);
  }

  virtual ~HnswRabitqQueryParams() = default;

  int ef() const {
    return ef_;
  }

  void set_ef(int ef) {
    ef_ = ef;
  }

 private:
  int ef_;
};

class FlatQueryParams : public QueryParams {
 public:
  FlatQueryParams(bool is_using_refiner = false, float scale_factor = 10)
      : QueryParams(IndexType::FLAT) {
    set_is_using_refiner(is_using_refiner);
    set_scale_factor(scale_factor);
  }

  virtual ~FlatQueryParams() = default;

  float scale_factor() const {
    return scale_factor_;
  }

  void set_scale_factor(float scale_factor) {
    scale_factor_ = scale_factor;
  }

 private:
  float scale_factor_{10};
};

}  // namespace zvec

================================================
FILE: src/include/zvec/db/schema.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <cstdint>
#include <unordered_map>
#include <zvec/db/index_params.h>
#include <zvec/db/status.h>
#include <zvec/db/type.h>

namespace zvec {

// Initial value of CollectionSchema::max_doc_count_per_segment_ (10,000,000).
const uint64_t MAX_DOC_COUNT_PER_SEGMENT = 10000000;
// NOTE(review): presumably the smallest per-segment doc count accepted when
// the limit is configured -- its use is not visible here, confirm against the
// validation code.
const uint64_t MAX_DOC_COUNT_PER_SEGMENT_MIN_THRESHOLD = 1000;

/*
 * Field schema
 */
class FieldSchema {
 public:
  using Ptr = std::shared_ptr<FieldSchema>;

 public:
  // An empty schema: no name, DataType::UNDEFINED, not nullable, dimension 0,
  // no index params.
  FieldSchema() = default;

  // Non-nullable field without dimension or index params.
  FieldSchema(const std::string &name, DataType type)
      : name_(name), data_type_(type) {}

  // Field without dimension; `index_params` (if any) is cloned.
  FieldSchema(const std::string &name, DataType type, bool nullable,
              const IndexParams::Ptr &index_params = nullptr)
      : name_(name),
        data_type_(type),
        nullable_(nullable),
        index_params_(clone_params(index_params)) {}

  // Field with an explicit dimension; `index_params` (if any) is cloned.
  FieldSchema(const std::string &name, DataType type, uint32_t dimension,
              bool nullable, const IndexParams::Ptr &index_params = nullptr)
      : name_(name),
        data_type_(type),
        nullable_(nullable),
        dimension_(dimension),
        index_params_(clone_params(index_params)) {}

  // Copying duplicates the index params instead of sharing them.
  FieldSchema(const FieldSchema &other)
      : name_(other.name_),
        data_type_(other.data_type_),
        nullable_(other.nullable_),
        dimension_(other.dimension_),
        index_params_(clone_params(other.index_params_)) {}

  FieldSchema &operator=(const FieldSchema &other) {
    if (this == &other) {
      return *this;
    }
    name_ = other.name_;
    data_type_ = other.data_type_;
    nullable_ = other.nullable_;
    dimension_ = other.dimension_;
    index_params_ = clone_params(other.index_params_);
    return *this;
  }

  FieldSchema(FieldSchema &&) = default;
  FieldSchema &operator=(FieldSchema &&) = default;
  ~FieldSchema() = default;

 public:
  // Field-wise equality. Index params compare equal when both are absent, or
  // both are present and their operator== reports equality.
  bool operator==(const FieldSchema &other) const {
    const bool same_basics =
        name_ == other.name_ && data_type_ == other.data_type_ &&
        nullable_ == other.nullable_ && dimension_ == other.dimension_;
    if (!same_basics) {
      return false;
    }
    if (index_params_ == nullptr || other.index_params_ == nullptr) {
      // Equal only if both sides lack index params.
      return index_params_ == nullptr && other.index_params_ == nullptr;
    }
    return *index_params_ == *other.index_params_;
  }

  bool operator!=(const FieldSchema &other) const {
    return !operator==(other);
  }

  std::string to_string() const;

  std::string to_string_formatted(int indent_level = 0) const;

 public:
  // ---- name ----
  const std::string &name() const {
    return name_;
  }
  void set_name(const std::string &name) {
    name_ = name;
  }

  // ---- data type ----
  DataType data_type() const {
    return data_type_;
  }
  void set_data_type(DataType type) {
    data_type_ = type;
  }

  // Element type for array types; the type itself otherwise.
  DataType element_data_type() const {
    return get_element_data_type(data_type_);
  }

  // See get_element_data_size().
  size_t element_data_size() const {
    return get_element_data_size(data_type_);
  }

  bool is_vector_field() const {
    return is_vector_field(data_type_);
  }

  bool is_dense_vector() const {
    return is_dense_vector_field(data_type_);
  }

  bool is_sparse_vector() const {
    return is_sparse_vector_field(data_type_);
  }

  // True when the data type lies in [ARRAY_BINARY, ARRAY_DOUBLE].
  bool is_array_type() const {
    const bool at_or_after_first = data_type_ >= DataType::ARRAY_BINARY;
    const bool at_or_before_last = data_type_ <= DataType::ARRAY_DOUBLE;
    return at_or_after_first && at_or_before_last;
  }

  // ---- nullability ----
  bool nullable() const {
    return nullable_;
  }
  void set_nullable(bool nullable) {
    nullable_ = nullable;
  }

  // ---- dimension ----
  uint32_t dimension() const {
    return dimension_;
  }
  void set_dimension(uint32_t dimension) {
    dimension_ = dimension;
  }

  // ---- index ----
  // A non-vector field with index params attached.
  bool has_invert_index() const {
    if (is_vector_field()) {
      return false;
    }
    return index_params_ != nullptr;
  }

  // IndexType::UNDEFINED when no index params are attached.
  IndexType index_type() const {
    return index_params_ ? index_params_->type() : IndexType::UNDEFINED;
  }

  IndexParams::Ptr index_params() const {
    return index_params_;
  }

  // Stores the given pointer as-is (the object is shared with the caller,
  // unlike the constructors, which clone).
  void set_index_params(const IndexParams::Ptr &index_params) {
    index_params_ = index_params;
  }

  // Stores a clone of the given params.
  void set_index_params(const IndexParams &index_params) {
    index_params_ = index_params.clone();
  }

  Status validate() const;

 public:
  // True for types in [VECTOR_BINARY32, VECTOR_INT16].
  static bool is_dense_vector_field(DataType type) {
    return DataType::VECTOR_BINARY32 <= type && type <= DataType::VECTOR_INT16;
  }

  // True for types in [SPARSE_VECTOR_FP16, SPARSE_VECTOR_FP32].
  static bool is_sparse_vector_field(DataType type) {
    return DataType::SPARSE_VECTOR_FP16 <= type &&
           type <= DataType::SPARSE_VECTOR_FP32;
  }

  static bool is_vector_field(DataType type) {
    if (is_dense_vector_field(type)) {
      return true;
    }
    return is_sparse_vector_field(type);
  }

  // Maps ARRAY_X to X; any other type is returned unchanged.
  static DataType get_element_data_type(DataType data_type) {
    switch (data_type) {
      case DataType::ARRAY_BINARY:
        return DataType::BINARY;
      case DataType::ARRAY_STRING:
        return DataType::STRING;
      case DataType::ARRAY_BOOL:
        return DataType::BOOL;
      case DataType::ARRAY_INT32:
        return DataType::INT32;
      case DataType::ARRAY_INT64:
        return DataType::INT64;
      case DataType::ARRAY_UINT32:
        return DataType::UINT32;
      case DataType::ARRAY_UINT64:
        return DataType::UINT64;
      case DataType::ARRAY_FLOAT:
        return DataType::FLOAT;
      case DataType::ARRAY_DOUBLE:
        return DataType::DOUBLE;
      default:
        break;
    }
    return data_type;
  }

  // sizeof the scalar element for fixed-width scalar/array types; 0 for
  // binary, string and every other type.
  static size_t get_element_data_size(DataType data_type) {
    switch (data_type) {
      case DataType::BOOL:
      case DataType::ARRAY_BOOL:
        return sizeof(bool);
      case DataType::INT32:
      case DataType::ARRAY_INT32:
        return sizeof(int32_t);
      case DataType::INT64:
      case DataType::ARRAY_INT64:
        return sizeof(int64_t);
      case DataType::UINT32:
      case DataType::ARRAY_UINT32:
        return sizeof(uint32_t);
      case DataType::UINT64:
      case DataType::ARRAY_UINT64:
        return sizeof(uint64_t);
      case DataType::FLOAT:
      case DataType::ARRAY_FLOAT:
        return sizeof(float);
      case DataType::DOUBLE:
      case DataType::ARRAY_DOUBLE:
        return sizeof(double);
      case DataType::BINARY:
      case DataType::STRING:
      case DataType::ARRAY_BINARY:
      case DataType::ARRAY_STRING:
      default:
        return 0;
    }
  }


 private:
  // Clones `params` when present; yields a null pointer otherwise.
  static IndexParams::Ptr clone_params(const IndexParams::Ptr &params) {
    return params ? params->clone() : nullptr;
  }

 private:
  std::string name_;
  DataType data_type_{DataType::UNDEFINED};
  bool nullable_{false};
  uint32_t dimension_{0U};
  IndexParams::Ptr index_params_;
};

// Ordered list of shared field schemas.
using FieldSchemaPtrList = std::vector<FieldSchema::Ptr>;
// Field schemas keyed by string (CollectionSchema keys it by field name).
using FieldSchemaPtrMap = std::unordered_map<std::string, FieldSchema::Ptr>;

/*
 * Collection schema
 */
class CollectionSchema {
 public:
  using Ptr = std::shared_ptr<CollectionSchema>;

 public:
  CollectionSchema() = default;

  CollectionSchema(const std::string &name) : name_(name) {}

  CollectionSchema(const std::string &name, const FieldSchemaPtrList &fields)
      : name_(name) {
    copy_fields(fields);
  }

  CollectionSchema(const CollectionSchema &other) {
    name_ = other.name_;
    copy_fields(other.fields_);
    max_doc_count_per_segment_ = other.max_doc_count_per_segment_;
  }

 public:
  std::string to_string() const;


  std::string to_string_formatted(int indent_level = 0) const;

  std::string name() const {
    return name_;
  }

  void set_name(const std::string &name) {
    name_ = name;
  }

  Status add_field(FieldSchema::Ptr column_schema);

  Status alter_field(const std::string &column_name,
                     const FieldSchema::Ptr &new_column_options);

  Status drop_field(const std::string &column_name);

  bool has_field(const std::string &column) const;

  const FieldSchema *get_field(const std::string &column) const;
  FieldSchema *get_field(const std::string &column);
  const FieldSchema *get_forward_field(const std::string &column) const;
  FieldSchema *get_forward_field(const std::string &column);
  const FieldSchema *get_vector_field(const std::string &column) const;
  FieldSchema *get_vector_field(const std::string &column);

  FieldSchemaPtrList fields() const;

  FieldSchemaPtrList forward_fields() const;

  FieldSchemaPtrList forward_fields_with_index() const;

  std::vector<std::string> forward_field_names() const;

  std::vector<std::string> forward_field_names_with_index() const;

  std::vector<std::string> all_field_names() const;

  FieldSchemaPtrList vector_fields() const;

  uint64_t max_doc_count_per_segment() const;

  void set_max_doc_count_per_segment(uint64_t max_doc_count_per_segment);

  Status validate() const;

 public:
  Status add_index(const std::string &column,
                   const IndexParams::Ptr &index_options);

  Status drop_index(const std::string &column);

  bool has_index(const std::string &column) const;

 public:
  bool operator==(const CollectionSchema &other) const {
    if (name_ != other.name_ || fields_.size() != other.fields_.size()) {
      return false;
    }

    for (size_t i = 0; i < fields_.size(); ++i) {
      if (*fields_[i] != *other.fields_[i]) {
        return false;
      }
    }

    return true;
  }

  bool operator!=(const CollectionSchema &other) const {
    return !(*this == other);
  }

 private:
  void copy_fields(const FieldSchemaPtrList &fields) {
    for (auto &field : fields) {
      auto c = std::make_shared<FieldSchema>(*field);
      fields_.push_back(c);
      fields_map_[field->name()] = c;
    }
  }

 private:
  std::string name_{};
  FieldSchemaPtrList fields_{};
  FieldSchemaPtrMap fields_map_{};

  uint64_t max_doc_count_per_segment_{MAX_DOC_COUNT_PER_SEGMENT};
};

}  // namespace zvec

================================================
FILE: src/include/zvec/db/stats.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <cstdint>
#include <string>
#include <unordered_map>

namespace zvec {

/*
 * Collection stats
 */
struct CollectionStats {
  // Number of documents; starts at 0.
  uint64_t doc_count = 0;

  // column -> completeness
  std::unordered_map<std::string, float> index_completeness;

  std::string to_string() const;
  std::string to_string_formatted(int indent_level = 0) const;
};

}  // namespace zvec

================================================
FILE: src/include/zvec/db/status.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <string>
#include <zvec/ailego/pattern/expected.hpp>
#include <zvec/ailego/utility/string_helper.h>

namespace zvec {

// Forward declaration so Result<T> can name Status before it is defined.
class Status;
// Result<T> holds either a T or the Status describing the failure.
template <typename T>
using Result = tl::expected<T, Status>;

// Stream insertion for Status (declared here; the definition is not in this
// header).
std::ostream &operator<<(std::ostream &os, const Status &s);

/**
 * @brief Enumeration of common error codes.
 */
enum class StatusCode {
  OK = 0,  // success; the only code for which Status::ok() is true
  NOT_FOUND = 1,
  ALREADY_EXISTS = 2,
  INVALID_ARGUMENT = 3,
  PERMISSION_DENIED = 4,
  FAILED_PRECONDITION = 5,
  RESOURCE_EXHAUSTED = 6,
  UNAVAILABLE = 7,
  INTERNAL_ERROR = 8,
  NOT_SUPPORTED = 9,
  UNKNOWN = 10
};

// Helper: get default message for code
const char *GetDefaultMessage(StatusCode code);

/**
 * @class Status
 * @brief Represents the result of an operation: success or failure with
 * message.
 *
 * This class is used to return error information from functions without using
 * exceptions. It stores a status code and an optional error message.
 *
 * @note This class is thread-compatible: const methods can be called from
 * multiple threads.
 */
class Status {
 public:
  /// @brief Default constructor: OK status with an empty message
  Status() noexcept : code_(StatusCode::OK) {}

  /// @brief Construct a status from a code and a message (copied)
  Status(StatusCode code, const std::string &msg) : code_(code), msg_(msg) {
    ensure_not_ok(code);
  }

  /// @brief Construct a status from a code and a message (moved in)
  Status(StatusCode code, std::string &&msg)
      : code_(code), msg_(std::move(msg)) {
    ensure_not_ok(code);
  }

  /// @brief Copy / move operations and destructor are compiler-generated
  Status(const Status &) = default;
  Status &operator=(const Status &) = default;
  Status(Status &&) = default;
  Status &operator=(Status &&) = default;
  ~Status() = default;

  /// @brief True when the code is StatusCode::OK
  bool ok() const noexcept {
    return StatusCode::OK == code_;
  }

  /// @brief Get the status code
  StatusCode code() const noexcept {
    return code_;
  }

  /// @brief Get the stored message
  const std::string &message() const noexcept {
    return msg_;
  }

  /// @brief Get the message as a C string; valid as long as this Status (and
  /// its message) is unchanged
  const char *c_str() const noexcept {
    return message().c_str();
  }

  /// @brief Comparison operators (operator== is defined out of line)
  bool operator==(const Status &other) const noexcept;
  bool operator!=(const Status &other) const noexcept {
    return !operator==(other);
  }

  /// @brief Factory: Success
  static Status OK() noexcept {
    return Status();
  }

  /// @brief Factory: Invalid argument; the arguments are concatenated into
  /// the message
  template <typename... Args>
  static Status InvalidArgument(Args &&...args) {
    return make_error(StatusCode::INVALID_ARGUMENT,
                      std::forward<Args>(args)...);
  }

  /// @brief Factory: Not found
  template <typename... Args>
  static Status NotFound(Args &&...args) {
    return make_error(StatusCode::NOT_FOUND, std::forward<Args>(args)...);
  }

  /// @brief Factory: Already exists
  template <typename... Args>
  static Status AlreadyExists(Args &&...args) {
    return make_error(StatusCode::ALREADY_EXISTS, std::forward<Args>(args)...);
  }

  /// @brief Factory: Internal error
  template <typename... Args>
  static Status InternalError(Args &&...args) {
    return make_error(StatusCode::INTERNAL_ERROR, std::forward<Args>(args)...);
  }

  /// @brief Factory: Permission denied
  template <typename... Args>
  static Status PermissionDenied(Args &&...args) {
    return make_error(StatusCode::PERMISSION_DENIED,
                      std::forward<Args>(args)...);
  }

  /// @brief Factory: Not supported
  template <typename... Args>
  static Status NotSupported(Args &&...args) {
    return make_error(StatusCode::NOT_SUPPORTED, std::forward<Args>(args)...);
  }

  // Add more factories as needed...

 private:
  /// @brief Placeholder hook called by the code+message constructors;
  /// currently does nothing
  static void ensure_not_ok(StatusCode /*code*/) noexcept {
    // Intentionally empty: no check is performed on the code or message.
  }

  /// @brief Helper: concatenate any number of arguments into a string
  template <typename... Args>
  static std::string concat(Args &&...args) {
    return ailego::StringHelper::Concat(std::forward<Args>(args)...);
  }

  /// @brief Helper shared by the factories: build a Status whose message is
  /// the concatenation of `args`
  template <typename... Args>
  static Status make_error(StatusCode code, Args &&...args) {
    return Status(code, concat(std::forward<Args>(args)...));
  }

  StatusCode code_;
  std::string msg_;
};

}  // namespace zvec

================================================
FILE: src/include/zvec/db/type.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <cstdint>

namespace zvec {

/*
 * Column index types
 */
// Stored as uint32_t with explicitly assigned values. INVERT sits apart at 10;
// the other non-zero values (1..4) are the index types used by the
// VectorIndexParams subclasses in index_params.h.
enum class IndexType : uint32_t {
  UNDEFINED = 0,  // returned by FieldSchema::index_type() when no index is set
  HNSW = 1,
  IVF = 2,
  FLAT = 3,
  HNSW_RABITQ = 4,
  INVERT = 10,
};

/*
 * Column data types
 */
// Values are grouped in numeric ranges. FieldSchema relies on these ranges
// being contiguous: is_dense_vector_field() tests
// [VECTOR_BINARY32, VECTOR_INT16], is_sparse_vector_field() tests
// [SPARSE_VECTOR_FP16, SPARSE_VECTOR_FP32] and is_array_type() tests
// [ARRAY_BINARY, ARRAY_DOUBLE]. New enumerators must stay inside the matching
// range (and the range bounds must be updated if a new first/last is added).
enum class DataType : uint32_t {
  UNDEFINED = 0,

  // Scalar types.
  BINARY = 1,
  STRING = 2,
  BOOL = 3,
  INT32 = 4,
  INT64 = 5,
  UINT32 = 6,
  UINT64 = 7,
  FLOAT = 8,
  DOUBLE = 9,

  // Dense vector types.
  VECTOR_BINARY32 = 20,
  VECTOR_BINARY64 = 21,
  VECTOR_FP16 = 22,
  VECTOR_FP32 = 23,
  VECTOR_FP64 = 24,
  VECTOR_INT4 = 25,
  VECTOR_INT8 = 26,
  VECTOR_INT16 = 27,

  // Sparse vector types.
  SPARSE_VECTOR_FP16 = 30,
  SPARSE_VECTOR_FP32 = 31,

  // Array types; each maps to the scalar of the same suffix in
  // FieldSchema::get_element_data_type().
  ARRAY_BINARY = 40,
  ARRAY_STRING = 41,
  ARRAY_BOOL = 42,
  ARRAY_INT32 = 43,
  ARRAY_INT64 = 44,
  ARRAY_UINT32 = 45,
  ARRAY_UINT64 = 46,
  ARRAY_FLOAT = 47,
  ARRAY_DOUBLE = 48,
};

// Quantization applied to a vector index. UNDEFINED is the default argument
// of the Hnsw/Flat/IVF index-params constructors; RABITQ is the value
// HnswRabitqIndexParams always passes to its base class.
enum class QuantizeType : uint32_t {
  UNDEFINED = 0,
  FP16 = 1,
  INT8 = 2,
  INT4 = 3,
  RABITQ = 4,
};

// Metric carried by vector index params. IP is the metric used by
// DefaultVectorIndexParams in index_params.h.
enum class MetricType : uint32_t {
  UNDEFINED = 0,
  L2 = 1,
  IP = 2,
  COSINE = 3,
  MIPSL2 = 4,
};

// Kind of write operation.
// NOTE(review): `DELETE` is also a macro in some platform headers (e.g. the
// Windows SDK) -- confirm this header is never compiled after such a header,
// or guard against the macro.
enum class Operator : uint32_t {
  INSERT = 0,
  UPSERT = 1,
  UPDATE = 2,
  DELETE = 3,
};

// Comparison operators. The numeric values are spelled out so they stay put
// if enumerators are ever inserted or reordered.
enum class CompareOp : uint32_t {
  NONE = 0,
  EQ = 1,
  NE = 2,
  LT = 3,
  LE = 4,
  GT = 5,
  GE = 6,
  LIKE = 7,
  CONTAIN_ALL = 8,
  CONTAIN_ANY = 9,
  NOT_CONTAIN_ALL = 10,
  NOT_CONTAIN_ANY = 11,
  IS_NULL = 12,
  IS_NOT_NULL = 13,
  HAS_PREFIX = 14,
  HAS_SUFFIX = 15,
};

// Logical connective between conditions.
// This is an unscoped enum: NONE, AND and OR are visible directly in
// namespace zvec and convert implicitly to integers.
enum RelationOp : uint32_t {
  NONE = 0,

  AND = 1,
  OR = 2
};

/*
 * Block kinds.
 *
 * This is an unscoped enum (no `class`): its enumerators are visible directly
 * in namespace zvec and convert implicitly to integers. UNDEFINED is the zero
 * value.
 */
enum BlockType : uint32_t {
  UNDEFINED = 0,
  SCALAR = 1,
  SCALAR_INDEX = 2,
  VECTOR_INDEX = 3,
  VECTOR_INDEX_QUANTIZE = 4,
};


/*
 * File formats.
 *
 * The zero value is named UNKNOWN here, whereas most other enums in this
 * header use UNDEFINED.
 */
enum class FileFormat : uint32_t {
  UNKNOWN = 0,
  IPC = 1,
  PARQUET = 2,
};

/*
 * Column schema operations.
 *
 * Values are written out explicitly; UNDEFINED is the zero value.
 */
enum class ColumnOp : uint32_t {
  UNDEFINED = 0,
  ADD = 1,
  ALTER = 2,
  DROP = 3,
};

}  // namespace zvec


================================================
FILE: src/include/zvec/turbo/turbo.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <functional>
#include <zvec/ailego/math_batch/utils.h>

namespace zvec::turbo {

// Callable types returned by the get_*_func() lookups declared below.
//
// DistanceFunc(m, q, dim, out):
//   one vector `m` against one query `q`; the result is written through `out`.
// BatchDistanceFunc(m, q, num, dim, out):
//   `num` vectors, passed as an array of pointers `m`, against one query `q`.
//   The kernels registered in turbo.cc write one result per vector to `out`.
// QueryPreprocessFunc:
//   alias of the ailego DistanceBatch query-preprocess callback type.
//
// For the INT8 kernels registered in turbo.cc, `dim` is the full encoded
// record size in bytes (elements plus metadata tail), not the element count.
using DistanceFunc =
    std::function<void(const void *m, const void *q, size_t dim, float *out)>;
using BatchDistanceFunc = std::function<void(
    const void **m, const void *q, size_t num, size_t dim, float *out)>;
using QueryPreprocessFunc =
    zvec::ailego::DistanceBatch::DistanceBatchQueryPreprocessFunc;

// Metric selector for the turbo kernel lookups.
// turbo.cc currently registers kernels for kSquaredEuclidean and kCosine only;
// every other value makes the lookups return a null callable.
enum class MetricType {
  kSquaredEuclidean,
  kCosine,
  kMipsSquaredEuclidean,
  kUnknown,
};

// Element type selector for the turbo kernel lookups.
// kInt8 is the only value turbo.cc has kernels for.
enum class DataType {
  kInt8,
  kUnknown,
};

// Quantization scheme selector for the turbo kernel lookups.
// Only kDefault exists; turbo.cc maps it to the record_quantized_int8 kernels.
enum class QuantizeType {
  kDefault,
};

// Look up the single-pair distance kernel for the given combination.
// Returns an empty DistanceFunc (constructed from nullptr) when no kernel is
// registered for the combination or the CPU lacks the required feature, so
// callers must test the result before invoking it.
DistanceFunc get_distance_func(MetricType metric_type, DataType data_type,
                               QuantizeType quantize_type);

// Look up the batched (many vectors, one query) distance kernel for the given
// combination. Returns an empty BatchDistanceFunc when nothing matches.
// The INT8 batch kernels expect a query that has already been passed through
// the function returned by get_query_preprocess_func() for the same arguments.
BatchDistanceFunc get_batch_distance_func(MetricType metric_type,
                                          DataType data_type,
                                          QuantizeType quantize_type);

// Look up the in-place query preprocessing step that pairs with the batch
// kernel for the given combination. Returns a null QueryPreprocessFunc when
// nothing matches.
QueryPreprocessFunc get_query_preprocess_func(MetricType metric_type,
                                              DataType data_type,
                                              QuantizeType quantize_type);

}  // namespace zvec::turbo


================================================
FILE: src/turbo/CMakeLists.txt
================================================
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

# Resolve the per-ISA -march flag variables (TURBO_MARCH_FLAG_*) on x86 hosts.
# Skipped entirely on Android or when AUTO_DETECT_ARCH is off, in which case
# the TURBO_MARCH_FLAG_* variables stay unset.
if(NOT ANDROID AND AUTO_DETECT_ARCH)
    if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|i686|i386|x64")
        setup_compiler_march_for_x86(TURBO_MARCH_FLAG_SSE TURBO_MARCH_FLAG_AVX2 TURBO_MARCH_FLAG_AVX512 TURBO_MARCH_FLAG_AVX512FP16)
    elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64|ARM64")
        # ARM64 architecture - no special march flags needed for now
        # NEON implementations can be added here if needed
        message(STATUS "turbo: ARM64 detected, skipping x86-specific optimizations")
    endif()
endif()

# Collect every .cc/.c/.h file below this directory (recursively).
# The glob is evaluated at configure time, so newly added files are only
# picked up after CMake is re-run.
file(GLOB_RECURSE ALL_SRCS *.cc *.c *.h)

# Set per-file compile flags for AVX512-VNNI sources.
# set_source_files_properties is directory-scoped, so it must be called in the
# same directory that adds the sources to a target (i.e. here, not in a
# subdirectory).
#
# Only the .cc files under avx512_vnni/ receive TURBO_MARCH_FLAG_AVX512; when
# this block is skipped (Android, AUTO_DETECT_ARCH off, non-x86) they are built
# with the default flags and their `#if defined(__AVX512VNNI__)` bodies compile
# to no-ops.
# NOTE(review): the sources are guarded by __AVX512VNNI__; whether
# TURBO_MARCH_FLAG_AVX512 actually enables VNNI depends on
# setup_compiler_march_for_x86 -- confirm.
if(NOT ANDROID AND AUTO_DETECT_ARCH)
    if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|i686|i386|x64")
        file(GLOB_RECURSE AVX512_VNNI_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/avx512_vnni/*.cc)
        set_source_files_properties(
            ${AVX512_VNNI_SRCS}
            PROPERTIES
            COMPILE_FLAGS "${TURBO_MARCH_FLAG_AVX512}"
        )
    endif()
endif()

# Static library holding the turbo dispatch layer and all ISA-specific kernels.
# Sources include "avx512_vnni/..." relative to this directory and
# <zvec/turbo/turbo.h> relative to src/include, hence the two INCS entries.
cc_library(
    NAME zvec_turbo STATIC STRICT PACKED
    SRCS ${ALL_SRCS}
    LIBS zvec_ailego
    INCS ${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_ROOT_DIR}/src/include
)


================================================
FILE: src/turbo/avx512_vnni/record_quantized_int8/common.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Shared AVX512-VNNI inner product kernels for record_quantized_int8 distance
// implementations (cosine, l2, mips_l2, etc.).
//
// All functions are marked always_inline so that when this header is included
// from a per-file-march .cc translation unit, the compiler can fully inline
// and optimize them under the correct -march flag without any cross-TU call
// overhead.

#pragma once

#if defined(__AVX512VNNI__)
#include <immintrin.h>
#include <array>
#include <cstdint>

namespace zvec::turbo::avx512_vnni::internal {

// Sum the eight 32-bit integer lanes of `v` into one int32_t.
// Two in-lane horizontal adds leave the sum of each 128-bit half in that
// half's lowest element; the two halves are then added together.
static inline int32_t HorizontalAdd_INT32_V256(__m256i v) {
  const __m256i pair_sums = _mm256_hadd_epi32(v, v);
  const __m256i half_sums = _mm256_hadd_epi32(pair_sums, pair_sums);
  const __m128i upper_half = _mm256_extractf128_si256(half_sums, 1);
  const __m128i lower_half = _mm256_castsi256_si128(half_sums);
  return _mm_cvtsi128_si32(_mm_add_epi32(lower_half, upper_half));
}

// Load 32 int8 values; `kAligned` selects the 32-byte-aligned load.
template <bool kAligned>
static inline __attribute__((always_inline)) __m256i load_int8x32(
    const int8_t *p) {
  if constexpr (kAligned) {
    return _mm256_load_si256(reinterpret_cast<const __m256i *>(p));
  } else {
    return _mm256_loadu_si256(reinterpret_cast<const __m256i *>(p));
  }
}

// Load 16 int8 values; `kAligned` selects the 16-byte-aligned load.
template <bool kAligned>
static inline __attribute__((always_inline)) __m128i load_int8x16(
    const int8_t *p) {
  if constexpr (kAligned) {
    return _mm_load_si128(reinterpret_cast<const __m128i *>(p));
  } else {
    return _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
  }
}

// Multiply 32 int8 pairs and return eight int32 partial sums (each lane is the
// sum of four adjacent products).
//
// maddubs multiplies unsigned bytes by signed bytes, so the sign of `rhs` is
// first transferred onto `lhs` and `rhs` is replaced by its absolute value.
// Caveat: sign_epi8 cannot negate -128, so an `lhs` element of -128 paired
// with a negative `rhs` element yields a product with the wrong sign.
// NOTE(review): presumably the quantizer keeps elements within [-127, 127] --
// confirm.
static inline __attribute__((always_inline)) __m256i madd_int8x32(
    __m256i lhs, __m256i rhs) {
  const __m256i ones_int16 = _mm256_set1_epi32(0x00010001);
  lhs = _mm256_sign_epi8(lhs, rhs);
  rhs = _mm256_abs_epi8(rhs);
  return _mm256_madd_epi16(_mm256_maddubs_epi16(rhs, lhs), ones_int16);
}

// 16-element variant of madd_int8x32; returns four int32 partial sums.
static inline __attribute__((always_inline)) __m128i madd_int8x16(
    __m128i lhs, __m128i rhs) {
  const __m128i ones_int16 = _mm_set1_epi32(0x00010001);
  lhs = _mm_sign_epi8(lhs, rhs);
  rhs = _mm_abs_epi8(rhs);
  return _mm_madd_epi16(_mm_maddubs_epi16(rhs, lhs), ones_int16);
}

// Accumulate the inner product over the largest prefix of `size` elements that
// can be covered by 64-, 32- and 16-element steps. `lhs` and `rhs` are
// advanced past the consumed elements; fewer than 16 elements remain
// afterwards. Returns eight int32 partial sums.
template <bool kAligned>
static inline __attribute__((always_inline)) __m256i ip_int8_simd_lanes(
    const int8_t *&lhs, const int8_t *&rhs, size_t size) {
  const int8_t *const last = lhs + size;
  const int8_t *const last_aligned = lhs + ((size >> 6) << 6);

  // Two independent accumulators for the 64-element main loop.
  __m256i ymm_sum_0 = _mm256_setzero_si256();
  __m256i ymm_sum_1 = _mm256_setzero_si256();

  for (; lhs != last_aligned; lhs += 64, rhs += 64) {
    ymm_sum_0 = _mm256_add_epi32(
        madd_int8x32(load_int8x32<kAligned>(lhs + 0),
                     load_int8x32<kAligned>(rhs + 0)),
        ymm_sum_0);
    ymm_sum_1 = _mm256_add_epi32(
        madd_int8x32(load_int8x32<kAligned>(lhs + 32),
                     load_int8x32<kAligned>(rhs + 32)),
        ymm_sum_1);
  }

  if (last >= last_aligned + 32) {
    ymm_sum_0 = _mm256_add_epi32(
        madd_int8x32(load_int8x32<kAligned>(lhs), load_int8x32<kAligned>(rhs)),
        ymm_sum_0);
    lhs += 32;
    rhs += 32;
  }

  if (last >= lhs + 16) {
    // Widen the 128-bit partial sums into the low half of a 256-bit register.
    ymm_sum_0 = _mm256_add_epi32(
        _mm256_set_m128i(_mm_setzero_si128(),
                         madd_int8x16(load_int8x16<kAligned>(lhs),
                                      load_int8x16<kAligned>(rhs))),
        ymm_sum_0);
    lhs += 16;
    rhs += 16;
  }

  return _mm256_add_epi32(ymm_sum_0, ymm_sum_1);
}

// Compute the raw integer inner product of two int8 vectors of length `size`.
// The result is written to `*distance` as a float.
// Both `a` and `b` must point to int8_t arrays.
//
// Despite the name, only AVX2-level instructions are used here. Aligned loads
// are used when both inputs are 32-byte aligned, unaligned loads otherwise.
// Declared `inline` in addition to always_inline: GCC warns that an
// always_inline function not declared inline "might not be inlinable".
static inline __attribute__((always_inline)) void ip_int8_avx512_vnni(
    const void *a, const void *b, size_t size, float *distance) {
  const int8_t *lhs = reinterpret_cast<const int8_t *>(a);
  const int8_t *rhs = reinterpret_cast<const int8_t *>(b);
  const int8_t *const last = lhs + size;

  const bool both_aligned = ((reinterpret_cast<uintptr_t>(lhs) & 0x1f) == 0) &&
                            ((reinterpret_cast<uintptr_t>(rhs) & 0x1f) == 0);
  const __m256i lanes = both_aligned
                            ? ip_int8_simd_lanes<true>(lhs, rhs, size)
                            : ip_int8_simd_lanes<false>(lhs, rhs, size);

  float result = static_cast<float>(HorizontalAdd_INT32_V256(lanes));

  // Scalar tail (at most 15 elements). Elements are accumulated from the
  // highest index down so the float summation order stays the same as in the
  // previous switch-based implementation.
  size_t remaining = static_cast<size_t>(last - lhs);
  while (remaining > 0) {
    --remaining;
    result += static_cast<float>(lhs[remaining] * rhs[remaining]);
  }
  *distance = result;
}

// Shift the first `original_dim` bytes of `query` in-place from int8 to uint8
// by adding 128 to each element. The metadata tail beyond `original_dim` is
// left untouched. This prepares the query for use with dpbusd (uint8 * int8).
//
// Declared `inline` in addition to always_inline: GCC warns that an
// always_inline function not declared inline "might not be inlinable".
static inline __attribute__((always_inline)) void shift_int8_to_uint8_avx512(
    void *query, size_t original_dim) {
  const int8_t *input = reinterpret_cast<const int8_t *>(query);
  uint8_t *output = reinterpret_cast<uint8_t *>(query);

  // 128 represented as int8_t wraps to -128, but two's complement addition
  // produces the correct uint8 result.
  const __m512i offset = _mm512_set1_epi8(static_cast<int8_t>(128));

  // Whole 64-byte chunks; unaligned accesses, so `query` needs no alignment.
  size_t i = 0;
  for (; i + 64 <= original_dim; i += 64) {
    const __m512i data =
        _mm512_loadu_si512(reinterpret_cast<const __m512i *>(input + i));
    _mm512_storeu_si512(reinterpret_cast<__m512i *>(output + i),
                        _mm512_add_epi8(data, offset));
  }
  // Scalar tail for the remaining (< 64) bytes.
  for (; i < original_dim; ++i) {
    output[i] = static_cast<uint8_t>(static_cast<int>(input[i]) + 128);
  }
}

// Compute raw integer inner products for a batch of int8 vectors against a
// single query. Uses AVX512-VNNI dpbusd instruction.
// `query` is treated as uint8 (preprocessed), `vectors[i]` as int8.
//
// `prefetch_ptrs[i]`, when non-null, is prefetched at the same byte offset as
// the chunk currently being processed. `distances` receives `batch_size`
// results, each the exact integer sum converted to float.
//
// Declared `inline` in addition to always_inline: GCC warns that an
// always_inline function not declared inline "might not be inlinable".
template <size_t batch_size>
inline __attribute__((always_inline)) void ip_int8_batch_avx512_vnni_impl(
    const void *query, const void *const *vectors,
    const std::array<const void *, batch_size> &prefetch_ptrs,
    size_t dimensionality, float *distances) {
  const uint8_t *const query_bytes = reinterpret_cast<const uint8_t *>(query);

  __m512i accs[batch_size];
  for (size_t i = 0; i < batch_size; ++i) {
    accs[i] = _mm512_setzero_si512();
  }

  // Main loop: 64 elements per step, one query load shared by the batch.
  size_t dim = 0;
  for (; dim + 64 <= dimensionality; dim += 64) {
    const __m512i q = _mm512_loadu_si512(
        reinterpret_cast<const __m512i *>(query_bytes + dim));
    __m512i data_regs[batch_size];
    for (size_t i = 0; i < batch_size; ++i) {
      data_regs[i] = _mm512_loadu_si512(reinterpret_cast<const __m512i *>(
          reinterpret_cast<const int8_t *>(vectors[i]) + dim));
    }
    for (size_t i = 0; i < batch_size; ++i) {
      if (prefetch_ptrs[i]) {
        _mm_prefetch(
            reinterpret_cast<const char *>(
                reinterpret_cast<const int8_t *>(prefetch_ptrs[i]) + dim),
            _MM_HINT_T0);
      }
      // dpbusd: unsigned bytes of q times signed bytes of the data vector.
      accs[i] = _mm512_dpbusd_epi32(accs[i], q, data_regs[i]);
    }
  }

  std::array<int, batch_size> temp_results{};
  for (size_t i = 0; i < batch_size; ++i) {
    temp_results[i] = _mm512_reduce_add_epi32(accs[i]);
  }

  // Scalar tail for the remaining (< 64) elements, same uint8 * int8 pairing.
  for (; dim < dimensionality; ++dim) {
    const int q = static_cast<int>(query_bytes[dim]);
    for (size_t i = 0; i < batch_size; ++i) {
      temp_results[i] +=
          q *
          static_cast<int>(reinterpret_cast<const int8_t *>(vectors[i])[dim]);
    }
  }

  for (size_t i = 0; i < batch_size; ++i) {
    distances[i] = static_cast<float>(temp_results[i]);
  }
}

// Dispatch batched inner product over all `n` vectors with prefetching.
//
// Vectors are processed two at a time; while a pair is being processed, the
// vectors `batch_size * prefetch_step` positions ahead are prefetched (when
// they exist). A trailing odd vector is processed alone without prefetching.
// `query` must already be in the uint8 form expected by the impl kernel.
//
// Declared `inline` in addition to always_inline: GCC warns that an
// always_inline function not declared inline "might not be inlinable".
static inline __attribute__((always_inline)) void ip_int8_batch_avx512_vnni(
    const void *const *vectors, const void *query, size_t n, size_t dim,
    float *distances) {
  static constexpr size_t batch_size = 2;
  static constexpr size_t prefetch_step = 2;
  static constexpr size_t prefetch_distance = batch_size * prefetch_step;

  size_t i = 0;
  for (; i + batch_size <= n; i += batch_size) {
    std::array<const void *, batch_size> prefetch_ptrs;
    for (size_t j = 0; j < batch_size; ++j) {
      const size_t ahead = i + j + prefetch_distance;
      prefetch_ptrs[j] = (ahead < n) ? vectors[ahead] : nullptr;
    }
    ip_int8_batch_avx512_vnni_impl<batch_size>(
        query, &vectors[i], prefetch_ptrs, dim, distances + i);
  }
  for (; i < n; i++) {
    std::array<const void *, 1> prefetch_ptrs{nullptr};
    ip_int8_batch_avx512_vnni_impl<1>(query, &vectors[i], prefetch_ptrs, dim,
                                      distances + i);
  }
}

}  // namespace zvec::turbo::avx512_vnni::internal

#endif  // defined(__AVX512VNNI__)


================================================
FILE: src/turbo/avx512_vnni/record_quantized_int8/cosine.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// This file is compiled with per-file -march=avx512vnni (set in CMakeLists.txt)
// so that all AVX512-VNNI intrinsics and the inlined inner product kernels from
// common.h are compiled with the correct target ISA.

#include "avx512_vnni/record_quantized_int8/cosine.h"
#include "avx512_vnni/record_quantized_int8/common.h"
#if defined(__AVX512VNNI__)
#include <immintrin.h>
#endif

// Record layout for quantized INT8 cosine vectors, as read by this file:
//
//   [ original_dim bytes: int8_t elements ]
//   [ float scale_a       ]  (ma)
//   [ float bias_a        ]  (mb)
//   [ float sum_a         ]  (ms)
//   [ float square_sum_a  ]  (ms2, not read in this file)
//   [ int   int8_sum      ]  (sum of raw int8 elements, used when query is
//                             preprocessed to uint8 via +128 shift)
//   [ 4 more bytes        ]  (not read in this file)
//
// The code below treats the tail as 24 bytes, i.e. original_dim = dim - 24.
// The query tail has the same layout (qa, qb, qs, qs2) without int8_sum.

namespace zvec::turbo::avx512_vnni {

// Cosine distance between two encoded INT8 records (single-pair path).
//
// `a` and `b` each hold `dim - 24` int8 elements followed by a metadata tail
// whose first three floats are scale, bias and element sum. The result written
// to `*distance` is the negated inner product of the dequantized vectors.
//
// Nothing is written to `*distance` when `dim` leaves no room for elements
// (dim <= 24), or when this file is built without __AVX512VNNI__.
void cosine_int8_distance(const void *a, const void *b, size_t dim,
                          float *distance) {
#if defined(__AVX512VNNI__)
  // `dim` is the full encoded size; the trailing 24 bytes are metadata.
  // The bound is checked in size_t so that a too-small `dim` cannot wrap and
  // no size_t -> int narrowing is involved.
  constexpr size_t kTailBytes = 24;
  if (dim <= kTailBytes) {
    return;
  }
  const size_t original_dim = dim - kTailBytes;

  // Compute raw integer inner product over the original_dim bytes.
  // Note: for the single-vector path there is no query preprocessing, so both
  // sides are treated as int8_t (same as the non-preprocessed path in
  // MinusInnerProductDistanceBatchWithScoreUnquantized<int8_t>).
  internal::ip_int8_avx512_vnni(a, b, original_dim, distance);

  // NOTE(review): the tails are read through float pointers, which assumes
  // the encoder places them at a float-aligned offset -- confirm.
  const float *a_tail = reinterpret_cast<const float *>(
      reinterpret_cast<const int8_t *>(a) + original_dim);
  const float *b_tail = reinterpret_cast<const float *>(
      reinterpret_cast<const int8_t *>(b) + original_dim);

  const float ma = a_tail[0];
  const float mb = a_tail[1];
  const float ms = a_tail[2];

  const float qa = b_tail[0];
  const float qb = b_tail[1];
  const float qs = b_tail[2];

  // Dequantize and compute cosine distance:
  //   cosine_dist = -(ma * qa * ip + mb * qa * qs + qb * ma * ms
  //                   + original_dim * qb * mb)
  *distance = -(ma * qa * *distance + mb * qa * qs + qb * ma * ms +
                static_cast<float>(original_dim) * qb * mb);
#else
  (void)a;
  (void)b;
  (void)dim;
  (void)distance;
#endif
}

// Batched cosine distance: `n` encoded INT8 records against one query.
//
// `query` must already have been shifted to uint8 by
// cosine_int8_query_preprocess. Each `vectors[i]` holds `dim - 24` int8
// elements followed by a tail with scale, bias, sum, square sum and int8_sum.
// `distances[i]` receives the negated inner product of the dequantized
// vectors.
//
// Nothing is written when dim <= 24, or when this file is built without
// __AVX512VNNI__.
void cosine_int8_batch_distance(const void *const *vectors, const void *query,
                                size_t n, size_t dim, float *distances) {
#if defined(__AVX512VNNI__)
  // `dim` is the full encoded size; the trailing 24 bytes are metadata.
  // The bound is checked in size_t so that a too-small `dim` cannot wrap and
  // no size_t -> int narrowing is involved.
  constexpr size_t kTailBytes = 24;
  if (dim <= kTailBytes) {
    return;
  }
  const size_t original_dim = dim - kTailBytes;

  // Compute raw inner products for all vectors. The query has been preprocessed
  // (int8 + 128 -> uint8) so dpbusd can be used via ip_int8_batch_avx512_vnni.
  internal::ip_int8_batch_avx512_vnni(vectors, query, n, original_dim,
                                      distances);

  const float *q_tail = reinterpret_cast<const float *>(
      reinterpret_cast<const int8_t *>(query) + original_dim);
  const float qa = q_tail[0];
  const float qb = q_tail[1];
  const float qs = q_tail[2];

  // size_t index: `n` is a size_t, so an int counter would be a
  // signed/unsigned comparison.
  for (size_t i = 0; i < n; ++i) {
    const float *m_tail = reinterpret_cast<const float *>(
        reinterpret_cast<const int8_t *>(vectors[i]) + original_dim);
    const float ma = m_tail[0];
    const float mb = m_tail[1];
    const float ms = m_tail[2];
    // Correct for the +128 shift applied to the query during preprocessing:
    //   dpbusd computes sum(uint8_query[i] * int8_data[i])
    //         = sum((int8_query[i] + 128) * int8_data[i])
    //         = true_ip + 128 * sum(int8_data[i])
    // int8_sum is stored as the 5th int-sized field after the 4 floats.
    const int int8_sum = reinterpret_cast<const int *>(m_tail)[4];
    float &result = distances[i];
    result -= 128.0f * static_cast<float>(int8_sum);

    // Dequantize and compute cosine distance:
    //   cosine_dist = -(ma * qa * ip + mb * qa * qs + qb * ma * ms
    //                   + original_dim * qb * mb)
    result = -(ma * qa * result + mb * qa * qs + qb * ma * ms +
               static_cast<float>(original_dim) * qb * mb);
  }
#else
  (void)vectors;
  (void)query;
  (void)n;
  (void)dim;
  (void)distances;
#endif
}

// Shift the element bytes of an encoded cosine query from int8 to uint8
// in place. The trailing 24 bytes of the record are left untouched, and
// nothing happens when `dim` leaves no element bytes.
void cosine_int8_query_preprocess(void *query, size_t dim) {
#if defined(__AVX512VNNI__)
  // Element bytes = full encoded size minus the 24-byte tail.
  const int payload_bytes = static_cast<int>(dim) - 24;
  if (payload_bytes > 0) {
    internal::shift_int8_to_uint8_avx512(query, payload_bytes);
  }
#else
  (void)query;
  (void)dim;
#endif
}

}  // namespace zvec::turbo::avx512_vnni


================================================
FILE: src/turbo/avx512_vnni/record_quantized_int8/cosine.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstddef>

namespace zvec::turbo::avx512_vnni {

// Compute cosine distance between a single quantized INT8 vector pair,
// returned as the negated inner product of the dequantized vectors.
// `dim` is the full encoded record size in bytes: the int8 elements followed
// by a 24-byte metadata tail. Of that tail this function reads the first
// three floats of each operand (scale, bias, sum).
// `*distance` is left unmodified when dim <= 24, and when the implementation
// file is built without __AVX512VNNI__.
void cosine_int8_distance(const void *a, const void *b, size_t dim,
                          float *distance);

// Batch version of cosine_int8_distance: `n` stored vectors against one query,
// one result per vector written to `distances`.
// The query must have been preprocessed by cosine_int8_query_preprocess
// (int8 -> uint8 via +128 shift) before calling this function.
// Each stored vector's tail must carry its int8 element sum as the int-sized
// field following the four floats; it is used to undo the +128 shift.
void cosine_int8_batch_distance(const void *const *vectors, const void *query,
                                size_t n, size_t dim, float *distances);

// Preprocess the query vector in-place (shift int8 -> uint8 by adding 128)
// so that the AVX512-VNNI dpbusd instruction can be used for inner product.
// `dim` includes the 24-byte metadata tail; only the `dim - 24` element bytes
// are modified. Calling it twice on the same buffer shifts the elements
// twice, so it must be applied exactly once per query.
void cosine_int8_query_preprocess(void *query, size_t dim);

}  // namespace zvec::turbo::avx512_vnni


================================================
FILE: src/turbo/avx512_vnni/record_quantized_int8/squared_euclidean.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// This file is compiled with per-file -march=avx512vnni (set in CMakeLists.txt)
// so that all AVX512-VNNI intrinsics and the inlined inner product kernels from
// common.h are compiled with the correct target ISA.

#include "avx512_vnni/record_quantized_int8/squared_euclidean.h"
#include "avx512_vnni/record_quantized_int8/common.h"
#if defined(__AVX512VNNI__)
#include <immintrin.h>
#endif

// Tail layout for quantized INT8 squared Euclidean vectors:
//
//   [ original_dim bytes: int8_t elements ]
//   [ float scale_a  ]  (ma)
//   [ float bias_a   ]  (mb)
//   [ float sum_a    ]  (ms)
//   [ float sum2_a   ]  (ms2)
//   [ int  int8_sum  ]  (sum of raw int8 elements, used for bias correction
//                        when the query has been shifted to uint8 via +128)
//
// Total tail size: 4 floats + 1 int = 20 bytes, so dim = original_dim + 20.

namespace zvec::turbo::avx512_vnni {

// Squared Euclidean distance between two encoded INT8 records (single-pair
// path).
//
// `a` and `b` each hold `dim - 20` int8 elements followed by a tail whose
// first four floats are scale, bias, element sum and element square sum.
// The distance is reconstructed from the raw int8 inner product and those
// per-record statistics.
//
// Nothing is written to `*distance` when dim <= 20, or when this file is
// built without __AVX512VNNI__.
void squared_euclidean_int8_distance(const void *a, const void *b, size_t dim,
                                     float *distance) {
#if defined(__AVX512VNNI__)
  // The bound is checked in size_t so that a too-small `dim` cannot wrap and
  // no size_t -> int narrowing is involved.
  constexpr size_t kTailBytes = 20;
  if (dim <= kTailBytes) {
    return;
  }
  const size_t original_dim = dim - kTailBytes;

  internal::ip_int8_avx512_vnni(a, b, original_dim, distance);

  // NOTE(review): the tails are read through float pointers, which assumes
  // the encoder places them at a float-aligned offset -- confirm.
  const float *a_tail = reinterpret_cast<const float *>(
      reinterpret_cast<const int8_t *>(a) + original_dim);
  const float *b_tail = reinterpret_cast<const float *>(
      reinterpret_cast<const int8_t *>(b) + original_dim);

  const float ma = a_tail[0];
  const float mb = a_tail[1];
  const float ms = a_tail[2];
  const float ms2 = a_tail[3];

  const float qa = b_tail[0];
  const float qb = b_tail[1];
  const float qs = b_tail[2];
  const float qs2 = b_tail[3];

  const float sum = qa * qs;
  const float sum2 = qa * qa * qs2;

  // With x = ma * m + mb and y = qa * q + qb:
  //   sum((x - y)^2) = ma^2 * ms2 + qa^2 * qs2 - 2 * ma * qa * ip
  //                    + original_dim * (mb - qb)^2
  //                    + 2 * (mb - qb) * (ma * ms - qa * qs)
  *distance = ma * ma * ms2 + sum2 - 2 * ma * qa * *distance +
              (mb - qb) * (mb - qb) * static_cast<float>(original_dim) +
              2 * (mb - qb) * (ms * ma - sum);
#else
  (void)a;
  (void)b;
  (void)dim;
  (void)distance;
#endif
}

// Batched squared Euclidean distance: `n` encoded INT8 records against one
// query.
//
// `query` must already have been shifted to uint8 by
// squared_euclidean_int8_query_preprocess. Each `vectors[i]` holds `dim - 20`
// int8 elements followed by the 20-byte tail (scale, bias, sum, square sum,
// int8_sum). `distances[i]` receives the reconstructed distance.
//
// Nothing is written when dim <= 20, or when this file is built without
// __AVX512VNNI__.
void squared_euclidean_int8_batch_distance(const void *const *vectors,
                                           const void *query, size_t n,
                                           size_t dim, float *distances) {
#if defined(__AVX512VNNI__)
  // The bound is checked in size_t so that a too-small `dim` cannot wrap and
  // no size_t -> int narrowing is involved.
  constexpr size_t kTailBytes = 20;
  if (dim <= kTailBytes) {
    return;
  }
  const size_t original_dim = dim - kTailBytes;

  internal::ip_int8_batch_avx512_vnni(vectors, query, n, original_dim,
                                      distances);

  const float *q_tail = reinterpret_cast<const float *>(
      reinterpret_cast<const int8_t *>(query) + original_dim);
  const float qa = q_tail[0];
  const float qb = q_tail[1];
  const float qs = q_tail[2];
  const float qs2 = q_tail[3];

  // Query-only terms, hoisted out of the per-vector loop.
  const float sum = qa * qs;
  const float sum2 = qa * qa * qs2;

  for (size_t i = 0; i < n; ++i) {
    const float *m_tail = reinterpret_cast<const float *>(
        reinterpret_cast<const int8_t *>(vectors[i]) + original_dim);
    const float ma = m_tail[0];
    const float mb = m_tail[1];
    const float ms = m_tail[2];
    const float ms2 = m_tail[3];
    // Correct for the +128 shift applied to the query during preprocessing:
    //   dpbusd computes sum(uint8_query[i] * int8_data[i])
    //         = sum((int8_query[i] + 128) * int8_data[i])
    //         = true_ip + 128 * sum(int8_data[i])
    // int8_sum is stored as the 5th int-sized field after the 4 floats.
    const int int8_sum = reinterpret_cast<const int *>(m_tail)[4];
    float &result = distances[i];
    result -= 128.0f * static_cast<float>(int8_sum);
    result = ma * ma * ms2 + sum2 - 2 * ma * qa * result +
             (mb - qb) * (mb - qb) * static_cast<float>(original_dim) +
             2 * (mb - qb) * (ms * ma - sum);
  }
#else
  (void)vectors;
  (void)query;
  (void)n;
  (void)dim;
  (void)distances;
#endif
}

// Shift the element bytes of an encoded squared-Euclidean query from int8 to
// uint8 in place. The trailing 20 bytes of the record are left untouched, and
// nothing happens when `dim` leaves no element bytes.
void squared_euclidean_int8_query_preprocess(void *query, size_t dim) {
#if defined(__AVX512VNNI__)
  // Element bytes = full encoded size minus the 20-byte tail.
  const int payload_bytes = static_cast<int>(dim) - 20;
  if (payload_bytes > 0) {
    internal::shift_int8_to_uint8_avx512(query, payload_bytes);
  }
#else
  (void)query;
  (void)dim;
#endif
}

}  // namespace zvec::turbo::avx512_vnni


================================================
FILE: src/turbo/avx512_vnni/record_quantized_int8/squared_euclidean.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstddef>

namespace zvec::turbo::avx512_vnni {

// Compute squared Euclidean distance between a single quantized INT8
// vector pair.
// `dim` is the full encoded record size in bytes: the int8 elements followed
// by a 20-byte metadata tail (4 floats: scale_a, bias_a, sum_a, sum2_a, then
// a 4-byte int8_sum that this single-pair function does not read).
// `*distance` is left unmodified when dim <= 20, and when the implementation
// file is built without __AVX512VNNI__.
void squared_euclidean_int8_distance(const void *a, const void *b, size_t dim,
                                     float *distance);

// Batch version of squared_euclidean_int8_distance: `n` stored vectors against
// one query, one result per vector written to `distances`.
// The query must have been preprocessed by
// squared_euclidean_int8_query_preprocess (int8 -> uint8 via +128 shift)
// before calling this function.
// Each stored vector's tail must carry its int8 element sum as the int-sized
// field following the four floats; it is used to undo the +128 shift.
void squared_euclidean_int8_batch_distance(const void *const *vectors,
                                           const void *query, size_t n,
                                           size_t dim, float *distances);

// Preprocess the query vector in-place (shift int8 -> uint8 by adding 128)
// for the batch path. Only the original_dim bytes are shifted; the metadata
// tail is left intact. `dim` includes the 20-byte metadata tail.
// Calling it twice on the same buffer shifts the elements twice, so it must
// be applied exactly once per query.
void squared_euclidean_int8_query_preprocess(void *query, size_t dim);

}  // namespace zvec::turbo::avx512_vnni


================================================
FILE: src/turbo/turbo.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <ailego/internal/cpu_features.h>
#include <zvec/turbo/turbo.h>
#include "avx512_vnni/record_quantized_int8/cosine.h"
#include "avx512_vnni/record_quantized_int8/squared_euclidean.h"

namespace zvec::turbo {

// Selects the single-pair distance kernel for the requested combination.
// A kernel is returned only for INT8 data with the default quantization on a
// CPU whose feature flags report AVX512-VNNI, and only for the
// squared-Euclidean and cosine metrics. Every other combination yields
// nullptr.
DistanceFunc get_distance_func(MetricType metric_type, DataType data_type,
                               QuantizeType quantize_type) {
  const bool is_default_int8 = data_type == DataType::kInt8 &&
                               quantize_type == QuantizeType::kDefault;
  if (!is_default_int8) {
    return nullptr;
  }
  // The CPU capability is consulted only after the type checks have passed.
  if (!zvec::ailego::internal::CpuFeatures::static_flags_.AVX512_VNNI) {
    return nullptr;
  }
  if (metric_type == MetricType::kSquaredEuclidean) {
    return avx512_vnni::squared_euclidean_int8_distance;
  }
  if (metric_type == MetricType::kCosine) {
    return avx512_vnni::cosine_int8_distance;
  }
  return nullptr;
}

// Selects the batch distance kernel for the requested combination.
// A kernel is returned only for INT8 data with the default quantization on a
// CPU whose feature flags report AVX512-VNNI, and only for the
// squared-Euclidean and cosine metrics. Every other combination yields
// nullptr.
BatchDistanceFunc get_batch_distance_func(MetricType metric_type,
                                          DataType data_type,
                                          QuantizeType quantize_type) {
  const bool is_default_int8 = data_type == DataType::kInt8 &&
                               quantize_type == QuantizeType::kDefault;
  if (!is_default_int8) {
    return nullptr;
  }
  // The CPU capability is consulted only after the type checks have passed.
  if (!zvec::ailego::internal::CpuFeatures::static_flags_.AVX512_VNNI) {
    return nullptr;
  }
  if (metric_type == MetricType::kSquaredEuclidean) {
    return avx512_vnni::squared_euclidean_int8_batch_distance;
  }
  if (metric_type == MetricType::kCosine) {
    return avx512_vnni::cosine_int8_batch_distance;
  }
  return nullptr;
}

// Selects the query preprocessing routine for the requested combination.
// A routine is returned only for INT8 data with the default quantization on a
// CPU whose feature flags report AVX512-VNNI, and only for the
// squared-Euclidean and cosine metrics. Every other combination yields
// nullptr.
QueryPreprocessFunc get_query_preprocess_func(MetricType metric_type,
                                              DataType data_type,
                                              QuantizeType quantize_type) {
  const bool is_default_int8 = data_type == DataType::kInt8 &&
                               quantize_type == QuantizeType::kDefault;
  if (!is_default_int8) {
    return nullptr;
  }
  // The CPU capability is consulted only after the type checks have passed.
  if (!zvec::ailego::internal::CpuFeatures::static_flags_.AVX512_VNNI) {
    return nullptr;
  }
  if (metric_type == MetricType::kSquaredEuclidean) {
    return avx512_vnni::squared_euclidean_int8_query_preprocess;
  }
  if (metric_type == MetricType::kCosine) {
    return avx512_vnni::cosine_int8_query_preprocess;
  }
  return nullptr;
}

}  // namespace zvec::turbo


================================================
FILE: tests/CMakeLists.txt
================================================
# Pull in the project's shared CMake helper modules.
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

# Register the per-component test directories.
# NOTE(review): cc_directories is presumably provided by cmake/bazel.cmake and
# adds each named subdirectory to the build — confirm.
cc_directories(ailego)
cc_directories(db)
cc_directories(core)


================================================
FILE: tests/ailego/CMakeLists.txt
================================================

# Pull in the project's shared CMake helper modules.
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

# Collect every *_test.cc source below this directory (recursive glob).
file(GLOB_RECURSE ALL_TEST_SRCS *_test.cc)

# Declare one gtest target per test source. The target is named after the
# file's base name (NAME_WE drops the directory and extension), is linked
# against zvec_ailego plus the Arrow/Parquet targets, and is then added to the
# zvec_ailego test suite.
foreach(CC_SRCS ${ALL_TEST_SRCS})
  get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)
  cc_gtest(
    NAME ${CC_TARGET} STRICT
    LIBS zvec_ailego
    Arrow::arrow_depends
    Arrow::parquet_static
    SRCS ${CC_SRCS}
  )
  cc_test_suite(zvec_ailego ${CC_TARGET})
endforeach()


================================================
FILE: tests/ailego/algorithm/integer_quantizer_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <stdlib.h>
#include <random>
#include <ailego/algorithm/integer_quantizer.h>
#include <gtest/gtest.h>

namespace zvec::ailego {

// Round-trips floats drawn uniformly from [1, 2) through EntropyInt8Quantizer
// for several sample counts and expects the mean squared reconstruction error
// to stay below 0.01 for each of them.
TEST(IntegerQuantizer, INT8_Uniform_Distribution) {
  const size_t sample_counts[] = {1, 100, 1000, 10000, 100000};
  for (const size_t count : sample_counts) {
    std::random_device seed_source;
    std::mt19937 rng(seed_source());
    std::uniform_real_distribution<float> sampler(1.0, 2.0);

    // Draw the samples while tracking the observed value range.
    std::vector<float> samples;
    float hi = -std::numeric_limits<float>::max();
    float lo = std::numeric_limits<float>::max();
    for (size_t drawn = 0; drawn < count; ++drawn) {
      const float value = sampler(rng);
      if (hi < value) {
        hi = value;
      }
      if (value < lo) {
        lo = value;
      }
      samples.push_back(value);
    }

    EntropyInt8Quantizer quantizer;
    quantizer.set_max(hi);
    quantizer.set_min(lo);
    quantizer.feed(samples.data(), samples.size());
    ASSERT_TRUE(quantizer.train());

    // Encode, then decode again so the result can be compared to the input.
    std::vector<int8_t> encoded(samples.size(), 0);
    quantizer.encode(samples.data(), encoded.size(), encoded.data());
    std::vector<float> decoded(samples.size(), 0.0f);
    quantizer.decode(encoded.data(), encoded.size(), decoded.data());

    float squared_error = 0.0f;
    for (size_t i = 0; i < samples.size(); ++i) {
      const float delta = samples[i] - decoded[i];
      squared_error += delta * delta;
    }
    EXPECT_LT(squared_error / count, 0.01);
  }
}

// Round-trips one million normally distributed samples (mean 3, stddev 1.5)
// through EntropyInt8Quantizer and expects a mean squared reconstruction error
// below 0.001. The non-bias mode is chosen from one extra random draw, and the
// trained bias must be zero exactly when that mode is enabled.
TEST(IntegerQuantizer, INT8_Normal_Distribution) {
  const size_t kSamples = 1000000u;

  std::random_device seed_source;
  std::mt19937 rng(seed_source());
  std::normal_distribution<float> sampler(3, 1.5);

  // Draw the samples while tracking the observed value range.
  std::vector<float> samples;
  float hi = -std::numeric_limits<float>::max();
  float lo = std::numeric_limits<float>::max();
  for (size_t drawn = 0; drawn < kSamples; ++drawn) {
    const float value = sampler(rng);
    if (hi < value) {
      hi = value;
    }
    if (value < lo) {
      lo = value;
    }
    samples.push_back(value);
  }

  EntropyInt8Quantizer quantizer;
  const bool non_bias = sampler(rng) > 5;
  quantizer.set_non_bias(non_bias);
  quantizer.set_max(hi);
  quantizer.set_min(lo);
  quantizer.feed(samples.data(), samples.size());

  ASSERT_TRUE(quantizer.train());
  ASSERT_EQ(quantizer.bias() == 0.0f, non_bias);

  // Encode, then decode again so the result can be compared to the input.
  std::vector<int8_t> encoded(samples.size(), 0);
  quantizer.encode(samples.data(), encoded.size(), encoded.data());
  std::vector<float> decoded(samples.size(), 0.0f);
  quantizer.decode(encoded.data(), encoded.size(), decoded.data());

  float squared_error = 0.0f;
  for (size_t i = 0; i < samples.size(); ++i) {
    const float delta = samples[i] - decoded[i];
    squared_error += delta * delta;
  }
  EXPECT_LT(squared_error / kSamples, 0.001);
}

// Round-trips 100000 Poisson-distributed samples (mean 10000) through
// EntropyInt8Quantizer and expects a mean squared reconstruction error below
// 100.
TEST(IntegerQuantizer, INT8_Poisson_Distribution) {
  const size_t COUNT = 100000u;

  std::random_device rd;
  std::mt19937 gen(rd());
  std::vector<float> data;

  std::poisson_distribution<int> dist(10000);
  // Seed the running maximum with the lowest representable float.
  // numeric_limits<float>::min() is the smallest *positive* normal value, so
  // negating it (as this test used to) gives a value just below zero, which
  // would be wrong for any data set that is entirely negative.
  float max = std::numeric_limits<float>::lowest();
  float min = std::numeric_limits<float>::max();
  for (size_t i = 0; i < COUNT; ++i) {
    const float v = static_cast<float>(dist(gen));
    max = std::max(max, v);
    min = std::min(min, v);
    data.emplace_back(v);
  }

  EntropyInt8Quantizer quantizer;
  quantizer.set_max(max);
  quantizer.set_min(min);
  quantizer.feed(data.data(), data.size());

  ASSERT_TRUE(quantizer.train());

  // Encode, then decode again so the result can be compared to the input.
  std::vector<int8_t> qdata(data.size(), 0);
  quantizer.encode(data.data(), qdata.size(), qdata.data());

  std::vector<float> recover_data(data.size(), 0.0f);
  quantizer.decode(qdata.data(), qdata.size(), recover_data.data());

  float var = 0.0f;
  for (size_t i = 0; i < data.size(); ++i) {
    const float diff = data[i] - recover_data[i];
    var += diff * diff;
  }
  EXPECT_LT(var / COUNT, 100);
}

// Round-trips floats drawn uniformly from [1, 2) through EntropyInt4Quantizer
// for several (even) sample counts and expects the mean squared
// reconstruction error to stay below 0.1 for each of them.
TEST(IntegerQuantizer, INT4_Uniform_Distribution) {
  std::vector<size_t> tests = {2, 1000, 10000, 100000};
  for (auto COUNT : tests) {
    std::random_device rd;
    std::mt19937 gen(rd());
    std::vector<float> data;

    std::uniform_real_distribution<float> dist(1.0, 2.0);
    // Seed the running maximum with the lowest representable float.
    // numeric_limits<float>::min() is the smallest *positive* normal value,
    // so negating it (as this test used to) gives a value just below zero,
    // which would be wrong for any data set that is entirely negative.
    float max = std::numeric_limits<float>::lowest();
    float min = std::numeric_limits<float>::max();
    for (size_t i = 0; i < COUNT; ++i) {
      const float v = dist(gen);
      max = std::max(max, v);
      min = std::min(min, v);
      data.emplace_back(v);
    }

    EntropyInt4Quantizer quantizer;
    quantizer.set_max(max);
    quantizer.set_min(min);
    quantizer.feed(data.data(), data.size());

    ASSERT_TRUE(quantizer.train());

    // The output buffer holds half as many bytes as there are input values
    // (presumably two 4-bit codes per byte — confirm against the quantizer).
    std::vector<uint8_t> qdata(data.size() / 2, 0);
    quantizer.encode(data.data(), data.size(), qdata.data());

    std::vector<float> recover_data(data.size(), 0.0f);
    quantizer.decode(qdata.data(), data.size(), recover_data.data());

    float var = 0.0f;
    for (size_t i = 0; i < data.size(); ++i) {
      const float diff = data[i] - recover_data[i];
      var += diff * diff;
    }
    EXPECT_LT(var / COUNT, 0.1);
  }
}

// Round-trips 10000 normally distributed samples (stddev 5, with a mean that
// is itself drawn from a normal distribution with mean -1 and stddev 1)
// through EntropyInt4Quantizer and expects a mean squared reconstruction error
// below 1.0. The non-bias mode is chosen from one extra random draw, and the
// trained bias must be zero exactly when that mode is enabled.
TEST(IntegerQuantizer, INT4_Normal_Distribution) {
  const size_t kSamples = 10000u;

  std::random_device seed_source;
  std::mt19937 rng(seed_source());
  std::normal_distribution<float> mean_sampler(-1, 1);
  std::normal_distribution<float> sampler(mean_sampler(rng), 5);

  // Draw the samples while tracking the observed value range.
  std::vector<float> samples;
  float hi = -std::numeric_limits<float>::max();
  float lo = std::numeric_limits<float>::max();
  for (size_t drawn = 0; drawn < kSamples; ++drawn) {
    const float value = sampler(rng);
    if (hi < value) {
      hi = value;
    }
    if (value < lo) {
      lo = value;
    }
    samples.push_back(value);
  }

  EntropyInt4Quantizer quantizer;
  const bool non_bias = mean_sampler(rng) > 0;
  quantizer.set_non_bias(non_bias);
  quantizer.set_max(hi);
  quantizer.set_min(lo);
  quantizer.feed(samples.data(), samples.size());

  ASSERT_TRUE(quantizer.train());
  ASSERT_EQ(quantizer.bias() == 0.0f, non_bias);

  // Encode, then decode again so the result can be compared to the input.
  std::vector<uint8_t> encoded(samples.size(), 0);
  quantizer.encode(samples.data(), samples.size(), encoded.data());
  std::vector<float> decoded(samples.size(), 0.0f);
  quantizer.decode(encoded.data(), samples.size(), decoded.data());

  float squared_error = 0.0f;
  for (size_t i = 0; i < samples.size(); ++i) {
    const float delta = samples[i] - decoded[i];
    squared_error += delta * delta;
  }
  EXPECT_LT(squared_error / kSamples, 1.0f);
}

// Round-trips 100000 Poisson-distributed samples (mean 10000) through
// EntropyInt4Quantizer and expects a mean squared reconstruction error below
// 500.
TEST(IntegerQuantizer, INT4_Poisson_Distribution) {
  const size_t COUNT = 100000u;

  std::random_device rd;
  std::mt19937 gen(rd());
  std::vector<float> data;

  std::poisson_distribution<int> dist(10000);
  // Seed the running maximum with the lowest representable float.
  // numeric_limits<float>::min() is the smallest *positive* normal value, so
  // negating it (as this test used to) gives a value just below zero, which
  // would be wrong for any data set that is entirely negative.
  float max = std::numeric_limits<float>::lowest();
  float min = std::numeric_limits<float>::max();
  for (size_t i = 0; i < COUNT; ++i) {
    const float v = static_cast<float>(dist(gen));
    max = std::max(max, v);
    min = std::min(min, v);
    data.emplace_back(v);
  }

  EntropyInt4Quantizer quantizer;
  quantizer.set_max(max);
  quantizer.set_min(min);
  quantizer.feed(data.data(), data.size());

  ASSERT_TRUE(quantizer.train());

  // Encode, then decode again so the result can be compared to the input.
  std::vector<uint8_t> qdata(data.size(), 0);
  quantizer.encode(data.data(), data.size(), qdata.data());

  std::vector<float> recover_data(data.size(), 0.0f);
  quantizer.decode(qdata.data(), data.size(), recover_data.data());

  float var = 0.0f;
  for (size_t i = 0; i < data.size(); ++i) {
    const float diff = data[i] - recover_data[i];
    var += diff * diff;
  }
  EXPECT_LT(var / COUNT, 500);
}

// Round-trips floats drawn uniformly from [1, 2) through EntropyUInt8Quantizer
// for several sample counts and expects the mean squared reconstruction error
// to stay below 0.01 for each of them.
TEST(IntegerQuantizer, UINT8_Uniform_Distribution) {
  const size_t sample_counts[] = {1, 100, 1000, 10000, 100000};
  for (const size_t count : sample_counts) {
    std::random_device seed_source;
    std::mt19937 rng(seed_source());
    std::uniform_real_distribution<float> sampler(1.0, 2.0);

    // Draw the samples while tracking the observed value range.
    std::vector<float> samples;
    float hi = -std::numeric_limits<float>::max();
    float lo = std::numeric_limits<float>::max();
    for (size_t drawn = 0; drawn < count; ++drawn) {
      const float value = sampler(rng);
      if (hi < value) {
        hi = value;
      }
      if (value < lo) {
        lo = value;
      }
      samples.push_back(value);
    }

    EntropyUInt8Quantizer quantizer;
    quantizer.set_max(hi);
    quantizer.set_min(lo);
    quantizer.feed(samples.data(), samples.size());
    ASSERT_TRUE(quantizer.train());

    // Encode, then decode again so the result can be compared to the input.
    std::vector<uint8_t> encoded(samples.size(), 0);
    quantizer.encode(samples.data(), encoded.size(), encoded.data());
    std::vector<float> decoded(samples.size(), 0.0f);
    quantizer.decode(encoded.data(), encoded.size(), decoded.data());

    float squared_error = 0.0f;
    for (size_t i = 0; i < samples.size(); ++i) {
      const float delta = samples[i] - decoded[i];
      squared_error += delta * delta;
    }
    EXPECT_LT(squared_error / count, 0.01);
  }
}

// Round-trips 10000 normally distributed samples (mean 5, stddev 1.4) through
// EntropyUInt8Quantizer and expects a mean squared reconstruction error below
// 0.01. The non-bias mode is chosen from one extra random draw, and the
// trained bias must be zero exactly when that mode is enabled.
TEST(IntegerQuantizer, UINT8_Normal_Distribution) {
  const size_t kSamples = 10000u;

  std::random_device seed_source;
  std::mt19937 rng(seed_source());
  std::normal_distribution<float> sampler(5.0f, 1.4f);

  // Draw the samples while tracking the observed value range.
  std::vector<float> samples;
  float hi = -std::numeric_limits<float>::max();
  float lo = std::numeric_limits<float>::max();
  for (size_t drawn = 0; drawn < kSamples; ++drawn) {
    const float value = sampler(rng);
    if (hi < value) {
      hi = value;
    }
    if (value < lo) {
      lo = value;
    }
    samples.push_back(value);
  }

  EntropyUInt8Quantizer quantizer;
  const bool non_bias = sampler(rng) > 5;
  quantizer.set_non_bias(non_bias);
  quantizer.set_max(hi);
  quantizer.set_min(lo);
  quantizer.feed(samples.data(), samples.size());

  ASSERT_TRUE(quantizer.train());
  ASSERT_EQ(quantizer.bias() == 0.0f, non_bias);

  // Encode, then decode again so the result can be compared to the input.
  std::vector<uint8_t> encoded(samples.size(), 0);
  quantizer.encode(samples.data(), encoded.size(), encoded.data());
  std::vector<float> decoded(samples.size(), 0.0f);
  quantizer.decode(encoded.data(), encoded.size(), decoded.data());

  float squared_error = 0.0f;
  for (size_t i = 0; i < samples.size(); ++i) {
    const float delta = samples[i] - decoded[i];
    squared_error += delta * delta;
  }
  EXPECT_LT(squared_error / kSamples, 0.01);
}

// Round-trips 100000 Poisson-distributed samples (mean 10000) through
// EntropyUInt8Quantizer and expects a mean squared reconstruction error below
// 100.
TEST(IntegerQuantizer, UINT8_Poisson_Distribution) {
  const size_t COUNT = 100000u;

  std::random_device rd;
  std::mt19937 gen(rd());
  std::vector<float> data;

  std::poisson_distribution<int> dist(10000);
  // Seed the running maximum with the lowest representable float.
  // numeric_limits<float>::min() is the smallest *positive* normal value, so
  // negating it (as this test used to) gives a value just below zero, which
  // would be wrong for any data set that is entirely negative.
  float max = std::numeric_limits<float>::lowest();
  float min = std::numeric_limits<float>::max();
  for (size_t i = 0; i < COUNT; ++i) {
    const float v = static_cast<float>(dist(gen));
    max = std::max(max, v);
    min = std::min(min, v);
    data.emplace_back(v);
  }

  EntropyUInt8Quantizer quantizer;
  quantizer.set_max(max);
  quantizer.set_min(min);
  quantizer.feed(data.data(), data.size());

  ASSERT_TRUE(quantizer.train());

  // Encode, then decode again so the result can be compared to the input.
  std::vector<uint8_t> qdata(data.size(), 0);
  quantizer.encode(data.data(), qdata.size(), qdata.data());

  std::vector<float> recover_data(data.size(), 0.0f);
  quantizer.decode(qdata.data(), qdata.size(), recover_data.data());

  float var = 0.0f;
  for (size_t i = 0; i < data.size(); ++i) {
    const float diff = data[i] - recover_data[i];
    var += diff * diff;
  }
  EXPECT_LT(var / COUNT, 100);
}

// Round-trips floats drawn uniformly from [1, 2) through EntropyUInt4Quantizer
// for several (even) sample counts and expects the mean squared
// reconstruction error to stay below 0.1 for each of them.
TEST(IntegerQuantizer, UINT4_Uniform_Distribution) {
  std::vector<size_t> tests = {2, 100, 5000, 10000, 100000};
  for (auto COUNT : tests) {
    std::random_device rd;
    std::mt19937 gen(rd());
    std::vector<float> data;

    std::uniform_real_distribution<float> dist(1.0, 2.0);
    // Seed the running maximum with the lowest representable float.
    // numeric_limits<float>::min() is the smallest *positive* normal value,
    // so negating it (as this test used to) gives a value just below zero,
    // which would be wrong for any data set that is entirely negative.
    float max = std::numeric_limits<float>::lowest();
    float min = std::numeric_limits<float>::max();
    for (size_t i = 0; i < COUNT; ++i) {
      const float v = dist(gen);
      max = std::max(max, v);
      min = std::min(min, v);
      data.emplace_back(v);
    }

    EntropyUInt4Quantizer quantizer;
    quantizer.set_max(max);
    quantizer.set_min(min);
    quantizer.feed(data.data(), data.size());

    ASSERT_TRUE(quantizer.train());

    // The output buffer holds half as many bytes as there are input values
    // (presumably two 4-bit codes per byte — confirm against the quantizer).
    std::vector<uint8_t> qdata(data.size() / 2, 0);
    quantizer.encode(data.data(), data.size(), qdata.data());

    std::vector<float> recover_data(data.size(), 0.0f);
    quantizer.decode(qdata.data(), data.size(), recover_data.data());

    float var = 0.0f;
    for (size_t i = 0; i < data.size(); ++i) {
      const float diff = data[i] - recover_data[i];
      var += diff * diff;
    }
    EXPECT_LT(var / COUNT, 0.1);
  }
}

// Round-trips 100000 normally distributed samples (stddev 2, with a mean that
// is itself drawn from a normal distribution with mean 5 and stddev 1) through
// EntropyUInt4Quantizer and expects a mean squared reconstruction error below
// 2.0. The non-bias mode is chosen from one extra random draw, and the trained
// bias must be zero exactly when that mode is enabled.
TEST(IntegerQuantizer, UINT4_Normal_Distribution) {
  const size_t kSamples = 100000u;

  std::random_device seed_source;
  std::mt19937 rng(seed_source());
  std::normal_distribution<float> mean_sampler(5, 1.0);
  std::normal_distribution<float> sampler(mean_sampler(rng), 2);

  // Draw the samples while tracking the observed value range.
  std::vector<float> samples;
  float hi = -std::numeric_limits<float>::max();
  float lo = std::numeric_limits<float>::max();
  for (size_t drawn = 0; drawn < kSamples; ++drawn) {
    const float value = sampler(rng);
    if (hi < value) {
      hi = value;
    }
    if (value < lo) {
      lo = value;
    }
    samples.push_back(value);
  }

  EntropyUInt4Quantizer quantizer;
  const bool non_bias = mean_sampler(rng) > 5;
  quantizer.set_non_bias(non_bias);
  quantizer.set_max(hi);
  quantizer.set_min(lo);
  quantizer.feed(samples.data(), samples.size());

  ASSERT_TRUE(quantizer.train());
  ASSERT_EQ(quantizer.bias() == 0.0f, non_bias);

  // Encode, then decode again so the result can be compared to the input.
  std::vector<uint8_t> encoded(samples.size(), 0);
  quantizer.encode(samples.data(), samples.size(), encoded.data());
  std::vector<float> decoded(samples.size(), 0.0f);
  quantizer.decode(encoded.data(), samples.size(), decoded.data());

  float squared_error = 0.0f;
  for (size_t i = 0; i < samples.size(); ++i) {
    const float delta = samples[i] - decoded[i];
    squared_error += delta * delta;
  }
  EXPECT_LT(squared_error / kSamples, 2.0f);
}

// Round-trips 100000 Poisson-distributed samples (mean 10000) through
// EntropyUInt4Quantizer and expects a mean squared reconstruction error below
// 350.
TEST(IntegerQuantizer, UINT4_Poisson_Distribution) {
  const size_t COUNT = 100000u;

  std::random_device rd;
  std::mt19937 gen(rd());
  std::vector<float> data;

  std::poisson_distribution<int> dist(10000);
  // Seed the running maximum with the lowest representable float.
  // numeric_limits<float>::min() is the smallest *positive* normal value, so
  // negating it (as this test used to) gives a value just below zero, which
  // would be wrong for any data set that is entirely negative.
  float max = std::numeric_limits<float>::lowest();
  float min = std::numeric_limits<float>::max();
  for (size_t i = 0; i < COUNT; ++i) {
    const float v = static_cast<float>(dist(gen));
    max = std::max(max, v);
    min = std::min(min, v);
    data.emplace_back(v);
  }

  EntropyUInt4Quantizer quantizer;
  quantizer.set_max(max);
  quantizer.set_min(min);
  quantizer.feed(data.data(), data.size());

  ASSERT_TRUE(quantizer.train());

  // Encode, then decode again so the result can be compared to the input.
  std::vector<uint8_t> qdata(data.size(), 0);
  quantizer.encode(data.data(), data.size(), qdata.data());

  std::vector<float> recover_data(data.size(), 0.0f);
  quantizer.decode(qdata.data(), data.size(), recover_data.data());

  float var = 0.0f;
  for (size_t i = 0; i < data.size(); ++i) {
    const float diff = data[i] - recover_data[i];
    var += diff * diff;
  }
  EXPECT_LT(var / COUNT, 350);
}
}  // namespace zvec::ailego

================================================
FILE: tests/ailego/algorithm/kmeans_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <random>
#include <gtest/gtest.h>
#include <zvec/ailego/container/vector.h>
#include <zvec/ailego/parallel/thread_pool.h>

#define protected public
#define private public
#include <ailego/algorithm/kmeans.h>

using namespace zvec;

// Feeds 20000 random 20-dimensional FP32 vectors (components uniform in
// [0, 1)) to a 20-cluster NumericalKmeans, runs 20 clustering rounds that must
// all succeed, and prints the SSE after each round followed by every
// cluster's cost and count.
TEST(NumericalKmeans, FP32_General) {
  const size_t kDim = 20;
  const size_t kClusters = 20;
  const size_t kSamples = 20000u;

  ailego::NumericalKmeans<float, ailego::ThreadPool> trainer;
  trainer.reset(kClusters, kDim);

  std::random_device seed_source;
  std::mt19937 rng(seed_source());
  std::uniform_real_distribution<float> component(0.0, 1.0);

  for (size_t n = 0; n < kSamples; ++n) {
    ailego::FixedVector<float, kDim> point;
    for (size_t d = 0; d < kDim; ++d) {
      point[d] = component(rng);
    }
    trainer.append(point.data(), point.size());
  }

  ailego::ThreadPool workers;
  double last_sse = 0.0;
  for (size_t round = 0; round < 20; ++round) {
    double sse = 0.0;
    EXPECT_TRUE(trainer.cluster_once(workers, &sse));
    printf("(%zu) SSE: %f -> %f = %f\n", round, last_sse, sse, sse - last_sse);
    last_sse = sse;
  }

  for (auto &cluster : trainer.context().clusters()) {
    printf("%f: %zu\n", cluster.cost(), cluster.count());
  }
}

// Feeds 20000 random 20-dimensional Float16 vectors (components drawn as
// floats uniform in [0, 1)) to a 20-cluster NumericalKmeans, runs 20
// clustering rounds that must all succeed, and prints the SSE after each round
// followed by every cluster's cost and count.
TEST(NumericalKmeans, FP16_General) {
  const size_t kDim = 20;
  const size_t kClusters = 20;
  const size_t kSamples = 20000u;

  ailego::NumericalKmeans<ailego::Float16, ailego::ThreadPool> trainer;
  trainer.reset(kClusters, kDim);

  std::random_device seed_source;
  std::mt19937 rng(seed_source());
  std::uniform_real_distribution<float> component(0.0, 1.0);

  for (size_t n = 0; n < kSamples; ++n) {
    ailego::FixedVector<ailego::Float16, kDim> point;
    for (size_t d = 0; d < kDim; ++d) {
      point[d] = component(rng);
    }
    trainer.append(point.data(), point.size());
  }

  ailego::ThreadPool workers;
  double last_sse = 0.0;
  for (size_t round = 0; round < 20; ++round) {
    double sse = 0.0;
    EXPECT_TRUE(trainer.cluster_once(workers, &sse));
    printf("(%zu) SSE: %f -> %f = %f\n", round, last_sse, sse, sse - last_sse);
    last_sse = sse;
  }

  for (auto &cluster : trainer.context().clusters()) {
    printf("%f: %zu\n", cluster.cost(), cluster.count());
  }
}

// Feeds 20000 random 80-dimensional INT8 vectors (components uniform in
// [-127, 127]) to a 20-cluster NumericalKmeans, runs 20 clustering rounds that
// must all succeed, and prints the SSE after each round followed by every
// cluster's cost and count.
TEST(NumericalKmeans, INT8_General) {
  const size_t kDim = 20 * 4;
  const size_t kClusters = 20;
  const size_t kSamples = 20000u;

  ailego::NumericalKmeans<int8_t, ailego::ThreadPool> trainer;
  trainer.reset(kClusters, kDim);

  std::random_device seed_source;
  std::mt19937 rng(seed_source());
  std::uniform_int_distribution<int> component(-127, 127);

  for (size_t n = 0; n < kSamples; ++n) {
    ailego::FixedVector<int8_t, kDim> point;
    for (size_t d = 0; d < kDim; ++d) {
      point[d] = static_cast<int8_t>(component(rng));
    }
    trainer.append(point.data(), point.size());
  }

  ailego::ThreadPool workers;
  double last_sse = 0.0;
  for (size_t round = 0; round < 20; ++round) {
    double sse = 0.0;
    EXPECT_TRUE(trainer.cluster_once(workers, &sse));
    printf("(%zu) SSE: %f -> %f = %f\n", round, last_sse, sse, sse - last_sse);
    last_sse = sse;
  }

  for (auto &cluster : trainer.context().clusters()) {
    printf("%f: %zu\n", cluster.cost(), cluster.count());
  }
}

// Feeds 20000 random 512-bit binary vectors (uint32_t storage, each bit set
// when a uniform [0, 1) draw is at least 0.5) to a 20-cluster BinaryKmeans,
// runs 20 clustering rounds that must all succeed, and prints the SSE after
// each round followed by every cluster's cost and count.
TEST(NumericalKmeans, BINARY32_General) {
  const size_t kBits = 16 * 32;
  const size_t kClusters = 20;
  const size_t kSamples = 20000u;

  ailego::BinaryKmeans<uint32_t, ailego::ThreadPool> trainer;
  trainer.reset(kClusters, kBits);

  std::random_device seed_source;
  std::mt19937 rng(seed_source());
  std::uniform_real_distribution<float> coin(0.0, 1.0);

  for (size_t n = 0; n < kSamples; ++n) {
    // A fresh vector per sample: only the chosen bits are set explicitly.
    ailego::BinaryVector<uint32_t> bits(kBits);
    for (size_t b = 0; b < kBits; ++b) {
      const bool turn_on = coin(rng) >= 0.5;
      if (turn_on) {
        bits.set(b);
      }
    }
    trainer.append(bits.data(), bits.dimension());
  }

  ailego::ThreadPool workers;
  double last_sse = 0.0;
  for (size_t round = 0; round < 20; ++round) {
    double sse = 0.0;
    EXPECT_TRUE(trainer.cluster_once(workers, &sse));
    printf("(%zu) SSE: %f -> %f = %f\n", round, last_sse, sse, sse - last_sse);
    last_sse = sse;
  }

  for (auto &cluster : trainer.context().clusters()) {
    printf("%f: %zu\n", cluster.cost(), cluster.count());
  }
}

#if defined(AILEGO_M64)
// Feeds 20000 random 512-bit binary vectors (uint64_t storage, each bit set
// when a uniform [0, 1) draw is at least 0.5) to a 20-cluster BinaryKmeans,
// runs 20 clustering rounds that must all succeed, and prints the SSE after
// each round followed by every cluster's cost and count.
TEST(NumericalKmeans, BINARY64_General) {
  const size_t kBits = 8 * 64;
  const size_t kClusters = 20;
  const size_t kSamples = 20000u;

  ailego::BinaryKmeans<uint64_t, ailego::ThreadPool> trainer;
  trainer.reset(kClusters, kBits);

  std::random_device seed_source;
  std::mt19937 rng(seed_source());
  std::uniform_real_distribution<float> coin(0.0, 1.0);

  for (size_t n = 0; n < kSamples; ++n) {
    // A fresh vector per sample: only the chosen bits are set explicitly.
    ailego::BinaryVector<uint64_t> bits(kBits);
    for (size_t b = 0; b < kBits; ++b) {
      const bool turn_on = coin(rng) >= 0.5;
      if (turn_on) {
        bits.set(b);
      }
    }
    trainer.append(bits.data(), bits.dimension());
  }

  ailego::ThreadPool workers;
  double last_sse = 0.0;
  for (size_t round = 0; round < 20; ++round) {
    double sse = 0.0;
    EXPECT_TRUE(trainer.cluster_once(workers, &sse));
    printf("(%zu) SSE: %f -> %f = %f\n", round, last_sse, sse, sse - last_sse);
    last_sse = sse;
  }

  for (auto &cluster : trainer.context().clusters()) {
    printf("%f: %zu\n", cluster.cost(), cluster.count());
  }
}
#endif  // AILEGO_M64

// Cross-checks NibbleKmeans against NumericalKmeans<int8_t>: both receive the
// same 40000 random 32-dimensional vectors with components in [-8, 7], the
// nibble k-means is seeded with the int8 k-means' centroids, and after 18
// clustering rounds the per-cluster accumulators of the two must be equal.
TEST(NibbleKmeans, INT4_General) {
  const size_t kDim = 32;
  const size_t kClusters = 63;
  const size_t kSamples = 40000u;

  ailego::NumericalKmeans<int8_t, ailego::ThreadPool> int8_kmeans;
  ailego::NibbleKmeans<int32_t, ailego::ThreadPool> int4_kmeans;
  int8_kmeans.reset(kClusters, kDim);
  int4_kmeans.reset(kClusters, kDim);

  std::random_device seed_source;
  std::mt19937 rng(seed_source());
  std::uniform_int_distribution<int> component(-8, 7);

  // Store every sample twice: once as int8 values and once nibble-packed.
  for (size_t n = 0; n < kSamples; ++n) {
    ailego::NumericalVector<int8_t> wide(kDim);
    ailego::NibbleVector<int32_t> packed(kDim);

    for (size_t d = 0; d < kDim; ++d) {
      const int8_t value = static_cast<int8_t>(component(rng));
      wide[d] = value;
      packed.set(d, value);
    }
    int8_kmeans.append(wide.data(), wide.size());
    int4_kmeans.append(packed.data(), packed.size());
  }

  ailego::ThreadPool workers;
  {
    const ailego::NumericalKmeans<int8_t, ailego::ThreadPool> &int8_view =
        int8_kmeans;
    ailego::Kmc2CentroidsGenerator<decltype(int8_view), ailego::ThreadPool>
        generator;

    // Exercise the default initialization and two generator configurations;
    // the last call determines the centroids that are shared below.
    int8_kmeans.init_centroids(workers);

    generator.set_chain_length(20);
    int8_kmeans.init_centroids(workers, generator);

    generator.set_assumption_free(true);
    int8_kmeans.init_centroids(workers, generator);

    // Hand the int8 centroids, nibble-packed, to the second k-means.
    auto seeds = int8_kmeans.centroids();
    for (size_t c = 0; c < seeds.count(); ++c) {
      ailego::NibbleVector<int8_t> packed_seed;
      packed_seed.assign(seeds[c], seeds.dimension());
      int4_kmeans.mutable_centroids()->append(
          reinterpret_cast<const uint32_t *>(packed_seed.data()),
          packed_seed.dimension());
    }
  }

  double last_sse_int8 = 0.0;
  double last_sse_int4 = 0.0;
  for (size_t round = 0; round < 18; ++round) {
    double sse_int8 = 0.0;
    double sse_int4 = 0.0;
    EXPECT_TRUE(int8_kmeans.cluster_once(workers, &sse_int8));
    EXPECT_TRUE(int4_kmeans.cluster_once(workers, &sse_int4));
    printf("1: (%zu) SSE: %f -> %f = %f\n", round, last_sse_int8, sse_int8,
           sse_int8 - last_sse_int8);
    printf("2: (%zu) SSE: %f -> %f = %f\n", round, last_sse_int4, sse_int4,
           sse_int4 - last_sse_int4);
    last_sse_int8 = sse_int8;
    last_sse_int4 = sse_int4;
  }

  // Both implementations must have accumulated identical per-cluster sums.
  auto &int8_clusters = int8_kmeans.context().clusters();
  auto &int4_clusters = int4_kmeans.context().clusters();
  for (size_t c = 0; c < int8_clusters.size(); ++c) {
    for (size_t k = 0; k < int8_clusters[c].accum_.size(); ++k) {
      EXPECT_DOUBLE_EQ(int8_clusters[c].accum_[k], int4_clusters[c].accum_[k]);
    }
  }
}

// Feeds 20000 random 20-dimensional FP32 vectors (components uniform in
// [-1, 1)) to a 20-cluster NumericalInnerProductKmeans, runs 20 clustering
// rounds that must all succeed, and prints the SSE after each round followed
// by every cluster's cost and count.
TEST(NumericalKmeans, FP32_General_InnerProduct) {
  const size_t kDim = 20;
  const size_t kClusters = 20;
  const size_t kSamples = 20000u;

  ailego::NumericalInnerProductKmeans<float, ailego::ThreadPool> trainer;
  trainer.reset(kClusters, kDim);

  std::random_device seed_source;
  std::mt19937 rng(seed_source());
  std::uniform_real_distribution<float> component(-1.0, 1.0);

  for (size_t n = 0; n < kSamples; ++n) {
    ailego::FixedVector<float, kDim> point;
    for (size_t d = 0; d < kDim; ++d) {
      point[d] = component(rng);
    }
    trainer.append(point.data(), point.size());
  }

  ailego::ThreadPool workers;
  double last_sse = 0.0;
  for (size_t round = 0; round < 20; ++round) {
    double sse = 0.0;
    EXPECT_TRUE(trainer.cluster_once(workers, &sse));
    printf("(%zu) SSE: %f -> %f = %f\n", round, last_sse, sse, sse - last_sse);
    last_sse = sse;
  }

  for (auto &cluster : trainer.context().clusters()) {
    printf("%f: %zu\n", cluster.cost(), cluster.count());
  }
}

// Feeds 20000 random 20-dimensional Float16 vectors (components drawn as
// floats uniform in [-1, 1)) to a 20-cluster NumericalInnerProductKmeans, runs
// 20 clustering rounds that must all succeed, and prints the SSE after each
// round followed by every cluster's cost and count.
TEST(NumericalKmeans, FP16_General_InnerProduct) {
  const size_t kDim = 20;
  const size_t kClusters = 20;
  const size_t kSamples = 20000u;

  ailego::NumericalInnerProductKmeans<ailego::Float16, ailego::ThreadPool>
      trainer;
  trainer.reset(kClusters, kDim);

  std::random_device seed_source;
  std::mt19937 rng(seed_source());
  std::uniform_real_distribution<float> component(-1.0, 1.0);

  for (size_t n = 0; n < kSamples; ++n) {
    ailego::FixedVector<ailego::Float16, kDim> point;
    for (size_t d = 0; d < kDim; ++d) {
      point[d] = component(rng);
    }
    trainer.append(point.data(), point.size());
  }

  ailego::ThreadPool workers;
  double last_sse = 0.0;
  for (size_t round = 0; round < 20; ++round) {
    double sse = 0.0;
    EXPECT_TRUE(trainer.cluster_once(workers, &sse));
    printf("(%zu) SSE: %f -> %f = %f\n", round, last_sse, sse, sse - last_sse);
    last_sse = sse;
  }

  for (auto &cluster : trainer.context().clusters()) {
    printf("%f: %zu\n", cluster.cost(), cluster.count());
  }
}

// Feeds 20000 random 80-dimensional INT8 vectors (components uniform in
// [-127, 127]) to a 20-cluster NumericalInnerProductKmeans, runs 20 clustering
// rounds that must all succeed, and prints the SSE after each round followed
// by every cluster's cost and count.
TEST(NumericalKmeans, INT8_General_InnerProduct) {
  const size_t kDim = 20 * 4;
  const size_t kClusters = 20;
  const size_t kSamples = 20000u;

  ailego::NumericalInnerProductKmeans<int8_t, ailego::ThreadPool> trainer;
  trainer.reset(kClusters, kDim);

  std::random_device seed_source;
  std::mt19937 rng(seed_source());
  std::uniform_int_distribution<int> component(-127, 127);

  for (size_t n = 0; n < kSamples; ++n) {
    ailego::FixedVector<int8_t, kDim> point;
    for (size_t d = 0; d < kDim; ++d) {
      point[d] = static_cast<int8_t>(component(rng));
    }
    trainer.append(point.data(), point.size());
  }

  ailego::ThreadPool workers;
  double last_sse = 0.0;
  for (size_t round = 0; round < 20; ++round) {
    double sse = 0.0;
    EXPECT_TRUE(trainer.cluster_once(workers, &sse));
    printf("(%zu) SSE: %f -> %f = %f\n", round, last_sse, sse, sse - last_sse);
    last_sse = sse;
  }

  for (auto &cluster : trainer.context().clusters()) {
    printf("%f: %zu\n", cluster.cost(), cluster.count());
  }
}

// Same scenario as the FP32 inner-product test, except that reset() receives
// `true` as its third argument (presumably enabling the spherical variant the
// test name refers to — confirm against NumericalInnerProductKmeans::reset).
// 20000 random 20-dimensional vectors with components uniform in [-1, 1) are
// clustered into 20 clusters over 20 rounds; the SSE of each round and every
// cluster's cost and count are printed.
TEST(NumericalKmeans, FP32_General_InnerProduct_Spherical) {
  const size_t kDim = 20;
  const size_t kClusters = 20;
  const size_t kSamples = 20000u;

  ailego::NumericalInnerProductKmeans<float, ailego::ThreadPool> trainer;
  trainer.reset(kClusters, kDim, true);

  std::random_device seed_source;
  std::mt19937 rng(seed_source());
  std::uniform_real_distribution<float> component(-1.0, 1.0);

  for (size_t n = 0; n < kSamples; ++n) {
    ailego::FixedVector<float, kDim> point;
    for (size_t d = 0; d < kDim; ++d) {
      point[d] = component(rng);
    }
    trainer.append(point.data(), point.size());
  }

  ailego::ThreadPool workers;
  double last_sse = 0.0;
  for (size_t round = 0; round < 20; ++round) {
    double sse = 0.0;
    EXPECT_TRUE(trainer.cluster_once(workers, &sse));
    printf("(%zu) SSE: %f -> %f = %f\n", round, last_sse, sse, sse - last_sse);
    last_sse = sse;
  }

  for (auto &cluster : trainer.context().clusters()) {
    printf("%f: %zu\n", cluster.cost(), cluster.count());
  }
}


================================================
FILE: tests/ailego/buffer/buffer_manager_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cstdint>
#include <thread>
#include <gtest/gtest.h>
#include <zvec/ailego/buffer/buffer_manager.h>
#include <zvec/ailego/logger/logger.h>

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
#endif

using namespace zvec::ailego;


// Scratch directory used by this test file, and the two file paths inside it.
const std::string working_dir = "./buffer_manager_dir/";
const std::string file_path_forward = working_dir + "test.forward_index";
const std::string file_path_vector = working_dir + "test.vector_index";


// Fixture for the BufferManager tests. The suite-level setup recreates the
// working directory, writes a 16 MiB vector file holding the consecutive
// uint32_t values 0, 1, 2, ... and then initializes the BufferManager
// singleton.
class BufferManagerTest : public testing::Test {
  /*****  Global initialization and cleanup - Start  *****/
 public:
  static void SetUpTestCase() {
    // Start from a clean directory. The command is assembled in a std::string
    // so that it can never be truncated by a fixed-size buffer.
    const std::string cleanup_cmd = "rm -rf " + working_dir;
    system(cleanup_cmd.c_str());

    // A failed setup is reported as a test failure instead of being logged
    // only; otherwise the tests would run against a missing fixture file.
    if (!File::MakePath(working_dir)) {
      LOG_ERROR("Failed to create working directory.");
      FAIL() << "Failed to create working directory.";
    }

    File file_vector_index;
    size_t file_vector_size = 16 * 1024 * 1024;
    if (!file_vector_index.create(file_path_vector, file_vector_size)) {
      LOG_ERROR("Failed to create vector index file.");
      FAIL() << "Failed to create vector index file.";
    }
    // Populate vector file with number series
    for (uint32_t i = 0; i < file_vector_size / sizeof(uint32_t); ++i) {
      file_vector_index.write((void *)&i, sizeof(i));
    }
    file_vector_index.close();

    // NOTE(review): the first argument is presumably the pool capacity in
    // bytes (4 MiB); the meaning of the second argument is not visible here.
    BufferManager::Instance().init(4 * 1024 * 1024, 1);
  }

  static void TearDownTestCase() {}
  /*****  Global initialization and cleanup - End  *****/
};


// Reads 4 KiB blocks of the vector file through the BufferManager, from many
// threads at once, and checks the block contents, the handle reference counts
// and the total buffered size before and after a large read.
TEST_F(BufferManagerTest, READ_VECTOR_FILE) {
  uint32_t size_4KB = 4 * 1024;

  // Pins the 4 KiB block at `offset` and checks that it holds the consecutive
  // uint32_t values starting at offset / sizeof(uint32_t).
  // NOTE(review): a failing ASSERT_EQ returns from the lambda before
  // unpin_vector_data() is reached, leaving the block pinned.
  auto read_and_verify_numbers = [&](uint32_t offset) {
    BufferID id = BufferID::VectorID(file_path_vector, offset, size_4KB);
    auto handle = BufferManager::Instance().acquire(id);
    uint32_t *vector_data = (uint32_t *)handle.pin_vector_data();
    uint32_t num_start = offset / sizeof(uint32_t);
    for (uint32_t i = 0; i < size_4KB / sizeof(uint32_t); i++) {
      ASSERT_EQ(*(vector_data + i), num_start + i);
    }
    handle.unpin_vector_data();
  };

  std::vector<std::thread> threads;

  // Read the same part concurrently
  for (int i = 0; i < 10; ++i) {
    threads.emplace_back(read_and_verify_numbers, 3 * size_4KB);
  }
  for (auto &thread : threads) {
    thread.join();
  }

  // All reader threads have unpinned by now, so a single pin must yield a
  // reference count of 1 and the matching unpin must bring it back to 0.
  {  // Verify the reference count
    BufferID id = BufferID::VectorID(file_path_vector, 3 * size_4KB, size_4KB);
    auto handle = BufferManager::Instance().acquire(id);
    handle.pin_vector_data();
    ASSERT_EQ(handle.references(), 1);
    handle.unpin_vector_data();
    ASSERT_EQ(handle.references(), 0);
  }

  threads.clear();
  // Read different parts concurrently
  for (int i = 0; i < 30; ++i) {
    threads.emplace_back(read_and_verify_numbers, i * size_4KB);
  }
  for (auto &thread : threads) {
    thread.join();
  }
  // Thirty distinct 4 KiB blocks were requested; the block at 3 * 4 KiB read
  // earlier is one of them, so the expected total is 30 blocks.
  ASSERT_EQ(BufferManager::Instance().total_size_in_bytes(), 30 * 4 * 1024);

  {  // Read a large chunk so that the buffer is full
    BufferID id =
        BufferID::VectorID(file_path_vector, 4 * 1024 * 1024, 4 * 1024 * 1024);
    auto handle = BufferManager::Instance().acquire(id);
    handle.pin_vector_data();
    handle.unpin_vector_data();
  }

  // While this second 4 MiB block is pinned, the total must be exactly 4 MiB,
  // i.e. everything loaded before it is expected to have been dropped.
  {  // Trigger eviction
    BufferID id =
        BufferID::VectorID(file_path_vector, 8 * 1024 * 1024, 4 * 1024 * 1024);
    auto handle = BufferManager::Instance().acquire(id);
    handle.pin_vector_data();
    ASSERT_EQ(BufferManager::Instance().total_size_in_bytes(), 4 * 1024 * 1024);
    handle.unpin_vector_data();
    ASSERT_EQ(handle.references(), 0);
  }
}

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif

================================================
FILE: tests/ailego/container/bitmap_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <algorithm>
#include <bitset>
#include <iostream>
#include <memory>
#include <set>
#include <vector>
#include <ailego/container/bitmap.h>
#include <ailego/utility/bitset_helper.h>
#include <gtest/gtest.h>
#include <zvec/ailego/utility/time_helper.h>

// INTRINSICS_SET names the SIMD instruction set the file was compiled for. It
// is the first match in the order AVX2, AVX, SSE4.2, SSE4.1, SSE2, or "NONE"
// when none of these macros is defined. The benchmark test prints it in front
// of each timing line.
#if defined(__AVX2__)
#define INTRINSICS_SET "AVX2"
#elif defined(__AVX__)
#define INTRINSICS_SET "AVX"
#elif defined(__SSE4_2__)
#define INTRINSICS_SET "SSE4.2"
#elif defined(__SSE4_1__)
#define INTRINSICS_SET "SSE4.1"
#elif defined(__SSE2__)
#define INTRINSICS_SET "SSE2"
#else
#define INTRINSICS_SET "NONE"
#endif

using namespace zvec::ailego;

// Covers the basic FixedBitset API: size(), set/reset/flip, copy construction,
// copy assignment, cardinality() and the test_any/test_all/test_none queries.
TEST(FixedBitset, General) {
  FixedBitset<0> bits0;
  FixedBitset<32> bits32;
  FixedBitset<64> bits64;

  EXPECT_EQ(0u, bits0.size());
  EXPECT_EQ(32u, bits32.size());
  EXPECT_EQ(64u, bits64.size());

  // Nothing has been set yet.
  EXPECT_TRUE(bits32.test_none());
  EXPECT_TRUE(bits64.test_none());

  bits32.set(30);
  bits64.set(60);

  // The copies are taken while exactly one bit is set ...
  FixedBitset<32> copy32(bits32);
  FixedBitset<64> copy64(bits64);

  // ... so a later change to the originals must not show up in them.
  bits32.set(28);
  bits64.set(55);

  EXPECT_TRUE(copy32.test_any());
  EXPECT_TRUE(copy64.test_any());

  EXPECT_FALSE(copy32.test_all());
  EXPECT_FALSE(copy64.test_all());

  EXPECT_EQ(1u, copy32.cardinality());
  EXPECT_EQ(1u, copy64.cardinality());

  // Assignment picks up the second bit.
  copy32 = bits32;
  copy64 = bits64;

  EXPECT_EQ(2u, copy32.cardinality());
  EXPECT_EQ(2u, copy64.cardinality());

  bits32.reset(28);
  bits64.reset(55);

  copy32 = bits32;
  copy64 = bits64;

  EXPECT_EQ(1u, copy32.cardinality());
  EXPECT_EQ(1u, copy64.cardinality());

  // Flipping the last remaining bit empties the originals.
  bits32.flip(30);
  bits64.flip(60);

  EXPECT_EQ(0u, bits32.cardinality());
  EXPECT_EQ(0u, bits64.cardinality());
}

// Compares FixedBitset::bitwise_and against std::bitset on random contents.
TEST(FixedBitset, And) {
  srand(static_cast<uint32_t>(time(nullptr)));
  srand(static_cast<uint32_t>(rand()));

  FixedBitset<3552> lhs;
  FixedBitset<3552> rhs;
  FixedBitset<3552> result;
  std::bitset<3552> ref_lhs;
  std::bitset<3552> ref_rhs;
  std::bitset<3552> ref_result;

  // Set random positions, mirrored into the reference bitsets.
  for (uint32_t round = 0; round < 623; ++round) {
    const uint32_t pos_lhs = static_cast<uint32_t>(rand() % lhs.size());
    const uint32_t pos_rhs = static_cast<uint32_t>(rand() % rhs.size());

    lhs.set(pos_lhs);
    ref_lhs.set(pos_lhs);

    rhs.set(pos_rhs);
    ref_rhs.set(pos_rhs);
  }
  // Then flip random positions the same way.
  for (uint32_t round = 0; round < 623; ++round) {
    const uint32_t pos_lhs = static_cast<uint32_t>(rand() % lhs.size());
    const uint32_t pos_rhs = static_cast<uint32_t>(rand() % rhs.size());

    lhs.flip(pos_lhs);
    ref_lhs.flip(pos_lhs);

    rhs.flip(pos_rhs);
    ref_rhs.flip(pos_rhs);
  }

  result = lhs;
  result.bitwise_and(rhs);
  ref_result = ref_lhs & ref_rhs;

  for (uint32_t pos = 0; pos < result.size(); ++pos) {
    EXPECT_EQ(result.test(pos), ref_result.test(pos));
  }
  EXPECT_EQ(ref_result.count(), result.cardinality());

  // Run the operation once more on 512-bit views that start one and three
  // 32-bit words into the storage. The outcome is not checked.
  // NOTE(review): presumably this covers buffers that are not aligned to the
  // full vector width -- confirm.
  uint32_t *dst_words = (uint32_t *)result.data() + 1;
  uint32_t *src_words = (uint32_t *)rhs.data() + 3;
  FixedBitset<512>::Cast(dst_words)->bitwise_and(
      *(FixedBitset<512>::Cast(src_words)));
}

// Compares FixedBitset::bitwise_andnot against `a & ~b` on std::bitset, using
// random contents.
TEST(FixedBitset, Andnot) {
  srand(static_cast<uint32_t>(time(nullptr)));
  srand(static_cast<uint32_t>(rand()));

  FixedBitset<2528> lhs;
  FixedBitset<2528> rhs;
  FixedBitset<2528> result;
  std::bitset<2528> ref_lhs;
  std::bitset<2528> ref_rhs;
  std::bitset<2528> ref_result;

  // Set random positions, mirrored into the reference bitsets.
  for (uint32_t round = 0; round < 623; ++round) {
    const uint32_t pos_lhs = static_cast<uint32_t>(rand() % lhs.size());
    const uint32_t pos_rhs = static_cast<uint32_t>(rand() % rhs.size());

    lhs.set(pos_lhs);
    ref_lhs.set(pos_lhs);

    rhs.set(pos_rhs);
    ref_rhs.set(pos_rhs);
  }
  // Then flip random positions the same way.
  for (uint32_t round = 0; round < 623; ++round) {
    const uint32_t pos_lhs = static_cast<uint32_t>(rand() % lhs.size());
    const uint32_t pos_rhs = static_cast<uint32_t>(rand() % rhs.size());

    lhs.flip(pos_lhs);
    ref_lhs.flip(pos_lhs);

    rhs.flip(pos_rhs);
    ref_rhs.flip(pos_rhs);
  }

  result = lhs;
  result.bitwise_andnot(rhs);
  ref_result = ref_lhs & (~ref_rhs);

  for (uint32_t pos = 0; pos < result.size(); ++pos) {
    EXPECT_EQ(result.test(pos), ref_result.test(pos));
  }
  EXPECT_EQ(ref_result.count(), result.cardinality());

  // Run the operation once more on 512-bit views that start one and three
  // 32-bit words into the storage. The outcome is not checked.
  uint32_t *dst_words = (uint32_t *)result.data() + 1;
  uint32_t *src_words = (uint32_t *)rhs.data() + 3;
  FixedBitset<512>::Cast(dst_words)->bitwise_andnot(
      *(FixedBitset<512>::Cast(src_words)));
}

// Compares FixedBitset::bitwise_or against std::bitset on random contents.
TEST(FixedBitset, Or) {
  srand(static_cast<uint32_t>(time(nullptr)));
  srand(static_cast<uint32_t>(rand()));

  FixedBitset<2528> lhs;
  FixedBitset<2528> rhs;
  FixedBitset<2528> result;
  std::bitset<2528> ref_lhs;
  std::bitset<2528> ref_rhs;
  std::bitset<2528> ref_result;

  // Set random positions, mirrored into the reference bitsets.
  for (uint32_t round = 0; round < 623; ++round) {
    const uint32_t pos_lhs = static_cast<uint32_t>(rand() % lhs.size());
    const uint32_t pos_rhs = static_cast<uint32_t>(rand() % rhs.size());

    lhs.set(pos_lhs);
    ref_lhs.set(pos_lhs);

    rhs.set(pos_rhs);
    ref_rhs.set(pos_rhs);
  }
  // Then flip random positions the same way.
  for (uint32_t round = 0; round < 623; ++round) {
    const uint32_t pos_lhs = static_cast<uint32_t>(rand() % lhs.size());
    const uint32_t pos_rhs = static_cast<uint32_t>(rand() % rhs.size());

    lhs.flip(pos_lhs);
    ref_lhs.flip(pos_lhs);

    rhs.flip(pos_rhs);
    ref_rhs.flip(pos_rhs);
  }

  result = lhs;
  result.bitwise_or(rhs);
  ref_result = ref_lhs | ref_rhs;

  for (uint32_t pos = 0; pos < result.size(); ++pos) {
    EXPECT_EQ(result.test(pos), ref_result.test(pos));
  }
  EXPECT_EQ(ref_result.count(), result.cardinality());

  // Run the operation once more on 512-bit views that start one and three
  // 32-bit words into the storage. The outcome is not checked.
  uint32_t *dst_words = (uint32_t *)result.data() + 1;
  uint32_t *src_words = (uint32_t *)rhs.data() + 3;
  FixedBitset<512>::Cast(dst_words)->bitwise_or(
      *(FixedBitset<512>::Cast(src_words)));
}

// Compares FixedBitset::bitwise_xor against std::bitset on random contents.
TEST(FixedBitset, Xor) {
  srand(static_cast<uint32_t>(time(nullptr)));
  srand(static_cast<uint32_t>(rand()));

  FixedBitset<2528> lhs;
  FixedBitset<2528> rhs;
  FixedBitset<2528> result;
  std::bitset<2528> ref_lhs;
  std::bitset<2528> ref_rhs;
  std::bitset<2528> ref_result;

  // Set random positions, mirrored into the reference bitsets.
  for (uint32_t round = 0; round < 623; ++round) {
    const uint32_t pos_lhs = static_cast<uint32_t>(rand() % lhs.size());
    const uint32_t pos_rhs = static_cast<uint32_t>(rand() % rhs.size());

    lhs.set(pos_lhs);
    ref_lhs.set(pos_lhs);

    rhs.set(pos_rhs);
    ref_rhs.set(pos_rhs);
  }
  // Then flip random positions the same way.
  for (uint32_t round = 0; round < 623; ++round) {
    const uint32_t pos_lhs = static_cast<uint32_t>(rand() % lhs.size());
    const uint32_t pos_rhs = static_cast<uint32_t>(rand() % rhs.size());

    lhs.flip(pos_lhs);
    ref_lhs.flip(pos_lhs);

    rhs.flip(pos_rhs);
    ref_rhs.flip(pos_rhs);
  }

  result = lhs;
  result.bitwise_xor(rhs);
  ref_result = ref_lhs ^ ref_rhs;

  for (uint32_t pos = 0; pos < result.size(); ++pos) {
    EXPECT_EQ(result.test(pos), ref_result.test(pos));
  }
  EXPECT_EQ(ref_result.count(), result.cardinality());

  // Run the operation once more on 512-bit views that start one and three
  // 32-bit words into the storage. The outcome is not checked.
  uint32_t *dst_words = (uint32_t *)result.data() + 1;
  uint32_t *src_words = (uint32_t *)rhs.data() + 3;
  FixedBitset<512>::Cast(dst_words)->bitwise_xor(
      *(FixedBitset<512>::Cast(src_words)));
}

// Checks bitwise_not(): inverting a completely set bitset must leave it empty.
// Two widths are covered, 1504 and 512 bits.
TEST(FixedBitset, Not) {
  FixedBitset<1504> wide;
  EXPECT_FALSE(wide.test_all());
  EXPECT_FALSE(wide.test_any());
  EXPECT_TRUE(wide.test_none());
  EXPECT_EQ(0u, wide.cardinality());

  for (uint32_t pos = 0; pos < wide.size(); ++pos) {
    wide.set(pos);
  }
  EXPECT_EQ(wide.size(), wide.cardinality());
  EXPECT_TRUE(wide.test_all());
  EXPECT_TRUE(wide.test_any());
  EXPECT_FALSE(wide.test_none());

  wide.bitwise_not();
  EXPECT_FALSE(wide.test_all());
  EXPECT_FALSE(wide.test_any());
  EXPECT_TRUE(wide.test_none());

  FixedBitset<512> narrow;
  EXPECT_FALSE(narrow.test_all());
  EXPECT_FALSE(narrow.test_any());
  EXPECT_TRUE(narrow.test_none());

  for (uint32_t pos = 0; pos < narrow.size(); ++pos) {
    narrow.set(pos);
  }
  EXPECT_TRUE(narrow.test_all());
  EXPECT_TRUE(narrow.test_any());
  EXPECT_FALSE(narrow.test_none());

  narrow.bitwise_not();
  EXPECT_FALSE(narrow.test_all());
  EXPECT_FALSE(narrow.test_any());
  EXPECT_TRUE(narrow.test_none());

  // Invert a 480-bit view that starts one 32-bit word into the storage. The
  // outcome is not checked.
  uint32_t *shifted_words = (uint32_t *)narrow.data() + 1;
  FixedBitset<512 - 32>::Cast(shifted_words)->bitwise_not();
}

// test_all() must be true only while every bit is set.
TEST(FixedBitset, TestAll) {
  FixedBitset<1504> bits;
  EXPECT_FALSE(bits.test_all());

  for (uint32_t pos = 0; pos < bits.size(); ++pos) {
    bits.set(pos);
  }
  EXPECT_TRUE(bits.test_all());

  // Clearing a single bit is enough to make it false, both for the whole
  // bitset and for a view that starts one 32-bit word later.
  bits.reset(999u);
  EXPECT_FALSE(bits.test_all());

  uint32_t *shifted_words = (uint32_t *)bits.data() + 1;
  EXPECT_FALSE(FixedBitset<1504 - 32>::Cast(shifted_words)->test_all());
}

// test_any() must be true as long as at least one bit is set.
TEST(FixedBitset, TestAny) {
  FixedBitset<1504> bits;
  EXPECT_FALSE(bits.test_any());

  for (uint32_t pos = 666; pos < 888; ++pos) {
    bits.set(pos);
  }
  EXPECT_TRUE(bits.test_any());

  // Clear the lower part of the range; bits 777..887 stay set.
  for (uint32_t pos = 666; pos < 777; ++pos) {
    bits.reset(pos);
  }
  EXPECT_TRUE(bits.test_any());

  // Same query on a view that starts one 32-bit word into the storage.
  uint32_t *shifted_words = (uint32_t *)bits.data() + 1;
  EXPECT_TRUE(FixedBitset<1504 - 32>::Cast(shifted_words)->test_any());
}

// test_none() must be true only while no bit is set.
TEST(FixedBitset, TestNone) {
  FixedBitset<1504> bits;
  EXPECT_TRUE(bits.test_none());

  for (uint32_t pos = 1000; pos < 1111; ++pos) {
    bits.set(pos);
  }
  EXPECT_FALSE(bits.test_none());

  // Flip all but the last of those bits back; bit 1110 stays set.
  for (uint32_t pos = 1000; pos < 1110; ++pos) {
    bits.flip(pos);
  }
  EXPECT_FALSE(bits.test_none());

  // Same query on a view that starts one 32-bit word into the storage.
  uint32_t *shifted_words = (uint32_t *)bits.data() + 1;
  EXPECT_FALSE(FixedBitset<1504 - 32>::Cast(shifted_words)->test_none());
}

// Checks that extract() reports exactly the positions that were set, in
// ascending order.
TEST(FixedBitset, Extract) {
  srand((uint32_t)time(NULL));
  srand((uint32_t)rand());

  FixedBitset<2528> bitset1;
  std::vector<size_t> vector1;

  // Set random positions and remember each of them.
  for (uint32_t i = 0; i < 1111; ++i) {
    uint32_t val1 = (uint32_t)(rand() % bitset1.size());

    bitset1.set(val1);
    vector1.push_back(val1);
  }

  // The same position may have been drawn more than once; reduce the record
  // to a sorted list of unique positions.
  std::sort(vector1.begin(), vector1.end());
  vector1.erase(std::unique(vector1.begin(), vector1.end()), vector1.end());

  std::vector<size_t> vector2;
  bitset1.extract(&vector2);

  // ASSERT rather than EXPECT: std::equal below reads vector1.size() elements
  // from vector2, so it must not run when vector2 is shorter.
  ASSERT_EQ(vector1.size(), vector2.size());
  EXPECT_TRUE(std::equal(vector1.begin(), vector1.end(), vector2.begin()));
}

// Checks FixedBitset::BitwiseXorCardinality against std::bitset, and against
// the identity |a ^ b| == |a & ~b| + |b & ~a|.
TEST(FixedBitset, BitwiseXorCardinality) {
  using Bits = FixedBitset<2528>;

  srand(static_cast<uint32_t>(time(nullptr)));
  srand(static_cast<uint32_t>(rand()));

  Bits lhs;
  Bits rhs;
  std::bitset<2528> ref_lhs;
  std::bitset<2528> ref_rhs;

  // Set random positions, mirrored into the reference bitsets.
  for (uint32_t round = 0; round < 600; ++round) {
    const uint32_t pos_lhs = static_cast<uint32_t>(rand() % lhs.size());
    const uint32_t pos_rhs = static_cast<uint32_t>(rand() % rhs.size());

    lhs.set(pos_lhs);
    ref_lhs.set(pos_lhs);

    rhs.set(pos_rhs);
    ref_rhs.set(pos_rhs);
  }
  // Then flip random positions the same way.
  for (uint32_t round = 0; round < 600; ++round) {
    const uint32_t pos_lhs = static_cast<uint32_t>(rand() % lhs.size());
    const uint32_t pos_rhs = static_cast<uint32_t>(rand() % rhs.size());

    lhs.flip(pos_lhs);
    ref_lhs.flip(pos_lhs);

    rhs.flip(pos_rhs);
    ref_rhs.flip(pos_rhs);
  }

  // XOR of a bitset with itself has no set bits.
  EXPECT_EQ(0u, Bits::BitwiseXorCardinality(lhs, lhs));
  EXPECT_EQ(0u, Bits::BitwiseXorCardinality(rhs, rhs));
  EXPECT_EQ((ref_lhs ^ ref_rhs).count(),
            Bits::BitwiseXorCardinality(lhs, rhs));

  EXPECT_EQ(Bits::BitwiseAndnotCardinality(lhs, rhs) +
                Bits::BitwiseAndnotCardinality(rhs, lhs),
            Bits::BitwiseXorCardinality(lhs, rhs));
}

// Checks FixedBitset::BitwiseOrCardinality against std::bitset.
TEST(FixedBitset, BitwiseOrCardinality) {
  using Bits = FixedBitset<2528>;

  srand(static_cast<uint32_t>(time(nullptr)));
  srand(static_cast<uint32_t>(rand()));

  Bits lhs;
  Bits rhs;
  std::bitset<2528> ref_lhs;
  std::bitset<2528> ref_rhs;

  // Set random positions, mirrored into the reference bitsets.
  for (uint32_t round = 0; round < 600; ++round) {
    const uint32_t pos_lhs = static_cast<uint32_t>(rand() % lhs.size());
    const uint32_t pos_rhs = static_cast<uint32_t>(rand() % rhs.size());

    lhs.set(pos_lhs);
    ref_lhs.set(pos_lhs);

    rhs.set(pos_rhs);
    ref_rhs.set(pos_rhs);
  }
  // Then flip random positions the same way.
  for (uint32_t round = 0; round < 600; ++round) {
    const uint32_t pos_lhs = static_cast<uint32_t>(rand() % lhs.size());
    const uint32_t pos_rhs = static_cast<uint32_t>(rand() % rhs.size());

    lhs.flip(pos_lhs);
    ref_lhs.flip(pos_lhs);

    rhs.flip(pos_rhs);
    ref_rhs.flip(pos_rhs);
  }

  EXPECT_EQ((ref_lhs | ref_rhs).count(),
            Bits::BitwiseOrCardinality(lhs, rhs));
}

// Checks FixedBitset::BitwiseAndCardinality against std::bitset.
TEST(FixedBitset, BitwiseAndCardinality) {
  using Bits = FixedBitset<2528>;

  srand(static_cast<uint32_t>(time(nullptr)));
  srand(static_cast<uint32_t>(rand()));

  Bits lhs;
  Bits rhs;
  std::bitset<2528> ref_lhs;
  std::bitset<2528> ref_rhs;

  // Set random positions, mirrored into the reference bitsets.
  for (uint32_t round = 0; round < 600; ++round) {
    const uint32_t pos_lhs = static_cast<uint32_t>(rand() % lhs.size());
    const uint32_t pos_rhs = static_cast<uint32_t>(rand() % rhs.size());

    lhs.set(pos_lhs);
    ref_lhs.set(pos_lhs);

    rhs.set(pos_rhs);
    ref_rhs.set(pos_rhs);
  }
  // Then flip random positions the same way.
  for (uint32_t round = 0; round < 600; ++round) {
    const uint32_t pos_lhs = static_cast<uint32_t>(rand() % lhs.size());
    const uint32_t pos_rhs = static_cast<uint32_t>(rand() % rhs.size());

    lhs.flip(pos_lhs);
    ref_lhs.flip(pos_lhs);

    rhs.flip(pos_rhs);
    ref_rhs.flip(pos_rhs);
  }

  EXPECT_EQ((ref_lhs & ref_rhs).count(),
            Bits::BitwiseAndCardinality(lhs, rhs));
}

// Checks FixedBitset::BitwiseAndnotCardinality against std::bitset, in both
// argument orders.
TEST(FixedBitset, BitwiseAndnotCardinality) {
  using Bits = FixedBitset<2528>;

  srand(static_cast<uint32_t>(time(nullptr)));
  srand(static_cast<uint32_t>(rand()));

  Bits lhs;
  Bits rhs;
  std::bitset<2528> ref_lhs;
  std::bitset<2528> ref_rhs;

  // Set random positions, mirrored into the reference bitsets.
  for (uint32_t round = 0; round < 600; ++round) {
    const uint32_t pos_lhs = static_cast<uint32_t>(rand() % lhs.size());
    const uint32_t pos_rhs = static_cast<uint32_t>(rand() % rhs.size());

    lhs.set(pos_lhs);
    ref_lhs.set(pos_lhs);

    rhs.set(pos_rhs);
    ref_rhs.set(pos_rhs);
  }
  // Then flip random positions the same way.
  for (uint32_t round = 0; round < 600; ++round) {
    const uint32_t pos_lhs = static_cast<uint32_t>(rand() % lhs.size());
    const uint32_t pos_rhs = static_cast<uint32_t>(rand() % rhs.size());

    lhs.flip(pos_lhs);
    ref_lhs.flip(pos_lhs);

    rhs.flip(pos_rhs);
    ref_rhs.flip(pos_rhs);
  }

  EXPECT_EQ((ref_lhs & ~ref_rhs).count(),
            Bits::BitwiseAndnotCardinality(lhs, rhs));

  EXPECT_EQ((ref_rhs & ~ref_lhs).count(),
            Bits::BitwiseAndnotCardinality(rhs, lhs));
}

// Timing run, no assertions: applies each cardinality function and each
// in-place bitwise operation to 100000 pairs of 2048-bit bitsets and prints
// the elapsed microseconds, prefixed with the compiled-in instruction set.
TEST(FixedBitset, Benchmark) {
  srand(static_cast<uint32_t>(time(nullptr)));
  srand(static_cast<uint32_t>(rand()));

  const uint32_t dimension = 2048u;
  const uint32_t test_count = 100000u;
  using Bits = FixedBitset<dimension>;

  std::vector<Bits> lhs_set;
  std::vector<Bits> rhs_set;

  std::unique_ptr<Bits> lhs_seed(new Bits);
  std::unique_ptr<Bits> rhs_seed(new Bits);

  // Build two random seed bitsets: set, then flip, then reset random bits.
  for (uint32_t round = 0; round < 2000; ++round) {
    const uint32_t pos_lhs = static_cast<uint32_t>(rand() % lhs_seed->size());
    const uint32_t pos_rhs = static_cast<uint32_t>(rand() % rhs_seed->size());

    lhs_seed->set(pos_lhs);
    rhs_seed->set(pos_rhs);
  }
  for (uint32_t round = 0; round < 1000; ++round) {
    const uint32_t pos_lhs = static_cast<uint32_t>(rand() % lhs_seed->size());
    const uint32_t pos_rhs = static_cast<uint32_t>(rand() % rhs_seed->size());

    lhs_seed->flip(pos_lhs);
    rhs_seed->flip(pos_rhs);
  }
  for (uint32_t round = 0; round < 500; ++round) {
    const uint32_t pos_lhs = static_cast<uint32_t>(rand() % lhs_seed->size());
    const uint32_t pos_rhs = static_cast<uint32_t>(rand() % rhs_seed->size());

    lhs_seed->reset(pos_lhs);
    rhs_seed->reset(pos_rhs);
  }

  // Every slot of the two vectors is a copy of the corresponding seed.
  lhs_set.reserve(test_count);
  rhs_set.reserve(test_count);
  for (uint32_t n = 0; n < test_count; ++n) {
    lhs_set.push_back(*lhs_seed);
    rhs_set.push_back(*rhs_seed);
  }

  // --- Cardinality functions; the sum is printed along with the timing. ---
  {
    const uint64_t started = Monotime::MicroSeconds();
    uint64_t total = 0;
    for (uint32_t n = 0; n < test_count; ++n) {
      total += Bits::BitwiseAndCardinality(lhs_set[n], rhs_set[n]);
    }
    std::cout << INTRINSICS_SET << " BitwiseAndCardinality: "
              << Monotime::MicroSeconds() - started << " us, sum: " << total
              << std::endl;
  }

  {
    const uint64_t started = Monotime::MicroSeconds();
    uint64_t total = 0;
    for (uint32_t n = 0; n < test_count; ++n) {
      total += Bits::BitwiseAndnotCardinality(lhs_set[n], rhs_set[n]);
    }
    std::cout << INTRINSICS_SET << " BitwiseAndnotCardinality: "
              << Monotime::MicroSeconds() - started << " us, sum: " << total
              << std::endl;
  }

  {
    const uint64_t started = Monotime::MicroSeconds();
    uint64_t total = 0;
    for (uint32_t n = 0; n < test_count; ++n) {
      total += Bits::BitwiseXorCardinality(lhs_set[n], rhs_set[n]);
    }
    std::cout << INTRINSICS_SET << " BitwiseXorCardinality: "
              << Monotime::MicroSeconds() - started << " us, sum: " << total
              << std::endl;
  }

  {
    const uint64_t started = Monotime::MicroSeconds();
    uint64_t total = 0;
    for (uint32_t n = 0; n < test_count; ++n) {
      total += Bits::BitwiseOrCardinality(lhs_set[n], rhs_set[n]);
    }
    std::cout << INTRINSICS_SET << " BitwiseOrCardinality: "
              << Monotime::MicroSeconds() - started << " us, sum: " << total
              << std::endl;
  }

  // --- In-place operations, accumulated into a copy of the first entry. ---
  {
    std::unique_ptr<Bits> accum(new Bits);
    *accum = lhs_set[0];

    const uint64_t started = Monotime::MicroSeconds();
    for (uint32_t n = 0; n < test_count; ++n) {
      accum->bitwise_and(rhs_set[n]);
    }
    std::cout << INTRINSICS_SET
              << " And: " << Monotime::MicroSeconds() - started << " us"
              << std::endl;
  }

  {
    std::unique_ptr<Bits> accum(new Bits);
    *accum = lhs_set[0];

    const uint64_t started = Monotime::MicroSeconds();
    for (uint32_t n = 0; n < test_count; ++n) {
      accum->bitwise_andnot(rhs_set[n]);
    }
    std::cout << INTRINSICS_SET
              << " Andnot: " << Monotime::MicroSeconds() - started << " us"
              << std::endl;
  }

  {
    std::unique_ptr<Bits> accum(new Bits);
    *accum = lhs_set[0];

    const uint64_t started = Monotime::MicroSeconds();
    for (uint32_t n = 0; n < test_count; ++n) {
      accum->bitwise_or(rhs_set[n]);
    }
    std::cout << INTRINSICS_SET
              << " Or: " << Monotime::MicroSeconds() - started << " us"
              << std::endl;
  }

  {
    std::unique_ptr<Bits> accum(new Bits);
    *accum = lhs_set[0];

    const uint64_t started = Monotime::MicroSeconds();
    for (uint32_t n = 0; n < test_count; ++n) {
      accum->bitwise_xor(rhs_set[n]);
    }
    std::cout << INTRINSICS_SET
              << " Xor: " << Monotime::MicroSeconds() - started << " us"
              << std::endl;
  }
}

// Covers the basic Bitset API: sizing on construction, set/reset/flip, copy
// construction, copy assignment, cardinality() and the test_* queries.
TEST(Bitset, General) {
  // The requested sizes 31 and 61 are expected to come back as 32 and 64.
  Bitset bits32(31);
  Bitset bits64(61);

  EXPECT_EQ(32u, bits32.size());
  EXPECT_EQ(64u, bits64.size());

  // Nothing has been set yet.
  EXPECT_TRUE(bits32.test_none());
  EXPECT_TRUE(bits64.test_none());

  bits32.set(30);
  bits64.set(60);

  // The copies are taken while exactly one bit is set ...
  Bitset copy32(bits32);
  Bitset copy64(bits64);

  // ... so a later change to the originals must not show up in them.
  bits32.set(28);
  bits64.set(55);

  EXPECT_TRUE(copy32.test_any());
  EXPECT_TRUE(copy64.test_any());

  EXPECT_FALSE(copy32.test_all());
  EXPECT_FALSE(copy64.test_all());

  EXPECT_EQ(1u, copy32.cardinality());
  EXPECT_EQ(1u, copy64.cardinality());

  // Assignment picks up the second bit.
  copy32 = bits32;
  copy64 = bits64;

  EXPECT_EQ(2u, copy32.cardinality());
  EXPECT_EQ(2u, copy64.cardinality());

  bits32.reset(28);
  bits64.reset(55);

  copy32 = bits32;
  copy64 = bits64;

  EXPECT_EQ(1u, copy32.cardinality());
  EXPECT_EQ(1u, copy64.cardinality());

  // Flipping the last remaining bit empties the originals.
  bits32.flip(30);
  bits64.flip(60);

  EXPECT_EQ(0u, bits32.cardinality());
  EXPECT_EQ(0u, bits64.cardinality());
}

// Checks Bitset::BitwiseXorCardinality for two bitsets of different sizes:
// against std::bitset, against the and-not identity, and for symmetry.
TEST(Bitset, BitwiseXorCardinality) {
  srand(static_cast<uint32_t>(time(nullptr)));
  srand(static_cast<uint32_t>(rand()));

  Bitset lhs;
  Bitset rhs;
  lhs.resize(500000);
  rhs.resize(630000);
  std::bitset<638888> ref_lhs;
  std::bitset<638888> ref_rhs;

  // Set random positions, mirrored into the reference bitsets.
  for (uint32_t round = 0; round < 800; ++round) {
    const uint32_t pos_lhs = static_cast<uint32_t>(rand() % lhs.size());
    const uint32_t pos_rhs = static_cast<uint32_t>(rand() % rhs.size());

    lhs.set(pos_lhs);
    ref_lhs.set(pos_lhs);

    rhs.set(pos_rhs);
    ref_rhs.set(pos_rhs);
  }
  // Then flip random positions the same way.
  for (uint32_t round = 0; round < 600; ++round) {
    const uint32_t pos_lhs = static_cast<uint32_t>(rand() % lhs.size());
    const uint32_t pos_rhs = static_cast<uint32_t>(rand() % rhs.size());

    lhs.flip(pos_lhs);
    ref_lhs.flip(pos_lhs);

    rhs.flip(pos_rhs);
    ref_rhs.flip(pos_rhs);
  }

  EXPECT_EQ((ref_lhs ^ ref_rhs).count(),
            Bitset::BitwiseXorCardinality(lhs, rhs));

  // |a ^ b| == |a & ~b| + |b & ~a|
  EXPECT_EQ(Bitset::BitwiseAndnotCardinality(lhs, rhs) +
                Bitset::BitwiseAndnotCardinality(rhs, lhs),
            Bitset::BitwiseXorCardinality(lhs, rhs));
  // The argument order must not matter.
  EXPECT_EQ(Bitset::BitwiseXorCardinality(lhs, rhs),
            Bitset::BitwiseXorCardinality(rhs, lhs));
}

// Checks Bitset::BitwiseOrCardinality for two bitsets of different sizes,
// against std::bitset and for symmetry.
TEST(Bitset, BitwiseOrCardinality) {
  srand(static_cast<uint32_t>(time(nullptr)));
  srand(static_cast<uint32_t>(rand()));

  Bitset lhs;
  Bitset rhs;
  lhs.resize(599999);
  rhs.resize(500000);

  std::bitset<638888> ref_lhs;
  std::bitset<638888> ref_rhs;

  // Set random positions, mirrored into the reference bitsets.
  for (uint32_t round = 0; round < 800; ++round) {
    const uint32_t pos_lhs = static_cast<uint32_t>(rand() % lhs.size());
    const uint32_t pos_rhs = static_cast<uint32_t>(rand() % rhs.size());

    lhs.set(pos_lhs);
    ref_lhs.set(pos_lhs);

    rhs.set(pos_rhs);
    ref_rhs.set(pos_rhs);
  }
  // Then flip random positions the same way.
  for (uint32_t round = 0; round < 600; ++round) {
    const uint32_t pos_lhs = static_cast<uint32_t>(rand() % lhs.size());
    const uint32_t pos_rhs = static_cast<uint32_t>(rand() % rhs.size());

    lhs.flip(pos_lhs);
    ref_lhs.flip(pos_lhs);

    rhs.flip(pos_rhs);
    ref_rhs.flip(pos_rhs);
  }

  EXPECT_EQ((ref_lhs | ref_rhs).count(),
            Bitset::BitwiseOrCardinality(lhs, rhs));
  // The argument order must not matter.
  EXPECT_EQ(Bitset::BitwiseOrCardinality(lhs, rhs),
            Bitset::BitwiseOrCardinality(rhs, lhs));
}

// Checks Bitset::BitwiseAndCardinality for two bitsets of different sizes,
// against std::bitset and for symmetry.
TEST(Bitset, BitwiseAndCardinality) {
  srand(static_cast<uint32_t>(time(nullptr)));
  srand(static_cast<uint32_t>(rand()));

  Bitset lhs;
  Bitset rhs;
  lhs.resize(500001);
  rhs.resize(599999);

  std::bitset<638888> ref_lhs;
  std::bitset<638888> ref_rhs;

  // Set random positions, mirrored into the reference bitsets.
  for (uint32_t round = 0; round < 800; ++round) {
    const uint32_t pos_lhs = static_cast<uint32_t>(rand() % lhs.size());
    const uint32_t pos_rhs = static_cast<uint32_t>(rand() % rhs.size());

    lhs.set(pos_lhs);
    ref_lhs.set(pos_lhs);

    rhs.set(pos_rhs);
    ref_rhs.set(pos_rhs);
  }
  // Then flip random positions the same way.
  for (uint32_t round = 0; round < 600; ++round) {
    const uint32_t pos_lhs = static_cast<uint32_t>(rand() % lhs.size());
    const uint32_t pos_rhs = static_cast<uint32_t>(rand() % rhs.size());

    lhs.flip(pos_lhs);
    ref_lhs.flip(pos_lhs);

    rhs.flip(pos_rhs);
    ref_rhs.flip(pos_rhs);
  }

  EXPECT_EQ((ref_lhs & ref_rhs).count(),
            Bitset::BitwiseAndCardinality(lhs, rhs));
  // The argument order must not matter.
  EXPECT_EQ(Bitset::BitwiseAndCardinality(lhs, rhs),
            Bitset::BitwiseAndCardinality(rhs, lhs));
}

// Checks Bitset::BitwiseAndnotCardinality for two bitsets of different sizes,
// against std::bitset and in both argument orders.
TEST(Bitset, BitwiseAndnotCardinality) {
  srand(static_cast<uint32_t>(time(nullptr)));
  srand(static_cast<uint32_t>(rand()));

  Bitset lhs;
  Bitset rhs;
  lhs.resize(599997);
  rhs.resize(500002);

  std::bitset<638888> ref_lhs;
  std::bitset<638888> ref_rhs;

  // Set random positions, mirrored into the reference bitsets.
  for (uint32_t round = 0; round < 800; ++round) {
    const uint32_t pos_lhs = static_cast<uint32_t>(rand() % lhs.size());
    const uint32_t pos_rhs = static_cast<uint32_t>(rand() % rhs.size());

    lhs.set(pos_lhs);
    ref_lhs.set(pos_lhs);

    rhs.set(pos_rhs);
    ref_rhs.set(pos_rhs);
  }
  // Then flip random positions the same way.
  for (uint32_t round = 0; round < 600; ++round) {
    const uint32_t pos_lhs = static_cast<uint32_t>(rand() % lhs.size());
    const uint32_t pos_rhs = static_cast<uint32_t>(rand() % rhs.size());

    lhs.flip(pos_lhs);
    ref_lhs.flip(pos_lhs);

    rhs.flip(pos_rhs);
    ref_rhs.flip(pos_rhs);
  }

  EXPECT_EQ((ref_lhs & ~ref_rhs).count(),
            Bitset::BitwiseAndnotCardinality(lhs, rhs));

  EXPECT_EQ((ref_rhs & ~ref_lhs).count(),
            Bitset::BitwiseAndnotCardinality(rhs, lhs));
}

// Walks through the basic Bitmap API (set, reset, flip, test, cardinality,
// copy, shrink_to_fit, clear) using two value tables that share no element.
TEST(Bitmap, General) {
  const uint32_t data1[] = {0,     1,      2,      4,      7,    9,
                            31,    65,     77,     100,    1000, 1999,
                            19999, 100000, 188888, 2999999};
  const uint32_t data2[] = {8,     12,    13,    24,     7777,      9999,
                            66666, 88888, 99999, 100002, 0x7fffffff};
  const size_t count1 = sizeof(data1) / sizeof(data1[0]);
  const size_t count2 = sizeof(data2) / sizeof(data2[0]);

  Bitmap source;

  EXPECT_EQ(0u, source.cardinality());
  for (uint32_t value : data1) {
    source.set(value);
  }

  // set(): a copy of the bitmap must contain every value of data1.
  Bitmap snapshot(source);

  EXPECT_NE(0u, snapshot.cardinality());
  EXPECT_EQ(count1, snapshot.cardinality());
  for (uint32_t value : data1) {
    EXPECT_TRUE(snapshot.test(value));
  }

  // reset(): clearing values that were never set changes nothing ...
  for (uint32_t value : data2) {
    source.reset(value);
  }
  for (uint32_t value : data2) {
    EXPECT_FALSE(source.test(value));
  }

  // ... while clearing the values that were set empties the bitmap.
  EXPECT_EQ(count1, source.cardinality());
  for (uint32_t value : data1) {
    source.reset(value);
  }
  EXPECT_EQ(0u, source.cardinality());

  // flip(): on an empty bitmap it sets each value of both tables.
  for (uint32_t value : data1) {
    source.flip(value);
  }
  for (uint32_t value : data2) {
    source.flip(value);
  }
  EXPECT_EQ(count1 + count2, source.cardinality());

  snapshot = source;
  for (uint32_t value : data1) {
    EXPECT_TRUE(snapshot.test(value));
  }
  for (uint32_t value : data2) {
    EXPECT_TRUE(snapshot.test(value));
  }

  // shrink_to_fit(): must not lose any value that is set.
  source.shrink_to_fit();
  for (uint32_t value : data1) {
    EXPECT_TRUE(source.test(value));
  }
  for (uint32_t value : data2) {
    EXPECT_TRUE(source.test(value));
  }

  // clear(): empties the bitmap but leaves an earlier copy untouched.
  EXPECT_NE(0u, source.cardinality());
  snapshot = source;
  source.clear();
  EXPECT_EQ(0u, source.cardinality());
  for (uint32_t value : data1) {
    EXPECT_FALSE(source.test(value));
  }
  for (uint32_t value : data2) {
    EXPECT_FALSE(source.test(value));
  }
  for (uint32_t value : data1) {
    EXPECT_TRUE(snapshot.test(value));
  }
  for (uint32_t value : data2) {
    EXPECT_TRUE(snapshot.test(value));
  }
}

// Checks how bucket_size() reacts to set(), reset() and shrink_to_fit():
// resetting bits alone keeps the bucket count, shrink_to_fit() reduces it.
TEST(Bitmap, ShrinkToFit) {
  // Expected bucket counts when the highest set value is 1000000 or 100000.
  const auto buckets_for_1m = (1000000u + 0xffff) / 0x10000;
  const auto buckets_for_100k = (100000u + 0xffff) / 0x10000;

  Bitmap bitmap;
  bitmap.shrink_to_fit();
  EXPECT_EQ(0u, bitmap.bucket_size());

  // One small value needs one bucket, which survives the reset ...
  bitmap.set(2);
  EXPECT_EQ(1u, bitmap.bucket_size());
  bitmap.reset(2);
  EXPECT_EQ(1u, bitmap.bucket_size());
  // ... and disappears only after shrinking.
  bitmap.shrink_to_fit();
  EXPECT_EQ(0u, bitmap.bucket_size());

  bitmap.set(100);
  bitmap.set(100000);
  bitmap.set(1000000);
  EXPECT_EQ(buckets_for_1m, bitmap.bucket_size());

  // With only 100000 left set, shrinking drops the buckets above it.
  bitmap.reset(100);
  bitmap.reset(1000000);
  EXPECT_EQ(buckets_for_1m, bitmap.bucket_size());
  bitmap.shrink_to_fit();
  EXPECT_EQ(buckets_for_100k, bitmap.bucket_size());
}

// Compares Bitmap::bitwise_and with std::set_intersection on random values.
TEST(Bitmap, And) {
  srand(static_cast<uint32_t>(time(nullptr)));
  srand(static_cast<uint32_t>(rand()));

  Bitmap lhs, rhs;
  std::set<size_t> lhs_values, rhs_values, expected_values;

  for (uint32_t round = 0; round < 25000; ++round) {
    const uint32_t value = rand() % 1000000;
    lhs.set(value);
    lhs_values.insert(value);
  }

  for (uint32_t round = 0; round < 45000; ++round) {
    const uint32_t value = rand() % 1000000;
    rhs.set(value);
    rhs_values.insert(value);
  }

  std::set_intersection(
      lhs_values.begin(), lhs_values.end(), rhs_values.begin(),
      rhs_values.end(),
      std::inserter(expected_values, expected_values.begin()));

  lhs.bitwise_and(rhs);
  std::vector<size_t> extracted;
  lhs.extract(&extracted);
  ASSERT_EQ(lhs.cardinality(), extracted.size());
  ASSERT_EQ(expected_values.size(), extracted.size());

  // The sizes match at this point, so an element-wise comparison is safe.
  const std::vector<size_t> expected(expected_values.begin(),
                                     expected_values.end());
  EXPECT_TRUE(
      std::equal(extracted.begin(), extracted.end(), expected.begin()));
}

// Cross-checks `Bitmap::bitwise_andnot` against `std::set_difference` on two
// randomly populated bitmaps.
TEST(Bitmap, Andnot) {
  srand(static_cast<uint32_t>(time(nullptr)));
  srand(static_cast<uint32_t>(rand()));

  Bitmap lhs, rhs;
  std::set<size_t> lhs_set, rhs_set;

  for (uint32_t round = 0; round != 20000; ++round) {
    const uint32_t value = rand() % 1000000;
    lhs.set(value);
    lhs_set.insert(value);
  }
  for (uint32_t round = 0; round != 20000; ++round) {
    const uint32_t value = rand() % 1000000;
    rhs.set(value);
    rhs_set.insert(value);
  }

  // Reference result computed with the standard library.
  std::set<size_t> wanted_set;
  std::set_difference(lhs_set.begin(), lhs_set.end(), rhs_set.begin(),
                      rhs_set.end(),
                      std::inserter(wanted_set, wanted_set.begin()));

  lhs.bitwise_andnot(rhs);
  std::vector<size_t> actual;
  lhs.extract(&actual);
  ASSERT_EQ(lhs.cardinality(), actual.size());
  ASSERT_EQ(wanted_set.size(), actual.size());

  // Sizes match at this point, so an element-wise comparison is sufficient.
  const std::vector<size_t> wanted(wanted_set.begin(), wanted_set.end());
  EXPECT_TRUE(std::equal(actual.begin(), actual.end(), wanted.begin()));
}

// Cross-checks `Bitmap::bitwise_or` against `std::set_union` on two randomly
// populated bitmaps.
TEST(Bitmap, Or) {
  srand(static_cast<uint32_t>(time(nullptr)));
  srand(static_cast<uint32_t>(rand()));

  Bitmap lhs, rhs;
  std::set<size_t> lhs_set, rhs_set;

  for (uint32_t round = 0; round != 3000; ++round) {
    const uint32_t value = rand() % 2000000;
    lhs.set(value);
    lhs_set.insert(value);
  }
  for (uint32_t round = 0; round != 2000; ++round) {
    const uint32_t value = rand() % 2000000;
    rhs.set(value);
    rhs_set.insert(value);
  }

  // Reference result computed with the standard library.
  std::set<size_t> wanted_set;
  std::set_union(lhs_set.begin(), lhs_set.end(), rhs_set.begin(),
                 rhs_set.end(), std::inserter(wanted_set, wanted_set.begin()));

  lhs.bitwise_or(rhs);
  std::vector<size_t> actual;
  lhs.extract(&actual);
  ASSERT_EQ(lhs.cardinality(), actual.size());
  ASSERT_EQ(wanted_set.size(), actual.size());

  // Sizes match at this point, so an element-wise comparison is sufficient.
  const std::vector<size_t> wanted(wanted_set.begin(), wanted_set.end());
  EXPECT_TRUE(std::equal(actual.begin(), actual.end(), wanted.begin()));
}

// Cross-checks `Bitmap::bitwise_xor` against `std::set_symmetric_difference`
// on two randomly populated bitmaps.
TEST(Bitmap, Xor) {
  srand(static_cast<uint32_t>(time(nullptr)));
  srand(static_cast<uint32_t>(rand()));

  Bitmap lhs, rhs;
  std::set<size_t> lhs_set, rhs_set;

  for (uint32_t round = 0; round != 3000; ++round) {
    const uint32_t value = rand() % 2000000;
    lhs.set(value);
    lhs_set.insert(value);
  }
  for (uint32_t round = 0; round != 2000; ++round) {
    const uint32_t value = rand() % 2000000;
    rhs.set(value);
    rhs_set.insert(value);
  }

  // Reference result computed with the standard library.
  std::set<size_t> wanted_set;
  std::set_symmetric_difference(
      lhs_set.begin(), lhs_set.end(), rhs_set.begin(), rhs_set.end(),
      std::inserter(wanted_set, wanted_set.begin()));

  lhs.bitwise_xor(rhs);
  std::vector<size_t> actual;
  lhs.extract(&actual);
  ASSERT_EQ(lhs.cardinality(), actual.size());
  ASSERT_EQ(wanted_set.size(), actual.size());

  // Sizes match at this point, so an element-wise comparison is sufficient.
  const std::vector<size_t> wanted(wanted_set.begin(), wanted_set.end());
  EXPECT_TRUE(std::equal(actual.begin(), actual.end(), wanted.begin()));
}

// Applies `bitwise_not` twice to one bitmap and then checks that AND, ANDNOT,
// OR and XOR against it still agree with the standard set algorithms run on
// the values that were originally inserted.
TEST(Bitmap, Not) {
  Bitmap lhs, rhs, combined;
  std::set<size_t> lhs_set, rhs_set;

  for (uint32_t round = 0; round != 20000; ++round) {
    const uint32_t value = rand() % 1000000;
    lhs.set(value);
    lhs_set.insert(value);
  }
  for (uint32_t round = 0; round != 20000; ++round) {
    const uint32_t value = rand() % 1000000;
    rhs.set(value);
    rhs_set.insert(value);
  }

  // Double negation; the checks below expect `rhs` to behave as before.
  rhs.bitwise_not();
  rhs.bitwise_not();

  {  // AND vs. set intersection.
    std::set<size_t> wanted_set;
    std::set_intersection(lhs_set.begin(), lhs_set.end(), rhs_set.begin(),
                          rhs_set.end(),
                          std::inserter(wanted_set, wanted_set.begin()));

    combined = lhs;
    combined.bitwise_and(rhs);
    std::vector<size_t> actual;
    combined.extract(&actual);
    ASSERT_EQ(combined.cardinality(), actual.size());
    ASSERT_EQ(wanted_set.size(), actual.size());

    const std::vector<size_t> wanted(wanted_set.begin(), wanted_set.end());
    EXPECT_TRUE(std::equal(actual.begin(), actual.end(), wanted.begin()));
  }

  {  // ANDNOT vs. set difference.
    std::set<size_t> wanted_set;
    std::set_difference(lhs_set.begin(), lhs_set.end(), rhs_set.begin(),
                        rhs_set.end(),
                        std::inserter(wanted_set, wanted_set.begin()));

    combined = lhs;
    combined.bitwise_andnot(rhs);
    std::vector<size_t> actual;
    combined.extract(&actual);
    ASSERT_EQ(combined.cardinality(), actual.size());
    ASSERT_EQ(wanted_set.size(), actual.size());

    const std::vector<size_t> wanted(wanted_set.begin(), wanted_set.end());
    EXPECT_TRUE(std::equal(actual.begin(), actual.end(), wanted.begin()));
  }

  {  // OR vs. set union.
    std::set<size_t> wanted_set;
    std::set_union(lhs_set.begin(), lhs_set.end(), rhs_set.begin(),
                   rhs_set.end(),
                   std::inserter(wanted_set, wanted_set.begin()));

    combined = lhs;
    combined.bitwise_or(rhs);
    std::vector<size_t> actual;
    combined.extract(&actual);
    ASSERT_EQ(combined.cardinality(), actual.size());
    ASSERT_EQ(wanted_set.size(), actual.size());

    const std::vector<size_t> wanted(wanted_set.begin(), wanted_set.end());
    EXPECT_TRUE(std::equal(actual.begin(), actual.end(), wanted.begin()));
  }

  {  // XOR vs. symmetric difference.
    std::set<size_t> wanted_set;
    std::set_symmetric_difference(
        lhs_set.begin(), lhs_set.end(), rhs_set.begin(), rhs_set.end(),
        std::inserter(wanted_set, wanted_set.begin()));

    combined = lhs;
    combined.bitwise_xor(rhs);
    std::vector<size_t> actual;
    combined.extract(&actual);
    ASSERT_EQ(combined.cardinality(), actual.size());
    ASSERT_EQ(wanted_set.size(), actual.size());

    const std::vector<size_t> wanted(wanted_set.begin(), wanted_set.end());
    EXPECT_TRUE(std::equal(actual.begin(), actual.end(), wanted.begin()));
  }
}

// `test_all` is expected to be false for an empty bitmap, true once every bit
// of the first two buckets is set, and false again after clearing one bit.
TEST(Bitmap, TestAll) {
  Bitmap bits;
  EXPECT_FALSE(bits.test_all());

  // Fill two complete buckets.
  const auto bit_count = Bitmap::Bucket::MAX_SIZE * 2;
  for (uint32_t pos = 0; pos < bit_count; ++pos) {
    bits.set(pos);
  }
  EXPECT_TRUE(bits.test_all());

  // Punch a single hole into the second bucket.
  bits.reset(Bitmap::Bucket::MAX_SIZE + 2);
  EXPECT_FALSE(bits.test_all());
}

// `test_any` is expected to be false for an empty bitmap and true while at
// least one bit remains set.
TEST(Bitmap, TestAny) {
  Bitmap bits;
  EXPECT_FALSE(bits.test_any());

  // Set positions [69000, 70000).
  for (uint32_t pos = 69000; pos != 70000; ++pos) {
    bits.set(pos);
  }
  EXPECT_TRUE(bits.test_any());

  // Clear [69888, 70111): this overlaps only the upper part of the range set
  // above, so positions [69000, 69888) stay set.
  for (uint32_t pos = 69888; pos != 70111; ++pos) {
    bits.reset(pos);
  }
  EXPECT_TRUE(bits.test_any());
}

// `test_none` is expected to be true for an empty bitmap and false while any
// bit is set.
TEST(Bitmap, TestNone) {
  Bitmap bits;
  EXPECT_TRUE(bits.test_none());

  // Set positions [65000, 70000).
  for (uint32_t pos = 65000; pos != 70000; ++pos) {
    bits.set(pos);
  }
  EXPECT_FALSE(bits.test_none());

  // Flip [65555, 70022): the range partially overlaps the one set above, so
  // the bitmap is still expected to hold set bits afterwards.
  for (uint32_t pos = 65555; pos != 70022; ++pos) {
    bits.flip(pos);
  }
  EXPECT_FALSE(bits.test_none());
}

// Inserts random positions into a bitmap and checks that `extract` returns
// exactly the sorted, de-duplicated list of those positions.
TEST(Bitmap, Extract) {
  srand((uint32_t)time(NULL));
  srand((uint32_t)rand());

  Bitmap bitmap1;
  std::vector<size_t> vector1;

  for (uint32_t i = 0; i < 1111; ++i) {
    uint32_t val1 = rand();

    bitmap1.set(val1);
    vector1.push_back(val1);
  }

  // Build the reference: ascending order without duplicates.
  std::sort(vector1.begin(), vector1.end());
  vector1.erase(std::unique(vector1.begin(), vector1.end()), vector1.end());

  std::vector<size_t> vector2;
  bitmap1.extract(&vector2);

  // The size check must be fatal: the three-iterator `std::equal` below reads
  // `vector1.size()` elements from `vector2`, which would run past its end if
  // the extraction came back shorter than the reference.
  ASSERT_EQ(vector1.size(), vector2.size());
  EXPECT_TRUE(std::equal(vector1.begin(), vector1.end(), vector2.begin()));
}

// Compares `Bitmap::BitwiseXorCardinality` with `std::bitset` on random data,
// and checks that it is symmetric and equals the sum of the two and-not
// cardinalities.
TEST(Bitmap, BitwiseXorCardinality) {
  srand(static_cast<uint32_t>(time(nullptr)));
  srand(static_cast<uint32_t>(rand()));

  constexpr int kBitRange = 500000;
  Bitmap lhs;
  Bitmap rhs;
  std::bitset<kBitRange> lhs_ref;
  std::bitset<kBitRange> rhs_ref;

  // Phase 1: switch on random bits in the bitmaps and their references.
  for (uint32_t round = 0; round != 800; ++round) {
    const uint32_t lhs_pos = rand() % kBitRange;
    const uint32_t rhs_pos = rand() % kBitRange;

    lhs.set(lhs_pos);
    lhs_ref.set(lhs_pos);

    rhs.set(rhs_pos);
    rhs_ref.set(rhs_pos);
  }
  // Phase 2: toggle random bits so that some positions get cleared again.
  for (uint32_t round = 0; round != 600; ++round) {
    const uint32_t lhs_pos = rand() % kBitRange;
    const uint32_t rhs_pos = rand() % kBitRange;

    lhs.flip(lhs_pos);
    lhs_ref.flip(lhs_pos);

    rhs.flip(rhs_pos);
    rhs_ref.flip(rhs_pos);
  }

  EXPECT_EQ((lhs_ref ^ rhs_ref).count(),
            Bitmap::BitwiseXorCardinality(lhs, rhs));

  // |A xor B| == |A \ B| + |B \ A|
  EXPECT_EQ(Bitmap::BitwiseAndnotCardinality(lhs, rhs) +
                Bitmap::BitwiseAndnotCardinality(rhs, lhs),
            Bitmap::BitwiseXorCardinality(lhs, rhs));
  // Symmetry.
  EXPECT_EQ(Bitmap::BitwiseXorCardinality(rhs, lhs),
            Bitmap::BitwiseXorCardinality(lhs, rhs));
}

// Compares `Bitmap::BitwiseOrCardinality` with `std::bitset` on random data
// and checks that the result does not depend on the argument order.
TEST(Bitmap, BitwiseOrCardinality) {
  srand(static_cast<uint32_t>(time(nullptr)));
  srand(static_cast<uint32_t>(rand()));

  constexpr int kBitRange = 500000;
  Bitmap lhs;
  Bitmap rhs;
  std::bitset<kBitRange> lhs_ref;
  std::bitset<kBitRange> rhs_ref;

  // Phase 1: switch on random bits in the bitmaps and their references.
  for (uint32_t round = 0; round != 800; ++round) {
    const uint32_t lhs_pos = rand() % kBitRange;
    const uint32_t rhs_pos = rand() % kBitRange;

    lhs.set(lhs_pos);
    lhs_ref.set(lhs_pos);

    rhs.set(rhs_pos);
    rhs_ref.set(rhs_pos);
  }
  // Phase 2: toggle random bits so that some positions get cleared again.
  for (uint32_t round = 0; round != 600; ++round) {
    const uint32_t lhs_pos = rand() % kBitRange;
    const uint32_t rhs_pos = rand() % kBitRange;

    lhs.flip(lhs_pos);
    lhs_ref.flip(lhs_pos);

    rhs.flip(rhs_pos);
    rhs_ref.flip(rhs_pos);
  }

  EXPECT_EQ((lhs_ref | rhs_ref).count(),
            Bitmap::BitwiseOrCardinality(lhs, rhs));
  // Symmetry.
  EXPECT_EQ(Bitmap::BitwiseOrCardinality(rhs, lhs),
            Bitmap::BitwiseOrCardinality(lhs, rhs));
}

// Compares `Bitmap::BitwiseAndCardinality` with `std::bitset` on random data
// and checks that the result does not depend on the argument order.
TEST(Bitmap, BitwiseAndCardinality) {
  srand(static_cast<uint32_t>(time(nullptr)));
  srand(static_cast<uint32_t>(rand()));

  constexpr int kBitRange = 500000;
  Bitmap lhs;
  Bitmap rhs;
  std::bitset<kBitRange> lhs_ref;
  std::bitset<kBitRange> rhs_ref;

  // Phase 1: switch on random bits in the bitmaps and their references.
  for (uint32_t round = 0; round != 800; ++round) {
    const uint32_t lhs_pos = rand() % kBitRange;
    const uint32_t rhs_pos = rand() % kBitRange;

    lhs.set(lhs_pos);
    lhs_ref.set(lhs_pos);

    rhs.set(rhs_pos);
    rhs_ref.set(rhs_pos);
  }
  // Phase 2: toggle random bits so that some positions get cleared again.
  for (uint32_t round = 0; round != 600; ++round) {
    const uint32_t lhs_pos = rand() % kBitRange;
    const uint32_t rhs_pos = rand() % kBitRange;

    lhs.flip(lhs_pos);
    lhs_ref.flip(lhs_pos);

    rhs.flip(rhs_pos);
    rhs_ref.flip(rhs_pos);
  }

  EXPECT_EQ((lhs_ref & rhs_ref).count(),
            Bitmap::BitwiseAndCardinality(lhs, rhs));
  // Symmetry.
  EXPECT_EQ(Bitmap::BitwiseAndCardinality(rhs, lhs),
            Bitmap::BitwiseAndCardinality(lhs, rhs));
}

// Compares `Bitmap::BitwiseAndnotCardinality` with `std::bitset` on random
// data, in both argument orders.
TEST(Bitmap, BitwiseAndnotCardinality) {
  srand(static_cast<uint32_t>(time(nullptr)));
  srand(static_cast<uint32_t>(rand()));

  constexpr int kBitRange = 500000;
  Bitmap lhs;
  Bitmap rhs;
  std::bitset<kBitRange> lhs_ref;
  std::bitset<kBitRange> rhs_ref;

  // Phase 1: switch on random bits in the bitmaps and their references.
  for (uint32_t round = 0; round != 800; ++round) {
    const uint32_t lhs_pos = rand() % kBitRange;
    const uint32_t rhs_pos = rand() % kBitRange;

    lhs.set(lhs_pos);
    lhs_ref.set(lhs_pos);

    rhs.set(rhs_pos);
    rhs_ref.set(rhs_pos);
  }
  // Phase 2: toggle random bits so that some positions get cleared again.
  for (uint32_t round = 0; round != 600; ++round) {
    const uint32_t lhs_pos = rand() % kBitRange;
    const uint32_t rhs_pos = rand() % kBitRange;

    lhs.flip(lhs_pos);
    lhs_ref.flip(lhs_pos);

    rhs.flip(rhs_pos);
    rhs_ref.flip(rhs_pos);
  }

  // lhs without rhs.
  EXPECT_EQ((lhs_ref & ~rhs_ref).count(),
            Bitmap::BitwiseAndnotCardinality(lhs, rhs));

  // rhs without lhs.
  EXPECT_EQ((rhs_ref & ~lhs_ref).count(),
            Bitmap::BitwiseAndnotCardinality(rhs, lhs));
}

// Rough timing of the cardinality helpers and the in-place bitwise operations
// on two sparse random bitmaps. Nothing is asserted; the timings are printed
// to stdout prefixed with INTRINSICS_SET.
TEST(Bitmap, Benchmark) {
  srand((uint32_t)time(NULL));
  srand((uint32_t)rand());

  Bitmap bitmap1, bitmap2;

  // Populate both inputs with a mix of set / flip / reset operations.
  for (uint32_t i = 0; i < 2000; ++i) {
    uint32_t val1 = rand() % 200000000u;
    uint32_t val2 = rand() % 200000000u;

    bitmap1.set(val1);
    bitmap2.set(val2);
  }
  for (uint32_t i = 0; i < 1000; ++i) {
    uint32_t val1 = rand() % 200000000u;
    uint32_t val2 = rand() % 200000000u;

    bitmap1.flip(val1);
    bitmap2.flip(val2);
  }
  for (uint32_t i = 0; i < 500; ++i) {
    uint32_t val1 = rand() % 200000000u;
    uint32_t val2 = rand() % 200000000u;

    bitmap1.reset(val1);
    bitmap2.reset(val2);
  }

  // The results are accumulated into `sum` and printed together with the
  // elapsed time.
  {
    uint64_t t1 = Monotime::MicroSeconds();
    uint64_t sum = 0;
    for (uint32_t i = 0; i < 3; ++i) {
      sum += Bitmap::BitwiseAndCardinality(bitmap1, bitmap2);
    }
    std::cout << INTRINSICS_SET
              << " BitwiseAndCardinality: " << Monotime::MicroSeconds() - t1
              << " us, sum: " << sum << std::endl;
  }

  {
    uint64_t t1 = Monotime::MicroSeconds();
    uint64_t sum = 0;
    for (uint32_t i = 0; i < 3; ++i) {
      sum += Bitmap::BitwiseAndnotCardinality(bitmap1, bitmap2);
    }
    std::cout << INTRINSICS_SET
              << " BitwiseAndnotCardinality: " << Monotime::MicroSeconds() - t1
              << " us, sum: " << sum << std::endl;
  }

  {
    uint64_t t1 = Monotime::MicroSeconds();
    uint64_t sum = 0;
    for (uint32_t i = 0; i < 3; ++i) {
      sum += Bitmap::BitwiseXorCardinality(bitmap1, bitmap2);
    }
    std::cout << INTRINSICS_SET
              << " BitwiseXorCardinality: " << Monotime::MicroSeconds() - t1
              << " us, sum: " << sum << std::endl;
  }

  {
    uint64_t t1 = Monotime::MicroSeconds();
    uint64_t sum = 0;
    for (uint32_t i = 0; i < 3; ++i) {
      sum += Bitmap::BitwiseOrCardinality(bitmap1, bitmap2);
    }
    std::cout << INTRINSICS_SET
              << " BitwiseOrCardinality: " << Monotime::MicroSeconds() - t1
              << " us, sum: " << sum << std::endl;
  }

  // The in-place operations run on a scratch copy so that `bitmap1` keeps its
  // original contents and every section starts from the same input. The copy
  // is made before the clock starts.
  {
    Bitmap scratch;
    scratch = bitmap1;

    uint64_t t1 = Monotime::MicroSeconds();
    for (uint32_t i = 0; i < 3; ++i) {
      scratch.bitwise_and(bitmap2);
    }
    std::cout << INTRINSICS_SET << " And: " << Monotime::MicroSeconds() - t1
              << " us" << std::endl;
  }

  {
    Bitmap scratch;
    scratch = bitmap1;

    uint64_t t1 = Monotime::MicroSeconds();
    for (uint32_t i = 0; i < 3; ++i) {
      scratch.bitwise_andnot(bitmap2);
    }
    std::cout << INTRINSICS_SET << " Andnot: " << Monotime::MicroSeconds() - t1
              << " us" << std::endl;
  }

  {
    Bitmap scratch;
    scratch = bitmap1;

    uint64_t t1 = Monotime::MicroSeconds();
    for (uint32_t i = 0; i < 3; ++i) {
      scratch.bitwise_or(bitmap2);
    }
    std::cout << INTRINSICS_SET << " Or: " << Monotime::MicroSeconds() - t1
              << " us" << std::endl;
  }

  {
    Bitmap scratch;
    scratch = bitmap1;

    uint64_t t1 = Monotime::MicroSeconds();
    for (uint32_t i = 0; i < 3; ++i) {
      scratch.bitwise_xor(bitmap2);
    }
    std::cout << INTRINSICS_SET << " Xor: " << Monotime::MicroSeconds() - t1
              << " us" << std::endl;
  }
}


================================================
FILE: tests/ailego/container/blob_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <type_traits>
#include <gtest/gtest.h>
#include <zvec/ailego/container/blob.h>

using namespace zvec;

// Exercises BlobWrap construction, copy and move, checking `size()`,
// `buffer()` and `is_valid()` after each step.
TEST(BlobWrap, Constructor) {
  // A default-constructed wrapper is expected to be empty and invalid.
  ailego::BlobWrap blob1;
  EXPECT_EQ(0u, blob1.size());
  EXPECT_FALSE(blob1.buffer());
  EXPECT_FALSE(blob1.is_valid());

  // Wrapping an empty string: a non-null buffer is expected, but with a size
  // of zero the wrapper is still expected to report itself as invalid.
  std::string buf2;
  ailego::BlobWrap blob2(buf2);
  EXPECT_EQ(0u, blob2.size());
  EXPECT_TRUE(blob2.buffer());
  EXPECT_FALSE(blob2.is_valid());

  // Growing the string afterwards is not expected to change the size the
  // wrapper recorded at construction time.
  buf2.append("good...");
  EXPECT_EQ(0u, blob2.size());
  EXPECT_TRUE(blob2.buffer());

  // Copy construction carries over the same (zero) size and a buffer.
  ailego::BlobWrap blob3(blob2);
  EXPECT_EQ(0u, blob3.size());
  EXPECT_TRUE(blob3.buffer());

  // Wrapping a non-empty string yields a valid wrapper.
  std::string buf4("........");
  ailego::BlobWrap blob4(buf4);
  EXPECT_NE(0u, blob4.size());
  EXPECT_TRUE(blob4.buffer());
  EXPECT_TRUE(blob4.is_valid());

  // Move construction: the source is expected to be left empty.
  ailego::BlobWrap blob5(std::move(blob4));
  EXPECT_EQ(0u, blob4.size());
  EXPECT_FALSE(blob4.buffer());
  EXPECT_NE(0u, blob5.size());
  EXPECT_TRUE(blob5.buffer());

  // Copy assignment: both sides are expected to be non-empty afterwards.
  blob4 = blob5;
  EXPECT_NE(0u, blob4.size());
  EXPECT_TRUE(blob4.buffer());
  EXPECT_NE(0u, blob5.size());
  EXPECT_TRUE(blob5.buffer());

  // Move assignment: the source is expected to be left empty.
  blob1 = std::move(blob5);
  EXPECT_NE(0u, blob1.size());
  EXPECT_TRUE(blob1.buffer());
  EXPECT_EQ(0u, blob5.size());
  EXPECT_FALSE(blob5.buffer());
}

// Smoke test for BlobWrap `mount` / `umount` / `copy` / `zero` and for the
// const-correctness of `buffer()`. Several calls below have no assertion
// attached; they only verify that the call compiles and runs.
TEST(BlobWrap, General) {
  ailego::BlobWrap blob1;
  std::string buf1("11111111111");

  // Mounting attaches a buffer; unmounting is expected to drop it again.
  blob1.mount(buf1);
  EXPECT_TRUE(blob1.buffer());

  blob1.umount();
  EXPECT_FALSE(blob1.buffer());

  // On a const wrapper, `buffer()` must return a pointer to const.
  std::string buf2("22222222222222222");
  const ailego::BlobWrap blob2(buf2);
  EXPECT_TRUE(
      std::is_const<
          typename std::remove_pointer<decltype(blob2.buffer())>::type>::value);

  // On a non-const wrapper, `buffer()` must return a pointer to non-const.
  // NOTE(review): blob2 (17 bytes) is larger than the region mounted on blob3
  // (4 bytes); how `copy` handles the size mismatch is not visible here --
  // confirm against BlobWrap::copy.
  ailego::BlobWrap blob3;
  std::string buf3("3333");
  blob3.mount(const_cast<char *>(buf3.data()), buf3.size());
  blob3.copy(blob2);
  EXPECT_FALSE(
      std::is_const<
          typename std::remove_pointer<decltype(blob3.buffer())>::type>::value);

  // Copy from a raw pointer + length.
  std::string buf4("444444444444444444444");
  ailego::BlobWrap blob4;
  blob4.mount(buf4);
  blob4.copy(buf1.data(), buf1.size());

  // `zero()` on a mounted wrapper, then a copy whose source (`buf4`) is the
  // very string blob4 is mounted on.
  std::string buf5("55555");
  ailego::BlobWrap blob5(buf5);
  blob5.zero();
  blob4.copy(buf4);
}


================================================
FILE: tests/ailego/container/bloom_filter_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <iostream>
#include <ailego/container/bloom_filter.h>
#include <gtest/gtest.h>

using namespace zvec::ailego;

// Pins the numeric results of the BloomFilterCalculator helpers for a set of
// fixed inputs, then prints a requested probability next to the one obtained
// from the derived bit / hash counts.
TEST(BloomFilterCalculator, General) {
  EXPECT_EQ(13487125u,
            BloomFilterCalculator::NumberOfItems(536454615, 5, 0.000023));
  EXPECT_EQ(295835133u,
            BloomFilterCalculator::NumberOfBytes(123456789, 0.0001));
  EXPECT_EQ(11924878998u,
            BloomFilterCalculator::NumberOfBits(536454615, 0.000023));
  EXPECT_FLOAT_EQ(
      0.00032803119f,
      static_cast<float>(BloomFilterCalculator::Probability(400204, 7005007, 8)));
  EXPECT_FLOAT_EQ(
      0.747645072f,
      static_cast<float>(BloomFilterCalculator::Probability(10000, 10000, 2)));
  EXPECT_EQ(12u, BloomFilterCalculator::NumberOfHash(400204, 7005007));
  EXPECT_EQ(24120650u,
            BloomFilterCalculator::NumberOfBits(1000000, 5, 0.00023));

  // Round trip: requested probability -> bits -> hashes -> probability.
  // The two values are only printed, not compared.
  const double wanted_probability = 0.000023;
  const size_t item_count = 536454615;
  const size_t bit_count =
      BloomFilterCalculator::NumberOfBits(item_count, wanted_probability);
  const size_t hash_count =
      BloomFilterCalculator::NumberOfHash(item_count, bit_count);
  const double derived_probability =
      BloomFilterCalculator::Probability(item_count, bit_count, hash_count);
  std::cout << "Probability: " << wanted_probability << std::endl;
  std::cout << "Probability2: " << derived_probability << std::endl;
}

// Basic BloomFilter workflow: insert, membership query, forced insert, clear,
// and re-initialisation through `reset`.
TEST(BloomFilter, General) {
  BloomFilter<5> bloom(10000, 0.00023);

  // The first insert of a key tuple is expected to succeed and be counted.
  EXPECT_TRUE(bloom.insert(19009, 134, 1234, 54511, 43423));
  EXPECT_EQ(1u, bloom.count());
  EXPECT_TRUE(bloom.has(19009, 134, 1234, 54511, 43423));
  // A tuple differing in one component is expected to be absent.
  EXPECT_FALSE(bloom.has(19009, 135, 1234, 54511, 43423));

  bloom.force_insert(19009, 135, 1234, 54511, 43423);
  EXPECT_TRUE(bloom.has(19009, 135, 1234, 54511, 43423));

  // Clearing is expected to drop the count and the stored membership.
  bloom.clear();
  EXPECT_EQ(0u, bloom.count());
  EXPECT_FALSE(bloom.has(19009, 134, 1234, 54511, 43423));

  // Instantiation-only check for the zero-argument specialisation.
  BloomFilter<0> zero_key_bloom;
  static_cast<void>(zero_key_bloom);

  // `reset` is expected to reject (0, 23.1) and accept (100000, 0.00023).
  BloomFilter<6> six_key_bloom;
  EXPECT_FALSE(six_key_bloom.reset(0, 23.1));
  EXPECT_TRUE(six_key_bloom.reset(100000, 0.00023));
  std::cout << "bits_count: " << six_key_bloom.bits_count() << std::endl;
  std::cout << "capacity: " << six_key_bloom.capacity() << std::endl;
  std::cout << "count: " << six_key_bloom.count() << std::endl;
  std::cout << "probability: " << six_key_bloom.probability() << std::endl;
}


================================================
FILE: tests/ailego/container/cube_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <string>
#include <vector>
#include <gtest/gtest.h>
#include <zvec/ailego/container/cube.h>

using namespace zvec::ailego;

// Prints the implementation-defined `typeid(...).name()` of the fundamental
// integer types. Purely informational: nothing is asserted. Each label names
// the type whose `type_info` is printed on that line.
TEST(Cube, TypeInfo) {
  std::cout << "* bool:                " << typeid(bool).name() << std::endl;

  std::cout << "* int8_t:              " << typeid(int8_t).name() << std::endl;
  std::cout << "* char:                " << typeid(char).name() << std::endl;
  std::cout << "* signed char:         " << typeid(signed char).name()
            << std::endl;
  std::cout << "* uint8_t:             " << typeid(uint8_t).name() << std::endl;
  std::cout << "* unsigned char:       " << typeid(unsigned char).name()
            << std::endl;

  std::cout << "* int16_t:             " << typeid(int16_t).name() << std::endl;
  std::cout << "* short:               " << typeid(short).name() << std::endl;
  std::cout << "* signed short:        " << typeid(signed short).name()
            << std::endl;
  std::cout << "* uint16_t:            " << typeid(uint16_t).name()
            << std::endl;
  std::cout << "* unsigned short:      " << typeid(unsigned short).name()
            << std::endl;

  std::cout << "* int32_t:             " << typeid(int32_t).name() << std::endl;
  std::cout << "* int:                 " << typeid(int).name() << std::endl;
  std::cout << "* signed int:          " << typeid(signed int).name()
            << std::endl;
  std::cout << "* uint32_t:            " << typeid(uint32_t).name()
            << std::endl;
  std::cout << "* unsigned int:        " << typeid(unsigned int).name()
            << std::endl;

  std::cout << "* int64_t:             " << typeid(int64_t).name() << std::endl;
  std::cout << "* long:                " << typeid(long).name() << std::endl;
  std::cout << "* signed long:         " << typeid(signed long).name()
            << std::endl;
  std::cout << "* uint64_t:            " << typeid(uint64_t).name()
            << std::endl;
  std::cout << "* unsigned long:       " << typeid(unsigned long).name()
            << std::endl;

  // `long long` is a distinct type from `long`, so it must be queried itself
  // rather than through `long`.
  std::cout << "* long long:           " << typeid(long long).name()
            << std::endl;
  std::cout << "* signed long long:    " << typeid(signed long long).name()
            << std::endl;
  std::cout << "* unsigned long long:  " << typeid(unsigned long long).name()
            << std::endl;
}

// Covers the basic Cube API: construction from a value, `cast` /
// `unsafe_cast`, conversion to the stored type, `empty`, `size`, `type`,
// `compatible` and `reset`.
TEST(Cube, General) {
  // An int stored in a Cube can be read back via both cast flavours and via
  // implicit conversion; `size()` is expected to equal sizeof the stored type.
  Cube cube1 = 11111;
  EXPECT_EQ(11111, cube1.unsafe_cast<int>());
  EXPECT_EQ(11111, cube1.cast<int>());
  int int1 = cube1;
  EXPECT_EQ(11111, int1);
  EXPECT_TRUE(!cube1.empty());
  EXPECT_EQ(sizeof(11111), cube1.size());

  // Same as above, but reading through an explicit cast to `const int &`.
  Cube cube2 = 22222;
  EXPECT_EQ(22222, cube2.unsafe_cast<int>());
  EXPECT_EQ(22222, cube2.cast<int>());
  int int2 = (const int &)cube2;
  EXPECT_EQ(22222, int2);
  EXPECT_TRUE(!cube2.empty());
  EXPECT_EQ(sizeof(22222), cube2.size());

  // A vector held by the Cube is modified in place through the reference
  // returned by `unsafe_cast`.
  Cube cube3 = std::vector<int>();
  cube3.unsafe_cast<std::vector<int>>().push_back(1);
  cube3.unsafe_cast<std::vector<int>>().push_back(2);
  cube3.unsafe_cast<std::vector<int>>().push_back(3);
  EXPECT_EQ(3u, cube3.unsafe_cast<std::vector<int>>().size());
  EXPECT_EQ(3u, cube3.cast<std::vector<int>>().size());
  std::vector<int> &vec3 = cube3;
  EXPECT_EQ(3u, vec3.size());
  EXPECT_TRUE(!cube3.empty());
  EXPECT_EQ(sizeof(std::vector<int>), cube3.size());

  // A Cube constructed from an existing (lvalue) vector.
  std::vector<long> vec4;
  vec4.push_back(1);
  vec4.push_back(2);
  vec4.push_back(3);
  vec4.push_back(4);
  Cube cube4 = vec4;
  EXPECT_EQ(4u, cube4.unsafe_cast<std::vector<long>>().size());
  EXPECT_EQ(4u, cube4.cast<std::vector<long>>().size());
  const std::vector<long> &vec44 = cube4;
  EXPECT_EQ(4u, vec44.size());
  EXPECT_TRUE(!cube4.empty());
  EXPECT_EQ(sizeof(std::vector<long>), cube4.size());

  // Default-constructed Cubes are expected to be empty, to share one type and
  // to report a size of zero.
  Cube cube5, cube6;
  EXPECT_TRUE(cube5.empty());
  EXPECT_TRUE(cube6.empty());
  EXPECT_EQ(cube5.type(), cube6.type());
  EXPECT_EQ(0u, cube5.size());
  EXPECT_EQ(0u, cube6.size());

  // Type identity: equal for the same stored type, different otherwise
  // (including against the empty Cube).
  EXPECT_EQ(cube1.type(), cube2.type());
  EXPECT_NE(cube3.type(), cube4.type());
  EXPECT_NE(cube1.type(), cube3.type());
  EXPECT_NE(cube2.type(), cube4.type());
  EXPECT_NE(cube1.type(), cube5.type());
  EXPECT_NE(cube2.type(), cube5.type());
  EXPECT_NE(cube3.type(), cube5.type());
  EXPECT_NE(cube4.type(), cube5.type());
  EXPECT_TRUE(cube1.compatible(cube2));
  EXPECT_TRUE(cube5.compatible(cube6));
  EXPECT_FALSE(cube1.compatible(cube3));
  EXPECT_FALSE(cube3.compatible(cube5));

  // `reset` is expected to leave a Cube empty, whether or not it held a value
  // before.
  cube1.reset();
  cube3.reset();
  cube5.reset();
  cube6.reset();
  EXPECT_TRUE(cube1.empty());
  EXPECT_TRUE(cube3.empty());
  EXPECT_TRUE(cube5.empty());
  EXPECT_TRUE(cube6.empty());
}

// Exercises Cube construction and assignment (copy and move, from values and
// from other Cubes) with `std::string` as the stored type.
TEST(Cube, LargeObject) {
  std::string str1("1111111");
  std::string str2("2222222");
  std::string str3("3333333");
  std::string str4("4444444");
  std::string str5("5555555");
  std::string str6("6666666");
  std::string str7("7777777");

  // Three ways of putting a string into a Cube: direct construction,
  // assignment into an empty Cube, and copy-initialisation.
  Cube cube1(str1);
  Cube cube2;
  cube2 = str2;
  Cube cube3 = str3;

  EXPECT_EQ(str1, cube1.cast<std::string>());
  EXPECT_EQ(str2, cube2.cast<std::string>());
  EXPECT_TRUE(cube1.compatible(cube2));

  // Move-assigning a Cube is expected to leave the source empty, and an empty
  // Cube is not compatible with one holding a string.
  cube1 = std::move(cube2);
  EXPECT_EQ(str2, cube1.cast<std::string>());
  EXPECT_TRUE(cube2.empty());
  EXPECT_FALSE(cube1.compatible(cube2));

  // Copy-assigning a Cube leaves the source intact.
  EXPECT_EQ(str3, cube3.cast<std::string>());
  cube3 = cube1;
  EXPECT_EQ(str2, cube3.cast<std::string>());
  EXPECT_EQ(str2, cube1.cast<std::string>());

  // Test Constructor Cube(T &&rhs) / Cube(const T &rhs)
  // Copying from `str4` must leave it untouched; moving from it is expected to
  // leave it empty. (The standard only promises a valid-but-unspecified state
  // for a moved-from std::string, so the emptiness check relies on the
  // library implementation.)
  Cube cube41(std::string("444444"));
  Cube cube42(str4);
  EXPECT_NE(std::string(""), str4);
  Cube cube43(std::move(str4));
  EXPECT_EQ(std::string(""), str4);

  // `str4` is empty at this point, so `str41` and `cube44` hold empty strings.
  const std::string str41 = str4;
  Cube cube44(str41);
  EXPECT_EQ(str41, str4);
  EXPECT_EQ(str4, cube44.cast<std::string>());

  // Test Assignment operator=(T &&rhs) / operator=(const T &rhs)
  Cube cube51, cube52, cube53, cube54;
  cube51 = std::string("55555");
  cube52 = str5;
  EXPECT_NE(std::string(""), str5);
  cube53 = std::move(str5);
  EXPECT_EQ(std::string(""), str5);

  // `str5` is empty at this point, so `str51` and `cube54` hold empty strings.
  const std::string str51 = str5;
  cube54 = str51;
  EXPECT_EQ(str51, str5);
  EXPECT_EQ(str5, cube54.cast<std::string>());

  // Test Constructor Cube(Cube &&rhs) / Cube(const Cube &rhs)
  Cube cube6(str6);
  Cube cube61(cube6);
  EXPECT_EQ(str6, cube61.cast<std::string>());
  EXPECT_FALSE(cube6.empty());
  Cube cube62(std::move(cube6));
  EXPECT_EQ(str6, cube62.cast<std::string>());
  EXPECT_TRUE(cube6.empty());

  // Copy construction from a const Cube.
  const Cube cube63 = cube62;
  Cube cube64(cube63);
  EXPECT_EQ(str6, cube64.cast<std::string>());
  EXPECT_FALSE(cube63.empty());

  // Test Assignment operator=(Cube &&rhs) / operator=(const Cube &rhs)
  Cube cube7(str7), cube71, cube72;
  cube71 = cube7;
  EXPECT_EQ(str7, cube71.cast<std::string>());
  EXPECT_FALSE(cube7.empty());
  cube72 = std::move(cube7);
  EXPECT_EQ(str7, cube72.cast<std::string>());
  EXPECT_TRUE(cube7.empty());

  // Copy assignment from a const Cube.
  const Cube cube73(cube72);
  Cube cube74;
  cube74 = cube73;
  EXPECT_EQ(str7, cube74.cast<std::string>());
  EXPECT_EQ(str7, cube73.cast<std::string>());
  EXPECT_FALSE(cube74.empty());
}

// Instrumented helper type: each special member function bumps its own static
// counter so that tests can observe how many default constructions, copies,
// moves and destructions took place.
struct SmallObject {
  // Process-wide counters shared by all instances.
  static int assign_count;   // default constructions
  static int clone_count;    // copy constructions
  static int move_count;     // move constructions
  static int cleanup_count;  // destructions

  // Payload; the copy and move constructors do not transfer it, so it always
  // starts out as 0.
  int val = 0;

  SmallObject() { ++assign_count; }

  SmallObject(const SmallObject &) { ++clone_count; }

  SmallObject(SmallObject &&) { ++move_count; }

  ~SmallObject() { ++cleanup_count; }
};

int SmallObject::assign_count = 0;
int SmallObject::clone_count = 0;
int SmallObject::move_count = 0;
int SmallObject::cleanup_count = 0;

// Verifies, through the SmallObject counters, how many copy / move
// constructions and destructions a Cube performs when it is constructed from
// an lvalue or an rvalue and when it goes out of scope.
//
// The counters are static members of SmallObject, so the initial zero checks
// only hold if no SmallObject has been created earlier in the process.
TEST(Cube, CubePolicy) {
  EXPECT_EQ(0, SmallObject::assign_count);
  EXPECT_EQ(0, SmallObject::clone_count);
  EXPECT_EQ(0, SmallObject::move_count);
  EXPECT_EQ(0, SmallObject::cleanup_count);

  // Five default constructions; these objects outlive every check below, so
  // their destructors never show up in `cleanup_count` within this test.
  SmallObject obj1, obj2, obj3, obj4, obj5;
  EXPECT_EQ(5, SmallObject::assign_count);
  EXPECT_EQ(0, SmallObject::clone_count);
  EXPECT_EQ(0, SmallObject::move_count);
  EXPECT_EQ(0, SmallObject::cleanup_count);

  // Constructing from an lvalue: exactly one copy construction is expected.
  Cube cube1(obj1);
  EXPECT_EQ(5, SmallObject::assign_count);
  EXPECT_EQ(1, SmallObject::clone_count);
  EXPECT_EQ(0, SmallObject::move_count);
  EXPECT_EQ(0, SmallObject::cleanup_count);

  // Constructing from an rvalue: exactly one move construction is expected.
  Cube cube2(std::move(obj2));
  EXPECT_EQ(5, SmallObject::assign_count);
  EXPECT_EQ(1, SmallObject::clone_count);
  EXPECT_EQ(1, SmallObject::move_count);
  EXPECT_EQ(0, SmallObject::cleanup_count);

  // A scoped Cube built by move: one more move inside the scope ...
  {
    Cube cube3(std::move(obj3));
    EXPECT_EQ(5, SmallObject::assign_count);
    EXPECT_EQ(1, SmallObject::clone_count);
    EXPECT_EQ(2, SmallObject::move_count);
    EXPECT_EQ(0, SmallObject::cleanup_count);
  }

  // ... and exactly one destruction once the Cube has gone out of scope.
  EXPECT_EQ(5, SmallObject::assign_count);
  EXPECT_EQ(1, SmallObject::clone_count);
  EXPECT_EQ(2, SmallObject::move_count);
  EXPECT_EQ(1, SmallObject::cleanup_count);

  // A scoped Cube built by copy: one more copy inside the scope ...
  {
    Cube cube4(obj4);
    EXPECT_EQ(5, SmallObject::assign_count);
    EXPECT_EQ(2, SmallObject::clone_count);
    EXPECT_EQ(2, SmallObject::move_count);
    EXPECT_EQ(1, SmallObject::cleanup_count);
  }

  // ... and one more destruction when it leaves the scope.
  EXPECT_EQ(5, SmallObject::assign_count);
  EXPECT_EQ(2, SmallObject::clone_count);
  EXPECT_EQ(2, SmallObject::move_count);
  EXPECT_EQ(2, SmallObject::cleanup_count);

  // Same copy scenario once more with another source object.
  {
    Cube cube5(obj5);
    EXPECT_EQ(5, SmallObject::assign_count);
    EXPECT_EQ(3, SmallObject::clone_count);
    EXPECT_EQ(2, SmallObject::move_count);
    EXPECT_EQ(2, SmallObject::cleanup_count);
  }

  EXPECT_EQ(5, SmallObject::assign_count);
  EXPECT_EQ(3, SmallObject::clone_count);
  EXPECT_EQ(2, SmallObject::move_count);
  EXPECT_EQ(3, SmallObject::cleanup_count);
}

// Exercises Cube construction and assignment (copy and move, from values and
// from other Cubes) with `uint64_t` as the stored type. Mirrors the
// std::string based test, except that moving from a plain integer leaves the
// source value unchanged.
TEST(Cube, SmallObject) {
  uint64_t uint1 = 1111111;
  uint64_t uint2 = 2222222;
  uint64_t uint3 = 3333333;
  uint64_t uint4 = 4444444;
  uint64_t uint5 = 5555555;
  uint64_t uint6 = 6666666;
  uint64_t uint7 = 7777777;

  // Three ways of putting a value into a Cube: direct construction,
  // assignment into an empty Cube, and copy-initialisation.
  Cube cube1(uint1);
  Cube cube2;
  cube2 = uint2;
  Cube cube3 = uint3;

  EXPECT_EQ(uint1, cube1.cast<uint64_t>());
  EXPECT_EQ(uint2, cube2.cast<uint64_t>());
  EXPECT_TRUE(cube1.compatible(cube2));

  // Move-assigning a Cube is expected to leave the source Cube empty.
  cube1 = std::move(cube2);
  EXPECT_EQ(uint2, cube1.cast<uint64_t>());
  EXPECT_TRUE(cube2.empty());
  EXPECT_FALSE(cube1.compatible(cube2));

  // Copy-assigning a Cube leaves the source intact.
  EXPECT_EQ(uint3, cube3.cast<uint64_t>());
  cube3 = cube1;
  EXPECT_EQ(uint2, cube3.cast<uint64_t>());
  EXPECT_EQ(uint2, cube1.cast<uint64_t>());

  // Test Constructor Cube(T &&rhs) / Cube(const T &rhs)
  // `std::move` on an integer does not modify it, so `uint4` is expected to
  // keep its value after being "moved" into a Cube.
  Cube cube41(uint64_t(444444));
  Cube cube42(uint4);
  EXPECT_NE(uint64_t(0), uint4);
  Cube cube43(std::move(uint4));
  EXPECT_NE(uint64_t(0), uint4);

  const uint64_t uint41 = uint4;
  Cube cube44(uint41);
  EXPECT_EQ(uint41, uint4);
  EXPECT_EQ(uint4, cube44.cast<uint64_t>());

  // Test Assignment operator=(T &&rhs) / operator=(const T &rhs)
  Cube cube51, cube52, cube53, cube54;
  cube51 = uint64_t(55555);
  cube52 = uint5;
  EXPECT_NE(uint64_t(0), uint5);
  cube53 = std::move(uint5);
  EXPECT_NE(uint64_t(0), uint5);

  const uint64_t uint51 = uint5;
  cube54 = uint51;
  EXPECT_EQ(uint51, uint5);
  EXPECT_EQ(uint5, cube54.cast<uint64_t>());

  // Test Constructor Cube(Cube &&rhs) / Cube(const Cube &rhs)
  Cube cube6(uint6);
  Cube cube61(cube6);
  EXPECT_EQ(uint6, cube61.cast<uint64_t>());
  EXPECT_FALSE(cube6.empty());
  Cube cube62(std::move(cube6));
  EXPECT_EQ(uint6, cube62.cast<uint64_t>());
  EXPECT_TRUE(cube6.empty());

  // Copy construction from a const Cube.
  const Cube cube63 = cube62;
  Cube cube64(cube63);
  EXPECT_EQ(uint6, cube64.cast<uint64_t>());
  EXPECT_FALSE(cube63.empty());

  // Test Assignment operator=(Cube &&rhs) / operator=(const Cube &rhs)
  Cube cube7(uint7), cube71, cube72;
  cube71 = cube7;
  EXPECT_EQ(uint7, cube71.cast<uint64_t>());
  EXPECT_FALSE(cube7.empty());
  cube72 = std::move(cube7);
  EXPECT_EQ(uint7, cube72.cast<uint64_t>());
  EXPECT_TRUE(cube7.empty());

  // Copy assignment from a const Cube.
  const Cube cube73(cube72);
  Cube cube74;
  cube74 = cube73;
  EXPECT_EQ(uint7, cube74.cast<uint64_t>());
  EXPECT_EQ(uint7, cube73.cast<uint64_t>());
  EXPECT_FALSE(cube74.empty());
}

enum EnumValueType { Unknown, Binary, Float, Double };
//! Scoped enumeration used to store an `enum class` value inside a Cube
enum class EnumClassType {
  Unknown,
  RED,
  GREEN,
  BLUE,
};

//! Checks that a Cube can hold both unscoped and scoped enum values, and that
//! such a Cube is reported as compatible with the enum's underlying type.
TEST(Cube, EnumObject) {
  using ValueBase = std::underlying_type<EnumValueType>::type;
  using ClassBase = std::underlying_type<EnumClassType>::type;

  // Print the implementation-defined type names involved, for diagnostics
  std::cout << "* uint32_t: " << typeid(uint32_t).name() << std::endl;
  std::cout << "* int32_t: " << typeid(int32_t).name() << std::endl;
  std::cout << "* EnumValueType: " << typeid(EnumValueType).name() << std::endl;
  std::cout << "* EnumValueType (underlying_type): " << typeid(ValueBase).name()
            << std::endl;

  std::cout << "* EnumClassType: " << typeid(EnumClassType).name() << std::endl;
  std::cout << "* EnumClassType (underlying_type): " << typeid(ClassBase).name()
            << std::endl;

  EnumValueType plain_in = EnumValueType::Binary;
  EnumValueType plain_out = EnumValueType::Unknown;
  EnumClassType scoped_in = EnumClassType::RED;
  EnumClassType scoped_out = EnumClassType::Unknown;

  Cube cubeA(plain_in);
  Cube cubeB(scoped_in);

  // Round trip of the unscoped enum
  EXPECT_EQ(plain_in, cubeA.cast<EnumValueType>());
  EXPECT_NE(plain_out, cubeA.cast<EnumValueType>());
  plain_out = cubeA.cast<EnumValueType>();
  EXPECT_EQ(plain_in, plain_out);

  // Round trip of the scoped enum
  EXPECT_EQ(scoped_in, cubeB.cast<EnumClassType>());
  EXPECT_NE(scoped_out, cubeB.cast<EnumClassType>());
  scoped_out = cubeB.cast<EnumClassType>();
  EXPECT_EQ(scoped_in, scoped_out);

  // Cubes holding plain integers of the enums' underlying types
  Cube cubeC(static_cast<ValueBase>(1));
  Cube cubeD(static_cast<ClassBase>(1));

  std::cout << "* cubeA: " << cubeA.type().name() << std::endl;
  std::cout << "* cubeB: " << cubeB.type().name() << std::endl;
  std::cout << "* cubeC: " << cubeC.type().name() << std::endl;
  std::cout << "* cubeD: " << cubeD.type().name() << std::endl;

  // Intentionally disabled: the underlying type of an unscoped enum is
  // implementation-defined, so these checks are not portable.
  // EXPECT_TRUE(typeid(std::underlying_type<EnumValueType>::type) ==
  //             typeid(uint32_t));
  // EXPECT_TRUE(typeid(std::underlying_type<EnumClassType>::type) ==
  //             typeid(int32_t));

  EXPECT_TRUE(cubeA.compatible<EnumValueType>());
  EXPECT_TRUE(cubeB.compatible<EnumClassType>());
  EXPECT_TRUE(cubeA.compatible<ValueBase>());
  EXPECT_TRUE(cubeB.compatible<ClassBase>());
  EXPECT_TRUE(cubeC.compatible<ValueBase>());
  EXPECT_TRUE(cubeD.compatible<ClassBase>());

  // Reading an enum-holding Cube through the underlying type gives the same
  // value once converted back to the enum
  EnumValueType plain_via_base =
      static_cast<EnumValueType>(cubeA.cast<ValueBase>());
  EnumClassType scoped_via_base =
      static_cast<EnumClassType>(cubeB.cast<ClassBase>());
  EXPECT_EQ(plain_in, plain_via_base);
  EXPECT_EQ(scoped_in, scoped_via_base);
}


================================================
FILE: tests/ailego/container/heap_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cstddef>
#include <memory>
#include <random>
#include <type_traits>
#include <utility>
#include <gtest/gtest.h>
#include <zvec/ailego/container/heap.h>
#include <zvec/ailego/utility/time_helper.h>

using namespace zvec;

//! Covers the basic life cycle of ailego::Heap: filling, copying, moving,
//! swapping and changing the limit of a heap.
TEST(Heap, General) {
  std::random_device seed_source;
  std::mt19937 engine(seed_source());
  std::uniform_real_distribution<float> scores(0.0, 100);

  // Case 1: default-constructed heap, then move construction
  {
    ailego::Heap<float> origin;

    for (size_t n = 0; n != 12; ++n) {
      origin.emplace(scores(engine));
    }
    EXPECT_EQ(12u, origin.size());
    EXPECT_FALSE(origin.full());

    for (auto score : origin) {
      std::cout << score << " ";
    }
    std::cout << std::endl;

    ailego::Heap<float> stolen(std::move(origin));
    EXPECT_TRUE(origin.empty());
    EXPECT_FALSE(stolen.empty());
    for (size_t n = 0; n != 12; ++n) {
      stolen.pop();
    }
    EXPECT_TRUE(stolen.empty());
  }

  // Case 2: heap limited to 12 elements, then copy construction
  {
    ailego::Heap<float> bounded(12);

    for (size_t n = 0; n != 200; ++n) {
      bounded.push(scores(engine));
    }
    EXPECT_EQ(12u, bounded.size());
    EXPECT_TRUE(std::is_heap(bounded.begin(), bounded.end()));
    EXPECT_TRUE(bounded.full());

    ailego::Heap<float> duplicate(bounded);
    for (auto score : duplicate) {
      std::cout << score << " ";
    }
    std::cout << std::endl;

    for (size_t n = 0; n != 12; ++n) {
      duplicate.pop();
    }
    // Draining the copy must leave the original untouched
    EXPECT_TRUE(duplicate.empty());
    EXPECT_FALSE(bounded.empty());
  }

  // Case 3: copy assignment and move assignment
  {
    ailego::Heap<float> bounded(12);
    ailego::Heap<float> copy_target;
    ailego::Heap<float> move_target;

    for (size_t n = 0; n != 50; ++n) {
      bounded.emplace(scores(engine));
    }

    EXPECT_NE(copy_target.limit(), bounded.limit());
    EXPECT_FALSE(bounded.empty());
    EXPECT_TRUE(copy_target.empty());
    copy_target = bounded;

    EXPECT_FALSE(bounded.empty());
    EXPECT_FALSE(copy_target.empty());
    EXPECT_EQ(copy_target.limit(), bounded.limit());

    move_target = std::move(bounded);
    EXPECT_TRUE(bounded.empty());
    EXPECT_FALSE(move_target.empty());
    EXPECT_EQ(move_target.limit(), bounded.limit());
  }

  // Case 4: swapping a filled heap with an empty one
  {
    ailego::Heap<float> filled(12);
    ailego::Heap<float> blank;

    for (size_t n = 0; n != 50; ++n) {
      filled.emplace(scores(engine));
    }

    filled.swap(blank);
    EXPECT_FALSE(blank.empty());
    EXPECT_TRUE(filled.empty());
  }

  // Case 5: raising the limit of a heap that is already full
  {
    ailego::Heap<float> growing(32);

    for (size_t n = 0; n != 200; ++n) {
      growing.emplace(scores(engine));
    }
    EXPECT_EQ(32u, growing.size());
    EXPECT_TRUE(std::is_heap(growing.begin(), growing.end()));

    growing.limit(55);
    for (size_t n = 0; n != 100; ++n) {
      growing.emplace(scores(engine));
    }
    EXPECT_TRUE(std::is_heap(growing.begin(), growing.end()));
    EXPECT_EQ(55u, growing.size());
    EXPECT_TRUE(growing.full());
  }
}

//! Builds heaps from an existing std::vector (by copy and by move) and moves
//! a heap back into a plain vector.
TEST(Heap, Make) {
  std::random_device seed_source;
  std::mt19937 engine(seed_source());
  std::uniform_real_distribution<float> scores(0.0, 100);

  std::vector<float> samples;
  for (size_t n = 0; n != 200; ++n) {
    samples.push_back(scores(engine));
  }

  // Copy construction keeps the source vector intact
  ailego::Heap<float> copied(samples);
  EXPECT_FALSE(samples.empty());
  EXPECT_EQ(copied.front(), *std::max_element(samples.begin(), samples.end()));

  // Move construction empties the source vector
  ailego::Heap<float> adopted(std::move(samples));
  EXPECT_TRUE(samples.empty());
  EXPECT_EQ(adopted.front(), *std::max_element(copied.begin(), copied.end()));

  // Moving a heap into the vector hands the elements over
  samples = std::move(copied);
  EXPECT_FALSE(samples.empty());
  EXPECT_TRUE(copied.empty());
}

//! Checks sort() and update() on a heap, including after its limit has been
//! reduced below the current size.
TEST(Heap, Sort) {
  std::random_device seed_source;
  std::mt19937 engine(seed_source());
  std::uniform_real_distribution<float> scores(0.0, 100);

  std::vector<float> samples;
  for (size_t n = 0; n != 200; ++n) {
    samples.push_back(scores(engine));
  }
  const auto first = samples.begin();
  const auto last = samples.end();

  ailego::Heap<float> ranked(samples);
  EXPECT_EQ(ranked.front(), *std::max_element(first, last));

  // After sorting, the front element is the smallest sample
  ranked.sort();
  EXPECT_EQ(ranked.front(), *std::min_element(first, last));

  // Changing the limit alone keeps every element; update() applies it
  ranked.limit(50);
  EXPECT_EQ(200u, ranked.size());
  ranked.update();
  EXPECT_EQ(50u, ranked.size());
  EXPECT_EQ(ranked.front(), *std::max_element(ranked.begin(), ranked.end()));

  ranked.sort();
  EXPECT_EQ(ranked.front(), *std::min_element(first, last));
}

//! Element type that logs every special member call to stdout, so the
//! constructions and assignments it undergoes can be observed.
struct HeapValue {
  //! The only payload; ordering is defined on it
  float score;

  //! Default constructor: score is zero
  HeapValue() : score(0.0f) {
    std::cout << "HeapValue(void)" << std::endl;
  }

  //! Converting constructor from a score
  HeapValue(float val) : score(val) {
    std::cout << "HeapValue(float)" << std::endl;
  }

  //! Copy constructor
  HeapValue(const HeapValue &rhs) {
    score = rhs.score;
    std::cout << "HeapValue(const HeapValue &)" << std::endl;
  }

  //! Move constructor (the source keeps its score)
  HeapValue(HeapValue &&rhs) {
    score = rhs.score;
    std::cout << "HeapValue(HeapValue &&)" << std::endl;
  }

  //! Copy assignment
  HeapValue &operator=(const HeapValue &rhs) {
    std::cout << "operator=(const HeapValue &)" << std::endl;
    this->score = rhs.score;
    return *this;
  }

  //! Move assignment (the source keeps its score)
  HeapValue &operator=(HeapValue &&rhs) {
    std::cout << "operator=(HeapValue &&)" << std::endl;
    this->score = rhs.score;
    return *this;
  }

  //! Less than, by score
  bool operator<(const HeapValue &rhs) const {
    return score < rhs.score;
  }

  //! Greater than, by score
  bool operator>(const HeapValue &rhs) const {
    return score > rhs.score;
  }
};

//! Drives a two-element heap of HeapValue through push, emplace and pop; the
//! logging in HeapValue prints which special members get invoked.
TEST(Heap, Constructor) {
  ailego::Heap<HeapValue> pair_heap(2);
  pair_heap.push(HeapValue(2.0f));  // pushes a temporary
  pair_heap.emplace(1.0f);          // built from the constructor argument

  // Pushing a named, default-constructed value while the heap is full
  HeapValue zero_value;
  pair_heap.push(zero_value);

  pair_heap.pop();
  EXPECT_EQ(1u, pair_heap.size());
  pair_heap.pop();
  EXPECT_EQ(0u, pair_heap.size());
  // pair_heap.pop(); // disallowed
}

//! Minimal vector-like container built directly on top of an allocator.
//!
//! It offers the subset of the std::vector interface implemented below
//! (iteration, front/back, size/capacity, reserve, push_back/pop_back, clear)
//! and is plugged into ailego::Heap as its base container by the benchmark.
//! The capacity always grows to the next power of two. Elements are created
//! and destroyed through std::allocator_traits, so non-trivial element types
//! are handled correctly.
template <typename T, class TAllocator = std::allocator<T>>
class HeapVector {
 public:
  typedef size_t size_type;
  typedef typename std::remove_reference<T>::type value_type;
  typedef TAllocator allocator_type;

  //! Constructor
  HeapVector(void) : begin_(nullptr), end_(nullptr), capacity_(0u), alloc_() {}

  //! Constructor (deep copy of the elements of `rhs`)
  HeapVector(const HeapVector &rhs)
      : begin_(nullptr), end_(nullptr), capacity_(0u), alloc_() {
    try {
      this->append_copies(rhs);
    } catch (...) {
      // The destructor does not run for a partially constructed object, so
      // give back whatever was built before rethrowing.
      this->release();
      throw;
    }
  }

  //! Constructor (steals the buffer of `rhs`, leaving it empty)
  HeapVector(HeapVector &&rhs) noexcept
      : begin_(rhs.begin_), end_(rhs.end_), capacity_(rhs.capacity_), alloc_() {
    rhs.begin_ = nullptr;
    rhs.end_ = nullptr;
    rhs.capacity_ = 0u;
  }

  //! Destructor
  ~HeapVector(void) {
    this->release();
  }

  //! Assignment (deep copy; the existing buffer is reused when large enough)
  HeapVector &operator=(const HeapVector &rhs) {
    if (this != &rhs) {
      this->clear();
      this->append_copies(rhs);
    }
    return *this;
  }

  //! Assignment (steals the buffer of `rhs`, leaving it empty)
  HeapVector &operator=(HeapVector &&rhs) noexcept {
    if (this != &rhs) {
      // Free the current buffer first, otherwise it would be leaked
      this->release();
      begin_ = rhs.begin_;
      end_ = rhs.end_;
      capacity_ = rhs.capacity_;
      rhs.begin_ = nullptr;
      rhs.end_ = nullptr;
      rhs.capacity_ = 0u;
    }
    return *this;
  }

  //! Clear the vector (destroys the elements, keeps the capacity)
  void clear(void) {
    for (value_type *iter = begin_; iter != end_; ++iter) {
      traits_type::destroy(alloc_, iter);
    }
    end_ = begin_;
  }

  //! Retrieve the begin iterator
  value_type *begin(void) {
    return begin_;
  }

  //! Retrieve the begin iterator
  const value_type *begin(void) const {
    return begin_;
  }

  //! Retrieve the end iterator
  value_type *end(void) {
    return end_;
  }

  //! Retrieve the end iterator
  const value_type *end(void) const {
    return end_;
  }

  //! Retrieve the front element (the vector must not be empty)
  value_type &front(void) {
    return *begin_;
  }

  //! Retrieve the front element (the vector must not be empty)
  const value_type &front(void) const {
    return *begin_;
  }

  //! Retrieve the back element (the vector must not be empty)
  value_type &back(void) {
    return *(end_ - 1);
  }

  //! Retrieve the back element (the vector must not be empty)
  const value_type &back(void) const {
    return *(end_ - 1);
  }

  //! Retrieve count of elements in vector
  size_type size(void) const {
    return static_cast<size_type>(end_ - begin_);
  }

  //! Retrieve capacity of vector
  size_type capacity(void) const {
    return capacity_;
  }

  //! Check whether the vector is empty
  bool empty(void) const {
    return (begin_ == end_);
  }

  //! Request a change in capacity (never shrinks)
  void reserve(size_type n) {
    if (capacity_ < n) {
      this->expand(n);
    }
  }

  //! Append a copy of `val`
  void push_back(const value_type &val) {
    this->append(val);
  }

  //! Append `val` by moving from it
  void push_back(value_type &&val) {
    this->append(std::move(val));
  }

  //! Remove the last element (the vector must not be empty)
  void pop_back(void) {
    --end_;
    traits_type::destroy(alloc_, end_);
  }

 protected:
  //! Find the number which is upper power of 2
  static inline size_type clp2(size_type n) {
    n = n - 1;
    // Smear the highest set bit into every lower position; the shift range
    // follows the actual width of size_type instead of assuming 32 bits.
    for (size_type shift = 1; shift < sizeof(size_type) * 8; shift <<= 1) {
      n = n | (n >> shift);
    }
    return (n + 1);
  }

  //! Expand the buffer to hold at least `need` elements
  void expand(size_type need) {
    need = clp2(need);
    value_type *buf = alloc_.allocate(need);
    value_type *out = buf;

    // Relocate element by element: move-construct in the new buffer, then
    // destroy the source. A raw byte copy is only valid for trivial types.
    for (value_type *iter = begin_; iter != end_; ++iter, ++out) {
      traits_type::construct(alloc_, out, std::move(*iter));
      traits_type::destroy(alloc_, iter);
    }
    if (begin_) {
      alloc_.deallocate(begin_, capacity_);
    }
    begin_ = buf;
    end_ = out;
    capacity_ = need;
  }

 private:
  typedef std::allocator_traits<allocator_type> traits_type;

  //! Construct one element at the end, growing the buffer when it is full
  template <typename U>
  void append(U &&val) {
    if (this->size() == capacity_) {
      // `val` may refer to an element of this vector; take it out before the
      // buffer is replaced so that the reference cannot dangle.
      value_type holder(std::forward<U>(val));
      this->expand(capacity_ + 1);
      traits_type::construct(alloc_, end_, std::move(holder));
    } else {
      traits_type::construct(alloc_, end_, std::forward<U>(val));
    }
    ++end_;
  }

  //! Copy-construct all elements of `rhs` at the end (expects an empty vector)
  void append_copies(const HeapVector &rhs) {
    if (capacity_ < rhs.size()) {
      this->expand(rhs.size());
    }
    for (const value_type *src = rhs.begin_; src != rhs.end_; ++src) {
      traits_type::construct(alloc_, end_, *src);
      ++end_;  // advance only after success, so clear() stays accurate
    }
  }

  //! Destroy all elements and return the buffer to the allocator
  void release(void) {
    this->clear();
    if (begin_) {
      alloc_.deallocate(begin_, capacity_);
    }
    begin_ = nullptr;
    end_ = nullptr;
    capacity_ = 0u;
  }

  //! Members
  value_type *begin_;
  value_type *end_;
  size_type capacity_;
  allocator_type alloc_;
};

//! Times one million insertions into a 100-element heap, once with the
//! default base container and once with HeapVector as the base container.
TEST(Heap, Becnhmark) {
  std::random_device seed_source;
  std::mt19937 engine(seed_source());
  std::uniform_real_distribution<float> scores(0.0, 100);

  std::vector<float> samples;
  for (size_t n = 0; n != 1000000; ++n) {
    samples.push_back(scores(engine));
  }

  ailego::Heap<float> default_heap(100);
  ailego::Heap<float, std::less<float>, HeapVector<float>> custom_heap(100);

  ailego::ElapsedTime timer;

  // Round 1: default container, fed through emplace()
  timer.reset();
  for (float &sample : samples) {
    default_heap.emplace(sample);
  }
  std::cout << "Heap 1: " << timer.milli_seconds() << " ms" << std::endl;
  EXPECT_EQ(100u, default_heap.size());

  // Round 2: HeapVector container, fed through push()
  timer.reset();
  for (float &sample : samples) {
    custom_heap.push(sample);
  }
  std::cout << "Heap 2: " << timer.milli_seconds() << " ms" << std::endl;
  EXPECT_EQ(100u, custom_heap.size());
}


================================================
FILE: tests/ailego/container/hypercube_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <gtest/gtest.h>
#include <zvec/ailego/container/hypercube.h>

using namespace zvec::ailego;

//! Exercises insert(), insert_or_assign() and operator[] of Hypercube with
//! literal, lvalue and rvalue keys.
TEST(Hypercube, General) {
  Hypercube cubes;

  // Fresh keys
  cubes.insert("1", 1);
  cubes.insert("2", 2);
  cubes.insert("3", 3);

  EXPECT_EQ(1, cubes["1"].cast<int>());
  EXPECT_EQ(2, cubes["2"].cast<int>());
  EXPECT_EQ(3, cubes["3"].cast<int>());

  // Overwrite the three existing keys and add two new ones
  cubes.insert_or_assign("1", 11);
  cubes.insert_or_assign("2", 22);
  cubes.insert_or_assign("3", 33);
  cubes.insert_or_assign("4", 44);
  cubes.insert_or_assign("5", 55);
  EXPECT_EQ(11, cubes["1"].cast<int>());
  EXPECT_EQ(22, cubes["2"].cast<int>());
  EXPECT_EQ(33, cubes["3"].cast<int>());
  EXPECT_EQ(44, cubes["4"].cast<int>());
  EXPECT_EQ(55, cubes["5"].cast<int>());

  // A key passed as an lvalue is left intact; one passed as an rvalue is
  // expected to be moved from
  std::string kept_key("111"), moved_key("222");
  Cube shared_value(11);
  cubes.insert_or_assign(kept_key, shared_value);
  cubes.insert_or_assign(std::move(moved_key), shared_value);
  cubes.insert_or_assign("345464", 435465);
  EXPECT_FALSE(kept_key.empty());
  EXPECT_TRUE(moved_key.empty());
}


================================================
FILE: tests/ailego/container/params_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <gtest/gtest.h>
#include <zvec/ailego/container/params.h>

using namespace zvec;
using namespace zvec::ailego;

//! Stores values of every supported scalar type in a Params object and checks
//! the result of each get_as_*() conversion, followed by erase/merge/clear.
TEST(Params, General) {
  Params params;
  Params params1;

  // Merging two empty containers must be harmless
  EXPECT_TRUE(params.empty());
  EXPECT_TRUE(params1.empty());
  params1.merge(params);

  // Keys and values given as a char array and as a const char pointer
  char test_string1[] = "test1";
  params.set(test_string1, test_string1);

  const char *test_string2 = "test2";
  params.set(test_string2, test_string2);

  // One entry per value type; the key spells the stored value
  params.set("11111", "11111");
  params.set("22222", params1);
  params.set("true", true);
  params.set("-8", int8_t(-8));
  params.set("-16", int16_t(-16));
  params.set("-32", int32_t(-32));
  params.set("-64", int64_t(-64));
  params.set("8", uint8_t(8));
  params.set("16", uint16_t(16));
  params.set("32", uint32_t(32));
  params.set("64", uint64_t(64));
  params.set("100.0", float(100.0f));
  params.set("1000.0", double(1000.0f));
  params.set(std::string("string"), "string");

  EXPECT_FALSE(params.empty());

  // has(): keys "false", "" and "10000.0" were never stored
  EXPECT_TRUE(params.has("64"));
  EXPECT_TRUE(params.has("32"));
  EXPECT_TRUE(params.has("16"));
  EXPECT_TRUE(params.has("8"));
  EXPECT_TRUE(params.has("-64"));
  EXPECT_TRUE(params.has("-32"));
  EXPECT_TRUE(params.has("-16"));
  EXPECT_TRUE(params.has("-8"));
  EXPECT_TRUE(params.has("true"));
  EXPECT_FALSE(params.has("false"));
  EXPECT_FALSE(params.has(""));
  EXPECT_TRUE(params.has("100.0"));
  EXPECT_TRUE(params.has("1000.0"));
  EXPECT_FALSE(params.has("10000.0"));
  EXPECT_TRUE(params.has("string"));

  // get_as_string(): a missing key yields an empty string
  EXPECT_EQ("1", params.get_as_string("true"));
  EXPECT_EQ("-8", params.get_as_string("-8"));
  EXPECT_EQ("-16", params.get_as_string("-16"));
  EXPECT_EQ("-32", params.get_as_string("-32"));
  EXPECT_EQ("-64", params.get_as_string("-64"));
  EXPECT_EQ("8", params.get_as_string("8"));
  EXPECT_EQ("16", params.get_as_string("16"));
  EXPECT_EQ("32", params.get_as_string("32"));
  EXPECT_EQ("64", params.get_as_string("64"));
  EXPECT_EQ("100.000000", params.get_as_string("100.0"));
  EXPECT_EQ("1000.000000", params.get_as_string("1000.0"));
  EXPECT_EQ("", params.get_as_string("10000.0"));

  // get_as_uint64(): negative values wrap, a missing key yields zero
  EXPECT_EQ(64u, params.get_as_uint64("64"));
  EXPECT_EQ(32u, params.get_as_uint64("32"));
  EXPECT_EQ(16u, params.get_as_uint64("16"));
  EXPECT_EQ(8u, params.get_as_uint64("8"));
  EXPECT_EQ((uint64_t)(-64), params.get_as_uint64("-64"));
  EXPECT_EQ((uint64_t)(-32), params.get_as_uint64("-32"));
  EXPECT_EQ((uint64_t)(-16), params.get_as_uint64("-16"));
  EXPECT_EQ((uint64_t)(-8), params.get_as_uint64("-8"));
  EXPECT_EQ(1u, params.get_as_uint64("true"));
  EXPECT_EQ(100u, params.get_as_uint64("100.0"));
  EXPECT_EQ(1000u, params.get_as_uint64("1000.0"));
  EXPECT_EQ(0u, params.get_as_uint64("10000.0"));

  // get_as_uint32()
  EXPECT_EQ(64u, params.get_as_uint32("64"));
  EXPECT_EQ(32u, params.get_as_uint32("32"));
  EXPECT_EQ(16u, params.get_as_uint32("16"));
  EXPECT_EQ(8u, params.get_as_uint32("8"));
  EXPECT_EQ(4294967232u, params.get_as_uint32("-64"));
  EXPECT_EQ((uint32_t)(-32), params.get_as_uint32("-32"));
  EXPECT_EQ((uint32_t)(-16), params.get_as_uint32("-16"));
  EXPECT_EQ((uint32_t)(-8), params.get_as_uint32("-8"));
  EXPECT_EQ(1u, params.get_as_uint32("true"));
  EXPECT_EQ(100u, params.get_as_uint32("100.0"));
  EXPECT_EQ(1000u, params.get_as_uint32("1000.0"));
  EXPECT_EQ(0u, params.get_as_uint32("10000.0"));

  // get_as_uint16()
  EXPECT_EQ(64u, params.get_as_uint16("64"));
  EXPECT_EQ(32u, params.get_as_uint16("32"));
  EXPECT_EQ(16u, params.get_as_uint16("16"));
  EXPECT_EQ(8u, params.get_as_uint16("8"));
  EXPECT_EQ(65472u, params.get_as_uint16("-64"));
  EXPECT_EQ(65504u, params.get_as_uint16("-32"));
  EXPECT_EQ((uint16_t)(-16), params.get_as_uint16("-16"));
  EXPECT_EQ((uint16_t)(-8), params.get_as_uint16("-8"));
  EXPECT_EQ(1u, params.get_as_uint16("true"));
  EXPECT_EQ(100u, params.get_as_uint16("100.0"));
  EXPECT_EQ(1000u, params.get_as_uint16("1000.0"));
  EXPECT_EQ(0u, params.get_as_uint16("10000.0"));

  // get_as_uint8(): 1000 does not fit and is expected to truncate to 232
  EXPECT_EQ(64u, params.get_as_uint8("64"));
  EXPECT_EQ(32u, params.get_as_uint8("32"));
  EXPECT_EQ(16u, params.get_as_uint8("16"));
  EXPECT_EQ(8u, params.get_as_uint8("8"));
  EXPECT_EQ(192u, params.get_as_uint8("-64"));
  EXPECT_EQ(224u, params.get_as_uint8("-32"));
  EXPECT_EQ(240u, params.get_as_uint8("-16"));
  EXPECT_EQ((uint8_t)(-8), params.get_as_uint8("-8"));
  EXPECT_EQ(1u, params.get_as_uint8("true"));
  EXPECT_EQ(100u, params.get_as_uint8("100.0"));
  EXPECT_EQ(232u, params.get_as_uint8("1000.0"));
  EXPECT_EQ(0u, params.get_as_uint8("10000.0"));

  // get_as_bool(): non-zero numbers are true; missing keys and the plain
  // text "string" are false
  EXPECT_TRUE(params.get_as_bool("64"));
  EXPECT_TRUE(params.get_as_bool("32"));
  EXPECT_TRUE(params.get_as_bool("16"));
  EXPECT_TRUE(params.get_as_bool("8"));
  EXPECT_TRUE(params.get_as_bool("-64"));
  EXPECT_TRUE(params.get_as_bool("-32"));
  EXPECT_TRUE(params.get_as_bool("-16"));
  EXPECT_TRUE(params.get_as_bool("-8"));
  EXPECT_TRUE(params.get_as_bool("true"));
  EXPECT_FALSE(params.get_as_bool("false"));
  EXPECT_FALSE(params.get_as_bool(""));
  EXPECT_TRUE(params.get_as_bool("100.0"));
  EXPECT_TRUE(params.get_as_bool("1000.0"));
  EXPECT_FALSE(params.get_as_bool("10000.0"));
  EXPECT_FALSE(params.get_as_bool("string"));

  // get_as_int64()
  EXPECT_EQ(64, params.get_as_int64("64"));
  EXPECT_EQ(32, params.get_as_int64("32"));
  EXPECT_EQ(16, params.get_as_int64("16"));
  EXPECT_EQ(8, params.get_as_int64("8"));
  EXPECT_EQ(-64, params.get_as_int64("-64"));
  EXPECT_EQ(-32, params.get_as_int64("-32"));
  EXPECT_EQ(-16, params.get_as_int64("-16"));
  EXPECT_EQ(-8, params.get_as_int64("-8"));
  EXPECT_EQ(1, params.get_as_int64("true"));
  EXPECT_EQ(100, params.get_as_int64("100.0"));
  EXPECT_EQ(1000, params.get_as_int64("1000.0"));
  EXPECT_EQ(0, params.get_as_int64("10000.0"));

  // get_as_int32()
  EXPECT_EQ(64, params.get_as_int32("64"));
  EXPECT_EQ(32, params.get_as_int32("32"));
  EXPECT_EQ(16, params.get_as_int32("16"));
  EXPECT_EQ(8, params.get_as_int32("8"));
  EXPECT_EQ(-64, params.get_as_int32("-64"));
  EXPECT_EQ(-32, params.get_as_int32("-32"));
  EXPECT_EQ(-16, params.get_as_int32("-16"));
  EXPECT_EQ(-8, params.get_as_int32("-8"));
  EXPECT_EQ(1, params.get_as_int32("true"));
  EXPECT_EQ(100, params.get_as_int32("100.0"));
  EXPECT_EQ(1000, params.get_as_int32("1000.0"));
  EXPECT_EQ(0, params.get_as_int32("10000.0"));
  params1.merge(params);

  // get_as_int16()
  EXPECT_EQ(64, params.get_as_int16("64"));
  EXPECT_EQ(32, params.get_as_int16("32"));
  EXPECT_EQ(16, params.get_as_int16("16"));
  EXPECT_EQ(8, params.get_as_int16("8"));
  EXPECT_EQ(-64, params.get_as_int16("-64"));
  EXPECT_EQ(-32, params.get_as_int16("-32"));
  EXPECT_EQ(-16, params.get_as_int16("-16"));
  EXPECT_EQ(-8, params.get_as_int16("-8"));
  EXPECT_EQ(1, params.get_as_int16("true"));
  EXPECT_EQ(100, params.get_as_int16("100.0"));
  EXPECT_EQ(1000, params.get_as_int16("1000.0"));
  EXPECT_EQ(0, params.get_as_int16("10000.0"));
  params1.merge(params);

  // get_as_int8(): 1000 does not fit and is expected to truncate to -24
  EXPECT_EQ(64, params.get_as_int8("64"));
  EXPECT_EQ(32, params.get_as_int8("32"));
  EXPECT_EQ(16, params.get_as_int8("16"));
  EXPECT_EQ(8, params.get_as_int8("8"));
  EXPECT_EQ(-64, params.get_as_int8("-64"));
  EXPECT_EQ(-32, params.get_as_int8("-32"));
  EXPECT_EQ(-16, params.get_as_int8("-16"));
  EXPECT_EQ(-8, params.get_as_int8("-8"));
  EXPECT_EQ(1, params.get_as_int8("true"));
  EXPECT_EQ(100, params.get_as_int8("100.0"));
  EXPECT_EQ(-24, params.get_as_int8("1000.0"));
  EXPECT_EQ(0, params.get_as_int8("10000.0"));
  params1.merge(params);

  // erase() is called for stored keys and for keys that were never stored
  params.erase("64");
  params.erase("32");
  params.erase("16");
  params.erase("8");
  params.erase("-64");
  params.erase("-32");
  params.erase("-16");
  params.erase("-8");
  params.erase("true");
  params.erase("false");
  params.erase("");
  params.erase("100.0");
  params.erase("1000.0");
  params.erase("10000.0");
  params.erase("string");
  params1.merge(params);
  params.clear();
}

//! Fills a Params object through operator[] with values of many different
//! types, then reads them back through get_as_uint64()/get_as_uint32() and
//! through the generic get() with a size_t output.
TEST(Params, OverloadedOperator) {
  Params params;
  Params params1;

  // Key/value given as a mutable char array
  char test_string1[] = "test1";
  params[test_string1] = test_string1;

  // Key/value given as a pointer to const char
  const char *test_string2 = "test2";
  params[test_string2] = test_string2;

  // One assignment per value type; the key spells the stored value
  params["11111"] = "11111";
  params["22222"] = params1;
  params["true"] = true;
  params["-8"] = int8_t(-8);
  params["-16"] = int16_t(-16);
  params["-32"] = int32_t(-32);
  params["-64"] = int64_t(-64);
  params["8"] = uint8_t(8);
  params["16"] = uint16_t(16);
  params["32"] = uint32_t(32);
  params["64"] = uint64_t(64);
  params["100.0"] = float(100.0f);
  params["1000.0"] = double(1000.0f);
  params["size_t"] = size_t(1234);
  params[std::string("string")] = std::string("string");

  // Same expectations as for values stored via set(); "10000.0" was never
  // assigned and is expected to read as zero
  EXPECT_EQ(64u, params.get_as_uint64("64"));
  EXPECT_EQ(32u, params.get_as_uint64("32"));
  EXPECT_EQ(16u, params.get_as_uint64("16"));
  EXPECT_EQ(8u, params.get_as_uint64("8"));
  EXPECT_EQ((uint64_t)(-64), params.get_as_uint64("-64"));
  EXPECT_EQ((uint64_t)(-32), params.get_as_uint64("-32"));
  EXPECT_EQ((uint64_t)(-16), params.get_as_uint64("-16"));
  EXPECT_EQ((uint64_t)(-8), params.get_as_uint64("-8"));
  EXPECT_EQ(1u, params.get_as_uint64("true"));
  EXPECT_EQ(100u, params.get_as_uint64("100.0"));
  EXPECT_EQ(1000u, params.get_as_uint64("1000.0"));
  EXPECT_EQ(0u, params.get_as_uint64("10000.0"));
  EXPECT_EQ(1234u, params.get_as_uint64("size_t"));
  EXPECT_EQ(1234u, params.get_as_uint32("size_t"));

  // Print the implementation-defined names of the fundamental types, which
  // helps to see which fixed-width aliases share a type on this platform
  std::cout << "float: " << typeid(float).name() << std::endl;
  std::cout << "double: " << typeid(double).name() << std::endl;
  std::cout << "long double: " << typeid(long double).name() << std::endl;
  std::cout << "char: " << typeid(char).name() << std::endl;
  std::cout << "signed char: " << typeid(signed char).name() << std::endl;
  std::cout << "unsigned char: " << typeid(unsigned char).name() << std::endl;
  std::cout << "short int: " << typeid(short int).name() << std::endl;
  std::cout << "int: " << typeid(int).name() << std::endl;
  std::cout << "long int: " << typeid(long int).name() << std::endl;
  std::cout << "long long int: " << typeid(long long int).name() << std::endl;
  std::cout << "unsigned short int: " << typeid(unsigned short int).name()
            << std::endl;
  std::cout << "unsigned int: " << typeid(unsigned int).name() << std::endl;
  std::cout << "unsigned long int: " << typeid(unsigned long int).name()
            << std::endl;
  std::cout << "unsigned long long int: "
            << typeid(unsigned long long int).name() << std::endl;

  // get() into a size_t must succeed for every stored integer width and
  // signedness; only the return value is checked, `size` is never read
  size_t size;
  EXPECT_TRUE(params.get("8", &size));
  EXPECT_TRUE(params.get("16", &size));
  EXPECT_TRUE(params.get("32", &size));
  EXPECT_TRUE(params.get("64", &size));
  EXPECT_TRUE(params.get("-8", &size));
  EXPECT_TRUE(params.get("-16", &size));
  EXPECT_TRUE(params.get("-32", &size));
  EXPECT_TRUE(params.get("-64", &size));
  EXPECT_TRUE(params.get("size_t", &size));
}

//! Stores string values only and checks how the get_as_*() accessors convert
//! textual booleans and textual numbers.
TEST(Params, GeneralString) {
  Params params;
  EXPECT_TRUE(params.empty());

  params.set("11111", "11111");
  params.set("22222", "22222");
  params.set("yes", "yes");
  params.set("no", "no");
  params.set("No", "No");
  params.set("Yes", "Yes");
  params.set("true", "true");
  params.set("True", "True");
  params.set("False", "False");
  params.set("false", "false");
  params.set("string", "string");

  // Textual booleans: "yes"/"true" in either capitalisation are true,
  // everything else tested here is false
  EXPECT_TRUE(params.get_as_bool("yes"));
  EXPECT_TRUE(params.get_as_bool("Yes"));
  EXPECT_TRUE(params.get_as_bool("True"));
  EXPECT_TRUE(params.get_as_bool("true"));
  EXPECT_FALSE(params.get_as_bool("No"));
  EXPECT_FALSE(params.get_as_bool("no"));
  EXPECT_FALSE(params.get_as_bool("False"));
  EXPECT_FALSE(params.get_as_bool("false"));
  EXPECT_FALSE(params.get_as_bool("string"));

  // "11111": 8-bit reads keep only the low byte (11111 % 256 == 103)
  EXPECT_TRUE(params.get_as_bool("11111"));
  EXPECT_EQ(103, params.get_as_int8("11111"));
  EXPECT_EQ(11111, params.get_as_int16("11111"));
  EXPECT_EQ(11111, params.get_as_int32("11111"));
  EXPECT_EQ(11111, params.get_as_int64("11111"));
  EXPECT_EQ(103u, params.get_as_uint8("11111"));
  EXPECT_EQ(11111u, params.get_as_uint16("11111"));
  EXPECT_EQ(11111u, params.get_as_uint32("11111"));
  EXPECT_EQ(11111u, params.get_as_uint64("11111"));
  EXPECT_FLOAT_EQ(11111.0, params.get_as_float("11111"));
  // Compare at double precision rather than narrowing the result to float
  EXPECT_DOUBLE_EQ(11111.0, params.get_as_double("11111"));

  // "22222": low byte is 206, which reads as -50 when signed
  EXPECT_TRUE(params.get_as_bool("22222"));
  EXPECT_EQ(-50, params.get_as_int8("22222"));
  EXPECT_EQ(22222, params.get_as_int16("22222"));
  EXPECT_EQ(22222, params.get_as_int32("22222"));
  EXPECT_EQ(22222, params.get_as_int64("22222"));
  EXPECT_EQ(206u, params.get_as_uint8("22222"));
  EXPECT_EQ(22222u, params.get_as_uint16("22222"));
  EXPECT_EQ(22222u, params.get_as_uint32("22222"));
  EXPECT_EQ(22222u, params.get_as_uint64("22222"));
  EXPECT_FLOAT_EQ(22222.0, params.get_as_float("22222"));
  EXPECT_DOUBLE_EQ(22222.0, params.get_as_double("22222"));
}

//! Smoke test: loads the process environment into a Params object and prints
//! the value found under "PATH". Nothing is asserted.
TEST(Params, ParseFromEnvironment) {
  Params environment;
  Params::ParseFromEnvironment(&environment);

  const auto path_value = environment.get_as_string("PATH");
  std::cout << path_value << std::endl;
}

//! Parses relaxed JSON text (unquoted and single-quoted keys) into a Params
//! object and checks the typed values that come out of it.
TEST(Params, ParseFromBuffer) {
  std::string str =
      "{ -1111: -1111.11, -2222: -2222,  1111: 1111, 2222: "
      "\"2222\", 1: true, \'object\' : {  } }";
  Params params;
  // Stop right away on a parse failure instead of reporting a cascade of
  // unrelated value mismatches below
  ASSERT_TRUE(Params::ParseFromBuffer(str, &params));

  ASSERT_FLOAT_EQ(-1111.11, params.get_as_float("-1111"));
  ASSERT_EQ(-2222, params.get_as_int32("-2222"));
  ASSERT_EQ(1111, params.get_as_int32("1111"));
  ASSERT_TRUE(params.get_as_bool("1"));
  ASSERT_EQ(std::string("2222"), params.get_as_string("2222"));

  ASSERT_TRUE(params.has("object"));

  // A second parse into the same object: the dotted key must be present
  // afterwards
  std::string str1 = "{proxima.general.cluster.count: 4000 }";
  ASSERT_TRUE(Params::ParseFromBuffer(str1, &params));
  ASSERT_TRUE(params.has("proxima.general.cluster.count"));

  uint32_t count = 0;
  ASSERT_TRUE(params.get("proxima.general.cluster.count", &count));
  ASSERT_EQ(4000u, count);
}

//! Round trip: the debug string of a Params object must parse back into an
//! object that produces exactly the same debug string.
TEST(Params, SerializeToBuffer) {
  std::string source_text =
      "{ -1111: -1111.11, -2222: -2222,  1111: 1111, 2222: "
      "\"2222\", 1: true, \'object\' : "
      "{ \"eeee\": false, \'null\':null } }";

  Params original;
  Params::ParseFromBuffer(source_text, &original);
  // One value stored as a pointer to a string, one as the string itself
  original.set("unsupported_string_pointer", &source_text);
  original.set("supported_string", source_text);

  std::string dumped = original.debug_string();
  printf("%s\n", dumped.c_str());

  Params reparsed;
  EXPECT_TRUE(Params::ParseFromBuffer(dumped, &reparsed));
  EXPECT_EQ(dumped, reparsed.debug_string());
}


================================================
FILE: tests/ailego/container/reservoir_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <iostream>
#include <ailego/container/reservoir.h>
#include <gtest/gtest.h>

using namespace zvec;

//! Fills a 20-slot reservoir sampler and checks its pool size and total
//! counter before filling, after filling, after overfilling and after reset.
TEST(Reservoir, General) {
  ailego::Reservoir<size_t> reservoir(20);
  EXPECT_EQ(0u, reservoir.pool().size());
  EXPECT_EQ(0u, reservoir.total());
  EXPECT_EQ(20u, reservoir.samples());

  // Feed exactly as many items as there are slots
  for (size_t item = 0; item < reservoir.samples(); ++item) {
    reservoir.fill(item);
  }
  EXPECT_EQ(reservoir.samples(), reservoir.pool().size());
  EXPECT_EQ(reservoir.samples(), reservoir.total());

  // At this point the pool holds the items in insertion order
  for (size_t slot = 0; slot < reservoir.pool().size(); ++slot) {
    EXPECT_EQ(slot, (reservoir.pool())[slot]);
  }

  // Feeding more items grows the total only; the pool size stays put
  for (size_t item = 0; item < 10000; ++item) {
    reservoir.fill(item);
  }
  EXPECT_EQ(reservoir.samples(), reservoir.pool().size());
  EXPECT_EQ(10020u, reservoir.total());

  for (auto picked : reservoir.pool()) {
    std::cout << picked << ' ';
  }
  std::cout << std::endl;

  // reset() drops the pool and the counter but keeps the sample count
  reservoir.reset();
  EXPECT_EQ(0u, reservoir.pool().size());
  EXPECT_EQ(0u, reservoir.total());
  EXPECT_EQ(20u, reservoir.samples());
}


================================================
FILE: tests/ailego/container/vector_array_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <random>
#include <ailego/container/vector_array.h>
#include <gtest/gtest.h>
#include <zvec/ailego/utility/time_helper.h>

using namespace zvec;

// Walks a NumericalVectorArray<float> through its public surface: empty
// state, bounds-checked access, append/replace, clear, reset, move and resize.
TEST(NumericalVectorArray, General) {
  ailego::NumericalVectorArray<float> arr;
  // NOTE(review): despite its name this alias is not const-qualified, so
  // accesses through it pick the same overloads as `arr`. Presumably a
  // `const ... &` was intended -- confirm before changing.
  ailego::NumericalVectorArray<float> &const_arr = arr;

  // Copies the `arr.dimension()` floats starting at `row` into a byte string
  // so a stored row can be compared against a whole vector.
  const auto row_bytes = [&arr](auto row) {
    return std::string(reinterpret_cast<const char *>(row),
                       arr.dimension() * sizeof(float));
  };

  // Default-constructed array: empty, zero-dimensional, data() still usable.
  EXPECT_TRUE(arr.empty());
  EXPECT_EQ(0u, arr.dimension());
  EXPECT_EQ(0u, arr.count());
  EXPECT_EQ(0u, arr.bytes());
  EXPECT_NE(nullptr, arr.data());
  EXPECT_NE(nullptr, const_arr.data());
  arr.shrink_to_fit();
  arr.clear();
  EXPECT_EQ(0u, arr.dimension());
  EXPECT_EQ(0u, arr.count());

  // at() on an empty array may throw std::out_of_range; the test only logs
  // it and does not require the exception.
  try {
    arr.at(0);
  } catch (const std::out_of_range &err) {
    std::cerr << "Out of Range error: " << err.what() << '\n';
  }
  try {
    const_arr.at(0);
  } catch (const std::out_of_range &err) {
    std::cerr << "Out of Range error: " << err.what() << '\n';
  }

  ailego::NumericalVector<float> first = {10.0f, 11.0f, 12.0f, 13.0f, 14.0f,
                                          15.0f, 16.0f, 17.0f, 18.0f, 19.0f};
  ailego::NumericalVector<float> second = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f,
                                           6.0f, 7.0f, 8.0f, 9.0f, 0.0f};

  // Three 10-dimensional rows: first, second, first.
  arr.reset(10);
  arr.append(first);
  arr.append(second);
  arr.append(first);
  EXPECT_EQ(3u, arr.count());
  EXPECT_EQ(10u, arr.dimension());

  // Reserving must not disturb the stored rows.
  arr.reserve(10);
  EXPECT_EQ(first, row_bytes(arr.at(0)));
  EXPECT_EQ(first, row_bytes(arr[2]));

  // Overwrite the last row; rows 1 and 2 now both hold `second`.
  arr.replace(2, second);
  EXPECT_EQ(second, row_bytes(const_arr.at(1)));
  EXPECT_EQ(second, row_bytes(const_arr[2]));

  // clear() removes the rows but keeps the dimension.
  arr.clear();
  EXPECT_EQ(10u, arr.dimension());
  EXPECT_EQ(0u, arr.count());

  // Raw append: each call adds 5 rows of dimension 2.
  arr.reset(2);
  arr.append(first.data(), 2, 5);
  arr.append(second.data(), 2, 5);
  EXPECT_EQ(2u, arr.dimension());
  EXPECT_EQ(10u, arr.count());

  // Moving transfers the rows; the source keeps its dimension but is empty.
  ailego::NumericalVectorArray<float> moved_to = std::move(arr);
  EXPECT_TRUE(arr.empty());
  EXPECT_EQ(2u, arr.dimension());
  EXPECT_EQ(0u, arr.count());
  EXPECT_EQ(2u, moved_to.dimension());
  EXPECT_EQ(10u, moved_to.count());

  // resize() can both shrink and grow the row count.
  moved_to.resize(8u);
  EXPECT_EQ(8u, moved_to.count());

  moved_to.resize(15u);
  EXPECT_EQ(15u, moved_to.count());
}

// Appends a large batch of random vectors and checks that the array's raw
// storage is byte-for-byte the concatenation of everything appended.
TEST(NumericalVectorArray, Batch) {
  const size_t kDimension = 20;
  const size_t kCount = 20000u;

  ailego::NumericalVectorArray<float> arr(kDimension);
  std::random_device seed_source;
  std::mt19937 engine(seed_source());
  std::uniform_real_distribution<float> unit(0.0, 1.0);

  // Reference copy of every byte handed to the array.
  std::string expected_bytes;

  for (size_t row = 0; row != kCount; ++row) {
    ailego::FixedVector<float, kDimension> sample;
    for (size_t col = 0; col != kDimension; ++col) {
      sample[col] = unit(engine);
    }
    arr.append(sample.data(), sample.size());
    expected_bytes.append(reinterpret_cast<const char *>(sample.data()),
                          sizeof(sample));
  }
  EXPECT_EQ(kCount, arr.count());

  const std::string stored_bytes(reinterpret_cast<const char *>(arr.data()),
                                 arr.bytes());
  EXPECT_EQ(expected_bytes, stored_bytes);
}

// Covers BinaryVectorArray end to end: empty state, bounds-checked access,
// append/replace with 64-bit blocks, raw appends with 32-bit blocks, move and
// resize.
TEST(BinaryVectorArray, General) {
  ailego::BinaryVectorArray<uint64_t> arr64;
  // NOTE(review): despite its name this alias is not const-qualified, so
  // accesses through it pick the same overloads as `arr64`. Presumably a
  // `const ... &` was intended -- confirm before changing.
  ailego::BinaryVectorArray<uint64_t> &const_arr64 = arr64;

  // Default-constructed array: empty, zero-dimensional, data() still usable.
  EXPECT_TRUE(arr64.empty());
  EXPECT_EQ(0u, arr64.dimension());
  EXPECT_EQ(0u, arr64.count());
  EXPECT_EQ(0u, arr64.bytes());
  EXPECT_NE(nullptr, arr64.data());
  EXPECT_NE(nullptr, const_arr64.data());
  arr64.shrink_to_fit();
  arr64.clear();
  EXPECT_EQ(0u, arr64.dimension());
  EXPECT_EQ(0u, arr64.count());

  // at() on an empty array may throw std::out_of_range; the test only logs
  // it and does not require the exception.
  try {
    arr64.at(0);
  } catch (const std::out_of_range &oor) {
    std::cerr << "Out of Range error: " << oor.what() << '\n';
  }
  try {
    const_arr64.at(0);
  } catch (const std::out_of_range &oor) {
    std::cerr << "Out of Range error: " << oor.what() << '\n';
  }

  // Both vectors are built from fewer than 64 bits and report one full
  // 64-bit block as their dimension.
  ailego::BinaryVector<uint64_t> vec1 = {true, false, true,  true, false,
                                         true, false, false, true, false};
  ailego::BinaryVector<uint64_t> vec2 = {true,  true,  true,  true,
                                         false, false, false, true,
                                         false, false, true,  false};
  EXPECT_EQ(64u, vec1.dimension());
  EXPECT_EQ(64u, vec2.dimension());

  // A requested dimension of 10 bits is reported as 64 by the array too.
  arr64.reset(10);
  arr64.append(vec1);
  arr64.append(vec2);
  arr64.append(vec1);
  EXPECT_EQ(3u, arr64.count());
  EXPECT_EQ(64u, arr64.dimension());
  EXPECT_EQ(0u, arr64.bytes() % sizeof(uint64_t));

  // Reserving must not disturb the stored rows (dimension >> 3 == bytes per
  // row).
  arr64.reserve(10);
  EXPECT_EQ(vec1, std::string(reinterpret_cast<const char *>(arr64.at(0)),
                              arr64.dimension() >> 3));
  EXPECT_EQ(vec1, std::string(reinterpret_cast<const char *>(arr64[2]),
                              arr64.dimension() >> 3));

  // Overwrite the last row; rows 1 and 2 now both hold vec2.
  arr64.replace(2, vec2);
  EXPECT_EQ(vec2, std::string(reinterpret_cast<const char *>(const_arr64.at(1)),
                              arr64.dimension() >> 3));
  EXPECT_EQ(vec2, std::string(reinterpret_cast<const char *>(const_arr64[2]),
                              arr64.dimension() >> 3));

  // clear() removes the rows but keeps the dimension.
  arr64.clear();
  EXPECT_EQ(64u, arr64.dimension());
  EXPECT_EQ(0u, arr64.count());

  // 32-bit blocks: each 64-bit source vector is appended as two 32-bit rows.
  ailego::BinaryVectorArray<uint32_t> arr32(1);
  EXPECT_EQ(32u, arr32.dimension());
  arr32.append((const uint32_t *)vec1.data(), 32, 2);
  arr32.append((const uint32_t *)vec2.data(), 32, 2);
  EXPECT_EQ(32u, arr32.dimension());
  EXPECT_EQ(4u, arr32.count());
  // Check the 32-bit array's own storage. The previous version inspected
  // arr64 here, which had just been cleared and therefore proved nothing.
  EXPECT_EQ(0u, arr32.bytes() % sizeof(uint32_t));

  // Moving transfers the rows; the source keeps its dimension but is empty.
  ailego::BinaryVectorArray<uint32_t> arr1 = std::move(arr32);
  EXPECT_TRUE(arr32.empty());
  EXPECT_EQ(32u, arr32.dimension());
  EXPECT_EQ(0u, arr32.count());
  EXPECT_EQ(32u, arr1.dimension());
  EXPECT_EQ(4u, arr1.count());

  // resize() can both grow and shrink the row count.
  arr1.resize(8u);
  EXPECT_EQ(8u, arr1.count());

  arr1.resize(1u);
  EXPECT_EQ(1u, arr1.count());
}


================================================
FILE: tests/ailego/container/vector_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <random>
#include <gtest/gtest.h>
#include <zvec/ailego/container/vector.h>
#include <zvec/ailego/utility/time_helper.h>

using namespace zvec;

// Checks that FixedVector can view an existing raw array in place and that
// initializer-list construction and assignment set the leading elements
// without changing the fixed size.
TEST(FixedVector, General) {
  using IntVec512 = ailego::FixedVector<int, 512>;

  // Cast() reinterprets the raw buffer; no copy is expected, so data() must
  // point at the very same storage.
  int raw[512];
  IntVec512 *view = IntVec512::Cast(raw);
  ASSERT_EQ(raw, view->data());
  EXPECT_EQ(512u, view->size());

  // Construct from a short initializer list.
  ailego::FixedVector<int, 128> values{11, 22, 33};
  const int constructed[] = {11, 22, 33};
  for (size_t pos = 0; pos != 3; ++pos) {
    EXPECT_EQ(constructed[pos], values[pos]);
  }
  EXPECT_EQ(128u, values.size());

  // Assign from another short initializer list.
  values = {55, 66, 77};
  const int assigned[] = {55, 66, 77};
  for (size_t pos = 0; pos != 3; ++pos) {
    EXPECT_EQ(assigned[pos], values[pos]);
  }
  EXPECT_EQ(128u, values.size());
}

// Exercises NumericalVector<float>: element access, iteration, reserve,
// append, clear, fill construction, swap and initializer-list construction.
TEST(NumericalVector, General) {
  // Fill a 10-element vector with 0..9 and read it back through iteration.
  ailego::NumericalVector<float> vec(10);
  for (size_t i = 0; i < vec.size(); ++i) {
    vec[i] = static_cast<float>(i);
  }

  {
    size_t index = 0;
    for (auto v : vec) {
      EXPECT_FLOAT_EQ(v, static_cast<float>(index++));
    }
  }

  // reserve() must not change the size.
  vec.reserve(20);
  EXPECT_EQ(10u, vec.size());

  // Appending 10..19 yields the full 0..19 sequence.
  vec.append(
      {10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f});
  {
    size_t index = 0;
    for (auto v : vec) {
      EXPECT_FLOAT_EQ(v, static_cast<float>(index++));
    }
  }
  EXPECT_EQ(20u, vec.size());

  EXPECT_FALSE(vec.empty());
  vec.clear();
  EXPECT_EQ(0u, vec.size());
  EXPECT_TRUE(vec.empty());

  // Fill constructor: ten copies of 1.0f.
  ailego::NumericalVector<float> vec1(10, 1.0f);
  for (auto v : vec1) {
    EXPECT_FLOAT_EQ(1.0f, v);
  }

  // After the swap `vec` owns the ten ones.
  vec.swap(vec1);
  for (auto v : vec) {
    EXPECT_FLOAT_EQ(1.0f, v);
  }
  {
    // `vec` was cleared before the swap, so `vec1` is expected to be empty
    // here and this loop to make no iterations.
    size_t index = 0;
    for (auto v : vec1) {
      EXPECT_FLOAT_EQ(v, static_cast<float>(index++));
    }
  }

  // Initializer-list construction must preserve the listed values in order.
  ailego::NumericalVector<float> vec2(
      {10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f});
  {
    // Iterate vec2 itself. The previous version iterated vec1 here, which
    // left vec2 completely unchecked.
    size_t index = 10;
    for (auto v : vec2) {
      EXPECT_FLOAT_EQ(v, static_cast<float>(index++));
    }
  }
}

// Checks size and data() validity across every way of creating or assigning
// a NumericalVector: default, sized, copy, copy-assign, move-assign,
// move-construct and initializer list.
TEST(NumericalVector, Assign) {
  // Default construction: empty, but data() is still non-null.
  ailego::NumericalVector<float> blank;
  EXPECT_TRUE(blank.data() != nullptr);
  EXPECT_EQ(0u, blank.size());

  // Sized construction.
  ailego::NumericalVector<size_t> sized(222u);
  EXPECT_TRUE(sized.data() != nullptr);
  EXPECT_EQ(222u, sized.size());

  // Copy construction leaves the source untouched.
  ailego::NumericalVector<size_t> copied(sized);
  EXPECT_TRUE(sized.data() != nullptr);
  EXPECT_EQ(222u, sized.size());
  EXPECT_TRUE(copied.data() != nullptr);
  EXPECT_EQ(222u, copied.size());

  // Copy assignment leaves the source untouched.
  ailego::NumericalVector<size_t> copy_assigned;
  copy_assigned = copied;
  EXPECT_TRUE(copied.data() != nullptr);
  EXPECT_EQ(222u, copied.size());
  EXPECT_TRUE(copy_assigned.data() != nullptr);
  EXPECT_EQ(222u, copy_assigned.size());

  // Move assignment empties the source, whose data() stays non-null.
  ailego::NumericalVector<size_t> move_assigned;
  move_assigned = std::move(copy_assigned);
  EXPECT_TRUE(copy_assigned.data() != nullptr);
  EXPECT_EQ(0u, copy_assigned.size());
  EXPECT_TRUE(move_assigned.data() != nullptr);
  EXPECT_EQ(222u, move_assigned.size());

  // Move construction behaves the same way.
  ailego::NumericalVector<size_t> move_constructed(std::move(move_assigned));
  EXPECT_TRUE(move_assigned.data() != nullptr);
  EXPECT_EQ(0u, move_assigned.size());
  EXPECT_TRUE(move_constructed.data() != nullptr);
  EXPECT_EQ(222u, move_constructed.size());

  // Initializer-list construction.
  ailego::NumericalVector<int> listed = {1, 2, 3, 4, 5, 6, 7};
  EXPECT_TRUE(listed.data() != nullptr);
  EXPECT_EQ(7u, listed.size());
}

// Exercises BinaryVector with several block types: initializer-list and
// sized construction, bit access and mutation (set/reset/flip), iteration,
// copy/move assignment, construction from strings, assign, reserve and swap.
TEST(BinaryVector, General) {
  // Six initial bits are reported as one full 8-bit block.
  ailego::BinaryVector<char> a8({true, false, true, false, true, true});
  EXPECT_EQ(8u, a8.size());
  EXPECT_FALSE(a8.empty());
  EXPECT_FALSE(a8.at(1));
  EXPECT_TRUE(a8[0]);

  // Print the bits for manual inspection.
  for (auto val : a8) {
    std::cout << val << ' ';
  }
  std::cout << std::endl;

  // Same six bits with a 16-bit block type.
  ailego::BinaryVector<int16_t> a16({true, false, true, false, true, true});
  EXPECT_EQ(16u, a16.size());
  EXPECT_FALSE(a16.at(1));
  EXPECT_TRUE(a16[0]);

  for (auto val : a16) {
    std::cout << val << ' ';
  }
  std::cout << std::endl;

  // Same six bits with a 32-bit block type.
  ailego::BinaryVector<uint32_t> a32({true, false, true, false, true, true});
  EXPECT_EQ(32u, a32.size());
  EXPECT_FALSE(a32.at(1));
  EXPECT_TRUE(a32[2]);

  for (auto val : a32) {
    std::cout << val << ' ';
  }
  std::cout << std::endl;

  // Same six bits with a 64-bit block type.
  ailego::BinaryVector<int64_t> a64({true, false, true, false, true, true});
  EXPECT_EQ(64u, a64.size());
  EXPECT_FALSE(a64.at(1));
  EXPECT_TRUE(a64[2]);

  for (auto val : a64) {
    std::cout << val << ' ';
  }
  std::cout << std::endl;

  // (count, value) construction: 21 requested bits are reported as 64, and
  // every one of them is expected to be set.
  ailego::BinaryVector<uint64_t> aaa(21, true);
  EXPECT_EQ(64u, aaa.size());
  for (auto val : aaa) {
    EXPECT_TRUE(val);
  }
  // reset(i) clears a single bit.
  for (size_t i = 0; i < aaa.size(); ++i) {
    EXPECT_TRUE(aaa[i]);
    aaa.reset(i);
    EXPECT_FALSE(aaa.at(i));
  }

  // Sized construction: 100 requested bits are reported as 128, all clear.
  ailego::BinaryVector<int32_t> bbb(100);
  EXPECT_EQ(128u, bbb.size());
  for (auto val : bbb) {
    EXPECT_FALSE(val);
  }
  // set(i) raises a single bit.
  for (size_t i = 0; i < bbb.size(); ++i) {
    EXPECT_FALSE(bbb[i]);
    bbb.set(i);
    EXPECT_TRUE(bbb.at(i));
  }

  // With `bool` as the block type the expected size is 100 rounded up to a
  // multiple of sizeof(bool) * 8.
  ailego::BinaryVector<bool> ccc(100);
  EXPECT_EQ(
      (100u + sizeof(bool) * 8 - 1) / (sizeof(bool) * 8) * (sizeof(bool) * 8),
      ccc.size());
  for (auto val : ccc) {
    EXPECT_FALSE(val);
  }
  // flip(i) toggles a clear bit to set.
  for (size_t i = 0; i < ccc.size(); ++i) {
    EXPECT_FALSE(ccc[i]);
    ccc.flip(i);
    EXPECT_TRUE(ccc.at(i));
  }

  // Move assignment: the destination takes the bits, the source is expected
  // to be left empty.
  ailego::BinaryVector<int32_t> ddd;
  EXPECT_TRUE(ddd.empty());
  EXPECT_FALSE(bbb.empty());
  ddd = std::move(bbb);
  EXPECT_FALSE(ddd.empty());
  EXPECT_TRUE(bbb.empty());

  // Copy assignment: both source and destination hold bits afterwards.
  ailego::BinaryVector<int32_t> eee;
  EXPECT_TRUE(eee.empty());
  eee = ddd;
  EXPECT_FALSE(ddd.empty());
  EXPECT_FALSE(eee.empty());
  // clear() works on a populated vector and on the moved-from one.
  ddd.clear();
  bbb.clear();
  EXPECT_TRUE(ddd.empty());
  EXPECT_TRUE(bbb.empty());

  // Iterating a default-constructed vector must never enter the loop body.
  ailego::BinaryVector<int32_t> fff;
  for (auto val : fff) {
    (void)val;
    EXPECT_TRUE(0);
  }

  // Construction from an empty string; only construction is exercised, no
  // state is asserted.
  std::string str;
  ailego::BinaryVector<int32_t> ggg(str);
  ailego::BinaryVector<char> hhh(str);

  // Construction from a 128-byte string, by copy and by move; again no state
  // is asserted on iii.
  str.resize(128);
  ailego::BinaryVector<char> iii(str);
  ailego::BinaryVector<int64_t> jjj(std::move(str));

  // assign() replaces the content: the first bit is set, the last reported
  // bit is clear.
  jjj.assign({true, true, true, false, true, true, false, true, true, false,
              true, true});
  EXPECT_NE(0u, jjj.capacity());
  EXPECT_TRUE(jjj.front());
  EXPECT_FALSE(jjj.back());

  // data() is non-null even for a default-constructed vector.
  ailego::BinaryVector<int64_t> mmm;
  EXPECT_TRUE(mmm.data());
  // NOTE(review): `nnn` is a plain (non-const) reference to `mmm`, so this
  // repeats the check above through the same overload -- presumably a const
  // reference was intended; confirm.
  ailego::BinaryVector<int64_t> &nnn = mmm;
  EXPECT_TRUE(nnn.data());

  // reserve() grows the capacity without changing the size.
  ailego::BinaryVector<int64_t> ooo;
  ooo.reserve(1111);
  EXPECT_NE(0u, ooo.capacity());
  EXPECT_EQ(0u, ooo.size());
  EXPECT_TRUE(ooo.empty());
  // A single assigned bit is reported as one full 64-bit block.
  ooo.assign({true});
  EXPECT_EQ(64u, ooo.size());

  // Swapping with the default-constructed `mmm` leaves `ooo` with size 0.
  ooo.swap(mmm);
  EXPECT_EQ(0u, ooo.size());
}

// Fills a BinaryVector<uint32_t> of random size with a random bit pattern
// (mirrored in a std::vector<bool>) and checks that iteration yields exactly
// that pattern, element for element.
TEST(BinaryVector, Iterator) {
  std::mt19937 gen((std::random_device())());
  auto dist = std::uniform_int_distribution<size_t>(1, 129);
  // A multiple of 32, so the dimension fills whole uint32_t blocks.
  size_t dimension = dist(gen) * 32;

  ailego::BinaryVector<uint32_t> bt(dimension);
  std::vector<bool> vec(dimension);

  // Set roughly one bit in seven, recording the pattern in `vec`.
  for (size_t i = 0; i != vec.size(); ++i) {
    bool val = (dist(gen) % 7 == 0);
    vec[i] = val;
    if (val) {
      bt.set(i);
    }
  }

  size_t index = 0;
  for (auto iter = bt.begin(); iter != bt.end(); ++iter) {
    // Stop instead of reading past `vec` if the iterator yields too many
    // elements.
    ASSERT_LT(index, vec.size());
    EXPECT_EQ(vec[index], *iter);
    ++index;
  }
  // Without this check the test would pass even if iteration produced
  // fewer elements than were stored (or none at all).
  EXPECT_EQ(vec.size(), index);
}

// Sets the same random bit positions in four BinaryVectors that differ only
// in block width and checks that their raw storage is byte-for-byte
// identical.
TEST(BinaryVector, LittleEndian) {
  ailego::BinaryVector<uint8_t> bs8(128 * 4);
  ailego::BinaryVector<uint16_t> bs16(128 * 4);
  ailego::BinaryVector<uint32_t> bs32(128 * 4);
  ailego::BinaryVector<uint64_t> bs64(128 * 4);

  std::mt19937 gen((std::random_device())());
  // uniform_int_distribution bounds are inclusive, so the upper bound must be
  // the last valid bit index. The previous bound of 128 * 4 could produce an
  // index one past the end of every vector.
  auto dist = std::uniform_int_distribution<size_t>(0, 128 * 4 - 1);

  for (int i = 0; i < 18; ++i) {
    size_t val = dist(gen);
    bs8.set(val);
    bs16.set(val);
    bs32.set(val);
    bs64.set(val);
    EXPECT_TRUE(bs8.at(val));
    EXPECT_TRUE(bs16.at(val));
    EXPECT_TRUE(bs32.at(val));
    EXPECT_TRUE(bs64.at(val));
  }

  // Compare over the byte length of the 8-bit-block vector.
  EXPECT_TRUE(memcmp(bs8.data(), bs16.data(), bs8.bytes()) == 0);
  EXPECT_TRUE(memcmp(bs8.data(), bs32.data(), bs8.bytes()) == 0);
  EXPECT_TRUE(memcmp(bs8.data(), bs64.data(), bs8.bytes()) == 0);
}

// Exercises NibbleVector construction and assign with signed and unsigned
// element types, over the full 4-bit value range.
TEST(NibbleVector, General) {
  // All sixteen signed 4-bit values, in ascending order.
  ailego::NibbleVector<int> nv1(
      {-8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7});

  EXPECT_FALSE(nv1.empty());
  // Unsigned literals keep these comparisons in line with the `16u` check on
  // nv3.size() below and avoid mixing signed and unsigned operands.
  EXPECT_EQ(16u, nv1.size());
  EXPECT_EQ(16u, nv1.dimension());
  // Sixteen elements are stored in eight bytes.
  EXPECT_EQ(8u, nv1.bytes());
  for (int i = -8; i != 8; ++i) {
    EXPECT_EQ(i, nv1.at(i + 8));
  }

  // (count, value) construction: every stored element must equal the value.
  ailego::NibbleVector<uint32_t> nv2(31, 5);
  for (size_t i = 0; i != nv2.size(); ++i) {
    EXPECT_EQ(5u, nv2.at(i));
  }

  // assign() from a list replaces the initial sizing.
  ailego::NibbleVector<int32_t> nv3(56);
  nv3.assign({-8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7});
  EXPECT_EQ(16u, nv3.size());

  // assign(count, value) likewise replaces the previous content.
  ailego::NibbleVector<uint32_t> nv4(25);
  nv4.assign(88, 6);
  for (size_t i = 0; i != nv4.size(); ++i) {
    EXPECT_EQ(6u, nv4.at(i));
  }
}

// Appends random unsigned 4-bit values pairwise to a NibbleVector (mirrored
// in a std::vector) and checks that iteration yields exactly that sequence.
TEST(NibbleVector, UnsignedIterator) {
  std::mt19937 gen((std::random_device())());
  auto dist = std::uniform_int_distribution<uint32_t>(0, 15);
  // Always even, because values are appended two at a time.
  size_t dimension = ((std::uniform_int_distribution<size_t>(1, 63))(gen)) * 2;

  ailego::NibbleVector<uint32_t> nv;
  std::vector<uint32_t> vec;

  for (size_t i = 0; i != dimension; i += 2) {
    uint32_t lo = dist(gen);
    uint32_t hi = dist(gen);
    vec.push_back(lo);
    vec.push_back(hi);
    nv.append(lo, hi);
  }
  EXPECT_EQ(vec.size(), nv.size());

  size_t index = 0;
  for (auto iter = nv.begin(); iter != nv.end(); ++iter) {
    // Stop instead of reading past `vec` if the iterator yields too many
    // elements.
    ASSERT_LT(index, vec.size());
    EXPECT_EQ(vec[index], *iter);
    ++index;
  }
  // Without this check the test would pass even if iteration produced
  // fewer elements than were appended (or none at all).
  EXPECT_EQ(vec.size(), index);
}

// Appends random signed 4-bit values pairwise to a NibbleVector, verifies
// them through iteration, then overwrites every element with set() and
// verifies again.
TEST(NibbleVector, SignedIterator) {
  std::mt19937 engine((std::random_device())());
  std::uniform_int_distribution<int32_t> nibble(-8, 7);
  // Always even, because values are appended two at a time.
  const size_t dimension =
      ((std::uniform_int_distribution<size_t>(1, 63))(engine)) * 2;

  ailego::NibbleVector<int32_t> packed;
  std::vector<int32_t> reference;
  EXPECT_TRUE(packed.empty());

  // Walks `packed` with its iterators and compares each element against the
  // reference copy at the same position.
  const auto expect_matches_reference = [&packed, &reference]() {
    size_t position = 0;
    for (auto iter = packed.begin(); iter != packed.end(); ++iter) {
      EXPECT_EQ(reference[position], *iter);
      ++position;
    }
  };

  for (size_t pair = 0; pair != dimension / 2; ++pair) {
    int32_t lo = nibble(engine);
    int32_t hi = nibble(engine);
    reference.push_back(lo);
    reference.push_back(hi);
    packed.append(lo, hi);
  }
  EXPECT_FALSE(packed.empty());
  EXPECT_EQ(reference.size(), packed.size());
  EXPECT_EQ(reference.size(), packed.dimension());
  // Two elements are stored per byte.
  EXPECT_EQ(reference.size() / 2, packed.bytes());

  expect_matches_reference();

  // Test again
  for (size_t first = 0; first != dimension; first += 2) {
    int32_t lo = nibble(engine);
    int32_t hi = nibble(engine);
    reference[first + 0] = lo;
    reference[first + 1] = hi;
    packed.set(first + 0, lo);
    packed.set(first + 1, hi);
  }
  expect_matches_reference();
}


================================================
FILE: tests/ailego/encoding/json_parse_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <gtest/gtest.h>
#include <zvec/ailego/encoding/json.h>

using namespace zvec::ailego;

TEST(Json, JsonParser) {
  {
    JsonValue val;
    JsonParser parser;

    JsonString str =
        "{first: {int: 123, float: 1.0, "
        "true:[true, true, true, true], false:[false],  zero:[0,0,0]}, "
        "true:true, false:[false, false, false, false], zero:[0,0]}";

    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_simple(true);
    EXPECT_TRUE(parser.parse(str.c_str(), &val));
    EXPECT_TRUE(val.refer() == 1);

    const JsonValue &val2 = val;
    const JsonObject &obj2 = val2.as_object();

    EXPECT_TRUE(val2.as_object().refer() == 1);
    JsonValue val_result;
    EXPECT_TRUE(obj2.get("first", &val_result));
    EXPECT_TRUE(val_result.refer() == 2);
    EXPECT_TRUE(val_result.as_object().refer() == 2);

    EXPECT_TRUE(obj2.get("true", &val_result));
    EXPECT_TRUE(obj2.get("false", &val_result));
    EXPECT_TRUE(obj2.get("zero", &val_result));

    const JsonValue val3 = val;
    EXPECT_TRUE(val3.refer() == 2);
    EXPECT_TRUE(val3.as_object().refer() == 1);

    JsonValue val4 = val;
    EXPECT_TRUE(val4.refer() == 3);
    EXPECT_TRUE(val3.refer() == 3);
    EXPECT_TRUE(val2.refer() == 3);

    JsonObject &obj4 = val4.as_object();
    EXPECT_TRUE(obj4.refer() == 2);
    EXPECT_TRUE(val4.refer() == 0);
    EXPECT_TRUE(val3.refer() == 2);
    EXPECT_TRUE(val3.as_object().refer() == 2);
    EXPECT_TRUE(val2.refer() == 2);
  }

  {
    JsonString str =
        "[ true,,\'\\u9701abcd \\u38981515\\u89454845\\uabcd\\uef12\'";
    JsonParser parser;
    JsonValue val = JsonValue();

    JsonValue tmp;
    EXPECT_FALSE(tmp.parse(str));

    parser.set_squote();
    EXPECT_FALSE(parser.parse(str.c_str(), &val));
  }

  {
    JsonParser parser;
    JsonString str = "{ 0:0, 1: 1, 2:2, 3:3, 4: 4, 5:5}";
    JsonValue val;

    parser.set_simple(true);
    EXPECT_TRUE(parser.parse(str.c_str(), &val));
    EXPECT_TRUE(val.refer() == 1);

    JsonValue val2 = val;
    EXPECT_TRUE(val2.refer() == 2);

    JsonObject &obj2 = val2.as_object();
    EXPECT_TRUE(val2.refer() == 0);
    EXPECT_TRUE(obj2.refer() == 2);
    EXPECT_TRUE(obj2["0"].refer() == 2);
    EXPECT_TRUE(obj2["1"].refer() == 2);
    EXPECT_TRUE(obj2["2"].refer() == 2);
    EXPECT_TRUE(obj2["3"].refer() == 2);
    EXPECT_TRUE(obj2["4"].refer() == 2);
    EXPECT_TRUE(obj2["5"].refer() == 2);
    EXPECT_TRUE(obj2.refer() == 0);

    JsonValue val3 = val;
    EXPECT_TRUE(val3.refer() == 2);

    JsonObject::const_iterator iter = obj2.begin();
    EXPECT_TRUE(iter->key().refer() == 2);
    EXPECT_TRUE(iter->value().refer() == 2);
  }

  {
    JsonParser parser;
    JsonString str = "[0, 1, 2, 3, 4, 5]";
    JsonValue val;

    parser.set_simple(true);
    EXPECT_TRUE(parser.parse(str.c_str(), &val));
    EXPECT_TRUE(val.refer() == 1);

    JsonValue val2 = val;
    EXPECT_TRUE(val2.refer() == 2);

    JsonArray &arr2 = val2.as_array();
    EXPECT_TRUE(val2.refer() == 0);
    EXPECT_TRUE(arr2.refer() == 2);
    EXPECT_TRUE(arr2[0].refer() == 2);
    EXPECT_TRUE(arr2[1].refer() == 2);
    EXPECT_TRUE(arr2[2].refer() == 2);
    EXPECT_TRUE(arr2[3].refer() == 2);
    EXPECT_TRUE(arr2[4].refer() == 2);
    EXPECT_TRUE(arr2[5].refer() == 2);
    EXPECT_TRUE(arr2.refer() == 0);

    JsonValue val3 = val;
    EXPECT_TRUE(val3.refer() == 2);

    JsonArray::const_iterator iter = arr2.begin();
    EXPECT_TRUE(iter->refer() == 2);
  }

  {
    JsonString str =
        "[ 15, true, null,\'\\u9701abcd "
        "\\u38981515\\u89454845\\uabcd\\uef12\',]";
    JsonParser parser;
    JsonValue val(true);

    parser.set_squote();
    EXPECT_TRUE(parser.parse(str.c_str(), &val));
    EXPECT_TRUE(val[1].as_bool());
    val[1] = val[2];
    EXPECT_FALSE(val[1].as_bool());
  }

  {
    JsonParser parser;
    JsonValue val1, val2, val3, val4;

    EXPECT_TRUE(parser.parse(
        "[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, "
        "6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,"
        "17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,"
        "\"32\",\"33\",\"34\",\"35\",\"36\","
        "{\"5\":5,\"4\":4,\"3\":3,\"2\":2,\"1\":1,\"0\":0,\"-1\":-1}]",
        &val1));
    EXPECT_TRUE(
        parser.parse("[\"0\",\"1\",\"2\",\"3\",\"4\",\"5\",\"6\",\"7\",\"8\","
                     "\"9\",\"10\",\"11\",\"12\",\"13\",\"14\",\"15\",\"16\","
                     "\"17\",\"18\",\"19\",\"20\",\"21\",\"22\",\"23\",\"24\","
                     "\"25\",\"26\",\"27\",\"28\",\"29\",\"30\",\"31\","
                     "\"32\",\"33\",\"34\",\"35\",\"36\","
                     "{\"-2\":\"-2\",\"-1\":\"-1\",\"1\":\"1\",\"2\":\"2\","
                     "\"3\":\"3\",\"4\":\"4\",\"5\":\"5\",\"6\":\"6\"},"
                     "[],null,true,false,0.0,1.0,9.999,-1]",
                     &val2));
    EXPECT_TRUE(
        parser.parse("[\"0\",\"1\",\"2\",\"3\",\"4\",\"5\",\"6\",\"7\",\"8\","
                     "\"9\",\"10\",\"11\",\"12\",\"13\",\"14\",\"15\",\"16\","
                     "\"17\",\"18\",\"19\",\"20\",\"21\",\"22\",\"23\",\"24\","
                     "\"25\",\"26\",\"27\",\"28\",\"29\",\"30\",\"31\","
                     "\"32\",\"33\",\"34\",\"35\",\"36\","
                     "{\"5\":\"5\",\"4\":\"4\",\"3\":\"3\","
                     "\"2\":\"2\",\"1\":\"1\",\"0\":0,"
                     "\"-1\":\"-1\",\"-2\":\"-2\",\"6\":\"6\"},"
                     "[],null,true,false,0.0,1.0,9.999,-1]",
                     &val3));
    EXPECT_TRUE(
        parser.parse("[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, "
                     "6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,"
                     "17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,"
                     "\"32\",\"33\",\"34\",\"35\",\"36\","
                     "{\"-2\":\"-2\",\"-1\":-1,\"1\":1,\"2\":2,"
                     "\"3\":3,\"4\":4,\"5\":5,\"6\":\"6\",\"0\":0},"
                     "[],null,true,false,0.0,1.0,9.999,-1]",
                     &val4));

    JsonValue tmp1 = val1;
    tmp1.merge(val2);

    JsonValue tmp2 = val2;
    tmp2.merge(val1);

    JsonDumper dumper;
    EXPECT_TRUE(dumper.dump(val1));
    EXPECT_TRUE(dumper.dump(val2));
    EXPECT_TRUE(dumper.dump(val3));
    EXPECT_TRUE(dumper.dump(val4));
    EXPECT_TRUE(dumper.dump(tmp1));

    EXPECT_TRUE(tmp1.as_json_string() == val3.as_json_string());
    EXPECT_TRUE(tmp2.as_json_string() == val4.as_json_string());
  }

  {
    JsonValue val;
    JsonParser parser;

    std::string str =
        "{\"req\": {\"aid\": \"\", \"friend\": "
        "\"1234567890\", \"uintype\": "
        "0}}";
    EXPECT_TRUE(parser.parse(str.c_str(), &val));
    EXPECT_FALSE(parser.parse("", &val));

    JsonValue tmp;
    EXPECT_TRUE(tmp.parse(str));
    EXPECT_TRUE(tmp == val);
    EXPECT_FALSE(tmp != val);

    const JsonValue &req = val["req"];
    EXPECT_TRUE(req.is_object());
    EXPECT_TRUE(req["show"].as_integer() == 0);
    EXPECT_TRUE(req["friend"].as_integer() == 1234567890);
    EXPECT_TRUE(req[1].is_null());
  }

  {
    JsonValue val;
    JsonParser parser;

    std::string str = "[true, false, 0, 1, 2, \"3\"]";
    EXPECT_TRUE(parser.parse(str.c_str(), &val));
    EXPECT_TRUE(val[0u].as_bool());
    EXPECT_FALSE(val[1].as_bool());
    EXPECT_TRUE(val[2].as_integer() == 0);
    EXPECT_TRUE(val[3].as_integer() == 1);
    EXPECT_TRUE(val[4].as_integer() == 2);
    EXPECT_TRUE(val[5].as_integer() == 3);

    JsonValue tmp;
    EXPECT_TRUE(tmp.parse(str));
    EXPECT_TRUE(tmp == val);
    EXPECT_FALSE(tmp != val);

    const JsonValue val2 = val;
    EXPECT_TRUE(val2[0u].as_bool());
    EXPECT_FALSE(val2[1].as_bool());
    EXPECT_TRUE(val2[2].as_integer() == 0);
    EXPECT_TRUE(val2[3].as_integer() == 1);
    EXPECT_TRUE(val2[4].as_integer() == 2);
    EXPECT_TRUE(val2[5].as_integer() == 3);
    EXPECT_TRUE(val2[6].is_null());
    EXPECT_TRUE(val2[(JsonValue::size_type)-1].as_integer() == 0);
  }

  {
    JsonValue val;
    JsonParser parser;

    std::string str = "{abcd:\"1234\"}";
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_comment(true);
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_simple(true);
    EXPECT_TRUE(parser.parse(str.c_str(), &val));
    EXPECT_TRUE(val["abcd"].as_c_string() == std::string("1234"));

    parser.set_unstrict(true);
    EXPECT_TRUE(parser.parse(str.c_str(), &val));
    EXPECT_TRUE(val["abcd"].as_integer() == 1234);
  }

  {
    JsonValue val;
    JsonParser parser;

    std::string str = "/*comments*/ { abcd\t  :  /* //comments */\"1234\" }";
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_comment(true);
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_simple(true);
    parser.set_comment(false);
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_simple(true);
    parser.set_comment(true);
    EXPECT_TRUE(parser.parse(str.c_str(), &val));
    EXPECT_TRUE(val["abcd"].as_c_string() == std::string("1234"));

    parser.set_unstrict(true);
    EXPECT_TRUE(parser.parse(str.c_str(), &val));
    EXPECT_TRUE(val["abcd"].as_integer() == 1234);
  }

  {
    JsonValue val;
    JsonParser parser;

    std::string str = "{ abcd/*  fff*/  :  /* //comments */\"1234\" }";
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_comment(true);
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_simple(true);
    parser.set_comment(false);
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_simple(true);
    parser.set_comment(true);
    EXPECT_TRUE(parser.parse(str.c_str(), &val));
    EXPECT_TRUE(val["abcd"].as_c_string() == std::string("1234"));

    parser.set_unstrict(true);
    EXPECT_TRUE(parser.parse(str.c_str(), &val));
    EXPECT_TRUE(val["abcd"].as_integer() == 1234);
  }

  {
    JsonValue val;
    JsonParser parser;

    std::string str =
        "{ \"abcd\\\"/*  fff*/  :  /* //comments */\"1234\" , {, [,  ]}}";
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_comment(true);
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_simple(true);
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_unstrict(true);
    EXPECT_FALSE(parser.parse(str.c_str(), &val));
  }

  {
    JsonValue val;
    JsonParser parser;

    std::string str = "{ abcd///comments */\"1234\", [] }";
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_comment(true);
    EXPECT_FALSE(parser.parse(str.c_str(), &val));
  }

  {
    JsonValue val;
    JsonParser parser;

    std::string str = "{ abcd/*//*/ : \t  \"1234\" }";
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_simple(true);
    EXPECT_TRUE(parser.parse(str.c_str(), &val));
    EXPECT_TRUE(val["abcd"].as_c_string() == NULL);
    EXPECT_TRUE(val["abcd/*//*/"].as_c_string() == std::string("1234"));

    parser.set_comment(true);
    EXPECT_TRUE(parser.parse(str.c_str(), &val));
    EXPECT_TRUE(val["abcd"].as_c_string() == std::string("1234"));
  }

  {
    JsonValue val;
    JsonParser parser;

    std::string str = "{ ,{}, \"abcd/*//*/ : \t  \"1234\", }";
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_simple(true);
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_comment(true);
    EXPECT_FALSE(parser.parse(str.c_str(), &val));
  }

  {
    JsonValue val;
    JsonParser parser;

    std::string str = "{ ccdd: [], abcd\" /*//*/ \n: \t  \"1234\" }";
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_simple(true);
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_comment(true);
    EXPECT_TRUE(parser.parse(str.c_str(), &val));
    EXPECT_TRUE(val["abcd"].as_c_string() == NULL);
    EXPECT_TRUE(val["abcd\""].as_c_string() == std::string("1234"));
  }

  {
    JsonValue val;
    JsonParser parser;

    std::string str = "{,, \"\" \n: \t  \"1234\" }";
    EXPECT_TRUE(parser.parse(str.c_str(), &val));
    EXPECT_TRUE(val[""].as_c_string() == std::string("1234"));

    parser.set_simple(true);
    EXPECT_TRUE(parser.parse(str.c_str(), &val));
    EXPECT_TRUE(val[""].as_c_string() == std::string("1234"));
  }

  {
    JsonValue val;
    JsonParser parser;

    std::string str = "{ ,  \n: \t  \"1234\" }";
    EXPECT_FALSE(parser.parse(str.c_str(), &val));
    EXPECT_TRUE(val[""].as_c_string() == NULL);

    parser.set_simple(true);
    EXPECT_FALSE(parser.parse(str.c_str(), &val));
  }

  {
    JsonValue val;
    JsonParser parser;

    std::string str = "{ \'ccdd\': [], \'abcd\' /*//*/ \n: \t  \"1234\" }";

    parser.set_comment(true);
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_simple(true);
    EXPECT_TRUE(parser.parse(str.c_str(), &val));
    EXPECT_TRUE(val["\'abcd\'"].as_c_string() == std::string("1234"));

    parser.set_squote(true);
    EXPECT_TRUE(parser.parse(str.c_str(), &val));
    EXPECT_TRUE(val["abcd"].as_c_string() == std::string("1234"));
  }

  {
    JsonValue val;
    JsonParser parser;

    std::string str = "{ 1234 : \'abcd\', \'5678\' : [5, \'5678\'] }";
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_squote(true);
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_simple(true);
    EXPECT_TRUE(parser.parse(str.c_str(), &val));
    EXPECT_TRUE(val["1234"].as_c_string() == std::string("abcd"));
    EXPECT_TRUE(val["5678"].as_array().at(1).as_c_string() ==
                std::string("5678"));
  }

  {
    JsonValue val;
    JsonParser parser;

    std::string str = "{ 1234 : \'ab\"cd\', \'5678\' : [\"5\", \'5678\'] }";
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_squote(true);
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_simple(true);
    EXPECT_TRUE(parser.parse(str.c_str(), &val));
    EXPECT_TRUE(val["1234"].as_c_string() == std::string("ab\"cd"));
    EXPECT_TRUE(val["5678"].as_array().at(1).as_c_string() ==
                std::string("5678"));
  }

  {
    JsonValue val;
    JsonParser parser;

    std::string str = "{ 1234 : \'ab\\\'cd\', \'5678\' : [\"5\", \'5678\'] }";
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_squote(true);
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_simple(true);
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_unstrict(true);
    EXPECT_TRUE(parser.parse(str.c_str(), &val));
    EXPECT_TRUE(val["1234"].as_c_string() == std::string("ab\\\'cd"));
    EXPECT_TRUE(val["5678"].as_array().at(1).as_c_string() ==
                std::string("5678"));
  }

  {
    JsonValue val;
    JsonParser parser;

    std::string str = "{ \'1234\'\' : \'abcd\', \'5678\' : [\"5\", \'5678\'] }";
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_squote(true);
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_simple(true);
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_unstrict(true);
    EXPECT_FALSE(parser.parse(str.c_str(), &val));
  }

  {
    JsonValue val;
    JsonParser parser;

    std::string str =
        "{ \'1234\' : \'abcd\' \", \'5678\' : [\"5\", \'5678\'] }";
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_squote(true);
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_simple(true);
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_unstrict(true);
    EXPECT_FALSE(parser.parse(str.c_str(), &val));
  }

  {
    JsonValue val;
    JsonParser parser;

    std::string str =
        "{ \'1234\' : \'abcd\' , \'5678\' : [\"5\" \", \'5678\'] }";
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_squote(true);
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_simple(true);
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_unstrict(true);
    EXPECT_FALSE(parser.parse(str.c_str(), &val));
  }

  {
    JsonValue val;
    JsonParser parser;

    std::string str =
        "{ \'1234\' : \'abcd\' , \'5678\' : [\"5\" , \'5678\' \'] }";
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_squote(true);
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_simple(true);
    EXPECT_FALSE(parser.parse(str.c_str(), &val));

    parser.set_unstrict(true);
    EXPECT_FALSE(parser.parse(str.c_str(), &val));
  }

  {
    JsonValue val1;
    JsonValue val2;
    JsonString str =
        "{\"a\":1, \"b\":2, \"c\":3, \"string\":  \"string\", "
        "\"array\": [null, true, false, "
        ", 0], \"object\": {\"a\":1.0, \"b\":2.0, \"c\":3.0}, "
        "\"true\": true, \"null\": null}";

    EXPECT_TRUE(val1.parse(str));
    EXPECT_TRUE(val2.parse(str));
    EXPECT_TRUE(val2 == val1);
    EXPECT_FALSE((val2 != val1));
    EXPECT_TRUE(val2.as_object() == val1);
    EXPECT_TRUE(val2 == val1.as_object());
    EXPECT_TRUE(val2.as_object() == val1.as_object());
    EXPECT_TRUE(val1["string"] == val2["string"]);
  }
}

// Exercises JsonObject: insertion, lookup, removal, iteration order,
// conversions between iterator kinds, and isolation of copies from each other.
TEST(Json, JsonObject) {
  // set/get/unset round trip over the 1000 keys "0" .. "999".
  {
    JsonObject jobj;

    for (int i = 0; i < 1000; ++i) {
      JsonValue key(i);
      // The value is stored as a float but read back through an integer
      // out-parameter; the test expects the integral value to come back.
      EXPECT_TRUE(jobj.set(key.as_json_string().c_str(), JsonValue((float)i)));

      JsonValue::integer_type val;
      EXPECT_TRUE(jobj.get(key.as_json_string(), &val));
      EXPECT_EQ(val, i);
    }

    for (int i = 0; i < 1000; ++i) {
      JsonValue key(i);
      jobj.unset(key.as_json_string().c_str());

      // After unset() the lookup must fail and must leave the out-parameter
      // untouched (it keeps its initial 0).
      JsonValue::integer_type val = 0;
      EXPECT_FALSE(jobj.get(key.as_stl_string(), &val));
      EXPECT_EQ(val, 0);
    }
  }

  // Ten single-digit keys: size bookkeeping, operator[] lookup, forward and
  // reverse const iteration, then unset() and clear().
  {
    JsonObject obj;

    obj.set("0", JsonValue(0));
    obj.set("1", JsonValue(1));
    obj.set("2", JsonValue(2));
    obj.set("3", JsonValue(3));
    obj.set("4", JsonValue(4));
    obj.set("5", JsonValue(5));
    obj.set("6", JsonValue(6));
    obj.set("7", JsonValue(7));
    obj.set("8", JsonValue(8));
    obj.set("9", JsonValue(9));

    EXPECT_EQ(obj.size(), 10u);
    EXPECT_EQ(obj["0"].as_integer(), 0);
    EXPECT_EQ(obj["1"].as_integer(), 1);
    EXPECT_EQ(obj["2"].as_integer(), 2);
    EXPECT_EQ(obj["3"].as_integer(), 3);
    EXPECT_EQ(obj["4"].as_integer(), 4);
    EXPECT_EQ(obj["5"].as_integer(), 5);
    EXPECT_EQ(obj["6"].as_integer(), 6);
    EXPECT_EQ(obj["7"].as_integer(), 7);
    EXPECT_EQ(obj["8"].as_integer(), 8);
    EXPECT_EQ(obj["9"].as_integer(), 9);
    // Looking up existing keys through operator[] must not add entries.
    EXPECT_EQ(obj.size(), 10u);

    // Forward iteration is expected to visit the values 0..9 in order.
    int index_id = 0;
    for (JsonObject::const_iterator it = obj.cbegin(); it != obj.cend();
         ++it, ++index_id) {
      EXPECT_EQ(it->value().as_integer(), index_id);
    }

    // Reverse iteration is expected to visit the values 9..0.
    int index_id_r = 9;
    for (JsonObject::const_reverse_iterator it = obj.crbegin();
         it != obj.crend(); ++it, --index_id_r) {
      EXPECT_EQ(it->value().as_integer(), index_id_r);
    }

    // Each unset() of a present key shrinks the object by exactly one.
    obj.unset("1");
    EXPECT_EQ(obj.size(), 9u);
    obj.unset("3");
    EXPECT_EQ(obj.size(), 8u);
    obj.unset("5");
    EXPECT_EQ(obj.size(), 7u);
    obj.unset("7");
    EXPECT_EQ(obj.size(), 6u);
    obj.unset("9");
    EXPECT_EQ(obj.size(), 5u);

    obj.clear();
    EXPECT_EQ(obj.size(), 0u);
  }

  // Keys are inserted in a scrambled order; after every insertion has() is
  // checked for all ten candidate keys. The numbered comments give the number
  // of entries present at that step.
  {
    JsonObject obj;

    // 0 entries: nothing is present yet.
    EXPECT_FALSE(obj.has("00000"));
    EXPECT_FALSE(obj.has("11111"));
    EXPECT_FALSE(obj.has("22222"));
    EXPECT_FALSE(obj.has("33333"));
    EXPECT_FALSE(obj.has("44444"));
    EXPECT_FALSE(obj.has("55555"));
    EXPECT_FALSE(obj.has("66666"));
    EXPECT_FALSE(obj.has("77777"));
    EXPECT_FALSE(obj.has("88888"));
    EXPECT_FALSE(obj.has("99999"));

    // 1 entry: 55555 added.
    EXPECT_TRUE(obj.set("55555", "55555"));
    EXPECT_FALSE(obj.has("00000"));
    EXPECT_FALSE(obj.has("11111"));
    EXPECT_FALSE(obj.has("22222"));
    EXPECT_FALSE(obj.has("33333"));
    EXPECT_FALSE(obj.has("44444"));
    EXPECT_TRUE(obj.has("55555"));
    EXPECT_FALSE(obj.has("66666"));
    EXPECT_FALSE(obj.has("77777"));
    EXPECT_FALSE(obj.has("88888"));
    EXPECT_FALSE(obj.has("99999"));

    // 2 entries: 88888 added.
    EXPECT_TRUE(obj.set("88888", "88888"));
    EXPECT_FALSE(obj.has("00000"));
    EXPECT_FALSE(obj.has("11111"));
    EXPECT_FALSE(obj.has("22222"));
    EXPECT_FALSE(obj.has("33333"));
    EXPECT_FALSE(obj.has("44444"));
    EXPECT_TRUE(obj.has("55555"));
    EXPECT_FALSE(obj.has("66666"));
    EXPECT_FALSE(obj.has("77777"));
    EXPECT_TRUE(obj.has("88888"));
    EXPECT_FALSE(obj.has("99999"));

    // 3 entries: 66666 added.
    EXPECT_TRUE(obj.set("66666", "66666"));
    EXPECT_FALSE(obj.has("00000"));
    EXPECT_FALSE(obj.has("11111"));
    EXPECT_FALSE(obj.has("22222"));
    EXPECT_FALSE(obj.has("33333"));
    EXPECT_FALSE(obj.has("44444"));
    EXPECT_TRUE(obj.has("55555"));
    EXPECT_TRUE(obj.has("66666"));
    EXPECT_FALSE(obj.has("77777"));
    EXPECT_TRUE(obj.has("88888"));
    EXPECT_FALSE(obj.has("99999"));

    // 4 entries: 44444 added.
    EXPECT_TRUE(obj.set("44444", "44444"));
    EXPECT_FALSE(obj.has("00000"));
    EXPECT_FALSE(obj.has("11111"));
    EXPECT_FALSE(obj.has("22222"));
    EXPECT_FALSE(obj.has("33333"));
    EXPECT_TRUE(obj.has("44444"));
    EXPECT_TRUE(obj.has("55555"));
    EXPECT_TRUE(obj.has("66666"));
    EXPECT_FALSE(obj.has("77777"));
    EXPECT_TRUE(obj.has("88888"));
    EXPECT_FALSE(obj.has("99999"));

    // 5 entries: 99999 added.
    EXPECT_TRUE(obj.set("99999", "99999"));
    EXPECT_FALSE(obj.has("00000"));
    EXPECT_FALSE(obj.has("11111"));
    EXPECT_FALSE(obj.has("22222"));
    EXPECT_FALSE(obj.has("33333"));
    EXPECT_TRUE(obj.has("44444"));
    EXPECT_TRUE(obj.has("55555"));
    EXPECT_TRUE(obj.has("66666"));
    EXPECT_FALSE(obj.has("77777"));
    EXPECT_TRUE(obj.has("88888"));
    EXPECT_TRUE(obj.has("99999"));

    // 6 entries: 22222 added.
    EXPECT_TRUE(obj.set("22222", "22222"));
    EXPECT_FALSE(obj.has("00000"));
    EXPECT_FALSE(obj.has("11111"));
    EXPECT_TRUE(obj.has("22222"));
    EXPECT_FALSE(obj.has("33333"));
    EXPECT_TRUE(obj.has("44444"));
    EXPECT_TRUE(obj.has("55555"));
    EXPECT_TRUE(obj.has("66666"));
    EXPECT_FALSE(obj.has("77777"));
    EXPECT_TRUE(obj.has("88888"));
    EXPECT_TRUE(obj.has("99999"));

    // 7 entries: 00000 added.
    EXPECT_TRUE(obj.set("00000", "00000"));
    EXPECT_TRUE(obj.has("00000"));
    EXPECT_FALSE(obj.has("11111"));
    EXPECT_TRUE(obj.has("22222"));
    EXPECT_FALSE(obj.has("33333"));
    EXPECT_TRUE(obj.has("44444"));
    EXPECT_TRUE(obj.has("55555"));
    EXPECT_TRUE(obj.has("66666"));
    EXPECT_FALSE(obj.has("77777"));
    EXPECT_TRUE(obj.has("88888"));
    EXPECT_TRUE(obj.has("99999"));

    // 8 entries: 77777 added.
    EXPECT_TRUE(obj.set("77777", "77777"));
    EXPECT_TRUE(obj.has("00000"));
    EXPECT_FALSE(obj.has("11111"));
    EXPECT_TRUE(obj.has("22222"));
    EXPECT_FALSE(obj.has("33333"));
    EXPECT_TRUE(obj.has("44444"));
    EXPECT_TRUE(obj.has("55555"));
    EXPECT_TRUE(obj.has("66666"));
    EXPECT_TRUE(obj.has("77777"));
    EXPECT_TRUE(obj.has("88888"));
    EXPECT_TRUE(obj.has("99999"));

    // 9 entries: 11111 added.
    EXPECT_TRUE(obj.set("11111", "11111"));
    EXPECT_TRUE(obj.has("00000"));
    EXPECT_TRUE(obj.has("11111"));
    EXPECT_TRUE(obj.has("22222"));
    EXPECT_FALSE(obj.has("33333"));
    EXPECT_TRUE(obj.has("44444"));
    EXPECT_TRUE(obj.has("55555"));
    EXPECT_TRUE(obj.has("66666"));
    EXPECT_TRUE(obj.has("77777"));
    EXPECT_TRUE(obj.has("88888"));
    EXPECT_TRUE(obj.has("99999"));

    // 10 entries: 33333 added, every key is now present.
    EXPECT_TRUE(obj.set("33333", "33333"));
    EXPECT_TRUE(obj.has("00000"));
    EXPECT_TRUE(obj.has("11111"));
    EXPECT_TRUE(obj.has("22222"));
    EXPECT_TRUE(obj.has("33333"));
    EXPECT_TRUE(obj.has("44444"));
    EXPECT_TRUE(obj.has("55555"));
    EXPECT_TRUE(obj.has("66666"));
    EXPECT_TRUE(obj.has("77777"));
    EXPECT_TRUE(obj.has("88888"));
    EXPECT_TRUE(obj.has("99999"));

    EXPECT_EQ(10u, obj.size());

    // Although the keys were inserted out of order, iteration is expected to
    // yield them in ascending key order. The stored values are strings and are
    // read back through as_integer(), hence "00000" is expected as 0.
    int index_ids[] = {0,     11111, 22222, 33333, 44444,
                       55555, 66666, 77777, 88888, 99999};
    int i = 0;
    for (JsonObject::iterator it = obj.begin(); it != obj.end(); ++it, ++i) {
      EXPECT_EQ(it->value().as_integer(), index_ids[i]);
    }
    EXPECT_EQ(i, (int)obj.size());
  }

  // set() on an existing key versus assignment through operator[].
  {
    JsonObject obj;

    EXPECT_TRUE(obj.set("key0", "key0"));
    EXPECT_TRUE(obj.set("key1", "key1"));
    EXPECT_TRUE(obj.set("key2", "key2"));
    EXPECT_TRUE(obj.set("key3", "key3"));
    EXPECT_TRUE(obj.set("key4", "key4"));
    EXPECT_TRUE(obj.set("key5", "key5"));
    // set() on a key that already exists is expected to return false and, as
    // the get() checks below show, to leave the stored value unchanged.
    EXPECT_FALSE(obj.set("key0", "000000"));
    EXPECT_FALSE(obj.set("key1", "000000"));
    EXPECT_FALSE(obj.set("key5", "000000"));
    EXPECT_EQ(obj.size(), 6u);

    JsonString str;
    EXPECT_TRUE(obj.get("key0", &str));
    EXPECT_TRUE(str == JsonString("key0"));
    EXPECT_TRUE(obj.get("key3", &str));
    EXPECT_TRUE(str == JsonString("key3"));
    EXPECT_TRUE(obj.get("key5", &str));
    EXPECT_TRUE(str == JsonString("key5"));
    EXPECT_EQ(obj.size(), 6u);

    // Assigning through operator[] does overwrite existing entries.
    obj["key0"] = 0;
    obj["key1"] = 1;
    obj["key2"] = 2;
    obj["key3"] = 3;
    obj["key4"] = 4;
    obj["key5"] = 5;

    EXPECT_EQ(obj.size(), 6u);

    int index_id = 0;
    for (JsonObject::iterator it = obj.begin(); it != obj.end();
         ++it, ++index_id) {
      EXPECT_EQ(it->value().as_integer(), index_id);
    }
    // The loop must have visited all six entries.
    EXPECT_EQ(index_id, 6);

    int index_id_r = 5;
    for (JsonObject::reverse_iterator it = obj.rbegin(); it != obj.rend();
         ++it, --index_id_r) {
      EXPECT_EQ(it->value().as_integer(), index_id_r);
    }
    EXPECT_EQ(index_id_r, -1);
  }

  // Default-constructed iterators of every kind must be convertible into the
  // declared target kind and compare equal to a default-constructed iterator
  // of that kind.
  {
    JsonObject::reverse_iterator it1 = JsonObject::iterator();
    JsonObject::reverse_iterator it2 = JsonObject::reverse_iterator();
    EXPECT_TRUE(it1 == it2);

    JsonObject::iterator it3 = JsonObject::reverse_iterator();
    JsonObject::iterator it4 = JsonObject::iterator();
    EXPECT_TRUE(it3 == it4);

    JsonObject::const_iterator it5 = JsonObject::const_iterator();
    JsonObject::const_iterator it6 = JsonObject::iterator();
    EXPECT_TRUE(it5 == it6);

    JsonObject::const_iterator it7 = JsonObject::reverse_iterator();
    JsonObject::const_iterator it8 = JsonObject::const_reverse_iterator();
    EXPECT_TRUE(it7 == it8);

    JsonObject::const_reverse_iterator it9 = JsonObject::const_iterator();
    JsonObject::const_reverse_iterator it10 = JsonObject::iterator();
    EXPECT_TRUE(it9 == it10);

    JsonObject::const_reverse_iterator it11 = JsonObject::reverse_iterator();
    JsonObject::const_reverse_iterator it12 =
        JsonObject::const_reverse_iterator();
    EXPECT_TRUE(it11 == it12);
  }

  // Writing through an iterator of obj1 must not leak into copies of obj1,
  // whether the copy was taken before (obj2) or after (obj3) the iterator was
  // obtained. The statement order here is part of what is being tested.
  {
    JsonObject obj1;
    JsonObject obj2;
    JsonObject obj3;
    JsonObject::iterator iter1;

    EXPECT_TRUE(obj1.set("aaa", "123456"));
    obj2 = obj1;
    iter1 = obj1.begin();
    obj3 = obj1;
    iter1->value() = "abcdefg";
    EXPECT_TRUE(obj1["aaa"].as_string() == "abcdefg");
    EXPECT_TRUE(obj2["aaa"].as_string() == "123456");
    EXPECT_TRUE(obj3["aaa"].as_string() == "123456");
  }

  // operator[] must return the same values through a const reference as
  // through the non-const object.
  {
    JsonObject obj1;

    obj1.set("FTitle", "123456789");
    obj1.set("FDesc", "abcdef");

    const JsonObject &obj2 = obj1;
    EXPECT_TRUE(obj1["FTitle"].as_stl_string() == "123456789");
    EXPECT_TRUE(obj1["FDesc"].as_stl_string() == "abcdef");
    EXPECT_TRUE(obj2["FTitle"].as_stl_string() == "123456789");
    EXPECT_TRUE(obj2["FDesc"].as_stl_string() == "abcdef");
  }
}

// Exercises JsonArray: equality, capacity growth, isolation of copies,
// resize/reverse/shift, iterator conversions, integer parsing at numeric
// boundaries, and inserting a container into itself.
TEST(Json, JsonArray) {
  // Equality: arrays holding the same sequence of heterogeneous values compare
  // equal, whether copied or built independently; one extra element breaks it.
  {
    JsonArray arr1;
    arr1.push(JsonValue(0.0));
    arr1.push(JsonValue(2));
    arr1.push("2");
    arr1.push(JsonValue(true));
    arr1.push(JsonArray());
    arr1.push(JsonObject());
    arr1.push(JsonValue());
    arr1.push(JsonString());

    JsonArray arr2 = arr1;
    EXPECT_TRUE(arr2 == arr1);

    JsonArray arr3;
    arr3.push(JsonValue(0.0));
    arr3.push(JsonValue(2));
    arr3.push("2");
    arr3.push(JsonValue(true));
    arr3.push(JsonArray());
    arr3.push(JsonObject());
    arr3.push(JsonValue());
    arr3.push(JsonString());
    EXPECT_TRUE(arr2 == arr3);
    EXPECT_TRUE(arr1 == arr3);

    // Growing the copy must not affect the array it was copied from.
    arr2.push(JsonObject());
    EXPECT_TRUE(arr2 != arr3);
    EXPECT_TRUE(arr2 != arr1);
    EXPECT_TRUE(arr1 == arr3);
  }

  // reserve(): the expected capacities are 32 for a request of 21 and 64 for
  // a request of 33; a smaller request never shrinks the capacity, and the
  // size stays 0 throughout.
  {
    JsonArray jarr;

    EXPECT_TRUE(jarr.capacity() == 0);
    EXPECT_TRUE(jarr.size() == 0);
    jarr.reserve(21);
    EXPECT_TRUE(jarr.capacity() == 32);
    EXPECT_TRUE(jarr.size() == 0);
    jarr.reserve(2);
    EXPECT_TRUE(jarr.capacity() == 32);
    EXPECT_TRUE(jarr.size() == 0);
    jarr.reserve(33);
    EXPECT_TRUE(jarr.capacity() == 64);
    EXPECT_TRUE(jarr.size() == 0);
  }

  // Writing through an iterator of arr1 must not leak into copies taken
  // before (arr2) or after (arr3) the iterator was obtained. The statement
  // order here is part of what is being tested.
  {
    JsonArray arr1;
    JsonArray arr2;
    JsonArray arr3;
    JsonArray::iterator iter1;

    arr1.push("123456");
    arr2 = arr1;
    iter1 = arr1.begin();
    arr3 = arr1;
    *iter1 = "abcdefg";
    EXPECT_TRUE(arr1[0].as_string() == "abcdefg");
    EXPECT_TRUE(arr2[0].as_string() == "123456");
    EXPECT_TRUE(arr3[0].as_string() == "123456");
  }

  // Same isolation check, but writing through the reference returned by
  // front() instead of through an iterator.
  {
    JsonArray arr1;
    JsonArray arr2;
    JsonArray arr3;

    arr1.push("123456");
    arr2 = arr1;

    JsonValue &val1 = arr1.front();
    arr3 = arr1;
    val1 = "abcdefg";
    EXPECT_TRUE(arr1[0].as_string() == "abcdefg");
    EXPECT_TRUE(arr2[0].as_string() == "123456");
    EXPECT_TRUE(arr3[0].as_string() == "123456");
  }

  // push / resize / reverse / shift on a mixed array of strings and numbers.
  {
    JsonArray arr;
    JsonValue val(666);

    arr.push("0");
    arr.push(JsonValue(1));
    arr.push(JsonValue(2));
    arr.push("3");
    arr.push("4");
    arr.push("5");
    arr.push("6");
    arr.push(JsonValue(7.0));
    EXPECT_TRUE(arr.size() == 8);
    EXPECT_TRUE(arr.capacity() == 32);
    // as_integer() is expected to convert numeric strings ("3") and floats
    // (7.0) as well as return stored integers.
    EXPECT_TRUE(arr[0].as_string() == "0");
    EXPECT_TRUE(arr[1].as_integer() == 1);
    EXPECT_TRUE(arr[2].as_integer() == 2);
    EXPECT_TRUE(arr[3].as_integer() == 3);
    EXPECT_TRUE(arr[4].as_integer() == 4);
    EXPECT_TRUE(arr[5].as_integer() == 5);
    EXPECT_TRUE(arr[6].as_integer() == 6);
    EXPECT_TRUE(arr[7].as_integer() == 7);
    // Growing with a fill value and then shrinking keeps the first five
    // original elements and leaves the fill value itself untouched.
    arr.resize(20, val);
    EXPECT_TRUE(arr.size() == 20);
    arr.resize(5, val);
    EXPECT_TRUE(arr.size() == 5);
    EXPECT_TRUE(arr[0].as_string() == "0");
    EXPECT_TRUE(arr[1].as_integer() == 1);
    EXPECT_TRUE(arr[2].as_integer() == 2);
    EXPECT_TRUE(arr[3].as_string() == "3");
    EXPECT_TRUE(arr[4].as_string() == "4");
    EXPECT_TRUE(val.as_integer() == 666);

    arr.reverse();
    EXPECT_TRUE(arr.size() == 5);
    EXPECT_TRUE(arr[4].as_string() == "0");
    EXPECT_TRUE(arr[3].as_integer() == 1);
    EXPECT_TRUE(arr[2].as_integer() == 2);
    EXPECT_TRUE(arr[1].as_string() == "3");
    EXPECT_TRUE(arr[0].as_string() == "4");

    // shift() drops the first element ("4" after the reverse); reversing
    // again restores the original order without it.
    arr.shift();
    arr.reverse();
    EXPECT_TRUE(arr.size() == 4);
    EXPECT_TRUE(arr[0].as_string() == "0");
    EXPECT_TRUE(arr[1].as_integer() == 1);
    EXPECT_TRUE(arr[2].as_integer() == 2);
    EXPECT_TRUE(arr[3].as_string() == "3");
  }

  // Default-constructed iterators of every kind must be convertible into the
  // declared target kind and compare equal to a default-constructed iterator
  // of that kind.
  {
    JsonArray::reverse_iterator it1 = JsonArray::iterator();
    JsonArray::reverse_iterator it2 = JsonArray::reverse_iterator();
    EXPECT_TRUE(it1 == it2);

    JsonArray::iterator it3 = JsonArray::reverse_iterator();
    JsonArray::iterator it4 = JsonArray::iterator();
    EXPECT_TRUE(it3 == it4);

    JsonArray::const_iterator it5 = JsonArray::const_iterator();
    JsonArray::const_iterator it6 = JsonArray::iterator();
    EXPECT_TRUE(it5 == it6);

    JsonArray::const_iterator it7 = JsonArray::reverse_iterator();
    JsonArray::const_iterator it8 = JsonArray::const_reverse_iterator();
    EXPECT_TRUE(it7 == it8);

    JsonArray::const_reverse_iterator it9 = JsonArray::const_iterator();
    JsonArray::const_reverse_iterator it10 = JsonArray::iterator();
    EXPECT_TRUE(it9 == it10);

    JsonArray::const_reverse_iterator it11 = JsonArray::reverse_iterator();
    JsonArray::const_reverse_iterator it12 =
        JsonArray::const_reverse_iterator();
    EXPECT_TRUE(it11 == it12);
  }

  // resize() without a fill value: new elements are null, and a size of 1023
  // is expected to give a capacity of 1024.
  {
    JsonArray arr;
    arr.resize(1023);
    EXPECT_TRUE(arr.size() == 1023);
    EXPECT_TRUE(arr.capacity() == 1024);
    EXPECT_TRUE(arr[0].is_null());
    EXPECT_TRUE(arr[1022].is_null());
  }

  // resize(0) on a fresh array is expected to allocate the initial capacity
  // of 32; later resizes within that capacity leave it unchanged.
  {
    JsonArray arr;
    EXPECT_TRUE(arr.capacity() == 0);
    arr.resize(0);
    EXPECT_TRUE(arr.capacity() == 32);
    arr.push(0);
    EXPECT_TRUE(arr.capacity() == 32);
    EXPECT_TRUE(arr.size() == 1);
    arr.resize(0);
    EXPECT_TRUE(arr.size() == 0);
    arr.resize(1);
    EXPECT_TRUE(arr.capacity() == 32);
    EXPECT_TRUE(arr.size() == 1);
  }

  // Integer parsing. Each case checks both the JsonValue comparison operator
  // and the value returned by as_integer().
  {
    JsonValue val;
    EXPECT_TRUE(val.parse("[0, 1]"));
    EXPECT_TRUE(val.as_array().front() == 0);
    EXPECT_TRUE(val.as_array().front().as_integer() == 0);
  }

  // Negative one.
  // NOTE(review): the three as_integer() assertions below are identical;
  // they may once have targeted differently sized accessors — confirm.
  {
    JsonValue val;
    EXPECT_TRUE(val.parse("[-1]"));
    EXPECT_TRUE(val.as_array().front() == -1);
    EXPECT_TRUE(val.as_array().front().as_integer() == -1);
    EXPECT_TRUE(val.as_array().front().as_integer() == -1);
    EXPECT_TRUE(val.as_array().front().as_integer() == -1);
  }

  // 4294967295 (2^32 - 1), with and without an explicit '+' sign. Truncating
  // the result to int32_t gives -1.
  {
    JsonValue val;
    EXPECT_TRUE(val.parse("[4294967295]"));
    EXPECT_TRUE(val.parse("[+4294967295]"));
    EXPECT_TRUE(val.as_array().front() == 4294967295);
    EXPECT_TRUE((int32_t)val.as_array().front().as_integer() == -1);
    EXPECT_TRUE(val.as_array().front().as_integer() == 4294967295);
  }

  // 2147483647 (2^31 - 1), surrounded by whitespace inside the brackets.
  {
    JsonValue val;
    EXPECT_TRUE(val.parse("[ 2147483647 ]"));
    EXPECT_TRUE(val.parse("[ +2147483647 ]"));
    EXPECT_TRUE(val.as_array().front() == 2147483647);
    EXPECT_TRUE(val.as_array().front().as_integer() == 2147483647);
    EXPECT_TRUE(val.as_array().front().as_integer() == 2147483647);
    EXPECT_TRUE(val.as_array().front().as_integer() == 2147483647);
  }

  // -2147483647.
  {
    JsonValue val;
    EXPECT_TRUE(val.parse("[ -2147483647 ]"));
    EXPECT_TRUE(val.as_array().front() == -2147483647);
    EXPECT_TRUE(val.as_array().front().as_integer() == -2147483647);
    EXPECT_TRUE(val.as_array().front().as_integer() == -2147483647);
    EXPECT_TRUE(val.as_array().front().as_integer() == -2147483647);
  }

  // 9223372036854775807 (2^63 - 1). Its low 32 bits are all ones, so the
  // int32_t truncation gives -1.
  {
    JsonValue val;
    EXPECT_TRUE(val.parse("[9223372036854775807]"));
    EXPECT_TRUE(val.parse("[+9223372036854775807]"));
    EXPECT_TRUE(val.as_array().front() == 9223372036854775807uLL);
    EXPECT_TRUE((int32_t)val.as_array().front().as_integer() == -1);
    EXPECT_TRUE(val.as_array().front().as_integer() == 9223372036854775807uLL);
  }

  // -9223372036854775807.
  {
    JsonValue val;
    EXPECT_TRUE(val.parse("[-9223372036854775807]"));
    EXPECT_TRUE(val.as_array().front() == -9223372036854775807LL);
    EXPECT_TRUE(val.as_array().front().as_integer() == -9223372036854775807LL);
  }

  // A 19-digit value that is the 20-digit 2^64 - 1 literal minus its last
  // digit.
  {
    JsonValue val;
    EXPECT_TRUE(val.parse("[ 1844674407370955161 ]"));
    EXPECT_TRUE(val.parse("[ +1844674407370955161 ]"));
    EXPECT_TRUE(val.as_array().front() == 1844674407370955161uLL);
    EXPECT_TRUE(val.as_array().front().as_integer() == 1844674407370955161uLL);
  }

  // 18446744073709551615 (2^64 - 1) is still accepted as an integer;
  // as_integer() is expected to compare equal to -1, i.e. the same bit
  // pattern viewed through integer_type.
  {
    JsonValue val;
    EXPECT_TRUE(val.parse("[ 18446744073709551615 ]"));
    EXPECT_TRUE(val.parse("[ +18446744073709551615 ]"));
    EXPECT_TRUE(val.as_array().front() == 18446744073709551615uLL);
    EXPECT_TRUE(val.as_array().front().as_integer() == -1);
    EXPECT_TRUE(val.as_array().front().as_integer() ==
                JsonValue::integer_type(18446744073709551615uLL));
  }

  // 18446744073709551616 (2^64) no longer fits: it must parse successfully
  // but be reported as a non-integer that compares equal to the float value.
  {
    JsonValue val;
    EXPECT_TRUE(val.parse("[ 18446744073709551616 ]"));
    EXPECT_FALSE(val.as_array().front().is_integer());
    EXPECT_TRUE(val.as_array().front() == 18446744073709551616.0);
    EXPECT_TRUE(val.as_array().front().as_float() == 18446744073709551616.0);
  }

  // Exponent notation.
  {
    JsonValue val;
    EXPECT_TRUE(val.parse("[ 1e+30, 1.3e12 ]"));
    EXPECT_TRUE(val.as_array().front() == 1e+30);
    EXPECT_TRUE(val.as_array().back() == 1.3e12);
  }

  // Pushing a value into its own array. The expected JSON shows that the
  // pushed element is a snapshot of the array as it was before the push.
  // NOTE(review): the refer() sequence 1, 1, 0 is asserted exactly as
  // written; the drop to 0 presumably comes from the preceding mutable
  // as_array() call — confirm against the JsonValue implementation.
  {
    JsonValue val;
    EXPECT_TRUE(val.parse("[0,[0]]"));
    EXPECT_TRUE(val.refer() == 1);
    EXPECT_TRUE(val.as_array().refer() == 1);
    EXPECT_TRUE(val.refer() == 0);

    val.as_array().push(val);
    EXPECT_TRUE(val.as_json_string() == "[0,[0],[0,[0]]]");

    // Removing the snapshot and pushing again gives the same result.
    val.as_array().pop();
    val.as_array().push(val);
    EXPECT_TRUE(val.as_json_string() == "[0,[0],[0,[0]]]");

    // Emptying the array first: the snapshot is then an empty array.
    val.as_array().pop();
    val.as_array().pop();
    val.as_array().pop();
    val.as_array().push(val);
    EXPECT_TRUE(val.as_json_string() == "[[]]");
  }

  // Assigning a value to one of its own array elements: the element receives
  // a snapshot of the array taken before the assignment.
  {
    JsonValue val;
    EXPECT_TRUE(val.parse("[0,[0]]"));
    EXPECT_TRUE(val.refer() == 1);
    EXPECT_TRUE(val.as_array().refer() == 1);
    EXPECT_TRUE(val.refer() == 0);

    val.as_array()[0] = val;
    EXPECT_TRUE(val.as_json_string() == "[[0,[0]],[0]]");
  }

  // The remaining cases nest an object inside itself through four different
  // routes; all must produce the same JSON. Route 1: assign() on the element
  // with a JsonObject copy.
  {
    JsonValue val;
    EXPECT_TRUE(val.parse("{\"0\":[0]}"));
    EXPECT_TRUE(val.refer() == 1);
    EXPECT_TRUE(val.as_object().refer() == 1);
    EXPECT_TRUE(val.refer() == 0);

    JsonObject obj = val.as_object();
    val.as_object()["1"].assign(obj);
    EXPECT_TRUE(val.as_json_string() == "{\"0\":[0],\"1\":{\"0\":[0]}}");
  }

  // Route 2: set() with the enclosing value itself.
  {
    JsonValue val;
    EXPECT_TRUE(val.parse("{\"0\":[0]}"));
    EXPECT_TRUE(val.refer() == 1);
    EXPECT_TRUE(val.as_object().refer() == 1);
    EXPECT_TRUE(val.refer() == 0);

    EXPECT_TRUE(val.as_object().set("1", val));
    EXPECT_TRUE(val.as_json_string() == "{\"0\":[0],\"1\":{\"0\":[0]}}");
  }

  // Route 3: operator= on the element with a JsonValue copy.
  {
    JsonValue val;
    EXPECT_TRUE(val.parse("{\"0\":[0]}"));
    EXPECT_TRUE(val.refer() == 1);
    EXPECT_TRUE(val.as_object().refer() == 1);
    EXPECT_TRUE(val.refer() == 0);

    JsonValue val2 = val;
    val.as_object()["1"] = val2;
    EXPECT_TRUE(val.as_json_string() == "{\"0\":[0],\"1\":{\"0\":[0]}}");
  }

  // Route 4: operator= on the element with a JsonObject copy.
  {
    JsonValue val;
    EXPECT_TRUE(val.parse("{\"0\":[0]}"));
    EXPECT_TRUE(val.refer() == 1);
    EXPECT_TRUE(val.as_object().refer() == 1);
    EXPECT_TRUE(val.refer() == 0);

    JsonObject obj = val.as_object();
    val.as_object()["1"] = obj;
    EXPECT_TRUE(val.as_json_string() == "{\"0\":[0],\"1\":{\"0\":[0]}}");
  }
}

// Exercises JsonString: escape decoding/encoding (including \uXXXX to UTF-8),
// comparison semantics with embedded NUL bytes, and capacity management.
TEST(Json, JsonString) {
  // A string without escape sequences decodes to itself.
  {
    JsonString str1("1234567890abcdefghijklmn");
    EXPECT_TRUE(str1 == str1.decode());
  }

  // \" \\ and \t are decoded to the raw characters.
  {
    JsonString str1("\\\"1234\\\\567890abcdefghijklmn\\t");
    JsonString str2 = "\"1234\\567890abcdefghijklmn\t";
    EXPECT_TRUE(str2 == str1.decode());
  }

  // Every two-character escape: \/ \\ \" \b \f \n \r \t.
  {
    JsonString str1(" \\/ \\\\ \\\" \\b \\f \\n \\r \\t ");
    JsonString str2 = " / \\ \" \b \f \n \r \t ";
    EXPECT_TRUE(str2 == str1.decode());
  }

  // \uXXXX escapes decode to UTF-8; both upper-case (950B) and lower-case
  // (000a) hex digits are accepted.
  {
    JsonString str1("\\n\\r \\u8096 \\u5141 \\u950B \\u000a \\u000d");
    JsonString str2("\n\r \xE8\x82\x96 \xE5\x85\x81 \xE9\x94\x8B \n \r");
    EXPECT_TRUE(str2 == str1.decode());
  }

  // The next five cases sit on the boundaries between UTF-8 sequence lengths.
  // U+007F: last code point encoded in one byte.
  {
    JsonString str1("\\u007f");
    JsonString str2("\x7F");
    EXPECT_TRUE(str2 == str1.decode());
  }

  // U+0080: first code point encoded in two bytes.
  {
    JsonString str1("\\u0080");
    JsonString str2("\xC2\x80");
    EXPECT_TRUE(str2 == str1.decode());
  }

  // U+07FF: last code point encoded in two bytes.
  {
    JsonString str1("\\u07FF");
    JsonString str2("\xDF\xBF");
    EXPECT_TRUE(str2 == str1.decode());
  }

  // U+0800: first code point encoded in three bytes.
  {
    JsonString str1("\\u0800");
    JsonString str2("\xE0\xA0\x80");
    EXPECT_TRUE(str2 == str1.decode());
  }

  // U+FFFF: largest value a single \uXXXX escape can express.
  {
    JsonString str1("\\uFFFF");
    JsonString str2("\xEF\xBF\xBF");
    EXPECT_TRUE(str2 == str1.decode());
  }

  // Equality operators and compare() must agree for every pair.
  {
    JsonString a("abcdefg");
    JsonString b("abcdefl");
    JsonString c("abcdefg");
    EXPECT_TRUE(a == c);
    EXPECT_TRUE(b != c);
    EXPECT_TRUE(b != a);

    // Mirrors the three operator checks above: (a, c), (b, c), (b, a).
    EXPECT_TRUE(a.compare(c) == 0);
    EXPECT_TRUE(b.compare(c) != 0);
    EXPECT_TRUE(b.compare(a) != 0);
  }

  // Strings of different lengths sharing a common prefix are all unequal.
  {
    JsonString a("abcdefg\"");
    JsonString b("abcd");
    JsonString c("abcdefg");
    EXPECT_TRUE(a != c);
    EXPECT_TRUE(b != c);
    EXPECT_TRUE(b != a);
  }

  // Constructed from a C string without an explicit length, the content ends
  // at the first NUL: "abcd\0efg" is therefore equal to "abcd".
  {
    JsonString a("abcd\0efg");
    JsonString b("abcd");
    JsonString c("abcdefg\0");
    EXPECT_TRUE(a != c);
    EXPECT_TRUE(b != c);
    EXPECT_TRUE(b == a);
    EXPECT_TRUE(a.compare(b) == 0);
  }

  // With an explicit length the embedded NUL is kept, so a (8 bytes) differs
  // from b. compare() against a C string is still expected to return 0 for
  // "abcd" — presumably because that overload stops at the first NUL; confirm
  // against JsonString::compare(const char *).
  {
    JsonString a("abcd\0efg", 8);
    JsonString b("abcd");
    JsonString c("abcdefg\0");
    EXPECT_TRUE(a != c);
    EXPECT_TRUE(b != c);
    EXPECT_TRUE(b != a);

    EXPECT_TRUE(a.compare("abcd") == 0);
    EXPECT_TRUE(b.compare("abcd\0") == 0);
    EXPECT_TRUE(c.compare("abcdefg") == 0);
  }

  // Two explicit-length strings with identical bytes, including the embedded
  // NUL, are equal.
  {
    JsonString a("abcd\0efg", 8);
    JsonString b("abcd");
    JsonString c("abcd\0efg", 8);
    EXPECT_TRUE(a == c);
    EXPECT_TRUE(b != c);
    EXPECT_TRUE(b != a);
  }

  // Default-constructed, constructed from "\0" (empty as a C string) and
  // constructed from NULL are all equal.
  {
    JsonString a;
    JsonString b("\0");
    JsonString c(NULL);
    EXPECT_TRUE(a == c);
    EXPECT_TRUE(b == c);
    EXPECT_TRUE(b == a);
  }

  // A one-byte string holding a single NUL is not equal to the empty string.
  {
    JsonString a;
    JsonString b("\0", 1);
    JsonString c(NULL);
    EXPECT_TRUE(a == c);
    EXPECT_TRUE(b != c);
    EXPECT_TRUE(b != a);
  }

  // decode() followed by encode(): the expected encoded form (str3) escapes
  // \r and \n again but leaves the multi-byte UTF-8 sequences and 0x7F as raw
  // bytes, so encode() is not the exact inverse of decode() for \u escapes.
  {
    JsonString str1(
        "author:\\u8096\\u5141\\u950b;\\r\\ntest:\\u007f \\u0080 \\u07ff "
        "\\u0800 \\uffff");
    JsonString str2(
        "author:\xE8\x82\x96\xE5\x85\x81\xE9\x94\x8B;\r\ntest:"
        "\x7F \xC2\x80 "
        "\xDF\xBF \xE0\xA0\x80 \xEF\xBF\xBF");
    JsonString str3(
        "author:\xE8\x82\x96\xE5\x85\x81\xE9\x94\x8B;"
        "\\r\\ntest:\x7F \xC2\x80 "
        "\xDF\xBF \xE0\xA0\x80 \xEF\xBF\xBF");
    EXPECT_TRUE(str2 == str1.decode());
    EXPECT_TRUE(str2.encode() == str3);
  }

  // Malformed escapes: "\0" is not a valid escape (str1) and "\u008" has only
  // three hex digits (str2). decode() must return an invalid string for both.
  {
    JsonString str1("\\007f \\0080 \\u07ff \\u0800 \\uffff");
    JsonString str2("\\u008\\u07ff \\u0800 \\uffff");
    EXPECT_FALSE(str1.decode().is_valid());
    EXPECT_FALSE(str2.decode().is_valid());
  }

  // encode() writes the control characters used here (0x01, 0x0e, 0x1e, 0x1f)
  // as \u00XX, uses the short forms for \b \f \n \r \t, escapes '"' and '\\',
  // and leaves '/' unescaped. decode() restores the original bytes.
  {
    JsonString str1(" \x1f \x0e \x01 \x1e / \\ AAA\" AAA\b \f \n \r \t ");
    JsonString str2(
        " \\u001f \\u000e \\u0001 \\u001e / \\\\ AAA\\\" "
        "AAA\\b \\f \\n \\r "
        "\\t ");
    EXPECT_TRUE(str1.encode() == str2);
    EXPECT_TRUE(str1 == str2.decode());
  }

  // Capacity management. The expected capacities are one less than 32, 64 and
  // 1024 — presumably one byte is kept for the terminating NUL; confirm
  // against JsonString::reserve().
  {
    JsonString jstr;

    EXPECT_TRUE(jstr.capacity() == 0);
    EXPECT_TRUE(jstr.size() == 0);
    jstr.reserve(21);
    EXPECT_TRUE(jstr.capacity() == 32 - 1);
    EXPECT_TRUE(jstr.size() == 0);
    // A smaller request never shrinks the capacity.
    jstr.reserve(2);
    EXPECT_TRUE(jstr.capacity() == 32 - 1);
    EXPECT_TRUE(jstr.size() == 0);
    // 32 characters no longer fit in a capacity of 31, so the buffer grows.
    jstr.reserve(32);
    EXPECT_TRUE(jstr.capacity() == 64 - 1);
    EXPECT_TRUE(jstr.size() == 0);

    // assign() with an explicit length keeps that length even though the
    // buffer starts with NUL; reading it back through c_str() gives "".
    char buf[1000];
    buf[0] = '\0';
    jstr.assign(buf, sizeof(buf));
    EXPECT_TRUE(jstr.capacity() == 1024 - 1);
    EXPECT_TRUE(jstr.length() == 1000);
    EXPECT_TRUE(JsonString(jstr.c_str()) == "");

    // Assigning a shorter length keeps the capacity; c_str() now reads up to
    // the NUL copied in with "abcdef".
    memcpy(buf, "abcdef", 7);
    jstr.assign(buf, 200);
    EXPECT_TRUE(jstr.capacity() == 1024 - 1);
    EXPECT_TRUE(jstr.length() == 200);
    EXPECT_TRUE(JsonString(jstr.c_str()) == "abcdef");
  }
}

// Exercises JsonValue: equality across types, integer serialisation,
// re-assignment, and the counts reported by refer() when values share data.
TEST(Json, JsonValue) {
  // Equality. Values built from the same input compare equal; values of
  // different JSON types compare unequal even when they look numerically
  // alike (false vs 0.0, 0.0 vs 0, "0.0" vs 0).
  {
    EXPECT_TRUE(JsonValue(true) == JsonValue(true));
    EXPECT_TRUE(JsonValue(false) == JsonValue(false));
    // A char is treated as its numeric code: '\r' equals 0xd.
    EXPECT_TRUE(JsonValue((char)'\r') == JsonValue(0xd));
    EXPECT_TRUE(JsonValue((char)'\r') == JsonValue('\r'));
    EXPECT_TRUE(JsonValue(10000) == JsonValue(10000));
    EXPECT_TRUE(JsonValue(0xffff) == JsonValue(0xffff));
    EXPECT_TRUE(JsonValue(0x10000) == JsonValue(0x10000));
    EXPECT_TRUE(JsonValue(0xffffffff) == JsonValue(0xffffffff));
    EXPECT_TRUE(JsonValue(0x100000000) == JsonValue(0x100000000));
    EXPECT_TRUE(JsonValue(0xffffffffffffffff) == JsonValue(0xffffffffffffffff));
    EXPECT_TRUE(JsonValue(0.999999) == JsonValue(0.999999));
    EXPECT_TRUE(JsonValue(false) != JsonValue(0.0));
    EXPECT_TRUE(JsonValue(0.0) != JsonValue(0));
    EXPECT_TRUE(JsonValue("0.0") != JsonValue(0));
    EXPECT_TRUE(JsonValue("0.0") == JsonValue("0.0"));
    // A std::string and a C string with the same content are equal.
    EXPECT_TRUE(JsonValue(std::string("0.0001")) == JsonValue("0.0001"));
  }

  // Integer-to-text serialisation, positive and negative, at increasing
  // digit counts and around the 2^16, 2^32, 2^48 and 2^63 boundaries.
  {
    EXPECT_EQ(JsonValue(0).as_json_string().as_stl_string(), "0");
    EXPECT_EQ(JsonValue(1).as_json_string().as_stl_string(), "1");
    EXPECT_EQ(JsonValue(-1).as_json_string().as_stl_string(), "-1");
    EXPECT_EQ(JsonValue(99).as_json_string().as_stl_string(), "99");
    EXPECT_EQ(JsonValue(-99).as_json_string().as_stl_string(), "-99");
    EXPECT_EQ(JsonValue(188).as_json_string().as_stl_string(), "188");
    EXPECT_EQ(JsonValue(-188).as_json_string().as_stl_string(), "-188");
    EXPECT_EQ(JsonValue(1520).as_json_string().as_stl_string(), "1520");
    EXPECT_EQ(JsonValue(-1520).as_json_string().as_stl_string(), "-1520");

    EXPECT_EQ(JsonValue(12345).as_json_string().as_stl_string(), "12345");
    EXPECT_EQ(JsonValue(-12345).as_json_string().as_stl_string(), "-12345");

    // 2^16 - 1 and 2^16.
    EXPECT_EQ(JsonValue(65535).as_json_string().as_stl_string(), "65535");
    EXPECT_EQ(JsonValue(-65535).as_json_string().as_stl_string(), "-65535");

    EXPECT_EQ(JsonValue(65536).as_json_string().as_stl_string(), "65536");
    EXPECT_EQ(JsonValue(-65536).as_json_string().as_stl_string(), "-65536");

    EXPECT_EQ(JsonValue(234567).as_json_string().as_stl_string(), "234567");
    EXPECT_EQ(JsonValue(-234567).as_json_string().as_stl_string(), "-234567");

    EXPECT_EQ(JsonValue(1234567890).as_json_string().as_stl_string(),
              "1234567890");
    EXPECT_EQ(JsonValue(-1234567890).as_json_string().as_stl_string(),
              "-1234567890");

    EXPECT_EQ(JsonValue(9999999999).as_json_string().as_stl_string(),
              "9999999999");
    EXPECT_EQ(JsonValue(-9999999999).as_json_string().as_stl_string(),
              "-9999999999");

    // 2^32 - 1 and 2^32.
    EXPECT_EQ(JsonValue(4294967295).as_json_string().as_stl_string(),
              "4294967295");
    // Disabled case, kept as found.
    // EXPECT_EQ(JsonValue(-4294967295).as_json_string().as_stl_string(),
    //           "-4294967295LL");

    EXPECT_EQ(JsonValue(4294967296).as_json_string().as_stl_string(),
              "4294967296");
    EXPECT_EQ(JsonValue(-4294967296).as_json_string().as_stl_string(),
              "-4294967296");

    // 2^48 - 1 and 2^48.
    EXPECT_EQ(JsonValue(281474976710655).as_json_string().as_stl_string(),
              "281474976710655");
    EXPECT_EQ(JsonValue(-281474976710655).as_json_string().as_stl_string(),
              "-281474976710655");

    EXPECT_EQ(JsonValue(281474976710656).as_json_string().as_stl_string(),
              "281474976710656");
    EXPECT_EQ(JsonValue(-281474976710656).as_json_string().as_stl_string(),
              "-281474976710656");

    // 2^63 - 1 and its negation.
    EXPECT_EQ(JsonValue(9223372036854775807ll).as_json_string().as_stl_string(),
              "9223372036854775807");
    EXPECT_EQ(
        JsonValue(-9223372036854775807ll).as_json_string().as_stl_string(),
        "-9223372036854775807");
  }

  // Smoke test: re-assigning one value with a string, a length-limited
  // string and two integers. There are no assertions; the sequence only has
  // to run without crashing.
  {
    JsonValue jval;

    jval.assign("aaaaaaaaaaaa");
    jval.assign("122326263", 10);
    jval.assign(200);
    jval.assign(0xffffffffffff);
  }

  // refer() counts for string values that share data. The exact statement
  // order is what is being tested.
  // NOTE(review): the meaning of the individual counts (3, 2, 1, 0) is not
  // visible from this file — confirm against JsonValue::refer() and
  // JsonString::refer() before changing any of them.
  {
    JsonValue val1;
    JsonValue val2;
    JsonValue val3;
    JsonValue val4;

    val1 = "abcdef";
    val2 = val1;
    val3 = val1;

    // All three values report the same count after the two copies.
    EXPECT_TRUE(val1.refer() == 3);
    EXPECT_TRUE(val2.refer() == 3);
    EXPECT_TRUE(val3.refer() == 3);
    EXPECT_TRUE(val3.as_stl_string() == "abcdef");

    // Taking a mutable reference to val1's string changes the counts; val4 is
    // copied from val1 while that reference is outstanding.
    JsonString &str = val1.as_string();
    EXPECT_TRUE(str.refer() == 2);
    val4 = val1;
    str = "123456";

    // Only val1 sees the write; every copy keeps the original text.
    EXPECT_TRUE(val1.refer() == 0);
    EXPECT_TRUE(val2.refer() == 2);
    EXPECT_TRUE(val3.refer() == 2);
    EXPECT_TRUE(val4.refer() == 1);
    EXPECT_TRUE(val1.as_stl_string() == "123456");
    EXPECT_TRUE(val2.as_stl_string() == "abcdef");
    EXPECT_TRUE(val3.as_stl_string() == "abcdef");
    EXPECT_TRUE(val4.as_stl_string() == "abcdef");
  }

  // refer() counts for object values: val1 was modified through operator[]
  // and as_object(), val2 and val3 are copies.
  {
    JsonValue val1;
    JsonValue val2;
    JsonValue val3;

    val1["abcd"] = "1234";
    val2 = val1.as_object();
    val3 = val2;

    // The value and the object it wraps are expected to report the same
    // count.
    EXPECT_TRUE(val1.refer() == 0);
    EXPECT_TRUE(val2.refer() == 2);
    EXPECT_TRUE(val3.refer() == 2);
    EXPECT_TRUE(val1.as_object().refer() == 0);
    EXPECT_TRUE(val2.as_object().refer() == 2);
    EXPECT_TRUE(val3.as_object().refer() == 2);
  }
}

// Checks general behavior of the JSON types: the values reported by refer()
// across construction, assignment, copying and iterator access, plus the
// equality / inequality operators between the JSON types and std::string.
TEST(Json, General) {
  {
    JsonObject obj;
    JsonArray arr;
    JsonValue val;
    JsonString str;

    // Freshly default-constructed instances are expected to report -1.
    EXPECT_TRUE(obj.refer() == -1);
    EXPECT_TRUE(arr.refer() == -1);
    EXPECT_TRUE(val.refer() == -1);
    EXPECT_TRUE(str.refer() == -1);

    // Assigning an (empty) string / object / array into the value is expected
    // to leave the value reporting 1 each time.
    val = str;
    EXPECT_TRUE(val.refer() == 1);

    val = obj;
    EXPECT_TRUE(val.refer() == 1);

    val = arr;
    EXPECT_TRUE(val.refer() == 1);

    // After the first push the array reports 1; once begin() / end() have been
    // called it reports 0.
    // NOTE(review): presumably 0 marks the storage as not shareable after
    // mutable iterator access - confirm against the JsonArray implementation.
    arr.push("acdef");
    EXPECT_TRUE(arr.refer() == 1);
    arr.begin();
    EXPECT_TRUE(arr.refer() == 0);
    arr.end();
    EXPECT_TRUE(arr.refer() == 0);

    // Copies taken from `arr` (which reports 0) and from `arr1`: arr1 and arr2
    // are expected to report 2, arr3 reports 1, and `arr` itself stays at 0.
    JsonArray arr1 = arr;
    JsonArray arr2 = arr1;
    JsonArray arr3 = arr;
    EXPECT_TRUE(arr1.refer() == 2);
    EXPECT_TRUE(arr2.refer() == 2);
    EXPECT_TRUE(arr3.refer() == 1);
    EXPECT_TRUE(arr.refer() == 0);

    // Same sequence for objects, using set() and the reverse iterators.
    obj.set("1111", "null");
    EXPECT_TRUE(obj.refer() == 1);
    obj.rbegin();
    EXPECT_TRUE(obj.refer() == 0);
    obj.rend();
    EXPECT_TRUE(obj.refer() == 0);

    JsonObject obj1 = obj;
    JsonObject obj2 = obj1;
    JsonObject obj3 = obj;
    EXPECT_TRUE(obj1.refer() == 2);
    EXPECT_TRUE(obj2.refer() == 2);
    EXPECT_TRUE(obj3.refer() == 1);
    EXPECT_TRUE(obj.refer() == 0);
  }

  // The following scopes compare each built-in arithmetic type against the
  // enumerators of a local unnamed enum, in both operand orders.
  // NOTE(review): presumably these guard against the JSON comparison operators
  // interfering with plain arithmetic/enum comparisons - TODO confirm.
  {
    short a = 1;
    enum { A = 1, B = 2 };
    EXPECT_TRUE(a == A);
    EXPECT_TRUE(A == a);
    EXPECT_TRUE(a != B);
    EXPECT_TRUE(B != a);
  }

  {
    unsigned short a = 1;
    enum { A = 1, B = 2 };
    EXPECT_TRUE(a == A);
    EXPECT_TRUE(A == a);
    EXPECT_TRUE(a != B);
    EXPECT_TRUE(B != a);
  }

  {
    int a = 1;
    enum { A = 1, B = 2 };
    EXPECT_TRUE(a == A);
    EXPECT_TRUE(A == a);
    EXPECT_TRUE(a != B);
    EXPECT_TRUE(B != a);
  }

  {
    unsigned int a = 1;
    enum { A = 1, B = 2 };
    EXPECT_TRUE(a == A);
    EXPECT_TRUE(A == a);
    EXPECT_TRUE(a != B);
    EXPECT_TRUE(B != a);
  }

  {
    long a = 1;
    enum { A = 1, B = 2 };
    EXPECT_TRUE(a == A);
    EXPECT_TRUE(A == a);
    EXPECT_TRUE(a != B);
    EXPECT_TRUE(B != a);
  }

  {
    unsigned long a = 1;
    enum { A = 1, B = 2 };
    EXPECT_TRUE(a == A);
    EXPECT_TRUE(A == a);
    EXPECT_TRUE(a != B);
    EXPECT_TRUE(B != a);
  }

  {
    long long a = 1;
    enum { A = 1, B = 2 };
    EXPECT_TRUE(a == A);
    EXPECT_TRUE(A == a);
    EXPECT_TRUE(a != B);
    EXPECT_TRUE(B != a);
  }

  {
    unsigned long long a = 1;
    enum { A = 1, B = 2 };
    EXPECT_TRUE(a == A);
    EXPECT_TRUE(A == a);
    EXPECT_TRUE(a != B);
    EXPECT_TRUE(B != a);
  }

  {
    float a = 1;
    enum { A = 1, B = 2 };
    EXPECT_TRUE(a == A);
    EXPECT_TRUE(A == a);
    EXPECT_TRUE(a != B);
    EXPECT_TRUE(B != a);
  }

  {
    double a = 1;
    enum { A = 1, B = 2 };
    EXPECT_TRUE(a == A);
    EXPECT_TRUE(A == a);
    EXPECT_TRUE(a != B);
    EXPECT_TRUE(B != a);
  }

  {
    // Two default-constructed instances of the same JSON type compare equal.
    EXPECT_TRUE(JsonString() == JsonString());
    EXPECT_TRUE(JsonValue() == JsonValue());
    EXPECT_TRUE(JsonObject() == JsonObject());
    EXPECT_TRUE(JsonArray() == JsonArray());

    EXPECT_FALSE((JsonString() != JsonString()));
    EXPECT_FALSE((JsonValue() != JsonValue()));
    EXPECT_FALSE((JsonObject() != JsonObject()));
    EXPECT_FALSE((JsonArray() != JsonArray()));

    // A default-constructed JsonValue is expected to differ from a
    // default-constructed string / object / array, in either operand order.
    EXPECT_TRUE(JsonString() != JsonValue());
    EXPECT_TRUE(JsonObject() != JsonValue());
    EXPECT_TRUE(JsonArray() != JsonValue());
    EXPECT_TRUE(JsonValue() != JsonString());
    EXPECT_TRUE(JsonValue() != JsonObject());
    EXPECT_TRUE(JsonValue() != JsonArray());

    EXPECT_FALSE((JsonString() == JsonValue()));
    EXPECT_FALSE((JsonObject() == JsonValue()));
    EXPECT_FALSE((JsonArray() == JsonValue()));
    EXPECT_FALSE((JsonValue() == JsonString()));
    EXPECT_FALSE((JsonValue() == JsonObject()));
    EXPECT_FALSE((JsonValue() == JsonArray()));

    // Default-constructed and ""-constructed JsonString / std::string values
    // are all expected to compare equal to one another.
    EXPECT_TRUE(JsonString() == std::string());
    EXPECT_TRUE(std::string() == JsonString());
    EXPECT_FALSE((JsonString() != std::string()));
    EXPECT_FALSE((std::string() != JsonString()));

    EXPECT_TRUE(JsonString() == std::string(""));
    EXPECT_TRUE(std::string("") == JsonString());
    EXPECT_FALSE((JsonString() != std::string("")));
    EXPECT_FALSE((std::string("") != JsonString()));

    EXPECT_TRUE(JsonString("") == std::string());
    EXPECT_TRUE(std::string() == JsonString(""));
    EXPECT_FALSE((JsonString("") != std::string()));
    EXPECT_FALSE((std::string() != JsonString("")));
  }
}


================================================
FILE: tests/ailego/hash/crc32c_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <string>
#include <vector>
#include <gtest/gtest.h>
#include <zvec/ailego/hash/crc32c.h>
#include <zvec/ailego/utility/time_helper.h>

using namespace zvec;

// Pins ailego::Crc32c::Hash against fixed expected values and checks that the
// result depends on both the hashed bytes and the seed argument.
TEST(Crc32c, Crc32c) {
  // Empty input with a zero seed is expected to hash to zero.
  {
    char text[] = "";
    const size_t text_len = strlen(text);
    EXPECT_EQ(0u, ailego::Crc32c::Hash(text, text_len, 0u));
  }
  // Fixed expected value for the digit string with a zero seed.
  {
    char text[] = "123456789";
    const size_t text_len = strlen(text);
    EXPECT_EQ(0x58E3FA20u, ailego::Crc32c::Hash(text, text_len, 0));
  }
  // Same bytes, two different seeds, two different expected values.
  {
    char text[] = "whiz bang boom";
    const size_t text_len = strlen(text);
    EXPECT_EQ(0x8CAE40C8u, ailego::Crc32c::Hash(text, text_len, 0u));
    EXPECT_EQ(0xDF19F0C8u, ailego::Crc32c::Hash(text, text_len, 5678));
  }
  {
    char text[] = "foo bar baz";
    const size_t text_len = strlen(text);
    EXPECT_EQ(0xF58C78ACu, ailego::Crc32c::Hash(text, text_len, 0u));
    EXPECT_EQ(0x348DACCEu, ailego::Crc32c::Hash(text, text_len, 1234u));
  }
  // Expected values for every prefix of the 10-byte array; the longest prefix
  // (length 10) includes the terminating NUL of the literal.
  {
    uint32_t expected[10] = {3263744690, 2184491954, 1881115848, 3193814825,
                             1570985216, 371133708,  2843540871, 3970904592,
                             1491335712, 551906596};
    char text[] = "123456789";
    for (size_t prefix_len = 1; prefix_len <= 10; ++prefix_len) {
      EXPECT_EQ(expected[prefix_len - 1],
                ailego::Crc32c::Hash(text, prefix_len, 0u));
    }
  }
  // A single zero byte: zero with a zero seed, non-zero with a non-zero seed.
  {
    uint8_t zero_byte = 0;
    EXPECT_EQ(0u, ailego::Crc32c::Hash(&zero_byte, sizeof(zero_byte), 0u));
    EXPECT_NE(0u, ailego::Crc32c::Hash(&zero_byte, sizeof(zero_byte), 55u));
  }

  // The same characters held in a char array and in a std::string must hash
  // identically; changing the length or the seed must change the result.
  {
    char literal[] = "Hello world";
    std::string str("Hello world");

    EXPECT_EQ(ailego::Crc32c::Hash(literal, strlen(literal), 0u),
              ailego::Crc32c::Hash(str.data(), str.size(), 0u));
    EXPECT_EQ(ailego::Crc32c::Hash(literal, sizeof(literal) - 1, 0u),
              ailego::Crc32c::Hash(str.data(), str.size(), 0u));

    EXPECT_EQ(ailego::Crc32c::Hash(literal, strlen(literal), 1),
              ailego::Crc32c::Hash(str.data(), str.size(), 1));
    EXPECT_EQ(ailego::Crc32c::Hash(literal, sizeof(literal) - 1, 1),
              ailego::Crc32c::Hash(str.data(), str.size(), 1));

    // NOTE(review): the first call below uses the two-argument form of Hash;
    // its exact meaning is not visible here - confirm against crc32c.h.
    EXPECT_NE(ailego::Crc32c::Hash(literal, 0u),
              ailego::Crc32c::Hash(literal, 1, 0u));
    EXPECT_NE(ailego::Crc32c::Hash(literal, sizeof(literal) - 1, 0u),
              ailego::Crc32c::Hash(literal, sizeof(literal) - 1, 1));
    EXPECT_NE(ailego::Crc32c::Hash(str.data(), str.size(), 0u),
              ailego::Crc32c::Hash(str.data(), str.size(), 1));
  }
}

// Checks the "embedded checksum" property on random buffers: after the hash of
// a buffer (computed with its leading 32-bit word set to the seed) is written
// into that leading word, hashing the buffer again with that hash as the seed
// is expected to yield the same hash. Also checks that hashing zero bytes
// returns the seed unchanged.
TEST(Crc32c, Crc32cChecksum) {
  srand((uint32_t)time(NULL));
  srand((uint32_t)rand());

  // Overwrites the leading sizeof(uint32_t) bytes of `buf` with the object
  // representation of `word`. Going through std::string::replace avoids
  // writing through a cast of data() (const before C++17) and avoids accessing
  // char storage through a uint32_t lvalue.
  const auto store_prefix = [](std::string &buf, uint32_t word) {
    buf.replace(0, sizeof(word), reinterpret_cast<const char *>(&word),
                sizeof(word));
  };

  // Runs the scenario on `len` random bytes. `seed` is used both as the
  // placeholder stored in the leading word and as the initial hash value.
  const auto check = [&store_prefix](size_t len, uint32_t seed) {
    std::string str;

    for (size_t i = 0; i < len; i++) {
      str.push_back((char)rand());
    }

    store_prefix(str, seed);
    uint32_t crc = ailego::Crc32c::Hash(str.data(), str.size(), seed);

    store_prefix(str, crc);
    EXPECT_EQ(crc, ailego::Crc32c::Hash(str.data(), str.size(), crc))
        << "len=" << len;

    // Hashing an empty range must hand back the seed it was given.
    uint32_t crc2 = ailego::Crc32c::Hash(str.data() + 4, str.size() - 4, 0);
    EXPECT_EQ(crc2, ailego::Crc32c::Hash(&crc, 0, crc2)) << "len=" << len;
  };

  check(10000, 0u);
  check(20000, 0xffffffffu);
}

// Rough timing of ailego::Crc32c::Hash over a buffer of random 32-bit words.
// Nothing is asserted; the final hash and the elapsed microseconds are printed.
TEST(Crc32c, Crc32cBenchmark) {
  srand((uint32_t)time(NULL));
  srand((uint32_t)rand());

  const size_t word_count = 100000;
  std::vector<uint32_t> words;
  while (words.size() < word_count) {
    words.push_back((uint32_t)rand());
  }

  {
    const size_t byte_count = words.size() * sizeof(uint32_t);

    uint64_t started = ailego::Monotime::MicroSeconds();
    // One pass seeded with zero, then 100 passes each seeded with the
    // previous result.
    uint32_t digest = ailego::Crc32c::Hash(words.data(), byte_count, 0u);
    for (int round = 0; round < 100; ++round) {
      digest = ailego::Crc32c::Hash(words.data(), byte_count, digest);
    }
    uint64_t finished = ailego::Monotime::MicroSeconds();

    printf("ailego::Crc32c::Hash = %u: %u us\n", digest,
           (uint32_t)(finished - started));
  }
}


================================================
FILE: tests/ailego/hash/jump_hash_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <random>
#include <set>
#include <string>
#include <vector>
#include <gtest/gtest.h>
#include <zvec/ailego/hash/crc32c.h>
#include <zvec/ailego/hash/jump_hash.h>
#include <zvec/ailego/utility/time_helper.h>

using namespace zvec::ailego;

// Compares how many distinct 32-bit ids are produced from random
// (signal, ticket) pairs when the top five bits come from JumpHash(key, 32)
// versus from the raw signal value. Only the collision ratios are printed;
// nothing is asserted.
TEST(JumpHash, JumpHash) {
  const int sample_count = 10000;

  std::random_device seed_source;
  std::mt19937 engine(seed_source());
  std::uniform_int_distribution<uint32_t> ticket_dist(25353195, 25358555);
  std::uniform_int_distribution<uint32_t> signal_dist(1, 10000);

  std::set<uint32_t> jump_ids;
  std::set<uint32_t> plain_ids;

  for (int remaining = sample_count; remaining > 0; --remaining) {
    // Draw the ticket before the signal, matching the engine consumption order.
    const uint32_t ticket = ticket_dist(engine);
    const uint32_t signal = signal_dist(engine);

    // Low 27 bits are shared by both id schemes.
    const uint32_t low_bits = ticket & 0x7ffffff;
    const uint64_t key = ((uint64_t)signal << 32) | ticket;

    jump_ids.insert((JumpHash(key, 32) << 27) | low_bits);
    plain_ids.insert((signal << 27) | low_bits);
  }

  printf("Conflict 1: %f\n",
         (double)(sample_count - jump_ids.size()) / (double)sample_count);
  printf("Conflict 2: %f\n",
         (double)(sample_count - plain_ids.size()) / (double)sample_count);
}


================================================
FILE: tests/ailego/internal/cpu_features_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <ailego/internal/cpu_features.h>
#include <gtest/gtest.h>

using namespace zvec::ailego::internal;

// Prints every runtime CPU feature accessor, then verifies that each
// instruction set the compiler was told to target (via its predefined macro)
// is reported as available, together with the instruction sets listed under
// the same macro below.
TEST(CpuFeatures, General) {
  // Emits one "<label><value>" line; the value keeps the accessor's own type.
  const auto show = [](const char *label, auto value) {
    std::cout << label << value << std::endl;
  };

  show("* Intrinsics:       ", CpuFeatures::Intrinsics());
  show("* F16C:             ", CpuFeatures::F16C());
  show("* SSE:              ", CpuFeatures::SSE());
  show("* SSE2:             ", CpuFeatures::SSE2());
  show("* SSE3:             ", CpuFeatures::SSE3());
  show("* SSSE3:            ", CpuFeatures::SSSE3());
  show("* SSE4_1:           ", CpuFeatures::SSE4_1());
  show("* SSE4_2:           ", CpuFeatures::SSE4_2());
  show("* AVX:              ", CpuFeatures::AVX());
  show("* AVX2:             ", CpuFeatures::AVX2());
  show("* AVX512F:          ", CpuFeatures::AVX512F());
  show("* AVX512DQ:         ", CpuFeatures::AVX512DQ());
  show("* AVX512PF:         ", CpuFeatures::AVX512PF());
  show("* AVX512ER:         ", CpuFeatures::AVX512ER());
  show("* AVX512CD:         ", CpuFeatures::AVX512CD());
  show("* AVX512BW:         ", CpuFeatures::AVX512BW());
  show("* AVX512VL:         ", CpuFeatures::AVX512VL());
  show("* AVX512_IFMA:      ", CpuFeatures::AVX512_IFMA());
  show("* AVX512_VBMI:      ", CpuFeatures::AVX512_VBMI());
  show("* AVX512_VBMI2:     ", CpuFeatures::AVX512_VBMI2());
  show("* AVX512_VNNI:      ", CpuFeatures::AVX512_VNNI());
  show("* AVX512_BITALG:    ", CpuFeatures::AVX512_BITALG());
  show("* AVX512_VPOPCNTDQ: ", CpuFeatures::AVX512_VPOPCNTDQ());
  show("* AVX512_4VNNIW:    ", CpuFeatures::AVX512_4VNNIW());
  show("* AVX512_4FMAPS:    ", CpuFeatures::AVX512_4FMAPS());
  show("* AVX512_FP16:      ", CpuFeatures::AVX512_FP16());
  show("* CX8:              ", CpuFeatures::CX8());
  show("* CX16:             ", CpuFeatures::CX16());
  show("* PCLMULQDQ:        ", CpuFeatures::PCLMULQDQ());
  show("* VPCLMULQDQ:       ", CpuFeatures::VPCLMULQDQ());
  show("* CMOV:             ", CpuFeatures::CMOV());
  show("* MOVBE:            ", CpuFeatures::MOVBE());
  show("* ERMS:             ", CpuFeatures::ERMS());
  show("* POPCNT:           ", CpuFeatures::POPCNT());
  show("* XSAVE:            ", CpuFeatures::XSAVE());
  show("* FMA:              ", CpuFeatures::FMA());
  show("* ADX:              ", CpuFeatures::ADX());
  show("* GFNI:             ", CpuFeatures::GFNI());
  show("* AES:              ", CpuFeatures::AES());
  show("* VAES:             ", CpuFeatures::VAES());
  show("* RDSEED:           ", CpuFeatures::RDSEED());
  show("* RDRAND:           ", CpuFeatures::RDRAND());
  show("* SHA:              ", CpuFeatures::SHA());
  show("* BMI1:             ", CpuFeatures::BMI1());
  show("* BMI2:             ", CpuFeatures::BMI2());
  show("* CLFLUSH:          ", CpuFeatures::CLFLUSH());
  show("* CLFLUSHOPT:       ", CpuFeatures::CLFLUSHOPT());
  show("* CLWB:             ", CpuFeatures::CLWB());
  show("* RDPID:            ", CpuFeatures::RDPID());
  show("* FPU:              ", CpuFeatures::FPU());
  show("* HT:               ", CpuFeatures::HT());
  show("* VMX:              ", CpuFeatures::VMX());
  show("* HYPERVISOR:       ", CpuFeatures::HYPERVISOR());

// The AVX-512 checks below are intentionally left disabled.
// #if defined(__AVX512VBMI2__)
//     EXPECT_TRUE(CpuFeatures::AVX512VBMI2());
// #endif
// #if defined(__AVX512VBMI__)
//     EXPECT_TRUE(CpuFeatures::AVX512VBMI());
// #endif
// #if defined(__AVX512VL__)
//     EXPECT_TRUE(CpuFeatures::AVX512VL());
// #endif
// #if defined(__AVX512BW__)
//     EXPECT_TRUE(CpuFeatures::AVX512BW());
// #endif
// #if defined(__AVX512CD__)
//     EXPECT_TRUE(CpuFeatures::AVX512CD());
// #endif
// #if defined(__AVX512ER__)
//     EXPECT_TRUE(CpuFeatures::AVX512ER());
// #endif
// #if defined(__AVX512PF__)
//     EXPECT_TRUE(CpuFeatures::AVX512PF());
// #endif
// #if defined(__AVX512IFMA__)
//     EXPECT_TRUE(CpuFeatures::AVX512IFMA());
// #endif
// #if defined(__AVX512DQ__)
//     EXPECT_TRUE(CpuFeatures::AVX512DQ());
// #endif
// #if defined(__AVX512F__)
//     EXPECT_TRUE(CpuFeatures::AVX512F());
// #endif

// Each section is compiled in only when the build targets that instruction
// set; it then requires the runtime detection to agree for the listed sets.
#if defined(__AVX2__)
  EXPECT_TRUE(CpuFeatures::AVX2());
  EXPECT_TRUE(CpuFeatures::AVX());
  EXPECT_TRUE(CpuFeatures::SSE4_2());
  EXPECT_TRUE(CpuFeatures::SSE4_1());
  EXPECT_TRUE(CpuFeatures::SSSE3());
  EXPECT_TRUE(CpuFeatures::SSE3());
  EXPECT_TRUE(CpuFeatures::SSE2());
  EXPECT_TRUE(CpuFeatures::SSE());
  EXPECT_TRUE(CpuFeatures::MMX());
#endif  // __AVX2__
#if defined(__AVX__)
  EXPECT_TRUE(CpuFeatures::AVX());
  EXPECT_TRUE(CpuFeatures::SSE4_2());
  EXPECT_TRUE(CpuFeatures::SSE4_1());
  EXPECT_TRUE(CpuFeatures::SSSE3());
  EXPECT_TRUE(CpuFeatures::SSE3());
  EXPECT_TRUE(CpuFeatures::SSE2());
  EXPECT_TRUE(CpuFeatures::SSE());
  EXPECT_TRUE(CpuFeatures::MMX());
#endif  // __AVX__
#if defined(__SSE4_2__)
  EXPECT_TRUE(CpuFeatures::SSE4_2());
  EXPECT_TRUE(CpuFeatures::SSE4_1());
  EXPECT_TRUE(CpuFeatures::SSSE3());
  EXPECT_TRUE(CpuFeatures::SSE3());
  EXPECT_TRUE(CpuFeatures::SSE2());
  EXPECT_TRUE(CpuFeatures::SSE());
  EXPECT_TRUE(CpuFeatures::MMX());
  EXPECT_TRUE(CpuFeatures::POPCNT());
#endif  // __SSE4_2__
#if defined(__SSE4_1__)
  EXPECT_TRUE(CpuFeatures::SSE4_1());
  EXPECT_TRUE(CpuFeatures::SSSE3());
  EXPECT_TRUE(CpuFeatures::SSE3());
  EXPECT_TRUE(CpuFeatures::SSE2());
  EXPECT_TRUE(CpuFeatures::SSE());
  EXPECT_TRUE(CpuFeatures::MMX());
#endif  // __SSE4_1__
#if defined(__SSSE3__)
  EXPECT_TRUE(CpuFeatures::SSSE3());
  EXPECT_TRUE(CpuFeatures::SSE3());
  EXPECT_TRUE(CpuFeatures::SSE2());
  EXPECT_TRUE(CpuFeatures::SSE());
  EXPECT_TRUE(CpuFeatures::MMX());
#endif  // __SSSE3__
#if defined(__SSE3__)
  EXPECT_TRUE(CpuFeatures::SSE3());
  EXPECT_TRUE(CpuFeatures::SSE2());
  EXPECT_TRUE(CpuFeatures::SSE());
  EXPECT_TRUE(CpuFeatures::MMX());
#endif  // __SSE3__
#if defined(__SSE2__)
  EXPECT_TRUE(CpuFeatures::SSE2());
  EXPECT_TRUE(CpuFeatures::SSE());
  EXPECT_TRUE(CpuFeatures::MMX());
#endif  // __SSE2__
#if defined(__SSE__)
  EXPECT_TRUE(CpuFeatures::SSE());
  EXPECT_TRUE(CpuFeatures::MMX());
#endif  // __SSE__
#if defined(__MMX__)
  EXPECT_TRUE(CpuFeatures::MMX());
#endif  // __MMX__
}


// Prints every field of CpuFeatures::static_flags_. Nothing is asserted; the
// test only exercises access to the fields.
TEST(CpuFeatures, Static) {
  // Emits one "<label><value>" line; the value keeps the field's own type.
  const auto show = [](const char *label, auto value) {
    std::cout << label << value << std::endl;
  };

  show("* F16C:             ", CpuFeatures::static_flags_.F16C);
  show("* SSE:              ", CpuFeatures::static_flags_.SSE);
  show("* SSE2:             ", CpuFeatures::static_flags_.SSE2);
  show("* SSE3:             ", CpuFeatures::static_flags_.SSE3);
  show("* SSSE3:            ", CpuFeatures::static_flags_.SSSE3);
  show("* SSE4_1:           ", CpuFeatures::static_flags_.SSE4_1);
  show("* SSE4_2:           ", CpuFeatures::static_flags_.SSE4_2);
  show("* AVX:              ", CpuFeatures::static_flags_.AVX);
  show("* AVX2:             ", CpuFeatures::static_flags_.AVX2);
  show("* AVX512F:          ", CpuFeatures::static_flags_.AVX512F);
  show("* AVX512DQ:         ", CpuFeatures::static_flags_.AVX512DQ);
  show("* AVX512PF:         ", CpuFeatures::static_flags_.AVX512PF);
  show("* AVX512ER:         ", CpuFeatures::static_flags_.AVX512ER);
  show("* AVX512CD:         ", CpuFeatures::static_flags_.AVX512CD);
  show("* AVX512BW:         ", CpuFeatures::static_flags_.AVX512BW);
  show("* AVX512VL:         ", CpuFeatures::static_flags_.AVX512VL);
  show("* AVX512_IFMA:      ", CpuFeatures::static_flags_.AVX512_IFMA);
  show("* AVX512_VBMI:      ", CpuFeatures::static_flags_.AVX512_VBMI);
  show("* AVX512_VBMI2:     ", CpuFeatures::static_flags_.AVX512_VBMI2);
  show("* AVX512_VNNI:      ", CpuFeatures::static_flags_.AVX512_VNNI);
  show("* AVX512_BITALG:    ", CpuFeatures::static_flags_.AVX512_BITALG);
  show("* AVX512_VPOPCNTDQ: ", CpuFeatures::static_flags_.AVX512_VPOPCNTDQ);
  show("* AVX512_4VNNIW:    ", CpuFeatures::static_flags_.AVX512_4VNNIW);
  show("* AVX512_4FMAPS:    ", CpuFeatures::static_flags_.AVX512_4FMAPS);
  show("* AVX512_FP16:      ", CpuFeatures::static_flags_.AVX512_FP16);
  show("* CX8:              ", CpuFeatures::static_flags_.CX8);
  show("* CX16:             ", CpuFeatures::static_flags_.CX16);
  show("* PCLMULQDQ:        ", CpuFeatures::static_flags_.PCLMULQDQ);
  show("* VPCLMULQDQ:       ", CpuFeatures::static_flags_.VPCLMULQDQ);
  show("* CMOV:             ", CpuFeatures::static_flags_.CMOV);
  show("* MOVBE:            ", CpuFeatures::static_flags_.MOVBE);
  show("* ERMS:             ", CpuFeatures::static_flags_.ERMS);
  show("* POPCNT:           ", CpuFeatures::static_flags_.POPCNT);
  show("* XSAVE:            ", CpuFeatures::static_flags_.XSAVE);
  show("* FMA:              ", CpuFeatures::static_flags_.FMA);
  show("* ADX:              ", CpuFeatures::static_flags_.ADX);
  show("* GFNI:             ", CpuFeatures::static_flags_.GFNI);
  show("* AES:              ", CpuFeatures::static_flags_.AES);
  show("* VAES:             ", CpuFeatures::static_flags_.VAES);
  show("* RDSEED:           ", CpuFeatures::static_flags_.RDSEED);
  show("* RDRAND:           ", CpuFeatures::static_flags_.RDRAND);
  show("* SHA:              ", CpuFeatures::static_flags_.SHA);
  show("* BMI1:             ", CpuFeatures::static_flags_.BMI1);
  show("* BMI2:             ", CpuFeatures::static_flags_.BMI2);
  show("* CLFLUSH:          ", CpuFeatures::static_flags_.CLFLUSH);
  show("* CLFLUSHOPT:       ", CpuFeatures::static_flags_.CLFLUSHOPT);
  show("* CLWB:             ", CpuFeatures::static_flags_.CLWB);
  show("* RDPID:            ", CpuFeatures::static_flags_.RDPID);
  show("* FPU:              ", CpuFeatures::static_flags_.FPU);
  show("* HT:               ", CpuFeatures::static_flags_.HT);
  show("* VMX:              ", CpuFeatures::static_flags_.VMX);
  show("* HYPERVISOR:       ", CpuFeatures::static_flags_.HYPERVISOR);
}

================================================
FILE: tests/ailego/io/file_lock_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <ailego/io/file_lock.h>
#include <gtest/gtest.h>

using namespace zvec::ailego;

// Walks a FileLock through each acquire call (lock, try_lock_shared,
// lock_shared, try_lock), releasing after every one, and requires every call
// to succeed.
TEST(FileLock, General) {
  const char *path = "file_lock_test.dat";
  File file;

  // Open the data file if it is already present; otherwise create it
  // (second argument 128 - presumably the size in bytes, confirm in File).
  if (File::IsExist(path)) {
    ASSERT_TRUE(file.open(path, false));
  } else {
    ASSERT_TRUE(file.create(path, 128));
  }

  FileLock file_lock(file);

  // Blocking exclusive lock.
  ASSERT_TRUE(file_lock.lock());
  ASSERT_TRUE(file_lock.unlock());

  // Non-blocking shared lock.
  ASSERT_TRUE(file_lock.try_lock_shared());
  ASSERT_TRUE(file_lock.unlock());

  // Blocking shared lock.
  ASSERT_TRUE(file_lock.lock_shared());
  ASSERT_TRUE(file_lock.unlock());

  // Non-blocking exclusive lock.
  ASSERT_TRUE(file_lock.try_lock());
  ASSERT_TRUE(file_lock.unlock());

  file.close();
}


================================================
FILE: tests/ailego/io/file_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <ailego/utility/memory_helper.h>
#include <gtest/gtest.h>
#include <zvec/ailego/io/file.h>

using namespace zvec::ailego;

// The current and parent directory, spelled several ways, must be reported as
// existing directories that are neither symbolic links nor regular files.
TEST(File, General) {
  // "..//" appears twice on purpose, so each predicate runs five times.
  const char *const dir_spellings[] = {".", "..", "../", "..//", "..//"};

  for (const char *spelling : dir_spellings) {
    EXPECT_TRUE(File::IsDirectory(spelling)) << spelling;
  }

  for (const char *spelling : dir_spellings) {
    EXPECT_FALSE(File::IsSymbolicLink(spelling)) << spelling;
  }

  for (const char *spelling : dir_spellings) {
    EXPECT_FALSE(File::IsRegular(spelling)) << spelling;
  }

  for (const char *spelling : dir_spellings) {
    EXPECT_TRUE(File::IsExist(spelling)) << spelling;
  }
}

// File::MakePath must succeed for the empty path, for paths that already
// exist, and for a nested path given with and without a trailing slash.
TEST(File, MakePath) {
  // Evaluated strictly in this order; later entries revisit directories that
  // earlier entries created.
  const char *const targets[] = {
      "",
      ".",
      "..",
      "../",
      "..//",
      "..//",
      "/",
      "./file_test_makepath",
      "file_test_makepath",
      "file_test_makepath/1/2/3/",
      "file_test_makepath/1/2/3",
  };

  for (const char *target : targets) {
    EXPECT_TRUE(File::MakePath(target)) << "path: \"" << target << "\"";
  }
}

// Creates an empty file at `path`, creating its parent directory first.
//
// The parent directory is everything before the last '/' in `path`. A path
// without any '/' has no parent component, so only the file itself is created.
// The result of File::MakePath() is intentionally not checked: if the parent
// could not be created, creating the file fails and that failure is returned.
//
// Returns the result of File::create(); false when `path` is null.
bool TouchFile(const char *path) {
  if (path == nullptr) {
    return false;
  }

  const std::string full_path(path);
  const std::string::size_type last_slash = full_path.rfind('/');
  if (last_slash != std::string::npos) {
    File::MakePath(full_path.substr(0, last_slash).c_str());
  }

  File file;
  return file.create(path, 0);
}

// Builds a directory tree populated with files and checks that
// File::RemoveDirectory() refuses regular files but removes whole directories.
TEST(File, RemoveDirectory) {
  // This test also removes "file_test_makepath". Create it here so the test
  // does not rely on File.MakePath having run beforehand (for example under
  // --gtest_filter or --gtest_shuffle). MakePath succeeds on existing paths.
  EXPECT_TRUE(File::MakePath("file_test_makepath/1/2/3"));

  // Directory skeleton.
  EXPECT_TRUE(File::MakePath("file_test_rmdir/1/2/3"));
  EXPECT_TRUE(File::MakePath("file_test_rmdir/a/b/c/d"));
  EXPECT_TRUE(File::MakePath("file_test_rmdir/1/a/b/c/d"));
  EXPECT_TRUE(File::MakePath("file_test_rmdir/1/2/a/b/c/d"));
  EXPECT_TRUE(File::MakePath("file_test_rmdir/1/2/3/a/b/c/d"));
  EXPECT_TRUE(File::MakePath("file_test_rmdir/a/1/2/3"));
  EXPECT_TRUE(File::MakePath("file_test_rmdir/a/b/1/2/3"));
  EXPECT_TRUE(File::MakePath("file_test_rmdir/a/b/c/1/2/3"));

  // Empty files scattered over the tree.
  EXPECT_TRUE(TouchFile("file_test_rmdir/a/b/c/1/2/3/A"));
  EXPECT_TRUE(TouchFile("file_test_rmdir/a/b/c/1/2/3/B"));
  EXPECT_TRUE(TouchFile("file_test_rmdir/C"));
  EXPECT_TRUE(TouchFile("file_test_rmdir/D"));
  EXPECT_TRUE(TouchFile("file_test_rmdir/1/2/3/E"));
  EXPECT_TRUE(TouchFile("file_test_rmdir/a/b/c/d/F"));
  EXPECT_TRUE(TouchFile("file_test_rmdir/1/a/b/c/d/G"));
  EXPECT_TRUE(TouchFile("file_test_rmdir/1/2/a/b/c/d/H"));
  EXPECT_TRUE(TouchFile("file_test_rmdir/1/2/3/a/b/c/d/I"));
  EXPECT_TRUE(TouchFile("file_test_rmdir/a/1/2/3/J"));
  EXPECT_TRUE(TouchFile("file_test_rmdir/a/b/1/2/3/K"));
  EXPECT_TRUE(TouchFile("file_test_rmdir/1/2/3/M"));
  EXPECT_TRUE(TouchFile("file_test_rmdir/1/2/a/b/c/d/N"));

  // Regular files are not directories, so removing them as such must fail.
  EXPECT_FALSE(File::RemoveDirectory("file_test_rmdir/1/2/a/b/c/d/N"));
  EXPECT_FALSE(File::RemoveDirectory("file_test_rmdir/1/2/3/a/b/c/d/I"));
  EXPECT_FALSE(File::RemoveDirectory("file_test_rmdir/C"));
  EXPECT_FALSE(File::RemoveDirectory("file_test_rmdir/D"));

  // Non-empty directories are removed, with or without a trailing slash.
  EXPECT_TRUE(File::IsDirectory("file_test_rmdir/"));
  EXPECT_TRUE(File::IsDirectory("file_test_makepath/"));
  EXPECT_TRUE(File::RemoveDirectory("file_test_rmdir/"));
  EXPECT_TRUE(File::RemoveDirectory("file_test_makepath"));
}

// Checks File::RemovePath() on a populated directory tree, on single files and
// on file paths written with a trailing slash.
TEST(File, RemovePath) {
  // Directory skeleton.
  EXPECT_TRUE(File::MakePath("file_test_rmpath/1/2/3"));
  EXPECT_TRUE(File::MakePath("file_test_rmpath/a/b/c/d"));
  EXPECT_TRUE(File::MakePath("file_test_rmpath/1/a/b/c/d"));
  EXPECT_TRUE(File::MakePath("file_test_rmpath/1/2/a/b/c/d"));
  EXPECT_TRUE(File::MakePath("file_test_rmpath/1/2/3/a/b/c/d"));
  EXPECT_TRUE(File::MakePath("file_test_rmpath/a/1/2/3"));
  EXPECT_TRUE(File::MakePath("file_test_rmpath/a/b/1/2/3"));
  EXPECT_TRUE(File::MakePath("file_test_rmpath/a/b/c/1/2/3"));

  // Empty files scattered over the tree.
  EXPECT_TRUE(TouchFile("file_test_rmpath/a/b/c/1/2/3/A"));
  EXPECT_TRUE(TouchFile("file_test_rmpath/a/b/c/1/2/3/B"));
  EXPECT_TRUE(TouchFile("file_test_rmpath/C"));
  EXPECT_TRUE(TouchFile("file_test_rmpath/D"));
  EXPECT_TRUE(TouchFile("file_test_rmpath/1/2/3/E"));
  EXPECT_TRUE(TouchFile("file_test_rmpath/a/b/c/d/F"));
  EXPECT_TRUE(TouchFile("file_test_rmpath/1/a/b/c/d/G"));
  EXPECT_TRUE(TouchFile("file_test_rmpath/1/2/a/b/c/d/H"));
  EXPECT_TRUE(TouchFile("file_test_rmpath/1/2/3/a/b/c/d/I"));
  EXPECT_TRUE(TouchFile("file_test_rmpath/a/1/2/3/J"));
  EXPECT_TRUE(TouchFile("file_test_rmpath/a/b/1/2/3/K"));
  EXPECT_TRUE(TouchFile("file_test_rmpath/1/2/3/M"));
  EXPECT_TRUE(TouchFile("file_test_rmpath/1/2/a/b/c/d/N"));
  EXPECT_TRUE(File::IsExist("file_test_rmpath/1/2/a/b/c/d/N"));

  // The whole populated tree is removed in one call.
  EXPECT_TRUE(File::IsDirectory("file_test_rmpath/"));
  EXPECT_TRUE(File::RemovePath("file_test_rmpath/"));

  // Rebuild a small tree holding two directories and two files.
  EXPECT_TRUE(File::MakePath("file_test_rmpath/AAA"));
  EXPECT_TRUE(File::MakePath("file_test_rmpath/BBB"));
  EXPECT_TRUE(TouchFile("file_test_rmpath/CCC"));
  EXPECT_TRUE(TouchFile("file_test_rmpath/DDD"));
  EXPECT_TRUE(File::IsExist("file_test_rmpath/BBB"));

  // A regular file addressed with a trailing slash is rejected; the same file
  // without the slash is removed, and finally the remaining tree.
  EXPECT_FALSE(File::RemovePath("file_test_rmpath/CCC/"));
  EXPECT_FALSE(File::RemovePath("file_test_rmpath/DDD/"));
  EXPECT_TRUE(File::RemovePath("file_test_rmpath/CCC"));
  EXPECT_TRUE(File::RemovePath("file_test_rmpath/DDD"));
  EXPECT_TRUE(File::RemovePath("file_test_rmpath"));
}

// Creates a file several times with different sizes and re-opens it, checking
// validity, reported size and the read-only state after each step.
TEST(File, CreateAndOpen) {
  const char *file_path = "file_create_testing.tmp";
  size_t file_size = 12 * 1022 * 1021;

  // Make sure no file from an earlier run is in the way.
  File::Delete(file_path);
  EXPECT_FALSE(File::IsRegular(file_path));

  // First creation. NOTE(review): the meaning of the third create() argument
  // is not visible here -- confirm against the File API.
  {
    File file;
    EXPECT_FALSE(file.is_valid());
    EXPECT_TRUE(file.create(file_path, file_size, true));
    EXPECT_TRUE(file.is_valid());
    EXPECT_TRUE(File::IsRegular(file_path));
    EXPECT_EQ(file_size, file.size());
  }
  // Create again over the existing file, this time with a smaller size.
  {
    File file;
    EXPECT_FALSE(file.is_valid());
    EXPECT_TRUE(file.create(file_path, file_size / 10));
    EXPECT_TRUE(file.is_valid());
    EXPECT_FALSE(file.read_only());
    EXPECT_EQ(file_size / 10, file.size());
  }

  // Create once more over the existing file, with a larger size.
  {
    File file;
    EXPECT_FALSE(file.is_valid());
    EXPECT_TRUE(file.create(file_path, file_size * 3, true));
    EXPECT_TRUE(file.is_valid());
    EXPECT_FALSE(file.read_only());
    EXPECT_EQ(file_size * 3, file.size());
  }

  // Open with the second argument set: the file must report read_only().
  {
    File file;
    EXPECT_TRUE(file.open(file_path, true, true));
    EXPECT_TRUE(file.is_valid());
    EXPECT_TRUE(file.read_only());
    EXPECT_EQ(file_size * 3, file.size());
  }

  // Open with the second argument cleared: the file must be writable.
  {
    File file;
    EXPECT_TRUE(file.open(file_path, false, true));
    EXPECT_TRUE(file.is_valid());
    EXPECT_FALSE(file.read_only());
    EXPECT_EQ(file_size * 3, file.size());
  }
  // Clean up the temporary file.
  File::Delete(file_path);
}

// Writes a buffer covering the whole file, reads it back from the start and
// verifies both the transferred sizes and the content.
TEST(File, ReadAndWrite) {
  const char *file_path = "file_read_testing.tmp";
  size_t file_size = 2u * 1024u * 1024u + 12u * 1024;

  // Make sure no file from an earlier run is in the way.
  File::Delete(file_path);
  EXPECT_FALSE(File::IsRegular(file_path));

  File file;
  EXPECT_FALSE(file.is_valid());
  EXPECT_TRUE(file.create(file_path, file_size));
  EXPECT_TRUE(File::IsRegular(file_path));

  EXPECT_TRUE(file.is_valid());
  EXPECT_EQ(0, file.offset());
  EXPECT_EQ(file_size, file.size());

  // Fill the whole file with the byte 0x55.
  const char fill = static_cast<char>(0x55);
  std::string buf;
  buf.resize(file_size, fill);
  ASSERT_EQ(file_size, buf.size());
  EXPECT_EQ(file_size, file.write(buf.data(), buf.size()));
  EXPECT_EQ(file_size, file.size());
  EXPECT_EQ((ssize_t)buf.size(), file.offset());
  EXPECT_TRUE(file.flush());

  // Zero the buffer so that stale content cannot satisfy the check below,
  // then read the file back from the beginning.
  buf.clear();
  buf.resize(file_size);
  file.reset();
  EXPECT_EQ(file_size, file.read((void *)buf.data(), buf.size()));

  // Every byte read back must be the fill byte. Searching for the first
  // mismatch avoids dumping a multi-megabyte string on failure.
  EXPECT_EQ(std::string::npos, buf.find_first_not_of(fill));

  // Close before deleting so the cleanup also works on platforms that refuse
  // to delete an open file.
  file.close();
  File::Delete(file_path);
}

// Maps a region of a file (and, on non-Windows platforms, anonymous memory)
// and checks that mapping, flushing, remapping and unmapping succeed. The
// mapped memory itself is never read or written by this test.
TEST(File, MemoryMap) {
  const char *file_path = "file_map_testing.tmp";
  size_t file_size = 2u * 1024u * 1024u + 12u * 1024;
  // NOTE(review): map_offset + map_size is 15 pages larger than file_size, so
  // the requested region extends past the end of the file -- confirm this is
  // intended.
  size_t map_offset = MemoryHelper::PageSize() * 16;
  size_t map_size = file_size - MemoryHelper::PageSize();

  // Make sure no file from an earlier run is in the way.
  File::Delete(file_path);
  EXPECT_FALSE(File::IsRegular(file_path));

  File file;
  EXPECT_FALSE(file.is_valid());
  EXPECT_TRUE(file.create(file_path, file_size));
  EXPECT_TRUE(File::IsRegular(file_path));
  EXPECT_EQ(file_size, file.size());

  // Map through the handle returned by create(), with option value 0.
  void *addr = file.map(map_offset, map_size, 0);
  EXPECT_TRUE(addr != nullptr);
  EXPECT_TRUE(File::MemoryFlush(addr, map_size));
  File::MemoryUnmap(addr, map_size);
  file.close();

  // Same sequence on a handle obtained through open(file_path, true).
  EXPECT_TRUE(file.open(file_path, true));
  EXPECT_EQ(file_size, file.size());
  addr = file.map(map_offset, map_size, 0);
  EXPECT_TRUE(addr != nullptr);
  EXPECT_TRUE(File::MemoryFlush(addr, map_size));
  File::MemoryUnmap(addr, map_size);

  // void *addr1 = file.map(map_offset, map_size, 0);
  // void *addr2 = file.map(map_offset, map_size, 0);
  // EXPECT_EQ(addr1, addr2);
  file.close();

  // Same sequence again, this time with the File::MMAP_SHARED option.
  EXPECT_TRUE(file.open(file_path, true));
  EXPECT_EQ(file_size, file.size());
  addr = file.map(map_offset, map_size, File::MMAP_SHARED);
  EXPECT_TRUE(addr != nullptr);
  EXPECT_TRUE(File::MemoryFlush(addr, map_size));

#if defined(__linux) || defined(__linux__) || defined(__NetBSD__)
  // Remapping is only exercised on Linux and NetBSD.
  // NOTE(review): the second remap still passes map_size as the old size
  // although the first one requested map_size * 2, and the region is later
  // unmapped with map_size although map_size * 3 was requested -- confirm the
  // intended sizes against File::MemoryRemap().
  EXPECT_TRUE(File::MemoryRemap(addr, map_size, addr, map_size * 2));
  addr = File::MemoryRemap(addr, map_size, nullptr, map_size * 3);
  EXPECT_TRUE(addr);
#endif

  File::MemoryUnmap(addr, map_size);
  file.close();

#if !defined(_WIN32)
  // Mappings created without a file, with and without MMAP_SHARED.
  addr = File::MemoryMap(map_size, 0);
  EXPECT_TRUE(addr != nullptr);
  File::MemoryUnmap(addr, map_size);

  addr = File::MemoryMap(map_size, File::MMAP_SHARED);
  EXPECT_TRUE(addr != nullptr);
  File::MemoryUnmap(addr, map_size);
#endif
}

// Grows a file by writing page-sized blocks at its current end, then checks
// that truncate() both shrinks and extends it.
TEST(File, Append) {
  const char *file_path = "file_append_testing.tmp";

  // Start from a clean state, like the other tests in this file.
  File::Delete(file_path);
  EXPECT_FALSE(File::IsRegular(file_path));

  File file;
  EXPECT_FALSE(file.is_valid());
  EXPECT_TRUE(file.create(file_path, MemoryHelper::PageSize()));
  EXPECT_TRUE(File::IsRegular(file_path));

  // The file starts with one page; each write lands at the current end of the
  // file, so ten writes bring it to eleven pages.
  std::string padding;
  padding.resize(MemoryHelper::PageSize());
  for (size_t i = 0; i < 10; ++i) {
    EXPECT_EQ(padding.size(),
              file.write(file.size(), padding.data(), padding.size()));
  }
  EXPECT_EQ(padding.size() * 11, file.size());

  // Shrink.
  file.truncate(padding.size() * 7);
  EXPECT_EQ(padding.size() * 7, file.size());

  // Extend.
  file.truncate(padding.size() * 16);
  EXPECT_EQ(padding.size() * 16, file.size());
  file.close();

  // Do not leave the temporary file behind.
  File::Delete(file_path);
}

// Writes ten pages sequentially and checks seek() relative to the beginning,
// the end and the current position of the file.
TEST(File, Seek) {
  const char *file_path = "file_seek_testing.tmp";

  // Start from a clean state, like the other tests in this file.
  File::Delete(file_path);
  EXPECT_FALSE(File::IsRegular(file_path));

  File file;
  EXPECT_FALSE(file.is_valid());
  EXPECT_TRUE(file.create(file_path, 0));
  EXPECT_TRUE(File::IsRegular(file_path));

  // Sequential writes advance both the size and the offset.
  std::string padding;
  padding.resize(MemoryHelper::PageSize());
  for (size_t i = 0; i < 10; ++i) {
    EXPECT_EQ(padding.size(), file.write(padding.data(), padding.size()));
  }
  EXPECT_EQ(padding.size() * 10, (size_t)file.size());
  EXPECT_EQ(padding.size() * 10, (size_t)file.offset());

  // Absolute position from the beginning.
  EXPECT_TRUE(file.seek(0, File::Origin::Begin));
  EXPECT_EQ(0, file.offset());

  // Negative displacement from the end.
  EXPECT_TRUE(file.seek(-20, File::Origin::End));
  EXPECT_EQ((ssize_t)file.size() - 20, file.offset());

  // Positive displacement from the current position, back to the end.
  EXPECT_TRUE(file.seek(20, File::Origin::Current));
  EXPECT_EQ((ssize_t)file.size(), file.offset());
  file.close();

  // Do not leave the temporary file behind.
  File::Delete(file_path);
}


================================================
FILE: tests/ailego/io/mmap_file_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <gtest/gtest.h>
#include <zvec/ailego/io/file.h>
#include <zvec/ailego/io/mmap_file.h>

using namespace zvec::ailego;

// Creates a memory-mapped file twice at the same path, writes through the
// mapped region and checks how a closed MMapFile responds to lock()/unlock().
TEST(MMapFile, Create) {
  const char *file_path = "mmap_file_create_testing.tmp";
  size_t file_size = 12 * 1022 * 1021;

  // Make sure no file from an earlier run is in the way.
  File::Delete(file_path);
  EXPECT_FALSE(File::IsRegular(file_path));

  {
    // A default-constructed MMapFile is empty and invalid.
    MMapFile file;
    EXPECT_EQ(0u, file.size());
    EXPECT_EQ(0u, file.offset());
    EXPECT_FALSE(file.is_valid());
    EXPECT_TRUE(file.create(file_path, file_size));
    EXPECT_TRUE(file.is_valid());
    EXPECT_TRUE(File::IsRegular(file_path));

    // Touch every byte of the mapped region.
    memset(file.region(), 0xff, file.size());
    // After close(), warmup() is still called and lock()/unlock() must
    // report failure.
    file.close();
    file.warmup();
    EXPECT_FALSE(file.lock());
    EXPECT_FALSE(file.unlock());
  }
  // Create again over the existing file.
  {
    MMapFile file;
    EXPECT_FALSE(file.is_valid());
    EXPECT_TRUE(file.create(file_path, file_size));
    EXPECT_TRUE(file.is_valid());
    EXPECT_FALSE(file.read_only());
    memset(file.region(), 0xff, file.size());
  }
  // Clean up the temporary file.
  File::Delete(file_path);
}

// Creates a mapped file, fills it, renames it and re-opens it (first with the
// second open() argument set, then cleared), also exercising move construction
// of MMapFile.
TEST(MMapFile, Open) {
  const char *file_path = "mmap_file_open_testing.tmp";
  const char *file_path2 = "mmap_file_open_testing2.tmp";
  size_t file_size = 23 * 1022 * 1021;
  std::string raw_data;

  // Reference content: file_size bytes of 0x74.
  File::Delete(file_path);
  raw_data.resize(file_size, 0x74);

  // Create a file and fill it through a moved-to handle.
  {
    MMapFile file;
    EXPECT_TRUE(file.create(file_path, file_size));
    EXPECT_EQ(file_size, file.size());
    EXPECT_EQ(0u, file.offset());
    EXPECT_TRUE(File::IsRegular(file_path));
    file.warmup();
    file.lock();

    MMapFile file2 = std::move(file);
    memset(file2.region(), 0x74, file2.size());
    EXPECT_EQ(0, memcmp(file2.region(), raw_data.data(), raw_data.size()));
    // NOTE(review): flush() is called on the moved-from `file`, not on
    // `file2` which now owns the mapping -- confirm whether this is a
    // deliberate moved-from check or should be file2.flush().
    file.flush();
    file2.lock();
  }

  // Rename the file; the remaining steps depend on it, hence ASSERT.
  File::Delete(file_path2);
  ASSERT_TRUE(File::Rename(file_path, file_path2));

  // Open the renamed file: it must report read_only() and still hold the
  // bytes written above.
  {
    MMapFile file;
    EXPECT_TRUE(File::IsRegular(file_path2));
    EXPECT_TRUE(file.open(file_path2, true));
    EXPECT_TRUE(file.read_only());
    EXPECT_EQ(0, memcmp(file.region(), raw_data.data(), raw_data.size()));
    file.lock();
  }
  // Move an unopened handle, then open through the moved-to object. The
  // moved-from handle must fail to lock()/unlock().
  {
    MMapFile file;
    MMapFile file2 = std::move(file);
    EXPECT_TRUE(file2.open(file_path2, false));

    EXPECT_FALSE(file.lock());
    EXPECT_FALSE(file.unlock());
    file2.warmup();
    file2.lock();
    file2.unlock();
  }
  // Clean up the temporary file.
  File::Delete(file_path2);
}

// Exercises the sequential and positional read()/write() overloads of
// MMapFile, including accesses that start beyond or run past the end of the
// file.
TEST(MMapFile, ReadAndWrite) {
  const char *file_path = "mmap_file_read_testing.tmp";
  size_t file_size = 11 * 1022 * 1021;

  // Make sure no file from an earlier run is in the way.
  File::Delete(file_path);
  EXPECT_FALSE(File::IsRegular(file_path));

  MMapFile file;
  EXPECT_EQ(0u, file.size());
  EXPECT_EQ(0u, file.offset());
  EXPECT_FALSE(file.is_valid());
  EXPECT_TRUE(file.create(file_path, file_size));
  EXPECT_EQ(file_size, file.size());
  EXPECT_TRUE(file.is_valid());
  EXPECT_TRUE(File::IsRegular(file_path));

  // sizeof(buf) includes the terminating NUL. The sequential write stores the
  // whole array; a positional write that starts past the end stores nothing.
  char buf[] = "abcdefghijklmnopqrstuvwxyz";
  EXPECT_LT(sizeof(buf), file.size());
  EXPECT_EQ(sizeof(buf), file.write(buf, sizeof(buf)));
  EXPECT_EQ(0u, file.write(file_size + 2, buf, sizeof(buf)));

  // Positional copy-out read of the letters (without the NUL).
  std::string str;
  str.resize(sizeof(buf) - 1);
  EXPECT_EQ(str.size(), file.read(0, (uint8_t *)str.data(), str.size()));
  EXPECT_EQ(str, std::string(buf));

  // A write and a pointer-returning read that start 11 bytes before the end
  // are both cut down to those 11 bytes.
  EXPECT_EQ(11u, file.write(file_size - 11u, buf, sizeof(buf)));
  const void *p1 = nullptr;
  EXPECT_EQ(11u, file.read(file_size - 11u, &p1, sizeof(buf)));
  EXPECT_TRUE(!!p1);
  EXPECT_EQ(std::string((char *)p1, 11u), std::string(buf, 11u));

  // Only the sequential write above moved the offset; reset() rewinds it.
  EXPECT_EQ(sizeof(buf), file.offset());
  file.reset();
  EXPECT_EQ(0u, file.offset());

  // Sequential copy-out read from the start. The comparison has to use str2,
  // the buffer that was just filled; comparing `str` again would leave this
  // read unverified.
  std::string str2;
  str2.resize(sizeof(buf) - 1);
  EXPECT_EQ(str2.size(), file.read((uint8_t *)str2.data(), str2.size()));
  EXPECT_EQ(str2, std::string(buf));

  // A positional read starting past the end yields nothing; a sequential
  // pointer-returning read from offset 0 yields the NUL-terminated text.
  const void *p2 = nullptr;
  file.reset();
  EXPECT_EQ(0u, file.read(file_size + 11u, &p2, sizeof(buf)));
  const void *p3 = nullptr;
  EXPECT_EQ(sizeof(buf), file.read(&p3, sizeof(buf)));
  EXPECT_EQ(std::string((char *)p3), std::string(buf));

  // A copy-out read near the end is cut down to the remaining 11 bytes.
  char dest[64];
  EXPECT_EQ(11u, file.read(file_size - 11u, dest, sizeof(dest)));
  EXPECT_EQ(std::string(dest, 11u), std::string(buf, 11u));

  // Clean up the temporary file.
  File::Delete(file_path);
}


================================================
FILE: tests/ailego/logger/logger_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <atomic>
#include <gtest/gtest.h>
#include <zvec/ailego/logger/logger.h>
#include <zvec/ailego/parallel/thread_pool.h>

using namespace zvec;
using namespace zvec::ailego;

// Emits one INFO log line carrying a running sequence number.
//
// The function is enqueued many times on a ThreadPool, so the counter is
// atomic: incrementing a plain `static int` from several worker threads at
// once would be a data race.
static void DoLogging() {
  static std::atomic<int> log_count{0};
  const int sequence = log_count.fetch_add(1, std::memory_order_relaxed) + 1;
  LOG_INFO("DoLogging: %d", sequence);
}

// Emits one ERROR log line carrying a running sequence number.
//
// The function is enqueued many times on a ThreadPool, so the counter is
// atomic: incrementing a plain `static int` from several worker threads at
// once would be a data race.
static void DoErrLogging() {
  static std::atomic<int> err_log_count{0};
  const int sequence =
      err_log_count.fetch_add(1, std::memory_order_relaxed) + 1;
  LOG_ERROR("DoErrLogging: %d", sequence);
}

// Smoke test for the logging macros: runs them at several level settings,
// from thread-pool workers, and after the logger has been unregistered. Apart
// from the factory lookup nothing is asserted; the test passes if nothing
// crashes.
TEST(IndexLogger, General) {
  // The remaining steps assume a "ConsoleLogger" is registered in the factory.
  ASSERT_TRUE(ailego::Factory<Logger>::Has("ConsoleLogger"));

  // Emit one message per severity at every level value from 0 to 9.
  for (int i = 0; i < 10; ++i) {
    LoggerBroker::SetLevel(i);
    LOG_DEBUG("level: %d, %s", i, "LOG_DEBUG");
    LOG_INFO("level: %d, %s", i, "LOG_INFO");
    LOG_WARN("level: %d, %s", i, "LOG_WARN");
    LOG_ERROR("level: %d, %s", i, "LOG_ERROR");
    LOG_FATAL("level: %d, %s", i, "LOG_FATAL");
  }

  // Back to level 0, with arguments taken from temporary std::string objects.
  LoggerBroker::SetLevel(0);
  LOG_DEBUG("%s", std::string("LOG_DEBUG").c_str());
  LOG_INFO("%s", std::string("LOG_INFO").c_str());
  LOG_WARN("%s", std::string("LOG_WARN").c_str());
  LOG_ERROR("%s", std::string("LOG_ERROR").c_str());
  LOG_FATAL("%s", std::string("LOG_FATAL").c_str());

  // Log from thread-pool workers: 20 INFO tasks followed by 20 ERROR tasks.
  ThreadPool pool;
  for (uint32_t i = 0; i < 20; ++i) {
    pool.enqueue(Closure::New(DoLogging));
  }
  for (uint32_t i = 0; i < 20; ++i) {
    pool.enqueue(Closure::New(DoErrLogging));
  }
  pool.wake_all();
  pool.wait_finish();

  // The macros must remain callable after the logger has been unregistered.
  LoggerBroker::Unregister();
  LOG_DEBUG("%s", "LOG_DEBUG");
  LOG_INFO("%s", "LOG_INFO");
  LOG_WARN("%s", "LOG_WARN");
  LOG_ERROR("%s", "LOG_ERROR");
  LOG_FATAL("%s", "LOG_FATAL");
}

================================================
FILE: tests/ailego/math/cosine_distance_matrix_fp16_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <functional>
#include <random>
#include <string>
#include <thread>
#include <vector>
#include <ailego/container/bitmap.h>
#include <ailego/internal/cpu_features.h>
#include <ailego/math/distance.h>
#include <ailego/utility/math_helper.h>
#include <gtest/gtest.h>
#include <zvec/ailego/container/vector.h>
#include <zvec/ailego/utility/time_helper.h>

using namespace zvec::ailego;

// Returns the intrinsics description reported by internal::CpuFeatures.
inline const char *IntelIntrinsics(void) {
  const char *const intrinsics = internal::CpuFeatures::Intrinsics();
  return intrinsics;
}

// Transposes `src`, read as N rows of M elements, into `dst`, written as M
// rows of N elements: dst[c * N + r] = src[r * M + c].
inline void MatrixTranspose(Float16 *dst, const Float16 *src, size_t M,
                            size_t N) {
  for (size_t row = 0; row != N; ++row) {
    const Float16 *src_row = src + row * M;
    for (size_t col = 0; col != M; ++col) {
      dst[col * N + row] = src_row[col];
    }
  }
}

template <size_t N>
static float CosineDistance(const FixedVector<Float16, N> &lhs,
                            const FixedVector<Float16, N> &rhs) {
  size_t dimension = lhs.size() + 2;

  float l_norm = 0.0f;
  Norm2Matrix<Float16, 1>::Compute(lhs.data(), N, &l_norm);

  float r_norm = 0.0f;
  Norm2Matrix<Float16, 1>::Compute(rhs.data(), N, &r_norm);

  std::string lhs_normed;

  lhs_normed.resize(dimension * sizeof(uint16_t));

  Float16 *lhs_buf = reinterpret_cast<Float16 *>(&(lhs_normed[0]));

  for (size_t i = 0; i < N; ++i) {
    lhs_buf[i] = lhs[i] / l_norm;
  }
  ::memcpy(reinterpret_cast<uint16_t *>(&(lhs_normed[0])) + N, &l_norm,
           sizeof(float));

  std::string rhs_normed;

  rhs_normed.resize(dimension * sizeof(uint16_t));

  Float16 *rhs_buf = reinterpret_cast<Float16 *>(&(rhs_normed[0]));

  for (size_t i = 0; i < N; ++i) {
    rhs_buf[i] = rhs[i] / r_norm;
  }
  ::memcpy(reinterpret_cast<uint16_t *>(&(rhs_normed[0])) + N, &r_norm,
           sizeof(float));

  return Distance::Cosine(reinterpret_cast<const Float16 *>(lhs_normed.data()),
                          reinterpret_cast<const Float16 *>(rhs_normed.data()),
                          dimension);
}

// Compares the FP16 cosine distance of a few hand-picked vector pairs against
// precomputed reference values, within an absolute tolerance of 1e-3.
TEST(DistanceMatrix, Cosine_General) {
  const float epsilon = 1e-3;

  // Identical vectors: distance 0.
  FixedVector<Float16, 2> a{1.0f, 1.0f}, b{1.0f, 1.0f};
  EXPECT_NEAR(0.0f, CosineDistance(a, b), epsilon);

  // 3-dimensional pair.
  FixedVector<Float16, 3> c{0.2f, 0.9f, 0.6f}, d{0.3f, 0.5f, 0.7f};
  EXPECT_NEAR(0.072000861f, CosineDistance(c, d), epsilon);

  // 11-dimensional pair.
  FixedVector<Float16, 11> e{1.0f, 2.0f, 3.0f, 0.2f, 0.3f, 0.1f,
                             5.2f, 2.1f, 7.1f, 6.8f, 1.2f},
      f{2.0f, 4.0f, 6.0f, 0.6f, 0.7f, 0.9f, 1.0f, 2.3f, 3.4f, 4.5f, 6.4f};
  EXPECT_NEAR(0.28025103f, CosineDistance(e, f), epsilon);

  // The cases below are disabled (commented out).

  // FixedVector<Float16, 1> a{0.0f}, b{0.0f};
  // EXPECT_FLOAT_EQ(0.0f, CosineDistance(a, b));

  // FixedVector<Float16, 2> c{0.0f, 0.1f}, d{0.0f, 0.1f};
  // EXPECT_FLOAT_EQ(0.0f, CosineDistance(c, d));

  // FixedVector<Float16, 3> e{0.0f, 0.1f, 0.2f}, f{0.0f, 0.1f, 0.2f};
  // EXPECT_FLOAT_EQ(0.0f, CosineDistance(e, f));

  // FixedVector<Float16, 4> g{0.0f, 0.1f, 0.2f, 0.3f}, h{0.0f, 0.1f, 0.2f,
  // 0.3f}; EXPECT_FLOAT_EQ(0.0f, CosineDistance(g, h));

  // FixedVector<Float16, 5> i{0.0f, 0.1f, 0.2f, 0.3f, 0.4f},
  //     j{0.0f, 0.1f, 0.2f, 0.3f, 0.4f};
  // EXPECT_FLOAT_EQ(0.0f, CosineDistance(i, j));

  // FixedVector<Float16, 6> l{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f},
  //     k{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f};
  // EXPECT_FLOAT_EQ(0.0f, CosineDistance(l, k));

  // FixedVector<Float16, 7> m{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f},
  //     n{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f};
  // EXPECT_FLOAT_EQ(0.0f, CosineDistance(m, n));

  // FixedVector<Float16, 8> o{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f},
  //     p{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f};
  // EXPECT_FLOAT_EQ(0.0f, CosineDistance(o, p));

  // FixedVector<Float16, 9> q{0.0f, 0.1f, 0.2f, 0.3f, 0.4f,
  //                           0.5f, 0.6f, 0.7f, 0.8f},
  //     r{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f};
  // EXPECT_FLOAT_EQ(0.0f, CosineDistance(q, r));

  // FixedVector<Float16, 10> s{0.0f, 0.1f, 0.2f, 0.3f, 0.4f,
  //                            0.5f, 0.6f, 0.7f, 0.8f, 0.9f},
  //     t{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f};
  // EXPECT_FLOAT_EQ(0.0f, CosineDistance(s, t));

  // FixedVector<Float16, 11> u{0.0f},
  //     v{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.0f};
  // EXPECT_TRUE(MathHelper::IsAlmostEqual(3.84983f, CosineDistance(u, v),
  // 1000));

  // FixedVector<Float16, 12> w{0.0f},
  //     x{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f,
  //     0.9f, 1.0f, 1.1f};
  // EXPECT_TRUE(MathHelper::IsAlmostEqual(5.05897f, CosineDistance(w, x),
  // 1000));

  // FixedVector<Float16, 13> y{0.0f}, z{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f,
  // 0.6f,
  //                                     0.7f, 0.8f, 0.9f, 1.0f, 1.1f, 1.2f};
  // EXPECT_TRUE(MathHelper::IsAlmostEqual(6.499438f, CosineDistance(y, z),
  // 1000));

  // FixedVector<Float16, 14> x14{0.0f},
  //     y14{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f,
  //         0.7f, 0.8f, 0.9f, 1.0f, 1.1f, 1.2f, 2.0f};
  // EXPECT_TRUE(
  //     MathHelper::IsAlmostEqual(10.49944f, CosineDistance(x14, y14), 1000));

  // FixedVector<Float16, 15> x15{0.0f},
  //     y15{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f,
  //         0.8f, 0.9f, 1.0f, 1.1f, 1.2f, 2.0f, 3.0f};
  // EXPECT_TRUE(
  //     MathHelper::IsAlmostEqual(19.49944f, CosineDistance(x15, y15), 1000));
}

#if 0
// Cross-checks the batched CosineDistanceMatrix<Float16, M, N> kernel against
// the 1x1 kernel on random data. (This code sits inside an `#if 0` region and
// is currently not compiled.)
//
// M is the number of vectors per batch and N the number of queries; the
// dimension is drawn at random from [1, 65].
template <size_t M, size_t N>
void TestCosineMatrix(void) {
  std::mt19937 gen((std::random_device())());

  const size_t batch_size = M;
  const size_t query_size = N;
  size_t dimension = (std::uniform_int_distribution<size_t>(1, 65))(gen);
  size_t matrix_size = batch_size * dimension;
  size_t query_matrix_size = query_size * dimension;

  // *1 buffers are filled directly with random values; *2 buffers receive
  // their transposed copies, which are what the batched kernel is given.
  std::vector<Float16> matrix1(matrix_size);
  std::vector<Float16> matrix2(matrix_size);
  std::vector<Float16> query1(query_matrix_size);
  std::vector<Float16> query2(query_matrix_size);
  std::vector<float> result1(batch_size * query_size);
  std::vector<float> result2(batch_size * query_size);

  std::uniform_real_distribution<float> dist(0.0, 1.0);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = dist(gen);
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = dist(gen);
  }
  MatrixTranspose(&matrix2[0], matrix1.data(), dimension, batch_size);
  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);

  // Reference results: one 1x1 computation per (query, vector) pair.
  for (size_t i = 0; i < query_size; ++i) {
    const Float16 *cur_query = &query1[i * dimension];
    float *query_result = &result1[i * batch_size];

    for (size_t j = 0; j < batch_size; ++j) {
      CosineDistanceMatrix<Float16, 1, 1>::Compute(
          &matrix1[j * dimension], cur_query, dimension, &query_result[j]);
    }
  }
  // Batched results computed in a single call on the transposed inputs.
  CosineDistanceMatrix<Float16, batch_size, query_size>::Compute(
      &matrix2[0], &query2[0], dimension, &result2[0]);

  // Both paths must agree within the tolerance given to IsAlmostEqual.
  for (size_t i = 0; i < batch_size * query_size; ++i) {
    // EXPECT_FLOAT_EQ(result1[i], result2[i]);
    EXPECT_TRUE(MathHelper::IsAlmostEqual(result1[i], result2[i], 10000));
  }
}

// One test per <batch size, query count> instantiation of TestCosineMatrix.
// (These tests sit inside an `#if 0` region and are currently not compiled.)
TEST(DistanceMatrix, Cosine_1x1) {
  TestCosineMatrix<1, 1>();
}

TEST(DistanceMatrix, Cosine_2x1) {
  TestCosineMatrix<2, 1>();
}

TEST(DistanceMatrix, Cosine_2x2) {
  TestCosineMatrix<2, 2>();
}

TEST(DistanceMatrix, Cosine_3x3) {
  TestCosineMatrix<3, 3>();
}

TEST(DistanceMatrix, Cosine_4x1) {
  TestCosineMatrix<4, 1>();
}

TEST(DistanceMatrix, Cosine_4x2) {
  TestCosineMatrix<4, 2>();
}

TEST(DistanceMatrix, Cosine_4x4) {
  TestCosineMatrix<4, 4>();
}

TEST(DistanceMatrix, Cosine_8x1) {
  TestCosineMatrix<8, 1>();
}

TEST(DistanceMatrix, Cosine_8x2) {
  TestCosineMatrix<8, 2>();
}

TEST(DistanceMatrix, Cosine_8x4) {
  TestCosineMatrix<8, 4>();
}

TEST(DistanceMatrix, Cosine_8x8) {
  TestCosineMatrix<8, 8>();
}

TEST(DistanceMatrix, Cosine_16x1) {
  TestCosineMatrix<16, 1>();
}

TEST(DistanceMatrix, Cosine_16x2) {
  TestCosineMatrix<16, 2>();
}

TEST(DistanceMatrix, Cosine_16x4) {
  TestCosineMatrix<16, 4>();
}

TEST(DistanceMatrix, Cosine_16x8) {
  TestCosineMatrix<16, 8>();
}

TEST(DistanceMatrix, Cosine_16x16) {
  TestCosineMatrix<16, 16>();
}

TEST(DistanceMatrix, Cosine_32x1) {
  TestCosineMatrix<32, 1>();
}

TEST(DistanceMatrix, Cosine_32x2) {
  TestCosineMatrix<32, 2>();
}

TEST(DistanceMatrix, Cosine_32x4) {
  TestCosineMatrix<32, 4>();
}

TEST(DistanceMatrix, Cosine_32x8) {
  TestCosineMatrix<32, 8>();
}

TEST(DistanceMatrix, Cosine_32x16) {
  TestCosineMatrix<32, 16>();
}

TEST(DistanceMatrix, Cosine_32x32) {
  TestCosineMatrix<32, 32>();
}

TEST(DistanceMatrix, Cosine_64x1) {
  TestCosineMatrix<64, 1>();
}

TEST(DistanceMatrix, Cosine_64x2) {
  TestCosineMatrix<64, 2>();
}

TEST(DistanceMatrix, Cosine_64x4) {
  TestCosineMatrix<64, 4>();
}

TEST(DistanceMatrix, Cosine_64x8) {
  TestCosineMatrix<64, 8>();
}

TEST(DistanceMatrix, Cosine_64x16) {
  TestCosineMatrix<64, 16>();
}

TEST(DistanceMatrix, Cosine_64x32) {
  TestCosineMatrix<64, 32>();
}

TEST(DistanceMatrix, Cosine_64x64) {
  TestCosineMatrix<64, 64>();
}

TEST(DistanceMatrix, Cosine_128x1) {
  TestCosineMatrix<128, 1>();
}

TEST(DistanceMatrix, Cosine_128x2) {
  TestCosineMatrix<128, 2>();
}

TEST(DistanceMatrix, Cosine_128x4) {
  TestCosineMatrix<128, 4>();
}

TEST(DistanceMatrix, Cosine_128x8) {
  TestCosineMatrix<128, 8>();
}

TEST(DistanceMatrix, Cosine_128x16) {
  TestCosineMatrix<128, 16>();
}

TEST(DistanceMatrix, Cosine_128x32) {
  TestCosineMatrix<128, 32>();
}

TEST(DistanceMatrix, Cosine_128x64) {
  TestCosineMatrix<128, 64>();
}

TEST(DistanceMatrix, Cosine_128x128) {
  TestCosineMatrix<128, 128>();
}

// Times three ways of computing FP16 cosine distances over random data and
// prints the elapsed microseconds of each. (This code sits inside an `#if 0`
// region and is currently not compiled.)
//
// Template parameters: M = vectors per batch, N = number of queries,
// B = number of batches (blocks), D = vector dimension.
template <size_t M, size_t N, size_t B, size_t D>
void CosineBenchmark(void) {
  const size_t dimension = D;
  const size_t batch_size = M;
  const size_t block_size = B;
  const size_t query_size = N;
  const size_t matrix_size = block_size * batch_size * dimension;
  const size_t query_matrix_size = dimension * query_size;

  // *1 buffers are filled directly with random values; *2 buffers receive
  // their transposed copies (the matrix is transposed block by block).
  std::vector<Float16> matrix1(matrix_size);
  std::vector<Float16> matrix2(matrix_size);
  std::vector<Float16> query1(query_matrix_size);
  std::vector<Float16> query2(query_matrix_size);

  std::mt19937 gen((std::random_device())());
  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = dist(gen);
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = dist(gen);
  }

  for (size_t i = 0; i < block_size; ++i) {
    size_t start_pos = i * batch_size * dimension;
    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], dimension,
                    batch_size);
  }
  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);

  ElapsedTime elapsed_time;
  std::vector<float> results(batch_size * query_size);

  std::cout << "# (" << IntelIntrinsics() << ") FP16 " << dimension << "d, "
            << batch_size << " * " << query_size << " * " << block_size
            << std::endl;

  // 1 Batched Cosine: transposed batch against one query at a time.
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const Float16 *matrix_batch = &matrix2[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const Float16 *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      CosineDistanceMatrix<Float16, batch_size, 1>::Compute(
          matrix_batch, current_query, dimension, current_results);
    }
  }
  std::cout << "* 1 Batched Cosine (us) \t" << elapsed_time.micro_seconds()
            << std::endl;

  // N Batched Cosine: transposed batch against all transposed queries at once.
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const Float16 *matrix_batch = &matrix2[i * batch_size * dimension];

    CosineDistanceMatrix<Float16, batch_size, query_size>::Compute(
        matrix_batch, &query2[0], dimension, results.data());
  }
  std::cout << "* N Batched Cosine (us) \t" << elapsed_time.micro_seconds()
            << std::endl;

  // Unbatched Cosine: one 1x1 computation per (vector, query) pair on the
  // untransposed data.
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const Float16 *matrix_batch = &matrix1[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const Float16 *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      for (size_t k = 0; k < batch_size; ++k) {
        CosineDistanceMatrix<Float16, 1, 1>::Compute(
            &matrix_batch[k * dimension], current_query, dimension,
            &current_results[k]);
      }
    }
  }
  std::cout << "* Unbatched Cosine (us) \t" << elapsed_time.micro_seconds()
            << std::endl;
}

// Runs CosineBenchmark over a range of <batch, query, block, dimension>
// shapes. The DISABLED_ prefix keeps gtest from running it by default, and
// the surrounding `#if 0` currently keeps it from being compiled at all.
TEST(DistanceMatrix, DISABLED_Cosine_Benchmark) {
  CosineBenchmark<2, 1, 512, 64>();
  CosineBenchmark<2, 2, 512, 64>();
  CosineBenchmark<4, 1, 512, 64>();
  CosineBenchmark<4, 2, 512, 64>();
  CosineBenchmark<4, 4, 512, 64>();
  CosineBenchmark<8, 1, 512, 64>();
  CosineBenchmark<8, 2, 512, 64>();
  CosineBenchmark<8, 4, 512, 64>();
  CosineBenchmark<8, 8, 512, 64>();
  CosineBenchmark<16, 1, 512, 64>();
  CosineBenchmark<16, 2, 512, 64>();
  CosineBenchmark<16, 4, 512, 64>();
  CosineBenchmark<16, 8, 512, 64>();
  CosineBenchmark<16, 16, 512, 64>();
  CosineBenchmark<32, 1, 512, 64>();
  CosineBenchmark<32, 2, 512, 64>();
  CosineBenchmark<32, 4, 512, 64>();
  CosineBenchmark<32, 8, 512, 64>();
  CosineBenchmark<32, 16, 512, 64>();
  CosineBenchmark<32, 32, 512, 64>();
  CosineBenchmark<64, 1, 512, 64>();
  CosineBenchmark<64, 2, 512, 64>();
  CosineBenchmark<64, 4, 512, 64>();
  CosineBenchmark<64, 8, 512, 64>();
  CosineBenchmark<128, 1, 512, 64>();
  CosineBenchmark<1, 1, 1024, 256>();
}

#endif


================================================
FILE: tests/ailego/math/cosine_distance_matrix_fp32_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <functional>
#include <random>
#include <string>
#include <thread>
#include <vector>
#include <ailego/container/bitmap.h>
#include <ailego/internal/cpu_features.h>
#include <ailego/math/distance.h>
#include <ailego/utility/math_helper.h>
#include <gtest/gtest.h>
#include <zvec/ailego/container/vector.h>
#include <zvec/ailego/utility/time_helper.h>

using namespace zvec::ailego;

// Returns the string reported by internal::CpuFeatures::Intrinsics(); the
// benchmark in this file prints it as part of its header line.
inline const char *IntelIntrinsics(void) {
  return internal::CpuFeatures::Intrinsics();
}

// Transposes `src`, read as N rows of M floats, into `dst`, written as M rows
// of N floats, i.e. dst[row * N + col] = src[col * M + row].
// `dst` and `src` must each hold M * N elements.
inline void MatrixTranspose(float *dst, const float *src, size_t M, size_t N) {
  for (size_t row = 0; row < M; ++row) {
    float *out = dst + row * N;
    for (size_t col = 0; col < N; ++col) {
      out[col] = src[col * M + row];
    }
  }
}

// Computes the cosine distance between two fixed-size FP32 vectors.
//
// Each input is divided element-wise by the norm that Norm2Matrix reports for
// it, and that norm is appended as one extra trailing element, so both buffers
// handed to Distance::Cosine hold N + 1 floats.
//
// The staging buffers are std::vector<float> so the values live in storage
// that is typed and aligned for float; no pointer reinterpretation is needed.
// No guard is applied for a zero norm: the division result is passed through
// unchanged.
template <size_t N>
static float CosineDistance(const FixedVector<float, N> &lhs,
                            const FixedVector<float, N> &rhs) {
  // One extra slot per vector for the trailing norm.
  const size_t dimension = lhs.size() + 1;

  float l_norm = 0.0f;
  Norm2Matrix<float, 1>::Compute(lhs.data(), N, &l_norm);

  float r_norm = 0.0f;
  Norm2Matrix<float, 1>::Compute(rhs.data(), N, &r_norm);

  std::vector<float> lhs_normed(dimension);
  for (size_t i = 0; i < N; ++i) {
    lhs_normed[i] = lhs[i] / l_norm;
  }
  lhs_normed[N] = l_norm;

  std::vector<float> rhs_normed(dimension);
  for (size_t i = 0; i < N; ++i) {
    rhs_normed[i] = rhs[i] / r_norm;
  }
  rhs_normed[N] = r_norm;

  // Pass const pointers so the argument types seen by Distance::Cosine are
  // exactly `const float *`.
  const float *lhs_data = lhs_normed.data();
  const float *rhs_data = rhs_normed.data();
  return Distance::Cosine(lhs_data, rhs_data, dimension);
}

// Checks CosineDistance against precomputed reference values for 2-, 3- and
// 11-dimensional FP32 vectors.
TEST(DistanceMatrix, Cosine_General) {
  // Absolute tolerance passed to EXPECT_NEAR for every comparison below.
  const float epsilon = 1e-3;

  FixedVector<float, 2> a{0.2f, 0.9f}, b{0.3f, 0.5f};

  EXPECT_NEAR(0.05131668f, CosineDistance(a, b), epsilon);

  FixedVector<float, 3> c{0.2f, 0.9f, 0.6f}, d{0.3f, 0.5f, 0.7f};

  EXPECT_NEAR(0.07199293f, CosineDistance(c, d), epsilon);

  // Odd dimension (11).
  FixedVector<float, 11> e{1.0f, 2.0f, 3.0f, 0.2f, 0.3f, 0.1f,
                           5.2f, 2.1f, 7.1f, 6.8f, 1.2f},
      f{2.0f, 4.0f, 6.0f, 0.6f, 0.7f, 0.9f, 1.0f, 2.3f, 3.4f, 4.5f, 6.4f};

  EXPECT_NEAR(0.2803060f, CosineDistance(e, f), epsilon);
}

#if 0
// Cross-checks CosineDistanceMatrix<float, M, N> against the
// CosineDistanceMatrix<float, 1, 1> kernel on random FP32 data.
//
// M is the batch size and N the query count. The reference pass reads the
// generated buffers one (query, batch vector) pair at a time; the batched pass
// reads the buffers produced by MatrixTranspose. Every pair of results must
// differ by at most 1e-5.
template <size_t M, size_t N>
void TestCosineMatrix(void) {
  std::mt19937 rng((std::random_device())());

  // The dimension is pinned to 4; the randomized choice is disabled.
  const size_t dim = 4;
  const size_t result_count = M * N;

  std::vector<float> batch_src(M * dim);
  std::vector<float> batch_transposed(M * dim);
  std::vector<float> query_src(N * dim);
  std::vector<float> query_transposed(N * dim);
  std::vector<float> result1(result_count);
  std::vector<float> result2(result_count);

  // Fill the batch first and the queries second, all from [0, 1).
  std::uniform_real_distribution<float> unit(0.0, 1.0);
  for (float &value : batch_src) {
    value = unit(rng);
  }
  for (float &value : query_src) {
    value = unit(rng);
  }

  MatrixTranspose(&batch_transposed[0], batch_src.data(), dim, M);
  MatrixTranspose(&query_transposed[0], query_src.data(), dim, N);

  // Reference results, stored query-major: result1[q * M + b].
  for (size_t q = 0; q < N; ++q) {
    const float *query = &query_src[q * dim];
    float *row = &result1[q * M];

    for (size_t b = 0; b < M; ++b) {
      CosineDistanceMatrix<float, 1, 1>::Compute(&batch_src[b * dim], query,
                                                 dim, &row[b]);
    }
  }

  // Batched results from the transposed inputs.
  CosineDistanceMatrix<float, M, N>::Compute(
      &batch_transposed[0], &query_transposed[0], dim, &result2[0]);

  for (size_t i = 0; i < result_count; ++i) {
    EXPECT_GE(0.00001, std::abs(result1[i] - result2[i]));
  }
}

// One test per <batch size, query size> combination. Each test only forwards
// to TestCosineMatrix<M, N>() with the sizes spelled out in its name.
TEST(DistanceMatrix, Cosine_1x1) {
  TestCosineMatrix<1, 1>();
}

TEST(DistanceMatrix, Cosine_2x1) {
  TestCosineMatrix<2, 1>();
}

TEST(DistanceMatrix, Cosine_2x2) {
  TestCosineMatrix<2, 2>();
}

TEST(DistanceMatrix, Cosine_3x3) {
  TestCosineMatrix<3, 3>();
}

TEST(DistanceMatrix, Cosine_4x1) {
  TestCosineMatrix<4, 1>();
}

TEST(DistanceMatrix, Cosine_4x2) {
  TestCosineMatrix<4, 2>();
}

TEST(DistanceMatrix, Cosine_4x4) {
  TestCosineMatrix<4, 4>();
}

TEST(DistanceMatrix, Cosine_8x1) {
  TestCosineMatrix<8, 1>();
}

TEST(DistanceMatrix, Cosine_8x2) {
  TestCosineMatrix<8, 2>();
}

TEST(DistanceMatrix, Cosine_8x4) {
  TestCosineMatrix<8, 4>();
}

TEST(DistanceMatrix, Cosine_8x8) {
  TestCosineMatrix<8, 8>();
}

TEST(DistanceMatrix, Cosine_16x1) {
  TestCosineMatrix<16, 1>();
}

TEST(DistanceMatrix, Cosine_16x2) {
  TestCosineMatrix<16, 2>();
}

TEST(DistanceMatrix, Cosine_16x4) {
  TestCosineMatrix<16, 4>();
}

TEST(DistanceMatrix, Cosine_16x8) {
  TestCosineMatrix<16, 8>();
}

TEST(DistanceMatrix, Cosine_16x16) {
  TestCosineMatrix<16, 16>();
}

TEST(DistanceMatrix, Cosine_32x1) {
  TestCosineMatrix<32, 1>();
}

TEST(DistanceMatrix, Cosine_32x2) {
  TestCosineMatrix<32, 2>();
}

TEST(DistanceMatrix, Cosine_32x4) {
  TestCosineMatrix<32, 4>();
}

TEST(DistanceMatrix, Cosine_32x8) {
  TestCosineMatrix<32, 8>();
}

TEST(DistanceMatrix, Cosine_32x16) {
  TestCosineMatrix<32, 16>();
}

TEST(DistanceMatrix, Cosine_32x32) {
  TestCosineMatrix<32, 32>();
}

TEST(DistanceMatrix, Cosine_64x1) {
  TestCosineMatrix<64, 1>();
}

TEST(DistanceMatrix, Cosine_64x2) {
  TestCosineMatrix<64, 2>();
}

TEST(DistanceMatrix, Cosine_64x4) {
  TestCosineMatrix<64, 4>();
}

TEST(DistanceMatrix, Cosine_64x8) {
  TestCosineMatrix<64, 8>();
}

TEST(DistanceMatrix, Cosine_64x16) {
  TestCosineMatrix<64, 16>();
}

TEST(DistanceMatrix, Cosine_64x32) {
  TestCosineMatrix<64, 32>();
}

TEST(DistanceMatrix, Cosine_64x64) {
  TestCosineMatrix<64, 64>();
}

TEST(DistanceMatrix, Cosine_128x1) {
  TestCosineMatrix<128, 1>();
}

TEST(DistanceMatrix, Cosine_128x2) {
  TestCosineMatrix<128, 2>();
}

TEST(DistanceMatrix, Cosine_128x4) {
  TestCosineMatrix<128, 4>();
}

TEST(DistanceMatrix, Cosine_128x8) {
  TestCosineMatrix<128, 8>();
}

TEST(DistanceMatrix, Cosine_128x16) {
  TestCosineMatrix<128, 16>();
}

TEST(DistanceMatrix, Cosine_128x32) {
  TestCosineMatrix<128, 32>();
}

TEST(DistanceMatrix, Cosine_128x64) {
  TestCosineMatrix<128, 64>();
}

TEST(DistanceMatrix, Cosine_128x128) {
  TestCosineMatrix<128, 128>();
}

// Benchmark that times three ways of computing FP32 cosine distances and
// prints the elapsed microseconds of each pass to std::cout.
//
// Template parameters:
//   M - batch size (vectors per block)
//   N - query count
//   B - block count
//   D - vector dimension
//
// The values written to `results` are overwritten by every call and never
// inspected; only the timings are reported.
template <size_t M, size_t N, size_t B, size_t D>
void CosineBenchmark(void) {
  const size_t dimension = D;
  const size_t batch_size = M;
  const size_t block_size = B;
  const size_t query_size = N;
  const size_t matrix_size = block_size * batch_size * dimension;
  const size_t query_matrix_size = dimension * query_size;

  // matrix1/query1 hold the generated data; matrix2/query2 receive the
  // MatrixTranspose output used by the batched kernels.
  std::vector<float> matrix1(matrix_size);
  std::vector<float> matrix2(matrix_size);
  std::vector<float> query1(query_matrix_size);
  std::vector<float> query2(query_matrix_size);

  // Random values in [-1, 1), seeded from std::random_device.
  std::mt19937 gen((std::random_device())());
  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = dist(gen);
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = dist(gen);
  }

  // Each block of batch_size vectors is transposed independently.
  for (size_t i = 0; i < block_size; ++i) {
    size_t start_pos = i * batch_size * dimension;
    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], dimension,
                    batch_size);
  }
  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);

  ElapsedTime elapsed_time;
  std::vector<float> results(batch_size * query_size);

  // Header line: intrinsics string, dimension and the M * N * B configuration.
  std::cout << "# (" << IntelIntrinsics() << ") FP32 " << dimension << "d, "
            << batch_size << " * " << query_size << " * " << block_size
            << std::endl;

  // 1 Batched Cosine
  // Transposed block against one query from query1 at a time.
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const float *matrix_batch = &matrix2[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const float *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      CosineDistanceMatrix<float, batch_size, 1>::Compute(
          matrix_batch, current_query, dimension, current_results);
    }
  }
  std::cout << "* 1 Batched Cosine (us) \t" << elapsed_time.micro_seconds()
            << std::endl;

  // N Batched Cosine
  // Transposed block against the transposed query matrix in a single call.
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const float *matrix_batch = &matrix2[i * batch_size * dimension];

    CosineDistanceMatrix<float, batch_size, query_size>::Compute(
        matrix_batch, &query2[0], dimension, results.data());
  }
  std::cout << "* N Batched Cosine (us) \t" << elapsed_time.micro_seconds()
            << std::endl;

  // Unbatched Cosine
  // One 1x1 call per (vector, query) pair on the untransposed data.
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const float *matrix_batch = &matrix1[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const float *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      for (size_t k = 0; k < batch_size; ++k) {
        CosineDistanceMatrix<float, 1, 1>::Compute(&matrix_batch[k * dimension],
                                                   current_query, dimension,
                                                   &current_results[k]);
      }
    }
  }
  std::cout << "* Unbatched Cosine (us) \t" << elapsed_time.micro_seconds()
            << std::endl;
}

// Runs CosineBenchmark<M, N, B, D> over a grid of batch/query sizes with 512
// blocks of 64-dimensional vectors, plus one 1x1 run with 1024 blocks of 256
// dimensions. The DISABLED_ prefix keeps GoogleTest from running it by default.
TEST(DistanceMatrix, DISABLED_Cosine_Benchmark) {
  CosineBenchmark<2, 1, 512, 64>();
  CosineBenchmark<2, 2, 512, 64>();
  CosineBenchmark<4, 1, 512, 64>();
  CosineBenchmark<4, 2, 512, 64>();
  CosineBenchmark<4, 4, 512, 64>();
  CosineBenchmark<8, 1, 512, 64>();
  CosineBenchmark<8, 2, 512, 64>();
  CosineBenchmark<8, 4, 512, 64>();
  CosineBenchmark<8, 8, 512, 64>();
  CosineBenchmark<16, 1, 512, 64>();
  CosineBenchmark<16, 2, 512, 64>();
  CosineBenchmark<16, 4, 512, 64>();
  CosineBenchmark<16, 8, 512, 64>();
  CosineBenchmark<16, 16, 512, 64>();
  CosineBenchmark<32, 1, 512, 64>();
  CosineBenchmark<32, 2, 512, 64>();
  CosineBenchmark<32, 4, 512, 64>();
  CosineBenchmark<32, 8, 512, 64>();
  CosineBenchmark<32, 16, 512, 64>();
  CosineBenchmark<32, 32, 512, 64>();
  CosineBenchmark<64, 1, 512, 64>();
  CosineBenchmark<64, 2, 512, 64>();
  CosineBenchmark<64, 4, 512, 64>();
  CosineBenchmark<64, 8, 512, 64>();
  CosineBenchmark<128, 1, 512, 64>();
  CosineBenchmark<1, 1, 1024, 256>();
}

#endif

================================================
FILE: tests/ailego/math/cosine_distance_matrix_int8_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <string>
#include <ailego/container/bitmap.h>
#include <ailego/internal/cpu_features.h>
#include <ailego/math/distance.h>
#include <ailego/utility/math_helper.h>
#include <gtest/gtest.h>
#include <zvec/ailego/container/vector.h>
#include <zvec/ailego/utility/time_helper.h>

using namespace zvec::ailego;

// Returns the string reported by internal::CpuFeatures::Intrinsics(); the
// benchmark in this file prints it as part of its header line.
inline const char *IntelIntrinsics(void) {
  return internal::CpuFeatures::Intrinsics();
}

// Transposes `src`, read as N rows of M 32-bit words, into `dst`, written as
// M rows of N words. The source is consumed sequentially; element k of `src`
// (row k / M, column k % M) lands at dst[column * N + row].
inline void MatrixTranspose(uint32_t *dst, const uint32_t *src, size_t M,
                            size_t N) {
  size_t read_pos = 0;
  for (size_t row = 0; row < N; ++row) {
    for (size_t col = 0; col < M; ++col) {
      dst[col * N + row] = src[read_pos++];
    }
  }
}

// Computes the cosine distance between two fixed-size INT8 vectors.
//
// For each input a byte buffer of N + 4 bytes is built: the first N bytes are
// the elements divided by the norm reported by Norm2Matrix (the float quotient
// is implicitly converted to int8_t on store), and the float norm itself is
// copied into the bytes starting at offset N. Both buffers are then passed to
// Distance::Cosine with a dimension of N + 4.
template <size_t N>
static float CosineDistance(const FixedVector<int8_t, N> &lhs,
                            const FixedVector<int8_t, N> &rhs) {
  // N payload bytes plus 4 trailing bytes that receive the norm.
  const size_t packed_size = lhs.size() + 4;

  float lhs_norm = 0.0f;
  float rhs_norm = 0.0f;
  Norm2Matrix<int8_t, 1>::Compute(lhs.data(), N, &lhs_norm);
  Norm2Matrix<int8_t, 1>::Compute(rhs.data(), N, &rhs_norm);

  // Zero-filled byte storage for the packed representations.
  std::string lhs_packed(packed_size * sizeof(int8_t), '\0');
  std::string rhs_packed(packed_size * sizeof(int8_t), '\0');

  int8_t *lhs_bytes = reinterpret_cast<int8_t *>(&(lhs_packed[0]));
  int8_t *rhs_bytes = reinterpret_cast<int8_t *>(&(rhs_packed[0]));

  for (size_t i = 0; i < N; ++i) {
    lhs_bytes[i] = lhs[i] / lhs_norm;
    rhs_bytes[i] = rhs[i] / rhs_norm;
  }

  // Append each norm right after its N payload bytes.
  ::memcpy(lhs_bytes + N, &lhs_norm, sizeof(float));
  ::memcpy(rhs_bytes + N, &rhs_norm, sizeof(float));

  return Distance::Cosine(reinterpret_cast<const int8_t *>(lhs_packed.data()),
                          reinterpret_cast<const int8_t *>(rhs_packed.data()),
                          packed_size);
}

#if 0

// Checks CosineDistance on hand-written INT8 vectors of 8, 16, 32 and 47
// elements against precomputed expected values.
TEST(DistanceMatrix, Cosine_General) {
  int8_t a8[] = {127, 0, 1, 2, -127, -127, -127, -127};
  int8_t b8[] = {-127, -127, -127, -127, 1, 2, 1, 127};
  int8_t a16[] = {127, 127, 16,   3,   100,  -127, 1,    2,
                  3,   4,   -127, 100, -127, -127, -127, -127};
  int8_t b16[] = {-127, 123, -127, -127, -127, -127, 127, 127,
                  1,    2,   3,    4,    127,  127,  121, 16};
  // b32 is the element-wise negation of a32; the expected distance is 2.0.
  int8_t a32[] = {127, 127,  0,    0,   -127, -127, 0,    0,    0,    0, 0,
                  0,   -127, -127, 127, 127,  0,    0,    -127, -127, 0, 0,
                  127, 127,  127,  127, 0,    0,    -127, -127, 0,    0};
  int8_t b32[] = {-127, -127, 0,    0,    127,  127, 0,   0,   0,   0, 0,
                  0,    127,  127,  -127, -127, 0,   0,   127, 127, 0, 0,
                  -127, -127, -127, -127, 0,    0,   127, 127, 0,   0};

  // 47 elements: a length that is not a multiple of 4.
  int8_t a47[] = {127, 2, 0,    0,    -127, -127, 0,    0,    0,    0,
                  0,   0, -127, -127, 127,  127,  0,    0,    -127, -127,
                  0,   0, 127,  5,    127,  127,  0,    0,    -127, -127,
                  0,   0, -127, 112,  -127, -127, -127, -127, 127,  127,
                  1,   2, 3,    4,    127,  127,  120};
  int8_t b47[] = {-127, 1, 0,    0,   127,  127,  0,   0,    0,   0,
                  0,    0, 127,  127, -127, -127, 0,   0,    127, 127,
                  0,    0, -127, 3,   -127, -127, 0,   0,    127, 127,
                  0,    0, 127,  127, 80,   111,  122, -127, 1,   2,
                  3,    4, -127, 112, -127, -127, -127};

  // FixedVector<int8_t, N>::Cast presumably views the raw array in place as a
  // fixed vector - confirm against the FixedVector definition.
  EXPECT_FLOAT_EQ(1.4109956f,
                  CosineDistance(*FixedVector<int8_t, 8>::Cast(a8),
                                 *FixedVector<int8_t, 8>::Cast(b8)));
  EXPECT_FLOAT_EQ(1.3013078f,
                  CosineDistance(*FixedVector<int8_t, 16>::Cast(a16),
                                 *FixedVector<int8_t, 16>::Cast(b16)));
  EXPECT_FLOAT_EQ(2.0f, CosineDistance(*FixedVector<int8_t, 32>::Cast(a32),
                                       *FixedVector<int8_t, 32>::Cast(b32)));
  EXPECT_FLOAT_EQ(1.7623165f,
                  CosineDistance(*FixedVector<int8_t, 47>::Cast(a47),
                                 *FixedVector<int8_t, 47>::Cast(b47)));
}

// Cross-checks CosineDistanceMatrix<int8_t, M, N> against the
// CosineDistanceMatrix<int8_t, 1, 1> kernel on random INT8 data.
//
// M is the batch size and N the query count. The dimension is a random
// multiple of 4 in [4, 256]. The batched kernel reads buffers transposed in
// units of 4 bytes (one uint32_t word), hence the `dim / 4` word count handed
// to MatrixTranspose. Results are compared with EXPECT_FLOAT_EQ.
template <size_t M, size_t N>
void TestCosineMatrix(void) {
  std::mt19937 rng((std::random_device())());

  const size_t dim = (std::uniform_int_distribution<size_t>(1, 64))(rng) << 2;
  const size_t result_count = M * N;

  std::vector<int8_t> batch_src(M * dim);
  std::vector<int8_t> batch_transposed(M * dim);
  std::vector<int8_t> query_src(N * dim);
  std::vector<int8_t> query_transposed(N * dim);
  std::vector<float> result1(result_count);
  std::vector<float> result2(result_count);

  // Fill the batch first and the queries second, values in [-127, 127].
  std::uniform_int_distribution<int> byte_dist(-127, 127);
  for (int8_t &value : batch_src) {
    value = static_cast<int8_t>(byte_dist(rng));
  }
  for (int8_t &value : query_src) {
    value = static_cast<int8_t>(byte_dist(rng));
  }

  MatrixTranspose(reinterpret_cast<uint32_t *>(&batch_transposed[0]),
                  reinterpret_cast<const uint32_t *>(batch_src.data()),
                  dim / 4, M);
  MatrixTranspose(reinterpret_cast<uint32_t *>(&query_transposed[0]),
                  reinterpret_cast<const uint32_t *>(query_src.data()),
                  dim / 4, N);

  // Reference results, stored query-major: result1[q * M + b].
  for (size_t q = 0; q < N; ++q) {
    const int8_t *query = &query_src[q * dim];
    float *row = &result1[q * M];

    for (size_t b = 0; b < M; ++b) {
      CosineDistanceMatrix<int8_t, 1, 1>::Compute(&batch_src[b * dim], query,
                                                  dim, &row[b]);
    }
  }

  // Batched results from the transposed inputs.
  CosineDistanceMatrix<int8_t, M, N>::Compute(
      &batch_transposed[0], &query_transposed[0], dim, &result2[0]);

  for (size_t i = 0; i < result_count; ++i) {
    EXPECT_FLOAT_EQ(result1[i], result2[i]);
  }
}

// One test per <batch size, query size> combination. Each test only forwards
// to TestCosineMatrix<M, N>() with the sizes spelled out in its name.
TEST(DistanceMatrix, Cosine_1x1) {
  TestCosineMatrix<1, 1>();
}

TEST(DistanceMatrix, Cosine_2x1) {
  TestCosineMatrix<2, 1>();
}

TEST(DistanceMatrix, Cosine_2x2) {
  TestCosineMatrix<2, 2>();
}

TEST(DistanceMatrix, Cosine_3x3) {
  TestCosineMatrix<3, 3>();
}

TEST(DistanceMatrix, Cosine_4x1) {
  TestCosineMatrix<4, 1>();
}

TEST(DistanceMatrix, Cosine_4x2) {
  TestCosineMatrix<4, 2>();
}

TEST(DistanceMatrix, Cosine_4x4) {
  TestCosineMatrix<4, 4>();
}

TEST(DistanceMatrix, Cosine_8x1) {
  TestCosineMatrix<8, 1>();
}

TEST(DistanceMatrix, Cosine_8x2) {
  TestCosineMatrix<8, 2>();
}

TEST(DistanceMatrix, Cosine_8x4) {
  TestCosineMatrix<8, 4>();
}

TEST(DistanceMatrix, Cosine_8x8) {
  TestCosineMatrix<8, 8>();
}

TEST(DistanceMatrix, Cosine_16x1) {
  TestCosineMatrix<16, 1>();
}

TEST(DistanceMatrix, Cosine_16x2) {
  TestCosineMatrix<16, 2>();
}

TEST(DistanceMatrix, Cosine_16x4) {
  TestCosineMatrix<16, 4>();
}

TEST(DistanceMatrix, Cosine_16x8) {
  TestCosineMatrix<16, 8>();
}

TEST(DistanceMatrix, Cosine_16x16) {
  TestCosineMatrix<16, 16>();
}

TEST(DistanceMatrix, Cosine_32x1) {
  TestCosineMatrix<32, 1>();
}

TEST(DistanceMatrix, Cosine_32x2) {
  TestCosineMatrix<32, 2>();
}

TEST(DistanceMatrix, Cosine_32x4) {
  TestCosineMatrix<32, 4>();
}

TEST(DistanceMatrix, Cosine_32x8) {
  TestCosineMatrix<32, 8>();
}

TEST(DistanceMatrix, Cosine_32x16) {
  TestCosineMatrix<32, 16>();
}

TEST(DistanceMatrix, Cosine_32x32) {
  TestCosineMatrix<32, 32>();
}

TEST(DistanceMatrix, Cosine_64x1) {
  TestCosineMatrix<64, 1>();
}

TEST(DistanceMatrix, Cosine_64x2) {
  TestCosineMatrix<64, 2>();
}

TEST(DistanceMatrix, Cosine_64x4) {
  TestCosineMatrix<64, 4>();
}

TEST(DistanceMatrix, Cosine_64x8) {
  TestCosineMatrix<64, 8>();
}

TEST(DistanceMatrix, Cosine_64x16) {
  TestCosineMatrix<64, 16>();
}

TEST(DistanceMatrix, Cosine_64x32) {
  TestCosineMatrix<64, 32>();
}

// Batch of 64 vectors against 64 queries, as the test name states.
TEST(DistanceMatrix, Cosine_64x64) {
  TestCosineMatrix<64, 64>();
}

// Batch size 128 against 1 to 32 queries; each test forwards to
// TestCosineMatrix<128, N>() with the query count spelled out in its name.
TEST(DistanceMatrix, Cosine_128x1) {
  TestCosineMatrix<128, 1>();
}

TEST(DistanceMatrix, Cosine_128x2) {
  TestCosineMatrix<128, 2>();
}

TEST(DistanceMatrix, Cosine_128x4) {
  TestCosineMatrix<128, 4>();
}

TEST(DistanceMatrix, Cosine_128x8) {
  TestCosineMatrix<128, 8>();
}

TEST(DistanceMatrix, Cosine_128x16) {
  TestCosineMatrix<128, 16>();
}

TEST(DistanceMatrix, Cosine_128x32) {
  TestCosineMatrix<128, 32>();
}

// Batch of 128 vectors against 64 queries, as the test name states.
TEST(DistanceMatrix, Cosine_128x64) {
  TestCosineMatrix<128, 64>();
}

// Largest configuration in this file: 128 vectors against 128 queries.
TEST(DistanceMatrix, Cosine_128x128) {
  TestCosineMatrix<128, 128>();
}

// Benchmark that times three ways of computing INT8 cosine distances and
// prints the elapsed microseconds of each pass to std::cout.
//
// Template parameters:
//   M - batch size (vectors per block)
//   N - query count
//   B - block count
//   D - vector dimension (passed to MatrixTranspose as D / 4 32-bit words)
//
// The values written to `results` are overwritten by every call and never
// inspected; only the timings are reported.
template <size_t M, size_t N, size_t B, size_t D>
void CosineBenchmark(void) {
  const size_t dimension = D;
  const size_t batch_size = M;
  const size_t block_size = B;
  const size_t query_size = N;
  const size_t matrix_size = block_size * batch_size * dimension;
  const size_t query_matrix_size = dimension * query_size;

  // matrix1/query1 hold the generated data; matrix2/query2 receive the
  // MatrixTranspose output used by the batched kernels.
  std::vector<int8_t> matrix1(matrix_size);
  std::vector<int8_t> matrix2(matrix_size);
  std::vector<int8_t> query1(query_matrix_size);
  std::vector<int8_t> query2(query_matrix_size);

  // Random values in [-127, 127], seeded from std::random_device.
  std::mt19937 gen((std::random_device())());
  std::uniform_int_distribution<int> dist(-127, 127);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = (int8_t)dist(gen);
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = (int8_t)dist(gen);
  }

  // Each block is transposed independently, 4 bytes (one uint32_t) at a time.
  for (size_t i = 0; i < block_size; ++i) {
    size_t start_pos = i * batch_size * dimension;
    MatrixTranspose((uint32_t *)(&matrix2[start_pos]),
                    (const uint32_t *)(&matrix1[start_pos]), dimension / 4,
                    batch_size);
  }
  MatrixTranspose((uint32_t *)(&query2[0]), (const uint32_t *)query1.data(),
                  dimension / 4, query_size);

  ElapsedTime elapsed_time;
  std::vector<float> results(batch_size * query_size);

  // Header line: intrinsics string, dimension and the M * N * B configuration.
  std::cout << "# (" << IntelIntrinsics() << ") INT8 " << dimension << "d, "
            << batch_size << " * " << query_size << " * " << block_size
            << std::endl;

  // 1 Batched Cosine
  // Transposed block against one query from query1 at a time.
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const int8_t *matrix_batch = &matrix2[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const int8_t *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      CosineDistanceMatrix<int8_t, batch_size, 1>::Compute(
          matrix_batch, current_query, dimension, current_results);
    }
  }
  std::cout << "* 1 Batched Cosine (us) \t" << elapsed_time.micro_seconds()
            << std::endl;

  // N Batched Cosine
  // Transposed block against the transposed query matrix in a single call.
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const int8_t *matrix_batch = &matrix2[i * batch_size * dimension];

    CosineDistanceMatrix<int8_t, batch_size, query_size>::Compute(
        matrix_batch, &query2[0], dimension, results.data());
  }
  std::cout << "* N Batched Cosine (us) \t" << elapsed_time.micro_seconds()
            << std::endl;

  // Unbatched Cosine
  // One 1x1 call per (vector, query) pair on the untransposed data.
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const int8_t *matrix_batch = &matrix1[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const int8_t *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      for (size_t k = 0; k < batch_size; ++k) {
        CosineDistanceMatrix<int8_t, 1, 1>::Compute(
            &matrix_batch[k * dimension], current_query, dimension,
            &current_results[k]);
      }
    }
  }
  std::cout << "* Unbatched Cosine (us) \t" << elapsed_time.micro_seconds()
            << std::endl;
}

// Runs CosineBenchmark<M, N, B, D> over a grid of batch/query sizes with 512
// blocks of 128-dimensional vectors, plus one 1x1 run with 1024 blocks of 256
// dimensions. The DISABLED_ prefix keeps GoogleTest from running it by default.
TEST(DistanceMatrix, DISABLED_Cosine_Benchmark) {
  CosineBenchmark<2, 1, 512, 128>();
  CosineBenchmark<2, 2, 512, 128>();
  CosineBenchmark<4, 1, 512, 128>();
  CosineBenchmark<4, 2, 512, 128>();
  CosineBenchmark<4, 4, 512, 128>();
  CosineBenchmark<8, 1, 512, 128>();
  CosineBenchmark<8, 2, 512, 128>();
  CosineBenchmark<8, 4, 512, 128>();
  CosineBenchmark<8, 8, 512, 128>();
  CosineBenchmark<16, 1, 512, 128>();
  CosineBenchmark<16, 2, 512, 128>();
  CosineBenchmark<16, 4, 512, 128>();
  CosineBenchmark<16, 8, 512, 128>();
  CosineBenchmark<16, 16, 512, 128>();
  CosineBenchmark<32, 1, 512, 128>();
  CosineBenchmark<32, 2, 512, 128>();
  CosineBenchmark<32, 4, 512, 128>();
  CosineBenchmark<32, 8, 512, 128>();
  CosineBenchmark<32, 16, 512, 128>();
  CosineBenchmark<32, 32, 512, 128>();
  CosineBenchmark<64, 1, 512, 128>();
  CosineBenchmark<64, 2, 512, 128>();
  CosineBenchmark<64, 4, 512, 128>();
  CosineBenchmark<64, 8, 512, 128>();
  CosineBenchmark<128, 1, 512, 128>();
  CosineBenchmark<1, 1, 1024, 256>();
}

#endif

================================================
FILE: tests/ailego/math/euclidean_distance_matrix_fp16_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <functional>
#include <random>
#include <string>
#include <thread>
#include <vector>
#include <ailego/container/bitmap.h>
#include <ailego/internal/cpu_features.h>
#include <ailego/math/distance.h>
#include <ailego/utility/math_helper.h>
#include <gtest/gtest.h>
#include <zvec/ailego/container/vector.h>
#include <zvec/ailego/utility/time_helper.h>

using namespace zvec::ailego;

// Returns the string reported by internal::CpuFeatures::Intrinsics().
static inline const char *IntelIntrinsics(void) {
  return internal::CpuFeatures::Intrinsics();
}

// Transposes `src`, read as N rows of M Float16 values, into `dst`, written as
// M rows of N values. Source element k (row k / M, column k % M) is stored at
// dst[column * N + row].
static inline void MatrixTranspose(Float16 *dst, const Float16 *src, size_t M,
                                   size_t N) {
  const size_t total = N * M;
  for (size_t k = 0; k < total; ++k) {
    const size_t row = k / M;
    const size_t col = k % M;
    dst[col * N + row] = src[k];
  }
}

// Convenience wrapper: forwards the raw data pointers and lhs.size() of two
// equally sized Float16 fixed vectors to Distance::Euclidean and returns its
// result.
template <size_t N>
static float EuclideanDistance(const FixedVector<Float16, N> &lhs,
                               const FixedVector<Float16, N> &rhs) {
  return Distance::Euclidean(lhs.data(), rhs.data(), lhs.size());
}

// Convenience wrapper: forwards the raw data pointers and lhs.size() of two
// equally sized Float16 fixed vectors to Distance::SquaredEuclidean and
// returns its result.
template <size_t N>
static float SquaredEuclideanDistance(const FixedVector<Float16, N> &lhs,
                                      const FixedVector<Float16, N> &rhs) {
  return Distance::SquaredEuclidean(lhs.data(), rhs.data(), lhs.size());
}

// Checks EuclideanDistance on small hand-written Float16 vectors of 1, 3 and
// 11 elements against precomputed expected values.
TEST(DistanceMatrix, Euclidean_General) {
  // Identical single-element vectors: distance is exactly zero.
  FixedVector<Float16, 1> a{0.0f}, b{0.0f};
  EXPECT_FLOAT_EQ(0.0f, EuclideanDistance(a, b));

  // Differences are (1, 2, 3), so the distance is sqrt(14).
  FixedVector<Float16, 3> c{1.0f, 2.0f, 3.0f}, d{2.0f, 4.0f, 6.0f};
  EXPECT_FLOAT_EQ(3.741657f, EuclideanDistance(c, d));

  // Values that are not exactly representable in half precision are compared
  // through MathHelper::IsAlmostEqual with a tolerance argument of 1000
  // instead of EXPECT_FLOAT_EQ.
  FixedVector<Float16, 11> e{1.0f, 2.0f, 3.0f, 0.2f, 0.3f, 0.1f,
                             5.2f, 2.1f, 7.1f, 6.8f, 1.2f},
      f{2.0f, 4.0f, 6.0f, 0.6f, 0.7f, 0.9f, 1.0f, 2.3f, 3.4f, 4.5f, 6.4f};
  EXPECT_TRUE(
      MathHelper::IsAlmostEqual(8.86837f, EuclideanDistance(e, f), 1000));
}

// Checks SquaredEuclideanDistance for every vector length from 1 to 15.
//
// Lengths 1-10 compare a vector with an identical copy and expect exactly 0.
// Lengths 11-15 compare a vector initialized from the single value 0.0f with
// a non-zero vector and expect a precomputed value, compared through
// MathHelper::IsAlmostEqual with a tolerance argument of 1000.
TEST(DistanceMatrix, SquaredEuclidean_General) {
  FixedVector<Float16, 1> a{0.0f}, b{0.0f};
  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(a, b));

  FixedVector<Float16, 2> c{0.0f, 0.1f}, d{0.0f, 0.1f};
  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(c, d));

  FixedVector<Float16, 3> e{0.0f, 0.1f, 0.2f}, f{0.0f, 0.1f, 0.2f};
  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(e, f));

  FixedVector<Float16, 4> g{0.0f, 0.1f, 0.2f, 0.3f}, h{0.0f, 0.1f, 0.2f, 0.3f};
  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(g, h));

  FixedVector<Float16, 5> i{0.0f, 0.1f, 0.2f, 0.3f, 0.4f},
      j{0.0f, 0.1f, 0.2f, 0.3f, 0.4f};
  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(i, j));

  FixedVector<Float16, 6> l{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f},
      k{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f};
  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(l, k));

  FixedVector<Float16, 7> m{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f},
      n{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f};
  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(m, n));

  FixedVector<Float16, 8> o{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f},
      p{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f};
  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(o, p));

  FixedVector<Float16, 9> q{0.0f, 0.1f, 0.2f, 0.3f, 0.4f,
                            0.5f, 0.6f, 0.7f, 0.8f},
      r{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f};
  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(q, r));

  FixedVector<Float16, 10> s{0.0f, 0.1f, 0.2f, 0.3f, 0.4f,
                             0.5f, 0.6f, 0.7f, 0.8f, 0.9f},
      t{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f};
  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(s, t));

  // From here on the first operand is built from {0.0f} only; the expected
  // values are close to the sum of squares of the second operand (3.85 for
  // v), which presumes the remaining elements of the first operand are zero -
  // confirm against the FixedVector initializer-list constructor.
  FixedVector<Float16, 11> u{0.0f},
      v{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.0f};
  EXPECT_TRUE(MathHelper::IsAlmostEqual(3.84983f,
                                        SquaredEuclideanDistance(u, v), 1000));

  FixedVector<Float16, 12> w{0.0f},
      x{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.0f, 1.1f};
  EXPECT_TRUE(MathHelper::IsAlmostEqual(5.05897f,
                                        SquaredEuclideanDistance(w, x), 1000));

  FixedVector<Float16, 13> y{0.0f}, z{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f,
                                      0.7f, 0.8f, 0.9f, 1.0f, 1.1f, 1.2f};
  EXPECT_TRUE(MathHelper::IsAlmostEqual(6.499438f,
                                        SquaredEuclideanDistance(y, z), 1000));

  FixedVector<Float16, 14> x14{0.0f},
      y14{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f,
          0.7f, 0.8f, 0.9f, 1.0f, 1.1f, 1.2f, 2.0f};
  EXPECT_TRUE(MathHelper::IsAlmostEqual(
      10.49944f, SquaredEuclideanDistance(x14, y14), 1000));

  FixedVector<Float16, 15> x15{0.0f},
      y15{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f,
          0.8f, 0.9f, 1.0f, 1.1f, 1.2f, 2.0f, 3.0f};
  EXPECT_TRUE(MathHelper::IsAlmostEqual(
      19.49944f, SquaredEuclideanDistance(x15, y15), 1000));
}

template <size_t M, size_t N>
void TestEuclideanMatrix(void) {
  std::mt19937 gen((std::random_device())());

  const size_t batch_size = M;
  const size_t query_size = N;
  size_t dimension = (std::uniform_int_distribution<size_t>(32, 65))(gen);
  size_t matrix_size = batch_size * dimension;
  size_t query_matrix_size = query_size * dimension;

  std::vector<Float16> matrix1(matrix_size);
  std::vector<Float16> matrix2(matrix_size);
  std::vector<Float16> query1(query_matrix_size);
  std::vector<Float16> query2(query_matrix_size);
  std::vector<float> result1(batch_size * query_size);
  std::vector<float> result2(batch_size * query_size);

  std::uniform_real_distribution<float> dist(0.0, 1.0);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = dist(gen);
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = dist(gen);
  }
  MatrixTranspose(&matrix2[0], matrix1.data(), dimension, batch_size);
  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);

  for (size_t i = 0; i < query_size; ++i) {
    const Float16 *cur_query = &query1[i * dimension];
    float *query_result = &result1[i * batch_size];

    for (size_t j = 0; j < batch_size; ++j) {
      EuclideanDistanceMatrix<Float16, 1, 1>::Compute(
          &matrix1[j * dimension], cur_query, dimension, &query_result[j]);
    }
  }
  EuclideanDistanceMatrix<Float16, batch_size, query_size>::Compute(
      &matrix2[0], &query2[0], dimension, &result2[0]);

  for (size_t i = 0; i < batch_size * query_size; ++i) {
    // EXPECT_FLOAT_EQ(result1[i], result2[i]);
    EXPECT_TRUE(MathHelper::IsAlmostEqual(result1[i], result2[i], 10000));
  }
}

template <size_t M, size_t N>
void TestSquaredEuclideanMatrix(void) {
  std::mt19937 gen((std::random_device())());

  const size_t batch_size = M;
  const size_t query_size = N;
  size_t dimension = (std::uniform_int_distribution<size_t>(32, 65))(gen);
  size_t matrix_size = batch_size * dimension;
  size_t query_matrix_size = query_size * dimension;

  std::vector<Float16> matrix1(matrix_size);
  std::vector<Float16> matrix2(matrix_size);
  std::vector<Float16> query1(query_matrix_size);
  std::vector<Float16> query2(query_matrix_size);
  std::vector<float> result1(batch_size * query_size);
  std::vector<float> result2(batch_size * query_size);

  std::uniform_real_distribution<float> dist(0.0, 1.0);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = dist(gen);
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = dist(gen);
  }
  MatrixTranspose(&matrix2[0], matrix1.data(), dimension, batch_size);
  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);

  for (size_t i = 0; i < query_size; ++i) {
    const Float16 *cur_query = &query1[i * dimension];
    float *query_result = &result1[i * batch_size];

    for (size_t j = 0; j < batch_size; ++j) {
      SquaredEuclideanDistanceMatrix<Float16, 1, 1>::Compute(
          &matrix1[j * dimension], cur_query, dimension, &query_result[j]);
    }
  }
  SquaredEuclideanDistanceMatrix<Float16, batch_size, query_size>::Compute(
      &matrix2[0], &query2[0], dimension, &result2[0]);

  for (size_t i = 0; i < batch_size * query_size; ++i) {
    // EXPECT_FLOAT_EQ(result1[i], result2[i]);
    EXPECT_TRUE(MathHelper::IsAlmostEqual(result1[i], result2[i], 10000));
  }
}

// Float16 Euclidean kernel consistency cases. Each test named Euclidean_MxN
// runs TestEuclideanMatrix<M, N>, i.e. a batch of M vectors against N
// queries, comparing the M x N kernel with the 1 x 1 kernel.
TEST(DistanceMatrix, Euclidean_1x1) {
  TestEuclideanMatrix<1, 1>();
}

TEST(DistanceMatrix, Euclidean_2x1) {
  TestEuclideanMatrix<2, 1>();
}

TEST(DistanceMatrix, Euclidean_2x2) {
  TestEuclideanMatrix<2, 2>();
}

TEST(DistanceMatrix, Euclidean_3x3) {
  TestEuclideanMatrix<3, 3>();
}

TEST(DistanceMatrix, Euclidean_4x1) {
  TestEuclideanMatrix<4, 1>();
}

TEST(DistanceMatrix, Euclidean_4x2) {
  TestEuclideanMatrix<4, 2>();
}

TEST(DistanceMatrix, Euclidean_4x4) {
  TestEuclideanMatrix<4, 4>();
}

TEST(DistanceMatrix, Euclidean_8x1) {
  TestEuclideanMatrix<8, 1>();
}

TEST(DistanceMatrix, Euclidean_8x2) {
  TestEuclideanMatrix<8, 2>();
}

TEST(DistanceMatrix, Euclidean_8x4) {
  TestEuclideanMatrix<8, 4>();
}

TEST(DistanceMatrix, Euclidean_8x8) {
  TestEuclideanMatrix<8, 8>();
}

TEST(DistanceMatrix, Euclidean_16x1) {
  TestEuclideanMatrix<16, 1>();
}

TEST(DistanceMatrix, Euclidean_16x2) {
  TestEuclideanMatrix<16, 2>();
}

TEST(DistanceMatrix, Euclidean_16x4) {
  TestEuclideanMatrix<16, 4>();
}

TEST(DistanceMatrix, Euclidean_16x8) {
  TestEuclideanMatrix<16, 8>();
}

TEST(DistanceMatrix, Euclidean_16x16) {
  TestEuclideanMatrix<16, 16>();
}

TEST(DistanceMatrix, Euclidean_32x1) {
  TestEuclideanMatrix<32, 1>();
}

TEST(DistanceMatrix, Euclidean_32x2) {
  TestEuclideanMatrix<32, 2>();
}

TEST(DistanceMatrix, Euclidean_32x4) {
  TestEuclideanMatrix<32, 4>();
}

TEST(DistanceMatrix, Euclidean_32x8) {
  TestEuclideanMatrix<32, 8>();
}

TEST(DistanceMatrix, Euclidean_32x16) {
  TestEuclideanMatrix<32, 16>();
}

TEST(DistanceMatrix, Euclidean_32x32) {
  TestEuclideanMatrix<32, 32>();
}

TEST(DistanceMatrix, Euclidean_64x1) {
  TestEuclideanMatrix<64, 1>();
}

TEST(DistanceMatrix, Euclidean_64x2) {
  TestEuclideanMatrix<64, 2>();
}

TEST(DistanceMatrix, Euclidean_64x4) {
  TestEuclideanMatrix<64, 4>();
}

TEST(DistanceMatrix, Euclidean_64x8) {
  TestEuclideanMatrix<64, 8>();
}

TEST(DistanceMatrix, Euclidean_64x16) {
  TestEuclideanMatrix<64, 16>();
}

TEST(DistanceMatrix, Euclidean_64x32) {
  TestEuclideanMatrix<64, 32>();
}

TEST(DistanceMatrix, Euclidean_64x64) {
  TestEuclideanMatrix<64, 64>();
}

TEST(DistanceMatrix, Euclidean_128x1) {
  TestEuclideanMatrix<128, 1>();
}

TEST(DistanceMatrix, Euclidean_128x2) {
  TestEuclideanMatrix<128, 2>();
}

TEST(DistanceMatrix, Euclidean_128x4) {
  TestEuclideanMatrix<128, 4>();
}

TEST(DistanceMatrix, Euclidean_128x8) {
  TestEuclideanMatrix<128, 8>();
}

TEST(DistanceMatrix, Euclidean_128x16) {
  TestEuclideanMatrix<128, 16>();
}

TEST(DistanceMatrix, Euclidean_128x32) {
  TestEuclideanMatrix<128, 32>();
}

TEST(DistanceMatrix, Euclidean_128x64) {
  TestEuclideanMatrix<128, 64>();
}

TEST(DistanceMatrix, Euclidean_128x128) {
  TestEuclideanMatrix<128, 128>();
}

// Float16 squared-Euclidean kernel consistency cases. Each test named
// SquaredEuclidean_MxN runs TestSquaredEuclideanMatrix<M, N>, i.e. a batch of
// M vectors against N queries, comparing the M x N kernel with the 1 x 1
// kernel.
TEST(DistanceMatrix, SquaredEuclidean_1x1) {
  TestSquaredEuclideanMatrix<1, 1>();
}

TEST(DistanceMatrix, SquaredEuclidean_2x1) {
  TestSquaredEuclideanMatrix<2, 1>();
}

TEST(DistanceMatrix, SquaredEuclidean_2x2) {
  TestSquaredEuclideanMatrix<2, 2>();
}

TEST(DistanceMatrix, SquaredEuclidean_3x3) {
  TestSquaredEuclideanMatrix<3, 3>();
}

TEST(DistanceMatrix, SquaredEuclidean_4x1) {
  TestSquaredEuclideanMatrix<4, 1>();
}

TEST(DistanceMatrix, SquaredEuclidean_4x2) {
  TestSquaredEuclideanMatrix<4, 2>();
}

TEST(DistanceMatrix, SquaredEuclidean_4x4) {
  TestSquaredEuclideanMatrix<4, 4>();
}

TEST(DistanceMatrix, SquaredEuclidean_8x1) {
  TestSquaredEuclideanMatrix<8, 1>();
}

TEST(DistanceMatrix, SquaredEuclidean_8x2) {
  TestSquaredEuclideanMatrix<8, 2>();
}

TEST(DistanceMatrix, SquaredEuclidean_8x4) {
  TestSquaredEuclideanMatrix<8, 4>();
}

TEST(DistanceMatrix, SquaredEuclidean_8x8) {
  TestSquaredEuclideanMatrix<8, 8>();
}

TEST(DistanceMatrix, SquaredEuclidean_16x1) {
  TestSquaredEuclideanMatrix<16, 1>();
}

TEST(DistanceMatrix, SquaredEuclidean_16x2) {
  TestSquaredEuclideanMatrix<16, 2>();
}

TEST(DistanceMatrix, SquaredEuclidean_16x4) {
  TestSquaredEuclideanMatrix<16, 4>();
}

TEST(DistanceMatrix, SquaredEuclidean_16x8) {
  TestSquaredEuclideanMatrix<16, 8>();
}

TEST(DistanceMatrix, SquaredEuclidean_16x16) {
  TestSquaredEuclideanMatrix<16, 16>();
}

TEST(DistanceMatrix, SquaredEuclidean_32x1) {
  TestSquaredEuclideanMatrix<32, 1>();
}

TEST(DistanceMatrix, SquaredEuclidean_32x2) {
  TestSquaredEuclideanMatrix<32, 2>();
}

TEST(DistanceMatrix, SquaredEuclidean_32x4) {
  TestSquaredEuclideanMatrix<32, 4>();
}

TEST(DistanceMatrix, SquaredEuclidean_32x8) {
  TestSquaredEuclideanMatrix<32, 8>();
}

TEST(DistanceMatrix, SquaredEuclidean_32x16) {
  TestSquaredEuclideanMatrix<32, 16>();
}

TEST(DistanceMatrix, SquaredEuclidean_32x32) {
  TestSquaredEuclideanMatrix<32, 32>();
}

TEST(DistanceMatrix, SquaredEuclidean_64x1) {
  TestSquaredEuclideanMatrix<64, 1>();
}

TEST(DistanceMatrix, SquaredEuclidean_64x2) {
  TestSquaredEuclideanMatrix<64, 2>();
}

TEST(DistanceMatrix, SquaredEuclidean_64x4) {
  TestSquaredEuclideanMatrix<64, 4>();
}

TEST(DistanceMatrix, SquaredEuclidean_64x8) {
  TestSquaredEuclideanMatrix<64, 8>();
}

TEST(DistanceMatrix, SquaredEuclidean_64x16) {
  TestSquaredEuclideanMatrix<64, 16>();
}

TEST(DistanceMatrix, SquaredEuclidean_64x32) {
  TestSquaredEuclideanMatrix<64, 32>();
}

TEST(DistanceMatrix, SquaredEuclidean_64x64) {
  TestSquaredEuclideanMatrix<64, 64>();
}

TEST(DistanceMatrix, SquaredEuclidean_128x1) {
  TestSquaredEuclideanMatrix<128, 1>();
}

TEST(DistanceMatrix, SquaredEuclidean_128x2) {
  TestSquaredEuclideanMatrix<128, 2>();
}

TEST(DistanceMatrix, SquaredEuclidean_128x4) {
  TestSquaredEuclideanMatrix<128, 4>();
}

TEST(DistanceMatrix, SquaredEuclidean_128x8) {
  TestSquaredEuclideanMatrix<128, 8>();
}

TEST(DistanceMatrix, SquaredEuclidean_128x16) {
  TestSquaredEuclideanMatrix<128, 16>();
}

TEST(DistanceMatrix, SquaredEuclidean_128x32) {
  TestSquaredEuclideanMatrix<128, 32>();
}

TEST(DistanceMatrix, SquaredEuclidean_128x64) {
  TestSquaredEuclideanMatrix<128, 64>();
}

TEST(DistanceMatrix, SquaredEuclidean_128x128) {
  TestSquaredEuclideanMatrix<128, 128>();
}

template <size_t M, size_t N, size_t B, size_t D>
void EuclideanBenchmark(void) {
  const size_t dimension = D;
  const size_t batch_size = M;
  const size_t block_size = B;
  const size_t query_size = N;
  const size_t matrix_size = block_size * batch_size * dimension;
  const size_t query_matrix_size = dimension * query_size;

  std::vector<Float16> matrix1(matrix_size);
  std::vector<Float16> matrix2(matrix_size);
  std::vector<Float16> query1(query_matrix_size);
  std::vector<Float16> query2(query_matrix_size);

  std::mt19937 gen((std::random_device())());
  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = dist(gen);
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = dist(gen);
  }

  for (size_t i = 0; i < block_size; ++i) {
    size_t start_pos = i * batch_size * dimension;
    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], dimension,
                    batch_size);
  }
  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);

  ElapsedTime elapsed_time;
  std::vector<float> results(batch_size * query_size);

  std::cout << "# (" << IntelIntrinsics() << ") FP16 " << dimension << "d, "
            << batch_size << " * " << query_size << " * " << block_size
            << std::endl;

  // 1 Batched Euclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const Float16 *matrix_batch = &matrix2[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const Float16 *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      EuclideanDistanceMatrix<Float16, batch_size, 1>::Compute(
          matrix_batch, current_query, dimension, current_results);
    }
  }
  std::cout << "* 1 Batched Euclidean (us) \t" << elapsed_time.micro_seconds()
            << std::endl;

  // N Batched Euclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const Float16 *matrix_batch = &matrix2[i * batch_size * dimension];

    EuclideanDistanceMatrix<Float16, batch_size, query_size>::Compute(
        matrix_batch, &query2[0], dimension, results.data());
  }
  std::cout << "* N Batched Euclidean (us) \t" << elapsed_time.micro_seconds()
            << std::endl;

  // Unbatched Euclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const Float16 *matrix_batch = &matrix1[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const Float16 *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      for (size_t k = 0; k < batch_size; ++k) {
        EuclideanDistanceMatrix<Float16, 1, 1>::Compute(
            &matrix_batch[k * dimension], current_query, dimension,
            &current_results[k]);
      }
    }
  }
  std::cout << "* Unbatched Euclidean (us) \t" << elapsed_time.micro_seconds()
            << std::endl;
}

template <size_t M, size_t N, size_t B, size_t D>
void SquaredEuclideanBenchmark(void) {
  const size_t dimension = D;
  const size_t batch_size = M;
  const size_t block_size = B;
  const size_t query_size = N;
  const size_t matrix_size = block_size * batch_size * dimension;
  const size_t query_matrix_size = dimension * query_size;

  std::vector<Float16> matrix1(matrix_size);
  std::vector<Float16> matrix2(matrix_size);
  std::vector<Float16> query1(query_matrix_size);
  std::vector<Float16> query2(query_matrix_size);

  std::mt19937 gen((std::random_device())());
  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = dist(gen);
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = dist(gen);
  }

  for (size_t i = 0; i < block_size; ++i) {
    size_t start_pos = i * batch_size * dimension;
    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], dimension,
                    batch_size);
  }
  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);

  ElapsedTime elapsed_time;
  std::vector<float> results(batch_size * query_size);

  std::cout << "# (" << IntelIntrinsics() << ") FP16 " << dimension << "d, "
            << batch_size << " * " << query_size << " * " << block_size
            << std::endl;

  // 1 Batched Euclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const Float16 *matrix_batch = &matrix2[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const Float16 *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      SquaredEuclideanDistanceMatrix<Float16, batch_size, 1>::Compute(
          matrix_batch, current_query, dimension, current_results);
    }
  }
  std::cout << "* 1 Batched SquaredEuclidean (us) \t"
            << elapsed_time.micro_seconds() << std::endl;

  // N Batched Euclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const Float16 *matrix_batch = &matrix2[i * batch_size * dimension];

    SquaredEuclideanDistanceMatrix<Float16, batch_size, query_size>::Compute(
        matrix_batch, &query2[0], dimension, results.data());
  }
  std::cout << "* N Batched SquaredEuclidean (us) \t"
            << elapsed_time.micro_seconds() << std::endl;

  // Unbatched Euclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const Float16 *matrix_batch = &matrix1[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const Float16 *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      for (size_t k = 0; k < batch_size; ++k) {
        SquaredEuclideanDistanceMatrix<Float16, 1, 1>::Compute(
            &matrix_batch[k * dimension], current_query, dimension,
            &current_results[k]);
      }
    }
  }
  std::cout << "* Unbatched SquaredEuclidean (us) \t"
            << elapsed_time.micro_seconds() << std::endl;
}

// Benchmark driver for the Float16 Euclidean kernels. The DISABLED_ prefix
// keeps GoogleTest from running it unless disabled tests are explicitly
// requested. Template arguments are <batch M, queries N, blocks B,
// dimension D>, matching EuclideanBenchmark's parameter order.
TEST(DistanceMatrix, DISABLED_Euclidean_Benchmark) {
  EuclideanBenchmark<2, 1, 512, 64>();
  EuclideanBenchmark<2, 2, 512, 64>();
  EuclideanBenchmark<4, 1, 512, 64>();
  EuclideanBenchmark<4, 2, 512, 64>();
  EuclideanBenchmark<4, 4, 512, 64>();
  EuclideanBenchmark<8, 1, 512, 64>();
  EuclideanBenchmark<8, 2, 512, 64>();
  EuclideanBenchmark<8, 4, 512, 64>();
  EuclideanBenchmark<8, 8, 512, 64>();
  EuclideanBenchmark<16, 1, 512, 64>();
  EuclideanBenchmark<16, 2, 512, 64>();
  EuclideanBenchmark<16, 4, 512, 64>();
  EuclideanBenchmark<16, 8, 512, 64>();
  EuclideanBenchmark<16, 16, 512, 64>();
  EuclideanBenchmark<32, 1, 512, 64>();
  EuclideanBenchmark<32, 2, 512, 64>();
  EuclideanBenchmark<32, 4, 512, 64>();
  EuclideanBenchmark<32, 8, 512, 64>();
  EuclideanBenchmark<32, 16, 512, 64>();
  EuclideanBenchmark<32, 32, 512, 64>();
  EuclideanBenchmark<64, 1, 512, 64>();
  EuclideanBenchmark<64, 2, 512, 64>();
  EuclideanBenchmark<64, 4, 512, 64>();
  EuclideanBenchmark<64, 8, 512, 64>();
  EuclideanBenchmark<128, 1, 512, 64>();
  // Single-vector case with more blocks and a larger dimension.
  EuclideanBenchmark<1, 1, 1024, 256>();
}

// Benchmark driver for the Float16 squared-Euclidean kernels. The DISABLED_
// prefix keeps GoogleTest from running it unless disabled tests are
// explicitly requested. Template arguments are <batch M, queries N, blocks B,
// dimension D>, matching SquaredEuclideanBenchmark's parameter order.
TEST(DistanceMatrix, DISABLED_SquaredEuclidean_Benchmark) {
  SquaredEuclideanBenchmark<2, 1, 512, 64>();
  SquaredEuclideanBenchmark<2, 2, 512, 64>();
  SquaredEuclideanBenchmark<4, 1, 512, 64>();
  SquaredEuclideanBenchmark<4, 2, 512, 64>();
  SquaredEuclideanBenchmark<4, 4, 512, 64>();
  SquaredEuclideanBenchmark<8, 1, 512, 64>();
  SquaredEuclideanBenchmark<8, 2, 512, 64>();
  SquaredEuclideanBenchmark<8, 4, 512, 64>();
  SquaredEuclideanBenchmark<8, 8, 512, 64>();
  SquaredEuclideanBenchmark<16, 1, 512, 64>();
  SquaredEuclideanBenchmark<16, 2, 512, 64>();
  SquaredEuclideanBenchmark<16, 4, 512, 64>();
  SquaredEuclideanBenchmark<16, 8, 512, 64>();
  SquaredEuclideanBenchmark<16, 16, 512, 64>();
  SquaredEuclideanBenchmark<32, 1, 512, 64>();
  SquaredEuclideanBenchmark<32, 2, 512, 64>();
  SquaredEuclideanBenchmark<32, 4, 512, 64>();
  SquaredEuclideanBenchmark<32, 8, 512, 64>();
  SquaredEuclideanBenchmark<32, 16, 512, 64>();
  SquaredEuclideanBenchmark<32, 32, 512, 64>();
  SquaredEuclideanBenchmark<64, 1, 512, 64>();
  SquaredEuclideanBenchmark<64, 2, 512, 64>();
  SquaredEuclideanBenchmark<64, 4, 512, 64>();
  SquaredEuclideanBenchmark<64, 8, 512, 64>();
  SquaredEuclideanBenchmark<128, 1, 512, 64>();
  // Single-vector case with more blocks and a larger dimension.
  SquaredEuclideanBenchmark<1, 1, 1024, 256>();
}

// Disabled micro-benchmark: repeatedly runs the 1 x 1 Float16 Euclidean
// kernel on one random 768-dimensional vector pair. Nothing is asserted and
// no timing is printed by the test itself.
TEST(DistanceMatrix, DISABLED_Euclidean_BenchmarkSimple) {
  std::mt19937 rng((std::random_device())());

  const size_t dim = 768;
  const size_t iterations = 10000LLU;

  std::vector<Float16> data(dim);
  std::vector<Float16> query(dim);

  float distance;

  // Values come from [0, 1): the data vector is filled before the query.
  std::uniform_real_distribution<float> unit(0.0, 1.0);
  for (Float16 &value : data) {
    value = unit(rng);
  }
  for (Float16 &value : query) {
    value = unit(rng);
  }

  for (size_t round = 0; round < iterations; ++round) {
    EuclideanDistanceMatrix<Float16, 1, 1>::Compute(data.data(), query.data(),
                                                    dim, &distance);
  }
}

// Disabled micro-benchmark: repeatedly runs the 1 x 1 Float16
// squared-Euclidean kernel on one random 768-dimensional vector pair. Nothing
// is asserted and no timing is printed by the test itself.
TEST(DistanceMatrix, DISABLED_SquaredEuclidean_BenchmarkSimple) {
  std::mt19937 rng((std::random_device())());

  const size_t dim = 768;
  const size_t iterations = 10000LLU;

  std::vector<Float16> data(dim);
  std::vector<Float16> query(dim);

  float distance;

  // Values come from [0, 1): the data vector is filled before the query.
  std::uniform_real_distribution<float> unit(0.0, 1.0);
  for (Float16 &value : data) {
    value = unit(rng);
  }
  for (Float16 &value : query) {
    value = unit(rng);
  }

  for (size_t round = 0; round < iterations; ++round) {
    SquaredEuclideanDistanceMatrix<Float16, 1, 1>::Compute(
        data.data(), query.data(), dim, &distance);
  }
}


================================================
FILE: tests/ailego/math/euclidean_distance_matrix_fp32_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <functional>
#include <random>
#include <string>
#include <thread>
#include <vector>
#include <ailego/container/bitmap.h>
#include <ailego/internal/cpu_features.h>
#include <ailego/math/distance.h>
#include <gtest/gtest.h>
#include <zvec/ailego/container/vector.h>
#include <zvec/ailego/utility/time_helper.h>

using namespace zvec::ailego;

// Returns the string reported by internal::CpuFeatures::Intrinsics(); the
// benchmarks in this file print it in their header line.
// NOTE(review): presumably this names the SIMD instruction set in use --
// confirm against CpuFeatures.
static inline const char *IntelIntrinsics(void) {
  return internal::CpuFeatures::Intrinsics();
}

// Transposes a row-major matrix of floats.
//
// `src` is read as N rows of M elements; `dst` is written as M rows of N
// elements, so that dst[i * N + j] == src[j * M + i]. Output elements are
// written in increasing index order. Nothing is touched when M or N is zero.
static inline void MatrixTranspose(float *dst, const float *src, size_t M,
                                   size_t N) {
  for (size_t row = 0; row < M; ++row) {
    float *out = dst + row * N;
    for (size_t col = 0; col < N; ++col) {
      out[col] = src[col * M + row];
    }
  }
}

// Test convenience wrapper: forwards the raw buffers of two FixedVectors of
// the same static size N, together with lhs.size(), to Distance::Euclidean
// and returns its result.
template <size_t N>
static float EuclideanDistance(const FixedVector<float, N> &lhs,
                               const FixedVector<float, N> &rhs) {
  return Distance::Euclidean(lhs.data(), rhs.data(), lhs.size());
}

// Test convenience wrapper: forwards the raw buffers of two FixedVectors of
// the same static size N, together with lhs.size(), to
// Distance::SquaredEuclidean and returns its result.
template <size_t N>
static float SquaredEuclideanDistance(const FixedVector<float, N> &lhs,
                                      const FixedVector<float, N> &rhs) {
  return Distance::SquaredEuclidean(lhs.data(), rhs.data(), lhs.size());
}

// Spot checks of the float Euclidean distance against hand-computed values
// for dimensions 1, 3 and 11.
TEST(DistanceMatrix, Euclidean_General) {
  // Identical one-dimensional vectors: distance 0.
  FixedVector<float, 1> zero_lhs{0.0f};
  FixedVector<float, 1> zero_rhs{0.0f};
  EXPECT_FLOAT_EQ(0.0f, EuclideanDistance(zero_lhs, zero_rhs));

  // Differences are (1, 2, 3): sqrt(14).
  FixedVector<float, 3> small_lhs{1.0f, 2.0f, 3.0f};
  FixedVector<float, 3> small_rhs{2.0f, 4.0f, 6.0f};
  EXPECT_FLOAT_EQ(3.741657f, EuclideanDistance(small_lhs, small_rhs));

  // Eleven dimensions; the squared differences add up to 78.66.
  FixedVector<float, 11> wide_lhs{1.0f, 2.0f, 3.0f, 0.2f, 0.3f, 0.1f,
                                  5.2f, 2.1f, 7.1f, 6.8f, 1.2f};
  FixedVector<float, 11> wide_rhs{2.0f, 4.0f, 6.0f, 0.6f, 0.7f, 0.9f,
                                  1.0f, 2.3f, 3.4f, 4.5f, 6.4f};
  EXPECT_FLOAT_EQ(8.86905f, EuclideanDistance(wide_lhs, wide_rhs));
}

// Spot checks of the float squared-Euclidean distance for every dimension
// from 1 to 15.
TEST(DistanceMatrix, SquaredEuclidean_General) {
  // Dimensions 1 through 10: both vectors hold the same values, so the
  // expected squared distance is 0.
  FixedVector<float, 1> a{0.0f}, b{0.0f};
  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(a, b));

  FixedVector<float, 2> c{0.0f, 0.1f}, d{0.0f, 0.1f};
  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(c, d));

  FixedVector<float, 3> e{0.0f, 0.1f, 0.2f}, f{0.0f, 0.1f, 0.2f};
  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(e, f));

  FixedVector<float, 4> g{0.0f, 0.1f, 0.2f, 0.3f}, h{0.0f, 0.1f, 0.2f, 0.3f};
  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(g, h));

  FixedVector<float, 5> i{0.0f, 0.1f, 0.2f, 0.3f, 0.4f},
      j{0.0f, 0.1f, 0.2f, 0.3f, 0.4f};
  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(i, j));

  FixedVector<float, 6> l{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f},
      k{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f};
  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(l, k));

  FixedVector<float, 7> m{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f},
      n{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f};
  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(m, n));

  FixedVector<float, 8> o{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f},
      p{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f};
  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(o, p));

  FixedVector<float, 9> q{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f},
      r{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f};
  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(q, r));

  FixedVector<float, 10> s{0.0f, 0.1f, 0.2f, 0.3f, 0.4f,
                           0.5f, 0.6f, 0.7f, 0.8f, 0.9f},
      t{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f};
  EXPECT_FLOAT_EQ(0.0f, SquaredEuclideanDistance(s, t));

  // Dimensions 11 through 15: the first vector is built from the single
  // initializer {0.0f}. Each expected value equals the sum of squares of the
  // second vector, which matches the first vector being all zeros.
  // NOTE(review): this relies on FixedVector zero-filling the elements that
  // are not listed -- confirm against its definition.
  FixedVector<float, 11> u{0.0f},
      v{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.0f};
  EXPECT_FLOAT_EQ(3.85f, SquaredEuclideanDistance(u, v));

  FixedVector<float, 12> w{0.0f},
      x{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.0f, 1.1f};
  EXPECT_FLOAT_EQ(5.06f, SquaredEuclideanDistance(w, x));

  FixedVector<float, 13> y{0.0f}, z{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f,
                                    0.7f, 0.8f, 0.9f, 1.0f, 1.1f, 1.2f};
  EXPECT_FLOAT_EQ(6.5f, SquaredEuclideanDistance(y, z));

  FixedVector<float, 14> x14{0.0f},
      y14{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f,
          0.7f, 0.8f, 0.9f, 1.0f, 1.1f, 1.2f, 2.0f};
  EXPECT_FLOAT_EQ(10.5f, SquaredEuclideanDistance(x14, y14));

  FixedVector<float, 15> x15{0.0f},
      y15{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f,
          0.8f, 0.9f, 1.0f, 1.1f, 1.2f, 2.0f, 3.0f};
  EXPECT_FLOAT_EQ(19.5f, SquaredEuclideanDistance(x15, y15));
}

template <size_t M, size_t N>
void TestEuclideanMatrix(void) {
  std::mt19937 gen((std::random_device())());

  const size_t batch_size = M;
  const size_t query_size = N;
  size_t dimension = (std::uniform_int_distribution<size_t>(1, 65))(gen);
  size_t matrix_size = batch_size * dimension;
  size_t query_matrix_size = query_size * dimension;

  std::vector<float> matrix1(matrix_size);
  std::vector<float> matrix2(matrix_size);
  std::vector<float> query1(query_matrix_size);
  std::vector<float> query2(query_matrix_size);
  std::vector<float> result1(batch_size * query_size);
  std::vector<float> result2(batch_size * query_size);

  std::uniform_real_distribution<float> dist(0.0, 1.0);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = dist(gen);
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = dist(gen);
  }
  MatrixTranspose(&matrix2[0], matrix1.data(), dimension, batch_size);
  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);

  for (size_t i = 0; i < query_size; ++i) {
    const float *cur_query = &query1[i * dimension];
    float *query_result = &result1[i * batch_size];

    for (size_t j = 0; j < batch_size; ++j) {
      EuclideanDistanceMatrix<float, 1, 1>::Compute(
          &matrix1[j * dimension], cur_query, dimension, &query_result[j]);
    }
  }
  EuclideanDistanceMatrix<float, batch_size, query_size>::Compute(
      &matrix2[0], &query2[0], dimension, &result2[0]);

  for (size_t i = 0; i < batch_size * query_size; ++i) {
    EXPECT_GE(0.00001, std::abs(result1[i] - result2[i]));
  }
}

template <size_t M, size_t N>
void TestSquaredEuclideanMatrix(void) {
  std::mt19937 gen((std::random_device())());

  const size_t batch_size = M;
  const size_t query_size = N;
  size_t dimension = (std::uniform_int_distribution<size_t>(1, 65))(gen);
  size_t matrix_size = batch_size * dimension;
  size_t query_matrix_size = query_size * dimension;

  std::vector<float> matrix1(matrix_size);
  std::vector<float> matrix2(matrix_size);
  std::vector<float> query1(query_matrix_size);
  std::vector<float> query2(query_matrix_size);
  std::vector<float> result1(batch_size * query_size);
  std::vector<float> result2(batch_size * query_size);

  std::uniform_real_distribution<float> dist(0.0, 1.0);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = dist(gen);
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = dist(gen);
  }
  MatrixTranspose(&matrix2[0], matrix1.data(), dimension, batch_size);
  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);

  for (size_t i = 0; i < query_size; ++i) {
    const float *cur_query = &query1[i * dimension];
    float *query_result = &result1[i * batch_size];

    for (size_t j = 0; j < batch_size; ++j) {
      SquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(
          &matrix1[j * dimension], cur_query, dimension, &query_result[j]);
    }
  }
  SquaredEuclideanDistanceMatrix<float, batch_size, query_size>::Compute(
      &matrix2[0], &query2[0], dimension, &result2[0]);

  for (size_t i = 0; i < batch_size * query_size; ++i) {
    EXPECT_GE(0.00001, std::abs(result1[i] - result2[i]));
  }
}

// float Euclidean kernel consistency cases. Each test named Euclidean_MxN
// runs TestEuclideanMatrix<M, N>, i.e. a batch of M vectors against N
// queries, comparing the M x N kernel with the 1 x 1 kernel.
TEST(DistanceMatrix, Euclidean_1x1) {
  TestEuclideanMatrix<1, 1>();
}

TEST(DistanceMatrix, Euclidean_2x1) {
  TestEuclideanMatrix<2, 1>();
}

TEST(DistanceMatrix, Euclidean_2x2) {
  TestEuclideanMatrix<2, 2>();
}

TEST(DistanceMatrix, Euclidean_3x3) {
  TestEuclideanMatrix<3, 3>();
}

TEST(DistanceMatrix, Euclidean_4x1) {
  TestEuclideanMatrix<4, 1>();
}

TEST(DistanceMatrix, Euclidean_4x2) {
  TestEuclideanMatrix<4, 2>();
}

TEST(DistanceMatrix, Euclidean_4x4) {
  TestEuclideanMatrix<4, 4>();
}

TEST(DistanceMatrix, Euclidean_8x1) {
  TestEuclideanMatrix<8, 1>();
}

TEST(DistanceMatrix, Euclidean_8x2) {
  TestEuclideanMatrix<8, 2>();
}

TEST(DistanceMatrix, Euclidean_8x4) {
  TestEuclideanMatrix<8, 4>();
}

TEST(DistanceMatrix, Euclidean_8x8) {
  TestEuclideanMatrix<8, 8>();
}

TEST(DistanceMatrix, Euclidean_16x1) {
  TestEuclideanMatrix<16, 1>();
}

TEST(DistanceMatrix, Euclidean_16x2) {
  TestEuclideanMatrix<16, 2>();
}

TEST(DistanceMatrix, Euclidean_16x4) {
  TestEuclideanMatrix<16, 4>();
}

TEST(DistanceMatrix, Euclidean_16x8) {
  TestEuclideanMatrix<16, 8>();
}

TEST(DistanceMatrix, Euclidean_16x16) {
  TestEuclideanMatrix<16, 16>();
}

TEST(DistanceMatrix, Euclidean_32x1) {
  TestEuclideanMatrix<32, 1>();
}

TEST(DistanceMatrix, Euclidean_32x2) {
  TestEuclideanMatrix<32, 2>();
}

TEST(DistanceMatrix, Euclidean_32x4) {
  TestEuclideanMatrix<32, 4>();
}

TEST(DistanceMatrix, Euclidean_32x8) {
  TestEuclideanMatrix<32, 8>();
}

TEST(DistanceMatrix, Euclidean_32x16) {
  TestEuclideanMatrix<32, 16>();
}

TEST(DistanceMatrix, Euclidean_32x32) {
  TestEuclideanMatrix<32, 32>();
}

TEST(DistanceMatrix, Euclidean_64x1) {
  TestEuclideanMatrix<64, 1>();
}

TEST(DistanceMatrix, Euclidean_64x2) {
  TestEuclideanMatrix<64, 2>();
}

TEST(DistanceMatrix, Euclidean_64x4) {
  TestEuclideanMatrix<64, 4>();
}

TEST(DistanceMatrix, Euclidean_64x8) {
  TestEuclideanMatrix<64, 8>();
}

TEST(DistanceMatrix, Euclidean_64x16) {
  TestEuclideanMatrix<64, 16>();
}

TEST(DistanceMatrix, Euclidean_64x32) {
  TestEuclideanMatrix<64, 32>();
}

TEST(DistanceMatrix, Euclidean_64x64) {
  TestEuclideanMatrix<64, 64>();
}

TEST(DistanceMatrix, Euclidean_128x1) {
  TestEuclideanMatrix<128, 1>();
}

TEST(DistanceMatrix, Euclidean_128x2) {
  TestEuclideanMatrix<128, 2>();
}

TEST(DistanceMatrix, Euclidean_128x4) {
  TestEuclideanMatrix<128, 4>();
}

TEST(DistanceMatrix, Euclidean_128x8) {
  TestEuclideanMatrix<128, 8>();
}

TEST(DistanceMatrix, Euclidean_128x16) {
  TestEuclideanMatrix<128, 16>();
}

TEST(DistanceMatrix, Euclidean_128x32) {
  TestEuclideanMatrix<128, 32>();
}

TEST(DistanceMatrix, Euclidean_128x64) {
  TestEuclideanMatrix<128, 64>();
}

TEST(DistanceMatrix, Euclidean_128x128) {
  TestEuclideanMatrix<128, 128>();
}

// float squared-Euclidean kernel consistency cases. Each test named
// SquaredEuclidean_MxN runs TestSquaredEuclideanMatrix<M, N>, i.e. a batch of
// M vectors against N queries, comparing the M x N kernel with the 1 x 1
// kernel.
TEST(DistanceMatrix, SquaredEuclidean_1x1) {
  TestSquaredEuclideanMatrix<1, 1>();
}

TEST(DistanceMatrix, SquaredEuclidean_2x1) {
  TestSquaredEuclideanMatrix<2, 1>();
}

TEST(DistanceMatrix, SquaredEuclidean_2x2) {
  TestSquaredEuclideanMatrix<2, 2>();
}

TEST(DistanceMatrix, SquaredEuclidean_3x3) {
  TestSquaredEuclideanMatrix<3, 3>();
}

TEST(DistanceMatrix, SquaredEuclidean_4x1) {
  TestSquaredEuclideanMatrix<4, 1>();
}

TEST(DistanceMatrix, SquaredEuclidean_4x2) {
  TestSquaredEuclideanMatrix<4, 2>();
}

TEST(DistanceMatrix, SquaredEuclidean_4x4) {
  TestSquaredEuclideanMatrix<4, 4>();
}

TEST(DistanceMatrix, SquaredEuclidean_8x1) {
  TestSquaredEuclideanMatrix<8, 1>();
}

TEST(DistanceMatrix, SquaredEuclidean_8x2) {
  TestSquaredEuclideanMatrix<8, 2>();
}

TEST(DistanceMatrix, SquaredEuclidean_8x4) {
  TestSquaredEuclideanMatrix<8, 4>();
}

TEST(DistanceMatrix, SquaredEuclidean_8x8) {
  TestSquaredEuclideanMatrix<8, 8>();
}

TEST(DistanceMatrix, SquaredEuclidean_16x1) {
  TestSquaredEuclideanMatrix<16, 1>();
}

TEST(DistanceMatrix, SquaredEuclidean_16x2) {
  TestSquaredEuclideanMatrix<16, 2>();
}

TEST(DistanceMatrix, SquaredEuclidean_16x4) {
  TestSquaredEuclideanMatrix<16, 4>();
}

TEST(DistanceMatrix, SquaredEuclidean_16x8) {
  TestSquaredEuclideanMatrix<16, 8>();
}

TEST(DistanceMatrix, SquaredEuclidean_16x16) {
  TestSquaredEuclideanMatrix<16, 16>();
}

TEST(DistanceMatrix, SquaredEuclidean_32x1) {
  TestSquaredEuclideanMatrix<32, 1>();
}

TEST(DistanceMatrix, SquaredEuclidean_32x2) {
  TestSquaredEuclideanMatrix<32, 2>();
}

TEST(DistanceMatrix, SquaredEuclidean_32x4) {
  TestSquaredEuclideanMatrix<32, 4>();
}

TEST(DistanceMatrix, SquaredEuclidean_32x8) {
  TestSquaredEuclideanMatrix<32, 8>();
}

TEST(DistanceMatrix, SquaredEuclidean_32x16) {
  TestSquaredEuclideanMatrix<32, 16>();
}

TEST(DistanceMatrix, SquaredEuclidean_32x32) {
  TestSquaredEuclideanMatrix<32, 32>();
}

TEST(DistanceMatrix, SquaredEuclidean_64x1) {
  TestSquaredEuclideanMatrix<64, 1>();
}

TEST(DistanceMatrix, SquaredEuclidean_64x2) {
  TestSquaredEuclideanMatrix<64, 2>();
}

TEST(DistanceMatrix, SquaredEuclidean_64x4) {
  TestSquaredEuclideanMatrix<64, 4>();
}

TEST(DistanceMatrix, SquaredEuclidean_64x8) {
  TestSquaredEuclideanMatrix<64, 8>();
}

TEST(DistanceMatrix, SquaredEuclidean_64x16) {
  TestSquaredEuclideanMatrix<64, 16>();
}

TEST(DistanceMatrix, SquaredEuclidean_64x32) {
  TestSquaredEuclideanMatrix<64, 32>();
}

TEST(DistanceMatrix, SquaredEuclidean_64x64) {
  TestSquaredEuclideanMatrix<64, 64>();
}

TEST(DistanceMatrix, SquaredEuclidean_128x1) {
  TestSquaredEuclideanMatrix<128, 1>();
}

TEST(DistanceMatrix, SquaredEuclidean_128x2) {
  TestSquaredEuclideanMatrix<128, 2>();
}

TEST(DistanceMatrix, SquaredEuclidean_128x4) {
  TestSquaredEuclideanMatrix<128, 4>();
}

TEST(DistanceMatrix, SquaredEuclidean_128x8) {
  TestSquaredEuclideanMatrix<128, 8>();
}

TEST(DistanceMatrix, SquaredEuclidean_128x16) {
  TestSquaredEuclideanMatrix<128, 16>();
}

TEST(DistanceMatrix, SquaredEuclidean_128x32) {
  TestSquaredEuclideanMatrix<128, 32>();
}

TEST(DistanceMatrix, SquaredEuclidean_128x64) {
  TestSquaredEuclideanMatrix<128, 64>();
}

TEST(DistanceMatrix, SquaredEuclidean_128x128) {
  TestSquaredEuclideanMatrix<128, 128>();
}

template <size_t M, size_t N, size_t B, size_t D>
void EuclideanBenchmark(void) {
  const size_t dimension = D;
  const size_t batch_size = M;
  const size_t block_size = B;
  const size_t query_size = N;
  const size_t matrix_size = block_size * batch_size * dimension;
  const size_t query_matrix_size = dimension * query_size;

  std::vector<float> matrix1(matrix_size);
  std::vector<float> matrix2(matrix_size);
  std::vector<float> query1(query_matrix_size);
  std::vector<float> query2(query_matrix_size);

  std::mt19937 gen((std::random_device())());
  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = dist(gen);
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = dist(gen);
  }

  for (size_t i = 0; i < block_size; ++i) {
    size_t start_pos = i * batch_size * dimension;
    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], dimension,
                    batch_size);
  }
  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);

  ElapsedTime elapsed_time;
  std::vector<float> results(batch_size * query_size);

  std::cout << "# (" << IntelIntrinsics() << ") FP32 " << dimension << "d, "
            << batch_size << " * " << query_size << " * " << block_size
            << std::endl;

  // 1 Batched Euclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const float *matrix_batch = &matrix2[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const float *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      EuclideanDistanceMatrix<float, batch_size, 1>::Compute(
          matrix_batch, current_query, dimension, current_results);
    }
  }
  std::cout << "* 1 Batched Euclidean (us) \t" << elapsed_time.micro_seconds()
            << std::endl;

  // N Batched Euclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const float *matrix_batch = &matrix2[i * batch_size * dimension];

    EuclideanDistanceMatrix<float, batch_size, query_size>::Compute(
        matrix_batch, &query2[0], dimension, results.data());
  }
  std::cout << "* N Batched Euclidean (us) \t" << elapsed_time.micro_seconds()
            << std::endl;

  // Unbatched Euclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const float *matrix_batch = &matrix1[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const float *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      for (size_t k = 0; k < batch_size; ++k) {
        EuclideanDistanceMatrix<float, 1, 1>::Compute(
            &matrix_batch[k * dimension], current_query, dimension,
            &current_results[k]);
      }
    }
  }
  std::cout << "* Unbatched Euclidean (us) \t" << elapsed_time.micro_seconds()
            << std::endl;
}

template <size_t M, size_t N, size_t B, size_t D>
void SquaredEuclideanBenchmark(void) {
  const size_t dimension = D;
  const size_t batch_size = M;
  const size_t block_size = B;
  const size_t query_size = N;
  const size_t matrix_size = block_size * batch_size * dimension;
  const size_t query_matrix_size = dimension * query_size;

  std::vector<float> matrix1(matrix_size);
  std::vector<float> matrix2(matrix_size);
  std::vector<float> query1(query_matrix_size);
  std::vector<float> query2(query_matrix_size);

  std::mt19937 gen((std::random_device())());
  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = dist(gen);
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = dist(gen);
  }

  for (size_t i = 0; i < block_size; ++i) {
    size_t start_pos = i * batch_size * dimension;
    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], dimension,
                    batch_size);
  }
  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);

  ElapsedTime elapsed_time;
  std::vector<float> results(batch_size * query_size);

  std::cout << "# (" << IntelIntrinsics() << ") FP32 " << dimension << "d, "
            << batch_size << " * " << query_size << " * " << block_size
            << std::endl;

  // 1 Batched Euclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const float *matrix_batch = &matrix2[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const float *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      SquaredEuclideanDistanceMatrix<float, batch_size, 1>::Compute(
          matrix_batch, current_query, dimension, current_results);
    }
  }
  std::cout << "* 1 Batched SquaredEuclidean (us) \t"
            << elapsed_time.micro_seconds() << std::endl;

  // N Batched Euclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const float *matrix_batch = &matrix2[i * batch_size * dimension];

    SquaredEuclideanDistanceMatrix<float, batch_size, query_size>::Compute(
        matrix_batch, &query2[0], dimension, results.data());
  }
  std::cout << "* N Batched SquaredEuclidean (us) \t"
            << elapsed_time.micro_seconds() << std::endl;

  // Unbatched Euclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const float *matrix_batch = &matrix1[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const float *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      for (size_t k = 0; k < batch_size; ++k) {
        SquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(
            &matrix_batch[k * dimension], current_query, dimension,
            &current_results[k]);
      }
    }
  }
  std::cout << "* Unbatched SquaredEuclidean (us) \t"
            << elapsed_time.micro_seconds() << std::endl;
}

// Runs EuclideanBenchmark over a grid of kernel shapes. The template
// arguments are <M, N, B, D> = <batch size, query count, block count,
// dimension>. The DISABLED_ prefix keeps GoogleTest from running this by
// default; pass --gtest_also_run_disabled_tests to execute it.
TEST(DistanceMatrix, DISABLED_Euclidean_Benchmark) {
  EuclideanBenchmark<2, 1, 512, 64>();
  EuclideanBenchmark<2, 2, 512, 64>();
  EuclideanBenchmark<4, 1, 512, 64>();
  EuclideanBenchmark<4, 2, 512, 64>();
  EuclideanBenchmark<4, 4, 512, 64>();
  EuclideanBenchmark<8, 1, 512, 64>();
  EuclideanBenchmark<8, 2, 512, 64>();
  EuclideanBenchmark<8, 4, 512, 64>();
  EuclideanBenchmark<8, 8, 512, 64>();
  EuclideanBenchmark<16, 1, 512, 64>();
  EuclideanBenchmark<16, 2, 512, 64>();
  EuclideanBenchmark<16, 4, 512, 64>();
  EuclideanBenchmark<16, 8, 512, 64>();
  EuclideanBenchmark<16, 16, 512, 64>();
  EuclideanBenchmark<32, 1, 512, 64>();
  EuclideanBenchmark<32, 2, 512, 64>();
  EuclideanBenchmark<32, 4, 512, 64>();
  EuclideanBenchmark<32, 8, 512, 64>();
  EuclideanBenchmark<32, 16, 512, 64>();
  EuclideanBenchmark<32, 32, 512, 64>();
  EuclideanBenchmark<64, 1, 512, 64>();
  EuclideanBenchmark<64, 2, 512, 64>();
  EuclideanBenchmark<64, 4, 512, 64>();
  EuclideanBenchmark<64, 8, 512, 64>();
  EuclideanBenchmark<128, 1, 512, 64>();
  // Degenerate 1x1 shape with more blocks and a larger dimension.
  EuclideanBenchmark<1, 1, 1024, 256>();
}

// Runs SquaredEuclideanBenchmark over a grid of kernel shapes. The template
// arguments are <M, N, B, D> = <batch size, query count, block count,
// dimension>. The DISABLED_ prefix keeps GoogleTest from running this by
// default; pass --gtest_also_run_disabled_tests to execute it.
TEST(DistanceMatrix, DISABLED_SquaredEuclidean_Benchmark) {
  SquaredEuclideanBenchmark<2, 1, 512, 64>();
  SquaredEuclideanBenchmark<2, 2, 512, 64>();
  SquaredEuclideanBenchmark<4, 1, 512, 64>();
  SquaredEuclideanBenchmark<4, 2, 512, 64>();
  SquaredEuclideanBenchmark<4, 4, 512, 64>();
  SquaredEuclideanBenchmark<8, 1, 512, 64>();
  SquaredEuclideanBenchmark<8, 2, 512, 64>();
  SquaredEuclideanBenchmark<8, 4, 512, 64>();
  SquaredEuclideanBenchmark<8, 8, 512, 64>();
  SquaredEuclideanBenchmark<16, 1, 512, 64>();
  SquaredEuclideanBenchmark<16, 2, 512, 64>();
  SquaredEuclideanBenchmark<16, 4, 512, 64>();
  SquaredEuclideanBenchmark<16, 8, 512, 64>();
  SquaredEuclideanBenchmark<16, 16, 512, 64>();
  SquaredEuclideanBenchmark<32, 1, 512, 64>();
  SquaredEuclideanBenchmark<32, 2, 512, 64>();
  SquaredEuclideanBenchmark<32, 4, 512, 64>();
  SquaredEuclideanBenchmark<32, 8, 512, 64>();
  SquaredEuclideanBenchmark<32, 16, 512, 64>();
  SquaredEuclideanBenchmark<32, 32, 512, 64>();
  SquaredEuclideanBenchmark<64, 1, 512, 64>();
  SquaredEuclideanBenchmark<64, 2, 512, 64>();
  SquaredEuclideanBenchmark<64, 4, 512, 64>();
  SquaredEuclideanBenchmark<64, 8, 512, 64>();
  SquaredEuclideanBenchmark<128, 1, 512, 64>();
  // Degenerate 1x1 shape with more blocks and a larger dimension.
  SquaredEuclideanBenchmark<1, 1, 1024, 256>();
}

// Minimal hot loop around the float <1, 1> Euclidean kernel: one random
// 768-dimensional vector pair, evaluated 10000 times. Nothing is asserted or
// printed; the test exists to be run under an external profiler/timer.
// Disabled by default through the DISABLED_ name prefix.
TEST(DistanceMatrix, DISABLED_Euclidean_BenchmarkSimple) {
  std::mt19937 gen((std::random_device())());

  const size_t dimension = 768;
  const size_t rounds = 10000LLU;

  std::uniform_real_distribution<float> dist(0.0, 1.0);

  // Fill order matters for reproducing the original draw sequence: data
  // first, then query.
  std::vector<float> data(dimension);
  for (float &value : data) {
    value = dist(gen);
  }
  std::vector<float> query(dimension);
  for (float &value : query) {
    value = dist(gen);
  }

  float result;  // overwritten by every Compute() call, never read
  for (size_t round = 0; round != rounds; ++round) {
    EuclideanDistanceMatrix<float, 1, 1>::Compute(data.data(), query.data(),
                                                  dimension, &result);
  }
}

// Minimal hot loop around the float <1, 1> squared-Euclidean kernel: one
// random 768-dimensional vector pair, evaluated 10000 times. Nothing is
// asserted or printed; the test exists to be run under an external
// profiler/timer. Disabled by default through the DISABLED_ name prefix.
TEST(DistanceMatrix, DISABLED_SquaredEuclidean_BenchmarkSimple) {
  std::mt19937 gen((std::random_device())());

  const size_t dimension = 768;
  const size_t rounds = 10000LLU;

  std::uniform_real_distribution<float> dist(0.0, 1.0);

  // Fill order matters for reproducing the original draw sequence: data
  // first, then query.
  std::vector<float> data(dimension);
  for (float &value : data) {
    value = dist(gen);
  }
  std::vector<float> query(dimension);
  for (float &value : query) {
    value = dist(gen);
  }

  float result;  // overwritten by every Compute() call, never read
  for (size_t round = 0; round != rounds; ++round) {
    SquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(
        data.data(), query.data(), dimension, &result);
  }
}


================================================
FILE: tests/ailego/math/euclidean_distance_matrix_int4_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <functional>
#include <random>
#include <string>
#include <thread>
#include <vector>
#include <ailego/container/bitmap.h>
#include <ailego/internal/cpu_features.h>
#include <ailego/math/distance.h>
#include <ailego/utility/matrix_helper.h>
#include <gtest/gtest.h>
#include <zvec/ailego/container/vector.h>
#include <zvec/ailego/utility/time_helper.h>

using namespace zvec;
using namespace zvec::ailego;

// Returns the string reported by internal::CpuFeatures::Intrinsics(). The
// benchmarks in this file print it in their header line to label the output.
static inline const char *IntelIntrinsics(void) {
  return internal::CpuFeatures::Intrinsics();
}

// Cross-checks the uint8_t distance overloads, fed two 4-bit values per byte,
// against the int8_t overloads fed the same values unpacked. Also checks that
// Int4MulTable, indexed by a packed byte, yields the product of the two
// values stored in that byte.
TEST(DistanceMatrix, Euclidean_General) {
  std::mt19937 gen((std::random_device())());
  // Random even dimension in [2, 128] so the values pair up into whole bytes.
  const size_t dimension =
      (std::uniform_int_distribution<size_t>(1, 64))(gen) << 1;
  const size_t packed_size = dimension >> 1;

  std::vector<int8_t> vec1(dimension), query1(dimension);
  std::vector<uint8_t> vec2(packed_size), query2(packed_size);

  std::uniform_int_distribution<int> dist(-8, 7);

  // Packs two values into one byte: `hi` goes to bits 7..4, `lo` to bits 3..0.
  const auto pack = [](int8_t hi, int8_t lo) -> uint8_t {
    return ((uint8_t)(hi) << 4) | ((uint8_t)(lo) & 0xf);
  };

  for (size_t round = 0; round != 100; ++round) {
    for (size_t i = 0; i < dimension; i += 2) {
      const int8_t vec_hi = (int8_t)dist(gen);
      const int8_t vec_lo = (int8_t)dist(gen);
      vec1[i + 0] = vec_hi;
      vec1[i + 1] = vec_lo;
      vec2[i >> 1] = pack(vec_hi, vec_lo);
      EXPECT_EQ(vec1[i + 0] * vec1[i + 1], Int4MulTable[vec2[i >> 1]]);

      const int8_t query_hi = (int8_t)dist(gen);
      const int8_t query_lo = (int8_t)dist(gen);
      query1[i + 0] = query_hi;
      query1[i + 1] = query_lo;
      query2[i >> 1] = pack(query_hi, query_lo);
      EXPECT_EQ(query1[i + 0] * query1[i + 1], Int4MulTable[query2[i >> 1]]);
    }

    // Unpacked (int8_t) and packed (uint8_t) inputs must give equal distances.
    EXPECT_FLOAT_EQ(
        Distance::SquaredEuclidean(vec1.data(), query1.data(), dimension),
        Distance::SquaredEuclidean(vec2.data(), query2.data(), dimension));
    EXPECT_FLOAT_EQ(Distance::Euclidean(vec1.data(), query1.data(), dimension),
                    Distance::Euclidean(vec2.data(), query2.data(), dimension));
    // Euclidean must equal the square root of SquaredEuclidean.
    EXPECT_FLOAT_EQ(std::sqrt(Distance::SquaredEuclidean(
                        vec1.data(), query1.data(), dimension)),
                    Distance::Euclidean(vec2.data(), query2.data(), dimension));
  }
}

// Checks the <M, N> uint8_t EuclideanDistanceMatrix kernel against the
// <1, 1> kernel on random bytes.
//
// M vectors and N queries are generated in plain layout, evaluated pair by
// pair with the <1, 1> kernel (result1), then transposed in uint32_t units
// and evaluated in one <M, N> call (result2). Both result buffers are indexed
// as [query * M + vector] and must match element-wise.
template <size_t M, size_t N>
void TestEuclideanMatrix(void) {
  std::mt19937 gen((std::random_device())());

  // Random dimension: a multiple of 8 in [8, 512]. A vector occupies
  // dimension / 2 bytes, i.e. dimension / 8 uint32_t words.
  const size_t dimension =
      (std::uniform_int_distribution<size_t>(1, 64))(gen) << 3;
  const size_t bytes_per_vector = dimension / 2;
  const size_t words_per_vector = dimension / 8;

  std::vector<uint8_t> vectors(M * bytes_per_vector);
  std::vector<uint8_t> vectors_transposed(vectors.size());
  std::vector<uint8_t> queries(N * bytes_per_vector);
  std::vector<uint8_t> queries_transposed(queries.size());
  std::vector<float> result1(M * N);
  std::vector<float> result2(M * N);

  std::uniform_int_distribution<int> dist(0, 0xff);
  for (uint8_t &byte : vectors) {
    byte = (uint8_t)dist(gen);
  }
  for (uint8_t &byte : queries) {
    byte = (uint8_t)dist(gen);
  }
  ailego::MatrixHelper::Transpose<uint32_t, M>(
      vectors.data(), words_per_vector, &vectors_transposed[0]);
  ailego::MatrixHelper::Transpose<uint32_t, N>(
      queries.data(), words_per_vector, &queries_transposed[0]);

  // Reference: one <1, 1> call per (query, vector) pair on the plain layout.
  for (size_t q = 0; q != N; ++q) {
    const uint8_t *cur_query = &queries[q * bytes_per_vector];
    float *expected = &result1[q * M];

    for (size_t v = 0; v != M; ++v) {
      EuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(
          &vectors[v * bytes_per_vector], cur_query, dimension, &expected[v]);
    }
  }

  // Kernel under test: a single <M, N> call on the transposed layout.
  EuclideanDistanceMatrix<uint8_t, M, N>::Compute(
      &vectors_transposed[0], &queries_transposed[0], dimension, &result2[0]);

  for (size_t i = 0; i < M * N; ++i) {
    EXPECT_FLOAT_EQ(result1[i], result2[i]);
  }
}

// Checks the <M, N> uint8_t SquaredEuclideanDistanceMatrix kernel against the
// <1, 1> kernel on random bytes.
//
// M vectors and N queries are generated in plain layout, evaluated pair by
// pair with the <1, 1> kernel (result1), then transposed in uint32_t units
// and evaluated in one <M, N> call (result2). Both result buffers are indexed
// as [query * M + vector] and must match element-wise.
template <size_t M, size_t N>
void TestSquaredEuclideanMatrix(void) {
  std::mt19937 gen((std::random_device())());

  // Random dimension: a multiple of 8 in [8, 512]. A vector occupies
  // dimension / 2 bytes, i.e. dimension / 8 uint32_t words.
  const size_t dimension =
      (std::uniform_int_distribution<size_t>(1, 64))(gen) << 3;
  const size_t bytes_per_vector = dimension / 2;
  const size_t words_per_vector = dimension / 8;

  std::vector<uint8_t> vectors(M * bytes_per_vector);
  std::vector<uint8_t> vectors_transposed(vectors.size());
  std::vector<uint8_t> queries(N * bytes_per_vector);
  std::vector<uint8_t> queries_transposed(queries.size());
  std::vector<float> result1(M * N);
  std::vector<float> result2(M * N);

  std::uniform_int_distribution<int> dist(0, 0xff);
  for (uint8_t &byte : vectors) {
    byte = (uint8_t)dist(gen);
  }
  for (uint8_t &byte : queries) {
    byte = (uint8_t)dist(gen);
  }
  ailego::MatrixHelper::Transpose<uint32_t, M>(
      vectors.data(), words_per_vector, &vectors_transposed[0]);
  ailego::MatrixHelper::Transpose<uint32_t, N>(
      queries.data(), words_per_vector, &queries_transposed[0]);

  // Reference: one <1, 1> call per (query, vector) pair on the plain layout.
  for (size_t q = 0; q != N; ++q) {
    const uint8_t *cur_query = &queries[q * bytes_per_vector];
    float *expected = &result1[q * M];

    for (size_t v = 0; v != M; ++v) {
      SquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(
          &vectors[v * bytes_per_vector], cur_query, dimension, &expected[v]);
    }
  }

  // Kernel under test: a single <M, N> call on the transposed layout.
  SquaredEuclideanDistanceMatrix<uint8_t, M, N>::Compute(
      &vectors_transposed[0], &queries_transposed[0], dimension, &result2[0]);

  for (size_t i = 0; i < M * N; ++i) {
    EXPECT_FLOAT_EQ(result1[i], result2[i]);
  }
}

// Euclidean matrix cases, 1x1 through 64x32. Every test instantiates
// TestEuclideanMatrix<M, N> (M batched vectors against N queries) with the M
// and N spelled out in the "MxN" suffix of its name. The 3x3 case is the only
// shape here that is not a power of two.
TEST(DistanceMatrix, Euclidean_1x1) {
  TestEuclideanMatrix<1, 1>();
}

TEST(DistanceMatrix, Euclidean_2x1) {
  TestEuclideanMatrix<2, 1>();
}

TEST(DistanceMatrix, Euclidean_2x2) {
  TestEuclideanMatrix<2, 2>();
}

TEST(DistanceMatrix, Euclidean_3x3) {
  TestEuclideanMatrix<3, 3>();
}

TEST(DistanceMatrix, Euclidean_4x1) {
  TestEuclideanMatrix<4, 1>();
}

TEST(DistanceMatrix, Euclidean_4x2) {
  TestEuclideanMatrix<4, 2>();
}

TEST(DistanceMatrix, Euclidean_4x4) {
  TestEuclideanMatrix<4, 4>();
}

TEST(DistanceMatrix, Euclidean_8x1) {
  TestEuclideanMatrix<8, 1>();
}

TEST(DistanceMatrix, Euclidean_8x2) {
  TestEuclideanMatrix<8, 2>();
}

TEST(DistanceMatrix, Euclidean_8x4) {
  TestEuclideanMatrix<8, 4>();
}

TEST(DistanceMatrix, Euclidean_8x8) {
  TestEuclideanMatrix<8, 8>();
}

TEST(DistanceMatrix, Euclidean_16x1) {
  TestEuclideanMatrix<16, 1>();
}

TEST(DistanceMatrix, Euclidean_16x2) {
  TestEuclideanMatrix<16, 2>();
}

TEST(DistanceMatrix, Euclidean_16x4) {
  TestEuclideanMatrix<16, 4>();
}

TEST(DistanceMatrix, Euclidean_16x8) {
  TestEuclideanMatrix<16, 8>();
}

TEST(DistanceMatrix, Euclidean_16x16) {
  TestEuclideanMatrix<16, 16>();
}

TEST(DistanceMatrix, Euclidean_32x1) {
  TestEuclideanMatrix<32, 1>();
}

TEST(DistanceMatrix, Euclidean_32x2) {
  TestEuclideanMatrix<32, 2>();
}

TEST(DistanceMatrix, Euclidean_32x4) {
  TestEuclideanMatrix<32, 4>();
}

TEST(DistanceMatrix, Euclidean_32x8) {
  TestEuclideanMatrix<32, 8>();
}

TEST(DistanceMatrix, Euclidean_32x16) {
  TestEuclideanMatrix<32, 16>();
}

TEST(DistanceMatrix, Euclidean_32x32) {
  TestEuclideanMatrix<32, 32>();
}

TEST(DistanceMatrix, Euclidean_64x1) {
  TestEuclideanMatrix<64, 1>();
}

TEST(DistanceMatrix, Euclidean_64x2) {
  TestEuclideanMatrix<64, 2>();
}

TEST(DistanceMatrix, Euclidean_64x4) {
  TestEuclideanMatrix<64, 4>();
}

TEST(DistanceMatrix, Euclidean_64x8) {
  TestEuclideanMatrix<64, 8>();
}

TEST(DistanceMatrix, Euclidean_64x16) {
  TestEuclideanMatrix<64, 16>();
}

TEST(DistanceMatrix, Euclidean_64x32) {
  TestEuclideanMatrix<64, 32>();
}

// 64 batched vectors against 64 queries. The template arguments must mirror
// the "64x64" suffix of the test name so that the advertised kernel shape is
// the one actually instantiated and checked.
TEST(DistanceMatrix, Euclidean_64x64) {
  TestEuclideanMatrix<64, 64>();
}

// Euclidean matrix cases with a batch of 128 vectors against 1..32 queries.
// Every test instantiates TestEuclideanMatrix<M, N> with the M and N spelled
// out in the "MxN" suffix of its name.
TEST(DistanceMatrix, Euclidean_128x1) {
  TestEuclideanMatrix<128, 1>();
}

TEST(DistanceMatrix, Euclidean_128x2) {
  TestEuclideanMatrix<128, 2>();
}

TEST(DistanceMatrix, Euclidean_128x4) {
  TestEuclideanMatrix<128, 4>();
}

TEST(DistanceMatrix, Euclidean_128x8) {
  TestEuclideanMatrix<128, 8>();
}

TEST(DistanceMatrix, Euclidean_128x16) {
  TestEuclideanMatrix<128, 16>();
}

TEST(DistanceMatrix, Euclidean_128x32) {
  TestEuclideanMatrix<128, 32>();
}

// 128 batched vectors against 64 queries. The template arguments must mirror
// the "128x64" suffix of the test name; instantiating <128, 128> here would
// only repeat the Euclidean_128x128 case and leave the 128x64 shape untested.
TEST(DistanceMatrix, Euclidean_128x64) {
  TestEuclideanMatrix<128, 64>();
}

// Largest Euclidean shape in this file: 128 batched vectors against 128
// queries.
TEST(DistanceMatrix, Euclidean_128x128) {
  TestEuclideanMatrix<128, 128>();
}

// Squared-Euclidean matrix cases, 1x1 through 64x32. Every test instantiates
// TestSquaredEuclideanMatrix<M, N> (M batched vectors against N queries) with
// the M and N spelled out in the "MxN" suffix of its name. The 3x3 case is
// the only shape here that is not a power of two.
TEST(DistanceMatrix, SquaredEuclidean_1x1) {
  TestSquaredEuclideanMatrix<1, 1>();
}

TEST(DistanceMatrix, SquaredEuclidean_2x1) {
  TestSquaredEuclideanMatrix<2, 1>();
}

TEST(DistanceMatrix, SquaredEuclidean_2x2) {
  TestSquaredEuclideanMatrix<2, 2>();
}

TEST(DistanceMatrix, SquaredEuclidean_3x3) {
  TestSquaredEuclideanMatrix<3, 3>();
}

TEST(DistanceMatrix, SquaredEuclidean_4x1) {
  TestSquaredEuclideanMatrix<4, 1>();
}

TEST(DistanceMatrix, SquaredEuclidean_4x2) {
  TestSquaredEuclideanMatrix<4, 2>();
}

TEST(DistanceMatrix, SquaredEuclidean_4x4) {
  TestSquaredEuclideanMatrix<4, 4>();
}

TEST(DistanceMatrix, SquaredEuclidean_8x1) {
  TestSquaredEuclideanMatrix<8, 1>();
}

TEST(DistanceMatrix, SquaredEuclidean_8x2) {
  TestSquaredEuclideanMatrix<8, 2>();
}

TEST(DistanceMatrix, SquaredEuclidean_8x4) {
  TestSquaredEuclideanMatrix<8, 4>();
}

TEST(DistanceMatrix, SquaredEuclidean_8x8) {
  TestSquaredEuclideanMatrix<8, 8>();
}

TEST(DistanceMatrix, SquaredEuclidean_16x1) {
  TestSquaredEuclideanMatrix<16, 1>();
}

TEST(DistanceMatrix, SquaredEuclidean_16x2) {
  TestSquaredEuclideanMatrix<16, 2>();
}

TEST(DistanceMatrix, SquaredEuclidean_16x4) {
  TestSquaredEuclideanMatrix<16, 4>();
}

TEST(DistanceMatrix, SquaredEuclidean_16x8) {
  TestSquaredEuclideanMatrix<16, 8>();
}

TEST(DistanceMatrix, SquaredEuclidean_16x16) {
  TestSquaredEuclideanMatrix<16, 16>();
}

TEST(DistanceMatrix, SquaredEuclidean_32x1) {
  TestSquaredEuclideanMatrix<32, 1>();
}

TEST(DistanceMatrix, SquaredEuclidean_32x2) {
  TestSquaredEuclideanMatrix<32, 2>();
}

TEST(DistanceMatrix, SquaredEuclidean_32x4) {
  TestSquaredEuclideanMatrix<32, 4>();
}

TEST(DistanceMatrix, SquaredEuclidean_32x8) {
  TestSquaredEuclideanMatrix<32, 8>();
}

TEST(DistanceMatrix, SquaredEuclidean_32x16) {
  TestSquaredEuclideanMatrix<32, 16>();
}

TEST(DistanceMatrix, SquaredEuclidean_32x32) {
  TestSquaredEuclideanMatrix<32, 32>();
}

TEST(DistanceMatrix, SquaredEuclidean_64x1) {
  TestSquaredEuclideanMatrix<64, 1>();
}

TEST(DistanceMatrix, SquaredEuclidean_64x2) {
  TestSquaredEuclideanMatrix<64, 2>();
}

TEST(DistanceMatrix, SquaredEuclidean_64x4) {
  TestSquaredEuclideanMatrix<64, 4>();
}

TEST(DistanceMatrix, SquaredEuclidean_64x8) {
  TestSquaredEuclideanMatrix<64, 8>();
}

TEST(DistanceMatrix, SquaredEuclidean_64x16) {
  TestSquaredEuclideanMatrix<64, 16>();
}

TEST(DistanceMatrix, SquaredEuclidean_64x32) {
  TestSquaredEuclideanMatrix<64, 32>();
}

// 64 batched vectors against 64 queries. The template arguments must mirror
// the "64x64" suffix of the test name so that the advertised kernel shape is
// the one actually instantiated and checked.
TEST(DistanceMatrix, SquaredEuclidean_64x64) {
  TestSquaredEuclideanMatrix<64, 64>();
}

// Squared-Euclidean matrix cases with a batch of 128 vectors against 1..32
// queries. Every test instantiates TestSquaredEuclideanMatrix<M, N> with the
// M and N spelled out in the "MxN" suffix of its name.
TEST(DistanceMatrix, SquaredEuclidean_128x1) {
  TestSquaredEuclideanMatrix<128, 1>();
}

TEST(DistanceMatrix, SquaredEuclidean_128x2) {
  TestSquaredEuclideanMatrix<128, 2>();
}

TEST(DistanceMatrix, SquaredEuclidean_128x4) {
  TestSquaredEuclideanMatrix<128, 4>();
}

TEST(DistanceMatrix, SquaredEuclidean_128x8) {
  TestSquaredEuclideanMatrix<128, 8>();
}

TEST(DistanceMatrix, SquaredEuclidean_128x16) {
  TestSquaredEuclideanMatrix<128, 16>();
}

TEST(DistanceMatrix, SquaredEuclidean_128x32) {
  TestSquaredEuclideanMatrix<128, 32>();
}

// 128 batched vectors against 64 queries. The template arguments must mirror
// the "128x64" suffix of the test name; instantiating <128, 128> here would
// only repeat the SquaredEuclidean_128x128 case and leave the 128x64 shape
// untested.
TEST(DistanceMatrix, SquaredEuclidean_128x64) {
  TestSquaredEuclideanMatrix<128, 64>();
}

// Largest squared-Euclidean shape in this file: 128 batched vectors against
// 128 queries.
TEST(DistanceMatrix, SquaredEuclidean_128x128) {
  TestSquaredEuclideanMatrix<128, 128>();
}

template <size_t M, size_t N, size_t B, size_t D>
void EuclideanBenchmark(void) {
  const size_t dimension = D;
  const size_t batch_size = M;
  const size_t block_size = B;
  const size_t query_size = N;
  const size_t matrix_size = block_size * batch_size * dimension / 2;
  const size_t query_matrix_size = query_size * dimension / 2;

  std::vector<uint8_t> matrix1(matrix_size);
  std::vector<uint8_t> matrix2(matrix_size);
  std::vector<uint8_t> query1(query_matrix_size);
  std::vector<uint8_t> query2(query_matrix_size);

  std::mt19937 gen((std::random_device())());
  std::uniform_int_distribution<int> dist(0, 0xff);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = (uint8_t)dist(gen);
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = (uint8_t)dist(gen);
  }

  for (size_t i = 0; i < block_size; ++i) {
    size_t start_pos = i * batch_size * dimension / 2;
    ailego::MatrixHelper::Transpose<uint32_t, batch_size>(
        &matrix1[start_pos], dimension / 8, &matrix2[start_pos]);
  }
  ailego::MatrixHelper::Transpose<uint32_t, query_size>(
      query1.data(), dimension / 8, &query2[0]);

  ElapsedTime elapsed_time;
  std::vector<float> results(batch_size * query_size);

  std::cout << "# (" << IntelIntrinsics() << ") INT4 " << dimension << "d, "
            << batch_size << " * " << query_size << " * " << block_size
            << std::endl;

  // 1 Batched Euclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const uint8_t *matrix_batch = &matrix2[i * batch_size * dimension / 2];

    for (size_t j = 0; j < query_size; ++j) {
      const uint8_t *current_query = &query1[j * dimension / 2];
      float *current_results = &results[j * batch_size];

      EuclideanDistanceMatrix<uint8_t, batch_size, 1>::Compute(
          matrix_batch, current_query, dimension, current_results);
    }
  }
  std::cout << "* 1 Batched Euclidean (us) \t" << elapsed_time.micro_seconds()
            << std::endl;

  // N Batched Euclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const uint8_t *matrix_batch = &matrix2[i * batch_size * dimension / 2];

    EuclideanDistanceMatrix<uint8_t, batch_size, query_size>::Compute(
        matrix_batch, &query2[0], dimension, results.data());
  }
  std::cout << "* N Batched Euclidean (us) \t" << elapsed_time.micro_seconds()
            << std::endl;

  // Unbatched Euclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const uint8_t *matrix_batch = &matrix1[i * batch_size * dimension / 2];

    for (size_t j = 0; j < query_size; ++j) {
      const uint8_t *current_query = &query1[j * dimension / 2];
      float *current_results = &results[j * batch_size];

      for (size_t k = 0; k < batch_size; ++k) {
        EuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(
            &matrix_batch[k * dimension / 2], current_query, dimension,
            &current_results[k]);
      }
    }
  }
  std::cout << "* Unbatched Euclidean (us) \t" << elapsed_time.micro_seconds()
            << std::endl;
}

template <size_t M, size_t N, size_t B, size_t D>
void SquaredEuclideanBenchmark(void) {
  const size_t dimension = D;
  const size_t batch_size = M;
  const size_t block_size = B;
  const size_t query_size = N;
  const size_t matrix_size = block_size * batch_size * dimension / 2;
  const size_t query_matrix_size = query_size * dimension / 2;

  std::vector<uint8_t> matrix1(matrix_size);
  std::vector<uint8_t> matrix2(matrix_size);
  std::vector<uint8_t> query1(query_matrix_size);
  std::vector<uint8_t> query2(query_matrix_size);

  std::mt19937 gen((std::random_device())());
  std::uniform_int_distribution<int> dist(0, 0xff);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = (uint8_t)dist(gen);
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = (uint8_t)dist(gen);
  }

  for (size_t i = 0; i < block_size; ++i) {
    size_t start_pos = i * batch_size * dimension / 2;
    ailego::MatrixHelper::Transpose<uint32_t, batch_size>(
        &matrix1[start_pos], dimension / 8, &matrix2[start_pos]);
  }
  ailego::MatrixHelper::Transpose<uint32_t, query_size>(
      query1.data(), dimension / 8, &query2[0]);

  ElapsedTime elapsed_time;
  std::vector<float> results(batch_size * query_size);

  std::cout << "# (" << IntelIntrinsics() << ") INT4 " << dimension << "d, "
            << batch_size << " * " << query_size << " * " << block_size
            << std::endl;

  // 1 Batched SquaredEuclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const uint8_t *matrix_batch = &matrix2[i * batch_size * dimension / 2];

    for (size_t j = 0; j < query_size; ++j) {
      const uint8_t *current_query = &query1[j * dimension / 2];
      float *current_results = &results[j * batch_size];

      SquaredEuclideanDistanceMatrix<uint8_t, batch_size, 1>::Compute(
          matrix_batch, current_query, dimension, current_results);
    }
  }
  std::cout << "* 1 Batched SquaredEuclidean (us) \t"
            << elapsed_time.micro_seconds() << std::endl;

  // N Batched SquaredEuclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const uint8_t *matrix_batch = &matrix2[i * batch_size * dimension / 2];

    SquaredEuclideanDistanceMatrix<uint8_t, batch_size, query_size>::Compute(
        matrix_batch, &query2[0], dimension, results.data());
  }
  std::cout << "* N Batched SquaredEuclidean (us) \t"
            << elapsed_time.micro_seconds() << std::endl;

  // Unbatched SquaredEuclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const uint8_t *matrix_batch = &matrix1[i * batch_size * dimension / 2];

    for (size_t j = 0; j < query_size; ++j) {
      const uint8_t *current_query = &query1[j * dimension / 2];
      float *current_results = &results[j * batch_size];

      for (size_t k = 0; k < batch_size; ++k) {
        SquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(
            &matrix_batch[k * dimension / 2], current_query, dimension,
            &current_results[k]);
      }
    }
  }
  std::cout << "* Unbatched SquaredEuclidean (us) \t"
            << elapsed_time.micro_seconds() << std::endl;
}

// Runs EuclideanBenchmark over a grid of kernel shapes, all with 512 blocks
// and dimension 128. The template arguments are <M, N, B, D> = <batch size,
// query count, block count, dimension>. The DISABLED_ prefix keeps GoogleTest
// from running this by default; pass --gtest_also_run_disabled_tests to
// execute it.
TEST(DistanceMatrix, DISABLED_Euclidean_Benchmark) {
  EuclideanBenchmark<2, 1, 512, 128>();
  EuclideanBenchmark<2, 2, 512, 128>();
  EuclideanBenchmark<4, 1, 512, 128>();
  EuclideanBenchmark<4, 2, 512, 128>();
  EuclideanBenchmark<4, 4, 512, 128>();
  EuclideanBenchmark<8, 1, 512, 128>();
  EuclideanBenchmark<8, 2, 512, 128>();
  EuclideanBenchmark<8, 4, 512, 128>();
  EuclideanBenchmark<8, 8, 512, 128>();
  EuclideanBenchmark<16, 1, 512, 128>();
  EuclideanBenchmark<16, 2, 512, 128>();
  EuclideanBenchmark<16, 4, 512, 128>();
  EuclideanBenchmark<16, 8, 512, 128>();
  EuclideanBenchmark<16, 16, 512, 128>();
  EuclideanBenchmark<32, 1, 512, 128>();
  EuclideanBenchmark<32, 2, 512, 128>();
  EuclideanBenchmark<32, 4, 512, 128>();
  EuclideanBenchmark<32, 8, 512, 128>();
  EuclideanBenchmark<32, 16, 512, 128>();
  EuclideanBenchmark<32, 32, 512, 128>();
  EuclideanBenchmark<64, 1, 512, 128>();
  EuclideanBenchmark<64, 2, 512, 128>();
  EuclideanBenchmark<64, 4, 512, 128>();
  EuclideanBenchmark<64, 8, 512, 128>();
  EuclideanBenchmark<128, 1, 512, 128>();
}

// Runs SquaredEuclideanBenchmark over a grid of kernel shapes, all with 512
// blocks and dimension 128. The template arguments are <M, N, B, D> = <batch
// size, query count, block count, dimension>. The DISABLED_ prefix keeps
// GoogleTest from running this by default; pass
// --gtest_also_run_disabled_tests to execute it.
TEST(DistanceMatrix, DISABLED_SquaredEuclidean_Benchmark) {
  SquaredEuclideanBenchmark<2, 1, 512, 128>();
  SquaredEuclideanBenchmark<2, 2, 512, 128>();
  SquaredEuclideanBenchmark<4, 1, 512, 128>();
  SquaredEuclideanBenchmark<4, 2, 512, 128>();
  SquaredEuclideanBenchmark<4, 4, 512, 128>();
  SquaredEuclideanBenchmark<8, 1, 512, 128>();
  SquaredEuclideanBenchmark<8, 2, 512, 128>();
  SquaredEuclideanBenchmark<8, 4, 512, 128>();
  SquaredEuclideanBenchmark<8, 8, 512, 128>();
  SquaredEuclideanBenchmark<16, 1, 512, 128>();
  SquaredEuclideanBenchmark<16, 2, 512, 128>();
  SquaredEuclideanBenchmark<16, 4, 512, 128>();
  SquaredEuclideanBenchmark<16, 8, 512, 128>();
  SquaredEuclideanBenchmark<16, 16, 512, 128>();
  SquaredEuclideanBenchmark<32, 1, 512, 128>();
  SquaredEuclideanBenchmark<32, 2, 512, 128>();
  SquaredEuclideanBenchmark<32, 4, 512, 128>();
  SquaredEuclideanBenchmark<32, 8, 512, 128>();
  SquaredEuclideanBenchmark<32, 16, 512, 128>();
  SquaredEuclideanBenchmark<32, 32, 512, 128>();
  SquaredEuclideanBenchmark<64, 1, 512, 128>();
  SquaredEuclideanBenchmark<64, 2, 512, 128>();
  SquaredEuclideanBenchmark<64, 4, 512, 128>();
  SquaredEuclideanBenchmark<64, 8, 512, 128>();
  SquaredEuclideanBenchmark<128, 1, 512, 128>();
}


================================================
FILE: tests/ailego/math/euclidean_distance_matrix_int8_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <functional>
#include <random>
#include <string>
#include <thread>
#include <vector>
#include <ailego/container/bitmap.h>
#include <ailego/internal/cpu_features.h>
#include <ailego/math/distance.h>
#include <gtest/gtest.h>
#include <zvec/ailego/container/vector.h>
#include <zvec/ailego/utility/time_helper.h>

using namespace zvec::ailego;

// Returns the string reported by internal::CpuFeatures::Intrinsics().
static inline const char *IntelIntrinsics(void) {
  return internal::CpuFeatures::Intrinsics();
}

// Transposes a row-major matrix of uint32_t elements.
//
// `src` is read as N rows of M elements; `dst` is written as M rows of N
// elements, so that dst[j * N + i] == src[i * M + j]. Both buffers must hold
// at least M * N elements and must not overlap.
static inline void MatrixTranspose(uint32_t *dst, const uint32_t *src, size_t M,
                                   size_t N) {
  // The source is consumed strictly sequentially, so a running cursor
  // replaces the explicit src[row * M + col] index computation.
  const uint32_t *cursor = src;
  for (size_t row = 0; row != N; ++row) {
    for (size_t col = 0; col != M; ++col) {
      dst[col * N + row] = *cursor++;
    }
  }
}

// Test convenience wrapper: forwards the raw data of two equally sized
// FixedVector<int8_t, N> operands to Distance::Euclidean, using lhs.size()
// as the dimension argument.
template <size_t N>
static float EuclideanDistance(const FixedVector<int8_t, N> &lhs,
                               const FixedVector<int8_t, N> &rhs) {
  return Distance::Euclidean(lhs.data(), rhs.data(), lhs.size());
}

// Squared Euclidean distance between two fixed-size int8 vectors, computed by
// Distance::SquaredEuclidean over `lhs.size()` elements.
template <size_t N>
static float SquaredEuclideanDistance(const FixedVector<int8_t, N> &lhs,
                                      const FixedVector<int8_t, N> &rhs) {
  const auto *left = lhs.data();
  const auto *right = rhs.data();
  const auto count = lhs.size();
  return Distance::SquaredEuclidean(left, right, count);
}

// Checks Distance::Euclidean (through the EuclideanDistance helper) on int8
// vectors of several fixed sizes against precomputed expected values.
TEST(DistanceMatrix, Euclidean_General) {
  // Identical single-element vectors: distance is zero.
  FixedVector<int8_t, 1> a{(int8_t)0}, b{(int8_t)0};
  EXPECT_FLOAT_EQ(0.0f, EuclideanDistance(a, b));

  // Element differences are (1, 2, 3): sqrt(1 + 4 + 9) = sqrt(14).
  FixedVector<int8_t, 3> c{(int8_t)1, (int8_t)2, (int8_t)3},
      d{(int8_t)2, (int8_t)4, (int8_t)6};
  EXPECT_FLOAT_EQ(3.741657f, EuclideanDistance(c, d));

  // Four differences of magnitude 127: sqrt(4 * 127^2) = 254.
  FixedVector<int8_t, 4> e{(int8_t)0, (int8_t)0, (int8_t)127, (int8_t)127},
      f{(int8_t)127, (int8_t)127, (int8_t)0, (int8_t)0};
  EXPECT_FLOAT_EQ(254.0f, EuclideanDistance(e, f));

  // Same as above plus one difference of magnitude 254 (-127 vs 127):
  // sqrt(4 * 127^2 + 254^2).
  FixedVector<int8_t, 5> g{(int8_t)0, (int8_t)0, (int8_t)127, (int8_t)127,
                           (int8_t)-127},
      h{(int8_t)127, (int8_t)127, (int8_t)0, (int8_t)0, (int8_t)127};
  EXPECT_FLOAT_EQ(359.21024f, EuclideanDistance(g, h));

  // First fixed 2048-element sample vector.
  int8_t a2048[] = {
      59, 46, 36, 99, 49, 61, 45, 68, 79, 86, 2,  8,  73, 14, 45, 85, 5,  63,
      71, 45, 36, 54, 72, 18, 79, 15, 78, 29, 9,  12, 96, 27, 3,  45, 81, 37,
      51, 19, 43, 5,  55, 93, 85, 61, 86, 54, 2,  33, 39, 74, 95, 7,  98, 32,
      25, 30, 83, 45, 2,  7,  76, 95, 58, 52, 80, 85, 71, 56, 92, 41, 67, 98,
      32, 97, 3,  71, 59, 58, 87, 84, 44, 54, 81, 74, 0,  51, 7,  28, 81, 83,
      45, 88, 95, 87, 31, 65, 61, 84, 37, 13, 98, 59, 35, 41, 40, 12, 36, 87,
      4,  84, 15, 96, 97, 15, 19, 7,  67, 87, 13, 40, 56, 80, 86, 3,  85, 99,
      56, 94, 8,  63, 69, 24, 72, 44, 75, 58, 93, 19, 63, 81, 59, 90, 54, 99,
      2,  37, 20, 72, 57, 56, 25, 78, 27, 83, 77, 9,  66, 66, 62, 21, 81, 69,
      8,  13, 29, 95, 8,  75, 20, 48, 76, 53, 5,  97, 8,  26, 93, 76, 63, 48,
      26, 51, 69, 46, 6,  42, 76, 44, 84, 40, 85, 79, 44, 62, 78, 52, 18, 70,
      95, 9,  13, 71, 15, 2,  28, 98, 98, 44, 98, 64, 44, 9,  17, 71, 27, 73,
      24, 54, 24, 64, 68, 38, 90, 20, 89, 4,  79, 20, 56, 33, 92, 65, 64, 83,
      33, 92, 24, 4,  51, 16, 76, 14, 72, 36, 95, 22, 32, 27, 42, 58, 15, 87,
      23, 19, 76, 2,  35, 41, 1,  18, 77, 48, 51, 50, 14, 14, 22, 80, 23, 39,
      53, 69, 61, 63, 45, 91, 67, 75, 51, 9,  40, 42, 16, 3,  61, 18, 28, 58,
      28, 13, 79, 23, 43, 40, 99, 87, 63, 63, 72, 74, 74, 93, 10, 61, 86, 84,
      8,  63, 31, 98, 18, 79, 54, 25, 47, 61, 98, 15, 42, 53, 26, 40, 59, 77,
      80, 62, 73, 53, 22, 21, 6,  38, 31, 3,  80, 91, 53, 77, 36, 25, 28, 64,
      60, 31, 49, 91, 6,  50, 70, 94, 36, 66, 29, 32, 66, 64, 92, 78, 30, 87,
      29, 26, 16, 87, 29, 64, 13, 60, 1,  63, 2,  75, 31, 44, 8,  3,  65, 50,
      48, 26, 94, 44, 7,  45, 9,  94, 56, 57, 25, 95, 5,  8,  92, 71, 10, 83,
      62, 57, 74, 3,  95, 38, 90, 99, 53, 40, 37, 12, 88, 60, 78, 61, 54, 60,
      73, 73, 18, 48, 75, 42, 30, 19, 18, 96, 44, 62, 80, 46, 4,  63, 98, 12,
      44, 34, 83, 42, 36, 95, 84, 2,  48, 7,  68, 68, 47, 71, 74, 93, 78, 5,
      83, 99, 33, 17, 81, 49, 8,  75, 24, 67, 26, 44, 89, 38, 99, 22, 45, 52,
      89, 14, 94, 74, 35, 24, 46, 63, 6,  55, 23, 3,  91, 97, 6,  33, 53, 76,
      13, 68, 35, 9,  24, 43, 86, 5,  24, 4,  64, 86, 85, 12, 24, 60, 2,  9,
      23, 48, 12, 88, 71, 28, 1,  79, 31, 31, 82, 21, 49, 8,  31, 3,  99, 91,
      93, 29, 15, 67, 9,  50, 94, 46, 72, 77, 33, 30, 4,  16, 56, 86, 62, 76,
      78, 77, 77, 25, 30, 88, 29, 17, 38, 2,  80, 86, 73, 8,  8,  38, 3,  50,
      67, 49, 42, 75, 49, 96, 36, 77, 55, 27, 52, 77, 82, 92, 42, 52, 35, 55,
      60, 16, 14, 96, 90, 39, 49, 68, 47, 36, 53, 1,  78, 43, 93, 58, 66, 45,
      80, 56, 51, 23, 64, 49, 56, 28, 77, 99, 18, 21, 98, 46, 88, 97, 98, 21,
      58, 31, 33, 77, 61, 71, 37, 80, 80, 18, 2,  92, 70, 83, 3,  41, 93, 38,
      24, 92, 98, 69, 80, 84, 81, 74, 80, 89, 74, 90, 92, 81, 29, 54, 3,  49,
      52, 24, 78, 99, 33, 35, 54, 98, 36, 90, 66, 71, 67, 39, 79, 55, 68, 68,
      94, 58, 60, 74, 81, 26, 43, 50, 67, 21, 27, 41, 44, 85, 60, 39, 48, 6,
      49, 75, 17, 1,  94, 69, 98, 38, 77, 96, 61, 99, 52, 89, 81, 0,  73, 30,
      68, 90, 4,  77, 93, 16, 56, 92, 58, 49, 22, 49, 60, 4,  66, 33, 54, 49,
      4,  3,  75, 81, 36, 99, 7,  87, 60, 15, 90, 93, 33, 42, 21, 42, 1,  29,
      72, 20, 94, 3,  83, 56, 48, 94, 41, 78, 84, 98, 61, 70, 71, 10, 15, 75,
      86, 6,  57, 53, 7,  38, 94, 2,  94, 38, 79, 21, 53, 69, 89, 84, 53, 59,
      99, 69, 81, 84, 60, 27, 1,  10, 93, 0,  88, 4,  71, 44, 12, 35, 63, 60,
      65, 15, 5,  20, 60, 30, 86, 46, 50, 28, 67, 39, 1,  13, 26, 40, 57, 59,
      87, 46, 5,  51, 14, 62, 78, 92, 11, 42, 61, 59, 4,  70, 24, 54, 34, 54,
      30, 83, 58, 61, 23, 13, 17, 96, 32, 39, 85, 81, 70, 53, 53, 23, 96, 36,
      18, 84, 22, 96, 5,  90, 17, 62, 25, 91, 71, 70, 91, 88, 93, 86, 30, 90,
      25, 79, 86, 12, 28, 87, 46, 64, 82, 19, 82, 91, 79, 40, 83, 38, 83, 86,
      50, 84, 67, 0,  41, 80, 70, 36, 36, 41, 81, 68, 71, 40, 36, 10, 60, 7,
      87, 67, 41, 10, 26, 98, 45, 66, 92, 22, 63, 70, 91, 90, 34, 98, 31, 45,
      70, 34, 69, 55, 55, 32, 0,  81, 78, 7,  13, 50, 95, 69, 13, 76, 90, 85,
      28, 62, 22, 66, 44, 92, 95, 54, 2,  12, 43, 74, 51, 54, 97, 14, 34, 45,
      17, 13, 57, 29, 87, 49, 79, 20, 78, 92, 20, 82, 21, 71, 93, 51, 18, 58,
      12, 55, 70, 97, 60, 51, 94, 65, 64, 76, 27, 57, 76, 2,  32, 64, 9,  56,
      8,  37, 17, 53, 26, 45, 93, 61, 56, 9,  74, 32, 39, 82, 29, 1,  8,  95,
      2,  93, 93, 66, 56, 16, 60, 42, 28, 11, 47, 58, 98, 34, 93, 25, 49, 22,
      95, 6,  1,  78, 78, 11, 19, 13, 6,  80, 90, 20, 82, 26, 48, 51, 16, 84,
      51, 54, 94, 67, 59, 9,  29, 59, 53, 46, 13, 55, 92, 87, 48, 17, 45, 71,
      52, 86, 96, 4,  18, 32, 87, 40, 93, 98, 8,  85, 76, 88, 82, 57, 7,  61,
      5,  72, 99, 37, 45, 42, 15, 70, 8,  5,  41, 14, 28, 50, 20, 2,  77, 48,
      53, 16, 95, 78, 88, 78, 54, 19, 30, 80, 78, 97, 69, 23, 93, 48, 72, 92,
      88, 82, 17, 58, 98, 99, 70, 97, 52, 46, 66, 97, 95, 65, 38, 47, 1,  4,
      18, 31, 99, 16, 64, 84, 44, 40, 46, 2,  46, 32, 8,  47, 64, 28, 87, 70,
      80, 25, 85, 17, 43, 56, 97, 91, 20, 7,  70, 82, 32, 58, 46, 43, 25, 81,
      12, 97, 40, 73, 52, 27, 13, 30, 58, 1,  89, 68, 75, 17, 91, 22, 12, 48,
      41, 98, 81, 44, 60, 93, 54, 81, 3,  8,  43, 16, 11, 62, 33, 81, 1,  49,
      51, 67, 83, 83, 93, 7,  63, 71, 41, 39, 63, 52, 77, 77, 47, 20, 32, 26,
      20, 66, 64, 62, 94, 55, 37, 39, 28, 45, 67, 76, 6,  43, 10, 18, 55, 44,
      35, 41, 29, 33, 96, 90, 72, 70, 87, 75, 97, 43, 36, 14, 79, 8,  10, 83,
      33, 29, 83, 74, 72, 83, 96, 77, 72, 91, 41, 9,  85, 34, 7,  51, 13, 88,
      69, 47, 23, 22, 64, 2,  7,  38, 66, 58, 7,  8,  35, 92, 53, 65, 4,  94,
      79, 29, 88, 23, 81, 72, 55, 22, 44, 78, 75, 80, 74, 28, 54, 16, 8,  16,
      73, 92, 31, 17, 44, 6,  32, 80, 5,  61, 2,  58, 7,  80, 89, 51, 59, 63,
      65, 42, 93, 14, 44, 16, 36, 79, 41, 45, 33, 36, 13, 92, 85, 75, 7,  47,
      31, 62, 98, 66, 5,  20, 55, 26, 21, 93, 50, 62, 44, 3,  66, 43, 11, 15,
      35, 78, 73, 26, 55, 90, 90, 8,  40, 74, 17, 8,  61, 47, 76, 41, 43, 50,
      94, 62, 85, 44, 47, 91, 72, 86, 10, 86, 62, 18, 51, 23, 83, 0,  61, 41,
      99, 24, 15, 72, 42, 56, 19, 34, 54, 63, 5,  14, 3,  64, 26, 6,  1,  21,
      25, 64, 19, 84, 49, 55, 32, 85, 76, 62, 1,  52, 15, 86, 21, 49, 92, 22,
      79, 20, 90, 27, 32, 46, 76, 55, 23, 69, 56, 80, 35, 35, 30, 43, 70, 79,
      73, 12, 60, 20, 22, 80, 83, 72, 66, 56, 41, 68, 4,  8,  94, 97, 41, 76,
      96, 3,  53, 61, 15, 89, 65, 45, 65, 15, 6,  83, 82, 69, 76, 68, 95, 81,
      55, 55, 85, 26, 75, 34, 67, 75, 28, 95, 58, 11, 73, 96, 44, 70, 82, 89,
      72, 40, 17, 89, 51, 87, 69, 85, 45, 59, 2,  53, 82, 87, 24, 33, 41, 53,
      97, 35, 0,  54, 7,  94, 71, 42, 68, 88, 53, 15, 41, 79, 1,  24, 49, 54,
      26, 88, 23, 89, 14, 41, 52, 8,  12, 92, 98, 54, 56, 27, 17, 11, 89, 82,
      34, 81, 78, 15, 63, 18, 17, 18, 40, 85, 41, 57, 68, 21, 7,  34, 44, 97,
      20, 5,  67, 14, 32, 86, 8,  48, 8,  6,  28, 50, 74, 91, 82, 18, 26, 51,
      38, 21, 90, 54, 64, 91, 65, 32, 6,  67, 6,  97, 32, 70, 88, 39, 80, 39,
      86, 13, 72, 81, 6,  93, 10, 67, 41, 32, 32, 8,  60, 95, 94, 11, 63, 45,
      25, 25, 46, 28, 10, 91, 16, 82, 23, 88, 10, 21, 32, 31, 90, 26, 55, 59,
      74, 36, 49, 78, 86, 68, 6,  22, 25, 59, 51, 96, 77, 60, 20, 32, 36, 91,
      56, 52, 85, 42, 26, 30, 17, 31, 5,  18, 74, 42, 75, 45, 31, 40, 81, 65,
      20, 29, 94, 10, 71, 40, 69, 83, 83, 24, 76, 25, 73, 40, 47, 75, 44, 66,
      11, 52, 90, 6,  30, 85, 18, 56, 22, 18, 51, 54, 18, 18, 99, 80, 37, 89,
      83, 8,  83, 74, 18, 48, 39, 3,  45, 47, 70, 59, 14, 15, 94, 84, 39, 62,
      42, 79, 84, 88, 26, 52, 34, 48, 92, 28, 20, 59, 53, 81, 34, 5,  98, 36,
      18, 80, 36, 8,  83, 28, 98, 67, 92, 44, 9,  47, 65, 59, 11, 31, 33, 88,
      77, 2,  20, 22, 0,  24, 12, 45, 88, 11, 38, 75, 43, 99, 30, 71, 66, 47,
      67, 14, 22, 57, 40, 88, 48, 12, 89, 6,  93, 28, 96, 37, 99, 38, 75, 72,
      68, 42, 11, 76, 53, 4,  9,  38, 7,  77, 47, 46, 66, 73, 27, 93, 17, 87,
      9,  72, 77, 78, 1,  74, 97, 54, 87, 44, 43, 64, 70, 34, 62, 82, 74, 48,
      41, 54, 41, 78, 75, 4,  21, 30, 80, 41, 17, 13, 76, 87, 47, 68, 37, 17,
      42, 32, 23, 15, 70, 56, 40, 31, 33, 79, 77, 73, 21, 4,  54, 41, 25, 67,
      18, 6,  26, 42, 36, 44, 33, 87, 94, 22, 41, 79, 15, 16, 5,  84, 29, 30,
      25, 67, 3,  55, 96, 36, 36, 89, 2,  47, 92, 94, 23, 63, 54, 45, 14, 41,
      18, 48, 61, 91, 33, 99, 9,  52, 59, 71, 20, 62, 99, 94, 6,  79, 59, 99,
      94, 3,  9,  16, 53, 74, 55, 43, 44, 62, 89, 2,  17, 97, 47, 99, 87, 31,
      90, 82, 26, 33, 7,  92, 0,  98, 78, 94, 44, 89, 5,  97, 18, 43, 19, 6,
      74, 57, 33, 0,  14, 50, 43, 8,  19, 21, 96, 95, 28, 60, 11, 81, 65, 10,
      20, 51, 45, 45, 54, 16, 22, 26, 35, 30, 79, 51, 16, 91, 25, 40, 25, 75,
      85, 43, 72, 3,  23, 5,  59, 90, 12, 89, 81, 86, 28, 75, 5,  79, 45, 28,
      33, 65, 22, 15, 14, 76, 29, 85, 89, 37, 19, 84, 5,  51};
  // Second fixed 2048-element sample vector.
  int8_t b2048[] = {
      43, 84, 90, 44, 54, 43, 49, 42, 24, 10, 61, 8,  68, 2,  75, 9,  25, 25,
      80, 6,  9,  62, 33, 22, 84, 43, 20, 34, 33, 53, 47, 8,  16, 15, 4,  96,
      3,  73, 75, 61, 75, 68, 37, 6,  25, 48, 40, 0,  67, 89, 98, 92, 37, 72,
      44, 94, 88, 42, 97, 24, 11, 24, 39, 13, 34, 30, 58, 22, 29, 28, 22, 82,
      15, 16, 57, 99, 9,  7,  76, 57, 39, 31, 21, 7,  44, 73, 88, 8,  62, 47,
      45, 65, 11, 78, 82, 89, 72, 18, 9,  24, 59, 75, 17, 0,  70, 1,  62, 52,
      51, 67, 5,  99, 83, 80, 82, 16, 43, 43, 94, 8,  52, 58, 68, 60, 72, 26,
      57, 22, 72, 95, 70, 12, 51, 43, 28, 53, 72, 0,  12, 67, 96, 89, 34, 28,
      9,  96, 5,  82, 19, 52, 28, 8,  8,  45, 60, 34, 66, 60, 54, 41, 87, 13,
      15, 23, 96, 29, 70, 50, 72, 10, 87, 98, 81, 11, 43, 27, 96, 9,  17, 16,
      6,  14, 31, 12, 89, 55, 37, 91, 50, 74, 12, 63, 10, 77, 81, 5,  98, 96,
      22, 9,  3,  48, 96, 1,  36, 87, 54, 40, 91, 51, 35, 38, 56, 78, 84, 4,
      95, 2,  20, 18, 87, 60, 73, 28, 69, 55, 8,  12, 86, 2,  31, 55, 46, 57,
      77, 25, 54, 50, 58, 13, 93, 6,  79, 80, 83, 78, 27, 1,  14, 52, 70, 82,
      87, 81, 82, 63, 86, 24, 37, 12, 66, 22, 63, 93, 21, 11, 86, 92, 22, 47,
      33, 84, 28, 69, 69, 31, 39, 43, 2,  29, 14, 14, 62, 42, 75, 37, 36, 88,
      98, 53, 18, 81, 40, 3,  49, 85, 99, 65, 15, 21, 23, 88, 42, 80, 79, 94,
      46, 2,  46, 91, 80, 4,  13, 90, 3,  52, 23, 65, 30, 1,  37, 86, 71, 64,
      63, 56, 44, 10, 49, 6,  31, 10, 85, 75, 50, 27, 65, 58, 96, 0,  26, 0,
      69, 70, 3,  69, 91, 96, 59, 44, 29, 20, 22, 54, 16, 69, 0,  16, 3,  69,
      64, 68, 55, 9,  71, 62, 38, 84, 6,  27, 21, 50, 42, 1,  27, 14, 49, 16,
      74, 10, 45, 31, 37, 61, 72, 8,  94, 93, 25, 81, 62, 9,  35, 15, 21, 48,
      64, 62, 18, 72, 38, 85, 55, 27, 20, 86, 56, 84, 72, 12, 59, 54, 94, 83,
      21, 25, 34, 11, 82, 32, 59, 90, 97, 81, 29, 18, 38, 16, 5,  53, 96, 85,
      19, 88, 37, 72, 32, 38, 41, 74, 70, 12, 60, 3,  67, 29, 2,  60, 38, 6,
      82, 34, 53, 24, 31, 18, 14, 40, 39, 61, 10, 6,  69, 40, 76, 32, 9,  4,
      47, 65, 13, 45, 60, 35, 59, 53, 67, 88, 74, 71, 3,  32, 97, 4,  77, 55,
      25, 27, 38, 18, 91, 48, 86, 18, 30, 66, 22, 3,  24, 8,  43, 72, 75, 22,
      7,  46, 5,  58, 67, 10, 95, 55, 99, 12, 59, 40, 57, 89, 50, 80, 41, 41,
      36, 28, 35, 87, 66, 94, 9,  11, 24, 19, 94, 51, 3,  34, 21, 44, 33, 71,
      12, 1,  58, 84, 78, 85, 55, 41, 63, 25, 13, 15, 69, 7,  43, 55, 52, 15,
      16, 19, 85, 63, 71, 66, 29, 55, 64, 27, 79, 74, 15, 62, 54, 83, 50, 38,
      54, 2,  40, 29, 94, 65, 32, 50, 41, 72, 5,  68, 15, 8,  4,  50, 74, 37,
      76, 61, 53, 71, 9,  70, 1,  1,  44, 38, 7,  6,  49, 53, 44, 57, 80, 45,
      79, 97, 85, 2,  81, 3,  67, 72, 31, 52, 41, 42, 83, 97, 30, 32, 39, 38,
      71, 32, 17, 96, 12, 34, 52, 64, 25, 20, 60, 2,  53, 66, 1,  38, 10, 75,
      98, 44, 11, 16, 15, 53, 12, 29, 18, 46, 91, 13, 26, 36, 74, 32, 3,  97,
      76, 97, 80, 11, 27, 54, 57, 9,  0,  10, 28, 8,  55, 83, 56, 57, 82, 2,
      70, 42, 2,  64, 84, 97, 1,  34, 2,  7,  42, 54, 20, 55, 39, 77, 79, 58,
      59, 16, 98, 95, 31, 22, 80, 77, 15, 12, 39, 29, 86, 8,  4,  13, 72, 95,
      67, 45, 2,  53, 61, 3,  87, 94, 33, 60, 63, 33, 42, 33, 44, 35, 69, 22,
      96, 69, 73, 33, 28, 0,  79, 23, 54, 23, 80, 87, 99, 32, 56, 0,  51, 40,
      12, 28, 68, 74, 6,  71, 68, 18, 72, 99, 58, 48, 44, 12, 55, 98, 46, 19,
      93, 62, 65, 36, 43, 38, 10, 23, 3,  48, 27, 51, 5,  48, 97, 28, 73, 64,
      43, 77, 10, 52, 36, 5,  1,  44, 18, 20, 58, 21, 30, 14, 12, 35, 66, 90,
      31, 69, 93, 30, 51, 17, 43, 10, 53, 83, 91, 65, 44, 72, 32, 41, 41, 3,
      48, 67, 98, 86, 65, 67, 82, 25, 73, 53, 23, 99, 86, 95, 43, 52, 53, 82,
      65, 79, 59, 64, 69, 89, 71, 13, 60, 28, 61, 97, 88, 39, 31, 65, 90, 40,
      20, 51, 2,  6,  74, 2,  62, 97, 21, 6,  25, 23, 42, 72, 24, 96, 72, 84,
      55, 29, 32, 55, 98, 79, 16, 52, 69, 85, 74, 19, 26, 25, 6,  47, 88, 90,
      40, 63, 58, 45, 64, 59, 65, 83, 27, 62, 15, 65, 23, 68, 23, 95, 13, 35,
      6,  93, 97, 91, 37, 37, 7,  86, 98, 81, 34, 61, 44, 4,  85, 87, 74, 54,
      80, 45, 68, 19, 48, 27, 73, 78, 76, 90, 75, 93, 4,  32, 36, 87, 19, 71,
      47, 37, 83, 83, 99, 58, 83, 2,  34, 25, 18, 25, 74, 8,  12, 96, 83, 93,
      36, 96, 4,  82, 9,  57, 70, 36, 96, 73, 88, 72, 69, 80, 10, 12, 20, 11,
      33, 97, 79, 52, 83, 56, 71, 59, 20, 70, 50, 63, 79, 60, 15, 97, 72, 47,
      53, 60, 89, 53, 98, 24, 86, 40, 74, 9,  39, 27, 15, 59, 11, 84, 41, 68,
      91, 13, 27, 40, 52, 89, 29, 52, 32, 37, 33, 48, 44, 10, 62, 18, 87, 53,
      56, 84, 95, 57, 38, 73, 75, 58, 66, 93, 65, 81, 45, 66, 54, 73, 27, 72,
      46, 46, 19, 46, 53, 53, 5,  77, 88, 3,  19, 99, 67, 16, 89, 93, 68, 37,
      29, 94, 69, 3,  29, 8,  76, 25, 0,  28, 24, 71, 25, 90, 87, 97, 32, 80,
      23, 90, 86, 30, 80, 40, 80, 46, 17, 66, 97, 4,  36, 2,  31, 14, 75, 15,
      34, 84, 56, 76, 61, 15, 93, 87, 52, 69, 26, 2,  18, 39, 60, 37, 31, 79,
      27, 84, 36, 53, 76, 62, 71, 62, 74, 51, 59, 26, 70, 94, 56, 89, 72, 3,
      26, 27, 66, 49, 16, 13, 81, 44, 85, 7,  54, 6,  14, 35, 60, 84, 48, 24,
      11, 29, 57, 15, 0,  76, 23, 72, 11, 50, 69, 90, 20, 5,  32, 64, 4,  23,
      82, 33, 94, 69, 28, 99, 80, 85, 27, 89, 8,  45, 37, 34, 57, 87, 37, 57,
      73, 17, 56, 45, 25, 1,  67, 67, 67, 56, 81, 20, 23, 25, 37, 93, 93, 13,
      5,  58, 62, 93, 16, 61, 69, 43, 52, 66, 59, 20, 65, 89, 84, 67, 98, 98,
      10, 21, 10, 27, 83, 39, 69, 6,  49, 88, 95, 83, 61, 87, 78, 38, 67, 43,
      45, 61, 69, 71, 4,  45, 49, 78, 51, 30, 84, 4,  47, 18, 71, 73, 32, 73,
      24, 56, 76, 82, 99, 40, 39, 42, 71, 24, 57, 83, 31, 68, 6,  38, 38, 2,
      46, 46, 90, 61, 89, 30, 15, 5,  76, 24, 70, 35, 90, 45, 45, 91, 47, 73,
      34, 30, 53, 64, 61, 94, 96, 58, 84, 37, 32, 19, 34, 12, 96, 75, 28, 86,
      66, 91, 55, 93, 93, 6,  69, 51, 44, 92, 40, 85, 22, 1,  42, 10, 38, 86,
      52, 28, 19, 7,  75, 5,  47, 28, 52, 76, 50, 27, 56, 59, 95, 85, 89, 63,
      62, 73, 56, 52, 89, 5,  8,  70, 28, 62, 36, 21, 15, 6,  19, 10, 19, 38,
      4,  61, 27, 87, 71, 54, 34, 5,  27, 48, 8,  26, 48, 29, 4,  76, 52, 29,
      21, 36, 34, 87, 11, 97, 78, 0,  34, 46, 93, 51, 77, 14, 47, 86, 2,  92,
      84, 92, 15, 57, 67, 12, 37, 5,  74, 49, 59, 13, 88, 0,  59, 29, 86, 91,
      19, 20, 2,  19, 4,  51, 68, 5,  77, 26, 36, 88, 73, 13, 68, 5,  77, 18,
      25, 13, 25, 47, 66, 69, 75, 45, 51, 35, 2,  61, 95, 60, 86, 97, 17, 74,
      19, 52, 43, 76, 26, 51, 38, 27, 13, 81, 53, 3,  87, 2,  99, 36, 7,  72,
      44, 42, 10, 8,  78, 87, 20, 75, 9,  36, 25, 0,  56, 37, 20, 13, 41, 80,
      69, 76, 39, 47, 61, 28, 87, 81, 30, 11, 4,  62, 66, 3,  77, 7,  0,  95,
      52, 81, 42, 84, 47, 78, 55, 25, 55, 13, 63, 32, 16, 68, 8,  35, 1,  30,
      66, 75, 79, 63, 71, 63, 65, 70, 92, 74, 68, 92, 61, 97, 36, 86, 61, 3,
      85, 13, 97, 69, 56, 58, 22, 71, 70, 86, 61, 33, 79, 91, 21, 72, 80, 65,
      27, 14, 82, 82, 20, 87, 47, 4,  38, 49, 89, 63, 10, 45, 48, 96, 8,  78,
      95, 67, 3,  6,  2,  64, 44, 89, 13, 31, 18, 83, 95, 92, 11, 80, 35, 87,
      14, 14, 58, 22, 86, 16, 98, 7,  26, 67, 27, 91, 96, 56, 28, 19, 17, 81,
      4,  56, 23, 19, 17, 77, 54, 93, 64, 27, 21, 40, 31, 24, 24, 55, 28, 73,
      13, 33, 76, 47, 38, 48, 66, 95, 72, 84, 23, 77, 65, 5,  28, 55, 32, 0,
      14, 47, 57, 33, 36, 26, 59, 98, 85, 2,  49, 29, 40, 44, 84, 24, 23, 88,
      66, 91, 4,  0,  4,  99, 40, 94, 55, 19, 13, 22, 96, 37, 89, 94, 78, 50,
      0,  37, 48, 79, 69, 16, 15, 57, 91, 52, 85, 92, 18, 38, 56, 55, 11, 10,
      27, 48, 98, 53, 83, 27, 14, 25, 53, 64, 71, 67, 26, 47, 53, 30, 76, 76,
      67, 83, 9,  20, 4,  61, 69, 10, 93, 63, 37, 22, 26, 64, 10, 75, 39, 86,
      34, 44, 42, 44, 4,  42, 37, 85, 3,  95, 49, 43, 84, 44, 73, 7,  59, 33,
      21, 46, 86, 88, 17, 88, 83, 32, 53, 6,  83, 85, 54, 32, 92, 45, 13, 20,
      49, 42, 7,  54, 76, 62, 58, 13, 99, 43, 94, 60, 43, 94, 58, 35, 69, 84,
      23, 57, 22, 81, 97, 97, 49, 91, 76, 65, 71, 82, 72, 39, 53, 92, 58, 77,
      20, 39, 20, 48, 46, 52, 20, 9,  85, 9,  48, 89, 24, 65, 73, 81, 73, 10,
      1,  25, 89, 83, 48, 38, 56, 82, 68, 27, 35, 87, 68, 32, 89, 23, 90, 5,
      99, 19, 55, 97, 83, 41, 34, 29, 69, 58, 8,  2,  90, 54, 66, 66, 37, 27,
      86, 46, 48, 50, 63, 76, 96, 41, 36, 9,  38, 31, 46, 58, 17, 53, 53, 81,
      79, 94, 95, 98, 96, 40, 43, 63, 2,  5,  26, 22, 10, 21, 43, 30, 30, 29,
      80, 49, 51, 74, 41, 64, 86, 50, 23, 81, 48, 41, 48, 98, 55, 38, 61, 40,
      52, 79, 99, 17, 71, 78, 62, 40, 5,  15, 26, 47, 75, 67, 17, 46, 93, 90,
      2,  81, 78, 22, 12, 74, 7,  7,  36, 48, 13, 41, 30, 68, 86, 50, 28, 72,
      40, 45, 82, 92, 38, 95, 68, 48, 42, 23, 4,  40, 82, 9,  59, 81, 58, 33,
      68, 12, 60, 71, 91, 47, 49, 21, 55, 1,  77, 57, 53, 4,  67, 4,  13, 29,
      76, 28, 70, 29, 20, 25, 81, 1,  57, 26, 74, 79, 95, 63, 83, 3,  28, 31,
      49, 30, 87, 84, 29, 60, 47, 49, 45, 16, 37, 68, 13, 19};

  // The raw arrays are viewed as FixedVector<int8_t, 2048> through Cast and
  // compared against a precomputed distance.
  EXPECT_FLOAT_EQ(1844.638672f,
                  EuclideanDistance(*FixedVector<int8_t, 2048>::Cast(a2048),
                                    *FixedVector<int8_t, 2048>::Cast(b2048)));
}

// Checks Distance::SquaredEuclidean (through the SquaredEuclideanDistance
// helper) on fixed int8 vectors of 8, 16, 32 and 47 elements against
// precomputed expected values. Each fixture pair sits next to its expectation.
TEST(DistanceMatrix, SquaredEuclidean_General) {
  // 8 elements.
  int8_t a8[] = {127, 0, 1, 2, -127, -127, -127, -127};
  int8_t b8[] = {-127, -127, -127, -127, 1, 2, 1, 127};
  EXPECT_FLOAT_EQ(227595.0f,
                  SquaredEuclideanDistance(*FixedVector<int8_t, 8>::Cast(a8),
                                           *FixedVector<int8_t, 8>::Cast(b8)));

  // 16 elements.
  int8_t a16[] = {127, 127, 16,   3,   100,  -127, 1,    2,
                  3,   4,   -127, 100, -127, -127, -127, -127};
  int8_t b16[] = {-127, 123, -127, -127, -127, -127, 127, 127,
                  1,    2,   3,    4,    127,  127,  121, 16};
  EXPECT_FLOAT_EQ(
      422020.0f, SquaredEuclideanDistance(*FixedVector<int8_t, 16>::Cast(a16),
                                          *FixedVector<int8_t, 16>::Cast(b16)));

  // 32 elements: sixteen positions differ by 254, the rest are equal.
  int8_t a32[] = {127, 127,  0,    0,   -127, -127, 0,    0,    0,    0, 0,
                  0,   -127, -127, 127, 127,  0,    0,    -127, -127, 0, 0,
                  127, 127,  127,  127, 0,    0,    -127, -127, 0,    0};
  int8_t b32[] = {-127, -127, 0,    0,    127,  127, 0,   0,   0,   0, 0,
                  0,    127,  127,  -127, -127, 0,   0,   127, 127, 0, 0,
                  -127, -127, -127, -127, 0,    0,   127, 127, 0,   0};
  EXPECT_FLOAT_EQ(1032256.0f, SquaredEuclideanDistance(
                                  *FixedVector<int8_t, 32>::Cast(a32),
                                  *FixedVector<int8_t, 32>::Cast(b32)));

  // 47 elements: a length that is not a multiple of 4.
  int8_t a47[] = {127, 2, 0,    0,    -127, -127, 0,    0,    0,    0,
                  0,   0, -127, -127, 127,  127,  0,    0,    -127, -127,
                  0,   0, 127,  5,    127,  127,  0,    0,    -127, -127,
                  0,   0, -127, 112,  -127, -127, -127, -127, 127,  127,
                  1,   2, 3,    4,    127,  127,  120};
  int8_t b47[] = {-127, 1, 0,    0,   127,  127,  0,   0,    0,   0,
                  0,    0, 127,  127, -127, -127, 0,   0,    127, 127,
                  0,    0, -127, 3,   -127, -127, 0,   0,    127, 127,
                  0,    0, 127,  127, 80,   111,  122, -127, 1,   2,
                  3,    4, -127, 112, -127, -127, -127};
  EXPECT_FLOAT_EQ(1379578.0f, SquaredEuclideanDistance(
                                  *FixedVector<int8_t, 47>::Cast(a47),
                                  *FixedVector<int8_t, 47>::Cast(b47)));
}

// Cross-checks SquaredEuclideanDistanceMatrix<int8_t, M, N> against the 1 x 1
// variant on randomly generated int8 data.
//
// M is the number of stored vectors and N the number of queries. The reference
// results are computed pair by pair from the row-major buffers; the batched
// kernel is fed the same data regrouped by MatrixTranspose in 4-byte units.
template <size_t M, size_t N>
void TestSquaredEuclideanMatrix(void) {
  std::mt19937 rng((std::random_device())());

  // The dimension is drawn before any vector data: a multiple of 4 in
  // [4, 256], so every vector splits evenly into uint32_t groups.
  const size_t dim = (std::uniform_int_distribution<size_t>(1, 64))(rng) << 2;
  const size_t data_count = M * dim;
  const size_t query_count = N * dim;

  std::vector<int8_t> matrix_rows(data_count);
  std::vector<int8_t> matrix_packed(data_count);
  std::vector<int8_t> query_rows(query_count);
  std::vector<int8_t> query_packed(query_count);
  std::vector<float> result1(M * N);
  std::vector<float> result2(M * N);

  std::uniform_int_distribution<int> value_dist(-127, 127);
  for (int8_t &value : matrix_rows) {
    value = (int8_t)value_dist(rng);
  }
  for (int8_t &value : query_rows) {
    value = (int8_t)value_dist(rng);
  }

  // Build the transposed copies consumed by the batched kernel.
  MatrixTranspose(reinterpret_cast<uint32_t *>(matrix_packed.data()),
                  reinterpret_cast<const uint32_t *>(matrix_rows.data()),
                  dim / 4, M);
  MatrixTranspose(reinterpret_cast<uint32_t *>(query_packed.data()),
                  reinterpret_cast<const uint32_t *>(query_rows.data()),
                  dim / 4, N);

  // Reference: one 1 x 1 computation per (query, vector) pair, stored
  // query-major.
  for (size_t q = 0; q != N; ++q) {
    for (size_t v = 0; v != M; ++v) {
      SquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute(
          &matrix_rows[v * dim], &query_rows[q * dim], dim,
          &result1[q * M + v]);
    }
  }

  // Batched kernel under test.
  SquaredEuclideanDistanceMatrix<int8_t, M, N>::Compute(
      matrix_packed.data(), query_packed.data(), dim, result2.data());

  for (size_t i = 0; i != M * N; ++i) {
    EXPECT_FLOAT_EQ(result1[i], result2[i]);
  }
}

// Cross-checks EuclideanDistanceMatrix<int8_t, M, N> against the 1 x 1 variant
// on randomly generated int8 data.
//
// M is the number of stored vectors and N the number of queries. The reference
// results are computed pair by pair from the row-major buffers; the batched
// kernel is fed the same data regrouped by MatrixTranspose in 4-byte units.
template <size_t M, size_t N>
void TestEuclideanMatrix(void) {
  std::mt19937 rng((std::random_device())());

  // The dimension is drawn before any vector data: a multiple of 4 in
  // [4, 256], so every vector splits evenly into uint32_t groups.
  const size_t dim = (std::uniform_int_distribution<size_t>(1, 64))(rng) << 2;
  const size_t data_count = M * dim;
  const size_t query_count = N * dim;

  std::vector<int8_t> matrix_rows(data_count);
  std::vector<int8_t> matrix_packed(data_count);
  std::vector<int8_t> query_rows(query_count);
  std::vector<int8_t> query_packed(query_count);
  std::vector<float> result1(M * N);
  std::vector<float> result2(M * N);

  std::uniform_int_distribution<int> value_dist(-127, 127);
  for (int8_t &value : matrix_rows) {
    value = (int8_t)value_dist(rng);
  }
  for (int8_t &value : query_rows) {
    value = (int8_t)value_dist(rng);
  }

  // Build the transposed copies consumed by the batched kernel.
  MatrixTranspose(reinterpret_cast<uint32_t *>(matrix_packed.data()),
                  reinterpret_cast<const uint32_t *>(matrix_rows.data()),
                  dim / 4, M);
  MatrixTranspose(reinterpret_cast<uint32_t *>(query_packed.data()),
                  reinterpret_cast<const uint32_t *>(query_rows.data()),
                  dim / 4, N);

  // Reference: one 1 x 1 computation per (query, vector) pair, stored
  // query-major.
  for (size_t q = 0; q != N; ++q) {
    for (size_t v = 0; v != M; ++v) {
      EuclideanDistanceMatrix<int8_t, 1, 1>::Compute(
          &matrix_rows[v * dim], &query_rows[q * dim], dim,
          &result1[q * M + v]);
    }
  }

  // Batched kernel under test.
  EuclideanDistanceMatrix<int8_t, M, N>::Compute(
      matrix_packed.data(), query_packed.data(), dim, result2.data());

  for (size_t i = 0; i != M * N; ++i) {
    EXPECT_FLOAT_EQ(result1[i], result2[i]);
  }
}

// Squared Euclidean int8 matrix cases. Each test runs
// TestSquaredEuclideanMatrix<M, N>, where M x N is the size given in the test
// name (M stored vectors, N queries).
TEST(DistanceMatrix, SquaredEuclidean_1x1) {
  TestSquaredEuclideanMatrix<1, 1>();
}

TEST(DistanceMatrix, SquaredEuclidean_2x1) {
  TestSquaredEuclideanMatrix<2, 1>();
}

TEST(DistanceMatrix, SquaredEuclidean_2x2) {
  TestSquaredEuclideanMatrix<2, 2>();
}

// The only size in this list that is not a power of two.
TEST(DistanceMatrix, SquaredEuclidean_3x3) {
  TestSquaredEuclideanMatrix<3, 3>();
}

TEST(DistanceMatrix, SquaredEuclidean_4x1) {
  TestSquaredEuclideanMatrix<4, 1>();
}

TEST(DistanceMatrix, SquaredEuclidean_4x2) {
  TestSquaredEuclideanMatrix<4, 2>();
}

TEST(DistanceMatrix, SquaredEuclidean_4x4) {
  TestSquaredEuclideanMatrix<4, 4>();
}

TEST(DistanceMatrix, SquaredEuclidean_8x1) {
  TestSquaredEuclideanMatrix<8, 1>();
}

TEST(DistanceMatrix, SquaredEuclidean_8x2) {
  TestSquaredEuclideanMatrix<8, 2>();
}

TEST(DistanceMatrix, SquaredEuclidean_8x4) {
  TestSquaredEuclideanMatrix<8, 4>();
}

TEST(DistanceMatrix, SquaredEuclidean_8x8) {
  TestSquaredEuclideanMatrix<8, 8>();
}

TEST(DistanceMatrix, SquaredEuclidean_16x1) {
  TestSquaredEuclideanMatrix<16, 1>();
}

TEST(DistanceMatrix, SquaredEuclidean_16x2) {
  TestSquaredEuclideanMatrix<16, 2>();
}

TEST(DistanceMatrix, SquaredEuclidean_16x4) {
  TestSquaredEuclideanMatrix<16, 4>();
}

TEST(DistanceMatrix, SquaredEuclidean_16x8) {
  TestSquaredEuclideanMatrix<16, 8>();
}

TEST(DistanceMatrix, SquaredEuclidean_16x16) {
  TestSquaredEuclideanMatrix<16, 16>();
}

TEST(DistanceMatrix, SquaredEuclidean_32x1) {
  TestSquaredEuclideanMatrix<32, 1>();
}

TEST(DistanceMatrix, SquaredEuclidean_32x2) {
  TestSquaredEuclideanMatrix<32, 2>();
}

TEST(DistanceMatrix, SquaredEuclidean_32x4) {
  TestSquaredEuclideanMatrix<32, 4>();
}

TEST(DistanceMatrix, SquaredEuclidean_32x8) {
  TestSquaredEuclideanMatrix<32, 8>();
}

TEST(DistanceMatrix, SquaredEuclidean_32x16) {
  TestSquaredEuclideanMatrix<32, 16>();
}

TEST(DistanceMatrix, SquaredEuclidean_32x32) {
  TestSquaredEuclideanMatrix<32, 32>();
}

TEST(DistanceMatrix, SquaredEuclidean_64x1) {
  TestSquaredEuclideanMatrix<64, 1>();
}

TEST(DistanceMatrix, SquaredEuclidean_64x2) {
  TestSquaredEuclideanMatrix<64, 2>();
}

TEST(DistanceMatrix, SquaredEuclidean_64x4) {
  TestSquaredEuclideanMatrix<64, 4>();
}

TEST(DistanceMatrix, SquaredEuclidean_64x8) {
  TestSquaredEuclideanMatrix<64, 8>();
}

TEST(DistanceMatrix, SquaredEuclidean_64x16) {
  TestSquaredEuclideanMatrix<64, 16>();
}

TEST(DistanceMatrix, SquaredEuclidean_64x32) {
  TestSquaredEuclideanMatrix<64, 32>();
}

// Squared Euclidean int8 kernel with 64 stored vectors against 64 queries.
// The template arguments must match the size in the test name so that the
// 64 x 64 instantiation is the one actually exercised.
TEST(DistanceMatrix, SquaredEuclidean_64x64) {
  TestSquaredEuclideanMatrix<64, 64>();
}

// Squared Euclidean int8 matrix cases with 128 stored vectors. Each test runs
// TestSquaredEuclideanMatrix<128, N>, where N is the query count given in the
// test name.
TEST(DistanceMatrix, SquaredEuclidean_128x1) {
  TestSquaredEuclideanMatrix<128, 1>();
}

TEST(DistanceMatrix, SquaredEuclidean_128x2) {
  TestSquaredEuclideanMatrix<128, 2>();
}

TEST(DistanceMatrix, SquaredEuclidean_128x4) {
  TestSquaredEuclideanMatrix<128, 4>();
}

TEST(DistanceMatrix, SquaredEuclidean_128x8) {
  TestSquaredEuclideanMatrix<128, 8>();
}

TEST(DistanceMatrix, SquaredEuclidean_128x16) {
  TestSquaredEuclideanMatrix<128, 16>();
}

TEST(DistanceMatrix, SquaredEuclidean_128x32) {
  TestSquaredEuclideanMatrix<128, 32>();
}

// Squared Euclidean int8 kernel with 128 stored vectors against 64 queries.
// The template arguments must match the size in the test name; using
// <128, 128> here would only repeat the 128 x 128 case below and leave the
// 128 x 64 instantiation untested.
TEST(DistanceMatrix, SquaredEuclidean_128x64) {
  TestSquaredEuclideanMatrix<128, 64>();
}

// Squared Euclidean int8 kernel with 128 stored vectors against 128 queries.
TEST(DistanceMatrix, SquaredEuclidean_128x128) {
  TestSquaredEuclideanMatrix<128, 128>();
}

// Euclidean int8 matrix cases. Each test runs TestEuclideanMatrix<M, N>, where
// M x N is the size given in the test name (M stored vectors, N queries).
TEST(DistanceMatrix, Euclidean_1x1) {
  TestEuclideanMatrix<1, 1>();
}

TEST(DistanceMatrix, Euclidean_2x1) {
  TestEuclideanMatrix<2, 1>();
}

TEST(DistanceMatrix, Euclidean_2x2) {
  TestEuclideanMatrix<2, 2>();
}

// The only size in this list that is not a power of two.
TEST(DistanceMatrix, Euclidean_3x3) {
  TestEuclideanMatrix<3, 3>();
}

TEST(DistanceMatrix, Euclidean_4x1) {
  TestEuclideanMatrix<4, 1>();
}

TEST(DistanceMatrix, Euclidean_4x2) {
  TestEuclideanMatrix<4, 2>();
}

TEST(DistanceMatrix, Euclidean_4x4) {
  TestEuclideanMatrix<4, 4>();
}

TEST(DistanceMatrix, Euclidean_8x1) {
  TestEuclideanMatrix<8, 1>();
}

TEST(DistanceMatrix, Euclidean_8x2) {
  TestEuclideanMatrix<8, 2>();
}

TEST(DistanceMatrix, Euclidean_8x4) {
  TestEuclideanMatrix<8, 4>();
}

TEST(DistanceMatrix, Euclidean_8x8) {
  TestEuclideanMatrix<8, 8>();
}

TEST(DistanceMatrix, Euclidean_16x1) {
  TestEuclideanMatrix<16, 1>();
}

TEST(DistanceMatrix, Euclidean_16x2) {
  TestEuclideanMatrix<16, 2>();
}

TEST(DistanceMatrix, Euclidean_16x4) {
  TestEuclideanMatrix<16, 4>();
}

TEST(DistanceMatrix, Euclidean_16x8) {
  TestEuclideanMatrix<16, 8>();
}

TEST(DistanceMatrix, Euclidean_16x16) {
  TestEuclideanMatrix<16, 16>();
}

TEST(DistanceMatrix, Euclidean_32x1) {
  TestEuclideanMatrix<32, 1>();
}

TEST(DistanceMatrix, Euclidean_32x2) {
  TestEuclideanMatrix<32, 2>();
}

TEST(DistanceMatrix, Euclidean_32x4) {
  TestEuclideanMatrix<32, 4>();
}

TEST(DistanceMatrix, Euclidean_32x8) {
  TestEuclideanMatrix<32, 8>();
}

TEST(DistanceMatrix, Euclidean_32x16) {
  TestEuclideanMatrix<32, 16>();
}

TEST(DistanceMatrix, Euclidean_32x32) {
  TestEuclideanMatrix<32, 32>();
}

TEST(DistanceMatrix, Euclidean_64x1) {
  TestEuclideanMatrix<64, 1>();
}

TEST(DistanceMatrix, Euclidean_64x2) {
  TestEuclideanMatrix<64, 2>();
}

TEST(DistanceMatrix, Euclidean_64x4) {
  TestEuclideanMatrix<64, 4>();
}

TEST(DistanceMatrix, Euclidean_64x8) {
  TestEuclideanMatrix<64, 8>();
}

TEST(DistanceMatrix, Euclidean_64x16) {
  TestEuclideanMatrix<64, 16>();
}

TEST(DistanceMatrix, Euclidean_64x32) {
  TestEuclideanMatrix<64, 32>();
}

// Euclidean int8 kernel with 64 stored vectors against 64 queries. The
// template arguments must match the size in the test name so that the 64 x 64
// instantiation is the one actually exercised.
TEST(DistanceMatrix, Euclidean_64x64) {
  TestEuclideanMatrix<64, 64>();
}

// Euclidean int8 matrix cases with 128 stored vectors. Each test runs
// TestEuclideanMatrix<128, N>, where N is the query count given in the test
// name.
TEST(DistanceMatrix, Euclidean_128x1) {
  TestEuclideanMatrix<128, 1>();
}

TEST(DistanceMatrix, Euclidean_128x2) {
  TestEuclideanMatrix<128, 2>();
}

TEST(DistanceMatrix, Euclidean_128x4) {
  TestEuclideanMatrix<128, 4>();
}

TEST(DistanceMatrix, Euclidean_128x8) {
  TestEuclideanMatrix<128, 8>();
}

TEST(DistanceMatrix, Euclidean_128x16) {
  TestEuclideanMatrix<128, 16>();
}

TEST(DistanceMatrix, Euclidean_128x32) {
  TestEuclideanMatrix<128, 32>();
}

// Euclidean int8 kernel with 128 stored vectors against 64 queries. The
// template arguments must match the size in the test name; using <128, 128>
// here would only repeat the 128 x 128 case below and leave the 128 x 64
// instantiation untested.
TEST(DistanceMatrix, Euclidean_128x64) {
  TestEuclideanMatrix<128, 64>();
}

// Euclidean int8 kernel with 128 stored vectors against 128 queries.
TEST(DistanceMatrix, Euclidean_128x128) {
  TestEuclideanMatrix<128, 128>();
}

template <size_t M, size_t N, size_t B, size_t D>
void EuclideanBenchmark(void) {
  const size_t dimension = D;
  const size_t batch_size = M;
  const size_t block_size = B;
  const size_t query_size = N;
  const size_t matrix_size = block_size * batch_size * dimension;
  const size_t query_matrix_size = dimension * query_size;

  std::vector<int8_t> matrix1(matrix_size);
  std::vector<int8_t> matrix2(matrix_size);
  std::vector<int8_t> query1(query_matrix_size);
  std::vector<int8_t> query2(query_matrix_size);

  std::mt19937 gen((std::random_device())());
  std::uniform_int_distribution<int> dist(-127, 127);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = (int8_t)dist(gen);
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = (int8_t)dist(gen);
  }

  for (size_t i = 0; i < block_size; ++i) {
    size_t start_pos = i * batch_size * dimension;
    MatrixTranspose((uint32_t *)(&matrix2[start_pos]),
                    (const uint32_t *)(&matrix1[start_pos]), dimension / 4,
                    batch_size);
  }
  MatrixTranspose((uint32_t *)(&query2[0]), (const uint32_t *)query1.data(),
                  dimension / 4, query_size);

  ElapsedTime elapsed_time;
  std::vector<float> results(batch_size * query_size);

  std::cout << "# (" << IntelIntrinsics() << ") INT8 " << dimension << "d, "
            << batch_size << " * " << query_size << " * " << block_size
            << std::endl;

  // 1 Batched Euclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const int8_t *matrix_batch = &matrix2[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const int8_t *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      EuclideanDistanceMatrix<int8_t, batch_size, 1>::Compute(
          matrix_batch, current_query, dimension, current_results);
    }
  }
  std::cout << "* 1 Batched Euclidean (us) \t" << elapsed_time.micro_seconds()
            << std::endl;

  // N Batched Euclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const int8_t *matrix_batch = &matrix2[i * batch_size * dimension];

    EuclideanDistanceMatrix<int8_t, batch_size, query_size>::Compute(
        matrix_batch, &query2[0], dimension, results.data());
  }
  std::cout << "* N Batched Euclidean (us) \t" << elapsed_time.micro_seconds()
            << std::endl;

  // Unbatched Euclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const int8_t *matrix_batch = &matrix1[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const int8_t *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      for (size_t k = 0; k < batch_size; ++k) {
        EuclideanDistanceMatrix<int8_t, 1, 1>::Compute(
            &matrix_batch[k * dimension], current_query, dimension,
            &current_results[k]);
      }
    }
  }
  std::cout << "* Unbatched Euclidean (us) \t" << elapsed_time.micro_seconds()
            << std::endl;
}

template <size_t M, size_t N, size_t B, size_t D>
void SquaredEuclideanBenchmark(void) {
  const size_t dimension = D;
  const size_t batch_size = M;
  const size_t block_size = B;
  const size_t query_size = N;
  const size_t matrix_size = block_size * batch_size * dimension;
  const size_t query_matrix_size = dimension * query_size;

  std::vector<int8_t> matrix1(matrix_size);
  std::vector<int8_t> matrix2(matrix_size);
  std::vector<int8_t> query1(query_matrix_size);
  std::vector<int8_t> query2(query_matrix_size);

  std::mt19937 gen((std::random_device())());
  std::uniform_int_distribution<int> dist(-127, 127);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = (int8_t)dist(gen);
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = (int8_t)dist(gen);
  }

  for (size_t i = 0; i < block_size; ++i) {
    size_t start_pos = i * batch_size * dimension;
    MatrixTranspose((uint32_t *)(&matrix2[start_pos]),
                    (const uint32_t *)(&matrix1[start_pos]), dimension / 4,
                    batch_size);
  }
  MatrixTranspose((uint32_t *)(&query2[0]), (const uint32_t *)query1.data(),
                  dimension / 4, query_size);

  ElapsedTime elapsed_time;
  std::vector<float> results(batch_size * query_size);

  std::cout << "# (" << IntelIntrinsics() << ") INT8 " << dimension << "d, "
            << batch_size << " * " << query_size << " * " << block_size
            << std::endl;

  // 1 Batched SquaredEuclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const int8_t *matrix_batch = &matrix2[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const int8_t *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      SquaredEuclideanDistanceMatrix<int8_t, batch_size, 1>::Compute(
          matrix_batch, current_query, dimension, current_results);
    }
  }
  std::cout << "* 1 Batched SquaredEuclidean (us) \t"
            << elapsed_time.micro_seconds() << std::endl;

  // N Batched SquaredEuclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const int8_t *matrix_batch = &matrix2[i * batch_size * dimension];

    SquaredEuclideanDistanceMatrix<int8_t, batch_size, query_size>::Compute(
        matrix_batch, &query2[0], dimension, results.data());
  }
  std::cout << "* N Batched SquaredEuclidean (us) \t"
            << elapsed_time.micro_seconds() << std::endl;

  // Unbatched SquaredEuclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const int8_t *matrix_batch = &matrix1[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const int8_t *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      for (size_t k = 0; k < batch_size; ++k) {
        SquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute(
            &matrix_batch[k * dimension], current_query, dimension,
            &current_results[k]);
      }
    }
  }
  std::cout << "* Unbatched SquaredEuclidean (us) \t"
            << elapsed_time.micro_seconds() << std::endl;
}

// Benchmark driver for EuclideanBenchmark<M, N, B, D>: M = batch size,
// N = query count, B = block count, D = dimension. Every configuration uses
// 512 blocks of 128-dimensional INT8 vectors.
// The DISABLED_ prefix keeps GoogleTest from running it by default; pass
// --gtest_also_run_disabled_tests to execute it.
TEST(DistanceMatrix, DISABLED_Euclidean_Benchmark) {
  EuclideanBenchmark<2, 1, 512, 128>();
  EuclideanBenchmark<2, 2, 512, 128>();
  EuclideanBenchmark<4, 1, 512, 128>();
  EuclideanBenchmark<4, 2, 512, 128>();
  EuclideanBenchmark<4, 4, 512, 128>();
  EuclideanBenchmark<8, 1, 512, 128>();
  EuclideanBenchmark<8, 2, 512, 128>();
  EuclideanBenchmark<8, 4, 512, 128>();
  EuclideanBenchmark<8, 8, 512, 128>();
  EuclideanBenchmark<16, 1, 512, 128>();
  EuclideanBenchmark<16, 2, 512, 128>();
  EuclideanBenchmark<16, 4, 512, 128>();
  EuclideanBenchmark<16, 8, 512, 128>();
  EuclideanBenchmark<16, 16, 512, 128>();
  EuclideanBenchmark<32, 1, 512, 128>();
  EuclideanBenchmark<32, 2, 512, 128>();
  EuclideanBenchmark<32, 4, 512, 128>();
  EuclideanBenchmark<32, 8, 512, 128>();
  EuclideanBenchmark<32, 16, 512, 128>();
  EuclideanBenchmark<32, 32, 512, 128>();
  EuclideanBenchmark<64, 1, 512, 128>();
  EuclideanBenchmark<64, 2, 512, 128>();
  EuclideanBenchmark<64, 4, 512, 128>();
  EuclideanBenchmark<64, 8, 512, 128>();
  EuclideanBenchmark<128, 1, 512, 128>();
}

// Benchmark driver for SquaredEuclideanBenchmark<M, N, B, D>: M = batch size,
// N = query count, B = block count, D = dimension. Every configuration uses
// 512 blocks of 128-dimensional INT8 vectors.
// The DISABLED_ prefix keeps GoogleTest from running it by default; pass
// --gtest_also_run_disabled_tests to execute it.
TEST(DistanceMatrix, DISABLED_SquaredEuclidean_Benchmark) {
  SquaredEuclideanBenchmark<2, 1, 512, 128>();
  SquaredEuclideanBenchmark<2, 2, 512, 128>();
  SquaredEuclideanBenchmark<4, 1, 512, 128>();
  SquaredEuclideanBenchmark<4, 2, 512, 128>();
  SquaredEuclideanBenchmark<4, 4, 512, 128>();
  SquaredEuclideanBenchmark<8, 1, 512, 128>();
  SquaredEuclideanBenchmark<8, 2, 512, 128>();
  SquaredEuclideanBenchmark<8, 4, 512, 128>();
  SquaredEuclideanBenchmark<8, 8, 512, 128>();
  SquaredEuclideanBenchmark<16, 1, 512, 128>();
  SquaredEuclideanBenchmark<16, 2, 512, 128>();
  SquaredEuclideanBenchmark<16, 4, 512, 128>();
  SquaredEuclideanBenchmark<16, 8, 512, 128>();
  SquaredEuclideanBenchmark<16, 16, 512, 128>();
  SquaredEuclideanBenchmark<32, 1, 512, 128>();
  SquaredEuclideanBenchmark<32, 2, 512, 128>();
  SquaredEuclideanBenchmark<32, 4, 512, 128>();
  SquaredEuclideanBenchmark<32, 8, 512, 128>();
  SquaredEuclideanBenchmark<32, 16, 512, 128>();
  SquaredEuclideanBenchmark<32, 32, 512, 128>();
  SquaredEuclideanBenchmark<64, 1, 512, 128>();
  SquaredEuclideanBenchmark<64, 2, 512, 128>();
  SquaredEuclideanBenchmark<64, 4, 512, 128>();
  SquaredEuclideanBenchmark<64, 8, 512, 128>();
  SquaredEuclideanBenchmark<128, 1, 512, 128>();
}


================================================
FILE: tests/ailego/math/hamming_distance_matrix_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <bitset>
#include <functional>
#include <random>
#include <string>
#include <thread>
#include <vector>
#include <ailego/container/bitmap.h>
#include <ailego/internal/cpu_features.h>
#include <ailego/math/distance.h>
#include <gtest/gtest.h>
#include <zvec/ailego/container/vector.h>
#include <zvec/ailego/utility/time_helper.h>

using namespace zvec::ailego;

// Returns the string reported by internal::CpuFeatures::Intrinsics(); the
// benchmarks in this file print it in their header line.
static inline const char *IntelIntrinsics(void) {
  return internal::CpuFeatures::Intrinsics();
}

// Transposes a row-major matrix of uint32_t words.
//
// `src` holds N rows of M words each; `dst` receives M rows of N words each,
// so that dst[col * N + row] == src[row * M + col]. The two buffers must not
// overlap and must each hold at least M * N words.
static inline void MatrixTranspose(uint32_t *dst, const uint32_t *src, size_t M,
                                   size_t N) {
  for (size_t row = 0; row < N; ++row) {
    const uint32_t *in = src + row * M;
    for (size_t col = 0; col < M; ++col) {
      dst[col * N + row] = in[col];
    }
  }
}

// Transposes a row-major matrix of uint64_t words.
//
// `src` holds N rows of M words each; `dst` receives M rows of N words each,
// so that dst[col * N + row] == src[row * M + col]. The two buffers must not
// overlap and must each hold at least M * N words.
static inline void MatrixTranspose(uint64_t *dst, const uint64_t *src, size_t M,
                                   size_t N) {
  for (size_t row = 0; row < N; ++row) {
    const uint64_t *in = src + row * M;
    for (size_t col = 0; col < M; ++col) {
      dst[col * N + row] = in[col];
    }
  }
}

// Cross-checks Distance::Hamming against std::bitset.
//
// Two 63936-bit sets are built in parallel as FixedBitset and std::bitset by
// setting 1333 random bits and then flipping 1666 random bits in each. The
// popcount of the XOR of the std::bitset pair is the reference; it must equal
// Distance::Hamming evaluated on the FixedBitset storage both through the
// pointer type returned by data() and through a uint64_t view of it.
TEST(DistanceMatrix, Hamming_General) {
  // std::rand() is only guaranteed to reach 32767, which is below the bitset
  // size, so a <random> engine is used to make every bit index reachable
  // (this also matches the other tests in this file).
  std::mt19937 gen((std::random_device())());

  FixedBitset<63936> bitset1;
  FixedBitset<63936> bitset2;
  std::bitset<63936> stl_bitset1;
  std::bitset<63936> stl_bitset2;

  std::uniform_int_distribution<uint32_t> dist(
      0, static_cast<uint32_t>(bitset1.size() - 1));

  for (uint32_t i = 0; i < 1333; ++i) {
    uint32_t val1 = dist(gen);
    uint32_t val2 = dist(gen);

    bitset1.set(val1);
    stl_bitset1.set(val1);

    bitset2.set(val2);
    stl_bitset2.set(val2);
  }
  for (uint32_t i = 0; i < 1666; ++i) {
    uint32_t val1 = dist(gen);
    uint32_t val2 = dist(gen);

    bitset1.flip(val1);
    stl_bitset1.flip(val1);

    bitset2.flip(val2);
    stl_bitset2.flip(val2);
  }

  float result0 = (float)(stl_bitset1 ^ stl_bitset2).count();
  // The bit count is rounded down to a whole number of 32-bit / 64-bit words;
  // 63936 is a multiple of both, so no bits are dropped.
  float result1 = Distance::Hamming(bitset1.data(), bitset2.data(),
                                    bitset1.size() / 32 * 32);
  float result2 = Distance::Hamming((const uint64_t *)bitset1.data(),
                                    (const uint64_t *)bitset2.data(),
                                    bitset1.size() / 64 * 64);
  EXPECT_FLOAT_EQ(result0, result1);
  EXPECT_FLOAT_EQ(result0, result2);
}

// Checks the M x N uint32_t Hamming kernel against the 1 x 1 kernel.
//
// M vectors and N queries of a random length (1..8192 words of 32 bits) are
// filled with random words. The reference is computed one pair at a time with
// HammingDistanceMatrix<uint32_t, 1, 1> on the original row-major data and
// stored query-major (index q * M + row). The M x N kernel then runs once on
// the MatrixTranspose()d copies, and every entry must match the reference.
template <size_t M, size_t N>
void TestHamming32Matrix(void) {
  std::mt19937 rng((std::random_device())());

  const size_t words = (std::uniform_int_distribution<size_t>(1, 8192))(rng);
  const size_t bits = words * 32;
  const size_t cells = M * N;

  std::vector<uint32_t> rows(M * words);
  std::vector<uint32_t> rows_t(M * words);
  std::vector<uint32_t> queries(N * words);
  std::vector<uint32_t> queries_t(N * words);
  std::vector<float> expected(cells);
  std::vector<float> actual(cells);

  std::uniform_int_distribution<uint32_t> word_dist(0, 0xfffffffful);
  for (uint32_t &word : rows) {
    word = word_dist(rng);
  }
  for (uint32_t &word : queries) {
    word = word_dist(rng);
  }
  MatrixTranspose(rows_t.data(), rows.data(), words, M);
  MatrixTranspose(queries_t.data(), queries.data(), words, N);

  // Reference: pairwise distances on the untransposed data.
  for (size_t q = 0; q != N; ++q) {
    const uint32_t *query = &queries[q * words];
    float *out = &expected[q * M];

    for (size_t row = 0; row != M; ++row) {
      HammingDistanceMatrix<uint32_t, 1, 1>::Compute(&rows[row * words], query,
                                                     bits, &out[row]);
    }
  }
  // Kernel under test: the whole M x N matrix in a single call.
  HammingDistanceMatrix<uint32_t, M, N>::Compute(
      rows_t.data(), queries_t.data(), bits, actual.data());

  for (size_t i = 0; i != cells; ++i) {
    EXPECT_FLOAT_EQ(expected[i], actual[i]);
  }
}

// Checks the M x N uint32_t Hamming-square-root kernel against the 1 x 1
// kernel.
//
// M vectors and N queries of a random length (1..8192 words of 32 bits) are
// filled with random words. The reference is computed one pair at a time with
// HammingSquareRootDistanceMatrix<uint32_t, 1, 1> on the original row-major
// data and stored query-major (index q * M + row). The M x N kernel then runs
// once on the MatrixTranspose()d copies, and every entry must match.
template <size_t M, size_t N>
void TestHammingSquareRoot32Matrix(void) {
  std::mt19937 rng((std::random_device())());

  const size_t words = (std::uniform_int_distribution<size_t>(1, 8192))(rng);
  const size_t bits = words * 32;
  const size_t cells = M * N;

  std::vector<uint32_t> rows(M * words);
  std::vector<uint32_t> rows_t(M * words);
  std::vector<uint32_t> queries(N * words);
  std::vector<uint32_t> queries_t(N * words);
  std::vector<float> expected(cells);
  std::vector<float> actual(cells);

  std::uniform_int_distribution<uint32_t> word_dist(0, 0xfffffffful);
  for (uint32_t &word : rows) {
    word = word_dist(rng);
  }
  for (uint32_t &word : queries) {
    word = word_dist(rng);
  }
  MatrixTranspose(rows_t.data(), rows.data(), words, M);
  MatrixTranspose(queries_t.data(), queries.data(), words, N);

  // Reference: pairwise distances on the untransposed data.
  for (size_t q = 0; q != N; ++q) {
    const uint32_t *query = &queries[q * words];
    float *out = &expected[q * M];

    for (size_t row = 0; row != M; ++row) {
      HammingSquareRootDistanceMatrix<uint32_t, 1, 1>::Compute(
          &rows[row * words], query, bits, &out[row]);
    }
  }
  // Kernel under test: the whole M x N matrix in a single call.
  HammingSquareRootDistanceMatrix<uint32_t, M, N>::Compute(
      rows_t.data(), queries_t.data(), bits, actual.data());

  for (size_t i = 0; i != cells; ++i) {
    EXPECT_FLOAT_EQ(expected[i], actual[i]);
  }
}

// TestHamming32Matrix<M, N> with M = batch size and N = query count, for the
// small shapes 1x1 through 4x4 (including the non-power-of-two 3x3 case).
TEST(DistanceMatrix, Hamming32_1x1) {
  TestHamming32Matrix<1, 1>();
}

TEST(DistanceMatrix, Hamming32_2x1) {
  TestHamming32Matrix<2, 1>();
}

TEST(DistanceMatrix, Hamming32_2x2) {
  TestHamming32Matrix<2, 2>();
}

TEST(DistanceMatrix, Hamming32_3x3) {
  TestHamming32Matrix<3, 3>();
}

TEST(DistanceMatrix, Hamming32_4x1) {
  TestHamming32Matrix<4, 1>();
}

TEST(DistanceMatrix, Hamming32_4x2) {
  TestHamming32Matrix<4, 2>();
}

TEST(DistanceMatrix, Hamming32_4x4) {
  TestHamming32Matrix<4, 4>();
}

// TestHamming32Matrix with a batch size of 8 and 1, 2, 4 or 8 queries.
TEST(DistanceMatrix, Hamming32_8x1) {
  TestHamming32Matrix<8, 1>();
}

TEST(DistanceMatrix, Hamming32_8x2) {
  TestHamming32Matrix<8, 2>();
}

TEST(DistanceMatrix, Hamming32_8x4) {
  TestHamming32Matrix<8, 4>();
}

TEST(DistanceMatrix, Hamming32_8x8) {
  TestHamming32Matrix<8, 8>();
}

// TestHamming32Matrix with a batch size of 16 and 1, 2, 4, 8 or 16 queries.
TEST(DistanceMatrix, Hamming32_16x1) {
  TestHamming32Matrix<16, 1>();
}

TEST(DistanceMatrix, Hamming32_16x2) {
  TestHamming32Matrix<16, 2>();
}

TEST(DistanceMatrix, Hamming32_16x4) {
  TestHamming32Matrix<16, 4>();
}

TEST(DistanceMatrix, Hamming32_16x8) {
  TestHamming32Matrix<16, 8>();
}

TEST(DistanceMatrix, Hamming32_16x16) {
  TestHamming32Matrix<16, 16>();
}

// TestHamming32Matrix with a batch size of 32 and 1, 2, 4, 8, 16 or 32
// queries.
TEST(DistanceMatrix, Hamming32_32x1) {
  TestHamming32Matrix<32, 1>();
}

TEST(DistanceMatrix, Hamming32_32x2) {
  TestHamming32Matrix<32, 2>();
}

TEST(DistanceMatrix, Hamming32_32x4) {
  TestHamming32Matrix<32, 4>();
}

TEST(DistanceMatrix, Hamming32_32x8) {
  TestHamming32Matrix<32, 8>();
}

TEST(DistanceMatrix, Hamming32_32x16) {
  TestHamming32Matrix<32, 16>();
}

TEST(DistanceMatrix, Hamming32_32x32) {
  TestHamming32Matrix<32, 32>();
}

// TestHamming32Matrix with a batch size of 64 and 1, 2, 4, 8, 16, 32 or 64
// queries.
TEST(DistanceMatrix, Hamming32_64x1) {
  TestHamming32Matrix<64, 1>();
}

TEST(DistanceMatrix, Hamming32_64x2) {
  TestHamming32Matrix<64, 2>();
}

TEST(DistanceMatrix, Hamming32_64x4) {
  TestHamming32Matrix<64, 4>();
}

TEST(DistanceMatrix, Hamming32_64x8) {
  TestHamming32Matrix<64, 8>();
}

TEST(DistanceMatrix, Hamming32_64x16) {
  TestHamming32Matrix<64, 16>();
}

TEST(DistanceMatrix, Hamming32_64x32) {
  TestHamming32Matrix<64, 32>();
}

TEST(DistanceMatrix, Hamming32_64x64) {
  TestHamming32Matrix<64, 64>();
}

// TestHamming32Matrix with a batch size of 128 and 1, 2, 4, 8, 16, 32, 64 or
// 128 queries.
TEST(DistanceMatrix, Hamming32_128x1) {
  TestHamming32Matrix<128, 1>();
}

TEST(DistanceMatrix, Hamming32_128x2) {
  TestHamming32Matrix<128, 2>();
}

TEST(DistanceMatrix, Hamming32_128x4) {
  TestHamming32Matrix<128, 4>();
}

TEST(DistanceMatrix, Hamming32_128x8) {
  TestHamming32Matrix<128, 8>();
}

TEST(DistanceMatrix, Hamming32_128x16) {
  TestHamming32Matrix<128, 16>();
}

TEST(DistanceMatrix, Hamming32_128x32) {
  TestHamming32Matrix<128, 32>();
}

TEST(DistanceMatrix, Hamming32_128x64) {
  TestHamming32Matrix<128, 64>();
}

TEST(DistanceMatrix, Hamming32_128x128) {
  TestHamming32Matrix<128, 128>();
}

// TestHammingSquareRoot32Matrix<M, N> with M = batch size and N = query count,
// for the small shapes 1x1 through 4x4 (including the non-power-of-two 3x3
// case).
TEST(DistanceMatrix, HammingSquareRoot32_1x1) {
  TestHammingSquareRoot32Matrix<1, 1>();
}

TEST(DistanceMatrix, HammingSquareRoot32_2x1) {
  TestHammingSquareRoot32Matrix<2, 1>();
}

TEST(DistanceMatrix, HammingSquareRoot32_2x2) {
  TestHammingSquareRoot32Matrix<2, 2>();
}

TEST(DistanceMatrix, HammingSquareRoot32_3x3) {
  TestHammingSquareRoot32Matrix<3, 3>();
}

TEST(DistanceMatrix, HammingSquareRoot32_4x1) {
  TestHammingSquareRoot32Matrix<4, 1>();
}

TEST(DistanceMatrix, HammingSquareRoot32_4x2) {
  TestHammingSquareRoot32Matrix<4, 2>();
}

TEST(DistanceMatrix, HammingSquareRoot32_4x4) {
  TestHammingSquareRoot32Matrix<4, 4>();
}

// TestHammingSquareRoot32Matrix with a batch size of 8 and 1, 2, 4 or 8
// queries.
TEST(DistanceMatrix, HammingSquareRoot32_8x1) {
  TestHammingSquareRoot32Matrix<8, 1>();
}

TEST(DistanceMatrix, HammingSquareRoot32_8x2) {
  TestHammingSquareRoot32Matrix<8, 2>();
}

TEST(DistanceMatrix, HammingSquareRoot32_8x4) {
  TestHammingSquareRoot32Matrix<8, 4>();
}

TEST(DistanceMatrix, HammingSquareRoot32_8x8) {
  TestHammingSquareRoot32Matrix<8, 8>();
}

// TestHammingSquareRoot32Matrix with a batch size of 16 and 1, 2, 4, 8 or 16
// queries.
TEST(DistanceMatrix, HammingSquareRoot32_16x1) {
  TestHammingSquareRoot32Matrix<16, 1>();
}

TEST(DistanceMatrix, HammingSquareRoot32_16x2) {
  TestHammingSquareRoot32Matrix<16, 2>();
}

TEST(DistanceMatrix, HammingSquareRoot32_16x4) {
  TestHammingSquareRoot32Matrix<16, 4>();
}

TEST(DistanceMatrix, HammingSquareRoot32_16x8) {
  TestHammingSquareRoot32Matrix<16, 8>();
}

TEST(DistanceMatrix, HammingSquareRoot32_16x16) {
  TestHammingSquareRoot32Matrix<16, 16>();
}

// TestHammingSquareRoot32Matrix with a batch size of 32 and 1, 2, 4, 8, 16 or
// 32 queries.
TEST(DistanceMatrix, HammingSquareRoot32_32x1) {
  TestHammingSquareRoot32Matrix<32, 1>();
}

TEST(DistanceMatrix, HammingSquareRoot32_32x2) {
  TestHammingSquareRoot32Matrix<32, 2>();
}

TEST(DistanceMatrix, HammingSquareRoot32_32x4) {
  TestHammingSquareRoot32Matrix<32, 4>();
}

TEST(DistanceMatrix, HammingSquareRoot32_32x8) {
  TestHammingSquareRoot32Matrix<32, 8>();
}

TEST(DistanceMatrix, HammingSquareRoot32_32x16) {
  TestHammingSquareRoot32Matrix<32, 16>();
}

TEST(DistanceMatrix, HammingSquareRoot32_32x32) {
  TestHammingSquareRoot32Matrix<32, 32>();
}

// TestHammingSquareRoot32Matrix with a batch size of 64 and 1, 2, 4, 8, 16, 32
// or 64 queries.
TEST(DistanceMatrix, HammingSquareRoot32_64x1) {
  TestHammingSquareRoot32Matrix<64, 1>();
}

TEST(DistanceMatrix, HammingSquareRoot32_64x2) {
  TestHammingSquareRoot32Matrix<64, 2>();
}

TEST(DistanceMatrix, HammingSquareRoot32_64x4) {
  TestHammingSquareRoot32Matrix<64, 4>();
}

TEST(DistanceMatrix, HammingSquareRoot32_64x8) {
  TestHammingSquareRoot32Matrix<64, 8>();
}

TEST(DistanceMatrix, HammingSquareRoot32_64x16) {
  TestHammingSquareRoot32Matrix<64, 16>();
}

TEST(DistanceMatrix, HammingSquareRoot32_64x32) {
  TestHammingSquareRoot32Matrix<64, 32>();
}

TEST(DistanceMatrix, HammingSquareRoot32_64x64) {
  TestHammingSquareRoot32Matrix<64, 64>();
}

// TestHammingSquareRoot32Matrix with a batch size of 128 and 1, 2, 4, 8, 16,
// 32, 64 or 128 queries.
TEST(DistanceMatrix, HammingSquareRoot32_128x1) {
  TestHammingSquareRoot32Matrix<128, 1>();
}

TEST(DistanceMatrix, HammingSquareRoot32_128x2) {
  TestHammingSquareRoot32Matrix<128, 2>();
}

TEST(DistanceMatrix, HammingSquareRoot32_128x4) {
  TestHammingSquareRoot32Matrix<128, 4>();
}

TEST(DistanceMatrix, HammingSquareRoot32_128x8) {
  TestHammingSquareRoot32Matrix<128, 8>();
}

TEST(DistanceMatrix, HammingSquareRoot32_128x16) {
  TestHammingSquareRoot32Matrix<128, 16>();
}

TEST(DistanceMatrix, HammingSquareRoot32_128x32) {
  TestHammingSquareRoot32Matrix<128, 32>();
}

TEST(DistanceMatrix, HammingSquareRoot32_128x64) {
  TestHammingSquareRoot32Matrix<128, 64>();
}

TEST(DistanceMatrix, HammingSquareRoot32_128x128) {
  TestHammingSquareRoot32Matrix<128, 128>();
}

template <size_t M, size_t N, size_t B, size_t D>
void Hamming32Benchmark(void) {
  const size_t count = D;
  const size_t batch_size = M;
  const size_t block_size = B;
  const size_t query_size = N;
  const size_t matrix_size = block_size * batch_size * count;
  const size_t query_matrix_size = count * query_size;

  std::vector<uint32_t> matrix1(matrix_size);
  std::vector<uint32_t> matrix2(matrix_size);
  std::vector<uint32_t> query1(query_matrix_size);
  std::vector<uint32_t> query2(query_matrix_size);

  std::mt19937 gen((std::random_device())());
  std::uniform_int_distribution<uint32_t> dist(0, 0xfffffffful);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = dist(gen);
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = dist(gen);
  }

  for (size_t i = 0; i < block_size; ++i) {
    size_t start_pos = i * batch_size * count;
    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], count,
                    batch_size);
  }
  MatrixTranspose(&query2[0], query1.data(), count, query_size);

  ElapsedTime elapsed_time;
  std::vector<float> results(batch_size * query_size);

  std::cout << "# (" << IntelIntrinsics() << ") UINT32 " << count << "d, "
            << batch_size << " * " << query_size << " * " << block_size
            << std::endl;

  // 1 Batched Hamming
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const uint32_t *matrix_batch = &matrix2[i * batch_size * count];

    for (size_t j = 0; j < query_size; ++j) {
      const uint32_t *current_query = &query1[j * count];
      float *current_results = &results[j * batch_size];

      HammingDistanceMatrix<uint32_t, batch_size, 1>::Compute(
          matrix_batch, current_query, count * 32, current_results);
    }
  }
  std::cout << "* 1 Batched Hamming (us) \t" << elapsed_time.micro_seconds()
            << std::endl;

  // N Batched Hamming
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const uint32_t *matrix_batch = &matrix2[i * batch_size * count];

    HammingDistanceMatrix<uint32_t, batch_size, query_size>::Compute(
        matrix_batch, &query2[0], count * 32, results.data());
  }
  std::cout << "* N Batched Hamming (us) \t" << elapsed_time.micro_seconds()
            << std::endl;

  // Unbatched Hamming
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const uint32_t *matrix_batch = &matrix1[i * batch_size * count];

    for (size_t j = 0; j < query_size; ++j) {
      const uint32_t *current_query = &query1[j * count];
      float *current_results = &results[j * batch_size];

      for (size_t k = 0; k < batch_size; ++k) {
        HammingDistanceMatrix<uint32_t, 1, 1>::Compute(
            &matrix_batch[k * count], current_query, count * 32,
            &current_results[k]);
      }
    }
  }
  std::cout << "* Unbatched Hamming (us) \t" << elapsed_time.micro_seconds()
            << std::endl;
}

// Benchmark driver for Hamming32Benchmark<M, N, B, D>: M = batch size,
// N = query count, B = block count, D = 32-bit words per vector. Most
// configurations use 512 blocks of 64 words; the batch-size-4 cases use 2048
// blocks of 16 words instead.
// The DISABLED_ prefix keeps GoogleTest from running it by default; pass
// --gtest_also_run_disabled_tests to execute it.
TEST(DistanceMatrix, DISABLED_Hamming32_Benchmark) {
  Hamming32Benchmark<2, 1, 512, 64>();
  Hamming32Benchmark<2, 2, 512, 64>();
  Hamming32Benchmark<4, 1, 2048, 16>();
  Hamming32Benchmark<4, 2, 2048, 16>();
  Hamming32Benchmark<4, 4, 2048, 16>();
  Hamming32Benchmark<8, 1, 512, 64>();
  Hamming32Benchmark<8, 2, 512, 64>();
  Hamming32Benchmark<8, 4, 512, 64>();
  Hamming32Benchmark<8, 8, 512, 64>();
  Hamming32Benchmark<16, 1, 512, 64>();
  Hamming32Benchmark<16, 2, 512, 64>();
  Hamming32Benchmark<16, 4, 512, 64>();
  Hamming32Benchmark<16, 8, 512, 64>();
  Hamming32Benchmark<16, 16, 512, 64>();
  Hamming32Benchmark<32, 1, 512, 64>();
  Hamming32Benchmark<32, 2, 512, 64>();
  Hamming32Benchmark<32, 4, 512, 64>();
  Hamming32Benchmark<32, 8, 512, 64>();
  Hamming32Benchmark<32, 16, 512, 64>();
  Hamming32Benchmark<32, 32, 512, 64>();
  Hamming32Benchmark<64, 1, 512, 64>();
  Hamming32Benchmark<64, 2, 512, 64>();
  Hamming32Benchmark<64, 4, 512, 64>();
  Hamming32Benchmark<64, 8, 512, 64>();
  Hamming32Benchmark<128, 1, 512, 64>();
}

#if defined(AILEGO_M64)
// Checks the M x N uint64_t Hamming kernel against the 1 x 1 kernel.
//
// M vectors and N queries of a random length (1..512 words of 64 bits) are
// filled with random words drawn from [0, 0x7fffffffffffffff], so the top bit
// of every word is always clear. The reference is computed one pair at a time
// with HammingDistanceMatrix<uint64_t, 1, 1> on the original row-major data
// and stored query-major (index q * M + row). The M x N kernel then runs once
// on the MatrixTranspose()d copies, and every entry must match the reference.
template <size_t M, size_t N>
void TestHamming64Matrix(void) {
  std::mt19937 rng((std::random_device())());

  const size_t words = (std::uniform_int_distribution<size_t>(1, 512))(rng);
  const size_t bits = words * 64;
  const size_t cells = M * N;

  std::vector<uint64_t> rows(M * words);
  std::vector<uint64_t> rows_t(M * words);
  std::vector<uint64_t> queries(N * words);
  std::vector<uint64_t> queries_t(N * words);
  std::vector<float> expected(cells);
  std::vector<float> actual(cells);

  std::uniform_int_distribution<uint64_t> word_dist(0, 0x7fffffffffffffffull);
  for (uint64_t &word : rows) {
    word = word_dist(rng);
  }
  for (uint64_t &word : queries) {
    word = word_dist(rng);
  }
  MatrixTranspose(rows_t.data(), rows.data(), words, M);
  MatrixTranspose(queries_t.data(), queries.data(), words, N);

  // Reference: pairwise distances on the untransposed data.
  for (size_t q = 0; q != N; ++q) {
    const uint64_t *query = &queries[q * words];
    float *out = &expected[q * M];

    for (size_t row = 0; row != M; ++row) {
      HammingDistanceMatrix<uint64_t, 1, 1>::Compute(&rows[row * words], query,
                                                     bits, &out[row]);
    }
  }
  // Kernel under test: the whole M x N matrix in a single call.
  HammingDistanceMatrix<uint64_t, M, N>::Compute(
      rows_t.data(), queries_t.data(), bits, actual.data());

  for (size_t i = 0; i != cells; ++i) {
    EXPECT_FLOAT_EQ(expected[i], actual[i]);
  }
}

// Checks the M x N uint64_t Hamming-square-root kernel against the 1 x 1
// kernel.
//
// M vectors and N queries of a random length (1..512 words of 64 bits) are
// filled with random words drawn from [0, 0x7fffffffffffffff], so the top bit
// of every word is always clear. The reference is computed one pair at a time
// with HammingSquareRootDistanceMatrix<uint64_t, 1, 1> on the original
// row-major data and stored query-major (index q * M + row). The M x N kernel
// then runs once on the MatrixTranspose()d copies, and every entry must match.
template <size_t M, size_t N>
void TestHammingSquareRoot64Matrix(void) {
  std::mt19937 rng((std::random_device())());

  const size_t words = (std::uniform_int_distribution<size_t>(1, 512))(rng);
  const size_t bits = words * 64;
  const size_t cells = M * N;

  std::vector<uint64_t> rows(M * words);
  std::vector<uint64_t> rows_t(M * words);
  std::vector<uint64_t> queries(N * words);
  std::vector<uint64_t> queries_t(N * words);
  std::vector<float> expected(cells);
  std::vector<float> actual(cells);

  std::uniform_int_distribution<uint64_t> word_dist(0, 0x7fffffffffffffffull);
  for (uint64_t &word : rows) {
    word = word_dist(rng);
  }
  for (uint64_t &word : queries) {
    word = word_dist(rng);
  }
  MatrixTranspose(rows_t.data(), rows.data(), words, M);
  MatrixTranspose(queries_t.data(), queries.data(), words, N);

  // Reference: pairwise distances on the untransposed data.
  for (size_t q = 0; q != N; ++q) {
    const uint64_t *query = &queries[q * words];
    float *out = &expected[q * M];

    for (size_t row = 0; row != M; ++row) {
      HammingSquareRootDistanceMatrix<uint64_t, 1, 1>::Compute(
          &rows[row * words], query, bits, &out[row]);
    }
  }
  // Kernel under test: the whole M x N matrix in a single call.
  HammingSquareRootDistanceMatrix<uint64_t, M, N>::Compute(
      rows_t.data(), queries_t.data(), bits, actual.data());

  for (size_t i = 0; i != cells; ++i) {
    EXPECT_FLOAT_EQ(expected[i], actual[i]);
  }
}

// TestHamming64Matrix<M, N> with M = batch size and N = query count, for the
// small shapes 1x1 through 4x4 (including the non-power-of-two 3x3 case).
TEST(DistanceMatrix, Hamming64_1x1) {
  TestHamming64Matrix<1, 1>();
}

TEST(DistanceMatrix, Hamming64_2x1) {
  TestHamming64Matrix<2, 1>();
}

TEST(DistanceMatrix, Hamming64_2x2) {
  TestHamming64Matrix<2, 2>();
}

TEST(DistanceMatrix, Hamming64_3x3) {
  TestHamming64Matrix<3, 3>();
}

TEST(DistanceMatrix, Hamming64_4x1) {
  TestHamming64Matrix<4, 1>();
}

TEST(DistanceMatrix, Hamming64_4x2) {
  TestHamming64Matrix<4, 2>();
}

TEST(DistanceMatrix, Hamming64_4x4) {
  TestHamming64Matrix<4, 4>();
}

// TestHamming64Matrix with a batch size of 8 and 1, 2, 4 or 8 queries.
TEST(DistanceMatrix, Hamming64_8x1) {
  TestHamming64Matrix<8, 1>();
}

TEST(DistanceMatrix, Hamming64_8x2) {
  TestHamming64Matrix<8, 2>();
}

TEST(DistanceMatrix, Hamming64_8x4) {
  TestHamming64Matrix<8, 4>();
}

TEST(DistanceMatrix, Hamming64_8x8) {
  TestHamming64Matrix<8, 8>();
}

// TestHamming64Matrix with a batch size of 16 and 1, 2, 4, 8 or 16 queries.
TEST(DistanceMatrix, Hamming64_16x1) {
  TestHamming64Matrix<16, 1>();
}

TEST(DistanceMatrix, Hamming64_16x2) {
  TestHamming64Matrix<16, 2>();
}

TEST(DistanceMatrix, Hamming64_16x4) {
  TestHamming64Matrix<16, 4>();
}

TEST(DistanceMatrix, Hamming64_16x8) {
  TestHamming64Matrix<16, 8>();
}

TEST(DistanceMatrix, Hamming64_16x16) {
  TestHamming64Matrix<16, 16>();
}

// TestHamming64Matrix with a batch size of 32 and 1, 2, 4, 8, 16 or 32
// queries.
TEST(DistanceMatrix, Hamming64_32x1) {
  TestHamming64Matrix<32, 1>();
}

TEST(DistanceMatrix, Hamming64_32x2) {
  TestHamming64Matrix<32, 2>();
}

TEST(DistanceMatrix, Hamming64_32x4) {
  TestHamming64Matrix<32, 4>();
}

TEST(DistanceMatrix, Hamming64_32x8) {
  TestHamming64Matrix<32, 8>();
}

TEST(DistanceMatrix, Hamming64_32x16) {
  TestHamming64Matrix<32, 16>();
}

TEST(DistanceMatrix, Hamming64_32x32) {
  TestHamming64Matrix<32, 32>();
}

// TestHamming64Matrix with a batch size of 64 and 1, 2, 4, 8, 16, 32 or 64
// queries.
TEST(DistanceMatrix, Hamming64_64x1) {
  TestHamming64Matrix<64, 1>();
}

TEST(DistanceMatrix, Hamming64_64x2) {
  TestHamming64Matrix<64, 2>();
}

TEST(DistanceMatrix, Hamming64_64x4) {
  TestHamming64Matrix<64, 4>();
}

TEST(DistanceMatrix, Hamming64_64x8) {
  TestHamming64Matrix<64, 8>();
}

TEST(DistanceMatrix, Hamming64_64x16) {
  TestHamming64Matrix<64, 16>();
}

TEST(DistanceMatrix, Hamming64_64x32) {
  TestHamming64Matrix<64, 32>();
}

TEST(DistanceMatrix, Hamming64_64x64) {
  TestHamming64Matrix<64, 64>();
}

// TestHamming64Matrix with a batch size of 128 and 1, 2, 4, 8, 16, 32, 64 or
// 128 queries.
TEST(DistanceMatrix, Hamming64_128x1) {
  TestHamming64Matrix<128, 1>();
}

TEST(DistanceMatrix, Hamming64_128x2) {
  TestHamming64Matrix<128, 2>();
}

TEST(DistanceMatrix, Hamming64_128x4) {
  TestHamming64Matrix<128, 4>();
}

TEST(DistanceMatrix, Hamming64_128x8) {
  TestHamming64Matrix<128, 8>();
}

TEST(DistanceMatrix, Hamming64_128x16) {
  TestHamming64Matrix<128, 16>();
}

TEST(DistanceMatrix, Hamming64_128x32) {
  TestHamming64Matrix<128, 32>();
}

TEST(DistanceMatrix, Hamming64_128x64) {
  TestHamming64Matrix<128, 64>();
}

TEST(DistanceMatrix, Hamming64_128x128) {
  TestHamming64Matrix<128, 128>();
}

// TestHammingSquareRoot64Matrix<M, N> with M = batch size and N = query count,
// for the small shapes 1x1 through 4x4 (including the non-power-of-two 3x3
// case).
TEST(DistanceMatrix, HammingSquareRoot64_1x1) {
  TestHammingSquareRoot64Matrix<1, 1>();
}

TEST(DistanceMatrix, HammingSquareRoot64_2x1) {
  TestHammingSquareRoot64Matrix<2, 1>();
}

TEST(DistanceMatrix, HammingSquareRoot64_2x2) {
  TestHammingSquareRoot64Matrix<2, 2>();
}

TEST(DistanceMatrix, HammingSquareRoot64_3x3) {
  TestHammingSquareRoot64Matrix<3, 3>();
}

TEST(DistanceMatrix, HammingSquareRoot64_4x1) {
  TestHammingSquareRoot64Matrix<4, 1>();
}

TEST(DistanceMatrix, HammingSquareRoot64_4x2) {
  TestHammingSquareRoot64Matrix<4, 2>();
}

TEST(DistanceMatrix, HammingSquareRoot64_4x4) {
  TestHammingSquareRoot64Matrix<4, 4>();
}

// TestHammingSquareRoot64Matrix with a batch size of 8 and 1, 2, 4 or 8
// queries.
TEST(DistanceMatrix, HammingSquareRoot64_8x1) {
  TestHammingSquareRoot64Matrix<8, 1>();
}

TEST(DistanceMatrix, HammingSquareRoot64_8x2) {
  TestHammingSquareRoot64Matrix<8, 2>();
}

TEST(DistanceMatrix, HammingSquareRoot64_8x4) {
  TestHammingSquareRoot64Matrix<8, 4>();
}

TEST(DistanceMatrix, HammingSquareRoot64_8x8) {
  TestHammingSquareRoot64Matrix<8, 8>();
}

// TestHammingSquareRoot64Matrix with a batch size of 16 and 1, 2, 4, 8 or 16
// queries.
TEST(DistanceMatrix, HammingSquareRoot64_16x1) {
  TestHammingSquareRoot64Matrix<16, 1>();
}

TEST(DistanceMatrix, HammingSquareRoot64_16x2) {
  TestHammingSquareRoot64Matrix<16, 2>();
}

TEST(DistanceMatrix, HammingSquareRoot64_16x4) {
  TestHammingSquareRoot64Matrix<16, 4>();
}

TEST(DistanceMatrix, HammingSquareRoot64_16x8) {
  TestHammingSquareRoot64Matrix<16, 8>();
}

TEST(DistanceMatrix, HammingSquareRoot64_16x16) {
  TestHammingSquareRoot64Matrix<16, 16>();
}

// TestHammingSquareRoot64Matrix with a batch size of 32 and 1, 2, 4, 8, 16 or
// 32 queries.
TEST(DistanceMatrix, HammingSquareRoot64_32x1) {
  TestHammingSquareRoot64Matrix<32, 1>();
}

TEST(DistanceMatrix, HammingSquareRoot64_32x2) {
  TestHammingSquareRoot64Matrix<32, 2>();
}

TEST(DistanceMatrix, HammingSquareRoot64_32x4) {
  TestHammingSquareRoot64Matrix<32, 4>();
}

TEST(DistanceMatrix, HammingSquareRoot64_32x8) {
  TestHammingSquareRoot64Matrix<32, 8>();
}

TEST(DistanceMatrix, HammingSquareRoot64_32x16) {
  TestHammingSquareRoot64Matrix<32, 16>();
}

TEST(DistanceMatrix, HammingSquareRoot64_32x32) {
  TestHammingSquareRoot64Matrix<32, 32>();
}

// TestHammingSquareRoot64Matrix with a batch size of 64 and 1, 2, 4, 8, 16, 32
// or 64 queries.
TEST(DistanceMatrix, HammingSquareRoot64_64x1) {
  TestHammingSquareRoot64Matrix<64, 1>();
}

TEST(DistanceMatrix, HammingSquareRoot64_64x2) {
  TestHammingSquareRoot64Matrix<64, 2>();
}

TEST(DistanceMatrix, HammingSquareRoot64_64x4) {
  TestHammingSquareRoot64Matrix<64, 4>();
}

TEST(DistanceMatrix, HammingSquareRoot64_64x8) {
  TestHammingSquareRoot64Matrix<64, 8>();
}

TEST(DistanceMatrix, HammingSquareRoot64_64x16) {
  TestHammingSquareRoot64Matrix<64, 16>();
}

TEST(DistanceMatrix, HammingSquareRoot64_64x32) {
  TestHammingSquareRoot64Matrix<64, 32>();
}

TEST(DistanceMatrix, HammingSquareRoot64_64x64) {
  TestHammingSquareRoot64Matrix<64, 64>();
}

// TestHammingSquareRoot64Matrix with a batch size of 128 and 1, 2, 4, 8, 16,
// 32, 64 or 128 queries.
TEST(DistanceMatrix, HammingSquareRoot64_128x1) {
  TestHammingSquareRoot64Matrix<128, 1>();
}

TEST(DistanceMatrix, HammingSquareRoot64_128x2) {
  TestHammingSquareRoot64Matrix<128, 2>();
}

TEST(DistanceMatrix, HammingSquareRoot64_128x4) {
  TestHammingSquareRoot64Matrix<128, 4>();
}

TEST(DistanceMatrix, HammingSquareRoot64_128x8) {
  TestHammingSquareRoot64Matrix<128, 8>();
}

TEST(DistanceMatrix, HammingSquareRoot64_128x16) {
  TestHammingSquareRoot64Matrix<128, 16>();
}

TEST(DistanceMatrix, HammingSquareRoot64_128x32) {
  TestHammingSquareRoot64Matrix<128, 32>();
}

TEST(DistanceMatrix, HammingSquareRoot64_128x64) {
  TestHammingSquareRoot64Matrix<128, 64>();
}

TEST(DistanceMatrix, HammingSquareRoot64_128x128) {
  TestHammingSquareRoot64Matrix<128, 128>();
}

template <size_t M, size_t N, size_t B, size_t D>
void Hamming64Benchmark(void) {
  const size_t count = D;
  const size_t batch_size = M;
  const size_t block_size = B;
  const size_t query_size = N;
  const size_t matrix_size = block_size * batch_size * count;
  const size_t query_matrix_size = count * query_size;

  std::vector<uint64_t> matrix1(matrix_size);
  std::vector<uint64_t> matrix2(matrix_size);
  std::vector<uint64_t> query1(query_matrix_size);
  std::vector<uint64_t> query2(query_matrix_size);

  std::mt19937 gen((std::random_device())());
  std::uniform_int_distribution<uint32_t> dist(0, 0x7ffffffful);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = dist(gen);
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = dist(gen);
  }

  for (size_t i = 0; i < block_size; ++i) {
    size_t start_pos = i * batch_size * count;
    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], count,
                    batch_size);
  }
  MatrixTranspose(&query2[0], query1.data(), count, query_size);

  ElapsedTime elapsed_time;
  std::vector<float> results(batch_size * query_size);

  std::cout << "# (" << IntelIntrinsics() << ") UINT64 " << count << "d, "
            << batch_size << " * " << query_size << " * " << block_size
            << std::endl;

  // 1 Batched Hamming
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const uint64_t *matrix_batch = &matrix2[i * batch_size * count];

    for (size_t j = 0; j < query_size; ++j) {
      const uint64_t *current_query = &query1[j * count];
      float *current_results = &results[j * batch_size];

      HammingDistanceMatrix<uint64_t, batch_size, 1>::Compute(
          matrix_batch, current_query, count * 64, current_results);
    }
  }
  std::cout << "* 1 Batched Hamming (us) \t" << elapsed_time.micro_seconds()
            << std::endl;

  // N Batched Hamming
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const uint64_t *matrix_batch = &matrix2[i * batch_size * count];

    HammingDistanceMatrix<uint64_t, batch_size, query_size>::Compute(
        matrix_batch, &query2[0], count * 64, results.data());
  }
  std::cout << "* N Batched Hamming (us) \t" << elapsed_time.micro_seconds()
            << std::endl;

  // Unbatched Hamming
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const uint64_t *matrix_batch = &matrix1[i * batch_size * count];

    for (size_t j = 0; j < query_size; ++j) {
      const uint64_t *current_query = &query1[j * count];
      float *current_results = &results[j * batch_size];

      for (size_t k = 0; k < batch_size; ++k) {
        HammingDistanceMatrix<uint64_t, 1, 1>::Compute(
            &matrix_batch[k * count], current_query, count * 64,
            &current_results[k]);
      }
    }
  }
  std::cout << "* Unbatched Hamming (us) \t" << elapsed_time.micro_seconds()
            << std::endl;
}

// Benchmark driver for Hamming64Benchmark<M, N, B, D>. The DISABLED_ prefix
// keeps it out of normal test runs; pass --gtest_also_run_disabled_tests to
// execute it.
// Arguments are <batch size, query count, block count, uint64 words per
// vector>. Every shape uses 512 blocks of 64 words, except the batch-of-4
// shapes, which use 2048 blocks of 16 words.
TEST(DistanceMatrix, DISABLED_Hamming64_Benchmark) {
  Hamming64Benchmark<2, 1, 512, 64>();
  Hamming64Benchmark<2, 2, 512, 64>();
  Hamming64Benchmark<4, 1, 2048, 16>();
  Hamming64Benchmark<4, 2, 2048, 16>();
  Hamming64Benchmark<4, 4, 2048, 16>();
  Hamming64Benchmark<8, 1, 512, 64>();
  Hamming64Benchmark<8, 2, 512, 64>();
  Hamming64Benchmark<8, 4, 512, 64>();
  Hamming64Benchmark<8, 8, 512, 64>();
  Hamming64Benchmark<16, 1, 512, 64>();
  Hamming64Benchmark<16, 2, 512, 64>();
  Hamming64Benchmark<16, 4, 512, 64>();
  Hamming64Benchmark<16, 8, 512, 64>();
  Hamming64Benchmark<16, 16, 512, 64>();
  Hamming64Benchmark<32, 1, 512, 64>();
  Hamming64Benchmark<32, 2, 512, 64>();
  Hamming64Benchmark<32, 4, 512, 64>();
  Hamming64Benchmark<32, 8, 512, 64>();
  Hamming64Benchmark<32, 16, 512, 64>();
  Hamming64Benchmark<32, 32, 512, 64>();
  Hamming64Benchmark<64, 1, 512, 64>();
  Hamming64Benchmark<64, 2, 512, 64>();
  Hamming64Benchmark<64, 4, 512, 64>();
  Hamming64Benchmark<64, 8, 512, 64>();
  Hamming64Benchmark<128, 1, 512, 64>();
}
#endif  // AILEGO_M64


================================================
FILE: tests/ailego/math/inner_product_matrix_fp16_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <functional>
#include <iostream>
#include <random>
#include <string>
#include <thread>
#include <vector>
#include <ailego/container/bitmap.h>
#include <ailego/internal/cpu_features.h>
#include <ailego/math/distance.h>
#include <gtest/gtest.h>
#include <zvec/ailego/container/vector.h>
#include <zvec/ailego/utility/time_helper.h>

using namespace zvec::ailego;

// Returns the string reported by internal::CpuFeatures::Intrinsics(); the
// benchmarks in this file print it in their header line.
static inline const char *IntelIntrinsics(void) {
  const char *const intrinsics_name = internal::CpuFeatures::Intrinsics();
  return intrinsics_name;
}

// Transposes a matrix of N rows by M columns (src, row after row) into a
// matrix of M rows by N columns (dst): dst[j * N + i] = src[i * M + j].
// Both buffers must hold at least M * N elements.
static inline void MatrixTranspose(Float16 *dst, const Float16 *src, size_t M,
                                   size_t N) {
  for (size_t row = 0; row != N; ++row) {
    const Float16 *src_row = src + row * M;
    Float16 *dst_column = dst + row;

    for (size_t col = 0; col != M; ++col) {
      dst_column[col * N] = src_row[col];
    }
  }
}

// Convenience wrapper: Distance::InnerProduct over two fixed-size FP16
// vectors of equal length, using lhs.size() as the element count.
template <size_t N>
static float InnerProductDistance(const FixedVector<Float16, N> &lhs,
                                  const FixedVector<Float16, N> &rhs) {
  const auto element_count = lhs.size();
  return Distance::InnerProduct(lhs.data(), rhs.data(), element_count);
}

// Convenience wrapper: Distance::MinusInnerProduct over two fixed-size FP16
// vectors of equal length, using lhs.size() as the element count.
template <size_t N>
static float MinusInnerProductDistance(const FixedVector<Float16, N> &lhs,
                                       const FixedVector<Float16, N> &rhs) {
  const auto element_count = lhs.size();
  return Distance::MinusInnerProduct(lhs.data(), rhs.data(), element_count);
}

// Checks Distance::InnerProduct (through InnerProductDistance) on hand-written
// FP16 vectors of 15 to 21 elements against precomputed reference values.
// The last argument of MathHelper::IsAlmostEqual is the comparison tolerance.
// NOTE(review): presumably expressed in ULPs - confirm against MathHelper.
TEST(DistanceMatrix, InnerProduct_General) {
  FixedVector<Float16, 15> x15{5.22f,  0.65f, 0.711f, 7.8f,  8.9f,
                               555.0f, 0.8f,  5.5f,   3.75f, 9.0f,
                               6.6f,   0.1f,  8.8f,   0.2f,  5.6f},
      y15{5.22f, 0.65f, 0.711f, 7.8f, 8.9f, 555.0f, 0.8f, 5.5f,
          3.75f, 9.0f,  6.6f,   0.1f, 8.8f, 0.2f,   0.25f};
  EXPECT_TRUE(MathHelper::IsAlmostEqual(308441.62f,
                                        InnerProductDistance(x15, y15), 1000));

  FixedVector<Float16, 16> x16{5.22f, 0.65f, 0.711f, 7.8f,  8.9f, 555.0f,
                               9.12f, 0.8f,  5.5f,   3.75f, 9.0f, 6.6f,
                               0.1f,  8.8f,  0.2f,   5.6f},
      y16{5.22f, 0.65f, 0.711f, 7.8f, 8.9f, 555.0f, 9.12f, 0.8f,
          5.5f,  3.75f, 9.0f,   6.6f, 0.1f, 8.8f,   0.2f,  0.5f};
  EXPECT_TRUE(MathHelper::IsAlmostEqual(308526.19f,
                                        InnerProductDistance(x16, y16), 1000));

  FixedVector<Float16, 17> x17{3.4f, 4.5f,  5.6f, 1.6f,  3.4f,  8.1f,
                               1.0f, 4.41f, 7.7f, 1.11f, 3.04f, 2.3f,
                               3.4f, 4.5f,  5.6f, 1.6f,  1.3f},
      y17{3.4f,  4.5f,  5.6f, 1.6f, 3.4f, 8.1f, 1.0f, 4.41f, 7.7f,
          1.11f, 3.04f, 2.3f, 3.4f, 4.5f, 5.6f, 1.6f, 2.3f};
  EXPECT_TRUE(MathHelper::IsAlmostEqual(307.1762f,
                                        InnerProductDistance(x17, y17), 1000));

  FixedVector<Float16, 18> x18{1.66f, 2.3f, 1.11f, 3.04f,  8.23f, 1.0f,
                               4.44f, 7.7f, 1.5f,  11.11f, 2.3f,  3.4f,
                               4.5f,  5.6f, 1.6f,  2.3f,   1.11f, 3.04f},
      y18{1.66f,  2.3f, 1.11f, 3.04f, 8.23f, 1.0f, 4.44f, 7.7f,  1.5f,
          11.11f, 2.3f, 3.4f,  4.5f,  5.6f,  1.6f, 2.3f,  1.11f, 3.04f};
  EXPECT_TRUE(MathHelper::IsAlmostEqual(378.67197f,
                                        InnerProductDistance(x18, y18), 1000));

  FixedVector<Float16, 19> x19{1.66f, 2.3f,  1.11f, 3.04f,  8.23f, 1.0f, 1.6f,
                               2.3f,  4.44f, 7.7f,  11.11f, 2.3f,  3.4f, 4.5f,
                               5.6f,  1.6f,  2.3f,  1.11f,  2.3f},
      y19{1.66f,  2.3f, 1.11f, 3.04f, 8.23f, 1.0f, 1.6f, 2.3f,  4.44f, 7.7f,
          11.11f, 2.3f, 3.4f,  4.5f,  5.6f,  1.6f, 2.3f, 1.11f, 2.3f};
  EXPECT_TRUE(MathHelper::IsAlmostEqual(380.33203f,
                                        InnerProductDistance(x19, y19), 1000));

  FixedVector<Float16, 20> x20{1.6f, 2.3f, 1.11f, 2.3f, 3.04f, 8.23f, 1.0f,
                               1.6f, 2.3f, 5.6f,  1.6f, 2.3f,  2.3f,  3.4f,
                               4.5f, 5.6f, 1.6f,  2.3f, 1.11f, 2.3f},
      y20{1.6f, 2.3f, 1.11f, 2.3f, 3.04f, 8.23f, 1.0f, 1.6f, 2.3f,  5.6f,
          1.6f, 2.3f, 2.3f,  3.4f, 4.5f,  5.6f,  1.6f, 2.3f, 1.11f, 2.3f};
  EXPECT_TRUE(MathHelper::IsAlmostEqual(222.23581f,
                                        InnerProductDistance(x20, y20), 1000));

  // Only the first of 21 elements is given explicitly.
  // NOTE(review): assumes FixedVector zero-fills the remaining elements -
  // confirm against the container.
  FixedVector<Float16, 21> x21{0.0f}, y21{0.0f};
  EXPECT_TRUE(
      MathHelper::IsAlmostEqual(0.0f, InnerProductDistance(x21, y21), 1000));
}

// Checks Distance::MinusInnerProduct (through MinusInnerProductDistance) on
// hand-written FP16 vectors of every length from 1 to 21 against precomputed
// reference values. The expected values for lengths 15 to 20 are the negated
// counterparts of those used in InnerProduct_General.
// The last argument of MathHelper::IsAlmostEqual is the comparison tolerance.
// NOTE(review): presumably expressed in ULPs - confirm against MathHelper.
TEST(DistanceMatrix, MinusInnerProduct_General) {
  FixedVector<Float16, 1> x1{0.7f}, y1{0.5f};
  EXPECT_TRUE(MathHelper::IsAlmostEqual(
      -0.35009766f, MinusInnerProductDistance(x1, y1), 1000));

  FixedVector<Float16, 2> x2{2.0f, 3.76f}, y2{2.0f, 0.901f};
  EXPECT_TRUE(MathHelper::IsAlmostEqual(
      -7.387093f, MinusInnerProductDistance(x2, y2), 1000));

  FixedVector<Float16, 3> x3{2.0f, 3.0f, 0.7f}, y3{2.0f, 3.0f, 2.0f};
  EXPECT_TRUE(MathHelper::IsAlmostEqual(
      -14.400391f, MinusInnerProductDistance(x3, y3), 1000));

  // Mixed signs: the inner product is negative, so the expected value is
  // positive.
  FixedVector<Float16, 4> x4{7.8f, -8.9f, 9.0f, 5.6f},
      y4{7.8f, 8.9f, -9.0f, -0.1f};
  EXPECT_TRUE(MathHelper::IsAlmostEqual(
      99.89003f, MinusInnerProductDistance(x4, y4), 1000));

  FixedVector<Float16, 5> x5{7.8f, 8.9f, 9.0f, 0.1f, 5.6f},
      y5{7.8f, 8.9f, 9.0f, 0.1f, 0.2f};
  EXPECT_TRUE(MathHelper::IsAlmostEqual(
      -222.16441f, MinusInnerProductDistance(x5, y5), 1000));

  FixedVector<Float16, 6> x6{0.711f, 7.8f, 8.9f, 9.0f, 0.1f, 5.6f},
      y6{0.711f, 7.8f, 8.9f, 9.0f, 0.1f, 0.2f};
  EXPECT_TRUE(MathHelper::IsAlmostEqual(
      -222.66985f, MinusInnerProductDistance(x6, y6), 1000));

  FixedVector<Float16, 7> x7{5.22f, 0.711f, 7.8f, 8.9f, 9.0f, 0.1f, 5.6f},
      y7{5.22f, 0.711f, 7.8f, 8.9f, 9.0f, 0.1f, 0.2f};
  EXPECT_TRUE(MathHelper::IsAlmostEqual(
      -249.9052f, MinusInnerProductDistance(x7, y7), 1000));

  FixedVector<Float16, 8> x8{5.22f, 0.711f, 7.8f, 8.9f, 9.0f, 0.1f, 0.2f, 5.6f},
      y8{5.22f, 0.711f, -7.8f, -8.9f, -9.0f, 0.1f, 0.2f, 0.5f};
  EXPECT_TRUE(MathHelper::IsAlmostEqual(
      190.44284f, MinusInnerProductDistance(x8, y8), 1000));

  FixedVector<Float16, 9> x9{5.22f, 0.711f, 7.8f, 8.9f, 9.0f,
                             6.6f,  0.1f,   0.2f, 5.6f},
      y9{5.22f, 0.711f, 7.8f, 8.9f, 9.0f, 6.6f, 0.1f, 0.2f, 0.5f};
  EXPECT_TRUE(MathHelper::IsAlmostEqual(
      -295.20654f, MinusInnerProductDistance(x9, y9), 1000));

  FixedVector<Float16, 10> x10{5.22f, 0.711f, 7.8f, 8.9f, 5.5f,
                               9.0f,  6.6f,   0.1f, 0.2f, 5.6f},
      y10{5.22f, 0.711f, 7.8f, 8.9f, 5.5f, 9.0f, 6.6f, 0.1f, 0.2f, 0.522f};
  EXPECT_TRUE(MathHelper::IsAlmostEqual(
      -325.57962f, MinusInnerProductDistance(x10, y10), 1000));

  FixedVector<Float16, 11> x11{2.3f,    -1.11f, 3.04f, 8.23f, 1.0f, 7.7f,
                               -11.11f, 2.3f,   3.4f,  4.5f,  5.6f},
      y11{2.3f,    1.11f, 3.04f, 8.23f, -1.0f, 7.7f,
          -11.11f, 2.3f,  3.4f,  4.5f,  0.511f};
  EXPECT_TRUE(MathHelper::IsAlmostEqual(
      -302.63904f, MinusInnerProductDistance(x11, y11), 1000));

  FixedVector<Float16, 12> x12{1.6f, 2.3f,   1.11f, 3.04f, 8.23f, 1.0f,
                               7.7f, 11.11f, 2.3f,  3.4f,  4.5f,  5.6f},
      y12{1.6f, 2.3f,   1.11f, 3.04f, 8.23f, 1.0f,
          7.7f, 11.11f, 2.3f,  3.4f,  4.5f,  0.5f};
  EXPECT_TRUE(MathHelper::IsAlmostEqual(
      -309.60065f, MinusInnerProductDistance(x12, y12), 1000));

  FixedVector<Float16, 13> x13{1.6f, 2.3f,   1.11f, 3.04f, 8.23f, 1.0f, 4.44f,
                               7.7f, 11.11f, 2.3f,  3.4f,  4.5f,  5.6f},
      y13{1.6f, 2.3f,   1.11f, 3.04f, 8.23f, 1.0f, 4.44f,
          7.7f, 11.11f, 2.3f,  3.4f,  4.5f,  3.5f};
  EXPECT_TRUE(MathHelper::IsAlmostEqual(
      -346.13144f, MinusInnerProductDistance(x13, y13), 1000));

  FixedVector<Float16, 14> x14{5.22f, 0.65f, 0.711f, 7.8f, 8.9f, 0.8f, 5.5f,
                               3.75f, 9.0f,  6.6f,   0.1f, 8.8f, 0.2f, 5.6f},
      y14{5.22f, 0.65f, 0.711f, 7.8f, 8.9f, 0.8f, 5.5f,
          3.75f, 9.0f,  6.6f,   0.1f, 8.8f, 0.2f, 0.5f};
  EXPECT_TRUE(MathHelper::IsAlmostEqual(
      -417.96613f, MinusInnerProductDistance(x14, y14), 1000));

  FixedVector<Float16, 15> x15{5.22f,  0.65f, 0.711f, 7.8f,  8.9f,
                               555.0f, 0.8f,  5.5f,   3.75f, 9.0f,
                               6.6f,   0.1f,  8.8f,   0.2f,  5.6f},
      y15{5.22f, 0.65f, 0.711f, 7.8f, 8.9f, 555.0f, 0.8f, 5.5f,
          3.75f, 9.0f,  6.6f,   0.1f, 8.8f, 0.2f,   0.25f};
  EXPECT_TRUE(MathHelper::IsAlmostEqual(
      -308441.62f, MinusInnerProductDistance(x15, y15), 1000));

  FixedVector<Float16, 16> x16{5.22f, 0.65f, 0.711f, 7.8f,  8.9f, 555.0f,
                               9.12f, 0.8f,  5.5f,   3.75f, 9.0f, 6.6f,
                               0.1f,  8.8f,  0.2f,   5.6f},
      y16{5.22f, 0.65f, 0.711f, 7.8f, 8.9f, 555.0f, 9.12f, 0.8f,
          5.5f,  3.75f, 9.0f,   6.6f, 0.1f, 8.8f,   0.2f,  0.5f};
  EXPECT_TRUE(MathHelper::IsAlmostEqual(
      -308526.19f, MinusInnerProductDistance(x16, y16), 1000));

  FixedVector<Float16, 17> x17{3.4f, 4.5f,  5.6f, 1.6f,  3.4f,  8.1f,
                               1.0f, 4.41f, 7.7f, 1.11f, 3.04f, 2.3f,
                               3.4f, 4.5f,  5.6f, 1.6f,  1.3f},
      y17{3.4f,  4.5f,  5.6f, 1.6f, 3.4f, 8.1f, 1.0f, 4.41f, 7.7f,
          1.11f, 3.04f, 2.3f, 3.4f, 4.5f, 5.6f, 1.6f, 2.3f};
  EXPECT_TRUE(MathHelper::IsAlmostEqual(
      -307.17618f, MinusInnerProductDistance(x17, y17), 1000));

  FixedVector<Float16, 18> x18{1.66f, 2.3f, 1.11f, 3.04f,  8.23f, 1.0f,
                               4.44f, 7.7f, 1.5f,  11.11f, 2.3f,  3.4f,
                               4.5f,  5.6f, 1.6f,  2.3f,   1.11f, 3.04f},
      y18{1.66f,  2.3f, 1.11f, 3.04f, 8.23f, 1.0f, 4.44f, 7.7f,  1.5f,
          11.11f, 2.3f, 3.4f,  4.5f,  5.6f,  1.6f, 2.3f,  1.11f, 3.04f};
  EXPECT_TRUE(MathHelper::IsAlmostEqual(
      -378.67197f, MinusInnerProductDistance(x18, y18), 1000));

  FixedVector<Float16, 19> x19{1.66f, 2.3f,  1.11f, 3.04f,  8.23f, 1.0f, 1.6f,
                               2.3f,  4.44f, 7.7f,  11.11f, 2.3f,  3.4f, 4.5f,
                               5.6f,  1.6f,  2.3f,  1.11f,  2.3f},
      y19{1.66f,  2.3f, 1.11f, 3.04f, 8.23f, 1.0f, 1.6f, 2.3f,  4.44f, 7.7f,
          11.11f, 2.3f, 3.4f,  4.5f,  5.6f,  1.6f, 2.3f, 1.11f, 2.3f};
  EXPECT_TRUE(MathHelper::IsAlmostEqual(
      -380.33203f, MinusInnerProductDistance(x19, y19), 1000));

  FixedVector<Float16, 20> x20{1.6f, 2.3f, 1.11f, 2.3f, 3.04f, 8.23f, 1.0f,
                               1.6f, 2.3f, 5.6f,  1.6f, 2.3f,  2.3f,  3.4f,
                               4.5f, 5.6f, 1.6f,  2.3f, 1.11f, 2.3f},
      y20{1.6f, 2.3f, 1.11f, 2.3f, 3.04f, 8.23f, 1.0f, 1.6f, 2.3f,  5.6f,
          1.6f, 2.3f, 2.3f,  3.4f, 4.5f,  5.6f,  1.6f, 2.3f, 1.11f, 2.3f};
  EXPECT_TRUE(MathHelper::IsAlmostEqual(
      -222.23581f, MinusInnerProductDistance(x20, y20), 1000));

  // Only the first of 21 elements is given explicitly.
  // NOTE(review): assumes FixedVector zero-fills the remaining elements -
  // confirm against the container.
  FixedVector<Float16, 21> x21{0.0f}, y21{0.0f};
  EXPECT_TRUE(MathHelper::IsAlmostEqual(
      0.0f, MinusInnerProductDistance(x21, y21), 1000));
}

template <size_t M, size_t N>
void TestInnerProductMatrix(void) {
  std::mt19937 gen((std::random_device())());

  const size_t batch_size = M;
  const size_t query_size = N;
  size_t dimension = (std::uniform_int_distribution<size_t>(1, 65))(gen);
  size_t matrix_size = batch_size * dimension;
  size_t query_matrix_size = query_size * dimension;

  std::vector<Float16> matrix1(matrix_size);
  std::vector<Float16> matrix2(matrix_size);
  std::vector<Float16> query1(query_matrix_size);
  std::vector<Float16> query2(query_matrix_size);
  std::vector<float> result1(batch_size * query_size);
  std::vector<float> result2(batch_size * query_size);

  std::uniform_real_distribution<float> dist(0.0, 1.0);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = dist(gen);
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = dist(gen);
  }
  MatrixTranspose(&matrix2[0], matrix1.data(), dimension, batch_size);
  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);

  for (size_t i = 0; i < query_size; ++i) {
    const Float16 *cur_query = &query1[i * dimension];
    float *query_result = &result1[i * batch_size];

    for (size_t j = 0; j < batch_size; ++j) {
      InnerProductMatrix<Float16, 1, 1>::Compute(
          &matrix1[j * dimension], cur_query, dimension, &query_result[j]);
    }
  }
  InnerProductMatrix<Float16, batch_size, query_size>::Compute(
      &matrix2[0], &query2[0], dimension, &result2[0]);

  for (size_t i = 0; i < batch_size * query_size; ++i) {
    EXPECT_TRUE(MathHelper::IsAlmostEqual(result1[i], result2[i], 10000));
  }
}

template <size_t M, size_t N>
void TestMinusInnerProductMatrix(void) {
  std::mt19937 gen((std::random_device())());

  const size_t batch_size = M;
  const size_t query_size = N;
  size_t dimension = (std::uniform_int_distribution<size_t>(1, 65))(gen);
  size_t matrix_size = batch_size * dimension;
  size_t query_matrix_size = query_size * dimension;

  std::vector<Float16> matrix1(matrix_size);
  std::vector<Float16> matrix2(matrix_size);
  std::vector<Float16> query1(query_matrix_size);
  std::vector<Float16> query2(query_matrix_size);
  std::vector<float> result1(batch_size * query_size);
  std::vector<float> result2(batch_size * query_size);

  std::uniform_real_distribution<float> dist(0.0, 1.0);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = dist(gen);
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = dist(gen);
  }
  MatrixTranspose(&matrix2[0], matrix1.data(), dimension, batch_size);
  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);

  for (size_t i = 0; i < query_size; ++i) {
    const Float16 *cur_query = &query1[i * dimension];
    float *query_result = &result1[i * batch_size];

    for (size_t j = 0; j < batch_size; ++j) {
      MinusInnerProductMatrix<Float16, 1, 1>::Compute(
          &matrix1[j * dimension], cur_query, dimension, &query_result[j]);
    }
  }
  MinusInnerProductMatrix<Float16, batch_size, query_size>::Compute(
      &matrix2[0], &query2[0], dimension, &result2[0]);

  for (size_t i = 0; i < batch_size * query_size; ++i) {
    EXPECT_TRUE(MathHelper::IsAlmostEqual(result1[i], result2[i], 10000));
  }
}

// Size sweep for the batched FP16 inner-product kernel. Each test below runs
// TestInnerProductMatrix<M, N> for the MxN pair in its name, where M is the
// number of vectors per batch and N the number of queries.
TEST(DistanceMatrix, InnerProduct_1x1) {
  TestInnerProductMatrix<1, 1>();
}

TEST(DistanceMatrix, InnerProduct_2x1) {
  TestInnerProductMatrix<2, 1>();
}

TEST(DistanceMatrix, InnerProduct_2x2) {
  TestInnerProductMatrix<2, 2>();
}

TEST(DistanceMatrix, InnerProduct_3x3) {
  TestInnerProductMatrix<3, 3>();
}

TEST(DistanceMatrix, InnerProduct_4x1) {
  TestInnerProductMatrix<4, 1>();
}

TEST(DistanceMatrix, InnerProduct_4x2) {
  TestInnerProductMatrix<4, 2>();
}

TEST(DistanceMatrix, InnerProduct_4x4) {
  TestInnerProductMatrix<4, 4>();
}

TEST(DistanceMatrix, InnerProduct_8x1) {
  TestInnerProductMatrix<8, 1>();
}

TEST(DistanceMatrix, InnerProduct_8x2) {
  TestInnerProductMatrix<8, 2>();
}

TEST(DistanceMatrix, InnerProduct_8x4) {
  TestInnerProductMatrix<8, 4>();
}

TEST(DistanceMatrix, InnerProduct_8x8) {
  TestInnerProductMatrix<8, 8>();
}

TEST(DistanceMatrix, InnerProduct_16x1) {
  TestInnerProductMatrix<16, 1>();
}

TEST(DistanceMatrix, InnerProduct_16x2) {
  TestInnerProductMatrix<16, 2>();
}

TEST(DistanceMatrix, InnerProduct_16x4) {
  TestInnerProductMatrix<16, 4>();
}

TEST(DistanceMatrix, InnerProduct_16x8) {
  TestInnerProductMatrix<16, 8>();
}

TEST(DistanceMatrix, InnerProduct_16x16) {
  TestInnerProductMatrix<16, 16>();
}

TEST(DistanceMatrix, InnerProduct_32x1) {
  TestInnerProductMatrix<32, 1>();
}

TEST(DistanceMatrix, InnerProduct_32x2) {
  TestInnerProductMatrix<32, 2>();
}

TEST(DistanceMatrix, InnerProduct_32x4) {
  TestInnerProductMatrix<32, 4>();
}

TEST(DistanceMatrix, InnerProduct_32x8) {
  TestInnerProductMatrix<32, 8>();
}

TEST(DistanceMatrix, InnerProduct_32x16) {
  TestInnerProductMatrix<32, 16>();
}

TEST(DistanceMatrix, InnerProduct_32x32) {
  TestInnerProductMatrix<32, 32>();
}

TEST(DistanceMatrix, InnerProduct_64x1) {
  TestInnerProductMatrix<64, 1>();
}

TEST(DistanceMatrix, InnerProduct_64x2) {
  TestInnerProductMatrix<64, 2>();
}

TEST(DistanceMatrix, InnerProduct_64x4) {
  TestInnerProductMatrix<64, 4>();
}

TEST(DistanceMatrix, InnerProduct_64x8) {
  TestInnerProductMatrix<64, 8>();
}

TEST(DistanceMatrix, InnerProduct_64x16) {
  TestInnerProductMatrix<64, 16>();
}

TEST(DistanceMatrix, InnerProduct_64x32) {
  TestInnerProductMatrix<64, 32>();
}

TEST(DistanceMatrix, InnerProduct_64x64) {
  TestInnerProductMatrix<64, 64>();
}

TEST(DistanceMatrix, InnerProduct_128x1) {
  TestInnerProductMatrix<128, 1>();
}

TEST(DistanceMatrix, InnerProduct_128x2) {
  TestInnerProductMatrix<128, 2>();
}

TEST(DistanceMatrix, InnerProduct_128x4) {
  TestInnerProductMatrix<128, 4>();
}

TEST(DistanceMatrix, InnerProduct_128x8) {
  TestInnerProductMatrix<128, 8>();
}

TEST(DistanceMatrix, InnerProduct_128x16) {
  TestInnerProductMatrix<128, 16>();
}

TEST(DistanceMatrix, InnerProduct_128x32) {
  TestInnerProductMatrix<128, 32>();
}

TEST(DistanceMatrix, InnerProduct_128x64) {
  TestInnerProductMatrix<128, 64>();
}

TEST(DistanceMatrix, InnerProduct_128x128) {
  TestInnerProductMatrix<128, 128>();
}

// Size sweep for the batched FP16 minus-inner-product kernel. Each test below
// runs TestMinusInnerProductMatrix<M, N> for the MxN pair in its name, where
// M is the number of vectors per batch and N the number of queries.
TEST(DistanceMatrix, MinusInnerProduct_1x1) {
  TestMinusInnerProductMatrix<1, 1>();
}

TEST(DistanceMatrix, MinusInnerProduct_2x1) {
  TestMinusInnerProductMatrix<2, 1>();
}

TEST(DistanceMatrix, MinusInnerProduct_2x2) {
  TestMinusInnerProductMatrix<2, 2>();
}

TEST(DistanceMatrix, MinusInnerProduct_3x3) {
  TestMinusInnerProductMatrix<3, 3>();
}

TEST(DistanceMatrix, MinusInnerProduct_4x1) {
  TestMinusInnerProductMatrix<4, 1>();
}

TEST(DistanceMatrix, MinusInnerProduct_4x2) {
  TestMinusInnerProductMatrix<4, 2>();
}

TEST(DistanceMatrix, MinusInnerProduct_4x4) {
  TestMinusInnerProductMatrix<4, 4>();
}

TEST(DistanceMatrix, MinusInnerProduct_8x1) {
  TestMinusInnerProductMatrix<8, 1>();
}

TEST(DistanceMatrix, MinusInnerProduct_8x2) {
  TestMinusInnerProductMatrix<8, 2>();
}

TEST(DistanceMatrix, MinusInnerProduct_8x4) {
  TestMinusInnerProductMatrix<8, 4>();
}

TEST(DistanceMatrix, MinusInnerProduct_8x8) {
  TestMinusInnerProductMatrix<8, 8>();
}

TEST(DistanceMatrix, MinusInnerProduct_16x1) {
  TestMinusInnerProductMatrix<16, 1>();
}

TEST(DistanceMatrix, MinusInnerProduct_16x2) {
  TestMinusInnerProductMatrix<16, 2>();
}

TEST(DistanceMatrix, MinusInnerProduct_16x4) {
  TestMinusInnerProductMatrix<16, 4>();
}

TEST(DistanceMatrix, MinusInnerProduct_16x8) {
  TestMinusInnerProductMatrix<16, 8>();
}

TEST(DistanceMatrix, MinusInnerProduct_16x16) {
  TestMinusInnerProductMatrix<16, 16>();
}

TEST(DistanceMatrix, MinusInnerProduct_32x1) {
  TestMinusInnerProductMatrix<32, 1>();
}

TEST(DistanceMatrix, MinusInnerProduct_32x2) {
  TestMinusInnerProductMatrix<32, 2>();
}

TEST(DistanceMatrix, MinusInnerProduct_32x4) {
  TestMinusInnerProductMatrix<32, 4>();
}

TEST(DistanceMatrix, MinusInnerProduct_32x8) {
  TestMinusInnerProductMatrix<32, 8>();
}

TEST(DistanceMatrix, MinusInnerProduct_32x16) {
  TestMinusInnerProductMatrix<32, 16>();
}

TEST(DistanceMatrix, MinusInnerProduct_32x32) {
  TestMinusInnerProductMatrix<32, 32>();
}

TEST(DistanceMatrix, MinusInnerProduct_64x1) {
  TestMinusInnerProductMatrix<64, 1>();
}

TEST(DistanceMatrix, MinusInnerProduct_64x2) {
  TestMinusInnerProductMatrix<64, 2>();
}

TEST(DistanceMatrix, MinusInnerProduct_64x4) {
  TestMinusInnerProductMatrix<64, 4>();
}

TEST(DistanceMatrix, MinusInnerProduct_64x8) {
  TestMinusInnerProductMatrix<64, 8>();
}

TEST(DistanceMatrix, MinusInnerProduct_64x16) {
  TestMinusInnerProductMatrix<64, 16>();
}

TEST(DistanceMatrix, MinusInnerProduct_64x32) {
  TestMinusInnerProductMatrix<64, 32>();
}

TEST(DistanceMatrix, MinusInnerProduct_64x64) {
  TestMinusInnerProductMatrix<64, 64>();
}

TEST(DistanceMatrix, MinusInnerProduct_128x1) {
  TestMinusInnerProductMatrix<128, 1>();
}

TEST(DistanceMatrix, MinusInnerProduct_128x2) {
  TestMinusInnerProductMatrix<128, 2>();
}

TEST(DistanceMatrix, MinusInnerProduct_128x4) {
  TestMinusInnerProductMatrix<128, 4>();
}

TEST(DistanceMatrix, MinusInnerProduct_128x8) {
  TestMinusInnerProductMatrix<128, 8>();
}

TEST(DistanceMatrix, MinusInnerProduct_128x16) {
  TestMinusInnerProductMatrix<128, 16>();
}

TEST(DistanceMatrix, MinusInnerProduct_128x32) {
  TestMinusInnerProductMatrix<128, 32>();
}

TEST(DistanceMatrix, MinusInnerProduct_128x64) {
  TestMinusInnerProductMatrix<128, 64>();
}

TEST(DistanceMatrix, MinusInnerProduct_128x128) {
  TestMinusInnerProductMatrix<128, 128>();
}

template <size_t M, size_t N, size_t B, size_t D>
void InnerProductBenchmark(void) {
  const size_t dimension = D;
  const size_t batch_size = M;
  const size_t block_size = B;
  const size_t query_size = N;
  const size_t matrix_size = block_size * batch_size * dimension;
  const size_t query_matrix_size = dimension * query_size;

  std::vector<Float16> matrix1(matrix_size);
  std::vector<Float16> matrix2(matrix_size);
  std::vector<Float16> query1(query_matrix_size);
  std::vector<Float16> query2(query_matrix_size);

  std::mt19937 gen((std::random_device())());
  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = dist(gen);
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = dist(gen);
  }

  for (size_t i = 0; i < block_size; ++i) {
    size_t start_pos = i * batch_size * dimension;
    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], dimension,
                    batch_size);
  }
  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);

  ElapsedTime elapsed_time;
  std::vector<float> results(batch_size * query_size);

  std::cout << "# (" << IntelIntrinsics() << ") FP16 " << dimension << "d, "
            << batch_size << " * " << query_size << " * " << block_size
            << std::endl;

  // 1 Batched InnerProduct
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const Float16 *matrix_batch = &matrix2[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const Float16 *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      InnerProductMatrix<Float16, batch_size, 1>::Compute(
          matrix_batch, current_query, dimension, current_results);
    }
  }
  std::cout << "* 1 Batched InnerProduct (us) \t"
            << elapsed_time.micro_seconds() << std::endl;

  // N Batched InnerProduct
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const Float16 *matrix_batch = &matrix2[i * batch_size * dimension];

    InnerProductMatrix<Float16, batch_size, query_size>::Compute(
        matrix_batch, &query2[0], dimension, results.data());
  }
  std::cout << "* N Batched InnerProduct (us) \t"
            << elapsed_time.micro_seconds() << std::endl;

  // Unbatched InnerProduct
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const Float16 *matrix_batch = &matrix1[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const Float16 *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      for (size_t k = 0; k < batch_size; ++k) {
        InnerProductMatrix<Float16, 1, 1>::Compute(&matrix_batch[k * dimension],
                                                   current_query, dimension,
                                                   &current_results[k]);
      }
    }
  }
  std::cout << "* Unbatched InnerProduct (us) \t"
            << elapsed_time.micro_seconds() << std::endl;
}

template <size_t M, size_t N, size_t B, size_t D>
void MinusInnerProductBenchmark(void) {
  const size_t dimension = D;
  const size_t batch_size = M;
  const size_t block_size = B;
  const size_t query_size = N;
  const size_t matrix_size = block_size * batch_size * dimension;
  const size_t query_matrix_size = dimension * query_size;

  std::vector<Float16> matrix1(matrix_size);
  std::vector<Float16> matrix2(matrix_size);
  std::vector<Float16> query1(query_matrix_size);
  std::vector<Float16> query2(query_matrix_size);

  std::mt19937 gen((std::random_device())());
  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = dist(gen);
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = dist(gen);
  }

  for (size_t i = 0; i < block_size; ++i) {
    size_t start_pos = i * batch_size * dimension;
    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], dimension,
                    batch_size);
  }
  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);

  ElapsedTime elapsed_time;
  std::vector<float> results(batch_size * query_size);

  std::cout << "# (" << IntelIntrinsics() << ") FP16 " << dimension << "d, "
            << batch_size << " * " << query_size << " * " << block_size
            << std::endl;

  // 1 Batched InnerProduct
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const Float16 *matrix_batch = &matrix2[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const Float16 *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      MinusInnerProductMatrix<Float16, batch_size, 1>::Compute(
          matrix_batch, current_query, dimension, current_results);
    }
  }
  std::cout << "* 1 Batched MinusInnerProduct (us) \t"
            << elapsed_time.micro_seconds() << std::endl;

  // N Batched InnerProduct
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const Float16 *matrix_batch = &matrix2[i * batch_size * dimension];

    MinusInnerProductMatrix<Float16, batch_size, query_size>::Compute(
        matrix_batch, &query2[0], dimension, results.data());
  }
  std::cout << "* N Batched MinusInnerProduct (us) \t"
            << elapsed_time.micro_seconds() << std::endl;

  // Unbatched InnerProduct
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const Float16 *matrix_batch = &matrix1[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const Float16 *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      for (size_t k = 0; k < batch_size; ++k) {
        MinusInnerProductMatrix<Float16, 1, 1>::Compute(
            &matrix_batch[k * dimension], current_query, dimension,
            &current_results[k]);
      }
    }
  }
  std::cout << "* Unbatched MinusInnerProduct (us) \t"
            << elapsed_time.micro_seconds() << std::endl;
}

// Benchmark driver for InnerProductBenchmark<M, N, B, D>. The DISABLED_
// prefix keeps it out of normal test runs; pass
// --gtest_also_run_disabled_tests to execute it.
// Arguments are <batch size, query count, block count, dimension>. Every
// shape uses 512 blocks of 64-d vectors, except the final 1x1 case, which
// uses 1024 blocks of 256-d vectors.
TEST(DistanceMatrix, DISABLED_InnerProduct_Benchmark) {
  InnerProductBenchmark<2, 1, 512, 64>();
  InnerProductBenchmark<2, 2, 512, 64>();
  InnerProductBenchmark<4, 1, 512, 64>();
  InnerProductBenchmark<4, 2, 512, 64>();
  InnerProductBenchmark<4, 4, 512, 64>();
  InnerProductBenchmark<8, 1, 512, 64>();
  InnerProductBenchmark<8, 2, 512, 64>();
  InnerProductBenchmark<8, 4, 512, 64>();
  InnerProductBenchmark<8, 8, 512, 64>();
  InnerProductBenchmark<16, 1, 512, 64>();
  InnerProductBenchmark<16, 2, 512, 64>();
  InnerProductBenchmark<16, 4, 512, 64>();
  InnerProductBenchmark<16, 8, 512, 64>();
  InnerProductBenchmark<16, 16, 512, 64>();
  InnerProductBenchmark<32, 1, 512, 64>();
  InnerProductBenchmark<32, 2, 512, 64>();
  InnerProductBenchmark<32, 4, 512, 64>();
  InnerProductBenchmark<32, 8, 512, 64>();
  InnerProductBenchmark<32, 16, 512, 64>();
  InnerProductBenchmark<32, 32, 512, 64>();
  InnerProductBenchmark<64, 1, 512, 64>();
  InnerProductBenchmark<64, 2, 512, 64>();
  InnerProductBenchmark<64, 4, 512, 64>();
  InnerProductBenchmark<64, 8, 512, 64>();
  InnerProductBenchmark<128, 1, 512, 64>();
  InnerProductBenchmark<1, 1, 1024, 256>();
}

// Benchmark driver for MinusInnerProductBenchmark<M, N, B, D>. The DISABLED_
// prefix keeps it out of normal test runs; pass
// --gtest_also_run_disabled_tests to execute it.
// Arguments are <batch size, query count, block count, dimension>. Every
// shape uses 512 blocks of 64-d vectors, except the final 1x1 case, which
// uses 1024 blocks of 256-d vectors.
TEST(DistanceMatrix, DISABLED_MinusInnerProduct_Benchmark) {
  MinusInnerProductBenchmark<2, 1, 512, 64>();
  MinusInnerProductBenchmark<2, 2, 512, 64>();
  MinusInnerProductBenchmark<4, 1, 512, 64>();
  MinusInnerProductBenchmark<4, 2, 512, 64>();
  MinusInnerProductBenchmark<4, 4, 512, 64>();
  MinusInnerProductBenchmark<8, 1, 512, 64>();
  MinusInnerProductBenchmark<8, 2, 512, 64>();
  MinusInnerProductBenchmark<8, 4, 512, 64>();
  MinusInnerProductBenchmark<8, 8, 512, 64>();
  MinusInnerProductBenchmark<16, 1, 512, 64>();
  MinusInnerProductBenchmark<16, 2, 512, 64>();
  MinusInnerProductBenchmark<16, 4, 512, 64>();
  MinusInnerProductBenchmark<16, 8, 512, 64>();
  MinusInnerProductBenchmark<16, 16, 512, 64>();
  MinusInnerProductBenchmark<32, 1, 512, 64>();
  MinusInnerProductBenchmark<32, 2, 512, 64>();
  MinusInnerProductBenchmark<32, 4, 512, 64>();
  MinusInnerProductBenchmark<32, 8, 512, 64>();
  MinusInnerProductBenchmark<32, 16, 512, 64>();
  MinusInnerProductBenchmark<32, 32, 512, 64>();
  MinusInnerProductBenchmark<64, 1, 512, 64>();
  MinusInnerProductBenchmark<64, 2, 512, 64>();
  MinusInnerProductBenchmark<64, 4, 512, 64>();
  MinusInnerProductBenchmark<64, 8, 512, 64>();
  MinusInnerProductBenchmark<128, 1, 512, 64>();
  MinusInnerProductBenchmark<1, 1, 1024, 256>();
}

// Tight loop over the one-to-one FP16 minus-inner-product kernel on a single
// 768-d vector pair. The test prints and asserts nothing; it only generates
// load (disabled by default through the DISABLED_ prefix).
// NOTE(review): presumably meant to be observed with an external profiler -
// confirm with the author.
TEST(DistanceMatrix, DISABLED_MinusInnerProduct_BenchmarkSimple) {
  std::random_device seed_source;
  std::mt19937 rng(seed_source());

  const size_t kDim = 768;
  const size_t kIterations = 100000000LLU;

  std::vector<Float16> stored(kDim);
  std::vector<Float16> probe(kDim);

  // Written by every Compute call; never read.
  float score;

  // Fill the stored vector completely before the probe so the random draws
  // are consumed in a fixed order.
  std::uniform_real_distribution<float> value_dist(0.0, 1.0);
  for (Float16 &value : stored) {
    value = value_dist(rng);
  }
  for (Float16 &value : probe) {
    value = value_dist(rng);
  }

  size_t remaining = kIterations;
  while (remaining != 0) {
    MinusInnerProductMatrix<Float16, 1, 1>::Compute(stored.data(),
                                                    probe.data(), kDim, &score);
    --remaining;
  }
}

// Scalar reference implementation of a sparse inner product.
//
// Walks the two index arrays with one cursor each and adds
// value1[i] * value2[j] to the sum whenever index1[i] == index2[j].
// On a mismatch only the cursor sitting on the smaller index advances, so
// every common index is paired only when both index arrays are in
// ascending order.
//
// count1/index1/value1: entry count, indices and values of the first vector.
// count2/index2/value2: entry count, indices and values of the second vector.
// Returns the accumulated sum (0.0f when no index is shared or either
// vector is empty).
static inline float SparseDistanceCommon(uint32_t count1, uint32_t *index1,
                                         Float16 *value1, uint32_t count2,
                                         uint32_t *index2, Float16 *value2) {
  float dot{0.0f};

  size_t lhs = 0;
  size_t rhs = 0;
  for (; lhs < count1 && rhs < count2;) {
    const uint32_t lhs_index = index1[lhs];
    const uint32_t rhs_index = index2[rhs];

    if (lhs_index < rhs_index) {
      // First vector is behind: skip its entry.
      ++lhs;
      continue;
    }
    if (rhs_index < lhs_index) {
      // Second vector is behind: skip its entry.
      ++rhs;
      continue;
    }

    // Same index on both sides: this pair contributes to the product.
    dot += value1[lhs] * value2[rhs];
    ++lhs;
    ++rhs;
  }

  return dot;
}

void TestInnerProductSparse(void) {
  // test 1
  const uint32_t sparse_vec_count_0 = 52;
  uint32_t sparse_vec_index_0[] = {
      33,   66,   77,   209,  385,  396,  539,  583,  649,  715,  880,
      935,  968,  1023, 1100, 1111, 1661, 1694, 1749, 2288, 2343, 2453,
      2530, 2772, 2871, 2882, 2948, 3069, 3322, 3333, 3410, 3575, 3608,
      4026, 4037, 4048, 4059, 4070, 4268, 4323, 4741, 4752, 5137, 5170,
      5423, 5555, 5918, 6028, 6094, 6347, 6369, 6468};
  FixedVector<Float16, sparse_vec_count_0> sparse_vec_value_0{
      -0.246404298254, 0.892043114755,  0.163785949199,  -0.680309913534,
      -0.767956138324, -0.410683610329, 0.763314047145,  0.347851184825,
      -0.676969102165, -0.774662820732, 0.274471489215,  -0.131269040962,
      0.206478593023,  0.764082612827,  -0.57678381864,  -0.256053693585,
      0.661507236032,  -0.812832823664, 0.929611593685,  -0.381852499144,
      -0.35890001953,  0.538386710846,  -0.829565442015, 0.384046166409,
      0.623125501212,  0.043215334982,  -0.689536097425, -0.500913794456,
      -0.419818105671, -0.503346955801, -0.99419236655,  -0.414091535679,
      -0.829474457209, -0.103915702521, -0.419445202934, -0.26891898936,
      0.311013521629,  0.172923023003,  -0.818231467063, -0.728015315042,
      0.110116365075,  0.845786117564,  -0.587841450807, 0.533763235805,
      -0.601437402994, -0.117487602176, 0.106103380748,  -0.00151542886833,
      0.189967593506,  0.890365538566,  -0.581876671583, -0.232173604777};

  const uint32_t sparse_vec_count_1 = 43;
  uint32_t sparse_vec_index_1[] = {
      33,   77,   110,  209,  1023, 1111, 1221, 1496, 1661, 1749, 2189,
      2255, 2288, 2420, 2530, 2695, 2772, 2838, 2948, 3179, 3575, 4202,
      4268, 4290, 4433, 4444, 4653, 4697, 4741, 5137, 5192, 5346, 5423,
      5445, 5555, 5588, 5764, 5896, 5918, 6028, 6270, 6347, 6501};
  FixedVector<Float16, sparse_vec_count_1> sparse_vec_value_1{
      -0.847561468192, -0.761580890729,  0.683791378502,  0.729670644228,
      -0.111989702001, -0.3435914518,    -0.806454864134, -0.0243347460596,
      0.497209110076,  0.852745969955,   0.403748558594,  -0.634016410599,
      -0.74513226711,  0.738086689871,   0.364575651925,  0.0867637408004,
      -0.285921174394, -0.321390976616,  -0.971849760722, -0.246041408731,
      -0.110667223833, 0.0744013655781,  0.84846334839,   0.167405689007,
      0.0289923642993, -0.536394124155,  0.518249809298,  -0.695798108647,
      0.0653215071151, -0.0046338401448, 0.644189056747,  -0.52301532328,
      -0.660275328421, 0.643514995264,   0.0333307952838, -0.401825159735,
      -0.188869041499, -0.23065238799,   -0.409416817144, -0.142933941372,
      0.247628793044,  -0.984985692607,  -0.427929860028};

  std::string sparse_query_buffer_0;
  MinusInnerProductSparseMatrix<Float16>::transform_sparse_format(
      sparse_vec_count_0, sparse_vec_index_0, sparse_vec_value_0.data(),
      sparse_query_buffer_0);

  std::string sparse_query_buffer_1;
  MinusInnerProductSparseMatrix<Float16>::transform_sparse_format(
      sparse_vec_count_1, sparse_vec_index_1, sparse_vec_value_1.data(),
      sparse_query_buffer_1);

  float result0{0.0f};
  result0 = SparseDistanceCommon(sparse_vec_count_0, sparse_vec_index_0,
                                 sparse_vec_value_0.data(), sparse_vec_count_1,
                                 sparse_vec_index_1, sparse_vec_value_1.data());

  float result1{0.0f};
  MinusInnerProductSparseMatrix<Float16>::Compute(
      sparse_query_buffer_0.data(), sparse_query_buffer_1.data(), &result1);
  result1 = -result1;

  EXPECT_GE(0.00001, std::abs(result0 - result1));

  // test 2
  constexpr uint32_t sparse_vec_count_2 = 49;
  uint32_t sparse_vec_index_2[] = {
      13200,  20900,  36300,  41800,  50600,  74800,  78100,  81400,  93500,
      99000,  107800, 121000, 127600, 137500, 140800, 143000, 145200, 166100,
      174900, 193600, 194700, 195800, 233200, 261800, 262900, 273900, 277200,
      299200, 302500, 343200, 381700, 387200, 418000, 421300, 436700, 449900,
      480700, 510400, 586300, 596200, 603900, 607200, 612700, 625900, 632500,
      633600, 639100, 642400, 650100};
  FixedVector<Float16, sparse_vec_count_2> sparse_vec_value_2{
      0.167493264953,  0.178347102375,   0.61850792017,    0.707662206696,
      -0.604456492928, 0.898905062153,   -0.971984671516,  -0.337950525868,
      -0.942538751319, -0.115612454156,  0.78433412971,    0.601522288928,
      -0.640321042923, -0.235673191423,  0.00632807223978, 0.629970437467,
      0.966519256786,  -0.279362437157,  0.396153064627,   -0.614592812875,
      -0.642157513141, 0.686723258138,   0.10227967727,    -0.5921196708,
      0.499411577177,  -0.0188556369919, 0.512245212443,   0.424666758023,
      0.299827154891,  -0.615468257454,  -0.0499098903374, -0.54873640329,
      0.899673049133,  -0.873237346565,  0.463117084808,   -0.810200151551,
      0.676836615658,  0.596247430713,   0.946225552468,   0.968425796351,
      -0.821041580744, -0.697734977387,  0.295618053879,   -0.476597945375,
      -0.246035224835, 0.927603570489,   -0.640242995569,  0.610224433234,
      -0.657550506633};

  constexpr uint32_t sparse_vec_count_3 = 58;
  uint32_t sparse_vec_index_3[] = {
      13200,  19800,  37400,  56100,  68200,  78100,  81400,  99000,  103400,
      107800, 108900, 110000, 111100, 125400, 127600, 137500, 141900, 151800,
      154000, 155100, 158400, 163900, 165000, 173800, 198000, 201300, 215600,
      247500, 249700, 264000, 269500, 287100, 291500, 311300, 312400, 336600,
      353100, 354200, 361900, 367400, 390500, 398200, 407000, 414700, 424600,
      510400, 533500, 535700, 551100, 556600, 568700, 576400, 577500, 590700,
      592900, 618200, 631400, 636900};
  FixedVector<Float16, sparse_vec_count_3> sparse_vec_value_3{
      0.175769744964,  -0.198506965419,  0.0842021015107, 0.544957076263,
      0.0856447356878, 0.838582935178,   0.796525374862,  -0.931940801441,
      0.555150441425,  0.957490431546,   -0.422126167235, -0.40903200281,
      0.242643233475,  0.698565387541,   -0.325754491857, 0.540403772154,
      -0.449888493042, 0.349262051644,   -0.612943655195, 0.874112675658,
      0.943939922271,  -0.994946966212,  -0.978705162429, 0.321190597007,
      0.17722019302,   0.6041089417,     -0.353184098327, -0.938569390092,
      -0.92268220981,  -0.268600478592,  -0.598069229627, 0.0720175726713,
      0.426800021137,  0.369250757861,   -0.823348360327, -0.664061107875,
      -0.418342805261, -0.430818720049,  0.0941988181812, 0.0765632945538,
      -0.148533061047, 0.404665036566,   -0.170747760502, -0.206564280292,
      0.311035754032,  0.498520039471,   -0.16255148444,  -0.137950933749,
      -0.234990864629, 0.602901363949,   0.0297103943437, -0.730955584059,
      0.117169059405,  -0.0746546228896, 0.39067258928,   -0.214782717972,
      -0.111009971497, -0.87766242691};

  std::string sparse_query_buffer_2;
  MinusInnerProductSparseMatrix<Float16>::transform_sparse_format(
      sparse_vec_count_2, sparse_vec_index_2, sparse_vec_value_2.data(),
      sparse_query_buffer_2);

  std::string sparse_query_buffer_3;
  MinusInnerProductSparseMatrix<Float16>::transform_sparse_format(
      sparse_vec_count_3, sparse_vec_index_3, sparse_vec_value_3.data(),
      sparse_query_buffer_3);

  float result2{0.0f};
  result2 = SparseDistanceCommon(sparse_vec_count_2, sparse_vec_index_2,
                                 sparse_vec_value_2.data(), sparse_vec_count_3,
                                 sparse_vec_index_3, sparse_vec_value_3.data());

  float result3{0.0f};
  MinusInnerProductSparseMatrix<Float16>::Compute(
      sparse_query_buffer_2.data(), sparse_query_buffer_3.data(), &result3);
  result3 = -result3;

  EXPECT_GE(0.00001, std::abs(result2 - result3));
}

void TestInnerProductSparseMore(void) {
  std::vector<uint32_t> sparse_vec_counts;
  std::vector<uint32_t *> sparse_vec_indices;
  std::vector<Float16 *> sparse_vec_values;

  const uint32_t sparse_vec_count_0 = 173;
  uint32_t sparse_vec_index_0[] = {
      1012,  1996,  2001,  2018,  2020,  2036,  2037,  2056,  2058,  2069,
      2111,  2116,  2138,  2162,  2166,  2245,  2253,  2259,  2306,  2307,
      2318,  2331,  2351,  2359,  2390,  2419,  2426,  2428,  2466,  2470,
      2535,  2554,  2557,  2568,  2590,  2622,  2671,  2739,  2765,  2812,
      2817,  2837,  2913,  2920,  3003,  3092,  3112,  3125,  3144,  3214,
      3241,  3249,  3260,  3268,  3271,  3278,  3280,  3330,  3463,  3478,
      3716,  3739,  3768,  3800,  3908,  3934,  3992,  4028,  4045,  4072,
      4146,  4254,  4301,  4382,  4454,  4471,  4504,  4517,  4598,  4806,
      4807,  4847,  4928,  4988,  5081,  5113,  5177,  5190,  5197,  5201,
      5234,  5456,  5621,  5689,  5792,  5817,  5823,  5875,  5920,  5921,
      5951,  5968,  6033,  6112,  6145,  6215,  6344,  6396,  6429,  6438,
      6529,  6627,  6691,  6731,  6801,  6865,  6950,  7036,  7128,  7155,
      7461,  7551,  7596,  7691,  7784,  7789,  7848,  7857,  8044,  8052,
      8053,  8553,  8573,  8664,  8817,  8826,  9250,  9273,  9593,  9727,
      10013, 10106, 10617, 10639, 10753, 11657, 12108, 13128, 13463, 13702,
      13787, 14152, 14332, 15237, 15313, 15359, 15699, 16724, 17171, 17571,
      17669, 20168, 20805, 20972, 22134, 22229, 22779, 24762, 24823, 25526,
      25699, 26761, 27885};
  FixedVector<Float16, sparse_vec_count_0> sparse_vec_value_0{
      0.36311877,  0.10386213,  0.64821976,   0.26300138,    0.29727572,
      0.047292523, 0.022334402, 0.118793316,  0.7198291,     0.73566943,
      0.19491579,  0.5763569,   0.5245229,    0.022828134,   0.43562022,
      0.6946562,   0.09275672,  0.9687072,    0.1751608,     0.09703954,
      0.18717986,  0.43182945,  0.055112287,  0.0021027816,  0.13972417,
      0.1019873,   0.8679199,   0.26797894,   0.097350314,   0.5125363,
      0.2829703,   0.052232087, 0.3248494,    1.1258097,     0.90756655,
      1.6490538,   0.45066822,  0.004210417,  0.028443621,   0.41171393,
      0.09246816,  0.053040083, 0.052729037,  0.00041907438, 0.32047704,
      0.2290303,   1.3542659,   0.28811434,   1.1722984,     0.4484738,
      0.73670006,  0.22390367,  0.0058781556, 0.48173144,    0.76392287,
      0.32048634,  0.42589885,  0.8624791,    0.0376546,     0.56702816,
      0.002337549, 1.5856861,   0.14177673,   0.22762497,    0.6601752,
      1.0603137,   0.914821,    0.34792075,   1.4387932,     0.035774633,
      0.04391008,  0.7179224,   0.49199906,   0.043692447,   1.1404462,
      0.47572234,  0.22777049,  0.7626374,    0.59730506,    1.4541638,
      1.6540457,   0.089919806, 0.0050144624, 0.15902519,    0.2989032,
      0.121926464, 0.11911,     0.27476037,   1.2774497,     0.42462146,
      0.30179682,  0.18773684,  0.82144237,   1.2033592,     0.07180116,
      0.06378868,  0.029040875, 0.2089903,    0.03591103,    0.94913304,
      0.18240769,  0.9050947,   0.0034226696, 1.2841027,     0.629526,
      0.06401547,  1.0698998,   0.11138009,   0.20497903,    0.017457427,
      0.6316996,   0.12303611,  0.01563728,   0.090583175,   0.23981698,
      0.48518667,  0.6207808,   1.8336427,    2.3282833,     0.8153351,
      0.026216522, 0.6143031,   0.17374748,   0.32929608,    0.33730298,
      1.1497657,   0.1926745,   0.14235665,   1.1076177,     0.945609,
      0.48826388,  0.10458124,  0.19699246,   0.20899634,    0.44853806,
      0.26411146,  0.7495864,   1.3681723,    1.4299264,     0.037516754,
      0.17946614,  0.98060745,  0.055851664,  0.2002921,     0.45136684,
      0.33716172,  0.58752763,  0.34051904,   1.9018586,     0.20597915,
      0.82819384,  0.23866963,  0.4160662,    0.11889692,    0.172538,
      0.005433464, 0.089198045, 0.3896585,    0.74038976,    0.24974349,
      0.044961147, 0.32671204,  0.044312827,  0.25430596,    0.021065181,
      0.071978964, 1.992692,    0.02640776,   1.7344381,     0.09561436,
      0.07097204,  0.2922402,   0.8794989};

  const uint32_t sparse_vec_count_1 = 144;
  uint32_t sparse_vec_index_1[] = {
      1012,  1016,  1059,  1996,  2001,  2020,  2049,  2068,  2076,  2088,
      2109,  2138,  2145,  2149,  2162,  2203,  2220,  2224,  2256,  2259,
      2318,  2373,  2381,  2390,  2393,  2419,  2462,  2466,  2485,  2506,
      2554,  2557,  2580,  2590,  2622,  2633,  2645,  2671,  2716,  2724,
      2900,  2942,  2943,  3003,  3029,  3092,  3112,  3125,  3260,  3271,
      3278,  3283,  3288,  3439,  3466,  3478,  3521,  3578,  3594,  3595,
      3607,  3647,  3690,  3800,  3826,  3896,  3908,  3934,  3947,  3987,
      4045,  4068,  4204,  4254,  4255,  4302,  4329,  4471,  4504,  4517,
      4566,  4736,  4762,  4789,  5081,  5094,  5105,  5195,  5197,  5201,
      5233,  5234,  5584,  5817,  5823,  5832,  5875,  5951,  5968,  6033,
      6035,  6179,  6215,  6245,  6383,  6394,  6396,  6529,  6613,  6691,
      6801,  7091,  7128,  7155,  7240,  7461,  7551,  7596,  7691,  7738,
      7784,  8027,  8144,  8192,  8249,  8309,  8573,  8647,  8826,  9379,
      9593,  9767,  10400, 10461, 10530, 11028, 12799, 13787, 14487, 14670,
      15237, 15523, 20168, 25755};
  FixedVector<Float16, sparse_vec_count_1> sparse_vec_value_1{
      0.3815109,   0.21950184,   0.389138,    0.03037462,  0.738938,
      0.11151163,  0.21257511,   0.008723602, 0.42403504,  0.17748593,
      0.38613674,  0.38208488,   0.49048766,  0.056615792, 1.285813,
      1.1482359,   0.016783785,  0.7362169,   0.21784282,  1.0905122,
      0.37420613,  0.81915,      0.67411584,  0.35778007,  0.80538017,
      0.10094925,  1.2726786,    0.12334787,  0.18297458,  0.13315988,
      0.041079145, 0.2655652,    0.10946682,  0.6782494,   1.7451618,
      0.17126456,  0.17718226,   0.7430134,   0.9090848,   0.31985787,
      0.21779177,  0.13639484,   1.2293936,   0.065131165, 0.03718982,
      0.64121664,  0.46517274,   0.39498892,  0.07401267,  1.2061241,
      0.1276834,   0.059918232,  1.1935436,   0.61886644,  0.32731527,
      0.37830237,  1.0287925,    0.09565632,  0.4313508,   0.03845683,
      0.066990376, 0.10886483,   0.097683005, 0.29624575,  0.48645914,
      0.250733,    0.03274726,   1.205507,    0.048636433, 0.034002367,
      0.83021015,  0.044592205,  0.06007409,  1.1224703,   0.45620173,
      0.16457361,  0.053571727,  0.12527509,  0.1308366,   0.92323685,
      0.7821679,   0.23838642,   0.2558486,   0.09402168,  0.22815736,
      0.51750314,  0.08442147,   0.5565446,   0.3642559,   0.6661639,
      0.73750395,  0.17278494,   0.05865512,  0.013724559, 0.023783961,
      0.04283593,  0.24765956,   0.3991119,   1.5201892,   0.035530984,
      0.049782272, 0.06485597,   0.5367931,   0.15097857,  0.014405596,
      0.14585418,  0.22106051,   0.49575308,  0.08290891,  0.17875223,
      0.21095915,  0.0038430362, 2.3110201,   0.6543391,   0.06421487,
      0.3782336,   0.3514111,    0.5225064,   0.21472597,  0.07987356,
      0.06002587,  1.5242931,    0.081204355, 0.32025364,  0.39068836,
      0.027896391, 0.2872351,    0.50436527,  0.5434884,   1.653683,
      1.444315,    0.988968,     0.024239752, 0.055084217, 0.074782506,
      0.021114044, 0.07288233,   0.822755,    0.10772858,  0.6189507,
      0.29534152,  0.20032129,   0.5609191,   1.2844883};

  const uint32_t sparse_vec_count_2 = 153;
  uint32_t sparse_vec_index_2[] = {
      1012,  1059,  1996,  2001,  2020,  2049,  2052,  2055,  2056,  2081,
      2088,  2124,  2138,  2156,  2158,  2162,  2191,  2231,  2242,  2256,
      2259,  2311,  2318,  2359,  2373,  2381,  2390,  2437,  2458,  2466,
      2477,  2510,  2554,  2580,  2590,  2622,  2640,  2671,  2689,  2825,
      2844,  2881,  2904,  2957,  3029,  3112,  3125,  3144,  3214,  3246,
      3271,  3312,  3330,  3399,  3443,  3478,  3578,  3595,  3647,  3697,
      3740,  3800,  3817,  3818,  3928,  3934,  3987,  4034,  4072,  4079,
      4172,  4204,  4254,  4255,  4302,  4517,  4526,  4695,  4706,  4795,
      4807,  4986,  5081,  5091,  5113,  5195,  5197,  5234,  5253,  5263,
      5623,  5646,  5656,  5817,  5875,  5951,  5954,  5968,  6033,  6061,
      6108,  6119,  6157,  6213,  6215,  6287,  6384,  6396,  6461,  6469,
      6613,  6801,  6842,  7128,  7240,  7305,  7477,  7551,  7596,  7609,
      7624,  7723,  7779,  7857,  7935,  8144,  8238,  8249,  8275,  8547,
      8573,  8647,  8826,  8927,  9036,  9491,  9593,  9767,  10267, 10461,
      10505, 10660, 10721, 11028, 12578, 13787, 14487, 14874, 15523, 20168,
      21565, 24212, 25628};
  FixedVector<Float16, sparse_vec_count_2> sparse_vec_value_2{
      0.19194126,  0.11344757,   0.21317342,  0.6771587,    0.08591107,
      0.006228663, 0.28981656,   0.58056134,  0.064362876,  0.5794717,
      0.4288167,   0.59527594,   0.6106896,   0.23139843,   0.897008,
      0.20689227,  0.28713426,   0.38175523,  0.4028853,    0.08509491,
      1.0562526,   0.1165676,    0.06347306,  0.41331312,   0.16935593,
      0.1626863,   0.29352358,   0.45827967,  0.21193665,   0.39532298,
      0.0789344,   0.026420705,  0.1763078,   0.18424834,   0.7216729,
      1.6683924,   0.06257952,   0.13419773,  0.6851299,    1.2139059,
      0.092483185, 0.10803583,   0.74339646,  0.14461784,   0.2389669,
      0.9306581,   0.5645601,    0.83565444,  0.11930474,   0.22862941,
      0.6214566,   0.0033283439, 0.42018214,  0.15267797,   0.029068783,
      0.24103808,  0.18765616,   0.11574381,  0.31545344,   0.09386852,
      0.038362045, 0.7730324,    0.4456206,   0.20152733,   0.94718367,
      1.1934134,   0.12610391,   0.014013804, 0.47198555,   0.21791361,
      0.05394335,  0.08415188,   0.066486694, 0.47462225,   0.16693182,
      0.9021425,   0.27905586,   0.09939155,  0.12642553,   0.27529165,
      0.024804203, 0.24346212,   0.25561446,  1.4675297,    0.21566682,
      0.5453194,   0.21558505,   0.21294887,  0.2740208,    0.43185237,
      0.2280337,   0.0048945076, 0.26826337,  0.016979327,  0.3338952,
      0.23080347,  0.21200272,   1.3268396,   0.05323057,   0.30005422,
      0.088871606, 0.13259241,   0.04766706,  0.0017769856, 0.2698414,
      0.08068406,  0.38578644,   0.09752118,  0.13972333,   0.0731375,
      0.36664346,  0.12214721,   0.1541759,   2.2295072,    0.22542699,
      0.028530587, 0.022988612,  0.35836184,  0.10530607,   0.53756726,
      0.05818686,  0.044951066,  0.05753079,  0.09009998,   0.24644017,
      0.22693348,  0.0019512648, 0.035316195, 0.057344455,  0.36419895,
      0.1534858,   0.18924302,   0.38702026,  1.2569604,    0.07787755,
      1.7163913,   1.1903315,    0.8173934,   0.13888475,   0.10908335,
      0.35437793,  0.15787303,   0.25039884,  0.130508,     0.09830101,
      0.5841259,   0.22020355,   0.37849018,  0.14054261,   0.5179198,
      1.1891438,   0.44022372,   0.1794719};

  const uint32_t sparse_vec_count_3 = 166;
  uint32_t sparse_vec_index_3[] = {
      1012,  1059,  1996,  1997,  2001,  2020,  2034,  2076,  2086,  2104,
      2138,  2149,  2162,  2170,  2171,  2220,  2231,  2236,  2259,  2311,
      2315,  2318,  2328,  2343,  2344,  2359,  2381,  2390,  2419,  2458,
      2462,  2466,  2472,  2479,  2491,  2510,  2557,  2558,  2565,  2580,
      2590,  2622,  2724,  2764,  2817,  2837,  2881,  2900,  2911,  2933,
      2949,  3003,  3029,  3058,  3092,  3101,  3125,  3188,  3271,  3330,
      3386,  3399,  3434,  3447,  3474,  3478,  3578,  3595,  3607,  3650,
      3690,  3740,  3779,  3800,  3817,  3818,  3826,  3910,  3918,  3934,
      3987,  3992,  4006,  4034,  4068,  4075,  4114,  4146,  4172,  4255,
      4302,  4327,  4503,  4517,  4758,  4883,  4944,  4975,  5036,  5195,
      5205,  5218,  5233,  5234,  5253,  5456,  5623,  5656,  5687,  5817,
      5875,  5951,  5954,  5968,  6059,  6119,  6145,  6157,  6215,  6262,
      6384,  6394,  6613,  6787,  6801,  6842,  6993,  7128,  7156,  7240,
      7305,  7421,  7551,  7596,  7676,  7935,  8547,  8573,  8647,  8773,
      8826,  8886,  8911,  9036,  9274,  9433,  9593,  9767,  9915,  10267,
      10461, 10505, 11028, 11274, 11593, 13058, 13787, 14487, 15237, 17060,
      20168, 21695, 23041, 24363, 25526, 25755};
  FixedVector<Float16, sparse_vec_count_3> sparse_vec_value_3{
      0.17927244,   0.20557176,   0.40560228,   0.32370853,  0.8060634,
      0.21424179,   1.0674698,    0.6046889,    0.21051478,  0.46186206,
      0.24661283,   0.5616991,    1.016811,     0.2618776,   0.9686127,
      0.869671,     0.1458332,    0.60725594,   1.206012,    0.10357225,
      0.4350595,    0.83702874,   0.146196,     0.8644738,   0.15587087,
      0.16456357,   0.36376593,   1.053665,     0.06609649,  0.6504239,
      0.9697015,    0.04947369,   0.43753505,   0.04289205,  0.42075413,
      0.330524,     0.1743388,    0.6540892,    0.012900644, 0.23207273,
      0.2674499,    1.9736407,    0.21540764,   0.63648874,  0.049446102,
      0.3750183,    0.17441651,   0.123951435,  0.015306404, 0.1767618,
      0.24109434,   0.4245122,    0.114403255,  0.91849947,  0.12018716,
      0.01165807,   0.47680765,   0.036503244,  0.5782868,   0.9163635,
      0.27396393,   0.16385026,   0.052631885,  0.72294754,  0.4022935,
      0.06351255,   0.27786675,   0.25394455,   0.08041568,  1.3137422,
      0.5514297,    0.2503315,    0.009040705,  0.40985608,  0.27673048,
      0.14055687,   0.50529444,   0.6049716,    1.0692317,   1.207644,
      0.108388424,  0.9495853,    0.35366973,   0.3762234,   0.19875458,
      0.14685634,   0.0060924664, 1.0126622,    0.034943417, 0.49489433,
      0.34451365,   0.21992311,   0.7039926,    0.9501215,   0.34629604,
      0.20126931,   0.23908958,   0.019030606,  0.12528977,  0.6009518,
      0.056694727,  0.19225678,   0.61745095,   0.26769277,  0.18739952,
      0.10380342,   0.08536158,   0.18679029,   0.040631995, 0.23538794,
      0.081166975,  0.3206779,    0.0018739193, 1.5819491,   0.07052032,
      0.2504746,    0.7514167,    0.06575893,   0.08000714,  0.0012445971,
      0.23989597,   0.12001178,   0.51009554,   0.14469045,  0.12445986,
      0.08644873,   0.5645543,    2.539498,     0.54383165,  0.22437337,
      0.0018195114, 0.11787724,   0.34932667,   0.49611032,  0.24439196,
      0.100613214,  0.2844197,    0.38720158,   0.22204469,  0.078220785,
      0.76444066,   1.7794204,    0.17640579,   0.04227443,  0.28023362,
      0.06434563,   1.320367,     0.9287479,    0.14726646,  0.27983913,
      0.022449814,  0.09246922,   0.22375125,   0.10417365,  0.034148056,
      0.12830476,   0.6065902,    0.16593556,   0.25840235,  0.2596266,
      0.6388732,    1.6666834,    0.030998405,  0.14869562,  0.30502653,
      1.183558};

  const uint32_t sparse_vec_count_4 = 104;
  uint32_t sparse_vec_index_4[] = {
      1012,  1996,  1997,  2001,  2033,  2034,  2080,  2120,  2142,  2149,
      2220,  2231,  2259,  2284,  2318,  2338,  2381,  2405,  2424,  2436,
      2458,  2472,  2533,  2544,  2557,  2580,  2609,  2622,  2627,  2688,
      2800,  2820,  2837,  2862,  2932,  2949,  3029,  3036,  3181,  3390,
      3439,  3690,  3780,  3784,  3818,  3872,  3931,  3934,  4034,  4037,
      4075,  4219,  4348,  4517,  4573,  4617,  4773,  4809,  4822,  4879,
      5234,  5272,  5851,  5968,  6119,  6378,  6396,  6613,  6702,  6728,
      6787,  7128,  7156,  7240,  7479,  7551,  7596,  7692,  7809,  8027,
      8249,  8264,  8299,  8573,  8826,  9123,  9152,  9274,  9445,  9593,
      9915,  11377, 11744, 12935, 13308, 14487, 14947, 15720, 17060, 17669,
      18079, 18629, 19841, 21053};
  FixedVector<Float16, sparse_vec_count_4> sparse_vec_value_4{
      0.2030336,   0.1411735,   0.12635018,  0.45823106,  0.22794029,
      1.4105916,   0.2769118,   0.75515395,  0.07748295,  0.19260094,
      0.12458416,  0.065163694, 0.9765741,   0.07470863,  0.80718166,
      0.12307288,  0.9393725,   0.048733678, 0.17115222,  1.1922649,
      0.03547645,  0.33111426,  0.03772038,  0.46104532,  0.3141086,
      0.25707254,  1.1549219,   1.8509476,   0.98180383,  0.7270674,
      0.91343564,  0.3373339,   0.081498206, 0.01140901,  0.43917242,
      0.072401166, 0.11307132,  0.8945273,   0.10071963,  0.1945517,
      0.7594797,   0.096463405, 0.07759007,  0.11009286,  0.012562437,
      1.1797432,   0.02481144,  1.2393609,   0.50596905,  1.48781,
      0.53125334,  0.9950063,   1.4128636,   1.5830894,   0.93246186,
      0.60709685,  0.40433922,  0.14255294,  0.7125986,   0.021445543,
      0.4104336,   0.14560317,  0.3189296,   0.51019174,  0.041676614,
      0.22844397,  0.18406813,  0.1604107,   1.2178165,   0.46861333,
      0.04899898,  2.4448788,   0.6505235,   0.051029652, 0.7550255,
      0.00625443,  0.5090246,   0.7109037,   0.1125403,   0.05059699,
      0.03856528,  0.4538238,   0.72464395,  0.1360473,   0.5109412,
      2.0780752,   0.049649376, 0.31396037,  0.114775784, 0.9717559,
      0.05478335,  0.12228666,  1.3433831,   1.6574994,   0.053257514,
      0.51201975,  0.029570522, 0.35752434,  0.39366165,  0.25994724,
      1.1072603,   2.0454218,   1.1423918,   0.59795356};

  const uint32_t sparse_vec_count_5 = 147;
  uint32_t sparse_vec_index_5[] = {
      1012,  1996,  2001,  2018,  2020,  2034,  2047,  2081,  2154,  2162,
      2170,  2171,  2207,  2210,  2220,  2233,  2251,  2253,  2257,  2259,
      2287,  2315,  2318,  2328,  2381,  2390,  2458,  2466,  2510,  2557,
      2580,  2609,  2622,  2645,  2688,  2707,  2724,  2762,  2838,  2900,
      2911,  2915,  3047,  3058,  3260,  3282,  3290,  3295,  3297,  3386,
      3390,  3578,  3603,  3607,  3690,  3746,  3826,  3861,  3908,  3910,
      3918,  3934,  3987,  4006,  4045,  4075,  4088,  4110,  4255,  4302,
      4517,  4620,  4761,  4871,  4916,  5195,  5221,  5234,  5246,  5532,
      5700,  5798,  5832,  5855,  5951,  5968,  6033,  6215,  6219,  6302,
      6394,  6396,  6529,  6950,  7008,  7084,  7128,  7155,  7156,  7240,
      7421,  7467,  7551,  7596,  7738,  7760,  8088,  8367,  8372,  8479,
      8573,  8647,  8773,  8826,  9188,  9274,  9290,  9433,  9593,  9767,
      9913,  9919,  9982,  10461, 10815, 11028, 11721, 12416, 12496, 12779,
      13221, 13702, 13787, 14487, 15699, 16164, 18801, 20168, 21650, 24291,
      24321, 25209, 25526, 25755, 28110, 28682, 28858};
  FixedVector<Float16, sparse_vec_count_5> sparse_vec_value_5{
      0.22246745,  0.1639393,    0.6902539,    0.087209724, 0.3150326,
      1.3589038,   0.39210027,   0.06905281,   0.2940129,   0.48745865,
      0.5185849,   0.06468885,   0.33793828,   0.01934533,  0.9160348,
      0.12213709,  0.64625627,   0.05484681,   0.18600157,  0.7439921,
      1.4779477,   0.50866294,   0.9324953,    0.11494038,  0.14815839,
      0.4024814,   0.0025193223, 0.0039419075, 0.04004241,  0.1137441,
      0.100572474, 0.09889997,   1.6465691,    0.45031455,  0.4567774,
      0.7614913,   0.5324026,    0.09957147,   0.21556115,  0.36752453,
      0.13450043,  0.06911261,   0.04267344,   1.2791942,   0.054822505,
      0.06269096,  1.3170663,    0.8852742,    0.37885663,  0.92810893,
      0.12803665,  0.10517517,   0.24920024,   0.16889784,  1.3619378,
      0.59796244,  0.81389725,   0.06489252,   0.020069994, 0.06319,
      0.71297073,  1.2515233,    0.019061586,  0.04731544,  0.3536146,
      0.50835687,  0.56439734,   0.09884678,   1.1007178,   0.1480219,
      1.6361246,   0.3891063,    0.03873499,   0.050479025, 0.5629584,
      1.0016122,   0.16247666,   0.06476003,   0.43833405,  1.3702114,
      0.11968183,  0.29155007,   0.12643526,   0.518913,    0.41796717,
      1.740134,    0.015489911,  0.2183447,    1.5380116,   1.058654,
      0.06226158,  0.270943,     0.91666347,   0.06422295,  0.33474496,
      0.002399514, 2.0762439,    0.8989307,    0.7876583,   0.03783609,
      0.22333156,  0.13323776,   0.27660817,   0.56637865,  0.21507333,
      0.6770579,   0.7013793,    0.7085848,    0.15651116,  0.05219105,
      0.03743524,  0.30775747,   0.073243596,  0.8181374,   0.28133482,
      0.23539418,  0.07533616,   0.2044144,    1.574523,    1.1304078,
      0.24084339,  1.3286508,    0.775562,     0.10096621,  0.197577,
      0.2307252,   1.719028,     0.07254901,   0.13916898,  0.17486195,
      0.8424586,   0.27879223,   0.8650824,    0.35050592,  0.24243252,
      0.31039444,  0.17227773,   0.90619636,   0.63083464,  2.2181685,
      0.20995331,  0.14425081,   0.37305146,   0.5955121,   0.87200415,
      1.028527,    1.0835907};

  const uint32_t sparse_vec_count_6 = 141;
  uint32_t sparse_vec_index_6[] = {
      1012,  1059,  1996,  1997,  1998,  2001,  2012,  2018,  2020,  2021,
      2025,  2055,  2056,  2076,  2077,  2127,  2130,  2134,  2138,  2143,
      2162,  2197,  2203,  2220,  2259,  2318,  2328,  2338,  2345,  2381,
      2390,  2458,  2462,  2466,  2501,  2517,  2580,  2622,  2631,  2645,
      2688,  2707,  2724,  2748,  2764,  2808,  2900,  2911,  2933,  2949,
      3047,  3058,  3074,  3075,  3092,  3101,  3188,  3271,  3283,  3439,
      3478,  3535,  3595,  3607,  3690,  3720,  3740,  3793,  3818,  3826,
      3906,  3908,  3934,  3981,  3986,  4028,  4138,  4469,  4496,  4503,
      4515,  4517,  4566,  4704,  4706,  4761,  4839,  5036,  5175,  5233,
      5234,  5246,  5254,  5263,  5491,  5817,  5823,  5839,  5875,  5968,
      6215,  6254,  6268,  6394,  6407,  6801,  6848,  7128,  7177,  7321,
      7421,  7487,  7551,  7596,  7681,  7940,  8145,  8264,  8321,  8551,
      8573,  8647,  8773,  8826,  8832,  9472,  9593,  9599,  9767,  10530,
      12149, 13787, 14487, 15237, 15523, 17060, 20168, 23633, 24363, 25526,
      25755};
  FixedVector<Float16, sparse_vec_count_6> sparse_vec_value_6{
      0.48692977,  0.23770119,  0.24359323,   0.030566106,  0.121271,
      0.5703241,   0.12787338,  0.037069157,  0.075816214,  0.05305081,
      0.45591223,  0.5893366,   0.01829792,   0.42078727,   0.036012013,
      0.0750098,   0.20031127,  0.033489488,  0.10935432,   0.054307006,
      1.0000131,   0.20630358,  1.1161063,    0.5766484,    0.86030954,
      0.65358734,  0.062234607, 0.8518808,    0.23441537,   0.14816457,
      0.19284223,  0.94708407,  1.0017378,    0.51629704,   0.082293354,
      0.09170858,  0.2138309,   1.533815,     0.0030641577, 0.029126635,
      0.3632337,   0.1761491,   0.34924436,   0.67822266,   0.5976219,
      0.8595736,   0.17943758,  0.038340267,  0.0052374,    0.29047492,
      0.070157826, 0.6779024,   0.75593567,   0.054473646,  0.4906121,
      0.11288958,  0.15934071,  0.3192689,    0.1435216,    0.30725288,
      0.37506026,  0.7213243,   0.18401349,   0.01871983,   0.19455475,
      0.02040177,  0.28111485,  0.043639474,  0.19826981,   0.27416018,
      1.429636,    0.05111553,  1.0482118,    0.98164123,   0.17426124,
      0.10582682,  1.002954,    1.0261939,    0.83377177,   0.6798103,
      0.015373114, 0.8136259,   0.95782644,   0.13387722,   0.40847424,
      0.80647326,  0.28733957,  0.0029352994, 0.30276307,   0.4768307,
      0.32016084,  0.10302183,  0.3044403,    0.040031943,  0.44271877,
      0.061298616, 0.08278493,  0.107188344,  0.5086274,    1.3297924,
      0.050804485, 0.68582493,  0.21776867,   0.027724598,  0.5286007,
      0.1899133,   0.04971613,  2.2401748,    0.09252626,   0.80688274,
      0.014750206, 0.07568165,  0.021886598,  0.23429997,   1.1812011,
      0.6390751,   0.2643012,   0.13720371,   0.10989579,   1.4969206,
      0.2209742,   0.54690766,  0.15685914,   0.47841135,   0.566988,
      0.08368683,  1.2788389,   0.09509155,   1.0241207,    0.07167757,
      0.29240122,  0.5619141,   0.016415644,  0.28731114,   0.035925347,
      0.34043407,  0.60646313,  0.07248792,   0.08602479,   0.10247773,
      1.13258};

  const uint32_t sparse_vec_count_7 = 221;
  uint32_t sparse_vec_index_7[] = {
      1059,  1996,  2001,  2003,  2008,  2010,  2020,  2029,  2034,  2076,
      2080,  2081,  2103,  2104,  2137,  2138,  2142,  2149,  2162,  2163,
      2220,  2231,  2236,  2253,  2256,  2259,  2315,  2318,  2328,  2329,
      2343,  2344,  2350,  2359,  2381,  2390,  2419,  2458,  2462,  2466,
      2470,  2472,  2490,  2510,  2537,  2550,  2554,  2557,  2580,  2590,
      2599,  2608,  2622,  2631,  2640,  2645,  2662,  2710,  2724,  2728,
      2762,  2764,  2817,  2820,  2832,  2837,  2856,  2866,  2881,  2891,
      2957,  2974,  2983,  3003,  3010,  3029,  3050,  3058,  3063,  3068,
      3092,  3101,  3125,  3135,  3257,  3271,  3282,  3330,  3386,  3399,
      3474,  3578,  3595,  3603,  3607,  3650,  3690,  3758,  3800,  3817,
      3826,  3878,  3910,  3918,  3934,  3947,  3965,  3987,  3992,  4006,
      4034,  4045,  4068,  4146,  4172,  4202,  4255,  4302,  4327,  4351,
      4503,  4517,  4637,  4707,  4944,  5025,  5036,  5195,  5201,  5233,
      5234,  5253,  5501,  5584,  5623,  5656,  5687,  5814,  5817,  5911,
      5951,  5954,  5968,  6035,  6108,  6119,  6145,  6157,  6177,  6215,
      6254,  6262,  6384,  6394,  6613,  6728,  6787,  6801,  6842,  6845,
      6922,  6960,  7128,  7155,  7156,  7240,  7421,  7551,  7596,  7609,
      7654,  7676,  7723,  7779,  7935,  8049,  8144,  8151,  8249,  8547,
      8573,  8647,  8773,  8826,  8864,  8886,  9036,  9274,  9290,  9433,
      9593,  9667,  9767,  9915,  10267, 10505, 10544, 10753, 10815, 11028,
      11593, 11837, 12496, 13058, 13308, 13625, 13702, 14487, 15523, 17669,
      18457, 18800, 18826, 20168, 20843, 21695, 24363, 25526, 25755, 26234,
      26911};
  FixedVector<Float16, sparse_vec_count_7> sparse_vec_value_7{
      0.29634815,  0.3303992,    1.0099697,   0.09545747,  0.046319153,
      0.001999375, 0.27222815,   0.107896015, 1.0792782,   0.5411261,
      0.27695096,  0.020715078,  0.021571944, 0.61097443,  0.10560424,
      0.15401895,  0.46480918,   0.6496758,   1.0116925,   0.0040072273,
      0.8931394,   0.2361543,    0.74389607,  0.039703716, 0.020886008,
      1.1108406,   0.09039394,   0.69578373,  0.27737862,  0.3083219,
      0.5698159,   0.31437457,   0.7131746,   0.14947455,  0.33504876,
      1.1611847,   0.8632542,    1.058698,    1.0307701,   0.15223494,
      0.9391413,   0.9473978,    0.3767169,   0.5806728,   0.70086235,
      0.8544429,   0.07839825,   0.46189323,  0.57343185,  0.17151174,
      0.45118546,  0.03416668,   2.037371,    0.1311739,   0.22600843,
      0.061421365, 0.0063685803, 0.9023181,   0.17874505,  1.458104,
      0.09657643,  0.36346155,   0.11396522,  0.2762966,   0.11472289,
      0.16151813,  0.5954224,    0.68847394,  0.6934064,   1.0951325,
      0.008113728, 0.320056,     0.2934685,   0.38948777,  0.64446163,
      0.11539491,  1.4196212,    0.6417532,   0.10939098,  0.115132414,
      0.10055387,  0.15150718,   0.3015885,   0.36512154,  0.85847276,
      0.42005107,  0.06733843,   0.9194887,   0.2446694,   0.3528377,
      0.30540454,  0.0549386,    0.15950806,  0.12754358,  0.22250807,
      1.3793756,   0.01503605,   0.33390692,  0.2052875,   0.32573462,
      0.66194123,  0.03896839,   0.921685,    1.1364039,   1.2451752,
      0.072772495, 0.10148866,   0.2922106,   0.97420144,  0.25800666,
      0.13455145,  0.3459612,    0.16713561,  0.21625288,  0.20754638,
      0.017042752, 1.2139128,    0.38501504,  0.18923776,  0.58807755,
      0.42623222,  1.8636363,    0.15489826,  0.24531981,  0.330716,
      0.6148099,   0.12145276,   0.938947,    0.08298498,  0.5002425,
      0.42643633,  0.3724926,    0.351435,    0.35051146,  0.15093777,
      0.2753887,   0.11030835,   0.05864477,  0.12825343,  0.4938676,
      0.4091608,   0.13155867,   1.362572,    0.26034647,  0.005735014,
      0.25208464,  0.77931124,   0.08418636,  0.2567355,   0.108983725,
      0.04566572,  0.06202907,   0.3991703,   0.2785334,   0.45871663,
      1.584949,    0.099409536,  0.114265166, 0.0603091,   0.71120745,
      0.35286796,  0.03805246,   2.6303916,   0.6235311,   0.6544235,
      0.254192,    0.5172861,    0.46474016,  0.51770395,  0.3868696,
      0.030558605, 0.79667675,   0.1053426,   0.08400551,  0.26797673,
      0.52138245,  0.13453461,   0.070371106, 0.003556521, 0.34309983,
      0.2104394,   0.02274147,   0.19070747,  0.9488226,   0.09138845,
      2.092856,    0.10931594,   0.18929166,  0.113100395, 0.08495193,
      1.124685,    0.08020554,   1.0792019,   0.27422333,  0.31508496,
      0.20671548,  0.05064338,   0.46511328,  0.38314936,  0.52556884,
      0.36894837,  1.4199936,    0.05843645,  0.055732273, 0.26817194,
      0.2876586,   1.0425944,    0.062882155, 0.09840146,  0.1544766,
      0.98742366,  0.20589906,   2.1226256,   0.47266316,  0.33193296,
      2.0077822,   0.23509863,   0.53764015,  1.2505449,   1.719803,
      0.39262286};

  std::vector<std::string> sparse_query_buffers;

  std::string sparse_query_buffer_0;
  MinusInnerProductSparseMatrix<Float16>::transform_sparse_format(
      sparse_vec_count_0, sparse_vec_index_0, sparse_vec_value_0.data(),
      sparse_query_buffer_0);
  sparse_query_buffers.emplace_back(std::move(sparse_query_buffer_0));

  std::string sparse_query_buffer_1;
  MinusInnerProductSparseMatrix<Float16>::transform_sparse_format(
      sparse_vec_count_1, sparse_vec_index_1, sparse_vec_value_1.data(),
      sparse_query_buffer_1);
  sparse_query_buffers.emplace_back(std::move(sparse_query_buffer_1));

  std::string sparse_query_buffer_2;
  MinusInnerProductSparseMatrix<Float16>::transform_sparse_format(
      sparse_vec_count_2, sparse_vec_index_2, sparse_vec_value_2.data(),
      sparse_query_buffer_2);
  sparse_query_buffers.emplace_back(std::move(sparse_query_buffer_2));

  std::string sparse_query_buffer_3;
  MinusInnerProductSparseMatrix<Float16>::transform_sparse_format(
      sparse_vec_count_3, sparse_vec_index_3, sparse_vec_value_3.data(),
      sparse_query_buffer_3);
  sparse_query_buffers.emplace_back(std::move(sparse_query_buffer_3));

  std::string sparse_query_buffer_4;
  MinusInnerProductSparseMatrix<Float16>::transform_sparse_format(
      sparse_vec_count_4, sparse_vec_index_4, sparse_vec_value_4.data(),
      sparse_query_buffer_4);
  sparse_query_buffers.emplace_back(std::move(sparse_query_buffer_4));

  std::string sparse_query_buffer_5;
  MinusInnerProductSparseMatrix<Float16>::transform_sparse_format(
      sparse_vec_count_5, sparse_vec_index_5, sparse_vec_value_5.data(),
      sparse_query_buffer_5);
  sparse_query_buffers.emplace_back(std::move(sparse_query_buffer_5));

  std::string sparse_query_buffer_6;
  MinusInnerProductSparseMatrix<Float16>::transform_sparse_format(
      sparse_vec_count_6, sparse_vec_index_6, sparse_vec_value_6.data(),
      sparse_query_buffer_6);
  sparse_query_buffers.emplace_back(std::move(sparse_query_buffer_6));

  std::string sparse_query_buffer_7;
  MinusInnerProductSparseMatrix<Float16>::transform_sparse_format(
      sparse_vec_count_7, sparse_vec_index_7, sparse_vec_value_7.data(),
      sparse_query_buffer_7);
  sparse_query_buffers.emplace_back(std::move(sparse_query_buffer_7));

  sparse_vec_counts.emplace_back(sparse_vec_count_0);
  sparse_vec_counts.emplace_back(sparse_vec_count_1);
  sparse_vec_counts.emplace_back(sparse_vec_count_2);
  sparse_vec_counts.emplace_back(sparse_vec_count_3);
  sparse_vec_counts.emplace_back(sparse_vec_count_4);
  sparse_vec_counts.emplace_back(sparse_vec_count_5);
  sparse_vec_counts.emplace_back(sparse_vec_count_6);
  sparse_vec_counts.emplace_back(sparse_vec_count_7);

  sparse_vec_indices.emplace_back(sparse_vec_index_0);
  sparse_vec_indices.emplace_back(sparse_vec_index_1);
  sparse_vec_indices.emplace_back(sparse_vec_index_2);
  sparse_vec_indices.emplace_back(sparse_vec_index_3);
  sparse_vec_indices.emplace_back(sparse_vec_index_4);
  sparse_vec_indices.emplace_back(sparse_vec_index_5);
  sparse_vec_indices.emplace_back(sparse_vec_index_6);
  sparse_vec_indices.emplace_back(sparse_vec_index_7);

  sparse_vec_values.emplace_back(sparse_vec_value_0.data());
  sparse_vec_values.emplace_back(sparse_vec_value_1.data());
  sparse_vec_values.emplace_back(sparse_vec_value_2.data());
  sparse_vec_values.emplace_back(sparse_vec_value_3.data());
  sparse_vec_values.emplace_back(sparse_vec_value_4.data());
  sparse_vec_values.emplace_back(sparse_vec_value_5.data());
  sparse_vec_values.emplace_back(sparse_vec_value_6.data());
  sparse_vec_values.emplace_back(sparse_vec_value_7.data());

  for (size_t i = 0; i < sparse_query_buffers.size(); ++i) {
    for (size_t j = 0; j < sparse_query_buffers.size(); ++j) {
      float result0{0.0f};
      result0 = SparseDistanceCommon(
          sparse_vec_counts[i], sparse_vec_indices[i], sparse_vec_values[i],
          sparse_vec_counts[j], sparse_vec_indices[j], sparse_vec_values[j]);

      float result1{0.0f};
      MinusInnerProductSparseMatrix<Float16>::Compute(
          sparse_query_buffers[i].data(), sparse_query_buffers[j].data(),
          &result1);
      result1 = -result1;

      float epsilon = 0.001 * std::max(result0, result1);
      EXPECT_GE(epsilon, std::abs(result0 - result1));
    }
  }
}

// Registers the shared TestInnerProductSparse() routine as a gtest case.
TEST(DistanceMatrix, InnerProductSparse) {
  TestInnerProductSparse();
}

// Registers the shared TestInnerProductSparseMore() routine as a gtest case.
TEST(DistanceMatrix, InnerProductSparseMore) {
  TestInnerProductSparseMore();
}

// Manual stress loop for the Float16 sparse minus-inner-product kernel. The
// DISABLED_ prefix keeps gtest from running it by default. Two hard-coded
// sparse vectors are packed once, then the kernel is invoked 100,000,000 times
// on the same pair; the test itself prints no timing.
TEST(DistanceMatrix, DISABLED_InnerProductSparse_Benchmark) {
  // Left-hand operand: 52 non-zero entries.
  const uint32_t lhs_count = 52;
  uint32_t lhs_index[] = {
      33,   66,   77,   209,  385,  396,  539,  583,  649,  715,  880,
      935,  968,  1023, 1100, 1111, 1661, 1694, 1749, 2288, 2343, 2453,
      2530, 2772, 2871, 2882, 2948, 3069, 3322, 3333, 3410, 3575, 3608,
      4026, 4037, 4048, 4059, 4070, 4268, 4323, 4741, 4752, 5137, 5170,
      5423, 5555, 5918, 6028, 6094, 6347, 6369, 6468};
  FixedVector<Float16, lhs_count> lhs_value{
      -0.246404298254, 0.892043114755,  0.163785949199,  -0.680309913534,
      -0.767956138324, -0.410683610330, 0.763314047145,  0.347851184825,
      -0.676969102165, -0.774662820732, 0.274471489215,  -0.131269040962,
      0.206478593023,  0.764082612827,  -0.57678381864,  -0.256053693585,
      0.661507236032,  -0.812832823664, 0.929611593685,  -0.381852499144,
      -0.35890001953,  0.538386710846,  -0.829565442015, 0.384046166409,
      0.623125501212,  0.043215334982,  -0.689536097425, -0.500913794456,
      -0.419818105671, -0.503346955801, -0.99419236655,  -0.414091535679,
      -0.829474457209, -0.103915702521, -0.419445202934, -0.26891898936,
      0.311013521629,  0.172923023003,  -0.818231467063, -0.728015315042,
      0.110116365075,  0.845786117564,  -0.587841450807, 0.533763235805,
      -0.601437402994, -0.117487602176, 0.106103380748,  -0.00151542886833,
      0.189967593506,  0.890365538566,  -0.581876671583, -0.232173604777};

  // Right-hand operand: 43 non-zero entries.
  const uint32_t rhs_count = 43;
  uint32_t rhs_index[] = {
      33,   77,   110,  209,  1023, 1111, 1221, 1496, 1661, 1749, 2189,
      2255, 2288, 2420, 2530, 2695, 2772, 2838, 2948, 3179, 3575, 4202,
      4268, 4290, 4433, 4444, 4653, 4697, 4741, 5137, 5192, 5346, 5423,
      5445, 5555, 5588, 5764, 5896, 5918, 6028, 6270, 6347, 6501};
  FixedVector<Float16, rhs_count> rhs_value{
      -0.847561468192, -0.761580890729,  0.683791378502,  0.729670644228,
      -0.111989702001, -0.3435914518,    -0.806454864134, -0.0243347460596,
      0.497209110076,  0.852745969955,   0.403748558594,  -0.634016410599,
      -0.74513226711,  0.738086689871,   0.364575651925,  0.0867637408004,
      -0.285921174394, -0.321390976616,  -0.971849760722, -0.246041408731,
      -0.110667223833, 0.0744013655781,  0.84846334839,   0.167405689007,
      0.0289923642993, -0.536394124155,  0.518249809298,  -0.695798108647,
      0.0653215071151, -0.0046338401448, 0.644189056747,  -0.52301532328,
      -0.660275328421, 0.643514995264,   0.0333307952838, -0.401825159735,
      -0.188869041499, -0.23065238799,   -0.409416817144, -0.142933941372,
      0.247628793044,  -0.984985692607,  -0.427929860028};

  // Pack both operands into the buffer layout consumed by Compute().
  std::string lhs_buffer;
  MinusInnerProductSparseMatrix<Float16>::transform_sparse_format(
      lhs_count, lhs_index, lhs_value.data(), lhs_buffer);

  std::string rhs_buffer;
  MinusInnerProductSparseMatrix<Float16>::transform_sparse_format(
      rhs_count, rhs_index, rhs_value.data(), rhs_buffer);

  size_t loop_cnt = 100000000LLU;
  float result[100];

  // Results are written round-robin into 100 slots; every slot receives the
  // score of the same operand pair.
  for (size_t iter = 0; iter < loop_cnt; ++iter) {
    float *const slot = &result[iter % 100];
    MinusInnerProductSparseMatrix<Float16>::Compute(lhs_buffer.data(),
                                                    rhs_buffer.data(), slot);
  }

  EXPECT_EQ(result[0], result[1]);
}

// Sparse vectors whose index list starts at dimension 0: both operands use
// indices 0..9 with every value set to 2.0. The negated result of the
// packed-format kernel must agree with SparseDistanceCommon within 1e-5.
TEST(DistanceMatrix, TestInnerProductSparseDimWithZero) {
  // test 1
  const uint32_t lhs_count = 10;
  uint32_t lhs_index[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
  FixedVector<Float16, lhs_count> lhs_value{2.0, 2.0, 2.0, 2.0, 2.0,
                                            2.0, 2.0, 2.0, 2.0, 2.0};

  const uint32_t rhs_count = 10;
  uint32_t rhs_index[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
  FixedVector<Float16, rhs_count> rhs_value{2.0, 2.0, 2.0, 2.0, 2.0,
                                            2.0, 2.0, 2.0, 2.0, 2.0};

  // Pack both operands into the buffer layout consumed by Compute().
  std::string lhs_buffer;
  MinusInnerProductSparseMatrix<Float16>::transform_sparse_format(
      lhs_count, lhs_index, lhs_value.data(), lhs_buffer);

  std::string rhs_buffer;
  MinusInnerProductSparseMatrix<Float16>::transform_sparse_format(
      rhs_count, rhs_index, rhs_value.data(), rhs_buffer);

  // Reference value computed directly from the index/value arrays.
  const float expected =
      SparseDistanceCommon(lhs_count, lhs_index, lhs_value.data(), rhs_count,
                           rhs_index, rhs_value.data());

  // The kernel result is negated before it is compared with the reference.
  float kernel_score{0.0f};
  MinusInnerProductSparseMatrix<Float16>::Compute(
      lhs_buffer.data(), rhs_buffer.data(), &kernel_score);
  const float actual = -kernel_score;

  EXPECT_GE(0.00001, std::abs(expected - actual));
}


================================================
FILE: tests/ailego/math/inner_product_matrix_fp32_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <functional>
#include <random>
#include <string>
#include <thread>
#include <vector>
#include <ailego/container/bitmap.h>
#include <ailego/internal/cpu_features.h>
#include <ailego/math/distance.h>
#include <gtest/gtest.h>
#include <zvec/ailego/container/vector.h>
#include <zvec/ailego/utility/time_helper.h>

using namespace zvec::ailego;

// Returns the string reported by internal::CpuFeatures::Intrinsics(); the
// benchmark in this file prints it as part of its header line.
static inline const char *IntelIntrinsics() {
  const char *const intrinsics = internal::CpuFeatures::Intrinsics();
  return intrinsics;
}

// Writes the transpose of `src` into `dst`.
//
// `src` is read as N rows of M contiguous floats and `dst` is written as M
// rows of N contiguous floats, i.e. dst[i * N + j] = src[j * M + i]. Both
// buffers must hold at least M * N floats.
static inline void MatrixTranspose(float *dst, const float *src, size_t M,
                                   size_t N) {
  for (size_t row = 0; row < M; ++row) {
    float *const out_row = dst + row * N;
    for (size_t col = 0; col < N; ++col) {
      out_row[col] = src[col * M + row];
    }
  }
}

// Forwards two equally sized fixed vectors to Distance::InnerProduct, using
// lhs.size() as the element count.
template <size_t N>
static float InnerProductDistance(const FixedVector<float, N> &lhs,
                                  const FixedVector<float, N> &rhs) {
  const auto count = lhs.size();
  const auto *left = lhs.data();
  const auto *right = rhs.data();
  return Distance::InnerProduct(left, right, count);
}

// Forwards two equally sized fixed vectors to Distance::MinusInnerProduct,
// using lhs.size() as the element count.
template <size_t N>
static float MinusInnerProductDistance(const FixedVector<float, N> &lhs,
                                       const FixedVector<float, N> &rhs) {
  const auto count = lhs.size();
  const auto *left = lhs.data();
  const auto *right = rhs.data();
  return Distance::MinusInnerProduct(left, right, count);
}

// Spot-checks InnerProductDistance on hand-written float vectors of
// dimension 15 through 21 against hard-coded expected values.
TEST(DistanceMatrix, InnerProduct_General) {
  // Dimensions 15-17: x and y differ only in their last element.
  FixedVector<float, 15> x15{5.22f,  0.65f, 0.711f, 7.8f,  8.9f,
                             555.0f, 0.8f,  5.5f,   3.75f, 9.0f,
                             6.6f,   0.1f,  8.8f,   0.2f,  5.6f},
      y15{5.22f, 0.65f, 0.711f, 7.8f, 8.9f, 555.0f, 0.8f, 5.5f,
          3.75f, 9.0f,  6.6f,   0.1f, 8.8f, 0.2f,   0.25f};
  EXPECT_FLOAT_EQ(308441.62f, InnerProductDistance(x15, y15));

  FixedVector<float, 16> x16{5.22f, 0.65f, 0.711f, 7.8f,  8.9f, 555.0f,
                             9.12f, 0.8f,  5.5f,   3.75f, 9.0f, 6.6f,
                             0.1f,  8.8f,  0.2f,   5.6f},
      y16{5.22f, 0.65f, 0.711f, 7.8f, 8.9f, 555.0f, 9.12f, 0.8f,
          5.5f,  3.75f, 9.0f,   6.6f, 0.1f, 8.8f,   0.2f,  0.5f};
  EXPECT_FLOAT_EQ(308526.19f, InnerProductDistance(x16, y16));

  FixedVector<float, 17> x17{3.4f, 4.5f,  5.6f, 1.6f,  3.4f,  8.1f,
                             1.0f, 4.41f, 7.7f, 1.11f, 3.04f, 2.3f,
                             3.4f, 4.5f,  5.6f, 1.6f,  1.3f},
      y17{3.4f,  4.5f,  5.6f, 1.6f, 3.4f, 8.1f, 1.0f, 4.41f, 7.7f,
          1.11f, 3.04f, 2.3f, 3.4f, 4.5f, 5.6f, 1.6f, 2.3f};
  EXPECT_FLOAT_EQ(307.1218f, InnerProductDistance(x17, y17));

  // Dimensions 18-20: x and y hold the same values.
  FixedVector<float, 18> x18{1.66f, 2.3f, 1.11f, 3.04f,  8.23f, 1.0f,
                             4.44f, 7.7f, 1.5f,  11.11f, 2.3f,  3.4f,
                             4.5f,  5.6f, 1.6f,  2.3f,   1.11f, 3.04f},
      y18{1.66f,  2.3f, 1.11f, 3.04f, 8.23f, 1.0f, 4.44f, 7.7f,  1.5f,
          11.11f, 2.3f, 3.4f,  4.5f,  5.6f,  1.6f, 2.3f,  1.11f, 3.04f};
  EXPECT_FLOAT_EQ(378.72156f, InnerProductDistance(x18, y18));

  FixedVector<float, 19> x19{1.66f, 2.3f,  1.11f, 3.04f,  8.23f, 1.0f, 1.6f,
                             2.3f,  4.44f, 7.7f,  11.11f, 2.3f,  3.4f, 4.5f,
                             5.6f,  1.6f,  2.3f,  1.11f,  2.3f},
      y19{1.66f,  2.3f, 1.11f, 3.04f, 8.23f, 1.0f, 1.6f, 2.3f,  4.44f, 7.7f,
          11.11f, 2.3f, 3.4f,  4.5f,  5.6f,  1.6f, 2.3f, 1.11f, 2.3f};
  EXPECT_FLOAT_EQ(380.37f, InnerProductDistance(x19, y19));

  FixedVector<float, 20> x20{1.6f, 2.3f, 1.11f, 2.3f, 3.04f, 8.23f, 1.0f,
                             1.6f, 2.3f, 5.6f,  1.6f, 2.3f,  2.3f,  3.4f,
                             4.5f, 5.6f, 1.6f,  2.3f, 1.11f, 2.3f},
      y20{1.6f, 2.3f, 1.11f, 2.3f, 3.04f, 8.23f, 1.0f, 1.6f, 2.3f,  5.6f,
          1.6f, 2.3f, 2.3f,  3.4f, 4.5f,  5.6f,  1.6f, 2.3f, 1.11f, 2.3f};
  EXPECT_FLOAT_EQ(222.23868f, InnerProductDistance(x20, y20));

  // Dimension 21: only the first element is spelled out. Presumably the
  // remaining elements are zero-initialized -- confirm against FixedVector.
  FixedVector<float, 21> x21{0.0f}, y21{0.0f};
  EXPECT_FLOAT_EQ(0.0f, InnerProductDistance(x21, y21));
}

// Spot-checks MinusInnerProductDistance on hand-written float vectors of
// dimension 1 through 21 against hard-coded expected values. Each expected
// value is the negated inner product of the pair, so pairs with opposing
// signs (x4/y4, x8/y8) expect a positive result.
TEST(DistanceMatrix, MinusInnerProduct_General) {
  FixedVector<float, 1> x1{0.7f}, y1{0.5f};
  EXPECT_FLOAT_EQ(-0.35f, MinusInnerProductDistance(x1, y1));

  FixedVector<float, 2> x2{2.0f, 3.76f}, y2{2.0f, 0.901f};
  EXPECT_FLOAT_EQ(-7.38776f, MinusInnerProductDistance(x2, y2));

  FixedVector<float, 3> x3{2.0f, 3.0f, 0.7f}, y3{2.0f, 3.0f, 2.0f};
  EXPECT_FLOAT_EQ(-14.4f, MinusInnerProductDistance(x3, y3));

  // Mixed signs: the inner product is negative, so the result is positive.
  FixedVector<float, 4> x4{7.8f, -8.9f, 9.0f, 5.6f},
      y4{7.8f, 8.9f, -9.0f, -0.1f};
  EXPECT_FLOAT_EQ(99.93f, MinusInnerProductDistance(x4, y4));

  FixedVector<float, 5> x5{7.8f, 8.9f, 9.0f, 0.1f, 5.6f},
      y5{7.8f, 8.9f, 9.0f, 0.1f, 0.2f};
  EXPECT_FLOAT_EQ(-222.18f, MinusInnerProductDistance(x5, y5));

  FixedVector<float, 6> x6{0.711f, 7.8f, 8.9f, 9.0f, 0.1f, 5.6f},
      y6{0.711f, 7.8f, 8.9f, 9.0f, 0.1f, 0.2f};
  EXPECT_FLOAT_EQ(-222.6855f, MinusInnerProductDistance(x6, y6));

  FixedVector<float, 7> x7{5.22f, 0.711f, 7.8f, 8.9f, 9.0f, 0.1f, 5.6f},
      y7{5.22f, 0.711f, 7.8f, 8.9f, 9.0f, 0.1f, 0.2f};
  EXPECT_FLOAT_EQ(-249.9339f, MinusInnerProductDistance(x7, y7));

  // Mixed signs again: three components of y8 are negated.
  FixedVector<float, 8> x8{5.22f, 0.711f, 7.8f, 8.9f, 9.0f, 0.1f, 0.2f, 5.6f},
      y8{5.22f, 0.711f, -7.8f, -8.9f, -9.0f, 0.1f, 0.2f, 0.5f};
  EXPECT_FLOAT_EQ(190.44608f, MinusInnerProductDistance(x8, y8));

  FixedVector<float, 9> x9{5.22f, 0.711f, 7.8f, 8.9f, 9.0f,
                           6.6f,  0.1f,   0.2f, 5.6f},
      y9{5.22f, 0.711f, 7.8f, 8.9f, 9.0f, 6.6f, 0.1f, 0.2f, 0.5f};
  EXPECT_FLOAT_EQ(-295.214f, MinusInnerProductDistance(x9, y9));

  FixedVector<float, 10> x10{5.22f, 0.711f, 7.8f, 8.9f, 5.5f,
                             9.0f,  6.6f,   0.1f, 0.2f, 5.6f},
      y10{5.22f, 0.711f, 7.8f, 8.9f, 5.5f, 9.0f, 6.6f, 0.1f, 0.2f, 0.522f};
  EXPECT_FLOAT_EQ(-325.587f, MinusInnerProductDistance(x10, y10));

  FixedVector<float, 11> x11{2.3f,    -1.11f, 3.04f, 8.23f, 1.0f, 7.7f,
                             -11.11f, 2.3f,   3.4f,  4.5f,  5.6f},
      y11{2.3f,    1.11f, 3.04f, 8.23f, -1.0f, 7.7f,
          -11.11f, 2.3f,  3.4f,  4.5f,  0.511f};
  EXPECT_FLOAT_EQ(-302.716f, MinusInnerProductDistance(x11, y11));

  FixedVector<float, 12> x12{1.6f, 2.3f,   1.11f, 3.04f, 8.23f, 1.0f,
                             7.7f, 11.11f, 2.3f,  3.4f,  4.5f,  5.6f},
      y12{1.6f, 2.3f,   1.11f, 3.04f, 8.23f, 1.0f,
          7.7f, 11.11f, 2.3f,  3.4f,  4.5f,  0.5f};
  EXPECT_FLOAT_EQ(-309.67868f, MinusInnerProductDistance(x12, y12));

  FixedVector<float, 13> x13{1.6f, 2.3f,   1.11f, 3.04f, 8.23f, 1.0f, 4.44f,
                             7.7f, 11.11f, 2.3f,  3.4f,  4.5f,  5.6f},
      y13{1.6f, 2.3f,   1.11f, 3.04f, 8.23f, 1.0f, 4.44f,
          7.7f, 11.11f, 2.3f,  3.4f,  4.5f,  3.5f};
  EXPECT_FLOAT_EQ(-346.19229f, MinusInnerProductDistance(x13, y13));

  FixedVector<float, 14> x14{5.22f, 0.65f, 0.711f, 7.8f, 8.9f, 0.8f, 5.5f,
                             3.75f, 9.0f,  6.6f,   0.1f, 8.8f, 0.2f, 5.6f},
      y14{5.22f, 0.65f, 0.711f, 7.8f, 8.9f, 0.8f, 5.5f,
          3.75f, 9.0f,  6.6f,   0.1f, 8.8f, 0.2f, 0.5f};
  EXPECT_FLOAT_EQ(-418.029f, MinusInnerProductDistance(x14, y14));

  // Dimensions 15-21 reuse the operands of InnerProduct_General; the expected
  // values are the same magnitudes with the sign flipped.
  FixedVector<float, 15> x15{5.22f,  0.65f, 0.711f, 7.8f,  8.9f,
                             555.0f, 0.8f,  5.5f,   3.75f, 9.0f,
                             6.6f,   0.1f,  8.8f,   0.2f,  5.6f},
      y15{5.22f, 0.65f, 0.711f, 7.8f, 8.9f, 555.0f, 0.8f, 5.5f,
          3.75f, 9.0f,  6.6f,   0.1f, 8.8f, 0.2f,   0.25f};
  EXPECT_FLOAT_EQ(-308441.62f, MinusInnerProductDistance(x15, y15));

  FixedVector<float, 16> x16{5.22f, 0.65f, 0.711f, 7.8f,  8.9f, 555.0f,
                             9.12f, 0.8f,  5.5f,   3.75f, 9.0f, 6.6f,
                             0.1f,  8.8f,  0.2f,   5.6f},
      y16{5.22f, 0.65f, 0.711f, 7.8f, 8.9f, 555.0f, 9.12f, 0.8f,
          5.5f,  3.75f, 9.0f,   6.6f, 0.1f, 8.8f,   0.2f,  0.5f};
  EXPECT_FLOAT_EQ(-308526.19f, MinusInnerProductDistance(x16, y16));

  FixedVector<float, 17> x17{3.4f, 4.5f,  5.6f, 1.6f,  3.4f,  8.1f,
                             1.0f, 4.41f, 7.7f, 1.11f, 3.04f, 2.3f,
                             3.4f, 4.5f,  5.6f, 1.6f,  1.3f},
      y17{3.4f,  4.5f,  5.6f, 1.6f, 3.4f, 8.1f, 1.0f, 4.41f, 7.7f,
          1.11f, 3.04f, 2.3f, 3.4f, 4.5f, 5.6f, 1.6f, 2.3f};
  EXPECT_FLOAT_EQ(-307.1218f, MinusInnerProductDistance(x17, y17));

  FixedVector<float, 18> x18{1.66f, 2.3f, 1.11f, 3.04f,  8.23f, 1.0f,
                             4.44f, 7.7f, 1.5f,  11.11f, 2.3f,  3.4f,
                             4.5f,  5.6f, 1.6f,  2.3f,   1.11f, 3.04f},
      y18{1.66f,  2.3f, 1.11f, 3.04f, 8.23f, 1.0f, 4.44f, 7.7f,  1.5f,
          11.11f, 2.3f, 3.4f,  4.5f,  5.6f,  1.6f, 2.3f,  1.11f, 3.04f};
  EXPECT_FLOAT_EQ(-378.72156f, MinusInnerProductDistance(x18, y18));

  FixedVector<float, 19> x19{1.66f, 2.3f,  1.11f, 3.04f,  8.23f, 1.0f, 1.6f,
                             2.3f,  4.44f, 7.7f,  11.11f, 2.3f,  3.4f, 4.5f,
                             5.6f,  1.6f,  2.3f,  1.11f,  2.3f},
      y19{1.66f,  2.3f, 1.11f, 3.04f, 8.23f, 1.0f, 1.6f, 2.3f,  4.44f, 7.7f,
          11.11f, 2.3f, 3.4f,  4.5f,  5.6f,  1.6f, 2.3f, 1.11f, 2.3f};
  EXPECT_FLOAT_EQ(-380.37f, MinusInnerProductDistance(x19, y19));

  FixedVector<float, 20> x20{1.6f, 2.3f, 1.11f, 2.3f, 3.04f, 8.23f, 1.0f,
                             1.6f, 2.3f, 5.6f,  1.6f, 2.3f,  2.3f,  3.4f,
                             4.5f, 5.6f, 1.6f,  2.3f, 1.11f, 2.3f},
      y20{1.6f, 2.3f, 1.11f, 2.3f, 3.04f, 8.23f, 1.0f, 1.6f, 2.3f,  5.6f,
          1.6f, 2.3f, 2.3f,  3.4f, 4.5f,  5.6f,  1.6f, 2.3f, 1.11f, 2.3f};
  EXPECT_FLOAT_EQ(-222.23868f, MinusInnerProductDistance(x20, y20));

  // Dimension 21: only the first element is spelled out. Presumably the
  // remaining elements are zero-initialized -- confirm against FixedVector.
  FixedVector<float, 21> x21{0.0f}, y21{0.0f};
  EXPECT_FLOAT_EQ(0.0f, MinusInnerProductDistance(x21, y21));
}

template <size_t M, size_t N>
void TestMinusInnerProductMatrix(void) {
  std::mt19937 gen((std::random_device())());

  const size_t batch_size = M;
  const size_t query_size = N;
  size_t dimension = (std::uniform_int_distribution<size_t>(1, 65))(gen);
  size_t matrix_size = batch_size * dimension;
  size_t query_matrix_size = query_size * dimension;

  std::vector<float> matrix1(matrix_size);
  std::vector<float> matrix2(matrix_size);
  std::vector<float> query1(query_matrix_size);
  std::vector<float> query2(query_matrix_size);
  std::vector<float> result1(batch_size * query_size);
  std::vector<float> result2(batch_size * query_size);

  std::uniform_real_distribution<float> dist(0.0, 0.5);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = dist(gen);
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = dist(gen);
  }
  MatrixTranspose(&matrix2[0], matrix1.data(), dimension, batch_size);
  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);

  for (size_t i = 0; i < query_size; ++i) {
    const float *cur_query = &query1[i * dimension];
    float *query_result = &result1[i * batch_size];

    for (size_t j = 0; j < batch_size; ++j) {
      MinusInnerProductMatrix<float, 1, 1>::Compute(
          &matrix1[j * dimension], cur_query, dimension, &query_result[j]);
    }
  }
  MinusInnerProductMatrix<float, batch_size, query_size>::Compute(
      &matrix2[0], &query2[0], dimension, &result2[0]);

  for (size_t i = 0; i < batch_size * query_size; ++i) {
    EXPECT_GE(0.00001, std::abs(result1[i] - result2[i]));
  }
}

template <size_t M, size_t N>
void TestInnerProductMatrix(void) {
  std::mt19937 gen((std::random_device())());

  const size_t batch_size = M;
  const size_t query_size = N;
  size_t dimension = (std::uniform_int_distribution<size_t>(1, 65))(gen);
  size_t matrix_size = batch_size * dimension;
  size_t query_matrix_size = query_size * dimension;

  std::vector<float> matrix1(matrix_size);
  std::vector<float> matrix2(matrix_size);
  std::vector<float> query1(query_matrix_size);
  std::vector<float> query2(query_matrix_size);
  std::vector<float> result1(batch_size * query_size);
  std::vector<float> result2(batch_size * query_size);

  std::uniform_real_distribution<float> dist(0.0, 0.5);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = dist(gen);
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = dist(gen);
  }
  MatrixTranspose(&matrix2[0], matrix1.data(), dimension, batch_size);
  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);

  for (size_t i = 0; i < query_size; ++i) {
    const float *cur_query = &query1[i * dimension];
    float *query_result = &result1[i * batch_size];

    for (size_t j = 0; j < batch_size; ++j) {
      InnerProductMatrix<float, 1, 1>::Compute(
          &matrix1[j * dimension], cur_query, dimension, &query_result[j]);
    }
  }
  InnerProductMatrix<float, batch_size, query_size>::Compute(
      &matrix2[0], &query2[0], dimension, &result2[0]);

  for (size_t i = 0; i < batch_size * query_size; ++i) {
    EXPECT_GE(0.00001, std::abs(result1[i] - result2[i]));
  }
}

// Batched-versus-pairwise consistency cases for MinusInnerProductMatrix.
// Each test below instantiates TestMinusInnerProductMatrix<M, N> for the
// M x N shape spelled out in its name (M matrix vectors, N queries).
TEST(DistanceMatrix, MinusInnerProduct_1x1) {
  TestMinusInnerProductMatrix<1, 1>();
}

TEST(DistanceMatrix, MinusInnerProduct_2x1) {
  TestMinusInnerProductMatrix<2, 1>();
}

TEST(DistanceMatrix, MinusInnerProduct_2x2) {
  TestMinusInnerProductMatrix<2, 2>();
}

TEST(DistanceMatrix, MinusInnerProduct_3x3) {
  TestMinusInnerProductMatrix<3, 3>();
}

TEST(DistanceMatrix, MinusInnerProduct_4x1) {
  TestMinusInnerProductMatrix<4, 1>();
}

TEST(DistanceMatrix, MinusInnerProduct_4x2) {
  TestMinusInnerProductMatrix<4, 2>();
}

TEST(DistanceMatrix, MinusInnerProduct_4x4) {
  TestMinusInnerProductMatrix<4, 4>();
}

TEST(DistanceMatrix, MinusInnerProduct_8x1) {
  TestMinusInnerProductMatrix<8, 1>();
}

TEST(DistanceMatrix, MinusInnerProduct_8x2) {
  TestMinusInnerProductMatrix<8, 2>();
}

TEST(DistanceMatrix, MinusInnerProduct_8x4) {
  TestMinusInnerProductMatrix<8, 4>();
}

TEST(DistanceMatrix, MinusInnerProduct_8x8) {
  TestMinusInnerProductMatrix<8, 8>();
}

TEST(DistanceMatrix, MinusInnerProduct_16x1) {
  TestMinusInnerProductMatrix<16, 1>();
}

TEST(DistanceMatrix, MinusInnerProduct_16x2) {
  TestMinusInnerProductMatrix<16, 2>();
}

TEST(DistanceMatrix, MinusInnerProduct_16x4) {
  TestMinusInnerProductMatrix<16, 4>();
}

TEST(DistanceMatrix, MinusInnerProduct_16x8) {
  TestMinusInnerProductMatrix<16, 8>();
}

TEST(DistanceMatrix, MinusInnerProduct_16x16) {
  TestMinusInnerProductMatrix<16, 16>();
}

TEST(DistanceMatrix, MinusInnerProduct_32x1) {
  TestMinusInnerProductMatrix<32, 1>();
}

TEST(DistanceMatrix, MinusInnerProduct_32x2) {
  TestMinusInnerProductMatrix<32, 2>();
}

TEST(DistanceMatrix, MinusInnerProduct_32x4) {
  TestMinusInnerProductMatrix<32, 4>();
}

TEST(DistanceMatrix, MinusInnerProduct_32x8) {
  TestMinusInnerProductMatrix<32, 8>();
}

TEST(DistanceMatrix, MinusInnerProduct_32x16) {
  TestMinusInnerProductMatrix<32, 16>();
}

TEST(DistanceMatrix, MinusInnerProduct_32x32) {
  TestMinusInnerProductMatrix<32, 32>();
}

TEST(DistanceMatrix, MinusInnerProduct_64x1) {
  TestMinusInnerProductMatrix<64, 1>();
}

TEST(DistanceMatrix, MinusInnerProduct_64x2) {
  TestMinusInnerProductMatrix<64, 2>();
}

TEST(DistanceMatrix, MinusInnerProduct_64x4) {
  TestMinusInnerProductMatrix<64, 4>();
}

TEST(DistanceMatrix, MinusInnerProduct_64x8) {
  TestMinusInnerProductMatrix<64, 8>();
}

TEST(DistanceMatrix, MinusInnerProduct_64x16) {
  TestMinusInnerProductMatrix<64, 16>();
}

TEST(DistanceMatrix, MinusInnerProduct_64x32) {
  TestMinusInnerProductMatrix<64, 32>();
}

TEST(DistanceMatrix, MinusInnerProduct_64x64) {
  TestMinusInnerProductMatrix<64, 64>();
}

TEST(DistanceMatrix, MinusInnerProduct_128x1) {
  TestMinusInnerProductMatrix<128, 1>();
}

TEST(DistanceMatrix, MinusInnerProduct_128x2) {
  TestMinusInnerProductMatrix<128, 2>();
}

TEST(DistanceMatrix, MinusInnerProduct_128x4) {
  TestMinusInnerProductMatrix<128, 4>();
}

TEST(DistanceMatrix, MinusInnerProduct_128x8) {
  TestMinusInnerProductMatrix<128, 8>();
}

TEST(DistanceMatrix, MinusInnerProduct_128x16) {
  TestMinusInnerProductMatrix<128, 16>();
}

TEST(DistanceMatrix, MinusInnerProduct_128x32) {
  TestMinusInnerProductMatrix<128, 32>();
}

TEST(DistanceMatrix, MinusInnerProduct_128x64) {
  TestMinusInnerProductMatrix<128, 64>();
}

TEST(DistanceMatrix, MinusInnerProduct_128x128) {
  TestMinusInnerProductMatrix<128, 128>();
}

// Batched-versus-pairwise consistency cases for InnerProductMatrix. Each test
// below instantiates TestInnerProductMatrix<M, N> for the M x N shape spelled
// out in its name (M matrix vectors, N queries).
TEST(DistanceMatrix, InnerProduct_1x1) {
  TestInnerProductMatrix<1, 1>();
}

TEST(DistanceMatrix, InnerProduct_2x1) {
  TestInnerProductMatrix<2, 1>();
}

TEST(DistanceMatrix, InnerProduct_2x2) {
  TestInnerProductMatrix<2, 2>();
}

TEST(DistanceMatrix, InnerProduct_3x3) {
  TestInnerProductMatrix<3, 3>();
}

TEST(DistanceMatrix, InnerProduct_4x1) {
  TestInnerProductMatrix<4, 1>();
}

TEST(DistanceMatrix, InnerProduct_4x2) {
  TestInnerProductMatrix<4, 2>();
}

TEST(DistanceMatrix, InnerProduct_4x4) {
  TestInnerProductMatrix<4, 4>();
}

TEST(DistanceMatrix, InnerProduct_8x1) {
  TestInnerProductMatrix<8, 1>();
}

TEST(DistanceMatrix, InnerProduct_8x2) {
  TestInnerProductMatrix<8, 2>();
}

TEST(DistanceMatrix, InnerProduct_8x4) {
  TestInnerProductMatrix<8, 4>();
}

TEST(DistanceMatrix, InnerProduct_8x8) {
  TestInnerProductMatrix<8, 8>();
}

TEST(DistanceMatrix, InnerProduct_16x1) {
  TestInnerProductMatrix<16, 1>();
}

TEST(DistanceMatrix, InnerProduct_16x2) {
  TestInnerProductMatrix<16, 2>();
}

TEST(DistanceMatrix, InnerProduct_16x4) {
  TestInnerProductMatrix<16, 4>();
}

TEST(DistanceMatrix, InnerProduct_16x8) {
  TestInnerProductMatrix<16, 8>();
}

TEST(DistanceMatrix, InnerProduct_16x16) {
  TestInnerProductMatrix<16, 16>();
}

TEST(DistanceMatrix, InnerProduct_32x1) {
  TestInnerProductMatrix<32, 1>();
}

TEST(DistanceMatrix, InnerProduct_32x2) {
  TestInnerProductMatrix<32, 2>();
}

TEST(DistanceMatrix, InnerProduct_32x4) {
  TestInnerProductMatrix<32, 4>();
}

TEST(DistanceMatrix, InnerProduct_32x8) {
  TestInnerProductMatrix<32, 8>();
}

TEST(DistanceMatrix, InnerProduct_32x16) {
  TestInnerProductMatrix<32, 16>();
}

TEST(DistanceMatrix, InnerProduct_32x32) {
  TestInnerProductMatrix<32, 32>();
}

TEST(DistanceMatrix, InnerProduct_64x1) {
  TestInnerProductMatrix<64, 1>();
}

TEST(DistanceMatrix, InnerProduct_64x2) {
  TestInnerProductMatrix<64, 2>();
}

TEST(DistanceMatrix, InnerProduct_64x4) {
  TestInnerProductMatrix<64, 4>();
}

TEST(DistanceMatrix, InnerProduct_64x8) {
  TestInnerProductMatrix<64, 8>();
}

TEST(DistanceMatrix, InnerProduct_64x16) {
  TestInnerProductMatrix<64, 16>();
}

TEST(DistanceMatrix, InnerProduct_64x32) {
  TestInnerProductMatrix<64, 32>();
}

TEST(DistanceMatrix, InnerProduct_64x64) {
  TestInnerProductMatrix<64, 64>();
}

TEST(DistanceMatrix, InnerProduct_128x1) {
  TestInnerProductMatrix<128, 1>();
}

TEST(DistanceMatrix, InnerProduct_128x2) {
  TestInnerProductMatrix<128, 2>();
}

TEST(DistanceMatrix, InnerProduct_128x4) {
  TestInnerProductMatrix<128, 4>();
}

TEST(DistanceMatrix, InnerProduct_128x8) {
  TestInnerProductMatrix<128, 8>();
}

TEST(DistanceMatrix, InnerProduct_128x16) {
  TestInnerProductMatrix<128, 16>();
}

TEST(DistanceMatrix, InnerProduct_128x32) {
  TestInnerProductMatrix<128, 32>();
}

TEST(DistanceMatrix, InnerProduct_128x64) {
  TestInnerProductMatrix<128, 64>();
}

TEST(DistanceMatrix, InnerProduct_128x128) {
  TestInnerProductMatrix<128, 128>();
}

template <size_t M, size_t N, size_t B, size_t D>
void InnerProductBenchmark(void) {
  const size_t dimension = D;
  const size_t batch_size = M;
  const size_t block_size = B;
  const size_t query_size = N;
  const size_t matrix_size = block_size * batch_size * dimension;
  const size_t query_matrix_size = dimension * query_size;

  std::vector<float> matrix1(matrix_size);
  std::vector<float> matrix2(matrix_size);
  std::vector<float> query1(query_matrix_size);
  std::vector<float> query2(query_matrix_size);

  std::mt19937 gen((std::random_device())());
  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = dist(gen);
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = dist(gen);
  }

  for (size_t i = 0; i < block_size; ++i) {
    size_t start_pos = i * batch_size * dimension;
    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], dimension,
                    batch_size);
  }
  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);

  ElapsedTime elapsed_time;
  std::vector<float> results(batch_size * query_size);

  std::cout << "# (" << IntelIntrinsics() << ") FP32 " << dimension << "d, "
            << batch_size << " * " << query_size << " * " << block_size
            << std::endl;

  // 1 Batched InnerProduct
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const float *matrix_batch = &matrix2[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const float *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      InnerProductMatrix<float, batch_size, 1>::Compute(
          matrix_batch, current_query, dimension, current_results);
    }
  }
  std::cout << "* 1 Batched InnerProduct (us) \t"
            << elapsed_time.micro_seconds() << std::endl;

  // N Batched InnerProduct
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const float *matrix_batch = &matrix2[i * batch_size * dimension];

    InnerProductMatrix<float, batch_size, query_size>::Compute(
        matrix_batch, &query2[0], dimension, results.data());
  }
  std::cout << "* N Batched InnerProduct (us) \t"
            << elapsed_time.micro_seconds() << std::endl;

  // Unbatched InnerProduct
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const float *matrix_batch = &matrix1[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const float *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      for (size_t k = 0; k < batch_size; ++k) {
        InnerProductMatrix<float, 1, 1>::Compute(&matrix_batch[k * dimension],
                                                 current_query, dimension,
                                                 &current_results[k]);
      }
    }
  }
  std::cout << "* Unbatched InnerProduct (us) \t"
            << elapsed_time.micro_seconds() << std::endl;
}

template <size_t M, size_t N, size_t B, size_t D>
void MinusInnerProductBenchmark(void) {
  const size_t dimension = D;
  const size_t batch_size = M;
  const size_t block_size = B;
  const size_t query_size = N;
  const size_t matrix_size = block_size * batch_size * dimension;
  const size_t query_matrix_size = dimension * query_size;

  std::vector<float> matrix1(matrix_size);
  std::vector<float> matrix2(matrix_size);
  std::vector<float> query1(query_matrix_size);
  std::vector<float> query2(query_matrix_size);

  std::mt19937 gen((std::random_device())());
  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = dist(gen);
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = dist(gen);
  }

  for (size_t i = 0; i < block_size; ++i) {
    size_t start_pos = i * batch_size * dimension;
    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], dimension,
                    batch_size);
  }
  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);

  ElapsedTime elapsed_time;
  std::vector<float> results(batch_size * query_size);

  std::cout << "# (" << IntelIntrinsics() << ") FP32 " << dimension << "d, "
            << batch_size << " * " << query_size << " * " << block_size
            << std::endl;

  // 1 Batched InnerProduct
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const float *matrix_batch = &matrix2[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const float *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      MinusInnerProductMatrix<float, batch_size, 1>::Compute(
          matrix_batch, current_query, dimension, current_results);
    }
  }
  std::cout << "* 1 Batched MinusInnerProduct (us) \t"
            << elapsed_time.micro_seconds() << std::endl;

  // N Batched InnerProduct
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const float *matrix_batch = &matrix2[i * batch_size * dimension];

    MinusInnerProductMatrix<float, batch_size, query_size>::Compute(
        matrix_batch, &query2[0], dimension, results.data());
  }
  std::cout << "* N Batched MinusInnerProduct (us) \t"
            << elapsed_time.micro_seconds() << std::endl;

  // Unbatched InnerProduct
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const float *matrix_batch = &matrix1[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const float *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      for (size_t k = 0; k < batch_size; ++k) {
        MinusInnerProductMatrix<float, 1, 1>::Compute(
            &matrix_batch[k * dimension], current_query, dimension,
            &current_results[k]);
      }
    }
  }
  std::cout << "* Unbatched MinusInnerProduct (us) \t"
            << elapsed_time.micro_seconds() << std::endl;
}

// Runs InnerProductBenchmark over a sweep of <M, N, B, D> configurations
// (M = batch size, N = query count, B = block count, D = dimension), always
// with 512 blocks of 64-dimensional vectors.
//
// The test name starts with DISABLED_, so GoogleTest skips it unless disabled
// tests are explicitly requested.
//
// The sweep mirrors the one in DISABLED_MinusInnerProduct_Benchmark,
// including the <16, 16> configuration that used to be missing here.
TEST(DistanceMatrix, DISABLED_InnerProduct_Benchmark) {
  InnerProductBenchmark<2, 1, 512, 64>();
  InnerProductBenchmark<2, 2, 512, 64>();
  InnerProductBenchmark<4, 1, 512, 64>();
  InnerProductBenchmark<4, 2, 512, 64>();
  InnerProductBenchmark<4, 4, 512, 64>();
  InnerProductBenchmark<8, 1, 512, 64>();
  InnerProductBenchmark<8, 2, 512, 64>();
  InnerProductBenchmark<8, 4, 512, 64>();
  InnerProductBenchmark<8, 8, 512, 64>();
  InnerProductBenchmark<16, 1, 512, 64>();
  InnerProductBenchmark<16, 2, 512, 64>();
  InnerProductBenchmark<16, 4, 512, 64>();
  InnerProductBenchmark<16, 8, 512, 64>();
  InnerProductBenchmark<16, 16, 512, 64>();
  InnerProductBenchmark<32, 1, 512, 64>();
  InnerProductBenchmark<32, 2, 512, 64>();
  InnerProductBenchmark<32, 4, 512, 64>();
  InnerProductBenchmark<32, 8, 512, 64>();
  InnerProductBenchmark<32, 16, 512, 64>();
  InnerProductBenchmark<32, 32, 512, 64>();
  InnerProductBenchmark<64, 1, 512, 64>();
  InnerProductBenchmark<64, 2, 512, 64>();
  InnerProductBenchmark<64, 4, 512, 64>();
  InnerProductBenchmark<64, 8, 512, 64>();
  InnerProductBenchmark<128, 1, 512, 64>();
}

// Runs MinusInnerProductBenchmark over a sweep of <M, N, B, D> configurations
// (M = batch size, N = query count, B = block count, D = dimension), always
// with 512 blocks of 64-dimensional vectors.
//
// The test name starts with DISABLED_, so GoogleTest skips it unless disabled
// tests are explicitly requested.
TEST(DistanceMatrix, DISABLED_MinusInnerProduct_Benchmark) {
  MinusInnerProductBenchmark<2, 1, 512, 64>();
  MinusInnerProductBenchmark<2, 2, 512, 64>();
  MinusInnerProductBenchmark<4, 1, 512, 64>();
  MinusInnerProductBenchmark<4, 2, 512, 64>();
  MinusInnerProductBenchmark<4, 4, 512, 64>();
  MinusInnerProductBenchmark<8, 1, 512, 64>();
  MinusInnerProductBenchmark<8, 2, 512, 64>();
  MinusInnerProductBenchmark<8, 4, 512, 64>();
  MinusInnerProductBenchmark<8, 8, 512, 64>();
  MinusInnerProductBenchmark<16, 1, 512, 64>();
  MinusInnerProductBenchmark<16, 2, 512, 64>();
  MinusInnerProductBenchmark<16, 4, 512, 64>();
  MinusInnerProductBenchmark<16, 8, 512, 64>();
  MinusInnerProductBenchmark<16, 16, 512, 64>();
  MinusInnerProductBenchmark<32, 1, 512, 64>();
  MinusInnerProductBenchmark<32, 2, 512, 64>();
  MinusInnerProductBenchmark<32, 4, 512, 64>();
  MinusInnerProductBenchmark<32, 8, 512, 64>();
  MinusInnerProductBenchmark<32, 16, 512, 64>();
  MinusInnerProductBenchmark<32, 32, 512, 64>();
  MinusInnerProductBenchmark<64, 1, 512, 64>();
  MinusInnerProductBenchmark<64, 2, 512, 64>();
  MinusInnerProductBenchmark<64, 4, 512, 64>();
  MinusInnerProductBenchmark<64, 8, 512, 64>();
  MinusInnerProductBenchmark<128, 1, 512, 64>();
}

// Calls the 1 x 1 MinusInnerProduct kernel 100,000,000 times on one random
// pair of 768-dimensional vectors. Nothing is asserted or printed; the test is
// meant to be run under an external timer or profiler.
//
// The test name starts with DISABLED_, so GoogleTest skips it unless disabled
// tests are explicitly requested.
TEST(DistanceMatrix, DISABLED_MinusInnerProduct_BenchmarkSimple) {
  std::mt19937 rng((std::random_device())());

  const size_t dim = 768;
  const size_t iterations = 100000000LLU;

  std::vector<float> lhs(dim);
  std::vector<float> rhs(dim);

  // Written by every Compute call; never read back.
  float score;

  // Fill lhs completely before rhs so the draw order stays the same.
  std::uniform_real_distribution<float> uniform(0.0, 1.0);
  for (float &value : lhs) {
    value = uniform(rng);
  }
  for (float &value : rhs) {
    value = uniform(rng);
  }

  for (size_t remaining = iterations; remaining != 0; --remaining) {
    MinusInnerProductMatrix<float, 1, 1>::Compute(lhs.data(), rhs.data(), dim,
                                                  &score);
  }
}

// Reference inner product of two sparse vectors given as parallel
// (index, value) arrays.
//
// Walks both index arrays in lockstep, always advancing the side with the
// smaller index, and accumulates value1 * value2 wherever the indices match.
// The single forward pass only finds every match if both index arrays are
// sorted in ascending order.
//
// count1 / count2 - number of entries in each vector
// index1 / index2 - entry indices
// value1 / value2 - entry values, parallel to the index arrays
//
// Returns the accumulated sum, or 0 when either vector is empty or no index
// is shared.
static inline float SparseDistanceCommon(uint32_t count1, uint32_t *index1,
                                         float *value1, uint32_t count2,
                                         uint32_t *index2, float *value2) {
  float dot = 0.0f;

  for (size_t lhs = 0, rhs = 0; lhs < count1 && rhs < count2;) {
    const uint32_t lhs_index = index1[lhs];
    const uint32_t rhs_index = index2[rhs];

    if (lhs_index < rhs_index) {
      ++lhs;
      continue;
    }
    if (rhs_index < lhs_index) {
      ++rhs;
      continue;
    }

    // Indices match: this dimension contributes to the product.
    dot += value1[lhs] * value2[rhs];
    ++lhs;
    ++rhs;
  }

  return dot;
}

// Checks MinusInnerProductSparseMatrix<float>::Compute against the scalar
// reference SparseDistanceCommon on two hard-coded pairs of sparse vectors.
//
// For each pair the function:
//   1. passes both vectors through transform_sparse_format into std::string
//      buffers,
//   2. computes the reference value with SparseDistanceCommon on the raw
//      index/value arrays,
//   3. calls Compute on the two buffers and negates its output,
//   4. expects the two values to differ by at most 1e-5.
//
// NOTE(review): the negation presumably exists because the "Minus" kernel
// returns the negated inner product -- confirm against
// MinusInnerProductSparseMatrix.
void TestInnerProductSparse(void) {
  // test 1: indices below 6600, values in (-1, 1)
  // The element counts are hard-coded and must be kept in sync with the
  // lengths of the index/value arrays that follow them.
  const uint32_t sparse_vec_count_0 = 52;

  uint32_t sparse_vec_index_0[] = {
      33,   66,   77,   209,  385,  396,  539,  583,  649,  715,  880,
      935,  968,  1023, 1100, 1111, 1661, 1694, 1749, 2288, 2343, 2453,
      2530, 2772, 2871, 2882, 2948, 3069, 3322, 3333, 3410, 3575, 3608,
      4026, 4037, 4048, 4059, 4070, 4268, 4323, 4741, 4752, 5137, 5170,
      5423, 5555, 5918, 6028, 6094, 6347, 6369, 6468};
  float sparse_vec_value_0[] = {
      -0.246404298254, 0.892043114755,  0.163785949199,  -0.680309913534,
      -0.767956138324, -0.410683610329, 0.763314047145,  0.347851184825,
      -0.676969102165, -0.774662820732, 0.274471489215,  -0.131269040962,
      0.206478593023,  0.764082612827,  -0.57678381864,  -0.256053693585,
      0.661507236032,  -0.812832823664, 0.929611593685,  -0.381852499144,
      -0.35890001953,  0.538386710846,  -0.829565442015, 0.384046166409,
      0.623125501212,  0.043215334982,  -0.689536097425, -0.500913794456,
      -0.419818105671, -0.503346955801, -0.99419236655,  -0.414091535679,
      -0.829474457209, -0.103915702521, -0.419445202934, -0.26891898936,
      0.311013521629,  0.172923023003,  -0.818231467063, -0.728015315042,
      0.110116365075,  0.845786117564,  -0.587841450807, 0.533763235805,
      -0.601437402994, -0.117487602176, 0.106103380748,  -0.00151542886833,
      0.189967593506,  0.890365538566,  -0.581876671583, -0.232173604777};

  const uint32_t sparse_vec_count_1 = 43;
  uint32_t sparse_vec_index_1[] = {
      33,   77,   110,  209,  1023, 1111, 1221, 1496, 1661, 1749, 2189,
      2255, 2288, 2420, 2530, 2695, 2772, 2838, 2948, 3179, 3575, 4202,
      4268, 4290, 4433, 4444, 4653, 4697, 4741, 5137, 5192, 5346, 5423,
      5445, 5555, 5588, 5764, 5896, 5918, 6028, 6270, 6347, 6501};
  float sparse_vec_value_1[] = {
      -0.847561468192, -0.761580890729,  0.683791378502,  0.729670644228,
      -0.111989702001, -0.3435914518,    -0.806454864134, -0.0243347460596,
      0.497209110076,  0.852745969955,   0.403748558594,  -0.634016410599,
      -0.74513226711,  0.738086689871,   0.364575651925,  0.0867637408004,
      -0.285921174394, -0.321390976616,  -0.971849760722, -0.246041408731,
      -0.110667223833, 0.0744013655781,  0.84846334839,   0.167405689007,
      0.0289923642993, -0.536394124155,  0.518249809298,  -0.695798108647,
      0.0653215071151, -0.0046338401448, 0.644189056747,  -0.52301532328,
      -0.660275328421, 0.643514995264,   0.0333307952838, -0.401825159735,
      -0.188869041499, -0.23065238799,   -0.409416817144, -0.142933941372,
      0.247628793044,  -0.984985692607,  -0.427929860028};

  // Convert both vectors into the buffers that Compute takes as input.
  std::string sparse_query_buffer_0;
  MinusInnerProductSparseMatrix<float>::transform_sparse_format(
      sparse_vec_count_0, sparse_vec_index_0, sparse_vec_value_0,
      sparse_query_buffer_0);

  std::string sparse_query_buffer_1;
  MinusInnerProductSparseMatrix<float>::transform_sparse_format(
      sparse_vec_count_1, sparse_vec_index_1, sparse_vec_value_1,
      sparse_query_buffer_1);

  // Reference value from the scalar merge over the raw arrays.
  float result0{0.0f};
  result0 = SparseDistanceCommon(sparse_vec_count_0, sparse_vec_index_0,
                                 sparse_vec_value_0, sparse_vec_count_1,
                                 sparse_vec_index_1, sparse_vec_value_1);

  // Value under test; the sign is flipped before comparing with the reference.
  float result1{0.0f};
  MinusInnerProductSparseMatrix<float>::Compute(
      sparse_query_buffer_0.data(), sparse_query_buffer_1.data(), &result1);
  result1 = -result1;

  // |reference - kernel| must not exceed 1e-5.
  EXPECT_GE(0.00001, std::abs(result0 - result1));

  // test 2: same procedure with much larger indices (up to 650100)
  constexpr uint32_t sparse_vec_count_2 = 49;

  uint32_t sparse_vec_index_2[] = {
      13200,  20900,  36300,  41800,  50600,  74800,  78100,  81400,  93500,
      99000,  107800, 121000, 127600, 137500, 140800, 143000, 145200, 166100,
      174900, 193600, 194700, 195800, 233200, 261800, 262900, 273900, 277200,
      299200, 302500, 343200, 381700, 387200, 418000, 421300, 436700, 449900,
      480700, 510400, 586300, 596200, 603900, 607200, 612700, 625900, 632500,
      633600, 639100, 642400, 650100};
  float sparse_vec_value_2[] = {
      0.167493264953,  0.178347102375,   0.61850792017,    0.707662206696,
      -0.604456492928, 0.898905062153,   -0.971984671516,  -0.337950525868,
      -0.942538751319, -0.115612454156,  0.78433412971,    0.601522288928,
      -0.640321042923, -0.235673191423,  0.00632807223978, 0.629970437467,
      0.966519256786,  -0.279362437157,  0.396153064627,   -0.614592812875,
      -0.642157513141, 0.686723258138,   0.10227967727,    -0.5921196708,
      0.499411577177,  -0.0188556369919, 0.512245212443,   0.424666758023,
      0.299827154891,  -0.615468257454,  -0.0499098903374, -0.54873640329,
      0.899673049133,  -0.873237346565,  0.463117084808,   -0.810200151551,
      0.676836615658,  0.596247430713,   0.946225552468,   0.968425796351,
      -0.821041580744, -0.697734977387,  0.295618053879,   -0.476597945375,
      -0.246035224835, 0.927603570489,   -0.640242995569,  0.610224433234,
      -0.657550506633};

  constexpr uint32_t sparse_vec_count_3 = 58;
  uint32_t sparse_vec_index_3[] = {
      13200,  19800,  37400,  56100,  68200,  78100,  81400,  99000,  103400,
      107800, 108900, 110000, 111100, 125400, 127600, 137500, 141900, 151800,
      154000, 155100, 158400, 163900, 165000, 173800, 198000, 201300, 215600,
      247500, 249700, 264000, 269500, 287100, 291500, 311300, 312400, 336600,
      353100, 354200, 361900, 367400, 390500, 398200, 407000, 414700, 424600,
      510400, 533500, 535700, 551100, 556600, 568700, 576400, 577500, 590700,
      592900, 618200, 631400, 636900};
  float sparse_vec_value_3[] = {
      0.175769744964,  -0.198506965419,  0.0842021015107, 0.544957076263,
      0.0856447356878, 0.838582935178,   0.796525374862,  -0.931940801441,
      0.555150441425,  0.957490431546,   -0.422126167235, -0.40903200281,
      0.242643233475,  0.698565387541,   -0.325754491857, 0.540403772154,
      -0.449888493042, 0.349262051644,   -0.612943655195, 0.874112675658,
      0.943939922271,  -0.994946966212,  -0.978705162429, 0.321190597007,
      0.17722019302,   0.6041089417,     -0.353184098327, -0.938569390092,
      -0.92268220981,  -0.268600478592,  -0.598069229627, 0.0720175726713,
      0.426800021137,  0.369250757861,   -0.823348360327, -0.664061107875,
      -0.418342805261, -0.430818720049,  0.0941988181812, 0.0765632945538,
      -0.148533061047, 0.404665036566,   -0.170747760502, -0.206564280292,
      0.311035754032,  0.498520039471,   -0.16255148444,  -0.137950933749,
      -0.234990864629, 0.602901363949,   0.0297103943437, -0.730955584059,
      0.117169059405,  -0.0746546228896, 0.39067258928,   -0.214782717972,
      -0.111009971497, -0.87766242691};

  // Convert both vectors into the buffers that Compute takes as input.
  std::string sparse_query_buffer_2;
  MinusInnerProductSparseMatrix<float>::transform_sparse_format(
      sparse_vec_count_2, sparse_vec_index_2, sparse_vec_value_2,
      sparse_query_buffer_2);

  std::string sparse_query_buffer_3;
  MinusInnerProductSparseMatrix<float>::transform_sparse_format(
      sparse_vec_count_3, sparse_vec_index_3, sparse_vec_value_3,
      sparse_query_buffer_3);

  // Reference value from the scalar merge over the raw arrays.
  float result2{0.0f};
  result2 = SparseDistanceCommon(sparse_vec_count_2, sparse_vec_index_2,
                                 sparse_vec_value_2, sparse_vec_count_3,
                                 sparse_vec_index_3, sparse_vec_value_3);

  // Value under test; the sign is flipped before comparing with the reference.
  float result3{0.0f};
  MinusInnerProductSparseMatrix<float>::Compute(
      sparse_query_buffer_2.data(), sparse_query_buffer_3.data(), &result3);
  result3 = -result3;

  // |reference - kernel| must not exceed 1e-5.
  EXPECT_GE(0.00001, std::abs(result2 - result3));
}

void TestInnerProductSparseMore(void) {
  std::vector<uint32_t> sparse_vec_counts;
  std::vector<uint32_t *> sparse_vec_indices;
  std::vector<float *> sparse_vec_values;

  const uint32_t sparse_vec_count_0 = 173;
  uint32_t sparse_vec_index_0[] = {
      1012,  1996,  2001,  2018,  2020,  2036,  2037,  2056,  2058,  2069,
      2111,  2116,  2138,  2162,  2166,  2245,  2253,  2259,  2306,  2307,
      2318,  2331,  2351,  2359,  2390,  2419,  2426,  2428,  2466,  2470,
      2535,  2554,  2557,  2568,  2590,  2622,  2671,  2739,  2765,  2812,
      2817,  2837,  2913,  2920,  3003,  3092,  3112,  3125,  3144,  3214,
      3241,  3249,  3260,  3268,  3271,  3278,  3280,  3330,  3463,  3478,
      3716,  3739,  3768,  3800,  3908,  3934,  3992,  4028,  4045,  4072,
      4146,  4254,  4301,  4382,  4454,  4471,  4504,  4517,  4598,  4806,
      4807,  4847,  4928,  4988,  5081,  5113,  5177,  5190,  5197,  5201,
      5234,  5456,  5621,  5689,  5792,  5817,  5823,  5875,  5920,  5921,
      5951,  5968,  6033,  6112,  6145,  6215,  6344,  6396,  6429,  6438,
      6529,  6627,  6691,  6731,  6801,  6865,  6950,  7036,  7128,  7155,
      7461,  7551,  7596,  7691,  7784,  7789,  7848,  7857,  8044,  8052,
      8053,  8553,  8573,  8664,  8817,  8826,  9250,  9273,  9593,  9727,
      10013, 10106, 10617, 10639, 10753, 11657, 12108, 13128, 13463, 13702,
      13787, 14152, 14332, 15237, 15313, 15359, 15699, 16724, 17171, 17571,
      17669, 20168, 20805, 20972, 22134, 22229, 22779, 24762, 24823, 25526,
      25699, 26761, 27885};
  float sparse_vec_value_0[] = {
      0.36311877,  0.10386213,  0.64821976,   0.26300138,    0.29727572,
      0.047292523, 0.022334402, 0.118793316,  0.7198291,     0.73566943,
      0.19491579,  0.5763569,   0.5245229,    0.022828134,   0.43562022,
      0.6946562,   0.09275672,  0.9687072,    0.1751608,     0.09703954,
      0.18717986,  0.43182945,  0.055112287,  0.0021027816,  0.13972417,
      0.1019873,   0.8679199,   0.26797894,   0.097350314,   0.5125363,
      0.2829703,   0.052232087, 0.3248494,    1.1258097,     0.90756655,
      1.6490538,   0.45066822,  0.004210417,  0.028443621,   0.41171393,
      0.09246816,  0.053040083, 0.052729037,  0.00041907438, 0.32047704,
      0.2290303,   1.3542659,   0.28811434,   1.1722984,     0.4484738,
      0.73670006,  0.22390367,  0.0058781556, 0.48173144,    0.76392287,
      0.32048634,  0.42589885,  0.8624791,    0.0376546,     0.56702816,
      0.002337549, 1.5856861,   0.14177673,   0.22762497,    0.6601752,
      1.0603137,   0.914821,    0.34792075,   1.4387932,     0.035774633,
      0.04391008,  0.7179224,   0.49199906,   0.043692447,   1.1404462,
      0.47572234,  0.22777049,  0.7626374,    0.59730506,    1.4541638,
      1.6540457,   0.089919806, 0.0050144624, 0.15902519,    0.2989032,
      0.121926464, 0.11911,     0.27476037,   1.2774497,     0.42462146,
      0.30179682,  0.18773684,  0.82144237,   1.2033592,     0.07180116,
      0.06378868,  0.029040875, 0.2089903,    0.03591103,    0.94913304,
      0.18240769,  0.9050947,   0.0034226696, 1.2841027,     0.629526,
      0.06401547,  1.0698998,   0.11138009,   0.20497903,    0.017457427,
      0.6316996,   0.12303611,  0.01563728,   0.090583175,   0.23981698,
      0.48518667,  0.6207808,   1.8336427,    2.3282833,     0.8153351,
      0.026216522, 0.6143031,   0.17374748,   0.32929608,    0.33730298,
      1.1497657,   0.1926745,   0.14235665,   1.1076177,     0.945609,
      0.48826388,  0.10458124,  0.19699246,   0.20899634,    0.44853806,
      0.26411146,  0.7495864,   1.3681723,    1.4299264,     0.037516754,
      0.17946614,  0.98060745,  0.055851664,  0.2002921,     0.45136684,
      0.33716172,  0.58752763,  0.34051904,   1.9018586,     0.20597915,
      0.82819384,  0.23866963,  0.4160662,    0.11889692,    0.172538,
      0.005433464, 0.089198045, 0.3896585,    0.74038976,    0.24974349,
      0.044961147, 0.32671204,  0.044312827,  0.25430596,    0.021065181,
      0.071978964, 1.992692,    0.02640776,   1.7344381,     0.09561436,
      0.07097204,  0.2922402,   0.8794989};

  const uint32_t sparse_vec_count_1 = 144;
  uint32_t sparse_vec_index_1[] = {
      1012,  1016,  1059,  1996,  2001,  2020,  2049,  2068,  2076,  2088,
      2109,  2138,  2145,  2149,  2162,  2203,  2220,  2224,  2256,  2259,
      2318,  2373,  2381,  2390,  2393,  2419,  2462,  2466,  2485,  2506,
      2554,  2557,  2580,  2590,  2622,  2633,  2645,  2671,  2716,  2724,
      2900,  2942,  2943,  3003,  3029,  3092,  3112,  3125,  3260,  3271,
      3278,  3283,  3288,  3439,  3466,  3478,  3521,  3578,  3594,  3595,
      3607,  3647,  3690,  3800,  3826,  3896,  3908,  3934,  3947,  3987,
      4045,  4068,  4204,  4254,  4255,  4302,  4329,  4471,  4504,  4517,
      4566,  4736,  4762,  4789,  5081,  5094,  5105,  5195,  5197,  5201,
      5233,  5234,  5584,  5817,  5823,  5832,  5875,  5951,  5968,  6033,
      6035,  6179,  6215,  6245,  6383,  6394,  6396,  6529,  6613,  6691,
      6801,  7091,  7128,  7155,  7240,  7461,  7551,  7596,  7691,  7738,
      7784,  8027,  8144,  8192,  8249,  8309,  8573,  8647,  8826,  9379,
      9593,  9767,  10400, 10461, 10530, 11028, 12799, 13787, 14487, 14670,
      15237, 15523, 20168, 25755};
  float sparse_vec_value_1[] = {
      0.3815109,   0.21950184,   0.389138,    0.03037462,  0.738938,
      0.11151163,  0.21257511,   0.008723602, 0.42403504,  0.17748593,
      0.38613674,  0.38208488,   0.49048766,  0.056615792, 1.285813,
      1.1482359,   0.016783785,  0.7362169,   0.21784282,  1.0905122,
      0.37420613,  0.81915,      0.67411584,  0.35778007,  0.80538017,
      0.10094925,  1.2726786,    0.12334787,  0.18297458,  0.13315988,
      0.041079145, 0.2655652,    0.10946682,  0.6782494,   1.7451618,
      0.17126456,  0.17718226,   0.7430134,   0.9090848,   0.31985787,
      0.21779177,  0.13639484,   1.2293936,   0.065131165, 0.03718982,
      0.64121664,  0.46517274,   0.39498892,  0.07401267,  1.2061241,
      0.1276834,   0.059918232,  1.1935436,   0.61886644,  0.32731527,
      0.37830237,  1.0287925,    0.09565632,  0.4313508,   0.03845683,
      0.066990376, 0.10886483,   0.097683005, 0.29624575,  0.48645914,
      0.250733,    0.03274726,   1.205507,    0.048636433, 0.034002367,
      0.83021015,  0.044592205,  0.06007409,  1.1224703,   0.45620173,
      0.16457361,  0.053571727,  0.12527509,  0.1308366,   0.92323685,
      0.7821679,   0.23838642,   0.2558486,   0.09402168,  0.22815736,
      0.51750314,  0.08442147,   0.5565446,   0.3642559,   0.6661639,
      0.73750395,  0.17278494,   0.05865512,  0.013724559, 0.023783961,
      0.04283593,  0.24765956,   0.3991119,   1.5201892,   0.035530984,
      0.049782272, 0.06485597,   0.5367931,   0.15097857,  0.014405596,
      0.14585418,  0.22106051,   0.49575308,  0.08290891,  0.17875223,
      0.21095915,  0.0038430362, 2.3110201,   0.6543391,   0.06421487,
      0.3782336,   0.3514111,    0.5225064,   0.21472597,  0.07987356,
      0.06002587,  1.5242931,    0.081204355, 0.32025364,  0.39068836,
      0.027896391, 0.2872351,    0.50436527,  0.5434884,   1.653683,
      1.444315,    0.988968,     0.024239752, 0.055084217, 0.074782506,
      0.021114044, 0.07288233,   0.822755,    0.10772858,  0.6189507,
      0.29534152,  0.20032129,   0.5609191,   1.2844883};

  const uint32_t sparse_vec_count_2 = 153;
  uint32_t sparse_vec_index_2[] = {
      1012,  1059,  1996,  2001,  2020,  2049,  2052,  2055,  2056,  2081,
      2088,  2124,  2138,  2156,  2158,  2162,  2191,  2231,  2242,  2256,
      2259,  2311,  2318,  2359,  2373,  2381,  2390,  2437,  2458,  2466,
      2477,  2510,  2554,  2580,  2590,  2622,  2640,  2671,  2689,  2825,
      2844,  2881,  2904,  2957,  3029,  3112,  3125,  3144,  3214,  3246,
      3271,  3312,  3330,  3399,  3443,  3478,  3578,  3595,  3647,  3697,
      3740,  3800,  3817,  3818,  3928,  3934,  3987,  4034,  4072,  4079,
      4172,  4204,  4254,  4255,  4302,  4517,  4526,  4695,  4706,  4795,
      4807,  4986,  5081,  5091,  5113,  5195,  5197,  5234,  5253,  5263,
      5623,  5646,  5656,  5817,  5875,  5951,  5954,  5968,  6033,  6061,
      6108,  6119,  6157,  6213,  6215,  6287,  6384,  6396,  6461,  6469,
      6613,  6801,  6842,  7128,  7240,  7305,  7477,  7551,  7596,  7609,
      7624,  7723,  7779,  7857,  7935,  8144,  8238,  8249,  8275,  8547,
      8573,  8647,  8826,  8927,  9036,  9491,  9593,  9767,  10267, 10461,
      10505, 10660, 10721, 11028, 12578, 13787, 14487, 14874, 15523, 20168,
      21565, 24212, 25628};
  float sparse_vec_value_2[] = {
      0.19194126,  0.11344757,   0.21317342,  0.6771587,    0.08591107,
      0.006228663, 0.28981656,   0.58056134,  0.064362876,  0.5794717,
      0.4288167,   0.59527594,   0.6106896,   0.23139843,   0.897008,
      0.20689227,  0.28713426,   0.38175523,  0.4028853,    0.08509491,
      1.0562526,   0.1165676,    0.06347306,  0.41331312,   0.16935593,
      0.1626863,   0.29352358,   0.45827967,  0.21193665,   0.39532298,
      0.0789344,   0.026420705,  0.1763078,   0.18424834,   0.7216729,
      1.6683924,   0.06257952,   0.13419773,  0.6851299,    1.2139059,
      0.092483185, 0.10803583,   0.74339646,  0.14461784,   0.2389669,
      0.9306581,   0.5645601,    0.83565444,  0.11930474,   0.22862941,
      0.6214566,   0.0033283439, 0.42018214,  0.15267797,   0.029068783,
      0.24103808,  0.18765616,   0.11574381,  0.31545344,   0.09386852,
      0.038362045, 0.7730324,    0.4456206,   0.20152733,   0.94718367,
      1.1934134,   0.12610391,   0.014013804, 0.47198555,   0.21791361,
      0.05394335,  0.08415188,   0.066486694, 0.47462225,   0.16693182,
      0.9021425,   0.27905586,   0.09939155,  0.12642553,   0.27529165,
      0.024804203, 0.24346212,   0.25561446,  1.4675297,    0.21566682,
      0.5453194,   0.21558505,   0.21294887,  0.2740208,    0.43185237,
      0.2280337,   0.0048945076, 0.26826337,  0.016979327,  0.3338952,
      0.23080347,  0.21200272,   1.3268396,   0.05323057,   0.30005422,
      0.088871606, 0.13259241,   0.04766706,  0.0017769856, 0.2698414,
      0.08068406,  0.38578644,   0.09752118,  0.13972333,   0.0731375,
      0.36664346,  0.12214721,   0.1541759,   2.2295072,    0.22542699,
      0.028530587, 0.022988612,  0.35836184,  0.10530607,   0.53756726,
      0.05818686,  0.044951066,  0.05753079,  0.09009998,   0.24644017,
      0.22693348,  0.0019512648, 0.035316195, 0.057344455,  0.36419895,
      0.1534858,   0.18924302,   0.38702026,  1.2569604,    0.07787755,
      1.7163913,   1.1903315,    0.8173934,   0.13888475,   0.10908335,
      0.35437793,  0.15787303,   0.25039884,  0.130508,     0.09830101,
      0.5841259,   0.22020355,   0.37849018,  0.14054261,   0.5179198,
      1.1891438,   0.44022372,   0.1794719};

  const uint32_t sparse_vec_count_3 = 166;
  uint32_t sparse_vec_index_3[] = {
      1012,  1059,  1996,  1997,  2001,  2020,  2034,  2076,  2086,  2104,
      2138,  2149,  2162,  2170,  2171,  2220,  2231,  2236,  2259,  2311,
      2315,  2318,  2328,  2343,  2344,  2359,  2381,  2390,  2419,  2458,
      2462,  2466,  2472,  2479,  2491,  2510,  2557,  2558,  2565,  2580,
      2590,  2622,  2724,  2764,  2817,  2837,  2881,  2900,  2911,  2933,
      2949,  3003,  3029,  3058,  3092,  3101,  3125,  3188,  3271,  3330,
      3386,  3399,  3434,  3447,  3474,  3478,  3578,  3595,  3607,  3650,
      3690,  3740,  3779,  3800,  3817,  3818,  3826,  3910,  3918,  3934,
      3987,  3992,  4006,  4034,  4068,  4075,  4114,  4146,  4172,  4255,
      4302,  4327,  4503,  4517,  4758,  4883,  4944,  4975,  5036,  5195,
      5205,  5218,  5233,  5234,  5253,  5456,  5623,  5656,  5687,  5817,
      5875,  5951,  5954,  5968,  6059,  6119,  6145,  6157,  6215,  6262,
      6384,  6394,  6613,  6787,  6801,  6842,  6993,  7128,  7156,  7240,
      7305,  7421,  7551,  7596,  7676,  7935,  8547,  8573,  8647,  8773,
      8826,  8886,  8911,  9036,  9274,  9433,  9593,  9767,  9915,  10267,
      10461, 10505, 11028, 11274, 11593, 13058, 13787, 14487, 15237, 17060,
      20168, 21695, 23041, 24363, 25526, 25755};
  float sparse_vec_value_3[] = {
      0.17927244,   0.20557176,   0.40560228,   0.32370853,  0.8060634,
      0.21424179,   1.0674698,    0.6046889,    0.21051478,  0.46186206,
      0.24661283,   0.5616991,    1.016811,     0.2618776,   0.9686127,
      0.869671,     0.1458332,    0.60725594,   1.206012,    0.10357225,
      0.4350595,    0.83702874,   0.146196,     0.8644738,   0.15587087,
      0.16456357,   0.36376593,   1.053665,     0.06609649,  0.6504239,
      0.9697015,    0.04947369,   0.43753505,   0.04289205,  0.42075413,
      0.330524,     0.1743388,    0.6540892,    0.012900644, 0.23207273,
      0.2674499,    1.9736407,    0.21540764,   0.63648874,  0.049446102,
      0.3750183,    0.17441651,   0.123951435,  0.015306404, 0.1767618,
      0.24109434,   0.4245122,    0.114403255,  0.91849947,  0.12018716,
      0.01165807,   0.47680765,   0.036503244,  0.5782868,   0.9163635,
      0.27396393,   0.16385026,   0.052631885,  0.72294754,  0.4022935,
      0.06351255,   0.27786675,   0.25394455,   0.08041568,  1.3137422,
      0.5514297,    0.2503315,    0.009040705,  0.40985608,  0.27673048,
      0.14055687,   0.50529444,   0.6049716,    1.0692317,   1.207644,
      0.108388424,  0.9495853,    0.35366973,   0.3762234,   0.19875458,
      0.14685634,   0.0060924664, 1.0126622,    0.034943417, 0.49489433,
      0.34451365,   0.21992311,   0.7039926,    0.9501215,   0.34629604,
      0.20126931,   0.23908958,   0.019030606,  0.12528977,  0.6009518,
      0.056694727,  0.19225678,   0.61745095,   0.26769277,  0.18739952,
      0.10380342,   0.08536158,   0.18679029,   0.040631995, 0.23538794,
      0.081166975,  0.3206779,    0.0018739193, 1.5819491,   0.07052032,
      0.2504746,    0.7514167,    0.06575893,   0.08000714,  0.0012445971,
      0.23989597,   0.12001178,   0.51009554,   0.14469045,  0.12445986,
      0.08644873,   0.5645543,    2.539498,     0.54383165,  0.22437337,
      0.0018195114, 0.11787724,   0.34932667,   0.49611032,  0.24439196,
      0.100613214,  0.2844197,    0.38720158,   0.22204469,  0.078220785,
      0.76444066,   1.7794204,    0.17640579,   0.04227443,  0.28023362,
      0.06434563,   1.320367,     0.9287479,    0.14726646,  0.27983913,
      0.022449814,  0.09246922,   0.22375125,   0.10417365,  0.034148056,
      0.12830476,   0.6065902,    0.16593556,   0.25840235,  0.2596266,
      0.6388732,    1.6666834,    0.030998405,  0.14869562,  0.30502653,
      1.183558};

  const uint32_t sparse_vec_count_4 = 104;
  uint32_t sparse_vec_index_4[] = {
      1012,  1996,  1997,  2001,  2033,  2034,  2080,  2120,  2142,  2149,
      2220,  2231,  2259,  2284,  2318,  2338,  2381,  2405,  2424,  2436,
      2458,  2472,  2533,  2544,  2557,  2580,  2609,  2622,  2627,  2688,
      2800,  2820,  2837,  2862,  2932,  2949,  3029,  3036,  3181,  3390,
      3439,  3690,  3780,  3784,  3818,  3872,  3931,  3934,  4034,  4037,
      4075,  4219,  4348,  4517,  4573,  4617,  4773,  4809,  4822,  4879,
      5234,  5272,  5851,  5968,  6119,  6378,  6396,  6613,  6702,  6728,
      6787,  7128,  7156,  7240,  7479,  7551,  7596,  7692,  7809,  8027,
      8249,  8264,  8299,  8573,  8826,  9123,  9152,  9274,  9445,  9593,
      9915,  11377, 11744, 12935, 13308, 14487, 14947, 15720, 17060, 17669,
      18079, 18629, 19841, 21053};
  float sparse_vec_value_4[] = {
      0.2030336,   0.1411735,   0.12635018,  0.45823106,  0.22794029,
      1.4105916,   0.2769118,   0.75515395,  0.07748295,  0.19260094,
      0.12458416,  0.065163694, 0.9765741,   0.07470863,  0.80718166,
      0.12307288,  0.9393725,   0.048733678, 0.17115222,  1.1922649,
      0.03547645,  0.33111426,  0.03772038,  0.46104532,  0.3141086,
      0.25707254,  1.1549219,   1.8509476,   0.98180383,  0.7270674,
      0.91343564,  0.3373339,   0.081498206, 0.01140901,  0.43917242,
      0.072401166, 0.11307132,  0.8945273,   0.10071963,  0.1945517,
      0.7594797,   0.096463405, 0.07759007,  0.11009286,  0.012562437,
      1.1797432,   0.02481144,  1.2393609,   0.50596905,  1.48781,
      0.53125334,  0.9950063,   1.4128636,   1.5830894,   0.93246186,
      0.60709685,  0.40433922,  0.14255294,  0.7125986,   0.021445543,
      0.4104336,   0.14560317,  0.3189296,   0.51019174,  0.041676614,
      0.22844397,  0.18406813,  0.1604107,   1.2178165,   0.46861333,
      0.04899898,  2.4448788,   0.6505235,   0.051029652, 0.7550255,
      0.00625443,  0.5090246,   0.7109037,   0.1125403,   0.05059699,
      0.03856528,  0.4538238,   0.72464395,  0.1360473,   0.5109412,
      2.0780752,   0.049649376, 0.31396037,  0.114775784, 0.9717559,
      0.05478335,  0.12228666,  1.3433831,   1.6574994,   0.053257514,
      0.51201975,  0.029570522, 0.35752434,  0.39366165,  0.25994724,
      1.1072603,   2.0454218,   1.1423918,   0.59795356};

  const uint32_t sparse_vec_count_5 = 147;
  uint32_t sparse_vec_index_5[] = {
      1012,  1996,  2001,  2018,  2020,  2034,  2047,  2081,  2154,  2162,
      2170,  2171,  2207,  2210,  2220,  2233,  2251,  2253,  2257,  2259,
      2287,  2315,  2318,  2328,  2381,  2390,  2458,  2466,  2510,  2557,
      2580,  2609,  2622,  2645,  2688,  2707,  2724,  2762,  2838,  2900,
      2911,  2915,  3047,  3058,  3260,  3282,  3290,  3295,  3297,  3386,
      3390,  3578,  3603,  3607,  3690,  3746,  3826,  3861,  3908,  3910,
      3918,  3934,  3987,  4006,  4045,  4075,  4088,  4110,  4255,  4302,
      4517,  4620,  4761,  4871,  4916,  5195,  5221,  5234,  5246,  5532,
      5700,  5798,  5832,  5855,  5951,  5968,  6033,  6215,  6219,  6302,
      6394,  6396,  6529,  6950,  7008,  7084,  7128,  7155,  7156,  7240,
      7421,  7467,  7551,  7596,  7738,  7760,  8088,  8367,  8372,  8479,
      8573,  8647,  8773,  8826,  9188,  9274,  9290,  9433,  9593,  9767,
      9913,  9919,  9982,  10461, 10815, 11028, 11721, 12416, 12496, 12779,
      13221, 13702, 13787, 14487, 15699, 16164, 18801, 20168, 21650, 24291,
      24321, 25209, 25526, 25755, 28110, 28682, 28858};
  float sparse_vec_value_5[] = {
      0.22246745,  0.1639393,    0.6902539,    0.087209724, 0.3150326,
      1.3589038,   0.39210027,   0.06905281,   0.2940129,   0.48745865,
      0.5185849,   0.06468885,   0.33793828,   0.01934533,  0.9160348,
      0.12213709,  0.64625627,   0.05484681,   0.18600157,  0.7439921,
      1.4779477,   0.50866294,   0.9324953,    0.11494038,  0.14815839,
      0.4024814,   0.0025193223, 0.0039419075, 0.04004241,  0.1137441,
      0.100572474, 0.09889997,   1.6465691,    0.45031455,  0.4567774,
      0.7614913,   0.5324026,    0.09957147,   0.21556115,  0.36752453,
      0.13450043,  0.06911261,   0.04267344,   1.2791942,   0.054822505,
      0.06269096,  1.3170663,    0.8852742,    0.37885663,  0.92810893,
      0.12803665,  0.10517517,   0.24920024,   0.16889784,  1.3619378,
      0.59796244,  0.81389725,   0.06489252,   0.020069994, 0.06319,
      0.71297073,  1.2515233,    0.019061586,  0.04731544,  0.3536146,
      0.50835687,  0.56439734,   0.09884678,   1.1007178,   0.1480219,
      1.6361246,   0.3891063,    0.03873499,   0.050479025, 0.5629584,
      1.0016122,   0.16247666,   0.06476003,   0.43833405,  1.3702114,
      0.11968183,  0.29155007,   0.12643526,   0.518913,    0.41796717,
      1.740134,    0.015489911,  0.2183447,    1.5380116,   1.058654,
      0.06226158,  0.270943,     0.91666347,   0.06422295,  0.33474496,
      0.002399514, 2.0762439,    0.8989307,    0.7876583,   0.03783609,
      0.22333156,  0.13323776,   0.27660817,   0.56637865,  0.21507333,
      0.6770579,   0.7013793,    0.7085848,    0.15651116,  0.05219105,
      0.03743524,  0.30775747,   0.073243596,  0.8181374,   0.28133482,
      0.23539418,  0.07533616,   0.2044144,    1.574523,    1.1304078,
      0.24084339,  1.3286508,    0.775562,     0.10096621,  0.197577,
      0.2307252,   1.719028,     0.07254901,   0.13916898,  0.17486195,
      0.8424586,   0.27879223,   0.8650824,    0.35050592,  0.24243252,
      0.31039444,  0.17227773,   0.90619636,   0.63083464,  2.2181685,
      0.20995331,  0.14425081,   0.37305146,   0.5955121,   0.87200415,
      1.028527,    1.0835907};

  const uint32_t sparse_vec_count_6 = 141;
  uint32_t sparse_vec_index_6[] = {
      1012,  1059,  1996,  1997,  1998,  2001,  2012,  2018,  2020,  2021,
      2025,  2055,  2056,  2076,  2077,  2127,  2130,  2134,  2138,  2143,
      2162,  2197,  2203,  2220,  2259,  2318,  2328,  2338,  2345,  2381,
      2390,  2458,  2462,  2466,  2501,  2517,  2580,  2622,  2631,  2645,
      2688,  2707,  2724,  2748,  2764,  2808,  2900,  2911,  2933,  2949,
      3047,  3058,  3074,  3075,  3092,  3101,  3188,  3271,  3283,  3439,
      3478,  3535,  3595,  3607,  3690,  3720,  3740,  3793,  3818,  3826,
      3906,  3908,  3934,  3981,  3986,  4028,  4138,  4469,  4496,  4503,
      4515,  4517,  4566,  4704,  4706,  4761,  4839,  5036,  5175,  5233,
      5234,  5246,  5254,  5263,  5491,  5817,  5823,  5839,  5875,  5968,
      6215,  6254,  6268,  6394,  6407,  6801,  6848,  7128,  7177,  7321,
      7421,  7487,  7551,  7596,  7681,  7940,  8145,  8264,  8321,  8551,
      8573,  8647,  8773,  8826,  8832,  9472,  9593,  9599,  9767,  10530,
      12149, 13787, 14487, 15237, 15523, 17060, 20168, 23633, 24363, 25526,
      25755};
  float sparse_vec_value_6[] = {
      0.48692977,  0.23770119,  0.24359323,   0.030566106,  0.121271,
      0.5703241,   0.12787338,  0.037069157,  0.075816214,  0.05305081,
      0.45591223,  0.5893366,   0.01829792,   0.42078727,   0.036012013,
      0.0750098,   0.20031127,  0.033489488,  0.10935432,   0.054307006,
      1.0000131,   0.20630358,  1.1161063,    0.5766484,    0.86030954,
      0.65358734,  0.062234607, 0.8518808,    0.23441537,   0.14816457,
      0.19284223,  0.94708407,  1.0017378,    0.51629704,   0.082293354,
      0.09170858,  0.2138309,   1.533815,     0.0030641577, 0.029126635,
      0.3632337,   0.1761491,   0.34924436,   0.67822266,   0.5976219,
      0.8595736,   0.17943758,  0.038340267,  0.0052374,    0.29047492,
      0.070157826, 0.6779024,   0.75593567,   0.054473646,  0.4906121,
      0.11288958,  0.15934071,  0.3192689,    0.1435216,    0.30725288,
      0.37506026,  0.7213243,   0.18401349,   0.01871983,   0.19455475,
      0.02040177,  0.28111485,  0.043639474,  0.19826981,   0.27416018,
      1.429636,    0.05111553,  1.0482118,    0.98164123,   0.17426124,
      0.10582682,  1.002954,    1.0261939,    0.83377177,   0.6798103,
      0.015373114, 0.8136259,   0.95782644,   0.13387722,   0.40847424,
      0.80647326,  0.28733957,  0.0029352994, 0.30276307,   0.4768307,
      0.32016084,  0.10302183,  0.3044403,    0.040031943,  0.44271877,
      0.061298616, 0.08278493,  0.107188344,  0.5086274,    1.3297924,
      0.050804485, 0.68582493,  0.21776867,   0.027724598,  0.5286007,
      0.1899133,   0.04971613,  2.2401748,    0.09252626,   0.80688274,
      0.014750206, 0.07568165,  0.021886598,  0.23429997,   1.1812011,
      0.6390751,   0.2643012,   0.13720371,   0.10989579,   1.4969206,
      0.2209742,   0.54690766,  0.15685914,   0.47841135,   0.566988,
      0.08368683,  1.2788389,   0.09509155,   1.0241207,    0.07167757,
      0.29240122,  0.5619141,   0.016415644,  0.28731114,   0.035925347,
      0.34043407,  0.60646313,  0.07248792,   0.08602479,   0.10247773,
      1.13258};

  const uint32_t sparse_vec_count_7 = 221;
  uint32_t sparse_vec_index_7[] = {
      1059,  1996,  2001,  2003,  2008,  2010,  2020,  2029,  2034,  2076,
      2080,  2081,  2103,  2104,  2137,  2138,  2142,  2149,  2162,  2163,
      2220,  2231,  2236,  2253,  2256,  2259,  2315,  2318,  2328,  2329,
      2343,  2344,  2350,  2359,  2381,  2390,  2419,  2458,  2462,  2466,
      2470,  2472,  2490,  2510,  2537,  2550,  2554,  2557,  2580,  2590,
      2599,  2608,  2622,  2631,  2640,  2645,  2662,  2710,  2724,  2728,
      2762,  2764,  2817,  2820,  2832,  2837,  2856,  2866,  2881,  2891,
      2957,  2974,  2983,  3003,  3010,  3029,  3050,  3058,  3063,  3068,
      3092,  3101,  3125,  3135,  3257,  3271,  3282,  3330,  3386,  3399,
      3474,  3578,  3595,  3603,  3607,  3650,  3690,  3758,  3800,  3817,
      3826,  3878,  3910,  3918,  3934,  3947,  3965,  3987,  3992,  4006,
      4034,  4045,  4068,  4146,  4172,  4202,  4255,  4302,  4327,  4351,
      4503,  4517,  4637,  4707,  4944,  5025,  5036,  5195,  5201,  5233,
      5234,  5253,  5501,  5584,  5623,  5656,  5687,  5814,  5817,  5911,
      5951,  5954,  5968,  6035,  6108,  6119,  6145,  6157,  6177,  6215,
      6254,  6262,  6384,  6394,  6613,  6728,  6787,  6801,  6842,  6845,
      6922,  6960,  7128,  7155,  7156,  7240,  7421,  7551,  7596,  7609,
      7654,  7676,  7723,  7779,  7935,  8049,  8144,  8151,  8249,  8547,
      8573,  8647,  8773,  8826,  8864,  8886,  9036,  9274,  9290,  9433,
      9593,  9667,  9767,  9915,  10267, 10505, 10544, 10753, 10815, 11028,
      11593, 11837, 12496, 13058, 13308, 13625, 13702, 14487, 15523, 17669,
      18457, 18800, 18826, 20168, 20843, 21695, 24363, 25526, 25755, 26234,
      26911};
  float sparse_vec_value_7[] = {
      0.29634815,  0.3303992,    1.0099697,   0.09545747,  0.046319153,
      0.001999375, 0.27222815,   0.107896015, 1.0792782,   0.5411261,
      0.27695096,  0.020715078,  0.021571944, 0.61097443,  0.10560424,
      0.15401895,  0.46480918,   0.6496758,   1.0116925,   0.0040072273,
      0.8931394,   0.2361543,    0.74389607,  0.039703716, 0.020886008,
      1.1108406,   0.09039394,   0.69578373,  0.27737862,  0.3083219,
      0.5698159,   0.31437457,   0.7131746,   0.14947455,  0.33504876,
      1.1611847,   0.8632542,    1.058698,    1.0307701,   0.15223494,
      0.9391413,   0.9473978,    0.3767169,   0.5806728,   0.70086235,
      0.8544429,   0.07839825,   0.46189323,  0.57343185,  0.17151174,
      0.45118546,  0.03416668,   2.037371,    0.1311739,   0.22600843,
      0.061421365, 0.0063685803, 0.9023181,   0.17874505,  1.458104,
      0.09657643,  0.36346155,   0.11396522,  0.2762966,   0.11472289,
      0.16151813,  0.5954224,    0.68847394,  0.6934064,   1.0951325,
      0.008113728, 0.320056,     0.2934685,   0.38948777,  0.64446163,
      0.11539491,  1.4196212,    0.6417532,   0.10939098,  0.115132414,
      0.10055387,  0.15150718,   0.3015885,   0.36512154,  0.85847276,
      0.42005107,  0.06733843,   0.9194887,   0.2446694,   0.3528377,
      0.30540454,  0.0549386,    0.15950806,  0.12754358,  0.22250807,
      1.3793756,   0.01503605,   0.33390692,  0.2052875,   0.32573462,
      0.66194123,  0.03896839,   0.921685,    1.1364039,   1.2451752,
      0.072772495, 0.10148866,   0.2922106,   0.97420144,  0.25800666,
      0.13455145,  0.3459612,    0.16713561,  0.21625288,  0.20754638,
      0.017042752, 1.2139128,    0.38501504,  0.18923776,  0.58807755,
      0.42623222,  1.8636363,    0.15489826,  0.24531981,  0.330716,
      0.6148099,   0.12145276,   0.938947,    0.08298498,  0.5002425,
      0.42643633,  0.3724926,    0.351435,    0.35051146,  0.15093777,
      0.2753887,   0.11030835,   0.05864477,  0.12825343,  0.4938676,
      0.4091608,   0.13155867,   1.362572,    0.26034647,  0.005735014,
      0.25208464,  0.77931124,   0.08418636,  0.2567355,   0.108983725,
      0.04566572,  0.06202907,   0.3991703,   0.2785334,   0.45871663,
      1.584949,    0.099409536,  0.114265166, 0.0603091,   0.71120745,
      0.35286796,  0.03805246,   2.6303916,   0.6235311,   0.6544235,
      0.254192,    0.5172861,    0.46474016,  0.51770395,  0.3868696,
      0.030558605, 0.79667675,   0.1053426,   0.08400551,  0.26797673,
      0.52138245,  0.13453461,   0.070371106, 0.003556521, 0.34309983,
      0.2104394,   0.02274147,   0.19070747,  0.9488226,   0.09138845,
      2.092856,    0.10931594,   0.18929166,  0.113100395, 0.08495193,
      1.124685,    0.08020554,   1.0792019,   0.27422333,  0.31508496,
      0.20671548,  0.05064338,   0.46511328,  0.38314936,  0.52556884,
      0.36894837,  1.4199936,    0.05843645,  0.055732273, 0.26817194,
      0.2876586,   1.0425944,    0.062882155, 0.09840146,  0.1544766,
      0.98742366,  0.20589906,   2.1226256,   0.47266316,  0.33193296,
      2.0077822,   0.23509863,   0.53764015,  1.2505449,   1.719803,
      0.39262286};

  std::vector<std::string> sparse_query_buffers;

  std::string sparse_query_buffer_0;
  MinusInnerProductSparseMatrix<float>::transform_sparse_format(
      sparse_vec_count_0, sparse_vec_index_0, sparse_vec_value_0,
      sparse_query_buffer_0);
  sparse_query_buffers.emplace_back(std::move(sparse_query_buffer_0));

  std::string sparse_query_buffer_1;
  MinusInnerProductSparseMatrix<float>::transform_sparse_format(
      sparse_vec_count_1, sparse_vec_index_1, sparse_vec_value_1,
      sparse_query_buffer_1);
  sparse_query_buffers.emplace_back(std::move(sparse_query_buffer_1));

  std::string sparse_query_buffer_2;
  MinusInnerProductSparseMatrix<float>::transform_sparse_format(
      sparse_vec_count_2, sparse_vec_index_2, sparse_vec_value_2,
      sparse_query_buffer_2);
  sparse_query_buffers.emplace_back(std::move(sparse_query_buffer_2));

  std::string sparse_query_buffer_3;
  MinusInnerProductSparseMatrix<float>::transform_sparse_format(
      sparse_vec_count_3, sparse_vec_index_3, sparse_vec_value_3,
      sparse_query_buffer_3);
  sparse_query_buffers.emplace_back(std::move(sparse_query_buffer_3));

  std::string sparse_query_buffer_4;
  MinusInnerProductSparseMatrix<float>::transform_sparse_format(
      sparse_vec_count_4, sparse_vec_index_4, sparse_vec_value_4,
      sparse_query_buffer_4);
  sparse_query_buffers.emplace_back(std::move(sparse_query_buffer_4));

  std::string sparse_query_buffer_5;
  MinusInnerProductSparseMatrix<float>::transform_sparse_format(
      sparse_vec_count_5, sparse_vec_index_5, sparse_vec_value_5,
      sparse_query_buffer_5);
  sparse_query_buffers.emplace_back(std::move(sparse_query_buffer_5));

  std::string sparse_query_buffer_6;
  MinusInnerProductSparseMatrix<float>::transform_sparse_format(
      sparse_vec_count_6, sparse_vec_index_6, sparse_vec_value_6,
      sparse_query_buffer_6);
  sparse_query_buffers.emplace_back(std::move(sparse_query_buffer_6));

  std::string sparse_query_buffer_7;
  MinusInnerProductSparseMatrix<float>::transform_sparse_format(
      sparse_vec_count_7, sparse_vec_index_7, sparse_vec_value_7,
      sparse_query_buffer_7);
  sparse_query_buffers.emplace_back(std::move(sparse_query_buffer_7));

  sparse_vec_counts.emplace_back(sparse_vec_count_0);
  sparse_vec_counts.emplace_back(sparse_vec_count_1);
  sparse_vec_counts.emplace_back(sparse_vec_count_2);
  sparse_vec_counts.emplace_back(sparse_vec_count_3);
  sparse_vec_counts.emplace_back(sparse_vec_count_4);
  sparse_vec_counts.emplace_back(sparse_vec_count_5);
  sparse_vec_counts.emplace_back(sparse_vec_count_6);
  sparse_vec_counts.emplace_back(sparse_vec_count_7);

  sparse_vec_indices.emplace_back(sparse_vec_index_0);
  sparse_vec_indices.emplace_back(sparse_vec_index_1);
  sparse_vec_indices.emplace_back(sparse_vec_index_2);
  sparse_vec_indices.emplace_back(sparse_vec_index_3);
  sparse_vec_indices.emplace_back(sparse_vec_index_4);
  sparse_vec_indices.emplace_back(sparse_vec_index_5);
  sparse_vec_indices.emplace_back(sparse_vec_index_6);
  sparse_vec_indices.emplace_back(sparse_vec_index_7);

  sparse_vec_values.emplace_back(sparse_vec_value_0);
  sparse_vec_values.emplace_back(sparse_vec_value_1);
  sparse_vec_values.emplace_back(sparse_vec_value_2);
  sparse_vec_values.emplace_back(sparse_vec_value_3);
  sparse_vec_values.emplace_back(sparse_vec_value_4);
  sparse_vec_values.emplace_back(sparse_vec_value_5);
  sparse_vec_values.emplace_back(sparse_vec_value_6);
  sparse_vec_values.emplace_back(sparse_vec_value_7);

  for (size_t i = 0; i < sparse_query_buffers.size(); ++i) {
    for (size_t j = 0; j < sparse_query_buffers.size(); ++j) {
      float result0{0.0f};
      result0 = SparseDistanceCommon(
          sparse_vec_counts[i], sparse_vec_indices[i], sparse_vec_values[i],
          sparse_vec_counts[j], sparse_vec_indices[j], sparse_vec_values[j]);

      float result1{0.0f};
      MinusInnerProductSparseMatrix<float>::Compute(
          sparse_query_buffers[i].data(), sparse_query_buffers[j].data(),
          &result1);
      result1 = -result1;

      // float epsilon = 0.001*std::max(result0, result1);
      EXPECT_GE(0.0001, std::abs(result0 - result1));
    }
  }
}

// Registers the checks implemented by TestInnerProductSparse() (declared
// earlier in this file) as the gtest case DistanceMatrix.InnerProductSparse.
TEST(DistanceMatrix, InnerProductSparse) {
  TestInnerProductSparse();
}

// Registers the checks implemented by TestInnerProductSparseMore() (declared
// earlier in this file) as the gtest case
// DistanceMatrix.InnerProductSparseMore.
TEST(DistanceMatrix, InnerProductSparseMore) {
  TestInnerProductSparseMore();
}

// Stress loop for MinusInnerProductSparseMatrix<float>::Compute on one fixed
// pair of sparse vectors. The DISABLED_ prefix keeps it out of normal runs
// (gtest only executes it with --gtest_also_run_disabled_tests). The block
// itself records no timing; it only repeats the call many times.
TEST(DistanceMatrix, DISABLED_InnerProductSparse_Benchmark) {
  // First operand: 52 (index, value) pairs.
  const uint32_t lhs_count = 52;
  uint32_t lhs_index[] = {
      33,   66,   77,   209,  385,  396,  539,  583,  649,  715,  880,
      935,  968,  1023, 1100, 1111, 1661, 1694, 1749, 2288, 2343, 2453,
      2530, 2772, 2871, 2882, 2948, 3069, 3322, 3333, 3410, 3575, 3608,
      4026, 4037, 4048, 4059, 4070, 4268, 4323, 4741, 4752, 5137, 5170,
      5423, 5555, 5918, 6028, 6094, 6347, 6369, 6468};
  float lhs_value[] = {
      -0.246404298254, 0.892043114755,  0.163785949199,  -0.680309913534,
      -0.767956138324, -0.410683610329, 0.763314047145,  0.347851184825,
      -0.676969102165, -0.774662820732, 0.274471489215,  -0.131269040962,
      0.206478593023,  0.764082612827,  -0.57678381864,  -0.256053693585,
      0.661507236032,  -0.812832823664, 0.929611593685,  -0.381852499144,
      -0.35890001953,  0.538386710846,  -0.829565442015, 0.384046166409,
      0.623125501212,  0.043215334982,  -0.689536097425, -0.500913794456,
      -0.419818105671, -0.503346955801, -0.99419236655,  -0.414091535679,
      -0.829474457209, -0.103915702521, -0.419445202934, -0.26891898936,
      0.311013521629,  0.172923023003,  -0.818231467063, -0.728015315042,
      0.110116365075,  0.845786117564,  -0.587841450807, 0.533763235805,
      -0.601437402994, -0.117487602176, 0.106103380748,  -0.00151542886833,
      0.189967593506,  0.890365538566,  -0.581876671583, -0.232173604777};

  // Second operand: 43 (index, value) pairs.
  const uint32_t rhs_count = 43;
  uint32_t rhs_index[] = {
      33,   77,   110,  209,  1023, 1111, 1221, 1496, 1661, 1749, 2189,
      2255, 2288, 2420, 2530, 2695, 2772, 2838, 2948, 3179, 3575, 4202,
      4268, 4290, 4433, 4444, 4653, 4697, 4741, 5137, 5192, 5346, 5423,
      5445, 5555, 5588, 5764, 5896, 5918, 6028, 6270, 6347, 6501};
  float rhs_value[] = {
      -0.847561468192, -0.761580890729,  0.683791378502,  0.729670644228,
      -0.111989702001, -0.3435914518,    -0.806454864134, -0.0243347460596,
      0.497209110076,  0.852745969955,   0.403748558594,  -0.634016410599,
      -0.74513226711,  0.738086689871,   0.364575651925,  0.0867637408004,
      -0.285921174394, -0.321390976616,  -0.971849760722, -0.246041408731,
      -0.110667223833, 0.0744013655781,  0.84846334839,   0.167405689007,
      0.0289923642993, -0.536394124155,  0.518249809298,  -0.695798108647,
      0.0653215071151, -0.0046338401448, 0.644189056747,  -0.52301532328,
      -0.660275328421, 0.643514995264,   0.0333307952838, -0.401825159735,
      -0.188869041499, -0.23065238799,   -0.409416817144, -0.142933941372,
      0.247628793044,  -0.984985692607,  -0.427929860028};

  // Serialize both operands into the buffer layout consumed by Compute().
  std::string lhs_buffer;
  std::string rhs_buffer;
  MinusInnerProductSparseMatrix<float>::transform_sparse_format(
      lhs_count, lhs_index, lhs_value, lhs_buffer);
  MinusInnerProductSparseMatrix<float>::transform_sparse_format(
      rhs_count, rhs_index, rhs_value, rhs_buffer);

  constexpr size_t kIterations = 100000000LLU;
  constexpr size_t kSlots = 100;
  float scores[kSlots];

  // Cycle the output through kSlots slots; every slot is written many times.
  for (size_t iter = 0; iter != kIterations; ++iter) {
    float *slot = scores + (iter % kSlots);
    MinusInnerProductSparseMatrix<float>::Compute(lhs_buffer.data(),
                                                  rhs_buffer.data(), slot);
  }

  // Same inputs on every call, so any two slots must hold the same score.
  EXPECT_EQ(scores[0], scores[1]);
}

// Cross-checks the sparse kernel against SparseDistanceCommon() for two
// identical vectors whose index list starts at dimension 0.
TEST(DistanceMatrix, TestInnerProductSparseDimWithZero) {
  // Both operands use indices 0..9 with every value set to 2.0.
  const uint32_t lhs_count = 10;
  uint32_t lhs_index[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
  float lhs_value[] = {2.0f, 2.0f, 2.0f, 2.0f, 2.0f,
                       2.0f, 2.0f, 2.0f, 2.0f, 2.0f};

  const uint32_t rhs_count = 10;
  uint32_t rhs_index[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
  float rhs_value[] = {2.0f, 2.0f, 2.0f, 2.0f, 2.0f,
                       2.0f, 2.0f, 2.0f, 2.0f, 2.0f};

  // Serialize both operands into the buffer layout consumed by Compute().
  std::string lhs_buffer;
  std::string rhs_buffer;
  MinusInnerProductSparseMatrix<float>::transform_sparse_format(
      lhs_count, lhs_index, lhs_value, lhs_buffer);
  MinusInnerProductSparseMatrix<float>::transform_sparse_format(
      rhs_count, rhs_index, rhs_value, rhs_buffer);

  // Reference value computed directly from the raw index/value arrays.
  const float expected = SparseDistanceCommon(lhs_count, lhs_index, lhs_value,
                                              rhs_count, rhs_index, rhs_value);

  // The kernel under test writes its result through the out-pointer; the test
  // flips the sign before comparing with the reference.
  float actual{0.0f};
  MinusInnerProductSparseMatrix<float>::Compute(lhs_buffer.data(),
                                                rhs_buffer.data(), &actual);
  actual = -actual;

  const float gap = std::abs(expected - actual);
  EXPECT_GE(0.00001, gap);
}


================================================
FILE: tests/ailego/math/inner_product_matrix_int4_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <functional>
#include <random>
#include <string>
#include <thread>
#include <vector>
#include <ailego/container/bitmap.h>
#include <ailego/internal/cpu_features.h>
#include <ailego/math/distance.h>
#include <ailego/utility/matrix_helper.h>
#include <gtest/gtest.h>
#include <zvec/ailego/container/vector.h>
#include <zvec/ailego/utility/time_helper.h>

using namespace zvec;
using namespace zvec::ailego;

// Forwards the string returned by internal::CpuFeatures::Intrinsics(); the
// benchmark helpers in this file print it in their report header.
static inline const char *IntelIntrinsics() {
  const char *const intrinsics = internal::CpuFeatures::Intrinsics();
  return intrinsics;
}

// Verifies that the uint8_t (packed 4-bit) overloads of
// Distance::InnerProduct / Distance::MinusInnerProduct agree with the int8_t
// overloads on the same random values, and that Int4MulTable holds the
// product of the two nibbles packed in each byte.
TEST(DistanceMatrix, InnerProduct_General) {
  std::mt19937 rng((std::random_device())());
  // Random even dimension in [2, 128], so values always come in full pairs.
  const size_t dim = (std::uniform_int_distribution<size_t>(1, 64))(rng) << 1;
  const size_t pair_count = dim >> 1;

  std::vector<int8_t> vec_i8(dim), query_i8(dim);
  std::vector<uint8_t> vec_i4(pair_count), query_i4(pair_count);

  // Every generated value fits in a signed 4-bit integer.
  std::uniform_int_distribution<int> int4_value(-8, 7);

  // Packs a pair into one byte: first value in the high nibble, second value
  // in the low nibble.
  const auto pack_pair = [](int8_t first, int8_t second) {
    return static_cast<uint8_t>((static_cast<uint8_t>(first) << 4) |
                                (static_cast<uint8_t>(second) & 0xf));
  };

  for (size_t round = 0; round < 100; ++round) {
    for (size_t p = 0; p < pair_count; ++p) {
      const size_t first = p * 2;
      const size_t second = first + 1;

      vec_i8[first] = static_cast<int8_t>(int4_value(rng));
      vec_i8[second] = static_cast<int8_t>(int4_value(rng));
      vec_i4[p] = pack_pair(vec_i8[first], vec_i8[second]);
      EXPECT_EQ(vec_i8[first] * vec_i8[second], Int4MulTable[vec_i4[p]]);

      query_i8[first] = static_cast<int8_t>(int4_value(rng));
      query_i8[second] = static_cast<int8_t>(int4_value(rng));
      query_i4[p] = pack_pair(query_i8[first], query_i8[second]);
      EXPECT_EQ(query_i8[first] * query_i8[second],
                Int4MulTable[query_i4[p]]);
    }

    const auto minus_ip_i8 =
        Distance::MinusInnerProduct(vec_i8.data(), query_i8.data(), dim);
    const auto minus_ip_i4 =
        Distance::MinusInnerProduct(vec_i4.data(), query_i4.data(), dim);
    const auto ip_i8 =
        Distance::InnerProduct(vec_i8.data(), query_i8.data(), dim);
    const auto ip_i4 =
        Distance::InnerProduct(vec_i4.data(), query_i4.data(), dim);

    EXPECT_FLOAT_EQ(minus_ip_i8, minus_ip_i4);
    EXPECT_FLOAT_EQ(ip_i8, ip_i4);
    // The negated score must be the exact negation of the plain one.
    EXPECT_FLOAT_EQ(minus_ip_i8, -ip_i4);
  }
}

// Compares the batched kernel InnerProductMatrix<uint8_t, M, N> against the
// scalar InnerProductMatrix<uint8_t, 1, 1> on random packed data.
//
// M is the number of database vectors (batch size) and N the number of
// queries. Each vector occupies dimension / 2 bytes. The batched kernel is
// fed the output of MatrixHelper::Transpose, the 1x1 kernel the original
// row-by-row data.
template <size_t M, size_t N>
void TestInnerProductMatrix(void) {
  std::mt19937 rng((std::random_device())());

  // Random dimension: a multiple of 8 in [8, 512].
  const size_t dim = (std::uniform_int_distribution<size_t>(1, 64))(rng) << 3;
  const size_t row_bytes = dim / 2;
  const size_t row_words = dim / 8;  // row length in uint32_t units

  std::vector<uint8_t> rows(M * row_bytes);
  std::vector<uint8_t> rows_transposed(rows.size());
  std::vector<uint8_t> queries(N * row_bytes);
  std::vector<uint8_t> queries_transposed(queries.size());
  std::vector<float> expected(M * N);
  std::vector<float> actual(M * N);

  // Fill the database first, then the queries, with arbitrary bytes.
  std::uniform_int_distribution<int> any_byte(0, 0xff);
  for (uint8_t &cell : rows) {
    cell = static_cast<uint8_t>(any_byte(rng));
  }
  for (uint8_t &cell : queries) {
    cell = static_cast<uint8_t>(any_byte(rng));
  }

  ailego::MatrixHelper::Transpose<uint32_t, M>(rows.data(), row_words,
                                               rows_transposed.data());
  ailego::MatrixHelper::Transpose<uint32_t, N>(queries.data(), row_words,
                                               queries_transposed.data());

  // Reference: one 1x1 call per (query, row) pair, stored query-major.
  for (size_t q = 0; q < N; ++q) {
    const uint8_t *query = queries.data() + q * row_bytes;
    float *out = expected.data() + q * M;

    for (size_t r = 0; r < M; ++r) {
      InnerProductMatrix<uint8_t, 1, 1>::Compute(rows.data() + r * row_bytes,
                                                 query, dim, out + r);
    }
  }

  // Kernel under test: a single call over the transposed operands.
  InnerProductMatrix<uint8_t, M, N>::Compute(
      rows_transposed.data(), queries_transposed.data(), dim, actual.data());

  for (size_t k = 0; k < M * N; ++k) {
    EXPECT_FLOAT_EQ(expected[k], actual[k]);
  }
}

// Compares the batched kernel MinusInnerProductMatrix<uint8_t, M, N> against
// the scalar MinusInnerProductMatrix<uint8_t, 1, 1> on random packed data.
//
// M is the number of database vectors (batch size) and N the number of
// queries. Each vector occupies dimension / 2 bytes. The batched kernel is
// fed the output of MatrixHelper::Transpose, the 1x1 kernel the original
// row-by-row data.
template <size_t M, size_t N>
void TestMinusInnerProductMatrix(void) {
  std::mt19937 rng((std::random_device())());

  // Random dimension: a multiple of 8 in [8, 512].
  const size_t dim = (std::uniform_int_distribution<size_t>(1, 64))(rng) << 3;
  const size_t row_bytes = dim / 2;
  const size_t row_words = dim / 8;  // row length in uint32_t units

  std::vector<uint8_t> rows(M * row_bytes);
  std::vector<uint8_t> rows_transposed(rows.size());
  std::vector<uint8_t> queries(N * row_bytes);
  std::vector<uint8_t> queries_transposed(queries.size());
  std::vector<float> expected(M * N);
  std::vector<float> actual(M * N);

  // Fill the database first, then the queries, with arbitrary bytes.
  std::uniform_int_distribution<int> any_byte(0, 0xff);
  for (uint8_t &cell : rows) {
    cell = static_cast<uint8_t>(any_byte(rng));
  }
  for (uint8_t &cell : queries) {
    cell = static_cast<uint8_t>(any_byte(rng));
  }

  ailego::MatrixHelper::Transpose<uint32_t, M>(rows.data(), row_words,
                                               rows_transposed.data());
  ailego::MatrixHelper::Transpose<uint32_t, N>(queries.data(), row_words,
                                               queries_transposed.data());

  // Reference: one 1x1 call per (query, row) pair, stored query-major.
  for (size_t q = 0; q < N; ++q) {
    const uint8_t *query = queries.data() + q * row_bytes;
    float *out = expected.data() + q * M;

    for (size_t r = 0; r < M; ++r) {
      MinusInnerProductMatrix<uint8_t, 1, 1>::Compute(
          rows.data() + r * row_bytes, query, dim, out + r);
    }
  }

  // Kernel under test: a single call over the transposed operands.
  MinusInnerProductMatrix<uint8_t, M, N>::Compute(
      rows_transposed.data(), queries_transposed.data(), dim, actual.data());

  for (size_t k = 0; k < M * N; ++k) {
    EXPECT_FLOAT_EQ(expected[k], actual[k]);
  }
}

// InnerProduct cases for the smallest shapes. The suffix <M>x<N> names the
// template arguments passed to TestInnerProductMatrix<M, N>: M database
// vectors (batch size) and N queries.
TEST(DistanceMatrix, InnerProduct_1x1) {
  TestInnerProductMatrix<1, 1>();
}

TEST(DistanceMatrix, InnerProduct_2x1) {
  TestInnerProductMatrix<2, 1>();
}

TEST(DistanceMatrix, InnerProduct_2x2) {
  TestInnerProductMatrix<2, 2>();
}

// The only case in this list whose sizes are not powers of two.
TEST(DistanceMatrix, InnerProduct_3x3) {
  TestInnerProductMatrix<3, 3>();
}

// InnerProduct cases with a batch of 4 database vectors and 1, 2 or 4
// queries (TestInnerProductMatrix<batch, queries>).
TEST(DistanceMatrix, InnerProduct_4x1) {
  TestInnerProductMatrix<4, 1>();
}

TEST(DistanceMatrix, InnerProduct_4x2) {
  TestInnerProductMatrix<4, 2>();
}

TEST(DistanceMatrix, InnerProduct_4x4) {
  TestInnerProductMatrix<4, 4>();
}

// InnerProduct cases with a batch of 8 database vectors and 1 to 8 queries
// (TestInnerProductMatrix<batch, queries>).
TEST(DistanceMatrix, InnerProduct_8x1) {
  TestInnerProductMatrix<8, 1>();
}

TEST(DistanceMatrix, InnerProduct_8x2) {
  TestInnerProductMatrix<8, 2>();
}

TEST(DistanceMatrix, InnerProduct_8x4) {
  TestInnerProductMatrix<8, 4>();
}

TEST(DistanceMatrix, InnerProduct_8x8) {
  TestInnerProductMatrix<8, 8>();
}

// InnerProduct cases with a batch of 16 database vectors and 1 to 16 queries
// (TestInnerProductMatrix<batch, queries>).
TEST(DistanceMatrix, InnerProduct_16x1) {
  TestInnerProductMatrix<16, 1>();
}

TEST(DistanceMatrix, InnerProduct_16x2) {
  TestInnerProductMatrix<16, 2>();
}

TEST(DistanceMatrix, InnerProduct_16x4) {
  TestInnerProductMatrix<16, 4>();
}

TEST(DistanceMatrix, InnerProduct_16x8) {
  TestInnerProductMatrix<16, 8>();
}

TEST(DistanceMatrix, InnerProduct_16x16) {
  TestInnerProductMatrix<16, 16>();
}

// InnerProduct cases with a batch of 32 database vectors and 1 to 32 queries
// (TestInnerProductMatrix<batch, queries>).
TEST(DistanceMatrix, InnerProduct_32x1) {
  TestInnerProductMatrix<32, 1>();
}

TEST(DistanceMatrix, InnerProduct_32x2) {
  TestInnerProductMatrix<32, 2>();
}

TEST(DistanceMatrix, InnerProduct_32x4) {
  TestInnerProductMatrix<32, 4>();
}

TEST(DistanceMatrix, InnerProduct_32x8) {
  TestInnerProductMatrix<32, 8>();
}

TEST(DistanceMatrix, InnerProduct_32x16) {
  TestInnerProductMatrix<32, 16>();
}

TEST(DistanceMatrix, InnerProduct_32x32) {
  TestInnerProductMatrix<32, 32>();
}

// InnerProduct cases with a batch of 64 database vectors and 1 to 32 queries
// (TestInnerProductMatrix<batch, queries>).
TEST(DistanceMatrix, InnerProduct_64x1) {
  TestInnerProductMatrix<64, 1>();
}

TEST(DistanceMatrix, InnerProduct_64x2) {
  TestInnerProductMatrix<64, 2>();
}

TEST(DistanceMatrix, InnerProduct_64x4) {
  TestInnerProductMatrix<64, 4>();
}

TEST(DistanceMatrix, InnerProduct_64x8) {
  TestInnerProductMatrix<64, 8>();
}

TEST(DistanceMatrix, InnerProduct_64x16) {
  TestInnerProductMatrix<64, 16>();
}

TEST(DistanceMatrix, InnerProduct_64x32) {
  TestInnerProductMatrix<64, 32>();
}

// InnerProduct with a batch of 64 database vectors against 64 queries. The
// template arguments must match the <M>x<N> suffix of the test name so that
// the reported case is the shape that actually runs.
TEST(DistanceMatrix, InnerProduct_64x64) {
  TestInnerProductMatrix<64, 64>();
}

// InnerProduct cases with a batch of 128 database vectors and 1 to 32
// queries (TestInnerProductMatrix<batch, queries>).
TEST(DistanceMatrix, InnerProduct_128x1) {
  TestInnerProductMatrix<128, 1>();
}

TEST(DistanceMatrix, InnerProduct_128x2) {
  TestInnerProductMatrix<128, 2>();
}

TEST(DistanceMatrix, InnerProduct_128x4) {
  TestInnerProductMatrix<128, 4>();
}

TEST(DistanceMatrix, InnerProduct_128x8) {
  TestInnerProductMatrix<128, 8>();
}

TEST(DistanceMatrix, InnerProduct_128x16) {
  TestInnerProductMatrix<128, 16>();
}

TEST(DistanceMatrix, InnerProduct_128x32) {
  TestInnerProductMatrix<128, 32>();
}

// InnerProduct with a batch of 128 database vectors against 64 queries. The
// template arguments must match the <M>x<N> suffix of the test name;
// otherwise this case would merely repeat InnerProduct_128x128.
TEST(DistanceMatrix, InnerProduct_128x64) {
  TestInnerProductMatrix<128, 64>();
}

// InnerProduct with a batch of 128 database vectors against 128 queries, the
// largest shape in this list.
TEST(DistanceMatrix, InnerProduct_128x128) {
  TestInnerProductMatrix<128, 128>();
}

// MinusInnerProduct cases for the smallest shapes. The suffix <M>x<N> names
// the template arguments passed to TestMinusInnerProductMatrix<M, N>: M
// database vectors (batch size) and N queries.
TEST(DistanceMatrix, MinusInnerProduct_1x1) {
  TestMinusInnerProductMatrix<1, 1>();
}

TEST(DistanceMatrix, MinusInnerProduct_2x1) {
  TestMinusInnerProductMatrix<2, 1>();
}

TEST(DistanceMatrix, MinusInnerProduct_2x2) {
  TestMinusInnerProductMatrix<2, 2>();
}

// The only case in this list whose sizes are not powers of two.
TEST(DistanceMatrix, MinusInnerProduct_3x3) {
  TestMinusInnerProductMatrix<3, 3>();
}

// MinusInnerProduct cases with a batch of 4 database vectors and 1, 2 or 4
// queries (TestMinusInnerProductMatrix<batch, queries>).
TEST(DistanceMatrix, MinusInnerProduct_4x1) {
  TestMinusInnerProductMatrix<4, 1>();
}

TEST(DistanceMatrix, MinusInnerProduct_4x2) {
  TestMinusInnerProductMatrix<4, 2>();
}

TEST(DistanceMatrix, MinusInnerProduct_4x4) {
  TestMinusInnerProductMatrix<4, 4>();
}

// MinusInnerProduct cases with a batch of 8 database vectors and 1 to 8
// queries (TestMinusInnerProductMatrix<batch, queries>).
TEST(DistanceMatrix, MinusInnerProduct_8x1) {
  TestMinusInnerProductMatrix<8, 1>();
}

TEST(DistanceMatrix, MinusInnerProduct_8x2) {
  TestMinusInnerProductMatrix<8, 2>();
}

TEST(DistanceMatrix, MinusInnerProduct_8x4) {
  TestMinusInnerProductMatrix<8, 4>();
}

TEST(DistanceMatrix, MinusInnerProduct_8x8) {
  TestMinusInnerProductMatrix<8, 8>();
}

// MinusInnerProduct cases with a batch of 16 database vectors and 1 to 16
// queries (TestMinusInnerProductMatrix<batch, queries>).
TEST(DistanceMatrix, MinusInnerProduct_16x1) {
  TestMinusInnerProductMatrix<16, 1>();
}

TEST(DistanceMatrix, MinusInnerProduct_16x2) {
  TestMinusInnerProductMatrix<16, 2>();
}

TEST(DistanceMatrix, MinusInnerProduct_16x4) {
  TestMinusInnerProductMatrix<16, 4>();
}

TEST(DistanceMatrix, MinusInnerProduct_16x8) {
  TestMinusInnerProductMatrix<16, 8>();
}

TEST(DistanceMatrix, MinusInnerProduct_16x16) {
  TestMinusInnerProductMatrix<16, 16>();
}

// MinusInnerProduct cases with a batch of 32 database vectors and 1 to 32
// queries (TestMinusInnerProductMatrix<batch, queries>).
TEST(DistanceMatrix, MinusInnerProduct_32x1) {
  TestMinusInnerProductMatrix<32, 1>();
}

TEST(DistanceMatrix, MinusInnerProduct_32x2) {
  TestMinusInnerProductMatrix<32, 2>();
}

TEST(DistanceMatrix, MinusInnerProduct_32x4) {
  TestMinusInnerProductMatrix<32, 4>();
}

TEST(DistanceMatrix, MinusInnerProduct_32x8) {
  TestMinusInnerProductMatrix<32, 8>();
}

TEST(DistanceMatrix, MinusInnerProduct_32x16) {
  TestMinusInnerProductMatrix<32, 16>();
}

TEST(DistanceMatrix, MinusInnerProduct_32x32) {
  TestMinusInnerProductMatrix<32, 32>();
}

// MinusInnerProduct cases with a batch of 64 database vectors and 1 to 32
// queries (TestMinusInnerProductMatrix<batch, queries>).
TEST(DistanceMatrix, MinusInnerProduct_64x1) {
  TestMinusInnerProductMatrix<64, 1>();
}

TEST(DistanceMatrix, MinusInnerProduct_64x2) {
  TestMinusInnerProductMatrix<64, 2>();
}

TEST(DistanceMatrix, MinusInnerProduct_64x4) {
  TestMinusInnerProductMatrix<64, 4>();
}

TEST(DistanceMatrix, MinusInnerProduct_64x8) {
  TestMinusInnerProductMatrix<64, 8>();
}

TEST(DistanceMatrix, MinusInnerProduct_64x16) {
  TestMinusInnerProductMatrix<64, 16>();
}

TEST(DistanceMatrix, MinusInnerProduct_64x32) {
  TestMinusInnerProductMatrix<64, 32>();
}

// MinusInnerProduct with a batch of 64 database vectors against 64 queries.
// The template arguments must match the <M>x<N> suffix of the test name so
// that the reported case is the shape that actually runs.
TEST(DistanceMatrix, MinusInnerProduct_64x64) {
  TestMinusInnerProductMatrix<64, 64>();
}

// MinusInnerProduct cases with a batch of 128 database vectors and 1 to 32
// queries (TestMinusInnerProductMatrix<batch, queries>).
TEST(DistanceMatrix, MinusInnerProduct_128x1) {
  TestMinusInnerProductMatrix<128, 1>();
}

TEST(DistanceMatrix, MinusInnerProduct_128x2) {
  TestMinusInnerProductMatrix<128, 2>();
}

TEST(DistanceMatrix, MinusInnerProduct_128x4) {
  TestMinusInnerProductMatrix<128, 4>();
}

TEST(DistanceMatrix, MinusInnerProduct_128x8) {
  TestMinusInnerProductMatrix<128, 8>();
}

TEST(DistanceMatrix, MinusInnerProduct_128x16) {
  TestMinusInnerProductMatrix<128, 16>();
}

TEST(DistanceMatrix, MinusInnerProduct_128x32) {
  TestMinusInnerProductMatrix<128, 32>();
}

// MinusInnerProduct with a batch of 128 database vectors against 64 queries.
// The template arguments must match the <M>x<N> suffix of the test name;
// otherwise this case would merely repeat MinusInnerProduct_128x128.
TEST(DistanceMatrix, MinusInnerProduct_128x64) {
  TestMinusInnerProductMatrix<128, 64>();
}

// MinusInnerProduct with a batch of 128 database vectors against 128
// queries, the largest shape in this list.
TEST(DistanceMatrix, MinusInnerProduct_128x128) {
  TestMinusInnerProductMatrix<128, 128>();
}

template <size_t M, size_t N, size_t B, size_t D>
void InnerProductBenchmark(void) {
  const size_t dimension = D;
  const size_t batch_size = M;
  const size_t block_size = B;
  const size_t query_size = N;
  const size_t matrix_size = block_size * batch_size * dimension / 2;
  const size_t query_matrix_size = query_size * dimension / 2;

  std::vector<uint8_t> matrix1(matrix_size);
  std::vector<uint8_t> matrix2(matrix_size);
  std::vector<uint8_t> query1(query_matrix_size);
  std::vector<uint8_t> query2(query_matrix_size);

  std::mt19937 gen((std::random_device())());
  std::uniform_int_distribution<int> dist(0, 0xff);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = (uint8_t)dist(gen);
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = (uint8_t)dist(gen);
  }

  for (size_t i = 0; i < block_size; ++i) {
    size_t start_pos = i * batch_size * dimension / 2;
    ailego::MatrixHelper::Transpose<uint32_t, batch_size>(
        &matrix1[start_pos], dimension / 8, &matrix2[start_pos]);
  }
  ailego::MatrixHelper::Transpose<uint32_t, query_size>(
      query1.data(), dimension / 8, &query2[0]);

  ElapsedTime elapsed_time;
  std::vector<float> results(batch_size * query_size);

  std::cout << "# (" << IntelIntrinsics() << ") INT4 " << dimension << "d, "
            << batch_size << " * " << query_size << " * " << block_size
            << std::endl;

  // 1 Batched InnerProduct
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const uint8_t *matrix_batch = &matrix2[i * batch_size * dimension / 2];

    for (size_t j = 0; j < query_size; ++j) {
      const uint8_t *current_query = &query1[j * dimension / 2];
      float *current_results = &results[j * batch_size];

      InnerProductMatrix<uint8_t, batch_size, 1>::Compute(
          matrix_batch, current_query, dimension, current_results);
    }
  }
  std::cout << "* 1 Batched InnerProduct (us) \t"
            << elapsed_time.micro_seconds() << std::endl;

  // N Batched InnerProduct
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const uint8_t *matrix_batch = &matrix2[i * batch_size * dimension / 2];

    InnerProductMatrix<uint8_t, batch_size, query_size>::Compute(
        matrix_batch, &query2[0], dimension, results.data());
  }
  std::cout << "* N Batched InnerProduct (us) \t"
            << elapsed_time.micro_seconds() << std::endl;

  // Unbatched InnerProduct
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const uint8_t *matrix_batch = &matrix1[i * batch_size * dimension / 2];

    for (size_t j = 0; j < query_size; ++j) {
      const uint8_t *current_query = &query1[j * dimension / 2];
      float *current_results = &results[j * batch_size];

      for (size_t k = 0; k < batch_size; ++k) {
        InnerProductMatrix<uint8_t, 1, 1>::Compute(
            &matrix_batch[k * dimension / 2], current_query, dimension,
            &current_results[k]);
      }
    }
  }
  std::cout << "* Unbatched InnerProduct (us) \t"
            << elapsed_time.micro_seconds() << std::endl;
}

// Benchmark driver. The DISABLED_ prefix keeps GoogleTest from running it
// unless disabled tests are explicitly requested.
// Template arguments are <M batch size, N query count, B block count,
// D dimension>; every run uses 512 blocks of 128-dimensional data.
TEST(DistanceMatrix, DISABLED_InnerProduct_Benchmark) {
  InnerProductBenchmark<2, 1, 512, 128>();
  InnerProductBenchmark<2, 2, 512, 128>();
  InnerProductBenchmark<4, 1, 512, 128>();
  InnerProductBenchmark<4, 2, 512, 128>();
  InnerProductBenchmark<4, 4, 512, 128>();
  InnerProductBenchmark<8, 1, 512, 128>();
  InnerProductBenchmark<8, 2, 512, 128>();
  InnerProductBenchmark<8, 4, 512, 128>();
  InnerProductBenchmark<8, 8, 512, 128>();
  InnerProductBenchmark<16, 1, 512, 128>();
  InnerProductBenchmark<16, 2, 512, 128>();
  InnerProductBenchmark<16, 4, 512, 128>();
  InnerProductBenchmark<16, 8, 512, 128>();
  InnerProductBenchmark<16, 16, 512, 128>();
  InnerProductBenchmark<32, 1, 512, 128>();
  InnerProductBenchmark<32, 2, 512, 128>();
  InnerProductBenchmark<32, 4, 512, 128>();
  InnerProductBenchmark<32, 8, 512, 128>();
  InnerProductBenchmark<32, 16, 512, 128>();
  InnerProductBenchmark<32, 32, 512, 128>();
  InnerProductBenchmark<64, 1, 512, 128>();
  InnerProductBenchmark<64, 2, 512, 128>();
  InnerProductBenchmark<64, 4, 512, 128>();
  InnerProductBenchmark<64, 8, 512, 128>();
  InnerProductBenchmark<128, 1, 512, 128>();
}


================================================
FILE: tests/ailego/math/inner_product_matrix_int8_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <functional>
#include <random>
#include <string>
#include <thread>
#include <vector>
#include <ailego/container/bitmap.h>
#include <ailego/internal/cpu_features.h>
#include <ailego/math/distance.h>
#include <gtest/gtest.h>
#include <zvec/ailego/container/vector.h>
#include <zvec/ailego/utility/time_helper.h>

using namespace zvec::ailego;

// Returns the string reported by internal::CpuFeatures::Intrinsics(); the
// benchmarks print it in their header line.
static inline const char *IntelIntrinsics(void) {
  const char *intrinsics_name = internal::CpuFeatures::Intrinsics();
  return intrinsics_name;
}

// Transposes a row-major matrix of 32-bit words.
//
// `src` is read as N rows of M words; `dst` receives M rows of N words, so
// that dst[j * N + i] == src[i * M + j]. The buffers are expected to be
// distinct and to hold at least M * N words each.
static inline void MatrixTranspose(uint32_t *dst, const uint32_t *src, size_t M,
                                   size_t N) {
  for (size_t row = 0; row != N; ++row) {
    const uint32_t *src_row = src + row * M;
    for (size_t col = 0; col != M; ++col) {
      dst[col * N + row] = src_row[col];
    }
  }
}

// Convenience overload: forwards two fixed-size int8 vectors to
// Distance::InnerProduct using lhs.size() as the element count.
template <size_t N>
static float InnerProductDistance(const FixedVector<int8_t, N> &lhs,
                                  const FixedVector<int8_t, N> &rhs) {
  const auto length = lhs.size();
  return Distance::InnerProduct(lhs.data(), rhs.data(), length);
}

// Convenience overload: forwards two fixed-size int8 vectors to
// Distance::MinusInnerProduct using lhs.size() as the element count.
template <size_t N>
static float MinusInnerProductDistance(const FixedVector<int8_t, N> &lhs,
                                       const FixedVector<int8_t, N> &rhs) {
  const auto length = lhs.size();
  return Distance::MinusInnerProduct(lhs.data(), rhs.data(), length);
}

// Checks Distance::InnerProduct (via InnerProductDistance) against
// hand-computed results for int8 vectors of length 1, 17 and 47.
TEST(DistanceMatrix, InnerProduct_General) {
  int8_t a1[] = {0};
  int8_t b1[] = {0};

  // Eleven 127*127 terms, four (-1)*(-1) terms and two 127*(-1) terms:
  // 11 * 16129 + 4 - 254 = 177169.
  int8_t a17[] = {127, -1,  -1,  127, 127, 127, 127, -1, 127,
                  127, 127, 127, 127, 127, -1,  -1,  127};
  int8_t b17[] = {127, -1,  -1,  127, 127, 127, -1, 127, 127,
                  127, 127, 127, 127, 127, -1,  -1, 127};

  int8_t a47[] = {127, 2, 0,    0,    -127, -127, 0,    0,    0,    0,
                  0,   0, -127, -127, 127,  127,  0,    0,    -127, -127,
                  0,   0, 127,  5,    127,  127,  0,    0,    -127, -127,
                  0,   0, -127, 126,  -127, -127, -127, -127, 127,  127,
                  1,   2, 3,    4,    127,  127,  111};
  int8_t b47[] = {-127, 1, 0,    0,   127,  127,  0,   0,    0,   0,
                  0,    0, 127,  127, -127, -127, 0,   0,    127, 127,
                  0,    0, -127, 3,   -127, -127, 0,   0,    127, 127,
                  0,    0, 127,  127, 100,  122,  123, -127, 1,   2,
                  3,    4, -127, 122, -127, -127, -127};

  EXPECT_FLOAT_EQ(0.0f,
                  InnerProductDistance(*FixedVector<int8_t, 1>::Cast(a1),
                                       *FixedVector<int8_t, 1>::Cast(b1)));
  EXPECT_FLOAT_EQ(177169.0f,
                  InnerProductDistance(*FixedVector<int8_t, 17>::Cast(a17),
                                       *FixedVector<int8_t, 17>::Cast(b17)));
  EXPECT_FLOAT_EQ(-299458.0f,
                  InnerProductDistance(*FixedVector<int8_t, 47>::Cast(a47),
                                       *FixedVector<int8_t, 47>::Cast(b47)));
}

// Checks Distance::MinusInnerProduct (via MinusInnerProductDistance) against
// hand-computed results for int8 vectors of length 1, 17 and 47. The expected
// values are the negated inner products of the same inputs.
TEST(DistanceMatrix, MinusInnerProduct_General) {
  int8_t a1[] = {0};
  int8_t b1[] = {0};

  // Inner product is 11 * 16129 + 4 - 254 = 177169, so the expected value
  // below is -177169.
  int8_t a17[] = {127, -1,  -1,  127, 127, 127, 127, -1, 127,
                  127, 127, 127, 127, 127, -1,  -1,  127};
  int8_t b17[] = {127, -1,  -1,  127, 127, 127, -1, 127, 127,
                  127, 127, 127, 127, 127, -1,  -1, 127};

  int8_t a47[] = {127, 2, 0,    0,    -127, -127, 0,    0,    0,    0,
                  0,   0, -127, -127, 127,  127,  0,    0,    -127, -127,
                  0,   0, 127,  5,    127,  127,  0,    0,    -127, -127,
                  0,   0, -127, 126,  -127, -127, -127, -127, 127,  127,
                  1,   2, 3,    4,    127,  127,  111};
  int8_t b47[] = {-127, 1, 0,    0,   127,  127,  0,   0,    0,   0,
                  0,    0, 127,  127, -127, -127, 0,   0,    127, 127,
                  0,    0, -127, 3,   -127, -127, 0,   0,    127, 127,
                  0,    0, 127,  127, 100,  122,  123, -127, 1,   2,
                  3,    4, -127, 122, -127, -127, -127};

  EXPECT_FLOAT_EQ(0.0f,
                  MinusInnerProductDistance(*FixedVector<int8_t, 1>::Cast(a1),
                                            *FixedVector<int8_t, 1>::Cast(b1)));
  EXPECT_FLOAT_EQ(-177169.0f, MinusInnerProductDistance(
                                  *FixedVector<int8_t, 17>::Cast(a17),
                                  *FixedVector<int8_t, 17>::Cast(b17)));
  EXPECT_FLOAT_EQ(299458.0f, MinusInnerProductDistance(
                                 *FixedVector<int8_t, 47>::Cast(a47),
                                 *FixedVector<int8_t, 47>::Cast(b47)));
}

// Cross-checks MinusInnerProductMatrix<int8_t, M, N> against the 1x1 kernel.
//
// M vectors and N queries of a random dimension (a multiple of 4 in
// [4, 256]) are filled with random values in [-127, 127]. The reference
// result is computed pair by pair on the plain layout; the batched result is
// computed on buffers transposed in 4-byte units. Both must agree element by
// element under EXPECT_FLOAT_EQ.
template <size_t M, size_t N>
void TestMinusInnerProductMatrix(void) {
  std::mt19937 rng((std::random_device())());

  const size_t dim = (std::uniform_int_distribution<size_t>(1, 64))(rng) << 2;
  const size_t data_len = M * dim;
  const size_t query_len = N * dim;

  std::vector<int8_t> plain_data(data_len);
  std::vector<int8_t> packed_data(data_len);
  std::vector<int8_t> plain_queries(query_len);
  std::vector<int8_t> packed_queries(query_len);
  std::vector<float> result1(M * N);
  std::vector<float> result2(M * N);

  std::uniform_int_distribution<int> value_dist(-127, 127);
  for (int8_t &value : plain_data) {
    value = static_cast<int8_t>(value_dist(rng));
  }
  for (int8_t &value : plain_queries) {
    value = static_cast<int8_t>(value_dist(rng));
  }

  // Each vector is dim / 4 words long; transpose so that the batched kernel
  // sees word k of every vector contiguously.
  MatrixTranspose(reinterpret_cast<uint32_t *>(&packed_data[0]),
                  reinterpret_cast<const uint32_t *>(plain_data.data()),
                  dim / 4, M);
  MatrixTranspose(reinterpret_cast<uint32_t *>(&packed_queries[0]),
                  reinterpret_cast<const uint32_t *>(plain_queries.data()),
                  dim / 4, N);

  // Reference: one (vector, query) pair per call.
  for (size_t q = 0; q != N; ++q) {
    const int8_t *query = &plain_queries[q * dim];
    float *row = &result1[q * M];

    for (size_t v = 0; v != M; ++v) {
      MinusInnerProductMatrix<int8_t, 1, 1>::Compute(&plain_data[v * dim],
                                                     query, dim, &row[v]);
    }
  }

  // Kernel under test: the whole M x N matrix in one call.
  MinusInnerProductMatrix<int8_t, M, N>::Compute(
      &packed_data[0], &packed_queries[0], dim, &result2[0]);

  for (size_t i = 0; i < M * N; ++i) {
    EXPECT_FLOAT_EQ(result1[i], result2[i]);
  }
}

// Cross-checks InnerProductMatrix<int8_t, M, N> against the 1x1 kernel.
//
// M vectors and N queries of a random dimension (a multiple of 4 in
// [4, 256]) are filled with random values in [-127, 127]. The reference
// result is computed pair by pair on the plain layout; the batched result is
// computed on buffers transposed in 4-byte units. Both must agree element by
// element under EXPECT_FLOAT_EQ.
template <size_t M, size_t N>
void TestInnerProductMatrix(void) {
  std::mt19937 rng((std::random_device())());

  const size_t dim = (std::uniform_int_distribution<size_t>(1, 64))(rng) << 2;
  const size_t data_len = M * dim;
  const size_t query_len = N * dim;

  std::vector<int8_t> plain_data(data_len);
  std::vector<int8_t> packed_data(data_len);
  std::vector<int8_t> plain_queries(query_len);
  std::vector<int8_t> packed_queries(query_len);
  std::vector<float> result1(M * N);
  std::vector<float> result2(M * N);

  std::uniform_int_distribution<int> value_dist(-127, 127);
  for (int8_t &value : plain_data) {
    value = static_cast<int8_t>(value_dist(rng));
  }
  for (int8_t &value : plain_queries) {
    value = static_cast<int8_t>(value_dist(rng));
  }

  // Each vector is dim / 4 words long; transpose so that the batched kernel
  // sees word k of every vector contiguously.
  MatrixTranspose(reinterpret_cast<uint32_t *>(&packed_data[0]),
                  reinterpret_cast<const uint32_t *>(plain_data.data()),
                  dim / 4, M);
  MatrixTranspose(reinterpret_cast<uint32_t *>(&packed_queries[0]),
                  reinterpret_cast<const uint32_t *>(plain_queries.data()),
                  dim / 4, N);

  // Reference: one (vector, query) pair per call.
  for (size_t q = 0; q != N; ++q) {
    const int8_t *query = &plain_queries[q * dim];
    float *row = &result1[q * M];

    for (size_t v = 0; v != M; ++v) {
      InnerProductMatrix<int8_t, 1, 1>::Compute(&plain_data[v * dim], query,
                                                dim, &row[v]);
    }
  }

  // Kernel under test: the whole M x N matrix in one call.
  InnerProductMatrix<int8_t, M, N>::Compute(&packed_data[0],
                                            &packed_queries[0], dim,
                                            &result2[0]);

  for (size_t i = 0; i < M * N; ++i) {
    EXPECT_FLOAT_EQ(result1[i], result2[i]);
  }
}

// Shape sweep for MinusInnerProductMatrix<int8_t, M, N>. Every test below
// forwards to TestMinusInnerProductMatrix<M, N>, where <M, N> matches the
// "MxN" suffix of the test name.
TEST(DistanceMatrix, MinusInnerProduct_1x1) {
  TestMinusInnerProductMatrix<1, 1>();
}

TEST(DistanceMatrix, MinusInnerProduct_2x1) {
  TestMinusInnerProductMatrix<2, 1>();
}

TEST(DistanceMatrix, MinusInnerProduct_2x2) {
  TestMinusInnerProductMatrix<2, 2>();
}

TEST(DistanceMatrix, MinusInnerProduct_3x3) {
  TestMinusInnerProductMatrix<3, 3>();
}

TEST(DistanceMatrix, MinusInnerProduct_4x1) {
  TestMinusInnerProductMatrix<4, 1>();
}

TEST(DistanceMatrix, MinusInnerProduct_4x2) {
  TestMinusInnerProductMatrix<4, 2>();
}

TEST(DistanceMatrix, MinusInnerProduct_4x4) {
  TestMinusInnerProductMatrix<4, 4>();
}

TEST(DistanceMatrix, MinusInnerProduct_8x1) {
  TestMinusInnerProductMatrix<8, 1>();
}

TEST(DistanceMatrix, MinusInnerProduct_8x2) {
  TestMinusInnerProductMatrix<8, 2>();
}

TEST(DistanceMatrix, MinusInnerProduct_8x4) {
  TestMinusInnerProductMatrix<8, 4>();
}

TEST(DistanceMatrix, MinusInnerProduct_8x8) {
  TestMinusInnerProductMatrix<8, 8>();
}

TEST(DistanceMatrix, MinusInnerProduct_16x1) {
  TestMinusInnerProductMatrix<16, 1>();
}

TEST(DistanceMatrix, MinusInnerProduct_16x2) {
  TestMinusInnerProductMatrix<16, 2>();
}

TEST(DistanceMatrix, MinusInnerProduct_16x4) {
  TestMinusInnerProductMatrix<16, 4>();
}

TEST(DistanceMatrix, MinusInnerProduct_16x8) {
  TestMinusInnerProductMatrix<16, 8>();
}

TEST(DistanceMatrix, MinusInnerProduct_16x16) {
  TestMinusInnerProductMatrix<16, 16>();
}

TEST(DistanceMatrix, MinusInnerProduct_32x1) {
  TestMinusInnerProductMatrix<32, 1>();
}

TEST(DistanceMatrix, MinusInnerProduct_32x2) {
  TestMinusInnerProductMatrix<32, 2>();
}

TEST(DistanceMatrix, MinusInnerProduct_32x4) {
  TestMinusInnerProductMatrix<32, 4>();
}

TEST(DistanceMatrix, MinusInnerProduct_32x8) {
  TestMinusInnerProductMatrix<32, 8>();
}

TEST(DistanceMatrix, MinusInnerProduct_32x16) {
  TestMinusInnerProductMatrix<32, 16>();
}

TEST(DistanceMatrix, MinusInnerProduct_32x32) {
  TestMinusInnerProductMatrix<32, 32>();
}

TEST(DistanceMatrix, MinusInnerProduct_64x1) {
  TestMinusInnerProductMatrix<64, 1>();
}

TEST(DistanceMatrix, MinusInnerProduct_64x2) {
  TestMinusInnerProductMatrix<64, 2>();
}

TEST(DistanceMatrix, MinusInnerProduct_64x4) {
  TestMinusInnerProductMatrix<64, 4>();
}

TEST(DistanceMatrix, MinusInnerProduct_64x8) {
  TestMinusInnerProductMatrix<64, 8>();
}

TEST(DistanceMatrix, MinusInnerProduct_64x16) {
  TestMinusInnerProductMatrix<64, 16>();
}

TEST(DistanceMatrix, MinusInnerProduct_64x32) {
  TestMinusInnerProductMatrix<64, 32>();
}

// Covers the 64x64 MinusInnerProduct shape. The template arguments must match
// the test name; instantiating <64, 128> here would leave the 64x64 kernel
// untested.
TEST(DistanceMatrix, MinusInnerProduct_64x64) {
  TestMinusInnerProductMatrix<64, 64>();
}

// MinusInnerProduct shape sweep with a batch size of 128. Each test forwards
// to TestMinusInnerProductMatrix<128, N> with N taken from the test name.
TEST(DistanceMatrix, MinusInnerProduct_128x1) {
  TestMinusInnerProductMatrix<128, 1>();
}

TEST(DistanceMatrix, MinusInnerProduct_128x2) {
  TestMinusInnerProductMatrix<128, 2>();
}

TEST(DistanceMatrix, MinusInnerProduct_128x4) {
  TestMinusInnerProductMatrix<128, 4>();
}

TEST(DistanceMatrix, MinusInnerProduct_128x8) {
  TestMinusInnerProductMatrix<128, 8>();
}

TEST(DistanceMatrix, MinusInnerProduct_128x16) {
  TestMinusInnerProductMatrix<128, 16>();
}

TEST(DistanceMatrix, MinusInnerProduct_128x32) {
  TestMinusInnerProductMatrix<128, 32>();
}

// Covers the 128x64 MinusInnerProduct shape. The query size is 64 so that
// this test does not merely repeat the 128x128 case.
TEST(DistanceMatrix, MinusInnerProduct_128x64) {
  TestMinusInnerProductMatrix<128, 64>();
}

// Largest shape of the MinusInnerProduct sweep: 128 vectors against 128
// queries.
TEST(DistanceMatrix, MinusInnerProduct_128x128) {
  TestMinusInnerProductMatrix<128, 128>();
}

// Shape sweep for InnerProductMatrix<int8_t, M, N>. Every test below forwards
// to TestInnerProductMatrix<M, N>, where <M, N> matches the "MxN" suffix of
// the test name.
TEST(DistanceMatrix, InnerProduct_1x1) {
  TestInnerProductMatrix<1, 1>();
}

TEST(DistanceMatrix, InnerProduct_2x1) {
  TestInnerProductMatrix<2, 1>();
}

TEST(DistanceMatrix, InnerProduct_2x2) {
  TestInnerProductMatrix<2, 2>();
}

TEST(DistanceMatrix, InnerProduct_3x3) {
  TestInnerProductMatrix<3, 3>();
}

TEST(DistanceMatrix, InnerProduct_4x1) {
  TestInnerProductMatrix<4, 1>();
}

TEST(DistanceMatrix, InnerProduct_4x2) {
  TestInnerProductMatrix<4, 2>();
}

TEST(DistanceMatrix, InnerProduct_4x4) {
  TestInnerProductMatrix<4, 4>();
}

TEST(DistanceMatrix, InnerProduct_8x1) {
  TestInnerProductMatrix<8, 1>();
}

TEST(DistanceMatrix, InnerProduct_8x2) {
  TestInnerProductMatrix<8, 2>();
}

TEST(DistanceMatrix, InnerProduct_8x4) {
  TestInnerProductMatrix<8, 4>();
}

TEST(DistanceMatrix, InnerProduct_8x8) {
  TestInnerProductMatrix<8, 8>();
}

TEST(DistanceMatrix, InnerProduct_16x1) {
  TestInnerProductMatrix<16, 1>();
}

TEST(DistanceMatrix, InnerProduct_16x2) {
  TestInnerProductMatrix<16, 2>();
}

TEST(DistanceMatrix, InnerProduct_16x4) {
  TestInnerProductMatrix<16, 4>();
}

TEST(DistanceMatrix, InnerProduct_16x8) {
  TestInnerProductMatrix<16, 8>();
}

TEST(DistanceMatrix, InnerProduct_16x16) {
  TestInnerProductMatrix<16, 16>();
}

TEST(DistanceMatrix, InnerProduct_32x1) {
  TestInnerProductMatrix<32, 1>();
}

TEST(DistanceMatrix, InnerProduct_32x2) {
  TestInnerProductMatrix<32, 2>();
}

TEST(DistanceMatrix, InnerProduct_32x4) {
  TestInnerProductMatrix<32, 4>();
}

TEST(DistanceMatrix, InnerProduct_32x8) {
  TestInnerProductMatrix<32, 8>();
}

TEST(DistanceMatrix, InnerProduct_32x16) {
  TestInnerProductMatrix<32, 16>();
}

TEST(DistanceMatrix, InnerProduct_32x32) {
  TestInnerProductMatrix<32, 32>();
}

TEST(DistanceMatrix, InnerProduct_64x1) {
  TestInnerProductMatrix<64, 1>();
}

TEST(DistanceMatrix, InnerProduct_64x2) {
  TestInnerProductMatrix<64, 2>();
}

TEST(DistanceMatrix, InnerProduct_64x4) {
  TestInnerProductMatrix<64, 4>();
}

TEST(DistanceMatrix, InnerProduct_64x8) {
  TestInnerProductMatrix<64, 8>();
}

TEST(DistanceMatrix, InnerProduct_64x16) {
  TestInnerProductMatrix<64, 16>();
}

TEST(DistanceMatrix, InnerProduct_64x32) {
  TestInnerProductMatrix<64, 32>();
}

// Covers the 64x64 InnerProduct shape. The template arguments must match the
// test name; instantiating <64, 128> here would leave the 64x64 kernel
// untested.
TEST(DistanceMatrix, InnerProduct_64x64) {
  TestInnerProductMatrix<64, 64>();
}

// InnerProduct shape sweep with a batch size of 128. Each test forwards to
// TestInnerProductMatrix<128, N> with N taken from the test name.
TEST(DistanceMatrix, InnerProduct_128x1) {
  TestInnerProductMatrix<128, 1>();
}

TEST(DistanceMatrix, InnerProduct_128x2) {
  TestInnerProductMatrix<128, 2>();
}

TEST(DistanceMatrix, InnerProduct_128x4) {
  TestInnerProductMatrix<128, 4>();
}

TEST(DistanceMatrix, InnerProduct_128x8) {
  TestInnerProductMatrix<128, 8>();
}

TEST(DistanceMatrix, InnerProduct_128x16) {
  TestInnerProductMatrix<128, 16>();
}

TEST(DistanceMatrix, InnerProduct_128x32) {
  TestInnerProductMatrix<128, 32>();
}

// Covers the 128x64 InnerProduct shape. The query size is 64 so that this
// test does not merely repeat the 128x128 case.
TEST(DistanceMatrix, InnerProduct_128x64) {
  TestInnerProductMatrix<128, 64>();
}

// Largest shape of the InnerProduct sweep: 128 vectors against 128 queries.
TEST(DistanceMatrix, InnerProduct_128x128) {
  TestInnerProductMatrix<128, 128>();
}

template <size_t M, size_t N, size_t B, size_t D>
void InnerProductBenchmark(void) {
  const size_t dimension = D;
  const size_t batch_size = M;
  const size_t block_size = B;
  const size_t query_size = N;
  const size_t matrix_size = block_size * batch_size * dimension;
  const size_t query_matrix_size = dimension * query_size;

  std::vector<int8_t> matrix1(matrix_size);
  std::vector<int8_t> matrix2(matrix_size);
  std::vector<int8_t> query1(query_matrix_size);
  std::vector<int8_t> query2(query_matrix_size);

  std::mt19937 gen((std::random_device())());
  std::uniform_int_distribution<int> dist(-127, 127);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = (int8_t)dist(gen);
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = (int8_t)dist(gen);
  }

  for (size_t i = 0; i < block_size; ++i) {
    size_t start_pos = i * batch_size * dimension;
    MatrixTranspose((uint32_t *)(&matrix2[start_pos]),
                    (const uint32_t *)(&matrix1[start_pos]), dimension / 4,
                    batch_size);
  }
  MatrixTranspose((uint32_t *)(&query2[0]), (const uint32_t *)query1.data(),
                  dimension / 4, query_size);

  ElapsedTime elapsed_time;
  std::vector<float> results(batch_size * query_size);

  std::cout << "# (" << IntelIntrinsics() << ") INT8 " << dimension << "d, "
            << batch_size << " * " << query_size << " * " << block_size
            << std::endl;

  // 1 Batched InnerProduct
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const int8_t *matrix_batch = &matrix2[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const int8_t *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      InnerProductMatrix<int8_t, batch_size, 1>::Compute(
          matrix_batch, current_query, dimension, current_results);
    }
  }
  std::cout << "* 1 Batched InnerProduct (us) \t"
            << elapsed_time.micro_seconds() << std::endl;

  // N Batched InnerProduct
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const int8_t *matrix_batch = &matrix2[i * batch_size * dimension];

    InnerProductMatrix<int8_t, batch_size, query_size>::Compute(
        matrix_batch, &query2[0], dimension, results.data());
  }
  std::cout << "* N Batched InnerProduct (us) \t"
            << elapsed_time.micro_seconds() << std::endl;

  // Unbatched InnerProduct
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const int8_t *matrix_batch = &matrix1[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const int8_t *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      for (size_t k = 0; k < batch_size; ++k) {
        InnerProductMatrix<int8_t, 1, 1>::Compute(&matrix_batch[k * dimension],
                                                  current_query, dimension,
                                                  &current_results[k]);
      }
    }
  }
  std::cout << "* Unbatched InnerProduct (us) \t"
            << elapsed_time.micro_seconds() << std::endl;
}

template <size_t M, size_t N, size_t B, size_t D>
void MinusInnerProductBenchmark(void) {
  const size_t dimension = D;
  const size_t batch_size = M;
  const size_t block_size = B;
  const size_t query_size = N;
  const size_t matrix_size = block_size * batch_size * dimension;
  const size_t query_matrix_size = dimension * query_size;

  std::vector<int8_t> matrix1(matrix_size);
  std::vector<int8_t> matrix2(matrix_size);
  std::vector<int8_t> query1(query_matrix_size);
  std::vector<int8_t> query2(query_matrix_size);

  std::mt19937 gen((std::random_device())());
  std::uniform_int_distribution<int> dist(-127, 127);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = (int8_t)dist(gen);
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = (int8_t)dist(gen);
  }

  for (size_t i = 0; i < block_size; ++i) {
    size_t start_pos = i * batch_size * dimension;
    MatrixTranspose((uint32_t *)(&matrix2[start_pos]),
                    (const uint32_t *)(&matrix1[start_pos]), dimension / 4,
                    batch_size);
  }
  MatrixTranspose((uint32_t *)(&query2[0]), (const uint32_t *)query1.data(),
                  dimension / 4, query_size);

  ElapsedTime elapsed_time;
  std::vector<float> results(batch_size * query_size);

  std::cout << "# (" << IntelIntrinsics() << ") INT8 " << dimension << "d, "
            << batch_size << " * " << query_size << " * " << block_size
            << std::endl;

  // 1 Batched MinusInnerProduct
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const int8_t *matrix_batch = &matrix2[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const int8_t *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      MinusInnerProductMatrix<int8_t, batch_size, 1>::Compute(
          matrix_batch, current_query, dimension, current_results);
    }
  }
  std::cout << "* 1 Batched MinusInnerProduct (us) \t"
            << elapsed_time.micro_seconds() << std::endl;

  // N Batched MinusInnerProduct
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const int8_t *matrix_batch = &matrix2[i * batch_size * dimension];

    MinusInnerProductMatrix<int8_t, batch_size, query_size>::Compute(
        matrix_batch, &query2[0], dimension, results.data());
  }
  std::cout << "* N Batched MinusInnerProduct (us) \t"
            << elapsed_time.micro_seconds() << std::endl;

  // Unbatched MinusInnerProduct
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const int8_t *matrix_batch = &matrix1[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const int8_t *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      for (size_t k = 0; k < batch_size; ++k) {
        MinusInnerProductMatrix<int8_t, 1, 1>::Compute(
            &matrix_batch[k * dimension], current_query, dimension,
            &current_results[k]);
      }
    }
  }
  std::cout << "* Unbatched MinusInnerProduct (us) \t"
            << elapsed_time.micro_seconds() << std::endl;
}

// Benchmark driver for InnerProductMatrix<int8_t, ...>. The DISABLED_ prefix
// keeps GoogleTest from running it unless disabled tests are explicitly
// requested.
// Template arguments are <M batch size, N query count, B block count,
// D dimension>; every run uses 512 blocks of 128-dimensional vectors.
TEST(DistanceMatrix, DISABLED_InnerProduct_Benchmark) {
  InnerProductBenchmark<2, 1, 512, 128>();
  InnerProductBenchmark<2, 2, 512, 128>();
  InnerProductBenchmark<4, 1, 512, 128>();
  InnerProductBenchmark<4, 2, 512, 128>();
  InnerProductBenchmark<4, 4, 512, 128>();
  InnerProductBenchmark<8, 1, 512, 128>();
  InnerProductBenchmark<8, 2, 512, 128>();
  InnerProductBenchmark<8, 4, 512, 128>();
  InnerProductBenchmark<8, 8, 512, 128>();
  InnerProductBenchmark<16, 1, 512, 128>();
  InnerProductBenchmark<16, 2, 512, 128>();
  InnerProductBenchmark<16, 4, 512, 128>();
  InnerProductBenchmark<16, 8, 512, 128>();
  InnerProductBenchmark<16, 16, 512, 128>();
  InnerProductBenchmark<32, 1, 512, 128>();
  InnerProductBenchmark<32, 2, 512, 128>();
  InnerProductBenchmark<32, 4, 512, 128>();
  InnerProductBenchmark<32, 8, 512, 128>();
  InnerProductBenchmark<32, 16, 512, 128>();
  InnerProductBenchmark<32, 32, 512, 128>();
  InnerProductBenchmark<64, 1, 512, 128>();
  InnerProductBenchmark<64, 2, 512, 128>();
  InnerProductBenchmark<64, 4, 512, 128>();
  InnerProductBenchmark<64, 8, 512, 128>();
  InnerProductBenchmark<128, 1, 512, 128>();
}

// Benchmark driver for MinusInnerProductMatrix<int8_t, ...>. The DISABLED_
// prefix keeps GoogleTest from running it unless disabled tests are
// explicitly requested.
// Template arguments are <M batch size, N query count, B block count,
// D dimension>; every run uses 512 blocks of 128-dimensional vectors.
TEST(DistanceMatrix, DISABLED_MinusInnerProduct_Benchmark) {
  MinusInnerProductBenchmark<2, 1, 512, 128>();
  MinusInnerProductBenchmark<2, 2, 512, 128>();
  MinusInnerProductBenchmark<4, 1, 512, 128>();
  MinusInnerProductBenchmark<4, 2, 512, 128>();
  MinusInnerProductBenchmark<4, 4, 512, 128>();
  MinusInnerProductBenchmark<8, 1, 512, 128>();
  MinusInnerProductBenchmark<8, 2, 512, 128>();
  MinusInnerProductBenchmark<8, 4, 512, 128>();
  MinusInnerProductBenchmark<8, 8, 512, 128>();
  MinusInnerProductBenchmark<16, 1, 512, 128>();
  MinusInnerProductBenchmark<16, 2, 512, 128>();
  MinusInnerProductBenchmark<16, 4, 512, 128>();
  MinusInnerProductBenchmark<16, 8, 512, 128>();
  MinusInnerProductBenchmark<16, 16, 512, 128>();
  MinusInnerProductBenchmark<32, 1, 512, 128>();
  MinusInnerProductBenchmark<32, 2, 512, 128>();
  MinusInnerProductBenchmark<32, 4, 512, 128>();
  MinusInnerProductBenchmark<32, 8, 512, 128>();
  MinusInnerProductBenchmark<32, 16, 512, 128>();
  MinusInnerProductBenchmark<32, 32, 512, 128>();
  MinusInnerProductBenchmark<64, 1, 512, 128>();
  MinusInnerProductBenchmark<64, 2, 512, 128>();
  MinusInnerProductBenchmark<64, 4, 512, 128>();
  MinusInnerProductBenchmark<64, 8, 512, 128>();
  MinusInnerProductBenchmark<128, 1, 512, 128>();
}


================================================
FILE: tests/ailego/math/mips_euclidean_distance_matrix_fp16_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <functional>
#include <random>
#include <string>
#include <thread>
#include <vector>
#include <ailego/container/bitmap.h>
#include <ailego/internal/cpu_features.h>
#include <ailego/math/distance.h>
#include <ailego/math/norm2_matrix.h>
#include <gtest/gtest.h>
#include <zvec/ailego/container/vector.h>
#include <zvec/ailego/utility/time_helper.h>

using namespace zvec;
using namespace zvec::ailego;

// Thin wrapper returning whatever internal::CpuFeatures::Intrinsics() reports.
static inline const char *IntelIntrinsics(void) {
  const char *intrinsics_name = internal::CpuFeatures::Intrinsics();
  return intrinsics_name;
}

// Transposes a row-major Float16 matrix.
//
// `src` is read as N rows of M elements; `dst` receives M rows of N elements,
// so that dst[i * N + j] == src[j * M + i]. Elements of `dst` are written in
// increasing index order.
static inline void MatrixTranspose(Float16 *dst, const Float16 *src, size_t M,
                                   size_t N) {
  for (size_t row = 0; row != M; ++row) {
    Float16 *dst_row = dst + row * N;
    for (size_t col = 0; col != N; ++col) {
      dst_row[col] = src[M * col + row];
    }
  }
}

// Pointer overload: forwards all arguments unchanged to
// Distance::MipsSquaredEuclidean and returns its result.
static float MipsSquaredEuclideanDistance(const Float16 *lhs,
                                          const Float16 *rhs, size_t dim,
                                          size_t m_value, float e2) {
  const float distance =
      Distance::MipsSquaredEuclidean(lhs, rhs, dim, m_value, e2);
  return distance;
}

// FixedVector overload: unwraps both vectors and delegates to the pointer
// overload, using lhs.size() as the dimension.
template <size_t N>
static float MipsSquaredEuclideanDistance(const FixedVector<Float16, N> &lhs,
                                          const FixedVector<Float16, N> &rhs,
                                          size_t m_value, float e2) {
  const auto length = lhs.size();
  return MipsSquaredEuclideanDistance(lhs.data(), rhs.data(), length, m_value,
                                      e2);
}

static float ConvertAndComputeByMips(const Float16 *lhs, const Float16 *rhs,
                                     size_t dim, size_t m_value, float e2) {
  float squ = 0.0f;
  std::vector<float> lhs_vec(dim + m_value);
  const float eta = std::sqrt(e2);
  for (size_t i = 0; i < dim; ++i) {
    float val = lhs[i] * eta;
    lhs_vec[i] = val;
    squ += val * val;
  }
  for (size_t i = dim; i < dim + m_value; ++i) {
    lhs_vec[i] = 0.5f - squ;
    squ *= squ;
  }
  std::vector<float> rhs_vec(dim + m_value);
  squ = 0.0f;
  for (size_t i = 0; i < dim; ++i) {
    float val = rhs[i] * eta;
    rhs_vec[i] = val;
    squ += val * val;
  }
  for (size_t i = dim; i < dim + m_value; ++i) {
    rhs_vec[i] = 0.5f - squ;
    squ *= squ;
  }
  return ailego::Distance::SquaredEuclidean(lhs_vec.data(), rhs_vec.data(),
                                            dim + m_value);
}

// FixedVector overload: unwraps both vectors and delegates to the pointer
// overload, using lhs.size() as the dimension.
template <size_t N>
static float ConvertAndComputeByMips(const FixedVector<Float16, N> &lhs,
                                     const FixedVector<Float16, N> &rhs,
                                     size_t m_value, float e2) {
  const auto length = lhs.size();
  return ConvertAndComputeByMips(lhs.data(), rhs.data(), length, m_value, e2);
}

// Compares Distance::MipsSquaredEuclidean (m_value variant) with the
// convert-then-L2 reference on randomly generated Float16 vector pairs.
TEST(DistanceMatrix, GeneralRepeatedQuadraticInjection) {
  std::mt19937 rng((std::random_device())());

  // Randomized test parameters (drawn in the order m, u, dim, count).
  std::uniform_int_distribution<size_t> m_picker(1, 4);
  const size_t m_val = m_picker(rng);
  std::uniform_real_distribution<float> u_picker(0.1, 1.0);
  const float u_val = u_picker(rng);
  std::uniform_int_distribution<uint32_t> dim_picker(2, 128);
  const uint32_t dim = dim_picker(rng);
  std::uniform_int_distribution<uint32_t> count_picker(1, 1000);
  const uint32_t count = count_picker(rng);

  const float epsilon = 1e-2;
  std::uniform_real_distribution<float> component(-1.0, 1.0);

  for (uint32_t round = 0; round < count; ++round) {
    std::vector<Float16> lhs(dim);
    std::vector<Float16> rhs(dim);
    for (uint32_t d = 0; d < dim; ++d) {
      lhs[d] = component(rng);
      rhs[d] = component(rng);
    }

    // Scale so that the larger of the two squared norms maps to u_val^2.
    float lhs_norm{0.0};
    float rhs_norm{0.0};
    SquaredNorm2Matrix<Float16, 1>::Compute(lhs.data(), dim, &lhs_norm);
    SquaredNorm2Matrix<Float16, 1>::Compute(rhs.data(), dim, &rhs_norm);
    const float e2 = u_val * u_val / std::max(lhs_norm, rhs_norm);

    const float expected =
        ConvertAndComputeByMips(lhs.data(), rhs.data(), dim, m_val, e2);
    const float actual =
        MipsSquaredEuclideanDistance(lhs.data(), rhs.data(), dim, m_val, e2);
    ASSERT_NEAR(expected, actual, epsilon);
  }
}

// Checks the m_value variant of the MIPS squared Euclidean distance on
// hand-written FixedVector<Float16, N> inputs for N = 1..15, using fixed
// parameters (m = 4, e2 = (0.68 / 15.5)^2).
//
// The test is fully deterministic, so no random generator is created here.
TEST(DistanceMatrix, FixedVectorsRepeatedQuadraticInjection) {
  const size_t m_val = 4;
  const float u_val = 0.68f;
  const float l2_norm = 15.5f;
  const float e2 = (u_val / l2_norm) * (u_val / l2_norm);
  const float epsilon = 1e-2;

  // N = 1..10: both operands are identical, so the distance must be ~0.
  FixedVector<Float16, 1> a{0.0f}, b{0.0f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclideanDistance(a, b, m_val, e2), epsilon);

  FixedVector<Float16, 2> c{0.0f, 0.1f}, d{0.0f, 0.1f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclideanDistance(c, d, m_val, e2), epsilon);

  FixedVector<Float16, 3> e{0.0f, 0.1f, 0.2f}, f{0.0f, 0.1f, 0.2f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclideanDistance(e, f, m_val, e2), epsilon);

  FixedVector<Float16, 4> g{0.0f, 0.1f, 0.2f, 0.3f}, h{0.0f, 0.1f, 0.2f, 0.3f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclideanDistance(g, h, m_val, e2), epsilon);

  FixedVector<Float16, 5> i{0.0f, 0.1f, 0.2f, 0.3f, 0.4f},
      j{0.0f, 0.1f, 0.2f, 0.3f, 0.4f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclideanDistance(i, j, m_val, e2), epsilon);

  FixedVector<Float16, 6> l{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f},
      k{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclideanDistance(l, k, m_val, e2), epsilon);

  FixedVector<Float16, 7> m{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f},
      n{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclideanDistance(m, n, m_val, e2), epsilon);

  FixedVector<Float16, 8> o{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f},
      p{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclideanDistance(o, p, m_val, e2), epsilon);

  FixedVector<Float16, 9> q{0.0f, 0.1f, 0.2f, 0.3f, 0.4f,
                            0.5f, 0.6f, 0.7f, 0.8f},
      r{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclideanDistance(q, r, m_val, e2), epsilon);

  FixedVector<Float16, 10> s{0.0f, 0.1f, 0.2f, 0.3f, 0.4f,
                             0.5f, 0.6f, 0.7f, 0.8f, 0.9f},
      t{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclideanDistance(s, t, m_val, e2), epsilon);

  // N = 11..15: the first operand is brace-initialized from a single 0.0f and
  // the result is compared against a precomputed constant.
  FixedVector<Float16, 11> u{0.0f},
      v{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.0f};
  EXPECT_NEAR(0.00746485f, MipsSquaredEuclideanDistance(u, v, m_val, e2),
              epsilon);

  FixedVector<Float16, 12> w{0.0f},
      x{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.0f, 1.1f};
  EXPECT_NEAR(0.00983364f, MipsSquaredEuclideanDistance(w, x, m_val, e2),
              epsilon);

  FixedVector<Float16, 13> y{0.0f}, z{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f,
                                      0.7f, 0.8f, 0.9f, 1.0f, 1.1f, 1.2f};
  EXPECT_NEAR(0.0126668f, MipsSquaredEuclideanDistance(y, z, m_val, e2),
              epsilon);

  FixedVector<Float16, 14> x14{0.0f},
      y14{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f,
          0.7f, 0.8f, 0.9f, 1.0f, 1.1f, 1.2f, 2.0f};
  EXPECT_NEAR(0.0206175f, MipsSquaredEuclideanDistance(x14, y14, m_val, e2),
              epsilon);

  FixedVector<Float16, 15> x15{0.0f},
      y15{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f,
          0.8f, 0.9f, 1.0f, 1.1f, 1.2f, 2.0f, 3.0f};
  EXPECT_NEAR(0.0389414f, MipsSquaredEuclideanDistance(x15, y15, m_val, e2),
              epsilon);
}

template <size_t M, size_t N>
void TestSquaredEuclideanMatrixRepeatedQuadraticInjection(void) {
  std::mt19937 gen((std::random_device())());

  const size_t m_val = (std::uniform_int_distribution<size_t>(1, 4))(gen);
  const float u_val = (std::uniform_real_distribution<float>(0.3, 0.9))(gen);
  const size_t batch_size = M;
  const size_t query_size = N;
  size_t dimension = (std::uniform_int_distribution<size_t>(2, 128))(gen);
  size_t matrix_size = batch_size * dimension;
  size_t query_matrix_size = query_size * dimension;

  std::vector<Float16> matrix1(matrix_size);
  std::vector<Float16> matrix2(matrix_size);
  std::vector<Float16> query1(query_matrix_size);
  std::vector<Float16> query2(query_matrix_size);
  std::vector<float> result1(batch_size * query_size);
  std::vector<float> result2(batch_size * query_size);

  std::uniform_real_distribution<float> dist(-1.0, 1.0);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = dist(gen);
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = dist(gen);
  }
  float squared_l2_norm = 0.0f;
  for (size_t i = 0; i < matrix_size; i += dimension) {
    float score{0.0};
    SquaredNorm2Matrix<Float16, 1>::Compute(&matrix1[i], dimension, &score);
    squared_l2_norm = std::max(squared_l2_norm, score);
  }
  for (size_t i = 0; i < query_matrix_size; i += dimension) {
    float score{0.0};
    SquaredNorm2Matrix<Float16, 1>::Compute(&query1[i], dimension, &score);
    squared_l2_norm = std::max(squared_l2_norm, score);
  }
  const float e2 = u_val * u_val / squared_l2_norm;
  MatrixTranspose(&matrix2[0], matrix1.data(), dimension, batch_size);
  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);

  for (size_t i = 0; i < query_size; ++i) {
    const Float16 *cur_query = &query1[i * dimension];
    float *query_result = &result1[i * batch_size];

    for (size_t j = 0; j < batch_size; ++j) {
      MipsSquaredEuclideanDistanceMatrix<Float16, 1, 1>::Compute(
          &matrix1[j * dimension], cur_query, dimension, m_val, e2,
          &query_result[j]);
    }
  }
  MipsSquaredEuclideanDistanceMatrix<Float16, batch_size, query_size>::Compute(
      &matrix2[0], &query2[0], dimension, m_val, e2, &result2[0]);

  for (size_t i = 0; i < batch_size * query_size; ++i) {
    EXPECT_NEAR(result1[i], result2[i], 1e-2);
  }
}

// Instantiations of TestSquaredEuclideanMatrixRepeatedQuadraticInjection<M, N>.
// The `_MxN` suffix of each test name gives the template arguments: M is the
// batch size and N is the query size.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_1x1) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<1, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_2x1) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<2, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_2x2) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<2, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_3x3) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<3, 3>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_4x1) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<4, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_4x2) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<4, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_4x4) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<4, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_8x1) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<8, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_8x2) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<8, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_8x4) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<8, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_8x8) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<8, 8>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x1) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<16, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x2) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<16, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x4) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<16, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x8) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<16, 8>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x16) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<16, 16>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x1) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x2) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x4) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x8) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 8>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x16) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 16>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x32) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 32>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x1) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x2) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x4) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x8) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 8>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x16) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 16>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x32) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 32>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x64) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 64>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x1) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x2) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x4) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x8) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 8>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x16) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 16>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x32) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 32>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x64) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 64>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x128) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 128>();
}

// Benchmark for MipsSquaredEuclideanDistanceMatrix (m_value variant) on
// Float16 data.
//
// Template parameters:
//   M - batch size (vectors per block)
//   N - query size (number of query vectors)
//   B - block size (number of batches processed)
//   D - vector dimension
//
// Times three strategies over the same random data and prints the elapsed
// microseconds of each to stdout:
//   1. "1 Batched": an M x 1 kernel call per (block, query) pair.
//   2. "N Batched": one M x N kernel call per block.
//   3. "Unbatched": a 1 x 1 kernel call per (block, query, vector) triple.
// Results are written to a shared buffer and never checked; this function only
// measures time.
template <size_t M, size_t N, size_t B, size_t D>
void MipsRepeatedQuadraticInjectionBenchMark(void) {
  const size_t m_val = 4;
  const float u_val = 0.6;
  const float l2_norm = 1.0f;
  const float e2 = (u_val / l2_norm) * (u_val / l2_norm);
  const size_t dimension = D;
  const size_t batch_size = M;
  const size_t block_size = B;
  const size_t query_size = N;
  const size_t matrix_size = block_size * batch_size * dimension;
  const size_t query_matrix_size = dimension * query_size;

  // matrix1/query1 hold the row-major data; matrix2/query2 receive the
  // transposed copies used by the batched kernels.
  std::vector<Float16> matrix1(matrix_size);
  std::vector<Float16> matrix2(matrix_size);
  std::vector<Float16> query1(query_matrix_size);
  std::vector<Float16> query2(query_matrix_size);

  std::mt19937 gen((std::random_device())());
  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = dist(gen);
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = dist(gen);
  }

  // Transpose each block independently, then the whole query matrix.
  for (size_t i = 0; i < block_size; ++i) {
    size_t start_pos = i * batch_size * dimension;
    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], dimension,
                    batch_size);
  }
  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);

  ElapsedTime elapsed_time;
  std::vector<float> results(batch_size * query_size);

  // Header line: intrinsics name, dimension and the M * N * B configuration.
  std::cout << "# (" << IntelIntrinsics() << ") FP16 " << dimension << "d, "
            << batch_size << " * " << query_size << " * " << block_size
            << std::endl;

  // 1 Batched Euclidean: transposed batch against one row-major query at a
  // time.
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const Float16 *matrix_batch = &matrix2[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const Float16 *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      MipsSquaredEuclideanDistanceMatrix<Float16, batch_size, 1>::Compute(
          matrix_batch, current_query, dimension, m_val, e2, current_results);
    }
  }
  std::cout
      << "* 1 Batched MipsSquaredEuclideanDistance(RepeatedQuadraticInjection) "
         "(us) \t"
      << elapsed_time.micro_seconds() << std::endl;

  // N Batched Euclidean: transposed batch against the transposed query matrix
  // in a single call per block.
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const Float16 *matrix_batch = &matrix2[i * batch_size * dimension];

    MipsSquaredEuclideanDistanceMatrix<Float16, batch_size,
                                       query_size>::Compute(matrix_batch,
                                                            &query2[0],
                                                            dimension, m_val,
                                                            e2, results.data());
  }
  std::cout
      << "* N Batched MipsSquaredEuclideanDistance(RepeatedQuadraticInjection) "
         "(us) \t"
      << elapsed_time.micro_seconds() << std::endl;

  // Unbatched Euclidean: row-major data, one vector pair per call.
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const Float16 *matrix_batch = &matrix1[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const Float16 *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      for (size_t k = 0; k < batch_size; ++k) {
        MipsSquaredEuclideanDistanceMatrix<Float16, 1, 1>::Compute(
            &matrix_batch[k * dimension], current_query, dimension, m_val, e2,
            &current_results[k]);
      }
    }
  }
  std::cout
      << "* Unbatched MipsSquaredEuclideanDistance(RepeatedQuadraticInjection) "
         "(us) \t"
      << elapsed_time.micro_seconds() << std::endl;
}

// Runs MipsRepeatedQuadraticInjectionBenchMark<M, N, B, D> for a range of
// batch/query sizes (512 blocks of 64-d vectors, plus one 1 x 1 run with 1024
// blocks of 256-d vectors). The `DISABLED_` prefix keeps GoogleTest from
// running it by default.
TEST(DistanceMatrix,
     DISABLED_MipsSquaredEuclideanRepeatedQuadraticInjection_Benchmark) {
  MipsRepeatedQuadraticInjectionBenchMark<2, 1, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<2, 2, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<4, 1, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<4, 2, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<4, 4, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<8, 1, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<8, 2, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<8, 4, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<8, 8, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<16, 1, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<16, 2, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<16, 4, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<16, 8, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<16, 16, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<32, 1, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<32, 2, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<32, 4, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<32, 8, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<32, 16, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<32, 32, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<64, 1, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<64, 2, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<64, 4, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<64, 8, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<128, 1, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<1, 1, 1024, 256>();
}

// Thin wrapper over Distance::MipsSquaredEuclidean for raw Float16 buffers
// (the overload without an `m_value` argument).
static float MipsSquaredEuclidean(const Float16 *lhs, const Float16 *rhs,
                                  size_t dim, float e2) {
  const float distance = Distance::MipsSquaredEuclidean(lhs, rhs, dim, e2);
  return distance;
}

template <size_t N>
static float MipsSquaredEuclidean(const FixedVector<Float16, N> &lhs,
                                  const FixedVector<Float16, N> &rhs,
                                  float e2) {
  return MipsSquaredEuclidean(lhs.data(), rhs.data(), lhs.size(), e2);
}

// Reference implementation used to validate the library routine (overload
// without `m_value`).
//
// Each input is scaled by sqrt(e2) and extended with one extra component,
// sqrt(1 - ||scaled||^2). The result is the plain squared Euclidean distance
// between the two extended float vectors.
//
// NOTE: if the scaled squared norm exceeds 1 the extra component is NaN;
// callers are expected to pick `e2` so that this does not happen.
static float ConvertAndComputeByMips(const Float16 *lhs, const Float16 *rhs,
                                     size_t dim, float e2) {
  const float eta = std::sqrt(e2);

  std::vector<float> lhs_vec(dim + 1);
  for (size_t i = 0; i < dim; ++i) {
    lhs_vec[i] = lhs[i] * eta;
  }
  // Only the first `dim` components contribute to the norm.
  float norm2 = 0.0f;
  ailego::SquaredNorm2Matrix<float, 1>::Compute(lhs_vec.data(), dim, &norm2);
  lhs_vec[dim] = std::sqrt(1 - norm2);

  std::vector<float> rhs_vec(dim + 1);
  for (size_t i = 0; i < dim; ++i) {
    rhs_vec[i] = rhs[i] * eta;
  }
  ailego::SquaredNorm2Matrix<float, 1>::Compute(rhs_vec.data(), dim, &norm2);
  rhs_vec[dim] = std::sqrt(1 - norm2);

  return ailego::Distance::SquaredEuclidean(lhs_vec.data(), rhs_vec.data(),
                                            dim + 1);
}

// FixedVector convenience overload of the reference implementation without
// `m_value`; forwards to the raw-pointer overload.
//
// The element type is Float16, matching the raw-pointer overload this forwards
// to and the sibling FixedVector overload that takes `m_value`. The previous
// FixedVector<float, N> signature could not be instantiated, because no
// overload accepting `const float *` is visible here.
template <size_t N>
static float ConvertAndComputeByMips(const FixedVector<Float16, N> &lhs,
                                     const FixedVector<Float16, N> &rhs,
                                     float e2) {
  return ConvertAndComputeByMips(lhs.data(), rhs.data(), lhs.size(), e2);
}

// Compares Distance::MipsSquaredEuclidean (variant without m_value) with the
// convert-then-L2 reference on randomly generated Float16 vector pairs.
TEST(DistanceMatrix, GeneralSphericalInjection) {
  std::mt19937 rng((std::random_device())());

  // Randomized test parameters (drawn in the order u, dim, count).
  std::uniform_real_distribution<float> u_picker(0.5, 1.0);
  const float u_val = u_picker(rng);
  std::uniform_int_distribution<uint32_t> dim_picker(2, 128);
  const uint32_t dim = dim_picker(rng);
  std::uniform_int_distribution<uint32_t> count_picker(1, 1000);
  const uint32_t count = count_picker(rng);

  const float epsilon = 1e-2;
  std::uniform_real_distribution<float> component(-1.0, 1.0);

  for (uint32_t round = 0; round < count; ++round) {
    std::vector<Float16> lhs(dim);
    std::vector<Float16> rhs(dim);
    for (uint32_t d = 0; d < dim; ++d) {
      lhs[d] = component(rng);
      rhs[d] = component(rng);
    }

    // Scale so that the larger of the two squared norms maps to u_val^2.
    float lhs_norm{0.0};
    float rhs_norm{0.0};
    SquaredNorm2Matrix<Float16, 1>::Compute(lhs.data(), dim, &lhs_norm);
    SquaredNorm2Matrix<Float16, 1>::Compute(rhs.data(), dim, &rhs_norm);
    const float e2 = u_val * u_val / std::max(lhs_norm, rhs_norm);

    const float expected =
        ConvertAndComputeByMips(lhs.data(), rhs.data(), dim, e2);
    const float actual = MipsSquaredEuclidean(lhs.data(), rhs.data(), dim, e2);
    ASSERT_NEAR(expected, actual, epsilon);
  }
}

// Checks the MIPS squared Euclidean distance (variant without m_value) on
// hand-written FixedVector<Float16, N> inputs for N = 1..15, using a fixed
// e2 = (0.68 / 15.5)^2.
//
// The test is fully deterministic, so no random generator is created here.
TEST(DistanceMatrix, FixedVectorsSphericalInjection) {
  const float u_val = 0.68f;
  const float l2_norm = 15.5f;
  const float e2 = (u_val / l2_norm) * (u_val / l2_norm);
  const float epsilon = 1e-2;

  // N = 1..10: both operands are identical, so the distance must be ~0.
  FixedVector<Float16, 1> a{0.0f}, b{0.0f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(a, b, e2), epsilon);

  FixedVector<Float16, 2> c{0.0f, 0.1f}, d{0.0f, 0.1f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(c, d, e2), epsilon);

  FixedVector<Float16, 3> e{0.0f, 0.1f, 0.2f}, f{0.0f, 0.1f, 0.2f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(e, f, e2), epsilon);

  FixedVector<Float16, 4> g{0.0f, 0.1f, 0.2f, 0.3f}, h{0.0f, 0.1f, 0.2f, 0.3f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(g, h, e2), epsilon);

  FixedVector<Float16, 5> i{0.0f, 0.1f, 0.2f, 0.3f, 0.4f},
      j{0.0f, 0.1f, 0.2f, 0.3f, 0.4f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(i, j, e2), epsilon);

  FixedVector<Float16, 6> l{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f},
      k{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(l, k, e2), epsilon);

  FixedVector<Float16, 7> m{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f},
      n{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(m, n, e2), epsilon);

  FixedVector<Float16, 8> o{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f},
      p{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(o, p, e2), epsilon);

  FixedVector<Float16, 9> q{0.0f, 0.1f, 0.2f, 0.3f, 0.4f,
                            0.5f, 0.6f, 0.7f, 0.8f},
      r{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(q, r, e2), epsilon);

  FixedVector<Float16, 10> s{0.0f, 0.1f, 0.2f, 0.3f, 0.4f,
                             0.5f, 0.6f, 0.7f, 0.8f, 0.9f},
      t{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(s, t, e2), epsilon);

  // N = 11..15: the first operand is brace-initialized from a single 0.0f and
  // the result is compared against a precomputed constant.
  FixedVector<Float16, 11> u{0.0f},
      v{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.0f};
  EXPECT_NEAR(0.00742372544f, MipsSquaredEuclidean(u, v, e2), epsilon);

  FixedVector<Float16, 12> w{0.0f},
      x{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.0f, 1.1f};
  EXPECT_NEAR(0.00976261682f, MipsSquaredEuclidean(w, x, e2), epsilon);

  FixedVector<Float16, 13> y{0.0f}, z{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f,
                                      0.7f, 0.8f, 0.9f, 1.0f, 1.1f, 1.2f};
  EXPECT_NEAR(0.01254967600f, MipsSquaredEuclidean(y, z, e2), epsilon);

  FixedVector<Float16, 14> x14{0.0f},
      y14{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f,
          0.7f, 0.8f, 0.9f, 1.0f, 1.1f, 1.2f, 2.0f};
  EXPECT_NEAR(0.02031209506f, MipsSquaredEuclidean(x14, y14, e2), epsilon);

  FixedVector<Float16, 15> x15{0.0f},
      y15{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f,
          0.8f, 0.9f, 1.0f, 1.1f, 1.2f, 2.0f, 3.0f};
  EXPECT_NEAR(0.03788981214f, MipsSquaredEuclidean(x15, y15, e2), epsilon);
}

template <size_t M, size_t N>
void TestMipsSphericalInjectionMatrix(void) {
  std::mt19937 gen((std::random_device())());
  const size_t batch_size = M;
  const size_t query_size = N;
  size_t dimension = (std::uniform_int_distribution<size_t>(2, 128))(gen);
  size_t matrix_size = batch_size * dimension;
  size_t query_matrix_size = query_size * dimension;

  std::vector<Float16> matrix1(matrix_size);
  std::vector<Float16> matrix2(matrix_size);
  std::vector<Float16> query1(query_matrix_size);
  std::vector<Float16> query2(query_matrix_size);
  std::vector<float> result1(batch_size * query_size);
  std::vector<float> result2(batch_size * query_size);

  std::uniform_real_distribution<float> dist(-1.0, 1.0);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = dist(gen);
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = dist(gen);
  }
  float squared_l2_norm = 0.0f;
  for (size_t i = 0; i < matrix_size; i += dimension) {
    float score{0.0};
    SquaredNorm2Matrix<Float16, 1>::Compute(&matrix1[i], dimension, &score);
    squared_l2_norm = std::max(squared_l2_norm, score);
  }
  for (size_t i = 0; i < query_matrix_size; i += dimension) {
    float score{0.0};
    SquaredNorm2Matrix<Float16, 1>::Compute(&query1[i], dimension, &score);
    squared_l2_norm = std::max(squared_l2_norm, score);
  }
  const float e2 = 0.98f / squared_l2_norm;
  MatrixTranspose(&matrix2[0], matrix1.data(), dimension, batch_size);
  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);

  for (size_t i = 0; i < query_size; ++i) {
    const Float16 *cur_query = &query1[i * dimension];
    float *query_result = &result1[i * batch_size];

    for (size_t j = 0; j < batch_size; ++j) {
      MipsSquaredEuclideanDistanceMatrix<Float16, 1, 1>::Compute(
          &matrix1[j * dimension], cur_query, dimension, e2, &query_result[j]);
    }
  }
  MipsSquaredEuclideanDistanceMatrix<Float16, batch_size, query_size>::Compute(
      &matrix2[0], &query2[0], dimension, e2, &result2[0]);

  const float epsilon = 1e-2;
  for (size_t i = 0; i < batch_size * query_size; ++i) {
    EXPECT_NEAR(result1[i], result2[i], epsilon);
  }
}

// Instantiations of TestMipsSphericalInjectionMatrix<M, N>. The `_MxN` suffix
// of each test name gives the template arguments: M is the batch size and N is
// the query size.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_1x1) {
  TestMipsSphericalInjectionMatrix<1, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_2x1) {
  TestMipsSphericalInjectionMatrix<2, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_2x2) {
  TestMipsSphericalInjectionMatrix<2, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_3x3) {
  TestMipsSphericalInjectionMatrix<3, 3>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_4x1) {
  TestMipsSphericalInjectionMatrix<4, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_4x2) {
  TestMipsSphericalInjectionMatrix<4, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_4x4) {
  TestMipsSphericalInjectionMatrix<4, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_8x1) {
  TestMipsSphericalInjectionMatrix<8, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_8x2) {
  TestMipsSphericalInjectionMatrix<8, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_8x4) {
  TestMipsSphericalInjectionMatrix<8, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_8x8) {
  TestMipsSphericalInjectionMatrix<8, 8>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x1) {
  TestMipsSphericalInjectionMatrix<16, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x2) {
  TestMipsSphericalInjectionMatrix<16, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x4) {
  TestMipsSphericalInjectionMatrix<16, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x8) {
  TestMipsSphericalInjectionMatrix<16, 8>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x16) {
  TestMipsSphericalInjectionMatrix<16, 16>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x1) {
  TestMipsSphericalInjectionMatrix<32, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x2) {
  TestMipsSphericalInjectionMatrix<32, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x4) {
  TestMipsSphericalInjectionMatrix<32, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x8) {
  TestMipsSphericalInjectionMatrix<32, 8>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x16) {
  TestMipsSphericalInjectionMatrix<32, 16>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x32) {
  TestMipsSphericalInjectionMatrix<32, 32>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x1) {
  TestMipsSphericalInjectionMatrix<64, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x2) {
  TestMipsSphericalInjectionMatrix<64, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x4) {
  TestMipsSphericalInjectionMatrix<64, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x8) {
  TestMipsSphericalInjectionMatrix<64, 8>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x16) {
  TestMipsSphericalInjectionMatrix<64, 16>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x32) {
  TestMipsSphericalInjectionMatrix<64, 32>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x64) {
  TestMipsSphericalInjectionMatrix<64, 64>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x1) {
  TestMipsSphericalInjectionMatrix<128, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x2) {
  TestMipsSphericalInjectionMatrix<128, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x4) {
  TestMipsSphericalInjectionMatrix<128, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x8) {
  TestMipsSphericalInjectionMatrix<128, 8>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x16) {
  TestMipsSphericalInjectionMatrix<128, 16>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x32) {
  TestMipsSphericalInjectionMatrix<128, 32>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x64) {
  TestMipsSphericalInjectionMatrix<128, 64>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x128) {
  TestMipsSphericalInjectionMatrix<128, 128>();
}

// Micro-benchmark for the FP16 spherical-injection MIPS squared Euclidean
// kernel. It fills B blocks of M random D-dimensional rows plus N random
// queries, then prints the elapsed microseconds of three strategies that all
// write into the same M * N results buffer:
//   1. one Compute<M, 1> call per (block, query),
//   2. one Compute<M, N> call per block,
//   3. one Compute<1, 1> call per (block, query, row).
//
// Template parameters:
//   M - batch size (rows per block)
//   N - number of queries
//   B - number of blocks
//   D - vector dimension
//
// NOTE(review): the doubled 'k' in the function name looks like a typo; it is
// kept because the benchmark test below calls it by this spelling.
template <size_t M, size_t N, size_t B, size_t D>
void MipsSphericalInjectionBenchMarkk(void) {
  const size_t dimension = D;
  const size_t batch_size = M;
  const size_t block_size = B;
  const size_t query_size = N;
  const size_t matrix_size = block_size * batch_size * dimension;
  const size_t query_matrix_size = dimension * query_size;
  // Fixed value handed to every Compute call as its e2 argument.
  const float e2 = 1.0 / dimension;

  // matrix1/query1 hold the random source data; matrix2/query2 receive the
  // output of MatrixTranspose applied to them.
  std::vector<Float16> matrix1(matrix_size);
  std::vector<Float16> matrix2(matrix_size);
  std::vector<Float16> query1(query_matrix_size);
  std::vector<Float16> query2(query_matrix_size);

  // Seeded from std::random_device, so the data differs from run to run.
  std::mt19937 gen((std::random_device())());
  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = dist(gen);
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = dist(gen);
  }

  // Each block of batch_size rows is transposed independently.
  for (size_t i = 0; i < block_size; ++i) {
    size_t start_pos = i * batch_size * dimension;
    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], dimension,
                    batch_size);
  }
  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);

  ElapsedTime elapsed_time;
  // Reused by every block and every strategy; the values are never read back.
  std::vector<float> results(batch_size * query_size);

  std::cout << "# (" << IntelIntrinsics() << ") FP16 " << dimension << "d, "
            << batch_size << " * " << query_size << " * " << block_size
            << std::endl;

  // Strategy 1: transposed block (matrix2) against one untransposed query
  // (query1) at a time.
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const Float16 *matrix_batch = &matrix2[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const Float16 *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      MipsSquaredEuclideanDistanceMatrix<Float16, batch_size, 1>::Compute(
          matrix_batch, current_query, dimension, e2, current_results);
    }
  }
  std::cout << "* 1 Batched MipsSquaredEuclidean(SphericalInjection) (us) \t"
            << elapsed_time.micro_seconds() << std::endl;

  // Strategy 2: transposed block (matrix2) against all transposed queries
  // (query2) in a single call per block.
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const Float16 *matrix_batch = &matrix2[i * batch_size * dimension];

    MipsSquaredEuclideanDistanceMatrix<Float16, batch_size,
                                       query_size>::Compute(matrix_batch,
                                                            &query2[0],
                                                            dimension, e2,
                                                            results.data());
  }
  std::cout << "* N Batched MipsSquaredEuclidean(SphericalInjection) (us) \t"
            << elapsed_time.micro_seconds() << std::endl;

  // Strategy 3: untransposed data (matrix1, query1), one row against one
  // query per call.
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const Float16 *matrix_batch = &matrix1[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const Float16 *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      for (size_t k = 0; k < batch_size; ++k) {
        MipsSquaredEuclideanDistanceMatrix<Float16, 1, 1>::Compute(
            &matrix_batch[k * dimension], current_query, dimension, e2,
            &current_results[k]);
      }
    }
  }
  std::cout << "* Unbatched MipsSquaredEuclidean(SphericalInjection) (us) \t"
            << elapsed_time.micro_seconds() << std::endl;
}

// Runs the FP16 spherical-injection benchmark over a grid of batch/query
// sizes. The template arguments are <M, N, B, D>: batch size, query count,
// block count and dimension. The DISABLED_ prefix keeps GoogleTest from
// running it unless disabled tests are explicitly requested.
TEST(DistanceMatrix,
     DISABLED_MipsSquaredEuclideanSphericalInjection_Benchmark) {
  MipsSphericalInjectionBenchMarkk<2, 1, 512, 64>();
  MipsSphericalInjectionBenchMarkk<2, 2, 512, 64>();
  MipsSphericalInjectionBenchMarkk<4, 1, 512, 64>();
  MipsSphericalInjectionBenchMarkk<4, 2, 512, 64>();
  MipsSphericalInjectionBenchMarkk<4, 4, 512, 64>();
  MipsSphericalInjectionBenchMarkk<8, 1, 512, 64>();
  MipsSphericalInjectionBenchMarkk<8, 2, 512, 64>();
  MipsSphericalInjectionBenchMarkk<8, 4, 512, 64>();
  MipsSphericalInjectionBenchMarkk<8, 8, 512, 64>();
  MipsSphericalInjectionBenchMarkk<16, 1, 512, 64>();
  MipsSphericalInjectionBenchMarkk<16, 2, 512, 64>();
  MipsSphericalInjectionBenchMarkk<16, 4, 512, 64>();
  MipsSphericalInjectionBenchMarkk<16, 8, 512, 64>();
  MipsSphericalInjectionBenchMarkk<16, 16, 512, 64>();
  MipsSphericalInjectionBenchMarkk<32, 1, 512, 64>();
  MipsSphericalInjectionBenchMarkk<32, 2, 512, 64>();
  MipsSphericalInjectionBenchMarkk<32, 4, 512, 64>();
  MipsSphericalInjectionBenchMarkk<32, 8, 512, 64>();
  MipsSphericalInjectionBenchMarkk<32, 16, 512, 64>();
  MipsSphericalInjectionBenchMarkk<32, 32, 512, 64>();
  MipsSphericalInjectionBenchMarkk<64, 1, 512, 64>();
  MipsSphericalInjectionBenchMarkk<64, 2, 512, 64>();
  MipsSphericalInjectionBenchMarkk<64, 4, 512, 64>();
  MipsSphericalInjectionBenchMarkk<64, 8, 512, 64>();
  MipsSphericalInjectionBenchMarkk<128, 1, 512, 64>();
  // Single-row, single-query baseline at a larger dimension and block count.
  MipsSphericalInjectionBenchMarkk<1, 1, 1024, 256>();
}


================================================
FILE: tests/ailego/math/mips_euclidean_distance_matrix_fp32_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <functional>
#include <random>
#include <string>
#include <thread>
#include <vector>
#include <ailego/container/bitmap.h>
#include <ailego/internal/cpu_features.h>
#include <ailego/math/distance.h>
#include <ailego/math/norm2_matrix.h>
#include <gtest/gtest.h>
#include <zvec/ailego/container/vector.h>
#include <zvec/ailego/utility/time_helper.h>

using namespace zvec;
using namespace zvec::ailego;

// Returns the string reported by internal::CpuFeatures::Intrinsics(). The
// benchmarks in this file print it in their header line.
static inline const char *IntelIntrinsics(void) {
  return internal::CpuFeatures::Intrinsics();
}

// Writes the transpose of `src` into `dst`.
//
// `src` is read as N rows of M contiguous floats and `dst` is written as
// M rows of N contiguous floats, i.e. dst[i * N + j] = src[j * M + i].
// Both buffers must hold at least M * N elements and must not overlap.
static inline void MatrixTranspose(float *dst, const float *src, size_t M,
                                   size_t N) {
  for (size_t row = 0; row < M; ++row) {
    float *out_row = dst + row * N;
    for (size_t col = 0; col < N; ++col) {
      out_row[col] = src[col * M + row];
    }
  }
}

// Thin wrapper that forwards all arguments unchanged to the
// Distance::MipsSquaredEuclidean overload taking (lhs, rhs, dim, m_value, e2)
// and returns its result.
static float MipsSquaredEuclidean(const float *lhs, const float *rhs,
                                  size_t dim, size_t m_value, float e2) {
  return Distance::MipsSquaredEuclidean(lhs, rhs, dim, m_value, e2);
}

// FixedVector convenience overload: unpacks the two equally sized vectors and
// calls the pointer-based MipsSquaredEuclidean above, using lhs.size() as the
// dimension.
template <size_t N>
static float MipsSquaredEuclidean(const FixedVector<float, N> &lhs,
                                  const FixedVector<float, N> &rhs,
                                  size_t m_value, float e2) {
  return MipsSquaredEuclidean(lhs.data(), rhs.data(), lhs.size(), m_value, e2);
}

// Reference computation used to cross-check Distance::MipsSquaredEuclidean
// (m_value overload).
//
// Each input is expanded explicitly to dim + m_value components: the first
// dim entries are the inputs scaled by sqrt(e2); every extra entry is
// 0.5 - s, where s starts as the squared norm of the scaled vector and is
// squared after each extra entry. The result is the plain squared Euclidean
// distance between the two expanded vectors.
static float ConvertAndComputeByMips(const float *lhs, const float *rhs,
                                     size_t dim, size_t m_value, float e2) {
  const float eta = std::sqrt(e2);
  const size_t expanded_dim = dim + m_value;

  // Builds the expanded vector for one input.
  auto expand = [&](const float *source) {
    std::vector<float> expanded(expanded_dim);
    float squ = 0.0f;
    for (size_t i = 0; i < dim; ++i) {
      const float val = source[i] * eta;
      expanded[i] = val;
      squ += val * val;
    }
    for (size_t i = dim; i < expanded_dim; ++i) {
      expanded[i] = 0.5f - squ;
      squ *= squ;
    }
    return expanded;
  };

  std::vector<float> lhs_vec = expand(lhs);
  std::vector<float> rhs_vec = expand(rhs);
  return ailego::Distance::SquaredEuclidean(lhs_vec.data(), rhs_vec.data(),
                                            dim + m_value);
}

// Randomized cross-check of the repeated-quadratic-injection distance: for a
// random number of random vector pairs, MipsSquaredEuclidean must agree with
// the explicit reference ConvertAndComputeByMips to within 1e-5.
TEST(DistanceMatrix, GeneralRepeatedQuadraticInjection) {
  std::mt19937 gen((std::random_device())());

  // Run-wide parameters, drawn once in this order: m, u, dim, count.
  const size_t m_val = std::uniform_int_distribution<size_t>(1, 4)(gen);
  const float u_val = std::uniform_real_distribution<float>(0.1, 1.0)(gen);
  const uint32_t dim = std::uniform_int_distribution<uint32_t>(2, 128)(gen);
  const uint32_t count = std::uniform_int_distribution<uint32_t>(1, 1000)(gen);
  const float epsilon = 1e-5;
  std::uniform_real_distribution<float> dist(-1.0, 1.0);

  for (uint32_t round = 0; round < count; ++round) {
    std::vector<float> vec1(dim);
    std::vector<float> vec2(dim);
    // Components are drawn alternately for the two vectors.
    for (uint32_t d = 0; d < dim; ++d) {
      vec1[d] = dist(gen);
      vec2[d] = dist(gen);
    }

    float norm1;
    float norm2;
    SquaredNorm2Matrix<float, 1>::Compute(vec1.data(), dim, &norm1);
    SquaredNorm2Matrix<float, 1>::Compute(vec2.data(), dim, &norm2);

    // Scale by the larger squared norm so it maps to u_val squared.
    const float larger_norm = std::max(norm1, norm2);
    const float e2 = u_val * u_val / larger_norm;
    ASSERT_NEAR(
        ConvertAndComputeByMips(vec1.data(), vec2.data(), dim, m_val, e2),
        MipsSquaredEuclidean(vec1.data(), vec2.data(), dim, m_val, e2),
        epsilon);
  }
}

// Deterministic regression cases for the repeated-quadratic-injection
// distance on FixedVector inputs of size 1 through 15, with m = 4 and
// e2 = (0.68 / 15.5)^2.
//
// Sizes 1-10 compare a vector with an identical copy and expect 0. Sizes
// 11-15 compare a vector initialized from the single-element list {0.0f}
// against a ramp and expect hard-coded reference values. All comparisons use
// a tolerance of one float machine epsilon.
//
// The test uses no random data, so no random engine is constructed here.
TEST(DistanceMatrix, FixedVectorsRepeatedQuadraticInjection) {
  const size_t m_val = 4;
  const float u_val = 0.68f;
  const float l2_norm = 15.5f;
  const float e2 = (u_val / l2_norm) * (u_val / l2_norm);
  const float epsilon = std::numeric_limits<float>::epsilon();

  // Identical operands: the distance must be zero.
  FixedVector<float, 1> a{0.0f}, b{0.0f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(a, b, m_val, e2), epsilon);

  FixedVector<float, 2> c{0.0f, 0.1f}, d{0.0f, 0.1f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(c, d, m_val, e2), epsilon);

  FixedVector<float, 3> e{0.0f, 0.1f, 0.2f}, f{0.0f, 0.1f, 0.2f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(e, f, m_val, e2), epsilon);

  FixedVector<float, 4> g{0.0f, 0.1f, 0.2f, 0.3f}, h{0.0f, 0.1f, 0.2f, 0.3f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(g, h, m_val, e2), epsilon);

  FixedVector<float, 5> i{0.0f, 0.1f, 0.2f, 0.3f, 0.4f},
      j{0.0f, 0.1f, 0.2f, 0.3f, 0.4f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(i, j, m_val, e2), epsilon);

  FixedVector<float, 6> l{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f},
      k{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(l, k, m_val, e2), epsilon);

  FixedVector<float, 7> m{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f},
      n{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(m, n, m_val, e2), epsilon);

  FixedVector<float, 8> o{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f},
      p{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(o, p, m_val, e2), epsilon);

  FixedVector<float, 9> q{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f},
      r{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(q, r, m_val, e2), epsilon);

  FixedVector<float, 10> s{0.0f, 0.1f, 0.2f, 0.3f, 0.4f,
                           0.5f, 0.6f, 0.7f, 0.8f, 0.9f},
      t{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(s, t, m_val, e2), epsilon);

  // Different operands: compare against hard-coded reference values.
  FixedVector<float, 11> u{0.0f},
      v{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.0f};
  EXPECT_NEAR(0.00746485f, MipsSquaredEuclidean(u, v, m_val, e2), epsilon);

  FixedVector<float, 12> w{0.0f},
      x{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.0f, 1.1f};
  EXPECT_NEAR(0.00983364f, MipsSquaredEuclidean(w, x, m_val, e2), epsilon);

  FixedVector<float, 13> y{0.0f}, z{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f,
                                    0.7f, 0.8f, 0.9f, 1.0f, 1.1f, 1.2f};
  EXPECT_NEAR(0.0126668f, MipsSquaredEuclidean(y, z, m_val, e2), epsilon);

  FixedVector<float, 14> x14{0.0f},
      y14{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f,
          0.7f, 0.8f, 0.9f, 1.0f, 1.1f, 1.2f, 2.0f};
  EXPECT_NEAR(0.0206175f, MipsSquaredEuclidean(x14, y14, m_val, e2), epsilon);

  FixedVector<float, 15> x15{0.0f},
      y15{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f,
          0.8f, 0.9f, 1.0f, 1.1f, 1.2f, 2.0f, 3.0f};
  EXPECT_NEAR(0.0389414f, MipsSquaredEuclidean(x15, y15, m_val, e2), epsilon);
}

// Checks that the batched M x N repeated-quadratic-injection kernel agrees
// with the 1 x 1 kernel.
//
// Random rows (M of them) and queries (N of them) of a random dimension are
// scored twice: pair by pair with Compute<1, 1> on the original data, and in
// one Compute<M, N> call on the MatrixTranspose'd copies. Every pair of
// scores must match to within 1e-4.
template <size_t M, size_t N>
void TestMipsRepeatedQuadraticInjectionMatrix(void) {
  std::mt19937 gen((std::random_device())());

  // Random parameters, drawn in this order: m, u, dimension.
  const size_t m_val = std::uniform_int_distribution<size_t>(1, 4)(gen);
  const float u_val = std::uniform_real_distribution<float>(0.5, 0.9)(gen);
  const size_t dimension = (std::uniform_int_distribution<size_t>(2, 128))(gen);

  const size_t batch_size = M;
  const size_t query_size = N;
  const size_t matrix_size = batch_size * dimension;
  const size_t query_matrix_size = query_size * dimension;
  const size_t result_count = batch_size * query_size;

  std::vector<float> matrix1(matrix_size);
  std::vector<float> matrix2(matrix_size);
  std::vector<float> query1(query_matrix_size);
  std::vector<float> query2(query_matrix_size);
  std::vector<float> result1(result_count);
  std::vector<float> result2(result_count);

  std::uniform_real_distribution<float> dist(-1.0, 1.0);
  for (float &value : matrix1) {
    value = dist(gen);
  }
  for (float &value : query1) {
    value = dist(gen);
  }

  // e2 is derived from the largest squared norm over all rows and queries.
  float squared_l2_norm = 0.0f;
  const auto fold_max_norm = [&](std::vector<float> &rows) {
    for (size_t offset = 0; offset < rows.size(); offset += dimension) {
      float score;
      SquaredNorm2Matrix<float, 1>::Compute(&rows[offset], dimension, &score);
      squared_l2_norm = std::max(squared_l2_norm, score);
    }
  };
  fold_max_norm(matrix1);
  fold_max_norm(query1);
  const float e2 = u_val * u_val / squared_l2_norm;

  MatrixTranspose(&matrix2[0], matrix1.data(), dimension, batch_size);
  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);

  // Reference scores: one row against one query per call.
  for (size_t q = 0; q < query_size; ++q) {
    const float *cur_query = &query1[q * dimension];
    float *query_result = &result1[q * batch_size];

    for (size_t row = 0; row < batch_size; ++row) {
      MipsSquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(
          &matrix1[row * dimension], cur_query, dimension, m_val, e2,
          &query_result[row]);
    }
  }

  // Scores under test: all rows against all queries in a single call.
  MipsSquaredEuclideanDistanceMatrix<float, M, N>::Compute(
      &matrix2[0], &query2[0], dimension, m_val, e2, &result2[0]);

  const float epsilon = 1e-4;
  for (size_t i = 0; i < result_count; ++i) {
    EXPECT_NEAR(result1[i], result2[i], epsilon);
  }
}

// Repeated-quadratic-injection matrix cases. Every test below instantiates
// TestMipsRepeatedQuadraticInjectionMatrix<M, N> with the batch size M and
// query count N that are spelled out in the `_MxN` suffix of the test name.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_1x1) {
  TestMipsRepeatedQuadraticInjectionMatrix<1, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_2x1) {
  TestMipsRepeatedQuadraticInjectionMatrix<2, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_2x2) {
  TestMipsRepeatedQuadraticInjectionMatrix<2, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_3x3) {
  TestMipsRepeatedQuadraticInjectionMatrix<3, 3>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_4x1) {
  TestMipsRepeatedQuadraticInjectionMatrix<4, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_4x2) {
  TestMipsRepeatedQuadraticInjectionMatrix<4, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_4x4) {
  TestMipsRepeatedQuadraticInjectionMatrix<4, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_8x1) {
  TestMipsRepeatedQuadraticInjectionMatrix<8, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_8x2) {
  TestMipsRepeatedQuadraticInjectionMatrix<8, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_8x4) {
  TestMipsRepeatedQuadraticInjectionMatrix<8, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_8x8) {
  TestMipsRepeatedQuadraticInjectionMatrix<8, 8>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x1) {
  TestMipsRepeatedQuadraticInjectionMatrix<16, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x2) {
  TestMipsRepeatedQuadraticInjectionMatrix<16, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x4) {
  TestMipsRepeatedQuadraticInjectionMatrix<16, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x8) {
  TestMipsRepeatedQuadraticInjectionMatrix<16, 8>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x16) {
  TestMipsRepeatedQuadraticInjectionMatrix<16, 16>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x1) {
  TestMipsRepeatedQuadraticInjectionMatrix<32, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x2) {
  TestMipsRepeatedQuadraticInjectionMatrix<32, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x4) {
  TestMipsRepeatedQuadraticInjectionMatrix<32, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x8) {
  TestMipsRepeatedQuadraticInjectionMatrix<32, 8>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x16) {
  TestMipsRepeatedQuadraticInjectionMatrix<32, 16>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x32) {
  TestMipsRepeatedQuadraticInjectionMatrix<32, 32>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x1) {
  TestMipsRepeatedQuadraticInjectionMatrix<64, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x2) {
  TestMipsRepeatedQuadraticInjectionMatrix<64, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x4) {
  TestMipsRepeatedQuadraticInjectionMatrix<64, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x8) {
  TestMipsRepeatedQuadraticInjectionMatrix<64, 8>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x16) {
  TestMipsRepeatedQuadraticInjectionMatrix<64, 16>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x32) {
  TestMipsRepeatedQuadraticInjectionMatrix<64, 32>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x64) {
  TestMipsRepeatedQuadraticInjectionMatrix<64, 64>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x1) {
  TestMipsRepeatedQuadraticInjectionMatrix<128, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x2) {
  TestMipsRepeatedQuadraticInjectionMatrix<128, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x4) {
  TestMipsRepeatedQuadraticInjectionMatrix<128, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x8) {
  TestMipsRepeatedQuadraticInjectionMatrix<128, 8>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x16) {
  TestMipsRepeatedQuadraticInjectionMatrix<128, 16>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x32) {
  TestMipsRepeatedQuadraticInjectionMatrix<128, 32>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x64) {
  TestMipsRepeatedQuadraticInjectionMatrix<128, 64>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x128) {
  TestMipsRepeatedQuadraticInjectionMatrix<128, 128>();
}

// Micro-benchmark for the FP32 repeated-quadratic-injection MIPS squared
// Euclidean kernel. It fills B blocks of M random D-dimensional rows plus N
// random queries, then prints the elapsed microseconds of three strategies
// that all write into the same M * N results buffer:
//   1. one Compute<M, 1> call per (block, query),
//   2. one Compute<M, N> call per block,
//   3. one Compute<1, 1> call per (block, query, row).
//
// Template parameters:
//   M - batch size (rows per block)
//   N - number of queries
//   B - number of blocks
//   D - vector dimension
template <size_t M, size_t N, size_t B, size_t D>
void MipsRepeatedQuadraticInjectionBenchMark(void) {
  // Fixed kernel parameters: m = 4 and e2 = (0.6 / 1.0)^2.
  const size_t m_val = 4;
  const float u_val = 0.6;
  const float l2_norm = 1.0f;
  const float e2 = (u_val / l2_norm) * (u_val / l2_norm);
  const size_t dimension = D;
  const size_t batch_size = M;
  const size_t block_size = B;
  const size_t query_size = N;
  const size_t matrix_size = block_size * batch_size * dimension;
  const size_t query_matrix_size = dimension * query_size;

  // matrix1/query1 hold the random source data; matrix2/query2 receive the
  // output of MatrixTranspose applied to them.
  std::vector<float> matrix1(matrix_size);
  std::vector<float> matrix2(matrix_size);
  std::vector<float> query1(query_matrix_size);
  std::vector<float> query2(query_matrix_size);

  // Seeded from std::random_device, so the data differs from run to run.
  std::mt19937 gen((std::random_device())());
  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = dist(gen);
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = dist(gen);
  }

  // Each block of batch_size rows is transposed independently.
  for (size_t i = 0; i < block_size; ++i) {
    size_t start_pos = i * batch_size * dimension;
    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], dimension,
                    batch_size);
  }
  MatrixTranspose(&query2[0], query1.data(), dimension, query_size);

  ElapsedTime elapsed_time;
  // Reused by every block and every strategy; the values are never read back.
  std::vector<float> results(batch_size * query_size);

  std::cout << "# (" << IntelIntrinsics() << ") FP32 " << dimension << "d, "
            << batch_size << " * " << query_size << " * " << block_size
            << std::endl;

  // Strategy 1: transposed block (matrix2) against one untransposed query
  // (query1) at a time.
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const float *matrix_batch = &matrix2[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const float *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      MipsSquaredEuclideanDistanceMatrix<float, batch_size, 1>::Compute(
          matrix_batch, current_query, dimension, m_val, e2, current_results);
    }
  }
  std::cout
      << "* 1 Batched MipsSquaredEuclidean(RepeatedQuadraticInjection) (us) \t"
      << elapsed_time.micro_seconds() << std::endl;

  // Strategy 2: transposed block (matrix2) against all transposed queries
  // (query2) in a single call per block.
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const float *matrix_batch = &matrix2[i * batch_size * dimension];

    MipsSquaredEuclideanDistanceMatrix<float, batch_size, query_size>::Compute(
        matrix_batch, &query2[0], dimension, m_val, e2, results.data());
  }
  std::cout
      << "* N Batched MipsSquaredEuclidean(RepeatedQuadraticInjection) (us) \t"
      << elapsed_time.micro_seconds() << std::endl;

  // Strategy 3: untransposed data (matrix1, query1), one row against one
  // query per call.
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const float *matrix_batch = &matrix1[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const float *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      for (size_t k = 0; k < batch_size; ++k) {
        MipsSquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(
            &matrix_batch[k * dimension], current_query, dimension, m_val, e2,
            &current_results[k]);
      }
    }
  }
  std::cout
      << "* Unbatched MipsSquaredEuclidean(RepeatedQuadraticInjection) (us) \t"
      << elapsed_time.micro_seconds() << std::endl;
}

// Runs the FP32 repeated-quadratic-injection benchmark over a grid of
// batch/query sizes. The template arguments are <M, N, B, D>: batch size,
// query count, block count and dimension. The DISABLED_ prefix keeps
// GoogleTest from running it unless disabled tests are explicitly requested.
TEST(DistanceMatrix,
     DISABLED_MipsSquaredEuclideanRepeatedQuadraticInjection_Benchmark) {
  MipsRepeatedQuadraticInjectionBenchMark<2, 1, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<2, 2, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<4, 1, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<4, 2, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<4, 4, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<8, 1, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<8, 2, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<8, 4, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<8, 8, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<16, 1, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<16, 2, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<16, 4, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<16, 8, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<16, 16, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<32, 1, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<32, 2, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<32, 4, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<32, 8, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<32, 16, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<32, 32, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<64, 1, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<64, 2, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<64, 4, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<64, 8, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<128, 1, 512, 64>();
  // Single-row, single-query baseline at a larger dimension and block count.
  MipsRepeatedQuadraticInjectionBenchMark<1, 1, 1024, 256>();
}

// Thin wrapper that forwards all arguments unchanged to the
// Distance::MipsSquaredEuclidean overload taking (lhs, rhs, dim, e2), i.e.
// the variant without an m_value argument, and returns its result.
static float MipsSquaredEuclidean(const float *lhs, const float *rhs,
                                  size_t dim, float e2) {
  return Distance::MipsSquaredEuclidean(lhs, rhs, dim, e2);
}

// FixedVector convenience overload for the variant without m_value: unpacks
// the two equally sized vectors and calls the pointer-based
// MipsSquaredEuclidean above, using lhs.size() as the dimension.
template <size_t N>
static float MipsSquaredEuclidean(const FixedVector<float, N> &lhs,
                                  const FixedVector<float, N> &rhs, float e2) {
  return MipsSquaredEuclidean(lhs.data(), rhs.data(), lhs.size(), e2);
}

// Reference computation used to cross-check Distance::MipsSquaredEuclidean
// (the overload without m_value).
//
// Each input is expanded explicitly to dim + 1 components: the first dim
// entries are the inputs scaled by sqrt(e2), and the extra entry is
// sqrt(1 - n), where n is the squared L2 norm of the scaled entries as
// computed by SquaredNorm2Matrix. The result is the plain squared Euclidean
// distance between the two expanded vectors.
//
// The caller must choose e2 so that the scaled squared norm does not exceed
// 1; otherwise the extra component is the square root of a negative number.
//
// This helper is silent: it runs once per random pair inside test loops, so
// it must not write diagnostics to stdout.
static float ConvertAndComputeByMips(const float *lhs, const float *rhs,
                                     size_t dim, float e2) {
  const float eta = std::sqrt(e2);
  float norm2;

  std::vector<float> lhs_vec(dim + 1);
  for (size_t i = 0; i < dim; ++i) {
    lhs_vec[i] = lhs[i] * eta;
  }
  // Only the first dim entries are passed; the extra slot is filled afterwards.
  ailego::SquaredNorm2Matrix<float, 1>::Compute(lhs_vec.data(), dim, &norm2);
  lhs_vec[dim] = std::sqrt(1 - norm2);

  std::vector<float> rhs_vec(dim + 1);
  for (size_t i = 0; i < dim; ++i) {
    rhs_vec[i] = rhs[i] * eta;
  }
  ailego::SquaredNorm2Matrix<float, 1>::Compute(rhs_vec.data(), dim, &norm2);
  rhs_vec[dim] = std::sqrt(1 - norm2);

  return ailego::Distance::SquaredEuclidean(lhs_vec.data(), rhs_vec.data(),
                                            dim + 1);
}

// FixedVector convenience overload of the reference computation: unpacks the
// two equally sized vectors and calls the pointer-based
// ConvertAndComputeByMips(lhs, rhs, dim, e2), using lhs.size() as the
// dimension.
template <size_t N>
static float ConvertAndComputeByMips(const FixedVector<float, N> &lhs,
                                     const FixedVector<float, N> &rhs,
                                     float e2) {
  return ConvertAndComputeByMips(lhs.data(), rhs.data(), lhs.size(), e2);
}

// Randomized cross-check of the spherical-injection distance: for a random
// number of random vector pairs, MipsSquaredEuclidean must agree with the
// explicit reference ConvertAndComputeByMips to within 1e-6.
TEST(DistanceMatrix, GeneralSphericalInjection) {
  std::mt19937 gen((std::random_device())());

  // Run-wide parameters, drawn once in this order: u, dim, count.
  const float u_val = std::uniform_real_distribution<float>(0.5, 0.9)(gen);
  const uint32_t dim = std::uniform_int_distribution<uint32_t>(2, 128)(gen);
  const uint32_t count = std::uniform_int_distribution<uint32_t>(1, 1000)(gen);
  const float epsilon = 1e-6;
  std::uniform_real_distribution<float> dist(-1.0, 1.0);

  for (uint32_t round = 0; round < count; ++round) {
    std::vector<float> vec1(dim);
    std::vector<float> vec2(dim);
    // Components are drawn alternately for the two vectors.
    for (uint32_t d = 0; d < dim; ++d) {
      vec1[d] = dist(gen);
      vec2[d] = dist(gen);
    }

    float norm1;
    float norm2;
    SquaredNorm2Matrix<float, 1>::Compute(vec1.data(), dim, &norm1);
    SquaredNorm2Matrix<float, 1>::Compute(vec2.data(), dim, &norm2);

    // Scale by the larger squared norm so it maps to u_val squared.
    const float larger_norm = std::max(norm1, norm2);
    const float e2 = u_val * u_val / larger_norm;
    ASSERT_NEAR(ConvertAndComputeByMips(vec1.data(), vec2.data(), dim, e2),
                MipsSquaredEuclidean(vec1.data(), vec2.data(), dim, e2),
                epsilon);
  }
}

// Deterministic regression cases for the spherical-injection distance on
// FixedVector inputs of size 1 through 15, with e2 = (0.68 / 15.5)^2.
//
// Sizes 1-10 compare a vector with an identical copy and expect 0. Sizes
// 11-15 compare a vector initialized from the single-element list {0.0f}
// against a ramp and expect hard-coded reference values. All comparisons use
// a tolerance of one float machine epsilon.
//
// The test uses no random data, so no random engine is constructed here.
TEST(DistanceMatrix, FixedVectorsSphericalInjection) {
  const float u_val = 0.68f;
  const float l2_norm = 15.5f;
  const float e2 = (u_val / l2_norm) * (u_val / l2_norm);
  const float epsilon = std::numeric_limits<float>::epsilon();

  // Identical operands: the distance must be zero.
  FixedVector<float, 1> a{0.0f}, b{0.0f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(a, b, e2), epsilon);

  FixedVector<float, 2> c{0.0f, 0.1f}, d{0.0f, 0.1f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(c, d, e2), epsilon);

  FixedVector<float, 3> e{0.0f, 0.1f, 0.2f}, f{0.0f, 0.1f, 0.2f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(e, f, e2), epsilon);

  FixedVector<float, 4> g{0.0f, 0.1f, 0.2f, 0.3f}, h{0.0f, 0.1f, 0.2f, 0.3f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(g, h, e2), epsilon);

  FixedVector<float, 5> i{0.0f, 0.1f, 0.2f, 0.3f, 0.4f},
      j{0.0f, 0.1f, 0.2f, 0.3f, 0.4f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(i, j, e2), epsilon);

  FixedVector<float, 6> l{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f},
      k{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(l, k, e2), epsilon);

  FixedVector<float, 7> m{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f},
      n{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(m, n, e2), epsilon);

  FixedVector<float, 8> o{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f},
      p{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(o, p, e2), epsilon);

  FixedVector<float, 9> q{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f},
      r{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(q, r, e2), epsilon);

  FixedVector<float, 10> s{0.0f, 0.1f, 0.2f, 0.3f, 0.4f,
                           0.5f, 0.6f, 0.7f, 0.8f, 0.9f},
      t{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(s, t, e2), epsilon);

  // Different operands: compare against hard-coded reference values.
  FixedVector<float, 11> u{0.0f},
      v{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.0f};
  EXPECT_NEAR(0.00742372544f, MipsSquaredEuclidean(u, v, e2), epsilon);

  FixedVector<float, 12> w{0.0f},
      x{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.0f, 1.1f};
  EXPECT_NEAR(0.00976261682f, MipsSquaredEuclidean(w, x, e2), epsilon);

  FixedVector<float, 13> y{0.0f}, z{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f,
                                    0.7f, 0.8f, 0.9f, 1.0f, 1.1f, 1.2f};
  EXPECT_NEAR(0.01254967600f, MipsSquaredEuclidean(y, z, e2), epsilon);

  FixedVector<float, 14> x14{0.0f},
      y14{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f,
          0.7f, 0.8f, 0.9f, 1.0f, 1.1f, 1.2f, 2.0f};
  EXPECT_NEAR(0.02031209506f, MipsSquaredEuclidean(x14, y14, e2), epsilon);

  FixedVector<float, 15> x15{0.0f},
      y15{0.0f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f,
          0.8f, 0.9f, 1.0f, 1.1f, 1.2f, 2.0f, 3.0f};
  EXPECT_NEAR(0.03788981214f, MipsSquaredEuclidean(x15, y15, e2), epsilon);
}

// Cross-checks the M x N FP32 MIPS squared-Euclidean kernel against the 1 x 1
// kernel on random data.
//
// M is the number of database vectors per batch and N the number of queries.
// The vector dimension is drawn at random from [2, 128]. `e2` is set to
// 0.98 / (largest squared norm among all generated vectors). The batched
// kernel is fed the transposed copies of the inputs, the 1 x 1 kernel the
// originals; every pair of results must agree within 1e-4.
template <size_t M, size_t N>
void TestMipsSphericalInjectionMatrix(void) {
  std::mt19937 rng((std::random_device())());
  const size_t dim = (std::uniform_int_distribution<size_t>(2, 128))(rng);
  const size_t data_len = M * dim;
  const size_t query_len = N * dim;

  std::vector<float> matrix1(data_len);
  std::vector<float> matrix2(data_len);
  std::vector<float> query1(query_len);
  std::vector<float> query2(query_len);
  std::vector<float> result1(M * N);
  std::vector<float> result2(M * N);

  // Database values are drawn first, then query values.
  std::uniform_real_distribution<float> dist(-1.0, 1.0);
  for (float &value : matrix1) {
    value = dist(rng);
  }
  for (float &value : query1) {
    value = dist(rng);
  }

  // Track the largest squared norm over every row of both inputs.
  float max_squared_norm = 0.0f;
  auto fold_row_norms = [&max_squared_norm, dim](std::vector<float> &rows) {
    for (size_t offset = 0; offset < rows.size(); offset += dim) {
      float score;
      SquaredNorm2Matrix<float, 1>::Compute(&rows[offset], dim, &score);
      max_squared_norm = std::max(max_squared_norm, score);
    }
  };
  fold_row_norms(matrix1);
  fold_row_norms(query1);
  const float e2 = 0.98f / max_squared_norm;

  MatrixTranspose(&matrix2[0], matrix1.data(), dim, M);
  MatrixTranspose(&query2[0], query1.data(), dim, N);

  // Reference: one 1 x 1 kernel call per (query, vector) pair.
  for (size_t q = 0; q < N; ++q) {
    const float *single_query = &query1[q * dim];
    float *row_out = &result1[q * M];

    for (size_t b = 0; b < M; ++b) {
      MipsSquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(
          &matrix1[b * dim], single_query, dim, e2, &row_out[b]);
    }
  }

  // Candidate: a single M x N kernel call on the transposed inputs.
  MipsSquaredEuclideanDistanceMatrix<float, M, N>::Compute(
      &matrix2[0], &query2[0], dim, e2, &result2[0]);

  for (size_t i = 0; i < M * N; ++i) {
    EXPECT_GE(1e-4, std::abs(result1[i] - result2[i]));
  }
}

// One test per kernel shape. The `_MxN` suffix in each test name matches the
// template arguments passed to TestMipsSphericalInjectionMatrix<M, N>, where
// M is the batch size and N the query count.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_1x1) {
  TestMipsSphericalInjectionMatrix<1, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_2x1) {
  TestMipsSphericalInjectionMatrix<2, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_2x2) {
  TestMipsSphericalInjectionMatrix<2, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_3x3) {
  TestMipsSphericalInjectionMatrix<3, 3>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_4x1) {
  TestMipsSphericalInjectionMatrix<4, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_4x2) {
  TestMipsSphericalInjectionMatrix<4, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_4x4) {
  TestMipsSphericalInjectionMatrix<4, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_8x1) {
  TestMipsSphericalInjectionMatrix<8, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_8x2) {
  TestMipsSphericalInjectionMatrix<8, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_8x4) {
  TestMipsSphericalInjectionMatrix<8, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_8x8) {
  TestMipsSphericalInjectionMatrix<8, 8>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x1) {
  TestMipsSphericalInjectionMatrix<16, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x2) {
  TestMipsSphericalInjectionMatrix<16, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x4) {
  TestMipsSphericalInjectionMatrix<16, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x8) {
  TestMipsSphericalInjectionMatrix<16, 8>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x16) {
  TestMipsSphericalInjectionMatrix<16, 16>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x1) {
  TestMipsSphericalInjectionMatrix<32, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x2) {
  TestMipsSphericalInjectionMatrix<32, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x4) {
  TestMipsSphericalInjectionMatrix<32, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x8) {
  TestMipsSphericalInjectionMatrix<32, 8>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x16) {
  TestMipsSphericalInjectionMatrix<32, 16>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x32) {
  TestMipsSphericalInjectionMatrix<32, 32>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x1) {
  TestMipsSphericalInjectionMatrix<64, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x2) {
  TestMipsSphericalInjectionMatrix<64, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x4) {
  TestMipsSphericalInjectionMatrix<64, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x8) {
  TestMipsSphericalInjectionMatrix<64, 8>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x16) {
  TestMipsSphericalInjectionMatrix<64, 16>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x32) {
  TestMipsSphericalInjectionMatrix<64, 32>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x64) {
  TestMipsSphericalInjectionMatrix<64, 64>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x1) {
  TestMipsSphericalInjectionMatrix<128, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x2) {
  TestMipsSphericalInjectionMatrix<128, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x4) {
  TestMipsSphericalInjectionMatrix<128, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x8) {
  TestMipsSphericalInjectionMatrix<128, 8>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x16) {
  TestMipsSphericalInjectionMatrix<128, 16>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x32) {
  TestMipsSphericalInjectionMatrix<128, 32>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x64) {
  TestMipsSphericalInjectionMatrix<128, 64>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x128) {
  TestMipsSphericalInjectionMatrix<128, 128>();
}

// Prints wall-clock timings (microseconds) for the FP32 MIPS squared-Euclidean
// kernel invoked three ways over the same random data:
//   1. M x 1 kernel, once per query, on transposed database blocks;
//   2. M x N kernel, once per block, on transposed database and queries;
//   3. 1 x 1 kernel, once per (vector, query) pair, on the original layout.
//
// M = vectors per batch, N = queries, B = number of batches (blocks),
// D = vector dimension. `e2` is fixed at 1 / D. Results are overwritten on
// every call and never checked; only the elapsed time is reported.
template <size_t M, size_t N, size_t B, size_t D>
void MipsSphericalInjectionBenchMarkk(void) {
  const size_t block_len = M * D;
  const size_t data_len = B * block_len;
  const size_t query_len = D * N;
  const float e2 = 1.0 / D;

  std::vector<float> matrix1(data_len);
  std::vector<float> matrix2(data_len);
  std::vector<float> query1(query_len);
  std::vector<float> query2(query_len);

  std::mt19937 rng((std::random_device())());
  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);
  for (float &value : matrix1) {
    value = dist(rng);
  }
  for (float &value : query1) {
    value = dist(rng);
  }

  // Transpose each database block independently, then the query matrix.
  for (size_t offset = 0; offset < data_len; offset += block_len) {
    MatrixTranspose(&matrix2[offset], &matrix1[offset], D, M);
  }
  MatrixTranspose(&query2[0], query1.data(), D, N);

  ElapsedTime elapsed_time;
  std::vector<float> results(M * N);

  std::cout << "# (" << IntelIntrinsics() << ") FP32 " << D << "d, " << M
            << " * " << N << " * " << B << std::endl;

  // 1 Batched Euclidean
  elapsed_time.reset();
  for (size_t block = 0; block < B; ++block) {
    const float *transposed_block = &matrix2[block * block_len];

    for (size_t q = 0; q < N; ++q) {
      const float *single_query = &query1[q * D];
      float *row_out = &results[q * M];

      MipsSquaredEuclideanDistanceMatrix<float, M, 1>::Compute(
          transposed_block, single_query, D, e2, row_out);
    }
  }
  std::cout << "* 1 Batched MipsSquaredEuclidean(SphericalInjection) (us) \t"
            << elapsed_time.micro_seconds() << std::endl;

  // N Batched Euclidean
  elapsed_time.reset();
  for (size_t block = 0; block < B; ++block) {
    const float *transposed_block = &matrix2[block * block_len];

    MipsSquaredEuclideanDistanceMatrix<float, M, N>::Compute(
        transposed_block, &query2[0], D, e2, results.data());
  }
  std::cout << "* N Batched MipsSquaredEuclidean(SphericalInjection) (us) \t"
            << elapsed_time.micro_seconds() << std::endl;

  // Unbatched Euclidean
  elapsed_time.reset();
  for (size_t block = 0; block < B; ++block) {
    const float *plain_block = &matrix1[block * block_len];

    for (size_t q = 0; q < N; ++q) {
      const float *single_query = &query1[q * D];
      float *row_out = &results[q * M];

      for (size_t v = 0; v < M; ++v) {
        MipsSquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(
            &plain_block[v * D], single_query, D, e2, &row_out[v]);
      }
    }
  }
  std::cout << "* Unbatched MipsSquaredEuclidean(SphericalInjection) (us) \t"
            << elapsed_time.micro_seconds() << std::endl;
}

// Timing-only benchmark sweep; it asserts nothing. The DISABLED_ prefix makes
// GoogleTest skip it unless disabled tests are explicitly requested.
// Template arguments are <batch size, query count, block count, dimension>.
TEST(DistanceMatrix,
     DISABLED_MipsSquaredEuclideanSphericalInjection_Benchmark) {
  MipsSphericalInjectionBenchMarkk<2, 1, 512, 64>();
  MipsSphericalInjectionBenchMarkk<2, 2, 512, 64>();
  MipsSphericalInjectionBenchMarkk<4, 1, 512, 64>();
  MipsSphericalInjectionBenchMarkk<4, 2, 512, 64>();
  MipsSphericalInjectionBenchMarkk<4, 4, 512, 64>();
  MipsSphericalInjectionBenchMarkk<8, 1, 512, 64>();
  MipsSphericalInjectionBenchMarkk<8, 2, 512, 64>();
  MipsSphericalInjectionBenchMarkk<8, 4, 512, 64>();
  MipsSphericalInjectionBenchMarkk<8, 8, 512, 64>();
  MipsSphericalInjectionBenchMarkk<16, 1, 512, 64>();
  MipsSphericalInjectionBenchMarkk<16, 2, 512, 64>();
  MipsSphericalInjectionBenchMarkk<16, 4, 512, 64>();
  MipsSphericalInjectionBenchMarkk<16, 8, 512, 64>();
  MipsSphericalInjectionBenchMarkk<16, 16, 512, 64>();
  MipsSphericalInjectionBenchMarkk<32, 1, 512, 64>();
  MipsSphericalInjectionBenchMarkk<32, 2, 512, 64>();
  MipsSphericalInjectionBenchMarkk<32, 4, 512, 64>();
  MipsSphericalInjectionBenchMarkk<32, 8, 512, 64>();
  MipsSphericalInjectionBenchMarkk<32, 16, 512, 64>();
  MipsSphericalInjectionBenchMarkk<32, 32, 512, 64>();
  MipsSphericalInjectionBenchMarkk<64, 1, 512, 64>();
  MipsSphericalInjectionBenchMarkk<64, 2, 512, 64>();
  MipsSphericalInjectionBenchMarkk<64, 4, 512, 64>();
  MipsSphericalInjectionBenchMarkk<64, 8, 512, 64>();
  MipsSphericalInjectionBenchMarkk<128, 1, 512, 64>();
  MipsSphericalInjectionBenchMarkk<1, 1, 1024, 256>();
}

================================================
FILE: tests/ailego/math/mips_euclidean_distance_matrix_int4_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <functional>
#include <iostream>
#include <ostream>
#include <random>
#include <string>
#include <thread>
#include <vector>
#include <ailego/container/bitmap.h>
#include <ailego/internal/cpu_features.h>
#include <ailego/math/distance.h>
#include <ailego/math/norm2_matrix.h>
#include <gtest/gtest.h>
#include <zvec/ailego/container/vector.h>
#include <zvec/ailego/utility/time_helper.h>

using namespace zvec;
using namespace zvec::ailego;

// Returns the string reported by internal::CpuFeatures::Intrinsics(); the
// benchmarks print it in their header line.
static inline const char *IntelIntrinsics(void) {
  const char *intrinsics_name = internal::CpuFeatures::Intrinsics();
  return intrinsics_name;
}

// Transposes a row-major N x M matrix of 32-bit words (`src`) into a
// row-major M x N matrix (`dst`): dst[i * N + j] = src[j * M + i].
// `dst` and `src` must each hold M * N elements.
static inline void MatrixTranspose(uint32_t *dst, const uint32_t *src, size_t M,
                                   size_t N) {
  for (size_t row = 0; row < M; ++row) {
    uint32_t *dst_row = dst + row * N;
    for (size_t col = 0; col < N; ++col) {
      dst_row[col] = src[col * M + row];
    }
  }
}

// Thin forwarder to Distance::MipsSquaredEuclidean for packed buffers, using
// the overload that takes both `m_value` and `e2`.
static float MipsSquaredEuclidean(const uint8_t *lhs, const uint8_t *rhs,
                                  size_t dim, size_t m_value, float e2) {
  const float distance =
      Distance::MipsSquaredEuclidean(lhs, rhs, dim, m_value, e2);
  return distance;
}

// FixedVector convenience overload: forwards the raw data pointers and
// lhs.size() as the dimension to the pointer-based helper.
template <size_t N>
static float MipsSquaredEuclidean(const FixedVector<uint8_t, N> &lhs,
                                  const FixedVector<uint8_t, N> &rhs,
                                  size_t m_value, float e2) {
  const uint8_t *lhs_data = lhs.data();
  const uint8_t *rhs_data = rhs.data();
  const size_t dim = lhs.size();
  return MipsSquaredEuclidean(lhs_data, rhs_data, dim, m_value, e2);
}

// Float reference for the packed repeated-quadratic-injection distance.
//
// Each input byte holds two signed 4-bit values (low nibble first). Both
// inputs are unpacked to floats scaled by sqrt(e2), then extended with
// `m_value` extra coordinates: the first is 0.5 - s where s is the squared
// norm of the scaled vector, and s is squared before each following one.
// The result is the squared Euclidean distance between the two extended
// vectors of length dim + m_value.
static float ConvertAndComputeByMips(const uint8_t *lhs, const uint8_t *rhs,
                                     size_t dim, size_t m_value, float e2) {
  const float eta = std::sqrt(e2);

  // Unpacks one packed vector and appends its injected coordinates.
  auto expand = [dim, m_value, eta](const uint8_t *packed) {
    std::vector<float> out(dim + m_value);
    float acc = 0.0f;
    for (size_t pos = 0; pos < dim; pos += 2) {
      const uint8_t byte = packed[pos / 2];
      // Shift the nibble into the top of an int8_t, then arithmetic-shift back
      // down to sign-extend it.
      const int8_t low = (int8_t)(byte << 4) >> 4;
      const int8_t high = (int8_t)(byte & 0xf0) >> 4;

      const float low_val = low * eta;
      out[pos] = low_val;
      acc += low_val * low_val;

      const float high_val = high * eta;
      out[pos + 1] = high_val;
      acc += high_val * high_val;
    }
    for (size_t pos = dim; pos < dim + m_value; ++pos) {
      out[pos] = 0.5f - acc;
      acc *= acc;
    }
    return out;
  };

  std::vector<float> lhs_vec = expand(lhs);
  std::vector<float> rhs_vec = expand(rhs);
  return ailego::Distance::SquaredEuclidean(lhs_vec.data(), rhs_vec.data(),
                                            dim + m_value);
}

// FixedVector convenience overload: forwards the raw data pointers and
// lhs.size() as the dimension to the pointer-based reference helper.
template <size_t N>
static float ConvertAndComputeByMips(const FixedVector<uint8_t, N> &lhs,
                                     const FixedVector<uint8_t, N> &rhs,
                                     size_t m_value, float e2) {
  const uint8_t *lhs_data = lhs.data();
  const uint8_t *rhs_data = rhs.data();
  const size_t dim = lhs.size();
  return ConvertAndComputeByMips(lhs_data, rhs_data, dim, m_value, e2);
}

// Compares Distance::MipsSquaredEuclidean (m_value overload) on random packed
// vectors against the float reference ConvertAndComputeByMips.
//
// m_val in [1, 4], the scale e2 = (u / l2_norm)^2, the (even) dimension and
// the number of vector pairs are all drawn at random on every run.
TEST(DistanceMatrix, GeneralRepeatedQuadraticInjection) {
  std::mt19937 gen((std::random_device())());
  const size_t m_val = std::uniform_int_distribution<size_t>(1, 4)(gen);
  const float u_val = std::uniform_real_distribution<float>(0.1, 1.0)(gen);
  const float l2_norm =
      std::uniform_real_distribution<float>(100.0, 150.0)(gen);
  const float e2 = (u_val / l2_norm) * (u_val / l2_norm);
  const float epsilon = 1e-6;
  const uint32_t dim =
      (std::uniform_int_distribution<uint32_t>(2, 128))(gen) * 2;
  const uint32_t count = std::uniform_int_distribution<uint32_t>(1, 1000)(gen);
  // std::uniform_int_distribution is only specified for short, int, long,
  // long long and their unsigned counterparts; instantiating it with uint8_t
  // is undefined and rejected by some standard libraries. Draw an int in
  // [0, 255] and narrow it instead.
  std::uniform_int_distribution<int> dist(0, 255);
  for (size_t i = 0; i < count; ++i) {
    // Two values are packed per byte, hence dim / 2 bytes per vector.
    std::vector<uint8_t> vec1(dim / 2);
    std::vector<uint8_t> vec2(dim / 2);
    for (size_t d = 0; d < dim / 2; ++d) {
      vec1[d] = static_cast<uint8_t>(dist(gen));
      vec2[d] = static_cast<uint8_t>(dist(gen));
    }
    ASSERT_NEAR(
        ConvertAndComputeByMips(vec1.data(), vec2.data(), dim, m_val, e2),
        MipsSquaredEuclidean(vec1.data(), vec2.data(), dim, m_val, e2),
        epsilon);
  }
}

// Checks MipsSquaredEuclidean (m_value overload) on hand-written packed
// inputs with fixed parameters (m_val = 4, e2 = (0.68 / 20)^2).
//
// Identical inputs must give distance 0; the remaining cases compare against
// precomputed constants. Each array holds two values per byte, so the
// dimension passed is twice the array length. No randomness is involved, so
// no random engine is created here.
TEST(DistanceMatrix, FixedVectorsRepeatedQuadraticInjection) {
  const size_t m_val = 4;
  const float u_val = 0.68f;
  const float l2_norm = 20.0f;
  const float e2 = (u_val / l2_norm) * (u_val / l2_norm);
  const float epsilon = 1e-5;

  // Identical inputs of growing length: distance must be zero.
  uint8_t a[] = {0}, b[] = {0};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(a, b, 2, m_val, e2), epsilon);

  uint8_t c[] = {0, 1}, d[] = {0, 1};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(c, d, 4, m_val, e2), epsilon);

  uint8_t e[] = {0, 1, 2}, f[] = {0, 1, 2};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(e, f, 6, m_val, e2), epsilon);

  uint8_t g[] = {0, 1, 2, 3}, h[] = {0, 1, 2, 3};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(g, h, 8, m_val, e2), epsilon);

  uint8_t i[] = {0, 1, 2, 3, 4}, j[] = {0, 1, 2, 3, 4};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(i, j, 10, m_val, e2), epsilon);

  uint8_t l[] = {0, 1, 2, 3, 4, 5}, k[] = {0, 1, 2, 3, 4, 5};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(l, k, 12, m_val, e2), epsilon);

  uint8_t m[] = {0, 1, 2, 3, 4, 5, 6}, n[] = {0, 1, 2, 3, 4, 5, 6};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(m, n, 14, m_val, e2), epsilon);

  uint8_t o[] = {0, 1, 2, 3, 4, 5, 6, 7}, p[] = {0, 1, 2, 3, 4, 5, 6, 7};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(o, p, 16, m_val, e2), epsilon);

  uint8_t q[] = {0, 1, 2, 3, 4, 5, 6, 7, 8}, r[] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(q, r, 18, m_val, e2), epsilon);

  uint8_t s[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
          t[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(s, t, 20, m_val, e2), epsilon);

  // Differing inputs: expected values are precomputed constants.
  uint8_t u[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
          v[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
  EXPECT_NEAR(0.458308637f, MipsSquaredEuclidean(u, v, 22, m_val, e2), epsilon);

  uint8_t w[12] = {0}, x[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
  EXPECT_NEAR(0.512402892f, MipsSquaredEuclidean(w, x, 24, m_val, e2), epsilon);

  uint8_t y[13] = {0}, z[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
  EXPECT_NEAR(0.548633813f, MipsSquaredEuclidean(y, z, 26, m_val, e2), epsilon);

  uint8_t x14[14] = {0}, y14[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 20};
  EXPECT_NEAR(0.588600754f, MipsSquaredEuclidean(x14, y14, 28, m_val, e2),
              epsilon);

  uint8_t x15[15] = {0},
          y15[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 20, 30};
  EXPECT_NEAR(0.600657463f, MipsSquaredEuclidean(x15, y15, 30, m_val, e2),
              epsilon);
  uint8_t x16[16] = {9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9},
          y16[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 20, 30, 50};
  EXPECT_NEAR(2.628833294f, MipsSquaredEuclidean(x16, y16, 32, m_val, e2),
              epsilon);
}

// Cross-checks the M x N packed MIPS squared-Euclidean kernel (m_value
// overload) against the 1 x 1 kernel on random data.
//
// M is the number of database vectors per batch and N the number of queries.
// Vectors are stored two values per byte, so a vector of `dimension` values
// occupies dimension / 2 bytes. The batched kernel is fed inputs transposed
// in units of 32-bit words; the 1 x 1 kernel uses the original layout. Every
// pair of results must agree within 1e-4.
template <size_t M, size_t N>
void TestSquaredEuclideanMatrixRepeatedQuadraticInjection(void) {
  std::mt19937 gen((std::random_device())());

  const size_t m_val = std::uniform_int_distribution<size_t>(1, 4)(gen);
  const float u_val = std::uniform_real_distribution<float>(0.3, 0.9)(gen);
  const size_t batch_size = M;
  const size_t query_size = N;
  // A multiple of 8 values, i.e. a whole number of 32-bit words per vector.
  const size_t dimension =
      (std::uniform_int_distribution<size_t>(2, 128))(gen) * 8;
  const size_t bytes_per_vector = dimension / 2;
  const size_t matrix_size = batch_size * bytes_per_vector;
  const size_t query_matrix_size = query_size * bytes_per_vector;

  std::vector<uint8_t> matrix1(matrix_size);
  std::vector<uint8_t> matrix2(matrix_size);
  std::vector<uint8_t> query1(query_matrix_size);
  std::vector<uint8_t> query2(query_matrix_size);
  std::vector<float> result1(batch_size * query_size);
  std::vector<float> result2(batch_size * query_size);

  // std::uniform_int_distribution is only specified for short, int, long,
  // long long and their unsigned counterparts; instantiating it with uint8_t
  // is undefined and rejected by some standard libraries. Draw an int in
  // [0, 255] and narrow it instead.
  std::uniform_int_distribution<int> dist(0, 255);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = static_cast<uint8_t>(dist(gen));
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = static_cast<uint8_t>(dist(gen));
  }

  // Largest squared norm over every database and query vector. The loops step
  // by the packed size of one vector so that no vector is skipped.
  // NOTE(review): assumes SquaredNorm2Matrix<uint8_t, 1>::Compute takes the
  // value count (not the byte count), like the distance kernels used below —
  // confirm against norm2_matrix.h.
  float squared_l2_norm = 0.0f;
  for (size_t i = 0; i < matrix_size; i += bytes_per_vector) {
    float score;
    SquaredNorm2Matrix<uint8_t, 1>::Compute(&matrix1[i], dimension, &score);
    squared_l2_norm = std::max(squared_l2_norm, score);
  }
  for (size_t i = 0; i < query_matrix_size; i += bytes_per_vector) {
    float score;
    SquaredNorm2Matrix<uint8_t, 1>::Compute(&query1[i], dimension, &score);
    squared_l2_norm = std::max(squared_l2_norm, score);
  }
  const float e2 = u_val * u_val / squared_l2_norm;

  // dimension / 8 is the number of 32-bit words per vector.
  MatrixTranspose((uint32_t *)(&matrix2[0]), (const uint32_t *)matrix1.data(),
                  dimension / 8, batch_size);
  MatrixTranspose((uint32_t *)(&query2[0]), (const uint32_t *)query1.data(),
                  dimension / 8, query_size);

  // Reference: one 1 x 1 kernel call per (query, vector) pair.
  for (size_t i = 0; i < query_size; ++i) {
    const uint8_t *cur_query = &query1[i * bytes_per_vector];
    float *query_result = &result1[i * batch_size];

    for (size_t j = 0; j < batch_size; ++j) {
      MipsSquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(
          &matrix1[j * bytes_per_vector], cur_query, dimension, m_val, e2,
          &query_result[j]);
    }
  }

  // Candidate: a single M x N kernel call on the transposed inputs.
  MipsSquaredEuclideanDistanceMatrix<uint8_t, batch_size, query_size>::Compute(
      &matrix2[0], &query2[0], dimension, m_val, e2, &result2[0]);

  for (size_t i = 0; i < batch_size * query_size; ++i) {
    ASSERT_NEAR(result1[i], result2[i], 1e-4);
  }
}

// One test per kernel shape. The `_MxN` suffix in each test name matches the
// template arguments passed to
// TestSquaredEuclideanMatrixRepeatedQuadraticInjection<M, N>, where M is the
// batch size and N the query count.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_1x1) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<1, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_2x1) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<2, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_2x2) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<2, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_3x3) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<3, 3>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_4x1) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<4, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_4x2) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<4, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_4x4) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<4, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_8x1) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<8, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_8x2) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<8, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_8x4) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<8, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_8x8) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<8, 8>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x1) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<16, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x2) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<16, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x4) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<16, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x8) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<16, 8>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x16) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<16, 16>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x1) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x2) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x4) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x8) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 8>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x16) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 16>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x32) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 32>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x1) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x2) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x4) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x8) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 8>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x16) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 16>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x32) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 32>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x64) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 64>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x1) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x2) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x4) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x8) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 8>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x16) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 16>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x32) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 32>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x64) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 64>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x128) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 128>();
}

// Prints wall-clock timings (microseconds) for the packed MIPS
// squared-Euclidean kernel (m_value overload) invoked three ways over the same
// random data:
//   1. M x 1 kernel, once per query, on transposed database blocks;
//   2. M x N kernel, once per block, on transposed database and queries;
//   3. 1 x 1 kernel, once per (vector, query) pair, on the original layout.
//
// M = vectors per batch, N = queries, B = number of batches (blocks),
// D = vector dimension in values (two values per byte). m_val is fixed at 4
// and e2 at 0.6^2. Results are overwritten on every call and never checked;
// only the elapsed time is reported.
template <size_t M, size_t N, size_t B, size_t D>
void MipsRepeatedQuadraticInjectionBenchMark(void) {
  const size_t m_val = 4;
  const float u_val = 0.6;
  const float l2_norm = 1.0f;
  const float e2 = (u_val / l2_norm) * (u_val / l2_norm);
  const size_t dimension = D;
  const size_t batch_size = M;
  const size_t block_size = B;
  const size_t query_size = N;
  const size_t matrix_size = block_size * batch_size * dimension / 2;
  const size_t query_matrix_size = query_size * dimension / 2;

  std::vector<uint8_t> matrix1(matrix_size);
  std::vector<uint8_t> matrix2(matrix_size);
  std::vector<uint8_t> query1(query_matrix_size);
  std::vector<uint8_t> query2(query_matrix_size);

  std::mt19937 gen((std::random_device())());
  // std::uniform_int_distribution is only specified for short, int, long,
  // long long and their unsigned counterparts; instantiating it with uint8_t
  // is undefined and rejected by some standard libraries. Draw an int in
  // [0, 255] and narrow it instead.
  std::uniform_int_distribution<int> dist(0, 255);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = static_cast<uint8_t>(dist(gen));
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = static_cast<uint8_t>(dist(gen));
  }

  // Transpose each database block independently, in units of 32-bit words
  // (dimension / 8 words per vector), then the query matrix.
  for (size_t i = 0; i < block_size; ++i) {
    size_t start_pos = i * batch_size * dimension / 2;
    MatrixTranspose((uint32_t *)(&matrix2[start_pos]),
                    (const uint32_t *)(&matrix1[start_pos]), dimension / 8,
                    batch_size);
  }
  MatrixTranspose((uint32_t *)(&query2[0]), (const uint32_t *)query1.data(),
                  dimension / 8, query_size);

  ElapsedTime elapsed_time;
  std::vector<float> results(batch_size * query_size);

  std::cout << "# (" << IntelIntrinsics() << ") INT4 " << dimension << "d, "
            << batch_size << " * " << query_size << " * " << block_size
            << std::endl;

  // 1 Batched Euclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const uint8_t *matrix_batch = &matrix2[i * batch_size * dimension / 2];

    for (size_t j = 0; j < query_size; ++j) {
      const uint8_t *current_query = &query1[j * dimension / 2];
      float *current_results = &results[j * batch_size];

      MipsSquaredEuclideanDistanceMatrix<uint8_t, batch_size, 1>::Compute(
          matrix_batch, current_query, dimension, m_val, e2, current_results);
    }
  }
  std::cout
      << "* 1 Batched MipsSquaredEuclidean(RepeatedQuadraticInjection) (us) \t"
      << elapsed_time.micro_seconds() << std::endl;

  // N Batched Euclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const uint8_t *matrix_batch = &matrix2[i * batch_size * dimension / 2];

    MipsSquaredEuclideanDistanceMatrix<uint8_t, batch_size,
                                       query_size>::Compute(matrix_batch,
                                                            &query2[0],
                                                            dimension, m_val,
                                                            e2, results.data());
  }
  std::cout
      << "* N Batched MipsSquaredEuclidean(RepeatedQuadraticInjection) (us) \t"
      << elapsed_time.micro_seconds() << std::endl;

  // Unbatched Euclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const uint8_t *matrix_batch = &matrix1[i * batch_size * dimension / 2];

    for (size_t j = 0; j < query_size; ++j) {
      const uint8_t *current_query = &query1[j * dimension / 2];
      float *current_results = &results[j * batch_size];

      for (size_t k = 0; k < batch_size; ++k) {
        MipsSquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(
            &matrix_batch[k * dimension / 2], current_query, dimension, m_val,
            e2, &current_results[k]);
      }
    }
  }
  std::cout
      << "* Unbatched MipsSquaredEuclidean(RepeatedQuadraticInjection) (us) \t"
      << elapsed_time.micro_seconds() << std::endl;
}

// Timing-only benchmark sweep; it asserts nothing and only prints timings.
// Template arguments are <batch size, query count, block count, dimension>.
// NOTE(review): the test name carries no DISABLED_ prefix, so GoogleTest runs
// this sweep with the regular suite — confirm that is intended for a
// benchmark.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_) {
  MipsRepeatedQuadraticInjectionBenchMark<2, 1, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<2, 2, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<4, 1, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<4, 2, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<4, 4, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<8, 1, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<8, 2, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<8, 4, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<8, 8, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<16, 1, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<16, 2, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<16, 4, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<16, 8, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<16, 16, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<32, 1, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<32, 2, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<32, 4, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<32, 8, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<32, 16, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<32, 32, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<64, 1, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<64, 2, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<64, 4, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<64, 8, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<128, 1, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<1, 1, 1024, 256>();
}

// Thin forwarder to Distance::MipsSquaredEuclidean for packed buffers, using
// the overload that takes only `e2` (no `m_value`).
static float MipsSquaredEuclidean(const uint8_t *lhs, const uint8_t *rhs,
                                  size_t dim, float e2) {
  const float distance = Distance::MipsSquaredEuclidean(lhs, rhs, dim, e2);
  return distance;
}

// FixedVector convenience overload: forwards the raw data pointers and
// lhs.size() as the dimension to the pointer-based `e2`-only helper.
template <size_t N>
static float MipsSquaredEuclidean(const FixedVector<uint8_t, N> &lhs,
                                  const FixedVector<uint8_t, N> &rhs,
                                  float e2) {
  const uint8_t *lhs_data = lhs.data();
  const uint8_t *rhs_data = rhs.data();
  const size_t dim = lhs.size();
  return MipsSquaredEuclidean(lhs_data, rhs_data, dim, e2);
}

// Reference implementation used to cross-check the library kernel.
//
// Each input byte holds two signed 4-bit values (low nibble first). Both
// operands are decoded to float, scaled by sqrt(e2), and extended with one
// extra component sqrt(1 - squared_norm); the result is the plain squared
// Euclidean distance between the two (dim + 1)-element float vectors.
//
// @param lhs  packed buffer of the left operand (reads dim / 2 bytes, rounded
//             up when dim is odd)
// @param rhs  packed buffer of the right operand (same layout)
// @param dim  number of 4-bit elements per operand
// @param e2   squared scale factor applied to every decoded element
// @return squared Euclidean distance of the two extended vectors
static float ConvertAndComputeByMips(const uint8_t *lhs, const uint8_t *rhs,
                                     size_t dim, float e2) {
  const float eta = std::sqrt(e2);

  // Decodes one packed operand into its scaled, extended float form. The
  // previous version repeated this loop per operand, accumulated a sum of
  // squares that was never read, and printed a debug line on every call.
  auto inject = [dim, eta](const uint8_t *packed) -> std::vector<float> {
    std::vector<float> vec(dim + 1);
    for (size_t i = 0; i < dim; i += 2) {
      const uint8_t v = packed[i / 2];
      // Shift the nibble into the top of an int8_t, then arithmetic-shift back
      // down so the 4-bit value is sign-extended.
      const int8_t lo = static_cast<int8_t>(v << 4) >> 4;
      const int8_t hi = static_cast<int8_t>(v & 0xf0) >> 4;
      vec[i] = lo * eta;
      vec[i + 1] = hi * eta;
    }
    float norm2;
    ailego::SquaredNorm2Matrix<float, 1>::Compute(vec.data(), dim, &norm2);
    // Written last so it also overwrites the padding slot touched by the loop
    // when dim is odd.
    vec[dim] = std::sqrt(1 - norm2);
    return vec;
  };

  std::vector<float> lhs_vec = inject(lhs);
  std::vector<float> rhs_vec = inject(rhs);
  return ailego::Distance::SquaredEuclidean(lhs_vec.data(), rhs_vec.data(),
                                            dim + 1);
}

// FixedVector convenience overload: unwraps both vectors and delegates to the
// pointer-based ConvertAndComputeByMips, using lhs.size() as the dimension.
template <size_t N>
static float ConvertAndComputeByMips(const FixedVector<uint8_t, N> &lhs,
                                     const FixedVector<uint8_t, N> &rhs,
                                     float e2) {
  const uint8_t *lhs_data = lhs.data();
  const uint8_t *rhs_data = rhs.data();
  return ConvertAndComputeByMips(lhs_data, rhs_data, lhs.size(), e2);
}

// Randomized cross-check: for a random even dimension in [4, 256] and up to
// 1000 random vector pairs (dim / 2 bytes each), the library's
// MipsSquaredEuclidean must agree with the float reference
// ConvertAndComputeByMips within epsilon.
TEST(DistanceMatrix, GeneralSphericalInjection) {
  std::mt19937 gen((std::random_device())());
  const float u_val = std::uniform_real_distribution<float>(0.1, 1.0)(gen);
  const float l2_norm =
      std::uniform_real_distribution<float>(100.0, 150.0)(gen);
  const float e2 = (u_val / l2_norm) * (u_val / l2_norm);
  const float epsilon = 1e-6;
  const uint32_t dim =
      (std::uniform_int_distribution<uint32_t>(2, 128))(gen) * 2;
  const uint32_t count = std::uniform_int_distribution<uint32_t>(1, 1000)(gen);
  // uniform_int_distribution is not specified for character types such as
  // uint8_t (undefined behaviour, and a hard error on some standard
  // libraries), so draw ints in the byte range and narrow explicitly.
  std::uniform_int_distribution<int> dist(0, 255);
  for (size_t i = 0; i < count; ++i) {
    std::vector<uint8_t> vec1(dim / 2);
    std::vector<uint8_t> vec2(dim / 2);
    for (size_t d = 0; d < dim / 2; ++d) {
      vec1[d] = static_cast<uint8_t>(dist(gen));
      vec2[d] = static_cast<uint8_t>(dist(gen));
    }
    ASSERT_NEAR(ConvertAndComputeByMips(vec1.data(), vec2.data(), dim, e2),
                MipsSquaredEuclidean(vec1.data(), vec2.data(), dim, e2),
                epsilon);
  }
}

// Deterministic checks of MipsSquaredEuclidean on hand-written byte arrays.
// The dimension argument is twice the array length (two elements per byte).
// Identical operands are expected to give 0; the remaining cases compare
// against hard-coded expected values.
// The unused random engine that used to be constructed here was removed: no
// input in this test is random.
TEST(DistanceMatrix, FixedVectorsSphericalInjection) {
  const float u_val = 0.68f;
  const float l2_norm = 20.0f;
  const float e2 = (u_val / l2_norm) * (u_val / l2_norm);
  const float epsilon = 1e-5;

  // Identical operands of growing length.
  uint8_t a[] = {0}, b[] = {0};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(a, b, 2, e2), epsilon);

  uint8_t c[] = {0, 1}, d[] = {0, 1};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(c, d, 4, e2), epsilon);

  uint8_t e[] = {0, 1, 2}, f[] = {0, 1, 2};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(e, f, 6, e2), epsilon);

  uint8_t g[] = {0, 1, 2, 3}, h[] = {0, 1, 2, 3};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(g, h, 8, e2), epsilon);

  uint8_t i[] = {0, 1, 2, 3, 4}, j[] = {0, 1, 2, 3, 4};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(i, j, 10, e2), epsilon);

  uint8_t l[] = {0, 1, 2, 3, 4, 5}, k[] = {0, 1, 2, 3, 4, 5};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(l, k, 12, e2), epsilon);

  uint8_t m[] = {0, 1, 2, 3, 4, 5, 6}, n[] = {0, 1, 2, 3, 4, 5, 6};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(m, n, 14, e2), epsilon);

  uint8_t o[] = {0, 1, 2, 3, 4, 5, 6, 7}, p[] = {0, 1, 2, 3, 4, 5, 6, 7};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(o, p, 16, e2), epsilon);

  uint8_t q[] = {0, 1, 2, 3, 4, 5, 6, 7, 8}, r[] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(q, r, 18, e2), epsilon);

  uint8_t s[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
          t[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(s, t, 20, e2), epsilon);

  // Differing operands against hard-coded expected distances.
  uint8_t u[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
          v[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
  EXPECT_NEAR(0.367926508f, MipsSquaredEuclidean(u, v, 22, e2), epsilon);

  uint8_t w[12] = {0}, x[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
  EXPECT_NEAR(0.403734415f, MipsSquaredEuclidean(w, x, 24, e2), epsilon);

  uint8_t y[13] = {0}, z[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
  EXPECT_NEAR(0.427079230f, MipsSquaredEuclidean(y, z, 26, e2), epsilon);

  uint8_t x14[14] = {0}, y14[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 20};
  EXPECT_NEAR(0.452268809f, MipsSquaredEuclidean(x14, y14, 28, e2), epsilon);

  uint8_t x15[15] = {0},
          y15[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 20, 30};
  EXPECT_NEAR(0.459755957f, MipsSquaredEuclidean(x15, y15, 30, e2), epsilon);
  uint8_t x16[16] = {9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9},
          y16[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 20, 30, 50};
  EXPECT_NEAR(1.566913843f, MipsSquaredEuclidean(x16, y16, 32, e2), epsilon);
}

// Checks that the batched kernel
// MipsSquaredEuclideanDistanceMatrix<uint8_t, M, N> produces the same
// distances as M * N individual <uint8_t, 1, 1> computations.
//
// M is the number of stored vectors (batch size) and N the number of queries.
// Every vector occupies dimension / 2 bytes. The per-pair path reads the
// row-major buffers (matrix1 / query1); the batched path reads copies that
// were transposed in units of 32-bit words (matrix2 / query2).
template <size_t M, size_t N>
void TestSquaredEuclideanMatrixSphericalInjection(void) {
  std::mt19937 gen((std::random_device())());

  const float u_val = std::uniform_real_distribution<float>(0.3, 0.9)(gen);
  const size_t batch_size = M;
  const size_t query_size = N;
  size_t dimension = (std::uniform_int_distribution<size_t>(2, 128))(gen) * 8;
  const size_t bytes_per_vector = dimension / 2;
  size_t matrix_size = batch_size * bytes_per_vector;
  size_t query_matrix_size = query_size * bytes_per_vector;

  std::vector<uint8_t> matrix1(matrix_size);
  std::vector<uint8_t> matrix2(matrix_size);
  std::vector<uint8_t> query1(query_matrix_size);
  std::vector<uint8_t> query2(query_matrix_size);
  std::vector<float> result1(batch_size * query_size);
  std::vector<float> result2(batch_size * query_size);

  // uniform_int_distribution is not specified for character types such as
  // uint8_t (undefined behaviour, and a hard error on some standard
  // libraries), so draw ints in the byte range and narrow explicitly.
  std::uniform_int_distribution<int> dist(0, 255);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = static_cast<uint8_t>(dist(gen));
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = static_cast<uint8_t>(dist(gen));
  }

  // Largest squared norm over ALL vectors and queries. The scan advances by
  // the per-vector byte count; stepping by `dimension` bytes, as before,
  // visited only every other vector, so e2 was not bounded by the skipped
  // ones.
  float squared_l2_norm = 0.0f;
  for (size_t i = 0; i < matrix_size; i += bytes_per_vector) {
    float score;
    SquaredNorm2Matrix<uint8_t, 1>::Compute(&matrix1[i], dimension, &score);
    squared_l2_norm = std::max(squared_l2_norm, score);
  }
  for (size_t i = 0; i < query_matrix_size; i += bytes_per_vector) {
    float score;
    SquaredNorm2Matrix<uint8_t, 1>::Compute(&query1[i], dimension, &score);
    squared_l2_norm = std::max(squared_l2_norm, score);
  }
  const float e2 = u_val * u_val / squared_l2_norm;

  // Build the word-transposed copies consumed by the batched kernel
  // (dimension / 8 words of 32 bits per vector).
  MatrixTranspose(reinterpret_cast<uint32_t *>(&matrix2[0]),
                  reinterpret_cast<const uint32_t *>(matrix1.data()),
                  dimension / 8, batch_size);
  MatrixTranspose(reinterpret_cast<uint32_t *>(&query2[0]),
                  reinterpret_cast<const uint32_t *>(query1.data()),
                  dimension / 8, query_size);

  // Reference results: one 1x1 computation per (query, vector) pair, stored
  // query-major.
  for (size_t i = 0; i < query_size; ++i) {
    const uint8_t *cur_query = &query1[i * dimension / 2];
    float *query_result = &result1[i * batch_size];

    for (size_t j = 0; j < batch_size; ++j) {
      MipsSquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(
          &matrix1[j * dimension / 2], cur_query, dimension, e2,
          &query_result[j]);
    }
  }
  // Batched results in a single call.
  MipsSquaredEuclideanDistanceMatrix<uint8_t, batch_size, query_size>::Compute(
      &matrix2[0], &query2[0], dimension, e2, &result2[0]);

  for (size_t i = 0; i < batch_size * query_size; ++i) {
    ASSERT_NEAR(result1[i], result2[i], 1e-4);
  }
}

// Batched-vs-per-pair agreement check with batch_size = 1, query_size = 1.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_1x1) {
  TestSquaredEuclideanMatrixSphericalInjection<1, 1>();
}

// Batched-vs-per-pair agreement check with batch_size = 2, query_size = 1.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_2x1) {
  TestSquaredEuclideanMatrixSphericalInjection<2, 1>();
}

// Batched-vs-per-pair agreement check with batch_size = 2, query_size = 2.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_2x2) {
  TestSquaredEuclideanMatrixSphericalInjection<2, 2>();
}

// Batched-vs-per-pair agreement check with batch_size = 3, query_size = 3.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_3x3) {
  TestSquaredEuclideanMatrixSphericalInjection<3, 3>();
}

// Batched-vs-per-pair agreement check with batch_size = 4, query_size = 1.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_4x1) {
  TestSquaredEuclideanMatrixSphericalInjection<4, 1>();
}

// Batched-vs-per-pair agreement check with batch_size = 4, query_size = 2.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_4x2) {
  TestSquaredEuclideanMatrixSphericalInjection<4, 2>();
}

// Batched-vs-per-pair agreement check with batch_size = 4, query_size = 4.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_4x4) {
  TestSquaredEuclideanMatrixSphericalInjection<4, 4>();
}

// Batched-vs-per-pair agreement check with batch_size = 8, query_size = 1.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_8x1) {
  TestSquaredEuclideanMatrixSphericalInjection<8, 1>();
}

// Batched-vs-per-pair agreement check with batch_size = 8, query_size = 2.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_8x2) {
  TestSquaredEuclideanMatrixSphericalInjection<8, 2>();
}

// Batched-vs-per-pair agreement check with batch_size = 8, query_size = 4.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_8x4) {
  TestSquaredEuclideanMatrixSphericalInjection<8, 4>();
}

// Batched-vs-per-pair agreement check with batch_size = 8, query_size = 8.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_8x8) {
  TestSquaredEuclideanMatrixSphericalInjection<8, 8>();
}

// Batched-vs-per-pair agreement check with batch_size = 16, query_size = 1.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x1) {
  TestSquaredEuclideanMatrixSphericalInjection<16, 1>();
}

// Batched-vs-per-pair agreement check with batch_size = 16, query_size = 2.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x2) {
  TestSquaredEuclideanMatrixSphericalInjection<16, 2>();
}

// Batched-vs-per-pair agreement check with batch_size = 16, query_size = 4.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x4) {
  TestSquaredEuclideanMatrixSphericalInjection<16, 4>();
}

// Batched-vs-per-pair agreement check with batch_size = 16, query_size = 8.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x8) {
  TestSquaredEuclideanMatrixSphericalInjection<16, 8>();
}

// Batched-vs-per-pair agreement check with batch_size = 16, query_size = 16.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x16) {
  TestSquaredEuclideanMatrixSphericalInjection<16, 16>();
}

// Batched-vs-per-pair agreement check with batch_size = 32, query_size = 1.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x1) {
  TestSquaredEuclideanMatrixSphericalInjection<32, 1>();
}

// Batched-vs-per-pair agreement check with batch_size = 32, query_size = 2.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x2) {
  TestSquaredEuclideanMatrixSphericalInjection<32, 2>();
}

// Batched-vs-per-pair agreement check with batch_size = 32, query_size = 4.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x4) {
  TestSquaredEuclideanMatrixSphericalInjection<32, 4>();
}

// Batched-vs-per-pair agreement check with batch_size = 32, query_size = 8.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x8) {
  TestSquaredEuclideanMatrixSphericalInjection<32, 8>();
}

// Batched-vs-per-pair agreement check with batch_size = 32, query_size = 16.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x16) {
  TestSquaredEuclideanMatrixSphericalInjection<32, 16>();
}

// Batched-vs-per-pair agreement check with batch_size = 32, query_size = 32.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x32) {
  TestSquaredEuclideanMatrixSphericalInjection<32, 32>();
}

// Batched-vs-per-pair agreement check with batch_size = 64, query_size = 1.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x1) {
  TestSquaredEuclideanMatrixSphericalInjection<64, 1>();
}

// Batched-vs-per-pair agreement check with batch_size = 64, query_size = 2.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x2) {
  TestSquaredEuclideanMatrixSphericalInjection<64, 2>();
}

// Batched-vs-per-pair agreement check with batch_size = 64, query_size = 4.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x4) {
  TestSquaredEuclideanMatrixSphericalInjection<64, 4>();
}

// Batched-vs-per-pair agreement check with batch_size = 64, query_size = 8.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x8) {
  TestSquaredEuclideanMatrixSphericalInjection<64, 8>();
}

// Batched-vs-per-pair agreement check with batch_size = 64, query_size = 16.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x16) {
  TestSquaredEuclideanMatrixSphericalInjection<64, 16>();
}

// Batched-vs-per-pair agreement check with batch_size = 64, query_size = 32.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x32) {
  TestSquaredEuclideanMatrixSphericalInjection<64, 32>();
}

// Batched-vs-per-pair agreement check with batch_size = 64, query_size = 64.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x64) {
  TestSquaredEuclideanMatrixSphericalInjection<64, 64>();
}

// Batched-vs-per-pair agreement check with batch_size = 128, query_size = 1.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x1) {
  TestSquaredEuclideanMatrixSphericalInjection<128, 1>();
}

// Batched-vs-per-pair agreement check with batch_size = 128, query_size = 2.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x2) {
  TestSquaredEuclideanMatrixSphericalInjection<128, 2>();
}

// Batched-vs-per-pair agreement check with batch_size = 128, query_size = 4.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x4) {
  TestSquaredEuclideanMatrixSphericalInjection<128, 4>();
}

// Batched-vs-per-pair agreement check with batch_size = 128, query_size = 8.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x8) {
  TestSquaredEuclideanMatrixSphericalInjection<128, 8>();
}

// Batched-vs-per-pair agreement check with batch_size = 128, query_size = 16.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x16) {
  TestSquaredEuclideanMatrixSphericalInjection<128, 16>();
}

// Batched-vs-per-pair agreement check with batch_size = 128, query_size = 32.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x32) {
  TestSquaredEuclideanMatrixSphericalInjection<128, 32>();
}

// Batched-vs-per-pair agreement check with batch_size = 128, query_size = 64.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x64) {
  TestSquaredEuclideanMatrixSphericalInjection<128, 64>();
}

// Batched-vs-per-pair agreement check with batch_size = 128, query_size = 128.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x128) {
  TestSquaredEuclideanMatrixSphericalInjection<128, 128>();
}

// Times three ways of computing MipsSquaredEuclidean distances over random
// byte data and prints the elapsed microseconds of each to stdout:
//   1. "1 Batched": one <uint8_t, M, 1> call per query against the
//      word-transposed batch.
//   2. "N Batched": one <uint8_t, M, N> call per batch using the transposed
//      batch and the transposed queries.
//   3. "Unbatched": one <uint8_t, 1, 1> call per (vector, query) pair on the
//      untransposed data.
//
// Template parameters: M = vectors per batch, N = number of queries,
// B = number of batches (blocks), D = dimension. Every vector occupies D / 2
// bytes. No result is validated; this function only measures and prints.
template <size_t M, size_t N, size_t B, size_t D>
void MipsSphericalInjectionBenchMark(void) {
  const float u_val = 0.99;
  const float l2_norm = 100.0f;
  const float e2 = (u_val / l2_norm) * (u_val / l2_norm);
  const size_t dimension = D;
  const size_t batch_size = M;
  const size_t block_size = B;
  const size_t query_size = N;
  const size_t matrix_size = block_size * batch_size * dimension / 2;
  const size_t query_matrix_size = query_size * dimension / 2;

  std::vector<uint8_t> matrix1(matrix_size);
  std::vector<uint8_t> matrix2(matrix_size);
  std::vector<uint8_t> query1(query_matrix_size);
  std::vector<uint8_t> query2(query_matrix_size);

  std::mt19937 gen((std::random_device())());
  // uniform_int_distribution is not specified for character types such as
  // uint8_t (undefined behaviour, and a hard error on some standard
  // libraries), so draw ints in the byte range and narrow explicitly.
  std::uniform_int_distribution<int> dist(0, 255);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = static_cast<uint8_t>(dist(gen));
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = static_cast<uint8_t>(dist(gen));
  }

  // Transpose every batch independently, in units of 32-bit words
  // (dimension / 8 words per vector), then the query block.
  for (size_t i = 0; i < block_size; ++i) {
    size_t start_pos = i * batch_size * dimension / 2;
    MatrixTranspose(reinterpret_cast<uint32_t *>(&matrix2[start_pos]),
                    reinterpret_cast<const uint32_t *>(&matrix1[start_pos]),
                    dimension / 8, batch_size);
  }
  MatrixTranspose(reinterpret_cast<uint32_t *>(&query2[0]),
                  reinterpret_cast<const uint32_t *>(query1.data()),
                  dimension / 8, query_size);

  ElapsedTime elapsed_time;
  std::vector<float> results(batch_size * query_size);

  std::cout << "# (" << IntelIntrinsics() << ") INT4 " << dimension << "d, "
            << batch_size << " * " << query_size << " * " << block_size
            << std::endl;

  // 1 Batched Euclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const uint8_t *matrix_batch = &matrix2[i * batch_size * dimension / 2];

    for (size_t j = 0; j < query_size; ++j) {
      const uint8_t *current_query = &query1[j * dimension / 2];
      float *current_results = &results[j * batch_size];

      MipsSquaredEuclideanDistanceMatrix<uint8_t, batch_size, 1>::Compute(
          matrix_batch, current_query, dimension, e2, current_results);
    }
  }
  std::cout << "* 1 Batched MipsSquaredEuclidean(SphericalInjection) (us) \t"
            << elapsed_time.micro_seconds() << std::endl;

  // N Batched Euclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const uint8_t *matrix_batch = &matrix2[i * batch_size * dimension / 2];

    MipsSquaredEuclideanDistanceMatrix<uint8_t, batch_size,
                                       query_size>::Compute(matrix_batch,
                                                            &query2[0],
                                                            dimension, e2,
                                                            results.data());
  }
  std::cout << "* N Batched MipsSquaredEuclidean(SphericalInjection) (us) \t"
            << elapsed_time.micro_seconds() << std::endl;

  // Unbatched Euclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const uint8_t *matrix_batch = &matrix1[i * batch_size * dimension / 2];

    for (size_t j = 0; j < query_size; ++j) {
      const uint8_t *current_query = &query1[j * dimension / 2];
      float *current_results = &results[j * batch_size];

      for (size_t k = 0; k < batch_size; ++k) {
        MipsSquaredEuclideanDistanceMatrix<uint8_t, 1, 1>::Compute(
            &matrix_batch[k * dimension / 2], current_query, dimension, e2,
            &current_results[k]);
      }
    }
  }
  std::cout << "* Unbatched MipsSquaredEuclidean(SphericalInjection) (us) \t"
            << elapsed_time.micro_seconds() << std::endl;
}

// Runs MipsSphericalInjectionBenchMark<M, N, B, D> over a fixed list of
// configurations, where M = vectors per batch, N = queries, B = batches and
// D = dimension. Each call prints its own timing lines; nothing is asserted.
// The DISABLED_ name prefix keeps this benchmark out of the default test run.
TEST(DistanceMatrix,
     DISABLED_MipsSquaredEuclideanSphericalInjection_Benchmark) {
  MipsSphericalInjectionBenchMark<2, 1, 512, 64>();
  MipsSphericalInjectionBenchMark<2, 2, 512, 64>();
  MipsSphericalInjectionBenchMark<4, 1, 512, 64>();
  MipsSphericalInjectionBenchMark<4, 2, 512, 64>();
  MipsSphericalInjectionBenchMark<4, 4, 512, 64>();
  MipsSphericalInjectionBenchMark<8, 1, 512, 64>();
  MipsSphericalInjectionBenchMark<8, 2, 512, 64>();
  MipsSphericalInjectionBenchMark<8, 4, 512, 64>();
  MipsSphericalInjectionBenchMark<8, 8, 512, 64>();
  MipsSphericalInjectionBenchMark<16, 1, 512, 64>();
  MipsSphericalInjectionBenchMark<16, 2, 512, 64>();
  MipsSphericalInjectionBenchMark<16, 4, 512, 64>();
  MipsSphericalInjectionBenchMark<16, 8, 512, 64>();
  MipsSphericalInjectionBenchMark<16, 16, 512, 64>();
  MipsSphericalInjectionBenchMark<32, 1, 512, 64>();
  MipsSphericalInjectionBenchMark<32, 2, 512, 64>();
  MipsSphericalInjectionBenchMark<32, 4, 512, 64>();
  MipsSphericalInjectionBenchMark<32, 8, 512, 64>();
  MipsSphericalInjectionBenchMark<32, 16, 512, 64>();
  MipsSphericalInjectionBenchMark<32, 32, 512, 64>();
  MipsSphericalInjectionBenchMark<64, 1, 512, 64>();
  MipsSphericalInjectionBenchMark<64, 2, 512, 64>();
  MipsSphericalInjectionBenchMark<64, 4, 512, 64>();
  MipsSphericalInjectionBenchMark<64, 8, 512, 64>();
  MipsSphericalInjectionBenchMark<128, 1, 512, 64>();
  MipsSphericalInjectionBenchMark<1, 1, 1024, 256>();
}


================================================
FILE: tests/ailego/math/mips_euclidean_distance_matrix_int8_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <functional>
#include <random>
#include <string>
#include <thread>
#include <vector>
#include <ailego/container/bitmap.h>
#include <ailego/internal/cpu_features.h>
#include <ailego/math/distance.h>
#include <ailego/math/norm2_matrix.h>
#include <gtest/gtest.h>
#include <zvec/ailego/container/vector.h>
#include <zvec/ailego/utility/time_helper.h>

using namespace zvec;
using namespace zvec::ailego;

// Returns the string reported by internal::CpuFeatures::Intrinsics(); the
// benchmarks print it in their header line.
static inline const char *IntelIntrinsics(void) {
  const char *intrinsics_name = internal::CpuFeatures::Intrinsics();
  return intrinsics_name;
}

// Transposes a matrix of 32-bit words.
//
// `src` is read as N rows of M words (element (j, i) at src[M * j + i]) and
// `dst` is written as M rows of N words (element (i, j) at dst[N * i + j]).
// The two buffers must each hold at least M * N words; nothing is written
// when either extent is zero.
static inline void MatrixTranspose(uint32_t *dst, const uint32_t *src, size_t M,
                                   size_t N) {
  uint32_t *out = dst;
  for (size_t row = 0; row < M; ++row) {
    for (size_t col = 0; col < N; ++col) {
      // Output is filled sequentially, row by row.
      *out++ = src[col * M + row];
    }
  }
}

// Pointer-based entry point used by the tests below: hands the two int8_t
// buffers, the dimension, m_value and e2 to Distance::MipsSquaredEuclidean
// unchanged.
static float MipsSquaredEuclidean(const int8_t *lhs, const int8_t *rhs,
                                  size_t dim, size_t m_value, float e2) {
  const float distance =
      Distance::MipsSquaredEuclidean(lhs, rhs, dim, m_value, e2);
  return distance;
}

// FixedVector convenience overload: unwraps both vectors and delegates to the
// pointer-based MipsSquaredEuclidean, using lhs.size() as the dimension.
template <size_t N>
static float MipsSquaredEuclidean(const FixedVector<int8_t, N> &lhs,
                                  const FixedVector<int8_t, N> &rhs,
                                  size_t m_value, float e2) {
  const int8_t *lhs_data = lhs.data();
  const int8_t *rhs_data = rhs.data();
  return MipsSquaredEuclidean(lhs_data, rhs_data, lhs.size(), m_value, e2);
}

// Reference implementation used to cross-check the library kernel.
//
// Each operand is scaled element-wise by sqrt(e2) into a float vector and then
// extended with m_value extra components. Starting from the squared norm of
// the scaled vector, every extra component is 0.5 minus the running value,
// after which the running value is squared. The result is the plain squared
// Euclidean distance between the two (dim + m_value)-element vectors.
static float ConvertAndComputeByMips(const int8_t *lhs, const int8_t *rhs,
                                     size_t dim, size_t m_value, float e2) {
  const float eta = std::sqrt(e2);
  const size_t total = dim + m_value;

  // Builds the scaled and extended float vector for one operand.
  auto expand = [dim, total, eta](const int8_t *src) -> std::vector<float> {
    std::vector<float> out(total);
    float acc = 0.0f;
    for (size_t k = 0; k < dim; ++k) {
      const float scaled = src[k] * eta;
      out[k] = scaled;
      acc += scaled * scaled;
    }
    for (size_t k = dim; k < total; ++k) {
      out[k] = 0.5f - acc;
      acc *= acc;
    }
    return out;
  };

  std::vector<float> lhs_vec = expand(lhs);
  std::vector<float> rhs_vec = expand(rhs);
  return ailego::Distance::SquaredEuclidean(lhs_vec.data(), rhs_vec.data(),
                                            total);
}

// FixedVector convenience overload: unwraps both vectors and delegates to the
// pointer-based ConvertAndComputeByMips, using lhs.size() as the dimension.
template <size_t N>
static float ConvertAndComputeByMips(const FixedVector<int8_t, N> &lhs,
                                     const FixedVector<int8_t, N> &rhs,
                                     size_t m_value, float e2) {
  const int8_t *lhs_data = lhs.data();
  const int8_t *rhs_data = rhs.data();
  return ConvertAndComputeByMips(lhs_data, rhs_data, lhs.size(), m_value, e2);
}

// Randomized cross-check: for a random dimension in [2, 128], a random m_val
// in [1, 4] and up to 1000 random int8 vector pairs, the library's
// MipsSquaredEuclidean must agree with the float reference
// ConvertAndComputeByMips within epsilon.
TEST(DistanceMatrix, GeneralRepeatedQuadraticInjection) {
  std::mt19937 gen((std::random_device())());
  const size_t m_val = std::uniform_int_distribution<size_t>(1, 4)(gen);
  const float u_val = std::uniform_real_distribution<float>(0.1, 1.0)(gen);
  const float l2_norm =
      std::uniform_real_distribution<float>(1000.0, 1500.0)(gen);
  const float e2 = (u_val / l2_norm) * (u_val / l2_norm);
  const float epsilon = 1e-6;
  const uint32_t dim = std::uniform_int_distribution<uint32_t>(2, 128)(gen);
  const uint32_t count = std::uniform_int_distribution<uint32_t>(1, 1000)(gen);
  // uniform_int_distribution is not specified for character types such as
  // int8_t (undefined behaviour, and a hard error on some standard
  // libraries), so draw ints in [-127, 127] and narrow explicitly.
  std::uniform_int_distribution<int> dist(-127, 127);
  for (size_t i = 0; i < count; ++i) {
    std::vector<int8_t> vec1(dim);
    std::vector<int8_t> vec2(dim);
    for (size_t d = 0; d < dim; ++d) {
      vec1[d] = static_cast<int8_t>(dist(gen));
      vec2[d] = static_cast<int8_t>(dist(gen));
    }
    ASSERT_NEAR(
        ConvertAndComputeByMips(vec1.data(), vec2.data(), dim, m_val, e2),
        MipsSquaredEuclidean(vec1.data(), vec2.data(), dim, m_val, e2),
        epsilon);
  }
}

// Deterministic checks of MipsSquaredEuclidean on hand-written int8 arrays
// with m_val = 4. Identical operands are expected to give 0; the remaining
// cases compare against hard-coded expected values.
// The unused random engine that used to be constructed here was removed: no
// input in this test is random.
TEST(DistanceMatrix, FixedVectorsRepeatedQuadraticInjection) {
  const size_t m_val = 4;
  const float u_val = 0.68f;
  const float l2_norm = 30.0f;
  const float e2 = (u_val / l2_norm) * (u_val / l2_norm);
  const float epsilon = 1e-5;

  // Identical operands of growing length.
  int8_t a[] = {0}, b[] = {0};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(a, b, 1, m_val, e2), epsilon);

  int8_t c[] = {0, 1}, d[] = {0, 1};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(c, d, 2, m_val, e2), epsilon);

  int8_t e[] = {0, 1, 2}, f[] = {0, 1, 2};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(e, f, 3, m_val, e2), epsilon);

  int8_t g[] = {0, 1, 2, 3}, h[] = {0, 1, 2, 3};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(g, h, 4, m_val, e2), epsilon);

  int8_t i[] = {0, 1, 2, 3, 4}, j[] = {0, 1, 2, 3, 4};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(i, j, 5, m_val, e2), epsilon);

  int8_t l[] = {0, 1, 2, 3, 4, 5}, k[] = {0, 1, 2, 3, 4, 5};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(l, k, 6, m_val, e2), epsilon);

  int8_t m[] = {0, 1, 2, 3, 4, 5, 6}, n[] = {0, 1, 2, 3, 4, 5, 6};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(m, n, 7, m_val, e2), epsilon);

  int8_t o[] = {0, 1, 2, 3, 4, 5, 6, 7}, p[] = {0, 1, 2, 3, 4, 5, 6, 7};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(o, p, 8, m_val, e2), epsilon);

  int8_t q[] = {0, 1, 2, 3, 4, 5, 6, 7, 8}, r[] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(q, r, 9, m_val, e2), epsilon);

  int8_t s[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
         t[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(s, t, 10, m_val, e2), epsilon);

  // Differing operands against hard-coded expected distances.
  int8_t u[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
         v[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
  EXPECT_NEAR(0.2384642f, MipsSquaredEuclidean(u, v, 11, m_val, e2), epsilon);

  int8_t w[12] = {0}, x[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
  EXPECT_NEAR(0.3321453f, MipsSquaredEuclidean(w, x, 12, m_val, e2), epsilon);

  int8_t y[13] = {0}, z[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
  EXPECT_NEAR(0.4580747f, MipsSquaredEuclidean(y, z, 13, m_val, e2), epsilon);

  int8_t x14[14] = {0}, y14[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 20};
  EXPECT_NEAR(0.9224106f, MipsSquaredEuclidean(x14, y14, 14, m_val, e2),
              epsilon);

  int8_t x15[15] = {0},
         y15[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 20, 30};
  EXPECT_NEAR(5.0584077f, MipsSquaredEuclidean(x15, y15, 15, m_val, e2),
              epsilon);
}

// Checks that the batched kernel
// MipsSquaredEuclideanDistanceMatrix<int8_t, M, N> produces the same distances
// as M * N individual <int8_t, 1, 1> computations.
//
// M is the number of stored vectors (batch size) and N the number of queries.
// The per-pair path reads the row-major buffers (matrix1 / query1); the
// batched path reads copies that were transposed in units of 32-bit words
// (matrix2 / query2). e2 is derived from the largest squared norm found among
// all vectors and queries.
template <size_t M, size_t N>
void TestSquaredEuclideanMatrixRepeatedQuadraticInjection(void) {
  std::mt19937 gen((std::random_device())());

  const size_t m_val = std::uniform_int_distribution<size_t>(1, 4)(gen);
  const float u_val = std::uniform_real_distribution<float>(0.3, 0.9)(gen);
  const size_t batch_size = M;
  const size_t query_size = N;
  size_t dimension = std::uniform_int_distribution<size_t>(2, 128)(gen) * 4;
  size_t matrix_size = batch_size * dimension;
  size_t query_matrix_size = query_size * dimension;

  std::vector<int8_t> matrix1(matrix_size);
  std::vector<int8_t> matrix2(matrix_size);
  std::vector<int8_t> query1(query_matrix_size);
  std::vector<int8_t> query2(query_matrix_size);
  std::vector<float> result1(batch_size * query_size);
  std::vector<float> result2(batch_size * query_size);

  // uniform_int_distribution is not specified for character types such as
  // int8_t (undefined behaviour, and a hard error on some standard
  // libraries), so draw ints in [-127, 127] and narrow explicitly.
  std::uniform_int_distribution<int> dist(-127, 127);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = static_cast<int8_t>(dist(gen));
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = static_cast<int8_t>(dist(gen));
  }

  // Largest squared norm over all vectors and queries.
  float squared_l2_norm = 0.0f;
  for (size_t i = 0; i < matrix_size; i += dimension) {
    float score;
    SquaredNorm2Matrix<int8_t, 1>::Compute(&matrix1[i], dimension, &score);
    squared_l2_norm = std::max(squared_l2_norm, score);
  }
  for (size_t i = 0; i < query_matrix_size; i += dimension) {
    float score;
    SquaredNorm2Matrix<int8_t, 1>::Compute(&query1[i], dimension, &score);
    squared_l2_norm = std::max(squared_l2_norm, score);
  }
  const float e2 = u_val * u_val / squared_l2_norm;

  // Build the word-transposed copies consumed by the batched kernel
  // (dimension / 4 words of 32 bits per vector).
  MatrixTranspose(reinterpret_cast<uint32_t *>(&matrix2[0]),
                  reinterpret_cast<const uint32_t *>(matrix1.data()),
                  dimension / 4, batch_size);
  MatrixTranspose(reinterpret_cast<uint32_t *>(&query2[0]),
                  reinterpret_cast<const uint32_t *>(query1.data()),
                  dimension / 4, query_size);

  // Reference results: one 1x1 computation per (query, vector) pair, stored
  // query-major.
  for (size_t i = 0; i < query_size; ++i) {
    const int8_t *cur_query = &query1[i * dimension];
    float *query_result = &result1[i * batch_size];

    for (size_t j = 0; j < batch_size; ++j) {
      MipsSquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute(
          &matrix1[j * dimension], cur_query, dimension, m_val, e2,
          &query_result[j]);
    }
  }
  // Batched results in a single call.
  MipsSquaredEuclideanDistanceMatrix<int8_t, batch_size, query_size>::Compute(
      &matrix2[0], &query2[0], dimension, m_val, e2, &result2[0]);

  for (size_t i = 0; i < batch_size * query_size; ++i) {
    EXPECT_NEAR(result1[i], result2[i], 1e-4);
  }
}

// Batched-vs-per-pair agreement check with batch_size = 1, query_size = 1.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_1x1) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<1, 1>();
}

// Batched-vs-per-pair agreement check with batch_size = 2, query_size = 1.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_2x1) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<2, 1>();
}

// Batched-vs-per-pair agreement check with batch_size = 2, query_size = 2.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_2x2) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<2, 2>();
}

// Batched-vs-per-pair agreement check with batch_size = 3, query_size = 3.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_3x3) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<3, 3>();
}

// Batched-vs-per-pair agreement check with batch_size = 4, query_size = 1.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_4x1) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<4, 1>();
}

// Batched-vs-per-pair agreement check with batch_size = 4, query_size = 2.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_4x2) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<4, 2>();
}

// Batched-vs-per-pair agreement check with batch_size = 4, query_size = 4.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_4x4) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<4, 4>();
}

// Batched-vs-per-pair agreement check with batch_size = 8, query_size = 1.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_8x1) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<8, 1>();
}

// Batched-vs-per-pair agreement check with batch_size = 8, query_size = 2.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_8x2) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<8, 2>();
}

// Batched-vs-per-pair agreement check with batch_size = 8, query_size = 4.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_8x4) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<8, 4>();
}

// Batched-vs-per-pair agreement check with batch_size = 8, query_size = 8.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_8x8) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<8, 8>();
}

// Batched-vs-per-pair agreement check with batch_size = 16, query_size = 1.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x1) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<16, 1>();
}

// Batched-vs-per-pair agreement check with batch_size = 16, query_size = 2.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x2) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<16, 2>();
}

// Batched-vs-per-pair agreement check with batch_size = 16, query_size = 4.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x4) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<16, 4>();
}

// Batched-vs-per-pair agreement check with batch_size = 16, query_size = 8.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x8) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<16, 8>();
}

// Batched-vs-per-pair agreement check with batch_size = 16, query_size = 16.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_16x16) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<16, 16>();
}

// Batched-vs-per-pair agreement check with batch_size = 32, query_size = 1.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x1) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 1>();
}

// Batched-vs-per-pair agreement check with batch_size = 32, query_size = 2.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x2) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 2>();
}

// Batched-vs-per-pair agreement check with batch_size = 32, query_size = 4.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x4) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 4>();
}

// Batched-vs-per-pair agreement check with batch_size = 32, query_size = 8.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x8) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 8>();
}

// Batched-vs-per-pair agreement check with batch_size = 32, query_size = 16.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x16) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 16>();
}

// Batched-vs-per-pair agreement check with batch_size = 32, query_size = 32.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_32x32) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<32, 32>();
}

// Batched-vs-per-pair agreement check with batch_size = 64, query_size = 1.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x1) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 1>();
}

// Batched-vs-per-pair agreement check with batch_size = 64, query_size = 2.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x2) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 2>();
}

// Batched-vs-per-pair agreement check with batch_size = 64, query_size = 4.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x4) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 4>();
}

// Batched-vs-per-pair agreement check with batch_size = 64, query_size = 8.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x8) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 8>();
}

// Batched-vs-per-pair agreement check with batch_size = 64, query_size = 16.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x16) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 16>();
}

// Batched-vs-per-pair agreement check with batch_size = 64, query_size = 32.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x32) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 32>();
}

// Batched-vs-per-pair agreement check with batch_size = 64, query_size = 64.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_64x64) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<64, 64>();
}

// Batched-vs-per-pair agreement check with batch_size = 128, query_size = 1.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x1) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 1>();
}

// Batched-vs-per-pair agreement check with batch_size = 128, query_size = 2.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x2) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 2>();
}

// Batched-vs-per-pair agreement check with batch_size = 128, query_size = 4.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x4) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 4>();
}

// Batched-vs-per-pair agreement check with batch_size = 128, query_size = 8.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x8) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 8>();
}

// Batched-vs-per-pair agreement check with batch_size = 128, query_size = 16.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x16) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 16>();
}

// Batched-vs-per-pair agreement check with batch_size = 128, query_size = 32.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x32) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 32>();
}

// Batched-vs-per-pair agreement check with batch_size = 128, query_size = 64.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x64) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 64>();
}

// Batched-vs-per-pair agreement check with batch_size = 128, query_size = 128.
TEST(DistanceMatrix, MipsSquaredEuclideanRepeatedQuadraticInjection_128x128) {
  TestSquaredEuclideanMatrixRepeatedQuadraticInjection<128, 128>();
}

// Benchmarks the INT8 MipsSquaredEuclideanDistanceMatrix overload that takes
// (m_val, e2) over B blocks of M vectors of dimension D against N queries, and
// prints the elapsed microseconds of three variants:
//   1. the M x 1 kernel, one query at a time (transposed matrix, plain query);
//   2. the M x N kernel (transposed matrix and transposed queries);
//   3. the 1 x 1 kernel for every (vector, query) pair (plain layouts).
// The result buffer is overwritten from block to block; only the timings are
// reported, the distances themselves are not checked here.
template <size_t M, size_t N, size_t B, size_t D>
void MipsRepeatedQuadraticInjectionBenchMark(void) {
  // The transposition below moves the data as 4-byte (uint32_t) groups and is
  // given dimension / 4 as the group count, so D has to be a multiple of 4.
  static_assert(D % 4 == 0, "dimension must be a multiple of 4");

  const size_t m_val = 4;
  const float u_val = 0.6f;
  const float l2_norm = 1.0f;
  const float e2 = (u_val / l2_norm) * (u_val / l2_norm);
  const size_t dimension = D;
  const size_t batch_size = M;
  const size_t block_size = B;
  const size_t query_size = N;
  const size_t matrix_size = block_size * batch_size * dimension;
  const size_t query_matrix_size = dimension * query_size;

  std::vector<int8_t> matrix1(matrix_size);
  std::vector<int8_t> matrix2(matrix_size);
  std::vector<int8_t> query1(query_matrix_size);
  std::vector<int8_t> query2(query_matrix_size);

  std::mt19937 gen((std::random_device())());
  // std::uniform_int_distribution is only specified for short/int/long/long
  // long and their unsigned counterparts; instantiating it with int8_t is
  // undefined behaviour, so draw ints and narrow them explicitly.
  std::uniform_int_distribution<int> dist(-127, 127);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = static_cast<int8_t>(dist(gen));
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = static_cast<int8_t>(dist(gen));
  }

  // matrix2 / query2 hold the transposed copies consumed by the batched
  // kernels; each block of the matrix is transposed independently.
  for (size_t i = 0; i < block_size; ++i) {
    size_t start_pos = i * batch_size * dimension;
    MatrixTranspose((uint32_t *)(&matrix2[start_pos]),
                    (const uint32_t *)(&matrix1[start_pos]), dimension / 4,
                    batch_size);
  }
  MatrixTranspose((uint32_t *)(&query2[0]), (const uint32_t *)query1.data(),
                  dimension / 4, query_size);

  ElapsedTime elapsed_time;
  std::vector<float> results(batch_size * query_size);

  std::cout << "# (" << IntelIntrinsics() << ") INT8 " << dimension << "d, "
            << batch_size << " * " << query_size << " * " << block_size
            << std::endl;

  // 1 Batched Euclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const int8_t *matrix_batch = &matrix2[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const int8_t *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      MipsSquaredEuclideanDistanceMatrix<int8_t, batch_size, 1>::Compute(
          matrix_batch, current_query, dimension, m_val, e2, current_results);
    }
  }
  std::cout
      << "* 1 Batched MipsSquaredEuclidean(RepeatedQuadraticInjection) (us) \t"
      << elapsed_time.micro_seconds() << std::endl;

  // N Batched Euclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const int8_t *matrix_batch = &matrix2[i * batch_size * dimension];

    MipsSquaredEuclideanDistanceMatrix<int8_t, batch_size, query_size>::Compute(
        matrix_batch, &query2[0], dimension, m_val, e2, results.data());
  }
  // Label spelling fixed (was "MipsSquaredErclidean").
  std::cout
      << "* N Batched MipsSquaredEuclidean(RepeatedQuadraticInjection) (us) \t"
      << elapsed_time.micro_seconds() << std::endl;

  // Unbatched Euclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const int8_t *matrix_batch = &matrix1[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const int8_t *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      for (size_t k = 0; k < batch_size; ++k) {
        MipsSquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute(
            &matrix_batch[k * dimension], current_query, dimension, m_val, e2,
            &current_results[k]);
      }
    }
  }
  std::cout
      << "* Unbatched MipsSquaredEuclidean(RepeatedQuadraticInjection) (us) \t"
      << elapsed_time.micro_seconds() << std::endl;
}

// Benchmark driver; skipped by default because of the DISABLED_ prefix (run it
// with --gtest_also_run_disabled_tests). Each call instantiates
// MipsRepeatedQuadraticInjectionBenchMark<M, N, B, D> with batch size M, query
// count N, block count B and dimension D.
TEST(DistanceMatrix,
     DISABLED_MipsSquaredEuclideanRepeatedQuadraticInjection_Benchmark) {
  MipsRepeatedQuadraticInjectionBenchMark<2, 1, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<2, 2, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<4, 1, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<4, 2, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<4, 4, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<8, 1, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<8, 2, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<8, 4, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<8, 8, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<16, 1, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<16, 2, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<16, 4, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<16, 8, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<16, 16, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<32, 1, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<32, 2, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<32, 4, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<32, 8, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<32, 16, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<32, 32, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<64, 1, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<64, 2, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<64, 4, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<64, 8, 512, 64>();
  MipsRepeatedQuadraticInjectionBenchMark<128, 1, 512, 64>();
  // Single-vector, single-query case with more blocks and a larger dimension.
  MipsRepeatedQuadraticInjectionBenchMark<1, 1, 1024, 256>();
}

// Thin test-local shorthand: forwards two raw INT8 vectors of length `dim` and
// the scaling parameter `e2` to Distance::MipsSquaredEuclidean and returns its
// result unchanged.
static float MipsSquaredEuclidean(const int8_t *lhs, const int8_t *rhs,
                                  size_t dim, float e2) {
  const float distance = Distance::MipsSquaredEuclidean(lhs, rhs, dim, e2);
  return distance;
}

// FixedVector convenience overload: unwraps both operands and delegates to the
// raw-pointer MipsSquaredEuclidean above, using lhs.size() as the dimension.
template <size_t N>
static float MipsSquaredEuclidean(const FixedVector<int8_t, N> &lhs,
                                  const FixedVector<int8_t, N> &rhs, float e2) {
  const auto *lhs_data = lhs.data();
  const auto *rhs_data = rhs.data();
  return MipsSquaredEuclidean(lhs_data, rhs_data, lhs.size(), e2);
}

// Reference implementation used to cross-check MipsSquaredEuclidean.
//
// Each INT8 input is converted to a float vector of length dim + 1: the first
// `dim` components are the input scaled by eta = sqrt(e2), and the extra last
// component is sqrt(1 - ||scaled||^2). The function returns the squared
// Euclidean distance between the two extended vectors.
//
// Note: if e2 * ||input||^2 exceeds 1 the extra component is the square root
// of a negative number (NaN); callers are expected to pick e2 accordingly.
//
// The previous version also accumulated the squared norm by hand in a local
// that was only ever printed ("squ: ...") on every call; that leftover debug
// output and the dead accumulator have been removed.
static float ConvertAndComputeByMips(const int8_t *lhs, const int8_t *rhs,
                                     size_t dim, float e2) {
  const float eta = std::sqrt(e2);
  float norm2 = 0.0f;

  // Left operand: scale, then append the complementary coordinate.
  std::vector<float> lhs_vec(dim + 1);
  for (size_t i = 0; i < dim; ++i) {
    lhs_vec[i] = lhs[i] * eta;
  }
  ailego::SquaredNorm2Matrix<float, 1>::Compute(lhs_vec.data(), dim, &norm2);
  lhs_vec[dim] = std::sqrt(1 - norm2);

  // Right operand: same construction.
  std::vector<float> rhs_vec(dim + 1);
  for (size_t i = 0; i < dim; ++i) {
    rhs_vec[i] = rhs[i] * eta;
  }
  ailego::SquaredNorm2Matrix<float, 1>::Compute(rhs_vec.data(), dim, &norm2);
  rhs_vec[dim] = std::sqrt(1 - norm2);

  return ailego::Distance::SquaredEuclidean(lhs_vec.data(), rhs_vec.data(),
                                            dim + 1);
}

// FixedVector convenience overload: unwraps both operands and delegates to the
// raw-pointer ConvertAndComputeByMips, using lhs.size() as the dimension.
template <size_t N>
static float ConvertAndComputeByMips(const FixedVector<int8_t, N> &lhs,
                                     const FixedVector<int8_t, N> &rhs,
                                     float e2) {
  const auto *lhs_data = lhs.data();
  const auto *rhs_data = rhs.data();
  return ConvertAndComputeByMips(lhs_data, rhs_data, lhs.size(), e2);
}

// Compares MipsSquaredEuclidean (e2 overload) with the explicit float
// reference ConvertAndComputeByMips on random INT8 vector pairs.
//
// Per run, one random e2 = (u_val / l2_norm)^2, one random dimension in
// [2, 128] and one random pair count in [1, 1000] are drawn; every pair must
// agree within 1e-6. ASSERT_NEAR stops at the first mismatch.
TEST(DistanceMatrix, GeneralSphericalInjection) {
  std::mt19937 gen((std::random_device())());
  const float u_val = std::uniform_real_distribution<float>(0.1, 1.0)(gen);
  const float l2_norm =
      std::uniform_real_distribution<float>(1000.0, 1500.0)(gen);
  const float e2 = (u_val / l2_norm) * (u_val / l2_norm);
  const float epsilon = 1e-6;
  const uint32_t dim = std::uniform_int_distribution<uint32_t>(2, 128)(gen);
  const uint32_t count = std::uniform_int_distribution<uint32_t>(1, 1000)(gen);

  // std::uniform_int_distribution is only specified for short/int/long/long
  // long and their unsigned counterparts; instantiating it with int8_t is
  // undefined behaviour, so draw ints and narrow them explicitly.
  std::uniform_int_distribution<int> dist(-127, 127);

  // Both buffers are fully overwritten on every iteration, so they can be
  // allocated once outside the loop.
  std::vector<int8_t> vec1(dim);
  std::vector<int8_t> vec2(dim);
  for (size_t i = 0; i < count; ++i) {
    for (size_t d = 0; d < dim; ++d) {
      vec1[d] = static_cast<int8_t>(dist(gen));
      vec2[d] = static_cast<int8_t>(dist(gen));
    }
    ASSERT_NEAR(ConvertAndComputeByMips(vec1.data(), vec2.data(), dim, e2),
                MipsSquaredEuclidean(vec1.data(), vec2.data(), dim, e2),
                epsilon);
  }
}

// Fixed-input checks of MipsSquaredEuclidean (e2 overload) for dimensions 1
// through 15, with e2 = (0.68 / 100)^2 and a tolerance of 1e-5.
//
// Dimensions 1..10 compare a vector with an identical copy and expect 0.
// Dimensions 11..15 compare the all-zero vector with a ramp and expect
// precomputed constants; these are consistent with
// 2 - 2 * sqrt(1 - e2 * ||v||^2), i.e. what the lifted form built in
// ConvertAndComputeByMips gives against the zero vector.
//
// The random generator that used to be constructed here was never used and
// has been removed; the test is fully deterministic.
TEST(DistanceMatrix, FixedVectorsSphericalInjection) {
  const float u_val = 0.68f;
  const float l2_norm = 100.0f;
  const float e2 = (u_val / l2_norm) * (u_val / l2_norm);
  const float epsilon = 1e-5;

  // Identical operands: distance must be zero.
  int8_t a[] = {0}, b[] = {0};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(a, b, 1, e2), epsilon);

  int8_t c[] = {0, 1}, d[] = {0, 1};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(c, d, 2, e2), epsilon);

  int8_t e[] = {0, 1, 2}, f[] = {0, 1, 2};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(e, f, 3, e2), epsilon);

  int8_t g[] = {0, 1, 2, 3}, h[] = {0, 1, 2, 3};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(g, h, 4, e2), epsilon);

  int8_t i[] = {0, 1, 2, 3, 4}, j[] = {0, 1, 2, 3, 4};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(i, j, 5, e2), epsilon);

  int8_t l[] = {0, 1, 2, 3, 4, 5}, k[] = {0, 1, 2, 3, 4, 5};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(l, k, 6, e2), epsilon);

  int8_t m[] = {0, 1, 2, 3, 4, 5, 6}, n[] = {0, 1, 2, 3, 4, 5, 6};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(m, n, 7, e2), epsilon);

  int8_t o[] = {0, 1, 2, 3, 4, 5, 6, 7}, p[] = {0, 1, 2, 3, 4, 5, 6, 7};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(o, p, 8, e2), epsilon);

  int8_t q[] = {0, 1, 2, 3, 4, 5, 6, 7, 8}, r[] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(q, r, 9, e2), epsilon);

  int8_t s[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
         t[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
  EXPECT_NEAR(0.0f, MipsSquaredEuclidean(s, t, 10, e2), epsilon);

  // Zero vector against a ramp: non-zero reference distances.
  int8_t u[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
         v[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
  EXPECT_NEAR(0.0178823452f, MipsSquaredEuclidean(u, v, 11, e2), epsilon);

  int8_t w[12] = {0}, x[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
  EXPECT_NEAR(0.0235359258f, MipsSquaredEuclidean(w, x, 12, e2), epsilon);

  int8_t y[13] = {0}, z[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
  EXPECT_NEAR(0.0302853006f, MipsSquaredEuclidean(y, z, 13, e2), epsilon);

  int8_t x14[14] = {0}, y14[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13};
  EXPECT_NEAR(0.0382360629f, MipsSquaredEuclidean(x14, y14, 14, e2), epsilon);

  // The ramp ends in 15 rather than 14; the expected constant below matches
  // the array exactly as written, so do not "fix" one without the other.
  int8_t x15[15] = {0},
         y15[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15};
  EXPECT_NEAR(0.0488716699f, MipsSquaredEuclidean(x15, y15, 15, e2), epsilon);
}

// Checks that the M x N batched MipsSquaredEuclideanDistanceMatrix kernel
// (e2 overload) agrees with the 1 x 1 kernel on random INT8 data.
//
// M vectors and N queries of a random dimension (a multiple of 4 in [8, 512])
// are generated. The 1 x 1 kernel is evaluated for every (query, vector) pair
// on the plain layout, the M x N kernel once on the transposed layout, and
// every pair of results must differ by at most 1e-4.
template <size_t M, size_t N>
void TestSquaredEuclideanMatrixSphericalInjection(void) {
  std::mt19937 gen((std::random_device())());

  const float u_val = std::uniform_real_distribution<float>(0.3, 0.9)(gen);
  const size_t batch_size = M;
  const size_t query_size = N;
  // Multiple of 4 because the transposition below moves 4-byte groups.
  size_t dimension = std::uniform_int_distribution<size_t>(2, 128)(gen) * 4;
  size_t matrix_size = batch_size * dimension;
  size_t query_matrix_size = query_size * dimension;

  std::vector<int8_t> matrix1(matrix_size);
  std::vector<int8_t> matrix2(matrix_size);
  std::vector<int8_t> query1(query_matrix_size);
  std::vector<int8_t> query2(query_matrix_size);
  std::vector<float> result1(batch_size * query_size);
  std::vector<float> result2(batch_size * query_size);

  // std::uniform_int_distribution is only specified for short/int/long/long
  // long and their unsigned counterparts; instantiating it with int8_t is
  // undefined behaviour, so draw ints and narrow them explicitly.
  std::uniform_int_distribution<int> dist(-127, 127);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = static_cast<int8_t>(dist(gen));
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = static_cast<int8_t>(dist(gen));
  }

  // e2 is u_val^2 divided by the largest squared L2 norm found among all
  // vectors and queries (presumably so that every scaled vector keeps a
  // squared norm below 1 -- confirm against the kernel's requirements).
  float squared_l2_norm = 0.0f;
  for (size_t i = 0; i < matrix_size; i += dimension) {
    float score;
    SquaredNorm2Matrix<int8_t, 1>::Compute(&matrix1[i], dimension, &score);
    squared_l2_norm = std::max(squared_l2_norm, score);
  }
  for (size_t i = 0; i < query_matrix_size; i += dimension) {
    float score;
    SquaredNorm2Matrix<int8_t, 1>::Compute(&query1[i], dimension, &score);
    squared_l2_norm = std::max(squared_l2_norm, score);
  }
  const float e2 = u_val * u_val / squared_l2_norm;

  // Transposed copies for the batched kernel.
  MatrixTranspose((uint32_t *)(&matrix2[0]), (const uint32_t *)matrix1.data(),
                  dimension / 4, batch_size);
  MatrixTranspose((uint32_t *)(&query2[0]), (const uint32_t *)query1.data(),
                  dimension / 4, query_size);

  // Reference: one 1 x 1 call per (query, vector) pair; results are stored
  // query-major (result1[i * batch_size + j]).
  for (size_t i = 0; i < query_size; ++i) {
    const int8_t *cur_query = &query1[i * dimension];
    float *query_result = &result1[i * batch_size];

    for (size_t j = 0; j < batch_size; ++j) {
      MipsSquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute(
          &matrix1[j * dimension], cur_query, dimension, e2, &query_result[j]);
    }
  }
  // Kernel under test: a single M x N call.
  MipsSquaredEuclideanDistanceMatrix<int8_t, batch_size, query_size>::Compute(
      &matrix2[0], &query2[0], dimension, e2, &result2[0]);

  for (size_t i = 0; i < batch_size * query_size; ++i) {
    EXPECT_GE(1e-4, std::abs(result1[i] - result2[i]));
  }
}

// Fixed-size instantiations of TestSquaredEuclideanMatrixSphericalInjection.
// The <M, N> template arguments are the batch size and the query count, and
// mirror the MxN suffix of each test name.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_1x1) {
  TestSquaredEuclideanMatrixSphericalInjection<1, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_2x1) {
  TestSquaredEuclideanMatrixSphericalInjection<2, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_2x2) {
  TestSquaredEuclideanMatrixSphericalInjection<2, 2>();
}

// The only case in this list whose sizes are not powers of two.
TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_3x3) {
  TestSquaredEuclideanMatrixSphericalInjection<3, 3>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_4x1) {
  TestSquaredEuclideanMatrixSphericalInjection<4, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_4x2) {
  TestSquaredEuclideanMatrixSphericalInjection<4, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_4x4) {
  TestSquaredEuclideanMatrixSphericalInjection<4, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_8x1) {
  TestSquaredEuclideanMatrixSphericalInjection<8, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_8x2) {
  TestSquaredEuclideanMatrixSphericalInjection<8, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_8x4) {
  TestSquaredEuclideanMatrixSphericalInjection<8, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_8x8) {
  TestSquaredEuclideanMatrixSphericalInjection<8, 8>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x1) {
  TestSquaredEuclideanMatrixSphericalInjection<16, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x2) {
  TestSquaredEuclideanMatrixSphericalInjection<16, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x4) {
  TestSquaredEuclideanMatrixSphericalInjection<16, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x8) {
  TestSquaredEuclideanMatrixSphericalInjection<16, 8>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_16x16) {
  TestSquaredEuclideanMatrixSphericalInjection<16, 16>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x1) {
  TestSquaredEuclideanMatrixSphericalInjection<32, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x2) {
  TestSquaredEuclideanMatrixSphericalInjection<32, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x4) {
  TestSquaredEuclideanMatrixSphericalInjection<32, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x8) {
  TestSquaredEuclideanMatrixSphericalInjection<32, 8>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x16) {
  TestSquaredEuclideanMatrixSphericalInjection<32, 16>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_32x32) {
  TestSquaredEuclideanMatrixSphericalInjection<32, 32>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x1) {
  TestSquaredEuclideanMatrixSphericalInjection<64, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x2) {
  TestSquaredEuclideanMatrixSphericalInjection<64, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x4) {
  TestSquaredEuclideanMatrixSphericalInjection<64, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x8) {
  TestSquaredEuclideanMatrixSphericalInjection<64, 8>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x16) {
  TestSquaredEuclideanMatrixSphericalInjection<64, 16>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x32) {
  TestSquaredEuclideanMatrixSphericalInjection<64, 32>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_64x64) {
  TestSquaredEuclideanMatrixSphericalInjection<64, 64>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x1) {
  TestSquaredEuclideanMatrixSphericalInjection<128, 1>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x2) {
  TestSquaredEuclideanMatrixSphericalInjection<128, 2>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x4) {
  TestSquaredEuclideanMatrixSphericalInjection<128, 4>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x8) {
  TestSquaredEuclideanMatrixSphericalInjection<128, 8>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x16) {
  TestSquaredEuclideanMatrixSphericalInjection<128, 16>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x32) {
  TestSquaredEuclideanMatrixSphericalInjection<128, 32>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x64) {
  TestSquaredEuclideanMatrixSphericalInjection<128, 64>();
}

TEST(DistanceMatrix, MipsSquaredEuclideanSphericalInjection_128x128) {
  TestSquaredEuclideanMatrixSphericalInjection<128, 128>();
}

// Benchmarks the INT8 MipsSquaredEuclideanDistanceMatrix overload that takes
// only e2 over B blocks of M vectors of dimension D against N queries, and
// prints the elapsed microseconds of three variants:
//   1. the M x 1 kernel, one query at a time (transposed matrix, plain query);
//   2. the M x N kernel (transposed matrix and transposed queries);
//   3. the 1 x 1 kernel for every (vector, query) pair (plain layouts).
// e2 is 0.98 divided by the largest squared L2 norm among all generated
// vectors and queries. The result buffer is overwritten from block to block;
// only the timings are reported, the distances themselves are not checked.
template <size_t M, size_t N, size_t B, size_t D>
void MipsSphericalInjectionBenchMark(void) {
  // The transposition below moves the data as 4-byte (uint32_t) groups and is
  // given dimension / 4 as the group count, so D has to be a multiple of 4.
  static_assert(D % 4 == 0, "dimension must be a multiple of 4");

  const size_t dimension = D;
  const size_t batch_size = M;
  const size_t block_size = B;
  const size_t query_size = N;
  const size_t matrix_size = block_size * batch_size * dimension;
  const size_t query_matrix_size = dimension * query_size;

  std::vector<int8_t> matrix1(matrix_size);
  std::vector<int8_t> matrix2(matrix_size);
  std::vector<int8_t> query1(query_matrix_size);
  std::vector<int8_t> query2(query_matrix_size);

  std::mt19937 gen((std::random_device())());
  // std::uniform_int_distribution is only specified for short/int/long/long
  // long and their unsigned counterparts; instantiating it with int8_t is
  // undefined behaviour, so draw ints and narrow them explicitly.
  std::uniform_int_distribution<int> dist(-127, 127);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = static_cast<int8_t>(dist(gen));
  }
  for (size_t i = 0; i < query_matrix_size; ++i) {
    query1[i] = static_cast<int8_t>(dist(gen));
  }

  // matrix2 / query2 hold the transposed copies consumed by the batched
  // kernels; each block of the matrix is transposed independently.
  for (size_t i = 0; i < block_size; ++i) {
    size_t start_pos = i * batch_size * dimension;
    MatrixTranspose((uint32_t *)(&matrix2[start_pos]),
                    (const uint32_t *)(&matrix1[start_pos]), dimension / 4,
                    batch_size);
  }
  MatrixTranspose((uint32_t *)(&query2[0]), (const uint32_t *)query1.data(),
                  dimension / 4, query_size);

  // Largest squared L2 norm over every vector and query, used to derive e2.
  float squared_l2_norm = 0.0f;
  for (size_t i = 0; i < matrix_size; i += dimension) {
    float score;
    SquaredNorm2Matrix<int8_t, 1>::Compute(&matrix1[i], dimension, &score);
    squared_l2_norm = std::max(squared_l2_norm, score);
  }
  for (size_t i = 0; i < query_matrix_size; i += dimension) {
    float score;
    SquaredNorm2Matrix<int8_t, 1>::Compute(&query1[i], dimension, &score);
    squared_l2_norm = std::max(squared_l2_norm, score);
  }
  const float e2 = 0.98f / squared_l2_norm;
  ElapsedTime elapsed_time;
  std::vector<float> results(batch_size * query_size);

  std::cout << "# (" << IntelIntrinsics() << ") INT8 " << dimension << "d, "
            << batch_size << " * " << query_size << " * " << block_size
            << std::endl;

  // 1 Batched Euclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const int8_t *matrix_batch = &matrix2[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const int8_t *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      MipsSquaredEuclideanDistanceMatrix<int8_t, batch_size, 1>::Compute(
          matrix_batch, current_query, dimension, e2, current_results);
    }
  }
  std::cout << "* 1 Batched MipsSquaredEuclidean(SphericalInjection) (us) \t"
            << elapsed_time.micro_seconds() << std::endl;

  // N Batched Euclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const int8_t *matrix_batch = &matrix2[i * batch_size * dimension];

    MipsSquaredEuclideanDistanceMatrix<int8_t, batch_size, query_size>::Compute(
        matrix_batch, &query2[0], dimension, e2, results.data());
  }
  // Label spelling fixed (was "MipsSquaredErclidean").
  std::cout << "* N Batched MipsSquaredEuclidean(SphericalInjection) (us) \t"
            << elapsed_time.micro_seconds() << std::endl;

  // Unbatched Euclidean
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const int8_t *matrix_batch = &matrix1[i * batch_size * dimension];

    for (size_t j = 0; j < query_size; ++j) {
      const int8_t *current_query = &query1[j * dimension];
      float *current_results = &results[j * batch_size];

      for (size_t k = 0; k < batch_size; ++k) {
        MipsSquaredEuclideanDistanceMatrix<int8_t, 1, 1>::Compute(
            &matrix_batch[k * dimension], current_query, dimension, e2,
            &current_results[k]);
      }
    }
  }
  std::cout << "* Unbatched MipsSquaredEuclidean(SphericalInjection) (us) \t"
            << elapsed_time.micro_seconds() << std::endl;
}

// Benchmark driver; skipped by default because of the DISABLED_ prefix (run it
// with --gtest_also_run_disabled_tests). Each call instantiates
// MipsSphericalInjectionBenchMark<M, N, B, D> with batch size M, query count
// N, block count B and dimension D.
TEST(DistanceMatrix,
     DISABLED_MipsSquaredEuclideanSphericalInjection_Benchmark) {
  MipsSphericalInjectionBenchMark<2, 1, 512, 64>();
  MipsSphericalInjectionBenchMark<2, 2, 512, 64>();
  MipsSphericalInjectionBenchMark<4, 1, 512, 64>();
  MipsSphericalInjectionBenchMark<4, 2, 512, 64>();
  MipsSphericalInjectionBenchMark<4, 4, 512, 64>();
  MipsSphericalInjectionBenchMark<8, 1, 512, 64>();
  MipsSphericalInjectionBenchMark<8, 2, 512, 64>();
  MipsSphericalInjectionBenchMark<8, 4, 512, 64>();
  MipsSphericalInjectionBenchMark<8, 8, 512, 64>();
  MipsSphericalInjectionBenchMark<16, 1, 512, 64>();
  MipsSphericalInjectionBenchMark<16, 2, 512, 64>();
  MipsSphericalInjectionBenchMark<16, 4, 512, 64>();
  MipsSphericalInjectionBenchMark<16, 8, 512, 64>();
  MipsSphericalInjectionBenchMark<16, 16, 512, 64>();
  MipsSphericalInjectionBenchMark<32, 1, 512, 64>();
  MipsSphericalInjectionBenchMark<32, 2, 512, 64>();
  MipsSphericalInjectionBenchMark<32, 4, 512, 64>();
  MipsSphericalInjectionBenchMark<32, 8, 512, 64>();
  MipsSphericalInjectionBenchMark<32, 16, 512, 64>();
  MipsSphericalInjectionBenchMark<32, 32, 512, 64>();
  MipsSphericalInjectionBenchMark<64, 1, 512, 64>();
  MipsSphericalInjectionBenchMark<64, 2, 512, 64>();
  MipsSphericalInjectionBenchMark<64, 4, 512, 64>();
  MipsSphericalInjectionBenchMark<64, 8, 512, 64>();
  MipsSphericalInjectionBenchMark<128, 1, 512, 64>();
  // Single-vector, single-query case with more blocks and a larger dimension.
  MipsSphericalInjectionBenchMark<1, 1, 1024, 256>();
}


================================================
FILE: tests/ailego/math/norm_matrix_fp16_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <random>
#include <ailego/container/bitmap.h>
#include <ailego/internal/cpu_features.h>
#include <ailego/math/norm_matrix.h>
#include <gtest/gtest.h>
#include <zvec/ailego/container/vector.h>
#include <zvec/ailego/utility/time_helper.h>

using namespace zvec::ailego;

// Returns the string reported by internal::CpuFeatures::Intrinsics(); the
// benchmarks below print it in their header line.
static inline const char *IntelIntrinsics(void) {
  const char *intrinsics = internal::CpuFeatures::Intrinsics();
  return intrinsics;
}

// Transposes `src`, read as N rows of M elements, into `dst`, written as M
// rows of N elements: dst[i * N + j] = src[j * M + i]. The two buffers must
// each hold M * N elements.
static inline void MatrixTranspose(Float16 *dst, const Float16 *src, size_t M,
                                   size_t N) {
  for (size_t row = 0; row < M; ++row) {
    Float16 *out = dst + row * N;
    for (size_t col = 0; col < N; ++col) {
      out[col] = src[col * M + row];
    }
  }
}

static float Norm1(const std::vector<Float16> &vec) {
  float out = 0.0f;
  Norm1Matrix<Float16, 1>::Compute(vec.data(), vec.size(), &out);
  return out;
}

static float Norm2(const std::vector<Float16> &vec) {
  float out = 0.0f;
  Norm2Matrix<Float16, 1>::Compute(vec.data(), vec.size(), &out);
  return out;
}

// For every length from 1 to 99, fills a vector with random FP16 values drawn
// from [0, 0.5), accumulates the sum of absolute values in a float, and
// requires Norm1() to match that sum within a relative error of 0.5%
// (a tolerance is used here instead of exact float equality).
TEST(NormMatrix, Norm1_General) {
  std::mt19937 gen((std::random_device())());
  std::uniform_real_distribution<float> dist(0.0, 0.5);

  for (size_t length = 1; length != 100; ++length) {
    std::vector<Float16> values;
    float expected = 0.0f;
    for (size_t k = 0; k != length; ++k) {
      Float16 sample = dist(gen);
      expected += Float16::Absolute(sample);
      values.push_back(sample);
    }
    const float actual = Norm1(values);
    EXPECT_GT(0.005, std::abs((actual - expected) / expected));
  }
}

// For every length from 1 to 99, fills a vector with random FP16 values drawn
// from [0, 1), accumulates the sum of squares in a float and takes its square
// root, and requires Norm2() to match within a relative error of 0.5%
// (a tolerance is used here instead of exact float equality).
TEST(NormMatrix, Norm2_General) {
  std::mt19937 gen((std::random_device())());
  std::uniform_real_distribution<float> dist(0.0, 1.0);

  for (size_t length = 1; length != 100; ++length) {
    std::vector<Float16> values;
    float sum_of_squares = 0.0f;
    for (size_t k = 0; k != length; ++k) {
      Float16 sample = dist(gen);
      sum_of_squares += sample * sample;
      values.push_back(sample);
    }
    const float expected = std::sqrt(sum_of_squares);
    const float actual = Norm2(values);
    EXPECT_GT(0.005, std::abs((actual - expected) / expected));
  }
}

template <size_t M>
void TestNorm1Matrix(void) {
  std::mt19937 gen((std::random_device())());

  const size_t batch_size = M;
  size_t dimension = (std::uniform_int_distribution<size_t>(1, 65))(gen);
  size_t matrix_size = batch_size * dimension;

  std::vector<Float16> matrix1(matrix_size);
  std::vector<Float16> matrix2(matrix_size);
  std::vector<float> result1(batch_size);
  std::vector<float> result2(batch_size);

  std::uniform_real_distribution<float> dist(0.0, 1.0);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = dist(gen);
  }
  MatrixTranspose(&matrix2[0], matrix1.data(), dimension, batch_size);

  for (size_t j = 0; j < batch_size; ++j) {
    Norm1Matrix<Float16, 1>::Compute(&matrix1[j * dimension], dimension,
                                     &result1[j]);
  }
  Norm1Matrix<Float16, batch_size>::Compute(&matrix2[0], dimension,
                                            &result2[0]);

  for (size_t i = 0; i < batch_size; ++i) {
    // EXPECT_FLOAT_EQ(result1[i], result2[i]);
    EXPECT_GT(0.005, std::abs((result1[i] - result2[i]) / result1[i]));
  }
}

template <size_t M>
void TestNorm2Matrix(void) {
  std::mt19937 gen((std::random_device())());

  const size_t batch_size = M;
  size_t dimension = (std::uniform_int_distribution<size_t>(1, 65))(gen);
  size_t matrix_size = batch_size * dimension;

  std::vector<Float16> matrix1(matrix_size);
  std::vector<Float16> matrix2(matrix_size);
  std::vector<float> result1(batch_size);
  std::vector<float> result2(batch_size);

  std::uniform_real_distribution<float> dist(0.0, 0.5);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = dist(gen);
  }
  MatrixTranspose(&matrix2[0], matrix1.data(), dimension, batch_size);

  for (size_t j = 0; j < batch_size; ++j) {
    Norm2Matrix<Float16, 1>::Compute(&matrix1[j * dimension], dimension,
                                     &result1[j]);
  }
  Norm2Matrix<Float16, batch_size>::Compute(&matrix2[0], dimension,
                                            &result2[0]);

  for (size_t i = 0; i < batch_size; ++i) {
    // EXPECT_FLOAT_EQ(result1[i], result2[i]);
    EXPECT_GT(0.005, std::abs((result1[i] - result2[i]) / result1[i]));
  }
}

template <size_t M>
void TestSquaredNorm2Matrix(void) {
  std::mt19937 gen((std::random_device())());

  const size_t batch_size = M;
  size_t dimension = (std::uniform_int_distribution<size_t>(1, 65))(gen);
  size_t matrix_size = batch_size * dimension;

  std::vector<Float16> matrix1(matrix_size);
  std::vector<Float16> matrix2(matrix_size);
  std::vector<float> result1(batch_size);
  std::vector<float> result2(batch_size);

  std::uniform_real_distribution<float> dist(0.0, 1.0);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = dist(gen);
  }
  MatrixTranspose(&matrix2[0], matrix1.data(), dimension, batch_size);

  for (size_t j = 0; j < batch_size; ++j) {
    SquaredNorm2Matrix<Float16, 1>::Compute(&matrix1[j * dimension], dimension,
                                            &result1[j]);
  }
  SquaredNorm2Matrix<Float16, batch_size>::Compute(&matrix2[0], dimension,
                                                   &result2[0]);

  for (size_t i = 0; i < batch_size; ++i) {
    EXPECT_GT(0.005, std::abs((result1[i] - result2[i]) / result1[i]));
  }
}

// Runs the batched-vs-single Norm1 comparison (TestNorm1Matrix<M>) for a mix of
// batch sizes M: powers of two as well as sizes that are not (3, 10, 12, 29,
// 38, 40, 51, 65).
TEST(NormMatrix, Norm1_Matrix) {
  TestNorm1Matrix<1>();
  TestNorm1Matrix<3>();
  TestNorm1Matrix<4>();
  TestNorm1Matrix<8>();
  TestNorm1Matrix<10>();
  TestNorm1Matrix<12>();
  TestNorm1Matrix<16>();
  TestNorm1Matrix<29>();
  TestNorm1Matrix<32>();
  TestNorm1Matrix<38>();
  TestNorm1Matrix<40>();
  TestNorm1Matrix<51>();
  TestNorm1Matrix<64>();
  TestNorm1Matrix<65>();
}

// Runs the batched-vs-single Norm2 comparison (TestNorm2Matrix<M>) for a mix of
// batch sizes M: powers of two as well as sizes that are not (3, 10, 12, 29,
// 38, 40, 51, 65).
TEST(NormMatrix, Norm2_Matrix) {
  TestNorm2Matrix<1>();
  TestNorm2Matrix<3>();
  TestNorm2Matrix<4>();
  TestNorm2Matrix<8>();
  TestNorm2Matrix<10>();
  TestNorm2Matrix<12>();
  TestNorm2Matrix<16>();
  TestNorm2Matrix<29>();
  TestNorm2Matrix<32>();
  TestNorm2Matrix<38>();
  TestNorm2Matrix<40>();
  TestNorm2Matrix<51>();
  TestNorm2Matrix<64>();
  TestNorm2Matrix<65>();
}

// Runs the batched-vs-single squared-Norm2 comparison
// (TestSquaredNorm2Matrix<M>) for a mix of batch sizes M: powers of two as well
// as sizes that are not (3, 10, 12, 29, 38, 40, 51, 65).
TEST(NormMatrix, SquaredNorm2_Matrix) {
  TestSquaredNorm2Matrix<1>();
  TestSquaredNorm2Matrix<3>();
  TestSquaredNorm2Matrix<4>();
  TestSquaredNorm2Matrix<8>();
  TestSquaredNorm2Matrix<10>();
  TestSquaredNorm2Matrix<12>();
  TestSquaredNorm2Matrix<16>();
  TestSquaredNorm2Matrix<29>();
  TestSquaredNorm2Matrix<32>();
  TestSquaredNorm2Matrix<38>();
  TestSquaredNorm2Matrix<40>();
  TestSquaredNorm2Matrix<51>();
  TestSquaredNorm2Matrix<64>();
  TestSquaredNorm2Matrix<65>();
}

template <size_t M, size_t B, size_t D>
void Norm1Benchmark(void) {
  const size_t dimension = D;
  const size_t batch_size = M;
  const size_t block_size = B;
  const size_t matrix_size = block_size * batch_size * dimension;

  std::vector<Float16> matrix1(matrix_size);
  std::vector<Float16> matrix2(matrix_size);

  std::mt19937 gen((std::random_device())());
  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = dist(gen);
  }

  for (size_t i = 0; i < block_size; ++i) {
    size_t start_pos = i * batch_size * dimension;
    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], dimension,
                    batch_size);
  }

  ElapsedTime elapsed_time;
  std::vector<float> results(batch_size);

  std::cout << "# (" << IntelIntrinsics() << ") FP16 " << dimension << "d, "
            << batch_size << " * " << block_size << std::endl;

  // 1 Batched Norm1
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const Float16 *matrix_batch = &matrix2[i * batch_size * dimension];
    Norm1Matrix<Float16, batch_size>::Compute(matrix_batch, dimension,
                                              &results[0]);
  }
  std::cout << "* Batched Norm1 (us) \t" << elapsed_time.micro_seconds()
            << std::endl;

  // Unbatched Norm1
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const Float16 *matrix_batch = &matrix1[i * batch_size * dimension];
    for (size_t k = 0; k < batch_size; ++k) {
      Norm1Matrix<Float16, 1>::Compute(&matrix_batch[k * dimension], dimension,
                                       &results[k]);
    }
  }
  std::cout << "* Unbatched Norm1 (us) \t" << elapsed_time.micro_seconds()
            << std::endl;
}

template <size_t M, size_t B, size_t D>
void Norm2Benchmark(void) {
  const size_t dimension = D;
  const size_t batch_size = M;
  const size_t block_size = B;
  const size_t matrix_size = block_size * batch_size * dimension;

  std::vector<Float16> matrix1(matrix_size);
  std::vector<Float16> matrix2(matrix_size);

  std::mt19937 gen((std::random_device())());
  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = dist(gen);
  }

  for (size_t i = 0; i < block_size; ++i) {
    size_t start_pos = i * batch_size * dimension;
    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], dimension,
                    batch_size);
  }

  ElapsedTime elapsed_time;
  std::vector<float> results(batch_size);

  std::cout << "# (" << IntelIntrinsics() << ") FP16 " << dimension << "d, "
            << batch_size << " * " << block_size << std::endl;

  // 1 Batched Norm2
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const Float16 *matrix_batch = &matrix2[i * batch_size * dimension];
    Norm2Matrix<Float16, batch_size>::Compute(matrix_batch, dimension,
                                              &results[0]);
  }
  std::cout << "* Batched Norm2 (us) \t" << elapsed_time.micro_seconds()
            << std::endl;

  // Unbatched Norm2
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const Float16 *matrix_batch = &matrix1[i * batch_size * dimension];
    for (size_t k = 0; k < batch_size; ++k) {
      Norm2Matrix<Float16, 1>::Compute(&matrix_batch[k * dimension], dimension,
                                       &results[k]);
    }
  }
  std::cout << "* Unbatched Norm2 (us) \t" << elapsed_time.micro_seconds()
            << std::endl;
}

// Norm1 timing runs for batch sizes 2..64, each over 512 blocks of
// 128-dimensional vectors. The DISABLED_ prefix keeps GoogleTest from running
// it unless disabled tests are explicitly requested.
TEST(NormMatrix, DISABLED_Norm1_Benchmark) {
  Norm1Benchmark<2, 512, 128>();
  Norm1Benchmark<4, 512, 128>();
  Norm1Benchmark<8, 512, 128>();
  Norm1Benchmark<16, 512, 128>();
  Norm1Benchmark<32, 512, 128>();
  Norm1Benchmark<64, 512, 128>();
}

// Norm2 timing runs for batch sizes 2..64, each over 512 blocks of
// 128-dimensional vectors. The DISABLED_ prefix keeps GoogleTest from running
// it unless disabled tests are explicitly requested.
TEST(NormMatrix, DISABLED_Norm2_Benchmark) {
  Norm2Benchmark<2, 512, 128>();
  Norm2Benchmark<4, 512, 128>();
  Norm2Benchmark<8, 512, 128>();
  Norm2Benchmark<16, 512, 128>();
  Norm2Benchmark<32, 512, 128>();
  Norm2Benchmark<64, 512, 128>();
}


================================================
FILE: tests/ailego/math/norm_matrix_fp32_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <random>
#include <ailego/container/bitmap.h>
#include <ailego/internal/cpu_features.h>
#include <ailego/math/norm_matrix.h>
#include <ailego/utility/math_helper.h>
#include <gtest/gtest.h>
#include <zvec/ailego/container/vector.h>
#include <zvec/ailego/utility/time_helper.h>

using namespace zvec::ailego;

// Returns the string reported by internal::CpuFeatures::Intrinsics(); the
// benchmarks in this file print it in their banner line.
static inline const char *IntelIntrinsics(void) {
  return internal::CpuFeatures::Intrinsics();
}

// Transposes a row-major matrix.
//   dst  Output, M rows by N columns (M * N elements)
//   src  Input, N rows by M columns (M * N elements)
// After the call dst[i * N + j] == src[j * M + i]. The two buffers must not
// overlap, since elements are copied one by one.
static inline void MatrixTranspose(float *dst, const float *src, size_t M,
                                   size_t N) {
  for (size_t row = 0; row != M; ++row) {
    float *out = dst + row * N;
    for (size_t col = 0; col != N; ++col) {
      out[col] = src[col * M + row];
    }
  }
}

// Runs the batch-size-1 Norm1Matrix kernel over the whole vector and returns
// the value it writes to the output slot.
static float Norm1(const std::vector<float> &vec) {
  float out = 0.0f;
  Norm1Matrix<float, 1>::Compute(vec.data(), vec.size(), &out);
  return out;
}

// Runs the batch-size-1 Norm2Matrix kernel over the whole vector and returns
// the value it writes to the output slot.
static float Norm2(const std::vector<float> &vec) {
  float out = 0.0f;
  Norm2Matrix<float, 1>::Compute(vec.data(), vec.size(), &out);
  return out;
}

// For every dimension in [1, 99], compares Norm1() against a sum of absolute
// values accumulated in double precision.
TEST(NormMatrix, Norm1_General) {
  std::mt19937 rng((std::random_device())());
  std::uniform_real_distribution<float> uniform(0.0, 1.0);

  for (size_t dim = 1; dim < 100; ++dim) {
    std::vector<float> values(dim);
    double expected = 0.0f;
    for (float &item : values) {
      item = uniform(rng);
      expected += std::abs(item);
    }
    EXPECT_FLOAT_EQ(Norm1(values), static_cast<float>(expected));
    // Absolute-tolerance alternative, currently unused:
    // EXPECT_GE(0.00002, std::abs(Norm1(values) - expected));
  }
}

// For every dimension in [1, 99], compares Norm2() against the square root of
// a sum of squares accumulated in double precision.
TEST(NormMatrix, Norm2_General) {
  std::mt19937 rng((std::random_device())());
  std::uniform_real_distribution<float> uniform(0.0, 1.0);

  for (size_t dim = 1; dim < 100; ++dim) {
    std::vector<float> values(dim);
    double squares = 0.0f;
    for (float &item : values) {
      item = uniform(rng);
      squares += item * item;  // product is formed in float, then widened
    }
    EXPECT_FLOAT_EQ(Norm2(values), static_cast<float>(std::sqrt(squares)));
    // Absolute-tolerance alternative, currently unused:
    // EXPECT_GE(0.00002, std::abs(Norm2(values) - std::sqrt(squares)));
  }
}

// Checks that the M-wide Norm1 kernel (fed the transposed matrix) agrees with
// the single-vector kernel, within 2e-5, on M random float vectors whose
// shared dimension is drawn from [1, 65].
template <size_t M>
void TestNorm1Matrix(void) {
  std::mt19937 rng((std::random_device())());

  const size_t dim = (std::uniform_int_distribution<size_t>(1, 65))(rng);
  const size_t count = M * dim;

  std::vector<float> rows(count);
  std::vector<float> columns(count);
  std::vector<float> expected(M);
  std::vector<float> actual(M);

  std::uniform_real_distribution<float> uniform(0.0, 0.5);
  for (float &value : rows) {
    value = uniform(rng);
  }
  MatrixTranspose(columns.data(), rows.data(), dim, M);

  // Reference: one vector per call
  for (size_t row = 0; row != M; ++row) {
    Norm1Matrix<float, 1>::Compute(rows.data() + row * dim, dim,
                                   &expected[row]);
  }
  // Under test: all M vectors in a single call
  Norm1Matrix<float, M>::Compute(columns.data(), dim, actual.data());

  for (size_t row = 0; row != M; ++row) {
    const double diff =
        static_cast<double>(expected[row]) - static_cast<double>(actual[row]);
    EXPECT_GE(0.00002, std::abs(diff));
  }
}

// Checks that the M-wide Norm2 kernel (fed the transposed matrix) agrees with
// the single-vector kernel, within 2e-5, on M random float vectors whose
// shared dimension is drawn from [1, 65].
template <size_t M>
void TestNorm2Matrix(void) {
  std::mt19937 rng((std::random_device())());

  const size_t dim = (std::uniform_int_distribution<size_t>(1, 65))(rng);
  const size_t count = M * dim;

  std::vector<float> rows(count);
  std::vector<float> columns(count);
  std::vector<float> expected(M);
  std::vector<float> actual(M);

  std::uniform_real_distribution<float> uniform(0.0, 0.5);
  for (float &value : rows) {
    value = uniform(rng);
  }
  MatrixTranspose(columns.data(), rows.data(), dim, M);

  // Reference: one vector per call
  for (size_t row = 0; row != M; ++row) {
    Norm2Matrix<float, 1>::Compute(rows.data() + row * dim, dim,
                                   &expected[row]);
  }
  // Under test: all M vectors in a single call
  Norm2Matrix<float, M>::Compute(columns.data(), dim, actual.data());

  for (size_t row = 0; row != M; ++row) {
    const double diff =
        static_cast<double>(expected[row]) - static_cast<double>(actual[row]);
    EXPECT_GE(0.00002, std::abs(diff));
  }
}

// Checks that the M-wide SquaredNorm2 kernel (fed the transposed matrix)
// agrees with the single-vector kernel, within 2e-5, on M random float
// vectors whose shared dimension is drawn from [1, 65].
template <size_t M>
void TestSquaredNorm2Matrix(void) {
  std::mt19937 rng((std::random_device())());

  const size_t dim = (std::uniform_int_distribution<size_t>(1, 65))(rng);
  const size_t count = M * dim;

  std::vector<float> rows(count);
  std::vector<float> columns(count);
  std::vector<float> expected(M);
  std::vector<float> actual(M);

  std::uniform_real_distribution<float> uniform(0.0, 0.5);
  for (float &value : rows) {
    value = uniform(rng);
  }
  MatrixTranspose(columns.data(), rows.data(), dim, M);

  // Reference: one vector per call
  for (size_t row = 0; row != M; ++row) {
    SquaredNorm2Matrix<float, 1>::Compute(rows.data() + row * dim, dim,
                                          &expected[row]);
  }
  // Under test: all M vectors in a single call
  SquaredNorm2Matrix<float, M>::Compute(columns.data(), dim, actual.data());

  for (size_t row = 0; row != M; ++row) {
    const double diff =
        static_cast<double>(expected[row]) - static_cast<double>(actual[row]);
    EXPECT_GE(0.00002, std::abs(diff));
  }
}

// Batched-vs-single Norm1 agreement for a spread of batch sizes from 1 to 65.
// Each call draws its own random dimension.
TEST(NormMatrix, Norm1_Matrix) {
  TestNorm1Matrix<1>();
  TestNorm1Matrix<3>();
  TestNorm1Matrix<4>();
  TestNorm1Matrix<8>();
  TestNorm1Matrix<10>();
  TestNorm1Matrix<12>();
  TestNorm1Matrix<16>();
  TestNorm1Matrix<29>();
  TestNorm1Matrix<32>();
  TestNorm1Matrix<38>();
  TestNorm1Matrix<40>();
  TestNorm1Matrix<51>();
  TestNorm1Matrix<64>();
  TestNorm1Matrix<65>();
}

// Batched-vs-single Norm2 agreement for a spread of batch sizes from 1 to 65.
// Each call draws its own random dimension.
TEST(NormMatrix, Norm2_Matrix) {
  TestNorm2Matrix<1>();
  TestNorm2Matrix<3>();
  TestNorm2Matrix<4>();
  TestNorm2Matrix<8>();
  TestNorm2Matrix<10>();
  TestNorm2Matrix<12>();
  TestNorm2Matrix<16>();
  TestNorm2Matrix<29>();
  TestNorm2Matrix<32>();
  TestNorm2Matrix<38>();
  TestNorm2Matrix<40>();
  TestNorm2Matrix<51>();
  TestNorm2Matrix<64>();
  TestNorm2Matrix<65>();
}

// Batched-vs-single SquaredNorm2 agreement for a spread of batch sizes from 1
// to 65. Each call draws its own random dimension.
TEST(NormMatrix, SquaredNorm2_Matrix) {
  TestSquaredNorm2Matrix<1>();
  TestSquaredNorm2Matrix<3>();
  TestSquaredNorm2Matrix<4>();
  TestSquaredNorm2Matrix<8>();
  TestSquaredNorm2Matrix<10>();
  TestSquaredNorm2Matrix<12>();
  TestSquaredNorm2Matrix<16>();
  TestSquaredNorm2Matrix<29>();
  TestSquaredNorm2Matrix<32>();
  TestSquaredNorm2Matrix<38>();
  TestSquaredNorm2Matrix<40>();
  TestSquaredNorm2Matrix<51>();
  TestSquaredNorm2Matrix<64>();
  TestSquaredNorm2Matrix<65>();
}

template <size_t M, size_t B, size_t D>
void Norm1Benchmark(void) {
  const size_t dimension = D;
  const size_t batch_size = M;
  const size_t block_size = B;
  const size_t matrix_size = block_size * batch_size * dimension;

  std::vector<float> matrix1(matrix_size);
  std::vector<float> matrix2(matrix_size);

  std::mt19937 gen((std::random_device())());
  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = dist(gen);
  }

  for (size_t i = 0; i < block_size; ++i) {
    size_t start_pos = i * batch_size * dimension;
    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], dimension,
                    batch_size);
  }

  ElapsedTime elapsed_time;
  std::vector<float> results(batch_size);

  std::cout << "# (" << IntelIntrinsics() << ") FP32 " << dimension << "d, "
            << batch_size << " * " << block_size << std::endl;

  // 1 Batched Norm1
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const float *matrix_batch = &matrix2[i * batch_size * dimension];
    Norm1Matrix<float, batch_size>::Compute(matrix_batch, dimension,
                                            &results[0]);
  }
  std::cout << "* Batched Norm1 (us) \t" << elapsed_time.micro_seconds()
            << std::endl;

  // Unbatched Norm1
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const float *matrix_batch = &matrix1[i * batch_size * dimension];
    for (size_t k = 0; k < batch_size; ++k) {
      Norm1Matrix<float, 1>::Compute(&matrix_batch[k * dimension], dimension,
                                     &results[k]);
    }
  }
  std::cout << "* Unbatched Norm1 (us) \t" << elapsed_time.micro_seconds()
            << std::endl;
}

template <size_t M, size_t B, size_t D>
void Norm2Benchmark(void) {
  const size_t dimension = D;
  const size_t batch_size = M;
  const size_t block_size = B;
  const size_t matrix_size = block_size * batch_size * dimension;

  std::vector<float> matrix1(matrix_size);
  std::vector<float> matrix2(matrix_size);

  std::mt19937 gen((std::random_device())());
  std::uniform_real_distribution<float> dist(-1.0f, 1.0f);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = dist(gen);
  }

  for (size_t i = 0; i < block_size; ++i) {
    size_t start_pos = i * batch_size * dimension;
    MatrixTranspose(&matrix2[start_pos], &matrix1[start_pos], dimension,
                    batch_size);
  }

  ElapsedTime elapsed_time;
  std::vector<float> results(batch_size);

  std::cout << "# (" << IntelIntrinsics() << ") FP32 " << dimension << "d, "
            << batch_size << " * " << block_size << std::endl;

  // 1 Batched Norm2
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const float *matrix_batch = &matrix2[i * batch_size * dimension];
    Norm2Matrix<float, batch_size>::Compute(matrix_batch, dimension,
                                            &results[0]);
  }
  std::cout << "* Batched Norm2 (us) \t" << elapsed_time.micro_seconds()
            << std::endl;

  // Unbatched Norm2
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const float *matrix_batch = &matrix1[i * batch_size * dimension];
    for (size_t k = 0; k < batch_size; ++k) {
      Norm2Matrix<float, 1>::Compute(&matrix_batch[k * dimension], dimension,
                                     &results[k]);
    }
  }
  std::cout << "* Unbatched Norm2 (us) \t" << elapsed_time.micro_seconds()
            << std::endl;
}

// FP32 Norm1 timing runs for batch sizes 2..64, each over 512 blocks of
// 128-dimensional vectors. The DISABLED_ prefix keeps GoogleTest from running
// it unless disabled tests are explicitly requested.
TEST(NormMatrix, DISABLED_Norm1_Benchmark) {
  Norm1Benchmark<2, 512, 128>();
  Norm1Benchmark<4, 512, 128>();
  Norm1Benchmark<8, 512, 128>();
  Norm1Benchmark<16, 512, 128>();
  Norm1Benchmark<32, 512, 128>();
  Norm1Benchmark<64, 512, 128>();
}

// FP32 Norm2 timing runs for batch sizes 2..64, each over 512 blocks of
// 128-dimensional vectors. The DISABLED_ prefix keeps GoogleTest from running
// it unless disabled tests are explicitly requested.
TEST(NormMatrix, DISABLED_Norm2_Benchmark) {
  Norm2Benchmark<2, 512, 128>();
  Norm2Benchmark<4, 512, 128>();
  Norm2Benchmark<8, 512, 128>();
  Norm2Benchmark<16, 512, 128>();
  Norm2Benchmark<32, 512, 128>();
  Norm2Benchmark<64, 512, 128>();
}


================================================
FILE: tests/ailego/math/norm_matrix_int4_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <random>
#include <ailego/container/bitmap.h>
#include <ailego/internal/cpu_features.h>
#include <ailego/math/norm_matrix.h>
#include <gtest/gtest.h>
#include <zvec/ailego/container/vector.h>
#include <zvec/ailego/utility/time_helper.h>

using namespace zvec::ailego;

// Returns the string reported by internal::CpuFeatures::Intrinsics(); the
// benchmark in this file prints it in its banner line.
static inline const char *IntelIntrinsics(void) {
  return internal::CpuFeatures::Intrinsics();
}

// Transposes a row-major matrix of 32-bit words.
//   dst  Output, M rows by N columns (M * N words)
//   src  Input, N rows by M columns (M * N words)
// After the call dst[i * N + j] == src[j * M + i]. The two buffers must not
// overlap, since words are copied one by one.
static inline void MatrixTranspose(uint32_t *dst, const uint32_t *src, size_t M,
                                   size_t N) {
  for (size_t row = 0; row != M; ++row) {
    uint32_t *out = dst + row * N;
    for (size_t col = 0; col != N; ++col) {
      out[col] = src[col * M + row];
    }
  }
}

// Runs the batch-size-1 Norm2Matrix<uint8_t> kernel over the buffer and
// returns its output. The dimension passed is twice the byte count: the tests
// in this file pack two 4-bit values into every byte.
static float Norm2(const std::vector<uint8_t> &vec) {
  float out = 0.0f;
  Norm2Matrix<uint8_t, 1>::Compute(vec.data(), vec.size() * 2, &out);
  return out;
}

// Runs the batch-size-1 SquaredNorm2Matrix<uint8_t> kernel over the buffer and
// returns its output. The dimension passed is twice the byte count: the tests
// in this file pack two 4-bit values into every byte.
static float SquaredNorm2(const std::vector<uint8_t> &vec) {
  float out = 0.0f;
  SquaredNorm2Matrix<uint8_t, 1>::Compute(vec.data(), vec.size() * 2, &out);
  return out;
}

// Compares SquaredNorm2()/Norm2() on packed INT4 data against a reference sum
// of squares, for every even dimension from 2 up to a randomly drawn even
// upper bound in [2, 130].
TEST(NormMatrix, Norm2_General) {
  std::mt19937 gen((std::random_device())());
  std::uniform_int_distribution<int> dist(-8, 7);
  size_t dimension = (std::uniform_int_distribution<size_t>(1, 65))(gen) << 1;

  // The bound is inclusive: with an exclusive bound the loop body never ran
  // when the draw produced dimension == 2, so the test passed without
  // checking anything, and the drawn dimension itself was never covered.
  for (size_t d = 2; d <= dimension; d += 2) {
    std::vector<uint8_t> vec;
    float result = 0.0f;
    for (size_t i = 0; i < d; i += 2) {
      int8_t v1 = (int8_t)dist(gen);
      int8_t v2 = (int8_t)dist(gen);
      result += v1 * v1;
      result += v2 * v2;
      // v1 goes into the low nibble, v2 into the high nibble; the assignment
      // to uint8_t drops any bits shifted above bit 7.
      uint8_t v =
          ((static_cast<uint8_t>(v2) << 4) | (static_cast<uint8_t>(v1) & 0xF));
      vec.push_back(v);
    }
    EXPECT_FLOAT_EQ(result, SquaredNorm2(vec));
    EXPECT_FLOAT_EQ(std::sqrt(result), Norm2(vec));
  }
}

// Checks that the M-wide INT4 Norm2 kernel (fed the matrix transposed in
// 32-bit words, i.e. eight 4-bit values at a time) matches the single-vector
// kernel. The dimension is a random multiple of 8 in [8, 520].
template <size_t M>
void TestNorm2Matrix(void) {
  std::mt19937 rng((std::random_device())());

  const size_t dim =
      (std::uniform_int_distribution<size_t>(1, 65))(rng) << 3;
  const size_t byte_count = M * dim / 2;  // two 4-bit values per byte

  std::vector<uint8_t> rows(byte_count);
  std::vector<uint8_t> columns(byte_count);
  std::vector<float> expected(M);
  std::vector<float> actual(M);

  std::uniform_int_distribution<int> nibble(-8, 7);
  for (uint8_t &packed : rows) {
    const int8_t low = static_cast<int8_t>(nibble(rng));
    const int8_t high = static_cast<int8_t>(nibble(rng));
    packed = static_cast<uint8_t>((static_cast<uint8_t>(high) << 4) |
                                  (static_cast<uint8_t>(low) & 0xF));
  }
  MatrixTranspose(reinterpret_cast<uint32_t *>(columns.data()),
                  reinterpret_cast<const uint32_t *>(rows.data()), dim / 8, M);

  // Reference: one vector per call
  for (size_t row = 0; row != M; ++row) {
    Norm2Matrix<uint8_t, 1>::Compute(rows.data() + row * dim / 2, dim,
                                     &expected[row]);
  }
  // Under test: all M vectors in a single call
  Norm2Matrix<uint8_t, M>::Compute(columns.data(), dim, actual.data());

  for (size_t row = 0; row != M; ++row) {
    EXPECT_FLOAT_EQ(expected[row], actual[row]);
  }
}

// Batched-vs-single INT4 Norm2 agreement for a spread of batch sizes from 2 to
// 65. Each call draws its own random dimension.
TEST(NormMatrix, Norm2_Matrix) {
  TestNorm2Matrix<2>();
  TestNorm2Matrix<3>();
  TestNorm2Matrix<4>();
  TestNorm2Matrix<8>();
  TestNorm2Matrix<10>();
  TestNorm2Matrix<12>();
  TestNorm2Matrix<16>();
  TestNorm2Matrix<29>();
  TestNorm2Matrix<32>();
  TestNorm2Matrix<38>();
  TestNorm2Matrix<40>();
  TestNorm2Matrix<51>();
  TestNorm2Matrix<64>();
  TestNorm2Matrix<65>();
}

template <size_t M, size_t B, size_t D>
void Norm2Benchmark(void) {
  const size_t dimension = D;
  const size_t batch_size = M;
  const size_t block_size = B;
  const size_t matrix_size = block_size * batch_size * dimension / 2;

  std::vector<uint8_t> matrix1(matrix_size);
  std::vector<uint8_t> matrix2(matrix_size);

  std::mt19937 gen((std::random_device())());
  std::uniform_int_distribution<int> dist(-8, 7);
  for (size_t i = 0; i < matrix_size; ++i) {
    int8_t v1 = (int8_t)dist(gen);
    int8_t v2 = (int8_t)dist(gen);
    uint8_t v =
        ((static_cast<uint8_t>(v2) << 4) | (static_cast<uint8_t>(v1) & 0xF));
    matrix1[i] = v;
  }

  for (size_t i = 0; i < block_size; ++i) {
    size_t start_pos = i * batch_size * dimension / 2;
    MatrixTranspose((uint32_t *)&matrix2[start_pos],
                    (const uint32_t *)&matrix1[start_pos], dimension / 8,
                    batch_size);
  }

  ElapsedTime elapsed_time;
  std::vector<float> results(batch_size);

  std::cout << "# (" << IntelIntrinsics() << ") INT4 " << dimension << "d, "
            << batch_size << " * " << block_size << std::endl;

  // 1 Batched Norm2
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const uint8_t *matrix_batch = &matrix2[i * batch_size * dimension / 2];
    Norm2Matrix<uint8_t, batch_size>::Compute(matrix_batch, dimension,
                                              &results[0]);
  }
  std::cout << "* Batched Norm2 (us) \t" << elapsed_time.micro_seconds()
            << std::endl;

  // Unbatched Norm2
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const uint8_t *matrix_batch = &matrix1[i * batch_size * dimension / 2];
    for (size_t k = 0; k < batch_size; ++k) {
      Norm2Matrix<uint8_t, 1>::Compute(&matrix_batch[k * dimension / 2],
                                       dimension, &results[k]);
    }
  }
  std::cout << "* Unbatched Norm2 (us) \t" << elapsed_time.micro_seconds()
            << std::endl;
}

// INT4 Norm2 timing runs for batch sizes 2..64, each over 512 blocks of
// 128-dimensional vectors. The DISABLED_ prefix keeps GoogleTest from running
// it unless disabled tests are explicitly requested.
TEST(NormMatrix, DISABLED_Norm2_Benchmark) {
  Norm2Benchmark<2, 512, 128>();
  Norm2Benchmark<4, 512, 128>();
  Norm2Benchmark<8, 512, 128>();
  Norm2Benchmark<16, 512, 128>();
  Norm2Benchmark<32, 512, 128>();
  Norm2Benchmark<64, 512, 128>();
}

================================================
FILE: tests/ailego/math/norm_matrix_int8_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <random>
#include <ailego/container/bitmap.h>
#include <ailego/internal/cpu_features.h>
#include <ailego/math/norm_matrix.h>
#include <gtest/gtest.h>
#include <zvec/ailego/container/vector.h>
#include <zvec/ailego/utility/time_helper.h>

using namespace zvec::ailego;

// Returns the string reported by internal::CpuFeatures::Intrinsics(); the
// benchmarks in this file print it in their banner line.
static inline const char *IntelIntrinsics(void) {
  return internal::CpuFeatures::Intrinsics();
}

// Transposes a row-major matrix of 32-bit words.
//   dst  Output, M rows by N columns (M * N words)
//   src  Input, N rows by M columns (M * N words)
// After the call dst[i * N + j] == src[j * M + i]. The two buffers must not
// overlap, since words are copied one by one.
static inline void MatrixTranspose(uint32_t *dst, const uint32_t *src, size_t M,
                                   size_t N) {
  for (size_t row = 0; row != M; ++row) {
    uint32_t *out = dst + row * N;
    for (size_t col = 0; col != N; ++col) {
      out[col] = src[col * M + row];
    }
  }
}

// Runs the batch-size-1 Norm1Matrix<int8_t> kernel over the whole vector and
// returns the value it writes to the output slot.
static float Norm1(const std::vector<int8_t> &vec) {
  float out = 0.0f;
  Norm1Matrix<int8_t, 1>::Compute(vec.data(), vec.size(), &out);
  return out;
}

// Runs the batch-size-1 Norm2Matrix<int8_t> kernel over the whole vector and
// returns the value it writes to the output slot.
static float Norm2(const std::vector<int8_t> &vec) {
  float out = 0.0f;
  Norm2Matrix<int8_t, 1>::Compute(vec.data(), vec.size(), &out);
  return out;
}

// For every dimension in [1, 99], compares Norm1() on random INT8 values from
// [-127, 127] against a directly accumulated sum of absolute values.
TEST(NormMatrix, Norm1_General) {
  std::mt19937 rng((std::random_device())());
  std::uniform_int_distribution<int> uniform(-127, 127);

  for (size_t dim = 1; dim < 100; ++dim) {
    std::vector<int8_t> values;
    float expected = 0.0f;
    while (values.size() < dim) {
      const int8_t sample = static_cast<int8_t>(uniform(rng));
      expected += std::abs(sample);
      values.push_back(sample);
    }
    EXPECT_FLOAT_EQ(expected, Norm1(values));
  }
}

// For every dimension in [1, 99], compares Norm2() on random INT8 values from
// [-127, 127] against the square root of a directly accumulated sum of
// squares.
TEST(NormMatrix, Norm2_General) {
  std::mt19937 rng((std::random_device())());
  std::uniform_int_distribution<int> uniform(-127, 127);

  for (size_t dim = 1; dim < 100; ++dim) {
    std::vector<int8_t> values;
    float squares = 0.0f;
    while (values.size() < dim) {
      const int8_t sample = static_cast<int8_t>(uniform(rng));
      squares += sample * sample;  // product is formed in int
      values.push_back(sample);
    }
    EXPECT_FLOAT_EQ(std::sqrt(squares), Norm2(values));
  }
}

// Checks that the M-wide INT8 Norm1 kernel (fed the matrix transposed in
// 32-bit words, i.e. four values at a time) matches the single-vector kernel.
// The dimension is a random multiple of 4 in [4, 260].
template <size_t M>
void TestNorm1Matrix(void) {
  std::mt19937 rng((std::random_device())());

  const size_t dim =
      (std::uniform_int_distribution<size_t>(1, 65))(rng) << 2;
  const size_t count = M * dim;

  std::vector<int8_t> rows(count);
  std::vector<int8_t> columns(count);
  std::vector<float> expected(M);
  std::vector<float> actual(M);

  std::uniform_int_distribution<int> uniform(-127, 127);
  for (int8_t &value : rows) {
    value = static_cast<int8_t>(uniform(rng));
  }
  MatrixTranspose(reinterpret_cast<uint32_t *>(columns.data()),
                  reinterpret_cast<const uint32_t *>(rows.data()), dim / 4, M);

  // Reference: one vector per call
  for (size_t row = 0; row != M; ++row) {
    Norm1Matrix<int8_t, 1>::Compute(rows.data() + row * dim, dim,
                                    &expected[row]);
  }
  // Under test: all M vectors in a single call
  Norm1Matrix<int8_t, M>::Compute(columns.data(), dim, actual.data());

  for (size_t row = 0; row != M; ++row) {
    EXPECT_FLOAT_EQ(expected[row], actual[row]);
  }
}

// Checks that the M-wide INT8 Norm2 kernel (fed the matrix transposed in
// 32-bit words, i.e. four values at a time) matches the single-vector kernel.
// The dimension is a random multiple of 4 in [4, 260].
template <size_t M>
void TestNorm2Matrix(void) {
  std::mt19937 rng((std::random_device())());

  const size_t dim =
      (std::uniform_int_distribution<size_t>(1, 65))(rng) << 2;
  const size_t count = M * dim;

  std::vector<int8_t> rows(count);
  std::vector<int8_t> columns(count);
  std::vector<float> expected(M);
  std::vector<float> actual(M);

  std::uniform_int_distribution<int> uniform(-127, 127);
  for (int8_t &value : rows) {
    value = static_cast<int8_t>(uniform(rng));
  }
  MatrixTranspose(reinterpret_cast<uint32_t *>(columns.data()),
                  reinterpret_cast<const uint32_t *>(rows.data()), dim / 4, M);

  // Reference: one vector per call
  for (size_t row = 0; row != M; ++row) {
    Norm2Matrix<int8_t, 1>::Compute(rows.data() + row * dim, dim,
                                    &expected[row]);
  }
  // Under test: all M vectors in a single call
  Norm2Matrix<int8_t, M>::Compute(columns.data(), dim, actual.data());

  for (size_t row = 0; row != M; ++row) {
    EXPECT_FLOAT_EQ(expected[row], actual[row]);
  }
}

// Batched-vs-single INT8 Norm1 agreement for a spread of batch sizes from 1 to
// 65. Each call draws its own random dimension.
TEST(NormMatrix, Norm1_Matrix) {
  TestNorm1Matrix<1>();
  TestNorm1Matrix<3>();
  TestNorm1Matrix<4>();
  TestNorm1Matrix<8>();
  TestNorm1Matrix<10>();
  TestNorm1Matrix<12>();
  TestNorm1Matrix<16>();
  TestNorm1Matrix<29>();
  TestNorm1Matrix<32>();
  TestNorm1Matrix<38>();
  TestNorm1Matrix<40>();
  TestNorm1Matrix<51>();
  TestNorm1Matrix<64>();
  TestNorm1Matrix<65>();
}

// Batched-vs-single INT8 Norm2 agreement for a spread of batch sizes from 1 to
// 65. Each call draws its own random dimension.
TEST(NormMatrix, Norm2_Matrix) {
  TestNorm2Matrix<1>();
  TestNorm2Matrix<3>();
  TestNorm2Matrix<4>();
  TestNorm2Matrix<8>();
  TestNorm2Matrix<10>();
  TestNorm2Matrix<12>();
  TestNorm2Matrix<16>();
  TestNorm2Matrix<29>();
  TestNorm2Matrix<32>();
  TestNorm2Matrix<38>();
  TestNorm2Matrix<40>();
  TestNorm2Matrix<51>();
  TestNorm2Matrix<64>();
  TestNorm2Matrix<65>();
}

template <size_t M, size_t B, size_t D>
void Norm1Benchmark(void) {
  const size_t dimension = D;
  const size_t batch_size = M;
  const size_t block_size = B;
  const size_t matrix_size = block_size * batch_size * dimension;

  std::vector<int8_t> matrix1(matrix_size);
  std::vector<int8_t> matrix2(matrix_size);

  std::mt19937 gen((std::random_device())());
  std::uniform_int_distribution<int> dist(-127, 127);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = (int8_t)dist(gen);
  }

  for (size_t i = 0; i < block_size; ++i) {
    size_t start_pos = i * batch_size * dimension;
    MatrixTranspose((uint32_t *)&matrix2[start_pos],
                    (const uint32_t *)&matrix1[start_pos], dimension / 4,
                    batch_size);
  }

  ElapsedTime elapsed_time;
  std::vector<float> results(batch_size);

  std::cout << "# (" << IntelIntrinsics() << ") INT8 " << dimension << "d, "
            << batch_size << " * " << block_size << std::endl;

  // 1 Batched Norm1
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const int8_t *matrix_batch = &matrix2[i * batch_size * dimension];
    Norm1Matrix<int8_t, batch_size>::Compute(matrix_batch, dimension,
                                             &results[0]);
  }
  std::cout << "* Batched Norm1 (us) \t" << elapsed_time.micro_seconds()
            << std::endl;

  // Unbatched Norm1
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const int8_t *matrix_batch = &matrix1[i * batch_size * dimension];
    for (size_t k = 0; k < batch_size; ++k) {
      Norm1Matrix<int8_t, 1>::Compute(&matrix_batch[k * dimension], dimension,
                                      &results[k]);
    }
  }
  std::cout << "* Unbatched Norm1 (us) \t" << elapsed_time.micro_seconds()
            << std::endl;
}

template <size_t M, size_t B, size_t D>
void Norm2Benchmark(void) {
  const size_t dimension = D;
  const size_t batch_size = M;
  const size_t block_size = B;
  const size_t matrix_size = block_size * batch_size * dimension;

  std::vector<int8_t> matrix1(matrix_size);
  std::vector<int8_t> matrix2(matrix_size);

  std::mt19937 gen((std::random_device())());
  std::uniform_int_distribution<int> dist(-127, 127);
  for (size_t i = 0; i < matrix_size; ++i) {
    matrix1[i] = (int8_t)dist(gen);
  }

  for (size_t i = 0; i < block_size; ++i) {
    size_t start_pos = i * batch_size * dimension;
    MatrixTranspose((uint32_t *)&matrix2[start_pos],
                    (const uint32_t *)&matrix1[start_pos], dimension / 4,
                    batch_size);
  }

  ElapsedTime elapsed_time;
  std::vector<float> results(batch_size);

  std::cout << "# (" << IntelIntrinsics() << ") INT8 " << dimension << "d, "
            << batch_size << " * " << block_size << std::endl;

  // 1 Batched Norm2
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const int8_t *matrix_batch = &matrix2[i * batch_size * dimension];
    Norm2Matrix<int8_t, batch_size>::Compute(matrix_batch, dimension,
                                             results.data());
  }
  std::cout << "* Batched Norm2 (us) \t" << elapsed_time.micro_seconds()
            << std::endl;

  // Unbatched Norm2
  elapsed_time.reset();
  for (size_t i = 0; i < block_size; ++i) {
    const int8_t *matrix_batch = &matrix1[i * batch_size * dimension];
    for (size_t k = 0; k < batch_size; ++k) {
      Norm2Matrix<int8_t, 1>::Compute(&matrix_batch[k * dimension], dimension,
                                      &results[k]);
    }
  }
  std::cout << "* Unbatched Norm2 (us) \t" << elapsed_time.micro_seconds()
            << std::endl;
}

// INT8 Norm1 timing runs for batch sizes 2..64, each over 512 blocks of
// 128-dimensional vectors. The DISABLED_ prefix keeps GoogleTest from running
// it unless disabled tests are explicitly requested.
TEST(NormMatrix, DISABLED_Norm1_Benchmark) {
  Norm1Benchmark<2, 512, 128>();
  Norm1Benchmark<4, 512, 128>();
  Norm1Benchmark<8, 512, 128>();
  Norm1Benchmark<16, 512, 128>();
  Norm1Benchmark<32, 512, 128>();
  Norm1Benchmark<64, 512, 128>();
}

// INT8 Norm2 timing runs for batch sizes 2..64, each over 512 blocks of
// 128-dimensional vectors. The DISABLED_ prefix keeps GoogleTest from running
// it unless disabled tests are explicitly requested.
TEST(NormMatrix, DISABLED_Norm2_Benchmark) {
  Norm2Benchmark<2, 512, 128>();
  Norm2Benchmark<4, 512, 128>();
  Norm2Benchmark<8, 512, 128>();
  Norm2Benchmark<16, 512, 128>();
  Norm2Benchmark<32, 512, 128>();
  Norm2Benchmark<64, 512, 128>();
}


================================================
FILE: tests/ailego/math/normalizer_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <random>
#include <ailego/container/bitmap.h>
#include <ailego/internal/cpu_features.h>
#include <ailego/math/normalizer.h>
#include <gtest/gtest.h>
#include <zvec/ailego/container/vector.h>
#include <zvec/ailego/utility/time_helper.h>

using namespace zvec::ailego;

// Over 100 rounds of 111 random floats: runs Normalizer<float>::Compute with
// 1.1f and expects every element, multiplied back by 1.1f, to match the
// untouched copy. L1 and L2 are then invoked, but their results are not
// asserted.
TEST(Normalizer, FP32_General) {
  std::mt19937 rng((std::random_device())());
  std::uniform_real_distribution<float> uniform(0.0, 1.0);

  for (size_t round = 0; round < 100; ++round) {
    std::vector<float> scaled(111);
    for (float &item : scaled) {
      item = uniform(rng);
    }
    std::vector<float> untouched(scaled);

    Normalizer<float>::Compute(scaled.data(), scaled.size(), 1.1f);
    for (size_t k = 0; k != scaled.size(); ++k) {
      EXPECT_FLOAT_EQ(scaled[k] * 1.1f, untouched[k]);
    }

    float l1 = 0.0f;
    float l2 = 0.0f;
    Normalizer<float>::L1(scaled.data(), scaled.size(), &l1);
    Normalizer<float>::L2(untouched.data(), untouched.size(), &l2);
  }
}

// Over 100 rounds of 111 random FP16 values: runs Normalizer<Float16>::Compute
// with 1.0f and expects every element to still equal the untouched copy. L1
// and L2 are then invoked, but their results are not asserted.
TEST(Normalizer, FP16_General) {
  std::mt19937 rng((std::random_device())());
  std::uniform_real_distribution<float> uniform(0.0, 1.0);

  for (size_t round = 0; round < 100; ++round) {
    std::vector<Float16> scaled;
    std::vector<Float16> untouched;
    while (scaled.size() < 111) {
      const float sample = uniform(rng);
      scaled.push_back(sample);
      untouched.push_back(sample);
    }

    Normalizer<Float16>::Compute(scaled.data(), scaled.size(), 1.0f);
    for (size_t k = 0; k != scaled.size(); ++k) {
      EXPECT_FLOAT_EQ(scaled[k], untouched[k]);
    }

    float l1 = 0.0f;
    float l2 = 0.0f;
    Normalizer<Float16>::L1(scaled.data(), scaled.size(), &l1);
    Normalizer<Float16>::L2(untouched.data(), untouched.size(), &l2);
  }
}

// Applies L1 and L2 to all-zero float vectors of a random length in [1, 128]
// and checks that no element ends up NaN.
TEST(Normalizer, FP32_Zero) {
  std::mt19937 rng((std::random_device())());
  std::uniform_int_distribution<uint32_t> pick(1, 128);
  const uint32_t dim = pick(rng);

  std::vector<float> l1_input(dim, 0.0f);
  std::vector<float> l2_input(dim, 0.0f);

  float norm;
  Normalizer<float>::L1(l1_input.data(), l1_input.size(), &norm);
  Normalizer<float>::L2(l2_input.data(), l2_input.size(), &norm);

  for (size_t k = 0; k != l1_input.size(); ++k) {
    EXPECT_FALSE(std::isnan(l1_input[k]));
  }
  for (size_t k = 0; k != l2_input.size(); ++k) {
    EXPECT_FALSE(std::isnan(l2_input[k]));
  }
}

// Applies L1 and L2 to all-zero FP16 vectors of a random length in [1, 128]
// and checks that no element ends up NaN.
TEST(Normalizer, FP16_Zero) {
  std::mt19937 gen((std::random_device())());
  std::uniform_int_distribution<uint32_t> dist(1, 128);
  const uint32_t dimension = dist(gen);

  std::vector<Float16> vec1(dimension, 0.0f);
  std::vector<Float16> vec2(dimension, 0.0f);

  // vec1 goes through L1 and vec2 through L2, mirroring the FP32 zero test.
  // Calling L2 on both vectors left the FP16 L1 zero-vector path untested.
  float norm = 0.0f;
  Normalizer<Float16>::L1(vec1.data(), vec1.size(), &norm);
  Normalizer<Float16>::L2(vec2.data(), vec2.size(), &norm);
  for (auto v : vec1) {
    EXPECT_FALSE(std::isnan(v));
  }
  for (auto v : vec2) {
    EXPECT_FALSE(std::isnan(v));
  }
}


================================================
FILE: tests/ailego/parallel/lock_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <mutex>
#include <ailego/parallel/lock.h>
#include <gtest/gtest.h>
#include <zvec/ailego/parallel/thread_pool.h>

using namespace zvec;

// Smoke test: SpinMutex is usable with std::unique_lock, std::lock_guard and
// std::try_lock.
TEST(SpinMutex, General) {
  ailego::SpinMutex mutex;
  { std::unique_lock<ailego::SpinMutex> scoped_unique(mutex); }
  { std::lock_guard<ailego::SpinMutex> scoped_guard(mutex); }

  ailego::SpinMutex mutex2;
  // std::try_lock yields -1 only when every lockable was acquired; in that
  // case both mutexes are held here and must be released by hand.
  const bool both_acquired = (std::try_lock(mutex, mutex2) == -1);
  if (both_acquired) {
    mutex.unlock();
    mutex2.unlock();
  }
}

// Smoke test: a WriteLock built on a SharedMutex can itself be driven by the
// standard scoped lock wrappers.
TEST(WriteLock, General) {
  ailego::SharedMutex shared;
  ailego::WriteLock wrlock(shared);
  { std::unique_lock<ailego::WriteLock> scoped_unique(wrlock); }
  { std::lock_guard<ailego::WriteLock> scoped_guard(wrlock); }
}

// Smoke test: a ReadLock built on a SharedMutex can itself be driven by the
// standard scoped lock wrappers.
TEST(ReadLock, General) {
  ailego::SharedMutex shared;
  ailego::ReadLock rdlock(shared);
  { std::unique_lock<ailego::ReadLock> scoped_unique(rdlock); }
  { std::lock_guard<ailego::ReadLock> scoped_guard(rdlock); }
}

// 2000 pool tasks each bump a plain int under a std::mutex; the final value
// must equal the number of tasks.
TEST(Mutex, General) {
  ailego::ThreadPool workers;
  std::mutex guard;

  int count = 0;
  for (int task = 0; task != 2000; ++task) {
    workers.execute([&guard, &count]() {
      std::lock_guard<std::mutex> hold(guard);
      ++count;
    });
  }
  workers.wait_finish();
  EXPECT_EQ(2000, count);
}

// Baseline fixture for the close-lock test. The opened/closed state is a
// plain volatile flag with no synchronisation; subclasses override the
// virtual operations to add their own guarding.
class NoLockTest {
 public:
  // Polymorphic base: a virtual destructor keeps destruction through a base
  // pointer or reference well-defined.
  virtual ~NoLockTest() = default;

  // Marks the object as opened.
  virtual void open() {
    no_lock_opened_ = true;
  }

  // Returns -1 if the object is not opened; otherwise increments *count,
  // sleeps 10 ms to simulate work and returns 0.
  virtual int read(volatile int *count) {
    if (!no_lock_opened_) {
      return -1;
    }
    // Explicit read-modify-write: ++ on a volatile is deprecated in C++20.
    *count = *count + 1;
    std::this_thread::sleep_for(std::chrono::milliseconds(10));
    return 0;
  }

  // Same contract as read(): -1 when not opened, otherwise increments
  // *count, sleeps 10 ms and returns 0.
  virtual int write(volatile int *count) {
    if (!no_lock_opened_) {
      return -1;
    }
    *count = *count + 1;
    std::this_thread::sleep_for(std::chrono::milliseconds(10));
    return 0;
  }

  // Marks the object as closed; later read()/write() calls return -1.
  virtual void close() {
    no_lock_opened_ = false;
  }

 private:
  volatile bool no_lock_opened_{false};
};

// Variant of NoLockTest that wraps every operation with the AILEGO_SAFE_*
// macros before delegating to the base implementation.
// NOTE(review): AILEGO_SAFE_ACCESS / AILEGO_SAFE_CLOSE are defined outside
// this file; they presumably operate on the `counter_` and `opened_` members
// declared below — confirm against the macro definitions before renaming.
class AtomicLockTest : public NoLockTest {
 public:
  // Publishes the opened state in the atomic flag, then opens the base.
  void open() override {
    opened_.store(true);
    NoLockTest::open();
  }
  // Guarded read; the macro argument (-1) is presumably the value returned
  // when access is refused — TODO confirm.
  int read(volatile int *count) override {
    AILEGO_SAFE_ACCESS(-1);
    return NoLockTest::read(count);
  }
  // Guarded write; same macro usage as read().
  int write(volatile int *count) override {
    AILEGO_SAFE_ACCESS(-1);
    return NoLockTest::write(count);
  }
  // Runs the close macro first, then closes the base object.
  void close() override {
    AILEGO_SAFE_CLOSE;
    NoLockTest::close();
  }

 private:
  // Not referenced directly in this class; presumably used by the macros.
  mutable std::atomic<int> counter_{0};
  std::atomic<bool> opened_{false};
};

// Drives `test_obj` from a thread pool: opens it, schedules kTestCount tasks
// that each call read() then write() on a shared counter, closes the object
// while tasks may still be pending, then waits for the pool to drain.
// Prints the elapsed time and final counter, and checks the counter never
// exceeds two increments per task.
void test_lock(NoLockTest &test_obj) {
  ailego::ThreadPool pool;
  test_obj.open();

  // steady_clock is monotonic, so the interval cannot be distorted by wall
  // clock adjustments made while the test is running.
  const auto start = std::chrono::steady_clock::now();

  const int kTestCount = 10000;
  volatile int count = 0;
  for (int i = 0; i < kTestCount; ++i) {
    pool.execute([&]() {
      test_obj.read(&count);
      test_obj.write(&count);
    });
  }
  // close() is deliberately issued before the pool is drained.
  test_obj.close();
  pool.wait_finish();

  const auto stop = std::chrono::steady_clock::now();
  const auto time_used =
      std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
  std::cout << "use: " << time_used.count() << " us" << std::endl;

  std::cout << "count: " << count << std::endl;
  EXPECT_LE(count, kTestCount * 2);
}

// Runs the same close-under-load scenario against the unguarded fixture and
// then against the macro-guarded one, labelling each run on stdout.
TEST(CloseLock, Perf) {
  std::cout << "NoLockTest" << std::endl;
  NoLockTest unguarded;
  test_lock(unguarded);

  std::cout << "AtomicLockTest" << std::endl;
  AtomicLockTest guarded;
  test_lock(guarded);
}


================================================
FILE: tests/ailego/parallel/multi_thread_list_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <iostream>
#include <memory>
#include <zvec/ailego/parallel/thread_pool.h>

#define private public
#include <ailego/parallel/multi_thread_list.h>
#undef private

#include <gtest/gtest.h>

using namespace zvec;
using namespace zvec::ailego;
using namespace std;

// Copyable payload pushed through the MultiThreadList under test.
struct Item {
  uint32_t a_{0};  // numeric id; zero for a default-constructed Item
  std::string b_;  // textual form of the id
  Item() = default;
  // Takes `b` by value and moves it into place to avoid a second copy.
  Item(uint32_t a, std::string b) : a_(a), b_(std::move(b)) {}
};

// Queue shared by the Item-based MultiThreadListTest cases in this file;
// each of them calls reset() on it before use.
// NOTE(review): the constructor argument 100 is presumably the queue
// capacity — confirm against MultiThreadList.
MultiThreadList<Item> mt_queue(100);

void producer(uint32_t i) {
  Item item{i, std::to_string(i)};
  mt_queue.produce(item);
  return;
}
void consumer(uint32_t i, uint32_t *result) {
  Item item;
  while (mt_queue.consume(&item)) {
    *result += item.a_;
  }
}
// Used after the queue has been marked done: producing must be rejected.
void producer_done(uint32_t i) {
  Item item{i, std::to_string(i)};
  EXPECT_EQ(false, mt_queue.produce(item));
}

// Repeats a produce/consume cycle 100 times: 100 consumers and 100 producers
// (ids 1..100) run concurrently, the queue is marked done, and the ids seen
// by all consumers must add up to 5050. Producing after done() must fail.
TEST(MultiThreadListTest, General) {
  for (int times = 99; times >= 0; --times) {
    cout << "================================" << endl;
    cout << "times: " << times << endl;

    mt_queue.reset();

    ailego::ThreadPool producer_pool;
    ailego::ThreadPool consumer_pool;
    ailego::ThreadPool producer_done_pool;

    uint32_t num_of_consumer = 100;
    uint32_t num_of_producer = 100;
    uint32_t num_of_producer_done = 100;

    // One zero-initialised accumulator slot per consumer task.
    std::vector<uint32_t> consumer_results(num_of_consumer, 0);

    for (uint32_t idx = 0; idx != num_of_consumer; ++idx) {
      consumer_pool.execute(consumer, idx + 1, &consumer_results[idx]);
    }

    for (uint32_t idx = 0; idx != num_of_producer; ++idx) {
      producer_pool.execute(producer, idx + 1);
    }

    // All items are queued before done() is signalled to the consumers.
    producer_pool.wait_finish();
    mt_queue.done();
    consumer_pool.wait_finish();

    // produce after queue done
    for (uint32_t idx = 0; idx != num_of_producer_done; ++idx) {
      producer_done_pool.execute(producer_done, idx + 1);
    }
    producer_done_pool.wait_finish();

    uint32_t total = 0;
    for (uint32_t idx = 0; idx != num_of_consumer; ++idx) {
      const uint32_t partial = consumer_results[idx];
      cout << partial << " ";
      total += partial;
    }
    cout << endl;

    EXPECT_EQ(total, 5050);
  }
}

// Schedules 1000 producers with no consumer and marks the queue done while
// they may still be running; the test passes if every producer returns so
// that wait_finish() completes.
TEST(MultiThreadListTest, FullQueueQuit) {
  mt_queue.reset();

  ailego::ThreadPool producer_pool;

  const uint32_t producer_total = 1000;

  uint32_t next_id = 1;
  while (next_id <= producer_total) {
    producer_pool.execute(producer, next_id);
    ++next_id;
  }

  // done() comes first on purpose: it is what lets the producers finish.
  mt_queue.done();
  producer_pool.wait_finish();
}

// Two-phase test of stop_consume()/resume_consume().
// Phase 1: 100 consumers and producers 1..100 run, consumption is stopped,
//          and the first 100 result slots must sum to 5050.
// Phase 2: producers 101..200 are queued, consumption is resumed with 100
//          fresh consumers writing to slots 100..199, the queue is marked
//          done, and those slots must sum to 15050.
TEST(MultiThreadListTest, ConsumeStopResume) {
  mt_queue.reset();

  ailego::ThreadPool producer_pool;
  ailego::ThreadPool consumer_pool;

  uint32_t num_of_consumer = 100;
  uint32_t num_of_producer = 100;

  // Slots [0, 100) belong to phase 1, slots [100, 200) to phase 2.
  std::vector<uint32_t> consumer_results(2 * num_of_consumer, 0);

  for (uint32_t i = 0; i < num_of_consumer; i++) {
    consumer_pool.execute(consumer, i + 1, &consumer_results[i]);
  }

  for (uint32_t i = 0; i < num_of_producer; i++) {
    producer_pool.execute(producer, i + 1);
  }

  producer_pool.wait_finish();

  // Give the consumers time to pick up the queued items before stopping.
  std::this_thread::sleep_for(std::chrono::milliseconds(100));

  mt_queue.stop_consume();
  consumer_pool.wait_finish();

  uint32_t total = 0;
  for (uint32_t i = 0; i < num_of_consumer; i++) {
    cout << consumer_results[i] << " ";
    total += consumer_results[i];
  }
  cout << endl;

  cout << "mt queue size: " << mt_queue.list_.size() << endl;

  EXPECT_EQ(total, 5050);

  for (uint32_t i = num_of_producer; i < 2 * num_of_producer; i++) {
    producer_pool.execute(producer, i + 1);
  }

  mt_queue.resume_consume();

  // Index by the consumer count: this loop walks the consumer result slots,
  // so its bounds must not depend on the producer count.
  for (uint32_t i = num_of_consumer; i < 2 * num_of_consumer; i++) {
    consumer_pool.execute(consumer, i + 1, &consumer_results[i]);
  }

  producer_pool.wait_finish();
  mt_queue.done();
  consumer_pool.wait_finish();

  total = 0;
  for (uint32_t i = num_of_consumer; i < 2 * num_of_consumer; i++) {
    cout << consumer_results[i] << " ";
    total += consumer_results[i];
  }
  cout << endl;

  cout << "mt queue size: " << mt_queue.list_.size() << endl;

  EXPECT_EQ(total, 15050);
}

// Move-only payload used to exercise the rvalue produce() overload of the
// MultiThreadList under test.
struct MoveableItem {
  uint32_t a_{0};  // numeric id; zero for a default-constructed item
  std::string b_;  // textual form of the id
  MoveableItem() = default;
  // Takes `b` by value and moves it into place to avoid a second copy.
  MoveableItem(uint32_t a, std::string b) : a_(a), b_(std::move(b)) {}

  // Copying is forbidden so that any accidental copy fails to compile.
  MoveableItem(const MoveableItem &) = delete;
  MoveableItem &operator=(const MoveableItem &) = delete;

  MoveableItem(MoveableItem &&) = default;
  MoveableItem &operator=(MoveableItem &&) = default;
};

// Queue of move-only items used by the General_Moveable test, which calls
// reset() on it at the start of every iteration.
// NOTE(review): the constructor argument 100 is presumably the queue
// capacity — confirm against MultiThreadList.
MultiThreadList<MoveableItem> mt_moveable_queue(100);

void producer_moveable(uint32_t i) {
  MoveableItem item{i, std::to_string(i)};
  mt_moveable_queue.produce(std::move(item));
  return;
}
void consumer_moveable(uint32_t i, uint32_t *result) {
  MoveableItem item;
  while (mt_moveable_queue.consume(&item)) {
    *result += item.a_;
  }
}
// Used after the move-only queue has been marked done: producing must be
// rejected.
void producer_moveable_done(uint32_t i) {
  MoveableItem item{i, std::to_string(i)};
  EXPECT_EQ(false, mt_moveable_queue.produce(std::move(item)));
}

// Move-only counterpart of the General test: 100 rounds of 100 consumers and
// 100 producers (ids 1..100) on the move-only queue; the consumed ids must
// add up to 5050, and producing after done() must fail.
TEST(MultiThreadListTest, General_Moveable) {
  for (int times = 99; times >= 0; --times) {
    cout << "================================" << endl;
    cout << "times: " << times << endl;

    mt_moveable_queue.reset();

    ailego::ThreadPool producer_pool;
    ailego::ThreadPool consumer_pool;
    ailego::ThreadPool producer_done_pool;

    uint32_t num_of_consumer = 100;
    uint32_t num_of_producer = 100;
    uint32_t num_of_producer_done = 100;

    // One zero-initialised accumulator slot per consumer task.
    std::vector<uint32_t> consumer_results(num_of_consumer, 0);

    for (uint32_t idx = 0; idx != num_of_consumer; ++idx) {
      consumer_pool.execute(consumer_moveable, idx + 1,
                            &consumer_results[idx]);
    }

    for (uint32_t idx = 0; idx != num_of_producer; ++idx) {
      producer_pool.execute(producer_moveable, idx + 1);
    }

    // All items are queued before done() is signalled to the consumers.
    producer_pool.wait_finish();
    mt_moveable_queue.done();
    consumer_pool.wait_finish();

    // produce after queue done
    for (uint32_t idx = 0; idx != num_of_producer_done; ++idx) {
      producer_done_pool.execute(producer_moveable_done, idx + 1);
    }
    producer_done_pool.wait_finish();

    uint32_t total = 0;
    for (uint32_t idx = 0; idx != num_of_consumer; ++idx) {
      const uint32_t partial = consumer_results[idx];
      cout << partial << " ";
      total += partial;
    }
    cout << endl;

    EXPECT_EQ(total, 5050);
  }
}


================================================
FILE: tests/ailego/parallel/semaphore_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <mutex>
#include <ailego/parallel/semaphore.h>
#include <gtest/gtest.h>
#include <zvec/ailego/parallel/thread_pool.h>
#include <zvec/ailego/utility/time_helper.h>

using namespace zvec;

// A Semaphore constructed with 1 is used as a lock through std::lock_guard
// by 2000 pool tasks; every task must get through and bump the counter.
TEST(Semaphore, General) {
  ailego::ThreadPool workers;
  ailego::Semaphore gate(1);

  std::atomic<int> count(0);
  for (int task = 0; task != 2000; ++task) {
    workers.execute([&gate, &count]() {
      std::lock_guard<ailego::Semaphore> hold(gate);
      ++count;
    });
  }
  workers.wait_finish();
  EXPECT_EQ(2000, count);
}

// Constructs BinarySemaphores with several template sizes, then has 2000
// pool tasks acquire a slot from the <32> instance, count the use of that
// slot, sleep briefly and release it. The per-slot counts must add up to the
// atomically tracked total.
TEST(BinarySemaphores, General) {
  ailego::ThreadPool pool;
  const int sem_count = 35;
  // Only sem_mutex32 is exercised below; the others are just constructed.
  ailego::BinarySemaphores<32> sem_mutex0(0);
  ailego::BinarySemaphores<32> sem_mutex32(sem_count);
  ailego::BinarySemaphores<63> sem_mutex64(sem_count);
  ailego::BinarySemaphores<15> sem_mutex16(sem_count);
  ailego::BinarySemaphores<7> sem_mutex8(sem_count);
  ailego::BinarySemaphores<1> sem_mutex1(sem_count);

  std::atomic<uint32_t> total{0u};
  std::vector<uint32_t> counts(sem_count, 0u);
  for (int task = 0; task != 2000; ++task) {
    pool.execute([&]() {
      // NOTE(review): counts[slot] is a plain integer; this presumably
      // relies on acquire() granting exclusive use of the slot — confirm.
      const int slot = sem_mutex32.acquire();
      ++counts[slot];
      ++total;
      std::this_thread::sleep_for(
          std::chrono::microseconds(std::rand() % 100 + 1));
      sem_mutex32.release(slot);
    });
  }
  pool.wait_finish();

  uint32_t sum = 0;
  for (const uint32_t per_slot : counts) {
    sum += per_slot;
  }
  EXPECT_EQ(total, sum);
}

// 64 pool tasks repeatedly acquire a slot, count its use, sleep and release
// it while the main thread calls acquire(i) for every i in [0, sem_count),
// printing how long each call took. Afterwards the per-slot counts must add
// up to the atomically tracked total.
TEST(BinarySemaphores, General2) {
  ailego::ThreadPool pool;
  const int sem_count = 32;
  ailego::BinarySemaphores<64> sem_mutex64(sem_count);
  std::atomic<uint32_t> total{0u};
  std::vector<uint32_t> counts(sem_count, 0u);
  // Written by the main thread and polled by every worker, so it has to be
  // atomic: a plain bool here is a data race.
  std::atomic<bool> flag{true};
  for (int i = 0; i < 64; ++i) {
    pool.execute([&]() {
      while (flag.load()) {
        int index1 = sem_mutex64.acquire();
        ++counts[index1];
        ++total;
        std::this_thread::sleep_for(
            std::chrono::microseconds(std::rand() % 100000 + 100));
        sem_mutex64.release(index1);
      }
    });
  }
  for (int i = 0; i < sem_count; ++i) {
    printf("Begin acquire %d ...\n", i);
    ailego::ElapsedTime timer;
    int index = sem_mutex64.acquire(i);
    uint64_t cost = timer.micro_seconds();
    sem_mutex64.release(index);
    printf("Acquire %d cost %zuus\n", i, (size_t)cost);
  }
  // Tell the workers to leave their loops, then wait for them.
  flag.store(false);
  pool.wait_finish();
  uint32_t sum = 0;
  for (int i = 0; i < sem_count; ++i) {
    sum += counts[i];
  }
  EXPECT_EQ(total, sum);
}


================================================
FILE: tests/ailego/parallel/thread_pool_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <chrono>
#include <iostream>
#include <memory>
#include <gtest/gtest.h>
#include <zvec/ailego/parallel/thread_pool.h>

using namespace zvec::ailego;

// Second-stage fixture: owns a default-constructed ThreadPool and counts how
// many times ThreadMain has run.
struct A {
  A() : pool(std::make_shared<ThreadPool>()) {}

  // Task body: formats a status line (only printed if the commented-out
  // statement below is re-enabled), bumps run_count and returns 0.
  int ThreadMain(int32_t &thread_index, uint32_t &num) {
    std::stringstream line;
    line << num << " Task (" << thread_index << " : " << pool->indexof_this()
         << ") " << pool->active_count() << ' ' << pool->pending_count()
         << std::endl;

    // std::cout << line.str();
    run_count.fetch_add(1);
    return 0;
  }
  std::atomic<uint32_t> run_count{0};
  std::shared_ptr<ThreadPool> pool;
};

// First-stage fixture: each ThreadMain call forwards one task to the nested
// A fixture's pool and counts itself.
// NOTE(review): the meaning of the `true` passed to ThreadPool's constructor
// is not visible here — confirm against thread_pool.h.
struct B {
  B() : pool(std::make_shared<ThreadPool>(true)) {}

  // Enqueues A::ThreadMain on aaa's pool, passing this pool's index for the
  // current thread together with `num`, wakes one of aaa's workers, bumps
  // run_count and returns an empty string.
  std::string ThreadMain(uint32_t &num) {
    aaa.pool->enqueue(
        Closure::New(&aaa, &A::ThreadMain, pool->indexof_this(), num));
    aaa.pool->wake_any();
    // std::this_thread::sleep_for(
    //    std::chrono::microseconds(std::rand() % 1000 + 1));
    run_count.fetch_add(1);
    return std::string();
  }
  A aaa;
  std::atomic<uint32_t> run_count{0};
  std::shared_ptr<ThreadPool> pool;
};

// End-to-end check of two chained pools: 10000 tasks are run on B's pool,
// each of which enqueues one task on the nested A's pool. After both pools
// drain, both run counters must be 10000; the pools must then report
// "not stopped" with workers, and after stop()/wait_stop() report "stopped"
// with zero workers.
TEST(ThreadPool, General) {
  // srand((uint32_t)time(NULL));
  // srand((uint32_t)rand());

  B bbb;
  for (uint32_t i = 0; i < 10000u; ++i) {
    bbb.pool->execute(&bbb, &B::ThreadMain, i);
  }
  // Outer pool first: its tasks are what feed the inner pool.
  bbb.pool->wait_finish();
  bbb.aaa.pool->wait_finish();

  // Spin until both pools report finished; the expectation inside the loop
  // is trivially true for an unsigned value.
  while (!bbb.aaa.pool->is_finished() || !bbb.pool->is_finished()) {
    EXPECT_LE(0u, bbb.aaa.pool->pending_count());
  }
  EXPECT_EQ(bbb.aaa.pool->pending_count(), 0u);

  EXPECT_EQ(10000u, bbb.run_count);
  EXPECT_EQ(10000u, bbb.aaa.run_count);

  // State before shutdown.
  EXPECT_FALSE(bbb.aaa.pool->is_stopped());
  EXPECT_FALSE(bbb.pool->is_stopped());
  EXPECT_NE(0u, bbb.aaa.pool->worker_count());
  EXPECT_NE(0u, bbb.pool->worker_count());

  bbb.aaa.pool->stop();
  bbb.aaa.pool->wait_stop();
  bbb.pool->stop();
  bbb.pool->wait_stop();

  // State after shutdown.
  EXPECT_TRUE(bbb.aaa.pool->is_stopped());
  EXPECT_TRUE(bbb.pool->is_stopped());
  EXPECT_EQ(0u, bbb.aaa.pool->worker_count());
  EXPECT_EQ(0u, bbb.pool->worker_count());
}

// Task body for the ExecuteAndWait test: adds one to the pointed-to counter.
void ExecuteAndWaitThread(int *count) {
  *count += 1;
}

// execute_and_wait() must have run the task by the time it returns: the
// counter is checked right before and right after each call, with the
// calling thread adding its own increment in between rounds.
TEST(ThreadPool, ExecuteAndWait) {
  ThreadPool pool;
  int count = 0;
  for (int i = 0; i != 100; ++i) {
    EXPECT_EQ(i * 2, count);
    pool.execute_and_wait(ExecuteAndWaitThread, &count);
    EXPECT_EQ(i * 2 + 1, count);
    count += 1;
  }
  EXPECT_EQ(200, count);
}

// 10000 rounds of: schedule 10 increments, wait_finish(), and check that all
// 10 are visible. The counter is local to each round.
TEST(ThreadPool, WaitFinish) {
  ThreadPool pool;

  for (int round = 0; round != 10000; ++round) {
    std::atomic_uint count{0};
    for (int task = 0; task != 10; ++task) {
      pool.execute([&count]() { count.fetch_add(1); });
    }
    pool.wait_finish();
    EXPECT_EQ(10, count);
  }
}

// 12 tasks on pool1 each create a task group on pool2, verify the new group
// is idle, run 12 short sleeping increments through it and wait for the
// group. After pool1 drains, the shared counter must be 12 * 12.
TEST(ThreadPool, TaskGroup) {
  ThreadPool pool1, pool2;
  std::atomic_uint count{0};

  for (int i = 0; i < 12; ++i) {
    pool1.execute(
        [&count](ThreadPool *p) {
          // `p` is pool2 (passed as the trailing execute() argument).
          auto group = p->make_group();

          // A freshly made group has nothing pending or active.
          EXPECT_TRUE(group->is_finished());
          EXPECT_EQ(0u, group->pending_count());
          EXPECT_EQ(0u, group->active_count());

          for (int j = 0; j < 12; ++j) {
            group->execute([&count]() {
              std::this_thread::sleep_for(
                  std::chrono::microseconds(std::rand() % 1000 + 1));
              ++count;
            });
          }
          // Waits on the group before the pool1 task returns.
          group->wait_finish();
        },
        &pool2);
  }
  pool1.wait_finish();
  EXPECT_EQ(12u * 12u, count);
}

// Reuses a single task group for 10000 rounds: each round schedules 10
// increments, waits on the group and checks that all 10 are visible.
TEST(ThreadPool, TaskGroup2) {
  ThreadPool pool;

  auto group = pool.make_group();
  for (int round = 0; round != 10000; ++round) {
    std::atomic_uint count{0};
    for (int task = 0; task != 10; ++task) {
      group->execute([&count]() { count.fetch_add(1); });
    }
    group->wait_finish();
    EXPECT_EQ(10, count);
  }
  pool.wait_finish();
}


================================================
FILE: tests/ailego/parallel/thread_queue_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <chrono>
#include <iostream>
#include <memory>
#include <gtest/gtest.h>
#include <zvec/ailego/parallel/thread_queue.h>
#include <zvec/ailego/utility/time_helper.h>

using namespace zvec;
using namespace zvec::ailego;

// Submits 1000 tasks to queue[0]; each task expects to observe the counter
// equal to its own submission index, i.e. tasks on that sub-queue must run
// in order. Completion is awaited with a fixed 20 ms sleep.
TEST(ThreadQueue, General) {
  ThreadQueue queue;

  const auto startup_jitter =
      std::chrono::microseconds(std::rand() % 1000 + 1);
  std::this_thread::sleep_for(startup_jitter);
  queue.wake();

  int count = 0;
  for (int i = 0; i != 1000; ++i) {
    queue[0].execute([&count, i]() {
      EXPECT_EQ(i, count);
      count += 1;
      // std::cout << count << std::endl;
    });
  }
  std::this_thread::sleep_for(std::chrono::milliseconds(20));
  EXPECT_EQ(1000, count);

  queue.stop();
  queue.wait_stop();
}

// Submits 10000 increments with random keys via queue.execute(key, task) and
// expects all of them to have run after a fixed 20 ms sleep.
TEST(ThreadQueue, MutliThread) {
  ThreadQueue queue;

  const auto startup_jitter =
      std::chrono::microseconds(std::rand() % 1000 + 1);
  std::this_thread::sleep_for(startup_jitter);
  queue.wake();

  std::atomic_uint count{0};
  for (int task = 0; task != 10000; ++task) {
    queue.execute(std::rand(), [&count]() {
      count.fetch_add(1);
      // std::cout << count << std::endl;
    });
  }
  std::this_thread::sleep_for(std::chrono::milliseconds(20));

  EXPECT_EQ(10000u, count);
  queue.stop();
  queue.wait_stop();
}

// Enqueues 1000 normal tasks and then 1000 high-priority tasks with random
// keys. Each kind records the elapsed time once its counter reads 1000; the
// high-priority batch is expected to finish earlier than the normal batch
// even though it was submitted later and each task sleeps longer.
TEST(ThreadQueue, MultiThreadWithHighPriority) {
  ThreadQueue queue;

  std::this_thread::sleep_for(
      std::chrono::microseconds(std::rand() % 1000 + 1));
  queue.wake();

  std::atomic_uint count{0};
  std::atomic_uint high_priority_count{0};

  ailego::ElapsedTime timer;
  // Written from worker threads and read by this thread: atomic to avoid a
  // data race, and zero-initialised so the comparison below never reads an
  // indeterminate value.
  std::atomic<uint64_t> task_time{0};
  std::atomic<uint64_t> high_priority_task_time{0};

  // Enqueue normal tasks
  for (int i = 0; i < 1000; ++i) {
    queue.execute(std::rand(), [&count, &timer, &task_time]() {
      ++count;
      std::this_thread::sleep_for(std::chrono::microseconds(100));
      if (count == 1000) {
        task_time.store(timer.milli_seconds());
      }
    });
  }

  // Enqueue high-priority tasks
  for (int i = 0; i < 1000; ++i) {
    queue.execute_high_priority(std::rand(), [&high_priority_count, &timer,
                                              &high_priority_task_time]() {
      ++high_priority_count;
      std::this_thread::sleep_for(std::chrono::microseconds(500));
      if (high_priority_count == 1000) {
        high_priority_task_time.store(timer.milli_seconds());
      }
    });
  }

  // Wait for all tasks to complete
  std::this_thread::sleep_for(std::chrono::seconds(3));

  EXPECT_EQ(count, 1000);
  EXPECT_EQ(high_priority_count, 1000);

  // Verify that all high-priority tasks are completed first
  EXPECT_GT(task_time.load(), high_priority_task_time.load());
  std::cout << "task time: " << task_time.load()
            << ", high priority task time: " << high_priority_task_time.load()
            << std::endl;

  queue.stop();
  queue.wait_stop();
}


================================================
FILE: tests/ailego/pattern/closure_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <functional>
#include <iostream>
#include <gtest/gtest.h>
#include <zvec/ailego/pattern/closure.h>
#include <zvec/ailego/utility/time_helper.h>

using namespace zvec;

// Free function with no parameters and no result; intentionally empty.
void GlobalProcess0() {}
// Free function taking one ignored int; intentionally empty.
void GlobalProcess1(int /*unused*/) {}

// Two-argument callback: expects *a2 to be exactly one greater than a1.
void GlobalProcess2(int a1, int *a2) {
  EXPECT_EQ(a1 + 1, *a2);
}

// Three-argument callback (value, pointer, lvalue reference): expects each
// argument's value to be one greater than the previous one.
void GlobalProcess3(int a1, int *a2, int &a3) {
  EXPECT_EQ(a1 + 1, *a2);
  EXPECT_EQ(*a2 + 1, a3);
}

// Four-argument callback adding a const reference: expects each argument's
// value to be one greater than the previous one.
void GlobalProcess4(int a1, int *a2, int &a3, const int &a4) {
  EXPECT_EQ(a1 + 1, *a2);
  EXPECT_EQ(*a2 + 1, a3);
  EXPECT_EQ(a3 + 1, a4);
}

// Five-argument callback adding a pointer-to-volatile: expects each
// argument's value to be one greater than the previous one.
void GlobalProcess5(int a1, int *a2, int &a3, const int &a4, volatile int *a5) {
  EXPECT_EQ(a1 + 1, *a2);
  EXPECT_EQ(*a2 + 1, a3);
  EXPECT_EQ(a3 + 1, a4);
  EXPECT_EQ(a4 + 1, *a5);
}

// Six-argument callback adding a const volatile pointer: expects each
// argument's value to be one greater than the previous one.
void GlobalProcess6(int a1, int *a2, int &a3, const int &a4, volatile int *a5,
                    int *const volatile a6) {
  EXPECT_EQ(a1 + 1, *a2);
  EXPECT_EQ(*a2 + 1, a3);
  EXPECT_EQ(a3 + 1, a4);
  EXPECT_EQ(a4 + 1, *a5);
  EXPECT_EQ(*a5 + 1, *a6);
}

// Seven-argument callback adding an rvalue reference: expects each
// argument's value to be one greater than the previous one.
void GlobalProcess7(int a1, int *a2, int &a3, const int &a4, volatile int *a5,
                    int *const volatile a6, int &&a7) {
  EXPECT_EQ(a1 + 1, *a2);
  EXPECT_EQ(*a2 + 1, a3);
  EXPECT_EQ(a3 + 1, a4);
  EXPECT_EQ(a4 + 1, *a5);
  EXPECT_EQ(*a5 + 1, *a6);
  EXPECT_EQ(*a6 + 1, a7);
}

// Free function with no parameters; its result is its own arity (0).
size_t GlobalFunction0() {
  return 0;
}
// Free function taking one ignored long; its result is its own arity (1).
size_t GlobalFunction1(long /*unused*/) {
  return 1;
}

// Expects *a2 == a1 + 1 and returns the number of parameters (2).
size_t GlobalFunction2(long a1, long *a2) {
  EXPECT_EQ(a1 + 1, *a2);
  return 2;
}

// Expects each argument's value to be one greater than the previous one and
// returns the number of parameters (3).
size_t GlobalFunction3(long a1, long *a2, long &a3) {
  EXPECT_EQ(a1 + 1, *a2);
  EXPECT_EQ(*a2 + 1, a3);
  return 3;
}

// Expects each argument's value to be one greater than the previous one and
// returns the number of parameters (4).
size_t GlobalFunction4(long a1, long *a2, long &a3, const long &a4) {
  EXPECT_EQ(a1 + 1, *a2);
  EXPECT_EQ(*a2 + 1, a3);
  EXPECT_EQ(a3 + 1, a4);
  return 4;
}

// Expects each argument's value to be one greater than the previous one and
// returns the number of parameters (5).
size_t GlobalFunction5(long a1, long *a2, long &a3, const long &a4,
                       volatile long *a5) {
  EXPECT_EQ(a1 + 1, *a2);
  EXPECT_EQ(*a2 + 1, a3);
  EXPECT_EQ(a3 + 1, a4);
  EXPECT_EQ(a4 + 1, *a5);
  return 5;
}

// Expects each argument's value to be one greater than the previous one and
// returns the number of parameters (6).
size_t GlobalFunction6(long a1, long *a2, long &a3, const long &a4,
                       volatile long *a5, long *const volatile a6) {
  EXPECT_EQ(a1 + 1, *a2);
  EXPECT_EQ(*a2 + 1, a3);
  EXPECT_EQ(a3 + 1, a4);
  EXPECT_EQ(a4 + 1, *a5);
  EXPECT_EQ(*a5 + 1, *a6);
  return 6;
}

// Expects each argument's value to be one greater than the previous one and
// returns the number of parameters (7).
size_t GlobalFunction7(long a1, long *a2, long &a3, const long &a4,
                       volatile long *a5, long *const volatile a6, long &&a7) {
  EXPECT_EQ(a1 + 1, *a2);
  EXPECT_EQ(*a2 + 1, a3);
  EXPECT_EQ(a3 + 1, a4);
  EXPECT_EQ(a4 + 1, *a5);
  EXPECT_EQ(*a5 + 1, *a6);
  EXPECT_EQ(*a6 + 1, a7);
  return 7;
}

// Sample type that has a call operator as well as a named member function;
// both return their argument plus the stored offset `b`.
struct WithFunctionCall {
  int operator()(int a) {
    const int sum = a + b;
    return sum;
  }
  int do_something(int a) {
    const int sum = a + b;
    return sum;
  }
  int b{11};
};

// Sample type with only a named member function (no call operator); it
// returns its argument plus the stored offset `b`.
struct WithoutFunctionCall {
  int do_something(int a) {
    const int sum = a + b;
    return sum;
  }
  int b{11};
};

// Static-member counterparts of the GlobalProcessN / GlobalFunctionN free
// functions: StaticProcessN returns void, StaticFunctionN returns N. For
// N >= 2 each one expects every argument's value to be one greater than the
// previous argument's value.
struct ClassA {
  // --- void-returning callbacks, 0 to 7 parameters ---
  static void StaticProcess0(void) {}
  static void StaticProcess1(int) {}

  static void StaticProcess2(int a1, int *a2) {
    EXPECT_EQ(a1 + 1, *a2);
  }

  static void StaticProcess3(int a1, int *a2, int &a3) {
    EXPECT_EQ(a1 + 1, *a2);
    EXPECT_EQ(*a2 + 1, a3);
  }

  static void StaticProcess4(int a1, int *a2, int &a3, const int &a4) {
    EXPECT_EQ(a1 + 1, *a2);
    EXPECT_EQ(*a2 + 1, a3);
    EXPECT_EQ(a3 + 1, a4);
  }

  static void StaticProcess5(int a1, int *a2, int &a3, const int &a4,
                             volatile int *a5) {
    EXPECT_EQ(a1 + 1, *a2);
    EXPECT_EQ(*a2 + 1, a3);
    EXPECT_EQ(a3 + 1, a4);
    EXPECT_EQ(a4 + 1, *a5);
  }

  static void StaticProcess6(int a1, int *a2, int &a3, const int &a4,
                             volatile int *a5, int *const volatile a6) {
    EXPECT_EQ(a1 + 1, *a2);
    EXPECT_EQ(*a2 + 1, a3);
    EXPECT_EQ(a3 + 1, a4);
    EXPECT_EQ(a4 + 1, *a5);
    EXPECT_EQ(*a5 + 1, *a6);
  }

  static void StaticProcess7(int a1, int *a2, int &a3, const int &a4,
                             volatile int *a5, int *const volatile a6,
                             int &&a7) {
    EXPECT_EQ(a1 + 1, *a2);
    EXPECT_EQ(*a2 + 1, a3);
    EXPECT_EQ(a3 + 1, a4);
    EXPECT_EQ(a4 + 1, *a5);
    EXPECT_EQ(*a5 + 1, *a6);
    EXPECT_EQ(*a6 + 1, a7);
  }

  // --- value-returning callbacks; each returns its parameter count ---
  static size_t StaticFunction0(void) {
    return 0;
  }
  static size_t StaticFunction1(long) {
    return 1;
  }

  static size_t StaticFunction2(long a1, long *a2) {
    EXPECT_EQ(a1 + 1, *a2);
    return 2;
  }

  static size_t StaticFunction3(long a1, long *a2, long &a3) {
    EXPECT_EQ(a1 + 1, *a2);
    EXPECT_EQ(*a2 + 1, a3);
    return 3;
  }

  static size_t StaticFunction4(long a1, long *a2, long &a3, const long &a4) {
    EXPECT_EQ(a1 + 1, *a2);
    EXPECT_EQ(*a2 + 1, a3);
    EXPECT_EQ(a3 + 1, a4);
    return 4;
  }

  static size_t StaticFunction5(long a1, long *a2, long &a3, const long &a4,
                                volatile long *a5) {
    EXPECT_EQ(a1 + 1, *a2);
    EXPECT_EQ(*a2 + 1, a3);
    EXPECT_EQ(a3 + 1, a4);
    EXPECT_EQ(a4 + 1, *a5);
    return 5;
  }

  static size_t StaticFunction6(long a1, long *a2, long &a3, const long &a4,
                                volatile long *a5, long *const volatile a6) {
    EXPECT_EQ(a1 + 1, *a2);
    EXPECT_EQ(*a2 + 1, a3);
    EXPECT_EQ(a3 + 1, a4);
    EXPECT_EQ(a4 + 1, *a5);
    EXPECT_EQ(*a5 + 1, *a6);
    return 6;
  }

  static size_t StaticFunction7(long a1, long *a2, long &a3, const long &a4,
                                volatile long *a5, long *const volatile a6,
                                long &&a7) {
    EXPECT_EQ(a1 + 1, *a2);
    EXPECT_EQ(*a2 + 1, a3);
    EXPECT_EQ(a3 + 1, a4);
    EXPECT_EQ(a4 + 1, *a5);
    EXPECT_EQ(*a5 + 1, *a6);
    EXPECT_EQ(*a6 + 1, a7);
    return 7;
  }
};

// Member-function counterparts of the callback families above. The object
// stores one int (b_); every member taking arguments expects the first
// argument to equal b_ and each later argument's value to be one greater
// than the previous one. The members deliberately mix virtual/non-virtual
// and const/volatile qualifiers.
class ClassB {
 public:
  // Stores `v` as the value the first argument of each member must match.
  ClassB(int v) : b_(v) {}

  // Must never be invoked: it records a test failure unconditionally.
  int operator()(int a1) {
    EXPECT_TRUE(0);
    return a1 + b_;
  }

  // --- void-returning members, 0 to 7 parameters ---
  virtual void MemberProcess0(void) const {}

  virtual void MemberProcess1(int a1) {
    EXPECT_EQ(a1, b_);
  }

  void MemberProcess2(int a1, int *a2) {
    EXPECT_EQ(a1, b_);
    EXPECT_EQ(a1 + 1, *a2);
  }

  void MemberProcess3(int a1, int *a2, int &a3) const {
    EXPECT_EQ(a1, b_);
    EXPECT_EQ(a1 + 1, *a2);
    EXPECT_EQ(*a2 + 1, a3);
  }

  virtual void MemberProcess4(int a1, int *a2, int &a3, const int &a4) {
    EXPECT_EQ(a1, b_);
    EXPECT_EQ(a1 + 1, *a2);
    EXPECT_EQ(*a2 + 1, a3);
    EXPECT_EQ(a3 + 1, a4);
  }

  virtual void MemberProcess5(int a1, int *a2, int &a3, const int &a4,
                              volatile int *a5) const {
    EXPECT_EQ(a1, b_);
    EXPECT_EQ(a1 + 1, *a2);
    EXPECT_EQ(*a2 + 1, a3);
    EXPECT_EQ(a3 + 1, a4);
    EXPECT_EQ(a4 + 1, *a5);
  }

  void MemberProcess6(int a1, int *a2, int &a3, const int &a4, volatile int *a5,
                      int *const volatile a6) {
    EXPECT_EQ(a1, b_);
    EXPECT_EQ(a1 + 1, *a2);
    EXPECT_EQ(*a2 + 1, a3);
    EXPECT_EQ(a3 + 1, a4);
    EXPECT_EQ(a4 + 1, *a5);
    EXPECT_EQ(*a5 + 1, *a6);
  }

  void MemberProcess7(int a1, int *a2, int &a3, const int &a4, volatile int *a5,
                      int *const volatile a6, int &&a7) {
    EXPECT_EQ(a1, b_);
    EXPECT_EQ(a1 + 1, *a2);
    EXPECT_EQ(*a2 + 1, a3);
    EXPECT_EQ(a3 + 1, a4);
    EXPECT_EQ(a4 + 1, *a5);
    EXPECT_EQ(*a5 + 1, *a6);
    EXPECT_EQ(*a6 + 1, a7);
  }

  // --- value-returning members; each returns its parameter count ---
  size_t MemberFunction0(void) {
    return 0;
  }
  size_t MemberFunction1(long a1) {
    EXPECT_EQ(a1, b_);
    return 1;
  }

  size_t MemberFunction2(long a1, long *a2) {
    EXPECT_EQ(a1, b_);
    EXPECT_EQ(a1 + 1, *a2);
    return 2;
  }

  size_t MemberFunction3(long a1, long *a2, long &a3) volatile {
    EXPECT_EQ(a1, b_);
    EXPECT_EQ(a1 + 1, *a2);
    EXPECT_EQ(*a2 + 1, a3);
    return 3;
  }

  size_t MemberFunction4(long a1, long *a2, long &a3, const long &a4) const {
    EXPECT_EQ(a1, b_);
    EXPECT_EQ(a1 + 1, *a2);
    EXPECT_EQ(*a2 + 1, a3);
    EXPECT_EQ(a3 + 1, a4);
    return 4;
  }

  size_t MemberFunction5(long a1, long *a2, long &a3, const long &a4,
                         volatile long *a5) const volatile {
    EXPECT_EQ(a1, b_);
    EXPECT_EQ(a1 + 1, *a2);
    EXPECT_EQ(*a2 + 1, a3);
    EXPECT_EQ(a3 + 1, a4);
    EXPECT_EQ(a4 + 1, *a5);
    return 5;
  }

  size_t MemberFunction6(long a1, long *a2, long &a3, const long &a4,
                         volatile long *a5, long *const volatile a6) const {
    EXPECT_EQ(a1, b_);
    EXPECT_EQ(a1 + 1, *a2);
    EXPECT_EQ(*a2 + 1, a3);
    EXPECT_EQ(a3 + 1, a4);
    EXPECT_EQ(a4 + 1, *a5);
    EXPECT_EQ(*a5 + 1, *a6);
    return 6;
  }

  size_t MemberFunction7(long a1, long *a2, long &a3, const long &a4,
                         volatile long *a5, long *const volatile a6,
                         long &&a7) const volatile {
    EXPECT_EQ(a1, b_);
    EXPECT_EQ(a1 + 1, *a2);
    EXPECT_EQ(*a2 + 1, a3);
    EXPECT_EQ(a3 + 1, a4);
    EXPECT_EQ(a4 + 1, *a5);
    EXPECT_EQ(*a5 + 1, *a6);
    EXPECT_EQ(*a6 + 1, a7);
    return 7;
  }

 private:
  // Expected value of the first argument; overwritten by the constructor.
  int b_{11};
};

// Fixture covering combinations of cv-qualification between the object that
// creates a closure (`this` inside RunN) and the member function the closure
// targets. Every RunN builds a closure from `this`, a protected member
// function and the address of a local ClassB, then discards it without
// running it; the methods therefore mainly check that these combinations are
// accepted by ailego::Closure::New.
class ClassAB {
 public:
  // const object -> const member function.
  void Run1(void) const {
    ClassB bbb(1);
    ailego::Closure::New(this, &ClassAB::ConstFunc, &bbb);
  }

  // mutable object -> const member function.
  void Run2(void) {
    ClassB bbb(1);
    ailego::Closure::New(this, &ClassAB::ConstFunc, &bbb);
  }

  // mutable object -> non-const member function.
  void Run3(void) {
    ClassB bbb(1);
    ailego::Closure::New(this, &ClassAB::MutableFunc, &bbb);
  }

  // const object -> const volatile member function.
  void Run4(void) const {
    ClassB bbb(1);
    ailego::Closure::New(this, &ClassAB::VolatileConstFunc, &bbb);
  }

  // mutable object -> volatile member function.
  void Run5(void) {
    ClassB bbb(1);
    ailego::Closure::New(this, &ClassAB::VolatileMutableFunc, &bbb);
  }

  // const volatile object -> const volatile member function.
  void Run6(void) const volatile {
    ClassB bbb(1);
    ailego::Closure::New(this, &ClassAB::VolatileConstFunc, &bbb);
  }

  // volatile object -> const volatile member function.
  void Run7(void) volatile {
    ClassB bbb(1);
    ailego::Closure::New(this, &ClassAB::VolatileConstFunc, &bbb);
  }

  // volatile object -> volatile member function.
  void Run8(void) volatile {
    ClassB bbb(1);
    ailego::Closure::New(this, &ClassAB::VolatileMutableFunc, &bbb);
  }

 protected:
  // Closure targets. All four have the same body and differ only in the
  // cv-qualifiers of the member function itself.
  void ConstFunc(const ClassB *b) const {
    ClassA::StaticFunction0();
    b->MemberProcess0();
  }

  void MutableFunc(const ClassB *b) {
    ClassA::StaticFunction0();
    b->MemberProcess0();
  }

  void VolatileConstFunc(const ClassB *b) const volatile {
    ClassA::StaticFunction0();
    b->MemberProcess0();
  }

  void VolatileMutableFunc(const ClassB *b) volatile {
    ClassA::StaticFunction0();
    b->MemberProcess0();
  }
};

// Checks which types ailego::CallbackValidator classifies as callbacks
// (Value == true) and which it rejects (Value == false).
TEST(CallbackValidator, General) {
  // Plain scalar and data-pointer types are not callbacks.
  EXPECT_FALSE(ailego::CallbackValidator<int>::Value);
  EXPECT_FALSE(ailego::CallbackValidator<long *>::Value);
  EXPECT_FALSE(ailego::CallbackValidator<const void *>::Value);

  // Free functions with a return value, as function types and as pointers.
  EXPECT_TRUE(ailego::CallbackValidator<decltype(GlobalFunction0)>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<decltype(&GlobalFunction0)>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<decltype(GlobalFunction1)>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<decltype(&GlobalFunction1)>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<decltype(GlobalFunction2)>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<decltype(&GlobalFunction2)>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<decltype(GlobalFunction3)>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<decltype(&GlobalFunction3)>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<decltype(GlobalFunction4)>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<decltype(&GlobalFunction4)>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<decltype(GlobalFunction5)>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<decltype(&GlobalFunction5)>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<decltype(GlobalFunction6)>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<decltype(&GlobalFunction6)>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<decltype(GlobalFunction7)>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<decltype(&GlobalFunction7)>::Value);

  // The GlobalProcessN family of free functions, same two spellings.
  EXPECT_TRUE(ailego::CallbackValidator<decltype(GlobalProcess0)>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<decltype(&GlobalProcess0)>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<decltype(GlobalProcess1)>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<decltype(&GlobalProcess1)>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<decltype(GlobalProcess2)>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<decltype(&GlobalProcess2)>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<decltype(GlobalProcess3)>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<decltype(&GlobalProcess3)>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<decltype(GlobalProcess4)>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<decltype(&GlobalProcess4)>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<decltype(GlobalProcess5)>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<decltype(&GlobalProcess5)>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<decltype(GlobalProcess6)>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<decltype(&GlobalProcess6)>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<decltype(GlobalProcess7)>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<decltype(&GlobalProcess7)>::Value);

  // std::function wrappers around the same signatures.
  EXPECT_TRUE(ailego::CallbackValidator<
              std::function<decltype(GlobalFunction0)>>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<
              std::function<decltype(GlobalFunction1)>>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<
              std::function<decltype(GlobalFunction2)>>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<
              std::function<decltype(GlobalFunction3)>>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<
              std::function<decltype(GlobalFunction4)>>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<
              std::function<decltype(GlobalFunction5)>>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<
              std::function<decltype(GlobalFunction6)>>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<
              std::function<decltype(GlobalFunction7)>>::Value);

  // Class types declared earlier in this file (presumably with / without a
  // call operator, going by their names): the object and references to it
  // are accepted for WithFunctionCall, pointers to it are rejected, and
  // every form of WithoutFunctionCall is rejected.
  EXPECT_TRUE(ailego::CallbackValidator<WithFunctionCall>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<WithFunctionCall &>::Value);
  EXPECT_TRUE(ailego::CallbackValidator<const WithFunctionCall &>::Value);
  EXPECT_FALSE(ailego::CallbackValidator<WithFunctionCall *>::Value);
  EXPECT_FALSE(ailego::CallbackValidator<const WithFunctionCall *>::Value);
  EXPECT_FALSE(ailego::CallbackValidator<WithoutFunctionCall>::Value);
  EXPECT_FALSE(ailego::CallbackValidator<WithoutFunctionCall &>::Value);
  EXPECT_FALSE(ailego::CallbackValidator<const WithoutFunctionCall &>::Value);
  EXPECT_FALSE(ailego::CallbackValidator<WithoutFunctionCall *>::Value);
  EXPECT_FALSE(ailego::CallbackValidator<const WithoutFunctionCall *>::Value);

  // Static member functions of ClassA (StaticFunctionN family).
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(ClassA::StaticFunction0)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(&ClassA::StaticFunction0)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(ClassA::StaticFunction1)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(&ClassA::StaticFunction1)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(ClassA::StaticFunction2)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(&ClassA::StaticFunction2)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(ClassA::StaticFunction3)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(&ClassA::StaticFunction3)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(ClassA::StaticFunction4)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(&ClassA::StaticFunction4)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(ClassA::StaticFunction5)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(&ClassA::StaticFunction5)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(ClassA::StaticFunction6)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(&ClassA::StaticFunction6)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(ClassA::StaticFunction7)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(&ClassA::StaticFunction7)>::Value);

  // Static member functions of ClassA (StaticProcessN family).
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(ClassA::StaticProcess0)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(&ClassA::StaticProcess0)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(ClassA::StaticProcess1)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(&ClassA::StaticProcess1)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(ClassA::StaticProcess2)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(&ClassA::StaticProcess2)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(ClassA::StaticProcess3)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(&ClassA::StaticProcess3)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(ClassA::StaticProcess4)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(&ClassA::StaticProcess4)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(ClassA::StaticProcess5)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(&ClassA::StaticProcess5)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(ClassA::StaticProcess6)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(&ClassA::StaticProcess6)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(ClassA::StaticProcess7)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(&ClassA::StaticProcess7)>::Value);

  // Pointers to non-static member functions of ClassB, including the
  // const / volatile qualified ones.
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(&ClassB::MemberFunction0)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(&ClassB::MemberFunction1)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(&ClassB::MemberFunction2)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(&ClassB::MemberFunction3)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(&ClassB::MemberFunction4)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(&ClassB::MemberFunction5)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(&ClassB::MemberFunction6)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(&ClassB::MemberFunction7)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(&ClassB::MemberProcess0)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(&ClassB::MemberProcess1)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(&ClassB::MemberProcess2)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(&ClassB::MemberProcess3)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(&ClassB::MemberProcess4)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(&ClassB::MemberProcess5)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(&ClassB::MemberProcess6)>::Value);
  EXPECT_TRUE(
      ailego::CallbackValidator<decltype(&ClassB::MemberProcess7)>::Value);
}

// Checks that ailego::CallbackTraits reports the parameter count (Arity) of
// free, static-member and non-static-member functions, and pins the size of
// TupleType for zero-argument callbacks.
TEST(CallbackTraits, General) {
  // Free functions, given as function types.
  EXPECT_EQ(0, ailego::CallbackTraits<decltype(GlobalProcess0)>::Arity);
  EXPECT_EQ(1, ailego::CallbackTraits<decltype(GlobalProcess1)>::Arity);
  EXPECT_EQ(2, ailego::CallbackTraits<decltype(GlobalProcess2)>::Arity);
  EXPECT_EQ(3, ailego::CallbackTraits<decltype(GlobalProcess3)>::Arity);
  EXPECT_EQ(4, ailego::CallbackTraits<decltype(GlobalProcess4)>::Arity);
  EXPECT_EQ(5, ailego::CallbackTraits<decltype(GlobalProcess5)>::Arity);
  EXPECT_EQ(6, ailego::CallbackTraits<decltype(GlobalProcess6)>::Arity);
  EXPECT_EQ(7, ailego::CallbackTraits<decltype(GlobalProcess7)>::Arity);

  // Static member functions, given as function pointers.
  EXPECT_EQ(0,
            ailego::CallbackTraits<decltype(&ClassA::StaticProcess0)>::Arity);
  EXPECT_EQ(1,
            ailego::CallbackTraits<decltype(&ClassA::StaticProcess1)>::Arity);
  EXPECT_EQ(2,
            ailego::CallbackTraits<decltype(&ClassA::StaticProcess2)>::Arity);
  EXPECT_EQ(3,
            ailego::CallbackTraits<decltype(&ClassA::StaticProcess3)>::Arity);
  EXPECT_EQ(4,
            ailego::CallbackTraits<decltype(&ClassA::StaticProcess4)>::Arity);
  EXPECT_EQ(5,
            ailego::CallbackTraits<decltype(&ClassA::StaticProcess5)>::Arity);
  EXPECT_EQ(6,
            ailego::CallbackTraits<decltype(&ClassA::StaticProcess6)>::Arity);
  EXPECT_EQ(7,
            ailego::CallbackTraits<decltype(&ClassA::StaticProcess7)>::Arity);

  // Non-static member functions; the expected arity does not count the
  // implicit object.
  EXPECT_EQ(0,
            ailego::CallbackTraits<decltype(&ClassB::MemberProcess0)>::Arity);
  EXPECT_EQ(1,
            ailego::CallbackTraits<decltype(&ClassB::MemberProcess1)>::Arity);
  EXPECT_EQ(2,
            ailego::CallbackTraits<decltype(&ClassB::MemberProcess2)>::Arity);
  EXPECT_EQ(3,
            ailego::CallbackTraits<decltype(&ClassB::MemberProcess3)>::Arity);
  EXPECT_EQ(4,
            ailego::CallbackTraits<decltype(&ClassB::MemberProcess4)>::Arity);
  EXPECT_EQ(5,
            ailego::CallbackTraits<decltype(&ClassB::MemberProcess5)>::Arity);
  EXPECT_EQ(6,
            ailego::CallbackTraits<decltype(&ClassB::MemberProcess6)>::Arity);
  EXPECT_EQ(7,
            ailego::CallbackTraits<decltype(&ClassB::MemberProcess7)>::Arity);

  // For zero-argument callbacks TupleType is expected to have sizeof 1.
  EXPECT_EQ(
      1u, sizeof(ailego::CallbackTraits<decltype(GlobalProcess0)>::TupleType));
  EXPECT_EQ(1u, sizeof(ailego::CallbackTraits<
                       decltype(&ClassA::StaticProcess0)>::TupleType));
  EXPECT_EQ(1u, sizeof(ailego::CallbackTraits<
                       decltype(&ClassB::MemberProcess0)>::TupleType));
}

// Builds and runs closures over free functions and static member functions
// with 0..7 bound arguments. Each callee is passed both by name and by
// address, and the arguments alternate between array elements (lvalues) and
// literals (rvalues). The arrays hold a[i] == i / b[i] == i; the values are
// checked inside the callees, which are defined earlier in the file.
TEST(Closure, Static) {
  long a[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
  int b[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};

  // Free functions taking long-based arguments.
  ailego::Closure::New(GlobalFunction0)->run();
  ailego::Closure::New(&GlobalFunction0)->run();
  ailego::Closure::New(GlobalFunction1, a[1])->run();
  ailego::Closure::New(&GlobalFunction1, 1)->run();
  ailego::Closure::New(GlobalFunction2, 1, &a[2])->run();
  ailego::Closure::New(&GlobalFunction2, a[1], &a[2])->run();
  ailego::Closure::New(GlobalFunction3, a[1], &a[2], a[3])->run();
  ailego::Closure::New(&GlobalFunction3, 1, &a[2], a[3])->run();
  ailego::Closure::New(GlobalFunction4, 1, &a[2], a[3], 4)->run();
  ailego::Closure::New(&GlobalFunction4, a[1], &a[2], a[3], a[4])->run();
  ailego::Closure::New(GlobalFunction5, a[1], &a[2], a[3], a[4], &a[5])->run();
  ailego::Closure::New(&GlobalFunction5, 1, &a[2], a[3], 4, &a[5])->run();
  ailego::Closure::New(GlobalFunction6, 1, &a[2], a[3], 4, &a[5], &a[6])->run();
  ailego::Closure::New(&GlobalFunction6, a[1], &a[2], a[3], a[4], &a[5], &a[6])
      ->run();
  ailego::Closure::New(GlobalFunction7, 1, &a[2], a[3], 4, &a[5], &a[6], a[7])
      ->run();
  ailego::Closure::New(&GlobalFunction7, a[1], &a[2], a[3], a[4], &a[5], &a[6],
                       7)
      ->run();

  // Free functions taking int-based arguments.
  ailego::Closure::New(GlobalProcess0)->run();
  ailego::Closure::New(&GlobalProcess0)->run();
  ailego::Closure::New(GlobalProcess1, b[1])->run();
  ailego::Closure::New(&GlobalProcess1, 1)->run();
  ailego::Closure::New(GlobalProcess2, 1, &b[2])->run();
  ailego::Closure::New(&GlobalProcess2, b[1], &b[2])->run();
  ailego::Closure::New(GlobalProcess3, b[1], &b[2], b[3])->run();
  ailego::Closure::New(&GlobalProcess3, 1, &b[2], b[3])->run();
  ailego::Closure::New(GlobalProcess4, 1, &b[2], b[3], 4)->run();
  ailego::Closure::New(&GlobalProcess4, b[1], &b[2], b[3], b[4])->run();
  ailego::Closure::New(GlobalProcess5, b[1], &b[2], b[3], b[4], &b[5])->run();
  ailego::Closure::New(&GlobalProcess5, 1, &b[2], b[3], 4, &b[5])->run();
  ailego::Closure::New(GlobalProcess6, 1, &b[2], b[3], 4, &b[5], &b[6])->run();
  ailego::Closure::New(&GlobalProcess6, b[1], &b[2], b[3], b[4], &b[5], &b[6])
      ->run();
  ailego::Closure::New(GlobalProcess7, 1, &b[2], b[3], 4, &b[5], &b[6], b[7])
      ->run();
  ailego::Closure::New(&GlobalProcess7, b[1], &b[2], b[3], b[4], &b[5], &b[6],
                       7)
      ->run();

  // Static member functions taking long-based arguments.
  ailego::Closure::New(ClassA::StaticFunction0)->run();
  ailego::Closure::New(&ClassA::StaticFunction0)->run();
  ailego::Closure::New(ClassA::StaticFunction1, a[1])->run();
  ailego::Closure::New(&ClassA::StaticFunction1, 1)->run();
  ailego::Closure::New(ClassA::StaticFunction2, 1, &a[2])->run();
  ailego::Closure::New(&ClassA::StaticFunction2, a[1], &a[2])->run();
  ailego::Closure::New(ClassA::StaticFunction3, a[1], &a[2], a[3])->run();
  ailego::Closure::New(&ClassA::StaticFunction3, 1, &a[2], a[3])->run();
  ailego::Closure::New(ClassA::StaticFunction4, 1, &a[2], a[3], 4)->run();
  ailego::Closure::New(&ClassA::StaticFunction4, a[1], &a[2], a[3], a[4])
      ->run();
  ailego::Closure::New(ClassA::StaticFunction5, a[1], &a[2], a[3], a[4], &a[5])
      ->run();
  ailego::Closure::New(&ClassA::StaticFunction5, 1, &a[2], a[3], 4, &a[5])
      ->run();
  ailego::Closure::New(ClassA::StaticFunction6, 1, &a[2], a[3], 4, &a[5], &a[6])
      ->run();
  ailego::Closure::New(&ClassA::StaticFunction6, a[1], &a[2], a[3], a[4], &a[5],
                       &a[6])
      ->run();
  ailego::Closure::New(ClassA::StaticFunction7, 1, &a[2], a[3], 4, &a[5], &a[6],
                       a[7])
      ->run();
  ailego::Closure::New(&ClassA::StaticFunction7, a[1], &a[2], a[3], a[4], &a[5],
                       &a[6], 7)
      ->run();

  // Static member functions taking int-based arguments.
  ailego::Closure::New(ClassA::StaticProcess0)->run();
  ailego::Closure::New(&ClassA::StaticProcess0)->run();
  ailego::Closure::New(ClassA::StaticProcess1, b[1])->run();
  ailego::Closure::New(&ClassA::StaticProcess1, 1)->run();
  ailego::Closure::New(ClassA::StaticProcess2, 1, &b[2])->run();
  ailego::Closure::New(&ClassA::StaticProcess2, b[1], &b[2])->run();
  ailego::Closure::New(ClassA::StaticProcess3, b[1], &b[2], b[3])->run();
  ailego::Closure::New(&ClassA::StaticProcess3, 1, &b[2], b[3])->run();
  ailego::Closure::New(ClassA::StaticProcess4, 1, &b[2], b[3], 4)->run();
  ailego::Closure::New(&ClassA::StaticProcess4, b[1], &b[2], b[3], b[4])->run();
  ailego::Closure::New(ClassA::StaticProcess5, b[1], &b[2], b[3], b[4], &b[5])
      ->run();
  ailego::Closure::New(&ClassA::StaticProcess5, 1, &b[2], b[3], 4, &b[5])
      ->run();
  ailego::Closure::New(ClassA::StaticProcess6, 1, &b[2], b[3], 4, &b[5], &b[6])
      ->run();
  ailego::Closure::New(&ClassA::StaticProcess6, b[1], &b[2], b[3], b[4], &b[5],
                       &b[6])
      ->run();
  ailego::Closure::New(ClassA::StaticProcess7, 1, &b[2], b[3], 4, &b[5], &b[6],
                       b[7])
      ->run();
  ailego::Closure::New(&ClassA::StaticProcess7, b[1], &b[2], b[3], b[4], &b[5],
                       &b[6], 7)
      ->run();
}

// Builds and runs closures over non-static member functions of ClassB,
// binding the object through plain, const and const volatile pointers and
// through pointers taken from lvalue- and rvalue-reference aliases.
TEST(Closure, Member) {
  long a[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
  int b[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
  ClassB bbb(1);

  ailego::Closure::New(&bbb, &ClassB::MemberFunction0)->run();
  ailego::Closure::New(&bbb, &ClassB::MemberFunction1, 1)->run();
  ailego::Closure::New(&bbb, &ClassB::MemberFunction2, a[1], &a[2])->run();
  ailego::Closure::New(&bbb, &ClassB::MemberFunction3, 1, &a[2], a[3])->run();
  ailego::Closure::New(&bbb, &ClassB::MemberFunction4, a[1], &a[2], a[3], a[4])
      ->run();
  ailego::Closure::New(&bbb, &ClassB::MemberFunction5, 1, &a[2], a[3], 4, &a[5])
      ->run();
  ailego::Closure::New(&bbb, &ClassB::MemberFunction6, a[1], &a[2], a[3], a[4],
                       &a[5], &a[6])
      ->run();
  // MemberFunction7 is const volatile, so it can also be bound through
  // const and const volatile object pointers.
  ailego::Closure::New((const ClassB *)(&bbb), &ClassB::MemberFunction7, a[1],
                       &a[2], a[3], a[4], &a[5], &a[6], 7)
      ->run();
  ailego::Closure::New((const volatile ClassB *)(&bbb),
                       &ClassB::MemberFunction7, a[1], &a[2], a[3], a[4], &a[5],
                       &a[6], 7)
      ->run();

  // bbc is only a reference bound to bbb: std::move is a cast here and no
  // move construction takes place, so bbb stays usable afterwards.
  ClassB &&bbc = std::move(bbb);
  ailego::Closure::New(&bbc, &ClassB::MemberProcess0)->run();
  ailego::Closure::New(&bbc, &ClassB::MemberProcess1, 1)->run();
  ailego::Closure::New(&bbc, &ClassB::MemberProcess2, b[1], &b[2])->run();
  ailego::Closure::New(&bbc, &ClassB::MemberProcess3, 1, &b[2], b[3])->run();
  ailego::Closure::New(&bbc, &ClassB::MemberProcess4, b[1], &b[2], b[3], b[4])
      ->run();

  // Same object again, this time through an lvalue-reference alias.
  ClassB &bbd = bbb;
  ailego::Closure::New(&bbd, &ClassB::MemberProcess5, 1, &b[2], b[3], 4, &b[5])
      ->run();
  ailego::Closure::New(&bbd, &ClassB::MemberProcess6, b[1], &b[2], b[3], b[4],
                       &b[5], &b[6])
      ->run();
  ailego::Closure::New(&bbd, &ClassB::MemberProcess7, b[1], &b[2], b[3], b[4],
                       &b[5], &b[6], 7)
      ->run();
}

// Builds and runs closures over std::function objects. Each std::function
// has the signature of the matching free function (GlobalFunctionN /
// GlobalProcessN) and wraps a std::bind of the corresponding ClassB member
// function onto the local object `bbb`.
TEST(Closure, Function) {
  long a[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
  int b[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
  ClassB bbb(1);

  // MemberFunctionN family (long-based arguments).
  std::function<decltype(GlobalFunction0)> f0 =
      std::bind(&ClassB::MemberFunction0, &bbb);
  ailego::Closure::New(f0)->run();

  std::function<decltype(GlobalFunction1)> f1 =
      std::bind(&ClassB::MemberFunction1, &bbb, std::placeholders::_1);
  ailego::Closure::New(f1, 1)->run();

  std::function<decltype(GlobalFunction2)> f2 =
      std::bind(&ClassB::MemberFunction2, &bbb, std::placeholders::_1,
                std::placeholders::_2);
  ailego::Closure::New(f2, a[1], &a[2])->run();

  std::function<decltype(GlobalFunction3)> f3 =
      std::bind(&ClassB::MemberFunction3, &bbb, std::placeholders::_1,
                std::placeholders::_2, std::placeholders::_3);
  ailego::Closure::New(f3, a[1], &a[2], a[3])->run();

  std::function<decltype(GlobalFunction4)> f4 = std::bind(
      &ClassB::MemberFunction4, &bbb, std::placeholders::_1,
      std::placeholders::_2, std::placeholders::_3, std::placeholders::_4);
  ailego::Closure::New(f4, 1, &a[2], a[3], a[4])->run();

  std::function<decltype(GlobalFunction5)> f5 =
      std::bind(&ClassB::MemberFunction5, &bbb, std::placeholders::_1,
                std::placeholders::_2, std::placeholders::_3,
                std::placeholders::_4, std::placeholders::_5);
  ailego::Closure::New(f5, 1, &a[2], a[3], 4, &a[5])->run();

  std::function<decltype(GlobalFunction6)> f6 = std::bind(
      &ClassB::MemberFunction6, &bbb, std::placeholders::_1,
      std::placeholders::_2, std::placeholders::_3, std::placeholders::_4,
      std::placeholders::_5, std::placeholders::_6);
  ailego::Closure::New(f6, 1, &a[2], a[3], a[4], &a[5], &a[6])->run();

  std::function<decltype(GlobalFunction7)> f7 = std::bind(
      &ClassB::MemberFunction7, &bbb, std::placeholders::_1,
      std::placeholders::_2, std::placeholders::_3, std::placeholders::_4,
      std::placeholders::_5, std::placeholders::_6, std::placeholders::_7);
  ailego::Closure::New(f7, a[1], &a[2], a[3], a[4], &a[5], &a[6], 7)->run();

  // MemberProcessN family (int-based arguments).
  std::function<decltype(GlobalProcess0)> p0 =
      std::bind(&ClassB::MemberProcess0, &bbb);
  ailego::Closure::New(p0)->run();

  std::function<decltype(GlobalProcess1)> p1 =
      std::bind(&ClassB::MemberProcess1, &bbb, std::placeholders::_1);
  ailego::Closure::New(p1, 1)->run();

  std::function<decltype(GlobalProcess2)> p2 =
      std::bind(&ClassB::MemberProcess2, &bbb, std::placeholders::_1,
                std::placeholders::_2);
  ailego::Closure::New(p2, b[1], &b[2])->run();

  std::function<decltype(GlobalProcess3)> p3 =
      std::bind(&ClassB::MemberProcess3, &bbb, std::placeholders::_1,
                std::placeholders::_2, std::placeholders::_3);
  ailego::Closure::New(p3, b[1], &b[2], b[3])->run();

  std::function<decltype(GlobalProcess4)> p4 = std::bind(
      &ClassB::MemberProcess4, &bbb, std::placeholders::_1,
      std::placeholders::_2, std::placeholders::_3, std::placeholders::_4);
  ailego::Closure::New(p4, 1, &b[2], b[3], b[4])->run();

  std::function<decltype(GlobalProcess5)> p5 =
      std::bind(&ClassB::MemberProcess5, &bbb, std::placeholders::_1,
                std::placeholders::_2, std::placeholders::_3,
                std::placeholders::_4, std::placeholders::_5);
  ailego::Closure::New(p5, 1, &b[2], b[3], 4, &b[5])->run();

  std::function<decltype(GlobalProcess6)> p6 = std::bind(
      &ClassB::MemberProcess6, &bbb, std::placeholders::_1,
      std::placeholders::_2, std::placeholders::_3, std::placeholders::_4,
      std::placeholders::_5, std::placeholders::_6);
  ailego::Closure::New(p6, 1, &b[2], b[3], b[4], &b[5], &b[6])->run();

  std::function<decltype(GlobalProcess7)> p7 = std::bind(
      &ClassB::MemberProcess7, &bbb, std::placeholders::_1,
      std::placeholders::_2, std::placeholders::_3, std::placeholders::_4,
      std::placeholders::_5, std::placeholders::_6, std::placeholders::_7);
  ailego::Closure::New(p7, b[1], &b[2], b[3], b[4], &b[5], &b[6], 7)->run();
}

// Builds and runs closures over lambdas: a named capture-less lambda, a
// temporary capturing lambda, and lambdas with 1..7 parameters that forward
// to the ClassB::MemberFunctionN checkers on the captured object `bbb`.
TEST(Closure, Lambda) {
  auto lambda0 = []() { return 0; };
  ailego::Closure::New(lambda0)->run();
  ailego::Closure::New([&]() { return 0; })->run();

  long a[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
  ClassB bbb(1);

  auto lambda1 = [&](long a1) { return bbb.MemberFunction1(a1); };
  ailego::Closure::New(lambda1, 1)->run();

  auto lambda2 = [&](long a1, long *a2) { return bbb.MemberFunction2(a1, a2); };
  ailego::Closure::New(lambda2, 1, &a[2])->run();

  auto lambda3 = [&](long a1, long *a2, long &a3) {
    return bbb.MemberFunction3(a1, a2, a3);
  };
  ailego::Closure::New(lambda3, 1, &a[2], a[3])->run();

  auto lambda4 = [&](long a1, long *a2, long &a3, const long &a4) {
    return bbb.MemberFunction4(a1, a2, a3, a4);
  };
  ailego::Closure::New(lambda4, a[1], &a[2], a[3], a[4])->run();

  auto lambda5 = [&](long a1, long *a2, long &a3, const long &a4,
                     volatile long *a5) {
    return bbb.MemberFunction5(a1, a2, a3, a4, a5);
  };
  ailego::Closure::New(lambda5, 1, &a[2], a[3], 4, &a[5])->run();

  auto lambda6 = [&](long a1, long *a2, long &a3, const long &a4,
                     volatile long *a5, long *const volatile a6) {
    return bbb.MemberFunction6(a1, a2, a3, a4, a5, a6);
  };
  ailego::Closure::New(lambda6, 1, &a[2], a[3], 4, &a[5], &a[6])->run();

  // a7 is a named rvalue reference, hence the std::move when forwarding it.
  auto lambda7 = [&](long a1, long *a2, long &a3, const long &a4,
                     volatile long *a5, long *const volatile a6, long &&a7) {
    return bbb.MemberFunction7(a1, a2, a3, a4, a5, a6, std::move(a7));
  };
  ailego::Closure::New(lambda7, a[1], &a[2], a[3], a[4], &a[5], &a[6], 7)
      ->run();
}

// Checks the run(&r) form: after each call, `r` is expected to hold the
// callee's return value. GlobalFunctionN is expected to yield N, and
// ClassB::MemberFunctionN returns N.
TEST(Closure, Return) {
  long a[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};

  // Free functions.
  size_t r = 0;
  ailego::Closure::New(&GlobalFunction0)->run(&r);
  EXPECT_EQ(0u, r);
  ailego::Closure::New(&GlobalFunction1, 1)->run(&r);
  EXPECT_EQ(1u, r);
  ailego::Closure::New(&GlobalFunction2, a[1], &a[2])->run(&r);
  EXPECT_EQ(2u, r);
  ailego::Closure::New(&GlobalFunction3, 1, &a[2], a[3])->run(&r);
  EXPECT_EQ(3u, r);
  ailego::Closure::New(&GlobalFunction4, a[1], &a[2], a[3], a[4])->run(&r);
  EXPECT_EQ(4u, r);
  ailego::Closure::New(&GlobalFunction5, 1, &a[2], a[3], 4, &a[5])->run(&r);
  EXPECT_EQ(5u, r);
  ailego::Closure::New(&GlobalFunction6, a[1], &a[2], a[3], a[4], &a[5], &a[6])
      ->run(&r);
  EXPECT_EQ(6u, r);
  ailego::Closure::New(&GlobalFunction7, a[1], &a[2], a[3], a[4], &a[5], &a[6],
                       7)
      ->run(&r);
  EXPECT_EQ(7u, r);

  // Non-static member functions bound to a local object.
  ClassB bbb(1);
  ailego::Closure::New(&bbb, &ClassB::MemberFunction0)->run(&r);
  EXPECT_EQ(0u, r);
  ailego::Closure::New(&bbb, &ClassB::MemberFunction1, 1)->run(&r);
  EXPECT_EQ(1u, r);
  ailego::Closure::New(&bbb, &ClassB::MemberFunction2, a[1], &a[2])->run(&r);
  EXPECT_EQ(2u, r);
  ailego::Closure::New(&bbb, &ClassB::MemberFunction3, 1, &a[2], a[3])->run(&r);
  EXPECT_EQ(3u, r);
  ailego::Closure::New(&bbb, &ClassB::MemberFunction4, a[1], &a[2], a[3], a[4])
      ->run(&r);
  EXPECT_EQ(4u, r);
  ailego::Closure::New(&bbb, &ClassB::MemberFunction5, 1, &a[2], a[3], 4, &a[5])
      ->run(&r);
  EXPECT_EQ(5u, r);
  ailego::Closure::New(&bbb, &ClassB::MemberFunction6, a[1], &a[2], a[3], a[4],
                       &a[5], &a[6])
      ->run(&r);
  EXPECT_EQ(6u, r);
  ailego::Closure::New(&bbb, &ClassB::MemberFunction7, a[1], &a[2], a[3], a[4],
                       &a[5], &a[6], 7)
      ->run(&r);
  EXPECT_EQ(7u, r);
}

// Probe type that is only ever supposed to be copied. Each constructor logs
// its own signature; the copy constructor bumps the static `count`, and the
// move constructor records a test failure because it must never be chosen.
struct LeftValue {
  LeftValue(void) {
    std::cout << "LeftValue(void)" << std::endl;
  }
  LeftValue(const LeftValue &) {
    ++count;
    std::cout << "LeftValue(const LeftValue &)" << std::endl;
  }
  LeftValue(LeftValue &&) {
    std::cout << "LeftValue(LeftValue &&)" << std::endl;
    EXPECT_TRUE(0);  // a move of LeftValue is always a failure
  }
  static int count;  // number of copy constructions so far
  int val = 1;
};

int LeftValue::count = 0;

// Probe type that is only ever supposed to be moved. Each constructor logs
// its own signature; the move constructor bumps the static `count`, and the
// copy constructor records a test failure because it must never be chosen.
struct RightValue {
  RightValue(void) {
    std::cout << "RightValue(void)" << std::endl;
  }
  RightValue(const RightValue &) {
    std::cout << "RightValue(const RightValue &)" << std::endl;
    EXPECT_TRUE(0);  // a copy of RightValue is always a failure
  }
  RightValue(RightValue &&) {
    ++count;
    std::cout << "RightValue(RightValue &&)" << std::endl;
  }
  static int count;  // number of move constructions so far
  int val = 2;
};

int RightValue::count = 0;

// Callback targets for the copy/move probes. None of them inspects its
// arguments; only the parameter types matter, and each one returns 0.
struct TestLeftRight {
  // Takes the copy-only probe by rvalue reference and the move-only probe by
  // const reference.
  static int Run(LeftValue &&, const RightValue &) {
    const int result = 0;
    return result;
  }
  // Takes only the copy-only probe, by rvalue reference.
  static int RunLeft(LeftValue &&) {
    const int result = 0;
    return result;
  }
  // Takes only the move-only probe, by const reference.
  static int RunRight(const RightValue &) {
    const int result = 0;
    return result;
  }
};

// Checks how many copies / moves ailego::Closure::New performs when binding
// arguments: lvalues are expected to be copied and rvalues to be moved. The
// probe types flag a test failure themselves if the wrong constructor is
// ever selected.
TEST(Closure, LeftRight) {
  // The counters are process-wide statics while the expectations below are
  // absolute numbers, so start from zero to keep the test valid when the
  // body runs more than once in a process (e.g. --gtest_repeat).
  LeftValue::count = 0;
  RightValue::count = 0;

  LeftValue lval;
  RightValue rval;

  // 1: binding an lvalue is expected to copy it exactly once.
  std::cout << "## Starting 1..." << std::endl;
  ailego::Closure::New(&TestLeftRight::RunLeft, lval)->run();
  EXPECT_EQ(1, LeftValue::count);

  // 2: binding a temporary is expected to move it exactly once.
  std::cout << "## Starting 2..." << std::endl;
  ailego::Closure::New(&TestLeftRight::RunRight, RightValue())->run();
  EXPECT_EQ(1, RightValue::count);

  // 3: std::ref + std::move, invoked through operator() instead of run();
  // one further copy of LeftValue and one further move of RightValue are
  // expected.
  std::cout << "## Starting 3..." << std::endl;
  auto call = ailego::Closure::New(&TestLeftRight::Run, std::ref(lval),
                                   std::move(rval));
  (*call)();
  EXPECT_EQ(2, LeftValue::count);
  EXPECT_EQ(2, RightValue::count);
}

// Adds one to the integer that `a` points to. Used as the plain
// function-pointer baseline in the benchmark.
void NoinlineFunction(int *a) {
  *a += 1;
}

// Rough timing comparison of four ways to invoke the same "increment an int"
// operation `count` times: a function pointer, an ailego::Closure, a plain
// lambda and a std::function. Each section prints its elapsed microseconds
// and checks that its counter reached `count`.
TEST(Closure, Benchmark) {
  const int count = 10000000;

  // Section 0: call through a volatile function pointer (presumably to keep
  // the call from being inlined or folded away).
  ailego::ElapsedTime stamp0;
  int num0 = 0;
  using FUNC = void (*)(int *);
  volatile FUNC fn0 = NoinlineFunction;
  for (int round = 0; round != count; ++round) {
    (*fn0)(&num0);
  }
  std::cout << "Noinline elapsed: " << stamp0.micro_seconds() << " us"
            << std::endl;
  EXPECT_EQ(count, num0);

  // Section 1: a closure created once and run repeatedly.
  ailego::ElapsedTime stamp1;
  int num1 = 0;
  auto closure = ailego::Closure::New([](int *p) { *p += 1; }, &num1);
  for (int round = 0; round != count; ++round) {
    closure->run();
  }
  std::cout << "Closure elapsed: " << stamp1.micro_seconds() << " us"
            << std::endl;
  EXPECT_EQ(count, num1);

  // Section 2: direct call of a capture-less lambda.
  ailego::ElapsedTime stamp2;
  int num2 = 0;
  auto increment = [](int *p) { *p += 1; };
  for (int round = 0; round != count; ++round) {
    increment(&num2);
  }
  std::cout << "Lambda elapsed: " << stamp2.micro_seconds() << " us"
            << std::endl;
  EXPECT_EQ(count, num2);

  // Section 3: the same lambda wrapped in a std::function.
  ailego::ElapsedTime stamp3;
  int num3 = 0;
  std::function<void(int *)> erased = [](int *p) { *p += 1; };
  for (int round = 0; round != count; ++round) {
    erased(&num3);
  }
  std::cout << "Function elapsed: " << stamp3.micro_seconds() << " us"
            << std::endl;
  EXPECT_EQ(count, num3);
}


================================================
FILE: tests/ailego/pattern/factory_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <gtest/gtest.h>
#include <zvec/ailego/pattern/factory.h>

using namespace zvec;
using namespace zvec::ailego;

// Abstract product interface used by the factory test.
struct Base {
  virtual ~Base(void) {}
  virtual void do_something() = 0;
};

// Concrete product created through the factory.
struct AAA : public Base {
  AAA(void) {}

  // `override` makes the compiler reject this declaration if the signature
  // in Base ever changes, instead of silently introducing a new virtual.
  void do_something() override {
    printf("do something\n");
  }
};

// Registers AAA with the Base factory; the test below looks it up under the
// name "AAA".
AILEGO_FACTORY_REGISTER(AAA, Base, AAA);

// Exercises the factory's lookup, creation and enumeration entry points.
TEST(Factory, General) {
  // An unregistered name yields neither an instance nor a Has() hit.
  EXPECT_TRUE(!ailego::Factory<Base>::MakeShared("BBB"));
  EXPECT_TRUE(!ailego::Factory<Base>::Has("BBB"));

  // The registered name yields a usable instance.
  auto aaa = ailego::Factory<Base>::MakeShared("AAA");
  ASSERT_TRUE(!!aaa);
  aaa->do_something();
  EXPECT_TRUE(!!ailego::Factory<Base>::Has("AAA"));

  // Exactly one class is expected to be registered for Base.
  auto vec = ailego::Factory<Base>::Classes();
  EXPECT_EQ(1u, vec.size());
  EXPECT_EQ("AAA", std::string(vec[0]));
}


================================================
FILE: tests/ailego/pattern/scope_guard_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <ailego/pattern/defer.h>
#include <gtest/gtest.h>

using namespace zvec;

// Builds guards from lambdas of several shapes (with an argument, capturing,
// returning a value) and verifies that none of them has run while the guards
// are still alive.
TEST(ScopeGuard, Lambda) {
  int fired = 0;

  // Non-capturing lambda that receives a bound argument.
  auto guard_with_arg = ailego::ScopeGuard::Make(
      [](int val) { printf("ScopeGuard: Lambda %d\n", val); }, 1);

  // Capturing lambda returning void.
  auto guard_counting = ailego::ScopeGuard::Make([&fired] {
    printf("ScopeGuard: Lambda 2\n");
    fired += 1;
  });

  // Non-capturing lambda returning int.
  auto guard_returning_int = ailego::ScopeGuard::Make([] {
    printf("ScopeGuard: Lambda 3\n");
    return 0;
  });

  // Capturing lambda returning bool.
  auto guard_returning_bool = ailego::ScopeGuard::Make([&fired] {
    printf("ScopeGuard: Lambda 4\n");
    fired += 1;
    return false;
  });

  // Nothing may have been invoked yet.
  EXPECT_EQ(0, fired);
}

// Provides static callbacks for the scope-guard tests; `count` records how
// many times either callback has been invoked.
struct ClassA {
  static int count;

  // Callback without arguments.
  static void StaticProcess0() {
    printf("ScopeGuard: Static Function 1\n");
    count += 1;
  }

  // Callback taking one argument; prints it and always returns 0.
  static int StaticProcess1(int val) {
    printf("ScopeGuard: Static Function %d\n", val);
    count += 1;
    return 0;
  }
};

int ClassA::count = 0;

// Builds guards from static member functions (with and without a bound
// argument) and verifies neither has run while the guards are alive.
TEST(ScopeGuard, StaticFunction) {
  auto guard_no_arg = ailego::ScopeGuard::Make(ClassA::StaticProcess0);
  auto guard_one_arg = ailego::ScopeGuard::Make(ClassA::StaticProcess1, 2);

  EXPECT_EQ(0, ClassA::count);
}

// Provides virtual member callbacks covering every cv-qualifier combination
// (const, none, const volatile, volatile); `count` records the total number of
// invocations across all of them.
class ClassB {
 public:
  static int count;

  // const-qualified callback without arguments.
  virtual void MemberProcess0() const {
    printf("ScopeGuard: Member Function 0\n");
    count += 1;
  }

  // Unqualified callback taking an int.
  virtual void MemberProcess1(int val) {
    printf("ScopeGuard: Member Function %d\n", val);
    count += 1;
  }

  // const volatile-qualified callback taking a long.
  virtual void MemberProcess2(long val) const volatile {
    printf("ScopeGuard: Member Function %ld\n", val);
    count += 1;
  }

  // volatile-qualified callback taking a size_t.
  virtual void MemberProcess3(size_t val) volatile {
    printf("ScopeGuard: Member Function %zu\n", val);
    count += 1;
  }
};

int ClassB::count = 0;

// Builds guards from member functions, both through ScopeGuard::Make and the
// AILEGO_DEFER macro, and verifies none has run while the guards are alive.
TEST(ScopeGuard, MemberFunction) {
  ClassB target;
  auto guard_const = ailego::ScopeGuard::Make(&target, &ClassB::MemberProcess0);
  auto guard_plain =
      ailego::ScopeGuard::Make(&target, &ClassB::MemberProcess1, 2);
  AILEGO_DEFER(&target, &ClassB::MemberProcess2, 3);
  AILEGO_DEFER(&target, &ClassB::MemberProcess3, 4);
  EXPECT_EQ(0, ClassB::count);
}


================================================
FILE: tests/ailego/pattern/singleton_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <gtest/gtest.h>
#include <zvec/ailego/parallel/thread_pool.h>
#include <zvec/ailego/pattern/singleton.h>

using namespace zvec::ailego;

// Atomic invocation counter used as the singleton payload in the test below.
struct AAA {
  std::atomic_uint a{0};

  // Atomically adds one to the counter.
  void run() {
    a.fetch_add(1);
  }

  // Returns the current counter value.
  uint32_t val() {
    return a.load();
  }
};

// Verifies that Singleton<T>::Instance() refers to the same object across
// calls, for plain values and when incremented from many pool tasks.
TEST(Singleton, General) {
  // A value written through one Instance() call is seen by the next.
  Singleton<int>::Instance() = 15;
  EXPECT_EQ(15, Singleton<int>::Instance());

  Singleton<double>::Instance() = 1.2;
  EXPECT_DOUBLE_EQ(1.2, Singleton<double>::Instance());

  // Every task bumps the counter of the one shared AAA instance.
  constexpr uint32_t kTaskCount = 1000;
  ThreadPool pool;
  for (uint32_t submitted = 0; submitted < kTaskCount; ++submitted) {
    pool.execute([] { Singleton<AAA>::Instance().run(); });
  }
  pool.wait_finish();

  EXPECT_EQ(kTaskCount, Singleton<AAA>::Instance().val());
}


================================================
FILE: tests/ailego/utility/bit_string_helper_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cmath>
#include <random>
#include <ailego/utility/bit_string_helper.h>
#include <gtest/gtest.h>
#include <zvec/ailego/utility/time_helper.h>

using namespace zvec;

// Round-trips `data_num` values of `data_bits` bits each: they are written
// with BitStringWriter into a byte buffer and read back from the same buffer
// with BitStringReader.
TEST(BitStringHelper, General) {
  size_t data_bits = 13;
  size_t data_num = 10;
  // Number of bytes needed for all bits, rounded up.
  size_t buffer_size = (data_bits * data_num + 7) / 8;

  // The vector must actually contain `buffer_size` elements: reserve() only
  // raises the capacity and leaves size() at 0, so writing or reading through
  // data() would touch storage that holds no objects (undefined behaviour).
  std::vector<uint8_t> buffer(buffer_size);

  uint8_t *buffer_data = buffer.data();

  ailego::BitStringWriter bsw(buffer_data, buffer_size);
  for (size_t m = 0; m < data_num; m++) {
    uint64_t data = m;

    EXPECT_EQ(bsw.write(data, data_bits), true);
  }

  uint64_t data_read = 0;
  ailego::BitStringReader bsr(buffer_data, buffer_size);
  for (size_t m = 0; m < data_num; m++) {
    EXPECT_EQ(bsr.read(data_read, data_bits), true);

    // Each value must come back exactly as written.
    EXPECT_EQ(data_read, m);
  }
}


================================================
FILE: tests/ailego/utility/bitset_helper_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cmath>
#include <random>
#include <ailego/utility/bitset_helper.h>
#include <gtest/gtest.h>
#include <zvec/ailego/utility/time_helper.h>

using namespace zvec;

// Times BitsetHelper::Cardinality over a batch of random bit vectors and
// prints the elapsed microseconds together with the accumulated result.
TEST(BitsetHelper, Benchmark) {
  const size_t dimension = 1024;
  const size_t batch_size = 1000;
  // Each vector is stored as `dimension / 32` 32-bit words.
  const size_t words_per_vector = dimension >> 5;

  std::mt19937 rng((std::random_device())());
  std::uniform_int_distribution<uint32_t> word_dist(0, 0xffffffff);

  // All vectors are laid out back to back in one flat array.
  std::vector<uint32_t> words;
  for (size_t n = 0; n < batch_size * words_per_vector; ++n) {
    words.push_back(word_dist(rng));
  }

  ailego::ElapsedTime timer;
  size_t accumulated = 0;
  std::cout << "# " << dimension << "d, " << batch_size << std::endl;

  timer.reset();
  for (size_t row = 0; row < batch_size; ++row) {
    const uint32_t *row_words = &words[row * words_per_vector];
    accumulated +=
        ailego::BitsetHelper::Cardinality(row_words, words_per_vector);
  }
  printf("* Cardinality (us): \t%zu\n",
         static_cast<size_t>(timer.micro_seconds()));
  printf("* Result: \t%zu\n", accumulated);
}


================================================
FILE: tests/ailego/utility/dl_helper_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <string>
#include <ailego/utility/dl_helper.h>
#include <gtest/gtest.h>

using namespace zvec;

// Failure-path checks for DLHelper: loading a missing library returns null
// (filling the error message when one is requested), and null handles are
// accepted by Unload and Symbol.
TEST(DLHelper, General) {
  std::string missing_library = "no_exist_file";
  std::string load_error;

  // Without an error sink.
  EXPECT_EQ(nullptr, ailego::DLHelper::Load(missing_library, nullptr));

  // With an error sink: a message is expected.
  EXPECT_EQ(nullptr, ailego::DLHelper::Load(missing_library, &load_error));
  EXPECT_FALSE(load_error.empty());
  printf("%s\n", load_error.c_str());

  ailego::DLHelper::Unload(nullptr);

  EXPECT_EQ(nullptr, ailego::DLHelper::Symbol(nullptr, "test"));
}


================================================
FILE: tests/ailego/utility/float_helper_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cmath>
#include <random>
#include <ailego/math/norm_matrix.h>
#include <gtest/gtest.h>
#include <zvec/ailego/utility/float_helper.h>

using namespace zvec;

// Checks FloatHelper's FP32 <-> FP16 conversions on random data: the scalar
// overloads, the array overloads, and the array overloads that take a norm.
TEST(FloatHelper, General) {
  std::random_device seed_source;
  std::mt19937 rng(seed_source());
  std::uniform_real_distribution<float> value_dist(0.0f, 0.9f);
  std::uniform_int_distribution<int> length_dist(1, 250);

  // Scalar round trip using a norm of 1.0f in both directions.
  for (int round = 0; round < 1000; ++round) {
    float original = value_dist(rng);
    float restored = ailego::FloatHelper::ToFP32(
        ailego::FloatHelper::ToFP16(original, 1.0f), 1.0f);
    EXPECT_GT(0.00025, std::abs(original - restored));
  }

  // Array round trips on vectors of random length.
  for (int round = 0; round < 1000; ++round) {
    int length = length_dist(rng);
    std::vector<float> plain_fp32(length), normed_fp32(length);
    std::vector<ailego::Float16> plain_fp16(length), normed_fp16(length);

    for (float &value : plain_fp32) {
      value = value_dist(rng);
    }
    float norm_of_fp32;
    ailego::Norm2Matrix<float, 1>::Compute(plain_fp32.data(),
                                           plain_fp32.size(), &norm_of_fp32);
    EXPECT_NE(1.0f, norm_of_fp32);

    // FP32 -> FP16, once as-is and once with the norm passed along.
    ailego::FloatHelper::ToFP16(plain_fp32.data(), plain_fp32.size(),
                                (uint16_t *)plain_fp16.data());
    ailego::FloatHelper::ToFP16(plain_fp32.data(), plain_fp32.size(),
                                norm_of_fp32, (uint16_t *)normed_fp16.data());
    for (size_t j = 0; j < plain_fp32.size(); ++j) {
      EXPECT_GT(0.00025, std::abs(plain_fp32[j] - plain_fp16[j]));
    }

    float norm_of_fp16;
    ailego::Norm2Matrix<ailego::Float16, 1>::Compute(
        plain_fp16.data(), plain_fp16.size(), &norm_of_fp16);
    EXPECT_NE(1.0f, norm_of_fp16);

    // FP16 -> FP32; note that plain_fp32 is overwritten in place here.
    ailego::FloatHelper::ToFP32((const uint16_t *)plain_fp16.data(),
                                plain_fp16.size(), plain_fp32.data());
    ailego::FloatHelper::ToFP32((const uint16_t *)plain_fp16.data(),
                                plain_fp16.size(), norm_of_fp16,
                                normed_fp32.data());
    for (size_t j = 0; j < plain_fp32.size(); ++j) {
      EXPECT_GT(0.00025, std::abs(plain_fp32[j] - plain_fp16[j]));
    }

    // Both outputs produced with a norm argument must have a norm close to 1.
    ailego::Norm2Matrix<float, 1>::Compute(normed_fp32.data(),
                                           normed_fp32.size(), &norm_of_fp32);
    ailego::Norm2Matrix<ailego::Float16, 1>::Compute(
        normed_fp16.data(), normed_fp16.size(), &norm_of_fp16);
    EXPECT_GT(0.001, std::abs(1.0f - norm_of_fp32));
    EXPECT_GT(0.001, std::abs(1.0f - norm_of_fp16));
  }
}

// Smoke test for the Float16 value type: default construction, construction
// from float/double, comparisons against float, double and Float16, and the
// arithmetic and compound-assignment operators.
TEST(Float16, General) {
  ailego::Float16 zero;
  EXPECT_FLOAT_EQ(0.0f, zero);

  // Comparisons with float and double on the left-hand side.
  ailego::Float16 low = 0.33f;
  EXPECT_TRUE(0.0f < low);
  EXPECT_TRUE(0.0f <= low);
  EXPECT_TRUE(0.5f > low);
  EXPECT_TRUE(0.5f >= low);
  EXPECT_TRUE(0.0 < low);
  EXPECT_TRUE(0.0 <= low);
  EXPECT_TRUE(0.5 > low);
  EXPECT_TRUE(0.5 >= low);
  EXPECT_TRUE(static_cast<float>(low) != 0.0);
  EXPECT_FALSE(static_cast<float>(low) == 0.0);

  ailego::Float16 high = 0.55;
  EXPECT_TRUE(static_cast<double>(high) != 0.0);
  EXPECT_FALSE(static_cast<double>(high) == 0.0);

  // Comparisons between two Float16 values.
  EXPECT_TRUE(zero < low);
  EXPECT_TRUE(low <= high);
  EXPECT_TRUE(low > zero);
  EXPECT_TRUE(high >= zero);

  ailego::Float16 sum = low + high;
  ailego::Float16 difference = low - high;
  ailego::Float16 product = low * high;
  ailego::Float16 quotient = low / high;

  // Compound assignments with neutral operands.
  sum *= 1.0;
  difference /= 1.0;
  product -= 0.0;
  quotient += 0.0;

  EXPECT_TRUE(0.0f != sum);
  EXPECT_TRUE(0.0f != difference);
  EXPECT_TRUE(0.0f != product);
  EXPECT_TRUE(0.0f != quotient);

  // Print 1.0 next to the raw 16 bits stored in the object.
  ailego::Float16 unit = 1.0;
  uint16_t *unit_bits = reinterpret_cast<uint16_t *>(&unit);
  printf("One: %f, 0x%x\n", static_cast<float>(unit), *unit_bits);
}


================================================
FILE: tests/ailego/utility/matrix_helper_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <random>
#include <ailego/utility/matrix_helper.h>
#include <gtest/gtest.h>

using namespace zvec;

// Verifies that ReverseTranspose undoes Transpose on a 31 x 7 element buffer,
// for both the compile-time and the run-time sized overloads and for both
// argument orders.
TEST(MatrixHelper, Transpose) {
  std::mt19937 rng((std::random_device())());

  const size_t element_count = 31 * 7;
  std::vector<float> source(element_count);
  std::vector<float> transposed(element_count);
  std::vector<float> restored(element_count);

  std::uniform_real_distribution<float> dist(0.0, 1.0);
  for (float &value : source) {
    value = dist(rng);
  }

  // Byte-wise comparison of the source against the round-tripped buffer.
  const auto round_trip_diff = [&source, &restored] {
    return memcmp(source.data(), restored.data(),
                  source.size() * sizeof(float));
  };

  ailego::MatrixHelper::Transpose<float, 31>(source.data(), 7,
                                             transposed.data());
  ailego::MatrixHelper::ReverseTranspose<float, 31>(transposed.data(), 7,
                                                    restored.data());
  EXPECT_EQ(0, round_trip_diff());

  ailego::MatrixHelper::Transpose<float, 7>(source.data(), 31,
                                            transposed.data());
  ailego::MatrixHelper::ReverseTranspose<float, 7>(transposed.data(), 31,
                                                   restored.data());
  EXPECT_EQ(0, round_trip_diff());

  ailego::MatrixHelper::Transpose<float>(source.data(), 31, 7,
                                         transposed.data());
  ailego::MatrixHelper::ReverseTranspose<float>(transposed.data(), 31, 7,
                                                restored.data());
  EXPECT_EQ(0, round_trip_diff());

  ailego::MatrixHelper::Transpose<float>(source.data(), 7, 31,
                                         transposed.data());
  ailego::MatrixHelper::ReverseTranspose<float>(transposed.data(), 7, 31,
                                                restored.data());
  EXPECT_EQ(0, round_trip_diff());
}


================================================
FILE: tests/ailego/utility/memory_helper_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <iostream>
#include <ailego/utility/memory_helper.h>
#include <gtest/gtest.h>

using namespace zvec;

// Smoke test for MemoryHelper: SelfUsage must succeed, and every query is
// invoked once with its result printed for manual inspection.
TEST(MemoryHelper, General) {
  // Zero-initialised so the prints below never read indeterminate values if
  // SelfUsage fails (the check is non-fatal and execution continues).
  size_t vsz = 0;
  size_t rss = 0;
  EXPECT_TRUE(ailego::MemoryHelper::SelfUsage(&vsz, &rss));

  std::cout << "Page Size: " << ailego::MemoryHelper::PageSize() << std::endl;
  std::cout << "Usage: VSZ=" << vsz << ", RSS=" << rss << std::endl;
  std::cout << "RSS: " << ailego::MemoryHelper::SelfRSS() << std::endl;
  std::cout << "Peak RSS: " << ailego::MemoryHelper::SelfPeakRSS() << std::endl;
  std::cout << "Total RAM Size: " << ailego::MemoryHelper::TotalRamSize()
            << std::endl;
  std::cout << "Available RAM Size: "
            << ailego::MemoryHelper::AvailableRamSize() << std::endl;
  std::cout << "Used RAM Size: " << ailego::MemoryHelper::UsedRamSize()
            << std::endl;
  std::cout << "Total RAM Size in Container: "
            << ailego::MemoryHelper::ContainerAwareTotalRamSize() << std::endl;
}


================================================
FILE: tests/ailego/utility/string_helper_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cmath>
#include <limits>
#include <gtest/gtest.h>
#include <zvec/ailego/utility/string_helper.h>

using namespace zvec;

// Splits strings into std::string pieces using both string and single
// character delimiters, covering empty input, an empty delimiter, a delimiter
// that never matches, and delimiters at the start, end and back to back.
//
// Every size check is fatal (ASSERT_EQ) because the element checks that
// follow index into `out`; a wrong size would otherwise lead to out-of-range
// accesses instead of a clean test failure.
TEST(StringHelper, Split) {
  std::vector<std::string> out;

  // Empty input yields a single empty piece.
  ailego::StringHelper::Split("", ",", &out);
  ASSERT_EQ(1u, out.size());
  EXPECT_EQ("", out[0]);

  ailego::StringHelper::Split("", ';', &out);
  ASSERT_EQ(1u, out.size());
  EXPECT_EQ("", out[0]);

  // An empty delimiter leaves the input in one piece.
  ailego::StringHelper::Split("Hello, world!", "", &out);
  ASSERT_EQ(1u, out.size());
  EXPECT_EQ("Hello, world!", out[0]);

  // A trailing delimiter produces a trailing empty piece.
  ailego::StringHelper::Split("Hello, world!", '!', &out);
  ASSERT_EQ(2u, out.size());
  EXPECT_EQ("Hello, world", out[0]);
  EXPECT_EQ("", out[1]);

  // A delimiter that never occurs leaves the input in one piece.
  ailego::StringHelper::Split("abxycdxyxydefxya", "xyz", &out);
  ASSERT_EQ(1u, out.size());
  EXPECT_EQ("abxycdxyxydefxya", out[0]);

  // Delimiter at both ends.
  ailego::StringHelper::Split("abxycdxyxydefxya", 'a', &out);
  ASSERT_EQ(3u, out.size());
  EXPECT_EQ("", out[0]);
  EXPECT_EQ("bxycdxyxydefxy", out[1]);
  EXPECT_EQ("", out[2]);

  ailego::StringHelper::Split("abxycdxy!!xydefxya", "xy", &out);
  ASSERT_EQ(5u, out.size());
  EXPECT_EQ("ab", out[0]);
  EXPECT_EQ("cd", out[1]);
  EXPECT_EQ("!!", out[2]);
  EXPECT_EQ("def", out[3]);
  EXPECT_EQ("a", out[4]);

  // Adjacent delimiters produce an empty piece between them.
  ailego::StringHelper::Split("abxycdxy!!xydefxya", '!', &out);
  ASSERT_EQ(3u, out.size());
  EXPECT_EQ("abxycdxy", out[0]);
  EXPECT_EQ("", out[1]);
  EXPECT_EQ("xydefxya", out[2]);

  ailego::StringHelper::Split("abxycdxyxydefxya", "xy", &out);
  ASSERT_EQ(5u, out.size());
  EXPECT_EQ("ab", out[0]);
  EXPECT_EQ("cd", out[1]);
  EXPECT_EQ("", out[2]);
  EXPECT_EQ("def", out[3]);
  EXPECT_EQ("a", out[4]);

  ailego::StringHelper::Split("abxycdxyxydefxya", 'y', &out);
  ASSERT_EQ(5u, out.size());
  EXPECT_EQ("abx", out[0]);
  EXPECT_EQ("cdx", out[1]);
  EXPECT_EQ("x", out[2]);
  EXPECT_EQ("defx", out[3]);
  EXPECT_EQ("a", out[4]);

  ailego::StringHelper::Split("abxycdxyxydefxy", "xy", &out);
  ASSERT_EQ(5u, out.size());
  EXPECT_EQ("ab", out[0]);
  EXPECT_EQ("cd", out[1]);
  EXPECT_EQ("", out[2]);
  EXPECT_EQ("def", out[3]);
  EXPECT_EQ("", out[4]);

  ailego::StringHelper::Split("abxycdxyxydefxy", 'y', &out);
  ASSERT_EQ(5u, out.size());
  EXPECT_EQ("abx", out[0]);
  EXPECT_EQ("cdx", out[1]);
  EXPECT_EQ("x", out[2]);
  EXPECT_EQ("defx", out[3]);
  EXPECT_EQ("", out[4]);

  // Input consisting only of the delimiter yields two empty pieces.
  ailego::StringHelper::Split("xy", "xy", &out);
  ASSERT_EQ(2u, out.size());
  EXPECT_EQ("", out[0]);
  EXPECT_EQ("", out[1]);

  ailego::StringHelper::Split("x", 'x', &out);
  ASSERT_EQ(2u, out.size());
  EXPECT_EQ("", out[0]);
  EXPECT_EQ("", out[1]);
}

// Splits into floating-point outputs: pieces that are not numbers ("tt" and
// the empty piece after the trailing comma) are expected to come out as 0.
//
// Size checks are fatal because the element checks index into the output.
TEST(StringHelper, SplitFloat) {
  std::vector<float> out1;
  ailego::StringHelper::Split("1.0, tt, 2,", ',', &out1);
  ASSERT_EQ(4u, out1.size());
  EXPECT_FLOAT_EQ(1.0f, out1[0]);
  EXPECT_FLOAT_EQ(0.0f, out1[1]);
  EXPECT_FLOAT_EQ(2.0f, out1[2]);
  EXPECT_FLOAT_EQ(0.0f, out1[3]);

  std::vector<double> out2;
  ailego::StringHelper::Split("1.0, tt, 2,", ',', &out2);
  ASSERT_EQ(4u, out2.size());
  EXPECT_DOUBLE_EQ(1.0f, out2[0]);
  EXPECT_DOUBLE_EQ(0.0f, out2[1]);
  EXPECT_DOUBLE_EQ(2.0f, out2[2]);
  EXPECT_DOUBLE_EQ(0.0f, out2[3]);
}

// Splits by a character delimiter into every fixed-width integer type.
// Expected per the assertions: "-1.0" becomes -1 (the all-ones value for
// unsigned types), "2.3" becomes 2, and non-numeric or empty pieces become 0.
//
// Size checks are fatal because the element checks index into the output.
TEST(StringHelper, SplitInteger) {
  std::vector<int32_t> out1;
  ailego::StringHelper::Split("-1.0, tt, 2,", ',', &out1);
  ASSERT_EQ(4u, out1.size());
  EXPECT_EQ(-1, out1[0]);
  EXPECT_EQ(0, out1[1]);
  EXPECT_EQ(2, out1[2]);
  EXPECT_EQ(0, out1[3]);

  std::vector<uint32_t> out2;
  ailego::StringHelper::Split("-1.0, tt, 2,", ',', &out2);
  ASSERT_EQ(4u, out2.size());
  EXPECT_EQ(0xffffffffu, out2[0]);
  EXPECT_EQ(0u, out2[1]);
  EXPECT_EQ(2u, out2[2]);
  EXPECT_EQ(0u, out2[3]);

  std::vector<int64_t> out3;
  ailego::StringHelper::Split("-1.0, tt, 2.3,", ',', &out3);
  ASSERT_EQ(4u, out3.size());
  EXPECT_EQ(-1, out3[0]);
  EXPECT_EQ(0, out3[1]);
  EXPECT_EQ(2, out3[2]);
  EXPECT_EQ(0, out3[3]);

  std::vector<uint64_t> out4;
  ailego::StringHelper::Split("-1.0, tt, 2.3,", ',', &out4);
  ASSERT_EQ(4u, out4.size());
  EXPECT_EQ((uint64_t)-1, out4[0]);
  EXPECT_EQ(0u, out4[1]);
  EXPECT_EQ(2u, out4[2]);
  EXPECT_EQ(0u, out4[3]);

  std::vector<int8_t> out5;
  ailego::StringHelper::Split("-1.0, tt, 2,", ',', &out5);
  ASSERT_EQ(4u, out5.size());
  EXPECT_EQ(-1, out5[0]);
  EXPECT_EQ(0, out5[1]);
  EXPECT_EQ(2, out5[2]);
  EXPECT_EQ(0, out5[3]);

  std::vector<uint8_t> out6;
  ailego::StringHelper::Split("-1.0, tt, 2,", ',', &out6);
  ASSERT_EQ(4u, out6.size());
  EXPECT_EQ(255u, out6[0]);
  EXPECT_EQ(0u, out6[1]);
  EXPECT_EQ(2u, out6[2]);
  EXPECT_EQ(0u, out6[3]);

  std::vector<int16_t> out7;
  ailego::StringHelper::Split("-1.0, tt, 2,", ',', &out7);
  ASSERT_EQ(4u, out7.size());
  EXPECT_EQ(-1, out7[0]);
  EXPECT_EQ(0, out7[1]);
  EXPECT_EQ(2, out7[2]);
  EXPECT_EQ(0, out7[3]);

  std::vector<uint16_t> out8;
  ailego::StringHelper::Split("-1.0, tt, 2,", ',', &out8);
  ASSERT_EQ(4u, out8.size());
  EXPECT_EQ(65535u, out8[0]);
  EXPECT_EQ(0u, out8[1]);
  EXPECT_EQ(2u, out8[2]);
  EXPECT_EQ(0u, out8[3]);
}

// Uses delimiters that are themselves valid number characters ('2' inside an
// integer, 'e' inside what could be an exponent) and expects the split to
// happen before any numeric parsing.
//
// Size checks are fatal because the element checks index into the output.
TEST(StringHelper, SplitWithTValidDelimeter) {
  std::vector<int32_t> out1;
  ailego::StringHelper::Split("12321", '2', &out1);
  ASSERT_EQ(3u, out1.size());
  EXPECT_EQ(1, out1[0]);
  EXPECT_EQ(3, out1[1]);
  EXPECT_EQ(1, out1[2]);

  std::vector<double> out2;
  ailego::StringHelper::Split("300e30e3", 'e', &out2);
  ASSERT_EQ(3u, out2.size());
  EXPECT_DOUBLE_EQ(300.0f, out2[0]);
  EXPECT_DOUBLE_EQ(30.0f, out2[1]);
  EXPECT_DOUBLE_EQ(3.0f, out2[2]);
}

// Same integer expectations as the character-delimiter test, but passing the
// delimiter as a std::string.
//
// Size checks are fatal because the element checks index into the output.
TEST(StringHelper, SplitByString) {
  std::string sep = ",";
  std::vector<int32_t> out1;
  ailego::StringHelper::Split("-1.0, tt, 2,", sep, &out1);
  ASSERT_EQ(4u, out1.size());
  EXPECT_EQ(-1, out1[0]);
  EXPECT_EQ(0, out1[1]);
  EXPECT_EQ(2, out1[2]);
  EXPECT_EQ(0, out1[3]);

  std::vector<uint32_t> out2;
  ailego::StringHelper::Split("-1.0, tt, 2,", sep, &out2);
  ASSERT_EQ(4u, out2.size());
  EXPECT_EQ(0xffffffffu, out2[0]);
  EXPECT_EQ(0u, out2[1]);
  EXPECT_EQ(2u, out2[2]);
  EXPECT_EQ(0u, out2[3]);

  std::vector<int64_t> out3;
  ailego::StringHelper::Split("-1.0, tt, 2.3,", sep, &out3);
  ASSERT_EQ(4u, out3.size());
  EXPECT_EQ(-1, out3[0]);
  EXPECT_EQ(0, out3[1]);
  EXPECT_EQ(2, out3[2]);
  EXPECT_EQ(0, out3[3]);

  std::vector<uint64_t> out4;
  ailego::StringHelper::Split("-1.0, tt, 2.3,", sep, &out4);
  ASSERT_EQ(4u, out4.size());
  EXPECT_EQ((uint64_t)-1, out4[0]);
  EXPECT_EQ(0u, out4[1]);
  EXPECT_EQ(2u, out4[2]);
  EXPECT_EQ(0u, out4[3]);

  std::vector<int8_t> out5;
  ailego::StringHelper::Split("-1.0, tt, 2,", sep, &out5);
  ASSERT_EQ(4u, out5.size());
  EXPECT_EQ(-1, out5[0]);
  EXPECT_EQ(0, out5[1]);
  EXPECT_EQ(2, out5[2]);
  EXPECT_EQ(0, out5[3]);

  std::vector<uint8_t> out6;
  ailego::StringHelper::Split("-1.0, tt, 2,", sep, &out6);
  ASSERT_EQ(4u, out6.size());
  EXPECT_EQ(255u, out6[0]);
  EXPECT_EQ(0u, out6[1]);
  EXPECT_EQ(2u, out6[2]);
  EXPECT_EQ(0u, out6[3]);

  std::vector<int16_t> out7;
  ailego::StringHelper::Split("-1.0, tt, 2,", sep, &out7);
  ASSERT_EQ(4u, out7.size());
  EXPECT_EQ(-1, out7[0]);
  EXPECT_EQ(0, out7[1]);
  EXPECT_EQ(2, out7[2]);
  EXPECT_EQ(0, out7[3]);

  std::vector<uint16_t> out8;
  ailego::StringHelper::Split("-1.0, tt, 2,", sep, &out8);
  ASSERT_EQ(4u, out8.size());
  EXPECT_EQ(65535u, out8[0]);
  EXPECT_EQ(0u, out8[1]);
  EXPECT_EQ(2u, out8[2]);
  EXPECT_EQ(0u, out8[3]);
}

// Checks the copying (CopyLeftTrim/CopyRightTrim/CopyTrim) and the in-place
// (LeftTrim/RightTrim/Trim) variants on a string with leading and trailing
// whitespace; whitespace inside the string must be preserved.
TEST(StringHelper, Trim) {
  // Copying variants.
  std::string padded = "  \t123 45 67\t\n8\r \n";
  EXPECT_EQ("123 45 67\t\n8\r \n", ailego::StringHelper::CopyLeftTrim(padded));
  EXPECT_EQ("  \t123 45 67\t\n8", ailego::StringHelper::CopyRightTrim(padded));
  EXPECT_EQ("123 45 67\t\n8", ailego::StringHelper::CopyTrim(padded));

  // In-place variants, each applied to a fresh copy of the same input.
  std::string left_trimmed = "  \t123 45 67\t\n8\r \n";
  ailego::StringHelper::LeftTrim(left_trimmed);
  EXPECT_EQ("123 45 67\t\n8\r \n", left_trimmed);

  std::string right_trimmed = "  \t123 45 67\t\n8\r \n";
  ailego::StringHelper::RightTrim(right_trimmed);
  EXPECT_EQ("  \t123 45 67\t\n8", right_trimmed);

  std::string fully_trimmed = "  \t123 45 67\t\n8\r \n";
  ailego::StringHelper::Trim(fully_trimmed);
  EXPECT_EQ("123 45 67\t\n8", fully_trimmed);
}

// Checks CompareIgnoreCase on pairs that differ only in letter case (expected
// true) and on pairs that differ in content or length (expected false).
TEST(StringHelper, CompareIgnoreCase) {
  {
    // Same text, different case.
    std::string lhs = "a b\tc\nd";
    std::string rhs = "A B\tC\nd";
    EXPECT_TRUE(ailego::StringHelper::CompareIgnoreCase(lhs, rhs));
  }
  {
    // One differing letter.
    std::string lhs = "a d\tc\nd";
    std::string rhs = "A B\tC\nd";
    EXPECT_FALSE(ailego::StringHelper::CompareIgnoreCase(lhs, rhs));
  }
  {
    // Different content and length.
    std::string lhs = "a d\tc\n";
    std::string rhs = "A B\tC\nd";
    EXPECT_FALSE(ailego::StringHelper::CompareIgnoreCase(lhs, rhs));
  }
  {
    // Digits alongside letters of mixed case.
    std::string lhs = "A D\tc\n123456";
    std::string rhs = "A d\tC\n123456";
    EXPECT_TRUE(ailego::StringHelper::CompareIgnoreCase(lhs, rhs));
  }
  {
    // Non-empty against empty.
    std::string lhs = "A D\tc\n123456";
    std::string rhs = "";
    EXPECT_FALSE(ailego::StringHelper::CompareIgnoreCase(lhs, rhs));
  }
}

namespace zvec::ailego {
namespace testing {

// Concat/Append with the built-in integer types (short through long long,
// plus ssize_t and size_t), signed and unsigned alternating.
TEST(StringHelperJoinAppend, Integer) {
  short short_val = -1;
  unsigned short ushort_val = 2;
  long long_val = -3;
  unsigned long ulong_val = 4;
  long long llong_val = -5;
  unsigned long long ullong_val = 6;
  ssize_t ssize_val = -7;
  size_t size_val = 8;

  auto joined = StringHelper::Concat(short_val, ushort_val, long_val, ulong_val,
                                     llong_val, ullong_val, ssize_val,
                                     size_val);
  EXPECT_EQ(joined, "-12-34-56-78");

  // Append adds the same text after the existing contents.
  std::string buffer = "TEST";
  StringHelper::Append(&buffer, short_val, ushort_val, long_val, ulong_val,
                       llong_val, ullong_val, ssize_val, size_val);
  EXPECT_EQ(buffer, "TEST-12-34-56-78");
}

// Concat/Append with the fixed-width integer types; the 8-bit values are
// expected to be rendered as numbers.
TEST(StringHelperJoinAppend, SizedInteger) {
  int8_t i8 = -1;
  uint8_t u8 = 2;
  int16_t i16 = -3;
  uint16_t u16 = 4;
  int32_t i32 = -5;
  uint32_t u32 = 6;
  int64_t i64 = -7;
  uint64_t u64 = 8;

  // Small combinations first.
  EXPECT_EQ("-12", StringHelper::Concat(i8, u8));
  EXPECT_EQ("-12-3", StringHelper::Concat(i8, u8, i16));
  EXPECT_EQ("4-5", StringHelper::Concat(u16, i32));
  EXPECT_EQ("-78", StringHelper::Concat(i64, u64));

  // Then all eight at once.
  auto joined = StringHelper::Concat(i8, u8, i16, u16, i32, u32, i64, u64);
  EXPECT_EQ(joined, "-12-34-56-78");

  std::string buffer = "TEST";
  StringHelper::Append(&buffer, i8, u8, i16, u16, i32, u32, i64, u64);
  EXPECT_EQ(buffer, "TEST-12-34-56-78");
}

// Concat with the smallest and largest value of each signed fixed-width type.
TEST(StringHelperJoinAppend, MinMax) {
  auto joined = StringHelper::Concat(
      std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max(),
      std::numeric_limits<int16_t>::min(), std::numeric_limits<int16_t>::max(),
      std::numeric_limits<int32_t>::min(), std::numeric_limits<int32_t>::max(),
      std::numeric_limits<int64_t>::min(), std::numeric_limits<int64_t>::max());

  // min/max pairs in order: 8, 16, 32 and 64 bits.
  EXPECT_EQ(joined,
            "-128127-3276832767-21474836482147483647-"
            "92233720368547758089223372036854775807");
}

// Concat with floating-point arguments: ordinary values of all three widths,
// NAN, INFINITY, and the float/double limits.
TEST(StringHelperJoinAppend, Float) {
  float single_val = 3.14f;
  double double_val = 6.28;
  long double extended_val = 9.42;

  auto joined = StringHelper::Concat(
      single_val, double_val, extended_val, NAN, INFINITY,
      std::numeric_limits<float>::min(), std::numeric_limits<float>::max(),
      std::numeric_limits<double>::min(), std::numeric_limits<double>::max());
  EXPECT_EQ(joined,
            "3.146.289.42naninf1.17549e-383.40282e+382.22507e-3081.79769e+308");
}

// Concat with enumerators of an unscoped and of a scoped enum; both are
// expected to be rendered as their numeric values.
TEST(StringHelperJoinAppend, Enums) {
  enum { kOne = 1, kTen = 10 };
  enum class A : int64_t { kFirst = 100, kLast = 10000 };

  auto joined = StringHelper::Concat(kOne, kTen, A::kFirst, A::kLast);
  EXPECT_EQ(joined, "11010010000");
}

// Concat with string literals and std::string temporaries interleaved, using
// pieces of 1, 4 and 8 characters.
TEST(StringHelperJoinAppend, String) {
  auto one_char_pieces =
      StringHelper::Concat("a", std::string{"b"}, "c", std::string{"d"});
  EXPECT_EQ(one_char_pieces, "abcd");

  auto four_char_pieces = StringHelper::Concat("aaaa", std::string{"bbbb"},
                                               "cccc", std::string{"dddd"});
  EXPECT_EQ(four_char_pieces, "aaaabbbbccccdddd");

  auto eight_char_pieces =
      StringHelper::Concat("aaaaaaaa", std::string{"bbbbbbbb"}, "cccccccc",
                           std::string{"dddddddd"});
  EXPECT_EQ(eight_char_pieces, "aaaaaaaabbbbbbbbccccccccdddddddd");
}

// Concat/Append with a long argument list (36 arguments) that mixes integers
// and string literals.
TEST(StringHelperJoinAppend, ArbitaryNumberOfArguments) {
  auto joined = StringHelper::Concat(
      0, 1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f", "g", "h", "i",
      "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x",
      "y", "z");
  EXPECT_EQ(joined, "0123456789abcdefghijklmnopqrstuvwxyz");

  std::string buffer = "TEST";
  StringHelper::Append(&buffer, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c",
                       "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n",
                       "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y",
                       "z");
  EXPECT_EQ(buffer, "TEST0123456789abcdefghijklmnopqrstuvwxyz");
}

// Concatenating or appending any number of empty pieces must leave the text
// unchanged; checked for an empty, a short and a long starting text.
TEST(StringHelperJoinAppend, Empty) {
  for (const char *text :
       {"", "short string", "a very very very very very long string"}) {
    // Concat: the text followed by zero or more empty pieces.
    EXPECT_EQ(text, StringHelper::Concat(text));
    EXPECT_EQ(text, StringHelper::Concat(text, ""));
    EXPECT_EQ(text, StringHelper::Concat(text, "", ""));
    EXPECT_EQ(text, StringHelper::Concat(text, "", "", ""));
    EXPECT_EQ(text, StringHelper::Concat(text, "", "", "", ""));
    EXPECT_EQ(text, StringHelper::Concat(text, "", std::string{}, "", "", ""));
    EXPECT_EQ(text, StringHelper::Concat(text, "", std::string{}, "",
                                         std::string{}, "", ""));

    // Append: the same buffer is reused, so it must stay equal throughout.
    std::string buffer = text;
    StringHelper::Append(&buffer);
    EXPECT_EQ(buffer, text);
    StringHelper::Append(&buffer, "");
    EXPECT_EQ(buffer, text);
    StringHelper::Append(&buffer, "", "");
    EXPECT_EQ(buffer, text);
    StringHelper::Append(&buffer, "", "", "");
    EXPECT_EQ(buffer, text);
    StringHelper::Append(&buffer, "", "", "", "");
    EXPECT_EQ(buffer, text);
    StringHelper::Append(&buffer, "", std::string{}, "", "", "");
    EXPECT_EQ(buffer, text);
    StringHelper::Append(&buffer, "", std::string{}, "", std::string{}, "", "");
    EXPECT_EQ(buffer, text);
  }
}

// Concat with StringView arguments built from a literal, a copy, nullptr, a
// std::string and a (pointer, length) pair; the nullptr view is expected to
// contribute nothing.
TEST(StringHelperJoinAppend, StringView) {
  StringView from_literal = "hello";
  StringView copied = from_literal;
  StringView from_null = nullptr;
  std::string foo = "foo";
  StringView from_string = foo;
  StringView another_literal = "bar";
  // The two characters "ll" taken from the middle of "hello".
  StringView from_range{from_literal.data() + 2, 2};

  auto joined = StringHelper::Concat(from_literal, copied, from_null,
                                     from_string, another_literal, from_range);
  EXPECT_EQ(joined, "hellohellofoobarll");
}

// Calls Split with a fourth argument of `true` and expects empty pieces to be
// left out of the result.
//
// Size checks that are followed by an element access are fatal, so a wrong
// size cannot lead to an out-of-range access.
TEST(StringHelper, SplitWithEmptySkipped) {
  std::vector<std::string> out;

  // Empty input: no pieces at all.
  ailego::StringHelper::Split("", ",", &out, true);
  EXPECT_EQ(0u, out.size());

  ailego::StringHelper::Split(";1;", ';', &out, true);
  ASSERT_EQ(1u, out.size());
  EXPECT_EQ("1", out[0]);

  // Only delimiters: no pieces at all.
  ailego::StringHelper::Split(";;;", ";", &out, true);
  EXPECT_EQ(0u, out.size());

  ailego::StringHelper::Split(";;;1", ';', &out, true);
  ASSERT_EQ(1u, out.size());
  EXPECT_EQ("1", out[0]);
}

}  // namespace testing
}  // namespace zvec::ailego


================================================
FILE: tests/ailego/utility/time_helper_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <chrono>
#include <thread>
#include <gtest/gtest.h>
#include <zvec/ailego/utility/time_helper.h>

using namespace zvec;

// Smoke test: query Monotime at every resolution and print the readings.
// Nothing is asserted; the test only checks that the calls run.
TEST(TimeHelper, Monotime) {
  const auto nanos = ailego::Monotime::NanoSeconds();
  std::cout << "NanoSeconds: " << nanos << std::endl;

  const auto micros = ailego::Monotime::MicroSeconds();
  std::cout << "MicroSeconds: " << micros << std::endl;

  const auto millis = ailego::Monotime::MilliSeconds();
  std::cout << "MilliSeconds: " << millis << std::endl;

  const auto secs = ailego::Monotime::Seconds();
  std::cout << "Seconds: " << secs << std::endl;
}

// Smoke test: query Realtime at every resolution, then print the local and
// GMT renderings both for an explicit timestamp and for the default overload.
// Nothing is asserted; the test only checks that the calls run.
TEST(TimeHelper, Realtime) {
  const auto nanos = ailego::Realtime::NanoSeconds();
  std::cout << "NanoSeconds: " << nanos << std::endl;

  const auto micros = ailego::Realtime::MicroSeconds();
  std::cout << "MicroSeconds: " << micros << std::endl;

  const auto millis = ailego::Realtime::MilliSeconds();
  std::cout << "MilliSeconds: " << millis << std::endl;

  const auto secs = ailego::Realtime::Seconds();
  std::cout << "Seconds: " << secs << std::endl;

  // Explicit timestamp first, then the argument-less overloads.
  uint64_t now_seconds = ailego::Realtime::Seconds();
  std::cout << "Localtime: " << ailego::Realtime::Localtime(now_seconds)
            << std::endl;
  std::cout << "Gmtime: " << ailego::Realtime::Gmtime(now_seconds)
            << std::endl;
  std::cout << "Localtime: " << ailego::Realtime::Localtime() << std::endl;
  std::cout << "Gmtime: " << ailego::Realtime::Gmtime() << std::endl;
}

// Smoke test for ElapsedTime: print the elapsed time at every resolution
// right after construction, right after each of three reset() calls, and
// finally once more after a sleep without a reset. Nothing is asserted.
TEST(TimeHelper, ElapsedTime) {
  ailego::ElapsedTime stamp;
  const auto pause = std::chrono::milliseconds(101);

  // Prints the current reading of `stamp` in ns, us, ms and s.
  const auto report = [&stamp]() {
    std::cout << "elapsed: " << stamp.nano_seconds() << " ns" << std::endl;
    std::cout << "elapsed: " << stamp.micro_seconds() << " us" << std::endl;
    std::cout << "elapsed: " << stamp.milli_seconds() << " ms" << std::endl;
    std::cout << "elapsed: " << stamp.seconds() << " s" << std::endl;
  };

  // Freshly constructed stamp.
  report();
  std::this_thread::sleep_for(pause);

  // Three rounds of reset -> report -> sleep.
  for (int round = 0; round < 3; ++round) {
    stamp.reset();
    report();
    std::this_thread::sleep_for(pause);
  }

  // Last reading is taken after the sleep, without resetting.
  report();
}


================================================
FILE: tests/ailego/utility/type_helper_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <iostream>
#include <gtest/gtest.h>
#include <zvec/ailego/utility/float_helper.h>
#include <zvec/ailego/utility/type_helper.h>

using namespace zvec;

// IsArithmetic is expected to be true for built-in integer and
// floating-point types as well as ailego::Float16, and false for void.
TEST(TypeHelper, IsArithmetic) {
  EXPECT_TRUE(ailego::IsArithmetic<uintptr_t>::value);
  EXPECT_TRUE(ailego::IsArithmetic<int>::value);
  EXPECT_TRUE(ailego::IsArithmetic<double>::value);
  EXPECT_TRUE(ailego::IsArithmetic<float>::value);
  EXPECT_TRUE(ailego::IsArithmetic<ailego::Float16>::value);
  EXPECT_FALSE(ailego::IsArithmetic<void>::value);
}

// IsFloatingPoint is expected to be true for double, float and
// ailego::Float16, and false for integer types and void.
TEST(TypeHelper, IsFloatingPoint) {
  EXPECT_FALSE(ailego::IsFloatingPoint<long>::value);
  EXPECT_FALSE(ailego::IsFloatingPoint<int>::value);
  EXPECT_TRUE(ailego::IsFloatingPoint<double>::value);
  EXPECT_TRUE(ailego::IsFloatingPoint<float>::value);
  EXPECT_TRUE(ailego::IsFloatingPoint<ailego::Float16>::value);
  EXPECT_FALSE(ailego::IsFloatingPoint<void>::value);
}

// Helper that is only enabled when ailego::Conjunction over
// std::is_integral<TArgs>... is true; it ignores its arguments and returns
// true. Because of the enable_if constraint, a call that compiles is itself
// evidence that the Conjunction evaluated to true.
template <typename... TArgs,
          typename = typename std::enable_if<
              ailego::Conjunction<std::is_integral<TArgs>...>::value>::type>
static bool TrueAnd(TArgs...) {
  return true;
}

// Helper that is only enabled when ailego::Conjunction over
// std::is_integral<TArgs>... is false; it ignores its arguments and returns
// false. Because of the enable_if constraint, a call that compiles is itself
// evidence that the Conjunction evaluated to false.
template <typename... TArgs,
          typename = typename std::enable_if<
              !ailego::Conjunction<std::is_integral<TArgs>...>::value>::type>
static bool FalseAnd(TArgs...) {
  return false;
}

// Helper that is only enabled when ailego::Disjunction over
// std::is_integral<TArgs>... is true; it ignores its arguments and returns
// true. Because of the enable_if constraint, a call that compiles is itself
// evidence that the Disjunction evaluated to true.
template <typename... TArgs,
          typename = typename std::enable_if<
              ailego::Disjunction<std::is_integral<TArgs>...>::value>::type>
static bool TrueOr(TArgs...) {
  return true;
}

// Helper that is only enabled when ailego::Disjunction over
// std::is_integral<TArgs>... is false; it ignores its arguments and returns
// false. Because of the enable_if constraint, a call that compiles is itself
// evidence that the Disjunction evaluated to false.
template <typename... TArgs,
          typename = typename std::enable_if<
              !ailego::Disjunction<std::is_integral<TArgs>...>::value>::type>
static bool FalseOr(TArgs...) {
  return false;
}

// Exercises ailego::Conjunction through the SFINAE-constrained helpers:
// TrueAnd is callable when every argument is integral, FalseAnd when at
// least one argument (a string literal or a double here) is not.
TEST(TypeHelper, Conjunction) {
  EXPECT_TRUE(TrueAnd(1, 2, 2u, 0u));
  EXPECT_FALSE(FalseAnd(1, 2, 2u, ""));
  EXPECT_FALSE(FalseAnd(1, 2, 2u, 0.0));
}

// Exercises ailego::Disjunction through the SFINAE-constrained helpers:
// TrueOr is callable when at least one argument is integral, FalseOr when
// none of the arguments is integral.
TEST(TypeHelper, Disjunction) {
  EXPECT_TRUE(TrueOr(1, 2, 2u, ""));
  EXPECT_TRUE(TrueOr(0.0, "", 0u));
  EXPECT_FALSE(FalseOr("", ""));
  EXPECT_FALSE(FalseOr(0.0, ""));
}

// Plain aggregate of two scalar members, used by the IsTriviallyCopyable
// test as an example of a user-defined type.
struct TriviallyStruct {
  float a;
  uint32_t b;
};

// IsTriviallyCopyable is expected to hold for ailego::Float16, built-in
// floating-point and integer types, raw pointers and a plain aggregate of
// scalars.
TEST(TypeHelper, IsTriviallyCopyable) {
  EXPECT_TRUE(ailego::IsTriviallyCopyable<ailego::Float16>::value);
  EXPECT_TRUE(ailego::IsTriviallyCopyable<float>::value);
  // This line used to repeat the `float` check verbatim; `double` adds the
  // coverage the duplicate did not provide.
  EXPECT_TRUE(ailego::IsTriviallyCopyable<double>::value);
  EXPECT_TRUE(ailego::IsTriviallyCopyable<uint64_t>::value);
  EXPECT_TRUE(ailego::IsTriviallyCopyable<uint64_t *>::value);
  EXPECT_TRUE(ailego::IsTriviallyCopyable<void *>::value);
  // The reference case is intentionally left disabled:
  // EXPECT_FALSE(ailego::IsTriviallyCopyable<uint64_t &>::value);
  EXPECT_TRUE(ailego::IsTriviallyCopyable<TriviallyStruct>::value);
}

// Runtime checks of IsInvocable / IsInvocableWithResult on plain function
// types. Aliases keep the template commas out of the EXPECT_TRUE macro
// arguments.
TEST(TypeHelper, IsInvocable) {
  using NullaryInt = ailego::IsInvocable<int()>;
  using NullaryIntResult = ailego::IsInvocableWithResult<int, int()>;
  using UnaryVoidResult = ailego::IsInvocableWithResult<void, void(int), int>;

  EXPECT_TRUE(NullaryInt::value);

  EXPECT_TRUE(!!(NullaryIntResult::value));
  EXPECT_TRUE(!!(UnaryVoidResult::value));
}

// Compile-time versions of the same three invocability checks, so a
// regression already fails the build instead of only failing at test time.
static_assert(ailego::IsInvocable<int()>::value, "");
static_assert(ailego::IsInvocableWithResult<int, int()>::value, "");
static_assert(ailego::IsInvocableWithResult<void, void(int), int>::value, "");


================================================
FILE: tests/ailego/version_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <ailego/version.h>
#include <gtest/gtest.h>

using namespace zvec;

// Smoke test: print the version string followed by the version details.
// Nothing is asserted; the test only checks that both accessors run.
TEST(Version, General) {
  const char *version = ailego::Version::String();
  printf("Version: %s\n\n", version);

  const char *details = ailego::Version::Details();
  printf("%s", details);
}


================================================
FILE: tests/core/CMakeLists.txt
================================================
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

# Test subdirectories of tests/core, one cc_directories() call per directory.
# NOTE(review): cc_directories is not defined in this file; presumably it is
# provided by the included cmake/bazel.cmake and adds the directory to the
# build - confirm.
cc_directories(algorithm)
cc_directories(framework)
cc_directories(metric)
cc_directories(utility)
cc_directories(interface)
cc_directories(quantizer)

================================================
FILE: tests/core/algorithm/CMakeLists.txt
================================================
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

# Algorithm test subdirectories that are always passed to cc_directories().
cc_directories(cluster)
cc_directories(flat)
cc_directories(flat_sparse)
cc_directories(ivf)
cc_directories(hnsw)
cc_directories(hnsw_sparse)
# hnsw_rabitq is only added when RABITQ_SUPPORTED evaluates to true.
if(RABITQ_SUPPORTED)
cc_directories(hnsw_rabitq)
endif()


================================================
FILE: tests/core/algorithm/cluster/CMakeLists.txt
================================================
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)

# Collect every *_test.cc source below this directory (recursive glob).
file(GLOB_RECURSE ALL_TEST_SRCS *_test.cc)

# One cc_gtest() call per test source. The target name is the source file
# name without directory and extension (NAME_WE).
# NOTE(review): cc_gtest is presumably provided by the included
# cmake/bazel.cmake - confirm the meaning of STRICT/LIBS/SRCS/INCS there.
foreach(CC_SRCS ${ALL_TEST_SRCS})
  get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)
  cc_gtest(
      NAME ${CC_TARGET}
      STRICT
      LIBS zvec_ailego core_framework core_utility core_metric core_knn_cluster
      SRCS ${CC_SRCS}
      INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm
    )
endforeach()

================================================
FILE: tests/core/algorithm/cluster/kmeans_cluster_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cmath>
#include <random>
#include <gtest/gtest.h>
#include <zvec/ailego/container/params.h>
#include "zvec/core/framework/index_framework.h"
#include "zvec/core/framework/index_meta.h"

using namespace zvec::core;
using namespace zvec::ailego;

// Smoke test for the "KmeansCluster" plugin on random FP32 vectors.
// classify()/label() are expected to fail while the centroid list is still
// empty, cluster() is run three times on the same centroid list, and
// classify()/label() are expected to succeed afterwards.
TEST(KmeansCluster, General) {
  // Prepare index data
  const uint32_t count = 5000u;
  const uint32_t dimension = 33u;

  IndexMeta index_meta;
  index_meta.set_meta(IndexMeta::DataType::DT_FP32, dimension);
  index_meta.set_metric("SquaredEuclidean", 0, zvec::ailego::Params());

  std::shared_ptr<CompactIndexFeatures> features(
      new CompactIndexFeatures(index_meta));

  std::random_device seed_source;
  std::mt19937 engine(seed_source());
  std::uniform_real_distribution<float> uniform(0.0, 5.0);

  for (uint32_t row = 0; row != count; ++row) {
    std::vector<float> sample(dimension);
    for (float &component : sample) {
      component = uniform(engine);
    }
    features->emplace(sample.data());
  }

  // Create a Kmeans cluster through the factory
  IndexCluster::Pointer cluster = IndexFactory::CreateCluster("KmeansCluster");
  ASSERT_TRUE(!!cluster);

  zvec::ailego::Params params;
  params.set("proxima.general.cluster.count", 1);
  params.set("proxima.kmeans.cluster.count", 56);

  ASSERT_EQ(0, cluster->init(index_meta, params));
  ASSERT_EQ(0, cluster->mount(features));
  cluster->suggest(64u);

  auto threads = std::make_shared<SingleQueueIndexThreads>();

  // Prints follower count, score and the first three / last two components.
  const auto print_centroid = [](const IndexCluster::Centroid &centroid) {
    const auto &vec = centroid.vector<float>();

    std::cout << centroid.follows() << " (" << centroid.score() << ") { "
              << vec[0] << ", " << vec[1] << ", " << vec[2] << ", ... , "
              << vec[vec.size() - 2] << ", " << vec[vec.size() - 1] << " }"
              << std::endl;
  };

  std::cout << "---------- FIRST ----------\n";
  std::vector<IndexCluster::Centroid> centroids;
  std::vector<uint32_t> labels;
  // No centroids yet: both calls must report an error.
  ASSERT_NE(0, cluster->classify(threads, centroids));
  ASSERT_NE(0, cluster->label(threads, centroids, &labels));
  ASSERT_EQ(0, cluster->cluster(threads, centroids));

  for (const auto &centroid : centroids) {
    print_centroid(centroid);
    ASSERT_EQ(0u, centroid.similars().size());
  }

  // Two more rounds on the centroids produced so far.
  const char *const later_rounds[] = {"---------- SECOND ----------\n",
                                      "---------- THIRD ----------\n"};
  for (const char *banner : later_rounds) {
    std::cout << banner;
    ASSERT_EQ(0, cluster->cluster(threads, centroids));

    for (const auto &centroid : centroids) {
      print_centroid(centroid);
      ASSERT_EQ(0u, centroid.similars().size());
    }
  }

  ASSERT_EQ(0, cluster->classify(threads, centroids));
  ASSERT_EQ(0, cluster->label(threads, centroids, &labels));
}


================================================
FILE: tests/core/algorithm/cluster/opt_kmeans_cluster_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cmath>
#include <random>
#include <ailego/algorithm/kmeans.h>
#include <gtest/gtest.h>
#include <zvec/ailego/container/params.h>
#include "zvec/core/framework/index_framework.h"

using namespace zvec::core;
using namespace zvec::ailego;
using namespace zvec::ailego;

// Smoke test for the "OptKmeansCluster" plugin on random FP32 vectors.
// classify()/label() are expected to fail while the centroid list is still
// empty, cluster() is run three times on the same centroid list, and
// classify()/label() are expected to succeed afterwards.
TEST(OptKmeansCluster, General) {
  // Prepare index data
  const uint32_t count = 5000u;
  const uint32_t dimension = 33u;

  IndexMeta index_meta;
  index_meta.set_meta(IndexMeta::DataType::DT_FP32, dimension);

  std::shared_ptr<CompactIndexFeatures> features(
      new CompactIndexFeatures(index_meta));

  std::random_device seed_source;
  std::mt19937 engine(seed_source());
  std::uniform_real_distribution<float> uniform(0.0, 5.0);

  for (uint32_t row = 0; row != count; ++row) {
    std::vector<float> sample(dimension);
    for (float &component : sample) {
      component = uniform(engine);
    }
    features->emplace(sample.data());
  }

  // Create an OptKmeans cluster through the factory
  IndexCluster::Pointer cluster =
      IndexFactory::CreateCluster("OptKmeansCluster");
  ASSERT_TRUE(!!cluster);

  Params params;
  params.set("proxima.general.cluster.count", 1);
  params.set("proxima.optkmeans.cluster.count", 56);

  ASSERT_EQ(0, cluster->init(index_meta, params));
  ASSERT_EQ(0, cluster->mount(features));
  cluster->suggest(64u);

  auto threads = std::make_shared<SingleQueueIndexThreads>();

  // Prints follower count, score and the first three / last two components.
  const auto print_centroid = [](const IndexCluster::Centroid &centroid) {
    const auto &vec = centroid.vector<float>();

    std::cout << centroid.follows() << " (" << centroid.score() << ") { "
              << vec[0] << ", " << vec[1] << ", " << vec[2] << ", ... , "
              << vec[vec.size() - 2] << ", " << vec[vec.size() - 1] << " }"
              << std::endl;
  };

  std::cout << "---------- FIRST ----------\n";
  std::vector<IndexCluster::Centroid> centroids;
  std::vector<uint32_t> labels;
  // No centroids yet: both calls must report an error.
  ASSERT_NE(0, cluster->classify(threads, centroids));
  ASSERT_NE(0, cluster->label(threads, centroids, &labels));
  ASSERT_EQ(0, cluster->cluster(threads, centroids));

  for (const auto &centroid : centroids) {
    print_centroid(centroid);
    ASSERT_EQ(0u, centroid.similars().size());
  }

  // Two more rounds on the centroids produced so far.
  const char *const later_rounds[] = {"---------- SECOND ----------\n",
                                      "---------- THIRD ----------\n"};
  for (const char *banner : later_rounds) {
    std::cout << banner;
    ASSERT_EQ(0, cluster->cluster(threads, centroids));

    for (const auto &centroid : centroids) {
      print_centroid(centroid);
      ASSERT_EQ(0u, centroid.similars().size());
    }
  }

  ASSERT_EQ(0, cluster->classify(threads, centroids));
  ASSERT_EQ(0, cluster->label(threads, centroids, &labels));
}

// TEST(OptKmeansCluster, NoEmptyCentroids) {
//   // Prepare index data
//   const uint32_t count = 500u;
//   const uint32_t dimension = 8u;

//   IndexMeta index_meta;
//   index_meta.set_meta(IndexMeta::DataType::DT_FP32, dimension);
//   index_meta.set_metric("SquaredEuclidean", 0, Params());

//   std::shared_ptr<CompactIndexFeatures> features(
//       new CompactIndexFeatures(index_meta));

//   std::random_device rd;
//   std::mt19937 gen(rd());
//   std::uniform_real_distribution<float> dist(0.0, 5.0);

//   for (uint32_t i = 0; i < count; ++i) {
//     std::vector<float> vec(dimension);
//     for (size_t j = 0; j < dimension; ++j) {
//       vec[j] = dist(gen);
//     }
//     features->emplace(vec.data());
//   }

//   // Create a Kmeans cluster
//   IndexCluster::Pointer cluster =
//       IndexFactory::CreateCluster("OptKmeansCluster");
//   ASSERT_TRUE(!!cluster);

//   Params params;
//   ASSERT_EQ(0, cluster->init(index_meta, params));
//   ASSERT_EQ(0, cluster->mount(features));
//   cluster->suggest(20u);

//   auto threads = std::make_shared<SingleQueueIndexThreads>();
//   std::vector<IndexCluster::Centroid> centroids;
//   for (uint32_t i = 0; i < 3; ++i) {
//     std::vector<float> vec(dimension);
//     for (size_t j = 0; j < dimension; ++j) {
//       vec[j] = NAN;
//     }
//     centroids.emplace_back(vec.data(), vec.size() * sizeof(float));
//   }
//   ASSERT_EQ(0, cluster->cluster(threads, centroids));
//   ASSERT_EQ(3u, centroids.size());

//   for (uint32_t i = 0; i < 3; ++i) {
//     std::vector<float> vec(dimension);
//     for (size_t j = 0; j < dimension; ++j) {
//       vec[j] = dist(gen);
//     }
//     centroids.emplace_back(vec.data(), vec.size() * sizeof(float));
//   }
//   ASSERT_EQ(0, cluster->cluster(threads, centroids));
//   ASSERT_EQ(6u, centroids.size());

//   for (uint32_t i = 0; i < 3; ++i) {
//     std::vector<float> vec(dimension);
//     for (size_t j = 0; j < dimension; ++j) {
//       vec[j] = NAN;
//     }
//     centroids.emplace_back(vec.data(), vec.size() * sizeof(float));
//   }
//   ASSERT_EQ(0, cluster->cluster(threads, centroids));
//   ASSERT_EQ(9u, centroids.size());

//   for (uint32_t i = 0; i < 3; ++i) {
//     std::vector<float> vec(dimension);
//     for (size_t j = 0; j < dimension; ++j) {
//       vec[j] = dist(gen);
//     }
//     centroids.emplace_back(vec.data(), vec.size() * sizeof(float));
//   }
//   ASSERT_EQ(0, cluster->cluster(threads, centroids));
//   ASSERT_EQ(12u, centroids.size());

//   for (const auto &it : centroids) {
//     const auto &vec = it.vector<float>();

//     std::cout << it.follows() << " (" << it.score() << ") { " << vec[0] << ",
//     "
//               << vec[1] << ", " << vec[2] << ", ... , " << vec[vec.size() -
//               2]
//               << ", " << vec[vec.size() - 1] << " }" << std::endl;
//   }

//   params.set("proxima.optkmeans.cluster.purge_empty", true);
//   cluster->update(params);

//   ASSERT_EQ(12u, centroids.size());
//   ASSERT_EQ(0, cluster->cluster(threads, centroids));
//   ASSERT_EQ(7u, centroids.size());
//   for (const auto &it : centroids) {
//     const auto &vec = it.vector<float>();

//     std::cout << it.follows() << " (" << it.score() << ") { " << vec[0] << ",
//     "
//               << vec[1] << ", " << vec[2] << ", ... , " << vec[vec.size() -
//               2]
//               << ", " << vec[vec.size() - 1] << " }" << std::endl;
//   }
// }

// Smoke test for the "OptKmeansCluster" plugin on random DT_BINARY32 vectors.
// classify()/label() are expected to fail while the centroid list is still
// empty, cluster() is run three times on the same centroid list, and
// classify()/label() are expected to succeed afterwards.
TEST(OptKmeansCluster, BinaryGeneral) {
  // Prepare index data
  const uint32_t count = 5000u;
  const uint32_t dimension = 1024u;

  IndexMeta index_meta;
  index_meta.set_meta(IndexMeta::DataType::DT_BINARY32, dimension);
  index_meta.set_metric("SquaredEuclidean", 0, Params());

  std::shared_ptr<CompactIndexFeatures> features(
      new CompactIndexFeatures(index_meta));

  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_real_distribution<float> dist(0.0, 1.0);

  // Every bit is set with probability one half.
  for (uint32_t i = 0; i < count; ++i) {
    BinaryVector<uint32_t> vec(dimension);
    for (size_t j = 0; j < dimension; ++j) {
      if (dist(gen) >= 0.5) {
        vec.set(j);
      }
    }
    features->emplace(vec.data());
  }

  // Create a Kmeans cluster
  IndexCluster::Pointer cluster =
      IndexFactory::CreateCluster("OptKmeansCluster");
  ASSERT_TRUE(!!cluster);

  Params params;
  params.set("proxima.general.cluster.count", 1);
  params.set("proxima.optkmeans.cluster.count", 56);

  ASSERT_EQ(0, cluster->init(index_meta, params));
  ASSERT_EQ(0, cluster->mount(features));
  cluster->suggest(64u);

  auto threads = std::make_shared<SingleQueueIndexThreads>();

  // Prints follower count, score and bits 0, 1, 2, 30 and 31 of the first
  // 32-bit word of the centroid. The shift amounts for the last two bits are
  // derived from the word width; writing them as `!!(sizeof(uint32_t) - k)`
  // would collapse both to 1 and print bit 1 twice.
  const auto print_centroid = [](const IndexCluster::Centroid &centroid) {
    const auto &vec = centroid.vector<uint32_t>();
    const uint32_t word = vec[0];
    const uint32_t word_bits = sizeof(uint32_t) * 8;
    const auto bit = [word](uint32_t index) {
      return ((word >> index) & 1u) != 0;
    };

    std::cout << centroid.follows() << " (" << centroid.score() << ") { "
              << bit(0) << ", " << bit(1) << ", " << bit(2) << ", ... , "
              << bit(word_bits - 2) << ", " << bit(word_bits - 1) << " }"
              << std::endl;
  };

  std::cout << "---------- FIRST ----------\n";
  std::vector<IndexCluster::Centroid> centroids;
  std::vector<uint32_t> labels;
  // No centroids yet: both calls must report an error.
  ASSERT_NE(0, cluster->classify(threads, centroids));
  ASSERT_NE(0, cluster->label(threads, centroids, &labels));
  ASSERT_EQ(0, cluster->cluster(threads, centroids));

  for (const auto &it : centroids) {
    print_centroid(it);
    ASSERT_EQ(0u, it.similars().size());
  }

  std::cout << "---------- SECOND ----------\n";
  ASSERT_EQ(0, cluster->cluster(threads, centroids));

  for (const auto &it : centroids) {
    print_centroid(it);
    ASSERT_EQ(0u, it.similars().size());
  }

  std::cout << "---------- THIRD ----------\n";
  ASSERT_EQ(0, cluster->cluster(threads, centroids));

  for (const auto &it : centroids) {
    print_centroid(it);
    ASSERT_EQ(0u, it.similars().size());
  }

  ASSERT_EQ(0, cluster->classify(threads, centroids));
  ASSERT_EQ(0, cluster->label(threads, centroids, &labels));
}


// Smoke test for the "OptKmeansCluster" plugin on random DT_INT4 vectors.
// First checks that mounting features of dimension 66 is rejected, then
// re-initialises the cluster with dimension 64 and runs the usual sequence:
// classify()/label() fail on an empty centroid list, cluster() succeeds three
// times, classify()/label() succeed afterwards.
TEST(OptKmeansCluster, IN4General) {
  // Prepare index data
  const uint32_t count = 5000u;
  const uint32_t dimension = 64u;
  const uint32_t dimension_wrong = 66u;

  IndexMeta index_meta;
  index_meta.set_meta(IndexMeta::DataType::DT_INT4, dimension);
  index_meta.set_metric("SquaredEuclidean", 0, Params());

  IndexMeta index_meta_wrong;
  index_meta_wrong.set_meta(IndexMeta::DataType::DT_INT4, dimension_wrong);
  index_meta_wrong.set_metric("SquaredEuclidean", 0, Params());

  std::shared_ptr<CompactIndexFeatures> features(
      new CompactIndexFeatures(index_meta));

  std::shared_ptr<CompactIndexFeatures> features_wrong(
      new CompactIndexFeatures(index_meta_wrong));

  std::random_device rd;
  std::mt19937 gen(rd());
  // uniform_int_distribution is only specified for short/int/long/long long
  // and their unsigned variants, so draw an int and narrow it to a byte.
  std::uniform_int_distribution<int> dist(0, UINT8_MAX);

  // Each byte carries two 4-bit elements, hence dimension / 2 bytes per row.
  for (uint32_t i = 0; i < count; ++i) {
    std::vector<uint8_t> vec(dimension / 2);
    std::vector<uint8_t> vec_wrong(dimension_wrong / 2);
    for (size_t j = 0; j < dimension / 2; ++j) {
      vec[j] = static_cast<uint8_t>(dist(gen));
    }
    for (size_t j = 0; j < dimension_wrong / 2; ++j) {
      vec_wrong[j] = static_cast<uint8_t>(dist(gen));
    }
    features->emplace(vec.data());
    features_wrong->emplace(vec_wrong.data());
  }

  // Create a OptKmeans cluster
  IndexCluster::Pointer cluster =
      IndexFactory::CreateCluster("OptKmeansCluster");
  ASSERT_TRUE(!!cluster);

  // init() accepts the 66-dimensional meta, mount() must reject the features.
  // NOTE(review): presumably 66 is not a supported INT4 dimension - confirm
  // the exact rule in the cluster implementation.
  Params params;
  ASSERT_EQ(0, cluster->init(index_meta_wrong, params));
  ASSERT_NE(0, cluster->mount(features_wrong));

  params.set("proxima.general.cluster.count", 1);
  params.set("proxima.optkmeans.cluster.count", 56);

  ASSERT_EQ(0, cluster->init(index_meta, params));
  ASSERT_EQ(0, cluster->mount(features));
  cluster->suggest(64u);

  auto threads = std::make_shared<SingleQueueIndexThreads>();

  std::cout << "---------- FIRST ----------\n";
  std::vector<IndexCluster::Centroid> centroids;
  std::vector<uint32_t> labels;
  // No centroids yet: both calls must report an error.
  ASSERT_NE(0, cluster->classify(threads, centroids));
  ASSERT_NE(0, cluster->label(threads, centroids, &labels));
  ASSERT_EQ(0, cluster->cluster(threads, centroids));

  // NOTE(review): the centroid bytes are viewed as float purely for printing;
  // confirm this is the intended view for INT4 centroids.
  for (const auto &it : centroids) {
    const auto &vec = it.vector<float>();

    std::cout << it.follows() << " (" << it.score() << ") { " << vec[0] << ", "
              << vec[1] << ", " << vec[2] << ", ... , " << vec[vec.size() - 2]
              << ", " << vec[vec.size() - 1] << " }" << std::endl;
    ASSERT_EQ(0u, it.similars().size());
  }

  std::cout << "---------- SECOND ----------\n";
  ASSERT_EQ(0, cluster->cluster(threads, centroids));

  for (const auto &it : centroids) {
    const auto &vec = it.vector<float>();

    std::cout << it.follows() << " (" << it.score() << ") { " << vec[0] << ", "
              << vec[1] << ", " << vec[2] << ", ... , " << vec[vec.size() - 2]
              << ", " << vec[vec.size() - 1] << " }" << std::endl;
    ASSERT_EQ(0u, it.similars().size());
  }

  std::cout << "---------- THIRD ----------\n";
  ASSERT_EQ(0, cluster->cluster(threads, centroids));

  for (const auto &it : centroids) {
    const auto &vec = it.vector<float>();

    std::cout << it.follows() << " (" << it.score() << ") { " << vec[0] << ", "
              << vec[1] << ", " << vec[2] << ", ... , " << vec[vec.size() - 2]
              << ", " << vec[vec.size() - 1] << " }" << std::endl;
    ASSERT_EQ(0u, it.similars().size());
  }

  ASSERT_EQ(0, cluster->classify(threads, centroids));
  ASSERT_EQ(0, cluster->label(threads, centroids, &labels));
}


// Checks that clustering INT4 data gives the same result as clustering the
// same values stored as INT8: both clusters start from identical initial
// centroids and must end with equal follower counts and scores per centroid.
TEST(OptKmeansCluster, IN4Correctness) {
  // Prepare index data
  const uint32_t count = 5000u;
  const uint32_t dimension = 64u;

  IndexMeta index_meta1;
  index_meta1.set_meta(IndexMeta::DataType::DT_INT8, dimension);
  index_meta1.set_metric("SquaredEuclidean", 0, Params());

  IndexMeta index_meta2;
  index_meta2.set_meta(IndexMeta::DataType::DT_INT4, dimension);
  index_meta2.set_metric("SquaredEuclidean", 0, Params());

  std::shared_ptr<CompactIndexFeatures> features1(
      new CompactIndexFeatures(index_meta1));

  std::shared_ptr<CompactIndexFeatures> features2(
      new CompactIndexFeatures(index_meta2));

  std::random_device rd;
  std::mt19937 gen(rd());
  // Values are restricted to [-8, 7] so that they fit a signed 4-bit element.
  std::uniform_int_distribution<int> dist(-8, 7);

  // Generate features: the same value goes into the INT8 and the INT4 vector
  for (size_t i = 0; i < count; ++i) {
    NumericalVector<int8_t> vec1(dimension);
    NibbleVector<int32_t> vec2(dimension);

    for (size_t j = 0; j < dimension; ++j) {
      const int8_t val = static_cast<int8_t>(dist(gen));
      vec1[j] = val;
      vec2.set(j, val);
    }
    features1->emplace(vec1.data());
    features2->emplace(vec2.data());
  }

  // Create a OptKmeans cluster of int8, and cluster only once
  IndexCluster::Pointer cluster_once =
      IndexFactory::CreateCluster("OptKmeansCluster");
  ASSERT_TRUE(!!cluster_once);

  Params params_once;
  params_once.set("proxima.general.cluster.count", 65);
  params_once.set("proxima.optkmeans.cluster.count", 63);
  params_once.set("proxima.optkmeans.cluster.max_iterations", 1);
  // Use KMC2 to init centroids
  params_once.set("proxima.optkmeans.cluster.markov_chain_length", 20);

  ASSERT_EQ(0, cluster_once->init(index_meta1, params_once));
  ASSERT_EQ(0, cluster_once->mount(features1));
  cluster_once->suggest(63);

  auto threads = std::make_shared<SingleQueueIndexThreads>();

  // Cluster once and get centroids
  std::vector<IndexCluster::Centroid> centroids1;
  ASSERT_EQ(0, cluster_once->cluster(threads, centroids1));

  // Use centroids1 as init centroids to both int8 and int4 cluster
  // Create a int8 cluster
  IndexCluster::Pointer cluster_int8 =
      IndexFactory::CreateCluster("OptKmeansCluster");
  ASSERT_TRUE(!!cluster_int8);

  Params params_int8;
  params_int8.set("proxima.general.cluster.count", 65);
  params_int8.set("proxima.optkmeans.cluster.count", 63);

  ASSERT_EQ(0, cluster_int8->init(index_meta1, params_int8));
  ASSERT_EQ(0, cluster_int8->mount(features1));
  cluster_int8->suggest(63u);

  // Create a int4 cluster
  IndexCluster::Pointer cluster_int4 =
      IndexFactory::CreateCluster("OptKmeansCluster");
  ASSERT_TRUE(!!cluster_int4);

  Params params_int4;
  params_int4.set("proxima.general.cluster.count", 65);
  params_int4.set("proxima.optkmeans.cluster.count", 63);

  ASSERT_EQ(0, cluster_int4->init(index_meta2, params_int4));
  ASSERT_EQ(0, cluster_int4->mount(features2));
  cluster_int4->suggest(63u);

  std::vector<IndexCluster::Centroid> centroids2;

  // Use centroids of int8 to init centroids of int4
  for (size_t i = 0; i < centroids1.size(); ++i) {
    NibbleVector<int8_t> nvec;
    nvec.assign(reinterpret_cast<const int8_t *>(centroids1[i].feature()),
                dimension);
    IndexCluster::Centroid curr_centroid;
    curr_centroid.set_score(centroids1[i].score());
    curr_centroid.set_follows(centroids1[i].follows());
    // Two 4-bit elements per byte, hence dimension / 2 bytes of feature data.
    curr_centroid.set_feature(nvec.data(), nvec.dimension() >> 1);
    centroids2.push_back(curr_centroid);
  }

  ASSERT_EQ(0, cluster_int8->cluster(threads, centroids1));
  ASSERT_EQ(0, cluster_int4->cluster(threads, centroids2));

  // Must be fatal: the loop below indexes centroids2 with indices taken from
  // centroids1 and would read out of bounds if the sizes differed.
  ASSERT_EQ(centroids1.size(), centroids2.size());
  for (size_t i = 0; i < centroids1.size(); ++i) {
    EXPECT_EQ(centroids1[i].follows(), centroids2[i].follows());
    EXPECT_DOUBLE_EQ(centroids1[i].score(), centroids2[i].score());
  }
}

// Smoke test for the "OptKmeansCluster" plugin with the "InnerProduct" metric
// on random FP32 vectors in [-1, 1). classify()/label() are expected to fail
// while the centroid list is still empty, cluster() is run three times on the
// same centroid list, and classify()/label() are expected to succeed
// afterwards.
TEST(OptKmeansCluster, InnerProduct) {
  // Prepare index data
  const uint32_t count = 5000u;
  const uint32_t dimension = 33u;

  IndexMeta index_meta;
  index_meta.set_meta(IndexMeta::DataType::DT_FP32, dimension);
  index_meta.set_metric("InnerProduct", 0, Params());

  std::shared_ptr<CompactIndexFeatures> features(
      new CompactIndexFeatures(index_meta));

  std::random_device seed_source;
  std::mt19937 engine(seed_source());
  std::uniform_real_distribution<float> uniform(-1.0, 1.0);

  for (uint32_t row = 0; row != count; ++row) {
    std::vector<float> sample(dimension);
    for (float &component : sample) {
      component = uniform(engine);
    }
    features->emplace(sample.data());
  }

  // Create an OptKmeans cluster through the factory
  IndexCluster::Pointer cluster =
      IndexFactory::CreateCluster("OptKmeansCluster");
  ASSERT_TRUE(!!cluster);

  Params params;
  params.set("proxima.general.cluster.count", 1);
  params.set("proxima.optkmeans.cluster.count", 56);

  ASSERT_EQ(0, cluster->init(index_meta, params));
  ASSERT_EQ(0, cluster->mount(features));
  cluster->suggest(64u);

  auto threads = std::make_shared<SingleQueueIndexThreads>();

  // Prints follower count, score and the first three / last two components.
  const auto print_centroid = [](const IndexCluster::Centroid &centroid) {
    const auto &vec = centroid.vector<float>();

    std::cout << centroid.follows() << " (" << centroid.score() << ") { "
              << vec[0] << ", " << vec[1] << ", " << vec[2] << ", ... , "
              << vec[vec.size() - 2] << ", " << vec[vec.size() - 1] << " }"
              << std::endl;
  };

  std::cout << "---------- FIRST ----------\n";
  std::vector<IndexCluster::Centroid> centroids;
  std::vector<uint32_t> labels;
  // No centroids yet: both calls must report an error.
  ASSERT_NE(0, cluster->classify(threads, centroids));
  ASSERT_NE(0, cluster->label(threads, centroids, &labels));
  ASSERT_EQ(0, cluster->cluster(threads, centroids));

  for (const auto &centroid : centroids) {
    print_centroid(centroid);
    ASSERT_EQ(0u, centroid.similars().size());
  }

  // Two more rounds on the centroids produced so far.
  const char *const later_rounds[] = {"---------- SECOND ----------\n",
                                      "---------- THIRD ----------\n"};
  for (const char *banner : later_rounds) {
    std::cout << banner;
    ASSERT_EQ(0, cluster->cluster(threads, centroids));

    for (const auto &centroid : centroids) {
      print_centroid(centroid);
      ASSERT_EQ(0u, centroid.similars().size());
    }
  }

  ASSERT_EQ(0, cluster->classify(threads, centroids));
  ASSERT_EQ(0, cluster->label(threads, centroids, &labels));
}


================================================
FILE: tests/core/algorithm/flat/CMakeLists.txt
================================================
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)

# Collect every *_test.cc source below this directory (recursive glob).
file(GLOB_RECURSE ALL_TEST_SRCS *_test.cc)

# One cc_gtest() call per test source. The target name is the source file
# name without directory and extension (NAME_WE).
# NOTE(review): cc_gtest is presumably provided by the included
# cmake/bazel.cmake - confirm the meaning of STRICT/LIBS/SRCS/INCS there.
foreach(CC_SRCS ${ALL_TEST_SRCS})
  get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)
  cc_gtest(
      NAME ${CC_TARGET}
      STRICT
      LIBS zvec_ailego core_framework core_utility core_metric core_quantizer core_knn_flat 
      SRCS ${CC_SRCS}
      INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm
    )
endforeach()

================================================
FILE: tests/core/algorithm/flat/flat_builder_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "flat/flat_builder.h"
#include <future>
#include <iostream>
#include <vector>
#include <gtest/gtest.h>

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
#endif

using namespace zvec::core;
using namespace zvec::ailego;
using namespace std;

//! Draw a dimension uniformly from the closed range [1, 129] using a
//! Mersenne Twister engine seeded from std::random_device.
static inline size_t RandomDimension(void) {
  std::random_device seed_source;
  std::mt19937 engine(seed_source());
  std::uniform_int_distribution<size_t> dimension_range(1, 129);
  return dimension_range(engine);
}

static size_t DIMENSION = RandomDimension();
class FlatBuilderTest : public testing::Test {
 protected:
  void SetUp(void);
  void TearDown(void);

 public:
  static std::string dir_;
  static IndexMeta meta_;
};

std::string FlatBuilderTest ::dir_("flat_builder_test");
IndexMeta FlatBuilderTest::meta_;

//! Reset the shared meta to the default configuration used by the tests:
//! FP32 vectors of DIMENSION components, "SquaredEuclidean" metric and
//! column-major order. Individual tests override these as needed.
void FlatBuilderTest::SetUp() {
  meta_.set_meta(IndexMeta::DT_FP32, DIMENSION);
  meta_.set_metric("SquaredEuclidean", 0, Params());
  meta_.set_major_order(IndexMeta::MO_COLUMN);
}

//! Remove the output directory (dir_) after each test.
void FlatBuilderTest::TearDown(void) {
  // Assemble the command in a std::string so that the directory name can
  // never be truncated, which a fixed-size snprintf buffer would do silently.
  const std::string command = "rm -rf " + dir_;
  // Best-effort cleanup: the exit status is intentionally not checked
  // (this file disables -Wunused-result for that reason).
  system(command.c_str());
}

/*! Run the full builder pipeline and verify its statistics.
 *
 *  Initializes `builder` with FlatBuilderTest::meta_ and empty parameters,
 *  trains and builds from `holder`, dumps the result through a "FileDumper"
 *  to "<dir_>/TestGeneral", and finally checks that the builder reports zero
 *  trained and zero discarded records.
 *
 *  Every step uses a fatal gtest assertion, so a failure returns from this
 *  helper immediately (it does not by itself stop the calling test).
 *
 *  \param builder  builder to drive; must be non-null
 *  \param holder   source of the vectors to index
 */
void build_process(IndexBuilder::Pointer &builder,
                   IndexHolder::Pointer holder) {
  Params init_params;
  ASSERT_EQ(0, builder->init(FlatBuilderTest::meta_, init_params));
  ASSERT_EQ(0, builder->train(holder));
  ASSERT_EQ(0, builder->build(holder));

  auto file_dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(file_dumper, nullptr);

  const std::string dump_path = FlatBuilderTest::dir_ + "/TestGeneral";
  ASSERT_EQ(0, file_dumper->create(dump_path));
  ASSERT_EQ(0, builder->dump(file_dumper));
  ASSERT_EQ(0, file_dumper->close());

  auto &build_stats = builder->stats();
  ASSERT_EQ(0UL, build_stats.trained_count());
  ASSERT_EQ(0UL, build_stats.discarded_count());
}

//! A factory-created FlatBuilder initializes successfully with the fixture's
//! default meta and empty parameters.
TEST_F(FlatBuilderTest, TestInitSuccess) {
  auto flat_builder = IndexFactory::CreateBuilder("FlatBuilder");
  ASSERT_NE(flat_builder, nullptr);

  Params empty_params;
  const int init_status = flat_builder->init(meta_, empty_params);
  ASSERT_EQ(0, init_status);
}

//! init() must reject a meta whose metric name is unknown ("invalid") and
//! report IndexError_InvalidArgument.
TEST_F(FlatBuilderTest, TestInitFailedWithInvalidMeasure) {
  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("FlatBuilder");
  // Guard the dereference below, as every other test in this file does.
  ASSERT_NE(builder, nullptr);
  meta_.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);
  meta_.set_metric("invalid", 0, Params());
  Params params;
  int ret = builder->init(meta_, params);
  EXPECT_EQ(IndexError_InvalidArgument, ret);
}

//! init() must fail for a column-major INT8 meta whose dimension is not a
//! multiple of 4.
TEST_F(FlatBuilderTest, TestInt8InvalidColumnMajor) {
  // Round DIMENSION up to a multiple of 4, then add 2 so that the resulting
  // dimension is guaranteed not to be a multiple of 4.
  const size_t aligned_dim = (DIMENSION + 3) / 4 * 4;
  const size_t misaligned_dim = aligned_dim + 2;
  meta_.set_meta(IndexMeta::DT_INT8, misaligned_dim);
  meta_.set_metric("SquaredEuclidean", 0, Params());
  meta_.set_major_order(IndexMeta::MO_COLUMN);

  auto flat_builder = IndexFactory::CreateBuilder("FlatBuilder");
  ASSERT_NE(flat_builder, nullptr);

  // The requested layout must have been stored in the meta as given.
  ASSERT_EQ(IndexMeta::MO_COLUMN, meta_.major_order());

  Params empty_params;
  ASSERT_NE(0, flat_builder->init(meta_, empty_params));
}

//! With the major order left undefined, an INT8 meta of arbitrary (random)
//! dimension must be accepted by init().
TEST_F(FlatBuilderTest, TestInt8WithRandomDimension) {
  const size_t random_dim = DIMENSION;
  meta_.set_meta(IndexMeta::DT_INT8, random_dim);
  meta_.set_metric("SquaredEuclidean", 0, Params());
  meta_.set_major_order(IndexMeta::MO_UNDEFINED);

  auto flat_builder = IndexFactory::CreateBuilder("FlatBuilder");
  ASSERT_NE(flat_builder, nullptr);

  Params empty_params;
  const int init_status = flat_builder->init(meta_, empty_params);
  ASSERT_EQ(0, init_status);
}

//! Initialize a column-major BINARY32/Hamming builder whose dimension is a
//! multiple of 32 plus 2.
//! NOTE(review): the test name says "Invalid", yet the assertion expects
//! init() to succeed — confirm which of the two reflects the intended
//! behavior.
TEST_F(FlatBuilderTest, TestBinaryInvalidColumnMajor) {
  // Round DIMENSION up to a multiple of 32; the meta below uses dim + 2.
  size_t dim = (DIMENSION + 31) / 32 * 32;
  meta_.set_metric("Hamming", 0, Params());
  meta_.set_meta(IndexMeta::DT_BINARY32, dim + 2);
  meta_.set_major_order(IndexMeta::MO_COLUMN);
  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("FlatBuilder");
  ASSERT_NE(builder, nullptr);
  Params params;
  ASSERT_EQ(0, builder->init(meta_, params));
}

//! Train and build a row-major FP32 index from 2000 vectors.
TEST_F(FlatBuilderTest, TestBuildWithRowMajor) {
  // Data type and dimension are the ones set by SetUp(); only the metric and
  // the major order are (re)configured here.
  meta_.set_metric("SquaredEuclidean", 0, Params());
  meta_.set_major_order(IndexMeta::MO_ROW);
  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("FlatBuilder");
  ASSERT_NE(builder, nullptr);
  Params params;
  ASSERT_EQ(0, builder->init(meta_, params));

  auto holder =
      std::make_shared<OnePassIndexHolder<IndexMeta::DT_FP32>>(DIMENSION);
  const size_t doc_cnt = 2000UL;
  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<float> vec(DIMENSION);
    for (size_t j = 0; j < DIMENSION; ++j) {
      // Every component of document i holds the value i.
      vec[j] = static_cast<float>(i);
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }

  EXPECT_EQ(0, builder->train(holder));
  EXPECT_EQ(0, builder->build(holder));
}

//! Train and build a row-major INT8 index from 128 vectors.
TEST_F(FlatBuilderTest, TestInt8BuildWithRowMajor) {
  meta_.set_metric("SquaredEuclidean", 0, Params());
  meta_.set_meta(IndexMeta::DT_INT8, DIMENSION);
  meta_.set_major_order(IndexMeta::MO_ROW);
  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("FlatBuilder");
  ASSERT_NE(builder, nullptr);
  Params params;
  ASSERT_EQ(0, builder->init(meta_, params));

  auto holder =
      std::make_shared<OnePassIndexHolder<IndexMeta::DT_INT8>>(DIMENSION);
  const size_t doc_cnt = 128UL;
  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<int8_t> vec(DIMENSION);
    for (size_t j = 0; j < DIMENSION; ++j) {
      // i % 128 is always within [0, 127], so it fits in int8_t.
      vec[j] = static_cast<int8_t>(i % 128);
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }

  EXPECT_EQ(0, builder->train(holder));
  EXPECT_EQ(0, builder->build(holder));
}

//! Train and build a row-major BINARY32/Hamming index from 128 vectors.
TEST_F(FlatBuilderTest, TestBinaryBuildWithRowMajor) {
  // Round DIMENSION up to a multiple of 32.
  const size_t dim = (DIMENSION + 31) / 32 * 32;
  meta_.set_metric("Hamming", 0, Params());
  meta_.set_meta(IndexMeta::DT_BINARY32, dim);
  meta_.set_major_order(IndexMeta::MO_ROW);
  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("FlatBuilder");
  ASSERT_NE(builder, nullptr);
  Params params;
  ASSERT_EQ(0, builder->init(meta_, params));

  auto holder =
      std::make_shared<OnePassIndexHolder<IndexMeta::DT_BINARY32>>(dim);
  const size_t doc_cnt = 128UL;
  for (size_t i = 0; i < doc_cnt; i++) {
    BinaryVector<uint32_t> vec(dim);
    // Document i gets its first min(i, dim) bits set.
    for (size_t j = 0; j < dim && j < i; ++j) {
      vec.set(j);
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }

  EXPECT_EQ(0, builder->train(holder));
  EXPECT_EQ(0, builder->build(holder));
}

//! Train and build a column-major FP32 index from 2000 vectors.
TEST_F(FlatBuilderTest, TestBuildWithColumnMajor) {
  meta_.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);
  meta_.set_metric("SquaredEuclidean", 0, Params());
  meta_.set_major_order(IndexMeta::MO_COLUMN);
  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("FlatBuilder");
  ASSERT_NE(builder, nullptr);
  Params params;
  ASSERT_EQ(0, builder->init(meta_, params));

  auto holder =
      std::make_shared<OnePassIndexHolder<IndexMeta::DT_FP32>>(DIMENSION);
  const size_t doc_cnt = 2000UL;
  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<float> vec(DIMENSION);
    for (size_t j = 0; j < DIMENSION; ++j) {
      // Every component of document i holds the value i.
      vec[j] = static_cast<float>(i);
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }

  EXPECT_EQ(0, builder->train(holder));
  EXPECT_EQ(0, builder->build(holder));
}

//! Train and build a column-major INT8 index (dimension rounded up to a
//! multiple of 4) from 128 vectors.
TEST_F(FlatBuilderTest, TestInt8BuildWithColumnMajor) {
  const size_t dim = (DIMENSION + 3) / 4 * 4;
  meta_.set_meta(IndexMeta::DataType::DT_INT8, dim);
  meta_.set_metric("SquaredEuclidean", 0, Params());
  meta_.set_major_order(IndexMeta::MO_COLUMN);
  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("FlatBuilder");
  ASSERT_NE(builder, nullptr);
  Params params;
  ASSERT_EQ(0, builder->init(meta_, params));

  auto holder = std::make_shared<OnePassIndexHolder<IndexMeta::DT_INT8>>(dim);
  const size_t doc_cnt = 128UL;
  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<int8_t> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      // i % 128 is always within [0, 127], so it fits in int8_t.
      vec[j] = static_cast<int8_t>(i % 128);
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }

  EXPECT_EQ(0, builder->train(holder));
  EXPECT_EQ(0, builder->build(holder));
}

//! Train and build a column-major BINARY32/Hamming index from 128 vectors.
TEST_F(FlatBuilderTest, TestBinaryBuildWithColumnMajor) {
  // Round DIMENSION up to a multiple of 32.
  const size_t dim = (DIMENSION + 31) / 32 * 32;
  meta_.set_metric("Hamming", 0, Params());
  meta_.set_meta(IndexMeta::DT_BINARY32, dim);
  meta_.set_major_order(IndexMeta::MO_COLUMN);
  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("FlatBuilder");
  ASSERT_NE(builder, nullptr);
  Params params;
  ASSERT_EQ(0, builder->init(meta_, params));

  auto holder =
      std::make_shared<OnePassIndexHolder<IndexMeta::DT_BINARY32>>(dim);
  const size_t doc_cnt = 128UL;
  for (size_t i = 0; i < doc_cnt; i++) {
    BinaryVector<uint32_t> vec(dim);
    // Document i gets its first min(i, dim) bits set.
    for (size_t j = 0; j < dim && j < i; ++j) {
      vec.set(j);
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }

  EXPECT_EQ(0, builder->train(holder));
  EXPECT_EQ(0, builder->build(holder));
}

//! Full pipeline (init, train, build, dump) for a row-major FP32 index,
//! followed by cleanup().
TEST_F(FlatBuilderTest, TestWithRowMajor) {
  meta_.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);
  meta_.set_metric("SquaredEuclidean", 0, Params());
  meta_.set_major_order(IndexMeta::MO_ROW);
  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("FlatBuilder");
  ASSERT_NE(builder, nullptr);

  auto holder =
      std::make_shared<OnePassIndexHolder<IndexMeta::DT_FP32>>(DIMENSION);
  const size_t doc_cnt = 2000UL;
  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<float> vec(DIMENSION);
    for (size_t j = 0; j < DIMENSION; ++j) {
      // Every component of document i holds the value i.
      vec[j] = static_cast<float>(i);
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }
  // build_process() reports problems through fatal assertions, which only
  // return from the helper; stop this test too if any of them fired.
  ASSERT_NO_FATAL_FAILURE(build_process(builder, holder));

  // The builder must clean up successfully after a completed dump.
  ASSERT_EQ(0, builder->cleanup());
}

//! Full pipeline (init, train, build, dump) for a row-major INT8 index,
//! followed by cleanup().
TEST_F(FlatBuilderTest, TestInt8WithRowMajor) {
  meta_.set_meta(IndexMeta::DataType::DT_INT8, DIMENSION);
  meta_.set_metric("SquaredEuclidean", 0, Params());
  meta_.set_major_order(IndexMeta::MO_ROW);
  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("FlatBuilder");
  ASSERT_NE(builder, nullptr);

  auto holder =
      std::make_shared<OnePassIndexHolder<IndexMeta::DT_INT8>>(DIMENSION);
  const size_t doc_cnt = 128UL;
  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<int8_t> vec(DIMENSION);
    for (size_t j = 0; j < DIMENSION; ++j) {
      // i % 128 is always within [0, 127], so it fits in int8_t.
      vec[j] = static_cast<int8_t>(i % 128);
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }
  // build_process() reports problems through fatal assertions, which only
  // return from the helper; stop this test too if any of them fired.
  ASSERT_NO_FATAL_FAILURE(build_process(builder, holder));

  // The builder must clean up successfully after a completed dump.
  ASSERT_EQ(0, builder->cleanup());
}

//! Full pipeline (init, train, build, dump) for a row-major BINARY32/Hamming
//! index, followed by cleanup().
TEST_F(FlatBuilderTest, TestBinaryWithRowMajor) {
  // Round DIMENSION up to a multiple of 32.
  const size_t dim = (DIMENSION + 31) / 32 * 32;
  meta_.set_metric("Hamming", 0, Params());
  meta_.set_meta(IndexMeta::DT_BINARY32, dim);
  meta_.set_major_order(IndexMeta::MO_ROW);
  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("FlatBuilder");
  ASSERT_NE(builder, nullptr);

  auto holder =
      std::make_shared<OnePassIndexHolder<IndexMeta::DT_BINARY32>>(dim);
  const size_t doc_cnt = 128UL;
  for (size_t i = 0; i < doc_cnt; i++) {
    BinaryVector<uint32_t> vec(dim);
    // Document i gets its first min(i, dim) bits set.
    for (size_t j = 0; j < dim && j < i; ++j) {
      vec.set(j);
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }
  // build_process() reports problems through fatal assertions, which only
  // return from the helper; stop this test too if any of them fired.
  ASSERT_NO_FATAL_FAILURE(build_process(builder, holder));

  // The builder must clean up successfully after a completed dump.
  ASSERT_EQ(0, builder->cleanup());
}

//! Full pipeline (init, train, build, dump) for a column-major FP32 index,
//! followed by cleanup().
TEST_F(FlatBuilderTest, TestWithColumnMajor) {
  meta_.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);
  meta_.set_metric("SquaredEuclidean", 0, Params());
  meta_.set_major_order(IndexMeta::MO_COLUMN);
  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("FlatBuilder");
  ASSERT_NE(builder, nullptr);

  auto holder =
      std::make_shared<OnePassIndexHolder<IndexMeta::DT_FP32>>(DIMENSION);
  const size_t doc_cnt = 2000UL;
  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<float> vec(DIMENSION);
    for (size_t j = 0; j < DIMENSION; ++j) {
      // Every component of document i holds the value i.
      vec[j] = static_cast<float>(i);
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }
  // build_process() reports problems through fatal assertions, which only
  // return from the helper; stop this test too if any of them fired.
  ASSERT_NO_FATAL_FAILURE(build_process(builder, holder));

  // The builder must clean up successfully after a completed dump.
  ASSERT_EQ(0, builder->cleanup());
}

//! Full pipeline (init, train, build, dump) for a column-major INT8 index
//! (dimension rounded up to a multiple of 4), followed by cleanup().
TEST_F(FlatBuilderTest, TestInt8WithColumnMajor) {
  const size_t dim = (DIMENSION + 3) / 4 * 4;
  meta_.set_meta(IndexMeta::DataType::DT_INT8, dim);
  meta_.set_metric("SquaredEuclidean", 0, Params());
  meta_.set_major_order(IndexMeta::MO_COLUMN);
  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("FlatBuilder");
  ASSERT_NE(builder, nullptr);

  auto holder = std::make_shared<OnePassIndexHolder<IndexMeta::DT_INT8>>(dim);
  const size_t doc_cnt = 128UL;
  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<int8_t> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      // i % 128 is always within [0, 127], so it fits in int8_t.
      vec[j] = static_cast<int8_t>(i % 128);
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }
  // build_process() reports problems through fatal assertions, which only
  // return from the helper; stop this test too if any of them fired.
  ASSERT_NO_FATAL_FAILURE(build_process(builder, holder));

  // The builder must clean up successfully after a completed dump.
  ASSERT_EQ(0, builder->cleanup());
}

//! Full pipeline (init, train, build, dump) for a column-major
//! BINARY32/Hamming index, followed by cleanup().
TEST_F(FlatBuilderTest, TestBinaryWithColumnMajor) {
  // Round DIMENSION up to a multiple of 32.
  const size_t dim = (DIMENSION + 31) / 32 * 32;
  meta_.set_metric("Hamming", 0, Params());
  meta_.set_meta(IndexMeta::DT_BINARY32, dim);
  meta_.set_major_order(IndexMeta::MO_COLUMN);
  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("FlatBuilder");
  ASSERT_NE(builder, nullptr);

  auto holder =
      std::make_shared<OnePassIndexHolder<IndexMeta::DT_BINARY32>>(dim);
  const size_t doc_cnt = 128UL;
  for (size_t i = 0; i < doc_cnt; i++) {
    BinaryVector<uint32_t> vec(dim);
    // Document i gets its first min(i, dim) bits set.
    for (size_t j = 0; j < dim && j < i; ++j) {
      vec.set(j);
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }
  // build_process() reports problems through fatal assertions, which only
  // return from the helper; stop this test too if any of them fired.
  ASSERT_NO_FATAL_FAILURE(build_process(builder, holder));

  // The builder must clean up successfully after a completed dump.
  ASSERT_EQ(0, builder->cleanup());
}

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif

================================================
FILE: tests/core/algorithm/flat/flat_searcher_test.cpp
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "flat/flat_searcher.h"
#include <future>
#include <iostream>
#include <vector>
#include <ailego/utility/math_helper.h>
#include <gtest/gtest.h>
#include <zvec/ailego/container/vector.h>
#include "flat/flat_builder.h"

using namespace zvec::core;
using namespace zvec::ailego;
using namespace std;

//! Base path of the index files written and loaded by the tests below; each
//! test appends a suffix (".1", ".2") to obtain the path of one index.
static const std::string INDEX_PATH = "brute_force_searcher_test/out.indexes";

/*! Build a flat index from `holder` and dump it to `path`.
 *
 *  Creates a "FlatBuilder" and a "FileDumper", initializes the builder with
 *  `meta` and `params`, runs train/build/dump in one step and closes the
 *  dumper. Afterwards the builder must report zero trained and zero discarded
 *  records.
 *
 *  All checks are fatal gtest assertions: on failure this helper returns
 *  early, but the calling test keeps running unless it checks for fatal
 *  failures itself.
 *
 *  \param meta    index meta describing data type, dimension, metric, layout
 *  \param params  parameters forwarded to the builder's init()
 *  \param holder  source of the vectors to index
 *  \param path    destination file of the dumped index
 */
static void BuildIndex(const IndexMeta &meta, const Params &params,
                       IndexHolder::Pointer holder, const std::string &path) {
  auto builder = IndexFactory::CreateBuilder("FlatBuilder");
  auto dumper = IndexFactory::CreateDumper("FileDumper");

  ASSERT_NE(nullptr, builder);
  ASSERT_NE(nullptr, dumper);

  ASSERT_EQ(0, builder->init(meta, params));
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, IndexBuilder::TrainBuildAndDump(builder, holder, dumper));
  ASSERT_EQ(0, dumper->close());

  auto stats = builder->stats();
  ASSERT_EQ(0UL, stats.trained_count());
  ASSERT_EQ(0UL, stats.discarded_count());
}

//! Convenience overload: same as above with default-constructed (empty)
//! builder parameters. Delegates so that the build logic lives in one place.
static void BuildIndex(const IndexMeta &meta, IndexHolder::Pointer holder,
                       const std::string &path) {
  BuildIndex(meta, Params(), holder, path);
}

static void LoadIndex(const std::string &path,
                      IndexSearcher::Pointer &searcher) {
  searcher = IndexFactory::CreateSearcher("FlatSearcher");
  auto storage = IndexFactory::CreateStorage("MMapFileReadStorage");

  ASSERT_NE(nullptr, searcher);
  ASSERT_NE(nullptr, storage);

  Params params;
  ASSERT_EQ(0, searcher->init(params));
  ASSERT_EQ(0, storage->open(path, false));
  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));
}

/*! Randomly permute `keys` in place.
 *
 *  Walks from the last element down to the second one and swaps each element
 *  with one at a uniformly chosen position in [0, i]. Vectors with fewer than
 *  two elements are left untouched.
 *
 *  \param keys  sequence to permute
 */
static void Shuffle(std::vector<uint32_t> &keys) {
  if (keys.size() <= 1) {
    return;
  }
  // Seed a single generator up front: querying std::random_device and
  // re-seeding a Mersenne Twister on every iteration is needlessly expensive.
  std::mt19937 gen((std::random_device())());
  for (size_t i = keys.size() - 1; i > 0; i--) {
    std::uniform_int_distribution<size_t> dist(0, i);
    size_t pos = dist(gen);
    std::swap(keys[i], keys[pos]);
  }
}

/*! Row-major vs. column-major consistency for FP32 vectors.
 *
 *  Builds two indexes from the same random documents (one row-major, one
 *  column-major), then runs random queries against both and checks that they
 *  return the same documents with (almost) equal scores. Also exercises
 *  contexts that carry a filter, and contexts created by one searcher but
 *  used with the other.
 */
TEST(FlatSearcher, NoBatch_FP32) {
  std::mt19937 gen((std::random_device())());
  auto dist = std::uniform_real_distribution<float>(0.0f, 1.0f);
  size_t dim = (std::uniform_int_distribution<size_t>(1, 512))(gen);

  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("FlatBuilder");
  ASSERT_NE(builder, nullptr);

  auto holder = std::make_shared<MultiPassIndexHolder<IndexMeta::DT_FP32>>(dim);
  uint32_t document_count =
      (std::uniform_int_distribution<size_t>(1, 10000))(gen);
  for (uint32_t i = 0; i < document_count; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < vec.size(); ++j) {
      // Components of document i lie in [5 * i, 5 * i + 1).
      vec[j] = dist(gen) + static_cast<float>(i * 5);
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }

  // The helpers report problems through fatal assertions that only return
  // from the helper itself; ASSERT_NO_FATAL_FAILURE stops this test as well
  // instead of continuing with a missing index or an unloaded searcher.
  IndexMeta meta1;
  meta1.set_meta(IndexMeta::DataType::DT_FP32, dim);
  meta1.set_metric("SquaredEuclidean", 0, Params());
  meta1.set_major_order(IndexMeta::MO_ROW);
  ASSERT_NO_FATAL_FAILURE(BuildIndex(meta1, holder, INDEX_PATH + ".1"));

  IndexMeta meta2;
  meta2.set_meta(IndexMeta::DataType::DT_FP32, dim);
  meta2.set_metric("SquaredEuclidean", 0, Params());
  meta2.set_major_order(IndexMeta::MO_COLUMN);
  ASSERT_NO_FATAL_FAILURE(BuildIndex(meta2, holder, INDEX_PATH + ".2"));

  IndexSearcher::Pointer searcher1, searcher2;
  ASSERT_NO_FATAL_FAILURE(LoadIndex(INDEX_PATH + ".1", searcher1));
  ASSERT_NO_FATAL_FAILURE(LoadIndex(INDEX_PATH + ".2", searcher2));

  auto context1 = searcher1->create_context();
  auto context2 = searcher2->create_context();
  auto context3 = searcher1->create_context();
  auto context4 = searcher2->create_context();
  auto context5 = searcher1->create_context();
  auto context6 = searcher2->create_context();
  uint32_t topk = std::min(10u, document_count);
  context1->set_topk(topk);
  context2->set_topk(topk);
  context3->set_topk(topk);
  context4->set_topk(topk);
  // As the size checks below expect: a filter returning false keeps every
  // document, a filter returning true drops every document.
  context3->set_filter([](uint64_t) { return false; });
  context4->set_filter([](uint64_t) { return false; });
  context5->set_filter([](uint64_t) { return true; });
  context6->set_filter([](uint64_t) { return true; });

  MathHelper math_help = MathHelper();
  uint32_t query_count = (std::uniform_int_distribution<size_t>(1, 100))(gen);
  for (uint32_t i = 0; i < query_count; i++) {
    NumericalVector<float> vec(dim);
    for (uint32_t j = 0; j < vec.size(); ++j) {
      vec[j] = dist(gen);
    }
    const IndexQueryMeta qmeta(IndexMeta::DT_FP32, vec.dimension());

    ASSERT_EQ(0, searcher1->search_impl(vec.data(), qmeta, context1));
    ASSERT_EQ(0, searcher2->search_impl(vec.data(), qmeta, context2));
    ASSERT_EQ(topk, context1->result().size());
    ASSERT_EQ(topk, context2->result().size());

    // Test shared context: each searcher runs with a context that was
    // created by the other searcher.
    ASSERT_EQ(0, searcher1->search_impl(vec.data(), qmeta, context4));
    ASSERT_EQ(0, searcher2->search_impl(vec.data(), qmeta, context3));
    ASSERT_EQ(topk, context3->result().size());
    ASSERT_EQ(topk, context4->result().size());

    // Contexts whose filter rejects everything must yield no results.
    ASSERT_EQ(0, searcher1->search_impl(vec.data(), qmeta, context5));
    ASSERT_EQ(0, searcher2->search_impl(vec.data(), qmeta, context6));
    ASSERT_EQ(0u, context5->result().size());
    ASSERT_EQ(0u, context6->result().size());

    auto &result1 = context1->result();
    auto &result2 = context2->result();
    auto &result3 = context3->result();
    auto &result4 = context4->result();
    for (uint32_t j = 0; j < topk; ++j) {
      // Row-major vs. column-major.
      ASSERT_EQ(result1[j].index(), result2[j].index());
      ASSERT_EQ(result1[j].key(), result2[j].key());
      ASSERT_TRUE(
          math_help.IsAlmostEqual(result1[j].score(), result2[j].score(), 10));

      ASSERT_EQ(result1[j].index(), result3[j].index());
      ASSERT_EQ(result1[j].key(), result3[j].key());
      ASSERT_TRUE(
          math_help.IsAlmostEqual(result1[j].score(), result3[j].score(), 10));

      ASSERT_EQ(result2[j].index(), result4[j].index());
      ASSERT_EQ(result2[j].key(), result4[j].key());
      ASSERT_TRUE(
          math_help.IsAlmostEqual(result2[j].score(), result4[j].score(), 10));
    }
  }
}

/*! Row-major vs. column-major consistency for FP16 vectors.
 *
 *  Builds two indexes from the same random documents (one row-major, one
 *  column-major), then runs random queries against both and checks that they
 *  return the same documents with (almost) equal scores. Also exercises
 *  contexts that carry a filter, and contexts created by one searcher but
 *  used with the other.
 */
TEST(FlatSearcher, NoBatch_FP16) {
  std::mt19937 gen((std::random_device())());
  auto dist = std::uniform_real_distribution<float>(-1.0f, 1.0f);
  size_t dim = (std::uniform_int_distribution<size_t>(1, 64))(gen);

  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("FlatBuilder");
  ASSERT_NE(builder, nullptr);

  auto holder = std::make_shared<MultiPassIndexHolder<IndexMeta::DT_FP16>>(dim);
  uint32_t document_count =
      (std::uniform_int_distribution<size_t>(1, 10000))(gen);
  for (uint32_t i = 0; i < document_count; i++) {
    NumericalVector<Float16> vec(dim);
    for (size_t j = 0; j < vec.size(); ++j) {
      // Float value in [5 * i - 1, 5 * i + 1), stored as Float16.
      vec[j] = dist(gen) + static_cast<float>(i * 5);
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }

  // The helpers report problems through fatal assertions that only return
  // from the helper itself; ASSERT_NO_FATAL_FAILURE stops this test as well
  // instead of continuing with a missing index or an unloaded searcher.
  IndexMeta meta1;
  meta1.set_meta(IndexMeta::DataType::DT_FP16, dim);
  meta1.set_metric("SquaredEuclidean", 0, Params());
  meta1.set_major_order(IndexMeta::MO_ROW);
  ASSERT_NO_FATAL_FAILURE(BuildIndex(meta1, holder, INDEX_PATH + ".1"));

  IndexMeta meta2;
  meta2.set_meta(IndexMeta::DataType::DT_FP16, dim);
  meta2.set_metric("SquaredEuclidean", 0, Params());
  meta2.set_major_order(IndexMeta::MO_COLUMN);
  ASSERT_NO_FATAL_FAILURE(BuildIndex(meta2, holder, INDEX_PATH + ".2"));

  IndexSearcher::Pointer searcher1, searcher2;
  ASSERT_NO_FATAL_FAILURE(LoadIndex(INDEX_PATH + ".1", searcher1));
  ASSERT_NO_FATAL_FAILURE(LoadIndex(INDEX_PATH + ".2", searcher2));

  auto context1 = searcher1->create_context();
  auto context2 = searcher2->create_context();
  auto context3 = searcher1->create_context();
  auto context4 = searcher2->create_context();
  auto context5 = searcher1->create_context();
  auto context6 = searcher2->create_context();
  uint32_t topk = std::min(10u, document_count);
  context1->set_topk(topk);
  context2->set_topk(topk);
  context3->set_topk(topk);
  context4->set_topk(topk);
  // As the size checks below expect: a filter returning false keeps every
  // document, a filter returning true drops every document.
  context3->set_filter([](uint64_t) { return false; });
  context4->set_filter([](uint64_t) { return false; });
  context5->set_filter([](uint64_t) { return true; });
  context6->set_filter([](uint64_t) { return true; });

  MathHelper math_help = MathHelper();
  uint32_t query_count = (std::uniform_int_distribution<size_t>(1, 100))(gen);
  for (uint32_t i = 0; i < query_count; i++) {
    NumericalVector<Float16> vec(dim);
    for (uint32_t j = 0; j < vec.size(); ++j) {
      vec[j] = dist(gen);
    }
    const IndexQueryMeta qmeta(IndexMeta::DT_FP16, vec.dimension());

    ASSERT_EQ(0, searcher1->search_impl(vec.data(), qmeta, context1));
    ASSERT_EQ(0, searcher2->search_impl(vec.data(), qmeta, context2));
    ASSERT_EQ(topk, context1->result().size());
    ASSERT_EQ(topk, context2->result().size());

    // Test shared context: each searcher runs with a context that was
    // created by the other searcher.
    ASSERT_EQ(0, searcher1->search_impl(vec.data(), qmeta, context4));
    ASSERT_EQ(0, searcher2->search_impl(vec.data(), qmeta, context3));
    ASSERT_EQ(topk, context3->result().size());
    ASSERT_EQ(topk, context4->result().size());

    // Contexts whose filter rejects everything must yield no results.
    ASSERT_EQ(0, searcher1->search_impl(vec.data(), qmeta, context5));
    ASSERT_EQ(0, searcher2->search_impl(vec.data(), qmeta, context6));
    ASSERT_EQ(0u, context5->result().size());
    ASSERT_EQ(0u, context6->result().size());

    auto &result1 = context1->result();
    auto &result2 = context2->result();
    auto &result3 = context3->result();
    auto &result4 = context4->result();
    for (uint32_t j = 0; j < topk; ++j) {
      // Row-major vs. column-major; this test uses a much larger tolerance
      // argument (10000) than the FP32 variant.
      ASSERT_EQ(result1[j].index(), result2[j].index());
      ASSERT_EQ(result1[j].key(), result2[j].key());
      ASSERT_TRUE(math_help.IsAlmostEqual(result1[j].score(),
                                          result2[j].score(), 10000));

      ASSERT_EQ(result1[j].index(), result3[j].index());
      ASSERT_EQ(result1[j].key(), result3[j].key());
      ASSERT_TRUE(math_help.IsAlmostEqual(result1[j].score(),
                                          result3[j].score(), 10000));

      ASSERT_EQ(result2[j].index(), result4[j].index());
      ASSERT_EQ(result2[j].key(), result4[j].key());
      ASSERT_TRUE(math_help.IsAlmostEqual(result2[j].score(),
                                          result4[j].score(), 10000));
    }
  }
}

/*! Row-major vs. column-major consistency for INT8 vectors.
 *
 *  Builds two indexes from the same random documents (one row-major, one
 *  column-major; the dimension is rounded up to a multiple of 4), then runs
 *  random queries against both and checks that they return the same documents
 *  with equal scores. Also exercises contexts that carry a filter, and
 *  contexts created by one searcher but used with the other.
 */
TEST(FlatSearcher, NoBatch_INT8) {
  std::mt19937 gen((std::random_device())());
  auto dist = std::uniform_int_distribution<int>(-127, 127);
  size_t dim =
      ((std::uniform_int_distribution<size_t>(1, 512))(gen) + 3) / 4 * 4;

  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("FlatBuilder");
  ASSERT_NE(builder, nullptr);

  auto holder = std::make_shared<MultiPassIndexHolder<IndexMeta::DT_INT8>>(dim);
  uint32_t document_count =
      (std::uniform_int_distribution<uint32_t>(1, 10000))(gen);
  for (uint32_t i = 0; i < document_count; i++) {
    NumericalVector<int8_t> vec(dim);
    for (size_t j = 0; j < vec.size(); ++j) {
      // dist yields values in [-127, 127], all representable as int8_t.
      vec[j] = static_cast<int8_t>(dist(gen));
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }

  // The helpers report problems through fatal assertions that only return
  // from the helper itself; ASSERT_NO_FATAL_FAILURE stops this test as well
  // instead of continuing with a missing index or an unloaded searcher.
  IndexMeta meta1;
  meta1.set_meta(IndexMeta::DataType::DT_INT8, dim);
  meta1.set_metric("SquaredEuclidean", 0, Params());
  meta1.set_major_order(IndexMeta::MO_ROW);
  ASSERT_NO_FATAL_FAILURE(BuildIndex(meta1, holder, INDEX_PATH + ".1"));

  IndexMeta meta2;
  meta2.set_meta(IndexMeta::DataType::DT_INT8, dim);
  meta2.set_metric("SquaredEuclidean", 0, Params());
  meta2.set_major_order(IndexMeta::MO_COLUMN);
  ASSERT_NO_FATAL_FAILURE(BuildIndex(meta2, holder, INDEX_PATH + ".2"));

  IndexSearcher::Pointer searcher1, searcher2;
  ASSERT_NO_FATAL_FAILURE(LoadIndex(INDEX_PATH + ".1", searcher1));
  ASSERT_NO_FATAL_FAILURE(LoadIndex(INDEX_PATH + ".2", searcher2));

  auto context1 = searcher1->create_context();
  auto context2 = searcher2->create_context();
  auto context3 = searcher1->create_context();
  auto context4 = searcher2->create_context();
  auto context5 = searcher1->create_context();
  auto context6 = searcher2->create_context();
  uint32_t topk = std::min(10u, document_count);
  context1->set_topk(topk);
  context2->set_topk(topk);
  context3->set_topk(topk);
  context4->set_topk(topk);
  // As the size checks below expect: a filter returning false keeps every
  // document, a filter returning true drops every document.
  context3->set_filter([](uint64_t) { return false; });
  context4->set_filter([](uint64_t) { return false; });
  context5->set_filter([](uint64_t) { return true; });
  context6->set_filter([](uint64_t) { return true; });

  uint32_t query_count = (std::uniform_int_distribution<size_t>(1, 100))(gen);
  for (uint32_t i = 0; i < query_count; i++) {
    NumericalVector<int8_t> vec(dim);
    for (uint32_t j = 0; j < vec.size(); ++j) {
      vec[j] = static_cast<int8_t>(dist(gen));
    }
    const IndexQueryMeta qmeta(IndexMeta::DT_INT8, vec.dimension());

    ASSERT_EQ(0, searcher1->search_impl(vec.data(), qmeta, context1));
    ASSERT_EQ(0, searcher2->search_impl(vec.data(), qmeta, context2));
    ASSERT_EQ(topk, context1->result().size());
    ASSERT_EQ(topk, context2->result().size());

    // Test shared context: each searcher runs with a context that was
    // created by the other searcher.
    ASSERT_EQ(0, searcher1->search_impl(vec.data(), qmeta, context4));
    ASSERT_EQ(0, searcher2->search_impl(vec.data(), qmeta, context3));
    ASSERT_EQ(topk, context3->result().size());
    ASSERT_EQ(topk, context4->result().size());

    // Contexts whose filter rejects everything must yield no results.
    ASSERT_EQ(0, searcher1->search_impl(vec.data(), qmeta, context5));
    ASSERT_EQ(0, searcher2->search_impl(vec.data(), qmeta, context6));
    ASSERT_EQ(0u, context5->result().size());
    ASSERT_EQ(0u, context6->result().size());

    auto &result1 = context1->result();
    auto &result2 = context2->result();
    auto &result3 = context3->result();
    auto &result4 = context4->result();
    for (uint32_t j = 0; j < topk; ++j) {
      // Row-major vs. column-major.
      ASSERT_EQ(result1[j].index(), result2[j].index());
      ASSERT_EQ(result1[j].key(), result2[j].key());
      ASSERT_FLOAT_EQ(result1[j].score(), result2[j].score());

      ASSERT_EQ(result1[j].index(), result3[j].index());
      ASSERT_EQ(result1[j].key(), result3[j].key());
      ASSERT_FLOAT_EQ(result1[j].score(), result3[j].score());

      ASSERT_EQ(result2[j].index(), result4[j].index());
      ASSERT_EQ(result2[j].key(), result4[j].key());
      ASSERT_FLOAT_EQ(result2[j].score(), result4[j].score());
    }
  }
}

// Single-query search over DT_BINARY32 vectors. The same random data set is
// built once row-major and once column-major; both indexes must return the
// same top-k for every query. A filter that always returns false must not
// change the results, and a filter that always returns true must leave the
// result list empty.
TEST(FlatSearcher, NoBatch_Binary32) {
  std::random_device seed_source;
  std::mt19937 gen(seed_source());
  std::uniform_int_distribution<uint32_t> dist(1, 512);
  // Random dimension, rounded up to a multiple of 32.
  size_t dim = (dist(gen) + 31) / 32 * 32;

  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("FlatBuilder");
  ASSERT_NE(builder, nullptr);

  auto holder =
      std::make_shared<MultiPassIndexHolder<IndexMeta::DT_BINARY32>>(dim);
  std::uniform_int_distribution<uint32_t> document_count_dist(1, 10000);
  uint32_t document_count = document_count_dist(gen);
  for (uint32_t doc = 0; doc != document_count; ++doc) {
    BinaryVector<uint32_t> bits(dim);
    for (size_t pos = 0; pos < bits.dimension(); ++pos) {
      if (dist(gen) % 3 != 0) {
        continue;
      }
      bits.set(pos);
    }
    ASSERT_TRUE(holder->emplace(doc, bits));
  }

  // Index ".1": row-major layout.
  IndexMeta row_meta;
  row_meta.set_meta(IndexMeta::DataType::DT_BINARY32, dim);
  row_meta.set_metric("SquaredEuclidean", 0, Params());
  row_meta.set_major_order(IndexMeta::MO_ROW);
  BuildIndex(row_meta, holder, INDEX_PATH + ".1");

  // Index ".2": column-major layout.
  IndexMeta column_meta;
  column_meta.set_meta(IndexMeta::DataType::DT_BINARY32, dim);
  column_meta.set_metric("SquaredEuclidean", 0, Params());
  column_meta.set_major_order(IndexMeta::MO_COLUMN);
  BuildIndex(column_meta, holder, INDEX_PATH + ".2");

  IndexSearcher::Pointer row_searcher;
  IndexSearcher::Pointer column_searcher;
  LoadIndex(INDEX_PATH + ".1", row_searcher);
  LoadIndex(INDEX_PATH + ".2", column_searcher);

  auto row_ctx = row_searcher->create_context();
  auto column_ctx = column_searcher->create_context();
  auto row_pass_ctx = row_searcher->create_context();
  auto column_pass_ctx = column_searcher->create_context();
  auto row_reject_ctx = row_searcher->create_context();
  auto column_reject_ctx = column_searcher->create_context();

  uint32_t topk = std::min(10u, document_count);
  row_ctx->set_topk(topk);
  column_ctx->set_topk(topk);

  // Filters that reject nothing.
  row_pass_ctx->set_topk(topk);
  row_pass_ctx->set_filter([](uint64_t) { return false; });
  column_pass_ctx->set_topk(topk);
  column_pass_ctx->set_filter([](uint64_t) { return false; });

  // Filters that reject every key.
  row_reject_ctx->set_filter([](uint64_t) { return true; });
  column_reject_ctx->set_filter([](uint64_t) { return true; });

  std::uniform_int_distribution<size_t> query_count_dist(1, 100);
  uint32_t query_count = query_count_dist(gen);
  for (uint32_t q = 0; q != query_count; ++q) {
    BinaryVector<uint32_t> query(dim);
    for (size_t pos = 0; pos < query.dimension(); ++pos) {
      if (dist(gen) % 7 == 0) {
        query.set(pos);
      }
    }
    const IndexQueryMeta qmeta(IndexMeta::DT_BINARY32, query.dimension());

    // Plain search on each index with its own context.
    ASSERT_EQ(0, row_searcher->search_impl(query.data(), qmeta, row_ctx));
    ASSERT_EQ(0,
              column_searcher->search_impl(query.data(), qmeta, column_ctx));
    ASSERT_EQ(topk, row_ctx->result().size());
    ASSERT_EQ(topk, column_ctx->result().size());

    // Test shared context: each searcher runs with a context that was
    // created by the other searcher.
    ASSERT_EQ(0,
              row_searcher->search_impl(query.data(), qmeta, column_pass_ctx));
    ASSERT_EQ(0,
              column_searcher->search_impl(query.data(), qmeta, row_pass_ctx));
    ASSERT_EQ(topk, row_pass_ctx->result().size());
    ASSERT_EQ(topk, column_pass_ctx->result().size());

    // With every key filtered out the result lists must be empty.
    ASSERT_EQ(0,
              row_searcher->search_impl(query.data(), qmeta, row_reject_ctx));
    ASSERT_EQ(0, column_searcher->search_impl(query.data(), qmeta,
                                              column_reject_ctx));
    ASSERT_EQ(0u, row_reject_ctx->result().size());
    ASSERT_EQ(0u, column_reject_ctx->result().size());

    auto &row_hits = row_ctx->result();
    auto &column_hits = column_ctx->result();
    auto &row_pass_hits = row_pass_ctx->result();
    auto &column_pass_hits = column_pass_ctx->result();
    for (uint32_t rank = 0; rank != topk; ++rank) {
      // Row-major vs column-major.
      ASSERT_EQ(row_hits[rank].index(), column_hits[rank].index());
      ASSERT_EQ(row_hits[rank].key(), column_hits[rank].key());
      ASSERT_FLOAT_EQ(row_hits[rank].score(), column_hits[rank].score());

      // Unfiltered vs pass-through filter.
      ASSERT_EQ(row_hits[rank].index(), row_pass_hits[rank].index());
      ASSERT_EQ(row_hits[rank].key(), row_pass_hits[rank].key());
      ASSERT_FLOAT_EQ(row_hits[rank].score(), row_pass_hits[rank].score());

      ASSERT_EQ(column_hits[rank].index(), column_pass_hits[rank].index());
      ASSERT_EQ(column_hits[rank].key(), column_pass_hits[rank].key());
      ASSERT_FLOAT_EQ(column_hits[rank].score(),
                      column_pass_hits[rank].score());
    }
  }
}

// Batch search over DT_FP32 vectors. All queries are submitted in one batch
// call to the row-major index (searcher1) and then one at a time to the
// column-major index (searcher2); the top-k of both must agree in index and
// key, and in score within the tolerance passed to MathHelper::IsAlmostEqual.
TEST(FlatSearcher, RowBatch_FP32) {
  std::mt19937 gen((std::random_device())());
  auto dist = std::uniform_real_distribution<float>(0.0f, 1.0f);
  size_t dim = (std::uniform_int_distribution<size_t>(1, 512))(gen);

  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("FlatBuilder");
  ASSERT_NE(builder, nullptr);

  auto holder = std::make_shared<MultiPassIndexHolder<IndexMeta::DT_FP32>>(dim);
  uint32_t document_count =
      (std::uniform_int_distribution<size_t>(1, 10000))(gen);
  for (uint32_t i = 0; i < document_count; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < vec.size(); ++j) {
      // Every component of document i is shifted by 5 * i.
      vec[j] = dist(gen) + static_cast<float>(i * 5);
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }

  // Index ".1": row-major layout.
  IndexMeta meta1;
  meta1.set_meta(IndexMeta::DataType::DT_FP32, dim);
  meta1.set_metric("SquaredEuclidean", 0, Params());
  meta1.set_major_order(IndexMeta::MO_ROW);
  BuildIndex(meta1, holder, INDEX_PATH + ".1");

  // Index ".2": column-major layout.
  IndexMeta meta2;
  meta2.set_meta(IndexMeta::DataType::DT_FP32, dim);
  meta2.set_metric("SquaredEuclidean", 0, Params());
  meta2.set_major_order(IndexMeta::MO_COLUMN);
  BuildIndex(meta2, holder, INDEX_PATH + ".2");

  IndexSearcher::Pointer searcher1, searcher2;
  LoadIndex(INDEX_PATH + ".1", searcher1);
  LoadIndex(INDEX_PATH + ".2", searcher2);

  auto context1 = searcher1->create_context();
  auto context2 = searcher2->create_context();
  uint32_t topk = std::min(10u, document_count);
  context1->set_topk(topk);
  context1->set_filter([](uint64_t) { return false; });  // rejects nothing
  context2->set_topk(topk);

  // Queries are packed back to back so they can be searched as one batch.
  std::string query_buffer;
  uint32_t query_count = (std::uniform_int_distribution<size_t>(1, 100))(gen);

  for (uint32_t i = 0; i < query_count; i++) {
    NumericalVector<float> vec(dim);
    for (uint32_t j = 0; j < vec.size(); ++j) {
      vec[j] = dist(gen);
    }
    query_buffer.append((const char *)vec.data(), vec.bytes());
  }
  ASSERT_EQ(0, searcher1->search_impl(query_buffer.data(),
                                      IndexQueryMeta(IndexMeta::DT_FP32, dim),
                                      query_count, context1));

  // Only used for its byte size, i.e. the stride between packed queries.
  NumericalVector<float> vec(dim);
  MathHelper math_help = MathHelper();
  for (uint32_t i = 0; i < query_count; i++) {
    ASSERT_EQ(0, searcher2->search_impl(
                     (const float *)(&query_buffer[i * vec.bytes()]),
                     IndexQueryMeta(IndexMeta::DT_FP32, dim), context2));

    auto &result1 = context1->result(i);
    auto &result2 = context2->result();
    // Guard the indexed accesses below against a short result list.
    ASSERT_GE(result1.size(), topk);
    ASSERT_GE(result2.size(), topk);

    for (uint32_t j = 0; j < topk; ++j) {
      ASSERT_EQ(result1[j].index(), result2[j].index());
      ASSERT_EQ(result1[j].key(), result2[j].key());
      ASSERT_TRUE(
          math_help.IsAlmostEqual(result1[j].score(), result2[j].score(), 10));
    }
  }
}

// Batch search over DT_FP16 vectors. All queries are submitted in one batch
// call to the row-major index (searcher1) and then one at a time to the
// column-major index (searcher2); the top-k of both must agree in index and
// key, and in score within the tolerance passed to MathHelper::IsAlmostEqual.
TEST(FlatSearcher, RowBatch_FP16) {
  std::mt19937 gen((std::random_device())());
  auto dist = std::uniform_real_distribution<float>(-1.0f, 1.0f);
  size_t dim = (std::uniform_int_distribution<size_t>(1, 256))(gen);

  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("FlatBuilder");
  ASSERT_NE(builder, nullptr);

  auto holder = std::make_shared<MultiPassIndexHolder<IndexMeta::DT_FP16>>(dim);
  uint32_t document_count =
      (std::uniform_int_distribution<size_t>(1, 10000))(gen);
  for (uint32_t i = 0; i < document_count; i++) {
    NumericalVector<Float16> vec(dim);
    for (size_t j = 0; j < vec.size(); ++j) {
      // Every component of document i is shifted by 5 * i.
      vec[j] = dist(gen) + static_cast<float>(i * 5);
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }

  // Index ".1": row-major layout.
  IndexMeta meta1;
  meta1.set_meta(IndexMeta::DataType::DT_FP16, dim);
  meta1.set_metric("SquaredEuclidean", 0, Params());
  meta1.set_major_order(IndexMeta::MO_ROW);
  BuildIndex(meta1, holder, INDEX_PATH + ".1");

  // Index ".2": column-major layout.
  IndexMeta meta2;
  meta2.set_meta(IndexMeta::DataType::DT_FP16, dim);
  meta2.set_metric("SquaredEuclidean", 0, Params());
  meta2.set_major_order(IndexMeta::MO_COLUMN);
  BuildIndex(meta2, holder, INDEX_PATH + ".2");

  IndexSearcher::Pointer searcher1, searcher2;
  LoadIndex(INDEX_PATH + ".1", searcher1);
  LoadIndex(INDEX_PATH + ".2", searcher2);

  auto context1 = searcher1->create_context();
  auto context2 = searcher2->create_context();
  uint32_t topk = std::min(10u, document_count);
  context1->set_topk(topk);
  context2->set_topk(topk);
  context2->set_filter([](uint64_t) { return false; });  // rejects nothing

  // Queries are packed back to back so they can be searched as one batch.
  std::string query_buffer;
  uint32_t query_count = (std::uniform_int_distribution<size_t>(1, 100))(gen);

  for (uint32_t i = 0; i < query_count; i++) {
    NumericalVector<Float16> vec(dim);
    for (uint32_t j = 0; j < vec.size(); ++j) {
      vec[j] = dist(gen);
    }
    query_buffer.append((const char *)vec.data(), vec.bytes());
  }
  ASSERT_EQ(0, searcher1->search_impl(query_buffer.data(),
                                      IndexQueryMeta(IndexMeta::DT_FP16, dim),
                                      query_count, context1));

  // Only used for its byte size, i.e. the stride between packed queries.
  NumericalVector<Float16> vec(dim);
  MathHelper math_help = MathHelper();
  for (uint32_t i = 0; i < query_count; i++) {
    ASSERT_EQ(0, searcher2->search_impl((&query_buffer[i * vec.bytes()]),
                                        IndexQueryMeta(IndexMeta::DT_FP16, dim),
                                        context2));

    auto &result1 = context1->result(i);
    auto &result2 = context2->result();
    // Guard the indexed accesses below against a short result list.
    ASSERT_GE(result1.size(), topk);
    ASSERT_GE(result2.size(), topk);

    for (uint32_t j = 0; j < topk; ++j) {
      ASSERT_EQ(result1[j].index(), result2[j].index());
      ASSERT_EQ(result1[j].key(), result2[j].key());
      ASSERT_TRUE(math_help.IsAlmostEqual(result1[j].score(),
                                          result2[j].score(), 10000));
    }
  }
}

// Batch search over DT_INT8 vectors. The queries are searched as one batch on
// the row-major index and individually on the column-major index; both must
// produce the same top-k (index, key and score).
TEST(FlatSearcher, RowBatch_INT8) {
  std::random_device seed_source;
  std::mt19937 gen(seed_source());
  std::uniform_int_distribution<int> dist(-127, 127);
  // Random dimension, rounded up to a multiple of 4.
  std::uniform_int_distribution<size_t> dim_dist(1, 512);
  size_t dim = (dim_dist(gen) + 3) / 4 * 4;

  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("FlatBuilder");
  ASSERT_NE(builder, nullptr);

  auto holder = std::make_shared<MultiPassIndexHolder<IndexMeta::DT_INT8>>(dim);
  std::uniform_int_distribution<uint32_t> document_count_dist(1, 10000);
  uint32_t document_count = document_count_dist(gen);
  for (uint32_t doc = 0; doc != document_count; ++doc) {
    NumericalVector<int8_t> features(dim);
    for (size_t pos = 0; pos < features.size(); ++pos) {
      features[pos] = static_cast<int8_t>(dist(gen));
    }
    ASSERT_TRUE(holder->emplace(doc, features));
  }

  // Index ".1": row-major layout.
  IndexMeta row_meta;
  row_meta.set_meta(IndexMeta::DataType::DT_INT8, dim);
  row_meta.set_metric("SquaredEuclidean", 0, Params());
  row_meta.set_major_order(IndexMeta::MO_ROW);
  BuildIndex(row_meta, holder, INDEX_PATH + ".1");

  // Index ".2": column-major layout.
  IndexMeta column_meta;
  column_meta.set_meta(IndexMeta::DataType::DT_INT8, dim);
  column_meta.set_metric("SquaredEuclidean", 0, Params());
  column_meta.set_major_order(IndexMeta::MO_COLUMN);
  BuildIndex(column_meta, holder, INDEX_PATH + ".2");

  IndexSearcher::Pointer row_searcher;
  IndexSearcher::Pointer column_searcher;
  LoadIndex(INDEX_PATH + ".1", row_searcher);
  LoadIndex(INDEX_PATH + ".2", column_searcher);

  // Both contexts carry a filter that rejects nothing.
  auto batch_ctx = row_searcher->create_context();
  auto single_ctx = column_searcher->create_context();
  uint32_t topk = std::min(10u, document_count);
  batch_ctx->set_topk(topk);
  batch_ctx->set_filter([](uint64_t) { return false; });
  single_ctx->set_topk(topk);
  single_ctx->set_filter([](uint64_t) { return false; });

  // Pack all queries back to back into one buffer.
  std::string query_buffer;
  std::uniform_int_distribution<size_t> query_count_dist(1, 100);
  uint32_t query_count = query_count_dist(gen);

  for (uint32_t q = 0; q != query_count; ++q) {
    NumericalVector<int8_t> query(dim);
    for (uint32_t pos = 0; pos < query.size(); ++pos) {
      query[pos] = static_cast<int8_t>(dist(gen));
    }
    query_buffer.append(reinterpret_cast<const char *>(query.data()),
                        query.bytes());
  }

  const IndexQueryMeta qmeta(IndexMeta::DT_INT8, dim);
  ASSERT_EQ(0, row_searcher->search_impl(query_buffer.data(), qmeta,
                                         query_count, batch_ctx));

  // Only used for its byte size, i.e. the stride between packed queries.
  NumericalVector<int8_t> stride_probe(dim);
  for (uint32_t q = 0; q != query_count; ++q) {
    ASSERT_EQ(0, column_searcher->search_impl(
                     &query_buffer[q * stride_probe.bytes()], qmeta,
                     single_ctx));

    auto &batch_hits = batch_ctx->result(q);
    auto &single_hits = single_ctx->result();
    for (uint32_t rank = 0; rank != topk; ++rank) {
      ASSERT_EQ(batch_hits[rank].index(), single_hits[rank].index());
      ASSERT_EQ(batch_hits[rank].key(), single_hits[rank].key());
      ASSERT_FLOAT_EQ(batch_hits[rank].score(), single_hits[rank].score());
    }
  }
}

// Batch search over DT_BINARY32 vectors. The queries are searched as one
// batch on the row-major index and individually on the column-major index;
// both must produce the same top-k (index, key and score).
TEST(FlatSearcher, RowBatch_Binary32) {
  std::random_device seed_source;
  std::mt19937 gen(seed_source());
  std::uniform_int_distribution<uint32_t> dist(1, 512);
  // Random dimension, rounded up to a multiple of 32.
  size_t dim = (dist(gen) + 31) / 32 * 32;

  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("FlatBuilder");
  ASSERT_NE(builder, nullptr);

  auto holder =
      std::make_shared<MultiPassIndexHolder<IndexMeta::DT_BINARY32>>(dim);
  std::uniform_int_distribution<uint32_t> document_count_dist(1, 10000);
  uint32_t document_count = document_count_dist(gen);
  for (uint32_t doc = 0; doc != document_count; ++doc) {
    BinaryVector<uint32_t> bits(dim);
    for (size_t pos = 0; pos < bits.dimension(); ++pos) {
      if (dist(gen) % 3 != 0) {
        continue;
      }
      bits.set(pos);
    }
    ASSERT_TRUE(holder->emplace(doc, bits));
  }

  // Index ".1": row-major layout.
  IndexMeta row_meta;
  row_meta.set_meta(IndexMeta::DataType::DT_BINARY32, dim);
  row_meta.set_metric("SquaredEuclidean", 0, Params());
  row_meta.set_major_order(IndexMeta::MO_ROW);
  BuildIndex(row_meta, holder, INDEX_PATH + ".1");

  // Index ".2": column-major layout.
  IndexMeta column_meta;
  column_meta.set_meta(IndexMeta::DataType::DT_BINARY32, dim);
  column_meta.set_metric("SquaredEuclidean", 0, Params());
  column_meta.set_major_order(IndexMeta::MO_COLUMN);
  BuildIndex(column_meta, holder, INDEX_PATH + ".2");

  IndexSearcher::Pointer row_searcher;
  IndexSearcher::Pointer column_searcher;
  LoadIndex(INDEX_PATH + ".1", row_searcher);
  LoadIndex(INDEX_PATH + ".2", column_searcher);

  auto batch_ctx = row_searcher->create_context();
  auto single_ctx = column_searcher->create_context();
  uint32_t topk = std::min(10u, document_count);
  batch_ctx->set_topk(topk);
  single_ctx->set_topk(topk);

  // Pack all queries back to back into one buffer.
  std::string query_buffer;
  std::uniform_int_distribution<size_t> query_count_dist(1, 100);
  uint32_t query_count = query_count_dist(gen);

  for (uint32_t q = 0; q != query_count; ++q) {
    BinaryVector<uint32_t> query(dim);
    for (uint32_t pos = 0; pos < query.dimension(); ++pos) {
      if (dist(gen) % 7 == 0) {
        query.set(pos);
      }
    }
    query_buffer.append(reinterpret_cast<const char *>(query.data()),
                        query.bytes());
  }

  const IndexQueryMeta qmeta(IndexMeta::DT_BINARY32, dim);
  ASSERT_EQ(0, row_searcher->search_impl(query_buffer.data(), qmeta,
                                         query_count, batch_ctx));

  // Only used for its byte size, i.e. the stride between packed queries.
  BinaryVector<uint32_t> stride_probe(dim);
  for (uint32_t q = 0; q != query_count; ++q) {
    ASSERT_EQ(0, column_searcher->search_impl(
                     &query_buffer[q * stride_probe.bytes()], qmeta,
                     single_ctx));

    auto &batch_hits = batch_ctx->result(q);
    auto &single_hits = single_ctx->result();
    for (uint32_t rank = 0; rank != topk; ++rank) {
      ASSERT_EQ(batch_hits[rank].index(), single_hits[rank].index());
      ASSERT_EQ(batch_hits[rank].key(), single_hits[rank].key());
      ASSERT_FLOAT_EQ(batch_hits[rank].score(), single_hits[rank].score());
    }
  }
}

// Batch search over DT_BINARY32 vectors on an index built with
// PARAM_FLAT_COLUMN_MAJOR_ORDER enabled. The batch results are compared with
// per-query searches, with a batch run whose filter rejects nothing, and with
// a batch run whose filter rejects every key (which must return nothing).
TEST(FlatSearcher, ColumnBatch_Binary32) {
  std::random_device seed_source;
  std::mt19937 gen(seed_source());
  std::uniform_int_distribution<uint32_t> dist(1, 512);
  // Random dimension, rounded up to a multiple of 32.
  size_t dim = (dist(gen) + 31) / 32 * 32;

  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("FlatBuilder");
  ASSERT_NE(builder, nullptr);

  auto holder =
      std::make_shared<MultiPassIndexHolder<IndexMeta::DT_BINARY32>>(dim);
  std::uniform_int_distribution<uint32_t> document_count_dist(1, 10000);
  uint32_t document_count = document_count_dist(gen);
  for (uint32_t doc = 0; doc != document_count; ++doc) {
    BinaryVector<uint32_t> bits(dim);
    for (size_t pos = 0; pos < bits.dimension(); ++pos) {
      if (dist(gen) % 3 != 0) {
        continue;
      }
      bits.set(pos);
    }
    ASSERT_TRUE(holder->emplace(doc, bits));
  }

  // Index ".1": the meta says MO_ROW, but the build params request the
  // column-major layout.
  IndexMeta column_meta;
  column_meta.set_meta(IndexMeta::DataType::DT_BINARY32, dim);
  column_meta.set_metric("SquaredEuclidean", 0, Params());
  column_meta.set_major_order(IndexMeta::MO_ROW);

  Params column_params;
  column_params.set(PARAM_FLAT_COLUMN_MAJOR_ORDER, true);
  BuildIndex(column_meta, column_params, holder, INDEX_PATH + ".1");

  // Index ".2": plain row-major layout.
  IndexMeta row_meta;
  row_meta.set_meta(IndexMeta::DataType::DT_BINARY32, dim);
  row_meta.set_metric("SquaredEuclidean", 0, Params());
  row_meta.set_major_order(IndexMeta::MO_ROW);
  BuildIndex(row_meta, holder, INDEX_PATH + ".2");

  IndexSearcher::Pointer column_searcher;
  IndexSearcher::Pointer row_searcher;
  LoadIndex(INDEX_PATH + ".1", column_searcher);
  LoadIndex(INDEX_PATH + ".2", row_searcher);

  auto batch_ctx = column_searcher->create_context();
  auto single_ctx = row_searcher->create_context();
  auto pass_ctx = column_searcher->create_context();
  auto reject_ctx = column_searcher->create_context();
  uint32_t topk = std::min(10u, document_count);
  batch_ctx->set_topk(topk);
  single_ctx->set_topk(topk);
  pass_ctx->set_topk(topk);
  pass_ctx->set_filter([](uint64_t) { return false; });  // rejects nothing
  reject_ctx->set_topk(topk);
  reject_ctx->set_filter([](uint64_t) { return true; });  // rejects all keys

  // Pack all queries back to back into one buffer.
  std::string query_buffer;
  std::uniform_int_distribution<size_t> query_count_dist(1, 100);
  uint32_t query_count = query_count_dist(gen);

  for (uint32_t q = 0; q != query_count; ++q) {
    BinaryVector<uint32_t> query(dim);
    for (uint32_t pos = 0; pos < query.dimension(); ++pos) {
      if (dist(gen) % 7 == 0) {
        query.set(pos);
      }
    }
    query_buffer.append(reinterpret_cast<const char *>(query.data()),
                        query.bytes());
  }

  const IndexQueryMeta qmeta(IndexMeta::DT_BINARY32, dim);
  ASSERT_EQ(0, column_searcher->search_impl(query_buffer.data(), qmeta,
                                            query_count, batch_ctx));
  ASSERT_EQ(0, column_searcher->search_impl(query_buffer.data(), qmeta,
                                            query_count, pass_ctx));
  ASSERT_EQ(0, column_searcher->search_impl(query_buffer.data(), qmeta,
                                            query_count, reject_ctx));

  // Only used for its byte size, i.e. the stride between packed queries.
  BinaryVector<uint32_t> stride_probe(dim);
  for (uint32_t q = 0; q != query_count; ++q) {
    // NOTE(review): the per-query search runs on the column-major searcher
    // with a context created by the row-major searcher, and the row-major
    // searcher itself is never searched; the sibling ColumnBatch tests search
    // the row-major index here. Confirm whether this is intentional.
    ASSERT_EQ(0, column_searcher->search_impl(
                     &query_buffer[q * stride_probe.bytes()], qmeta,
                     single_ctx));

    for (uint32_t rank = 0; rank != topk; ++rank) {
      auto &batch_hits = batch_ctx->result(q);
      auto &single_hits = single_ctx->result();
      auto &pass_hits = pass_ctx->result(q);
      auto &reject_hits = reject_ctx->result(q);
      EXPECT_TRUE(reject_hits.empty());

      // Batch result equals the per-query result.
      EXPECT_EQ(batch_hits[rank].index(), single_hits[rank].index());
      EXPECT_EQ(batch_hits[rank].key(), single_hits[rank].key());
      ASSERT_FLOAT_EQ(batch_hits[rank].score(), single_hits[rank].score());

      // A filter that rejects nothing leaves the result unchanged.
      EXPECT_EQ(batch_hits[rank].index(), pass_hits[rank].index());
      EXPECT_EQ(batch_hits[rank].key(), pass_hits[rank].key());
      ASSERT_FLOAT_EQ(batch_hits[rank].score(), pass_hits[rank].score());
    }
  }
}

// Batch search over DT_FP32 vectors on an index built with
// PARAM_FLAT_COLUMN_MAJOR_ORDER enabled (searcher1). The batch results are
// compared with per-query searches on a row-major index (searcher2), with a
// batch run whose filter rejects nothing, and with a batch run whose filter
// rejects every key (which must return nothing).
TEST(FlatSearcher, ColumnBatch_FP32) {
  std::mt19937 gen((std::random_device())());
  auto dist = std::uniform_real_distribution<float>(0.0f, 1.0f);
  size_t dim = (std::uniform_int_distribution<size_t>(1, 512))(gen);

  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("FlatBuilder");
  ASSERT_NE(builder, nullptr);

  auto holder = std::make_shared<MultiPassIndexHolder<IndexMeta::DT_FP32>>(dim);
  uint32_t document_count =
      (std::uniform_int_distribution<size_t>(1, 10000))(gen);
  for (uint32_t i = 0; i < document_count; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < vec.size(); ++j) {
      // Every component of document i is shifted by 5 * i.
      vec[j] = dist(gen) + static_cast<float>(i * 5);
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }


  IndexMeta meta1;
  meta1.set_meta(IndexMeta::DataType::DT_FP32, dim);
  meta1.set_metric("SquaredEuclidean", 0, Params());
  meta1.set_major_order(IndexMeta::MO_ROW);  // overridden when
                                             // column_major_order is set in
                                             // the params

  Params params1;
  params1.set(PARAM_FLAT_COLUMN_MAJOR_ORDER,
              true);  // make it MO_COLUMN
  BuildIndex(meta1, params1, holder, INDEX_PATH + ".1");

  IndexMeta meta2;
  meta2.set_meta(IndexMeta::DataType::DT_FP32, dim);
  meta2.set_metric("SquaredEuclidean", 0, Params());
  meta2.set_major_order(IndexMeta::MO_ROW);
  BuildIndex(meta2, holder, INDEX_PATH + ".2");

  IndexSearcher::Pointer searcher1, searcher2;
  LoadIndex(INDEX_PATH + ".1", searcher1);
  LoadIndex(INDEX_PATH + ".2", searcher2);

  auto context1 = searcher1->create_context();
  auto context2 = searcher2->create_context();
  auto context3 = searcher1->create_context();
  auto context4 = searcher1->create_context();
  uint32_t topk = std::min(10u, document_count);
  context1->set_topk(topk);
  context2->set_topk(topk);
  context3->set_topk(topk);
  context3->set_filter([](uint64_t) { return false; });  // same as no filter
  context4->set_topk(topk);
  context4->set_filter([](uint64_t) { return true; });  // filter all result

  // Queries are packed back to back so they can be searched as one batch.
  std::string query_buffer;
  uint32_t query_count = (std::uniform_int_distribution<size_t>(1, 100))(gen);

  for (uint32_t i = 0; i < query_count; i++) {
    NumericalVector<float> vec(dim);
    for (uint32_t j = 0; j < vec.size(); ++j) {
      vec[j] = dist(gen);
    }
    query_buffer.append((const char *)vec.data(), vec.bytes());
  }

  ASSERT_EQ(0, searcher1->search_impl(query_buffer.data(),
                                      IndexQueryMeta(IndexMeta::DT_FP32, dim),
                                      query_count, context1));
  ASSERT_EQ(0, searcher1->search_impl(query_buffer.data(),
                                      IndexQueryMeta(IndexMeta::DT_FP32, dim),
                                      query_count, context3));
  ASSERT_EQ(0, searcher1->search_impl(query_buffer.data(),
                                      IndexQueryMeta(IndexMeta::DT_FP32, dim),
                                      query_count, context4));

  // Only used for its byte size, i.e. the stride between packed queries.
  NumericalVector<float> vec(dim);
  MathHelper math_help = MathHelper();
  for (uint32_t i = 0; i < query_count; i++) {
    // not batch
    ASSERT_EQ(0, searcher2->search_impl((&query_buffer[i * vec.bytes()]),
                                        IndexQueryMeta(IndexMeta::DT_FP32, dim),
                                        context2));

    auto &result1 = context1->result(i);
    auto &result2 = context2->result();
    auto &result3 = context3->result(i);
    auto &result4 = context4->result(i);
    EXPECT_TRUE(result4.empty());
    // Guard the indexed accesses below against a short result list.
    ASSERT_GE(result1.size(), topk);
    ASSERT_GE(result2.size(), topk);
    ASSERT_GE(result3.size(), topk);

    for (uint32_t j = 0; j < topk; ++j) {
      // batch result is equal to not batch result
      EXPECT_EQ(result1[j].index(), result2[j].index());
      EXPECT_EQ(result1[j].key(), result2[j].key());
      ASSERT_TRUE(
          math_help.IsAlmostEqual(result1[j].score(), result2[j].score(), 10));

      // test filter
      EXPECT_EQ(result1[j].index(), result3[j].index());
      EXPECT_EQ(result1[j].key(), result3[j].key());
      ASSERT_TRUE(
          math_help.IsAlmostEqual(result1[j].score(), result3[j].score(), 10));
    }
  }
}

// Batch search over DT_FP16 vectors on an index built with
// PARAM_FLAT_COLUMN_MAJOR_ORDER enabled (searcher1). The batch results are
// compared with per-query searches on a row-major index (searcher2), with a
// batch run whose filter rejects nothing, and with a batch run whose filter
// rejects every key (which must return nothing).
TEST(FlatSearcher, ColumnBatch_FP16) {
  std::mt19937 gen((std::random_device())());
  auto dist = std::uniform_real_distribution<float>(-1.0f, 1.0f);
  size_t dim = (std::uniform_int_distribution<size_t>(1, 256))(gen);

  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("FlatBuilder");
  ASSERT_NE(builder, nullptr);

  auto holder = std::make_shared<MultiPassIndexHolder<IndexMeta::DT_FP16>>(dim);
  uint32_t document_count =
      (std::uniform_int_distribution<size_t>(1, 10000))(gen);
  for (uint32_t i = 0; i < document_count; i++) {
    NumericalVector<Float16> vec(dim);
    for (size_t j = 0; j < vec.size(); ++j) {
      // Every component of document i is shifted by 5 * i.
      vec[j] = dist(gen) + static_cast<float>(i * 5);
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }


  IndexMeta meta1;
  meta1.set_meta(IndexMeta::DataType::DT_FP16, dim);
  meta1.set_metric("SquaredEuclidean", 0, Params());
  meta1.set_major_order(IndexMeta::MO_ROW);  // overridden when
                                             // column_major_order is set in
                                             // the params

  Params params1;
  params1.set(PARAM_FLAT_COLUMN_MAJOR_ORDER,
              true);  // make it MO_COLUMN
  BuildIndex(meta1, params1, holder, INDEX_PATH + ".1");

  IndexMeta meta2;
  meta2.set_meta(IndexMeta::DataType::DT_FP16, dim);
  meta2.set_metric("SquaredEuclidean", 0, Params());
  meta2.set_major_order(IndexMeta::MO_ROW);
  BuildIndex(meta2, holder, INDEX_PATH + ".2");

  IndexSearcher::Pointer searcher1, searcher2;
  LoadIndex(INDEX_PATH + ".1", searcher1);
  LoadIndex(INDEX_PATH + ".2", searcher2);

  auto context1 = searcher1->create_context();
  auto context2 = searcher2->create_context();
  auto context3 = searcher1->create_context();
  auto context4 = searcher1->create_context();
  uint32_t topk = std::min(10u, document_count);
  context1->set_topk(topk);
  context2->set_topk(topk);
  context3->set_topk(topk);
  context3->set_filter([](uint64_t) { return false; });  // same as no filter
  context4->set_topk(topk);
  context4->set_filter([](uint64_t) { return true; });  // filter all result

  // Queries are packed back to back so they can be searched as one batch.
  std::string query_buffer;
  uint32_t query_count = (std::uniform_int_distribution<size_t>(1, 100))(gen);

  for (uint32_t i = 0; i < query_count; i++) {
    NumericalVector<Float16> vec(dim);
    for (uint32_t j = 0; j < vec.size(); ++j) {
      vec[j] = dist(gen);
    }
    query_buffer.append((const char *)vec.data(), vec.bytes());
  }

  ASSERT_EQ(0, searcher1->search_impl(query_buffer.data(),
                                      IndexQueryMeta(IndexMeta::DT_FP16, dim),
                                      query_count, context1));
  ASSERT_EQ(0, searcher1->search_impl(query_buffer.data(),
                                      IndexQueryMeta(IndexMeta::DT_FP16, dim),
                                      query_count, context3));
  ASSERT_EQ(0, searcher1->search_impl(query_buffer.data(),
                                      IndexQueryMeta(IndexMeta::DT_FP16, dim),
                                      query_count, context4));

  // Only used for its byte size, i.e. the stride between packed queries.
  NumericalVector<Float16> vec(dim);
  MathHelper math_help = MathHelper();
  for (uint32_t i = 0; i < query_count; i++) {
    // not batch: search exactly one query. Passing query_count here would
    // start a batch at query i and read i vectors past the end of
    // query_buffer.
    ASSERT_EQ(0, searcher2->search_impl((&query_buffer[i * vec.bytes()]),
                                        IndexQueryMeta(IndexMeta::DT_FP16, dim),
                                        context2));

    auto &result1 = context1->result(i);
    auto &result2 = context2->result();
    auto &result3 = context3->result(i);
    auto &result4 = context4->result(i);
    EXPECT_TRUE(result4.empty());
    // Guard the indexed accesses below against a short result list.
    ASSERT_GE(result1.size(), topk);
    ASSERT_GE(result2.size(), topk);
    ASSERT_GE(result3.size(), topk);

    for (uint32_t j = 0; j < topk; ++j) {
      // batch result is equal to not batch result
      EXPECT_EQ(result1[j].index(), result2[j].index());
      EXPECT_EQ(result1[j].key(), result2[j].key());
      ASSERT_TRUE(math_help.IsAlmostEqual(result1[j].score(),
                                          result2[j].score(), 10000));

      // test filter
      EXPECT_EQ(result1[j].index(), result3[j].index());
      EXPECT_EQ(result1[j].key(), result3[j].key());
      ASSERT_TRUE(math_help.IsAlmostEqual(result1[j].score(),
                                          result3[j].score(), 10000));
    }
  }
}

// Batch search over DT_INT8 vectors on an index built with
// PARAM_FLAT_COLUMN_MAJOR_ORDER enabled. The batch results are compared with
// per-query searches on a row-major index, with a batch run whose filter
// rejects nothing, and with a batch run whose filter rejects every key (which
// must return nothing).
TEST(FlatSearcher, ColumnBatch_INT8) {
  std::random_device seed_source;
  std::mt19937 gen(seed_source());
  std::uniform_int_distribution<int> dist(-127, 127);
  // Random dimension, rounded up to a multiple of 4.
  std::uniform_int_distribution<size_t> dim_dist(1, 512);
  size_t dim = (dim_dist(gen) + 3) / 4 * 4;

  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("FlatBuilder");
  ASSERT_NE(builder, nullptr);

  auto holder = std::make_shared<MultiPassIndexHolder<IndexMeta::DT_INT8>>(dim);
  std::uniform_int_distribution<uint32_t> document_count_dist(1, 10000);
  uint32_t document_count = document_count_dist(gen);
  for (uint32_t doc = 0; doc != document_count; ++doc) {
    NumericalVector<int8_t> features(dim);
    for (size_t pos = 0; pos < features.size(); ++pos) {
      features[pos] = static_cast<int8_t>(dist(gen));
    }
    ASSERT_TRUE(holder->emplace(doc, features));
  }

  // Index ".1": the meta says MO_ROW, but the column_major_order build
  // parameter turns the layout into MO_COLUMN.
  IndexMeta column_meta;
  column_meta.set_meta(IndexMeta::DataType::DT_INT8, dim);
  column_meta.set_metric("SquaredEuclidean", 0, Params());
  column_meta.set_major_order(IndexMeta::MO_ROW);

  Params column_params;
  column_params.set(PARAM_FLAT_COLUMN_MAJOR_ORDER, true);
  BuildIndex(column_meta, column_params, holder, INDEX_PATH + ".1");

  // Index ".2": plain row-major layout.
  IndexMeta row_meta;
  row_meta.set_meta(IndexMeta::DataType::DT_INT8, dim);
  row_meta.set_metric("SquaredEuclidean", 0, Params());
  row_meta.set_major_order(IndexMeta::MO_ROW);
  BuildIndex(row_meta, holder, INDEX_PATH + ".2");

  IndexSearcher::Pointer column_searcher;
  IndexSearcher::Pointer row_searcher;
  LoadIndex(INDEX_PATH + ".1", column_searcher);
  LoadIndex(INDEX_PATH + ".2", row_searcher);

  auto batch_ctx = column_searcher->create_context();
  auto single_ctx = row_searcher->create_context();
  auto pass_ctx = column_searcher->create_context();
  auto reject_ctx = column_searcher->create_context();
  uint32_t topk = std::min(10u, document_count);
  batch_ctx->set_topk(topk);
  single_ctx->set_topk(topk);
  pass_ctx->set_topk(topk);
  pass_ctx->set_filter([](uint64_t) { return false; });  // same as no filter
  reject_ctx->set_topk(topk);
  reject_ctx->set_filter([](uint64_t) { return true; });  // filter all result

  // Pack all queries back to back into one buffer.
  std::string query_buffer;
  std::uniform_int_distribution<size_t> query_count_dist(1, 100);
  uint32_t query_count = query_count_dist(gen);

  for (uint32_t q = 0; q != query_count; ++q) {
    NumericalVector<int8_t> query(dim);
    for (uint32_t pos = 0; pos < query.size(); ++pos) {
      query[pos] = static_cast<int8_t>(dist(gen));
    }
    query_buffer.append(reinterpret_cast<const char *>(query.data()),
                        query.bytes());
  }

  const IndexQueryMeta qmeta(IndexMeta::DT_INT8, dim);
  ASSERT_EQ(0, column_searcher->search_impl(query_buffer.data(), qmeta,
                                            query_count, batch_ctx));
  ASSERT_EQ(0, column_searcher->search_impl(query_buffer.data(), qmeta,
                                            query_count, pass_ctx));
  ASSERT_EQ(0, column_searcher->search_impl(query_buffer.data(), qmeta,
                                            query_count, reject_ctx));

  // Only used for its byte size, i.e. the stride between packed queries.
  NumericalVector<int8_t> stride_probe(dim);
  for (uint32_t q = 0; q != query_count; ++q) {
    // Per-query (non-batch) search on the row-major index.
    ASSERT_EQ(0, row_searcher->search_impl(
                     &query_buffer[q * stride_probe.bytes()], qmeta,
                     single_ctx));

    for (uint32_t rank = 0; rank != topk; ++rank) {
      auto &batch_hits = batch_ctx->result(q);
      auto &single_hits = single_ctx->result();
      auto &pass_hits = pass_ctx->result(q);
      auto &reject_hits = reject_ctx->result(q);
      EXPECT_TRUE(reject_hits.empty());

      // Batch result equals the per-query result.
      EXPECT_EQ(batch_hits[rank].index(), single_hits[rank].index());
      EXPECT_EQ(batch_hits[rank].key(), single_hits[rank].key());
      ASSERT_FLOAT_EQ(batch_hits[rank].score(), single_hits[rank].score());

      // A filter that rejects nothing leaves the result unchanged.
      EXPECT_EQ(batch_hits[rank].index(), pass_hits[rank].index());
      EXPECT_EQ(batch_hits[rank].key(), pass_hits[rank].key());
      ASSERT_FLOAT_EQ(batch_hits[rank].score(), pass_hits[rank].score());
    }
  }
}

// Checks the providers created from a row-major and a column-major flat
// index built from the same holder: both must report the holder's document
// count, name "FlatSearcher" as their owner class, iterate the same keys and
// vector data as each other (and the same data as the holder's iterator),
// and return matching vectors from get_vector() for every iterated key.
TEST(FlatProvider, Provider_FP32) {
  std::mt19937 gen((std::random_device())());
  auto dist = std::uniform_real_distribution<float>(0.0f, 1.0f);
  size_t dim = (std::uniform_int_distribution<size_t>(1, 512))(gen);

  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("FlatBuilder");
  ASSERT_NE(builder, nullptr);

  auto holder = std::make_shared<MultiPassIndexHolder<IndexMeta::DT_FP32>>(dim);
  uint32_t document_count =
      (std::uniform_int_distribution<size_t>(1, 10000))(gen);

  // Insert the documents under shuffled keys rather than 0..n-1 in order.
  std::vector<uint32_t> keys(document_count);
  for (uint32_t i = 0; i < document_count; i++) {
    keys[i] = i;
  }
  Shuffle(keys);

  for (uint32_t i = 0; i < document_count; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < vec.size(); ++j) {
      vec[j] = dist(gen) + static_cast<float>(i * 5);
    }
    ASSERT_TRUE(holder->emplace(keys[i], vec));
  }

  // Index ".1": row-major layout.
  IndexMeta meta1;
  meta1.set_meta(IndexMeta::DataType::DT_FP32, dim);
  meta1.set_metric("SquaredEuclidean", 0, Params());
  meta1.set_major_order(IndexMeta::MO_ROW);
  BuildIndex(meta1, holder, INDEX_PATH + ".1");

  // Index ".2": column-major layout.
  IndexMeta meta2;
  meta2.set_meta(IndexMeta::DataType::DT_FP32, dim);
  meta2.set_metric("SquaredEuclidean", 0, Params());
  meta2.set_major_order(IndexMeta::MO_COLUMN);
  BuildIndex(meta2, holder, INDEX_PATH + ".2");

  IndexSearcher::Pointer searcher1, searcher2;
  LoadIndex(INDEX_PATH + ".1", searcher1);
  LoadIndex(INDEX_PATH + ".2", searcher2);

  auto provider1 = searcher1->create_provider();
  auto provider2 = searcher2->create_provider();

  ASSERT_TRUE(!!provider1);
  ASSERT_TRUE(!!provider2);

  ASSERT_EQ(document_count, provider1->count());
  ASSERT_EQ(document_count, provider2->count());

  ASSERT_EQ("FlatSearcher", provider1->owner_class());
  ASSERT_EQ("FlatSearcher", provider2->owner_class());

  auto it1 = provider1->create_iterator();
  auto it2 = provider2->create_iterator();
  auto holder_it = holder->create_iterator();

  uint32_t readed_count = 0;
  while (it1->is_valid() && it2->is_valid()) {
    // The holder iterator is advanced in lockstep; make sure it has not run
    // out before its data is dereferenced.
    ASSERT_TRUE(holder_it->is_valid());
    ASSERT_EQ(it1->key(), it2->key());
    const float *data1 = (const float *)it1->data();
    const float *data2 = (const float *)it2->data();
    const float *holder_data = (const float *)holder_it->data();
    for (size_t idx = 0; idx < dim; idx++) {
      ASSERT_EQ(*data1, *data2) << "Fail when dim is: " << dim
                                << " document_count is: " << document_count;
      ASSERT_EQ(*data1, *holder_data);
      data1++;
      data2++;
      holder_data++;
    }
    readed_count++;
    // Random access by key must agree between the two providers.
    const float *features1 = (const float *)provider1->get_vector(it1->key());
    const float *features2 = (const float *)provider2->get_vector(it2->key());
    ASSERT_TRUE(features1 != nullptr);
    ASSERT_TRUE(features2 != nullptr);
    for (size_t idx = 0; idx < dim; idx++) {
      ASSERT_FLOAT_EQ(*features1, *features2);
      features1++;
      features2++;
    }
    it1->next();
    it2->next();
    holder_it->next();
  }

  // The holder must be exhausted once the provider iteration ends.
  ASSERT_FALSE(holder_it->is_valid());

  ASSERT_EQ(readed_count, provider1->count());
  ASSERT_EQ(readed_count, provider2->count());
  ASSERT_EQ(readed_count, holder->count());
}

// Builds a 5000-document flat index, then runs a grouped search twice: once
// through search_impl and once through search_bf_by_p_keys_impl restricted to
// an explicit key list. Results of both searches are printed; the second one
// is additionally checked for group count and leading key per group.
TEST(FlatSearcher, TestGroup) {
  // size_t (rather than int) so comparisons against the size_t loop indices
  // below do not mix signedness; matches the `dim` constants of the streamer
  // tests.
  constexpr size_t dim = 32;
  static std::shared_ptr<IndexMeta> index_meta_ptr_;
  index_meta_ptr_.reset(new (std::nothrow)
                            IndexMeta(IndexMeta::DataType::DT_FP32, dim));
  // nothrow new yields nullptr on allocation failure; do not dereference it.
  ASSERT_TRUE(index_meta_ptr_ != nullptr);
  index_meta_ptr_->set_metric("SquaredEuclidean", 0, Params());

  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("FlatBuilder");
  ASSERT_NE(builder, nullptr);
  auto holder = std::make_shared<MultiPassIndexHolder<IndexMeta::DT_FP32>>(dim);
  size_t doc_cnt = 5000UL;
  // Document i is the constant vector (i / 10.0, ..., i / 10.0).
  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i / 10.0;
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }

  Params params;

  ASSERT_EQ(0, builder->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, builder->train(holder));
  ASSERT_EQ(0, builder->build(holder));
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  std::string path = INDEX_PATH + "/TestGroup";
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  // test searcher
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("FlatSearcher");
  ASSERT_NE(searcher, nullptr);
  Params searcherParams;
  ASSERT_EQ(0, searcher->init(searcherParams));

  auto storage = IndexFactory::CreateStorage("MMapFileReadStorage");
  ASSERT_NE(nullptr, storage);
  ASSERT_EQ(0, storage->open(path, false));
  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));

  auto ctx = searcher->create_context();
  ASSERT_TRUE(!!ctx);

  NumericalVector<float> vec(dim);
  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);
  size_t group_topk = 20;
  uint64_t total_time = 0;

  // Groups keys by their tens digit: key / 10 % 10.
  auto groupbyFunc = [](uint64_t key) {
    uint32_t group_id = key / 10 % 10;

    // std::cout << "key: " << key << ", group id: " << group_id << std::endl;

    return std::string("g_") + std::to_string(group_id);
  };

  size_t group_num = 5;

  ctx->set_group_params(group_num, group_topk);
  ctx->set_group_by(groupbyFunc);

  // Query sits 0.1 above the vector of the middle document.
  size_t query_value = doc_cnt / 2;
  for (size_t j = 0; j < dim; ++j) {
    vec[j] = float(query_value) / 10 + 0.1f;
  }

  auto t1 = Realtime::MicroSeconds();
  ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, 1, ctx));
  auto t2 = Realtime::MicroSeconds();

  total_time += t2 - t1;
  std::cout << "Total time: " << total_time << std::endl;

  auto &group_result = ctx->group_result();

  for (uint32_t i = 0; i < group_result.size(); ++i) {
    const std::string &group_id = group_result[i].group_id();
    auto &result = group_result[i].docs();

    ASSERT_GT(result.size(), 0U);
    std::cout << "Group ID: " << group_id << std::endl;

    for (uint32_t j = 0; j < result.size(); ++j) {
      std::cout << "\tKey: " << result[j].key() << std::fixed
                << std::setprecision(3) << ", Score: " << result[j].score()
                << std::endl;
    }
  }

  // do linear search by p_keys test
  // Groups keys by their last decimal digit: key % 10.
  auto groupbyFuncLinear = [](uint64_t key) {
    uint32_t group_id = key % 10;

    return std::string("g_") + std::to_string(group_id);
  };

  auto linear_pk_ctx = searcher->create_context();
  ASSERT_TRUE(!!linear_pk_ctx);

  linear_pk_ctx->set_group_params(group_num, group_topk);
  linear_pk_ctx->set_group_by(groupbyFuncLinear);

  std::vector<std::vector<uint64_t>> p_keys;
  p_keys.resize(1);
  p_keys[0] = {4, 3, 2, 1, 5, 6, 7, 8, 9, 10};

  ASSERT_EQ(0, searcher->search_bf_by_p_keys_impl(vec.data(), p_keys, qmeta,
                                                  linear_pk_ctx));
  auto &linear_by_pkeys_group_result = linear_pk_ctx->group_result();
  ASSERT_EQ(linear_by_pkeys_group_result.size(), group_num);

  for (uint32_t i = 0; i < linear_by_pkeys_group_result.size(); ++i) {
    const std::string &group_id = linear_by_pkeys_group_result[i].group_id();
    auto &result = linear_by_pkeys_group_result[i].docs();

    ASSERT_GT(result.size(), 0U);
    std::cout << "Group ID: " << group_id << std::endl;

    for (uint32_t j = 0; j < result.size(); ++j) {
      std::cout << "\tKey: " << result[j].key() << std::fixed
                << std::setprecision(3) << ", Score: " << result[j].score()
                << std::endl;
    }

    // The i-th returned group is expected to be led by key 10 - i.
    ASSERT_EQ(10 - i, result[0].key());
  }
}


================================================
FILE: tests/core/algorithm/flat/flat_streamer_buffer_test.cc
================================================
#include <future>
#include <string>
#include <vector>
#include <ailego/utility/math_helper.h>
#include <ailego/utility/memory_helper.h>
#include <gtest/gtest.h>
#include <zvec/ailego/buffer/buffer_manager.h>
#include <zvec/core/framework/index_framework.h>
#include <zvec/core/framework/index_streamer.h>

using namespace zvec::core;
using namespace zvec::ailego;
using namespace std;

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
#endif

// Dimension of the vectors used by the tests in this file (file-local).
static constexpr size_t dim = 16;

// Fixture shared by the streamer tests in this file. SetUp() rebuilds the
// index meta and wipes the scratch directory; TearDown() wipes it again.
// The former `hybrid_scale` declaration was dropped: it had no definition and
// no caller anywhere in this file.
class FlatStreamerTest : public testing::Test {
 protected:
  void SetUp(void);
  void TearDown(void);

  // Scratch directory that holds the index files written by the tests.
  static std::string dir_;
  // Index meta re-created by SetUp() before every test.
  static std::shared_ptr<IndexMeta> index_meta_ptr_;
};

std::string FlatStreamerTest::dir_("streamer_test/");
std::shared_ptr<IndexMeta> FlatStreamerTest::index_meta_ptr_;

void FlatStreamerTest::SetUp(void) {
  index_meta_ptr_.reset(new (std::nothrow)
                            IndexMeta(IndexMeta::DataType::DT_FP32, dim));
  index_meta_ptr_->set_metric("SquaredEuclidean", 0, Params());

  char cmdBuf[100];
  snprintf(cmdBuf, 100, "rm -rf %s", dir_.c_str());
  system(cmdBuf);
}

// Deletes the scratch directory (dir_) through the shell after each test.
void FlatStreamerTest::TearDown(void) {
  char cleanup_cmd[100];
  snprintf(cleanup_cmd, sizeof(cleanup_cmd), "rm -rf %s", dir_.c_str());
  system(cleanup_cmd);
}

// Writes 10000 constant-valued vectors (vector i is all i) through a
// FlatStreamer on MMapFileStorage, reopens the same path read-only through
// BufferStorage, and checks for every key: the exact-match hit, the stored
// vector contents, and the order of the three nearest neighbours of a query
// shifted by 0.1. The per-key pass runs twice (the second one is timed), with
// a top-100 brute-force query in between.
TEST_F(FlatStreamerTest, TestLinearSearch) {
  const std::string index_path = dir_ + "/Test/LinearSearch";

  IndexStreamer::Pointer write_streamer =
      IndexFactory::CreateStreamer("FlatStreamer");
  ASSERT_TRUE(write_streamer != nullptr);

  Params params;
  ASSERT_EQ(0, write_streamer->init(*index_meta_ptr_, params));
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(index_path, true));
  ASSERT_EQ(0, write_streamer->open(storage));

  // NOTE(review): this context comes from the write streamer and is reused
  // with the read streamer below -- confirm that this is supported.
  auto ctx = write_streamer->create_context();
  ASSERT_TRUE(!!ctx);

  size_t cnt = 10000UL;
  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);
  for (size_t i = 0; i < cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    // A failed insert would otherwise only surface as a confusing search
    // mismatch much later.
    ASSERT_EQ(0, write_streamer->add_impl(i, vec.data(), qmeta, ctx));
  }
  write_streamer->flush(0UL);
  write_streamer->close();
  write_streamer.reset();


  IndexStreamer::Pointer read_streamer =
      IndexFactory::CreateStreamer("FlatStreamer");
  ASSERT_TRUE(read_streamer != nullptr);
  ASSERT_EQ(0, read_streamer->init(*index_meta_ptr_, params));
  auto read_storage = IndexFactory::CreateStorage("BufferStorage");
  ASSERT_NE(nullptr, read_storage);
  ASSERT_EQ(0, read_storage->init(stg_params));
  ASSERT_EQ(0, read_storage->open(index_path, false));
  ASSERT_EQ(0, read_streamer->open(read_storage));
  size_t topk = 3;
  auto provider = read_streamer->create_provider();
  ASSERT_TRUE(!!provider);

  // First (untimed) verification pass over every key.
  for (size_t i = 0; i < cnt; i += 1) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    ctx->set_topk(topk);
    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));
    auto &result1 = ctx->result();
    ASSERT_EQ(topk, result1.size());
    IndexStorage::MemoryBlock block;
    ASSERT_EQ(0, provider->get_vector(result1[0].key(), block));
    const float *data = reinterpret_cast<const float *>(block.data());
    ASSERT_TRUE(data != nullptr);
    for (size_t j = 0; j < dim; ++j) {
      ASSERT_FLOAT_EQ(data[j], i);
    }
    ASSERT_EQ(i, result1[0].key());

    // Query i + 0.1: nearest is i, then i + 1, then i - 1 (with the obvious
    // adjustments at both ends of the key range).
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i + 0.1f;
    }
    ctx->set_topk(topk);
    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));
    auto &result2 = ctx->result();
    ASSERT_EQ(topk, result2.size());
    ASSERT_EQ(i, result2[0].key());
    ASSERT_EQ(i == cnt - 1 ? i - 1 : i + 1, result2[1].key());
    ASSERT_EQ(i == 0 ? 2 : (i == cnt - 1 ? i - 2 : i - 1), result2[2].key());
  }

  // Brute-force top-100 around 10.1; spot-check a few ranks.
  ctx->set_topk(100U);
  NumericalVector<float> vec(dim);
  for (size_t j = 0; j < dim; ++j) {
    vec[j] = 10.1f;
  }
  ASSERT_EQ(0, read_streamer->search_bf_impl(vec.data(), qmeta, ctx));
  auto &result = ctx->result();
  ASSERT_EQ(100U, result.size());
  ASSERT_EQ(10, result[0].key());
  ASSERT_EQ(11, result[1].key());
  ASSERT_EQ(5, result[10].key());
  ASSERT_EQ(0, result[20].key());
  ASSERT_EQ(30, result[30].key());
  ASSERT_EQ(35, result[35].key());
  ASSERT_EQ(99, result[99].key());

  // Second, timed verification pass (same checks as the first one).
  ElapsedTime elapsed_time;
  for (size_t i = 0; i < cnt; i += 1) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    ctx->set_topk(topk);
    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));
    auto &result1 = ctx->result();
    ASSERT_EQ(topk, result1.size());
    IndexStorage::MemoryBlock block;
    ASSERT_EQ(0, provider->get_vector(result1[0].key(), block));
    const float *data = reinterpret_cast<const float *>(block.data());
    ASSERT_TRUE(data != nullptr);
    for (size_t j = 0; j < dim; ++j) {
      ASSERT_FLOAT_EQ(data[j], i);
    }
    ASSERT_EQ(i, result1[0].key());

    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i + 0.1f;
    }
    ctx->set_topk(topk);
    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));
    auto &result2 = ctx->result();
    ASSERT_EQ(topk, result2.size());
    ASSERT_EQ(i, result2[0].key());
    ASSERT_EQ(i == cnt - 1 ? i - 1 : i + 1, result2[1].key());
    ASSERT_EQ(i == 0 ? 2 : (i == cnt - 1 ? i - 2 : i - 1), result2[2].key());
  }
  cout << "Elapsed time: " << elapsed_time.milli_seconds() << " ms" << endl;

  read_streamer->close();
  read_streamer.reset();
}

// Writes 1000000 constant-valued vectors of 1600 floats each through a
// FlatStreamer on MMapFileStorage, reopens the path through BufferStorage, and
// verifies (and times) searches for the first 10 keys only.
TEST_F(FlatStreamerTest, TestLinearSearchWithLRU) {
  // Shadows the file-level `dim`: this test uses much larger vectors and
  // therefore builds its own IndexMeta instead of index_meta_ptr_.
  constexpr size_t static dim = 1600;
  const std::string index_path = dir_ + "/Test/LinearSearchWithLRU";

  IndexStreamer::Pointer write_streamer =
      IndexFactory::CreateStreamer("FlatStreamer");
  ASSERT_TRUE(write_streamer != nullptr);

  Params params;
  IndexMeta meta = IndexMeta(IndexMeta::DataType::DT_FP32, dim);
  meta.set_metric("SquaredEuclidean", 0, Params());
  ASSERT_EQ(0, write_streamer->init(meta, params));
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(index_path, true));
  ASSERT_EQ(0, write_streamer->open(storage));

  // NOTE(review): this context comes from the write streamer and is reused
  // with the read streamer below -- confirm that this is supported.
  auto ctx = write_streamer->create_context();
  ASSERT_TRUE(!!ctx);

  size_t cnt = 1000000UL;
  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);
  for (size_t i = 0; i < cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    // Surface a failed insert immediately rather than as a later mismatch.
    ASSERT_EQ(0, write_streamer->add_impl(i, vec.data(), qmeta, ctx));
  }
  write_streamer->flush(0UL);
  write_streamer->close();
  write_streamer.reset();


  IndexStreamer::Pointer read_streamer =
      IndexFactory::CreateStreamer("FlatStreamer");
  ASSERT_TRUE(read_streamer != nullptr);
  ASSERT_EQ(0, read_streamer->init(meta, params));
  auto read_storage = IndexFactory::CreateStorage("BufferStorage");
  ASSERT_NE(nullptr, read_storage);
  ASSERT_EQ(0, read_storage->init(stg_params));
  ASSERT_EQ(0, read_storage->open(index_path, false));
  ASSERT_EQ(0, read_streamer->open(read_storage));
  size_t topk = 3;
  auto provider = read_streamer->create_provider();
  ASSERT_TRUE(!!provider);

  ElapsedTime elapsed_time;
  for (size_t i = 0; i < 10; i += 1) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    ctx->set_topk(topk);
    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));
    auto &result1 = ctx->result();
    ASSERT_EQ(topk, result1.size());
    IndexStorage::MemoryBlock block;
    ASSERT_EQ(0, provider->get_vector(result1[0].key(), block));
    const float *data = reinterpret_cast<const float *>(block.data());
    ASSERT_TRUE(data != nullptr);
    for (size_t j = 0; j < dim; ++j) {
      // Float comparison macro, as in the sibling tests of this file (the
      // original compared a float against a size_t with ASSERT_EQ).
      ASSERT_FLOAT_EQ(data[j], i);
    }
    ASSERT_EQ(i, result1[0].key());

    // Query i + 0.1: nearest is i, then i + 1, then i - 1 (2 when i == 0).
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i + 0.1f;
    }
    ctx->set_topk(topk);
    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));
    auto &result2 = ctx->result();
    ASSERT_EQ(topk, result2.size());
    ASSERT_EQ(i, result2[0].key());
    ASSERT_EQ(i == cnt - 1 ? i - 1 : i + 1, result2[1].key());
    ASSERT_EQ(i == 0 ? 2 : (i == cnt - 1 ? i - 2 : i - 1), result2[2].key());
  }
  cout << "Elapsed time: " << elapsed_time.milli_seconds() << " ms" << endl;

  read_streamer->close();
  read_streamer.reset();
}

// Same scenario as the BufferStorage-based linear search test, but the index
// is reopened through MMapFileStorage for reading. The second (timed) pass
// additionally reads each hit through the pointer-returning get_vector
// overload.
TEST_F(FlatStreamerTest, TestLinearSearchMMap) {
  const std::string index_path = dir_ + "/Test/LinearSearchMMap";

  IndexStreamer::Pointer write_streamer =
      IndexFactory::CreateStreamer("FlatStreamer");
  ASSERT_TRUE(write_streamer != nullptr);

  Params params;
  ASSERT_EQ(0, write_streamer->init(*index_meta_ptr_, params));
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(index_path, true));
  ASSERT_EQ(0, write_streamer->open(storage));

  // NOTE(review): this context comes from the write streamer and is reused
  // with the read streamer below -- confirm that this is supported.
  auto ctx = write_streamer->create_context();
  ASSERT_TRUE(!!ctx);

  size_t cnt = 10000UL;
  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);
  for (size_t i = 0; i < cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    // Surface a failed insert immediately rather than as a later mismatch.
    ASSERT_EQ(0, write_streamer->add_impl(i, vec.data(), qmeta, ctx));
  }
  write_streamer->flush(0UL);
  write_streamer->close();
  write_streamer.reset();

  IndexStreamer::Pointer read_streamer =
      IndexFactory::CreateStreamer("FlatStreamer");
  ASSERT_TRUE(read_streamer != nullptr);
  ASSERT_EQ(0, read_streamer->init(*index_meta_ptr_, params));
  auto read_storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, read_storage);
  ASSERT_EQ(0, read_storage->init(stg_params));
  ASSERT_EQ(0, read_storage->open(index_path, false));
  ASSERT_EQ(0, read_streamer->open(read_storage));
  size_t topk = 3;
  auto provider = read_streamer->create_provider();
  ASSERT_TRUE(!!provider);

  // First (untimed) verification pass over every key.
  for (size_t i = 0; i < cnt; i += 1) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    ctx->set_topk(topk);
    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));
    auto &result1 = ctx->result();
    ASSERT_EQ(topk, result1.size());
    IndexStorage::MemoryBlock block;
    ASSERT_EQ(0, provider->get_vector(result1[0].key(), block));
    const float *data = reinterpret_cast<const float *>(block.data());
    ASSERT_TRUE(data != nullptr);
    for (size_t j = 0; j < dim; ++j) {
      ASSERT_FLOAT_EQ(data[j], i);
    }
    ASSERT_EQ(i, result1[0].key());

    // Query i + 0.1: nearest is i, then i + 1, then i - 1 (with the obvious
    // adjustments at both ends of the key range).
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i + 0.1f;
    }
    ctx->set_topk(topk);
    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));
    auto &result2 = ctx->result();
    ASSERT_EQ(topk, result2.size());
    ASSERT_EQ(i, result2[0].key());
    ASSERT_EQ(i == cnt - 1 ? i - 1 : i + 1, result2[1].key());
    ASSERT_EQ(i == 0 ? 2 : (i == cnt - 1 ? i - 2 : i - 1), result2[2].key());
  }

  // Brute-force top-100 around 10.1; spot-check a few ranks.
  ctx->set_topk(100U);
  NumericalVector<float> vec(dim);
  for (size_t j = 0; j < dim; ++j) {
    vec[j] = 10.1f;
  }
  ASSERT_EQ(0, read_streamer->search_bf_impl(vec.data(), qmeta, ctx));
  auto &result = ctx->result();
  ASSERT_EQ(100U, result.size());
  ASSERT_EQ(10, result[0].key());
  ASSERT_EQ(11, result[1].key());
  ASSERT_EQ(5, result[10].key());
  ASSERT_EQ(0, result[20].key());
  ASSERT_EQ(30, result[30].key());
  ASSERT_EQ(35, result[35].key());
  ASSERT_EQ(99, result[99].key());

  // Second, timed verification pass.
  ElapsedTime elapsed_time;
  for (size_t i = 0; i < cnt; i += 1) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    ctx->set_topk(topk);
    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));
    auto &result1 = ctx->result();
    ASSERT_EQ(topk, result1.size());
    IndexStorage::MemoryBlock block;
    ASSERT_EQ(0, provider->get_vector(result1[0].key(), block));
    const float *data = reinterpret_cast<const float *>(block.data());
    ASSERT_TRUE(data != nullptr);
    // Fetch through the pointer-returning overload once per hit. The original
    // re-fetched it for every dimension into a variable that shadowed `data`,
    // so the block copy above was never actually checked.
    const float *raw = reinterpret_cast<const float *>(
        provider->get_vector(result1[0].key()));
    ASSERT_TRUE(raw != nullptr);
    for (size_t j = 0; j < dim; ++j) {
      ASSERT_FLOAT_EQ(data[j], i);
      EXPECT_FLOAT_EQ(raw[j], i);
    }
    ASSERT_EQ(i, result1[0].key());

    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i + 0.1f;
    }
    ctx->set_topk(topk);
    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));
    auto &result2 = ctx->result();
    ASSERT_EQ(topk, result2.size());
    ASSERT_EQ(i, result2[0].key());
    ASSERT_EQ(i == cnt - 1 ? i - 1 : i + 1, result2[1].key());
    ASSERT_EQ(i == 0 ? 2 : (i == cnt - 1 ? i - 2 : i - 1), result2[2].key());
  }

  read_streamer->close();
  read_streamer.reset();
  // Printed after close(), so the figure includes the time spent closing.
  cout << "Elapsed time: " << elapsed_time.milli_seconds() << " ms" << endl;
}

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif

================================================
FILE: tests/core/algorithm/flat/flat_streamer_buffer_time_test.cc
================================================
#include <future>
#include <string>
#include <vector>
#include <ailego/utility/math_helper.h>
#include <ailego/utility/memory_helper.h>
#include <gtest/gtest.h>
#include <zvec/ailego/buffer/buffer_manager.h>
#include <zvec/core/framework/index_framework.h>
#include <zvec/core/framework/index_streamer.h>

using namespace zvec::core;
using namespace zvec::ailego;
using namespace std;

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
#endif

// Dimension of the vectors used by the timing tests in this file (file-local).
static constexpr size_t dim = 128;

// Fixture shared by the timing tests in this file. SetUp() rebuilds the index
// meta and wipes the scratch directory; TearDown() wipes it again.
// The former `hybrid_scale` declaration was dropped: it had no definition and
// no caller anywhere in this file.
class FlatStreamerTest : public testing::Test {
 protected:
  void SetUp(void);
  void TearDown(void);

  // Scratch directory that holds the index files written by the tests.
  static std::string dir_;
  // Index meta re-created by SetUp() before every test.
  static std::shared_ptr<IndexMeta> index_meta_ptr_;
};

std::string FlatStreamerTest::dir_("streamer_test/");
std::shared_ptr<IndexMeta> FlatStreamerTest::index_meta_ptr_;

void FlatStreamerTest::SetUp(void) {
  index_meta_ptr_.reset(new (std::nothrow)
                            IndexMeta(IndexMeta::DataType::DT_FP32, dim));
  index_meta_ptr_->set_metric("SquaredEuclidean", 0, Params());

  char cmdBuf[100];
  snprintf(cmdBuf, 100, "rm -rf %s", dir_.c_str());
  system(cmdBuf);
}

// Deletes the scratch directory (dir_) through the shell after each test.
void FlatStreamerTest::TearDown(void) {
  char cleanup_cmd[100];
  snprintf(cleanup_cmd, sizeof(cleanup_cmd), "rm -rf %s", dir_.c_str());
  system(cleanup_cmd);
}

// Timing test: writes 300000 constant-valued vectors through a FlatStreamer,
// reopens the index through MMapFileStorage and runs the same 500 top-30
// queries twice, printing the elapsed time after each pass. Only the return
// code of each search is checked; the returned results are not inspected.
TEST_F(FlatStreamerTest, TestLinearSearchMMap) {
  const std::string index_path = dir_ + "/Test/LinearSearchMMap";

  IndexStreamer::Pointer write_streamer =
      IndexFactory::CreateStreamer("FlatStreamer");
  ASSERT_TRUE(write_streamer != nullptr);

  Params params;
  ASSERT_EQ(0, write_streamer->init(*index_meta_ptr_, params));
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(index_path, true));
  ASSERT_EQ(0, write_streamer->open(storage));

  // NOTE(review): this context comes from the write streamer and is reused
  // with the read streamer below -- confirm that this is supported.
  auto ctx = write_streamer->create_context();
  ASSERT_TRUE(!!ctx);

  size_t data_cnt = 300000UL, cnt = 500UL;
  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);
  for (size_t i = 0; i < data_cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    // A silently failed insert would make the timings below meaningless.
    ASSERT_EQ(0, write_streamer->add_impl(i, vec.data(), qmeta, ctx));
  }
  write_streamer->flush(0UL);
  write_streamer->close();
  write_streamer.reset();

  IndexStreamer::Pointer read_streamer =
      IndexFactory::CreateStreamer("FlatStreamer");
  ASSERT_TRUE(read_streamer != nullptr);
  ASSERT_EQ(0, read_streamer->init(*index_meta_ptr_, params));
  auto read_storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, read_storage);
  ASSERT_EQ(0, read_storage->init(stg_params));
  ASSERT_EQ(0, read_storage->open(index_path, false));
  ASSERT_EQ(0, read_streamer->open(read_storage));
  size_t topk = 30;

  // First pass.
  ElapsedTime elapsed_time;
  for (size_t i = 0; i < cnt; i += 1) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    ctx->set_topk(topk);
    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));
  }
  cout << "Elapsed time: " << elapsed_time.micro_seconds() << " us" << endl;

  // Second pass over the same queries. The timer object is the same one as
  // above, so the second figure is measured from the start of the first pass.
  for (size_t i = 0; i < cnt; i += 1) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    ctx->set_topk(topk);
    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));
  }
  cout << "Elapsed time: " << elapsed_time.micro_seconds() << " us" << endl;
  read_streamer->close();
  read_streamer.reset();
}

// Timing test: writes 300000 constant-valued vectors through a FlatStreamer,
// reopens the index through BufferStorage and runs the same 500 top-30 queries
// twice, printing the elapsed time after each pass. Only the return code of
// each search is checked; the returned results are not inspected.
TEST_F(FlatStreamerTest, TestLinearSearchBuffer) {
  const std::string index_path = dir_ + "/Test/LinearSearchBuffer";

  IndexStreamer::Pointer write_streamer =
      IndexFactory::CreateStreamer("FlatStreamer");
  ASSERT_TRUE(write_streamer != nullptr);

  Params params;
  ASSERT_EQ(0, write_streamer->init(*index_meta_ptr_, params));
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(index_path, true));
  ASSERT_EQ(0, write_streamer->open(storage));

  // NOTE(review): this context comes from the write streamer and is reused
  // with the read streamer below -- confirm that this is supported.
  auto ctx = write_streamer->create_context();
  ASSERT_TRUE(!!ctx);

  size_t data_cnt = 300000UL, cnt = 500UL;
  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);
  for (size_t i = 0; i < data_cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    // A silently failed insert would make the timings below meaningless.
    ASSERT_EQ(0, write_streamer->add_impl(i, vec.data(), qmeta, ctx));
  }
  write_streamer->flush(0UL);
  write_streamer->close();
  write_streamer.reset();

  IndexStreamer::Pointer read_streamer =
      IndexFactory::CreateStreamer("FlatStreamer");
  ASSERT_TRUE(read_streamer != nullptr);
  ASSERT_EQ(0, read_streamer->init(*index_meta_ptr_, params));
  auto read_storage = IndexFactory::CreateStorage("BufferStorage");
  ASSERT_NE(nullptr, read_storage);
  ASSERT_EQ(0, read_storage->init(stg_params));
  ASSERT_EQ(0, read_storage->open(index_path, false));
  ASSERT_EQ(0, read_streamer->open(read_storage));
  size_t topk = 30;

  // First pass.
  ElapsedTime elapsed_time;
  for (size_t i = 0; i < cnt; i += 1) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    ctx->set_topk(topk);
    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));
  }
  cout << "Elapsed time: " << elapsed_time.micro_seconds() << " us" << endl;

  // Second pass over the same queries. The timer object is the same one as
  // above, so the second figure is measured from the start of the first pass.
  for (size_t i = 0; i < cnt; i += 1) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    ctx->set_topk(topk);
    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));
  }
  cout << "Elapsed time: " << elapsed_time.micro_seconds() << " us" << endl;
  read_streamer->close();
  read_streamer.reset();
}

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif

================================================
FILE: tests/core/algorithm/flat/flat_streamer_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cstddef>
#include <future>
#include <string>
#include <vector>
#include <ailego/utility/math_helper.h>
#include <ailego/utility/memory_helper.h>
#include <gtest/gtest.h>
#include <zvec/ailego/buffer/buffer_manager.h>
#include <zvec/ailego/encoding/json/mod_json.h>
#include <zvec/core/framework/index_framework.h>
#include <zvec/core/framework/index_streamer.h>
#include "algorithm/flat/flat_utility.h"

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
#endif

using namespace zvec::core;
using namespace zvec::ailego;
using namespace std;

// Dimension of the vectors used by the tests in this file (file-local).
static constexpr size_t dim = 16;

// Fixture shared by the FlatStreamer tests in this file. SetUp() rebuilds the
// index meta and wipes the scratch directory; TearDown() wipes it again.
class FlatStreamerTest : public testing::Test {
 protected:
  void SetUp();
  void TearDown();
  // NOTE(review): no definition of hybrid_scale is visible in the part of the
  // file reviewed here -- confirm that it is defined and used further down.
  void hybrid_scale(std::vector<float> &dense_value,
                    std::vector<float> &sparse_value, float alpha_scale);

  // Scratch directory that holds the index files written by the tests.
  static std::string dir_;
  // Index meta re-created by SetUp() before every test.
  static std::shared_ptr<IndexMeta> index_meta_ptr_;
};

std::string FlatStreamerTest::dir_ = "streamer_test/";
std::shared_ptr<IndexMeta> FlatStreamerTest::index_meta_ptr_;

void FlatStreamerTest::SetUp(void) {
  index_meta_ptr_.reset(new (std::nothrow)
                            IndexMeta(IndexMeta::DataType::DT_FP32, dim));
  index_meta_ptr_->set_metric("SquaredEuclidean", 0, Params());

  char cmdBuf[100];
  snprintf(cmdBuf, 100, "rm -rf %s", dir_.c_str());
  system(cmdBuf);
}

// Per-test cleanup: removes the scratch directory the test wrote into.
void FlatStreamerTest::TearDown(void) {
  // Build the command in a std::string so a longer dir_ can never be
  // truncated by a fixed-size buffer.
  const std::string cmd = "rm -rf " + dir_;
  system(cmd.c_str());
}

// Adds 1000 vectors (every component of vector i equals i) and, right after
// each insert, reads the vector back through the provider by key.
TEST_F(FlatStreamerTest, TestAddVector) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatStreamer");
  ASSERT_TRUE(streamer != nullptr);

  Params params;
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/Test/AddVector", true));
  ASSERT_EQ(0, streamer->open(storage));

  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  auto provider = streamer->create_provider();
  ASSERT_TRUE(!!provider);

  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);
  for (size_t i = 0; i < 1000UL; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    // A failed insert must fail the test here rather than surface later as a
    // confusing read-back mismatch.
    ASSERT_EQ(0, streamer->add_impl(i, vec.data(), qmeta, ctx));
    const float *data = (const float *)provider->get_vector(i);
    ASSERT_NE(data, nullptr);
    for (size_t j = 0; j < dim; ++j) {
      ASSERT_FLOAT_EQ(data[j], i);
    }
  }

  ASSERT_EQ(0, streamer->flush(0UL));
  streamer.reset();
}

// Inserts 1000 vectors (every component of vector i equals i) and checks the
// ordering of the nearest neighbours returned by search_impl and
// search_bf_impl under the SquaredEuclidean metric.
TEST_F(FlatStreamerTest, TestLinearSearch) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatStreamer");
  ASSERT_TRUE(streamer != nullptr);

  Params params;
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/Test/AddVector", true));
  ASSERT_EQ(0, streamer->open(storage));

  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  auto provider = streamer->create_provider();
  ASSERT_TRUE(!!provider);

  size_t cnt = 1000UL;
  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);
  for (size_t i = 0; i < cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    ASSERT_EQ(0, streamer->add_impl(i, vec.data(), qmeta, ctx));
  }

  size_t topk = 3;
  for (size_t i = 0; i < cnt; i += 1) {
    // Exact-match query: the best hit must be document i itself.
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    ctx->set_topk(topk);
    ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));
    auto &result1 = ctx->result();
    ASSERT_EQ(topk, result1.size());
    // Validate the key before using it to fetch the vector, and fetch the
    // vector once instead of once per dimension.
    ASSERT_EQ(i, result1[0].key());
    const float *data = (const float *)provider->get_vector(result1[0].key());
    ASSERT_NE(data, nullptr);
    for (size_t j = 0; j < dim; ++j) {
      ASSERT_FLOAT_EQ(data[j], i);
    }

    // Query slightly above i: expected order is i, then i + 1, then i - 1,
    // with the obvious adjustments at both ends of the key range.
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i + 0.1f;
    }
    ctx->set_topk(topk);
    ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));
    auto &result2 = ctx->result();
    ASSERT_EQ(topk, result2.size());
    ASSERT_EQ(i, result2[0].key());
    ASSERT_EQ(i == cnt - 1 ? i - 1 : i + 1, result2[1].key());
    ASSERT_EQ(i == 0 ? 2 : (i == cnt - 1 ? i - 2 : i - 1), result2[2].key());
  }

  // Brute-force search around 10.1 with a larger top-k: hits alternate
  // between the two sides of 10 until key 0 is reached, then continue upward.
  ctx->set_topk(100U);
  NumericalVector<float> vec(dim);
  for (size_t j = 0; j < dim; ++j) {
    vec[j] = 10.1f;
  }
  ASSERT_EQ(0, streamer->search_bf_impl(vec.data(), qmeta, ctx));
  auto &result = ctx->result();
  ASSERT_EQ(100U, result.size());
  ASSERT_EQ(10, result[0].key());
  ASSERT_EQ(11, result[1].key());
  ASSERT_EQ(5, result[10].key());
  ASSERT_EQ(0, result[20].key());
  ASSERT_EQ(30, result[30].key());
  ASSERT_EQ(35, result[35].key());
  ASSERT_EQ(99, result[99].key());

  ASSERT_EQ(0, streamer->flush(0UL));
  streamer.reset();
}

// Interleaves inserts and searches: after inserting document i the search
// must return min(i + 1, topk) results.
TEST_F(FlatStreamerTest, TestAddAndSearch) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatStreamer");
  ASSERT_TRUE(streamer != nullptr);

  Params params;
  Params stg_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestAddAndSearch.index", true));
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  const size_t topk = 200U, cnt = 2000U;
  NumericalVector<float> vec(dim);
  auto ctx = streamer->create_context();
  // Check the context before the first dereference, not after it.
  ASSERT_TRUE(!!ctx);
  ctx->set_topk(topk);
  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);
  for (size_t i = 0; i < cnt; i++) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    ASSERT_EQ(0, streamer->add_impl(i, vec.data(), qmeta, ctx));
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i + 0.1f;
    }
    ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));
    auto &knnResult = ctx->result();
    ASSERT_EQ(std::min(i + 1, topk), knnResult.size());
  }
}

// Builds an index with the streamer, dumps it through "FileDumper", loads the
// dump into a "FlatSearcher" and checks that every query returns topk hits.
TEST_F(FlatStreamerTest, TestAddAndSearcherSearch) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatStreamer");
  ASSERT_TRUE(streamer != nullptr);

  Params params;
  Params stg_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestAddAndSearcherSearch.index", true));
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  const size_t topk = 200U, cnt = 2000U;
  NumericalVector<float> vec(dim);
  auto ctx = streamer->create_context();
  // Check the context before the first dereference, not after it.
  ASSERT_TRUE(!!ctx);
  ctx->set_topk(topk);
  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);
  for (size_t i = 0; i < cnt; i++) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    ASSERT_EQ(0, streamer->add_impl(i, vec.data(), qmeta, ctx));
  }

  // Dump the streamer contents to a file.
  std::string path1 = dir_ + "/TestAddAndSearcherSearchDump";
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(nullptr, dumper);
  ASSERT_EQ(0, dumper->init(Params()));
  ASSERT_EQ(0, dumper->create(path1));
  ASSERT_EQ(0, streamer->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  // Load the dump into a searcher.
  auto container = IndexFactory::CreateStorage("MMapFileReadStorage");
  ASSERT_NE(nullptr, container);
  ASSERT_EQ(0, container->init(Params()));
  ASSERT_EQ(0, container->open(path1, false));
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("FlatSearcher");
  ASSERT_NE(nullptr, searcher);
  ASSERT_EQ(0, searcher->init(Params()));
  ASSERT_EQ(0, searcher->load(container, IndexMetric::Pointer()));

  auto linearCtx = searcher->create_context();
  ASSERT_TRUE(!!linearCtx);
  linearCtx->set_topk(topk);
  for (size_t i = 0; i < cnt; i++) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i + 0.1f;
    }
    ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, linearCtx));
    auto &knnResult = linearCtx->result();
    ASSERT_EQ(topk, knnResult.size());
  }
}

// Inserts 1500 random 128-dimensional vectors, then runs 500 random queries
// through both search_bf_impl and search_impl and measures how well the two
// result sets overlap (recall at topk and at 1).
TEST_F(FlatStreamerTest, TestLinearSearchRandomData) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatStreamer");
  ASSERT_TRUE(streamer != nullptr);

  // Shadows the file-level `dim` for this test only.
  constexpr size_t static dim = 128;
  IndexMeta meta(IndexMeta::DataType::DT_FP32, dim);
  meta.set_metric("SquaredEuclidean", 0, Params());
  Params params;

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestKnnSearchRandomData", true));
  ASSERT_EQ(0, streamer->init(meta, params));
  ASSERT_EQ(0, streamer->open(storage));

  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  NumericalVector<float> vec(dim);
  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);
  size_t cnt = 1500;
  for (size_t i = 0; i < cnt; i++) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
    }
    ASSERT_EQ(0, streamer->add_impl(i + cnt, vec.data(), qmeta, ctx));
  }

  auto linearCtx = streamer->create_context();
  auto knnCtx = streamer->create_context();
  ASSERT_TRUE(!!linearCtx);
  ASSERT_TRUE(!!knnCtx);
  size_t topk = 100;
  linearCtx->set_topk(topk);
  knnCtx->set_topk(topk);
  uint64_t knnTotalTime = 0;
  uint64_t linearTotalTime = 0;
  int totalHits = 0;
  int totalCnts = 0;
  int topk1Hits = 0;
  cnt = 500;
  for (size_t i = 0; i < cnt; i += 1) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
    }
    auto t1 = Realtime::MicroSeconds();
    ASSERT_EQ(0, streamer->search_bf_impl(vec.data(), qmeta, linearCtx));
    auto t2 = Realtime::MicroSeconds();
    ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, knnCtx));
    auto t3 = Realtime::MicroSeconds();
    knnTotalTime += t3 - t2;
    linearTotalTime += t2 - t1;

    auto &knnResult = knnCtx->result();
    ASSERT_EQ(topk, knnResult.size());
    auto &linearResult = linearCtx->result();
    ASSERT_EQ(topk, linearResult.size());

    topk1Hits += linearResult[0].key() == knnResult[0].key();

    // Count how many search_impl hits also appear in the brute-force result.
    for (size_t k = 0; k < topk; ++k) {
      totalCnts++;
      for (size_t j = 0; j < topk; ++j) {
        if (linearResult[j].key() == knnResult[k].key()) {
          totalHits++;
          break;
        }
      }
    }
  }
  float recall = totalHits * 1.0f / totalCnts;
  float topk1Recall = topk1Hits * 1.0f / cnt;
  // topk is a size_t, so it is printed with %zu (not the signed %zd).
  printf(
      "knnTotalTime=%zu linearTotalTime=%zu totalHits=%d totalCnts=%d "
      "R@%zu=%f R@1=%f\n",
      (size_t)knnTotalTime, (size_t)linearTotalTime, totalHits, totalCnts, topk,
      recall, topk1Recall);
  EXPECT_GT(recall, 0.50f);
  EXPECT_GT(topk1Recall, 0.80f);
}

// Repeatedly opens one streamer against two storages in turn, adding one
// vector per open (even keys to storage1, odd keys to storage2), and checks
// after each add -- and again through two fresh streamers at the end -- that
// each storage holds exactly its own keys.
TEST_F(FlatStreamerTest, TestOpenClose) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatStreamer");
  ASSERT_TRUE(streamer != nullptr);

  // Shadows the file-level `dim` for this test only.
  constexpr size_t static dim = 2048;
  IndexMeta meta(IndexMeta::DataType::DT_FP32, dim);
  meta.set_metric("SquaredEuclidean", 0, Params());
  Params params;
  // params.set(PARAM_FLAT_COLUMN_MAJOR_ORDER, false);
  auto storage1 = IndexFactory::CreateStorage("MMapFileStorage");
  auto storage2 = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage1);
  ASSERT_NE(nullptr, storage2);
  Params stg_params;
  ASSERT_EQ(0, storage1->init(stg_params));
  ASSERT_EQ(0, storage1->open(dir_ + "TestOpenAndClose1", true));
  ASSERT_EQ(0, storage2->init(stg_params));
  ASSERT_EQ(0, storage2->open(dir_ + "TestOpenAndClose2", true));
  ASSERT_EQ(0, streamer->init(meta, params));

  // Walks the provider's iterator and checks that the vectors stored under
  // keys base, base + 2, base + 4, ... hold their own key in every
  // component, and that exactly `total` entries were visited.
  // Note: a fatal assertion inside the lambda only returns from the lambda.
  auto checkIter = [](size_t base, size_t total,
                      IndexStreamer::Pointer &streamer) {
    auto provider = streamer->create_provider();
    auto iter = provider->create_iterator();
    ASSERT_TRUE(!!iter);
    size_t cur = base;
    size_t cnt = 0;
    while (iter->is_valid()) {
      float *data = (float *)provider->get_vector(cur);
      for (size_t d = 0; d < dim; ++d) {
        ASSERT_FLOAT_EQ((float)cur, data[d]);
      }
      iter->next();
      cur += 2;
      cnt++;
    }
    ASSERT_EQ(cnt, total);
  };

  size_t testCnt = 200;
  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);
  for (size_t i = 0; i < testCnt; i += 2) {
    // Even key goes to storage1.
    float v1 = (float)i;
    ASSERT_EQ(0, streamer->open(storage1));
    auto ctx = streamer->create_context();
    ASSERT_TRUE(!!ctx);
    std::vector<float> vec1(dim);
    for (size_t d = 0; d < dim; ++d) {
      vec1[d] = v1;
    }
    ASSERT_EQ(0, streamer->add_impl(i, vec1.data(), qmeta, ctx));
    checkIter(0, i / 2 + 1, streamer);
    ASSERT_EQ(0, streamer->flush(0UL));
    ASSERT_EQ(0, streamer->close());

    // Odd key goes to storage2.
    float v2 = (float)(i + 1);
    std::vector<float> vec2(dim);
    for (size_t d = 0; d < dim; ++d) {
      vec2[d] = v2;
    }
    ASSERT_EQ(0, streamer->open(storage2));
    ctx = streamer->create_context();
    ASSERT_TRUE(!!ctx);
    ASSERT_EQ(0, streamer->add_impl(i + 1, vec2.data(), qmeta, ctx));
    checkIter(1, i / 2 + 1, streamer);
    ASSERT_EQ(0, streamer->flush(0UL));
    ASSERT_EQ(0, streamer->close());
  }

  // Re-open both storages with fresh streamers. The null checks must test
  // the newly created streamers, not the original `streamer`.
  IndexStreamer::Pointer streamer1 =
      IndexFactory::CreateStreamer("FlatStreamer");
  ASSERT_TRUE(streamer1 != nullptr);
  ASSERT_EQ(0, streamer1->init(meta, params));
  ASSERT_EQ(0, streamer1->open(storage1));

  IndexStreamer::Pointer streamer2 =
      IndexFactory::CreateStreamer("FlatStreamer");
  ASSERT_TRUE(streamer2 != nullptr);
  ASSERT_EQ(0, streamer2->init(meta, params));
  ASSERT_EQ(0, streamer2->open(storage2));

  checkIter(0, testCnt / 2, streamer1);
  checkIter(1, testCnt / 2, streamer2);
}

// Calls cleanup() on a streamer that was never initialised or opened.
TEST_F(FlatStreamerTest, TestNoInit) {
  auto fresh_streamer = IndexFactory::CreateStreamer("FlatStreamer");
  ASSERT_TRUE(fresh_streamer != nullptr);

  fresh_streamer->cleanup();
}

// Writes 200 vectors through a storage configured with the copy_on_write and
// force_flush options, closes everything, then re-opens the same file with a
// new storage object and checks that all vectors are still readable.
TEST_F(FlatStreamerTest, TestForceFlush) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatStreamer");
  ASSERT_TRUE(streamer != nullptr);

  Params params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);

  Params stg_params;
  stg_params.set("proxima.mmap_file.storage.copy_on_write", true);
  stg_params.set("proxima.mmap_file.storage.force_flush", true);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestForceFlush", true));
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  // Walks the provider's iterator, expecting keys 0, 1, 2, ... with every
  // component of a vector equal to its key, and `expected` entries in total.
  auto verifyContents = [](size_t expected, IndexStreamer::Pointer &target) {
    auto provider = target->create_provider();
    auto iter = provider->create_iterator();
    ASSERT_TRUE(!!iter);
    size_t key = 0;
    for (; iter->is_valid(); iter->next(), key++) {
      float *stored = (float *)provider->get_vector(key);
      for (size_t d = 0; d < dim; ++d) {
        ASSERT_FLOAT_EQ((float)key, stored[d]);
      }
    }
    ASSERT_EQ(key, expected);
  };

  const size_t total = 200;
  NumericalVector<float> vec(dim);
  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  for (size_t key = 0; key < total; key++) {
    for (size_t d = 0; d < dim; ++d) {
      vec[d] = key;
    }
    streamer->add_impl(key, vec.data(), qmeta, ctx);
    verifyContents(key + 1, streamer);
  }

  streamer->flush(0UL);
  streamer->close();
  storage->close();

  // Re-open the same file through a brand-new storage object.
  storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestForceFlush", true));
  ASSERT_EQ(0, streamer->open(storage));
  verifyContents(total, streamer);

  // Direct lookups by key must return the same data.
  auto provider = streamer->create_provider();
  for (size_t key = 0; key < total; key++) {
    const float *stored = (const float *)provider->get_vector(key);
    ASSERT_NE(stored, nullptr);
    for (size_t d = 0; d < dim; ++d) {
      ASSERT_FLOAT_EQ(key, stored[d]);
    }
  }
}

// Adds 3 x 1000 vectors from three concurrent tasks, re-opens the streamer
// and verifies the stored data, then runs three concurrent search tasks that
// compare search_impl results against search_bf_impl results.
TEST_F(FlatStreamerTest, TestMultiThread) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatStreamer");
  ASSERT_TRUE(streamer != nullptr);

  Params params;
  // Shadows the file-level `dim` for this test only.
  constexpr size_t static dim = 32;
  IndexMeta meta(IndexMeta::DataType::DT_FP32, dim);
  meta.set_metric("SquaredEuclidean", 0, Params());
  ASSERT_EQ(0, streamer->init(meta, params));
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "TessKnnMultiThread", true));
  ASSERT_EQ(0, streamer->open(storage));

  // Inserts keys [baseKey, baseKey + addCnt), each vector filled with its own
  // key, using a context private to the calling task. Returns the number of
  // add_impl calls that returned 0.
  auto addVector = [&streamer](int baseKey, size_t addCnt) {
    NumericalVector<float> vec(dim);
    IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);
    size_t succAdd = 0;
    auto ctx = streamer->create_context();
    for (size_t i = 0; i < addCnt; i++) {
      for (size_t j = 0; j < dim; ++j) {
        vec[j] = (float)i + baseKey;
      }
      succAdd += !streamer->add_impl(baseKey + i, vec.data(), qmeta, ctx);
    }
    streamer->flush(0UL);
    return succAdd;
  };
  // Three writers over disjoint key ranges.
  auto t2 = std::async(std::launch::async, addVector, 1000, 1000);
  auto t3 = std::async(std::launch::async, addVector, 2000, 1000);
  auto t1 = std::async(std::launch::async, addVector, 0, 1000);
  ASSERT_EQ(1000U, t1.get());
  ASSERT_EQ(1000U, t2.get());
  ASSERT_EQ(1000U, t3.get());
  streamer->close();

  // checking data
  ASSERT_EQ(0, streamer->open(storage));
  auto provider = streamer->create_provider();
  auto iter = provider->create_iterator();
  ASSERT_TRUE(!!iter);
  size_t total = 0;
  uint64_t min = 1000;
  uint64_t max = 0;
  // Every stored vector must equal its key in all components; also track the
  // smallest and largest key seen.
  while (iter->is_valid()) {
    float *data = (float *)iter->data();
    for (size_t d = 0; d < dim; ++d) {
      ASSERT_FLOAT_EQ((float)iter->key(), data[d]);
    }
    total++;
    min = std::min(min, iter->key());
    max = std::max(max, iter->key());
    iter->next();
  }
  ASSERT_EQ(3000, total);
  ASSERT_EQ(0, min);
  ASSERT_EQ(2999, max);

  // ====== multi thread search
  size_t topk = 100;
  size_t cnt = 3000;
  // Runs `cnt` queries (query i has every component equal to i + 0.1) through
  // both search paths and requires the overall overlap between the two
  // result sets to exceed 0.80.
  auto knnSearch = [&]() {
    NumericalVector<float> vec(dim);
    auto linearCtx = streamer->create_context();
    // NOTE(review): linearByPkeysCtx is configured but never passed to a
    // search call in this lambda.
    auto linearByPkeysCtx = streamer->create_context();
    auto ctx = streamer->create_context();
    IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);
    linearCtx->set_topk(topk);
    linearByPkeysCtx->set_topk(topk);
    ctx->set_topk(topk);
    size_t totalCnts = 0;
    size_t totalHits = 0;
    for (size_t i = 0; i < cnt; i += 1) {
      for (size_t j = 0; j < dim; ++j) {
        vec[j] = i + 0.1f;
      }
      ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));
      ASSERT_EQ(0, streamer->search_bf_impl(vec.data(), qmeta, linearCtx));
      auto &r1 = ctx->result();
      ASSERT_EQ(topk, r1.size());
      auto &r2 = linearCtx->result();
      ASSERT_EQ(topk, r2.size());
      ASSERT_EQ(i, r2[0].key());
#if 0
            printf("linear: %zd => %zd %zd %zd %zd %zd\n", i, r2[0].key,
                   r2[1].key, r2[2].key, r2[3].key, r2[4].key);
            printf("knn: %zd => %zd %zd %zd %zd %zd\n", i, r1[0].key, r1[1].key,
                   r1[2].key, r1[3].key, r1[4].key);
#endif
      // Count the search_impl hits that also appear in the brute-force result.
      for (size_t k = 0; k < topk; ++k) {
        totalCnts++;
        for (size_t j = 0; j < topk; ++j) {
          if (r2[j].key() == r1[k].key()) {
            totalHits++;
            break;
          }
        }
      }
    }
    // printf("%f\n", totalHits * 1.0f / totalCnts);
    ASSERT_TRUE((totalHits * 1.0f / totalCnts) > 0.80f);
  };
  // Three concurrent readers; wait for all of them before the test returns.
  auto s1 = std::async(std::launch::async, knnSearch);
  auto s2 = std::async(std::launch::async, knnSearch);
  auto s3 = std::async(std::launch::async, knnSearch);
  s1.wait();
  s2.wait();
  s3.wait();
}

// Seeds the index with 1000 vectors, then runs two writer tasks (1000 vectors
// each) concurrently with two search tasks, and finally verifies that all
// 3000 vectors are stored correctly.
TEST_F(FlatStreamerTest, TestConcurrentAddAndSearch) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatStreamer");
  ASSERT_TRUE(streamer != nullptr);

  Params params;
  // Shadows the file-level `dim` for this test only.
  constexpr size_t static dim = 32;
  IndexMeta meta(IndexMeta::DataType::DT_FP32, dim);
  meta.set_metric("SquaredEuclidean", 0, Params());
  ASSERT_EQ(0, streamer->init(meta, params));
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "TessKnnConcurrentAddAndSearch", true));
  ASSERT_EQ(0, streamer->open(storage));

  // Inserts keys [baseKey, baseKey + addCnt), each vector filled with its own
  // key, using a context private to the calling task. Returns the number of
  // add_impl calls that returned 0.
  auto addVector = [&streamer](int baseKey, size_t addCnt) {
    NumericalVector<float> vec(dim);
    IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);
    auto ctx = streamer->create_context();
    size_t succAdd = 0;
    for (size_t i = 0; i < addCnt; i++) {
      for (size_t j = 0; j < dim; ++j) {
        vec[j] = (float)i + baseKey;
      }
      succAdd += !streamer->add_impl(baseKey + i, vec.data(), qmeta, ctx);
    }
    streamer->flush(0UL);
    return succAdd;
  };

  // ====== multi thread search
  // Runs 3000 queries through both search paths and requires the overall
  // overlap between the two result sets to exceed 0.80.
  auto knnSearch = [&]() {
    size_t topk = 100;
    size_t cnt = 3000;
    NumericalVector<float> vec(dim);
    auto linearCtx = streamer->create_context();
    // NOTE(review): linearByPKeysCtx is configured but never passed to a
    // search call; kept in case creating the context matters to the streamer.
    auto linearByPKeysCtx = streamer->create_context();
    auto ctx = streamer->create_context();
    linearCtx->set_topk(topk);
    linearByPKeysCtx->set_topk(topk);
    ctx->set_topk(topk);
    size_t totalCnts = 0;
    size_t totalHits = 0;
    IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);
    for (size_t i = 0; i < cnt; i += 1) {
      for (size_t j = 0; j < dim; ++j) {
        vec[j] = i + 0.1f;
      }
      ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));
      ASSERT_EQ(0, streamer->search_bf_impl(vec.data(), qmeta, linearCtx));
      auto &r1 = ctx->result();
      ASSERT_EQ(topk, r1.size());
      auto &r2 = linearCtx->result();
      ASSERT_EQ(topk, r2.size());
#if 0
      printf("linear: %zd => %zd %zd %zd %zd %zd\n", i, r2[0].key,
              r2[1].key, r2[2].key, r2[3].key, r2[4].key);
      printf("knn: %zd => %zd %zd %zd %zd %zd\n", i, r1[0].key, r1[1].key,
              r1[2].key, r1[3].key, r1[4].key);
#endif
      // Count the search_impl hits that also appear in the brute-force result.
      for (size_t k = 0; k < topk; ++k) {
        totalCnts++;
        for (size_t j = 0; j < topk; ++j) {
          if (r2[j].key() == r1[k].key()) {
            totalHits++;
            break;
          }
        }
      }
    }
    //        printf("%f\n", totalHits * 1.0f / totalCnts);
    ASSERT_TRUE((totalHits * 1.0f / totalCnts) > 0.80f);
  };

  // Seed the index first so the searches always have at least topk documents.
  auto t0 = std::async(std::launch::async, addVector, 0, 1000);
  ASSERT_EQ(1000U, t0.get());
  // Two writers and two readers run at the same time.
  auto t1 = std::async(std::launch::async, addVector, 1000, 1000);
  auto t2 = std::async(std::launch::async, addVector, 2000, 1000);
  auto s1 = std::async(std::launch::async, knnSearch);
  auto s2 = std::async(std::launch::async, knnSearch);
  ASSERT_EQ(1000U, t1.get());
  ASSERT_EQ(1000U, t2.get());
  s1.wait();
  s2.wait();

  // checking data
  auto provider = streamer->create_provider();
  auto iter = provider->create_iterator();
  ASSERT_TRUE(!!iter);
  size_t total = 0;
  uint64_t min = 1000;
  uint64_t max = 0;
  while (iter->is_valid()) {
    float *data = (float *)iter->data();
    for (size_t d = 0; d < dim; ++d) {
      ASSERT_FLOAT_EQ((float)iter->key(), data[d]);
    }
    total++;
    min = std::min(min, iter->key());
    max = std::max(max, iter->key());
    iter->next();
  }
  ASSERT_EQ(3000U, total);
  ASSERT_EQ(0U, min);
  ASSERT_EQ(2999U, max);
}

// Checks that a key filter installed on the context removes the filtered keys
// from the results of both search_impl and search_bf_impl.
TEST_F(FlatStreamerTest, TestFilter) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatStreamer");
  ASSERT_TRUE(streamer != nullptr);

  Params params;
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "TessFilter", true));
  ASSERT_EQ(0, streamer->open(storage));

  NumericalVector<float> vec(dim);
  size_t cnt = 2000;
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  ctx->set_topk(10U);
  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);
  for (size_t i = 0; i < cnt; i++) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    streamer->add_impl(i, vec.data(), qmeta, ctx);
  }

  // Query just above key 100: without a filter the order is 100, 101, 99.
  for (size_t j = 0; j < dim; ++j) {
    vec[j] = 100.1f;
  }
  ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));
  auto &results = ctx->result();
  ASSERT_EQ(10, results.size());
  ASSERT_EQ(100, results[0].key());
  ASSERT_EQ(101, results[1].key());
  ASSERT_EQ(99, results[2].key());

  // The filter returns true for the keys that must be dropped from results.
  auto filterFunc = [](uint64_t key) {
    if (key == 100UL || key == 101UL) {
      return true;
    }
    return false;
  };
  ctx->set_filter(filterFunc);

  // after set filter
  ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));
  auto &results1 = ctx->result();
  ASSERT_EQ(10, results1.size());
  ASSERT_EQ(99, results1[0].key());
  ASSERT_EQ(102, results1[1].key());
  ASSERT_EQ(98, results1[2].key());

  // linear
  ASSERT_EQ(0, streamer->search_bf_impl(vec.data(), qmeta, ctx));
  auto &results2 = ctx->result();
  ASSERT_EQ(10, results2.size());
  ASSERT_EQ(99, results2[0].key());
  ASSERT_EQ(102, results2[1].key());
  ASSERT_EQ(98, results2[2].key());

  // NOTE(review): this test used to collect every key into an unused p_keys
  // list and then re-assert the result above without running another search.
  // Both pieces were dead code and have been dropped; if a search-by-p-keys
  // check was intended here, it needs to be added explicitly.
}

// Adds 10000 128-dimensional vectors and checks that the growth in resident
// memory reported by MemoryHelper::SelfUsage stays within 25% of the expected
// index size. The check is skipped when memory usage cannot be read or when
// the virtual size is huge (taken as a sign of running under ASan).
TEST_F(FlatStreamerTest, TestMaxIndexSize) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatStreamer");
  ASSERT_TRUE(streamer != nullptr);

  Params params;
  // Shadows the file-level `dim` for this test only.
  constexpr size_t static dim = 128;
  IndexMeta meta(IndexMeta::DataType::DT_FP32, dim);
  meta.set_metric("SquaredEuclidean", 0, Params());
  ASSERT_EQ(0, streamer->init(meta, params));
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "TessMaxIndexSize", true));
  ASSERT_EQ(0, streamer->open(storage));

  size_t vsz0 = 0;
  size_t rss0 = 0;
  if (!MemoryHelper::SelfUsage(&vsz0, &rss0)) {
    // do not check if get mem usage failed
    return;
  }
  // 1 TiB threshold, computed in 64 bits so it cannot overflow on targets
  // where `unsigned long` is only 32 bits wide.
  if (static_cast<uint64_t>(vsz0) > (1ULL << 40)) {
    // asan mode
    return;
  }

  NumericalVector<float> vec(dim);
  size_t writeCnt1 = 10000;
  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  for (size_t i = 0; i < writeCnt1; i++) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    streamer->add_impl(i, vec.data(), qmeta, ctx);
  }
  size_t vsz1 = 0;
  size_t rss1 = 0;
  ASSERT_TRUE(MemoryHelper::SelfUsage(&vsz1, &rss1));
  // Guard the unsigned subtraction below against wrapping around.
  ASSERT_GE(rss1, rss0);
  size_t increment1 = rss1 - rss0;
  // data + key + block_header
  size_t expect_size =
      writeCnt1 * dim * 4 + writeCnt1 * 8 + writeCnt1 * 28 / 32;
  LOG_INFO("increment1: %zu, expect_size: %zu", increment1, expect_size);

  ASSERT_GT(expect_size, increment1 * 0.75f);
  ASSERT_LT(expect_size, increment1 * 1.25f);

  streamer->flush(0UL);
  streamer.reset();
}

// Uses one streamer object for two complete init/open/add/close/cleanup
// cycles, each with its own storage file and a different dimension (32, then
// 64).
TEST_F(FlatStreamerTest, TestCleanUp) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatStreamer");
  ASSERT_TRUE(streamer != nullptr);

  Params stg_params;
  Params params;

  // ---- first cycle: 32-dimensional vectors ----
  constexpr size_t static dim1 = 32;
  auto first_storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, first_storage);
  ASSERT_EQ(0, first_storage->init(stg_params));
  ASSERT_EQ(0, first_storage->open(dir_ + "TessKnnCluenUp1", true));
  IndexMeta first_meta(IndexMeta::DataType::DT_FP32, dim1);
  first_meta.set_metric("SquaredEuclidean", 0, Params());
  NumericalVector<float> first_vec(dim1);
  ASSERT_EQ(0, streamer->init(first_meta, params));
  ASSERT_EQ(0, streamer->open(first_storage));
  IndexQueryMeta first_qmeta(IndexMeta::DT_FP32, dim1);
  auto first_ctx = streamer->create_context();
  ASSERT_EQ(0, streamer->add_impl(1, first_vec.data(), first_qmeta, first_ctx));
  ASSERT_EQ(0, streamer->close());
  ASSERT_EQ(0, streamer->cleanup());

  // ---- second cycle: 64-dimensional vectors on the same streamer ----
  constexpr size_t static dim2 = 64;
  auto second_storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, second_storage);
  ASSERT_EQ(0, second_storage->init(stg_params));
  ASSERT_EQ(0, second_storage->open(dir_ + "TessKnnCluenUp2", true));
  IndexMeta second_meta(IndexMeta::DataType::DT_FP32, dim2);
  second_meta.set_metric("SquaredEuclidean", 0, Params());
  NumericalVector<float> second_vec(dim2);
  ASSERT_EQ(0, streamer->init(second_meta, params));
  ASSERT_EQ(0, streamer->open(second_storage));
  IndexQueryMeta second_qmeta(IndexMeta::DT_FP32, dim2);
  auto second_ctx = streamer->create_context();
  ASSERT_EQ(0,
            streamer->add_impl(2, second_vec.data(), second_qmeta, second_ctx));
  ASSERT_EQ(0, streamer->close());
  ASSERT_EQ(0, streamer->cleanup());
}

// Adds 5000 vectors and, after every tenth insert, searches with the vector
// just added and expects a full top-10 result.
TEST_F(FlatStreamerTest, TestBloomFilter) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatStreamer");
  ASSERT_TRUE(streamer != nullptr);

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "TestBloomFilter", true));
  Params params;
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  auto ctx = streamer->create_context();
  ASSERT_NE(nullptr, ctx);
  ctx->set_topk(10U);

  const size_t total = 5000;
  NumericalVector<float> vec(dim);
  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);
  for (size_t key = 0; key < total; key++) {
    for (size_t d = 0; d < dim; ++d) {
      vec[d] = key;
    }
    streamer->add_impl(key, vec.data(), qmeta, ctx);

    // Only every tenth insert is followed by a search.
    if ((key + 1) % 10 != 0) {
      continue;
    }
    ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));
    auto &hits = ctx->result();
    ASSERT_EQ(10, hits.size());
  }
}

// Exercises grouped search: first search_impl with a group-by function over
// 5000 documents, then search_bf_by_p_keys_impl restricted to ten explicit
// keys with a different group-by function.
TEST_F(FlatStreamerTest, TestGroup) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatStreamer");
  ASSERT_NE(streamer, nullptr);

  Params params;
  Params stg_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestGroup.index", true));
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);

  size_t doc_cnt = 5000U;
  NumericalVector<float> vec(dim);
  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);

  // Document i has every component equal to i / 10.
  for (size_t i = 0; i < doc_cnt; i++) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i / 10.0;
    }
    streamer->add_impl(i, vec.data(), qmeta, ctx);
  }

  size_t group_topk = 20;
  uint64_t total_time = 0;

  // Group id is the tens digit of the key: "g_0" .. "g_9".
  auto groupbyFunc = [](uint64_t key) {
    uint32_t group_id = key / 10 % 10;

    // std::cout << "key: " << key << ", group id: " << group_id << std::endl;

    return std::string("g_") + std::to_string(group_id);
  };

  size_t group_num = 5;

  ctx->set_group_params(group_num, group_topk);
  ctx->set_group_by(groupbyFunc);

  // Query slightly above the vector of the middle document.
  size_t query_value = doc_cnt / 2;
  for (size_t j = 0; j < dim; ++j) {
    vec[j] = query_value * 1.0 / 10 + 0.1f;
  }

  auto t1 = Realtime::MicroSeconds();
  ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, 1, ctx));
  auto t2 = Realtime::MicroSeconds();

  total_time += t2 - t1;
  std::cout << "Total time: " << total_time << std::endl;

  auto &group_result = ctx->group_result();

  // Every returned group must be non-empty; its documents are printed for
  // manual inspection.
  for (uint32_t i = 0; i < group_result.size(); ++i) {
    const std::string &group_id = group_result[i].group_id();
    auto &result = group_result[i].docs();

    ASSERT_GT(result.size(), 0);
    std::cout << "Group ID: " << group_id << std::endl;

    for (uint32_t j = 0; j < result.size(); ++j) {
      std::cout << "\tKey: " << result[j].key() << std::fixed
                << std::setprecision(3) << ", Score: " << result[j].score()
                << std::endl;
    }
  }

  // do linear search by p_keys test
  // Here the group id is the last decimal digit of the key.
  auto groupbyFuncLinear = [](uint64_t key) {
    uint32_t group_id = key % 10;

    return std::string("g_") + std::to_string(group_id);
  };

  auto linear_pk_ctx = streamer->create_context();

  linear_pk_ctx->set_group_params(group_num, group_topk);
  linear_pk_ctx->set_group_by(groupbyFuncLinear);

  // One candidate key list, for the single query vector.
  std::vector<std::vector<uint64_t>> p_keys;
  p_keys.resize(1);
  p_keys[0] = {4, 3, 2, 1, 5, 6, 7, 8, 9, 10};

  ASSERT_EQ(0, streamer->search_bf_by_p_keys_impl(vec.data(), p_keys, qmeta,
                                                  linear_pk_ctx));
  auto &linear_by_pkeys_group_result = linear_pk_ctx->group_result();
  ASSERT_EQ(linear_by_pkeys_group_result.size(), group_num);

  for (uint32_t i = 0; i < linear_by_pkeys_group_result.size(); ++i) {
    const std::string &group_id = linear_by_pkeys_group_result[i].group_id();
    auto &result = linear_by_pkeys_group_result[i].docs();

    ASSERT_GT(result.size(), 0);
    std::cout << "Group ID: " << group_id << std::endl;

    for (uint32_t j = 0; j < result.size(); ++j) {
      std::cout << "\tKey: " << result[j].key() << std::fixed
                << std::setprecision(3) << ", Score: " << result[j].score()
                << std::endl;
    }

    // Group i is expected to be led by key 10 - i (10, 9, 8, 7, 6).
    ASSERT_EQ(10 - i, result[0].key());
  }
}

// Inserts 20000 vectors through add_with_id_impl (all even ids first, then
// all odd ids) and compares search_impl against search_bf_impl on every
// 100th id.
TEST_F(FlatStreamerTest, TestAddAndSearchWithID) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatStreamer");
  ASSERT_NE(streamer, nullptr);

  Params params;
  Params stg_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestGroup.index", true));
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));
  auto ctx = streamer->create_context();
  auto linearCtx = streamer->create_context();
  auto knnCtx = streamer->create_context();
  // All three contexts are dereferenced below, so check all of them.
  ASSERT_TRUE(!!ctx);
  ASSERT_TRUE(!!linearCtx);
  ASSERT_TRUE(!!knnCtx);

  size_t cnt = 20000U;
  NumericalVector<float> vec(dim);
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  // Even ids first ...
  for (size_t i = 0; i < cnt; i += 2) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    streamer->add_with_id_impl(i, vec.data(), qmeta, ctx);
  }
  // ... then the odd ids, so ids are not inserted in increasing order.
  for (size_t i = 1; i < cnt; i += 2) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    streamer->add_with_id_impl(i, vec.data(), qmeta, ctx);
  }
  // streamer->print_debug_info();
  size_t topk = 200;
  linearCtx->set_topk(topk);
  knnCtx->set_topk(topk);
  uint64_t knnTotalTime = 0;
  uint64_t linearTotalTime = 0;
  int totalHits = 0;
  int totalCnts = 0;
  int topk1Hits = 0;
  for (size_t i = 0; i < cnt; i += 100) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i + 0.1f;
    }
    auto t1 = Realtime::MicroSeconds();
    ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, knnCtx));
    auto t2 = Realtime::MicroSeconds();
    ASSERT_EQ(0, streamer->search_bf_impl(vec.data(), qmeta, linearCtx));
    auto t3 = Realtime::MicroSeconds();
    knnTotalTime += t2 - t1;
    linearTotalTime += t3 - t2;
    auto &knnResult = knnCtx->result();
    ASSERT_EQ(topk, knnResult.size());
    topk1Hits += i == knnResult[0].key();
    auto &linearResult = linearCtx->result();
    ASSERT_EQ(topk, linearResult.size());
    ASSERT_EQ(i, linearResult[0].key());
    // Count the search_impl hits that also appear in the brute-force result.
    for (size_t k = 0; k < topk; ++k) {
      totalCnts++;
      for (size_t j = 0; j < topk; ++j) {
        if (linearResult[j].key() == knnResult[k].key()) {
          totalHits++;
          break;
        }
      }
    }
  }
  float recall = totalHits * 1.0f / totalCnts;
  // Only every 100th id is queried, so cnt / 100 queries ran in total.
  float topk1Recall = topk1Hits * 100.0f / cnt;
  // topk is a size_t, so it is printed with %zu (not the signed %zd).
  printf(
      "knnTotalTime=%zu linearTotalTime=%zu totalHits=%d totalCnts=%d "
      "R@%zu=%f R@1=%f\n",
      (size_t)knnTotalTime, (size_t)linearTotalTime, totalHits, totalCnts, topk,
      recall, topk1Recall);
  EXPECT_GT(recall, 0.80f);
  EXPECT_GT(topk1Recall, 0.80f);
}

// Round-trip test: writes 20000 vectors with explicit ids through one
// FlatStreamer, flushes and closes it, then reopens the same index file with a
// second streamer (PARAM_FLAT_USE_ID_MAP disabled) and compares search_impl
// results against search_bf_impl results.
TEST_F(FlatStreamerTest, TestAddAndSearchWithID2) {
  IndexStreamer::Pointer write_streamer =
      IndexFactory::CreateStreamer("FlatStreamer");
  ASSERT_NE(write_streamer, nullptr);

  Params write_params;
  Params write_stg_params;
  auto write_storage = IndexFactory::CreateStorage("MMapFileStorage");
  // Fail the test instead of crashing if the factory cannot create the storage.
  ASSERT_TRUE(write_storage != nullptr);
  ASSERT_EQ(0, write_storage->init(write_stg_params));
  ASSERT_EQ(0, write_storage->open(dir_ + "/TestGroup.index", true));
  ASSERT_EQ(0, write_streamer->init(*index_meta_ptr_, write_params));
  ASSERT_EQ(0, write_streamer->open(write_storage));
  auto ctx = write_streamer->create_context();
  ASSERT_TRUE(!!ctx);

  size_t cnt = 20000U;
  NumericalVector<float> vec(dim);
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  // Insert the even ids first and the odd ids afterwards, so the insertion
  // order differs from the id order.
  for (size_t i = 0; i < cnt; i += 2) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    write_streamer->add_with_id_impl(i, vec.data(), qmeta, ctx);
  }
  for (size_t i = 1; i < cnt; i += 2) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    write_streamer->add_with_id_impl(i, vec.data(), qmeta, ctx);
  }
  write_streamer->flush(0UL);
  write_streamer->close();
  write_streamer.reset();

  // Reopen the index that was just written with a fresh streamer.
  IndexStreamer::Pointer read_streamer =
      IndexFactory::CreateStreamer("FlatStreamer");
  ASSERT_NE(read_streamer, nullptr);
  Params read_params;
  read_params.set(PARAM_FLAT_USE_ID_MAP, false);
  Params read_stg_params;
  auto read_storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_TRUE(read_storage != nullptr);
  ASSERT_EQ(0, read_storage->init(read_stg_params));
  ASSERT_EQ(0, read_storage->open(dir_ + "/TestGroup.index", true));
  ASSERT_EQ(0, read_streamer->init(*index_meta_ptr_, read_params));
  ASSERT_EQ(0, read_streamer->open(read_storage));
  auto linearCtx = read_streamer->create_context();
  auto knnCtx = read_streamer->create_context();
  ASSERT_TRUE(!!linearCtx);
  ASSERT_TRUE(!!knnCtx);
  size_t topk = 200;
  linearCtx->set_topk(topk);
  knnCtx->set_topk(topk);
  uint64_t knnTotalTime = 0;
  uint64_t linearTotalTime = 0;
  int totalHits = 0;
  int totalCnts = 0;
  int topk1Hits = 0;
  // One query for every 100th id; each query is offset by 0.1 from the stored
  // vector with the same id.
  for (size_t i = 0; i < cnt; i += 100) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i + 0.1f;
    }
    auto t1 = Realtime::MicroSeconds();
    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, knnCtx));
    auto t2 = Realtime::MicroSeconds();
    ASSERT_EQ(0, read_streamer->search_bf_impl(vec.data(), qmeta, linearCtx));
    auto t3 = Realtime::MicroSeconds();
    knnTotalTime += t2 - t1;
    linearTotalTime += t3 - t2;
    auto &knnResult = knnCtx->result();
    ASSERT_EQ(topk, knnResult.size());
    topk1Hits += i == knnResult[0].key();
    auto &linearResult = linearCtx->result();
    ASSERT_EQ(topk, linearResult.size());
    ASSERT_EQ(i, linearResult[0].key());
    // Count how many search_impl results also appear in the brute-force list.
    for (size_t k = 0; k < topk; ++k) {
      totalCnts++;
      for (size_t j = 0; j < topk; ++j) {
        if (linearResult[j].key() == knnResult[k].key()) {
          totalHits++;
          break;
        }
      }
    }
  }
  std::cout << "knnTotalTime: " << knnTotalTime << std::endl;
  std::cout << "linearTotalTime: " << linearTotalTime << std::endl;
  float recall = totalHits * 1.0f / totalCnts;
  // cnt / 100 queries were issued, so hits * 100 / cnt is the fraction of
  // queries whose first search_impl result was the expected id.
  float topk1Recall = topk1Hits * 100.0f / cnt;
  EXPECT_GT(recall, 0.80f);
  EXPECT_GT(topk1Recall, 0.80f);
}

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif

================================================
FILE: tests/core/algorithm/flat_sparse/CMakeLists.txt
================================================
# Build script for the flat_sparse unit tests.
# NOTE(review): bazel.cmake presumably defines the cc_gtest() helper used
# below - confirm.
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)

# Collect every *_test.cc file in this directory and its subdirectories.
file(GLOB_RECURSE ALL_TEST_SRCS *_test.cc)

# Declare one gtest target per test source, named after the file without its
# extension.
foreach(CC_SRCS ${ALL_TEST_SRCS})
  get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)
  cc_gtest(
      NAME ${CC_TARGET}
      STRICT
      LIBS zvec_ailego core_framework core_utility core_metric core_quantizer core_knn_flat_sparse
      SRCS ${CC_SRCS}
      INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm
    )
endforeach()

================================================
FILE: tests/core/algorithm/flat_sparse/flat_sparse_builder_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "flat_sparse/flat_sparse_builder.h"
#include <future>
#include <iostream>
#include <vector>
#include <gtest/gtest.h>

using namespace zvec::core;
using namespace zvec::ailego;
using namespace std;

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
#endif

class FlatSparseBuilderTest : public testing::Test {
 protected:
  void SetUp(void);
  void TearDown(void);

  static std::string _dir;
  static shared_ptr<IndexMeta> _index_meta_ptr;
};

std::string FlatSparseBuilderTest::_dir("FlatSparseBuilderTest");
shared_ptr<IndexMeta> FlatSparseBuilderTest::_index_meta_ptr;

void FlatSparseBuilderTest::SetUp(void) {
  _index_meta_ptr.reset(new (nothrow) IndexMeta(IndexMeta::MetaType::MT_SPARSE,
                                                IndexMeta::DataType::DT_FP32));
  _index_meta_ptr->set_metric("InnerProductSparse", 0, Params());
}

// Removes the output directory after every test.
void FlatSparseBuilderTest::TearDown(void) {
  // Build the command in a std::string so a long directory name can never be
  // truncated by a fixed-size buffer.
  const std::string cmd = "rm -rf " + _dir;
  system(cmd.c_str());
}

// Builds and dumps an index from a one-pass holder, checks the builder
// statistics, then cleans the builder up and repeats the cycle with a larger
// multi-pass holder.
TEST_F(FlatSparseBuilderTest, TestGeneral) {
  IndexBuilder::Pointer builder =
      IndexFactory::CreateBuilder("FlatSparseBuilder");
  ASSERT_NE(builder, nullptr);

  // ---- first round: 1000 documents in a one-pass holder ----
  uint32_t nnz = 4;
  size_t first_total = 1000UL;
  auto first_holder =
      make_shared<OnePassIndexSparseHolder<IndexMeta::DT_FP32>>();
  for (size_t doc = 0; doc < first_total; ++doc) {
    // Every document uses indices 0, 20, 40, ... with all values equal to its
    // own id.
    NumericalVector<uint32_t> indices(nnz);
    NumericalVector<float> values(nnz);
    for (size_t k = 0; k < nnz; ++k) {
      indices[k] = 20 * k;
      values[k] = doc;
    }

    SparseVector<float> sparse;
    sparse.add_sparses(indices, values);

    ASSERT_TRUE(first_holder->emplace(doc, sparse));
  }

  Params params;
  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));
  ASSERT_EQ(0, builder->train(first_holder));
  ASSERT_EQ(0, builder->build(first_holder));

  string path = _dir + "/TestGeneral";
  auto first_dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(first_dumper, nullptr);
  ASSERT_EQ(0, first_dumper->create(path));
  ASSERT_EQ(0, builder->dump(first_dumper));
  ASSERT_EQ(0, first_dumper->close());

  auto &stats = builder->stats();
  ASSERT_EQ(0UL, stats.trained_count());
  ASSERT_EQ(first_total, stats.built_count());
  ASSERT_EQ(first_total, stats.dumped_count());
  ASSERT_EQ(0UL, stats.discarded_count());
  ASSERT_EQ(0UL, stats.trained_costtime());
  ASSERT_EQ(stats.built_costtime(), 0UL);
  // ASSERT_GT(stats.dumped_costtime(), 0UL);

  // ---- second round: reset the builder, 2000 documents, multi-pass holder ----
  ASSERT_EQ(0, builder->cleanup());

  size_t second_total = 2000UL;
  auto second_holder =
      make_shared<MultiPassIndexSparseHolder<IndexMeta::DT_FP32>>();
  for (size_t doc = 0; doc < second_total; ++doc) {
    NumericalVector<uint32_t> indices(nnz);
    NumericalVector<float> values(nnz);
    for (size_t k = 0; k < nnz; ++k) {
      indices[k] = 20 * k;
      values[k] = doc;
    }

    SparseVector<float> sparse;
    sparse.add_sparses(indices, values);

    ASSERT_TRUE(second_holder->emplace(doc, sparse));
  }

  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));
  ASSERT_EQ(0, builder->train(second_holder));
  ASSERT_EQ(0, builder->build(second_holder));

  auto second_dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(second_dumper, nullptr);
  ASSERT_EQ(0, second_dumper->create(path));
  ASSERT_EQ(0, builder->dump(second_dumper));
  ASSERT_EQ(0, second_dumper->close());

  // `stats` is the reference obtained in the first round; it is read again
  // here after the rebuild.
  ASSERT_EQ(0UL, stats.trained_count());
  ASSERT_EQ(second_total, stats.built_count());
  ASSERT_EQ(second_total, stats.dumped_count());
  ASSERT_EQ(0UL, stats.discarded_count());
  ASSERT_EQ(0UL, stats.trained_costtime());
  ASSERT_EQ(stats.built_costtime(), 0UL);
}

// Trains and builds two independent builders concurrently from the same holder
// and the same thread pool, then dumps both and checks their built counts.
TEST_F(FlatSparseBuilderTest, TestIndexThreads) {
  IndexBuilder::Pointer builder1 =
      IndexFactory::CreateBuilder("FlatSparseBuilder");
  ASSERT_NE(builder1, nullptr);
  IndexBuilder::Pointer builder2 =
      IndexFactory::CreateBuilder("FlatSparseBuilder");
  ASSERT_NE(builder2, nullptr);

  size_t doc_total = 1000UL;
  uint32_t nnz = 32;
  auto holder = make_shared<MultiPassIndexSparseHolder<IndexMeta::DT_FP32>>();

  for (size_t doc = 0; doc < doc_total; ++doc) {
    // Indices are 0, 20, 40, ...; every value equals the document id.
    NumericalVector<uint32_t> indices(nnz);
    NumericalVector<float> values(nnz);
    for (size_t k = 0; k < nnz; ++k) {
      indices[k] = 20 * k;
      values[k] = doc;
    }

    SparseVector<float> sparse;
    sparse.add_sparses(indices, values);

    ASSERT_TRUE(holder->emplace(doc, sparse));
  }

  Params params;
  // The pool size argument is a time-seeded random value in [0, 3].
  std::srand(Realtime::MilliSeconds());
  auto threads =
      std::make_shared<SingleQueueIndexThreads>(std::rand() % 4, false);
  ASSERT_EQ(0, builder1->init(*_index_meta_ptr, params));
  ASSERT_EQ(0, builder2->init(*_index_meta_ptr, params));

  // Each task trains and builds one builder on the shared pool.
  auto run_first = [&]() {
    ASSERT_EQ(0, builder1->train(threads, holder));
    ASSERT_EQ(0, builder1->build(threads, holder));
  };
  auto run_second = [&]() {
    ASSERT_EQ(0, builder2->train(threads, holder));
    ASSERT_EQ(0, builder2->build(threads, holder));
  };

  auto first_task = std::async(std::launch::async, run_first);
  auto second_task = std::async(std::launch::async, run_second);
  first_task.wait();
  second_task.wait();

  // Both builders are dumped, one after the other, to the same path through
  // the same dumper object.
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);

  string path = _dir + "/TestIndexThreads";
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder1->dump(dumper));
  ASSERT_EQ(0, dumper->close());
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder2->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  auto &first_stats = builder1->stats();
  ASSERT_EQ(doc_total, first_stats.built_count());
  auto &second_stats = builder2->stats();
  ASSERT_EQ(doc_total, second_stats.built_count());
}

// Runs the input through "HalfFloatSparseConverter", builds and dumps an index
// from the converted holder, then cleans the builder up and rebuilds it from an
// unconverted multi-pass holder.
TEST_F(FlatSparseBuilderTest, TestHalfFloatConverter) {
  IndexBuilder::Pointer builder =
      IndexFactory::CreateBuilder("FlatSparseBuilder");
  ASSERT_NE(builder, nullptr);

  auto holder = make_shared<OnePassIndexSparseHolder<IndexMeta::DT_FP32>>();
  uint32_t sparse_count = 4;
  size_t doc_cnt = 1000UL;
  for (size_t i = 0; i < doc_cnt; i++) {
    SparseVector<float> vec;

    // Indices are 0, 20, 40, ...; every value equals the document id.
    NumericalVector<uint32_t> sparse_indices(sparse_count);
    NumericalVector<float> sparse_values(sparse_count);
    for (size_t j = 0; j < sparse_count; ++j) {
      sparse_indices[j] = 20 * j;
      sparse_values[j] = i;
    }

    vec.add_sparses(sparse_indices, sparse_values);

    ASSERT_TRUE(holder->emplace(i, vec));
  }

  Params converter_params;
  auto converter = IndexFactory::CreateConverter("HalfFloatSparseConverter");
  // Fail cleanly (instead of crashing) when the converter is not registered or
  // cannot be set up.
  ASSERT_TRUE(converter != nullptr);
  ASSERT_EQ(0, converter->init(*_index_meta_ptr, converter_params));

  // The builder is initialised with the meta produced by the converter.
  IndexMeta index_meta = converter->meta();

  ASSERT_EQ(0, converter->transform(holder));

  auto converted_holder = converter->sparse_result();
  ASSERT_TRUE(converted_holder != nullptr);

  Params params;
  ASSERT_EQ(0, builder->init(index_meta, converter_params));

  ASSERT_EQ(0, builder->train(converted_holder));

  ASSERT_EQ(0, builder->build(converted_holder));

  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);

  string path = _dir + "/TestHalFloatConverter";
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  auto &stats = builder->stats();
  ASSERT_EQ(0UL, stats.trained_count());
  ASSERT_EQ(doc_cnt, stats.built_count());
  ASSERT_EQ(doc_cnt, stats.dumped_count());
  ASSERT_EQ(0UL, stats.discarded_count());
  ASSERT_EQ(0UL, stats.trained_costtime());
  ASSERT_EQ(stats.built_costtime(), 0UL);
  // ASSERT_GT(stats.dumped_costtime(), 0UL);

  // cleanup and rebuild, this time with the original (unconverted) meta
  ASSERT_EQ(0, builder->cleanup());

  auto holder2 = make_shared<MultiPassIndexSparseHolder<IndexMeta::DT_FP32>>();
  size_t doc_cnt2 = 2000UL;
  for (size_t i = 0; i < doc_cnt2; i++) {
    SparseVector<float> vec;

    NumericalVector<uint32_t> sparse_indices(sparse_count);
    NumericalVector<float> sparse_values(sparse_count);
    for (size_t j = 0; j < sparse_count; ++j) {
      sparse_indices[j] = 20 * j;
      sparse_values[j] = i;
    }

    vec.add_sparses(sparse_indices, sparse_values);

    ASSERT_TRUE(holder2->emplace(i, vec));
  }

  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));
  ASSERT_EQ(0, builder->train(holder2));
  ASSERT_EQ(0, builder->build(holder2));
  auto dumper2 = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper2, nullptr);
  ASSERT_EQ(0, dumper2->create(path));
  ASSERT_EQ(0, builder->dump(dumper2));
  ASSERT_EQ(0, dumper2->close());

  ASSERT_EQ(0UL, stats.trained_count());
  ASSERT_EQ(doc_cnt2, stats.built_count());
  ASSERT_EQ(doc_cnt2, stats.dumped_count());
  ASSERT_EQ(0UL, stats.discarded_count());
  ASSERT_EQ(0UL, stats.trained_costtime());
  ASSERT_EQ(stats.built_costtime(), 0UL);
}

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif


================================================
FILE: tests/core/algorithm/flat_sparse/flat_sparse_searcher_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <future>
#include <iostream>
#include <random>
#include <vector>
#include <ailego/math/norm2_matrix.h>
#include <ailego/utility/math_helper.h>
#include <gtest/gtest.h>
#include <zvec/ailego/container/vector.h>
#include "zvec/core/framework/index_factory.h"
#include "zvec/core/framework/index_meta.h"

using namespace zvec::core;
using namespace zvec::ailego;
using namespace std;

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
#endif

// Default number of non-zero entries per generated sparse vector.
static constexpr size_t sparse_dim_count = 16;

class FlatSparseSearcherTest : public testing::Test {
 protected:
  void SetUp(void);
  void TearDown(void);
  void generate_sparse_data(
      size_t cnt, uint32_t sparse_dim_count,
      std::vector<NumericalVector<uint32_t>> &sparse_indices_list,
      std::vector<NumericalVector<float>> &sparse_vec_list, bool norm);

  static std::string dir_;
  static std::shared_ptr<IndexMeta> index_meta_ptr_;
};

std::string FlatSparseSearcherTest::dir_("searcher_test/");
std::shared_ptr<IndexMeta> FlatSparseSearcherTest::index_meta_ptr_;

// Appends `cnt` random sparse vectors to the output lists.
//
// cnt                 - number of vectors to generate.
// sparse_dim_count    - number of entries per vector; the indices are always
//                       0, 20, 40, ...
// sparse_indices_list - receives the index array of every generated vector.
// sparse_vec_list     - receives the value array of every generated vector;
//                       values are drawn uniformly from [-1, 1).
// norm                - when true, each value array is divided by the norm
//                       computed by Norm2Matrix before it is stored.
void FlatSparseSearcherTest::generate_sparse_data(
    size_t cnt, uint32_t sparse_dim_count,
    std::vector<NumericalVector<uint32_t>> &sparse_indices_list,
    std::vector<NumericalVector<float>> &sparse_vec_list, bool norm) {
  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_real_distribution<float> dist(-1.0, 1.0);

  // Grow the output lists once instead of on every push_back.
  sparse_indices_list.reserve(sparse_indices_list.size() + cnt);
  sparse_vec_list.reserve(sparse_vec_list.size() + cnt);

  for (size_t i = 0; i < cnt; ++i) {
    // prepare sparse
    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
    NumericalVector<float> sparse_vec(sparse_dim_count);

    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices[j] = j * 20;
      sparse_vec[j] = dist(gen);
    }

    // The local used to be named `norm` as well, which shadowed the parameter
    // and made the flag a no-op; it is now honoured.
    if (norm) {
      float norm_value = 0.0f;
      Norm2Matrix<float, 1>::Compute(sparse_vec.data(), sparse_dim_count,
                                     &norm_value);
      // Skip the division for an all-zero (or empty) vector to avoid NaNs.
      if (norm_value > 0.0f) {
        for (size_t j = 0; j < sparse_dim_count; ++j) {
          sparse_vec[j] = sparse_vec[j] / norm_value;
        }
      }
    }

    sparse_indices_list.push_back(sparse_indices);
    sparse_vec_list.push_back(sparse_vec);
  }
}


// Sets the logger level, recreates the shared index meta (sparse, FP32,
// "InnerProductSparse" metric) and removes any leftover working directory
// before every test.
void FlatSparseSearcherTest::SetUp(void) {
  IndexLoggerBroker::SetLevel(2);

  index_meta_ptr_.reset(new IndexMeta(IndexMeta::MetaType::MT_SPARSE,
                                      IndexMeta::DataType::DT_FP32));
  index_meta_ptr_->set_metric("InnerProductSparse", 0, Params());

  // Build the command in a std::string so a long directory name can never be
  // truncated by a fixed-size buffer.
  const std::string cmd = "rm -rf " + dir_;
  system(cmd.c_str());
}

// Removes the working directory after every test.
void FlatSparseSearcherTest::TearDown(void) {
  // Build the command in a std::string so a long directory name can never be
  // truncated by a fixed-size buffer.
  const std::string cmd = "rm -rf " + dir_;
  system(cmd.c_str());
}

// Adds 100 random sparse vectors through a streamer, reads them back, dumps the
// streamer to a file and verifies that a searcher loaded from that dump returns
// the same vectors.
TEST_F(FlatSparseSearcherTest, TestGeneral) {
  // ---- storage ----
  Params storage_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_TRUE(storage != nullptr);
  ASSERT_EQ(0, storage->init(storage_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestGeneral", true));

  // ---- streamer ----
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  IndexMeta index_meta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  index_meta.set_metric("InnerProductSparse", 0, Params());

  Params streamer_params;
  ASSERT_EQ(0, streamer->init(index_meta, streamer_params));
  ASSERT_EQ(0, streamer->open(storage));

  // ---- input data: 100 vectors with 32 entries each ----
  size_t nnz = 32;
  size_t doc_total = 100U;
  std::vector<NumericalVector<uint32_t>> expected_indices;
  std::vector<NumericalVector<float>> expected_values;

  generate_sparse_data(doc_total, nnz, expected_indices, expected_values, true);

  // ---- add ----
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  IndexQueryMeta qmeta(IndexMeta::DT_FP32);
  for (size_t doc = 0; doc < doc_total; ++doc) {
    ASSERT_EQ(0, streamer->add_impl(doc, nnz, expected_indices[doc].data(),
                                    expected_values[doc].data(), qmeta, ctx));
  }

  // ---- read back through the streamer ----
  uint32_t fetched_count;
  std::string indices_buffer;
  std::string values_buffer;
  for (size_t doc = 0; doc < doc_total; ++doc) {
    ASSERT_EQ(0, streamer->get_sparse_vector(doc, &fetched_count,
                                             &indices_buffer, &values_buffer));
    ASSERT_EQ(nnz, fetched_count);
    // The buffers hold the raw uint32_t indices and float values.
    const auto *fetched_indices =
        reinterpret_cast<const uint32_t *>(indices_buffer.data());
    const auto *fetched_values =
        reinterpret_cast<const float *>(values_buffer.data());
    for (size_t k = 0; k < fetched_count; ++k) {
      ASSERT_EQ(fetched_indices[k], expected_indices[doc][k]);
      ASSERT_FLOAT_EQ(fetched_values[k], expected_values[doc][k]);
    }

    // The buffers are reused by the next call, so they must be emptied first.
    indices_buffer.clear();
    values_buffer.clear();
  }

  // ---- dump ----
  auto dump_path = dir_ + "/TestGeneral_dump";
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  ASSERT_EQ(0, dumper->create(dump_path));
  ASSERT_EQ(0, streamer->dump(dumper));
  ASSERT_EQ(0, streamer->close());
  ASSERT_EQ(0, dumper->close());

  // ---- load the dump into a searcher ----
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("FlatSparseSearcher");
  auto read_storage = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_TRUE(read_storage != nullptr);
  ASSERT_TRUE(searcher != nullptr);
  ASSERT_EQ(0, read_storage->open(dump_path, false));
  ASSERT_EQ(0, searcher->init(Params()));
  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));

  // ---- read back through the searcher ----
  for (size_t doc = 0; doc < doc_total; ++doc) {
    ASSERT_EQ(0, searcher->get_sparse_vector(doc, &fetched_count,
                                             &indices_buffer, &values_buffer));
    ASSERT_EQ(nnz, fetched_count);
    const auto *fetched_indices =
        reinterpret_cast<const uint32_t *>(indices_buffer.data());
    const auto *fetched_values =
        reinterpret_cast<const float *>(values_buffer.data());
    for (size_t k = 0; k < fetched_count; ++k) {
      ASSERT_EQ(fetched_indices[k], expected_indices[doc][k]);
      ASSERT_FLOAT_EQ(fetched_values[k], expected_values[doc][k]);
    }

    indices_buffer.clear();
    values_buffer.clear();
  }
}

// Adds 10000 random sparse vectors through a streamer, dumps it, loads the dump
// into a searcher and checks that search_impl and search_bf_impl return the
// same keys in the same order for every 50th stored vector.
TEST_F(FlatSparseSearcherTest, TestStreamerDump) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatSparseStreamer");
  ASSERT_NE(streamer, nullptr);

  Params params;
  Params stg_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  // Fail the test instead of crashing if the factory cannot create the storage.
  ASSERT_TRUE(storage != nullptr);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestStreamerDump.index", true));
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  size_t cnt = 10000U;
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);

  std::vector<NumericalVector<uint32_t>> sparse_indices_list;
  std::vector<NumericalVector<float>> sparse_vec_list;

  generate_sparse_data(cnt, sparse_dim_count, sparse_indices_list,
                       sparse_vec_list, true);

  IndexQueryMeta qmeta(IndexMeta::DT_FP32);
  for (size_t i = 0; i < cnt; i++) {
    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count,
                                    sparse_indices_list[i].data(),
                                    sparse_vec_list[i].data(), qmeta, ctx));
  }

  auto path = dir_ + "/TestStreamerDump";
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, streamer->dump(dumper));
  ASSERT_EQ(0, streamer->close());
  ASSERT_EQ(0, dumper->close());

  // do searcher knn
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("FlatSparseSearcher");
  auto read_storage = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_TRUE(read_storage != nullptr);
  ASSERT_TRUE(searcher != nullptr);
  ASSERT_EQ(0, read_storage->open(path, false));
  ASSERT_EQ(0, searcher->init(Params()));
  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));
  auto linearCtx = searcher->create_context();
  auto knnCtx = searcher->create_context();
  ASSERT_TRUE(!!linearCtx);
  ASSERT_TRUE(!!knnCtx);
  size_t topk = 200;
  linearCtx->set_topk(topk);
  knnCtx->set_topk(topk);
  uint64_t knnTotalTime = 0;
  uint64_t linearTotalTime = 0;

  // Use every 50th stored vector as its own query.
  for (size_t i = 0; i < cnt; i += 50) {
    const auto &sparse_indices = sparse_indices_list[i];
    const auto &sparse_vec = sparse_vec_list[i];

    auto t1 = Realtime::MicroSeconds();

    ASSERT_EQ(0, searcher->search_impl(sparse_dim_count, sparse_indices.data(),
                                       sparse_vec.data(), qmeta, knnCtx));

    auto t2 = Realtime::MicroSeconds();

    ASSERT_EQ(0,
              searcher->search_bf_impl(sparse_dim_count, sparse_indices.data(),
                                       sparse_vec.data(), qmeta, linearCtx));

    auto t3 = Realtime::MicroSeconds();

    knnTotalTime += t2 - t1;
    linearTotalTime += t3 - t2;

    auto &knnResult = knnCtx->result();
    auto &linearResult = linearCtx->result();

    ASSERT_EQ(topk, linearResult.size());
    ASSERT_EQ(topk, knnResult.size());
    // The query is a stored vector, so it must be its own best match.
    ASSERT_EQ(i, linearResult[0].key());

    for (size_t k = 0; k < topk; ++k) {
      ASSERT_EQ(linearResult[k].key(), knnResult[k].key());
    }
  }

  printf("linear: %zu, knn: %zu\n", (size_t)linearTotalTime,
         (size_t)knnTotalTime);
}

// Dumps 100 random sparse vectors from a streamer and then loads / verifies /
// unloads the dump with the same searcher object five times in a row.
TEST_F(FlatSparseSearcherTest, TestLoadClose) {
  // ---- storage ----
  Params storage_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_TRUE(storage != nullptr);
  ASSERT_EQ(0, storage->init(storage_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestGeneral", true));

  // ---- streamer ----
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  IndexMeta index_meta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  index_meta.set_metric("InnerProductSparse", 0, Params());

  Params streamer_params;
  ASSERT_EQ(0, streamer->init(index_meta, streamer_params));
  ASSERT_EQ(0, streamer->open(storage));

  // ---- input data: 100 vectors with 32 entries each ----
  size_t nnz = 32;
  size_t doc_total = 100U;
  std::vector<NumericalVector<uint32_t>> expected_indices;
  std::vector<NumericalVector<float>> expected_values;

  generate_sparse_data(doc_total, nnz, expected_indices, expected_values, true);

  // ---- add ----
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  IndexQueryMeta qmeta(IndexMeta::DT_FP32);
  for (size_t doc = 0; doc < doc_total; ++doc) {
    ASSERT_EQ(0, streamer->add_impl(doc, nnz, expected_indices[doc].data(),
                                    expected_values[doc].data(), qmeta, ctx));
  }

  // ---- read back through the streamer ----
  uint32_t fetched_count;
  std::string indices_buffer;
  std::string values_buffer;
  for (size_t doc = 0; doc < doc_total; ++doc) {
    ASSERT_EQ(0, streamer->get_sparse_vector(doc, &fetched_count,
                                             &indices_buffer, &values_buffer));
    ASSERT_EQ(nnz, fetched_count);
    // The buffers hold the raw uint32_t indices and float values.
    const auto *fetched_indices =
        reinterpret_cast<const uint32_t *>(indices_buffer.data());
    const auto *fetched_values =
        reinterpret_cast<const float *>(values_buffer.data());
    for (size_t k = 0; k < fetched_count; ++k) {
      ASSERT_EQ(fetched_indices[k], expected_indices[doc][k]);
      ASSERT_FLOAT_EQ(fetched_values[k], expected_values[doc][k]);
    }

    // The buffers are reused by the next call, so they must be emptied first.
    indices_buffer.clear();
    values_buffer.clear();
  }

  // ---- dump ----
  auto dump_path = dir_ + "/TestGeneral_dump";
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  ASSERT_EQ(0, dumper->create(dump_path));
  ASSERT_EQ(0, streamer->dump(dumper));
  ASSERT_EQ(0, streamer->close());
  ASSERT_EQ(0, dumper->close());

  // ---- searcher, initialised once and reused for every round ----
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("FlatSparseSearcher");
  auto read_storage = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_TRUE(read_storage != nullptr);
  ASSERT_TRUE(searcher != nullptr);
  ASSERT_EQ(0, read_storage->open(dump_path, false));
  ASSERT_EQ(0, searcher->init(Params()));

  // Five load / verify / unload rounds; the counter printed is the number of
  // rounds still to come (4, 3, 2, 1, 0).
  for (uint32_t remaining = 5; remaining-- > 0;) {
    std::cout << "loop: " << remaining << std::endl;

    ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));

    for (size_t doc = 0; doc < doc_total; ++doc) {
      ASSERT_EQ(0,
                searcher->get_sparse_vector(doc, &fetched_count,
                                            &indices_buffer, &values_buffer));
      ASSERT_EQ(nnz, fetched_count);
      const auto *fetched_indices =
          reinterpret_cast<const uint32_t *>(indices_buffer.data());
      const auto *fetched_values =
          reinterpret_cast<const float *>(values_buffer.data());
      for (size_t k = 0; k < fetched_count; ++k) {
        ASSERT_EQ(fetched_indices[k], expected_indices[doc][k]);
        ASSERT_FLOAT_EQ(fetched_values[k], expected_values[doc][k]);
      }

      indices_buffer.clear();
      values_buffer.clear();
    }

    ASSERT_EQ(searcher->unload(), 0);
  }
}

// Stores 5000 vectors whose values are all -(id + 1), dumps them, loads the
// dump into a searcher and checks that brute-force search with all-positive
// queries returns keys 0, 1, 2, ... in that order.
TEST_F(FlatSparseSearcherTest, TestSearch) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  Params params;
  Params stg_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  // Fail the test instead of crashing if the factory cannot create the storage.
  ASSERT_TRUE(storage != nullptr);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestLinearSearch.index", true));
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  size_t cnt = 5000UL;
  // NOTE(review): this context is created by the streamer but is later passed
  // to the searcher after the streamer has been closed - confirm that this is
  // intended.
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  IndexQueryMeta qmeta(IndexMeta::DT_FP32);

  NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
  NumericalVector<float> sparse_values(sparse_dim_count);
  for (size_t i = 0; i < cnt; ++i) {
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices[j] = j * 20;
      sparse_values[j] = -1.0 * i - 1.0f;
    }

    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),
                                    sparse_values.data(), qmeta, ctx));
  }

  // test dump
  auto path = dir_ + "/TestGeneral_dump";
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, streamer->dump(dumper));
  ASSERT_EQ(0, streamer->close());
  ASSERT_EQ(0, dumper->close());

  // load the dump into a searcher
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("FlatSparseSearcher");
  auto read_storage = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_TRUE(read_storage != nullptr);
  ASSERT_TRUE(searcher != nullptr);
  ASSERT_EQ(0, read_storage->open(path, false));
  ASSERT_EQ(0, searcher->init(Params()));
  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));

  size_t step = 50;
  for (size_t i = 0; i < cnt; i += step) {
    // All-positive query; its inner product with stored vector k is a
    // negative number whose magnitude grows with k, so key 0 is expected first.
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices[j] = j * 20;
      sparse_values[j] = i + 1.0f;
    }

    ctx->set_topk(1U);
    ASSERT_EQ(0,
              searcher->search_bf_impl(sparse_dim_count, sparse_indices.data(),
                                       sparse_values.data(), qmeta, ctx));
    auto &result1 = ctx->result();
    ASSERT_EQ(1UL, result1.size());
    ASSERT_EQ(0, result1[0].key());

    ctx->set_topk(3U);
    ASSERT_EQ(0,
              searcher->search_bf_impl(sparse_dim_count, sparse_indices.data(),
                                       sparse_values.data(), qmeta, ctx));
    auto &result2 = ctx->result();
    ASSERT_EQ(3UL, result2.size());
    // `rank` replaces an inner `i` that shadowed the outer loop index.
    for (size_t rank = 0; rank < 3UL; ++rank) {
      ASSERT_EQ(rank, result2[rank].key());
    }
  }

  ctx->set_topk(100U);
  for (size_t j = 0; j < sparse_dim_count; ++j) {
    sparse_indices[j] = j * 20;
    sparse_values[j] = 10.1f;
  }

  ASSERT_EQ(0, searcher->search_bf_impl(sparse_dim_count, sparse_indices.data(),
                                        sparse_values.data(), qmeta, ctx));
  auto &result = ctx->result();
  ASSERT_EQ(100U, result.size());
  for (size_t i = 0; i < 100; ++i) {
    ASSERT_EQ(i, result[i].key());
  }
}

// Exercises FlatSparseSearcher::search_bf_by_p_keys_impl on an index that was
// built with a FlatSparseStreamer, dumped to disk and loaded back.
//
// Document `i` stores the value -(i + 1) on every dimension and every query is
// strictly positive; the assertions below expect the hits to come back in
// ascending key order.
TEST_F(FlatSparseSearcherTest, TestSearchPKeys) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  Params params;
  Params stg_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  // Fail with a readable assertion instead of dereferencing a null storage.
  ASSERT_NE(nullptr, storage);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestLinearSearchByKeys.index", true));
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  size_t cnt = 5000UL;
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  IndexQueryMeta qmeta(IndexMeta::DT_FP32);

  // A single candidate list that initially contains every inserted key.
  std::vector<std::vector<uint64_t>> p_keys(1);
  p_keys[0].resize(cnt);

  NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
  NumericalVector<float> sparse_velues(sparse_dim_count);

  // Writes `value` into every dimension of the reusable sparse vector; the
  // indices are always 0, 20, 40, ...
  auto fill_vector = [&](float value) {
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices[j] = j * 20;
      sparse_velues[j] = value;
    }
  };

  for (size_t i = 0; i < cnt; ++i) {
    fill_vector(static_cast<float>(-1.0 * i - 1.0f));

    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),
                                    sparse_velues.data(), qmeta, ctx));

    p_keys[0][i] = i;
  }

  // Dump the streamer content so it can be loaded by the searcher.
  auto path = dir_ + "/TestGeneral_dump";
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, streamer->dump(dumper));
  ASSERT_EQ(0, streamer->close());
  ASSERT_EQ(0, dumper->close());

  // Load the dumped index into a searcher.
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("FlatSparseSearcher");
  auto read_storage = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_TRUE(read_storage != nullptr);
  ASSERT_TRUE(searcher != nullptr);
  ASSERT_EQ(0, read_storage->open(path, false));
  ASSERT_EQ(0, searcher->init(Params()));
  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));

  // Runs the key-restricted brute-force search with the current vector and
  // the current content of `p_keys`.
  auto search_by_keys = [&]() {
    return searcher->search_bf_by_p_keys_impl(
        sparse_dim_count, sparse_indices.data(), sparse_velues.data(), p_keys,
        qmeta, ctx);
  };

  // Sampled queries (every 50th magnitude) against the full candidate list.
  size_t topk = 3;
  for (size_t i = 0; i < cnt; i += 50) {
    fill_vector(i + 1.0f);

    ctx->set_topk(1U);
    ASSERT_EQ(0, search_by_keys());
    auto &result1 = ctx->result();
    ASSERT_EQ(1UL, result1.size());
    ASSERT_EQ(0, result1[0].key());

    ctx->set_topk(topk);
    ASSERT_EQ(0, search_by_keys());
    auto &result2 = ctx->result();
    ASSERT_EQ(topk, result2.size());
    ASSERT_EQ(0, result2[0].key());
    ASSERT_EQ(1, result2[1].key());
    ASSERT_EQ(2, result2[2].key());
  }

  // Top-100 over the full candidate list.
  {
    ctx->set_topk(100U);
    fill_vector(1.0f);
    ASSERT_EQ(0, search_by_keys());
    auto &result = ctx->result();
    ASSERT_EQ(100U, result.size());
    ASSERT_EQ(0, result[0].key());
    ASSERT_EQ(1, result[1].key());
    ASSERT_EQ(10, result[10].key());
    ASSERT_EQ(20, result[20].key());
    ASSERT_EQ(30, result[30].key());
    ASSERT_EQ(35, result[35].key());
    ASSERT_EQ(99, result[99].key());
  }

  // Candidate list mixing existing keys with keys that were never inserted
  // (cnt + 1 and cnt + 2): only the three existing keys are expected back.
  {
    ctx->set_topk(100U);
    fill_vector(10.0f);

    p_keys[0] = {{cnt + 1, 10, 1, 15, cnt + 2}};
    ASSERT_EQ(0, search_by_keys());
    auto &result = ctx->result();
    ASSERT_EQ(3U, result.size());
    ASSERT_EQ(1, result[0].key());
    ASSERT_EQ(10, result[1].key());
    ASSERT_EQ(15, result[2].key());
  }

  // Candidate list restricted to every 10th key.
  {
    ctx->set_topk(100U);
    fill_vector(9.0f);

    p_keys[0].clear();
    for (size_t j = 0; j < cnt; j += 10) {
      p_keys[0].push_back((uint64_t)j);
    }
    ASSERT_EQ(0, search_by_keys());
    auto &result = ctx->result();
    ASSERT_EQ(100U, result.size());
    ASSERT_EQ(0, result[0].key());
    ASSERT_EQ(10, result[1].key());
    ASSERT_EQ(100, result[10].key());
    ASSERT_EQ(200, result[20].key());
    ASSERT_EQ(300, result[30].key());
    ASSERT_EQ(350, result[35].key());
    ASSERT_EQ(990, result[99].key());
  }
}

// Adds vectors to a FlatSparseStreamer from three concurrent tasks, verifies
// the stored content through the sparse provider iterator, dumps the index,
// loads it into a FlatSparseSearcher and finally runs three concurrent search
// tasks against that searcher.
TEST_F(FlatSparseSearcherTest, TestMultiThread) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  Params params;
  // Test-local dimension count; `static` so the lambdas below can use it
  // without capturing it.
  constexpr size_t static sparse_dim_count = 32;
  IndexMeta meta(IndexMeta::MetaType::MT_SPARSE, IndexMeta::DataType::DT_FP32);
  meta.set_metric("InnerProductSparse", 0, Params());
  ASSERT_EQ(0, streamer->init(meta, params));
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "TessKnnMultiThread", true));
  ASSERT_EQ(0, streamer->open(storage));

  // Inserts `addCnt` vectors with keys baseKey .. baseKey + addCnt - 1. Every
  // dimension of a vector holds its own key as a float. Returns the number of
  // add_impl calls that returned 0.
  auto addVector = [&streamer](int baseKey, size_t addCnt) {
    IndexQueryMeta qmeta(IndexMeta::DT_FP32);
    size_t succAdd = 0;
    auto ctx = streamer->create_context();
    for (size_t i = 0; i < addCnt; i++) {
      NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
      NumericalVector<float> sparse_velues(sparse_dim_count);

      for (size_t j = 0; j < sparse_dim_count; ++j) {
        sparse_indices[j] = j * 20;
        sparse_velues[j] = (float)i + baseKey;
      }

      // add_impl returns 0 on success, so `!ret` contributes 1 per success.
      succAdd += !streamer->add_impl(baseKey + i, sparse_dim_count,
                                     sparse_indices.data(),
                                     sparse_velues.data(), qmeta, ctx);
    }
    streamer->flush(0UL);
    return succAdd;
  };

  // Three writers covering the disjoint key ranges [0, 1000), [1000, 2000)
  // and [2000, 3000).
  auto t2 = std::async(std::launch::async, addVector, 1000, 1000);
  auto t3 = std::async(std::launch::async, addVector, 2000, 1000);
  auto t1 = std::async(std::launch::async, addVector, 0, 1000);
  ASSERT_EQ(1000U, t1.get());
  ASSERT_EQ(1000U, t2.get());
  ASSERT_EQ(1000U, t3.get());
  streamer->close();

  // Reopen the streamer and walk every stored vector: each value must equal
  // the key it is stored under, and the keys must be exactly 0 .. 2999.
  ASSERT_EQ(0, streamer->open(storage));
  auto provider = streamer->create_sparse_provider();
  auto iter = provider->create_iterator();
  ASSERT_TRUE(!!iter);
  size_t total = 0;
  uint64_t min = 1000;
  uint64_t max = 0;

  // Collects the distinct keys seen, to detect duplicates.
  std::set<uint64_t> keys;

  while (iter->is_valid()) {
    const uint32_t sparse_count = iter->sparse_count();
    ASSERT_EQ(sparse_count, sparse_dim_count);

    const float *data = reinterpret_cast<const float *>(iter->sparse_data());
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      ASSERT_EQ((float)iter->key(), data[j]);
    }
    total++;
    min = std::min(min, iter->key());
    max = std::max(max, iter->key());
    keys.insert(iter->key());
    iter->next();
  }

  ASSERT_EQ(3000, keys.size());

  ASSERT_EQ(3000, total);
  ASSERT_EQ(0, min);
  ASSERT_EQ(2999, max);

  // Dump the streamer content to a file.
  auto path = dir_ + "/TestGeneral_dump";
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, streamer->dump(dumper));
  ASSERT_EQ(0, streamer->close());
  ASSERT_EQ(0, dumper->close());

  // Load the dumped index into a searcher.
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("FlatSparseSearcher");
  auto read_storage = IndexFactory::CreateStorage("MMapFileReadStorage");
  ASSERT_TRUE(read_storage != nullptr);
  ASSERT_TRUE(searcher != nullptr);
  ASSERT_EQ(0, read_storage->open(path, false));
  ASSERT_EQ(0, searcher->init(Params()));
  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));

  // ====== multi thread search
  size_t topk = 10;
  size_t cnt = 3000;
  // One search task: for `cnt` strictly positive queries it runs search_impl,
  // search_bf_impl and search_bf_by_p_keys_impl, each with its own context,
  // and checks that the first two return the same key set.
  auto knnSearch = [&]() {
    auto linearCtx = searcher->create_context();
    auto linearByPkeysCtx = searcher->create_context();
    auto ctx = searcher->create_context();
    IndexQueryMeta qmeta(IndexMeta::DT_FP32);
    linearCtx->set_topk(topk);
    linearByPkeysCtx->set_topk(topk);
    ctx->set_topk(topk);
    size_t totalCnts = 0;
    size_t totalHits = 0;
    for (size_t i = 0; i < cnt; i += 1) {
      NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
      NumericalVector<float> sparse_velues(sparse_dim_count);

      for (size_t j = 0; j < sparse_dim_count; ++j) {
        sparse_indices[j] = j * 20;
        sparse_velues[j] = ((float)i + 1.1f);
      }

      ASSERT_EQ(0,
                searcher->search_impl(sparse_dim_count, sparse_indices.data(),
                                      sparse_velues.data(), qmeta, ctx));
      ASSERT_EQ(
          0, searcher->search_bf_impl(sparse_dim_count, sparse_indices.data(),
                                      sparse_velues.data(), qmeta, linearCtx));
      // Restrict the third search to the three largest keys.
      std::vector<std::vector<uint64_t>> p_keys = {{cnt - 1, cnt - 2, cnt - 3}};
      ASSERT_EQ(0, searcher->search_bf_by_p_keys_impl(
                       sparse_dim_count, sparse_indices.data(),
                       sparse_velues.data(), p_keys, qmeta, linearByPkeysCtx));
      // The best hit of both unrestricted searches is expected to be the
      // largest key.
      auto &r1 = ctx->result();
      ASSERT_EQ(topk, r1.size());
      // std::cout << "r1 top1: " << r1[0].key() << ", score: " << r1[0].score()
      //           << std::endl;
      ASSERT_EQ(cnt - 1, r1[0].key());
      auto &r2 = linearCtx->result();
      ASSERT_EQ(topk, r2.size());
      // std::cout << "r2 top1: " << r2[0].key() << ", score: " << r2[0].score()
      //           << std::endl;
      ASSERT_EQ(cnt - 1, r2[0].key());
      // The restricted search can return at most as many hits as candidates.
      auto &r3 = linearByPkeysCtx->result();
      ASSERT_EQ(std::min(topk, p_keys[0].size()), r3.size());
#if 0
            printf("linear: %zd => %zd %zd %zd %zd %zd\n", i, r2[0].key,
                   r2[1].key, r2[2].key, r2[3].key, r2[4].key);
            printf("knn: %zd => %zd %zd %zd %zd %zd\n", i, r1[0].key, r1[1].key,
                   r1[2].key, r1[3].key, r1[4].key);
#endif
      // Count how many keys returned by search_impl also appear in the
      // brute-force result.
      for (size_t k = 0; k < topk; ++k) {
        totalCnts++;
        for (size_t j = 0; j < topk; ++j) {
          if (r2[j].key() == r1[k].key()) {
            totalHits++;
            break;
          }
        }
      }
    }
    // Every key must have been found, i.e. the overlap ratio is exactly 1.
    printf("%f\n", totalHits * 1.0f / totalCnts);
    ASSERT_FLOAT_EQ(1.0f, totalHits * 1.0f / totalCnts);
  };

  // Run three search tasks concurrently and wait for all of them.
  auto s1 = std::async(std::launch::async, knnSearch);
  auto s2 = std::async(std::launch::async, knnSearch);
  auto s3 = std::async(std::launch::async, knnSearch);
  s1.wait();
  s2.wait();
  s3.wait();
}

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif

================================================
FILE: tests/core/algorithm/flat_sparse/flat_sparse_streamer_buffer_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <string>
#include <vector>
#include <ailego/math/norm2_matrix.h>
#include <ailego/utility/math_helper.h>
#include <ailego/utility/memory_helper.h>
#include <algorithm/flat_sparse/flat_sparse_utility.h>
#include <gtest/gtest.h>
#include <zvec/ailego/buffer/buffer_manager.h>
#include <zvec/core/framework/index_framework.h>
#include <zvec/core/framework/index_streamer.h>

using namespace std;
using namespace testing;
using namespace zvec::ailego;

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
#endif

namespace zvec {
namespace core {

class FlatSparseStreamerTest : public testing::Test {
 protected:
  void SetUp(void);
  void TearDown(void);
  void generate_sparse_data(
      size_t cnt, uint32_t sparse_dim_count,
      std::vector<NumericalVector<uint32_t>> &sparse_indices_list,
      std::vector<NumericalVector<float>> &sparse_vec_list, bool norm);

  static std::string dir_;
  static shared_ptr<IndexMeta> index_meta_ptr_;
};

// Storage for the fixture's static members: the working directory of the
// tests and the (initially empty) shared index meta.
std::string FlatSparseStreamerTest::dir_ = "FlatSparseStreamerTest/";
shared_ptr<IndexMeta> FlatSparseStreamerTest::index_meta_ptr_ = nullptr;

// Appends `cnt` random sparse vectors to the two output lists.
//
// Each vector has `sparse_dim_count` entries with indices 0, 20, 40, ... and
// values drawn uniformly from [-1, 1). When `norm` is true every vector is
// scaled to unit L2 norm; previously a local variable shadowed the parameter
// and the flag was silently ignored.
//
// @param cnt                  number of vectors to generate
// @param sparse_dim_count     number of entries per vector
// @param sparse_indices_list  receives the index arrays (appended)
// @param sparse_vec_list      receives the value arrays (appended)
// @param norm                 whether to L2-normalize each vector
void FlatSparseStreamerTest::generate_sparse_data(
    size_t cnt, uint32_t sparse_dim_count,
    std::vector<NumericalVector<uint32_t>> &sparse_indices_list,
    std::vector<NumericalVector<float>> &sparse_vec_list, bool norm) {
  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_real_distribution<float> dist(-1.0, 1.0);

  // Grow the output lists once instead of on every push_back.
  sparse_indices_list.reserve(sparse_indices_list.size() + cnt);
  sparse_vec_list.reserve(sparse_vec_list.size() + cnt);

  for (size_t i = 0; i < cnt; ++i) {
    // prepare sparse
    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
    NumericalVector<float> sparse_vec(sparse_dim_count);

    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices[j] = j * 20;
      sparse_vec[j] = dist(gen);
    }

    if (norm) {
      float l2_norm = 0.0f;
      ailego::Norm2Matrix<float, 1>::Compute(sparse_vec.data(),
                                             sparse_dim_count, &l2_norm);
      // Skip the division for an all-zero vector to avoid producing NaNs.
      if (l2_norm > 0.0f) {
        for (size_t j = 0; j < sparse_dim_count; ++j) {
          sparse_vec[j] = sparse_vec[j] / l2_norm;
        }
      }
    }

    sparse_indices_list.push_back(sparse_indices);
    sparse_vec_list.push_back(sparse_vec);
  }
}

// Runs before every test: rebuilds the shared sparse inner-product index meta
// and removes whatever a previous run left in the working directory.
void FlatSparseStreamerTest::SetUp(void) {
  // Plain `new` instead of `new (nothrow)`: the result is dereferenced right
  // away, so a null return would only turn an allocation failure into a crash.
  index_meta_ptr_.reset(new IndexMeta(IndexMeta::MetaType::MT_SPARSE,
                                      IndexMeta::DataType::DT_FP32));
  index_meta_ptr_->set_metric("InnerProductSparse", 0, ailego::Params());

  // Build the command in a std::string so a long directory name can never be
  // truncated into a different `rm -rf` target.
  const std::string cleanup_cmd = "rm -rf " + dir_;
  system(cleanup_cmd.c_str());
}

// Runs after every test: removes the working directory and everything the
// test created in it.
void FlatSparseStreamerTest::TearDown(void) {
  // Build the command in a std::string so a long directory name can never be
  // truncated into a different `rm -rf` target.
  const std::string cleanup_cmd = "rm -rf " + dir_;
  system(cleanup_cmd.c_str());
}

// Writes 20000 random unit-norm sparse vectors with one streamer, closes it,
// reopens the same file with a second streamer and compares search_impl with
// search_bf_impl, using every 100th inserted vector as a query.
TEST_F(FlatSparseStreamerTest, TestGeneral) {
  IndexStreamer::Pointer write_streamer =
      IndexFactory::CreateStreamer("FlatSparseStreamer");
  ASSERT_TRUE(write_streamer != nullptr);

  size_t sparse_dim_count = 32;

  IndexMeta index_meta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  index_meta.set_metric("InnerProductSparse", 0, ailego::Params());

  ailego::Params params;

  ailego::Params stg_params;
  auto write_storage = IndexFactory::CreateStorage("MMapFileStorage");
  // Fail with a readable assertion instead of dereferencing a null storage.
  ASSERT_NE(nullptr, write_storage);
  ASSERT_EQ(0, write_storage->init(stg_params));
  ASSERT_EQ(0, write_storage->open(dir_ + "/Test/FlatSparseSearch", true));
  ASSERT_EQ(0, write_streamer->init(index_meta, params));
  ASSERT_EQ(0, write_streamer->open(write_storage));

  size_t cnt = 20000U;
  auto ctx = write_streamer->create_context();
  ASSERT_TRUE(!!ctx);

  std::vector<NumericalVector<uint32_t>> sparse_indices_list;
  std::vector<NumericalVector<float>> sparse_vec_list;

  generate_sparse_data(cnt, sparse_dim_count, sparse_indices_list,
                       sparse_vec_list, true);

  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  for (size_t i = 0; i < cnt; i++) {
    ASSERT_EQ(0, write_streamer->add_impl(
                     i, sparse_dim_count, sparse_indices_list[i].data(),
                     sparse_vec_list[i].data(), qmeta, ctx));
  }
  write_streamer->flush(0UL);
  write_streamer->close();
  write_streamer.reset();

  // Reopen the file that was just written with a fresh streamer.
  IndexStreamer::Pointer read_streamer =
      IndexFactory::CreateStreamer("FlatSparseStreamer");
  ASSERT_TRUE(read_streamer != nullptr);
  ASSERT_EQ(0, read_streamer->init(*index_meta_ptr_, params));
  auto read_storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, read_storage);
  ASSERT_EQ(0, read_storage->init(stg_params));
  ASSERT_EQ(0, read_storage->open(dir_ + "/Test/FlatSparseSearch", false));
  ASSERT_EQ(0, read_streamer->open(read_storage));

  auto linearCtx = read_streamer->create_context();
  ASSERT_TRUE(!!linearCtx);

  auto knnCtx = read_streamer->create_context();
  ASSERT_TRUE(!!knnCtx);

  size_t topk = 200;
  linearCtx->set_topk(topk);
  knnCtx->set_topk(topk);

  uint64_t knnTotalTime = 0;
  uint64_t linearTotalTime = 0;

  int totalHits = 0;
  int totalCnts = 0;
  int topk1Hits = 0;

  // Every `queryStep`-th inserted vector is replayed as a query. The number
  // of queries is counted explicitly so the top-1 recall below does not
  // depend on a literal that has to match the stride.
  const size_t queryStep = 100;
  size_t queryCnt = 0;

  for (size_t i = 0; i < cnt; i += queryStep) {
    const auto &sparse_indices = sparse_indices_list[i];
    const auto &sparse_vec = sparse_vec_list[i];

    auto t1 = ailego::Realtime::MicroSeconds();

    ASSERT_EQ(
        0, read_streamer->search_impl(sparse_dim_count, sparse_indices.data(),
                                      sparse_vec.data(), qmeta, knnCtx));

    auto t2 = ailego::Realtime::MicroSeconds();

    ASSERT_EQ(0, read_streamer->search_bf_impl(
                     sparse_dim_count, sparse_indices.data(), sparse_vec.data(),
                     qmeta, linearCtx));

    auto t3 = ailego::Realtime::MicroSeconds();

    knnTotalTime += t2 - t1;
    linearTotalTime += t3 - t2;

    auto &knnResult = knnCtx->result();
    ASSERT_EQ(topk, knnResult.size());
    topk1Hits += i == knnResult[0].key();

    // The brute-force search must rank the query vector itself first.
    auto &linearResult = linearCtx->result();
    ASSERT_EQ(topk, linearResult.size());
    ASSERT_EQ(i, linearResult[0].key());

    // Count how many keys returned by search_impl also appear in the
    // brute-force result.
    for (size_t k = 0; k < topk; ++k) {
      totalCnts++;
      for (size_t j = 0; j < topk; ++j) {
        if (linearResult[j].key() == knnResult[k].key()) {
          totalHits++;
          break;
        }
      }
    }
    ++queryCnt;
  }
  ASSERT_GT(queryCnt, 0U);

  float recall = totalHits * 1.0f / totalCnts;
  // Fraction of queries whose own key was the best search_impl hit.
  float topk1Recall = topk1Hits * 1.0f / queryCnt;

  std::cout << "knnTotalTime=" << knnTotalTime
            << " linearTotalTime=" << linearTotalTime << std::endl;

  EXPECT_GT(recall, 0.80f);
  EXPECT_GT(topk1Recall, 0.80f);
}

}  // namespace core
}  // namespace zvec

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif

================================================
FILE: tests/core/algorithm/flat_sparse/flat_sparse_streamer_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <future>
#include <string>
#include <vector>
#include <ailego/math/norm2_matrix.h>
#include <ailego/utility/math_helper.h>
#include <ailego/utility/memory_helper.h>
#include <algorithm/flat_sparse/flat_sparse_utility.h>
#include <gtest/gtest.h>
#include <zvec/ailego/buffer/buffer_manager.h>
#include <zvec/core/framework/index_framework.h>
#include <zvec/core/framework/index_streamer.h>

using namespace zvec::core;
using namespace zvec::ailego;
using namespace std;

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
#endif

// Number of entries per sparse vector used by the tests in this file that do
// not declare their own local `sparse_dim_count`.
constexpr static size_t sparse_dim_count = 16;

class FlatSparseStreamerTest : public testing::Test {
 protected:
  void SetUp(void);
  void TearDown(void);
  void generate_sparse_data(
      size_t cnt, uint32_t sparse_dim_count,
      std::vector<NumericalVector<uint32_t>> &sparse_indices_list,
      std::vector<NumericalVector<float>> &sparse_vec_list, bool norm);

  static std::string dir_;
  static std::shared_ptr<IndexMeta> index_meta_ptr_;
};

std::string FlatSparseStreamerTest::dir_("streamer_test/");
std::shared_ptr<IndexMeta> FlatSparseStreamerTest::index_meta_ptr_;

// Appends `cnt` random, L2-normalized sparse vectors to the two output lists.
//
// Each vector has `sparse_dim_count` entries with indices 0, 20, 40, ... and
// values drawn uniformly from [-1, 1) before normalization.
//
// @param cnt                  number of vectors to generate
// @param sparse_dim_count     number of entries per vector
// @param sparse_indices_list  receives the index arrays (appended)
// @param sparse_vec_list      receives the value arrays (appended)
// @param norm                 currently unused, see the note below
void FlatSparseStreamerTest::generate_sparse_data(
    size_t cnt, uint32_t sparse_dim_count,
    std::vector<NumericalVector<uint32_t>> &sparse_indices_list,
    std::vector<NumericalVector<float>> &sparse_vec_list, bool norm) {
  // NOTE(review): `norm` has never influenced the output because a local
  // variable of the same name used to shadow it; vectors are always
  // normalized. The behavior is kept as is so existing callers are unaffected.
  (void)norm;

  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_real_distribution<float> dist(-1.0, 1.0);

  // Grow the output lists once instead of on every push_back.
  sparse_indices_list.reserve(sparse_indices_list.size() + cnt);
  sparse_vec_list.reserve(sparse_vec_list.size() + cnt);

  for (size_t i = 0; i < cnt; ++i) {
    // prepare sparse
    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
    NumericalVector<float> sparse_vec(sparse_dim_count);

    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices[j] = j * 20;
      sparse_vec[j] = dist(gen);
    }

    float l2_norm = 0.0f;
    Norm2Matrix<float, 1>::Compute(sparse_vec.data(), sparse_dim_count,
                                   &l2_norm);
    // Skip the division for an all-zero vector to avoid producing NaNs.
    if (l2_norm > 0.0f) {
      for (size_t j = 0; j < sparse_dim_count; ++j) {
        sparse_vec[j] = sparse_vec[j] / l2_norm;
      }
    }

    sparse_indices_list.push_back(sparse_indices);
    sparse_vec_list.push_back(sparse_vec);
  }
}


// Runs before every test: sets the logger level, rebuilds the shared sparse
// inner-product index meta and removes whatever a previous run left in the
// working directory.
void FlatSparseStreamerTest::SetUp(void) {
  IndexLoggerBroker::SetLevel(2);

  index_meta_ptr_.reset(new IndexMeta(IndexMeta::MetaType::MT_SPARSE,
                                      IndexMeta::DataType::DT_FP32));
  index_meta_ptr_->set_metric("InnerProductSparse", 0, Params());

  // Build the command in a std::string so a long directory name can never be
  // truncated into a different `rm -rf` target.
  const std::string cleanup_cmd = "rm -rf " + dir_;
  system(cleanup_cmd.c_str());
}

// Runs after every test: removes the working directory and everything the
// test created in it.
void FlatSparseStreamerTest::TearDown(void) {
  // Build the command in a std::string so a long directory name can never be
  // truncated into a different `rm -rf` target.
  const std::string cleanup_cmd = "rm -rf " + dir_;
  system(cleanup_cmd.c_str());
}

// Round-trip check: adds 100 random sparse vectors to a FlatSparseStreamer
// and reads each one back through get_sparse_vector, comparing indices and
// values with what was inserted.
TEST_F(FlatSparseStreamerTest, TestGeneral) {
  // Open a fresh MMap-backed storage for the index file.
  Params storage_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_TRUE(storage != nullptr);
  ASSERT_EQ(0, storage->init(storage_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestGeneral", true));

  // Create the streamer with a test-local sparse inner-product meta.
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  IndexMeta meta(IndexMeta::MetaType::MT_SPARSE, IndexMeta::DataType::DT_FP32);
  meta.set_metric("InnerProductSparse", 0, Params());

  Params streamer_params;
  ASSERT_EQ(0, streamer->init(meta, streamer_params));
  ASSERT_EQ(0, streamer->open(storage));

  // Random input data; this test uses 32 entries per vector.
  size_t dim_count = 32;
  size_t doc_count = 100U;
  std::vector<NumericalVector<uint32_t>> indices_list;
  std::vector<NumericalVector<float>> values_list;
  generate_sparse_data(doc_count, dim_count, indices_list, values_list, true);

  // Insert every vector under its position as key.
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  IndexQueryMeta qmeta(IndexMeta::DT_FP32);
  for (size_t key = 0; key < doc_count; ++key) {
    ASSERT_EQ(0, streamer->add_impl(key, dim_count, indices_list[key].data(),
                                    values_list[key].data(), qmeta, ctx));
  }

  // Read every vector back. The buffers are created per key so each call
  // starts from empty strings.
  for (size_t key = 0; key < doc_count; ++key) {
    uint32_t stored_count;
    std::string indices_buffer;
    std::string values_buffer;
    ASSERT_EQ(0, streamer->get_sparse_vector(key, &stored_count,
                                             &indices_buffer, &values_buffer));
    ASSERT_EQ(dim_count, stored_count);

    const auto *stored_indices =
        reinterpret_cast<const uint32_t *>(indices_buffer.data());
    const auto *stored_values =
        reinterpret_cast<const float *>(values_buffer.data());
    for (size_t j = 0; j < stored_count; ++j) {
      ASSERT_EQ(stored_indices[j], indices_list[key][j]);
      ASSERT_FLOAT_EQ(stored_values[j], values_list[key][j]);
    }
  }
}

// Exercises FlatSparseStreamer::search_bf_impl directly on the streamer.
//
// Document `i` stores the value -(i + 1) on every dimension and every query is
// strictly positive; the assertions below expect the hits to come back in
// ascending key order.
TEST_F(FlatSparseStreamerTest, TestLinearSearch) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  Params params;
  Params stg_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  // Fail with a readable assertion instead of dereferencing a null storage.
  ASSERT_TRUE(storage != nullptr);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestLinearSearch.index", true));
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  size_t cnt = 5000UL;
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  IndexQueryMeta qmeta(IndexMeta::DT_FP32);

  NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
  NumericalVector<float> sparse_velues(sparse_dim_count);

  // Writes `value` into every dimension of the reusable sparse vector; the
  // indices are always 0, 20, 40, ...
  auto fill_vector = [&](float value) {
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices[j] = j * 20;
      sparse_velues[j] = value;
    }
  };

  for (size_t i = 0; i < cnt; ++i) {
    fill_vector(static_cast<float>(-1.0 * i - 1.0f));

    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),
                                    sparse_velues.data(), qmeta, ctx));
  }

  for (size_t i = 0; i < cnt; i++) {
    fill_vector(i + 1.0f);

    ctx->set_topk(1U);
    ASSERT_EQ(0,
              streamer->search_bf_impl(sparse_dim_count, sparse_indices.data(),
                                       sparse_velues.data(), qmeta, ctx));
    auto &result1 = ctx->result();
    ASSERT_EQ(1UL, result1.size());
    ASSERT_EQ(0, result1[0].key());

    ctx->set_topk(3U);
    ASSERT_EQ(0,
              streamer->search_bf_impl(sparse_dim_count, sparse_indices.data(),
                                       sparse_velues.data(), qmeta, ctx));
    auto &result2 = ctx->result();
    ASSERT_EQ(3UL, result2.size());
    // `rank` instead of `i`, so the outer query index is no longer shadowed.
    for (size_t rank = 0; rank < 3UL; ++rank) {
      ASSERT_EQ(rank, result2[rank].key());
    }
  }

  // Top-100 for a single query.
  ctx->set_topk(100U);
  fill_vector(10.1f);

  ASSERT_EQ(0, streamer->search_bf_impl(sparse_dim_count, sparse_indices.data(),
                                        sparse_velues.data(), qmeta, ctx));
  auto &result = ctx->result();
  ASSERT_EQ(100U, result.size());
  for (size_t rank = 0; rank < 100; ++rank) {
    ASSERT_EQ(rank, result[rank].key());
  }
}

// Exercises FlatSparseStreamer::search_bf_by_p_keys_impl directly on the
// streamer.
//
// Document `i` stores the value -(i + 1) on every dimension and every query is
// strictly positive; the assertions below expect the hits to come back in
// ascending key order.
TEST_F(FlatSparseStreamerTest, TestLinearSearchByKeys) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  Params params;
  Params stg_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  // Fail with a readable assertion instead of dereferencing a null storage.
  ASSERT_TRUE(storage != nullptr);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestLinearSearchByKeys.index", true));
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  size_t cnt = 5000UL;
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  IndexQueryMeta qmeta(IndexMeta::DT_FP32);

  // A single candidate list that initially contains every inserted key.
  std::vector<std::vector<uint64_t>> p_keys(1);
  p_keys[0].resize(cnt);

  NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
  NumericalVector<float> sparse_velues(sparse_dim_count);

  // Writes `value` into every dimension of the reusable sparse vector; the
  // indices are always 0, 20, 40, ...
  auto fill_vector = [&](float value) {
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices[j] = j * 20;
      sparse_velues[j] = value;
    }
  };

  // Runs the key-restricted brute-force search with the current vector and
  // the current content of `p_keys`.
  auto search_by_keys = [&]() {
    return streamer->search_bf_by_p_keys_impl(
        sparse_dim_count, sparse_indices.data(), sparse_velues.data(), p_keys,
        qmeta, ctx);
  };

  for (size_t i = 0; i < cnt; ++i) {
    fill_vector(static_cast<float>(-1.0 * i - 1.0f));

    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),
                                    sparse_velues.data(), qmeta, ctx));

    p_keys[0][i] = i;
  }

  // One query per magnitude against the full candidate list.
  size_t topk = 3;
  for (size_t i = 0; i < cnt; i += 1) {
    fill_vector(i + 1.0f);

    ctx->set_topk(1U);
    ASSERT_EQ(0, search_by_keys());
    auto &result1 = ctx->result();
    ASSERT_EQ(1UL, result1.size());
    ASSERT_EQ(0, result1[0].key());

    ctx->set_topk(topk);
    ASSERT_EQ(0, search_by_keys());
    auto &result2 = ctx->result();
    ASSERT_EQ(topk, result2.size());
    ASSERT_EQ(0, result2[0].key());
    ASSERT_EQ(1, result2[1].key());
    ASSERT_EQ(2, result2[2].key());
  }

  // Top-100 over the full candidate list.
  {
    ctx->set_topk(100U);
    fill_vector(1.0f);
    ASSERT_EQ(0, search_by_keys());
    auto &result = ctx->result();
    ASSERT_EQ(100U, result.size());
    ASSERT_EQ(0, result[0].key());
    ASSERT_EQ(1, result[1].key());
    ASSERT_EQ(10, result[10].key());
    ASSERT_EQ(20, result[20].key());
    ASSERT_EQ(30, result[30].key());
    ASSERT_EQ(35, result[35].key());
    ASSERT_EQ(99, result[99].key());
  }

  // Candidate list mixing existing keys with keys that were never inserted
  // (cnt + 1 and cnt + 2): only the three existing keys are expected back.
  {
    ctx->set_topk(100U);
    fill_vector(10.0f);

    p_keys[0] = {{cnt + 1, 10, 1, 15, cnt + 2}};
    ASSERT_EQ(0, search_by_keys());
    auto &result = ctx->result();
    ASSERT_EQ(3U, result.size());
    ASSERT_EQ(1, result[0].key());
    ASSERT_EQ(10, result[1].key());
    ASSERT_EQ(15, result[2].key());
  }

  // Candidate list restricted to every 10th key.
  {
    ctx->set_topk(100U);
    fill_vector(9.0f);

    p_keys[0].clear();
    for (size_t j = 0; j < cnt; j += 10) {
      p_keys[0].push_back((uint64_t)j);
    }
    ASSERT_EQ(0, search_by_keys());
    auto &result = ctx->result();
    ASSERT_EQ(100U, result.size());
    ASSERT_EQ(0, result[0].key());
    ASSERT_EQ(10, result[1].key());
    ASSERT_EQ(100, result[10].key());
    ASSERT_EQ(200, result[20].key());
    ASSERT_EQ(300, result[30].key());
    ASSERT_EQ(350, result[35].key());
    ASSERT_EQ(990, result[99].key());
  }
}

// Checks that the sparse provider exposes every added document, both through
// its iterator and through get_sparse_vector(), while the streamer is live and
// again after it has been flushed, closed and reopened on the same storage.
TEST_F(FlatSparseStreamerTest, TestCreateIterator) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  Params params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestCreateIterator", true));
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  // Walks the iterator and expects keys 0, 1, 2, ... in that order, each with
  // all of its first sparse_dim_count values equal to the key. Ends by
  // checking that exactly `total` documents were visited.
  auto checkIter = [](size_t total, IndexStreamer::Pointer &target) {
    auto provider = target->create_sparse_provider();
    auto iter = provider->create_iterator();
    ASSERT_TRUE(!!iter);
    size_t cur = 0;
    while (iter->is_valid()) {
      const float *sparse_data =
          reinterpret_cast<const float *>(iter->sparse_data());
      ASSERT_EQ(cur, iter->key());
      for (size_t d = 0; d < sparse_dim_count; ++d) {
        ASSERT_FLOAT_EQ(static_cast<float>(cur), sparse_data[d]);
      }
      iter->next();
      cur++;
    }
    ASSERT_EQ(cur, total);
  };

  // Fetches keys 0..total-1 through get_sparse_vector() and expects each one
  // to hold sparse_dim_count values, all equal to the key.
  auto checkGetVector = [](size_t total, IndexStreamer::Pointer &target) {
    auto provider = target->create_sparse_provider();
    for (size_t i = 0; i < total; i++) {
      uint32_t sparse_count;
      std::string sparse_indices_buffer;
      std::string sparse_values_buffer;

      ASSERT_EQ(0, provider->get_sparse_vector(i, &sparse_count,
                                               &sparse_indices_buffer,
                                               &sparse_values_buffer));

      const float *sparse_values_ptr =
          reinterpret_cast<const float *>(sparse_values_buffer.data());
      ASSERT_EQ(sparse_count, sparse_dim_count);
      for (size_t j = 0; j < sparse_count; ++j) {
        ASSERT_FLOAT_EQ(sparse_values_ptr[j], static_cast<float>(i));
      }
    }
  };

  size_t cnt = 200;
  IndexQueryMeta qmeta(IndexMeta::DT_FP32);
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  for (size_t i = 0; i < cnt; i++) {
    NumericalVector<uint32_t> sparse_indices1(sparse_dim_count);
    NumericalVector<float> sparse_velues1(sparse_dim_count);
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices1[j] = j * 20;
      sparse_velues1[j] = static_cast<float>(i);
    }

    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count, sparse_indices1.data(),
                                    sparse_velues1.data(), qmeta, ctx));
    // A fatal failure inside the helper only returns from the lambda, so
    // propagate it explicitly instead of carrying on with the next insert.
    ASSERT_NO_FATAL_FAILURE(checkIter(i + 1, streamer));
  }

  // check getVector
  ASSERT_NO_FATAL_FAILURE(checkGetVector(cnt, streamer));

  // Reopen the streamer on the same storage object; flush and close must
  // succeed for the follow-up checks to be meaningful.
  ASSERT_EQ(0, streamer->flush(0UL));
  ASSERT_EQ(0, streamer->close());
  ASSERT_EQ(0, streamer->open(storage));
  ASSERT_NO_FATAL_FAILURE(checkIter(cnt, streamer));

  // check getVector
  ASSERT_NO_FATAL_FAILURE(checkGetVector(cnt, streamer));
}

// Alternates a single streamer between two storages: even keys are added
// while storage1 is open, odd keys while storage2 is open, with a flush and
// close after every insert. At the end two fresh streamers open one storage
// each and must see exactly the keys that were written to it.
TEST_F(FlatSparseStreamerTest, TestOpenAndClose) {
  constexpr size_t static sparse_dim_count = 2048;

  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  IndexMeta meta(IndexMeta::MetaType::MT_SPARSE, IndexMeta::DataType::DT_FP32);
  meta.set_metric("InnerProductSparse", 0, Params());
  Params params;
  auto storage1 = IndexFactory::CreateStorage("MMapFileStorage");
  auto storage2 = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage1);
  ASSERT_NE(nullptr, storage2);
  Params stg_params;
  // NOTE(review): these paths are concatenated without a "/" separator,
  // unlike TestCreateIterator -- confirm whether dir_ ends with a separator.
  ASSERT_EQ(0, storage1->init(stg_params));
  ASSERT_EQ(0, storage1->open(dir_ + "TessOpenAndClose1", true));
  ASSERT_EQ(0, storage2->init(stg_params));
  ASSERT_EQ(0, storage2->open(dir_ + "TessOpenAndClose2", true));
  ASSERT_EQ(0, streamer->init(meta, params));

  // Walks the iterator and expects keys base, base + 2, base + 4, ... in
  // order, each with all sparse_dim_count values equal to the key, and
  // `total` documents overall.
  auto checkIter = [](size_t base, size_t total,
                      IndexStreamer::Pointer &target) {
    auto provider = target->create_sparse_provider();
    auto iter = provider->create_iterator();
    ASSERT_TRUE(!!iter);
    size_t cur = base;
    size_t visited = 0;
    while (iter->is_valid()) {
      const float *sparse_data =
          reinterpret_cast<const float *>(iter->sparse_data());
      ASSERT_EQ(cur, iter->key());
      for (size_t d = 0; d < sparse_dim_count; ++d) {
        ASSERT_FLOAT_EQ(static_cast<float>(cur), sparse_data[d]);
      }
      iter->next();
      cur += 2;
      visited++;
    }
    ASSERT_EQ(visited, total);
  };

  size_t testCnt = 200;
  IndexQueryMeta qmeta(IndexMeta::DT_FP32);
  for (size_t i = 0; i < testCnt; i += 2) {
    // Even key -> storage1.
    const float v1 = static_cast<float>(i);
    ASSERT_EQ(0, streamer->open(storage1));
    auto ctx = streamer->create_context();
    ASSERT_TRUE(!!ctx);

    NumericalVector<uint32_t> sparse_indices1(sparse_dim_count);
    NumericalVector<float> sparse_velues1(sparse_dim_count);
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices1[j] = j * 20;
      sparse_velues1[j] = v1;
    }

    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count, sparse_indices1.data(),
                                    sparse_velues1.data(), qmeta, ctx));

    checkIter(0, i / 2 + 1, streamer);
    ASSERT_EQ(0, streamer->flush(0UL));
    ASSERT_EQ(0, streamer->close());

    // Odd key -> storage2.
    const float v2 = static_cast<float>(i + 1);
    NumericalVector<uint32_t> sparse_indices2(sparse_dim_count);
    NumericalVector<float> sparse_velues2(sparse_dim_count);
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices2[j] = j * 20;
      sparse_velues2[j] = v2;
    }

    ASSERT_EQ(0, streamer->open(storage2));
    ctx = streamer->create_context();
    ASSERT_TRUE(!!ctx);
    ASSERT_EQ(
        0, streamer->add_impl(i + 1, sparse_dim_count, sparse_indices2.data(),
                              sparse_velues2.data(), qmeta, ctx));
    checkIter(1, i / 2 + 1, streamer);
    ASSERT_EQ(0, streamer->flush(0UL));
    ASSERT_EQ(0, streamer->close());
  }

  // Fresh streamers must observe what was persisted to each storage. The null
  // checks guard the newly created objects themselves before they are used.
  IndexStreamer::Pointer streamer1 =
      IndexFactory::CreateStreamer("FlatSparseStreamer");
  ASSERT_TRUE(streamer1 != nullptr);
  ASSERT_EQ(0, streamer1->init(meta, params));
  ASSERT_EQ(0, streamer1->open(storage1));

  IndexStreamer::Pointer streamer2 =
      IndexFactory::CreateStreamer("FlatSparseStreamer");
  ASSERT_TRUE(streamer2 != nullptr);
  ASSERT_EQ(0, streamer2->init(meta, params));
  ASSERT_EQ(0, streamer2->open(storage2));

  checkIter(0, testCnt / 2, streamer1);
  checkIter(1, testCnt / 2, streamer2);
}

// Calls cleanup() on a streamer that was created but never initialised or
// opened. There is no assertion on the call itself; the test passes as long
// as the call returns.
TEST_F(FlatSparseStreamerTest, TestNoInit) {
  IndexStreamer::Pointer fresh_streamer =
      IndexFactory::CreateStreamer("FlatSparseStreamer");
  ASSERT_NE(nullptr, fresh_streamer);

  fresh_streamer->cleanup();
}

// Runs the add / iterate / reopen cycle against an MMapFileStorage configured
// with the copy_on_write and force_flush parameters set to true, then checks
// the data through a brand-new storage object opened on the same path.
TEST_F(FlatSparseStreamerTest, TestForceFlush) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  Params params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  Params stg_params;
  stg_params.set("proxima.mmap_file.storage.copy_on_write", true);
  stg_params.set("proxima.mmap_file.storage.force_flush", true);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestForceFlush", true));
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  // Walks the iterator and expects keys 0, 1, 2, ... in order, each holding
  // sparse_dim_count values equal to the key, and `total` documents overall.
  auto checkIter = [](size_t total, IndexStreamer::Pointer &target) {
    auto provider = target->create_sparse_provider();
    auto iter = provider->create_iterator();
    ASSERT_TRUE(!!iter);
    size_t cur = 0;
    while (iter->is_valid()) {
      ASSERT_EQ(cur, iter->key());
      const uint32_t sparse_count = iter->sparse_count();
      ASSERT_EQ(sparse_count, sparse_dim_count);

      const float *data = reinterpret_cast<const float *>(iter->sparse_data());
      for (size_t j = 0; j < sparse_dim_count; ++j) {
        ASSERT_FLOAT_EQ(static_cast<float>(cur), data[j]);
      }

      iter->next();
      cur++;
    }
    ASSERT_EQ(cur, total);
  };

  size_t cnt = 200;
  IndexQueryMeta qmeta(IndexMeta::DT_FP32);
  auto ctx = streamer->create_context();
  // Guard against a null context before it is handed to add_impl().
  ASSERT_TRUE(!!ctx);

  for (size_t i = 0; i < cnt; ++i) {
    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
    NumericalVector<float> sparse_velues(sparse_dim_count);

    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices[j] = j * 20;
      sparse_velues[j] = static_cast<float>(i);
    }

    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),
                                    sparse_velues.data(), qmeta, ctx));
    checkIter(i + 1, streamer);
  }

  // The checks below read the data back through a new storage object, so a
  // failed flush or close must fail the test here rather than show up later
  // as a data mismatch.
  ASSERT_EQ(0, streamer->flush(0UL));
  ASSERT_EQ(0, streamer->close());
  storage->close();

  storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestForceFlush", true));
  ASSERT_EQ(0, streamer->open(storage));
  checkIter(cnt, streamer);

  // check getVector
  auto provider = streamer->create_sparse_provider();
  for (size_t i = 0; i < cnt; i++) {
    uint32_t sparse_count;
    std::string sparse_indices_buffer;
    std::string sparse_values_buffer;

    ASSERT_EQ(
        0, provider->get_sparse_vector(i, &sparse_count, &sparse_indices_buffer,
                                       &sparse_values_buffer));

    const float *sparse_values_ptr =
        reinterpret_cast<const float *>(sparse_values_buffer.data());
    ASSERT_EQ(sparse_count, sparse_dim_count);
    for (size_t j = 0; j < sparse_count; ++j) {
      ASSERT_FLOAT_EQ(sparse_values_ptr[j], static_cast<float>(i));
    }
  }
}

// Adds 3000 documents from three concurrent writer tasks, reopens the
// streamer, verifies the stored data through the iterator, and finally runs
// three concurrent search tasks whose search_impl results must fully match
// the search_bf_impl results.
TEST_F(FlatSparseStreamerTest, TestMultiThread) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  Params params;
  constexpr size_t static sparse_dim_count = 32;
  IndexMeta meta(IndexMeta::MetaType::MT_SPARSE, IndexMeta::DataType::DT_FP32);
  meta.set_metric("InnerProductSparse", 0, Params());
  ASSERT_EQ(0, streamer->init(meta, params));
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "TessKnnMultiThread", true));
  ASSERT_EQ(0, streamer->open(storage));

  // Adds keys baseKey .. baseKey + addCnt - 1, each with every value equal to
  // its key, flushes, and returns how many add_impl calls returned 0.
  auto addVector = [&streamer](int baseKey, size_t addCnt) {
    IndexQueryMeta qmeta(IndexMeta::DT_FP32);
    size_t succAdd = 0;
    auto ctx = streamer->create_context();
    for (size_t i = 0; i < addCnt; i++) {
      NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
      NumericalVector<float> sparse_velues(sparse_dim_count);

      for (size_t j = 0; j < sparse_dim_count; ++j) {
        sparse_indices[j] = j * 20;
        sparse_velues[j] = (float)i + baseKey;
      }

      succAdd += !streamer->add_impl(baseKey + i, sparse_dim_count,
                                     sparse_indices.data(),
                                     sparse_velues.data(), qmeta, ctx);
    }
    streamer->flush(0UL);
    return succAdd;
  };

  auto t2 = std::async(std::launch::async, addVector, 1000, 1000);
  auto t3 = std::async(std::launch::async, addVector, 2000, 1000);
  auto t1 = std::async(std::launch::async, addVector, 0, 1000);
  ASSERT_EQ(1000U, t1.get());
  ASSERT_EQ(1000U, t2.get());
  ASSERT_EQ(1000U, t3.get());
  // The verification below reads through a reopened streamer, so the close
  // has to succeed.
  ASSERT_EQ(0, streamer->close());

  // checking data
  ASSERT_EQ(0, streamer->open(storage));
  auto provider = streamer->create_sparse_provider();
  auto iter = provider->create_iterator();
  ASSERT_TRUE(!!iter);
  size_t total = 0;
  uint64_t min_key = std::numeric_limits<uint64_t>::max();
  uint64_t max_key = 0;

  // Collected to prove that the 3000 visited documents are all distinct.
  std::set<uint64_t> keys;

  while (iter->is_valid()) {
    const uint32_t sparse_count = iter->sparse_count();
    ASSERT_EQ(sparse_count, sparse_dim_count);

    const float *data = reinterpret_cast<const float *>(iter->sparse_data());
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      ASSERT_FLOAT_EQ((float)iter->key(), data[j]);
    }
    total++;
    min_key = std::min(min_key, iter->key());
    max_key = std::max(max_key, iter->key());
    keys.insert(iter->key());
    iter->next();
  }

  ASSERT_EQ(3000U, keys.size());

  ASSERT_EQ(3000U, total);
  ASSERT_EQ(0U, min_key);
  ASSERT_EQ(2999U, max_key);

  // ====== multi thread search
  size_t topk = 10;
  size_t cnt = 3000;
  // Issues `cnt` queries through search_impl, search_bf_impl and
  // search_bf_by_p_keys_impl, each with its own context, and measures how
  // many search_impl hits also appear in the search_bf_impl top-k.
  auto knnSearch = [&]() {
    auto linearCtx = streamer->create_context();
    auto linearByPkeysCtx = streamer->create_context();
    auto ctx = streamer->create_context();
    IndexQueryMeta qmeta(IndexMeta::DT_FP32);
    linearCtx->set_topk(topk);
    linearByPkeysCtx->set_topk(topk);
    ctx->set_topk(topk);
    size_t totalCnts = 0;
    size_t totalHits = 0;
    for (size_t i = 0; i < cnt; i += 1) {
      NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
      NumericalVector<float> sparse_velues(sparse_dim_count);

      for (size_t j = 0; j < sparse_dim_count; ++j) {
        sparse_indices[j] = j * 20;
        sparse_velues[j] = ((float)i + 1.1f);
      }

      ASSERT_EQ(0,
                streamer->search_impl(sparse_dim_count, sparse_indices.data(),
                                      sparse_velues.data(), qmeta, ctx));
      ASSERT_EQ(
          0, streamer->search_bf_impl(sparse_dim_count, sparse_indices.data(),
                                      sparse_velues.data(), qmeta, linearCtx));
      std::vector<std::vector<uint64_t>> p_keys = {{cnt - 1, cnt - 2, cnt - 3}};
      ASSERT_EQ(0, streamer->search_bf_by_p_keys_impl(
                       sparse_dim_count, sparse_indices.data(),
                       sparse_velues.data(), p_keys, qmeta, linearByPkeysCtx));
      auto &r1 = ctx->result();
      ASSERT_EQ(topk, r1.size());
      auto &r2 = linearCtx->result();
      ASSERT_EQ(topk, r2.size());
      // The test expects the largest key to rank first for every query.
      ASSERT_EQ(cnt - 1, r2[0].key());
      auto &r3 = linearByPkeysCtx->result();
      ASSERT_EQ(std::min(topk, p_keys[0].size()), r3.size());

      for (size_t k = 0; k < topk; ++k) {
        totalCnts++;
        for (size_t j = 0; j < topk; ++j) {
          if (r2[j].key() == r1[k].key()) {
            totalHits++;
            break;
          }
        }
      }
    }
    printf("%f\n", totalHits * 1.0f / totalCnts);
    ASSERT_FLOAT_EQ(1.0f, totalHits * 1.0f / totalCnts);
  };

  auto s1 = std::async(std::launch::async, knnSearch);
  auto s2 = std::async(std::launch::async, knnSearch);
  auto s3 = std::async(std::launch::async, knnSearch);
  s1.wait();
  s2.wait();
  s3.wait();
}

// Seeds the index with 1000 documents, then runs two writer tasks (another
// 2000 documents) concurrently with two search tasks. Searches must keep
// succeeding while the index grows, and afterwards the iterator must expose
// all 3000 documents.
TEST_F(FlatSparseStreamerTest, TestConcurrentAddAndSearch) {
  constexpr size_t static sparse_dim_count = 32;

  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  Params params;

  IndexMeta meta(IndexMeta::MetaType::MT_SPARSE, IndexMeta::DataType::DT_FP32);
  // meta.set_metric("InnerProductSparse", 0, Params());
  meta.set_metric("SquaredEuclideanSparse", 0, Params());
  ASSERT_EQ(0, streamer->init(meta, params));
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "TessConcurrentAddAndSearch", true));
  ASSERT_EQ(0, streamer->open(storage));

  // Adds keys baseKey .. baseKey + addCnt - 1, each with every value equal to
  // its key, flushes, and returns how many add_impl calls returned 0.
  auto addVector = [&streamer](int baseKey, size_t addCnt) {
    IndexQueryMeta qmeta(IndexMeta::DT_FP32);
    size_t succAdd = 0;
    auto ctx = streamer->create_context();
    for (size_t i = 0; i < addCnt; i++) {
      NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
      NumericalVector<float> sparse_velues(sparse_dim_count);

      for (size_t j = 0; j < sparse_dim_count; ++j) {
        sparse_indices[j] = j * 20;
        sparse_velues[j] = (float)i + baseKey;
      }

      succAdd += !streamer->add_impl(baseKey + i, sparse_dim_count,
                                     sparse_indices.data(),
                                     sparse_velues.data(), qmeta, ctx);
    }
    streamer->flush(0UL);
    return succAdd;
  };

  // Issues 3000 queries with all-negative values through the three search
  // entry points. Key 0 must rank first in the search_bf_impl result, and at
  // least 80% of the search_impl hits must also appear in the search_bf_impl
  // top-k.
  auto knnSearch = [&]() {
    size_t topk = 100;
    size_t cnt = 3000;
    auto linearCtx = streamer->create_context();
    auto linearByPkeysCtx = streamer->create_context();
    auto ctx = streamer->create_context();
    IndexQueryMeta qmeta(IndexMeta::DT_FP32);
    linearCtx->set_topk(topk);
    linearByPkeysCtx->set_topk(topk);
    ctx->set_topk(topk);
    size_t totalCnts = 0;
    size_t totalHits = 0;
    for (size_t i = 0; i < cnt; i += 1) {
      NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
      NumericalVector<float> sparse_velues(sparse_dim_count);

      for (size_t j = 0; j < sparse_dim_count; ++j) {
        sparse_indices[j] = j * 20;
        sparse_velues[j] = -((float)i + 1.1f);
      }

      ASSERT_EQ(0,
                streamer->search_impl(sparse_dim_count, sparse_indices.data(),
                                      sparse_velues.data(), qmeta, ctx));
      ASSERT_EQ(
          0, streamer->search_bf_impl(sparse_dim_count, sparse_indices.data(),
                                      sparse_velues.data(), qmeta, linearCtx));
      std::vector<std::vector<uint64_t>> p_keys = {{0, 1, 2}};
      ASSERT_EQ(0, streamer->search_bf_by_p_keys_impl(
                       sparse_dim_count, sparse_indices.data(),
                       sparse_velues.data(), p_keys, qmeta, linearByPkeysCtx));
      auto &r1 = ctx->result();
      ASSERT_EQ(topk, r1.size());
      auto &r2 = linearCtx->result();
      ASSERT_EQ(topk, r2.size());
      ASSERT_EQ(0U, r2[0].key());
      auto &r3 = linearByPkeysCtx->result();
      ASSERT_EQ(std::min(topk, p_keys[0].size()), r3.size());

      for (size_t k = 0; k < topk; ++k) {
        totalCnts++;
        for (size_t j = 0; j < topk; ++j) {
          if (r2[j].key() == r1[k].key()) {
            totalHits++;
            break;
          }
        }
      }
    }
    printf("%f\n", totalHits * 1.0f / totalCnts);
    ASSERT_TRUE(totalHits * 1.0f / totalCnts > 0.8f);
  };

  // The first batch is added and awaited before any search task starts, so
  // keys 0..999 are already present when the searchers run.
  auto t0 = std::async(std::launch::async, addVector, 0, 1000);
  ASSERT_EQ(1000U, t0.get());
  auto t1 = std::async(std::launch::async, addVector, 1000, 1000);
  auto t2 = std::async(std::launch::async, addVector, 2000, 1000);
  auto s1 = std::async(std::launch::async, knnSearch);
  auto s2 = std::async(std::launch::async, knnSearch);
  ASSERT_EQ(1000U, t1.get());
  ASSERT_EQ(1000U, t2.get());
  s1.wait();
  s2.wait();

  // checking data
  auto provider = streamer->create_sparse_provider();
  auto iter = provider->create_iterator();
  ASSERT_TRUE(!!iter);
  size_t total = 0;
  uint64_t min_key = std::numeric_limits<uint64_t>::max();
  uint64_t max_key = 0;
  while (iter->is_valid()) {
    const uint32_t sparse_count = iter->sparse_count();
    ASSERT_EQ(sparse_count, sparse_dim_count);

    const float *data = reinterpret_cast<const float *>(iter->sparse_data());
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      ASSERT_FLOAT_EQ((float)iter->key(), data[j]);
    }
    total++;
    min_key = std::min(min_key, iter->key());
    max_key = std::max(max_key, iter->key());
    iter->next();
  }

  ASSERT_EQ(3000U, total);
  ASSERT_EQ(0U, min_key);
  ASSERT_EQ(2999U, max_key);
}

// Verifies that a filter installed on the context is applied by search_impl,
// search_bf_impl and search_bf_by_p_keys_impl: keys for which the filter
// returns true must no longer appear in the results.
TEST_F(FlatSparseStreamerTest, TestFilter) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  Params params;
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "TestFilter", true));
  ASSERT_EQ(0, streamer->open(storage));

  size_t cnt = 100UL;
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  ctx->set_topk(10U);
  IndexQueryMeta qmeta(IndexMeta::DT_FP32);
  std::vector<std::vector<uint64_t>> p_keys;
  p_keys.resize(1);

  // Document i stores the value i + 1 in every dimension; every key is also
  // recorded in p_keys[0] for the by-keys search at the end.
  NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
  NumericalVector<float> sparse_velues(sparse_dim_count);
  for (size_t i = 0; i < cnt; i++) {
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices[j] = j * 20;
      sparse_velues[j] = (float)i + 1.0f;
    }

    // Fail right here if an insert is rejected instead of letting it surface
    // later as an unexpected search result.
    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),
                                    sparse_velues.data(), qmeta, ctx));
    p_keys[0].push_back(i);
  }

  // Query vector: the same indices, every value -100.1.
  for (size_t j = 0; j < sparse_dim_count; ++j) {
    sparse_indices[j] = j * 20;
    sparse_velues[j] = -100.1;
  }
  ASSERT_EQ(0, streamer->search_impl(sparse_dim_count, sparse_indices.data(),
                                     sparse_velues.data(), qmeta, ctx));
  auto &results = ctx->result();
  ASSERT_EQ(10U, results.size());
  ASSERT_EQ(0U, results[0].key());
  ASSERT_EQ(1U, results[1].key());
  ASSERT_EQ(2U, results[2].key());

  // Returns true for keys 0 and 3; the expectations below show that those
  // keys are dropped from the results once the filter is set.
  auto filterFunc = [](uint64_t key) {
    if (key == 0UL || key == 3UL) {
      return true;
    }
    return false;
  };
  ctx->set_filter(filterFunc);

  // after set filter
  ASSERT_EQ(0, streamer->search_impl(sparse_dim_count, sparse_indices.data(),
                                     sparse_velues.data(), qmeta, ctx));
  auto &results1 = ctx->result();
  ASSERT_EQ(10U, results1.size());
  ASSERT_EQ(1U, results1[0].key());
  ASSERT_EQ(2U, results1[1].key());
  ASSERT_EQ(4U, results1[2].key());

  // linear
  ASSERT_EQ(0, streamer->search_bf_impl(sparse_dim_count, sparse_indices.data(),
                                        sparse_velues.data(), qmeta, ctx));
  auto &results2 = ctx->result();
  ASSERT_EQ(10U, results2.size());
  ASSERT_EQ(1U, results2[0].key());
  ASSERT_EQ(2U, results2[1].key());
  ASSERT_EQ(4U, results2[2].key());

  // linear by p_keys
  ASSERT_EQ(0, streamer->search_bf_by_p_keys_impl(
                   sparse_dim_count, sparse_indices.data(),
                   sparse_velues.data(), p_keys, qmeta, ctx));
  auto &results3 = ctx->result();
  ASSERT_EQ(10U, results3.size());
  // for (int i = 0; i < 10; i++) {
  //   std::cout << "i: " << results3[i].key() << std::endl;
  // }

  ASSERT_EQ(1U, results3[0].key());
  ASSERT_EQ(2U, results3[1].key());
  ASSERT_EQ(4U, results3[2].key());
}

// Adds 10000 documents with pseudo-random 64-bit keys and checks through the
// sparse provider's iterator that every document is present and that all of
// its values equal its key (converted to float). The dump / searcher
// round-trip that used to follow is kept below as commented-out code.
TEST_F(FlatSparseStreamerTest, TestProvider) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "TestProvider.index", true));
  Params params;
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));
  auto ctx = streamer->create_context();
  ASSERT_NE(nullptr, ctx);

  //! prepare data
  size_t docs = 10000UL;
  srand(Realtime::MilliSeconds());
  std::vector<uint64_t> keys(docs);
  bool rand_key = rand() % 2;
  bool rand_order = rand() % 2;
  size_t step = rand() % 2 + 1;
  LOG_DEBUG("randKey=%u randOrder=%u step=%zu", rand_key, rand_order, step);
  // NOTE(review): the condition is hard-coded to true, so rand_key is only
  // logged and the sequential-key branch below never runs -- confirm whether
  // `if (rand_key)` was intended. The engine is default-constructed, so the
  // generated key sequence is the same on every run.
  if (true) {
    std::mt19937 mt;
    std::uniform_int_distribution<size_t> dt(
        0, std::numeric_limits<size_t>::max());
    for (size_t i = 0; i < docs; ++i) {
      keys[i] = dt(mt);
    }
  } else {
    std::iota(keys.begin(), keys.end(), 0U);
    std::transform(keys.begin(), keys.end(), keys.begin(),
                   [&](uint64_t k) { return step * k; });
    if (rand_order) {
      uint32_t seed = Realtime::Seconds();
      std::shuffle(keys.begin(), keys.end(), std::default_random_engine(seed));
    }
  }

  IndexQueryMeta qmeta(IndexMeta::DT_FP32);
  for (size_t i = 0; i < keys.size(); i++) {
    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
    NumericalVector<float> sparse_velues(sparse_dim_count);

    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices[j] = j * 20;
      sparse_velues[j] = keys[i];
    }

    // Sanity check on the freshly built input vector itself.
    for (size_t j = 0; j < sparse_dim_count; j++) {
      ASSERT_FLOAT_EQ(sparse_velues[j], keys[i]);
    }

    ASSERT_EQ(
        0, streamer->add_impl(keys[i], sparse_dim_count, sparse_indices.data(),
                              sparse_velues.data(), qmeta, ctx));

    // std::cout << "i: " << i << " key: " << keys[i] << std::endl;
  }

  {
    // check streamer
    // Keep the provider in a named local for as long as the iterator is used,
    // as the other tests in this fixture do, instead of creating the iterator
    // from a temporary provider.
    auto provider = streamer->create_sparse_provider();
    auto iter = provider->create_iterator();
    ASSERT_TRUE(!!iter);
    size_t cnt = 0;
    while (iter->is_valid()) {
      auto key = iter->key();

      const uint32_t sparse_count = iter->sparse_count();
      ASSERT_EQ(sparse_count, sparse_dim_count);

      const float *data = reinterpret_cast<const float *>(iter->sparse_data());

      // std::cout << "cnt: " << cnt << " key: " << key
      //           << ", gt_key: " << keys[cnt] << std::endl;

      // for (size_t j = 0; j < sparse_count; ++j) {
      //   std::cout << "j: " << j << " data: " << data[j] << std::endl;
      // }

      for (size_t j = 0; j < sparse_dim_count; ++j) {
        ASSERT_FLOAT_EQ((float)key, data[j]);
      }

      cnt++;
      iter->next();
    }
    ASSERT_EQ(cnt, docs);
  }

  // dump
  // auto path1 = dir_ + "/TestProvider";
  // auto dumper1 = IndexFactory::CreateDumper("FileDumper");
  // ASSERT_NE(dumper1, nullptr);
  // ASSERT_EQ(0, dumper1->create(path1));
  // ASSERT_EQ(0, streamer->dump(dumper1));
  // ASSERT_EQ(0, dumper1->close());
  // streamer->close();

  // // check dump index
  // IndexSparseSearcher::Pointer searcher =
  //     IndexFactory::CreateSparseSearcher("FlatSparseSearcher");
  // auto container = IndexFactory::CreateStorage("MMapFileContainer");
  // ASSERT_EQ(0, container->init(Params()));
  // ASSERT_EQ(0, container->load(path1));
  // ASSERT_NE(searcher, nullptr);
  // ASSERT_EQ(0, searcher->init(Params()));
  // ASSERT_EQ(0, searcher->load(container, IndexSparseMeasure::Pointer()));
  // auto iter = searcher->create_sparse_provider()->create_iterator();
  // size_t cnt = 0;
  // while (iter->is_valid()) {
  //   auto key = iter->key();

  //   const uint32_t sparse_count = iter->sparse_count();
  //   ASSERT_EQ(sparse_count, sparse_dim_count);

  //   const float *data = reinterpret_cast<const float *>(iter->sparse_data());
  //   for (size_t j = 0; j < sparse_dim_count; ++j) {
  //     ASSERT_FLOAT_EQ((float)key, data[j]);
  //   }

  //   cnt++;
  //   iter->next();
  // }
  // ASSERT_EQ(cnt, docs);

  // // check streamer
  // ASSERT_EQ(0, streamer->open(storage));
  // iter = streamer->create_sparse_provider()->create_iterator();
  // cnt = 0;
  // while (iter->is_valid()) {
  //   auto key = iter->key();

  //   const uint32_t sparse_count = iter->sparse_count();
  //   ASSERT_EQ(sparse_count, sparse_dim_count);

  //   const float *data = reinterpret_cast<const float *>(iter->sparse_data());
  //   for (size_t j = 0; j < sparse_dim_count; ++j) {
  //     ASSERT_FLOAT_EQ((float)key, data[j]);
  //   }

  //   cnt++;
  //   iter->next();
  // }
  // ASSERT_EQ(cnt, docs);

  // auto searcher_provider = searcher->create_sparse_provider();
  // auto streamer_provider = streamer->create_sparse_provider();
  // for (size_t i = 0; i < keys.size(); ++i) {
  //   {
  //     uint32_t sparse_count;
  //     std::string sparse_indices_buffer;
  //     std::string sparse_values_buffer;

  //     ASSERT_EQ(0, searcher_provider->get_sparse_vector(keys[i],
  //     &sparse_count,
  //                                                       &sparse_indices_buffer,
  //                                                       &sparse_values_buffer));

  //     const float *sparse_values_ptr =
  //         reinterpret_cast<const float *>(sparse_values_buffer.data());
  //     ASSERT_EQ(sparse_count, sparse_dim_count);
  //     for (size_t j = 0; j < sparse_count; ++j) {
  //       ASSERT_FLOAT_EQ(sparse_values_ptr[j], keys[i]);
  //     }
  //   }

  //   {
  //     uint32_t sparse_count;
  //     std::string sparse_indices_buffer;
  //     std::string sparse_values_buffer;
  //     ASSERT_EQ(0, streamer_provider->get_sparse_vector(keys[i],
  //     &sparse_count,
  //                                                       &sparse_indices_buffer,
  //                                                       &sparse_values_buffer));

  //     const float *sparse_values_ptr =
  //         reinterpret_cast<const float *>(sparse_values_buffer.data());
  //     ASSERT_EQ(sparse_count, sparse_dim_count);
  //     for (size_t j = 0; j < sparse_count; ++j) {
  //       ASSERT_FLOAT_EQ(sparse_values_ptr[j], keys[i]);
  //     }
  //   }
  // }

  // ASSERT_EQ(index_meta_ptr_->type(), streamer_provider->vector_type());
}

// Verifies PARAM_FLAT_SPARSE_STREAMER_MAX_DOC_CNT: with a limit of 100
// documents the first 100 inserts must succeed, every later insert must
// return IndexError_IndexFull, and only the accepted documents can be read
// back.
TEST_F(FlatSparseStreamerTest, TestParamsMaxDocCount) {
  // init storage
  // Use a file name of its own rather than the "/TestGeneral" name that was
  // copied from another test, so this test never shares index data.
  Params stg_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_TRUE(storage != nullptr);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestParamsMaxDocCount", true));


  // init streamer
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  IndexMeta index_meta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  index_meta.set_metric("InnerProductSparse", 0, Params());

  Params params;
  uint32_t max_doc_count = 100U;
  params.set(PARAM_FLAT_SPARSE_STREAMER_MAX_DOC_CNT, max_doc_count);
  ASSERT_EQ(0, streamer->init(index_meta, params));
  ASSERT_EQ(0, streamer->open(storage));

  // generate sparse data
  // Twice the limit, so half of the inserts are expected to be rejected.
  size_t sparse_dim_count = 32;
  size_t cnt = max_doc_count * 2;
  std::vector<NumericalVector<uint32_t>> sparse_indices_list;
  std::vector<NumericalVector<float>> sparse_vec_list;

  generate_sparse_data(cnt, sparse_dim_count, sparse_indices_list,
                       sparse_vec_list, true);

  // test add data
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  IndexQueryMeta qmeta(IndexMeta::DT_FP32);
  for (size_t i = 0; i < cnt; i++) {
    auto ret =
        streamer->add_impl(i, sparse_dim_count, sparse_indices_list[i].data(),
                           sparse_vec_list[i].data(), qmeta, ctx);
    if (i < max_doc_count) {
      ASSERT_EQ(0, ret);
    } else {
      ASSERT_EQ(IndexError_IndexFull, ret);
    }
  }

  // test get data
  uint32_t sparse_count;
  std::string sparse_indices_buffer;
  std::string sparse_values_buffer;
  for (size_t i = 0; i < cnt; i++) {
    // One lookup per key; its return code drives both branches below.
    auto ret = streamer->get_sparse_vector(
        i, &sparse_count, &sparse_indices_buffer, &sparse_values_buffer);
    if (i < max_doc_count) {
      ASSERT_EQ(ret, 0);
      ASSERT_EQ(sparse_dim_count, sparse_count);
      const uint32_t *sparse_indices_ptr =
          reinterpret_cast<const uint32_t *>(sparse_indices_buffer.data());
      const float *sparse_values_ptr =
          reinterpret_cast<const float *>(sparse_values_buffer.data());
      for (size_t j = 0; j < sparse_count; ++j) {
        ASSERT_EQ(sparse_indices_ptr[j], sparse_indices_list[i][j]);
        ASSERT_FLOAT_EQ(sparse_values_ptr[j], sparse_vec_list[i][j]);
        // std::cout << "1: " << sparse_values_ptr[j]
        //           << " 2: " << sparse_vec_list[i][j] << std::endl;
      }

      // must clear ^_^
      // The buffers are shared across iterations, so reset them before the
      // next lookup.
      sparse_indices_buffer.clear();
      sparse_values_buffer.clear();
    } else {
      ASSERT_EQ(ret, IndexError_NoExist);
    }
  }
}

// Exercises PARAM_FLAT_SPARSE_STREAMER_DATA_CHUNK_SIZE together with
// PARAM_FLAT_SPARSE_STREAMER_MAX_DATA_CHUNK_CNT (set to 1024 * 1024 and 1):
// once an insert has failed, every later insert must return
// IndexError_IndexFull, and only the documents added before the first failure
// can be read back.
TEST_F(FlatSparseStreamerTest, TestParamsDataChunkSize) {
  // init storage
  // Use a file name of its own rather than the "/TestGeneral" name that was
  // copied from another test, so this test never shares index data.
  Params stg_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_TRUE(storage != nullptr);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestParamsDataChunkSize", true));


  // init streamer
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  IndexMeta index_meta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  index_meta.set_metric("InnerProductSparse", 0, Params());

  Params params;
  uint32_t data_chunk_size = 1024 * 1024;
  uint32_t max_data_chunk_cnt = 1;
  params.set(PARAM_FLAT_SPARSE_STREAMER_DATA_CHUNK_SIZE, data_chunk_size);
  params.set(PARAM_FLAT_SPARSE_STREAMER_MAX_DATA_CHUNK_CNT, max_data_chunk_cnt);
  ASSERT_EQ(0, streamer->init(index_meta, params));
  ASSERT_EQ(0, streamer->open(storage));

  // generate sparse data
  size_t sparse_dim_count = 128;
  size_t cnt = 2000;
  std::vector<NumericalVector<uint32_t>> sparse_indices_list;
  std::vector<NumericalVector<float>> sparse_vec_list;

  generate_sparse_data(cnt, sparse_dim_count, sparse_indices_list,
                       sparse_vec_list, true);

  // test add data
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  IndexQueryMeta qmeta(IndexMeta::DT_FP32);
  // Sentinel meaning "no insert has failed yet".
  constexpr uint32_t kNoFailure = std::numeric_limits<uint32_t>::max();
  uint32_t insert_fail_idx = kNoFailure;
  for (size_t i = 0; i < cnt; i++) {
    auto ret =
        streamer->add_impl(i, sparse_dim_count, sparse_indices_list[i].data(),
                           sparse_vec_list[i].data(), qmeta, ctx);
    if (insert_fail_idx != kNoFailure) {
      // After the first rejection every further insert must report a full
      // index.
      ASSERT_EQ(ret, IndexError_IndexFull);
    }
    if (ret != 0 && insert_fail_idx == kNoFailure) {
      insert_fail_idx = static_cast<uint32_t>(i);
    }
  }
  // NOTE(review): nothing asserts that an insert actually failed or that the
  // first failure is IndexError_IndexFull -- confirm whether that is intended.

  // test get data
  uint32_t sparse_count;
  std::string sparse_indices_buffer;
  std::string sparse_values_buffer;
  for (size_t i = 0; i < cnt; i++) {
    // One lookup per key; its return code drives both branches below.
    auto ret = streamer->get_sparse_vector(
        i, &sparse_count, &sparse_indices_buffer, &sparse_values_buffer);
    if (i < insert_fail_idx) {
      ASSERT_EQ(ret, 0);
      ASSERT_EQ(sparse_dim_count, sparse_count);
      const uint32_t *sparse_indices_ptr =
          reinterpret_cast<const uint32_t *>(sparse_indices_buffer.data());
      const float *sparse_values_ptr =
          reinterpret_cast<const float *>(sparse_values_buffer.data());
      for (size_t j = 0; j < sparse_count; ++j) {
        ASSERT_EQ(sparse_indices_ptr[j], sparse_indices_list[i][j]);
        ASSERT_FLOAT_EQ(sparse_values_ptr[j], sparse_vec_list[i][j]);
        // std::cout << "1: " << sparse_values_ptr[j]
        //           << " 2: " << sparse_vec_list[i][j] << std::endl;
      }

      // must clear ^_^
      // The buffers are shared across iterations, so reset them before the
      // next lookup.
      sparse_indices_buffer.clear();
      sparse_values_buffer.clear();
    } else {
      ASSERT_EQ(ret, IndexError_NoExist);
    }
  }
}

// Exercises one search context against several streamers.
//
// Three independent streamers are opened. Each worker creates a context from
// a randomly chosen streamer and then uses that same context for add/search
// calls on all three. Streamer k (k = 0, 1, 2) only ever receives keys that are
// congruent to k modulo 3, so the top-1 hit of a search on streamer k must
// have key % 3 == k. Two workers run concurrently with disjoint key ranges.
TEST_F(FlatSparseStreamerTest, TestSharedContext) {
  // Creates a FlatSparseStreamer opened on its own MMapFileStorage at `path`.
  auto create_streamer = [](const std::string &path) {
    IndexStreamer::Pointer opened =
        IndexFactory::CreateStreamer("FlatSparseStreamer");
    auto backing = IndexFactory::CreateStorage("MMapFileStorage");
    Params backing_params;
    backing->init(backing_params);
    backing->open(path, true);
    Params streamer_params;
    opened->init(*index_meta_ptr_, streamer_params);
    opened->open(backing);
    return opened;
  };
  auto streamer1 = create_streamer(dir_ + "TestSharedContext.index1");
  auto streamer2 = create_streamer(dir_ + "TestSharedContext.index2");
  auto streamer3 = create_streamer(dir_ + "TestSharedContext.index3");

  srand(Realtime::MilliSeconds());
  IndexQueryMeta qmeta(IndexMeta::DT_FP32);

  auto do_test = [&](int start) {
    const IndexStreamer::Pointer streamers[3] = {streamer1, streamer2,
                                                 streamer3};

    // The context comes from one random streamer but is shared by all.
    IndexStreamer::Context::Pointer ctx =
        streamers[rand() % 3]->create_context();
    ctx->set_topk(1);

    // next_key[k] is the next key handed to streamer k; it advances by 3 so
    // that it always stays congruent to k modulo 3.
    uint64_t next_key[3];
    for (int k = 0; k < 3; ++k) {
      next_key[k] = start + k;
    }

    NumericalVector<uint32_t> query_indices(sparse_dim_count);
    NumericalVector<float> query_values(sparse_dim_count);
    for (size_t d = 0; d < sparse_dim_count; ++d) {
      query_indices[d] = d * 20;
      query_values[d] = 1.1f;
    }

    for (int round = 0; round < 1000; ++round) {
      NumericalVector<uint32_t> doc_indices(sparse_dim_count);
      NumericalVector<float> doc_values(sparse_dim_count);
      for (size_t d = 0; d < sparse_dim_count; ++d) {
        doc_indices[d] = d * 20;
        doc_values[d] = rand();
      }

      // Streamer `code` receives code + 1 copies of the document (under
      // consecutive keys of its residue class) and is then searched.
      const auto code = rand() % 3;
      const IndexStreamer::Pointer &target = streamers[code];
      for (int added = 0; added <= code; ++added) {
        target->add_impl(next_key[code], sparse_dim_count, doc_indices.data(),
                         doc_values.data(), qmeta, ctx);
        next_key[code] += 3;
      }
      int ret = target->search_impl(sparse_dim_count, query_indices.data(),
                                    query_values.data(), qmeta, ctx);

      EXPECT_EQ(0, ret);
      auto &results = ctx->result();
      EXPECT_EQ(1, results.size());
      EXPECT_EQ(code, results[0].key() % 3);
    }
  };

  auto worker_a = std::async(std::launch::async, do_test, 0);
  auto worker_b = std::async(std::launch::async, do_test, 30000000);
  worker_a.wait();
  worker_b.wait();
}

// Brute-force group-by search requesting fewer groups than exist.
//
// 5000 documents are inserted and grouped by (key / 10) % 10, which yields the
// ten group ids "g_0" .. "g_9". The search asks for 5 groups with 200
// documents per group and expects exactly 5 groups back, in the order
// g_0, g_1, ..., each holding at least 200 documents of the matching group.
TEST_F(FlatSparseStreamerTest, TestGroupBy) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  Params stg_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestLinearSearchGroup.index", true));

  Params params;
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  IndexQueryMeta qmeta(IndexMeta::DT_FP32);

  // Populate the index: document `doc` carries the value -(doc + 1) on every
  // one of its sparse dimensions.
  const size_t doc_total = 5000UL;
  NumericalVector<uint32_t> indices(sparse_dim_count);
  NumericalVector<float> values(sparse_dim_count);
  for (size_t doc = 0; doc < doc_total; ++doc) {
    for (size_t d = 0; d < sparse_dim_count; ++d) {
      indices[d] = d * 20;
      values[d] = -1.0 * doc - 1.0f;
    }
    ASSERT_EQ(0, streamer->add_impl(doc, sparse_dim_count, indices.data(),
                                    values.data(), qmeta, ctx));
  }

  // Maps a key to its group label, derived from the tens digit of the key.
  auto group_of = [](uint64_t key) {
    const uint32_t tens_digit = key / 10 % 10;
    return std::string("g_") + std::to_string(tens_digit);
  };

  const size_t group_topk = 200;
  const size_t group_num = 5;
  ctx->set_group_params(group_num, group_topk);
  ctx->set_group_by(group_of);

  const std::vector<std::string> expect_group_ids = {
      "g_0", "g_1", "g_2", "g_3", "g_4", "g_5", "g_6", "g_7", "g_8", "g_9"};

  // Reuse the insertion buffers for the query vector.
  for (size_t d = 0; d < sparse_dim_count; ++d) {
    indices[d] = d * 20;
    values[d] = 10.1f;
  }

  const auto search_begin = Monotime::MicroSeconds();
  ASSERT_EQ(0, streamer->search_bf_impl(sparse_dim_count, indices.data(),
                                        values.data(), qmeta, ctx));
  const auto search_end = Monotime::MicroSeconds();
  std::cout << "Search time: " << (search_end - search_begin) << " us"
            << std::endl;

  auto &group_result = ctx->group_result();
  ASSERT_EQ(group_num, group_result.size());
  for (uint32_t g = 0; g < group_result.size(); ++g) {
    auto &docs = group_result[g].docs();
    const std::string &group_id = group_result[g].group_id();
    std::cout << "Group ID: " << group_id << std::endl;

    ASSERT_EQ(group_id, expect_group_ids[g]);
    ASSERT_GE(docs.size(), group_topk);

    // Every returned document must belong to the group it is listed under.
    for (uint32_t r = 0; r < docs.size(); ++r) {
      ASSERT_EQ(docs[r].key() / 10 % 10, g);
    }
  }
}

// Brute-force group-by search requesting more groups than exist.
//
// The data set and grouping function produce ten distinct groups
// ("g_0" .. "g_9"), but the search asks for 12 groups of 200 documents. The
// result is expected to contain exactly the 10 available groups, in the order
// g_0, g_1, ..., each holding at least 200 documents of the matching group.
TEST_F(FlatSparseStreamerTest, TestGroupByNotEnoughNum) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  Params stg_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestLinearSearchGroup.index", true));

  Params params;
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  IndexQueryMeta qmeta(IndexMeta::DT_FP32);

  // Populate the index: document `doc` carries the value -(doc + 1) on every
  // one of its sparse dimensions.
  const size_t doc_total = 5000UL;
  NumericalVector<uint32_t> indices(sparse_dim_count);
  NumericalVector<float> values(sparse_dim_count);
  for (size_t doc = 0; doc < doc_total; ++doc) {
    for (size_t d = 0; d < sparse_dim_count; ++d) {
      indices[d] = d * 20;
      values[d] = -1.0 * doc - 1.0f;
    }
    ASSERT_EQ(0, streamer->add_impl(doc, sparse_dim_count, indices.data(),
                                    values.data(), qmeta, ctx));
  }

  // Maps a key to its group label, derived from the tens digit of the key.
  auto group_of = [](uint64_t key) {
    const uint32_t tens_digit = key / 10 % 10;
    return std::string("g_") + std::to_string(tens_digit);
  };

  // Deliberately request more groups (12) than the data can provide (10).
  const size_t group_topk = 200;
  const size_t group_num = 12;
  ctx->set_group_params(group_num, group_topk);
  ctx->set_group_by(group_of);

  const std::vector<std::string> expect_group_ids = {
      "g_0", "g_1", "g_2", "g_3", "g_4", "g_5", "g_6", "g_7", "g_8", "g_9"};

  // Reuse the insertion buffers for the query vector.
  for (size_t d = 0; d < sparse_dim_count; ++d) {
    indices[d] = d * 20;
    values[d] = 10.1f;
  }

  const auto search_begin = Monotime::MicroSeconds();
  ASSERT_EQ(0, streamer->search_bf_impl(sparse_dim_count, indices.data(),
                                        values.data(), qmeta, ctx));
  const auto search_end = Monotime::MicroSeconds();
  std::cout << "Search time: " << (search_end - search_begin) << " us"
            << std::endl;

  auto &group_result = ctx->group_result();
  ASSERT_EQ(10, group_result.size());
  for (uint32_t g = 0; g < group_result.size(); ++g) {
    auto &docs = group_result[g].docs();
    const std::string &group_id = group_result[g].group_id();
    std::cout << "Group ID: " << group_id << std::endl;

    ASSERT_EQ(group_id, expect_group_ids[g]);
    ASSERT_GE(docs.size(), group_topk);

    // Every returned document must belong to the group it is listed under.
    for (uint32_t r = 0; r < docs.size(); ++r) {
      ASSERT_EQ(docs[r].key() / 10 % 10, g);
    }
  }
}

// Inserts 1000 one-hot sparse vectors through add_with_id_impl (even ids
// first, then odd ids) and compares search_impl against search_bf_impl.
//
// Vector `i` has 1000 entries with indices 0..999 and a single 1.0 at
// position i. Queries are issued for every 100th id (0, 100, ..., 900) with a
// 1.1 at the queried position. For each query the brute-force top-1 must be
// the queried id; the overlap between the two top-200 result lists and the
// top-1 hit rate of search_impl must both exceed 0.80.
TEST_F(FlatSparseStreamerTest, TestAddAndSearchWithID) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("FlatSparseStreamer");
  ASSERT_NE(streamer, nullptr);

  Params params;
  Params stg_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestGroup.index", true));
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));
  auto ctx = streamer->create_context();
  auto linearCtx = streamer->create_context();
  auto knnCtx = streamer->create_context();
  // All three contexts are dereferenced below, so all three must be valid.
  ASSERT_TRUE(!!ctx);
  ASSERT_TRUE(!!linearCtx);
  ASSERT_TRUE(!!knnCtx);

  constexpr size_t cnt = 1000U;
  // Local dimension count for this test: one sparse entry per document id.
  constexpr size_t sparse_dim_count = cnt;
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32);

  // Inserts the one-hot vector for `id` under that id. The status returned by
  // add_with_id_impl is not inspected (unchanged from the original test).
  auto add_one_hot = [&](size_t id) {
    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
    NumericalVector<float> sparse_vec(sparse_dim_count);
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices[j] = j;
      sparse_vec[j] = (id == j ? 1.0f : 0.0f);
    }
    streamer->add_with_id_impl(id, sparse_dim_count, sparse_indices.data(),
                               sparse_vec.data(), qmeta, ctx);
  };
  // Ids are added out of order on purpose: all even ids, then all odd ids.
  for (size_t i = 0; i < cnt; i += 2) {
    add_one_hot(i);
  }
  for (size_t i = 1; i < cnt; i += 2) {
    add_one_hot(i);
  }

  size_t topk = 200;
  linearCtx->set_topk(topk);
  knnCtx->set_topk(topk);
  uint64_t knnTotalTime = 0;
  uint64_t linearTotalTime = 0;
  int totalHits = 0;
  int totalCnts = 0;
  int topk1Hits = 0;
  for (size_t i = 0; i < cnt; i += 100) {
    NumericalVector<uint32_t> query_sparse_indices(sparse_dim_count);
    NumericalVector<float> query_sparse_velues(sparse_dim_count);
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      query_sparse_indices[j] = j;
      query_sparse_velues[j] = (i == j ? 1.1f : 0.0f);
    }
    auto t1 = Realtime::MicroSeconds();
    ASSERT_EQ(
        0, streamer->search_impl(sparse_dim_count, query_sparse_indices.data(),
                                 query_sparse_velues.data(), qmeta, knnCtx));
    auto t2 = Realtime::MicroSeconds();
    ASSERT_EQ(0, streamer->search_bf_impl(
                     sparse_dim_count, query_sparse_indices.data(),
                     query_sparse_velues.data(), qmeta, linearCtx));
    auto t3 = Realtime::MicroSeconds();
    knnTotalTime += t2 - t1;
    linearTotalTime += t3 - t2;

    auto &knnResult = knnCtx->result();
    ASSERT_EQ(topk, knnResult.size());
    topk1Hits += i == knnResult[0].key();
    auto &linearResult = linearCtx->result();
    ASSERT_EQ(topk, linearResult.size());
    ASSERT_EQ(i, linearResult[0].key());

    // Count how many search_impl results also appear in the brute-force
    // result list for the same query.
    for (size_t k = 0; k < topk; ++k) {
      totalCnts++;
      for (size_t j = 0; j < topk; ++j) {
        if (linearResult[j].key() == knnResult[k].key()) {
          totalHits++;
          break;
        }
      }
    }
  }
  std::cout << "knnTotalTime: " << knnTotalTime << std::endl;
  std::cout << "linearTotalTime: " << linearTotalTime << std::endl;
  float recall = totalHits * 1.0f / totalCnts;
  // One query is issued per 100 ids, i.e. cnt / 100 queries in total, so
  // hits * 100 / cnt is the fraction of queries whose top-1 was correct.
  float topk1Recall = topk1Hits * 100.0f / cnt;
  EXPECT_GT(recall, 0.80f);
  EXPECT_GT(topk1Recall, 0.80f);
}

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif

================================================
FILE: tests/core/algorithm/hnsw/CMakeLists.txt
================================================
# Registers one GoogleTest target per HNSW test source in this directory.

# Project CMake helpers; presumably the place where cc_gtest is defined
# (TODO confirm).
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)

# Collect every file matching *_test.cc below this directory (recursive).
# NOTE(review): only the ".cc" extension is matched. A test source that uses
# another extension (e.g. "*_test.cpp") would not be picked up by this glob -
# confirm that this is intended.
file(GLOB_RECURSE ALL_TEST_SRCS *_test.cc)

# One test target per source file; the target is named after the file name
# with its directory and extension stripped (NAME_WE).
foreach(CC_SRCS ${ALL_TEST_SRCS})
  get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)
  cc_gtest(
      NAME ${CC_TARGET}
      STRICT
      LIBS zvec_ailego core_framework core_utility core_metric core_quantizer core_knn_hnsw core_knn_flat
      SRCS ${CC_SRCS}
      INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm/hnsw
    )
endforeach()

================================================
FILE: tests/core/algorithm/hnsw/hnsw_builder_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "hnsw_builder.h"
#include <sys/stat.h>
#include <sys/types.h>
#include <fcntl.h>
#include <future>
#include <gtest/gtest.h>
#include <zvec/ailego/container/vector.h>
#include "zvec/core/framework/index_framework.h"

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
#endif

using namespace std;
using namespace zvec::ailego;

namespace zvec {
namespace core {

constexpr size_t static dim = 16;

class HnswBuilderTest : public testing::Test {
 protected:
  void SetUp(void);
  void TearDown(void);

  static std::string _dir;
  static shared_ptr<IndexMeta> _index_meta_ptr;
};

std::string HnswBuilderTest::_dir("hnswBuilderTest");
shared_ptr<IndexMeta> HnswBuilderTest::_index_meta_ptr;

void HnswBuilderTest::SetUp(void) {
  _index_meta_ptr.reset(new (nothrow)
                            IndexMeta(IndexMeta::DataType::DT_FP32, dim));
  _index_meta_ptr->set_metric("SquaredEuclidean", 0, ailego::Params());
}

// Removes the test output directory (and everything below it) after every
// test by running `rm -rf <_dir>` through the shell.
void HnswBuilderTest::TearDown(void) {
  // Build the command in a std::string rather than a fixed-size buffer so
  // that a long directory name can never be truncated before it reaches
  // `rm -rf`.
  const std::string cleanup_cmd = "rm -rf " + _dir;
  system(cleanup_cmd.c_str());
}

// End-to-end check of the builder life cycle.
//
// Phase 1 trains, builds and dumps an index from 1000 vectors held in a
// one-pass holder and verifies the builder statistics. Phase 2 calls
// cleanup(), re-initialises the same builder and repeats the cycle with 2000
// vectors held in a multi-pass holder, writing to the same dump path.
TEST_F(HnswBuilderTest, TestGeneral) {
  IndexBuilder::Pointer hnsw = IndexFactory::CreateBuilder("HnswBuilder");
  ASSERT_NE(hnsw, nullptr);

  const string dump_path = _dir + "/TestGeneral";
  ailego::Params build_params;

  // ---- Phase 1: 1000 vectors; vector `id` has every component equal to id.
  const size_t first_count = 1000UL;
  auto first_holder =
      make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);
  for (size_t id = 0; id < first_count; ++id) {
    NumericalVector<float> point(dim);
    for (size_t d = 0; d < dim; ++d) {
      point[d] = id;
    }
    ASSERT_TRUE(first_holder->emplace(id, point));
  }

  ASSERT_EQ(0, hnsw->init(*_index_meta_ptr, build_params));
  ASSERT_EQ(0, hnsw->train(first_holder));
  ASSERT_EQ(0, hnsw->build(first_holder));

  auto first_dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(first_dumper, nullptr);
  ASSERT_EQ(0, first_dumper->create(dump_path));
  ASSERT_EQ(0, hnsw->dump(first_dumper));
  ASSERT_EQ(0, first_dumper->close());

  // The test expects zero trained documents and zero training time, and all
  // documents to be counted as built and dumped.
  auto &stats = hnsw->stats();
  ASSERT_EQ(0UL, stats.trained_count());
  ASSERT_EQ(first_count, stats.built_count());
  ASSERT_EQ(first_count, stats.dumped_count());
  ASSERT_EQ(0UL, stats.discarded_count());
  ASSERT_EQ(0UL, stats.trained_costtime());
  ASSERT_GT(stats.built_costtime(), 0UL);

  // ---- Phase 2: clean up and rebuild with a larger data set.
  ASSERT_EQ(0, hnsw->cleanup());

  const size_t second_count = 2000UL;
  auto second_holder =
      make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);
  for (size_t id = 0; id < second_count; ++id) {
    NumericalVector<float> point(dim);
    for (size_t d = 0; d < dim; ++d) {
      point[d] = id;
    }
    ASSERT_TRUE(second_holder->emplace(id, point));
  }

  ASSERT_EQ(0, hnsw->init(*_index_meta_ptr, build_params));
  ASSERT_EQ(0, hnsw->train(second_holder));
  ASSERT_EQ(0, hnsw->build(second_holder));

  auto second_dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(second_dumper, nullptr);
  ASSERT_EQ(0, second_dumper->create(dump_path));
  ASSERT_EQ(0, hnsw->dump(second_dumper));
  ASSERT_EQ(0, second_dumper->close());

  // `stats` is the reference obtained in phase 1; it is read again here and
  // expected to reflect the second build.
  ASSERT_EQ(0UL, stats.trained_count());
  ASSERT_EQ(second_count, stats.built_count());
  ASSERT_EQ(second_count, stats.dumped_count());
  ASSERT_EQ(0UL, stats.discarded_count());
  ASSERT_EQ(0UL, stats.trained_costtime());
  ASSERT_GT(stats.built_costtime(), 0UL);
}

// Sets "proxima.hnsw.builder.memory_quota" to 100000 and expects building an
// index from 1000 vectors to be rejected with IndexError_NoMemory, while
// init() and train() still succeed.
TEST_F(HnswBuilderTest, TestMemquota) {
  IndexBuilder::Pointer hnsw = IndexFactory::CreateBuilder("HnswBuilder");
  ASSERT_NE(hnsw, nullptr);

  // Input data: vector `id` has every component equal to id.
  const size_t vector_count = 1000UL;
  auto input =
      make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);
  for (size_t id = 0; id < vector_count; ++id) {
    NumericalVector<float> point(dim);
    for (size_t d = 0; d < dim; ++d) {
      point[d] = id;
    }
    ASSERT_TRUE(input->emplace(id, point));
  }

  ailego::Params quota_params;
  quota_params.set("proxima.hnsw.builder.memory_quota", 100000UL);

  ASSERT_EQ(0, hnsw->init(*_index_meta_ptr, quota_params));
  ASSERT_EQ(0, hnsw->train(input));
  // Only the build step is expected to hit the quota.
  ASSERT_EQ(IndexError_NoMemory, hnsw->build(input));
}

// Runs two builders concurrently on one shared SingleQueueIndexThreads pool.
//
// Both builders train and build from the same multi-pass holder, each on its
// own std::async task. Afterwards both are dumped (one after the other,
// through the same dumper and to the same path) and each must report that
// all documents were built.
TEST_F(HnswBuilderTest, TestIndexThreads) {
  IndexBuilder::Pointer first = IndexFactory::CreateBuilder("HnswBuilder");
  ASSERT_NE(first, nullptr);
  IndexBuilder::Pointer second = IndexFactory::CreateBuilder("HnswBuilder");
  ASSERT_NE(second, nullptr);

  // Input data: vector `id` has every component equal to id.
  const size_t vector_count = 1000UL;
  auto input =
      make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);
  for (size_t id = 0; id < vector_count; ++id) {
    NumericalVector<float> point(dim);
    for (size_t d = 0; d < dim; ++d) {
      point[d] = id;
    }
    ASSERT_TRUE(input->emplace(id, point));
  }

  ailego::Params build_params;
  // The pool's first constructor argument is randomised in the range 0..3.
  std::srand(ailego::Realtime::MilliSeconds());
  auto pool = std::make_shared<SingleQueueIndexThreads>(std::rand() % 4, false);
  ASSERT_EQ(0, first->init(*_index_meta_ptr, build_params));
  ASSERT_EQ(0, second->init(*_index_meta_ptr, build_params));

  // Trains and builds `target` on the shared pool. A failed assertion returns
  // from this helper only, not from the test body.
  auto train_and_build = [&](IndexBuilder::Pointer &target) {
    ASSERT_EQ(0, target->train(pool, input));
    ASSERT_EQ(0, target->build(pool, input));
  };

  auto first_job =
      std::async(std::launch::async, [&]() { train_and_build(first); });
  auto second_job =
      std::async(std::launch::async, [&]() { train_and_build(second); });
  first_job.wait();
  second_job.wait();

  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);

  // Both indexes go to the same file; the second dump reuses the path.
  const string dump_path = _dir + "/TestIndexThreads";
  ASSERT_EQ(0, dumper->create(dump_path));
  ASSERT_EQ(0, first->dump(dumper));
  ASSERT_EQ(0, dumper->close());
  ASSERT_EQ(0, dumper->create(dump_path));
  ASSERT_EQ(0, second->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  auto &first_stats = first->stats();
  ASSERT_EQ(vector_count, first_stats.built_count());
  auto &second_stats = second->stats();
  ASSERT_EQ(vector_count, second_stats.built_count());
}

// Builds an HNSW index from data passed through "CosineFp32Converter".
//
// Phase 1 converts 1000 FP32 vectors with the converter (metric "Cosine"),
// then trains, builds and dumps using the converter's meta and result holder,
// and verifies the builder statistics. Phase 2 calls cleanup() and rebuilds
// from 2000 unconverted vectors using the fixture's index meta.
TEST_F(HnswBuilderTest, TestCosine) {
  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("HnswBuilder");
  ASSERT_NE(builder, nullptr);

  // Input data: vector `i` has every component equal to i.
  auto holder =
      make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);
  size_t doc_cnt = 1000UL;
  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }
  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dim);
  index_meta_raw.set_metric("Cosine", 0, ailego::Params());

  ailego::Params converter_params;
  auto converter = IndexFactory::CreateConverter("CosineFp32Converter");
  // The converter is dereferenced right below, so guard against a null
  // factory result the same way the builder and dumper are guarded.
  ASSERT_NE(converter, nullptr);
  ASSERT_EQ(0, converter->init(index_meta_raw, converter_params));

  // Meta describing the converted data; used to initialise the builder.
  IndexMeta index_meta = converter->meta();

  ASSERT_EQ(0, converter->transform(holder));

  auto converted_holder = converter->result();
  ASSERT_NE(converted_holder, nullptr);

  ailego::Params params;
  // params.set("proxima.hnsw.builder.thread_count", 1);
  ASSERT_EQ(0, builder->init(index_meta, params));

  ASSERT_EQ(0, builder->train(converted_holder));

  ASSERT_EQ(0, builder->build(converted_holder));

  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);

  string path = _dir + "/TestCosine";
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  auto &stats = builder->stats();
  ASSERT_EQ(0UL, stats.trained_count());
  ASSERT_EQ(doc_cnt, stats.built_count());
  ASSERT_EQ(doc_cnt, stats.dumped_count());
  ASSERT_EQ(0UL, stats.discarded_count());
  ASSERT_EQ(0UL, stats.trained_costtime());
  ASSERT_GT(stats.built_costtime(), 0UL);
  // ASSERT_GT(stats.dumped_costtime(), 0UL);

  // cleanup and rebuild
  ASSERT_EQ(0, builder->cleanup());

  auto holder2 =
      make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);
  size_t doc_cnt2 = 2000UL;
  for (size_t i = 0; i < doc_cnt2; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    ASSERT_TRUE(holder2->emplace(i, vec));
  }
  // The rebuild uses the fixture's meta and the raw (unconverted) holder.
  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));
  ASSERT_EQ(0, builder->train(holder2));
  ASSERT_EQ(0, builder->build(holder2));
  auto dumper2 = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper2, nullptr);
  ASSERT_EQ(0, dumper2->create(path));
  ASSERT_EQ(0, builder->dump(dumper2));
  ASSERT_EQ(0, dumper2->close());

  // `stats` is the reference obtained before cleanup(); it is read again
  // here and expected to reflect the second build.
  ASSERT_EQ(0UL, stats.trained_count());
  ASSERT_EQ(doc_cnt2, stats.built_count());
  ASSERT_EQ(doc_cnt2, stats.dumped_count());
  ASSERT_EQ(0UL, stats.discarded_count());
  ASSERT_EQ(0UL, stats.trained_costtime());
  ASSERT_GT(stats.built_costtime(), 0UL);
}

// Builds an HNSW index from data passed through "CosineFp16Converter".
//
// Phase 1 converts 1000 FP32 vectors with the converter (metric "Cosine"),
// then trains, builds and dumps using the converter's meta and result holder,
// and verifies the builder statistics. Phase 2 calls cleanup() and rebuilds
// from 2000 unconverted vectors using the fixture's index meta.
TEST_F(HnswBuilderTest, TestCosineFp16Converter) {
  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("HnswBuilder");
  ASSERT_NE(builder, nullptr);

  // Input data: vector `i` has every component equal to i.
  auto holder =
      make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);
  size_t doc_cnt = 1000UL;
  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }
  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dim);
  index_meta_raw.set_metric("Cosine", 0, ailego::Params());

  ailego::Params converter_params;
  auto converter = IndexFactory::CreateConverter("CosineFp16Converter");
  // The converter is dereferenced right below, so guard against a null
  // factory result the same way the builder and dumper are guarded.
  ASSERT_NE(converter, nullptr);
  ASSERT_EQ(0, converter->init(index_meta_raw, converter_params));

  // Meta describing the converted data; used to initialise the builder.
  IndexMeta index_meta = converter->meta();

  ASSERT_EQ(0, converter->transform(holder));

  auto converted_holder = converter->result();
  ASSERT_NE(converted_holder, nullptr);

  ailego::Params params;

  // params.set("proxima.hnsw.builder.thread_count", 1);
  ASSERT_EQ(0, builder->init(index_meta, params));

  ASSERT_EQ(0, builder->train(converted_holder));

  ASSERT_EQ(0, builder->build(converted_holder));

  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);

  string path = _dir + "/TestCosineFp16Converter";
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  auto &stats = builder->stats();
  ASSERT_EQ(0UL, stats.trained_count());
  ASSERT_EQ(doc_cnt, stats.built_count());
  ASSERT_EQ(doc_cnt, stats.dumped_count());
  ASSERT_EQ(0UL, stats.discarded_count());
  ASSERT_EQ(0UL, stats.trained_costtime());
  ASSERT_GT(stats.built_costtime(), 0UL);
  // ASSERT_GT(stats.dumped_costtime(), 0UL);

  // cleanup and rebuild
  ASSERT_EQ(0, builder->cleanup());

  auto holder2 =
      make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);
  size_t doc_cnt2 = 2000UL;
  for (size_t i = 0; i < doc_cnt2; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    ASSERT_TRUE(holder2->emplace(i, vec));
  }
  // The rebuild uses the fixture's meta and the raw (unconverted) holder.
  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));
  ASSERT_EQ(0, builder->train(holder2));
  ASSERT_EQ(0, builder->build(holder2));
  auto dumper2 = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper2, nullptr);
  ASSERT_EQ(0, dumper2->create(path));
  ASSERT_EQ(0, builder->dump(dumper2));
  ASSERT_EQ(0, dumper2->close());

  // `stats` is the reference obtained before cleanup(); it is read again
  // here and expected to reflect the second build.
  ASSERT_EQ(0UL, stats.trained_count());
  ASSERT_EQ(doc_cnt2, stats.built_count());
  ASSERT_EQ(doc_cnt2, stats.dumped_count());
  ASSERT_EQ(0UL, stats.discarded_count());
  ASSERT_EQ(0UL, stats.trained_costtime());
  ASSERT_GT(stats.built_costtime(), 0UL);
}

// Builds an HNSW index from data passed through "CosineInt8Converter".
//
// Phase 1 converts 1000 FP32 vectors with the converter (metric "Cosine"),
// then trains, builds and dumps using the converter's meta and result holder,
// and verifies the builder statistics. Phase 2 calls cleanup() and rebuilds
// from 2000 unconverted vectors using the fixture's index meta.
TEST_F(HnswBuilderTest, TestCosineInt8Converter) {
  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("HnswBuilder");
  ASSERT_NE(builder, nullptr);

  // Input data: vector `i` has every component equal to i.
  auto holder =
      make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);
  size_t doc_cnt = 1000UL;
  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }
  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dim);
  index_meta_raw.set_metric("Cosine", 0, ailego::Params());

  ailego::Params converter_params;
  auto converter = IndexFactory::CreateConverter("CosineInt8Converter");
  // The converter is dereferenced right below, so guard against a null
  // factory result the same way the builder and dumper are guarded.
  ASSERT_NE(converter, nullptr);
  ASSERT_EQ(0, converter->init(index_meta_raw, converter_params));

  // Meta describing the converted data; used to initialise the builder.
  IndexMeta index_meta = converter->meta();

  ASSERT_EQ(0, converter->transform(holder));

  auto converted_holder = converter->result();
  ASSERT_NE(converted_holder, nullptr);

  ailego::Params params;
  // params.set("proxima.hnsw.builder.thread_count", 1);
  ASSERT_EQ(0, builder->init(index_meta, params));

  ASSERT_EQ(0, builder->train(converted_holder));

  ASSERT_EQ(0, builder->build(converted_holder));

  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);

  string path = _dir + "/TestCosineInt8Converter";
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  auto &stats = builder->stats();
  ASSERT_EQ(0UL, stats.trained_count());
  ASSERT_EQ(doc_cnt, stats.built_count());
  ASSERT_EQ(doc_cnt, stats.dumped_count());
  ASSERT_EQ(0UL, stats.discarded_count());
  ASSERT_EQ(0UL, stats.trained_costtime());
  ASSERT_GT(stats.built_costtime(), 0UL);
  // ASSERT_GT(stats.dumped_costtime(), 0UL);

  // cleanup and rebuild
  ASSERT_EQ(0, builder->cleanup());

  auto holder2 =
      make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);
  size_t doc_cnt2 = 2000UL;
  for (size_t i = 0; i < doc_cnt2; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    ASSERT_TRUE(holder2->emplace(i, vec));
  }
  // The rebuild uses the fixture's meta and the raw (unconverted) holder.
  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));
  ASSERT_EQ(0, builder->train(holder2));
  ASSERT_EQ(0, builder->build(holder2));
  auto dumper2 = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper2, nullptr);
  ASSERT_EQ(0, dumper2->create(path));
  ASSERT_EQ(0, builder->dump(dumper2));
  ASSERT_EQ(0, dumper2->close());

  // `stats` is the reference obtained before cleanup(); it is read again
  // here and expected to reflect the second build.
  ASSERT_EQ(0UL, stats.trained_count());
  ASSERT_EQ(doc_cnt2, stats.built_count());
  ASSERT_EQ(doc_cnt2, stats.dumped_count());
  ASSERT_EQ(0UL, stats.discarded_count());
  ASSERT_EQ(0UL, stats.trained_costtime());
  ASSERT_GT(stats.built_costtime(), 0UL);
}

// Builds an HNSW index from data passed through "CosineInt4Converter".
//
// Phase 1 converts 1000 FP32 vectors with the converter (metric "Cosine"),
// then trains, builds and dumps using the converter's meta and result holder,
// and verifies the builder statistics. Phase 2 calls cleanup() and rebuilds
// from 2000 unconverted vectors using the fixture's index meta.
TEST_F(HnswBuilderTest, TestCosineInt4Converter) {
  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("HnswBuilder");
  ASSERT_NE(builder, nullptr);

  // Input data: vector `i` has every component equal to i.
  auto holder =
      make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);
  size_t doc_cnt = 1000UL;
  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }
  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dim);
  index_meta_raw.set_metric("Cosine", 0, ailego::Params());

  ailego::Params converter_params;
  auto converter = IndexFactory::CreateConverter("CosineInt4Converter");
  // The converter is dereferenced right below, so guard against a null
  // factory result the same way the builder and dumper are guarded.
  ASSERT_NE(converter, nullptr);
  ASSERT_EQ(0, converter->init(index_meta_raw, converter_params));

  // Meta describing the converted data; used to initialise the builder.
  IndexMeta index_meta = converter->meta();

  ASSERT_EQ(0, converter->transform(holder));

  auto converted_holder = converter->result();
  ASSERT_NE(converted_holder, nullptr);

  ailego::Params params;
  // params.set("proxima.hnsw.builder.thread_count", 1);
  ASSERT_EQ(0, builder->init(index_meta, params));

  ASSERT_EQ(0, builder->train(converted_holder));

  ASSERT_EQ(0, builder->build(converted_holder));

  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);

  string path = _dir + "/TestCosineInt4Converter";
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  auto &stats = builder->stats();
  ASSERT_EQ(0UL, stats.trained_count());
  ASSERT_EQ(doc_cnt, stats.built_count());
  ASSERT_EQ(doc_cnt, stats.dumped_count());
  ASSERT_EQ(0UL, stats.discarded_count());
  ASSERT_EQ(0UL, stats.trained_costtime());
  ASSERT_GT(stats.built_costtime(), 0UL);
  // ASSERT_GT(stats.dumped_costtime(), 0UL);

  // cleanup and rebuild
  ASSERT_EQ(0, builder->cleanup());

  auto holder2 =
      make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);
  size_t doc_cnt2 = 2000UL;
  for (size_t i = 0; i < doc_cnt2; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    ASSERT_TRUE(holder2->emplace(i, vec));
  }
  // The rebuild uses the fixture's meta and the raw (unconverted) holder.
  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));
  ASSERT_EQ(0, builder->train(holder2));
  ASSERT_EQ(0, builder->build(holder2));
  auto dumper2 = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper2, nullptr);
  ASSERT_EQ(0, dumper2->create(path));
  ASSERT_EQ(0, builder->dump(dumper2));
  ASSERT_EQ(0, dumper2->close());

  // `stats` is the reference obtained before cleanup(); it is read again
  // here and expected to reflect the second build.
  ASSERT_EQ(0UL, stats.trained_count());
  ASSERT_EQ(doc_cnt2, stats.built_count());
  ASSERT_EQ(doc_cnt2, stats.dumped_count());
  ASSERT_EQ(0UL, stats.discarded_count());
  ASSERT_EQ(0UL, stats.trained_costtime());
  ASSERT_GT(stats.built_costtime(), 0UL);
}

}  // namespace core
}  // namespace zvec

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif

================================================
FILE: tests/core/algorithm/hnsw/hnsw_searcher_test.cpp
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <sys/stat.h>
#include <sys/types.h>
#include <fcntl.h>
#include <cstdio>
#include <cstdlib>
#include <future>
#include <memory>
#include <new>
#include <random>
#include <string>
#include <vector>
#include <ailego/math/distance.h>
#include <gtest/gtest.h>
#include <zvec/ailego/container/vector.h>
#include "zvec/core/framework/index_builder.h"
#include "zvec/core/framework/index_factory.h"
#include "zvec/core/framework/index_meta.h"
#include "hnsw_params.h"

using namespace std;
using namespace testing;
using namespace zvec::ailego;

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
#endif

namespace zvec {
namespace core {

//! Number of dimensions of every vector built and queried in this file.
static constexpr size_t dim = 16;

class HnswSearcherTest : public testing::Test {
 protected:
  void SetUp(void);
  void TearDown(void);

  static std::string _dir;
  static shared_ptr<IndexMeta> _index_meta_ptr;
};

std::string HnswSearcherTest::_dir("HnswSearcherTest/");
shared_ptr<IndexMeta> HnswSearcherTest::_index_meta_ptr;

void HnswSearcherTest::SetUp(void) {
  _index_meta_ptr.reset(new (nothrow)
                            IndexMeta(IndexMeta::DataType::DT_FP32, dim));
  _index_meta_ptr->set_metric("SquaredEuclidean", 0, ailego::Params());
}

//! Remove the working directory after each test (best-effort cleanup).
void HnswSearcherTest::TearDown(void) {
  // Build the command in a std::string so that a long directory name can
  // never be truncated into a different `rm -rf` target.
  const std::string cmd = "rm -rf " + _dir;
  // The exit status is intentionally ignored; -Wunused-result is suppressed
  // for this file.
  system(cmd.c_str());
}

//! Threshold (radius) search on the default SquaredEuclidean meta: build and
//! dump 1000 vectors, load them into an HnswSearcher, then check that a score
//! threshold shortens the result list and that resetting it restores top-k.
TEST_F(HnswSearcherTest, TestRnnSearch) {
  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("HnswBuilder");
  ASSERT_NE(builder, nullptr);
  auto holder =
      make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);
  size_t doc_cnt = 1000UL;
  // Document i is the vector (i, i, ..., i), stored under key i.
  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }
  ASSERT_EQ(0, builder->init(*_index_meta_ptr, ailego::Params()));
  ASSERT_EQ(0, builder->train(holder));
  ASSERT_EQ(0, builder->build(holder));

  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  string path = _dir + "/TestRnnSearch";
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  // test searcher
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSearcher");
  ASSERT_TRUE(searcher != nullptr);
  ASSERT_EQ(0, searcher->init(ailego::Params()));

  auto storage = IndexFactory::CreateStorage("FileReadStorage");
  // Fail cleanly instead of crashing if the storage could not be created.
  ASSERT_NE(storage, nullptr);
  ASSERT_EQ(0, storage->open(path, false));
  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));
  auto ctx = searcher->create_context();
  ASSERT_TRUE(!!ctx);

  // Query with the all-zero vector.
  NumericalVector<float> vec(dim);
  for (size_t j = 0; j < dim; ++j) {
    vec[j] = 0.0;
  }
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  size_t topk = 50;
  ctx->set_topk(topk);
  ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, ctx));
  // `results` refers to the context's result list and is inspected again
  // after each of the following searches.
  auto &results = ctx->result();
  ASSERT_EQ(topk, results.size());

  // Use the score of the middle hit as radius: the thresholded search is
  // expected to return fewer than topk hits, none scoring above the radius.
  float radius = results[topk / 2].score();
  ctx->set_threshold(radius);
  ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, ctx));
  ASSERT_GT(topk, results.size());
  for (size_t k = 0; k < results.size(); ++k) {
    ASSERT_GE(radius, results[k].score());
  }

  // Test Reset Threshold
  ctx->reset_threshold();
  ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, ctx));
  ASSERT_EQ(topk, results.size());
  ASSERT_LT(radius, results[topk - 1].score());
}

//! Threshold (radius) search with the "InnerProduct" metric: same flow as the
//! SquaredEuclidean variant, but the radius is the negated score of the
//! middle hit of the unthresholded search.
TEST_F(HnswSearcherTest, TestRnnSearchInnerProduct) {
  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("HnswBuilder");
  ASSERT_NE(builder, nullptr);
  auto holder =
      make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);
  size_t doc_cnt = 1000UL;
  // Document i is the vector (i, i, ..., i), stored under key i.
  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }

  // This test uses its own meta rather than the fixture's default one.
  IndexMeta index_meta(IndexMeta::DataType::DT_FP32, dim);
  index_meta.set_metric("InnerProduct", 0, ailego::Params());

  ASSERT_EQ(0, builder->init(index_meta, ailego::Params()));
  ASSERT_EQ(0, builder->train(holder));
  ASSERT_EQ(0, builder->build(holder));

  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  string path = _dir + "/TestRnnSearchInnerProduct";
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  // test searcher
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSearcher");
  ASSERT_TRUE(searcher != nullptr);
  ASSERT_EQ(0, searcher->init(ailego::Params()));

  auto storage = IndexFactory::CreateStorage("FileReadStorage");
  // Fail cleanly instead of crashing if the storage could not be created.
  ASSERT_NE(storage, nullptr);
  ASSERT_EQ(0, storage->open(path, false));
  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));
  auto ctx = searcher->create_context();
  ASSERT_TRUE(!!ctx);

  // Query with the all-ones vector.
  NumericalVector<float> vec(dim);
  for (size_t j = 0; j < dim; ++j) {
    vec[j] = 1.0;
  }
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  size_t topk = 50;
  ctx->set_topk(topk);
  ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, ctx));
  // `results` refers to the context's result list and is inspected again
  // after each of the following searches.
  auto &results = ctx->result();
  ASSERT_EQ(topk, results.size());

  // The threshold is the negated middle score; the thresholded search is
  // expected to return fewer than topk hits, none scoring above the radius.
  float radius = -results[topk / 2].score();
  ctx->set_threshold(radius);
  ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, ctx));
  ASSERT_GT(topk, results.size());
  for (size_t k = 0; k < results.size(); ++k) {
    ASSERT_GE(radius, results[k].score());
  }

  // Test Reset Threshold
  ctx->reset_threshold();
  ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, ctx));
  ASSERT_EQ(topk, results.size());
  ASSERT_LT(-radius, results[topk - 1].score());
}

//! Threshold (radius) search with the "Cosine" metric. The documents go
//! through the CosineFp32Converter before building, and the query goes through
//! the reformer named by the converted meta before searching.
TEST_F(HnswSearcherTest, TestRnnSearchCosine) {
  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("HnswBuilder");
  ASSERT_NE(builder, nullptr);
  auto holder =
      make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);
  size_t doc_cnt = 1000UL;

  // NOTE(review): the generator is seeded from std::random_device, so the
  // document set differs between runs and a failure cannot be replayed.
  std::random_device rd;
  std::mt19937 gen(rd());

  std::uniform_real_distribution<float> dist(-1.0, 1.0);

  // Every component of every document is drawn from [-1, 1).
  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = dist(gen);
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }

  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dim);
  index_meta_raw.set_metric("Cosine", 0, ailego::Params());

  ailego::Params converter_params;
  auto converter = IndexFactory::CreateConverter("CosineFp32Converter");
  // Fail cleanly instead of crashing if the converter is not registered.
  ASSERT_TRUE(converter != nullptr);
  converter->init(index_meta_raw, converter_params);

  // The builder and the reformer below work with the converter's meta, not
  // with the raw one.
  IndexMeta index_meta = converter->meta();

  converter->transform(holder);

  auto converted_holder = converter->result();
  // Do not hand an empty holder to the builder.
  ASSERT_TRUE(converted_holder != nullptr);

  ASSERT_EQ(0, builder->init(index_meta, ailego::Params()));
  ASSERT_EQ(0, builder->train(converted_holder));
  ASSERT_EQ(0, builder->build(converted_holder));

  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  string path = _dir + "/TestRnnSearchCosine";
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  // test searcher
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSearcher");
  ASSERT_TRUE(searcher != nullptr);
  ASSERT_EQ(0, searcher->init(ailego::Params()));

  auto storage = IndexFactory::CreateStorage("FileReadStorage");
  // Fail cleanly instead of crashing if the storage could not be created.
  ASSERT_NE(storage, nullptr);
  ASSERT_EQ(0, storage->open(path, false));
  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));
  auto ctx = searcher->create_context();
  ASSERT_TRUE(!!ctx);

  // Raw query: the all-ones vector.
  NumericalVector<float> vec(dim);
  for (size_t j = 0; j < dim; ++j) {
    vec[j] = 1.0;
  }

  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());
  ASSERT_TRUE(reformer != nullptr);

  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));

  // Transform the raw query; the searches below use the transformed buffer
  // and its meta.
  std::string new_query;
  IndexQueryMeta new_meta;
  ASSERT_EQ(0, reformer->transform(vec.data(), qmeta, &new_query, &new_meta));

  size_t topk = 50;
  ctx->set_topk(topk);
  ASSERT_EQ(0, searcher->search_impl(new_query.data(), new_meta, ctx));
  // `results` refers to the context's result list and is inspected again
  // after each of the following searches.
  auto &results = ctx->result();
  ASSERT_EQ(topk, results.size());

  // A fixed radius is used here; the thresholded search is expected to return
  // fewer than topk hits, none scoring above the radius.
  float radius = 0.5f;
  ctx->set_threshold(radius);
  ASSERT_EQ(0, searcher->search_impl(new_query.data(), new_meta, ctx));
  ASSERT_GT(topk, results.size());
  for (size_t k = 0; k < results.size(); ++k) {
    ASSERT_GE(radius, results[k].score());
  }

  // Test Reset Threshold
  ctx->reset_threshold();
  ASSERT_EQ(0, searcher->search_impl(new_query.data(), new_meta, ctx));
  ASSERT_EQ(topk, results.size());
  ASSERT_LT(radius, results[topk - 1].score());
}

//! Threshold (radius) search with the "MipsSquaredEuclidean" metric, checked
//! twice: first directly on an HnswStreamer, then on an HnswSearcher loaded
//! from the streamer's dump.
TEST_F(HnswSearcherTest, TestRnnSearchMipsSquaredEuclidean) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_NE(streamer, nullptr);

  ailego::Params params;
  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);
  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);
  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 10);
  params.set(PARAM_HNSW_STREAMER_EF, 5);
  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);

  IndexMeta index_meta(IndexMeta::DataType::DT_FP32, dim);
  index_meta.set_metric("MipsSquaredEuclidean", 0, ailego::Params());

  ailego::Params stg_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  // Fail cleanly instead of crashing if the storage could not be created.
  ASSERT_NE(storage, nullptr);
  ASSERT_EQ(0, storage->init(stg_params));
  // File names carry this test's own name, like the other tests' artifacts.
  ASSERT_EQ(0, storage->open(
                   _dir + "/TestRnnSearchMipsSquaredEuclidean.index", true));
  ASSERT_EQ(0, streamer->init(index_meta, params));
  ASSERT_EQ(0, streamer->open(storage));

  size_t doc_cnt = 1000UL;
  auto streamer_ctx = streamer->create_context();
  ASSERT_TRUE(!!streamer_ctx);
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  // Document i is the vector (i, i, ..., i), stored under key i.
  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }

    streamer->add_impl(i, vec.data(), qmeta, streamer_ctx);
  }

  {
    // Threshold search directly on the streamer, with the all-ones query.
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = 1.0;
    }

    size_t topk = 50;
    streamer_ctx->set_topk(topk);
    ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, streamer_ctx));
    auto &results = streamer_ctx->result();
    ASSERT_EQ(topk, results.size());

    // The threshold is the negated middle score; fewer than topk hits are
    // expected, none scoring above the radius.
    float radius = -results[topk / 2].score();
    streamer_ctx->set_threshold(radius);
    ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, streamer_ctx));
    ASSERT_GT(topk, results.size());
    for (size_t k = 0; k < results.size(); ++k) {
      ASSERT_GE(radius, results[k].score());
    }

    // Test Reset Threshold
    streamer_ctx->reset_threshold();
    ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, streamer_ctx));
    ASSERT_EQ(topk, results.size());
    ASSERT_LT(-radius, results[topk - 1].score());
  }

  // Dump the streamer so the same checks can run against a searcher.
  auto path = _dir + "/TestRnnSearchMipsSquaredEuclidean";
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, streamer->dump(dumper));
  ASSERT_EQ(0, streamer->close());
  ASSERT_EQ(0, dumper->close());

  // test searcher
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSearcher");
  ASSERT_TRUE(searcher != nullptr);
  ASSERT_EQ(0, searcher->init(ailego::Params()));

  auto read_storage = IndexFactory::CreateStorage("FileReadStorage");
  // Fail cleanly instead of crashing if the storage could not be created.
  ASSERT_NE(read_storage, nullptr);
  ASSERT_EQ(0, read_storage->open(path, false));
  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));
  auto searcher_ctx = searcher->create_context();
  ASSERT_TRUE(!!searcher_ctx);

  NumericalVector<float> vec(dim);
  for (size_t j = 0; j < dim; ++j) {
    vec[j] = 1.0;
  }

  {
    // Same threshold checks as above, now against the loaded searcher.
    size_t topk = 50;
    searcher_ctx->set_topk(topk);
    ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, searcher_ctx));
    auto &results = searcher_ctx->result();
    ASSERT_EQ(topk, results.size());

    float radius = -results[topk / 2].score();
    searcher_ctx->set_threshold(radius);
    ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, searcher_ctx));
    ASSERT_GT(topk, results.size());
    for (size_t k = 0; k < results.size(); ++k) {
      ASSERT_GE(radius, results[k].score());
    }

    // Test Reset Threshold
    searcher_ctx->reset_threshold();
    ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, searcher_ctx));
    ASSERT_EQ(topk, results.size());
    ASSERT_LT(-radius, results[topk - 1].score());
  }
}

//! End-to-end check of the searcher on 5000 documents: exact ordering of the
//! brute-force search, brute-force search restricted to a key list, and a
//! comparison of the graph search against brute force over sampled queries.
TEST_F(HnswSearcherTest, TestGeneral) {
  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("HnswBuilder");
  ASSERT_NE(builder, nullptr);
  auto holder =
      make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);
  size_t doc_cnt = 5000UL;
  // Document i is the vector (i, i, ..., i), stored under key i.
  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }
  ailego::Params params;
  // params.set("proxima.hnsw.builder.max_neighbor_count", 16);
  params.set("proxima.hnsw.builder.scaling_factor", 16);
  params.set("proxima.hnsw.builder.ef_construction", 10);
  params.set("proxima.hnsw.builder.thread_count", 2);
  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));
  ASSERT_EQ(0, builder->train(holder));
  ASSERT_EQ(0, builder->build(holder));
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  string path = _dir + "/TestGeneral";
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  // test searcher
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSearcher");
  ASSERT_TRUE(searcher != nullptr);
  ailego::Params searcherParams;
  searcherParams.set("proxima.hnsw.searcher.ef", 1);
  ASSERT_EQ(0, searcher->init(searcherParams));


  auto storage = IndexFactory::CreateStorage("FileReadStorage");
  // Fail cleanly instead of crashing if the storage could not be created.
  ASSERT_NE(storage, nullptr);
  ASSERT_EQ(0, storage->open(path, false));
  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));
  auto linearCtx = searcher->create_context();
  auto linearByPKeysCtx = searcher->create_context();
  auto knnCtx = searcher->create_context();
  ASSERT_TRUE(!!linearCtx);
  ASSERT_TRUE(!!linearByPKeysCtx);
  ASSERT_TRUE(!!knnCtx);
  NumericalVector<float> vec(dim);
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  size_t topk = 200;
  uint64_t knnTotalTime = 0;
  uint64_t linearTotalTime = 0;
  int totalHits = 0;
  int totalCnts = 0;
  int topk1Hits = 0;
  linearCtx->set_topk(topk);
  linearByPKeysCtx->set_topk(topk);
  knnCtx->set_topk(topk);

  // do linear search test
  {
    // With every component at 3.1 the nearest keys are 3, 4, 2, 5, 1, 6, 0, 7
    // and then 8, 9, ... in increasing order.
    std::vector<float> query(dim);
    for (size_t i = 0; i < dim; ++i) {
      query[i] = 3.1f;
    }
    ASSERT_EQ(0, searcher->search_bf_impl(query.data(), qmeta, linearCtx));
    auto &linearResult = linearCtx->result();
    // Make sure all topk slots exist before indexing into them.
    ASSERT_EQ(topk, linearResult.size());
    ASSERT_EQ(3UL, linearResult[0].key());
    ASSERT_EQ(4UL, linearResult[1].key());
    ASSERT_EQ(2UL, linearResult[2].key());
    ASSERT_EQ(5UL, linearResult[3].key());
    ASSERT_EQ(1UL, linearResult[4].key());
    ASSERT_EQ(6UL, linearResult[5].key());
    ASSERT_EQ(0UL, linearResult[6].key());
    ASSERT_EQ(7UL, linearResult[7].key());
    for (size_t i = 8; i < topk; ++i) {
      ASSERT_EQ(i, linearResult[i].key());
    }
  }

  // do linear search by p_keys test
  std::vector<std::vector<uint64_t>> p_keys;
  p_keys.resize(1);
  p_keys[0] = {8, 9, 10, 11, 3, 2, 1, 0};
  {
    std::vector<float> query(dim);
    for (size_t i = 0; i < dim; ++i) {
      query[i] = 3.1f;
    }
    ASSERT_EQ(0, searcher->search_bf_by_p_keys_impl(query.data(), p_keys, qmeta,
                                                    linearByPKeysCtx));
    // Only the eight listed keys may come back, ordered by distance to 3.1.
    auto &linearByPKeysResult = linearByPKeysCtx->result();
    ASSERT_EQ(8UL, linearByPKeysResult.size());
    ASSERT_EQ(3UL, linearByPKeysResult[0].key());
    ASSERT_EQ(2UL, linearByPKeysResult[1].key());
    ASSERT_EQ(1UL, linearByPKeysResult[2].key());
    ASSERT_EQ(0UL, linearByPKeysResult[3].key());
    ASSERT_EQ(8UL, linearByPKeysResult[4].key());
    ASSERT_EQ(9UL, linearByPKeysResult[5].key());
    ASSERT_EQ(10UL, linearByPKeysResult[6].key());
    ASSERT_EQ(11UL, linearByPKeysResult[7].key());
  }

  // Sample every `step`-th document as a query (offset by 0.1) and compare
  // the graph search with the brute-force search.
  size_t step = 50;
  for (size_t i = 0; i < doc_cnt; i += step) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i + 0.1f;
    }
    auto t1 = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, knnCtx));
    auto t2 = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0, searcher->search_bf_impl(vec.data(), qmeta, linearCtx));
    auto t3 = ailego::Realtime::MicroSeconds();
    knnTotalTime += t2 - t1;
    linearTotalTime += t3 - t2;

    auto &knnResult = knnCtx->result();
    // TODO: check
    // ASSERT_EQ(topk, knnResult.size());
    // The size of knnResult is not asserted, so never index past its end.
    if (knnResult.size() > 0 && i == knnResult[0].key()) {
      topk1Hits++;
    }

    auto &linearResult = linearCtx->result();
    ASSERT_EQ(topk, linearResult.size());
    ASSERT_EQ(i, linearResult[0].key());

    for (size_t k = 0; k < topk; ++k) {
      totalCnts++;
      if (k >= knnResult.size()) {
        // A slot the graph search did not fill counts as a miss.
        continue;
      }
      for (size_t j = 0; j < topk; ++j) {
        if (linearResult[j].key() == knnResult[k].key()) {
          totalHits++;
          break;
        }
      }
    }
  }
  // NOTE(review): totalHits / totalCnts is already a fraction in [0, 1]; the
  // extra `step * step` factor makes the 0.90 bound below far weaker than it
  // reads. Left unchanged here - confirm the intended formula.
  float recall = totalHits * step * step * 1.0f / totalCnts;
  float topk1Recall = topk1Hits * step * 1.0f / doc_cnt;
  float cost = linearTotalTime * 1.0f / knnTotalTime;
#if 0
    printf("knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d "
           "R@%zd=%f R@1=%f cost=%f\n",
           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,
           topk1Recall, cost);
#endif
  EXPECT_GT(recall, 0.90f);
  EXPECT_GT(topk1Recall, 0.90f);
  // EXPECT_GT(cost, 2.0f);
}

//! Lifecycle check: after cleanup() the searcher must be re-initialised before
//! it can load again, after unload() a plain load() is enough, and the loaded
//! document count in the stats follows each transition.
TEST_F(HnswSearcherTest, TestClearAndReload) {
  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("HnswBuilder");
  ASSERT_NE(builder, nullptr);
  auto holder =
      make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);
  size_t doc_cnt = 1000UL;
  // Document i is the vector (i, i, ..., i), stored under key i.
  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }
  ailego::Params params;
  params.set("proxima.hnsw.builder.thread_count", 3);
  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));
  ASSERT_EQ(0, builder->train(holder));
  ASSERT_EQ(0, builder->build(holder));
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  // The dump file carries this test's own name, like the other tests'
  // artifacts.
  string path = _dir + "/TestClearAndReload";
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  // test searcher
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSearcher");
  ASSERT_TRUE(searcher != nullptr);
  ailego::Params searcherParams;
  searcherParams.set("proxima.hnsw.searcher.check_crc_enable", true);
  searcherParams.set("proxima.hnsw.searcher.max_scan_ratio",
                     1.1f);  // including upper layer
  ASSERT_EQ(0, searcher->init(searcherParams));


  auto storage = IndexFactory::CreateStorage("MMapFileReadStorage");
  // Fail cleanly instead of crashing if the storage could not be created.
  ASSERT_NE(storage, nullptr);
  ASSERT_EQ(0, storage->open(path, false));
  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));
  auto linearCtx = searcher->create_context();
  auto knnCtx = searcher->create_context();
  ASSERT_TRUE(!!linearCtx);
  ASSERT_TRUE(!!knnCtx);
  // Spell the query out (all zeros) rather than relying on whatever the
  // NumericalVector constructor leaves in the buffer.
  NumericalVector<float> vec(dim);
  for (size_t j = 0; j < dim; ++j) {
    vec[j] = 0.0f;
  }
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  size_t topk = 100;
  linearCtx->set_topk(topk);
  knnCtx->set_topk(topk);
  ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, knnCtx));
  ASSERT_EQ(0, searcher->search_bf_impl(vec.data(), qmeta, linearCtx));
  auto &knnResult = knnCtx->result();
  ASSERT_EQ(topk, knnResult.size());
  auto &linearResult = linearCtx->result();
  ASSERT_EQ(topk, linearResult.size());
  // `stats` is read again after every cleanup / unload / load step below.
  auto &stats = searcher->stats();
  ASSERT_EQ(doc_cnt, stats.loaded_count());
  // ASSERT_GT(stats.loaded_costtime(), 0UL);

  //! cleanup
  // After cleanup() no context can be created and load() is rejected until
  // init() has been called again.
  ASSERT_EQ(0, searcher->cleanup());
  ASSERT_EQ(nullptr, searcher->create_context());
  ASSERT_EQ(IndexError_Runtime,
            searcher->load(storage, IndexMetric::Pointer()));
  ASSERT_EQ(0UL, stats.loaded_count());

  ASSERT_EQ(0, searcher->init(searcherParams));
  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));
  linearCtx = searcher->create_context();
  knnCtx = searcher->create_context();
  ASSERT_TRUE(!!linearCtx);
  ASSERT_TRUE(!!knnCtx);
  linearCtx->set_topk(topk);
  knnCtx->set_topk(topk);
  ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, knnCtx));
  ASSERT_EQ(0, searcher->search_bf_impl(vec.data(), qmeta, linearCtx));
  auto &knnResult1 = knnCtx->result();
  ASSERT_EQ(topk, knnResult1.size());
  auto &linearResult1 = linearCtx->result();
  ASSERT_EQ(topk, linearResult1.size());
  ASSERT_EQ(doc_cnt, stats.loaded_count());

  //! unload
  // After unload() the searcher can load again without a new init().
  ASSERT_EQ(0, searcher->unload());
  ASSERT_EQ(nullptr, searcher->create_context());
  ASSERT_EQ(0UL, stats.loaded_count());
  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));
  linearCtx = searcher->create_context();
  ASSERT_TRUE(!!linearCtx);
  linearCtx->set_topk(topk);
  ASSERT_EQ(0, searcher->search_bf_impl(vec.data(), qmeta, linearCtx));
  auto &linearResult2 = linearCtx->result();
  ASSERT_EQ(topk, linearResult2.size());
  ASSERT_EQ(doc_cnt, stats.loaded_count());
}

//! Filter check: graph search, brute-force search and brute-force search by
//! key list must return keys 10, 11, 9 first for a query at 10.1, and must
//! return 9, 12, 8 once a filter matching keys 10 and 11 is installed.
TEST_F(HnswSearcherTest, TestFilter) {
  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("HnswBuilder");
  ASSERT_NE(builder, nullptr);
  auto holder =
      make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);
  size_t doc_cnt = 100UL;
  std::vector<std::vector<uint64_t>> p_keys;
  p_keys.resize(1);
  // Document i is the vector (i, i, ..., i); every key also goes into the
  // key list used by the search-by-keys calls.
  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    ASSERT_TRUE(holder->emplace(i, vec));
    p_keys[0].push_back(i);
  }
  ailego::Params params;
  params.set("proxima.hnsw.builder.thread_count", 3);
  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));
  ASSERT_EQ(0, builder->train(holder));
  ASSERT_EQ(0, builder->build(holder));
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  // The dump file carries this test's own name, like the other tests'
  // artifacts.
  string path = _dir + "/TestFilter";
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  // test searcher
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSearcher");
  ASSERT_TRUE(searcher != nullptr);
  ailego::Params searcherParams;
  searcherParams.set("proxima.hnsw.searcher.check_crc_enable", true);
  searcherParams.set("proxima.hnsw.searcher.max_scan_ratio", 1.0f);
  ASSERT_EQ(0, searcher->init(searcherParams));
  auto storage = IndexFactory::CreateStorage("FileReadStorage");
  // Fail cleanly instead of crashing if the storage could not be created.
  ASSERT_NE(storage, nullptr);
  ASSERT_EQ(0, storage->open(path, false));
  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));
  auto linearCtx = searcher->create_context();
  auto linearByPKeysCtx = searcher->create_context();
  auto knnCtx = searcher->create_context();
  ASSERT_TRUE(!!linearCtx);
  ASSERT_TRUE(!!linearByPKeysCtx);
  ASSERT_TRUE(!!knnCtx);
  // Query at 10.1 in every component.
  NumericalVector<float> vec(dim);
  for (size_t j = 0; j < dim; ++j) {
    vec[j] = 10.1f;
  }
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  size_t topk = 10;
  linearCtx->set_topk(topk);
  linearByPKeysCtx->set_topk(topk);
  knnCtx->set_topk(topk);
  ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, knnCtx));
  ASSERT_EQ(0, searcher->search_bf_impl(vec.data(), qmeta, linearCtx));
  ASSERT_EQ(0, searcher->search_bf_by_p_keys_impl(vec.data(), p_keys, qmeta,
                                                  linearByPKeysCtx));

  // Returns true for keys 10 and 11; those keys are absent from the filtered
  // results asserted further down.
  auto filterFunc = [](uint64_t key) {
    if (key == 10UL || key == 11UL) {
      return true;
    }
    return false;
  };
  // Unfiltered results of all three search flavours.
  auto &knnResult = knnCtx->result();
  ASSERT_EQ(topk, knnResult.size());
  ASSERT_EQ(10UL, knnResult[0].key());
  ASSERT_EQ(11UL, knnResult[1].key());
  ASSERT_EQ(9UL, knnResult[2].key());

  auto &linearResult = linearCtx->result();
  ASSERT_EQ(topk, linearResult.size());
  ASSERT_EQ(10UL, linearResult[0].key());
  ASSERT_EQ(11UL, linearResult[1].key());
  ASSERT_EQ(9UL, linearResult[2].key());

  auto &linearByPKeysResult = linearByPKeysCtx->result();
  ASSERT_EQ(topk, linearByPKeysResult.size());
  ASSERT_EQ(10UL, linearByPKeysResult[0].key());
  ASSERT_EQ(11UL, linearByPKeysResult[1].key());
  ASSERT_EQ(9UL, linearByPKeysResult[2].key());

  // Same three searches with the filter installed on each context.
  knnCtx->set_filter(filterFunc);
  ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, knnCtx));
  auto &knnResult1 = knnCtx->result();
  ASSERT_EQ(topk, knnResult1.size());
  ASSERT_EQ(9UL, knnResult1[0].key());
  ASSERT_EQ(12UL, knnResult1[1].key());
  ASSERT_EQ(8UL, knnResult1[2].key());

  linearCtx->set_filter(filterFunc);
  ASSERT_EQ(0, searcher->search_bf_impl(vec.data(), qmeta, linearCtx));
  auto &linearResult1 = linearCtx->result();
  ASSERT_EQ(topk, linearResult1.size());
  ASSERT_EQ(9UL, linearResult1[0].key());
  ASSERT_EQ(12UL, linearResult1[1].key());
  ASSERT_EQ(8UL, linearResult1[2].key());

  linearByPKeysCtx->set_filter(filterFunc);
  ASSERT_EQ(0, searcher->search_bf_by_p_keys_impl(vec.data(), p_keys, qmeta,
                                                  linearByPKeysCtx));
  auto &linearByPKeysResult1 = linearByPKeysCtx->result();
  ASSERT_EQ(topk, linearByPKeysResult1.size());
  ASSERT_EQ(9UL, linearByPKeysResult1[0].key());
  ASSERT_EQ(12UL, linearByPKeysResult1[1].key());
  ASSERT_EQ(8UL, linearByPKeysResult1[2].key());
}

//! Build an index with HnswStreamer, dump it, load the dump into an
//! HnswSearcher and compare the graph search against brute force over
//! sampled queries.
TEST_F(HnswSearcherTest, TestStreamerDump) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_NE(streamer, nullptr);

  ailego::Params params;
  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);
  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);
  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 10);
  params.set(PARAM_HNSW_STREAMER_EF, 5);
  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
  ailego::Params stg_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  // Fail cleanly instead of crashing if the storage could not be created.
  ASSERT_NE(storage, nullptr);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(_dir + "/TestStreamerDump.index", true));
  ASSERT_EQ(0, streamer->init(*_index_meta_ptr, params));
  ASSERT_EQ(0, streamer->open(storage));

  NumericalVector<float> vec(dim);
  size_t cnt = 5000U;
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  // Document i is the vector (i, i, ..., i), stored under key i.
  for (size_t i = 0; i < cnt; i++) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    streamer->add_impl(i, vec.data(), qmeta, ctx);
  }
  auto path = _dir + "/TestStreamerDump";
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, streamer->dump(dumper));
  ASSERT_EQ(0, streamer->close());
  ASSERT_EQ(0, dumper->close());

  // do searcher knn
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSearcher");
  ASSERT_TRUE(searcher != nullptr);
  auto read_storage = IndexFactory::CreateStorage("FileReadStorage");
  // Fail cleanly instead of crashing if the storage could not be created.
  ASSERT_NE(read_storage, nullptr);
  ASSERT_EQ(0, read_storage->open(path, false));
  ASSERT_EQ(0, searcher->init(ailego::Params()));
  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));
  auto linearCtx = searcher->create_context();
  auto knnCtx = searcher->create_context();
  // Check the contexts before use, as the other tests in this file do.
  ASSERT_TRUE(!!linearCtx);
  ASSERT_TRUE(!!knnCtx);
  size_t topk = 200;
  linearCtx->set_topk(topk);
  knnCtx->set_topk(topk);
  uint64_t knnTotalTime = 0;
  uint64_t linearTotalTime = 0;
  int totalHits = 0;
  int totalCnts = 0;
  int topk1Hits = 0;
  // Sample every `step`-th document as a query (offset by 0.1) and compare
  // the graph search with the brute-force search.
  size_t step = 50;
  for (size_t i = 0; i < cnt; i += step) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i + 0.1f;
    }
    auto t1 = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, knnCtx));
    auto t2 = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0, searcher->search_bf_impl(vec.data(), qmeta, linearCtx));
    auto t3 = ailego::Realtime::MicroSeconds();
    knnTotalTime += t2 - t1;
    linearTotalTime += t3 - t2;

    auto &knnResult = knnCtx->result();
    // ASSERT_EQ(topk, knnResult.size());
    // The size of knnResult is not asserted, so never index past its end.
    if (knnResult.size() > 0 && i == knnResult[0].key()) {
      topk1Hits++;
    }

    auto &linearResult = linearCtx->result();
    ASSERT_EQ(topk, linearResult.size());
    ASSERT_EQ(i, linearResult[0].key());

    for (size_t k = 0; k < topk; ++k) {
      totalCnts++;
      if (k >= knnResult.size()) {
        // A slot the graph search did not fill counts as a miss.
        continue;
      }
      for (size_t j = 0; j < topk; ++j) {
        if (linearResult[j].key() == knnResult[k].key()) {
          totalHits++;
          break;
        }
      }
    }
  }
  // NOTE(review): totalHits / totalCnts is already a fraction in [0, 1]; the
  // extra `step` factor makes the 0.90 bound below far weaker than it reads.
  // Left unchanged here - confirm the intended formula.
  float recall = totalHits * step * 1.0f / totalCnts;
  float topk1Recall = topk1Hits * step * 1.0f / cnt;
  float cost = linearTotalTime * 1.0f / knnTotalTime;
#if 0
    printf("knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d "
           "R@%zd=%f R@1=%f cost=%f\n",
           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,
           topk1Recall, cost);
#endif
  EXPECT_GT(recall, 0.90f);
  EXPECT_GT(topk1Recall, 0.95f);
  // EXPECT_GT(cost, 2.0f);
}

// Checks that a search context created by one HnswSearcher can be passed to
// other HnswSearcher instances (from two concurrent tasks), and that contexts
// cannot be exchanged between a searcher and a streamer.
TEST_F(HnswSearcherTest, TestSharedContext) {
  // Produces `doc_cnt` vectors keyed start+3, start+6, ... so every key in one
  // holder has the same remainder modulo 3 as `start`. Vector i has all
  // components equal to i.
  auto gen_holder = [](int start, size_t doc_cnt) {
    auto holder =
        make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);
    uint64_t key = start;
    for (size_t i = 0; i < doc_cnt; i++) {
      NumericalVector<float> vec(dim);
      for (size_t j = 0; j < dim; ++j) {
        vec[j] = i;
      }
      key += 3;
      holder->emplace(key, vec);
    }
    return holder;
  };
  // Builds an index from gen_holder(start, docs), dumps it to `path`, and
  // returns a searcher loaded from that file. None of the return codes are
  // checked here.
  auto gen_index = [&gen_holder](int start, size_t docs, std::string path) {
    auto holder = gen_holder(start, docs);
    IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("HnswBuilder");
    ailego::Params params;
    builder->init(*_index_meta_ptr, params);
    builder->train(holder);
    builder->build(holder);
    auto dumper = IndexFactory::CreateDumper("FileDumper");
    dumper->create(path);
    builder->dump(dumper);
    dumper->close();

    IndexSearcher::Pointer searcher =
        IndexFactory::CreateSearcher("HnswSearcher");
    // The read storage implementation is picked at random per index.
    auto name = rand() % 2 ? "FileReadStorage" : "MMapFileReadStorage";
    auto storage = IndexFactory::CreateStorage(name);
    storage->open(path, false);
    // NOTE(review): the bloom-filter flag is written into `params`, but the
    // searcher below is initialised with a fresh empty Params object, so this
    // setting does not appear to reach the searcher - confirm the intent.
    params.set("proxima.hnsw.searcher.visit_bloomfilter_enable", rand() % 2);
    searcher->init(ailego::Params());
    searcher->load(storage, IndexMetric::Pointer());
    return searcher;
  };

  // Three indexes of random size: 100..599, 100..5099 and 100..50099 documents.
  // Their key remainders modulo 3 are 0, 1 and 2 respectively.
  srand(ailego::Realtime::MilliSeconds());
  size_t docs1 = rand() % 500 + 100;
  size_t docs2 = rand() % 5000 + 100;
  size_t docs3 = rand() % 50000 + 100;
  auto path1 = _dir + "/TestSharedContext.index1";
  auto path2 = _dir + "/TestSharedContext.index2";
  auto path3 = _dir + "/TestSharedContext.index3";
  auto searcher1 = gen_index(0, docs1, path1);
  auto searcher2 = gen_index(1, docs2, path2);
  auto searcher3 = gen_index(2, docs3, path3);

  srand(ailego::Realtime::MilliSeconds());
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  // Worker body: creates ONE context from a randomly chosen searcher, then
  // issues 100 queries, each against a randomly chosen searcher and search
  // mode (graph or brute force), always reusing that single context.
  auto do_test = [&]() {
    IndexSearcher::Context::Pointer ctx;
    switch (rand() % 3) {
      case 0:
        ctx = searcher1->create_context();
        break;
      case 1:
        ctx = searcher2->create_context();
        break;
      case 2:
        ctx = searcher3->create_context();
        break;
    }
    ctx->set_topk(10);

    int ret = 0;
    for (int i = 0; i < 100; ++i) {
      NumericalVector<float> query(dim);
      for (size_t j = 0; j < dim; ++j) {
        query[j] = i + 0.1f;
      }

      // code 0..2 -> search_impl on searcher1..3,
      // code 3..5 -> search_bf_impl on searcher1..3.
      auto code = rand() % 6;
      switch (code) {
        case 0:
          ret = searcher1->search_impl(query.data(), qmeta, ctx);
          break;
        case 1:
          ret = searcher2->search_impl(query.data(), qmeta, ctx);
          break;
        case 2:
          ret = searcher3->search_impl(query.data(), qmeta, ctx);
          break;
        case 3:
          ret = searcher1->search_bf_impl(query.data(), qmeta, ctx);
          break;
        case 4:
          ret = searcher2->search_bf_impl(query.data(), qmeta, ctx);
          break;
        case 5:
          ret = searcher3->search_bf_impl(query.data(), qmeta, ctx);
          break;
      }

      EXPECT_EQ(0, ret);
      auto &results = ctx->result();
      EXPECT_EQ(10, results.size());
      // code % 3 identifies the searcher that was queried; every returned key
      // must carry that searcher's remainder, proving the results came from
      // the queried index and not from the context's originating one.
      for (int k = 0; k < 10; ++k) {
        EXPECT_EQ(code % 3, results[k].key() % 3);
      }
    }
  };
  // Run the worker body on two asynchronous tasks at the same time.
  // NOTE(review): rand() is called from both tasks concurrently - confirm that
  // is acceptable on the supported platforms.
  auto t1 = std::async(std::launch::async, do_test);
  auto t2 = std::async(std::launch::async, do_test);
  t1.wait();
  t2.wait();

  // Contexts must not cross the searcher/streamer boundary: both directions
  // are expected to return IndexError_Unsupported.
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  storage->init(ailego::Params());
  storage->open(_dir + "/TestSharedContext.index4", true);
  streamer->init(*_index_meta_ptr, ailego::Params());
  streamer->open(storage);
  NumericalVector<float> query(dim);
  auto ctx1 = streamer->create_context();
  EXPECT_EQ(IndexError_Unsupported,
            searcher1->search_impl(query.data(), qmeta, ctx1));

  auto ctx2 = searcher1->create_context();
  EXPECT_EQ(IndexError_Unsupported,
            streamer->search_impl(query.data(), qmeta, ctx2));
}

// Exercises the searcher's data provider: every key inserted at build time
// must be retrievable by key and through the provider iterator, and the
// provider must report the same dimension / element size / data type as the
// index meta. The key layout (random, strided, strided + shuffled) is chosen
// at random on each run.
TEST_F(HnswSearcherTest, TestProvider) {
  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("HnswBuilder");
  ASSERT_NE(builder, nullptr);
  auto holder =
      make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);

  const size_t total_docs = 5000UL;
  std::vector<key_t> keys(total_docs);

  // Three draws, in this order: key scheme, shuffle flag, stride (1 or 2).
  srand(ailego::Realtime::MilliSeconds());
  const bool use_random_keys = rand() % 2;
  const bool shuffle_keys = rand() % 2;
  const size_t stride = rand() % 2 + 1;
  LOG_DEBUG("randKey=%u randOrder=%u step=%zu", use_random_keys, shuffle_keys,
            stride);

  if (!use_random_keys) {
    // Keys 0, stride, 2*stride, ... optionally shuffled.
    for (size_t idx = 0; idx < keys.size(); ++idx) {
      keys[idx] = stride * idx;
    }
    if (shuffle_keys) {
      uint32_t seed = ailego::Realtime::Seconds();
      std::shuffle(keys.begin(), keys.end(), std::default_random_engine(seed));
    }
  } else {
    // Keys drawn from a default-seeded Mersenne Twister over the uint16 range.
    std::mt19937 engine;
    std::uniform_int_distribution<uint16_t> pick(
        0, std::numeric_limits<uint16_t>::max());
    for (auto &slot : keys) {
      slot = pick(engine);
    }
  }

  // Each document's vector has every component equal to its key.
  for (size_t idx = 0; idx < total_docs; ++idx) {
    NumericalVector<float> doc(dim);
    for (size_t d = 0; d < dim; ++d) {
      doc[d] = keys[idx];
    }
    ASSERT_TRUE(holder->emplace(keys[idx], doc));
  }

  ailego::Params build_params;
  ASSERT_EQ(0, builder->init(*_index_meta_ptr, build_params));
  ASSERT_EQ(0, builder->train(holder));
  ASSERT_EQ(0, builder->build(holder));

  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  const std::string index_path = _dir + "/TestProvider";
  ASSERT_EQ(0, dumper->create(index_path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  // Load the dumped index into a searcher.
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSearcher");
  ASSERT_TRUE(searcher != nullptr);
  ailego::Params search_params;
  search_params.set("proxima.hnsw.searcher.ef", 1);
  ASSERT_EQ(0, searcher->init(search_params));
  auto storage = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_EQ(0, storage->open(index_path, false));
  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));

  auto provider = searcher->create_provider();

  // Lookup by key: the stored vector must equal the key in every component.
  for (size_t idx = 0; idx < keys.size(); ++idx) {
    const float *stored =
        reinterpret_cast<const float *>(provider->get_vector(keys[idx]));
    ASSERT_TRUE(stored);
    for (size_t d = 0; d < dim; ++d) {
      ASSERT_FLOAT_EQ(stored[d], keys[idx]);
    }
  }

  // Full scan: same per-component check, and the scan must visit every doc.
  size_t visited = 0;
  for (auto it = provider->create_iterator(); it->is_valid(); it->next()) {
    auto key = it->key();
    const float *stored = reinterpret_cast<const float *>(it->data());
    for (size_t d = 0; d < dim; ++d) {
      ASSERT_FLOAT_EQ(stored[d], key);
    }
    ++visited;
  }
  ASSERT_EQ(visited, total_docs);

  ASSERT_EQ(dim, provider->dimension());
  ASSERT_EQ(_index_meta_ptr->element_size(), provider->element_size());
  ASSERT_EQ(_index_meta_ptr->data_type(), provider->data_type());
}

// Builds a 32-dimensional index with the "MipsSquaredEuclidean" metric where
// document i has every component equal to i / 100, then queries with an
// all-ones vector. The best hit is expected to lie within 20 keys of the last
// (largest) document.
TEST_F(HnswSearcherTest, TestMipsEuclideanMetric) {
  static constexpr size_t kDim = 32;
  const size_t kDocCount = 10000UL;

  IndexMeta meta(IndexMeta::DataType::DT_FP32, kDim);
  meta.set_metric("MipsSquaredEuclidean", 0, ailego::Params());

  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("HnswBuilder");
  ASSERT_NE(builder, nullptr);

  auto holder =
      make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(kDim);
  for (size_t doc = 0; doc < kDocCount; ++doc) {
    NumericalVector<float> features(kDim);
    for (size_t d = 0; d < kDim; ++d) {
      features[d] = doc / 100.0f;
    }
    ASSERT_TRUE(holder->emplace(doc, features));
  }

  ASSERT_EQ(0, builder->init(meta, ailego::Params()));
  ASSERT_EQ(0, builder->train(holder));
  ASSERT_EQ(0, builder->build(holder));

  // Persist the built index.
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  const std::string index_path = _dir + "/TestMipsEuclideanMetric";
  ASSERT_EQ(0, dumper->create(index_path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  // Reload it through a searcher configured with ef = 10.
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSearcher");
  ailego::Params search_params;
  search_params.set("proxima.hnsw.searcher.ef", 10);
  ASSERT_TRUE(searcher != nullptr);
  ASSERT_EQ(0, searcher->init(search_params));

  auto storage = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_EQ(0, storage->open(index_path, false));
  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));
  auto ctx = searcher->create_context();
  ASSERT_TRUE(!!ctx);

  // All-ones query.
  NumericalVector<float> query(kDim);
  for (size_t d = 0; d < kDim; ++d) {
    query[d] = 1.0f;
  }
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, kDim);
  const size_t topk = 50;
  ctx->set_topk(topk);
  ASSERT_EQ(0, searcher->search_impl(query.data(), qmeta, ctx));

  auto &hits = ctx->result();
  EXPECT_EQ(hits.size(), topk);
  EXPECT_NEAR(static_cast<uint64_t>(kDocCount - 1), hits[0].key(), 20);
}

// With "force_padding_result_enable" switched on and a scan ratio of 0.01,
// a search with a randomly chosen topk in [1, COUNT] is expected to return
// exactly topk results with pairwise distinct keys. A second search through a
// filter that returns true for every key is expected to return no results.
TEST_F(HnswSearcherTest, TestRandomPaddingTopk) {
  // Default-seeded engine; the same engine later draws topk.
  std::mt19937 rng{};
  std::uniform_real_distribution<float> unit(0.0f, 1.0f);
  static constexpr size_t kDim = 8;
  const size_t kDocCount = 10000UL;

  IndexMeta meta(IndexMeta::DataType::DT_FP32, kDim);
  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("HnswBuilder");
  ASSERT_NE(builder, nullptr);

  auto holder =
      make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(kDim);
  for (size_t doc = 0; doc < kDocCount; ++doc) {
    NumericalVector<float> features(kDim);
    for (size_t d = 0; d < kDim; ++d) {
      features[d] = unit(rng);
    }
    ASSERT_TRUE(holder->emplace(doc, features));
  }

  ASSERT_EQ(0, builder->init(meta, ailego::Params()));
  ASSERT_EQ(0, builder->train(holder));
  ASSERT_EQ(0, builder->build(holder));

  // Persist the built index.
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  const std::string index_path = _dir + "/TestRandomPadding";
  ASSERT_EQ(0, dumper->create(index_path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  // Reload through a searcher with result padding enabled.
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSearcher");
  ailego::Params search_params;
  search_params.set("proxima.hnsw.searcher.force_padding_result_enable", true);
  search_params.set("proxima.hnsw.searcher.scan_ratio", 0.01f);
  ASSERT_TRUE(searcher != nullptr);
  ASSERT_EQ(0, searcher->init(search_params));

  auto storage = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_EQ(0, storage->open(index_path, false));
  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));
  auto ctx = searcher->create_context();
  ASSERT_TRUE(!!ctx);

  // All-ones query.
  NumericalVector<float> query(kDim);
  for (size_t d = 0; d < kDim; ++d) {
    query[d] = 1.0f;
  }
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, kDim);

  std::uniform_int_distribution<uint32_t> pick_topk(1, kDocCount);
  const size_t topk = pick_topk(rng);
  ctx->set_topk(topk);
  ASSERT_EQ(0, searcher->search_impl(query.data(), qmeta, ctx));

  auto &padded = ctx->result();
  EXPECT_EQ(padded.size(), topk);
  // Every returned key must be unique (pairwise comparison).
  for (size_t cur = 0; cur < padded.size(); ++cur) {
    for (size_t prev = 0; prev < cur; ++prev) {
      EXPECT_NE(padded[cur].key(), padded[prev].key());
    }
  }

  // Filter callback that returns true for every key.
  ctx->set_filter([](uint64_t /*key*/) { return true; });
  ASSERT_EQ(0, searcher->search_impl(query.data(), qmeta, ctx));
  auto &filtered = ctx->result();
  EXPECT_EQ(filtered.size(), 0);
}


// Compares search_impl() against search_bf_impl() while toggling, query by
// query, a per-context brute-force threshold equal to the document count.
// When the threshold is applied it is set alternately through
// Context::update() and Context::set_bruteforce_threshold(), so both
// configuration paths are exercised.
TEST_F(HnswSearcherTest, TestBruteForceSetupInContext) {
  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("HnswBuilder");
  ASSERT_NE(builder, nullptr);
  auto holder =
      make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);
  size_t doc_cnt = 5000UL;
  // Document i has every component equal to i.
  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }

  ailego::Params params;
  // params.set("proxima.hnsw.builder.max_neighbor_count", 16);
  params.set("proxima.hnsw.builder.scaling_factor", 16);
  params.set("proxima.hnsw.builder.ef_construction", 10);
  params.set("proxima.hnsw.builder.thread_count", 2);
  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));
  ASSERT_EQ(0, builder->train(holder));
  ASSERT_EQ(0, builder->build(holder));
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  // NOTE(review): the dump file is named after a different test
  // ("TestGeneral"); confirm the two tests cannot race on this path.
  string path = _dir + "/TestGeneral";
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  // test searcher
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSearcher");
  ASSERT_TRUE(searcher != nullptr);
  ailego::Params searcherParams;
  searcherParams.set("proxima.hnsw.searcher.ef", 1);
  ASSERT_EQ(0, searcher->init(searcherParams));

  auto storage = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_EQ(0, storage->open(path, false));
  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));

  NumericalVector<float> vec(dim);
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  size_t topk = 200;
  uint64_t knnTotalTime = 0;
  uint64_t linearTotalTime = 0;
  int totalHits = 0;
  int totalCnts = 0;
  int topk1Hits = 0;

  // Flipped after every query: whether to apply the threshold, and which API
  // to apply it with.
  bool set_bf_threshold = false;
  bool use_update = false;

  size_t step = 50;
  for (size_t i = 0; i < doc_cnt; i += step) {
    // Fresh contexts per query so a threshold set on one query cannot leak
    // into the next.
    auto linearCtx = searcher->create_context();
    auto knnCtx = searcher->create_context();

    // Both contexts are dereferenced below, so both must be validated (the
    // original checked linearCtx twice and never checked knnCtx).
    ASSERT_TRUE(!!linearCtx);
    ASSERT_TRUE(!!knnCtx);

    linearCtx->set_topk(topk);
    knnCtx->set_topk(topk);

    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i + 0.1f;
    }
    auto t1 = ailego::Realtime::MicroSeconds();

    if (set_bf_threshold) {
      if (use_update) {
        ailego::Params searcherParamsExtra;

        searcherParamsExtra.set("proxima.hnsw.searcher.brute_force_threshold",
                                doc_cnt);
        knnCtx->update(searcherParamsExtra);
      } else {
        knnCtx->set_bruteforce_threshold(doc_cnt);
      }

      use_update = !use_update;
    }
    ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, knnCtx));

    auto t2 = ailego::Realtime::MicroSeconds();

    ASSERT_EQ(0, searcher->search_bf_impl(vec.data(), qmeta, linearCtx));
    // auto t3 = ailego::Realtime::MicroSeconds();

    // The search_impl() time is booked as "linear" when the threshold was
    // applied (presumably the searcher then falls back to brute force) and as
    // "knn" otherwise.
    if (set_bf_threshold) {
      linearTotalTime += t2 - t1;
    } else {
      knnTotalTime += t2 - t1;
    }

    set_bf_threshold = !set_bf_threshold;

    auto &knnResult = knnCtx->result();
    // TODO: check
    // ASSERT_EQ(topk, knnResult.size());
    // Because the size assertion above is disabled, the kNN result may hold
    // fewer than topk entries; never index past what was actually returned.
    const size_t knnCnt = knnResult.size() < topk ? knnResult.size() : topk;
    if (knnCnt > 0) {
      topk1Hits += i == knnResult[0].key();
    }

    auto &linearResult = linearCtx->result();
    ASSERT_EQ(topk, linearResult.size());
    ASSERT_EQ(i, linearResult[0].key());

    for (size_t k = 0; k < topk; ++k) {
      totalCnts++;
      if (k >= knnCnt) {
        continue;  // a missing kNN entry counts as a miss
      }
      for (size_t j = 0; j < topk; ++j) {
        if (linearResult[j].key() == knnResult[k].key()) {
          totalHits++;
          break;
        }
      }
    }
  }
  // NOTE(review): totalHits can never exceed totalCnts, so the step * step
  // factor scales this value far above a plain hit ratio and makes the 0.90
  // bound below very easy to satisfy - confirm the intended formula.
  float recall = totalHits * step * step * 1.0f / totalCnts;
  float topk1Recall = topk1Hits * step * 1.0f / doc_cnt;
  float cost = linearTotalTime * 1.0f / knnTotalTime;
#if 0
    printf("knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d "
           "R@%zd=%f R@1=%f cost=%f\n",
           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,
           topk1Recall, cost);
#endif
  EXPECT_GT(recall, 0.90f);
  EXPECT_GT(topk1Recall, 0.90f);
  // EXPECT_GT(cost, 2.0f);
}

// End-to-end cosine test: vectors are converted with the reformer named by
// the "CosineFp32Converter" meta, inserted through an HnswStreamer, dumped to
// a file, and then queried through an HnswSearcher. Graph search results are
// compared with brute-force results for the first 200 inserted vectors.
TEST_F(HnswSearcherTest, TestCosine) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_NE(streamer, nullptr);

  ailego::Params params;
  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 50);
  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);
  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 100);
  params.set(PARAM_HNSW_STREAMER_EF, 100);
  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
  ailego::Params stg_params;

  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dim);
  index_meta_raw.set_metric("Cosine", 0, ailego::Params());

  ailego::Params converter_params;
  auto converter = IndexFactory::CreateConverter("CosineFp32Converter");
  ASSERT_TRUE(converter != nullptr);

  // converter->meta() is consumed right below, so a failed init must stop the
  // test here instead of surfacing later as a confusing downstream failure.
  ASSERT_EQ(0, converter->init(index_meta_raw, converter_params));

  IndexMeta index_meta = converter->meta();

  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());
  ASSERT_TRUE(reformer != nullptr);

  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(_dir + "/TestCosine.index", true));
  ASSERT_EQ(0, streamer->init(index_meta, params));
  ASSERT_EQ(0, streamer->open(storage));

  NumericalVector<float> vec(dim);
  size_t cnt = 5000U;
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);

  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);

  // Document i: the first dim/4 components hold fixed_value, the remaining
  // ones hold fixed_value + 10 * i, so each document points in a different
  // direction.
  float fixed_value = float(cnt) / 2;
  for (size_t i = 0; i < cnt; i++) {
    float add_on = i * 10;
    for (size_t j = 0; j < dim; ++j) {
      if (j < dim / 4)
        vec[j] = fixed_value;
      else
        vec[j] = fixed_value + add_on;
    }

    std::string new_vec;
    IndexQueryMeta new_meta;

    ASSERT_EQ(0, reformer->convert(vec.data(), qmeta, &new_vec, &new_meta));
    ASSERT_EQ(0, streamer->add_impl(i, new_vec.data(), new_meta, ctx));
  }

  auto path = _dir + "/TestCosine";
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, streamer->dump(dumper));
  ASSERT_EQ(0, streamer->close());
  ASSERT_EQ(0, dumper->close());

  // test searcher
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSearcher");
  ASSERT_TRUE(searcher != nullptr);
  ailego::Params searcherParams;
  searcherParams.set("proxima.hnsw.searcher.ef", 100);
  ASSERT_EQ(0, searcher->init(searcherParams));

  auto read_storage = IndexFactory::CreateStorage("MMapFileReadStorage");
  ASSERT_EQ(0, read_storage->open(path, false));
  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));

  size_t query_cnt = 200U;
  auto linearCtx = searcher->create_context();
  auto linearByPKeysCtx = searcher->create_context();
  auto knnCtx = searcher->create_context();

  ASSERT_TRUE(!!linearCtx);
  ASSERT_TRUE(!!linearByPKeysCtx);
  ASSERT_TRUE(!!knnCtx);

  size_t topk = 200;
  linearCtx->set_topk(topk);
  knnCtx->set_topk(topk);

  uint64_t knnTotalTime = 0;
  uint64_t linearTotalTime = 0;
  int totalHits = 0;
  int totalCnts = 0;
  int topk1Hits = 0;

  // Query i is built exactly like document i, so the brute-force top hit is
  // asserted to be key i.
  NumericalVector<float> qvec(dim);
  for (size_t i = 0; i < query_cnt; i++) {
    float add_on = i * 10;
    for (size_t j = 0; j < dim; ++j) {
      if (j < dim / 4)
        qvec[j] = fixed_value;
      else
        qvec[j] = fixed_value + add_on;
    }

    std::string new_query;
    IndexQueryMeta new_meta;
    ASSERT_EQ(0,
              reformer->transform(qvec.data(), qmeta, &new_query, &new_meta));

    auto t1 = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0, searcher->search_impl(new_query.data(), new_meta, knnCtx));
    auto t2 = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0,
              searcher->search_bf_impl(new_query.data(), new_meta, linearCtx));
    auto t3 = ailego::Realtime::MicroSeconds();
    knnTotalTime += t2 - t1;
    linearTotalTime += t3 - t2;

    auto &knnResult = knnCtx->result();
    ASSERT_EQ(topk, knnResult.size());
    topk1Hits += i == knnResult[0].key();

    auto &linearResult = linearCtx->result();
    ASSERT_EQ(topk, linearResult.size());
    ASSERT_EQ(i, linearResult[0].key());

    // Count how many kNN hits also appear in the brute-force top-k.
    for (size_t k = 0; k < topk; ++k) {
      totalCnts++;
      for (size_t j = 0; j < topk; ++j) {
        if (linearResult[j].key() == knnResult[k].key()) {
          totalHits++;
          break;
        }
      }
    }
  }

  float recall = totalHits * 1.0f / totalCnts;
  float topk1Recall = topk1Hits * 1.0f / query_cnt;
  float cost = linearTotalTime * 1.0f / knnTotalTime;

  EXPECT_GT(recall, 0.90f);
  EXPECT_GT(topk1Recall, 0.90f);
  // EXPECT_GT(cost, 2.0f);
}

// Checks vector retrieval from a searcher loaded from a streamer dump:
// get_vector(key) must return the inserted data, and a search on a context
// with set_fetch_vector(true) must attach the stored vector to its results.
TEST_F(HnswSearcherTest, TestFetchVector) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer != nullptr);

  IndexMeta index_meta(IndexMeta::DataType::DT_FP32, dim);
  index_meta.set_metric("SquaredEuclidean", 0, ailego::Params());

  ailego::Params params;
  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 50);
  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);
  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 100);
  params.set(PARAM_HNSW_STREAMER_EF, 100);
  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
  ailego::Params stg_params;

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(_dir + "/TestFetchVector.index", true));
  ASSERT_EQ(0, streamer->init(index_meta, params));
  ASSERT_EQ(0, streamer->open(storage));

  NumericalVector<float> vec(dim);
  size_t cnt = 2000U;
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);

  // Document i has every component equal to i.
  for (size_t i = 0; i < cnt; i++) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }

    // Fail at the insert that went wrong rather than at a later lookup.
    ASSERT_EQ(0, streamer->add_impl(i, vec.data(), qmeta, ctx));
  }

  auto path = _dir + "/TestFetchVector";
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, streamer->dump(dumper));
  ASSERT_EQ(0, streamer->close());
  ASSERT_EQ(0, dumper->close());

  // test searcher
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSearcher");
  ASSERT_TRUE(searcher != nullptr);
  ailego::Params searcherParams;
  searcherParams.set("proxima.hnsw.searcher.ef", 100);
  ASSERT_EQ(0, searcher->init(searcherParams));

  auto read_storage = IndexFactory::CreateStorage("MMapFileReadStorage");
  ASSERT_EQ(0, read_storage->open(path, false));
  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));

  // Direct lookup: the first component of document i must equal i.
  for (size_t i = 0; i < cnt; i++) {
    const void *vector = searcher->get_vector(i);
    ASSERT_NE(vector, nullptr);

    float vector_value = *reinterpret_cast<const float *>(vector);
    ASSERT_EQ(vector_value, i);
  }

  size_t query_cnt = 200U;
  auto linearCtx = searcher->create_context();
  auto knnCtx = searcher->create_context();
  auto linearByPKeysCtx = searcher->create_context();
  // Validate the contexts before they are dereferenced.
  ASSERT_TRUE(!!linearCtx);
  ASSERT_TRUE(!!knnCtx);
  ASSERT_TRUE(!!linearByPKeysCtx);
  knnCtx->set_fetch_vector(true);

  size_t topk = 200;
  linearCtx->set_topk(topk);
  knnCtx->set_topk(topk);
  uint64_t knnTotalTime = 0;
  uint64_t linearTotalTime = 0;

  // Query i equals document i; the top kNN hit must carry a vector whose
  // first component is i.
  for (size_t i = 0; i < query_cnt; i++) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }

    auto t1 = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, knnCtx));
    auto t2 = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0, searcher->search_bf_impl(vec.data(), qmeta, linearCtx));
    auto t3 = ailego::Realtime::MicroSeconds();
    knnTotalTime += t2 - t1;
    linearTotalTime += t3 - t2;

    auto &knnResult = knnCtx->result();
    ASSERT_EQ(topk, knnResult.size());

    auto &linearResult = linearCtx->result();
    ASSERT_EQ(topk, linearResult.size());
    ASSERT_EQ(i, linearResult[0].key());

    ASSERT_NE(knnResult[0].vector(), nullptr);
    float vector_value =
        *reinterpret_cast<const float *>(knnResult[0].vector());
    ASSERT_EQ(vector_value, i);
  }

  std::cout << "knnTotalTime: " << knnTotalTime << std::endl;
  std::cout << "linearTotalTime: " << linearTotalTime << std::endl;
}

// Cosine variant of the fetch-vector test: vectors are converted by the
// reformer before insertion, so vectors read back from the searcher (directly
// or attached to search results) are passed through reformer->revert() and
// their last component is compared with the original value within `epsilon`.
TEST_F(HnswSearcherTest, TestFetchVectorCosine) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_NE(streamer, nullptr);

  ailego::Params params;
  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 50);
  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);
  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 100);
  params.set(PARAM_HNSW_STREAMER_EF, 100);
  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
  params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, true);

  ailego::Params stg_params;

  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dim);
  index_meta_raw.set_metric("Cosine", 0, ailego::Params());

  ailego::Params converter_params;
  auto converter = IndexFactory::CreateConverter("CosineFp32Converter");
  ASSERT_TRUE(converter != nullptr);

  // converter->meta() is consumed right below, so a failed init must stop the
  // test here.
  ASSERT_EQ(0, converter->init(index_meta_raw, converter_params));

  IndexMeta index_meta = converter->meta();

  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());
  ASSERT_TRUE(reformer != nullptr);

  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(_dir + "/TestFetchVectorCosine.index", true));
  ASSERT_EQ(0, streamer->init(index_meta, params));
  ASSERT_EQ(0, streamer->open(storage));

  NumericalVector<float> vec(dim);
  size_t cnt = 2000U;
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);

  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  // Filled by reformer->convert(); the value left by the last insert is
  // reused for reverting vectors fetched with get_vector().
  IndexQueryMeta new_meta;

  const float epsilon = 1e-2;
  // Document i: the first dim/4 components hold fixed_value, the remaining
  // ones hold fixed_value + 10 * i.
  float fixed_value = float(cnt) / 2;
  for (size_t i = 0; i < cnt; i++) {
    float add_on = i * 10;

    for (size_t j = 0; j < dim; ++j) {
      if (j < dim / 4)
        vec[j] = fixed_value;
      else
        vec[j] = fixed_value + add_on;
    }

    std::string new_vec;

    ASSERT_EQ(0, reformer->convert(vec.data(), qmeta, &new_vec, &new_meta));
    ASSERT_EQ(0, streamer->add_impl(i, new_vec.data(), new_meta, ctx));
  }

  auto path = _dir + "/TestFetchVectorCosine";
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, streamer->dump(dumper));
  ASSERT_EQ(0, streamer->close());
  ASSERT_EQ(0, dumper->close());

  // test searcher
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSearcher");
  ASSERT_TRUE(searcher != nullptr);
  ailego::Params searcherParams;
  searcherParams.set("proxima.hnsw.searcher.ef", 100);
  ASSERT_EQ(0, searcher->init(searcherParams));

  auto read_storage = IndexFactory::CreateStorage("MMapFileReadStorage");
  ASSERT_EQ(0, read_storage->open(path, false));
  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));

  // Direct lookup: revert the stored vector and compare its last component
  // with the value that was inserted for document i.
  for (size_t i = 0; i < cnt; i++) {
    float add_on = i * 10;

    const void *vector = searcher->get_vector(i);
    ASSERT_NE(vector, nullptr);

    std::string denormalized_vec;
    denormalized_vec.resize(dim * sizeof(float));
    reformer->revert(vector, new_meta, &denormalized_vec);

    float vector_value =
        *(reinterpret_cast<const float *>(denormalized_vec.data()) + dim - 1);
    EXPECT_NEAR(vector_value, fixed_value + add_on, epsilon);
  }

  size_t query_cnt = 200U;
  auto linearCtx = searcher->create_context();
  auto knnCtx = searcher->create_context();
  auto linearByPKeysCtx = searcher->create_context();
  // Validate the contexts before they are dereferenced.
  ASSERT_TRUE(!!linearCtx);
  ASSERT_TRUE(!!knnCtx);
  ASSERT_TRUE(!!linearByPKeysCtx);
  knnCtx->set_fetch_vector(true);

  size_t topk = 200;
  linearCtx->set_topk(topk);
  knnCtx->set_topk(topk);
  uint64_t knnTotalTime = 0;
  uint64_t linearTotalTime = 0;

  // Query i is built exactly like document i.
  NumericalVector<float> qvec(dim);
  for (size_t i = 0; i < query_cnt; i++) {
    float add_on = i * 10;

    for (size_t j = 0; j < dim; ++j) {
      if (j < dim / 4)
        qvec[j] = fixed_value;
      else
        qvec[j] = fixed_value + add_on;
    }

    std::string new_query;
    // Named distinctly so it no longer shadows the insert-time `new_meta`.
    IndexQueryMeta query_meta;
    ASSERT_EQ(
        0, reformer->transform(qvec.data(), qmeta, &new_query, &query_meta));

    auto t1 = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0, searcher->search_impl(new_query.data(), query_meta, knnCtx));
    auto t2 = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(
        0, searcher->search_bf_impl(new_query.data(), query_meta, linearCtx));
    auto t3 = ailego::Realtime::MicroSeconds();

    knnTotalTime += t2 - t1;
    linearTotalTime += t3 - t2;

    auto &knnResult = knnCtx->result();
    ASSERT_EQ(topk, knnResult.size());

    auto &linearResult = linearCtx->result();
    ASSERT_EQ(topk, linearResult.size());
    ASSERT_EQ(i, linearResult[0].key());

    ASSERT_NE(knnResult[0].vector(), nullptr);

    // Revert the vector attached to the best kNN hit and compare its last
    // component with the query's (i.e. document i's) original value.
    std::string denormalized_vec;
    denormalized_vec.resize(dim * sizeof(float));
    reformer->revert(knnResult[0].vector(), query_meta, &denormalized_vec);

    float vector_value =
        *(reinterpret_cast<const float *>(denormalized_vec.data()) + dim - 1);
    EXPECT_NEAR(vector_value, fixed_value + add_on, epsilon);
  }

  std::cout << "knnTotalTime: " << knnTotalTime << std::endl;
  std::cout << "linearTotalTime: " << linearTotalTime << std::endl;
}


TEST_F(HnswSearcherTest, TestFetchVectorCosineHalfFloatConverter) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_NE(streamer, nullptr);

  ailego::Params params;
  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 50);
  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);
  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 100);
  params.set(PARAM_HNSW_STREAMER_EF, 100);
  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
  params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, true);

  ailego::Params stg_params;

  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP16, dim);
  index_meta_raw.set_metric("Cosine", 0, ailego::Params());

  ailego::Params converter_params;
  auto converter = IndexFactory::CreateConverter("CosineHalfFloatConverter");
  ASSERT_TRUE(converter != nullptr);

  converter->init(index_meta_raw, converter_params);

  IndexMeta index_meta = converter->meta();

  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());
  ASSERT_TRUE(reformer != nullptr);

  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(
      0, storage->open(_dir + "/TestFetchVectorCosineHalfFloatConverter.index",
                       true));
  ASSERT_EQ(0, streamer->init(index_meta, params));
  ASSERT_EQ(0, streamer->open(storage));

  size_t cnt = 2000U;
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);

  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP16, dim);
  IndexQueryMeta new_meta;

  const float epsilon = 0.1;

  std::random_device rd;
  std::mt19937 gen(rd());

  std::uniform_real_distribution<float> dist(-2.0, 2.0);

  std::vector<NumericalVector<uint16_t>> vecs;
  for (size_t i = 0; i < cnt; i++) {
    NumericalVector<uint16_t> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      float value = dist(gen);
      vec[j] = ailego::FloatHelper::ToFP16(value);
    }

    std::string new_vec;

    ASSERT_EQ(0, reformer->convert(vec.data(), qmeta, &new_vec, &new_meta));
    ASSERT_EQ(0, streamer->add_impl(i, new_vec.data(), new_meta, ctx));

    vecs.push_back(vec);
  }

  auto path = _dir + "/TestFetchVectorCosineHalfFloatConverter";
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, streamer->dump(dumper));
  ASSERT_EQ(0, streamer->close());
  ASSERT_EQ(0, dumper->close());

  // test searcher
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSearcher");
  ASSERT_TRUE(searcher != nullptr);
  ailego::Params searcherParams;
  searcherParams.set("proxima.hnsw.searcher.ef", 100);
  ASSERT_EQ(0, searcher->init(searcherParams));

  auto read_storage = IndexFactory::CreateStorage("MMapFileReadStorage");
  ASSERT_EQ(0, read_storage->open(path, false));
  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));

  for (size_t i = 0; i < cnt; i++) {
    uint16_t expected_vec_value = vecs[i][dim - 1];

    const void *vector = searcher->get_vector(i);
    ASSERT_NE(vector, nullptr);

    std::string denormalized_vec;
    denormalized_vec.resize(dim * sizeof(uint16_t));
    reformer->revert(vector, new_meta, &denormalized_vec);

    uint16_t vector_value = *((uint16_t *)(denormalized_vec.data()) + dim - 1);
    float vector_value_float = ailego::FloatHelper::ToFP32(vector_value);

    float expected_vec_float = ailego::FloatHelper::ToFP32(expected_vec_value);

    EXPECT_NEAR(expected_vec_float, vector_value_float, epsilon);
  }

  size_t query_cnt = 200U;
  auto linearCtx = searcher->create_context();
  auto knnCtx = searcher->create_context();
  auto linearByPKeysCtx = searcher->create_context();
  knnCtx->set_fetch_vector(true);

  size_t topk = 200;
  linearCtx->set_topk(topk);
  knnCtx->set_topk(topk);
  uint64_t knnTotalTime = 0;
  uint64_t linearTotalTime = 0;

  NumericalVector<uint16_t> qvec(dim);

  for (size_t i = 0; i < query_cnt; i++) {
    auto &vec = vecs[i];

    std::string new_query;
    IndexQueryMeta new_meta;
    ASSERT_EQ(0, reformer->transform(vec.data(), qmeta, &new_query, &new_meta));

    auto t1 = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0, searcher->search_impl(new_query.data(), new_meta, knnCtx));
    auto t2 = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0,
              searcher->search_bf_impl(new_query.data(), new_meta, linearCtx));
    auto t3 = ailego::Realtime::MicroSeconds();

    knnTotalTime += t2 - t1;
    linearTotalTime += t3 - t2;

    auto &knnResult = knnCtx->result();
    ASSERT_EQ(topk, knnResult.size());

    auto &linearResult = linearCtx->result();
    ASSERT_EQ(topk, linearResult.size());
    ASSERT_EQ(i, linearResult[0].key());

    ASSERT_NE(knnResult[0].vector(), nullptr);

    std::string denormalized_vec;
    denormalized_vec.resize(dim * sizeof(uint16_t));
    reformer->revert(knnResult[0].vector(), new_meta, &denormalized_vec);

    uint16_t expected_vec_value = vec[dim - 1];
    uint16_t vector_value =
        *(((uint16_t *)(denormalized_vec.data()) + dim - 1));

    float vector_value_float = ailego::FloatHelper::ToFP32(vector_value);
    float expected_vec_float = ailego::FloatHelper::ToFP32(expected_vec_value);

    EXPECT_NEAR(expected_vec_float, vector_value_float, epsilon);
  }

  std::cout << "knnTotalTime: " << knnTotalTime << std::endl;
  std::cout << "linearTotalTime: " << linearTotalTime << std::endl;
}

// Round-trip check for the CosineFp16Converter pipeline: random FP32 vectors
// are converted by the reformer, inserted through an HnswStreamer, dumped, and
// loaded into an HnswSearcher. The last component of every fetched vector
// (via get_vector() and via KNN results with fetch_vector enabled) must revert
// to within `epsilon` of the original input.
TEST_F(HnswSearcherTest, TestFetchVectorCosineFp16Converter) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_NE(streamer, nullptr);

  ailego::Params params;
  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 50);
  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);
  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 100);
  params.set(PARAM_HNSW_STREAMER_EF, 100);
  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
  params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, true);

  ailego::Params stg_params;

  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dim);
  index_meta_raw.set_metric("Cosine", 0, ailego::Params());

  ailego::Params converter_params;
  auto converter = IndexFactory::CreateConverter("CosineFp16Converter");
  ASSERT_TRUE(converter != nullptr);

  // The converted meta read below is only meaningful if init succeeded.
  ASSERT_EQ(0, converter->init(index_meta_raw, converter_params));

  IndexMeta index_meta = converter->meta();

  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());
  ASSERT_TRUE(reformer != nullptr);

  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(_dir + "/TestFetchVectorCosineFp16Converter.index",
                             true));
  ASSERT_EQ(0, streamer->init(index_meta, params));
  ASSERT_EQ(0, streamer->open(storage));

  size_t cnt = 2000U;
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);

  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  // Meta produced by reformer->convert(); reused later to revert the vectors
  // returned by get_vector().
  IndexQueryMeta new_meta;

  const float epsilon = 0.1f;

  std::random_device rd;
  std::mt19937 gen(rd());

  std::uniform_real_distribution<float> dist(-2.0, 2.0);

  // Keep the original inputs so fetched vectors can be compared against them.
  std::vector<NumericalVector<float>> vecs;
  for (size_t i = 0; i < cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = dist(gen);
    }

    std::string new_vec;

    ASSERT_EQ(0, reformer->convert(vec.data(), qmeta, &new_vec, &new_meta));
    ASSERT_EQ(0, streamer->add_impl(i, new_vec.data(), new_meta, ctx));

    vecs.push_back(vec);
  }

  auto path = _dir + "/TestFetchVectorCosineFp16Converter";
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, streamer->dump(dumper));
  ASSERT_EQ(0, streamer->close());
  ASSERT_EQ(0, dumper->close());

  // test searcher
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSearcher");
  ASSERT_TRUE(searcher != nullptr);
  ailego::Params searcherParams;
  searcherParams.set("proxima.hnsw.searcher.ef", 100);
  ASSERT_EQ(0, searcher->init(searcherParams));

  auto read_storage = IndexFactory::CreateStorage("MMapFileReadStorage");
  ASSERT_EQ(0, read_storage->open(path, false));
  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));

  // Every stored vector must be fetchable by key and revert to its input.
  for (size_t i = 0; i < cnt; i++) {
    const float expected_vec_value = vecs[i][dim - 1];

    const void *vector = searcher->get_vector(i);
    ASSERT_NE(vector, nullptr);

    std::string denormalized_vec;
    denormalized_vec.resize(dim * sizeof(float));
    // Check the status so a failed revert is reported directly instead of
    // surfacing only as a value mismatch below.
    ASSERT_EQ(0, reformer->revert(vector, new_meta, &denormalized_vec));
    const float vector_value =
        reinterpret_cast<const float *>(denormalized_vec.data())[dim - 1];

    EXPECT_NEAR(expected_vec_value, vector_value, epsilon);
  }

  size_t query_cnt = 200U;
  auto linearCtx = searcher->create_context();
  auto knnCtx = searcher->create_context();
  knnCtx->set_fetch_vector(true);

  size_t topk = 200;
  linearCtx->set_topk(topk);
  knnCtx->set_topk(topk);
  uint64_t knnTotalTime = 0;
  uint64_t linearTotalTime = 0;

  for (size_t i = 0; i < query_cnt; i++) {
    auto &vec = vecs[i];

    std::string new_query;
    // Meta produced by reformer->transform() for this query; distinct from the
    // convert() meta above, hence the separate name.
    IndexQueryMeta query_meta;
    ASSERT_EQ(0,
              reformer->transform(vec.data(), qmeta, &new_query, &query_meta));

    auto t1 = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0, searcher->search_impl(new_query.data(), query_meta, knnCtx));
    auto t2 = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0, searcher->search_bf_impl(new_query.data(), query_meta,
                                          linearCtx));
    auto t3 = ailego::Realtime::MicroSeconds();

    knnTotalTime += t2 - t1;
    linearTotalTime += t3 - t2;

    auto &knnResult = knnCtx->result();
    ASSERT_EQ(topk, knnResult.size());

    auto &linearResult = linearCtx->result();
    ASSERT_EQ(topk, linearResult.size());
    ASSERT_EQ(i, linearResult[0].key());

    ASSERT_NE(knnResult[0].vector(), nullptr);

    std::string denormalized_vec;
    denormalized_vec.resize(dim * sizeof(float));
    ASSERT_EQ(0, reformer->revert(knnResult[0].vector(), query_meta,
                                  &denormalized_vec));

    const float expected_vec_value = vec[dim - 1];
    const float vector_value =
        reinterpret_cast<const float *>(denormalized_vec.data())[dim - 1];

    EXPECT_NEAR(expected_vec_value, vector_value, epsilon);
  }

  std::cout << "knnTotalTime: " << knnTotalTime << std::endl;
  std::cout << "linearTotalTime: " << linearTotalTime << std::endl;
}

// Round-trip check for the CosineInt8Converter pipeline. Document i has its
// first dim / 4 components set to `fixed_value` and the rest to
// `fixed_value + i * 10`. After insertion, dump and reload, the last component
// of each fetched vector (via get_vector() and via KNN results) must revert to
// within `epsilon` of the original value.
TEST_F(HnswSearcherTest, TestFetchVectorCosineInt8Converter) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_NE(streamer, nullptr);

  ailego::Params params;
  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 50);
  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);
  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 100);
  params.set(PARAM_HNSW_STREAMER_EF, 100);
  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
  params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, true);

  ailego::Params stg_params;

  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dim);
  index_meta_raw.set_metric("Cosine", 0, ailego::Params());

  ailego::Params converter_params;
  auto converter = IndexFactory::CreateConverter("CosineInt8Converter");
  ASSERT_TRUE(converter != nullptr);

  // The converted meta read below is only meaningful if init succeeded.
  ASSERT_EQ(0, converter->init(index_meta_raw, converter_params));

  IndexMeta index_meta = converter->meta();

  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());
  ASSERT_TRUE(reformer != nullptr);

  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(_dir + "/TestFetchVectorCosineInt8Converter.index",
                             true));
  ASSERT_EQ(0, streamer->init(index_meta, params));
  ASSERT_EQ(0, streamer->open(storage));

  NumericalVector<float> vec(dim);
  size_t cnt = 2000U;
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);

  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  // Meta produced by reformer->convert(); reused later to revert the vectors
  // returned by get_vector().
  IndexQueryMeta new_meta;

  const float epsilon = 1e-2f;
  const float fixed_value = float(cnt) / 2;
  for (size_t i = 0; i < cnt; i++) {
    const float add_on = static_cast<float>(i * 10);

    for (size_t j = 0; j < dim; ++j) {
      if (j < dim / 4)
        vec[j] = fixed_value;
      else
        vec[j] = fixed_value + add_on;
    }

    std::string new_vec;

    ASSERT_EQ(0, reformer->convert(vec.data(), qmeta, &new_vec, &new_meta));
    ASSERT_EQ(0, streamer->add_impl(i, new_vec.data(), new_meta, ctx));
  }

  auto path = _dir + "/TestFetchVectorCosineInt8Converter";
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, streamer->dump(dumper));
  ASSERT_EQ(0, streamer->close());
  ASSERT_EQ(0, dumper->close());

  // test searcher
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSearcher");
  ASSERT_TRUE(searcher != nullptr);

  ailego::Params searcherParams;
  searcherParams.set("proxima.hnsw.searcher.ef", 100);
  ASSERT_EQ(0, searcher->init(searcherParams));

  auto read_storage = IndexFactory::CreateStorage("MMapFileReadStorage");
  ASSERT_EQ(0, read_storage->open(path, false));
  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));

  // Every stored vector must be fetchable by key and revert to its input.
  for (size_t i = 0; i < cnt; i++) {
    const float add_on = static_cast<float>(i * 10);

    const void *vector = searcher->get_vector(i);
    ASSERT_NE(vector, nullptr);

    std::string denormalized_vec;
    denormalized_vec.resize(dim * sizeof(float));
    // Check the status so a failed revert is reported directly instead of
    // surfacing only as a value mismatch below.
    ASSERT_EQ(0, reformer->revert(vector, new_meta, &denormalized_vec));

    const float vector_value =
        reinterpret_cast<const float *>(denormalized_vec.data())[dim - 1];
    EXPECT_NEAR(vector_value, fixed_value + add_on, epsilon);
  }

  size_t query_cnt = 200U;
  auto linearCtx = searcher->create_context();
  auto knnCtx = searcher->create_context();
  knnCtx->set_fetch_vector(true);

  size_t topk = 200;
  linearCtx->set_topk(topk);
  knnCtx->set_topk(topk);
  uint64_t knnTotalTime = 0;
  uint64_t linearTotalTime = 0;

  NumericalVector<float> qvec(dim);
  for (size_t i = 0; i < query_cnt; i++) {
    const float add_on = static_cast<float>(i * 10);

    // Rebuild the exact vector that was inserted under key i.
    for (size_t j = 0; j < dim; ++j) {
      if (j < dim / 4)
        qvec[j] = fixed_value;
      else
        qvec[j] = fixed_value + add_on;
    }

    std::string new_query;
    // Meta produced by reformer->transform() for this query; distinct from the
    // convert() meta above, hence the separate name.
    IndexQueryMeta query_meta;
    ASSERT_EQ(0,
              reformer->transform(qvec.data(), qmeta, &new_query, &query_meta));

    auto t1 = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0, searcher->search_impl(new_query.data(), query_meta, knnCtx));
    auto t2 = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0, searcher->search_bf_impl(new_query.data(), query_meta,
                                          linearCtx));
    auto t3 = ailego::Realtime::MicroSeconds();

    knnTotalTime += t2 - t1;
    linearTotalTime += t3 - t2;

    auto &knnResult = knnCtx->result();
    ASSERT_EQ(topk, knnResult.size());

    auto &linearResult = linearCtx->result();
    ASSERT_EQ(topk, linearResult.size());
    ASSERT_EQ(i, linearResult[0].key());

    ASSERT_NE(knnResult[0].vector(), nullptr);

    std::string denormalized_vec;
    denormalized_vec.resize(dim * sizeof(float));
    ASSERT_EQ(0, reformer->revert(knnResult[0].vector(), query_meta,
                                  &denormalized_vec));

    const float vector_value =
        reinterpret_cast<const float *>(denormalized_vec.data())[dim - 1];
    EXPECT_NEAR(vector_value, fixed_value + add_on, epsilon);
  }

  std::cout << "knnTotalTime: " << knnTotalTime << std::endl;
  std::cout << "linearTotalTime: " << linearTotalTime << std::endl;
}

// Round-trip check for the CosineInt4Converter pipeline. Document i has its
// first dim / 4 components set to `fixed_value` and the rest to
// `fixed_value + i * 10`. After insertion, dump and reload, the last component
// of each fetched vector (via get_vector() and via KNN results) must revert to
// within `epsilon` of the original value.
TEST_F(HnswSearcherTest, TestFetchVectorCosineInt4Converter) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_NE(streamer, nullptr);

  ailego::Params params;
  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 50);
  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);
  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 100);
  params.set(PARAM_HNSW_STREAMER_EF, 100);
  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
  params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, true);

  ailego::Params stg_params;

  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dim);
  index_meta_raw.set_metric("Cosine", 0, ailego::Params());

  ailego::Params converter_params;
  auto converter = IndexFactory::CreateConverter("CosineInt4Converter");
  ASSERT_TRUE(converter != nullptr);

  // The converted meta read below is only meaningful if init succeeded.
  ASSERT_EQ(0, converter->init(index_meta_raw, converter_params));

  IndexMeta index_meta = converter->meta();

  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());
  ASSERT_TRUE(reformer != nullptr);

  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(_dir + "/TestFetchVectorCosineInt4Converter.index",
                             true));
  ASSERT_EQ(0, streamer->init(index_meta, params));
  ASSERT_EQ(0, streamer->open(storage));

  NumericalVector<float> vec(dim);
  size_t cnt = 2000U;
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);

  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  // Meta produced by reformer->convert(); reused later to revert the vectors
  // returned by get_vector().
  IndexQueryMeta new_meta;

  const float epsilon = 1e-2f;
  const float fixed_value = float(cnt) / 2;
  for (size_t i = 0; i < cnt; i++) {
    const float add_on = static_cast<float>(i * 10);

    for (size_t j = 0; j < dim; ++j) {
      if (j < dim / 4)
        vec[j] = fixed_value;
      else
        vec[j] = fixed_value + add_on;
    }

    std::string new_vec;

    ASSERT_EQ(0, reformer->convert(vec.data(), qmeta, &new_vec, &new_meta));
    ASSERT_EQ(0, streamer->add_impl(i, new_vec.data(), new_meta, ctx));
  }

  auto path = _dir + "/TestFetchVectorCosineInt4Converter";
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, streamer->dump(dumper));
  ASSERT_EQ(0, streamer->close());
  ASSERT_EQ(0, dumper->close());

  // test searcher
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSearcher");
  ASSERT_TRUE(searcher != nullptr);

  ailego::Params searcherParams;
  searcherParams.set("proxima.hnsw.searcher.ef", 100);
  ASSERT_EQ(0, searcher->init(searcherParams));

  auto read_storage = IndexFactory::CreateStorage("MMapFileReadStorage");
  ASSERT_EQ(0, read_storage->open(path, false));
  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));

  // Every stored vector must be fetchable by key and revert to its input.
  for (size_t i = 0; i < cnt; i++) {
    const float add_on = static_cast<float>(i * 10);

    const void *vector = searcher->get_vector(i);
    ASSERT_NE(vector, nullptr);

    std::string denormalized_vec;
    denormalized_vec.resize(dim * sizeof(float));
    // Check the status so a failed revert is reported directly instead of
    // surfacing only as a value mismatch below.
    ASSERT_EQ(0, reformer->revert(vector, new_meta, &denormalized_vec));

    const float vector_value =
        reinterpret_cast<const float *>(denormalized_vec.data())[dim - 1];
    EXPECT_NEAR(vector_value, fixed_value + add_on, epsilon);
  }

  size_t query_cnt = 200U;
  auto linearCtx = searcher->create_context();
  auto knnCtx = searcher->create_context();
  knnCtx->set_fetch_vector(true);

  size_t topk = 100;
  linearCtx->set_topk(topk);
  knnCtx->set_topk(topk);
  uint64_t knnTotalTime = 0;
  uint64_t linearTotalTime = 0;

  NumericalVector<float> qvec(dim);
  for (size_t i = 0; i < query_cnt; i++) {
    const float add_on = static_cast<float>(i * 10);

    // Rebuild the exact vector that was inserted under key i.
    for (size_t j = 0; j < dim; ++j) {
      if (j < dim / 4)
        qvec[j] = fixed_value;
      else
        qvec[j] = fixed_value + add_on;
    }

    std::string new_query;
    // Meta produced by reformer->transform() for this query; distinct from the
    // convert() meta above, hence the separate name.
    IndexQueryMeta query_meta;
    ASSERT_EQ(0,
              reformer->transform(qvec.data(), qmeta, &new_query, &query_meta));

    auto t1 = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0, searcher->search_impl(new_query.data(), query_meta, knnCtx));
    auto t2 = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0, searcher->search_bf_impl(new_query.data(), query_meta,
                                          linearCtx));
    auto t3 = ailego::Realtime::MicroSeconds();

    knnTotalTime += t2 - t1;
    linearTotalTime += t3 - t2;

    auto &knnResult = knnCtx->result();
    ASSERT_EQ(topk, knnResult.size());

    auto &linearResult = linearCtx->result();
    ASSERT_EQ(topk, linearResult.size());
    ASSERT_EQ(i, linearResult[0].key());

    ASSERT_NE(knnResult[0].vector(), nullptr);

    std::string denormalized_vec;
    denormalized_vec.resize(dim * sizeof(float));
    ASSERT_EQ(0, reformer->revert(knnResult[0].vector(), query_meta,
                                  &denormalized_vec));

    const float vector_value =
        reinterpret_cast<const float *>(denormalized_vec.data())[dim - 1];
    EXPECT_NEAR(vector_value, fixed_value + add_on, epsilon);
  }

  std::cout << "knnTotalTime: " << knnTotalTime << std::endl;
  std::cout << "linearTotalTime: " << linearTotalTime << std::endl;
}

// Grouped search over an HNSW index built from 5000 constant-component
// vectors. First search_impl() is run with keys grouped by their tens digit,
// then search_bf_by_p_keys_impl() is run over ten explicit keys grouped by
// their last digit.
TEST_F(HnswSearcherTest, TestGroup) {
  // ---- build the index ----
  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("HnswBuilder");
  ASSERT_NE(builder, nullptr);
  auto holder =
      std::make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);
  size_t doc_cnt = 5000UL;
  for (size_t doc = 0; doc < doc_cnt; ++doc) {
    // Every component of document `doc` carries the same value, doc / 10.
    const float component = doc / 10.0;
    NumericalVector<float> point(dim);
    for (size_t d = 0; d < dim; ++d) {
      point[d] = component;
    }
    ASSERT_TRUE(holder->emplace(doc, point));
  }

  ailego::Params build_params;

  ASSERT_EQ(0, builder->init(*_index_meta_ptr, build_params));
  ASSERT_EQ(0, builder->train(holder));
  ASSERT_EQ(0, builder->build(holder));

  // ---- dump it to disk ----
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  std::string index_path = _dir + "/TestGroup";
  ASSERT_EQ(0, dumper->create(index_path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  // ---- load it into a searcher ----
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSearcher");
  ASSERT_NE(searcher, nullptr);
  ailego::Params search_params;
  search_params.set("proxima.hnsw.searcher.ef", 50);
  search_params.set("proxima.hnsw.searcher.max_scan_ratio", 0.8);
  ASSERT_EQ(0, searcher->init(search_params));

  auto storage = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_EQ(0, storage->open(index_path, false));
  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));

  auto knn_ctx = searcher->create_context();
  ASSERT_TRUE(!!knn_ctx);

  // ---- grouped KNN search ----
  size_t group_topk = 20;
  size_t group_num = 5;

  // Group id is the tens digit of the key.
  auto by_tens_digit = [](uint64_t key) {
    const uint32_t digit = key / 10 % 10;
    return std::string("g_") + std::to_string(digit);
  };

  knn_ctx->set_group_params(group_num, group_topk);
  knn_ctx->set_group_by(by_tens_digit);

  // Query sits just above the document in the middle of the key range.
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  NumericalVector<float> query(dim);
  size_t query_value = doc_cnt / 2;
  const float query_component = float(query_value) / 10 + 0.1f;
  for (size_t d = 0; d < dim; ++d) {
    query[d] = query_component;
  }

  auto started = ailego::Realtime::MicroSeconds();
  ASSERT_EQ(0, searcher->search_impl(query.data(), qmeta, knn_ctx));
  auto finished = ailego::Realtime::MicroSeconds();

  const uint64_t elapsed = finished - started;

  std::cout << "total time: " << elapsed << std::endl;

  // Each returned group has to hold at least one document.
  auto &knn_groups = knn_ctx->group_result();
  for (uint32_t g = 0; g < knn_groups.size(); ++g) {
    auto &docs = knn_groups[g].docs();

    ASSERT_GT(docs.size(), 0);
  }

  // ---- grouped linear search restricted to explicit primary keys ----
  // Group id is the last digit of the key.
  auto by_last_digit = [](uint64_t key) {
    const uint32_t digit = key % 10;
    return std::string("g_") + std::to_string(digit);
  };

  auto pkeys_ctx = searcher->create_context();

  pkeys_ctx->set_group_params(group_num, group_topk);
  pkeys_ctx->set_group_by(by_last_digit);

  const std::vector<uint64_t> candidate_keys = {4, 3, 2, 1, 5, 6, 7, 8, 9, 10};
  std::vector<std::vector<uint64_t>> p_keys(1, candidate_keys);

  ASSERT_EQ(0, searcher->search_bf_by_p_keys_impl(query.data(), p_keys, qmeta,
                                                  pkeys_ctx));
  auto &pkeys_groups = pkeys_ctx->group_result();
  ASSERT_EQ(pkeys_groups.size(), group_num);

  // Groups are expected to lead with keys 10, 9, 8, ... in that order.
  for (uint32_t g = 0; g < pkeys_groups.size(); ++g) {
    auto &docs = pkeys_groups[g].docs();

    ASSERT_GT(docs.size(), 0);

    ASSERT_EQ(10 - g, docs[0].key());
  }
}

// Grouped search that asks for more groups (12) than the group-by function
// can produce: keys are grouped by their tens digit, so the result is expected
// to contain exactly 10 groups.
TEST_F(HnswSearcherTest, TestGroupNotEnoughNum) {
  // ---- build the index ----
  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("HnswBuilder");
  ASSERT_NE(builder, nullptr);
  auto holder =
      std::make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);
  size_t doc_cnt = 5000UL;
  for (size_t doc = 0; doc < doc_cnt; ++doc) {
    // Every component of document `doc` carries the same value, doc / 10.
    const float component = doc / 10.0;
    NumericalVector<float> point(dim);
    for (size_t d = 0; d < dim; ++d) {
      point[d] = component;
    }
    ASSERT_TRUE(holder->emplace(doc, point));
  }

  ailego::Params build_params;

  ASSERT_EQ(0, builder->init(*_index_meta_ptr, build_params));
  ASSERT_EQ(0, builder->train(holder));
  ASSERT_EQ(0, builder->build(holder));

  // ---- dump it to disk ----
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  std::string index_path = _dir + "/TestGroupNotEnoughNum";
  ASSERT_EQ(0, dumper->create(index_path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  // ---- load it into a searcher ----
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSearcher");
  ASSERT_NE(searcher, nullptr);
  ailego::Params search_params;
  search_params.set("proxima.hnsw.searcher.ef", 50);
  search_params.set("proxima.hnsw.searcher.max_scan_ratio", 0.8);
  ASSERT_EQ(0, searcher->init(search_params));

  auto storage = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_EQ(0, storage->open(index_path, false));
  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));

  auto knn_ctx = searcher->create_context();
  ASSERT_TRUE(!!knn_ctx);

  // ---- grouped KNN search ----
  size_t group_topk = 20;
  size_t group_num = 12;

  // Group id is the tens digit of the key, so only "g_0".."g_9" can occur.
  auto by_tens_digit = [](uint64_t key) {
    const uint32_t digit = key / 10 % 10;
    return std::string("g_") + std::to_string(digit);
  };

  knn_ctx->set_group_params(group_num, group_topk);
  knn_ctx->set_group_by(by_tens_digit);

  // Query sits just above the document in the middle of the key range.
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  NumericalVector<float> query(dim);
  size_t query_value = doc_cnt / 2;
  const float query_component = float(query_value) / 10 + 0.1f;
  for (size_t d = 0; d < dim; ++d) {
    query[d] = query_component;
  }

  auto started = ailego::Realtime::MicroSeconds();
  ASSERT_EQ(0, searcher->search_impl(query.data(), qmeta, knn_ctx));
  auto finished = ailego::Realtime::MicroSeconds();
  const uint64_t elapsed = finished - started;

  std::cout << "total time: " << elapsed << std::endl;

  auto &knn_groups = knn_ctx->group_result();
  ASSERT_EQ(knn_groups.size(), 10);

  // Each returned group has to hold at least one document.
  for (uint32_t g = 0; g < knn_groups.size(); ++g) {
    auto &docs = knn_groups[g].docs();

    ASSERT_GT(docs.size(), 0);
  }
}

// Grouped search with the searcher's brute_force_threshold set to twice the
// document count. Keys are grouped by their tens digit and exactly 5 groups
// are expected back.
TEST_F(HnswSearcherTest, TestGroupInBruteforceSearch) {
  // ---- build the index ----
  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("HnswBuilder");
  ASSERT_NE(builder, nullptr);
  auto holder =
      std::make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);
  size_t doc_cnt = 5000UL;
  for (size_t doc = 0; doc < doc_cnt; ++doc) {
    // Every component of document `doc` carries the same value, doc / 10.
    const float component = doc / 10.0;
    NumericalVector<float> point(dim);
    for (size_t d = 0; d < dim; ++d) {
      point[d] = component;
    }
    ASSERT_TRUE(holder->emplace(doc, point));
  }

  ailego::Params build_params;

  ASSERT_EQ(0, builder->init(*_index_meta_ptr, build_params));
  ASSERT_EQ(0, builder->train(holder));
  ASSERT_EQ(0, builder->build(holder));

  // ---- dump it to disk ----
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  std::string index_path = _dir + "/TestGroupInBruteforceSearch";
  ASSERT_EQ(0, dumper->create(index_path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  // ---- load it into a searcher ----
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSearcher");
  ASSERT_NE(searcher, nullptr);
  ailego::Params search_params;
  search_params.set("proxima.hnsw.searcher.ef", 50);
  search_params.set("proxima.hnsw.searcher.max_scan_ratio", 0.8);
  // Threshold is deliberately larger than the number of indexed documents.
  search_params.set("proxima.hnsw.searcher.brute_force_threshold",
                    2 * doc_cnt);

  ASSERT_EQ(0, searcher->init(search_params));

  auto storage = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_EQ(0, storage->open(index_path, false));
  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));

  auto knn_ctx = searcher->create_context();
  ASSERT_TRUE(!!knn_ctx);

  // ---- grouped search ----
  size_t group_topk = 20;
  size_t group_num = 5;

  // Group id is the tens digit of the key.
  auto by_tens_digit = [](uint64_t key) {
    const uint32_t digit = key / 10 % 10;
    return std::string("g_") + std::to_string(digit);
  };

  knn_ctx->set_group_params(group_num, group_topk);
  knn_ctx->set_group_by(by_tens_digit);

  // Query sits just above the document in the middle of the key range.
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  NumericalVector<float> query(dim);
  size_t query_value = doc_cnt / 2;
  const float query_component = float(query_value) / 10 + 0.1f;
  for (size_t d = 0; d < dim; ++d) {
    query[d] = query_component;
  }

  auto started = ailego::Realtime::MicroSeconds();
  ASSERT_EQ(0, searcher->search_impl(query.data(), qmeta, knn_ctx));
  auto finished = ailego::Realtime::MicroSeconds();
  const uint64_t elapsed = finished - started;

  std::cout << "total time: " << elapsed << std::endl;

  auto &knn_groups = knn_ctx->group_result();
  ASSERT_EQ(knn_groups.size(), 5);

  // Each returned group has to hold at least one document.
  for (uint32_t g = 0; g < knn_groups.size(); ++g) {
    auto &docs = knn_groups[g].docs();

    ASSERT_GT(docs.size(), 0);
  }
}

// End-to-end check for the BinaryConverter pipeline: random 256-dimensional
// FP32 vectors are converted by the reformer, inserted through an
// HnswStreamer, dumped, and loaded into an HnswSearcher. Querying with each of
// the first 200 inserted vectors must return that vector's own key first with
// a score within `epsilon` of 0.
TEST_F(HnswSearcherTest, TestBinaryConverter) {
  const uint32_t dimension = 256;

  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer != nullptr);

  // No streamer parameters are set here; init() receives an empty Params.
  ailego::Params params;

  ailego::Params stg_params;

  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dimension);
  index_meta_raw.set_metric("InnerProduct", 0, ailego::Params());

  ailego::Params converter_params;
  auto converter = IndexFactory::CreateConverter("BinaryConverter");
  ASSERT_TRUE(converter != nullptr);

  // The converted meta read below is only meaningful if init succeeded.
  ASSERT_EQ(0, converter->init(index_meta_raw, converter_params));

  IndexMeta index_meta = converter->meta();

  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());
  ASSERT_TRUE(reformer != nullptr);

  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(_dir + "/TestBinaryConverter.index", true));
  ASSERT_EQ(0, streamer->init(index_meta, params));
  ASSERT_EQ(0, streamer->open(storage));

  size_t cnt = 5000U;
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);

  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension);

  std::random_device rd;
  std::mt19937 gen(rd());

  std::uniform_real_distribution<float> dist(-2.0, 2.0);
  // Keep the original inputs so they can be replayed as queries below.
  std::vector<NumericalVector<float>> vecs;

  for (size_t i = 0; i < cnt; i++) {
    NumericalVector<float> vec(dimension);
    for (size_t j = 0; j < dimension; ++j) {
      vec[j] = dist(gen);
    }

    std::string new_vec;
    IndexQueryMeta new_meta;

    ASSERT_EQ(0, reformer->convert(vec.data(), qmeta, &new_vec, &new_meta));
    ASSERT_EQ(0, streamer->add_impl(i, new_vec.data(), new_meta, ctx));

    vecs.push_back(vec);
  }

  auto path = _dir + "/TestBinaryConverter";
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, streamer->dump(dumper));
  ASSERT_EQ(0, streamer->close());
  ASSERT_EQ(0, dumper->close());

  // test searcher
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSearcher");
  ASSERT_TRUE(searcher != nullptr);

  ailego::Params searcherParams;
  ASSERT_EQ(0, searcher->init(searcherParams));

  auto read_storage = IndexFactory::CreateStorage("MMapFileReadStorage");
  ASSERT_EQ(0, read_storage->open(path, false));
  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));

  size_t query_cnt = 200U;
  auto knnCtx = searcher->create_context();

  // topk is the same for every query, so configure the context once.
  const size_t topk = 50;
  knnCtx->set_topk(topk);

  const float epsilon = 1e-6f;
  for (size_t i = 0; i < query_cnt; i++) {
    auto &vec = vecs[i];
    std::string new_query;
    IndexQueryMeta new_meta;
    ASSERT_EQ(0, reformer->transform(vec.data(), qmeta, &new_query, &new_meta));

    ASSERT_EQ(0, searcher->search_impl(new_query.data(), new_meta, knnCtx));
    auto &results = knnCtx->result();
    ASSERT_EQ(topk, results.size());
    // A vector queried against the index must find itself first.
    ASSERT_EQ(i, results[0].key());
    ASSERT_NEAR(0, results[0].score(), epsilon);
  }
}

}  // namespace core
}  // namespace zvec

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif

================================================
FILE: tests/core/algorithm/hnsw/hnsw_streamer_buffer_test.cc
================================================
#include <future>
#include <string>
#include <vector>
#include <ailego/utility/math_helper.h>
#include <ailego/utility/memory_helper.h>
#include <algorithm/hnsw/hnsw_params.h>
#include <gtest/gtest.h>
#include <zvec/ailego/buffer/buffer_manager.h>
#include <zvec/core/framework/index_framework.h>
#include <zvec/core/framework/index_streamer.h>

using namespace zvec::core;
using namespace zvec::ailego;
using namespace std;

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
#endif

// Vector dimension used for the index meta and the test vectors in this file.
static constexpr size_t dim = 16;

class HnswStreamerTest : public testing::Test {
 protected:
  void SetUp(void);
  void TearDown(void);
  void hybrid_scale(std::vector<float> &dense_value,
                    std::vector<float> &sparse_value, float alpha_scale);

  static std::string dir_;
  static std::shared_ptr<IndexMeta> index_meta_ptr_;
};

// Working directory for the fixture; SetUp and TearDown both remove it.
std::string HnswStreamerTest::dir_ = "streamer_test/";
// Starts out empty; SetUp installs a fresh IndexMeta before each test.
std::shared_ptr<IndexMeta> HnswStreamerTest::index_meta_ptr_{};

void HnswStreamerTest::SetUp(void) {
  index_meta_ptr_.reset(new (std::nothrow)
                            IndexMeta(IndexMeta::DataType::DT_FP32, dim));
  index_meta_ptr_->set_metric("SquaredEuclidean", 0, Params());

  char cmdBuf[100];
  snprintf(cmdBuf, 100, "rm -rf %s", dir_.c_str());
  system(cmdBuf);
}

// Removes the scratch directory after each test.
void HnswStreamerTest::TearDown(void) {
  // A std::string avoids the silent truncation a fixed 100-byte buffer would
  // cause for a long dir_.
  const std::string cleanup_cmd = "rm -rf " + dir_;
  system(cleanup_cmd.c_str());
}

// Writes 10000 vectors through an MMapFileStorage-backed streamer, reopens the
// same path through BufferStorage, and checks graph search, vector retrieval
// and brute-force search against the known layout (vector i has every
// component equal to i). The search pass is run twice; the second pass is
// timed and the elapsed time is printed.
TEST_F(HnswStreamerTest, TestHnswSearch) {
  IndexStreamer::Pointer write_streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(write_streamer != nullptr);

  Params params;
  params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, true);

  ASSERT_EQ(0, write_streamer->init(*index_meta_ptr_, params));
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/Test/HnswSearch", true));
  ASSERT_EQ(0, write_streamer->open(storage));

  auto ctx = write_streamer->create_context();
  ASSERT_TRUE(!!ctx);

  size_t cnt = 10000UL;
  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);
  for (size_t i = 0; i < cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    // Every later assertion relies on all keys being present, so a failed
    // insert must stop the test here instead of surfacing as a search miss.
    ASSERT_EQ(0, write_streamer->add_impl(i, vec.data(), qmeta, ctx));
  }
  write_streamer->flush(0UL);
  write_streamer->close();
  write_streamer.reset();

  IndexStreamer::Pointer read_streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(read_streamer != nullptr);
  ASSERT_EQ(0, read_streamer->init(*index_meta_ptr_, params));
  auto read_storage = IndexFactory::CreateStorage("BufferStorage");
  ASSERT_NE(nullptr, read_storage);
  ASSERT_EQ(0, read_storage->init(stg_params));
  ASSERT_EQ(0, read_storage->open(dir_ + "/Test/HnswSearch", false));
  ASSERT_EQ(0, read_streamer->open(read_storage));
  size_t topk = 3;
  auto provider = read_streamer->create_provider();
  ASSERT_TRUE(!!provider);
  // NOTE(review): ctx was created by write_streamer, which has been closed and
  // reset above, and is reused here with read_streamer — confirm that this
  // cross-streamer reuse is intended.
  for (size_t i = 0; i < cnt; i += 1) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    // Exact query: the best hit must be key i and its stored vector all i.
    ctx->set_topk(topk);
    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));
    auto &result1 = ctx->result();
    ASSERT_EQ(topk, result1.size());
    IndexStorage::MemoryBlock block;
    ASSERT_EQ(0, provider->get_vector(result1[0].key(), block));
    const float *data = (float *)block.data();
    for (size_t j = 0; j < dim; ++j) {
      ASSERT_FLOAT_EQ(data[j], i);
    }
    ASSERT_EQ(i, result1[0].key());

    // Query shifted by +0.1: expected order is i, i + 1, i - 1, with the
    // neighbours adjusted at both ends of the key range.
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i + 0.1f;
    }
    ctx->set_topk(topk);
    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));
    auto &result2 = ctx->result();
    ASSERT_EQ(topk, result2.size());
    ASSERT_EQ(i, result2[0].key());
    ASSERT_EQ(i == cnt - 1 ? i - 1 : i + 1, result2[1].key());
    ASSERT_EQ(i == 0 ? 2 : (i == cnt - 1 ? i - 2 : i - 1), result2[2].key());
  }

  // Brute-force top-100 around 10.1: hits alternate 10, 11, 9, 12, ... until
  // key 0 is reached at rank 20, after which they continue upwards 21, 22, ...
  ctx->set_topk(100U);
  NumericalVector<float> vec(dim);
  for (size_t j = 0; j < dim; ++j) {
    vec[j] = 10.1f;
  }
  ASSERT_EQ(0, read_streamer->search_bf_impl(vec.data(), qmeta, ctx));
  auto &result = ctx->result();
  ASSERT_EQ(100U, result.size());
  ASSERT_EQ(10, result[0].key());
  ASSERT_EQ(11, result[1].key());
  ASSERT_EQ(5, result[10].key());
  ASSERT_EQ(0, result[20].key());
  ASSERT_EQ(30, result[30].key());
  ASSERT_EQ(35, result[35].key());
  ASSERT_EQ(99, result[99].key());

  // Second, timed pass over the same queries and expectations.
  ElapsedTime elapsed_time;
  for (size_t i = 0; i < cnt; i += 1) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    ctx->set_topk(topk);
    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));
    auto &result1 = ctx->result();
    ASSERT_EQ(topk, result1.size());
    IndexStorage::MemoryBlock block;
    ASSERT_EQ(0, provider->get_vector(result1[0].key(), block));
    const float *data = (float *)block.data();
    for (size_t j = 0; j < dim; ++j) {
      ASSERT_FLOAT_EQ(data[j], i);
    }
    ASSERT_EQ(i, result1[0].key());

    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i + 0.1f;
    }
    ctx->set_topk(topk);
    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));
    auto &result2 = ctx->result();
    ASSERT_EQ(topk, result2.size());
    ASSERT_EQ(i, result2[0].key());
    ASSERT_EQ(i == cnt - 1 ? i - 1 : i + 1, result2[1].key());
    ASSERT_EQ(i == 0 ? 2 : (i == cnt - 1 ? i - 2 : i - 1), result2[2].key());
  }

  read_streamer->close();
  read_streamer.reset();
  cout << "Elapsed time: " << elapsed_time.milli_seconds() << " ms" << endl;
}

// Writes 10000 vectors through an MMapFileStorage-backed streamer, reopens the
// same path with a second MMapFileStorage instance, and checks graph search,
// vector retrieval and brute-force search against the known layout (vector i
// has every component equal to i). The whole read phase is timed and the
// elapsed time is printed.
TEST_F(HnswStreamerTest, TestHnswSearchMMap) {
  IndexStreamer::Pointer write_streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(write_streamer != nullptr);

  Params params;
  params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, true);

  ASSERT_EQ(0, write_streamer->init(*index_meta_ptr_, params));
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/Test/HnswSearchMMap", true));
  ASSERT_EQ(0, write_streamer->open(storage));

  auto ctx = write_streamer->create_context();
  ASSERT_TRUE(!!ctx);

  size_t cnt = 10000UL;
  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);
  for (size_t i = 0; i < cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    // Every later assertion relies on all keys being present, so a failed
    // insert must stop the test here instead of surfacing as a search miss.
    ASSERT_EQ(0, write_streamer->add_impl(i, vec.data(), qmeta, ctx));
  }
  write_streamer->flush(0UL);
  write_streamer->close();
  write_streamer.reset();

  // Timing starts before the read streamer is created and opened.
  ElapsedTime elapsed_time;
  IndexStreamer::Pointer read_streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(read_streamer != nullptr);
  ASSERT_EQ(0, read_streamer->init(*index_meta_ptr_, params));
  auto read_storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, read_storage);
  ASSERT_EQ(0, read_storage->init(stg_params));
  ASSERT_EQ(0, read_storage->open(dir_ + "/Test/HnswSearchMMap", false));
  ASSERT_EQ(0, read_streamer->open(read_storage));
  size_t topk = 3;
  auto provider = read_streamer->create_provider();
  ASSERT_TRUE(!!provider);
  // NOTE(review): ctx was created by write_streamer, which has been closed and
  // reset above, and is reused here with read_streamer — confirm that this
  // cross-streamer reuse is intended.
  for (size_t i = 0; i < cnt; i += 1) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    // Exact query: the best hit must be key i and its stored vector all i.
    ctx->set_topk(topk);
    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));
    auto &result1 = ctx->result();
    ASSERT_EQ(topk, result1.size());
    IndexStorage::MemoryBlock block;
    ASSERT_EQ(0, provider->get_vector(result1[0].key(), block));
    const float *data = (float *)block.data();
    for (size_t j = 0; j < dim; ++j) {
      ASSERT_FLOAT_EQ(data[j], i);
    }
    ASSERT_EQ(i, result1[0].key());

    // Query shifted by +0.1: expected order is i, i + 1, i - 1, with the
    // neighbours adjusted at both ends of the key range.
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i + 0.1f;
    }
    ctx->set_topk(topk);
    ASSERT_EQ(0, read_streamer->search_impl(vec.data(), qmeta, ctx));
    auto &result2 = ctx->result();
    ASSERT_EQ(topk, result2.size());
    ASSERT_EQ(i, result2[0].key());
    ASSERT_EQ(i == cnt - 1 ? i - 1 : i + 1, result2[1].key());
    ASSERT_EQ(i == 0 ? 2 : (i == cnt - 1 ? i - 2 : i - 1), result2[2].key());
  }

  // Brute-force top-100 around 10.1: hits alternate 10, 11, 9, 12, ... until
  // key 0 is reached at rank 20, after which they continue upwards 21, 22, ...
  ctx->set_topk(100U);
  NumericalVector<float> vec(dim);
  for (size_t j = 0; j < dim; ++j) {
    vec[j] = 10.1f;
  }
  ASSERT_EQ(0, read_streamer->search_bf_impl(vec.data(), qmeta, ctx));
  auto &result = ctx->result();
  ASSERT_EQ(100U, result.size());
  ASSERT_EQ(10, result[0].key());
  ASSERT_EQ(11, result[1].key());
  ASSERT_EQ(5, result[10].key());
  ASSERT_EQ(0, result[20].key());
  ASSERT_EQ(30, result[30].key());
  ASSERT_EQ(35, result[35].key());
  ASSERT_EQ(99, result[99].key());

  read_streamer->close();
  read_streamer.reset();
  cout << "Elapsed time: " << elapsed_time.milli_seconds() << " ms" << endl;
}

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif

================================================
FILE: tests/core/algorithm/hnsw/hnsw_streamer_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "hnsw_streamer.h"
#include <sys/stat.h>
#include <sys/types.h>
#include <fcntl.h>
#include <future>
#include <iostream>
#include <memory>
#include <gtest/gtest.h>
#include <zvec/ailego/container/vector.h>

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
#endif

using namespace std;
using namespace testing;
using namespace zvec::ailego;

namespace zvec {
namespace core {

// Default vector dimensionality; individual tests may shadow it with a local.
static constexpr size_t dim = 16;

class HnswStreamerTest : public testing::Test {
 protected:
  void SetUp(void);
  void TearDown(void);

  static std::string dir_;
  static shared_ptr<IndexMeta> index_meta_ptr_;
};

// Scratch directory used by the fixture; SetUp()/TearDown() remove it.
std::string HnswStreamerTest::dir_ = "streamer_test/";
// Starts out empty; SetUp() installs a fresh IndexMeta before each test.
shared_ptr<IndexMeta> HnswStreamerTest::index_meta_ptr_;

void HnswStreamerTest::SetUp(void) {
  index_meta_ptr_.reset(new (nothrow)
                            IndexMeta(IndexMeta::DataType::DT_FP32, dim));
  index_meta_ptr_->set_metric("SquaredEuclidean", 0, ailego::Params());

  char cmdBuf[100];
  snprintf(cmdBuf, 100, "rm -rf %s", dir_.c_str());
  system(cmdBuf);
}

// Removes the scratch directory after each test.
void HnswStreamerTest::TearDown(void) {
  // A std::string avoids the silent truncation a fixed 100-byte buffer would
  // cause for a long dir_.
  const std::string cleanup_cmd = "rm -rf " + dir_;
  system(cleanup_cmd.c_str());
}

// Inserts 1000 vectors (vector i has every component equal to i) into a fresh
// MMapFileStorage-backed streamer and flushes it.
TEST_F(HnswStreamerTest, TestAddVector) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer != nullptr);

  ailego::Params params;
  params.set("proxima.hnsw.streamer.max_neighbor_count", 16U);
  params.set("proxima.hnsw.streamer.upper_neighbor_count", 8U);
  params.set("proxima.hnsw.streamer.scaling_factor", 5U);
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ailego::Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/Test/AddVector", true));
  ASSERT_EQ(0, streamer->open(storage));

  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);

  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  for (size_t i = 0; i < 1000UL; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    // Insertion is the only thing this test exercises, so its status must be
    // checked; previously a failing add_impl() went unnoticed.
    ASSERT_EQ(0, streamer->add_impl(i, vec.data(), qmeta, ctx));
  }

  streamer->flush(0UL);
  streamer.reset();
}

// TODO: a context cannot be shared by different searchers
// Inserts 5000 vectors (vector i has every component equal to i) and checks
// brute-force search results for exact queries, queries shifted by +0.1, and
// a top-100 query around 10.1.
TEST_F(HnswStreamerTest, TestLinearSearch) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer != nullptr);

  ailego::Params params;
  params.set("proxima.hnsw.streamer.max_neighbor_count", 16U);
  params.set("proxima.hnsw.streamer.upper_neighbor_count", 8U);
  params.set("proxima.hnsw.streamer.scaling_factor", 5U);
  ailego::Params stg_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  // Guard against a missing storage plugin before dereferencing.
  ASSERT_NE(nullptr, storage);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestLinearSearch.index", true));
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  size_t cnt = 5000UL;
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  NumericalVector<float> vec(dim);
  for (size_t i = 0; i < cnt; i++) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    // All keys are expected to be searchable below, so inserts must succeed.
    ASSERT_EQ(0, streamer->add_impl(i, vec.data(), qmeta, ctx));
  }

  size_t topk = 3;
  for (size_t i = 0; i < cnt; i += 1) {
    // Exact query: the single best hit must be key i.
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    ctx->set_topk(1U);
    ASSERT_EQ(0, streamer->search_bf_impl(vec.data(), qmeta, ctx));
    auto &result1 = ctx->result();
    ASSERT_EQ(1UL, result1.size());
    ASSERT_EQ(i, result1[0].key());

    // Query shifted by +0.1: expected order is i, i + 1, i - 1, with the
    // neighbours adjusted at both ends of the key range.
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i + 0.1f;
    }
    ctx->set_topk(topk);
    ASSERT_EQ(0, streamer->search_bf_impl(vec.data(), qmeta, ctx));
    auto &result2 = ctx->result();
    ASSERT_EQ(topk, result2.size());
    ASSERT_EQ(i, result2[0].key());
    ASSERT_EQ(i == cnt - 1 ? i - 1 : i + 1, result2[1].key());
    ASSERT_EQ(i == 0 ? 2 : (i == cnt - 1 ? i - 2 : i - 1), result2[2].key());
  }

  // Top-100 around 10.1: hits alternate 10, 11, 9, 12, ... until key 0 is
  // reached at rank 20, after which they continue upwards 21, 22, ...
  ctx->set_topk(100U);
  for (size_t j = 0; j < dim; ++j) {
    vec[j] = 10.1f;
  }
  ASSERT_EQ(0, streamer->search_bf_impl(vec.data(), qmeta, ctx));
  auto &result = ctx->result();
  ASSERT_EQ(100U, result.size());
  ASSERT_EQ(10, result[0].key());
  ASSERT_EQ(11, result[1].key());
  ASSERT_EQ(5, result[10].key());
  ASSERT_EQ(0, result[20].key());
  ASSERT_EQ(30, result[30].key());
  ASSERT_EQ(35, result[35].key());
  ASSERT_EQ(99, result[99].key());
}

// TODO: a context cannot be shared by different searchers

// Inserts 5000 vectors (vector i has every component equal to i) and checks
// brute-force search restricted to a caller-supplied key list: first the full
// key set, then a list containing unknown keys, then every tenth key.
TEST_F(HnswStreamerTest, TestLinearSearchByKeys) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer != nullptr);

  ailego::Params params;
  params.set("proxima.hnsw.streamer.max_neighbor_count", 16U);
  params.set("proxima.hnsw.streamer.upper_neighbor_count", 8U);
  params.set("proxima.hnsw.streamer.scaling_factor", 5U);
  params.set("proxima.hnsw.streamer.get_vector_enable", true);
  ailego::Params stg_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  // Guard against a missing storage plugin before dereferencing.
  ASSERT_NE(nullptr, storage);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestLinearSearchByKeys.index", true));
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  size_t cnt = 5000UL;
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  NumericalVector<float> vec(dim);

  // One candidate list, initially holding every inserted key.
  std::vector<std::vector<uint64_t>> p_keys;
  p_keys.resize(1);
  p_keys[0].resize(cnt);
  for (size_t i = 0; i < cnt; i++) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    // All keys are expected to be searchable below, so inserts must succeed.
    ASSERT_EQ(0, streamer->add_impl(i, vec.data(), qmeta, ctx));
    p_keys[0][i] = i;
  }

  size_t topk = 3;
  for (size_t i = 0; i < cnt; i += 1) {
    // Exact query: the single best hit must be key i.
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    ctx->set_topk(1U);
    ASSERT_EQ(
        0, streamer->search_bf_by_p_keys_impl(vec.data(), p_keys, qmeta, ctx));
    auto &result1 = ctx->result();
    ASSERT_EQ(1UL, result1.size());
    ASSERT_EQ(i, result1[0].key());

    // Query shifted by +0.1: expected order is i, i + 1, i - 1, with the
    // neighbours adjusted at both ends of the key range.
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i + 0.1f;
    }
    ctx->set_topk(topk);
    ASSERT_EQ(
        0, streamer->search_bf_by_p_keys_impl(vec.data(), p_keys, qmeta, ctx));
    auto &result2 = ctx->result();
    ASSERT_EQ(topk, result2.size());
    ASSERT_EQ(i, result2[0].key());
    ASSERT_EQ(i == cnt - 1 ? i - 1 : i + 1, result2[1].key());
    ASSERT_EQ(i == 0 ? 2 : (i == cnt - 1 ? i - 2 : i - 1), result2[2].key());
  }

  {
    // Full key list, top-100 around 10.1: hits alternate 10, 11, 9, 12, ...
    // until key 0 at rank 20, then continue upwards 21, 22, ...
    ctx->set_topk(100U);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = 10.1f;
    }
    ASSERT_EQ(
        0, streamer->search_bf_by_p_keys_impl(vec.data(), p_keys, qmeta, ctx));
    auto &result = ctx->result();
    ASSERT_EQ(100U, result.size());
    ASSERT_EQ(10, result[0].key());
    ASSERT_EQ(11, result[1].key());
    ASSERT_EQ(5, result[10].key());
    ASSERT_EQ(0, result[20].key());
    ASSERT_EQ(30, result[30].key());
    ASSERT_EQ(35, result[35].key());
    ASSERT_EQ(99, result[99].key());
  }

  {
    // cnt + 1 and cnt + 2 were never inserted, so only the three known keys
    // are expected back, ordered by distance from 10.1.
    ctx->set_topk(100U);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = 10.1f;
    }
    p_keys[0] = {{cnt + 1, 10, 1, 15, cnt + 2}};
    ASSERT_EQ(
        0, streamer->search_bf_by_p_keys_impl(vec.data(), p_keys, qmeta, ctx));
    auto &result = ctx->result();
    ASSERT_EQ(3U, result.size());
    ASSERT_EQ(10, result[0].key());
    ASSERT_EQ(15, result[1].key());
    ASSERT_EQ(1, result[2].key());
  }

  {
    // Candidates are every tenth key; around 9.1 the order is 10, 0, 20, 30,
    // ... so rank k (k >= 2) holds key 10 * k.
    ctx->set_topk(100U);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = 9.1f;
    }
    p_keys[0].clear();
    for (size_t j = 0; j < cnt; j += 10) {
      p_keys[0].push_back((uint64_t)j);
    }
    ASSERT_EQ(
        0, streamer->search_bf_by_p_keys_impl(vec.data(), p_keys, qmeta, ctx));
    auto &result = ctx->result();
    ASSERT_EQ(100U, result.size());
    ASSERT_EQ(10, result[0].key());
    ASSERT_EQ(0, result[1].key());
    ASSERT_EQ(100, result[10].key());
    ASSERT_EQ(200, result[20].key());
    ASSERT_EQ(300, result[30].key());
    ASSERT_EQ(350, result[35].key());
    ASSERT_EQ(990, result[99].key());
  }
}

// Inserts 5000 vectors (vector i has every component equal to i), then for
// each key compares the top-200 graph search against the top-200 brute-force
// search and checks overall recall and top-1 recall.
TEST_F(HnswStreamerTest, TestKnnSearch) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer != nullptr);

  ailego::Params params;
  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);
  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);
  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 10);
  params.set(PARAM_HNSW_STREAMER_EF, 5);
  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
  ailego::Params stg_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  // Guard against a missing storage plugin before dereferencing.
  ASSERT_NE(nullptr, storage);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestKnnSearch.index", true));
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  NumericalVector<float> vec(dim);
  size_t cnt = 5000U;
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  for (size_t i = 0; i < cnt; i++) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    // The brute-force pass below expects every key to exist.
    ASSERT_EQ(0, streamer->add_impl(i, vec.data(), qmeta, ctx));
  }

  // Separate contexts so the two result sets can be compared side by side.
  auto linearCtx = streamer->create_context();
  auto knnCtx = streamer->create_context();
  ASSERT_TRUE(!!linearCtx);
  ASSERT_TRUE(!!knnCtx);
  size_t topk = 200;
  linearCtx->set_topk(topk);
  knnCtx->set_topk(topk);
  uint64_t knnTotalTime = 0;
  uint64_t linearTotalTime = 0;
  int totalHits = 0;
  int totalCnts = 0;
  int topk1Hits = 0;
  for (size_t i = 0; i < cnt; i++) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i + 0.1f;
    }
    auto t1 = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, knnCtx));
    auto t2 = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0, streamer->search_bf_impl(vec.data(), qmeta, linearCtx));
    auto t3 = ailego::Realtime::MicroSeconds();
    knnTotalTime += t2 - t1;
    linearTotalTime += t3 - t2;

    auto &knnResult = knnCtx->result();
    ASSERT_EQ(topk, knnResult.size());
    topk1Hits += i == knnResult[0].key();

    auto &linearResult = linearCtx->result();
    ASSERT_EQ(topk, linearResult.size());
    ASSERT_EQ(i, linearResult[0].key());

    // Count how many graph-search hits also appear in the brute-force list.
    for (size_t k = 0; k < topk; ++k) {
      totalCnts++;
      for (size_t j = 0; j < topk; ++j) {
        if (linearResult[j].key() == knnResult[k].key()) {
          totalHits++;
          break;
        }
      }
    }
  }
  float recall = totalHits * 1.0f / totalCnts;
  float topk1Recall = topk1Hits * 1.0f / cnt;
  // Only consumed by the disabled printf below.
  float cost = linearTotalTime * 1.0f / knnTotalTime;
#if 0
    printf("knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d "
           "R@%zd=%f R@1=%f cost=%f\n",
           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,
           topk1Recall, cost);
#endif
  EXPECT_GT(recall, 0.90f);
  EXPECT_GT(topk1Recall, 0.95f);
  // // EXPECT_GT(cost, 2.0f);
}

// Inserts 20000 vectors (vector i has every component equal to i), then for
// every 100th key compares the top-200 graph search against the top-200
// brute-force search and checks overall recall and top-1 recall.
TEST_F(HnswStreamerTest, TestAddAndSearch) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer != nullptr);

  ailego::Params params;
  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);
  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);
  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 10);
  params.set(PARAM_HNSW_STREAMER_EF, 5);
  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
  ailego::Params stg_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  // Guard against a missing storage plugin before dereferencing.
  ASSERT_NE(nullptr, storage);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestAddAndSearch.index", true));
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  NumericalVector<float> vec(dim);
  size_t cnt = 20000U;
  auto ctx = streamer->create_context();
  auto linearCtx = streamer->create_context();
  auto knnCtx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  ASSERT_TRUE(!!linearCtx);
  ASSERT_TRUE(!!knnCtx);
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  for (size_t i = 0; i < cnt; i++) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    // A failed insert would otherwise skew the recall figures silently.
    ASSERT_EQ(0, streamer->add_impl(i, vec.data(), qmeta, ctx));
  }

  // streamer->print_debug_info();

  size_t topk = 200;
  linearCtx->set_topk(topk);
  knnCtx->set_topk(topk);
  uint64_t knnTotalTime = 0;
  uint64_t linearTotalTime = 0;
  int totalHits = 0;
  int totalCnts = 0;
  int topk1Hits = 0;
  for (size_t i = 0; i < cnt; i += 100) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i + 0.1f;
    }
    auto t1 = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, knnCtx));
    auto t2 = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0, streamer->search_bf_impl(vec.data(), qmeta, linearCtx));
    auto t3 = ailego::Realtime::MicroSeconds();
    knnTotalTime += t2 - t1;
    linearTotalTime += t3 - t2;

    auto &knnResult = knnCtx->result();
    ASSERT_EQ(topk, knnResult.size());
    topk1Hits += i == knnResult[0].key();

    auto &linearResult = linearCtx->result();
    ASSERT_EQ(topk, linearResult.size());
    ASSERT_EQ(i, linearResult[0].key());

    // Count how many graph-search hits also appear in the brute-force list.
    for (size_t k = 0; k < topk; ++k) {
      totalCnts++;
      for (size_t j = 0; j < topk; ++j) {
        if (linearResult[j].key() == knnResult[k].key()) {
          totalHits++;
          break;
        }
      }
    }
  }
  float recall = totalHits * 1.0f / totalCnts;
  // Only cnt / 100 queries were issued (loop step is 100), so multiplying by
  // 100 before dividing by cnt yields hits per query, i.e. a ratio in [0, 1].
  float topk1Recall = topk1Hits * 100.0f / cnt;
  // Only consumed by the disabled printf below.
  float cost = linearTotalTime * 1.0f / knnTotalTime;
#if 0
    printf("knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d "
           "R@%zd=%f R@1=%f cost=%f\n",
           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,
           topk1Recall, cost);
#endif
  EXPECT_GT(recall, 0.80f);
  EXPECT_GT(topk1Recall, 0.80f);
  // EXPECT_GT(cost, 2.0f);
}

// Inserts 1500 random 128-dimensional vectors (keys cnt .. 2 * cnt - 1), then
// issues 500 random queries and compares the top-100 graph search against the
// top-100 brute-force search, checking overall recall and top-1 agreement.
TEST_F(HnswStreamerTest, TestKnnSearchRandomData) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer != nullptr);

  // Shadows the file-level dim for this test only.
  constexpr size_t static dim = 128;
  IndexMeta meta(IndexMeta::DataType::DT_FP32, dim);
  meta.set_metric("SquaredEuclidean", 0, ailego::Params());
  ailego::Params params;
  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 128);
  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 20);
  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 200);
  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
  params.set(PARAM_HNSW_STREAMER_EF, 10);

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ailego::Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestKnnSearchRandomData", true));
  ASSERT_EQ(0, streamer->init(meta, params));
  ASSERT_EQ(0, streamer->open(storage));

  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  NumericalVector<float> vec(dim);
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  size_t cnt = 1500;
  for (size_t i = 0; i < cnt; i++) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
    }
    // A failed insert would otherwise skew the recall figures silently.
    ASSERT_EQ(0, streamer->add_impl(i + cnt, vec.data(), qmeta, ctx));
  }

  // Separate contexts so the two result sets can be compared side by side.
  auto linearCtx = streamer->create_context();
  auto knnCtx = streamer->create_context();
  ASSERT_TRUE(!!linearCtx);
  ASSERT_TRUE(!!knnCtx);
  size_t topk = 100;
  linearCtx->set_topk(topk);
  knnCtx->set_topk(topk);
  uint64_t knnTotalTime = 0;
  uint64_t linearTotalTime = 0;
  int totalHits = 0;
  int totalCnts = 0;
  int topk1Hits = 0;
  // From here on cnt is the number of queries, not the number of inserts.
  cnt = 500;
  for (size_t i = 0; i < cnt; i += 1) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
    }
    auto t1 = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0, streamer->search_bf_impl(vec.data(), qmeta, linearCtx));
    auto t2 = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, knnCtx));
    auto t3 = ailego::Realtime::MicroSeconds();
    knnTotalTime += t3 - t2;
    linearTotalTime += t2 - t1;

    auto &knnResult = knnCtx->result();
    ASSERT_EQ(topk, knnResult.size());
    auto &linearResult = linearCtx->result();
    ASSERT_EQ(topk, linearResult.size());

    topk1Hits += linearResult[0].key() == knnResult[0].key();

    // Count how many graph-search hits also appear in the brute-force list.
    for (size_t k = 0; k < topk; ++k) {
      totalCnts++;
      for (size_t j = 0; j < topk; ++j) {
        if (linearResult[j].key() == knnResult[k].key()) {
          totalHits++;
          break;
        }
      }
    }
  }

  std::cout << "knnTotalTime: " << knnTotalTime << std::endl;
  std::cout << "linearTotalTime: " << linearTotalTime << std::endl;

  float recall = totalHits * 1.0f / totalCnts;
  float topk1Recall = topk1Hits * 1.0f / cnt;
  // float cost = linearTotalTime * 1.0f / knnTotalTime;
#if 0
    printf("knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d "
           "R@%zd=%f R@1=%f cost=%f\n",
           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,
           topk1Recall, cost);
#endif
  EXPECT_GT(recall, 0.50f);
  EXPECT_GT(topk1Recall, 0.80f);
  // EXPECT_GT(cost, 5.0f);
}

// Alternates one streamer between two storages: even keys are added while
// storage1 is open and odd keys while storage2 is open, with a flush and close
// after every insert. After each insert, and again through two freshly created
// streamers at the end, an iterator walk checks that each storage holds
// exactly its own keys and vectors.
TEST_F(HnswStreamerTest, TestOpenClose) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer != nullptr);

  // Shadows the file-level dim for this test only.
  constexpr size_t static dim = 2048;
  IndexMeta meta(IndexMeta::DataType::DT_FP32, dim);
  meta.set_metric("SquaredEuclidean", 0, ailego::Params());
  ailego::Params params;
  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);
  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 5);
  auto storage1 = IndexFactory::CreateStorage("MMapFileStorage");
  auto storage2 = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage1);
  ASSERT_NE(nullptr, storage2);
  ailego::Params stg_params;
  ASSERT_EQ(0, storage1->init(stg_params));
  ASSERT_EQ(0, storage1->open(dir_ + "TessOpenAndClose1", true));
  ASSERT_EQ(0, storage2->init(stg_params));
  ASSERT_EQ(0, storage2->open(dir_ + "TessOpenAndClose2", true));
  ASSERT_EQ(0, streamer->init(meta, params));
  // Walks the streamer's iterator and expects keys base, base + 2, ... with
  // `total` entries, each vector filled with its own key value.
  auto checkIter = [](size_t base, size_t total,
                      IndexStreamer::Pointer &streamer) {
    auto provider = streamer->create_provider();
    auto iter = provider->create_iterator();
    ASSERT_TRUE(!!iter);
    size_t cur = base;
    size_t cnt = 0;
    while (iter->is_valid()) {
      float *data = (float *)iter->data();
      ASSERT_EQ(cur, iter->key());
      for (size_t d = 0; d < dim; ++d) {
        ASSERT_FLOAT_EQ((float)cur, data[d]);
      }
      iter->next();
      cur += 2;
      cnt++;
    }
    ASSERT_EQ(cnt, total);
  };

  size_t testCnt = 200;
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  for (size_t i = 0; i < testCnt; i += 2) {
    // Even key goes to storage1.
    float v1 = (float)i;
    ASSERT_EQ(0, streamer->open(storage1));
    auto ctx = streamer->create_context();
    ASSERT_TRUE(!!ctx);
    std::vector<float> vec1(dim);
    for (size_t d = 0; d < dim; ++d) {
      vec1[d] = v1;
    }
    ASSERT_EQ(0, streamer->add_impl(i, vec1.data(), qmeta, ctx));
    checkIter(0, i / 2 + 1, streamer);
    ASSERT_EQ(0, streamer->flush(0UL));
    ASSERT_EQ(0, streamer->close());

    // Odd key goes to storage2.
    float v2 = (float)(i + 1);
    std::vector<float> vec2(dim);
    for (size_t d = 0; d < dim; ++d) {
      vec2[d] = v2;
    }
    ASSERT_EQ(0, streamer->open(storage2));
    ctx = streamer->create_context();
    ASSERT_TRUE(!!ctx);
    ASSERT_EQ(0, streamer->add_impl(i + 1, vec2.data(), qmeta, ctx));
    checkIter(1, i / 2 + 1, streamer);
    ASSERT_EQ(0, streamer->flush(0UL));
    ASSERT_EQ(0, streamer->close());
  }

  // Fresh streamers must see the accumulated contents of each storage.
  // The null checks now test the newly created pointers; they previously
  // re-checked `streamer`, leaving streamer1/streamer2 unverified.
  IndexStreamer::Pointer streamer1 =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer1 != nullptr);
  ASSERT_EQ(0, streamer1->init(meta, params));
  ASSERT_EQ(0, streamer1->open(storage1));

  IndexStreamer::Pointer streamer2 =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer2 != nullptr);
  ASSERT_EQ(0, streamer2->init(meta, params));
  ASSERT_EQ(0, streamer2->open(storage2));

  checkIter(0, testCnt / 2, streamer1);
  checkIter(1, testCnt / 2, streamer2);
}

// Inserts 200 vectors one at a time (vector i has every component equal to i),
// walking the provider iterator after every insert, then closes and reopens
// the streamer on the same storage and re-checks the iterator and
// get_vector() for every key.
TEST_F(HnswStreamerTest, TestCreateIterator) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer != nullptr);

  ailego::Params params;
  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);
  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 5);
  params.set(PARAM_HNSW_STREAMER_FILTER_SAME_KEY, true);
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ailego::Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestCreateIterator", true));
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  // Walks the streamer's iterator and expects keys 0, 1, ... total - 1, each
  // vector filled with its own key value.
  auto checkIter = [](size_t total, IndexStreamer::Pointer &streamer) {
    auto provider = streamer->create_provider();
    auto iter = provider->create_iterator();
    ASSERT_TRUE(!!iter);
    size_t cur = 0;
    while (iter->is_valid()) {
      float *data = (float *)iter->data();
      ASSERT_EQ(cur, iter->key());
      for (size_t d = 0; d < dim; ++d) {
        ASSERT_FLOAT_EQ((float)cur, data[d]);
      }
      iter->next();
      cur++;
    }
    ASSERT_EQ(cur, total);
  };

  NumericalVector<float> vec(dim);
  size_t cnt = 200;
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  for (size_t i = 0; i < cnt; i++) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    // Report an insert failure directly instead of as an iterator mismatch.
    ASSERT_EQ(0, streamer->add_impl(i, vec.data(), qmeta, ctx));
    checkIter(i + 1, streamer);
  }

  streamer->flush(0UL);
  streamer->close();
  ASSERT_EQ(0, streamer->open(storage));
  checkIter(cnt, streamer);

  // check getVector
  auto provider = streamer->create_provider();
  ASSERT_TRUE(!!provider);
  for (size_t i = 0; i < cnt; i++) {
    const float *data = (const float *)provider->get_vector(i);
    ASSERT_NE(data, nullptr);
    for (size_t j = 0; j < dim; ++j) {
      ASSERT_FLOAT_EQ(i, data[j]);
    }
  }
}

// Calls cleanup() on a streamer that was created but never initialised.
TEST_F(HnswStreamerTest, TestNoInit) {
  IndexStreamer::Pointer uninitialised =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(uninitialised != nullptr);

  uninitialised->cleanup();
}

// Same scenario as an insert-and-iterate check, but with the storage created
// with the copy_on_write and force_flush options set: inserts 200 vectors,
// flushes and closes both streamer and storage, then reopens the path with a
// new storage object and verifies the iterator and get_vector() for every key.
TEST_F(HnswStreamerTest, TestForceFlush) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer != nullptr);

  ailego::Params params;
  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);
  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 5);
  params.set(PARAM_HNSW_STREAMER_FILTER_SAME_KEY, true);
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ailego::Params stg_params;
  stg_params.set("proxima.mmap_file.storage.copy_on_write", true);
  stg_params.set("proxima.mmap_file.storage.force_flush", true);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestForceFlush", true));
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  // Walks the streamer's iterator and expects keys 0, 1, ... total - 1, each
  // vector filled with its own key value.
  auto checkIter = [](size_t total, IndexStreamer::Pointer &streamer) {
    auto provider = streamer->create_provider();
    auto iter = provider->create_iterator();
    ASSERT_TRUE(!!iter);
    size_t cur = 0;
    while (iter->is_valid()) {
      float *data = (float *)iter->data();
      ASSERT_EQ(cur, iter->key());
      for (size_t d = 0; d < dim; ++d) {
        ASSERT_FLOAT_EQ((float)cur, data[d]);
      }
      iter->next();
      cur++;
    }
    ASSERT_EQ(cur, total);
  };

  NumericalVector<float> vec(dim);
  size_t cnt = 200;
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  for (size_t i = 0; i < cnt; i++) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    // Report an insert failure directly instead of as an iterator mismatch.
    ASSERT_EQ(0, streamer->add_impl(i, vec.data(), qmeta, ctx));
    checkIter(i + 1, streamer);
  }

  streamer->flush(0UL);
  streamer->close();
  storage->close();

  // Reopen the same path through a brand-new storage object.
  storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestForceFlush", true));
  ASSERT_EQ(0, streamer->open(storage));
  checkIter(cnt, streamer);

  // check getVector
  auto provider = streamer->create_provider();
  ASSERT_TRUE(!!provider);
  for (size_t i = 0; i < cnt; i++) {
    const float *data = (const float *)provider->get_vector(i);
    ASSERT_NE(data, nullptr);
    for (size_t j = 0; j < dim; ++j) {
      ASSERT_FLOAT_EQ(i, data[j]);
    }
  }
}

// Two-phase multi-threaded test on a single streamer:
//   1. three async tasks insert 1000 vectors each (keys 0-999, 1000-1999 and
//      2000-2999); every vector is filled with its own key value;
//   2. after a close/open cycle and an iterator-based content check, three
//      async tasks each run 3000 queries and compare the keys returned by
//      search_impl() with those returned by search_bf_impl(), requiring an
//      overall hit ratio above 0.80.
TEST_F(HnswStreamerTest, TestKnnMultiThread) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer != nullptr);

  ailego::Params params;
  constexpr size_t static dim = 32;
  IndexMeta meta(IndexMeta::DataType::DT_FP32, dim);
  meta.set_metric("SquaredEuclidean", 0, ailego::Params());
  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 128);
  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 10);
  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 64);
  params.set(PARAM_HNSW_STREAMER_MAX_INDEX_SIZE, 30 * 1024 * 1024U);
  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
  params.set(PARAM_HNSW_STREAMER_EF, 32);
  params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, true);
  ASSERT_EQ(0, streamer->init(meta, params));
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ailego::Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "TessKnnMultiThread", true));
  ASSERT_EQ(0, streamer->open(storage));

  // Inserts keys baseKey .. baseKey+addCnt-1 using a context private to the
  // calling task, flushes, and returns how many add_impl() calls returned 0.
  auto addVector = [&streamer](int baseKey, size_t addCnt) {
    NumericalVector<float> vec(dim);
    IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
    size_t succAdd = 0;
    auto ctx = streamer->create_context();
    for (size_t i = 0; i < addCnt; i++) {
      for (size_t j = 0; j < dim; ++j) {
        vec[j] = (float)i + baseKey;
      }
      succAdd += !streamer->add_impl(baseKey + i, vec.data(), qmeta, ctx);
    }
    streamer->flush(0UL);
    return succAdd;
  };
  // The three writers are launched before any of them is joined, so they may
  // run at the same time against the shared streamer.
  auto t2 = std::async(std::launch::async, addVector, 1000, 1000);
  auto t3 = std::async(std::launch::async, addVector, 2000, 1000);
  auto t1 = std::async(std::launch::async, addVector, 0, 1000);
  ASSERT_EQ(1000U, t1.get());
  ASSERT_EQ(1000U, t2.get());
  ASSERT_EQ(1000U, t3.get());
  streamer->close();

  // checking data
  ASSERT_EQ(0, streamer->open(storage));
  auto provider = streamer->create_provider();
  auto iter = provider->create_iterator();
  ASSERT_TRUE(!!iter);
  size_t total = 0;
  // Running minimum / maximum key seen by the iterator; no particular
  // iteration order is required here, only the count and the key range.
  uint64_t min = 1000;
  uint64_t max = 0;
  while (iter->is_valid()) {
    float *data = (float *)iter->data();
    for (size_t d = 0; d < dim; ++d) {
      ASSERT_FLOAT_EQ((float)iter->key(), data[d]);
    }
    total++;
    min = std::min(min, iter->key());
    max = std::max(max, iter->key());
    iter->next();
  }
  ASSERT_EQ(3000, total);
  ASSERT_EQ(0, min);
  ASSERT_EQ(2999, max);

  // ====== multi thread search
  size_t topk = 100;
  size_t cnt = 3000;
  // One search task: for each query i (all components set to i + 0.1) it runs
  // search_impl(), search_bf_impl() and search_bf_by_p_keys_impl(), each with
  // its own context. Note that the ASSERT_* macros inside this lambda only
  // return from the lambda on failure.
  auto knnSearch = [&]() {
    NumericalVector<float> vec(dim);
    auto linearCtx = streamer->create_context();
    auto linearByPkeysCtx = streamer->create_context();
    auto ctx = streamer->create_context();
    IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
    linearCtx->set_topk(topk);
    linearByPkeysCtx->set_topk(topk);
    ctx->set_topk(topk);
    size_t totalCnts = 0;
    size_t totalHits = 0;
    for (size_t i = 0; i < cnt; i += 1) {
      for (size_t j = 0; j < dim; ++j) {
        vec[j] = i + 0.1f;
      }
      ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));
      ASSERT_EQ(0, streamer->search_bf_impl(vec.data(), qmeta, linearCtx));
      std::vector<std::vector<uint64_t>> p_keys = {{0, 1, 2}};
      ASSERT_EQ(0, streamer->search_bf_by_p_keys_impl(vec.data(), p_keys, qmeta,
                                                      linearByPkeysCtx));
      auto &r1 = ctx->result();
      ASSERT_EQ(topk, r1.size());
      auto &r2 = linearCtx->result();
      ASSERT_EQ(topk, r2.size());
      // The first search_bf_impl() result must be the vector with key i.
      ASSERT_EQ(i, r2[0].key());
      auto &r3 = linearByPkeysCtx->result();
      // Only three candidate keys are supplied, so at most three results.
      ASSERT_EQ(std::min(topk, p_keys[0].size()), r3.size());
#if 0
            printf("linear: %zd => %zd %zd %zd %zd %zd\n", i, r2[0].key,
                   r2[1].key, r2[2].key, r2[3].key, r2[4].key);
            printf("knn: %zd => %zd %zd %zd %zd %zd\n", i, r1[0].key, r1[1].key,
                   r1[2].key, r1[3].key, r1[4].key);
#endif
      // Count how many keys of r1 also appear among the keys of r2.
      for (size_t k = 0; k < topk; ++k) {
        totalCnts++;
        for (size_t j = 0; j < topk; ++j) {
          if (r2[j].key() == r1[k].key()) {
            totalHits++;
            break;
          }
        }
      }
    }
    // printf("%f\n", totalHits * 1.0f / totalCnts);
    ASSERT_TRUE((totalHits * 1.0f / totalCnts) > 0.80f);
  };
  auto s1 = std::async(std::launch::async, knnSearch);
  auto s2 = std::async(std::launch::async, knnSearch);
  auto s3 = std::async(std::launch::async, knnSearch);
  s1.wait();
  s2.wait();
  s3.wait();
}

// Runs insert tasks and search tasks against the same streamer at the same
// time. Keys 0-999 are inserted and awaited first; afterwards two insert
// tasks (keys 1000-1999 and 2000-2999) and two search tasks are launched
// together. Once everything has finished, the iterator must report 3000
// documents with keys spanning 0..2999.
TEST_F(HnswStreamerTest, TestKnnConcurrentAddAndSearch) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer != nullptr);

  ailego::Params params;
  constexpr size_t static dim = 32;
  IndexMeta meta(IndexMeta::DataType::DT_FP32, dim);
  meta.set_metric("SquaredEuclidean", 0, ailego::Params());
  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 128);
  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 10);
  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 64);
  params.set(PARAM_HNSW_STREAMER_MAX_INDEX_SIZE, 30 * 1024 * 1024U);
  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
  params.set(PARAM_HNSW_STREAMER_CHUNK_SIZE, 4096);
  params.set(PARAM_HNSW_STREAMER_EF, 32);
  params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, true);
  ASSERT_EQ(0, streamer->init(meta, params));
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ailego::Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "TessKnnConcurrentAddAndSearch", true));
  ASSERT_EQ(0, streamer->open(storage));

  // Inserts keys baseKey .. baseKey+addCnt-1 (each vector filled with its key
  // value) through a task-private context, flushes, and returns the number of
  // add_impl() calls that returned 0.
  auto addVector = [&streamer](int baseKey, size_t addCnt) {
    NumericalVector<float> vec(dim);
    IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
    auto ctx = streamer->create_context();
    size_t succAdd = 0;
    for (size_t i = 0; i < addCnt; i++) {
      for (size_t j = 0; j < dim; ++j) {
        vec[j] = (float)i + baseKey;
      }
      succAdd += !streamer->add_impl(baseKey + i, vec.data(), qmeta, ctx);
    }
    streamer->flush(0UL);
    return succAdd;
  };

  // ====== multi thread search
  // One search task: 3000 queries, each compared between search_impl() and
  // search_bf_impl(); the accumulated hit ratio must exceed 0.80. ASSERT_*
  // failures inside the lambda only return from the lambda.
  auto knnSearch = [&]() {
    size_t topk = 100;
    size_t cnt = 3000;
    NumericalVector<float> vec(dim);
    auto linearCtx = streamer->create_context();
    auto linearByPKeysCtx = streamer->create_context();
    auto ctx = streamer->create_context();
    linearCtx->set_topk(topk);
    linearByPKeysCtx->set_topk(topk);
    ctx->set_topk(topk);
    size_t totalCnts = 0;
    size_t totalHits = 0;
    IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
    for (size_t i = 0; i < cnt; i += 1) {
      for (size_t j = 0; j < dim; ++j) {
        vec[j] = i + 0.1f;
      }
      ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));
      ASSERT_EQ(0, streamer->search_bf_impl(vec.data(), qmeta, linearCtx));
      std::vector<std::vector<uint64_t>> p_keys = {{0, 1, 2}};
      ASSERT_EQ(0, streamer->search_bf_by_p_keys_impl(vec.data(), p_keys, qmeta,
                                                      linearByPKeysCtx));
      auto &r1 = ctx->result();
      ASSERT_EQ(topk, r1.size());
      auto &r2 = linearCtx->result();
      ASSERT_EQ(topk, r2.size());
      auto &r3 = linearByPKeysCtx->result();
      ASSERT_EQ(std::min(topk, p_keys[0].size()), r3.size());
// NOTE(review): the exact-nearest check below is disabled; presumably because
// key i may not have been inserted yet while writers are still running --
// confirm before re-enabling.
// ASSERT_EQ(i, r2[0].key);
#if 0
            printf("linear: %zd => %zd %zd %zd %zd %zd\n", i, r2[0].key,
                   r2[1].key, r2[2].key, r2[3].key, r2[4].key);
            printf("knn: %zd => %zd %zd %zd %zd %zd\n", i, r1[0].key, r1[1].key,
                   r1[2].key, r1[3].key, r1[4].key);
#endif
      // Count how many keys of r1 also appear among the keys of r2.
      for (size_t k = 0; k < topk; ++k) {
        totalCnts++;
        for (size_t j = 0; j < topk; ++j) {
          if (r2[j].key() == r1[k].key()) {
            totalHits++;
            break;
          }
        }
      }
    }
    //        printf("%f\n", totalHits * 1.0f / totalCnts);
    ASSERT_TRUE((totalHits * 1.0f / totalCnts) > 0.80f);
  };
  // The first batch is fully inserted (get() blocks) before any search task
  // starts; the remaining writers and the readers then overlap.
  auto t0 = std::async(std::launch::async, addVector, 0, 1000);
  ASSERT_EQ(1000, t0.get());
  auto t1 = std::async(std::launch::async, addVector, 1000, 1000);
  auto t2 = std::async(std::launch::async, addVector, 2000, 1000);
  auto s1 = std::async(std::launch::async, knnSearch);
  auto s2 = std::async(std::launch::async, knnSearch);
  ASSERT_EQ(1000, t1.get());
  ASSERT_EQ(1000, t2.get());
  s1.wait();
  s2.wait();

  // checking data
  auto provider = streamer->create_provider();
  auto iter = provider->create_iterator();
  ASSERT_TRUE(!!iter);
  size_t total = 0;
  // Running minimum / maximum key reported by the iterator.
  uint64_t min = 1000;
  uint64_t max = 0;
  while (iter->is_valid()) {
    float *data = (float *)iter->data();
    for (size_t d = 0; d < dim; ++d) {
      ASSERT_FLOAT_EQ((float)iter->key(), data[d]);
    }
    total++;
    min = std::min(min, iter->key());
    max = std::max(max, iter->key());
    iter->next();
  }
  ASSERT_EQ(3000, total);
  ASSERT_EQ(0, min);
  ASSERT_EQ(2999, max);
}

// Builds an index of 100000 vectors, then opens it with two further
// streamers whose brute-force threshold is set to (doc count - 1) and to the
// doc count respectively. The accumulated latency of 100 searches on the
// first one must be lower than on the second one. Finally one more search is
// issued on the first streamer with debug mode and updated context params.
TEST_F(HnswStreamerTest, TestBfThreshold) {
  // --- build phase -------------------------------------------------------
  IndexStreamer::Pointer builder =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_NE(nullptr, builder);
  ailego::Params params;
  params.set(PARAM_HNSW_STREAMER_EF, 16);
  ASSERT_EQ(0, builder->init(*index_meta_ptr_, params));

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ailego::Params storage_params;
  ASSERT_EQ(0, storage->init(storage_params));
  ASSERT_EQ(0, storage->open(dir_ + "TessBfThreshold", true));
  ASSERT_EQ(0, builder->open(storage));

  NumericalVector<float> query(dim);
  size_t doc_count = 100000;
  auto build_ctx = builder->create_context();
  ASSERT_TRUE(!!build_ctx);
  build_ctx->set_topk(1U);
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  for (size_t key = 0; key < doc_count; ++key) {
    // Every component of the vector equals its key.
    for (size_t d = 0; d < dim; ++d) {
      query[d] = key;
    }
    builder->add_impl(key, query.data(), qmeta, build_ctx);
  }
  builder->flush(0UL);
  builder->close();

  // --- streamer with threshold = doc_count - 1 ---------------------------
  IndexStreamer::Pointer low_streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_NE(low_streamer, nullptr);
  ailego::Params low_params = params;
  low_params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, doc_count - 1);
  ASSERT_EQ(0, low_streamer->init(*index_meta_ptr_, low_params));
  ASSERT_EQ(0, low_streamer->open(storage));
  auto low_ctx = low_streamer->create_context();

  // --- streamer with threshold = doc_count -------------------------------
  IndexStreamer::Pointer high_streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_NE(high_streamer, nullptr);
  ailego::Params high_params = params;
  high_params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, doc_count);
  ASSERT_EQ(0, high_streamer->init(*index_meta_ptr_, high_params));
  ASSERT_EQ(0, high_streamer->open(storage));
  auto high_ctx = high_streamer->create_context();

  // --- timing: `query` still holds the last inserted vector --------------
  size_t low_cost = 0;
  size_t high_cost = 0;
  for (size_t round = 0; round < 100; ++round) {
    auto begin = ailego::Monotime::MicroSeconds();
    ASSERT_EQ(0, low_streamer->search_impl(query.data(), qmeta, low_ctx));
    auto middle = ailego::Monotime::MicroSeconds();
    ASSERT_EQ(0, high_streamer->search_impl(query.data(), qmeta, high_ctx));
    auto end = ailego::Monotime::MicroSeconds();
    low_cost += middle - begin;
    high_cost += end - middle;
  }

  ASSERT_LT(low_cost, high_cost);

  // --- context update + debug output -------------------------------------
  ailego::Params ctx_update;
  ctx_update.set(PARAM_HNSW_STREAMER_VISIT_BLOOMFILTER_ENABLE, true);
  ctx_update.set(PARAM_HNSW_STREAMER_EF, 50);
  low_ctx->set_debug_mode(true);
  low_ctx->update(ctx_update);
  ASSERT_EQ(0, low_streamer->search_impl(query.data(), qmeta, low_ctx));
  LOG_DEBUG("%s", low_ctx->debug_string().c_str());
}

// Checks that a context filter removes matching keys from the results of
// search_impl(), search_bf_impl() and search_bf_by_p_keys_impl(). The index
// holds 2000 vectors whose components all equal their key; the query is the
// constant vector 100.1.
TEST_F(HnswStreamerTest, TestFilter) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_NE(nullptr, streamer);

  ailego::Params params;
  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);
  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);
  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 10);
  params.set(PARAM_HNSW_STREAMER_EF, 1000);
  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
  params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, true);
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ailego::Params storage_params;
  ASSERT_EQ(0, storage->init(storage_params));
  ASSERT_EQ(0, storage->open(dir_ + "TessFilter", true));
  ASSERT_EQ(0, streamer->open(storage));

  NumericalVector<float> vec(dim);
  const size_t doc_count = 2000;
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  ctx->set_topk(10U);
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);

  // A single candidate list holding every inserted key.
  std::vector<std::vector<uint64_t>> p_keys(1);
  for (size_t key = 0; key < doc_count; ++key) {
    for (size_t d = 0; d < dim; ++d) {
      vec[d] = key;
    }
    streamer->add_impl(key, vec.data(), qmeta, ctx);
    p_keys[0].push_back(key);
  }

  // Query vector: every component is 100.1.
  for (size_t d = 0; d < dim; ++d) {
    vec[d] = 100.1;
  }

  // Without a filter the three best keys are 100, 101, 99.
  ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));
  auto &unfiltered = ctx->result();
  ASSERT_EQ(10, unfiltered.size());
  ASSERT_EQ(100, unfiltered[0].key());
  ASSERT_EQ(101, unfiltered[1].key());
  ASSERT_EQ(99, unfiltered[2].key());

  // The filter returns true for keys 100 and 101.
  auto filterFunc = [](uint64_t key) {
    return key == 100UL || key == 101UL;
  };
  ctx->set_filter(filterFunc);

  // after set filter
  ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));
  auto &knn_filtered = ctx->result();
  ASSERT_EQ(10, knn_filtered.size());
  ASSERT_EQ(99, knn_filtered[0].key());
  ASSERT_EQ(102, knn_filtered[1].key());
  ASSERT_EQ(98, knn_filtered[2].key());

  // linear
  ASSERT_EQ(0, streamer->search_bf_impl(vec.data(), qmeta, ctx));
  auto &linear_filtered = ctx->result();
  ASSERT_EQ(10, linear_filtered.size());
  ASSERT_EQ(99, linear_filtered[0].key());
  ASSERT_EQ(102, linear_filtered[1].key());
  ASSERT_EQ(98, linear_filtered[2].key());

  // linear by p_keys
  ASSERT_EQ(0,
            streamer->search_bf_by_p_keys_impl(vec.data(), p_keys, qmeta, ctx));
  auto &pkeys_filtered = ctx->result();
  ASSERT_EQ(10, pkeys_filtered.size());
  ASSERT_EQ(99, pkeys_filtered[0].key());
  ASSERT_EQ(102, pkeys_filtered[1].key());
  ASSERT_EQ(98, pkeys_filtered[2].key());
}

// Inserts 10000 vectors of dimension 128 and checks that the growth of the
// process resident set size stays within +/-20% of an estimate of
// (dim * sizeof(float) + 400) bytes per document. The check is skipped when
// memory usage cannot be sampled or when the virtual size exceeds 1 TiB
// (treated as an ASAN build by the original author).
TEST_F(HnswStreamerTest, TestMaxIndexSize) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer != nullptr);

  ailego::Params params;
  constexpr size_t static dim = 128;
  IndexMeta meta(IndexMeta::DataType::DT_FP32, dim);
  meta.set_metric("SquaredEuclidean", 0, ailego::Params());
  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 5);
  ASSERT_EQ(0, streamer->init(meta, params));
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ailego::Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "TessMaxIndexSize", true));
  ASSERT_EQ(0, streamer->open(storage));

  size_t vsz0 = 0;
  size_t rss0 = 0;
  if (!ailego::MemoryHelper::SelfUsage(&vsz0, &rss0)) {
    // do not check if get mem usage failed
    return;
  }
  // Use an unsigned long long literal so that the 1 TiB constant does not
  // wrap to 0 on targets where `unsigned long` is only 32 bits wide.
  if (vsz0 > 1024ULL * 1024 * 1024 * 1024) {
    // asan mode
    return;
  }

  NumericalVector<float> vec(dim);
  size_t writeCnt1 = 10000;
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  auto ctx = streamer->create_context();
  for (size_t i = 0; i < writeCnt1; i++) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    streamer->add_impl(i, vec.data(), qmeta, ctx);
  }
  size_t vsz1 = 0;
  size_t rss1 = 0;
  if (!ailego::MemoryHelper::SelfUsage(&vsz1, &rss1)) {
    // Same policy as for the baseline sample: nothing to compare against.
    return;
  }
  // Guard the unsigned subtraction below against wrap-around.
  ASSERT_GE(rss1, rss0);
  size_t increment1 = rss1 - rss0;
  // Vector payload plus 100 * 4 bytes per document.
  // NOTE(review): the 100 * 4 term presumably models neighbor links -- confirm.
  const size_t expected =
      writeCnt1 * dim * sizeof(float) + writeCnt1 * 100 * 4;
  ASSERT_GT(expected, increment1 * 0.8f);
  ASSERT_LT(expected, increment1 * 1.2f);

  streamer->flush(0UL);
  streamer.reset();
}

// Uses one streamer object for two complete life cycles
// (init -> open -> add -> close -> cleanup), the second time with a
// different vector dimension and a different storage file.
TEST_F(HnswStreamerTest, TestKnnCleanUp) {
  auto streamer = IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_NE(nullptr, streamer);

  // ---- first life cycle: 32-dimensional vectors --------------------------
  auto first_storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, first_storage);
  ailego::Params stg_params;
  ASSERT_EQ(0, first_storage->init(stg_params));
  ASSERT_EQ(0, first_storage->open(dir_ + "TessKnnCluenUp1", true));

  ailego::Params params;
  static constexpr size_t kFirstDim = 32;
  IndexMeta first_meta(IndexMeta::DataType::DT_FP32, kFirstDim);
  first_meta.set_metric("SquaredEuclidean", 0, ailego::Params());
  NumericalVector<float> first_vec(kFirstDim);
  ASSERT_EQ(0, streamer->init(first_meta, params));
  ASSERT_EQ(0, streamer->open(first_storage));
  IndexQueryMeta first_qmeta(IndexMeta::DataType::DT_FP32, kFirstDim);
  auto first_ctx = streamer->create_context();
  ASSERT_EQ(0, streamer->add_impl(1, first_vec.data(), first_qmeta, first_ctx));
  ASSERT_EQ(0, streamer->close());
  ASSERT_EQ(0, streamer->cleanup());

  // ---- second life cycle: 64-dimensional vectors -------------------------
  auto second_storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, second_storage);
  ASSERT_EQ(0, second_storage->init(stg_params));
  ASSERT_EQ(0, second_storage->open(dir_ + "TessKnnCluenUp2", true));

  static constexpr size_t kSecondDim = 64;
  IndexMeta second_meta(IndexMeta::DataType::DT_FP32, kSecondDim);
  second_meta.set_metric("SquaredEuclidean", 0, ailego::Params());
  NumericalVector<float> second_vec(kSecondDim);
  ASSERT_EQ(0, streamer->init(second_meta, params));
  ASSERT_EQ(0, streamer->open(second_storage));
  IndexQueryMeta second_qmeta(IndexMeta::DataType::DT_FP32, kSecondDim);
  auto second_ctx = streamer->create_context();
  ASSERT_EQ(0,
            streamer->add_impl(2, second_vec.data(), second_qmeta, second_ctx));
  ASSERT_EQ(0, streamer->close());
  ASSERT_EQ(0, streamer->cleanup());
}

// Configures a max index size of 2 * 1024 * 1024 and a chunk size of
// 100 * 1024, inserts 850 vectors of dimension 512 and expects the last
// non-zero code returned by add_impl() to be IndexError_IndexFull.
TEST_F(HnswStreamerTest, TestIndexSizeQuota) {
  auto streamer = IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_NE(nullptr, streamer);

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ailego::Params storage_params;
  ASSERT_EQ(0, storage->init(storage_params));
  ASSERT_EQ(0, storage->open(dir_ + "TestIndexSizeQuota", true));

  static constexpr size_t dim = 512;
  IndexMeta meta(IndexMeta::DataType::DT_FP32, dim);
  meta.set_metric("SquaredEuclidean", 0, ailego::Params());
  ailego::Params params;
  params.set(PARAM_HNSW_STREAMER_MAX_INDEX_SIZE, 2 * 1024 * 1024U);
  params.set(PARAM_HNSW_STREAMER_CHUNK_SIZE, 100 * 1024U);
  ASSERT_EQ(0, streamer->init(meta, params));
  ASSERT_EQ(0, streamer->open(storage));

  NumericalVector<float> vec(dim);
  const size_t kWriteCount = 850;
  int last_error = 0;  // most recent non-zero add_impl() result
  auto ctx = streamer->create_context();
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  for (size_t key = 0; key < kWriteCount; ++key) {
    for (size_t d = 0; d < dim; ++d) {
      vec[d] = key;
    }
    const int rc = streamer->add_impl(key, vec.data(), qmeta, ctx);
    // Keep inserting after a failure; only remember the latest error code.
    if (rc != 0) {
      last_error = rc;
    }
  }
  ASSERT_EQ(IndexError_IndexFull, last_error);
  ASSERT_EQ(0, streamer->close());
  ASSERT_EQ(0, streamer->cleanup());
}

// Inserts 5000 vectors with PARAM_HNSW_STREAMER_VISIT_BLOOMFILTER_ENABLE set
// to true and, after every tenth insert, searches with the vector just added
// and expects 10 results.
TEST_F(HnswStreamerTest, TestBloomFilter) {
  auto streamer = IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_NE(nullptr, streamer);

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ailego::Params storage_params;
  ASSERT_EQ(0, storage->init(storage_params));
  ASSERT_EQ(0, storage->open(dir_ + "TestBloomFilter", true));

  ailego::Params params;
  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);
  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);
  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 10);
  params.set(PARAM_HNSW_STREAMER_EF, 100);
  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
  params.set(PARAM_HNSW_STREAMER_VISIT_BLOOMFILTER_ENABLE, true);
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  NumericalVector<float> vec(dim);
  auto ctx = streamer->create_context();
  ASSERT_NE(nullptr, ctx);
  ctx->set_topk(10U);
  const size_t kDocCount = 5000;
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  for (size_t key = 0; key < kDocCount; ++key) {
    for (size_t d = 0; d < dim; ++d) {
      vec[d] = key;
    }
    streamer->add_impl(key, vec.data(), qmeta, ctx);

    // Only probe after inserts number 10, 20, 30, ... (keys 9, 19, 29, ...).
    if (key % 10 != 9) {
      continue;
    }
    ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));
    auto &hits = ctx->result();
    ASSERT_EQ(10, hits.size());
  }
}

// Sets "proxima.hnsw.streamer.docs_hard_limit" to 5 and verifies that five
// inserts succeed while the sixth one returns IndexError_IndexFull.
TEST_F(HnswStreamerTest, TestStreamerParams) {
  auto streamer = IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_NE(nullptr, streamer);

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ailego::Params storage_params;
  ASSERT_EQ(0, storage->init(storage_params));
  ASSERT_EQ(0, storage->open(dir_ + "TestStreamerParams", true));

  ailego::Params params;
  params.set("proxima.hnsw.streamer.docs_hard_limit", 5);
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  NumericalVector<float> vec(dim);
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  auto ctx = streamer->create_context();

  // Keys 1..5 fit within the configured limit.
  for (uint64_t key = 1; key <= 5; ++key) {
    ASSERT_EQ(0, streamer->add_impl(key, vec.data(), qmeta, ctx));
  }
  // The sixth document must be rejected.
  ASSERT_EQ(IndexError_IndexFull,
            streamer->add_impl(6, vec.data(), qmeta, ctx));
}

// Disabled test (compiled out by the `#if 0` below). As written it builds a
// 100-document index, closes streamer and storage, overwrites 1024 bytes in
// the middle of the index file, and then opens the file again with
// "proxima.hnsw.streamer.check_crc_enable" set to true.
// NOTE(review): before re-enabling, confirm the expected outcome of the final
// open() -- the test currently asserts success even though the file content
// was overwritten.
#if 0
TEST_F(HnswStreamerTest, TestCheckCrc)
{
    IndexStreamer::Pointer streamer =
        IndexFactory::CreateStreamer("HnswStreamer");
    ASSERT_TRUE(streamer != nullptr);

    auto storage = IndexFactory::CreateStorage("MMapFileStorage");
    ASSERT_NE(nullptr, storage);
    ailego::Params stg_params;
    ASSERT_EQ(0, storage->init(stg_params));
    std::string path = dir_ + "TestCheckCrc";
    ASSERT_EQ(0, storage->open(path, true));
    ailego::Params params;
    params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);
    params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);
    params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 10);
    params.set(PARAM_HNSW_STREAMER_EF, 100);
    params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
    params.set(PARAM_HNSW_STREAMER_VISIT_BLOOMFILTER_ENABLE, true);
    ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
    ASSERT_EQ(0, streamer->open(storage));

    NumericalVector<float> vec(dim);
    auto ctx = streamer->create_context();
    ASSERT_NE(nullptr, ctx);
    IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
    size_t cnt = 100;
    for (size_t i = 0; i < cnt; i++) {
        for (size_t j = 0; j < dim; ++j) {
            vec[j] = i;
        }
        streamer->add_impl(i, vec.data(), qmeta, ctx);
    }
    streamer->flush(0UL);
    streamer->close();
    storage->flush();
    storage->close();

    // Overwrite 1024 bytes starting at the middle of the index file.
    // NOTE(review): `buf` is never initialized, the pwrite() result is not
    // checked, and `fd` is never closed in this block.
    int fd = open(path.c_str(), O_RDWR);
    ASSERT_GT(fd, 0);
    struct stat fs;
    ASSERT_EQ(0, fstat(fd, &fs));
    char buf[1024];
    pwrite(fd, buf, sizeof(buf), fs.st_size/2);

    // Re-open the modified file with CRC checking switched on.
    ASSERT_EQ(0, storage->open(path, true));
    IndexStreamer::Pointer streamer2 =
        IndexFactory::CreateStreamer("HnswStreamer");
    ASSERT_NE(streamer2, nullptr);

    ailego::Params params2;
    params2.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);
    params2.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 5);
    params2.set("proxima.hnsw.streamer.check_crc_enable", true);
    ASSERT_EQ(0, streamer2->init(*index_meta_ptr_, params2));
    ASSERT_EQ(0, streamer2->open(storage));
}
#endif

// Follows the streamer statistics through a full life cycle:
//   1. freshly opened, empty index;
//   2. after inserting 3000 documents and flushing with a check point;
//   3. after close + re-open and one additional insert;
//   4. after another flush and a rejected insert of an existing key;
//   5. after dumping the index to a file.
// `stats`, `stats2` and `stats3` are references obtained from
// streamer->stats(); `stats1` is a copy taken right after the first flush.
TEST_F(HnswStreamerTest, TestCheckStats) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer != nullptr);

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ailego::Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  std::string path = dir_ + "/TestCheckStats.index";
  ASSERT_EQ(0, storage->open(path, true));
  ailego::Params params;
  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 100);
  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 5);
  params.set(PARAM_HNSW_STREAMER_FILTER_SAME_KEY, true);
  params.set(PARAM_HNSW_STREAMER_CHUNK_SIZE, 512 * 1024U);
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  // ---- 1. empty index ----------------------------------------------------
  auto &stats = streamer->stats();
  ASSERT_EQ(0U, stats.revision_id());
  ASSERT_EQ(0U, stats.loaded_count());
  ASSERT_EQ(0U, stats.added_count());
  ASSERT_EQ(0U, stats.discarded_count());
  // The reported index size is expected to be a multiple of the page size.
  ASSERT_EQ(0u, stats.index_size() % ailego::MemoryHelper::PageSize());
  ASSERT_EQ(0U, stats.dumped_size());
  ASSERT_EQ(0U, stats.check_point());
  auto createTime = stats.create_time();
  auto updateTime = stats.update_time();
  ASSERT_GT(createTime, 0UL);
  ASSERT_EQ(createTime, updateTime);

  // ---- 2. insert 3000 documents, then flush ------------------------------
  NumericalVector<float> vec(dim);
  auto ctx = streamer->create_context();
  ASSERT_NE(nullptr, ctx);
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  size_t cnt = 3000;
  size_t size1 = stats.index_size();  // before any insert
  size_t size2 = 0;                   // after the first insert
  for (size_t i = 0; i < cnt; i++) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    ASSERT_EQ(0, streamer->add_impl(i, vec.data(), qmeta, ctx));
    ASSERT_EQ(i + 1, stats.added_count());
    if (i == 0UL) {
      size2 = stats.index_size();
    }
  }
  size_t size3 = stats.index_size();  // after all inserts
  ASSERT_GT(size2, size1);
  ASSERT_GT(size3, size2);
  LOG_INFO("size1=%zu size2=%zu size3=%zu", size1, size2, size3);

  uint64_t checkPoint = 23423UL;
  streamer->flush(checkPoint);
  // Flushing must not change the reported index size.
  size_t size4 = stats.index_size();
  ASSERT_EQ(size3, size4);
  auto stats1 = streamer->stats();
  ASSERT_EQ(1U, stats1.revision_id());
  ASSERT_EQ(0U, stats1.loaded_count());
  ASSERT_EQ(cnt, stats1.added_count());
  ASSERT_EQ(0U, stats1.discarded_count());
  ASSERT_GT(stats1.index_size(), 0U);
  ASSERT_EQ(0U, stats1.dumped_size());
  ASSERT_EQ(checkPoint, stats1.check_point());
  auto createTime1 = stats1.create_time();
  auto updateTime1 = stats1.update_time();
  ASSERT_GE(updateTime1, createTime1);
  ASSERT_EQ(createTime, createTime1);
  streamer->close();

  // ---- 3. re-open and add one more document ------------------------------
  ASSERT_EQ(0, streamer->open(storage));
  auto &stats2 = streamer->stats();
  ctx = streamer->create_context();
  ASSERT_NE(nullptr, ctx);
  ASSERT_EQ(0, streamer->add_impl(10000UL, vec.data(), qmeta, ctx));
  ASSERT_EQ(2U, stats2.revision_id());
  ASSERT_EQ(cnt, stats2.loaded_count());
  ASSERT_EQ(1U, stats2.added_count());
  ASSERT_EQ(0U, stats2.discarded_count());
  // Check the live statistics of the re-opened index (the original asserted
  // on the pre-close snapshot `stats1` here, which had already been checked).
  ASSERT_GT(stats2.index_size(), 0U);
  ASSERT_EQ(0U, stats2.dumped_size());
  ASSERT_EQ(checkPoint, stats2.check_point());
  auto createTime2 = stats2.create_time();
  auto updateTime2 = stats2.update_time();
  ASSERT_EQ(createTime2, createTime1);
  ASSERT_GE(updateTime2, updateTime1);

  // ---- 4. flush again, then try to re-insert key 0 -----------------------
  // The update time is required to be strictly greater after this flush,
  // hence the pause.
  // NOTE(review): presumably the timestamps have one-second resolution.
  sleep(1);
  streamer->flush(checkPoint + 1);
  // Key 0 is already present, so the insert is rejected and counted as
  // discarded.
  ASSERT_NE(0, streamer->add_impl(0U, vec.data(), qmeta, ctx));
  auto &stats3 = streamer->stats();
  ASSERT_EQ(2U, stats3.revision_id());
  ASSERT_EQ(cnt, stats3.loaded_count());
  ASSERT_EQ(1U, stats3.added_count());
  ASSERT_EQ(1U, stats3.discarded_count());
  ASSERT_EQ(stats2.index_size(), stats3.index_size());
  ASSERT_EQ(0U, stats3.dumped_size());
  ASSERT_EQ(checkPoint + 1, stats3.check_point());
  auto createTime3 = stats3.create_time();
  auto updateTime3 = stats3.update_time();
  ASSERT_EQ(createTime3, createTime1);
  ASSERT_GT(updateTime3, updateTime2);

  // ---- 5. dump to a file and compare sizes -------------------------------
  auto dpath = dir_ + "/dumpIndex";
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  ASSERT_EQ(0, dumper->create(dpath));
  ASSERT_EQ(0, streamer->dump(dumper));
  ASSERT_EQ(0, dumper->close());
  size_t doc_cnt = stats3.loaded_count() + stats3.added_count();
  struct stat st;
  ASSERT_EQ(3001UL, doc_cnt);
  ASSERT_EQ(0, stat(dpath.c_str(), &st));
  // The file on disk may exceed the reported dumped size by less than 8192
  // bytes.
  ASSERT_LT(st.st_size - stats3.dumped_size(), 8192);

  streamer->close();
}

// With PARAM_HNSW_STREAMER_FILTER_SAME_KEY set to true, inserts keys 0..999,
// verifies that re-inserting every tenth key yields IndexError_Duplicate, and
// finally reads every vector back through the provider's get_vector().
TEST_F(HnswStreamerTest, TestCheckDuplicateAndGetVector) {
  auto streamer = IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_NE(nullptr, streamer);

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ailego::Params storage_params;
  ASSERT_EQ(0, storage->init(storage_params));
  ASSERT_EQ(0, storage->open(dir_ + "TestCheckDuplicateAndGetVec", true));

  ailego::Params params;
  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);
  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 5);
  params.set(PARAM_HNSW_STREAMER_FILTER_SAME_KEY, true);
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  NumericalVector<float> vec(dim);
  auto ctx = streamer->create_context();
  ASSERT_NE(nullptr, ctx);
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  const size_t kDocCount = 1000;

  // First pass: every key is new and must be accepted.
  for (size_t key = 0; key < kDocCount; ++key) {
    for (size_t d = 0; d < dim; ++d) {
      vec[d] = key;
    }
    ASSERT_EQ(0, streamer->add_impl(key, vec.data(), qmeta, ctx));
  }

  // Second pass: keys 0, 10, 20, ... already exist and must be rejected.
  for (size_t key = 0; key < kDocCount; key += 10) {
    for (size_t d = 0; d < dim; ++d) {
      vec[d] = key;
    }
    ASSERT_EQ(IndexError_Duplicate,
              streamer->add_impl(key, vec.data(), qmeta, ctx));
  }

  // Every stored vector must still consist of its own key value.
  auto provider = streamer->create_provider();
  for (size_t key = 0; key < kDocCount; ++key) {
    const float *stored = (const float *)provider->get_vector(key);
    ASSERT_NE(stored, nullptr);
    for (size_t d = 0; d < dim; ++d) {
      ASSERT_FLOAT_EQ(key, stored[d]);
    }
  }

  streamer->flush(0UL);
  streamer.reset();
}

class TestDumper : public IndexDumper {
  virtual int init(const ailego::Params &) {
    return 0;
  }
  virtual int cleanup(void) {
    return 0;
  }
  virtual int create(const std::string &path) {
    return 0;
  }
  virtual uint32_t magic(void) const {
    return 0;
  }
  virtual int close(void) {
    return 0;
  }
  virtual int append(const std::string &id, size_t data_size,
                     size_t padding_size, uint32_t crc) {
    usleep(100000);
    return 0;
  }
  virtual size_t write(const void *data, size_t len) {
    return len;
  }
};

// Exercises adding documents while a dump is running.
//
// 2000 documents are added up front. A background task then adds 1000 more
// while the main thread dumps, first through the slow TestDumper and then
// through a FileDumper. Failed background adds are retried until they succeed,
// and the test requires that at least one add was rejected with
// IndexError_Unsupported. Finally the dumped index (at least 2000 documents)
// and the reopened streamer (exactly 3000 documents) are scanned.
TEST_F(HnswStreamerTest, TestDumpIndexAndAdd) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer != nullptr);

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ailego::Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "TestDumpIndexAndAdd", true));
  ailego::Params params;
  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);
  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 5);
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  NumericalVector<float> vec(dim);
  auto ctx = streamer->create_context();
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  ASSERT_NE(nullptr, ctx);

  // Last non-zero status returned by add_impl (0 if every add succeeded).
  int code = 0;

  // Fulfilled by the background writer as soon as it starts running. A promise
  // is used instead of unlocking a std::mutex from the worker thread, because
  // unlocking a mutex that the calling thread does not own is undefined
  // behavior.
  std::promise<void> writer_started;

  // Adds keys [a, b); each vector component equals its key. `started` may be
  // null when nobody waits for the start signal.
  auto addVector = [&](int a, int b, std::promise<void> *started) {
    int success = 0;
    if (started != nullptr) {
      started->set_value();
    }
    for (int i = a; i < b; i++) {
      for (size_t j = 0; j < dim; ++j) {
        vec[j] = i;
      }
      int ret = streamer->add_impl(i, vec.data(), qmeta, ctx);
      if (ret != 0) {
        code = ret;
        ASSERT_EQ(IndexError_Unsupported, code);
        i = i - 1;  // retry
        usleep(10000);
      } else {
        success++;
      }
    }
    std::cout << "addVector: " << success << " success" << std::endl;
  };

  // Synchronous warm-up on the main thread.
  addVector(0, 2000, nullptr);

  // The background writer shares `vec` and `ctx`; the main thread does not
  // touch them again until t2.get() has returned.
  auto t2 = std::async(std::launch::async, addVector, 2000, 3000,
                       &writer_started);
  auto path1 = dir_ + "/dumpIndex1";
  auto dumper1 = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper1, nullptr);
  ASSERT_EQ(0, dumper1->create(path1));

  // sync: wait until the background writer has started
  writer_started.get_future().wait();
  auto test_dumper = std::make_shared<TestDumper>();
  ASSERT_EQ(0, streamer->dump(test_dumper));
  ASSERT_EQ(0, streamer->dump(dumper1));
  ASSERT_EQ(0, dumper1->close());
  t2.get();
  streamer->close();
  ASSERT_EQ(IndexError_Unsupported, code);

  // check dump index
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSearcher");
  auto container = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_EQ(0, container->init(ailego::Params()));
  ASSERT_EQ(0, container->open(path1, false));
  ASSERT_NE(searcher, nullptr);
  ASSERT_EQ(0, searcher->init(ailego::Params()));
  ASSERT_EQ(0, searcher->load(container, IndexMetric::Pointer()));
  auto iter = searcher->create_provider()->create_iterator();
  size_t docs = 0;
  while (iter->is_valid()) {
    auto key = iter->key();
    const float *d = reinterpret_cast<const float *>(iter->data());
    for (size_t j = 0; j < dim; ++j) {
      ASSERT_FLOAT_EQ(d[j], key);
    }
    docs++;
    iter->next();
  }
  // The dump ran concurrently with the writer, so only the 2000 warm-up
  // documents are guaranteed to be in it.
  ASSERT_GE(docs, 2000U);

  // check streamer
  ASSERT_EQ(0, streamer->open(storage));
  iter = streamer->create_provider()->create_iterator();
  docs = 0;
  while (iter->is_valid()) {
    auto key = iter->key();
    const float *d = reinterpret_cast<const float *>(iter->data());
    for (size_t j = 0; j < dim; ++j) {
      ASSERT_FLOAT_EQ(d[j], key);
    }
    docs++;
    iter->next();
  }
  ASSERT_EQ(docs, 3000U);
}


// Round-trips 10000 documents through the streamer, a FileDumper dump and an
// HnswSearcher load, then checks that the searcher's and the reopened
// streamer's providers expose the same vectors, both by iteration and by key
// lookup. The key layout is picked at random per run: either random keys over
// the full size_t range, or multiples of `step` that may additionally be
// shuffled. Every vector component equals the document key.
TEST_F(HnswStreamerTest, TestProvider) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer != nullptr);

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ailego::Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "TestGetVector", true));
  ailego::Params params;
  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);
  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 5);
  params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, true);
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));
  auto ctx = streamer->create_context();
  ASSERT_NE(nullptr, ctx);

  //! prepare data
  size_t docs = 10000UL;
  srand(ailego::Realtime::MilliSeconds());
  std::vector<key_t> keys(docs);
  bool rand_key = rand() % 2;
  bool rand_order = rand() % 2;
  size_t step = rand() % 2 + 1;
  LOG_DEBUG("randKey=%u randOrder=%u step=%zu", rand_key, rand_order, step);
  if (rand_key) {
    // Default-seeded engine: the random key sequence itself is reproducible.
    std::mt19937 mt;
    std::uniform_int_distribution<size_t> dt(
        0, std::numeric_limits<size_t>::max());
    for (size_t i = 0; i < docs; ++i) {
      keys[i] = dt(mt);
    }
  } else {
    std::iota(keys.begin(), keys.end(), 0U);
    std::transform(keys.begin(), keys.end(), keys.begin(),
                   [&](key_t k) { return step * k; });
    if (rand_order) {
      uint32_t seed = ailego::Realtime::Seconds();
      std::shuffle(keys.begin(), keys.end(), std::default_random_engine(seed));
    }
  }
  NumericalVector<float> vec(dim);
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  for (size_t i = 0; i < keys.size(); i++) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = keys[i];
    }
    // Fail at the insert itself rather than at a later count mismatch.
    ASSERT_EQ(0, streamer->add_impl(keys[i], vec.data(), qmeta, ctx));
  }

  auto path1 = dir_ + "/TestGetVector1";
  auto dumper1 = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper1, nullptr);
  ASSERT_EQ(0, dumper1->create(path1));
  ASSERT_EQ(0, streamer->dump(dumper1));
  ASSERT_EQ(0, dumper1->close());
  streamer->close();

  // check dump index
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSearcher");
  ASSERT_NE(searcher, nullptr);
  auto container = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_NE(nullptr, container);
  ASSERT_EQ(0, container->init(ailego::Params()));
  ASSERT_EQ(0, container->open(path1, false));
  ASSERT_EQ(0, searcher->init(ailego::Params()));
  ASSERT_EQ(0, searcher->load(container, IndexMetric::Pointer()));
  auto searcher_provider = searcher->create_provider();
  ASSERT_TRUE(searcher_provider != nullptr);
  auto iter = searcher_provider->create_iterator();
  ASSERT_TRUE(iter != nullptr);
  size_t cnt = 0;
  while (iter->is_valid()) {
    auto key = iter->key();
    const float *d = reinterpret_cast<const float *>(iter->data());
    for (size_t j = 0; j < dim; ++j) {
      ASSERT_FLOAT_EQ(d[j], key);
    }
    cnt++;
    iter->next();
  }
  ASSERT_EQ(cnt, docs);

  // check streamer
  ASSERT_EQ(0, streamer->open(storage));
  auto streamer_provider = streamer->create_provider();
  ASSERT_TRUE(streamer_provider != nullptr);
  iter = streamer_provider->create_iterator();
  ASSERT_TRUE(iter != nullptr);
  cnt = 0;
  while (iter->is_valid()) {
    auto key = iter->key();
    const float *d = reinterpret_cast<const float *>(iter->data());
    for (size_t j = 0; j < dim; ++j) {
      ASSERT_FLOAT_EQ(d[j], key);
    }
    cnt++;
    iter->next();
  }
  ASSERT_EQ(cnt, docs);

  // Key-based lookup must agree between the dumped index and the streamer.
  for (size_t i = 0; i < keys.size(); ++i) {
    const float *d1 =
        reinterpret_cast<const float *>(searcher_provider->get_vector(keys[i]));
    ASSERT_TRUE(d1);
    for (size_t j = 0; j < dim; ++j) {
      ASSERT_FLOAT_EQ(d1[j], keys[i]);
    }

    const float *d2 =
        reinterpret_cast<const float *>(streamer_provider->get_vector(keys[i]));
    ASSERT_TRUE(d2);
    for (size_t j = 0; j < dim; ++j) {
      ASSERT_FLOAT_EQ(d2[j], keys[i]);
    }
  }

  ASSERT_EQ(dim, streamer_provider->dimension());
  ASSERT_EQ(index_meta_ptr_->element_size(), streamer_provider->element_size());
  ASSERT_EQ(index_meta_ptr_->data_type(), streamer_provider->data_type());
}

// Uses one context, created by a randomly chosen streamer, against three
// independent streamers from two concurrent tasks. Keys are assigned so that
// `key % 3` identifies the streamer a document was added to; each top-1 search
// must therefore return a key whose remainder matches the streamer searched.
TEST_F(HnswStreamerTest, TestSharedContext) {
  // Creates and opens a streamer on its own mmap file. Yields a null pointer
  // if any step fails so the caller can assert on the result.
  auto create_streamer = [](std::string path) {
    IndexStreamer::Pointer streamer =
        IndexFactory::CreateStreamer("HnswStreamer");
    auto storage = IndexFactory::CreateStorage("MMapFileStorage");
    if (!streamer || !storage) {
      return IndexStreamer::Pointer();
    }
    ailego::Params stg_params;
    ailego::Params params;
    if (storage->init(stg_params) != 0 || storage->open(path, true) != 0 ||
        streamer->init(*index_meta_ptr_, params) != 0 ||
        streamer->open(storage) != 0) {
      return IndexStreamer::Pointer();
    }
    return streamer;
  };
  auto streamer1 = create_streamer(dir_ + "TestSharedContext.index1");
  auto streamer2 = create_streamer(dir_ + "TestSharedContext.index2");
  auto streamer3 = create_streamer(dir_ + "TestSharedContext.index3");
  ASSERT_TRUE(streamer1 != nullptr);
  ASSERT_TRUE(streamer2 != nullptr);
  ASSERT_TRUE(streamer3 != nullptr);

  srand(ailego::Realtime::MilliSeconds());
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);

  // NOTE(review): rand() is called from both worker tasks; whether rand() is
  // thread-safe is implementation-defined -- confirm for the target platforms.
  auto do_test = [&](int start) {
    // Pick the streamer that creates the context at random.
    IndexStreamer::Context::Pointer ctx;
    switch (rand() % 3) {
      case 0:
        ctx = streamer1->create_context();
        break;
      case 1:
        ctx = streamer2->create_context();
        break;
      case 2:
        ctx = streamer3->create_context();
        break;
    }
    ASSERT_TRUE(ctx != nullptr);
    ctx->set_topk(1);

    // Streamer N (1-based) only ever receives keys with key % 3 == N - 1.
    uint64_t key1 = start + 0;
    uint64_t key2 = start + 1;
    uint64_t key3 = start + 2;
    NumericalVector<float> query(dim);
    for (size_t j = 0; j < dim; ++j) {
      query[j] = 0.1f;
    }
    for (int i = 0; i < 1000; ++i) {
      NumericalVector<float> vec(dim);
      for (size_t j = 0; j < dim; ++j) {
        vec[j] = rand();
      }
      int ret = 0;
      // Index (0-based) of the streamer exercised in this round.
      auto target = rand() % 3;
      switch (target) {
        case 0:
          streamer1->add_impl(key1, vec.data(), qmeta, ctx);
          key1 += 3;
          ret = streamer1->search_impl(query.data(), qmeta, ctx);
          break;
        case 1:
          streamer2->add_impl(key2, vec.data(), qmeta, ctx);
          key2 += 3;
          streamer2->add_impl(key2, vec.data(), qmeta, ctx);
          key2 += 3;
          ret = streamer2->search_impl(query.data(), qmeta, ctx);
          break;
        case 2:
          streamer3->add_impl(key3, vec.data(), qmeta, ctx);
          key3 += 3;
          streamer3->add_impl(key3, vec.data(), qmeta, ctx);
          key3 += 3;
          streamer3->add_impl(key3, vec.data(), qmeta, ctx);
          key3 += 3;
          ret = streamer3->search_impl(query.data(), qmeta, ctx);
          break;
      }
      EXPECT_EQ(0, ret);
      auto &results = ctx->result();
      EXPECT_EQ(1, results.size());
      // Only inspect the first hit when there is one; indexing an empty
      // result list would be out of bounds.
      if (results.size() > 0) {
        EXPECT_EQ(target, results[0].key() % 3);
      }
    }
  };

  auto t1 = std::async(std::launch::async, do_test, 0);
  auto t2 = std::async(std::launch::async, do_test, 30000000);
  t1.wait();
  t2.wait();
}

// Builds an index with the "MipsSquaredEuclidean" metric in two sessions on
// the same storage: keys [COUNT, 2 * COUNT) are added and flushed first, then
// the storage is reopened with a new streamer and keys [0, COUNT) are added.
// Each vector has all components equal to its key. A top-10 query with an
// all-ones vector must return a key within 10 of 2 * COUNT - 1.
TEST_F(HnswStreamerTest, TestMipsEuclideanMetric) {
  static constexpr size_t dim = 32;
  std::srand(ailego::Realtime::MilliSeconds());
  // int injection_type = rand() % 2;
  int injection_type = 0;

  IndexMeta meta(IndexMeta::DataType::DT_FP32, dim);
  ailego::Params params;
  params.set("proxima.mips_euclidean.metric.injection_type", injection_type);
  meta.set_metric("MipsSquaredEuclidean", 0, params);
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ailego::Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "TestMipsSquaredEuclidean", true));
  const size_t COUNT = 10000;
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  {
    // First session: add the upper half of the key range and persist it.
    IndexStreamer::Pointer streamer =
        IndexFactory::CreateStreamer("HnswStreamer");
    ASSERT_TRUE(streamer != nullptr);
    ASSERT_EQ(0, streamer->init(meta, params));
    ASSERT_EQ(0, streamer->open(storage));
    const auto &metric_params = streamer->meta().metric_params();
    EXPECT_FLOAT_EQ(0.0, metric_params.get_as_float(
                             "proxima.mips_euclidean.metric.max_l2_norm"));
    auto ctx = streamer->create_context();
    ASSERT_TRUE(!!ctx);
    for (size_t i = COUNT; i < 2 * COUNT; i++) {
      std::vector<float> vec(dim);
      for (size_t d = 0; d < dim; ++d) {
        vec[d] = i;
      }
      ASSERT_EQ(0, streamer->add_impl(i, vec.data(), qmeta, ctx));
    }
    ASSERT_EQ(0, streamer->flush(0UL));
    ASSERT_EQ(0, streamer->close());
  }

  // Second session: reopen the same storage with a fresh streamer.
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer != nullptr);
  ASSERT_EQ(0, streamer->init(meta, params));
  ASSERT_EQ(0, streamer->open(storage));
  const auto &metric_params = streamer->meta().metric_params();
  // NoTrain for LocalizedSpherical (type == 1), so max_l2_norm equals to 0
  EXPECT_FLOAT_EQ(
      injection_type == 0 ? 0.0f : 113131.0f,
      metric_params.get_as_float("proxima.mips_euclidean.metric.max_l2_norm"));
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  for (size_t i = 0; i < COUNT; i++) {
    std::vector<float> vec(dim);
    for (size_t d = 0; d < dim; ++d) {
      vec[d] = i;
    }
    ASSERT_EQ(0, streamer->add_impl(i, vec.data(), qmeta, ctx));
  }
  std::vector<float> vec(dim);
  for (size_t d = 0; d < dim; ++d) {
    vec[d] = 1.0;
  }

  ctx->set_topk(10);
  ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));
  const auto &results = ctx->result();
  // Fatal on mismatch: results[0] below must not be read from an empty list.
  ASSERT_EQ(results.size(), 10U);
  EXPECT_NEAR((uint64_t)(2 * COUNT - 1), results[0].key(), 10);
}

// Checks that the brute-force threshold can be configured per search context,
// alternating between Context::update() with
// "proxima.hnsw.streamer.brute_force_threshold" and
// Context::set_bruteforce_threshold(). Every query is also answered with
// search_bf_impl(), and the overlap between both result lists must exceed 0.90
// overall and 0.95 for the top-1 hit.
TEST_F(HnswStreamerTest, TestBruteForceSetupInContext) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer != nullptr);

  ailego::Params params;
  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);
  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);
  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 10);
  params.set(PARAM_HNSW_STREAMER_EF, 5);
  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
  ailego::Params stg_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0,
            storage->open(dir_ + "/TestBruteForceSetupInContext.index", true));
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  // Populate the index: every component of vector i equals i.
  NumericalVector<float> vec(dim);
  size_t cnt = 5000U;
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  for (size_t i = 0; i < cnt; i++) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    streamer->add_impl(i, vec.data(), qmeta, ctx);
  }

  size_t topk = 200;
  uint64_t knnTotalTime = 0;
  uint64_t linearTotalTime = 0;
  int totalHits = 0;
  int totalCnts = 0;
  int topk1Hits = 0;

  // Toggled every iteration: whether to raise the threshold on knnCtx, and
  // which of the two APIs to use when doing so.
  bool set_bf_threshold = false;
  bool use_update = false;

  for (size_t i = 0; i < cnt; i++) {
    auto linearCtx = streamer->create_context();
    auto knnCtx = streamer->create_context();

    ASSERT_TRUE(!!linearCtx);
    ASSERT_TRUE(!!knnCtx);

    linearCtx->set_topk(topk);
    knnCtx->set_topk(topk);

    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i + 0.1f;
    }
    auto t1 = ailego::Realtime::MicroSeconds();

    if (set_bf_threshold) {
      if (use_update) {
        ailego::Params streamerParamsExtra;

        streamerParamsExtra.set("proxima.hnsw.streamer.brute_force_threshold",
                                cnt);
        knnCtx->update(streamerParamsExtra);
      } else {
        knnCtx->set_bruteforce_threshold(cnt);
      }

      use_update = !use_update;
    }
    ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, knnCtx));

    auto t2 = ailego::Realtime::MicroSeconds();

    ASSERT_EQ(0, streamer->search_bf_impl(vec.data(), qmeta, linearCtx));

    // auto t3 = ailego::Realtime::MicroSeconds();

    // The search_impl() time is booked as "linear" when the threshold was
    // raised for this query, and as "knn" otherwise.
    if (set_bf_threshold) {
      linearTotalTime += t2 - t1;
    } else {
      knnTotalTime += t2 - t1;
    }

    set_bf_threshold = !set_bf_threshold;

    auto &knnResult = knnCtx->result();
    ASSERT_EQ(topk, knnResult.size());
    topk1Hits += i == knnResult[0].key();

    auto &linearResult = linearCtx->result();
    ASSERT_EQ(topk, linearResult.size());
    ASSERT_EQ(i, linearResult[0].key());

    // Count how many search_impl() hits also appear in the brute-force list.
    for (size_t k = 0; k < topk; ++k) {
      totalCnts++;
      for (size_t j = 0; j < topk; ++j) {
        if (linearResult[j].key() == knnResult[k].key()) {
          totalHits++;
          break;
        }
      }
    }
  }
  float recall = totalHits * 1.0f / totalCnts;
  float topk1Recall = topk1Hits * 1.0f / cnt;
  float cost = linearTotalTime * 1.0f / knnTotalTime;
  (void)cost;  // only consumed by the disabled diagnostics below
#if 0
    printf("knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d "
           "R@%zd=%f R@1=%f cost=%f\n",
           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,
           topk1Recall, cost);
#endif
  EXPECT_GT(recall, 0.90f);
  EXPECT_GT(topk1Recall, 0.95f);
  // EXPECT_GT(cost, 2.0f);
}

// Builds a "Cosine" index whose vectors pass through the reformer named by
// the CosineFp32Converter meta, then answers 200 queries with both
// search_impl() and search_bf_impl(). The overlap of the two top-200 lists
// must exceed 0.90 and the top-1 hit must equal the query index in more than
// 95% of the queries.
TEST_F(HnswStreamerTest, TestKnnSearchCosine) {
  constexpr size_t kDocCount = 4000U;
  constexpr size_t kQueryCount = 200U;
  constexpr size_t kTopk = 200;

  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer != nullptr);

  ailego::Params streamer_params;
  streamer_params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 50);
  streamer_params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);
  streamer_params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 100);
  streamer_params.set(PARAM_HNSW_STREAMER_EF, 100);
  streamer_params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);

  // The converter supplies the index meta and the name of the reformer that
  // is applied to documents and queries.
  IndexMeta raw_meta(IndexMeta::DataType::DT_FP32, dim);
  raw_meta.set_metric("Cosine", 0, ailego::Params());

  auto converter = IndexFactory::CreateConverter("CosineFp32Converter");
  ASSERT_TRUE(converter != nullptr);
  ailego::Params converter_params;
  converter->init(raw_meta, converter_params);
  IndexMeta index_meta = converter->meta();

  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());
  ASSERT_TRUE(reformer != nullptr);
  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));

  ailego::Params storage_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_EQ(0, storage->init(storage_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestKnnSearchCosine.index", true));
  ASSERT_EQ(0, streamer->init(index_meta, streamer_params));
  ASSERT_EQ(0, streamer->open(storage));

  NumericalVector<float> buffer(dim);
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);

  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);

  const float base_value = float(kDocCount) / 2;
  // Pattern shared by documents and queries: the first dim / 4 components
  // hold base_value, the remaining ones base_value plus ten times the index.
  const auto fill_pattern = [&](size_t index) {
    const float offset = index * 10;
    for (size_t pos = 0; pos < dim; ++pos) {
      buffer[pos] = (pos < dim / 4) ? base_value : base_value + offset;
    }
  };

  for (size_t doc = 0; doc < kDocCount; ++doc) {
    fill_pattern(doc);

    std::string converted;
    IndexQueryMeta converted_meta;
    ASSERT_EQ(0, reformer->convert(buffer.data(), qmeta, &converted,
                                   &converted_meta));
    ASSERT_EQ(0,
              streamer->add_impl(doc, converted.data(), converted_meta, ctx));
  }

  auto linearCtx = streamer->create_context();
  auto knnCtx = streamer->create_context();
  linearCtx->set_topk(kTopk);
  knnCtx->set_topk(kTopk);

  uint64_t knn_micros = 0;
  uint64_t linear_micros = 0;
  int overlap_hits = 0;
  int compared = 0;
  int top1_hits = 0;

  for (size_t q = 0; q < kQueryCount; ++q) {
    fill_pattern(q);

    std::string query;
    IndexQueryMeta query_meta;
    ASSERT_EQ(0, reformer->transform(buffer.data(), qmeta, &query,
                                     &query_meta));

    const auto before_knn = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0, streamer->search_impl(query.data(), query_meta, knnCtx));
    const auto after_knn = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0, streamer->search_bf_impl(query.data(), query_meta, linearCtx));
    const auto after_linear = ailego::Realtime::MicroSeconds();
    knn_micros += after_knn - before_knn;
    linear_micros += after_linear - after_knn;

    auto &approx = knnCtx->result();
    ASSERT_EQ(kTopk, approx.size());
    if (q == approx[0].key()) {
      ++top1_hits;
    }

    auto &exact = linearCtx->result();
    ASSERT_EQ(kTopk, exact.size());
    ASSERT_EQ(q, exact[0].key());

    // Count the search_impl() hits that also occur in the brute-force list.
    for (size_t k = 0; k < kTopk; ++k) {
      ++compared;
      bool matched = false;
      for (size_t j = 0; j < kTopk && !matched; ++j) {
        matched = (exact[j].key() == approx[k].key());
      }
      if (matched) {
        ++overlap_hits;
      }
    }
  }

  float recall = overlap_hits * 1.0f / compared;
  float topk1Recall = top1_hits * 1.0f / kQueryCount;
  float cost = linear_micros * 1.0f / knn_micros;
#if 0
    printf("knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d "
           "R@%zd=%f R@1=%f cost=%f\n",
           knn_micros, linear_micros, overlap_hits, compared, kTopk, recall,
           topk1Recall, cost);
#endif
  EXPECT_GT(recall, 0.90f);
  EXPECT_GT(topk1Recall, 0.95f);
  // EXPECT_GT(cost, 2.0f);
}

// Checks vector retrieval on a "SquaredEuclidean" index with
// PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE set: get_vector() must return data
// for every key, and with set_fetch_vector(true) on the search context the
// first search_impl() hit must carry a vector whose first component equals
// the query index.
TEST_F(HnswStreamerTest, TestFetchVector) {
  constexpr size_t kDocCount = 2000U;
  constexpr size_t kQueryCount = 200U;
  constexpr size_t kTopk = 200;

  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer != nullptr);

  ailego::Params streamer_params;
  streamer_params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);
  streamer_params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);
  streamer_params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 10);
  streamer_params.set(PARAM_HNSW_STREAMER_EF, 5);
  streamer_params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
  streamer_params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, true);

  IndexMeta index_meta(IndexMeta::DataType::DT_FP32, dim);
  index_meta.set_metric("SquaredEuclidean", 0, ailego::Params());

  ailego::Params storage_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_EQ(0, storage->init(storage_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestFetchVector.index", true));
  ASSERT_EQ(0, streamer->init(index_meta, streamer_params));
  ASSERT_EQ(0, streamer->open(storage));

  NumericalVector<float> buffer(dim);
  // Sets every component of the scratch vector to `value`.
  const auto fill_with = [&](size_t value) {
    for (size_t pos = 0; pos < dim; ++pos) {
      buffer[pos] = value;
    }
  };

  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);

  for (size_t key = 0; key < kDocCount; ++key) {
    fill_with(key);
    streamer->add_impl(key, buffer.data(), qmeta, ctx);
  }

  // Direct lookup: the first component of each stored vector is its key.
  for (size_t key = 0; key < kDocCount; ++key) {
    const void *raw = streamer->get_vector(key);
    ASSERT_NE(raw, nullptr);

    const float first_component = *static_cast<const float *>(raw);
    ASSERT_FLOAT_EQ(first_component, key);
  }

  auto linearCtx = streamer->create_context();
  auto knnCtx = streamer->create_context();
  knnCtx->set_fetch_vector(true);
  linearCtx->set_topk(kTopk);
  knnCtx->set_topk(kTopk);

  uint64_t knn_micros = 0;
  uint64_t linear_micros = 0;
  for (size_t q = 0; q < kQueryCount; ++q) {
    fill_with(q);

    const auto before_knn = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0, streamer->search_impl(buffer.data(), qmeta, knnCtx));
    const auto after_knn = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0, streamer->search_bf_impl(buffer.data(), qmeta, linearCtx));
    const auto after_linear = ailego::Realtime::MicroSeconds();
    knn_micros += after_knn - before_knn;
    linear_micros += after_linear - after_knn;

    auto &approx = knnCtx->result();
    ASSERT_EQ(kTopk, approx.size());

    auto &exact = linearCtx->result();
    ASSERT_EQ(kTopk, exact.size());
    ASSERT_EQ(q, exact[0].key());

    // Only the context with fetch-vector enabled is checked for a payload.
    ASSERT_NE(approx[0].vector(), nullptr);
    const float first_component = *((float *)(approx[0].vector()));
    ASSERT_FLOAT_EQ(first_component, q);
  }
  std::cout << "knnTotalTime: " << knn_micros << std::endl;
  std::cout << "linearTotalTime: " << linear_micros << std::endl;
}

// Same retrieval check as for the plain metric, but on a "Cosine" index fed
// through the reformer named by the CosineFp32Converter meta. Vectors obtained
// from get_vector() and from the first search_impl() hit are passed through
// reformer->revert(), and their last component must be within 1e-2 of the
// value that was originally inserted.
TEST_F(HnswStreamerTest, TestFetchVectorCosine) {
  constexpr size_t kDocCount = 2000U;
  constexpr size_t kQueryCount = 200U;
  constexpr size_t kTopk = 200;

  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer != nullptr);

  ailego::Params streamer_params;
  streamer_params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 50);
  streamer_params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);
  streamer_params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 100);
  streamer_params.set(PARAM_HNSW_STREAMER_EF, 100);
  streamer_params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
  streamer_params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, true);

  IndexMeta raw_meta(IndexMeta::DataType::DT_FP32, dim);
  raw_meta.set_metric("Cosine", 0, ailego::Params());

  auto converter = IndexFactory::CreateConverter("CosineFp32Converter");
  ASSERT_TRUE(converter != nullptr);
  ailego::Params converter_params;
  converter->init(raw_meta, converter_params);
  IndexMeta index_meta = converter->meta();

  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());
  ASSERT_TRUE(reformer != nullptr);
  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));

  ailego::Params storage_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_EQ(0, storage->init(storage_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestFetchVectorCosine.index", true));
  ASSERT_EQ(0, streamer->init(index_meta, streamer_params));
  ASSERT_EQ(0, streamer->open(storage));

  NumericalVector<float> buffer(dim);
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);

  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  // Meta produced by convert(); reused when reverting stored vectors.
  IndexQueryMeta stored_meta;

  const float epsilon = 1e-2;
  const float base_value = float(kDocCount) / 2;
  // Pattern shared by documents and queries: the first dim / 4 components
  // hold base_value, the remaining ones base_value plus ten times the index.
  const auto fill_pattern = [&](size_t index) {
    const float offset = index * 10;
    for (size_t pos = 0; pos < dim; ++pos) {
      buffer[pos] = (pos < dim / 4) ? base_value : base_value + offset;
    }
  };

  for (size_t doc = 0; doc < kDocCount; ++doc) {
    fill_pattern(doc);

    std::string converted;
    ASSERT_EQ(0,
              reformer->convert(buffer.data(), qmeta, &converted, &stored_meta));
    ASSERT_EQ(0, streamer->add_impl(doc, converted.data(), stored_meta, ctx));
  }

  // Direct lookup: revert each stored vector and compare its last component.
  for (size_t doc = 0; doc < kDocCount; ++doc) {
    const float offset = doc * 10;

    const void *raw = streamer->get_vector(doc);
    ASSERT_NE(raw, nullptr);

    std::string restored(dim * sizeof(float), '\0');
    reformer->revert(raw, stored_meta, &restored);

    const float last_component =
        reinterpret_cast<const float *>(restored.data())[dim - 1];
    EXPECT_NEAR(last_component, base_value + offset, epsilon);
  }

  auto linearCtx = streamer->create_context();
  auto knnCtx = streamer->create_context();
  knnCtx->set_fetch_vector(true);
  linearCtx->set_topk(kTopk);
  knnCtx->set_topk(kTopk);

  uint64_t knn_micros = 0;
  uint64_t linear_micros = 0;
  for (size_t q = 0; q < kQueryCount; ++q) {
    const float offset = q * 10;
    fill_pattern(q);

    std::string query;
    // Meta produced by transform(); also used to revert the fetched vector.
    IndexQueryMeta query_meta;
    ASSERT_EQ(0,
              reformer->transform(buffer.data(), qmeta, &query, &query_meta));

    const auto before_knn = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0, streamer->search_impl(query.data(), query_meta, knnCtx));
    const auto after_knn = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0, streamer->search_bf_impl(query.data(), query_meta, linearCtx));
    const auto after_linear = ailego::Realtime::MicroSeconds();

    knn_micros += after_knn - before_knn;
    linear_micros += after_linear - after_knn;

    auto &approx = knnCtx->result();
    ASSERT_EQ(kTopk, approx.size());

    auto &exact = linearCtx->result();
    ASSERT_EQ(kTopk, exact.size());
    ASSERT_EQ(q, exact[0].key());

    ASSERT_NE(approx[0].vector(), nullptr);

    std::string restored(dim * sizeof(float), '\0');
    reformer->revert(approx[0].vector(), query_meta, &restored);

    const float last_component =
        reinterpret_cast<const float *>(restored.data())[dim - 1];
    EXPECT_NEAR(last_component, base_value + offset, epsilon);
  }
  std::cout << "knnTotalTime: " << knn_micros << std::endl;
  std::cout << "linearTotalTime: " << linear_micros << std::endl;
}

// Retrieval check for a "Cosine" index over DT_FP16 input, fed through the
// reformer named by the CosineHalfFloatConverter meta. Random FP16 vectors are
// inserted; vectors obtained from get_vector() and from the first
// search_impl() hit are passed through reformer->revert(), and their last
// component (converted to FP32) must be within 0.1 of the inserted value.
TEST_F(HnswStreamerTest, TestFetchVectorCosineHalfFloatConverter) {
  constexpr size_t kDocCount = 2000U;
  constexpr size_t kQueryCount = 200U;
  constexpr size_t kTopk = 30;

  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer != nullptr);

  ailego::Params streamer_params;
  streamer_params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 50);
  streamer_params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);
  streamer_params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 100);
  streamer_params.set(PARAM_HNSW_STREAMER_EF, 100);
  streamer_params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
  streamer_params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, true);

  IndexMeta raw_meta(IndexMeta::DataType::DT_FP16, dim);
  raw_meta.set_metric("Cosine", 0, ailego::Params());

  auto converter = IndexFactory::CreateConverter("CosineHalfFloatConverter");
  ASSERT_TRUE(converter != nullptr);
  ailego::Params converter_params;
  converter->init(raw_meta, converter_params);
  IndexMeta index_meta = converter->meta();

  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());
  ASSERT_TRUE(reformer != nullptr);
  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));

  ailego::Params storage_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_EQ(0, storage->init(storage_params));
  ASSERT_EQ(
      0, storage->open(dir_ + "/TestFetchVectorCosineHalfFloatConverter.index",
                       true));
  ASSERT_EQ(0, streamer->init(index_meta, streamer_params));
  ASSERT_EQ(0, streamer->open(storage));

  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);

  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP16, dim);
  // Meta produced by convert(); reused when reverting stored vectors.
  IndexQueryMeta stored_meta;

  const float epsilon = 0.1;

  // Components are drawn uniformly from [-2, 2) with a non-deterministic seed.
  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_real_distribution<float> dist(-2.0, 2.0);

  // Keeps the original FP16 inputs for the comparisons below.
  std::vector<NumericalVector<uint16_t>> originals;
  for (size_t doc = 0; doc < kDocCount; ++doc) {
    NumericalVector<uint16_t> half_vec(dim);
    for (size_t pos = 0; pos < dim; ++pos) {
      half_vec[pos] = ailego::FloatHelper::ToFP16(dist(gen));
    }

    std::string converted;
    ASSERT_EQ(0, reformer->convert(half_vec.data(), qmeta, &converted,
                                   &stored_meta));
    ASSERT_EQ(0, streamer->add_impl(doc, converted.data(), stored_meta, ctx));

    originals.push_back(half_vec);
  }

  // Direct lookup: revert each stored vector and compare its last component.
  for (size_t doc = 0; doc < kDocCount; ++doc) {
    const uint16_t expected_half = originals[doc][dim - 1];

    const void *raw = streamer->get_vector(doc);
    ASSERT_NE(raw, nullptr);

    std::string restored(dim * sizeof(uint16_t), '\0');
    reformer->revert(raw, stored_meta, &restored);

    const uint16_t actual_half =
        reinterpret_cast<const uint16_t *>(restored.data())[dim - 1];
    const float actual = ailego::FloatHelper::ToFP32(actual_half);
    const float expected = ailego::FloatHelper::ToFP32(expected_half);

    EXPECT_NEAR(expected, actual, epsilon);
  }

  auto linearCtx = streamer->create_context();
  auto knnCtx = streamer->create_context();
  knnCtx->set_fetch_vector(true);
  linearCtx->set_topk(kTopk);
  knnCtx->set_topk(kTopk);

  uint64_t knn_micros = 0;
  uint64_t linear_micros = 0;

  // The first kQueryCount inserted vectors double as queries.
  for (size_t q = 0; q < kQueryCount; ++q) {
    auto &half_vec = originals[q];

    std::string query;
    // Meta produced by transform(); also used to revert the fetched vector.
    IndexQueryMeta query_meta;
    ASSERT_EQ(0,
              reformer->transform(half_vec.data(), qmeta, &query, &query_meta));

    const auto before_knn = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0, streamer->search_impl(query.data(), query_meta, knnCtx));
    const auto after_knn = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0, streamer->search_bf_impl(query.data(), query_meta, linearCtx));
    const auto after_linear = ailego::Realtime::MicroSeconds();

    knn_micros += after_knn - before_knn;
    linear_micros += after_linear - after_knn;

    auto &approx = knnCtx->result();
    ASSERT_EQ(kTopk, approx.size());

    auto &exact = linearCtx->result();
    ASSERT_EQ(kTopk, exact.size());
    ASSERT_EQ(q, exact[0].key());

    ASSERT_NE(approx[0].vector(), nullptr);

    std::string restored(dim * sizeof(uint16_t), '\0');
    reformer->revert(approx[0].vector(), query_meta, &restored);

    const uint16_t expected_half = half_vec[dim - 1];
    const uint16_t actual_half =
        reinterpret_cast<const uint16_t *>(restored.data())[dim - 1];

    const float actual = ailego::FloatHelper::ToFP32(actual_half);
    const float expected = ailego::FloatHelper::ToFP32(expected_half);

    EXPECT_NEAR(expected, actual, epsilon);
  }

  std::cout << "knnTotalTime: " << knn_micros << std::endl;
  std::cout << "linearTotalTime: " << linear_micros << std::endl;
}

// Round-trip check for the "CosineFp16Converter" pipeline: random FP32 vectors
// are converted by the reformer before insertion, then read back both through
// get_vector() and through search results (fetch-vector enabled) and reverted.
// Only the last component of each reverted vector is compared with the
// original value, within `epsilon`.
TEST_F(HnswStreamerTest, TestFetchVectorCosineFp16Converter) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer != nullptr);

  ailego::Params params;
  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 50);
  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);
  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 100);
  params.set(PARAM_HNSW_STREAMER_EF, 100);
  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
  params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, true);

  ailego::Params stg_params;

  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dim);
  index_meta_raw.set_metric("Cosine", 0, ailego::Params());

  ailego::Params converter_params;
  auto converter = IndexFactory::CreateConverter("CosineFp16Converter");
  ASSERT_TRUE(converter != nullptr);
  // The init status used to be dropped; surface a failure right here.
  ASSERT_EQ(0, converter->init(index_meta_raw, converter_params));

  // The converter's meta names the reformer that matches its output format.
  IndexMeta index_meta = converter->meta();

  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());
  ASSERT_TRUE(reformer != nullptr);
  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_TRUE(storage != nullptr);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestFetchVectorCosineFp16Converter.index",
                             true));
  ASSERT_EQ(0, streamer->init(index_meta, params));
  ASSERT_EQ(0, streamer->open(storage));

  size_t cnt = 2000U;
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);

  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  // Filled in by convert(); reused below to revert the stored vectors.
  IndexQueryMeta new_meta;

  const float epsilon = 0.1;

  std::random_device rd;
  std::mt19937 gen(rd());

  std::uniform_real_distribution<float> dist(-2.0, 2.0);

  // Keep the original FP32 inputs so they can be compared after the round trip.
  std::vector<NumericalVector<float>> vecs;
  vecs.reserve(cnt);
  for (size_t i = 0; i < cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = dist(gen);
    }

    std::string new_vec;

    ASSERT_EQ(0, reformer->convert(vec.data(), qmeta, &new_vec, &new_meta));
    ASSERT_EQ(0, streamer->add_impl(i, new_vec.data(), new_meta, ctx));

    vecs.push_back(vec);
  }

  // Phase 1: fetch every stored vector by key and revert it.
  for (size_t i = 0; i < cnt; i++) {
    float expected_vec_value = vecs[i][dim - 1];

    const void *vector = streamer->get_vector(i);
    ASSERT_NE(vector, nullptr);

    std::string denormalized_vec;
    denormalized_vec.resize(dim * sizeof(float));
    reformer->revert(vector, new_meta, &denormalized_vec);
    float vector_value =
        reinterpret_cast<const float *>(denormalized_vec.data())[dim - 1];

    EXPECT_NEAR(expected_vec_value, vector_value, epsilon);
  }

  auto linearCtx = streamer->create_context();
  auto knnCtx = streamer->create_context();
  ASSERT_TRUE(!!linearCtx);
  ASSERT_TRUE(!!knnCtx);
  knnCtx->set_fetch_vector(true);

  size_t query_cnt = 200U;
  size_t topk = 30;
  linearCtx->set_topk(topk);
  knnCtx->set_topk(topk);
  uint64_t knnTotalTime = 0;
  uint64_t linearTotalTime = 0;

  // Phase 2: query with the first `query_cnt` inserted vectors and check the
  // vector attached to the best graph-search hit.
  for (size_t i = 0; i < query_cnt; i++) {
    auto &vec = vecs[i];

    std::string new_query;
    // Separate name so the meta produced by convert() above is not shadowed.
    IndexQueryMeta query_meta;
    ASSERT_EQ(0,
              reformer->transform(vec.data(), qmeta, &new_query, &query_meta));

    auto t1 = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0, streamer->search_impl(new_query.data(), query_meta, knnCtx));
    auto t2 = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(
        0, streamer->search_bf_impl(new_query.data(), query_meta, linearCtx));
    auto t3 = ailego::Realtime::MicroSeconds();

    knnTotalTime += t2 - t1;
    linearTotalTime += t3 - t2;

    auto &knnResult = knnCtx->result();
    ASSERT_EQ(topk, knnResult.size());

    auto &linearResult = linearCtx->result();
    ASSERT_EQ(topk, linearResult.size());
    // The brute-force search must rank the query's own key first.
    ASSERT_EQ(i, linearResult[0].key());

    ASSERT_NE(knnResult[0].vector(), nullptr);

    std::string denormalized_vec;
    denormalized_vec.resize(dim * sizeof(float));
    reformer->revert(knnResult[0].vector(), query_meta, &denormalized_vec);

    float expected_vec_value = vec[dim - 1];
    float vector_value =
        reinterpret_cast<const float *>(denormalized_vec.data())[dim - 1];

    EXPECT_NEAR(expected_vec_value, vector_value, epsilon);
  }

  std::cout << "knnTotalTime: " << knnTotalTime << std::endl;
  std::cout << "linearTotalTime: " << linearTotalTime << std::endl;
}

// Round-trip check for the "CosineInt8Converter" pipeline. Each inserted
// vector has `fixed_value` in its first quarter and `fixed_value + i * 10` in
// the remaining components. The last component of the reverted vector (fetched
// by key, and fetched from the best graph-search hit) must match the original
// within `epsilon`.
TEST_F(HnswStreamerTest, TestFetchVectorCosineInt8Converter) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer != nullptr);

  ailego::Params params;
  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 50);
  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);
  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 100);
  params.set(PARAM_HNSW_STREAMER_EF, 100);
  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
  params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, true);

  ailego::Params stg_params;

  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dim);
  index_meta_raw.set_metric("Cosine", 0, ailego::Params());

  ailego::Params converter_params;
  auto converter = IndexFactory::CreateConverter("CosineInt8Converter");
  ASSERT_TRUE(converter != nullptr);
  // The init status used to be dropped; surface a failure right here.
  ASSERT_EQ(0, converter->init(index_meta_raw, converter_params));

  IndexMeta index_meta = converter->meta();

  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());
  ASSERT_TRUE(reformer != nullptr);
  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_TRUE(storage != nullptr);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestFetchVectorCosineInt8Converter.index",
                             true));
  ASSERT_EQ(0, streamer->init(index_meta, params));
  ASSERT_EQ(0, streamer->open(storage));

  NumericalVector<float> vec(dim);
  size_t cnt = 2000U;
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);

  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  // Filled in by convert(); reused below to revert the stored vectors.
  IndexQueryMeta new_meta;

  const float epsilon = 1e-2;
  float fixed_value = float(cnt) / 2;

  // Writes the synthetic vector for index `i` into `vec`.
  auto fill_vec = [&](size_t i) {
    const float add_on = static_cast<float>(i * 10);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = (j < dim / 4) ? fixed_value : fixed_value + add_on;
    }
  };
  // Value that fill_vec(i) stores in the last component.
  auto expected_last = [&](size_t i) {
    return fixed_value + static_cast<float>(i * 10);
  };

  for (size_t i = 0; i < cnt; i++) {
    fill_vec(i);

    std::string new_vec;

    ASSERT_EQ(0, reformer->convert(vec.data(), qmeta, &new_vec, &new_meta));
    ASSERT_EQ(0, streamer->add_impl(i, new_vec.data(), new_meta, ctx));
  }

  // Phase 1: fetch every stored vector by key and revert it.
  for (size_t i = 0; i < cnt; i++) {
    const void *vector = streamer->get_vector(i);
    ASSERT_NE(vector, nullptr);

    std::string denormalized_vec;
    denormalized_vec.resize(dim * sizeof(float));
    reformer->revert(vector, new_meta, &denormalized_vec);

    float vector_value =
        reinterpret_cast<const float *>(denormalized_vec.data())[dim - 1];
    EXPECT_NEAR(vector_value, expected_last(i), epsilon);
  }

  auto linearCtx = streamer->create_context();
  auto knnCtx = streamer->create_context();
  ASSERT_TRUE(!!linearCtx);
  ASSERT_TRUE(!!knnCtx);
  knnCtx->set_fetch_vector(true);

  size_t query_cnt = 200U;
  size_t topk = 200;
  linearCtx->set_topk(topk);
  knnCtx->set_topk(topk);
  uint64_t knnTotalTime = 0;
  uint64_t linearTotalTime = 0;

  // Phase 2: query with the first `query_cnt` synthetic vectors and check the
  // vector attached to the best graph-search hit.
  for (size_t i = 0; i < query_cnt; i++) {
    fill_vec(i);

    std::string new_query;
    // Separate name so the meta produced by convert() above is not shadowed.
    IndexQueryMeta query_meta;
    ASSERT_EQ(0,
              reformer->transform(vec.data(), qmeta, &new_query, &query_meta));

    auto t1 = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0, streamer->search_impl(new_query.data(), query_meta, knnCtx));
    auto t2 = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(
        0, streamer->search_bf_impl(new_query.data(), query_meta, linearCtx));
    auto t3 = ailego::Realtime::MicroSeconds();

    knnTotalTime += t2 - t1;
    linearTotalTime += t3 - t2;

    auto &knnResult = knnCtx->result();
    ASSERT_EQ(topk, knnResult.size());

    auto &linearResult = linearCtx->result();
    ASSERT_EQ(topk, linearResult.size());
    // The brute-force search must rank the query's own key first.
    ASSERT_EQ(i, linearResult[0].key());

    ASSERT_NE(knnResult[0].vector(), nullptr);

    std::string denormalized_vec;
    denormalized_vec.resize(dim * sizeof(float));
    reformer->revert(knnResult[0].vector(), query_meta, &denormalized_vec);

    float vector_value =
        reinterpret_cast<const float *>(denormalized_vec.data())[dim - 1];
    EXPECT_NEAR(vector_value, expected_last(i), epsilon);
  }

  std::cout << "knnTotalTime: " << knnTotalTime << std::endl;
  std::cout << "linearTotalTime: " << linearTotalTime << std::endl;
}

// Round-trip check for the "CosineInt4Converter" pipeline. Each inserted
// vector has `fixed_value` in its first quarter and `fixed_value + i * 10` in
// the remaining components. The last component of the reverted vector (fetched
// by key, and fetched from the best graph-search hit) must match the original
// within `epsilon`.
TEST_F(HnswStreamerTest, TestFetchVectorCosineInt4Converter) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer != nullptr);

  ailego::Params params;
  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 50);
  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);
  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 100);
  params.set(PARAM_HNSW_STREAMER_EF, 100);
  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
  params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, true);

  ailego::Params stg_params;

  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dim);
  index_meta_raw.set_metric("Cosine", 0, ailego::Params());

  ailego::Params converter_params;
  auto converter = IndexFactory::CreateConverter("CosineInt4Converter");
  ASSERT_TRUE(converter != nullptr);
  // The init status used to be dropped; surface a failure right here.
  ASSERT_EQ(0, converter->init(index_meta_raw, converter_params));

  IndexMeta index_meta = converter->meta();

  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());
  ASSERT_TRUE(reformer != nullptr);
  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_TRUE(storage != nullptr);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestFetchVectorCosineInt4Converter.index",
                             true));
  ASSERT_EQ(0, streamer->init(index_meta, params));
  ASSERT_EQ(0, streamer->open(storage));

  NumericalVector<float> vec(dim);
  size_t cnt = 2000U;
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);

  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  // Filled in by convert(); reused below to revert the stored vectors.
  IndexQueryMeta new_meta;

  const float epsilon = 1e-2;
  float fixed_value = float(cnt) / 2;

  // Writes the synthetic vector for index `i` into `vec`.
  auto fill_vec = [&](size_t i) {
    const float add_on = static_cast<float>(i * 10);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = (j < dim / 4) ? fixed_value : fixed_value + add_on;
    }
  };
  // Value that fill_vec(i) stores in the last component.
  auto expected_last = [&](size_t i) {
    return fixed_value + static_cast<float>(i * 10);
  };

  for (size_t i = 0; i < cnt; i++) {
    fill_vec(i);

    std::string new_vec;

    ASSERT_EQ(0, reformer->convert(vec.data(), qmeta, &new_vec, &new_meta));
    ASSERT_EQ(0, streamer->add_impl(i, new_vec.data(), new_meta, ctx));
  }

  // Phase 1: fetch every stored vector by key and revert it.
  for (size_t i = 0; i < cnt; i++) {
    const void *vector = streamer->get_vector(i);
    ASSERT_NE(vector, nullptr);

    std::string denormalized_vec;
    denormalized_vec.resize(dim * sizeof(float));
    reformer->revert(vector, new_meta, &denormalized_vec);

    float vector_value =
        reinterpret_cast<const float *>(denormalized_vec.data())[dim - 1];
    EXPECT_NEAR(vector_value, expected_last(i), epsilon);
  }

  auto linearCtx = streamer->create_context();
  auto knnCtx = streamer->create_context();
  ASSERT_TRUE(!!linearCtx);
  ASSERT_TRUE(!!knnCtx);
  knnCtx->set_fetch_vector(true);

  size_t query_cnt = 100U;
  size_t topk = 200;
  linearCtx->set_topk(topk);
  knnCtx->set_topk(topk);
  uint64_t knnTotalTime = 0;
  uint64_t linearTotalTime = 0;

  // Phase 2: query with the first `query_cnt` synthetic vectors and check the
  // vector attached to the best graph-search hit.
  for (size_t i = 0; i < query_cnt; i++) {
    fill_vec(i);

    std::string new_query;
    // Separate name so the meta produced by convert() above is not shadowed.
    IndexQueryMeta query_meta;
    ASSERT_EQ(0,
              reformer->transform(vec.data(), qmeta, &new_query, &query_meta));

    auto t1 = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0, streamer->search_impl(new_query.data(), query_meta, knnCtx));
    auto t2 = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(
        0, streamer->search_bf_impl(new_query.data(), query_meta, linearCtx));
    auto t3 = ailego::Realtime::MicroSeconds();

    knnTotalTime += t2 - t1;
    linearTotalTime += t3 - t2;

    auto &knnResult = knnCtx->result();
    ASSERT_EQ(topk, knnResult.size());

    auto &linearResult = linearCtx->result();
    ASSERT_EQ(topk, linearResult.size());
    // The brute-force search must rank the query's own key first.
    ASSERT_EQ(i, linearResult[0].key());

    ASSERT_NE(knnResult[0].vector(), nullptr);

    std::string denormalized_vec;
    denormalized_vec.resize(dim * sizeof(float));
    reformer->revert(knnResult[0].vector(), query_meta, &denormalized_vec);

    float vector_value =
        reinterpret_cast<const float *>(denormalized_vec.data())[dim - 1];
    EXPECT_NEAR(vector_value, expected_last(i), epsilon);
  }

  std::cout << "knnTotalTime: " << knnTotalTime << std::endl;
  std::cout << "linearTotalTime: " << linearTotalTime << std::endl;
}

// Radius (threshold) search with the IndexMeta default metric: after a plain
// top-k search, the score of the middle hit is used as the threshold, which
// must shrink the result set and bound every returned score. Resetting the
// threshold must restore the full top-k result.
TEST_F(HnswStreamerTest, TestRnnSearch) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer != nullptr);

  ailego::Params params;
  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);
  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);
  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 10);
  // params.set(PARAM_HNSW_STREAMER_EF, 5);
  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
  ailego::Params stg_params;

  IndexMeta index_meta(IndexMeta::DataType::DT_FP32, dim);

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_TRUE(storage != nullptr);
  ASSERT_EQ(0, storage->init(stg_params));
  // Use a file name of its own; the previous name was copy-pasted from
  // TestRnnSearchInnerProduct and made both tests share one index file.
  ASSERT_EQ(0, storage->open(dir_ + "/TestRnnSearch.index", true));
  ASSERT_EQ(0, streamer->init(index_meta, params));
  ASSERT_EQ(0, streamer->open(storage));

  size_t cnt = 1000U;
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);

  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);

  // Document i is the constant vector (i, i, ..., i).
  for (size_t i = 0; i < cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }

    ASSERT_EQ(0, streamer->add_impl(i, vec.data(), qmeta, ctx));
  }

  // Query: the all-ones vector.
  NumericalVector<float> vec(dim);
  for (size_t j = 0; j < dim; ++j) {
    vec[j] = 1.0;
  }

  size_t topk = 50;
  ctx->set_topk(topk);
  ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));
  // `results` refers to the context's result list and is re-read after each
  // of the searches below.
  auto &results = ctx->result();
  ASSERT_EQ(topk, results.size());

  // Restrict the search to the score of the middle hit.
  float radius = results[topk / 2].score();
  ctx->set_threshold(radius);
  ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));
  ASSERT_GT(topk, results.size());
  for (size_t k = 0; k < results.size(); ++k) {
    ASSERT_GE(radius, results[k].score());
  }

  // Test Reset Threshold
  ctx->reset_threshold();
  ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));
  ASSERT_EQ(topk, results.size());
  ASSERT_LT(radius, results[topk - 1].score());
}

// Radius (threshold) search with the "InnerProduct" metric. The threshold is
// the negated score of the middle hit of an unrestricted top-k search; it must
// shrink the result set, and resetting it must restore the full top-k result.
TEST_F(HnswStreamerTest, TestRnnSearchInnerProduct) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer != nullptr);

  IndexMeta meta(IndexMeta::DataType::DT_FP32, dim);
  meta.set_metric("InnerProduct", 0, ailego::Params());

  ailego::Params hnsw_params;
  hnsw_params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 50);
  hnsw_params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);
  hnsw_params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 10);
  // hnsw_params.set(PARAM_HNSW_STREAMER_EF, 5);
  hnsw_params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);

  ailego::Params storage_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_EQ(0, storage->init(storage_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestRnnSearchInnerProduct.index", true));
  ASSERT_EQ(0, streamer->init(meta, hnsw_params));
  ASSERT_EQ(0, streamer->open(storage));

  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);

  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);

  // Insert the constant vectors (key, key, ..., key) for key in [0, total).
  const size_t total = 1000U;
  for (size_t key = 0; key < total; ++key) {
    NumericalVector<float> point(dim);
    for (size_t d = 0; d < dim; ++d) {
      point[d] = key;
    }

    ASSERT_EQ(0, streamer->add_impl(key, point.data(), qmeta, ctx));
  }

  // Query with the all-ones vector.
  NumericalVector<float> query(dim);
  for (size_t d = 0; d < dim; ++d) {
    query[d] = 1.0;
  }

  const size_t topk = 50;
  ctx->set_topk(topk);

  ASSERT_EQ(0, streamer->search_impl(query.data(), qmeta, ctx));
  auto &hits = ctx->result();
  ASSERT_EQ(topk, hits.size());

  // Threshold search: negated score of the middle hit.
  const float mid_score = hits[topk / 2].score();
  float radius = -mid_score;
  ctx->set_threshold(radius);
  ASSERT_EQ(0, streamer->search_impl(query.data(), qmeta, ctx));
  ASSERT_GT(topk, hits.size());
  for (size_t idx = 0; idx != hits.size(); ++idx) {
    ASSERT_GE(radius, hits[idx].score());
  }

  // Dropping the threshold brings back all topk hits.
  ctx->reset_threshold();
  ASSERT_EQ(0, streamer->search_impl(query.data(), qmeta, ctx));
  ASSERT_EQ(topk, hits.size());
  ASSERT_LT(-radius, hits[topk - 1].score());
}

// Radius (threshold) search with the "Cosine" metric through the
// "CosineFp32Converter" pipeline: a fixed threshold of 0.5 must shrink the
// result set and bound every returned score; resetting the threshold must
// restore the full top-k result.
TEST_F(HnswStreamerTest, TestRnnSearchCosine) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer != nullptr);

  ailego::Params params;
  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);
  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);
  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 10);
  // params.set(PARAM_HNSW_STREAMER_EF, 5);
  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
  ailego::Params stg_params;

  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dim);
  index_meta_raw.set_metric("Cosine", 0, ailego::Params());

  ailego::Params converter_params;
  auto converter = IndexFactory::CreateConverter("CosineFp32Converter");
  ASSERT_TRUE(converter != nullptr);
  // The init status used to be dropped; surface a failure right here.
  ASSERT_EQ(0, converter->init(index_meta_raw, converter_params));

  IndexMeta index_meta = converter->meta();

  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());
  ASSERT_TRUE(reformer != nullptr);
  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_TRUE(storage != nullptr);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestRnnSearchCosine.index", true));
  ASSERT_EQ(0, streamer->init(index_meta, params));
  ASSERT_EQ(0, streamer->open(storage));

  size_t cnt = 1000U;
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);

  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);

  std::random_device rd;
  std::mt19937 gen(rd());

  std::uniform_real_distribution<float> dist(-1.0, 1.0);

  // Insert random vectors, converted by the reformer first.
  for (size_t i = 0; i < cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = dist(gen);
    }

    std::string new_vec;
    IndexQueryMeta new_meta;

    ASSERT_EQ(0, reformer->convert(vec.data(), qmeta, &new_vec, &new_meta));
    ASSERT_EQ(0, streamer->add_impl(i, new_vec.data(), new_meta, ctx));
  }

  size_t topk = 50;
  ctx->set_topk(topk);

  // Query: the all-ones vector, transformed by the reformer.
  NumericalVector<float> vec(dim);
  for (size_t j = 0; j < dim; ++j) {
    vec[j] = 1.0;
  }

  std::string new_query;
  IndexQueryMeta new_meta;
  ASSERT_EQ(0, reformer->transform(vec.data(), qmeta, &new_query, &new_meta));

  ASSERT_EQ(0, streamer->search_impl(new_query.data(), new_meta, ctx));
  // `results` refers to the context's result list and is re-read after each
  // of the searches below.
  auto &results = ctx->result();
  ASSERT_EQ(topk, results.size());

  float radius = 0.5f;
  ctx->set_threshold(radius);
  ASSERT_EQ(0, streamer->search_impl(new_query.data(), new_meta, ctx));
  ASSERT_GT(topk, results.size());
  for (size_t k = 0; k < results.size(); ++k) {
    ASSERT_GE(radius, results[k].score());
  }

  // Test Reset Threshold
  ctx->reset_threshold();
  ASSERT_EQ(0, streamer->search_impl(new_query.data(), new_meta, ctx));
  ASSERT_EQ(topk, results.size());
  ASSERT_LT(radius, results[topk - 1].score());
}

// Group-by search: documents are grouped by a function of their key and the
// search must return exactly `group_num` non-empty groups. Covered twice:
// through search_impl() over the whole index, and through
// search_bf_by_p_keys_impl() over an explicit list of primary keys.
TEST_F(HnswStreamerTest, TestGroup) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer != nullptr);

  ailego::Params params;
  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);
  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);
  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 10);
  params.set(PARAM_HNSW_STREAMER_EF, 5);
  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
  params.set(PARAM_HNSW_STREAMER_GET_VECTOR_ENABLE, true);

  ailego::Params stg_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_TRUE(storage != nullptr);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestGroup.index", true));
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);

  size_t cnt = 5000U;
  NumericalVector<float> vec(dim);
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);

  // Document i is the constant vector with every component equal to i / 10.
  for (size_t i = 0; i < cnt; i++) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i / 10.0;
    }
    // A failed insert would silently change what the searches below see.
    ASSERT_EQ(0, streamer->add_impl(i, vec.data(), qmeta, ctx));
  }

  size_t group_topk = 20;
  uint64_t total_time = 0;

  // Groups keys by their tens digit, giving ten possible group ids.
  auto groupbyFunc = [](uint64_t key) {
    uint32_t group_id = key / 10 % 10;
    return std::string("g_") + std::to_string(group_id);
  };

  size_t group_num = 5;

  ctx->set_group_params(group_num, group_topk);
  ctx->set_group_by(groupbyFunc);

  size_t query_value = cnt / 2;
  for (size_t j = 0; j < dim; ++j) {
    vec[j] = float(query_value) / 10 + 0.1f;
  }

  auto t1 = ailego::Realtime::MicroSeconds();
  ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));
  auto t2 = ailego::Realtime::MicroSeconds();

  total_time += t2 - t1;
  std::cout << "Total time: " << total_time << std::endl;

  auto &group_result = ctx->group_result();
  ASSERT_EQ(group_result.size(), group_num);

  for (uint32_t i = 0; i < group_result.size(); ++i) {
    auto &result = group_result[i].docs();
    // Unsigned literal avoids a signed/unsigned comparison with size().
    ASSERT_GT(result.size(), 0U);
  }

  // do linear search by p_keys test
  // Here keys are grouped by their last digit instead.
  auto groupbyFuncLinear = [](uint64_t key) {
    uint32_t group_id = key % 10;
    return std::string("g_") + std::to_string(group_id);
  };

  auto linear_pk_ctx = streamer->create_context();
  ASSERT_TRUE(!!linear_pk_ctx);

  linear_pk_ctx->set_group_params(group_num, group_topk);
  linear_pk_ctx->set_group_by(groupbyFuncLinear);

  // One key list for the single query.
  std::vector<std::vector<uint64_t>> p_keys{{4, 3, 2, 1, 5, 6, 7, 8, 9, 10}};

  ASSERT_EQ(0, streamer->search_bf_by_p_keys_impl(vec.data(), p_keys, qmeta,
                                                  linear_pk_ctx));
  auto &linear_by_pkeys_group_result = linear_pk_ctx->group_result();
  ASSERT_EQ(linear_by_pkeys_group_result.size(), group_num);

  for (uint32_t i = 0; i < linear_by_pkeys_group_result.size(); ++i) {
    auto &result = linear_by_pkeys_group_result[i].docs();

    ASSERT_GT(result.size(), 0U);

    // Groups are expected in the order of keys 10, 9, 8, 7, 6.
    ASSERT_EQ(10 - i, result[0].key());
  }
}

// Group-by search that asks for more groups (12) than the group-by function
// can produce (ten ids, from the tens digit of the key). The search must
// succeed and return 10 non-empty groups.
TEST_F(HnswStreamerTest, TestGroupNotEnoughNum) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer != nullptr);

  ailego::Params params;
  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);
  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);
  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 10);
  params.set(PARAM_HNSW_STREAMER_EF, 5);
  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
  ailego::Params stg_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_TRUE(storage != nullptr);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestGroupNotEnoughNum.index", true));
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);

  size_t cnt = 5000U;
  NumericalVector<float> vec(dim);
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);

  // Document i is the constant vector with every component equal to i / 10.
  for (size_t i = 0; i < cnt; i++) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i / 10.0;
    }
    // A failed insert would silently change what the search below sees.
    ASSERT_EQ(0, streamer->add_impl(i, vec.data(), qmeta, ctx));
  }

  size_t group_topk = 20;
  uint64_t total_time = 0;

  // Groups keys by their tens digit, giving ten possible group ids.
  auto groupbyFunc = [](uint64_t key) {
    uint32_t group_id = key / 10 % 10;
    return std::string("g_") + std::to_string(group_id);
  };

  // Deliberately larger than the number of distinct group ids.
  size_t group_num = 12;
  ctx->set_group_params(group_num, group_topk);
  ctx->set_group_by(groupbyFunc);

  size_t query_value = cnt / 2;
  for (size_t j = 0; j < dim; ++j) {
    vec[j] = float(query_value) / 10 + 0.1f;
  }

  auto t1 = ailego::Realtime::MicroSeconds();
  ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));
  auto t2 = ailego::Realtime::MicroSeconds();
  total_time += t2 - t1;

  std::cout << "Total time: " << total_time << std::endl;

  auto &group_result = ctx->group_result();

  // Unsigned operands avoid signed/unsigned comparisons with size().
  ASSERT_EQ(group_result.size(), size_t{10});
  for (uint32_t i = 0; i < group_result.size(); ++i) {
    auto &result = group_result[i].docs();

    ASSERT_GT(result.size(), 0U);
  }
}

// Group-by search with the brute-force threshold set to twice the number of
// inserted documents. The search must return exactly `group_num` non-empty
// groups.
// NOTE(review): going by the test name, the large threshold is meant to route
// search_impl() through the brute-force path; confirm against the streamer.
TEST_F(HnswStreamerTest, TestGroupInBruteforceSearch) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer != nullptr);

  size_t cnt = 5000U;

  ailego::Params params;
  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);
  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);
  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 10);
  params.set(PARAM_HNSW_STREAMER_EF, 5);
  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, cnt * 2);
  ailego::Params stg_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_TRUE(storage != nullptr);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0,
            storage->open(dir_ + "/TestGroupInBruteforceSearch.index", true));
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);

  NumericalVector<float> vec(dim);
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);

  // Document i is the constant vector with every component equal to i / 10.
  for (size_t i = 0; i < cnt; i++) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i / 10.0;
    }
    // A failed insert would silently change what the search below sees.
    ASSERT_EQ(0, streamer->add_impl(i, vec.data(), qmeta, ctx));
  }

  size_t group_topk = 20;
  uint64_t total_time = 0;

  // Groups keys by their tens digit, giving ten possible group ids.
  auto groupbyFunc = [](uint64_t key) {
    uint32_t group_id = key / 10 % 10;
    return std::string("g_") + std::to_string(group_id);
  };

  size_t group_num = 5;
  ctx->set_group_params(group_num, group_topk);
  ctx->set_group_by(groupbyFunc);

  size_t query_value = cnt / 2;
  for (size_t j = 0; j < dim; ++j) {
    vec[j] = float(query_value) / 10 + 0.1f;
  }

  auto t1 = ailego::Realtime::MicroSeconds();
  ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, ctx));
  auto t2 = ailego::Realtime::MicroSeconds();
  total_time += t2 - t1;

  std::cout << "Total time: " << total_time << std::endl;

  auto &group_result = ctx->group_result();

  // Compare against the requested count rather than a signed literal.
  ASSERT_EQ(group_result.size(), group_num);
  for (uint32_t i = 0; i < group_result.size(); ++i) {
    auto &result = group_result[i].docs();

    ASSERT_GT(result.size(), 0U);
  }
}

// Inserts random FP32 vectors through the "BinaryConverter" pipeline and then
// queries with the first `query_cnt` of them. Each query must return `topk`
// hits, rank its own key first, and report a best score of (almost) zero.
TEST_F(HnswStreamerTest, TestBinaryConverter) {
  uint32_t dimension = 2560;

  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer != nullptr);

  ailego::Params params;
  // params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);
  // params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);
  // params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 10);
  // params.set(PARAM_HNSW_STREAMER_EF, 5);
  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);

  ailego::Params stg_params;

  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dimension);
  index_meta_raw.set_metric("InnerProduct", 0, ailego::Params());

  ailego::Params converter_params;
  auto converter = IndexFactory::CreateConverter("BinaryConverter");
  ASSERT_TRUE(converter != nullptr);
  // The init status used to be dropped; surface a failure right here.
  ASSERT_EQ(0, converter->init(index_meta_raw, converter_params));

  IndexMeta index_meta = converter->meta();

  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());
  ASSERT_TRUE(reformer != nullptr);
  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_TRUE(storage != nullptr);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestBinaryConverter.index", true));
  ASSERT_EQ(0, streamer->init(index_meta, params));
  ASSERT_EQ(0, streamer->open(storage));

  size_t cnt = 5000U;
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);

  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension);

  std::random_device rd;
  std::mt19937 gen(rd());

  std::uniform_real_distribution<float> dist(-2.0, 2.0);
  // Keep the FP32 inputs so they can be replayed as queries.
  std::vector<NumericalVector<float>> vecs;
  vecs.reserve(cnt);

  for (size_t i = 0; i < cnt; i++) {
    NumericalVector<float> vec(dimension);
    for (size_t j = 0; j < dimension; ++j) {
      vec[j] = dist(gen);
    }

    std::string new_vec;
    IndexQueryMeta new_meta;

    ASSERT_EQ(0, reformer->convert(vec.data(), qmeta, &new_vec, &new_meta));
    ASSERT_EQ(0, streamer->add_impl(i, new_vec.data(), new_meta, ctx));

    vecs.push_back(vec);
  }

  size_t query_cnt = 200U;
  auto knnCtx = streamer->create_context();
  ASSERT_TRUE(!!knnCtx);

  // topk is the same for every query, so configure it once.
  const size_t topk = 50;
  knnCtx->set_topk(topk);

  const float epsilon = 1e-6;
  for (size_t i = 0; i < query_cnt; i++) {
    auto &vec = vecs[i];
    std::string new_query;
    IndexQueryMeta new_meta;
    ASSERT_EQ(0, reformer->transform(vec.data(), qmeta, &new_query, &new_meta));

    ASSERT_EQ(0, streamer->search_impl(new_query.data(), new_meta, knnCtx));
    auto &results = knnCtx->result();
    ASSERT_EQ(topk, results.size());
    // A stored vector queried with itself must come back first with score ~0.
    ASSERT_EQ(i, results[0].key());
    ASSERT_NEAR(0, results[0].score(), epsilon);
  }
}

// Inserts constant vectors through add_with_id_impl() (ids 0, 4, 8, ... first,
// then 2, 6, 10, ...), then compares graph search with brute-force search:
//  - every hit must carry a valid key and index,
//  - get_vector_by_id(index) must return the vector whose components all
//    equal that index,
//  - recall@topk and top-1 recall of the graph search must exceed 0.80.
TEST_F(HnswStreamerTest, TestAddAndSearchWithID) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(streamer != nullptr);

  ailego::Params params;
  params.set(PARAM_HNSW_STREAMER_MAX_NEIGHBOR_COUNT, 10);
  params.set(PARAM_HNSW_STREAMER_SCALING_FACTOR, 16);
  params.set(PARAM_HNSW_STREAMER_EFCONSTRUCTION, 10);
  params.set(PARAM_HNSW_STREAMER_EF, 5);
  params.set(PARAM_HNSW_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
  ailego::Params stg_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_TRUE(storage != nullptr);
  ASSERT_EQ(0, storage->init(stg_params));
  // Use a file name of its own; the previous "/TestAddAndSearch.index" was
  // copy-pasted and did not match this test's name.
  ASSERT_EQ(0, storage->open(dir_ + "/TestAddAndSearchWithID.index", true));
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  NumericalVector<float> vec(dim);
  size_t cnt = 20000U;
  auto ctx = streamer->create_context();
  auto linearCtx = streamer->create_context();
  auto knnCtx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  ASSERT_TRUE(!!linearCtx);
  ASSERT_TRUE(!!knnCtx);
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);

  // Two interleaved passes, so ids are not inserted in increasing order.
  for (size_t i = 0; i < cnt; i += 4) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    ASSERT_EQ(0, streamer->add_with_id_impl(i, vec.data(), qmeta, ctx));
  }

  for (size_t i = 2; i < cnt; i += 4) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i;
    }
    ASSERT_EQ(0, streamer->add_with_id_impl(i, vec.data(), qmeta, ctx));
  }

  // streamer->print_debug_info();

  size_t topk = 200;
  linearCtx->set_topk(topk);
  knnCtx->set_topk(topk);
  uint64_t knnTotalTime = 0;
  uint64_t linearTotalTime = 0;
  int totalHits = 0;
  int totalCnts = 0;
  int topk1Hits = 0;
  // Number of queries actually issued; denominator of the top-1 recall.
  int queryCnts = 0;
  for (size_t i = 0; i < cnt / 10; i += 2) {
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i + 0.1f;
    }
    ++queryCnts;
    auto t1 = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0, streamer->search_impl(vec.data(), qmeta, knnCtx));
    auto t2 = ailego::Realtime::MicroSeconds();
    ASSERT_EQ(0, streamer->search_bf_impl(vec.data(), qmeta, linearCtx));
    auto t3 = ailego::Realtime::MicroSeconds();
    knnTotalTime += t2 - t1;
    linearTotalTime += t3 - t2;

    auto &knnResult = knnCtx->result();
    ASSERT_EQ(topk, knnResult.size());
    if (i == knnResult[0].key()) {
      ++topk1Hits;
    }

    auto &linearResult = linearCtx->result();
    ASSERT_EQ(topk, linearResult.size());
    ASSERT_EQ(i, linearResult[0].key());

    // Count how many graph-search hits also appear in the brute-force result.
    for (size_t k = 0; k < topk; ++k) {
      totalCnts++;
      for (size_t j = 0; j < topk; ++j) {
        if (linearResult[j].key() == knnResult[k].key()) {
          totalHits++;
          break;
        }
      }
    }

    for (size_t j = 0; j < topk; ++j) {
      ASSERT_NE(linearResult[j].key(), kInvalidKey);
      ASSERT_NE(linearResult[j].index(), kInvalidKey);
      auto linearVec = static_cast<const float *>(
          streamer->get_vector_by_id(linearResult[j].index()));
      // Fail the test instead of dereferencing a missing vector.
      ASSERT_NE(linearVec, nullptr);

      for (size_t z = 0; z < dim; ++z) {
        ASSERT_FLOAT_EQ(linearVec[z], linearResult[j].index());
      }
    }
    for (size_t j = 0; j < topk; ++j) {
      ASSERT_NE(knnResult[j].key(), kInvalidKey);
      ASSERT_NE(knnResult[j].index(), kInvalidKey);
      auto knnVec = static_cast<const float *>(
          streamer->get_vector_by_id(knnResult[j].index()));
      ASSERT_NE(knnVec, nullptr);
      for (size_t z = 0; z < dim; ++z) {
        ASSERT_FLOAT_EQ(knnVec[z], knnResult[j].index());
      }
    }
  }
  ASSERT_GT(queryCnts, 0);
  float recall = totalHits * 1.0f / totalCnts;
  // Fraction of queries whose best graph-search hit is the expected key. The
  // previous formula (hits * 100 / cnt) did not match the number of queries
  // issued, which made the 0.80 threshold far weaker than intended.
  float topk1Recall = topk1Hits * 1.0f / queryCnts;
  float cost = linearTotalTime * 1.0f / knnTotalTime;
#if 0
    printf("knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d "
           "R@%zd=%f R@1=%f cost=%f\n",
           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,
           topk1Recall, cost);
#endif
  EXPECT_GT(recall, 0.80f);
  EXPECT_GT(topk1Recall, 0.80f);
  // EXPECT_GT(cost, 2.0f);
}

// Exercises a "BasicRefiner" built on top of two streamers:
//   - the base "HnswStreamer" is initialised with the meta produced by the
//     "BinaryConverter" and receives the reformer-converted vectors;
//   - the refine "FlatStreamer" is initialised with the raw FP32 meta and
//     receives the original vectors.
// 5000 random vectors are added, then the first 200 of them are used as
// queries; each query must return `topk` results with itself as the top hit.
TEST_F(HnswStreamerTest, TestBasicRefiner) {
  uint32_t dimension = 1120;

  IndexStreamer::Pointer base_streamer =
      IndexFactory::CreateStreamer("HnswStreamer");
  ASSERT_TRUE(base_streamer != nullptr);

  IndexStreamer::Pointer refine_streamer =
      IndexFactory::CreateStreamer("FlatStreamer");
  ASSERT_TRUE(refine_streamer != nullptr);

  IndexRefiner::Pointer refiner = IndexFactory::CreateRefiner("BasicRefiner");
  ASSERT_TRUE(refiner != nullptr);

  ailego::Params params;
  IndexMeta index_meta(IndexMeta::DataType::DT_FP32, dimension);
  index_meta.set_metric("InnerProduct", 0, ailego::Params());

  ailego::Params converter_params;
  auto converter = IndexFactory::CreateConverter("BinaryConverter");
  ASSERT_TRUE(converter != nullptr);

  converter->init(index_meta, converter_params);

  // Meta describing the converted (binary) representation; it also names the
  // reformer used below to convert documents and queries.
  IndexMeta index_meta_binary = converter->meta();

  auto reformer =
      IndexFactory::CreateReformer(index_meta_binary.reformer_name());
  ASSERT_TRUE(reformer != nullptr);

  ASSERT_EQ(0, reformer->init(index_meta_binary.reformer_params()));

  // base streamer
  ailego::Params base_stg_params;
  auto base_storage = IndexFactory::CreateStorage("MMapFileStorage");
  // The factory result is dereferenced right away, so fail cleanly if the
  // storage could not be created.
  ASSERT_TRUE(base_storage != nullptr);
  ASSERT_EQ(0, base_storage->init(base_stg_params));
  ASSERT_EQ(0, base_storage->open(dir_ + "/TestBasicRefinerBase.index", true));
  ASSERT_EQ(0, base_streamer->init(index_meta_binary, params));
  ASSERT_EQ(0, base_streamer->open(base_storage));

  auto base_ctx = base_streamer->create_context();
  ASSERT_TRUE(!!base_ctx);

  // refine streamer
  ailego::Params refine_stg_params;
  auto refine_storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_TRUE(refine_storage != nullptr);
  ASSERT_EQ(0, refine_storage->init(refine_stg_params));
  ASSERT_EQ(0,
            refine_storage->open(dir_ + "/TestBasicRefinerRefine.index", true));
  ASSERT_EQ(0, refine_streamer->init(index_meta, params));
  ASSERT_EQ(0, refine_streamer->open(refine_storage));
  auto refine_ctx = refine_streamer->create_context();
  ASSERT_TRUE(!!refine_ctx);

  ailego::Params refiner_params;
  ASSERT_EQ(0, refiner->init(base_streamer, refine_streamer, refiner_params));

  auto ctx = refiner->create_context();
  ASSERT_TRUE(!!ctx);

  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension);

  // Note: the generator is seeded from std::random_device, so the data set
  // differs from run to run.
  std::random_device rd;
  std::mt19937 gen(rd());

  std::uniform_real_distribution<float> dist(-2.0, 2.0);
  std::vector<NumericalVector<float>> vecs;

  size_t cnt = 5000U;
  for (size_t i = 0; i < cnt; i++) {
    NumericalVector<float> vec(dimension);
    for (size_t j = 0; j < dimension; ++j) {
      vec[j] = dist(gen);
    }

    std::string binary_vec;
    IndexQueryMeta binary_qmeta;

    // Each document is handed to the refiner twice: in converted form and in
    // its original FP32 form.
    ASSERT_EQ(0,
              reformer->convert(vec.data(), qmeta, &binary_vec, &binary_qmeta));
    ASSERT_EQ(0, refiner->add_impl(i, binary_vec.data(), binary_qmeta,
                                   vec.data(), qmeta, ctx));

    // Keep the original vector so it can be replayed as a query below.
    vecs.push_back(vec);
  }

  size_t query_cnt = 200U;
  // size_t query_cnt = 1U;

  auto searcherCtx = refiner->create_context();
  // Checked like `ctx` above: the context is dereferenced in every iteration
  // of the query loop.
  ASSERT_TRUE(!!searcherCtx);

  for (size_t i = 0; i < query_cnt; i++) {
    auto &vec = vecs[i];

    // float abs_value{0};
    // for (size_t j = 0; j < dimension; ++j) {
    //   std::cout << "dim: " << j << ", value: " << vec[j] << std::endl;

    //   abs_value += std::abs(vec[j]);
    // }
    // std::cout << "abs value: " << abs_value << std::endl;

    std::string new_query;
    IndexQueryMeta binary_qmeta;
    ASSERT_EQ(
        0, reformer->transform(vec.data(), qmeta, &new_query, &binary_qmeta));

    size_t topk = 50;
    searcherCtx->set_topk(topk);
    ASSERT_EQ(0, refiner->search_impl(new_query.data(), binary_qmeta,
                                      vec.data(), qmeta, searcherCtx));
    auto &results = searcherCtx->result();
    ASSERT_EQ(topk, results.size());
    // Vector i was inserted with key i, so it must be its own best match.
    ASSERT_EQ(i, results[0].key());

    // for (size_t i = 0; i < results.size(); ++i) {
    //   std::cout << i << ", id: " << results[i].index()
    //             << ", score: " << results[i].score() << std::endl;
    // }
  }
}

}  // namespace core
}  // namespace zvec

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif

================================================
FILE: tests/core/algorithm/hnsw_rabitq/CMakeLists.txt
================================================
# Build script for the hnsw_rabitq unit tests: every *_test.cc file found below
# this directory becomes its own test target and is added to the "hnsw_rabitq"
# test suite.
# NOTE(review): cc_gtest / cc_test_suite are presumably provided by the
# project's bazel.cmake included below - confirm.
include(${CMAKE_SOURCE_DIR}/cmake/bazel.cmake)
include(${CMAKE_SOURCE_DIR}/cmake/option.cmake)

# Extra linker arguments used on Apple platforms only; on other platforms the
# variable stays unset and expands to nothing in LDFLAGS below.
if(APPLE)
  set(APPLE_FRAMEWORK_LIBS
    -framework CoreFoundation
    -framework CoreGraphics
    -framework CoreData
    -framework CoreText
    -framework Security
    -framework Foundation
    -Wl,-U,_MallocExtension_ReleaseFreeMemory
    -Wl,-U,_ProfilerStart
    -Wl,-U,_ProfilerStop
    -Wl,-U,_RegisterThriftProtocol
  )
endif()

# Collect every file matching *_test.cc in this directory and its
# subdirectories.
file(GLOB_RECURSE ALL_TEST_SRCS *_test.cc)

# One test target per source file, named after the file without its directory
# and extension (NAME_WE).
foreach(CC_SRCS ${ALL_TEST_SRCS})
  get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)
  cc_gtest(
      NAME ${CC_TARGET}
      STRICT
      LIBS zvec_ailego core_framework core_utility core_metric core_quantizer core_knn_hnsw_rabitq core_knn_flat core_knn_cluster
      ${CMAKE_THREAD_LIBS_INIT}
      ${CMAKE_DL_LIBS}
      SRCS ${CC_SRCS}
      INCS . ${CMAKE_SOURCE_DIR}/src/core ${CMAKE_SOURCE_DIR}/src/core/algorithm/hnsw_rabitq
      LDFLAGS ${APPLE_FRAMEWORK_LIBS}
    )
  cc_test_suite(hnsw_rabitq ${CC_TARGET})
endforeach()


================================================
FILE: tests/core/algorithm/hnsw_rabitq/hnsw_rabitq_builder_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "hnsw_rabitq_builder.h"
#include <sys/stat.h>
#include <sys/types.h>
#include <fcntl.h>
#include <future>
#include <gtest/gtest.h>
#include <zvec/ailego/container/params.h>
#include <zvec/ailego/container/vector.h>
#include "zvec/core/framework/index_framework.h"
#include "zvec/core/framework/index_logger.h"
#include "zvec/core/framework/index_provider.h"

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
#endif

using namespace std;
using namespace zvec::ailego;

namespace zvec {
namespace core {

// Dimensionality of the vectors used by the tests in this file.
static constexpr size_t dim = 128;

class HnswRabitqBuilderTest : public testing::Test {
 protected:
  void SetUp(void);
  void TearDown(void);

  static std::string _dir;
  static shared_ptr<IndexMeta> _index_meta_ptr;
};

// Out-of-class definitions of the fixture's static members. `_dir` is a
// relative path; `_index_meta_ptr` starts empty and is filled in by SetUp().
std::string HnswRabitqBuilderTest::_dir("hnswRabitqBuilderTest");
shared_ptr<IndexMeta> HnswRabitqBuilderTest::_index_meta_ptr;

void HnswRabitqBuilderTest::SetUp(void) {
  IndexLoggerBroker::SetLevel(0);
  _index_meta_ptr.reset(new (nothrow)
                            IndexMeta(IndexMeta::DataType::DT_FP32, dim));
  _index_meta_ptr->set_metric("SquaredEuclidean", 0, ailego::Params());
}

// Per-test teardown. The command that would delete the test directory is only
// formatted; its execution is commented out, so dumped index files stay on
// disk after each test.
void HnswRabitqBuilderTest::TearDown(void) {
  char rm_cmd[100];
  snprintf(rm_cmd, sizeof(rm_cmd), "rm -rf %s", _dir.c_str());
  // system(rm_cmd);
}

// Full builder round trip on 1000 documents: init, train, build, dump to
// `_dir + "/TestGeneral"`, then verify the statistics reported by the builder.
TEST_F(HnswRabitqBuilderTest, TestGeneral) {
  size_t doc_cnt = 1000UL;

  IndexBuilder::Pointer builder =
      IndexFactory::CreateBuilder("HnswRabitqBuilder");
  ASSERT_NE(builder, nullptr);

  // Document `id` stores (id * dim + col) / 1000 in column `col`.
  auto provider =
      make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dim);
  for (size_t id = 0; id < doc_cnt; ++id) {
    NumericalVector<float> row(dim);
    for (size_t col = 0; col < dim; ++col) {
      row[col] = static_cast<float>(id * dim + col) / 1000.0f;
    }
    ASSERT_TRUE(provider->emplace(id, std::move(row)));
  }

  ailego::Params build_params;
  build_params.set("proxima.rabitq.num_clusters", 16UL);
  build_params.set("proxima.rabitq.total_bits", 2UL);
  build_params.set("proxima.hnsw_rabitq.general.dimension", dim);

  // Builder life cycle: init -> train -> build.
  ASSERT_EQ(0, builder->init(*_index_meta_ptr, build_params));
  ASSERT_EQ(0, builder->train(provider));
  ASSERT_EQ(0, builder->build(provider));

  // Persist the index through a file dumper.
  string dump_path = _dir + "/TestGeneral";
  auto file_dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(file_dumper, nullptr);
  ASSERT_EQ(0, file_dumper->create(dump_path));
  ASSERT_EQ(0, builder->dump(file_dumper));
  ASSERT_EQ(0, file_dumper->close());

  // All documents are expected to be built and dumped; the trained counters
  // are expected to stay at zero.
  auto &build_stats = builder->stats();
  ASSERT_EQ(0UL, build_stats.trained_count());
  ASSERT_EQ(doc_cnt, build_stats.built_count());
  ASSERT_EQ(doc_cnt, build_stats.dumped_count());
  ASSERT_EQ(0UL, build_stats.discarded_count());
  ASSERT_EQ(0UL, build_stats.trained_costtime());
  ASSERT_GT(build_stats.built_costtime(), 0UL);
}

// Loads a dumped index into an HnswRabitqSearcher and runs one top-10 query.
// The file opened here is `_dir + "/TestGeneral"`, i.e. the path TestGeneral
// dumps to, so that file has to exist before this test runs.
TEST_F(HnswRabitqBuilderTest, TestLoad) {
  auto searcher = IndexFactory::CreateSearcher("HnswRabitqSearcher");
  ASSERT_NE(searcher, nullptr);

  ailego::Params searcher_cfg;
  searcher_cfg.set("proxima.hnsw_rabitq.searcher.ef", 100UL);
  ASSERT_EQ(0, searcher->init(searcher_cfg));

  // Open the dumped index through a read-only file storage.
  auto storage = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_NE(storage, nullptr);
  ASSERT_EQ(0, storage->init(ailego::Params()));
  string index_path = _dir + "/TestGeneral";
  ASSERT_EQ(0, storage->open(index_path, false));

  ASSERT_EQ(0, searcher->load(storage, nullptr));

  // Query vector: column `col` holds col / 1000.
  NumericalVector<float> query(dim);
  for (size_t col = 0; col < dim; ++col) {
    query[col] = static_cast<float>(col) / 1000.0f;
  }

  IndexQueryMeta query_meta(IndexMeta::DataType::DT_FP32, dim);

  auto search_ctx = searcher->create_context();
  ASSERT_NE(search_ctx, nullptr);
  search_ctx->set_topk(10);

  ASSERT_EQ(0, searcher->search_impl(query.data(), query_meta, 1, search_ctx));

  // Between 1 and 10 hits are expected for the single query.
  const auto &hits = search_ctx->result(0);
  ASSERT_GT(hits.size(), 0UL);
  ASSERT_LE(hits.size(), 10UL);
}

// Sweeps a list of vector dimensions through the builder. Dimensions <= 63 or
// >= 4096 must be rejected by init() with -31; every other dimension must
// train and build 100 documents successfully.
TEST_F(HnswRabitqBuilderTest, TestDimensions) {
  size_t doc_cnt = 100;
  std::vector<size_t> dimensions = {1,    2,    4,    8,    16,   32,   33,
                                    63,   64,   128,  256,  512,  1024, 2047,
                                    2048, 2049, 4095, 4096, 4097, 8192, 16384};

  for (size_t cur_dim : dimensions) {
    std::cout << "Testing dimension: " << cur_dim << std::endl;

    IndexMeta meta(IndexMeta::DataType::DT_FP32, cur_dim);
    meta.set_metric("SquaredEuclidean", 0, ailego::Params());

    IndexBuilder::Pointer builder =
        IndexFactory::CreateBuilder("HnswRabitqBuilder");
    ASSERT_NE(builder, nullptr) << "dim=" << cur_dim;

    ailego::Params build_params;
    build_params.set("proxima.rabitq.num_clusters", 16UL);
    build_params.set("proxima.rabitq.total_bits", 2UL);
    build_params.set("proxima.hnsw_rabitq.general.dimension", cur_dim);

    int init_rc = builder->init(meta, build_params);

    // Out-of-range dimensions: init() has to fail with -31, nothing else is
    // attempted for them.
    const bool out_of_range = (cur_dim <= 63 || cur_dim >= 4096);
    if (out_of_range) {
      ASSERT_EQ(-31, init_rc)
          << "expected init to fail with -31, dim=" << cur_dim;
      std::cout << "Dimension " << cur_dim
                << " correctly rejected with ret=" << init_rc << std::endl;
      continue;
    }

    // Supported dimensions: the whole init/train/build sequence must succeed.
    ASSERT_EQ(0, init_rc) << "init failed, dim=" << cur_dim;

    auto provider =
        make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(
            cur_dim);
    for (size_t id = 0; id < doc_cnt; ++id) {
      NumericalVector<float> row(cur_dim);
      for (size_t col = 0; col < cur_dim; ++col) {
        row[col] = static_cast<float>(id * cur_dim + col) / 1000.0f;
      }
      ASSERT_TRUE(provider->emplace(id, std::move(row))) << "dim=" << cur_dim;
    }

    int train_rc = builder->train(provider);
    ASSERT_EQ(0, train_rc) << "train failed, dim=" << cur_dim;

    int build_rc = builder->build(provider);
    ASSERT_EQ(0, build_rc) << "build failed, dim=" << cur_dim;

    auto &build_stats = builder->stats();
    ASSERT_EQ(doc_cnt, build_stats.built_count()) << "dim=" << cur_dim;

    std::cout << "Dimension " << cur_dim << " passed, built "
              << build_stats.built_count() << " docs" << std::endl;
  }
}

// With "proxima.hnsw_rabitq.builder.memory_quota" set to 100000, init and
// train must still succeed but build() on 1000 documents is expected to
// return IndexError_NoMemory.
TEST_F(HnswRabitqBuilderTest, TestMemquota) {
  size_t doc_cnt = 1000UL;

  IndexBuilder::Pointer builder =
      IndexFactory::CreateBuilder("HnswRabitqBuilder");
  ASSERT_NE(builder, nullptr);

  // Document `id` stores (id * dim + col) / 1000 in column `col`.
  auto provider =
      make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dim);
  for (size_t id = 0; id < doc_cnt; ++id) {
    NumericalVector<float> row(dim);
    for (size_t col = 0; col < dim; ++col) {
      row[col] = static_cast<float>(id * dim + col) / 1000.0f;
    }
    ASSERT_TRUE(provider->emplace(id, std::move(row)));
  }

  ailego::Params build_params;
  build_params.set("proxima.rabitq.num_clusters", 16UL);
  build_params.set("proxima.rabitq.total_bits", 2UL);
  build_params.set("proxima.hnsw_rabitq.general.dimension", dim);
  build_params.set("proxima.hnsw_rabitq.builder.memory_quota", 100000UL);

  ASSERT_EQ(0, builder->init(*_index_meta_ptr, build_params));
  ASSERT_EQ(0, builder->train(provider));
  // The quota is the only parameter that differs from the passing tests.
  ASSERT_EQ(IndexError_NoMemory, builder->build(provider));
}

// Trains and builds two independent builders concurrently (one std::async task
// each) while both share the same thread pool and the same holder, then dumps
// both and checks that each one built all documents.
TEST_F(HnswRabitqBuilderTest, TestIndexThreads) {
  IndexBuilder::Pointer builder1 =
      IndexFactory::CreateBuilder("HnswRabitqBuilder");
  ASSERT_NE(builder1, nullptr);
  IndexBuilder::Pointer builder2 =
      IndexFactory::CreateBuilder("HnswRabitqBuilder");
  ASSERT_NE(builder2, nullptr);

  // Document i stores (i * dim + j) / 1000 in column j.
  auto holder =
      make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dim);
  size_t doc_cnt = 1000UL;
  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = static_cast<float>(i * dim + j) / 1000.0f;
    }
    ASSERT_TRUE(holder->emplace(i, std::move(vec)));
  }

  ailego::Params params;
  params.set("proxima.rabitq.num_clusters", 16UL);
  params.set("proxima.rabitq.total_bits", 2UL);
  params.set("proxima.hnsw_rabitq.general.dimension", dim);

  // The first constructor argument is a time-seeded random value in 0..3.
  // NOTE(review): presumably this is the pool's thread count; what a value of
  // 0 means to SingleQueueIndexThreads is not visible here - confirm.
  std::srand(ailego::Realtime::MilliSeconds());
  auto threads =
      std::make_shared<SingleQueueIndexThreads>(std::rand() % 4, false);
  ASSERT_EQ(0, builder1->init(*_index_meta_ptr, params));
  ASSERT_EQ(0, builder2->init(*_index_meta_ptr, params));

  // A failing ASSERT_* inside these lambdas only returns from the lambda; the
  // built_count checks at the end of the test still run afterwards.
  auto build_index1 = [&]() {
    ASSERT_EQ(0, builder1->train(threads, holder));
    ASSERT_EQ(0, builder1->build(threads, holder));
  };
  auto build_index2 = [&]() {
    ASSERT_EQ(0, builder2->train(threads, holder));
    ASSERT_EQ(0, builder2->build(threads, holder));
  };

  // Run both builds in parallel and wait for both before dumping. The lambdas
  // capture locals by reference, which is why the waits must precede the end
  // of this scope.
  auto t1 = std::async(std::launch::async, build_index1);
  auto t2 = std::async(std::launch::async, build_index2);
  t1.wait();
  t2.wait();

  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);

  // Both builders are dumped one after the other through the same dumper and
  // to the same path.
  string path = _dir + "/TestIndexThreads";
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder1->dump(dumper));
  ASSERT_EQ(0, dumper->close());
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder2->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  auto &stats1 = builder1->stats();
  ASSERT_EQ(doc_cnt, stats1.built_count());
  auto &stats2 = builder2->stats();
  ASSERT_EQ(doc_cnt, stats2.built_count());
}

// Builder round trip for the "Cosine" metric: the raw vectors are first run
// through a "CosineFp32Converter", and the builder is initialised with the
// converter's meta and fed the converted data. The index is dumped to
// `_dir + "/TestCosine"` and the builder statistics are verified.
TEST_F(HnswRabitqBuilderTest, TestCosine) {
  IndexBuilder::Pointer builder =
      IndexFactory::CreateBuilder("HnswRabitqBuilder");
  ASSERT_NE(builder, nullptr);

  // Document i stores (i * dim + j) / 1000 in column j.
  auto holder =
      make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dim);
  size_t doc_cnt = 1000UL;
  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = static_cast<float>(i * dim + j) / 1000.0f;
    }
    ASSERT_TRUE(holder->emplace(i, std::move(vec)));
  }

  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dim);
  index_meta_raw.set_metric("Cosine", 0, ailego::Params());

  ailego::Params converter_params;
  auto converter = IndexFactory::CreateConverter("CosineFp32Converter");
  // The converter is dereferenced on the next line; fail the test cleanly if
  // the factory could not create it, as is done for the builder and dumper.
  ASSERT_NE(converter, nullptr);
  converter->init(index_meta_raw, converter_params);

  // Meta describing the converted data; this is what the builder is given.
  IndexMeta index_meta = converter->meta();

  converter->transform(holder);

  auto converted_holder = converter->result();
  converted_holder = convert_holder_to_provider(converted_holder);

  ailego::Params params;
  params.set("proxima.rabitq.num_clusters", 16UL);
  params.set("proxima.rabitq.total_bits", 2UL);
  params.set("proxima.hnsw_rabitq.general.dimension", dim);

  ASSERT_EQ(0, builder->init(index_meta, params));

  ASSERT_EQ(0, builder->train(converted_holder));

  ASSERT_EQ(0, builder->build(converted_holder));

  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);

  string path = _dir + "/TestCosine";
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  // All documents are expected to be built and dumped; the trained counters
  // are expected to stay at zero.
  auto &stats = builder->stats();
  ASSERT_EQ(0UL, stats.trained_count());
  ASSERT_EQ(doc_cnt, stats.built_count());
  ASSERT_EQ(doc_cnt, stats.dumped_count());
  ASSERT_EQ(0UL, stats.discarded_count());
  ASSERT_EQ(0UL, stats.trained_costtime());
  ASSERT_GT(stats.built_costtime(), 0UL);
}

// Reuses one builder for two complete cycles: build and dump 1000 documents,
// call cleanup(), then init/train/build/dump again with 2000 documents. The
// statistics must reflect the second run only.
TEST_F(HnswRabitqBuilderTest, TestCleanupAndRebuild) {
  IndexBuilder::Pointer builder =
      IndexFactory::CreateBuilder("HnswRabitqBuilder");
  ASSERT_NE(builder, nullptr);

  // ---- first cycle: 1000 documents ----
  size_t first_cnt = 1000UL;
  auto first_batch =
      make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dim);
  for (size_t id = 0; id < first_cnt; ++id) {
    NumericalVector<float> row(dim);
    for (size_t col = 0; col < dim; ++col) {
      row[col] = static_cast<float>(id * dim + col) / 1000.0f;
    }
    ASSERT_TRUE(first_batch->emplace(id, std::move(row)));
  }

  ailego::Params build_params;
  build_params.set("proxima.rabitq.num_clusters", 16UL);
  build_params.set("proxima.rabitq.total_bits", 2UL);
  build_params.set("proxima.hnsw_rabitq.general.dimension", dim);

  ASSERT_EQ(0, builder->init(*_index_meta_ptr, build_params));
  ASSERT_EQ(0, builder->train(first_batch));
  ASSERT_EQ(0, builder->build(first_batch));

  string dump_path = _dir + "/TestCleanupAndRebuild";
  auto first_dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(first_dumper, nullptr);
  ASSERT_EQ(0, first_dumper->create(dump_path));
  ASSERT_EQ(0, builder->dump(first_dumper));
  ASSERT_EQ(0, first_dumper->close());

  // This reference is taken once and read again after the rebuild below.
  auto &build_stats = builder->stats();
  ASSERT_EQ(0UL, build_stats.trained_count());
  ASSERT_EQ(first_cnt, build_stats.built_count());
  ASSERT_EQ(first_cnt, build_stats.dumped_count());
  ASSERT_EQ(0UL, build_stats.discarded_count());
  ASSERT_EQ(0UL, build_stats.trained_costtime());
  ASSERT_GT(build_stats.built_costtime(), 0UL);

  // ---- cleanup, then second cycle: 2000 documents ----
  ASSERT_EQ(0, builder->cleanup());

  size_t second_cnt = 2000UL;
  auto second_batch =
      make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dim);
  for (size_t id = 0; id < second_cnt; ++id) {
    NumericalVector<float> row(dim);
    for (size_t col = 0; col < dim; ++col) {
      row[col] = static_cast<float>(id * dim + col) / 1000.0f;
    }
    ASSERT_TRUE(second_batch->emplace(id, std::move(row)));
  }

  ASSERT_EQ(0, builder->init(*_index_meta_ptr, build_params));
  ASSERT_EQ(0, builder->train(second_batch));
  ASSERT_EQ(0, builder->build(second_batch));

  // The second dump goes to the same path as the first one.
  auto second_dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(second_dumper, nullptr);
  ASSERT_EQ(0, second_dumper->create(dump_path));
  ASSERT_EQ(0, builder->dump(second_dumper));
  ASSERT_EQ(0, second_dumper->close());

  // Counters must now match the second batch, not the sum of both.
  ASSERT_EQ(0UL, build_stats.trained_count());
  ASSERT_EQ(second_cnt, build_stats.built_count());
  ASSERT_EQ(second_cnt, build_stats.dumped_count());
  ASSERT_EQ(0UL, build_stats.discarded_count());
  ASSERT_EQ(0UL, build_stats.trained_costtime());
  ASSERT_GT(build_stats.built_costtime(), 0UL);
}

}  // namespace core
}  // namespace zvec

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif


================================================
FILE: tests/core/algorithm/hnsw_rabitq/hnsw_rabitq_searcher_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "hnsw_rabitq_searcher.h"
#include <sys/stat.h>
#include <sys/types.h>
#include <fcntl.h>
#include <cstdio>
#include <random>
#include <gtest/gtest.h>
#include <zvec/ailego/container/params.h>
#include <zvec/ailego/container/vector.h>
#include "zvec/core/framework/index_framework.h"
#include "zvec/core/framework/index_logger.h"
#include "hnsw_rabitq_builder.h"

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
#endif

using namespace std;
using namespace zvec::ailego;

namespace zvec {
namespace core {

// Dimensionality of the vectors used by the tests in this file.
static constexpr size_t dim = 128;

class HnswRabitqSearcherTest : public testing::Test {
 protected:
  void SetUp(void);
  void TearDown(void);

  static std::string _dir;
  static shared_ptr<IndexMeta> _index_meta_ptr;
};

// Out-of-class definitions of the fixture's static members. `_dir` is a
// relative path; `_index_meta_ptr` starts empty and is filled in by SetUp().
std::string HnswRabitqSearcherTest::_dir("HnswRabitqSearcherTest");
shared_ptr<IndexMeta> HnswRabitqSearcherTest::_index_meta_ptr;

void HnswRabitqSearcherTest::SetUp(void) {
  IndexLoggerBroker::SetLevel(0);
  _index_meta_ptr.reset(new (nothrow)
                            IndexMeta(IndexMeta::DataType::DT_FP32, dim));
  _index_meta_ptr->set_metric("SquaredEuclidean", 0, ailego::Params());
}

// Per-test teardown. The command that would delete the test directory is only
// formatted; its execution is commented out, so dumped index files stay on
// disk after each test.
void HnswRabitqSearcherTest::TearDown(void) {
  char rm_cmd[100];
  snprintf(rm_cmd, sizeof(rm_cmd), "rm -rf %s", _dir.c_str());
  // system(rm_cmd);
}

// Builds and dumps a 10000-document index, loads it into an
// HnswRabitqSearcher and runs one top-10 query. The query must return between
// 1 and 10 hits whose scores are in non-decreasing order.
TEST_F(HnswRabitqSearcherTest, TestBasicSearch) {
  // ---- build and dump the index ----
  size_t doc_cnt = 10000UL;

  IndexBuilder::Pointer builder =
      IndexFactory::CreateBuilder("HnswRabitqBuilder");
  ASSERT_NE(builder, nullptr);

  // Document `id` stores (id * dim + col) / 1000 in column `col`.
  auto provider =
      make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dim);
  for (size_t id = 0; id < doc_cnt; ++id) {
    NumericalVector<float> row(dim);
    for (size_t col = 0; col < dim; ++col) {
      row[col] = static_cast<float>(id * dim + col) / 1000.0f;
    }
    ASSERT_TRUE(provider->emplace(id, std::move(row)));
  }

  ailego::Params build_params;
  build_params.set("proxima.rabitq.num_clusters", 16UL);
  build_params.set("proxima.rabitq.total_bits", 2UL);
  build_params.set("proxima.hnsw_rabitq.general.dimension", dim);

  ASSERT_EQ(0, builder->init(*_index_meta_ptr, build_params));
  ASSERT_EQ(0, builder->train(provider));
  ASSERT_EQ(0, builder->build(provider));

  string index_path = _dir + "/TestBasicSearch";
  auto file_dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(file_dumper, nullptr);
  ASSERT_EQ(0, file_dumper->create(index_path));
  ASSERT_EQ(0, builder->dump(file_dumper));
  ASSERT_EQ(0, file_dumper->close());

  // ---- load the dumped index into a searcher ----
  auto searcher = IndexFactory::CreateSearcher("HnswRabitqSearcher");
  ASSERT_NE(searcher, nullptr);

  ailego::Params searcher_cfg;
  searcher_cfg.set("proxima.hnsw_rabitq.searcher.ef", 100UL);
  ASSERT_EQ(0, searcher->init(searcher_cfg));

  auto storage = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_NE(storage, nullptr);
  ASSERT_EQ(0, storage->init(ailego::Params()));
  ASSERT_EQ(0, storage->open(index_path, false));

  ASSERT_EQ(0, searcher->load(storage, nullptr));

  // ---- run a single query ----
  // Query vector: column `col` holds col / 1000.
  NumericalVector<float> query(dim);
  for (size_t col = 0; col < dim; ++col) {
    query[col] = static_cast<float>(col) / 1000.0f;
  }

  IndexQueryMeta query_meta(IndexMeta::DataType::DT_FP32, dim);

  auto search_ctx = searcher->create_context();
  ASSERT_TRUE(!!search_ctx);
  search_ctx->set_topk(10);

  ASSERT_EQ(0, searcher->search_impl(query.data(), query_meta, 1, search_ctx));

  const auto &hits = search_ctx->result(0);
  ASSERT_GT(hits.size(), 0UL);
  ASSERT_LE(hits.size(), 10UL);

  // Each hit must score no lower than the one before it.
  for (size_t pos = 1; pos < hits.size(); ++pos) {
    ASSERT_LE(hits[pos - 1].score(), hits[pos].score());
  }
}

// Radius (threshold) search on a SquaredEuclidean index. Disabled through the
// DISABLED_ name prefix, so gtest does not run it by default.
// Flow: plain top-50 search -> search with a score threshold taken from the
// middle of that result list -> search again after resetting the threshold.
TEST_F(HnswRabitqSearcherTest, DISABLED_TestRnnSearch) {
  // Build index first
  IndexBuilder::Pointer builder =
      IndexFactory::CreateBuilder("HnswRabitqBuilder");
  ASSERT_NE(builder, nullptr);

  // Document i has every component equal to i.
  auto holder =
      make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dim);
  size_t doc_cnt = 10000UL;
  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = static_cast<float>(i);
    }
    ASSERT_TRUE(holder->emplace(i, std::move(vec)));
  }

  ailego::Params params;
  params.set("proxima.rabitq.num_clusters", 16UL);
  params.set("proxima.rabitq.total_bits", 2UL);
  params.set("proxima.hnsw_rabitq.general.dimension", dim);

  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));
  ASSERT_EQ(0, builder->train(holder));
  ASSERT_EQ(0, builder->build(holder));

  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);

  string path = _dir + "/TestRnnSearch";
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  // Test searcher with radius search
  auto searcher = IndexFactory::CreateSearcher("HnswRabitqSearcher");
  ASSERT_NE(searcher, nullptr);

  ailego::Params search_params;
  search_params.set("proxima.hnsw_rabitq.searcher.ef", 100UL);
  ASSERT_EQ(0, searcher->init(search_params));

  auto loader = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_NE(loader, nullptr);
  ASSERT_EQ(0, loader->init(ailego::Params()));
  ASSERT_EQ(0, loader->open(path, false));

  ASSERT_EQ(0, searcher->load(loader, nullptr));

  // The query is the all-zero vector.
  NumericalVector<float> query_vec(dim);
  for (size_t j = 0; j < dim; ++j) {
    query_vec[j] = 0.0f;
  }

  IndexQueryMeta query_meta(IndexMeta::DataType::DT_FP32, dim);
  auto context = searcher->create_context();
  ASSERT_NE(context, nullptr);

  size_t topk = 50;
  context->set_topk(topk);
  ASSERT_EQ(0, searcher->search_impl(query_vec.data(), query_meta, 1, context));

  // `results` is bound once and not re-fetched; the assertions after the later
  // searches read it again and expect it to show the newest result list.
  const auto &results = context->result(0);
  ASSERT_EQ(topk, results.size());

  // Test with radius threshold
  // The threshold is the score of the hit in the middle of the unrestricted
  // result list, so fewer than topk hits are expected to pass it.
  float radius = results[topk / 2].score();
  context->set_threshold(radius);
  ASSERT_EQ(0, searcher->search_impl(query_vec.data(), query_meta, 1, context));
  ASSERT_GT(topk, results.size());
  for (size_t k = 0; k < results.size(); ++k) {
    ASSERT_GE(radius, results[k].score());
  }

  // Test reset threshold
  // Without a threshold the full topk comes back and the last hit scores above
  // the former radius.
  context->reset_threshold();
  ASSERT_EQ(0, searcher->search_impl(query_vec.data(), query_meta, 1, context));
  ASSERT_EQ(topk, results.size());
  ASSERT_LT(radius, results[topk - 1].score());
}

// Threshold search on an index built with the "InnerProduct" metric. Disabled
// through the DISABLED_ name prefix, so gtest does not run it by default.
// Flow: plain top-50 search -> search with a threshold derived from the middle
// hit -> search again after resetting the threshold.
TEST_F(HnswRabitqSearcherTest, DISABLED_TestSearchInnerProduct) {
  // Build index with InnerProduct metric
  IndexBuilder::Pointer builder =
      IndexFactory::CreateBuilder("HnswRabitqBuilder");
  ASSERT_NE(builder, nullptr);

  // Document i has every component equal to i.
  auto holder =
      make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dim);
  size_t doc_cnt = 10000UL;
  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = static_cast<float>(i);
    }
    ASSERT_TRUE(holder->emplace(i, std::move(vec)));
  }

  // A local meta is used instead of the fixture's SquaredEuclidean one.
  IndexMeta index_meta(IndexMeta::DataType::DT_FP32, dim);
  index_meta.set_metric("InnerProduct", 0, ailego::Params());

  ailego::Params params;
  params.set("proxima.rabitq.num_clusters", 16UL);
  params.set("proxima.rabitq.total_bits", 2UL);
  params.set("proxima.hnsw_rabitq.general.dimension", dim);

  ASSERT_EQ(0, builder->init(index_meta, params));
  ASSERT_EQ(0, builder->train(holder));
  ASSERT_EQ(0, builder->build(holder));

  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);

  string path = _dir + "/TestSearchInnerProduct";
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  // Test searcher
  auto searcher = IndexFactory::CreateSearcher("HnswRabitqSearcher");
  ASSERT_NE(searcher, nullptr);

  ailego::Params search_params;
  search_params.set("proxima.hnsw_rabitq.searcher.ef", 100UL);
  ASSERT_EQ(0, searcher->init(search_params));

  auto loader = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_NE(loader, nullptr);
  ASSERT_EQ(0, loader->init(ailego::Params()));
  ASSERT_EQ(0, loader->open(path, false));

  ASSERT_EQ(0, searcher->load(loader, nullptr));

  // The query is the all-ones vector.
  NumericalVector<float> query_vec(dim);
  for (size_t j = 0; j < dim; ++j) {
    query_vec[j] = 1.0f;
  }

  IndexQueryMeta query_meta(IndexMeta::DataType::DT_FP32, dim);
  auto context = searcher->create_context();
  ASSERT_TRUE(!!context);

  size_t topk = 50;
  context->set_topk(topk);
  ASSERT_EQ(0, searcher->search_impl(query_vec.data(), query_meta, 1, context));

  // `results` is bound once and not re-fetched; the assertions after the later
  // searches read it again and expect it to show the newest result list.
  const auto &results = context->result(0);
  ASSERT_EQ(topk, results.size());

  // Test with radius threshold (note: InnerProduct uses negative scores)
  // The threshold is the negated score of the middle hit.
  float radius = -results[topk / 2].score();
  context->set_threshold(radius);
  ASSERT_EQ(0, searcher->search_impl(query_vec.data(), query_meta, 1, context));
  ASSERT_GT(topk, results.size());
  for (size_t k = 0; k < results.size(); ++k) {
    // Every hit is logged at error level; EXPECT_GE (non-fatal) lets the loop
    // report all offending hits instead of stopping at the first one.
    LOG_ERROR("radius: %f, score: %f", radius, results[k].score());
    EXPECT_GE(radius, results[k].score());
  }

  // Test reset threshold
  context->reset_threshold();
  ASSERT_EQ(0, searcher->search_impl(query_vec.data(), query_meta, 1, context));
  ASSERT_EQ(topk, results.size());
  ASSERT_LT(-radius, results[topk - 1].score());
}

// End-to-end search with the "Cosine" metric on 10000 random vectors:
//   1. the raw data is run through a "CosineFp32Converter" and the index is
//      built from the converted data with the converter's meta;
//   2. the query is transformed with the reformer named by that meta;
//   3. a plain top-50 search, a search with score threshold 0.5 and a search
//      after resetting the threshold are checked against each other.
TEST_F(HnswRabitqSearcherTest, TestSearchCosine) {
  // Build index with Cosine metric
  IndexBuilder::Pointer builder =
      IndexFactory::CreateBuilder("HnswRabitqBuilder");
  ASSERT_NE(builder, nullptr);

  auto holder =
      make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dim);
  size_t doc_cnt = 10000UL;

  // Note: the generator is seeded from std::random_device, so the data set
  // differs from run to run.
  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_real_distribution<float> dist(-1.0, 1.0);

  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = dist(gen);
    }
    ASSERT_TRUE(holder->emplace(i, std::move(vec)));
  }

  IndexMeta index_meta_raw(IndexMeta::DataType::DT_FP32, dim);
  index_meta_raw.set_metric("Cosine", 0, ailego::Params());

  ailego::Params converter_params;
  auto converter = IndexFactory::CreateConverter("CosineFp32Converter");
  // The converter is dereferenced on the next line; fail the test cleanly if
  // the factory could not create it, as is done for every other factory
  // product in this test.
  ASSERT_NE(converter, nullptr);
  converter->init(index_meta_raw, converter_params);

  // Meta describing the converted data; it also names the reformer used for
  // the query below.
  IndexMeta index_meta = converter->meta();

  converter->transform(holder);

  auto converted_holder = converter->result();
  converted_holder = convert_holder_to_provider(converted_holder);

  ailego::Params params;
  params.set("proxima.rabitq.num_clusters", 16UL);
  params.set("proxima.rabitq.total_bits", 2UL);
  params.set("proxima.hnsw_rabitq.general.dimension", dim);

  ASSERT_EQ(0, builder->init(index_meta, params));
  ASSERT_EQ(0, builder->train(converted_holder));
  ASSERT_EQ(0, builder->build(converted_holder));

  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);

  string path = _dir + "/TestSearchCosine";
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  // Test searcher
  auto searcher = IndexFactory::CreateSearcher("HnswRabitqSearcher");
  ASSERT_NE(searcher, nullptr);

  ailego::Params search_params;
  search_params.set("proxima.hnsw_rabitq.searcher.ef", 100UL);
  ASSERT_EQ(0, searcher->init(search_params));

  auto loader = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_NE(loader, nullptr);
  ASSERT_EQ(0, loader->init(ailego::Params()));
  ASSERT_EQ(0, loader->open(path, false));

  ASSERT_EQ(0, searcher->load(loader, nullptr));

  // The raw query is the all-ones vector.
  NumericalVector<float> query_vec(dim);
  for (size_t j = 0; j < dim; ++j) {
    query_vec[j] = 1.0f;
  }

  IndexQueryMeta query_meta(IndexMeta::DataType::DT_FP32, dim);
  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());
  ASSERT_TRUE(reformer != nullptr);

  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));

  // The searcher is queried with the reformer's output, not the raw vector.
  std::string new_query;
  IndexQueryMeta new_meta;
  ASSERT_EQ(0, reformer->transform(query_vec.data(), query_meta, &new_query,
                                   &new_meta));

  auto context = searcher->create_context();
  ASSERT_TRUE(!!context);

  size_t topk = 50;
  context->set_topk(topk);
  ASSERT_EQ(0, searcher->search_impl(new_query.data(), new_meta, 1, context));

  // `results` is bound once and not re-fetched; the assertions after the later
  // searches read it again and expect it to show the newest result list.
  const auto &results = context->result(0);
  ASSERT_EQ(topk, results.size());

  // Test with radius threshold
  // With the threshold set, fewer than topk hits are expected and none may
  // score above it.
  float radius = 0.5f;
  context->set_threshold(radius);
  ASSERT_EQ(0, searcher->search_impl(new_query.data(), new_meta, 1, context));
  ASSERT_GT(topk, results.size());
  for (size_t k = 0; k < results.size(); ++k) {
    ASSERT_GE(radius, results[k].score());
  }

  // Test reset threshold
  // Without a threshold the full topk comes back and the last hit scores above
  // the former radius.
  context->reset_threshold();
  ASSERT_EQ(0, searcher->search_impl(new_query.data(), new_meta, 1, context));
  ASSERT_EQ(topk, results.size());
  ASSERT_LT(radius, results[topk - 1].score());
}

// Builds an HnswRabitq index over 10000 deterministic ramp vectors, dumps it
// to a file, loads it into a searcher and issues five different queries. Each
// result list must be non-empty, hold at most 20 entries and be ordered by
// non-decreasing score.
TEST_F(HnswRabitqSearcherTest, TestMultipleQueries) {
  // ---- Build phase ----
  IndexBuilder::Pointer index_builder =
      IndexFactory::CreateBuilder("HnswRabitqBuilder");
  ASSERT_TRUE(index_builder != nullptr);

  const size_t total_docs = 10000UL;
  auto provider =
      std::make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(
          dim);
  for (size_t doc = 0; doc < total_docs; ++doc) {
    NumericalVector<float> features(dim);
    for (size_t d = 0; d < dim; ++d) {
      features[d] = static_cast<float>(doc * dim + d) / 1000.0f;
    }
    ASSERT_TRUE(provider->emplace(doc, std::move(features)));
  }

  ailego::Params build_params;
  build_params.set("proxima.rabitq.num_clusters", 16UL);
  build_params.set("proxima.rabitq.total_bits", 2UL);
  build_params.set("proxima.hnsw_rabitq.general.dimension", dim);

  ASSERT_EQ(0, index_builder->init(*_index_meta_ptr, build_params));
  ASSERT_EQ(0, index_builder->train(provider));
  ASSERT_EQ(0, index_builder->build(provider));

  // ---- Dump phase ----
  auto file_dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_TRUE(file_dumper != nullptr);

  std::string index_path = _dir + "/TestMultipleQueries";
  ASSERT_EQ(0, file_dumper->create(index_path));
  ASSERT_EQ(0, index_builder->dump(file_dumper));
  ASSERT_EQ(0, file_dumper->close());

  // ---- Load phase ----
  auto index_searcher = IndexFactory::CreateSearcher("HnswRabitqSearcher");
  ASSERT_TRUE(index_searcher != nullptr);

  ailego::Params searcher_params;
  searcher_params.set("proxima.hnsw_rabitq.searcher.ef", 100UL);
  ASSERT_EQ(0, index_searcher->init(searcher_params));

  auto read_storage = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_TRUE(read_storage != nullptr);
  ASSERT_EQ(0, read_storage->init(ailego::Params()));
  ASSERT_EQ(0, read_storage->open(index_path, false));

  ASSERT_EQ(0, index_searcher->load(read_storage, nullptr));

  // ---- Query phase: five queries, each equal to one of the first five docs ----
  size_t q = 0;
  while (q < 5) {
    NumericalVector<float> probe(dim);
    for (size_t d = 0; d < dim; ++d) {
      probe[d] = static_cast<float>(q * dim + d) / 1000.0f;
    }

    IndexQueryMeta probe_meta(IndexMeta::DataType::DT_FP32, dim);

    auto search_ctx = index_searcher->create_context();
    ASSERT_TRUE(!!search_ctx);
    search_ctx->set_topk(20);

    ASSERT_EQ(0, index_searcher->search_impl(probe.data(), probe_meta, 1,
                                             search_ctx));

    const auto &hits = search_ctx->result(0);
    ASSERT_LT(0UL, hits.size());
    ASSERT_GE(20UL, hits.size());

    // Scores must never decrease from one hit to the next.
    for (size_t pos = 0; pos + 1 < hits.size(); ++pos) {
      ASSERT_LE(hits[pos].score(), hits[pos + 1].score());
    }

    ++q;
  }
}

// Builds and dumps an HnswRabitq index over 10000 deterministic ramp vectors,
// then searches the same query with several topk settings. For every topk the
// result must be non-empty, no longer than topk, and ordered by non-decreasing
// score.
TEST_F(HnswRabitqSearcherTest, TestDifferentTopK) {
  // ---- Build phase ----
  IndexBuilder::Pointer index_builder =
      IndexFactory::CreateBuilder("HnswRabitqBuilder");
  ASSERT_TRUE(index_builder != nullptr);

  const size_t total_docs = 10000UL;
  auto provider =
      std::make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(
          dim);
  for (size_t doc = 0; doc < total_docs; ++doc) {
    NumericalVector<float> features(dim);
    for (size_t d = 0; d < dim; ++d) {
      features[d] = static_cast<float>(doc * dim + d) / 1000.0f;
    }
    ASSERT_TRUE(provider->emplace(doc, std::move(features)));
  }

  ailego::Params build_params;
  build_params.set("proxima.rabitq.num_clusters", 16UL);
  build_params.set("proxima.rabitq.total_bits", 2UL);
  build_params.set("proxima.hnsw_rabitq.general.dimension", dim);

  ASSERT_EQ(0, index_builder->init(*_index_meta_ptr, build_params));
  ASSERT_EQ(0, index_builder->train(provider));
  ASSERT_EQ(0, index_builder->build(provider));

  // ---- Dump phase ----
  auto file_dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_TRUE(file_dumper != nullptr);

  std::string index_path = _dir + "/TestDifferentTopK";
  ASSERT_EQ(0, file_dumper->create(index_path));
  ASSERT_EQ(0, index_builder->dump(file_dumper));
  ASSERT_EQ(0, file_dumper->close());

  // ---- Load phase ----
  auto index_searcher = IndexFactory::CreateSearcher("HnswRabitqSearcher");
  ASSERT_TRUE(index_searcher != nullptr);

  ailego::Params searcher_params;
  searcher_params.set("proxima.hnsw_rabitq.searcher.ef", 100UL);
  ASSERT_EQ(0, index_searcher->init(searcher_params));

  auto read_storage = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_TRUE(read_storage != nullptr);
  ASSERT_EQ(0, read_storage->init(ailego::Params()));
  ASSERT_EQ(0, read_storage->open(index_path, false));

  ASSERT_EQ(0, index_searcher->load(read_storage, nullptr));

  // The probe equals document 0 of the ramp data set.
  NumericalVector<float> probe(dim);
  for (size_t d = 0; d < dim; ++d) {
    probe[d] = static_cast<float>(d) / 1000.0f;
  }

  IndexQueryMeta probe_meta(IndexMeta::DataType::DT_FP32, dim);

  // ---- Query phase: one fresh context per topk value ----
  const size_t topk_cases[] = {1, 5, 10, 20, 50, 100};
  for (const size_t wanted : topk_cases) {
    auto search_ctx = index_searcher->create_context();
    ASSERT_TRUE(!!search_ctx);
    search_ctx->set_topk(wanted);

    ASSERT_EQ(0, index_searcher->search_impl(probe.data(), probe_meta, 1,
                                             search_ctx));

    const auto &hits = search_ctx->result(0);
    ASSERT_LT(0UL, hits.size());
    ASSERT_GE(wanted, hits.size());

    // Scores must never decrease from one hit to the next.
    for (size_t pos = 0; pos + 1 < hits.size(); ++pos) {
      ASSERT_LE(hits[pos].score(), hits[pos + 1].score());
    }
  }
}

}  // namespace core
}  // namespace zvec

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif


================================================
FILE: tests/core/algorithm/hnsw_rabitq/hnsw_rabitq_streamer_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "hnsw_rabitq_streamer.h"
#include <memory>
#include <gtest/gtest.h>
#include "zvec/ailego/container/params.h"
#include "zvec/core/framework/index_holder.h"
#include "zvec/core/framework/index_streamer.h"
#include "hnsw_rabitq_streamer.h"
#include "rabitq_converter.h"
#include "rabitq_reformer.h"

using namespace std;
using namespace zvec::ailego;

namespace zvec {
namespace core {

// Vector dimensionality shared by every fixture-based test in this file.
static constexpr size_t dim = 128;

class HnswRabitqStreamerTest : public testing::Test {
 protected:
  void SetUp(void);
  void TearDown(void);

  static std::string dir_;
  static shared_ptr<IndexMeta> index_meta_ptr_;
};

std::string HnswRabitqStreamerTest::dir_("hnswRabitqStreamerTest");
shared_ptr<IndexMeta> HnswRabitqStreamerTest::index_meta_ptr_;

void HnswRabitqStreamerTest::SetUp(void) {
  index_meta_ptr_.reset(new (nothrow)
                            IndexMeta(IndexMeta::DataType::DT_FP32, dim));
  index_meta_ptr_->set_metric("SquaredEuclidean", 0, ailego::Params());
}

// Removes the test working directory (dir_) and everything below it after
// every test by shelling out to `rm -rf`.
void HnswRabitqStreamerTest::TearDown(void) {
  // Build the command as a std::string so it cannot be truncated by a
  // fixed-size buffer if dir_ ever grows.
  const std::string cleanup_cmd = "rm -rf " + dir_;
  system(cleanup_cmd.c_str());
}

// End-to-end smoke test: trains a RabitqConverter on 1000 ramp vectors, streams
// them into an HnswRabitqStreamer opened on MMapFileStorage, runs one top-10
// search, then closes the streamer and reopens the same storage with a
// streamer that was constructed without a reformer.
TEST_F(HnswRabitqStreamerTest, TestBuildAndSearch) {
  auto holder =
      make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dim);
  size_t doc_cnt = 1000UL;
  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = static_cast<float>(i * dim + j) / 1000.0f;
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }

  RabitqConverter converter;
  // Abort on setup errors here instead of letting them surface later as
  // harder-to-diagnose train/search failures.
  ASSERT_EQ(0, converter.init(*index_meta_ptr_, ailego::Params()));
  ASSERT_EQ(converter.train(holder), 0);
  std::shared_ptr<IndexReformer> index_reformer;
  ASSERT_EQ(converter.to_reformer(&index_reformer), 0);
  auto reformer = std::dynamic_pointer_cast<RabitqReformer>(index_reformer);
  // The cast yields null if the converter produced a different reformer type.
  ASSERT_TRUE(reformer != nullptr);
  IndexStreamer::Pointer streamer =
      std::make_shared<HnswRabitqStreamer>(holder, reformer);

  ailego::Params params;
  params.set("proxima.hnsw_rabitq.streamer.max_neighbor_count", 16U);
  params.set("proxima.hnsw_rabitq.streamer.upper_neighbor_count", 8U);
  params.set("proxima.hnsw_rabitq.streamer.scaling_factor", 5U);
  params.set("proxima.hnsw_rabitq.general.dimension", dim);
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ailego::Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/Test/AddVector", true));
  ASSERT_EQ(0, streamer->open(storage));

  auto context = streamer->create_context();
  ASSERT_TRUE(!!context);
  for (auto it = holder->create_iterator(); it->is_valid(); it->next()) {
    IndexQueryMeta query_meta(IndexMeta::DataType::DT_FP32, dim);
    ASSERT_EQ(0,
              streamer->add_impl(it->key(), it->data(), query_meta, context));
  }
  // flush() reports errors through its return code (it is asserted the same
  // way in TestOpenClose).
  ASSERT_EQ(0, streamer->flush(0UL));

  // Perform search verification
  NumericalVector<float> query_vec(dim);
  for (size_t j = 0; j < dim; ++j) {
    query_vec[j] = static_cast<float>(j) / 1000.0f;
  }

  IndexQueryMeta query_meta(IndexMeta::DataType::DT_FP32, dim);

  context->set_topk(10);
  ASSERT_EQ(0, streamer->search_impl(query_vec.data(), query_meta, 1, context));

  const auto &result = context->result(0);
  ASSERT_GT(result.size(), 0UL);
  ASSERT_LE(result.size(), 10UL);

  // reopen and load reformer from storage
  ASSERT_EQ(0, streamer->close());
  IndexStreamer::Pointer new_streamer =
      std::make_shared<HnswRabitqStreamer>(holder);
  ASSERT_EQ(0, new_streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, new_streamer->open(storage));
}

// Verifies brute-force search (search_bf_impl): document i is the constant
// vector (i, i, ..., i), so a query equal to, or slightly offset from,
// document i must return key i as the top hit.
TEST_F(HnswRabitqStreamerTest, TestLinearSearch) {
  auto holder =
      make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dim);
  size_t doc_cnt = 1000UL;
  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = static_cast<float>(i);
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }

  RabitqConverter converter;
  // Abort on setup errors here instead of letting them surface later.
  ASSERT_EQ(0, converter.init(*index_meta_ptr_, ailego::Params()));
  ASSERT_EQ(converter.train(holder), 0);
  std::shared_ptr<IndexReformer> index_reformer;
  ASSERT_EQ(converter.to_reformer(&index_reformer), 0);
  auto reformer = std::dynamic_pointer_cast<RabitqReformer>(index_reformer);
  // The cast yields null if the converter produced a different reformer type.
  ASSERT_TRUE(reformer != nullptr);
  IndexStreamer::Pointer streamer =
      std::make_shared<HnswRabitqStreamer>(holder, reformer);

  ailego::Params params;
  params.set("proxima.hnsw_rabitq.streamer.max_neighbor_count", 16U);
  params.set("proxima.hnsw_rabitq.streamer.upper_neighbor_count", 8U);
  params.set("proxima.hnsw_rabitq.streamer.scaling_factor", 5U);
  params.set("proxima.hnsw_rabitq.general.dimension", dim);
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ailego::Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestLinearSearch", true));
  ASSERT_EQ(0, streamer->open(storage));

  auto context = streamer->create_context();
  ASSERT_TRUE(!!context);
  for (auto it = holder->create_iterator(); it->is_valid(); it->next()) {
    IndexQueryMeta query_meta(IndexMeta::DataType::DT_FP32, dim);
    ASSERT_EQ(0,
              streamer->add_impl(it->key(), it->data(), query_meta, context));
  }

  // Test linear search with exact match
  size_t topk = 3;
  IndexQueryMeta query_meta(IndexMeta::DataType::DT_FP32, dim);
  NumericalVector<float> query_vec(dim);

  // Probe every 100th document.
  for (size_t i = 0; i < doc_cnt; i += 100) {
    for (size_t j = 0; j < dim; ++j) {
      query_vec[j] = static_cast<float>(i);
    }
    context->set_topk(1U);
    ASSERT_EQ(0,
              streamer->search_bf_impl(query_vec.data(), query_meta, context));
    auto &result1 = context->result();
    ASSERT_EQ(1UL, result1.size());
    ASSERT_EQ(i, result1[0].key());

    // Test with slight offset
    for (size_t j = 0; j < dim; ++j) {
      query_vec[j] = static_cast<float>(i) + 0.1f;
    }
    context->set_topk(topk);
    ASSERT_EQ(0,
              streamer->search_bf_impl(query_vec.data(), query_meta, context));
    auto &result2 = context->result();
    ASSERT_EQ(topk, result2.size());
    ASSERT_EQ(i, result2[0].key());
  }
}

// Compares graph search (search_impl) with brute-force search
// (search_bf_impl) on 2000 constant vectors: for every 100th document a
// slightly offset query is issued and the overlap of both top-50 lists is
// accumulated into a recall figure.
TEST_F(HnswRabitqStreamerTest, TestKnnSearch) {
  auto holder =
      make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dim);
  size_t doc_cnt = 2000UL;
  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = static_cast<float>(i);
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }

  RabitqConverter converter;
  // Abort on setup errors here instead of letting them surface later.
  ASSERT_EQ(0, converter.init(*index_meta_ptr_, ailego::Params()));
  ASSERT_EQ(converter.train(holder), 0);
  std::shared_ptr<IndexReformer> index_reformer;
  ASSERT_EQ(converter.to_reformer(&index_reformer), 0);
  auto reformer = std::dynamic_pointer_cast<RabitqReformer>(index_reformer);
  // The cast yields null if the converter produced a different reformer type.
  ASSERT_TRUE(reformer != nullptr);
  IndexStreamer::Pointer streamer =
      std::make_shared<HnswRabitqStreamer>(holder, reformer);

  ailego::Params params;
  params.set("proxima.hnsw_rabitq.streamer.max_neighbor_count", 16U);
  params.set("proxima.hnsw_rabitq.streamer.upper_neighbor_count", 8U);
  params.set("proxima.hnsw_rabitq.streamer.scaling_factor", 10U);
  params.set("proxima.hnsw_rabitq.streamer.efconstruction", 100U);
  params.set("proxima.hnsw_rabitq.streamer.ef", 50U);
  params.set("proxima.hnsw_rabitq.general.dimension", dim);
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ailego::Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestKnnSearch", true));
  ASSERT_EQ(0, streamer->open(storage));

  auto context = streamer->create_context();
  ASSERT_TRUE(!!context);
  for (auto it = holder->create_iterator(); it->is_valid(); it->next()) {
    IndexQueryMeta query_meta(IndexMeta::DataType::DT_FP32, dim);
    ASSERT_EQ(0,
              streamer->add_impl(it->key(), it->data(), query_meta, context));
  }

  // Compare KNN search with brute force search
  auto linear_ctx = streamer->create_context();
  auto knn_ctx = streamer->create_context();
  ASSERT_TRUE(!!linear_ctx);
  ASSERT_TRUE(!!knn_ctx);
  size_t topk = 50;
  linear_ctx->set_topk(topk);
  knn_ctx->set_topk(topk);

  int total_hits = 0;
  int total_cnts = 0;
  int topk1_hits = 0;
  int query_cnt = 0;  // number of queries actually issued
  IndexQueryMeta query_meta(IndexMeta::DataType::DT_FP32, dim);
  NumericalVector<float> query_vec(dim);

  for (size_t i = 0; i < doc_cnt; i += 100) {
    query_cnt++;
    for (size_t j = 0; j < dim; ++j) {
      query_vec[j] = static_cast<float>(i) + 0.1f;
    }

    ASSERT_EQ(0,
              streamer->search_impl(query_vec.data(), query_meta, 1, knn_ctx));
    ASSERT_EQ(
        0, streamer->search_bf_impl(query_vec.data(), query_meta, linear_ctx));

    auto &knn_result = knn_ctx->result(0);
    ASSERT_EQ(topk, knn_result.size());
    topk1_hits += (i == knn_result[0].key());

    auto &linear_result = linear_ctx->result();
    ASSERT_EQ(topk, linear_result.size());
    ASSERT_EQ(i, linear_result[0].key());

    // Count how many graph-search hits also appear in the brute-force list.
    for (size_t k = 0; k < topk; ++k) {
      total_cnts++;
      for (size_t j = 0; j < topk; ++j) {
        if (linear_result[j].key() == knn_result[k].key()) {
          total_hits++;
          break;
        }
      }
    }
  }

  ASSERT_GT(query_cnt, 0);
  float recall = total_hits * 1.0f / total_cnts;
  // Fraction of queries whose graph-search top hit is the expected key.
  // Dividing by the real query count keeps this correct if the loop stride
  // changes; the former `* 100.0f / doc_cnt` form only matched while the
  // stride was exactly 100.
  float topk1_recall = topk1_hits * 1.0f / static_cast<float>(query_cnt);
  EXPECT_GT(recall, 0.60f);
  // actual: no guarantee
  // TODO(jiliang.ljl): check if ok?
  EXPECT_GT(topk1_recall, 0.00f);
}

// Recall check on random data: indexes 1500 vectors drawn with rand(), then
// issues 200 rand()-generated queries and compares graph search against
// brute-force search on both overall top-50 overlap and top-1 agreement.
TEST_F(HnswRabitqStreamerTest, TestRandomData) {
  auto holder =
      make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dim);
  size_t doc_cnt = 1500UL;

  // Add random vectors
  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }

  RabitqConverter converter;
  // Abort on setup errors here instead of letting them surface later.
  ASSERT_EQ(0, converter.init(*index_meta_ptr_, ailego::Params()));
  ASSERT_EQ(converter.train(holder), 0);
  std::shared_ptr<IndexReformer> index_reformer;
  ASSERT_EQ(converter.to_reformer(&index_reformer), 0);
  auto reformer = std::dynamic_pointer_cast<RabitqReformer>(index_reformer);
  // The cast yields null if the converter produced a different reformer type.
  ASSERT_TRUE(reformer != nullptr);
  IndexStreamer::Pointer streamer =
      std::make_shared<HnswRabitqStreamer>(holder, reformer);

  ailego::Params params;
  params.set("proxima.hnsw_rabitq.streamer.max_neighbor_count", 32U);
  params.set("proxima.hnsw_rabitq.streamer.upper_neighbor_count", 16U);
  params.set("proxima.hnsw_rabitq.streamer.scaling_factor", 20U);
  params.set("proxima.hnsw_rabitq.streamer.efconstruction", 200U);
  params.set("proxima.hnsw_rabitq.streamer.ef", 100U);
  params.set("proxima.hnsw_rabitq.general.dimension", dim);
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ailego::Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestRandomData", true));
  ASSERT_EQ(0, streamer->open(storage));

  auto context = streamer->create_context();
  ASSERT_TRUE(!!context);
  for (auto it = holder->create_iterator(); it->is_valid(); it->next()) {
    IndexQueryMeta query_meta(IndexMeta::DataType::DT_FP32, dim);
    ASSERT_EQ(0,
              streamer->add_impl(it->key(), it->data(), query_meta, context));
  }

  // Test with random queries
  auto linear_ctx = streamer->create_context();
  auto knn_ctx = streamer->create_context();
  ASSERT_TRUE(!!linear_ctx);
  ASSERT_TRUE(!!knn_ctx);
  size_t topk = 50;
  linear_ctx->set_topk(topk);
  knn_ctx->set_topk(topk);

  int total_hits = 0;
  int total_cnts = 0;
  int topk1_hits = 0;
  IndexQueryMeta query_meta(IndexMeta::DataType::DT_FP32, dim);
  NumericalVector<float> query_vec(dim);

  size_t query_cnt = 200;
  for (size_t i = 0; i < query_cnt; i++) {
    for (size_t j = 0; j < dim; ++j) {
      query_vec[j] = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
    }

    ASSERT_EQ(
        0, streamer->search_bf_impl(query_vec.data(), query_meta, linear_ctx));
    ASSERT_EQ(0,
              streamer->search_impl(query_vec.data(), query_meta, 1, knn_ctx));

    auto &knn_result = knn_ctx->result(0);
    ASSERT_EQ(topk, knn_result.size());

    auto &linear_result = linear_ctx->result();
    ASSERT_EQ(topk, linear_result.size());

    topk1_hits += (linear_result[0].key() == knn_result[0].key());

    // Count how many graph-search hits also appear in the brute-force list.
    for (size_t k = 0; k < topk; ++k) {
      total_cnts++;
      for (size_t j = 0; j < topk; ++j) {
        if (linear_result[j].key() == knn_result[k].key()) {
          total_hits++;
          break;
        }
      }
    }
  }

  float recall = total_hits * 1.0f / total_cnts;
  float topk1_recall = topk1_hits * 1.0f / query_cnt;
  EXPECT_GT(recall, 0.50f);
  EXPECT_GT(topk1_recall, 0.70f);
}

// Verifies that an index survives close/reopen: the first half of the
// documents is added through one streamer, which is flushed and closed; a
// second streamer (constructed without a reformer) reopens the same storage,
// adds the second half, and a search must then find document 10, which was
// written before the reopen.
TEST_F(HnswRabitqStreamerTest, TestOpenClose) {
  auto holder =
      make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dim);
  size_t doc_cnt = 500UL;
  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = static_cast<float>(i);
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }

  RabitqConverter converter;
  // Abort on setup errors here instead of letting them surface later.
  ASSERT_EQ(0, converter.init(*index_meta_ptr_, ailego::Params()));
  ASSERT_EQ(converter.train(holder), 0);
  std::shared_ptr<IndexReformer> index_reformer;
  ASSERT_EQ(converter.to_reformer(&index_reformer), 0);
  auto reformer = std::dynamic_pointer_cast<RabitqReformer>(index_reformer);
  // The cast yields null if the converter produced a different reformer type.
  ASSERT_TRUE(reformer != nullptr);

  ailego::Params params;
  params.set("proxima.hnsw_rabitq.streamer.max_neighbor_count", 16U);
  params.set("proxima.hnsw_rabitq.streamer.upper_neighbor_count", 8U);
  params.set("proxima.hnsw_rabitq.streamer.scaling_factor", 5U);
  params.set("proxima.hnsw_rabitq.general.dimension", dim);

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ailego::Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestOpenClose", true));

  IndexStreamer::Pointer streamer =
      std::make_shared<HnswRabitqStreamer>(holder, reformer);
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  auto context = streamer->create_context();
  ASSERT_TRUE(!!context);
  IndexQueryMeta query_meta(IndexMeta::DataType::DT_FP32, dim);

  // Add first half of vectors
  for (size_t i = 0; i < doc_cnt / 2; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = static_cast<float>(i);
    }
    ASSERT_EQ(0, streamer->add_impl(i, vec.data(), query_meta, context));
  }

  ASSERT_EQ(0, streamer->flush(0UL));
  ASSERT_EQ(0, streamer->close());

  // Reopen and add second half
  IndexStreamer::Pointer streamer2 =
      std::make_shared<HnswRabitqStreamer>(holder);
  ASSERT_EQ(0, streamer2->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer2->open(storage));

  auto context2 = streamer2->create_context();
  ASSERT_TRUE(!!context2);
  for (size_t i = doc_cnt / 2; i < doc_cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = static_cast<float>(i);
    }
    ASSERT_EQ(0, streamer2->add_impl(i, vec.data(), query_meta, context2));
  }

  ASSERT_EQ(0, streamer2->flush(0UL));

  // Verify search works after reopen
  NumericalVector<float> query_vec(dim);
  for (size_t j = 0; j < dim; ++j) {
    query_vec[j] = 10.0f;
  }

  context2->set_topk(5);
  ASSERT_EQ(0,
            streamer2->search_impl(query_vec.data(), query_meta, 1, context2));
  const auto &result = context2->result(0);
  ASSERT_EQ(5UL, result.size());
  ASSERT_EQ(10UL, result[0].key());
}

// Verifies the provider/iterator exposed by the streamer: after adding 300
// documents with keys 0..299, iterating the provider must yield the keys in
// insertion order and exactly doc_cnt entries.
TEST_F(HnswRabitqStreamerTest, TestCreateIterator) {
  auto holder =
      make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(dim);
  size_t doc_cnt = 300UL;
  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = static_cast<float>(i);
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }

  RabitqConverter converter;
  // Abort on setup errors here instead of letting them surface later.
  ASSERT_EQ(0, converter.init(*index_meta_ptr_, ailego::Params()));
  ASSERT_EQ(converter.train(holder), 0);
  std::shared_ptr<IndexReformer> index_reformer;
  ASSERT_EQ(converter.to_reformer(&index_reformer), 0);
  auto reformer = std::dynamic_pointer_cast<RabitqReformer>(index_reformer);
  // The cast yields null if the converter produced a different reformer type.
  ASSERT_TRUE(reformer != nullptr);
  IndexStreamer::Pointer streamer =
      std::make_shared<HnswRabitqStreamer>(holder, reformer);

  ailego::Params params;
  params.set("proxima.hnsw_rabitq.streamer.max_neighbor_count", 16U);
  params.set("proxima.hnsw_rabitq.streamer.upper_neighbor_count", 8U);
  params.set("proxima.hnsw_rabitq.streamer.scaling_factor", 5U);
  params.set("proxima.hnsw_rabitq.general.dimension", dim);
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ailego::Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestCreateIterator", true));
  ASSERT_EQ(0, streamer->open(storage));

  auto context = streamer->create_context();
  ASSERT_TRUE(!!context);
  IndexQueryMeta query_meta(IndexMeta::DataType::DT_FP32, dim);

  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = static_cast<float>(i);
    }
    ASSERT_EQ(0, streamer->add_impl(i, vec.data(), query_meta, context));
  }

  // flush() reports errors through its return code (it is asserted the same
  // way in TestOpenClose).
  ASSERT_EQ(0, streamer->flush(0UL));

  // Test iterator
  auto provider = streamer->create_provider();
  // Guard the dereference below.
  ASSERT_TRUE(!!provider);
  auto iter = provider->create_iterator();
  ASSERT_TRUE(!!iter);

  size_t count = 0;
  while (iter->is_valid()) {
    ASSERT_EQ(count, iter->key());
    // Per-element data verification is currently disabled:
    // const float *data = (const float *)iter->data();
    // for (size_t j = 0; j < dim; ++j) {
    //   ASSERT_EQ(static_cast<float>(count), data[j]);
    // }
    iter->next();
    count++;
  }
  ASSERT_EQ(doc_cnt, count);

  // Test get_vector (currently disabled)
  // for (size_t i = 0; i < doc_cnt; i++) {
  //   const float *data = (const float *)provider->get_vector(i);
  //   ASSERT_NE(data, nullptr);
  //   for (size_t j = 0; j < dim; ++j) {
  //     ASSERT_EQ(static_cast<float>(i), data[j]);
  //   }
  // }
}

// Sweeps a range of vector dimensions. For dimensions <= 63 or >= 4096 the
// streamer's init() is expected to return -31; for all other dimensions a
// full train / open / add / flush cycle over 100 documents must succeed.
TEST_F(HnswRabitqStreamerTest, TestDimensions) {
  std::vector<size_t> dimensions = {1,    2,    4,    8,    16,   32,   33,
                                    63,   64,   128,  256,  512,  1024, 2047,
                                    2048, 2049, 4095, 4096, 4097, 8192, 16384};
  size_t doc_cnt = 100;

  for (size_t test_dim : dimensions) {
    std::cout << "Testing dimension: " << test_dim << std::endl;

    IndexMeta index_meta(IndexMeta::DataType::DT_FP32, test_dim);
    index_meta.set_metric("SquaredEuclidean", 0, ailego::Params());

    auto holder =
        make_shared<MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(
            test_dim);
    // First streamer has no reformer; it is only used to probe whether init()
    // accepts this dimension.
    IndexStreamer::Pointer streamer =
        std::make_shared<HnswRabitqStreamer>(holder);

    ailego::Params params;
    params.set("proxima.hnsw_rabitq.streamer.max_neighbor_count", 16U);
    params.set("proxima.hnsw_rabitq.streamer.upper_neighbor_count", 8U);
    params.set("proxima.hnsw_rabitq.streamer.scaling_factor", 5U);
    params.set("proxima.hnsw_rabitq.general.dimension", test_dim);

    int ret = streamer->init(index_meta, params);

    // dimension <= 63 or >= 4096: init() should return -31
    if (test_dim <= 63 || test_dim >= 4096) {
      ASSERT_EQ(-31, ret) << "expected init to fail with -31, dim=" << test_dim;
      std::cout << "Dimension " << test_dim
                << " correctly rejected with ret=" << ret << std::endl;
      continue;
    }

    // Valid dimensions: verify full streaming build succeeds
    ASSERT_EQ(0, ret) << "init failed, dim=" << test_dim;

    for (size_t i = 0; i < doc_cnt; i++) {
      NumericalVector<float> vec(test_dim);
      for (size_t j = 0; j < test_dim; ++j) {
        vec[j] = static_cast<float>(i * test_dim + j) / 1000.0f;
      }
      ASSERT_TRUE(holder->emplace(i, std::move(vec))) << "dim=" << test_dim;
    }

    RabitqConverter converter;
    // Abort on setup errors here instead of letting them surface later.
    ASSERT_EQ(0, converter.init(index_meta, ailego::Params()))
        << "converter init failed, dim=" << test_dim;
    ASSERT_EQ(0, converter.train(holder))
        << "converter train failed, dim=" << test_dim;
    std::shared_ptr<IndexReformer> index_reformer;
    ASSERT_EQ(0, converter.to_reformer(&index_reformer)) << "dim=" << test_dim;
    auto reformer = std::dynamic_pointer_cast<RabitqReformer>(index_reformer);
    // The cast yields null if the converter produced a different reformer type.
    ASSERT_TRUE(reformer != nullptr) << "dim=" << test_dim;

    // Recreate streamer with reformer
    streamer = std::make_shared<HnswRabitqStreamer>(holder, reformer);
    ASSERT_EQ(0, streamer->init(index_meta, params))
        << "init with reformer failed, dim=" << test_dim;

    auto storage = IndexFactory::CreateStorage("MMapFileStorage");
    ASSERT_NE(nullptr, storage);
    ailego::Params stg_params;
    ASSERT_EQ(0, storage->init(stg_params));
    std::string storage_path =
        dir_ + "/TestDimensions_" + std::to_string(test_dim);
    ASSERT_EQ(0, storage->open(storage_path, true))
        << "storage open failed, dim=" << test_dim;
    ASSERT_EQ(0, streamer->open(storage))
        << "streamer open failed, dim=" << test_dim;

    auto context = streamer->create_context();
    ASSERT_TRUE(!!context) << "dim=" << test_dim;
    IndexQueryMeta query_meta(IndexMeta::DataType::DT_FP32, test_dim);
    for (auto it = holder->create_iterator(); it->is_valid(); it->next()) {
      ASSERT_EQ(0,
                streamer->add_impl(it->key(), it->data(), query_meta, context))
          << "add failed, dim=" << test_dim << ", key=" << it->key();
    }
    ASSERT_EQ(0, streamer->flush(0UL)) << "flush failed, dim=" << test_dim;

    std::cout << "Dimension " << test_dim << " passed" << std::endl;
  }
}

}  // namespace core
}  // namespace zvec


================================================
FILE: tests/core/algorithm/hnsw_sparse/CMakeLists.txt
================================================
# Pull in the project's helper macros. cc_gtest (used below) is not defined in
# this file; presumably it comes from bazel.cmake — confirm.
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)

# Collect every source file matching *_test.cc, searching recursively.
file(GLOB_RECURSE ALL_TEST_SRCS *_test.cc)

# Register one test target per collected source file.
foreach(CC_SRCS ${ALL_TEST_SRCS})
  # Target name = source file name without directory and extension (NAME_WE).
  get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)
  cc_gtest(
      NAME ${CC_TARGET}
      STRICT
      LIBS zvec_ailego core_framework core_utility core_metric core_quantizer core_knn_hnsw_sparse
      SRCS ${CC_SRCS}
      INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm/hnsw_sparse
    )
endforeach()

================================================
FILE: tests/core/algorithm/hnsw_sparse/hnsw_sparse_builder_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "hnsw_sparse_builder.h"
#include <sys/stat.h>
#include <sys/types.h>
#include <fcntl.h>
#include <future>
#include <gtest/gtest.h>
#include <zvec/ailego/container/vector.h>
#include "zvec/core/framework/index_framework.h"
#include "hnsw_sparse_params.h"

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
#endif

using namespace std;
using namespace testing;
using namespace zvec::ailego;

namespace zvec {
namespace core {

class HnswSparseBuilderTest : public testing::Test {
 protected:
  void SetUp(void);
  void TearDown(void);

  static std::string _dir;
  static shared_ptr<IndexMeta> _index_meta_ptr;
};

std::string HnswSparseBuilderTest::_dir("HnswSparseBuilderTest");
shared_ptr<IndexMeta> HnswSparseBuilderTest::_index_meta_ptr;

void HnswSparseBuilderTest::SetUp(void) {
  _index_meta_ptr.reset(new (nothrow) IndexMeta(IndexMeta::MetaType::MT_SPARSE,
                                                IndexMeta::DataType::DT_FP32));
  _index_meta_ptr->set_metric("InnerProductSparse", 0, ailego::Params());
}

//! Delete the fixture directory together with every index file dumped into it.
void HnswSparseBuilderTest::TearDown(void) {
  char command[100];
  // The exit status of `rm` is deliberately ignored: cleanup is best effort.
  snprintf(command, sizeof(command), "rm -rf %s", _dir.c_str());
  system(command);
}

// Full builder life cycle: init -> train -> build -> dump with a one-pass
// holder, then cleanup() and the same cycle again with a multi-pass holder
// that contains twice as many documents.
TEST_F(HnswSparseBuilderTest, TestGeneral) {
  IndexBuilder::Pointer builder =
      IndexFactory::CreateBuilder("HnswSparseBuilder");
  ASSERT_NE(builder, nullptr);

  // Document i carries `sparse_count` entries at indices 0, 20, 40, ... and
  // every entry has the value i.
  auto holder =
      make_shared<OnePassIndexSparseHolder<IndexMeta::DataType::DT_FP32>>();
  uint32_t sparse_count = 4;
  size_t doc_cnt = 1000UL;
  for (size_t i = 0; i < doc_cnt; i++) {
    SparseVector<float> vec;

    NumericalVector<uint32_t> sparse_indices(sparse_count);
    NumericalVector<float> sparse_values(sparse_count);
    for (size_t j = 0; j < sparse_count; ++j) {
      sparse_indices[j] = 20 * j;
      sparse_values[j] = i;
    }

    vec.add_sparses(sparse_indices, sparse_values);

    ASSERT_TRUE(holder->emplace(i, vec));
  }

  ailego::Params params;
  params.set(PARAM_HNSW_SPARSE_BUILDER_THREAD_COUNT, 1);
  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));

  ASSERT_EQ(0, builder->train(holder));

  ASSERT_EQ(0, builder->build(holder));

  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);

  string path = _dir + "/TestGeneral";
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  // `stats` is a reference into the builder and is read again after the
  // rebuild below. The test expects train() to leave the trained counters at
  // zero for this builder.
  auto &stats = builder->stats();
  ASSERT_EQ(0UL, stats.trained_count());
  ASSERT_EQ(doc_cnt, stats.built_count());
  ASSERT_EQ(doc_cnt, stats.dumped_count());
  ASSERT_EQ(0UL, stats.discarded_count());
  ASSERT_EQ(0UL, stats.trained_costtime());
  ASSERT_GT(stats.built_costtime(), 0UL);
  // ASSERT_GT(stats.dumped_costtime(), 0UL);

  // cleanup and rebuild
  ASSERT_EQ(0, builder->cleanup());

  // Second round: same data layout, but 2000 documents in a multi-pass holder.
  auto holder2 =
      make_shared<MultiPassIndexSparseHolder<IndexMeta::DataType::DT_FP32>>();
  size_t doc_cnt2 = 2000UL;
  for (size_t i = 0; i < doc_cnt2; i++) {
    SparseVector<float> vec;

    NumericalVector<uint32_t> sparse_indices(sparse_count);
    NumericalVector<float> sparse_values(sparse_count);
    for (size_t j = 0; j < sparse_count; ++j) {
      sparse_indices[j] = 20 * j;
      sparse_values[j] = i;
    }

    vec.add_sparses(sparse_indices, sparse_values);

    ASSERT_TRUE(holder2->emplace(i, vec));
  }

  // The builder has to be initialised again after cleanup(); the dump goes
  // to the same path as the first round.
  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));
  ASSERT_EQ(0, builder->train(holder2));
  ASSERT_EQ(0, builder->build(holder2));
  auto dumper2 = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper2, nullptr);
  ASSERT_EQ(0, dumper2->create(path));
  ASSERT_EQ(0, builder->dump(dumper2));
  ASSERT_EQ(0, dumper2->close());

  // The counters must now reflect the second build only.
  ASSERT_EQ(0UL, stats.trained_count());
  ASSERT_EQ(doc_cnt2, stats.built_count());
  ASSERT_EQ(doc_cnt2, stats.dumped_count());
  ASSERT_EQ(0UL, stats.discarded_count());
  ASSERT_EQ(0UL, stats.trained_costtime());
  ASSERT_GT(stats.built_costtime(), 0UL);
}

// With "proxima.hnsw.sparse_builder.memory_quota" set to 100000 the test
// expects build() to reject 1000 documents of 32 sparse entries each with
// IndexError_NoMemory, while init() and train() still succeed.
TEST_F(HnswSparseBuilderTest, TestMemquota) {
  IndexBuilder::Pointer builder =
      IndexFactory::CreateBuilder("HnswSparseBuilder");
  ASSERT_NE(builder, nullptr);

  size_t total_docs = 1000UL;
  uint32_t entries_per_doc = 32;
  auto holder =
      make_shared<OnePassIndexSparseHolder<IndexMeta::DataType::DT_FP32>>();

  // Document `doc` holds the value `doc` at indices 0, 20, 40, ...
  for (size_t doc = 0; doc < total_docs; ++doc) {
    NumericalVector<uint32_t> indices(entries_per_doc);
    NumericalVector<float> values(entries_per_doc);
    for (size_t slot = 0; slot < entries_per_doc; ++slot) {
      indices[slot] = 20 * slot;
      values[slot] = doc;
    }

    SparseVector<float> sparse_doc;
    sparse_doc.add_sparses(indices, values);
    ASSERT_TRUE(holder->emplace(doc, sparse_doc));
  }

  ailego::Params quota_params;
  quota_params.set("proxima.hnsw.sparse_builder.memory_quota", 100000UL);

  ASSERT_EQ(0, builder->init(*_index_meta_ptr, quota_params));
  ASSERT_EQ(0, builder->train(holder));
  ASSERT_EQ(IndexError_NoMemory, builder->build(holder));
}

// Two independent builders train and build concurrently from the same holder
// while sharing one SingleQueueIndexThreads instance; both must end up with
// every document built.
TEST_F(HnswSparseBuilderTest, TestIndexThreads) {
  IndexBuilder::Pointer builder1 =
      IndexFactory::CreateBuilder("HnswSparseBuilder");
  ASSERT_NE(builder1, nullptr);
  IndexBuilder::Pointer builder2 =
      IndexFactory::CreateBuilder("HnswSparseBuilder");
  ASSERT_NE(builder2, nullptr);

  auto holder =
      make_shared<MultiPassIndexSparseHolder<IndexMeta::DataType::DT_FP32>>();

  size_t doc_cnt = 1000UL;
  uint32_t sparse_count = 32;

  // Document i holds the value i at indices 0, 20, 40, ...
  for (size_t i = 0; i < doc_cnt; i++) {
    SparseVector<float> vec;

    NumericalVector<uint32_t> sparse_indices(sparse_count);
    NumericalVector<float> sparse_values(sparse_count);
    for (size_t j = 0; j < sparse_count; ++j) {
      sparse_indices[j] = 20 * j;
      sparse_values[j] = i;
    }

    vec.add_sparses(sparse_indices, sparse_values);

    ASSERT_TRUE(holder->emplace(i, vec));
  }

  ailego::Params params;
  // The first constructor argument is a time-seeded random value in [0, 3],
  // so it differs between runs.
  // NOTE(review): presumably this is the thread count and 0 selects a
  // default -- confirm against SingleQueueIndexThreads.
  std::srand(ailego::Realtime::MilliSeconds());
  auto threads =
      std::make_shared<SingleQueueIndexThreads>(std::rand() % 4, false);
  ASSERT_EQ(0, builder1->init(*_index_meta_ptr, params));
  ASSERT_EQ(0, builder2->init(*_index_meta_ptr, params));

  // An ASSERT_* inside a lambda only returns from the lambda; the failure is
  // still recorded by gtest, but the test body keeps running.
  auto build_index1 = [&]() {
    ASSERT_EQ(0, builder1->train(threads, holder));
    ASSERT_EQ(0, builder1->build(threads, holder));
  };
  auto build_index2 = [&]() {
    ASSERT_EQ(0, builder2->train(threads, holder));
    ASSERT_EQ(0, builder2->build(threads, holder));
  };

  // Run both builds on their own threads and wait for both to finish before
  // touching the builders again.
  auto t1 = std::async(std::launch::async, build_index1);
  auto t2 = std::async(std::launch::async, build_index2);
  t1.wait();
  t2.wait();


  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);

  // The same dumper and path are used for both builders, one after the other.
  string path = _dir + "/TestIndexThreads";
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder1->dump(dumper));
  ASSERT_EQ(0, dumper->close());
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder2->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  auto &stats1 = builder1->stats();
  ASSERT_EQ(doc_cnt, stats1.built_count());
  auto &stats2 = builder2->stats();
  ASSERT_EQ(doc_cnt, stats2.built_count());
}

// Converts FP32 sparse documents with "HalfFloatSparseConverter", builds and
// dumps an index from the converted holder using the converter's meta, then
// cleans the builder up and rebuilds it from plain FP32 documents.
TEST_F(HnswSparseBuilderTest, TestHalfFloatConverter) {
  IndexBuilder::Pointer builder =
      IndexFactory::CreateBuilder("HnswSparseBuilder");
  ASSERT_NE(builder, nullptr);

  // Document i holds the value i at indices 0, 20, 40, ...
  auto holder =
      make_shared<OnePassIndexSparseHolder<IndexMeta::DataType::DT_FP32>>();
  uint32_t sparse_count = 4;
  size_t doc_cnt = 1000UL;
  for (size_t i = 0; i < doc_cnt; i++) {
    SparseVector<float> vec;

    NumericalVector<uint32_t> sparse_indices(sparse_count);
    NumericalVector<float> sparse_values(sparse_count);
    for (size_t j = 0; j < sparse_count; ++j) {
      sparse_indices[j] = 20 * j;
      sparse_values[j] = i;
    }

    vec.add_sparses(sparse_indices, sparse_values);

    ASSERT_TRUE(holder->emplace(i, vec));
  }

  // Every converter step is checked so that a missing converter or a failed
  // conversion is reported here rather than as a crash or a confusing
  // builder error further down.
  ailego::Params converter_params;
  auto converter = IndexFactory::CreateConverter("HalfFloatSparseConverter");
  ASSERT_NE(converter, nullptr);
  ASSERT_EQ(0, converter->init(*_index_meta_ptr, converter_params));

  IndexMeta index_meta = converter->meta();

  ASSERT_EQ(0, converter->transform(holder));

  auto converted_holder = converter->sparse_result();
  ASSERT_TRUE(!!converted_holder);

  ailego::Params params;
  params.set(PARAM_HNSW_SPARSE_BUILDER_THREAD_COUNT, 1);
  // The builder must receive the builder parameters (thread count 1), not
  // the empty converter parameters.
  ASSERT_EQ(0, builder->init(index_meta, params));

  ASSERT_EQ(0, builder->train(converted_holder));

  ASSERT_EQ(0, builder->build(converted_holder));

  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);

  string path = _dir + "/TestHalFloatConverter";
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  // `stats` is a reference into the builder and is read again after the
  // rebuild below.
  auto &stats = builder->stats();
  ASSERT_EQ(0UL, stats.trained_count());
  ASSERT_EQ(doc_cnt, stats.built_count());
  ASSERT_EQ(doc_cnt, stats.dumped_count());
  ASSERT_EQ(0UL, stats.discarded_count());
  ASSERT_EQ(0UL, stats.trained_costtime());
  ASSERT_GT(stats.built_costtime(), 0UL);
  // ASSERT_GT(stats.dumped_costtime(), 0UL);

  // cleanup and rebuild
  ASSERT_EQ(0, builder->cleanup());

  // Second round: unconverted FP32 documents with the fixture's FP32 meta.
  auto holder2 =
      make_shared<MultiPassIndexSparseHolder<IndexMeta::DataType::DT_FP32>>();
  size_t doc_cnt2 = 2000UL;
  for (size_t i = 0; i < doc_cnt2; i++) {
    SparseVector<float> vec;

    NumericalVector<uint32_t> sparse_indices(sparse_count);
    NumericalVector<float> sparse_values(sparse_count);
    for (size_t j = 0; j < sparse_count; ++j) {
      sparse_indices[j] = 20 * j;
      sparse_values[j] = i;
    }

    vec.add_sparses(sparse_indices, sparse_values);

    ASSERT_TRUE(holder2->emplace(i, vec));
  }

  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));
  ASSERT_EQ(0, builder->train(holder2));
  ASSERT_EQ(0, builder->build(holder2));
  auto dumper2 = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper2, nullptr);
  ASSERT_EQ(0, dumper2->create(path));
  ASSERT_EQ(0, builder->dump(dumper2));
  ASSERT_EQ(0, dumper2->close());

  // The counters must now reflect the second build only.
  ASSERT_EQ(0UL, stats.trained_count());
  ASSERT_EQ(doc_cnt2, stats.built_count());
  ASSERT_EQ(doc_cnt2, stats.dumped_count());
  ASSERT_EQ(0UL, stats.discarded_count());
  ASSERT_EQ(0UL, stats.trained_costtime());
  ASSERT_GT(stats.built_costtime(), 0UL);
}

// Builds directly from flat arrays (keys, row offsets, indices, values)
// instead of a holder, then dumps the index and checks the builder stats.
TEST_F(HnswSparseBuilderTest, TestIndptr) {
  IndexBuilder::Pointer builder =
      IndexFactory::CreateBuilder("HnswSparseBuilder");
  ASSERT_NE(builder, nullptr);

  uint32_t entries_per_doc = 4;
  size_t total_docs = 1000UL;

  std::vector<uint64_t> doc_keys;
  std::vector<uint64_t> row_offsets;
  std::vector<uint32_t> flat_indices;
  std::vector<float> flat_values;
  doc_keys.reserve(total_docs);
  row_offsets.reserve(total_docs + 1);
  flat_indices.reserve(total_docs * entries_per_doc);
  flat_values.reserve(total_docs * entries_per_doc);

  // row_offsets[d] .. row_offsets[d + 1] delimits the entries of document d,
  // so the array starts at 0 and has total_docs + 1 elements.
  size_t entries_so_far = 0;
  row_offsets.push_back(0);
  for (size_t doc = 0; doc < total_docs; ++doc) {
    doc_keys.push_back(doc);

    for (size_t slot = 0; slot < entries_per_doc; ++slot) {
      flat_indices.push_back(20 * slot);
      flat_values.push_back(doc);
    }

    entries_so_far += entries_per_doc;
    row_offsets.push_back(entries_so_far);
  }

  ailego::Params build_params;
  build_params.set(PARAM_HNSW_SPARSE_BUILDER_THREAD_COUNT, 1);
  ASSERT_EQ(0, builder->init(*_index_meta_ptr, build_params));

  ASSERT_EQ(0, builder->build(total_docs, doc_keys.data(), row_offsets.data(),
                              flat_indices.data(), flat_values.data()));

  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);

  string dump_path = _dir + "/TestIndptr";
  ASSERT_EQ(0, dumper->create(dump_path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  // No train() call was made, so the trained counters are expected at zero.
  auto &build_stats = builder->stats();
  ASSERT_EQ(0UL, build_stats.trained_count());
  ASSERT_EQ(total_docs, build_stats.built_count());
  ASSERT_EQ(total_docs, build_stats.dumped_count());
  ASSERT_EQ(0UL, build_stats.discarded_count());
  ASSERT_EQ(0UL, build_stats.trained_costtime());
  ASSERT_GT(build_stats.built_costtime(), 0UL);
  // ASSERT_GT(build_stats.dumped_costtime(), 0UL);
}

// Same flat-array build as TestIndptr, but the index meta is FP16 while the
// supplied values are FP32, which the query meta passed to build() declares.
TEST_F(HnswSparseBuilderTest, TestIndptrFp16) {
  IndexBuilder::Pointer builder =
      IndexFactory::CreateBuilder("HnswSparseBuilder");
  ASSERT_NE(builder, nullptr);

  uint32_t entries_per_doc = 4;
  size_t total_docs = 1000UL;

  std::vector<uint64_t> doc_keys;
  std::vector<uint64_t> row_offsets;
  std::vector<uint32_t> flat_indices;
  std::vector<float> flat_values;
  doc_keys.reserve(total_docs);
  row_offsets.reserve(total_docs + 1);
  flat_indices.reserve(total_docs * entries_per_doc);
  flat_values.reserve(total_docs * entries_per_doc);

  // row_offsets[d] .. row_offsets[d + 1] delimits the entries of document d.
  size_t entries_so_far = 0;
  row_offsets.push_back(0);
  for (size_t doc = 0; doc < total_docs; ++doc) {
    doc_keys.push_back(doc);

    for (size_t slot = 0; slot < entries_per_doc; ++slot) {
      flat_indices.push_back(20 * slot);
      flat_values.push_back(doc);
    }

    entries_so_far += entries_per_doc;
    row_offsets.push_back(entries_so_far);
  }

  // Index side: FP16 storage.
  IndexMeta fp16_meta(IndexMeta::MetaType::MT_SPARSE,
                      IndexMeta::DataType::DT_FP16);
  ailego::Params build_params;
  build_params.set(PARAM_HNSW_SPARSE_BUILDER_THREAD_COUNT, 1);
  ASSERT_EQ(0, builder->init(fp16_meta, build_params));

  // Input side: the arrays above are FP32.
  IndexQueryMeta input_meta(IndexMeta::MetaType::MT_SPARSE,
                            IndexMeta::DataType::DT_FP32);
  ASSERT_EQ(0, builder->build(input_meta, total_docs, doc_keys.data(),
                              row_offsets.data(), flat_indices.data(),
                              flat_values.data()));

  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);

  string dump_path = _dir + "/TestIndptrFp16";
  ASSERT_EQ(0, dumper->create(dump_path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  auto &build_stats = builder->stats();
  ASSERT_EQ(0UL, build_stats.trained_count());
  ASSERT_EQ(total_docs, build_stats.built_count());
  ASSERT_EQ(total_docs, build_stats.dumped_count());
  ASSERT_EQ(0UL, build_stats.discarded_count());
  ASSERT_EQ(0UL, build_stats.trained_costtime());
  ASSERT_GT(build_stats.built_costtime(), 0UL);
  // ASSERT_GT(build_stats.dumped_costtime(), 0UL);
}

}  // namespace core
}  // namespace zvec

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif

================================================
FILE: tests/core/algorithm/hnsw_sparse/hnsw_sparse_searcher_test.cpp
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "hnsw_sparse_searcher.h"
#include <sys/stat.h>
#include <sys/types.h>
#include <fcntl.h>
#include <future>
#include <iomanip>
#include <ailego/math/distance.h>
#include <gtest/gtest.h>
#include <zvec/ailego/container/vector.h>
#include "zvec/core/framework/index_framework.h"
#include "hnsw_sparse_params.h"

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
#endif

using namespace std;
using namespace testing;
using namespace zvec::ailego;

namespace zvec {
namespace core {

constexpr size_t static sparse_dim_count = 16;

class HnswSparseSearcherTest : public testing::Test {
 protected:
  void SetUp(void);
  void TearDown(void);
  void generate_sparse_data(
      size_t cnt, uint32_t sparse_dim_count,
      std::vector<NumericalVector<uint32_t>> &sparse_indices_list,
      std::vector<NumericalVector<float>> &sparse_vec_list, bool norm);

  static std::string dir_;
  static shared_ptr<IndexMeta> _index_meta_ptr;
};

std::string HnswSparseSearcherTest::dir_("HnswSparseSearcherTest/");
shared_ptr<IndexMeta> HnswSparseSearcherTest::_index_meta_ptr;

//! Append `cnt` random sparse vectors to the output lists.
//!
//! Every vector has `sparse_dim_count` entries at indices 0, 20, 40, ... with
//! values drawn uniformly from [-1, 1) and then scaled by the reciprocal of
//! the norm computed by ailego::Norm2Matrix. The generator is seeded from
//! std::random_device, so the data differs between runs.
//!
//! @param cnt                  number of vectors to append
//! @param sparse_dim_count     number of entries per vector
//! @param sparse_indices_list  receives the index array of every vector
//! @param sparse_vec_list      receives the value array of every vector
//! @param norm                 currently ignored: the vectors are always
//!                             normalised (see the note in the body)
void HnswSparseSearcherTest::generate_sparse_data(
    size_t cnt, uint32_t sparse_dim_count,
    std::vector<NumericalVector<uint32_t>> &sparse_indices_list,
    std::vector<NumericalVector<float>> &sparse_vec_list, bool norm) {
  // NOTE(review): the flag was previously hidden by a local variable of the
  // same name and has never influenced the output. Normalisation is kept
  // unconditional so that existing callers see identical data; honour the
  // flag only after checking every caller.
  (void)norm;

  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_real_distribution<float> dist(-1.0, 1.0);

  for (size_t i = 0; i < cnt; ++i) {
    // prepare sparse
    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
    NumericalVector<float> sparse_vec(sparse_dim_count);

    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices[j] = j * 20;
      sparse_vec[j] = dist(gen);
    }

    // Distinct name: this local no longer shadows the `norm` parameter.
    float vec_norm;
    ailego::Norm2Matrix<float, 1>::Compute(sparse_vec.data(), sparse_dim_count,
                                           &vec_norm);
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_vec[j] = sparse_vec[j] / vec_norm;
    }

    sparse_indices_list.push_back(sparse_indices);
    sparse_vec_list.push_back(sparse_vec);
  }
}

void HnswSparseSearcherTest::SetUp(void) {
  _index_meta_ptr.reset(new (nothrow) IndexMeta(IndexMeta::MetaType::MT_SPARSE,
                                                IndexMeta::DataType::DT_FP32));
  _index_meta_ptr->set_metric("InnerProductSparse", 0, ailego::Params());
}

//! Delete the fixture directory together with every index file written to it.
void HnswSparseSearcherTest::TearDown(void) {
  char command[100];
  // The exit status of `rm` is deliberately ignored: cleanup is best effort.
  snprintf(command, sizeof(command), "rm -rf %s", dir_.c_str());
  system(command);
}

// Recall test: 20000 random normalised vectors are inserted through a
// streamer and dumped to a file, the dump is loaded into a searcher, and the
// graph search is compared against brute force for every 50th vector.
TEST_F(HnswSparseSearcherTest, TestGeneral) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_NE(streamer, nullptr);

  // Shadows the file-level constant on purpose: this test uses 32 entries.
  size_t sparse_dim_count = 32;

  IndexMeta index_meta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  index_meta.set_metric("InnerProductSparse", 0, ailego::Params());

  ailego::Params params;
  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 20);
  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 16);
  params.set(PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION, 20);
  params.set(PARAM_HNSW_SPARSE_STREAMER_EF, 10);
  params.set(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);

  ailego::Params stg_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(storage, nullptr);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestGeneral.index", true));
  ASSERT_EQ(0, streamer->init(index_meta, params));
  ASSERT_EQ(0, streamer->open(storage));

  // size_t cnt = 5000U;
  size_t cnt = 20000U;
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);

  std::vector<NumericalVector<uint32_t>> sparse_indices_list;
  std::vector<NumericalVector<float>> sparse_vec_list;

  generate_sparse_data(cnt, sparse_dim_count, sparse_indices_list,
                       sparse_vec_list, true);

  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  for (size_t i = 0; i < cnt; i++) {
    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count,
                                    sparse_indices_list[i].data(),
                                    sparse_vec_list[i].data(), qmeta, ctx));
  }

  auto path = dir_ + "/TestGeneral";
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, streamer->dump(dumper));
  ASSERT_EQ(0, streamer->close());
  ASSERT_EQ(0, dumper->close());

  // do searcher knn
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSparseSearcher");
  ASSERT_TRUE(searcher != nullptr);
  auto read_storage = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_NE(read_storage, nullptr);
  ASSERT_EQ(0, read_storage->open(path, false));
  ASSERT_EQ(0, searcher->init(ailego::Params()));
  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));
  auto linearCtx = searcher->create_context();
  auto knnCtx = searcher->create_context();
  ASSERT_TRUE(!!linearCtx);
  ASSERT_TRUE(!!knnCtx);
  size_t topk = 200;
  linearCtx->set_topk(topk);
  knnCtx->set_topk(topk);
  uint64_t knnTotalTime = 0;
  uint64_t linearTotalTime = 0;
  int totalHits = 0;
  int totalCnts = 0;
  int topk1Hits = 0;
  size_t step = 50;
  for (size_t i = 0; i < cnt; i += step) {
    const auto &sparse_indices = sparse_indices_list[i];
    const auto &sparse_vec = sparse_vec_list[i];

    auto t1 = ailego::Realtime::MicroSeconds();

    ASSERT_EQ(0, searcher->search_impl(sparse_dim_count, sparse_indices.data(),
                                       sparse_vec.data(), qmeta, knnCtx));

    auto t2 = ailego::Realtime::MicroSeconds();

    ASSERT_EQ(0,
              searcher->search_bf_impl(sparse_dim_count, sparse_indices.data(),
                                       sparse_vec.data(), qmeta, linearCtx));

    auto t3 = ailego::Realtime::MicroSeconds();

    knnTotalTime += t2 - t1;
    linearTotalTime += t3 - t2;

    // The graph search is allowed to return fewer than topk documents (the
    // size assertion below is disabled), so every access to knnResult is
    // bounds-checked; missing entries simply count as misses.
    auto &knnResult = knnCtx->result();
    // ASSERT_EQ(topk, knnResult.size());
    if (knnResult.size() > 0 && i == knnResult[0].key()) {
      topk1Hits++;
    }

    // Brute force is the ground truth: the query is an indexed vector, so it
    // must be its own best match.
    auto &linearResult = linearCtx->result();
    ASSERT_EQ(topk, linearResult.size());
    ASSERT_EQ(i, linearResult[0].key());

    for (size_t k = 0; k < topk; ++k) {
      totalCnts++;
      if (k >= knnResult.size()) {
        continue;
      }
      for (size_t j = 0; j < topk; ++j) {
        if (linearResult[j].key() == knnResult[k].key()) {
          totalHits++;
          break;
        }
      }
    }
  }
  float recall = totalHits * 1.0f / totalCnts;
  // Only every `step`-th vector was queried, hence the scaling by step.
  float topk1Recall = topk1Hits * step * 1.0f / cnt;
  float cost = linearTotalTime * 1.0f / knnTotalTime;
  (void)cost;  // only reported by the disabled printf below
#if 0
    printf("knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d "
           "R@%zd=%f R@1=%f cost=%f\n",
           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,
           topk1Recall, cost);
#endif
  EXPECT_GT(recall, 0.90f);
  EXPECT_GT(topk1Recall, 0.95f);
  // EXPECT_GT(cost, 2.0f);
}

// Threshold (radius) search: after a plain top-k search, a threshold taken
// from the middle of the result list must shrink the result set, and
// reset_threshold() must restore the full top-k.
TEST_F(HnswSparseSearcherTest, TestRnnSearch) {
  IndexBuilder::Pointer builder =
      IndexFactory::CreateBuilder("HnswSparseBuilder");
  ASSERT_NE(builder, nullptr);

  auto holder =
      make_shared<OnePassIndexSparseHolder<IndexMeta::DataType::DT_FP32>>();
  size_t doc_cnt = 1000UL;

  // Document i holds the value i at indices 0, 20, 40, ...
  for (size_t i = 0; i < doc_cnt; ++i) {
    SparseVector<float> vec;

    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
    NumericalVector<float> sparse_velues(sparse_dim_count);

    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices[j] = j * 20;
      sparse_velues[j] = i;
    }

    vec.add_sparses(sparse_indices, sparse_velues);

    ASSERT_TRUE(holder->emplace(i, vec));
  }

  ASSERT_EQ(0, builder->init(*_index_meta_ptr, ailego::Params()));
  ASSERT_EQ(0, builder->train(holder));
  ASSERT_EQ(0, builder->build(holder));

  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  string path = dir_ + "/TestRnnSearch";
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  // test searcher
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSparseSearcher");
  ASSERT_TRUE(searcher != nullptr);
  ASSERT_EQ(0, searcher->init(ailego::Params()));

  auto storage = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_EQ(0, storage->open(path, false));
  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));
  auto ctx = searcher->create_context();
  ASSERT_TRUE(!!ctx);

  // Query: the value 1.0 at the same indices the documents use.
  NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
  NumericalVector<float> sparse_velues(sparse_dim_count);

  for (size_t j = 0; j < sparse_dim_count; ++j) {
    sparse_indices[j] = j * 20;
    sparse_velues[j] = 1.0;
  }

  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  size_t topk = 50;
  ctx->set_topk(topk);

  ASSERT_EQ(0, searcher->search_impl(sparse_dim_count, sparse_indices.data(),
                                     sparse_velues.data(), qmeta, ctx));
  // `results` is a reference into the context; the assertions below rely on
  // it showing the outcome of each later search on the same context.
  auto &results = ctx->result();
  ASSERT_EQ(topk, results.size());

  // NOTE(review): the score is negated before it is used as the radius --
  // presumably scores are stored negated for this metric; confirm.
  float radius = -results[topk / 2].score();
  ctx->set_threshold(radius);

  ASSERT_EQ(0, searcher->search_impl(sparse_dim_count, sparse_indices.data(),
                                     sparse_velues.data(), qmeta, ctx));

  // With the threshold active fewer than topk documents may remain, and each
  // of them must satisfy the radius.
  ASSERT_GT(topk, results.size());
  for (size_t k = 0; k < results.size(); ++k) {
    ASSERT_GE(-results[k].score(), radius);
  }

  // Test Reset Threshold
  ctx->reset_threshold();
  ASSERT_EQ(0, searcher->search_impl(sparse_dim_count, sparse_indices.data(),
                                     sparse_velues.data(), qmeta, ctx));
  ASSERT_EQ(topk, results.size());
  ASSERT_LT(-results[topk - 1].score(), radius);
}

// Searcher life cycle: load and search, cleanup() (after which load must fail
// until init() is called again), re-init and search, then unload() followed
// by a direct load without another init().
TEST_F(HnswSparseSearcherTest, TestClearAndReload) {
  IndexBuilder::Pointer builder =
      IndexFactory::CreateBuilder("HnswSparseBuilder");
  ASSERT_NE(builder, nullptr);
  auto holder =
      make_shared<OnePassIndexSparseHolder<IndexMeta::DataType::DT_FP32>>();
  size_t doc_cnt = 1000UL;

  // Document i holds the value i at indices 0, 20, 40, ...
  for (size_t i = 0; i < doc_cnt; ++i) {
    SparseVector<float> vec;

    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
    NumericalVector<float> sparse_velues(sparse_dim_count);

    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices[j] = j * 20;
      sparse_velues[j] = i;
    }

    vec.add_sparses(sparse_indices, sparse_velues);

    ASSERT_TRUE(holder->emplace(i, vec));
  }

  ailego::Params params;
  params.set("proxima.hnsw.sparse_builder.thread_count", 3);
  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));
  ASSERT_EQ(0, builder->train(holder));
  ASSERT_EQ(0, builder->build(holder));
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  string path = dir_ + "/TestClearAndReload";
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  // test searcher
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSparseSearcher");
  ASSERT_TRUE(searcher != nullptr);
  ailego::Params searcherParams;
  searcherParams.set("proxima.hnsw.sparse_searcher.check_crc_enable", true);
  searcherParams.set("proxima.hnsw.sparse_searcher.max_scan_ratio",
                     1.1f);  // including upper layer
  ASSERT_EQ(0, searcher->init(searcherParams));

  auto storage = IndexFactory::CreateStorage("MMapFileReadStorage");
  ASSERT_EQ(0, storage->open(path, false));
  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));
  auto linearCtx = searcher->create_context();
  auto knnCtx = searcher->create_context();
  ASSERT_TRUE(!!linearCtx);
  ASSERT_TRUE(!!knnCtx);

  // Query: the value 1.0 at the same indices the documents use.
  NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
  NumericalVector<float> sparse_velues(sparse_dim_count);

  for (size_t j = 0; j < sparse_dim_count; ++j) {
    sparse_indices[j] = j * 20;
    sparse_velues[j] = 1.0;
  }

  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  size_t topk = 100;
  linearCtx->set_topk(topk);
  knnCtx->set_topk(topk);

  ASSERT_EQ(0, searcher->search_impl(sparse_dim_count, sparse_indices.data(),
                                     sparse_velues.data(), qmeta, knnCtx));
  ASSERT_EQ(0,
            searcher->search_bf_impl(sparse_dim_count, sparse_indices.data(),
                                     sparse_velues.data(), qmeta, linearCtx));

  auto &knnResult = knnCtx->result();
  ASSERT_EQ(topk, knnResult.size());
  auto &linearResult = linearCtx->result();
  ASSERT_EQ(topk, linearResult.size());
  // `stats` is a reference into the searcher and is re-read after every
  // state change below.
  auto &stats = searcher->stats();
  ASSERT_EQ(doc_cnt, stats.loaded_count());
  // ASSERT_GT(stats.loaded_costtime(), 0UL);

  //! cleanup
  // After cleanup() the test expects no context to be available, load() to
  // fail with IndexError_Runtime and the loaded counter to be back at zero.
  ASSERT_EQ(0, searcher->cleanup());
  ASSERT_EQ(nullptr, searcher->create_context());
  ASSERT_EQ(IndexError_Runtime,
            searcher->load(storage, IndexMetric::Pointer()));
  ASSERT_EQ(0UL, stats.loaded_count());

  // A fresh init() makes the searcher loadable again; the contexts have to
  // be re-created and configured.
  ASSERT_EQ(0, searcher->init(searcherParams));
  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));
  linearCtx = searcher->create_context();
  knnCtx = searcher->create_context();
  ASSERT_TRUE(!!linearCtx);
  ASSERT_TRUE(!!knnCtx);
  linearCtx->set_topk(topk);
  knnCtx->set_topk(topk);

  ASSERT_EQ(0, searcher->search_impl(sparse_dim_count, sparse_indices.data(),
                                     sparse_velues.data(), qmeta, knnCtx));
  ASSERT_EQ(0,
            searcher->search_bf_impl(sparse_dim_count, sparse_indices.data(),
                                     sparse_velues.data(), qmeta, linearCtx));

  auto &knnResult1 = knnCtx->result();
  ASSERT_EQ(topk, knnResult1.size());
  auto &linearResult1 = linearCtx->result();
  ASSERT_EQ(topk, linearResult1.size());
  ASSERT_EQ(doc_cnt, stats.loaded_count());

  //! unload
  // Unlike cleanup(), unload() is followed by load() without another init().
  ASSERT_EQ(0, searcher->unload());
  ASSERT_EQ(nullptr, searcher->create_context());
  ASSERT_EQ(0UL, stats.loaded_count());
  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));
  linearCtx = searcher->create_context();
  ASSERT_TRUE(!!linearCtx);
  linearCtx->set_topk(topk);

  ASSERT_EQ(0,
            searcher->search_bf_impl(sparse_dim_count, sparse_indices.data(),
                                     sparse_velues.data(), qmeta, linearCtx));

  auto &linearResult2 = linearCtx->result();
  ASSERT_EQ(topk, linearResult2.size());
  ASSERT_EQ(doc_cnt, stats.loaded_count());
}

// Key filter test: graph search, brute force and brute force by primary keys
// must agree on the leading results, both without a filter and with a filter
// that flags keys 10 and 11.
TEST_F(HnswSparseSearcherTest, TestFilter) {
  IndexBuilder::Pointer builder =
      IndexFactory::CreateBuilder("HnswSparseBuilder");
  ASSERT_NE(builder, nullptr);
  auto holder =
      make_shared<OnePassIndexSparseHolder<IndexMeta::DataType::DT_FP32>>();
  size_t doc_cnt = 100UL;
  // A single key list containing every document key, used by the
  // search_bf_by_p_keys_impl calls below.
  std::vector<std::vector<uint64_t>> p_keys;
  p_keys.resize(1);

  // Values rise from 0 to 10 for documents 0..10 and then fall by 0.5 per
  // document, so document 10 has the largest value (10), followed by
  // document 11 (9.5). Documents 9 and 12 both hold 9, which means the
  // expected order "12 before 9" asserted below depends on how ties are
  // resolved.
  for (size_t i = 0; i < doc_cnt; ++i) {
    SparseVector<float> vec;

    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
    NumericalVector<float> sparse_velues(sparse_dim_count);

    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices[j] = j * 20;
      if (i <= 10) {
        sparse_velues[j] = i;
      } else {
        sparse_velues[j] = 10 - (i - 10) * 0.5;
      }
    }

    vec.add_sparses(sparse_indices, sparse_velues);

    ASSERT_TRUE(holder->emplace(i, vec));
    p_keys[0].push_back(i);
  }

  ailego::Params params;
  params.set("proxima.hnsw.sparse_builder.thread_count", 3);
  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));
  ASSERT_EQ(0, builder->train(holder));
  ASSERT_EQ(0, builder->build(holder));
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  string path = dir_ + "/TestFilter";
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  // test searcher
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSparseSearcher");
  ASSERT_TRUE(searcher != nullptr);
  ailego::Params searcherParams;
  searcherParams.set("proxima.hnsw.sparse_searcher.check_crc_enable", true);
  searcherParams.set("proxima.hnsw.sparse_searcher.max_scan_ratio", 1.0f);
  ASSERT_EQ(0, searcher->init(searcherParams));
  auto storage = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_EQ(0, storage->open(path, false));
  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));
  // One context per search flavour so that each keeps its own filter and
  // result list.
  auto linearCtx = searcher->create_context();
  auto linearByPKeysCtx = searcher->create_context();
  auto knnCtx = searcher->create_context();
  ASSERT_TRUE(!!linearCtx);
  ASSERT_TRUE(!!linearByPKeysCtx);
  ASSERT_TRUE(!!knnCtx);

  // Query: the value 10.1 at the same indices the documents use.
  NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
  NumericalVector<float> sparse_velues(sparse_dim_count);

  for (size_t j = 0; j < sparse_dim_count; ++j) {
    sparse_indices[j] = j * 20;
    sparse_velues[j] = 10.1f;
  }

  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  size_t topk = 10;
  linearCtx->set_topk(topk);
  linearByPKeysCtx->set_topk(topk);
  knnCtx->set_topk(topk);
  ASSERT_EQ(0, searcher->search_impl(sparse_dim_count, sparse_indices.data(),
                                     sparse_velues.data(), qmeta, knnCtx));
  ASSERT_EQ(0,
            searcher->search_bf_impl(sparse_dim_count, sparse_indices.data(),
                                     sparse_velues.data(), qmeta, linearCtx));
  ASSERT_EQ(0, searcher->search_bf_by_p_keys_impl(
                   sparse_dim_count, sparse_indices.data(),
                   sparse_velues.data(), p_keys, qmeta, linearByPKeysCtx));

  // Returns true for keys 10 and 11. The filtered assertions further down
  // expect exactly these two keys to be missing, i.e. `true` drops a key.
  auto filterFunc = [](uint64_t key) {
    if (key == 10UL || key == 11UL) {
      return true;
    }
    return false;
  };

  // Unfiltered: all three search flavours must start with 10, 11, 12.
  auto &knnResult = knnCtx->result();
  ASSERT_EQ(topk, knnResult.size());
  ASSERT_EQ(10UL, knnResult[0].key());
  ASSERT_EQ(11UL, knnResult[1].key());
  ASSERT_EQ(12UL, knnResult[2].key());

  auto &linearResult = linearCtx->result();
  ASSERT_EQ(topk, linearResult.size());
  ASSERT_EQ(10UL, linearResult[0].key());
  ASSERT_EQ(11UL, linearResult[1].key());
  ASSERT_EQ(12UL, linearResult[2].key());

  auto &linearByPKeysResult = linearByPKeysCtx->result();
  ASSERT_EQ(topk, linearByPKeysResult.size());
  ASSERT_EQ(10UL, linearByPKeysResult[0].key());
  ASSERT_EQ(11UL, linearByPKeysResult[1].key());
  ASSERT_EQ(12UL, linearByPKeysResult[2].key());

  // Filtered: with 10 and 11 gone, every flavour must start with 12, 9, 13.
  knnCtx->set_filter(filterFunc);
  ASSERT_EQ(0, searcher->search_impl(sparse_dim_count, sparse_indices.data(),
                                     sparse_velues.data(), qmeta, knnCtx));
  auto &knnResult1 = knnCtx->result();
  ASSERT_EQ(topk, knnResult1.size());
  ASSERT_EQ(12UL, knnResult1[0].key());
  ASSERT_EQ(9UL, knnResult1[1].key());
  ASSERT_EQ(13UL, knnResult1[2].key());

  linearCtx->set_filter(filterFunc);
  ASSERT_EQ(0,
            searcher->search_bf_impl(sparse_dim_count, sparse_indices.data(),
                                     sparse_velues.data(), qmeta, linearCtx));
  auto &linearResult1 = linearCtx->result();
  ASSERT_EQ(topk, linearResult1.size());
  ASSERT_EQ(12UL, linearResult1[0].key());
  ASSERT_EQ(9UL, linearResult1[1].key());
  ASSERT_EQ(13UL, linearResult1[2].key());

  linearByPKeysCtx->set_filter(filterFunc);
  ASSERT_EQ(0, searcher->search_bf_by_p_keys_impl(
                   sparse_dim_count, sparse_indices.data(),
                   sparse_velues.data(), p_keys, qmeta, linearByPKeysCtx));
  auto &linearByPKeysResult1 = linearByPKeysCtx->result();
  ASSERT_EQ(topk, linearByPKeysResult1.size());
  ASSERT_EQ(12UL, linearByPKeysResult1[0].key());
  ASSERT_EQ(9UL, linearByPKeysResult1[1].key());
  ASSERT_EQ(13UL, linearByPKeysResult1[2].key());
}

// Builds a 5000-document sparse index, dumps it, reloads it through
// HnswSparseSearcher and verifies the leading keys returned by the batched
// (two queries per call) linear, linear-by-primary-keys and KNN searches.
TEST_F(HnswSparseSearcherTest, TestBatchQuery) {
  constexpr uint32_t sparse_dim_count = 8U;

  IndexMeta meta(IndexMeta::MetaType::MT_SPARSE, IndexMeta::DataType::DT_FP32);
  meta.set_metric("InnerProductSparse", 0, ailego::Params());

  IndexBuilder::Pointer builder =
      IndexFactory::CreateBuilder("HnswSparseBuilder");
  ASSERT_NE(builder, nullptr);

  auto holder =
      make_shared<OnePassIndexSparseHolder<IndexMeta::DataType::DT_FP32>>();
  size_t doc_cnt = 5000UL;

  // One primary-key list per query in the batch; both cover every document.
  std::vector<std::vector<uint64_t>> p_keys(2,
                                            std::vector<uint64_t>(doc_cnt));

  NumericalVector<uint32_t> doc_indices(sparse_dim_count);
  NumericalVector<float> doc_values(sparse_dim_count);

  // Shift the values so that roughly half of the documents are negative.
  const float value_shift = -(doc_cnt / 2.0);
  for (size_t doc = 0; doc < doc_cnt; ++doc) {
    // Documents 0..3 and the last three documents carry all-zero values.
    const bool zeroed = (doc <= 3) || (doc >= doc_cnt - 3);
    const float value = zeroed ? 0.0f : doc + value_shift;

    for (size_t dim = 0; dim < sparse_dim_count; ++dim) {
      doc_indices[dim] = dim * 20;
      doc_values[dim] = value;
    }

    SparseVector<float> vec;
    vec.add_sparses(doc_indices, doc_values);
    ASSERT_TRUE(holder->emplace(doc, vec));

    p_keys[0][doc] = doc;
    p_keys[1][doc] = doc;
  }

  ailego::Params builder_params;
  builder_params.set("proxima.hnsw.sparse_builder.max_neighbor_count", 160);
  builder_params.set("proxima.hnsw.sparse_builder.scaling_factor", 16);
  builder_params.set("proxima.hnsw.sparse_builder.ef_construction", 10);
  builder_params.set("proxima.hnsw.sparse_builder.thread_count", 1);
  ASSERT_EQ(0, builder->init(meta, builder_params));
  ASSERT_EQ(0, builder->train(holder));
  ASSERT_EQ(0, builder->build(holder));

  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  string path = dir_ + "/TestBatchQuery";
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  // Reload the dumped index through the searcher.
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSparseSearcher");
  ASSERT_TRUE(searcher != nullptr);
  ailego::Params searcher_params;
  searcher_params.set("proxima.hnsw.sparse_searcher.ef", 1000);
  ASSERT_EQ(0, searcher->init(searcher_params));

  auto storage = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_EQ(0, storage->open(path, false));
  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));

  auto linearCtx = searcher->create_context();
  auto linearByPKeysCtx = searcher->create_context();
  auto knnCtx = searcher->create_context();
  ASSERT_TRUE(!!linearCtx);
  ASSERT_TRUE(!!linearByPKeysCtx);
  ASSERT_TRUE(!!knnCtx);
  linearCtx->set_debug_mode(true);
  linearByPKeysCtx->set_debug_mode(true);
  knnCtx->set_debug_mode(true);

  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  size_t topk = 200;
  linearCtx->set_topk(topk);
  linearByPKeysCtx->set_topk(topk);
  knnCtx->set_topk(topk);

  // The same two-query batch is used by all three search flavours: the first
  // query has positive values, the second one negative values.
  uint32_t query_dims[] = {sparse_dim_count, sparse_dim_count};
  uint32_t query_indices[] = {0, 20, 40, 60, 80, 100, 120, 140,
                              0, 20, 40, 60, 80, 100, 120, 140};
  float query_values[] = {3.1f,  3.1f,  3.1f,  3.1f,  3.1f,  3.1f,
                          3.1f,  3.1f,  -7.1f, -7.1f, -7.1f, -7.1f,
                          -7.1f, -7.1f, -7.1f, -7.1f};

  // Linear (brute-force) search.
  {
    ASSERT_EQ(0, searcher->search_bf_impl(query_dims, query_indices,
                                          query_values, qmeta, 2, linearCtx));

    // First query: keys descend from 4996.
    auto &positive_hits = linearCtx->result(0);
    for (size_t rank = 0; rank < 8; ++rank) {
      ASSERT_EQ(4996UL - rank, positive_hits[rank].key());
    }

    // Second query: keys ascend from 4.
    auto &negative_hits = linearCtx->result(1);
    for (size_t rank = 0; rank < 8; ++rank) {
      ASSERT_EQ(4UL + rank, negative_hits[rank].key());
    }
  }

  // Linear search restricted to the supplied primary keys.
  {
    ASSERT_EQ(0, searcher->search_bf_by_p_keys_impl(
                     query_dims, query_indices, query_values, p_keys, qmeta, 2,
                     linearByPKeysCtx));

    auto &positive_hits = linearByPKeysCtx->result(0);
    for (size_t rank = 0; rank < 8; ++rank) {
      ASSERT_EQ(4996UL - rank, positive_hits[rank].key());
    }

    auto &negative_hits = linearByPKeysCtx->result(1);
    for (size_t rank = 0; rank < 8; ++rank) {
      ASSERT_EQ(4UL + rank, negative_hits[rank].key());
    }
  }

  // KNN search: only the three leading keys are verified.
  {
    ASSERT_EQ(0, searcher->search_impl(query_dims, query_indices, query_values,
                                       qmeta, 2, knnCtx));

    auto &positive_hits = knnCtx->result(0);
    for (size_t rank = 0; rank < 3; ++rank) {
      ASSERT_EQ(4996UL - rank, positive_hits[rank].key());
    }

    auto &negative_hits = knnCtx->result(1);
    for (size_t rank = 0; rank < 3; ++rank) {
      ASSERT_EQ(4UL + rank, negative_hits[rank].key());
    }
  }
}

// Fills a HnswSparseStreamer with generated sparse data, dumps it to a file,
// loads the dump through HnswSparseSearcher and compares KNN results against
// brute-force results (recall, top-1 recall and relative cost).
TEST_F(HnswSparseSearcherTest, TestStreamerDump) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_NE(streamer, nullptr);

  ailego::Params params;
  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 10);
  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 16);
  params.set(PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION, 10);
  params.set(PARAM_HNSW_SPARSE_STREAMER_EF, 5);
  params.set(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
  ailego::Params stg_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestStreamerDump.index", true));
  ASSERT_EQ(0, streamer->init(*_index_meta_ptr, params));
  ASSERT_EQ(0, streamer->open(storage));

  size_t cnt = 10000U;
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);

  std::vector<NumericalVector<uint32_t>> sparse_indices_list;
  std::vector<NumericalVector<float>> sparse_vec_list;

  generate_sparse_data(cnt, sparse_dim_count, sparse_indices_list,
                       sparse_vec_list, true);

  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  for (size_t i = 0; i < cnt; i++) {
    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count,
                                    sparse_indices_list[i].data(),
                                    sparse_vec_list[i].data(), qmeta, ctx));
  }

  auto path = dir_ + "/TestStreamerDump";
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, streamer->dump(dumper));
  ASSERT_EQ(0, streamer->close());
  ASSERT_EQ(0, dumper->close());

  // do searcher knn
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSparseSearcher");
  auto read_storage = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_EQ(0, read_storage->open(path, false));
  ASSERT_TRUE(searcher != nullptr);
  ASSERT_EQ(0, searcher->init(ailego::Params()));
  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));
  auto linearCtx = searcher->create_context();
  auto knnCtx = searcher->create_context();
  // Fail fast instead of dereferencing a null context below.
  ASSERT_TRUE(!!linearCtx);
  ASSERT_TRUE(!!knnCtx);
  size_t topk = 200;
  linearCtx->set_topk(topk);
  knnCtx->set_topk(topk);
  uint64_t knnTotalTime = 0;
  uint64_t linearTotalTime = 0;
  int totalHits = 0;
  int totalCnts = 0;
  int topk1Hits = 0;
  size_t step = 50;

  // Query with every `step`-th inserted vector.
  for (size_t i = 0; i < cnt; i += step) {
    const auto &sparse_indices = sparse_indices_list[i];
    const auto &sparse_vec = sparse_vec_list[i];

    auto t1 = ailego::Realtime::MicroSeconds();

    ASSERT_EQ(0, searcher->search_impl(sparse_dim_count, sparse_indices.data(),
                                       sparse_vec.data(), qmeta, knnCtx));

    auto t2 = ailego::Realtime::MicroSeconds();

    ASSERT_EQ(0,
              searcher->search_bf_impl(sparse_dim_count, sparse_indices.data(),
                                       sparse_vec.data(), qmeta, linearCtx));

    auto t3 = ailego::Realtime::MicroSeconds();

    knnTotalTime += t2 - t1;
    linearTotalTime += t3 - t2;

    auto &knnResult = knnCtx->result();
    // The KNN result size is deliberately not asserted, so it may be shorter
    // than topk; every access below is bounds-checked against it.
    // ASSERT_EQ(topk, knnResult.size());
    const size_t knnCount = knnResult.size();
    if (knnCount > 0 && i == knnResult[0].key()) {
      topk1Hits++;
    }

    auto &linearResult = linearCtx->result();
    ASSERT_EQ(topk, linearResult.size());
    ASSERT_EQ(i, linearResult[0].key());

    for (size_t k = 0; k < topk; ++k) {
      totalCnts++;
      if (k >= knnCount) {
        continue;  // a missing KNN result counts as a miss
      }
      for (size_t j = 0; j < topk; ++j) {
        if (linearResult[j].key() == knnResult[k].key()) {
          totalHits++;
          break;
        }
      }
    }
  }
  // NOTE(review): totalCnts is incremented once per (query, rank) pair, so
  // totalHits / totalCnts is already a ratio in [0, 1]; the extra `* step`
  // factor scales it up and weakens the recall threshold below. Confirm
  // whether the factor is intended.
  float recall = totalHits * step * 1.0f / totalCnts;
  float topk1Recall = topk1Hits * step * 1.0f / cnt;
  float cost = linearTotalTime * 1.0f / knnTotalTime;
#if 0
    printf("knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d "
           "R@%zd=%f R@1=%f cost=%f\n",
           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,
           topk1Recall, cost);
#endif
  EXPECT_GT(recall, 0.90f);
  EXPECT_GT(topk1Recall, 0.95f);
  EXPECT_GT(cost, 1.50f);
}

// Builds three searchers whose keys fall into distinct residue classes
// modulo 3, then lets two concurrent tasks reuse one context across all three
// searchers and checks that each result set only contains keys of the
// searcher that was queried. Finally verifies that searcher and streamer
// contexts are rejected when passed to the other component.
TEST_F(HnswSparseSearcherTest, TestSharedContext) {
  // Creates `doc_cnt` documents with keys start+3, start+6, ... so that
  // key % 3 == start % 3 for every document in the holder.
  auto gen_holder = [](int start, size_t doc_cnt) {
    auto holder =
        make_shared<OnePassIndexSparseHolder<IndexMeta::DataType::DT_FP32>>();
    uint64_t key = start;

    for (size_t i = 0; i < doc_cnt; ++i) {
      SparseVector<float> vec;

      NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
      NumericalVector<float> sparse_velues(sparse_dim_count);

      for (size_t j = 0; j < sparse_dim_count; ++j) {
        sparse_indices[j] = j * 20;
        sparse_velues[j] = i;
      }

      vec.add_sparses(sparse_indices, sparse_velues);

      key += 3;

      EXPECT_TRUE(holder->emplace(key, vec));
    }

    return holder;
  };
  // Builds and dumps an index at `path`, then returns a searcher loaded from
  // it. Non-fatal expectations are used because the lambda returns a value.
  auto gen_index = [&gen_holder](int start, size_t docs, std::string path) {
    auto holder = gen_holder(start, docs);
    IndexBuilder::Pointer builder =
        IndexFactory::CreateBuilder("HnswSparseBuilder");
    ailego::Params params;
    EXPECT_EQ(0, builder->init(*_index_meta_ptr, params));
    EXPECT_EQ(0, builder->train(holder));
    EXPECT_EQ(0, builder->build(holder));
    auto dumper = IndexFactory::CreateDumper("FileDumper");
    EXPECT_EQ(0, dumper->create(path));
    EXPECT_EQ(0, builder->dump(dumper));
    EXPECT_EQ(0, dumper->close());

    IndexSearcher::Pointer searcher =
        IndexFactory::CreateSearcher("HnswSparseSearcher");
    auto name = rand() % 2 ? "FileReadStorage" : "MMapFileReadStorage";
    auto storage = IndexFactory::CreateStorage(name);
    EXPECT_EQ(0, storage->open(path, false));
    // NOTE(review): this setting is stored in `params`, but the searcher is
    // initialised with an empty ailego::Params() below, so it is never
    // applied. Confirm whether init(params) was intended.
    params.set("proxima.hnsw.sparse_searcher.visit_bloomfilter_enable",
               rand() % 2);
    EXPECT_EQ(0, searcher->init(ailego::Params()));
    EXPECT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));
    return searcher;
  };

  srand(ailego::Realtime::MilliSeconds());
  size_t docs1 = rand() % 500 + 100;
  size_t docs2 = rand() % 5000 + 100;
  size_t docs3 = rand() % 50000 + 100;
  auto path1 = dir_ + "/TestSharedContext.index1";
  auto path2 = dir_ + "/TestSharedContext.index2";
  auto path3 = dir_ + "/TestSharedContext.index3";
  auto searcher1 = gen_index(0, docs1, path1);
  auto searcher2 = gen_index(1, docs2, path2);
  auto searcher3 = gen_index(2, docs3, path3);

  srand(ailego::Realtime::MilliSeconds());
  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  // Creates a context from a random searcher and reuses it for 100 searches
  // against randomly chosen searchers and search modes.
  auto do_test = [&]() {
    IndexSearcher::Context::Pointer ctx;
    switch (rand() % 3) {
      case 0:
        ctx = searcher1->create_context();
        break;
      case 1:
        ctx = searcher2->create_context();
        break;
      case 2:
        ctx = searcher3->create_context();
        break;
    }
    ASSERT_TRUE(!!ctx);
    ctx->set_topk(10);

    int ret = 0;
    for (int i = 0; i < 100; ++i) {
      NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
      NumericalVector<float> sparse_velues(sparse_dim_count);

      for (size_t j = 0; j < sparse_dim_count; ++j) {
        sparse_indices[j] = j * 20;
        sparse_velues[j] = -(i + 0.1f);
      }

      // code % 3 selects the searcher; code / 3 selects knn (0) or linear (1).
      auto code = rand() % 6;
      switch (code) {
        case 0:
          ret = searcher1->search_impl(sparse_dim_count, sparse_indices.data(),
                                       sparse_velues.data(), qmeta, ctx);
          break;
        case 1:
          ret = searcher2->search_impl(sparse_dim_count, sparse_indices.data(),
                                       sparse_velues.data(), qmeta, ctx);
          break;
        case 2:
          ret = searcher3->search_impl(sparse_dim_count, sparse_indices.data(),
                                       sparse_velues.data(), qmeta, ctx);
          break;
        case 3:
          ret =
              searcher1->search_bf_impl(sparse_dim_count, sparse_indices.data(),
                                        sparse_velues.data(), qmeta, ctx);
          break;
        case 4:
          ret =
              searcher2->search_bf_impl(sparse_dim_count, sparse_indices.data(),
                                        sparse_velues.data(), qmeta, ctx);
          break;
        case 5:
          ret =
              searcher3->search_bf_impl(sparse_dim_count, sparse_indices.data(),
                                        sparse_velues.data(), qmeta, ctx);
          break;
      }

      EXPECT_EQ(0, ret);
      auto &results = ctx->result();
      EXPECT_EQ(10U, results.size());
      // The size expectation above is non-fatal, so bound the loop by the
      // actual result count to avoid reading past the end.
      for (size_t k = 0; k < 10 && k < results.size(); ++k) {
        // std::cout << "code: " << code << ", i: " << i << ", k: " << k
        //           << ", key: " << results[k].key()
        //           << ", score: " << results[k].score() << std::endl;

        EXPECT_EQ(code % 3, results[k].key() % 3);
      }
    }
  };
  auto t1 = std::async(std::launch::async, do_test);
  auto t2 = std::async(std::launch::async, do_test);
  t1.wait();
  t2.wait();

  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_NE(streamer, nullptr);
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_EQ(0, storage->init(ailego::Params()));
  ASSERT_EQ(0, storage->open(dir_ + "/TestSharedContext.index4", true));
  ASSERT_EQ(0, streamer->init(*_index_meta_ptr, ailego::Params()));
  ASSERT_EQ(0, streamer->open(storage));

  NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
  NumericalVector<float> sparse_velues(sparse_dim_count);

  for (size_t j = 0; j < sparse_dim_count; ++j) {
    sparse_indices[j] = j * 20;
    sparse_velues[j] = 1.1f;
  }

  // A streamer context must be rejected by the searcher ...
  auto ctx1 = streamer->create_context();
  ASSERT_TRUE(!!ctx1);
  EXPECT_EQ(IndexError_Unsupported,
            searcher1->search_impl(sparse_dim_count, sparse_indices.data(),
                                   sparse_velues.data(), qmeta, ctx1));

  // ... and a searcher context must be rejected by the streamer.
  auto ctx2 = searcher1->create_context();
  ASSERT_TRUE(!!ctx2);
  EXPECT_EQ(IndexError_Unsupported,
            streamer->search_impl(sparse_dim_count, sparse_indices.data(),
                                  sparse_velues.data(), qmeta, ctx2));
}

// Builds an index whose documents store their own key as every sparse value,
// then checks that the searcher's sparse provider returns those values both
// via random access by key and via sequential iteration.
TEST_F(HnswSparseSearcherTest, TestProvider) {
  IndexBuilder::Pointer builder =
      IndexFactory::CreateBuilder("HnswSparseBuilder");
  ASSERT_NE(builder, nullptr);
  auto holder =
      make_shared<OnePassIndexSparseHolder<IndexMeta::DataType::DT_FP32>>();
  size_t doc_cnt = 5000UL;
  std::vector<key_t> keys(doc_cnt);

  // Randomly pick the key layout: fully random keys, or multiples of a
  // stride that are optionally shuffled.
  srand(ailego::Realtime::MilliSeconds());
  bool use_random_keys = rand() % 2;
  bool shuffle_keys = rand() % 2;
  size_t stride = rand() % 2 + 1;
  LOG_DEBUG("randKey=%u randOrder=%u step=%zu", use_random_keys, shuffle_keys,
            stride);
  if (!use_random_keys) {
    for (size_t pos = 0; pos < doc_cnt; ++pos) {
      keys[pos] = stride * pos;
    }
    if (shuffle_keys) {
      uint32_t seed = ailego::Realtime::Seconds();
      std::shuffle(keys.begin(), keys.end(), std::default_random_engine(seed));
    }
  } else {
    std::mt19937 engine;
    std::uniform_int_distribution<size_t> any_key(
        0, std::numeric_limits<size_t>::max());
    for (auto &key : keys) {
      key = any_key(engine);
    }
  }

  // Every sparse value of a document equals the document's key.
  for (size_t doc = 0; doc < doc_cnt; ++doc) {
    NumericalVector<uint32_t> doc_indices(sparse_dim_count);
    NumericalVector<float> doc_values(sparse_dim_count);

    for (size_t dim = 0; dim < sparse_dim_count; ++dim) {
      doc_indices[dim] = dim * 20;
      doc_values[dim] = keys[doc];
    }

    SparseVector<float> vec;
    vec.add_sparses(doc_indices, doc_values);

    ASSERT_TRUE(holder->emplace(keys[doc], vec));
  }

  ailego::Params builder_params;
  ASSERT_EQ(0, builder->init(*_index_meta_ptr, builder_params));
  ASSERT_EQ(0, builder->train(holder));
  ASSERT_EQ(0, builder->build(holder));
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  string path = dir_ + "/TestProvider";
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  // Load the dumped index through the searcher.
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSparseSearcher");
  ASSERT_TRUE(searcher != nullptr);
  ailego::Params searcher_params;
  searcher_params.set("proxima.hnsw.sparse_searcher.ef", 1);
  ASSERT_EQ(0, searcher->init(searcher_params));
  auto storage = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_EQ(0, storage->open(path, false));
  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));

  auto provider = searcher->create_sparse_provider();

  // Random access: fetch each document by key and compare its values.
  for (size_t doc = 0; doc < keys.size(); ++doc) {
    uint32_t sparse_count;
    std::string indices_buffer;
    std::string values_buffer;

    ASSERT_EQ(0, provider->get_sparse_vector(keys[doc], &sparse_count,
                                             &indices_buffer, &values_buffer));

    const float *stored_values =
        reinterpret_cast<const float *>(values_buffer.data());
    ASSERT_EQ(sparse_count, sparse_dim_count);
    for (size_t dim = 0; dim < sparse_count; ++dim) {
      ASSERT_FLOAT_EQ(stored_values[dim], keys[doc]);
    }
  }

  // Sequential access: every visited document must carry its key as value.
  size_t visited = 0;
  for (auto iter = provider->create_iterator(); iter->is_valid();
       iter->next()) {
    auto key = iter->key();
    const uint32_t sparse_count = iter->sparse_count();
    ASSERT_EQ(sparse_count, sparse_dim_count);

    const float *stored_values =
        reinterpret_cast<const float *>(iter->sparse_data());
    for (size_t dim = 0; dim < sparse_dim_count; ++dim) {
      ASSERT_FLOAT_EQ(stored_values[dim], key);
    }
    ++visited;
  }

  ASSERT_EQ(visited, doc_cnt);
  ASSERT_EQ(_index_meta_ptr->data_type(), provider->data_type());
}

// With force_padding_result_enable set and a small scan ratio, a KNN search
// must still return exactly topk distinct documents; with a filter that
// returns true for every key, the search must return no documents.
TEST_F(HnswSparseSearcherTest, TestRandomPaddingTopk) {
  std::mt19937 mt{};
  static constexpr size_t sparse_dim_count = 8;
  IndexMeta meta(IndexMeta::MetaType::MT_SPARSE, IndexMeta::DataType::DT_FP32);
  IndexBuilder::Pointer builder =
      IndexFactory::CreateBuilder("HnswSparseBuilder");
  ASSERT_NE(builder, nullptr);
  auto holder =
      make_shared<MultiPassIndexSparseHolder<IndexMeta::DataType::DT_FP32>>();
  const size_t COUNT = 10000UL;

  for (size_t i = 0; i < COUNT; ++i) {
    SparseVector<float> vec;

    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
    NumericalVector<float> sparse_velues(sparse_dim_count);

    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices[j] = j * 20;
      sparse_velues[j] = i;
    }

    vec.add_sparses(sparse_indices, sparse_velues);

    ASSERT_TRUE(holder->emplace(i, vec));
  }

  ASSERT_EQ(0, builder->init(meta, ailego::Params()));
  ASSERT_EQ(0, builder->train(holder));
  ASSERT_EQ(0, builder->build(holder));

  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  string path = dir_ + "/TestRandomPadding";
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  // test searcher
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSparseSearcher");
  ailego::Params params;
  params.set("proxima.hnsw.sparse_searcher.force_padding_result_enable", true);
  params.set("proxima.hnsw.sparse_searcher.scan_ratio", 0.01f);
  ASSERT_TRUE(searcher != nullptr);
  ASSERT_EQ(0, searcher->init(params));

  auto storage = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_EQ(0, storage->open(path, false));
  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));
  auto ctx = searcher->create_context();
  ASSERT_TRUE(!!ctx);

  NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
  NumericalVector<float> sparse_velues(sparse_dim_count);

  for (size_t j = 0; j < sparse_dim_count; ++j) {
    sparse_indices[j] = j * 20;
    sparse_velues[j] = 1.0f;
  }

  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  // Pick topk anywhere in [1, COUNT].
  std::uniform_int_distribution<uint32_t> gen_int(1, COUNT);
  size_t topk = gen_int(mt);
  ctx->set_topk(topk);

  ASSERT_EQ(0, searcher->search_impl(sparse_dim_count, sparse_indices.data(),
                                     sparse_velues.data(), qmeta, ctx));

  auto &results = ctx->result();
  EXPECT_EQ(results.size(), topk);

  // Returned keys must be pairwise distinct. Sorting a copy of the keys and
  // looking for equal neighbours avoids a quadratic pairwise comparison,
  // which matters because topk can be as large as COUNT.
  std::vector<uint64_t> result_keys;
  result_keys.reserve(results.size());
  for (size_t i = 0; i < results.size(); ++i) {
    result_keys.push_back(results[i].key());
  }
  std::sort(result_keys.begin(), result_keys.end());
  EXPECT_TRUE(std::adjacent_find(result_keys.begin(), result_keys.end()) ==
              result_keys.end())
      << "search result contains duplicate keys";

  ctx->set_filter([](uint64_t /*key*/) { return true; });

  ASSERT_EQ(0, searcher->search_impl(sparse_dim_count, sparse_indices.data(),
                                     sparse_velues.data(), qmeta, ctx));

  auto &results1 = ctx->result();
  EXPECT_EQ(results1.size(), 0U);
}

// Alternates between plain KNN searches and searches whose context has the
// brute-force threshold raised to the document count (set alternately through
// Context::update() and set_bruteforce_threshold()), comparing each result
// against a linear search.
TEST_F(HnswSparseSearcherTest, TestBruteForceSetupInContext) {
  IndexBuilder::Pointer builder =
      IndexFactory::CreateBuilder("HnswSparseBuilder");
  ASSERT_NE(builder, nullptr);
  auto holder =
      make_shared<OnePassIndexSparseHolder<IndexMeta::DataType::DT_FP32>>();
  size_t doc_cnt = 5000UL;
  for (size_t i = 0; i < doc_cnt; ++i) {
    SparseVector<float> vec;

    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
    NumericalVector<float> sparse_velues(sparse_dim_count);

    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices[j] = j * 20;
      sparse_velues[j] = i;
    }

    vec.add_sparses(sparse_indices, sparse_velues);

    ASSERT_TRUE(holder->emplace(i, vec));
  }

  ailego::Params params;
  // params.set("proxima.hnsw.sparse_builder.max_neighbor_count", 16);
  params.set("proxima.hnsw.sparse_builder.scaling_factor", 16);
  params.set("proxima.hnsw.sparse_builder.ef_construction", 10);
  params.set("proxima.hnsw.sparse_builder.thread_count", 2);
  ASSERT_EQ(0, builder->init(*_index_meta_ptr, params));
  ASSERT_EQ(0, builder->train(holder));
  ASSERT_EQ(0, builder->build(holder));
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  string path = dir_ + "/TestBruteForceSetupInContext";
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  // test searcher
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSparseSearcher");
  ASSERT_TRUE(searcher != nullptr);
  ailego::Params searcherParams;
  searcherParams.set("proxima.hnsw.sparse_searcher.ef", 1);
  ASSERT_EQ(0, searcher->init(searcherParams));

  auto storage = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_EQ(0, storage->open(path, false));
  ASSERT_EQ(0, searcher->load(storage, IndexMetric::Pointer()));

  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  size_t topk = 200;
  uint64_t knnTotalTime = 0;
  uint64_t linearTotalTime = 0;
  int totalHits = 0;
  int totalCnts = 0;
  int topk1Hits = 0;

  // Toggled after every query: whether this query raises the brute-force
  // threshold, and which of the two APIs is used to do so.
  bool set_bf_threshold = false;
  bool use_update = false;

  size_t step = 50;
  for (size_t i = 0; i < doc_cnt; i += step) {
    auto linearCtx = searcher->create_context();
    auto knnCtx = searcher->create_context();

    ASSERT_TRUE(!!linearCtx);
    ASSERT_TRUE(!!knnCtx);

    linearCtx->set_topk(topk);
    knnCtx->set_topk(topk);

    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
    NumericalVector<float> sparse_velues(sparse_dim_count);

    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices[j] = j * 20;
      sparse_velues[j] = i + 0.1f;
    }

    auto t1 = ailego::Realtime::MicroSeconds();

    if (set_bf_threshold) {
      if (use_update) {
        ailego::Params searcherParamsExtra;

        searcherParamsExtra.set(
            "proxima.hnsw.sparse_searcher.brute_force_threshold", doc_cnt);
        knnCtx->update(searcherParamsExtra);
      } else {
        knnCtx->set_bruteforce_threshold(doc_cnt);
      }

      use_update = !use_update;
    }
    ASSERT_EQ(0, searcher->search_impl(sparse_dim_count, sparse_indices.data(),
                                       sparse_velues.data(), qmeta, knnCtx));

    auto t2 = ailego::Realtime::MicroSeconds();

    ASSERT_EQ(0,
              searcher->search_bf_impl(sparse_dim_count, sparse_indices.data(),
                                       sparse_velues.data(), qmeta, linearCtx));
    // auto t3 = ailego::Realtime::MicroSeconds();

    // The search_impl time is booked as "linear" when the threshold was
    // raised for this query, and as "knn" otherwise.
    if (set_bf_threshold) {
      linearTotalTime += t2 - t1;
    } else {
      knnTotalTime += t2 - t1;
    }

    set_bf_threshold = !set_bf_threshold;

    auto &knnResult = knnCtx->result();
    // TODO: check
    // ASSERT_EQ(topk, knnResult.size());
    // The KNN result size is not asserted, so it may be shorter than topk;
    // every access below is bounds-checked against it.
    const size_t knnCount = knnResult.size();
    if (knnCount > 0 && doc_cnt - 1 == knnResult[0].key()) {
      topk1Hits++;
    }

    auto &linearResult = linearCtx->result();
    ASSERT_EQ(topk, linearResult.size());
    ASSERT_EQ(doc_cnt - 1, linearResult[0].key());

    for (size_t k = 0; k < topk; ++k) {
      totalCnts++;
      if (k >= knnCount) {
        continue;  // a missing KNN result counts as a miss
      }
      for (size_t j = 0; j < topk; ++j) {
        if (linearResult[j].key() == knnResult[k].key()) {
          totalHits++;
          break;
        }
      }
    }
  }
  // NOTE(review): totalCnts is incremented once per (query, rank) pair, so
  // totalHits / totalCnts is already a ratio in [0, 1]; the `step * step`
  // factor scales it up and weakens the recall threshold below. Confirm
  // whether the factor is intended.
  float recall = totalHits * step * step * 1.0f / totalCnts;
  float topk1Recall = topk1Hits * step * 1.0f / doc_cnt;
  float cost = linearTotalTime * 1.0f / knnTotalTime;
#if 0
    printf("knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d "
           "R@%zd=%f R@1=%f cost=%f\n",
           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,
           topk1Recall, cost);
#endif
  EXPECT_GT(recall, 0.90f);
  EXPECT_GT(topk1Recall, 0.90f);
  // EXPECT_GT(cost, 2.0f);
}

// Streams sparse vectors through the reformer named by
// HalfFloatSparseConverter's meta, dumps the streamer, and checks KNN recall
// of the reloaded searcher against linear search using transformed queries.
TEST_F(HnswSparseSearcherTest, TestHalfFloatConverter) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_NE(streamer, nullptr);

  ailego::Params params;
  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 20);
  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 16);
  params.set(PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION, 20);
  params.set(PARAM_HNSW_SPARSE_STREAMER_EF, 10);
  params.set(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);

  IndexMeta index_meta_raw(IndexMeta::MetaType::MT_SPARSE,
                           IndexMeta::DataType::DT_FP32);
  index_meta_raw.set_metric("InnerProductSparse", 0, ailego::Params());

  ailego::Params converter_params;
  auto converter = IndexFactory::CreateConverter("HalfFloatSparseConverter");
  ASSERT_TRUE(converter != nullptr);

  // A failed init would leave converter->meta() unusable for the steps below.
  ASSERT_EQ(0, converter->init(index_meta_raw, converter_params));

  // The converter's meta names the reformer used to transform the vectors.
  IndexMeta index_meta = converter->meta();

  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());
  ASSERT_TRUE(reformer != nullptr);

  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));

  ailego::Params stg_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestHalfFloatConverter.index", true));
  ASSERT_EQ(0, streamer->init(index_meta, params));
  ASSERT_EQ(0, streamer->open(storage));

  // size_t cnt = 5000U;
  size_t cnt = 20000U;
  size_t sparse_dim_count = 32;

  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);

  std::vector<NumericalVector<uint32_t>> sparse_indices_list;
  std::vector<NumericalVector<float>> sparse_vec_list;

  generate_sparse_data(cnt, sparse_dim_count, sparse_indices_list,
                       sparse_vec_list, true);

  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  for (size_t i = 0; i < cnt; i++) {
    // Transform each vector with the reformer before adding it.
    std::string new_vec;
    IndexQueryMeta new_meta;
    ASSERT_EQ(0, reformer->transform(
                     sparse_dim_count, sparse_indices_list[i].data(),
                     sparse_vec_list[i].data(), qmeta, &new_vec, &new_meta));

    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count,
                                    sparse_indices_list[i].data(),
                                    new_vec.data(), new_meta, ctx));
  }

  auto path = dir_ + "/TestHalfFloatConverter";
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, streamer->dump(dumper));
  ASSERT_EQ(0, streamer->close());
  ASSERT_EQ(0, dumper->close());

  // do searcher knn
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSparseSearcher");
  auto read_storage = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_EQ(0, read_storage->open(path, false));
  ASSERT_TRUE(searcher != nullptr);
  ASSERT_EQ(0, searcher->init(ailego::Params()));
  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));
  auto linearCtx = searcher->create_context();
  auto knnCtx = searcher->create_context();
  // Fail fast instead of dereferencing a null context below.
  ASSERT_TRUE(!!linearCtx);
  ASSERT_TRUE(!!knnCtx);
  size_t topk = 200;
  linearCtx->set_topk(topk);
  knnCtx->set_topk(topk);
  uint64_t knnTotalTime = 0;
  uint64_t linearTotalTime = 0;
  int totalHits = 0;
  int totalCnts = 0;
  int topk1Hits = 0;
  size_t step = 50;
  // Query with every `step`-th inserted vector.
  for (size_t i = 0; i < cnt; i += step) {
    const auto &sparse_indices = sparse_indices_list[i];
    const auto &sparse_vec = sparse_vec_list[i];

    // Queries go through the same transform as the indexed vectors.
    std::string ovec;
    IndexQueryMeta new_qmeta;
    ASSERT_EQ(0,
              reformer->transform(sparse_dim_count, sparse_indices.data(),
                                  sparse_vec.data(), qmeta, &ovec, &new_qmeta));

    auto t1 = ailego::Realtime::MicroSeconds();

    ASSERT_EQ(0, searcher->search_impl(sparse_dim_count, sparse_indices.data(),
                                       ovec.data(), new_qmeta, knnCtx));

    auto t2 = ailego::Realtime::MicroSeconds();

    ASSERT_EQ(0,
              searcher->search_bf_impl(sparse_dim_count, sparse_indices.data(),
                                       ovec.data(), new_qmeta, linearCtx));

    auto t3 = ailego::Realtime::MicroSeconds();

    knnTotalTime += t2 - t1;
    linearTotalTime += t3 - t2;

    auto &knnResult = knnCtx->result();
    // The KNN result size is deliberately not asserted, so it may be shorter
    // than topk; every access below is bounds-checked against it.
    // ASSERT_EQ(topk, knnResult.size());
    const size_t knnCount = knnResult.size();
    if (knnCount > 0 && i == knnResult[0].key()) {
      topk1Hits++;
    }

    auto &linearResult = linearCtx->result();
    ASSERT_EQ(topk, linearResult.size());
    ASSERT_EQ(i, linearResult[0].key());

    for (size_t k = 0; k < topk; ++k) {
      totalCnts++;
      if (k >= knnCount) {
        continue;  // a missing KNN result counts as a miss
      }
      for (size_t j = 0; j < topk; ++j) {
        if (linearResult[j].key() == knnResult[k].key()) {
          totalHits++;
          break;
        }
      }
    }
  }
  // NOTE(review): totalCnts is incremented once per (query, rank) pair, so
  // totalHits / totalCnts is already a ratio in [0, 1]; the extra `* step`
  // factor scales it up and weakens the recall threshold below. Confirm
  // whether the factor is intended.
  float recall = totalHits * step * 1.0f / totalCnts;
  float topk1Recall = topk1Hits * step * 1.0f / cnt;
  float cost = linearTotalTime * 1.0f / knnTotalTime;
#if 0
    printf("knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d "
           "R@%zd=%f R@1=%f cost=%f\n",
           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,
           topk1Recall, cost);
#endif
  EXPECT_GT(recall, 0.90f);
  EXPECT_GT(topk1Recall, 0.95f);
  // EXPECT_GT(cost, 2.0f);
}

// Verifies HnswSparseSearcher recall when a query filtering ratio is set.
//
// The test builds an index of 20000 generated sparse vectors through
// HnswSparseStreamer, dumps it to a file, loads the dump into
// HnswSparseSearcher with PARAM_HNSW_SPARSE_SEARCHER_QUERY_FILTERING_RATIO set
// to 0.05, and compares graph search against brute-force search using every
// 100th inserted vector as a query.
TEST_F(HnswSparseSearcherTest, TestQueryFilteringRatio) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_NE(streamer, nullptr);

  size_t sparse_dim_count = 32;

  IndexMeta index_meta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  index_meta.set_metric("InnerProductSparse", 0, ailego::Params());

  ailego::Params params;
  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 20);
  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 16);
  params.set(PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION, 20);
  params.set(PARAM_HNSW_SPARSE_STREAMER_EF, 10);
  params.set(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);

  ailego::Params stg_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(storage, nullptr);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestQueryFilteringRatio.index", true));
  ASSERT_EQ(0, streamer->init(index_meta, params));
  ASSERT_EQ(0, streamer->open(storage));

  // size_t cnt = 5000U;
  size_t cnt = 20000U;
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);

  std::vector<NumericalVector<uint32_t>> sparse_indices_list;
  std::vector<NumericalVector<float>> sparse_vec_list;

  generate_sparse_data(cnt, sparse_dim_count, sparse_indices_list,
                       sparse_vec_list, true);

  // Document i is inserted with key i, so a query built from document i is
  // expected to come back as the best brute-force match (asserted below).
  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  for (size_t i = 0; i < cnt; i++) {
    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count,
                                    sparse_indices_list[i].data(),
                                    sparse_vec_list[i].data(), qmeta, ctx));
  }

  auto path = dir_ + "/TestQueryFilteringRatio";
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, streamer->dump(dumper));
  ASSERT_EQ(0, streamer->close());
  ASSERT_EQ(0, dumper->close());

  // do searcher knn
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSparseSearcher");
  ASSERT_TRUE(searcher != nullptr);
  auto read_storage = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_NE(read_storage, nullptr);
  ASSERT_EQ(0, read_storage->open(path, false));

  ailego::Params searcher_params;
  searcher_params.set(PARAM_HNSW_SPARSE_SEARCHER_QUERY_FILTERING_RATIO, 0.05);

  ASSERT_EQ(0, searcher->init(searcher_params));
  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));
  auto linearCtx = searcher->create_context();
  ASSERT_TRUE(!!linearCtx);
  auto knnCtx = searcher->create_context();
  ASSERT_TRUE(!!knnCtx);
  size_t topk = 20;
  linearCtx->set_topk(topk);
  knnCtx->set_topk(topk);
  uint64_t knnTotalTime = 0;
  uint64_t linearTotalTime = 0;
  int totalHits = 0;
  int totalCnts = 0;
  int topk1Hits = 0;

  size_t step = 100;
  for (size_t i = 0; i < cnt; i += step) {
    const auto &sparse_indices = sparse_indices_list[i];
    const auto &sparse_vec = sparse_vec_list[i];

    auto t1 = ailego::Realtime::MicroSeconds();

    ASSERT_EQ(0, searcher->search_impl(sparse_dim_count, sparse_indices.data(),
                                       sparse_vec.data(), qmeta, knnCtx));

    auto t2 = ailego::Realtime::MicroSeconds();

    ASSERT_EQ(0,
              searcher->search_bf_impl(sparse_dim_count, sparse_indices.data(),
                                       sparse_vec.data(), qmeta, linearCtx));

    auto t3 = ailego::Realtime::MicroSeconds();

    knnTotalTime += t2 - t1;
    linearTotalTime += t3 - t2;

    auto &knnResult = knnCtx->result();
    // The size of the graph-search result is deliberately not asserted, so it
    // may hold fewer than `topk` documents. Clamp every index into it instead
    // of reading past the end of the list.
    const size_t knnCount = knnResult.size() < topk ? knnResult.size() : topk;
    if (knnCount > 0 && i == knnResult[0].key()) {
      ++topk1Hits;
    }

    auto &linearResult = linearCtx->result();
    ASSERT_EQ(topk, linearResult.size());
    ASSERT_EQ(i, linearResult[0].key());

    // Every expected slot counts toward the denominator, so a short
    // graph-search result lowers the measured recall.
    totalCnts += static_cast<int>(topk);
    for (size_t k = 0; k < knnCount; ++k) {
      for (size_t j = 0; j < topk; ++j) {
        if (linearResult[j].key() == knnResult[k].key()) {
          totalHits++;
          break;
        }
      }
    }
  }
  float recall = totalHits * 1.0f / totalCnts;
  // cnt / step queries were issued, so scale the hit count by step / cnt.
  float topk1Recall = topk1Hits * step * 1.0f / cnt;
  float cost = linearTotalTime * 1.0f / knnTotalTime;
#if 0
    printf("knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d "
           "R@%zd=%f R@1=%f cost=%f\n",
           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,
           topk1Recall, cost);
#endif
  EXPECT_GT(recall, 0.90f);
  EXPECT_GT(topk1Recall, 0.95f);
  // EXPECT_GT(cost, 2.0f);
}

// Round-trips sparse vectors through the half-float converter and reformer.
//
// Each generated vector is transformed by the reformer named in
// HalfFloatSparseConverter's meta before it is added to the streamer. The
// stored values are then fetched with get_sparse_vector(), reverted through
// the same reformer, and compared with the original floats within 1e-2:
// first on the live streamer, then on a searcher loaded from the dumped
// index.
TEST_F(HnswSparseSearcherTest, TestHalfFloatRevert) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_NE(streamer, nullptr);

  ailego::Params params;
  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 20);
  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 16);
  params.set(PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION, 20);
  params.set(PARAM_HNSW_SPARSE_STREAMER_EF, 10);
  params.set(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
  params.set(PARAM_HNSW_SPARSE_STREAMER_GET_VECTOR_ENABLE, true);

  IndexMeta index_meta_raw(IndexMeta::MetaType::MT_SPARSE,
                           IndexMeta::DataType::DT_FP32);
  index_meta_raw.set_metric("InnerProductSparse", 0, ailego::Params());

  ailego::Params converter_params;
  auto converter = IndexFactory::CreateConverter("HalfFloatSparseConverter");
  ASSERT_TRUE(converter != nullptr);

  // The converter's meta drives everything below, so a failed init must stop
  // the test instead of being silently ignored.
  ASSERT_EQ(0, converter->init(index_meta_raw, converter_params));

  IndexMeta index_meta = converter->meta();

  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());
  ASSERT_TRUE(reformer != nullptr);

  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));

  ailego::Params stg_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(storage, nullptr);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestHalfFloatRevert.index", true));
  ASSERT_EQ(0, streamer->init(index_meta, params));
  ASSERT_EQ(0, streamer->open(storage));

  // size_t cnt = 5000U;
  size_t cnt = 20000U;
  size_t sparse_dim_count = 32;

  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);

  std::vector<NumericalVector<uint32_t>> sparse_indices_list;
  std::vector<NumericalVector<float>> sparse_vec_list;

  generate_sparse_data(cnt, sparse_dim_count, sparse_indices_list,
                       sparse_vec_list, true);

  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  // Filled in by transform(); reused later as the meta passed to revert().
  IndexQueryMeta new_meta;
  for (size_t i = 0; i < cnt; i++) {
    std::string new_vec;
    ASSERT_EQ(0, reformer->transform(
                     sparse_dim_count, sparse_indices_list[i].data(),
                     sparse_vec_list[i].data(), qmeta, &new_vec, &new_meta));

    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count,
                                    sparse_indices_list[i].data(),
                                    new_vec.data(), new_meta, ctx));
  }

  const float epsilon = 1e-2f;

  // Pass 1: read the vectors back from the streamer that is still open.
  for (size_t i = 0; i < cnt; i++) {
    uint32_t sparse_count = 0;
    std::string sparse_indices;
    std::string sparse_values;

    ASSERT_EQ(streamer->get_sparse_vector(i, &sparse_count, &sparse_indices,
                                          &sparse_values),
              0);
    ASSERT_EQ(sparse_count, sparse_dim_count);

    std::string sparse_values_out;
    sparse_values_out.resize(sparse_count * sizeof(float));

    ASSERT_EQ(reformer->revert(
                  sparse_count,
                  reinterpret_cast<const uint32_t *>(sparse_indices.data()),
                  sparse_values.data(), new_meta, &sparse_values_out),
              0);

    // View the reverted buffer as floats without casting away constness.
    const float *reverted =
        reinterpret_cast<const float *>(sparse_values_out.data());
    for (size_t j = 0; j < sparse_count; ++j) {
      EXPECT_NEAR(reverted[j], sparse_vec_list[i][j], epsilon);
    }
  }

  auto path = dir_ + "/TestHalfFloatRevert";
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, streamer->dump(dumper));
  ASSERT_EQ(0, streamer->close());
  ASSERT_EQ(0, dumper->close());

  // do searcher knn
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSparseSearcher");
  ASSERT_TRUE(searcher != nullptr);
  auto read_storage = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_NE(read_storage, nullptr);
  ASSERT_EQ(0, read_storage->open(path, false));
  ASSERT_EQ(0, searcher->init(ailego::Params()));
  ASSERT_EQ(0, searcher->load(read_storage, IndexMetric::Pointer()));

  // Pass 2: the same check against the searcher loaded from the dump.
  for (size_t i = 0; i < cnt; i++) {
    uint32_t sparse_count = 0;
    std::string sparse_indices;
    std::string sparse_values;

    ASSERT_EQ(searcher->get_sparse_vector(i, &sparse_count, &sparse_indices,
                                          &sparse_values),
              0);
    ASSERT_EQ(sparse_count, sparse_dim_count);

    std::string sparse_values_out;
    sparse_values_out.resize(sparse_count * sizeof(float));

    ASSERT_EQ(reformer->revert(
                  sparse_count,
                  reinterpret_cast<const uint32_t *>(sparse_indices.data()),
                  sparse_values.data(), new_meta, &sparse_values_out),
              0);

    const float *reverted =
        reinterpret_cast<const float *>(sparse_values_out.data());
    for (size_t j = 0; j < sparse_count; ++j) {
      EXPECT_NEAR(reverted[j], sparse_vec_list[i][j], epsilon);
    }
  }
}

}  // namespace core
}  // namespace zvec

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif

================================================
FILE: tests/core/algorithm/hnsw_sparse/hnsw_sparse_streamer_buffer_test.cpp
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <sys/stat.h>
#include <sys/types.h>
#include <fcntl.h>
#include <future>
#include <iostream>
#include <memory>
#include <ailego/math/norm_matrix.h>
#include <gtest/gtest.h>
#include <zvec/ailego/container/vector.h>
#include "hnsw_sparse_streamer.h"

using namespace std;
using namespace testing;
using namespace zvec::ailego;

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
#endif

namespace zvec {
namespace core {

class HnswSparseStreamerTest : public testing::Test {
 protected:
  void SetUp(void);
  void TearDown(void);
  void generate_sparse_data(
      size_t cnt, uint32_t sparse_dim_count,
      std::vector<NumericalVector<uint32_t>> &sparse_indices_list,
      std::vector<NumericalVector<float>> &sparse_vec_list, bool norm);

  static std::string dir_;
  static shared_ptr<IndexMeta> index_meta_ptr_;
};

std::string HnswSparseStreamerTest::dir_("HnswSparseStreamerTest/");
shared_ptr<IndexMeta> HnswSparseStreamerTest::index_meta_ptr_;

// Appends `cnt` randomly generated sparse vectors to the two output lists.
//
// Every vector has `sparse_dim_count` entries. The indices are the fixed
// sequence 0, 20, 40, ...; the values are drawn from a uniform distribution
// over [-1, 1) and then divided by the norm that ailego::Norm2Matrix computes
// for the vector. The generator is seeded from std::random_device, so the
// data differs between runs.
//
// NOTE: `norm` is kept for interface compatibility but is not consulted; the
// values are always normalized. The local that used to shadow the parameter
// has been renamed so this is visible instead of accidental.
void HnswSparseStreamerTest::generate_sparse_data(
    size_t cnt, uint32_t sparse_dim_count,
    std::vector<NumericalVector<uint32_t>> &sparse_indices_list,
    std::vector<NumericalVector<float>> &sparse_vec_list, bool norm) {
  (void)norm;

  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_real_distribution<float> dist(-1.0, 1.0);

  // Grow each output list once instead of reallocating while appending.
  sparse_indices_list.reserve(sparse_indices_list.size() + cnt);
  sparse_vec_list.reserve(sparse_vec_list.size() + cnt);

  for (size_t i = 0; i < cnt; ++i) {
    // prepare sparse
    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
    NumericalVector<float> sparse_vec(sparse_dim_count);

    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices[j] = static_cast<uint32_t>(j * 20);
      sparse_vec[j] = dist(gen);
    }

    float vec_norm;
    ailego::Norm2Matrix<float, 1>::Compute(sparse_vec.data(), sparse_dim_count,
                                           &vec_norm);
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_vec[j] = sparse_vec[j] / vec_norm;
    }

    sparse_indices_list.push_back(sparse_indices);
    sparse_vec_list.push_back(sparse_vec);
  }
}

void HnswSparseStreamerTest::SetUp(void) {
  index_meta_ptr_.reset(new (nothrow) IndexMeta(IndexMeta::MetaType::MT_SPARSE,
                                                IndexMeta::DataType::DT_FP32));
  index_meta_ptr_->set_metric("InnerProductSparse", 0, ailego::Params());

  char cmdBuf[100];
  snprintf(cmdBuf, 100, "rm -rf %s", dir_.c_str());
  system(cmdBuf);
}

// Runs after every test: removes the test directory via `rm -rf`.
void HnswSparseStreamerTest::TearDown(void) {
  // Build the command in a std::string so that a long directory name cannot
  // be truncated by a fixed-size buffer before it reaches `rm -rf`.
  const std::string cleanup_cmd = "rm -rf " + dir_;
  system(cleanup_cmd.c_str());
}

// Writes 20000 generated sparse vectors through a streamer backed by
// MMapFileStorage, reopens the same index file with a second streamer backed
// by BufferStorage, and checks that graph search on the reopened index agrees
// with brute-force search for every 100th inserted vector used as a query.
TEST_F(HnswSparseStreamerTest, TestGeneral) {
  IndexStreamer::Pointer write_streamer =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_TRUE(write_streamer != nullptr);

  size_t sparse_dim_count = 32;

  IndexMeta index_meta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  index_meta.set_metric("InnerProductSparse", 0, ailego::Params());

  ailego::Params params;
  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 20);
  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 16);
  params.set(PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION, 10);
  params.set(PARAM_HNSW_SPARSE_STREAMER_EF, 5);
  params.set(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);

  ailego::Params stg_params;
  auto write_storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, write_storage);
  ASSERT_EQ(0, write_storage->init(stg_params));
  ASSERT_EQ(0, write_storage->open(dir_ + "/Test/HnswSparseSearch", true));
  ASSERT_EQ(0, write_streamer->init(index_meta, params));
  ASSERT_EQ(0, write_streamer->open(write_storage));

  size_t cnt = 20000U;
  auto ctx = write_streamer->create_context();
  ASSERT_TRUE(!!ctx);

  std::vector<NumericalVector<uint32_t>> sparse_indices_list;
  std::vector<NumericalVector<float>> sparse_vec_list;

  generate_sparse_data(cnt, sparse_dim_count, sparse_indices_list,
                       sparse_vec_list, true);

  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  for (size_t i = 0; i < cnt; i++) {
    ASSERT_EQ(0, write_streamer->add_impl(
                     i, sparse_dim_count, sparse_indices_list[i].data(),
                     sparse_vec_list[i].data(), qmeta, ctx));
  }
  // The read phase below reopens this file, so a failed flush or close has to
  // be reported here rather than as a confusing failure later on.
  ASSERT_EQ(0, write_streamer->flush(0UL));
  ASSERT_EQ(0, write_streamer->close());
  write_streamer.reset();

  IndexStreamer::Pointer read_streamer =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_TRUE(read_streamer != nullptr);
  ASSERT_EQ(0, read_streamer->init(*index_meta_ptr_, params));
  auto read_storage = IndexFactory::CreateStorage("BufferStorage");
  ASSERT_NE(nullptr, read_storage);
  ASSERT_EQ(0, read_storage->init(stg_params));
  ASSERT_EQ(0, read_storage->open(dir_ + "/Test/HnswSparseSearch", false));
  ASSERT_EQ(0, read_streamer->open(read_storage));

  auto linearCtx = read_streamer->create_context();
  ASSERT_TRUE(!!linearCtx);

  auto knnCtx = read_streamer->create_context();
  ASSERT_TRUE(!!knnCtx);

  // streamer->print_debug_info();
  size_t topk = 200;
  linearCtx->set_topk(topk);
  knnCtx->set_topk(topk);

  uint64_t knnTotalTime = 0;
  uint64_t linearTotalTime = 0;

  int totalHits = 0;
  int totalCnts = 0;
  int topk1Hits = 0;

  // Query sampling stride; also used to scale the top-1 hit count below so
  // the two can no longer drift apart.
  const size_t step = 100;
  for (size_t i = 0; i < cnt; i += step) {
    const auto &sparse_indices = sparse_indices_list[i];
    const auto &sparse_vec = sparse_vec_list[i];

    auto t1 = ailego::Realtime::MicroSeconds();

    ASSERT_EQ(
        0, read_streamer->search_impl(sparse_dim_count, sparse_indices.data(),
                                      sparse_vec.data(), qmeta, knnCtx));

    auto t2 = ailego::Realtime::MicroSeconds();

    ASSERT_EQ(0, read_streamer->search_bf_impl(
                     sparse_dim_count, sparse_indices.data(), sparse_vec.data(),
                     qmeta, linearCtx));

    auto t3 = ailego::Realtime::MicroSeconds();

    knnTotalTime += t2 - t1;
    linearTotalTime += t3 - t2;

    // std::cout << "i: " << i << std::endl;

    auto &knnResult = knnCtx->result();
    ASSERT_EQ(topk, knnResult.size());
    topk1Hits += i == knnResult[0].key();

    auto &linearResult = linearCtx->result();
    ASSERT_EQ(topk, linearResult.size());
    ASSERT_EQ(i, linearResult[0].key());

    // Count how many graph-search results also appear in the brute-force
    // result for the same query.
    for (size_t k = 0; k < topk; ++k) {
      totalCnts++;
      for (size_t j = 0; j < topk; ++j) {
        if (linearResult[j].key() == knnResult[k].key()) {
          totalHits++;
          break;
        }
      }
    }
  }
  float recall = totalHits * 1.0f / totalCnts;
  // cnt / step queries were issued, so scale the hit count by step / cnt.
  float topk1Recall = topk1Hits * step * 1.0f / cnt;
  float cost = linearTotalTime * 1.0f / knnTotalTime;
#if 0
    printf("knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d "
           "R@%zd=%f R@1=%f cost=%f\n",
           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,
           topk1Recall, cost);
#endif
  EXPECT_GT(recall, 0.80f);
  EXPECT_GT(topk1Recall, 0.80f);
  // EXPECT_GT(cost, 2.0f);
}

// Writes 20000 generated sparse vectors through a streamer backed by
// MMapFileStorage, reopens the same index file with a second streamer that
// also uses MMapFileStorage, and checks that graph search on the reopened
// index agrees with brute-force search for every 100th inserted vector used
// as a query.
TEST_F(HnswSparseStreamerTest, TestHnswSearchMMap) {
  IndexStreamer::Pointer write_streamer =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_TRUE(write_streamer != nullptr);

  size_t sparse_dim_count = 32;

  IndexMeta index_meta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  index_meta.set_metric("InnerProductSparse", 0, ailego::Params());

  ailego::Params params;
  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 20);
  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 16);
  params.set(PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION, 10);
  params.set(PARAM_HNSW_SPARSE_STREAMER_EF, 5);
  params.set(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);

  ailego::Params stg_params;
  auto write_storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, write_storage);
  ASSERT_EQ(0, write_storage->init(stg_params));
  ASSERT_EQ(0, write_storage->open(dir_ + "/Test/HnswSparseSearch", true));
  ASSERT_EQ(0, write_streamer->init(index_meta, params));
  ASSERT_EQ(0, write_streamer->open(write_storage));

  size_t cnt = 20000U;
  auto ctx = write_streamer->create_context();
  ASSERT_TRUE(!!ctx);

  std::vector<NumericalVector<uint32_t>> sparse_indices_list;
  std::vector<NumericalVector<float>> sparse_vec_list;

  generate_sparse_data(cnt, sparse_dim_count, sparse_indices_list,
                       sparse_vec_list, true);

  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  for (size_t i = 0; i < cnt; i++) {
    ASSERT_EQ(0, write_streamer->add_impl(
                     i, sparse_dim_count, sparse_indices_list[i].data(),
                     sparse_vec_list[i].data(), qmeta, ctx));
  }
  // The read phase below reopens this file, so a failed flush or close has to
  // be reported here rather than as a confusing failure later on.
  ASSERT_EQ(0, write_streamer->flush(0UL));
  ASSERT_EQ(0, write_streamer->close());
  write_streamer.reset();

  IndexStreamer::Pointer read_streamer =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_TRUE(read_streamer != nullptr);
  ASSERT_EQ(0, read_streamer->init(*index_meta_ptr_, params));
  auto read_storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, read_storage);
  ASSERT_EQ(0, read_storage->init(stg_params));
  ASSERT_EQ(0, read_storage->open(dir_ + "/Test/HnswSparseSearch", false));
  ASSERT_EQ(0, read_streamer->open(read_storage));

  auto linearCtx = read_streamer->create_context();
  ASSERT_TRUE(!!linearCtx);

  auto knnCtx = read_streamer->create_context();
  ASSERT_TRUE(!!knnCtx);

  // streamer->print_debug_info();
  size_t topk = 200;
  linearCtx->set_topk(topk);
  knnCtx->set_topk(topk);

  uint64_t knnTotalTime = 0;
  uint64_t linearTotalTime = 0;

  int totalHits = 0;
  int totalCnts = 0;
  int topk1Hits = 0;

  // Query sampling stride; also used to scale the top-1 hit count below so
  // the two can no longer drift apart.
  const size_t step = 100;
  for (size_t i = 0; i < cnt; i += step) {
    const auto &sparse_indices = sparse_indices_list[i];
    const auto &sparse_vec = sparse_vec_list[i];

    auto t1 = ailego::Realtime::MicroSeconds();

    ASSERT_EQ(
        0, read_streamer->search_impl(sparse_dim_count, sparse_indices.data(),
                                      sparse_vec.data(), qmeta, knnCtx));

    auto t2 = ailego::Realtime::MicroSeconds();

    ASSERT_EQ(0, read_streamer->search_bf_impl(
                     sparse_dim_count, sparse_indices.data(), sparse_vec.data(),
                     qmeta, linearCtx));

    auto t3 = ailego::Realtime::MicroSeconds();

    knnTotalTime += t2 - t1;
    linearTotalTime += t3 - t2;

    // std::cout << "i: " << i << std::endl;

    auto &knnResult = knnCtx->result();
    ASSERT_EQ(topk, knnResult.size());
    topk1Hits += i == knnResult[0].key();

    auto &linearResult = linearCtx->result();
    ASSERT_EQ(topk, linearResult.size());
    ASSERT_EQ(i, linearResult[0].key());

    // Count how many graph-search results also appear in the brute-force
    // result for the same query.
    for (size_t k = 0; k < topk; ++k) {
      totalCnts++;
      for (size_t j = 0; j < topk; ++j) {
        if (linearResult[j].key() == knnResult[k].key()) {
          totalHits++;
          break;
        }
      }
    }
  }
  float recall = totalHits * 1.0f / totalCnts;
  // cnt / step queries were issued, so scale the hit count by step / cnt.
  float topk1Recall = topk1Hits * step * 1.0f / cnt;
  float cost = linearTotalTime * 1.0f / knnTotalTime;
#if 0
    printf("knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d "
           "R@%zd=%f R@1=%f cost=%f\n",
           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,
           topk1Recall, cost);
#endif
  EXPECT_GT(recall, 0.80f);
  EXPECT_GT(topk1Recall, 0.80f);
  // EXPECT_GT(cost, 2.0f);
}

}  // namespace core
}  // namespace zvec

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif

================================================
FILE: tests/core/algorithm/hnsw_sparse/hnsw_sparse_streamer_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "hnsw_sparse_streamer.h"
#include <sys/stat.h>
#include <sys/types.h>
#include <fcntl.h>
#include <future>
#include <iostream>
#include <memory>
#include <ailego/math/norm_matrix.h>
#include <gtest/gtest.h>
#include <zvec/ailego/container/vector.h>

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
#endif

using namespace std;
using namespace testing;
using namespace zvec::ailego;

namespace zvec {
namespace core {

constexpr size_t static sparse_dim_count = 16;

class HnswSparseStreamerTest : public testing::Test {
 protected:
  void SetUp(void);
  void TearDown(void);
  void generate_sparse_data(
      size_t cnt, uint32_t sparse_dim_count,
      std::vector<NumericalVector<uint32_t>> &sparse_indices_list,
      std::vector<NumericalVector<float>> &sparse_vec_list, bool norm);

  static std::string dir_;
  static shared_ptr<IndexMeta> index_meta_ptr_;
};

std::string HnswSparseStreamerTest::dir_("HnswSparseStreamerTest/");
shared_ptr<IndexMeta> HnswSparseStreamerTest::index_meta_ptr_;

// Appends `cnt` randomly generated sparse vectors to the two output lists.
//
// Every vector has `sparse_dim_count` entries. The indices are the fixed
// sequence 0, 20, 40, ...; the values are drawn from a uniform distribution
// over [-1, 1) and then divided by the norm that ailego::Norm2Matrix computes
// for the vector. The generator is seeded from std::random_device, so the
// data differs between runs.
//
// NOTE: `norm` is kept for interface compatibility but is not consulted; the
// values are always normalized. The local that used to shadow the parameter
// has been renamed so this is visible instead of accidental.
void HnswSparseStreamerTest::generate_sparse_data(
    size_t cnt, uint32_t sparse_dim_count,
    std::vector<NumericalVector<uint32_t>> &sparse_indices_list,
    std::vector<NumericalVector<float>> &sparse_vec_list, bool norm) {
  (void)norm;

  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_real_distribution<float> dist(-1.0, 1.0);

  // Grow each output list once instead of reallocating while appending.
  sparse_indices_list.reserve(sparse_indices_list.size() + cnt);
  sparse_vec_list.reserve(sparse_vec_list.size() + cnt);

  for (size_t i = 0; i < cnt; ++i) {
    // prepare sparse
    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
    NumericalVector<float> sparse_vec(sparse_dim_count);

    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices[j] = static_cast<uint32_t>(j * 20);
      sparse_vec[j] = dist(gen);
    }

    float vec_norm;
    ailego::Norm2Matrix<float, 1>::Compute(sparse_vec.data(), sparse_dim_count,
                                           &vec_norm);
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_vec[j] = sparse_vec[j] / vec_norm;
    }

    sparse_indices_list.push_back(sparse_indices);
    sparse_vec_list.push_back(sparse_vec);
  }
}

void HnswSparseStreamerTest::SetUp(void) {
  index_meta_ptr_.reset(new (nothrow) IndexMeta(IndexMeta::MetaType::MT_SPARSE,
                                                IndexMeta::DataType::DT_FP32));
  index_meta_ptr_->set_metric("InnerProductSparse", 0, ailego::Params());

  char cmdBuf[100];
  snprintf(cmdBuf, 100, "rm -rf %s", dir_.c_str());
  system(cmdBuf);
}

// Runs after every test: removes the test directory via `rm -rf`.
void HnswSparseStreamerTest::TearDown(void) {
  // Build the command in a std::string so that a long directory name cannot
  // be truncated by a fixed-size buffer before it reaches `rm -rf`.
  const std::string cleanup_cmd = "rm -rf " + dir_;
  system(cleanup_cmd.c_str());
}

// Inserts 20000 generated sparse vectors into one streamer and, using every
// 100th inserted vector as a query, compares graph search with brute-force
// search on that same streamer.
TEST_F(HnswSparseStreamerTest, TestGeneral) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  // Local value; shadows the file-scope constant of the same name.
  size_t sparse_dim_count = 32;

  IndexMeta index_meta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  index_meta.set_metric("InnerProductSparse", 0, ailego::Params());

  ailego::Params streamer_params;
  streamer_params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 20);
  streamer_params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 16);
  streamer_params.set(PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION, 10);
  streamer_params.set(PARAM_HNSW_SPARSE_STREAMER_EF, 5);
  streamer_params.set(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);

  ailego::Params storage_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_EQ(0, storage->init(storage_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestGeneral", true));
  ASSERT_EQ(0, streamer->init(index_meta, streamer_params));
  ASSERT_EQ(0, streamer->open(storage));

  size_t doc_count = 20000U;

  // One context for inserting and one for each search flavour.
  auto add_ctx = streamer->create_context();
  ASSERT_TRUE(!!add_ctx);

  auto bf_ctx = streamer->create_context();
  ASSERT_TRUE(!!bf_ctx);

  auto graph_ctx = streamer->create_context();
  ASSERT_TRUE(!!graph_ctx);

  std::vector<NumericalVector<uint32_t>> indices_per_doc;
  std::vector<NumericalVector<float>> values_per_doc;

  generate_sparse_data(doc_count, sparse_dim_count, indices_per_doc,
                       values_per_doc, true);

  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  // Document `id` is stored under key `id`.
  for (size_t id = 0; id < doc_count; ++id) {
    ASSERT_EQ(0, streamer->add_impl(id, sparse_dim_count,
                                    indices_per_doc[id].data(),
                                    values_per_doc[id].data(), qmeta, add_ctx));
  }

  // streamer->print_debug_info();
  size_t topk = 200;
  bf_ctx->set_topk(topk);
  graph_ctx->set_topk(topk);

  uint64_t graph_elapsed_us = 0;
  uint64_t bf_elapsed_us = 0;

  int matched_total = 0;
  int expected_total = 0;
  int top1_matches = 0;

  for (size_t i = 0; i < doc_count; i += 100) {
    const auto &query_indices = indices_per_doc[i];
    const auto &query_values = values_per_doc[i];

    auto started_at = ailego::Realtime::MicroSeconds();

    ASSERT_EQ(0, streamer->search_impl(sparse_dim_count, query_indices.data(),
                                       query_values.data(), qmeta, graph_ctx));

    auto graph_done_at = ailego::Realtime::MicroSeconds();

    ASSERT_EQ(0,
              streamer->search_bf_impl(sparse_dim_count, query_indices.data(),
                                       query_values.data(), qmeta, bf_ctx));

    auto bf_done_at = ailego::Realtime::MicroSeconds();

    graph_elapsed_us += graph_done_at - started_at;
    bf_elapsed_us += bf_done_at - graph_done_at;

    // std::cout << "i: " << i << std::endl;

    auto &graph_docs = graph_ctx->result();
    ASSERT_EQ(topk, graph_docs.size());
    if (i == graph_docs[0].key()) {
      ++top1_matches;
    }

    auto &bf_docs = bf_ctx->result();
    ASSERT_EQ(topk, bf_docs.size());
    ASSERT_EQ(i, bf_docs[0].key());

    // A graph result counts as a hit when its key also occurs somewhere in
    // the brute-force result for the same query.
    for (size_t k = 0; k < topk; ++k) {
      ++expected_total;
      size_t j = 0;
      while (j < topk && !(bf_docs[j].key() == graph_docs[k].key())) {
        ++j;
      }
      if (j < topk) {
        ++matched_total;
      }
    }
  }
  float recall = matched_total * 1.0f / expected_total;
  float topk1Recall = top1_matches * 100.0f / doc_count;
  float cost = bf_elapsed_us * 1.0f / graph_elapsed_us;
#if 0
    printf("knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d "
           "R@%zd=%f R@1=%f cost=%f\n",
           graph_elapsed_us, bf_elapsed_us, matched_total, expected_total, topk,
           recall, topk1Recall, cost);
#endif
  EXPECT_GT(recall, 0.80f);
  EXPECT_GT(topk1Recall, 0.80f);
  // EXPECT_GT(cost, 2.0f);
}

// Inserts 1000 generated sparse vectors into a freshly opened
// HnswSparseStreamer and then flushes the index. Every add_impl() call and
// the final flush() must report success (return code 0).
TEST_F(HnswSparseStreamerTest, TestAddVector) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  // Number of sparse entries handed to add_impl() for each vector.
  size_t sparse_dim_count = 32;

  ailego::Params params;
  params.set("proxima.hnsw.sparse_streamer.max_neighbor_count", 16U);
  params.set("proxima.hnsw.sparse_streamer.upper_neighbor_count", 8U);
  params.set("proxima.hnsw.sparse_streamer.scaling_factor", 5U);
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ailego::Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestAddVector", true));
  ASSERT_EQ(0, streamer->open(storage));

  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);

  // One (indices, values) pair per document, produced by the fixture helper.
  std::vector<NumericalVector<uint32_t>> sparse_indices_list;
  std::vector<NumericalVector<float>> sparse_vec_list;
  size_t cnt = 1000UL;
  generate_sparse_data(cnt, sparse_dim_count, sparse_indices_list,
                       sparse_vec_list, true);

  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  // The loop index doubles as the document key.
  for (size_t i = 0; i < cnt; i++) {
    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count,
                                    sparse_indices_list[i].data(),
                                    sparse_vec_list[i].data(), qmeta, ctx));
  }

  // A failed flush must fail the test rather than pass silently.
  ASSERT_EQ(0, streamer->flush(0UL));
  streamer.reset();
}

// Exercises brute-force search (search_bf_impl) over 5000 documents.
//
// Document i stores the value -(i + 1) at every sparse index, while every
// query is all-positive. The assertions expect results ordered by ascending
// key (key 0 first) -- presumably because the fixture's index meta uses an
// inner-product style metric; confirm against the fixture's SetUp.
TEST_F(HnswSparseStreamerTest, TestLinearSearch) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  ailego::Params params;
  params.set("proxima.hnsw.sparse_streamer.max_neighbor_count", 16U);
  params.set("proxima.hnsw.sparse_streamer.upper_neighbor_count", 8U);
  params.set("proxima.hnsw.sparse_streamer.scaling_factor", 5U);
  ailego::Params stg_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  // Fail cleanly instead of dereferencing a null storage below.
  ASSERT_NE(nullptr, storage);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestLinearSearch.index", true));
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  size_t cnt = 5000UL;
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);

  // Populate the index: all documents share the same sparse indices and
  // differ only in the (negative) value stored at each of them.
  NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
  NumericalVector<float> sparse_values(sparse_dim_count);
  for (size_t i = 0; i < cnt; ++i) {
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices[j] = j * 20;
      sparse_values[j] = -1.0 * i - 1.0f;
    }

    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),
                                    sparse_values.data(), qmeta, ctx));
  }

  // For every positive query magnitude, top-1 and top-3 must start at key 0.
  size_t topk = 3;
  for (size_t i = 0; i < cnt; i++) {
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices[j] = j * 20;
      sparse_values[j] = i + 1.0f;
    }

    ctx->set_topk(1U);
    ASSERT_EQ(0,
              streamer->search_bf_impl(sparse_dim_count, sparse_indices.data(),
                                       sparse_values.data(), qmeta, ctx));
    auto &result1 = ctx->result();
    ASSERT_EQ(1UL, result1.size());
    ASSERT_EQ(0, result1[0].key());

    ctx->set_topk(topk);
    ASSERT_EQ(0,
              streamer->search_bf_impl(sparse_dim_count, sparse_indices.data(),
                                       sparse_values.data(), qmeta, ctx));
    auto &result2 = ctx->result();
    ASSERT_EQ(topk, result2.size());
    ASSERT_EQ(0, result2[0].key());
    ASSERT_EQ(1, result2[1].key());
    ASSERT_EQ(2, result2[2].key());
  }

  // A larger top-k must return the first 100 keys in order; spot-check a few.
  ctx->set_topk(100U);
  for (size_t j = 0; j < sparse_dim_count; ++j) {
    sparse_indices[j] = j * 20;
    sparse_values[j] = 10.1f;
  }

  ASSERT_EQ(0, streamer->search_bf_impl(sparse_dim_count, sparse_indices.data(),
                                        sparse_values.data(), qmeta, ctx));
  auto &result = ctx->result();
  ASSERT_EQ(100U, result.size());
  ASSERT_EQ(0, result[0].key());
  ASSERT_EQ(1, result[1].key());
  ASSERT_EQ(10, result[10].key());
  ASSERT_EQ(20, result[20].key());
  ASSERT_EQ(30, result[30].key());
  ASSERT_EQ(35, result[35].key());
  ASSERT_EQ(99, result[99].key());
}

// Exercises brute-force search restricted to an explicit key list
// (search_bf_by_p_keys_impl) over 5000 documents.
//
// Document i stores the value -(i + 1) at every sparse index and queries are
// all-positive; the assertions expect candidates ordered by ascending key.
// Scenarios covered:
//   1. the key list contains every inserted key;
//   2. the key list mixes inserted keys with keys that were never inserted
//      (cnt + 1, cnt + 2) -- only the three inserted keys come back;
//   3. the key list contains every tenth key.
TEST_F(HnswSparseStreamerTest, TestLinearSearchByKeys) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  ailego::Params params;
  params.set("proxima.hnsw.sparse_streamer.max_neighbor_count", 16U);
  params.set("proxima.hnsw.sparse_streamer.upper_neighbor_count", 8U);
  params.set("proxima.hnsw.sparse_streamer.scaling_factor", 5U);
  params.set("proxima.hnsw.sparse_streamer.get_vector_enable", true);
  ailego::Params stg_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  // Fail cleanly instead of dereferencing a null storage below.
  ASSERT_NE(nullptr, storage);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestLinearSearchByKeys.index", true));
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  size_t cnt = 5000UL;
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);

  // A single key group that initially lists every inserted key.
  std::vector<std::vector<uint64_t>> p_keys;
  p_keys.resize(1);
  p_keys[0].resize(cnt);

  NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
  NumericalVector<float> sparse_values(sparse_dim_count);
  for (size_t i = 0; i < cnt; ++i) {
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices[j] = j * 20;
      sparse_values[j] = -1.0 * i - 1.0f;
    }

    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),
                                    sparse_values.data(), qmeta, ctx));

    p_keys[0][i] = i;
  }

  // Scenario 1a: every query magnitude, top-1 and top-3 over all keys.
  size_t topk = 3;
  for (size_t i = 0; i < cnt; i += 1) {
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices[j] = j * 20;
      sparse_values[j] = i + 1.0f;
    }
    ctx->set_topk(1U);
    ASSERT_EQ(0, streamer->search_bf_by_p_keys_impl(
                     sparse_dim_count, sparse_indices.data(),
                     sparse_values.data(), p_keys, qmeta, ctx));
    auto &result1 = ctx->result();
    ASSERT_EQ(1UL, result1.size());
    ASSERT_EQ(0, result1[0].key());

    ctx->set_topk(topk);
    ASSERT_EQ(0, streamer->search_bf_by_p_keys_impl(
                     sparse_dim_count, sparse_indices.data(),
                     sparse_values.data(), p_keys, qmeta, ctx));
    auto &result2 = ctx->result();
    ASSERT_EQ(topk, result2.size());
    ASSERT_EQ(0, result2[0].key());
    ASSERT_EQ(1, result2[1].key());
    ASSERT_EQ(2, result2[2].key());
  }

  // Scenario 1b: top-100 over all keys; spot-check the ordering.
  {
    ctx->set_topk(100U);
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices[j] = j * 20;
      sparse_values[j] = 1.0f;
    }
    ASSERT_EQ(0, streamer->search_bf_by_p_keys_impl(
                     sparse_dim_count, sparse_indices.data(),
                     sparse_values.data(), p_keys, qmeta, ctx));
    auto &result = ctx->result();
    ASSERT_EQ(100U, result.size());
    ASSERT_EQ(0, result[0].key());
    ASSERT_EQ(1, result[1].key());
    ASSERT_EQ(10, result[10].key());
    ASSERT_EQ(20, result[20].key());
    ASSERT_EQ(30, result[30].key());
    ASSERT_EQ(35, result[35].key());
    ASSERT_EQ(99, result[99].key());
  }

  // Scenario 2: keys cnt + 1 and cnt + 2 were never inserted, so only the
  // three inserted keys (1, 10, 15) are expected back, in that order.
  {
    ctx->set_topk(100U);
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices[j] = j * 20;
      sparse_values[j] = 10.0f;
    }

    p_keys[0] = {{cnt + 1, 10, 1, 15, cnt + 2}};
    ASSERT_EQ(0, streamer->search_bf_by_p_keys_impl(
                     sparse_dim_count, sparse_indices.data(),
                     sparse_values.data(), p_keys, qmeta, ctx));
    auto &result = ctx->result();
    ASSERT_EQ(3U, result.size());
    ASSERT_EQ(1, result[0].key());
    ASSERT_EQ(10, result[1].key());
    ASSERT_EQ(15, result[2].key());
  }

  // Scenario 3: restrict the candidates to every tenth key (0, 10, 20, ...).
  {
    ctx->set_topk(100U);
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices[j] = j * 20;
      sparse_values[j] = 9.0f;
    }
    p_keys[0].clear();
    for (size_t j = 0; j < cnt; j += 10) {
      p_keys[0].push_back((uint64_t)j);
    }
    ASSERT_EQ(0, streamer->search_bf_by_p_keys_impl(
                     sparse_dim_count, sparse_indices.data(),
                     sparse_values.data(), p_keys, qmeta, ctx));
    auto &result = ctx->result();
    ASSERT_EQ(100U, result.size());
    ASSERT_EQ(0, result[0].key());
    ASSERT_EQ(10, result[1].key());
    ASSERT_EQ(100, result[10].key());
    ASSERT_EQ(200, result[20].key());
    ASSERT_EQ(300, result[30].key());
    ASSERT_EQ(350, result[35].key());
    ASSERT_EQ(990, result[99].key());
  }
}

// Repeatedly opens, appends to, flushes and closes one streamer against two
// alternating storages. Even keys go to storage1 and odd keys to storage2;
// after each add the provider iterator must expose exactly the keys written
// to the currently opened storage. Finally two fresh streamers are opened on
// the storages to confirm the data is visible to a new streamer instance.
TEST_F(HnswSparseStreamerTest, TestOpenClose) {
  constexpr size_t static sparse_dim_count = 2048;

  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  IndexMeta meta(IndexMeta::MetaType::MT_SPARSE, IndexMeta::DataType::DT_FP32);
  meta.set_metric("InnerProductSparse", 0, ailego::Params());
  ailego::Params params;
  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 10);
  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 5);
  auto storage1 = IndexFactory::CreateStorage("MMapFileStorage");
  auto storage2 = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage1);
  ASSERT_NE(nullptr, storage2);
  ailego::Params stg_params;
  // NOTE(review): unlike sibling tests these paths have no "/" separator
  // after dir_ -- confirm whether that is intended.
  ASSERT_EQ(0, storage1->init(stg_params));
  ASSERT_EQ(0, storage1->open(dir_ + "TessOpenAndClose1", true));
  ASSERT_EQ(0, storage2->init(stg_params));
  ASSERT_EQ(0, storage2->open(dir_ + "TessOpenAndClose2", true));
  ASSERT_EQ(0, streamer->init(meta, params));

  // Walks the provider iterator and checks that it yields `total` documents
  // whose keys are base, base + 2, base + 4, ... and whose sparse values all
  // equal the key.
  auto checkIter = [](size_t base, size_t total,
                      IndexStreamer::Pointer &streamer) {
    auto provider = streamer->create_sparse_provider();
    auto iter = provider->create_iterator();
    ASSERT_TRUE(!!iter);
    size_t cur = base;
    size_t cnt = 0;
    while (iter->is_valid()) {
      float *sparse_data = (float *)iter->sparse_data();
      ASSERT_EQ(cur, iter->key());
      for (size_t d = 0; d < sparse_dim_count; ++d) {
        ASSERT_FLOAT_EQ((float)cur, sparse_data[d]);
      }
      iter->next();
      cur += 2;
      cnt++;
    }
    ASSERT_EQ(cnt, total);
  };

  size_t testCnt = 200;
  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  for (size_t i = 0; i < testCnt; i += 2) {
    // Even key i goes into storage1.
    float v1 = (float)i;
    ASSERT_EQ(0, streamer->open(storage1));
    auto ctx = streamer->create_context();
    ASSERT_TRUE(!!ctx);

    NumericalVector<uint32_t> sparse_indices1(sparse_dim_count);
    NumericalVector<float> sparse_velues1(sparse_dim_count);
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices1[j] = j * 20;
      sparse_velues1[j] = v1;
    }

    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count, sparse_indices1.data(),
                                    sparse_velues1.data(), qmeta, ctx));

    checkIter(0, i / 2 + 1, streamer);
    ASSERT_EQ(0, streamer->flush(0UL));
    ASSERT_EQ(0, streamer->close());

    // Odd key i + 1 goes into storage2.
    float v2 = (float)(i + 1);
    NumericalVector<uint32_t> sparse_indices2(sparse_dim_count);
    NumericalVector<float> sparse_velues2(sparse_dim_count);
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices2[j] = j * 20;
      sparse_velues2[j] = v2;
    }

    ASSERT_EQ(0, streamer->open(storage2));
    ctx = streamer->create_context();
    ASSERT_TRUE(!!ctx);
    ASSERT_EQ(
        0, streamer->add_impl(i + 1, sparse_dim_count, sparse_indices2.data(),
                              sparse_velues2.data(), qmeta, ctx));
    checkIter(1, i / 2 + 1, streamer);
    ASSERT_EQ(0, streamer->flush(0UL));
    ASSERT_EQ(0, streamer->close());
  }

  // Fresh streamer instances must see everything that was written. Check the
  // pointer that was just created, not the original `streamer`.
  IndexStreamer::Pointer streamer1 =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_TRUE(streamer1 != nullptr);
  ASSERT_EQ(0, streamer1->init(meta, params));
  ASSERT_EQ(0, streamer1->open(storage1));

  IndexStreamer::Pointer streamer2 =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_TRUE(streamer2 != nullptr);
  ASSERT_EQ(0, streamer2->init(meta, params));
  ASSERT_EQ(0, streamer2->open(storage2));

  checkIter(0, testCnt / 2, streamer1);
  checkIter(1, testCnt / 2, streamer2);
}

// Adds 200 documents one at a time and, after each insertion, walks the
// sparse provider's iterator to confirm it yields keys 0..n-1 in order with
// every value equal to the key. The same check is repeated after a
// flush/close/reopen cycle, followed by a per-key get_sparse_vector() lookup.
TEST_F(HnswSparseStreamerTest, TestCreateIterator) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  ailego::Params streamer_params;
  streamer_params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 10);
  streamer_params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 5);
  streamer_params.set(PARAM_HNSW_SPARSE_STREAMER_FILTER_SAME_KEY, true);

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ailego::Params storage_params;
  ASSERT_EQ(0, storage->init(storage_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestCreateIterator", true));
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, streamer_params));
  ASSERT_EQ(0, streamer->open(storage));

  // Iterates the whole index and expects exactly `expected_total` documents
  // with consecutive keys starting at 0.
  auto verifyIteration = [](size_t expected_total,
                            IndexStreamer::Pointer &target) {
    auto sparse_provider = target->create_sparse_provider();
    auto it = sparse_provider->create_iterator();
    ASSERT_TRUE(!!it);
    size_t expected_key = 0;
    for (; it->is_valid(); it->next(), ++expected_key) {
      float *values = (float *)it->sparse_data();
      ASSERT_EQ(expected_key, it->key());
      for (size_t slot = 0; slot < sparse_dim_count; ++slot) {
        ASSERT_FLOAT_EQ(static_cast<float>(expected_key), values[slot]);
      }
    }
    ASSERT_EQ(expected_key, expected_total);
  };

  const size_t doc_count = 200;
  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  for (size_t key = 0; key < doc_count; ++key) {
    NumericalVector<uint32_t> indices(sparse_dim_count);
    NumericalVector<float> values(sparse_dim_count);
    for (size_t slot = 0; slot < sparse_dim_count; ++slot) {
      indices[slot] = slot * 20;
      values[slot] = static_cast<float>(key);
    }

    ASSERT_EQ(0, streamer->add_impl(key, sparse_dim_count, indices.data(),
                                    values.data(), qmeta, ctx));
    verifyIteration(key + 1, streamer);
  }

  // Persist, close and reopen on the same storage; the data must survive.
  streamer->flush(0UL);
  streamer->close();
  ASSERT_EQ(0, streamer->open(storage));
  verifyIteration(doc_count, streamer);

  // Random access by key through the provider.
  auto provider = streamer->create_sparse_provider();
  for (size_t key = 0; key < doc_count; ++key) {
    uint32_t fetched_count;
    std::string indices_buffer;
    std::string values_buffer;

    ASSERT_EQ(0, provider->get_sparse_vector(key, &fetched_count,
                                             &indices_buffer, &values_buffer));

    const float *fetched_values =
        reinterpret_cast<const float *>(values_buffer.data());
    ASSERT_EQ(fetched_count, sparse_dim_count);
    for (size_t slot = 0; slot < fetched_count; ++slot) {
      ASSERT_FLOAT_EQ(fetched_values[slot], static_cast<float>(key));
    }
  }
}

// Calls cleanup() on a streamer that was created but never init()-ed or
// opened; the test only requires that the call completes.
TEST_F(HnswSparseStreamerTest, TestNoInit) {
  IndexStreamer::Pointer uninitialized =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_FALSE(uninitialized == nullptr);

  uninitialized->cleanup();
}


// Same scenario as an add-then-iterate test, but with the mmap storage's
// "copy_on_write" and "force_flush" options switched on. After flushing and
// closing both the streamer and the storage, a brand-new storage object is
// opened on the same path and the streamer must still see all 200 documents.
TEST_F(HnswSparseStreamerTest, TestForceFlush) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  ailego::Params params;
  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 10);
  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 5);
  params.set(PARAM_HNSW_SPARSE_STREAMER_FILTER_SAME_KEY, true);
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ailego::Params stg_params;
  stg_params.set("proxima.mmap_file.storage.copy_on_write", true);
  stg_params.set("proxima.mmap_file.storage.force_flush", true);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestForceFlush", true));
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  // Walks the provider iterator and expects `total` documents with keys
  // 0..total-1, each holding sparse_dim_count values equal to its key.
  auto checkIter = [](size_t total, IndexStreamer::Pointer &streamer) {
    auto provider = streamer->create_sparse_provider();
    auto iter = provider->create_iterator();
    ASSERT_TRUE(!!iter);
    size_t cur = 0;
    while (iter->is_valid()) {
      ASSERT_EQ(cur, iter->key());
      const uint32_t sparse_count = iter->sparse_count();
      ASSERT_EQ(sparse_count, sparse_dim_count);

      const float *data = reinterpret_cast<const float *>(iter->sparse_data());
      for (size_t j = 0; j < sparse_dim_count; ++j) {
        ASSERT_FLOAT_EQ((float)cur, data[j]);
      }

      iter->next();
      cur++;
    }
    ASSERT_EQ(cur, total);
  };

  size_t cnt = 200;
  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  auto ctx = streamer->create_context();
  // Guard against a failed context creation before it is handed to add_impl,
  // as the sibling tests do.
  ASSERT_TRUE(!!ctx);

  for (size_t i = 0; i < cnt; ++i) {
    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
    NumericalVector<float> sparse_velues(sparse_dim_count);

    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices[j] = j * 20;
      sparse_velues[j] = i;
    }

    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),
                                    sparse_velues.data(), qmeta, ctx));
    checkIter(i + 1, streamer);
  }

  // Tear everything down, including the storage itself.
  streamer->flush(0UL);
  streamer->close();
  storage->close();

  // Reopen through a new storage instance on the same path.
  storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestForceFlush", true));
  ASSERT_EQ(0, streamer->open(storage));
  checkIter(cnt, streamer);

  // check getVector
  auto provider = streamer->create_sparse_provider();
  for (size_t i = 0; i < cnt; i++) {
    uint32_t sparse_count;
    std::string sparse_indices_buffer;
    std::string sparse_values_buffer;

    ASSERT_EQ(
        0, provider->get_sparse_vector(i, &sparse_count, &sparse_indices_buffer,
                                       &sparse_values_buffer));

    const float *sparse_values_ptr =
        reinterpret_cast<const float *>(sparse_values_buffer.data());
    ASSERT_EQ(sparse_count, sparse_dim_count);
    for (size_t j = 0; j < sparse_count; ++j) {
      ASSERT_FLOAT_EQ(sparse_values_ptr[j], i);
    }
  }
}

// Fills the index from three concurrent writer tasks (1000 keys each),
// reopens the streamer to verify the stored documents, and then runs three
// concurrent search tasks that compare graph search against brute-force
// search and require a recall above 0.80.
TEST_F(HnswSparseStreamerTest, TestKnnMultiThread) {
  // Static storage lets the lambdas below use it without capturing it.
  static constexpr size_t sparse_dim_count = 32;

  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  IndexMeta meta(IndexMeta::MetaType::MT_SPARSE, IndexMeta::DataType::DT_FP32);
  meta.set_metric("InnerProductSparse", 0, ailego::Params());

  ailego::Params params;
  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 128);
  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 10);
  params.set(PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION, 64);
  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_INDEX_SIZE, 30 * 1024 * 1024U);
  params.set(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
  params.set(PARAM_HNSW_SPARSE_STREAMER_EF, 32);
  params.set(PARAM_HNSW_SPARSE_STREAMER_GET_VECTOR_ENABLE, true);
  ASSERT_EQ(0, streamer->init(meta, params));

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ailego::Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "TessKnnMultiThread", true));
  ASSERT_EQ(0, streamer->open(storage));

  // Writer task: inserts keys [baseKey, baseKey + addCnt), each document
  // holding its own key as the value of every sparse entry, and returns how
  // many insertions succeeded.
  auto addVector = [&streamer](int baseKey, size_t addCnt) {
    IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                         IndexMeta::DataType::DT_FP32);
    auto writerCtx = streamer->create_context();
    size_t succAdd = 0;
    for (size_t offset = 0; offset < addCnt; ++offset) {
      NumericalVector<uint32_t> indices(sparse_dim_count);
      NumericalVector<float> values(sparse_dim_count);

      for (size_t slot = 0; slot < sparse_dim_count; ++slot) {
        indices[slot] = slot * 20;
        values[slot] = static_cast<float>(offset) + baseKey;
      }

      const auto rc =
          streamer->add_impl(baseKey + offset, sparse_dim_count,
                             indices.data(), values.data(), qmeta, writerCtx);
      if (!rc) {
        ++succAdd;
      }
    }
    streamer->flush(0UL);
    return succAdd;
  };

  // Launch order is middle, high, low key range; join order is low first.
  auto midRange = std::async(std::launch::async, addVector, 1000, 1000);
  auto highRange = std::async(std::launch::async, addVector, 2000, 1000);
  auto lowRange = std::async(std::launch::async, addVector, 0, 1000);
  ASSERT_EQ(1000U, lowRange.get());
  ASSERT_EQ(1000U, midRange.get());
  ASSERT_EQ(1000U, highRange.get());
  streamer->close();

  // Reopen and verify what was stored: 3000 documents, keys spanning 0..2999,
  // every value equal to the document key.
  ASSERT_EQ(0, streamer->open(storage));
  auto provider = streamer->create_sparse_provider();
  auto iter = provider->create_iterator();
  ASSERT_TRUE(!!iter);
  size_t docTotal = 0;
  uint64_t smallestKey = 1000;
  uint64_t largestKey = 0;
  for (; iter->is_valid(); iter->next()) {
    const uint32_t entryCount = iter->sparse_count();
    ASSERT_EQ(entryCount, sparse_dim_count);

    const float *stored = reinterpret_cast<const float *>(iter->sparse_data());
    for (size_t slot = 0; slot < sparse_dim_count; ++slot) {
      ASSERT_EQ(static_cast<float>(iter->key()), stored[slot]);
    }
    ++docTotal;
    smallestKey = std::min(smallestKey, iter->key());
    largestKey = std::max(largestKey, iter->key());
  }

  ASSERT_EQ(3000, docTotal);
  ASSERT_EQ(0, smallestKey);
  ASSERT_EQ(2999, largestKey);

  // ====== multi thread search
  size_t topk = 10;
  size_t cnt = 3000;
  // Search task: for each of `cnt` queries runs graph search, brute-force
  // search and key-restricted brute-force search, then accumulates how many
  // graph-search hits also appear in the brute-force result.
  auto knnSearch = [&]() {
    auto bfCtx = streamer->create_context();
    auto bfByKeysCtx = streamer->create_context();
    auto graphCtx = streamer->create_context();
    IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                         IndexMeta::DataType::DT_FP32);
    bfCtx->set_topk(topk);
    bfByKeysCtx->set_topk(topk);
    graphCtx->set_topk(topk);
    size_t totalCnts = 0;
    size_t totalHits = 0;
    for (size_t i = 0; i < cnt; ++i) {
      NumericalVector<uint32_t> indices(sparse_dim_count);
      NumericalVector<float> values(sparse_dim_count);

      for (size_t slot = 0; slot < sparse_dim_count; ++slot) {
        indices[slot] = slot * 20;
        values[slot] = (static_cast<float>(i) + 1.1f);
      }

      ASSERT_EQ(0, streamer->search_impl(sparse_dim_count, indices.data(),
                                         values.data(), qmeta, graphCtx));
      ASSERT_EQ(0, streamer->search_bf_impl(sparse_dim_count, indices.data(),
                                            values.data(), qmeta, bfCtx));
      std::vector<std::vector<uint64_t>> p_keys = {{cnt - 1, cnt - 2, cnt - 3}};
      ASSERT_EQ(0, streamer->search_bf_by_p_keys_impl(
                       sparse_dim_count, indices.data(), values.data(), p_keys,
                       qmeta, bfByKeysCtx));
      auto &r1 = graphCtx->result();
      ASSERT_EQ(topk, r1.size());
      auto &r2 = bfCtx->result();
      ASSERT_EQ(topk, r2.size());
      ASSERT_EQ(cnt - 1, r2[0].key());
      auto &r3 = bfByKeysCtx->result();
      ASSERT_EQ(std::min(topk, p_keys[0].size()), r3.size());
#if 0
            printf("linear: %zd => %zd %zd %zd %zd %zd\n", i, r2[0].key,
                   r2[1].key, r2[2].key, r2[3].key, r2[4].key);
            printf("knn: %zd => %zd %zd %zd %zd %zd\n", i, r1[0].key, r1[1].key,
                   r1[2].key, r1[3].key, r1[4].key);
#endif
      // Count each graph-search hit that brute force also returned.
      for (size_t k = 0; k < topk; ++k, ++totalCnts) {
        bool matched = false;
        for (size_t j = 0; j < topk && !matched; ++j) {
          matched = (r2[j].key() == r1[k].key());
        }
        if (matched) {
          ++totalHits;
        }
      }
    }
    const float recall = totalHits * 1.0f / totalCnts;
    printf("%f\n", recall);
    ASSERT_TRUE(recall > 0.80f);
  };
  auto searcherA = std::async(std::launch::async, knnSearch);
  auto searcherB = std::async(std::launch::async, knnSearch);
  auto searcherC = std::async(std::launch::async, knnSearch);
  searcherA.wait();
  searcherB.wait();
  searcherC.wait();
}

// Runs writer tasks and search tasks against the same streamer at the same
// time. The first 1000 keys are inserted up front; two more writers then add
// keys 1000..2999 while two searchers compare graph search with brute-force
// search. Afterwards the full contents of the index are verified.
TEST_F(HnswSparseStreamerTest, TestKnnConcurrentAddAndSearch) {
  // Static, so the lambdas below can use it without capturing it.
  constexpr size_t static sparse_dim_count = 32;

  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  ailego::Params params;

  IndexMeta meta(IndexMeta::MetaType::MT_SPARSE, IndexMeta::DataType::DT_FP32);
  // meta.set_metric("InnerProductSparse", 0, ailego::Params());
  meta.set_metric("SquaredEuclideanSparse", 0, ailego::Params());
  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 128);
  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 10);
  params.set(PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION, 64);
  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_INDEX_SIZE, 30 * 1024 * 1024U);
  params.set(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
  params.set(PARAM_HNSW_SPARSE_STREAMER_CHUNK_SIZE, 4096);
  params.set(PARAM_HNSW_SPARSE_STREAMER_EF, 32);
  params.set(PARAM_HNSW_SPARSE_STREAMER_GET_VECTOR_ENABLE, true);
  ASSERT_EQ(0, streamer->init(meta, params));
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ailego::Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "TessKnnConcurrentAddAndSearch", true));
  ASSERT_EQ(0, streamer->open(storage));

  // Writer task: inserts keys [baseKey, baseKey + addCnt); every sparse value
  // of a document equals its key. Returns the number of successful adds
  // (add_impl returning 0), then flushes the streamer.
  auto addVector = [&streamer](int baseKey, size_t addCnt) {
    IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                         IndexMeta::DataType::DT_FP32);
    size_t succAdd = 0;
    auto ctx = streamer->create_context();
    for (size_t i = 0; i < addCnt; i++) {
      NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
      NumericalVector<float> sparse_velues(sparse_dim_count);

      for (size_t j = 0; j < sparse_dim_count; ++j) {
        sparse_indices[j] = j * 20;
        sparse_velues[j] = (float)i + baseKey;
      }

      succAdd += !streamer->add_impl(baseKey + i, sparse_dim_count,
                                     sparse_indices.data(),
                                     sparse_velues.data(), qmeta, ctx);
    }
    streamer->flush(0UL);
    return succAdd;
  };

  // Search task: issues 3000 all-negative queries. For each query it runs
  // graph search, brute-force search and brute-force search restricted to
  // keys {0, 1, 2}, and accumulates how many graph-search hits also appear
  // in the brute-force result.
  auto knnSearch = [&]() {
    size_t topk = 100;
    size_t cnt = 3000;
    auto linearCtx = streamer->create_context();
    auto linearByPkeysCtx = streamer->create_context();
    auto ctx = streamer->create_context();
    IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                         IndexMeta::DataType::DT_FP32);
    linearCtx->set_topk(topk);
    linearByPkeysCtx->set_topk(topk);
    ctx->set_topk(topk);
    size_t totalCnts = 0;
    size_t totalHits = 0;
    for (size_t i = 0; i < cnt; i += 1) {
      NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
      NumericalVector<float> sparse_velues(sparse_dim_count);

      for (size_t j = 0; j < sparse_dim_count; ++j) {
        sparse_indices[j] = j * 20;
        sparse_velues[j] = -((float)i + 1.1f);
      }

      ASSERT_EQ(0,
                streamer->search_impl(sparse_dim_count, sparse_indices.data(),
                                      sparse_velues.data(), qmeta, ctx));
      ASSERT_EQ(
          0, streamer->search_bf_impl(sparse_dim_count, sparse_indices.data(),
                                      sparse_velues.data(), qmeta, linearCtx));
      std::vector<std::vector<uint64_t>> p_keys = {{0, 1, 2}};
      ASSERT_EQ(0, streamer->search_bf_by_p_keys_impl(
                       sparse_dim_count, sparse_indices.data(),
                       sparse_velues.data(), p_keys, qmeta, linearByPkeysCtx));
      auto &r1 = ctx->result();
      ASSERT_EQ(topk, r1.size());
      auto &r2 = linearCtx->result();
      ASSERT_EQ(topk, r2.size());
      // Key 0 is inserted before the searchers start and is expected to be
      // the best brute-force match for every query.
      ASSERT_EQ(0, r2[0].key());
      auto &r3 = linearByPkeysCtx->result();
      ASSERT_EQ(std::min(topk, p_keys[0].size()), r3.size());
#if 0
            printf("linear: %zd => %zd %zd %zd %zd %zd\n", i, r2[0].key,
                   r2[1].key, r2[2].key, r2[3].key, r2[4].key);
            printf("knn: %zd => %zd %zd %zd %zd %zd\n", i, r1[0].key, r1[1].key,
                   r1[2].key, r1[3].key, r1[4].key);
#endif
      // Recall bookkeeping: one count per graph-search hit, one hit when the
      // same key is present anywhere in the brute-force result.
      for (size_t k = 0; k < topk; ++k) {
        totalCnts++;
        for (size_t j = 0; j < topk; ++j) {
          if (r2[j].key() == r1[k].key()) {
            totalHits++;
            break;
          }
        }
      }
    }
    printf("%f\n", totalHits * 1.0f / totalCnts);
    ASSERT_TRUE((totalHits * 1.0f / totalCnts) > 0.80f);
  };

  // Seed the index with keys 0..999 and wait for completion, so the
  // searchers always have at least 1000 documents to work with.
  auto t0 = std::async(std::launch::async, addVector, 0, 1000);
  ASSERT_EQ(1000, t0.get());
  // From here on, two writers and two searchers run concurrently.
  auto t1 = std::async(std::launch::async, addVector, 1000, 1000);
  auto t2 = std::async(std::launch::async, addVector, 2000, 1000);
  auto s1 = std::async(std::launch::async, knnSearch);
  auto s2 = std::async(std::launch::async, knnSearch);
  ASSERT_EQ(1000, t1.get());
  ASSERT_EQ(1000, t2.get());
  s1.wait();
  s2.wait();

  // checking data
  // All 3000 documents must be present, with keys spanning 0..2999 and every
  // stored value equal to the document key.
  auto provider = streamer->create_sparse_provider();
  auto iter = provider->create_iterator();
  ASSERT_TRUE(!!iter);
  size_t total = 0;
  uint64_t min = 1000;
  uint64_t max = 0;
  while (iter->is_valid()) {
    const uint32_t sparse_count = iter->sparse_count();
    ASSERT_EQ(sparse_count, sparse_dim_count);

    const float *data = reinterpret_cast<const float *>(iter->sparse_data());
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      ASSERT_FLOAT_EQ((float)iter->key(), data[j]);
    }
    total++;
    min = std::min(min, iter->key());
    max = std::max(max, iter->key());
    iter->next();
  }

  ASSERT_EQ(3000, total);
  ASSERT_EQ(0, min);
  ASSERT_EQ(2999, max);
}

// Checks that PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD influences the
// search cost. One index with `cnt` documents is built and closed, then the
// same storage is reopened by two streamers whose thresholds are `cnt - 1` and
// `cnt`. The accumulated search time of the first must be lower than that of
// the second (presumably because only the second falls back to a linear scan
// -- confirm against the streamer implementation).
TEST_F(HnswSparseStreamerTest, TestBfThreshold) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);
  ailego::Params params;
  params.set(PARAM_HNSW_SPARSE_STREAMER_EF, 16);
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ailego::Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "TessBfThreshold", true));
  ASSERT_EQ(0, streamer->open(storage));

  size_t cnt = 10000;
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  ctx->set_topk(1U);
  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);

  // Build the index: document i carries the value (i + 1) in every dimension.
  NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
  NumericalVector<float> sparse_velues(sparse_dim_count);
  for (size_t i = 0; i < cnt; i++) {
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices[j] = j * 20;
      sparse_velues[j] = (float)i + 1.0f;
    }

    // A failed insert would silently change the document count that the
    // thresholds below are calibrated against, so fail fast here.
    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),
                                    sparse_velues.data(), qmeta, ctx));
  }
  streamer->flush(0UL);
  streamer->close();

  // Reopen with a threshold just below the document count.
  IndexStreamer::Pointer streamer1 =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_NE(streamer1, nullptr);
  auto params1 = params;
  params1.set(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD, cnt - 1);
  ASSERT_EQ(0, streamer1->init(*index_meta_ptr_, params1));
  ASSERT_EQ(0, streamer1->open(storage));
  auto ctx1 = streamer1->create_context();
  ASSERT_TRUE(!!ctx1);

  // Reopen with a threshold equal to the document count.
  IndexStreamer::Pointer streamer2 =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_NE(streamer2, nullptr);
  auto params2 = params;
  params2.set(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD, cnt);
  ASSERT_EQ(0, streamer2->init(*index_meta_ptr_, params2));
  ASSERT_EQ(0, streamer2->open(storage));
  auto ctx2 = streamer2->create_context();
  ASSERT_TRUE(!!ctx2);

  // Run the same query (the last vector written above) against both streamers
  // and accumulate the elapsed microseconds of each.
  size_t cost1 = 0;
  size_t cost2 = 0;
  for (size_t i = 0; i < 100; ++i) {
    auto t1 = ailego::Monotime::MicroSeconds();
    ASSERT_EQ(0, streamer1->search_impl(sparse_dim_count, sparse_indices.data(),
                                        sparse_velues.data(), qmeta, ctx1));
    auto t2 = ailego::Monotime::MicroSeconds();
    ASSERT_EQ(0, streamer2->search_impl(sparse_dim_count, sparse_indices.data(),
                                        sparse_velues.data(), qmeta, ctx2));
    auto t3 = ailego::Monotime::MicroSeconds();
    cost1 += t2 - t1;
    cost2 += t3 - t2;
  }

  ASSERT_LT(cost1, cost2);

  // Exercise a context parameter update plus debug mode on a live context.
  ailego::Params update_params;
  update_params.set(PARAM_HNSW_SPARSE_STREAMER_VISIT_BLOOMFILTER_ENABLE, true);
  update_params.set(PARAM_HNSW_SPARSE_STREAMER_EF, 50);
  ctx1->set_debug_mode(true);
  ctx1->update(update_params);
  ASSERT_EQ(0, streamer1->search_impl(sparse_dim_count, sparse_indices.data(),
                                      sparse_velues.data(), qmeta, ctx1));
  LOG_DEBUG("%s", ctx1->debug_string().c_str());
}

// Verifies that a key filter installed on the context is honoured by all three
// search paths: graph search, brute-force search and brute-force search
// restricted to a primary-key list. Keys 0 and 3 are filtered out, so the
// leading results move from {0, 1, 2} to {1, 2, 4}.
TEST_F(HnswSparseStreamerTest, TestFilter) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  ailego::Params params;
  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 50);
  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 16);
  params.set(PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION, 500);
  params.set(PARAM_HNSW_SPARSE_STREAMER_EF, 1000);
  params.set(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
  params.set(PARAM_HNSW_SPARSE_STREAMER_GET_VECTOR_ENABLE, true);
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ailego::Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "TestFilter", true));
  ASSERT_EQ(0, streamer->open(storage));

  size_t cnt = 100UL;
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  ctx->set_topk(10U);
  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  std::vector<std::vector<uint64_t>> p_keys;
  p_keys.resize(1);

  // Insert `cnt` documents; document i carries (i + 1) in every dimension and
  // its key is also recorded for the by-primary-key search below.
  NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
  NumericalVector<float> sparse_velues(sparse_dim_count);
  for (size_t i = 0; i < cnt; i++) {
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices[j] = j * 20;
      sparse_velues[j] = (float)i + 1.0f;
    }

    // The ranking assertions below assume every document was stored.
    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),
                                    sparse_velues.data(), qmeta, ctx));
    p_keys[0].push_back(i);
  }

  // Query with a constant negative vector; the test expects keys 0, 1, 2 to
  // rank first for it.
  for (size_t j = 0; j < sparse_dim_count; ++j) {
    sparse_indices[j] = j * 20;
    sparse_velues[j] = -100.1f;
  }
  ASSERT_EQ(0, streamer->search_impl(sparse_dim_count, sparse_indices.data(),
                                     sparse_velues.data(), qmeta, ctx));
  auto &results = ctx->result();
  ASSERT_EQ(10, results.size());
  ASSERT_EQ(0, results[0].key());
  ASSERT_EQ(1, results[1].key());
  ASSERT_EQ(2, results[2].key());

  // Returning true marks a key as filtered out (keys 0 and 3 disappear from
  // the expected results below).
  auto filterFunc = [](uint64_t key) { return key == 0UL || key == 3UL; };
  ctx->set_filter(filterFunc);

  // after set filter
  ASSERT_EQ(0, streamer->search_impl(sparse_dim_count, sparse_indices.data(),
                                     sparse_velues.data(), qmeta, ctx));
  auto &results1 = ctx->result();
  ASSERT_EQ(10, results1.size());
  ASSERT_EQ(1, results1[0].key());
  ASSERT_EQ(2, results1[1].key());
  ASSERT_EQ(4, results1[2].key());

  // linear
  ASSERT_EQ(0, streamer->search_bf_impl(sparse_dim_count, sparse_indices.data(),
                                        sparse_velues.data(), qmeta, ctx));
  auto &results2 = ctx->result();
  ASSERT_EQ(10, results2.size());
  ASSERT_EQ(1, results2[0].key());
  ASSERT_EQ(2, results2[1].key());
  ASSERT_EQ(4, results2[2].key());

  // linear by p_keys
  ASSERT_EQ(0, streamer->search_bf_by_p_keys_impl(
                   sparse_dim_count, sparse_indices.data(),
                   sparse_velues.data(), p_keys, qmeta, ctx));
  auto &results3 = ctx->result();
  ASSERT_EQ(10, results3.size());
  ASSERT_EQ(1, results3[0].key());
  ASSERT_EQ(2, results3[1].key());
  ASSERT_EQ(4, results3[2].key());
}

// Compares the growth of the process's resident set size while inserting
// 10000 sparse vectors with an estimate of the bytes written; the estimate
// must lie strictly between 80% and 120% of the observed growth. The test is
// skipped when memory usage cannot be read or when the virtual size is above
// 1 TiB (treated as an ASAN run).
TEST_F(HnswSparseStreamerTest, TestMaxIndexSize) {
  constexpr size_t static kDims = 128;

  IndexStreamer::Pointer hnsw =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_TRUE(hnsw != nullptr);

  ailego::Params build_params;

  IndexMeta sparse_meta(IndexMeta::MetaType::MT_SPARSE,
                        IndexMeta::DataType::DT_FP32);
  sparse_meta.set_metric("InnerProductSparse", 0, ailego::Params());
  build_params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 5);
  ASSERT_EQ(0, hnsw->init(sparse_meta, build_params));
  auto file_storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, file_storage);
  ailego::Params storage_params;
  ASSERT_EQ(0, file_storage->init(storage_params));
  ASSERT_EQ(0, file_storage->open(dir_ + "TestMaxIndexSize", true));
  ASSERT_EQ(0, hnsw->open(file_storage));

  // Baseline memory usage. Bail out quietly if it cannot be read, or if the
  // virtual size is implausibly large (asan mode).
  size_t vsz_before = 0;
  size_t rss_before = 0;
  if (!ailego::MemoryHelper::SelfUsage(&vsz_before, &rss_before) ||
      vsz_before > 1024 * 1024 * 1024 * 1024UL) {
    return;
  }

  size_t doc_total = 10000;
  IndexQueryMeta query_meta(IndexMeta::MetaType::MT_SPARSE,
                            IndexMeta::DataType::DT_FP32);
  auto context = hnsw->create_context();

  for (size_t doc = 0; doc < doc_total; ++doc) {
    // Fresh buffers per document, as the measurement includes whatever the
    // test itself allocates.
    NumericalVector<uint32_t> indices(kDims);
    NumericalVector<float> values(kDims);

    for (size_t d = 0; d < kDims; ++d) {
      indices[d] = d * 20;
      values[d] = doc;
    }

    ASSERT_EQ(0, hnsw->add_impl(doc, kDims, indices.data(), values.data(),
                                query_meta, context));
  }

  size_t vsz_after = 0;
  size_t rss_after = 0;
  ailego::MemoryHelper::SelfUsage(&vsz_after, &rss_after);
  size_t increment = rss_after - rss_before;

  // Estimated bytes written: per-dimension payload plus two fixed per-document
  // terms (32 and 100 * 4 bytes; presumably metadata and neighbor links --
  // TODO confirm against the on-disk layout).
  size_t total_write =
      doc_total * kDims * (sizeof(uint16_t) + sizeof(float)) +
      doc_total * 32 + doc_total * 100 * 4;

  ASSERT_GT(total_write, increment * 0.8f);
  ASSERT_LT(total_write, increment * 1.2f);

  LOG_DEBUG("total write: %zu, increment: %zu", total_write, increment);

  hnsw->flush(0UL);
  hnsw.reset();
}

// Verifies that a streamer can be reused after close() + cleanup(): it is
// initialised, opened on a first storage, fed one 32-dimension vector, torn
// down, and then the same object is initialised again on a second storage and
// fed one 64-dimension vector.
TEST_F(HnswSparseStreamerTest, TestKnnCleanUp) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  auto storage1 = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage1);
  ailego::Params stg_params;
  ASSERT_EQ(0, storage1->init(stg_params));
  ASSERT_EQ(0, storage1->open(dir_ + "TestKnnCluenUp1", true));
  ailego::Params params;

  // ---- first life cycle: 32 dimensions ----
  constexpr size_t static sparse_dim_count1 = 32;
  IndexMeta meta1(IndexMeta::MetaType::MT_SPARSE, IndexMeta::DataType::DT_FP32);
  meta1.set_metric("InnerProductSparse", 0, ailego::Params());

  ASSERT_EQ(0, streamer->init(meta1, params));
  ASSERT_EQ(0, streamer->open(storage1));
  IndexQueryMeta qmeta1(IndexMeta::MetaType::MT_SPARSE,
                        IndexMeta::DataType::DT_FP32);
  auto ctx1 = streamer->create_context();

  NumericalVector<uint32_t> sparse_indices1(sparse_dim_count1);
  NumericalVector<float> sparse_velues1(sparse_dim_count1);

  for (size_t j = 0; j < sparse_dim_count1; ++j) {
    sparse_indices1[j] = j * 20;
    sparse_velues1[j] = 1.1f;
  }
  // The count passed here must match the buffers built above; using the
  // fixture-wide dimension count could read past their end.
  ASSERT_EQ(0, streamer->add_impl(1, sparse_dim_count1, sparse_indices1.data(),
                                  sparse_velues1.data(), qmeta1, ctx1));
  ASSERT_EQ(0, streamer->close());
  ASSERT_EQ(0, streamer->cleanup());

  // ---- second life cycle on the same streamer object: 64 dimensions ----
  auto storage2 = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage2);
  ASSERT_EQ(0, storage2->init(stg_params));
  ASSERT_EQ(0, storage2->open(dir_ + "TestKnnCluenUp2", true));

  constexpr size_t static sparse_dim_count2 = 64;
  IndexMeta meta2(IndexMeta::MetaType::MT_SPARSE, IndexMeta::DataType::DT_FP32);
  meta2.set_metric("InnerProductSparse", 0, ailego::Params());

  ASSERT_EQ(0, streamer->init(meta2, params));
  ASSERT_EQ(0, streamer->open(storage2));
  IndexQueryMeta qmeta2(IndexMeta::MetaType::MT_SPARSE,
                        IndexMeta::DataType::DT_FP32);
  auto ctx2 = streamer->create_context();

  NumericalVector<uint32_t> sparse_indices2(sparse_dim_count2);
  NumericalVector<float> sparse_velues2(sparse_dim_count2);

  for (size_t j = 0; j < sparse_dim_count2; ++j) {
    sparse_indices2[j] = j * 20;
    sparse_velues2[j] = 1.1f;
  }

  // Insert the 64-dimension vector prepared for this round (not the buffers
  // left over from the first round).
  ASSERT_EQ(0, streamer->add_impl(2, sparse_dim_count2, sparse_indices2.data(),
                                  sparse_velues2.data(), qmeta2, ctx2));
  ASSERT_EQ(0, streamer->close());
  ASSERT_EQ(0, streamer->cleanup());
}

// Configures a 2 MiB maximum index size with 100 KiB chunks, then inserts 850
// vectors of 512 dimensions each. Insert errors do not stop the loop; the most
// recent non-zero return code must be IndexError_IndexFull.
TEST_F(HnswSparseStreamerTest, TestIndexSizeQuota) {
  constexpr size_t static kDims = 512;

  IndexStreamer::Pointer hnsw =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_TRUE(hnsw != nullptr);

  auto file_storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, file_storage);
  ailego::Params storage_params;
  ASSERT_EQ(0, file_storage->init(storage_params));
  ASSERT_EQ(0, file_storage->open(dir_ + "TestIndexSizeQuota", true));
  ailego::Params quota_params;

  IndexMeta sparse_meta(IndexMeta::MetaType::MT_SPARSE,
                        IndexMeta::DataType::DT_FP32);
  sparse_meta.set_metric("InnerProductSparse", 0, ailego::Params());
  quota_params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_INDEX_SIZE, 2 * 1024 * 1024U);
  quota_params.set(PARAM_HNSW_SPARSE_STREAMER_CHUNK_SIZE, 100 * 1024U);
  ASSERT_EQ(0, hnsw->init(sparse_meta, quota_params));
  ASSERT_EQ(0, hnsw->open(file_storage));

  size_t doc_total = 850;
  int last_error = 0;
  auto context = hnsw->create_context();
  IndexQueryMeta query_meta(IndexMeta::MetaType::MT_SPARSE,
                            IndexMeta::DataType::DT_FP32);

  for (size_t doc = 0; doc < doc_total; ++doc) {
    NumericalVector<uint32_t> indices(kDims);
    NumericalVector<float> values(kDims);

    for (size_t d = 0; d < kDims; ++d) {
      indices[d] = d * 20;
      values[d] = doc;
    }

    // Keep going after a failure; only remember the latest error code.
    const int status = hnsw->add_impl(doc, kDims, indices.data(),
                                      values.data(), query_meta, context);
    if (status != 0) {
      last_error = status;
    }
  }

  ASSERT_EQ(IndexError_IndexFull, last_error);
  ASSERT_EQ(0, hnsw->close());
  ASSERT_EQ(0, hnsw->cleanup());
}

// Interleaves inserts and searches on a streamer that has the visit bloom
// filter enabled: 5000 documents are added, and after every 10th insert a
// top-10 search with the vector just written must return exactly 10 results.
TEST_F(HnswSparseStreamerTest, TestBloomFilter) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ailego::Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "TestBloomFilter", true));
  ailego::Params params;
  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 10);
  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 16);
  params.set(PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION, 10);
  params.set(PARAM_HNSW_SPARSE_STREAMER_EF, 100);
  params.set(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
  params.set(PARAM_HNSW_SPARSE_STREAMER_VISIT_BLOOMFILTER_ENABLE, true);
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  auto ctx = streamer->create_context();
  ASSERT_NE(nullptr, ctx);
  ctx->set_topk(10U);
  size_t cnt = 5000;
  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  for (size_t i = 0; i < cnt; i++) {
    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
    NumericalVector<float> sparse_velues(sparse_dim_count);

    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices[j] = j * 20;
      sparse_velues[j] = i;
    }

    // Surface insert failures directly instead of letting them show up later
    // as a confusing result-count mismatch.
    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),
                                    sparse_velues.data(), qmeta, ctx));

    // Every 10th insert: at least 10 documents exist, so top-10 must be full.
    if ((i + 1) % 10 == 0) {
      ASSERT_EQ(0,
                streamer->search_impl(sparse_dim_count, sparse_indices.data(),
                                      sparse_velues.data(), qmeta, ctx));
      auto &results = ctx->result();
      ASSERT_EQ(10, results.size());
    }
  }
}

// Sets "proxima.hnsw.sparse_streamer.docs_hard_limit" to 5 and checks that
// keys 1..5 are accepted while a sixth insert is rejected with
// IndexError_IndexFull.
TEST_F(HnswSparseStreamerTest, TestStreamerParams) {
  IndexStreamer::Pointer hnsw =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_TRUE(hnsw != nullptr);

  auto file_storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, file_storage);
  ailego::Params storage_params;
  ASSERT_EQ(0, file_storage->init(storage_params));
  ASSERT_EQ(0, file_storage->open(dir_ + "TestStreamerParams", true));
  ailego::Params limit_params;
  limit_params.set("proxima.hnsw.sparse_streamer.docs_hard_limit", 5);
  ASSERT_EQ(0, hnsw->init(*index_meta_ptr_, limit_params));
  ASSERT_EQ(0, hnsw->open(file_storage));

  IndexQueryMeta query_meta(IndexMeta::MetaType::MT_SPARSE,
                            IndexMeta::DataType::DT_FP32);
  auto context = hnsw->create_context();

  // One constant vector is reused for every insert; only the key changes.
  NumericalVector<uint32_t> indices(sparse_dim_count);
  NumericalVector<float> values(sparse_dim_count);
  for (size_t d = 0; d < sparse_dim_count; ++d) {
    indices[d] = d * 20;
    values[d] = 1.1f;
  }

  // Keys 1 through 5 fit within the configured limit.
  for (size_t key = 1; key <= 5; ++key) {
    ASSERT_EQ(0, hnsw->add_impl(key, sparse_dim_count, indices.data(),
                                values.data(), query_meta, context));
  }

  // The sixth document exceeds the limit.
  ASSERT_EQ(IndexError_IndexFull,
            hnsw->add_impl(6, sparse_dim_count, indices.data(), values.data(),
                           query_meta, context));
}

// Follows the streamer statistics through a full life cycle: fresh open,
// 3000 inserts, flush with a check point, close + reopen, one more insert, a
// rejected duplicate insert, and finally a dump to a file.
TEST_F(HnswSparseStreamerTest, TestCheckStats) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ailego::Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  std::string path = dir_ + "/TestCheckStats.index";
  ASSERT_EQ(0, storage->open(path, true));
  ailego::Params params;
  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 100);
  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 5);
  params.set(PARAM_HNSW_SPARSE_STREAMER_FILTER_SAME_KEY, true);
  params.set(PARAM_HNSW_SPARSE_STREAMER_CHUNK_SIZE, 512 * 1024U);
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  // ---- freshly opened, empty index ----
  auto &stats = streamer->stats();
  ASSERT_EQ(0U, stats.revision_id());
  ASSERT_EQ(0U, stats.loaded_count());
  ASSERT_EQ(0U, stats.added_count());
  ASSERT_EQ(0U, stats.discarded_count());
  // header chunk + meta chunk
  size_t init_size = ailego::MemoryHelper::PageSize() * 2;
  ASSERT_EQ(init_size, stats.index_size());
  ASSERT_EQ(0U, stats.dumped_size());
  ASSERT_EQ(0U, stats.check_point());
  auto createTime = stats.create_time();
  auto updateTime = stats.update_time();
  ASSERT_GT(createTime, 0UL);
  ASSERT_EQ(createTime, updateTime);

  auto ctx = streamer->create_context();
  ASSERT_NE(nullptr, ctx);
  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  size_t cnt = 3000;
  size_t size1 = stats.index_size();  // size before any insert
  size_t size2 = 0;                   // size after the first insert

  NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
  NumericalVector<float> sparse_velues(sparse_dim_count);

  // ---- insert `cnt` documents; added_count must track every insert ----
  for (size_t i = 0; i < cnt; i++) {
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices[j] = j * 20;
      sparse_velues[j] = i;
    }

    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),
                                    sparse_velues.data(), qmeta, ctx));

    ASSERT_EQ(i + 1, stats.added_count());
    if (i == 0UL) {
      size2 = stats.index_size();
    }
  }

  // The index must grow with the first insert and again by the last one.
  size_t size3 = stats.index_size();
  ASSERT_GT(size2, size1);
  ASSERT_GT(size3, size2);
  LOG_INFO("size1=%zu size2=%zu size3=%zu", size1, size2, size3);

  // ---- flush with a check point; the index size must not change ----
  uint64_t checkPoint = 23423UL;
  streamer->flush(checkPoint);
  size_t size4 = stats.index_size();
  ASSERT_EQ(size3, size4);
  // Copy (not a reference): keeps the pre-close values for later comparison.
  auto stats1 = streamer->stats();
  ASSERT_EQ(1U, stats1.revision_id());
  ASSERT_EQ(0U, stats1.loaded_count());
  ASSERT_EQ(cnt, stats1.added_count());
  ASSERT_EQ(0U, stats1.discarded_count());
  ASSERT_GT(stats1.index_size(), 0U);
  ASSERT_EQ(0U, stats1.dumped_size());
  ASSERT_EQ(checkPoint, stats1.check_point());
  auto createTime1 = stats1.create_time();
  auto updateTime1 = stats1.update_time();
  ASSERT_GE(updateTime1, createTime1);
  ASSERT_EQ(createTime, createTime1);
  streamer->close();

  // ---- reopen: previous documents count as loaded, new ones as added ----
  ASSERT_EQ(0, streamer->open(storage));
  auto &stats2 = streamer->stats();
  ctx = streamer->create_context();
  ASSERT_NE(nullptr, ctx);

  ASSERT_EQ(0,
            streamer->add_impl(10000UL, sparse_dim_count, sparse_indices.data(),
                               sparse_velues.data(), qmeta, ctx));

  ASSERT_EQ(2U, stats2.revision_id());
  ASSERT_EQ(cnt, stats2.loaded_count());
  ASSERT_EQ(1U, stats2.added_count());
  ASSERT_EQ(0U, stats2.discarded_count());
  // Check the live statistics of the reopened index, not the old snapshot.
  ASSERT_GT(stats2.index_size(), 0U);
  ASSERT_EQ(0U, stats2.dumped_size());
  ASSERT_EQ(checkPoint, stats2.check_point());
  auto createTime2 = stats2.create_time();
  auto updateTime2 = stats2.update_time();
  ASSERT_EQ(createTime2, createTime1);
  ASSERT_GE(updateTime2, updateTime1);

  // Presumably the sleep guarantees that the next update time is strictly
  // later than updateTime2 (see the ASSERT_GT below) -- confirm the clock
  // resolution used by the stats.
  sleep(1);
  streamer->flush(checkPoint + 1);

  // ---- key 0 already exists; the insert is rejected and counted as discarded
  ASSERT_NE(0, streamer->add_impl(0U, sparse_dim_count, sparse_indices.data(),
                                  sparse_velues.data(), qmeta, ctx));

  auto &stats3 = streamer->stats();
  ASSERT_EQ(2U, stats3.revision_id());
  ASSERT_EQ(cnt, stats3.loaded_count());
  ASSERT_EQ(1U, stats3.added_count());
  ASSERT_EQ(1U, stats3.discarded_count());
  ASSERT_EQ(stats2.index_size(), stats3.index_size());
  ASSERT_EQ(0U, stats3.dumped_size());
  ASSERT_EQ(checkPoint + 1, stats3.check_point());
  auto createTime3 = stats3.create_time();
  auto updateTime3 = stats3.update_time();
  ASSERT_EQ(createTime3, createTime1);
  ASSERT_GT(updateTime3, updateTime2);

  // ---- dump to a file and compare the file size with dumped_size ----
  auto dpath = dir_ + "/dumpIndex";
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  ASSERT_EQ(0, dumper->create(dpath));
  ASSERT_EQ(0, streamer->dump(dumper));
  ASSERT_EQ(0, dumper->close());
  size_t doc_cnt = stats3.loaded_count() + stats3.added_count();
  struct stat st;
  ASSERT_EQ(3001UL, doc_cnt);
  ASSERT_EQ(0, stat(dpath.c_str(), &st));
  // The file may exceed dumped_size by less than 8192 bytes.
  ASSERT_LT(st.st_size - stats3.dumped_size(), 8192);

  streamer->close();
}

// With PARAM_HNSW_SPARSE_STREAMER_FILTER_SAME_KEY enabled: inserts keys
// 0..999, re-inserts every 10th key and expects IndexError_Duplicate, then
// reads every vector back through the sparse provider and checks that each
// stored value equals its key.
TEST_F(HnswSparseStreamerTest, TestCheckDuplicateAndGetVector) {
  IndexStreamer::Pointer hnsw =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_TRUE(hnsw != nullptr);

  auto file_storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, file_storage);
  ailego::Params storage_params;
  ASSERT_EQ(0, file_storage->init(storage_params));
  ASSERT_EQ(0,
            file_storage->open(dir_ + "TestCheckDuplicateAndGetVector", true));
  ailego::Params build_params;
  build_params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 10);
  build_params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 5);
  build_params.set(PARAM_HNSW_SPARSE_STREAMER_FILTER_SAME_KEY, true);
  ASSERT_EQ(0, hnsw->init(*index_meta_ptr_, build_params));
  ASSERT_EQ(0, hnsw->open(file_storage));

  auto context = hnsw->create_context();
  ASSERT_NE(nullptr, context);
  IndexQueryMeta query_meta(IndexMeta::MetaType::MT_SPARSE,
                            IndexMeta::DataType::DT_FP32);

  NumericalVector<uint32_t> indices(sparse_dim_count);
  NumericalVector<float> values(sparse_dim_count);

  // Fills the shared buffers so that every dimension carries `value`.
  auto fill_vector = [&](size_t value) {
    for (size_t d = 0; d < sparse_dim_count; ++d) {
      indices[d] = d * 20;
      values[d] = value;
    }
  };

  // First pass: every key is new and must be accepted.
  for (size_t key = 0; key < 1000; key++) {
    fill_vector(key);
    ASSERT_EQ(0, hnsw->add_impl(key, sparse_dim_count, indices.data(),
                                values.data(), query_meta, context));
  }

  // Second pass: every 10th key is inserted again and must be rejected.
  for (size_t key = 0; key < 1000; key += 10) {
    fill_vector(key);
    ASSERT_EQ(IndexError_Duplicate,
              hnsw->add_impl(key, sparse_dim_count, indices.data(),
                             values.data(), query_meta, context));
  }

  // check getVector
  auto sparse_provider = hnsw->create_sparse_provider();
  for (size_t key = 0; key < 1000; key++) {
    uint32_t stored_count;
    std::string stored_indices;
    std::string stored_values;

    ASSERT_EQ(0,
              sparse_provider->get_sparse_vector(key, &stored_count,
                                                 &stored_indices,
                                                 &stored_values));

    // The value buffer is raw bytes; view it as an array of floats.
    const float *value_ptr =
        reinterpret_cast<const float *>(stored_values.data());
    ASSERT_EQ(stored_count, sparse_dim_count);
    for (size_t d = 0; d < stored_count; ++d) {
      ASSERT_FLOAT_EQ(value_ptr[d], key);
    }
  }

  hnsw->flush(0UL);
  hnsw.reset();
}

class TestDumper : public IndexDumper {
  virtual int init(const ailego::Params &) {
    return 0;
  }
  virtual int cleanup(void) {
    return 0;
  }
  virtual int create(const std::string &path) {
    return 0;
  }
  virtual uint32_t magic(void) const {
    return 0;
  }
  virtual int close(void) {
    return 0;
  }
  virtual int append(const std::string &id, size_t data_size,
                     size_t padding_size, uint32_t crc) {
    usleep(100000);
    return 0;
  }
  virtual size_t write(const void *data, size_t len) {
    return len;
  }
};

// Runs inserts concurrently with dumps. 2000 documents are added up front,
// then a background task adds keys 2000..2999 while the test thread dumps
// first through the slow TestDumper and then through a FileDumper. The test
// requires that at least one background insert was rejected with
// IndexError_Unsupported (presumably because a dump was in progress -- confirm
// against the streamer), that the dumped file holds at least 2000 documents,
// and that the reopened streamer holds all 3000.
TEST_F(HnswSparseStreamerTest, TestDumpIndexAndAdd) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ailego::Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "TestDumpIndexAndAdd", true));
  ailego::Params params;
  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 10);
  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 5);
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  auto ctx = streamer->create_context();
  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  ASSERT_NE(nullptr, ctx);
  // Last non-zero return code seen by addVector.
  int code = 0;
  // Used as a "task has started" signal rather than for mutual exclusion: the
  // test thread locks it and addVector unlocks it on entry.
  // NOTE(review): for the std::async call below the unlock happens on a
  // different thread than the lock. std::mutex::unlock() requires the calling
  // thread to own the mutex, so this is undefined behavior by the standard --
  // consider a std::promise or a condition variable instead.
  std::mutex mutex;

  // Adds keys [a, b); document i carries the value i in every dimension. A
  // failed insert must report IndexError_Unsupported and is retried with the
  // same key after a 10 ms pause.
  auto addVector = [&](int a, int b) {
    mutex.unlock();
    for (int i = a; i < b; i++) {
      NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
      NumericalVector<float> sparse_velues(sparse_dim_count);

      for (size_t j = 0; j < sparse_dim_count; ++j) {
        sparse_indices[j] = j * 20;
        sparse_velues[j] = (float)i;
      }

      int ret = streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),
                                   sparse_velues.data(), qmeta, ctx);
      if (ret != 0) {
        code = ret;
        // A fatal assertion here returns from the lambda only, not from the
        // test body.
        ASSERT_EQ(IndexError_Unsupported, code);
        i = i - 1;  // retry
        usleep(10000);
      }
    }
  };

  // Synchronous warm-up: lock, then addVector unlocks on this same thread.
  mutex.lock();
  addVector(0, 2000);
  // Lock again before starting the background task, which will unlock it.
  mutex.lock();
  auto t2 = std::async(std::launch::async, addVector, 2000, 3000);
  auto path1 = dir_ + "/dumpIndex1";
  auto dumper1 = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper1, nullptr);
  ASSERT_EQ(0, dumper1->create(path1));
  mutex.lock();  // sync: wait addVector start and release lock
  // Slow dump (TestDumper sleeps in append) so that it overlaps with the
  // background inserts.
  auto test_dumper = std::make_shared<TestDumper>();
  ASSERT_EQ(0, streamer->dump(test_dumper));
  mutex.unlock();
  // Real dump to a file, still potentially concurrent with the inserts.
  ASSERT_EQ(0, streamer->dump(dumper1));
  ASSERT_EQ(0, dumper1->close());
  // Wait for the background inserts to finish before closing.
  t2.get();
  streamer->close();
  // At least one insert must have been rejected during the test.
  ASSERT_EQ(IndexError_Unsupported, code);

  // check dump index
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSparseSearcher");
  auto container = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_EQ(0, container->init(ailego::Params()));
  ASSERT_EQ(0, container->open(path1, false));
  ASSERT_NE(searcher, nullptr);
  ASSERT_EQ(0, searcher->init(ailego::Params()));
  ASSERT_EQ(0, searcher->load(container, IndexMetric::Pointer()));
  auto iter = searcher->create_sparse_provider()->create_iterator();

  // Every dumped document must have the expected dimension count and carry
  // its own key as the value of each dimension.
  size_t docs = 0;
  while (iter->is_valid()) {
    auto key = iter->key();
    const uint32_t sparse_count = iter->sparse_count();
    ASSERT_EQ(sparse_count, sparse_dim_count);

    const float *data = reinterpret_cast<const float *>(iter->sparse_data());
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      ASSERT_FLOAT_EQ((float)key, data[j]);
    }

    docs++;
    iter->next();
  }

  // The dump ran while inserts were still in flight, so only the 2000
  // documents added up front are guaranteed to be in the file.
  ASSERT_GE(docs, 2000U);

  // check streamer
  ASSERT_EQ(0, streamer->open(storage));
  iter = streamer->create_sparse_provider()->create_iterator();

  docs = 0;
  while (iter->is_valid()) {
    auto key = iter->key();
    const uint32_t sparse_count = iter->sparse_count();
    ASSERT_EQ(sparse_count, sparse_dim_count);

    const float *data = reinterpret_cast<const float *>(iter->sparse_data());
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      ASSERT_FLOAT_EQ((float)key, data[j]);
    }

    docs++;
    iter->next();
  }

  // The streamer's own storage must contain every document.
  ASSERT_EQ(docs, 3000U);
}

TEST_F(HnswSparseStreamerTest, TestProvider) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_NE(nullptr, storage);
  ailego::Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "TestProvider.index", true));
  ailego::Params params;
  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 10);
  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 5);
  params.set(PARAM_HNSW_SPARSE_STREAMER_GET_VECTOR_ENABLE, true);
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));
  auto ctx = streamer->create_context();
  ASSERT_NE(nullptr, ctx);

  //! prepare data
  size_t docs = 10000UL;
  srand(ailego::Realtime::MilliSeconds());
  std::vector<key_t> keys(docs);
  bool rand_key = rand() % 2;
  bool rand_order = rand() % 2;
  size_t step = rand() % 2 + 1;
  LOG_DEBUG("randKey=%u randOrder=%u step=%zu", rand_key, rand_order, step);
  if (rand_key) {
    std::mt19937 mt;
    std::uniform_int_distribution<size_t> dt(
        0, std::numeric_limits<size_t>::max());
    for (size_t i = 0; i < docs; ++i) {
      keys[i] = dt(mt);
    }
  } else {
    std::iota(keys.begin(), keys.end(), 0U);
    std::transform(keys.begin(), keys.end(), keys.begin(),
                   [&](key_t k) { return step * k; });
    if (rand_order) {
      uint32_t seed = ailego::Realtime::Seconds();
      std::shuffle(keys.begin(), keys.end(), std::default_random_engine(seed));
    }
  }

  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  for (size_t i = 0; i < keys.size(); i++) {
    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
    NumericalVector<float> sparse_velues(sparse_dim_count);

    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices[j] = j * 20;
      sparse_velues[j] = keys[i];
    }

    ASSERT_EQ(
        0, streamer->add_impl(keys[i], sparse_dim_count, sparse_indices.data(),
                              sparse_velues.data(), qmeta, ctx));
  }

  auto path1 = dir_ + "/TestProvider";
  auto dumper1 = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper1, nullptr);
  ASSERT_EQ(0, dumper1->create(path1));
  ASSERT_EQ(0, streamer->dump(dumper1));
  ASSERT_EQ(0, dumper1->close());
  streamer->close();

  // check dump index
  IndexSearcher::Pointer searcher =
      IndexFactory::CreateSearcher("HnswSparseSearcher");
  auto container = IndexFactory::CreateStorage("MMapFileReadStorage");
  ASSERT_EQ(0, container->init(ailego::Params()));
  ASSERT_EQ(0, container->open(path1, false));
  ASSERT_NE(searcher, nullptr);
  ASSERT_EQ(0, searcher->init(ailego::Params()));
  ASSERT_EQ(0, searcher->load(container, IndexMetric::Pointer()));
  auto iter = searcher->create_sparse_provider()->create_iterator();
  size_t cnt = 0;
  while (iter->is_valid()) {
    auto key = iter->key();

    const uint32_t sparse_count = iter->sparse_count();
    ASSERT_EQ(sparse_count, sparse_dim_count);

    const float *data = reinterpret_cast<const float *>(iter->sparse_data());
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      ASSERT_FLOAT_EQ((float)key, data[j]);
    }

    cnt++;
    iter->next();
  }
  ASSERT_EQ(cnt, docs);

  // check streamer
  ASSERT_EQ(0, streamer->open(storage));
  iter = streamer->create_sparse_provider()->create_iterator();
  cnt = 0;
  while (iter->is_valid()) {
    auto key = iter->key();

    const uint32_t sparse_count = iter->sparse_count();
    ASSERT_EQ(sparse_count, sparse_dim_count);

    const float *data = reinterpret_cast<const float *>(iter->sparse_data());
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      ASSERT_FLOAT_EQ((float)key, data[j]);
    }

    cnt++;
    iter->next();
  }
  ASSERT_EQ(cnt, docs);

  auto searcher_provider = searcher->create_sparse_provider();
  auto streamer_provider = streamer->create_sparse_provider();
  for (size_t i = 0; i < keys.size(); ++i) {
    {
      uint32_t sparse_count;
      std::string sparse_indices_buffer;
      std::string sparse_values_buffer;

      ASSERT_EQ(0, searcher_provider->get_sparse_vector(keys[i], &sparse_count,
                                                        &sparse_indices_buffer,
                                                        &sparse_values_buffer));

      const float *sparse_values_ptr =
          reinterpret_cast<const float *>(sparse_values_buffer.data());
      ASSERT_EQ(sparse_count, sparse_dim_count);
      for (size_t j = 0; j < sparse_count; ++j) {
        ASSERT_FLOAT_EQ(sparse_values_ptr[j], keys[i]);
      }
    }

    {
      uint32_t sparse_count;
      std::string sparse_indices_buffer;
      std::string sparse_values_buffer;
      ASSERT_EQ(0, streamer_provider->get_sparse_vector(keys[i], &sparse_count,
                                                        &sparse_indices_buffer,
                                                        &sparse_values_buffer));

      const float *sparse_values_ptr =
          reinterpret_cast<const float *>(sparse_values_buffer.data());
      ASSERT_EQ(sparse_count, sparse_dim_count);
      for (size_t j = 0; j < sparse_count; ++j) {
        ASSERT_FLOAT_EQ(sparse_values_ptr[j], keys[i]);
      }
    }
  }

  ASSERT_EQ(index_meta_ptr_->data_type(), streamer_provider->data_type());
}

//! Uses a context created by one randomly chosen streamer against three
//! different streamers, from two threads running at the same time.
//!
//! Streamer N (N = 0, 1, 2) only ever receives keys congruent to N modulo 3,
//! so the key of the single search result identifies which streamer answered.
TEST_F(HnswSparseStreamerTest, TestSharedContext) {
  // Creates and opens a streamer backed by its own index file. EXPECT_* is
  // used here because ASSERT_* cannot appear in a lambda that returns a value.
  auto create_streamer = [](std::string path) {
    IndexStreamer::Pointer streamer =
        IndexFactory::CreateStreamer("HnswSparseStreamer");
    auto storage = IndexFactory::CreateStorage("MMapFileStorage");
    ailego::Params stg_params;
    EXPECT_EQ(0, storage->init(stg_params));
    EXPECT_EQ(0, storage->open(path, true));
    ailego::Params params;
    EXPECT_EQ(0, streamer->init(*index_meta_ptr_, params));
    EXPECT_EQ(0, streamer->open(storage));
    return streamer;
  };
  // Index files live below dir_ (with a path separator), matching the other
  // tests in this fixture.
  const IndexStreamer::Pointer streamers[3] = {
      create_streamer(dir_ + "/TestSharedContext.index1"),
      create_streamer(dir_ + "/TestSharedContext.index2"),
      create_streamer(dir_ + "/TestSharedContext.index3"),
  };

  srand(ailego::Realtime::MilliSeconds());
  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  auto do_test = [&](int start) {
    // The context is created by one streamer but used with all three below.
    IndexStreamer::Context::Pointer ctx =
        streamers[rand() % 3]->create_context();
    ASSERT_TRUE(!!ctx);
    ctx->set_topk(1);

    // Next key to insert into each streamer; each advances in steps of 3 so
    // that (key % 3) always equals the streamer's position in `streamers`.
    uint64_t next_key[3] = {static_cast<uint64_t>(start),
                            static_cast<uint64_t>(start) + 1,
                            static_cast<uint64_t>(start) + 2};

    NumericalVector<uint32_t> query_sparse_indices(sparse_dim_count);
    NumericalVector<float> query_sparse_values(sparse_dim_count);
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      query_sparse_indices[j] = j * 20;
      query_sparse_values[j] = 1.1f;
    }

    for (int i = 0; i < 1000; ++i) {
      NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
      NumericalVector<float> sparse_values(sparse_dim_count);

      for (size_t j = 0; j < sparse_dim_count; ++j) {
        sparse_indices[j] = j * 20;
        sparse_values[j] = rand();
      }

      const int code = rand() % 3;
      const auto &target = streamers[code];

      // Streamer `code` receives (code + 1) copies of the vector per round,
      // each under a fresh key, before it is searched.
      for (int n = 0; n <= code; ++n) {
        target->add_impl(next_key[code], sparse_dim_count,
                         sparse_indices.data(), sparse_values.data(), qmeta,
                         ctx);
        next_key[code] += 3;
      }
      const int ret = target->search_impl(
          sparse_dim_count, query_sparse_indices.data(),
          query_sparse_values.data(), qmeta, ctx);

      EXPECT_EQ(0, ret);
      auto &results = ctx->result();
      EXPECT_EQ(1, results.size());
      // Only inspect the first hit when there is one; indexing an empty
      // result list would be undefined behavior.
      if (results.size() > 0) {
        EXPECT_EQ(code, results[0].key() % 3);
      }
    }
  };

  // Two workers with disjoint key ranges run against the same streamers.
  auto t1 = std::async(std::launch::async, do_test, 0);
  auto t2 = std::async(std::launch::async, do_test, 30000000);
  t1.wait();
  t2.wait();
}

//! Checks that the brute-force threshold can be raised per context, either
//! through Context::update() with a params object or through
//! set_bruteforce_threshold(), and that recall against a linear scan stays
//! high in both modes.
TEST_F(HnswSparseStreamerTest, TestBruteForceSetupInContext) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  ailego::Params params;
  // params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 10);
  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 16);
  params.set(PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION, 10);
  params.set(PARAM_HNSW_SPARSE_STREAMER_EF, 5);
  params.set(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
  ailego::Params stg_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0,
            storage->open(dir_ + "/TestBruteForceSetupInContext.index", true));
  ASSERT_EQ(0, streamer->init(*index_meta_ptr_, params));
  ASSERT_EQ(0, streamer->open(storage));

  // Insert `cnt` documents; every value of document i equals i.
  size_t cnt = 5000U;
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);
  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  for (size_t i = 0; i < cnt; i++) {
    NumericalVector<uint32_t> sparse_indices(sparse_dim_count);
    NumericalVector<float> sparse_values(sparse_dim_count);

    for (size_t j = 0; j < sparse_dim_count; ++j) {
      sparse_indices[j] = j * 20;
      sparse_values[j] = i;
    }

    ASSERT_EQ(0, streamer->add_impl(i, sparse_dim_count, sparse_indices.data(),
                                    sparse_values.data(), qmeta, ctx));
  }

  size_t topk = 20;
  uint64_t knnTotalTime = 0;
  uint64_t linearTotalTime = 0;
  int totalHits = 0;
  int totalCnts = 0;
  int topk1Hits = 0;

  // Toggled after every query: every second query raises the threshold, and
  // those queries alternate between the two ways of setting it.
  bool set_bf_threshold = false;
  bool use_update = false;

  size_t step = 50;
  for (size_t i = 0; i < cnt; i += step) {
    // for (size_t i = 0; i < cnt; i++) {
    auto linearCtx = streamer->create_context();
    auto knnCtx = streamer->create_context();

    ASSERT_TRUE(!!linearCtx);
    ASSERT_TRUE(!!knnCtx);

    linearCtx->set_topk(topk);
    knnCtx->set_topk(topk);

    NumericalVector<uint32_t> query_sparse_indices(sparse_dim_count);
    NumericalVector<float> query_sparse_values(sparse_dim_count);
    for (size_t j = 0; j < sparse_dim_count; ++j) {
      query_sparse_indices[j] = j * 20;
      query_sparse_values[j] = i + 0.1f;
    }

    auto t1 = ailego::Realtime::MicroSeconds();

    if (set_bf_threshold) {
      if (use_update) {
        ailego::Params streamerParamsExtra;

        streamerParamsExtra.set(
            "proxima.hnsw.sparse_streamer.brute_force_threshold", cnt);
        knnCtx->update(streamerParamsExtra);
      } else {
        knnCtx->set_bruteforce_threshold(cnt);
      }

      use_update = !use_update;
    }
    ASSERT_EQ(
        0, streamer->search_impl(sparse_dim_count, query_sparse_indices.data(),
                                 query_sparse_values.data(), qmeta, knnCtx));

    auto t2 = ailego::Realtime::MicroSeconds();

    ASSERT_EQ(0, streamer->search_bf_impl(
                     sparse_dim_count, query_sparse_indices.data(),
                     query_sparse_values.data(), qmeta, linearCtx));

    // auto t3 = ailego::Realtime::MicroSeconds();

    // The search_impl() time is booked as "linear" when the threshold was
    // raised for this query, and as "knn" otherwise.
    if (set_bf_threshold) {
      linearTotalTime += t2 - t1;
    } else {
      knnTotalTime += t2 - t1;
    }

    set_bf_threshold = !set_bf_threshold;

    auto &knnResult = knnCtx->result();
    // ASSERT_EQ(topk, knnResult.size());
    // The size of knnResult is not asserted, so guard every access to it.
    if (knnResult.size() > 0) {
      topk1Hits += cnt - 1 == knnResult[0].key();
    }

    auto &linearResult = linearCtx->result();
    ASSERT_EQ(topk, linearResult.size());
    ASSERT_EQ(cnt - 1, linearResult[0].key());

    for (size_t k = 0; k < topk; ++k) {
      totalCnts++;
      if (k >= knnResult.size()) {
        continue;  // a missing knn result counts as a miss
      }
      for (size_t j = 0; j < topk; ++j) {
        if (linearResult[j].key() == knnResult[k].key()) {
          totalHits++;
          break;
        }
      }
    }
  }
  float recall = totalHits * 1.0f / totalCnts;
  float topk1Recall = topk1Hits * step * 1.0f / cnt;
  float cost = linearTotalTime * 1.0f / knnTotalTime;
#if 0
    printf("knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d "
           "R@%zd=%f R@1=%f cost=%f\n",
           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,
           topk1Recall, cost);
#endif
  EXPECT_GT(recall, 0.90f);
  EXPECT_GT(topk1Recall, 0.95f);
  // EXPECT_GT(cost, 2.0f);
}

//! Builds an index with a query filtering ratio of 0.05 configured and
//! compares graph search results against a brute-force scan, requiring more
//! than 80% recall for both the full top-k list and the first hit.
TEST_F(HnswSparseStreamerTest, TestQueryFilteringRatio) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  size_t sparse_dim_count = 32;

  IndexMeta index_meta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  index_meta.set_metric("InnerProductSparse", 0, ailego::Params());

  // Streamer tuning, including the filtering ratio under test.
  ailego::Params params;
  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 20);
  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 16);
  params.set(PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION, 20);
  params.set(PARAM_HNSW_SPARSE_STREAMER_EF, 10);
  params.set(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);
  params.set(PARAM_HNSW_SPARSE_STREAMER_QUERY_FILTERING_RATIO, 0.05);

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ailego::Params stg_params;
  ASSERT_EQ(0, storage->init(stg_params));
  ASSERT_EQ(0, storage->open(dir_ + "/TestQueryFilteringRatio", true));
  ASSERT_EQ(0, streamer->init(index_meta, params));
  ASSERT_EQ(0, streamer->open(storage));

  size_t doc_total = 20000U;

  // One context for inserting, one per search flavor.
  auto add_ctx = streamer->create_context();
  ASSERT_TRUE(!!add_ctx);

  auto bf_ctx = streamer->create_context();
  ASSERT_TRUE(!!bf_ctx);

  auto graph_ctx = streamer->create_context();
  ASSERT_TRUE(!!graph_ctx);

  std::vector<NumericalVector<uint32_t>> indices_per_doc;
  std::vector<NumericalVector<float>> values_per_doc;

  generate_sparse_data(doc_total, sparse_dim_count, indices_per_doc,
                       values_per_doc, true);

  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  for (size_t doc = 0; doc < doc_total; ++doc) {
    ASSERT_EQ(0, streamer->add_impl(doc, sparse_dim_count,
                                    indices_per_doc[doc].data(),
                                    values_per_doc[doc].data(), qmeta,
                                    add_ctx));
  }

  // streamer->print_debug_info();
  size_t topk = 200;
  bf_ctx->set_topk(topk);
  graph_ctx->set_topk(topk);

  uint64_t graph_elapsed_us = 0;
  uint64_t bf_elapsed_us = 0;

  int matched_total = 0;
  int compared_total = 0;
  int first_hit_total = 0;

  // Every `stride`-th stored document is replayed as a query.
  size_t stride = 100;
  for (size_t query_id = 0; query_id < doc_total; query_id += stride) {
    const auto &query_indices = indices_per_doc[query_id];
    const auto &query_values = values_per_doc[query_id];

    auto graph_begin = ailego::Realtime::MicroSeconds();

    ASSERT_EQ(0, streamer->search_impl(sparse_dim_count, query_indices.data(),
                                       query_values.data(), qmeta, graph_ctx));

    auto graph_end = ailego::Realtime::MicroSeconds();

    ASSERT_EQ(0,
              streamer->search_bf_impl(sparse_dim_count, query_indices.data(),
                                       query_values.data(), qmeta, bf_ctx));

    auto bf_end = ailego::Realtime::MicroSeconds();

    graph_elapsed_us += graph_end - graph_begin;
    bf_elapsed_us += bf_end - graph_end;

    // std::cout << "i: " << i << std::endl;

    auto &graph_hits = graph_ctx->result();
    ASSERT_EQ(topk, graph_hits.size());
    if (query_id == graph_hits[0].key()) {
      ++first_hit_total;
    }

    auto &bf_hits = bf_ctx->result();
    ASSERT_EQ(topk, bf_hits.size());
    ASSERT_EQ(query_id, bf_hits[0].key());

    // Count how many graph hits also appear in the brute-force list.
    for (size_t g = 0; g < topk; ++g) {
      ++compared_total;
      bool found = false;
      for (size_t b = 0; b < topk && !found; ++b) {
        found = bf_hits[b].key() == graph_hits[g].key();
      }
      if (found) {
        ++matched_total;
      }
    }
  }
  float recall = matched_total * 1.0f / compared_total;
  float topk1Recall = first_hit_total * stride * 1.0f / doc_total;
  float cost = bf_elapsed_us * 1.0f / graph_elapsed_us;
#if 0
    printf("knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d "
           "R@%zd=%f R@1=%f cost=%f\n",
           graph_elapsed_us, bf_elapsed_us, matched_total, compared_total, topk,
           recall, topk1Recall, cost);
#endif
  EXPECT_GT(recall, 0.80f);
  EXPECT_GT(topk1Recall, 0.80f);
  // EXPECT_GT(cost, 2.0f);
}

//! Inserts documents through add_with_id_impl() (all even ids, in two
//! interleaved passes), then checks for a sample of queries that:
//!   - graph search and brute-force search agree well enough (recall), and
//!   - the vector fetched by id for the leading hits equals the generated one.
TEST_F(HnswSparseStreamerTest, TestAddAndSearchWithID) {
  IndexStreamer::Pointer streamer =
      IndexFactory::CreateStreamer("HnswSparseStreamer");
  ASSERT_TRUE(streamer != nullptr);

  size_t sparse_dim_count = 32;

  IndexMeta index_meta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);
  index_meta.set_metric("InnerProductSparse", 0, ailego::Params());

  ailego::Params params;
  params.set(PARAM_HNSW_SPARSE_STREAMER_MAX_NEIGHBOR_COUNT, 20);
  params.set(PARAM_HNSW_SPARSE_STREAMER_SCALING_FACTOR, 16);
  params.set(PARAM_HNSW_SPARSE_STREAMER_EFCONSTRUCTION, 10);
  params.set(PARAM_HNSW_SPARSE_STREAMER_EF, 5);
  params.set(PARAM_HNSW_SPARSE_STREAMER_BRUTE_FORCE_THRESHOLD, 1000U);

  ailego::Params stg_params;
  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_EQ(0, storage->init(stg_params));
  // Use a file name specific to this test instead of reusing another test's.
  ASSERT_EQ(0, storage->open(dir_ + "/TestAddAndSearchWithID", true));
  ASSERT_EQ(0, streamer->init(index_meta, params));
  ASSERT_EQ(0, streamer->open(storage));

  size_t cnt = 20000U;
  auto ctx = streamer->create_context();
  ASSERT_TRUE(!!ctx);

  auto linearCtx = streamer->create_context();
  ASSERT_TRUE(!!linearCtx);

  auto knnCtx = streamer->create_context();
  ASSERT_TRUE(!!knnCtx);

  std::vector<NumericalVector<uint32_t>> sparse_indices_list;
  std::vector<NumericalVector<float>> sparse_vec_list;

  generate_sparse_data(cnt, sparse_dim_count, sparse_indices_list,
                       sparse_vec_list, true);

  IndexQueryMeta qmeta(IndexMeta::MetaType::MT_SPARSE,
                       IndexMeta::DataType::DT_FP32);

  // First pass: ids 0, 4, 8, ...
  for (size_t i = 0; i < cnt; i += 4) {
    ASSERT_EQ(0, streamer->add_with_id_impl(
                     i, sparse_dim_count, sparse_indices_list[i].data(),
                     sparse_vec_list[i].data(), qmeta, ctx));
  }

  // Second pass: ids 2, 6, 10, ... so ids are not inserted in order.
  for (size_t i = 2; i < cnt; i += 4) {
    ASSERT_EQ(0, streamer->add_with_id_impl(
                     i, sparse_dim_count, sparse_indices_list[i].data(),
                     sparse_vec_list[i].data(), qmeta, ctx));
  }

  // streamer->print_debug_info();
  size_t topk = 200;
  linearCtx->set_topk(topk);
  knnCtx->set_topk(topk);

  uint64_t knnTotalTime = 0;
  uint64_t linearTotalTime = 0;

  int totalHits = 0;
  int totalCnts = 0;
  int topk1Hits = 0;
  size_t queryCnt = 0;

  // Verifies, for the first topk / 10 hits of a result list, that the vector
  // returned by get_sparse_vector_by_id() matches the generated input.
  // ASSERT_* failures only leave this lambda, not the enclosing test.
  auto verify_stored_vectors = [&](const IndexDocumentList &result) {
    for (size_t j = 0; j < topk / 10; ++j) {
      ASSERT_NE(result[j].key(), -1LLU);
      ASSERT_NE(result[j].index(), -1LLU);
      uint32_t sparse_count = 0;
      std::string sparse_indices_buffer;
      std::string sparse_values_buffer;
      ASSERT_EQ(0, streamer->get_sparse_vector_by_id(
                       result[j].index(), &sparse_count,
                       &sparse_indices_buffer, &sparse_values_buffer));
      ASSERT_EQ(sparse_dim_count, sparse_count);

      const auto &_sparse_indices = sparse_indices_list[result[j].index()];
      const auto &_sparse_vec = sparse_vec_list[result[j].index()];
      // Copy the expected values into a byte string so they can be compared
      // with the returned buffer.
      std::string original_sparse_values_buffer;
      original_sparse_values_buffer.resize(_sparse_vec.size() *
                                           sizeof(float));
      memcpy((char *)original_sparse_values_buffer.data(),
             (char *)_sparse_vec.data(), _sparse_vec.size() * sizeof(float));

      ASSERT_EQ(sparse_indices_buffer, _sparse_indices);

      ASSERT_EQ(sparse_values_buffer, original_sparse_values_buffer);
    }
  };

  // Only even ids were inserted, so only even ids are used as queries.
  for (size_t i = 0; i < cnt / 100; i += 2) {
    const auto &sparse_indices = sparse_indices_list[i];
    const auto &sparse_vec = sparse_vec_list[i];
    ++queryCnt;

    auto t1 = ailego::Realtime::MicroSeconds();

    ASSERT_EQ(0, streamer->search_impl(sparse_dim_count, sparse_indices.data(),
                                       sparse_vec.data(), qmeta, knnCtx));

    auto t2 = ailego::Realtime::MicroSeconds();

    ASSERT_EQ(0,
              streamer->search_bf_impl(sparse_dim_count, sparse_indices.data(),
                                       sparse_vec.data(), qmeta, linearCtx));

    auto t3 = ailego::Realtime::MicroSeconds();

    knnTotalTime += t2 - t1;
    linearTotalTime += t3 - t2;

    // std::cout << "i: " << i << std::endl;

    auto &knnResult = knnCtx->result();
    ASSERT_EQ(topk, knnResult.size());
    topk1Hits += i == knnResult[0].key();

    auto &linearResult = linearCtx->result();
    ASSERT_EQ(topk, linearResult.size());
    ASSERT_EQ(i, linearResult[0].key());

    for (size_t k = 0; k < topk; ++k) {
      totalCnts++;
      for (size_t j = 0; j < topk; ++j) {
        if (linearResult[j].key() == knnResult[k].key()) {
          totalHits++;
          break;
        }
      }
    }

    // The check does not depend on k, so it runs once per query.
    verify_stored_vectors(linearResult);
    verify_stored_vectors(knnResult);
  }
  float recall = totalHits * 1.0f / totalCnts;
  // Fraction of queries whose first graph hit is the query document itself.
  float topk1Recall = queryCnt > 0 ? topk1Hits * 1.0f / queryCnt : 0.0f;
  float cost = linearTotalTime * 1.0f / knnTotalTime;
#if 0
    printf("knnTotalTime=%zd linearTotalTime=%zd totalHits=%d totalCnts=%d "
           "R@%zd=%f R@1=%f cost=%f\n",
           knnTotalTime, linearTotalTime, totalHits, totalCnts, topk, recall,
           topk1Recall, cost);
#endif
  EXPECT_GT(recall, 0.80f);
  EXPECT_GT(topk1Recall, 0.80f);
  // EXPECT_GT(cost, 2.0f);
}

}  // namespace core
}  // namespace zvec

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif

================================================
FILE: tests/core/algorithm/ivf/CMakeLists.txt
================================================
# Load the project's Bazel-style CMake helpers (presumably where cc_gtest is
# defined -- confirm in cmake/bazel.cmake).
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)

# Collect every source file matching *_test.cc, searching recursively.
file(GLOB_RECURSE ALL_TEST_SRCS *_test.cc)

# Declare one cc_gtest target per test source. The target name is the file
# name without directory and extension (NAME_WE).
foreach(CC_SRCS ${ALL_TEST_SRCS})
  get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)
  cc_gtest(
      NAME ${CC_TARGET}
      STRICT
      LIBS zvec_ailego core_framework core_utility core_metric core_quantizer core_knn_cluster core_knn_flat core_knn_ivf
      SRCS ${CC_SRCS}
      INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm/ivf
    )
endforeach()

================================================
FILE: tests/core/algorithm/ivf/ivf_builder_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ivf_builder.h"
#include <future>
#include <iostream>
#include <vector>
#include <gtest/gtest.h>
#include <zvec/ailego/container/vector.h>

using namespace zvec::core;
using namespace zvec::ailego;
using namespace std;

class IVFBuilderTest : public testing::Test {
 protected:
  void SetUp();
  void TearDown();

  void prepare_index_holder(uint32_t base_key, uint32_t num);

  IndexMeta index_meta_;
  Params params_;
  uint32_t dimension_;
  IndexHolder::Pointer holder_;
  IndexThreads::Pointer threads_{};
};

//! Installs the default meta (8-dimensional FP32, SquaredEuclidean) and the
//! default builder parameters, then decides at random whether the tests run
//! with a SingleQueueIndexThreads pool or with threads_ left empty.
void IVFBuilderTest::SetUp() {
  dimension_ = 8U;
  index_meta_.set_meta(IndexMeta::DataType::DT_FP32, dimension_);
  index_meta_.set_metric("SquaredEuclidean", 0, Params());

  params_.set(PARAM_IVF_BUILDER_CENTROID_COUNT, "8");
  params_.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, "KmeansCluster");

  // Coin flip seeded from the random device.
  std::random_device entropy;
  std::mt19937 engine(entropy());
  std::uniform_int_distribution<size_t> coin(0, 1);
  const bool with_thread_pool = coin(engine) != 0;
  if (!with_thread_pool) {
    return;
  }
  threads_ = std::make_shared<SingleQueueIndexThreads>();
}

//! Nothing to release explicitly; the body is intentionally empty.
void IVFBuilderTest::TearDown() {}

void IVFBuilderTest::prepare_index_holder(uint32_t base_key, uint32_t num) {
  MultiPassIndexHolder<IndexMeta::DataType::DT_FP32> *holder =
      new MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>(dimension_);
  uint32_t key = base_key;
  for (size_t i = 0; i < num; ++i) {
    NumericalVector<float> vec(dimension_);
    for (size_t j = 0; j < dimension_; ++j) {
      vec[j] = 1.0f * i;
    }
    holder->emplace(key + i, vec);
  }

  holder_.reset(holder);
}

//! init() succeeds with the fixture's default meta and parameters.
TEST_F(IVFBuilderTest, TestInitSuccess) {
  IVFBuilder builder;
  EXPECT_EQ(0, builder.init(index_meta_, params_));
}

//! init() reports IndexError_NoExist when the meta names the metric
//! "invalid".
TEST_F(IVFBuilderTest, TestInitFailedWithInvalidMetric) {
  index_meta_.set_metric("invalid", 0, Params());

  IVFBuilder builder;
  EXPECT_EQ(IndexError_NoExist, builder.init(index_meta_, params_));
}

//! A single centroid count ("2") combined with a two-level cluster class
//! passes init() but makes train() return IndexError_InvalidArgument.
TEST_F(IVFBuilderTest, TestInitFailedWithInvalidCentroidsNum) {
  Params params;
  params.set(PARAM_IVF_BUILDER_CENTROID_COUNT, "2");
  params.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, "KmeansCluster*KmeansCluster");

  IVFBuilder builder;
  EXPECT_EQ(0, builder.init(index_meta_, params));

  // NOTE(review): this test never calls prepare_index_holder(), so holder_ is
  // still empty here -- confirm the error comes from the centroid
  // configuration and not from the empty holder.
  EXPECT_EQ(IndexError_InvalidArgument, builder.train(threads_, holder_));
}

//! Training from a holder with the default one-level configuration yields a
//! centroid index with at least one centroid.
TEST_F(IVFBuilderTest, TestTrainWithHolder1Level) {
  IVFBuilder builder;
  EXPECT_EQ(0, builder.init(index_meta_, params_));

  prepare_index_holder(0, 1000);
  EXPECT_EQ(0, builder.train(threads_, holder_));

  auto trained = builder.centroid_index();
  EXPECT_GT(trained->centroids_count(), 0u);
}

//! Training from a holder with a "4*2" two-level configuration yields
//! 8 centroids.
TEST_F(IVFBuilderTest, TestTrainWithHolder2Level) {
  Params two_level;
  two_level.set(PARAM_IVF_BUILDER_CENTROID_COUNT, "4*2");
  two_level.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, "KmeansCluster*KmeansCluster");

  IVFBuilder builder;
  EXPECT_EQ(0, builder.init(index_meta_, two_level));

  prepare_index_holder(0, 1000);
  EXPECT_EQ(0, builder.train(threads_, holder_));

  auto trained = builder.centroid_index();
  EXPECT_EQ(trained->centroids_count(), 8);
}

//! A builder initialised with the default parameters adopts the result of a
//! StratifiedClusterTrainer trained with cluster count "4*2" and ends up with
//! 8 centroids.
TEST_F(IVFBuilderTest, TestTrainWithTrainer2Level) {
  IndexTrainer::Pointer trainer =
      IndexFactory::CreateTrainer("StratifiedClusterTrainer");
  ASSERT_TRUE(!!trainer);

  prepare_index_holder(0, 1000);

  // Train the external trainer first.
  Params trainer_params;
  trainer_params.set("proxima.stratified.trainer.cluster_count", "4*2");
  ASSERT_EQ(0, trainer->init(index_meta_, trainer_params));
  ASSERT_EQ(0, trainer->train(threads_, holder_));

  // Hand the trained trainer to the builder.
  IVFBuilder builder;
  EXPECT_EQ(0, builder.init(index_meta_, params_));
  EXPECT_EQ(0, builder.train(trainer));

  auto trained = builder.centroid_index();
  EXPECT_EQ(trained->centroids_count(), 8);
}

//! A builder configured for 4 one-level centroids adopts the result of a
//! StratifiedClusterTrainer trained with cluster count "4" and ends up with
//! 4 centroids.
TEST_F(IVFBuilderTest, TestTrainWithTrainer1Level) {
  Params builder_params;
  builder_params.set(PARAM_IVF_BUILDER_CENTROID_COUNT, "4");
  builder_params.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, "KmeansCluster");

  IVFBuilder builder;
  EXPECT_EQ(0, builder.init(index_meta_, builder_params));

  IndexTrainer::Pointer trainer =
      IndexFactory::CreateTrainer("StratifiedClusterTrainer");
  ASSERT_TRUE(!!trainer);

  prepare_index_holder(0, 1000);

  // Train the external trainer, then hand it to the builder.
  Params trainer_params;
  trainer_params.set("proxima.stratified.trainer.cluster_count", "4");
  ASSERT_EQ(0, trainer->init(index_meta_, trainer_params));
  ASSERT_EQ(0, trainer->train(threads_, holder_));

  EXPECT_EQ(0, builder.train(trainer));

  auto trained = builder.centroid_index();
  EXPECT_EQ(trained->centroids_count(), 4);
}

//! Train and build with a "4*2" two-level configuration; all 1000 documents
//! must be reported as built.
TEST_F(IVFBuilderTest, TestBuildWith2Level) {
  Params two_level;
  two_level.set(PARAM_IVF_BUILDER_CENTROID_COUNT, "4*2");
  two_level.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, "KmeansCluster*KmeansCluster");

  IVFBuilder builder;
  EXPECT_EQ(0, builder.init(index_meta_, two_level));

  prepare_index_holder(0, 1000);

  EXPECT_EQ(0, builder.train(threads_, holder_));
  EXPECT_EQ(0, builder.build(threads_, holder_));

  EXPECT_EQ((size_t)1000, builder.stats().built_count());
}

//! Train and build with 4 one-level centroids; all 1000 documents must be
//! reported as built.
TEST_F(IVFBuilderTest, TestBuildWith1Level) {
  Params one_level;
  one_level.set(PARAM_IVF_BUILDER_CENTROID_COUNT, "4");
  one_level.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, "KmeansCluster");

  IVFBuilder builder;
  EXPECT_EQ(0, builder.init(index_meta_, one_level));

  prepare_index_holder(0, 1000);

  EXPECT_EQ(0, builder.train(threads_, holder_));
  EXPECT_EQ(0, builder.build(threads_, holder_));

  EXPECT_EQ((size_t)1000, builder.stats().built_count());
}

//! Train, build and dump with the default configuration; the statistics must
//! report 1000 built, 1000 dumped and 0 discarded documents.
TEST_F(IVFBuilderTest, TestDump) {
  IVFBuilder builder;
  int ret = builder.init(index_meta_, params_);
  EXPECT_EQ(0, ret);

  prepare_index_holder(0, 1000);

  ret = builder.train(threads_, holder_);
  EXPECT_EQ(0, ret);

  ret = builder.build(threads_, holder_);
  EXPECT_EQ(0, ret);

  IndexDumper::Pointer dumper = IndexFactory::CreateDumper("MemoryDumper");
  // Stop here if the factory could not create the dumper; it is dereferenced
  // right below.
  ASSERT_TRUE(!!dumper);
  ret = dumper->create("path");
  EXPECT_EQ(0, ret);

  // The dump status itself is verified, not only the statistics.
  ret = builder.dump(dumper);
  EXPECT_EQ(0, ret);
  EXPECT_EQ((size_t)1000, builder.stats().built_count());
  EXPECT_EQ((size_t)1000, builder.stats().dumped_count());
  EXPECT_EQ((size_t)0, builder.stats().discarded_count());
}

// Disabled test (compiled out by the #if 0 below). It expects build() to
// return IndexError_IndexFull for 256-dimensional input with a "4*2"
// two-level configuration.
// NOTE(review): the parameters set here contain no memory limit -- confirm
// how the shortage is meant to be triggered before re-enabling.
#if 0
TEST_F(IVFBuilderTest, TestBuildWithNoEnoughMemory)
{
    IVFBuilder builder;
    Params params;
    params.set(PARAM_IVF_BUILDER_CENTROID_COUNT, "4*2");
    params.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, "KmeansCluster*KmeansCluster");

    dimension_ = 256;
    index_meta_.set_meta(IndexMeta::DataType::DT_FP32, dimension_);

    int ret = builder.init(index_meta_, params);
    EXPECT_EQ(0, ret);

    prepare_index_holder(0, 1000);

    ret = builder.train(threads_, holder_);
    EXPECT_EQ(0, ret);

    ret = builder.build(threads_, holder_);
    EXPECT_EQ(IndexError_IndexFull, ret);
}
#endif

//! Train, build and dump 1000 vectors of dimension 256 with a "4*2" two-level
//! configuration; the statistics must report 1000 built, 1000 dumped and
//! 0 discarded documents.
TEST_F(IVFBuilderTest, TestBuildWithEnoughMemory) {
  IVFBuilder builder;
  Params params;
  params.set(PARAM_IVF_BUILDER_CENTROID_COUNT, "4*2");
  params.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, "KmeansCluster*KmeansCluster");

  // Override the fixture's default dimension before the holder is prepared.
  dimension_ = 256;
  index_meta_.set_meta(IndexMeta::DataType::DT_FP32, dimension_);

  int ret = builder.init(index_meta_, params);
  EXPECT_EQ(0, ret);

  prepare_index_holder(0, 1000);

  ret = builder.train(threads_, holder_);
  EXPECT_EQ(0, ret);

  ret = builder.build(threads_, holder_);
  EXPECT_EQ(0, ret);

  IndexDumper::Pointer dumper = IndexFactory::CreateDumper("MemoryDumper");
  // Stop here if the factory could not create the dumper; it is dereferenced
  // right below.
  ASSERT_TRUE(!!dumper);
  ret = dumper->create("path");
  EXPECT_EQ(0, ret);

  // The dump status itself is verified, not only the statistics.
  ret = builder.dump(dumper);
  EXPECT_EQ(0, ret);
  EXPECT_EQ((size_t)1000, builder.stats().built_count());
  EXPECT_EQ((size_t)1000, builder.stats().dumped_count());
  EXPECT_EQ((size_t)0, builder.stats().discarded_count());
}

// Disabled test (compiled out by the #if 0 below). Row-major variant that
// expects build() to return IndexError_IndexFull for 256-dimensional input
// with a "4*2" two-level configuration.
// NOTE(review): the parameters set here contain no memory limit -- confirm
// how the shortage is meant to be triggered before re-enabling.
#if 0
TEST_F(IVFBuilderTest, TestBuildWithRowMajorAndNoEnoughMemory)
{
    IVFBuilder builder;
    Params params;
    params.set(PARAM_IVF_BUILDER_CENTROID_COUNT, "4*2");
    params.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, "KmeansCluster*KmeansCluster");

    dimension_ = 256;
    index_meta_.set_meta(IndexMeta::DataType::DT_FP32, dimension_);
    index_meta_.set_major_order(IndexMeta::MajorOrder::MO_ROW);

    int ret = builder.init(index_meta_, params);
    EXPECT_EQ(0, ret);

    prepare_index_holder(0, 1000);

    ret = builder.train(threads_, holder_);
    EXPECT_EQ(0, ret);

    ret = builder.build(threads_, holder_);
    EXPECT_EQ(IndexError_IndexFull, ret);
}
#endif

// Same flow as the column-major build test, but with the index meta switched
// to row-major order: train, build, dump through a MemoryDumper and verify
// that all 1000 documents were built and dumped with none discarded.
TEST_F(IVFBuilderTest, TestBuildWithRowMajorAndMemory) {
  IVFBuilder builder;
  Params params;
  params.set(PARAM_IVF_BUILDER_CENTROID_COUNT, "4*2");
  params.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, "KmeansCluster*KmeansCluster");

  dimension_ = 256;
  index_meta_.set_meta(IndexMeta::DataType::DT_FP32, dimension_);
  index_meta_.set_major_order(IndexMeta::MajorOrder::MO_ROW);

  int ret = builder.init(index_meta_, params);
  EXPECT_EQ(0, ret);

  prepare_index_holder(0, 1000);

  ret = builder.train(threads_, holder_);
  EXPECT_EQ(0, ret);

  ret = builder.build(threads_, holder_);
  EXPECT_EQ(0, ret);

  IndexDumper::Pointer dumper = IndexFactory::CreateDumper("MemoryDumper");
  // Fatal check: the dumper is dereferenced on the very next line.
  ASSERT_NE(dumper, nullptr);
  ret = dumper->create("path");
  EXPECT_EQ(0, ret);

  // The dump status used to be stored in `ret` without ever being verified.
  ret = builder.dump(dumper);
  EXPECT_EQ(0, ret);
  EXPECT_EQ((size_t)1000, builder.stats().built_count());
  EXPECT_EQ((size_t)1000, builder.stats().dumped_count());
  EXPECT_EQ((size_t)0, builder.stats().discarded_count());
}

// Builds a row-major index from 10 identical vectors (every component is 1.0)
// with centroid count "2*2", then dumps it and checks the builder stats.
// NOTE(review): judging by the test name, identical inputs are presumably
// meant to leave some centroids without members — confirm against the
// clustering implementation.
TEST_F(IVFBuilderTest, TestBuildWithEmptyCentroid) {
  IVFBuilder builder;
  Params params;
  params.set(PARAM_IVF_BUILDER_CENTROID_COUNT, "2*2");
  params.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, "KmeansCluster*KmeansCluster");

  dimension_ = 256;
  index_meta_.set_meta(IndexMeta::DataType::DT_FP32, dimension_);
  index_meta_.set_major_order(IndexMeta::MajorOrder::MO_ROW);

  int ret = builder.init(index_meta_, params);
  EXPECT_EQ(0, ret);
  size_t doc_cnt = 10;

  MultiPassIndexHolder<IndexMeta::DataType::DT_FP32> *holder =
      new MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>(dimension_);
  // Hand ownership to the fixture right away so that an early exit from the
  // fatal assertion below cannot leak the holder; `holder` stays valid as a
  // non-owning alias used for filling.
  holder_.reset(holder);
  for (size_t i = 0; i < doc_cnt; ++i) {
    NumericalVector<float> vec(dimension_);
    for (size_t j = 0; j < dimension_; ++j) {
      vec[j] = 1.0f;
    }
    // A rejected insert would invalidate the count expectations below.
    ASSERT_TRUE(holder->emplace(i, vec));
  }

  ret = builder.train(threads_, holder_);
  EXPECT_EQ(0, ret);

  ret = builder.build(threads_, holder_);
  EXPECT_EQ(0, ret);

  IndexDumper::Pointer dumper = IndexFactory::CreateDumper("MemoryDumper");
  // Fatal check: the dumper is dereferenced on the very next line.
  ASSERT_NE(dumper, nullptr);
  ret = dumper->create("path");
  EXPECT_EQ(0, ret);

  // The dump status used to be stored in `ret` without ever being verified.
  ret = builder.dump(dumper);
  EXPECT_EQ(0, ret);
  EXPECT_EQ((size_t)10, builder.stats().built_count());
  EXPECT_EQ((size_t)10, builder.stats().dumped_count());
  EXPECT_EQ((size_t)0, builder.stats().discarded_count());
}

// Uses a "2*2" centroid count together with a single "KmeansCluster" class
// name and expects init, train, build and dump to all succeed.
TEST_F(IVFBuilderTest, TestTrainClusterParams) {
  Params params;
  params.set(PARAM_IVF_BUILDER_CENTROID_COUNT, "2*2");
  params.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, "KmeansCluster");

  IVFBuilder builder;
  prepare_index_holder(0, 1000);

  // Run the builder pipeline step by step, checking each status code.
  int status = builder.init(index_meta_, params);
  EXPECT_EQ(0, status);
  status = builder.train(threads_, holder_);
  EXPECT_EQ(0, status);
  status = builder.build(threads_, holder_);
  EXPECT_EQ(0, status);

  // Dump the finished index through an in-memory dumper.
  IndexDumper::Pointer dumper = IndexFactory::CreateDumper("MemoryDumper");
  status = dumper->create("test.index");
  EXPECT_EQ(0, status);
  status = builder.dump(dumper);
  EXPECT_EQ(0, status);
}

// Trains and builds two independent IVFBuilder instances concurrently while
// both share one SingleQueueIndexThreads pool and one holder, then dumps each
// of them to a file and checks that every document was built.
TEST_F(IVFBuilderTest, TestIndexThreads) {
  IndexBuilder::Pointer builder1 = IndexFactory::CreateBuilder("IVFBuilder");
  ASSERT_NE(builder1, nullptr);
  IndexBuilder::Pointer builder2 = IndexFactory::CreateBuilder("IVFBuilder");
  ASSERT_NE(builder2, nullptr);

  const size_t dim = 128UL;
  const size_t doc_cnt = 1000;
  IndexMeta meta(IndexMeta::DataType::DT_FP32, dim);
  std::srand(Realtime::MilliSeconds());

  // Document `id` is a vector whose components all equal `id`.
  auto holder =
      std::make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);
  for (size_t id = 0; id < doc_cnt; id++) {
    NumericalVector<float> vec(dim);
    for (size_t d = 0; d < dim; ++d) {
      vec[d] = id;
    }
    ASSERT_TRUE(holder->emplace(id, vec));
  }

  Params params;
  params.set(PARAM_IVF_BUILDER_CENTROID_COUNT, "2*2");
  ASSERT_EQ(0, builder1->init(meta, params));
  ASSERT_EQ(0, builder2->init(meta, params));

  // The pool is constructed with a random first argument in [0, 3].
  auto threads =
      std::make_shared<SingleQueueIndexThreads>(std::rand() % 4, false);

  // Shared train-then-build routine executed once per builder.
  auto train_and_build = [&threads,
                          &holder](const IndexBuilder::Pointer &builder) {
    ASSERT_EQ(0, builder->train(threads, holder));
    ASSERT_EQ(0, builder->build(threads, holder));
  };

  auto task1 =
      std::async(std::launch::async, [&]() { train_and_build(builder1); });
  auto task2 =
      std::async(std::launch::async, [&]() { train_and_build(builder2); });
  task1.wait();
  task2.wait();

  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);

  // Both builders are dumped, one after the other, to the same path.
  const IndexBuilder::Pointer builders[] = {builder1, builder2};
  std::string path = "./hc_index";
  for (const auto &builder : builders) {
    ASSERT_EQ(0, dumper->create(path));
    ASSERT_EQ(0, builder->dump(dumper));
    ASSERT_EQ(0, dumper->close());
  }

  for (const auto &builder : builders) {
    ASSERT_EQ(doc_cnt, builder->stats().built_count());
  }
}

================================================
FILE: tests/core/algorithm/ivf/ivf_searcher_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ivf_searcher.h"
#include <future>
#include <iostream>
#include <vector>
#include <gtest/gtest.h>
#include "zvec/core/framework/index_framework.h"
#include "ivf_builder.h"

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
#endif

using namespace zvec::core;
using namespace zvec::ailego;
using namespace std;

class IVFSearcherTest : public testing::Test {
 public:
 protected:
  void SetUp();
  void TearDown();
  void prepare_index_holder(uint32_t base_key, uint32_t num);

  void prepare_rand_index_holder(uint32_t base_key, uint32_t num);

  void prepare_fp16_index_holder(uint32_t base_key, uint32_t num);

  void prepare_fp32_index_holder(uint32_t base_key, uint32_t num);

  void prepare_binary_index_holder(uint32_t base_key, uint32_t num);

  void prepare_int8_index_holder(uint32_t base_key, uint32_t num);

  void prepare_same_index_holder(uint32_t base_key, uint32_t num);

  IndexMeta index_meta_;
  Params params_;
  uint32_t dimension_;
  IndexHolder::Pointer holder_;
  std::string index_path_;
  IndexThreads::Pointer threads_{};
};

// Installs the per-test defaults: 8-dimensional FP32 vectors under the
// "SquaredEuclidean" metric, "4*2" centroids with two chained KmeansCluster
// stages, and the on-disk index path. threads_ is randomly either left unset
// or pointed at a SingleQueueIndexThreads instance.
void IVFSearcherTest::SetUp() {
  dimension_ = 8U;
  index_path_ = "./ivf_searcher.index";

  index_meta_.set_meta(IndexMeta::DataType::DT_FP32, dimension_);
  index_meta_.set_metric("SquaredEuclidean", 0, Params());

  params_.set(PARAM_IVF_BUILDER_CENTROID_COUNT, "4*2");
  params_.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, "KmeansCluster*KmeansCluster");

  // Coin flip seeded from std::random_device decides whether a pool is used.
  std::random_device seed_source;
  std::mt19937 engine(seed_source());
  std::uniform_int_distribution<size_t> coin(0, 1);
  const bool use_thread_pool = coin(engine);
  if (use_thread_pool) {
    threads_ = std::make_shared<SingleQueueIndexThreads>();
  }
}

// Removes the index file path used by the test (index_path_ is set in
// SetUp()); the result of the removal is not checked.
void IVFSearcherTest::TearDown() {
  File::RemovePath(index_path_);
}

void IVFSearcherTest::prepare_index_holder(uint32_t base_key, uint32_t num) {
  MultiPassIndexHolder<IndexMeta::DataType::DT_FP32> *holder =
      new MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>(dimension_);
  uint32_t key = base_key;
  for (size_t i = 0; i < num; ++i) {
    NumericalVector<float> vec(dimension_);
    for (size_t j = 0; j < dimension_; ++j) {
      vec[j] = 1.0f * i;
    }
    holder->emplace(key + i, vec);
  }

  holder_.reset(holder);
}

void IVFSearcherTest::prepare_rand_index_holder(uint32_t base_key,
                                                uint32_t num) {
  MultiPassIndexHolder<IndexMeta::DataType::DT_FP32> *holder =
      new MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>(dimension_);
  uint32_t key = base_key;
  for (size_t i = 0; i < num; ++i) {
    NumericalVector<float> vec(dimension_);
    for (size_t j = 0; j < dimension_; ++j) {
      vec[j] = std::rand() % 1000 * 1.0;
    }
    holder->emplace(key + i, vec);
  }

  holder_.reset(holder);
}

void IVFSearcherTest::prepare_fp32_index_holder(uint32_t base_key,
                                                uint32_t num) {
  MultiPassIndexHolder<IndexMeta::DataType::DT_FP32> *holder =
      new MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>(dimension_);
  uint32_t key = base_key;
  for (size_t i = 0; i < num; ++i) {
    NumericalVector<float> vec(dimension_);
    for (size_t j = 0; j < dimension_; ++j) {
      vec[j] = 0.01f * i;
    }
    holder->emplace(key + i, vec);
  }

  holder_.reset(holder);
}

void IVFSearcherTest::prepare_fp16_index_holder(uint32_t base_key,
                                                uint32_t num) {
  MultiPassIndexHolder<IndexMeta::DataType::DT_FP32> *holder =
      new MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>(dimension_);
  uint32_t key = base_key;
  for (size_t i = 0; i < num; ++i) {
    NumericalVector<float> vec(dimension_);
    for (size_t j = 0; j < dimension_; ++j) {
      vec[j] = 0.01f * i;
    }
    holder->emplace(key + i, vec);
  }

  IndexConverter::Pointer conveter =
      IndexFactory::CreateConverter("HalfFloatConverter");
  conveter->init(index_meta_, Params());
  IndexHolder::Pointer new_holder(holder);
  conveter->transform(new_holder);
  holder_ = conveter->result();
}

void IVFSearcherTest::prepare_int8_index_holder(uint32_t base_key,
                                                uint32_t num) {
  MultiPassIndexHolder<IndexMeta::DataType::DT_INT8> *holder =
      new MultiPassIndexHolder<IndexMeta::DataType::DT_INT8>(dimension_);
  uint32_t key = base_key;
  for (size_t i = 0; i < num; ++i) {
    NumericalVector<int8_t> vec(dimension_);
    for (size_t j = 0; j < dimension_; ++j) {
      vec[j] = (int8_t)(i % 128);
    }
    holder->emplace(key + i, vec);
  }

  holder_.reset(holder);
}

void IVFSearcherTest::prepare_binary_index_holder(uint32_t base_key,
                                                  uint32_t num) {
  MultiPassIndexHolder<IndexMeta::DataType::DT_BINARY32> *holder =
      new MultiPassIndexHolder<IndexMeta::DataType::DT_BINARY32>(dimension_);
  uint32_t key = base_key;
  for (size_t i = 0; i < num; ++i) {
    BinaryVector<uint32_t> vec(dimension_);
    for (size_t j = 0; j < dimension_ && j < i; ++j) {
      vec.set(j);
    }
    holder->emplace(key + i, vec);
  }

  holder_.reset(holder);
}

void IVFSearcherTest::prepare_same_index_holder(uint32_t base_key,
                                                uint32_t num) {
  MultiPassIndexHolder<IndexMeta::DataType::DT_FP32> *holder =
      new MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>(dimension_);
  uint32_t key = base_key;
  for (size_t i = 0; i < num; ++i) {
    NumericalVector<float> vec(dimension_);
    for (size_t j = 0; j < dimension_; ++j) {
      vec[j] = 8;
    }
    holder->emplace(key + i, vec);
  }

  holder_.reset(holder);
}

// A searcher must accept the fixture's default parameter set.
TEST_F(IVFSearcherTest, TestInit) {
  IVFSearcher searcher;
  EXPECT_EQ(0, searcher.init(params_));
}

// End-to-end check with a single K-means centroid: builds an index over 33
// FP32 documents (every component of document i equals i), dumps it to
// index_path_, reloads it through MMapFileReadStorage and verifies single and
// batch queries for both search_bf_impl() and search_impl().
//
// With the fixture's "SquaredEuclidean" metric and an all-32 query, the
// document with key 32 - i is expected at rank i with score i * i * dimension_.
TEST_F(IVFSearcherTest, TestSimple) {
  IVFBuilder builder;
  //    index_meta_.set_major_order(IndexMeta::MO_ROW);
  params_.set(PARAM_IVF_BUILDER_CENTROID_COUNT, "1");
  params_.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, "KmeansCluster");

  int ret = builder.init(index_meta_, params_);
  EXPECT_EQ(0, ret);
  prepare_index_holder(0, 33);
  ret = builder.train(threads_, holder_);
  EXPECT_EQ(0, ret);
  ret = builder.build(threads_, holder_);
  EXPECT_EQ(0, ret);
  IndexDumper::Pointer dumper = IndexFactory::CreateDumper("FileDumper");
  // Fatal check: the dumper is dereferenced on the very next line.
  ASSERT_NE(dumper, nullptr);
  ret = dumper->create(index_path_);
  EXPECT_EQ(0, ret);

  // The dump status used to be stored in `ret` without ever being verified.
  ret = builder.dump(dumper);
  EXPECT_EQ(0, ret);
  EXPECT_EQ((size_t)33, builder.stats().built_count());
  EXPECT_EQ((size_t)33, builder.stats().dumped_count());
  EXPECT_EQ((size_t)0, builder.stats().discarded_count());
  EXPECT_EQ(0, dumper->close());

  IVFSearcher searcher;
  Params params;
  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);
  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);

  ret = searcher.init(params);
  EXPECT_EQ(0, ret);

  IndexStorage::Pointer container =
      IndexFactory::CreateStorage("MMapFileReadStorage");
  // Fatal check: the storage is dereferenced right below.
  ASSERT_TRUE(!!container);

  Params container_params;
  container_params.set("proxima.mmap_file.container.memory_warmup", true);
  container->init(container_params);
  ret = container->open(index_path_, false);
  EXPECT_EQ(0, ret);

  ret = searcher.load(container, IndexMetric::Pointer());
  EXPECT_EQ(0, ret);

  // Single query: every component is 32, i.e. identical to document 32.
  std::vector<float> query;
  for (size_t i = 0; i < dimension_; ++i) {
    query.push_back(32.0f);
  }

  // Batch of qnum queries; query q is identical to document q.
  size_t qnum = 33;
  std::vector<float> query1;
  for (size_t i = 0; i < dimension_ * qnum; ++i) {
    query1.push_back(i / dimension_);
  }


  auto context = searcher.create_context();
  // Fatal check: the context is dereferenced in every section below.
  ASSERT_TRUE(!!context);
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension_);

  // single bf search
  {
    size_t topk = 33;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    // Fatal size check so the indexing below cannot run out of bounds.
    ASSERT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      ASSERT_EQ((uint64_t)32 - i, result[i].key());
      // Float-tolerant comparison, matching the knn section of this test.
      ASSERT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
    }
  }

  // batch bf search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)topk, result.size());
      EXPECT_EQ((uint64_t)q, result[0].key());
      EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  // single knn search
  {
    size_t topk = 33;
    context->set_topk(topk);
    ret = searcher.search_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    ASSERT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      EXPECT_EQ((uint64_t)32 - i, result[i].key());
      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
    }
  }

  // batch knn search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)topk, result.size());
      EXPECT_EQ((uint64_t)q, result[0].key());
      EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  ret = searcher.unload();
  EXPECT_EQ(0, ret);
}

// Cosine variant of the simple test: the documents are passed through
// "CosineNormalizeConverter" before building, and the query is converted with
// the reformer named by the converter's meta before a brute-force search.
// Only the best hit is checked: its score must be within 1e-2 of 0.
TEST_F(IVFSearcherTest, TestSimpleCosine) {
  IVFBuilder builder;
  //    index_meta_.set_major_order(IndexMeta::MO_ROW);
  params_.set(PARAM_IVF_BUILDER_CENTROID_COUNT, "1");
  params_.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, "KmeansCluster");

  Params converter_params;
  auto converter = IndexFactory::CreateConverter("CosineNormalizeConverter");
  ASSERT_TRUE(converter != nullptr);
  auto original_index_meta = index_meta_;
  original_index_meta.set_metric("Cosine", 0, Params());
  EXPECT_EQ(0, converter->init(original_index_meta, converter_params));
  IndexMeta index_meta = converter->meta();
  auto reformer = IndexFactory::CreateReformer(index_meta.reformer_name());
  ASSERT_TRUE(reformer != nullptr);
  ASSERT_EQ(0, reformer->init(index_meta.reformer_params()));

  int ret = builder.init(index_meta, params_);
  EXPECT_EQ(0, ret);
  prepare_index_holder(0, 33);
  converter->transform(holder_);
  auto holder = converter->result();

  EXPECT_EQ(0, builder.train(threads_, holder));
  EXPECT_EQ(0, builder.build(threads_, holder));
  IndexDumper::Pointer dumper = IndexFactory::CreateDumper("FileDumper");
  // Fatal check: the dumper is dereferenced on the very next line.
  ASSERT_NE(dumper, nullptr);
  EXPECT_EQ(0, dumper->create(index_path_));

  // The dump status used to be stored in `ret` without ever being verified.
  ret = builder.dump(dumper);
  EXPECT_EQ(0, ret);
  EXPECT_EQ((size_t)33, builder.stats().built_count());
  EXPECT_EQ((size_t)33, builder.stats().dumped_count());
  EXPECT_EQ((size_t)0, builder.stats().discarded_count());
  EXPECT_EQ(0, dumper->close());

  IVFSearcher searcher;
  Params params;
  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);
  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);

  ret = searcher.init(params);
  EXPECT_EQ(0, ret);

  IndexStorage::Pointer container =
      IndexFactory::CreateStorage("MMapFileReadStorage");
  // Fatal check: the storage is dereferenced right below.
  ASSERT_TRUE(!!container);

  Params container_params;
  container_params.set("proxima.mmap_file.container.memory_warmup", true);
  container->init(container_params);
  ret = container->open(index_path_, false);
  EXPECT_EQ(0, ret);

  ret = searcher.load(container, IndexMetric::Pointer());
  EXPECT_EQ(0, ret);

  // Query components are 32, 33, ..., 32 + dimension_ - 1.
  // (The unused batch-query locals of the original were removed.)
  std::vector<float> query;
  for (size_t i = 0; i < dimension_; ++i) {
    query.push_back(32.0f + i);
  }

  auto context = searcher.create_context();
  // Fatal check: the context is dereferenced below.
  ASSERT_TRUE(!!context);
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension_);

  // single bf search
  {
    size_t topk = 33;
    context->set_topk(topk);

    // Convert the raw query with the reformer before searching.
    std::string new_vec;
    IndexQueryMeta new_meta;
    ASSERT_EQ(0, reformer->convert(query.data(), qmeta, &new_vec, &new_meta));

    ret = searcher.search_bf_impl(new_vec.data(), new_meta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    // Fatal size check so the indexing below cannot run out of bounds.
    ASSERT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < 1; ++i) {
      // ASSERT_EQ(29, result[i].key());
      EXPECT_NEAR(0, result[i].score(), 1e-2);
    }
  }
  ret = searcher.unload();
  EXPECT_EQ(0, ret);
}

// Builds an index over `total` FP32 documents with the fixture's default
// parameters (major order left untouched), dumps it to index_path_, reloads
// it and verifies single/batch results of search_bf_impl() and search_impl().
//
// The single query equals document total - 1, so rank i is expected to hold
// key (total - 1) - i with score i * i * dimension_.
TEST_F(IVFSearcherTest, TestColumnMajorFloatWithBuildMemory) {
  IVFBuilder builder;
  //    index_meta_.set_major_order(IndexMeta::MO_ROW);
  //    params_.set("proxima.hc.builder.thread_count", 1);
  int ret = builder.init(index_meta_, params_);
  EXPECT_EQ(0, ret);
  int total = 1000;
  prepare_index_holder(0, total);
  ret = builder.train(threads_, holder_);
  EXPECT_EQ(0, ret);
  ret = builder.build(threads_, holder_);
  EXPECT_EQ(0, ret);
  IndexDumper::Pointer dumper = IndexFactory::CreateDumper("FileDumper");
  // Fatal check: the dumper is dereferenced on the very next line.
  ASSERT_NE(dumper, nullptr);
  ret = dumper->create(index_path_);
  EXPECT_EQ(0, ret);

  // The dump status used to be stored in `ret` without ever being verified.
  ret = builder.dump(dumper);
  EXPECT_EQ(0, ret);
  EXPECT_EQ((size_t)total, builder.stats().built_count());
  EXPECT_EQ((size_t)total, builder.stats().dumped_count());
  EXPECT_EQ((size_t)0, builder.stats().discarded_count());
  EXPECT_EQ(0, dumper->close());

  IVFSearcher searcher;
  Params params;
  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);
  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);

  ret = searcher.init(params);
  EXPECT_EQ(0, ret);

  IndexStorage::Pointer container =
      IndexFactory::CreateStorage("MMapFileReadStorage");
  // Fatal check: the storage is dereferenced right below.
  ASSERT_TRUE(!!container);

  Params container_params;
  container_params.set("proxima.mmap_file.container.memory_warmup", true);
  container->init(container_params);
  ret = container->open(index_path_, false);
  EXPECT_EQ(0, ret);

  ret = searcher.load(container, IndexMetric::Pointer());
  EXPECT_EQ(0, ret);

  // Single query: identical to the last document (key total - 1).
  std::vector<float> query;
  for (size_t i = 0; i < dimension_; ++i) {
    query.push_back((total - 1) * 1.0f);
  }

  // Batch of qnum queries; query q is identical to document q.
  size_t qnum = 63;
  std::vector<float> query1;
  for (size_t i = 0; i < dimension_ * qnum; ++i) {
    query1.push_back(i / dimension_);
  }


  auto context = searcher.create_context();
  // Fatal check: the context is dereferenced in every section below.
  ASSERT_TRUE(!!context);
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension_);

  // single bf search
  {
    size_t topk = (size_t)total;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    // Fatal size check so the indexing below cannot run out of bounds.
    ASSERT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      ASSERT_EQ((uint64_t)(total - 1) - i, result[i].key());
      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
    }
  }

  // batch bf search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)topk, result.size());
      EXPECT_EQ((uint64_t)q, result[0].key());
      EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  // single knn search
  {
    size_t topk = 100;
    context->set_topk(topk);
    ret = searcher.search_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    ASSERT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      // Derived from `total` rather than a hard-coded 999, as in the bf case.
      EXPECT_EQ((uint64_t)(total - 1) - i, result[i].key());
      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
    }
  }

  // batch knn search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)topk, result.size());
      EXPECT_EQ((uint64_t)q, result[0].key());
      EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  ret = searcher.unload();
  EXPECT_EQ(0, ret);
}

// Same index as the build-memory test (1000 FP32 documents, default major
// order), but every search runs with a context filter installed. The filter
// returns true for every key > 0, and the expectations below require that
// only the document with key 0 is ever returned.
TEST_F(IVFSearcherTest, TestColumnMajorFloatWithFilter) {
  IVFBuilder builder;
  int ret = builder.init(index_meta_, params_);
  EXPECT_EQ(0, ret);
  prepare_index_holder(0, 1000);
  ret = builder.train(threads_, holder_);
  EXPECT_EQ(0, ret);
  ret = builder.build(threads_, holder_);
  EXPECT_EQ(0, ret);
  IndexDumper::Pointer dumper = IndexFactory::CreateDumper("FileDumper");
  // Fatal check: the dumper is dereferenced on the very next line.
  ASSERT_NE(dumper, nullptr);
  ret = dumper->create(index_path_);
  EXPECT_EQ(0, ret);

  // The dump status used to be stored in `ret` without ever being verified.
  ret = builder.dump(dumper);
  EXPECT_EQ(0, ret);
  EXPECT_EQ(0, dumper->close());
  EXPECT_EQ((size_t)1000, builder.stats().built_count());
  EXPECT_EQ((size_t)1000, builder.stats().dumped_count());
  EXPECT_EQ((size_t)0, builder.stats().discarded_count());

  IVFSearcher searcher;
  Params params;
  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);
  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);

  ret = searcher.init(params);
  EXPECT_EQ(0, ret);

  IndexStorage::Pointer container =
      IndexFactory::CreateStorage("MMapFileReadStorage");
  // Fatal check: the storage is dereferenced right below.
  ASSERT_TRUE(!!container);

  Params container_params;
  container_params.set("proxima.mmap_file.container.memory_warmup", true);
  container->init(container_params);
  ret = container->open(index_path_, false);
  EXPECT_EQ(0, ret);

  ret = searcher.load(container, IndexMetric::Pointer());
  EXPECT_EQ(0, ret);

  // Single query: every component is 999, i.e. identical to document 999.
  std::vector<float> query;
  for (size_t i = 0; i < dimension_; ++i) {
    query.push_back(999.0f);
  }

  // Batch of qnum queries; query q is identical to document q.
  size_t qnum = 63;
  std::vector<float> query1;
  for (size_t i = 0; i < dimension_ * qnum; ++i) {
    query1.push_back(i / dimension_);
  }


  auto context = searcher.create_context();
  // Fatal check: the context is dereferenced in every section below.
  ASSERT_TRUE(!!context);
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension_);
  // Returns true for every key except 0.
  context->set_filter([](uint64_t key) {
    if (key > 0) {
      return true;
    }
    return false;
  });
  // single bf search
  {
    size_t topk = 1000;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    // Fatal size check so the indexing below cannot run out of bounds.
    ASSERT_EQ((size_t)1, result.size());
    for (size_t i = 0; i < 1; ++i) {
      EXPECT_EQ((uint64_t)0, result[i].key());
      EXPECT_FLOAT_EQ((float)999 * 999 * dimension_, result[i].score());
    }
  }

  // batch bf search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)1, result.size());
      EXPECT_EQ((uint64_t)0, result[0].key());
      EXPECT_FLOAT_EQ((float)q * q * dimension_, result[0].score());
    }
  }

  // single knn search
  {
    size_t topk = 100;
    context->set_topk(topk);
    ret = searcher.search_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    ASSERT_EQ((size_t)1, result.size());
    for (size_t i = 0; i < 1; ++i) {
      EXPECT_EQ((uint64_t)0, result[i].key());
      EXPECT_FLOAT_EQ((float)999 * 999 * dimension_, result[i].score());
    }
  }

  // batch knn search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)1, result.size());
      EXPECT_EQ((uint64_t)0, result[0].key());
      EXPECT_FLOAT_EQ((float)q * q * dimension_, result[0].score());
    }
  }

  ret = searcher.unload();
  EXPECT_EQ(0, ret);
}

///////////////////////////  row major ////////////////////////////////
// Row-major counterpart of the build-memory test: the index meta is switched
// to MO_ROW before building 1000 FP32 documents; the index is then dumped,
// reloaded and queried through search_bf_impl() and search_impl().
//
// The single query equals document 999, so rank i is expected to hold key
// 999 - i with score i * i * dimension_.
TEST_F(IVFSearcherTest, TestRowMajorFloatWithBuildMemory) {
  index_meta_.set_major_order(IndexMeta::MO_ROW);
  IVFBuilder builder;
  int ret = builder.init(index_meta_, params_);
  EXPECT_EQ(0, ret);
  prepare_index_holder(0, 1000);
  ret = builder.train(threads_, holder_);
  EXPECT_EQ(0, ret);
  ret = builder.build(threads_, holder_);
  EXPECT_EQ(0, ret);
  IndexDumper::Pointer dumper = IndexFactory::CreateDumper("FileDumper");
  // Fatal check: the dumper is dereferenced on the very next line.
  ASSERT_NE(dumper, nullptr);
  ret = dumper->create(index_path_);
  EXPECT_EQ(0, ret);

  // The dump status used to be stored in `ret` without ever being verified.
  ret = builder.dump(dumper);
  EXPECT_EQ(0, ret);
  EXPECT_EQ(0, dumper->close());
  EXPECT_EQ((size_t)1000, builder.stats().built_count());
  EXPECT_EQ((size_t)1000, builder.stats().dumped_count());
  EXPECT_EQ((size_t)0, builder.stats().discarded_count());

  IVFSearcher searcher;
  Params params;
  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);
  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);

  ret = searcher.init(params);
  EXPECT_EQ(0, ret);

  IndexStorage::Pointer container =
      IndexFactory::CreateStorage("MMapFileReadStorage");
  // Fatal check: the storage is dereferenced right below.
  ASSERT_TRUE(!!container);

  Params container_params;
  container_params.set("proxima.mmap_file.container.memory_warmup", true);
  container->init(container_params);
  ret = container->open(index_path_, false);
  EXPECT_EQ(0, ret);

  ret = searcher.load(container, IndexMetric::Pointer());
  EXPECT_EQ(0, ret);

  // Single query: every component is 999, i.e. identical to document 999.
  std::vector<float> query;
  for (size_t i = 0; i < dimension_; ++i) {
    query.push_back(999.0f);
  }

  // Batch of qnum queries; query q is identical to document q.
  size_t qnum = 63;
  std::vector<float> query1;
  for (size_t i = 0; i < dimension_ * qnum; ++i) {
    query1.push_back(i / dimension_);
  }


  auto context = searcher.create_context();
  // Fatal check: the context is dereferenced in every section below.
  ASSERT_TRUE(!!context);
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension_);

  // single bf search
  {
    size_t topk = 1000;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    // Fatal size check so the indexing below cannot run out of bounds.
    ASSERT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      EXPECT_EQ((uint64_t)999 - i, result[i].key());
      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
    }
  }

  // batch bf search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)topk, result.size());
      EXPECT_EQ((uint64_t)q, result[0].key());
      EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  // single knn search
  {
    size_t topk = 100;
    context->set_topk(topk);
    ret = searcher.search_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    ASSERT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      EXPECT_EQ((uint64_t)999 - i, result[i].key());
      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
    }
  }

  // batch knn search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)topk, result.size());
      EXPECT_EQ((uint64_t)q, result[0].key());
      EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  ret = searcher.unload();
  EXPECT_EQ(0, ret);
}

// Row-major counterpart of the filter test: the index meta is switched to
// MO_ROW, 1000 FP32 documents are built, and every search runs with a context
// filter that returns true for every key > 0. The expectations below require
// that only the document with key 0 is ever returned.
TEST_F(IVFSearcherTest, TestRowMajorFloatWithFilter) {
  index_meta_.set_major_order(IndexMeta::MO_ROW);
  IVFBuilder builder;
  int ret = builder.init(index_meta_, params_);
  EXPECT_EQ(0, ret);
  prepare_index_holder(0, 1000);
  ret = builder.train(threads_, holder_);
  EXPECT_EQ(0, ret);
  ret = builder.build(threads_, holder_);
  EXPECT_EQ(0, ret);
  IndexDumper::Pointer dumper = IndexFactory::CreateDumper("FileDumper");
  // Fatal check: the dumper is dereferenced on the very next line.
  ASSERT_NE(dumper, nullptr);
  ret = dumper->create(index_path_);
  EXPECT_EQ(0, ret);

  // The dump status used to be stored in `ret` without ever being verified.
  ret = builder.dump(dumper);
  EXPECT_EQ(0, ret);
  EXPECT_EQ(0, dumper->close());
  EXPECT_EQ((size_t)1000, builder.stats().built_count());
  EXPECT_EQ((size_t)1000, builder.stats().dumped_count());
  EXPECT_EQ((size_t)0, builder.stats().discarded_count());

  IVFSearcher searcher;
  Params params;
  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);
  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);

  ret = searcher.init(params);
  EXPECT_EQ(0, ret);

  IndexStorage::Pointer container =
      IndexFactory::CreateStorage("MMapFileReadStorage");
  // Fatal check: the storage is dereferenced right below.
  ASSERT_TRUE(!!container);

  Params container_params;
  container_params.set("proxima.mmap_file.container.memory_warmup", true);
  container->init(container_params);
  ret = container->open(index_path_, false);
  EXPECT_EQ(0, ret);

  ret = searcher.load(container, IndexMetric::Pointer());
  EXPECT_EQ(0, ret);

  // Single query: every component is 999, i.e. identical to document 999.
  std::vector<float> query;
  for (size_t i = 0; i < dimension_; ++i) {
    query.push_back(999.0f);
  }

  // Batch of qnum queries; query q is identical to document q.
  size_t qnum = 63;
  std::vector<float> query1;
  for (size_t i = 0; i < dimension_ * qnum; ++i) {
    query1.push_back(i / dimension_);
  }


  auto context = searcher.create_context();
  // Fatal check: the context is dereferenced in every section below.
  ASSERT_TRUE(!!context);
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension_);
  // Returns true for every key except 0.
  context->set_filter([](uint64_t key) {
    if (key > 0) {
      return true;
    }
    return false;
  });
  // single bf search
  {
    size_t topk = 1000;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    // Fatal size check so the indexing below cannot run out of bounds.
    ASSERT_EQ((size_t)1, result.size());
    for (size_t i = 0; i < 1; ++i) {
      EXPECT_EQ((uint64_t)0, result[i].key());
      EXPECT_FLOAT_EQ((float)999 * 999 * dimension_, result[i].score());
    }
  }

  // batch bf search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)1, result.size());
      EXPECT_EQ((uint64_t)0, result[0].key());
      EXPECT_FLOAT_EQ((float)q * q * dimension_, result[0].score());
    }
  }

  // single knn search
  {
    size_t topk = 100;
    context->set_topk(topk);
    ret = searcher.search_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    ASSERT_EQ((size_t)1, result.size());
    for (size_t i = 0; i < 1; ++i) {
      EXPECT_EQ((uint64_t)0, result[i].key());
      EXPECT_FLOAT_EQ((float)999 * 999 * dimension_, result[i].score());
    }
  }

  // batch knn search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)1, result.size());
      EXPECT_EQ((uint64_t)0, result[0].key());
      EXPECT_FLOAT_EQ((float)q * q * dimension_, result[0].score());
    }
  }

  ret = searcher.unload();
  EXPECT_EQ(0, ret);
}

// Builds a one-level IVF index (10 k-means centroids) over 1000 row-major FP32
// vectors of dimension 256, dumps it to index_path_, reloads it through
// MMapFileReadStorage and verifies brute-force and KNN search for a single
// query and for a batch of 63 queries.
TEST_F(IVFSearcherTest, TestRowMajorFloatWith1LevelAndBuildMemory) {
  IVFBuilder builder;
  Params build_params;
  build_params.set(PARAM_IVF_BUILDER_CENTROID_COUNT, "10");
  build_params.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, "KmeansCluster");

  dimension_ = 256;
  index_meta_.set_meta(IndexMeta::DataType::DT_FP32, dimension_);
  index_meta_.set_major_order(IndexMeta::MO_ROW);

  int ret = builder.init(index_meta_, build_params);
  EXPECT_EQ(0, ret);
  prepare_index_holder(0, 1000);
  ret = builder.train(threads_, holder_);
  EXPECT_EQ(0, ret);
  ret = builder.build(threads_, holder_);
  EXPECT_EQ(0, ret);

  IndexDumper::Pointer dumper = IndexFactory::CreateDumper("FileDumper");
  // Fatal check: everything below dereferences the dumper.
  ASSERT_TRUE(!!dumper);
  ret = dumper->create(index_path_);
  EXPECT_EQ(0, ret);

  ret = builder.dump(dumper);
  // The dump status must be verified, not just stored.
  EXPECT_EQ(0, ret);
  EXPECT_EQ(0, dumper->close());
  EXPECT_EQ((size_t)1000, builder.stats().built_count());
  EXPECT_EQ((size_t)1000, builder.stats().dumped_count());
  EXPECT_EQ((size_t)0, builder.stats().discarded_count());

  IVFSearcher searcher;
  Params params;
  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);
  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);

  ret = searcher.init(params);
  EXPECT_EQ(0, ret);

  IndexStorage::Pointer container =
      IndexFactory::CreateStorage("MMapFileReadStorage");
  // Fatal check: the storage is dereferenced right after.
  ASSERT_TRUE(!!container);

  Params container_params;
  container_params.set("proxima.mmap_file.container.memory_warmup", true);
  container->init(container_params);
  ret = container->open(index_path_, false);
  EXPECT_EQ(0, ret);

  ret = searcher.load(container, IndexMetric::Pointer());
  EXPECT_EQ(0, ret);

  // Single query: every component is 999.
  std::vector<float> query(dimension_, 999.0f);

  // Batch of qnum queries: every component of query q equals q.
  size_t qnum = 63;
  std::vector<float> query1;
  query1.reserve(dimension_ * qnum);
  for (size_t i = 0; i < dimension_ * qnum; ++i) {
    query1.push_back(i / dimension_);
  }

  auto context = searcher.create_context();
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension_);

  // single bf search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    // Fatal size check so the indexing below can never run out of range.
    ASSERT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      EXPECT_EQ((uint64_t)999 - i, result[i].key());
      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
    }
  }

  // batch bf search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)topk, result.size());
      // Query q is expected to match document q exactly (distance 0).
      EXPECT_EQ((uint64_t)q, result[0].key());
      EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  // single knn search
  {
    size_t topk = 3;
    context->set_topk(topk);
    ret = searcher.search_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    ASSERT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      EXPECT_EQ((uint64_t)999 - i, result[i].key());
      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
    }
  }

  // batch knn search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)topk, result.size());
      EXPECT_EQ((uint64_t)q, result[0].key());
      EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  ret = searcher.unload();
  EXPECT_EQ(0, ret);
}

// Column-major counterpart of the one-level FP32 test: builds an IVF index
// (10 k-means centroids) over 1000 FP32 vectors of dimension 256 stored in
// column-major order, dumps and reloads it, then verifies brute-force and KNN
// search for a single query and for a batch of 63 queries.
TEST_F(IVFSearcherTest, TestColumnMajorFloatWith1LevelAndBuildMemory) {
  IVFBuilder builder;
  Params build_params;
  build_params.set(PARAM_IVF_BUILDER_CENTROID_COUNT, "10");
  build_params.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, "KmeansCluster");

  dimension_ = 256;
  index_meta_.set_meta(IndexMeta::DataType::DT_FP32, dimension_);
  index_meta_.set_major_order(IndexMeta::MO_COLUMN);

  int ret = builder.init(index_meta_, build_params);
  EXPECT_EQ(0, ret);
  prepare_index_holder(0, 1000);
  ret = builder.train(threads_, holder_);
  EXPECT_EQ(0, ret);
  ret = builder.build(threads_, holder_);
  EXPECT_EQ(0, ret);

  IndexDumper::Pointer dumper = IndexFactory::CreateDumper("FileDumper");
  // Fatal check: everything below dereferences the dumper.
  ASSERT_TRUE(!!dumper);
  ret = dumper->create(index_path_);
  EXPECT_EQ(0, ret);

  ret = builder.dump(dumper);
  // The dump status must be verified, not just stored.
  EXPECT_EQ(0, ret);
  EXPECT_EQ(0, dumper->close());
  EXPECT_EQ((size_t)1000, builder.stats().built_count());
  EXPECT_EQ((size_t)1000, builder.stats().dumped_count());
  EXPECT_EQ((size_t)0, builder.stats().discarded_count());

  IVFSearcher searcher;
  Params params;
  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);
  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);

  ret = searcher.init(params);
  EXPECT_EQ(0, ret);

  IndexStorage::Pointer container =
      IndexFactory::CreateStorage("MMapFileReadStorage");
  // Fatal check: the storage is dereferenced right after.
  ASSERT_TRUE(!!container);

  Params container_params;
  container_params.set("proxima.mmap_file.container.memory_warmup", true);
  container->init(container_params);
  ret = container->open(index_path_, false);
  EXPECT_EQ(0, ret);

  ret = searcher.load(container, IndexMetric::Pointer());
  EXPECT_EQ(0, ret);

  // Single query: every component is 999.
  std::vector<float> query(dimension_, 999.0f);

  // Batch of qnum queries: every component of query q equals q.
  size_t qnum = 63;
  std::vector<float> query1;
  query1.reserve(dimension_ * qnum);
  for (size_t i = 0; i < dimension_ * qnum; ++i) {
    query1.push_back(i / dimension_);
  }

  auto context = searcher.create_context();
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension_);

  // single bf search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    // Fatal size check so the indexing below can never run out of range.
    ASSERT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      EXPECT_EQ((uint64_t)999 - i, result[i].key());
      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
    }
  }

  // batch bf search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)topk, result.size());
      // Query q is expected to match document q exactly (distance 0).
      EXPECT_EQ((uint64_t)q, result[0].key());
      EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  // single knn search
  {
    size_t topk = 3;
    context->set_topk(topk);
    ret = searcher.search_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    ASSERT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      EXPECT_EQ((uint64_t)999 - i, result[i].key());
      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
    }
  }

  // batch knn search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)topk, result.size());
      EXPECT_EQ((uint64_t)q, result[0].key());
      EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  ret = searcher.unload();
  EXPECT_EQ(0, ret);
}

// Builds an IVF index over 128 column-major INT8 vectors of dimension 12 using
// the SquaredEuclidean metric and the fixture's params_, dumps and reloads it,
// then verifies brute-force and KNN search for a single all-127 query and for
// a batch of 63 queries.
TEST_F(IVFSearcherTest, TestColumnMajorInt8WithBuildMemory) {
  IVFBuilder builder;
  dimension_ = 12;
  index_meta_.set_meta(IndexMeta::DataType::DT_INT8, dimension_);
  index_meta_.set_metric("SquaredEuclidean", 0, Params());
  index_meta_.set_major_order(IndexMeta::MO_COLUMN);

  int ret = builder.init(index_meta_, params_);
  EXPECT_EQ(0, ret);
  size_t fnum = 128;
  prepare_int8_index_holder(0, fnum);
  ret = builder.train(threads_, holder_);
  EXPECT_EQ(0, ret);
  ret = builder.build(threads_, holder_);
  EXPECT_EQ(0, ret);

  IndexDumper::Pointer dumper = IndexFactory::CreateDumper("FileDumper");
  // Fatal check: everything below dereferences the dumper.
  ASSERT_TRUE(!!dumper);
  ret = dumper->create(index_path_);
  EXPECT_EQ(0, ret);

  ret = builder.dump(dumper);
  // The dump status must be verified, not just stored.
  EXPECT_EQ(0, ret);
  EXPECT_EQ(0, dumper->close());
  EXPECT_EQ((size_t)fnum, builder.stats().built_count());
  EXPECT_EQ((size_t)fnum, builder.stats().dumped_count());
  EXPECT_EQ((size_t)0, builder.stats().discarded_count());

  IVFSearcher searcher;
  Params params;
  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);
  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);

  ret = searcher.init(params);
  EXPECT_EQ(0, ret);

  IndexStorage::Pointer container =
      IndexFactory::CreateStorage("MMapFileReadStorage");
  // Fatal check: the storage is dereferenced right after.
  ASSERT_TRUE(!!container);

  Params container_params;
  container_params.set("proxima.mmap_file.container.memory_warmup", true);
  container->init(container_params);
  ret = container->open(index_path_, false);
  EXPECT_EQ(0, ret);

  ret = searcher.load(container, IndexMetric::Pointer());
  EXPECT_EQ(0, ret);

  // Single query: every component is 127.
  std::vector<int8_t> query(dimension_, int8_t{127});

  // Batch of qnum queries: every component of query q equals q.
  size_t qnum = 63;
  std::vector<int8_t> query1;
  query1.reserve(dimension_ * qnum);
  for (size_t i = 0; i < dimension_ * qnum; ++i) {
    query1.push_back(i / dimension_);
  }

  auto context = searcher.create_context();
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_INT8, dimension_);

  // single bf search
  {
    size_t topk = 128;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    // Fatal size check so the indexing below can never run out of range.
    ASSERT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      EXPECT_EQ((uint64_t)127 - i, result[i].key());
      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
    }
  }

  // batch bf search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)topk, result.size());
      // Query q is expected to match document q exactly (distance 0).
      EXPECT_EQ((uint64_t)q, result[0].key());
      EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  // single knn search
  {
    size_t topk = 100;
    context->set_topk(topk);
    ret = searcher.search_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    ASSERT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      EXPECT_EQ((uint64_t)127 - i, result[i].key());
      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
    }
  }

  // batch knn search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)topk, result.size());
      EXPECT_EQ((uint64_t)q, result[0].key());
      EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  ret = searcher.unload();
  EXPECT_EQ(0, ret);
}

// Row-major counterpart of the INT8 test: builds an IVF index over 128 INT8
// vectors of dimension 12 with the SquaredEuclidean metric and the fixture's
// params_, dumps and reloads it, then verifies brute-force and KNN search for
// a single all-127 query and for a batch of 63 queries.
TEST_F(IVFSearcherTest, TestRowMajorInt8WithBuildMemory) {
  IVFBuilder builder;
  dimension_ = 12;
  index_meta_.set_meta(IndexMeta::DataType::DT_INT8, dimension_);
  index_meta_.set_metric("SquaredEuclidean", 0, Params());
  index_meta_.set_major_order(IndexMeta::MO_ROW);

  int ret = builder.init(index_meta_, params_);
  EXPECT_EQ(0, ret);
  size_t fnum = 128;
  prepare_int8_index_holder(0, fnum);
  ret = builder.train(threads_, holder_);
  EXPECT_EQ(0, ret);
  ret = builder.build(threads_, holder_);
  EXPECT_EQ(0, ret);

  IndexDumper::Pointer dumper = IndexFactory::CreateDumper("FileDumper");
  // Fatal check: everything below dereferences the dumper.
  ASSERT_TRUE(!!dumper);
  ret = dumper->create(index_path_);
  EXPECT_EQ(0, ret);

  ret = builder.dump(dumper);
  // The dump status must be verified, not just stored.
  EXPECT_EQ(0, ret);
  EXPECT_EQ(0, dumper->close());
  EXPECT_EQ((size_t)fnum, builder.stats().built_count());
  EXPECT_EQ((size_t)fnum, builder.stats().dumped_count());
  EXPECT_EQ((size_t)0, builder.stats().discarded_count());

  IVFSearcher searcher;
  Params params;
  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);
  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);

  ret = searcher.init(params);
  EXPECT_EQ(0, ret);

  IndexStorage::Pointer container =
      IndexFactory::CreateStorage("MMapFileReadStorage");
  // Fatal check: the storage is dereferenced right after.
  ASSERT_TRUE(!!container);

  Params container_params;
  container_params.set("proxima.mmap_file.container.memory_warmup", true);
  container->init(container_params);
  ret = container->open(index_path_, false);
  EXPECT_EQ(0, ret);

  ret = searcher.load(container, IndexMetric::Pointer());
  EXPECT_EQ(0, ret);

  // Single query: every component is 127.
  std::vector<int8_t> query(dimension_, int8_t{127});

  // Batch of qnum queries: every component of query q equals q.
  size_t qnum = 63;
  std::vector<int8_t> query1;
  query1.reserve(dimension_ * qnum);
  for (size_t i = 0; i < dimension_ * qnum; ++i) {
    query1.push_back(i / dimension_);
  }

  auto context = searcher.create_context();
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_INT8, dimension_);

  // single bf search
  {
    size_t topk = 128;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    // Fatal size check so the indexing below can never run out of range.
    ASSERT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      EXPECT_EQ((uint64_t)127 - i, result[i].key());
      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
    }
  }

  // batch bf search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)topk, result.size());
      // Query q is expected to match document q exactly (distance 0).
      EXPECT_EQ((uint64_t)q, result[0].key());
      EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  // single knn search
  {
    size_t topk = 100;
    context->set_topk(topk);
    ret = searcher.search_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    ASSERT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      EXPECT_EQ((uint64_t)127 - i, result[i].key());
      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
    }
  }

  // batch knn search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)topk, result.size());
      EXPECT_EQ((uint64_t)q, result[0].key());
      EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  ret = searcher.unload();
  EXPECT_EQ(0, ret);
}

// Builds an IVF index over 257 column-major 256-bit binary vectors
// (DT_BINARY32) with the Hamming metric and the fixture's params_, dumps and
// reloads it, then verifies brute-force and KNN search for a single all-ones
// query and for a batch of 63 queries.
TEST_F(IVFSearcherTest, TestColumnMajorBinaryWithBuildMemory) {
  IVFBuilder builder;
  dimension_ = 256;
  index_meta_.set_meta(IndexMeta::DataType::DT_BINARY32, dimension_);
  index_meta_.set_metric("Hamming", 0, Params());
  index_meta_.set_major_order(IndexMeta::MO_COLUMN);

  int ret = builder.init(index_meta_, params_);
  EXPECT_EQ(0, ret);
  size_t fnum = 257;
  prepare_binary_index_holder(0, fnum);
  ret = builder.train(threads_, holder_);
  EXPECT_EQ(0, ret);
  ret = builder.build(threads_, holder_);
  EXPECT_EQ(0, ret);

  IndexDumper::Pointer dumper = IndexFactory::CreateDumper("FileDumper");
  // Fatal check: everything below dereferences the dumper.
  ASSERT_TRUE(!!dumper);
  ret = dumper->create(index_path_);
  EXPECT_EQ(0, ret);

  ret = builder.dump(dumper);
  // The dump status must be verified, not just stored.
  EXPECT_EQ(0, ret);
  EXPECT_EQ(0, dumper->close());
  EXPECT_EQ((size_t)fnum, builder.stats().built_count());
  EXPECT_EQ((size_t)fnum, builder.stats().dumped_count());
  EXPECT_EQ((size_t)0, builder.stats().discarded_count());

  IVFSearcher searcher;
  Params params;
  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);
  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);

  ret = searcher.init(params);
  EXPECT_EQ(0, ret);

  IndexStorage::Pointer container =
      IndexFactory::CreateStorage("MMapFileReadStorage");
  // Fatal check: the storage is dereferenced right after.
  ASSERT_TRUE(!!container);

  Params container_params;
  container_params.set("proxima.mmap_file.container.memory_warmup", true);
  container->init(container_params);
  ret = container->open(index_path_, false);
  EXPECT_EQ(0, ret);

  ret = searcher.load(container, IndexMetric::Pointer());
  EXPECT_EQ(0, ret);

  // Single query: all dimension_ bits are set.
  BinaryVector<uint32_t> query(dimension_);
  for (size_t i = 0; i < dimension_; ++i) {
    query.set(i);
  }

  // Batch of qnum queries packed back to back: query i has its first i bits
  // set (capped at dimension_).
  size_t qnum = 63;
  BinaryVector<uint32_t> query1(dimension_ * qnum);
  for (size_t i = 0; i < qnum; ++i) {
    for (size_t j = 0; j < dimension_ && j < i; ++j) {
      query1.set(i * dimension_ + j);
    }
  }

  auto context = searcher.create_context();
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_BINARY32, dimension_);

  // single bf search
  {
    size_t topk = 128;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    // Fatal size check so the indexing below can never run out of range.
    ASSERT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      EXPECT_EQ((uint64_t)256 - i, result[i].key());
      EXPECT_FLOAT_EQ((float)i, result[i].score());
    }
  }

  // batch bf search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)topk, result.size());
      // Query q is expected to match document q exactly (distance 0).
      EXPECT_EQ((uint64_t)q, result[0].key());
      EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  // single knn search
  {
    size_t topk = 100;
    context->set_topk(topk);
    ret = searcher.search_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    ASSERT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      EXPECT_EQ((uint64_t)256 - i, result[i].key());
      EXPECT_FLOAT_EQ((float)i, result[i].score());
    }
  }

  // batch knn search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)topk, result.size());
      EXPECT_EQ((uint64_t)q, result[0].key());
      EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  ret = searcher.unload();
  EXPECT_EQ(0, ret);
}

// Row-major counterpart of the binary test: builds an IVF index over 257
// 256-bit binary vectors (DT_BINARY32) with the Hamming metric and the
// fixture's params_, dumps and reloads it, then verifies brute-force and KNN
// search for a single all-ones query and for a batch of 63 queries.
TEST_F(IVFSearcherTest, TestRowMajorBinaryWithBuildMemory) {
  IVFBuilder builder;
  dimension_ = 256;
  index_meta_.set_meta(IndexMeta::DataType::DT_BINARY32, dimension_);
  index_meta_.set_metric("Hamming", 0, Params());
  index_meta_.set_major_order(IndexMeta::MO_ROW);

  int ret = builder.init(index_meta_, params_);
  EXPECT_EQ(0, ret);
  size_t fnum = 257;
  prepare_binary_index_holder(0, fnum);
  ret = builder.train(threads_, holder_);
  EXPECT_EQ(0, ret);
  ret = builder.build(threads_, holder_);
  EXPECT_EQ(0, ret);

  IndexDumper::Pointer dumper = IndexFactory::CreateDumper("FileDumper");
  // Fatal check: everything below dereferences the dumper.
  ASSERT_TRUE(!!dumper);
  ret = dumper->create(index_path_);
  EXPECT_EQ(0, ret);

  ret = builder.dump(dumper);
  // The dump status must be verified, not just stored.
  EXPECT_EQ(0, ret);
  EXPECT_EQ(0, dumper->close());
  EXPECT_EQ((size_t)fnum, builder.stats().built_count());
  EXPECT_EQ((size_t)fnum, builder.stats().dumped_count());
  EXPECT_EQ((size_t)0, builder.stats().discarded_count());

  IVFSearcher searcher;
  Params params;
  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);
  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);

  ret = searcher.init(params);
  EXPECT_EQ(0, ret);

  IndexStorage::Pointer container =
      IndexFactory::CreateStorage("MMapFileReadStorage");
  // Fatal check: the storage is dereferenced right after.
  ASSERT_TRUE(!!container);

  Params container_params;
  container_params.set("proxima.mmap_file.container.memory_warmup", true);
  container->init(container_params);
  ret = container->open(index_path_, false);
  EXPECT_EQ(0, ret);

  ret = searcher.load(container, IndexMetric::Pointer());
  EXPECT_EQ(0, ret);

  // Single query: all dimension_ bits are set.
  BinaryVector<uint32_t> query(dimension_);
  for (size_t i = 0; i < dimension_; ++i) {
    query.set(i);
  }

  // Batch of qnum queries packed back to back: query i has its first i bits
  // set (capped at dimension_).
  size_t qnum = 63;
  BinaryVector<uint32_t> query1(dimension_ * qnum);
  for (size_t i = 0; i < qnum; ++i) {
    for (size_t j = 0; j < dimension_ && j < i; ++j) {
      query1.set(i * dimension_ + j);
    }
  }

  auto context = searcher.create_context();
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_BINARY32, dimension_);

  // single bf search
  {
    size_t topk = 128;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    // Fatal size check so the indexing below can never run out of range.
    ASSERT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      EXPECT_EQ((uint64_t)256 - i, result[i].key());
      EXPECT_FLOAT_EQ((float)i, result[i].score());
    }
  }

  // batch bf search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)topk, result.size());
      // Query q is expected to match document q exactly (distance 0).
      EXPECT_EQ((uint64_t)q, result[0].key());
      EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  // single knn search
  {
    size_t topk = 100;
    context->set_topk(topk);
    ret = searcher.search_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    ASSERT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      EXPECT_EQ((uint64_t)256 - i, result[i].key());
      EXPECT_FLOAT_EQ((float)i, result[i].score());
    }
  }

  // batch knn search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)topk, result.size());
      EXPECT_EQ((uint64_t)q, result[0].key());
      EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  ret = searcher.unload();
  EXPECT_EQ(0, ret);
}

// Builds a two-level IVF index ("3*3" centroids, k-means at both levels) over
// only 10 row-major FP32 documents whose components are all (key % 5), dumps
// it to a dedicated file, reloads it and checks that brute-force and KNN
// search still return the nearest documents.
// NOTE(review): with only five distinct vectors and "3*3" centroids requested,
// some centroids presumably end up empty, as the test name suggests.
TEST_F(IVFSearcherTest, TestSearchWithEmptyCentroid) {
  IVFBuilder builder;
  Params params;
  params.set(PARAM_IVF_BUILDER_CENTROID_COUNT, "3*3");
  params.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, "KmeansCluster*KmeansCluster");

  dimension_ = 256;
  index_meta_.set_meta(IndexMeta::DataType::DT_FP32, dimension_);
  index_meta_.set_major_order(IndexMeta::MO_ROW);

  int ret = builder.init(index_meta_, params);
  EXPECT_EQ(0, ret);
  size_t doc_cnt = 10;

  // Document i has every component equal to i % 5, so keys i and i + 5 hold
  // identical vectors.
  MultiPassIndexHolder<IndexMeta::DataType::DT_FP32> *holder =
      new MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>(dimension_);
  for (size_t i = 0; i < doc_cnt; ++i) {
    NumericalVector<float> vec(dimension_);
    for (size_t j = 0; j < dimension_; ++j) {
      vec[j] = i % 5;
    }
    holder->emplace(i, vec);
  }
  // Hand the allocation over to the fixture's holder_.
  holder_.reset(holder);

  ret = builder.train(threads_, holder_);
  EXPECT_EQ(0, ret);

  ret = builder.build(threads_, holder_);
  EXPECT_EQ(0, ret);

  IndexDumper::Pointer dumper = IndexFactory::CreateDumper("FileDumper");
  // Fatal check: everything below dereferences the dumper.
  ASSERT_TRUE(!!dumper);

  std::string path = "searcher_empty_centroid.index";
  ret = dumper->create(path);
  EXPECT_EQ(0, ret);

  ret = builder.dump(dumper);
  EXPECT_EQ(0, ret);
  EXPECT_EQ((size_t)10, builder.stats().built_count());
  EXPECT_EQ((size_t)10, builder.stats().dumped_count());
  EXPECT_EQ((size_t)0, builder.stats().discarded_count());
  // A failed close would leave an unusable index file; verify it like the
  // other tests of this fixture do.
  EXPECT_EQ(0, dumper->close());

  IVFSearcher searcher;
  // The searcher options are added on top of the builder options in params.
  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);
  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);

  ret = searcher.init(params);
  EXPECT_EQ(0, ret);

  IndexStorage::Pointer container =
      IndexFactory::CreateStorage("MMapFileReadStorage");
  // Fatal check: the storage is dereferenced right after.
  ASSERT_TRUE(!!container);

  Params container_params;
  container_params.set("proxima.mmap_file.container.memory_warmup", true);
  container->init(container_params);
  ret = container->open(path, false);
  EXPECT_EQ(0, ret);

  ret = searcher.load(container, IndexMetric::Pointer());
  EXPECT_EQ(0, ret);

  // Single query: every component is 999, so the documents with the largest
  // component value (4, i.e. keys 4 and 9) are the closest.
  std::vector<float> query(dimension_, 999.0f);

  auto context = searcher.create_context();
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension_);

  // single bf search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    // Fatal size check so the indexing below can never run out of range.
    ASSERT_EQ((size_t)topk, result.size());
    auto key1 = result[0].key();
    EXPECT_TRUE(key1 == 4ul || key1 == 9ul);
  }

  // single knn search
  {
    size_t topk = 3;
    context->set_topk(topk);
    ret = searcher.search_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    ASSERT_EQ((size_t)topk, result.size());
    auto key1 = result[0].key();
    auto key2 = result[1].key();
    auto key3 = result[2].key();
    // Duplicate vectors tie, so either key of each pair is accepted.
    EXPECT_TRUE(key1 == 4ul || key1 == 9ul);
    EXPECT_TRUE(key2 == 4ul || key2 == 9ul);
    EXPECT_TRUE(key3 == 3ul || key3 == 8ul);
  }

  ret = searcher.unload();
  EXPECT_EQ(0, ret);
}

// Builds an IVF index over 1000 column-major FP16 vectors of dimension 8 with
// the fixture's params_, dumps and reloads it, converts FP32 queries to FP16
// with HalfFloatReformer and verifies brute-force and KNN search for a single
// query and for a batch of 63 queries. Scores of the single query are compared
// as a ratio with tolerance epsilon.
TEST_F(IVFSearcherTest, TestColumnMajorFp16WithBuildMemory) {
  const float epsilon = 1e-2;
  dimension_ = 8;
  // The holder is prepared while index_meta_ still describes FP32 data; the
  // meta is switched to FP16 only afterwards, before the builder is
  // initialised.
  index_meta_.set_meta(IndexMeta::DataType::DT_FP32, dimension_);
  index_meta_.set_major_order(IndexMeta::MO_COLUMN);

  prepare_fp16_index_holder(0, 1000);
  IVFBuilder builder;
  index_meta_.set_meta(IndexMeta::DataType::DT_FP16, dimension_);
  int ret = builder.init(index_meta_, params_);
  EXPECT_EQ(0, ret);
  ret = builder.train(threads_, holder_);
  EXPECT_EQ(0, ret);
  ret = builder.build(threads_, holder_);
  EXPECT_EQ(0, ret);

  IndexDumper::Pointer dumper = IndexFactory::CreateDumper("FileDumper");
  // Fatal check: everything below dereferences the dumper.
  ASSERT_TRUE(!!dumper);
  ret = dumper->create(index_path_);
  EXPECT_EQ(0, ret);

  ret = builder.dump(dumper);
  // The dump status must be verified, not just stored.
  EXPECT_EQ(0, ret);
  EXPECT_EQ((size_t)1000, builder.stats().built_count());
  EXPECT_EQ((size_t)1000, builder.stats().dumped_count());
  EXPECT_EQ((size_t)0, builder.stats().discarded_count());
  EXPECT_EQ(0, dumper->close());

  IVFSearcher searcher;
  Params params;
  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);
  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);

  ret = searcher.init(params);
  EXPECT_EQ(0, ret);

  IndexStorage::Pointer container =
      IndexFactory::CreateStorage("MMapFileReadStorage");
  // Fatal check: the storage is dereferenced right after.
  ASSERT_TRUE(!!container);

  Params container_params;
  container_params.set("proxima.mmap_file.container.memory_warmup", true);
  container->init(container_params);
  ret = container->open(index_path_, false);
  EXPECT_EQ(0, ret);

  ret = searcher.load(container, IndexMetric::Pointer());
  EXPECT_EQ(0, ret);

  // Single FP32 query: every component is -0.1.
  std::vector<float> query(dimension_, -0.1f);

  // Batch of qnum FP32 queries: every component of query q equals q * 0.01.
  size_t qnum = 63;
  std::vector<float> query1;
  query1.reserve(dimension_ * qnum);
  for (size_t i = 0; i < dimension_ * qnum; ++i) {
    query1.push_back(i / dimension_ * 0.01);
  }

  auto context = searcher.create_context();
  IndexQueryMeta qmeta1(IndexMeta::DataType::DT_FP32, dimension_);

  // Output buffers sized for 16-bit components.
  std::string query_buf;
  query_buf.resize(dimension_ * sizeof(uint16_t));
  std::string query1_buf;
  query1_buf.resize(dimension_ * sizeof(uint16_t) * qnum);

  IndexReformer::Pointer reformer =
      IndexFactory::CreateReformer("HalfFloatReformer");
  // Fatal check: the reformer is dereferenced right after.
  ASSERT_TRUE(!!reformer);
  // qmeta receives the meta of the transformed queries and is the one passed
  // to the searcher below.
  IndexQueryMeta qmeta;
  reformer->transform(query.data(), qmeta1, &query_buf, &qmeta);
  reformer->transform(query1.data(), qmeta1, qnum, &query1_buf, &qmeta);

  // single bf search
  {
    size_t topk = 1000;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query_buf.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    // Fatal size check so the indexing below can never run out of range.
    ASSERT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      EXPECT_EQ((uint64_t)i, result[i].key());
      // Expected/actual score ratio must be 1 within epsilon.
      EXPECT_NEAR((float)(0.01f * i + 0.1) * (0.01f * i + 0.1) * dimension_ /
                      result[i].score(),
                  1, epsilon);
    }
  }

  // batch bf search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query1_buf.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)topk, result.size());
      // Query q is expected to match document q exactly (distance 0).
      EXPECT_EQ((uint64_t)q, result[0].key());
      EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  // single knn search
  {
    size_t topk = 100;
    context->set_topk(topk);
    ret = searcher.search_impl(query_buf.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    ASSERT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      EXPECT_EQ((uint64_t)i, result[i].key());
      EXPECT_NEAR((float)(0.01f * i + 0.1) * (0.01f * i + 0.1) * dimension_ /
                      result[i].score(),
                  1, epsilon);
    }
  }

  // batch knn search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_impl(query1_buf.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)topk, result.size());
      EXPECT_EQ((uint64_t)q, result[0].key());
      EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  ret = searcher.unload();
  EXPECT_EQ(0, ret);
}

TEST_F(IVFSearcherTest, TestRowMajorFp16WithBuildMemory) {
  const float epsilon = 1e-2;
  dimension_ = 8;
  index_meta_.set_meta(IndexMeta::DataType::DT_FP32, dimension_);
  index_meta_.set_major_order(IndexMeta::MO_ROW);

  prepare_fp16_index_holder(0, 1000);
  IVFBuilder builder;
  index_meta_.set_meta(IndexMeta::DataType::DT_FP16, dimension_);
  int ret = builder.init(index_meta_, params_);
  EXPECT_EQ(0, ret);
  ret = builder.train(threads_, holder_);
  EXPECT_EQ(0, ret);
  ret = builder.build(threads_, holder_);
  EXPECT_EQ(0, ret);
  IndexDumper::Pointer dumper = IndexFactory::CreateDumper("FileDumper");
  ret = dumper->create(index_path_);
  EXPECT_EQ(0, ret);

  ret = builder.dump(dumper);
  EXPECT_EQ((size_t)1000, builder.stats().built_count());
  EXPECT_EQ((size_t)1000, builder.stats().dumped_count());
  EXPECT_EQ((size_t)0, builder.stats().discarded_count());
  EXPECT_EQ(0, dumper->close());

  IVFSearcher searcher;
  Params params;
  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);
  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);

  ret = searcher.init(params);
  EXPECT_EQ(0, ret);

  IndexStorage::Pointer container =
      IndexFactory::CreateStorage("MMapFileReadStorage");
  EXPECT_TRUE(!!container);

  Params container_params;
  container_params.set("proxima.mmap_file.container.memory_warmup", true);
  container->init(container_params);
  ret = container->open(index_path_, false);
  EXPECT_EQ(0, ret);

  ret = searcher.load(container, IndexMetric::Pointer());
  EXPECT_EQ(0, ret);

  std::vector<float> query;
  for (size_t i = 0; i < dimension_; ++i) {
    query.push_back(-0.1f);
  }

  size_t qnum = 63;
  std::vector<float> query1;
  for (size_t i = 0; i < dimension_ * qnum; ++i) {
    query1.push_back(i / dimension_ * 0.01);
  }


  auto context = searcher.create_context();
  IndexQueryMeta qmeta1(IndexMeta::DataType::DT_FP32, dimension_);

  std::string query_buf;
  query_buf.resize(dimension_ * sizeof(uint16_t));
  std::string query1_buf;
  query1_buf.resize(dimension_ * sizeof(uint16_t) * qnum);

  IndexReformer::Pointer reformer =
      IndexFactory::CreateReformer("HalfFloatReformer");
  IndexQueryMeta qmeta;
  reformer->transform(query.data(), qmeta1, &query_buf, &qmeta);
  reformer->transform(query1.data(), qmeta1, qnum, &query1_buf, &qmeta);
  // single bf serch
  {
    size_t topk = 1000;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query_buf.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    EXPECT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      EXPECT_EQ((uint64_t)i, result[i].key());
      EXPECT_NEAR((float)(0.01f * i + 0.1) * (0.01f * i + 0.1) * dimension_ /
                      result[i].score(),
                  1, epsilon);
    }
  }

  // batch bf search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query1_buf.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      EXPECT_EQ((size_t)topk, result.size());
      EXPECT_EQ((uint64_t)q, result[0].key());
      EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  // single knn search
  {
    size_t topk = 100;
    context->set_topk(topk);
    ret = searcher.search_impl(query_buf.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    EXPECT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      EXPECT_EQ((uint64_t)i, result[i].key());
      EXPECT_NEAR((float)(0.01f * i + 0.1) * (0.01f * i + 0.1) * dimension_ /
                      result[i].score(),
                  1, epsilon);
    }
  }

  // batch knn search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_impl(query1_buf.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      EXPECT_EQ((size_t)topk, result.size());
      EXPECT_EQ((uint64_t)q, result[0].key());
      EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  ret = searcher.unload();
  EXPECT_EQ(0, ret);
}

// Builds an IVF index with "proxima.ivf.builder.graph_type" set to "hnsw",
// dumps it to index_path_, reloads it through MMapFileReadStorage and checks
// the brute-force and knn search paths with single and batched FP32 queries.
// NOTE(review): the expected keys/scores assume prepare_index_holder() emits
// vectors whose components all equal the document key and that scores are
// squared Euclidean distances -- confirm against the fixture.
TEST_F(IVFSearcherTest, TestColumnMajorFloatWithHnswGraphType) {
  IVFBuilder builder;
  params_.set("proxima.ivf.builder.graph_type", "hnsw");
  params_.set("proxima.ivf.builder.graph_ef", 200);
  params_.set("proxima.ivf.builder.graph_scan_ratio", 1.0);
  int ret = builder.init(index_meta_, params_);
  EXPECT_EQ(0, ret);
  int total = 1000;
  prepare_index_holder(0, total);
  ret = builder.train(threads_, holder_);
  EXPECT_EQ(0, ret);
  ret = builder.build(threads_, holder_);
  EXPECT_EQ(0, ret);
  IndexDumper::Pointer dumper = IndexFactory::CreateDumper("FileDumper");
  ret = dumper->create(index_path_);
  EXPECT_EQ(0, ret);

  // The dump status must be verified as well; it was previously assigned to
  // `ret` and then silently overwritten.
  ret = builder.dump(dumper);
  EXPECT_EQ(0, ret);
  EXPECT_EQ((size_t)total, builder.stats().built_count());
  EXPECT_EQ((size_t)total, builder.stats().dumped_count());
  EXPECT_EQ((size_t)0, builder.stats().discarded_count());
  EXPECT_EQ(0, dumper->close());

  IVFSearcher searcher;
  Params params;
  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);
  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);

  ret = searcher.init(params);
  EXPECT_EQ(0, ret);

  IndexStorage::Pointer container =
      IndexFactory::CreateStorage("MMapFileReadStorage");
  // Fatal check: the storage is dereferenced on the very next statements.
  ASSERT_TRUE(!!container);

  Params container_params;
  container_params.set("proxima.mmap_file.container.memory_warmup", true);
  container->init(container_params);
  ret = container->open(index_path_, false);
  EXPECT_EQ(0, ret);

  ret = searcher.load(container, IndexMetric::Pointer());
  EXPECT_EQ(0, ret);

  // Single query: every component equals the largest key (total - 1).
  std::vector<float> query;
  for (size_t i = 0; i < dimension_; ++i) {
    query.push_back((total - 1) * 1.0f);
  }

  // Batched queries: query q has every component equal to q.
  size_t qnum = 63;
  std::vector<float> query1;
  for (size_t i = 0; i < dimension_ * qnum; ++i) {
    query1.push_back(i / dimension_);
  }


  auto context = searcher.create_context();
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension_);

  // single bf search
  {
    size_t topk = (size_t)total;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    // Fatal check: a short list must not be indexed below.
    ASSERT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      EXPECT_EQ((uint64_t)(total - 1) - i, result[i].key());
      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
    }
  }

  // batch bf search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)topk, result.size());
      EXPECT_EQ((uint64_t)q, result[0].key());
      EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  // single knn search
  {
    size_t topk = 100;
    context->set_topk(topk);
    ret = searcher.search_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    ASSERT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      // Derived from `total` instead of the former hard-coded 999.
      EXPECT_EQ((uint64_t)(total - 1) - i, result[i].key());
      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
    }
  }

  // batch knn search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)topk, result.size());
      EXPECT_EQ((uint64_t)q, result[0].key());
      EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  ret = searcher.unload();
  EXPECT_EQ(0, ret);
}

// Same scenario as the other column-major float tests, but the builder is
// configured with "proxima.ivf.builder.graph_type" = "ssg": build, dump to
// index_path_, reload through MMapFileReadStorage, then verify brute-force
// and knn search for single and batched FP32 queries.
// NOTE(review): the expected keys/scores assume prepare_index_holder() emits
// vectors whose components all equal the document key and that scores are
// squared Euclidean distances -- confirm against the fixture.
TEST_F(IVFSearcherTest, TestColumnMajorFloatWithSsgGraphType) {
  IVFBuilder builder;
  params_.set("proxima.ivf.builder.graph_type", "ssg");
  params_.set("proxima.ivf.builder.graph_ef", 200);
  params_.set("proxima.ivf.builder.graph_scan_ratio", 1.0);

  int ret = builder.init(index_meta_, params_);
  EXPECT_EQ(0, ret);
  int total = 1000;
  prepare_index_holder(0, total);
  ret = builder.train(threads_, holder_);
  EXPECT_EQ(0, ret);
  ret = builder.build(threads_, holder_);
  EXPECT_EQ(0, ret);
  IndexDumper::Pointer dumper = IndexFactory::CreateDumper("FileDumper");
  ret = dumper->create(index_path_);
  EXPECT_EQ(0, ret);

  // Check the dump status too; it used to be stored in `ret` and never
  // inspected.
  ret = builder.dump(dumper);
  EXPECT_EQ(0, ret);
  EXPECT_EQ((size_t)total, builder.stats().built_count());
  EXPECT_EQ((size_t)total, builder.stats().dumped_count());
  EXPECT_EQ((size_t)0, builder.stats().discarded_count());
  EXPECT_EQ(0, dumper->close());

  IVFSearcher searcher;
  Params params;
  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);
  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);

  ret = searcher.init(params);
  EXPECT_EQ(0, ret);

  IndexStorage::Pointer container =
      IndexFactory::CreateStorage("MMapFileReadStorage");
  // Must be fatal: the pointer is dereferenced immediately afterwards.
  ASSERT_TRUE(!!container);

  Params container_params;
  container_params.set("proxima.mmap_file.container.memory_warmup", true);
  container->init(container_params);
  ret = container->open(index_path_, false);
  EXPECT_EQ(0, ret);

  ret = searcher.load(container, IndexMetric::Pointer());
  EXPECT_EQ(0, ret);

  // Single query whose components all equal the largest key.
  std::vector<float> query;
  for (size_t i = 0; i < dimension_; ++i) {
    query.push_back((total - 1) * 1.0f);
  }

  // qnum queries laid out back to back; query q is filled with the value q.
  size_t qnum = 63;
  std::vector<float> query1;
  for (size_t i = 0; i < dimension_ * qnum; ++i) {
    query1.push_back(i / dimension_);
  }


  auto context = searcher.create_context();
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension_);

  // single bf search
  {
    size_t topk = (size_t)total;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    // Abort here on a short list rather than indexing past its end.
    ASSERT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      EXPECT_EQ((uint64_t)(total - 1) - i, result[i].key());
      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
    }
  }

  // batch bf search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)topk, result.size());
      EXPECT_EQ((uint64_t)q, result[0].key());
      EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  // single knn search
  {
    size_t topk = 100;
    context->set_topk(topk);
    ret = searcher.search_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    ASSERT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      // `total - 1` replaces the former literal 999 so the expectation
      // follows the document count.
      EXPECT_EQ((uint64_t)(total - 1) - i, result[i].key());
      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
    }
  }

  // batch knn search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)topk, result.size());
      EXPECT_EQ((uint64_t)q, result[0].key());
      EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  ret = searcher.unload();
  EXPECT_EQ(0, ret);
}

// Builds an IVF index with PARAM_IVF_BUILDER_CONVERTER_CLASS set to
// "Int8QuantizerConverter", dumps it to index_path_, reloads it through
// MMapFileReadStorage and verifies brute-force and knn search with single
// and batched FP32 queries.
// NOTE(review): the exact-score expectations assume prepare_index_holder()
// emits vectors whose components all equal the document key -- confirm
// against the fixture.
TEST_F(IVFSearcherTest, TestColumnMajorFloatWithInt8Converter) {
  IVFBuilder builder;
  // Work on a copy so the fixture's params_ stays untouched.
  auto build_params = params_;
  build_params.set(PARAM_IVF_BUILDER_CONVERTER_CLASS, "Int8QuantizerConverter");
  int ret = builder.init(index_meta_, build_params);
  EXPECT_EQ(0, ret);
  int total = 1000;
  prepare_index_holder(0, total);
  ret = builder.train(threads_, holder_);
  ASSERT_EQ(0, ret);
  ret = builder.build(threads_, holder_);
  EXPECT_EQ(0, ret);
  IndexDumper::Pointer dumper = IndexFactory::CreateDumper("FileDumper");
  ret = dumper->create(index_path_);
  EXPECT_EQ(0, ret);

  // The dump status was previously stored and never checked.
  ret = builder.dump(dumper);
  EXPECT_EQ(0, ret);
  EXPECT_EQ((size_t)total, builder.stats().built_count());
  EXPECT_EQ((size_t)total, builder.stats().dumped_count());
  EXPECT_EQ((size_t)0, builder.stats().discarded_count());
  EXPECT_EQ(0, dumper->close());

  IVFSearcher searcher;
  Params params;
  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);
  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);

  ret = searcher.init(params);
  EXPECT_EQ(0, ret);

  IndexStorage::Pointer container =
      IndexFactory::CreateStorage("MMapFileReadStorage");
  // Fatal on purpose: the storage is used right below.
  ASSERT_TRUE(!!container);

  Params container_params;
  container_params.set("proxima.mmap_file.container.memory_warmup", true);
  container->init(container_params);
  ret = container->open(index_path_, false);
  EXPECT_EQ(0, ret);

  ret = searcher.load(container, IndexMetric::Pointer());
  EXPECT_EQ(0, ret);

  // Single query filled with the largest key value.
  std::vector<float> query;
  for (size_t i = 0; i < dimension_; ++i) {
    query.push_back((total - 1) * 1.0f);
  }

  // Batch of qnum queries; the q-th one is filled with q.
  size_t qnum = 63;
  std::vector<float> query1;
  for (size_t i = 0; i < dimension_ * qnum; ++i) {
    query1.push_back(i / dimension_);
  }


  auto context = searcher.create_context();
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension_);

  // single bf search
  {
    size_t topk = (size_t)total;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    // Stop here if the list is short; the loop indexes topk entries.
    ASSERT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      EXPECT_EQ((uint64_t)(total - 1) - i, result[i].key());
      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
    }
  }

  // batch bf search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)topk, result.size());
      EXPECT_EQ((uint64_t)q, result[0].key());
      EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  // single knn search
  {
    size_t topk = 100;
    context->set_topk(topk);
    ret = searcher.search_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    ASSERT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      // Expressed through `total` rather than the literal 999.
      EXPECT_EQ((uint64_t)(total - 1) - i, result[i].key());
      EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
    }
  }

  // batch knn search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)topk, result.size());
      EXPECT_EQ((uint64_t)q, result[0].key());
      EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  ret = searcher.unload();
  EXPECT_EQ(0, ret);
}

// Builds an IVF index with PARAM_IVF_BUILDER_QUANTIZER_CLASS set to
// "HalfFloatConverter" over documents from prepare_fp32_index_holder(), dumps
// and reloads it, then verifies brute-force and knn search. Scores of the
// single query are compared as a ratio against 1 within `epsilon`.
// NOTE(review): the expected scores assume document i has every component
// equal to 0.01 * i and a squared Euclidean metric -- confirm against
// prepare_fp32_index_holder().
TEST_F(IVFSearcherTest, TestColumnMajorFloatWithFloat16Quantizer) {
  const float epsilon = 1e-2;

  IVFBuilder builder;
  auto build_params = params_;
  build_params.set(PARAM_IVF_BUILDER_QUANTIZER_CLASS, "HalfFloatConverter");
  int ret = builder.init(index_meta_, build_params);
  EXPECT_EQ(0, ret);
  int total = 1000;
  prepare_fp32_index_holder(0, total);
  ret = builder.train(threads_, holder_);
  ASSERT_EQ(0, ret);
  ret = builder.build(threads_, holder_);
  EXPECT_EQ(0, ret);
  IndexDumper::Pointer dumper = IndexFactory::CreateDumper("FileDumper");
  ret = dumper->create(index_path_);
  EXPECT_EQ(0, ret);

  // Verify the dump status; it was assigned but never asserted before.
  ret = builder.dump(dumper);
  EXPECT_EQ(0, ret);
  EXPECT_EQ((size_t)total, builder.stats().built_count());
  EXPECT_EQ((size_t)total, builder.stats().dumped_count());
  EXPECT_EQ((size_t)0, builder.stats().discarded_count());
  EXPECT_EQ(0, dumper->close());

  IVFSearcher searcher;
  Params params;
  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);
  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);

  ret = searcher.init(params);
  EXPECT_EQ(0, ret);

  IndexStorage::Pointer container =
      IndexFactory::CreateStorage("MMapFileReadStorage");
  // Fatal check because the storage is dereferenced next.
  ASSERT_TRUE(!!container);

  Params container_params;
  container_params.set("proxima.mmap_file.container.memory_warmup", true);
  container->init(container_params);
  ret = container->open(index_path_, false);
  EXPECT_EQ(0, ret);

  ret = searcher.load(container, IndexMetric::Pointer());
  EXPECT_EQ(0, ret);

  // Single query: every component is -0.1.
  std::vector<float> query;
  for (size_t i = 0; i < dimension_; ++i) {
    query.push_back(-0.1f);
  }

  // Batched queries: query q has every component equal to q * 0.01.
  size_t qnum = 63;
  std::vector<float> query1;
  for (size_t i = 0; i < dimension_ * qnum; ++i) {
    query1.push_back(i / dimension_ * 0.01);
  }

  auto context = searcher.create_context();
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension_);

  // single bf search
  {
    size_t topk = (size_t)total;
    context->set_topk(topk);
    // The filter returns false for every key; it stays installed on the
    // context for the remaining searches of this test.
    context->set_filter([](uint64_t) { return false; });
    ret = searcher.search_bf_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    // Abort on a short list instead of reading past its end below.
    ASSERT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      EXPECT_EQ((uint64_t)i, result[i].key());
      EXPECT_NEAR((float)(0.01f * i + 0.1) * (0.01f * i + 0.1) * dimension_ /
                      result[i].score(),
                  1, epsilon);
    }
  }

  // batch bf search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)topk, result.size());
      EXPECT_EQ((uint64_t)q, result[0].key());
      EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  // single knn search
  {
    size_t topk = 100;
    context->set_topk(topk);
    ret = searcher.search_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    ASSERT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      EXPECT_EQ((uint64_t)i, result[i].key());
      EXPECT_NEAR((float)(0.01f * i + 0.1) * (0.01f * i + 0.1) * dimension_ /
                      result[i].score(),
                  1, epsilon);
    }
  }

  // batch knn search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)topk, result.size());
      EXPECT_EQ((uint64_t)q, result[0].key());
      EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  ret = searcher.unload();
  EXPECT_EQ(0, ret);
}

// Builds an IVF index with both PARAM_IVF_BUILDER_CONVERTER_CLASS
// ("Int8QuantizerConverter") and PARAM_IVF_BUILDER_QUANTIZER_CLASS
// ("HalfFloatConverter") set, dumps and reloads it, then verifies brute-force
// and knn search. Scores of the single query are compared as a ratio against
// 1 within `epsilon`.
// NOTE(review): the expected scores assume document i has every component
// equal to 0.01 * i and a squared Euclidean metric -- confirm against
// prepare_fp32_index_holder().
TEST_F(IVFSearcherTest, TestColumnMajorFloatWithConverterAndQuantizer) {
  const float epsilon = 1e-2;
  IVFBuilder builder;
  auto build_params = params_;
  build_params.set(PARAM_IVF_BUILDER_CONVERTER_CLASS, "Int8QuantizerConverter");
  build_params.set(PARAM_IVF_BUILDER_QUANTIZER_CLASS, "HalfFloatConverter");
  int ret = builder.init(index_meta_, build_params);
  EXPECT_EQ(0, ret);
  int total = 1000;
  prepare_fp32_index_holder(0, total);
  ret = builder.train(threads_, holder_);
  ASSERT_EQ(0, ret);
  ret = builder.build(threads_, holder_);
  EXPECT_EQ(0, ret);
  IndexDumper::Pointer dumper = IndexFactory::CreateDumper("FileDumper");
  ret = dumper->create(index_path_);
  EXPECT_EQ(0, ret);

  // The dump status is asserted now; previously it was assigned and dropped.
  ret = builder.dump(dumper);
  EXPECT_EQ(0, ret);
  EXPECT_EQ((size_t)total, builder.stats().built_count());
  EXPECT_EQ((size_t)total, builder.stats().dumped_count());
  EXPECT_EQ((size_t)0, builder.stats().discarded_count());
  EXPECT_EQ(0, dumper->close());

  IVFSearcher searcher;
  Params params;
  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);
  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);

  ret = searcher.init(params);
  EXPECT_EQ(0, ret);

  IndexStorage::Pointer container =
      IndexFactory::CreateStorage("MMapFileReadStorage");
  // Fatal: a null storage would be dereferenced by the next statements.
  ASSERT_TRUE(!!container);

  Params container_params;
  container_params.set("proxima.mmap_file.container.memory_warmup", true);
  container->init(container_params);
  ret = container->open(index_path_, false);
  EXPECT_EQ(0, ret);

  ret = searcher.load(container, IndexMetric::Pointer());
  EXPECT_EQ(0, ret);

  // Single query with all components equal to -0.1.
  std::vector<float> query;
  for (size_t i = 0; i < dimension_; ++i) {
    query.push_back(-0.1f);
  }

  // qnum queries stored contiguously; query q is filled with q * 0.01.
  size_t qnum = 63;
  std::vector<float> query1;
  for (size_t i = 0; i < dimension_ * qnum; ++i) {
    query1.push_back(i / dimension_ * 0.01);
  }


  auto context = searcher.create_context();
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension_);

  // single bf search
  {
    size_t topk = (size_t)total;
    context->set_topk(topk);
    // Filter callback that returns false for every key; it remains set on
    // the context for the later searches as well.
    context->set_filter([](uint64_t) { return false; });
    ret = searcher.search_bf_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    // A short list must end the test here; the loop indexes topk entries.
    ASSERT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      EXPECT_EQ((uint64_t)i, result[i].key());
      EXPECT_NEAR((float)(0.01f * i + 0.1) * (0.01f * i + 0.1) * dimension_ /
                      result[i].score(),
                  1, epsilon);
    }
  }

  // batch bf search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)topk, result.size());
      ASSERT_EQ((uint64_t)q, result[0].key());
      EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  // single knn search
  {
    size_t topk = 100;
    context->set_topk(topk);
    ret = searcher.search_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    ASSERT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      EXPECT_EQ((uint64_t)i, result[i].key());
      EXPECT_NEAR((float)(0.01f * i + 0.1) * (0.01f * i + 0.1) * dimension_ /
                      result[i].score(),
                  1, epsilon);
    }
  }

  // batch knn search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)topk, result.size());
      EXPECT_EQ((uint64_t)q, result[0].key());
      EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  ret = searcher.unload();
  EXPECT_EQ(0, ret);
}

// Builds an IVF index using the "InnerProduct" metric with
// "Int8QuantizerConverter" as quantizer and
// PARAM_IVF_BUILDER_QUANTIZE_BY_CENTROID enabled, then checks that
// brute-force and knn search return keys (and, for the single brute-force
// query, scores) within the tolerances asserted below.
// NOTE(review): the expectations assume prepare_index_holder() emits vectors
// whose components all equal the document key -- confirm against the fixture.
TEST_F(IVFSearcherTest, TestQuantizedPerCentroid) {
  IVFBuilder builder;
  auto build_params = params_;
  auto meta = index_meta_;
  meta.set_metric("InnerProduct", 0, Params());
  build_params.set(PARAM_IVF_BUILDER_QUANTIZER_CLASS, "Int8QuantizerConverter");
  build_params.set(PARAM_IVF_BUILDER_QUANTIZE_BY_CENTROID, true);
  int ret = builder.init(meta, build_params);
  EXPECT_EQ(0, ret);
  int total = 1000;
  prepare_index_holder(0, total);
  ret = builder.train(threads_, holder_);
  ASSERT_EQ(0, ret);
  ret = builder.build(threads_, holder_);
  EXPECT_EQ(0, ret);
  IndexDumper::Pointer dumper = IndexFactory::CreateDumper("FileDumper");
  ret = dumper->create(index_path_);
  EXPECT_EQ(0, ret);

  // Assert the dump status; it was previously assigned and never checked.
  ret = builder.dump(dumper);
  EXPECT_EQ(0, ret);
  EXPECT_EQ((size_t)total, builder.stats().built_count());
  EXPECT_EQ((size_t)total, builder.stats().dumped_count());
  EXPECT_EQ((size_t)0, builder.stats().discarded_count());
  EXPECT_EQ(0, dumper->close());

  IVFSearcher searcher;
  Params params;
  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);
  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);

  ret = searcher.init(params);
  EXPECT_EQ(0, ret);

  IndexStorage::Pointer container =
      IndexFactory::CreateStorage("MMapFileReadStorage");
  // Fatal check: the storage is dereferenced right after.
  ASSERT_TRUE(!!container);

  Params container_params;
  container_params.set("proxima.mmap_file.container.memory_warmup", true);
  container->init(container_params);
  ret = container->open(index_path_, false);
  EXPECT_EQ(0, ret);

  ret = searcher.load(container, IndexMetric::Pointer());
  EXPECT_EQ(0, ret);

  // Single query: every component is 500.
  std::vector<float> query;
  for (size_t i = 0; i < dimension_; ++i) {
    query.push_back(500.0f);
  }

  // Batched queries; note the loop index deliberately starts at 1 here.
  size_t qnum = 63;
  std::vector<float> query1;
  for (size_t i = 1; i <= dimension_ * qnum; ++i) {
    query1.push_back(i / dimension_);
  }


  auto context = searcher.create_context();
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension_);

  // single bf search
  {
    size_t topk = (size_t)total;
    context->set_topk(topk);
    // Filter callback returning false for every key; it stays on the
    // context for the following searches.
    context->set_filter([](uint64_t) { return false; });
    ret = searcher.search_bf_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    // Stop on a short list rather than indexing beyond it.
    ASSERT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      // Quantization makes the ranking approximate, hence the key tolerance.
      ASSERT_NEAR((uint64_t)(total - 1) - i, result[i].key(), 150);
      float expect = (float)result[i].key() * 500.0f * dimension_;
      ASSERT_NEAR(expect, std::abs(result[i].score()), expect * 0.2 + 500000);
    }
  }

  // batch bf search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)topk, result.size());
      ASSERT_NEAR((uint64_t)(total - 1) - q, result[0].key(), 100);
      // EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  // single knn search
  {
    size_t topk = 10;
    context->set_topk(topk);
    ret = searcher.search_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    ASSERT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      EXPECT_NEAR((uint64_t)total - i - 1, result[i].key(), 100);
      // EXPECT_FLOAT_EQ((float)i * i * dimension_, result[i].score());
    }
  }

  // batch knn search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      ASSERT_EQ((size_t)topk, result.size());
      ASSERT_NEAR((uint64_t)(total - 1) - q, result[0].key(), 100);
      // EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  ret = searcher.unload();
  EXPECT_EQ(0, ret);
}

// Verifies that a search context created by one IVFSearcher can be passed to
// a different searcher. Three indexes are built whose keys are congruent to
// 0, 1 and 2 modulo 3 respectively, so the remainder of every returned key
// identifies the index that actually served the query. Two worker threads
// run the scenario concurrently.
TEST_F(IVFSearcherTest, TestSharedContext) {
  size_t dim = dimension_;

  // Creates `doc_cnt` documents with keys start + 3, start + 6, ...; the
  // i-th document has every component equal to i.
  auto gen_holder = [&](int start, size_t doc_cnt) {
    auto holder =
        make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);
    uint64_t key = start;
    for (size_t i = 0; i < doc_cnt; i++) {
      NumericalVector<float> vec(dim);
      for (size_t j = 0; j < dim; ++j) {
        vec[j] = i;
      }
      key += 3;
      EXPECT_TRUE(holder->emplace(key, vec));
    }
    return holder;
  };

  // Builds and dumps an index at `path`, then returns a searcher loaded from
  // a randomly chosen storage implementation. Every status code is checked
  // (non-fatally, because this lambda returns a value) so that a broken
  // setup is reported where it happens.
  auto gen_index = [&](int start, size_t docs, const std::string &path) {
    auto holder = gen_holder(start, docs);
    IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("IVFBuilder");
    Params params;
    params.set("proxima.ivf.builder.centroid_count", "16");
    EXPECT_EQ(0, builder->init(index_meta_, params));
    EXPECT_EQ(0, builder->train(holder));
    EXPECT_EQ(0, builder->build(holder));
    auto dumper = IndexFactory::CreateDumper("FileDumper");
    EXPECT_EQ(0, dumper->create(path));
    EXPECT_EQ(0, builder->dump(dumper));
    EXPECT_EQ(0, dumper->close());

    IndexSearcher::Pointer searcher =
        IndexFactory::CreateSearcher("IVFSearcher");
    auto name = rand() % 2 ? "FileReadStorage" : "MMapFileReadStorage";
    auto container = IndexFactory::CreateStorage(name);
    bool alone_file_handle = std::rand() % 2;
    bool lock_hot = std::rand() % 2;
    params.set("proxima.file.read_storage.alone_file_handle",
               alone_file_handle);
    params.set("proxima.file.read_storage.lock_hot_in_memory", lock_hot);
    container->init(params);
    EXPECT_EQ(0, container->open(path, false));
    EXPECT_EQ(0, searcher->init(Params()));
    EXPECT_EQ(0, searcher->load(container, IndexMetric::Pointer()));
    return searcher;
  };

  srand(Realtime::MilliSeconds());
  size_t docs1 = rand() % 500 + 100;
  size_t docs2 = rand() % 5000 + 100;
  size_t docs3 = rand() % 50000 + 100;
  auto path1 = "unittest-index/TestSharedContext.index1";
  auto path2 = "unittest-index/TestSharedContext.index2";
  auto path3 = "unittest-index/TestSharedContext.index3";
  auto searcher1 = gen_index(0, docs1, path1);
  auto searcher2 = gen_index(1, docs2, path2);
  auto searcher3 = gen_index(2, docs3, path3);

  // Index n serves keys congruent to n modulo 3.
  const IndexSearcher::Pointer searchers[] = {searcher1, searcher2, searcher3};
  const size_t topk = 10;

  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  auto do_test = [&]() {
    // Each worker owns its generator: rand() keeps hidden global state and
    // is not guaranteed to be safe when called from two threads at once.
    std::mt19937 rng(std::random_device{}());
    auto pick = [&rng](int bound) {
      return std::uniform_int_distribution<int>(0, bound - 1)(rng);
    };

    // Create the context from a random searcher, optionally with a filter
    // that returns false for every key.
    IndexSearcher::Context::Pointer ctx = searchers[pick(3)]->create_context();
    ASSERT_TRUE(!!ctx);
    if (pick(2) == 0) {
      ctx->set_filter([](uint64_t) { return false; });
    }
    ctx->set_topk(topk);

    int ret = 0;
    for (int i = 0; i < 100; ++i) {
      NumericalVector<float> query(dim);
      for (size_t j = 0; j < dim; ++j) {
        query[j] = i + 0.1f;
      }

      // Codes 0-2 run knn search and codes 3-5 run brute-force search on
      // searcher (code % 3), always reusing the same context.
      const int code = pick(6);
      const IndexSearcher::Pointer &target = searchers[code % 3];
      if (code < 3) {
        ret = target->search_impl(query.data(), qmeta, ctx);
      } else {
        ret = target->search_bf_impl(query.data(), qmeta, ctx);
      }

      ASSERT_EQ(0, ret);
      auto &results = ctx->result();
      // Fatal check: the loop below indexes topk entries.
      ASSERT_EQ(topk, results.size());
      for (size_t k = 0; k < topk; ++k) {
        EXPECT_EQ(static_cast<uint64_t>(code % 3), results[k].key() % 3);
      }
    }
  };
  auto t1 = std::async(std::launch::async, do_test);
  auto t2 = std::async(std::launch::async, do_test);
  t1.wait();
  t2.wait();
}

// Radius-limited search: after a plain top-k query succeeds, setting a score
// threshold on the context must shrink the result list and keep every
// remaining score within the radius.
TEST_F(IVFSearcherTest, TestRnnSearch) {
  IndexBuilder::Pointer ivf_builder = IndexFactory::CreateBuilder("IVFBuilder");
  ASSERT_NE(ivf_builder, nullptr);

  // Document n is a `dim`-dimensional vector whose components all equal n.
  size_t dim = 16;
  size_t doc_cnt = 1000UL;
  auto documents =
      make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);
  for (size_t doc = 0; doc < doc_cnt; doc++) {
    NumericalVector<float> features(dim);
    for (size_t d = 0; d < dim; ++d) {
      features[d] = doc;
    }
    ASSERT_TRUE(documents->emplace(doc, features));
  }

  // Train and build with 20 centroids.
  IndexMeta meta(IndexMeta::DataType::DT_FP32, dim);
  Params build_options;
  build_options.set("proxima.ivf.builder.centroid_count", "20");
  ASSERT_EQ(0, ivf_builder->init(meta, build_options));
  ASSERT_EQ(0, ivf_builder->train(documents));
  ASSERT_EQ(0, ivf_builder->build(documents));

  // Persist the index to a file in the working directory.
  string path = "IVFSearcherTest.TestRnnSearch";
  auto file_dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(file_dumper, nullptr);
  ASSERT_EQ(0, file_dumper->create(path));
  ASSERT_EQ(0, ivf_builder->dump(file_dumper));
  ASSERT_EQ(0, file_dumper->close());
  ASSERT_EQ(0, ivf_builder->cleanup());

  // Load the dumped index into a fresh searcher.
  IndexSearcher::Pointer ivf_searcher =
      IndexFactory::CreateSearcher("IVFSearcher");
  ASSERT_NE(ivf_searcher, nullptr);
  ASSERT_EQ(0, ivf_searcher->init(Params()));

  auto storage = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_EQ(0, storage->open(path, false));
  ASSERT_EQ(0, ivf_searcher->load(storage, IndexMetric::Pointer()));
  auto ctx = ivf_searcher->create_context();
  ASSERT_TRUE(!!ctx);

  // Query with the all-zero vector.
  NumericalVector<float> origin(dim);
  for (size_t d = 0; d < dim; ++d) {
    origin[d] = 0.0;
  }
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);

  // Without a threshold the search returns exactly topk documents.
  size_t topk = 50;
  ctx->set_topk(topk);
  ASSERT_EQ(0, ivf_searcher->search_impl(origin.data(), qmeta, ctx));
  auto &hits = ctx->result();
  ASSERT_EQ(topk, hits.size());

  // With the threshold applied, the same result reference must now hold
  // fewer than topk entries, none scoring above the radius.
  float radius = 1000.0f;
  ctx->set_threshold(radius);
  ASSERT_EQ(0, ivf_searcher->search_impl(origin.data(), qmeta, ctx));
  EXPECT_GT(topk, hits.size());
  for (size_t h = 0; h < hits.size(); ++h) {
    ASSERT_GE(radius, hits[h].score());
  }

  File::RemovePath(path);
}

// Checks the vector provider exposed by a loaded IVFSearcher: every stored
// vector must be retrievable by key, the iterator must visit exactly
// doc_cnt documents, and the provider's dimension, element size and data
// type must match index_meta_. Keys are either random 64-bit values or
// multiples of `step`, optionally shuffled; each vector has all components
// equal to its key.
TEST_F(IVFSearcherTest, TestProvider) {
  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("IVFBuilder");
  ASSERT_NE(builder, nullptr);
  auto holder = make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(
      dimension_);
  size_t doc_cnt = 5000UL;
  std::vector<uint64_t> keys(doc_cnt);
  srand(Realtime::MilliSeconds());
  bool rand_key = rand() % 2;
  bool rand_order = rand() % 2;
  size_t step = rand() % 2 + 1;
  LOG_DEBUG("randKey=%u randOrder=%u step=%zu", rand_key, rand_order, step);
  if (rand_key) {
    // Default-seeded engine: the random key sequence is the same every run.
    std::mt19937 mt;
    std::uniform_int_distribution<size_t> dt(
        0, std::numeric_limits<size_t>::max());
    for (size_t i = 0; i < doc_cnt; ++i) {
      keys[i] = dt(mt);
    }
  } else {
    std::iota(keys.begin(), keys.end(), 0U);
    // The lambda parameter uses the vector's element type (uint64_t) so no
    // implicit conversion happens on the way in.
    std::transform(keys.begin(), keys.end(), keys.begin(),
                   [&](uint64_t k) { return step * k; });
    if (rand_order) {
      uint32_t seed = Realtime::Seconds();
      std::shuffle(keys.begin(), keys.end(), std::default_random_engine(seed));
    }
  }
  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<float> vec(dimension_);
    for (size_t j = 0; j < dimension_; ++j) {
      vec[j] = keys[i];
    }
    ASSERT_TRUE(holder->emplace(keys[i], vec));
  }
  Params params;
  params.set("proxima.ivf.builder.centroid_count", "20");
  ASSERT_EQ(0, builder->init(index_meta_, params));
  ASSERT_EQ(0, builder->train(holder));
  ASSERT_EQ(0, builder->build(holder));
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  string path = index_path_ + "/TestProvider";
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  // test searcher
  IndexSearcher::Pointer searcher = IndexFactory::CreateSearcher("IVFSearcher");
  ASSERT_NE(searcher, nullptr);
  Params searcherParams;
  ASSERT_EQ(0, searcher->init(searcherParams));
  auto container = IndexFactory::CreateStorage("FileReadStorage");
  // Guard the factory result before it is dereferenced.
  ASSERT_TRUE(!!container);
  ASSERT_EQ(0, container->open(path, false));
  ASSERT_EQ(0, searcher->load(container, IndexMetric::Pointer()));

  auto provider = searcher->create_provider();
  // A null provider would otherwise crash the test instead of failing it.
  ASSERT_TRUE(!!provider);
  ASSERT_EQ(IndexMeta::DataType::DT_FP32, provider->data_type());

  // Random access: every key must resolve to its original vector.
  for (size_t i = 0; i < keys.size(); ++i) {
    const float *d1 =
        reinterpret_cast<const float *>(provider->get_vector(keys[i]));
    ASSERT_TRUE(d1);
    for (size_t j = 0; j < dimension_; ++j) {
      ASSERT_FLOAT_EQ(d1[j], keys[i]);
    }
  }

  // Sequential access: the iterator must yield each document exactly once.
  auto iter = provider->create_iterator();
  ASSERT_TRUE(!!iter);
  size_t cnt = 0;
  while (iter->is_valid()) {
    auto key = iter->key();
    const float *d = reinterpret_cast<const float *>(iter->data());
    for (size_t j = 0; j < dimension_; ++j) {
      ASSERT_FLOAT_EQ(d[j], key);
    }
    cnt++;
    iter->next();
  }
  ASSERT_EQ(cnt, doc_cnt);

  ASSERT_EQ(dimension_, provider->dimension());
  ASSERT_EQ(index_meta_.element_size(), provider->element_size());
  ASSERT_EQ(index_meta_.data_type(), provider->data_type());
}

// Builds an IVF index from FP32 input using the Int8QuantizerConverter
// (InnerProduct metric, quantize-by-centroid enabled), reloads it through an
// IVFSearcher and verifies that the provider exposes INT8 data for every key
// and that its iterator visits all documents.
//
// Key generation is randomized per run in the same way as in TestProvider.
TEST_F(IVFSearcherTest, TestProviderInt8) {
  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("IVFBuilder");
  ASSERT_NE(builder, nullptr);
  auto holder = make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(
      dimension_);
  size_t doc_cnt = 5000UL;
  std::vector<key_t> keys(doc_cnt);
  srand(Realtime::MilliSeconds());
  bool rand_key = rand() % 2;
  bool rand_order = rand() % 2;
  size_t step = rand() % 2 + 1;
  LOG_DEBUG("randKey=%u randOrder=%u step=%zu", rand_key, rand_order, step);
  if (rand_key) {
    // Arbitrary keys spread over the whole size_t range.
    std::mt19937 mt;
    std::uniform_int_distribution<size_t> dt(
        0, std::numeric_limits<size_t>::max());
    for (size_t i = 0; i < doc_cnt; ++i) {
      keys[i] = dt(mt);
    }
  } else {
    // Keys 0, step, 2*step, ... optionally in shuffled order.
    std::iota(keys.begin(), keys.end(), 0U);
    std::transform(keys.begin(), keys.end(), keys.begin(),
                   [&](key_t k) { return step * k; });
    if (rand_order) {
      uint32_t seed = Realtime::Seconds();
      std::shuffle(keys.begin(), keys.end(), std::default_random_engine(seed));
    }
  }
  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<float> vec(dimension_);
    for (size_t j = 0; j < dimension_; ++j) {
      vec[j] = keys[i];
    }
    ASSERT_TRUE(holder->emplace(keys[i], vec));
  }
  Params params;
  params.set("proxima.ivf.builder.centroid_count", "20");
  // NOTE(review): TestInt8QuantizerWithL2 uses the key
  // "proxima.ivf.builder.store_original_features" for what looks like the
  // same switch; confirm which spelling the builder actually reads.
  params.set("proxima.ivf.builder.retain_original_features", false);
  auto meta = index_meta_;
  meta.set_metric("InnerProduct", 0, Params());
  params.set(PARAM_IVF_BUILDER_QUANTIZER_CLASS, "Int8QuantizerConverter");
  params.set(PARAM_IVF_BUILDER_QUANTIZE_BY_CENTROID, true);
  ASSERT_EQ(0, builder->init(meta, params));
  ASSERT_EQ(0, builder->train(holder));
  ASSERT_EQ(0, builder->build(holder));
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  // NOTE(review): this is the same dump path as TestProvider uses.
  string path = index_path_ + "/TestProvider";
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  // test searcher
  IndexSearcher::Pointer searcher = IndexFactory::CreateSearcher("IVFSearcher");
  ASSERT_NE(searcher, nullptr);
  Params searcherParams;
  ASSERT_EQ(0, searcher->init(searcherParams));
  auto container = IndexFactory::CreateStorage("FileReadStorage");
  // Fail the test instead of dereferencing a null storage.
  ASSERT_TRUE(!!container);
  ASSERT_EQ(0, container->open(path, false));
  ASSERT_EQ(0, searcher->load(container, IndexMetric::Pointer()));

  auto provider = searcher->create_provider();
  ASSERT_TRUE(!!provider);
  ASSERT_EQ(IndexMeta::DataType::DT_INT8, provider->data_type());
  for (size_t i = 0; i < keys.size(); ++i) {
    auto d1 = reinterpret_cast<const int8_t *>(provider->get_vector(keys[i]));
    ASSERT_TRUE(d1);
    for (size_t j = 0; j < dimension_; ++j) {
      // NOTE(review): an int8_t is always < 255, so this comparison cannot
      // fail; it only forces every stored byte to be read.
      ASSERT_LT(d1[j], 255);
    }
  }

  auto iter = provider->create_iterator();
  size_t cnt = 0;
  while (iter->is_valid()) {
    const int8_t *d = reinterpret_cast<const int8_t *>(iter->data());
    for (size_t j = 0; j < dimension_; ++j) {
      // Same always-true comparison as above; it only touches the data.
      ASSERT_LT(d[j], 255);
    }
    cnt++;
    iter->next();
  }
  ASSERT_EQ(cnt, doc_cnt);

  ASSERT_EQ(dimension_, provider->dimension());
  // The fixture meta's element size must be four times the provider's.
  // Presumably index_meta_ describes FP32 vectors (4 bytes per component
  // versus 1 byte for INT8) -- confirm against the fixture's SetUp.
  ASSERT_EQ(index_meta_.element_size(), provider->element_size() * 4);
}

// Verifies that a single IVFSearcher instance can be reused: it loads an INT8
// index, is unloaded and cleaned up, then is re-initialized and loads an FP32
// index with a different dimension. After each load the provider must report
// the matching meta, document count and vector contents.
TEST_F(IVFSearcherTest, TestSearcherReuse) {
  // Builds and dumps an IVF index to `path` containing one document per key
  // in [base, doc_cnt); every component of a document's vector is set to its
  // key (converted to the element type of `meta`).
  // The helper uses ASSERT_* internally, so callers must wrap the call in
  // ASSERT_NO_FATAL_FAILURE to stop on a failure inside it.
  auto build_index = [](IndexMeta &meta, size_t base, size_t doc_cnt,
                        std::string &path) {
    IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("IVFBuilder");
    ASSERT_NE(builder, nullptr);
    IndexHolder::Pointer holder;
    if (meta.data_type() == IndexMeta::DataType::DT_INT8) {
      auto h = make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_INT8>>(
          meta.dimension());
      for (size_t i = base; i < doc_cnt; i++) {
        NumericalVector<int8_t> vec(meta.dimension());
        for (size_t j = 0; j < meta.dimension(); ++j) {
          vec[j] = i;
        }
        ASSERT_TRUE(h->emplace(i, vec));
      }
      holder = h;
    } else if (meta.data_type() == IndexMeta::DataType::DT_FP32) {
      auto h = make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(
          meta.dimension());
      for (size_t i = base; i < doc_cnt; i++) {
        NumericalVector<float> vec(meta.dimension());
        for (size_t j = 0; j < meta.dimension(); ++j) {
          vec[j] = i;
        }
        ASSERT_TRUE(h->emplace(i, vec));
      }
      holder = h;
    }
    // Only INT8 and FP32 are handled above; any other data type leaves the
    // holder empty, which must fail the test rather than be dereferenced.
    ASSERT_TRUE(!!holder);
    Params params;
    LOG_DEBUG("Build index %s count=%zu", path.c_str(), holder->count());
    params.set("proxima.ivf.builder.centroid_count", "10");
    ASSERT_EQ(0, builder->init(meta, params));
    ASSERT_EQ(0, builder->train(holder));
    ASSERT_EQ(0, builder->build(holder));
    auto dumper = IndexFactory::CreateDumper("FileDumper");
    ASSERT_NE(dumper, nullptr);
    ASSERT_EQ(0, dumper->create(path));
    ASSERT_EQ(0, builder->dump(dumper));
    ASSERT_EQ(0, dumper->close());
    ASSERT_EQ(0, builder->cleanup());
  };

  auto path1 = index_path_ + "/index1";
  auto path2 = index_path_ + "/index2";
  IndexMeta meta1(IndexMeta::DataType::DT_INT8, 16);
  IndexMeta meta2(IndexMeta::DataType::DT_FP32, 31);
  // A fatal failure inside the lambda only returns from the lambda; propagate
  // it so the test does not continue with a missing or partial index file.
  ASSERT_NO_FATAL_FAILURE(build_index(meta1, 10, 200, path1));
  ASSERT_NO_FATAL_FAILURE(build_index(meta2, 2000, 3000, path2));

  // test searcher
  IndexSearcher::Pointer searcher = IndexFactory::CreateSearcher("IVFSearcher");
  ASSERT_NE(searcher, nullptr);
  Params searcherParams;
  ASSERT_EQ(0, searcher->init(searcherParams));
  auto container = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_TRUE(!!container);
  ASSERT_EQ(0, container->open(path1, false));
  ASSERT_EQ(0, searcher->load(container, IndexMetric::Pointer()));

  // First load: INT8 index holding keys 10..199 (190 documents).
  auto provider = searcher->create_provider();
  ASSERT_TRUE(!!provider);
  ASSERT_EQ(IndexMeta::DataType::DT_INT8, searcher->meta().data_type());
  ASSERT_EQ(190UL, searcher->stats().loaded_count());
  ASSERT_EQ(190UL, provider->count());
  ASSERT_EQ("IVFSearcher", provider->owner_class());
  for (size_t i = 10; i < 200ul; ++i) {
    const int8_t *d1 =
        reinterpret_cast<const int8_t *>(provider->get_vector(i));
    ASSERT_TRUE(d1);
    for (size_t j = 0; j < meta1.dimension(); ++j) {
      // Keys above 127 do not fit in int8_t; compare against the same
      // narrowing conversion that was applied when the vector was built.
      ASSERT_EQ(d1[j], (int8_t)i);
    }
  }
  ASSERT_EQ(meta1.dimension(), provider->dimension());
  ASSERT_EQ(meta1.element_size(), provider->element_size());
  ASSERT_EQ(meta1.data_type(), provider->data_type());
  ASSERT_EQ(0, searcher->unload());
  ASSERT_EQ(0, searcher->cleanup());

  // Second load on the same searcher object: FP32 index with keys 2000..2999.
  auto container2 = IndexFactory::CreateStorage("FileReadStorage");
  ASSERT_TRUE(!!container2);
  ASSERT_EQ(0, container2->open(path2, false));
  ASSERT_EQ(0, searcher->init(searcherParams));
  ASSERT_EQ(0, searcher->load(container2, IndexMetric::Pointer()));

  auto provider2 = searcher->create_provider();
  ASSERT_TRUE(!!provider2);
  ASSERT_EQ(IndexMeta::DataType::DT_FP32, searcher->meta().data_type());
  for (size_t i = 2000; i < 3000ul; ++i) {
    const float *d1 = reinterpret_cast<const float *>(provider2->get_vector(i));
    ASSERT_TRUE(d1);
    for (size_t j = 0; j < meta2.dimension(); ++j) {
      ASSERT_FLOAT_EQ(d1[j], i);
    }
  }
  ASSERT_EQ(meta2.dimension(), provider2->dimension());
  ASSERT_EQ(meta2.element_size(), provider2->element_size());
  ASSERT_EQ(meta2.data_type(), provider2->data_type());
  ASSERT_EQ(1000UL, provider2->count());
  ASSERT_EQ(1000UL, searcher->stats().loaded_count());
}

// Builds an IVF index from 5000 FP32 vectors (vector i has every component
// equal to i) with the Int8QuantizerConverter and the fixture's default
// metric, then checks that
//   * the provider still exposes the original FP32 vectors, by key and by
//     iteration, and
//   * a brute-force top-1 search for every 20th vector returns a key within
//     100 of the query's own key.
TEST_F(IVFSearcherTest, TestInt8QuantizerWithL2) {
  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("IVFBuilder");
  ASSERT_NE(builder, nullptr);
  auto holder = make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(
      dimension_);
  size_t doc_cnt = 5000UL;
  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<float> vec(dimension_);
    for (size_t j = 0; j < dimension_; ++j) {
      vec[j] = i;
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }
  Params params;
  params.set("proxima.ivf.builder.centroid_count", "20");
  // NOTE(review): TestProviderInt8 spells a similar switch
  // "proxima.ivf.builder.retain_original_features"; confirm which key the
  // builder actually reads.
  params.set("proxima.ivf.builder.store_original_features", true);
  auto meta = index_meta_;
  params.set(PARAM_IVF_BUILDER_QUANTIZER_CLASS, "Int8QuantizerConverter");
  ASSERT_EQ(0, builder->init(meta, params));
  ASSERT_EQ(0, builder->train(holder));
  ASSERT_EQ(0, builder->build(holder));
  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  string path = index_path_ + "/TestQuantizer";
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  // test searcher
  IndexSearcher::Pointer searcher = IndexFactory::CreateSearcher("IVFSearcher");
  ASSERT_NE(searcher, nullptr);
  Params searcherParams;
  ASSERT_EQ(0, searcher->init(searcherParams));
  auto container = IndexFactory::CreateStorage("FileReadStorage");
  // Fail the test instead of dereferencing a null storage.
  ASSERT_TRUE(!!container);
  ASSERT_EQ(0, container->open(path, false));
  ASSERT_EQ(0, searcher->load(container, IndexMetric::Pointer()));

  auto provider = searcher->create_provider();
  ASSERT_TRUE(!!provider);
  ASSERT_EQ(IndexMeta::DataType::DT_FP32, provider->data_type());
  for (size_t i = 0; i < doc_cnt; ++i) {
    const float *d1 = reinterpret_cast<const float *>(provider->get_vector(i));
    ASSERT_TRUE(d1);
    for (size_t j = 0; j < dimension_; ++j) {
      ASSERT_FLOAT_EQ(d1[j], i);
    }
  }

  auto iter = provider->create_iterator();
  size_t cnt = 0;
  while (iter->is_valid()) {
    auto key = iter->key();
    const float *d = reinterpret_cast<const float *>(iter->data());
    for (size_t j = 0; j < dimension_; ++j) {
      ASSERT_FLOAT_EQ(d[j], key);
    }
    cnt++;
    iter->next();
  }
  ASSERT_EQ(cnt, doc_cnt);

  ASSERT_EQ(dimension_, provider->dimension());
  ASSERT_EQ(index_meta_.element_size(), provider->element_size());
  ASSERT_EQ(index_meta_.data_type(), provider->data_type());

  auto context = searcher->create_context();
  ASSERT_TRUE(!!context);
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension_);
  size_t topk = 1;
  context->set_topk(topk);
  // The filter callback returns false for every key.
  context->set_filter([](uint64_t) { return false; });
  for (size_t i = 0; i < doc_cnt; i += 20) {
    NumericalVector<float> query(dimension_);
    for (size_t j = 0; j < dimension_; ++j) {
      query[j] = i;
    }
    int ret = searcher->search_bf_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);
    const IndexDocumentList &result = context->result(0);
    // Must be fatal: result[0] is read right below, and indexing an empty
    // result list would be out of bounds.
    ASSERT_EQ((size_t)topk, result.size());
    ASSERT_NEAR(i, result[0].key(), 100);
  }
}

// Builds a 32-dimensional FP32 IVF index with the "MipsSquaredEuclidean"
// metric over 10000 vectors (vector i has every component equal to i / 100),
// searches it with an all-ones query at scan ratio 0.1 and expects the best
// hit to be within 10 keys of the last (largest-valued) document.
TEST_F(IVFSearcherTest, TestMipsEuclideanMetric) {
  constexpr size_t static dim = 32;
  IndexMeta meta(IndexMeta::DataType::DT_FP32, dim);
  meta.set_metric("MipsSquaredEuclidean", 0, Params());
  IndexBuilder::Pointer builder = IndexFactory::CreateBuilder("IVFBuilder");
  ASSERT_NE(builder, nullptr);
  auto holder =
      std::make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(dim);
  const size_t COUNT = 10000UL;
  for (size_t i = 0; i < COUNT; i++) {
    NumericalVector<float> vec(dim);
    for (size_t j = 0; j < dim; ++j) {
      vec[j] = i / 100.0f;
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }
  Params builder_params;
  builder_params.set("proxima.ivf.builder.centroid_count", 1024);
  ASSERT_EQ(0, builder->init(meta, builder_params));
  ASSERT_EQ(0, builder->train(holder));
  ASSERT_EQ(0, builder->build(holder));

  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_NE(dumper, nullptr);
  // Relative path (not under index_path_); removed explicitly at the end.
  std::string path = "IVFTestMipsEuclideanMetric";
  ASSERT_EQ(0, dumper->create(path));
  ASSERT_EQ(0, builder->dump(dumper));
  ASSERT_EQ(0, dumper->close());

  // test searcher
  IndexSearcher::Pointer searcher = IndexFactory::CreateSearcher("IVFSearcher");
  ASSERT_NE(searcher, nullptr);
  Params params;
  params.set("proxima.ivf.searcher.scan_ratio", 0.1f);
  ASSERT_EQ(0, searcher->init(params));

  auto container = IndexFactory::CreateStorage("FileReadStorage");
  // Fail the test instead of dereferencing a null storage.
  ASSERT_TRUE(!!container);
  ASSERT_EQ(0, container->open(path, false));
  ASSERT_EQ(0, searcher->load(container, IndexMetric::Pointer()));
  auto ctx = searcher->create_context();
  ASSERT_TRUE(!!ctx);

  NumericalVector<float> vec(dim);
  for (size_t j = 0; j < dim; ++j) {
    vec[j] = 1.0;
  }
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dim);
  size_t topk = 10;
  ctx->set_topk(topk);
  ASSERT_EQ(0, searcher->search_impl(vec.data(), qmeta, ctx));
  auto &results = ctx->result();
  // Must be fatal: results[0] is read on the next line, and indexing an empty
  // result list would be out of bounds.
  ASSERT_EQ(results.size(), topk);
  EXPECT_NEAR((uint64_t)(COUNT - 1), results[0].key(), 10);
  File::RemovePath(path);
}

// Builds an IVF index (2 centroids, KmeansCluster, Int8QuantizerConverter)
// from the 33 documents produced by prepare_same_index_holder(0, 33), loads
// it through an mmap storage with memory warm-up, and runs single and batch
// queries through both the brute-force and the regular search paths. Every
// returned score is expected to be 0.
TEST_F(IVFSearcherTest, TestSameValue) {
  IVFBuilder builder;
  //    index_meta_.set_major_order(IndexMeta::MO_ROW);
  params_.set(PARAM_IVF_BUILDER_CENTROID_COUNT, "2");
  params_.set(PARAM_IVF_BUILDER_CLUSTER_CLASS, "KmeansCluster");
  params_.set(PARAM_IVF_BUILDER_QUANTIZER_CLASS, "Int8QuantizerConverter");

  int ret = builder.init(index_meta_, params_);
  EXPECT_EQ(0, ret);
  prepare_same_index_holder(0, 33);
  ret = builder.train(threads_, holder_);
  EXPECT_EQ(0, ret);
  ret = builder.build(threads_, holder_);
  EXPECT_EQ(0, ret);
  IndexDumper::Pointer dumper = IndexFactory::CreateDumper("FileDumper");
  // Fail the test instead of dereferencing a null dumper.
  ASSERT_NE(dumper, nullptr);
  ret = dumper->create(index_path_);
  EXPECT_EQ(0, ret);

  ret = builder.dump(dumper);
  // The dump status was previously stored but never inspected.
  EXPECT_EQ(0, ret);
  EXPECT_EQ((size_t)33, builder.stats().built_count());
  EXPECT_EQ((size_t)33, builder.stats().dumped_count());
  EXPECT_EQ((size_t)0, builder.stats().discarded_count());
  EXPECT_EQ(0, dumper->close());

  IVFSearcher searcher;
  Params params;
  params.set(PARAM_IVF_SEARCHER_SCAN_RATIO, 1.0);
  params.set(PARAM_IVF_SEARCHER_BRUTE_FORCE_THRESHOLD, 1);

  ret = searcher.init(params);
  EXPECT_EQ(0, ret);

  IndexStorage::Pointer container =
      IndexFactory::CreateStorage("MMapFileReadStorage");
  // Must be fatal: the storage is dereferenced immediately below.
  ASSERT_TRUE(!!container);

  Params container_params;
  container_params.set("proxima.mmap_file.container.memory_warmup", true);
  container->init(container_params);
  ret = container->open(index_path_, false);
  EXPECT_EQ(0, ret);

  ret = searcher.load(container, IndexMetric::Pointer());
  EXPECT_EQ(0, ret);

  // Single query: every component is 32.
  std::vector<float> query;
  for (size_t i = 0; i < dimension_; ++i) {
    query.push_back(32.0f);
  }

  // Batch of 33 queries laid out back to back; query q has every component
  // equal to q (integer division of the flat index by the dimension).
  size_t qnum = 33;
  std::vector<float> query1;
  for (size_t i = 0; i < dimension_ * qnum; ++i) {
    query1.push_back(i / dimension_);
  }


  auto context = searcher.create_context();
  ASSERT_TRUE(!!context);
  IndexQueryMeta qmeta(IndexMeta::DataType::DT_FP32, dimension_);

  // single bf search
  {
    size_t topk = 33;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    // Must be fatal: the loop below indexes result[0 .. topk).
    ASSERT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      // std::cout << "i: " << i << ", key: " << result[i].key() << ", score: "
      // << result[i].score() << std::endl;
      ASSERT_EQ(0, result[i].score());
    }
  }

  // batch bf search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_bf_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      // Guard the result[0] access against an empty result list.
      ASSERT_GE(result.size(), static_cast<size_t>(1));
      EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  // single knn search
  {
    size_t topk = 33;
    context->set_topk(topk);
    ret = searcher.search_impl(query.data(), qmeta, context);
    EXPECT_EQ(0, ret);

    const IndexDocumentList &result = context->result(0);
    // Must be fatal: the loop below indexes result[0 .. topk).
    ASSERT_EQ((size_t)topk, result.size());
    for (size_t i = 0; i < topk; ++i) {
      EXPECT_FLOAT_EQ((float)0, result[i].score());
    }
  }

  // batch knn search
  {
    size_t topk = 1;
    context->set_topk(topk);
    ret = searcher.search_impl(query1.data(), qmeta, qnum, context);
    EXPECT_EQ(0, ret);

    for (size_t q = 0; q < qnum; ++q) {
      const IndexDocumentList &result = context->result(q);
      // Guard the result[0] access against an empty result list.
      ASSERT_GE(result.size(), static_cast<size_t>(1));
      EXPECT_FLOAT_EQ((float)0, result[0].score());
    }
  }

  ret = searcher.unload();
  EXPECT_EQ(0, ret);
}

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif

================================================
FILE: tests/core/framework/CMakeLists.txt
================================================
# Registers one GoogleTest target per *_test.cc file found under this
# directory (searched recursively).
# cc_gtest is presumably provided by the included bazel.cmake helper module.
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)

# Collect every test source below the current directory.
file(GLOB_RECURSE ALL_TEST_SRCS *_test.cc)

foreach(CC_SRCS ${ALL_TEST_SRCS})
  # The target name is the source file name without directory or extension.
  get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)
  cc_gtest(
      NAME ${CC_TARGET}
      STRICT
      LIBS zvec_ailego core_framework
      SRCS ${CC_SRCS}
      INCS ../../src
    )
endforeach()

================================================
FILE: tests/core/interface/CMakeLists.txt
================================================
# Registers one GoogleTest target per *_test.cc file found under this
# directory (searched recursively). Each target is given the listed core
# libraries (framework, metric, interface, flat/HNSW/IVF variants, quantizer,
# ...) through the LIBS argument.
# cc_gtest is presumably provided by the included bazel.cmake helper module.
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)

# Collect every test source below the current directory.
file(GLOB_RECURSE ALL_TEST_SRCS *_test.cc)

foreach(CC_SRCS ${ALL_TEST_SRCS})
  # The target name is the source file name without directory or extension.
  get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)
  cc_gtest(
      NAME ${CC_TARGET}
      STRICT
      LIBS zvec_ailego core_framework core_metric core_interface core_knn_flat core_utility core_quantizer sparsehash core_knn_hnsw core_mix_reducer
          core_knn_flat_sparse core_knn_hnsw_sparse core_knn_ivf core_knn_hnsw_rabitq
      SRCS ${CC_SRCS}
      INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm
    )
endforeach()

================================================
FILE: tests/core/interface/index_interface_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cassert>
#include <cmath>
#include <functional>
#include <iostream>
#include <unordered_map>
#include <gtest/gtest.h>
#if RABITQ_SUPPORTED
#include "core/algorithm/hnsw_rabitq/rabitq_converter.h"
#include "zvec/core/framework/index_provider.h"
#endif
#include "zvec/ailego/buffer/buffer_manager.h"
#include "zvec/core/interface/index.h"
#include "zvec/core/interface/index_factory.h"
#include "zvec/core/interface/index_param.h"
#include "zvec/core/interface/index_param_builders.h"

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
#endif

using namespace zvec::core_interface;

// End-to-end smoke test of the dense index interface. For each index
// configuration (Flat, HNSW and IVF, each with and without an FP16
// quantizer) it creates an mmap-backed index, adds one vector under key 233,
// trains, searches with the same vector and fetches the vector back.
//
// The vector is zero except for components 1 and 2 (1.0 and 2.0), so the
// expected inner-product score against itself is 1*1 + 2*2 = 5.
TEST(IndexInterface, General) {
  constexpr uint32_t kDimension = 64;
  const std::string index_name{"test.index"};
  char cmd_buf[100];
  snprintf(cmd_buf, sizeof(cmd_buf), "rm -f %s", index_name.c_str());

  // Runs the add/train/search/fetch scenario against one index configuration.
  // The index file is removed before and after the run.
  auto func = [&](const BaseIndexParam::Pointer &param,
                  const BaseIndexQueryParam::Pointer &query_param) {
    system(cmd_buf);
    auto index = IndexFactory::CreateAndInitIndex(*param);
    ASSERT_NE(nullptr, index);


    // Open reports success as 0 (the Merge test in this file relies on that);
    // stop here on failure instead of operating on an unopened index.
    ASSERT_TRUE(0 == index->Open(index_name,
                                 {StorageOptions::StorageType::kMMAP, true}));

    std::vector<float> vector(kDimension);
    vector[1] = 1.0f;
    vector[2] = 2.0f;
    VectorData vector_data;
    vector_data.vector = DenseVector{vector.data()};
    ASSERT_TRUE(0 == index->Add(vector_data, 233));
    ASSERT_TRUE(0 == index->Train());

    SearchResult result;
    VectorData query;
    query.vector = DenseVector{vector.data()};
    index->Search(query, query_param, &result);
    ASSERT_EQ(1, result.doc_list_.size());
    ASSERT_EQ(233, result.doc_list_[0].key());
    ASSERT_FLOAT_EQ(5.0f, result.doc_list_[0].score());
    if (query_param->fetch_vector) {
      auto &doc = result.doc_list_[0];
      if (result.reverted_vector_list_.size() != 0) {
        // cosine metric or bf16 quantizer
        ASSERT_EQ(1, result.reverted_vector_list_.size());
        auto reverted_vector = reinterpret_cast<const float *>(
            result.reverted_vector_list_[0].data());
        ASSERT_FLOAT_EQ(1.0f, reverted_vector[1]);
        ASSERT_FLOAT_EQ(2.0f, reverted_vector[2]);
      } else {
        // Named distinctly so it does not shadow the input buffer `vector`.
        auto doc_vector = reinterpret_cast<const float *>(doc.vector());
        ASSERT_FLOAT_EQ(1.0f, doc_vector[1]);
        ASSERT_FLOAT_EQ(2.0f, doc_vector[2]);
      }
    }

    // Clear the caller-side buffer before fetching, presumably so that a
    // fetch result aliasing the input buffer would be detected.
    vector[1] = 0;
    vector[2] = 0;
    VectorDataBuffer fetched_vector_data;
    ASSERT_TRUE(0 == index->Fetch(233, &fetched_vector_data));
    float *fetched_vector = reinterpret_cast<float *>(
        std::get<DenseVectorBuffer>(fetched_vector_data.vector_buffer)
            .data.data());
    ASSERT_FLOAT_EQ(1.0f, fetched_vector[1]);
    ASSERT_FLOAT_EQ(2.0f, fetched_vector[2]);
    index->Close();
    system(cmd_buf);
  };


  auto param = FlatIndexParamBuilder()
                   .WithMetricType(MetricType::kInnerProduct)
                   .WithDataType(DataType::DT_FP32)
                   .WithDimension(kDimension)
                   .WithIsSparse(false)
                   .Build();
  func(param,
       FlatQueryParamBuilder().with_topk(10).with_fetch_vector(true).build());
  func(FlatIndexParamBuilder()
           .WithMetricType(MetricType::kInnerProduct)
           .WithDataType(DataType::DT_FP32)
           .WithDimension(kDimension)
           .WithIsSparse(false)
           .WithQuantizerParam(QuantizerParam(QuantizerType::kFP16))
           .Build(),
       FlatQueryParamBuilder().with_topk(10).with_fetch_vector(true).build());

  func(HNSWIndexParamBuilder()
           .WithMetricType(MetricType::kInnerProduct)
           .WithDataType(DataType::DT_FP32)
           .WithDimension(kDimension)
           .WithIsSparse(false)
           .WithEFConstruction(100)
           .Build(),
       HNSWQueryParamBuilder()
           .with_topk(10)
           .with_fetch_vector(true)
           .with_ef_search(20)
           .build());
  func(HNSWIndexParamBuilder()
           .WithMetricType(MetricType::kInnerProduct)
           .WithDataType(DataType::DT_FP32)
           .WithDimension(kDimension)
           .WithIsSparse(false)
           .WithEFConstruction(100)
           .WithQuantizerParam(QuantizerParam(QuantizerType::kFP16))
           .Build(),
       HNSWQueryParamBuilder()
           .with_topk(10)
           .with_fetch_vector(true)
           .with_ef_search(20)
           .build());
  func(IVFIndexParamBuilder()
           .WithMetricType(MetricType::kInnerProduct)
           .WithDataType(DataType::DT_FP32)
           .WithDimension(kDimension)
           .WithIsSparse(false)
           .WithNList(10)
           .Build(),
       IVFQueryParamBuilder().with_topk(10).with_fetch_vector(true).build());
  func(IVFIndexParamBuilder()
           .WithMetricType(MetricType::kInnerProduct)
           .WithDataType(DataType::DT_FP32)
           .WithDimension(kDimension)
           .WithIsSparse(false)
           .WithNList(10)
           .WithQuantizerParam(QuantizerParam(QuantizerType::kFP16))
           .Build(),
       IVFQueryParamBuilder().with_topk(10).with_fetch_vector(true).build());
}

// Same scenario as the dense smoke test, but split across two index objects:
// the vector is written through an mmap-backed index, which is then closed,
// and is searched and fetched through a second index opened on the same file
// with buffer-pool storage. Covered configurations: Flat and HNSW, each with
// and without an FP16 quantizer.
TEST(IndexInterface, BufferGeneral) {
  constexpr uint32_t kDimension = 64;
  const std::string index_name{"test.index"};
  char cmd_buf[100];
  // The trailing '*' also removes any files that share the index name prefix.
  snprintf(cmd_buf, sizeof(cmd_buf), "rm -f %s*", index_name.c_str());

  // Runs the write-then-read scenario against one index configuration.
  auto func = [&](const BaseIndexParam::Pointer &param,
                  const BaseIndexQueryParam::Pointer &query_param) {
    std::string real_index_name = index_name;
    system(cmd_buf);
    auto write_index = IndexFactory::CreateAndInitIndex(*param);
    ASSERT_NE(nullptr, write_index);

    // Open reports success as 0 (the Merge test in this file relies on that);
    // stop here on failure instead of operating on an unopened index.
    ASSERT_TRUE(0 == write_index->Open(
                         real_index_name,
                         {StorageOptions::StorageType::kMMAP, true}));

    std::vector<float> vector(kDimension);
    vector[1] = 1.0f;
    vector[2] = 2.0f;
    VectorData vector_data;
    vector_data.vector = DenseVector{vector.data()};
    ASSERT_TRUE(0 == write_index->Add(vector_data, 233));
    write_index->Close();

    auto read_index = IndexFactory::CreateAndInitIndex(*param);
    ASSERT_NE(nullptr, read_index);
    ASSERT_TRUE(0 == read_index->Open(
                         real_index_name,
                         {StorageOptions::StorageType::kBufferPool, false}));

    SearchResult result;
    VectorData query;
    query.vector = DenseVector{vector.data()};
    read_index->Search(query, query_param, &result);
    ASSERT_EQ(1, result.doc_list_.size());
    ASSERT_EQ(233, result.doc_list_[0].key());
    // Inner product of the vector with itself: 1*1 + 2*2.
    ASSERT_FLOAT_EQ(5.0f, result.doc_list_[0].score());
    if (query_param->fetch_vector) {
      auto &doc = result.doc_list_[0];
      if (result.reverted_vector_list_.size() != 0) {
        // cosine metric or bf16 quantizer
        ASSERT_EQ(1, result.reverted_vector_list_.size());
        auto reverted_vector = reinterpret_cast<const float *>(
            result.reverted_vector_list_[0].data());
        ASSERT_FLOAT_EQ(1.0f, reverted_vector[1]);
        ASSERT_FLOAT_EQ(2.0f, reverted_vector[2]);
      } else {
        // Named distinctly so it does not shadow the input buffer `vector`.
        auto doc_vector = reinterpret_cast<const float *>(doc.vector());
        ASSERT_FLOAT_EQ(1.0f, doc_vector[1]);
        ASSERT_FLOAT_EQ(2.0f, doc_vector[2]);
      }
    }

    // Clear the caller-side buffer before fetching, presumably so that a
    // fetch result aliasing the input buffer would be detected.
    vector[1] = 0;
    vector[2] = 0;
    VectorDataBuffer fetched_vector_data;
    ASSERT_TRUE(0 == read_index->Fetch(233, &fetched_vector_data));
    float *fetched_vector = reinterpret_cast<float *>(
        std::get<DenseVectorBuffer>(fetched_vector_data.vector_buffer)
            .data.data());
    ASSERT_FLOAT_EQ(1.0f, fetched_vector[1]);
    ASSERT_FLOAT_EQ(2.0f, fetched_vector[2]);
    // NOTE(review): the document list is cleared before the index is closed;
    // presumably the documents reference storage owned by the index --
    // confirm.
    result.doc_list_.clear();
    read_index->Close();
    system(cmd_buf);
  };


  auto param = FlatIndexParamBuilder()
                   .WithMetricType(MetricType::kInnerProduct)
                   .WithDataType(DataType::DT_FP32)
                   .WithDimension(kDimension)
                   .WithIsSparse(false)
                   .Build();
  func(param,
       FlatQueryParamBuilder().with_topk(10).with_fetch_vector(true).build());
  func(FlatIndexParamBuilder()
           .WithMetricType(MetricType::kInnerProduct)
           .WithDataType(DataType::DT_FP32)
           .WithDimension(kDimension)
           .WithIsSparse(false)
           .WithQuantizerParam(QuantizerParam(QuantizerType::kFP16))
           .Build(),
       FlatQueryParamBuilder().with_topk(10).with_fetch_vector(true).build());

  func(HNSWIndexParamBuilder()
           .WithMetricType(MetricType::kInnerProduct)
           .WithDataType(DataType::DT_FP32)
           .WithDimension(kDimension)
           .WithIsSparse(false)
           .WithEFConstruction(100)
           .Build(),
       HNSWQueryParamBuilder()
           .with_topk(10)
           .with_fetch_vector(true)
           .with_ef_search(20)
           .build());
  func(HNSWIndexParamBuilder()
           .WithMetricType(MetricType::kInnerProduct)
           .WithDataType(DataType::DT_FP32)
           .WithDimension(kDimension)
           .WithIsSparse(false)
           .WithEFConstruction(100)
           .WithQuantizerParam(QuantizerParam(QuantizerType::kFP16))
           .Build(),
       HNSWQueryParamBuilder()
           .with_topk(10)
           .with_fetch_vector(true)
           .with_ef_search(20)
           .build());
}


// End-to-end smoke test of the sparse index interface. For each index
// configuration (Flat with an FP16 quantizer, HNSW with and without one) it
// creates an mmap-backed index, adds one sparse vector under key 233,
// searches with the same vector and fetches it back.
//
// The sparse vector has indices {0, 1, 2} with values {0, 1, 2}, so the
// expected inner-product score against itself is 0*0 + 1*1 + 2*2 = 5.
TEST(IndexInterface, SparseGeneral) {
  constexpr uint32_t kSparseCount = 3;
  const std::string index_name{"test.index"};
  char cmd_buf[100];
  snprintf(cmd_buf, sizeof(cmd_buf), "rm -f %s", index_name.c_str());

  // Runs the add/search/fetch scenario against one index configuration.
  // The index file is removed before and after the run.
  auto func = [&](const BaseIndexParam::Pointer &param,
                  const BaseIndexQueryParam::Pointer &query_param) {
    system(cmd_buf);
    auto index = IndexFactory::CreateAndInitIndex(*param);
    ASSERT_NE(nullptr, index);


    // Open reports success as 0 (the Merge test in this file relies on that);
    // stop here on failure instead of operating on an unopened index.
    ASSERT_TRUE(0 == index->Open(index_name,
                                 {StorageOptions::StorageType::kMMAP, true}));

    std::vector<uint32_t> indices(kSparseCount);
    std::vector<float> values(kSparseCount);
    for (uint32_t i = 0; i < kSparseCount; ++i) {
      indices[i] = i;
      values[i] = i;
    }

    VectorData vector_data{
        SparseVector{kSparseCount, indices.data(), values.data()}};
    ASSERT_TRUE(0 == index->Add(vector_data, 233));


    SearchResult result;
    VectorData query = {
        SparseVector{kSparseCount, indices.data(), values.data()}};
    index->Search(query, query_param, &result);
    ASSERT_EQ(1, result.doc_list_.size());
    ASSERT_EQ(233, result.doc_list_[0].key());
    ASSERT_FLOAT_EQ(5.0f, result.doc_list_[0].score());

    if (query_param->fetch_vector) {
      auto &sparse_doc = result.doc_list_[0].sparse_doc();
      auto sparse_indices = reinterpret_cast<const uint32_t *>(
          sparse_doc.sparse_indices().data());
      for (uint32_t i = 0; i < kSparseCount; ++i) {
        ASSERT_EQ(i, sparse_indices[i]);
      }
      // Values are checked from the reverted list when the search filled it,
      // otherwise directly from the document.
      if (!result.reverted_sparse_values_list_.empty()) {
        ASSERT_EQ(1, result.reverted_sparse_values_list_.size());
        auto reverted_sparse_values = reinterpret_cast<const float *>(
            result.reverted_sparse_values_list_[0].data());
        for (uint32_t i = 0; i < kSparseCount; ++i) {
          ASSERT_EQ(i, reverted_sparse_values[i]);
        }
      } else {
        auto sparse_values =
            reinterpret_cast<const float *>(sparse_doc.sparse_values().data());
        for (uint32_t i = 0; i < kSparseCount; ++i) {
          ASSERT_EQ(i, sparse_values[i]);
        }
      }
    }

    // Clear the caller-side values before fetching, presumably so that a
    // fetch result aliasing the input buffers would be detected.
    values[1] = 0;
    values[2] = 0;
    VectorDataBuffer fetched_vector_data;
    ASSERT_TRUE(0 == index->Fetch(233, &fetched_vector_data));
    const SparseVectorBuffer &sparse_vector_buffer =
        std::get<SparseVectorBuffer>(fetched_vector_data.vector_buffer);
    const uint32_t *fetched_indices =
        reinterpret_cast<const uint32_t *>(sparse_vector_buffer.indices.data());
    const float *fetched_values =
        reinterpret_cast<const float *>(sparse_vector_buffer.values.data());
    ASSERT_EQ(kSparseCount, sparse_vector_buffer.count);
    for (uint32_t i = 0; i < kSparseCount; ++i) {
      ASSERT_EQ(i, fetched_indices[i]);
      ASSERT_EQ(i, fetched_values[i]);
    }
    index->Close();
    system(cmd_buf);
  };


  // NOTE(review): `param` is only referenced by the disabled call below, so
  // the plain (non-quantized) sparse Flat configuration is not exercised.
  auto param = FlatIndexParamBuilder()
                   .WithMetricType(MetricType::kInnerProduct)
                   .WithDataType(DataType::DT_FP32)
                   .WithIsSparse(true)
                   .Build();
  // func(param, FlatQueryParam{{.topk = 10, .fetch_vector = true}});
  func(FlatIndexParamBuilder()
           .WithMetricType(MetricType::kInnerProduct)
           .WithDataType(DataType::DT_FP32)
           .WithIsSparse(true)
           .WithQuantizerParam(QuantizerParam(QuantizerType::kFP16))
           .Build(),
       FlatQueryParamBuilder().with_topk(10).with_fetch_vector(true).build());

  func(HNSWIndexParamBuilder()
           .WithMetricType(MetricType::kInnerProduct)
           .WithDataType(DataType::DT_FP32)
           .WithIsSparse(true)
           .WithEFConstruction(100)
           .Build(),
       HNSWQueryParamBuilder()
           .with_topk(10)
           .with_fetch_vector(true)
           .with_ef_search(20)
           .build());
  func(HNSWIndexParamBuilder()
           .WithMetricType(MetricType::kInnerProduct)
           .WithDataType(DataType::DT_FP32)
           .WithIsSparse(true)
           .WithEFConstruction(100)
           .WithQuantizerParam(QuantizerParam(QuantizerType::kFP16))
           .Build(),
       HNSWQueryParamBuilder()
           .with_topk(10)
           .with_fetch_vector(true)
           .with_ef_search(20)
           .build());
}


// Exercises Index::Merge on dense FP32 indexes. Two source indexes are merged
// into a freshly created target index, once without a filter and once with an
// IndexFilter, for Flat->Flat, HNSW->HNSW and both mixed target/source
// combinations.
TEST(IndexInterface, Merge) {
  constexpr uint32_t kDimension = 64;
  const std::string index_name{"test.index"};

  // Best-effort removal of an index file from disk.
  auto del_index_file_func = [](const std::string &file_name) {
    const std::string cmd = "rm -f " + file_name;
    system(cmd.c_str());
  };

  // Creates an index described by `param` and opens it on a fresh MMAP-backed
  // file at `path`. Returns nullptr when either creation or Open() fails.
  auto create_index_func = [&](const BaseIndexParam::Pointer &param,
                               const std::string &path) -> Index::Pointer {
    del_index_file_func(path);
    auto index = IndexFactory::CreateAndInitIndex(*param);
    if (index == nullptr) {
      return nullptr;
    }
    if (0 != index->Open(path, {StorageOptions::StorageType::kMMAP, true})) {
      return nullptr;
    }
    return index;
  };

  // Fetches `doc_id` from `index` and checks the two marker components of the
  // stored vector: [1] must equal `expected_first`, [2] must equal 123.
  auto expect_vector = [](const Index::Pointer &index, uint32_t doc_id,
                          float expected_first) {
    VectorDataBuffer fetched;
    ASSERT_TRUE(0 == index->Fetch(doc_id, &fetched));
    const float *values = reinterpret_cast<const float *>(
        std::get<DenseVectorBuffer>(fetched.vector_buffer).data.data());
    ASSERT_FLOAT_EQ(expected_first, values[1]);
    ASSERT_FLOAT_EQ(123.0f, values[2]);
  };

  // Runs the whole merge scenario: sources are built from `param_source`, the
  // merge target from `param_target`.
  auto func = [&](const BaseIndexParam::Pointer &param_target,
                  const BaseIndexParam::Pointer &param_source) {
    auto index1 = create_index_func(param_source, index_name + "1");
    ASSERT_NE(nullptr, index1);
    auto index2 = create_index_func(param_source, index_name + "2");
    ASSERT_NE(nullptr, index2);

    // All vectors share component [2] == 123 and are told apart by [1].
    std::vector<float> vector(kDimension);
    vector[1] = 1.0f;
    vector[2] = 123.0f;
    VectorData vector_data{DenseVector{vector.data()}};
    ASSERT_TRUE(0 == index1->Add(vector_data, 0));

    vector[1] = 2.0f;
    ASSERT_TRUE(0 == index2->Add(vector_data, 0));
    vector[1] = 3.0f;
    ASSERT_TRUE(0 == index2->Add(vector_data, 1));

    // Sanity-check the sources before merging. ASSERT_NO_FATAL_FAILURE keeps
    // the abort-on-first-failure behaviour although the checks live in a
    // helper lambda.
    ASSERT_NO_FATAL_FAILURE(expect_vector(index1, 0, 1.0f));
    ASSERT_NO_FATAL_FAILURE(expect_vector(index2, 0, 2.0f));
    ASSERT_NO_FATAL_FAILURE(expect_vector(index2, 1, 3.0f));

    {  // test reduce
      auto index3 = create_index_func(param_target, index_name + "3");
      ASSERT_NE(nullptr, index3);
      ASSERT_TRUE(0 == index3->Merge({index1, index2}, IndexFilter()));
      ASSERT_TRUE(3 == index3->GetDocCount());
      ASSERT_NO_FATAL_FAILURE(expect_vector(index3, 0, 1.0f));
      ASSERT_NO_FATAL_FAILURE(expect_vector(index3, 1, 2.0f));
      index3->Close();
      del_index_file_func(index_name + "3");
    }

    {  // test reduce with filter
      auto index3 = create_index_func(param_target, index_name + "3");
      ASSERT_NE(nullptr, index3);
      // The predicate matches key 0 only; the expectations below imply that
      // matching keys are dropped, leaving two documents of which the first
      // carries vector[1] == 2.
      auto filter = IndexFilter();
      filter.set([](uint64_t key) { return key == 0; });  // TODO: uint32?
      ASSERT_TRUE(0 == index3->Merge({index1, index2}, filter));
      ASSERT_TRUE(2 == index3->GetDocCount());
      ASSERT_NO_FATAL_FAILURE(expect_vector(index3, 0, 2.0f));
      index3->Close();
      del_index_file_func(index_name + "3");
    }

    index1->Close();
    index2->Close();
    del_index_file_func(index_name + "1");
    del_index_file_func(index_name + "2");
  };

  // same index
  {
    auto param = FlatIndexParamBuilder()
                     .WithMetricType(MetricType::kInnerProduct)
                     .WithDataType(DataType::DT_FP32)
                     .WithDimension(kDimension)
                     .WithIsSparse(false)
                     .Build();
    func(param, param);
  }
  {
    auto param = HNSWIndexParamBuilder()
                     .WithMetricType(MetricType::kInnerProduct)
                     .WithDataType(DataType::DT_FP32)
                     .WithDimension(kDimension)
                     .WithIsSparse(false)
                     .Build();
    func(param, param);
  }

  // different index
  {
    auto param_flat = FlatIndexParamBuilder()
                          .WithMetricType(MetricType::kInnerProduct)
                          .WithDataType(DataType::DT_FP32)
                          .WithDimension(kDimension)
                          .WithIsSparse(false)
                          .Build();
    auto param_hnsw = HNSWIndexParamBuilder()
                          .WithMetricType(MetricType::kInnerProduct)
                          .WithDataType(DataType::DT_FP32)
                          .WithDimension(kDimension)
                          .WithIsSparse(false)
                          .Build();
    func(param_flat, param_hnsw);
    func(param_hnsw, param_flat);
  }
}


// Round-trips index parameters and query parameters through their JSON
// representation and checks that re-serializing the deserialized object
// reproduces the original text.
TEST(IndexInterface, Serialize) {
  // Flat index parameters.
  {
    std::cout << "\n\n----flat index----" << std::endl;
    const auto flat_param =
        FlatIndexParamBuilder()
            .WithMetricType(MetricType::kInnerProduct)
            .WithDataType(DataType::DT_FP32)
            .WithDimension(64)
            .WithIsSparse(false)
            .WithQuantizerParam(QuantizerParam{QuantizerType::kFP16})
            .Build();

    // Serialize once per mode and reuse the text for printing and comparing.
    const auto json_omit = flat_param->SerializeToJson(true);
    std::cout << "flat index -- omit=true: " << json_omit << std::endl;
    const auto json_full = flat_param->SerializeToJson();
    std::cout << "omit=false: " << json_full << std::endl;

    const auto restored =
        IndexFactory::DeserializeIndexParamFromJson(json_full);
    ASSERT_NE(nullptr, restored.get());

    const auto restored_json_full = restored->SerializeToJson();
    std::cout << "serialize then de then se:" << restored_json_full
              << std::endl;

    ASSERT_TRUE(restored_json_full == json_full);
    ASSERT_TRUE(restored->SerializeToJson(true) == json_omit);
  }

  // HNSW index parameters.
  {
    std::cout << "\n\n----hnsw index----" << std::endl;
    const auto hnsw_param =
        HNSWIndexParamBuilder()
            .WithMetricType(MetricType::kInnerProduct)
            .WithDataType(DataType::DT_FP32)
            .WithDimension(64)
            .WithIsSparse(false)
            .WithQuantizerParam(QuantizerParam{QuantizerType::kFP16})
            .Build();

    const auto json_omit = hnsw_param->SerializeToJson(true);
    std::cout << "hnsw index -- omit=true: " << json_omit << std::endl;
    const auto json_full = hnsw_param->SerializeToJson();
    std::cout << "hnsw index -- omit=false: " << json_full << std::endl;

    const auto restored =
        IndexFactory::DeserializeIndexParamFromJson(json_full);
    ASSERT_NE(nullptr, restored.get());

    const auto restored_json_full = restored->SerializeToJson();
    std::cout << "serialize then de then se:" << restored_json_full
              << std::endl;

    ASSERT_TRUE(restored_json_full == json_full);
    ASSERT_TRUE(restored->SerializeToJson(true) == json_omit);
  }

  // Flat query parameters (only the default serialization is compared).
  {
    std::cout << "\n\n----flat query----" << std::endl;
    const auto flat_query =
        FlatQueryParamBuilder().with_topk(10).with_fetch_vector(true).build();

    std::cout << "flat query -- omit=true: "
              << IndexFactory::QueryParamSerializeToJson(*flat_query, true)
              << std::endl;
    const auto json_full = IndexFactory::QueryParamSerializeToJson(*flat_query);
    std::cout << "flat query -- omit=false: " << json_full << std::endl;

    const auto restored =
        IndexFactory::QueryParamDeserializeFromJson<FlatQueryParam>(json_full);
    ASSERT_NE(nullptr, restored.get());

    const auto restored_json = IndexFactory::QueryParamSerializeToJson(*restored);
    std::cout << "serialize then de then se:" << restored_json << std::endl;

    ASSERT_TRUE(restored_json == json_full);
  }

  // HNSW query parameters (only the default serialization is compared).
  {
    std::cout << "\n\n----hnsw query----" << std::endl;
    const auto hnsw_query = HNSWQueryParamBuilder()
                                .with_topk(10)
                                .with_fetch_vector(true)
                                .with_ef_search(20)
                                .build();

    std::cout << "hnsw query -- omit=true: "
              << IndexFactory::QueryParamSerializeToJson(*hnsw_query, true)
              << std::endl;
    std::cout << "hnsw query -- omit=false: "
              << IndexFactory::QueryParamSerializeToJson(*hnsw_query, false)
              << std::endl;

    const auto json_full = IndexFactory::QueryParamSerializeToJson(*hnsw_query);
    const auto restored =
        IndexFactory::QueryParamDeserializeFromJson<HNSWQueryParam>(json_full);
    ASSERT_NE(nullptr, restored.get());

    const auto restored_json = IndexFactory::QueryParamSerializeToJson(*restored);
    std::cout << "serialize then de then se:" << restored_json << std::endl;

    ASSERT_TRUE(restored_json == json_full);
  }
}

// Negative-path coverage for the index interface: unsupported configurations
// must fail to create, and invalid operations on a successfully opened index
// must return a non-zero status.
//
// Wherever an operation is expected to fail on an *open* index, Open() itself
// is asserted to succeed first; otherwise the negative assertion could pass
// merely because the index was never opened.
TEST(IndexInterface, Failure) {
  // Best-effort removal of files a previously aborted run may have left
  // behind, so that the Open() calls below start without pre-existing files.
  system("rm -f test.index test1.index test2.index test3.index");

  // Test unsupported index type
  {
    auto param = std::make_shared<BaseIndexParam>(IndexType::kIVF);
    auto index = IndexFactory::CreateAndInitIndex(*param);
    ASSERT_EQ(nullptr, index);
  }

  // Test unsupported metric type
  {
    auto param = FlatIndexParamBuilder()
                     .WithMetricType(MetricType::kNone)  // no metric selected
                     .WithDataType(DataType::DT_FP32)
                     .Build();
    auto index = IndexFactory::CreateAndInitIndex(*param);
    ASSERT_EQ(nullptr, index);
  }

  // Test unsupported metric type for sparse index
  {
    auto param =
        FlatIndexParamBuilder()
            .WithMetricType(MetricType::kL2sq)  // L2 not supported for sparse
            .WithDataType(DataType::DT_FP32)
            .WithIsSparse(true)
            .Build();
    auto index = IndexFactory::CreateAndInitIndex(*param);
    ASSERT_EQ(nullptr, index);
  }

  // // Test unsupported quantizer type
  // {
  //   auto param = FlatIndexParamBuilder()
  //                    .WithMetricType(MetricType::kInnerProduct)
  //                    .WithDataType(DataType::DT_INT4)
  //                    .WithDimension(64)
  //                    .WithIsSparse(false)
  //                    .WithQuantizerParam(
  //                        QuantizerParam(QuantizerType::kInt8))  //
  //                        Unsupported
  //                    .Build();
  //   auto index = IndexFactory::CreateAndInitIndex(*param);
  //   ASSERT_EQ(nullptr, index);
  // }

  // Test unsupported quantizer type for sparse index
  {
    auto param = FlatIndexParamBuilder()
                     .WithMetricType(MetricType::kInnerProduct)
                     .WithDataType(DataType::DT_FP32)
                     .WithDimension(64)
                     .WithIsSparse(true)
                     .WithQuantizerParam(
                         QuantizerParam(QuantizerType::kInt8))  // Unsupported
                     .Build();
    auto index = IndexFactory::CreateAndInitIndex(*param);
    ASSERT_EQ(nullptr, index);
  }

  // Test unsupported data type for cosine metric
  {
    auto param = FlatIndexParamBuilder()
                     .WithMetricType(MetricType::kCosine)
                     .WithDataType(DataType::DT_INT8)  // Unsupported for cosine
                     .WithDimension(64)
                     .WithIsSparse(false)
                     .Build();
    auto index = IndexFactory::CreateAndInitIndex(*param);
    ASSERT_EQ(nullptr, index);
  }

  // Test invalid storage type
  {
    auto param = FlatIndexParamBuilder()
                     .WithMetricType(MetricType::kInnerProduct)
                     .WithDataType(DataType::DT_FP32)
                     .WithDimension(64)
                     .WithIsSparse(false)
                     .Build();
    auto index = IndexFactory::CreateAndInitIndex(*param);
    ASSERT_NE(nullptr, index);

    StorageOptions invalid_storage;
    invalid_storage.type = StorageOptions::StorageType::kNone;  // Unsupported
    int ret = index->Open("test.index", invalid_storage);
    ASSERT_NE(0, ret);

    // In case the rejected Open() still touched the file system.
    system("rm -f test.index");
  }

  // Test invalid vector data type for dense operations
  {
    auto param = FlatIndexParamBuilder()
                     .WithMetricType(MetricType::kInnerProduct)
                     .WithDataType(DataType::DT_FP32)
                     .WithDimension(64)
                     .WithIsSparse(false)
                     .Build();
    auto index = IndexFactory::CreateAndInitIndex(*param);
    ASSERT_NE(nullptr, index);

    ASSERT_EQ(0, index->Open("test.index",
                             {StorageOptions::StorageType::kMMAP, true}));

    // Try to add sparse vector to dense index
    std::vector<uint32_t> indices = {0, 1, 2};
    std::vector<float> values = {1.0f, 2.0f, 3.0f};
    VectorData sparse_vector_data{
        SparseVector{3, indices.data(), values.data()}};

    int ret = index->Add(sparse_vector_data, 1);
    ASSERT_NE(0, ret);

    index->Close();
    system("rm -f test.index");
  }

  // Test invalid vector data type for sparse operations
  {
    auto param = FlatIndexParamBuilder()
                     .WithMetricType(MetricType::kInnerProduct)
                     .WithDataType(DataType::DT_FP32)
                     .WithIsSparse(true)
                     .Build();
    auto index = IndexFactory::CreateAndInitIndex(*param);
    ASSERT_NE(nullptr, index);

    ASSERT_EQ(0, index->Open("test.index",
                             {StorageOptions::StorageType::kMMAP, true}));

    // Try to add dense vector to sparse index
    std::vector<float> vector(64, 1.0f);
    VectorData dense_vector_data{DenseVector{vector.data()}};

    int ret = index->Add(dense_vector_data, 1);
    ASSERT_NE(0, ret);

    index->Close();
    system("rm -f test.index");
  }

  // Test fetch non-existent document
  {
    auto param = FlatIndexParamBuilder()
                     .WithMetricType(MetricType::kInnerProduct)
                     .WithDataType(DataType::DT_FP32)
                     .WithDimension(64)
                     .WithIsSparse(false)
                     .Build();
    auto index = IndexFactory::CreateAndInitIndex(*param);
    ASSERT_NE(nullptr, index);

    ASSERT_EQ(0, index->Open("test.index",
                             {StorageOptions::StorageType::kMMAP, true}));

    VectorDataBuffer fetched_vector_data;
    int ret = index->Fetch(999, &fetched_vector_data);  // Non-existent doc_id
    ASSERT_NE(0, ret);

    index->Close();
    system("rm -f test.index");
  }

  // Test search with invalid vector data
  {
    auto param = FlatIndexParamBuilder()
                     .WithMetricType(MetricType::kInnerProduct)
                     .WithDataType(DataType::DT_FP32)
                     .WithDimension(64)
                     .WithIsSparse(false)
                     .Build();
    auto index = IndexFactory::CreateAndInitIndex(*param);
    ASSERT_NE(nullptr, index);

    ASSERT_EQ(0, index->Open("test.index",
                             {StorageOptions::StorageType::kMMAP, true}));

    // Add a vector first
    std::vector<float> vector(64, 1.0f);
    VectorData vector_data{DenseVector{vector.data()}};
    ASSERT_EQ(0, index->Add(vector_data, 1));

    // Try to search with sparse vector in dense index
    std::vector<uint32_t> indices = {0, 1, 2};
    std::vector<float> values = {1.0f, 2.0f, 3.0f};
    VectorData sparse_query{SparseVector{3, indices.data(), values.data()}};

    SearchResult result;
    FlatQueryParam::Pointer query_param =
        FlatQueryParamBuilder().with_topk(10).with_fetch_vector(false).build();
    int ret = index->Search(sparse_query, query_param, &result);
    ASSERT_NE(0, ret);

    index->Close();
    system("rm -f test.index");
  }

  // Test merge with invalid write concurrency
  {
    auto param1 = FlatIndexParamBuilder()
                      .WithMetricType(MetricType::kInnerProduct)
                      .WithDataType(DataType::DT_FP32)
                      .WithDimension(64)
                      .WithIsSparse(false)
                      .Build();
    auto index1 = IndexFactory::CreateAndInitIndex(*param1);
    ASSERT_NE(nullptr, index1);
    ASSERT_EQ(0, index1->Open("test1.index",
                              {StorageOptions::StorageType::kMMAP, true}));

    auto param2 = FlatIndexParamBuilder()
                      .WithMetricType(MetricType::kInnerProduct)
                      .WithDataType(DataType::DT_FP32)
                      .WithDimension(64)
                      .WithIsSparse(false)
                      .Build();
    auto index2 = IndexFactory::CreateAndInitIndex(*param2);
    ASSERT_NE(nullptr, index2);
    ASSERT_EQ(0, index2->Open("test2.index",
                              {StorageOptions::StorageType::kMMAP, true}));

    auto param3 = FlatIndexParamBuilder()
                      .WithMetricType(MetricType::kInnerProduct)
                      .WithDataType(DataType::DT_FP32)
                      .WithDimension(64)
                      .WithIsSparse(false)
                      .Build();
    auto index3 = IndexFactory::CreateAndInitIndex(*param3);
    ASSERT_NE(nullptr, index3);
    ASSERT_EQ(0, index3->Open("test3.index",
                              {StorageOptions::StorageType::kMMAP, true}));

    MergeOptions invalid_options;
    invalid_options.write_concurrency = 0;  // Invalid: must be > 0

    int ret = index3->Merge({index1, index2}, IndexFilter(), invalid_options);
    ASSERT_NE(0, ret);

    index1->Close();
    index2->Close();
    index3->Close();
    system("rm -f test1.index test2.index test3.index");
  }
}

// Feeds malformed or unsupported JSON payloads to the deserialization entry
// points of IndexFactory and expects every one of them to yield nullptr.
TEST(IndexInterface, SerializeFailure) {
  // Payloads for IndexFactory::DeserializeIndexParamFromJson.
  const std::vector<std::string> rejected_index_param_jsons = {
      // Test invalid JSON deserialization
      "invalid json string",
      // Test JSON with invalid enum value
      R"({
      "index_type": "kInvalidType",
      "metric_type": "kL2",
      "dimension": 64,
      "is_sparse": false,
      "data_type": "DT_FP32"
    })",
      // Test JSON with invalid field type (dimension is a string)
      R"({
      "index_type": "kFlat",
      "metric_type": "kL2",
      "dimension": "not_a_number",
      "is_sparse": false,
      "data_type": "DT_FP32"
    })",
      // Test JSON with invalid field type (is_sparse is a string)
      R"({
      "index_type": "kHNSW",
      "metric_type": "kL2",
      "dimension": 1,
      "is_sparse": "false",
      "data_type": "DT_FP32"
    })",
      // Test unsupported index_type
      R"({
      "index_type": "kNone",
      "metric_type": "kL2",
      "dimension": 64,
      "is_sparse": false,
      "data_type": "DT_FP32"
    })",
  };
  for (const std::string &payload : rejected_index_param_jsons) {
    const auto parsed = IndexFactory::DeserializeIndexParamFromJson(payload);
    ASSERT_EQ(nullptr, parsed);
  }

  // Payloads for IndexFactory::QueryParamDeserializeFromJson<FlatQueryParam>.
  const std::vector<std::string> rejected_flat_query_jsons = {
      // Test QueryParam deserialization with invalid JSON
      "invalid json",
      // Test QueryParam deserialization with invalid enum
      R"({
      "index_type": "kInvalidType",
      "topk": 10,
      "fetch_vector": false,
      "radius": 0.0,
      "is_linear": false
    })",
      // Test QueryParam deserialization with invalid field type
      R"({
      "index_type": "kFlat",
      "topk": "not_a_number",
      "fetch_vector": false,
      "radius": 0.0,
      "is_linear": false
    })",
  };
  for (const std::string &payload : rejected_flat_query_jsons) {
    const auto parsed =
        IndexFactory::QueryParamDeserializeFromJson<FlatQueryParam>(payload);
    ASSERT_EQ(nullptr, parsed);
  }

  // Test HNSWQueryParam deserialization with invalid field type
  {
    const std::string bad_ef_search_json = R"({
      "index_type": "kHNSW",
      "topk": 10,
      "fetch_vector": false,
      "radius": 0.0,
      "is_linear": false,
      "ef_search": "not_a_number"
    })";
    const auto parsed =
        IndexFactory::QueryParamDeserializeFromJson<HNSWQueryParam>(
            bad_ef_search_json);
    ASSERT_EQ(nullptr, parsed);
  }
}

// Verifies that the scores returned by Search() match a straightforward
// reference computation for every combination exercised below: dense vectors
// with inner-product, cosine and squared-L2 metrics, and sparse vectors with
// inner-product, each on Flat and HNSW indexes, with and without the FP16
// quantizer.
TEST(IndexInterface, Score) {
  const std::string index_file_path = "test_indexer.index";
  constexpr int kTopk = 10;
  constexpr uint32_t kDocId1 = 2345;
  constexpr uint32_t kDocId2 = 5432;
  auto vector1 = std::vector<float>{3.0f, 4.0f, 5.0f};
  auto vector2 = std::vector<float>{1.0f, 20.0f, 3.0f};
  // Reference copy of the stored vectors, keyed by document id.
  const auto vector_id_map = std::unordered_map<uint32_t, std::vector<float>>{
      {kDocId1, vector1},
      {kDocId2, vector2},
  };
  auto sparse_indices = std::vector<uint32_t>{0, 1, 2};
  auto query_vector = std::vector<float>{1.0f, 2.0f, 3.0f};

  // Shell command that removes the index file; built as a std::string so the
  // path length is not bounded by a fixed-size buffer.
  const std::string cleanup_cmd = "rm -f " + index_file_path;
  system(cleanup_cmd.c_str());

  // Compares the two returned documents against the reference score for
  // `metric_type`, allowing an absolute error of 1e-2.
  auto check_score = [&](const SearchResult &result, MetricType metric_type) {
    ASSERT_EQ(result.doc_list_.size(), 2u);

    // Dot product over the three components used by this test.
    const auto inner_produce_score_func = [](const std::vector<float> &v1,
                                             const std::vector<float> &v2) {
      return v1[0] * v2[0] + v1[1] * v2[1] + v1[2] * v2[2];
    };

    // Cosine distance, i.e. 1 - cosine similarity.
    const auto cosine_score_func = [&inner_produce_score_func](
                                       const std::vector<float> &v1,
                                       const std::vector<float> &v2) {
      return 1 - inner_produce_score_func(v1, v2) /
                     (std::sqrt(inner_produce_score_func(v1, v1)) *
                      std::sqrt(inner_produce_score_func(v2, v2)));
    };

    // SquaredEuclidean
    const auto l2_score_func = [](const std::vector<float> &v1,
                                  const std::vector<float> &v2) {
      assert(v1.size() == 3);
      assert(v2.size() == 3);
      float ret = 0.0f;
      for (size_t i = 0; i < v1.size(); ++i) {
        ret += (v1[i] - v2[i]) * (v1[i] - v2[i]);
      }
      return ret;
    };

    std::function<float(const std::vector<float> &, const std::vector<float> &)>
        score_func;

    switch (metric_type) {
      case MetricType::kInnerProduct:
        score_func = inner_produce_score_func;
        break;
      case MetricType::kCosine:
        score_func = cosine_score_func;
        break;
      case MetricType::kL2sq:
        score_func = l2_score_func;
        break;
      default:
        FAIL() << "metric type not covered by this test";
    }

    // Iterate over doc_list_ and check scores
    for (size_t rank = 0; rank < 2; ++rank) {
      const auto &doc = result.doc_list_[rank];
      // Look the key up without inserting: an unknown key must fail the test
      // instead of producing an empty reference vector.
      const auto stored = vector_id_map.find(doc.key());
      ASSERT_TRUE(stored != vector_id_map.end());
      const float expected = score_func(stored->second, query_vector);

      printf("result.doc_list_[%zu].score() top%zu: %f\n", rank, rank + 1,
             doc.score());
      printf(
          "score_func(vector_id_map[result.doc_list_[%zu].key()], "
          "query_vector): %f\n",
          rank, expected);
      ASSERT_NEAR(expected, doc.score(), 1e-2);
    }
  };

  // Builds a dense index from `param`, stores the two reference vectors,
  // searches with `query_vector` and validates the returned scores.
  auto dense_func = [&](const BaseIndexParam::Pointer &param,
                        const BaseIndexQueryParam::Pointer &query_param,
                        MetricType metric_type) {
    system(cleanup_cmd.c_str());
    auto index = IndexFactory::CreateAndInitIndex(*param);
    ASSERT_NE(nullptr, index);

    ASSERT_EQ(0, index->Open(index_file_path,
                             {StorageOptions::StorageType::kMMAP, true}));

    VectorData vector_data1;
    vector_data1.vector = DenseVector{vector1.data()};
    ASSERT_EQ(0, index->Add(vector_data1, kDocId1));

    VectorData vector_data2;
    vector_data2.vector = DenseVector{vector2.data()};
    ASSERT_EQ(0, index->Add(vector_data2, kDocId2));

    SearchResult result;
    VectorData query;
    query.vector = DenseVector{query_vector.data()};
    ASSERT_EQ(0, index->Search(query, query_param, &result));

    check_score(result, metric_type);

    index->Close();
    system(cleanup_cmd.c_str());
  };

  // Same flow as dense_func, but the vectors are stored and queried as sparse
  // vectors over the indices {0, 1, 2}.
  auto sparse_func = [&](const BaseIndexParam::Pointer &param,
                         const BaseIndexQueryParam::Pointer &query_param,
                         MetricType metric_type) {
    system(cleanup_cmd.c_str());
    auto index = IndexFactory::CreateAndInitIndex(*param);
    ASSERT_NE(nullptr, index);

    ASSERT_EQ(0, index->Open(index_file_path,
                             {StorageOptions::StorageType::kMMAP, true}));

    VectorData vector_data1;
    vector_data1.vector =
        SparseVector{3, reinterpret_cast<const void *>(sparse_indices.data()),
                     vector1.data()};
    ASSERT_EQ(0, index->Add(vector_data1, kDocId1));

    VectorData vector_data2;
    vector_data2.vector =
        SparseVector{3, reinterpret_cast<const void *>(sparse_indices.data()),
                     vector2.data()};
    ASSERT_EQ(0, index->Add(vector_data2, kDocId2));

    SearchResult result;
    VectorData query;
    query.vector =
        SparseVector{3, reinterpret_cast<const void *>(sparse_indices.data()),
                     query_vector.data()};
    ASSERT_EQ(0, index->Search(query, query_param, &result));

    check_score(result, metric_type);

    index->Close();
    system(cleanup_cmd.c_str());
  };

  constexpr uint32_t kDimension = 3;

  LOG_INFO("Test DenseVector, MetricType::kInnerProduct");
  dense_func(
      FlatIndexParamBuilder()
          .WithMetricType(MetricType::kInnerProduct)
          .WithDataType(DataType::DT_FP32)
          .WithDimension(kDimension)
          .WithIsSparse(false)
          .Build(),
      FlatQueryParamBuilder().with_topk(kTopk).with_fetch_vector(true).build(),
      MetricType::kInnerProduct);
  dense_func(HNSWIndexParamBuilder()
                 .WithMetricType(MetricType::kInnerProduct)
                 .WithDataType(DataType::DT_FP32)
                 .WithDimension(kDimension)
                 .WithIsSparse(false)
                 .WithEFConstruction(100)
                 .Build(),
             HNSWQueryParamBuilder()
                 .with_topk(kTopk)
                 .with_fetch_vector(true)
                 .with_ef_search(20)
                 .build(),
             MetricType::kInnerProduct);

  LOG_INFO("Test DenseVector, MetricType::kInnerProduct, QuantizerType::kFP16");
  dense_func(
      FlatIndexParamBuilder()
          .WithMetricType(MetricType::kInnerProduct)
          .WithDataType(DataType::DT_FP32)
          .WithDimension(kDimension)
          .WithIsSparse(false)
          .WithQuantizerParam(QuantizerParam(QuantizerType::kFP16))
          .Build(),
      FlatQueryParamBuilder().with_topk(kTopk).with_fetch_vector(true).build(),
      MetricType::kInnerProduct);
  dense_func(HNSWIndexParamBuilder()
                 .WithMetricType(MetricType::kInnerProduct)
                 .WithDataType(DataType::DT_FP32)
                 .WithDimension(kDimension)
                 .WithIsSparse(false)
                 .WithEFConstruction(100)
                 .WithQuantizerParam(QuantizerParam(QuantizerType::kFP16))
                 .Build(),
             HNSWQueryParamBuilder()
                 .with_topk(kTopk)
                 .with_fetch_vector(true)
                 .with_ef_search(20)
                 .build(),
             MetricType::kInnerProduct);

  LOG_INFO("Test DenseVector, MetricType::kCosine");
  dense_func(
      FlatIndexParamBuilder()
          .WithMetricType(MetricType::kCosine)
          .WithDataType(DataType::DT_FP32)
          .WithDimension(kDimension)
          .WithIsSparse(false)
          .Build(),
      FlatQueryParamBuilder().with_topk(kTopk).with_fetch_vector(true).build(),
      MetricType::kCosine);
  dense_func(HNSWIndexParamBuilder()
                 .WithMetricType(MetricType::kCosine)
                 .WithDataType(DataType::DT_FP32)
                 .WithDimension(kDimension)
                 .WithIsSparse(false)
                 .WithEFConstruction(100)
                 .Build(),
             HNSWQueryParamBuilder()
                 .with_topk(kTopk)
                 .with_fetch_vector(true)
                 .with_ef_search(20)
                 .build(),
             MetricType::kCosine);

  LOG_INFO("Test DenseVector, MetricType::kCosine, QuantizerType::kFP16");
  dense_func(
      FlatIndexParamBuilder()
          .WithMetricType(MetricType::kCosine)
          .WithDataType(DataType::DT_FP32)
          .WithDimension(kDimension)
          .WithIsSparse(false)
          .WithQuantizerParam(QuantizerParam(QuantizerType::kFP16))
          .Build(),
      FlatQueryParamBuilder().with_topk(kTopk).with_fetch_vector(true).build(),
      MetricType::kCosine);
  dense_func(HNSWIndexParamBuilder()
                 .WithMetricType(MetricType::kCosine)
                 .WithDataType(DataType::DT_FP32)
                 .WithDimension(kDimension)
                 .WithIsSparse(false)
                 .WithEFConstruction(100)
                 .WithQuantizerParam(QuantizerParam(QuantizerType::kFP16))
                 .Build(),
             HNSWQueryParamBuilder()
                 .with_topk(kTopk)
                 .with_fetch_vector(true)
                 .with_ef_search(20)
                 .build(),
             MetricType::kCosine);

  LOG_INFO("Test DenseVector, MetricType::kL2sq");
  dense_func(
      FlatIndexParamBuilder()
          .WithMetricType(MetricType::kL2sq)
          .WithDataType(DataType::DT_FP32)
          .WithDimension(kDimension)
          .WithIsSparse(false)
          .Build(),
      FlatQueryParamBuilder().with_topk(kTopk).with_fetch_vector(true).build(),
      MetricType::kL2sq);
  dense_func(HNSWIndexParamBuilder()
                 .WithMetricType(MetricType::kL2sq)
                 .WithDataType(DataType::DT_FP32)
                 .WithDimension(kDimension)
                 .WithIsSparse(false)
                 .WithEFConstruction(100)
                 .Build(),
             HNSWQueryParamBuilder()
                 .with_topk(kTopk)
                 .with_fetch_vector(true)
                 .with_ef_search(20)
                 .build(),
             MetricType::kL2sq);

  LOG_INFO("Test DenseVector, MetricType::kL2sq, QuantizerType::kFP16");
  dense_func(
      FlatIndexParamBuilder()
          .WithMetricType(MetricType::kL2sq)
          .WithDataType(DataType::DT_FP32)
          .WithDimension(kDimension)
          .WithIsSparse(false)
          .WithQuantizerParam(QuantizerParam(QuantizerType::kFP16))
          .Build(),
      FlatQueryParamBuilder().with_topk(kTopk).with_fetch_vector(true).build(),
      MetricType::kL2sq);
  dense_func(HNSWIndexParamBuilder()
                 .WithMetricType(MetricType::kL2sq)
                 .WithDataType(DataType::DT_FP32)
                 .WithDimension(kDimension)
                 .WithIsSparse(false)
                 .WithEFConstruction(100)
                 .WithQuantizerParam(QuantizerParam(QuantizerType::kFP16))
                 .Build(),
             HNSWQueryParamBuilder()
                 .with_topk(kTopk)
                 .with_fetch_vector(true)
                 .with_ef_search(20)
                 .build(),
             MetricType::kL2sq);

  LOG_INFO("Test SparseVector, MetricType::kInnerProduct");
  sparse_func(
      FlatIndexParamBuilder()
          .WithMetricType(MetricType::kInnerProduct)
          .WithDataType(DataType::DT_FP32)
          .WithIsSparse(true)
          .Build(),
      FlatQueryParamBuilder().with_topk(kTopk).with_fetch_vector(true).build(),
      MetricType::kInnerProduct);
  sparse_func(HNSWIndexParamBuilder()
                  .WithMetricType(MetricType::kInnerProduct)
                  .WithDataType(DataType::DT_FP32)
                  .WithIsSparse(true)
                  .WithEFConstruction(100)
                  .Build(),
              HNSWQueryParamBuilder()
                  .with_topk(kTopk)
                  .with_fetch_vector(true)
                  .with_ef_search(20)
                  .build(),
              MetricType::kInnerProduct);

  LOG_INFO(
      "Test SparseVector, MetricType::kInnerProduct, QuantizerType::kFP16");
  sparse_func(
      FlatIndexParamBuilder()
          .WithMetricType(MetricType::kInnerProduct)
          .WithDataType(DataType::DT_FP32)
          .WithIsSparse(true)
          .WithQuantizerParam(QuantizerParam(QuantizerType::kFP16))
          .Build(),
      FlatQueryParamBuilder().with_topk(kTopk).with_fetch_vector(true).build(),
      MetricType::kInnerProduct);
  sparse_func(HNSWIndexParamBuilder()
                  .WithMetricType(MetricType::kInnerProduct)
                  .WithDataType(DataType::DT_FP32)
                  .WithIsSparse(true)
                  .WithEFConstruction(100)
                  .WithQuantizerParam(QuantizerParam(QuantizerType::kFP16))
                  .Build(),
              HNSWQueryParamBuilder()
                  .with_topk(kTopk)
                  .with_fetch_vector(true)
                  .with_ef_search(20)
                  .build(),
              MetricType::kInnerProduct);
}

#if RABITQ_SUPPORTED
// Smoke test for the HNSWRabitq index: for several builder configurations it
// inserts a single vector, trains, and checks that searching with that same
// vector returns exactly the inserted document.
TEST(IndexInterface, HNSWRabitqGeneral) {
  constexpr uint32_t kDimension = 64;
  const std::string index_name{"test_rabitq.index"};
  // Shell command removing every file whose name starts with index_name.
  char cmd_buf[256];
  snprintf(cmd_buf, sizeof(cmd_buf), "rm -f %s*", index_name.c_str());

  // Runs one create / open / add / train / search / close cycle for the given
  // index and query parameters, cleaning the index files before and after.
  auto func = [&](const BaseIndexParam::Pointer &param,
                  const BaseIndexQueryParam::Pointer &query_param) {
    system(cmd_buf);
    auto index = IndexFactory::CreateAndInitIndex(*param);
    ASSERT_NE(nullptr, index);

    index->Open(index_name, {StorageOptions::StorageType::kMMAP, true});

    // Single document: all zeros except components 1 and 2, stored under
    // key 233.
    std::vector<float> vector(kDimension);
    vector[1] = 1.0f;
    vector[2] = 2.0f;
    VectorData vector_data;
    vector_data.vector = DenseVector{vector.data()};
    ASSERT_TRUE(0 == index->Add(vector_data, 233));
    ASSERT_TRUE(0 == index->Train());

    // Query with the very same vector; the only stored document must come
    // back.
    SearchResult result;
    VectorData query;
    query.vector = DenseVector{vector.data()};
    index->Search(query, query_param, &result);
    ASSERT_EQ(1, result.doc_list_.size());
    ASSERT_EQ(233, result.doc_list_[0].key());

    // Fetch is meaningless for HNSWRabitq
    index->Close();
    system(cmd_buf);
  };

  using namespace zvec::core;
  using namespace zvec::ailego;
  // Training data for the converter: 500 vectors, vector i has every
  // component equal to i.
  auto holder = std::make_shared<
      zvec::core::MultiPassIndexProvider<IndexMeta::DataType::DT_FP32>>(
      kDimension);
  size_t doc_cnt = 500UL;
  for (size_t i = 0; i < doc_cnt; i++) {
    NumericalVector<float> vec(kDimension);
    for (size_t j = 0; j < kDimension; ++j) {
      vec[j] = static_cast<float>(i);
    }
    ASSERT_TRUE(holder->emplace(i, vec));
  }
  std::shared_ptr<IndexMeta> index_meta_ptr_;
  index_meta_ptr_.reset(
      new (std::nothrow) IndexMeta(IndexMeta::DataType::DT_FP32, kDimension));
  // The nothrow allocation may yield nullptr; stop here instead of
  // dereferencing a null pointer on the next line.
  ASSERT_TRUE(index_meta_ptr_ != nullptr);
  index_meta_ptr_->set_metric("SquaredEuclidean", 0, Params());

  // Train the converter on the provider and derive the reformer that is
  // handed to every index configuration below.
  RabitqConverter converter;
  converter.init(*index_meta_ptr_, Params());
  ASSERT_EQ(converter.train(holder), 0);
  std::shared_ptr<IndexReformer> index_reformer;
  ASSERT_EQ(converter.to_reformer(&index_reformer), 0);

  // HNSWRabitq with default total_bits
  func(HNSWRabitqIndexParamBuilder()
           .WithMetricType(MetricType::kL2sq)
           .WithDataType(DataType::DT_FP32)
           .WithDimension(kDimension)
           .WithIsSparse(false)
           .WithEFConstruction(100)
           .WithProvider(holder)
           .WithReformer(index_reformer)
           .Build(),
       HNSWRabitqQueryParamBuilder()
           .with_topk(10)
           .with_fetch_vector(false)
           .with_ef_search(50)
           .build());

  // HNSWRabitq with InnerProduct metric
  func(HNSWRabitqIndexParamBuilder()
           .WithMetricType(MetricType::kInnerProduct)
           .WithDataType(DataType::DT_FP32)
           .WithDimension(kDimension)
           .WithIsSparse(false)
           .WithEFConstruction(100)
           .WithProvider(holder)
           .WithReformer(index_reformer)
           .Build(),
       HNSWRabitqQueryParamBuilder()
           .with_topk(10)
           .with_fetch_vector(false)
           .with_ef_search(50)
           .build());

  // HNSWRabitq with custom total_bits
  func(HNSWRabitqIndexParamBuilder()
           .WithMetricType(MetricType::kL2sq)
           .WithDataType(DataType::DT_FP32)
           .WithDimension(kDimension)
           .WithIsSparse(false)
           .WithEFConstruction(100)
           .WithTotalBits(2)
           .WithProvider(holder)
           .WithReformer(index_reformer)
           .Build(),
       HNSWRabitqQueryParamBuilder()
           .with_topk(10)
           .with_fetch_vector(false)
           .with_ef_search(50)
           .build());
}
#endif

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif

================================================
FILE: tests/core/metric/CMakeLists.txt
================================================
# Build script for the metric unit tests: one gtest target per *_test.cc file.

# Project build helpers (cc_gtest is presumably defined here — TODO confirm).
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)

# Collect every *_test.cc source in this directory and its subdirectories.
file(GLOB_RECURSE ALL_TEST_SRCS *_test.cc)

foreach(CC_SRCS ${ALL_TEST_SRCS})
  # Target name is the source file name without directory and extension.
  get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)
  cc_gtest(
      NAME ${CC_TARGET}
      STRICT
      LIBS zvec_ailego core_framework core_metric core_quantizer
      SRCS ${CC_SRCS}
      INCS . ${PROJECT_ROOT_DIR}/src/core/
    )
endforeach()

================================================
FILE: tests/core/metric/cosine_metric_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <iostream>
#include <ailego/math/norm_matrix.h>
#include <gtest/gtest.h>
#include <zvec/ailego/utility/float_helper.h>
#include "zvec/core/framework/index_factory.h"


using namespace zvec;
using namespace zvec::core;
using namespace zvec::ailego;

static void Norm2(std::vector<Float16> &vec, std::string *out) {
  float norm = 0.0f;

  out->resize(vec.size() * sizeof(Float16) + sizeof(float));

  Norm2Matrix<Float16, 1>::Compute(vec.data(), vec.size(), &norm);

  Float16 *buf = reinterpret_cast<Float16 *>(&(*out)[0]);

  for (uint32_t i = 0; i < vec.size(); ++i) {
    buf[i] = vec[i] / norm;
  }

  float *norm_buf =
      reinterpret_cast<float *>(&(*out)[vec.size() * sizeof(Float16)]);

  memcpy(norm_buf, &norm, sizeof(float));
}

// Packs `vec` into `out` as float components divided by the norm reported by
// Norm2Matrix, followed by that norm as one extra trailing float.
static void Norm2(std::vector<float> &vec, std::string *out) {
  const size_t count = vec.size();
  out->resize((count + 1) * sizeof(float));

  float norm = 0.0f;
  Norm2Matrix<float, 1>::Compute(vec.data(), count, &norm);

  float *scaled = reinterpret_cast<float *>(&(*out)[0]);
  for (size_t idx = 0; idx != count; ++idx) {
    scaled[idx] = vec[idx] / norm;
  }

  // Slot `count` is the extra element reserved by the resize above.
  scaled[count] = norm;
}

// Number of extra elements (in units of the element type) appended to each
// vector to hold the stored norm: 1 for FP32, 2 for FP16, 0 for anything else.
static size_t ExtraDimension(IndexMeta::DataType type) {
  return type == IndexMeta::DT_FP32 ? 1 : (type == IndexMeta::DT_FP16 ? 2 : 0);
}

// Checks the general contract of the "Cosine" metric: which data types it
// accepts in init(), query matching, which distance-matrix shapes exist, and
// that normalize() leaves the value untouched.
TEST(CosineMeasure_General_Test, General) {
  auto measure = IndexFactory::CreateMetric("Cosine");
  // Fatal on purpose: every statement below dereferences `measure`.
  ASSERT_TRUE(measure);

  // Only FP16 and FP32 are expected to be accepted by init().
  IndexMeta meta;
  meta.set_meta(IndexMeta::DT_INT16, 64);
  ASSERT_NE(0, measure->init(meta, Params()));
  meta.set_meta(IndexMeta::DT_FP16, 64);
  ASSERT_EQ(0, measure->init(meta, Params()));
  meta.set_meta(IndexMeta::DT_FP32, 64);
  ASSERT_EQ(0, measure->init(meta, Params()));
  meta.set_meta(IndexMeta::DT_INT8, 64);
  ASSERT_NE(0, measure->init(meta, Params()));

  meta.set_meta(IndexMeta::DT_BINARY32, 64);
  ASSERT_NE(0, measure->init(meta, Params()));
  meta.set_meta(IndexMeta::DT_BINARY64, 64);
  ASSERT_NE(0, measure->init(meta, Params()));
  meta.set_meta(IndexMeta::DT_INT4, 64);
  ASSERT_NE(0, measure->init(meta, Params()));

  // Note: `meta` still carries the DT_INT4 setting from the last (rejected)
  // init() call when it is passed to is_matched() below.
  IndexMeta meta2;
  meta2.set_meta(IndexMeta::DT_BINARY32, 64);
  EXPECT_FALSE(measure->is_matched(meta2));
  EXPECT_TRUE(
      measure->is_matched(meta, IndexQueryMeta(IndexMeta::DT_FP32, 64)));
  EXPECT_FALSE(
      measure->is_matched(meta, IndexQueryMeta(IndexMeta::DT_FP32, 63)));

  // Only the 1x1 distance matrix is expected to be provided.
  EXPECT_FALSE(measure->distance_matrix(0, 0));
  EXPECT_FALSE(measure->distance_matrix(3, 5));
  EXPECT_FALSE(measure->distance_matrix(31, 65));
  EXPECT_TRUE(measure->distance_matrix(1, 1));
  EXPECT_FALSE(measure->distance_matrix(2, 1));
  EXPECT_FALSE(measure->distance_matrix(2, 2));
  EXPECT_FALSE(measure->distance_matrix(4, 1));
  EXPECT_FALSE(measure->distance_matrix(4, 2));
  EXPECT_FALSE(measure->distance_matrix(4, 4));
  EXPECT_FALSE(measure->distance_matrix(8, 1));
  EXPECT_FALSE(measure->distance_matrix(8, 2));
  EXPECT_FALSE(measure->distance_matrix(8, 4));
  EXPECT_FALSE(measure->distance_matrix(8, 8));
  EXPECT_FALSE(measure->distance_matrix(16, 1));
  EXPECT_FALSE(measure->distance_matrix(16, 2));
  EXPECT_FALSE(measure->distance_matrix(16, 4));
  EXPECT_FALSE(measure->distance_matrix(16, 8));
  EXPECT_FALSE(measure->distance_matrix(16, 16));
  EXPECT_FALSE(measure->distance_matrix(32, 1));
  EXPECT_FALSE(measure->distance_matrix(32, 2));
  EXPECT_FALSE(measure->distance_matrix(32, 4));
  EXPECT_FALSE(measure->distance_matrix(32, 8));
  EXPECT_FALSE(measure->distance_matrix(32, 16));
  EXPECT_FALSE(measure->distance_matrix(32, 32));

  // normalize() is expected to be unsupported and to leave the score as is.
  EXPECT_FALSE(measure->support_normalize());
  float result = 1.0f;
  measure->normalize(&result);
  EXPECT_FLOAT_EQ(1.0f, result);
}

// Verifies the FP32 cosine distance for 2-, 3- and 11-dimensional vectors
// through both the pairwise distance function and the 1x1 distance matrix.
TEST(CosineMeasure_General_Test, TestDistanceFp32) {
  // Runs one scenario: builds a fresh metric for the vectors' dimension,
  // packs both vectors with Norm2 and compares both kernels to `expected`.
  auto verify = [](std::vector<float> &lhs, std::vector<float> &rhs,
                   float expected) {
    const uint32_t dimension = static_cast<uint32_t>(lhs.size());
    IndexMeta meta;
    meta.set_meta(IndexMeta::DT_FP32, dimension);

    auto measure = IndexFactory::CreateMetric("Cosine");
    ASSERT_TRUE(measure);
    Params params;
    ASSERT_EQ(0, measure->init(meta, params));
    ASSERT_EQ(false, measure->support_train());

    auto pairwise = measure->distance();
    ASSERT_NE(pairwise, nullptr);
    auto matrix_1x1 = measure->distance_matrix(1, 1);
    ASSERT_NE(matrix_1x1, nullptr);

    std::string lhs_packed;
    std::string rhs_packed;
    Norm2(lhs, &lhs_packed);
    Norm2(rhs, &rhs_packed);

    // The packed buffers carry the norm behind the components, hence the
    // extra dimension passed to the kernels.
    const auto packed_dim = dimension + ExtraDimension(IndexMeta::DT_FP32);

    float score = 0.0f;
    pairwise(lhs_packed.data(), rhs_packed.data(), packed_dim, &score);
    if (measure->support_normalize()) {
      measure->normalize(&score);
    }
    EXPECT_GE(0.00001f, std::abs(score - expected));

    matrix_1x1(lhs_packed.data(), rhs_packed.data(), packed_dim, &score);
    if (measure->support_normalize()) {
      measure->normalize(&score);
    }
    EXPECT_GE(0.00001f, std::abs(score - expected));
  };

  // A fatal failure inside the helper only leaves the helper, so the test
  // bails out explicitly to keep the abort-on-ASSERT behaviour.
  {
    std::vector<float> a = {0.2f, 0.9f};
    std::vector<float> b = {0.3f, 0.5f};
    verify(a, b, 0.05131668f);
    if (::testing::Test::HasFatalFailure()) return;
  }

  {
    std::vector<float> a = {0.2f, 0.9f, 0.6f};
    std::vector<float> b = {0.3f, 0.5f, 0.7f};
    verify(a, b, 0.07199293f);
    if (::testing::Test::HasFatalFailure()) return;
  }

  {
    std::vector<float> a = {1.0f, 2.0f, 3.0f, 0.2f, 0.3f, 0.1f,
                            5.2f, 2.1f, 7.1f, 6.8f, 1.2f};
    std::vector<float> b = {2.0f, 4.0f, 6.0f, 0.6f, 0.7f, 0.9f,
                            1.0f, 2.3f, 3.4f, 4.5f, 6.4f};
    verify(a, b, 0.2803060f);
  }
}

// Verifies the FP16 cosine distance for 2-, 3- and 11-dimensional vectors
// through both the pairwise distance function and the 1x1 distance matrix.
//
// Every scenario exercises both kernels. Previously the 11-dimensional
// scenario fetched the pairwise function but then called the matrix kernel
// twice, so the pairwise kernel was never run for that dimension.
TEST(CosineMeasure_General_Test, TestDistanceFp16) {
  // Runs one scenario: builds a fresh metric for the vectors' dimension,
  // packs both vectors with Norm2 and compares both kernels to `expected`
  // with the looser FP16 tolerance.
  auto verify = [](std::vector<Float16> &lhs, std::vector<Float16> &rhs,
                   float expected) {
    const uint32_t dimension = static_cast<uint32_t>(lhs.size());
    IndexMeta meta;
    meta.set_meta(IndexMeta::DT_FP16, dimension);

    auto measure = IndexFactory::CreateMetric("Cosine");
    ASSERT_TRUE(measure);
    Params params;
    ASSERT_EQ(0, measure->init(meta, params));
    ASSERT_EQ(false, measure->support_train());

    auto distance = measure->distance();
    ASSERT_NE(distance, nullptr);
    auto dist_matrix = measure->distance_matrix(1, 1);
    ASSERT_NE(dist_matrix, nullptr);

    std::string a_out;
    std::string b_out;
    Norm2(lhs, &a_out);
    Norm2(rhs, &b_out);

    // The packed buffers carry the norm behind the components, hence the
    // extra dimension passed to the kernels.
    const auto packed_dim = dimension + ExtraDimension(IndexMeta::DT_FP16);

    float result = 0.0f;
    distance(a_out.data(), b_out.data(), packed_dim, &result);
    if (measure->support_normalize()) {
      measure->normalize(&result);
    }
    EXPECT_GE(0.001f, std::abs(result - expected));

    dist_matrix(a_out.data(), b_out.data(), packed_dim, &result);
    if (measure->support_normalize()) {
      measure->normalize(&result);
    }
    EXPECT_GE(0.001f, std::abs(result - expected));
  };

  // A fatal failure inside the helper only leaves the helper, so the test
  // bails out explicitly to keep the abort-on-ASSERT behaviour.
  {
    std::vector<Float16> a = {0.2f, 0.9f};
    std::vector<Float16> b = {0.3f, 0.5f};
    verify(a, b, 0.05131668f);
    if (::testing::Test::HasFatalFailure()) return;
  }

  {
    std::vector<Float16> a = {0.2f, 0.9f, 0.6f};
    std::vector<Float16> b = {0.3f, 0.5f, 0.7f};
    verify(a, b, 0.07199293f);
    if (::testing::Test::HasFatalFailure()) return;
  }

  {
    std::vector<Float16> a = {1.0f, 2.0f, 3.0f, 0.2f, 0.3f, 0.1f,
                              5.2f, 2.1f, 7.1f, 6.8f, 1.2f};
    std::vector<Float16> b = {2.0f, 4.0f, 6.0f, 0.6f, 0.7f, 0.9f,
                              1.0f, 2.3f, 3.4f, 4.5f, 6.4f};
    verify(a, b, 0.2803060f);
  }
}

// Batch cosine distance on FP16: scores two packed vectors against the second
// one and expects the known distance for the first and zero for the second
// (which is compared with itself).
TEST(CosineMeasure_General_Test, TestDistanceBatchFp16Simple) {
  constexpr uint32_t dimension = 2;

  IndexMeta meta;
  meta.set_meta(IndexMeta::DT_FP16, dimension);

  auto measure = IndexFactory::CreateMetric("Cosine");
  ASSERT_TRUE(measure);
  Params params;
  ASSERT_EQ(0, measure->init(meta, params));
  ASSERT_EQ(false, measure->support_train());

  auto dist_batch = measure->batch_distance();
  ASSERT_NE(dist_batch, nullptr);

  std::vector<Float16> first = {0.2f, 0.9f};
  std::vector<Float16> second = {0.3f, 0.5f};

  std::string first_packed;
  std::string second_packed;
  Norm2(first, &first_packed);
  Norm2(second, &second_packed);

  // Candidates are {first, second}; the query is `second`.
  const void *candidates[2] = {first_packed.data(), second_packed.data()};
  float scores[2] = {0.0f, 0.0f};
  dist_batch(candidates, second_packed.data(), 2,
             dimension + ExtraDimension(IndexMeta::DT_FP16), scores);

  if (measure->support_normalize()) {
    for (float &score : scores) {
      measure->normalize(&score);
    }
  }

  EXPECT_GE(0.001f, std::abs(scores[0] - 0.05131668f));
  EXPECT_GE(0.001f, std::abs(scores[1] - 0.0f));
}

// Batch cosine distance on FP32: scores two packed vectors against the second
// one and expects the known distance for the first and zero for the second
// (which is compared with itself).
TEST(CosineMeasure_General_Test, TestDistanceBatchFp32Simple) {
  constexpr uint32_t dimension = 2;

  IndexMeta meta;
  meta.set_meta(IndexMeta::DT_FP32, dimension);

  auto measure = IndexFactory::CreateMetric("Cosine");
  ASSERT_TRUE(measure);
  Params params;
  ASSERT_EQ(0, measure->init(meta, params));
  ASSERT_EQ(false, measure->support_train());

  auto dist_batch = measure->batch_distance();
  ASSERT_NE(dist_batch, nullptr);

  std::vector<float> first = {0.2f, 0.9f};
  std::vector<float> second = {0.3f, 0.5f};

  std::string first_packed;
  std::string second_packed;
  Norm2(first, &first_packed);
  Norm2(second, &second_packed);

  // Candidates are {first, second}; the query is `second`.
  const void *candidates[2] = {first_packed.data(), second_packed.data()};
  float scores[2] = {0.0f, 0.0f};
  dist_batch(candidates, second_packed.data(), 2,
             dimension + ExtraDimension(IndexMeta::DT_FP32), scores);

  if (measure->support_normalize()) {
    for (float &score : scores) {
      measure->normalize(&score);
    }
  }

  EXPECT_GE(0.00001f, std::abs(scores[0] - 0.05131668f));
  EXPECT_GE(0.00001f, std::abs(scores[1] - 0.0f));
}

template <typename T>
void calculate_distance(std::vector<T> &a, std::vector<T> &b, size_t dimension,
                        IndexMeta::DataType data_type, size_t batch_size,
                        float expected_distance, float epsilon = 0.00001f) {
  IndexMeta meta;
  meta.set_meta(data_type, dimension);

  auto measure = IndexFactory::CreateMetric("Cosine");
  ASSERT_TRUE(measure);
  Params params;
  ASSERT_EQ(0, measure->init(meta, params));
  ASSERT_EQ(false, measure->support_train());

  auto dist_batch = measure->batch_distance();
  ASSERT_NE(dist_batch, nullptr);

  std::string a_out;
  std::string b_out;

  Norm2(a, &a_out);
  Norm2(b, &b_out);

  float results[2] = {0.0f, 0.0f};

  const void *vecs[2];
  vecs[0] = a_out.data();
  vecs[1] = b_out.data();
  dist_batch(vecs, b_out.data(), batch_size,
             dimension + ExtraDimension(data_type), results);

  if (measure->support_normalize()) {
    measure->normalize(&results[0]);
    measure->normalize(&results[1]);
  }

  EXPECT_GE(epsilon, std::abs(results[0] - expected_distance));
  EXPECT_GE(epsilon, std::abs(results[1] - 0.0f));
}


// Runs the batch cosine-distance driver for 2-, 3- and 11-dimensional inputs,
// in FP32 and FP16, with batch sizes 1 and 2.
TEST(CosineMeasure_General_Test, TestDistanceBatch) {
  const size_t batch_sizes[] = {1, 2};
  const float fp32_eps = 0.00001f;
  const float fp16_eps = 0.001f;

  // Dimension 2.
  {
    const float expected = 0.05131668f;
    {
      std::vector<float> lhs = {0.2f, 0.9f};
      std::vector<float> rhs = {0.3f, 0.5f};
      for (size_t batch : batch_sizes) {
        calculate_distance(lhs, rhs, lhs.size(), IndexMeta::DT_FP32, batch,
                           expected, fp32_eps);
      }
    }
    {
      std::vector<Float16> lhs = {0.2f, 0.9f};
      std::vector<Float16> rhs = {0.3f, 0.5f};
      for (size_t batch : batch_sizes) {
        calculate_distance(lhs, rhs, lhs.size(), IndexMeta::DT_FP16, batch,
                           expected, fp16_eps);
      }
    }
  }

  // Dimension 3.
  {
    const float expected = 0.07199293f;
    {
      std::vector<float> lhs = {0.2f, 0.9f, 0.6f};
      std::vector<float> rhs = {0.3f, 0.5f, 0.7f};
      for (size_t batch : batch_sizes) {
        calculate_distance(lhs, rhs, lhs.size(), IndexMeta::DT_FP32, batch,
                           expected, fp32_eps);
      }
    }
    {
      std::vector<Float16> lhs = {0.2f, 0.9f, 0.6f};
      std::vector<Float16> rhs = {0.3f, 0.5f, 0.7f};
      for (size_t batch : batch_sizes) {
        calculate_distance(lhs, rhs, lhs.size(), IndexMeta::DT_FP16, batch,
                           expected, fp16_eps);
      }
    }
  }

  // Dimension 11.
  {
    const float expected = 0.2803060f;
    {
      std::vector<float> lhs = {1.0f, 2.0f, 3.0f, 0.2f, 0.3f, 0.1f,
                                5.2f, 2.1f, 7.1f, 6.8f, 1.2f};
      std::vector<float> rhs = {2.0f, 4.0f, 6.0f, 0.6f, 0.7f, 0.9f,
                                1.0f, 2.3f, 3.4f, 4.5f, 6.4f};
      for (size_t batch : batch_sizes) {
        calculate_distance(lhs, rhs, lhs.size(), IndexMeta::DT_FP32, batch,
                           expected, fp32_eps);
      }
    }
    {
      std::vector<Float16> lhs = {1.0f, 2.0f, 3.0f, 0.2f, 0.3f, 0.1f,
                                  5.2f, 2.1f, 7.1f, 6.8f, 1.2f};
      std::vector<Float16> rhs = {2.0f, 4.0f, 6.0f, 0.6f, 0.7f, 0.9f,
                                  1.0f, 2.3f, 3.4f, 4.5f, 6.4f};
      for (size_t batch : batch_sizes) {
        calculate_distance(lhs, rhs, lhs.size(), IndexMeta::DT_FP16, batch,
                           expected, fp16_eps);
      }
    }
  }
}


================================================
FILE: tests/core/metric/euclidean_metric_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <iostream>
#include <gtest/gtest.h>
#include "zvec/core/framework/index_factory.h"

using namespace zvec;
using namespace zvec::core;

// Checks the general contract of the "SquaredEuclidean" metric: which data
// types init() accepts, query matching, which distance-matrix shapes exist,
// and that normalize() leaves the value untouched.
TEST(SquaredEuclideanMetric, General) {
  auto metric = IndexFactory::CreateMetric("SquaredEuclidean");
  // Fatal on purpose: every statement below dereferences `metric`.
  ASSERT_TRUE(metric);

  // INT16 is expected to be rejected; all other listed types are accepted.
  IndexMeta meta;
  meta.set_meta(IndexMeta::DataType::DT_INT16, 64);
  ASSERT_NE(0, metric->init(meta, ailego::Params()));
  meta.set_meta(IndexMeta::DataType::DT_BINARY32, 64);
  ASSERT_EQ(0, metric->init(meta, ailego::Params()));
  meta.set_meta(IndexMeta::DataType::DT_BINARY64, 64);
  ASSERT_EQ(0, metric->init(meta, ailego::Params()));
  meta.set_meta(IndexMeta::DataType::DT_FP16, 64);
  ASSERT_EQ(0, metric->init(meta, ailego::Params()));
  meta.set_meta(IndexMeta::DataType::DT_FP32, 64);
  ASSERT_EQ(0, metric->init(meta, ailego::Params()));
  meta.set_meta(IndexMeta::DataType::DT_INT4, 64);
  ASSERT_EQ(0, metric->init(meta, ailego::Params()));
  meta.set_meta(IndexMeta::DataType::DT_INT8, 64);
  ASSERT_EQ(0, metric->init(meta, ailego::Params()));

  // The metric was last initialised with INT8 / 64, so only that combination
  // is expected to match.
  IndexMeta meta2;
  meta2.set_meta(IndexMeta::DataType::DT_BINARY32, 64);
  EXPECT_TRUE(metric->is_matched(meta));
  EXPECT_FALSE(metric->is_matched(meta2));
  EXPECT_TRUE(metric->is_matched(
      meta, IndexQueryMeta(IndexMeta::DataType::DT_INT8, 64)));
  EXPECT_FALSE(metric->is_matched(
      meta, IndexQueryMeta(IndexMeta::DataType::DT_INT8, 63)));

  // Expected shapes: m in {1, 2, 4, 8, 16, 32} with n a power of two <= m.
  EXPECT_FALSE(metric->distance_matrix(0, 0));
  EXPECT_FALSE(metric->distance_matrix(3, 5));
  EXPECT_FALSE(metric->distance_matrix(31, 65));
  EXPECT_TRUE(metric->distance_matrix(1, 1));
  EXPECT_TRUE(metric->distance_matrix(2, 1));
  EXPECT_TRUE(metric->distance_matrix(2, 2));
  EXPECT_TRUE(metric->distance_matrix(4, 1));
  EXPECT_TRUE(metric->distance_matrix(4, 2));
  EXPECT_TRUE(metric->distance_matrix(4, 4));
  EXPECT_TRUE(metric->distance_matrix(8, 1));
  EXPECT_TRUE(metric->distance_matrix(8, 2));
  EXPECT_TRUE(metric->distance_matrix(8, 4));
  EXPECT_TRUE(metric->distance_matrix(8, 8));
  EXPECT_FALSE(metric->distance_matrix(8, 32));
  EXPECT_FALSE(metric->distance_matrix(8, 9));
  EXPECT_TRUE(metric->distance_matrix(16, 1));
  EXPECT_TRUE(metric->distance_matrix(16, 2));
  EXPECT_TRUE(metric->distance_matrix(16, 4));
  EXPECT_TRUE(metric->distance_matrix(16, 8));
  EXPECT_TRUE(metric->distance_matrix(16, 16));
  EXPECT_FALSE(metric->distance_matrix(16, 17));
  EXPECT_TRUE(metric->distance_matrix(32, 1));
  EXPECT_TRUE(metric->distance_matrix(32, 2));
  EXPECT_TRUE(metric->distance_matrix(32, 4));
  EXPECT_TRUE(metric->distance_matrix(32, 8));
  EXPECT_TRUE(metric->distance_matrix(32, 16));
  EXPECT_TRUE(metric->distance_matrix(32, 32));

  // normalize() is expected to be unsupported and to leave the score as is.
  EXPECT_FALSE(metric->support_normalize());
  float result = 1.0f;
  metric->normalize(&result);
  EXPECT_FLOAT_EQ(1.0f, result);
}

// Checks the general contract of the "Euclidean" metric: which data types
// init() accepts, query matching, which distance-matrix shapes exist, and
// that normalize() leaves the value untouched.
TEST(EuclideanMetric, General) {
  auto metric = IndexFactory::CreateMetric("Euclidean");
  // Fatal on purpose: every statement below dereferences `metric`.
  ASSERT_TRUE(metric);

  // INT16 is expected to be rejected; all other listed types are accepted.
  IndexMeta meta;
  meta.set_meta(IndexMeta::DataType::DT_INT16, 64);
  ASSERT_NE(0, metric->init(meta, ailego::Params()));
  meta.set_meta(IndexMeta::DataType::DT_BINARY32, 64);
  ASSERT_EQ(0, metric->init(meta, ailego::Params()));
  meta.set_meta(IndexMeta::DataType::DT_BINARY64, 64);
  ASSERT_EQ(0, metric->init(meta, ailego::Params()));
  meta.set_meta(IndexMeta::DataType::DT_FP16, 64);
  ASSERT_EQ(0, metric->init(meta, ailego::Params()));
  meta.set_meta(IndexMeta::DataType::DT_FP32, 64);
  ASSERT_EQ(0, metric->init(meta, ailego::Params()));
  meta.set_meta(IndexMeta::DataType::DT_INT4, 64);
  ASSERT_EQ(0, metric->init(meta, ailego::Params()));
  meta.set_meta(IndexMeta::DataType::DT_INT8, 64);
  ASSERT_EQ(0, metric->init(meta, ailego::Params()));

  // The metric was last initialised with INT8 / 64, so only that combination
  // is expected to match.
  IndexMeta meta2;
  meta2.set_meta(IndexMeta::DataType::DT_BINARY32, 64);
  EXPECT_TRUE(metric->is_matched(meta));
  EXPECT_FALSE(metric->is_matched(meta2));
  EXPECT_TRUE(metric->is_matched(
      meta, IndexQueryMeta(IndexMeta::DataType::DT_INT8, 64)));
  EXPECT_FALSE(metric->is_matched(
      meta, IndexQueryMeta(IndexMeta::DataType::DT_INT8, 63)));

  // Expected shapes: m in {1, 2, 4, 8, 16, 32} with n a power of two <= m.
  EXPECT_FALSE(metric->distance_matrix(0, 0));
  EXPECT_FALSE(metric->distance_matrix(3, 5));
  EXPECT_FALSE(metric->distance_matrix(31, 65));
  EXPECT_TRUE(metric->distance_matrix(1, 1));
  EXPECT_TRUE(metric->distance_matrix(2, 1));
  EXPECT_TRUE(metric->distance_matrix(2, 2));
  EXPECT_TRUE(metric->distance_matrix(4, 1));
  EXPECT_TRUE(metric->distance_matrix(4, 2));
  EXPECT_TRUE(metric->distance_matrix(4, 4));
  EXPECT_TRUE(metric->distance_matrix(8, 1));
  EXPECT_TRUE(metric->distance_matrix(8, 2));
  EXPECT_TRUE(metric->distance_matrix(8, 4));
  EXPECT_TRUE(metric->distance_matrix(8, 8));
  EXPECT_TRUE(metric->distance_matrix(16, 1));
  EXPECT_TRUE(metric->distance_matrix(16, 2));
  EXPECT_TRUE(metric->distance_matrix(16, 4));
  EXPECT_TRUE(metric->distance_matrix(16, 8));
  EXPECT_TRUE(metric->distance_matrix(16, 16));
  EXPECT_TRUE(metric->distance_matrix(32, 1));
  EXPECT_TRUE(metric->distance_matrix(32, 2));
  EXPECT_TRUE(metric->distance_matrix(32, 4));
  EXPECT_TRUE(metric->distance_matrix(32, 8));
  EXPECT_TRUE(metric->distance_matrix(32, 16));
  EXPECT_TRUE(metric->distance_matrix(32, 32));

  // normalize() is expected to be unsupported and to leave the score as is.
  EXPECT_FALSE(metric->support_normalize());
  float result = 1.0f;
  metric->normalize(&result);
  EXPECT_FLOAT_EQ(1.0f, result);
}


================================================
FILE: tests/core/metric/hamming_metric_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <iostream>
#include <gtest/gtest.h>
#include "zvec/core/framework/index_factory.h"

using namespace zvec;
using namespace zvec::core;

// Checks the general contract of the "Hamming" metric: which data types
// init() accepts, query matching, which distance-matrix shapes exist, and
// that normalize() leaves the value untouched.
TEST(HammingMetric, General) {
  auto hamming = IndexFactory::CreateMetric("Hamming");
  ASSERT_TRUE(hamming);

  // init() outcomes, applied in order; the final entry leaves the metric
  // initialised with BINARY32 / 64.
  struct InitCase {
    IndexMeta::DataType type;
    bool accepted;
  };
  const InitCase init_cases[] = {
      {IndexMeta::DataType::DT_BINARY32, true},
      {IndexMeta::DataType::DT_BINARY64, true},
      {IndexMeta::DataType::DT_FP16, false},
      {IndexMeta::DataType::DT_FP32, false},
      {IndexMeta::DataType::DT_INT8, false},
      {IndexMeta::DataType::DT_BINARY32, true},
  };

  IndexMeta meta;
  for (const InitCase &init_case : init_cases) {
    meta.set_meta(init_case.type, 64);
    const auto status = hamming->init(meta, ailego::Params());
    if (init_case.accepted) {
      ASSERT_EQ(0, status);
    } else {
      ASSERT_NE(0, status);
    }
  }

  IndexMeta other_meta;
  other_meta.set_meta(IndexMeta::DataType::DT_INT8, 64);
  EXPECT_TRUE(hamming->is_matched(meta));
  EXPECT_FALSE(hamming->is_matched(other_meta));
  EXPECT_TRUE(hamming->is_matched(
      meta, IndexQueryMeta(IndexMeta::DataType::DT_BINARY32, 64)));
  EXPECT_FALSE(hamming->is_matched(
      meta, IndexQueryMeta(IndexMeta::DataType::DT_BINARY32, 63)));

  // Shapes that are not expected to have a kernel.
  const size_t rejected_shapes[][2] = {{0, 0}, {3, 5}, {31, 65}};
  for (const auto &shape : rejected_shapes) {
    EXPECT_FALSE(hamming->distance_matrix(shape[0], shape[1]))
        << "m=" << shape[0] << " n=" << shape[1];
  }

  // Expected shapes: m in {1, 2, 4, 8, 16, 32} with n a power of two <= m.
  for (size_t m = 1; m <= 32; m *= 2) {
    for (size_t n = 1; n <= m; n *= 2) {
      EXPECT_TRUE(hamming->distance_matrix(m, n)) << "m=" << m << " n=" << n;
    }
  }

  // normalize() is expected to be unsupported and to leave the score as is.
  EXPECT_FALSE(hamming->support_normalize());
  float score = 1.0f;
  hamming->normalize(&score);
  EXPECT_FLOAT_EQ(1.0f, score);
}

================================================
FILE: tests/core/metric/inner_product_metric_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <iostream>
#include <gtest/gtest.h>
#include "zvec/core/framework/index_factory.h"

using namespace zvec;
using namespace zvec::core;

// Walks the "InnerProduct" metric through its public surface: which element
// types init() accepts, meta/query matching, which distance-matrix shapes are
// served, and what normalize() does to a score.
TEST(InnerProductMetric, General) {
  auto metric = IndexFactory::CreateMetric("InnerProduct");
  ASSERT_TRUE(metric);

  IndexMeta meta;

  // Binary element types must be refused by init().
  const IndexMeta::DataType refused_types[] = {
      IndexMeta::DataType::DT_BINARY32, IndexMeta::DataType::DT_BINARY64};
  for (const auto type : refused_types) {
    meta.set_meta(type, 64);
    ASSERT_NE(0, metric->init(meta, ailego::Params()));
  }

  // Float and integer element types must be accepted. DT_INT8 goes last on
  // purpose: the matching checks below rely on the metric and `meta` being
  // left in the INT8 configuration.
  const IndexMeta::DataType accepted_types[] = {
      IndexMeta::DataType::DT_FP16, IndexMeta::DataType::DT_FP32,
      IndexMeta::DataType::DT_INT4, IndexMeta::DataType::DT_INT8};
  for (const auto type : accepted_types) {
    meta.set_meta(type, 64);
    ASSERT_EQ(0, metric->init(meta, ailego::Params()));
  }

  IndexMeta binary_meta;
  binary_meta.set_meta(IndexMeta::DataType::DT_BINARY32, 64);
  EXPECT_TRUE(metric->is_matched(meta));
  EXPECT_FALSE(metric->is_matched(binary_meta));
  EXPECT_TRUE(metric->is_matched(
      meta, IndexQueryMeta(IndexMeta::DataType::DT_INT8, 64)));
  EXPECT_FALSE(metric->is_matched(
      meta, IndexQueryMeta(IndexMeta::DataType::DT_INT8, 63)));

  // Shapes for which no batched kernel is expected.
  const size_t unsupported_shapes[][2] = {{0, 0}, {3, 5}, {31, 65}};
  for (const auto &shape : unsupported_shapes) {
    EXPECT_FALSE(metric->distance_matrix(shape[0], shape[1]))
        << "m=" << shape[0] << " n=" << shape[1];
  }

  // Every power-of-two shape with n <= m <= 32 must have a batched kernel.
  for (size_t m = 1; m <= 32; m *= 2) {
    for (size_t n = 1; n <= m; n *= 2) {
      EXPECT_TRUE(metric->distance_matrix(m, n)) << "m=" << m << " n=" << n;
    }
  }

  // normalize() is supported and flips the sign of the score.
  EXPECT_TRUE(metric->support_normalize());
  float score = 1.0f;
  metric->normalize(&score);
  EXPECT_FLOAT_EQ(-1.0f, score);
}

================================================
FILE: tests/core/metric/quantized_integer_metric_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <fstream>
#include <iostream>
#include <unordered_set>
#include <ailego/math/distance.h>
#include <ailego/math/norm_matrix.h>
#include <ailego/math/normalizer.h>
#include <gtest/gtest.h>
#include <zvec/ailego/container/vector.h>
#include <zvec/ailego/utility/float_helper.h>
#include <zvec/core/framework/index_factory.h>
#include <zvec/core/framework/index_flow.h>
#include "core/quantizer/quantizer_params.h"
#include "zvec/core/framework/index_factory.h"


using namespace zvec;
using namespace zvec::core;
using namespace zvec::ailego;

//! Build an FP32 holder containing `count` vectors of `dim` components, each
//! component drawn from `dist`. Records are emplaced with keys 1..count. The
//! generator is seeded from std::random_device, so contents vary per run.
static IndexHolder::Pointer GetHolder(
    size_t dim, size_t count, std::uniform_real_distribution<float> &dist) {
  std::mt19937 engine((std::random_device())());
  auto result = std::make_shared<MultiPassIndexHolder<IndexMeta::DT_FP32>>(dim);
  for (size_t record = 0; record != count; ++record) {
    ailego::NumericalVector<float> values(dim);
    for (size_t pos = 0; pos != dim; ++pos) {
      values[pos] = dist(engine);
    }
    // Keys are one-based.
    result->emplace(record + 1, values);
  }
  return result;
}

//! Transpose a row-major matrix of 32-bit units.
//! `src` is read as N rows by M columns; `dst` receives M rows by N columns.
//! The two buffers must not overlap.
static inline void MatrixTranspose(uint32_t *dst, const uint32_t *src, size_t M,
                                   size_t N) {
  for (size_t row = 0; row != M; ++row) {
    uint32_t *out_row = dst + row * N;
    for (size_t col = 0; col != N; ++col) {
      out_row[col] = src[col * M + row];
    }
  }
}

//! Test whether two floating point numbers are equal
template <class T>
static inline auto IsAlmostEqual(const T &x, const T &y, int ulp) ->
    typename std::enable_if<std::is_floating_point<T>::value, bool>::type {
  // the machine epsilon has to be scaled to the magnitude of the values used
  // and multiplied by the desired precision in ULPs (units in the last place)
  return ((std::fabs(x - y) <=
           std::numeric_limits<T>::epsilon() * std::fabs(x + y) * ulp) ||
          (std::fabs(x - y) < std::numeric_limits<T>::min()));
}

// Diagnostic scratch test. The only assertion is that the
// "MipsSquaredEuclidean" metric can be created; everything after that prints
// values for manual comparison. It takes raw vectors x and y, their affine
// images X = xa * x + xb and Y = ya * y + yb, and prints distances computed
// directly on X/Y next to closed forms expressed in terms of x, y and the
// affine coefficients.
TEST(QuantizedIntegerMetric, General) {
  auto metric = IndexFactory::CreateMetric("MipsSquaredEuclidean");
  ASSERT_TRUE(metric);

  // NOTE(review): `params` is never used below.
  Params params;

  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_real_distribution<float> dist(-1.0, 1.0);
  const size_t DIMENSION = 21;
  ailego::NumericalVector<float> x(DIMENSION);
  ailego::NumericalVector<float> X(DIMENSION);
  ailego::NumericalVector<float> y(DIMENSION);
  ailego::NumericalVector<float> Y(DIMENSION);
  // Random scale (a) and offset (b) of the two affine maps.
  float xa = dist(gen);
  float xb = dist(gen);
  float ya = dist(gen);
  float yb = dist(gen);
  // x1/y1: plain component sums; x2/y2: filled by SquaredNorm2Matrix below;
  // X2: sum of squares of X; xx2: sum of squares of x.
  float x2 = 0, x1 = 0, y2 = 0, y1 = 0;
  float X2 = 0;
  float xx2 = 0;
  for (size_t j = 0; j < DIMENSION; ++j) {
    x[j] = dist(gen);
    printf("%f ", x[j]);
    X[j] = x[j] * xa + xb;
    x1 += x[j];
    X2 += X[j] * X[j];
    xx2 += x[j] * x[j];
  }
  printf("\n");

  for (size_t j = 0; j < DIMENSION; ++j) {
    y[j] = dist(gen);
    Y[j] = y[j] * ya + yb;
    y1 += y[j];
    printf("%f ", y[j]);
  }
  printf("\n");

  // v1: squared Euclidean distance computed directly on the affine images.
  auto v1 = ailego::Distance::SquaredEuclidean(X.data(), Y.data(), DIMENSION);
  auto ip = ailego::Distance::InnerProduct(x.data(), y.data(), DIMENSION);
  ailego::SquaredNorm2Matrix<float, 1>::Compute(x.data(), DIMENSION, &x2);
  ailego::SquaredNorm2Matrix<float, 1>::Compute(y.data(), DIMENSION, &y2);
  // Disabled: enabling this would overwrite the x1/y1 sums accumulated in the
  // loops above, which the closed forms below depend on.
#if 0
  ailego::Norm1Matrix<float, 1>::Compute(x.data(), DIMENSION, &x1);
  ailego::Norm1Matrix<float, 1>::Compute(y.data(), DIMENSION, &y1);
#endif
  // v2: the same distance as v1, expanded algebraically:
  //   sum((xa*x + xb - ya*y - yb)^2)
  //     = xa^2*|x|^2 + ya^2*|y|^2 - 2*xa*ya*<x,y>
  //       + D*(xb - yb)^2 + 2*(xb - yb)*(xa*sum(x) - ya*sum(y))
  // (assumes x2/y2 hold the squared L2 norms of x and y -- TODO confirm).
  auto v2 = xa * xa * x2 + ya * ya * y2 - 2 * xa * ya * ip +
            (xb - yb) * (xb - yb) * DIMENSION +
            2 * (xb - yb) * (xa * x1 - ya * y1);
  // v3 sums only the first two per-dimension terms, so it matches v1 only when
  // the vectors have two dimensions; it is printed purely as a spot check.
  auto t1 = (xa * x[0] - ya * y[0]) + (xb - yb);
  auto t2 = (xa * x[1] - ya * y[1]) + (xb - yb);
  auto v3 = t1 * t1 + t2 * t2;
  printf(
      "x=%f y=%f X=%f Y=%f, xa=%f xb=%f ya=%f yb=%f, x2=%f y2=%f x1=%f y1=%f "
      "ip=%f\n",
      x[0], y[0], X[0], Y[0], xa, xb, ya, yb, x2, y2, x1, y1, ip);
  printf("v1=%f v2=%f v3=%f\n", v1, v2, v3);

  // IP: inner product computed directly on X and Y; v: its expansion
  //   sum((xa*x + xb)*(ya*y + yb))
  //     = xa*ya*<x,y> + xb*ya*sum(y) + xa*yb*sum(x) + D*xb*yb
  auto IP = ailego::Distance::InnerProduct(X.data(), Y.data(), DIMENSION);
  auto v = xa * ya * ip + xb * ya * y1 + xa * yb * x1 + xb * yb * DIMENSION;
  printf("V=%f %f\n", IP, v);

  printf("=========\n");
  // mips: value produced by the library kernel on X and Y with 0.0 as its
  // fourth argument. v7 is printed next to it for manual comparison.
  float mips;
  ailego::MipsSquaredEuclideanDistanceMatrix<float, 1, 1>::Compute(
      X.data(), Y.data(), DIMENSION, 0.0, &mips);
  printf("u2=%f v2=%f\n", x2, y2);
  // uu2 / vv2: expansions of sum(X^2) and sum(Y^2) in terms of x, y and the
  // affine coefficients.
  float uu2 = xa * xa * x2 + 2 * xa * xb * x1 + xb * xb * DIMENSION;
  float vv2 = ya * ya * y2 + 2 * ya * yb * y1 + yb * yb * DIMENSION;
  float v7 = 2.0 - 2.0 * v / std::max(uu2, vv2);
  printf("mips=%f v7=%f\n", mips, v7);
  // X2 (accumulated directly) vs uu2 (expanded), and xx2 (accumulated
  // directly) vs x2 (from SquaredNorm2Matrix).
  printf("X2=%f uu2=%f xx2=%f x2=%f\n", X2, uu2, xx2, x2);
}

// Quantizes random FP32 vectors with the Int8 streaming converter and checks,
// record by record, that
//   (a) the quantized squared-Euclidean distance to a quantized query stays
//       within 0.1 * (DIMENSION + 1) of the FP32 reference, and
//   (b) the reformer's convert() reproduces the converter's output bytes.
TEST(QuantizedIntegerMetric, TestInt8SquaredEuclidean) {
  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_real_distribution<float> dist(-1.0, 2.0);

  const size_t DIMENSION = std::uniform_int_distribution<int>(1, 128)(gen);
  const size_t COUNT = 1000;
  IndexMeta meta;
  meta.set_meta(IndexMeta::DT_FP32, DIMENSION);
  auto converter = IndexFactory::CreateConverter("Int8StreamingConverter");
  ASSERT_TRUE(!!converter);
  ASSERT_EQ(0u, converter->init(meta, Params()));

  auto holder = GetHolder(DIMENSION, COUNT, dist);
  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));
  auto holder2 = converter->result();
  ASSERT_TRUE(!!holder2);
  // Fatal on mismatch: the loop below walks both holders in lockstep.
  ASSERT_EQ(COUNT, holder2->count());
  EXPECT_EQ(IndexMeta::DT_INT8, holder2->data_type());
  auto &meta2 = converter->meta();

  auto reformer = IndexFactory::CreateReformer(meta2.reformer_name());
  ASSERT_TRUE(reformer);
  ASSERT_EQ(0u, reformer->init(meta2.reformer_params()));

  // Random FP32 query, quantized once through the reformer.
  ailego::NumericalVector<float> vec(DIMENSION);
  for (size_t j = 0; j < DIMENSION; ++j) {
    vec[j] = dist(gen);
  }
  IndexQueryMeta qmeta;
  qmeta.set_meta(IndexMeta::DT_FP32, DIMENSION);
  IndexQueryMeta qmeta2;
  std::string out;
  ASSERT_EQ(0, reformer->transform(vec.data(), qmeta, &out, &qmeta2));
  ASSERT_EQ(qmeta2.dimension(), meta2.dimension());

  auto iter = holder->create_iterator();
  auto iter2 = holder2->create_iterator();
  ASSERT_TRUE(!!iter);
  ASSERT_TRUE(!!iter2);
  auto metric = IndexFactory::CreateMetric(meta2.metric_name());
  ASSERT_TRUE(!!metric);
  ASSERT_EQ(0, metric->init(meta2, meta2.metric_params()));
  auto compute = metric->distance();
  ASSERT_TRUE(compute);

  // The quantized query does not change inside the loop.
  const int8_t *qi = reinterpret_cast<const int8_t *>(&out[0]);

  for (; iter->is_valid(); iter->next(), iter2->next()) {
    // Never read a quantized record the converter did not produce.
    ASSERT_TRUE(iter2->is_valid());

    const float *mf = reinterpret_cast<const float *>(iter->data());
    const int8_t *mi = reinterpret_cast<const int8_t *>(iter2->data());
    float v1 =
        ailego::Distance::SquaredEuclidean(mf, vec.data(), holder->dimension());
    float v2;
    compute(mi, qi, holder2->dimension(), &v2);
    ASSERT_NEAR(v1, v2, 0.1 * (DIMENSION + 1));

    // convert() on the raw record must match the converter's stored bytes.
    std::string out2;
    ASSERT_EQ(0, reformer->convert(iter->data(), qmeta, &out2, &qmeta2));
    ASSERT_EQ(out2.size(), holder2->element_size());
    ASSERT_EQ(0, std::memcmp(out2.data(), iter2->data(), out2.size()));
  }
}

// Feeds a batch of random FP32 vectors through Int8StreamingReformer's batch
// API (transform or convert, picked at random) and checks that each record in
// the batch output is byte-identical to transforming and to converting that
// vector on its own.
TEST(QuantizedIntegerMetric, TestInt8SquaredEuclideanReformer) {
  std::mt19937 gen((std::random_device())());
  std::uniform_real_distribution<float> dist(-1.0, 2.0);
  std::uniform_int_distribution<int> dist2(0, 1);
  std::uniform_int_distribution<int> size_dist(1, 128);

  const size_t dim = size_dist(gen);
  const size_t count = size_dist(gen);
  auto reformer = IndexFactory::CreateReformer("Int8StreamingReformer");
  ASSERT_TRUE(!!reformer);
  ASSERT_EQ(0u, reformer->init(Params()));

  const size_t total = dim * count;
  ailego::NumericalVector<float> vecs(total);
  for (size_t k = 0; k != total; ++k) {
    vecs[k] = dist(gen);
  }

  // Batch pass over all vectors at once.
  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);
  IndexQueryMeta batch_meta;
  std::string batch_out;
  const bool use_transform = dist2(gen) != 0;
  const auto batch_ret =
      use_transform
          ? reformer->transform(vecs.data(), qmeta, count, &batch_out,
                                &batch_meta)
          : reformer->convert(vecs.data(), qmeta, count, &batch_out,
                              &batch_meta);
  ASSERT_EQ(0, batch_ret);

  // Single-record passes, compared against the matching slice of the batch.
  std::string single_out;
  IndexQueryMeta single_meta;
  for (size_t i = 0; i != count; ++i) {
    auto *record = &vecs[i * dim];

    ASSERT_EQ(0, reformer->transform(record, qmeta, &single_out, &single_meta));
    ASSERT_EQ(batch_meta.element_size(), single_meta.element_size());
    ASSERT_EQ(single_out.size(), single_meta.element_size());
    ASSERT_EQ(0, std::memcmp(single_out.data(),
                             &batch_out[i * batch_meta.element_size()],
                             single_out.size()));

    ASSERT_EQ(0, reformer->convert(record, qmeta, &single_out, &single_meta));
    ASSERT_EQ(batch_meta.element_size(), single_meta.element_size());
    ASSERT_EQ(single_out.size(), single_meta.element_size());
    ASSERT_EQ(0, std::memcmp(single_out.data(),
                             &batch_out[i * batch_meta.element_size()],
                             single_out.size()));
  }
}

// Cross-checks the batched M x N distance kernel of the "QuantizedInteger"
// metric against its one-to-one kernel on Int8-quantized data.
//
// M random vectors (the batch) and N random vectors (the queries) are
// quantized with Int8StreamingConverter configured for `metric_name`. The
// one-to-one kernel is run on the row-major records; the batched kernel is run
// on copies transposed in 4-byte units via MatrixTranspose. Both result sets
// must agree within a ULP-scaled tolerance.
template <size_t M, size_t N>
void TestDistanceMatrixInt8(const std::string &metric_name) {
  std::mt19937 gen((std::random_device())());
  std::uniform_real_distribution<float> dist(-1.0, 2.0);

  const size_t batch_size = M;
  const size_t query_size = N;
  // Multiple of 4, so a quantized record (dimension + 20 int8 values, asserted
  // below) splits evenly into the 4-byte units used for the transposition.
  const size_t dimension =
      (std::uniform_int_distribution<size_t>(1, 65))(gen) * 4;
  auto holder = GetHolder(dimension, batch_size, dist);
  IndexMeta meta(IndexMeta::DT_FP32, dimension);
  meta.set_metric(metric_name, 0, Params());
  auto converter = IndexFactory::CreateConverter("Int8StreamingConverter");
  ASSERT_TRUE(!!converter);
  ASSERT_EQ(0u, converter->init(meta, Params()));

  // Quantize the batch and lay it out both row-major and transposed.
  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));
  auto holder2 = converter->result();
  auto &meta2 = converter->meta();
  ASSERT_EQ(dimension + 20, holder2->dimension());
  size_t matrix_size = batch_size * holder2->dimension();
  std::vector<int8_t> matrix1(matrix_size);
  std::vector<int8_t> matrix2(matrix_size);
  auto iter = holder2->create_iterator();
  for (size_t i = 0; i < batch_size; ++i, iter->next()) {
    ASSERT_TRUE(iter->is_valid());
    std::memcpy(&matrix1[i * holder2->dimension()], iter->data(),
                holder2->element_size());
  }
  MatrixTranspose(reinterpret_cast<uint32_t *>(&matrix2[0]),
                  reinterpret_cast<uint32_t *>(matrix1.data()),
                  meta2.dimension() / 4, batch_size);

  // Same for the queries. Offsets come from the query holder itself rather
  // than from the batch holder.
  auto query_holder = GetHolder(dimension, query_size, dist);
  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, query_holder));
  auto query_holder2 = converter->result();
  ASSERT_EQ(dimension + 20, query_holder2->dimension());
  size_t query_matrix_size = query_size * query_holder2->dimension();
  std::vector<int8_t> query1(query_matrix_size);
  std::vector<int8_t> query2(query_matrix_size);
  auto query_iter = query_holder2->create_iterator();
  for (size_t i = 0; i < query_size; ++i, query_iter->next()) {
    ASSERT_TRUE(query_iter->is_valid());
    std::memcpy(&query1[i * query_holder2->dimension()], query_iter->data(),
                query_holder2->element_size());
  }
  MatrixTranspose(reinterpret_cast<uint32_t *>(&query2[0]),
                  reinterpret_cast<uint32_t *>(query1.data()),
                  meta2.dimension() / 4, query_size);

  std::vector<float> result1(batch_size * query_size);
  std::vector<float> result2(batch_size * query_size);

  auto metric = IndexFactory::CreateMetric("QuantizedInteger");
  ASSERT_TRUE(!!metric);
  ASSERT_EQ(0,
            metric->init(converter->meta(), converter->meta().metric_params()));
  auto compute = metric->distance();
  ASSERT_TRUE(compute);
  auto matrix_compute = metric->distance_matrix(M, N);
  ASSERT_TRUE(matrix_compute);

  // Reference: one-to-one kernel, results stored query-major.
  for (size_t i = 0; i < query_size; ++i) {
    const int8_t *cur_query = &query1[i * meta2.dimension()];
    float *query_result = &result1[i * batch_size];

    for (size_t j = 0; j < batch_size; ++j) {
      compute(&matrix1[j * meta2.dimension()], cur_query, meta2.dimension(),
              &query_result[j]);
    }
  }
  // Batched kernel on the transposed layouts.
  matrix_compute(&matrix2[0], &query2[0], meta2.dimension(), &result2[0]);

  // Exact float equality is too strict here; allow a ULP-scaled tolerance.
  for (size_t i = 0; i < batch_size * query_size; ++i) {
    EXPECT_TRUE(IsAlmostEqual(result1[i], result2[i], 10000));
  }
}

// Runs the Int8 distance-matrix helper with the "SquaredEuclidean" metric for
// every power-of-two shape <M, N> with N <= M <= 32.
TEST(QuantizedIntegerMetric, TestInt8SquaredEuclideanMetric) {
  const std::string metric_name("SquaredEuclidean");

  // M = 1
  TestDistanceMatrixInt8<1, 1>(metric_name);
  // M = 2
  TestDistanceMatrixInt8<2, 1>(metric_name);
  TestDistanceMatrixInt8<2, 2>(metric_name);
  // M = 4
  TestDistanceMatrixInt8<4, 1>(metric_name);
  TestDistanceMatrixInt8<4, 2>(metric_name);
  TestDistanceMatrixInt8<4, 4>(metric_name);
  // M = 8
  TestDistanceMatrixInt8<8, 1>(metric_name);
  TestDistanceMatrixInt8<8, 2>(metric_name);
  TestDistanceMatrixInt8<8, 4>(metric_name);
  TestDistanceMatrixInt8<8, 8>(metric_name);
  // M = 16
  TestDistanceMatrixInt8<16, 1>(metric_name);
  TestDistanceMatrixInt8<16, 2>(metric_name);
  TestDistanceMatrixInt8<16, 4>(metric_name);
  TestDistanceMatrixInt8<16, 8>(metric_name);
  TestDistanceMatrixInt8<16, 16>(metric_name);
  // M = 32
  TestDistanceMatrixInt8<32, 1>(metric_name);
  TestDistanceMatrixInt8<32, 2>(metric_name);
  TestDistanceMatrixInt8<32, 4>(metric_name);
  TestDistanceMatrixInt8<32, 8>(metric_name);
  TestDistanceMatrixInt8<32, 16>(metric_name);
  TestDistanceMatrixInt8<32, 32>(metric_name);
}

// Quantizes random FP32 vectors (even dimension) with the Int4 streaming
// converter and checks, record by record, that
//   (a) the quantized squared-Euclidean distance to a quantized query stays
//       within 0.2 * DIMENSION of the FP32 reference, and
//   (b) the reformer's convert() reproduces the converter's output bytes.
TEST(QuantizedIntegerMetric, TestInt4SquaredEuclidean) {
  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_real_distribution<float> dist(-1.0, 2.0);

  const size_t DIMENSION = std::uniform_int_distribution<int>(1, 128)(gen) * 2;
  const size_t COUNT = 1000;
  IndexMeta meta;
  meta.set_meta(IndexMeta::DT_FP32, DIMENSION);
  auto converter = IndexFactory::CreateConverter("Int4StreamingConverter");
  ASSERT_TRUE(!!converter);
  ASSERT_EQ(0u, converter->init(meta, Params()));

  auto holder = GetHolder(DIMENSION, COUNT, dist);
  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));
  auto holder2 = converter->result();
  ASSERT_TRUE(!!holder2);
  // Fatal on mismatch: the loop below walks both holders in lockstep.
  ASSERT_EQ(COUNT, holder2->count());
  EXPECT_EQ(IndexMeta::DT_INT4, holder2->data_type());
  auto &meta2 = converter->meta();

  auto reformer = IndexFactory::CreateReformer(meta2.reformer_name());
  ASSERT_TRUE(reformer);
  ASSERT_EQ(0u, reformer->init(meta2.reformer_params()));

  // Random FP32 query, quantized once through the reformer.
  ailego::NumericalVector<float> vec(DIMENSION);
  for (size_t j = 0; j < DIMENSION; ++j) {
    vec[j] = dist(gen);
  }
  IndexQueryMeta qmeta;
  qmeta.set_meta(IndexMeta::DT_FP32, DIMENSION);
  IndexQueryMeta qmeta2;
  std::string out;
  ASSERT_EQ(0, reformer->transform(vec.data(), qmeta, &out, &qmeta2));
  ASSERT_EQ(qmeta2.dimension(), meta2.dimension());

  auto iter = holder->create_iterator();
  auto iter2 = holder2->create_iterator();
  ASSERT_TRUE(!!iter);
  ASSERT_TRUE(!!iter2);
  auto metric = IndexFactory::CreateMetric(meta2.metric_name());
  ASSERT_TRUE(!!metric);
  ASSERT_EQ(0, metric->init(meta2, meta2.metric_params()));
  auto compute = metric->distance();
  ASSERT_TRUE(compute);

  // The quantized query does not change inside the loop.
  const int8_t *qi = reinterpret_cast<const int8_t *>(&out[0]);

  for (; iter->is_valid(); iter->next(), iter2->next()) {
    // Never read a quantized record the converter did not produce.
    ASSERT_TRUE(iter2->is_valid());

    const float *mf = reinterpret_cast<const float *>(iter->data());
    const int8_t *mi = reinterpret_cast<const int8_t *>(iter2->data());
    float v1 =
        ailego::Distance::SquaredEuclidean(mf, vec.data(), holder->dimension());
    float v2;
    compute(mi, qi, holder2->dimension(), &v2);
    ASSERT_NEAR(v1, v2, 0.2 * DIMENSION);

    // convert() on the raw record must match the converter's stored bytes.
    std::string out2;
    ASSERT_EQ(0, reformer->convert(iter->data(), qmeta, &out2, &qmeta2));
    ASSERT_EQ(out2.size(), holder2->element_size());
    ASSERT_EQ(0, std::memcmp(out2.data(), iter2->data(), out2.size()));
  }
}

// Feeds a batch of random FP32 vectors (even dimension) through
// Int4StreamingReformer's batch API (transform or convert, picked at random)
// and checks that each record in the batch output is byte-identical to
// transforming and to converting that vector on its own.
TEST(QuantizedIntegerMetric, TestInt4SquaredEuclideanReformer) {
  std::mt19937 gen((std::random_device())());
  std::uniform_real_distribution<float> dist(-1.0, 2.0);
  std::uniform_int_distribution<int> dist2(0, 1);
  std::uniform_int_distribution<int> size_dist(1, 128);

  const size_t dim = size_dist(gen) * 2;
  const size_t count = size_dist(gen);
  auto reformer = IndexFactory::CreateReformer("Int4StreamingReformer");
  ASSERT_TRUE(!!reformer);
  ASSERT_EQ(0u, reformer->init(Params()));

  const size_t total = dim * count;
  ailego::NumericalVector<float> vecs(total);
  for (size_t k = 0; k != total; ++k) {
    vecs[k] = dist(gen);
  }

  // Batch pass over all vectors at once.
  IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim);
  IndexQueryMeta batch_meta;
  std::string batch_out;
  const bool use_transform = dist2(gen) != 0;
  const auto batch_ret =
      use_transform
          ? reformer->transform(vecs.data(), qmeta, count, &batch_out,
                                &batch_meta)
          : reformer->convert(vecs.data(), qmeta, count, &batch_out,
                              &batch_meta);
  ASSERT_EQ(0, batch_ret);

  // Single-record passes, compared against the matching slice of the batch.
  std::string single_out;
  IndexQueryMeta single_meta;
  for (size_t i = 0; i != count; ++i) {
    auto *record = &vecs[i * dim];

    ASSERT_EQ(0, reformer->transform(record, qmeta, &single_out, &single_meta));
    ASSERT_EQ(batch_meta.element_size(), single_meta.element_size());
    ASSERT_EQ(single_out.size(), single_meta.element_size());
    ASSERT_EQ(0, std::memcmp(single_out.data(),
                             &batch_out[i * batch_meta.element_size()],
                             single_out.size()));

    ASSERT_EQ(0, reformer->convert(record, qmeta, &single_out, &single_meta));
    ASSERT_EQ(batch_meta.element_size(), single_meta.element_size());
    ASSERT_EQ(single_out.size(), single_meta.element_size());
    ASSERT_EQ(0, std::memcmp(single_out.data(),
                             &batch_out[i * batch_meta.element_size()],
                             single_out.size()));
  }
}

// Cross-checks the batched M x N distance kernel of the "QuantizedInteger"
// metric against its one-to-one kernel on Int4-quantized data.
//
// M random vectors (the batch) and N random vectors (the queries) are
// quantized with Int4StreamingConverter configured for `metric_name`. The
// one-to-one kernel is run on the row-major records; the batched kernel is run
// on copies transposed in 4-byte units via MatrixTranspose. Both result sets
// must agree within an absolute and a ULP-scaled tolerance.
template <size_t M, size_t N>
void TestDistanceMatrixInt4(const std::string &metric_name) {
  std::mt19937 gen((std::random_device())());
  std::uniform_real_distribution<float> dist(-1.0, 2.0);

  const size_t batch_size = M;
  const size_t query_size = N;
  // Multiple of 8, so the quantized dimension (dimension + 32, asserted below)
  // divides evenly by the 8 used as the transposition unit count.
  const size_t dimension =
      (std::uniform_int_distribution<size_t>(1, 65))(gen) * 8;
  auto holder = GetHolder(dimension, batch_size, dist);
  IndexMeta meta(IndexMeta::DT_FP32, dimension);
  meta.set_metric(metric_name, 0, Params());
  auto converter = IndexFactory::CreateConverter("Int4StreamingConverter");
  ASSERT_TRUE(!!converter);
  ASSERT_EQ(0u, converter->init(meta, Params()));

  // Quantize the batch and lay it out both row-major and transposed.
  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));
  auto holder2 = converter->result();
  auto &meta2 = converter->meta();
  ASSERT_EQ(dimension + 32, holder2->dimension());
  size_t matrix_size = batch_size * holder2->element_size();
  std::vector<uint8_t> matrix1(matrix_size);
  std::vector<uint8_t> matrix2(matrix_size);
  auto iter = holder2->create_iterator();
  for (size_t i = 0; i < batch_size; ++i, iter->next()) {
    ASSERT_TRUE(iter->is_valid());
    std::memcpy(&matrix1[i * holder2->element_size()], iter->data(),
                holder2->element_size());
  }
  MatrixTranspose(reinterpret_cast<uint32_t *>(&matrix2[0]),
                  reinterpret_cast<uint32_t *>(matrix1.data()),
                  meta2.dimension() / 8, batch_size);

  // Same for the queries. Offsets come from the query holder itself rather
  // than from the batch holder.
  auto query_holder = GetHolder(dimension, query_size, dist);
  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, query_holder));
  auto query_holder2 = converter->result();
  ASSERT_EQ(dimension + 32, query_holder2->dimension());
  size_t query_matrix_size = query_size * query_holder2->element_size();
  std::vector<uint8_t> query1(query_matrix_size);
  std::vector<uint8_t> query2(query_matrix_size);
  auto query_iter = query_holder2->create_iterator();
  for (size_t i = 0; i < query_size; ++i, query_iter->next()) {
    ASSERT_TRUE(query_iter->is_valid());
    std::memcpy(&query1[i * query_holder2->element_size()], query_iter->data(),
                query_holder2->element_size());
  }
  MatrixTranspose(reinterpret_cast<uint32_t *>(&query2[0]),
                  reinterpret_cast<uint32_t *>(query1.data()),
                  meta2.dimension() / 8, query_size);

  std::vector<float> result1(batch_size * query_size);
  std::vector<float> result2(batch_size * query_size);

  auto metric = IndexFactory::CreateMetric("QuantizedInteger");
  ASSERT_TRUE(!!metric);
  ASSERT_EQ(0,
            metric->init(converter->meta(), converter->meta().metric_params()));
  auto compute = metric->distance();
  ASSERT_TRUE(compute);
  auto matrix_compute = metric->distance_matrix(M, N);
  ASSERT_TRUE(matrix_compute);

  // Reference: one-to-one kernel, results stored query-major.
  for (size_t i = 0; i < query_size; ++i) {
    const uint8_t *cur_query = &query1[i * meta2.element_size()];
    float *query_result = &result1[i * batch_size];

    for (size_t j = 0; j < batch_size; ++j) {
      compute(&matrix1[j * meta2.element_size()], cur_query, meta2.dimension(),
              &query_result[j]);
    }
  }
  // Batched kernel on the transposed layouts.
  matrix_compute(&matrix2[0], &query2[0], meta2.dimension(), &result2[0]);

  for (size_t i = 0; i < batch_size * query_size; ++i) {
    EXPECT_NEAR(result1[i], result2[i], 1e-2 * dimension);
    EXPECT_TRUE(IsAlmostEqual(result1[i], result2[i], 10000));
  }
}

// Runs the Int4 distance-matrix helper with the "SquaredEuclidean" metric for
// every power-of-two shape <M, N> with N <= M <= 32.
TEST(QuantizedIntegerMetric, TestInt4SquaredEuclideanMetric) {
  const std::string metric_name("SquaredEuclidean");

  // M = 1
  TestDistanceMatrixInt4<1, 1>(metric_name);
  // M = 2
  TestDistanceMatrixInt4<2, 1>(metric_name);
  TestDistanceMatrixInt4<2, 2>(metric_name);
  // M = 4
  TestDistanceMatrixInt4<4, 1>(metric_name);
  TestDistanceMatrixInt4<4, 2>(metric_name);
  TestDistanceMatrixInt4<4, 4>(metric_name);
  // M = 8
  TestDistanceMatrixInt4<8, 1>(metric_name);
  TestDistanceMatrixInt4<8, 2>(metric_name);
  TestDistanceMatrixInt4<8, 4>(metric_name);
  TestDistanceMatrixInt4<8, 8>(metric_name);
  // M = 16
  TestDistanceMatrixInt4<16, 1>(metric_name);
  TestDistanceMatrixInt4<16, 2>(metric_name);
  TestDistanceMatrixInt4<16, 4>(metric_name);
  TestDistanceMatrixInt4<16, 8>(metric_name);
  TestDistanceMatrixInt4<16, 16>(metric_name);
  // M = 32
  TestDistanceMatrixInt4<32, 1>(metric_name);
  TestDistanceMatrixInt4<32, 2>(metric_name);
  TestDistanceMatrixInt4<32, 4>(metric_name);
  TestDistanceMatrixInt4<32, 8>(metric_name);
  TestDistanceMatrixInt4<32, 16>(metric_name);
  TestDistanceMatrixInt4<32, 32>(metric_name);
}

// Quantizes random FP32 vectors with the Int8 streaming converter configured
// for the "InnerProduct" metric and checks, record by record, that
//   (a) the quantized distance to a quantized query stays within
//       0.2 * DIMENSION of the FP32 MinusInnerProduct reference, and
//   (b) the reformer's convert() reproduces the converter's output bytes.
TEST(QuantizedIntegerMetric, TestInt8InnerProduct) {
  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_real_distribution<float> dist(-1.0, 2.0);

  const size_t DIMENSION = std::uniform_int_distribution<int>(1, 128)(gen);
  const size_t COUNT = 1000;
  IndexMeta meta(IndexMeta::DT_FP32, DIMENSION);
  meta.set_metric("InnerProduct", 0, Params());
  auto converter = IndexFactory::CreateConverter("Int8StreamingConverter");
  ASSERT_TRUE(!!converter);
  ASSERT_EQ(0u, converter->init(meta, Params()));

  auto holder = GetHolder(DIMENSION, COUNT, dist);
  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));
  auto holder2 = converter->result();
  ASSERT_TRUE(!!holder2);
  // Fatal on mismatch: the loop below walks both holders in lockstep.
  ASSERT_EQ(COUNT, holder2->count());
  EXPECT_EQ(IndexMeta::DT_INT8, holder2->data_type());
  auto &meta2 = converter->meta();

  auto reformer = IndexFactory::CreateReformer(meta2.reformer_name());
  ASSERT_TRUE(reformer);
  ASSERT_EQ(0u, reformer->init(meta2.reformer_params()));

  // Random FP32 query, quantized once through the reformer.
  ailego::NumericalVector<float> vec(DIMENSION);
  for (size_t j = 0; j < DIMENSION; ++j) {
    vec[j] = dist(gen);
  }
  IndexQueryMeta qmeta;
  qmeta.set_meta(IndexMeta::DT_FP32, DIMENSION);
  IndexQueryMeta qmeta2;
  std::string out;
  ASSERT_EQ(0, reformer->transform(vec.data(), qmeta, &out, &qmeta2));
  ASSERT_EQ(qmeta2.dimension(), meta2.dimension());

  auto iter = holder->create_iterator();
  auto iter2 = holder2->create_iterator();
  ASSERT_TRUE(!!iter);
  ASSERT_TRUE(!!iter2);
  auto metric = IndexFactory::CreateMetric(meta2.metric_name());
  ASSERT_TRUE(!!metric);
  ASSERT_EQ(0, metric->init(meta2, meta2.metric_params()));
  auto compute = metric->distance();
  ASSERT_TRUE(compute);

  // The quantized query does not change inside the loop.
  const int8_t *qi = reinterpret_cast<const int8_t *>(&out[0]);

  for (; iter->is_valid(); iter->next(), iter2->next()) {
    // Never read a quantized record the converter did not produce.
    ASSERT_TRUE(iter2->is_valid());

    const float *mf = reinterpret_cast<const float *>(iter->data());
    const int8_t *mi = reinterpret_cast<const int8_t *>(iter2->data());
    float v1 = ailego::Distance::MinusInnerProduct(mf, vec.data(),
                                                   holder->dimension());
    float v2;
    compute(mi, qi, holder2->dimension(), &v2);
    ASSERT_NEAR(v1, v2, 0.2 * DIMENSION);

    // convert() on the raw record must match the converter's stored bytes.
    std::string out2;
    ASSERT_EQ(0, reformer->convert(iter->data(), qmeta, &out2, &qmeta2));
    ASSERT_EQ(out2.size(), holder2->element_size());
    ASSERT_EQ(0, std::memcmp(out2.data(), iter2->data(), out2.size()));
  }
}

// Runs the Int8 distance-matrix helper with the "InnerProduct" metric for
// every power-of-two shape <M, N> with N <= M <= 32.
TEST(QuantizedIntegerMetric, TestInt8InnerProductMetric) {
  const std::string metric_name("InnerProduct");

  // M = 1
  TestDistanceMatrixInt8<1, 1>(metric_name);
  // M = 2
  TestDistanceMatrixInt8<2, 1>(metric_name);
  TestDistanceMatrixInt8<2, 2>(metric_name);
  // M = 4
  TestDistanceMatrixInt8<4, 1>(metric_name);
  TestDistanceMatrixInt8<4, 2>(metric_name);
  TestDistanceMatrixInt8<4, 4>(metric_name);
  // M = 8
  TestDistanceMatrixInt8<8, 1>(metric_name);
  TestDistanceMatrixInt8<8, 2>(metric_name);
  TestDistanceMatrixInt8<8, 4>(metric_name);
  TestDistanceMatrixInt8<8, 8>(metric_name);
  // M = 16
  TestDistanceMatrixInt8<16, 1>(metric_name);
  TestDistanceMatrixInt8<16, 2>(metric_name);
  TestDistanceMatrixInt8<16, 4>(metric_name);
  TestDistanceMatrixInt8<16, 8>(metric_name);
  TestDistanceMatrixInt8<16, 16>(metric_name);
  // M = 32
  TestDistanceMatrixInt8<32, 1>(metric_name);
  TestDistanceMatrixInt8<32, 2>(metric_name);
  TestDistanceMatrixInt8<32, 4>(metric_name);
  TestDistanceMatrixInt8<32, 8>(metric_name);
  TestDistanceMatrixInt8<32, 16>(metric_name);
  TestDistanceMatrixInt8<32, 32>(metric_name);
}

// Quantizes random FP32 vectors (even dimension) with the Int4 streaming
// converter configured for the "InnerProduct" metric and checks, record by
// record, that
//   (a) the quantized distance to a quantized query stays within
//       0.2 * DIMENSION of the FP32 MinusInnerProduct reference, and
//   (b) the reformer's convert() reproduces the converter's output bytes.
TEST(QuantizedIntegerMetric, TestInt4InnerProduct) {
  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_real_distribution<float> dist(-1.0, 2.0);

  const size_t DIMENSION = std::uniform_int_distribution<int>(1, 128)(gen) * 2;
  const size_t COUNT = 1000;
  IndexMeta meta;
  meta.set_meta(IndexMeta::DT_FP32, DIMENSION);
  meta.set_metric("InnerProduct", 0, Params());
  auto converter = IndexFactory::CreateConverter("Int4StreamingConverter");
  ASSERT_TRUE(!!converter);
  ASSERT_EQ(0u, converter->init(meta, Params()));

  auto holder = GetHolder(DIMENSION, COUNT, dist);
  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));
  auto holder2 = converter->result();
  ASSERT_TRUE(!!holder2);
  // Fatal on mismatch: the loop below walks both holders in lockstep.
  ASSERT_EQ(COUNT, holder2->count());
  EXPECT_EQ(IndexMeta::DT_INT4, holder2->data_type());
  auto &meta2 = converter->meta();

  auto reformer = IndexFactory::CreateReformer(meta2.reformer_name());
  ASSERT_TRUE(reformer);
  ASSERT_EQ(0u, reformer->init(meta2.reformer_params()));

  // Random FP32 query, quantized once through the reformer.
  ailego::NumericalVector<float> vec(DIMENSION);
  for (size_t j = 0; j < DIMENSION; ++j) {
    vec[j] = dist(gen);
  }
  IndexQueryMeta qmeta;
  qmeta.set_meta(IndexMeta::DT_FP32, DIMENSION);
  IndexQueryMeta qmeta2;
  std::string out;
  ASSERT_EQ(0, reformer->transform(vec.data(), qmeta, &out, &qmeta2));
  ASSERT_EQ(qmeta2.dimension(), meta2.dimension());

  auto iter = holder->create_iterator();
  auto iter2 = holder2->create_iterator();
  ASSERT_TRUE(!!iter);
  ASSERT_TRUE(!!iter2);
  auto metric = IndexFactory::CreateMetric(meta2.metric_name());
  ASSERT_TRUE(!!metric);
  ASSERT_EQ(0, metric->init(meta2, meta2.metric_params()));
  auto compute = metric->distance();
  ASSERT_TRUE(compute);

  // The quantized query does not change inside the loop.
  const int8_t *qi = reinterpret_cast<const int8_t *>(&out[0]);

  for (; iter->is_valid(); iter->next(), iter2->next()) {
    // Never read a quantized record the converter did not produce.
    ASSERT_TRUE(iter2->is_valid());

    const float *mf = reinterpret_cast<const float *>(iter->data());
    const int8_t *mi = reinterpret_cast<const int8_t *>(iter2->data());
    float v1 = ailego::Distance::MinusInnerProduct(mf, vec.data(),
                                                   holder->dimension());
    float v2;
    compute(mi, qi, holder2->dimension(), &v2);
    ASSERT_NEAR(v1, v2, 0.2 * DIMENSION);

    // convert() on the raw record must match the converter's stored bytes.
    std::string out2;
    ASSERT_EQ(0, reformer->convert(iter->data(), qmeta, &out2, &qmeta2));
    ASSERT_EQ(out2.size(), holder2->element_size());
    ASSERT_EQ(0, std::memcmp(out2.data(), iter2->data(), out2.size()));
  }
}

// Runs the Int4 distance-matrix helper with the "InnerProduct" metric for
// every power-of-two shape <M, N> with N <= M <= 32.
TEST(QuantizedIntegerMetric, TestInt4InnerProductMetric) {
  const std::string metric_name("InnerProduct");

  // M = 1
  TestDistanceMatrixInt4<1, 1>(metric_name);
  // M = 2
  TestDistanceMatrixInt4<2, 1>(metric_name);
  TestDistanceMatrixInt4<2, 2>(metric_name);
  // M = 4
  TestDistanceMatrixInt4<4, 1>(metric_name);
  TestDistanceMatrixInt4<4, 2>(metric_name);
  TestDistanceMatrixInt4<4, 4>(metric_name);
  // M = 8
  TestDistanceMatrixInt4<8, 1>(metric_name);
  TestDistanceMatrixInt4<8, 2>(metric_name);
  TestDistanceMatrixInt4<8, 4>(metric_name);
  TestDistanceMatrixInt4<8, 8>(metric_name);
  // M = 16
  TestDistanceMatrixInt4<16, 1>(metric_name);
  TestDistanceMatrixInt4<16, 2>(metric_name);
  TestDistanceMatrixInt4<16, 4>(metric_name);
  TestDistanceMatrixInt4<16, 8>(metric_name);
  TestDistanceMatrixInt4<16, 16>(metric_name);
  // M = 32
  TestDistanceMatrixInt4<32, 1>(metric_name);
  TestDistanceMatrixInt4<32, 2>(metric_name);
  TestDistanceMatrixInt4<32, 4>(metric_name);
  TestDistanceMatrixInt4<32, 8>(metric_name);
  TestDistanceMatrixInt4<32, 16>(metric_name);
  TestDistanceMatrixInt4<32, 32>(metric_name);
}

// Quantizes a random FP32 holder with Int8StreamingConverter under the
// "MipsSquaredEuclidean" metric and checks, element by element, that
//   * the distance computed on the quantized data stays within
//     0.2 * DIMENSION of the FP32 reference distance, and
//   * reformer->convert() reproduces the converter's output byte for byte.
TEST(QuantizedIntegerMetric, TestInt8MipsSquaredEuclidean) {
  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_real_distribution<float> dist(-1.0, 2.0);

  const size_t DIMENSION = std::uniform_int_distribution<int>(1, 128)(gen);
  const size_t COUNT = 1000;
  IndexMeta meta(IndexMeta::DT_FP32, DIMENSION);
  meta.set_metric("MipsSquaredEuclidean", 0, Params());
  auto converter = IndexFactory::CreateConverter("Int8StreamingConverter");
  ASSERT_TRUE(!!converter);
  ASSERT_EQ(0u, converter->init(meta, Params()));

  auto holder = GetHolder(DIMENSION, COUNT, dist);
  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));
  auto holder2 = converter->result();
  EXPECT_EQ(COUNT, holder2->count());
  EXPECT_EQ(IndexMeta::DT_INT8, holder2->data_type());
  auto &meta2 = converter->meta();

  auto reformer = IndexFactory::CreateReformer(meta2.reformer_name());
  ASSERT_TRUE(reformer);
  ASSERT_EQ(0u, reformer->init(meta2.reformer_params()));

  // Random FP32 query, quantized once through the reformer.
  ailego::NumericalVector<float> vec(DIMENSION);
  for (size_t j = 0; j < DIMENSION; ++j) {
    vec[j] = dist(gen);
  }
  IndexQueryMeta qmeta;
  qmeta.set_meta(IndexMeta::DT_FP32, DIMENSION);
  IndexQueryMeta qmeta2;
  std::string out;
  ASSERT_EQ(0, reformer->transform(vec.data(), qmeta, &out, &qmeta2));
  ASSERT_EQ(qmeta2.dimension(), meta2.dimension());

  auto iter = holder->create_iterator();
  auto iter2 = holder2->create_iterator();
  auto metric = IndexFactory::CreateMetric(meta2.metric_name());
  ASSERT_TRUE(!!metric);
  ASSERT_EQ(0, metric->init(meta2, meta2.metric_params()));
  auto compute = metric->distance();
  ASSERT_TRUE(compute);

  auto query_metric = metric->query_metric();
  ASSERT_TRUE(!!query_metric);
  ASSERT_EQ(query_metric->name(), "QuantizedInteger");

  // The quantized query does not change inside the loop; resolve it once.
  const int8_t *qi = reinterpret_cast<const int8_t *>(&out[0]);

  for (; iter->is_valid(); iter->next(), iter2->next()) {
    // Only `iter` drives the loop, so make sure the quantized iterator still
    // points at an element before its data is read.
    ASSERT_TRUE(iter2->is_valid());

    const float *mf = reinterpret_cast<const float *>(iter->data());
    const int8_t *mi = reinterpret_cast<const int8_t *>(iter2->data());
    const float v1 = ailego::Distance::MipsSquaredEuclidean(
        mf, vec.data(), holder->dimension(), 0.0f);
    float v2 = 0.0f;
    compute(mi, qi, holder2->dimension(), &v2);
    ASSERT_NEAR(v1, v2, 0.2 * DIMENSION);

    // Converting the original element again must match the stored result.
    std::string out2;
    ASSERT_EQ(0, reformer->convert(iter->data(), qmeta, &out2, &qmeta2));
    ASSERT_EQ(out2.size(), holder2->element_size());
    ASSERT_EQ(0, std::memcmp(out2.data(), iter2->data(), out2.size()));
  }
}

// Runs the Int8 distance-matrix helper with the "MipsSquaredEuclidean" metric
// for every <M, N> combination where M and N are powers of two, N <= M and
// M <= 32.
TEST(QuantizedIntegerMetric, TestInt8MipsSquaredEuclideanMetric) {
  const char *const kMetricName = "MipsSquaredEuclidean";

  TestDistanceMatrixInt8<1, 1>(kMetricName);

  TestDistanceMatrixInt8<2, 1>(kMetricName);
  TestDistanceMatrixInt8<2, 2>(kMetricName);

  TestDistanceMatrixInt8<4, 1>(kMetricName);
  TestDistanceMatrixInt8<4, 2>(kMetricName);
  TestDistanceMatrixInt8<4, 4>(kMetricName);

  TestDistanceMatrixInt8<8, 1>(kMetricName);
  TestDistanceMatrixInt8<8, 2>(kMetricName);
  TestDistanceMatrixInt8<8, 4>(kMetricName);
  TestDistanceMatrixInt8<8, 8>(kMetricName);

  TestDistanceMatrixInt8<16, 1>(kMetricName);
  TestDistanceMatrixInt8<16, 2>(kMetricName);
  TestDistanceMatrixInt8<16, 4>(kMetricName);
  TestDistanceMatrixInt8<16, 8>(kMetricName);
  TestDistanceMatrixInt8<16, 16>(kMetricName);

  TestDistanceMatrixInt8<32, 1>(kMetricName);
  TestDistanceMatrixInt8<32, 2>(kMetricName);
  TestDistanceMatrixInt8<32, 4>(kMetricName);
  TestDistanceMatrixInt8<32, 8>(kMetricName);
  TestDistanceMatrixInt8<32, 16>(kMetricName);
  TestDistanceMatrixInt8<32, 32>(kMetricName);
}

// Quantizes a random FP32 holder with Int4StreamingConverter under the
// "MipsSquaredEuclidean" metric and checks, element by element, that
//   * the distance computed on the quantized data stays within
//     0.2 * DIMENSION of the FP32 reference distance, and
//   * reformer->convert() reproduces the converter's output byte for byte.
// The random dimension is doubled so that it is always even.
TEST(QuantizedIntegerMetric, TestInt4MipsSquaredEuclidean) {
  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_real_distribution<float> dist(-1.0, 2.0);

  const size_t DIMENSION = std::uniform_int_distribution<int>(1, 128)(gen) * 2;
  const size_t COUNT = 1000;
  IndexMeta meta;
  meta.set_meta(IndexMeta::DT_FP32, DIMENSION);
  meta.set_metric("MipsSquaredEuclidean", 0, Params());
  auto converter = IndexFactory::CreateConverter("Int4StreamingConverter");
  ASSERT_TRUE(!!converter);
  ASSERT_EQ(0u, converter->init(meta, Params()));

  auto holder = GetHolder(DIMENSION, COUNT, dist);
  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));
  auto holder2 = converter->result();
  EXPECT_EQ(COUNT, holder2->count());
  EXPECT_EQ(IndexMeta::DT_INT4, holder2->data_type());
  auto &meta2 = converter->meta();

  auto reformer = IndexFactory::CreateReformer(meta2.reformer_name());
  ASSERT_TRUE(reformer);
  ASSERT_EQ(0u, reformer->init(meta2.reformer_params()));

  // Random FP32 query, quantized once through the reformer.
  ailego::NumericalVector<float> vec(DIMENSION);
  for (size_t j = 0; j < DIMENSION; ++j) {
    vec[j] = dist(gen);
  }
  IndexQueryMeta qmeta;
  qmeta.set_meta(IndexMeta::DT_FP32, DIMENSION);
  IndexQueryMeta qmeta2;
  std::string out;
  ASSERT_EQ(0, reformer->transform(vec.data(), qmeta, &out, &qmeta2));
  ASSERT_EQ(qmeta2.dimension(), meta2.dimension());

  auto iter = holder->create_iterator();
  auto iter2 = holder2->create_iterator();
  auto metric = IndexFactory::CreateMetric(meta2.metric_name());
  ASSERT_TRUE(!!metric);
  ASSERT_EQ(0, metric->init(meta2, meta2.metric_params()));
  auto compute = metric->distance();
  ASSERT_TRUE(compute);

  // The quantized query does not change inside the loop; resolve it once.
  const int8_t *qi = reinterpret_cast<const int8_t *>(&out[0]);

  for (; iter->is_valid(); iter->next(), iter2->next()) {
    // Only `iter` drives the loop, so make sure the quantized iterator still
    // points at an element before its data is read.
    ASSERT_TRUE(iter2->is_valid());

    const float *mf = reinterpret_cast<const float *>(iter->data());
    const int8_t *mi = reinterpret_cast<const int8_t *>(iter2->data());
    const float v1 = ailego::Distance::MipsSquaredEuclidean(
        mf, vec.data(), holder->dimension(), 0.0f);
    float v2 = 0.0f;
    compute(mi, qi, holder2->dimension(), &v2);
    ASSERT_NEAR(v1, v2, 0.2 * DIMENSION);

    // Converting the original element again must match the stored result.
    std::string out2;
    ASSERT_EQ(0, reformer->convert(iter->data(), qmeta, &out2, &qmeta2));
    ASSERT_EQ(out2.size(), holder2->element_size());
    ASSERT_EQ(0, std::memcmp(out2.data(), iter2->data(), out2.size()));
  }
}

// Runs the Int4 distance-matrix helper with the "MipsSquaredEuclidean" metric
// for every <M, N> combination where M and N are powers of two, N <= M and
// M <= 32.
TEST(QuantizedIntegerMetric, TestInt4MipsSquaredEuclideanMetric) {
  const char *const kMetricName = "MipsSquaredEuclidean";

  TestDistanceMatrixInt4<1, 1>(kMetricName);

  TestDistanceMatrixInt4<2, 1>(kMetricName);
  TestDistanceMatrixInt4<2, 2>(kMetricName);

  TestDistanceMatrixInt4<4, 1>(kMetricName);
  TestDistanceMatrixInt4<4, 2>(kMetricName);
  TestDistanceMatrixInt4<4, 4>(kMetricName);

  TestDistanceMatrixInt4<8, 1>(kMetricName);
  TestDistanceMatrixInt4<8, 2>(kMetricName);
  TestDistanceMatrixInt4<8, 4>(kMetricName);
  TestDistanceMatrixInt4<8, 8>(kMetricName);

  TestDistanceMatrixInt4<16, 1>(kMetricName);
  TestDistanceMatrixInt4<16, 2>(kMetricName);
  TestDistanceMatrixInt4<16, 4>(kMetricName);
  TestDistanceMatrixInt4<16, 8>(kMetricName);
  TestDistanceMatrixInt4<16, 16>(kMetricName);

  TestDistanceMatrixInt4<32, 1>(kMetricName);
  TestDistanceMatrixInt4<32, 2>(kMetricName);
  TestDistanceMatrixInt4<32, 4>(kMetricName);
  TestDistanceMatrixInt4<32, 8>(kMetricName);
  TestDistanceMatrixInt4<32, 16>(kMetricName);
  TestDistanceMatrixInt4<32, 32>(kMetricName);
}

// Quantizes a random FP32 holder with Int8StreamingConverter (normalization
// enabled) under the "NormalizedCosine" metric and checks, element by element,
// that
//   * the distance computed on the quantized data stays within
//     0.2 * DIMENSION of MinusInnerProduct over the normalized FP32 vectors, and
//   * reformer->convert() reproduces the converter's output byte for byte.
TEST(QuantizedIntegerMetric, TestInt8NormalizedCosine) {
  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_real_distribution<float> dist(-1.0, 2.0);

  const size_t DIMENSION = std::uniform_int_distribution<int>(1, 128)(gen);
  const size_t COUNT = 1000;
  IndexMeta meta(IndexMeta::DT_FP32, DIMENSION);
  meta.set_metric("NormalizedCosine", 0, Params());
  auto converter = IndexFactory::CreateConverter("Int8StreamingConverter");
  ASSERT_TRUE(!!converter);
  Params converter_params;
  converter_params.set(INTEGER_STREAMING_CONVERTER_ENABLE_NORMALIZE, true);
  ASSERT_EQ(0u, converter->init(meta, converter_params));

  auto holder = GetHolder(DIMENSION, COUNT, dist);
  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));
  auto holder2 = converter->result();
  EXPECT_EQ(COUNT, holder2->count());
  EXPECT_EQ(IndexMeta::DT_INT8, holder2->data_type());
  auto &meta2 = converter->meta();

  auto reformer = IndexFactory::CreateReformer(meta2.reformer_name());
  ASSERT_TRUE(reformer);
  ASSERT_EQ(0u, reformer->init(meta2.reformer_params()));

  // Random FP32 query, quantized once through the reformer.
  ailego::NumericalVector<float> vec(DIMENSION);
  for (size_t j = 0; j < DIMENSION; ++j) {
    vec[j] = dist(gen);
  }
  IndexQueryMeta qmeta;
  qmeta.set_meta(IndexMeta::DT_FP32, DIMENSION);
  IndexQueryMeta qmeta2;
  std::string out;
  ASSERT_EQ(0, reformer->transform(vec.data(), qmeta, &out, &qmeta2));
  ASSERT_EQ(qmeta2.dimension(), meta2.dimension());

  auto iter = holder->create_iterator();
  auto iter2 = holder2->create_iterator();
  auto metric = IndexFactory::CreateMetric(meta2.metric_name());
  ASSERT_TRUE(!!metric);
  ASSERT_EQ(0, metric->init(meta2, meta2.metric_params()));
  auto compute = metric->distance();
  ASSERT_TRUE(compute);

  // Neither the quantized query nor its normalized FP32 reference changes
  // inside the loop, so both are prepared once.
  const int8_t *qi = reinterpret_cast<const int8_t *>(&out[0]);
  std::vector<float> normalized_vec(vec.data(), vec.data() + DIMENSION);
  float norm_vec = 0.0f;
  ailego::Normalizer<float>::L2(normalized_vec.data(), DIMENSION, &norm_vec);

  for (; iter->is_valid(); iter->next(), iter2->next()) {
    // Only `iter` drives the loop, so make sure the quantized iterator still
    // points at an element before its data is read.
    ASSERT_TRUE(iter2->is_valid());

    const float *mf = reinterpret_cast<const float *>(iter->data());
    const int8_t *mi = reinterpret_cast<const int8_t *>(iter2->data());

    // Normalize a private copy of the element; the holder stays untouched.
    std::vector<float> normalized_mf(mf, mf + DIMENSION);
    float norm_mf = 0.0f;
    ailego::Normalizer<float>::L2(normalized_mf.data(), DIMENSION, &norm_mf);

    const float v1 = ailego::Distance::MinusInnerProduct(
        normalized_mf.data(), normalized_vec.data(), holder->dimension());
    float v2 = 0.0f;
    compute(mi, qi, holder2->dimension(), &v2);
    // NOTE(review): the tolerance grows with DIMENSION although both reference
    // vectors are normalized first; it may be looser than intended - confirm.
    ASSERT_NEAR(v1, v2, 0.2 * DIMENSION);

    // Converting the original element again must match the stored result.
    std::string out2;
    ASSERT_EQ(0, reformer->convert(iter->data(), qmeta, &out2, &qmeta2));
    ASSERT_EQ(out2.size(), holder2->element_size());
    ASSERT_EQ(0, std::memcmp(out2.data(), iter2->data(), out2.size()));
  }
}

// Runs the Int8 distance-matrix helper with the "NormalizedCosine" metric for
// every <M, N> combination where M and N are powers of two, N <= M and M <= 32.
TEST(QuantizedIntegerMetric, TestInt8NormalizedCosineMetric) {
  const char *const kMetricName = "NormalizedCosine";

  TestDistanceMatrixInt8<1, 1>(kMetricName);

  TestDistanceMatrixInt8<2, 1>(kMetricName);
  TestDistanceMatrixInt8<2, 2>(kMetricName);

  TestDistanceMatrixInt8<4, 1>(kMetricName);
  TestDistanceMatrixInt8<4, 2>(kMetricName);
  TestDistanceMatrixInt8<4, 4>(kMetricName);

  TestDistanceMatrixInt8<8, 1>(kMetricName);
  TestDistanceMatrixInt8<8, 2>(kMetricName);
  TestDistanceMatrixInt8<8, 4>(kMetricName);
  TestDistanceMatrixInt8<8, 8>(kMetricName);

  TestDistanceMatrixInt8<16, 1>(kMetricName);
  TestDistanceMatrixInt8<16, 2>(kMetricName);
  TestDistanceMatrixInt8<16, 4>(kMetricName);
  TestDistanceMatrixInt8<16, 8>(kMetricName);
  TestDistanceMatrixInt8<16, 16>(kMetricName);

  TestDistanceMatrixInt8<32, 1>(kMetricName);
  TestDistanceMatrixInt8<32, 2>(kMetricName);
  TestDistanceMatrixInt8<32, 4>(kMetricName);
  TestDistanceMatrixInt8<32, 8>(kMetricName);
  TestDistanceMatrixInt8<32, 16>(kMetricName);
  TestDistanceMatrixInt8<32, 32>(kMetricName);
}

// Quantizes a random FP32 holder with CosineInt8Converter under the "Cosine"
// metric and checks, element by element, that
//   * the batch distance (batch size 1) computed on the quantized data stays
//     within 0.2 * DIMENSION of MinusInnerProduct over the normalized FP32
//     vectors, and
//   * reformer->convert() reproduces the converter's output byte for byte.
TEST(QuantizedIntegerMetric, TestInt8Cosine) {
  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_real_distribution<float> dist(-1.0, 2.0);

  const size_t DIMENSION = std::uniform_int_distribution<int>(1, 128)(gen);
  const size_t COUNT = 1000;
  IndexMeta meta(IndexMeta::DT_FP32, DIMENSION);
  meta.set_metric("Cosine", 0, Params());
  auto converter = IndexFactory::CreateConverter("CosineInt8Converter");
  ASSERT_TRUE(!!converter);
  Params converter_params;
  ASSERT_EQ(0u, converter->init(meta, converter_params));

  auto holder = GetHolder(DIMENSION, COUNT, dist);
  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));
  auto holder2 = converter->result();
  EXPECT_EQ(COUNT, holder2->count());
  EXPECT_EQ(IndexMeta::DT_INT8, holder2->data_type());
  auto &meta2 = converter->meta();

  auto reformer = IndexFactory::CreateReformer(meta2.reformer_name());
  ASSERT_TRUE(reformer);
  ASSERT_EQ(0u, reformer->init(meta2.reformer_params()));

  // Random FP32 query, quantized once through the reformer.
  ailego::NumericalVector<float> vec(DIMENSION);
  for (size_t j = 0; j < DIMENSION; ++j) {
    vec[j] = dist(gen);
  }
  IndexQueryMeta qmeta;
  qmeta.set_meta(IndexMeta::DT_FP32, DIMENSION);
  IndexQueryMeta qmeta2;
  std::string out;
  ASSERT_EQ(0, reformer->transform(vec.data(), qmeta, &out, &qmeta2));
  ASSERT_EQ(qmeta2.dimension(), meta2.dimension());

  auto iter = holder->create_iterator();
  auto iter2 = holder2->create_iterator();
  auto metric = IndexFactory::CreateMetric(meta2.metric_name());
  ASSERT_TRUE(!!metric);
  ASSERT_EQ(0, metric->init(meta2, meta2.metric_params()));
  auto compute_batch = metric->batch_distance();
  ASSERT_TRUE(compute_batch);

  // Let the metric preprocess the quantized query in place, if it asks to.
  int8_t *qi = reinterpret_cast<int8_t *>(&out[0]);
  if (auto query_preprocess_func = metric->get_query_preprocess_func();
      query_preprocess_func != nullptr) {
    query_preprocess_func(qi, holder2->dimension());
  }

  // The normalized FP32 reference query does not change inside the loop, so it
  // is prepared once.
  std::vector<float> normalized_vec(vec.data(), vec.data() + DIMENSION);
  float norm_vec = 0.0f;
  ailego::Normalizer<float>::L2(normalized_vec.data(), DIMENSION, &norm_vec);

  for (; iter->is_valid(); iter->next(), iter2->next()) {
    // Only `iter` drives the loop, so make sure the quantized iterator still
    // points at an element before its data is read.
    ASSERT_TRUE(iter2->is_valid());

    const float *mf = reinterpret_cast<const float *>(iter->data());
    // Kept as a mutable pointer variable: its address is handed to the batch
    // function below as a one-element array of vectors.
    const int8_t *mi = reinterpret_cast<const int8_t *>(iter2->data());

    // Normalize a private copy of the element; the holder stays untouched.
    std::vector<float> normalized_mf(mf, mf + DIMENSION);
    float norm_mf = 0.0f;
    ailego::Normalizer<float>::L2(normalized_mf.data(), DIMENSION, &norm_mf);

    const float v1 = ailego::Distance::MinusInnerProduct(
        normalized_mf.data(), normalized_vec.data(), holder->dimension());
    float v2 = 0.0f;
    compute_batch(reinterpret_cast<const void **>(&mi), qi, 1,
                  holder2->dimension(), &v2);
    // NOTE(review): the tolerance grows with DIMENSION although both reference
    // vectors are normalized first; it may be looser than intended - confirm.
    ASSERT_NEAR(v1, v2, 0.2 * DIMENSION);

    // Converting the original element again must match the stored result.
    std::string out2;
    ASSERT_EQ(0, reformer->convert(iter->data(), qmeta, &out2, &qmeta2));
    ASSERT_EQ(out2.size(), holder2->element_size());
    ASSERT_EQ(0, std::memcmp(out2.data(), iter2->data(), out2.size()));
  }
}

// Quantizes a random FP32 holder with Int4StreamingConverter (normalization
// enabled) under the "NormalizedCosine" metric and checks, element by element,
// that
//   * the distance computed on the quantized data stays within
//     0.2 * DIMENSION of MinusInnerProduct over the normalized FP32 vectors, and
//   * reformer->convert() reproduces the converter's output byte for byte.
// The random dimension is doubled so that it is always even.
TEST(QuantizedIntegerMetric, TestInt4NormalizedCosine) {
  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_real_distribution<float> dist(-1.0, 2.0);

  const size_t DIMENSION = std::uniform_int_distribution<int>(1, 128)(gen) * 2;
  const size_t COUNT = 1000;
  IndexMeta meta;
  meta.set_meta(IndexMeta::DT_FP32, DIMENSION);
  meta.set_metric("NormalizedCosine", 0, Params());
  auto converter = IndexFactory::CreateConverter("Int4StreamingConverter");
  ASSERT_TRUE(!!converter);
  Params converter_params;
  converter_params.set(INTEGER_STREAMING_CONVERTER_ENABLE_NORMALIZE, true);
  ASSERT_EQ(0u, converter->init(meta, converter_params));

  auto holder = GetHolder(DIMENSION, COUNT, dist);
  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));
  auto holder2 = converter->result();
  EXPECT_EQ(COUNT, holder2->count());
  EXPECT_EQ(IndexMeta::DT_INT4, holder2->data_type());
  auto &meta2 = converter->meta();

  auto reformer = IndexFactory::CreateReformer(meta2.reformer_name());
  ASSERT_TRUE(reformer);
  ASSERT_EQ(0u, reformer->init(meta2.reformer_params()));

  // Random FP32 query, quantized once through the reformer.
  ailego::NumericalVector<float> vec(DIMENSION);
  for (size_t j = 0; j < DIMENSION; ++j) {
    vec[j] = dist(gen);
  }
  IndexQueryMeta qmeta;
  qmeta.set_meta(IndexMeta::DT_FP32, DIMENSION);
  IndexQueryMeta qmeta2;
  std::string out;
  ASSERT_EQ(0, reformer->transform(vec.data(), qmeta, &out, &qmeta2));
  ASSERT_EQ(qmeta2.dimension(), meta2.dimension());

  auto iter = holder->create_iterator();
  auto iter2 = holder2->create_iterator();
  auto metric = IndexFactory::CreateMetric(meta2.metric_name());
  ASSERT_TRUE(!!metric);
  ASSERT_EQ(0, metric->init(meta2, meta2.metric_params()));
  auto compute = metric->distance();
  ASSERT_TRUE(compute);

  // Neither the quantized query nor its normalized FP32 reference changes
  // inside the loop, so both are prepared once.
  const int8_t *qi = reinterpret_cast<const int8_t *>(&out[0]);
  std::vector<float> normalized_vec(vec.data(), vec.data() + DIMENSION);
  float norm_vec = 0.0f;
  ailego::Normalizer<float>::L2(normalized_vec.data(), DIMENSION, &norm_vec);

  for (; iter->is_valid(); iter->next(), iter2->next()) {
    // Only `iter` drives the loop, so make sure the quantized iterator still
    // points at an element before its data is read.
    ASSERT_TRUE(iter2->is_valid());

    const float *mf = reinterpret_cast<const float *>(iter->data());
    const int8_t *mi = reinterpret_cast<const int8_t *>(iter2->data());

    // Normalize a private copy of the element; the holder stays untouched.
    std::vector<float> normalized_mf(mf, mf + DIMENSION);
    float norm_mf = 0.0f;
    ailego::Normalizer<float>::L2(normalized_mf.data(), DIMENSION, &norm_mf);

    const float v1 = ailego::Distance::MinusInnerProduct(
        normalized_mf.data(), normalized_vec.data(), holder->dimension());
    float v2 = 0.0f;
    compute(mi, qi, holder2->dimension(), &v2);
    // NOTE(review): the tolerance grows with DIMENSION although both reference
    // vectors are normalized first; it may be looser than intended - confirm.
    ASSERT_NEAR(v1, v2, 0.2 * DIMENSION);

    // Converting the original element again must match the stored result.
    std::string out2;
    ASSERT_EQ(0, reformer->convert(iter->data(), qmeta, &out2, &qmeta2));
    ASSERT_EQ(out2.size(), holder2->element_size());
    ASSERT_EQ(0, std::memcmp(out2.data(), iter2->data(), out2.size()));
  }
}

// Runs the Int4 distance-matrix helper with the "NormalizedCosine" metric for
// every <M, N> combination where M and N are powers of two, N <= M and M <= 32.
TEST(QuantizedIntegerMetric, TestInt4NormalizedCosineMetric) {
  const char *const kMetricName = "NormalizedCosine";

  TestDistanceMatrixInt4<1, 1>(kMetricName);

  TestDistanceMatrixInt4<2, 1>(kMetricName);
  TestDistanceMatrixInt4<2, 2>(kMetricName);

  TestDistanceMatrixInt4<4, 1>(kMetricName);
  TestDistanceMatrixInt4<4, 2>(kMetricName);
  TestDistanceMatrixInt4<4, 4>(kMetricName);

  TestDistanceMatrixInt4<8, 1>(kMetricName);
  TestDistanceMatrixInt4<8, 2>(kMetricName);
  TestDistanceMatrixInt4<8, 4>(kMetricName);
  TestDistanceMatrixInt4<8, 8>(kMetricName);

  TestDistanceMatrixInt4<16, 1>(kMetricName);
  TestDistanceMatrixInt4<16, 2>(kMetricName);
  TestDistanceMatrixInt4<16, 4>(kMetricName);
  TestDistanceMatrixInt4<16, 8>(kMetricName);
  TestDistanceMatrixInt4<16, 16>(kMetricName);

  TestDistanceMatrixInt4<32, 1>(kMetricName);
  TestDistanceMatrixInt4<32, 2>(kMetricName);
  TestDistanceMatrixInt4<32, 4>(kMetricName);
  TestDistanceMatrixInt4<32, 8>(kMetricName);
  TestDistanceMatrixInt4<32, 16>(kMetricName);
  TestDistanceMatrixInt4<32, 32>(kMetricName);
}


================================================
FILE: tests/core/quantizer/CMakeLists.txt
================================================
# Declares one cc_gtest target per *_test.cc source found (recursively) under
# this directory.
# NOTE(review): cc_gtest is presumably defined by cmake/bazel.cmake - confirm.
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)

# Collect every test source below the current source directory.
file(GLOB_RECURSE ALL_TEST_SRCS *_test.cc)

foreach(CC_SRCS ${ALL_TEST_SRCS})
  # NAME_WE yields the file name without directory or extension, so each target
  # is named after its source file (e.g. half_float_reformer_test).
  get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)
  cc_gtest(
      NAME ${CC_TARGET}
      STRICT
      LIBS zvec_ailego core_framework core_quantizer
      SRCS ${CC_SRCS}
      INCS . ${PROJECT_ROOT_DIR}/src/core/
    )
endforeach()

================================================
FILE: tests/core/quantizer/half_float_reformer_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <iostream>
#include <random>

// #include <zvec/ailego/container/vector.h>
// #include <zvec/ailego/container/params.h>

#include <gtest/gtest.h>
#include "zvec/core/framework/index_factory.h"
#include "zvec/core/framework/index_holder.h"

using namespace zvec::core;

// Converts a random FP32 holder to FP16 with HalfFloatConverter and checks that
// HalfFloatReformer::transform() and ::convert() reproduce the converter's
// output for every element, both when the element is presented as a single
// query and when the same bytes are presented as 4 queries of a quarter of the
// dimension.
TEST(HalfFloatReformer, General) {
  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_real_distribution<float> dist(-1.0, 1.0);

  const size_t COUNT = 1000;
  const size_t DIMENSION = 128;

  IndexMeta meta;
  meta.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);

  auto converter = IndexFactory::CreateConverter("HalfFloatConverter");
  ASSERT_TRUE(converter);
  ASSERT_EQ(0u, converter->init(meta, zvec::ailego::Params()));

  auto reformer = IndexFactory::CreateReformer("HalfFloatReformer");
  ASSERT_TRUE(reformer);
  ASSERT_EQ(0u, reformer->init(zvec::ailego::Params()));

  auto holder =
      std::make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(
          DIMENSION);
  for (size_t i = 0; i < COUNT; ++i) {
    zvec::ailego::NumericalVector<float> vec(DIMENSION);
    for (size_t j = 0; j < DIMENSION; ++j) {
      vec[j] = dist(gen);
    }
    holder->emplace(i + 1, vec);
  }
  EXPECT_EQ(COUNT, holder->count());
  EXPECT_EQ(IndexMeta::DataType::DT_FP32, holder->data_type());
  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));

  auto holder2 = converter->result();
  EXPECT_EQ(COUNT, holder2->count());
  EXPECT_EQ(IndexMeta::DataType::DT_FP16, holder2->data_type());
  EXPECT_EQ(holder->dimension(), holder2->dimension());
  EXPECT_EQ(holder->element_size(), holder2->element_size() * 2);

  auto iter = holder->create_iterator();
  auto iter2 = holder2->create_iterator();
  std::string buffer;

  // Query descriptions used for every element: the whole vector, and the same
  // memory viewed as 4 vectors of a quarter of the dimension.
  const IndexQueryMeta full_meta(holder->data_type(), holder->dimension());
  const IndexQueryMeta quarter_meta(holder->data_type(),
                                    holder->dimension() / 4);

  for (; iter->is_valid(); iter->next(), iter2->next()) {
    // Fatal on purpose: both data pointers are dereferenced right below, so a
    // failed check must stop the test instead of reading invalid memory.
    ASSERT_TRUE(iter2->is_valid());
    ASSERT_TRUE(iter->data());
    ASSERT_TRUE(iter2->data());

    // Expected bytes: what the converter produced for this element.
    const std::string buffer2(reinterpret_cast<const char *>(iter2->data()),
                              holder2->element_size());

    IndexQueryMeta qmeta;
    EXPECT_EQ(0, reformer->transform(iter->data(), full_meta, &buffer, &qmeta));
    EXPECT_EQ(IndexMeta::DataType::DT_FP16, qmeta.data_type());
    EXPECT_EQ(holder->dimension(), qmeta.dimension());
    EXPECT_EQ(buffer, buffer2);

    EXPECT_EQ(0, reformer->transform(iter->data(), quarter_meta, 4, &buffer,
                                     &qmeta));
    EXPECT_EQ(IndexMeta::DataType::DT_FP16, qmeta.data_type());
    EXPECT_EQ(holder->dimension() / 4, qmeta.dimension());
    EXPECT_EQ(buffer, buffer2);

    // Test reformer convert
    buffer.clear();
    EXPECT_EQ(0, reformer->convert(iter->data(), full_meta, &buffer, &qmeta));
    EXPECT_EQ(IndexMeta::DataType::DT_FP16, qmeta.data_type());
    EXPECT_EQ(holder->dimension(), qmeta.dimension());
    EXPECT_EQ(buffer, buffer2);

    buffer.clear();
    EXPECT_EQ(0,
              reformer->convert(iter->data(), quarter_meta, 4, &buffer, &qmeta));
    EXPECT_EQ(IndexMeta::DataType::DT_FP16, qmeta.data_type());
    EXPECT_EQ(holder->dimension() / 4, qmeta.dimension());
    EXPECT_EQ(buffer, buffer2);
  }
}


================================================
FILE: tests/core/quantizer/integer_quantizer_reformer_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <iostream>
#include <random>
#include <gtest/gtest.h>
#include <zvec/ailego/container/vector.h>
#include "zvec/core/framework/index_factory.h"
#include "zvec/core/framework/index_holder.h"

using namespace zvec::core;

// Quantizes a random FP32 holder to INT8 with Int8QuantizerConverter (10000
// histogram bins) and checks that an Int8QuantizerReformer initialized from the
// converter's reformer params reproduces the converter's output for every
// element via transform() and convert(), both for a single query and for the
// same bytes presented as 4 queries of a quarter of the dimension.
TEST(IntegerReformer, Int8General) {
  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_real_distribution<float> dist(0.0, 1.0);

  const size_t COUNT = 10000;
  const size_t DIMENSION = 12;

  IndexMeta meta;
  meta.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);

  auto converter = IndexFactory::CreateConverter("Int8QuantizerConverter");
  ASSERT_TRUE(converter);
  zvec::ailego::Params params;
  params.set("proxima.int8_quantizer.converter.histogram_bins_count", 10000);
  ASSERT_EQ(0u, converter->init(meta, params));

  auto holder =
      std::make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(
          DIMENSION);
  for (size_t i = 0; i < COUNT; ++i) {
    zvec::ailego::NumericalVector<float> vec(DIMENSION);
    for (size_t j = 0; j < DIMENSION; ++j) {
      vec[j] = dist(gen);
    }
    holder->emplace(i + 1, vec);
  }
  EXPECT_EQ(COUNT, holder->count());
  EXPECT_EQ(IndexMeta::DataType::DT_FP32, holder->data_type());
  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));
  auto &stats = converter->stats();
  EXPECT_EQ(COUNT, stats.trained_count());
  EXPECT_EQ(COUNT, stats.transformed_count());

  auto holder2 = converter->result();
  EXPECT_EQ(COUNT, holder2->count());
  EXPECT_EQ(IndexMeta::DataType::DT_INT8, holder2->data_type());
  EXPECT_EQ(holder->dimension(), holder2->dimension());
  EXPECT_EQ(holder->element_size(), holder2->element_size() * 4);

  auto iter = holder->create_iterator();
  auto iter2 = holder2->create_iterator();
  std::string buffer;

  auto reformer = IndexFactory::CreateReformer("Int8QuantizerReformer");
  ASSERT_TRUE(reformer);
  ASSERT_EQ(0u, reformer->init(converter->meta().reformer_params()));

  // Query descriptions used for every element: the whole vector, and the same
  // memory viewed as 4 vectors of a quarter of the dimension.
  const IndexQueryMeta full_meta(holder->data_type(), holder->dimension());
  const IndexQueryMeta quarter_meta(holder->data_type(),
                                    holder->dimension() / 4);

  for (; iter->is_valid(); iter->next(), iter2->next()) {
    // Fatal on purpose: both data pointers are dereferenced right below, so a
    // failed check must stop the test instead of reading invalid memory.
    ASSERT_TRUE(iter2->is_valid());
    ASSERT_TRUE(iter->data());
    ASSERT_TRUE(iter2->data());

    // Expected bytes: what the converter produced for this element.
    const std::string buffer2(reinterpret_cast<const char *>(iter2->data()),
                              holder2->element_size());

    IndexQueryMeta qmeta;
    EXPECT_EQ(0, reformer->transform(iter->data(), full_meta, &buffer, &qmeta));
    EXPECT_EQ(IndexMeta::DataType::DT_INT8, qmeta.data_type());
    EXPECT_EQ(holder->dimension(), qmeta.dimension());
    EXPECT_EQ(buffer, buffer2);

    EXPECT_EQ(0, reformer->transform(iter->data(), quarter_meta, 4, &buffer,
                                     &qmeta));
    EXPECT_EQ(IndexMeta::DataType::DT_INT8, qmeta.data_type());
    EXPECT_EQ(holder->dimension() / 4, qmeta.dimension());
    EXPECT_EQ(buffer, buffer2);

    // Test reformer convert
    buffer.clear();
    EXPECT_EQ(0, reformer->convert(iter->data(), full_meta, &buffer, &qmeta));
    EXPECT_EQ(IndexMeta::DataType::DT_INT8, qmeta.data_type());
    EXPECT_EQ(holder->dimension(), qmeta.dimension());
    EXPECT_EQ(buffer, buffer2);

    buffer.clear();
    EXPECT_EQ(0,
              reformer->convert(iter->data(), quarter_meta, 4, &buffer, &qmeta));
    EXPECT_EQ(IndexMeta::DataType::DT_INT8, qmeta.data_type());
    EXPECT_EQ(holder->dimension() / 4, qmeta.dimension());
    EXPECT_EQ(buffer, buffer2);
  }
}


// Same round-trip check as the multi-pass case, but the converter is fed from
// a OnePassIndexHolder. A MultiPassIndexHolder mirror filled with the same
// vectors supplies the original FP32 data for the comparison loop.
TEST(IntegerReformer, Int8OnePassHolder) {
  std::random_device rd;
  std::mt19937 gen(rd());
  std::normal_distribution<float> dist(5, 2.0);

  const size_t COUNT = 10000;
  const size_t DIMENSION = 512;

  IndexMeta meta;
  meta.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);

  auto converter = IndexFactory::CreateConverter("Int8QuantizerConverter");
  ASSERT_TRUE(converter);
  ASSERT_EQ(0u, converter->init(meta, zvec::ailego::Params()));

  auto holder =
      std::make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(
          DIMENSION);
  auto holder_mirror =
      std::make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(
          DIMENSION);
  for (size_t i = 0; i < COUNT; ++i) {
    zvec::ailego::NumericalVector<float> vec(DIMENSION);
    for (size_t j = 0; j < DIMENSION; ++j) {
      vec[j] = dist(gen);
    }
    holder->emplace(i + 1, vec);
    holder_mirror->emplace(i + 1, vec);
  }
  EXPECT_EQ(COUNT, holder->count());
  EXPECT_EQ(IndexMeta::DataType::DT_FP32, holder->data_type());
  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));

  auto holder2 = converter->result();
  EXPECT_EQ(COUNT, holder2->count());
  EXPECT_EQ(IndexMeta::DataType::DT_INT8, holder2->data_type());
  EXPECT_EQ(holder->dimension(), holder2->dimension());
  EXPECT_EQ(holder->element_size(), holder2->element_size() * 4);

  // Iterate the mirror, not the one-pass holder handed to the converter.
  auto iter = holder_mirror->create_iterator();
  auto iter2 = holder2->create_iterator();
  std::string buffer;

  auto reformer = IndexFactory::CreateReformer("Int8QuantizerReformer");
  ASSERT_TRUE(reformer);
  ASSERT_EQ(0u, reformer->init(converter->meta().reformer_params()));

  // Query descriptions used for every element: the whole vector, and the same
  // memory viewed as 4 vectors of a quarter of the dimension.
  const IndexQueryMeta full_meta(holder->data_type(), holder->dimension());
  const IndexQueryMeta quarter_meta(holder->data_type(),
                                    holder->dimension() / 4);

  for (; iter->is_valid(); iter->next(), iter2->next()) {
    // Fatal on purpose: both data pointers are dereferenced right below, so a
    // failed check must stop the test instead of reading invalid memory.
    ASSERT_TRUE(iter2->is_valid());
    ASSERT_TRUE(iter->data());
    ASSERT_TRUE(iter2->data());

    // Expected bytes: what the converter produced for this element.
    const std::string buffer2(reinterpret_cast<const char *>(iter2->data()),
                              holder2->element_size());

    IndexQueryMeta qmeta;
    EXPECT_EQ(0, reformer->transform(iter->data(), full_meta, &buffer, &qmeta));
    EXPECT_EQ(IndexMeta::DataType::DT_INT8, qmeta.data_type());
    EXPECT_EQ(holder->dimension(), qmeta.dimension());
    EXPECT_EQ(buffer, buffer2);

    EXPECT_EQ(0, reformer->transform(iter->data(), quarter_meta, 4, &buffer,
                                     &qmeta));
    EXPECT_EQ(IndexMeta::DataType::DT_INT8, qmeta.data_type());
    EXPECT_EQ(holder->dimension() / 4, qmeta.dimension());
    EXPECT_EQ(buffer, buffer2);
  }
}

// Trains Int8QuantizerConverter on uniformly distributed data in [5, 10) fed
// from a OnePassIndexHolder, asserts that every vector was counted as trained,
// and checks that an Int8QuantizerReformer initialized from the trained
// reformer params reproduces the converter's output through transform().
TEST(IntegerReformer, Int8TrainedParams) {
  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_real_distribution<float> dist(5, 10.0);

  const size_t COUNT = 10000;
  const size_t DIMENSION = 512;

  IndexMeta meta;
  meta.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);

  auto converter = IndexFactory::CreateConverter("Int8QuantizerConverter");
  ASSERT_TRUE(converter);
  ASSERT_EQ(0u, converter->init(meta, zvec::ailego::Params()));

  auto holder =
      std::make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(
          DIMENSION);
  auto holder_mirror =
      std::make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(
          DIMENSION);
  for (size_t i = 0; i < COUNT; ++i) {
    zvec::ailego::NumericalVector<float> vec(DIMENSION);
    for (size_t j = 0; j < DIMENSION; ++j) {
      vec[j] = dist(gen);
    }
    holder->emplace(i + 1, vec);
    holder_mirror->emplace(i + 1, vec);
  }
  EXPECT_EQ(COUNT, holder->count());
  EXPECT_EQ(IndexMeta::DataType::DT_FP32, holder->data_type());
  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));
  // Bind by reference: only a counter is read, no copy of the stats is needed.
  auto &stats = converter->stats();
  ASSERT_EQ(COUNT, stats.trained_count());

  auto holder2 = converter->result();
  EXPECT_EQ(COUNT, holder2->count());
  EXPECT_EQ(IndexMeta::DataType::DT_INT8, holder2->data_type());
  EXPECT_EQ(holder->dimension(), holder2->dimension());
  EXPECT_EQ(holder->element_size(), holder2->element_size() * 4);

  // Iterate the mirror, not the one-pass holder handed to the converter.
  auto iter = holder_mirror->create_iterator();
  auto iter2 = holder2->create_iterator();
  std::string buffer;

  auto reformer = IndexFactory::CreateReformer("Int8QuantizerReformer");
  ASSERT_TRUE(reformer);
  ASSERT_EQ(0u, reformer->init(converter->meta().reformer_params()));

  // Query descriptions used for every element: the whole vector, and the same
  // memory viewed as 4 vectors of a quarter of the dimension.
  const IndexQueryMeta full_meta(holder->data_type(), holder->dimension());
  const IndexQueryMeta quarter_meta(holder->data_type(),
                                    holder->dimension() / 4);

  for (; iter->is_valid(); iter->next(), iter2->next()) {
    // Fatal on purpose: both data pointers are dereferenced right below, so a
    // failed check must stop the test instead of reading invalid memory.
    ASSERT_TRUE(iter2->is_valid());
    ASSERT_TRUE(iter->data());
    ASSERT_TRUE(iter2->data());

    // Expected bytes: what the converter produced for this element.
    const std::string buffer2(reinterpret_cast<const char *>(iter2->data()),
                              holder2->element_size());

    IndexQueryMeta qmeta;
    EXPECT_EQ(0, reformer->transform(iter->data(), full_meta, &buffer, &qmeta));
    EXPECT_EQ(IndexMeta::DataType::DT_INT8, qmeta.data_type());
    EXPECT_EQ(holder->dimension(), qmeta.dimension());
    EXPECT_EQ(buffer, buffer2);

    EXPECT_EQ(0, reformer->transform(iter->data(), quarter_meta, 4, &buffer,
                                     &qmeta));
    EXPECT_EQ(IndexMeta::DataType::DT_INT8, qmeta.data_type());
    EXPECT_EQ(holder->dimension() / 4, qmeta.dimension());
    EXPECT_EQ(buffer, buffer2);
  }
}

// With "proxima.int8_quantizer.converter.disable_bias" set, training on data
// drawn from [5, 10) must still select Int8QuantizerReformer and publish a
// reformer bias of exactly 0.
TEST(IntegerReformer, Int8NonBias) {
  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_real_distribution<float> dist(5, 10.0);

  const size_t COUNT = 10000;
  const size_t DIMENSION = 512;

  IndexMeta meta;
  meta.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);

  auto converter = IndexFactory::CreateConverter("Int8QuantizerConverter");
  ASSERT_TRUE(converter);
  zvec::ailego::Params params;
  params.set("proxima.int8_quantizer.converter.disable_bias", true);
  ASSERT_EQ(0u, converter->init(meta, params));

  // Only the trained parameters are inspected below, so no multi-pass mirror
  // of the input data is kept.
  auto holder =
      std::make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(
          DIMENSION);
  for (size_t i = 0; i < COUNT; ++i) {
    zvec::ailego::NumericalVector<float> vec(DIMENSION);
    for (size_t j = 0; j < DIMENSION; ++j) {
      vec[j] = dist(gen);
    }
    holder->emplace(i + 1, vec);
  }
  EXPECT_EQ(COUNT, holder->count());
  EXPECT_EQ(IndexMeta::DataType::DT_FP32, holder->data_type());
  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));
  // Bind by reference: only a counter is read, no copy of the stats is needed.
  auto &stats = converter->stats();
  ASSERT_EQ(COUNT, stats.trained_count());
  ASSERT_EQ(converter->meta().reformer_name(), "Int8QuantizerReformer");
  auto reformer_params = converter->meta().reformer_params();
  ASSERT_EQ(
      reformer_params.get_as_float("proxima.int8_quantizer.reformer.bias"),
      0.0f);
}

//! Check whether two floating point values are equal to within `ulp` units
//! in the last place.
template <class T>
static inline auto IsAlmostEqual(const T &x, const T &y, int ulp) ->
    typename std::enable_if<std::is_floating_point<T>::value, bool>::type {
  const T gap = std::fabs(x - y);

  // A gap smaller than the smallest normal value counts as equal
  if (gap < std::numeric_limits<T>::min()) {
    return true;
  }

  // Machine epsilon is scaled to the magnitude of the operands and then
  // multiplied by the requested precision in ULPs
  const T tolerance =
      std::numeric_limits<T>::epsilon() * std::fabs(x + y) * ulp;
  return gap <= tolerance;
}

//! Train one Int8 converter, then initialise a second converter from the
//! converter parameters the first one produced and use it for transform only.
//! The reformer (initialised from the first converter's reformer parameters)
//! must reproduce the second converter's output byte for byte.
TEST(IntegerReformer, Int8InitConverterWithTrainedParams) {
  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_real_distribution<float> dist(0.0, 1.0);

  const size_t COUNT = 10000;
  const size_t DIMENSION = 12;

  IndexMeta meta;
  meta.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);

  auto converter = IndexFactory::CreateConverter("Int8QuantizerConverter");
  ASSERT_TRUE(converter);
  zvec::ailego::Params params;
  params.set("proxima.int8_quantizer.converter.histogram_bins_count", 10000);
  ASSERT_EQ(0u, converter->init(meta, params));

  auto holder =
      std::make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(
          DIMENSION);
  for (size_t i = 0; i < COUNT; ++i) {
    zvec::ailego::NumericalVector<float> vec(DIMENSION);
    for (size_t j = 0; j < DIMENSION; ++j) {
      vec[j] = dist(gen);
    }
    holder->emplace(i + 1, vec);
  }
  EXPECT_EQ(COUNT, holder->count());
  EXPECT_EQ(IndexMeta::DataType::DT_FP32, holder->data_type());

  // Train only, and capture the parameters before the converter is replaced
  ASSERT_EQ(0, converter->train(holder));
  auto reformer_params = converter->meta().reformer_params();
  auto converter_params = converter->meta().converter_params();

  // Fresh converter initialised from the trained parameters; it is checked
  // for null like the first one because it is dereferenced right away.
  converter = IndexFactory::CreateConverter("Int8QuantizerConverter");
  ASSERT_TRUE(converter);
  ASSERT_EQ(0, converter->init(meta, converter_params));
  ASSERT_EQ(0, converter->transform(holder));

  // The second converter never trained, it only transformed
  auto &stats = converter->stats();
  EXPECT_EQ(0u, stats.trained_count());
  EXPECT_EQ(COUNT, stats.transformed_count());

  auto holder2 = converter->result();
  EXPECT_EQ(COUNT, holder2->count());
  EXPECT_EQ(IndexMeta::DataType::DT_INT8, holder2->data_type());
  EXPECT_EQ(holder->dimension(), holder2->dimension());
  EXPECT_EQ(holder->element_size(), holder2->element_size() * 4);

  auto iter = holder->create_iterator();
  auto iter2 = holder2->create_iterator();
  std::string buffer;

  auto reformer = IndexFactory::CreateReformer("Int8QuantizerReformer");
  ASSERT_TRUE(reformer);
  ASSERT_EQ(0u, reformer->init(reformer_params));

  for (; iter->is_valid(); iter->next(), iter2->next()) {
    // Fatal checks: the pointers below are dereferenced immediately, so
    // continuing after a failure would read through an invalid pointer.
    ASSERT_TRUE(iter2->is_valid());
    ASSERT_TRUE(iter->data());
    ASSERT_TRUE(iter2->data());

    // const float *f32 = (const float *)iter->data();
    // const int8_t *i8 = (const int8_t *)iter2->data();
    // printf("%f %d\n", f32[0], i8[0]);

    // Converter output for this record, used as the reference value
    std::string buffer2((const char *)iter2->data(), holder2->element_size());

    IndexQueryMeta qmeta;
    EXPECT_EQ(0, reformer->transform(
                     iter->data(),
                     IndexQueryMeta(holder->data_type(), holder->dimension()),
                     &buffer, &qmeta));
    EXPECT_EQ(IndexMeta::DataType::DT_INT8, qmeta.data_type());
    EXPECT_EQ(holder->dimension(), qmeta.dimension());
    EXPECT_EQ(buffer, buffer2);

    // Same record passed as 4 entries of a quarter of the dimension each
    // (the extra argument is presumably the query count - TODO confirm)
    EXPECT_EQ(0, reformer->transform(iter->data(),
                                     IndexQueryMeta(holder->data_type(),
                                                    holder->dimension() / 4),
                                     4, &buffer, &qmeta));
    EXPECT_EQ(IndexMeta::DataType::DT_INT8, qmeta.data_type());
    EXPECT_EQ(holder->dimension() / 4, qmeta.dimension());
    EXPECT_EQ(buffer, buffer2);

    // Test reformer convert
    buffer.clear();
    EXPECT_EQ(0, reformer->convert(
                     iter->data(),
                     IndexQueryMeta(holder->data_type(), holder->dimension()),
                     &buffer, &qmeta));
    EXPECT_EQ(IndexMeta::DataType::DT_INT8, qmeta.data_type());
    EXPECT_EQ(holder->dimension(), qmeta.dimension());
    EXPECT_EQ(buffer, buffer2);

    buffer.clear();
    EXPECT_EQ(0, reformer->convert(iter->data(),
                                   IndexQueryMeta(holder->data_type(),
                                                  holder->dimension() / 4),
                                   4, &buffer, &qmeta));
    EXPECT_EQ(IndexMeta::DataType::DT_INT8, qmeta.data_type());
    EXPECT_EQ(holder->dimension() / 4, qmeta.dimension());
    EXPECT_EQ(buffer, buffer2);
  }
}

// Int4 Tests =====
//! Train and transform FP32 vectors with the Int4 converter, then check that
//! the Int4 reformer's transform() and convert() reproduce the converter's
//! output for every record.
TEST(IntegerReformer, Int4General) {
  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_real_distribution<float> dist(0.0, 1.0);

  const size_t COUNT = 10000;
  const size_t DIMENSION = 12;

  IndexMeta meta;
  meta.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);

  auto converter = IndexFactory::CreateConverter("Int4QuantizerConverter");
  ASSERT_TRUE(converter);
  zvec::ailego::Params params;
  params.set("proxima.int4_quantizer.converter.histogram_bins_count", 10000);
  ASSERT_EQ(0u, converter->init(meta, params));

  auto holder =
      std::make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(
          DIMENSION);
  for (size_t i = 0; i < COUNT; ++i) {
    zvec::ailego::NumericalVector<float> vec(DIMENSION);
    for (size_t j = 0; j < DIMENSION; ++j) {
      vec[j] = dist(gen);
      // Print the first generated vector to stdout
      if (i == 0) printf(" %f", vec[j]);
    }
    if (i == 0) printf("\n");
    holder->emplace(i + 1, vec);
  }
  EXPECT_EQ(COUNT, holder->count());
  EXPECT_EQ(IndexMeta::DataType::DT_FP32, holder->data_type());
  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));
  auto &stats = converter->stats();
  EXPECT_EQ(COUNT, stats.trained_count());
  EXPECT_EQ(COUNT, stats.transformed_count());

  auto holder2 = converter->result();
  EXPECT_EQ(COUNT, holder2->count());
  EXPECT_EQ(IndexMeta::DataType::DT_INT4, holder2->data_type());
  EXPECT_EQ(holder->dimension(), holder2->dimension());
  // The quantized record is expected to be 1/8 the size of the FP32 record
  EXPECT_EQ(holder->element_size(), holder2->element_size() * 8);

  auto iter = holder->create_iterator();
  auto iter2 = holder2->create_iterator();
  std::string buffer;

  auto reformer = IndexFactory::CreateReformer("Int4QuantizerReformer");
  ASSERT_TRUE(reformer);
  ASSERT_EQ(0u, reformer->init(converter->meta().reformer_params()));

  for (; iter->is_valid(); iter->next(), iter2->next()) {
    // Fatal checks: the pointers below are dereferenced immediately, so
    // continuing after a failure would read through an invalid pointer.
    ASSERT_TRUE(iter2->is_valid());
    ASSERT_TRUE(iter->data());
    ASSERT_TRUE(iter2->data());

    // const float *f32 = (const float *)iter->data();
    // const int8_t *i8 = (const int8_t *)iter2->data();
    // printf("%f %d\n", f32[0], i8[0]);

    // Converter output for this record, used as the reference value
    std::string buffer2(
        std::string((const char *)iter2->data(), holder2->element_size()));

    IndexQueryMeta qmeta;
    EXPECT_EQ(0, reformer->transform(
                     iter->data(),
                     IndexQueryMeta(holder->data_type(), holder->dimension()),
                     &buffer, &qmeta));
    EXPECT_EQ(IndexMeta::DataType::DT_INT4, qmeta.data_type());
    EXPECT_EQ(holder->dimension(), qmeta.dimension());
    EXPECT_EQ(buffer, buffer2);

    // Same record passed as 3 entries of a third of the dimension each
    // (the extra argument is presumably the query count - TODO confirm)
    EXPECT_EQ(0, reformer->transform(iter->data(),
                                     IndexQueryMeta(holder->data_type(),
                                                    holder->dimension() / 3),
                                     3, &buffer, &qmeta));
    EXPECT_EQ(IndexMeta::DataType::DT_INT4, qmeta.data_type());
    EXPECT_EQ(holder->dimension() / 3, qmeta.dimension());
    ASSERT_EQ(buffer, buffer2);

    // Test reformer convert
    EXPECT_EQ(0, reformer->convert(
                     iter->data(),
                     IndexQueryMeta(holder->data_type(), holder->dimension()),
                     &buffer, &qmeta));
    EXPECT_EQ(IndexMeta::DataType::DT_INT4, qmeta.data_type());
    EXPECT_EQ(holder->dimension(), qmeta.dimension());
    EXPECT_EQ(buffer, buffer2);

    EXPECT_EQ(0, reformer->convert(iter->data(),
                                   IndexQueryMeta(holder->data_type(),
                                                  holder->dimension() / 3),
                                   3, &buffer, &qmeta));
    EXPECT_EQ(IndexMeta::DataType::DT_INT4, qmeta.data_type());
    EXPECT_EQ(holder->dimension() / 3, qmeta.dimension());
    ASSERT_EQ(buffer, buffer2);
  }
}


//! Feed the Int4 converter through a one-pass holder and verify the reformer
//! against the converter output. A multi-pass mirror of the same vectors
//! supplies the original FP32 records for the comparison loop.
TEST(IntegerReformer, Int4OnePassHolder) {
  std::random_device rd;
  std::mt19937 gen(rd());
  std::normal_distribution<float> dist(5, 2.0);

  const size_t COUNT = 10000;
  const size_t DIMENSION = 512;

  IndexMeta meta;
  meta.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);

  auto converter = IndexFactory::CreateConverter("Int4QuantizerConverter");
  ASSERT_TRUE(converter);
  ASSERT_EQ(0u, converter->init(meta, zvec::ailego::Params()));

  auto holder =
      std::make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(
          DIMENSION);
  auto holder_mirror =
      std::make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(
          DIMENSION);
  for (size_t i = 0; i < COUNT; ++i) {
    zvec::ailego::NumericalVector<float> vec(DIMENSION);
    for (size_t j = 0; j < DIMENSION; ++j) {
      vec[j] = dist(gen);
    }
    // Both holders receive identical records under identical keys
    holder->emplace(i + 1, vec);
    holder_mirror->emplace(i + 1, vec);
  }
  EXPECT_EQ(COUNT, holder->count());
  EXPECT_EQ(IndexMeta::DataType::DT_FP32, holder->data_type());
  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));

  auto holder2 = converter->result();
  EXPECT_EQ(COUNT, holder2->count());
  EXPECT_EQ(IndexMeta::DataType::DT_INT4, holder2->data_type());
  EXPECT_EQ(holder->dimension(), holder2->dimension());
  EXPECT_EQ(holder->element_size(), holder2->element_size() * 8);

  // The source records are iterated from the mirror, not from the one-pass
  // holder that was handed to the converter
  auto iter = holder_mirror->create_iterator();
  auto iter2 = holder2->create_iterator();
  std::string buffer;

  auto reformer = IndexFactory::CreateReformer("Int4QuantizerReformer");
  ASSERT_TRUE(reformer);
  ASSERT_EQ(0u, reformer->init(converter->meta().reformer_params()));

  for (; iter->is_valid(); iter->next(), iter2->next()) {
    // Fatal checks: the pointers below are dereferenced immediately, so
    // continuing after a failure would read through an invalid pointer.
    ASSERT_TRUE(iter2->is_valid());
    ASSERT_TRUE(iter->data());
    ASSERT_TRUE(iter2->data());

    // const float *f32 = (const float *)iter->data();
    // const int8_t *i8 = (const int8_t *)iter2->data();
    // printf("%f %d\n", f32[0], i8[0]);

    // Converter output for this record, used as the reference value
    std::string buffer2(
        std::string((const char *)iter2->data(), holder2->element_size()));

    IndexQueryMeta qmeta;
    EXPECT_EQ(0, reformer->transform(
                     iter->data(),
                     IndexQueryMeta(holder->data_type(), holder->dimension()),
                     &buffer, &qmeta));
    EXPECT_EQ(IndexMeta::DataType::DT_INT4, qmeta.data_type());
    EXPECT_EQ(holder->dimension(), qmeta.dimension());
    EXPECT_EQ(buffer, buffer2);

    // Same record passed as 4 entries of a quarter of the dimension each
    // (the extra argument is presumably the query count - TODO confirm)
    EXPECT_EQ(0, reformer->transform(iter->data(),
                                     IndexQueryMeta(holder->data_type(),
                                                    holder->dimension() / 4),
                                     4, &buffer, &qmeta));
    EXPECT_EQ(IndexMeta::DataType::DT_INT4, qmeta.data_type());
    EXPECT_EQ(holder->dimension() / 4, qmeta.dimension());
    EXPECT_EQ(buffer, buffer2);
  }
}

//! Quantize uniformly distributed FP32 vectors in [5, 10) with the Int4
//! converter through a one-pass holder, check the trained record count, and
//! verify that the reformer reproduces the converter output.
TEST(IntegerReformer, Int4TrainedParams) {
  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_real_distribution<float> dist(5, 10.0);

  const size_t COUNT = 10000;
  const size_t DIMENSION = 512;

  IndexMeta meta;
  meta.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);

  auto converter = IndexFactory::CreateConverter("Int4QuantizerConverter");
  ASSERT_TRUE(converter);
  ASSERT_EQ(0u, converter->init(meta, zvec::ailego::Params()));

  auto holder =
      std::make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(
          DIMENSION);
  auto holder_mirror =
      std::make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(
          DIMENSION);
  for (size_t i = 0; i < COUNT; ++i) {
    zvec::ailego::NumericalVector<float> vec(DIMENSION);
    for (size_t j = 0; j < DIMENSION; ++j) {
      vec[j] = dist(gen);
    }
    // Both holders receive identical records under identical keys
    holder->emplace(i + 1, vec);
    holder_mirror->emplace(i + 1, vec);
  }
  EXPECT_EQ(COUNT, holder->count());
  EXPECT_EQ(IndexMeta::DataType::DT_FP32, holder->data_type());
  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));

  // Bind by reference, as the sibling tests do, instead of copying the stats
  auto &stats = converter->stats();
  ASSERT_EQ(COUNT, stats.trained_count());

  auto holder2 = converter->result();
  EXPECT_EQ(COUNT, holder2->count());
  EXPECT_EQ(IndexMeta::DataType::DT_INT4, holder2->data_type());
  EXPECT_EQ(holder->dimension(), holder2->dimension());
  EXPECT_EQ(holder->element_size(), holder2->element_size() * 8);

  // The source records are iterated from the mirror, not from the one-pass
  // holder that was handed to the converter
  auto iter = holder_mirror->create_iterator();
  auto iter2 = holder2->create_iterator();
  std::string buffer;

  auto reformer = IndexFactory::CreateReformer("Int4QuantizerReformer");
  ASSERT_TRUE(reformer);
  ASSERT_EQ(0u, reformer->init(converter->meta().reformer_params()));

  for (; iter->is_valid(); iter->next(), iter2->next()) {
    // Fatal checks: the pointers below are dereferenced immediately, so
    // continuing after a failure would read through an invalid pointer.
    ASSERT_TRUE(iter2->is_valid());
    ASSERT_TRUE(iter->data());
    ASSERT_TRUE(iter2->data());

    // const float *f32 = (const float *)iter->data();
    // const int8_t *i8 = (const int8_t *)iter2->data();
    // printf("%f %d\n", f32[0], i8[0]);

    // Converter output for this record, used as the reference value
    std::string buffer2(
        std::string((const char *)iter2->data(), holder2->element_size()));

    IndexQueryMeta qmeta;
    EXPECT_EQ(0, reformer->transform(
                     iter->data(),
                     IndexQueryMeta(holder->data_type(), holder->dimension()),
                     &buffer, &qmeta));
    EXPECT_EQ(IndexMeta::DataType::DT_INT4, qmeta.data_type());
    EXPECT_EQ(holder->dimension(), qmeta.dimension());
    EXPECT_EQ(buffer, buffer2);

    // Same record passed as 4 entries of a quarter of the dimension each
    // (the extra argument is presumably the query count - TODO confirm)
    EXPECT_EQ(0, reformer->transform(iter->data(),
                                     IndexQueryMeta(holder->data_type(),
                                                    holder->dimension() / 4),
                                     4, &buffer, &qmeta));
    EXPECT_EQ(IndexMeta::DataType::DT_INT4, qmeta.data_type());
    EXPECT_EQ(holder->dimension() / 4, qmeta.dimension());
    EXPECT_EQ(buffer, buffer2);
  }
}

//! Train the Int4 converter with the bias option disabled and check that the
//! reformer parameters it publishes carry a bias of exactly zero.
TEST(IntegerReformer, Int4NonBias) {
  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_real_distribution<float> dist(5, 10.0);

  const size_t COUNT = 10000;
  const size_t DIMENSION = 512;

  IndexMeta meta;
  meta.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);

  auto converter = IndexFactory::CreateConverter("Int4QuantizerConverter");
  ASSERT_TRUE(converter);
  zvec::ailego::Params params;
  params.set("proxima.int4_quantizer.converter.disable_bias", true);
  ASSERT_EQ(0u, converter->init(meta, params));

  // Only the training input is required: this test inspects the produced
  // parameters and never compares individual records, so no mirror holder
  // is kept.
  auto holder =
      std::make_shared<OnePassIndexHolder<IndexMeta::DataType::DT_FP32>>(
          DIMENSION);
  for (size_t i = 0; i < COUNT; ++i) {
    zvec::ailego::NumericalVector<float> vec(DIMENSION);
    for (size_t j = 0; j < DIMENSION; ++j) {
      vec[j] = dist(gen);
    }
    holder->emplace(i + 1, vec);
  }
  EXPECT_EQ(COUNT, holder->count());
  EXPECT_EQ(IndexMeta::DataType::DT_FP32, holder->data_type());
  ASSERT_EQ(0u, IndexConverter::TrainAndTransform(converter, holder));

  // Bind by reference, as the sibling tests do, instead of copying the stats
  auto &stats = converter->stats();
  ASSERT_EQ(COUNT, stats.trained_count());
  ASSERT_EQ(converter->meta().reformer_name(), "Int4QuantizerReformer");

  // The input range [5, 10) is far from zero, so a zero bias can only come
  // from the disable_bias option.
  auto reformer_params = converter->meta().reformer_params();
  ASSERT_EQ(
      reformer_params.get_as_float("proxima.int4_quantizer.reformer.bias"),
      0.0f);
}

//! Train one Int4 converter, then initialise a second converter from the
//! converter parameters the first one produced and use it for transform only.
//! The reformer (initialised from the first converter's reformer parameters)
//! must reproduce the second converter's output byte for byte.
TEST(IntegerReformer, Int4InitConverterWithTrainedParams) {
  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_real_distribution<float> dist(0.0, 1.0);

  const size_t COUNT = 10000;
  const size_t DIMENSION = 16;

  IndexMeta meta;
  meta.set_meta(IndexMeta::DataType::DT_FP32, DIMENSION);

  auto converter = IndexFactory::CreateConverter("Int4QuantizerConverter");
  ASSERT_TRUE(converter);
  zvec::ailego::Params params;
  params.set("proxima.int4_quantizer.converter.histogram_bins_count", 10000);
  ASSERT_EQ(0u, converter->init(meta, params));

  auto holder =
      std::make_shared<MultiPassIndexHolder<IndexMeta::DataType::DT_FP32>>(
          DIMENSION);
  for (size_t i = 0; i < COUNT; ++i) {
    zvec::ailego::NumericalVector<float> vec(DIMENSION);
    for (size_t j = 0; j < DIMENSION; ++j) {
      vec[j] = dist(gen);
    }
    holder->emplace(i + 1, vec);
  }
  EXPECT_EQ(COUNT, holder->count());
  EXPECT_EQ(IndexMeta::DataType::DT_FP32, holder->data_type());

  // Train only, and capture the parameters before the converter is replaced
  ASSERT_EQ(0, converter->train(holder));
  auto reformer_params = converter->meta().reformer_params();
  auto converter_params = converter->meta().converter_params();

  // Fresh converter initialised from the trained parameters; it is checked
  // for null like the first one because it is dereferenced right away.
  converter = IndexFactory::CreateConverter("Int4QuantizerConverter");
  ASSERT_TRUE(converter);
  ASSERT_EQ(0, converter->init(meta, converter_params));
  ASSERT_EQ(0, converter->transform(holder));

  // The second converter never trained, it only transformed
  auto &stats = converter->stats();
  EXPECT_EQ(0u, stats.trained_count());
  EXPECT_EQ(COUNT, stats.transformed_count());

  auto holder2 = converter->result();
  EXPECT_EQ(COUNT, holder2->count());
  EXPECT_EQ(IndexMeta::DataType::DT_INT4, holder2->data_type());
  EXPECT_EQ(holder->dimension(), holder2->dimension());
  EXPECT_EQ(holder->element_size(), holder2->element_size() * 8);

  auto iter = holder->create_iterator();
  auto iter2 = holder2->create_iterator();
  std::string buffer;

  auto reformer = IndexFactory::CreateReformer("Int4QuantizerReformer");
  ASSERT_TRUE(reformer);
  ASSERT_EQ(0u, reformer->init(reformer_params));

  for (; iter->is_valid(); iter->next(), iter2->next()) {
    // Fatal checks: the pointers below are dereferenced immediately, so
    // continuing after a failure would read through an invalid pointer.
    ASSERT_TRUE(iter2->is_valid());
    ASSERT_TRUE(iter->data());
    ASSERT_TRUE(iter2->data());

    // const float *f32 = (const float *)iter->data();
    // const int8_t *i8 = (const int8_t *)iter2->data();
    // printf("%f %d\n", f32[0], i8[0]);

    // Converter output for this record, used as the reference value
    std::string buffer2(
        std::string((const char *)iter2->data(), holder2->element_size()));

    IndexQueryMeta qmeta;
    EXPECT_EQ(0, reformer->transform(
                     iter->data(),
                     IndexQueryMeta(holder->data_type(), holder->dimension()),
                     &buffer, &qmeta));
    EXPECT_EQ(IndexMeta::DataType::DT_INT4, qmeta.data_type());
    EXPECT_EQ(holder->dimension(), qmeta.dimension());
    EXPECT_EQ(buffer, buffer2);

    // Same record passed as 4 entries of a quarter of the dimension each
    // (the extra argument is presumably the query count - TODO confirm)
    EXPECT_EQ(0, reformer->transform(iter->data(),
                                     IndexQueryMeta(holder->data_type(),
                                                    holder->dimension() / 4),
                                     4, &buffer, &qmeta));
    EXPECT_EQ(IndexMeta::DataType::DT_INT4, qmeta.data_type());
    EXPECT_EQ(holder->dimension() / 4, qmeta.dimension());
    EXPECT_EQ(buffer, buffer2);
  }
}


================================================
FILE: tests/core/utility/CMakeLists.txt
================================================

# Pull in the project's shared CMake helper modules.
# NOTE(review): cc_gtest / cc_test_suite are presumably defined by these
# includes - confirm.
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

# Recursively collect every source file whose name ends in "_test.cc".
# NOTE(review): sources with another extension (e.g. "*_test.cpp") are not
# matched by this pattern and therefore get no test target here.
file(GLOB_RECURSE ALL_TEST_SRCS *_test.cc)

# Register one test target per collected source file.
foreach(CC_SRCS ${ALL_TEST_SRCS})
  # Target name = file name without directory and without extension.
  get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)
  cc_gtest(
    NAME ${CC_TARGET} STRICT
    LIBS zvec_ailego core_framework core_utility
    Arrow::arrow_depends
    Arrow::parquet_static
    SRCS ${CC_SRCS}
    INCS . ${PROJECT_ROOT_DIR}/src/core/
  )
  # Attach the target to the "zvec_ailego" suite.
  # NOTE(review): suite name differs from this directory (core/utility) -
  # verify it is intentional.
  cc_test_suite(zvec_ailego ${CC_TARGET})
endforeach()


================================================
FILE: tests/core/utility/buffer_storage_test.cpp
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <iostream>
#include <gtest/gtest.h>
#include <zvec/ailego/buffer/buffer_manager.h>
#include <zvec/core/framework/index_factory.h>
#include <zvec/core/framework/index_helper.h>

using namespace zvec;
using namespace zvec::core;

//! Write an index file through MMapFileStorage, then reopen the same file
//! with BufferStorage and check that the meta and the "AAAA" segment
//! contents read back as written.
TEST(BufferStorage, General) {
  std::string file_path = "buffer_storage_test_file";
  // Start from a clean slate so the first open below is expected to fail
  ailego::File::Delete(file_path);

  auto write_storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_TRUE(write_storage);
  std::cout << file_path << std::endl;
  // The file was just deleted; opening with `false` must not succeed
  // (second argument is presumably the create flag - TODO confirm)
  EXPECT_NE(0, write_storage->open(file_path, false));

  ailego::Params params;
  EXPECT_EQ(0, write_storage->init(params));
  std::cout << file_path << std::endl;
  EXPECT_EQ(0, write_storage->open(file_path, true));

  IndexMeta meta;
  meta.set_trainer("trainer", 111, ailego::Params());
  meta.set_searcher("searcher", 222, ailego::Params());
  meta.set_builder("builder", 333, ailego::Params());

  EXPECT_EQ(0, IndexHelper::SerializeToStorage(meta, write_storage.get()));
  EXPECT_EQ(0, write_storage->append("AAAA", 1234));
  EXPECT_EQ(0, write_storage->append("BBBB", 1234));
  auto aaaa = write_storage->get("AAAA");
  ASSERT_TRUE(aaaa);
  // Write through a clone of the segment handle
  auto aaaa1 = aaaa->clone();
  ASSERT_TRUE(aaaa1);
  std::string hello = "Hello world!!!";
  EXPECT_EQ(hello.size(), aaaa1->write(0, hello.data(), hello.size()));
  EXPECT_EQ(0, write_storage->close());

  // Reopen it
  auto read_storage = IndexFactory::CreateStorage("BufferStorage");
  // Guard the dereferences below, mirroring the check on write_storage
  ASSERT_TRUE(read_storage);
  EXPECT_EQ(0, read_storage->open(file_path, false));

  IndexMeta meta2;
  EXPECT_EQ(0, IndexHelper::DeserializeFromStorage(read_storage.get(), &meta2));
  EXPECT_EQ("trainer", meta2.trainer_name());
  EXPECT_EQ("searcher", meta2.searcher_name());
  EXPECT_EQ("builder", meta2.builder_name());
  auto aaaa2 = read_storage->get("AAAA");
  ASSERT_TRUE(aaaa2);
  // Initialised so a failed read never leaves an indeterminate pointer
  const void *data = nullptr;
  EXPECT_EQ(hello.size(), aaaa2->read(0, &data, hello.size()));
  // Reading through a clone must yield the same bytes
  auto aaaa3 = aaaa2->clone();
  ASSERT_TRUE(aaaa3);
  EXPECT_EQ(hello.size(), aaaa3->read(0, &data, hello.size()));
  EXPECT_EQ(hello, std::string((const char *)data, hello.size()));
}


================================================
FILE: tests/core/utility/file_dumper_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <iostream>
#include <gtest/gtest.h>
#include "zvec/core/framework/index_factory.h"
#include "zvec/core/framework/index_helper.h"
#include "zvec/core/framework/index_segment_storage.h"

using namespace zvec;
using namespace zvec::core;

//! Dump a meta plus ten small segments with FileDumper, then read them back
//! through MMapFileReadStorage and compare the contents.
TEST(FileDumper, General) {
  std::string file_path = "file_dumper_test_file";

  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_TRUE(dumper);

  IndexMeta meta1;
  meta1.set_trainer("index_trainer", 0, ailego::Params());
  ASSERT_EQ(0, dumper->create(file_path));
  EXPECT_EQ(0, IndexHelper::SerializeToDumper(meta1, dumper.get()));

  // Each segment is named after its index and holds one greeting string
  for (size_t i = 0; i < 10; ++i) {
    std::string hello = "Hello world!!! #" + std::to_string(i);
    EXPECT_EQ(hello.size(), dumper->write(hello.data(), hello.size()));
    EXPECT_EQ(0, dumper->append(std::to_string(i), hello.size(), 0, 0));
  }
  ASSERT_EQ(0, dumper->close());

  auto container = IndexFactory::CreateStorage("MMapFileReadStorage");
  ASSERT_TRUE(container);

  ailego::Params params;
  params.set("proxima.mmap_file.container.memory_locking", true);
  params.set("proxima.mmap_file.container.memory_warmup", true);
  params.set("proxima.mmap_file.container.checksum_validation", true);
  ASSERT_EQ(0, container->init(params));

  IndexMeta meta2;
  EXPECT_EQ("", meta2.trainer_name());
  ASSERT_EQ(0, container->open(file_path, false));
  EXPECT_EQ(0, IndexHelper::DeserializeFromStorage(container.get(), &meta2));
  EXPECT_EQ("index_trainer", meta2.trainer_name());

  for (size_t i = 0; i < 10; ++i) {
    auto seg = container->get(std::to_string(i));
    // Fatal: a missing segment would be dereferenced on the next line
    ASSERT_TRUE(seg);
    const void *data = nullptr;
    // Fatal: `data` is only safe to read when the full segment was returned
    ASSERT_EQ(seg->data_size(), seg->read(0, &data, seg->data_size()));

    std::string hello = "Hello world!!! #" + std::to_string(i);
    EXPECT_EQ(hello, std::string((const char *)data, seg->data_size()));
  }
}

//! Write three named sub-segments ("AAAAA", "BBBBB", "CCCCC") into a single
//! FileDumper through IndexSegmentDumper wrappers, then check that the
//! resulting file can be opened by MMapFileReadStorage.
TEST(IndexSegmentDumper, General) {
  std::string file_path = "index_segment_dumper_test_file";

  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_TRUE(dumper);
  ASSERT_EQ(0, dumper->create(file_path));

  // Wraps `dumper` in an IndexSegmentDumper called `segment_name`, serializes
  // `segment_meta` into it and appends one greeting per index in
  // [first, last). The wrapper is released when the lambda returns, i.e.
  // before the next sub-segment is started.
  auto dump_segment = [&dumper](const char *segment_name,
                                IndexMeta &segment_meta,
                                const std::string &greeting, size_t first,
                                size_t last) {
    IndexDumper::Pointer segment_dumper =
        std::make_shared<IndexSegmentDumper>(dumper, segment_name);
    EXPECT_EQ(0,
              IndexHelper::SerializeToDumper(segment_meta, segment_dumper.get()));

    for (size_t idx = first; idx < last; ++idx) {
      std::string text = greeting + std::to_string(idx);
      EXPECT_EQ(text.size(), segment_dumper->write(text.data(), text.size()));
      EXPECT_EQ(0,
                segment_dumper->append(std::to_string(idx), text.size(), 0, 0));
    }
  };

  IndexMeta trainer_meta;
  trainer_meta.set_trainer("index_trainer", 0, ailego::Params());
  dump_segment("AAAAA", trainer_meta, "A: Hello world!!! #", 0, 10);

  IndexMeta builder_meta;
  builder_meta.set_builder("index_builder", 0, ailego::Params());
  dump_segment("BBBBB", builder_meta, "B: Hello world!!! #", 100, 110);

  IndexMeta converter_meta;
  converter_meta.set_converter("index_converter", 0, ailego::Params());
  dump_segment("CCCCC", converter_meta, "C: Hello world!!! #", 1000, 1010);

  ASSERT_EQ(0, dumper->close());

  ///// Read data with container

  auto container = IndexFactory::CreateStorage("MMapFileReadStorage");
  ASSERT_TRUE(container);

  ailego::Params container_params;
  container_params.set("proxima.mmap_file.container.memory_locking", true);
  container_params.set("proxima.mmap_file.container.memory_warmup", true);
  container_params.set("proxima.mmap_file.container.checksum_validation",
                       true);
  ASSERT_EQ(0, container->init(container_params));
  ASSERT_EQ(0, container->open(file_path, false));
}


================================================
FILE: tests/core/utility/memory_dumper_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <iostream>
#include <gtest/gtest.h>
#include "zvec/core/framework/index_factory.h"
#include "zvec/core/framework/index_helper.h"

using namespace zvec;
using namespace zvec::core;

//! Dump a meta plus ten small segments with MemoryDumper, then read them
//! back through MemoryReadStorage and compare the contents.
TEST(MemoryDumper, General) {
  std::string file_path = "memory_dumper_test_file";

  auto dumper = IndexFactory::CreateDumper("MemoryDumper");
  ASSERT_TRUE(dumper);

  IndexMeta meta1;
  meta1.set_trainer("index_trainer", 0, ailego::Params());
  ASSERT_EQ(0, dumper->create(file_path));
  EXPECT_EQ(0, IndexHelper::SerializeToDumper(meta1, dumper.get()));

  // Each segment is named after its index and holds one greeting string
  for (size_t i = 0; i < 10; ++i) {
    std::string hello = "Hello world!!! #" + std::to_string(i);
    EXPECT_EQ(hello.size(), dumper->write(hello.data(), hello.size()));
    EXPECT_EQ(0, dumper->append(std::to_string(i), hello.size(), 0, 0));
  }
  ASSERT_EQ(0, dumper->close());

  auto container = IndexFactory::CreateStorage("MemoryReadStorage");
  ASSERT_TRUE(container);

  ailego::Params params;
  params.set("memory.container.checksum_validation", true);
  ASSERT_EQ(0, container->init(params));

  IndexMeta meta2;
  EXPECT_EQ("", meta2.trainer_name());
  ASSERT_EQ(0, container->open(file_path, false));

  EXPECT_EQ(0, IndexHelper::DeserializeFromStorage(container.get(), &meta2));
  EXPECT_EQ("index_trainer", meta2.trainer_name());

  for (size_t i = 0; i < 10; ++i) {
    auto seg = container->get(std::to_string(i));
    // Fatal: a missing segment would be dereferenced on the next line
    ASSERT_TRUE(seg);
    const void *data = nullptr;
    // Fatal: `data` is only safe to read when the full segment was returned
    ASSERT_EQ(seg->data_size(), seg->read(0, &data, seg->data_size()));

    std::string hello = "Hello world!!! #" + std::to_string(i);
    EXPECT_EQ(hello, std::string((const char *)data, seg->data_size()));
  }
}


================================================
FILE: tests/core/utility/mmap_file_container_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <iostream>
#include <gtest/gtest.h>
#include "zvec/core/framework/index_factory.h"
#include "zvec/core/framework/index_helper.h"

using namespace zvec;
using namespace zvec::core;

//! Return a uniformly distributed random integer in the closed range [m, n].
//! The engine is seeded once from std::random_device and shared by all calls.
static int GenRandInt(int m, int n) {
  static std::mt19937 engine((std::random_device())());
  std::uniform_int_distribution<int> pick(m, n);
  return pick(engine);
}

static void AddRandomPadding(const std::string &in, const std::string &out,
                             size_t header_padding_size,
                             size_t footer_padding_size) {
  ailego::File out_file;
  out_file.create(out, 0);
  for (size_t i = 0; i < header_padding_size; ++i) {
    uint8_t r = GenRandInt(0, 255);
    out_file.write(&r, 1);
  }

  ailego::File in_file;
  ASSERT_TRUE(in_file.open(in, true));
  std::string buf(in_file.size(), '\0');
  ASSERT_EQ(buf.size(), in_file.read(&buf[0], buf.size()));
  out_file.write(buf.data(), buf.size());

  for (size_t i = 0; i < footer_padding_size; ++i) {
    uint8_t r = GenRandInt(0, 255);
    out_file.write(&r, 1);
  }
}

// Dumps an index file with 21 text segments, embeds it in a larger file with
// random header/footer padding, and verifies that MMapFileReadStorage can
// locate and read every segment when given header/footer offsets.
TEST(MMapFileReadStorage, General) {
  std::string file_path = "mmap_file_container_test_file";
  std::string file_path_padding = "mmap_file_container_test_file_padding";

  auto dumper = IndexFactory::CreateDumper("FileDumper");
  ASSERT_TRUE(dumper);

  IndexMeta meta1;
  meta1.set_trainer("index_trainer", 0, ailego::Params());
  ASSERT_EQ(0, dumper->create(file_path));
  EXPECT_EQ(0, IndexHelper::SerializeToDumper(meta1, dumper.get()));

  for (size_t i = 0; i < 21; ++i) {
    std::string hello = "Hello world!!! #" + std::to_string(i);
    EXPECT_EQ(hello.size(), dumper->write(hello.data(), hello.size()));
    EXPECT_EQ(0, dumper->append(std::to_string(i), hello.size(), 0, 0));
  }
  ASSERT_EQ(0, dumper->close());

  size_t header_paddings = GenRandInt(0, 1024);
  size_t footer_paddings = GenRandInt(0, 1024);
  // AddRandomPadding uses fatal assertions internally; propagate them so the
  // test does not continue with a missing or truncated padded file.
  ASSERT_NO_FATAL_FAILURE(AddRandomPadding(file_path, file_path_padding,
                                           header_paddings, footer_paddings));

  ailego::File file;
  ASSERT_TRUE(file.open(file_path_padding, true));

  // Do the offset arithmetic in signed 64-bit values so that negative offsets
  // are computed directly instead of through unsigned wraparound.
  const int64_t file_size = static_cast<int64_t>(file.size());
  const int64_t header_pad = static_cast<int64_t>(header_paddings);
  const int64_t footer_pad = static_cast<int64_t>(footer_paddings);

  // Each offset is randomly expressed either as a non-negative value or as
  // the equivalent non-positive value (the same position minus the file
  // size).
  // NOTE(review): presumably the storage treats negative offsets as relative
  // to the end of the file — confirm against MMapFileReadStorage.
  int64_t header_offset =
      GenRandInt(0, 1) ? header_pad : header_pad - file_size;
  int64_t footer_offset = (GenRandInt(0, 1) ? file_size : 0) - footer_pad;

  auto container = IndexFactory::CreateStorage("MMapFileReadStorage");
  ASSERT_TRUE(container);

  ailego::Params params;
  params.set("proxima.mmap_file.container.memory_locking", true);
  params.set("proxima.mmap_file.container.memory_warmup", true);
  params.set("proxima.mmap_file.container.checksum_validation", true);
  params.set("proxima.mmap_file.container.header_offset", header_offset);
  params.set("proxima.mmap_file.container.footer_offset", footer_offset);
  ASSERT_EQ(0, container->init(params));

  IndexMeta meta2;
  EXPECT_EQ(0u, container->get_all().size());
  EXPECT_EQ("", meta2.trainer_name());
  EXPECT_EQ("", meta2.searcher_name());
  ASSERT_EQ(0, container->open(file_path_padding, false));
  EXPECT_EQ(0, IndexHelper::DeserializeFromStorage(container.get(), &meta2));
  // The test expects 23 segments in total: the 21 text segments written above
  // plus two more that are not appended explicitly by this test.
  EXPECT_EQ(23u, container->get_all().size());
  EXPECT_EQ("index_trainer", meta2.trainer_name());
  EXPECT_EQ("", meta2.searcher_name());

  for (size_t i = 0; i < 21; ++i) {
    auto seg = container->get(std::to_string(i));
    ASSERT_TRUE(seg) << "missing segment " << i;
    auto seg1 = seg->clone();
    ASSERT_TRUE(seg1) << "failed to clone segment " << i;

    const void *data = nullptr;
    EXPECT_EQ(seg1->data_size(), seg1->read(0, &data, seg1->data_size()));
    std::string hello = "Hello world!!! #" + std::to_string(i);
    EXPECT_EQ(hello, std::string((const char *)data, seg1->data_size()));
  }
  container->cleanup();
}


================================================
FILE: tests/core/utility/mmap_file_storage_test.cpp
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <fstream>
#include <iostream>
#include <gtest/gtest.h>
#include <zvec/ailego/buffer/buffer_manager.h>
#include <zvec/core/framework/index_factory.h>
#include <zvec/core/framework/index_helper.h>

using namespace zvec;
using namespace zvec::core;

// Writes an index through MMapFileStorage with the huge-page option enabled,
// checks that the process has at least one mapping backed by pages larger
// than 4 kB, then reopens the file with a second storage instance and
// verifies the metadata and segment contents.
// NOTE(review): the file lives under /mnt/huge, so this presumably requires a
// hugetlbfs mount at that path — confirm the environment prerequisites.
TEST(MMapFileStorage, TestHugePage) {
  std::string file_path = "/mnt/huge/mmap_file_storage_test_file";
  // std::string file_path = "mmap_file_storage_test_file";
  ailego::File::Delete(file_path);

  auto write_storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_TRUE(write_storage);

  ailego::Params params;
  params.set("proxima.mmap_file.storage.huge_page", true);
  EXPECT_EQ(0, write_storage->init(params));
  EXPECT_EQ(0, write_storage->open(file_path, true));

  IndexMeta meta;
  meta.set_trainer("trainer", 111, ailego::Params());
  meta.set_searcher("searcher", 222, ailego::Params());
  meta.set_builder("builder", 333, ailego::Params());

  EXPECT_EQ(0, IndexHelper::SerializeToStorage(meta, write_storage.get()));
  EXPECT_EQ(0, write_storage->append("AAAA", 1234));
  EXPECT_EQ(0, write_storage->append("BBBB", 1234));
  auto aaaa = write_storage->get("AAAA");
  ASSERT_TRUE(aaaa);
  auto aaaa1 = aaaa->clone();
  ASSERT_TRUE(aaaa1);
  std::string hello = "Hello world!!!";
  EXPECT_EQ(hello.size(), aaaa1->write(0, hello.data(), hello.size()));

  // Scans /proc/self/smaps and reports whether any mapping of this process
  // has a KernelPageSize larger than 4 kB.
  // NOTE(review): this treats 4 kB as the regular page size; on systems with
  // a larger base page size every mapping would match — confirm the targets.
  auto hasHugePageInUse = []() {
    std::ifstream smaps("/proc/self/smaps");
    if (!smaps.is_open()) {
      std::cerr << "Cannot open /proc/self/smaps\n";
      return false;
    }

    std::string line;
    while (std::getline(smaps, line)) {
      // Look for the "KernelPageSize:" line of each mapping.
      if (line.find("KernelPageSize:") != std::string::npos) {
        // Extract the page size (in kB): parse from the first digit onwards.
        size_t pos = line.find_first_of("0123456789");
        if (pos != std::string::npos) {
          uint64_t pageSizeKB = std::stoull(line.substr(pos));
          // std::cerr << pageSizeKB << std::endl;
          // A regular page is 4 kB; anything larger counts as a huge page.
          if (pageSizeKB > 4) {
            std::cout << "Found HugePage region with KernelPageSize: "
                      << pageSizeKB << " kB\n";
            return true;
          }
        }
      }
    }
    return false;
  };
  EXPECT_TRUE(hasHugePageInUse());
  EXPECT_EQ(0, write_storage->close());

  // Reopen the file with a fresh storage instance. The new instance (not the
  // already closed writer) must be the one that receives the parameters.
  auto read_storage = IndexFactory::CreateStorage("MMapFileStorage");
  ASSERT_TRUE(read_storage);
  EXPECT_EQ(0, read_storage->init(params));
  EXPECT_EQ(0, read_storage->open(file_path, false));

  IndexMeta meta2;
  EXPECT_EQ(0, IndexHelper::DeserializeFromStorage(read_storage.get(), &meta2));
  EXPECT_EQ("trainer", meta2.trainer_name());
  EXPECT_EQ("searcher", meta2.searcher_name());
  EXPECT_EQ("builder", meta2.builder_name());
  auto aaaa2 = read_storage->get("AAAA");
  ASSERT_TRUE(aaaa2);
  const void *data = nullptr;
  EXPECT_EQ(hello.size(), aaaa2->read(0, &data, hello.size()));
  auto aaaa3 = aaaa2->clone();
  ASSERT_TRUE(aaaa3);
  // Fatal on failure: `data` must not be turned into a string unless the read
  // actually delivered hello.size() bytes.
  ASSERT_EQ(hello.size(), aaaa3->read(0, &data, hello.size()));
  EXPECT_EQ(hello, std::string((const char *)data, hello.size()));
}


================================================
FILE: tests/db/CMakeLists.txt
================================================
# Build configuration for the zvec_db unit tests located in this directory.
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

# Test subdirectories.
# NOTE(review): cc_directory / cc_directories are project macros, presumably
# provided by cmake/bazel.cmake — confirm how the two differ.
cc_directory(common)
cc_directories(crash_recovery)
cc_directory(sqlengine)
cc_directories(index)

# Extra linker flags used only on Apple platforms: system frameworks plus
# `-Wl,-U,<symbol>` entries, which tell the Apple linker that the listed
# symbols are allowed to have no definition at link time.
if(APPLE)
  set(APPLE_FRAMEWORK_LIBS
    -framework CoreFoundation
    -framework CoreGraphics
    -framework CoreData
    -framework CoreText
    -framework Security
    -framework Foundation
    -Wl,-U,_MallocExtension_ReleaseFreeMemory
    -Wl,-U,_ProfilerStart
    -Wl,-U,_ProfilerStop
    -Wl,-U,_RegisterThriftProtocol
  )
endif()

# Create one gmock-based test target per *_test.cc file in this directory.
# The target is named after the source file (without extension), always
# compiles index/utils/utils.cc alongside it, and is registered in the
# zvec_db test suite.
file(GLOB ALL_TEST_SRCS *_test.cc)
foreach(CC_SRCS ${ALL_TEST_SRCS})
  get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)
  cc_gmock(
    NAME ${CC_TARGET} STRICT
    LIBS zvec_db
    zvec_proto
    core_knn_flat
    core_knn_flat_sparse
    core_knn_hnsw
    core_knn_hnsw_rabitq
    core_knn_hnsw_sparse
    core_knn_ivf
    core_mix_reducer
    core_metric
    core_utility
    core_quantizer
    ${CMAKE_THREAD_LIBS_INIT}
    ${CMAKE_DL_LIBS}
    SRCS ${CC_SRCS} index/utils/utils.cc
    INCS . .. ../../src
    LDFLAGS ${APPLE_FRAMEWORK_LIBS}
  )
  cc_test_suite(zvec_db ${CC_TARGET})
endforeach()


================================================
FILE: tests/db/collection_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "zvec/db/collection.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <memory>
#include <mutex>
#include <string>
#include <utility>
#include <vector>
#include <gtest/gtest.h>
#include <zvec/ailego/io/file.h>
#include <zvec/ailego/logger/logger.h>
#include <zvec/ailego/utility/file_helper.h>
#include "db/common/file_helper.h"
#include "db/index/common/type_helper.h"
#include "index/utils/utils.h"
#include "zvec/ailego/utility/float_helper.h"
#include "zvec/db/config.h"
#include "zvec/db/doc.h"
#include "zvec/db/index_params.h"
#include "zvec/db/options.h"
#include "zvec/db/schema.h"
#include "zvec/db/status.h"
#include "zvec/db/type.h"

using namespace zvec;
using namespace zvec::test;

// Directory used by most tests in this file as the collection path; the
// CollectionTest fixture removes it before every test.
std::string col_path = "test_collection";

// Fixture for the Collection tests: guarantees that each test starts without
// a leftover collection directory at `col_path`.
class CollectionTest : public ::testing::Test {
 protected:
  // Removes the directory at `col_path` before each test.
  void SetUp() override {
    FileHelper::RemoveDirectory(col_path);
  }

  // Intentionally empty: nothing is removed after a test finishes.
  void TearDown() override {}
};

// Exercises the basic lifecycle of a collection at "./demo":
//   1. create it and check path/schema/options/stats,
//   2. destroy it and check that every API call on the stale handle fails,
//   3. recreate, close and reopen it in read-write mode,
//   4. reopen it read-only and check that all write operations are rejected,
//   5. open it read-only twice more while a handle is still held.
TEST_F(CollectionTest, Feature_CreateAndOpen_General) {
  CollectionOptions options;
  options.read_only_ = false;
  options.enable_mmap_ = true;

  std::string path = "./demo";

  // This test uses its own path, which the fixture does not clean up.
  ailego::FileHelper::RemoveDirectory(path.c_str());

  auto schema = TestHelper::CreateNormalSchema();
  auto result = Collection::CreateAndOpen(path, *schema, options);
  if (!result.has_value()) {
    std::cout << result.error().message() << std::endl;
  }
  ASSERT_TRUE(result.has_value());
  ASSERT_TRUE(ailego::FileHelper::IsExist(path.c_str()));

  // A freshly created collection reports the values it was created with and
  // contains no documents.
  auto col = result.value();
  ASSERT_EQ(col->Path(), path);
  ASSERT_EQ(col->Schema(), *schema);
  ASSERT_EQ(col->Options(), options);
  auto stats = col->Stats().value();
  ASSERT_TRUE(stats.doc_count == 0);
  ASSERT_EQ(stats.index_completeness["dense_fp32"], 1);
  ASSERT_EQ(stats.index_completeness["dense_fp16"], 1);
  // ASSERT_EQ(stats.index_completeness["dense_fp64"], 1);
  ASSERT_EQ(stats.index_completeness["sparse_fp32"], 1);
  ASSERT_EQ(stats.index_completeness["sparse_fp16"], 1);

  ASSERT_EQ(col->Destroy(), Status::OK());

  // after destroyed, every interface should return error
  std::vector<Doc> empty_docs;
  ASSERT_FALSE(col->Insert(empty_docs).has_value());
  ASSERT_FALSE(col->Update(empty_docs).has_value());
  ASSERT_FALSE(col->Delete({}).has_value());
  ASSERT_FALSE(col->DeleteByFilter("").ok());
  ASSERT_FALSE(col->Fetch({}).has_value());
  ASSERT_FALSE(col->Query({}).has_value());
  ASSERT_FALSE(col->GroupByQuery({}).has_value());
  ASSERT_FALSE(col->CreateIndex("", nullptr).ok());
  ASSERT_FALSE(col->DropIndex("").ok());
  ASSERT_FALSE(col->AddColumn(nullptr, "").ok());
  ASSERT_FALSE(col->AlterColumn("", "", nullptr).ok());
  ASSERT_FALSE(col->DropColumn("").ok());
  ASSERT_FALSE(col->CreateIndex("", nullptr).ok());
  ASSERT_FALSE(col->Optimize().ok());
  ASSERT_FALSE(col->Flush().ok());
  ASSERT_FALSE(col->Destroy().ok());
  ASSERT_FALSE(col->Options().has_value());
  ASSERT_FALSE(col->Path().has_value());
  ASSERT_FALSE(col->Stats().has_value());
  ASSERT_FALSE(col->Schema().has_value());

  // Destroy() must also have removed the directory on disk.
  ASSERT_FALSE(ailego::FileHelper::IsExist(path.c_str()));

  // recreate
  result = Collection::CreateAndOpen(path, *schema, options);
  ASSERT_TRUE(result.has_value());
  ASSERT_TRUE(ailego::FileHelper::IsExist(path.c_str()));

  // Drop the handle without destroying the collection.
  col = std::move(result.value());
  col.reset();
  col = nullptr;

  // Releasing the handle must leave the directory in place.
  ASSERT_TRUE(ailego::FileHelper::IsExist(path.c_str()));

  // reopen
  result = Collection::Open(path, options);
  ASSERT_TRUE(result.has_value());
  col = std::move(result.value());
  col.reset();

  // reopen with read-only
  options.read_only_ = true;
  result = Collection::Open(path, options);
  if (!result.has_value()) {
    std::cout << result.error().message() << std::endl;
  }
  ASSERT_TRUE(result.has_value());
  col = result.value();

  ASSERT_EQ(col->Path(), path);
  ASSERT_EQ(col->Schema(), *schema);
  ASSERT_EQ(col->Options(), options);
  stats = col->Stats().value();
  ASSERT_TRUE(stats.doc_count == 0);
  ASSERT_EQ(stats.index_completeness["dense_fp32"], 1);
  ASSERT_EQ(stats.index_completeness["dense_fp16"], 1);
  // ASSERT_EQ(stats.index_completeness["dense_fp64"], 1);
  ASSERT_EQ(stats.index_completeness["sparse_fp32"], 1);
  ASSERT_EQ(stats.index_completeness["sparse_fp16"], 1);

  // when open with read-only, write operation should fail
  ASSERT_FALSE(col->Flush().ok());
  ASSERT_FALSE(col->Destroy().ok());
  ASSERT_FALSE(col->Insert(empty_docs).has_value());
  ASSERT_FALSE(col->Update(empty_docs).has_value());
  ASSERT_FALSE(col->Delete({}).has_value());
  ASSERT_FALSE(col->DeleteByFilter("").ok());
  ASSERT_FALSE(col->CreateIndex("", nullptr).ok());
  ASSERT_FALSE(col->DropIndex("").ok());
  ASSERT_FALSE(col->AddColumn(nullptr, "").ok());
  ASSERT_FALSE(col->AlterColumn("", "", nullptr).ok());
  ASSERT_FALSE(col->DropColumn("").ok());
  ASSERT_FALSE(col->CreateIndex("", nullptr).ok());
  ASSERT_FALSE(col->Optimize().ok());

  // Open the same path read-only two more times from this thread; the second
  // open happens while the handle from the first is still held in `col`.
  result = Collection::Open(path, options);
  if (!result.has_value()) {
    std::cout << result.error().message() << std::endl;
  }
  ASSERT_TRUE(result.has_value());
  col = result.value();

  auto result1 = Collection::Open(path, options);
  if (!result1.has_value()) {
    std::cout << result1.error().message() << std::endl;
  }
  ASSERT_TRUE(result1.has_value());
  auto col1 = result1.value();
}

// Creates an empty collection and verifies that it can be closed and reopened
// many times in a row while still reporting zero documents.
TEST_F(CollectionTest, Feature_CreateAndOpen_Empty) {
  int doc_count = 0;
  int loop_count = 100;

  // create with normal schema
  auto schema = TestHelper::CreateNormalSchema();
  auto options = CollectionOptions{false, true, 100 * 1024 * 1024};

  // Initial creation; doc_count is 0, so no documents are inserted
  auto collection = TestHelper::CreateCollectionWithDoc(
      col_path, *schema, options, 0, doc_count, false);

  ASSERT_NE(collection, nullptr);

  // Close and reopen without writing anything - repeat loop_count times
  for (int i = 0; i < loop_count; i++) {
    // Close collection
    collection.reset();

    // Reopen collection
    auto result = Collection::Open(col_path, options);
    ASSERT_TRUE(result.has_value())
        << "Failed to reopen collection at iteration " << i;
    collection = std::move(result.value());

    // Verify the collection is still empty
    auto stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, 0);
  }
}

// Checks the path validation of Collection::CreateAndOpen: a list of
// well-formed paths must be accepted and a list of paths containing
// whitespace, shell metacharacters or control characters must be rejected.
// Every assertion reports the offending path so a failure can be attributed
// to a specific entry.
TEST_F(CollectionTest, Feature_CreateAndOpen_PathValidate) {
  CollectionOptions options;
  options.read_only_ = false;
  options.enable_mmap_ = true;
  auto schema = TestHelper::CreateNormalSchema();

  {
    const std::vector<std::string> valid_paths = {"abc",
                                                  "data123",
                                                  "my_collection",
                                                  "v1.2_alpha-beta",
                                                  ".hidden",
                                                  "file.txt",
                                                  "/tmp/absolute/path",
                                                  "/tmp/a/b/c",
                                                  "_",
                                                  "-",
                                                  "./tmp"};
    for (const auto &path : valid_paths) {
      // Remove leftovers from a previous run so creation starts clean.
      ailego::FileHelper::RemoveDirectory(path.c_str());

      auto result = Collection::CreateAndOpen(path, *schema, options);
      if (!result.has_value()) {
        std::cout << result.error().message() << std::endl;
      }
      ASSERT_TRUE(result.has_value())
          << "valid path was rejected: [" << path << "]";
    }
  }

  {
    const std::vector<std::string> invalid_paths = {
        " ",          // single space
        "",           // empty
        "file name",  // space
        "file$name",  // $
        "a&b",        // &
        "a|b",        // |
        "a<b",        // <
        "a>b",        // >
        "a\"b",       // "
        "a'b",        // '
        "a;b",        // ;
        "a?b",        // ?
        "a*b",        // *
        "a[b]",       // []
        "a{b}",       // {}
        "a\\b",       // backslash
        "a~b",        // ~
        "a#b",        // #
        "a\tb",       // tab
        "a\nb",       // newline
        "a\rb",       // carriage return
    };
    for (const auto &path : invalid_paths) {
      ailego::FileHelper::RemoveDirectory(path.c_str());

      auto result = Collection::CreateAndOpen(path, *schema, options);
      if (!result.has_value()) {
        std::cout << result.error().message() << std::endl;
      }
      ASSERT_FALSE(result.has_value())
          << "invalid path was accepted: [" << path << "]";
    }
  }
}

// Creates a collection with doc_count documents, then repeatedly closes and
// reopens it, inserting one more document after every reopen. Finally checks
// that every document written across all sessions can be fetched unchanged.
TEST_F(CollectionTest, Feature_CreateAndOpen_Repeated) {
  int doc_count = 1000;
  int loop_count = 100;

  // create with normal schema
  auto schema = TestHelper::CreateNormalSchema();
  auto options = CollectionOptions{false, true, 100 * 1024 * 1024};

  // Initial creation and insertion of 1000 docs
  auto collection = TestHelper::CreateCollectionWithDoc(
      col_path, *schema, options, 0, doc_count, false);

  ASSERT_NE(collection, nullptr);

  // Close and reopen, then insert 1 doc - repeat 100 times
  for (int i = 0; i < loop_count; i++) {
    // Close collection
    collection.reset();

    // Reopen collection
    auto result = Collection::Open(col_path, options);
    ASSERT_TRUE(result.has_value())
        << "Failed to reopen collection at iteration " << i;
    collection = std::move(result.value());

    // Insert 1 additional doc; the range passed is
    // [doc_count + i, doc_count + i + 1)
    auto s = TestHelper::CollectionInsertDoc(collection, doc_count + i,
                                             doc_count + i + 1, false);
    ASSERT_TRUE(s.ok()) << "Failed to insert doc at iteration " << i;

    // Verify total doc count
    auto stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, doc_count + i + 1)
        << "Document count mismatch at iteration " << i;
  }

  // Final verification - check all docs are present
  for (int i = 0; i < doc_count + loop_count; i++) {
    // Rebuild the document expected for id i and fetch it by primary key.
    auto expect_doc = TestHelper::CreateDoc(i, *schema);
    auto result = collection->Fetch({expect_doc.pk()});
    ASSERT_TRUE(result.has_value()) << "Failed to fetch doc " << i;
    ASSERT_EQ(result.value().size(), 1);
    ASSERT_EQ(result.value().count(expect_doc.pk()), 1);
    auto doc = result.value()[expect_doc.pk()];
    if (doc == nullptr) {
      std::cout << "fetch failed, doc_id: " << i << std::endl;
    }
    ASSERT_NE(doc, nullptr);
    // Dump both documents before the assertion so a mismatch is debuggable.
    if (*doc != expect_doc) {
      std::cout << "       doc:" << doc->to_detail_string() << std::endl;
      std::cout << "expect_doc:" << expect_doc.to_detail_string() << std::endl;
    }
    ASSERT_EQ(*doc, expect_doc);
  }

  // Clean up
  ASSERT_TRUE(collection->Destroy().ok());
}

// Verifies that several threads can open the same collection in read-only
// mode at the same time.
TEST_F(CollectionTest, Feature_CreateAndOpen_MultiThread) {
  int doc_count = 0;

  // create with normal schema
  auto schema = TestHelper::CreateNormalSchema();
  auto options = CollectionOptions{false, true, 100 * 1024 * 1024};

  // Initial creation; doc_count is 0, so the collection stays empty. The
  // writable handle is released before the read-only opens start.
  auto collection = TestHelper::CreateCollectionWithDoc(
      col_path, *schema, options, 0, doc_count, false);
  ASSERT_NE(collection, nullptr);
  collection.reset();

  options.read_only_ = true;
  // Set by any worker whose open fails; checked after all threads joined.
  std::atomic<bool> has_error{false};
  auto open_readonly = [&]() {
    auto coll = Collection::Open(col_path, options);
    if (!coll.has_value()) {
      LOG_ERROR("Failed to reopen collection: %s", coll.error().c_str());
      has_error.store(true);
    }
    // Keep the handle alive for a while so the opens of the different
    // threads overlap in time.
    std::this_thread::sleep_for(std::chrono::milliseconds(100));
  };
  std::vector<std::thread> threads;
  for (int i = 0; i < 10; i++) {
    threads.emplace_back(open_readonly);
  }
  for (auto &t : threads) {
    t.join();
  }
  ASSERT_FALSE(has_error.load());
}

// Checks the size limit enforced on write calls: inserting or upserting the
// id range [0, 1024) must succeed, while the range [0, 1025) must be
// rejected.
// NOTE(review): the trailing `true` argument of the helpers presumably makes
// them submit the whole range in a single call — confirm against TestHelper.
TEST_F(CollectionTest, Feature_Write_Batch_Validate) {
  FileHelper::RemoveDirectory(col_path);

  // create with normal schema
  auto schema = TestHelper::CreateNormalSchema(false);
  auto options = CollectionOptions{false, true, 100 * 1024 * 1024};
  auto collection = TestHelper::CreateCollectionWithDoc(col_path, *schema,
                                                        options, 0, 0, false);
  // Fail the test cleanly instead of dereferencing a null handle below.
  ASSERT_NE(collection, nullptr);

  auto stats = collection->Stats().value();
  ASSERT_EQ(stats.doc_count, 0);
  ASSERT_EQ(stats.index_completeness["dense_fp32"], 1);

  // insert batch docs
  auto insert_normal_status =
      TestHelper::CollectionInsertDoc(collection, 0, 1024, false, false, true);
  ASSERT_TRUE(insert_normal_status.ok());

  auto insert_exceed_status =
      TestHelper::CollectionInsertDoc(collection, 0, 1025, false, false, true);
  ASSERT_FALSE(insert_exceed_status.ok());

  // upsert batch docs
  auto upsert_normal_status =
      TestHelper::CollectionUpsertDoc(collection, 0, 1024, false, true);
  ASSERT_TRUE(upsert_normal_status.ok());

  auto upsert_exceed_status =
      TestHelper::CollectionUpsertDoc(collection, 0, 1025, false, true);
  ASSERT_FALSE(upsert_exceed_status.ok());
}

// Inserts documents into a collection with the normal schema, for several
// combinations of schema nullability, document nullability and document
// count. Each scenario checks stats and fetch results, then flushes, reopens
// the collection, inserts a second batch and checks again.
TEST_F(CollectionTest, Feature_Insert_General) {
  // Runs one scenario. Note that the ASSERT_* macros return from this lambda
  // only: after a fatal failure the remaining func(...) calls below still run.
  auto func = [&](bool schema_nullable, bool doc_nullable,
                  int doc_count = 1000) {
    FileHelper::RemoveDirectory(col_path);

    // create with normal schema
    auto schema = TestHelper::CreateNormalSchema(schema_nullable);
    auto options = CollectionOptions{false, true, 100 * 1024 * 1024};
    auto collection = TestHelper::CreateCollectionWithDoc(
        col_path, *schema, options, 0, doc_count, doc_nullable);


    // Null documents in a non-nullable schema are expected to make the
    // helper fail; there is nothing more to verify in that case.
    if (!schema_nullable && doc_nullable) {
      ASSERT_EQ(collection, nullptr);
      return;
    } else {
      ASSERT_NE(collection, nullptr);
    }

    auto stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, doc_count);
    ASSERT_EQ(stats.index_completeness["dense_fp32"], 1);
    ASSERT_EQ(stats.index_completeness["dense_fp16"], 1);
    // ASSERT_EQ(stats.index_completeness["dense_fp64"], 1);
    ASSERT_EQ(stats.index_completeness["sparse_fp32"], 1);
    ASSERT_EQ(stats.index_completeness["sparse_fp16"], 1);

    // validate fetch result
    for (int i = 0; i < doc_count; i++) {
      auto expect_doc = doc_nullable ? TestHelper::CreateDocNull(i, *schema)
                                     : TestHelper::CreateDoc(i, *schema);
      auto result = collection->Fetch({expect_doc.pk()});
      ASSERT_TRUE(result.has_value());
      ASSERT_EQ(result.value().size(), 1);
      ASSERT_EQ(result.value().count(expect_doc.pk()), 1);
      auto doc = result.value()[expect_doc.pk()];
      ASSERT_NE(doc, nullptr);
      // Dump both documents before the assertion so a mismatch is debuggable.
      if (*doc != expect_doc) {
        std::cout << "       doc:" << doc->to_detail_string() << std::endl;
        std::cout << "expect_doc:" << expect_doc.to_detail_string()
                  << std::endl;
      }
      ASSERT_EQ(*doc, expect_doc);
    }

    ASSERT_TRUE(collection->Flush().ok());

    ASSERT_NE(collection, nullptr);

    collection.reset();
    // Reopen collection
    auto result = Collection::Open(col_path, options);
    ASSERT_TRUE(result.has_value());
    collection = std::move(result.value());

    // insert another doc_count docs, passing the id range
    // [doc_count, doc_count * 2)
    auto s = TestHelper::CollectionInsertDoc(collection, doc_count,
                                             doc_count * 2, doc_nullable);
    ASSERT_TRUE(s.ok());

    // validate fetch result
    for (int i = 0; i < doc_count * 2; i++) {
      auto expect_doc = doc_nullable ? TestHelper::CreateDocNull(i, *schema)
                                     : TestHelper::CreateDoc(i, *schema);
      auto result = collection->Fetch({expect_doc.pk()});
      ASSERT_TRUE(result.has_value());
      ASSERT_EQ(result.value().size(), 1);
      ASSERT_EQ(result.value().count(expect_doc.pk()), 1);
      auto doc = result.value()[expect_doc.pk()];
      ASSERT_NE(doc, nullptr);
      if (*doc != expect_doc) {
        std::cout << "       doc:" << doc->to_detail_string() << std::endl;
        std::cout << "expect_doc:" << expect_doc.to_detail_string()
                  << std::endl;
      }
      ASSERT_EQ(*doc, expect_doc);
    }

    stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, doc_count * 2);
    ASSERT_EQ(stats.index_completeness["dense_fp32"], 1);

    ASSERT_EQ(stats.index_completeness["dense_fp16"], 1);
    // ASSERT_EQ(stats.index_completeness["dense_fp64"], 1);
    ASSERT_EQ(stats.index_completeness["sparse_fp32"], 1);
    ASSERT_EQ(stats.index_completeness["sparse_fp16"], 1);
  };

  // All four nullability combinations with the default document count.
  func(false, false);
  func(true, true);
  func(true, false);
  func(false, true);

  // Boundary document counts.
  func(false, false, 0);
  func(false, false, 1);
  func(false, false, 2);
}

// Inserts documents into a collection whose schema carries scalar indexes,
// for several combinations of nullability and the enable_optimize flag.
// Fetch results are validated before and after a flush and again after a
// second batch has been inserted.
TEST_F(CollectionTest, Feature_Insert_ScalarIndex) {
  // Runs one scenario. The ASSERT_* macros return from this lambda only, so
  // the remaining func(...) calls below still run after a fatal failure.
  auto func = [&](bool nullable, bool enable_optimize, bool doc_nullable) {
    std::cout << "**** TEST INFO: nullable: " << nullable
              << ", enable_optimize: " << enable_optimize
              << ", doc_nullable: " << doc_nullable << std::endl;

    int doc_count = 1000;
    // create with a schema that has scalar indexes
    auto schema =
        TestHelper::CreateSchemaWithScalarIndex(nullable, enable_optimize);
    auto options = CollectionOptions{false, true, 100 * 1024 * 1024};
    FileHelper::RemoveDirectory(col_path);
    auto collection = TestHelper::CreateCollectionWithDoc(
        col_path, *schema, options, 0, doc_count, doc_nullable);

    // Null documents in a non-nullable schema are expected to make the
    // helper fail; there is nothing more to verify in that case.
    if (!nullable && doc_nullable) {
      ASSERT_EQ(collection, nullptr);
      return;
    } else {
      ASSERT_NE(collection, nullptr);
    }

    // validate fetch result before flushing
    for (int i = 0; i < doc_count; i++) {
      auto expect_doc = doc_nullable ? TestHelper::CreateDocNull(i, *schema)
                                     : TestHelper::CreateDoc(i, *schema);
      auto result = collection->Fetch({expect_doc.pk()});
      ASSERT_TRUE(result.has_value());
      ASSERT_EQ(result.value().size(), 1);
      ASSERT_EQ(result.value().count(expect_doc.pk()), 1);
      auto doc = result.value()[expect_doc.pk()];
      ASSERT_NE(doc, nullptr);
      // Dump both documents before the assertion so a mismatch is debuggable.
      if (*doc != expect_doc) {
        std::cout << "       doc:" << doc->to_detail_string() << std::endl;
        std::cout << "expect_doc:" << expect_doc.to_detail_string()
                  << std::endl;
      }
      ASSERT_EQ(*doc, expect_doc);
    }

    ASSERT_TRUE(collection->Flush().ok());

    ASSERT_NE(collection, nullptr);

    auto stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, doc_count);
    ASSERT_EQ(stats.index_completeness["dense_fp32"], 1);

    // validate fetch result
    for (int i = 0; i < doc_count; i++) {
      auto expect_doc = doc_nullable ? TestHelper::CreateDocNull(i, *schema)
                                     : TestHelper::CreateDoc(i, *schema);
      auto result = collection->Fetch({expect_doc.pk()});
      ASSERT_TRUE(result.has_value());
      ASSERT_EQ(result.value().size(), 1);
      ASSERT_EQ(result.value().count(expect_doc.pk()), 1);
      auto doc = result.value()[expect_doc.pk()];
      ASSERT_NE(doc, nullptr);
      if (*doc != expect_doc) {
        std::cout << "       doc:" << doc->to_detail_string() << std::endl;
        std::cout << "expect_doc:" << expect_doc.to_detail_string()
                  << std::endl;
      }
      ASSERT_EQ(*doc, expect_doc);
    }

    // insert another 1000 docs
    auto s = TestHelper::CollectionInsertDoc(collection, doc_count,
                                             doc_count * 2, doc_nullable);
    ASSERT_TRUE(s.ok());
    ASSERT_TRUE(collection->Flush().ok());

    // validate fetch result
    for (int i = 0; i < doc_count * 2; i++) {
      auto expect_doc = doc_nullable ? TestHelper::CreateDocNull(i, *schema)
                                     : TestHelper::CreateDoc(i, *schema);
      auto result = collection->Fetch({expect_doc.pk()});
      ASSERT_TRUE(result.has_value());
      ASSERT_EQ(result.value().size(), 1);
      ASSERT_EQ(result.value().count(expect_doc.pk()), 1);
      auto doc = result.value()[expect_doc.pk()];
      ASSERT_NE(doc, nullptr);
      if (*doc != expect_doc) {
        std::cout << "       doc:" << doc->to_detail_string() << std::endl;
        std::cout << "expect_doc:" << expect_doc.to_detail_string()
                  << std::endl;
      }
      ASSERT_EQ(*doc, expect_doc);
    }

    stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, doc_count * 2);
    ASSERT_EQ(stats.index_completeness["dense_fp32"], 1);
  };

  // Arguments: (nullable, enable_optimize, doc_nullable).
  func(false, false, false);
  func(false, true, false);
  func(false, false, true);
  func(true, false, true);
  func(true, false, false);
}

// Inserts documents into a collection whose schema carries an HNSW vector
// index, for several metric / quantization combinations. Fetch results are
// validated after the initial insert, after flush + reopen, and after a
// second batch has been inserted.
TEST_F(CollectionTest, Feature_Insert_VectorIndex) {
  // Runs one scenario. The ASSERT_* macros return from this lambda only, so
  // the remaining func(...) calls below still run after a fatal failure.
  auto func = [&](MetricType metric_type = MetricType::IP,
                  QuantizeType quantize_type = QuantizeType::UNDEFINED) {
    int doc_count = 1000;
    // create with a schema that has a vector index
    // NOTE(review): 16 and 20 are presumably the HNSW graph degree and the
    // construction-time search width — confirm against HnswIndexParams.
    auto schema = TestHelper::CreateSchemaWithVectorIndex(
        false, "demo",
        std::make_shared<HnswIndexParams>(metric_type, 16, 20, quantize_type));
    std::cout << "init schema: " << schema->to_string_formatted() << std::endl;

    auto options = CollectionOptions{false, true, 100 * 1024 * 1024};
    FileHelper::RemoveDirectory(col_path);
    auto collection = TestHelper::CreateCollectionWithDoc(
        col_path, *schema, options, 0, doc_count, false);
    // Check the handle before its first use so a failed creation is reported
    // as a test failure rather than a null dereference.
    ASSERT_NE(collection, nullptr);

    // validate fetch result
    for (int i = 0; i < doc_count; i++) {
      auto expect_doc = TestHelper::CreateDoc(i, *schema);
      auto result = collection->Fetch({expect_doc.pk()});
      ASSERT_TRUE(result.has_value());
      ASSERT_EQ(result.value().size(), 1);
      ASSERT_EQ(result.value().count(expect_doc.pk()), 1);
      auto doc = result.value()[expect_doc.pk()];
      ASSERT_NE(doc, nullptr);
      // The exact-equality check is skipped for the COSINE metric.
      // NOTE(review): presumably because stored vectors are normalized for
      // cosine and no longer match the input bit-for-bit — confirm.
      if (metric_type != MetricType::COSINE) {
        if (*doc != expect_doc) {
          std::cout << "       doc:" << doc->to_detail_string() << std::endl;
          std::cout << "expect_doc:" << expect_doc.to_detail_string()
                    << std::endl;
        }
        ASSERT_EQ(*doc, expect_doc);
      }
    }

    ASSERT_TRUE(collection->Flush().ok());

    collection.reset();
    // Reopen collection
    auto result = Collection::Open(col_path, options);
    ASSERT_TRUE(result.has_value());
    collection = std::move(result.value());
    ASSERT_NE(collection, nullptr);

    // With a vector index, the test expects the "dense_fp32" completeness to
    // be 0 after inserting.
    auto stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, doc_count);
    ASSERT_EQ(stats.index_completeness["dense_fp32"], 0);

    // validate fetch result
    for (int i = 0; i < doc_count; i++) {
      auto expect_doc = TestHelper::CreateDoc(i, *schema);
      auto result = collection->Fetch({expect_doc.pk()});
      ASSERT_TRUE(result.has_value());
      ASSERT_EQ(result.value().size(), 1);
      ASSERT_EQ(result.value().count(expect_doc.pk()), 1);
      auto doc = result.value()[expect_doc.pk()];
      ASSERT_NE(doc, nullptr);
      if (metric_type != MetricType::COSINE) {
        if (*doc != expect_doc) {
          std::cout << "       doc:" << doc->to_detail_string() << std::endl;
          std::cout << "expect_doc:" << expect_doc.to_detail_string()
                    << std::endl;
        }
        ASSERT_EQ(*doc, expect_doc);
      }
    }

    // insert another 1000 docs
    auto s = TestHelper::CollectionInsertDoc(collection, doc_count,
                                             doc_count * 2, false);
    ASSERT_TRUE(s.ok());
    ASSERT_TRUE(collection->Flush().ok());

    // validate fetch result
    for (int i = 0; i < doc_count * 2; i++) {
      auto expect_doc = TestHelper::CreateDoc(i, *schema);
      auto result = collection->Fetch({expect_doc.pk()});
      ASSERT_TRUE(result.has_value());
      ASSERT_EQ(result.value().size(), 1);
      ASSERT_EQ(result.value().count(expect_doc.pk()), 1);
      auto doc = result.value()[expect_doc.pk()];
      ASSERT_NE(doc, nullptr);
      if (metric_type != MetricType::COSINE) {
        if (*doc != expect_doc) {
          std::cout << "       doc:" << doc->to_detail_string() << std::endl;
          std::cout << "expect_doc:" << expect_doc.to_detail_string()
                    << std::endl;
        }
        ASSERT_EQ(*doc, expect_doc);
      }
    }

    stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, doc_count * 2);
    ASSERT_EQ(stats.index_completeness["dense_fp32"], 0);
  };

  func(MetricType::COSINE);
  func(MetricType::L2);
  func(MetricType::IP);
  func(MetricType::COSINE, QuantizeType::FP16);
  func(MetricType::IP, QuantizeType::FP16);
}

// Inserts doc counts chosen so that either the first batch or the two batches
// combined land right around `segment_doc_count` (the max doc count given to
// the schema helper), then checks that every doc stays fetchable across a
// flush, a second batch of inserts and two reopen cycles.
TEST_F(CollectionTest, Feature_Insert_SwitchSegment) {
  auto func = [&](uint64_t segment_doc_count, uint64_t doc_count) {
    std::cout << "**** TEST INFO: segment_doc_count: " << segment_doc_count
              << ", insert_doc_count: " << doc_count << std::endl;

    // start from a clean directory
    FileHelper::RemoveDirectory(col_path);

    // create with a schema limited to `segment_doc_count` docs
    auto schema = TestHelper::CreateSchemaWithMaxDocCount(segment_doc_count);
    auto options = CollectionOptions{false, true, 100 * 1024 * 1024};
    auto collection = TestHelper::CreateCollectionWithDoc(
        col_path, *schema, options, 0, doc_count);

    // Validate the handle BEFORE it is dereferenced by Flush().
    ASSERT_NE(collection, nullptr);

    ASSERT_TRUE(collection->Flush().ok());

    collection.reset();
    // Reopen collection
    auto result = Collection::Open(col_path, options);
    ASSERT_TRUE(result.has_value());
    collection = std::move(result.value());

    auto stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, doc_count);
    ASSERT_EQ(stats.index_completeness["dense_fp32"], 1);

    // Fetches docs [0, total_doc_count) one by one and compares each with the
    // doc regenerated from the same id.
    auto check_doc = [&](int total_doc_count) {
      // validate fetch result
      for (int i = 0; i < total_doc_count; i++) {
        auto expect_doc = TestHelper::CreateDoc(i, *schema);
        auto result = collection->Fetch({expect_doc.pk()});
        ASSERT_TRUE(result.has_value());
        ASSERT_EQ(result.value().size(), 1);
        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);
        auto doc = result.value()[expect_doc.pk()];
        ASSERT_NE(doc, nullptr);
        if (*doc != expect_doc) {
          std::cout << "       doc:" << doc->to_detail_string() << std::endl;
          std::cout << "expect_doc:" << expect_doc.to_detail_string()
                    << std::endl;
        }
        ASSERT_EQ(*doc, expect_doc);
      }
    };

    // ASSERT_* inside check_doc only returns from check_doc itself; propagate
    // the fatal failure so "check success" is never printed after a failure.
    ASSERT_NO_FATAL_FAILURE(check_doc(doc_count));
    std::cout << "check success 1" << std::endl;

    // insert another `doc_count` docs: ids [doc_count, doc_count * 2)
    auto s =
        TestHelper::CollectionInsertDoc(collection, doc_count, doc_count * 2);
    ASSERT_TRUE(s.ok());
    ASSERT_TRUE(collection->Flush().ok());

    // validate fetch result
    ASSERT_NO_FATAL_FAILURE(check_doc(doc_count * 2));
    std::cout << "check success 2" << std::endl;

    stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, doc_count * 2);
    ASSERT_EQ(stats.index_completeness["dense_fp32"], 1);

    collection.reset();
    // Reopen collection
    result = Collection::Open(col_path, options);
    ASSERT_TRUE(result.has_value());
    collection = std::move(result.value());

    ASSERT_NO_FATAL_FAILURE(check_doc(doc_count * 2));
    std::cout << "check success 3" << std::endl;
  };

  func(1000, 499);
  func(1000, 500);
  func(1000, 501);
  func(1000, 999);
  func(1000, 1000);
  func(1000, 1001);
}

// Re-inserting a doc whose primary key is already stored must be rejected per
// document with ALREADY_EXISTS, while a doc with an unused key is accepted.
TEST_F(CollectionTest, Feature_Insert_Duplicate) {
  constexpr int kSeededDocs = 100;

  auto doc_schema = TestHelper::CreateNormalSchema();
  auto open_opts = CollectionOptions{false, true, 100 * 1024 * 1024};
  FileHelper::RemoveDirectory(col_path);

  // seed the collection through the helper (range arguments 0, kSeededDocs)
  auto coll = TestHelper::CreateCollectionWithDoc(col_path, *doc_schema,
                                                  open_opts, 0, kSeededDocs);

  // insert every seeded doc a second time, one doc per call
  Result<WriteResults> outcome;
  for (int id = 0; id < kSeededDocs; ++id) {
    Doc duplicate = TestHelper::CreateDoc(id, *doc_schema);
    std::vector<Doc> batch = {duplicate};
    outcome = coll->Insert(batch);
    if (!outcome.has_value()) {
      std::cout << outcome.error().message() << std::endl;
    }
    // the call as a whole succeeds; the rejection is reported per document
    ASSERT_TRUE(outcome.has_value());

    auto &&first_status = outcome.value()[0];
    if (!first_status.ok()) {
      std::cout << "0: " << first_status.message() << std::endl;
    }
    ASSERT_FALSE(first_status.ok());
    ASSERT_EQ(first_status.code(), StatusCode::ALREADY_EXISTS);
  }

  // id 101 lies outside the seeded range, so this insert must go through
  Doc fresh = TestHelper::CreateDoc(101, *doc_schema);
  std::vector<Doc> fresh_batch = {fresh};
  outcome = coll->Insert(fresh_batch);
  ASSERT_TRUE(outcome.has_value());
  ASSERT_TRUE(outcome.value()[0].ok());
}

// Builds a collection with the helper's last flag set to `true` (presumably
// selecting the upsert write path - confirm against TestHelper), then checks
// stats and fetch results, flushes, reopens, inserts a second batch and checks
// again. Runs for each nullable-schema / null-doc combination and for tiny
// doc counts.
TEST_F(CollectionTest, Feature_Upsert_General) {
  auto run_case = [&](bool schema_nullable, bool doc_nullable,
                      int doc_count = 1000) {
    FileHelper::RemoveDirectory(col_path);

    // create with normal schema
    auto doc_schema = TestHelper::CreateNormalSchema(schema_nullable);
    auto open_opts = CollectionOptions{false, true, 100 * 1024 * 1024};
    auto coll = TestHelper::CreateCollectionWithDoc(
        col_path, *doc_schema, open_opts, 0, doc_count, doc_nullable, true);

    // null docs against a non-nullable schema: the helper must hand back null
    const bool creation_must_fail = doc_nullable && !schema_nullable;
    if (creation_must_fail) {
      ASSERT_EQ(coll, nullptr);
      return;
    }
    ASSERT_NE(coll, nullptr);

    // regenerates the doc that is expected to be stored under `id`
    auto make_expected = [&](int id) {
      return doc_nullable ? TestHelper::CreateDocNull(id, *doc_schema)
                          : TestHelper::CreateDoc(id, *doc_schema);
    };

    auto col_stats = coll->Stats().value();
    ASSERT_EQ(col_stats.doc_count, doc_count);
    ASSERT_EQ(col_stats.index_completeness["dense_fp32"], 1);
    ASSERT_EQ(col_stats.index_completeness["dense_fp16"], 1);
    // ASSERT_EQ(col_stats.index_completeness["dense_fp64"], 1);
    ASSERT_EQ(col_stats.index_completeness["sparse_fp32"], 1);
    ASSERT_EQ(col_stats.index_completeness["sparse_fp16"], 1);

    // every doc of the first batch must be fetchable and unchanged
    for (int id = 0; id < doc_count; ++id) {
      auto wanted = make_expected(id);
      auto fetched = coll->Fetch({wanted.pk()});
      ASSERT_TRUE(fetched.has_value());
      ASSERT_EQ(fetched.value().size(), 1);
      ASSERT_EQ(fetched.value().count(wanted.pk()), 1);
      auto actual = fetched.value()[wanted.pk()];
      ASSERT_NE(actual, nullptr);
      if (*actual != wanted) {
        std::cout << "       doc:" << actual->to_detail_string() << std::endl;
        std::cout << "expect_doc:" << wanted.to_detail_string() << std::endl;
      }
      ASSERT_EQ(*actual, wanted);
    }

    ASSERT_TRUE(coll->Flush().ok());

    ASSERT_NE(coll, nullptr);

    // drop the handle and open the collection again
    coll.reset();
    auto reopened = Collection::Open(col_path, open_opts);
    ASSERT_TRUE(reopened.has_value());
    coll = std::move(reopened.value());

    // second batch: ids [doc_count, doc_count * 2)
    auto insert_status = TestHelper::CollectionInsertDoc(
        coll, doc_count, doc_count * 2, doc_nullable);
    ASSERT_TRUE(insert_status.ok());

    // both batches must now be fetchable
    for (int id = 0; id < doc_count * 2; ++id) {
      auto wanted = make_expected(id);
      auto fetched = coll->Fetch({wanted.pk()});
      ASSERT_TRUE(fetched.has_value());
      ASSERT_EQ(fetched.value().size(), 1);
      ASSERT_EQ(fetched.value().count(wanted.pk()), 1);
      auto actual = fetched.value()[wanted.pk()];
      ASSERT_NE(actual, nullptr);
      if (*actual != wanted) {
        std::cout << "       doc:" << actual->to_detail_string() << std::endl;
        std::cout << "expect_doc:" << wanted.to_detail_string() << std::endl;
      }
      ASSERT_EQ(*actual, wanted);
    }

    col_stats = coll->Stats().value();
    ASSERT_EQ(col_stats.doc_count, doc_count * 2);
    ASSERT_EQ(col_stats.index_completeness["dense_fp32"], 1);

    ASSERT_EQ(col_stats.index_completeness["dense_fp16"], 1);
    // ASSERT_EQ(col_stats.index_completeness["dense_fp64"], 1);
    ASSERT_EQ(col_stats.index_completeness["sparse_fp32"], 1);
    ASSERT_EQ(col_stats.index_completeness["sparse_fp16"], 1);
  };

  struct Case {
    bool schema_nullable;
    bool doc_nullable;
    int doc_count;
  };
  const Case cases[] = {
      {false, false, 1000}, {true, true, 1000}, {true, false, 1000},
      {false, true, 1000},  {false, false, 0},  {false, false, 1},
      {false, false, 2},
  };
  for (const Case &c : cases) {
    run_case(c.schema_nullable, c.doc_nullable, c.doc_count);
  }
}

// Writes a batch of docs, flushes and reopens the collection, then writes the
// same id range again with the helper's upsert flag set and verifies that
// every doc is still fetchable with the expected content.
TEST_F(CollectionTest, Feature_Upsert_Incremental) {
  auto run_case = [&](bool schema_nullable, bool doc_nullable,
                      int doc_count = 1000) {
    FileHelper::RemoveDirectory(col_path);

    // create with normal schema
    auto doc_schema = TestHelper::CreateNormalSchema(schema_nullable);
    auto open_opts = CollectionOptions{false, true, 100 * 1024 * 1024};
    auto coll = TestHelper::CreateCollectionWithDoc(
        col_path, *doc_schema, open_opts, 0, doc_count, doc_nullable, true);

    // null docs against a non-nullable schema: the helper must hand back null
    const bool creation_must_fail = doc_nullable && !schema_nullable;
    if (creation_must_fail) {
      ASSERT_EQ(coll, nullptr);
      return;
    }
    ASSERT_NE(coll, nullptr);

    // regenerates the doc that is expected to be stored under `id`
    auto make_expected = [&](int id) {
      return doc_nullable ? TestHelper::CreateDocNull(id, *doc_schema)
                          : TestHelper::CreateDoc(id, *doc_schema);
    };

    // state right after creation
    for (int id = 0; id < doc_count; ++id) {
      auto wanted = make_expected(id);
      auto fetched = coll->Fetch({wanted.pk()});
      ASSERT_TRUE(fetched.has_value());
      ASSERT_EQ(fetched.value().size(), 1);
      ASSERT_EQ(fetched.value().count(wanted.pk()), 1);
      auto actual = fetched.value()[wanted.pk()];
      ASSERT_NE(actual, nullptr);
      if (*actual != wanted) {
        std::cout << "       doc:" << actual->to_detail_string() << std::endl;
        std::cout << "expect_doc:" << wanted.to_detail_string() << std::endl;
      }
      ASSERT_EQ(*actual, wanted);
    }

    ASSERT_TRUE(coll->Flush().ok());

    ASSERT_NE(coll, nullptr);

    // drop the handle and open the collection again
    coll.reset();
    auto reopened = Collection::Open(col_path, open_opts);
    ASSERT_TRUE(reopened.has_value());
    coll = std::move(reopened.value());

    // upsert the same id range [0, doc_count) once more
    auto upsert_status = TestHelper::CollectionInsertDoc(coll, 0, doc_count,
                                                         doc_nullable, true);
    ASSERT_TRUE(upsert_status.ok());

    // state after the upsert
    for (int id = 0; id < doc_count; ++id) {
      auto wanted = make_expected(id);
      auto fetched = coll->Fetch({wanted.pk()});
      ASSERT_TRUE(fetched.has_value());
      ASSERT_EQ(fetched.value().size(), 1);
      ASSERT_EQ(fetched.value().count(wanted.pk()), 1);
      auto actual = fetched.value()[wanted.pk()];
      ASSERT_NE(actual, nullptr);
      if (*actual != wanted) {
        std::cout << "       doc:" << actual->to_detail_string() << std::endl;
        std::cout << "expect_doc:" << wanted.to_detail_string() << std::endl;
      }
      ASSERT_EQ(*actual, wanted);
    }
  };

  struct Case {
    bool schema_nullable;
    bool doc_nullable;
    int doc_count;
  };
  const Case cases[] = {
      {false, false, 1000}, {true, true, 1000}, {true, false, 1000},
      {false, true, 1000},  {false, false, 0},  {false, false, 1},
      {false, false, 2},
  };
  for (const Case &c : cases) {
    run_case(c.schema_nullable, c.doc_nullable, c.doc_count);
  }
}

// Covers writing a doc that lacks, or explicitly nulls, the "int32" field
// against a non-nullable schema (the whole call must fail) and against a
// nullable schema (the call must succeed).
TEST_F(CollectionTest, Feature_Upsert_Nullable) {
  // Fetches `pk` from `target` and requires the stored doc to equal `wanted`.
  auto check_doc = [&](const Collection::Ptr &target, const std::string &pk,
                       const Doc &wanted) {
    auto fetched = target->Fetch({pk});
    ASSERT_TRUE(fetched.has_value());
    ASSERT_EQ(fetched.value().size(), 1);
    ASSERT_EQ(fetched.value().count(pk), 1);
    auto actual = fetched.value()[pk];
    ASSERT_NE(actual, nullptr);
    if (*actual != wanted) {
      std::cout << "       doc:" << actual->to_detail_string() << std::endl;
      std::cout << "expect_doc:" << wanted.to_detail_string() << std::endl;
    }
    ASSERT_EQ(*actual, wanted);
  };

  // Prints the error message of a write call that failed as a whole.
  auto print_error = [](auto &outcome) {
    if (!outcome.has_value()) {
      std::cout << outcome.error().message() << std::endl;
    }
  };

  // schema not nullable
  {
    auto doc_schema = TestHelper::CreateNormalSchema();
    auto open_opts = CollectionOptions{false, true, 100 * 1024 * 1024};
    FileHelper::RemoveDirectory(col_path);

    auto coll = TestHelper::CreateCollectionWithDoc(col_path, *doc_schema,
                                                    open_opts, 0, 0);

    // insert one doc
    auto original =
        TestHelper::CreateDoc(0, *doc_schema, TestHelper::MakePK(0));
    std::vector<Doc> batch = {original};
    auto outcome = coll->Insert(batch);
    ASSERT_TRUE(outcome.has_value());

    // upsert the same doc without its "int32" field: must be refused
    auto modified =
        TestHelper::CreateDoc(0, *doc_schema, TestHelper::MakePK(0));
    modified.remove("int32");
    batch = {modified};
    outcome = coll->Upsert(batch);
    print_error(outcome);
    ASSERT_FALSE(outcome.has_value());


    // upsert with "int32" explicitly set to null: must be refused as well
    modified.set_null("int32");
    batch = {modified};
    outcome = coll->Upsert(batch);
    print_error(outcome);
    ASSERT_FALSE(outcome.has_value());

    // the stored doc is still the one that was inserted
    check_doc(coll, original.pk(), original);
  }

  // schema nullable
  {
    auto doc_schema = TestHelper::CreateNormalSchema(true);
    auto open_opts = CollectionOptions{false, true, 100 * 1024 * 1024};
    FileHelper::RemoveDirectory(col_path);

    auto coll = TestHelper::CreateCollectionWithDoc(col_path, *doc_schema,
                                                    open_opts, 0, 0);

    // insert one doc
    auto original =
        TestHelper::CreateDoc(0, *doc_schema, TestHelper::MakePK(0));
    std::vector<Doc> batch = {original};
    auto outcome = coll->Insert(batch);
    ASSERT_TRUE(outcome.has_value());

    // upsert the same doc without its "int32" field: accepted
    auto modified =
        TestHelper::CreateDoc(0, *doc_schema, TestHelper::MakePK(0));
    modified.remove("int32");
    batch = {modified};
    outcome = coll->Upsert(batch);
    print_error(outcome);
    ASSERT_TRUE(outcome.has_value());
    auto &&first_status = outcome.value()[0];
    if (!first_status.ok()) {
      std::cout << first_status.message() << std::endl;
    }
    ASSERT_TRUE(first_status.ok());

    // the stored doc now equals the upserted doc
    check_doc(coll, original.pk(), modified);

    // write "int32" as an explicit null through Update()
    modified.set_null("int32");
    batch = {modified};
    outcome = coll->Update(batch);
    print_error(outcome);
    ASSERT_TRUE(outcome.has_value());

    // the fetched doc must no longer expose an "int32" value
    auto pk = original.pk();
    auto fetched = coll->Fetch({pk});
    ASSERT_TRUE(fetched.has_value());
    ASSERT_EQ(fetched.value().size(), 1);
    ASSERT_EQ(fetched.value().count(pk), 1);
    auto stored = fetched.value()[pk];
    ASSERT_NE(stored, nullptr);
    auto int32_lookup = stored->get_field<int32_t>("int32");
    ASSERT_EQ(int32_lookup.status(), Doc::FieldGetStatus::NOT_FOUND);
  }
}


// Updates every doc in place (pk i receives the content generated for id
// i + 1) and verifies the fetch results, both on the live handle and after
// reopening the collection, at regular checkpoints and at the end.
TEST_F(CollectionTest, Feature_Update_General) {
  auto func = [&](int doc_count) {
    auto schema = TestHelper::CreateNormalSchema();
    auto options = CollectionOptions{false, true, 100 * 1024 * 1024};
    FileHelper::RemoveDirectory(col_path);

    // insert first
    auto collection = TestHelper::CreateCollectionWithDoc(
        col_path, *schema, options, 0, doc_count);
    // Fail cleanly instead of dereferencing a null handle below.
    ASSERT_NE(collection, nullptr);

    // Drops the current handle and opens the collection again from col_path.
    auto reopen = [&]() {
      collection.reset();
      auto result = Collection::Open(col_path, options);
      if (!result.has_value()) {
        std::cout << result.error().message() << std::endl;
      }
      // Never read result.value() when Open reported an error.
      ASSERT_TRUE(result.has_value());
      collection = std::move(result.value());
    };

    // Docs [0, updated_doc_count) must carry the updated content, docs
    // [updated_doc_count, doc_count) must still carry the original content.
    auto check_doc = [&](int updated_doc_count) {
      for (int i = 0; i < updated_doc_count; i++) {
        auto expect_doc =
            TestHelper::CreateDoc(i + 1, *schema, TestHelper::MakePK(i));
        auto result = collection->Fetch({expect_doc.pk()});
        ASSERT_TRUE(result.has_value());
        ASSERT_EQ(result.value().size(), 1);
        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);
        auto doc = result.value()[expect_doc.pk()];
        ASSERT_NE(doc, nullptr);
        if (*doc != expect_doc) {
          std::cout << "       doc:" << doc->to_detail_string() << std::endl;
          std::cout << "expect_doc:" << expect_doc.to_detail_string()
                    << std::endl;
        }
        ASSERT_EQ(*doc, expect_doc);
      }

      // validate fetch result
      for (int i = updated_doc_count; i < doc_count; i++) {
        auto expect_doc = TestHelper::CreateDoc(i, *schema);
        auto result = collection->Fetch({expect_doc.pk()});
        ASSERT_TRUE(result.has_value());
        ASSERT_EQ(result.value().size(), 1);
        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);
        auto doc = result.value()[expect_doc.pk()];
        ASSERT_NE(doc, nullptr);
        if (*doc != expect_doc) {
          std::cout << "       doc:" << doc->to_detail_string() << std::endl;
          std::cout << "expect_doc:" << expect_doc.to_detail_string()
                    << std::endl;
        }
        ASSERT_EQ(*doc, expect_doc);
      }
    };

    // update all docs then
    Result<WriteResults> s;
    for (int i = 0; i < doc_count; i++) {
      Doc new_doc =
          TestHelper::CreateDoc(i + 1, *schema, TestHelper::MakePK(i));
      std::vector<Doc> docs = {new_doc};
      s = collection->Update(docs);
      if (!s.has_value()) {
        std::cout << s.error().message() << std::endl;
      }
      ASSERT_TRUE(s.has_value());
      if (!s.value()[0].ok()) {
        std::cout << s.value()[0].message() << std::endl;
      }
      ASSERT_TRUE(s.value()[0].ok());

      // checkpoint: verify on the live handle, reopen, verify again.
      // ASSERT_* inside a helper lambda only leaves that lambda, so fatal
      // failures are propagated explicitly to stop this case.
      if (i % 100 == 0 || i == 1) {
        ASSERT_NO_FATAL_FAILURE(check_doc(i + 1));
        ASSERT_NO_FATAL_FAILURE(reopen());
        ASSERT_NO_FATAL_FAILURE(check_doc(i + 1));
      }
    }

    ASSERT_NO_FATAL_FAILURE(reopen());
    ASSERT_NO_FATAL_FAILURE(check_doc(doc_count));
  };

  func(99);
  func(100);
  func(101);
  func(1000);
}

// Updates every doc with content generated for id i + 1 plus fixed overrides
// of the "int32", "float" and "string" fields, and verifies fetch results on
// the live handle and after reopening, for nullable and non-nullable setups.
TEST_F(CollectionTest, Feature_Update_Incremental) {
  auto func = [&](int doc_count, bool doc_nullable) {
    auto schema = TestHelper::CreateNormalSchema(doc_nullable);
    auto options = CollectionOptions{false, true, 100 * 1024 * 1024};
    FileHelper::RemoveDirectory(col_path);

    // insert first
    auto collection = TestHelper::CreateCollectionWithDoc(
        col_path, *schema, options, 0, doc_count, doc_nullable);
    // Fail cleanly instead of dereferencing a null handle below.
    ASSERT_NE(collection, nullptr);

    // Drops the current handle and opens the collection again from col_path.
    auto reopen = [&]() {
      collection.reset();
      auto result = Collection::Open(col_path, options);
      if (!result.has_value()) {
        std::cout << result.error().message() << std::endl;
      }
      // Never read result.value() when Open reported an error.
      ASSERT_TRUE(result.has_value());
      collection = std::move(result.value());
    };

    // Overwrites three scalar fields with fixed values so that an updated doc
    // is distinguishable from any generated doc.
    auto rewrite_doc = [&](Doc &doc) {
      // update int32
      int32_t new_int32 = 9999;
      doc.set("int32", new_int32);

      // update float
      float new_float = 9999.0;
      doc.set("float", new_float);

      // update string
      std::string new_string = "string_value";
      doc.set("string", new_string);
    };

    // Docs [0, updated_doc_count) must carry the rewritten content, docs
    // [updated_doc_count, doc_count) must still carry the original content.
    auto check_doc = [&](int updated_doc_count) {
      for (int i = 0; i < updated_doc_count; i++) {
        auto expect_doc =
            TestHelper::CreateDoc(i + 1, *schema, TestHelper::MakePK(i));
        rewrite_doc(expect_doc);
        auto result = collection->Fetch({expect_doc.pk()});
        ASSERT_TRUE(result.has_value());
        ASSERT_EQ(result.value().size(), 1);
        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);
        auto doc = result.value()[expect_doc.pk()];
        ASSERT_NE(doc, nullptr);
        if (*doc != expect_doc) {
          std::cout << "       doc:" << doc->to_detail_string() << std::endl;
          std::cout << "expect_doc:" << expect_doc.to_detail_string()
                    << std::endl;
        }
        ASSERT_EQ(*doc, expect_doc);
      }

      // validate fetch result
      for (int i = updated_doc_count; i < doc_count; i++) {
        auto expect_doc = doc_nullable ? TestHelper::CreateDocNull(i, *schema)
                                       : TestHelper::CreateDoc(i, *schema);
        auto result = collection->Fetch({expect_doc.pk()});
        ASSERT_TRUE(result.has_value());
        ASSERT_EQ(result.value().size(), 1);
        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);
        auto doc = result.value()[expect_doc.pk()];
        ASSERT_NE(doc, nullptr);
        if (*doc != expect_doc) {
          std::cout << "       doc:" << doc->to_detail_string() << std::endl;
          std::cout << "expect_doc:" << expect_doc.to_detail_string()
                    << std::endl;
        }
        ASSERT_EQ(*doc, expect_doc);
      }
    };

    // update all docs then
    Result<WriteResults> s;
    for (int i = 0; i < doc_count; i++) {
      Doc new_doc =
          TestHelper::CreateDoc(i + 1, *schema, TestHelper::MakePK(i));
      rewrite_doc(new_doc);
      std::vector<Doc> docs = {new_doc};
      s = collection->Update(docs);
      if (!s.has_value()) {
        std::cout << s.error().message() << std::endl;
      }
      ASSERT_TRUE(s.has_value());
      if (!s.value()[0].ok()) {
        std::cout << s.value()[0].message() << std::endl;
      }
      ASSERT_TRUE(s.value()[0].ok());

      // checkpoint: verify on the live handle, reopen, verify again.
      // ASSERT_* inside a helper lambda only leaves that lambda, so fatal
      // failures are propagated explicitly to stop this case.
      if (i % 100 == 0 || i == 1) {
        ASSERT_NO_FATAL_FAILURE(check_doc(i + 1));
        ASSERT_NO_FATAL_FAILURE(reopen());
        ASSERT_NO_FATAL_FAILURE(check_doc(i + 1));
      }
    }

    ASSERT_NO_FATAL_FAILURE(reopen());
    ASSERT_NO_FATAL_FAILURE(check_doc(doc_count));
  };

  func(99, false);
  func(99, true);
  func(100, false);
  func(100, true);
  func(101, false);
  func(101, true);
  func(1000, false);
  func(1000, true);
}

// Covers Update() with a doc that lacks, or explicitly nulls, the "int32"
// field against a non-nullable and a nullable schema.
TEST_F(CollectionTest, Feature_Update_Nullable) {
  // Fetches `pk` from `target` and requires the stored doc to equal `wanted`.
  auto check_doc = [&](const Collection::Ptr &target, const std::string &pk,
                       const Doc &wanted) {
    auto fetched = target->Fetch({pk});
    ASSERT_TRUE(fetched.has_value());
    ASSERT_EQ(fetched.value().size(), 1);
    ASSERT_EQ(fetched.value().count(pk), 1);
    auto actual = fetched.value()[pk];
    ASSERT_NE(actual, nullptr);
    if (*actual != wanted) {
      std::cout << "       doc:" << actual->to_detail_string() << std::endl;
      std::cout << "expect_doc:" << wanted.to_detail_string() << std::endl;
    }
    ASSERT_EQ(*actual, wanted);
  };

  // Prints the error message of a write call that failed as a whole.
  auto print_error = [](auto &outcome) {
    if (!outcome.has_value()) {
      std::cout << outcome.error().message() << std::endl;
    }
  };

  // schema not nullable
  {
    auto doc_schema = TestHelper::CreateNormalSchema();
    auto open_opts = CollectionOptions{false, true, 100 * 1024 * 1024};
    FileHelper::RemoveDirectory(col_path);

    auto coll = TestHelper::CreateCollectionWithDoc(col_path, *doc_schema,
                                                    open_opts, 0, 0);

    // insert one doc
    auto original =
        TestHelper::CreateDoc(0, *doc_schema, TestHelper::MakePK(0));
    std::vector<Doc> batch = {original};
    auto outcome = coll->Insert(batch);
    ASSERT_TRUE(outcome.has_value());

    // update with the "int32" field left out: accepted
    auto modified =
        TestHelper::CreateDoc(0, *doc_schema, TestHelper::MakePK(0));
    modified.remove("int32");
    batch = {modified};
    outcome = coll->Update(batch);
    print_error(outcome);
    ASSERT_TRUE(outcome.has_value());
    {
      auto &&first_status = outcome.value()[0];
      if (!first_status.ok()) {
        std::cout << first_status.message() << std::endl;
      }
      ASSERT_TRUE(first_status.ok());
    }

    // update with "int32" explicitly set to null: the whole call must fail
    modified.set_null("int32");
    batch = {modified};
    outcome = coll->Update(batch);
    print_error(outcome);
    ASSERT_FALSE(outcome.has_value());

    // the stored doc still equals the inserted doc
    check_doc(coll, original.pk(), original);
  }

  // schema nullable
  {
    auto doc_schema = TestHelper::CreateNormalSchema(true);
    auto open_opts = CollectionOptions{false, true, 100 * 1024 * 1024};
    FileHelper::RemoveDirectory(col_path);

    auto coll = TestHelper::CreateCollectionWithDoc(col_path, *doc_schema,
                                                    open_opts, 0, 0);

    // insert one doc
    auto original =
        TestHelper::CreateDoc(0, *doc_schema, TestHelper::MakePK(0));
    std::vector<Doc> batch = {original};
    auto outcome = coll->Insert(batch);
    ASSERT_TRUE(outcome.has_value());

    // update with the "int32" field left out: accepted
    auto modified =
        TestHelper::CreateDoc(0, *doc_schema, TestHelper::MakePK(0));
    modified.remove("int32");
    batch = {modified};
    outcome = coll->Update(batch);
    print_error(outcome);
    ASSERT_TRUE(outcome.has_value());
    {
      auto &&first_status = outcome.value()[0];
      if (!first_status.ok()) {
        std::cout << first_status.message() << std::endl;
      }
      ASSERT_TRUE(first_status.ok());
    }

    // the stored doc still equals the inserted doc
    check_doc(coll, original.pk(), original);

    // update with "int32" explicitly set to null: accepted on this schema
    modified.set_null("int32");
    batch = {modified};
    outcome = coll->Update(batch);
    print_error(outcome);
    ASSERT_TRUE(outcome.has_value());

    // the fetched doc must no longer expose an "int32" value
    auto pk = original.pk();
    auto fetched = coll->Fetch({pk});
    ASSERT_TRUE(fetched.has_value());
    ASSERT_EQ(fetched.value().size(), 1);
    ASSERT_EQ(fetched.value().count(pk), 1);
    auto stored = fetched.value()[pk];
    ASSERT_NE(stored, nullptr);
    auto int32_lookup = stored->get_field<int32_t>("int32");
    ASSERT_EQ(int32_lookup.status(), Doc::FieldGetStatus::NOT_FOUND);
  }
}

// Update() against a collection created with an empty doc range must report
// NOT_FOUND for every document while the call itself still succeeds.
TEST_F(CollectionTest, Feature_Update_Empty) {
  constexpr int kAttempts = 100;

  auto doc_schema = TestHelper::CreateNormalSchema();
  auto open_opts = CollectionOptions{false, true, 100 * 1024 * 1024};
  FileHelper::RemoveDirectory(col_path);

  // create the collection with the empty range 0..0
  auto coll = TestHelper::CreateCollectionWithDoc(col_path, *doc_schema,
                                                  open_opts, 0, 0);

  // try to update docs that were never inserted, one doc per call
  Result<WriteResults> outcome;
  for (int id = 0; id < kAttempts; ++id) {
    Doc missing =
        TestHelper::CreateDoc(id + 1, *doc_schema, TestHelper::MakePK(id));
    std::vector<Doc> batch = {missing};
    outcome = coll->Update(batch);
    if (!outcome.has_value()) {
      std::cout << outcome.error().message() << std::endl;
    }
    ASSERT_TRUE(outcome.has_value());

    // the miss is reported through the per-document status
    auto &&first_status = outcome.value()[0];
    if (!first_status.ok()) {
      std::cout << "0: " << first_status.message() << std::endl;
    }
    ASSERT_FALSE(first_status.ok());
    ASSERT_EQ(first_status.code(), StatusCode::NOT_FOUND);
  }
}

// Deletes docs one by one by primary key and verifies, at regular checkpoints
// and at the end, that deleted docs fetch as null, remaining docs are intact
// and the reported doc count matches - also after reopening the collection.
TEST_F(CollectionTest, Feature_Delete_General) {
  auto func = [&](int doc_count) {
    auto schema = TestHelper::CreateNormalSchema();
    auto options = CollectionOptions{false, true, 100 * 1024 * 1024};
    FileHelper::RemoveDirectory(col_path);

    // insert first
    auto collection = TestHelper::CreateCollectionWithDoc(
        col_path, *schema, options, 0, doc_count);
    // Fail cleanly instead of dereferencing a null handle below.
    ASSERT_NE(collection, nullptr);

    // Drops the current handle and opens the collection again from col_path.
    auto reopen = [&]() {
      collection.reset();
      auto result = Collection::Open(col_path, options);
      if (!result.has_value()) {
        std::cout << result.error().message() << std::endl;
      }
      // Never read result.value() when Open reported an error.
      ASSERT_TRUE(result.has_value());
      collection = std::move(result.value());
    };

    // Docs [0, updated_doc_count) must be gone (Fetch yields a null entry),
    // docs [updated_doc_count, doc_count) must still match the generated doc.
    auto check_doc = [&](int updated_doc_count) {
      for (int i = 0; i < updated_doc_count; i++) {
        auto expect_doc = TestHelper::CreateDoc(i, *schema);
        auto result = collection->Fetch({expect_doc.pk()});
        ASSERT_TRUE(result.has_value());
        ASSERT_EQ(result.value().size(), 1);
        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);
        auto doc = result.value()[expect_doc.pk()];
        ASSERT_EQ(doc, nullptr);
      }

      // validate fetch result
      for (int i = updated_doc_count; i < doc_count; i++) {
        auto expect_doc = TestHelper::CreateDoc(i, *schema);
        auto result = collection->Fetch({expect_doc.pk()});
        ASSERT_TRUE(result.has_value());
        ASSERT_EQ(result.value().size(), 1);
        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);
        auto doc = result.value()[expect_doc.pk()];
        ASSERT_NE(doc, nullptr);
        if (*doc != expect_doc) {
          std::cout << "       doc:" << doc->to_detail_string() << std::endl;
          std::cout << "expect_doc:" << expect_doc.to_detail_string()
                    << std::endl;
        }
        ASSERT_EQ(*doc, expect_doc);
      }
    };

    Result<WriteResults> s;
    for (int i = 0; i < doc_count; i++) {
      s = collection->Delete({TestHelper::MakePK(i)});
      if (!s.has_value()) {
        std::cout << s.error().message() << std::endl;
      }
      ASSERT_TRUE(s.has_value());
      if (!s.value()[0].ok()) {
        std::cout << s.value()[0].message() << std::endl;
      }
      ASSERT_TRUE(s.value()[0].ok());

      // checkpoint every 100 deletions (i == 0 included).
      // ASSERT_* inside a helper lambda only leaves that lambda, so fatal
      // failures are propagated explicitly to stop this case.
      if (i % 100 == 0) {
        ASSERT_NO_FATAL_FAILURE(check_doc(i + 1));
        ASSERT_NO_FATAL_FAILURE(reopen());
        ASSERT_NO_FATAL_FAILURE(check_doc(i + 1));

        auto stats = collection->Stats().value();
        ASSERT_EQ(stats.doc_count, doc_count - i - 1);
      }
    }

    ASSERT_NO_FATAL_FAILURE(reopen());
    auto stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, 0);

    ASSERT_NO_FATAL_FAILURE(check_doc(doc_count));
  };

  func(99);
  func(100);
  func(101);
  func(1000);
}

// Repeats a "delete every doc, then insert every doc again" cycle ten times
// and verifies the fetch results after each half of the cycle.
TEST_F(CollectionTest, Feature_Delete_Repeated) {
  auto func = [&](int doc_count) {
    auto schema = TestHelper::CreateNormalSchema();
    auto options = CollectionOptions{false, true, 100 * 1024 * 1024};
    FileHelper::RemoveDirectory(col_path);

    // insert first
    auto collection = TestHelper::CreateCollectionWithDoc(
        col_path, *schema, options, 0, doc_count);
    // Fail cleanly instead of dereferencing a null handle below.
    ASSERT_NE(collection, nullptr);

    // With deleted == true every doc must fetch as a null entry; otherwise
    // every doc must be present and equal to the generated doc.
    auto check_doc = [&](bool deleted) {
      for (int i = 0; i < doc_count; i++) {
        auto expect_doc = TestHelper::CreateDoc(i, *schema);
        auto result = collection->Fetch({expect_doc.pk()});
        ASSERT_TRUE(result.has_value());
        ASSERT_EQ(result.value().size(), 1);
        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);
        auto doc = result.value()[expect_doc.pk()];
        if (deleted) {
          ASSERT_EQ(doc, nullptr);
        } else {
          // guard the dereference below
          ASSERT_NE(doc, nullptr);
          ASSERT_EQ(*doc, expect_doc);
        }
      }
    };

    // `round` is kept distinct from the per-doc index to avoid shadowing.
    for (int round = 0; round < 10; round++) {
      // delete first
      Result<WriteResults> s;
      for (int i = 0; i < doc_count; i++) {
        s = collection->Delete({TestHelper::MakePK(i)});
        if (!s.has_value()) {
          std::cout << s.error().message() << std::endl;
        }
        ASSERT_TRUE(s.has_value());
        if (!s.value()[0].ok()) {
          std::cout << s.value()[0].message() << std::endl;
        }
        ASSERT_TRUE(s.value()[0].ok());
      }

      // ASSERT_* inside check_doc only leaves check_doc; propagate the fatal
      // failure so a broken round stops this case.
      ASSERT_NO_FATAL_FAILURE(check_doc(true));

      // insert then
      auto st = TestHelper::CollectionInsertDoc(collection, 0, doc_count);
      if (!st.ok()) {
        std::cout << st.message() << std::endl;
      }
      ASSERT_TRUE(st.ok());

      // the re-inserted docs must be visible again before the next round
      ASSERT_NO_FATAL_FAILURE(check_doc(false));
    }
  };

  func(1);
  func(100);
}

// Deletes docs one at a time through the filter "int32 = <i>" and verifies,
// at regular checkpoints and at the end, that deleted docs fetch as null,
// remaining docs are intact and the reported doc count matches - also after
// reopening the collection.
TEST_F(CollectionTest, Feature_DeleteByFilter_General) {
  auto func = [&](int doc_count) {
    auto schema = TestHelper::CreateNormalSchema();
    auto options = CollectionOptions{false, true, 100 * 1024 * 1024};
    FileHelper::RemoveDirectory(col_path);

    // insert first
    auto collection = TestHelper::CreateCollectionWithDoc(
        col_path, *schema, options, 0, doc_count);
    // Validate the handle BEFORE it is dereferenced by Flush().
    ASSERT_NE(collection, nullptr);

    ASSERT_TRUE(collection->Flush().ok());

    // Drops the current handle and opens the collection again from col_path.
    auto reopen = [&]() {
      collection.reset();
      auto result = Collection::Open(col_path, options);
      if (!result.has_value()) {
        std::cout << result.error().message() << std::endl;
      }
      // Never read result.value() when Open reported an error.
      ASSERT_TRUE(result.has_value());
      collection = std::move(result.value());
    };

    // Docs [0, updated_doc_count) must be gone (Fetch yields a null entry),
    // docs [updated_doc_count, doc_count) must still match the generated doc.
    auto check_doc = [&](int updated_doc_count) {
      for (int i = 0; i < updated_doc_count; i++) {
        auto expect_doc = TestHelper::CreateDoc(i, *schema);
        auto result = collection->Fetch({expect_doc.pk()});
        ASSERT_TRUE(result.has_value());
        ASSERT_EQ(result.value().size(), 1);
        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);
        auto doc = result.value()[expect_doc.pk()];
        if (doc != nullptr) {
          std::cout << "doc: " << doc->to_detail_string() << std::endl;
        }
        ASSERT_EQ(doc, nullptr);
      }

      // validate fetch result
      for (int i = updated_doc_count; i < doc_count; i++) {
        auto expect_doc = TestHelper::CreateDoc(i, *schema);
        auto result = collection->Fetch({expect_doc.pk()});
        ASSERT_TRUE(result.has_value());
        ASSERT_EQ(result.value().size(), 1);
        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);
        auto doc = result.value()[expect_doc.pk()];
        ASSERT_NE(doc, nullptr);
        if (*doc != expect_doc) {
          std::cout << "       doc:" << doc->to_detail_string() << std::endl;
          std::cout << "expect_doc:" << expect_doc.to_detail_string()
                    << std::endl;
        }
        ASSERT_EQ(*doc, expect_doc);
      }
    };

    Status s;
    for (int i = 0; i < doc_count; i++) {
      s = collection->DeleteByFilter("int32 = " + std::to_string(i));
      if (!s.ok()) {
        std::cout << s.message() << std::endl;
      }
      ASSERT_TRUE(s.ok());

      // checkpoint every 100 deletions (i == 0 included).
      // ASSERT_* inside a helper lambda only leaves that lambda, so fatal
      // failures are propagated explicitly to stop this case.
      if (i % 100 == 0) {
        std::cout << "check begin: " << i << std::endl;

        ASSERT_NO_FATAL_FAILURE(check_doc(i + 1));
        ASSERT_NO_FATAL_FAILURE(reopen());
        ASSERT_NO_FATAL_FAILURE(check_doc(i + 1));

        auto stats = collection->Stats().value();
        ASSERT_EQ(stats.doc_count, doc_count - i - 1);
      }
    }

    ASSERT_NO_FATAL_FAILURE(reopen());
    auto stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, 0);

    ASSERT_NO_FATAL_FAILURE(check_doc(doc_count));
  };

  func(99);
  func(100);
  func(101);
  func(1000);
}

// Same scenario as the plain DeleteByFilter test, but the schema is created
// with scalar index params (InvertIndexParams(false)): documents are deleted
// one at a time through DeleteByFilter("int32 = <i>") and the collection
// content is verified before and after reopening it from disk.
TEST_F(CollectionTest, Feature_DeleteByFilter_ScalarIndex) {
  auto func = [&](int doc_count) {
    auto schema = TestHelper::CreateNormalSchema(
        false, "demo", std::make_shared<InvertIndexParams>(false));
    auto options = CollectionOptions{false, true, 100 * 1024 * 1024};
    FileHelper::RemoveDirectory(col_path);

    // insert first
    auto collection = TestHelper::CreateCollectionWithDoc(
        col_path, *schema, options, 0, doc_count);

    ASSERT_TRUE(collection->Flush().ok());

    // Expects docs [0, updated_doc_count) to fetch as nullptr and docs
    // [updated_doc_count, doc_count) to equal their generated counterpart.
    auto check_doc = [&](int updated_doc_count) {
      for (int i = 0; i < updated_doc_count; i++) {
        auto expect_doc = TestHelper::CreateDoc(i, *schema);
        auto result = collection->Fetch({expect_doc.pk()});
        ASSERT_TRUE(result.has_value());
        ASSERT_EQ(result.value().size(), 1);
        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);
        auto doc = result.value()[expect_doc.pk()];
        if (doc != nullptr) {
          std::cout << "doc: " << doc->to_detail_string() << std::endl;
        }
        ASSERT_EQ(doc, nullptr);
      }

      // validate fetch result
      for (int i = updated_doc_count; i < doc_count; i++) {
        auto expect_doc = TestHelper::CreateDoc(i, *schema);
        auto result = collection->Fetch({expect_doc.pk()});
        ASSERT_TRUE(result.has_value());
        ASSERT_EQ(result.value().size(), 1);
        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);
        auto doc = result.value()[expect_doc.pk()];
        ASSERT_NE(doc, nullptr);
        if (*doc != expect_doc) {
          std::cout << "       doc:" << doc->to_detail_string() << std::endl;
          std::cout << "expect_doc:" << expect_doc.to_detail_string()
                    << std::endl;
        }
        ASSERT_EQ(*doc, expect_doc);
      }
    };

    Status s;
    for (int i = 0; i < doc_count; i++) {
      s = collection->DeleteByFilter("int32 = " + std::to_string(i));
      if (!s.ok()) {
        std::cout << s.message() << std::endl;
      }
      ASSERT_TRUE(s.ok());

      // Verify only on every 100th deletion (this includes i == 0).
      if (i % 100 == 0) {
        std::cout << "check begin: " << i << std::endl;

        // ASSERT_* inside check_doc only returns from the lambda; propagate a
        // fatal failure so this case stops instead of running on.
        ASSERT_NO_FATAL_FAILURE(check_doc(i + 1));
        collection.reset();
        auto result = Collection::Open(col_path, options);
        if (!result.has_value()) {
          std::cout << result.error().message() << std::endl;
        }
        // Never unwrap a failed Open() result.
        ASSERT_TRUE(result.has_value());
        collection = std::move(result.value());

        ASSERT_NO_FATAL_FAILURE(check_doc(i + 1));

        auto stats = collection->Stats().value();
        ASSERT_EQ(stats.doc_count, doc_count - i - 1);
      }
    }

    // Final state: reopen once more; every document must be gone.
    collection.reset();
    auto result = Collection::Open(col_path, options);
    if (!result.has_value()) {
      std::cout << result.error().message() << std::endl;
    }
    ASSERT_TRUE(result.has_value());
    collection = std::move(result.value());
    auto stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, 0);

    ASSERT_NO_FATAL_FAILURE(check_doc(doc_count));
  };

  func(1);
  func(100);
  func(101);
  func(1000);
}

// Exercises a mixed write sequence against one primary key per iteration:
// insert -> upsert -> update -> delete -> insert again. After each write the
// document is fetched back (where it should exist) and the collection's
// doc_count is checked.
TEST_F(CollectionTest, Feature_MixedWrite_General) {
  // case1: insert -> upsert -> update -> delete
  auto schema = TestHelper::CreateNormalSchema();
  auto options = CollectionOptions{false, true, 100 * 1024 * 1024};
  FileHelper::RemoveDirectory(col_path);

  // start from an empty collection
  auto collection =
      TestHelper::CreateCollectionWithDoc(col_path, *schema, options, 0, 0);

  for (int i = 0; i < 100; i++) {
    // std::cout << "insert: " << i << std::endl;

    // insert
    auto new_doc = TestHelper::CreateDoc(i, *schema);
    std::vector<Doc> new_docs = {new_doc};
    auto res = collection->Insert(new_docs);
    ASSERT_TRUE(res.has_value());
    ASSERT_TRUE(res.value()[0].ok());

    // fetch
    auto docs = collection->Fetch({TestHelper::MakePK(i)});
    ASSERT_TRUE(docs.has_value());
    ASSERT_EQ(docs.value().size(), 1);
    ASSERT_EQ(docs.value().count(TestHelper::MakePK(i)), 1);
    ASSERT_NE(docs.value()[TestHelper::MakePK(i)], nullptr);
    ASSERT_EQ(new_doc, *docs.value()[TestHelper::MakePK(i)]);

    auto stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, i + 1);

    // upsert: same pk, content generated from i + 1
    new_doc = TestHelper::CreateDoc(i + 1, *schema, TestHelper::MakePK(i));
    new_docs = {new_doc};
    res = collection->Upsert(new_docs);
    ASSERT_TRUE(res.has_value());
    ASSERT_TRUE(res.value()[0].ok());

    // fetch (keep the unwrapped result so has_value() is a real check)
    docs = collection->Fetch({TestHelper::MakePK(i)});
    ASSERT_TRUE(docs.has_value());
    ASSERT_EQ(docs.value().size(), 1);
    ASSERT_EQ(docs.value().count(TestHelper::MakePK(i)), 1);
    ASSERT_NE(docs.value()[TestHelper::MakePK(i)], nullptr);
    ASSERT_EQ(new_doc, *docs.value()[TestHelper::MakePK(i)]);

    stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, i + 1);

    // update: same pk, content generated from i + 2
    new_doc = TestHelper::CreateDoc(i + 2, *schema, TestHelper::MakePK(i));
    new_docs = {new_doc};
    res = collection->Update(new_docs);
    ASSERT_TRUE(res.has_value());
    ASSERT_TRUE(res.value()[0].ok());

    // fetch (keep the unwrapped result so has_value() is a real check)
    docs = collection->Fetch({TestHelper::MakePK(i)});
    ASSERT_TRUE(docs.has_value());
    ASSERT_EQ(docs.value().size(), 1);
    ASSERT_EQ(docs.value().count(TestHelper::MakePK(i)), 1);
    ASSERT_NE(docs.value()[TestHelper::MakePK(i)], nullptr);
    ASSERT_EQ(new_doc, *docs.value()[TestHelper::MakePK(i)]);

    stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, i + 1);

    // delete
    res = collection->Delete({TestHelper::MakePK(i)});
    ASSERT_TRUE(res.has_value());
    ASSERT_TRUE(res.value()[0].ok());

    stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, i);

    // insert again
    new_doc = TestHelper::CreateDoc(i, *schema);
    new_docs = {new_doc};
    res = collection->Insert(new_docs);
    ASSERT_TRUE(res.has_value());
    ASSERT_TRUE(res.value()[0].ok());

    // fetch
    docs = collection->Fetch({TestHelper::MakePK(i)});
    ASSERT_TRUE(docs.has_value());
    ASSERT_EQ(docs.value().size(), 1);
    ASSERT_EQ(docs.value().count(TestHelper::MakePK(i)), 1);
    ASSERT_NE(docs.value()[TestHelper::MakePK(i)], nullptr);
    ASSERT_EQ(new_doc, *docs.value()[TestHelper::MakePK(i)]);

    stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, i + 1);
  }
}

// Basic CreateIndex behaviour on an empty collection: creating an HNSW index
// on "dense_fp32" succeeds, repeating the same call succeeds again, and
// creating an index on a field name that does not exist fails.
TEST_F(CollectionTest, Feature_CreateIndex_General) {
  // create empty collection
  auto schema = TestHelper::CreateNormalSchema();
  auto options = CollectionOptions{false, true, 64 * 1024 * 1024};
  auto collection = TestHelper::CreateCollectionWithDoc(col_path, *schema,
                                                        options, 0, 0, false);

  ASSERT_TRUE(collection->Flush().ok());
  auto stats = collection->Stats().value();
  ASSERT_EQ(stats.doc_count, 0);

  auto index_params = std::make_shared<HnswIndexParams>(MetricType::IP);
  auto s = collection->CreateIndex("dense_fp32", index_params);
  ASSERT_TRUE(s.ok()) << "status: " << s.message();

  // Creating the index a second time with the very same params must succeed.
  // NOTE(review): the original also built COSINE params here but never passed
  // them to CreateIndex; confirm whether re-creating the index with a
  // different metric was meant to be covered.
  s = collection->CreateIndex("dense_fp32", index_params);
  ASSERT_TRUE(s.ok()) << "status: " << s.message();

  s = collection->CreateIndex("dense_fp32_invalid", index_params);
  ASSERT_FALSE(s.ok());
}

// Creates an HNSW index on one vector field of a populated collection and
// verifies: the query returns every document with scores ordered as the
// metric requires, the collection schema reflects the new index, stored
// documents are unchanged (also after reopening the collection), and
// index_completeness after inserting further documents. Runs for several
// field / metric / quantization combinations.
TEST_F(CollectionTest, Feature_CreateIndex_Vector) {
  auto func = [&](std::string field_name,
                  MetricType metric_type = MetricType::IP,
                  QuantizeType quantize_type = QuantizeType::UNDEFINED) {
    std::cout << "**** Test field: " << field_name
              << ", metric: " << MetricTypeCodeBook::AsString(metric_type)
              << ", quantize: " << QuantizeTypeCodeBook::AsString(quantize_type)
              << std::endl;

    FileHelper::RemoveDirectory(col_path);

    int doc_count = 10;

    auto schema = TestHelper::CreateNormalSchema();
    auto options = CollectionOptions{false, true, 64 * 1024 * 1024};
    auto collection = TestHelper::CreateCollectionWithDoc(
        col_path, *schema, options, 0, doc_count, false);

    ASSERT_TRUE(collection->Flush().ok());

    auto stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, doc_count);
    ASSERT_EQ(stats.index_completeness[field_name], 1);

    auto index_params =
        std::make_shared<HnswIndexParams>(metric_type, 16, 200, quantize_type);
    auto s = collection->CreateIndex(field_name, index_params);
    std::cout << "status: " << s.message()
              << ", code: " << GetDefaultMessage(s.code()) << std::endl;
    ASSERT_TRUE(s.ok());

    VectorQuery query;
    query.topk_ = doc_count;
    query.field_name_ = field_name;
    query.include_vector_ = true;
    auto field_schema = schema->get_vector_field(field_name);
    ASSERT_NE(field_schema, nullptr);
    ASSERT_TRUE(field_schema->is_vector_field());

    bool is_dense = field_schema->is_dense_vector();

    // Query vectors are kept alive in typed buffers because they are reused
    // below to compute the expected score of every hit.
    std::vector<float> vector;
    std::vector<ailego::Float16> vector_fp16;
    std::vector<int8_t> vector_int8;
    std::pair<std::vector<uint32_t>, std::vector<float>> sparse_vector;
    std::pair<std::vector<uint32_t>, std::vector<ailego::Float16>>
        sparse_vector_fp16;
    if (is_dense) {
      // Dense query: all ones except a zero in the first component.
      if (field_schema->data_type() == DataType::VECTOR_FP16) {
        vector_fp16 = std::vector<ailego::Float16>(field_schema->dimension(),
                                                   ailego::Float16(1.0f));
        vector_fp16[0] = 0;
        query.query_vector_.assign(
            reinterpret_cast<char *>(vector_fp16.data()),
            vector_fp16.size() * sizeof(ailego::Float16));
      } else if (field_schema->data_type() == DataType::VECTOR_FP32) {
        vector = std::vector<float>(field_schema->dimension(), 1);
        vector[0] = 0;
        query.query_vector_.assign(reinterpret_cast<char *>(vector.data()),
                                   vector.size() * sizeof(float));
      } else {
        vector_int8 = std::vector<int8_t>(field_schema->dimension(), 1);
        vector_int8[0] = 0;
        query.query_vector_.assign(
            reinterpret_cast<char *>(vector_int8.data()),
            vector_int8.size() * sizeof(int8_t));
      }
    } else {
      // Sparse query: a single entry (index 1, value 1).
      if (field_schema->data_type() == DataType::SPARSE_VECTOR_FP32) {
        sparse_vector = {{1}, {1}};
        query.query_sparse_indices_.assign(
            reinterpret_cast<char *>(sparse_vector.first.data()),
            sparse_vector.first.size() * sizeof(uint32_t));
        query.query_sparse_values_.assign(
            reinterpret_cast<char *>(sparse_vector.second.data()),
            sparse_vector.second.size() * sizeof(float));
      } else {
        sparse_vector_fp16 = {{1}, {ailego::Float16(1.0f)}};
        query.query_sparse_indices_.assign(
            reinterpret_cast<char *>(sparse_vector_fp16.first.data()),
            sparse_vector_fp16.first.size() * sizeof(uint32_t));
        query.query_sparse_values_.assign(
            reinterpret_cast<char *>(sparse_vector_fp16.second.data()),
            sparse_vector_fp16.second.size() * sizeof(ailego::Float16));
      }
    }
    auto query_result = collection->Query(query);
    ASSERT_TRUE(query_result.has_value())
        << "status: " << query_result.error().message();
    ASSERT_EQ(query_result.value().size(), doc_count);

    // Only compared from the second hit on; initialised so it never holds an
    // indeterminate value.
    float last_score = 0.0f;
    for (size_t i = 0; i < query_result.value().size(); i++) {
      auto pk = query_result.value()[i]->pk();
      auto score = query_result.value()[i]->score();
      std::cout << "top " << i << ": " << pk << ", score: " << score
                << std::endl;

      auto expect_doc =
          TestHelper::CreateDoc(TestHelper::ExtractDocId(pk), *schema);
      float expect_score = 0.0f;
      if (is_dense) {
        if (field_schema->data_type() == DataType::VECTOR_FP16) {
          auto query_result_vector =
              expect_doc.get<std::vector<ailego::Float16>>(field_name);
          ASSERT_TRUE(query_result_vector.has_value());
          expect_score = distance_dense(
              vector_fp16, query_result_vector.value(), metric_type);
        } else if (field_schema->data_type() == DataType::VECTOR_FP32) {
          auto query_result_vector =
              expect_doc.get<std::vector<float>>(field_name);
          ASSERT_TRUE(query_result_vector.has_value());
          expect_score =
              distance_dense(vector, query_result_vector.value(), metric_type);
        } else {
          auto query_result_vector =
              expect_doc.get<std::vector<int8_t>>(field_name);
          ASSERT_TRUE(query_result_vector.has_value());
          expect_score = distance_dense(
              vector_int8, query_result_vector.value(), metric_type);
        }
      } else {
        if (field_schema->data_type() == DataType::SPARSE_VECTOR_FP32) {
          auto query_result_vector =
              expect_doc
                  .get<std::pair<std::vector<uint32_t>, std::vector<float>>>(
                      field_name);
          ASSERT_TRUE(query_result_vector.has_value());
          expect_score =
              distance_sparse(sparse_vector, query_result_vector.value());
        } else {
          auto query_result_vector = expect_doc.get<
              std::pair<std::vector<uint32_t>, std::vector<ailego::Float16>>>(
              field_name);
          ASSERT_TRUE(query_result_vector.has_value());
          expect_score =
              distance_sparse(sparse_vector_fp16, query_result_vector.value());
        }
      }
      std::cout.precision(8);
      std::cout << "score: " << score << ", expect_score: " << expect_score
                << std::endl;
      // The expected score is only logged, not asserted:
      // ASSERT_FLOAT_EQ(score, expect_score);
      // Scores must be non-decreasing for L2 and non-increasing for IP;
      // COSINE ordering is not checked.
      if (i > 0) {
        if (metric_type == MetricType::L2) {
          ASSERT_GE(score, last_score);
        } else if (metric_type == MetricType::IP) {
          ASSERT_LE(score, last_score);
        }
      }
      last_score = score;
    }

    auto new_schema = std::make_shared<CollectionSchema>(*schema);
    s = new_schema->add_index(field_name, index_params);
    ASSERT_TRUE(s.ok());
    ASSERT_EQ(*new_schema, collection->Schema());

    // Fetches the first doc_count documents and, except for COSINE, compares
    // them with their generated counterpart.
    auto check_doc = [&]() {
      for (int i = 0; i < doc_count; i++) {
        auto expect_doc = TestHelper::CreateDoc(i, *schema);
        auto fetched = collection->Fetch({expect_doc.pk()});
        ASSERT_TRUE(fetched.has_value());
        ASSERT_EQ(fetched.value().size(), 1);
        ASSERT_EQ(fetched.value().count(expect_doc.pk()), 1);
        auto doc = fetched.value()[expect_doc.pk()];
        ASSERT_NE(doc, nullptr);
        if (metric_type != MetricType::COSINE) {
          if (*doc != expect_doc) {
            std::cout << "       doc:" << doc->to_detail_string() << std::endl;
            std::cout << "expect_doc:" << expect_doc.to_detail_string()
                      << std::endl;
          }
          ASSERT_EQ(*doc, expect_doc);
        }
      }
    };

    // ASSERT_* inside the lambda only leaves the lambda, so propagate fatal
    // failures explicitly.
    ASSERT_NO_FATAL_FAILURE(check_doc());

    collection.reset();

    auto result = Collection::Open(col_path, options);
    ASSERT_TRUE(result.has_value());

    collection = std::move(result.value());
    stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, doc_count);
    ASSERT_EQ(stats.index_completeness[field_name], 1);

    ASSERT_NO_FATAL_FAILURE(check_doc());

    // insert another 100 docs
    s = TestHelper::CollectionInsertDoc(collection, doc_count, doc_count + 100,
                                        false);
    ASSERT_TRUE(s.ok());
    ASSERT_EQ(collection->Stats().value().doc_count, doc_count + 100);
    ASSERT_FLOAT_EQ(collection->Stats().value().index_completeness[field_name],
                    doc_count * 1.0 / (doc_count + 100));

    s = collection->Flush();
    ASSERT_TRUE(s.ok());

    // Creating the index again with the same params succeeds and leaves the
    // reported completeness unchanged.
    s = collection->CreateIndex(field_name, index_params);
    ASSERT_TRUE(s.ok());
    ASSERT_EQ(collection->Stats().value().doc_count, doc_count + 100);
    ASSERT_FLOAT_EQ(collection->Stats().value().index_completeness[field_name],
                    doc_count * 1.0 / (doc_count + 100));
  };

  func("dense_fp32", MetricType::L2);
  func("dense_fp32", MetricType::COSINE);
  func("dense_fp32", MetricType::IP);
  func("dense_fp32", MetricType::L2, QuantizeType::FP16);
  func("dense_fp32", MetricType::COSINE, QuantizeType::FP16);
  func("dense_fp32", MetricType::IP, QuantizeType::FP16);
  func("dense_fp16");
  func("dense_int8");
  func("sparse_fp32");
  func("sparse_fp16");
}

// Creates an inverted index on a scalar field of a populated collection and
// verifies schema, stored documents and stats at three points: right after
// the index is created, after the collection is reopened, and after more
// documents are inserted and the index is created again. The schema may
// optionally be built with scalar index params up front.
TEST_F(CollectionTest, Feature_CreateIndex_Scalar) {
  auto run_case = [&](std::string field_name, bool enable_optimize,
                      IndexParams::Ptr scalar_index_params = nullptr) {
    FileHelper::RemoveDirectory(col_path);

    int doc_count = 1000;
    int total_doc_count = doc_count + 100;

    auto schema =
        TestHelper::CreateNormalSchema(false, "demo", scalar_index_params);
    auto options = CollectionOptions{false, true, 64 * 1024 * 1024};
    auto collection = TestHelper::CreateCollectionWithDoc(
        col_path, *schema, options, 0, doc_count, false);

    ASSERT_TRUE(collection->Flush().ok());

    auto stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, doc_count);
    ASSERT_EQ(stats.index_completeness["dense_fp32"], 1);

    // Build the scalar index under test.
    auto index_params = std::make_shared<InvertIndexParams>(enable_optimize);
    auto status = collection->CreateIndex(field_name, index_params);
    std::cout << "status: " << status.message()
              << ", code: " << GetDefaultMessage(status.code()) << std::endl;
    ASSERT_TRUE(status.ok());

    // The collection schema must equal the original schema plus the index.
    auto expected_schema = std::make_shared<CollectionSchema>(*schema);
    status = expected_schema->add_index(field_name, index_params);
    ASSERT_TRUE(status.ok());
    ASSERT_EQ(*expected_schema, collection->Schema());

    // Documents are untouched by index creation.
    for (int id = 0; id < doc_count; id++) {
      auto expected = TestHelper::CreateDoc(id, *schema);
      auto fetched = collection->Fetch({expected.pk()});
      ASSERT_TRUE(fetched.has_value());
      ASSERT_EQ(fetched.value().size(), 1);
      ASSERT_EQ(fetched.value().count(expected.pk()), 1);
      auto actual = fetched.value()[expected.pk()];
      ASSERT_NE(actual, nullptr);
      if (*actual != expected) {
        std::cout << "       doc:" << actual->to_detail_string() << std::endl;
        std::cout << "expect_doc:" << expected.to_detail_string()
                  << std::endl;
      }
      ASSERT_EQ(*actual, expected);
    }

    // Reopen from disk and repeat the checks.
    collection.reset();

    auto reopened = Collection::Open(col_path, options);
    ASSERT_TRUE(reopened.has_value());

    collection = reopened.value();
    stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, doc_count);
    ASSERT_EQ(stats.index_completeness["dense_fp32"], 1);

    for (int id = 0; id < doc_count; id++) {
      auto expected = TestHelper::CreateDoc(id, *schema);
      auto fetched = collection->Fetch({expected.pk()});
      ASSERT_TRUE(fetched.has_value());
      ASSERT_EQ(fetched.value().size(), 1);
      ASSERT_EQ(fetched.value().count(expected.pk()), 1);
      auto actual = fetched.value()[expected.pk()];
      ASSERT_NE(actual, nullptr);
      if (*actual != expected) {
        std::cout << "       doc:" << actual->to_detail_string() << std::endl;
        std::cout << "expect_doc:" << expected.to_detail_string()
                  << std::endl;
      }
      ASSERT_EQ(*actual, expected);
    }

    // insert another 100 docs
    status = TestHelper::CollectionInsertDoc(collection, doc_count,
                                             total_doc_count, false);
    ASSERT_TRUE(status.ok());
    ASSERT_EQ(collection->Stats().value().doc_count, total_doc_count);
    ASSERT_FLOAT_EQ(
        collection->Stats().value().index_completeness["dense_fp32"], 1);

    status = collection->Flush();
    ASSERT_TRUE(status.ok());

    // Creating the same index again must succeed.
    status = collection->CreateIndex(field_name, index_params);
    ASSERT_TRUE(status.ok());
    ASSERT_EQ(collection->Stats().value().doc_count, total_doc_count);
    ASSERT_FLOAT_EQ(
        collection->Stats().value().index_completeness["dense_fp32"], 1);

    // Old and newly inserted documents must all be fetchable and intact.
    for (int id = 0; id < total_doc_count; id++) {
      auto expected = TestHelper::CreateDoc(id, *schema);
      auto fetched = collection->Fetch({expected.pk()});
      ASSERT_TRUE(fetched.has_value());
      ASSERT_EQ(fetched.value().size(), 1);
      ASSERT_EQ(fetched.value().count(expected.pk()), 1);
      auto actual = fetched.value()[expected.pk()];
      ASSERT_NE(actual, nullptr);
      if (*actual != expected) {
        std::cout << "       doc:" << actual->to_detail_string() << std::endl;
        std::cout << "expect_doc:" << expected.to_detail_string()
                  << std::endl;
      }
      ASSERT_EQ(*actual, expected);
    }
  };

  run_case("int32", true);
  run_case("int32", false);

  run_case("int32", false, std::make_shared<InvertIndexParams>(true));
  run_case("int32", true, std::make_shared<InvertIndexParams>(true));
}

// Basic DropIndex behaviour on an empty collection whose schema already has a
// vector index: dropping an unknown field fails, dropping "dense_fp32"
// succeeds (also when repeated), the field then reports
// DefaultVectorIndexParams, and the resulting schema survives a reopen.
TEST_F(CollectionTest, Feature_DropIndex_General) {
  // create empty collection
  auto schema = TestHelper::CreateSchemaWithVectorIndex();
  // 64 * 1024 * 1024, the value the sibling index tests pass (the previous
  // "1204" factor was a typo).
  auto options = CollectionOptions{false, true, 64 * 1024 * 1024};
  auto collection = TestHelper::CreateCollectionWithDoc(col_path, *schema,
                                                        options, 0, 0, false);

  ASSERT_TRUE(collection->Flush().ok());
  auto stats = collection->Stats().value();
  ASSERT_EQ(stats.doc_count, 0);
  ASSERT_EQ(stats.index_completeness["dense_fp32"], 1);

  ASSERT_EQ(collection->Schema(), *schema);

  // Unknown field: must be rejected.
  auto s = collection->DropIndex("dense_fp32_invalid");
  ASSERT_FALSE(s.ok());

  s = collection->DropIndex("dense_fp32");
  ASSERT_TRUE(s.ok()) << "drop index err: " << s.message();

  // Dropping an already dropped index is expected to succeed as well.
  s = collection->DropIndex("dense_fp32");
  ASSERT_TRUE(s.ok());

  auto new_schema = std::make_shared<CollectionSchema>(*schema);
  s = new_schema->drop_index("dense_fp32");
  ASSERT_TRUE(s.ok());
  ASSERT_EQ(*new_schema, collection->Schema());

  stats = collection->Stats().value();
  ASSERT_EQ(stats.doc_count, 0);
  ASSERT_EQ(stats.index_completeness["dense_fp32"], 1);

  // After the drop the field reports the default vector index params.
  ASSERT_EQ(*collection->Schema()
                 .value()
                 .get_vector_field("dense_fp32")
                 ->index_params(),
            DefaultVectorIndexParams);

  s = collection->DropIndex("dense_fp32");
  ASSERT_TRUE(s.ok()) << "drop index err: " << s.message();

  auto schema1 = collection->Schema().value();

  // Reopen and make sure the persisted schema equals the in-memory one.
  collection.reset();

  auto result = Collection::Open(col_path, options);
  ASSERT_TRUE(result.has_value());

  collection = std::move(result.value());
  auto schema2 = collection->Schema().value();

  if (schema1 != schema2) {
    std::cout << "schema1: " << schema1.to_string_formatted() << std::endl;
    std::cout << "schema2: " << schema2.to_string_formatted() << std::endl;
  }
  ASSERT_EQ(schema1, schema2);

  stats = collection->Stats().value();
  ASSERT_EQ(stats.doc_count, 0);
  ASSERT_EQ(stats.index_completeness["dense_fp32"], 1);
}

// Creates an HNSW index on a vector field of a populated collection,
// optionally inserts more documents, then drops the index again. Schema,
// stats and the original documents are verified after every step and once
// more after the collection has been reopened.
TEST_F(CollectionTest, Feature_DropIndex_Vector) {
  auto func = [&](const std::string &field_name, bool add_before_drop = true) {
    FileHelper::RemoveDirectory(col_path);

    int doc_count = 1000;

    // create a populated collection
    auto schema = TestHelper::CreateNormalSchema();
    // 64 * 1024 * 1024, the value the sibling index tests pass (the previous
    // "1204" factor was a typo).
    auto options = CollectionOptions{false, true, 64 * 1024 * 1024};
    auto collection = TestHelper::CreateCollectionWithDoc(
        col_path, *schema, options, 0, doc_count, false);

    ASSERT_TRUE(collection->Flush().ok());

    auto stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, doc_count);
    ASSERT_EQ(stats.index_completeness[field_name], 1);
    ASSERT_EQ(collection->Schema(), *schema);

    // Fetches the first doc_count documents and compares each with its
    // generated counterpart. Documents added later are not covered.
    auto check_doc = [&]() {
      for (int i = 0; i < doc_count; i++) {
        auto expect_doc = TestHelper::CreateDoc(i, *schema);
        auto result = collection->Fetch({expect_doc.pk()});
        ASSERT_TRUE(result.has_value());
        ASSERT_EQ(result.value().size(), 1);
        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);
        auto doc = result.value()[expect_doc.pk()];
        ASSERT_NE(doc, nullptr);
        if (*doc != expect_doc) {
          std::cout << "       doc:" << doc->to_detail_string() << std::endl;
          std::cout << "expect_doc:" << expect_doc.to_detail_string()
                    << std::endl;
        }
        ASSERT_EQ(*doc, expect_doc);
      }
    };

    // ASSERT_* inside check_doc only leaves the lambda; propagate fatal
    // failures so "check success" is never printed after a failed check.
    ASSERT_NO_FATAL_FAILURE(check_doc());
    std::cout << "check success 1" << std::endl;

    // create index first
    auto index_params = std::make_shared<HnswIndexParams>(MetricType::IP);
    auto s = collection->CreateIndex(field_name, index_params);
    ASSERT_TRUE(s.ok());
    auto new_schema = std::make_shared<CollectionSchema>(*schema);
    s = new_schema->add_index(field_name, index_params);
    ASSERT_TRUE(s.ok());
    ASSERT_EQ(*new_schema, collection->Schema());
    stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, doc_count);
    ASSERT_EQ(stats.index_completeness[field_name], 1);

    ASSERT_NO_FATAL_FAILURE(check_doc());
    std::cout << "check success 2" << std::endl;

    // Optionally double the collection before dropping the index.
    int new_doc_count = doc_count;
    if (add_before_drop) {
      new_doc_count += doc_count;
      s = TestHelper::CollectionInsertDoc(collection, doc_count, new_doc_count);
      ASSERT_TRUE(s.ok());
    }

    // then drop index field_name
    s = collection->DropIndex(field_name);
    ASSERT_TRUE(s.ok());
    ASSERT_NO_FATAL_FAILURE(check_doc());
    std::cout << "check success 3" << std::endl;
    s = new_schema->drop_index(field_name);
    ASSERT_TRUE(s.ok());
    ASSERT_EQ(*new_schema, collection->Schema());

    stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, new_doc_count);
    ASSERT_EQ(stats.index_completeness[field_name], 1);

    // Reopen and verify the persisted state.
    collection.reset();
    auto result = Collection::Open(col_path, options);
    ASSERT_TRUE(result.has_value());
    collection = std::move(result.value());

    ASSERT_NO_FATAL_FAILURE(check_doc());
    std::cout << "check success 4" << std::endl;
    stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, new_doc_count);
    ASSERT_EQ(stats.index_completeness[field_name], 1);
  };

  func("dense_fp32", true);
  func("dense_fp32", false);
  func("sparse_fp32");
}

// Starts from a schema that already carries scalar indexes, drops the index
// of one field and verifies that the schema reflects the drop while documents
// and doc_count stay unchanged, both before and after reopening the
// collection.
TEST_F(CollectionTest, Feature_DropIndex_Scalar) {
  auto run_case = [&](std::string field_name, bool enable_optimize) {
    FileHelper::RemoveDirectory(col_path);

    int doc_count = 1000;

    auto schema =
        TestHelper::CreateSchemaWithScalarIndex(false, enable_optimize);
    auto options = CollectionOptions{false, true, 64 * 1024 * 1024};
    auto collection = TestHelper::CreateCollectionWithDoc(
        col_path, *schema, options, 0, doc_count, false);

    ASSERT_TRUE(collection->Flush().ok());

    // Fetches every document and compares it with its generated counterpart.
    auto verify_docs = [&]() {
      for (int id = 0; id < doc_count; id++) {
        auto expected = TestHelper::CreateDoc(id, *schema);
        auto fetched = collection->Fetch({expected.pk()});
        ASSERT_TRUE(fetched.has_value());
        ASSERT_EQ(fetched.value().size(), 1);
        ASSERT_EQ(fetched.value().count(expected.pk()), 1);
        auto actual = fetched.value()[expected.pk()];
        ASSERT_NE(actual, nullptr);
        if (*actual != expected) {
          std::cout << "       doc:" << actual->to_detail_string()
                    << std::endl;
          std::cout << "expect_doc:" << expected.to_detail_string()
                    << std::endl;
        }
        ASSERT_EQ(*actual, expected);
      }
    };

    // Baseline before touching the index.
    verify_docs();
    std::cout << "check success 1" << std::endl;

    auto stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, doc_count);

    // Drop the index and mirror the change on a copy of the schema.
    auto status = collection->DropIndex(field_name);
    ASSERT_TRUE(status.ok());

    auto expected_schema = std::make_shared<CollectionSchema>(*schema);
    status = expected_schema->drop_index(field_name);
    ASSERT_TRUE(status.ok());
    ASSERT_EQ(*expected_schema, collection->Schema());

    verify_docs();
    std::cout << "check success 2" << std::endl;
    stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, doc_count);

    // Reopen from disk and verify once more.
    collection.reset();
    auto reopened = Collection::Open(col_path, options);
    ASSERT_TRUE(reopened.has_value());
    collection = std::move(reopened.value());

    verify_docs();
    std::cout << "check success 3" << std::endl;
    stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, doc_count);
  };

  run_case("int32", true);
  run_case("int32", false);
}

// Creates an inverted index on a scalar field of a populated collection and
// drops it again right away, verifying after each step that the collection
// schema matches the expectation and that documents and doc_count are
// unchanged.
TEST_F(CollectionTest, Feature_DropIndex_AfterCreate) {
  auto run_case = [&](std::string field_name, bool enable_optimize) {
    FileHelper::RemoveDirectory(col_path);

    int doc_count = 1000;

    auto schema = TestHelper::CreateNormalSchema();
    auto options = CollectionOptions{false, true, 64 * 1024 * 1024};
    auto collection = TestHelper::CreateCollectionWithDoc(
        col_path, *schema, options, 0, doc_count, false);

    ASSERT_TRUE(collection->Flush().ok());

    // Fetches every document and compares it with its generated counterpart.
    auto verify_docs = [&]() {
      for (int id = 0; id < doc_count; id++) {
        auto expected = TestHelper::CreateDoc(id, *schema);
        auto fetched = collection->Fetch({expected.pk()});
        ASSERT_TRUE(fetched.has_value());
        ASSERT_EQ(fetched.value().size(), 1);
        ASSERT_EQ(fetched.value().count(expected.pk()), 1);
        auto actual = fetched.value()[expected.pk()];
        ASSERT_NE(actual, nullptr);
        if (*actual != expected) {
          std::cout << "       doc:" << actual->to_detail_string()
                    << std::endl;
          std::cout << "expect_doc:" << expected.to_detail_string()
                    << std::endl;
        }
        ASSERT_EQ(*actual, expected);
      }
    };

    // Baseline before any index operation.
    verify_docs();
    std::cout << "check success 1" << std::endl;

    auto stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, doc_count);

    // Step 1: create the scalar index.
    auto index_params = std::make_shared<InvertIndexParams>(enable_optimize);
    auto status = collection->CreateIndex(field_name, index_params);
    std::cout << "status: " << status.message()
              << ", code: " << GetDefaultMessage(status.code()) << std::endl;
    ASSERT_TRUE(status.ok());

    auto expected_schema = std::make_shared<CollectionSchema>(*schema);
    status = expected_schema->add_index(field_name, index_params);
    ASSERT_TRUE(status.ok());
    ASSERT_EQ(*expected_schema, collection->Schema());

    verify_docs();
    std::cout << "check success 2" << std::endl;

    // Step 2: drop it again and mirror the drop on the expected schema.
    status = collection->DropIndex(field_name);
    ASSERT_TRUE(status.ok());
    verify_docs();
    std::cout << "check success 3" << std::endl;
    status = expected_schema->drop_index(field_name);
    ASSERT_TRUE(status.ok());
    ASSERT_EQ(*expected_schema, collection->Schema());
    stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, doc_count);
  };

  run_case("int32", true);
  run_case("int32", false);
}

// Runs Optimize on a populated collection whose schema has a vector index and
// verifies that index_completeness of "dense_fp32" goes from 0 to 1 while the
// documents stay intact, including after the collection is reopened. Executed
// once per concurrency value passed to OptimizeOptions.
TEST_F(CollectionTest, Feature_Optimize_General) {
  auto run_case = [](int concurrency) {
    FileHelper::RemoveDirectory(col_path);

    int doc_count = 1000;

    // create a populated collection
    auto schema = TestHelper::CreateSchemaWithVectorIndex();
    auto options = CollectionOptions{false, true, 64 * 1024 * 1024};
    auto collection = TestHelper::CreateCollectionWithDoc(
        col_path, *schema, options, 0, doc_count, false);

    // Fetches every document and compares it with its generated counterpart.
    auto verify_docs = [&]() {
      for (int id = 0; id < doc_count; id++) {
        auto expected = TestHelper::CreateDoc(id, *schema);
        auto fetched = collection->Fetch({expected.pk()});
        ASSERT_TRUE(fetched.has_value());
        ASSERT_EQ(fetched.value().size(), 1);
        ASSERT_EQ(fetched.value().count(expected.pk()), 1);
        auto actual = fetched.value()[expected.pk()];
        ASSERT_NE(actual, nullptr);
        if (*actual != expected) {
          std::cout << "       doc:" << actual->to_detail_string()
                    << std::endl;
          std::cout << "expect_doc:" << expected.to_detail_string()
                    << std::endl;
        }
        ASSERT_EQ(*actual, expected);
      }
    };

    verify_docs();
    std::cout << "check success 1" << std::endl;

    // Before Optimize the vector index is reported as not built at all.
    ASSERT_TRUE(collection->Flush().ok());
    auto stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, doc_count);
    ASSERT_EQ(stats.index_completeness["dense_fp32"], 0);

    auto status = collection->Optimize(OptimizeOptions{concurrency});
    if (!status.ok()) {
      std::cout << status.message() << std::endl;
    }
    ASSERT_TRUE(status.ok());

    // After Optimize the index must be reported as complete.
    stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, doc_count);
    ASSERT_EQ(stats.index_completeness["dense_fp32"], 1);

    verify_docs();
    std::cout << "check success 2" << std::endl;

    // Reopen from disk and verify the documents once more.
    collection.reset();
    auto reopened = Collection::Open(col_path, options);
    ASSERT_TRUE(reopened.has_value());
    collection = std::move(reopened.value());

    verify_docs();
    std::cout << "check success 3" << std::endl;
  };

  run_case(0);
  run_case(4);
}

// Repeated optimize: after an initial Optimize(), inserts one document per
// round and optimizes again after every insert, checking doc_count and the
// "dense_fp32" index completeness at each step.
TEST_F(CollectionTest, Feature_Optimize_Repeated) {
  int doc_count = 1000;

  // build the collection together with its initial documents
  auto col_schema = TestHelper::CreateSchemaWithVectorIndex();
  auto col_options = CollectionOptions{false, true, 64 * 1024 * 1024};
  auto collection = TestHelper::CreateCollectionWithDoc(
      col_path, *col_schema, col_options, 0, doc_count, false);

  // doc_count is captured by reference, so the check covers whatever the
  // counter holds at call time.
  auto check_doc = [&]() {
    for (int id = 0; id < doc_count; ++id) {
      auto expected = TestHelper::CreateDoc(id, *col_schema);
      auto fetch_result = collection->Fetch({expected.pk()});
      ASSERT_TRUE(fetch_result.has_value());
      auto &&fetched = fetch_result.value();
      ASSERT_EQ(fetched.size(), 1);
      ASSERT_EQ(fetched.count(expected.pk()), 1);
      auto actual = fetched[expected.pk()];
      if (actual == nullptr) {
        std::cout << "doc is null, pk: " << expected.pk() << std::endl;
      }
      ASSERT_NE(actual, nullptr);
      if (*actual != expected) {
        std::cout << "       doc:" << actual->to_detail_string() << std::endl;
        std::cout << "expect_doc:" << expected.to_detail_string()
                  << std::endl;
      }
      ASSERT_EQ(*actual, expected);
    }
  };

  check_doc();
  std::cout << "check success 1" << std::endl;

  // nothing is indexed right after the flush
  ASSERT_TRUE(collection->Flush().ok());
  auto stats = collection->Stats().value();
  ASSERT_EQ(stats.doc_count, doc_count);
  ASSERT_EQ(stats.index_completeness["dense_fp32"], 0);

  // the first optimize indexes everything
  auto first_optimize = collection->Optimize();
  ASSERT_TRUE(first_optimize.ok());
  stats = collection->Stats().value();
  ASSERT_EQ(stats.doc_count, doc_count);
  ASSERT_EQ(stats.index_completeness["dense_fp32"], 1);

  int loop_count = 10;
  uint64_t next_doc_id = doc_count;
  for (int round = 0; round < loop_count; ++round) {
    std::cout << "loop: " << round << " begin" << std::endl;

    // insert exactly one new document
    auto insert_status = TestHelper::CollectionInsertDoc(
        collection, next_doc_id, next_doc_id + 1);
    ASSERT_TRUE(insert_status.ok());

    // the new document is counted but not indexed yet
    stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, doc_count + round + 1);
    ASSERT_FLOAT_EQ(stats.index_completeness["dense_fp32"],
                    1.0 * (doc_count + round) / (doc_count + round + 1));

    auto optimize_status = collection->Optimize();
    if (!optimize_status.ok()) {
      std::cout << "optimize failed: " << optimize_status.message()
                << std::endl;
    }
    ASSERT_TRUE(optimize_status.ok());

    ++next_doc_id;

    std::cout << "loop: " << round << " end" << std::endl;
  }

  stats = collection->Stats().value();
  ASSERT_EQ(stats.doc_count, doc_count + loop_count);
  ASSERT_EQ(stats.index_completeness["dense_fp32"], 1);

  // widen the verified range to include the docs added by the loop
  doc_count += loop_count;
  check_doc();
  std::cout << "check success 2" << std::endl;
}

// Optimize() with an HNSW index on "dense_fp32" for every combination of
// metric type (L2 / COSINE / IP) and quantization (none / FP16): the index
// completeness must go from 0 to 1, the docs must stay fetchable, and a vector
// query must return min(topk, doc_count) hits.
TEST_F(CollectionTest, Feature_Optimize_MetricType) {
  auto func = [&](MetricType metric_type,
                  QuantizeType quantize_type = QuantizeType::UNDEFINED) {
    FileHelper::RemoveDirectory(col_path);

    int doc_count = 1000;

    // create a collection holding docs [0, doc_count)
    auto schema = TestHelper::CreateSchemaWithVectorIndex(
        false, "demo",
        std::make_shared<HnswIndexParams>(metric_type, 16, 200, quantize_type));
    auto options = CollectionOptions{false, true, 64 * 1024 * 1024};
    auto collection = TestHelper::CreateCollectionWithDoc(
        col_path, *schema, options, 0, doc_count, false);
    // Everything below dereferences the collection.
    ASSERT_NE(collection, nullptr);

    // Fetches each doc by primary key; the content comparison is skipped for
    // COSINE.
    // NOTE(review): presumably COSINE stores a transformed (normalized) vector
    // so the fetched doc no longer equals the input - confirm.
    auto check_doc = [&]() {
      for (int i = 0; i < doc_count; i++) {
        auto expect_doc = TestHelper::CreateDoc(i, *schema);
        auto result = collection->Fetch({expect_doc.pk()});
        ASSERT_TRUE(result.has_value());
        ASSERT_EQ(result.value().size(), 1);
        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);
        auto doc = result.value()[expect_doc.pk()];
        ASSERT_NE(doc, nullptr);
        if (metric_type != MetricType::COSINE) {
          if (*doc != expect_doc) {
            std::cout << "       doc:" << doc->to_detail_string() << std::endl;
            std::cout << "expect_doc:" << expect_doc.to_detail_string()
                      << std::endl;
          }
          ASSERT_EQ(*doc, expect_doc);
        }
      }
    };

    // ASSERT_* inside check_doc only returns from check_doc; propagate the
    // fatal failure so the scenario stops at the first broken step.
    ASSERT_NO_FATAL_FAILURE(check_doc());
    std::cout << "check success 1" << std::endl;

    ASSERT_TRUE(collection->Flush().ok());
    auto stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, doc_count);
    ASSERT_EQ(stats.index_completeness["dense_fp32"], 0);

    auto s = collection->Optimize();
    if (!s.ok()) {
      std::cout << s.message() << std::endl;
    }
    ASSERT_TRUE(s.ok());

    stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, doc_count);
    ASSERT_EQ(stats.index_completeness["dense_fp32"], 1);

    ASSERT_NO_FATAL_FAILURE(check_doc());
    std::cout << "check success 2" << std::endl;

    // query the optimized index with the vector of doc 1
    const int query_doc_id = 1;
    auto query_doc = TestHelper::CreateDoc(query_doc_id, *schema);

    VectorQuery query;
    query.topk_ = 10;
    query.include_vector_ = true;
    query.field_name_ = "dense_fp32";

    auto vector = query_doc.get<std::vector<float>>("dense_fp32");
    ASSERT_TRUE(vector.has_value());
    // the query vector is handed over as its raw float bytes
    query.query_vector_.assign(
        reinterpret_cast<const char *>(vector.value().data()),
        vector.value().size() * sizeof(float));

    auto result = collection->Query(query);
    if (!result.has_value()) {
      std::cout << "err: " << result.error().message() << std::endl;
    }
    ASSERT_TRUE(result.has_value());
    ASSERT_EQ(result.value().size(), std::min(query.topk_, doc_count));
  };

  // Stop after a failed run: the next run starts by removing col_path, which
  // would destroy the on-disk state of the failing scenario.
  ASSERT_NO_FATAL_FAILURE(func(MetricType::L2));
  ASSERT_NO_FATAL_FAILURE(func(MetricType::COSINE));
  ASSERT_NO_FATAL_FAILURE(func(MetricType::IP));
  ASSERT_NO_FATAL_FAILURE(func(MetricType::L2, QuantizeType::FP16));
  ASSERT_NO_FATAL_FAILURE(func(MetricType::COSINE, QuantizeType::FP16));
  ASSERT_NO_FATAL_FAILURE(func(MetricType::IP, QuantizeType::FP16));
}

// Optimize combined with deletions: optimizes a populated collection, removes
// every document (the first ten through a filter, the rest by primary key),
// optimizes again and verifies the now-empty collection survives a reopen.
TEST_F(CollectionTest, Feature_Optimize_Delete) {
  int total_docs = 1000;

  // build the collection together with its documents
  auto col_schema = TestHelper::CreateSchemaWithVectorIndex();
  auto col_options = CollectionOptions{false, true, 64 * 1024 * 1024};
  auto collection = TestHelper::CreateCollectionWithDoc(
      col_path, *col_schema, col_options, 0, total_docs, false);

  // Fetches each doc by primary key and compares it with the regenerated one.
  auto verify_docs = [&]() {
    for (int id = 0; id < total_docs; ++id) {
      auto expected = TestHelper::CreateDoc(id, *col_schema);
      auto fetch_result = collection->Fetch({expected.pk()});
      ASSERT_TRUE(fetch_result.has_value());
      auto &&fetched = fetch_result.value();
      ASSERT_EQ(fetched.size(), 1);
      ASSERT_EQ(fetched.count(expected.pk()), 1);
      auto actual = fetched[expected.pk()];
      ASSERT_NE(actual, nullptr);
      if (*actual != expected) {
        std::cout << "       doc:" << actual->to_detail_string() << std::endl;
        std::cout << "expect_doc:" << expected.to_detail_string()
                  << std::endl;
      }
      ASSERT_EQ(*actual, expected);
    }
  };

  verify_docs();
  std::cout << "check success 1" << std::endl;

  // nothing is indexed right after the flush
  ASSERT_TRUE(collection->Flush().ok());
  auto stats = collection->Stats().value();
  ASSERT_EQ(stats.doc_count, total_docs);
  ASSERT_EQ(stats.index_completeness["dense_fp32"], 0);

  auto status = collection->Optimize();
  if (!status.ok()) {
    std::cout << status.message() << std::endl;
  }
  ASSERT_TRUE(status.ok());

  stats = collection->Stats().value();
  ASSERT_EQ(stats.doc_count, total_docs);
  ASSERT_EQ(stats.index_completeness["dense_fp32"], 1);

  verify_docs();
  std::cout << "check success 2" << std::endl;

  // remove docs 0..9 through a filter expression
  status = collection->DeleteByFilter("int32 < 10");
  if (!status.ok()) {
    std::cout << status.message() << std::endl;
  }
  ASSERT_TRUE(status.ok());
  stats = collection->Stats().value();
  ASSERT_EQ(stats.doc_count, total_docs - 10);

  // remove the remaining docs by primary key
  std::vector<std::string> remaining_pks;
  for (int id = 10; id < total_docs; ++id) {
    remaining_pks.push_back(TestHelper::MakePK(id));
  }
  auto delete_result = collection->Delete(remaining_pks);
  ASSERT_TRUE(delete_result.has_value());
  for (auto &per_doc_status : delete_result.value()) {
    ASSERT_TRUE(per_doc_status.ok());
  }

  stats = collection->Stats().value();
  ASSERT_EQ(stats.doc_count, 0);
  ASSERT_EQ(stats.index_completeness["dense_fp32"], 1);

  // optimizing the emptied collection must still succeed
  status = collection->Optimize();
  if (!status.ok()) {
    std::cout << status.message() << std::endl;
  }
  ASSERT_TRUE(status.ok());

  // reopen and check that the collection is still empty
  collection.reset();
  auto reopened = Collection::Open(col_path, col_options);
  ASSERT_TRUE(reopened.has_value());
  collection = std::move(reopened.value());

  stats = collection->Stats().value();
  ASSERT_EQ(stats.doc_count, 0);
  ASSERT_EQ(stats.index_completeness["dense_fp32"], 1);
}

// Optimize() on a collection created from the normal schema: "dense_fp32" is
// expected to report a completeness of 1 both before and after the optimize,
// and the documents must be unchanged afterwards and after a reopen.
TEST_F(CollectionTest, Feature_Optimize_NormalSchema) {
  int total_docs = 1000;

  // build the collection together with its documents
  auto col_schema = TestHelper::CreateNormalSchema();
  auto col_options = CollectionOptions{false, true, 64 * 1024 * 1024};
  auto collection = TestHelper::CreateCollectionWithDoc(
      col_path, *col_schema, col_options, 0, total_docs, false);

  // Fetches each doc by primary key and compares it with the regenerated one.
  auto verify_docs = [&]() {
    for (int id = 0; id < total_docs; ++id) {
      auto expected = TestHelper::CreateDoc(id, *col_schema);
      auto fetch_result = collection->Fetch({expected.pk()});
      ASSERT_TRUE(fetch_result.has_value());
      auto &&fetched = fetch_result.value();
      ASSERT_EQ(fetched.size(), 1);
      ASSERT_EQ(fetched.count(expected.pk()), 1);
      auto actual = fetched[expected.pk()];
      ASSERT_NE(actual, nullptr);
      if (*actual != expected) {
        std::cout << "       doc:" << actual->to_detail_string() << std::endl;
        std::cout << "expect_doc:" << expected.to_detail_string()
                  << std::endl;
      }
      ASSERT_EQ(*actual, expected);
    }
  };

  verify_docs();
  std::cout << "check success 1" << std::endl;

  ASSERT_TRUE(collection->Flush().ok());
  auto stats = collection->Stats().value();
  ASSERT_EQ(stats.doc_count, total_docs);
  ASSERT_EQ(stats.index_completeness["dense_fp32"], 1);

  auto status = collection->Optimize();
  if (!status.ok()) {
    std::cout << status.message() << std::endl;
  }
  ASSERT_TRUE(status.ok());

  stats = collection->Stats().value();
  ASSERT_EQ(stats.doc_count, total_docs);
  ASSERT_EQ(stats.index_completeness["dense_fp32"], 1);

  verify_docs();
  std::cout << "check success 2" << std::endl;

  // reopen and verify the documents once more
  collection.reset();
  auto reopened = Collection::Open(col_path, col_options);
  ASSERT_TRUE(reopened.has_value());
  collection = std::move(reopened.value());

  verify_docs();
  std::cout << "check success 3" << std::endl;
}

// Optimize() when documents arrive in several batches against a schema built
// with a max doc count of 1000. Each entry of segments_count is one batch:
// the batch is inserted, flushed, optimized and verified, and the collection
// is reopened. Optionally every document is deleted before the final
// optimize. The index completeness of "dense_fp32" must end up at 1.
TEST_F(CollectionTest, Feature_Optimize_ExceedMaxDocCount) {
  auto func = [&](const std::vector<int> &segments_count,
                  bool delete_all = false) {
    FileHelper::RemoveDirectory(col_path);

    int max_doc_per_count = 1000;

    // create empty collection
    auto schema = TestHelper::CreateNormalSchema(
        false, "demo", nullptr,
        std::make_shared<HnswIndexParams>(MetricType::IP), max_doc_per_count);
    auto options = CollectionOptions{false, true, 64 * 1024 * 1024};

    auto collection = TestHelper::CreateCollectionWithDoc(col_path, *schema,
                                                          options, 0, 0, false);
    // Everything below dereferences the collection.
    ASSERT_NE(collection, nullptr);

    // Fetches docs [0, doc_count) by primary key and compares each with the
    // doc regenerated by TestHelper::CreateDoc.
    auto check_doc = [&](int doc_count) {
      for (int i = 0; i < doc_count; i++) {
        auto expect_doc = TestHelper::CreateDoc(i, *schema);
        auto result = collection->Fetch({expect_doc.pk()});
        ASSERT_TRUE(result.has_value());
        ASSERT_EQ(result.value().size(), 1);
        ASSERT_EQ(result.value().count(expect_doc.pk()), 1);
        auto doc = result.value()[expect_doc.pk()];
        ASSERT_NE(doc, nullptr);
        if (*doc != expect_doc) {
          std::cout << "       doc:" << doc->to_detail_string() << std::endl;
          std::cout << "expect_doc:" << expect_doc.to_detail_string()
                    << std::endl;
        }
        ASSERT_EQ(*doc, expect_doc);
      }
    };

    int accu_seg_doc_count = 0;
    for (auto doc_count : segments_count) {
      const int total_doc_count = accu_seg_doc_count + doc_count;

      // The insert status used to be overwritten unchecked; a failed insert
      // must be reported here rather than as a later doc mismatch.
      auto s = TestHelper::CollectionInsertDoc(collection, accu_seg_doc_count,
                                               total_doc_count);
      if (!s.ok()) {
        std::cout << s.message() << std::endl;
      }
      ASSERT_TRUE(s.ok());

      // ASSERT_* inside check_doc only returns from check_doc; propagate the
      // fatal failure so the scenario stops at the first broken step.
      ASSERT_NO_FATAL_FAILURE(check_doc(total_doc_count));
      std::cout << "check success 1" << std::endl;

      // only the docs of the previous batches are indexed at this point
      ASSERT_TRUE(collection->Flush().ok());
      auto stats = collection->Stats().value();
      ASSERT_EQ(stats.doc_count, total_doc_count);
      ASSERT_FLOAT_EQ(stats.index_completeness["dense_fp32"],
                      accu_seg_doc_count * 1.0 / total_doc_count);

      s = collection->Optimize();
      if (!s.ok()) {
        std::cout << s.message() << std::endl;
      }
      ASSERT_TRUE(s.ok());

      stats = collection->Stats().value();
      ASSERT_EQ(stats.doc_count, total_doc_count);
      ASSERT_EQ(stats.index_completeness["dense_fp32"], 1);

      ASSERT_NO_FATAL_FAILURE(check_doc(total_doc_count));
      std::cout << "check success 2" << std::endl;

      // reopen the collection and verify the docs again
      collection.reset();
      auto result = Collection::Open(col_path, options);
      ASSERT_TRUE(result.has_value());
      collection = std::move(result.value());

      ASSERT_NO_FATAL_FAILURE(check_doc(total_doc_count));
      std::cout << "check success 3" << std::endl;

      accu_seg_doc_count = total_doc_count;
    }

    // delete all docs
    if (delete_all) {
      std::vector<std::string> pks;
      for (int i = 0; i < accu_seg_doc_count; ++i) {
        pks.push_back(TestHelper::MakePK(i));
      }
      auto res = collection->Delete(pks);
      ASSERT_TRUE(res.has_value());
      for (auto &r : res.value()) {
        ASSERT_TRUE(r.ok());
      }
    }

    auto s = collection->Optimize();
    if (!s.ok()) {
      std::cout << s.message() << std::endl;
    }
    ASSERT_TRUE(s.ok());

    // number of docs expected to remain after the optional delete
    const int expected_doc_count = delete_all ? 0 : accu_seg_doc_count;

    ASSERT_NO_FATAL_FAILURE(check_doc(expected_doc_count));
    // distinct marker: "check success 3" is already printed inside the loop
    std::cout << "check success 4" << std::endl;

    auto stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, expected_doc_count);
    ASSERT_FLOAT_EQ(stats.index_completeness["dense_fp32"], 1.0);

    // the final state must survive a reopen
    collection.reset();
    auto result = Collection::Open(col_path, options);
    ASSERT_TRUE(result.has_value());
    collection = std::move(result.value());

    stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, expected_doc_count);
    ASSERT_FLOAT_EQ(stats.index_completeness["dense_fp32"], 1.0);
  };

  // Stop after a failed run: the next run starts by removing col_path, which
  // would destroy the on-disk state of the failing scenario.
  ASSERT_NO_FATAL_FAILURE(func({600, 600}));
  ASSERT_NO_FATAL_FAILURE(func({600, 400}));
  ASSERT_NO_FATAL_FAILURE(func({600, 401}));

  ASSERT_NO_FATAL_FAILURE(func({600, 600}, true));
  ASSERT_NO_FATAL_FAILURE(func({600, 400}, true));
  ASSERT_NO_FATAL_FAILURE(func({600, 401}, true));

  ASSERT_NO_FATAL_FAILURE(func(std::vector<int>(100, 1)));
  ASSERT_NO_FATAL_FAILURE(func(std::vector<int>(100, 1), true));
}

// Optimize() after deleting every even-numbered document out of three batches
// of max_doc_per_count docs each: the odd-numbered documents must stay intact,
// half of the documents must remain and "dense_fp32" must be fully indexed.
TEST_F(CollectionTest, Feature_Optimize_Rebuild) {
  FileHelper::RemoveDirectory(col_path);

  int max_doc_per_count = 1000;

  // create the schema with a max doc count of max_doc_per_count
  auto schema = TestHelper::CreateNormalSchema(
      false, "demo", nullptr, std::make_shared<HnswIndexParams>(MetricType::IP),
      max_doc_per_count);
  auto options = CollectionOptions{false, true, 64 * 1024 * 1024};

  // create seg1
  auto collection = TestHelper::CreateCollectionWithDoc(
      col_path, *schema, options, 0, max_doc_per_count, false);
  // Everything below dereferences the collection.
  ASSERT_NE(collection, nullptr);

  // Fetches docs [0, doc_count) by primary key and compares each with the
  // regenerated doc. With delete_half set, even ids are skipped because they
  // have been deleted.
  auto check_doc = [&](int doc_count, bool delete_half = false) {
    for (int i = 0; i < doc_count; i++) {
      if (delete_half && i % 2 == 0) {
        continue;
      }

      auto expect_doc = TestHelper::CreateDoc(i, *schema);
      auto result = collection->Fetch({expect_doc.pk()});
      ASSERT_TRUE(result.has_value());
      ASSERT_EQ(result.value().size(), 1);
      ASSERT_EQ(result.value().count(expect_doc.pk()), 1);
      auto doc = result.value()[expect_doc.pk()];
      ASSERT_NE(doc, nullptr);
      if (*doc != expect_doc) {
        std::cout << "       doc:" << doc->to_detail_string() << std::endl;
        std::cout << "expect_doc:" << expect_doc.to_detail_string()
                  << std::endl;
      }
      ASSERT_EQ(*doc, expect_doc);
    }
  };

  ASSERT_TRUE(collection->Flush().ok());
  auto stats = collection->Stats().value();
  ASSERT_EQ(stats.doc_count, max_doc_per_count);
  ASSERT_EQ(stats.index_completeness["dense_fp32"], 0);

  // create seg2
  auto s = TestHelper::CollectionInsertDoc(
      collection, max_doc_per_count, max_doc_per_count + max_doc_per_count);
  ASSERT_TRUE(s.ok());
  stats = collection->Stats().value();
  ASSERT_EQ(stats.doc_count, max_doc_per_count + max_doc_per_count);
  ASSERT_FLOAT_EQ(stats.index_completeness["dense_fp32"], 0);

  // create seg3
  s = TestHelper::CollectionInsertDoc(collection, max_doc_per_count * 2,
                                      max_doc_per_count * 3);
  ASSERT_TRUE(s.ok());
  stats = collection->Stats().value();
  ASSERT_EQ(stats.doc_count, max_doc_per_count * 3);
  ASSERT_FLOAT_EQ(stats.index_completeness["dense_fp32"], 0);

  // ASSERT_* inside check_doc only returns from check_doc; propagate the
  // fatal failure so the test stops at the first broken step.
  ASSERT_NO_FATAL_FAILURE(check_doc(max_doc_per_count * 3));
  std::cout << "check success 1" << std::endl;

  // delete half
  std::vector<std::string> pks;
  for (int j = 0; j < 3 * max_doc_per_count; j++) {
    if (j % 2 == 0) {
      pks.push_back(TestHelper::MakePK(j));
    }
  }
  auto res = collection->Delete(pks);
  ASSERT_TRUE(res.has_value());
  for (auto &r : res.value()) {
    ASSERT_TRUE(r.ok());
  }

  s = collection->Optimize();
  if (!s.ok()) {
    std::cout << s.message() << std::endl;
  }
  ASSERT_TRUE(s.ok());

  ASSERT_NO_FATAL_FAILURE(check_doc(max_doc_per_count * 3, true));
  std::cout << "check success 2" << std::endl;

  stats = collection->Stats().value();
  // Half of the 3 * max_doc_per_count docs remain; integer arithmetic avoids
  // comparing the doc count against a floating-point value.
  ASSERT_EQ(stats.doc_count, max_doc_per_count * 3 / 2);
  ASSERT_FLOAT_EQ(stats.index_completeness["dense_fp32"], 1);
}

// Optimize() after interleaving inserts with DropIndex / CreateIndex on
// "dense_fp32": three batches of max_doc_per_count / 2 docs are written with
// the index dropped, recreated and dropped again in between. The documents
// must stay intact through the optimize and a reopen.
TEST_F(CollectionTest, Feature_Optimize_IndexOperation) {
  FileHelper::RemoveDirectory(col_path);

  int max_doc_per_count = 1000;

  // create the schema with a max doc count of max_doc_per_count
  auto schema = TestHelper::CreateNormalSchema(
      false, "demo", nullptr, std::make_shared<HnswIndexParams>(MetricType::IP),
      max_doc_per_count);
  auto options = CollectionOptions{false, true, 64 * 1024 * 1024};

  // create seg1
  auto collection = TestHelper::CreateCollectionWithDoc(
      col_path, *schema, options, 0, max_doc_per_count / 2, false);
  // Everything below dereferences the collection.
  ASSERT_NE(collection, nullptr);

  // Fetches docs [0, doc_count) by primary key and compares each with the doc
  // regenerated by TestHelper::CreateDoc.
  auto check_doc = [&](int doc_count) {
    for (int i = 0; i < doc_count; i++) {
      auto expect_doc = TestHelper::CreateDoc(i, *schema);
      auto result = collection->Fetch({expect_doc.pk()});
      ASSERT_TRUE(result.has_value());
      ASSERT_EQ(result.value().size(), 1);
      ASSERT_EQ(result.value().count(expect_doc.pk()), 1);
      auto doc = result.value()[expect_doc.pk()];
      ASSERT_NE(doc, nullptr);
      if (*doc != expect_doc) {
        std::cout << "       doc:" << doc->to_detail_string() << std::endl;
        std::cout << "expect_doc:" << expect_doc.to_detail_string()
                  << std::endl;
      }
      ASSERT_EQ(*doc, expect_doc);
    }
  };

  // dropping the index makes the completeness report 1
  auto stats = collection->Stats().value();
  ASSERT_EQ(stats.doc_count, max_doc_per_count / 2);
  ASSERT_EQ(stats.index_completeness["dense_fp32"], 0);
  auto s = collection->DropIndex("dense_fp32");
  ASSERT_TRUE(s.ok());
  stats = collection->Stats().value();
  ASSERT_EQ(stats.doc_count, max_doc_per_count / 2);
  ASSERT_EQ(stats.index_completeness["dense_fp32"], 1);

  // create seg2
  s = TestHelper::CollectionInsertDoc(collection, max_doc_per_count / 2,
                                      max_doc_per_count);
  ASSERT_TRUE(s.ok());
  s = collection->CreateIndex(
      "dense_fp32", std::make_shared<HnswIndexParams>(MetricType::IP));
  ASSERT_TRUE(s.ok());
  stats = collection->Stats().value();
  ASSERT_EQ(stats.doc_count, max_doc_per_count);
  ASSERT_EQ(stats.index_completeness["dense_fp32"], 1);

  // create seg3
  s = TestHelper::CollectionInsertDoc(collection, max_doc_per_count,
                                      max_doc_per_count * 3 / 2);
  ASSERT_TRUE(s.ok());
  s = collection->DropIndex("dense_fp32");
  ASSERT_TRUE(s.ok());
  stats = collection->Stats().value();
  ASSERT_EQ(stats.doc_count, max_doc_per_count * 3 / 2);
  ASSERT_EQ(stats.index_completeness["dense_fp32"], 1);

  // ASSERT_* inside check_doc only returns from check_doc; propagate the
  // fatal failure so the test stops at the first broken step.
  ASSERT_NO_FATAL_FAILURE(check_doc(max_doc_per_count * 3 / 2));
  std::cout << "check success 1" << std::endl;

  s = collection->Optimize();
  if (!s.ok()) {
    std::cout << s.message() << std::endl;
  }
  ASSERT_TRUE(s.ok());

  ASSERT_NO_FATAL_FAILURE(check_doc(max_doc_per_count * 3 / 2));
  std::cout << "check success 2" << std::endl;

  stats = collection->Stats().value();
  ASSERT_EQ(stats.doc_count, max_doc_per_count * 3 / 2);
  ASSERT_FLOAT_EQ(stats.index_completeness["dense_fp32"], 1);

  // reset collection
  collection.reset();
  auto result = Collection::Open(col_path, options);
  // Check the open result before taking its value; a failed reopen should be
  // a test failure, not an access to an empty result.
  ASSERT_TRUE(result.has_value());
  collection = std::move(result.value());

  ASSERT_NO_FATAL_FAILURE(check_doc(max_doc_per_count * 3 / 2));
  // distinct marker for the post-reopen check ("2" is used above)
  std::cout << "check success 3" << std::endl;

  stats = collection->Stats().value();
  ASSERT_EQ(stats.doc_count, max_doc_per_count * 3 / 2);
  ASSERT_FLOAT_EQ(stats.index_completeness["dense_fp32"], 1);
}

// Smoke test: Optimize() with OptimizeOptions{1} must succeed on a small
// collection built from the temp schema.
TEST_F(CollectionTest, Feature_Optimize_Temp) {
  auto temp_schema = TestHelper::CreateTempSchema();
  CollectionOptions col_options{false, true, 64 * 1024 * 1024};

  auto collection = TestHelper::CreateCollectionWithDoc(
      col_path, *temp_schema, col_options, 0, 10);

  auto optimize_status = collection->Optimize(OptimizeOptions{1});
  ASSERT_TRUE(optimize_status.ok());
}

// Query argument validation on "dense_fp32":
//   - topk 1024 is accepted and returns exactly 1024 docs,
//   - topk 1025 is rejected,
//   - an output field list with 1025 entries is rejected.
// NOTE(review): presumably 1024 is the upper limit for both topk and the
// number of output fields - confirm against the query validation code.
TEST_F(CollectionTest, Feature_Query_Validate) {
  FileHelper::RemoveDirectory(col_path);

  int doc_count = 1100;
  // create with normal schema
  auto schema = TestHelper::CreateNormalSchema();
  auto options = CollectionOptions{false, true, 100 * 1024 * 1024};
  auto collection = TestHelper::CreateCollectionWithDoc(col_path, *schema,
                                                        options, 0, doc_count);

  ASSERT_NE(collection, nullptr);
  std::string field_name = "dense_fp32";
  auto query_doc = TestHelper::CreateDoc(1, *schema);

  // Fills the parts shared by all three scenarios: topk, target field,
  // include_vector_ and the query vector taken from query_doc (dense or
  // sparse, depending on the field schema).
  auto init_query = [&](int topk, VectorQuery &query) {
    query.topk_ = topk;
    query.field_name_ = field_name;
    query.include_vector_ = true;

    auto field_schema = schema->get_vector_field(field_name);
    ASSERT_NE(field_schema, nullptr);
    ASSERT_TRUE(field_schema->is_vector_field());

    if (field_schema->is_dense_vector()) {
      auto vector = query_doc.get<std::vector<float>>(field_name);
      ASSERT_TRUE(vector.has_value());
      query.query_vector_.assign(
          reinterpret_cast<const char *>(vector.value().data()),
          vector.value().size() * sizeof(float));
    } else {
      auto sparse_vector =
          query_doc.get<std::pair<std::vector<uint32_t>, std::vector<float>>>(
              field_name);
      // Check before dereferencing, same as the dense branch.
      ASSERT_TRUE(sparse_vector.has_value());
      query.query_sparse_indices_.assign(
          reinterpret_cast<const char *>(sparse_vector.value().first.data()),
          sparse_vector.value().first.size() * sizeof(uint32_t));
      query.query_sparse_values_.assign(
          reinterpret_cast<const char *>(sparse_vector.value().second.data()),
          sparse_vector.value().second.size() * sizeof(float));
    }
  };

  // topk == 1024: accepted
  {
    VectorQuery query;
    // ASSERT_* inside init_query only returns from the lambda; propagate it.
    ASSERT_NO_FATAL_FAILURE(init_query(1024, query));

    auto result = collection->Query(query);
    ASSERT_TRUE(result.has_value());
    ASSERT_EQ(result.value().size(), query.topk_);
  }

  // topk == 1025: rejected
  {
    VectorQuery query;
    ASSERT_NO_FATAL_FAILURE(init_query(1025, query));

    auto result = collection->Query(query);
    ASSERT_FALSE(result.has_value());
    std::cout << result.error().message() << std::endl;
  }

  // 1025 (empty) output field names: rejected
  {
    VectorQuery query;
    ASSERT_NO_FATAL_FAILURE(init_query(1024, query));
    query.output_fields_ = std::make_optional<std::vector<std::string>>(
        std::vector<std::string>(1025));

    auto result = collection->Query(query);
    ASSERT_FALSE(result.has_value());
    std::cout << result.error().message() << std::endl;
  }
}

// Basic vector query on a dense ("dense_fp32") and a sparse ("sparse_fp32")
// field: querying with the vector of doc 1 and topk 10 must return exactly
// ten documents, expected to be docs doc_count-1, doc_count-2, ... in that
// order.
TEST_F(CollectionTest, Feature_Query_General) {
  auto func = [&](const std::string &field_name) {
    FileHelper::RemoveDirectory(col_path);

    int doc_count = 1000;
    // create with normal schema
    auto schema = TestHelper::CreateNormalSchema();
    auto options = CollectionOptions{false, true, 100 * 1024 * 1024};
    auto collection = TestHelper::CreateCollectionWithDoc(
        col_path, *schema, options, 0, doc_count);

    ASSERT_NE(collection, nullptr);

    auto stats = collection->Stats().value();
    std::cout << stats.to_string_formatted() << std::endl;

    // build the query from the vector of doc 1
    const int query_doc_id = 1;
    auto query_doc = TestHelper::CreateDoc(query_doc_id, *schema);

    VectorQuery query;
    query.topk_ = 10;
    query.field_name_ = field_name;

    auto field_schema = schema->get_vector_field(field_name);
    ASSERT_NE(field_schema, nullptr);
    ASSERT_TRUE(field_schema->is_vector_field());

    if (field_schema->is_dense_vector()) {
      auto vector = query_doc.get<std::vector<float>>(field_name);
      ASSERT_TRUE(vector.has_value());
      query.query_vector_.assign(
          reinterpret_cast<const char *>(vector.value().data()),
          vector.value().size() * sizeof(float));
    } else {
      auto sparse_vector =
          query_doc.get<std::pair<std::vector<uint32_t>, std::vector<float>>>(
              field_name);
      // Check before dereferencing, same as the dense branch.
      ASSERT_TRUE(sparse_vector.has_value());
      query.query_sparse_indices_.assign(
          reinterpret_cast<const char *>(sparse_vector.value().first.data()),
          sparse_vector.value().first.size() * sizeof(uint32_t));
      query.query_sparse_values_.assign(
          reinterpret_cast<const char *>(sparse_vector.value().second.data()),
          sparse_vector.value().second.size() * sizeof(float));
    }
    query.include_vector_ = true;

    auto result = collection->Query(query);
    if (!result.has_value()) {
      std::cout << "err: " << result.error().message() << std::endl;
    }
    ASSERT_TRUE(result.has_value());
    ASSERT_EQ(result.value().size(), query.topk_);

    // validate query result: hit j must equal doc (doc_count - 1 - j)
    for (int j = 0; j < query.topk_; j++) {
      std::cout << "result[" << j
                << "]:" << result.value()[j]->to_detail_string() << std::endl;
      auto expect_doc = TestHelper::CreateDoc(doc_count - 1 - j, *schema);
      if (*result.value()[j] != expect_doc) {
        std::cout << "       doc:" << result.value()[j]->to_detail_string()
                  << std::endl;
        std::cout << "expect_doc:" << expect_doc.to_detail_string()
                  << std::endl;
      }
      ASSERT_EQ(*result.value()[j], expect_doc);
    }
  };

  // ASSERT_* inside func only returns from the lambda. Stop after a failed
  // run: the next run starts by removing col_path, which would destroy the
  // on-disk state of the failing scenario.
  ASSERT_NO_FATAL_FAILURE(func("dense_fp32"));
  ASSERT_NO_FATAL_FAILURE(func("sparse_fp32"));
}

// Query without a query vector or target field: only topk_ and
// include_vector_ are set. The query must return min(topk, doc_count) docs,
// and each returned doc must carry exactly the field names of the schema.
TEST_F(CollectionTest, Feature_Query_Empty) {
  auto func = [&](int doc_count, int topk) {
    FileHelper::RemoveDirectory(col_path);
    // create with normal schema
    auto schema = TestHelper::CreateNormalSchema();
    auto options = CollectionOptions{false, true, 100 * 1024 * 1024};
    auto collection = TestHelper::CreateCollectionWithDoc(
        col_path, *schema, options, 0, doc_count);

    ASSERT_NE(collection, nullptr);

    auto stats = collection->Stats().value();
    std::cout << stats.to_string_formatted() << std::endl;

    // no vector and no field name: only the result size is constrained
    VectorQuery query;
    query.topk_ = topk;
    query.include_vector_ = true;

    auto result = collection->Query(query);
    if (!result.has_value()) {
      std::cout << "err: " << result.error().message() << std::endl;
    }
    ASSERT_TRUE(result.has_value());

    // topk may exceed the number of stored docs
    const auto expected_count = std::min(query.topk_, doc_count);
    ASSERT_EQ(result.value().size(), expected_count);

    // validate query result: every hit exposes all schema fields
    auto fields_name = schema->all_field_names();
    for (int j = 0; j < expected_count; j++) {
      auto result_doc = result.value()[j];
      auto doc_fields_names = result_doc->field_names();
      ASSERT_TRUE(vectors_equal_when_sorted(fields_name, doc_fields_names));
    }
  };

  // ASSERT_* inside func only returns from the lambda. Stop after a failed
  // run: the next run starts by removing col_path, which would destroy the
  // on-disk state of the failing scenario.
  ASSERT_NO_FATAL_FAILURE(func(1, 1));
  ASSERT_NO_FATAL_FAILURE(func(1, 2));
  ASSERT_NO_FATAL_FAILURE(func(1000, 1000));
  ASSERT_NO_FATAL_FAILURE(func(1000, 1001));
}

// Checks that a filtered query without a query vector returns the same hits
// before and after a scalar index is created on the filtered field.
TEST_F(CollectionTest, Feature_Query_WithoutVector_CreateScalarIndex) {
  // Runs one scenario against a freshly created collection.
  //   doc_count          doc count handed to CreateCollectionWithDoc
  //   topk               value assigned to query.topk_
  //   field              field on which the index is created
  //   index_params       parameters handed to CreateIndex
  //   filter             filter expression assigned to query.filter_
  //   expected_doc_count number of hits expected before and after indexing
  // NOTE: a failing ASSERT_* only returns from this lambda, so the remaining
  // scenarios below still run after a failure.
  auto func = [&](int doc_count, int topk, std::string field,
                  IndexParams::Ptr index_params, std::string filter,
                  int expected_doc_count) {
    FileHelper::RemoveDirectory(col_path);
    // create with normal schema
    auto schema = TestHelper::CreateNormalSchema();
    auto options = CollectionOptions{false, true, 100 * 1024 * 1024};
    auto collection = TestHelper::CreateCollectionWithDoc(
        col_path, *schema, options, 0, doc_count);

    ASSERT_NE(collection, nullptr);

    auto stats = collection->Stats().value();
    std::cout << stats.to_string_formatted() << std::endl;

    // Query once before the index exists.
    VectorQuery query;
    query.topk_ = topk;
    query.include_vector_ = true;
    query.filter_ = filter;

    auto result = collection->Query(query);
    if (!result.has_value()) {
      std::cout << "err: " << result.error().message() << std::endl;
    }
    ASSERT_TRUE(result.has_value());
    ASSERT_EQ(result.value().size(), expected_doc_count);

    // create index
    auto s = collection->CreateIndex(field, index_params);
    ASSERT_TRUE(s.ok());

    // Run the identical query again now that the index exists.
    auto result2 = collection->Query(query);
    if (!result2.has_value()) {
      std::cout << "err: " << result2.error().message() << std::endl;
    }

    ASSERT_TRUE(result2.has_value());
    ASSERT_EQ(result2.value().size(), expected_doc_count);

    // Compare the pre-index hits with the post-index hits position by
    // position. The previous code read both sides from result2, which made
    // the comparison always succeed.
    // NOTE(review): assumes the hit order is unchanged by index creation;
    // confirm, or switch to a pk-keyed comparison if order is not guaranteed.
    for (int j = 0; j < expected_doc_count; j++) {
      auto result1_doc = result.value()[j];
      auto result2_doc = result2.value()[j];
      ASSERT_EQ(*result1_doc, *result2_doc);
    }
  };

  func(5, 20, "bool", std::make_shared<InvertIndexParams>(false), "bool=true",
       1);
  func(5, 20, "bool", std::make_shared<InvertIndexParams>(true), "bool =true",
       1);
  func(100, 20, "bool", std::make_shared<InvertIndexParams>(true),
       "bool = true", 10);
  func(100, 20, "int32", std::make_shared<InvertIndexParams>(true), "int32 =1",
       1);
  func(100, 20, "int32", std::make_shared<InvertIndexParams>(true), "int32 <1",
       1);
  func(100, 20, "int32", std::make_shared<InvertIndexParams>(true),
       "int32 >= 1", 20);
  func(100, 20, "string", std::make_shared<InvertIndexParams>(true),
       "string = 'value_1'", 1);
  func(5, 20, "array_bool", std::make_shared<InvertIndexParams>(true),
       "array_bool contain_any (true)", 1);

  func(5, 20, "array_int32", std::make_shared<InvertIndexParams>(true),
       "array_int32 contain_any (1)", 1);
  func(5, 20, "array_int32", std::make_shared<InvertIndexParams>(true),
       "array_int32 contain_any (1,2)", 2);
  func(5, 20, "array_int32", std::make_shared<InvertIndexParams>(true),
       "array_int32 contain_any (0,1,2,3,4)", 5);
  func(5, 20, "array_int32", std::make_shared<InvertIndexParams>(true),
       "array_int32 contain_any (0,4)", 2);
  // func(5, 20, "array_int32", std::make_shared<InvertIndexParams>(true),
  //      "array_int32 contain_any ()", 0);

  func(10000, 20, "array_int32", std::make_shared<InvertIndexParams>(true),
       "array_int32 contain_any (0)", 1);
  func(10000, 20, "array_int32", std::make_shared<InvertIndexParams>(true),
       "array_int32 contain_any (9999)", 1);
  func(10000, 20, "array_int32", std::make_shared<InvertIndexParams>(true),
       "array_int32 contain_any (10000)", 0);
  func(10000, 20, "array_int32", std::make_shared<InvertIndexParams>(true),
       "array_int32 contain_any (-1)", 0);
}

// Filtered queries without a query vector against collections whose schema is
// built with scalar index parameters from the start.
TEST_F(CollectionTest, Feature_Query_WithoutVector_WithScalarIndex) {
  // One filtered-query scenario.
  struct Scenario {
    int doc_count;           // doc count handed to CreateCollectionWithDoc
    int topk;                // value assigned to query.topk_
    std::string field;       // field the filter targets (informational only)
    bool index_arg;          // argument forwarded to InvertIndexParams
    std::string filter;      // filter expression
    int expected_doc_count;  // number of hits the query must return
  };

  const std::vector<Scenario> scenarios = {
      {5, 20, "bool", false, "bool=true", 1},
      {5, 20, "bool", true, "bool =true", 1},
      {100, 20, "bool", true, "bool = true", 10},
      {100, 20, "int32", true, "int32 =1", 1},
      {100, 20, "int32", true, "int32 <1", 1},
      {100, 20, "int32", true, "int32 >= 1", 20},
      {5, 20, "array_bool", true, "array_bool contain_any (true)", 1},
      {5, 20, "array_int32", true, "array_int32 contain_any (1)", 1},
  };

  // Rebuilds the collection for one scenario and checks the hit count.
  auto run_scenario = [&](const Scenario &sc) {
    FileHelper::RemoveDirectory(col_path);

    // Schema is created with the index parameters already attached.
    IndexParams::Ptr index_params =
        std::make_shared<InvertIndexParams>(sc.index_arg);
    auto schema = TestHelper::CreateNormalSchema(false, "demo", index_params);
    auto options = CollectionOptions{false, true, 100 * 1024 * 1024};
    auto collection = TestHelper::CreateCollectionWithDoc(
        col_path, *schema, options, 0, sc.doc_count);
    ASSERT_NE(collection, nullptr);

    auto stats = collection->Stats().value();
    std::cout << stats.to_string_formatted() << std::endl;

    // Issue the filtered query and verify how many docs come back.
    VectorQuery query;
    query.topk_ = sc.topk;
    query.include_vector_ = true;
    query.filter_ = sc.filter;

    auto hits = collection->Query(query);
    if (!hits.has_value()) {
      std::cout << "err: " << hits.error().message() << std::endl;
    }
    ASSERT_TRUE(hits.has_value());
    ASSERT_EQ(hits.value().size(), sc.expected_doc_count);
  };

  for (const auto &sc : scenarios) {
    run_scenario(sc);
  }
}

// Placeholder: the body is empty, so this test exercises nothing and always
// passes. TODO: add group-by query coverage.
TEST_F(CollectionTest, Feature_GroupByQuery) {}

// Adds a column to a populated, flushed collection and verifies that the
// schema, doc count, fetched docs and query results all reflect the new field.
TEST_F(CollectionTest, Feature_AddColumn_General) {
  // Build and flush a collection holding `doc_count` docs.
  const int doc_count = 1000;
  auto options = CollectionOptions{false, true, 64 * 1024 * 1024};
  auto schema = TestHelper::CreateNormalSchema();
  auto collection = TestHelper::CreateCollectionWithDoc(
      col_path, *schema, options, 0, doc_count, false);

  ASSERT_TRUE(collection->Flush().ok());
  auto stats_before = collection->Stats().value();
  ASSERT_EQ(stats_before.doc_count, doc_count);

  // Add an INT32 field named "add_int32"; "int32" is the second AddColumn
  // argument (presumably the source expression for the new column — confirm).
  auto added_field =
      std::make_shared<FieldSchema>("add_int32", DataType::INT32, false);
  auto add_status =
      collection->AddColumn(added_field, "int32", AddColumnOptions());
  if (!add_status.ok()) {
    std::cout << "status: " << add_status.message() << std::endl;
    ASSERT_TRUE(false);
  }
  auto new_schema = collection->Schema().value();
  ASSERT_TRUE(new_schema.has_field("add_int32"));

  // Adding a column must not change the number of docs.
  auto stats_after = collection->Stats().value();
  ASSERT_EQ(stats_after.doc_count, doc_count);

  // Fetches docs [0, count) by primary key and compares each one with the doc
  // regenerated from the updated schema.
  auto verify_docs = [&](int count) {
    for (int id = 0; id < count; ++id) {
      auto expected = TestHelper::CreateDoc(id, new_schema);
      auto fetched = collection->Fetch({expected.pk()});
      ASSERT_TRUE(fetched.has_value());
      ASSERT_EQ(fetched.value().size(), 1);
      ASSERT_EQ(fetched.value().count(expected.pk()), 1);
      auto actual = fetched.value()[expected.pk()];
      ASSERT_NE(actual, nullptr);
      if (*actual != expected) {
        std::cout << "       doc:" << actual->to_detail_string() << std::endl;
        std::cout << "expect_doc:" << expected.to_detail_string()
                  << std::endl;
      }
      ASSERT_EQ(*actual, expected);
    }
  };
  verify_docs(doc_count);

  // A plain top-k query must return docs exposing every field of the new
  // schema.
  {
    VectorQuery query;
    query.topk_ = 10;
    query.include_vector_ = true;

    auto hits = collection->Query(query);
    if (!hits.has_value()) {
      std::cout << "err: " << hits.error().message() << std::endl;
    }
    ASSERT_TRUE(hits.has_value());

    const auto expected_hits = std::min(query.topk_, doc_count);
    ASSERT_EQ(hits.value().size(), expected_hits);

    auto schema_fields = new_schema.all_field_names();
    for (int pos = 0; pos < expected_hits; ++pos) {
      auto hit = hits.value()[pos];
      auto hit_fields = hit->field_names();
      ASSERT_TRUE(vectors_equal_when_sorted(schema_fields, hit_fields));
    }
  }
}

// Exercises AddColumn across collection reopen cycles: rejected requests, a
// valid add, inserts against the widened schema, and a second add.
TEST_F(CollectionTest, Feature_AddColumn_CornerCase) {
  const int doc_count = 1000;
  auto options = CollectionOptions{false, true, 64 * 1024 * 1024};

  // Step 1: create the collection, fill it and flush.
  {
    auto schema = TestHelper::CreateNormalSchema();
    auto collection = TestHelper::CreateCollectionWithDoc(
        col_path, *schema, options, 0, doc_count, false);

    ASSERT_TRUE(collection->Flush().ok());

    auto stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, doc_count);
  }

  // Step 2: reopen; each of the following AddColumn requests must fail.
  {
    auto opened = Collection::Open(col_path, options);
    ASSERT_TRUE(opened.has_value());
    auto collection = opened.value();

    // Null field schema.
    auto status = collection->AddColumn(nullptr, "int32", AddColumnOptions());
    ASSERT_FALSE(status.ok());

    // Null field schema together with an empty second argument.
    status = collection->AddColumn(nullptr, "", AddColumnOptions());
    ASSERT_FALSE(status.ok());

    // Valid field schema, but the second argument names no existing field.
    auto added_field =
        std::make_shared<FieldSchema>("add_int32", DataType::INT32, false);
    status = collection->AddColumn(added_field, "non_exist_field",
                                   AddColumnOptions());
    ASSERT_FALSE(status.ok());
  }

  // Step 3: reopen and add the "add_int32" column for real.
  {
    auto opened = Collection::Open(col_path, options);
    ASSERT_TRUE(opened.has_value());
    auto collection = opened.value();

    auto added_field =
        std::make_shared<FieldSchema>("add_int32", DataType::INT32, false);
    auto status =
        collection->AddColumn(added_field, "int32", AddColumnOptions());
    if (!status.ok()) {
      std::cout << "status: " << status.message() << std::endl;
      ASSERT_TRUE(false);
    }
    auto new_schema = collection->Schema().value();
    ASSERT_TRUE(new_schema.has_field("add_int32"));
  }

  // Step 4: reopen, confirm the column is still in the schema, then insert a
  // second batch of docs built from the widened schema.
  {
    auto opened = Collection::Open(col_path, options);
    ASSERT_TRUE(opened.has_value());
    auto collection = opened.value();
    auto new_schema = collection->Schema().value();
    ASSERT_TRUE(new_schema.has_field("add_int32"));

    const int total = doc_count * 2;
    for (int id = doc_count; id < total; ++id) {
      std::vector<Doc> batch = {TestHelper::CreateDoc(id, new_schema)};
      auto inserted = collection->Insert(batch);
      ASSERT_TRUE(inserted.has_value());
      ASSERT_TRUE(inserted.value()[0].ok());
    }
    auto stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, doc_count * 2);

    // Fetches docs [0, count) by primary key and compares each one with the
    // doc regenerated from the widened schema.
    auto verify_docs = [&](int count) {
      for (int id = 0; id < count; ++id) {
        auto expected = TestHelper::CreateDoc(id, new_schema);
        auto fetched = collection->Fetch({expected.pk()});
        ASSERT_TRUE(fetched.has_value());
        ASSERT_EQ(fetched.value().size(), 1);
        ASSERT_EQ(fetched.value().count(expected.pk()), 1);
        auto actual = fetched.value()[expected.pk()];
        ASSERT_NE(actual, nullptr);
        if (*actual != expected) {
          std::cout << "       doc:" << actual->to_detail_string()
                    << std::endl;
          std::cout << "expect_doc:" << expected.to_detail_string()
                    << std::endl;
        }
        ASSERT_EQ(*actual, expected);
      }
    };

    verify_docs(total);
  }

  // Step 5: reopen and add another column, passing the previously added
  // "add_int32" as the second argument.
  {
    auto opened = Collection::Open(col_path, options);
    ASSERT_TRUE(opened.has_value());
    auto collection = opened.value();

    auto dup_field =
        std::make_shared<FieldSchema>("add_int32_dup", DataType::INT32, false);
    auto status =
        collection->AddColumn(dup_field, "add_int32", AddColumnOptions());
    if (!status.ok()) {
      std::cout << "status: " << status.message() << std::endl;
      ASSERT_TRUE(false);
    }
    auto new_schema = collection->Schema().value();
    ASSERT_TRUE(new_schema.has_field("add_int32_dup"));
  }
}

// Drops the "int32" column from a populated, flushed collection and checks
// that the field disappears from the schema.
TEST_F(CollectionTest, Feature_DropColumn_General) {
  // Build and flush a collection holding `doc_count` docs.
  const int doc_count = 1000;
  auto options = CollectionOptions{false, true, 64 * 1024 * 1024};
  auto schema = TestHelper::CreateNormalSchema();
  auto collection = TestHelper::CreateCollectionWithDoc(
      col_path, *schema, options, 0, doc_count, false);

  ASSERT_TRUE(collection->Flush().ok());
  auto stats = collection->Stats().value();
  ASSERT_EQ(stats.doc_count, doc_count);

  // Drop the column; print the status message before failing so the reason is
  // visible in the test log.
  auto drop_status = collection->DropColumn("int32");
  if (!drop_status.ok()) {
    std::cout << "status: " << drop_status.message() << std::endl;
    ASSERT_TRUE(false);
  }

  auto schema_after = collection->Schema().value();
  ASSERT_FALSE(schema_after.has_field("int32"));
}

// Alters the "int32" column of a populated collection: first a data type
// change to INT64, then a rename, followed by a query sanity check.
TEST_F(CollectionTest, Feature_AlterColumn_General) {
  // Build and flush a collection holding `doc_count` docs.
  const int doc_count = 1000;
  auto options = CollectionOptions{false, true, 64 * 1024 * 1024};
  auto schema = TestHelper::CreateNormalSchema();
  auto collection = TestHelper::CreateCollectionWithDoc(
      col_path, *schema, options, 0, doc_count, false);

  ASSERT_TRUE(collection->Flush().ok());
  auto stats = collection->Stats().value();
  ASSERT_EQ(stats.doc_count, doc_count);

  auto int64_field =
      std::make_shared<FieldSchema>("int32", DataType::INT64, false);

  // Supplying "int32" as the second argument together with a field schema is
  // expected to be rejected.
  auto status = collection->AlterColumn("int32", "int32", int64_field,
                                        AlterColumnOptions());
  ASSERT_FALSE(status.ok());

  // With an empty second argument the same field schema is accepted and the
  // column becomes INT64.
  status = collection->AlterColumn("int32", "", int64_field,
                                   AlterColumnOptions());
  ASSERT_TRUE(status.ok());

  auto new_schema = collection->Schema().value();
  ASSERT_TRUE(new_schema.has_field("int32"));
  ASSERT_TRUE(new_schema.get_field("int32")->data_type() == DataType::INT64);

  // Rename only (no field schema); the INT64 type must carry over.
  status = collection->AlterColumn("int32", "rename_in32", nullptr,
                                   AlterColumnOptions());
  ASSERT_TRUE(status.ok());
  new_schema = collection->Schema().value();
  ASSERT_FALSE(new_schema.has_field("int32"));
  ASSERT_TRUE(new_schema.has_field("rename_in32"));
  ASSERT_TRUE(new_schema.get_field("rename_in32")->data_type() ==
              DataType::INT64);

  // A plain top-k query must return docs exposing every field of the altered
  // schema.
  {
    VectorQuery query;
    query.topk_ = 10;
    query.include_vector_ = true;

    auto hits = collection->Query(query);
    if (!hits.has_value()) {
      std::cout << "err: " << hits.error().message() << std::endl;
    }
    ASSERT_TRUE(hits.has_value());

    const auto expected_hits = std::min(query.topk_, doc_count);
    ASSERT_EQ(hits.value().size(), expected_hits);

    auto schema_fields = new_schema.all_field_names();
    for (int pos = 0; pos < expected_hits; ++pos) {
      auto hit = hits.value()[pos];
      auto hit_fields = hit->field_names();
      ASSERT_TRUE(vectors_equal_when_sorted(schema_fields, hit_fields));
    }
  }
}

// Alters a column via a field schema carrying a different name and type, then
// reopens the collection to insert, fetch and query against the new schema.
TEST_F(CollectionTest, Feature_AlterColumn_CornerCase) {
  const int doc_count = 1000;
  auto options = CollectionOptions{false, true, 64 * 1024 * 1024};

  // Step 1: create the collection, fill it and flush.
  {
    auto schema = TestHelper::CreateNormalSchema();
    auto collection = TestHelper::CreateCollectionWithDoc(
        col_path, *schema, options, 0, doc_count, false);

    ASSERT_TRUE(collection->Flush().ok());
    auto stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, doc_count);
  }

  // Step 2: reopen and alter "int32" using a field schema named
  // "int32_to_int64" of type INT64; the old name must vanish from the schema.
  {
    auto opened = Collection::Open(col_path, options);
    ASSERT_TRUE(opened.has_value());
    auto collection = opened.value();

    auto altered_field =
        std::make_shared<FieldSchema>("int32_to_int64", DataType::INT64, false);
    auto status = collection->AlterColumn("int32", "", altered_field,
                                          AlterColumnOptions());
    ASSERT_TRUE(status.ok());

    auto new_schema = collection->Schema().value();
    ASSERT_FALSE(new_schema.has_field("int32"));
    ASSERT_TRUE(new_schema.has_field("int32_to_int64"));
    ASSERT_TRUE(new_schema.get_field("int32_to_int64")->data_type() ==
                DataType::INT64);
  }

  // Step 3: reopen, insert a second batch built from the altered schema, then
  // verify fetch and query results.
  {
    auto opened = Collection::Open(col_path, options);
    ASSERT_TRUE(opened.has_value());
    auto collection = opened.value();

    auto new_schema = collection->Schema().value();

    const int total = doc_count * 2;
    for (int id = doc_count; id < total; ++id) {
      std::vector<Doc> batch = {TestHelper::CreateDoc(id, new_schema)};
      auto inserted = collection->Insert(batch);
      ASSERT_TRUE(inserted.has_value());
      ASSERT_TRUE(inserted.value()[0].ok());
    }
    auto stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, doc_count * 2);

    // Fetches docs [0, count) by primary key and compares each one with the
    // doc regenerated from the altered schema.
    auto verify_docs = [&](int count) {
      for (int id = 0; id < count; ++id) {
        auto expected = TestHelper::CreateDoc(id, new_schema);
        auto fetched = collection->Fetch({expected.pk()});
        ASSERT_TRUE(fetched.has_value());
        ASSERT_EQ(fetched.value().size(), 1);
        ASSERT_EQ(fetched.value().count(expected.pk()), 1);
        auto actual = fetched.value()[expected.pk()];
        ASSERT_NE(actual, nullptr);
        if (*actual != expected) {
          std::cout << "       doc:" << actual->to_detail_string()
                    << std::endl;
          std::cout << "expect_doc:" << expected.to_detail_string()
                    << std::endl;
        }
        ASSERT_EQ(*actual, expected);
      }
    };

    verify_docs(total);

    // A plain top-k query must return docs exposing every field of the
    // altered schema.
    {
      VectorQuery query;
      query.topk_ = 10;
      query.include_vector_ = true;

      auto hits = collection->Query(query);
      if (!hits.has_value()) {
        std::cout << "err: " << hits.error().message() << std::endl;
      }
      ASSERT_TRUE(hits.has_value());

      const auto expected_hits = std::min(query.topk_, doc_count);
      ASSERT_EQ(hits.value().size(), expected_hits);

      auto schema_fields = new_schema.all_field_names();
      for (int pos = 0; pos < expected_hits; ++pos) {
        auto hit = hits.value()[pos];
        auto hit_fields = hit->field_names();
        ASSERT_TRUE(vectors_equal_when_sorted(schema_fields, hit_fields));
      }
    }
  }
}

// Interleaves AddColumn / DropColumn / AlterColumn with document inserts and
// finally verifies every stored doc against the resulting schema.
TEST_F(CollectionTest, Feature_Column_MixOperation) {
  int max_doc_per_count = 1000;
  // Schema whose per-segment doc limit is max_doc_per_count, so the inserts
  // below are presumably spread over several segments (seg1..seg3).
  auto schema = TestHelper::CreateNormalSchema(
      false, "demo", nullptr, std::make_shared<HnswIndexParams>(MetricType::IP),
      max_doc_per_count);
  auto options = CollectionOptions{false, true, 64 * 1024 * 1024};

  // create seg1
  auto collection = TestHelper::CreateCollectionWithDoc(
      col_path, *schema, options, 0, max_doc_per_count, false);
  // Guard the dereferences below against a failed creation.
  ASSERT_NE(collection, nullptr);

  // create seg2
  auto s = TestHelper::CollectionInsertDoc(collection, max_doc_per_count,
                                           max_doc_per_count * 3 / 2);
  // The insert status used to be overwritten without being inspected.
  if (!s.ok()) {
    std::cout << "status: " << s.message() << std::endl;
  }
  ASSERT_TRUE(s.ok());

  // add column
  auto field_schema =
      std::make_shared<FieldSchema>("add_int32", DataType::INT32, false);
  s = collection->AddColumn(field_schema, "int32", AddColumnOptions());
  if (!s.ok()) {
    std::cout << "status: " << s.message() << std::endl;
    ASSERT_TRUE(false);
  }
  auto new_schema = collection->Schema().value();
  ASSERT_TRUE(new_schema.has_field("add_int32"));

  auto stats = collection->Stats().value();
  ASSERT_EQ(stats.doc_count, max_doc_per_count * 3 / 2);

  // drop column
  s = collection->DropColumn("uint32");
  if (!s.ok()) {
    std::cout << "status: " << s.message() << std::endl;
    ASSERT_TRUE(false);
  }
  new_schema = collection->Schema().value();
  ASSERT_TRUE(!new_schema.has_field("uint32"));

  stats = collection->Stats().value();
  ASSERT_EQ(stats.doc_count, max_doc_per_count * 3 / 2);

  // alter column (rename only, no new field schema)
  s = collection->AlterColumn("int32", "rename_int32", nullptr,
                              AlterColumnOptions());
  if (!s.ok()) {
    std::cout << "status: " << s.message() << std::endl;
    ASSERT_TRUE(false);
  }
  new_schema = collection->Schema().value();
  ASSERT_TRUE(new_schema.has_field("rename_int32"));

  stats = collection->Stats().value();
  ASSERT_EQ(stats.doc_count, max_doc_per_count * 3 / 2);

  // create seg3
  s = TestHelper::CollectionInsertDoc(collection, max_doc_per_count * 3 / 2,
                                      max_doc_per_count * 5 / 2);
  // Check the insert status here as well before relying on the doc count.
  if (!s.ok()) {
    std::cout << "status: " << s.message() << std::endl;
  }
  ASSERT_TRUE(s.ok());

  stats = collection->Stats().value();
  ASSERT_EQ(stats.doc_count, max_doc_per_count * 5 / 2);

  // drop the renamed column
  s = collection->DropColumn("rename_int32");
  if (!s.ok()) {
    std::cout << "status: " << s.message() << std::endl;
    ASSERT_TRUE(false);
  }
  new_schema = collection->Schema().value();
  ASSERT_TRUE(!new_schema.has_field("rename_int32"));


  // Fetches docs [0, doc_count) by primary key and compares each one with the
  // doc regenerated from the final schema.
  auto check_doc = [&](int doc_count) {
    for (int i = 0; i < doc_count; i++) {
      auto expect_doc = TestHelper::CreateDoc(i, new_schema);
      auto result = collection->Fetch({expect_doc.pk()});
      ASSERT_TRUE(result.has_value());
      ASSERT_EQ(result.value().size(), 1);
      ASSERT_EQ(result.value().count(expect_doc.pk()), 1);
      auto doc = result.value()[expect_doc.pk()];
      ASSERT_NE(doc, nullptr);
      if (*doc != expect_doc) {
        std::cout << "       doc:" << doc->to_detail_string() << std::endl;
        std::cout << "expect_doc:" << expect_doc.to_detail_string()
                  << std::endl;
      }
      ASSERT_EQ(*doc, expect_doc);
    }
  };

  check_doc(max_doc_per_count * 5 / 2);
}

// Runs add / rename / drop column on a collection that holds no docs,
// reopening the collection before each operation.
TEST_F(CollectionTest, Feature_Column_MixOperation_Empty) {
  const int doc_count = 0;
  auto options = CollectionOptions{false, true, 64 * 1024 * 1024};

  // Step 1: create an empty collection and flush it.
  {
    auto schema = TestHelper::CreateNormalSchema();
    auto collection = TestHelper::CreateCollectionWithDoc(
        col_path, *schema, options, 0, doc_count, false);

    ASSERT_TRUE(collection->Flush().ok());

    auto stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, doc_count);
  }

  // Step 2: reopen and add the "add_int32" column.
  {
    auto opened = Collection::Open(col_path, options);
    ASSERT_TRUE(opened.has_value());
    auto collection = opened.value();

    auto added_field =
        std::make_shared<FieldSchema>("add_int32", DataType::INT32, false);
    auto status =
        collection->AddColumn(added_field, "int32", AddColumnOptions());
    ASSERT_TRUE(status.ok());

    auto schema_now = collection->Schema().value();
    ASSERT_TRUE(schema_now.has_field("add_int32"));

    auto stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, 0);
  }

  // Step 3: reopen, confirm the added column is still there, then rename it
  // to "rename_int32".
  {
    auto opened = Collection::Open(col_path, options);
    ASSERT_TRUE(opened.has_value());
    auto collection = opened.value();

    auto schema_now = collection->Schema().value();
    ASSERT_TRUE(schema_now.has_field("add_int32"));

    auto status = collection->AlterColumn("add_int32", "rename_int32", nullptr,
                                          AlterColumnOptions());
    ASSERT_TRUE(status.ok());

    schema_now = collection->Schema().value();
    ASSERT_FALSE(schema_now.has_field("add_int32"));
    ASSERT_TRUE(schema_now.has_field("rename_int32"));

    auto stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, 0);
  }

  // Step 4: reopen, confirm the rename is still there, then drop the column.
  {
    auto opened = Collection::Open(col_path, options);
    ASSERT_TRUE(opened.has_value());
    auto collection = opened.value();

    auto schema_now = collection->Schema().value();
    ASSERT_TRUE(schema_now.has_field("rename_int32"));

    auto status = collection->DropColumn("rename_int32");
    ASSERT_TRUE(status.ok());
    schema_now = collection->Schema().value();
    ASSERT_FALSE(schema_now.has_field("rename_int32"));

    auto stats = collection->Stats().value();
    ASSERT_EQ(stats.doc_count, 0);
  }
}

#if RABITQ_SUPPORTED
// Optimizes a collection whose only field is an FP32 dense vector indexed with
// HNSW_RABITQ, checking stored docs before Optimize, after it, and after a
// reopen.
TEST_F(CollectionTest, Feature_Optimize_HNSW_RABITQ) {
  // Runs the whole flow for one metric type and one Optimize concurrency.
  auto run_case = [](MetricType metric_type, int concurrency) {
    FileHelper::RemoveDirectory(col_path);

    const int doc_count = 1000;

    // Minimal schema: a single 128-dim FP32 dense vector field using
    // HNSW_RABITQ index parameters.
    auto rabitq_params = std::make_shared<HnswRabitqIndexParams>(
        metric_type, 7, 256, 16, 200, 0);
    auto schema = std::make_shared<CollectionSchema>("demo");
    schema->set_max_doc_count_per_segment(MAX_DOC_COUNT_PER_SEGMENT);
    schema->add_field(std::make_shared<FieldSchema>(
        "dense_fp32", DataType::VECTOR_FP32, 128, false, rabitq_params));

    auto options = CollectionOptions{false, true, 64 * 1024 * 1024};
    auto collection = TestHelper::CreateCollectionWithDoc(
        col_path, *schema, options, 0, doc_count, false);

    // Fetches every doc by primary key and compares it with the regenerated
    // doc. `collection` is captured by reference, so the check follows the
    // handle when it is replaced after the reopen below.
    auto verify_docs = [&]() {
      for (int id = 0; id < doc_count; ++id) {
        auto expected = TestHelper::CreateDoc(id, *schema);
        auto fetched = collection->Fetch({expected.pk()});
        ASSERT_TRUE(fetched.has_value());
        ASSERT_EQ(fetched.value().size(), 1);
        ASSERT_EQ(fetched.value().count(expected.pk()), 1);
        auto actual = fetched.value()[expected.pk()];
        ASSERT_NE(actual, nullptr);
        if (*actual != expected) {
          std::cout << "       doc:" << actual->to_detail_string()
                    << std::endl;
          std::cout << "expect_doc:" << expected.to_detail_string()
                    << std::endl;
        }
        ASSERT_EQ(*actual, expected);
      }
    };

    // Phase 1: docs are readable right after insertion.
    verify_docs();
    std::cout << "check success 1" << std::endl;

    // After the flush the vector index completeness is still reported as 0.
    ASSERT_TRUE(collection->Flush().ok());
    auto stats_before = collection->Stats().value();
    ASSERT_EQ(stats_before.doc_count, doc_count);
    ASSERT_EQ(stats_before.index_completeness["dense_fp32"], 0);

    // Phase 2: Optimize must succeed and bring completeness to 1.
    auto optimize_status = collection->Optimize(OptimizeOptions{concurrency});
    if (!optimize_status.ok()) {
      std::cout << optimize_status.message() << std::endl;
    }
    ASSERT_TRUE(optimize_status.ok());

    auto stats_after = collection->Stats().value();
    ASSERT_EQ(stats_after.doc_count, doc_count);
    ASSERT_EQ(stats_after.index_completeness["dense_fp32"], 1);

    verify_docs();
    std::cout << "check success 2" << std::endl;

    // Phase 3: release the handle, reopen from disk and check again.
    collection.reset();
    auto reopened = Collection::Open(col_path, options);
    ASSERT_TRUE(reopened.has_value());
    collection = std::move(reopened.value());

    verify_docs();
    std::cout << "check success 3" << std::endl;
  };

  run_case(MetricType::L2, 0);
  run_case(MetricType::L2, 4);
  run_case(MetricType::IP, 0);
  run_case(MetricType::IP, 4);
  // TODO: cosine dense not match, may be accuracy issue
  // run_case(MetricType::COSINE, 0);
  // run_case(MetricType::COSINE, 4);
}
#endif

// **** CORNER CASES **** //
// Corner cases for Collection::CreateAndOpen and Collection::Open: bad paths,
// rejected schemas, a second open of a live collection, and concurrent
// creation of the same path.
TEST_F(CollectionTest, CornerCase_CreateAndOpen) {
  // Collection::CreateAndOpen
  {
    {
      std::cout << "Collection::CreateAndOpen case 1" << std::endl;
      // Creating at a non-existent path must fail with these options
      // (CollectionOptions{true, false}; per the original note this is
      // read-only mode).
      auto schema = TestHelper::CreateNormalSchema();
      auto result = Collection::CreateAndOpen("non-exist-path", *schema,
                                              CollectionOptions{true, false});
      ASSERT_FALSE(result.has_value());
    }

    {
      std::cout << "Collection::CreateAndOpen case 2" << std::endl;
      // Creating on top of a directory that already exists must fail; the
      // directory is removed again afterwards.
      auto schema = TestHelper::CreateNormalSchema();
      FileHelper::CreateDirectory("invalid_path");
      auto result = Collection::CreateAndOpen("invalid_path", *schema,
                                              CollectionOptions{true, true});
      ASSERT_FALSE(result.has_value());
      FileHelper::RemoveDirectory("invalid_path");
    }

    {
      std::cout << "Collection::CreateAndOpen case 3" << std::endl;
      FileHelper::RemoveDirectory("invalid_path");
      // The path was removed just above, so this creation is expected to
      // succeed.
      auto schema = TestHelper::CreateNormalSchema();

      auto result = Collection::CreateAndOpen("invalid_path", *schema,
                                              CollectionOptions{false, true});
      if (!result.has_value()) {
        std::cout << result.error().message() << std::endl;
      }
      ASSERT_TRUE(result.has_value());

      // Opening the same path while the first handle is still alive must
      // fail. NOTE(review): presumably because the collection is still held
      // open by `result` — confirm.
      std::cout << "Collection::Open again" << std::endl;
      auto new_result = Collection::Open("invalid_path", CollectionOptions{});
      ASSERT_FALSE(new_result.has_value());

      // Release the first handle. The directory is left on disk here; it is
      // removed at the start of "Collection::Open case 2" below.
      result.value().reset();
      // FileHelper::RemoveDirectory("invalid_path");
    }

    {
      std::cout << "Collection::CreateAndOpen case 4" << std::endl;
      FileHelper::RemoveDirectory(col_path);
      // Schema built with FlatIndexParams as the third CreateNormalSchema
      // argument; creation must be rejected with INVALID_ARGUMENT.
      auto schema = TestHelper::CreateNormalSchema(
          false, "demo", std::make_shared<FlatIndexParams>(MetricType::IP));
      auto result = Collection::CreateAndOpen(col_path, *schema,
                                              CollectionOptions{false, true});
      ASSERT_FALSE(result.has_value());
      ASSERT_EQ(result.error().code(), StatusCode::INVALID_ARGUMENT);
      std::cout << result.error().message() << std::endl;
    }

    {
      std::cout << "Collection::CreateAndOpen case 5" << std::endl;
      FileHelper::RemoveDirectory(col_path);
      // Schema from CreateScalarSchema; creation must be rejected with
      // INVALID_ARGUMENT.
      auto schema = TestHelper::CreateScalarSchema();
      auto result = Collection::CreateAndOpen(col_path, *schema,
                                              CollectionOptions{false, true});
      ASSERT_FALSE(result.has_value());
      ASSERT_EQ(result.error().code(), StatusCode::INVALID_ARGUMENT);
      std::cout << result.error().message() << std::endl;
    }
  }

  {
    std::cout << "Collection::CreateAndOpen case 6" << std::endl;
    FileHelper::RemoveDirectory(col_path);
    auto schema = TestHelper::CreateNormalSchema();

    // Start 10 threads that all try to create and open the same path. Failed
    // attempts record their error status under `mtx`.
    std::vector<std::thread> threads;
    std::mutex mtx;
    std::vector<Status> statuses;
    for (int i = 0; i < 10; i++) {
      threads.emplace_back([&]() {
        auto result = Collection::CreateAndOpen(col_path, *schema,
                                                CollectionOptions{false, true});
        if (!result.has_value()) {
          std::cout << result.error().message() << std::endl;
          std::lock_guard<std::mutex> lck(mtx);
          statuses.emplace_back(result.error());
        }
      });
    }

    // Every thread is joined before `statuses` is read.
    for (auto &t : threads) {
      t.join();
    }

    // Exactly one of the 10 attempts may succeed.
    ASSERT_EQ(statuses.size(), 9);
  }

  // Collection::Open
  {
    {
      std::cout << "Collection::Open case 1" << std::endl;
      // open collection with non-exist path
      auto result = Collection::Open("non-exist-path", CollectionOptions{});
      ASSERT_FALSE(result.has_value());
    }

    {
      std::cout << "Collection::Open case 2" << std::endl;
      // Open a freshly created, empty directory (so it contains no manifest);
      // this must fail.
      FileHelper::RemoveDirectory("invalid_path");
      FileHelper::CreateDirectory("invalid_path");
      auto result = Collection::Open("invalid_path", CollectionOptions{});
      ASSERT_FALSE(result.has_value());
      FileHelper::RemoveDirectory("invalid_path");
    }
  }
}

// CreateIndex / DropIndex requests that must be rejected on an empty
// collection: unknown fields and index kinds that do not fit the field.
TEST_F(CollectionTest, CornerCase_CreateIndex) {
  auto options = CollectionOptions{false, true, 64 * 1024 * 1024};
  auto schema = TestHelper::CreateNormalSchema();
  auto collection = TestHelper::CreateCollectionWithDoc(col_path, *schema,
                                                        options, 0, 0, false);

  // Unknown field: CreateIndex reports NOT_FOUND.
  auto create_missing = collection->CreateIndex(
      "non-exist", std::make_shared<FlatIndexParams>(MetricType::IP));
  ASSERT_FALSE(create_missing.ok());
  ASSERT_EQ(create_missing.code(), StatusCode::NOT_FOUND);

  // Unknown field: DropIndex reports NOT_FOUND as well.
  auto drop_missing = collection->DropIndex("non-exist");
  ASSERT_EQ(drop_missing.code(), StatusCode::NOT_FOUND);

  // Flat (vector) index parameters on the scalar field "uint32".
  auto flat_on_scalar = collection->CreateIndex(
      "uint32", std::make_shared<FlatIndexParams>(MetricType::IP));
  ASSERT_FALSE(flat_on_scalar.ok());
  ASSERT_EQ(flat_on_scalar.code(), StatusCode::INVALID_ARGUMENT);

  // Invert (scalar) index parameters on the dense vector field.
  auto invert_on_dense = collection->CreateIndex(
      "dense_fp32", std::make_shared<InvertIndexParams>(true));
  ASSERT_FALSE(invert_on_dense.ok());
  ASSERT_EQ(invert_on_dense.code(), StatusCode::INVALID_ARGUMENT);

  // Invert (scalar) index parameters on the sparse vector field.
  auto invert_on_sparse = collection->CreateIndex(
      "sparse_fp32", std::make_shared<InvertIndexParams>(true));
  ASSERT_FALSE(invert_on_sparse.ok());
  ASSERT_EQ(invert_on_sparse.code(), StatusCode::INVALID_ARGUMENT);

  // IVF index parameters on the sparse vector field.
  auto ivf_on_sparse = collection->CreateIndex(
      "sparse_fp32", std::make_shared<IVFIndexParams>(MetricType::IP));
  ASSERT_FALSE(ivf_on_sparse.ok());
  ASSERT_EQ(ivf_on_sparse.code(), StatusCode::INVALID_ARGUMENT);
}

================================================
FILE: tests/db/common/CMakeLists.txt
================================================
# Project-wide CMake helpers. The cc_gmock() and cc_test_suite() commands used
# below are presumably defined by these includes — confirm in cmake/bazel.cmake.
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

# Extra link flags for Apple platforms: system frameworks plus
# `-Wl,-U,<symbol>` entries, which tell the Apple linker that the listed
# symbols are allowed to remain undefined at link time.
if(APPLE)
    set(APPLE_FRAMEWORK_LIBS
        -framework CoreFoundation
        -framework CoreGraphics
        -framework CoreData
        -framework CoreText
        -framework Security
        -framework Foundation
        -Wl,-U,_MallocExtension_ReleaseFreeMemory
        -Wl,-U,_ProfilerStart
        -Wl,-U,_ProfilerStop
        -Wl,-U,_RegisterThriftProtocol
    )
endif()

# Create one gmock test target per *_test.cc file in this directory. The target
# name is the file name without directory and extension (NAME_WE). Each target
# is then registered with the zvec_common test suite.
file(GLOB ALL_TEST_SRCS *_test.cc)
foreach(CC_SRCS ${ALL_TEST_SRCS})
    get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)
    cc_gmock(
        NAME ${CC_TARGET} STRICT
        LIBS zvec_common
        ${CMAKE_THREAD_LIBS_INIT}
        ${CMAKE_DL_LIBS}
        SRCS ${CC_SRCS}
        INCS .. ../../src
        LDFLAGS ${APPLE_FRAMEWORK_LIBS}
    )
    cc_test_suite(zvec_common ${CC_TARGET})
endforeach()


================================================
FILE: tests/db/common/config_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "zvec/db/config.h"
#include <gtest/gtest.h>
#include "zvec/db/status.h"

using namespace zvec;

// Fixture shared by all GlobalConfig test cases.
//
// No per-test setup is performed. Per the original author's note, GlobalConfig
// is a singleton that uses an atomic flag and cannot easily be reset, so a
// reset hook (or a test-only reset API on GlobalConfig) would be needed to
// isolate the tests from each other.
class ConfigTest : public ::testing::Test {};

TEST_F(ConfigTest, InitializeWithDefaultConfig) {
  // A default-constructed ConfigData must be accepted unchanged.
  GlobalConfig::ConfigData default_config;
  auto init_status = GlobalConfig::Instance().Initialize(default_config);
  ASSERT_TRUE(init_status.ok())
      << "Initialization failed: " << init_status.message();

  // Each getter is expected to report the built-in default afterwards.
  auto &global = GlobalConfig::Instance();
  ASSERT_GT(global.memory_limit_bytes(), 0);
  ASSERT_EQ(global.log_level(), GlobalConfig::LogLevel::WARN);
  ASSERT_EQ(global.log_type(), "ConsoleLogger");
  ASSERT_GT(global.query_thread_count(), 0);
  ASSERT_EQ(global.invert_to_forward_scan_ratio(), 0.9f);
  ASSERT_EQ(global.brute_force_by_keys_ratio(), 0.1f);
  ASSERT_GT(global.optimize_thread_count(), 0);
}

TEST_F(ConfigTest, InitializeWithCustomConsoleLogConfig) {
  // Custom console logger plus explicit resource limits.
  GlobalConfig::ConfigData config;
  config.log_config = std::make_shared<GlobalConfig::ConsoleLogConfig>(
      GlobalConfig::LogLevel::DEBUG);
  config.memory_limit_bytes = 1024 * 1024 * 1024;  // 1GB
  config.query_thread_count = 4;
  config.optimize_thread_count = 2;

  auto status = GlobalConfig::Instance().Initialize(config);

  // The singleton may already have been initialized by an earlier test in
  // this process; if that is reported as an error there is nothing to verify.
  if (status.code() == StatusCode::INVALID_ARGUMENT &&
      status.message().find("already initialized") != std::string::npos) {
    GTEST_SKIP() << "GlobalConfig already initialized";
  }

  // Any other outcome must be success; without this check the test could
  // never fail, whatever Initialize() returned.
  ASSERT_TRUE(status.ok()) << "Initialization failed: " << status.message();
}

TEST_F(ConfigTest, InitializeWithCustomFileLogConfig) {
  // File logger configuration plus explicit resource limits.
  GlobalConfig::ConfigData config;
  auto file_config = std::make_shared<GlobalConfig::FileLogConfig>(
      GlobalConfig::LogLevel::INFO, "/tmp/logs", "test.log", 1024, 14);
  config.log_config = file_config;
  config.memory_limit_bytes = 2 * 1024 * 1024 * 1024ULL;  // 2GB
  config.query_thread_count = 8;
  config.optimize_thread_count = 4;

  auto status = GlobalConfig::Instance().Initialize(config);

  // The singleton may already have been initialized by an earlier test in
  // this process; if that is reported as an error there is nothing to verify.
  if (status.code() == StatusCode::INVALID_ARGUMENT &&
      status.message().find("already initialized") != std::string::npos) {
    GTEST_SKIP() << "GlobalConfig already initialized";
  }

  // Any other outcome must be success; without this check the test could
  // never fail, whatever Initialize() returned.
  ASSERT_TRUE(status.ok()) << "Initialization failed: " << status.message();
}

TEST_F(ConfigTest, DoubleInitializationSilentlyFails) {
  GlobalConfig::ConfigData config;
  auto &global = GlobalConfig::Instance();

  auto first = global.Initialize(config);
  const bool rejected_as_duplicate =
      first.code() == StatusCode::INVALID_ARGUMENT &&
      first.message().find("already initialized") != std::string::npos;

  if (!rejected_as_duplicate) {
    // The first call went through. A repeated call is expected to be
    // accepted as well and to act as a no-op.
    ASSERT_TRUE(first.ok());

    auto second = global.Initialize(config);
    ASSERT_TRUE(second.ok());
    return;
  }

  // The first call was rejected as a duplicate; a further attempt must be
  // rejected in exactly the same way.
  auto second = global.Initialize(config);
  ASSERT_FALSE(second.ok());
  ASSERT_EQ(second.code(), StatusCode::INVALID_ARGUMENT);
  ASSERT_NE(second.message().find("already initialized"), std::string::npos);
}

TEST_F(ConfigTest, ValidateConfigWithInvalidMemoryLimit) {
  // A zero memory limit must be rejected.
  GlobalConfig::ConfigData bad_config;
  bad_config.memory_limit_bytes = 0;

  // Validation runs on a local instance rather than on the shared singleton.
  GlobalConfig validator;
  auto verdict = validator.Validate(bad_config);

  ASSERT_FALSE(verdict.ok());
  ASSERT_EQ(verdict.code(), StatusCode::INVALID_ARGUMENT);
  ASSERT_NE(verdict.message().find("memory_limit_bytes must be greater than"),
            std::string::npos);
}

TEST_F(ConfigTest, ValidateConfigWithInvalidQueryThreadCount) {
  // A query thread count of zero must be rejected.
  GlobalConfig::ConfigData bad_config;
  bad_config.query_thread_count = 0;

  GlobalConfig validator;
  auto verdict = validator.Validate(bad_config);

  ASSERT_FALSE(verdict.ok());
  ASSERT_EQ(verdict.code(), StatusCode::INVALID_ARGUMENT);
  ASSERT_NE(verdict.message().find("query_thread_count must be greater than 0"),
            std::string::npos);
}

TEST_F(ConfigTest, ValidateConfigWithInvalidRatios) {
  GlobalConfig::ConfigData ratios;
  GlobalConfig validator;

  // Case 1: invert_to_forward_scan_ratio below the allowed range.
  ratios.invert_to_forward_scan_ratio = -0.1f;
  auto verdict = validator.Validate(ratios);
  ASSERT_FALSE(verdict.ok());
  ASSERT_EQ(verdict.code(), StatusCode::INVALID_ARGUMENT);
  ASSERT_NE(verdict.message().find(
                "invert_to_forward_scan_ratio must be between 0 and 1"),
            std::string::npos);

  // Case 2: restore the first ratio to a valid value, then push
  // brute_force_by_keys_ratio above the allowed range.
  ratios.invert_to_forward_scan_ratio = 0.9f;
  ratios.brute_force_by_keys_ratio = 1.5f;
  verdict = validator.Validate(ratios);
  ASSERT_FALSE(verdict.ok());
  ASSERT_EQ(verdict.code(), StatusCode::INVALID_ARGUMENT);
  ASSERT_NE(verdict.message().find(
                "brute_force_by_keys_ratio must be between 0 and 1"),
            std::string::npos);
}

TEST_F(ConfigTest, ValidateConfigWithInvalidFileLogSettings) {
  // One file-log config is reused for all cases: each step repairs the
  // previous defect and introduces the next one.
  auto log_settings = std::make_shared<GlobalConfig::FileLogConfig>();
  GlobalConfig::ConfigData config;
  config.log_config = log_settings;
  GlobalConfig validator;

  // Empty log directory.
  log_settings->dir = "";
  auto verdict = validator.Validate(config);
  ASSERT_FALSE(verdict.ok());
  ASSERT_EQ(verdict.code(), StatusCode::INVALID_ARGUMENT);
  ASSERT_NE(verdict.message().find("log_dir cannot be empty"),
            std::string::npos);

  // Valid directory, empty basename.
  log_settings->dir = "/tmp/logs";
  log_settings->basename = "";
  verdict = validator.Validate(config);
  ASSERT_FALSE(verdict.ok());
  ASSERT_EQ(verdict.code(), StatusCode::INVALID_ARGUMENT);
  ASSERT_NE(verdict.message().find("log_file basename cannot be empty"),
            std::string::npos);

  // Valid basename, zero file size.
  log_settings->basename = "test.log";
  log_settings->file_size = 0;
  verdict = validator.Validate(config);
  ASSERT_FALSE(verdict.ok());
  ASSERT_EQ(verdict.code(), StatusCode::INVALID_ARGUMENT);
  ASSERT_NE(verdict.message().find("log file_size must be greater than"),
            std::string::npos);

  // Valid file size, zero overdue days.
  log_settings->file_size = 1024;
  log_settings->overdue_days = 0;
  verdict = validator.Validate(config);
  ASSERT_FALSE(verdict.ok());
  ASSERT_EQ(verdict.code(), StatusCode::INVALID_ARGUMENT);
  ASSERT_NE(verdict.message().find("log_overdue_days must be greater than 0"),
            std::string::npos);
}

TEST_F(ConfigTest, LogLevelEnumValues) {
  // Pins the numeric value of every log level.
  using Level = GlobalConfig::LogLevel;

  ASSERT_EQ(static_cast<int>(Level::DEBUG), 0);
  ASSERT_EQ(static_cast<int>(Level::INFO), 1);
  ASSERT_EQ(static_cast<int>(Level::WARN), 2);
  ASSERT_EQ(static_cast<int>(Level::ERROR), 3);
  ASSERT_EQ(static_cast<int>(Level::FATAL), 4);
}

TEST_F(ConfigTest, LogConfigPolymorphism) {
  // Each concrete log config must report its own logger type name.
  auto console_settings = std::make_shared<GlobalConfig::ConsoleLogConfig>();
  ASSERT_EQ(console_settings->GetLoggerType(), CONSOLE_LOG_TYPE_NAME);

  auto file_settings = std::make_shared<GlobalConfig::FileLogConfig>();
  ASSERT_EQ(file_settings->GetLoggerType(), FILE_LOG_TYPE_NAME);
}

================================================
FILE: tests/db/common/status_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "zvec/db/status.h"
#include <gtest/gtest.h>

using namespace zvec;

TEST(StatusTest, DefaultConstructor) {
  // A default-constructed Status is OK and carries an empty message.
  Status fresh;

  EXPECT_TRUE(fresh.ok());
  EXPECT_EQ(fresh.code(), StatusCode::OK);
  EXPECT_EQ(fresh.message(), "");
}

TEST(StatusTest, ConstructorWithCodeAndMessage) {
  // Constructing from a code and an lvalue message stores both.
  std::string text = "Test error message";
  Status failure(StatusCode::INVALID_ARGUMENT, text);

  EXPECT_FALSE(failure.ok());
  EXPECT_EQ(failure.code(), StatusCode::INVALID_ARGUMENT);
  EXPECT_EQ(failure.message(), text);
}

TEST(StatusTest, ConstructorWithRvalueMessage) {
  // Constructing from a moved-in message must preserve the message text.
  std::string text = "Test error message";
  Status failure(StatusCode::NOT_FOUND, std::move(text));

  EXPECT_FALSE(failure.ok());
  EXPECT_EQ(failure.code(), StatusCode::NOT_FOUND);
  EXPECT_EQ(failure.message(), "Test error message");
}

TEST(StatusTest, CopyConstructor) {
  // The copy carries the same code and message, and the source is unchanged.
  Status source(StatusCode::INTERNAL_ERROR, "Copy test");
  Status duplicate(source);

  EXPECT_FALSE(duplicate.ok());
  EXPECT_EQ(duplicate.code(), StatusCode::INTERNAL_ERROR);
  EXPECT_EQ(duplicate.message(), "Copy test");
  EXPECT_EQ(source.code(), duplicate.code());
  EXPECT_EQ(source.message(), duplicate.message());
}

TEST(StatusTest, CopyAssignment) {
  // Assigning over an OK status replaces both its code and its message.
  Status source(StatusCode::PERMISSION_DENIED, "Assignment test");
  Status target;
  target = source;

  EXPECT_FALSE(target.ok());
  EXPECT_EQ(target.code(), StatusCode::PERMISSION_DENIED);
  EXPECT_EQ(target.message(), "Assignment test");
}

TEST(StatusTest, MoveConstructor) {
  // The move-constructed status takes over the source's code and message.
  Status source(StatusCode::RESOURCE_EXHAUSTED, "Move test");
  Status target(std::move(source));

  EXPECT_FALSE(target.ok());
  EXPECT_EQ(target.code(), StatusCode::RESOURCE_EXHAUSTED);
  EXPECT_EQ(target.message(), "Move test");
}

TEST(StatusTest, MoveAssignment) {
  // Move-assigning over an OK status transfers the code and the message.
  Status source(StatusCode::UNAVAILABLE, "Move assignment test");
  Status target;
  target = std::move(source);

  EXPECT_FALSE(target.ok());
  EXPECT_EQ(target.code(), StatusCode::UNAVAILABLE);
  EXPECT_EQ(target.message(), "Move assignment test");
}

TEST(StatusTest, ComparisonOperators) {
  // Two identical errors, one different error, and two OK statuses.
  Status error_a(StatusCode::INVALID_ARGUMENT, "Error 1");
  Status error_a_twin(StatusCode::INVALID_ARGUMENT, "Error 1");
  Status error_b(StatusCode::NOT_FOUND, "Error 2");
  Status ok_a;
  Status ok_b;

  // operator==
  EXPECT_TRUE(error_a == error_a_twin);
  EXPECT_FALSE(error_a == error_b);
  EXPECT_TRUE(ok_a == ok_b);
  EXPECT_FALSE(error_a == ok_a);

  // operator!= must be the exact negation of the results above.
  EXPECT_FALSE(error_a != error_a_twin);
  EXPECT_TRUE(error_a != error_b);
  EXPECT_FALSE(ok_a != ok_b);
  EXPECT_TRUE(error_a != ok_a);
}

TEST(StatusTest, FactoryMethods) {
  // Each factory must yield a non-OK status with the matching code and a
  // non-empty message built from its arguments.
  auto bad_argument = Status::InvalidArgument("Invalid arg: ", 42);
  EXPECT_FALSE(bad_argument.ok());
  EXPECT_EQ(bad_argument.code(), StatusCode::INVALID_ARGUMENT);
  EXPECT_FALSE(bad_argument.message().empty());

  auto missing = Status::NotFound("Not found: ", "key");
  EXPECT_FALSE(missing.ok());
  EXPECT_EQ(missing.code(), StatusCode::NOT_FOUND);
  EXPECT_FALSE(missing.message().empty());

  auto duplicate = Status::AlreadyExists("Already exists: ", "item");
  EXPECT_FALSE(duplicate.ok());
  EXPECT_EQ(duplicate.code(), StatusCode::ALREADY_EXISTS);
  EXPECT_FALSE(duplicate.message().empty());

  auto internal = Status::InternalError("Internal error: ", "details");
  EXPECT_FALSE(internal.ok());
  EXPECT_EQ(internal.code(), StatusCode::INTERNAL_ERROR);
  EXPECT_FALSE(internal.message().empty());

  auto denied = Status::PermissionDenied("Permission denied for: ", "resource");
  EXPECT_FALSE(denied.ok());
  EXPECT_EQ(denied.code(), StatusCode::PERMISSION_DENIED);
  EXPECT_FALSE(denied.message().empty());
}

TEST(StatusTest, OKFactory) {
  // Status::OK() yields a successful status with an empty message.
  auto success = Status::OK();

  EXPECT_TRUE(success.ok());
  EXPECT_EQ(success.code(), StatusCode::OK);
  EXPECT_EQ(success.message(), "");
}

TEST(StatusTest, CStringConversion) {
  // c_str() exposes the message text of an error status...
  Status failure(StatusCode::UNKNOWN, "C string test");
  EXPECT_STREQ(failure.c_str(), "C string test");

  // ...and an empty string for a default (OK) status.
  Status success;
  EXPECT_STREQ(success.c_str(), "");
}

TEST(StatusTest, OutputStreamOperator) {
  // An error status streams both the default text for its code and its own
  // message.
  Status failure(StatusCode::INVALID_ARGUMENT, "Stream test");
  std::ostringstream error_out;
  error_out << failure;
  EXPECT_FALSE(error_out.str().empty());
  EXPECT_NE(
      error_out.str().find(GetDefaultMessage(StatusCode::INVALID_ARGUMENT)),
      std::string::npos);
  EXPECT_NE(error_out.str().find("Stream test"), std::string::npos);

  // An OK status streams text containing "OK".
  Status success;
  std::ostringstream ok_out;
  ok_out << success;
  EXPECT_FALSE(ok_out.str().empty());
  EXPECT_NE(ok_out.str().find("OK"), std::string::npos);
}

================================================
FILE: tests/db/crash_recovery/CMakeLists.txt
================================================
# Project-wide CMake helpers and options; presumably these provide the
# cc_binary() / cc_gmock() / cc_test_suite() commands used below.
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

# Extra linker flags applied only on Apple platforms: the system frameworks to
# link against, plus `-Wl,-U,<symbol>` entries that allow the named symbols to
# remain undefined at link time.
if(APPLE)
  set(APPLE_FRAMEWORK_LIBS
    -framework CoreFoundation
    -framework CoreGraphics
    -framework CoreData
    -framework CoreText
    -framework Security
    -framework Foundation
    -Wl,-U,_MallocExtension_ReleaseFreeMemory
    -Wl,-U,_ProfilerStart
    -Wl,-U,_ProfilerStop
    -Wl,-U,_RegisterThriftProtocol
  )
endif()


# Build the data_generator executable: a standalone writer process that the
# crash-recovery tests launch (and may kill) as a child process.
cc_binary(
    NAME data_generator
    LIBS zvec_db
    zvec_proto
    core_knn_flat
    core_knn_flat_sparse
    core_knn_hnsw
    core_knn_hnsw_sparse
    core_knn_ivf
    core_knn_hnsw_rabitq
    core_mix_reducer
    core_metric
    core_utility
    core_quantizer
    ${CMAKE_THREAD_LIBS_INIT}
    ${CMAKE_DL_LIBS}
    SRCS data_generator.cc
    INCS .. ../../src
    LDFLAGS ${APPLE_FRAMEWORK_LIBS}
)


# Build test executables: every *_test.cc file in this directory becomes its
# own target, linked against the same library set as data_generator.
file(GLOB ALL_TEST_SRCS *_test.cc)
foreach(CC_SRCS ${ALL_TEST_SRCS})
    get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)
    cc_gmock(
        NAME ${CC_TARGET} STRICT
        LIBS zvec_db
        zvec_proto
        core_knn_flat
        core_knn_flat_sparse
        core_knn_hnsw
        core_knn_hnsw_sparse
        core_knn_ivf
        core_knn_hnsw_rabitq
        core_mix_reducer
        core_metric
        core_utility
        core_quantizer
        ${CMAKE_THREAD_LIBS_INIT}
        ${CMAKE_DL_LIBS}
        SRCS ${CC_SRCS}
        INCS .. ../../src
        LDFLAGS ${APPLE_FRAMEWORK_LIBS}
    )
    # The tests execute the data_generator binary at runtime, so it has to be
    # built before any test target.
    add_dependencies(${CC_TARGET} data_generator)
    cc_test_suite(zvec_crash_recovery ${CC_TARGET})
endforeach()


================================================
FILE: tests/db/crash_recovery/data_generator.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#include <unistd.h>
#include <filesystem>
#include <thread>
#include <zvec/db/collection.h>
#include "zvec/ailego/logger/logger.h"
#include "utility.h"


// Maximum number of documents submitted to the collection per write call.
constexpr int kBatchSize = 20;
// Pause, in milliseconds, inserted between two consecutive batches.
constexpr int kBatchDelayMs = 10;


// Command-line configuration for one data_generator run.
struct Config {
  std::string path;       // Collection path (--path)
  int start_id = 0;       // First document ID, inclusive (--start)
  int end_id = 0;         // Last document ID, exclusive (--end)
  std::string operation;  // "insert", "upsert", "update", "delete"
  int version = 999999;   // Document version (--version); 999999 = not given
};


// Parses `text` as a complete base-10 integer and stores it in `out`.
//
// Returns false and reports the problem on stderr (leaving `out` untouched)
// when `text` is not a number, has trailing characters, or does not fit in an
// int. std::stoi signals those cases by throwing, which would otherwise
// terminate the program with an uncaught exception.
static bool ParseIntArg(const std::string &flag, const char *text, int &out) {
  try {
    const std::string value(text);
    std::size_t consumed = 0;
    const int parsed = std::stoi(value, &consumed);
    if (consumed == value.size()) {
      out = parsed;
      return true;
    }
  } catch (const std::exception &) {
    // Fall through to the shared error report below.
  }
  std::cerr << "Error: Invalid integer value '" << text << "' for " << flag
            << std::endl;
  return false;
}


// Fills `config` from the command line.
//
// Recognized flags are --path, --start, --end, --op and --version, each
// followed by a value; unrecognized arguments are ignored.
//
// Returns true only when all required values are present and valid. Returns
// false when help was requested (--help / -h), a numeric value is malformed,
// a required value is missing, the ID range is empty, or the operation name
// is unknown; the caller is expected to print the usage text in that case.
bool ParseArgs(int argc, char **argv, Config &config) {
  for (int i = 1; i < argc; i++) {
    const std::string arg = argv[i];
    const bool has_value = i + 1 < argc;

    if (arg == "--path" && has_value) {
      config.path = argv[++i];
    } else if (arg == "--start" && has_value) {
      if (!ParseIntArg(arg, argv[++i], config.start_id)) {
        return false;
      }
    } else if (arg == "--end" && has_value) {
      if (!ParseIntArg(arg, argv[++i], config.end_id)) {
        return false;
      }
    } else if (arg == "--op" && has_value) {
      config.operation = argv[++i];
    } else if (arg == "--version" && has_value) {
      if (!ParseIntArg(arg, argv[++i], config.version)) {
        return false;
      }
    } else if (arg == "--help" || arg == "-h") {
      return false;
    }
  }

  // Validate required arguments (999999 is the "version not given" sentinel)
  if (config.path.empty() || config.operation.empty() ||
      config.start_id >= config.end_id || config.version == 999999) {
    return false;
  }

  // Validate operation
  if (config.operation != "insert" && config.operation != "upsert" &&
      config.operation != "update" && config.operation != "delete") {
    std::cerr << "Error: Invalid operation '" << config.operation
              << "'. Must be 'insert', 'upsert', 'update', or 'delete'."
              << std::endl;
    return false;
  }

  return true;
}


// Prints the command-line help text to stdout.
//
// `program` is the executable name shown in the usage line and the examples.
// All five options are required, so the usage line lists every one of them,
// including --version.
void PrintUsage(const char *program) {
  std::cout << "Usage: " << program
            << " --path <collection_path> --start <start_id> --end <end_id> "
               "--op <operation> --version <version>"
            << std::endl;
  std::cout << std::endl;
  std::cout << "Arguments:" << std::endl;
  std::cout << "  --path      Path to the collection (required)" << std::endl;
  std::cout << "  --start     Starting document ID (inclusive, required)"
            << std::endl;
  std::cout << "  --end       Ending document ID (exclusive, required)"
            << std::endl;
  std::cout
      << "  --op        Operation: insert, upsert, update, or delete (required)"
      << std::endl;
  std::cout
      << "  --version   Version used to generate the document contents (required)"
      << std::endl;
  std::cout << std::endl;
  std::cout << "Examples:" << std::endl;
  std::cout << "  # Insert 1000 documents (pk_0 to pk_999)" << std::endl;
  std::cout << "  " << program
            << " --path ./test_db --start 0 --end 1000 --op insert --version 0"
            << std::endl;
  std::cout << std::endl;
  std::cout << "  # Update documents 1000-1999" << std::endl;
  std::cout
      << "  " << program
      << " --path ./test_db --start 1000 --end 2000 --op update --version 1"
      << std::endl;
  std::cout << std::endl;
  std::cout << "  # Upsert documents 0-499" << std::endl;
  std::cout << "  " << program
            << " --path ./test_db --start 0 --end 500 --op upsert --version 2"
            << std::endl;
}


// Entry point: applies one write operation (insert/upsert/update/delete) to
// the documents with IDs in [start, end) of an existing collection, working
// in batches of kBatchSize with a kBatchDelayMs pause between batches.
//
// Exit codes: 0 on success, 1 on invalid arguments or a failed write, and -1
// when the collection cannot be opened.
int main(int argc, char **argv) {
  Config config;

  // Parse arguments
  if (!ParseArgs(argc, argv, config)) {
    PrintUsage(argv[0]);
    return 1;
  }

  // The working directory is printed for diagnostics only; failing to obtain
  // it is not fatal.
  try {
    std::filesystem::path cwd = std::filesystem::current_path();
    std::cout << "[data_generator] Current Working Directory: " << cwd.string()
              << std::endl;
  } catch (const std::filesystem::filesystem_error &e) {
    std::cout
        << "[data_generator] Failed to get the current working directory: "
        << e.what() << std::endl;
  }

  std::cout << "Configuration:" << std::endl;
  std::cout << "  Path:      " << config.path << std::endl;
  std::cout << "  Range:     [" << config.start_id << ", " << config.end_id
            << ")" << std::endl;
  std::cout << "  Operation: " << config.operation << std::endl;
  std::cout << "  BatchSize: " << kBatchSize << std::endl;
  std::cout << "  BatchDelay: " << kBatchDelayMs << "ms" << std::endl;
  std::cout << std::endl;

  auto result = zvec::Collection::Open(
      config.path, zvec::CollectionOptions{false, true, 4 * 1024 * 1024});
  if (!result) {
    LOG_ERROR("Failed to open collection[%s]: %s", config.path.c_str(),
              result.error().c_str());
    return -1;
  }

  auto collection = result.value();
  LOG_INFO("Collection[%s] opened successfully", config.path.c_str());

  // Process documents in batches. ParseArgs guarantees start_id < end_id, so
  // total_docs is strictly positive.
  const int total_docs = config.end_id - config.start_id;
  int processed = 0;
  int batch_num = 0;
  int reported_decile = 0;  // Highest 10% step that has already been logged
  int next_id = config.start_id;

  while (next_id < config.end_id) {
    const int batch_end = std::min(next_id + kBatchSize, config.end_id);
    const int batch_count = batch_end - next_id;

    std::vector<zvec::Doc> docs;
    docs.reserve(batch_count);
    for (int i = next_id; i < batch_end; i++) {
      docs.push_back(zvec::CreateTestDoc(i, config.version));
    }

    zvec::Result<zvec::WriteResults> results;
    if (config.operation == "insert") {
      results = collection->Insert(docs);
    } else if (config.operation == "upsert") {
      results = collection->Upsert(docs);
    } else if (config.operation == "update") {
      results = collection->Update(docs);
    } else if (config.operation == "delete") {
      // Delete takes primary keys rather than whole documents
      std::vector<std::string> pks{};
      for (const auto &doc : docs) {
        pks.emplace_back(doc.pk());
      }
      results = collection->Delete(pks);
    }
    if (!results) {
      LOG_ERROR("Failed to perform operation[%s], reason: %s",
                config.operation.c_str(), results.error().message().c_str());
      return 1;
    }
    // The call as a whole succeeded; each document still has its own status
    for (auto &s : results.value()) {
      if (!s.ok()) {
        LOG_ERROR("Failed to perform operation[%s], reason: %s",
                  config.operation.c_str(), s.message().c_str());
        return 1;
      }
    }

    processed += batch_count;
    next_id = batch_end;
    batch_num++;

    // Log once for every newly crossed 10% boundary, using the percentage
    // actually reached. The widening to long long keeps the multiplication
    // from overflowing int for very large ranges.
    const int decile =
        static_cast<int>(static_cast<long long>(processed) * 10 / total_docs);
    if (decile > reported_decile) {
      reported_decile = decile;
      LOG_INFO("Progress: %d%% (%d/%d documents)", decile * 10, processed,
               total_docs);
    }

    // Sleep between batches (but not after the last one)
    if (next_id < config.end_id) {
      std::this_thread::sleep_for(std::chrono::milliseconds(kBatchDelayMs));
    }
  }

  std::cout << std::endl;
  std::cout << "Success! Processed " << processed << " documents in "
            << batch_num << " batches." << std::endl;

  return 0;
}


================================================
FILE: tests/db/crash_recovery/utility.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#pragma once


#include <zvec/db/collection.h>
#include <zvec/db/doc.h>


namespace zvec {

/**
 * @brief Build the fixed schema used by the crash-recovery tests.
 *
 * The schema holds seven scalar/array fields followed by one dense and one
 * sparse FP32 vector field (both with HNSW index parameters), and limits each
 * segment to 10000 documents.
 *
 * @param name The collection name (default: "crash_recovery_test")
 * @return CollectionSchema::Ptr The newly created schema
 */
inline CollectionSchema::Ptr CreateTestSchema(
    const std::string &name = "crash_recovery_test") {
  auto schema = std::make_shared<CollectionSchema>(name);
  schema->set_max_doc_count_per_segment(10000);

  // Registers a field that has no index parameters. The boolean is forwarded
  // as the third FieldSchema constructor argument (presumably the nullable
  // flag -- TODO confirm against FieldSchema).
  const auto add_plain_field = [&schema](const char *field_name, DataType type,
                                         bool flag) {
    schema->add_field(std::make_shared<FieldSchema>(field_name, type, flag));
  };

  add_plain_field("int32_field", DataType::INT32, false);
  add_plain_field("int64_field", DataType::INT64, true);
  add_plain_field("float_field", DataType::FLOAT, true);
  add_plain_field("string_field", DataType::STRING, false);
  add_plain_field("bool_field", DataType::BOOL, false);
  add_plain_field("array_int32_field", DataType::ARRAY_INT32, true);
  add_plain_field("array_string_field", DataType::ARRAY_STRING, false);

  // Vector fields additionally carry a dimension and index parameters.
  auto dense_field = std::make_shared<FieldSchema>(
      "dense_fp32_field", DataType::VECTOR_FP32, 128, false,
      std::make_shared<HnswIndexParams>(MetricType::COSINE));
  schema->add_field(dense_field);

  auto sparse_field = std::make_shared<FieldSchema>(
      "sparse_fp32_field", DataType::SPARSE_VECTOR_FP32, 0, false,
      std::make_shared<HnswIndexParams>(MetricType::IP));
  schema->add_field(sparse_field);

  return schema;
}


/**
 * @brief Build a test document whose contents are fully determined by
 *        (doc_id, version).
 *
 * Field values:
 * - pk: "pk_{doc_id}"
 * - int32_field: doc_id cast to int32
 * - int64_field: doc_id; left unset when doc_id % 60 == 0
 * - float_field: doc_id / 1000.0; left unset when doc_id % 70 == 0
 * - string_field: "{version}_{doc_id}"
 * - bool_field: (doc_id is even), inverted when version is odd
 * - array_int32_field: [doc_id, doc_id+1, doc_id+2]; left unset when
 *   doc_id % 100 == 0
 * - array_string_field: "str_{version}_{i}" for i in [0, doc_id % 5]
 * - dense_fp32_field: 128 floats with dense[i] = (doc_id + i) / 1000.0f
 * - sparse_fp32_field: indices 0, 10, ..., 100 with
 *   value = (doc_id + index) / 1000.0f
 *
 * @param doc_id The document ID
 * @param version The version of the document
 * @return Doc The created document
 */
inline Doc CreateTestDoc(uint64_t doc_id, int version) {
  const std::string id_text = std::to_string(doc_id);
  const std::string version_text = std::to_string(version);

  Doc doc;

  // Primary key
  const std::string primary_key = "pk_" + id_text;
  doc.set_pk(primary_key);

  // int32_field is always present
  doc.set<int32_t>("int32_field", static_cast<int32_t>(doc_id));

  // int64_field is skipped for every 60th ID
  if (doc_id % 60 != 0) {
    doc.set<int64_t>("int64_field", static_cast<int64_t>(doc_id));
  }

  // float_field is skipped for every 70th ID
  if (doc_id % 70 != 0) {
    doc.set<float>("float_field", static_cast<float>(doc_id) / 1000.0f);
  }

  // string_field: "{version}_{doc_id}"
  std::string versioned_text = version_text + "_" + id_text;
  doc.set<std::string>("string_field", versioned_text);

  // bool_field: true for even IDs, with the result inverted on odd versions
  const bool id_is_even = (doc_id % 2 == 0);
  const bool version_is_odd = (version % 2 != 0);
  bool flag = (id_is_even != version_is_odd);
  doc.set<bool>("bool_field", flag);

  // array_int32_field is skipped for every 100th ID
  if (doc_id % 100 != 0) {
    std::vector<int32_t> consecutive_ids{static_cast<int32_t>(doc_id),
                                         static_cast<int32_t>(doc_id + 1),
                                         static_cast<int32_t>(doc_id + 2)};
    doc.set<std::vector<int32_t>>("array_int32_field", consecutive_ids);
  }

  // array_string_field: between 1 and 5 entries of "str_{version}_{i}"
  const size_t label_count = doc_id % 5 + 1;
  const std::string label_prefix = "str_" + version_text + "_";
  std::vector<std::string> labels;
  labels.reserve(label_count);
  for (size_t n = 0; n < label_count; ++n) {
    labels.push_back(label_prefix + std::to_string(n));
  }
  doc.set<std::vector<std::string>>("array_string_field", labels);

  // dense_fp32_field: 128 components derived from doc_id
  std::vector<float> dense_values(128);
  for (size_t pos = 0; pos < dense_values.size(); ++pos) {
    dense_values[pos] = static_cast<float>(doc_id + pos) / 1000.0f;
  }
  doc.set<std::vector<float>>("dense_fp32_field", dense_values);

  // sparse_fp32_field: eleven entries at indices 0, 10, ..., 100
  std::vector<uint32_t> indices;
  std::vector<float> weights;
  indices.reserve(11);
  weights.reserve(11);
  for (uint32_t index = 0; index <= 100; index += 10) {
    indices.push_back(index);
    weights.push_back(static_cast<float>(doc_id + index) / 1000.0f);
  }
  doc.set<std::pair<std::vector<uint32_t>, std::vector<float>>>(
      "sparse_fp32_field", std::make_pair(indices, weights));

  return doc;
}


}  // namespace zvec


================================================
FILE: tests/db/crash_recovery/write_recovery_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#include <csignal>
#include <filesystem>
#include <thread>
#include <gtest/gtest.h>
#include <zvec/db/collection.h>
#include <zvec/db/doc.h>
#include <zvec/db/schema.h>
#include "utility.h"


namespace zvec {


// Absolute path of the data_generator executable; assigned in
// CrashRecoveryTest::SetUp() before each test.
static std::string data_generator_bin_;
// Name given to the collection schema created by the tests.
const std::string collection_name_{"crash_test"};
// Directory (relative to the working directory) holding the test collection;
// it is also the --path handed to data_generator.
const std::string dir_path_{"crash_test_db"};
// Options used whenever the tests create or open the collection.
const zvec::CollectionOptions options_{false, true};


// Finds the data_generator executable relative to the current working
// directory. "./data_generator" is tried first, then "./bin/data_generator".
// Returns the canonical path of the first candidate that exists, and throws
// std::runtime_error when neither is present.
static std::string LocateDataGenerator() {
  namespace fs = std::filesystem;

  static const char *const kCandidates[] = {"./data_generator",
                                            "./bin/data_generator"};
  for (const char *candidate : kCandidates) {
    const std::string location(candidate);
    if (!fs::exists(location)) {
      continue;
    }
    return fs::canonical(location).string();
  }

  throw std::runtime_error("data_generator binary not found");
}


// Forks the current process and, in the child, replaces it with
// data_generator operating on dir_path_ with the given range, operation and
// version.
//
// Returns fork()'s result to the caller: the child's pid on success, or a
// negative value when fork() failed. The child never returns from this
// function; if execvp() fails it reports the error and exits with status 1.
static pid_t SpawnGenerator(const std::string &start, const std::string &end,
                            const std::string &op,
                            const std::string &version) {
  const pid_t pid = fork();
  if (pid != 0) {
    return pid;  // Parent process, or fork() failure
  }

  // Child process: execvp() wants mutable char* arguments, hence the local
  // arrays for the flags and the const_casts for the values.
  char arg_path[] = "--path";
  char arg_start[] = "--start";
  char arg_end[] = "--end";
  char arg_op[] = "--op";
  char arg_version[] = "--version";
  char *args[] = {const_cast<char *>(data_generator_bin_.c_str()),
                  arg_path,
                  const_cast<char *>(dir_path_.c_str()),
                  arg_start,
                  const_cast<char *>(start.c_str()),
                  arg_end,
                  const_cast<char *>(end.c_str()),
                  arg_op,
                  const_cast<char *>(op.c_str()),
                  arg_version,
                  const_cast<char *>(version.c_str()),
                  nullptr};
  execvp(args[0], args);
  perror("execvp failed");
  _exit(1);
}


// Runs data_generator to completion and asserts that it exited normally with
// exit code 0.
void RunGenerator(const std::string &start, const std::string &end,
                  const std::string &op, const std::string &version) {
  const pid_t pid = SpawnGenerator(start, end, op, version);
  ASSERT_GE(pid, 0);

  // `status` is only meaningful when waitpid() succeeded, so check its
  // result before decoding anything.
  int status = 0;
  ASSERT_EQ(waitpid(pid, &status, 0), pid)
      << "waitpid failed for data_generator";
  ASSERT_TRUE(WIFEXITED(status))
      << "Child process did not exit normally. Terminated by signal?";
  int exit_code = WEXITSTATUS(status);
  ASSERT_EQ(exit_code, 0) << "data_generator failed with exit code: "
                          << exit_code;
}


// Starts data_generator, lets it run for `seconds` seconds, then kills it
// with SIGKILL to simulate a crash. Asserts that the child was terminated by
// a signal, i.e. that it had not already finished on its own.
void RunGeneratorAndCrash(const std::string &start, const std::string &end,
                          const std::string &op, const std::string &version,
                          int seconds) {
  const pid_t pid = SpawnGenerator(start, end, op, version);
  ASSERT_GE(pid, 0);

  std::this_thread::sleep_for(std::chrono::seconds(seconds));
  if (kill(pid, 0) == 0) {
    kill(pid, SIGKILL);
  }

  // `status` is only meaningful when waitpid() succeeded, so check its
  // result before decoding anything.
  int status = 0;
  ASSERT_EQ(waitpid(pid, &status, 0), pid)
      << "waitpid failed for data_generator";
  ASSERT_TRUE(WIFSIGNALED(status))
      << "Child process was not killed by a signal. It exited normally?";
}


class CrashRecoveryTest : public ::testing::Test {
 protected:
  void SetUp() override {
    system("rm -rf ./crash_test_db");
    ASSERT_NO_THROW(data_generator_bin_ = LocateDataGenerator());
  }

  void TearDown() override {
    system("rm -rf ./crash_test_db");
  }
};


// Sanity check without any crash: create a collection, let the generator
// insert documents [0, 5000), then reopen and verify the document count.
TEST_F(CrashRecoveryTest, BasicInsertAndReopen) {
  {
    // Create the collection and release this handle before the generator
    // process is started.
    auto schema = CreateTestSchema(collection_name_);
    auto created = Collection::CreateAndOpen(dir_path_, *schema, options_);
    ASSERT_TRUE(created.has_value());
    auto handle = created.value();
    handle.reset();
  }

  RunGenerator("0", "5000", "insert", "0");

  auto reopened = Collection::Open(dir_path_, options_);
  ASSERT_TRUE(reopened.has_value());
  auto collection = reopened.value();
  ASSERT_EQ(collection->Stats().value().doc_count, 5000)
      << "Document count mismatch";
}


// Kills the generator 3 seconds into inserting documents [0, 10000), then
// reopens the collection and checks that every document counted by Stats()
// can be fetched and matches the version-0 content.
TEST_F(CrashRecoveryTest, CrashRecoveryDuringInsertion) {
  {
    auto schema = CreateTestSchema(collection_name_);
    auto result = Collection::CreateAndOpen(dir_path_, *schema, options_);
    ASSERT_TRUE(result.has_value());
    auto collection = result.value();
    collection.reset();
  }

  RunGeneratorAndCrash("0", "10000", "insert", "0", 3);

  auto result = Collection::Open(dir_path_, options_);
  ASSERT_TRUE(result.has_value()) << "Failed to reopen collection after crash. "
                                     "Recovery mechanism may be broken.";
  auto collection = result.value();
  uint64_t doc_count{collection->Stats().value().doc_count};
  ASSERT_GT(doc_count, 800)
      << "Document count is too low after 3s of insertion and recovery";

  for (uint64_t doc_id = 0; doc_id < doc_count; doc_id++) {
    const auto expected_doc = CreateTestDoc(doc_id, 0);
    std::vector<std::string> pks{};
    pks.emplace_back(expected_doc.pk());
    if (auto res = collection->Fetch(pks); res) {
      const auto &map = res.value();
      const auto it = map.find(expected_doc.pk());
      if (it == map.end()) {
        FAIL() << "Returned map does not contain doc[" << expected_doc.pk()
               << "]";
      }
      // Fetch can map a pk to nullptr when the document does not exist;
      // report that as a test failure instead of dereferencing it.
      ASSERT_NE(it->second, nullptr)
          << "Fetch returned nullptr for doc[" << expected_doc.pk() << "]";
      ASSERT_EQ(*it->second, expected_doc)
          << "Data mismatch for doc[" << expected_doc.pk() << "]";
    } else {
      FAIL() << "Failed to fetch doc[" << expected_doc.pk() << "]";
    }
  }
}


// Seeds documents [0, 5000) at version 0, then kills the generator 5 seconds
// into upserting [4500, 20000) at version 1. After reopening, documents below
// 4500 must still hold version-0 content and all others version-1 content.
TEST_F(CrashRecoveryTest, CrashRecoveryDuringUpsert) {
  {
    auto schema = CreateTestSchema(collection_name_);
    auto result = Collection::CreateAndOpen(dir_path_, *schema, options_);
    ASSERT_TRUE(result.has_value());
    auto collection = result.value();
    collection.reset();
  }

  RunGenerator("0", "5000", "insert", "0");
  {
    auto result = Collection::Open(dir_path_, options_);
    ASSERT_TRUE(result.has_value());
    auto collection = result.value();
    ASSERT_EQ(collection->Stats().value().doc_count, 5000)
        << "Document count mismatch";
  }

  RunGeneratorAndCrash("4500", "20000", "upsert", "1", 5);

  auto result = Collection::Open(dir_path_, options_);
  ASSERT_TRUE(result.has_value()) << "Failed to reopen collection after crash. "
                                     "Recovery mechanism may be broken.";
  auto collection = result.value();
  uint64_t doc_count{collection->Stats().value().doc_count};
  ASSERT_GT(doc_count, 6000)
      << "Document count is too low after 5s of insertion and recovery";

  for (uint64_t doc_id = 0; doc_id < doc_count; doc_id++) {
    // Documents below 4500 were never touched by the upsert run.
    const Doc expected_doc = CreateTestDoc(doc_id, doc_id < 4500 ? 0 : 1);
    std::vector<std::string> pks{};
    pks.emplace_back(expected_doc.pk());
    if (auto res = collection->Fetch(pks); res) {
      const auto &map = res.value();
      const auto it = map.find(expected_doc.pk());
      if (it == map.end()) {
        FAIL() << "Returned map does not contain doc[" << expected_doc.pk()
               << "]";
      }
      // Fetch can map a pk to nullptr when the document does not exist;
      // report that as a test failure instead of dereferencing it.
      ASSERT_NE(it->second, nullptr)
          << "Fetch returned nullptr for doc[" << expected_doc.pk() << "]";
      ASSERT_EQ(*it->second, expected_doc)
          << "Data mismatch for doc[" << expected_doc.pk() << "]";
    } else {
      FAIL() << "Failed to fetch doc[" << expected_doc.pk() << "]";
    }
  }
}


// Seeds documents [0, 18000) at version 0, then kills the generator 4 seconds
// into updating [3000, 15000) to version 3. After reopening, the document
// count must be unchanged; documents [0, 3000) must hold version-0 content
// and documents [3000, 3500) are expected to already hold version-3 content.
TEST_F(CrashRecoveryTest, CrashRecoveryDuringUpdate) {
  {
    auto schema = CreateTestSchema(collection_name_);
    auto result = Collection::CreateAndOpen(dir_path_, *schema, options_);
    ASSERT_TRUE(result.has_value());
    auto collection = result.value();
    collection.reset();
  }

  RunGenerator("0", "18000", "upsert", "0");
  {
    auto result = Collection::Open(dir_path_, options_);
    ASSERT_TRUE(result.has_value());
    auto collection = result.value();
    ASSERT_EQ(collection->Stats().value().doc_count, 18000)
        << "Document count mismatch";
  }

  RunGeneratorAndCrash("3000", "15000", "update", "3", 4);

  auto result = Collection::Open(dir_path_, options_);
  ASSERT_TRUE(result.has_value()) << "Failed to reopen collection after crash. "
                                     "Recovery mechanism may be broken.";
  auto collection = result.value();
  uint64_t doc_count{collection->Stats().value().doc_count};
  ASSERT_EQ(doc_count, 18000) << "Document count mismatch after crash recovery";

  for (int doc_id = 0; doc_id < 3500; doc_id++) {
    // Documents below 3000 were never touched by the update run.
    const Doc expected_doc = CreateTestDoc(doc_id, doc_id < 3000 ? 0 : 3);
    std::vector<std::string> pks{};
    pks.emplace_back(expected_doc.pk());
    if (auto res = collection->Fetch(pks); res) {
      const auto &map = res.value();
      const auto it = map.find(expected_doc.pk());
      if (it == map.end()) {
        FAIL() << "Returned map does not contain doc[" << expected_doc.pk()
               << "]";
      }
      // Fetch can map a pk to nullptr when the document does not exist;
      // report that as a test failure instead of dereferencing it.
      ASSERT_NE(it->second, nullptr)
          << "Fetch returned nullptr for doc[" << expected_doc.pk() << "]";
      ASSERT_EQ(*it->second, expected_doc)
          << "Data mismatch for doc[" << expected_doc.pk() << "]";
    } else {
      FAIL() << "Failed to fetch doc[" << expected_doc.pk() << "]";
    }
  }
}


// Seeds documents [0, 18000), then kills the generator 4 seconds into
// deleting [3000, 15000). After reopening, documents [0, 3000) must be intact
// and documents [3000, 3500) must be reported as absent (nullptr).
TEST_F(CrashRecoveryTest, CrashRecoveryDuringDelete) {
  {
    auto schema = CreateTestSchema(collection_name_);
    auto created = Collection::CreateAndOpen(dir_path_, *schema, options_);
    ASSERT_TRUE(created.has_value());
    auto handle = created.value();
    handle.reset();
  }

  RunGenerator("0", "18000", "insert", "0");
  {
    auto opened = Collection::Open(dir_path_, options_);
    ASSERT_TRUE(opened.has_value());
    auto seeded = opened.value();
    ASSERT_EQ(seeded->Stats().value().doc_count, 18000)
        << "Document count mismatch";
  }

  RunGeneratorAndCrash("3000", "15000", "delete", "0", 4);

  auto recovered = Collection::Open(dir_path_, options_);
  ASSERT_TRUE(recovered.has_value())
      << "Failed to reopen collection after crash. "
         "Recovery mechanism may be broken.";
  auto collection = recovered.value();
  uint64_t remaining{collection->Stats().value().doc_count};
  ASSERT_LT(remaining, 18000)
      << "No deletes appear to have been applied before the crash";
  ASSERT_GT(remaining, 6000)
      << "Too many documents deleted, recovery likely lost data";

  for (int id = 0; id < 3500; id++) {
    auto expected = CreateTestDoc(id, 0);
    std::vector<std::string> keys{};
    keys.emplace_back(expected.pk());

    auto fetched = collection->Fetch(keys);
    if (!fetched) {
      FAIL() << "Failed to fetch doc[" << expected.pk() << "]";
    }

    auto docs = fetched.value();
    auto entry = docs.find(expected.pk());
    ASSERT_NE(entry, docs.end())
        << "Fetch result missing requested pk[" << expected.pk() << "]";

    if (id >= 3000) {
      // Inside the deleted range: the pk is present but maps to nullptr.
      ASSERT_EQ(entry->second, nullptr)
          << "Returned doc for deleted pk[" << expected.pk() << "]";
      continue;
    }

    // Below the deleted range: the document must survive unchanged.
    ASSERT_NE(entry->second, nullptr)
        << "Existing doc returned as nullptr [" << expected.pk() << "]";
    const auto actual = entry->second;
    ASSERT_EQ(*actual, expected)
        << "Data mismatch for doc[" << expected.pk() << "]";
  }
}


}  // namespace zvec


================================================
FILE: tests/db/index/CMakeLists.txt
================================================

# Build script for the index tests: every *_test.cc found below this directory
# becomes its own test target and is registered in the zvec_index suite.

# Project-level CMake helpers. cc_gmock() and cc_test_suite() used below are
# presumably defined in bazel.cmake -- TODO confirm.
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

# Extra linker flags for Apple platforms: system frameworks plus a set of
# "-Wl,-U,<symbol>" options, which appear to tell the linker that the listed
# symbols may be left undefined at link time (verify against the ld man page).
# On other platforms APPLE_FRAMEWORK_LIBS stays unset and expands to nothing.
if(APPLE)
  set(APPLE_FRAMEWORK_LIBS
    -framework CoreFoundation
    -framework CoreGraphics
    -framework CoreData
    -framework CoreText
    -framework Security
    -framework Foundation
    -Wl,-U,_MallocExtension_ReleaseFreeMemory
    -Wl,-U,_ProfilerStart
    -Wl,-U,_ProfilerStop
    -Wl,-U,_RegisterThriftProtocol
  )
endif()

# Collect all test sources recursively, as paths relative to this directory.
file(GLOB_RECURSE ALL_TEST_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *_test.cc)
foreach(CC_SRCS ${ALL_TEST_SRCS})
  # The target name is the file name without directory and extension, so test
  # file names must be unique across all subdirectories.
  get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)
  # Each test is compiled together with the shared utils/utils.cc helper.
  cc_gmock(
    NAME ${CC_TARGET} STRICT
    LIBS zvec_db
    zvec_proto
    core_metric_static
    core_utility_static
    core_quantizer_static
    core_knn_hnsw core_knn_hnsw_sparse sparsehash
    core_knn_flat core_knn_flat_sparse core_knn_ivf
    core_knn_hnsw_rabitq core_mix_reducer
    Arrow::arrow_dataset
    ${CMAKE_THREAD_LIBS_INIT}
    ${CMAKE_DL_LIBS}
    SRCS ${CC_SRCS} utils/utils.cc
    INCS . .. ../../src
    LDFLAGS ${APPLE_FRAMEWORK_LIBS}
  )
  cc_test_suite(zvec_index ${CC_TARGET})
endforeach()


================================================
FILE: tests/db/index/column/inverted_column/inverted_column_indexer_array_numbers_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#include <gtest/gtest.h>
#include "db/index/column/inverted_column/inverted_indexer.h"

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
#endif

using namespace zvec;
using File = ailego::File;


// Directory holding the on-disk index for this test binary; the fixture
// removes it with `rm -rf` before and after the suite runs.
const std::string working_dir{"./inverted_column_indexer_array_numbers_dir/"};
// Collection name passed to InvertedIndexer::CreateAndOpen.
const std::string collection_name{"test_collection"};


/**
 * @brief A helper class for testing the InvertedColumnIndexer implementation.
 *
 * This class generates test data with specific patterns to verify the
 * correctness of the inverted index implementation. It provides various methods
 * to populate an InvertedColumnIndexer with predictable data patterns and
 * verify that the indexing and search operations work correctly.
 *
 * Data pattern: document `i` holds the array [i, i+1, ..., i+4]; documents
 * with id >= 1000 additionally hold i+5. Every 100th document is null.
 * The expected counts in verify_arrays() rely on num_docs being >= 1000.
 *
 */
class TestHelper {
 public:
  /**
   * @param num_docs           Number of documents; rounded down to a multiple
   *                           of 100.
   * @param num_write_threads  Number of concurrent writer threads; 0 is
   *                           treated as 1 to avoid a division by zero.
   */
  TestHelper(uint32_t num_docs, uint32_t num_write_threads = 10)
      : num_docs_(num_docs / 100 * 100),
        num_write_threads_(num_write_threads == 0 ? 1u : num_write_threads) {}


  /**
   * @brief Inserts all documents into `indexer` using the writer threads.
   *
   * The id range [0, num_docs_) is split into contiguous chunks, one per
   * thread; the last thread also takes the remainder so that every document
   * is inserted even when num_docs_ is not divisible by the thread count.
   *
   * @tparam T  Element type of the generated arrays.
   */
  template <typename T>
  void insert_arrays(InvertedColumnIndexer::Ptr indexer) {
    auto insert_func = [&](uint32_t start, uint32_t end) {
      Status s;
      for (uint32_t i = start; i < end; ++i) {
        if (i % 100 == 0) {  // Null value for every 100th doc
          s = indexer->insert_null(i);
        } else {
          const auto arr = generate_array<T>(i);
          s = indexer->insert(
              i, std::string(reinterpret_cast<const char *>(arr.data()),
                             sizeof(T) * arr.size()));
        }
        ASSERT_TRUE(s.ok());
      }
    };

    const uint32_t num_docs_per_thread = num_docs_ / num_write_threads_;
    std::vector<std::thread> threads{};
    threads.reserve(num_write_threads_);
    for (uint32_t t = 0; t < num_write_threads_; ++t) {
      const uint32_t begin = t * num_docs_per_thread;
      const uint32_t end = (t + 1 == num_write_threads_)
                               ? num_docs_
                               : begin + num_docs_per_thread;
      threads.emplace_back(insert_func, begin, end);
    }
    for (auto &t : threads) {
      t.join();
    }
  }


  /**
   * @brief Runs a fixed set of searches against `indexer` and checks the
   *        results against the data pattern produced by insert_arrays().
   *
   * @tparam T  Element type the column was populated with.
   */
  template <typename T>
  void verify_arrays(InvertedColumnIndexer::Ptr indexer) {
    std::vector<std::string> values;
    InvertedSearchResult::Ptr res;

    // Number of non-null docs overall, and among the docs with 5-element
    // arrays (ids below kNumShortArrayDocs).
    const uint32_t non_null_docs = num_docs_ - (num_docs_ / 100);
    const uint32_t short_docs = kNumShortArrayDocs - (kNumShortArrayDocs / 100);

    // Search for a non-existent value
    values = {encode<T>(static_cast<T>(num_docs_ + 100))};
    res = indexer->multi_search(values, CompareOp::CONTAIN_ANY);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), 0);

    // Search for docs containing value "2"
    values = {encode<T>(2)};
    res = indexer->multi_search(values, CompareOp::CONTAIN_ANY);
    ASSERT_TRUE(res);
    // doc1 and doc2 contain value "2", doc0 is null
    ASSERT_EQ(res->count(), 2);
    ASSERT_TRUE(res->contains(1));
    ASSERT_TRUE(res->contains(2));
    res = indexer->multi_search(values, CompareOp::CONTAIN_ALL);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), 2);
    ASSERT_TRUE(res->contains(1));
    ASSERT_TRUE(res->contains(2));

    // Search for docs containing values of "2", "3" and "10"
    values = {encode<T>(2), encode<T>(3), encode<T>(10)};
    res = indexer->multi_search(values, CompareOp::CONTAIN_ANY);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), 8);
    ASSERT_TRUE(res->contains(1));
    ASSERT_TRUE(res->contains(2));
    ASSERT_TRUE(res->contains(3));
    ASSERT_TRUE(res->contains(6));
    ASSERT_TRUE(res->contains(7));
    ASSERT_TRUE(res->contains(8));
    ASSERT_TRUE(res->contains(9));
    ASSERT_TRUE(res->contains(10));
    res = indexer->multi_search(values, CompareOp::CONTAIN_ALL);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), 0);

    // Search for docs containing values of "3" and "6"
    values = {encode<T>(3), encode<T>(6)};
    res = indexer->multi_search(values, CompareOp::CONTAIN_ANY);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), 6);
    ASSERT_TRUE(res->contains(1));
    ASSERT_TRUE(res->contains(2));
    ASSERT_TRUE(res->contains(3));
    ASSERT_TRUE(res->contains(4));
    ASSERT_TRUE(res->contains(5));
    ASSERT_TRUE(res->contains(6));
    res = indexer->multi_search(values, CompareOp::CONTAIN_ALL);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), 2);
    ASSERT_TRUE(res->contains(2));
    ASSERT_TRUE(res->contains(3));

    // Search for docs not containing value "1"
    values = {encode<T>(1)};
    res = indexer->multi_search(values, CompareOp::NOT_CONTAIN_ANY);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), non_null_docs - 1);
    ASSERT_FALSE(res->contains(1));
    res = indexer->multi_search(values, CompareOp::NOT_CONTAIN_ALL);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), non_null_docs - 1);
    ASSERT_FALSE(res->contains(1));

    // Search for docs not containing value "10" and "14"
    values = {encode<T>(10), encode<T>(14)};
    res = indexer->multi_search(values, CompareOp::NOT_CONTAIN_ANY);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), non_null_docs - 9);
    for (uint32_t id = 6; id <= 14; ++id) {
      ASSERT_FALSE(res->contains(id));
    }
    res = indexer->multi_search(values, CompareOp::NOT_CONTAIN_ALL);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), non_null_docs - 1);
    ASSERT_FALSE(res->contains(10));

    // Search for docs by array length: the short docs have length 5, all
    // other non-null docs have length 6.
    res = indexer->search_array_len(5, CompareOp::EQ);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), short_docs);
    res = indexer->search_array_len(5, CompareOp::NE);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), non_null_docs - short_docs);
    res = indexer->search_array_len(6, CompareOp::LT);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), short_docs);
    res = indexer->search_array_len(6, CompareOp::LE);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), non_null_docs);
    res = indexer->search_array_len(6, CompareOp::GT);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), 0);
    res = indexer->search_array_len(6, CompareOp::GE);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), non_null_docs - short_docs);
  }


 private:
  // Documents with an id below this value get 5 elements, all others get 6.
  static constexpr uint32_t kNumShortArrayDocs = 1000;


  // Returns the raw in-memory bytes of `v`, the form in which search terms
  // are passed to multi_search().
  template <typename T>
  static std::string encode(T v) {
    return std::string(reinterpret_cast<const char *>(&v), sizeof(T));
  }


  // Builds the array stored for `doc_id`: [doc_id, doc_id + 1, ...] with 5
  // elements for the short docs and 6 elements otherwise.
  template <typename T>
  std::vector<T> generate_array(uint32_t doc_id) {
    const uint32_t len = doc_id < kNumShortArrayDocs ? 5u : 6u;
    std::vector<T> nums;
    nums.reserve(len);
    for (uint32_t i = 0; i < len; ++i) {
      nums.push_back(static_cast<T>(doc_id + i));
    }
    return nums;
  }


 private:
  const uint32_t num_docs_;
  const uint32_t num_write_threads_;
};


/**
 *
 * @brief Unit tests for the InvertedColumnIndexer implementation.
 *
 */
class InvertedIndexTest : public testing::Test {
  /*****  Global initialization and cleanup - Start  *****/
 public:
  static void SetUpTestCase() {
    char cmd_buf[100];
    snprintf(cmd_buf, 100, "rm -rf %s", working_dir.c_str());
    system(cmd_buf);

    indexer_ = InvertedIndexer::CreateAndOpen(collection_name, working_dir,
                                              true, {}, false);

    params_ = std::make_shared<InvertIndexParams>(true);
  }

  static void TearDownTestCase() {
    indexer_.reset();

    char cmd_buf[100];
    snprintf(cmd_buf, 100, "rm -rf %s", working_dir.c_str());
    system(cmd_buf);
  }
  /*****  Global initialization and cleanup - End  *****/


  /*****  Per-test initialization and cleanup - Start  *****/
 protected:
  void SetUp() override {}

  void TearDown() override {}
  /*****  Per-test initialization and cleanup - End  *****/


 protected:
  static InvertedIndexer::Ptr indexer_;
  static TestHelper test_helper_;
  static IndexParams::Ptr params_;
};


// Out-of-class definitions of the fixture's static members. indexer_ and
// params_ are assigned in SetUpTestCase(); the helper is built for 100000
// documents and 10 writer threads.
InvertedIndexer::Ptr InvertedIndexTest::indexer_{nullptr};
TestHelper InvertedIndexTest::test_helper_{100000, 10};
IndexParams::Ptr InvertedIndexTest::params_{nullptr};


/*
 *
 * Test Cases
 *
 */
// Creates the "array_int32" column, populates it and verifies the searches.
TEST_F(InvertedIndexTest, ARRAY_INT32) {
  ASSERT_TRUE(indexer_);

  FieldSchema schema{"array_int32", DataType::ARRAY_INT32, true, params_};
  auto status = indexer_->create_column_indexer(schema);
  ASSERT_TRUE(status.ok());

  auto column = (*indexer_)["array_int32"];
  ASSERT_TRUE(column);
  test_helper_.insert_arrays<int32_t>(column);
  test_helper_.verify_arrays<int32_t>(column);
}


// Creates the "array_int64" column, populates it and verifies the searches.
TEST_F(InvertedIndexTest, ARRAY_INT64) {
  ASSERT_TRUE(indexer_);

  FieldSchema schema{"array_int64", DataType::ARRAY_INT64, true, params_};
  auto status = indexer_->create_column_indexer(schema);
  ASSERT_TRUE(status.ok());

  auto column = (*indexer_)["array_int64"];
  ASSERT_TRUE(column);
  test_helper_.insert_arrays<int64_t>(column);
  test_helper_.verify_arrays<int64_t>(column);
}


// Creates the "array_uint32" column, populates it and verifies the searches.
TEST_F(InvertedIndexTest, ARRAY_UINT32) {
  ASSERT_TRUE(indexer_);

  FieldSchema schema{"array_uint32", DataType::ARRAY_UINT32, true, params_};
  auto status = indexer_->create_column_indexer(schema);
  ASSERT_TRUE(status.ok());

  auto column = (*indexer_)["array_uint32"];
  ASSERT_TRUE(column);
  test_helper_.insert_arrays<uint32_t>(column);
  test_helper_.verify_arrays<uint32_t>(column);
}


// Creates the "array_uint64" column, populates it and verifies the searches.
TEST_F(InvertedIndexTest, ARRAY_UINT64) {
  ASSERT_TRUE(indexer_);

  FieldSchema schema{"array_uint64", DataType::ARRAY_UINT64, true, params_};
  auto status = indexer_->create_column_indexer(schema);
  ASSERT_TRUE(status.ok());

  auto column = (*indexer_)["array_uint64"];
  ASSERT_TRUE(column);
  test_helper_.insert_arrays<uint64_t>(column);
  test_helper_.verify_arrays<uint64_t>(column);
}


// Seals the shared indexer and re-runs the verification on every column that
// the earlier test cases created and populated.
TEST_F(InvertedIndexTest, SEALED) {
  ASSERT_TRUE(indexer_);

  auto seal_status = indexer_->seal();
  ASSERT_TRUE(seal_status.ok());

  auto int32_column = (*indexer_)["array_int32"];
  ASSERT_TRUE(int32_column);
  test_helper_.verify_arrays<int32_t>(int32_column);

  auto int64_column = (*indexer_)["array_int64"];
  ASSERT_TRUE(int64_column);
  test_helper_.verify_arrays<int64_t>(int64_column);

  auto uint32_column = (*indexer_)["array_uint32"];
  ASSERT_TRUE(uint32_column);
  test_helper_.verify_arrays<uint32_t>(uint32_column);

  auto uint64_column = (*indexer_)["array_uint64"];
  ASSERT_TRUE(uint64_column);
  test_helper_.verify_arrays<uint64_t>(uint64_column);
}

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif

================================================
FILE: tests/db/index/column/inverted_column/inverted_column_indexer_bool_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#include <gtest/gtest.h>
#include "db/index/column/inverted_column/inverted_indexer.h"

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
#endif

using namespace zvec;
using File = ailego::File;


// Directory holding the on-disk index for this test binary; the fixture
// removes it with `rm -rf` before and after the suite runs. Snapshots are
// created below it as well.
const std::string working_dir{"./inverted_column_indexer_bool_dir/"};
// Collection name passed to InvertedIndexer::CreateAndOpen.
const std::string collection_name{"test_collection"};


/**
 * @brief A helper class for testing the InvertedColumnIndexer implementation.
 *
 * This class generates test data with specific patterns to verify the
 * correctness of the inverted index implementation. It provides various methods
 * to populate an InvertedColumnIndexer with predictable data patterns and
 * verify that the indexing and search operations work correctly.
 *
 * Data patterns: scalar bools are `true` for even doc ids; bool arrays cycle
 * through ten fixed shapes selected by `doc_id % 10`.
 *
 */
class TestHelper {
 public:
  /**
   * @param num_docs           Number of documents; rounded down to a multiple
   *                           of 100.
   * @param num_write_threads  Number of concurrent writer threads; 0 is
   *                           treated as 1 to avoid a division by zero.
   */
  TestHelper(uint32_t num_docs, uint32_t num_write_threads = 10)
      : num_docs_(num_docs / 100 * 100),
        num_write_threads_(num_write_threads == 0 ? 1u : num_write_threads) {}


  // Inserts one scalar bool per document using the writer threads.
  void insert_bools(InvertedColumnIndexer::Ptr indexer) {
    auto insert_func = [&](uint32_t start, uint32_t end) {
      Status s;
      for (uint32_t i = start; i < end; ++i) {
        bool v = generate_bool(i);
        s = indexer->insert(i, v);
        ASSERT_TRUE(s.ok());
      }
    };

    run_partitioned(insert_func);
  }


  // Checks that exactly the even doc ids match `== true` and `!= false`.
  void verify_bools(InvertedColumnIndexer::Ptr indexer) {
    InvertedSearchResult::Ptr res;
    res = indexer->search("true", CompareOp::EQ);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), num_docs_ / 2);
    for (uint32_t i = 0; i < num_docs_; ++i) {
      if (i % 2 == 0) {
        ASSERT_TRUE(res->contains(i));
      } else {
        ASSERT_FALSE(res->contains(i));
      }
    }

    res = indexer->search("false", CompareOp::NE);
    // Guard against a null result before dereferencing it, like every other
    // search in this helper.
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), num_docs_ / 2);
    for (uint32_t i = 0; i < num_docs_; ++i) {
      if (i % 2 == 0) {
        ASSERT_TRUE(res->contains(i));
      } else {
        ASSERT_FALSE(res->contains(i));
      }
    }
  }


  // Inserts one bool array per document using the writer threads.
  void insert_bool_arrays(InvertedColumnIndexer::Ptr indexer) {
    auto insert_func = [&](uint32_t start, uint32_t end) {
      Status s;
      for (uint32_t i = start; i < end; ++i) {
        auto v = generate_bool_array(i);
        s = indexer->insert(i, v);
        ASSERT_TRUE(s.ok());
      }
    };

    run_partitioned(insert_func);
  }


  // Checks containment and array-length searches against the ten array
  // shapes produced by generate_bool_array().
  void verify_bool_arrays(InvertedColumnIndexer::Ptr indexer) {
    InvertedSearchResult::Ptr res;
    // Shapes 4 and 7 are the only ones without a `true` element.
    res = indexer->multi_search({"true"}, CompareOp::CONTAIN_ALL);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), num_docs_ / 10 * 8);
    for (uint32_t i = 0; i < num_docs_; ++i) {
      if (i % 10 == 4 || i % 10 == 7) {
        ASSERT_FALSE(res->contains(i));
      } else {
        ASSERT_TRUE(res->contains(i));
      }
    }

    res = indexer->multi_search({"true"}, CompareOp::CONTAIN_ANY);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), num_docs_ / 10 * 8);
    for (uint32_t i = 0; i < num_docs_; ++i) {
      if (i % 10 == 4 || i % 10 == 7) {
        ASSERT_FALSE(res->contains(i));
      } else {
        ASSERT_TRUE(res->contains(i));
      }
    }

    // Shapes 2, 5, 8 and 9 contain both `true` and `false`.
    res = indexer->multi_search({"true", "false"}, CompareOp::CONTAIN_ALL);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), num_docs_ / 10 * 4);
    for (uint32_t i = 0; i < num_docs_; ++i) {
      if (i % 10 == 2 || i % 10 == 5 || i % 10 == 8 || i % 10 == 9) {
        ASSERT_TRUE(res->contains(i));
      } else {
        ASSERT_FALSE(res->contains(i));
      }
    }

    res = indexer->multi_search({"true", "false"}, CompareOp::CONTAIN_ANY);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), num_docs_);

    // Per ten docs: 1 array of length 1, 2 of length 2, 3 of length 3 and
    // 4 of length 4.
    res = indexer->search_array_len(1, CompareOp::EQ);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), num_docs_ / 10);
    res = indexer->search_array_len(2, CompareOp::EQ);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), num_docs_ / 10 * 2);
    res = indexer->search_array_len(3, CompareOp::EQ);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), num_docs_ / 10 * 3);
    res = indexer->search_array_len(4, CompareOp::EQ);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), num_docs_ / 10 * 4);

    res = indexer->search_array_len(5, CompareOp::NE);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), num_docs_);
    res = indexer->search_array_len(3, CompareOp::NE);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), num_docs_ / 10 * 7);

    res = indexer->search_array_len(1, CompareOp::LT);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), 0);
    res = indexer->search_array_len(1, CompareOp::LE);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), num_docs_ / 10);
    res = indexer->search_array_len(4, CompareOp::LT);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), num_docs_ / 10 * 6);
    res = indexer->search_array_len(4, CompareOp::LE);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), num_docs_);

    res = indexer->search_array_len(1, CompareOp::GT);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), num_docs_ / 10 * 9);
    res = indexer->search_array_len(1, CompareOp::GE);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), num_docs_);
    res = indexer->search_array_len(4, CompareOp::GT);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), 0);
    res = indexer->search_array_len(4, CompareOp::GE);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), num_docs_ / 10 * 4);
  }


 private:
  // Splits [0, num_docs_) into one contiguous chunk per writer thread, runs
  // `insert_func(begin, end)` on each chunk concurrently and joins all
  // threads. The last thread also takes the remainder so that every document
  // is covered even when num_docs_ is not divisible by the thread count.
  template <typename Fn>
  void run_partitioned(const Fn &insert_func) {
    const uint32_t num_docs_per_thread = num_docs_ / num_write_threads_;
    std::vector<std::thread> threads{};
    threads.reserve(num_write_threads_);
    for (uint32_t t = 0; t < num_write_threads_; ++t) {
      const uint32_t begin = t * num_docs_per_thread;
      const uint32_t end = (t + 1 == num_write_threads_)
                               ? num_docs_
                               : begin + num_docs_per_thread;
      threads.emplace_back(insert_func, begin, end);
    }
    for (auto &t : threads) {
      t.join();
    }
  }


  // Even doc ids are `true`, odd doc ids are `false`.
  bool generate_bool(uint32_t doc_id) {
    return doc_id % 2 == 0;
  }


  // Returns one of ten fixed arrays selected by `doc_id % 10`.
  std::vector<bool> generate_bool_array(uint32_t doc_id) {
    switch (doc_id % 10) {
      case 0:
        return {true};
      case 1:
        return {true, true};
      case 2:
        return {true, false};
      case 3:
        return {true, true, true};
      case 4:
        return {false, false, false};
      case 5:
        return {false, true, false};
      case 6:
        return {true, true, true, true};
      case 7:
        return {false, false, false, false};
      case 8:
        return {true, false, true, false};
      case 9:
        return {false, true, false, true};
      default:
        return {};
    }
  }


 private:
  const uint32_t num_docs_;
  const uint32_t num_write_threads_;
};


/**
 *
 * @brief Unit tests for the InvertedColumnIndexer implementation.
 *
 */
class InvertedIndexTest : public testing::Test {
  /*****  Global initialization and cleanup - Start  *****/
 public:
  static void SetUpTestCase() {
    char cmd_buf[100];
    snprintf(cmd_buf, 100, "rm -rf %s", working_dir.c_str());
    system(cmd_buf);

    indexer_ = InvertedIndexer::CreateAndOpen(collection_name, working_dir,
                                              true, {}, false);

    params_ = std::make_shared<InvertIndexParams>(true);
  }

  static void TearDownTestCase() {
    indexer_.reset();

    char cmd_buf[100];
    snprintf(cmd_buf, 100, "rm -rf %s", working_dir.c_str());
    system(cmd_buf);
  }
  /*****  Global initialization and cleanup - End  *****/


  /*****  Per-test initialization and cleanup - Start  *****/
 protected:
  void SetUp() override {}

  void TearDown() override {}
  /*****  Per-test initialization and cleanup - End  *****/


 protected:
  static InvertedIndexer::Ptr indexer_;
  static TestHelper test_helper_;
  static IndexParams::Ptr params_;
};


// Out-of-class definitions of the fixture's static members. indexer_ and
// params_ are assigned in SetUpTestCase(); the helper is built for 100000
// documents and 10 writer threads.
InvertedIndexer::Ptr InvertedIndexTest::indexer_{nullptr};
TestHelper InvertedIndexTest::test_helper_{100000, 10};
IndexParams::Ptr InvertedIndexTest::params_{nullptr};


/*
 *
 * Test Cases
 *
 */
// Creates the "test_bool" column, populates it and verifies the searches.
TEST_F(InvertedIndexTest, BOOLS) {
  ASSERT_TRUE(indexer_);

  FieldSchema schema{"test_bool", DataType::BOOL, true, params_};
  auto status = indexer_->create_column_indexer(schema);
  ASSERT_TRUE(status.ok());

  auto column = (*indexer_)["test_bool"];
  ASSERT_TRUE(column);
  test_helper_.insert_bools(column);
  test_helper_.verify_bools(column);
}


// Creates the "test_bool_array" column, populates it and verifies the
// containment and array-length searches.
TEST_F(InvertedIndexTest, BOOL_ARRAYS) {
  ASSERT_TRUE(indexer_);

  FieldSchema schema{"test_bool_array", DataType::ARRAY_BOOL, true, params_};
  auto status = indexer_->create_column_indexer(schema);
  ASSERT_TRUE(status.ok());

  auto column = (*indexer_)["test_bool_array"];
  ASSERT_TRUE(column);
  test_helper_.insert_bool_arrays(column);
  test_helper_.verify_bool_arrays(column);
}


// Seals the shared indexer and re-runs the verification on both columns that
// the earlier test cases created and populated.
TEST_F(InvertedIndexTest, SEALED) {
  ASSERT_TRUE(indexer_);
  auto seal_status = indexer_->seal();
  ASSERT_TRUE(seal_status.ok());

  auto bool_column = (*indexer_)["test_bool"];
  ASSERT_TRUE(bool_column);
  test_helper_.verify_bools(bool_column);

  auto bool_array_column = (*indexer_)["test_bool_array"];
  ASSERT_TRUE(bool_array_column);
  test_helper_.verify_bool_arrays(bool_array_column);
}


// Snapshots the shared indexer into <working_dir>/snapshot, opens a second
// indexer on that snapshot with both column schemas, and verifies that the
// snapshot returns the same search results.
TEST_F(InvertedIndexTest, SNAPSHOT) {
  ASSERT_TRUE(indexer_);

  auto snapshot_status = indexer_->create_snapshot(working_dir + "snapshot");
  ASSERT_TRUE(snapshot_status.ok());

  FieldSchema bool_schema{"test_bool", DataType::BOOL, true, params_};
  FieldSchema bool_array_schema{"test_bool_array", DataType::ARRAY_BOOL, true,
                                params_};

  auto from_snapshot = InvertedIndexer::CreateAndOpen(
      collection_name, working_dir + "snapshot", false,
      {bool_schema, bool_array_schema}, true);
  ASSERT_TRUE(from_snapshot);

  auto bool_column = (*from_snapshot)["test_bool"];
  ASSERT_TRUE(bool_column);
  test_helper_.verify_bools(bool_column);

  auto bool_array_column = (*from_snapshot)["test_bool_array"];
  ASSERT_TRUE(bool_array_column);
  test_helper_.verify_bool_arrays(bool_array_column);
}

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif

================================================
FILE: tests/db/index/column/inverted_column/inverted_column_indexer_cyclic_numbers_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#include <random>
#include <gtest/gtest.h>
#include "db/index/column/inverted_column/inverted_indexer.h"

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
#endif

using namespace zvec;
using File = ailego::File;


// Scratch directory for the index files of this test binary. The fixture
// removes it with `rm -rf` before the first test and after the last one.
const std::string working_dir{"./inverted_column_indexer_cyclic_numbers_dir/"};
// Collection name handed to InvertedIndexer::CreateAndOpen by the fixture.
const std::string collection_name{"test_collection"};


/**
 * @brief A helper class for testing the InvertedColumnIndexer implementation.
 *
 * This class generates test data with specific patterns to verify the
 * correctness of the inverted index implementation. It provides various methods
 * to populate an InvertedColumnIndexer with predictable data patterns and
 * verify that the indexing and search operations work correctly.
 *
 */
class TestHelper {
 public:
  /**
   * @brief Creates a helper for a data set of about @p num_docs documents.
   *
   * The stored document count is rounded down to a multiple of 100 so that
   * the data set consists of whole 100-document cycles.
   *
   * @param num_docs           Requested number of documents.
   * @param num_write_threads  Number of threads used when inserting.
   */
  TestHelper(uint32_t num_docs, uint32_t num_write_threads = 10)
      : num_docs_{num_docs - num_docs % 100},
        num_write_threads_{num_write_threads} {}


  /**
   * @brief Fills @p indexer with the cyclic data set using several threads.
   *
   * Document ids [0, num_docs_) are split into contiguous ranges, one per
   * writer thread. Each document receives generate_cyclic_number<T>(id),
   * serialized as the raw bytes of T; when @p include_nulls is set, every
   * 100th document is inserted as null instead.
   *
   * @tparam T             Value type of the column.
   * @param indexer        Column indexer to populate.
   * @param include_nulls  Insert a null for every document whose id is a
   *                       multiple of 100.
   */
  template <typename T>
  void insert_cyclic_numbers(InvertedColumnIndexer::Ptr indexer,
                             bool include_nulls) {
    // Runs on a writer thread. A failed ASSERT_TRUE only returns from this
    // lambda, i.e. it stops the current worker.
    auto insert_func = [&](uint32_t start, uint32_t end) {
      Status s;
      for (uint32_t i = start; i < end; ++i) {
        if (include_nulls && i % 100 == 0) {  // Null value for every 100th doc
          s = indexer->insert_null(i);
        } else {
          const T v = generate_cyclic_number<T>(i);
          s = indexer->insert(
              i, std::string(reinterpret_cast<const char *>(&v), sizeof(T)));
        }
        ASSERT_TRUE(s.ok());
      }
    };

    // A thread count of zero would divide by zero below; fall back to a
    // single writer in that case.
    const uint32_t num_threads =
        num_write_threads_ == 0 ? 1 : num_write_threads_;
    const uint32_t num_docs_per_thread = num_docs_ / num_threads;

    std::vector<std::thread> threads{};
    threads.reserve(num_threads);
    for (uint32_t t = 0; t < num_threads; ++t) {
      const uint32_t start = t * num_docs_per_thread;
      // The last writer also takes the remainder, so that no document is
      // skipped when num_docs_ is not a multiple of the thread count.
      const uint32_t end =
          (t + 1 == num_threads) ? num_docs_ : start + num_docs_per_thread;
      threads.emplace_back(insert_func, start, end);
    }
    // The lambda captures locals by reference, so every worker must finish
    // before this function returns.
    for (auto &t : threads) {
      t.join();
    }
  }


  /**
   * @brief Runs every verification pass for the cyclic data set.
   *
   * Equality/inequality and range checks always run; the null / non-null
   * checks run only when the column was populated with nulls.
   *
   * @param indexer        Column indexer to verify.
   * @param include_nulls  Whether every 100th document was inserted as null.
   */
  template <typename T>
  void verify_cyclic_numbers(InvertedColumnIndexer::Ptr indexer,
                             bool include_nulls) {
    verify_cyclic_numbers_eq_ne<T>(indexer, include_nulls);
    verify_cyclic_numbers_range<T>(indexer, include_nulls);
    if (!include_nulls) {
      return;
    }
    verify_cyclic_numbers_null<T>(indexer);
  }


  /**
   * @brief Checks the EQ and NE operators against the cyclic data set.
   *
   * EQ is probed in every 100-document cycle with the cycle's first value,
   * its fourth value, and a value that no document holds. NE is probed with
   * a value that no document holds and with the second value of a randomly
   * chosen cycle.
   *
   * @param indexer        Column indexer to verify.
   * @param include_nulls  Whether every 100th document was inserted as null.
   */
  template <typename T>
  void verify_cyclic_numbers_eq_ne(InvertedColumnIndexer::Ptr indexer,
                                   bool include_nulls) {
    // Raw-byte form of a value, which is what search() is queried with.
    const auto as_bytes = [](const T &value) {
      return std::string(reinterpret_cast<const char *>(&value), sizeof(T));
    };
    // Null documents are expected to be absent from every result below.
    const auto is_null = [include_nulls](uint32_t id) {
      return include_nulls && id % 100 == 0;
    };

    InvertedSearchResult::Ptr res;

    // Test EQ operator
    for (uint32_t cycle = 0; cycle < num_docs_ / 100; ++cycle) {
      const uint32_t base = cycle * 100;

      // First value of the cycle: held by ids base, base + 10, ..., base + 90;
      // id `base` itself is null when nulls are present.
      T v = generate_cyclic_number<T>(base);
      res = indexer->search(as_bytes(v), CompareOp::EQ);
      ASSERT_TRUE(res);
      if (include_nulls) {
        ASSERT_EQ(res->count(), 9);
      } else {
        ASSERT_EQ(res->count(), 10);
      }
      for (uint32_t j = include_nulls ? 1 : 0; j < 10; ++j) {
        ASSERT_TRUE(res->contains(base + j * 10));
      }

      // Fourth value of the cycle: never affected by nulls.
      v = generate_cyclic_number<T>(base + 3);
      res = indexer->search(as_bytes(v), CompareOp::EQ);
      ASSERT_TRUE(res);
      ASSERT_EQ(res->count(), 10);
      for (uint32_t j = 0; j < 10; ++j) {
        ASSERT_TRUE(res->contains(base + 3 + j * 10));
      }

      // A value that no document holds.
      v = static_cast<T>(base + 11);
      res = indexer->search(as_bytes(v), CompareOp::EQ);
      ASSERT_TRUE(res);
      ASSERT_EQ(res->count(), 0);
    }

    // Test NE operator with a non-existent value: every non-null document
    // matches.
    T v = generate_cyclic_number<T>(num_docs_);
    res = indexer->search(as_bytes(v), CompareOp::NE);
    ASSERT_TRUE(res);
    for (uint32_t id = 0; id < num_docs_; ++id) {
      if (is_null(id)) {
        ASSERT_FALSE(res->contains(id));
      } else {
        ASSERT_TRUE(res->contains(id));
      }
    }

    // Test NE operator with a random value: the second value of a randomly
    // picked cycle.
    static std::random_device rd;
    static std::mt19937 gen(rd());
    std::uniform_int_distribution<uint32_t> dis(0, num_docs_ / 100 - 1);
    const uint32_t random_cycle = dis(gen);
    v = generate_cyclic_number<T>(random_cycle * 100 + 1);
    res = indexer->search(as_bytes(v), CompareOp::NE);
    ASSERT_TRUE(res);
    for (uint32_t id = 0; id < num_docs_; ++id) {
      // Only documents of the picked cycle whose id ends in 1 hold the probe.
      const bool holds_probe = id / 100 == random_cycle && id % 10 == 1;
      if (is_null(id)) {
        ASSERT_FALSE(res->contains(id));
      } else if (holds_probe) {
        ASSERT_FALSE(res->contains(id));
      } else {
        ASSERT_TRUE(res->contains(id));
      }
    }
  }


  /**
   * @brief Checks the LT / LE / GT / GE operators against the cyclic data set.
   *
   * Three probes are used: the smallest value in the column, the second value
   * of the middle cycle, and the largest value in the column. The first and
   * last probes are checked through result counts, the middle one through
   * per-document membership.
   *
   * @param indexer        Column indexer to verify.
   * @param include_nulls  Whether every 100th document was inserted as null.
   */
  template <typename T>
  void verify_cyclic_numbers_range(InvertedColumnIndexer::Ptr indexer,
                                   bool include_nulls) {
    // Raw-byte form of a value, which is what search() is queried with.
    const auto as_bytes = [](const T &value) {
      return std::string(reinterpret_cast<const char *>(&value), sizeof(T));
    };
    // Null documents are expected to be absent from every result below.
    const auto is_null = [include_nulls](uint32_t id) {
      return include_nulls && id % 100 == 0;
    };
    // Documents that carry a value rather than null.
    const uint32_t num_valued =
        include_nulls ? num_docs_ - (num_docs_ / 100) : num_docs_;
    // Documents holding the smallest value: ids 0, 10, ..., 90, without the
    // null document 0 when nulls are present.
    const uint32_t num_smallest = include_nulls ? 9 : 10;

    InvertedSearchResult::Ptr res;

    // ---- Probe 1: smallest value in the column ----
    T v = generate_cyclic_number<T>(0);
    std::string key = as_bytes(v);
    res = indexer->search(key, CompareOp::LT);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), 0);
    res = indexer->search(key, CompareOp::LE);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), num_smallest);
    res = indexer->search(key, CompareOp::GT);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), num_valued - num_smallest);
    res = indexer->search(key, CompareOp::GE);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), num_valued);

    // ---- Probe 2: second value of the middle cycle ----
    // Earlier cycles are entirely below the probe, later cycles entirely
    // above it; inside the middle cycle the last digit of the id decides.
    const uint32_t middle_cycle = num_docs_ / 100 / 2;
    v = generate_cyclic_number<T>(middle_cycle * 100 + 1);
    key = as_bytes(v);

    res = indexer->search(key, CompareOp::LT);
    ASSERT_TRUE(res);
    for (uint32_t id = 0; id < num_docs_; ++id) {
      const uint32_t cycle = id / 100;
      const bool below =
          cycle < middle_cycle || (cycle == middle_cycle && id % 10 < 1);
      if (!is_null(id) && below) {
        ASSERT_TRUE(res->contains(id));
      } else {
        ASSERT_FALSE(res->contains(id));
      }
    }

    res = indexer->search(key, CompareOp::LE);
    ASSERT_TRUE(res);
    for (uint32_t id = 0; id < num_docs_; ++id) {
      const uint32_t cycle = id / 100;
      const bool below_or_equal =
          cycle < middle_cycle || (cycle == middle_cycle && id % 10 <= 1);
      if (!is_null(id) && below_or_equal) {
        ASSERT_TRUE(res->contains(id));
      } else {
        ASSERT_FALSE(res->contains(id));
      }
    }

    res = indexer->search(key, CompareOp::GT);
    ASSERT_TRUE(res);
    for (uint32_t id = 0; id < num_docs_; ++id) {
      const uint32_t cycle = id / 100;
      const bool above =
          cycle > middle_cycle || (cycle == middle_cycle && id % 10 > 1);
      if (!is_null(id) && above) {
        ASSERT_TRUE(res->contains(id));
      } else {
        ASSERT_FALSE(res->contains(id));
      }
    }

    res = indexer->search(key, CompareOp::GE);
    ASSERT_TRUE(res);
    for (uint32_t id = 0; id < num_docs_; ++id) {
      const uint32_t cycle = id / 100;
      const bool above_or_equal =
          cycle > middle_cycle || (cycle == middle_cycle && id % 10 >= 1);
      if (!is_null(id) && above_or_equal) {
        ASSERT_TRUE(res->contains(id));
      } else {
        ASSERT_FALSE(res->contains(id));
      }
    }

    // ---- Probe 3: largest value in the column ----
    // It is held by the ten documents of the last cycle whose id ends in 9;
    // none of them is a null document.
    v = generate_cyclic_number<T>(num_docs_ - 1);
    key = as_bytes(v);
    res = indexer->search(key, CompareOp::LT);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), num_valued - 10);
    res = indexer->search(key, CompareOp::LE);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), num_valued);
    res = indexer->search(key, CompareOp::GT);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), 0);
    res = indexer->search(key, CompareOp::GE);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), 10);
  }


  /**
   * @brief Checks search_null() and search_non_null() on a column that was
   *        populated with a null for every 100th document.
   *
   * @param indexer  Column indexer to verify.
   */
  template <typename T>
  void verify_cyclic_numbers_null(InvertedColumnIndexer::Ptr indexer) {
    // Null search: exactly the ids that are multiples of 100 are expected.
    InvertedSearchResult::Ptr res = indexer->search_null();
    ASSERT_TRUE(res);
    for (uint32_t id = 0; id < num_docs_; ++id) {
      const bool inserted_as_null = id % 100 == 0;
      if (!inserted_as_null) {
        ASSERT_FALSE(res->contains(id));
      } else {
        ASSERT_TRUE(res->contains(id));
      }
    }

    // Non-null search: the complement of the set above.
    res = indexer->search_non_null();
    ASSERT_TRUE(res);
    for (uint32_t id = 0; id < num_docs_; ++id) {
      const bool inserted_as_null = id % 100 == 0;
      if (!inserted_as_null) {
        ASSERT_TRUE(res->contains(id));
      } else {
        ASSERT_FALSE(res->contains(id));
      }
    }
  }


 private:
  /**
   * @brief Maps a document id to its value in the cyclic data set.
   *
   * Each block of 100 consecutive ids shares 10 distinct values, selected by
   * the last decimal digit of the id. A fractional part of 0.666 is added
   * before converting to T, so it is dropped for integer types.
   * E.g., for int32_t, [id: 304, value: 304], [id: 315, value: 305];
   * for float, [id: 101, value: 101.666], [id: 112, value: 102.666].
   *
   * @param doc_id  Document id.
   * @return The value stored for @p doc_id, converted to T.
   */
  template <typename T>
  T generate_cyclic_number(uint32_t doc_id) {
    const uint32_t cycle_base = doc_id / 100 * 100;
    const uint32_t position = doc_id % 10;
    const double with_fraction = (cycle_base + position) + 0.666;
    return static_cast<T>(with_fraction);
  }


 private:
  // Number of documents in the data set; the constructor rounds it down to a
  // multiple of 100.
  const uint32_t num_docs_;
  // Number of threads insert_cyclic_numbers() splits the inserts across.
  const uint32_t num_write_threads_;
};


/**
 *
 * @brief Unit tests for the InvertedColumnIndexer implementation.
 *
 */
class InvertedIndexTest : public testing::Test {
  /*****  Global initialization and cleanup - Start  *****/
 public:
  static void SetUpTestCase() {
    char cmd_buf[100];
    snprintf(cmd_buf, 100, "rm -rf %s", working_dir.c_str());
    system(cmd_buf);

    indexer_ = InvertedIndexer::CreateAndOpen(collection_name, working_dir,
                                              true, {}, false);

    params_ = std::make_shared<InvertIndexParams>(true);
  }

  static void TearDownTestCase() {
    indexer_.reset();

    char cmd_buf[100];
    snprintf(cmd_buf, 100, "rm -rf %s", working_dir.c_str());
    system(cmd_buf);
  }
  /*****  Global initialization and cleanup - End  *****/


  /*****  Per-test initialization and cleanup - Start  *****/
 protected:
  void SetUp() override {}

  void TearDown() override {}
  /*****  Per-test initialization and cleanup - End  *****/


 protected:
  static InvertedIndexer::Ptr indexer_;
  static TestHelper test_helper_;
  static IndexParams::Ptr params_;
};


// Out-of-class definitions of the fixture's static members. indexer_ and
// params_ start out empty and are assigned in SetUpTestCase().
InvertedIndexer::Ptr InvertedIndexTest::indexer_{nullptr};
// Data set of 100000 documents, inserted by 10 writer threads.
TestHelper InvertedIndexTest::test_helper_{100000, 10};
IndexParams::Ptr InvertedIndexTest::params_{nullptr};


/*
 *
 * Test Cases
 *
 */
// Creates two INT32 columns (without and with nulls), fills each with the
// cyclic data set and verifies all search operators on it.
TEST_F(InvertedIndexTest, CYCLIC_NUMBERS_INT32) {
  ASSERT_TRUE(indexer_);

  // Column in which every document carries a value.
  {
    FieldSchema schema{"cyclic_int32", DataType::INT32, true, params_};
    ASSERT_TRUE(indexer_->create_column_indexer(schema).ok());
    auto column = (*indexer_)["cyclic_int32"];
    ASSERT_TRUE(column);
    test_helper_.insert_cyclic_numbers<int32_t>(column, false);
    test_helper_.verify_cyclic_numbers<int32_t>(column, false);
  }

  // Column in which every 100th document is null.
  {
    FieldSchema schema{"cyclic_int32_w_null", DataType::INT32, true, params_};
    ASSERT_TRUE(indexer_->create_column_indexer(schema).ok());
    auto column = (*indexer_)["cyclic_int32_w_null"];
    ASSERT_TRUE(column);
    test_helper_.insert_cyclic_numbers<int32_t>(column, true);
    test_helper_.verify_cyclic_numbers<int32_t>(column, true);
  }
}


// Creates two INT64 columns (without and with nulls), fills each with the
// cyclic data set and verifies all search operators on it.
TEST_F(InvertedIndexTest, CYCLIC_NUMBERS_INT64) {
  ASSERT_TRUE(indexer_);

  // Column in which every document carries a value.
  {
    FieldSchema schema{"cyclic_int64", DataType::INT64, true, params_};
    ASSERT_TRUE(indexer_->create_column_indexer(schema).ok());
    auto column = (*indexer_)["cyclic_int64"];
    ASSERT_TRUE(column);
    test_helper_.insert_cyclic_numbers<int64_t>(column, false);
    test_helper_.verify_cyclic_numbers<int64_t>(column, false);
  }

  // Column in which every 100th document is null.
  {
    FieldSchema schema{"cyclic_int64_w_null", DataType::INT64, true, params_};
    ASSERT_TRUE(indexer_->create_column_indexer(schema).ok());
    auto column = (*indexer_)["cyclic_int64_w_null"];
    ASSERT_TRUE(column);
    test_helper_.insert_cyclic_numbers<int64_t>(column, true);
    test_helper_.verify_cyclic_numbers<int64_t>(column, true);
  }
}


// Creates two UINT32 columns (without and with nulls), fills each with the
// cyclic data set and verifies all search operators on it.
TEST_F(InvertedIndexTest, CYCLIC_NUMBERS_UINT32) {
  ASSERT_TRUE(indexer_);

  // Column in which every document carries a value.
  {
    FieldSchema schema{"cyclic_uint32", DataType::UINT32, true, params_};
    ASSERT_TRUE(indexer_->create_column_indexer(schema).ok());
    auto column = (*indexer_)["cyclic_uint32"];
    ASSERT_TRUE(column);
    test_helper_.insert_cyclic_numbers<uint32_t>(column, false);
    test_helper_.verify_cyclic_numbers<uint32_t>(column, false);
  }

  // Column in which every 100th document is null.
  {
    FieldSchema schema{"cyclic_uint32_w_null", DataType::UINT32, true,
                       params_};
    ASSERT_TRUE(indexer_->create_column_indexer(schema).ok());
    auto column = (*indexer_)["cyclic_uint32_w_null"];
    ASSERT_TRUE(column);
    test_helper_.insert_cyclic_numbers<uint32_t>(column, true);
    test_helper_.verify_cyclic_numbers<uint32_t>(column, true);
  }
}


// Creates two UINT64 columns (without and with nulls), fills each with the
// cyclic data set and verifies all search operators on it.
TEST_F(InvertedIndexTest, CYCLIC_NUMBERS_UINT64) {
  ASSERT_TRUE(indexer_);

  // Column in which every document carries a value.
  {
    FieldSchema schema{"cyclic_uint64", DataType::UINT64, true, params_};
    ASSERT_TRUE(indexer_->create_column_indexer(schema).ok());
    auto column = (*indexer_)["cyclic_uint64"];
    ASSERT_TRUE(column);
    test_helper_.insert_cyclic_numbers<uint64_t>(column, false);
    test_helper_.verify_cyclic_numbers<uint64_t>(column, false);
  }

  // Column in which every 100th document is null.
  {
    FieldSchema schema{"cyclic_uint64_w_null", DataType::UINT64, true,
                       params_};
    ASSERT_TRUE(indexer_->create_column_indexer(schema).ok());
    auto column = (*indexer_)["cyclic_uint64_w_null"];
    ASSERT_TRUE(column);
    test_helper_.insert_cyclic_numbers<uint64_t>(column, true);
    test_helper_.verify_cyclic_numbers<uint64_t>(column, true);
  }
}


// Creates two FLOAT columns (without and with nulls), fills each with the
// cyclic data set and verifies all search operators on it.
TEST_F(InvertedIndexTest, CYCLIC_NUMBERS_FLOAT) {
  ASSERT_TRUE(indexer_);

  // Column in which every document carries a value.
  {
    FieldSchema schema{"cyclic_float", DataType::FLOAT, true, params_};
    ASSERT_TRUE(indexer_->create_column_indexer(schema).ok());
    auto column = (*indexer_)["cyclic_float"];
    ASSERT_TRUE(column);
    test_helper_.insert_cyclic_numbers<float>(column, false);
    test_helper_.verify_cyclic_numbers<float>(column, false);
  }

  // Column in which every 100th document is null.
  {
    FieldSchema schema{"cyclic_float_w_null", DataType::FLOAT, true, params_};
    ASSERT_TRUE(indexer_->create_column_indexer(schema).ok());
    auto column = (*indexer_)["cyclic_float_w_null"];
    ASSERT_TRUE(column);
    test_helper_.insert_cyclic_numbers<float>(column, true);
    test_helper_.verify_cyclic_numbers<float>(column, true);
  }
}


// Creates two DOUBLE columns (without and with nulls), fills each with the
// cyclic data set and verifies all search operators on it.
TEST_F(InvertedIndexTest, CYCLIC_NUMBERS_DOUBLE) {
  ASSERT_TRUE(indexer_);

  // Column in which every document carries a value.
  {
    FieldSchema schema{"cyclic_double", DataType::DOUBLE, true, params_};
    ASSERT_TRUE(indexer_->create_column_indexer(schema).ok());
    auto column = (*indexer_)["cyclic_double"];
    ASSERT_TRUE(column);
    test_helper_.insert_cyclic_numbers<double>(column, false);
    test_helper_.verify_cyclic_numbers<double>(column, false);
  }

  // Column in which every 100th document is null.
  {
    FieldSchema schema{"cyclic_double_w_null", DataType::DOUBLE, true,
                       params_};
    ASSERT_TRUE(indexer_->create_column_indexer(schema).ok());
    auto column = (*indexer_)["cyclic_double_w_null"];
    ASSERT_TRUE(column);
    test_helper_.insert_cyclic_numbers<double>(column, true);
    test_helper_.verify_cyclic_numbers<double>(column, true);
  }
}


// Seals the shared indexer, then repeats the full verification for every
// column created by the CYCLIC_NUMBERS_* cases. Depends on those cases having
// run earlier against the same fixture-wide indexer.
TEST_F(InvertedIndexTest, SEALED) {
  ASSERT_TRUE(indexer_);

  auto seal_status = indexer_->seal();
  ASSERT_TRUE(seal_status.ok());

  // INT32
  {
    auto plain = (*indexer_)["cyclic_int32"];
    ASSERT_TRUE(plain);
    test_helper_.verify_cyclic_numbers<int32_t>(plain, false);

    auto with_nulls = (*indexer_)["cyclic_int32_w_null"];
    ASSERT_TRUE(with_nulls);
    test_helper_.verify_cyclic_numbers<int32_t>(with_nulls, true);
  }

  // INT64
  {
    auto plain = (*indexer_)["cyclic_int64"];
    ASSERT_TRUE(plain);
    test_helper_.verify_cyclic_numbers<int64_t>(plain, false);

    auto with_nulls = (*indexer_)["cyclic_int64_w_null"];
    ASSERT_TRUE(with_nulls);
    test_helper_.verify_cyclic_numbers<int64_t>(with_nulls, true);
  }

  // UINT32
  {
    auto plain = (*indexer_)["cyclic_uint32"];
    ASSERT_TRUE(plain);
    test_helper_.verify_cyclic_numbers<uint32_t>(plain, false);

    auto with_nulls = (*indexer_)["cyclic_uint32_w_null"];
    ASSERT_TRUE(with_nulls);
    test_helper_.verify_cyclic_numbers<uint32_t>(with_nulls, true);
  }

  // UINT64
  {
    auto plain = (*indexer_)["cyclic_uint64"];
    ASSERT_TRUE(plain);
    test_helper_.verify_cyclic_numbers<uint64_t>(plain, false);

    auto with_nulls = (*indexer_)["cyclic_uint64_w_null"];
    ASSERT_TRUE(with_nulls);
    test_helper_.verify_cyclic_numbers<uint64_t>(with_nulls, true);
  }

  // FLOAT
  {
    auto plain = (*indexer_)["cyclic_float"];
    ASSERT_TRUE(plain);
    test_helper_.verify_cyclic_numbers<float>(plain, false);

    auto with_nulls = (*indexer_)["cyclic_float_w_null"];
    ASSERT_TRUE(with_nulls);
    test_helper_.verify_cyclic_numbers<float>(with_nulls, true);
  }

  // DOUBLE
  {
    auto plain = (*indexer_)["cyclic_double"];
    ASSERT_TRUE(plain);
    test_helper_.verify_cyclic_numbers<double>(plain, false);

    auto with_nulls = (*indexer_)["cyclic_double_w_null"];
    ASSERT_TRUE(with_nulls);
    test_helper_.verify_cyclic_numbers<double>(with_nulls, true);
  }
}

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif

================================================
FILE: tests/db/index/column/inverted_column/inverted_column_indexer_sequential_numbers_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#include <random>
#include <gtest/gtest.h>
#include "db/index/column/inverted_column/inverted_indexer.h"

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
#endif

using namespace zvec;
using File = ailego::File;


// Directory name reserved for the index files of this test binary.
// NOTE(review): presumably created and removed by the test fixture defined
// further down in this file - confirm there.
const std::string working_dir{
    "./inverted_column_indexer_sequential_numbers_dir/"};
// Collection name used by the tests in this file.
const std::string collection_name{"test_collection"};


/**
 * @brief A helper class for testing the InvertedColumnIndexer implementation.
 *
 * This class generates test data with specific patterns to verify the
 * correctness of the inverted index implementation. It provides various methods
 * to populate an InvertedColumnIndexer with predictable data patterns and
 * verify that the indexing and search operations work correctly.
 *
 */
class TestHelper {
 public:
  /**
   * @brief Creates a helper for a data set of about @p num_docs documents.
   *
   * The stored document count is rounded down to a multiple of 100.
   *
   * @param num_docs           Requested number of documents.
   * @param num_write_threads  Number of threads used when inserting.
   */
  TestHelper(uint32_t num_docs, uint32_t num_write_threads = 10)
      : num_docs_{num_docs - num_docs % 100},
        num_write_threads_{num_write_threads} {}


  /**
   * @brief Fills @p indexer with the sequential data set using several
   *        threads.
   *
   * Document ids [0, num_docs_) are split into contiguous ranges, one per
   * writer thread. Each document receives generate_sequential_number<T>(id),
   * serialized as the raw bytes of T; when @p include_nulls is set, every
   * 100th document is inserted as null instead.
   *
   * @tparam T             Value type of the column.
   * @param indexer        Column indexer to populate.
   * @param include_nulls  Insert a null for every document whose id is a
   *                       multiple of 100.
   */
  template <typename T>
  void insert_sequential_numbers(InvertedColumnIndexer::Ptr indexer,
                                 bool include_nulls) {
    // Runs on a writer thread. A failed ASSERT_TRUE only returns from this
    // lambda, i.e. it stops the current worker.
    auto insert_func = [&](uint32_t start, uint32_t end) {
      Status s;
      for (uint32_t i = start; i < end; ++i) {
        if (include_nulls && i % 100 == 0) {  // Null value for every 100th doc
          s = indexer->insert_null(i);
        } else {
          const T v = generate_sequential_number<T>(i);
          s = indexer->insert(
              i, std::string(reinterpret_cast<const char *>(&v), sizeof(T)));
        }
        ASSERT_TRUE(s.ok());
      }
    };

    // A thread count of zero would divide by zero below; fall back to a
    // single writer in that case.
    const uint32_t num_threads =
        num_write_threads_ == 0 ? 1 : num_write_threads_;
    const uint32_t num_docs_per_thread = num_docs_ / num_threads;

    std::vector<std::thread> threads{};
    threads.reserve(num_threads);
    for (uint32_t t = 0; t < num_threads; ++t) {
      const uint32_t start = t * num_docs_per_thread;
      // The last writer also takes the remainder, so that no document is
      // skipped when num_docs_ is not a multiple of the thread count.
      const uint32_t end =
          (t + 1 == num_threads) ? num_docs_ : start + num_docs_per_thread;
      threads.emplace_back(insert_func, start, end);
    }
    // The lambda captures locals by reference, so every worker must finish
    // before this function returns.
    for (auto &t : threads) {
      t.join();
    }
  }


  /**
   * @brief Runs every verification pass for the sequential data set.
   *
   * Equality/inequality and both range passes always run. The null pass runs
   * only when the column contains nulls, and the range-ratio pass only once
   * the indexer reports itself as sealed.
   *
   * @param indexer        Column indexer to verify.
   * @param include_nulls  Whether every 100th document was inserted as null.
   */
  template <typename T>
  void verify_sequential_numbers(InvertedColumnIndexer::Ptr indexer,
                                 bool include_nulls) {
    verify_sequential_numbers_eq_ne<T>(indexer, include_nulls);
    verify_sequential_numbers_range_less<T>(indexer, include_nulls);
    verify_sequential_numbers_range_greater<T>(indexer, include_nulls);
    if (include_nulls) {
      verify_sequential_numbers_null<T>(indexer);
    }

    const bool sealed = indexer->is_sealed();
    if (!sealed) {
      return;
    }
    verify_sequential_numbers_range_ratio<T>(indexer, include_nulls);
  }


  /**
   * @brief Checks the EQ and NE operators against the sequential data set.
   *
   * EQ is probed with the value of every document and must return exactly
   * that document (or nothing for a null document). NE is probed with the
   * value generated for id num_docs_, which no document holds, and with the
   * value of one randomly chosen document.
   *
   * @param indexer        Column indexer to verify.
   * @param include_nulls  Whether every 100th document was inserted as null.
   */
  template <typename T>
  void verify_sequential_numbers_eq_ne(InvertedColumnIndexer::Ptr indexer,
                                       bool include_nulls) {
    // Raw-byte form of a value, which is what search() is queried with.
    const auto as_bytes = [](const T &value) {
      return std::string(reinterpret_cast<const char *>(&value), sizeof(T));
    };
    // Null documents are expected to be absent from every result below.
    const auto is_null = [include_nulls](uint32_t id) {
      return include_nulls && id % 100 == 0;
    };

    InvertedSearchResult::Ptr res;

    // Test EQ operator
    for (uint32_t id = 0; id < num_docs_; ++id) {
      const T probe = generate_sequential_number<T>(id);
      res = indexer->search(as_bytes(probe), CompareOp::EQ);
      ASSERT_TRUE(res);
      if (is_null(id)) {
        ASSERT_EQ(res->count(), 0);
        ASSERT_FALSE(res->contains(id));
        continue;
      }
      // Exactly one hit, and the iterator yields it and then runs out.
      ASSERT_EQ(res->count(), 1);
      ASSERT_TRUE(res->contains(id));
      auto it = res->create_iterator();
      ASSERT_EQ(it->doc_id(), id);
      it->next();
      ASSERT_FALSE(it->valid());
    }

    // Test NE operator with a non-existent value
    T v = generate_sequential_number<T>(num_docs_);
    res = indexer->search(as_bytes(v), CompareOp::NE);
    ASSERT_TRUE(res);
    if (!include_nulls) {
      // Every document matches; the iterator must walk them in id order.
      ASSERT_EQ(res->count(), num_docs_);
      auto it = res->create_iterator();
      for (uint32_t id = 0; id < num_docs_; ++id) {
        ASSERT_TRUE(res->contains(id));
        ASSERT_EQ(it->doc_id(), id);
        it->next();
      }
      ASSERT_FALSE(it->valid());
    } else {
      for (uint32_t id = 0; id < num_docs_; ++id) {
        if (id % 100 != 0) {
          ASSERT_TRUE(res->contains(id));
        } else {
          ASSERT_FALSE(res->contains(id));
        }
      }
    }

    // Test NE operator with a random value
    static std::random_device rd;
    static std::mt19937 gen(rd());
    std::uniform_int_distribution<uint32_t> dis(0, num_docs_ - 1);
    const uint32_t num_random = dis(gen);
    v = generate_sequential_number<T>(num_random);
    res = indexer->search(as_bytes(v), CompareOp::NE);
    ASSERT_TRUE(res);
    for (uint32_t id = 0; id < num_docs_; ++id) {
      if (is_null(id)) {
        ASSERT_FALSE(res->contains(id));
      } else if (id == num_random) {
        ASSERT_FALSE(res->contains(id));
      } else {
        ASSERT_TRUE(res->contains(id));
      }
    }
  }


  /**
   * @brief Checks the LT and LE operators against the sequential data set.
   *
   * The values of documents 0 and 1 are probed with exact result counts.
   * Four further probes (the values generated for ids num_docs_ / 10,
   * num_docs_ / 2, num_docs_ - 1 and num_docs_) are checked through
   * per-document membership.
   *
   * @param indexer        Column indexer to verify.
   * @param include_nulls  Whether every 100th document was inserted as null.
   */
  template <typename T>
  void verify_sequential_numbers_range_less(InvertedColumnIndexer::Ptr indexer,
                                            bool include_nulls) {
    // Raw-byte form of a value, which is what search() is queried with.
    const auto as_bytes = [](const T &value) {
      return std::string(reinterpret_cast<const char *>(&value), sizeof(T));
    };
    // Null documents are expected to be absent from every result below.
    const auto is_null = [include_nulls](uint32_t id) {
      return include_nulls && id % 100 == 0;
    };

    // ---- Probe: value of document 0 ----
    T v = generate_sequential_number<T>(0);
    auto res = indexer->search(as_bytes(v), CompareOp::LT);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), 0);
    ASSERT_FALSE(res->contains(0));
    res = indexer->search(as_bytes(v), CompareOp::LE);
    ASSERT_TRUE(res);
    if (!include_nulls) {
      ASSERT_EQ(res->count(), 1);
      ASSERT_TRUE(res->contains(0));
      ASSERT_FALSE(res->contains(1));
    } else {
      // Document 0 is null, so nothing is <= its would-be value.
      ASSERT_EQ(res->count(), 0);
      ASSERT_FALSE(res->contains(0));
    }

    // ---- Probe: value of document 1 ----
    v = generate_sequential_number<T>(1);
    res = indexer->search(as_bytes(v), CompareOp::LT);
    ASSERT_TRUE(res);
    if (!include_nulls) {
      ASSERT_EQ(res->count(), 1);
      ASSERT_TRUE(res->contains(0));
      ASSERT_FALSE(res->contains(1));
    } else {
      ASSERT_EQ(res->count(), 0);
      ASSERT_FALSE(res->contains(0));
    }
    res = indexer->search(as_bytes(v), CompareOp::LE);
    ASSERT_TRUE(res);
    if (!include_nulls) {
      ASSERT_EQ(res->count(), 2);
      ASSERT_TRUE(res->contains(0));
      ASSERT_TRUE(res->contains(1));
      ASSERT_FALSE(res->contains(2));
    } else {
      ASSERT_EQ(res->count(), 1);
      ASSERT_FALSE(res->contains(0));
      ASSERT_TRUE(res->contains(1));
      ASSERT_FALSE(res->contains(2));
    }

    // ---- Membership probes ----
    // For a probe generated from id `pivot`, LT must match the non-null ids
    // in [0, pivot) and LE the non-null ids in [0, le_end); the id right at
    // each bound must not match. For the last probe, which lies above every
    // document, the LE bound stays at num_docs_.
    struct Probe {
      uint32_t pivot;
      uint32_t le_end;
    };
    const Probe probes[] = {
        {num_docs_ / 10, num_docs_ / 10 + 1},
        {num_docs_ / 2, num_docs_ / 2 + 1},
        {num_docs_ - 1, num_docs_},
        {num_docs_, num_docs_},
    };
    for (const Probe &probe : probes) {
      v = generate_sequential_number<T>(probe.pivot);

      res = indexer->search(as_bytes(v), CompareOp::LT);
      ASSERT_TRUE(res);
      for (uint32_t id = 0; id < probe.pivot; ++id) {
        if (is_null(id)) {
          ASSERT_FALSE(res->contains(id));
        } else {
          ASSERT_TRUE(res->contains(id));
        }
      }
      ASSERT_FALSE(res->contains(probe.pivot));

      res = indexer->search(as_bytes(v), CompareOp::LE);
      ASSERT_TRUE(res);
      for (uint32_t id = 0; id < probe.le_end; ++id) {
        if (is_null(id)) {
          ASSERT_FALSE(res->contains(id));
        } else {
          ASSERT_TRUE(res->contains(id));
        }
      }
      ASSERT_FALSE(res->contains(probe.le_end));
    }
  }


  /**
   * @brief Verifies GT / GE range searches over a sequential-number column.
   *
   * The index is probed with the values generated for doc ids 0, 1,
   * num_docs_ / 10, num_docs_ / 2, num_docs_ - 1 and num_docs_. For each probe
   * the result set is checked id by id against the expected range.
   *
   * @tparam T             Value type stored in the column.
   * @param indexer        Column indexer under test.
   * @param include_nulls  When true, doc ids that are multiples of 100 are
   *                       expected to be absent from every range result.
   */
  template <typename T>
  void verify_sequential_numbers_range_greater(
      InvertedColumnIndexer::Ptr indexer, bool include_nulls) {
    // Probe at the value of doc 0.
    T v = generate_sequential_number<T>(0);
    auto res =
        indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::GT);
    ASSERT_TRUE(res);
    ASSERT_FALSE(res->contains(0));
    for (uint32_t id = 1; id < num_docs_; ++id) {
      if (include_nulls && id % 100 == 0) {
        ASSERT_FALSE(res->contains(id));
      } else {
        ASSERT_TRUE(res->contains(id));
      }
    }
    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::GE);
    ASSERT_TRUE(res);
    for (uint32_t id = 0; id < num_docs_; ++id) {
      if (include_nulls && id % 100 == 0) {
        ASSERT_FALSE(res->contains(id));
      } else {
        ASSERT_TRUE(res->contains(id));
      }
    }

    // Probe at the value of doc 1.
    v = generate_sequential_number<T>(1);
    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::GT);
    ASSERT_TRUE(res);
    ASSERT_FALSE(res->contains(0));
    ASSERT_FALSE(res->contains(1));
    for (uint32_t id = 2; id < num_docs_; ++id) {
      if (include_nulls && id % 100 == 0) {
        ASSERT_FALSE(res->contains(id));
      } else {
        ASSERT_TRUE(res->contains(id));
      }
    }
    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::GE);
    ASSERT_TRUE(res);
    ASSERT_FALSE(res->contains(0));
    for (uint32_t id = 1; id < num_docs_; ++id) {
      if (include_nulls && id % 100 == 0) {
        ASSERT_FALSE(res->contains(id));
      } else {
        ASSERT_TRUE(res->contains(id));
      }
    }

    // Probe at the value of doc num_docs_ / 10.
    v = generate_sequential_number<T>(num_docs_ / 10);
    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::GT);
    ASSERT_TRUE(res);
    for (uint32_t id = num_docs_ / 10 + 1; id < num_docs_; ++id) {
      if (include_nulls && id % 100 == 0) {
        ASSERT_FALSE(res->contains(id));
      } else {
        ASSERT_TRUE(res->contains(id));
      }
    }
    ASSERT_FALSE(res->contains(num_docs_ / 10));
    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::GE);
    ASSERT_TRUE(res);
    for (uint32_t id = num_docs_ / 10; id < num_docs_; ++id) {
      if (include_nulls && id % 100 == 0) {
        ASSERT_FALSE(res->contains(id));
      } else {
        ASSERT_TRUE(res->contains(id));
      }
    }
    ASSERT_FALSE(res->contains(num_docs_ / 10 - 1));

    // Probe at the value of doc num_docs_ / 2.
    v = generate_sequential_number<T>(num_docs_ / 2);
    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::GT);
    ASSERT_TRUE(res);
    for (uint32_t id = num_docs_ / 2 + 1; id < num_docs_; ++id) {
      if (include_nulls && id % 100 == 0) {
        ASSERT_FALSE(res->contains(id));
      } else {
        ASSERT_TRUE(res->contains(id));
      }
    }
    ASSERT_FALSE(res->contains(num_docs_ / 2));
    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::GE);
    ASSERT_TRUE(res);
    // The GE range is [num_docs_ / 2, num_docs_), so the scan must run up to
    // num_docs_ to actually exercise the result set.
    for (uint32_t id = num_docs_ / 2; id < num_docs_; ++id) {
      if (include_nulls && id % 100 == 0) {
        ASSERT_FALSE(res->contains(id));
      } else {
        ASSERT_TRUE(res->contains(id));
      }
    }
    ASSERT_FALSE(res->contains(num_docs_ / 2 - 1));

    // Probe at the value of the last doc.
    v = generate_sequential_number<T>(num_docs_ - 1);
    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::GT);
    ASSERT_TRUE(res);
    ASSERT_FALSE(res->contains(num_docs_ - 1));
    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::GE);
    ASSERT_TRUE(res);
    ASSERT_TRUE(res->contains(num_docs_ - 1));
    ASSERT_FALSE(res->contains(num_docs_));

    // Probe one past the last doc: both ranges are expected to be empty.
    v = generate_sequential_number<T>(num_docs_);
    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::GT);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), 0);
    res = indexer->search(std::string((char *)&v, sizeof(T)), CompareOp::GE);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), 0);
  }


  /**
   * @brief Checks search_null() and search_non_null() id by id.
   *
   * The null result is expected to hold exactly the doc ids that are
   * multiples of 100 (within [0, num_docs_)); the non-null result is expected
   * to hold exactly the remaining ids.
   *
   * @param indexer  Column indexer under test.
   */
  template <typename T>
  void verify_sequential_numbers_null(InvertedColumnIndexer::Ptr indexer) {
    // Null lookup: only every 100th doc should be reported.
    InvertedSearchResult::Ptr hits = indexer->search_null();
    ASSERT_TRUE(hits);
    for (uint32_t doc_id = 0; doc_id < num_docs_; ++doc_id) {
      const bool expect_null = (doc_id % 100 == 0);
      if (!expect_null) {
        ASSERT_FALSE(hits->contains(doc_id));
      } else {
        ASSERT_TRUE(hits->contains(doc_id));
      }
    }

    // Non-null lookup: the exact complement of the set above.
    hits = indexer->search_non_null();
    ASSERT_TRUE(hits);
    for (uint32_t doc_id = 0; doc_id < num_docs_; ++doc_id) {
      const bool expect_null = (doc_id % 100 == 0);
      if (!expect_null) {
        ASSERT_TRUE(hits->contains(doc_id));
      } else {
        ASSERT_FALSE(hits->contains(doc_id));
      }
    }
  }


  /**
   * @brief Checks the sizes reported by evaluate_ratio() for LT and GT ranges
   *        anchored at the value generated for doc id num_docs_ / 10.
   *
   * The reported total must equal the number of docs expected to carry a
   * value. The range size is only bounded loosely: at most 2/10 of the docs
   * for LT and at least 8/10 of the docs for GT.
   *
   * @param indexer        Column indexer under test.
   * @param include_nulls  When true, one doc in every hundred is expected to
   *                       be missing from the reported total.
   */
  template <typename T>
  void verify_sequential_numbers_range_ratio(InvertedColumnIndexer::Ptr indexer,
                                             bool include_nulls) {
    const uint32_t tenth = num_docs_ / 10;
    T pivot = generate_sequential_number<T>(tenth);
    const std::string pivot_key((char *)&pivot, sizeof(T));

    // Both branches of the original comparison share the same range bound;
    // only the expected total depends on whether nulls were inserted.
    const uint32_t expected_total =
        include_nulls ? num_docs_ - num_docs_ / 100 : num_docs_;

    uint64_t total_size, range_size;

    auto status = indexer->evaluate_ratio(pivot_key, CompareOp::LT,
                                          &total_size, &range_size);
    ASSERT_TRUE(status.ok());
    ASSERT_EQ(total_size, expected_total);
    ASSERT_LE(range_size, tenth * 2);

    status = indexer->evaluate_ratio(pivot_key, CompareOp::GT, &total_size,
                                     &range_size);
    ASSERT_TRUE(status.ok());
    ASSERT_EQ(total_size, expected_total);
    ASSERT_GE(range_size, tenth * 8);
  }


 private:
  /**
   * @brief Maps a doc id to the value stored for it in a sequential column.
   *
   * The value is (doc_id + 0.333) computed in double precision and then
   * converted to T, so integral types receive the truncated doc id.
   *
   * @tparam T      Target numeric type.
   * @param doc_id  Document id to derive the value from.
   * @return The generated value converted to T.
   */
  template <typename T>
  T generate_sequential_number(uint32_t doc_id) {
    // E.g., for int32_t, [id: 5, value: 5]; for float, [id: 5, value: 5.333]
    return static_cast<T>(doc_id + 0.333);
  }


 private:
  // Exclusive upper bound of the doc ids that the verify_* helpers iterate
  // over.
  const uint32_t num_docs_;
  // NOTE(review): presumably the number of concurrent writer threads used by
  // the insert helpers -- not referenced in this part of the class; confirm.
  const uint32_t num_write_threads_;
};


/**
 *
 * @brief Unit tests for the InvertedColumnIndexer implementation.
 *
 */
class InvertedIndexTest : public testing::Test {
  /*****  Global initialization and cleanup - Start  *****/
 public:
  static void SetUpTestCase() {
    char cmd_buf[100];
    snprintf(cmd_buf, 100, "rm -rf %s", working_dir.c_str());
    system(cmd_buf);

    indexer_ = InvertedIndexer::CreateAndOpen(collection_name, working_dir,
                                              true, {}, false);

    params_ = std::make_shared<InvertIndexParams>(true);
  }

  static void TearDownTestCase() {
    indexer_.reset();

    char cmd_buf[100];
    snprintf(cmd_buf, 100, "rm -rf %s", working_dir.c_str());
    system(cmd_buf);
  }
  /*****  Global initialization and cleanup - End  *****/


  /*****  Per-test initialization and cleanup - Start  *****/
 protected:
  void SetUp() override {}

  void TearDown() override {}
  /*****  Per-test initialization and cleanup - End  *****/


 protected:
  static InvertedIndexer::Ptr indexer_;
  static TestHelper test_helper_;
  static IndexParams::Ptr params_;
};


// Out-of-class definitions of the fixture's static members. The indexer and
// the index params start out null and are assigned in SetUpTestCase().
InvertedIndexer::Ptr InvertedIndexTest::indexer_{nullptr};
// NOTE(review): the arguments are presumably (num_docs, num_write_threads) --
// confirm against the TestHelper constructor.
TestHelper InvertedIndexTest::test_helper_{100000, 10};
IndexParams::Ptr InvertedIndexTest::params_{nullptr};


/*
 *
 * Test Cases
 *
 */
// Creates the two INT32 columns ("seq_int32" and "seq_int32_w_null") on the
// shared indexer, fills each one through the helper and verifies it. The
// helper is called with `false` for the first column and `true` for the
// "_w_null" column.
TEST_F(InvertedIndexTest, SEQUENTIAL_NUMBERS_INT32) {
  ASSERT_TRUE(indexer_);

  // First column: helper flag is false.
  FieldSchema schema{"seq_int32", DataType::INT32, true, params_};
  ASSERT_TRUE(indexer_->create_column_indexer(schema).ok());
  auto column = (*indexer_)["seq_int32"];
  ASSERT_TRUE(column);
  test_helper_.insert_sequential_numbers<int32_t>(column, false);
  test_helper_.verify_sequential_numbers<int32_t>(column, false);

  // Second column: helper flag is true.
  FieldSchema schema_w_null{"seq_int32_w_null", DataType::INT32, true,
                            params_};
  ASSERT_TRUE(indexer_->create_column_indexer(schema_w_null).ok());
  auto column_w_null = (*indexer_)["seq_int32_w_null"];
  ASSERT_TRUE(column_w_null);
  test_helper_.insert_sequential_numbers<int32_t>(column_w_null, true);
  test_helper_.verify_sequential_numbers<int32_t>(column_w_null, true);
}


// Creates the two INT64 columns ("seq_int64" and "seq_int64_w_null") on the
// shared indexer, fills each one through the helper and verifies it. The
// helper is called with `false` for the first column and `true` for the
// "_w_null" column.
TEST_F(InvertedIndexTest, SEQUENTIAL_NUMBERS_INT64) {
  ASSERT_TRUE(indexer_);

  // First column: helper flag is false.
  FieldSchema schema{"seq_int64", DataType::INT64, true, params_};
  ASSERT_TRUE(indexer_->create_column_indexer(schema).ok());
  auto column = (*indexer_)["seq_int64"];
  ASSERT_TRUE(column);
  test_helper_.insert_sequential_numbers<int64_t>(column, false);
  test_helper_.verify_sequential_numbers<int64_t>(column, false);

  // Second column: helper flag is true.
  FieldSchema schema_w_null{"seq_int64_w_null", DataType::INT64, true,
                            params_};
  ASSERT_TRUE(indexer_->create_column_indexer(schema_w_null).ok());
  auto column_w_null = (*indexer_)["seq_int64_w_null"];
  ASSERT_TRUE(column_w_null);
  test_helper_.insert_sequential_numbers<int64_t>(column_w_null, true);
  test_helper_.verify_sequential_numbers<int64_t>(column_w_null, true);
}


// Creates the two UINT32 columns ("seq_uint32" and "seq_uint32_w_null") on
// the shared indexer, fills each one through the helper and verifies it. The
// helper is called with `false` for the first column and `true` for the
// "_w_null" column.
TEST_F(InvertedIndexTest, SEQUENTIAL_NUMBERS_UINT32) {
  ASSERT_TRUE(indexer_);

  // First column: helper flag is false.
  FieldSchema schema{"seq_uint32", DataType::UINT32, true, params_};
  ASSERT_TRUE(indexer_->create_column_indexer(schema).ok());
  auto column = (*indexer_)["seq_uint32"];
  ASSERT_TRUE(column);
  test_helper_.insert_sequential_numbers<uint32_t>(column, false);
  test_helper_.verify_sequential_numbers<uint32_t>(column, false);

  // Second column: helper flag is true.
  FieldSchema schema_w_null{"seq_uint32_w_null", DataType::UINT32, true,
                            params_};
  ASSERT_TRUE(indexer_->create_column_indexer(schema_w_null).ok());
  auto column_w_null = (*indexer_)["seq_uint32_w_null"];
  ASSERT_TRUE(column_w_null);
  test_helper_.insert_sequential_numbers<uint32_t>(column_w_null, true);
  test_helper_.verify_sequential_numbers<uint32_t>(column_w_null, true);
}


// Creates the two UINT64 columns ("seq_uint64" and "seq_uint64_w_null") on
// the shared indexer, fills each one through the helper and verifies it. The
// helper is called with `false` for the first column and `true` for the
// "_w_null" column.
TEST_F(InvertedIndexTest, SEQUENTIAL_NUMBERS_UINT64) {
  ASSERT_TRUE(indexer_);

  // First column: helper flag is false.
  FieldSchema schema{"seq_uint64", DataType::UINT64, true, params_};
  ASSERT_TRUE(indexer_->create_column_indexer(schema).ok());
  auto column = (*indexer_)["seq_uint64"];
  ASSERT_TRUE(column);
  test_helper_.insert_sequential_numbers<uint64_t>(column, false);
  test_helper_.verify_sequential_numbers<uint64_t>(column, false);

  // Second column: helper flag is true.
  FieldSchema schema_w_null{"seq_uint64_w_null", DataType::UINT64, true,
                            params_};
  ASSERT_TRUE(indexer_->create_column_indexer(schema_w_null).ok());
  auto column_w_null = (*indexer_)["seq_uint64_w_null"];
  ASSERT_TRUE(column_w_null);
  test_helper_.insert_sequential_numbers<uint64_t>(column_w_null, true);
  test_helper_.verify_sequential_numbers<uint64_t>(column_w_null, true);
}


// Creates the two FLOAT columns ("seq_float" and "seq_float_w_null") on the
// shared indexer, fills each one through the helper and verifies it. The
// helper is called with `false` for the first column and `true` for the
// "_w_null" column.
TEST_F(InvertedIndexTest, SEQUENTIAL_NUMBERS_FLOAT) {
  ASSERT_TRUE(indexer_);

  // First column: helper flag is false.
  FieldSchema schema{"seq_float", DataType::FLOAT, true, params_};
  ASSERT_TRUE(indexer_->create_column_indexer(schema).ok());
  auto column = (*indexer_)["seq_float"];
  ASSERT_TRUE(column);
  test_helper_.insert_sequential_numbers<float>(column, false);
  test_helper_.verify_sequential_numbers<float>(column, false);

  // Second column: helper flag is true.
  FieldSchema schema_w_null{"seq_float_w_null", DataType::FLOAT, true,
                            params_};
  ASSERT_TRUE(indexer_->create_column_indexer(schema_w_null).ok());
  auto column_w_null = (*indexer_)["seq_float_w_null"];
  ASSERT_TRUE(column_w_null);
  test_helper_.insert_sequential_numbers<float>(column_w_null, true);
  test_helper_.verify_sequential_numbers<float>(column_w_null, true);
}


// Creates the two DOUBLE columns ("seq_double" and "seq_double_w_null") on
// the shared indexer, fills each one through the helper and verifies it. The
// helper is called with `false` for the first column and `true` for the
// "_w_null" column.
TEST_F(InvertedIndexTest, SEQUENTIAL_NUMBERS_DOUBLE) {
  ASSERT_TRUE(indexer_);

  // First column: helper flag is false.
  FieldSchema schema{"seq_double", DataType::DOUBLE, true, params_};
  ASSERT_TRUE(indexer_->create_column_indexer(schema).ok());
  auto column = (*indexer_)["seq_double"];
  ASSERT_TRUE(column);
  test_helper_.insert_sequential_numbers<double>(column, false);
  test_helper_.verify_sequential_numbers<double>(column, false);

  // Second column: helper flag is true.
  FieldSchema schema_w_null{"seq_double_w_null", DataType::DOUBLE, true,
                            params_};
  ASSERT_TRUE(indexer_->create_column_indexer(schema_w_null).ok());
  auto column_w_null = (*indexer_)["seq_double_w_null"];
  ASSERT_TRUE(column_w_null);
  test_helper_.insert_sequential_numbers<double>(column_w_null, true);
  test_helper_.verify_sequential_numbers<double>(column_w_null, true);
}


// Seals the shared indexer and re-runs the verification on every column
// created by the SEQUENTIAL_NUMBERS_* cases, so this test depends on those
// cases having populated the columns beforehand.
TEST_F(InvertedIndexTest, SEALED) {
  ASSERT_TRUE(indexer_);

  ASSERT_TRUE(indexer_->seal().ok());

  // INT32 columns.
  auto col_i32 = (*indexer_)["seq_int32"];
  ASSERT_TRUE(col_i32);
  test_helper_.verify_sequential_numbers<int32_t>(col_i32, false);

  auto col_i32_w_null = (*indexer_)["seq_int32_w_null"];
  ASSERT_TRUE(col_i32_w_null);
  test_helper_.verify_sequential_numbers<int32_t>(col_i32_w_null, true);

  // INT64 columns.
  auto col_i64 = (*indexer_)["seq_int64"];
  ASSERT_TRUE(col_i64);
  test_helper_.verify_sequential_numbers<int64_t>(col_i64, false);

  auto col_i64_w_null = (*indexer_)["seq_int64_w_null"];
  ASSERT_TRUE(col_i64_w_null);
  test_helper_.verify_sequential_numbers<int64_t>(col_i64_w_null, true);

  // UINT32 columns.
  auto col_u32 = (*indexer_)["seq_uint32"];
  ASSERT_TRUE(col_u32);
  test_helper_.verify_sequential_numbers<uint32_t>(col_u32, false);

  auto col_u32_w_null = (*indexer_)["seq_uint32_w_null"];
  ASSERT_TRUE(col_u32_w_null);
  test_helper_.verify_sequential_numbers<uint32_t>(col_u32_w_null, true);

  // UINT64 columns.
  auto col_u64 = (*indexer_)["seq_uint64"];
  ASSERT_TRUE(col_u64);
  test_helper_.verify_sequential_numbers<uint64_t>(col_u64, false);

  auto col_u64_w_null = (*indexer_)["seq_uint64_w_null"];
  ASSERT_TRUE(col_u64_w_null);
  test_helper_.verify_sequential_numbers<uint64_t>(col_u64_w_null, true);

  // FLOAT columns.
  auto col_f32 = (*indexer_)["seq_float"];
  ASSERT_TRUE(col_f32);
  test_helper_.verify_sequential_numbers<float>(col_f32, false);

  auto col_f32_w_null = (*indexer_)["seq_float_w_null"];
  ASSERT_TRUE(col_f32_w_null);
  test_helper_.verify_sequential_numbers<float>(col_f32_w_null, true);

  // DOUBLE columns.
  auto col_f64 = (*indexer_)["seq_double"];
  ASSERT_TRUE(col_f64);
  test_helper_.verify_sequential_numbers<double>(col_f64, false);

  auto col_f64_w_null = (*indexer_)["seq_double_w_null"];
  ASSERT_TRUE(col_f64_w_null);
  test_helper_.verify_sequential_numbers<double>(col_f64_w_null, true);
}


// Snapshots the shared indexer into `working_dir + "snapshot"`, opens the
// snapshot as a separate indexer with the full column schema, and runs the
// same verification on every column of the snapshot.
TEST_F(InvertedIndexTest, CREATE_SNAPSHOT) {
  ASSERT_TRUE(indexer_);

  std::string snapshot_dir = working_dir + "snapshot";
  ASSERT_TRUE(indexer_->create_snapshot(snapshot_dir).ok());

  // Name / type of every column created by the SEQUENTIAL_NUMBERS_* cases,
  // in the order the schema list is built.
  struct ColumnSpec {
    const char *name;
    DataType type;
  };
  const ColumnSpec column_specs[] = {
      {"seq_int32", DataType::INT32},
      {"seq_int32_w_null", DataType::INT32},
      {"seq_int64", DataType::INT64},
      {"seq_int64_w_null", DataType::INT64},
      {"seq_uint32", DataType::UINT32},
      {"seq_uint32_w_null", DataType::UINT32},
      {"seq_uint64", DataType::UINT64},
      {"seq_uint64_w_null", DataType::UINT64},
      {"seq_float", DataType::FLOAT},
      {"seq_float_w_null", DataType::FLOAT},
      {"seq_double", DataType::DOUBLE},
      {"seq_double_w_null", DataType::DOUBLE}};

  std::vector<FieldSchema> fields;
  for (const ColumnSpec &spec : column_specs) {
    fields.push_back(FieldSchema(spec.name, spec.type, true, params_));
  }

  auto snapshot_indexer = InvertedIndexer::CreateAndOpen(
      "snapshot", snapshot_dir, false, fields, false);
  ASSERT_TRUE(snapshot_indexer);

  // INT32 columns.
  auto col_i32 = (*snapshot_indexer)["seq_int32"];
  ASSERT_TRUE(col_i32);
  test_helper_.verify_sequential_numbers<int32_t>(col_i32, false);

  auto col_i32_w_null = (*snapshot_indexer)["seq_int32_w_null"];
  ASSERT_TRUE(col_i32_w_null);
  test_helper_.verify_sequential_numbers<int32_t>(col_i32_w_null, true);

  // INT64 columns.
  auto col_i64 = (*snapshot_indexer)["seq_int64"];
  ASSERT_TRUE(col_i64);
  test_helper_.verify_sequential_numbers<int64_t>(col_i64, false);

  auto col_i64_w_null = (*snapshot_indexer)["seq_int64_w_null"];
  ASSERT_TRUE(col_i64_w_null);
  test_helper_.verify_sequential_numbers<int64_t>(col_i64_w_null, true);

  // UINT32 columns.
  auto col_u32 = (*snapshot_indexer)["seq_uint32"];
  ASSERT_TRUE(col_u32);
  test_helper_.verify_sequential_numbers<uint32_t>(col_u32, false);

  auto col_u32_w_null = (*snapshot_indexer)["seq_uint32_w_null"];
  ASSERT_TRUE(col_u32_w_null);
  test_helper_.verify_sequential_numbers<uint32_t>(col_u32_w_null, true);

  // UINT64 columns.
  auto col_u64 = (*snapshot_indexer)["seq_uint64"];
  ASSERT_TRUE(col_u64);
  test_helper_.verify_sequential_numbers<uint64_t>(col_u64, false);

  auto col_u64_w_null = (*snapshot_indexer)["seq_uint64_w_null"];
  ASSERT_TRUE(col_u64_w_null);
  test_helper_.verify_sequential_numbers<uint64_t>(col_u64_w_null, true);

  // FLOAT columns.
  auto col_f32 = (*snapshot_indexer)["seq_float"];
  ASSERT_TRUE(col_f32);
  test_helper_.verify_sequential_numbers<float>(col_f32, false);

  auto col_f32_w_null = (*snapshot_indexer)["seq_float_w_null"];
  ASSERT_TRUE(col_f32_w_null);
  test_helper_.verify_sequential_numbers<float>(col_f32_w_null, true);

  // DOUBLE columns.
  auto col_f64 = (*snapshot_indexer)["seq_double"];
  ASSERT_TRUE(col_f64);
  test_helper_.verify_sequential_numbers<double>(col_f64, false);

  auto col_f64_w_null = (*snapshot_indexer)["seq_double_w_null"];
  ASSERT_TRUE(col_f64_w_null);
  test_helper_.verify_sequential_numbers<double>(col_f64_w_null, true);
}

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif

================================================
FILE: tests/db/index/column/inverted_column/inverted_column_indexer_string_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#include <iomanip>
#include <random>
#include <sstream>
#include <string>
#include <thread>
#include <vector>
#include <gtest/gtest.h>
#include "db/index/column/inverted_column/inverted_indexer.h"

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
#endif

using namespace zvec;
using File = ailego::File;


// Directory handed to InvertedIndexer::CreateAndOpen() by the fixture; it is
// removed with `rm -rf` before the suite starts and after it finishes.
const std::string working_dir{"./inverted_column_indexer_string_dir/"};
// Collection name handed to InvertedIndexer::CreateAndOpen() by the fixture.
const std::string collection_name{"test_collection"};


/**
 * @brief A helper class for testing the InvertedColumnIndexer implementation.
 *
 * This class generates test data with specific patterns to verify the
 * correctness of the inverted index implementation. It provides various methods
 * to populate an InvertedColumnIndexer with predictable data patterns and
 * verify that the indexing and search operations work correctly.
 *
 */
class TestHelper {
 public:
  TestHelper(uint32_t num_docs, uint32_t num_write_threads = 10)
      : num_docs_(num_docs / 100 * 100),
        num_write_threads_(num_write_threads) {};


  void insert_strings(InvertedColumnIndexer::Ptr indexer) {
    auto insert_func = [&](uint32_t start, uint32_t end) {
      Status s;
      for (uint32_t i = start; i < end; ++i) {
        auto v = generate_string(i);
        s = indexer->insert(i, v);
        ASSERT_TRUE(s.ok());
      }
    };

    uint32_t num_docs_per_thread = num_docs_ / num_write_threads_;
    std::vector<std::thread> threads{};
    for (uint32_t t = 0; t < num_write_threads_; ++t) {
      threads.emplace_back(insert_func, t * num_docs_per_thread,
                           (t + 1) * num_docs_per_thread);
    }
    for (auto &t : threads) {
      t.join();
    }
  }


  void verify_strings(InvertedColumnIndexer::Ptr indexer) {
    verify_strings_eq_ne(indexer);
    verify_strings_like(indexer);
    verify_strings_range(indexer);
  }


  void verify_strings_eq_ne(InvertedColumnIndexer::Ptr indexer) {
    InvertedSearchResult::Ptr res;
    // Test EQ operator
    for (uint32_t i = 0; i < 20; i++) {
      auto v = generate_string(i);
      res = indexer->search(v, CompareOp::EQ);
      ASSERT_TRUE(res);
      ASSERT_EQ(res->count(), num_docs_ / 20);
      for (uint32_t j = 0; j < num_docs_ / 20; ++j) {
        ASSERT_TRUE(res->contains(i + j * 20));
      }
    }

    // Test NE operator with a non-existent value
    std::string v = "NotExist";
    res = indexer->search(v, CompareOp::NE);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), num_docs_);

    // Test NE operator with a random value
    static std::random_device rd;
    static std::mt19937 gen(rd());
    std::uniform_int_distribution<uint32_t> dis(0, 19);
    uint32_t random_num = dis(gen);
    v = generate_string(random_num);
    res = indexer->search(v, CompareOp::NE);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), num_docs_ - num_docs_ / 20);
    for (uint32_t j = 0; j < num_docs_; ++j) {
      if (j % 20 == random_num) {
        ASSERT_FALSE(res->contains(j));
      } else {
        ASSERT_TRUE(res->contains(j));
      }
    }
  }


  void verify_strings_like(InvertedColumnIndexer::Ptr indexer) {
    InvertedSearchResult::Ptr res;

    std::string v = "Three";
    res = indexer->search(v, CompareOp::HAS_PREFIX);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), num_docs_ / 4);
    for (uint32_t j = 0; j < num_docs_; ++j) {
      if (j % 4 == 2) {
        ASSERT_TRUE(res->contains(j));
      } else {
        ASSERT_FALSE(res->contains(j));
      }
    }

    v = "06";
    res = indexer->search(v, CompareOp::HAS_SUFFIX);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), num_docs_ / 20);
    for (uint32_t j = 0; j < num_docs_; ++j) {
      if (j % 20 == 6) {
        ASSERT_TRUE(res->contains(j));
      } else {
        ASSERT_FALSE(res->contains(j));
      }
    }

    v = "6";
    res = indexer->search(v, CompareOp::HAS_SUFFIX);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), num_docs_ / 10);
    for (uint32_t j = 0; j < num_docs_; ++j) {
      if (j % 20 == 6 || j % 20 == 16) {
        ASSERT_TRUE(res->contains(j));
      } else {
        ASSERT_FALSE(res->contains(j));
      }
    }

    v = "21";
    res = indexer->search(v, CompareOp::HAS_SUFFIX);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), 0);
  }


  void verify_strings_range(InvertedColumnIndexer::Ptr indexer) {
    InvertedSearchResult::Ptr res;
    std::string v = "Two";
    res = indexer->search(v, CompareOp::LT);
    ASSERT_TRUE(res);
    // "One", "Three", and "Four" are less than "Two" in string sense
    ASSERT_EQ(res->count(), num_docs_ / 4 * 3);
    for (uint32_t j = 0; j < num_docs_; ++j) {
      if (j % 4 == 1) {
        ASSERT_FALSE(res->contains(j));
      } else {
        ASSERT_TRUE(res->contains(j));
      }
    }
  }


  void insert_string_arrays(InvertedColumnIndexer::Ptr indexer) {
    auto insert_func = [&](uint32_t start, uint32_t end) {
      Status s;
      for (uint32_t i = start; i < end; ++i) {
        auto v = generate_string_array(i);
        s = indexer->insert(i, v);
        ASSERT_TRUE(s.ok());
      }
    };

    uint32_t num_docs_per_thread = num_docs_ / num_write_threads_;
    std::vector<std::thread> threads{};
    for (uint32_t t = 0; t < num_write_threads_; ++t) {
      threads.emplace_back(insert_func, t * num_docs_per_thread,
                           (t + 1) * num_docs_per_thread);
    }
    for (auto &t : threads) {
      t.join();
    }
  }


  void verify_string_arrays(InvertedColumnIndexer::Ptr indexer) {
    InvertedSearchResult::Ptr res;
    auto v = generate_string_array(100);
    res = indexer->multi_search(v, CompareOp::CONTAIN_ALL);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), 1);
    ASSERT_TRUE(res->contains(100));

    res = indexer->multi_search(v, CompareOp::CONTAIN_ANY);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), 5);
    ASSERT_TRUE(res->contains(98));
    ASSERT_TRUE(res->contains(99));
    ASSERT_TRUE(res->contains(100));
    ASSERT_TRUE(res->contains(101));
    ASSERT_TRUE(res->contains(102));

    res = indexer->multi_search(v, CompareOp::NOT_CONTAIN_ALL);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), num_docs_ - 1);
    ASSERT_FALSE(res->contains(100));

    res = indexer->multi_search(v, CompareOp::NOT_CONTAIN_ANY);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), num_docs_ - 5);
    ASSERT_FALSE(res->contains(98));
    ASSERT_FALSE(res->contains(99));
    ASSERT_FALSE(res->contains(100));
    ASSERT_FALSE(res->contains(101));
    ASSERT_FALSE(res->contains(102));

    res = indexer->search_array_len(3, CompareOp::EQ);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), num_docs_);
    res = indexer->search_array_len(3, CompareOp::NE);
    ASSERT_TRUE(res);
    ASSERT_EQ(res->count(), 0);
  }


 private:
  std::string generate_string(uint32_t doc_id) {
    std::string prefix;
    switch (doc_id % 4) {
      case 0:
        prefix = "One";
        break;
      case 1:
        prefix = "Two";
        break;
      case 2:
        prefix = "Three";
        break;
      case 3:
        prefix = "Four";
        break;
    }
    std::stringstream suffix;
    suffix << std::setfill('0') << std::setw(2) << doc_id % 20;

    return prefix + "_" + suffix.str();
  }


  std::vector<std::string> generate_string_array(uint32_t doc_id) {
    std::vector<std::string> ret;
    std::stringstream ss1;
    ss1 << std::setfill('0') << std::setw(10) << doc_id;
    ret.emplace_back(ss1.str());
    std::stringstream ss2;
    ss2 << std::setfill('0') << std::setw(10) << doc_id + 1;
    ret.emplace_back(ss2.str());
    std::stringstream ss3;
    ss3 << std::setfill('0') << std::setw(10) << doc_id + 2;
    ret.emplace_back(ss3.str());
    return ret;
  }


 private:
  const uint32_t num_docs_;
  const uint32_t num_write_threads_;
};


/**
 *
 * @brief Unit tests for the InvertedColumnIndexer implementation.
 *
 */
class InvertedIndexTest : public testing::Test {
  /*****  Global initialization and cleanup - Start  *****/
 public:
  static void SetUpTestCase() {
    char cmd_buf[100];
    snprintf(cmd_buf, 100, "rm -rf %s", working_dir.c_str());
    system(cmd_buf);

    indexer_ = InvertedIndexer::CreateAndOpen(collection_name, working_dir,
                                              true, {}, false);

    params_ = std::make_shared<InvertIndexParams>(true, true);
  }

  static void TearDownTestCase() {
    indexer_.reset();

    char cmd_buf[100];
    snprintf(cmd_buf, 100, "rm -rf %s", working_dir.c_str());
    system(cmd_buf);
  }
  /*****  Global initialization and cleanup - End  *****/


  /*****  Per-test initialization and cleanup - Start  *****/
 protected:
  void SetUp() override {}

  void TearDown() override {}
  /*****  Per-test initialization and cleanup - End  *****/


 protected:
  static InvertedIndexer::Ptr indexer_;
  static TestHelper test_helper_;
  static IndexParams::Ptr params_;
};


// Out-of-class definitions of the fixture's static members. The indexer and
// the index params start out null and are assigned in SetUpTestCase().
InvertedIndexer::Ptr InvertedIndexTest::indexer_{nullptr};
// 100000 docs, written by 10 threads.
TestHelper InvertedIndexTest::test_helper_{100000, 10};
IndexParams::Ptr InvertedIndexTest::params_{nullptr};


/*
 *
 * Test Cases
 *
 */
// Creates the STRING column "test_string" on the shared indexer, fills it
// through the helper and runs the string search verifications on it.
TEST_F(InvertedIndexTest, STRINGS) {
  ASSERT_TRUE(indexer_);

  FieldSchema schema{"test_string", DataType::STRING, true, params_};
  ASSERT_TRUE(indexer_->create_column_indexer(schema).ok());

  auto column = (*indexer_)["test_string"];
  ASSERT_TRUE(column);

  test_helper_.insert_strings(column);
  test_helper_.verify_strings(column);
}


// Creates the ARRAY_STRING column "test_string_array" on the shared indexer,
// fills it through the helper and runs the array search verifications on it.
TEST_F(InvertedIndexTest, STRING_ARRAYS) {
  ASSERT_TRUE(indexer_);

  FieldSchema schema{"test_string_array", DataType::ARRAY_STRING, true,
                     params_};
  ASSERT_TRUE(indexer_->create_column_indexer(schema).ok());

  auto column = (*indexer_)["test_string_array"];
  ASSERT_TRUE(column);

  test_helper_.insert_string_arrays(column);
  test_helper_.verify_string_arrays(column);
}


// Seals the shared indexer and re-runs the verifications on the columns
// created by the STRINGS and STRING_ARRAYS cases, so this test depends on
// those cases having populated the columns beforehand.
TEST_F(InvertedIndexTest, SEALED) {
  ASSERT_TRUE(indexer_);
  ASSERT_TRUE(indexer_->seal().ok());

  // Plain string column.
  auto string_column = (*indexer_)["test_string"];
  ASSERT_TRUE(string_column);
  test_helper_.verify_strings(string_column);

  // String-array column.
  auto string_array_column = (*indexer_)["test_string_array"];
  ASSERT_TRUE(string_array_column);
  test_helper_.verify_string_arrays(string_array_column);
}

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif

================================================
FILE: tests/db/index/column/inverted_column/inverted_indexer_util_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#include <gtest/gtest.h>
#define private public
#define protected public
#include "db/index/column/inverted_column/inverted_indexer.h"
#undef private
#undef protected

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
#endif


using namespace zvec;
using File = ailego::File;


// Directory handed to InvertedIndexer::CreateAndOpen() by the fixture; it is
// removed with `rm -rf` before the suite starts and after it finishes.
const std::string working_dir{"./inverted_indexer_util_dir/"};
// Collection name handed to InvertedIndexer::CreateAndOpen() by the fixture.
const std::string collection_name{"test_collection"};


class InvertedIndexTest : public testing::Test {
  /*****  Global initialization and cleanup - Start  *****/
 public:
  static void SetUpTestCase() {
    char cmd_buf[100];
    snprintf(cmd_buf, 100, "rm -rf %s", working_dir.c_str());
    system(cmd_buf);

    indexer_ = InvertedIndexer::CreateAndOpen(collection_name, working_dir,
                                              true, {}, false);

    params_ = std::make_shared<InvertIndexParams>(true, false);
  }

  static void TearDownTestCase() {
    indexer_.reset();

    char cmd_buf[100];
    snprintf(cmd_buf, 100, "rm -rf %s", working_dir.c_str());
    system(cmd_buf);
  }
  /*****  Global initialization and cleanup - End  *****/


  /*****  Per-test initialization and cleanup - Start  *****/
 protected:
  void SetUp() override {}

  void TearDown() override {}
  /*****  Per-test initialization and cleanup - End  *****/


 protected:
  static InvertedIndexer::Ptr indexer_;
  static IndexParams::Ptr params_;
};


InvertedIndexer::Ptr InvertedIndexTest::indexer_{nullptr};
IndexParams::Ptr InvertedIndexTest::params_{nullptr};


// The indexer must report the collection name it was created with.
TEST_F(InvertedIndexTest, COLLECTION_NAME) {
  ASSERT_NE(indexer_, nullptr);

  const auto &reported_name = indexer_->collection();
  ASSERT_EQ(reported_name, collection_name);
}


// The indexer must report the working directory it was created with.
TEST_F(InvertedIndexTest, WORKING_DIR) {
  ASSERT_NE(indexer_, nullptr);

  const auto &reported_dir = indexer_->working_dir();
  ASSERT_EQ(reported_dir, working_dir);
}


// Edge cases of column management: removing an unknown column fails, creating
// a column twice under the same name fails, and the column is removed again at
// the end so that later tests can reuse the name.
TEST_F(InvertedIndexTest, COLUMN_MANIPULATION_EDGE_CASE) {
  // Guard against a failed fixture setup before dereferencing the indexer,
  // matching the other tests in this suite.
  ASSERT_TRUE(indexer_);

  // Removing a column that was never created must be rejected.
  ASSERT_FALSE(indexer_->remove_column_indexer("Non-exist").ok());

  FieldSchema field{"field_int32", DataType::INT32, true, params_};
  ASSERT_TRUE(indexer_->create_column_indexer(field).ok());
  auto indexer_int32 = (*indexer_)["field_int32"];
  ASSERT_NE(indexer_int32, nullptr);

  // A second column with the same name must be rejected even though the rest
  // of its schema differs.
  FieldSchema field_duplicate{"field_int32", DataType::INT32, false, params_};
  ASSERT_FALSE(indexer_->create_column_indexer(field_duplicate).ok());

  // Clean up for the following tests.
  ASSERT_TRUE(indexer_->remove_column_indexer("field_int32").ok());
}


// Walks an INT32 column indexer through its lifecycle: create, insert, seal
// (after which inserts are rejected), remove, and finally checks that the
// per-column state is gone from RocksDB.
TEST_F(InvertedIndexTest, COLUMN_MANIPULATION_INT32) {
  ASSERT_TRUE(indexer_);

  // Create column indexer
  FieldSchema field{"field_int32", DataType::INT32, true, params_};
  ASSERT_TRUE(indexer_->create_column_indexer(field).ok());
  auto column = (*indexer_)["field_int32"];
  ASSERT_NE(column, nullptr);

  // Each value is passed to insert() as the raw bytes of the int32 itself.
  const auto encode = [](const int32_t &number) {
    return std::string(reinterpret_cast<const char *>(&number),
                       sizeof(int32_t));
  };

  // Insert some data
  constexpr int32_t kDocCount = 3000;
  for (int32_t id = 0; id < kDocCount; ++id) {
    auto insert_status = column->insert(id, encode(id));
    ASSERT_TRUE(insert_status.ok());
  }

  // Store variable names for later retrieval
  auto terms_cf_name = column->cf_name_terms();
  auto ranges_cf_name = column->cf_name_ranges();
  auto cdf_cf_name = column->cf_name_cdf();
  auto max_id_key = column->key_max_id();
  auto null_key = column->key_null();
  auto sealed_key = column->key_sealed();

  // Once sealed, further inserts must fail while the max-id and sealed keys
  // must be readable from the database.
  ASSERT_TRUE(column->seal().ok());
  auto rejected = column->insert(kDocCount, encode(kDocCount));
  ASSERT_FALSE(rejected.ok());

  auto &rocks = indexer_->rocksdb_context_;
  std::string value;
  ASSERT_TRUE(rocks.db_->Get({}, max_id_key, &value).ok());
  ASSERT_TRUE(rocks.db_->Get({}, sealed_key, &value).ok());

  // Remove column indexer
  ASSERT_TRUE(indexer_->remove_column_indexer("field_int32").ok());
  column = (*indexer_)["field_int32"];
  ASSERT_EQ(column, nullptr);

  // No garbage left
  ASSERT_EQ(rocks.get_cf(terms_cf_name), nullptr);
  ASSERT_EQ(rocks.get_cf(ranges_cf_name), nullptr);
  // The cdf column family itself still exists; only this field's entry in it
  // must be gone.
  auto cdf_handle = rocks.get_cf(cdf_cf_name);
  ASSERT_NE(cdf_handle, nullptr);
  ASSERT_EQ(rocks.db_->Get({}, cdf_handle, field.name(), &value).code(),
            rocksdb::Status::kNotFound);
  ASSERT_EQ(rocks.db_->Get({}, max_id_key, &value).code(),
            rocksdb::Status::kNotFound);
  ASSERT_EQ(rocks.db_->Get({}, null_key, &value).code(),
            rocksdb::Status::kNotFound);
  ASSERT_EQ(rocks.db_->Get({}, sealed_key, &value).code(),
            rocksdb::Status::kNotFound);
}


// Walks an ARRAY_STRING column indexer through create, insert and remove, and
// checks that the per-column state is gone from RocksDB afterwards.
TEST_F(InvertedIndexTest, COLUMN_MANIPULATION_ARRAY_STRING) {
  ASSERT_TRUE(indexer_);

  // Create column indexer
  FieldSchema field{"field_string_array", DataType::ARRAY_STRING, true,
                    params_};
  ASSERT_TRUE(indexer_->create_column_indexer(field).ok());
  auto column = (*indexer_)["field_string_array"];
  ASSERT_NE(column, nullptr);

  // Insert some data: every document holds five copies of "Number_<id>".
  constexpr uint32_t kDocCount = 1500;
  constexpr uint32_t kArrayLength = 5;
  for (uint32_t id = 0; id < kDocCount; ++id) {
    std::vector<std::string> values(kArrayLength,
                                    "Number_" + std::to_string(id));
    auto insert_status = column->insert(id, values);
    ASSERT_TRUE(insert_status.ok());
  }

  auto &rocks = indexer_->rocksdb_context_;

  // Store variable names for later retrieval
  auto terms_cf_name = column->cf_name_terms();
  auto array_len_cf_name = column->cf_name_array_len();
  auto ranges_cf_name = column->cf_name_ranges();
  // No ranges column family is expected to exist for this column, even before
  // the column is removed.
  ASSERT_EQ(rocks.get_cf(ranges_cf_name), nullptr);
  auto cdf_cf_name = column->cf_name_cdf();
  auto max_id_key = column->key_max_id();
  auto null_key = column->key_null();
  auto sealed_key = column->key_sealed();

  // Remove column indexer
  ASSERT_TRUE(indexer_->remove_column_indexer("field_string_array").ok());
  column = (*indexer_)["field_string_array"];
  ASSERT_EQ(column, nullptr);

  // No garbage left
  std::string value;
  ASSERT_EQ(rocks.get_cf(terms_cf_name), nullptr);
  ASSERT_EQ(rocks.get_cf(array_len_cf_name), nullptr);
  ASSERT_EQ(rocks.get_cf(ranges_cf_name), nullptr);
  // The cdf column family itself still exists; only this field's entry in it
  // must be gone.
  auto cdf_handle = rocks.get_cf(cdf_cf_name);
  ASSERT_NE(cdf_handle, nullptr);
  ASSERT_EQ(rocks.db_->Get({}, cdf_handle, field.name(), &value).code(),
            rocksdb::Status::kNotFound);
  ASSERT_EQ(rocks.db_->Get({}, max_id_key, &value).code(),
            rocksdb::Status::kNotFound);
  ASSERT_EQ(rocks.db_->Get({}, null_key, &value).code(),
            rocksdb::Status::kNotFound);
  ASSERT_EQ(rocks.db_->Get({}, sealed_key, &value).code(),
            rocksdb::Status::kNotFound);
}


// Exercises InvertedSearchResult on hand-built roaring bitmaps: id
// extraction, in-place AND / OR, and the filter created from a result.
TEST_F(InvertedIndexTest, INVERTED_SEARCH_RESULT) {
  // Builds a search result from a list of doc ids.
  // NOTE(review): the raw bitmap is handed to InvertedSearchResult and never
  // freed here; presumably the result takes ownership - confirm.
  const auto make_result = [](const std::vector<uint32_t> &doc_ids) {
    roaring_bitmap_t *bitmap = roaring_bitmap_create();
    for (const uint32_t doc_id : doc_ids) {
      roaring_bitmap_add(bitmap, doc_id);
    }
    return std::make_shared<InvertedSearchResult>(bitmap);
  };

  auto res1 = make_result({1, 2, 3});

  // extract_ids() yields the ids in ascending order.
  std::vector<uint32_t> ids;
  res1->extract_ids(&ids);
  ASSERT_EQ(ids.size(), 3u);
  ASSERT_EQ(ids[0], 1u);
  ASSERT_EQ(ids[1], 2u);
  ASSERT_EQ(ids[2], 3u);

  auto res2 = make_result({3, 4, 5});

  // {1, 2, 3} AND {3, 4, 5} == {3}
  res1->AND(*res2);
  ASSERT_EQ(res1->count(), 1);
  auto filter = res1->make_filter();
  ASSERT_TRUE(filter);
  ASSERT_FALSE(filter->is_filtered(3));

  auto res3 = make_result({1, 3, 9, 11});

  // {3, 4, 5} OR {1, 3, 9, 11} == {1, 3, 4, 5, 9, 11}
  res2->OR(*res3);
  ASSERT_EQ(res2->count(), 6);
  filter = res2->make_filter();
  // Check the second filter as well before dereferencing it.
  ASSERT_TRUE(filter);
  ASSERT_FALSE(filter->is_filtered(1));
  ASSERT_FALSE(filter->is_filtered(3));
  ASSERT_FALSE(filter->is_filtered(4));
  ASSERT_FALSE(filter->is_filtered(5));
  ASSERT_FALSE(filter->is_filtered(9));
  ASSERT_FALSE(filter->is_filtered(11));
}

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif

================================================
FILE: tests/db/index/column/vector_column_indexer_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// #include "db/doc.h"
#include "db/index/column/vector_column/vector_column_indexer.h"
#include <cassert>
#include <cstdint>
#include <gtest/gtest.h>
#include "db/index/column/vector_column/vector_column_params.h"
#include "zvec/ailego/utility/float_helper.h"
#include "zvec/db/doc.h"
#include "zvec/db/index_params.h"

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
#endif

using namespace zvec;


std::string print_dense_vector(const void *vector, size_t dim,
                               DataType data_type) {
  std::stringstream ss;
  switch (data_type) {
    case DataType::VECTOR_FP32: {
      const float *data = reinterpret_cast<const float *>(vector);

      for (size_t i = 0; i < dim; ++i) {
        ss << data[i] << " ";
      }
    } break;
    case DataType::VECTOR_FP16: {
      const zvec::float16_t *data =
          reinterpret_cast<const zvec::float16_t *>(vector);
      for (size_t i = 0; i < dim; ++i) {
        ss << data[i] << " ";
      }
    } break;
    default:
      LOG_ERROR("Unsupported data type: %d", static_cast<int>(data_type));
      break;
  }
  return ss.str();
}

// Dense FP32 round trip through VectorColumnIndexer: insert, overwrite, fetch
// and inner-product search. The scenario is repeated for every index type /
// quantizer combination listed at the bottom of the test.
TEST(VectorColumnIndexerTest, General) {
  // Runs one full scenario against a freshly created index file.
  auto func = [&](const IndexParams::Ptr index_params,
                  const QueryParams::Ptr query_params) {
    const std::string index_file_path = "test_indexer.index";
    constexpr idx_t kDocId = 2345;

    // Built as a std::string so the path can never be truncated by a
    // fixed-size buffer. Run before and after the scenario.
    const std::string cleanup_cmd = "rm -f " + index_file_path;
    system(cleanup_cmd.c_str());

    // 1. create indexer
    auto indexer = std::make_shared<VectorColumnIndexer>(
        index_file_path,
        FieldSchema("test", DataType::VECTOR_FP32, 4, false, index_params));
    ASSERT_TRUE(indexer);

    // 2. open
    ASSERT_TRUE(
        indexer->Open(vector_column_params::ReadOptions{true, true}).ok());

    {
      // The data must live in a named local: DenseVector is given only the
      // pointer, and a temporary std::vector would be destroyed at the end of
      // the full expression.
      auto vector = std::vector<float>{1.0f, 2.0f, 3.0f, 0};

      // 3. add data
      auto data = vector_column_params::VectorData{
          vector_column_params::DenseVector{vector.data()}};
      ASSERT_TRUE(indexer->Insert(data, kDocId).ok());
    }

    {
      auto vector = std::vector<float>{1.0f, 2000.0f, 3.0f, 0};
      // Inner product with the query {1, 2, 3, 0}:
      // 1 * 1 + 2 * 2000 + 3 * 3 = 4010
      ASSERT_TRUE(indexer
                      ->Insert(
                          vector_column_params::VectorData{
                              vector_column_params::DenseVector{vector.data()}},
                          kDocId + 10)
                      .ok());
    }

    {  // add_with_id() won't check duplication, overwrite last one
      auto vector = std::vector<float>{1.0f, 0, 3.0f, 0};
      // 1 * 1 + 2 * 0 + 3 * 3 = 10
      ASSERT_TRUE(indexer
                      ->Insert(
                          vector_column_params::VectorData{
                              vector_column_params::DenseVector{vector.data()}},
                          kDocId + 10)
                      .ok());
    }

    // 4. fetch
    auto fetched_data = indexer->Fetch(kDocId);
    ASSERT_TRUE(fetched_data);
    const float *dense_vector = reinterpret_cast<const float *>(
        std::get<vector_column_params::DenseVectorBuffer>(
            fetched_data->vector_buffer)
            .data.data());
    ASSERT_NEAR(dense_vector[0], 1.0, 0.1);
    ASSERT_NEAR(dense_vector[1], 2.0, 0.1);
    ASSERT_NEAR(dense_vector[2], 3.0, 0.1);
    ASSERT_NEAR(dense_vector[3], 0, 0.1);

    // 5. search
    // https://stackoverflow.com/questions/69009389/how-to-get-away-with-using-designated-initializers-in-c17-or-why-is-it-seemi
    auto query_vector = std::vector<float>{1.0f, 2.0f, 3.0f, 0};
    auto query = vector_column_params::VectorData{
        vector_column_params::DenseVector{.data = query_vector.data()}};
    auto indexer_query_params =
        vector_column_params::QueryParams{.topk = 10,
                                          .filter = nullptr,
                                          .fetch_vector = true,
                                          .query_params = query_params};
    auto results = indexer->Search(query, indexer_query_params);
    ASSERT_TRUE(results.has_value());

    auto vector_results =
        dynamic_cast<VectorIndexResults *>(results.value().get());
    ASSERT_TRUE(vector_results);
    ASSERT_EQ(vector_results->count(), 2);

    {
      // The iterator must visit exactly as many hits as count() reports.
      int count = 0;
      auto iter = vector_results->create_iterator();
      while (iter->valid()) {
        count++;
        iter->next();
      }
      ASSERT_EQ(count, 2);
    }

    {  // top1 doc
      auto iter = vector_results->create_iterator();
      ASSERT_TRUE(iter->valid());
      ASSERT_EQ(iter->doc_id(), kDocId);
      // 1 * 1 + 2 * 2 + 3 * 3 = 14. Only an upper bound is enforced here; a
      // score below 14 is accepted.
      if (iter->score() > 14) {
        ASSERT_NEAR(iter->score(), 14.0, 0.1);
      }

      // top2
      iter->next();
      ASSERT_TRUE(iter->valid());
      ASSERT_EQ(iter->doc_id(), kDocId + 10);
      ASSERT_NEAR(iter->score(), 10.0, 0.1);
    }

    // Every params object passed to this lambda is a vector index params
    // object; static_cast performs the downcast without reinterpreting the
    // pointer bits.
    auto vector_index_params =
        static_cast<VectorIndexParams *>(index_params.get());
    if (vector_index_params->quantize_type() != QuantizeType::UNDEFINED) {
      ASSERT_TRUE(vector_results->docs().size() == 2);
      ASSERT_TRUE(vector_results->reverted_vector_list().size() == 2);
      ASSERT_TRUE(vector_results->reverted_sparse_values_list().empty());
    }

    indexer->Close();

    system(cleanup_cmd.c_str());
  };

  // No quantizer.
  func(std::make_shared<FlatIndexParams>(MetricType::IP),
       std::make_shared<QueryParams>(IndexType::FLAT));
  func(std::make_shared<HnswIndexParams>(MetricType::IP, 10, 100),
       std::make_shared<HnswQueryParams>(300));
  func(std::make_shared<IVFIndexParams>(MetricType::IP),
       std::make_shared<IVFQueryParams>(10));

  // FP16 quantizer.
  func(std::make_shared<FlatIndexParams>(MetricType::IP, QuantizeType::FP16),
       std::make_shared<QueryParams>(IndexType::FLAT));
  func(std::make_shared<HnswIndexParams>(MetricType::IP, 10, 100,
                                         QuantizeType::FP16),
       std::make_shared<HnswQueryParams>(300));
  func(std::make_shared<IVFIndexParams>(MetricType::IP, 1024, 10, false,
                                        QuantizeType::FP16),
       std::make_shared<IVFQueryParams>(10));

  // INT8 quantizer.
  func(std::make_shared<FlatIndexParams>(MetricType::IP, QuantizeType::INT8),
       std::make_shared<QueryParams>(IndexType::FLAT));
  func(std::make_shared<HnswIndexParams>(MetricType::IP, 10, 100,
                                         QuantizeType::INT8),
       std::make_shared<HnswQueryParams>(300));
  func(std::make_shared<IVFIndexParams>(MetricType::IP, 1024, 10, false,
                                        QuantizeType::INT8),
       std::make_shared<IVFQueryParams>(10));

  // INT4 quantizer (flat and HNSW only).
  func(std::make_shared<FlatIndexParams>(MetricType::IP, QuantizeType::INT4),
       std::make_shared<QueryParams>(IndexType::FLAT));
  func(std::make_shared<HnswIndexParams>(MetricType::IP, 10, 100,
                                         QuantizeType::INT4),
       std::make_shared<HnswQueryParams>(300));
}

// Dense round trip through VectorColumnIndexer for a VECTOR_FP16 column:
// insert, overwrite, fetch and inner-product search, for flat and HNSW
// indexes without a quantizer.
TEST(VectorColumnIndexerTest, DenseDataTypeFP16) {
  // Runs one full scenario against a freshly created index file.
  auto func = [&](const IndexParams::Ptr index_params,
                  const QueryParams::Ptr query_params) {
    const std::string index_file_path = "test_indexer.index";
    constexpr idx_t kDocId = 2345;
    constexpr int dimension = 4;

    // Built as a std::string so the path can never be truncated by a
    // fixed-size buffer. Run before and after the scenario.
    const std::string cleanup_cmd = "rm -f " + index_file_path;
    system(cleanup_cmd.c_str());

    // Converts `dimension` FP32 values into their FP16 bit patterns.
    const auto to_fp16 = [&](std::vector<float> fp32_values) {
      std::vector<uint16_t> fp16_bits(dimension);
      ailego::FloatHelper::ToFP16(fp32_values.data(), dimension,
                                  fp16_bits.data());
      return fp16_bits;
    };

    // 1. create indexer
    auto indexer = std::make_shared<VectorColumnIndexer>(
        index_file_path, FieldSchema("test", DataType::VECTOR_FP16, dimension,
                                     false, index_params));
    ASSERT_TRUE(indexer);

    // 2. open
    ASSERT_TRUE(
        indexer->Open(vector_column_params::ReadOptions{true, true}).ok());

    {
      // The data must live in a named local: DenseVector is given only the
      // pointer, and a temporary std::vector would be destroyed at the end of
      // the full expression.
      auto vector = to_fp16({1.0f, 2.0f, 3.0f, 0});

      // 3. add data
      auto data = vector_column_params::VectorData{
          vector_column_params::DenseVector{vector.data()}};
      ASSERT_TRUE(indexer->Insert(data, kDocId).ok());
    }

    {
      auto vector = to_fp16({1.0f, 2000.0f, 3.0f, 0});
      // Inner product with the query {1, 2, 3, 0}:
      // 1 * 1 + 2 * 2000 + 3 * 3 = 4010
      ASSERT_TRUE(indexer
                      ->Insert(
                          vector_column_params::VectorData{
                              vector_column_params::DenseVector{vector.data()}},
                          kDocId + 10)
                      .ok());
    }

    {  // add_with_id() won't check duplication, overwrite last one
      auto vector = to_fp16({1.0f, 0, 3.0f, 0});
      // 1 * 1 + 2 * 0 + 3 * 3 = 10
      ASSERT_TRUE(indexer
                      ->Insert(
                          vector_column_params::VectorData{
                              vector_column_params::DenseVector{vector.data()}},
                          kDocId + 10)
                      .ok());
    }

    // 4. fetch
    {
      auto fetched_data = indexer->Fetch(kDocId);
      ASSERT_TRUE(fetched_data);
      const uint16_t *dense_vector = reinterpret_cast<const uint16_t *>(
          std::get<vector_column_params::DenseVectorBuffer>(
              fetched_data->vector_buffer)
              .data.data());
      ASSERT_NEAR(ailego::FloatHelper::ToFP32(dense_vector[0]), 1.0, 0.1);
      ASSERT_NEAR(ailego::FloatHelper::ToFP32(dense_vector[1]), 2.0, 0.1);
      ASSERT_NEAR(ailego::FloatHelper::ToFP32(dense_vector[2]), 3.0, 0.1);
      ASSERT_NEAR(ailego::FloatHelper::ToFP32(dense_vector[3]), 0, 0.1);
    }
    {
      // The second insert under kDocId + 10 must have replaced the first.
      auto fetched_data = indexer->Fetch(kDocId + 10);
      ASSERT_TRUE(fetched_data);
      const uint16_t *dense_vector = reinterpret_cast<const uint16_t *>(
          std::get<vector_column_params::DenseVectorBuffer>(
              fetched_data->vector_buffer)
              .data.data());
      ASSERT_NEAR(ailego::FloatHelper::ToFP32(dense_vector[0]), 1.0, 0.1);
      ASSERT_NEAR(ailego::FloatHelper::ToFP32(dense_vector[1]), 0, 0.1);
      ASSERT_NEAR(ailego::FloatHelper::ToFP32(dense_vector[2]), 3.0, 0.1);
      ASSERT_NEAR(ailego::FloatHelper::ToFP32(dense_vector[3]), 0, 0.1);
    }

    // 5. search
    // https://stackoverflow.com/questions/69009389/how-to-get-away-with-using-designated-initializers-in-c17-or-why-is-it-seemi
    auto query_vector = to_fp16({1.0f, 2.0f, 3.0f, 0});
    auto query = vector_column_params::VectorData{
        vector_column_params::DenseVector{.data = query_vector.data()}};
    auto indexer_query_params =
        vector_column_params::QueryParams{.topk = 10,
                                          .filter = nullptr,
                                          .fetch_vector = true,
                                          .query_params = query_params};
    auto results = indexer->Search(query, indexer_query_params);
    ASSERT_TRUE(results.has_value());

    auto vector_results =
        dynamic_cast<VectorIndexResults *>(results.value().get());
    ASSERT_TRUE(vector_results);
    ASSERT_EQ(vector_results->count(), 2);

    {
      // The iterator must visit exactly as many hits as count() reports.
      int count = 0;
      auto iter = vector_results->create_iterator();
      while (iter->valid()) {
        count++;
        iter->next();
      }
      ASSERT_EQ(count, 2);
    }

    {  // top1 doc
      auto iter = vector_results->create_iterator();
      ASSERT_TRUE(iter->valid());
      ASSERT_EQ(iter->doc_id(), kDocId);
      // 1 * 1 + 2 * 2 + 3 * 3 = 14. Only an upper bound is enforced here; a
      // score below 14 is accepted.
      if (iter->score() > 14) {
        ASSERT_NEAR(iter->score(), 14.0, 0.1);
      }

      // top2
      iter->next();
      ASSERT_TRUE(iter->valid());
      ASSERT_EQ(iter->doc_id(), kDocId + 10);
      ASSERT_NEAR(iter->score(), 10.0, 0.1);
    }

    // Every params object passed to this lambda is a vector index params
    // object; static_cast performs the downcast without reinterpreting the
    // pointer bits.
    auto vector_index_params =
        static_cast<VectorIndexParams *>(index_params.get());
    if (vector_index_params->quantize_type() != QuantizeType::UNDEFINED) {
      ASSERT_TRUE(vector_results->docs().size() == 2);
      ASSERT_TRUE(vector_results->reverted_vector_list().size() == 2);
      ASSERT_TRUE(vector_results->reverted_sparse_values_list().empty());
    }

    indexer->Close();

    system(cleanup_cmd.c_str());
  };

  func(std::make_shared<FlatIndexParams>(MetricType::IP),
       std::make_shared<QueryParams>(IndexType::FLAT));
  func(std::make_shared<HnswIndexParams>(MetricType::IP, 10, 100),
       std::make_shared<HnswQueryParams>(300));
}

// Dense round trip through VectorColumnIndexer for a VECTOR_INT8 column:
// insert, overwrite, fetch and inner-product search, for flat and HNSW
// indexes without a quantizer.
TEST(VectorColumnIndexerTest, DenseDataTypeINT8) {
  // Runs one full scenario against a freshly created index file.
  auto func = [&](const IndexParams::Ptr index_params,
                  const QueryParams::Ptr query_params) {
    const std::string index_file_path = "test_indexer.index";
    constexpr idx_t kDocId = 2345;
    constexpr int dimension = 4;

    // Built as a std::string so the path can never be truncated by a
    // fixed-size buffer. Run before and after the scenario.
    const std::string cleanup_cmd = "rm -f " + index_file_path;
    system(cleanup_cmd.c_str());

    // 1. create indexer
    auto indexer = std::make_shared<VectorColumnIndexer>(
        index_file_path, FieldSchema("test", DataType::VECTOR_INT8, dimension,
                                     false, index_params));
    ASSERT_TRUE(indexer);

    // 2. open
    ASSERT_TRUE(
        indexer->Open(vector_column_params::ReadOptions{true, true}).ok());

    {
      // The data must live in a named local: DenseVector is given only the
      // pointer, and a temporary std::vector would be destroyed at the end of
      // the full expression.
      auto vector = std::vector<uint8_t>{1, 2, 3, 0};

      // 3. add data
      auto data = vector_column_params::VectorData{
          vector_column_params::DenseVector{vector.data()}};
      ASSERT_TRUE(indexer->Insert(data, kDocId).ok());
    }

    {
      // This entry is replaced by the next insert under the same doc id, so
      // its contents never reach the fetch / search assertions below.
      // NOTE(review): 200 is outside the signed 8-bit range if the column
      // stores int8 values - confirm the intended value.
      auto vector = std::vector<uint8_t>{1, 200, 3, 0};
      ASSERT_TRUE(indexer
                      ->Insert(
                          vector_column_params::VectorData{
                              vector_column_params::DenseVector{vector.data()}},
                          kDocId + 10)
                      .ok());
    }

    {  // add_with_id() won't check duplication, overwrite last one
      auto vector = std::vector<uint8_t>{1, 0, 3, 0};
      // 1 * 1 + 2 * 0 + 3 * 3 = 10
      ASSERT_TRUE(indexer
                      ->Insert(
                          vector_column_params::VectorData{
                              vector_column_params::DenseVector{vector.data()}},
                          kDocId + 10)
                      .ok());
    }

    // 4. fetch
    {
      auto fetched_data = indexer->Fetch(kDocId);
      ASSERT_TRUE(fetched_data);
      const uint8_t *dense_vector = reinterpret_cast<const uint8_t *>(
          std::get<vector_column_params::DenseVectorBuffer>(
              fetched_data->vector_buffer)
              .data.data());
      ASSERT_NEAR(dense_vector[0], 1.0, 0.1);
      ASSERT_NEAR(dense_vector[1], 2.0, 0.1);
      ASSERT_NEAR(dense_vector[2], 3.0, 0.1);
      ASSERT_NEAR(dense_vector[3], 0, 0.1);
    }
    {
      // The second insert under kDocId + 10 must have replaced the first.
      auto fetched_data = indexer->Fetch(kDocId + 10);
      ASSERT_TRUE(fetched_data);
      const uint8_t *dense_vector = reinterpret_cast<const uint8_t *>(
          std::get<vector_column_params::DenseVectorBuffer>(
              fetched_data->vector_buffer)
              .data.data());
      ASSERT_NEAR(dense_vector[0], 1.0, 0.1);
      ASSERT_NEAR(dense_vector[1], 0, 0.1);
      ASSERT_NEAR(dense_vector[2], 3.0, 0.1);
      ASSERT_NEAR(dense_vector[3], 0, 0.1);
    }

    // 5. search
    // https://stackoverflow.com/questions/69009389/how-to-get-away-with-using-designated-initializers-in-c17-or-why-is-it-seemi
    auto query_vector = std::vector<uint8_t>{1, 2, 3, 0};
    auto query = vector_column_params::VectorData{
        vector_column_params::DenseVector{.data = query_vector.data()}};
    auto indexer_query_params =
        vector_column_params::QueryParams{.topk = 10,
                                          .filter = nullptr,
                                          .fetch_vector = true,
                                          .query_params = query_params};
    auto results = indexer->Search(query, indexer_query_params);
    ASSERT_TRUE(results.has_value());

    auto vector_results =
        dynamic_cast<VectorIndexResults *>(results.value().get());
    ASSERT_TRUE(vector_results);
    ASSERT_EQ(vector_results->count(), 2);

    {
      // The iterator must visit exactly as many hits as count() reports.
      int count = 0;
      auto iter = vector_results->create_iterator();
      while (iter->valid()) {
        count++;
        iter->next();
      }
      ASSERT_EQ(count, 2);
    }

    {  // top1 doc
      auto iter = vector_results->create_iterator();
      ASSERT_TRUE(iter->valid());
      ASSERT_EQ(iter->doc_id(), kDocId);
      // 1 * 1 + 2 * 2 + 3 * 3 = 14. Only an upper bound is enforced here; a
      // score below 14 is accepted.
      if (iter->score() > 14) {
        ASSERT_NEAR(iter->score(), 14.0, 0.1);
      }

      // top2
      iter->next();
      ASSERT_TRUE(iter->valid());
      ASSERT_EQ(iter->doc_id(), kDocId + 10);
      ASSERT_NEAR(iter->score(), 10.0, 0.1);
    }

    // Every params object passed to this lambda is a vector index params
    // object; static_cast performs the downcast without reinterpreting the
    // pointer bits.
    auto vector_index_params =
        static_cast<VectorIndexParams *>(index_params.get());
    if (vector_index_params->quantize_type() != QuantizeType::UNDEFINED) {
      ASSERT_TRUE(vector_results->docs().size() == 2);
      ASSERT_TRUE(vector_results->reverted_vector_list().size() == 2);
      ASSERT_TRUE(vector_results->reverted_sparse_values_list().empty());
    }

    indexer->Close();

    system(cleanup_cmd.c_str());
  };

  func(std::make_shared<FlatIndexParams>(MetricType::IP),
       std::make_shared<QueryParams>(IndexType::FLAT));
  func(std::make_shared<HnswIndexParams>(MetricType::IP, 10, 100),
       std::make_shared<HnswQueryParams>(300));
}


// Sparse FP32 round trip through VectorColumnIndexer: insert one sparse
// vector, fetch it back, and search for it with itself as the query. Repeated
// for flat and HNSW indexes, with and without the FP16 quantizer.
TEST(VectorColumnIndexerTest, SparseGeneral) {
  constexpr uint32_t kSparseCount = 3;
  // Runs one full scenario against a freshly created index file.
  auto func = [&](const IndexParams::Ptr index_params) {
    const std::string index_file_path = "test_indexer.index";
    constexpr idx_t kDocId = 2345;

    // Built as a std::string so the path can never be truncated by a
    // fixed-size buffer. Run before and after the scenario.
    const std::string cleanup_cmd = "rm -f " + index_file_path;
    system(cleanup_cmd.c_str());

    // create indexer
    auto indexer = std::make_shared<VectorColumnIndexer>(
        index_file_path,
        FieldSchema("test", DataType::SPARSE_VECTOR_FP32, false, index_params));
    ASSERT_TRUE(indexer);

    // open; on failure the status message is attached to the assertion
    const auto open_status =
        indexer->Open(vector_column_params::ReadOptions{true, true});
    ASSERT_TRUE(open_status.ok()) << open_status.message();

    // Sparse vector with indices {0, 1, 2} and values {0, 1, 2}.
    std::vector<uint32_t> indices(kSparseCount);
    std::vector<float> values(kSparseCount);
    for (uint32_t i = 0; i < kSparseCount; ++i) {
      indices[i] = i;
      values[i] = i;
    }
    vector_column_params::SparseVector vector{kSparseCount, indices.data(),
                                              values.data()};
    ASSERT_TRUE(
        indexer->Insert(vector_column_params::VectorData{vector}, kDocId).ok());

    // fetch
    auto fetched_data = indexer->Fetch(kDocId);
    ASSERT_TRUE(fetched_data.has_value());
    const auto &fetched_sparse_vector =
        std::get<vector_column_params::SparseVectorBuffer>(
            fetched_data.value().vector_buffer);
    auto fetched_indices = reinterpret_cast<const uint32_t *>(
        fetched_sparse_vector.indices.data());
    auto fetched_values =
        reinterpret_cast<const float *>(fetched_sparse_vector.values.data());
    for (uint32_t i = 0; i < kSparseCount; ++i) {
      ASSERT_EQ(i, fetched_indices[i]);
      ASSERT_FLOAT_EQ(i, fetched_values[i]);
    }

    // search
    auto query =
        vector_column_params::VectorData{vector_column_params::SparseVector{
            kSparseCount, indices.data(), values.data()}};
    auto query_params = vector_column_params::QueryParams{
        .topk = 10, .filter = nullptr, .fetch_vector = true};
    auto results = indexer->Search(query, query_params);
    ASSERT_TRUE(results.has_value());

    auto vector_results =
        dynamic_cast<VectorIndexResults *>(results.value().get());
    ASSERT_TRUE(vector_results);
    ASSERT_EQ(vector_results->count(), 1);

    {
      // The iterator must visit exactly as many hits as count() reports.
      int count = 0;
      auto iter = vector_results->create_iterator();
      while (iter->valid()) {
        count++;
        iter->next();
      }
      ASSERT_EQ(count, 1);
    }

    {
      auto iter = vector_results->create_iterator();
      ASSERT_TRUE(iter->valid());
      ASSERT_EQ(iter->doc_id(), kDocId);
      // Inner product of the vector with itself: 0 * 0 + 1 * 1 + 2 * 2 = 5
      ASSERT_FLOAT_EQ(iter->score(), 5.0);

      // The hit must carry the stored vector because fetch_vector was set.
      auto vector = iter->vector();
      auto sparse_vector =
          std::get<vector_column_params::SparseVector>(vector.vector);
      auto result_indices =
          reinterpret_cast<const uint32_t *>(sparse_vector.indices);
      auto result_values = reinterpret_cast<const float *>(sparse_vector.values);
      ASSERT_EQ(sparse_vector.count, kSparseCount);
      for (uint32_t i = 0; i < kSparseCount; ++i) {
        ASSERT_EQ(i, result_indices[i]);
        ASSERT_FLOAT_EQ(i, result_values[i]);
      }

      // Every params object passed to this lambda is a vector index params
      // object; static_cast performs the downcast without reinterpreting the
      // pointer bits.
      auto vector_index_params =
          static_cast<VectorIndexParams *>(index_params.get());
      if (vector_index_params->quantize_type() != QuantizeType::UNDEFINED) {
        ASSERT_TRUE(vector_results->docs().size() == 1);
        ASSERT_TRUE(vector_results->reverted_sparse_values_list().size() == 1);
        ASSERT_TRUE(vector_results->reverted_vector_list().empty());
      }
    }

    indexer->Close();

    system(cleanup_cmd.c_str());
  };

  func(std::make_shared<FlatIndexParams>(MetricType::IP));
  func(std::make_shared<HnswIndexParams>(MetricType::IP, 10, 100));
  func(std::make_shared<FlatIndexParams>(MetricType::IP, QuantizeType::FP16));
  func(std::make_shared<HnswIndexParams>(MetricType::IP, 10, 100,
                                         QuantizeType::FP16));
}

// Round-trips a single FP16 sparse vector through a VectorColumnIndexer:
// insert, fetch by doc id, then search with `fetch_vector` enabled and check
// the returned id, score and vector contents. Runs once per index kind passed
// to `func` at the bottom of the test.
TEST(VectorColumnIndexerTest, SparseDataTypeFP16) {
  constexpr uint32_t kSparseCount = 3;
  auto func = [&](const IndexParams::Ptr index_params) {
    constexpr idx_t kDocId = 2345;
    const std::string index_file_path = "test_indexer.index";

    // Start from a clean slate; the same command is reused for cleanup.
    const std::string remove_cmd = "rm -f " + index_file_path;
    system(remove_cmd.c_str());

    // create indexer
    auto indexer = std::make_shared<VectorColumnIndexer>(
        index_file_path,
        FieldSchema("test", DataType::SPARSE_VECTOR_FP16, false, index_params));
    ASSERT_TRUE(indexer);

    // open
    auto open_status =
        indexer->Open(vector_column_params::ReadOptions{true, true});
    if (!open_status.ok()) {
      std::cout << open_status.message() << std::endl;
      ASSERT_TRUE(false);
    }

    // Build indices {0, 1, 2} with values {0, 1, 2}, then convert the values
    // to their FP16 bit patterns.
    std::vector<uint32_t> sparse_indices(kSparseCount);
    std::vector<float> fp32_values(kSparseCount);
    for (uint32_t pos = 0; pos < kSparseCount; ++pos) {
      sparse_indices[pos] = pos;
      fp32_values[pos] = pos;
    }
    std::vector<uint16_t> fp16_values(kSparseCount);
    ailego::FloatHelper::ToFP16(fp32_values.data(), kSparseCount,
                                fp16_values.data());

    vector_column_params::SparseVector inserted{
        kSparseCount, sparse_indices.data(), fp16_values.data()};
    ASSERT_TRUE(
        indexer->Insert(vector_column_params::VectorData{inserted}, kDocId)
            .ok());

    // fetch
    auto fetched = indexer->Fetch(kDocId);
    ASSERT_TRUE(fetched.has_value());
    const auto &fetched_sparse =
        std::get<vector_column_params::SparseVectorBuffer>(
            fetched->vector_buffer);
    const auto *stored_indices =
        reinterpret_cast<const uint32_t *>(fetched_sparse.indices.data());
    const auto *stored_values =
        reinterpret_cast<const uint16_t *>(fetched_sparse.values.data());
    for (uint32_t pos = 0; pos < kSparseCount; ++pos) {
      ASSERT_EQ(pos, stored_indices[pos]);
      ASSERT_FLOAT_EQ(pos, ailego::FloatHelper::ToFP32(stored_values[pos]));
    }

    // search
    auto query =
        vector_column_params::VectorData{vector_column_params::SparseVector{
            kSparseCount, sparse_indices.data(), fp16_values.data()}};
    auto query_params = vector_column_params::QueryParams{
        .topk = 10, .filter = nullptr, .fetch_vector = true};
    auto results = indexer->Search(query, query_params);
    ASSERT_TRUE(results.has_value());

    auto vector_results =
        dynamic_cast<VectorIndexResults *>(results.value().get());
    ASSERT_TRUE(vector_results);
    ASSERT_EQ(vector_results->count(), 1);

    // Walking the iterator must visit exactly one hit.
    {
      int visited = 0;
      for (auto cursor = vector_results->create_iterator(); cursor->valid();
           cursor->next()) {
        ++visited;
      }
      ASSERT_EQ(visited, 1);
    }

    // Inspect the single hit.
    {
      auto cursor = vector_results->create_iterator();
      ASSERT_TRUE(cursor->valid());
      ASSERT_EQ(cursor->doc_id(), kDocId);
      // Inner product of {0, 1, 2} with itself.
      ASSERT_FLOAT_EQ(cursor->score(), 5.0);

      auto hit_vector = cursor->vector();
      auto hit_sparse =
          std::get<vector_column_params::SparseVector>(hit_vector.vector);
      auto hit_indices = reinterpret_cast<const uint32_t *>(hit_sparse.indices);
      auto hit_values = reinterpret_cast<const uint16_t *>(hit_sparse.values);
      ASSERT_EQ(hit_sparse.count, kSparseCount);
      for (uint32_t pos = 0; pos < kSparseCount; ++pos) {
        ASSERT_EQ(pos, hit_indices[pos]);
        ASSERT_FLOAT_EQ(pos, ailego::FloatHelper::ToFP32(hit_values[pos]));
      }

      // NOTE(review): this downcast uses reinterpret_cast; confirm that
      // VectorIndexParams is a plain single-inheritance base of the params
      // passed in, or switch to a checked cast.
      auto typed_params =
          reinterpret_cast<VectorIndexParams *>(index_params.get());
      if (typed_params->quantize_type() != QuantizeType::UNDEFINED) {
        ASSERT_TRUE(vector_results->docs().size() == 1);
        ASSERT_TRUE(vector_results->reverted_sparse_values_list().size() == 1);
        ASSERT_TRUE(vector_results->reverted_vector_list().empty());
      }
    }

    indexer->Close();

    system(remove_cmd.c_str());
  };

  func(std::make_shared<FlatIndexParams>(MetricType::IP));
  func(std::make_shared<HnswIndexParams>(MetricType::IP, 10, 100));
}

// Merges two dense FP32 source indexers into a third one and checks the
// vectors fetched from the result. The scenario is repeated for several
// combinations of index kind (flat / HNSW), metric type and quantize type;
// see `test_different_quantize_type` below.
TEST(VectorColumnIndexerTest, Merge) {
  constexpr uint32_t kDimension = 64;
  const std::string index_name{"test_indexer.index"};

  // Removes an index file from disk by shelling out to `rm -f`.
  auto del_index_file_func = [&](const std::string file_name) {
    auto cmd_buf = "rm -f " + file_name;
    system(cmd_buf.c_str());
  };

  // Deletes any stale file named `index_name`, then creates and opens a
  // VECTOR_FP32 indexer of `kDimension` on it. Returns nullptr when creation
  // or Open() fails.
  auto create_indexer_func =
      [&](const IndexParams::Ptr &index_params,
          const std::string &index_name) -> VectorColumnIndexer::Ptr {
    del_index_file_func(index_name);
    auto indexer = std::make_shared<VectorColumnIndexer>(
        index_name, FieldSchema("test", DataType::VECTOR_FP32, kDimension,
                                false, index_params));
    if (indexer == nullptr ||
        !indexer->Open(vector_column_params::ReadOptions{true, true}).ok()) {
      return nullptr;
    }
    return indexer;
  };

  // One merge scenario: `param1` / `param2` configure the two source
  // indexers, `param3` configures the merge target.
  auto func = [&](const IndexParams::Ptr &param1,
                  const IndexParams::Ptr &param2,
                  const IndexParams::Ptr &param3) {
    auto indexer1 = create_indexer_func(param1, index_name + "1");
    ASSERT_NE(nullptr, indexer1);
    auto indexer2 = create_indexer_func(param2, index_name + "2");
    ASSERT_NE(nullptr, indexer2);

    // `vector_data` wraps `vector.data()`, so the writes to `vector[1]`
    // between the Insert() calls change what each following insert stores.
    // The Fetch() checks below depend on this ordering.
    std::vector<float> vector(kDimension);
    vector[1] = 1.0f;
    vector[2] = 123.0f;
    auto vector_data = vector_column_params::VectorData{
        vector_column_params::DenseVector{vector.data()}};
    ASSERT_TRUE(indexer1->Insert(vector_data, 0).ok());

    vector[1] = 2.0f;
    ASSERT_TRUE(indexer2->Insert(vector_data, 0).ok());
    vector[1] = 3.0f;
    ASSERT_TRUE(indexer2->Insert(vector_data, 1).ok());

    // Sanity-check the sources before merging. A 0.1 tolerance is used for
    // most comparisons.
    {
      auto fetched_data = indexer1->Fetch(0);
      ASSERT_TRUE(fetched_data.has_value());
      const float *fetched_vector = reinterpret_cast<const float *>(
          std::get<vector_column_params::DenseVectorBuffer>(
              fetched_data->vector_buffer)
              .data.data());
      ASSERT_NEAR(1.0f, fetched_vector[1], 0.1);
      ASSERT_NEAR(123.0f, fetched_vector[2], 0.1);
    }
    {
      auto fetched_data = indexer2->Fetch(0);
      ASSERT_TRUE(fetched_data.has_value());
      const float *fetched_vector = reinterpret_cast<const float *>(
          std::get<vector_column_params::DenseVectorBuffer>(
              fetched_data->vector_buffer)
              .data.data());
      ASSERT_NEAR(2.0f, fetched_vector[1], 0.1);
      ASSERT_NEAR(123.0f, fetched_vector[2], 0.1);
    }
    {
      auto fetched_data = indexer2->Fetch(1);
      ASSERT_TRUE(fetched_data.has_value());
      const float *fetched_vector = reinterpret_cast<const float *>(
          std::get<vector_column_params::DenseVectorBuffer>(
              fetched_data->vector_buffer)
              .data.data());
      ASSERT_NEAR(3.0f, fetched_vector[1], 0.1);
      ASSERT_FLOAT_EQ(123.0f, fetched_vector[2]);
    }

    {  // test reduce
      // Unfiltered merge: the checks expect merged doc 0 to be indexer1's
      // doc 0 and merged doc 1 to be indexer2's doc 0.
      auto indexer3 = create_indexer_func(param3, index_name + "3");
      ASSERT_NE(nullptr, indexer3);
      ASSERT_TRUE(indexer3->Merge({indexer1, indexer2}, nullptr).ok());
      {
        auto fetched_data = indexer3->Fetch(0);
        ASSERT_TRUE(fetched_data.has_value());
        const float *fetched_vector = reinterpret_cast<const float *>(
            std::get<vector_column_params::DenseVectorBuffer>(
                fetched_data->vector_buffer)
                .data.data());
        ASSERT_NEAR(1.0f, fetched_vector[1], 0.1);
        ASSERT_NEAR(123.0f, fetched_vector[2], 0.1);
      }
      {
        auto fetched_data = indexer3->Fetch(1);
        ASSERT_TRUE(fetched_data.has_value());
        const float *fetched_vector = reinterpret_cast<const float *>(
            std::get<vector_column_params::DenseVectorBuffer>(
                fetched_data->vector_buffer)
                .data.data());
        ASSERT_NEAR(2.0f, fetched_vector[1], 0.1);
        ASSERT_NEAR(123.0f, fetched_vector[2], 0.1);
      }
      indexer3->Close();
      del_index_file_func(index_name + "3");
    }

    {  // test reduce with filter
      // The filter matches key 0. The checks below expect two documents to
      // survive and merged doc 0 to hold indexer2's first vector.
      auto indexer3 = create_indexer_func(param3, index_name + "3");
      ASSERT_NE(nullptr, indexer3);
      auto filter = std::make_shared<EasyIndexFilter>(
          [](uint64_t key) { return key == 0; });
      ASSERT_TRUE(indexer3->Merge({indexer1, indexer2}, filter).ok());
      // 0.0 -> x ; 1.0 -> 0 ; 1.1 -> 1
      ASSERT_TRUE(indexer3->doc_count() == 2);
      {
        auto fetched_data = indexer3->Fetch(0);
        ASSERT_TRUE(fetched_data.has_value());
        const float *fetched_vector = reinterpret_cast<const float *>(
            std::get<vector_column_params::DenseVectorBuffer>(
                fetched_data->vector_buffer)
                .data.data());
        ASSERT_NEAR(2.0f, fetched_vector[1], 0.1);
        ASSERT_NEAR(123.0f, fetched_vector[2], 0.1);
      }

      {
        // search with fetch vector
        // NOTE(review): this searches indexer2, not the freshly merged
        // indexer3 -- confirm that is intended. At this point `vector` still
        // holds the last inserted values (vector[1] == 3.0f).
        auto query = vector_column_params::VectorData{
            vector_column_params::DenseVector{vector.data()}};
        auto query_params = vector_column_params::QueryParams{
            .topk = 10, .filter = nullptr, .fetch_vector = true};
        auto results = indexer2->Search(query, query_params);
        ASSERT_TRUE(results.has_value());
        auto vector_results =
            dynamic_cast<VectorIndexResults *>(results.value().get());
        ASSERT_TRUE(vector_results);
        ASSERT_EQ(vector_results->count(), 2);
        auto iter = vector_results->create_iterator();
        ASSERT_TRUE(iter->valid());

        // Only logs the top hit; the score assertions at the end of this
        // block are disabled.
        {
          ASSERT_TRUE(iter->valid());
          auto doc_id = iter->doc_id();
          LOG_DEBUG("topk1 pk: %zu", (size_t)doc_id);
          LOG_DEBUG("topk1 score: %.10f", iter->score());

          LOG_DEBUG(
              "topk1 fetched_vector:%s",
              print_dense_vector(std::get<vector_column_params::DenseVector>(
                                     iter->vector().vector)
                                     .data,
                                 3, DataType::VECTOR_FP32)
                  .c_str());
          {
            auto fetched_vector = vector_results->docs()[0].vector();

            LOG_DEBUG(
                "topk1 fetched_vector - original:%s",
                print_dense_vector(fetched_vector, 3, DataType::VECTOR_FP16)
                    .c_str());
          }
          if (!vector_results->reverted_vector_list().empty()) {
            auto fetched_vector =
                vector_results->reverted_vector_list()[0].data();

            LOG_DEBUG(
                "topk1 fetched_vector - reverted:%s",
                print_dense_vector(fetched_vector, 3, DataType::VECTOR_FP32)
                    .c_str());
          }
          // ASSERT_TRUE(iter->score() < 2.01);
          // ASSERT_TRUE(iter->score() > -0.01);
        }
      }

      indexer3->Close();
      del_index_file_func(index_name + "3");
    }

    {  // test reduce with filter in parallel
      // Same filtered merge, with an extra `{3}` argument to Merge();
      // presumably merge options requesting 3 workers -- TODO confirm.
      auto indexer3 = create_indexer_func(param3, index_name + "3");
      ASSERT_NE(nullptr, indexer3);
      auto filter = std::make_shared<EasyIndexFilter>(
          [](uint64_t key) { return key == 0; });
      ASSERT_TRUE(indexer3->Merge({indexer1, indexer2}, filter, {3}).ok());

      {
        auto fetched_data = indexer3->Fetch(0);
        ASSERT_TRUE(fetched_data.has_value());
        const float *fetched_vector = reinterpret_cast<const float *>(
            std::get<vector_column_params::DenseVectorBuffer>(
                fetched_data->vector_buffer)
                .data.data());
        ASSERT_NEAR(2.0f, fetched_vector[1], 0.1);
        ASSERT_NEAR(123.0f, fetched_vector[2], 0.1);
      }
      indexer3->Close();
      del_index_file_func(index_name + "3");
    }


    // Release the sources and remove their files.
    indexer1->Close();
    indexer2->Close();
    del_index_file_func(index_name + "1");
    del_index_file_func(index_name + "2");
  };

  // same index with different quantize type
  // Builds flat and HNSW params both without and with `quantize_type`, then
  // runs `func` over selected source/target combinations. The exhaustive
  // nested loops are kept below, commented out.
  auto test_different_quantize_type = [&](MetricType metric_type,
                                          QuantizeType quantize_type) {
    LOG_INFO(
        "Merge test_different_quantize_type(): with metric type %s and "
        "quantize type %s",
        MetricTypeCodeBook::AsString(metric_type).c_str(),
        QuantizeTypeCodeBook::AsString(quantize_type).c_str());

    // NOTE: the `_fp16` suffix is historical; these carry whatever
    // `quantize_type` was passed in (FP16, INT8, or UNDEFINED).
    auto param_flat = std::make_shared<FlatIndexParams>(metric_type);
    auto param_flat_fp16 =
        std::make_shared<FlatIndexParams>(metric_type, quantize_type);
    auto param_hnsw = std::make_shared<HnswIndexParams>(metric_type, 10, 100);
    auto param_hnsw_fp16 =
        std::make_shared<HnswIndexParams>(metric_type, 10, 100, quantize_type);

    func(param_flat, param_flat, param_hnsw_fp16);

    std::vector<IndexParams::Ptr> fp32_params = {param_flat, param_hnsw};
    std::vector<IndexParams::Ptr> fp16_params = {param_flat_fp16,
                                                 param_hnsw_fp16};
    // can't mix
    // Unquantized merge targets.
    for (auto param_target : fp32_params) {
      func(param_flat_fp16, param_hnsw_fp16, param_target);
      // for (auto param1 : fp16_params) {
      //   for (auto param2 : fp16_params) {
      //     func(param1, param2, param_target);
      //   }
      // }
      func(param_hnsw, param_flat, param_target);
      // for (auto param1 : fp32_params) {
      //   for (auto param2 : fp32_params) {
      //     func(param1, param2, param_target);
      //   }
      // }
    }

    // Quantized merge targets.
    for (auto param_target : fp16_params) {
      func(param_flat_fp16, param_hnsw_fp16, param_target);
      // for (auto param1 : fp16_params) {
      //   for (auto param2 : fp16_params) {
      //     func(param1, param2, param_target);
      //   }
      // }
      func(param_hnsw, param_flat, param_target);
      // for (auto param1 : fp32_params) {
      //   for (auto param2 : fp32_params) {
      //     func(param1, param2, param_target);
      //   }
      // }
    }
  };
  test_different_quantize_type(MetricType::L2, QuantizeType::UNDEFINED);
  test_different_quantize_type(MetricType::L2, QuantizeType::FP16);
  test_different_quantize_type(MetricType::IP, QuantizeType::FP16);
  test_different_quantize_type(MetricType::L2, QuantizeType::INT8);
  // test_different_quantize_type(MetricType::IP, QuantizeType::INT8);
  // The quantization error is too large for INT4, so those runs are disabled.
  // test_different_quantize_type(MetricType::L2, QuantizeType::INT4);
  // test_different_quantize_type(MetricType::IP, QuantizeType::INT4);
  // test_different_quantize_type(MetricType::COSINE);
}

// Sparse counterpart of the Merge test: merges two SPARSE_VECTOR_FP32 source
// indexers into a third one (unfiltered, filtered, and filtered with an extra
// merge argument) and checks the fetched indices and values exactly.
TEST(VectorColumnIndexerTest, SparseMerge) {
  constexpr uint32_t kSparseCount = 3;
  constexpr uint32_t kUnitSize = sizeof(float);  // VECTOR_FP32
  const std::string index_name{"test_indexer.index"};

  // Removes an index file from disk by shelling out to `rm -f`.
  auto del_index_file_func = [&](const std::string file_name) {
    auto cmd_buf = "rm -f " + file_name;
    system(cmd_buf.c_str());
  };

  // Deletes any stale file named `index_name`, then creates and opens a
  // SPARSE_VECTOR_FP32 indexer on it. Returns nullptr when creation or
  // Open() fails.
  auto create_indexer_func =
      [&](const IndexParams::Ptr &index_params,
          const std::string &index_name) -> VectorColumnIndexer::Ptr {
    del_index_file_func(index_name);
    auto indexer = std::make_shared<VectorColumnIndexer>(
        index_name,
        FieldSchema("test", DataType::SPARSE_VECTOR_FP32, false, index_params));
    if (indexer == nullptr ||
        !indexer->Open(vector_column_params::ReadOptions{true, true}).ok()) {
      return nullptr;
    }
    return indexer;
  };

  // One merge scenario: `param1` / `param2` configure the two source
  // indexers, `param3` configures the merge target.
  auto func = [&](const IndexParams::Ptr &param1,
                  const IndexParams::Ptr &param2,
                  const IndexParams::Ptr &param3) {
    auto indexer1 = create_indexer_func(param1, index_name + "1");
    ASSERT_NE(nullptr, indexer1);
    auto indexer2 = create_indexer_func(param2, index_name + "2");
    ASSERT_NE(nullptr, indexer2);

    // Initial sparse vector: indices {0, 1, 2}, values {0, 1, 2}.
    std::vector<uint32_t> indices(kSparseCount);
    std::vector<float> values(kSparseCount);
    for (uint32_t i = 0; i < kSparseCount; ++i) {
      indices[i] = i;
      values[i] = (float)i;
    }
    // `vector` points at `indices` / `values`, so the writes to `values[1]`
    // between the Insert() calls change what each following insert stores.
    // The Fetch() checks below depend on this ordering.
    vector_column_params::SparseVector vector{kSparseCount, indices.data(),
                                              values.data()};
    auto vector_data = vector_column_params::VectorData{vector};
    ASSERT_TRUE(indexer1->Insert(vector_data, 0).ok());

    values[1] = 2.0f;
    ASSERT_TRUE(indexer2->Insert(vector_data, 0).ok());
    values[1] = 3.0f;
    ASSERT_TRUE(indexer2->Insert(vector_data, 1).ok());

    // Sanity-check the sources before merging.
    // indexer1 doc 0 -> values {0, 1, 2}
    {
      auto fetched_data = indexer1->Fetch(0);
      ASSERT_TRUE(fetched_data.has_value());
      auto fetched_sparse_vector =
          std::get<vector_column_params::SparseVectorBuffer>(
              fetched_data->vector_buffer);
      ASSERT_EQ(kSparseCount,
                fetched_sparse_vector.indices.size() / sizeof(uint32_t));
      ASSERT_EQ(kSparseCount, fetched_sparse_vector.values.size() / kUnitSize);

      auto fetched_indices = reinterpret_cast<const uint32_t *>(
          fetched_sparse_vector.indices.data());
      auto fetched_values =
          reinterpret_cast<const float *>(fetched_sparse_vector.values.data());
      for (uint32_t i = 0; i < kSparseCount; ++i) {
        ASSERT_EQ(i, fetched_indices[i]);
      }
      ASSERT_EQ(0.0f, fetched_values[0]);
      ASSERT_EQ(1.0f, fetched_values[1]);
      ASSERT_EQ(2.0f, fetched_values[2]);
    }
    // indexer2 doc 0 -> values {0, 2, 2}
    {
      auto fetched_data = indexer2->Fetch(0);
      ASSERT_TRUE(fetched_data.has_value());
      auto fetched_sparse_vector =
          std::get<vector_column_params::SparseVectorBuffer>(
              fetched_data->vector_buffer);
      ASSERT_EQ(kSparseCount,
                fetched_sparse_vector.indices.size() / sizeof(uint32_t));
      ASSERT_EQ(kSparseCount, fetched_sparse_vector.values.size() / kUnitSize);

      auto fetched_indices = reinterpret_cast<const uint32_t *>(
          fetched_sparse_vector.indices.data());
      auto fetched_values =
          reinterpret_cast<const float *>(fetched_sparse_vector.values.data());
      for (uint32_t i = 0; i < kSparseCount; ++i) {
        ASSERT_EQ(i, fetched_indices[i]);
      }
      ASSERT_EQ(0.0f, fetched_values[0]);
      ASSERT_EQ(2.0f, fetched_values[1]);
      ASSERT_EQ(2.0f, fetched_values[2]);
    }
    // indexer2 doc 1 -> values {0, 3, 2}
    {
      auto fetched_data = indexer2->Fetch(1);
      ASSERT_TRUE(fetched_data.has_value());
      auto fetched_sparse_vector =
          std::get<vector_column_params::SparseVectorBuffer>(
              fetched_data->vector_buffer);
      ASSERT_EQ(kSparseCount,
                fetched_sparse_vector.indices.size() / sizeof(uint32_t));
      ASSERT_EQ(kSparseCount, fetched_sparse_vector.values.size() / kUnitSize);

      auto fetched_indices = reinterpret_cast<const uint32_t *>(
          fetched_sparse_vector.indices.data());
      auto fetched_values =
          reinterpret_cast<const float *>(fetched_sparse_vector.values.data());
      for (uint32_t i = 0; i < kSparseCount; ++i) {
        ASSERT_EQ(i, fetched_indices[i]);
      }
      ASSERT_EQ(0.0f, fetched_values[0]);
      ASSERT_EQ(3.0f, fetched_values[1]);
      ASSERT_EQ(2.0f, fetched_values[2]);
    }

    {  // test reduce
      // Unfiltered merge: the checks expect merged doc 0 to be indexer1's
      // doc 0 and merged doc 1 to be indexer2's doc 0.
      auto indexer3 = create_indexer_func(param3, index_name + "3");
      ASSERT_NE(nullptr, indexer3);
      ASSERT_TRUE(indexer3->Merge({indexer1, indexer2}, nullptr).ok());
      {
        auto fetched_data = indexer3->Fetch(0);
        ASSERT_TRUE(fetched_data.has_value());
        auto fetched_sparse_vector =
            std::get<vector_column_params::SparseVectorBuffer>(
                fetched_data->vector_buffer);
        ASSERT_EQ(kSparseCount,
                  fetched_sparse_vector.indices.size() / sizeof(uint32_t));
        ASSERT_EQ(kSparseCount,
                  fetched_sparse_vector.values.size() / kUnitSize);
        auto fetched_indices = reinterpret_cast<const uint32_t *>(
            fetched_sparse_vector.indices.data());
        auto fetched_values = reinterpret_cast<const float *>(
            fetched_sparse_vector.values.data());
        for (uint32_t i = 0; i < kSparseCount; ++i) {
          ASSERT_EQ(i, fetched_indices[i]);
        }
        ASSERT_EQ(0.0f, fetched_values[0]);
        ASSERT_EQ(1.0f, fetched_values[1]);
        ASSERT_EQ(2.0f, fetched_values[2]);
      }
      {
        auto fetched_data = indexer3->Fetch(1);
        ASSERT_TRUE(fetched_data.has_value());
        auto fetched_sparse_vector =
            std::get<vector_column_params::SparseVectorBuffer>(
                fetched_data->vector_buffer);
        ASSERT_EQ(kSparseCount,
                  fetched_sparse_vector.indices.size() / sizeof(uint32_t));
        ASSERT_EQ(kSparseCount,
                  fetched_sparse_vector.values.size() / kUnitSize);
        auto fetched_indices = reinterpret_cast<const uint32_t *>(
            fetched_sparse_vector.indices.data());
        auto fetched_values = reinterpret_cast<const float *>(
            fetched_sparse_vector.values.data());
        for (uint32_t i = 0; i < kSparseCount; ++i) {
          ASSERT_EQ(i, fetched_indices[i]);
        }
        ASSERT_EQ(0.0f, fetched_values[0]);
        ASSERT_EQ(2.0f, fetched_values[1]);
        ASSERT_EQ(2.0f, fetched_values[2]);
      }
      indexer3->Close();
      del_index_file_func(index_name + "3");
    }

    {  // test reduce with filter
      // The filter matches key 0; the check below expects merged doc 0 to
      // hold indexer2's first vector ({0, 2, 2}).
      auto indexer3 = create_indexer_func(param3, index_name + "3");
      ASSERT_NE(nullptr, indexer3);
      auto filter = std::make_shared<EasyIndexFilter>(
          [](uint64_t key) { return key == 0; });
      ASSERT_TRUE(indexer3->Merge({indexer1, indexer2}, filter).ok());
      {
        auto fetched_data = indexer3->Fetch(0);
        ASSERT_TRUE(fetched_data.has_value());
        auto fetched_sparse_vector =
            std::get<vector_column_params::SparseVectorBuffer>(
                fetched_data->vector_buffer);
        ASSERT_EQ(kSparseCount,
                  fetched_sparse_vector.indices.size() / sizeof(uint32_t));
        ASSERT_EQ(kSparseCount,
                  fetched_sparse_vector.values.size() / kUnitSize);
        auto fetched_indices = reinterpret_cast<const uint32_t *>(
            fetched_sparse_vector.indices.data());
        auto fetched_values = reinterpret_cast<const float *>(
            fetched_sparse_vector.values.data());
        for (uint32_t i = 0; i < kSparseCount; ++i) {
          ASSERT_EQ(i, fetched_indices[i]);
        }
        ASSERT_EQ(0.0f, fetched_values[0]);
        ASSERT_EQ(2.0f, fetched_values[1]);
        ASSERT_EQ(2.0f, fetched_values[2]);
      }
      indexer3->Close();
      del_index_file_func(index_name + "3");
    }

    {  // test reduce with filter in parallel
      // Same filtered merge, with an extra `{3}` argument to Merge();
      // presumably merge options requesting 3 workers -- TODO confirm.
      auto indexer3 = create_indexer_func(param3, index_name + "3");
      ASSERT_NE(nullptr, indexer3);
      auto filter = std::make_shared<EasyIndexFilter>(
          [](uint64_t key) { return key == 0; });
      ASSERT_TRUE(indexer3->Merge({indexer1, indexer2}, filter, {3}).ok());
      {
        auto fetched_data = indexer3->Fetch(0);
        ASSERT_TRUE(fetched_data.has_value());
        auto fetched_sparse_vector =
            std::get<vector_column_params::SparseVectorBuffer>(
                fetched_data->vector_buffer);
        ASSERT_EQ(kSparseCount,
                  fetched_sparse_vector.indices.size() / sizeof(uint32_t));
        ASSERT_EQ(kSparseCount,
                  fetched_sparse_vector.values.size() / kUnitSize);
        auto fetched_indices = reinterpret_cast<const uint32_t *>(
            fetched_sparse_vector.indices.data());
        auto fetched_values = reinterpret_cast<const float *>(
            fetched_sparse_vector.values.data());
        for (uint32_t i = 0; i < kSparseCount; ++i) {
          ASSERT_EQ(i, fetched_indices[i]);
        }
        ASSERT_EQ(0.0f, fetched_values[0]);
        ASSERT_EQ(2.0f, fetched_values[1]);
        ASSERT_EQ(2.0f, fetched_values[2]);
      }
      indexer3->Close();
      del_index_file_func(index_name + "3");
    }


    // Release the sources and remove their files.
    indexer1->Close();
    indexer2->Close();
    del_index_file_func(index_name + "1");
    del_index_file_func(index_name + "2");
  };


  //===============================================
  // Fp32
  //===============================================
  // Unquantized flat and HNSW params in every source/target arrangement
  // listed below.
  {
    auto param_flat = std::make_shared<FlatIndexParams>(MetricType::IP);
    auto param_hnsw =
        std::make_shared<HnswIndexParams>(MetricType::IP, 10, 100);
    LOG_INFO("SparseMerge: param_flat, param_flat, param_flat");
    func(param_flat, param_flat, param_flat);

    LOG_INFO("SparseMerge: param_hnsw, param_hnsw, param_hnsw");
    func(param_hnsw, param_hnsw, param_hnsw);

    LOG_INFO("SparseMerge: param_flat, param_hnsw, param_hnsw");
    func(param_flat, param_hnsw, param_hnsw);

    LOG_INFO("SparseMerge: param_hnsw, param_flat, param_flat");
    func(param_hnsw, param_flat, param_flat);
    LOG_INFO("SparseMerge: param_flat, param_hnsw, param_flat");
    func(param_flat, param_hnsw, param_flat);

    LOG_INFO("SparseMerge: param_hnsw, param_flat, param_hnsw");
    func(param_hnsw, param_flat, param_hnsw);
  }

  //===============================================
  // Fp16 fp32
  //===============================================
  // Here only the HNSW params carry QuantizeType::FP16; the flat params stay
  // unquantized.
  {
    auto param_flat = std::make_shared<FlatIndexParams>(MetricType::IP);
    auto param_hnsw = std::make_shared<HnswIndexParams>(MetricType::IP, 10, 100,
                                                        QuantizeType::FP16);
    LOG_INFO("SparseMerge - fp16: param_flat, param_flat -> param_flat");
    func(param_flat, param_flat, param_flat);

    LOG_INFO("SparseMerge - fp16: param_hnsw, param_hnsw -> param_hnsw");
    func(param_hnsw, param_hnsw, param_hnsw);

    LOG_INFO("SparseMerge - fp16: param_hnsw, param_hnsw -> param_flat");
    func(param_hnsw, param_hnsw, param_flat);

    LOG_INFO("SparseMerge - fp16: param_flat, param_flat -> param_hnsw");
    func(param_flat, param_flat, param_hnsw);
  }
}


// Checks searching with an explicit `bf_pks` key list (presumably
// "brute-force primary keys"): only the listed documents are expected back,
// and the fetched vector of the top hit must match what was inserted. Runs
// once per index kind passed to `func` at the bottom of the test.
TEST(VectorColumnIndexerTest, BfPks) {
  auto func = [&](const IndexParams::Ptr index_params) {
    const std::string index_file_path = "test_indexer.index";

    // Start from a clean slate; the same command is reused for cleanup.
    char cmd_buf[100];
    snprintf(cmd_buf, 100, "rm -f %s", index_file_path.c_str());
    system(cmd_buf);

    // 1. create indexer
    auto indexer = std::make_shared<VectorColumnIndexer>(
        index_file_path,
        FieldSchema("test", DataType::VECTOR_FP32, 3, false, index_params));
    ASSERT_TRUE(indexer);

    // 2. open
    ASSERT_TRUE(
        indexer->Open(vector_column_params::ReadOptions{true, true}).ok());

    auto vector1 = std::vector<float>{1.0f, 2.0f, 3.0f};
    auto vector2 = std::vector<float>{4.0f, 5.0f, 6.0f};

    // DenseVector only holds a pointer to the caller's data, so the query
    // buffer must stay alive across every Search() call. Building it from a
    // temporary std::vector left the pointer dangling once the declaration
    // of `query` had finished.
    std::vector<float> query_vector{1.0f, 2.0f, 3.0f};

    // 3. add data
    auto data1 = vector_column_params::VectorData{
        vector_column_params::DenseVector{vector1.data()}};
    ASSERT_TRUE(indexer->Insert(data1, 1).ok());

    auto data2 = vector_column_params::VectorData{
        vector_column_params::DenseVector{vector2.data()}};
    ASSERT_TRUE(indexer->Insert(data2, 2).ok());

    // Restrict to key 1: exactly that document comes back.
    {
      auto bf_pks = std::vector<uint64_t>{1};
      auto query =
          vector_column_params::VectorData{vector_column_params::DenseVector{
              .data = query_vector.data()}};
      auto query_params =
          vector_column_params::QueryParams{.topk = 10,
                                            .filter = nullptr,
                                            .fetch_vector = true,
                                            .bf_pks = {bf_pks}};
      auto results = indexer->Search(query, query_params);
      ASSERT_TRUE(results.has_value());

      auto vector_results =
          dynamic_cast<VectorIndexResults *>(results.value().get());
      ASSERT_TRUE(vector_results);
      ASSERT_EQ(vector_results->count(), 1);
      auto iter = vector_results->create_iterator();
      ASSERT_TRUE(iter->valid());
      ASSERT_EQ(iter->doc_id(), 1);
      auto fetched_vector =
          std::get<vector_column_params::DenseVector>(iter->vector().vector);
      const float *fetched_vector_data =
          reinterpret_cast<const float *>(fetched_vector.data);
      for (int i = 0; i < 3; ++i) {
        ASSERT_FLOAT_EQ(fetched_vector_data[i], vector1[i]);
      }
    }

    // Restrict to keys 1 and 2: both come back, and document 1 (identical to
    // the query) is the top hit.
    {
      auto bf_pks = std::vector<uint64_t>{1, 2};
      auto query =
          vector_column_params::VectorData{vector_column_params::DenseVector{
              .data = query_vector.data()}};
      auto query_params =
          vector_column_params::QueryParams{.topk = 10,
                                            .filter = nullptr,
                                            .fetch_vector = true,
                                            .bf_pks = {bf_pks}};
      auto results = indexer->Search(query, query_params);
      ASSERT_TRUE(results.has_value());

      auto vector_results =
          dynamic_cast<VectorIndexResults *>(results.value().get());
      ASSERT_TRUE(vector_results);
      ASSERT_EQ(vector_results->count(), 2);
      auto iter = vector_results->create_iterator();
      ASSERT_TRUE(iter->valid());
      ASSERT_EQ(iter->doc_id(), 1);
      auto fetched_vector =
          std::get<vector_column_params::DenseVector>(iter->vector().vector);
      const float *fetched_vector_data =
          reinterpret_cast<const float *>(fetched_vector.data);
      for (int i = 0; i < 3; ++i) {
        ASSERT_FLOAT_EQ(fetched_vector_data[i], vector1[i]);
      }
    }

    // Restrict to key 2: document 2 is returned even though document 1 would
    // match the query exactly.
    {
      auto bf_pks = std::vector<uint64_t>{2};
      auto query =
          vector_column_params::VectorData{vector_column_params::DenseVector{
              .data = query_vector.data()}};
      auto query_params =
          vector_column_params::QueryParams{.topk = 10,
                                            .filter = nullptr,
                                            .fetch_vector = true,
                                            .bf_pks = {bf_pks}};
      auto results = indexer->Search(query, query_params);
      ASSERT_TRUE(results.has_value());

      auto vector_results =
          dynamic_cast<VectorIndexResults *>(results.value().get());
      ASSERT_TRUE(vector_results);
      ASSERT_EQ(vector_results->count(), 1);
      auto iter = vector_results->create_iterator();
      ASSERT_TRUE(iter->valid());
      ASSERT_EQ(iter->doc_id(), 2);
      auto fetched_vector =
          std::get<vector_column_params::DenseVector>(iter->vector().vector);
      const float *fetched_vector_data =
          reinterpret_cast<const float *>(fetched_vector.data);
      for (int i = 0; i < 3; ++i) {
        ASSERT_FLOAT_EQ(fetched_vector_data[i], vector2[i]);
      }
    }

    indexer->Close();

    system(cmd_buf);
  };

  func(std::make_shared<FlatIndexParams>(MetricType::COSINE));
  func(std::make_shared<HnswIndexParams>(MetricType::COSINE, 10, 100));
}


// Short aliases for the buffer types used by the vector helper functions
// below.
using DenseVectorDataBuffer = vector_column_params::DenseVectorBuffer;
using SparseVectorBuffer = vector_column_params::SparseVectorBuffer;

/// Builds a deterministic dense test vector for document `pk`.
///
/// Element i is `pk + i + float_offset` for the floating-point types and
/// `pk + i` (narrowed to the element type) for the integer types. For the
/// binary types, bit i holds the parity of `pk + i`, stored in bit `i % 8`
/// of byte `i / 8`.
///
/// @param dim          Number of elements (bits for the binary types).
/// @param data_type    Element encoding of the generated vector.
/// @param pk           Document key used to seed the element values.
/// @param count        Not used by this helper.
/// @param float_offset Offset added to every floating-point element.
/// @return The raw bytes of the vector, or an empty buffer (after logging an
///         error) when `data_type` is not handled here.
DenseVectorDataBuffer create_dense_vector(int dim, DataType data_type, int pk,
                                          size_t count,
                                          float float_offset = 0.1f) {
  // `count` is intentionally unused; discard it explicitly instead of
  // mutating it just to silence the unused-parameter warning.
  static_cast<void>(count);
  switch (data_type) {
    case DataType::VECTOR_FP32: {
      std::string ret;
      ret.resize(dim * sizeof(float));
      float *data = reinterpret_cast<float *>(ret.data());
      for (int i = 0; i < dim; ++i) {
        data[i] = pk + i + float_offset;
      }
      return DenseVectorDataBuffer{std::move(ret)};
    }
    case DataType::VECTOR_FP16: {
      std::string ret;
      ret.resize(dim * sizeof(zvec::float16_t));
      zvec::float16_t *data = reinterpret_cast<zvec::float16_t *>(ret.data());
      for (int i = 0; i < dim; ++i) {
        data[i] = pk + i + float_offset;
      }
      return DenseVectorDataBuffer{std::move(ret)};
    }
    case DataType::VECTOR_INT8: {
      std::string ret;
      ret.resize(dim * sizeof(int8_t));
      int8_t *data = reinterpret_cast<int8_t *>(ret.data());
      for (int i = 0; i < dim; ++i) {
        data[i] = pk + i;
      }
      return DenseVectorDataBuffer{std::move(ret)};
    }
    case DataType::VECTOR_INT16: {
      std::string ret;
      ret.resize(dim * sizeof(int16_t));
      int16_t *data = reinterpret_cast<int16_t *>(ret.data());
      for (int i = 0; i < dim; ++i) {
        data[i] = pk + i;
      }
      return DenseVectorDataBuffer{std::move(ret)};
    }
    case DataType::VECTOR_BINARY32:
    case DataType::VECTOR_BINARY64: {
      std::string ret;
      ret.resize(dim / 8);
      uint8_t *data = reinterpret_cast<uint8_t *>(ret.data());
      // Only whole bytes are allocated, so stop at the last bit that fits.
      // Iterating all the way to `dim` would write past the buffer whenever
      // `dim` is not a multiple of 8.
      const int bit_count = (dim / 8) * 8;
      for (int i = 0; i < bit_count; ++i) {
        // Mask the low bit instead of using `% 2`, so a negative `pk + i`
        // still contributes exactly 0 or 1 rather than -1.
        const unsigned parity = static_cast<unsigned>(pk + i) & 1u;
        data[i / 8] |= static_cast<uint8_t>(parity << (i % 8));
      }
      return DenseVectorDataBuffer{std::move(ret)};
    }
    default:
      LOG_ERROR("Unsupported data type: %d", static_cast<int>(data_type));
      return DenseVectorDataBuffer{};
  }
}


/// Builds a deterministic sparse test vector for document `pk`.
///
/// The vector has `dim` entries with indices 0..dim-1; entry i carries the
/// value `pk * 100 + i + float_offset`, encoded according to `data_type`.
///
/// @param dim          Number of (index, value) entries.
/// @param data_type    Value encoding; the FP32 and FP16 sparse types are
///                     handled.
/// @param pk           Document key used to seed the values.
/// @param float_offset Offset added to every value.
/// @return The packed indices and values, or an empty buffer (after logging
///         an error) for any other `data_type`.
SparseVectorBuffer create_sparse_vector(int dim, DataType data_type, int pk,
                                        float float_offset = 0.1f) {
  SparseVectorBuffer buffer;

  // Values first: their element type depends on the requested encoding.
  if (data_type == DataType::SPARSE_VECTOR_FP32) {
    std::vector<float> weights(dim);
    int position = 0;
    for (auto &weight : weights) {
      weight = pk * 100 + position + float_offset;
      ++position;
    }
    const size_t byte_count = weights.size() * sizeof(float);
    buffer.values =
        std::string(reinterpret_cast<char *>(weights.data()), byte_count);
  } else if (data_type == DataType::SPARSE_VECTOR_FP16) {
    std::vector<zvec::float16_t> weights(dim);
    int position = 0;
    for (auto &weight : weights) {
      weight = pk * 100 + position + float_offset;
      ++position;
    }
    const size_t byte_count = weights.size() * sizeof(zvec::float16_t);
    buffer.values =
        std::string(reinterpret_cast<char *>(weights.data()), byte_count);
  } else {
    LOG_ERROR("Unsupported data type: %d", static_cast<int>(data_type));
    return SparseVectorBuffer{};
  }

  // Indices are always the identity sequence stored as uint32_t.
  std::vector<uint32_t> positions(dim);
  uint32_t next_index = 0;
  for (auto &slot : positions) {
    slot = next_index;
    ++next_index;
  }
  buffer.indices = std::string(reinterpret_cast<char *>(positions.data()),
                               positions.size() * sizeof(uint32_t));
  return buffer;
}

bool compare_dense_vector(const DenseVectorDataBuffer &lhs, const void *rhs,
                          DataType data_type) {
  switch (data_type) {
    case DataType::VECTOR_FP32: {
      size_t dim = lhs.data.size() / sizeof(float);
      auto rhs_data = reinterpret_cast<const float *>(rhs);
      auto lhs_data = reinterpret_cast<const float *>(lhs.data.data());
      for (size_t i = 0; i < dim; ++i) {
        if (std::abs(lhs_data[i] - rhs_data[i]) > 1) {  // reformer
          LOG_ERROR("lhs_data[%zu] = %f, rhs_data[%zu] = %f", i,
                    (float)lhs_data[i], i, (float)rhs_data[i]);
          return false;
        }
      }
      return true;
    };
    case DataType::VECTOR_FP16: {
      size_t dim = lhs.data.size() / sizeof(zvec::float16_t);
      auto rhs_data = reinterpret_cast<const zvec::float16_t *>(rhs);
      auto lhs_data =
          reinterpret_cast<const zvec::float16_t *>(lhs.data.data());
      for (size_t i = 0; i < dim; ++i) {
        if (std::abs(lhs_data[i] - rhs_data[i]) > 1e-2) {  // reformer
          LOG_ERROR("lhs_data[%zu] = %f, rhs_data[%zu] = %f", i,
                    (float)lhs_data[i], i, (float)rhs_data[i]);
          return false;
        }
      }
      return true;
    }
    default:
      return memcmp(lhs.data.data(), rhs, lhs.data.size()) == 0;
  }
}


bool compare_sparse_vector(const SparseVectorBuffer &lhs,
                           const void *rhs_indices, const void *rhs_values,
                           DataType data_type) {
  if (memcmp(lhs.indices.data(), rhs_indices, lhs.indices.size()) != 0) {
    return false;
  }
  size_t dim = lhs.indices.size() / sizeof(uint32_t);
  switch (data_type) {
    case DataType::SPARSE_VECTOR_FP32: {
      auto rhs_values_data = reinterpret_cast<const float *>(rhs_values);
      auto lhs_values_data = reinterpret_cast<const float *>(lhs.values.data());
      for (size_t i = 0; i < dim; ++i) {
        if (std::abs(lhs_values_data[i] - rhs_values_data[i]) >
            1e-2) {  // reformer
          LOG_ERROR("lhs_values_data[%zu] = %f, rhs_values_data[%zu] = %f", i,
                    (float)lhs_values_data[i], i, (float)rhs_values_data[i]);
          return false;
        }
      }
      return true;
    }
    case DataType::SPARSE_VECTOR_FP16: {
      auto rhs_values_data =
          reinterpret_cast<const zvec::float16_t *>(rhs_values);
      auto lhs_values_data =
          reinterpret_cast<const zvec::float16_t *>(lhs.values.data());
      for (size_t i = 0; i < dim; ++i) {
        if (std::abs(lhs_values_data[i] - rhs_values_data[i]) >
            1e-2) {  // reformer
          LOG_ERROR("lhs_values_data[%zu] = %f, rhs_values_data[%zu] = %f", i,
                    (float)lhs_values_data[i], i, (float)rhs_values_data[i]);
          return false;
        }
      }
      return true;
    }
    default:
      return memcmp(lhs.values.data(), rhs_values, lhs.values.size()) == 0;
  }
}


// Exercises insert / fetch / linear search with the COSINE metric on FP32
// vectors, for flat and HNSW indexes, with no quantizer and with the FP16,
// INT8 and INT4 quantizers.
TEST(VectorColumnIndexerTest, CosineGeneral) {
  const std::string index_file_path = "test_indexer.index";
  const int kDim = 20;
  const int kCount = 20;  // can't set too large, or the quantization error
                          // will be too large due to float's precision
  const int kTopk = 10;

  char cmd_buf[100];
  snprintf(cmd_buf, sizeof(cmd_buf), "rm -f %s", index_file_path.c_str());
  system(cmd_buf);

  // Runs the whole insert / fetch / query scenario for one index
  // configuration. A fatal assertion only leaves this lambda; the next
  // configuration starts by removing whatever index file was left behind.
  auto func = [&](const IndexParams::Ptr &index_params, DataType data_type) {
    system(cmd_buf);
    auto indexer = std::make_shared<VectorColumnIndexer>(
        index_file_path,
        FieldSchema("test", data_type, kDim, false, index_params));
    ASSERT_TRUE(indexer);

    // An indexer that cannot be opened must fail the test; returning quietly
    // here would skip every assertion below and report a vacuous pass.
    auto open_status =
        indexer->Open(vector_column_params::ReadOptions{true, true});
    if (!open_status.ok()) {
      LOG_ERROR("Failed to open indexer: %s", open_status.message().c_str());
    }
    ASSERT_TRUE(open_status.ok());

    // insert
    for (int i = 0; i < kCount; ++i) {
      auto buffer = create_dense_vector(kDim, data_type, i, kCount, 0.1f);
      // print_dense_vector(buffer.data.data(), kDim, data_type);
      auto data = vector_column_params::VectorData{
          vector_column_params::DenseVector{buffer.data.data()}};
      ASSERT_TRUE(indexer->Insert(data, i).ok());
    }

    // fetch: every stored vector must round-trip within the tolerance of
    // compare_dense_vector.
    for (int i = 0; i < kCount; ++i) {
      auto fetched_data = indexer->Fetch(i);
      ASSERT_TRUE(fetched_data);
      ASSERT_TRUE(compare_dense_vector(
          create_dense_vector(kDim, data_type, i, kCount, 0.1f),
          std::get<DenseVectorDataBuffer>(fetched_data->vector_buffer)
              .data.data(),
          data_type));
    }

    // query: linear search with a slightly shifted copy of each vector.
    for (int i = 0; i < kCount; ++i) {
      auto buffer = create_dense_vector(kDim, data_type, i, kCount, 0.3f);
      auto data = vector_column_params::VectorData{
          vector_column_params::DenseVector{buffer.data.data()}};
      auto hnsw_query_params = std::make_shared<zvec::HnswQueryParams>(100);
      hnsw_query_params->set_is_linear(true);
      auto query_params =
          vector_column_params::QueryParams{.topk = kTopk,
                                            .filter = nullptr,
                                            .fetch_vector = true,
                                            .query_params = hnsw_query_params};
      auto results = indexer->Search(data, query_params);
      ASSERT_TRUE(results.has_value());
      auto vector_results =
          dynamic_cast<VectorIndexResults *>(results.value().get());
      ASSERT_TRUE(vector_results);
      ASSERT_EQ(vector_results->count(), kTopk);
      auto iter = vector_results->create_iterator();
      LOG_INFO("===query pk: %d", i);
      LOG_INFO("query_vector:%s",
               print_dense_vector(buffer.data.data(), kDim, data_type).c_str());
      {  // topk1
        ASSERT_TRUE(iter->valid());
        LOG_INFO("topk1 pk:%zu", static_cast<size_t>(iter->doc_id()));
        LOG_INFO("topk1 score:%.10f", iter->score());

        // The top-1 score must stay within [0, 2], with a small slack.
        ASSERT_TRUE(iter->score() < 2.01);
        ASSERT_TRUE(iter->score() > -0.01);

        auto fetched_vector =
            std::get<vector_column_params::DenseVector>(iter->vector().vector);
        LOG_INFO(
            "topk1 fetched_vector:%s",
            print_dense_vector(fetched_vector.data, kDim, data_type).c_str());

        // ASSERT_EQ(iter->doc_id(), i);
        // The vector returned with the hit must be the one inserted under the
        // reported doc id.
        ASSERT_TRUE(compare_dense_vector(
            create_dense_vector(kDim, data_type, iter->doc_id(), kCount, 0.1f),
            fetched_vector.data, data_type));
      }
    }
    indexer->Destroy();
  };

  LOG_INFO("Test FlatIndexParams(MetricType::COSINE), VECTOR_FP32");
  func(std::make_shared<FlatIndexParams>(MetricType::COSINE),
       DataType::VECTOR_FP32);
  LOG_INFO("Test HnswIndexParams(MetricType::COSINE), VECTOR_FP32");
  func(std::make_shared<HnswIndexParams>(MetricType::COSINE, 10, 100),
       DataType::VECTOR_FP32);
  LOG_INFO(
      "Test FlatIndexParams(MetricType::COSINE), VECTOR_FP32, "
      "QuantizeType::FP16");
  func(
      std::make_shared<FlatIndexParams>(MetricType::COSINE, QuantizeType::FP16),
      DataType::VECTOR_FP32);
  LOG_INFO(
      "Test HnswIndexParams(MetricType::COSINE), VECTOR_FP32, "
      "QuantizeType::FP16");
  func(std::make_shared<HnswIndexParams>(MetricType::COSINE, 10, 100,
                                         QuantizeType::FP16),
       DataType::VECTOR_FP32);

  LOG_INFO(
      "Test FlatIndexParams(MetricType::COSINE), VECTOR_FP32, "
      "QuantizeType::INT8");
  func(
      std::make_shared<FlatIndexParams>(MetricType::COSINE, QuantizeType::INT8),
      DataType::VECTOR_FP32);
  LOG_INFO(
      "Test HnswIndexParams(MetricType::COSINE), VECTOR_FP32, "
      "QuantizeType::INT8");
  func(std::make_shared<HnswIndexParams>(MetricType::COSINE, 10, 100,
                                         QuantizeType::INT8),
       DataType::VECTOR_FP32);

  LOG_INFO(
      "Test FlatIndexParams(MetricType::COSINE), VECTOR_FP32, "
      "QuantizeType::INT4");
  func(
      std::make_shared<FlatIndexParams>(MetricType::COSINE, QuantizeType::INT4),
      DataType::VECTOR_FP32);
  LOG_INFO(
      "Test HnswIndexParams(MetricType::COSINE), VECTOR_FP32, "
      "QuantizeType::INT4");
  func(std::make_shared<HnswIndexParams>(MetricType::COSINE, 10, 100,
                                         QuantizeType::INT4),
       DataType::VECTOR_FP32);

  // cosine doesn't support int8/int4 datatype, but support int8/int4 quantizer

  // LOG_INFO("Test FlatIndexParams(MetricType::COSINE), VECTOR_FP16");
  // func(
  //     std::make_shared<FlatIndexParams>(MetricType::COSINE,
  //     QuantizeType::FP16), DataType::VECTOR_FP16);
  // LOG_INFO("Test HnswIndexParams(MetricType::COSINE), VECTOR_FP16");
  // func(std::make_shared<HnswIndexParams>(MetricType::COSINE, 10, 100,
  //                                        QuantizeType::FP16),
  //      DataType::VECTOR_FP16);
}


// Checks that the scores returned by Search agree (within 1e-2) with scores
// computed directly from the inserted vectors, for dense vectors under the
// IP / COSINE / L2 metrics and for sparse vectors under IP, on flat and HNSW
// indexes with and without FP16 quantization.
TEST(VectorColumnIndexerTest, Score) {
  const std::string index_file_path = "test_indexer.index";
  const int kTopk = 10;
  constexpr idx_t kDocId1 = 2345;
  constexpr idx_t kDocId2 = 5432;
  auto vector1 = std::vector<float>{3.0f, 4.0f, 5.0f};
  auto vector2 = std::vector<float>{1.0f, 20.0f, 3.0f};
  auto vector_id_map = std::unordered_map<idx_t, std::vector<float>>{
      {kDocId1, vector1},
      {kDocId2, vector2},
  };
  auto sparse_indices = std::vector<uint32_t>{0, 1, 2};
  auto query_vector = std::vector<float>{1.0f, 2.0f, 3.0f};

  char cmd_buf[100];
  snprintf(cmd_buf, sizeof(cmd_buf), "rm -f %s", index_file_path.c_str());
  system(cmd_buf);


  // Verifies that both hits carry the score that `metric_type` yields for the
  // vector stored under the reported doc id.
  auto check_score = [&](VectorIndexResults *vector_results,
                         MetricType metric_type) {
    ASSERT_TRUE(vector_results);
    ASSERT_EQ(vector_results->count(), 2);

    auto inner_product_score_func = [&](const std::vector<float> &v1,
                                        const std::vector<float> &v2) {
      return v1[0] * v2[0] + v1[1] * v2[1] + v1[2] * v2[2];
    };

    // Cosine distance: 1 - cos(v1, v2).
    auto cosine_score_func = [&](const std::vector<float> &v1,
                                 const std::vector<float> &v2) {
      return 1 - inner_product_score_func(v1, v2) /
                     (std::sqrt(inner_product_score_func(v1, v1)) *
                      std::sqrt(inner_product_score_func(v2, v2)));
    };

    // SquaredEuclidean
    auto l2_score_func = [&](const std::vector<float> &v1,
                             const std::vector<float> &v2) {
      assert(v1.size() == 3);
      assert(v2.size() == 3);
      float ret = 0.0f;
      for (size_t i = 0; i < v1.size(); ++i) {
        ret += (v1[i] - v2[i]) * (v1[i] - v2[i]);
      }
      return ret;
    };

    std::function<float(const std::vector<float> &, const std::vector<float> &)>
        score_func;

    switch (metric_type) {
      case MetricType::IP:
        score_func = inner_product_score_func;
        break;
      case MetricType::COSINE:
        score_func = cosine_score_func;
        break;
      case MetricType::L2:
        score_func = l2_score_func;
        break;
      default:
        ASSERT_TRUE(false);
    }

    auto iter = vector_results->create_iterator();
    for (int rank = 1; rank <= 2; ++rank) {
      if (rank > 1) {
        iter->next();
      }
      ASSERT_TRUE(iter->valid());

      // Look the doc id up without operator[]: an unexpected id must fail the
      // test instead of inserting an empty vector that the score functions
      // would then read out of bounds.
      const auto expected_it = vector_id_map.find(iter->doc_id());
      ASSERT_TRUE(expected_it != vector_id_map.end());
      const float expected_score =
          score_func(expected_it->second, query_vector);

      printf("iter->score() top%d: %f\n", rank, iter->score());
      printf("score_func(vector_id_map[iter->doc_id()], query_vector): %f\n",
             expected_score);
      ASSERT_NEAR(iter->score(), expected_score, 1e-2);
    }
  };

  // Inserts the two dense vectors into a fresh index and checks the scores of
  // a query against them.
  auto dense_func = [&](const std::shared_ptr<VectorIndexParams>
                            &index_params) {
    // Drop any index file left behind by a configuration that aborted early.
    system(cmd_buf);
    auto metric_type = index_params->metric_type();
    auto indexer = std::make_shared<VectorColumnIndexer>(
        index_file_path,
        FieldSchema("test", DataType::VECTOR_FP32, 3, false, index_params));
    ASSERT_TRUE(indexer);

    if (auto ret = indexer->Open(vector_column_params::ReadOptions{true, true});
        !ret.ok()) {
      LOG_ERROR("Failed to open indexer: %s", ret.message().c_str());
      ASSERT_TRUE(false);
    }

    ASSERT_TRUE(indexer
                    ->Insert(
                        vector_column_params::VectorData{
                            vector_column_params::DenseVector{vector1.data()}},
                        kDocId1)
                    .ok());
    ASSERT_TRUE(indexer
                    ->Insert(
                        vector_column_params::VectorData{
                            vector_column_params::DenseVector{vector2.data()}},
                        kDocId2)
                    .ok());

    auto query = vector_column_params::VectorData{
        vector_column_params::DenseVector{.data = query_vector.data()}};
    auto query_params = vector_column_params::QueryParams{
        .topk = kTopk, .filter = nullptr, .fetch_vector = true};
    auto results = indexer->Search(query, query_params);
    ASSERT_TRUE(results.has_value());

    check_score(dynamic_cast<VectorIndexResults *>(results.value().get()),
                metric_type);

    indexer->Destroy();
  };

  // Same scenario as dense_func, but the vectors are inserted and queried as
  // sparse vectors over the indices {0, 1, 2}.
  auto sparse_func = [&](const std::shared_ptr<VectorIndexParams>
                             &index_params) {
    // Drop any index file left behind by a configuration that aborted early.
    system(cmd_buf);
    auto metric_type = index_params->metric_type();
    auto indexer = std::make_shared<VectorColumnIndexer>(
        index_file_path,
        FieldSchema("test", DataType::SPARSE_VECTOR_FP32, false, index_params));
    ASSERT_TRUE(indexer);

    if (auto ret = indexer->Open(vector_column_params::ReadOptions{true, true});
        !ret.ok()) {
      LOG_ERROR("Failed to open indexer: %s", ret.message().c_str());
      ASSERT_TRUE(false);
    }

    ASSERT_TRUE(
        indexer
            ->Insert(
                vector_column_params::VectorData{
                    vector_column_params::SparseVector{
                        3,
                        reinterpret_cast<const void *>(sparse_indices.data()),
                        vector1.data()}},
                kDocId1)
            .ok());
    ASSERT_TRUE(
        indexer
            ->Insert(
                vector_column_params::VectorData{
                    vector_column_params::SparseVector{
                        3,
                        reinterpret_cast<const void *>(sparse_indices.data()),
                        vector2.data()}},
                kDocId2)
            .ok());

    auto query =
        vector_column_params::VectorData{vector_column_params::SparseVector{
            3, reinterpret_cast<const void *>(sparse_indices.data()),
            query_vector.data()}};
    auto query_params = vector_column_params::QueryParams{
        .topk = kTopk, .filter = nullptr, .fetch_vector = true};
    auto results = indexer->Search(query, query_params);
    ASSERT_TRUE(results.has_value());

    check_score(dynamic_cast<VectorIndexResults *>(results.value().get()),
                metric_type);
    indexer->Destroy();
  };

  LOG_INFO("Test DenseVector, MetricType::IP");
  dense_func(std::make_shared<FlatIndexParams>(MetricType::IP));
  dense_func(std::make_shared<HnswIndexParams>(MetricType::IP, 10, 100));
  LOG_INFO("Test DenseVector, MetricType::IP, QuantizeType::FP16");
  dense_func(
      std::make_shared<FlatIndexParams>(MetricType::IP, QuantizeType::FP16));
  dense_func(std::make_shared<HnswIndexParams>(MetricType::IP, 10, 100,
                                               QuantizeType::FP16));

  LOG_INFO("Test DenseVector, MetricType::COSINE");
  dense_func(std::make_shared<FlatIndexParams>(MetricType::COSINE));
  dense_func(std::make_shared<HnswIndexParams>(MetricType::COSINE, 10, 100));

  LOG_INFO("Test DenseVector, MetricType::COSINE, QuantizeType::FP16");
  dense_func(std::make_shared<FlatIndexParams>(MetricType::COSINE,
                                               QuantizeType::FP16));
  dense_func(std::make_shared<HnswIndexParams>(MetricType::COSINE, 10, 100,
                                               QuantizeType::FP16));

  LOG_INFO("Test DenseVector, MetricType::L2");
  dense_func(std::make_shared<FlatIndexParams>(MetricType::L2));
  dense_func(std::make_shared<HnswIndexParams>(MetricType::L2, 10, 100));
  LOG_INFO("Test DenseVector, MetricType::L2, QuantizeType::FP16");
  dense_func(
      std::make_shared<FlatIndexParams>(MetricType::L2, QuantizeType::FP16));
  dense_func(std::make_shared<HnswIndexParams>(MetricType::L2, 10, 100,
                                               QuantizeType::FP16));

  LOG_INFO("Test SparseVector, MetricType::IP");
  sparse_func(std::make_shared<FlatIndexParams>(MetricType::IP));
  sparse_func(std::make_shared<HnswIndexParams>(MetricType::IP, 10, 100));
  LOG_INFO("Test SparseVector, MetricType::IP, QuantizeType::FP16");
  sparse_func(
      std::make_shared<FlatIndexParams>(MetricType::IP, QuantizeType::FP16));
  sparse_func(std::make_shared<HnswIndexParams>(MetricType::IP, 10, 100,
                                                QuantizeType::FP16));
}

// Walks through the error paths of VectorColumnIndexer: calls on an unopened
// indexer, rejected schemas / engine names at Open time, an unsupported
// search request, double Open, and a few edge cases of Merge and Fetch. The
// cases share one index file path, so they must run in the order written.
TEST(VectorColumnIndexerTest, Failure) {
  const std::string index_file_path = "test_indexer_failure.index";
  constexpr idx_t kDocId = 1234;
  std::vector<float> vector{1.0f, 2.0f, 3.0f};

  // Shell command that removes the index file; run before and after the test.
  const std::string rm_cmd = "rm -f " + index_file_path;
  system(rm_cmd.c_str());

  // Creates an unopened 3-dimensional FP32 flat indexer using the IP metric.
  const auto new_flat_ip_indexer = [&] {
    return std::make_shared<VectorColumnIndexer>(
        index_file_path,
        FieldSchema("test", DataType::VECTOR_FP32, 3, false,
                    std::make_shared<FlatIndexParams>(MetricType::IP)));
  };

  // Opens an indexer with ReadOptions{true, true} and returns the status.
  const auto open_default = [](const auto &target) {
    return target->Open(vector_column_params::ReadOptions{true, true});
  };

  // Test case 1: Operations on unopened indexer
  {
    auto indexer = new_flat_ip_indexer();
    ASSERT_TRUE(indexer);

    // Flush
    auto flush_status = indexer->Flush();
    ASSERT_FALSE(flush_status.ok());
    ASSERT_EQ(flush_status.message(), "Index not opened");

    // Close
    auto close_status = indexer->Close();
    ASSERT_FALSE(close_status.ok());
    ASSERT_EQ(close_status.message(), "Index not opened");

    // Destroy
    auto destroy_status = indexer->Destroy();
    ASSERT_FALSE(destroy_status.ok());
    ASSERT_EQ(destroy_status.message(), "Index not opened");

    // Insert
    auto payload = vector_column_params::VectorData{
        vector_column_params::DenseVector{vector.data()}};
    auto insert_status = indexer->Insert(payload, kDocId);
    ASSERT_FALSE(insert_status.ok());
    ASSERT_EQ(insert_status.message(), "Index not opened");

    // Fetch
    auto fetched = indexer->Fetch(kDocId);
    ASSERT_FALSE(fetched.has_value());
    ASSERT_EQ(fetched.error().message(), "Index not opened");

    // Search
    auto query = vector_column_params::VectorData{
        vector_column_params::DenseVector{.data = vector.data()}};
    auto search_options = vector_column_params::QueryParams{
        .topk = 10, .filter = nullptr, .fetch_vector = false};
    auto hits = indexer->Search(query, search_options);
    ASSERT_FALSE(hits.has_value());
    ASSERT_EQ(hits.error().message(), "Index not opened");

    // Merge
    auto merge_status = indexer->Merge({}, nullptr);
    ASSERT_FALSE(merge_status.ok());
    ASSERT_EQ(merge_status.message(), "Index not opened");
  }

  // Test case 2: Unsupported engine name
  {
    auto indexer = std::make_shared<VectorColumnIndexer>(
        index_file_path,
        FieldSchema("test", DataType::VECTOR_FP32, 3, false,
                    std::make_shared<FlatIndexParams>(MetricType::IP)),
        "unsupported_engine");
    ASSERT_TRUE(indexer);

    auto status = open_default(indexer);
    ASSERT_FALSE(status.ok());
    ASSERT_EQ(status.message(), "Engine name not supported");
  }

  // Test case 3: Invalid field schema (nullptr index_params)
  {
    FieldSchema schema_without_params("test", DataType::VECTOR_FP32, 3, false,
                                      nullptr);
    auto indexer = std::make_shared<VectorColumnIndexer>(
        index_file_path, schema_without_params);
    ASSERT_TRUE(indexer);

    auto status = open_default(indexer);
    ASSERT_FALSE(status.ok());
    ASSERT_EQ(status.message(), "field_schema.index_params nullptr");
  }

  // Test case 4: Unsupported data type in engine helper
  {
    // DataType::UNDEFINED is expected to be rejected while the index
    // parameters are built.
    FieldSchema undefined_type_schema(
        "test", DataType::UNDEFINED, 3, false,
        std::make_shared<FlatIndexParams>(MetricType::IP));
    auto indexer = std::make_shared<VectorColumnIndexer>(
        index_file_path, undefined_type_schema);
    ASSERT_TRUE(indexer);

    auto status = open_default(indexer);
    ASSERT_FALSE(status.ok());
    ASSERT_EQ(status.message(),
              "failed to build index param: unsupported data type");
  }

  // Test case 5: Unsupported metric type in engine helper
  {
    FieldSchema undefined_metric_schema(
        "test", DataType::VECTOR_FP32, 3, false,
        std::make_shared<FlatIndexParams>(MetricType::UNDEFINED));
    auto indexer = std::make_shared<VectorColumnIndexer>(
        index_file_path, undefined_metric_schema);
    ASSERT_TRUE(indexer);

    auto status = open_default(indexer);
    ASSERT_FALSE(status.ok());
    ASSERT_EQ(status.message(),
              "failed to build index param: unsupported metric type");
  }

  // Test case 6: Unsupported quantize type in engine helper
  {
    auto flat_params = std::make_shared<FlatIndexParams>(MetricType::IP);
    // 999 is not a valid QuantizeType enumerator.
    flat_params->set_quantize_type(static_cast<QuantizeType>(999));

    FieldSchema bad_quantize_schema("test", DataType::VECTOR_FP32, 3, false,
                                    flat_params);
    auto indexer = std::make_shared<VectorColumnIndexer>(index_file_path,
                                                         bad_quantize_schema);
    ASSERT_TRUE(indexer);

    auto status = open_default(indexer);
    ASSERT_FALSE(status.ok());
    ASSERT_EQ(status.message(),
              "failed to build index param: unsupported quantize type");
  }

  // // Test case 7: Unsupported index type in engine helper
  // {
  //   // Create a custom index params with unsupported index type
  //   class UnsupportedIndexTypeParams : public FlatIndexParams {
  //    public:
  //     UnsupportedIndexTypeParams() : FlatIndexParams(MetricType::IP) {}
  //     void mock() {
  //       type_ = static_cast<IndexType>(999);
  //     }
  //   };
  //   auto index_params = std::make_shared<UnsupportedIndexTypeParams>();
  //   index_params->mock();
  //   FieldSchema unsupported_schema("test", DataType::VECTOR_FP32, 3, false,
  //                                  index_params);
  //   auto indexer = std::make_shared<VectorColumnIndexer>(index_file_path,
  //                                                        unsupported_schema);
  //   ASSERT_TRUE(indexer);
  //
  //   auto open_result =
  //       indexer->Open(vector_column_params::ReadOptions{true, true});
  //   ASSERT_FALSE(open_result.ok());
  //   ASSERT_EQ(open_result.message(), "not supported");
  // }

  // Test case 8: bf_pks size > 1 error
  {
    auto indexer = new_flat_ip_indexer();
    ASSERT_TRUE(indexer);

    ASSERT_TRUE(open_default(indexer).ok());

    // The index needs at least one document before searching.
    auto payload = vector_column_params::VectorData{
        vector_column_params::DenseVector{vector.data()}};
    ASSERT_TRUE(indexer->Insert(payload, kDocId).ok());

    // Search with two bf_pks lists, which is rejected.
    auto query = vector_column_params::VectorData{
        vector_column_params::DenseVector{.data = vector.data()}};
    auto first_pks = std::vector<uint64_t>{1, 2};
    auto second_pks = std::vector<uint64_t>{3, 4};
    auto search_options =
        vector_column_params::QueryParams{.topk = 10,
                                          .filter = nullptr,
                                          .fetch_vector = false,
                                          .bf_pks = {first_pks, second_pks}};

    auto hits = indexer->Search(query, search_options);
    ASSERT_FALSE(hits.has_value());
    ASSERT_EQ(hits.error().message(), "bf_pks size > 1 is not supported");

    indexer->Destroy();
  }

  // Test case 9: Invalid field schema for query param conversion
  {
    auto indexer = std::make_shared<VectorColumnIndexer>(
        index_file_path,
        FieldSchema("test", DataType::VECTOR_FP32, 3, false, nullptr));
    ASSERT_TRUE(indexer);

    ASSERT_FALSE(open_default(indexer).ok());
  }

  // Test case 10: use_mmap = false
  {
    auto writer = new_flat_ip_indexer();
    ASSERT_TRUE(writer);
    ASSERT_TRUE(
        writer->Open(vector_column_params::ReadOptions{true, true, false})
            .ok());

    // Write one document, then flush and close so the file can be reopened.
    auto payload = vector_column_params::VectorData{
        vector_column_params::DenseVector{vector.data()}};
    ASSERT_TRUE(writer->Insert(payload, kDocId).ok());
    ASSERT_TRUE(writer->Flush().ok());
    ASSERT_TRUE(writer->Close().ok());

    {
      // Reopen the same file through a second indexer with different options.
      auto reopened = new_flat_ip_indexer();
      ASSERT_TRUE(reopened);
      auto status =
          reopened->Open(vector_column_params::ReadOptions{false, false, true});
      ASSERT_TRUE(status.ok());
      reopened->Destroy();
    }
  }

  // Test case 11: Index already opened error
  {
    auto indexer = new_flat_ip_indexer();
    ASSERT_TRUE(indexer);

    // First open should succeed
    auto first_open = open_default(indexer);
    ASSERT_TRUE(first_open.ok());

    // Second open should fail
    auto second_open = open_default(indexer);
    ASSERT_FALSE(second_open.ok());
    ASSERT_EQ(second_open.message(), "Index already opened");

    indexer->Destroy();
  }

  // Test case 12: Test doc_count() on unopened indexer
  {
    auto indexer = new_flat_ip_indexer();
    ASSERT_TRUE(indexer);

    // doc_count() should return -1 for unopened indexer
    ASSERT_EQ(indexer->doc_count(), static_cast<size_t>(-1));
  }

  // Test case 13: Test Merge with empty indexers list
  {
    auto indexer = new_flat_ip_indexer();
    ASSERT_TRUE(indexer);

    ASSERT_TRUE(open_default(indexer).ok());

    // Merge with empty indexers list should succeed
    auto merge_status = indexer->Merge({}, nullptr);
    ASSERT_TRUE(merge_status.ok());

    indexer->Destroy();
  }

  // Test case 14: Test Merge with same index file path (should be skipped)
  {
    auto indexer1 = new_flat_ip_indexer();
    ASSERT_TRUE(indexer1);

    ASSERT_TRUE(open_default(indexer1).ok());

    // Insert some data
    auto payload = vector_column_params::VectorData{
        vector_column_params::DenseVector{vector.data()}};
    ASSERT_TRUE(indexer1->Insert(payload, kDocId).ok());

    // Merge with itself (same index file path) should succeed (skipped)
    auto merge_status = indexer1->Merge({indexer1}, nullptr);
    ASSERT_TRUE(merge_status.ok());

    indexer1->Destroy();
  }

  // Test case 15: Test Fetch with non-existent doc_id
  {
    auto indexer = new_flat_ip_indexer();
    ASSERT_TRUE(indexer);

    ASSERT_TRUE(open_default(indexer).ok());

    // Fetch non-existent doc_id should fail
    auto fetched = indexer->Fetch(99999);
    ASSERT_FALSE(fetched.has_value());
    ASSERT_EQ(fetched.error().message(), "Failed to fetch vector from index");

    indexer->Destroy();
  }

  // // Test case 16: Test Search with invalid query params (unsupported index
  // // type)
  // {
  //   // Create a custom index params with unsupported index type for query
  //   class UnsupportedQueryIndexParams : public IndexParams {
  //    public:
  //     IndexType type() const override {
  //       return static_cast<IndexType>(999);
  //     }
  //     MetricType metric_type() const override {
  //       return MetricType::IP;
  //     }
  //     QuantizeType quantize_type() const override {
  //       return QuantizeType::UNDEFINED;
  //     }
  //     IndexParams::Ptr clone() const override {
  //       return std::make_shared<UnsupportedQueryIndexParams>();
  //     }
  //   };
  //
  //   FieldSchema unsupported_schema(
  //       "test", DataType::VECTOR_FP32, 3, false,
  //       std::make_shared<UnsupportedQueryIndexParams>());
  //   auto indexer = std::make_shared<VectorColumnIndexer>(index_file_path,
  //                                                        unsupported_schema);
  //   ASSERT_TRUE(indexer);
  //
  //   ASSERT_TRUE(
  //       indexer->Open(vector_column_params::ReadOptions{true, true}).ok());
  //
  //   // Insert some data first
  //   auto data = vector_column_params::VectorData{
  //       vector_column_params::DenseVector{vector.data()}};
  //   ASSERT_TRUE(indexer->Insert(data, kDocId).ok());
  //
  //   // Test search with unsupported index type
  //   auto query = vector_column_params::VectorData{
  //       vector_column_params::DenseVector{.data = vector.data()}};
  //   auto query_params = vector_column_params::QueryParams{
  //       .topk = 10, .filter = nullptr, .fetch_vector = false};
  //
  //   auto search_result = indexer->Search(query, query_params);
  //   ASSERT_FALSE(search_result.has_value());
  //   ASSERT_EQ(search_result.error().message(), "not supported");
  //
  //   indexer->Close();
  // }

  system(rm_cmd.c_str());
}

// Exercises insert, fetch, filtered merge and search on cosine-metric
// indexers.
//
// Flow of the only combination that is currently run (see the bottom of the
// test): indexer1 is built from `param1`, indexer2 from `param2`, and the
// merge target indexer3 from `param3`. One vector goes into indexer1 (doc 0)
// and two into indexer2 (docs 0 and 1); every vector is fetched back and
// checked, the two sources are merged into indexer3 through a filter, and a
// top-k search with vector fetching is issued.
//
// NOTE(review): all component checks have the form `a - b < eps` without an
// absolute value, so they only bound the fetched value from above.
TEST(VectorColumnIndexerTest, CosineMerge) {
  constexpr uint32_t kDimension = 64;
  const std::string index_name{"test_indexer.index"};

  // Removes an index file from disk by shelling out to `rm -f`.
  auto del_index_file_func = [&](const std::string file_name) {
    auto cmd_buf = "rm -f " + file_name;
    system(cmd_buf.c_str());
  };

  // Deletes any stale file at `index_name`, then constructs and opens a
  // kDimension-wide FP32 indexer on it. Returns nullptr when opening fails.
  auto create_indexer_func =
      [&](const IndexParams::Ptr &index_params,
          const std::string &index_name) -> VectorColumnIndexer::Ptr {
    del_index_file_func(index_name);
    auto indexer = std::make_shared<VectorColumnIndexer>(
        index_name, FieldSchema("test", DataType::VECTOR_FP32, kDimension,
                                false, index_params));
    if (indexer == nullptr ||
        !indexer->Open(vector_column_params::ReadOptions{true, true}).ok()) {
      return nullptr;
    }
    return indexer;
  };

  // Runs the whole scenario for one (source1, source2, merge target) triple of
  // index parameters. ASSERT_* failures return from this lambda only.
  auto func = [&](const IndexParams::Ptr &param1,
                  const IndexParams::Ptr &param2,
                  const IndexParams::Ptr &param3) {
    auto indexer1 = create_indexer_func(param1, index_name + "1");
    ASSERT_NE(nullptr, indexer1);
    auto indexer2 = create_indexer_func(param2, index_name + "2");
    ASSERT_NE(nullptr, indexer2);

    // `vector_data` is built once from `vector.data()`; the writes to
    // `vector[1]` below are expected to be visible to the later Insert calls
    // (the fetch checks further down look for 1.0 / 2.0 / 3.0 respectively).
    std::vector<float> vector(kDimension);
    vector[1] = 1.0f;
    vector[2] = 123.0f;
    auto vector_data = vector_column_params::VectorData{
        vector_column_params::DenseVector{vector.data()}};
    ASSERT_TRUE(indexer1->Insert(vector_data, 0).ok());

    vector[1] = 2.0f;
    ASSERT_TRUE(indexer2->Insert(vector_data, 0).ok());
    vector[1] = 3.0f;
    ASSERT_TRUE(indexer2->Insert(vector_data, 1).ok());

    // indexer1 / doc 0: component 1 was inserted as 1.0.
    {
      auto fetched_data = indexer1->Fetch(0);
      ASSERT_TRUE(fetched_data.has_value());
      const float *fetched_vector = reinterpret_cast<const float *>(
          std::get<vector_column_params::DenseVectorBuffer>(
              fetched_data->vector_buffer)
              .data.data());
      LOG_INFO(
          "indexer1 fetched_vector doc_id:0:%s",
          print_dense_vector(fetched_vector, 3, DataType::VECTOR_FP32).c_str());
      ASSERT_TRUE(fetched_vector[1] - 1.0f < 1e-2);
      ASSERT_TRUE(fetched_vector[2] - 123.0f < 1);
    }
    // indexer2 / doc 0: component 1 was inserted as 2.0.
    {
      auto fetched_data = indexer2->Fetch(0);
      ASSERT_TRUE(fetched_data.has_value());
      const float *fetched_vector = reinterpret_cast<const float *>(
          std::get<vector_column_params::DenseVectorBuffer>(
              fetched_data->vector_buffer)
              .data.data());
      LOG_INFO(
          "indexer2 fetched_vector doc_id:0:%s",
          print_dense_vector(fetched_vector, 3, DataType::VECTOR_FP32).c_str());
      ASSERT_TRUE(fetched_vector[1] - 2.0f < 1e-2);
      ASSERT_TRUE(fetched_vector[2] - 123.0f < 1);
    }
    // indexer2 / doc 1: component 1 was inserted as 3.0.
    {
      auto fetched_data = indexer2->Fetch(1);
      ASSERT_TRUE(fetched_data.has_value());
      const float *fetched_vector = reinterpret_cast<const float *>(
          std::get<vector_column_params::DenseVectorBuffer>(
              fetched_data->vector_buffer)
              .data.data());
      LOG_INFO(
          "indexer2 fetched_vector doc_id:1:%s",
          print_dense_vector(fetched_vector, 3, DataType::VECTOR_FP32).c_str());
      ASSERT_TRUE(fetched_vector[1] - 3.0f < 1e-2);
      ASSERT_TRUE(fetched_vector[2] - 123.0f < 1);
    }

    // Disabled scenario: merge without a filter.
    // {  // test reduce
    //   auto indexer3 = create_indexer_func(param3, index_name + "3");
    //   ASSERT_NE(nullptr, indexer3);
    //   ASSERT_TRUE(indexer3->Merge({indexer1, indexer2}, nullptr).ok());
    //   {
    //     auto fetched_data = indexer3->Fetch(0);
    //     ASSERT_TRUE(fetched_data.has_value());
    //     const float *fetched_vector = reinterpret_cast<const float *>(
    //         std::get<vector_column_params::DenseVectorBuffer>(
    //             fetched_data->vector_buffer)
    //             .data.data());
    //     LOG_INFO("indexer3 fetched_vector doc_id:0:%s",
    //              print_dense_vector(fetched_vector, 3,
    //              DataType::VECTOR_FP32)
    //                  .c_str());
    //     ASSERT_TRUE(fetched_vector[1] - 1.0f < 1e-2);
    //     ASSERT_TRUE(fetched_vector[2] - 123.0f < 1);
    //   }
    //   {
    //     auto fetched_data = indexer3->Fetch(1);
    //     ASSERT_TRUE(fetched_data.has_value());
    //     const float *fetched_vector = reinterpret_cast<const float *>(
    //         std::get<vector_column_params::DenseVectorBuffer>(
    //             fetched_data->vector_buffer)
    //             .data.data());
    //     LOG_INFO("indexer3 fetched_vector doc_id:1:%s",
    //              print_dense_vector(fetched_vector, 3,
    //              DataType::VECTOR_FP32)
    //                  .c_str());
    //     ASSERT_TRUE(fetched_vector[1] - 2.0f < 1e-2);
    //     ASSERT_TRUE(fetched_vector[2] - 123.0f < 1);
    //   }
    //   indexer3->Close();
    //   del_index_file_func(index_name + "3");
    // }
    //
    {  // test reduce with filter
      auto indexer3 = create_indexer_func(param3, index_name + "3");
      ASSERT_NE(nullptr, indexer3);
      // The filter predicate matches key 0 only.
      auto filter = std::make_shared<EasyIndexFilter>(
          [](uint64_t key) { return key == 0; });
      ASSERT_TRUE(indexer3->Merge({indexer1, indexer2}, filter).ok());
      // Expected mapping "<source indexer>.<doc id> -> merged doc id":
      // 0.0 -> x ; 1.0 -> 0 ; 1.1 -> 1
      // i.e. the single document of the first source is dropped and the two
      // documents of the second source remain.
      ASSERT_TRUE(indexer3->doc_count() == 2);
      // Merged doc 0 should carry the vector inserted as indexer2 / doc 0.
      {
        auto fetched_data = indexer3->Fetch(0);
        ASSERT_TRUE(fetched_data.has_value());
        const float *fetched_vector = reinterpret_cast<const float *>(
            std::get<vector_column_params::DenseVectorBuffer>(
                fetched_data->vector_buffer)
                .data.data());
        LOG_INFO("indexer3 fetched_vector doc_id:0:%s",
                 print_dense_vector(fetched_vector, 3, DataType::VECTOR_FP32)
                     .c_str());
        ASSERT_TRUE(fetched_vector[1] - 2.0f < 1e-2);
        ASSERT_TRUE(fetched_vector[2] - 123.0f < 1);
      }

      {
        vector[1] = 3.0f;
        // search with fetch vector
        // NOTE(review): the query runs against indexer2, not the merged
        // indexer3 -- confirm that this is intentional.
        auto query = vector_column_params::VectorData{
            vector_column_params::DenseVector{vector.data()}};
        auto query_params = vector_column_params::QueryParams{
            .topk = 10, .filter = nullptr, .fetch_vector = true};
        auto results = indexer2->Search(query, query_params);
        ASSERT_TRUE(results.has_value());
        auto vector_results =
            dynamic_cast<VectorIndexResults *>(results.value().get());
        ASSERT_TRUE(vector_results);
        // indexer2 holds exactly two documents, so topk = 10 yields two hits.
        ASSERT_EQ(vector_results->count(), 2);
        auto iter = vector_results->create_iterator();
        ASSERT_TRUE(iter->valid());

        // First hit: log key, score and both vector representations, then
        // bound the score to the open interval (-0.01, 2.01).
        {
          int doc_idx = 0;
          auto query_results_doc = vector_results->docs()[doc_idx];
          LOG_INFO("topk%d pk: %zu", doc_idx, (size_t)query_results_doc.key());
          LOG_INFO("topk%d score: %.10f", doc_idx, query_results_doc.score());
          LOG_INFO("topk%d fetched_vector - reverted:%s", doc_idx,
                   print_dense_vector(
                       vector_results->reverted_vector_list()[doc_idx].data(),
                       kDimension, DataType::VECTOR_FP32)
                       .c_str());
          // The un-reverted vector is printed as FP16 -- presumably because
          // the searched indexer uses FP16 quantization; verify if the
          // parameter combination below changes.
          LOG_INFO("topk%d fetched_vector - original:%s", doc_idx,
                   print_dense_vector(query_results_doc.vector(), kDimension,
                                      DataType::VECTOR_FP16)
                       .c_str());
          ASSERT_TRUE(query_results_doc.score() < 2.01);
          ASSERT_TRUE(query_results_doc.score() > -0.01);
        }
        // Second hit: same logging and score bounds as the first one.
        {
          int doc_idx = 1;
          auto query_results_doc = vector_results->docs()[doc_idx];
          LOG_INFO("topk%d pk: %zu", doc_idx, (size_t)query_results_doc.key());
          LOG_INFO("topk%d score: %.10f", doc_idx, query_results_doc.score());
          LOG_INFO("topk%d fetched_vector - reverted:%s", doc_idx,
                   print_dense_vector(
                       vector_results->reverted_vector_list()[doc_idx].data(),
                       kDimension, DataType::VECTOR_FP32)
                       .c_str());
          LOG_INFO("topk%d fetched_vector - original:%s", doc_idx,
                   print_dense_vector(query_results_doc.vector(), kDimension,
                                      DataType::VECTOR_FP16)
                       .c_str());
          ASSERT_TRUE(query_results_doc.score() < 2.01);
          ASSERT_TRUE(query_results_doc.score() > -0.01);
        }
        // ASSERT_TRUE(vector_results->docs()[0].key() == 1);
      }

      indexer3->Close();
      del_index_file_func(index_name + "3");
    }
    //
    // Disabled scenario: filtered merge with an extra third Merge argument.
    // {  // test reduce with filter in parallel
    //   auto indexer3 = create_indexer_func(param3, index_name + "3");
    //   ASSERT_NE(nullptr, indexer3);
    //   auto filter = std::make_shared<EasyIndexFilter>(
    //       [](uint64_t key) { return key == 0; });
    //   ASSERT_TRUE(indexer3->Merge({indexer1, indexer2}, filter, {3}).ok());
    //
    //   {
    //     auto fetched_data = indexer3->Fetch(0);
    //     ASSERT_TRUE(fetched_data.has_value());
    //     const float *fetched_vector = reinterpret_cast<const float *>(
    //         std::get<vector_column_params::DenseVectorBuffer>(
    //             fetched_data->vector_buffer)
    //             .data.data());
    //     LOG_INFO("indexer3 fetched_vector doc_id:0:%s",
    //              print_dense_vector(fetched_vector, 3,
    //              DataType::VECTOR_FP32)
    //                  .c_str());
    //     ASSERT_TRUE(fetched_vector[1] - 2.0f < 1e-2);
    //     ASSERT_TRUE(fetched_vector[2] - 123.0f < 1);
    //   }
    //   indexer3->Close();
    //   del_index_file_func(index_name + "3");
    // }


    // Release both source indexers and remove their files.
    indexer1->Close();
    indexer2->Close();
    del_index_file_func(index_name + "1");
    del_index_file_func(index_name + "2");
  };

  // same index with different quantize type
  {
    LOG_INFO("Merge: same index - FlatIndex with different quantize type");
    auto metric_type = MetricType::COSINE;
    // NOTE(review): `param_flat` and `param_hnsw` are constructed but not
    // passed to any active `func` call; the commented-out calls below refer to
    // `param` / `param_fp16`, which are not declared in this scope.
    auto param_flat = std::make_shared<FlatIndexParams>(metric_type);
    auto param_flat_fp16 =
        std::make_shared<FlatIndexParams>(metric_type, QuantizeType::FP16);
    auto param_hnsw = std::make_shared<HnswIndexParams>(metric_type, 10, 100);
    auto param_hnsw_fp16 = std::make_shared<HnswIndexParams>(
        metric_type, 10, 100, QuantizeType::FP16);
    // func(param, param_fp16, param_fp16);
    // func(param, param_fp16, param);
    // func(param_fp16, param, param_fp16);
    // func(param_fp16, param, param);
    // func(param_fp16, param_fp16, param_fp16);
    // Only active combination: HNSW/FP16 source 1, Flat/FP16 source 2,
    // Flat/FP16 merge target.
    func(param_hnsw_fp16, param_flat_fp16, param_flat_fp16);
  }
}

// Runs searches on a (usually quantized) indexer while passing a second,
// "reference" indexer through the refiner parameters, for several metric /
// quantization combinations.
//
// For every combination the same kCount vectors are inserted into both
// indexers, then kCount queries are issued with topk = kTopk and the test
// asserts that each query succeeds and returns exactly kTopk results.
TEST(VectorColumnIndexerTest, Refiner) {
  const std::string kIndexFilePath = "test_indexer.index";
  const int kDim = 20;
  const int kCount = 20;  // can't set too large, or the quantization error
                          // will be too large due to float's precision
  const int kTopk = 10;

  // Removes an index file from disk by shelling out to `rm -f`.
  auto del_index_file_func = [&](const std::string &file_name) {
    auto cmd_buf = "rm -f " + file_name;
    system(cmd_buf.c_str());
  };

  // Deletes any stale file at `index_file_path`, then constructs and opens a
  // kDim-wide indexer of `data_type` on it. Returns nullptr if opening fails.
  auto create_indexer_func =
      [&](const IndexParams::Ptr &index_params,
          const std::string &index_file_path,
          DataType data_type) -> VectorColumnIndexer::Ptr {
    del_index_file_func(index_file_path);
    auto indexer = std::make_shared<VectorColumnIndexer>(
        index_file_path,
        FieldSchema("test", data_type, kDim, false, index_params));
    if (indexer == nullptr ||
        !indexer->Open(vector_column_params::ReadOptions{true, true}).ok()) {
      return nullptr;
    }
    return indexer;
  };

  // Runs one scenario: `index_params` configures the indexer under test,
  // `reference_index_params` the indexer handed to the refiner. A combination
  // whose indexer cannot be created/opened is skipped silently (kept from the
  // original test). ASSERT_* failures return from this lambda only.
  auto func = [&](const IndexParams::Ptr &index_params,
                  const IndexParams::Ptr &reference_index_params,
                  DataType data_type) {
    auto indexer = create_indexer_func(index_params, kIndexFilePath, data_type);
    if (indexer == nullptr) {
      return;
    }
    auto reference_indexer = create_indexer_func(
        reference_index_params, kIndexFilePath + "_reference", data_type);
    if (reference_indexer == nullptr) {
      // Do not leave the already opened primary indexer behind.
      indexer->Destroy();
      return;
    }

    // insert: both indexers receive identical data under identical doc ids.
    for (int i = 0; i < kCount; ++i) {
      auto buffer = create_dense_vector(kDim, data_type, i, kCount, 0.1f);
      // print_dense_vector(buffer.data.data(), kDim, data_type);
      auto data = vector_column_params::VectorData{
          vector_column_params::DenseVector{buffer.data.data()}};
      ASSERT_TRUE(indexer->Insert(data, i).ok());
      ASSERT_TRUE(reference_indexer->Insert(data, i).ok());
    }

    // query: search the indexer under test with the reference indexer
    // attached through RefinerParam.
    for (int i = 0; i < kCount; ++i) {
      auto buffer = create_dense_vector(kDim, data_type, i, kCount, 0.3f);
      auto data = vector_column_params::VectorData{
          vector_column_params::DenseVector{buffer.data.data()}};
      auto query_params = vector_column_params::QueryParams{
          .topk = kTopk,
          .filter = nullptr,
          .fetch_vector = true,
          .query_params = std::make_shared<zvec::HnswQueryParams>(100),
          .refiner_param = std::make_shared<vector_column_params::RefinerParam>(
              vector_column_params::RefinerParam{
                  .scale_factor_ = 10,
                  .reference_indexer = reference_indexer})};
      auto results = indexer->Search(data, query_params);
      ASSERT_TRUE(results.has_value());
      auto vector_results =
          dynamic_cast<VectorIndexResults *>(results.value().get());
      ASSERT_TRUE(vector_results);
      ASSERT_EQ(vector_results->count(), kTopk);
      // Only exercises iterator creation; the iterator itself is not used.
      auto iter = vector_results->create_iterator();
      LOG_INFO("===query pk: %d", i);
      LOG_INFO("query_vector:%s",
               print_dense_vector(buffer.data.data(), kDim, data_type).c_str());
    }
    // Clean up both indexers; the reference indexer used to be left behind.
    indexer->Destroy();
    reference_indexer->Destroy();
  };

  LOG_INFO(
      "Test FlatIndexParams(MetricType::IP), VECTOR_FP32, "
      "QuantizeType::FP16");

  func(std::make_shared<HnswIndexParams>(MetricType::IP, 10, 100,
                                         QuantizeType::FP16),
       std::make_shared<FlatIndexParams>(MetricType::IP),
       DataType::VECTOR_FP32);

  func(std::make_shared<FlatIndexParams>(MetricType::IP, QuantizeType::FP16),
       std::make_shared<FlatIndexParams>(MetricType::IP),
       DataType::VECTOR_FP32);

  LOG_INFO(
      "Test FlatIndexParams(MetricType::MIPSL2), VECTOR_FP32, "
      "QuantizeType::FP16");

  func(std::make_shared<HnswIndexParams>(MetricType::MIPSL2, 10, 100,
                                         QuantizeType::FP16),
       std::make_shared<FlatIndexParams>(MetricType::IP),
       DataType::VECTOR_FP32);

  func(
      std::make_shared<FlatIndexParams>(MetricType::MIPSL2, QuantizeType::FP16),
      std::make_shared<FlatIndexParams>(MetricType::IP), DataType::VECTOR_FP32);

  LOG_INFO(
      "Test FlatIndexParams(MetricType::COSINE), VECTOR_FP32, "
      "QuantizeType::FP16");
  func(
      std::make_shared<FlatIndexParams>(MetricType::COSINE, QuantizeType::FP16),
      std::make_shared<FlatIndexParams>(MetricType::COSINE),
      DataType::VECTOR_FP32);

  LOG_INFO(
      "Test FlatIndexParams(MetricType::L2), VECTOR_FP32, "
      "QuantizeType::Int8");
  func(std::make_shared<FlatIndexParams>(MetricType::L2, QuantizeType::INT8),
       std::make_shared<FlatIndexParams>(MetricType::L2),
       DataType::VECTOR_FP32);
}

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif

================================================
FILE: tests/db/index/common/db_proto_converter_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <gtest/gtest.h>
#include "db/index/common/proto_converter.h"
#include "db/index/common/type_helper.h"

using namespace zvec;

// Round-trips the `enable_range_optimization` flag of InvertIndexParams
// between its protobuf and C++ representations.
TEST(ConverterTest, InvertIndexParamsConversion) {
  // protobuf -> C++, flag switched on
  {
    proto::InvertIndexParams pb_enabled;
    pb_enabled.set_enable_range_optimization(true);

    auto parsed = ProtoConverter::FromPb(pb_enabled);
    ASSERT_NE(parsed, nullptr);
    EXPECT_TRUE(parsed->enable_range_optimization());
    EXPECT_EQ(parsed->type(), IndexType::INVERT);
  }

  // protobuf -> C++, flag switched off
  {
    proto::InvertIndexParams pb_disabled;
    pb_disabled.set_enable_range_optimization(false);

    auto parsed = ProtoConverter::FromPb(pb_disabled);
    ASSERT_NE(parsed, nullptr);
    EXPECT_FALSE(parsed->enable_range_optimization());
  }

  // C++ -> protobuf
  {
    InvertIndexParams cpp_params(true);
    auto serialized = ProtoConverter::ToPb(&cpp_params);
    EXPECT_TRUE(serialized.enable_range_optimization());
  }
}

// Checks that metric type, quantize type, m and ef_construction of
// HnswIndexParams survive conversion in both directions.
TEST(ConverterTest, HnswIndexParamsConversion) {
  // protobuf -> C++
  {
    proto::HnswIndexParams source_pb;
    source_pb.set_m(16);
    source_pb.set_ef_construction(100);
    auto *base_msg = source_pb.mutable_base();
    base_msg->set_metric_type(proto::MT_L2);
    base_msg->set_quantize_type(proto::QT_FP16);

    auto parsed = ProtoConverter::FromPb(source_pb);
    ASSERT_NE(parsed, nullptr);
    EXPECT_EQ(parsed->metric_type(), MetricType::L2);
    EXPECT_EQ(parsed->m(), 16);
    EXPECT_EQ(parsed->ef_construction(), 100);
    EXPECT_EQ(parsed->quantize_type(), QuantizeType::FP16);
    EXPECT_EQ(parsed->type(), IndexType::HNSW);
  }

  // C++ -> protobuf
  {
    HnswIndexParams cpp_params(MetricType::IP, 32, 200, QuantizeType::INT8);
    auto serialized = ProtoConverter::ToPb(&cpp_params);
    EXPECT_EQ(serialized.base().metric_type(), proto::MT_IP);
    EXPECT_EQ(serialized.m(), 32);
    EXPECT_EQ(serialized.ef_construction(), 200);
    EXPECT_EQ(serialized.base().quantize_type(), proto::QT_INT8);
  }
}

// Checks that metric type and quantize type of FlatIndexParams survive
// conversion in both directions.
TEST(ConverterTest, FlatIndexParamsConversion) {
  // protobuf -> C++
  {
    proto::FlatIndexParams source_pb;
    auto *base_msg = source_pb.mutable_base();
    base_msg->set_metric_type(proto::MT_COSINE);
    base_msg->set_quantize_type(proto::QT_INT4);

    auto parsed = ProtoConverter::FromPb(source_pb);
    ASSERT_NE(parsed, nullptr);
    EXPECT_EQ(parsed->metric_type(), MetricType::COSINE);
    EXPECT_EQ(parsed->quantize_type(), QuantizeType::INT4);
    EXPECT_EQ(parsed->type(), IndexType::FLAT);
  }

  // C++ -> protobuf
  {
    FlatIndexParams cpp_params(MetricType::L2, QuantizeType::FP16);
    auto serialized = ProtoConverter::ToPb(&cpp_params);
    EXPECT_EQ(serialized.base().metric_type(), proto::MT_L2);
    EXPECT_EQ(serialized.base().quantize_type(), proto::QT_FP16);
  }
}

// Checks that the IVFIndexParams fields (metric type, quantize type, n_list,
// and on the C++ -> protobuf path also n_iters and use_soar) survive
// conversion.
TEST(ConverterTest, IVFIndexParamsConversion) {
  // protobuf -> C++
  {
    proto::IVFIndexParams source_pb;
    source_pb.set_n_list(128);
    auto *base_msg = source_pb.mutable_base();
    base_msg->set_metric_type(proto::MT_IP);
    base_msg->set_quantize_type(proto::QT_INT8);

    auto parsed = ProtoConverter::FromPb(source_pb);
    ASSERT_NE(parsed, nullptr);
    EXPECT_EQ(parsed->metric_type(), MetricType::IP);
    EXPECT_EQ(parsed->n_list(), 128);
    EXPECT_EQ(parsed->quantize_type(), QuantizeType::INT8);
    EXPECT_EQ(parsed->type(), IndexType::IVF);
  }

  // C++ -> protobuf
  {
    IVFIndexParams cpp_params(MetricType::COSINE, 256, 10, false,
                              QuantizeType::INT4);
    auto serialized = ProtoConverter::ToPb(&cpp_params);
    EXPECT_EQ(serialized.base().metric_type(), proto::MT_COSINE);
    EXPECT_EQ(serialized.n_list(), 256);
    EXPECT_EQ(serialized.n_iters(), 10);
    EXPECT_FALSE(serialized.use_soar());
    EXPECT_EQ(serialized.base().quantize_type(), proto::QT_INT4);
  }
}

// Converts the generic proto::IndexParams wrapper for each index kind (HNSW,
// FLAT, IVF, INVERT) into C++ parameters, and converts a concrete C++
// parameter object of the same kind back to protobuf.
TEST(ConverterTest, IndexParamsConversion) {
  // ---- HNSW: protobuf wrapper -> C++ ----
  {
    proto::IndexParams wrapper_pb;
    auto *hnsw_msg = wrapper_pb.mutable_hnsw();
    hnsw_msg->set_m(16);
    hnsw_msg->set_ef_construction(100);
    auto *base_msg = hnsw_msg->mutable_base();
    base_msg->set_metric_type(proto::MT_L2);
    base_msg->set_quantize_type(proto::QT_FP16);

    auto generic = ProtoConverter::FromPb(wrapper_pb);
    ASSERT_NE(generic, nullptr);
    EXPECT_EQ(generic->type(), IndexType::HNSW);
    auto concrete = std::dynamic_pointer_cast<HnswIndexParams>(generic);
    ASSERT_NE(concrete, nullptr);
    EXPECT_EQ(concrete->metric_type(), MetricType::L2);
    EXPECT_EQ(concrete->m(), 16);
    EXPECT_EQ(concrete->ef_construction(), 100);
    EXPECT_EQ(concrete->quantize_type(), QuantizeType::FP16);
  }

  // ---- HNSW: C++ -> protobuf ----
  {
    HnswIndexParams cpp_params(MetricType::IP, 32, 200);
    auto serialized = ProtoConverter::ToPb(&cpp_params);
    EXPECT_EQ(serialized.base().metric_type(), proto::MT_IP);
    EXPECT_EQ(serialized.m(), 32);
    EXPECT_EQ(serialized.ef_construction(), 200);
  }

  // ---- FLAT: protobuf wrapper -> C++ ----
  {
    proto::IndexParams wrapper_pb;
    auto *base_msg = wrapper_pb.mutable_flat()->mutable_base();
    base_msg->set_metric_type(proto::MT_COSINE);
    base_msg->set_quantize_type(proto::QT_INT8);

    auto generic = ProtoConverter::FromPb(wrapper_pb);
    ASSERT_NE(generic, nullptr);
    EXPECT_EQ(generic->type(), IndexType::FLAT);
    auto concrete = std::dynamic_pointer_cast<FlatIndexParams>(generic);
    ASSERT_NE(concrete, nullptr);
    EXPECT_EQ(concrete->metric_type(), MetricType::COSINE);
    EXPECT_EQ(concrete->quantize_type(), QuantizeType::INT8);
  }

  // ---- FLAT: C++ -> protobuf ----
  {
    FlatIndexParams cpp_params(MetricType::L2);
    auto serialized = ProtoConverter::ToPb(&cpp_params);
    EXPECT_EQ(serialized.base().metric_type(), proto::MT_L2);
  }

  // ---- IVF: protobuf wrapper -> C++ ----
  {
    proto::IndexParams wrapper_pb;
    auto *ivf_msg = wrapper_pb.mutable_ivf();
    ivf_msg->set_n_list(128);
    auto *base_msg = ivf_msg->mutable_base();
    base_msg->set_metric_type(proto::MT_IP);
    base_msg->set_quantize_type(proto::QT_INT4);

    auto generic = ProtoConverter::FromPb(wrapper_pb);
    ASSERT_NE(generic, nullptr);
    EXPECT_EQ(generic->type(), IndexType::IVF);
    auto concrete = std::dynamic_pointer_cast<IVFIndexParams>(generic);
    ASSERT_NE(concrete, nullptr);
    EXPECT_EQ(concrete->metric_type(), MetricType::IP);
    EXPECT_EQ(concrete->n_list(), 128);
    EXPECT_EQ(concrete->quantize_type(), QuantizeType::INT4);
  }

  // ---- IVF: C++ -> protobuf ----
  {
    IVFIndexParams cpp_params(MetricType::COSINE, 256);
    auto serialized = ProtoConverter::ToPb(&cpp_params);
    EXPECT_EQ(serialized.base().metric_type(), proto::MT_COSINE);
    EXPECT_EQ(serialized.n_list(), 256);
  }

  // ---- INVERT: protobuf wrapper -> C++ ----
  {
    proto::IndexParams wrapper_pb;
    wrapper_pb.mutable_invert()->set_enable_range_optimization(true);

    auto generic = ProtoConverter::FromPb(wrapper_pb);
    ASSERT_NE(generic, nullptr);
    EXPECT_EQ(generic->type(), IndexType::INVERT);
    auto concrete = std::dynamic_pointer_cast<InvertIndexParams>(generic);
    ASSERT_NE(concrete, nullptr);
    EXPECT_TRUE(concrete->enable_range_optimization());
  }

  // ---- INVERT: C++ -> protobuf ----
  {
    InvertIndexParams cpp_params(false);
    auto serialized = ProtoConverter::ToPb(&cpp_params);
    EXPECT_FALSE(serialized.enable_range_optimization());
  }
}

// Converts a vector field schema carrying HNSW index params from protobuf to
// C++, and a plain array field schema (no index params) from C++ to protobuf.
TEST(ConverterTest, FieldSchemaConversion) {
  // protobuf -> C++
  {
    proto::FieldSchema source_pb;
    source_pb.set_name("test_field");
    source_pb.set_data_type(proto::DT_VECTOR_FP32);
    source_pb.set_dimension(128);
    source_pb.set_nullable(true);

    // Attach HNSW index parameters to the field.
    auto *hnsw_msg = source_pb.mutable_index_params()->mutable_hnsw();
    auto *base_msg = hnsw_msg->mutable_base();
    base_msg->set_metric_type(proto::MT_L2);
    base_msg->set_quantize_type(proto::QT_FP16);
    hnsw_msg->set_m(16);
    hnsw_msg->set_ef_construction(100);

    auto parsed = ProtoConverter::FromPb(source_pb);
    ASSERT_NE(parsed, nullptr);
    EXPECT_EQ(parsed->name(), "test_field");
    EXPECT_EQ(parsed->data_type(), DataType::VECTOR_FP32);
    EXPECT_TRUE(parsed->nullable());
    EXPECT_EQ(parsed->dimension(), 128u);
    ASSERT_NE(parsed->index_params(), nullptr);
    EXPECT_EQ(parsed->index_params()->type(), IndexType::HNSW);
  }

  // C++ -> protobuf
  {
    FieldSchema cpp_field("another_field", DataType::ARRAY_INT32, 64, false,
                          nullptr);
    auto serialized = ProtoConverter::ToPb(cpp_field);
    EXPECT_EQ(serialized.name(), "another_field");
    EXPECT_EQ(serialized.data_type(), proto::DT_ARRAY_INT32);
    EXPECT_FALSE(serialized.nullable());
    EXPECT_EQ(serialized.dimension(), 64u);
  }
}

// Converts a two-field collection schema from protobuf to C++ and checks its
// name, field count and per-segment doc limit; then converts a name-only C++
// schema to protobuf.
TEST(ConverterTest, CollectionSchemaConversion) {
  // protobuf -> C++
  {
    proto::CollectionSchema source_pb;
    source_pb.set_name("test_collection");
    source_pb.set_max_doc_count_per_segment(1000000);

    auto *string_field = source_pb.add_fields();
    string_field->set_name("field1");
    string_field->set_data_type(proto::DT_STRING);

    auto *vector_field = source_pb.add_fields();
    vector_field->set_name("field2");
    vector_field->set_data_type(proto::DT_VECTOR_FP32);
    vector_field->set_dimension(128);

    auto parsed = ProtoConverter::FromPb(source_pb);
    ASSERT_NE(parsed, nullptr);
    EXPECT_EQ(parsed->name(), "test_collection");
    EXPECT_EQ(parsed->fields().size(), 2);
    EXPECT_EQ(parsed->max_doc_count_per_segment(), 1000000u);
  }

  // C++ -> protobuf
  {
    CollectionSchema cpp_schema;
    cpp_schema.set_name("original_collection");

    auto serialized = ProtoConverter::ToPb(cpp_schema);
    EXPECT_EQ(serialized.name(), "original_collection");
  }
}

// Round-trips BlockMeta (id, type, doc-id range, doc count, column names)
// between its protobuf and C++ representations.
//
// Size checks that guard a subsequent element access are fatal (ASSERT_*), so
// that a wrong element count ends the test instead of indexing out of range.
TEST(ConverterTest, BlockMetaConversion) {
  // Test conversion from protobuf to C++ BlockMeta
  proto::BlockMeta meta_pb;
  meta_pb.set_block_id(1);
  meta_pb.set_block_type(proto::BT_SCALAR);
  meta_pb.set_min_doc_id(100);
  meta_pb.set_max_doc_id(200);
  meta_pb.set_doc_count(50);
  meta_pb.add_columns("col1");
  meta_pb.add_columns("col2");

  auto block_meta = ProtoConverter::FromPb(meta_pb);
  ASSERT_NE(block_meta, nullptr);
  EXPECT_EQ(block_meta->id(), 1u);
  EXPECT_EQ(block_meta->type(), BlockType::SCALAR);
  EXPECT_EQ(block_meta->min_doc_id(), 100u);
  EXPECT_EQ(block_meta->max_doc_id(), 200u);
  EXPECT_EQ(block_meta->doc_count(), 50u);
  // Fatal: columns()[0] / [1] are read right below.
  ASSERT_EQ(block_meta->columns().size(), 2u);
  EXPECT_EQ(block_meta->columns()[0], "col1");
  EXPECT_EQ(block_meta->columns()[1], "col2");

  // Test conversion from C++ to protobuf
  BlockMeta original_meta(2, BlockType::VECTOR_INDEX, 300, 400);
  original_meta.set_doc_count(75);
  original_meta.add_column("col3");
  original_meta.add_column("col4");

  auto pb_result = ProtoConverter::ToPb(original_meta);
  EXPECT_EQ(pb_result.block_id(), 2u);
  EXPECT_EQ(pb_result.block_type(), proto::BT_VECTOR_INDEX);
  EXPECT_EQ(pb_result.min_doc_id(), 300u);
  EXPECT_EQ(pb_result.max_doc_id(), 400u);
  EXPECT_EQ(pb_result.doc_count(), 75u);
  // Fatal: columns(0) / columns(1) are read right below.
  ASSERT_EQ(pb_result.columns_size(), 2);
  EXPECT_EQ(pb_result.columns(0), "col3");
  EXPECT_EQ(pb_result.columns(1), "col4");
}

// Round-trips a fully populated SegmentMeta (two persisted blocks, one writing
// forward block, two indexed vector fields) between protobuf and C++.
//
// Every check that guards a later element access or an optional dereference
// is fatal (ASSERT_*): a wrong block/column count or a missing writing forward
// block ends the test instead of indexing out of range or calling `.value()`
// on an empty result.
TEST(ConverterTest, SegmentMetaConversion) {
  // Test conversion from protobuf to C++ SegmentMeta
  proto::SegmentMeta segment_pb;
  segment_pb.set_segment_id(10);

  // Add persisted blocks
  auto *block1_pb = segment_pb.add_persisted_blocks();
  block1_pb->set_block_id(1);
  block1_pb->set_block_type(proto::BT_SCALAR);
  block1_pb->set_min_doc_id(0);
  block1_pb->set_max_doc_id(100);
  block1_pb->set_doc_count(50);
  block1_pb->add_columns("col1");
  block1_pb->add_columns("col2");

  auto *block2_pb = segment_pb.add_persisted_blocks();
  block2_pb->set_block_id(2);
  block2_pb->set_block_type(proto::BT_VECTOR_INDEX);
  block2_pb->set_min_doc_id(101);
  block2_pb->set_max_doc_id(200);
  block2_pb->set_doc_count(75);
  block2_pb->add_columns("vec_col");

  // Add writing forward block
  auto *writing_block_pb = segment_pb.mutable_writing_forward_block();
  writing_block_pb->set_block_id(3);
  writing_block_pb->set_block_type(proto::BT_SCALAR);
  writing_block_pb->set_min_doc_id(201);
  writing_block_pb->set_max_doc_id(300);
  writing_block_pb->set_doc_count(25);
  writing_block_pb->add_columns("col3");

  // Add indexed vector fields
  segment_pb.add_indexed_vector_fields("vec_col1");
  segment_pb.add_indexed_vector_fields("vec_col2");

  auto segment_meta = ProtoConverter::FromPb(segment_pb);
  ASSERT_NE(segment_meta, nullptr);
  EXPECT_EQ(segment_meta->id(), 10u);
  // Fatal: persisted_blocks()[0] / [1] and writing_forward_block().value()
  // are used below.
  ASSERT_EQ(segment_meta->persisted_blocks().size(), 2u);
  ASSERT_TRUE(segment_meta->has_writing_forward_block());

  // Check first persisted block
  const auto &block1 = segment_meta->persisted_blocks()[0];
  EXPECT_EQ(block1.id(), 1u);
  EXPECT_EQ(block1.type(), BlockType::SCALAR);
  EXPECT_EQ(block1.min_doc_id(), 0u);
  EXPECT_EQ(block1.max_doc_id(), 100u);
  EXPECT_EQ(block1.doc_count(), 50u);
  ASSERT_EQ(block1.columns().size(), 2u);
  EXPECT_EQ(block1.columns()[0], "col1");
  EXPECT_EQ(block1.columns()[1], "col2");

  // Check second persisted block
  const auto &block2 = segment_meta->persisted_blocks()[1];
  EXPECT_EQ(block2.id(), 2u);
  EXPECT_EQ(block2.type(), BlockType::VECTOR_INDEX);
  EXPECT_EQ(block2.min_doc_id(), 101u);
  EXPECT_EQ(block2.max_doc_id(), 200u);
  EXPECT_EQ(block2.doc_count(), 75u);
  ASSERT_EQ(block2.columns().size(), 1u);
  EXPECT_EQ(block2.columns()[0], "vec_col");

  // Check writing forward block
  const auto &writing_block = segment_meta->writing_forward_block();
  EXPECT_EQ(writing_block.value().id(), 3u);
  EXPECT_EQ(writing_block.value().type(), BlockType::SCALAR);
  EXPECT_EQ(writing_block.value().min_doc_id(), 201u);
  EXPECT_EQ(writing_block.value().max_doc_id(), 300u);
  EXPECT_EQ(writing_block.value().doc_count(), 25u);
  ASSERT_EQ(writing_block.value().columns().size(), 1u);
  EXPECT_EQ(writing_block.value().columns()[0], "col3");

  // Check indexed vector fields
  EXPECT_TRUE(segment_meta->vector_indexed("vec_col1"));
  EXPECT_TRUE(segment_meta->vector_indexed("vec_col2"));
  EXPECT_FALSE(segment_meta->vector_indexed("non_existent_field"));

  // Test conversion from C++ to protobuf
  SegmentMeta original_meta(20);

  // Add persisted blocks
  BlockMeta block1_meta(1, BlockType::SCALAR_INDEX, 0, 50);
  block1_meta.set_doc_count(25);
  block1_meta.add_column("col3");
  block1_meta.add_column("col4");
  original_meta.add_persisted_block(block1_meta);

  BlockMeta block2_meta(2, BlockType::VECTOR_INDEX_QUANTIZE, 51, 100);
  block2_meta.set_doc_count(30);
  block2_meta.add_column("vec_col2");
  original_meta.add_persisted_block(block2_meta);

  // Set writing forward block
  BlockMeta writing_block_meta(3, BlockType::SCALAR, 101, 150);
  writing_block_meta.set_doc_count(40);
  writing_block_meta.add_column("col5");
  original_meta.set_writing_forward_block(writing_block_meta);

  // Add indexed vector fields
  original_meta.add_indexed_vector_field("vec_field1");
  original_meta.add_indexed_vector_field("vec_field2");

  auto pb_result = ProtoConverter::ToPb(original_meta);
  EXPECT_EQ(pb_result.segment_id(), 20u);
  // Fatal: persisted_blocks(0) / persisted_blocks(1) are read below.
  ASSERT_EQ(pb_result.persisted_blocks_size(), 2);

  // Check first persisted block
  const auto &pb_block1 = pb_result.persisted_blocks(0);
  EXPECT_EQ(pb_block1.block_id(), 1u);
  EXPECT_EQ(pb_block1.block_type(), proto::BT_SCALAR_INDEX);
  EXPECT_EQ(pb_block1.min_doc_id(), 0u);
  EXPECT_EQ(pb_block1.max_doc_id(), 50u);
  EXPECT_EQ(pb_block1.doc_count(), 25u);
  ASSERT_EQ(pb_block1.columns_size(), 2);
  EXPECT_EQ(pb_block1.columns(0), "col3");
  EXPECT_EQ(pb_block1.columns(1), "col4");

  // Check second persisted block
  const auto &pb_block2 = pb_result.persisted_blocks(1);
  EXPECT_EQ(pb_block2.block_id(), 2u);
  EXPECT_EQ(pb_block2.block_type(), proto::BT_VECTOR_INDEX_QUANTIZE);
  EXPECT_EQ(pb_block2.min_doc_id(), 51u);
  EXPECT_EQ(pb_block2.max_doc_id(), 100u);
  EXPECT_EQ(pb_block2.doc_count(), 30u);
  ASSERT_EQ(pb_block2.columns_size(), 1);
  EXPECT_EQ(pb_block2.columns(0), "vec_col2");

  // Check writing forward block
  EXPECT_TRUE(pb_result.has_writing_forward_block());
  const auto &pb_writing_block = pb_result.writing_forward_block();
  EXPECT_EQ(pb_writing_block.block_id(), 3u);
  EXPECT_EQ(pb_writing_block.block_type(), proto::BT_SCALAR);
  EXPECT_EQ(pb_writing_block.min_doc_id(), 101u);
  EXPECT_EQ(pb_writing_block.max_doc_id(), 150u);
  EXPECT_EQ(pb_writing_block.doc_count(), 40u);
  ASSERT_EQ(pb_writing_block.columns_size(), 1);
  EXPECT_EQ(pb_writing_block.columns(0), "col5");

  // Check indexed vector fields
  ASSERT_EQ(pb_result.indexed_vector_fields_size(), 2);
  EXPECT_EQ(pb_result.indexed_vector_fields(0), "vec_field1");
  EXPECT_EQ(pb_result.indexed_vector_fields(1), "vec_field2");
}

// Converts a SegmentMeta that carries nothing but its id, in both directions,
// and checks that all optional/repeated parts come out empty.
TEST(ConverterTest, SegmentMetaWithEmptyFields) {
  // protobuf -> C++ with only the segment id set
  {
    proto::SegmentMeta minimal_pb;
    minimal_pb.set_segment_id(1);

    auto parsed = ProtoConverter::FromPb(minimal_pb);
    ASSERT_NE(parsed, nullptr);
    EXPECT_EQ(parsed->id(), 1u);
    EXPECT_EQ(parsed->persisted_blocks().size(), 0);
    EXPECT_FALSE(parsed->has_writing_forward_block());
    EXPECT_EQ(parsed->indexed_vector_fields().size(), 0);
  }

  // C++ -> protobuf with only the segment id set
  {
    SegmentMeta minimal_meta(5);
    auto serialized = ProtoConverter::ToPb(minimal_meta);
    EXPECT_EQ(serialized.segment_id(), 5u);
    EXPECT_EQ(serialized.persisted_blocks_size(), 0);
    EXPECT_FALSE(serialized.has_writing_forward_block());
    EXPECT_EQ(serialized.indexed_vector_fields_size(), 0);
  }
}

================================================
FILE: tests/db/index/common/db_type_helper_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <gtest/gtest.h>
#include "db/index/common/type_helper.h"

using namespace zvec;

TEST(IndexTypeCodeBookTest, ProtoToCppConversion) {
  // Each row pairs a protobuf IndexType with the C++ IndexType that
  // IndexTypeCodeBook::Get is expected to return for it.
  struct Mapping {
    proto::IndexType pb;
    IndexType expected;
  };
  const Mapping kMappings[] = {
      {proto::IT_HNSW, IndexType::HNSW},
      {proto::IT_FLAT, IndexType::FLAT},
      {proto::IT_IVF, IndexType::IVF},
      {proto::IT_INVERT, IndexType::INVERT},
      {proto::IT_UNDEFINED, IndexType::UNDEFINED},
      // Raw integer cast; the test expects it to fall back to UNDEFINED.
      {static_cast<proto::IndexType>(999), IndexType::UNDEFINED},
  };

  for (const Mapping &mapping : kMappings) {
    EXPECT_EQ(IndexTypeCodeBook::Get(mapping.pb), mapping.expected)
        << "proto value " << static_cast<int>(mapping.pb);
  }
}

TEST(IndexTypeCodeBookTest, CppToProtoConversion) {
  // Each row pairs a C++ IndexType with the protobuf IndexType that
  // IndexTypeCodeBook::Get is expected to return for it.
  struct Mapping {
    IndexType cpp;
    proto::IndexType expected;
  };
  const Mapping kMappings[] = {
      {IndexType::HNSW, proto::IT_HNSW},
      {IndexType::FLAT, proto::IT_FLAT},
      {IndexType::IVF, proto::IT_IVF},
      {IndexType::INVERT, proto::IT_INVERT},
      {IndexType::UNDEFINED, proto::IT_UNDEFINED},
      // Raw integer cast; the test expects it to fall back to IT_UNDEFINED.
      {static_cast<IndexType>(999), proto::IT_UNDEFINED},
  };

  for (const Mapping &mapping : kMappings) {
    EXPECT_EQ(IndexTypeCodeBook::Get(mapping.cpp), mapping.expected)
        << "C++ value " << static_cast<int>(mapping.cpp);
  }
}

TEST(IndexTypeCodeBookTest, CppToStringConversion) {
  // Shorthand for the call under test: C++ IndexType -> display name.
  const auto name_of = [](IndexType type) {
    return IndexTypeCodeBook::AsString(type);
  };

  EXPECT_EQ(name_of(IndexType::HNSW), "HNSW");
  EXPECT_EQ(name_of(IndexType::INVERT), "INVERT");
  EXPECT_EQ(name_of(IndexType::UNDEFINED), "UNDEFINED");
  // Raw integer cast; the test expects the same name as UNDEFINED.
  EXPECT_EQ(name_of(static_cast<IndexType>(999)), "UNDEFINED");
}

TEST(DataTypeCodeBookTest, IsArrayType) {
  // Scalar, dense-vector and sparse-vector types: IsArrayType must be false.
  const proto::DataType kNonArrayTypes[] = {
      proto::DT_BINARY,          proto::DT_STRING,
      proto::DT_BOOL,            proto::DT_INT32,
      proto::DT_INT64,           proto::DT_UINT32,
      proto::DT_UINT64,          proto::DT_FLOAT,
      proto::DT_DOUBLE,          proto::DT_VECTOR_BINARY32,
      proto::DT_VECTOR_BINARY64, proto::DT_VECTOR_FP16,
      proto::DT_VECTOR_FP32,     proto::DT_VECTOR_FP64,
      proto::DT_VECTOR_INT4,     proto::DT_VECTOR_INT8,
      proto::DT_VECTOR_INT16,    proto::DT_SPARSE_VECTOR_FP32,
  };
  for (const proto::DataType type : kNonArrayTypes) {
    EXPECT_FALSE(DataTypeCodeBook::IsArrayType(type))
        << "proto value " << static_cast<int>(type);
  }

  // DT_ARRAY_* types: IsArrayType must be true.
  const proto::DataType kArrayTypes[] = {
      proto::DT_ARRAY_BINARY, proto::DT_ARRAY_STRING, proto::DT_ARRAY_BOOL,
      proto::DT_ARRAY_INT32,  proto::DT_ARRAY_INT64,  proto::DT_ARRAY_UINT32,
      proto::DT_ARRAY_UINT64, proto::DT_ARRAY_FLOAT,  proto::DT_ARRAY_DOUBLE,
  };
  for (const proto::DataType type : kArrayTypes) {
    EXPECT_TRUE(DataTypeCodeBook::IsArrayType(type))
        << "proto value " << static_cast<int>(type);
  }
}

TEST(DataTypeCodeBookTest, ProtoToCppConversion) {
  // Each row pairs a protobuf DataType with the C++ DataType that
  // DataTypeCodeBook::Get is expected to return for it.
  // NOTE(review): DT_SPARSE_VECTOR_FP16 is exercised by the C++ -> proto test
  // in this file but not here; consider adding it once the mapping is
  // confirmed.
  struct Mapping {
    proto::DataType pb;
    DataType expected;
  };
  const Mapping kMappings[] = {
      {proto::DT_BINARY, DataType::BINARY},
      {proto::DT_STRING, DataType::STRING},
      {proto::DT_BOOL, DataType::BOOL},
      {proto::DT_INT32, DataType::INT32},
      {proto::DT_INT64, DataType::INT64},
      {proto::DT_UINT32, DataType::UINT32},
      {proto::DT_UINT64, DataType::UINT64},
      {proto::DT_FLOAT, DataType::FLOAT},
      {proto::DT_DOUBLE, DataType::DOUBLE},
      {proto::DT_VECTOR_BINARY32, DataType::VECTOR_BINARY32},
      {proto::DT_VECTOR_BINARY64, DataType::VECTOR_BINARY64},
      {proto::DT_VECTOR_FP16, DataType::VECTOR_FP16},
      {proto::DT_VECTOR_FP32, DataType::VECTOR_FP32},
      {proto::DT_VECTOR_FP64, DataType::VECTOR_FP64},
      {proto::DT_VECTOR_INT4, DataType::VECTOR_INT4},
      {proto::DT_VECTOR_INT8, DataType::VECTOR_INT8},
      {proto::DT_VECTOR_INT16, DataType::VECTOR_INT16},
      {proto::DT_SPARSE_VECTOR_FP32, DataType::SPARSE_VECTOR_FP32},
      {proto::DT_ARRAY_BINARY, DataType::ARRAY_BINARY},
      {proto::DT_ARRAY_STRING, DataType::ARRAY_STRING},
      {proto::DT_ARRAY_BOOL, DataType::ARRAY_BOOL},
      {proto::DT_ARRAY_INT32, DataType::ARRAY_INT32},
      {proto::DT_ARRAY_INT64, DataType::ARRAY_INT64},
      {proto::DT_ARRAY_UINT32, DataType::ARRAY_UINT32},
      {proto::DT_ARRAY_UINT64, DataType::ARRAY_UINT64},
      {proto::DT_ARRAY_FLOAT, DataType::ARRAY_FLOAT},
      {proto::DT_ARRAY_DOUBLE, DataType::ARRAY_DOUBLE},
      {proto::DT_UNDEFINED, DataType::UNDEFINED},
      // Raw integer cast; the test expects it to fall back to UNDEFINED.
      {static_cast<proto::DataType>(999), DataType::UNDEFINED},
  };

  for (const Mapping &mapping : kMappings) {
    EXPECT_EQ(DataTypeCodeBook::Get(mapping.pb), mapping.expected)
        << "proto value " << static_cast<int>(mapping.pb);
  }
}

TEST(DataTypeCodeBookTest, CppToProtoConversion) {
  // Each row pairs a C++ DataType with the protobuf DataType that
  // DataTypeCodeBook::Get is expected to return for it.
  struct Mapping {
    DataType cpp;
    proto::DataType expected;
  };
  const Mapping kMappings[] = {
      {DataType::BINARY, proto::DT_BINARY},
      {DataType::STRING, proto::DT_STRING},
      {DataType::BOOL, proto::DT_BOOL},
      {DataType::INT32, proto::DT_INT32},
      {DataType::INT64, proto::DT_INT64},
      {DataType::UINT32, proto::DT_UINT32},
      {DataType::UINT64, proto::DT_UINT64},
      {DataType::FLOAT, proto::DT_FLOAT},
      {DataType::DOUBLE, proto::DT_DOUBLE},
      {DataType::VECTOR_BINARY32, proto::DT_VECTOR_BINARY32},
      {DataType::VECTOR_BINARY64, proto::DT_VECTOR_BINARY64},
      {DataType::VECTOR_FP16, proto::DT_VECTOR_FP16},
      {DataType::VECTOR_FP32, proto::DT_VECTOR_FP32},
      {DataType::VECTOR_FP64, proto::DT_VECTOR_FP64},
      {DataType::VECTOR_INT4, proto::DT_VECTOR_INT4},
      {DataType::VECTOR_INT8, proto::DT_VECTOR_INT8},
      {DataType::VECTOR_INT16, proto::DT_VECTOR_INT16},
      {DataType::SPARSE_VECTOR_FP16, proto::DT_SPARSE_VECTOR_FP16},
      {DataType::SPARSE_VECTOR_FP32, proto::DT_SPARSE_VECTOR_FP32},
      {DataType::ARRAY_BINARY, proto::DT_ARRAY_BINARY},
      {DataType::ARRAY_STRING, proto::DT_ARRAY_STRING},
      {DataType::ARRAY_BOOL, proto::DT_ARRAY_BOOL},
      {DataType::ARRAY_INT32, proto::DT_ARRAY_INT32},
      {DataType::ARRAY_INT64, proto::DT_ARRAY_INT64},
      {DataType::ARRAY_UINT32, proto::DT_ARRAY_UINT32},
      {DataType::ARRAY_UINT64, proto::DT_ARRAY_UINT64},
      {DataType::ARRAY_FLOAT, proto::DT_ARRAY_FLOAT},
      {DataType::ARRAY_DOUBLE, proto::DT_ARRAY_DOUBLE},
      {DataType::UNDEFINED, proto::DT_UNDEFINED},
      // Raw integer cast; the test expects it to fall back to DT_UNDEFINED.
      {static_cast<DataType>(999), proto::DT_UNDEFINED},
  };

  for (const Mapping &mapping : kMappings) {
    EXPECT_EQ(DataTypeCodeBook::Get(mapping.cpp), mapping.expected)
        << "C++ value " << static_cast<int>(mapping.cpp);
  }
}

TEST(DataTypeCodeBookTest, CppToStringConversion) {
  // Each row pairs a C++ DataType with the name DataTypeCodeBook::AsString is
  // expected to produce. UNDEFINED and the raw-cast value are expected to
  // yield an empty string.
  struct Naming {
    DataType type;
    const char *expected;
  };
  const Naming kNamings[] = {
      {DataType::BINARY, "BINARY"},
      {DataType::STRING, "STRING"},
      {DataType::BOOL, "BOOL"},
      {DataType::INT32, "INT32"},
      {DataType::INT64, "INT64"},
      {DataType::UINT32, "UINT32"},
      {DataType::UINT64, "UINT64"},
      {DataType::FLOAT, "FLOAT"},
      {DataType::DOUBLE, "DOUBLE"},
      {DataType::VECTOR_BINARY32, "VECTOR_BINARY32"},
      {DataType::VECTOR_BINARY64, "VECTOR_BINARY64"},
      {DataType::VECTOR_FP16, "VECTOR_FP16"},
      {DataType::VECTOR_FP32, "VECTOR_FP32"},
      {DataType::VECTOR_FP64, "VECTOR_FP64"},
      {DataType::VECTOR_INT4, "VECTOR_INT4"},
      {DataType::VECTOR_INT8, "VECTOR_INT8"},
      {DataType::VECTOR_INT16, "VECTOR_INT16"},
      {DataType::ARRAY_BINARY, "ARRAY_BINARY"},
      {DataType::ARRAY_STRING, "ARRAY_STRING"},
      {DataType::ARRAY_BOOL, "ARRAY_BOOL"},
      {DataType::ARRAY_INT32, "ARRAY_INT32"},
      {DataType::ARRAY_INT64, "ARRAY_INT64"},
      {DataType::ARRAY_UINT32, "ARRAY_UINT32"},
      {DataType::ARRAY_UINT64, "ARRAY_UINT64"},
      {DataType::ARRAY_FLOAT, "ARRAY_FLOAT"},
      {DataType::ARRAY_DOUBLE, "ARRAY_DOUBLE"},
      {DataType::UNDEFINED, ""},
      {static_cast<DataType>(999), ""},
  };

  for (const Naming &naming : kNamings) {
    EXPECT_EQ(DataTypeCodeBook::AsString(naming.type), naming.expected)
        << "C++ value " << static_cast<int>(naming.type);
  }
}

TEST(MetricTypeCodeBookTest, ProtoToCppConversion) {
  // Each row pairs a protobuf MetricType with the C++ MetricType that
  // MetricTypeCodeBook::Get is expected to return for it.
  struct Mapping {
    proto::MetricType pb;
    MetricType expected;
  };
  const Mapping kMappings[] = {
      {proto::MT_IP, MetricType::IP},
      {proto::MT_L2, MetricType::L2},
      {proto::MT_COSINE, MetricType::COSINE},
      {proto::MT_UNDEFINED, MetricType::UNDEFINED},
      // Raw integer cast; the test expects it to fall back to UNDEFINED.
      {static_cast<proto::MetricType>(999), MetricType::UNDEFINED},
  };

  for (const Mapping &mapping : kMappings) {
    EXPECT_EQ(MetricTypeCodeBook::Get(mapping.pb), mapping.expected)
        << "proto value " << static_cast<int>(mapping.pb);
  }
}

TEST(MetricTypeCodeBookTest, CppToProtoConversion) {
  // Each row pairs a C++ MetricType with the protobuf MetricType that
  // MetricTypeCodeBook::Get is expected to return for it.
  struct Mapping {
    MetricType cpp;
    proto::MetricType expected;
  };
  const Mapping kMappings[] = {
      {MetricType::IP, proto::MT_IP},
      {MetricType::L2, proto::MT_L2},
      {MetricType::COSINE, proto::MT_COSINE},
      {MetricType::UNDEFINED, proto::MT_UNDEFINED},
      // Raw integer cast; the test expects it to fall back to MT_UNDEFINED.
      {static_cast<MetricType>(999), proto::MT_UNDEFINED},
  };

  for (const Mapping &mapping : kMappings) {
    EXPECT_EQ(MetricTypeCodeBook::Get(mapping.cpp), mapping.expected)
        << "C++ value " << static_cast<int>(mapping.cpp);
  }
}

TEST(QuantizeTypeCodeBookTest, ProtoToCppConversion) {
  // Each row pairs a protobuf QuantizeType with the C++ QuantizeType that
  // QuantizeTypeCodeBook::Get is expected to return for it.
  struct Mapping {
    proto::QuantizeType pb;
    QuantizeType expected;
  };
  const Mapping kMappings[] = {
      {proto::QT_FP16, QuantizeType::FP16},
      {proto::QT_INT4, QuantizeType::INT4},
      {proto::QT_INT8, QuantizeType::INT8},
      {proto::QT_UNDEFINED, QuantizeType::UNDEFINED},
      // Raw integer cast; the test expects it to fall back to UNDEFINED.
      {static_cast<proto::QuantizeType>(999), QuantizeType::UNDEFINED},
  };

  for (const Mapping &mapping : kMappings) {
    EXPECT_EQ(QuantizeTypeCodeBook::Get(mapping.pb), mapping.expected)
        << "proto value " << static_cast<int>(mapping.pb);
  }
}

TEST(QuantizeTypeCodeBookTest, CppToProtoConversion) {
  // Each row pairs a C++ QuantizeType with the protobuf QuantizeType that
  // QuantizeTypeCodeBook::Get is expected to return for it.
  struct Mapping {
    QuantizeType cpp;
    proto::QuantizeType expected;
  };
  const Mapping kMappings[] = {
      {QuantizeType::FP16, proto::QT_FP16},
      {QuantizeType::INT4, proto::QT_INT4},
      {QuantizeType::INT8, proto::QT_INT8},
      {QuantizeType::UNDEFINED, proto::QT_UNDEFINED},
      // Raw integer cast; the test expects it to fall back to QT_UNDEFINED.
      {static_cast<QuantizeType>(999), proto::QT_UNDEFINED},
  };

  for (const Mapping &mapping : kMappings) {
    EXPECT_EQ(QuantizeTypeCodeBook::Get(mapping.cpp), mapping.expected)
        << "C++ value " << static_cast<int>(mapping.cpp);
  }
}

TEST(BlockTypeCodeBookTest, ProtoToCppConversion) {
  // Each row pairs a protobuf BlockType with the C++ BlockType that
  // BlockTypeCodeBook::Get is expected to return for it.
  struct Mapping {
    proto::BlockType pb;
    BlockType expected;
  };
  const Mapping kMappings[] = {
      {proto::BT_SCALAR, BlockType::SCALAR},
      {proto::BT_SCALAR_INDEX, BlockType::SCALAR_INDEX},
      {proto::BT_VECTOR_INDEX, BlockType::VECTOR_INDEX},
      {proto::BT_VECTOR_INDEX_QUANTIZE, BlockType::VECTOR_INDEX_QUANTIZE},
      {proto::BT_UNDEFINED, BlockType::UNDEFINED},
      // Raw integer cast; the test expects it to fall back to UNDEFINED.
      {static_cast<proto::BlockType>(999), BlockType::UNDEFINED},
  };

  for (const Mapping &mapping : kMappings) {
    EXPECT_EQ(BlockTypeCodeBook::Get(mapping.pb), mapping.expected)
        << "proto value " << static_cast<int>(mapping.pb);
  }
}

TEST(BlockTypeCodeBookTest, CppToProtoConversion) {
  // Each row pairs a C++ BlockType with the protobuf BlockType that
  // BlockTypeCodeBook::Get is expected to return for it.
  struct Mapping {
    BlockType cpp;
    proto::BlockType expected;
  };
  const Mapping kMappings[] = {
      {BlockType::SCALAR, proto::BT_SCALAR},
      {BlockType::SCALAR_INDEX, proto::BT_SCALAR_INDEX},
      {BlockType::VECTOR_INDEX, proto::BT_VECTOR_INDEX},
      {BlockType::VECTOR_INDEX_QUANTIZE, proto::BT_VECTOR_INDEX_QUANTIZE},
      {BlockType::UNDEFINED, proto::BT_UNDEFINED},
      // Raw integer cast; the test expects it to fall back to BT_UNDEFINED.
      {static_cast<BlockType>(999), proto::BT_UNDEFINED},
  };

  for (const Mapping &mapping : kMappings) {
    EXPECT_EQ(BlockTypeCodeBook::Get(mapping.cpp), mapping.expected)
        << "C++ value " << static_cast<int>(mapping.cpp);
  }
}

================================================
FILE: tests/db/index/common/doc_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "zvec/db/doc.h"
#include <cstdint>
#include <limits>
#include <memory>
#include <numeric>
#include <random>
#include <string>
#include <utility>
#include <vector>
#include <gtest/gtest.h>
#include <zvec/ailego/utility/float_helper.h>
#include "utils/utils.h"
#include "zvec/db/status.h"
#include "zvec/db/type.h"


using namespace zvec;

// Fixture shared by the Doc tests below: SetUp() creates a Doc and gives it a
// primary key, document id, score and operator before each test body runs.
class DocDetailedTest : public ::testing::Test {
 protected:
  void SetUp() override {
    // Configure the document first, then publish it to the fixture member.
    auto doc = std::make_shared<Doc>();
    doc->set_pk("test_pk");
    doc->set_doc_id(12345);
    doc->set_score(0.95f);
    doc->set_operator(Operator::INSERT);
    test_doc_ = doc;
  }

  // Document under test; replaced in SetUp().
  Doc::Ptr test_doc_;
};

// Round-trips the boundary values of each basic scalar type (bool, 32/64-bit
// signed and unsigned integers, float, double) through serialize() and
// deserialize() and checks that they come back unchanged.
TEST_F(DocDetailedTest, BasicTypeSerializationDeserialization) {
  using Int32Limits = std::numeric_limits<int32_t>;
  using Uint32Limits = std::numeric_limits<uint32_t>;
  using Int64Limits = std::numeric_limits<int64_t>;
  using Uint64Limits = std::numeric_limits<uint64_t>;
  using FloatLimits = std::numeric_limits<float>;
  using DoubleLimits = std::numeric_limits<double>;

  // Populate the document with boundary values.
  test_doc_->set("bool_true", true);
  test_doc_->set("bool_false", false);
  test_doc_->set("int32_min", Int32Limits::min());
  test_doc_->set("int32_max", Int32Limits::max());
  test_doc_->set("uint32_min", Uint32Limits::min());
  test_doc_->set("uint32_max", Uint32Limits::max());
  test_doc_->set("int64_min", Int64Limits::min());
  test_doc_->set("int64_max", Int64Limits::max());
  test_doc_->set("uint64_min", Uint64Limits::min());
  test_doc_->set("uint64_max", Uint64Limits::max());
  test_doc_->set("float_min", FloatLimits::min());
  test_doc_->set("float_max", FloatLimits::max());
  test_doc_->set("float_lowest", FloatLimits::lowest());
  test_doc_->set("double_min", DoubleLimits::min());
  test_doc_->set("double_max", DoubleLimits::max());
  test_doc_->set("double_lowest", DoubleLimits::lowest());

  // Serialize, then rebuild a document from the raw bytes.
  auto blob = test_doc_->serialize();
  ASSERT_FALSE(blob.empty());

  auto restored = Doc::deserialize(blob.data(), blob.size());
  ASSERT_NE(restored, nullptr);

  // Booleans and integers must match exactly.
  EXPECT_EQ(restored->get<bool>("bool_true").value(), true);
  EXPECT_EQ(restored->get<bool>("bool_false").value(), false);
  EXPECT_EQ(restored->get<int32_t>("int32_min").value(), Int32Limits::min());
  EXPECT_EQ(restored->get<int32_t>("int32_max").value(), Int32Limits::max());
  EXPECT_EQ(restored->get<uint32_t>("uint32_min").value(),
            Uint32Limits::min());
  EXPECT_EQ(restored->get<uint32_t>("uint32_max").value(),
            Uint32Limits::max());
  EXPECT_EQ(restored->get<int64_t>("int64_min").value(), Int64Limits::min());
  EXPECT_EQ(restored->get<int64_t>("int64_max").value(), Int64Limits::max());
  EXPECT_EQ(restored->get<uint64_t>("uint64_min").value(),
            Uint64Limits::min());
  EXPECT_EQ(restored->get<uint64_t>("uint64_max").value(),
            Uint64Limits::max());

  // Floating point values go through gtest's approximate matchers.
  EXPECT_FLOAT_EQ(restored->get<float>("float_min").value(),
                  FloatLimits::min());
  EXPECT_FLOAT_EQ(restored->get<float>("float_max").value(),
                  FloatLimits::max());
  EXPECT_FLOAT_EQ(restored->get<float>("float_lowest").value(),
                  FloatLimits::lowest());
  EXPECT_DOUBLE_EQ(restored->get<double>("double_min").value(),
                   DoubleLimits::min());
  EXPECT_DOUBLE_EQ(restored->get<double>("double_max").value(),
                   DoubleLimits::max());
  EXPECT_DOUBLE_EQ(restored->get<double>("double_lowest").value(),
                   DoubleLimits::lowest());
}

// Round-trips several string shapes through serialize()/deserialize(): an
// empty string, a long string, a string with control characters and an
// embedded NUL, and a string holding every possible byte value.
TEST_F(DocDetailedTest, StringTypeSerializationDeserialization) {
  // Test empty string
  test_doc_->set("empty_string", std::string(""));

  // Test long string
  std::string long_string(10000, 'a');
  test_doc_->set("long_string", long_string);

  // Test string with special characters.
  // The length is taken from the array with sizeof() (minus the implicit
  // terminator) so that the tab, newline, carriage return, embedded NUL and
  // the text following it are all part of the value. A hand-counted length of
  // 15 previously cut the value off at "Special charact", which left the
  // special characters untested.
  static const char kSpecialChars[] = "Special characters\t\n\r\0included";
  std::string special_chars(kSpecialChars, sizeof(kSpecialChars) - 1);
  test_doc_->set("special_chars", special_chars);

  // Test string with binary data: one character for each byte value 0..255.
  std::string binary_string;
  for (int i = 0; i < 256; ++i) {
    binary_string.push_back(static_cast<char>(i));
  }
  test_doc_->set("binary_string", binary_string);

  auto serialized = test_doc_->serialize();
  ASSERT_FALSE(serialized.empty());

  auto deserialized_doc =
      Doc::deserialize(serialized.data(), serialized.size());
  ASSERT_NE(deserialized_doc, nullptr);

  EXPECT_EQ(deserialized_doc->get<std::string>("empty_string").value(), "");
  EXPECT_EQ(deserialized_doc->get<std::string>("long_string").value(),
            long_string);
  EXPECT_EQ(deserialized_doc->get<std::string>("special_chars").value(),
            special_chars);
  EXPECT_EQ(deserialized_doc->get<std::string>("binary_string").value(),
            binary_string);
}


// Round-trips a 1000-element std::vector<bool> with alternating values
// through serialize()/deserialize().
TEST_F(DocDetailedTest, VectorBoolSerializationDeserialization) {
  // Even indices hold true, odd indices hold false.
  const size_t kElementCount = 1000;
  std::vector<bool> expected(kElementCount);
  for (size_t idx = 0; idx < kElementCount; ++idx) {
    expected[idx] = (idx % 2 == 0);
  }
  test_doc_->set("bool_vec", expected);

  auto blob = test_doc_->serialize();
  ASSERT_FALSE(blob.empty());

  auto restored = Doc::deserialize(blob.data(), blob.size());
  ASSERT_NE(restored, nullptr);

  auto actual = restored->get<std::vector<bool>>("bool_vec").value();

  // Compare element by element so a failure reports the offending index.
  ASSERT_EQ(actual.size(), expected.size());
  for (size_t idx = 0; idx < expected.size(); ++idx) {
    EXPECT_EQ(actual[idx], expected[idx]) << "Mismatch at index " << idx;
  }
}

// Round-trips vectors of each numeric element type (int8/16/32/64,
// uint32/64, float, double), including the type's extreme values, through
// serialize()/deserialize().
TEST_F(DocDetailedTest, NumericVectorSerializationDeserialization) {
  using I8 = std::numeric_limits<int8_t>;
  using I16 = std::numeric_limits<int16_t>;
  using I32 = std::numeric_limits<int32_t>;
  using I64 = std::numeric_limits<int64_t>;
  using U32 = std::numeric_limits<uint32_t>;
  using U64 = std::numeric_limits<uint64_t>;
  using F32 = std::numeric_limits<float>;
  using F64 = std::numeric_limits<double>;

  // Inputs: one vector per element type.
  std::vector<int8_t> i8_values = {I8::min(), -1, 0, 1, I8::max()};
  std::vector<int16_t> i16_values = {I16::min(), -1, 0, 1, I16::max()};
  std::vector<int32_t> i32_values = {I32::min(), -1, 0, 1, I32::max()};
  std::vector<int64_t> i64_values = {I64::min(), -1, 0, 1, I64::max()};
  std::vector<uint32_t> u32_values = {U32::min(), 1, 100, U32::max()};
  std::vector<uint64_t> u64_values = {U64::min(), 1, 100, U64::max()};
  std::vector<float> f32_values = {F32::min(), -1.0f, 0.0f, 1.0f, F32::max()};
  std::vector<double> f64_values = {F64::min(), -1.0, 0.0, 1.0, F64::max()};

  test_doc_->set("int8_vec", i8_values);
  test_doc_->set("int16_vec", i16_values);
  test_doc_->set("int32_vec", i32_values);
  test_doc_->set("int64_vec", i64_values);
  test_doc_->set("uint32_vec", u32_values);
  test_doc_->set("uint64_vec", u64_values);
  test_doc_->set("float_vec", f32_values);
  test_doc_->set("double_vec", f64_values);

  auto blob = test_doc_->serialize();
  ASSERT_FALSE(blob.empty());
  auto restored = Doc::deserialize(blob.data(), blob.size());
  ASSERT_NE(restored, nullptr);

  // Integer vectors must match exactly.
  EXPECT_EQ(restored->get<std::vector<int8_t>>("int8_vec").value(), i8_values);
  EXPECT_EQ(restored->get<std::vector<int16_t>>("int16_vec").value(),
            i16_values);
  EXPECT_EQ(restored->get<std::vector<int32_t>>("int32_vec").value(),
            i32_values);
  EXPECT_EQ(restored->get<std::vector<int64_t>>("int64_vec").value(),
            i64_values);
  EXPECT_EQ(restored->get<std::vector<uint32_t>>("uint32_vec").value(),
            u32_values);
  EXPECT_EQ(restored->get<std::vector<uint64_t>>("uint64_vec").value(),
            u64_values);

  // Floating point vectors are compared element-wise with gtest's
  // approximate matchers.
  auto restored_f32 = restored->get<std::vector<float>>("float_vec").value();
  ASSERT_EQ(restored_f32.size(), f32_values.size());
  for (size_t idx = 0; idx < f32_values.size(); ++idx) {
    EXPECT_FLOAT_EQ(restored_f32[idx], f32_values[idx])
        << "Mismatch at index " << idx;
  }

  auto restored_f64 = restored->get<std::vector<double>>("double_vec").value();
  ASSERT_EQ(restored_f64.size(), f64_values.size());
  for (size_t idx = 0; idx < f64_values.size(); ++idx) {
    EXPECT_DOUBLE_EQ(restored_f64[idx], f64_values[idx])
        << "Mismatch at index " << idx;
  }
}

// Round-trips a std::vector<std::string> whose elements cover an empty
// string, ordinary text, a long string and a string with an embedded NUL.
TEST_F(DocDetailedTest, StringVectorSerializationDeserialization) {
  std::vector<std::string> expected = {
      "",                               // Empty string
      "normal string",                  //
      std::string(1000, 'x'),           // Long string
      "Special character test",         //
      std::string("binary\0data", 11),  // Contains binary data
  };
  test_doc_->set("string_vec", expected);

  auto blob = test_doc_->serialize();
  ASSERT_FALSE(blob.empty());

  auto restored = Doc::deserialize(blob.data(), blob.size());
  ASSERT_NE(restored, nullptr);

  auto actual = restored->get<std::vector<std::string>>("string_vec").value();

  // Compare element by element so a failure reports the offending index.
  ASSERT_EQ(actual.size(), expected.size());
  for (size_t idx = 0; idx < expected.size(); ++idx) {
    EXPECT_EQ(actual[idx], expected[idx]) << "Mismatch at index " << idx;
  }
}

// Round-trips two sparse vectors, each stored as a pair of (uint32_t indices,
// values): one with float values and one with ailego::Float16 values.
TEST_F(DocDetailedTest, SparseVectorSerializationDeserialization) {
  using SparseFp32 = std::pair<std::vector<uint32_t>, std::vector<float>>;
  using SparseFp16 =
      std::pair<std::vector<uint32_t>, std::vector<ailego::Float16>>;

  // Float-valued sparse vector.
  SparseFp32 fp32_input{
      {0, 100, 1000, 10000},
      {0.1f, 100.5f, -200.7f, std::numeric_limits<float>::max()}};
  test_doc_->set("sparse_float_vec", fp32_input);

  // Float16-valued sparse vector.
  SparseFp16 fp16_input{{1, 50, 500},
                        {ailego::Float16(0.5f), ailego::Float16(-10.25f),
                         ailego::Float16(1000.0f)}};
  test_doc_->set("sparse_float16_vec", fp16_input);

  auto blob = test_doc_->serialize();
  ASSERT_FALSE(blob.empty());

  auto restored = Doc::deserialize(blob.data(), blob.size());
  ASSERT_NE(restored, nullptr);

  // Float variant: indices must match exactly, values approximately.
  auto fp32_output = restored->get<SparseFp32>("sparse_float_vec").value();
  EXPECT_EQ(fp32_output.first, fp32_input.first);
  ASSERT_EQ(fp32_output.second.size(), fp32_input.second.size());
  for (size_t idx = 0; idx < fp32_input.second.size(); ++idx) {
    EXPECT_FLOAT_EQ(fp32_output.second[idx], fp32_input.second[idx])
        << "Mismatch at index " << idx;
  }

  // Float16 variant: both indices and values are compared with operator==.
  auto fp16_output = restored->get<SparseFp16>("sparse_float16_vec").value();
  EXPECT_EQ(fp16_output.first, fp16_input.first);
  EXPECT_EQ(fp16_output.second, fp16_input.second);
}

// Round-trips a document carrying 1000 integer fields named "field_<i>",
// each holding its own index as the value.
TEST_F(DocDetailedTest, ManyFieldsSerializationDeserialization) {
  const int kFieldCount = 1000;
  // Builds the field name used for a given index.
  const auto name_for = [](int index) {
    return "field_" + std::to_string(index);
  };

  for (int i = 0; i < kFieldCount; ++i) {
    test_doc_->set(name_for(i), i);
  }

  auto blob = test_doc_->serialize();
  ASSERT_FALSE(blob.empty());

  auto restored = Doc::deserialize(blob.data(), blob.size());
  ASSERT_NE(restored, nullptr);

  for (int i = 0; i < kFieldCount; ++i) {
    std::string key = name_for(i);
    EXPECT_EQ(restored->get<int32_t>(key).value(), i);
  }
}

// Round-trips a freshly created Doc that has no fields and an empty primary
// key; the serialized form must be non-empty and the key must survive.
TEST_F(DocDetailedTest, EmptyDocSerializationDeserialization) {
  Doc::Ptr blank_doc{std::make_shared<Doc>()};
  blank_doc->set_pk("");  // Empty primary key

  auto blob = blank_doc->serialize();
  ASSERT_FALSE(blob.empty());

  auto restored = Doc::deserialize(blob.data(), blob.size());
  ASSERT_NE(restored, nullptr);
  EXPECT_EQ(restored->pk(), "");
}

// Round-trips a document holding a 100000-character string and a
// 50000-element int32 vector (values 0, 1, 2, ...).
TEST_F(DocDetailedTest, LargeDocSerializationDeserialization) {
  std::string big_text(100000, 'A');
  test_doc_->set("large_string", big_text);

  std::vector<int32_t> big_numbers(50000);
  std::iota(big_numbers.begin(), big_numbers.end(), 0);
  test_doc_->set("large_vector", big_numbers);

  auto blob = test_doc_->serialize();
  // The payload must be larger than the 100000-character string alone.
  EXPECT_GT(blob.size(), big_text.size());

  auto restored = Doc::deserialize(blob.data(), blob.size());
  ASSERT_NE(restored, nullptr);

  EXPECT_EQ(restored->get<std::string>("large_string").value(), big_text);
  EXPECT_EQ(restored->get<std::vector<int32_t>>("large_vector").value(),
            big_numbers);
}

// Checks that memory_usage() strictly increases after adding a few small
// fields, and again after adding a 10000-character string.
TEST_F(DocDetailedTest, MemoryUsageCalculation) {
  const size_t baseline = test_doc_->memory_usage();

  // A few small fields.
  test_doc_->set("small_string", std::string("small"));
  test_doc_->set("int_field", int32_t(42));
  test_doc_->set("float_field", 3.14f);

  const size_t after_small_fields = test_doc_->memory_usage();
  EXPECT_GT(after_small_fields, baseline);

  // One large field.
  std::string big_text(10000, 'B');
  test_doc_->set("large_string", big_text);

  const size_t after_large_field = test_doc_->memory_usage();
  EXPECT_GT(after_large_field, after_small_fields);
}

// Checks that to_detail_string() is non-empty and mentions the primary key
// set by the fixture as well as the name of every field added here.
TEST_F(DocDetailedTest, DetailStringRepresentation) {
  test_doc_->set("test_bool", true);
  test_doc_->set("test_int", int32_t(-42));
  test_doc_->set("test_string", std::string("hello"));

  std::vector<float> samples = {1.1f, 2.2f, 3.3f};
  test_doc_->set("test_float_vec", samples);

  std::string detail = test_doc_->to_detail_string();
  EXPECT_FALSE(detail.empty());

  // Every token below must appear somewhere in the rendered text.
  const char *const kExpectedTokens[] = {
      "test_pk", "test_bool", "test_int", "test_string", "test_float_vec",
  };
  for (const char *token : kExpectedTokens) {
    EXPECT_NE(detail.find(token), std::string::npos);
  }
}

// Checks that get_operator() returns whatever set_operator() stored last, for
// INSERT, DELETE and UPDATE in that order.
TEST_F(DocDetailedTest, OperatorTypes) {
  for (const Operator op :
       {Operator::INSERT, Operator::DELETE, Operator::UPDATE}) {
    test_doc_->set_operator(op);
    EXPECT_EQ(test_doc_->get_operator(), op);
  }
}

// Checks the doc_id and score accessors at boundary values: doc id 0 and the
// uint64_t maximum, then scores 0, 1, -1 and the float maximum.
TEST_F(DocDetailedTest, DocIdAndScore) {
  test_doc_->set_doc_id(0);
  EXPECT_EQ(test_doc_->doc_id(), 0);

  constexpr uint64_t kLargestDocId = std::numeric_limits<uint64_t>::max();
  test_doc_->set_doc_id(kLargestDocId);
  EXPECT_EQ(test_doc_->doc_id(), kLargestDocId);

  // Every score must read back equal (within float ULP tolerance).
  for (const float score :
       {0.0f, 1.0f, -1.0f, std::numeric_limits<float>::max()}) {
    test_doc_->set_score(score);
    EXPECT_FLOAT_EQ(test_doc_->score(), score);
  }
}

// Checks that pk() returns whatever set_pk() stored last, for an empty key, a
// 10000-character key and an ordinary key (applied in that order).
TEST_F(DocDetailedTest, PrimaryKey) {
  const std::vector<std::string> candidates = {
      std::string(),
      std::string(10000, 'X'),
      std::string("normal_pk"),
  };

  for (const std::string &candidate : candidates) {
    test_doc_->set_pk(candidate);
    EXPECT_EQ(test_doc_->pk(), candidate);
  }
}

// Test duplicate field names: setting the same name twice must keep only the
// latest value, and that value must survive a serialize/deserialize round trip.
TEST_F(DocDetailedTest, DuplicateFieldNames) {
  test_doc_->set("duplicate_field", int32_t(1));
  test_doc_->set("duplicate_field", int32_t(2));  // Overwrite old value

  auto serialized = test_doc_->serialize();
  auto deserialized_doc =
      Doc::deserialize(serialized.data(), serialized.size());
  // Stop here on failure: the checks below dereference the result.
  ASSERT_NE(deserialized_doc, nullptr);

  auto stored = deserialized_doc->get<int32_t>("duplicate_field");
  // Fail cleanly if the field is missing instead of calling value() on an
  // empty result.
  ASSERT_TRUE(stored.has_value());
  EXPECT_EQ(stored.value(), 2);
}

// Test combination of various data types: scalar, string, dense array, string
// array and sparse (indices, values) fields are stored in one document and
// must all read back unchanged after a serialize/deserialize round trip.
TEST_F(DocDetailedTest, MixedDataTypes) {
  using SparseFp32 = std::pair<std::vector<uint32_t>, std::vector<float>>;

  test_doc_->set("bool_field", true);
  test_doc_->set("int_field", int32_t(-1000));
  test_doc_->set("uint_field", uint32_t(2000));
  test_doc_->set("float_field", 3.14159f);
  test_doc_->set("double_field", 2.718281828459045);
  test_doc_->set("string_field", std::string("Hello, World!"));

  std::vector<int32_t> int_vec = {1, 2, 3, 4, 5};
  test_doc_->set("int_vec", int_vec);

  std::vector<float> float_vec = {1.1f, 2.2f, 3.3f};
  test_doc_->set("float_vec", float_vec);

  std::vector<std::string> string_vec = {"apple", "banana", "cherry"};
  test_doc_->set("string_vec", string_vec);

  SparseFp32 sparse_vec;
  sparse_vec.first = {1, 10, 100};
  sparse_vec.second = {0.1f, 1.0f, 10.0f};
  test_doc_->set("sparse_vec", sparse_vec);

  auto serialized = test_doc_->serialize();
  auto deserialized_doc =
      Doc::deserialize(serialized.data(), serialized.size());
  // Stop here on failure: every check below dereferences the result.
  ASSERT_NE(deserialized_doc, nullptr);

  EXPECT_EQ(deserialized_doc->get<bool>("bool_field").value(), true);
  EXPECT_EQ(deserialized_doc->get<int32_t>("int_field").value(), -1000);
  EXPECT_EQ(deserialized_doc->get<uint32_t>("uint_field").value(), 2000);
  EXPECT_FLOAT_EQ(deserialized_doc->get<float>("float_field").value(),
                  3.14159f);
  EXPECT_DOUBLE_EQ(deserialized_doc->get<double>("double_field").value(),
                   2.718281828459045);
  EXPECT_EQ(deserialized_doc->get<std::string>("string_field").value(),
            "Hello, World!");
  EXPECT_EQ(deserialized_doc->get<std::vector<int32_t>>("int_vec").value(),
            int_vec);
  EXPECT_EQ(deserialized_doc->get<std::vector<float>>("float_vec").value(),
            float_vec);
  EXPECT_EQ(
      deserialized_doc->get<std::vector<std::string>>("string_vec").value(),
      string_vec);

  auto deserialized_sparse = deserialized_doc->get<SparseFp32>("sparse_vec");
  // Both halves of the pair are inspected, so require a value first.
  ASSERT_TRUE(deserialized_sparse.has_value());
  EXPECT_EQ(deserialized_sparse.value().first, sparse_vec.first);
  EXPECT_EQ(deserialized_sparse.value().second, sparse_vec.second);
}

// Test doc validate with schema. Each scoped section builds its own schema
// and document through test::TestHelper and checks the Status returned by
// Doc::validate() for one scenario.
TEST_F(DocDetailedTest, Validate) {
  // test schema nullable=false, but doc's field is null
  {
    auto schema = test::TestHelper::CreateNormalSchema(false);
    auto doc = test::TestHelper::CreateDoc(1, *schema);

    auto s = doc.validate(schema);
    ASSERT_TRUE(s.ok());

    doc = test::TestHelper::CreateDocNull(1, *schema);
    s = doc.validate(schema);
    ASSERT_FALSE(s.ok());
    ASSERT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);
  }

  // schema created with `true` (presumably nullable=true -- confirm against
  // TestHelper): both the populated doc and the null doc are accepted
  {
    auto schema = test::TestHelper::CreateNormalSchema(true);
    auto doc = test::TestHelper::CreateDoc(1, *schema);

    auto s = doc.validate(schema);
    ASSERT_TRUE(s.ok());

    doc = test::TestHelper::CreateDocNull(1, *schema);
    s = doc.validate(schema);
    ASSERT_TRUE(s.ok());
  }

  // doc contained another field which not contained in schema
  {
    auto schema = test::TestHelper::CreateNormalSchema(false);
    auto doc = test::TestHelper::CreateDoc(1, *schema);
    auto s = doc.validate(schema);
    ASSERT_TRUE(s.ok());

    doc.set("another_field", 1);
    s = doc.validate(schema);
    ASSERT_FALSE(s.ok());
    ASSERT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);
  }

  // doc contained a mismatch scalar field
  {
    auto schema = test::TestHelper::CreateNormalSchema(false);
    auto doc = test::TestHelper::CreateDoc(1, *schema);
    auto s = doc.validate(schema);
    ASSERT_TRUE(s.ok());

    doc.set("int32", std::string("1"));
    s = doc.validate(schema);
    ASSERT_FALSE(s.ok());
    ASSERT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);
  }

  // doc contained a mismatch type vector field
  {
    auto schema = test::TestHelper::CreateNormalSchema(false);
    auto doc = test::TestHelper::CreateDoc(1, *schema);
    auto s = doc.validate(schema);
    ASSERT_TRUE(s.ok());

    std::string field = "dense_fp32";
    auto field_schema = schema->get_field(field);
    ASSERT_NE(field_schema, nullptr);

    doc.set(field, std::vector<int16_t>(field_schema->dimension(), 1));
    s = doc.validate(schema);
    ASSERT_FALSE(s.ok());
    ASSERT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);
  }

  // doc contained a vector field with invalid dimension
  {
    auto schema = test::TestHelper::CreateNormalSchema(false);
    auto doc = test::TestHelper::CreateDoc(1, *schema);
    auto s = doc.validate(schema);
    ASSERT_TRUE(s.ok());

    std::string field = "dense_fp32";
    auto field_schema = schema->get_field(field);
    ASSERT_NE(field_schema, nullptr);

    // one element short of the schema dimension
    doc.set(field, std::vector<float>(field_schema->dimension() - 1, 1.0));
    s = doc.validate(schema);
    ASSERT_FALSE(s.ok());
    ASSERT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);

    // empty vector
    doc.set(field, std::vector<float>());
    s = doc.validate(schema);
    ASSERT_FALSE(s.ok());
    ASSERT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);
  }

  // doc contained a sparse vector field with mismatch type
  {
    auto schema = test::TestHelper::CreateNormalSchema(false);
    auto doc = test::TestHelper::CreateDoc(1, *schema);
    auto s = doc.validate(schema);
    ASSERT_TRUE(s.ok());

    std::string field = "sparse_fp32";
    auto field_schema = schema->get_field(field);
    ASSERT_NE(field_schema, nullptr);

    doc.set(field, std::vector<int16_t>(field_schema->dimension(), 1));
    s = doc.validate(schema);
    ASSERT_FALSE(s.ok());
    ASSERT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);
  }

  // doc contained a sparse vector field with indices/values size mismatch
  {
    auto schema = test::TestHelper::CreateNormalSchema(false);
    auto doc = test::TestHelper::CreateDoc(1, *schema);
    auto s = doc.validate(schema);
    ASSERT_TRUE(s.ok());

    std::string field = "sparse_fp32";
    auto field_schema = schema->get_field(field);
    ASSERT_NE(field_schema, nullptr);

    std::vector<uint32_t> indices;
    std::vector<float> values;
    for (uint32_t i = 0; i < 100; i++) {
      indices.push_back(i);
      values.push_back(float(0.1));
    }
    // one extra value: 100 indices vs. 101 values
    values.push_back(float(0.1));
    std::pair<std::vector<uint32_t>, std::vector<float>> sparse_float_vec{
        indices, values};
    doc.set<std::pair<std::vector<uint32_t>, std::vector<float>>>(
        field, sparse_float_vec);
    s = doc.validate(schema);
    ASSERT_FALSE(s.ok());
    ASSERT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);
  }

  // doc validate error
  {
    Doc doc;
    // schema is null
    auto s = doc.validate(nullptr);
    EXPECT_EQ(s.code(), StatusCode::INTERNAL_ERROR);

    // pk is null
    auto schema = test::TestHelper::CreateNormalSchema(false);
    s = doc.validate(schema);
    EXPECT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);

    // field type is undefined
    schema->add_field(
        std::make_shared<FieldSchema>("undefined", DataType::UNDEFINED, true));
    s = doc.validate(schema);
    EXPECT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);
  }

  // doc validate more data type
  {
    auto schema = test::TestHelper::CreateNormalSchema(false);
    schema->add_field(
        std::make_shared<FieldSchema>("binary", DataType::BINARY, false));

    schema->add_field(std::make_shared<FieldSchema>(
        "array_binary", DataType::ARRAY_BINARY, false));

    schema->add_field(std::make_shared<FieldSchema>(
        "vector_binary32", DataType::VECTOR_BINARY32, 128, false,
        std::make_shared<FlatIndexParams>(MetricType::IP)));

    schema->add_field(std::make_shared<FieldSchema>(
        "vector_binary64", DataType::VECTOR_BINARY64, 128, false,
        std::make_shared<FlatIndexParams>(MetricType::IP)));

    schema->add_field(std::make_shared<FieldSchema>(
        "vector_int8", DataType::VECTOR_INT8, 128, false,
        std::make_shared<FlatIndexParams>(MetricType::IP)));

    schema->add_field(std::make_shared<FieldSchema>(
        "vector_int16", DataType::VECTOR_INT16, 128, false,
        std::make_shared<FlatIndexParams>(MetricType::IP)));

    schema->add_field(std::make_shared<FieldSchema>(
        "dense_fp16", DataType::VECTOR_FP16, 128, false,
        std::make_shared<FlatIndexParams>(MetricType::IP)));
    schema->add_field(std::make_shared<FieldSchema>(
        "dense_fp64", DataType::VECTOR_FP64, 128, false,
        std::make_shared<FlatIndexParams>(MetricType::IP)));
    schema->add_field(std::make_shared<FieldSchema>(
        "sparse_fp16", DataType::SPARSE_VECTOR_FP16, 128, false,
        std::make_shared<FlatIndexParams>(MetricType::IP)));
    schema->add_field(std::make_shared<FieldSchema>(
        "sparse_fp32", DataType::SPARSE_VECTOR_FP32, 128, false,
        std::make_shared<FlatIndexParams>(MetricType::IP)));

    auto doc = test::TestHelper::CreateDoc(1, *schema);

    auto s = doc.validate(schema);
    ASSERT_TRUE(s.ok());
  }

  // doc validate pk: every key below must be accepted
  {
    auto schema = test::TestHelper::CreateNormalSchema(false);
    std::vector<std::string> valid_names = {
        // Min length = 1
        "a",
        "Z",
        "0",
        "_",
        "-",
        "!",
        "@",
        "#",
        "$",
        "%",
        "+",
        "=",
        ".",

        // Mixed
        "a1_",
        "user.name",        // '.' allowed
        "test@example",     // '@' allowed
        "log_2025!@#",      // !@# allowed
        "metric+=value",    // + = allowed
        "score%change",     // % allowed
        "file-name_v1.2",   // -, _, . allowed
        "a-b_c.d!@#$%+=.",  // all specials in one

        // Max length = 64
        std::string(64, 'a'),
        std::string(63, 'a') + "_",
        "_" + std::string(62, 'x') + ".",
        "!" + std::string(62, '0') + "@",
    };
    for (const auto &pk : valid_names) {
      auto doc = test::TestHelper::CreateDoc(1, *schema, pk);
      auto s = doc.validate(schema);
      // Name the offending key so a failure is actionable.
      ASSERT_TRUE(s.ok()) << "valid pk was rejected: " << pk;
    }
  }

  // doc validate pk: every key below must be rejected
  {
    auto schema = test::TestHelper::CreateNormalSchema(false);
    std::vector<std::string> invalid_names = {
        // Too long (>64)
        std::string(65, 'a'), std::string(64, 'a') + "_",

        // Illegal characters
        "a b",   // space
        "a&b",   // & not in set
        "a*b",   // *
        "a(b)",  // ( )
        "a:b",   // :
        "a;b",   // ;
        "a/b",   // /
        "a\\b",  // backslash
        "a\"b",  // "
        "a'b",   // '
        "a<b",
        "a>b",  // < >
        "a?b",  // ?
        "a~b",  // ~
        "a`b",  // `
        "a[b",
        "a]b",  // [ ]
        "a{b",
        "a}b",     // { }
        "a|b",     // |
        "a^b",     // ^
        "a,b",     // ,
        "用户",    // non-ASCII (Chinese)
        "αβγ",     // Greek
        "résumé",  // accented chars (é not in [a-zA-Z])
    };
    for (const auto &pk : invalid_names) {
      auto doc = test::TestHelper::CreateDoc(1, *schema, pk);
      auto s = doc.validate(schema);
      // Name the offending key so a failure is actionable.
      ASSERT_FALSE(s.ok()) << "invalid pk was accepted: " << pk;
    }
  }
}

// Calls get_value_type_name() once for each kind of Doc::Value constructed
// below and checks the returned name. The second argument is false for the
// scalar / ARRAY_* expectations and true for the VECTOR_* / SPARSE_VECTOR_*
// ones (presumably an "is vector field" flag -- confirm at the declaration).
TEST_F(DocDetailedTest, GetValueTypeNameCoverage) {
  using SparseFp32 = std::pair<std::vector<uint32_t>, std::vector<float>>;
  using SparseFp16 =
      std::pair<std::vector<uint32_t>, std::vector<ailego::Float16>>;

  // --- Scalars ---
  Doc::Value as_bool = true;
  EXPECT_EQ(get_value_type_name(as_bool, false), "BOOL");

  Doc::Value as_int32 = static_cast<int32_t>(42);
  EXPECT_EQ(get_value_type_name(as_int32, false), "INT32");

  Doc::Value as_uint32 = static_cast<uint32_t>(42);
  EXPECT_EQ(get_value_type_name(as_uint32, false), "UINT32");

  Doc::Value as_int64 = static_cast<int64_t>(42);
  EXPECT_EQ(get_value_type_name(as_int64, false), "INT64");

  Doc::Value as_uint64 = static_cast<uint64_t>(42);
  EXPECT_EQ(get_value_type_name(as_uint64, false), "UINT64");

  Doc::Value as_float = 3.14f;
  EXPECT_EQ(get_value_type_name(as_float, false), "FLOAT");

  Doc::Value as_double = 3.14;
  EXPECT_EQ(get_value_type_name(as_double, false), "DOUBLE");

  Doc::Value as_string = std::string("test");
  EXPECT_EQ(get_value_type_name(as_string, false), "STRING");

  // --- Bool sequence, queried with the flag off ---
  Doc::Value bools = std::vector<bool>{true, false};
  EXPECT_EQ(get_value_type_name(bools, false), "ARRAY_BOOL");

  // --- Integer sequences, queried with the flag on ---
  Doc::Value int8s = std::vector<int8_t>{1, 2, 3};
  EXPECT_EQ(get_value_type_name(int8s, true), "VECTOR_INT8");

  Doc::Value int16s = std::vector<int16_t>{10, 20, 30};
  EXPECT_EQ(get_value_type_name(int16s, true), "VECTOR_INT16");

  Doc::Value int32s = std::vector<int32_t>{100, 200, 300};
  EXPECT_EQ(get_value_type_name(int32s, true), "VECTOR_INT32");

  Doc::Value int64s = std::vector<int64_t>{1000, 2000, 3000};
  EXPECT_EQ(get_value_type_name(int64s, true), "VECTOR_INT64");

  Doc::Value uint32s = std::vector<uint32_t>{10, 20, 30};
  EXPECT_EQ(get_value_type_name(uint32s, true), "VECTOR_UINT32");

  Doc::Value uint64s = std::vector<uint64_t>{100, 200, 300};
  EXPECT_EQ(get_value_type_name(uint64s, true), "VECTOR_UINT64");

  // --- Floating-point sequences, queried with the flag on ---
  Doc::Value floats = std::vector<float>{1.1f, 2.2f, 3.3f};
  EXPECT_EQ(get_value_type_name(floats, true), "VECTOR_FP32");

  Doc::Value doubles = std::vector<double>{1.1, 2.2, 3.3};
  EXPECT_EQ(get_value_type_name(doubles, true), "VECTOR_FP64");

  Doc::Value halves = std::vector<ailego::Float16>{
      ailego::Float16(1.1f), ailego::Float16(2.2f), ailego::Float16(3.3f)};
  EXPECT_EQ(get_value_type_name(halves, true), "VECTOR_FP16");

  // --- String sequence, queried with the flag off ---
  Doc::Value strings = std::vector<std::string>{"a", "b", "c"};
  EXPECT_EQ(get_value_type_name(strings, false), "ARRAY_STRING");

  // --- Sparse (indices, values) pairs ---
  Doc::Value sparse_floats = SparseFp32(std::vector<uint32_t>{1, 2, 3},
                                        std::vector<float>{1.1f, 2.2f, 3.3f});
  EXPECT_EQ(get_value_type_name(sparse_floats, true), "SPARSE_VECTOR_FP32");

  Doc::Value sparse_halves = SparseFp16(
      std::vector<uint32_t>{1, 2, 3},
      std::vector<ailego::Float16>{ailego::Float16(1.1f),
                                   ailego::Float16(2.2f),
                                   ailego::Float16(3.3f)});
  EXPECT_EQ(get_value_type_name(sparse_halves, true), "SPARSE_VECTOR_FP16");

  // --- monostate, i.e. a null value ---
  Doc::Value nothing = std::monostate{};
  EXPECT_EQ(get_value_type_name(nothing, false), "EMPTY");
}

// Fills a Doc with one field of every value kind used by this suite (scalars,
// dense sequences, sparse pairs and a null), serializes it, deserializes the
// buffer and checks that the scalar fields and the null marker read back.
TEST_F(DocDetailedTest, SerializeValueCoverage) {
  Doc doc;

  doc.set<bool>("bool_field", true);
  doc.set<int32_t>("int32_field", 42);
  doc.set<uint32_t>("uint32_field", 42);
  doc.set<int64_t>("int64_field", 42);
  doc.set<uint64_t>("uint64_field", 42);
  doc.set<float>("float_field", 3.14f);
  doc.set<double>("double_field", 3.14);
  doc.set<std::string>("string_field", "test");

  std::vector<bool> bool_vec = {true, false};
  doc.set<std::vector<bool>>("vector_bool_field", bool_vec);

  std::vector<int8_t> int8_vec = {1, 2, 3};
  doc.set<std::vector<int8_t>>("vector_int8_field", int8_vec);

  std::vector<int16_t> int16_vec = {10, 20, 30};
  doc.set<std::vector<int16_t>>("vector_int16_field", int16_vec);

  std::vector<int32_t> int32_vec = {100, 200, 300};
  doc.set<std::vector<int32_t>>("vector_int32_field", int32_vec);

  std::vector<int64_t> int64_vec = {1000, 2000, 3000};
  doc.set<std::vector<int64_t>>("vector_int64_field", int64_vec);

  std::vector<uint32_t> uint32_vec = {10, 20, 30};
  doc.set<std::vector<uint32_t>>("vector_uint32_field", uint32_vec);

  std::vector<uint64_t> uint64_vec = {100, 200, 300};
  doc.set<std::vector<uint64_t>>("vector_uint64_field", uint64_vec);

  std::vector<float> float_vec = {1.1f, 2.2f, 3.3f};
  doc.set<std::vector<float>>("vector_float_field", float_vec);

  std::vector<double> double_vec = {1.1, 2.2, 3.3};
  doc.set<std::vector<double>>("vector_double_field", double_vec);

  std::vector<ailego::Float16> float16_vec = {
      ailego::Float16(1.1f), ailego::Float16(2.2f), ailego::Float16(3.3f)};
  doc.set<std::vector<ailego::Float16>>("vector_float16_field", float16_vec);

  std::vector<std::string> string_vec = {"a", "b", "c"};
  doc.set<std::vector<std::string>>("vector_string_field", string_vec);

  std::pair<std::vector<uint32_t>, std::vector<float>> sparse_fp32(
      std::vector<uint32_t>{1, 2, 3}, std::vector<float>{1.1f, 2.2f, 3.3f});
  doc.set<std::pair<std::vector<uint32_t>, std::vector<float>>>(
      "sparse_fp32_field", sparse_fp32);

  std::pair<std::vector<uint32_t>, std::vector<ailego::Float16>> sparse_fp16(
      std::vector<uint32_t>{1, 2, 3},
      std::vector<ailego::Float16>{ailego::Float16(1.1f), ailego::Float16(2.2f),
                                   ailego::Float16(3.3f)});
  doc.set<std::pair<std::vector<uint32_t>, std::vector<ailego::Float16>>>(
      "sparse_fp16_field", sparse_fp16);

  // Test null value
  doc.set_null("null_field");

  // for code coverage
  EXPECT_GT(doc.to_detail_string().size(), doc.to_string().size());

  auto buffer = doc.serialize();
  EXPECT_FALSE(buffer.empty());

  auto deserialized_doc = Doc::deserialize(buffer.data(), buffer.size());
  // Fatal on purpose: every check below dereferences the result.
  ASSERT_NE(deserialized_doc, nullptr);

  EXPECT_EQ(deserialized_doc->get<bool>("bool_field"), true);
  EXPECT_EQ(deserialized_doc->get<int32_t>("int32_field"), 42);
  EXPECT_EQ(deserialized_doc->get<uint32_t>("uint32_field"), 42u);
  EXPECT_EQ(deserialized_doc->get<int64_t>("int64_field"), 42);
  EXPECT_EQ(deserialized_doc->get<uint64_t>("uint64_field"), 42u);
  EXPECT_FLOAT_EQ(deserialized_doc->get<float>("float_field").value(), 3.14f);
  EXPECT_DOUBLE_EQ(deserialized_doc->get<double>("double_field").value(), 3.14);
  EXPECT_EQ(deserialized_doc->get<std::string>("string_field"), "test");

  // Test null value deserialization: the field exists, is null, and therefore
  // has no value.
  EXPECT_TRUE(deserialized_doc->is_null("null_field"));
  EXPECT_FALSE(deserialized_doc->has_value("null_field"));
  EXPECT_TRUE(deserialized_doc->has("null_field"));
}

// Builds a Doc carrying a pk, doc id, score and one field of every value kind
// used by this suite, then checks that to_detail_string() is non-empty and
// mentions a few field names plus the text "null".
TEST_F(DocDetailedTest, ToDetailStringCoverage) {
  using SparseFp32 = std::pair<std::vector<uint32_t>, std::vector<float>>;
  using SparseFp16 =
      std::pair<std::vector<uint32_t>, std::vector<ailego::Float16>>;

  Doc doc;
  doc.set_pk("test_pk");
  doc.set_doc_id(1);
  doc.set_score(0.95f);

  // Scalar fields.
  doc.set<bool>("bool_field", true);
  doc.set<int32_t>("int32_field", 42);
  doc.set<uint32_t>("uint32_field", 42);
  doc.set<int64_t>("int64_field", 42);
  doc.set<uint64_t>("uint64_field", 42);
  doc.set<float>("float_field", 3.14f);
  doc.set<double>("double_field", 3.14);
  doc.set<std::string>("string_field", "test");

  // Two-element sequence fields.
  std::vector<bool> bools = {true, false};
  doc.set<std::vector<bool>>("vector_bool_field", bools);

  std::vector<int8_t> int8s = {1, 2};
  doc.set<std::vector<int8_t>>("vector_int8_field", int8s);

  std::vector<int16_t> int16s = {10, 20};
  doc.set<std::vector<int16_t>>("vector_int16_field", int16s);

  std::vector<int32_t> int32s = {100, 200};
  doc.set<std::vector<int32_t>>("vector_int32_field", int32s);

  std::vector<int64_t> int64s = {1000, 2000};
  doc.set<std::vector<int64_t>>("vector_int64_field", int64s);

  std::vector<uint32_t> uint32s = {10, 20};
  doc.set<std::vector<uint32_t>>("vector_uint32_field", uint32s);

  std::vector<uint64_t> uint64s = {100, 200};
  doc.set<std::vector<uint64_t>>("vector_uint64_field", uint64s);

  std::vector<float> floats = {1.1f, 2.2f};
  doc.set<std::vector<float>>("vector_float_field", floats);

  std::vector<double> doubles = {1.1, 2.2};
  doc.set<std::vector<double>>("vector_double_field", doubles);

  std::vector<ailego::Float16> halves = {ailego::Float16(1.1f),
                                         ailego::Float16(2.2f)};
  doc.set<std::vector<ailego::Float16>>("vector_float16_field", halves);

  std::vector<std::string> strings = {"a", "b"};
  doc.set<std::vector<std::string>>("vector_string_field", strings);

  // Sparse (indices, values) fields.
  SparseFp32 sparse_floats(std::vector<uint32_t>{1, 2},
                           std::vector<float>{1.1f, 2.2f});
  doc.set<SparseFp32>("sparse_fp32_field", sparse_floats);

  SparseFp16 sparse_halves(
      std::vector<uint32_t>{1, 2},
      std::vector<ailego::Float16>{ailego::Float16(1.1f),
                                   ailego::Float16(2.2f)});
  doc.set<SparseFp16>("sparse_fp16_field", sparse_halves);

  // A null field, so the null rendering path is exercised too.
  doc.set_null("null_field");

  const std::string rendered = doc.to_detail_string();
  EXPECT_FALSE(rendered.empty());

  // "null" is expected in the output for the null field; note that the name
  // "null_field" set above also contains that substring.
  for (const char *needle :
       {"bool_field", "int32_field", "vector_float_field", "null"}) {
    EXPECT_NE(rendered.find(needle), std::string::npos);
  }
}

// Builds two documents with identical pk, doc id and fields of every value
// kind used by this suite, checks that they compare equal, then perturbs one
// side at a time and checks that they compare unequal.
TEST_F(DocDetailedTest, EqualityOperatorCoverage) {
  using SparseFp32 = std::pair<std::vector<uint32_t>, std::vector<float>>;
  using SparseFp16 =
      std::pair<std::vector<uint32_t>, std::vector<ailego::Float16>>;

  Doc doc1, doc2;

  // Stores `value` under `name` in doc1 and then in doc2, passing the value's
  // own decayed type as the explicit template argument of Doc::set.
  auto set_on_both = [&doc1, &doc2](const std::string &name,
                                    const auto &value) {
    using FieldType = std::decay_t<decltype(value)>;
    doc1.set<FieldType>(name, value);
    doc2.set<FieldType>(name, value);
  };

  doc1.set_pk("test_pk");
  doc2.set_pk("test_pk");

  doc1.set_doc_id(1);
  doc2.set_doc_id(1);

  // Scalar fields.
  set_on_both("bool_field", true);
  set_on_both("int32_field", int32_t{42});
  set_on_both("uint32_field", uint32_t{42});
  set_on_both("int64_field", int64_t{42});
  set_on_both("uint64_field", uint64_t{42});
  set_on_both("float_field", 3.14f);
  set_on_both("double_field", 3.14);
  set_on_both("string_field", std::string("test"));

  // Two-element sequence fields.
  set_on_both("vector_bool_field", std::vector<bool>{true, false});
  set_on_both("vector_int8_field", std::vector<int8_t>{1, 2});
  set_on_both("vector_int16_field", std::vector<int16_t>{10, 20});
  set_on_both("vector_int32_field", std::vector<int32_t>{100, 200});
  set_on_both("vector_int64_field", std::vector<int64_t>{1000, 2000});
  set_on_both("vector_uint32_field", std::vector<uint32_t>{10, 20});
  set_on_both("vector_uint64_field", std::vector<uint64_t>{100, 200});
  set_on_both("vector_float_field", std::vector<float>{1.1f, 2.2f});
  set_on_both("vector_double_field", std::vector<double>{1.1, 2.2});
  set_on_both("vector_float16_field",
              std::vector<ailego::Float16>{ailego::Float16(1.1f),
                                           ailego::Float16(2.2f)});
  set_on_both("vector_string_field", std::vector<std::string>{"a", "b"});

  // Sparse (indices, values) fields.
  set_on_both("sparse_fp32_field",
              SparseFp32(std::vector<uint32_t>{1, 2},
                         std::vector<float>{1.1f, 2.2f}));
  set_on_both("sparse_fp16_field",
              SparseFp16(std::vector<uint32_t>{1, 2},
                         std::vector<ailego::Float16>{ailego::Float16(1.1f),
                                                      ailego::Float16(2.2f)}));

  // A null field on both sides.
  doc1.set_null("null_field");
  doc2.set_null("null_field");

  // Identical content compares equal.
  EXPECT_TRUE(doc1 == doc2);

  // A differing field value breaks equality.
  doc2.set<int32_t>("int32_field", 43);
  EXPECT_FALSE(doc1 == doc2);

  // A differing pk (on top of the differing value) is still unequal.
  doc1.set_pk("test_pk1");
  EXPECT_FALSE(doc1 == doc2);

  // Same pk again, but doc1 now stores the field as uint32_t.
  // NOTE(review): doc2 still holds 43 from above, so this check and the next
  // one differ in value as well and do not isolate the type / extra-field
  // cases on their own.
  doc1.set_pk("test_pk");
  doc1.set<uint32_t>("int32_field", 42);
  EXPECT_FALSE(doc1 == doc2);

  // doc1 gains a field that doc2 does not have.
  doc1.set<int32_t>("int32_field", 42);
  doc1.set<int32_t>("rename_int32_field", 42);
  EXPECT_FALSE(doc1 == doc2);

  // A null field versus a valued field of the same name is unequal.
  Doc doc3, doc4;
  doc3.set_pk("test");
  doc4.set_pk("test");
  doc3.set_null("null_field");
  doc4.set<int32_t>("null_field", 42);
  EXPECT_FALSE(doc3 == doc4);
}


// Exercises VectorQuery::validate() with and without a field schema. Each
// scoped section sets up one query and checks the returned Status.
TEST(VectorQuery, Validate) {
  // No field schema and no query vector: accepted.
  {
    VectorQuery query;
    query.topk_ = 10;
    query.field_name_ = "field_name";

    auto status = query.validate(nullptr);
    EXPECT_TRUE(status.ok());
  }

  // No field schema but a query vector is supplied: rejected.
  {
    VectorQuery query;
    query.topk_ = 10;
    query.field_name_ = "field_name";

    std::vector<float> components = {1.0f, 2.0f, 3.0f, 4.0f};
    std::string raw_vector(reinterpret_cast<char *>(components.data()),
                           components.size() * sizeof(float));
    query.query_vector_ = raw_vector;

    auto status = query.validate(nullptr);
    EXPECT_FALSE(status.ok());
    EXPECT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);
  }

  // topk of 1000 is rejected (per the original note: exceeds the topk limit).
  {
    VectorQuery query;
    query.field_name_ = "field_name";
    query.topk_ = 1000;

    FieldSchema schema("field_name", DataType::VECTOR_FP32, 128, true);
    auto status = query.validate(&schema);
    EXPECT_FALSE(status.ok());
    EXPECT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);
  }

  // 1025 output fields are rejected (per the original note: size exceeded).
  {
    VectorQuery query;
    query.field_name_ = "field_name";
    query.topk_ = 10;
    query.output_fields_ = std::vector<std::string>(1025);

    FieldSchema schema("field_name", DataType::INT32);
    auto status = query.validate(&schema);
    EXPECT_FALSE(status.ok());
    EXPECT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);
  }

  // Dense vector: four floats against a 4-dimensional FP32 field pass, while
  // the first three bytes of the same buffer fail.
  {
    VectorQuery query;
    query.field_name_ = "field_name";
    query.topk_ = 100;

    std::vector<float> components = {1.0f, 2.0f, 3.0f, 4.0f};
    std::string raw_vector(reinterpret_cast<char *>(components.data()),
                           components.size() * sizeof(float));
    query.query_vector_ = raw_vector;

    FieldSchema schema("field_name", DataType::VECTOR_FP32, 4, true);
    auto status = query.validate(&schema);
    EXPECT_TRUE(status.ok());

    query.query_vector_ = raw_vector.substr(0, 3);
    status = query.validate(&schema);
    EXPECT_FALSE(status.ok());
    EXPECT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);
  }

  // Sparse indices: 16385 uint32_t indices are rejected, while the first
  // three bytes of the same buffer are accepted.
  {
    VectorQuery query;
    query.field_name_ = "field_name";
    query.topk_ = 100;

    std::vector<uint32_t> indices(16385);
    std::string raw_indices(reinterpret_cast<char *>(indices.data()),
                            indices.size() * sizeof(uint32_t));
    query.query_sparse_indices_ = raw_indices;

    FieldSchema schema("field_name", DataType::SPARSE_VECTOR_FP32);
    auto status = query.validate(&schema);
    EXPECT_FALSE(status.ok());
    EXPECT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);

    query.query_sparse_indices_ = raw_indices.substr(0, 3);
    status = query.validate(&schema);
    EXPECT_TRUE(status.ok());
  }
}

// Test null value: set_null() must make a field exist without a value, typed
// reads of it must report IS_NULL / empty, a real value and null must be able
// to overwrite each other, and the null marker must survive serialization.
TEST_F(DocDetailedTest, NullValue) {
  Doc doc;

  // Test setting null value
  doc.set_null("null_field");
  EXPECT_TRUE(doc.is_null("null_field"));
  EXPECT_FALSE(doc.has_value("null_field"));
  EXPECT_TRUE(doc.has("null_field"));

  // Test get_field with null field
  auto result = doc.get_field<int32_t>("null_field");
  EXPECT_EQ(result.status(), Doc::FieldGetStatus::IS_NULL);
  EXPECT_FALSE(result.ok());

  // Test get with null field
  auto opt_result = doc.get<int32_t>("null_field");
  EXPECT_FALSE(opt_result.has_value());

  // Test overwriting null with actual value
  doc.set<int32_t>("null_field", 42);
  EXPECT_FALSE(doc.is_null("null_field"));
  EXPECT_TRUE(doc.has_value("null_field"));
  EXPECT_TRUE(doc.has("null_field"));
  EXPECT_EQ(doc.get<int32_t>("null_field").value(), 42);

  // Test overwriting value with null
  doc.set_null("null_field");
  EXPECT_TRUE(doc.is_null("null_field"));
  EXPECT_FALSE(doc.has_value("null_field"));
  EXPECT_TRUE(doc.has("null_field"));

  // Test serialization/deserialization of null values
  auto buffer = doc.serialize();
  auto deserialized_doc = Doc::deserialize(buffer.data(), buffer.size());
  // Fatal on purpose: the checks below dereference the result.
  ASSERT_NE(deserialized_doc, nullptr);
  EXPECT_TRUE(deserialized_doc->is_null("null_field"));
  EXPECT_FALSE(deserialized_doc->has_value("null_field"));
  EXPECT_TRUE(deserialized_doc->has("null_field"));
}

// Test field existence checks
TEST_F(DocDetailedTest, FieldExistenceChecks) {
  Doc doc;

  // A field that was never set is neither present, valued, nor null.
  EXPECT_FALSE(doc.has("nonexistent"));
  EXPECT_FALSE(doc.has_value("nonexistent"));
  EXPECT_FALSE(doc.is_null("nonexistent"));

  // get_field() reports NOT_FOUND for the missing field.
  auto missing_lookup = doc.get_field<int32_t>("nonexistent");
  EXPECT_EQ(missing_lookup.status(), Doc::FieldGetStatus::NOT_FOUND);
  EXPECT_FALSE(missing_lookup.ok());

  // get() returns an empty result for the missing field.
  auto missing_value = doc.get<int32_t>("nonexistent");
  EXPECT_FALSE(missing_value.has_value());

  // Once a value is stored, the field is present, valued, and not null.
  doc.set<int32_t>("existent", 123);
  EXPECT_TRUE(doc.has("existent"));
  EXPECT_TRUE(doc.has_value("existent"));
  EXPECT_FALSE(doc.is_null("existent"));

  // Reading the int32 field as a string is a type mismatch for get_field()...
  auto wrong_type_lookup = doc.get_field<std::string>("existent");
  EXPECT_EQ(wrong_type_lookup.status(), Doc::FieldGetStatus::TYPE_MISMATCH);
  EXPECT_FALSE(wrong_type_lookup.ok());

  // ...and produces an empty result from get().
  auto wrong_type_value = doc.get<std::string>("existent");
  EXPECT_FALSE(wrong_type_value.has_value());
}

================================================
FILE: tests/db/index/common/index_params_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "zvec/db/index_params.h"
#include <gtest/gtest.h>

using namespace zvec;

TEST(IndexParamsTest, IndexParamsBaseClass) {
  // The base class is never constructed here; is_vector_index_type() is
  // exercised through the concrete parameter classes instead.
  HnswIndexParams hnsw(MetricType::L2, 16, 100);
  FlatIndexParams flat(MetricType::IP);
  IVFIndexParams ivf(MetricType::COSINE, 100);
  InvertIndexParams invert(true);

  // HNSW, FLAT and IVF parameters describe vector indexes.
  EXPECT_TRUE(hnsw.is_vector_index_type());
  EXPECT_TRUE(flat.is_vector_index_type());
  EXPECT_TRUE(ivf.is_vector_index_type());

  // Inverted index parameters do not.
  EXPECT_FALSE(invert.is_vector_index_type());
}

TEST(IndexParamsTest, InvertIndexParams) {
  // Constructor stores the range-optimization flag and reports INVERT.
  InvertIndexParams params(true);
  EXPECT_EQ(params.type(), IndexType::INVERT);
  EXPECT_TRUE(params.enable_range_optimization());

  InvertIndexParams params2(false);
  EXPECT_FALSE(params2.enable_range_optimization());

  // clone() must produce a distinct object of the same index type.
  auto cloned = params.clone();
  // Fatal check: a null clone would trivially differ from &params and then be
  // dereferenced by the type() expectation below.
  ASSERT_NE(cloned.get(), nullptr);
  EXPECT_NE(cloned.get(), &params);  // Should be different objects
  EXPECT_EQ(cloned->type(), IndexType::INVERT);

  // Equality follows the range-optimization flag.
  InvertIndexParams params3(true);
  InvertIndexParams params4(false);

  EXPECT_TRUE(params == params3);
  EXPECT_FALSE(params == params4);
  EXPECT_TRUE(params != params4);

  // After the setter flips the flag, params2 compares equal to params.
  params2.set_enable_range_optimization(true);
  EXPECT_TRUE(params2.enable_range_optimization());
  EXPECT_TRUE(params2 == params);
}

TEST(IndexParamsTest, VectorIndexParamsBase) {
  // Metric and quantize type passed to the constructor are readable back.
  FlatIndexParams flat(MetricType::L2, QuantizeType::FP16);
  EXPECT_EQ(flat.type(), IndexType::FLAT);
  EXPECT_EQ(flat.metric_type(), MetricType::L2);
  EXPECT_EQ(flat.quantize_type(), QuantizeType::FP16);

  // The metric setter replaces the stored metric.
  flat.set_metric_type(MetricType::IP);
  EXPECT_EQ(flat.metric_type(), MetricType::IP);

  // The quantize setter replaces the stored quantize type.
  flat.set_quantize_type(QuantizeType::INT8);
  EXPECT_EQ(flat.quantize_type(), QuantizeType::INT8);
}

TEST(IndexParamsTest, HnswIndexParams) {
  // Constructor arguments are readable back through the accessors.
  HnswIndexParams params(MetricType::COSINE, 20, 150, QuantizeType::INT4);
  EXPECT_EQ(params.type(), IndexType::HNSW);
  EXPECT_EQ(params.metric_type(), MetricType::COSINE);
  EXPECT_EQ(params.m(), 20);
  EXPECT_EQ(params.ef_construction(), 150);
  EXPECT_EQ(params.quantize_type(), QuantizeType::INT4);

  // clone() must produce a distinct object of the same index type.
  auto cloned = params.clone();
  // Fatal check: a null clone would trivially differ from &params and then be
  // dereferenced by the type() expectation below.
  ASSERT_NE(cloned.get(), nullptr);
  EXPECT_NE(cloned.get(), &params);
  EXPECT_EQ(cloned->type(), IndexType::HNSW);

  // Each of params3..params5 differs from params in exactly one argument
  // (metric, m, ef_construction respectively); params2 is identical.
  HnswIndexParams params2(MetricType::COSINE, 20, 150, QuantizeType::INT4);
  HnswIndexParams params3(MetricType::L2, 20, 150, QuantizeType::INT4);
  HnswIndexParams params4(MetricType::COSINE, 16, 150, QuantizeType::INT4);
  HnswIndexParams params5(MetricType::COSINE, 20, 200, QuantizeType::INT4);

  EXPECT_TRUE(params == params2);
  EXPECT_FALSE(params == params3);
  EXPECT_FALSE(params == params4);
  EXPECT_FALSE(params == params5);

  // Setters replace the stored values.
  params.set_m(10);
  EXPECT_EQ(params.m(), 10);

  params.set_ef_construction(75);
  EXPECT_EQ(params.ef_construction(), 75);
}

TEST(IndexParamsTest, FlatIndexParams) {
  // Constructor arguments are readable back through the accessors.
  FlatIndexParams params(MetricType::IP, QuantizeType::FP16);
  EXPECT_EQ(params.type(), IndexType::FLAT);
  EXPECT_EQ(params.metric_type(), MetricType::IP);
  EXPECT_EQ(params.quantize_type(), QuantizeType::FP16);

  // clone() must produce a distinct object of the same index type.
  auto cloned = params.clone();
  // Fatal check: a null clone would trivially differ from &params and then be
  // dereferenced by the type() expectation below.
  ASSERT_NE(cloned.get(), nullptr);
  EXPECT_NE(cloned.get(), &params);
  EXPECT_EQ(cloned->type(), IndexType::FLAT);

  // params2 is identical; params3 differs in metric, params4 in quantize type.
  FlatIndexParams params2(MetricType::IP, QuantizeType::FP16);
  FlatIndexParams params3(MetricType::L2, QuantizeType::FP16);
  FlatIndexParams params4(MetricType::IP, QuantizeType::INT8);

  EXPECT_TRUE(params == params2);
  EXPECT_FALSE(params == params3);
  EXPECT_FALSE(params == params4);
}

TEST(IndexParamsTest, IVFIndexParams) {
  // Constructor arguments are readable back through the accessors.
  // NOTE(review): the third and fourth constructor arguments (10, false) are
  // not verified by any accessor in this test.
  IVFIndexParams params(MetricType::L2, 128, 10, false, QuantizeType::INT8);
  EXPECT_EQ(params.type(), IndexType::IVF);
  EXPECT_EQ(params.metric_type(), MetricType::L2);
  EXPECT_EQ(params.n_list(), 128);
  EXPECT_EQ(params.quantize_type(), QuantizeType::INT8);

  // clone() must produce a distinct object of the same index type.
  auto cloned = params.clone();
  // Fatal check: a null clone would trivially differ from &params and then be
  // dereferenced by the type() expectation below.
  ASSERT_NE(cloned.get(), nullptr);
  EXPECT_NE(cloned.get(), &params);
  EXPECT_EQ(cloned->type(), IndexType::IVF);

  // params2 is identical; params3 differs in metric, params4 in n_list,
  // params5 in quantize type.
  IVFIndexParams params2(MetricType::L2, 128, 10, false, QuantizeType::INT8);
  IVFIndexParams params3(MetricType::IP, 128, 10, false, QuantizeType::INT8);
  IVFIndexParams params4(MetricType::L2, 256, 10, false, QuantizeType::INT8);
  IVFIndexParams params5(MetricType::L2, 128, 10, false, QuantizeType::FP16);

  EXPECT_TRUE(params == params2);
  EXPECT_FALSE(params == params3);
  EXPECT_FALSE(params == params4);
  EXPECT_FALSE(params == params5);

  // The n_list setter replaces the stored value.
  params.set_n_list(64);
  EXPECT_EQ(params.n_list(), 64);
}

TEST(IndexParamsTest, DefaultVectorIndexParams) {
  // The library-provided default is expected to be a FLAT index using the IP
  // metric with no quantization configured.
  const auto &defaults = DefaultVectorIndexParams;
  EXPECT_EQ(defaults.type(), IndexType::FLAT);
  EXPECT_EQ(defaults.metric_type(), MetricType::IP);
  EXPECT_EQ(defaults.quantize_type(), QuantizeType::UNDEFINED);
}

TEST(IndexParamsTest, DynamicPointerCast) {
  // Downcasting a base pointer to its real dynamic type succeeds.
  IndexParams::Ptr base_ptr =
      std::make_shared<HnswIndexParams>(MetricType::L2, 16, 100);
  auto hnsw_ptr = std::dynamic_pointer_cast<HnswIndexParams>(base_ptr);
  // Fatal check: the next expectation dereferences hnsw_ptr.
  ASSERT_NE(hnsw_ptr, nullptr);
  EXPECT_EQ(hnsw_ptr->type(), IndexType::HNSW);

  // Downcasting to an unrelated derived type yields a null pointer.
  auto flat_ptr = std::dynamic_pointer_cast<FlatIndexParams>(base_ptr);
  EXPECT_EQ(flat_ptr, nullptr);

  // The reference form of dynamic_cast works on the base reference as well.
  IndexParams &base_ref = *base_ptr;
  auto &hnsw_ref = dynamic_cast<HnswIndexParams &>(base_ref);
  EXPECT_EQ(hnsw_ref.type(), IndexType::HNSW);
}

================================================
FILE: tests/db/index/common/meta_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "db/index/common/meta.h"
#include <gtest/gtest.h>

using namespace zvec;

TEST(SegmentMetaTest, DefaultConstruction) {
  // A default-constructed segment has id 0, no blocks, zeroed doc-id range
  // and count, and no indexed vector fields.
  SegmentMeta meta;
  EXPECT_EQ(meta.id(), 0u);
  EXPECT_TRUE(meta.persisted_blocks().empty());
  EXPECT_FALSE(meta.has_writing_forward_block());
  EXPECT_EQ(meta.min_doc_id(), 0u);
  EXPECT_EQ(meta.max_doc_id(), 0u);
  EXPECT_EQ(meta.doc_count(), 0u);
  EXPECT_TRUE(meta.indexed_vector_fields().empty());
}

TEST(SegmentMetaTest, ConstructionWithID) {
  // Constructing with an id stores that id and leaves everything else empty.
  SegmentMeta meta(42);
  EXPECT_EQ(meta.id(), 42u);
  EXPECT_TRUE(meta.persisted_blocks().empty());
  EXPECT_FALSE(meta.has_writing_forward_block());
  EXPECT_EQ(meta.min_doc_id(), 0u);
  EXPECT_EQ(meta.max_doc_id(), 0u);
  EXPECT_EQ(meta.doc_count(), 0u);
  EXPECT_TRUE(meta.indexed_vector_fields().empty());
}

TEST(SegmentMetaTest, PersistedBlocksOperations) {
  SegmentMeta segment_meta(1);

  // Build two persisted blocks with different types, ranges and columns.
  BlockMeta block1(1, BlockType::SCALAR, 0, 100);
  block1.set_doc_count(50);
  block1.add_column("col1");
  block1.add_column("col2");

  BlockMeta block2(2, BlockType::VECTOR_INDEX, 101, 200);
  block2.set_doc_count(75);
  block2.add_column("vec_col");

  segment_meta.add_persisted_block(block1);
  segment_meta.add_persisted_block(block2);

  // Fatal check: the expectations below index blocks[0] and blocks[1], which
  // would read out of bounds if fewer than two blocks were stored.
  ASSERT_EQ(segment_meta.persisted_blocks().size(), 2u);

  // Blocks are expected back in insertion order with their metadata intact.
  const auto &blocks = segment_meta.persisted_blocks();
  EXPECT_EQ(blocks[0].id(), 1u);
  EXPECT_EQ(blocks[0].type(), BlockType::SCALAR);
  EXPECT_EQ(blocks[0].min_doc_id(), 0u);
  EXPECT_EQ(blocks[0].max_doc_id(), 100u);
  EXPECT_EQ(blocks[0].doc_count(), 50u);
  EXPECT_EQ(blocks[0].columns().size(), 2u);

  EXPECT_EQ(blocks[1].id(), 2u);
  EXPECT_EQ(blocks[1].type(), BlockType::VECTOR_INDEX);
  EXPECT_EQ(blocks[1].min_doc_id(), 101u);
  EXPECT_EQ(blocks[1].max_doc_id(), 200u);
  EXPECT_EQ(blocks[1].doc_count(), 75u);
  EXPECT_EQ(blocks[1].columns().size(), 1u);
}

TEST(SegmentMetaTest, WritingForwardBlockOperations) {
  SegmentMeta segment_meta(1);

  // A fresh segment has no writing forward block.
  EXPECT_FALSE(segment_meta.has_writing_forward_block());

  // Install a writing forward block.
  BlockMeta writing_block(3, BlockType::SCALAR, 201, 300);
  writing_block.set_doc_count(25);
  writing_block.add_column("col3");

  segment_meta.set_writing_forward_block(writing_block);

  // Fatal check: the expectations below call value() on the stored block.
  ASSERT_TRUE(segment_meta.has_writing_forward_block());

  // The stored block must carry the metadata it was created with.
  const auto &wfb = segment_meta.writing_forward_block();
  EXPECT_EQ(wfb.value().id(), 3u);
  EXPECT_EQ(wfb.value().type(), BlockType::SCALAR);
  EXPECT_EQ(wfb.value().min_doc_id(), 201u);
  EXPECT_EQ(wfb.value().max_doc_id(), 300u);
  EXPECT_EQ(wfb.value().doc_count(), 25u);
  // Fatal check: columns()[0] below would read out of bounds on an empty list.
  ASSERT_EQ(wfb.value().columns().size(), 1u);
  EXPECT_EQ(wfb.value().columns()[0], "col3");
}

TEST(SegmentMetaTest, MinDocIDCalculation) {
  SegmentMeta meta(1);

  // With no blocks of any kind the minimum doc id is 0.
  EXPECT_EQ(meta.min_doc_id(), 0u);

  // With only a writing forward block, its minimum (100) is reported.
  BlockMeta forward_block(1, BlockType::SCALAR, 100, 200);
  meta.set_writing_forward_block(forward_block);
  EXPECT_EQ(meta.min_doc_id(), 100u);

  // Once a persisted block exists, its minimum (50) is reported instead.
  BlockMeta stored_block(1, BlockType::SCALAR, 50, 150);
  meta.add_persisted_block(stored_block);
  EXPECT_EQ(meta.min_doc_id(), 50u);
}

TEST(SegmentMetaTest, MaxDocIDCalculation) {
  SegmentMeta segment_meta(1);

  // Case 1: No blocks at all
  EXPECT_EQ(segment_meta.max_doc_id(), 0u);

  // Case 2: Only persisted blocks
  BlockMeta persisted_block(1, BlockType::SCALAR, 0, 100);
  segment_meta.add_persisted_block(persisted_block);
  EXPECT_EQ(segment_meta.max_doc_id(), 100u);

  // Case 3: Both persisted and writing forward blocks. The expected maximum
  // stays at the persisted block's 100 rather than the writing forward
  // block's 200, i.e. the writing forward block does NOT take precedence.
  BlockMeta writing_block(2, BlockType::SCALAR, 101, 200);
  segment_meta.set_writing_forward_block(writing_block);
  EXPECT_EQ(segment_meta.max_doc_id(), 100u);

  // Case 4: Only writing forward block. The expected maximum is 0, i.e. the
  // writing forward block alone does not contribute to max_doc_id().
  SegmentMeta segment_meta2(2);
  segment_meta2.set_writing_forward_block(writing_block);
  EXPECT_EQ(segment_meta2.max_doc_id(), 0u);
}

TEST(SegmentMetaTest, DocCountCalculation) {
  SegmentMeta segment_meta(1);

  // Initially 0
  EXPECT_EQ(segment_meta.doc_count(), 0u);

  // A persisted SCALAR block contributes its doc count.
  BlockMeta block1(1, BlockType::SCALAR, 0, 100);
  block1.set_doc_count(50);
  segment_meta.add_persisted_block(block1);

  EXPECT_EQ(segment_meta.doc_count(), 50u);

  // A persisted VECTOR_INDEX block is expected to leave the total unchanged.
  // NOTE(review): presumably only SCALAR blocks are counted — confirm against
  // SegmentMeta::doc_count().
  BlockMeta block2(2, BlockType::VECTOR_INDEX, 101, 200);
  block2.set_doc_count(75);
  segment_meta.add_persisted_block(block2);

  EXPECT_EQ(segment_meta.doc_count(), 50u);

  // The SCALAR writing forward block adds its 25 docs: 50 + 25 = 75.
  BlockMeta writing_block(3, BlockType::SCALAR, 201, 300);
  writing_block.set_doc_count(25);
  segment_meta.set_writing_forward_block(writing_block);

  // Unsigned literal, matching every other doc_count() comparison here and
  // avoiding a signed/unsigned comparison.
  EXPECT_EQ(segment_meta.doc_count(), 75u);
}

TEST(SegmentMetaTest, IndexedVectorFieldsOperations) {
  SegmentMeta meta(1);

  // Nothing is indexed on a fresh segment.
  EXPECT_FALSE(meta.vector_indexed("field1"));
  EXPECT_TRUE(meta.indexed_vector_fields().empty());

  // Fields added one at a time become indexed; others stay unindexed.
  meta.add_indexed_vector_field("field1");
  meta.add_indexed_vector_field("field2");

  EXPECT_TRUE(meta.vector_indexed("field1"));
  EXPECT_TRUE(meta.vector_indexed("field2"));
  EXPECT_FALSE(meta.vector_indexed("field3"));

  EXPECT_EQ(meta.indexed_vector_fields().size(), 2u);

  // Setting the whole collection replaces the previous contents.
  std::set<std::string> replacement = {"field3", "field4"};
  meta.set_indexed_vector_fields(replacement);

  EXPECT_FALSE(meta.vector_indexed("field1"));
  EXPECT_FALSE(meta.vector_indexed("field2"));
  EXPECT_TRUE(meta.vector_indexed("field3"));
  EXPECT_TRUE(meta.vector_indexed("field4"));
  EXPECT_EQ(meta.indexed_vector_fields().size(), 2u);
}

TEST(SegmentMetaTest, UpdateMaxDocId) {
  SegmentMeta meta(1);

  // Updating without a writing forward block must simply not crash.
  meta.update_max_doc_id(100);

  // With a writing forward block installed, its max doc id starts at 50...
  BlockMeta forward_block(1, BlockType::SCALAR, 0, 50);
  meta.set_writing_forward_block(forward_block);
  EXPECT_EQ(meta.writing_forward_block().value().max_doc_id(), 50u);

  // ...and the update raises it to 100.
  meta.update_max_doc_id(100);
  EXPECT_EQ(meta.writing_forward_block().value().max_doc_id(), 100u);
}

TEST(SegmentMetaTest, EqualityOperators) {
  SegmentMeta first(1);
  SegmentMeta twin(1);
  SegmentMeta other_id(2);

  // Two empty segments with the same id are equal.
  EXPECT_TRUE(first == twin);
  EXPECT_FALSE(first != twin);

  // A different id makes segments unequal.
  EXPECT_FALSE(first == other_id);
  EXPECT_TRUE(first != other_id);

  // Identical persisted blocks keep them equal.
  BlockMeta shared_block(1, BlockType::SCALAR, 0, 100);
  shared_block.set_doc_count(50);
  first.add_persisted_block(shared_block);
  twin.add_persisted_block(shared_block);

  EXPECT_TRUE(first == twin);

  // Identical writing forward blocks keep them equal.
  BlockMeta shared_forward(2, BlockType::VECTOR_INDEX, 101, 200);
  first.set_writing_forward_block(shared_forward);
  twin.set_writing_forward_block(shared_forward);

  EXPECT_TRUE(first == twin);

  // Identical indexed vector fields keep them equal.
  first.add_indexed_vector_field("vec_field");
  twin.add_indexed_vector_field("vec_field");

  EXPECT_TRUE(first == twin);

  // An extra indexed field on one side breaks equality.
  first.add_indexed_vector_field("vec_field2");

  EXPECT_FALSE(first == twin);
  EXPECT_TRUE(first != twin);
}

================================================
FILE: tests/db/index/common/query_params_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "zvec/db/query_params.h"
#include <gtest/gtest.h>

using namespace zvec;

TEST(QueryParamsTest, QueryParamsBaseClass) {
  // The index type given at construction is readable back.
  QueryParams base(IndexType::HNSW);
  EXPECT_EQ(base.type(), IndexType::HNSW);

  // set_type() replaces the stored index type.
  base.set_type(IndexType::IVF);
  EXPECT_EQ(base.type(), IndexType::IVF);
}

TEST(QueryParamsTest, HnswQueryParams) {
  // Construction reports the HNSW type and stores the ef value.
  HnswQueryParams hnsw(100);
  EXPECT_EQ(hnsw.type(), IndexType::HNSW);
  EXPECT_EQ(hnsw.ef(), 100);

  // set_ef() replaces the stored ef value.
  hnsw.set_ef(200);
  EXPECT_EQ(hnsw.ef(), 200);
}

TEST(QueryParamsTest, IVFQueryParams) {
  // Construction reports the IVF type and stores the nprobe value.
  IVFQueryParams ivf(50);
  EXPECT_EQ(ivf.type(), IndexType::IVF);
  EXPECT_EQ(ivf.nprobe(), 50);

  // set_nprobe() replaces the stored nprobe value.
  ivf.set_nprobe(75);
  EXPECT_EQ(ivf.nprobe(), 75);
}

TEST(QueryParamsTest, Polymorphism) {
  // Hold both derived parameter objects through base-class pointers.
  QueryParams::Ptr hnsw_ptr = std::make_shared<HnswQueryParams>(100);
  QueryParams::Ptr ivf_ptr = std::make_shared<IVFQueryParams>(50);

  // type() is reported correctly through the base pointer.
  EXPECT_EQ(hnsw_ptr->type(), IndexType::HNSW);
  EXPECT_EQ(ivf_ptr->type(), IndexType::IVF);

  // Downcast to the matching type and, for one case, to a mismatching type.
  auto hnsw_cast = std::dynamic_pointer_cast<HnswQueryParams>(hnsw_ptr);
  auto ivf_cast = std::dynamic_pointer_cast<IVFQueryParams>(ivf_ptr);
  auto invalid_cast = std::dynamic_pointer_cast<HnswQueryParams>(ivf_ptr);

  // Fatal checks: both successful casts are dereferenced below.
  ASSERT_NE(hnsw_cast, nullptr);
  ASSERT_NE(ivf_cast, nullptr);
  EXPECT_EQ(invalid_cast, nullptr);

  // The derived-specific values survive the round trip through the base.
  EXPECT_EQ(hnsw_cast->ef(), 100);
  EXPECT_EQ(ivf_cast->nprobe(), 50);
}

TEST(QueryParamsTest, VirtualDestructor) {
  // Derived objects are deliberately owned through raw base-class pointers so
  // that the deletes below go through the base type.
  QueryParams *hnsw_as_base = new HnswQueryParams(100);
  QueryParams *ivf_as_base = new IVFQueryParams(50);

  // Deleting through the base pointer should not cause memory issues.
  delete hnsw_as_base;
  delete ivf_as_base;
}

================================================
FILE: tests/db/index/common/schema_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "zvec/db/schema.h"
#include <gtest/gtest.h>
#include "zvec/db/index_params.h"
#include "zvec/db/status.h"

using namespace zvec;

TEST(FieldSchemaTest, DefaultConstructor) {
  // A default-constructed field has an empty name, an undefined data type,
  // is not nullable, has zero dimension and no index parameters.
  FieldSchema blank;
  EXPECT_EQ(blank.name(), "");
  EXPECT_EQ(blank.data_type(), DataType::UNDEFINED);
  EXPECT_FALSE(blank.nullable());
  EXPECT_EQ(blank.dimension(), 0u);
  EXPECT_EQ(blank.index_params(), nullptr);
}

TEST(FieldSchemaTest, ConstructorWithParameters) {
  // Build a nullable 128-dimensional FP32 vector field with HNSW params.
  auto index_params =
      std::make_shared<HnswIndexParams>(MetricType::L2, 16, 100);
  FieldSchema field("test_field", DataType::VECTOR_FP32, 128, true,
                    index_params);

  // Every constructor argument is readable back through the accessors.
  EXPECT_EQ(field.name(), "test_field");
  EXPECT_EQ(field.data_type(), DataType::VECTOR_FP32);
  EXPECT_TRUE(field.nullable());
  EXPECT_EQ(field.dimension(), 128u);
  // Fatal check: the next expectation dereferences index_params().
  ASSERT_NE(field.index_params(), nullptr);
  EXPECT_EQ(field.index_params()->type(), IndexType::HNSW);
}

TEST(FieldSchemaTest, SettersAndGetters) {
  FieldSchema schema;

  // Each setter is followed immediately by a read-back of the same property.
  schema.set_name("new_field");
  EXPECT_EQ(schema.name(), "new_field");

  schema.set_data_type(DataType::STRING);
  EXPECT_EQ(schema.data_type(), DataType::STRING);

  schema.set_nullable(true);
  EXPECT_TRUE(schema.nullable());

  schema.set_dimension(256);
  EXPECT_EQ(schema.dimension(), 256u);
}

TEST(FieldSchemaTest, ElementDataType) {
  // Pairs an array data type with the element type it is expected to report.
  struct Mapping {
    DataType array_type;
    DataType element_type;
  };
  const Mapping mappings[] = {
      {DataType::ARRAY_BINARY, DataType::BINARY},
      {DataType::ARRAY_STRING, DataType::STRING},
      {DataType::ARRAY_BOOL, DataType::BOOL},
      {DataType::ARRAY_INT32, DataType::INT32},
      {DataType::ARRAY_INT64, DataType::INT64},
      {DataType::ARRAY_UINT32, DataType::UINT32},
      {DataType::ARRAY_UINT64, DataType::UINT64},
      {DataType::ARRAY_FLOAT, DataType::FLOAT},
      {DataType::ARRAY_DOUBLE, DataType::DOUBLE},
  };

  // The same field object is re-typed for every mapping, in table order.
  FieldSchema array_field;
  for (const Mapping &mapping : mappings) {
    array_field.set_data_type(mapping.array_type);
    EXPECT_EQ(array_field.element_data_type(), mapping.element_type);
  }

  // A non-array type reports itself as its element type.
  FieldSchema non_array_field;
  non_array_field.set_data_type(DataType::STRING);
  EXPECT_EQ(non_array_field.element_data_type(), DataType::STRING);
}

TEST(FieldSchemaTest, VectorFieldDetection) {
  FieldSchema field;

  // Re-types the shared field and checks all three instance predicates.
  // A field counts as a vector field when it is dense or sparse.
  const auto expect_kind = [&field](DataType type, bool is_dense,
                                    bool is_sparse) {
    field.set_data_type(type);
    EXPECT_EQ(field.is_vector_field(), is_dense || is_sparse);
    EXPECT_EQ(field.is_dense_vector(), is_dense);
    EXPECT_EQ(field.is_sparse_vector(), is_sparse);
  };

  // Dense vector types.
  expect_kind(DataType::VECTOR_BINARY32, true, false);
  expect_kind(DataType::VECTOR_FP32, true, false);
  expect_kind(DataType::VECTOR_INT16, true, false);

  // Sparse vector type.
  expect_kind(DataType::SPARSE_VECTOR_FP32, false, true);

  // Non-vector type.
  expect_kind(DataType::STRING, false, false);

  // The static helpers classify a data type without needing a field.
  EXPECT_TRUE(FieldSchema::is_dense_vector_field(DataType::VECTOR_FP32));
  EXPECT_FALSE(FieldSchema::is_dense_vector_field(DataType::STRING));

  EXPECT_TRUE(
      FieldSchema::is_sparse_vector_field(DataType::SPARSE_VECTOR_FP32));
  EXPECT_FALSE(FieldSchema::is_sparse_vector_field(DataType::VECTOR_FP32));

  EXPECT_TRUE(FieldSchema::is_vector_field(DataType::VECTOR_FP32));
  EXPECT_TRUE(FieldSchema::is_vector_field(DataType::SPARSE_VECTOR_FP32));
  EXPECT_FALSE(FieldSchema::is_vector_field(DataType::STRING));
}

TEST(FieldSchemaTest, ArrayTypeDetection) {
  FieldSchema field;

  // Array data types must be detected as arrays.
  const DataType array_types[] = {DataType::ARRAY_BINARY,
                                  DataType::ARRAY_STRING,
                                  DataType::ARRAY_DOUBLE};
  for (const DataType type : array_types) {
    field.set_data_type(type);
    EXPECT_TRUE(field.is_array_type());
  }

  // Scalar and vector data types must not be detected as arrays.
  const DataType non_array_types[] = {DataType::STRING,
                                      DataType::VECTOR_FP32};
  for (const DataType type : non_array_types) {
    field.set_data_type(type);
    EXPECT_FALSE(field.is_array_type());
  }
}

TEST(FieldSchemaTest, IndexTypeAndParams) {
  // Without index parameters the index type is UNDEFINED.
  FieldSchema schema;
  EXPECT_EQ(schema.index_type(), IndexType::UNDEFINED);
  EXPECT_EQ(schema.index_params(), nullptr);

  // Attaching HNSW parameters makes the field report the HNSW index type.
  auto hnsw = std::make_shared<HnswIndexParams>(MetricType::IP, 32, 200);
  schema.set_index_params(hnsw);
  EXPECT_EQ(schema.index_type(), IndexType::HNSW);
  EXPECT_NE(schema.index_params(), nullptr);

  // Clearing the parameters with nullptr returns the field to UNDEFINED.
  schema.set_index_params(nullptr);
  EXPECT_EQ(schema.index_type(), IndexType::UNDEFINED);
  EXPECT_EQ(schema.index_params(), nullptr);
}

TEST(FieldSchemaTest, CopyConstructorAndAssignment) {
  auto index_params = std::make_shared<FlatIndexParams>(MetricType::L2);
  FieldSchema original("original", DataType::STRING, 100, true, index_params);

  // Copy construction carries over every property.
  FieldSchema copy(original);
  EXPECT_EQ(copy.name(), "original");
  EXPECT_EQ(copy.data_type(), DataType::STRING);
  EXPECT_TRUE(copy.nullable());
  EXPECT_EQ(copy.dimension(), 100u);
  // Fatal check: the next expectation dereferences index_params().
  ASSERT_NE(copy.index_params(), nullptr);
  EXPECT_EQ(copy.index_params()->type(), IndexType::FLAT);

  // Copy assignment carries over every property.
  FieldSchema assigned;
  assigned = original;
  EXPECT_EQ(assigned.name(), "original");
  EXPECT_EQ(assigned.data_type(), DataType::STRING);
  EXPECT_TRUE(assigned.nullable());
  EXPECT_EQ(assigned.dimension(), 100u);
  // Fatal check: the next expectation dereferences index_params().
  ASSERT_NE(assigned.index_params(), nullptr);
  EXPECT_EQ(assigned.index_params()->type(), IndexType::FLAT);

  // Renaming the original must not affect either copy's name.
  original.set_name("modified");
  EXPECT_EQ(copy.name(), "original");      // Copy should be unchanged
  EXPECT_EQ(assigned.name(), "original");  // Assigned should be unchanged
}

TEST(FieldSchemaTest, MoveConstructorAndAssignment) {
  auto index_params = std::make_shared<IVFIndexParams>(MetricType::COSINE, 128);
  FieldSchema original("move_test", DataType::VECTOR_FP32, 256, false,
                       index_params);

  // Move construction must transfer every property to the new object.
  // NOTE(review): despite the test name, only the move constructor is
  // exercised here; move assignment is not covered.
  FieldSchema moved(std::move(original));
  EXPECT_EQ(moved.name(), "move_test");
  EXPECT_EQ(moved.data_type(), DataType::VECTOR_FP32);
  EXPECT_FALSE(moved.nullable());
  EXPECT_EQ(moved.dimension(), 256u);
  // Fatal check: the next expectation dereferences index_params().
  ASSERT_NE(moved.index_params(), nullptr);
  EXPECT_EQ(moved.index_params()->type(), IndexType::IVF);

  // After the move, `original` is in a valid but unspecified state. Its
  // contents are implementation-dependent, so nothing is asserted about it.
}

TEST(FieldSchemaTest, ComparisonOperators) {
  // Two separately allocated but identical HNSW parameter objects, plus a
  // FLAT parameter object that differs from both.
  auto hnsw_a = std::make_shared<HnswIndexParams>(MetricType::L2, 16, 100);
  auto hnsw_b = std::make_shared<HnswIndexParams>(MetricType::L2, 16, 100);
  auto flat = std::make_shared<FlatIndexParams>(MetricType::IP);

  // `baseline` is the reference; each other field differs from it in at most
  // one aspect.
  FieldSchema baseline("field", DataType::STRING, 100, false, hnsw_a);
  FieldSchema same_content("field", DataType::STRING, 100, false, hnsw_b);
  FieldSchema other_index("field", DataType::STRING, 100, false, flat);
  FieldSchema other_nullable("field", DataType::STRING, 100, true, hnsw_a);
  FieldSchema other_name("different", DataType::STRING, 100, false, hnsw_a);

  // Equal content compares equal even with distinct parameter objects.
  EXPECT_TRUE(baseline == same_content);
  EXPECT_FALSE(baseline != same_content);

  // Different index parameters.
  EXPECT_FALSE(baseline == other_index);
  EXPECT_TRUE(baseline != other_index);

  // Different nullability.
  EXPECT_FALSE(baseline == other_nullable);
  EXPECT_TRUE(baseline != other_nullable);

  // Different name.
  EXPECT_FALSE(baseline == other_name);
  EXPECT_TRUE(baseline != other_name);
}

TEST(FieldSchemaTest, Validate) {
  {
    FieldSchema field("", DataType::UNDEFINED);
    auto status = field.validate();
    EXPECT_FALSE(status.ok());
    EXPECT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);
  }

  {
    FieldSchema field("", DataType::STRING);
    auto status = field.validate();
    EXPECT_FALSE(status.ok());
    EXPECT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);
  }

  {
    FieldSchema field("vector_field", DataType::VECTOR_FP32, 0,
                      false);  // Zero dimension
    auto status = field.validate();
    EXPECT_FALSE(status.ok());
    EXPECT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);
  }

  {
    FieldSchema field("dense_vector", DataType::VECTOR_FP32, 20001,
                      false);  // Zero dimension
    auto status = field.validate();
    EXPECT_FALSE(status.ok());
    EXPECT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);
  }

  {
    auto ivf_params = std::make_shared<IVFIndexParams>(MetricType::IP, 128);
    FieldSchema field("sparse_field", DataType::SPARSE_VECTOR_FP32, 0, false,
                      ivf_params);
    auto status = field.validate();
    EXPECT_FALSE(status.ok());
    EXPECT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);
  }

  {
    auto hnsw_params =
        std::make_shared<HnswIndexParams>(MetricType::L2, 16, 100);
    FieldSchema field("sparse_field", DataType::SPARSE_VECTOR_FP32, 0, false,
                      hnsw_params);
    auto status = field.validate();
    EXPECT_FALSE(status.ok());
    EXPECT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);
  }

  {
    auto invalid_params = std::make_shared<InvertIndexParams>(false);
    FieldSchema field("dense_field", DataType::VECTOR_FP32, 128, false,
                      invalid_params);
    auto status = field.validate();
    EXPECT_FALSE(status.ok());
    EXPECT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);
  }

  {
    auto hnsw_params =
        std::make_shared<HnswIndexParams>(MetricType::L2, 16, 100);
    FieldSchema field("scalar_field", DataType::STRING, 0, false, hnsw_params);
    auto status = field.validate();
    EXPECT_FALSE(status.ok());
    EXPECT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);
  }

  {
    auto hnsw_params =
        std::make_shared<HnswIndexParams>(MetricType::L2, 16, 100);
    FieldSchema field("vector_field", DataType::VECTOR_FP32, 128, false,
                      hnsw_params);
    auto status = field.validate();
    EXPECT_TRUE(status.ok());
  }

  {
    auto flat_params = std::make_shared<FlatIndexParams>(MetricType::IP);
    FieldSchema field("sparse_field", DataType::SPARSE_VECTOR_FP32, 0, false,
                      flat_params);
    auto status = field.validate();
    EXPECT_TRUE(status.ok());
  }

  {
    auto invert_params = std::make_shared<InvertIndexParams>(false);
    FieldSchema field("scalar_field", DataType::STRING, 0, false,
                      invert_params);
    auto status = field.validate();
    EXPECT_TRUE(status.ok());
  }

  {
    FieldSchema field("simple_field", DataType::STRING);
    auto status = field.validate();
    EXPECT_TRUE(status.ok());  // Scalar fields without index params are valid

    FieldSchema vector_field("vector_field", DataType::VECTOR_FP32, 128, false);
    status = vector_field.validate();
    EXPECT_TRUE(
        status.ok());  // Vector fields without index params are also valid
  }

  {
    // Test that VECTOR_FP32 with FP16 quantize type is valid
    auto hnsw_params = std::make_shared<HnswIndexParams>(
        MetricType::L2, 16, 100, QuantizeType::FP16);
    FieldSchema field("fp32_vector", DataType::VECTOR_FP32, 128, false,
                      hnsw_params);
    auto status = field.validate();
    if (!status.ok()) {
      std::cout << "status: " << status.message() << std::endl;
    }
    EXPECT_TRUE(status.ok());
  }

  {
    // Test that VECTOR_FP32 with UNDEFINED quantize type is valid
    auto hnsw_params = std::make_shared<HnswIndexParams>(
        MetricType::L2, 16, 100, QuantizeType::UNDEFINED);
    FieldSchema field("fp32_vector_no_quantize", DataType::VECTOR_FP32, 128,
                      false, hnsw_params);
    auto status = field.validate();
    EXPECT_TRUE(status.ok());
  }

  {
    // Test that SPARSE_VECTOR_FP32 with FP16 quantize type should fail
    auto hnsw_params = std::make_shared<HnswIndexParams>(
        MetricType::IP, 16, 100, QuantizeType::FP16);
    FieldSchema field("sparse_fp32_vector", DataType::SPARSE_VECTOR_FP32, 0,
                      false, hnsw_params);
    auto status = field.validate();
    EXPECT_TRUE(status.ok());
  }

  {
    // Test that VECTOR_FP64 with FP16 quantize type is valid
    auto hnsw_params = std::make_shared<HnswIndexParams>(
        MetricType::L2, 16, 100, QuantizeType::FP16);
    FieldSchema field("fp64_vector", DataType::VECTOR_FP64, 128, false,
                      hnsw_params);
    auto status = field.validate();
    EXPECT_FALSE(status.ok());
  }

  {
    // already support int8/int4 quantizer
    // Test that VECTOR_FP32 with INT8 quantize type should succeed
    auto hnsw_params = std::make_shared<HnswIndexParams>(
        MetricType::L2, 16, 100, QuantizeType::INT8);
    FieldSchema field("fp32_vector_int8_quantize", DataType::VECTOR_FP32, 128,
                      false, hnsw_params);
    auto status = field.validate();
    EXPECT_TRUE(status.ok());

    auto flat_params =
        std::make_shared<FlatIndexParams>(MetricType::L2, QuantizeType::INT4);
    FieldSchema flat_field("fp32_vector_int4_quantize", DataType::VECTOR_FP32,
                           128, false, flat_params);
    EXPECT_TRUE(field.validate().ok());
  }

  {
    std::vector<std::string> valid_names = {
        "a",  // min len = 1
        "A",
        "0",
        "_",
        "-",  // single allowed char
        "abc",
        "ABC",
        "a1_",
        "user_name",
        "test-123",
        "aBc123_-",
        std::string(32, 'a'),  // max len = 32
        "a_b-c1",
        "__test__",
        "123_test"};
    for (auto name : valid_names) {
      FieldSchema field(name, DataType::STRING);
      auto status = field.validate();
      if (!status.ok()) {
        std::cout << "status: " << status.message() << std::endl;
      }
      EXPECT_TRUE(status.ok());
    }
  }

  {
    std::vector<std::string> invalid_names = {
        "",                    // empty — len < 1
        std::string(33, 'a'),  // len > 32
        "a b",                 // space
        "a.b",
        "a@b",
        "a#b",  // illegal chars: . @ #
        "a$b",
        "a%",
        "a&",  // $ % & etc.
        "中文",
        "用户",  // non-ASCII
        "a..b",
        "a__b?",  // ? not allowed
    };
    for (auto name : invalid_names) {
      FieldSchema field(name, DataType::STRING);
      auto status = field.validate();
      EXPECT_FALSE(status.ok());
      EXPECT_EQ(status.code(), StatusCode::INVALID_ARGUMENT);
    }
  }
}

// A default-constructed schema is unnamed, holds no fields, and reports
// MAX_DOC_COUNT_PER_SEGMENT as its per-segment document limit.
TEST(CollectionSchemaTest, DefaultConstructor) {
  CollectionSchema default_schema;

  EXPECT_EQ(default_schema.max_doc_count_per_segment(),
            MAX_DOC_COUNT_PER_SEGMENT);
  EXPECT_EQ(default_schema.fields().size(), 0);
  EXPECT_EQ(default_schema.name(), "");
}

// Constructing a schema from a name and a field list makes both visible
// through the accessors.
TEST(CollectionSchemaTest, ConstructorWithParameters) {
  FieldSchemaPtrList initial_fields;
  initial_fields.push_back(
      std::make_shared<FieldSchema>("field1", DataType::STRING));
  initial_fields.push_back(
      std::make_shared<FieldSchema>("field2", DataType::VECTOR_FP32));

  CollectionSchema populated("test_collection", initial_fields);

  EXPECT_TRUE(populated.has_field("field1"));
  EXPECT_TRUE(populated.has_field("field2"));
  EXPECT_EQ(populated.fields().size(), 2);
  EXPECT_EQ(populated.name(), "test_collection");
}

// set_name() replaces the (initially empty) collection name.
TEST(CollectionSchemaTest, NameManagement) {
  CollectionSchema unnamed;
  // A fresh schema starts without a name.
  EXPECT_EQ(unnamed.name(), "");

  const std::string updated_name = "new_name";
  unnamed.set_name(updated_name);
  EXPECT_EQ(unnamed.name(), updated_name);
}

// The per-segment document limit starts at MAX_DOC_COUNT_PER_SEGMENT and can
// be overridden through its setter.
TEST(CollectionSchemaTest, MaxDocCountPerSegment) {
  CollectionSchema limited;

  // Value reported before any override.
  EXPECT_EQ(limited.max_doc_count_per_segment(), MAX_DOC_COUNT_PER_SEGMENT);

  // Value reported after overriding the limit.
  limited.set_max_doc_count_per_segment(500000);
  EXPECT_EQ(limited.max_doc_count_per_segment(), 500000u);
}

// add_field() registers a new field once and answers ALREADY_EXISTS when the
// same field is added a second time.
TEST(CollectionSchemaTest, AddField) {
  CollectionSchema schema;
  auto new_field =
      std::make_shared<FieldSchema>("test_field", DataType::STRING);

  // First insertion: accepted, and the field becomes visible.
  auto first_add = schema.add_field(new_field);
  EXPECT_TRUE(first_add.ok());
  EXPECT_EQ(schema.fields().size(), 1);
  EXPECT_TRUE(schema.has_field("test_field"));

  // Second insertion of the very same field: rejected.
  auto second_add = schema.add_field(new_field);
  EXPECT_FALSE(second_add.ok());
  EXPECT_EQ(second_add.code(), StatusCode::ALREADY_EXISTS);
}

// drop_field() removes only the named field and answers NOT_FOUND for a name
// the schema does not contain.
TEST(CollectionSchemaTest, DropField) {
  CollectionSchema schema;
  auto scalar = std::make_shared<FieldSchema>("field1", DataType::STRING);
  auto vector = std::make_shared<FieldSchema>("field2", DataType::VECTOR_FP32);

  schema.add_field(scalar);
  schema.add_field(vector);
  EXPECT_EQ(schema.fields().size(), 2);

  // Removing a known field leaves the other one in place.
  auto drop_known = schema.drop_field("field1");
  EXPECT_TRUE(drop_known.ok());
  EXPECT_EQ(schema.fields().size(), 1);
  EXPECT_TRUE(schema.has_field("field2"));
  EXPECT_FALSE(schema.has_field("field1"));

  // Removing an unknown field is an error.
  auto drop_unknown = schema.drop_field("nonexistent");
  EXPECT_FALSE(drop_unknown.ok());
  EXPECT_EQ(drop_unknown.code(), StatusCode::NOT_FOUND);
}

// alter_field() replaces the definition stored under an existing field name
// and answers NOT_FOUND when the name is unknown.
TEST(CollectionSchemaTest, AlterField) {
  CollectionSchema schema;
  auto original_field =
      std::make_shared<FieldSchema>("field", DataType::STRING);
  // Setup must succeed, otherwise the alter_field() checks are meaningless.
  ASSERT_TRUE(schema.add_field(original_field).ok());

  auto new_field =
      std::make_shared<FieldSchema>("field", DataType::VECTOR_FP32);
  auto status = schema.alter_field("field", new_field);
  EXPECT_TRUE(status.ok());

  auto *field = schema.get_field("field");
  // Fatal check: the pointer is dereferenced on the next line.
  ASSERT_NE(field, nullptr);
  EXPECT_EQ(field->data_type(), DataType::VECTOR_FP32);

  // Try to alter non-existing field
  auto status2 = schema.alter_field("nonexistent", new_field);
  EXPECT_FALSE(status2.ok());
  EXPECT_EQ(status2.code(), StatusCode::NOT_FOUND);
}

// Covers the three lookup accessors: get_field() finds any field,
// get_forward_field() only the string field, get_vector_field() only the
// vector field; all of them return nullptr for an unknown name.
TEST(CollectionSchemaTest, FieldRetrieval) {
  CollectionSchema schema;
  auto string_field =
      std::make_shared<FieldSchema>("string_field", DataType::STRING);
  auto vector_field =
      std::make_shared<FieldSchema>("vector_field", DataType::VECTOR_FP32);

  schema.add_field(string_field);
  schema.add_field(vector_field);

  // Every null check below is fatal (ASSERT_*) because the pointer is
  // dereferenced immediately afterwards.

  // Test get_field
  const auto *const_string_field = schema.get_field("string_field");
  ASSERT_NE(const_string_field, nullptr);
  EXPECT_EQ(const_string_field->data_type(), DataType::STRING);

  auto *mutable_string_field = schema.get_field("string_field");
  ASSERT_NE(mutable_string_field, nullptr);
  EXPECT_EQ(mutable_string_field->data_type(), DataType::STRING);

  // Test get_forward_field
  const auto *const_forward_field = schema.get_forward_field("string_field");
  ASSERT_NE(const_forward_field, nullptr);
  EXPECT_EQ(const_forward_field->data_type(), DataType::STRING);

  auto *mutable_forward_field = schema.get_forward_field("string_field");
  ASSERT_NE(mutable_forward_field, nullptr);
  EXPECT_EQ(mutable_forward_field->data_type(), DataType::STRING);

  // Forward field should return nullptr for vector fields
  EXPECT_EQ(schema.get_forward_field("vector_field"), nullptr);

  // Test get_vector_field
  const auto *const_vector_field = schema.get_vector_field("vector_field");
  ASSERT_NE(const_vector_field, nullptr);
  EXPECT_EQ(const_vector_field->data_type(), DataType::VECTOR_FP32);

  auto *mutable_vector_field = schema.get_vector_field("vector_field");
  ASSERT_NE(mutable_vector_field, nullptr);
  EXPECT_EQ(mutable_vector_field->data_type(), DataType::VECTOR_FP32);

  // Vector field should return nullptr for string fields
  EXPECT_EQ(schema.get_vector_field("string_field"), nullptr);

  // Test non-existing field
  EXPECT_EQ(schema.get_field("nonexistent"), nullptr);
  EXPECT_EQ(schema.get_forward_field("nonexistent"), nullptr);
  EXPECT_EQ(schema.get_vector_field("nonexistent"), nullptr);
}

// Checks the list accessors on a schema with one string, one vector and one
// array field: fields() returns all three, the forward accessors return the
// string and array fields, and vector_fields() returns the vector field.
TEST(CollectionSchemaTest, FieldLists) {
  CollectionSchema schema;
  auto string_field =
      std::make_shared<FieldSchema>("string_field", DataType::STRING);
  auto vector_field =
      std::make_shared<FieldSchema>("vector_field", DataType::VECTOR_FP32);
  auto array_field =
      std::make_shared<FieldSchema>("array_field", DataType::ARRAY_INT32);

  schema.add_field(string_field);
  schema.add_field(vector_field);
  schema.add_field(array_field);

  // Test fields()
  auto all_fields = schema.fields();
  EXPECT_EQ(all_fields.size(), 3);

  // Test forward_fields()
  auto forward_fields = schema.forward_fields();
  EXPECT_EQ(forward_fields.size(), 2);  // string_field and array_field

  // Test forward_field_names()
  auto forward_field_names = schema.forward_field_names();
  EXPECT_EQ(forward_field_names.size(), 2);
  EXPECT_TRUE(std::find(forward_field_names.begin(), forward_field_names.end(),
                        "string_field") != forward_field_names.end());
  EXPECT_TRUE(std::find(forward_field_names.begin(), forward_field_names.end(),
                        "array_field") != forward_field_names.end());

  // Test vector_fields()
  auto vector_fields = schema.vector_fields();
  // Fatal check: element 0 is accessed on the next line.
  ASSERT_EQ(vector_fields.size(), 1);
  EXPECT_EQ(vector_fields[0]->name(), "vector_field");
}

// Exercises has_index(), add_index() and drop_index() on a vector field and
// on a string field, including the NOT_FOUND answers for an unknown field.
TEST(CollectionSchemaTest, IndexManagement) {
  CollectionSchema schema;
  auto field =
      std::make_shared<FieldSchema>("indexed_field", DataType::VECTOR_FP32);
  schema.add_field(field);

  auto forward_field =
      std::make_shared<FieldSchema>("forward_field", DataType::STRING);
  schema.add_field(forward_field);

  // Test has_index on field without index
  EXPECT_FALSE(schema.has_index("indexed_field"));
  EXPECT_FALSE(schema.has_index("forward_field"));

  // Add index
  auto index_params =
      std::make_shared<HnswIndexParams>(MetricType::L2, 16, 100);
  auto status = schema.add_index("indexed_field", index_params);
  EXPECT_TRUE(status.ok());
  EXPECT_TRUE(schema.has_index("indexed_field"));

  // Try to add index to non-existing field
  auto status2 = schema.add_index("nonexistent", index_params);
  EXPECT_FALSE(status2.ok());
  EXPECT_EQ(status2.code(), StatusCode::NOT_FOUND);

  // Drop index
  auto status3 = schema.drop_index("indexed_field");
  EXPECT_TRUE(status3.ok());
  EXPECT_FALSE(schema.has_index("indexed_field"));

  // Try to drop index from non-existing field
  auto status4 = schema.drop_index("nonexistent");
  EXPECT_FALSE(status4.ok());
  EXPECT_EQ(status4.code(), StatusCode::NOT_FOUND);

  // Same add/drop round trip with an inverted index on the string field.
  auto forward_index_params = std::make_shared<InvertIndexParams>(false);
  auto status5 = schema.add_index("forward_field", forward_index_params);
  EXPECT_TRUE(status5.ok());
  EXPECT_TRUE(schema.has_index("forward_field"));

  auto status6 = schema.drop_index("forward_field");
  // Check the result of the drop itself, not of the earlier add_index() call.
  EXPECT_TRUE(status6.ok());
  EXPECT_FALSE(schema.has_index("forward_field"));
}

// Copy construction carries over the name, the field list and the per-segment
// document limit.
TEST(CollectionSchemaTest, CopyConstructor) {
  CollectionSchema source("original_schema", {});
  auto only_field = std::make_shared<FieldSchema>("field", DataType::STRING);
  source.add_field(only_field);
  source.set_max_doc_count_per_segment(100000);

  CollectionSchema duplicate(source);

  EXPECT_EQ(duplicate.max_doc_count_per_segment(), 100000u);
  EXPECT_TRUE(duplicate.has_field("field"));
  EXPECT_EQ(duplicate.fields().size(), 1);
  EXPECT_EQ(duplicate.name(), "original_schema");
}

// Exercises CollectionSchema::validate(): a schema with a valid name and one
// sparse vector field passes, while the malformed schemas built below are all
// rejected with INVALID_ARGUMENT.
TEST(CollectionSchemaTest, Validate) {
  CollectionSchema original("original_schema", {});
  auto field =
      std::make_shared<FieldSchema>("sparse", DataType::SPARSE_VECTOR_FP32);
  original.add_field(field);
  original.set_max_doc_count_per_segment(100000);

  ASSERT_TRUE(original.validate().ok());

  // Default-constructed schema: no name, no fields.
  CollectionSchema c1;
  auto s = c1.validate();
  ASSERT_FALSE(s.ok());
  ASSERT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);

  // NOTE(review): the names "c2".."c6" used below are two characters long,
  // which the name cases further down treat as too short. These rejections
  // may therefore be caused by the name rather than by the field setup —
  // confirm and consider longer names.

  // Named schema without any field.
  CollectionSchema c2("c2", {});
  s = c2.validate();
  ASSERT_FALSE(s.ok());
  ASSERT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);

  // Schema whose only field is default-constructed.
  auto f1 = std::make_shared<FieldSchema>();
  CollectionSchema c3("c3", {f1});
  s = c3.validate();
  ASSERT_FALSE(s.ok());
  ASSERT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);

  // Schema whose only field is an INT32 scalar.
  auto f2 = std::make_shared<FieldSchema>("f2", DataType::INT32);
  CollectionSchema c4("c4", {f2});
  s = c4.validate();
  ASSERT_FALSE(s.ok());
  ASSERT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);

  // Schema whose only field is an FP16 vector declared without a dimension.
  auto f3 = std::make_shared<FieldSchema>("f3", DataType::VECTOR_FP16);
  CollectionSchema c5("c5", {f3});
  s = c5.validate();
  ASSERT_FALSE(s.ok());
  ASSERT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);

  // Collection names: the cases below accept 3 to 64 characters drawn from
  // [a-zA-Z0-9_-] and reject everything else.
  {
    std::vector<std::string> invalid_names = {
        "",                    // empty
        "ab",                  // too short (<3)
        std::string(65, 'a'),  // too long (>64)
        "a b",                 // space not allowed
        "a.b",                 // dot not allowed
        "a$b",                 // $ not allowed
        "中文",                // non-ASCII
        "a\nb",                // newline not allowed
        "a\tb",                // tab not allowed
        "a\rb",                // carriage return not allowed
    };

    for (const auto &name : invalid_names) {
      CollectionSchema c(name, {field});
      s = c.validate();
      // Report the offending name only when the expectation is violated.
      ASSERT_FALSE(s.ok()) << "name should have been rejected: " << name;
      ASSERT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);
    }

    std::vector<std::string> valid_names = {
        "test_collection_supported_vectors",
        std::string(64, 'a'),
        "a_b",     // underscore allowed
        "a-b",     // dash allowed
        "a_1",     // underscore and digit allowed
        "a-1",     // dash and digit allowed
        "a_1b",    // underscore, digit and letter allowed
        "a-1b",    // dash, digit and letter allowed
        "-start",  // allowed! (regex permits leading -/_)
        "_start",  // also allowed
        "end-",
        "end_",  // trailing -/_ allowed
        "a--b",
        "__b",
        "a__b"  // consecutive allowed
    };
    for (const auto &name : valid_names) {
      CollectionSchema c(name, {field});
      s = c.validate();
      ASSERT_TRUE(s.ok()) << "name should have been accepted: " << name;
    }
  }

  // validate vector/scalar field size
  {
    // 1025 fields in a single schema.
    std::vector<FieldSchema::Ptr> fields;
    for (int i = 0; i < 1025; ++i) {
      auto f = std::make_shared<FieldSchema>("f" + std::to_string(i),
                                             DataType::VECTOR_FP32, 1024);
      fields.emplace_back(f);
    }
    CollectionSchema too_many_fields("c5", fields);
    s = too_many_fields.validate();
    ASSERT_FALSE(s.ok());
    ASSERT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);

    // Five vector fields in a single schema. The original code declared
    // `vectors` but kept appending to `fields`, so this list was never used.
    // NOTE(review): presumably five exceeds the vector-field limit — confirm
    // against the limit enforced by validate().
    std::vector<FieldSchema::Ptr> vectors;
    for (int i = 0; i < 5; ++i) {
      auto f = std::make_shared<FieldSchema>(
          "f" + std::to_string(i), DataType::VECTOR_FP32, 1024, false);
      vectors.emplace_back(f);
    }
    CollectionSchema c6("c6", vectors);
    s = c6.validate();
    ASSERT_FALSE(s.ok());
    ASSERT_EQ(s.code(), StatusCode::INVALID_ARGUMENT);
  }
}

#if RABITQ_SUPPORTED
// Validates a 128-dimensional FP32 field with an HNSW_RABITQ index for each
// metric: L2, IP and COSINE are expected to pass, MIPSL2 is expected to fail.
TEST(FieldSchemaTest, HnswRabitqIndexValidationMetricTypes) {
  // Builds the field with the requested metric and returns its validation
  // status; every other parameter is the same for all cases.
  auto validate_with_metric = [](MetricType metric) {
    auto rabitq_params =
        std::make_shared<HnswRabitqIndexParams>(metric, 7, 256, 16, 200, 0);
    FieldSchema candidate("vector_field", DataType::VECTOR_FP32, 128, false,
                          rabitq_params);
    return candidate.validate();
  };

  auto l2_status = validate_with_metric(MetricType::L2);
  EXPECT_TRUE(l2_status.ok())
      << "FP32 + L2 should be supported, but got error: "
      << l2_status.message();

  auto ip_status = validate_with_metric(MetricType::IP);
  EXPECT_TRUE(ip_status.ok())
      << "FP32 + IP should be supported, but got error: "
      << ip_status.message();

  auto cosine_status = validate_with_metric(MetricType::COSINE);
  EXPECT_TRUE(cosine_status.ok())
      << "FP32 + COSINE should be supported, but got error: "
      << cosine_status.message();

  auto mipsl2_status = validate_with_metric(MetricType::MIPSL2);
  EXPECT_FALSE(mipsl2_status.ok())
      << "FP32 + MIPSL2 should not be supported, but got error: "
      << mipsl2_status.message();
}


// Checks the dimension cases for an HNSW_RABITQ index on an FP32 vector
// field: 1, 63 and 4096 are expected to be rejected, 64 and 4095 accepted.
TEST(FieldSchemaTest, HnswRabitqIndexValidation_Dimension) {
  // Builds an L2 HNSW_RABITQ field of the requested dimension and returns its
  // validation status.
  auto validate_with_dimension = [](int dimension) {
    auto rabitq_params = std::make_shared<HnswRabitqIndexParams>(
        MetricType::L2, 7, 256, 16, 200, 0);
    FieldSchema candidate("vector_field", DataType::VECTOR_FP32, dimension,
                          false, rabitq_params);
    return candidate.validate();
  };

  // Just below the lower bound.
  auto below_min = validate_with_dimension(63);
  EXPECT_FALSE(below_min.ok())
      << "Dimension 63 should not be supported with HNSW_RABITQ";
  EXPECT_NE(
      below_min.message().find("HNSW_RABITQ index only support dimension in"),
      std::string::npos)
      << "Error message should mention dimension range, got: "
      << below_min.message();

  // Far below the lower bound.
  auto single = validate_with_dimension(1);
  EXPECT_FALSE(single.ok())
      << "Dimension 1 should not be supported with HNSW_RABITQ";

  // Just above the upper bound.
  auto above_max = validate_with_dimension(4096);
  EXPECT_FALSE(above_max.ok())
      << "Dimension 4096 should not be supported with HNSW_RABITQ";
  EXPECT_NE(
      above_max.message().find("HNSW_RABITQ index only support dimension in"),
      std::string::npos)
      << "Error message should mention dimension range, got: "
      << above_max.message();

  // Lower boundary.
  auto at_min = validate_with_dimension(64);
  EXPECT_TRUE(at_min.ok())
      << "Dimension 64 should be supported, but got error: "
      << at_min.message();

  // Upper boundary.
  auto at_max = validate_with_dimension(4095);
  EXPECT_TRUE(at_max.ok())
      << "Dimension 4095 should be supported, but got error: "
      << at_max.message();
}
#endif

// An HNSW_RABITQ index is expected to be rejected on FP16, INT8, FP64 and
// sparse vector fields; where the test pins it, the error text is checked too.
TEST(FieldSchemaTest, HnswRabitqIndexValidation_UnsupportedDataTypes) {
  // Builds a 128-dimensional field of the given type with an HNSW_RABITQ index
  // using the given metric, and returns its validation status.
  auto validate_with_type = [](DataType data_type, MetricType metric) {
    auto rabitq_params =
        std::make_shared<HnswRabitqIndexParams>(metric, 7, 256, 16, 200, 0);
    FieldSchema candidate("vector_field", data_type, 128, false,
                          rabitq_params);
    return candidate.validate();
  };

  // Dense FP16 vectors.
  auto fp16_status = validate_with_type(DataType::VECTOR_FP16, MetricType::L2);
  EXPECT_FALSE(fp16_status.ok())
      << "FP16 should not be supported with HNSW_RABITQ";
  EXPECT_NE(fp16_status.message().find(
                "HNSW_RABITQ index only support FP32 data type"),
            std::string::npos)
      << "Error message should mention FP32 support only, got: "
      << fp16_status.message();

  // Dense INT8 vectors.
  auto int8_status = validate_with_type(DataType::VECTOR_INT8, MetricType::L2);
  EXPECT_FALSE(int8_status.ok())
      << "INT8 should not be supported with HNSW_RABITQ";
  EXPECT_NE(int8_status.message().find(
                "HNSW_RABITQ index only support FP32 data type"),
            std::string::npos)
      << "Error message should mention FP32 support only, got: "
      << int8_status.message();

  // Dense FP64 vectors (only the rejection is checked, not the message).
  auto fp64_status = validate_with_type(DataType::VECTOR_FP64, MetricType::L2);
  EXPECT_FALSE(fp64_status.ok())
      << "FP64 should not be supported with HNSW_RABITQ";

  // Sparse FP32 vectors, using the IP metric.
  auto sparse_status =
      validate_with_type(DataType::SPARSE_VECTOR_FP32, MetricType::IP);
  EXPECT_FALSE(sparse_status.ok())
      << "Sparse vector should not be supported with HNSW_RABITQ";
  EXPECT_NE(
      sparse_status.message().find("sparse_vector's index_params only support"),
      std::string::npos)
      << "Error message should mention sparse vector index support, got: "
      << sparse_status.message();
}


================================================
FILE: tests/db/index/common/version_manager_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "db/index/common/version_manager.h"
#include <filesystem>
#include <memory>
#include <gtest/gtest.h>
#include "db/common/file_helper.h"
#include "db/index/common/meta.h"
#include "proto/zvec.pb.h"
#include "zvec/db/schema.h"

namespace zvec {

// Fixture that gives every test a freshly created scratch directory and
// removes it again afterwards.
class VersionManagerTest : public ::testing::Test {
 protected:
  // Recreates the scratch directory so nothing from an earlier run remains.
  void SetUp() override {
    FileHelper::RemoveDirectory(test_path_);
    FileHelper::CreateDirectory(test_path_);
  }

  // Deletes the scratch directory together with anything the test wrote.
  void TearDown() override {
    FileHelper::RemoveDirectory(test_path_);
  }

  // Location of the scratch directory, relative to the working directory.
  std::string test_path_{"./version_manager_test"};
};

// Test basic Version functionality: schema storage plus adding, listing and
// removing persisted segment metas, including the duplicate-add and
// missing-remove failures.
TEST_F(VersionManagerTest, VersionBasicOperations) {
  Version version;

  // Create a sample schema
  CollectionSchema schema;
  schema.set_name("test_collection");

  version.set_schema(schema);

  // Verify schema is set correctly
  EXPECT_EQ(version.schema().name(), "test_collection");

  // Test segment meta operations
  auto segment_meta = std::make_shared<SegmentMeta>(1);
  segment_meta->set_id(1);

  // Add segment meta
  EXPECT_TRUE(version.add_persisted_segment_meta(segment_meta).ok());

  // Try to add duplicate - should fail
  EXPECT_FALSE(version.add_persisted_segment_meta(segment_meta).ok());

  // Get segment metas
  auto segment_metas = version.persisted_segment_metas();
  // Fatal check: element 0 is accessed on the next line.
  ASSERT_EQ(segment_metas.size(), 1);
  EXPECT_EQ(segment_metas[0]->id(), 1);

  // Remove segment meta
  EXPECT_TRUE(version.remove_persisted_segment_meta(1).ok());
  EXPECT_EQ(version.persisted_segment_metas().size(), 0);

  // Try to remove non-existent segment - should fail
  EXPECT_FALSE(version.remove_persisted_segment_meta(1).ok());

  // Smoke-test both string renderings; their content is not checked.
  std::cout << version.to_string() << std::endl;
  std::cout << version.to_string_formatted() << std::endl;
}

// Round-trips a Version through Version::Save() and Version::Load() and
// checks that the schema name, segment list and counters are preserved.
TEST_F(VersionManagerTest, VersionLoadSave) {
  const std::string manifest_path = test_path_ + "/manifest";

  // Assemble the version that will be written out.
  Version saved;

  CollectionSchema collection;
  collection.set_name("test_collection");
  saved.set_schema(collection);

  auto persisted_segment = std::make_shared<SegmentMeta>(1);
  persisted_segment->set_id(1);
  saved.add_persisted_segment_meta(persisted_segment);

  saved.set_id_map_path_suffix(100);
  saved.set_delete_snapshot_path_suffix(200);
  saved.set_next_segment_id(2);

  // Write the manifest ...
  EXPECT_TRUE(Version::Save(manifest_path, saved).ok());

  // ... and read it back into a fresh object.
  Version restored;
  EXPECT_TRUE(Version::Load(manifest_path, &restored).ok());

  // Everything that was set before saving must come back unchanged.
  EXPECT_EQ(restored.next_segment_id(), 2);
  EXPECT_EQ(restored.delete_snapshot_path_suffix(), 200);
  EXPECT_EQ(restored.id_map_path_suffix(), 100);
  EXPECT_EQ(restored.persisted_segment_metas().size(), 1);
  EXPECT_EQ(restored.schema().name(), "test_collection");
}

// Test VersionManager creation and recovery: a manager created from an
// initial version, modified and flushed, must be recoverable from the same
// path with the modification included.
TEST_F(VersionManagerTest, VersionManagerCreateAndRecover) {
  std::string version_path = test_path_ + "/version";

  std::filesystem::create_directories(version_path);

  // Create initial version
  Version initial_version;
  CollectionSchema schema;
  schema.set_name("initial_collection");
  initial_version.set_schema(schema);

  auto segment_meta = std::make_shared<SegmentMeta>(1);
  segment_meta->set_id(1);
  // The final segment-count check depends on this segment being registered.
  ASSERT_TRUE(initial_version.add_persisted_segment_meta(segment_meta).ok());

  // Create VersionManager
  auto create_result = VersionManager::Create(version_path, initial_version);
  // Fatal check: value() must not be called on a failed result.
  ASSERT_TRUE(create_result.has_value());

  auto version_manager = create_result.value();

  // Get current version and verify
  auto current_version = version_manager->get_current_version();
  EXPECT_EQ(current_version.schema().name(), "initial_collection");

  // Modify version
  auto new_segment = std::make_shared<SegmentMeta>(2);
  new_segment->set_id(2);
  EXPECT_TRUE(version_manager->add_persisted_segment_meta(new_segment).ok());

  // Flush changes
  ASSERT_TRUE(version_manager->flush().ok());

  // Recover VersionManager
  auto recover_result = VersionManager::Recovery(version_path);
  // Fatal check: value() must not be called on a failed result.
  ASSERT_TRUE(recover_result.has_value());

  auto recovered_manager = recover_result.value();
  auto recovered_version = recovered_manager->get_current_version();

  // Verify recovered version matches modified version
  EXPECT_EQ(recovered_version.schema().name(), "initial_collection");
  EXPECT_EQ(recovered_version.persisted_segment_metas().size(), 2);
}

// Test VersionManager operations: segment metas, the writing segment and the
// suffix/id counters set through the manager must be visible in the current
// version after a flush.
TEST_F(VersionManagerTest, VersionManagerOperations) {
  std::string version_path = test_path_ + "/version_ops";

  std::filesystem::create_directories(version_path);

  // Create initial version
  Version initial_version;
  CollectionSchema schema;
  schema.set_name("test_collection");
  initial_version.set_schema(schema);

  auto create_result = VersionManager::Create(version_path, initial_version);
  // Fatal check: value() must not be called on a failed result.
  ASSERT_TRUE(create_result.has_value());
  auto version_manager = create_result.value();

  // Test segment meta operations through VersionManager
  auto segment_meta = std::make_shared<SegmentMeta>(1);
  segment_meta->set_id(1);
  EXPECT_TRUE(version_manager->add_persisted_segment_meta(segment_meta).ok());

  // Test reset writing segment meta
  auto writing_segment = std::make_shared<SegmentMeta>(100);
  writing_segment->set_id(100);
  EXPECT_TRUE(
      version_manager->reset_writing_segment_meta(writing_segment).ok());

  // Test suffix setters
  version_manager->set_id_map_path_suffix(50);
  version_manager->set_delete_snapshot_path_suffix(60);
  version_manager->set_next_segment_id(3);

  // Flush and verify
  EXPECT_TRUE(version_manager->flush().ok());

  auto current_version = version_manager->get_current_version();
  EXPECT_EQ(current_version.id_map_path_suffix(), 50);
  EXPECT_EQ(current_version.delete_snapshot_path_suffix(), 60);
  EXPECT_EQ(current_version.next_segment_id(), 3);
  // Fatal check: the writing segment meta is dereferenced on the next line.
  ASSERT_TRUE(current_version.writing_segment_meta() != nullptr);
  EXPECT_EQ(current_version.writing_segment_meta()->id(), 100);
}

// Two versions with the same schema name and the same persisted segment
// compare equal; adding a segment to only one of them breaks the equality.
TEST_F(VersionManagerTest, VersionEquality) {
  Version left;
  Version right;

  CollectionSchema left_schema;
  CollectionSchema right_schema;
  left_schema.set_name("collection1");
  right_schema.set_name("collection1");

  left.set_schema(left_schema);
  right.set_schema(right_schema);

  // Give both sides their own, identically configured, segment meta.
  auto left_segment = std::make_shared<SegmentMeta>(1);
  left_segment->set_id(1);
  left.add_persisted_segment_meta(left_segment);

  auto right_segment = std::make_shared<SegmentMeta>(1);
  right_segment->set_id(1);
  right.add_persisted_segment_meta(right_segment);

  // Identical content: equal.
  EXPECT_TRUE(left == right);

  // Add one more segment to the right-hand side only.
  auto extra_segment = std::make_shared<SegmentMeta>(2);
  extra_segment->set_id(2);
  right.add_persisted_segment_meta(extra_segment);

  // Differing segment lists: no longer equal.
  EXPECT_FALSE(left == right);
}

// Test error conditions: the manager must reject a null segment meta and the
// removal of a segment id it does not hold.
TEST_F(VersionManagerTest, ErrorConditions) {
  std::string version_path = test_path_ + "/error_test";

  std::filesystem::create_directories(version_path);

  // Create initial version
  Version initial_version;
  CollectionSchema schema;
  schema.set_name("test");
  initial_version.set_schema(schema);

  auto create_result = VersionManager::Create(version_path, initial_version);
  // Fatal check: value() must not be called on a failed result.
  ASSERT_TRUE(create_result.has_value());
  auto version_manager = create_result.value();

  // Test operations with null segment meta
  EXPECT_FALSE(version_manager->add_persisted_segment_meta(nullptr).ok());

  // Test operations with non-existent segment ID
  EXPECT_FALSE(version_manager->remove_persisted_segment_meta(999).ok());
}

// Copies the name of a protobuf schema into an internal CollectionSchema and
// checks that a Version hands it back. Only the name is transferred; the
// protobuf field added below is not converted by this test.
TEST_F(VersionManagerTest, SchemaConversion) {
  // Protobuf side: a named schema with one 128-dimensional FP32 vector field.
  zvec::proto::CollectionSchema proto_schema;
  proto_schema.set_name("test_collection");

  auto proto_field = proto_schema.add_fields();
  proto_field->set_name("vector_field");
  proto_field->set_data_type(zvec::proto::DataType::DT_VECTOR_FP32);
  proto_field->set_dimension(128);

  // Internal side: take over the name only.
  CollectionSchema converted;
  converted.set_name(proto_schema.name());

  // Store the schema in a Version and read the name back.
  Version holder;
  holder.set_schema(converted);

  EXPECT_EQ(holder.schema().name(), "test_collection");
}

// Test SegmentMeta functionality: id, persisted blocks, indexed vector fields
// and the min/max doc id reported after one block has been added.
TEST_F(VersionManagerTest, SegmentMetaOperations) {
  SegmentMeta segment_meta(10);

  EXPECT_EQ(segment_meta.id(), 10);

  // Test block operations
  BlockMeta block(1, BlockType::SCALAR, 0, 100);
  segment_meta.add_persisted_block(block);

  // Fatal check: element 0 is accessed on the next line.
  ASSERT_EQ(segment_meta.persisted_blocks().size(), 1);
  EXPECT_EQ(segment_meta.persisted_blocks()[0].id(), 1);

  // Test indexed vector fields
  EXPECT_FALSE(segment_meta.vector_indexed("field1"));
  segment_meta.add_indexed_vector_field("field1");
  EXPECT_TRUE(segment_meta.vector_indexed("field1"));

  // Test min/max doc id
  EXPECT_EQ(segment_meta.min_doc_id(), 0);
  EXPECT_EQ(segment_meta.max_doc_id(), 100);
}

}  // namespace zvec

================================================
FILE: tests/db/index/segment/column_merging_reader_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "db/index/segment/column_merging_reader.h"
#include <memory>
#include <vector>
#include <arrow/api.h>
#include <arrow/builder.h>
#include <arrow/ipc/writer.h>
#include <arrow/testing/gtest_util.h>
#include <arrow/testing/util.h>
#include <gtest/gtest.h>

using namespace zvec;

// Builds an Arrow int32 array holding `values`. A failure reported by the
// builder is propagated through the returned arrow::Result.
arrow::Result<std::shared_ptr<arrow::Array>> MakeInt32Array(
    const std::vector<int32_t> &values) {
  arrow::Int32Builder int32_builder;
  ARROW_RETURN_NOT_OK(int32_builder.AppendValues(values));
  return int32_builder.Finish();
}

// Wraps `values` in a record batch with one int32 column named `column_name`.
// An array-construction failure is returned instead of a batch.
arrow::Result<std::shared_ptr<arrow::RecordBatch>> MakeInt32RecordBatch(
    const std::string &column_name, const std::vector<int32_t> &values) {
  std::shared_ptr<arrow::Array> column;
  ARROW_ASSIGN_OR_RAISE(column, MakeInt32Array(values));
  auto batch_schema =
      arrow::schema({arrow::field(column_name, arrow::int32())});
  return arrow::RecordBatch::Make(batch_schema, values.size(), {column});
}

// Reader stub whose ReadNext() always fails with the status code supplied at
// construction; used to exercise error handling for a failing input reader.
class MockErrorRecordBatchReader : public arrow::ipc::RecordBatchReader {
 public:
  explicit MockErrorRecordBatchReader(arrow::StatusCode error_code)
      : failure_code_(error_code) {}

  // Reports a fixed schema with a single int32 column named "dummy".
  std::shared_ptr<arrow::Schema> schema() const override {
    return arrow::schema({arrow::field("dummy", arrow::int32())});
  }

  // Clears `*out` and returns a non-OK status carrying the configured code.
  arrow::Status ReadNext(std::shared_ptr<arrow::RecordBatch> *out) override {
    out->reset();
    return arrow::Status(failure_code_, "Mock error");
  }

 private:
  arrow::StatusCode failure_code_;
};

// Fixture providing two disjoint two-column int32 schemas (col1/col2 and
// col3/col4) plus a four-column target schema listing col1..col4 in order.
class ColumnMergingReaderTest : public ::testing::Test {
 protected:
  void SetUp() override {
    // Every column used by these tests is an int32 field.
    const auto int32_column = [](const char *name) {
      return arrow::field(name, arrow::int32());
    };

    schema1_ = arrow::schema({int32_column("col1"), int32_column("col2")});
    schema2_ = arrow::schema({int32_column("col3"), int32_column("col4")});
    target_schema_ =
        arrow::schema({int32_column("col1"), int32_column("col2"),
                       int32_column("col3"), int32_column("col4")});
  }

  std::shared_ptr<arrow::Schema> schema1_;
  std::shared_ptr<arrow::Schema> schema2_;
  std::shared_ptr<arrow::Schema> target_schema_;
};

// The Make factory returns a non-null reader that reports the target schema
// it was given, even when no input readers are supplied.
TEST_F(ColumnMergingReaderTest, Make) {
  std::vector<std::shared_ptr<arrow::ipc::RecordBatchReader>> no_inputs;
  auto merged = ColumnMergingReader::Make(target_schema_, std::move(no_inputs));
  ASSERT_NE(merged, nullptr);
  EXPECT_EQ(merged->schema(), target_schema_);
}

// A reader built directly through its constructor reports the target schema
// it was constructed with.
TEST_F(ColumnMergingReaderTest, ConstructorAndSchema) {
  std::vector<std::shared_ptr<arrow::ipc::RecordBatchReader>> no_inputs;
  auto merged = std::make_shared<ColumnMergingReader>(target_schema_,
                                                      std::move(no_inputs));
  EXPECT_EQ(merged->schema(), target_schema_);
}

// Test normal operation with two readers: one supplies col1/col2, the other
// col3/col4. The first merged batch must contain all four columns in target
// order, and the following read must signal end-of-stream with a null batch.
TEST_F(ColumnMergingReaderTest, NormalOperation) {
  // Create first batch with col1 and col2
  auto array1 = MakeInt32Array({1, 2, 3}).ValueOrDie();
  auto array2 = MakeInt32Array({4, 5, 6}).ValueOrDie();
  auto batch1 = arrow::RecordBatch::Make(schema1_, 3, {array1, array2});

  // Create second batch with col3 and col4
  auto array3 = MakeInt32Array({7, 8, 9}).ValueOrDie();
  auto array4 = MakeInt32Array({10, 11, 12}).ValueOrDie();
  auto batch2 = arrow::RecordBatch::Make(schema2_, 3, {array3, array4});

  // Mock reader that yields its batch exactly once and a null batch afterwards.
  class MockRecordBatchReader : public arrow::ipc::RecordBatchReader {
   public:
    explicit MockRecordBatchReader(std::shared_ptr<arrow::RecordBatch> batch)
        : batch_(batch), returned_(false) {}

    std::shared_ptr<arrow::Schema> schema() const override {
      return batch_->schema();
    }

    arrow::Status ReadNext(std::shared_ptr<arrow::RecordBatch> *out) override {
      if (!returned_) {
        *out = batch_;
        returned_ = true;
      } else {
        *out = nullptr;
      }
      return arrow::Status::OK();
    }

   private:
    std::shared_ptr<arrow::RecordBatch> batch_;
    bool returned_;
  };

  std::vector<std::shared_ptr<arrow::ipc::RecordBatchReader>> readers;
  readers.push_back(std::make_shared<MockRecordBatchReader>(batch1));
  readers.push_back(std::make_shared<MockRecordBatchReader>(batch2));

  auto merging_reader =
      ColumnMergingReader::Make(target_schema_, std::move(readers));

  std::shared_ptr<arrow::RecordBatch> result_batch;
  ASSERT_OK(merging_reader->ReadNext(&result_batch));
  ASSERT_NE(result_batch, nullptr);
  // Fatal shape checks: the column and value accesses below would be out of
  // bounds if the merged batch had fewer rows or columns than expected.
  ASSERT_EQ(result_batch->num_rows(), 3);
  ASSERT_EQ(result_batch->num_columns(), 4);

  // Expected contents of col1..col4, in target-schema order.
  const std::vector<std::vector<int32_t>> expected_columns = {
      {1, 2, 3}, {4, 5, 6}, {7, 8, 9}, {10, 11, 12}};

  int column_index = 0;
  for (const auto &expected_values : expected_columns) {
    const auto column = result_batch->column(column_index);
    // Guard the downcast: static_pointer_cast on a non-int32 array is invalid.
    ASSERT_EQ(column->type_id(), arrow::Type::INT32)
        << "column " << column_index;
    const auto int32_values =
        std::static_pointer_cast<arrow::Int32Array>(column);

    int64_t row = 0;
    for (const int32_t expected : expected_values) {
      EXPECT_EQ(int32_values->Value(row), expected)
          << "column " << column_index << ", row " << row;
      ++row;
    }
    ++column_index;
  }

  // Second read should return nullptr (EOF)
  ASSERT_OK(merging_reader->ReadNext(&result_batch));
  EXPECT_EQ(result_batch, nullptr);
}

// With no input readers at all, the first read succeeds and yields a null
// batch.
TEST_F(ColumnMergingReaderTest, EmptyReaders) {
  std::vector<std::shared_ptr<arrow::ipc::RecordBatchReader>> no_inputs;
  auto merged = ColumnMergingReader::Make(target_schema_, std::move(no_inputs));

  std::shared_ptr<arrow::RecordBatch> merged_batch;
  ASSERT_OK(merged->ReadNext(&merged_batch));
  EXPECT_EQ(merged_batch, nullptr);
}

// Two input readers deliver batches of different lengths (3 rows vs 2 rows);
// reading the merged stream must fail with StatusCode::Invalid.
// NOTE(review): target_schema_ also lists col3/col4, which neither reader
// supplies -- confirm the Invalid status originates from the row-count check
// rather than from a missing-column check.
TEST_F(ColumnMergingReaderTest, InconsistentRowCounts) {
  // Reader that hands out its batch once, then reports a null batch.
  class OneShotReader : public arrow::ipc::RecordBatchReader {
   public:
    explicit OneShotReader(std::shared_ptr<arrow::RecordBatch> batch)
        : batch_(batch) {}

    std::shared_ptr<arrow::Schema> schema() const override {
      return batch_->schema();
    }

    arrow::Status ReadNext(std::shared_ptr<arrow::RecordBatch> *out) override {
      if (exhausted_) {
        *out = nullptr;
        return arrow::Status::OK();
      }
      exhausted_ = true;
      *out = batch_;
      return arrow::Status::OK();
    }

   private:
    std::shared_ptr<arrow::RecordBatch> batch_;
    bool exhausted_ = false;
  };

  // col1 carries three rows ...
  auto long_column = MakeInt32Array({1, 2, 3}).ValueOrDie();
  auto three_row_batch = arrow::RecordBatch::Make(
      arrow::schema({arrow::field("col1", arrow::int32())}), 3, {long_column});

  // ... while col2 carries only two.
  auto short_column = MakeInt32Array({4, 5}).ValueOrDie();
  auto two_row_batch = arrow::RecordBatch::Make(
      arrow::schema({arrow::field("col2", arrow::int32())}), 2, {short_column});

  std::vector<std::shared_ptr<arrow::ipc::RecordBatchReader>> inputs;
  inputs.push_back(std::make_shared<OneShotReader>(three_row_batch));
  inputs.push_back(std::make_shared<OneShotReader>(two_row_batch));

  auto merged = ColumnMergingReader::Make(target_schema_, std::move(inputs));

  std::shared_ptr<arrow::RecordBatch> merged_batch;
  arrow::Status read_status = merged->ReadNext(&merged_batch);
  EXPECT_FALSE(read_status.ok());
  EXPECT_EQ(read_status.code(), arrow::StatusCode::Invalid);
}

// The target schema asks for col1 and col2 but the only input reader supplies
// col1; reading must fail with StatusCode::Invalid.
TEST_F(ColumnMergingReaderTest, MissingColumn) {
  // Reader that hands out its batch once, then reports a null batch.
  class OneShotReader : public arrow::ipc::RecordBatchReader {
   public:
    explicit OneShotReader(std::shared_ptr<arrow::RecordBatch> batch)
        : batch_(batch) {}

    std::shared_ptr<arrow::Schema> schema() const override {
      return batch_->schema();
    }

    arrow::Status ReadNext(std::shared_ptr<arrow::RecordBatch> *out) override {
      if (exhausted_) {
        *out = nullptr;
        return arrow::Status::OK();
      }
      exhausted_ = true;
      *out = batch_;
      return arrow::Status::OK();
    }

   private:
    std::shared_ptr<arrow::RecordBatch> batch_;
    bool exhausted_ = false;
  };

  // The single input batch only has col1.
  auto col1_values = MakeInt32Array({1, 2, 3}).ValueOrDie();
  auto col1_batch = arrow::RecordBatch::Make(
      arrow::schema({arrow::field("col1", arrow::int32())}), 3, {col1_values});

  std::vector<std::shared_ptr<arrow::ipc::RecordBatchReader>> inputs;
  inputs.push_back(std::make_shared<OneShotReader>(col1_batch));

  // col2 is requested here but no reader provides it.
  auto two_column_target =
      arrow::schema({arrow::field("col1", arrow::int32()),
                     arrow::field("col2", arrow::int32())});

  auto merged = ColumnMergingReader::Make(two_column_target, std::move(inputs));

  std::shared_ptr<arrow::RecordBatch> merged_batch;
  arrow::Status read_status = merged->ReadNext(&merged_batch);
  EXPECT_FALSE(read_status.ok());
  EXPECT_EQ(read_status.code(), arrow::StatusCode::Invalid);
}

// An IOError raised by an input reader must surface unchanged from the
// merging reader's ReadNext().
TEST_F(ColumnMergingReaderTest, ReadError) {
  std::vector<std::shared_ptr<arrow::ipc::RecordBatchReader>> inputs;
  auto failing_input =
      std::make_shared<MockErrorRecordBatchReader>(arrow::StatusCode::IOError);
  inputs.push_back(failing_input);

  auto merged = ColumnMergingReader::Make(target_schema_, std::move(inputs));

  std::shared_ptr<arrow::RecordBatch> merged_batch;
  arrow::Status read_status = merged->ReadNext(&merged_batch);
  EXPECT_FALSE(read_status.ok());
  EXPECT_EQ(read_status.code(), arrow::StatusCode::IOError);
}

// Test multiple reads: each input reader holds two batches, so the merging
// reader must produce two merged batches (pairing the batches positionally)
// followed by a null batch signalling end-of-stream.
TEST_F(ColumnMergingReaderTest, MultipleReads) {
  // Create batches
  auto array1a = MakeInt32Array({1, 2}).ValueOrDie();
  auto batch1a = arrow::RecordBatch::Make(
      arrow::schema({arrow::field("col1", arrow::int32())}), 2, {array1a});

  auto array1b = MakeInt32Array({3, 4}).ValueOrDie();
  auto batch1b = arrow::RecordBatch::Make(
      arrow::schema({arrow::field("col1", arrow::int32())}), 2, {array1b});

  auto array2a = MakeInt32Array({5, 6}).ValueOrDie();
  auto batch2a = arrow::RecordBatch::Make(
      arrow::schema({arrow::field("col2", arrow::int32())}), 2, {array2a});

  auto array2b = MakeInt32Array({7, 8}).ValueOrDie();
  auto batch2b = arrow::RecordBatch::Make(
      arrow::schema({arrow::field("col2", arrow::int32())}), 2, {array2b});

  // Reader that replays a fixed list of batches in order, then returns a null
  // batch once the list is exhausted.
  class MultiBatchRecordBatchReader : public arrow::ipc::RecordBatchReader {
   public:
    explicit MultiBatchRecordBatchReader(
        std::vector<std::shared_ptr<arrow::RecordBatch>> batches)
        : batches_(std::move(batches)), index_(0) {}

    std::shared_ptr<arrow::Schema> schema() const override {
      return batches_.empty() ? arrow::schema({}) : batches_[0]->schema();
    }

    arrow::Status ReadNext(std::shared_ptr<arrow::RecordBatch> *out) override {
      if (index_ < batches_.size()) {
        *out = batches_[index_++];
      } else {
        *out = nullptr;
      }
      return arrow::Status::OK();
    }

   private:
    std::vector<std::shared_ptr<arrow::RecordBatch>> batches_;
    size_t index_;
  };

  std::vector<std::shared_ptr<arrow::ipc::RecordBatchReader>> readers;
  readers.push_back(std::make_shared<MultiBatchRecordBatchReader>(
      std::vector<std::shared_ptr<arrow::RecordBatch>>{batch1a, batch1b}));
  readers.push_back(std::make_shared<MultiBatchRecordBatchReader>(
      std::vector<std::shared_ptr<arrow::RecordBatch>>{batch2a, batch2b}));

  auto target_schema = arrow::schema({arrow::field("col1", arrow::int32()),
                                      arrow::field("col2", arrow::int32())});

  auto merging_reader =
      ColumnMergingReader::Make(target_schema, std::move(readers));

  // First read
  std::shared_ptr<arrow::RecordBatch> result_batch;
  ASSERT_OK(merging_reader->ReadNext(&result_batch));
  ASSERT_NE(result_batch, nullptr);
  // Fatal shape checks: column(1) and Value(1) below require two columns and
  // two rows, so abort rather than read out of bounds.
  ASSERT_EQ(result_batch->num_rows(), 2);
  ASSERT_EQ(result_batch->num_columns(), 2);

  auto col1 =
      std::static_pointer_cast<arrow::Int32Array>(result_batch->column(0));
  auto col2 =
      std::static_pointer_cast<arrow::Int32Array>(result_batch->column(1));
  EXPECT_EQ(col1->Value(0), 1);
  EXPECT_EQ(col1->Value(1), 2);
  EXPECT_EQ(col2->Value(0), 5);
  EXPECT_EQ(col2->Value(1), 6);

  // Second read
  ASSERT_OK(merging_reader->ReadNext(&result_batch));
  ASSERT_NE(result_batch, nullptr);
  // Same fatal shape checks for the second merged batch.
  ASSERT_EQ(result_batch->num_rows(), 2);
  ASSERT_EQ(result_batch->num_columns(), 2);

  col1 = std::static_pointer_cast<arrow::Int32Array>(result_batch->column(0));
  col2 = std::static_pointer_cast<arrow::Int32Array>(result_batch->column(1));
  EXPECT_EQ(col1->Value(0), 3);
  EXPECT_EQ(col1->Value(1), 4);
  EXPECT_EQ(col2->Value(0), 7);
  EXPECT_EQ(col2->Value(1), 8);

  // Third read - should be EOF
  ASSERT_OK(merging_reader->ReadNext(&result_batch));
  EXPECT_EQ(result_batch, nullptr);
}

// Both input readers deliver a zero-row batch; the merged read is expected to
// succeed and yield a null batch.
TEST_F(ColumnMergingReaderTest, ZeroRowBatches) {
  // Reader that hands out its batch once, then reports a null batch.
  class OneShotReader : public arrow::ipc::RecordBatchReader {
   public:
    explicit OneShotReader(std::shared_ptr<arrow::RecordBatch> batch)
        : batch_(batch) {}

    std::shared_ptr<arrow::Schema> schema() const override {
      return batch_->schema();
    }

    arrow::Status ReadNext(std::shared_ptr<arrow::RecordBatch> *out) override {
      if (exhausted_) {
        *out = nullptr;
        return arrow::Status::OK();
      }
      exhausted_ = true;
      *out = batch_;
      return arrow::Status::OK();
    }

   private:
    std::shared_ptr<arrow::RecordBatch> batch_;
    bool exhausted_ = false;
  };

  // Empty col1 batch.
  auto empty_col1 = MakeInt32Array({}).ValueOrDie();
  auto col1_batch = arrow::RecordBatch::Make(
      arrow::schema({arrow::field("col1", arrow::int32())}), 0, {empty_col1});

  // Empty col2 batch.
  auto empty_col2 = MakeInt32Array({}).ValueOrDie();
  auto col2_batch = arrow::RecordBatch::Make(
      arrow::schema({arrow::field("col2", arrow::int32())}), 0, {empty_col2});

  std::vector<std::shared_ptr<arrow::ipc::RecordBatchReader>> inputs;
  inputs.push_back(std::make_shared<OneShotReader>(col1_batch));
  inputs.push_back(std::make_shared<OneShotReader>(col2_batch));

  auto two_column_target =
      arrow::schema({arrow::field("col1", arrow::int32()),
                     arrow::field("col2", arrow::int32())});

  auto merged = ColumnMergingReader::Make(two_column_target, std::move(inputs));

  std::shared_ptr<arrow::RecordBatch> merged_batch;
  ASSERT_OK(merged->ReadNext(&merged_batch));
  EXPECT_EQ(merged_batch, nullptr);
}

================================================
FILE: tests/db/index/segment/segment_helper_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "db/index/segment/segment_helper.h"
#include <cstdint>
#include <filesystem>
#include <iostream>
#include <memory>
#include <thread>
#include <variant>
#include <arrow/array/array_binary.h>
#include <arrow/io/file.h>
#include <arrow/ipc/reader.h>
#include <arrow/pretty_print.h>
#include <arrow/result.h>
#include <arrow/table.h>
#include <gtest/gtest.h>
#include "db/common/constants.h"
#include "db/common/file_helper.h"
#include "db/index/common/delete_store.h"
#include "db/index/common/id_map.h"
#include "db/index/common/meta.h"
#include "db/index/common/version_manager.h"
#include "db/index/segment/segment.h"
#include "utils/utils.h"
#include "zvec/db/options.h"
#include "zvec/db/schema.h"

using namespace zvec;

// Fixture for the SegmentHelper tests. Before every test it recreates the
// collection directory and builds a fresh id map and delete store that the
// test bodies pass to the segments they create.
class SegmentHelperTest : public testing::Test {
 protected:
  void SetUp() override {
    ailego::LoggerBroker::SetLevel(ailego::Logger::LEVEL_INFO);

    // Start from an empty collection directory.
    FileHelper::RemoveDirectory(col_path);
    FileHelper::CreateDirectory(col_path);

    std::string idmap_path =
        FileHelper::MakeFilePath(col_path, FileID::ID_FILE, 0);
    id_map = IDMap::CreateAndOpen(col_name, idmap_path, true, false);
    if (id_map == nullptr) {
      throw std::runtime_error("Failed to create id map");
    }

    // The delete store is constructed from the collection name only; no file
    // path is passed to it here.
    delete_store = std::make_shared<DeleteStore>(col_name);
  }

  void TearDown() override {
    id_map.reset();
    delete_store.reset();

    // The collection directory is left on disk here; the next SetUp() removes
    // it before recreating it.
    // FileHelper::RemoveDirectory(col_path);
  }

 public:
  // Returns the directory under which the test collection is stored.
  std::string GetColPath() {
    return col_path;
  }

 protected:
  std::string col_name = "test_segment_helper";
  std::string col_path = "./test_collection";
  IDMap::Ptr id_map;
  DeleteStore::Ptr delete_store;
};

// Compacts two flushed segments built from the normal schema into one output
// segment, reopens that segment read-only and compares every fetched document
// with the one TestHelper::CreateDoc produces for the same id.
TEST_F(SegmentHelperTest, CompactTask_General) {
  auto schema = test::TestHelper::CreateNormalSchema(false, col_name);

  // Seed a version manager with a version that carries the schema.
  Version initial_version;
  initial_version.set_schema(*schema);
  auto manager_result = VersionManager::Create(col_path, initial_version);
  if (!manager_result.has_value()) {
    throw std::runtime_error("Failed to create version manager");
  }
  auto version_manager = manager_result.value();

  bool forward_use_parquet = false;
  SegmentOptions segment_options{false, !forward_use_parquet,
                                 DEFAULT_MAX_BUFFER_SIZE};

  // Build and flush the two input segments.
  auto first_input = test::TestHelper::CreateSegmentWithDoc(
      GetColPath(), *schema, 0, 0, id_map, delete_store, version_manager,
      segment_options, 0, 1000);
  ASSERT_TRUE(first_input != nullptr);
  ASSERT_TRUE(first_input->flush().ok());

  auto second_input = test::TestHelper::CreateSegmentWithDoc(
      GetColPath(), *schema, 1, 1000, id_map, delete_store, version_manager,
      segment_options, 1000, 1000);
  ASSERT_TRUE(second_input != nullptr);
  ASSERT_TRUE(second_input->flush().ok());
  std::cout << "seg2: " << second_input->meta()->to_string_formatted()
            << std::endl;

  std::vector<Segment::Ptr> compaction_inputs = {first_input, second_input};

  // Describe the compaction: no filter, single-threaded.
  SegmentID output_segment_id = 2;
  CompactTask compact_task(GetColPath(), schema, compaction_inputs,
                           output_segment_id,    // output_segment_id
                           nullptr,              // filter
                           forward_use_parquet,  // forward_use_parquet
                           1                     // concurrency
  );

  auto segment_task = SegmentTask::CreateComapctTask(compact_task);
  ASSERT_TRUE(segment_task != nullptr);

  // Run the compaction.
  Status status = SegmentHelper::Execute(segment_task);
  std::cout << "status: " << status.message() << std::endl;
  ASSERT_TRUE(status.ok());

  // Inspect the metadata the executed task recorded for its output segment.
  auto executed_task = std::get<CompactTask>(segment_task->task_info());
  auto output_meta = executed_task.output_segment_meta_;
  ASSERT_EQ(output_meta->id(), output_segment_id);
  ASSERT_FALSE(output_meta->writing_forward_block().has_value());

  // Move the output from its temporary segment path to the final segment path.
  auto temp_dir =
      FileHelper::MakeTempSegmentPath(GetColPath(), output_segment_id);
  auto final_dir = FileHelper::MakeSegmentPath(GetColPath(), output_segment_id);
  FileHelper::MoveDirectory(temp_dir, final_dir);

  // Reopen the compacted segment in read-only mode.
  segment_options.read_only_ = true;
  version_manager->set_enable_mmap(!forward_use_parquet);
  auto open_result = Segment::Open(
      GetColPath(), *schema, *executed_task.output_segment_meta_, id_map,
      delete_store, version_manager, segment_options);
  if (!open_result.has_value()) {
    std::cout << open_result.error().message() << std::endl;
    ASSERT_TRUE(false);
  }

  auto compacted = std::move(open_result.value());
  ASSERT_EQ(compacted->id(), output_segment_id);

  std::cout << compacted->meta()->to_string_formatted() << std::endl;
  ASSERT_EQ(compacted->doc_count(),
            first_input->doc_count() + second_input->doc_count());

  // Every document must round-trip through the compaction unchanged.
  for (uint64_t doc_id = 0; doc_id < compacted->doc_count(); ++doc_id) {
    auto actual_doc = compacted->Fetch(doc_id);
    ASSERT_NE(actual_doc, nullptr);
    auto expected_doc = test::TestHelper::CreateDoc(doc_id, *schema);
    ASSERT_EQ(*actual_doc, expected_doc);
  }

  ASSERT_TRUE(first_input->destroy().ok());
  ASSERT_TRUE(second_input->destroy().ok());
}

// Same compaction flow as the general case, but the schema comes from
// TestHelper::CreateSchemaWithScalarIndex: two flushed segments are compacted
// into one, reopened read-only and verified document by document.
TEST_F(SegmentHelperTest, CompactTask_ScalarIndex) {
  auto schema = test::TestHelper::CreateSchemaWithScalarIndex(false);

  // Seed a version manager with a version that carries the schema.
  Version initial_version;
  initial_version.set_schema(*schema);
  auto manager_result = VersionManager::Create(col_path, initial_version);
  if (!manager_result.has_value()) {
    throw std::runtime_error("Failed to create version manager");
  }
  auto version_manager = manager_result.value();

  bool forward_use_parquet = false;
  SegmentOptions segment_options{false, !forward_use_parquet,
                                 DEFAULT_MAX_BUFFER_SIZE};

  // Build and flush the two input segments.
  auto first_input = test::TestHelper::CreateSegmentWithDoc(
      GetColPath(), *schema, 0, 0, id_map, delete_store, version_manager,
      segment_options, 0, 1000);
  ASSERT_TRUE(first_input != nullptr);
  ASSERT_TRUE(first_input->flush().ok());

  auto second_input = test::TestHelper::CreateSegmentWithDoc(
      GetColPath(), *schema, 1, 1000, id_map, delete_store, version_manager,
      segment_options, 1000, 1000);
  ASSERT_TRUE(second_input != nullptr);
  ASSERT_TRUE(second_input->flush().ok());
  std::cout << "seg2: " << second_input->meta()->to_string_formatted()
            << std::endl;

  std::vector<Segment::Ptr> compaction_inputs = {first_input, second_input};

  // Describe the compaction: no filter, single-threaded.
  SegmentID output_segment_id = 2;
  CompactTask compact_task(GetColPath(), schema, compaction_inputs,
                           output_segment_id,    // output_segment_id
                           nullptr,              // filter
                           forward_use_parquet,  // forward_use_parquet
                           1                     // concurrency
  );

  auto segment_task = SegmentTask::CreateComapctTask(compact_task);
  ASSERT_TRUE(segment_task != nullptr);

  // Run the compaction.
  Status status = SegmentHelper::Execute(segment_task);
  std::cout << "status: " << status.message() << std::endl;
  ASSERT_TRUE(status.ok());

  // Inspect the metadata the executed task recorded for its output segment.
  auto executed_task = std::get<CompactTask>(segment_task->task_info());
  auto output_meta = executed_task.output_segment_meta_;
  ASSERT_EQ(output_meta->id(), output_segment_id);
  ASSERT_FALSE(output_meta->writing_forward_block().has_value());

  // Move the output from its temporary segment path to the final segment path.
  auto temp_dir =
      FileHelper::MakeTempSegmentPath(GetColPath(), output_segment_id);
  auto final_dir = FileHelper::MakeSegmentPath(GetColPath(), output_segment_id);
  FileHelper::MoveDirectory(temp_dir, final_dir);

  // Reopen the compacted segment in read-only mode.
  segment_options.read_only_ = true;
  version_manager->set_enable_mmap(!forward_use_parquet);
  auto open_result = Segment::Open(
      GetColPath(), *schema, *executed_task.output_segment_meta_, id_map,
      delete_store, version_manager, segment_options);
  if (!open_result.has_value()) {
    std::cout << open_result.error().message() << std::endl;
    ASSERT_TRUE(false);
  }

  auto compacted = std::move(open_result.value());
  ASSERT_EQ(compacted->id(), output_segment_id);

  std::cout << compacted->meta()->to_string_formatted() << std::endl;
  ASSERT_EQ(compacted->doc_count(),
            first_input->doc_count() + second_input->doc_count());

  // Every document must round-trip through the compaction unchanged.
  for (uint64_t doc_id = 0; doc_id < compacted->doc_count(); ++doc_id) {
    auto actual_doc = compacted->Fetch(doc_id);
    ASSERT_NE(actual_doc, nullptr);
    auto expected_doc = test::TestHelper::CreateDoc(doc_id, *schema);
    ASSERT_EQ(*actual_doc, expected_doc);
  }

  ASSERT_TRUE(first_input->destroy().ok());
  ASSERT_TRUE(second_input->destroy().ok());
}

// Same compaction flow as the general case, but the schema comes from
// TestHelper::CreateSchemaWithVectorIndex: two flushed segments are compacted
// into one, reopened read-only and verified document by document.
TEST_F(SegmentHelperTest, CompactTask_VectorIndex) {
  auto schema = test::TestHelper::CreateSchemaWithVectorIndex();

  // Seed a version manager with a version that carries the schema.
  Version initial_version;
  initial_version.set_schema(*schema);
  auto manager_result = VersionManager::Create(col_path, initial_version);
  if (!manager_result.has_value()) {
    throw std::runtime_error("Failed to create version manager");
  }
  auto version_manager = manager_result.value();

  bool forward_use_parquet = false;
  SegmentOptions segment_options{false, !forward_use_parquet,
                                 DEFAULT_MAX_BUFFER_SIZE};

  // Build and flush the two input segments.
  auto first_input = test::TestHelper::CreateSegmentWithDoc(
      GetColPath(), *schema, 0, 0, id_map, delete_store, version_manager,
      segment_options, 0, 1000);
  ASSERT_TRUE(first_input != nullptr);
  ASSERT_TRUE(first_input->flush().ok());

  auto second_input = test::TestHelper::CreateSegmentWithDoc(
      GetColPath(), *schema, 1, 1000, id_map, delete_store, version_manager,
      segment_options, 1000, 1000);
  ASSERT_TRUE(second_input != nullptr);
  ASSERT_TRUE(second_input->flush().ok());
  std::cout << "seg2: " << second_input->meta()->to_string_formatted()
            << std::endl;

  std::vector<Segment::Ptr> compaction_inputs = {first_input, second_input};

  // Describe the compaction: no filter, single-threaded.
  SegmentID output_segment_id = 2;
  CompactTask compact_task(GetColPath(), schema, compaction_inputs,
                           output_segment_id,    // output_segment_id
                           nullptr,              // filter
                           forward_use_parquet,  // forward_use_parquet
                           1                     // concurrency
  );

  auto segment_task = SegmentTask::CreateComapctTask(compact_task);
  ASSERT_TRUE(segment_task != nullptr);

  // Run the compaction.
  Status status = SegmentHelper::Execute(segment_task);
  std::cout << "status: " << status.message() << std::endl;
  ASSERT_TRUE(status.ok());

  // Inspect the metadata the executed task recorded for its output segment.
  auto executed_task = std::get<CompactTask>(segment_task->task_info());
  auto output_meta = executed_task.output_segment_meta_;
  ASSERT_EQ(output_meta->id(), output_segment_id);
  ASSERT_FALSE(output_meta->writing_forward_block().has_value());

  // Move the output from its temporary segment path to the final segment path.
  auto temp_dir =
      FileHelper::MakeTempSegmentPath(GetColPath(), output_segment_id);
  auto final_dir = FileHelper::MakeSegmentPath(GetColPath(), output_segment_id);
  FileHelper::MoveDirectory(temp_dir, final_dir);

  // Reopen the compacted segment in read-only mode.
  segment_options.read_only_ = true;
  version_manager->set_enable_mmap(!forward_use_parquet);
  auto open_result = Segment::Open(
      GetColPath(), *schema, *executed_task.output_segment_meta_, id_map,
      delete_store, version_manager, segment_options);
  if (!open_result.has_value()) {
    std::cout << open_result.error().message() << std::endl;
    ASSERT_TRUE(false);
  }

  auto compacted = std::move(open_result.value());
  ASSERT_EQ(compacted->id(), output_segment_id);

  std::cout << compacted->meta()->to_string_formatted() << std::endl;
  ASSERT_EQ(compacted->doc_count(),
            first_input->doc_count() + second_input->doc_count());

  // Every document must round-trip through the compaction unchanged.
  for (uint64_t doc_id = 0; doc_id < compacted->doc_count(); ++doc_id) {
    auto actual_doc = compacted->Fetch(doc_id);
    ASSERT_NE(actual_doc, nullptr);
    auto expected_doc = test::TestHelper::CreateDoc(doc_id, *schema);
    ASSERT_EQ(*actual_doc, expected_doc);
  }

  ASSERT_TRUE(first_input->destroy().ok());
  ASSERT_TRUE(second_input->destroy().ok());
}

// Compacts ten flushed segments into a single output segment, reopens the
// result read-only and checks the total document count and every document.
TEST_F(SegmentHelperTest, CompactTask_MultipleSegments) {
  auto schema = test::TestHelper::CreateNormalSchema(false, col_name);

  // Seed a version manager with a version that carries the schema.
  Version initial_version;
  initial_version.set_schema(*schema);
  auto manager_result = VersionManager::Create(col_path, initial_version);
  if (!manager_result.has_value()) {
    throw std::runtime_error("Failed to create version manager");
  }
  auto version_manager = manager_result.value();

  bool forward_use_parquet = false;
  SegmentOptions segment_options{false, !forward_use_parquet,
                                 DEFAULT_MAX_BUFFER_SIZE};

  // Create and flush the input segments one after another.
  int seg_count = 10;
  int doc_count_per_seg = 100;
  std::vector<Segment::Ptr> compaction_inputs;
  for (int seg_index = 0; seg_index < seg_count; ++seg_index) {
    auto input_segment = test::TestHelper::CreateSegmentWithDoc(
        GetColPath(), *schema, seg_index, seg_index * doc_count_per_seg, id_map,
        delete_store, version_manager, segment_options,
        seg_index * doc_count_per_seg, doc_count_per_seg);
    ASSERT_TRUE(input_segment != nullptr);
    ASSERT_TRUE(input_segment->flush().ok());
    compaction_inputs.push_back(input_segment);
  }

  // Describe the compaction: no filter, single-threaded. The output segment
  // takes the id following the last input segment.
  SegmentID output_segment_id = seg_count;
  CompactTask compact_task(GetColPath(), schema, compaction_inputs,
                           output_segment_id,    // output_segment_id
                           nullptr,              // filter
                           forward_use_parquet,  // forward_use_parquet
                           1                     // concurrency
  );

  auto segment_task = SegmentTask::CreateComapctTask(compact_task);
  ASSERT_TRUE(segment_task != nullptr);

  // Run the compaction.
  Status status = SegmentHelper::Execute(segment_task);
  std::cout << "status: " << status.message() << std::endl;
  ASSERT_TRUE(status.ok());

  // Inspect the metadata the executed task recorded for its output segment.
  auto executed_task = std::get<CompactTask>(segment_task->task_info());
  auto output_meta = executed_task.output_segment_meta_;
  ASSERT_EQ(output_meta->id(), output_segment_id);
  ASSERT_FALSE(output_meta->writing_forward_block().has_value());

  // Move the output from its temporary segment path to the final segment path.
  auto temp_dir =
      FileHelper::MakeTempSegmentPath(GetColPath(), output_segment_id);
  auto final_dir = FileHelper::MakeSegmentPath(GetColPath(), output_segment_id);
  FileHelper::MoveDirectory(temp_dir, final_dir);

  // Reopen the compacted segment in read-only mode.
  segment_options.read_only_ = true;
  version_manager->set_enable_mmap(!forward_use_parquet);
  auto open_result = Segment::Open(
      GetColPath(), *schema, *executed_task.output_segment_meta_, id_map,
      delete_store, version_manager, segment_options);
  if (!open_result.has_value()) {
    std::cout << open_result.error().message() << std::endl;
    ASSERT_TRUE(false);
  }

  auto compacted = std::move(open_result.value());
  ASSERT_EQ(compacted->id(), output_segment_id);

  std::cout << compacted->meta()->to_string_formatted() << std::endl;
  ASSERT_EQ(compacted->doc_count(), seg_count * doc_count_per_seg);

  // Every document must round-trip through the compaction unchanged; the id of
  // a missing document is printed before the assertion fires.
  for (uint64_t doc_id = 0; doc_id < compacted->doc_count(); ++doc_id) {
    auto actual_doc = compacted->Fetch(doc_id);
    if (actual_doc == nullptr) {
      std::cout << "doc is null: " << doc_id << std::endl;
    }
    ASSERT_NE(actual_doc, nullptr);
    auto expected_doc = test::TestHelper::CreateDoc(doc_id, *schema);
    ASSERT_EQ(*actual_doc, expected_doc);
  }
}

// Compacts a single flushed segment with a filter whose predicate is true for
// ids below 10; the reopened output segment must hold ten documents fewer
// than the input segment.
TEST_F(SegmentHelperTest, CompactTask_Filter) {
  auto schema = test::TestHelper::CreateNormalSchema(false, col_name);

  // Seed a version manager with a version that carries the schema.
  Version initial_version;
  initial_version.set_schema(*schema);
  auto manager_result = VersionManager::Create(col_path, initial_version);
  if (!manager_result.has_value()) {
    throw std::runtime_error("Failed to create version manager");
  }
  auto version_manager = manager_result.value();

  bool forward_use_parquet = false;
  SegmentOptions segment_options{false, !forward_use_parquet,
                                 DEFAULT_MAX_BUFFER_SIZE};

  // Build and flush the only input segment.
  auto input_segment = test::TestHelper::CreateSegmentWithDoc(
      GetColPath(), *schema, 0, 0, id_map, delete_store, version_manager,
      segment_options, 0, 1000);
  ASSERT_TRUE(input_segment != nullptr);
  ASSERT_TRUE(input_segment->flush().ok());

  // Predicate-based filter matching ids 0..9.
  // Note: Actual filter configuration would depend on the IndexFilter
  // implementation
  auto filter = std::make_shared<EasyIndexFilter>(
      [&](uint64_t doc_id) -> bool { return doc_id < 10; });

  // Describe the compaction: one input segment, filtered, single-threaded.
  SegmentID output_segment_id = 1;
  CompactTask compact_task(GetColPath(), schema,
                           {input_segment},    // Single segment with filter
                           output_segment_id,  // output_segment_id
                           filter,
                           forward_use_parquet,  // forward_use_parquet
                           1                     // concurrency
  );

  auto segment_task = SegmentTask::CreateComapctTask(compact_task);
  ASSERT_TRUE(segment_task != nullptr);

  // Run the compaction.
  Status status = SegmentHelper::Execute(segment_task);
  std::cout << "status: " << status.message() << std::endl;
  ASSERT_TRUE(status.ok());

  // Inspect the metadata the executed task recorded for its output segment.
  auto executed_task = std::get<CompactTask>(segment_task->task_info());
  auto output_meta = executed_task.output_segment_meta_;
  std::cout << output_meta->to_string_formatted() << std::endl;
  ASSERT_EQ(output_meta->id(), output_segment_id);
  ASSERT_FALSE(output_meta->writing_forward_block().has_value());

  // Move the output from its temporary segment path to the final segment path.
  auto temp_dir =
      FileHelper::MakeTempSegmentPath(GetColPath(), output_segment_id);
  auto final_dir = FileHelper::MakeSegmentPath(GetColPath(), output_segment_id);
  FileHelper::MoveDirectory(temp_dir, final_dir);

  // Reopen the compacted segment in read-only mode.
  segment_options.read_only_ = true;
  version_manager->set_enable_mmap(!forward_use_parquet);
  auto open_result = Segment::Open(
      GetColPath(), *schema, *executed_task.output_segment_meta_, id_map,
      delete_store, version_manager, segment_options);
  if (!open_result.has_value()) {
    std::cout << open_result.error().message() << std::endl;
    ASSERT_TRUE(false);
  }

  auto compacted = std::move(open_result.value());
  ASSERT_EQ(compacted->id(), output_segment_id);

  // The ten ids matched by the filter must be absent from the output.
  std::cout << compacted->meta()->to_string_formatted() << std::endl;
  ASSERT_EQ(compacted->doc_count(), input_segment->doc_count() - 10);

  ASSERT_TRUE(input_segment->destroy().ok());
}

// Compacts one flushed segment through a filter that returns true for every
// id. The test expects the task to succeed while producing no output segment
// meta and leaving no temp segment directory behind.
TEST_F(SegmentHelperTest, CompactTask_FilterAll) {
  auto schema = test::TestHelper::CreateNormalSchema(false, col_name);

  Version initial_version;
  initial_version.set_schema(*schema);
  auto manager_result = VersionManager::Create(col_path, initial_version);
  if (!manager_result.has_value()) {
    throw std::runtime_error("Failed to create version manager");
  }
  auto version_manager = manager_result.value();

  const bool forward_use_parquet = false;
  SegmentOptions seg_options{false, !forward_use_parquet,
                             DEFAULT_MAX_BUFFER_SIZE};

  // Source segment that will be fed into the compaction.
  auto source_segment = test::TestHelper::CreateSegmentWithDoc(
      GetColPath(), *schema, 0, 0, id_map, delete_store, version_manager,
      seg_options, 0, 1000);
  ASSERT_NE(source_segment, nullptr);
  ASSERT_TRUE(source_segment->flush().ok());

  // Filter callback: true regardless of the id.
  auto match_everything = std::make_shared<EasyIndexFilter>(
      [](uint64_t /*id*/) { return true; });

  // Single-input compact task writing to segment id 1.
  const SegmentID output_segment_id = 1;
  CompactTask compact_task(GetColPath(), schema, {source_segment},
                           output_segment_id, match_everything,
                           forward_use_parquet, 1 /* concurrency */);

  auto segment_task = SegmentTask::CreateComapctTask(compact_task);
  ASSERT_NE(segment_task, nullptr);

  Status status = SegmentHelper::Execute(segment_task);
  std::cout << "status: " << status.message() << std::endl;
  ASSERT_TRUE(status.ok());

  // No output meta is expected when everything is filtered.
  auto executed_task = std::get<CompactTask>(segment_task->task_info());
  auto output_segment_meta = executed_task.output_segment_meta_;
  ASSERT_EQ(output_segment_meta, nullptr);

  // The temp output directory must not exist either.
  const auto tmp_segment_path =
      FileHelper::MakeTempSegmentPath(GetColPath(), output_segment_id);
  ASSERT_FALSE(FileHelper::DirectoryExists(tmp_segment_path));
}

// Runs a CreateVectorIndexTask with an empty column name (per the task
// argument comment below, this means "all vector columns") on a dumped
// segment and checks that the resulting segment meta equals the source meta
// with its writing forward block removed.
TEST_F(SegmentHelperTest, CreateVectorIndexTask_AllFields) {
  auto schema = test::TestHelper::CreateNormalSchema(false, col_name);

  Version version;
  version.set_schema(*schema);
  auto version_manager_tmp = VersionManager::Create(col_path, version);
  if (!version_manager_tmp.has_value()) {
    throw std::runtime_error("Failed to create version manager");
  }

  auto version_manager = version_manager_tmp.value();

  // Create a segment
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      GetColPath(), *schema, 0, 0, id_map, delete_store, version_manager,
      SegmentOptions{false, true, DEFAULT_MAX_BUFFER_SIZE}, 0, 1000);
  ASSERT_TRUE(segment != nullptr);
  ASSERT_TRUE(segment->dump().ok());

  // Create index params
  auto index_params =
      std::make_shared<HnswIndexParams>(MetricType::L2,  // metric_type
                                        16,              // m
                                        100              // ef_construction
      );

  // Create create index task
  CreateVectorIndexTask task(
      segment,
      "",  // column_to_build_vector_index (empty means all vector columns)
      index_params,
      1  // concurrency
  );

  // Create segment task
  auto segment_task = SegmentTask::CreateCreateVectorIndexTask(task);

  // Verify task creation
  ASSERT_TRUE(segment_task != nullptr);

  // Execute the task. The status check is fatal because everything below
  // dereferences the output meta the task is expected to have produced.
  Status status = SegmentHelper::Execute(segment_task);
  std::cout << "status: " << status.message() << std::endl;
  ASSERT_TRUE(status.ok());

  // Verify output segment meta
  auto index_task = std::get<CreateVectorIndexTask>(segment_task->task_info());
  auto output_segment_meta = index_task.output_segment_meta_;
  ASSERT_TRUE(output_segment_meta != nullptr);
  std::cout << "output_segment_meta: "
            << output_segment_meta->to_string_formatted() << std::endl;
  ASSERT_EQ(output_segment_meta->id(), 0);
  ASSERT_FALSE(output_segment_meta->writing_forward_block().has_value());

  auto segment_meta = std::make_shared<SegmentMeta>(*segment->meta());
  segment_meta->remove_writing_forward_block();
  // create all vector index will not change segment meta
  ASSERT_EQ(*output_segment_meta, *segment_meta);
}

// Runs a CreateVectorIndexTask restricted to the "dense_fp32" column on a
// dumped segment and checks the id and forward-block state of the output
// segment meta.
TEST_F(SegmentHelperTest, CreateVectorIndexTask_SingleField) {
  auto schema = test::TestHelper::CreateNormalSchema(false, col_name);

  Version version;
  version.set_schema(*schema);
  auto version_manager_tmp = VersionManager::Create(col_path, version);
  if (!version_manager_tmp.has_value()) {
    throw std::runtime_error("Failed to create version manager");
  }

  auto version_manager = version_manager_tmp.value();

  // Create a segment
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      GetColPath(), *schema, 0, 0, id_map, delete_store, version_manager,
      SegmentOptions{false, true, DEFAULT_MAX_BUFFER_SIZE}, 0, 1000);
  ASSERT_TRUE(segment != nullptr);
  ASSERT_TRUE(segment->dump().ok());

  // Create index params
  auto index_params =
      std::make_shared<HnswIndexParams>(MetricType::IP,  // metric_type
                                        16,              // m
                                        100              // ef_construction
      );

  // Create create index task
  CreateVectorIndexTask task(segment,
                             "dense_fp32",  // column_to_build_vector_index
                                            // (a single named vector column)
                             index_params,
                             1  // concurrency
  );

  // Create segment task
  auto segment_task = SegmentTask::CreateCreateVectorIndexTask(task);

  // Verify task creation
  ASSERT_TRUE(segment_task != nullptr);

  // Execute the task. The status check is fatal because everything below
  // dereferences the output meta the task is expected to have produced.
  Status status = SegmentHelper::Execute(segment_task);
  std::cout << "status: " << status.message() << std::endl;
  ASSERT_TRUE(status.ok());

  // Verify output segment meta
  auto index_task = std::get<CreateVectorIndexTask>(segment_task->task_info());
  auto output_segment_meta = index_task.output_segment_meta_;
  ASSERT_TRUE(output_segment_meta != nullptr);
  std::cout << "output_segment_meta: "
            << output_segment_meta->to_string_formatted() << std::endl;
  ASSERT_EQ(output_segment_meta->id(), 0);
  ASSERT_FALSE(output_segment_meta->writing_forward_block().has_value());
}

================================================
FILE: tests/db/index/segment/segment_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <filesystem>
#include <iostream>
#define private public
#define protected public
#include "db/index/segment/segment.h"
#undef private
#undef protected
#include <cstdint>
#include <memory>
#include <thread>
#include <arrow/array/array_binary.h>
#include <arrow/io/file.h>
#include <arrow/ipc/reader.h>
#include <arrow/pretty_print.h>
#include <arrow/result.h>
#include <arrow/table.h>
#include <gtest/gtest.h>
#include <zvec/ailego/buffer/buffer_manager.h>
#include "db/common/file_helper.h"
#include "db/index/common/delete_store.h"
#include "db/index/common/id_map.h"
#include "db/index/common/version_manager.h"
#include "db/index/storage/store_helper.h"
#include "db/index/storage/wal/wal_file.h"
#include "utils/utils.h"
#include "zvec/db/options.h"

using namespace zvec;

// Fixture for Segment tests, parameterized on a bool that is used as the
// enable_mmap setting for both the Version and the default SegmentOptions.
//
// SetUp recreates the collection directory, initializes the buffer manager,
// and builds the id map, delete store, schema and version manager that every
// test shares. TearDown only releases the shared handles; the directory is
// left on disk and is wiped by the next SetUp.
class SegmentTest : public testing::TestWithParam<bool> {
 protected:
  void SetUp() override {
    ailego::LoggerBroker::SetLevel(ailego::Logger::LEVEL_INFO);

    // Start from an empty collection directory.
    FileHelper::RemoveDirectory(col_path);
    FileHelper::CreateDirectory(col_path);

    ailego::BufferManager::Instance().init(MIN_MEMORY_LIMIT_BYTES, 1);

    std::string idmap_path =
        FileHelper::MakeFilePath(col_path, FileID::ID_FILE, 0);
    id_map = IDMap::CreateAndOpen(col_name, idmap_path, true, false);
    if (id_map == nullptr) {
      throw std::runtime_error("Failed to create id map");
    }

    // The delete store is constructed from the collection name only.
    delete_store = std::make_shared<DeleteStore>(col_name);

    schema =
        test::TestHelper::CreateSchemaWithScalarIndex(false, false, col_name);

    // Extra scalar fields the tests scan and fetch by name.
    schema->add_field(
        std::make_shared<FieldSchema>("id", DataType::INT32, false));
    schema->add_field(
        std::make_shared<FieldSchema>("name", DataType::STRING, false));
    schema->add_field(
        std::make_shared<FieldSchema>("age", DataType::UINT32, false));

    schema->add_field(
        std::make_shared<FieldSchema>("binary", DataType::BINARY, false));

    schema->add_field(std::make_shared<FieldSchema>(
        "array_binary", DataType::ARRAY_BINARY, false));

    bool enable_mmap = GetParam();

    Version version;
    version.set_schema(*schema);
    version.set_enable_mmap(enable_mmap);
    auto version_manager_tmp = VersionManager::Create(col_path, version);
    if (!version_manager_tmp.has_value()) {
      throw std::runtime_error("Failed to create version manager");
    }

    version_manager = version_manager_tmp.value();

    // default options
    options.read_only_ = false;
    options.enable_mmap_ = enable_mmap;
    options.max_buffer_size_ = 64 * 1024 * 1024;
  }

  void TearDown() override {
    id_map.reset();
    delete_store.reset();
    version_manager.reset();

    // FileHelper::RemoveDirectory(col_path);
  }

 public:
  // Returns the collection directory used by this fixture.
  std::string GetColPath() {
    return col_path;
  }

 protected:
  std::string col_name = "test_segment";
  std::string col_path = "./test_collection";
  IDMap::Ptr id_map;
  DeleteStore::Ptr delete_store;
  VersionManager::Ptr version_manager;
  CollectionSchema::Ptr schema;
  // Default segment options; individual tests may tweak them before use.
  SegmentOptions options;
};

// Creates a segment over the fixture schema with an empty doc range (0, 0)
// and checks that creation succeeds with segment id 0.
TEST_P(SegmentTest, EmptySchema) {
  auto empty_segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 0);
  ASSERT_NE(empty_segment, nullptr);
  EXPECT_EQ(empty_segment->id(), 0);

  // Release the segment explicitly before the fixture tears down.
  empty_segment.reset();
}


// End-to-end smoke test with a 1 KiB max buffer size: scans four columns of
// a 25-doc segment, then fetches eight rows (with a repeated and unordered
// index list) and checks that LOCAL_ROW_ID comes back in request order.
TEST_P(SegmentTest, General) {
  options.max_buffer_size_ = 1 * 1024;

  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 25);
  ASSERT_TRUE(segment != nullptr);

  auto combined_reader = segment->scan({LOCAL_ROW_ID, "id", "name", "age"});
  ASSERT_TRUE(combined_reader != nullptr);
  EXPECT_TRUE(combined_reader->schema() != nullptr);

  // Drain the reader; a failed status or a null batch ends the scan.
  std::shared_ptr<arrow::RecordBatch> batch;
  uint32_t total_doc = 0;
  while (true) {
    auto status = combined_reader->ReadNext(&batch);
    if (status.ok() == false) break;
    if (batch == nullptr) break;

    EXPECT_EQ(batch->num_columns(), 4);

    total_doc += batch->num_rows();
  }
  EXPECT_EQ(total_doc, 25);

  std::vector<int> indices = {0, 3, 6, 1, 0, 14, 12, 21};
  auto combined_table = segment->fetch(
      {LOCAL_ROW_ID, "id", "name", "age", "binary", "array_binary"}, indices);
  ASSERT_TRUE(combined_table != nullptr);
  EXPECT_EQ(combined_table->num_columns(), 6);
  EXPECT_EQ(combined_table->num_rows(), 8);

  auto field = combined_table->schema()->field(0);
  EXPECT_EQ(field->name(), LOCAL_ROW_ID);

  // Get data from the LOCAL_ROW_ID column for each row. The chunk access and
  // the downcast are guarded so a layout or type mismatch fails the test
  // instead of dereferencing a null pointer.
  auto id_column = combined_table->column(0);
  ASSERT_TRUE(id_column != nullptr);
  ASSERT_GT(id_column->num_chunks(), 0);
  auto id_array =
      std::dynamic_pointer_cast<arrow::UInt64Array>(id_column->chunk(0));
  ASSERT_TRUE(id_array != nullptr)
      << "LOCAL_ROW_ID column is not a UInt64Array";

  std::vector<int> actual_ids;
  actual_ids.reserve(static_cast<size_t>(id_array->length()));
  for (int64_t i = 0; i < id_array->length(); ++i) {
    actual_ids.push_back(static_cast<int>(id_array->Value(i)));
  }

  EXPECT_EQ(actual_ids, indices)
      << "ID column values don't match expected order";
}

// Inserts 1000 docs into an initially empty segment, prints the elapsed
// wall-clock time, and checks that a scan returns every inserted row.
TEST_P(SegmentTest, InsertMoreData) {
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 0);
  ASSERT_NE(segment, nullptr);

  const uint64_t kMaxDoc = 1000;

  // Time the bulk insert.
  const auto insert_begin = std::chrono::system_clock::now();
  test::TestHelper::SegmentInsertDoc(segment, *schema, 0, kMaxDoc);
  const auto insert_end = std::chrono::system_clock::now();
  const auto elapsed_ms =
      std::chrono::duration_cast<std::chrono::milliseconds>(insert_end -
                                                            insert_begin)
          .count();
  std::cout << "insert cost " << elapsed_ms << "ms" << std::endl;

  // Count the rows visible to a scan; a failed status or a null batch ends
  // the loop.
  auto reader = segment->scan({"id", "name", "age"});
  std::shared_ptr<arrow::RecordBatch> current_batch;
  uint32_t scanned_rows = 0;
  for (;;) {
    auto read_status = reader->ReadNext(&current_batch);
    if (!read_status.ok() || current_batch == nullptr) {
      break;
    }
    scanned_rows += current_batch->num_rows();
  }

  EXPECT_EQ(scanned_rows, kMaxDoc);
}

// Adds inverted-index variants of the "binary" and "array_binary" fields to
// the fixture schema and checks that a 10-doc segment can be created.
//
// NOTE(review): `indexed_schema` is built but never used; the fields are added
// to the fixture's `schema`, which SetUp already populated with fields of the
// same names. This looks like it was meant to target the freshly built
// schema (compare InsertVectorTypes) -- confirm the intent before changing.
TEST_P(SegmentTest, InsertScalarTypes) {
  auto indexed_schema =
      test::TestHelper::CreateSchemaWithScalarIndex(true, true, col_name);

  auto invert_index = std::make_shared<InvertIndexParams>(false);

  auto binary_field = std::make_shared<FieldSchema>(
      "binary", DataType::BINARY, false, invert_index);
  schema->add_field(binary_field);

  auto array_binary_field = std::make_shared<FieldSchema>(
      "array_binary", DataType::ARRAY_BINARY, false, invert_index);
  schema->add_field(array_binary_field);

  auto populated_segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 10);
  ASSERT_NE(populated_segment, nullptr);
}

// Writes 100 docs under a schema with an FP16-quantized HNSW (IP) vector
// index, lets the segment go out of scope, then reopens the writing segment
// and checks that both the plain and the quantized indexers for "dense_fp32"
// are non-empty.
TEST_P(SegmentTest, InsertVectorTypes) {
  auto hnsw_params = std::make_shared<HnswIndexParams>(
      MetricType::IP, 16, 20, QuantizeType::FP16);
  auto vector_schema = test::TestHelper::CreateSchemaWithVectorIndex(
      false, col_name, hnsw_params);

  // Phase 1: populate a segment and release it.
  const int doc_count = 100;
  {
    auto writer_segment = test::TestHelper::CreateSegmentWithDoc(
        col_path, *vector_schema, 0, 0, id_map, delete_store, version_manager,
        options, 0, doc_count);
    ASSERT_NE(writer_segment, nullptr);
  }

  // Phase 2: reopen from the current version's writing segment meta.
  {
    Version current = version_manager->get_current_version();
    auto open_result = Segment::Open(col_path, *vector_schema,
                                     *current.writing_segment_meta(), id_map,
                                     delete_store, version_manager, options);
    ASSERT_TRUE(open_result.has_value());
    auto reopened_segment = open_result.value();

    EXPECT_GT(reopened_segment->get_vector_indexer("dense_fp32").size(), 0);
    EXPECT_GT(reopened_segment->get_quant_vector_indexer("dense_fp32").size(),
              0);
  }
}

// Fetches the only document of a one-doc segment through Segment::Fetch(0)
// and checks its doc id and primary key.
TEST_P(SegmentTest, FetchByGlobalDocID) {
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 1);
  ASSERT_TRUE(segment != nullptr);

  auto ret_doc = segment->Fetch(0);
  // Fatal check: the expectations below dereference the returned doc.
  ASSERT_TRUE(ret_doc != nullptr);
  EXPECT_EQ(ret_doc->doc_id(), 0);
  EXPECT_EQ(ret_doc->pk(), "pk_0");
}

// Fetches each of ten rows individually and checks that the "id" scalar of
// the returned one-row batch equals the requested row index.
TEST_P(SegmentTest, FetchSingleRow) {
  const int doc_count = 10;
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, doc_count);
  ASSERT_NE(segment, nullptr);

  // ASSERT_* inside the lambda only returns from that invocation; the loop
  // below still visits the remaining rows.
  const auto verify_row = [&](int row) -> void {
    ExecBatchPtr row_batch = segment->fetch({"id", "name", "age"}, row);
    ASSERT_NE(row_batch, nullptr);
    EXPECT_EQ(row_batch->length, 1);
    EXPECT_EQ(row_batch->values.size(), 3);

    auto raw_scalar = row_batch->values[0].scalar();
    ASSERT_NE(raw_scalar, nullptr);
    auto typed_scalar =
        std::dynamic_pointer_cast<arrow::Int32Scalar>(raw_scalar);
    ASSERT_NE(typed_scalar, nullptr);
    EXPECT_EQ(typed_scalar->value, row);
  };

  for (int row = 0; row < doc_count; ++row) {
    verify_row(row);
  }
}

// Writes 1000 docs, releases the segment, reopens the writing segment with
// default-constructed (writable) options, appends another 1000 docs, and
// then checks single-row fetches across the whole 0..1999 range.
TEST_P(SegmentTest, FetchSingleRowWithPersistStore) {
  const int doc_count = 1000;

  // Phase 1: populate and release.
  {
    auto writer_segment = test::TestHelper::CreateSegmentWithDoc(
        col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
        0, doc_count);
    ASSERT_NE(writer_segment, nullptr);
  }

  // Phase 2: reopen, append, and verify.
  {
    Version current = version_manager->get_current_version();

    // Deliberately separate from the fixture's `options` member: only
    // read_only_ is set, everything else keeps its default.
    SegmentOptions reopen_options;
    reopen_options.read_only_ = false;

    auto open_result =
        Segment::Open(col_path, *schema, *current.writing_segment_meta(),
                      id_map, delete_store, version_manager, reopen_options);
    ASSERT_TRUE(open_result.has_value());
    auto segment = open_result.value();

    test::TestHelper::SegmentInsertDoc(segment, *schema, doc_count,
                                       doc_count * 2);

    // ASSERT_* inside the lambda only returns from that invocation.
    const auto verify_row = [&](int row) -> void {
      ExecBatchPtr row_batch = segment->fetch({"id", "name", "age"}, row);
      ASSERT_NE(row_batch, nullptr);
      EXPECT_EQ(row_batch->length, 1);
      EXPECT_EQ(row_batch->values.size(), 3);

      auto raw_scalar = row_batch->values[0].scalar();
      ASSERT_NE(raw_scalar, nullptr);
      auto typed_scalar =
          std::dynamic_pointer_cast<arrow::Int32Scalar>(raw_scalar);
      ASSERT_NE(typed_scalar, nullptr);
      EXPECT_EQ(typed_scalar->value, row);
    };

    const int total_rows = doc_count * 2;
    for (int row = 0; row < total_rows; ++row) {
      verify_row(row);
    }
  }
}

// Fetches row 2 with USER_ID as the first requested column and checks that
// the first returned value is a string scalar.
TEST_P(SegmentTest, FetchSingleRowWithUserID) {
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 10);
  ASSERT_NE(segment, nullptr);

  ExecBatchPtr row_batch = segment->fetch({USER_ID, "id", "name"}, 2);
  ASSERT_NE(row_batch, nullptr);
  EXPECT_EQ(row_batch->length, 1);
  EXPECT_EQ(row_batch->values.size(), 3);

  auto raw_scalar = row_batch->values[0].scalar();
  ASSERT_NE(raw_scalar, nullptr);

  // USER_ID is expected to surface as an Arrow string scalar.
  auto as_string = std::dynamic_pointer_cast<arrow::StringScalar>(raw_scalar);
  EXPECT_TRUE(as_string != nullptr);
}

// Fetches row 4 with GLOBAL_DOC_ID as the first requested column and checks
// that the first returned value is a uint64 scalar.
TEST_P(SegmentTest, FetchSingleRowWithGlobalDocID) {
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 10);
  ASSERT_NE(segment, nullptr);

  ExecBatchPtr row_batch = segment->fetch({GLOBAL_DOC_ID, "id", "name"}, 4);
  ASSERT_NE(row_batch, nullptr);
  EXPECT_EQ(row_batch->length, 1);
  EXPECT_EQ(row_batch->values.size(), 3);

  auto raw_scalar = row_batch->values[0].scalar();
  ASSERT_NE(raw_scalar, nullptr);

  // GLOBAL_DOC_ID is expected to surface as an Arrow uint64 scalar.
  auto as_uint64 = std::dynamic_pointer_cast<arrow::UInt64Scalar>(raw_scalar);
  EXPECT_TRUE(as_uint64 != nullptr);
}

// Fetches row 4 with LOCAL_ROW_ID as the first requested column and checks
// that it comes back as a uint64 scalar holding the requested row index.
TEST_P(SegmentTest, FetchSingleRowWithLocalRowID) {
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 10);
  ASSERT_TRUE(segment != nullptr);

  ExecBatchPtr batch = segment->fetch({LOCAL_ROW_ID, "id", "name"}, 4);
  ASSERT_TRUE(batch != nullptr);
  EXPECT_EQ(batch->length, 1);
  // Fatal: values[] is indexed right below.
  ASSERT_EQ(batch->values.size(), 3);

  auto local_doc_id_scalar = batch->values[0].scalar();
  ASSERT_TRUE(local_doc_id_scalar != nullptr);

  // Fatal: the cast result is dereferenced by the value check.
  auto local_doc_id_value =
      std::dynamic_pointer_cast<arrow::UInt64Scalar>(local_doc_id_scalar);
  ASSERT_TRUE(local_doc_id_value != nullptr);
  EXPECT_EQ(local_doc_id_value->value, 4);
}

// Fetches row 4 with LOCAL_ROW_ID as the second requested column and checks
// that the value at that position is a uint64 scalar holding the requested
// row index.
TEST_P(SegmentTest, FetchSingleRowWithLocalRowIDMiddle) {
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 10);
  ASSERT_TRUE(segment != nullptr);

  ExecBatchPtr batch = segment->fetch({"id", LOCAL_ROW_ID, "name"}, 4);
  ASSERT_TRUE(batch != nullptr);
  EXPECT_EQ(batch->length, 1);
  // Fatal: values[1] is indexed right below.
  ASSERT_EQ(batch->values.size(), 3);

  auto local_doc_id_scalar = batch->values[1].scalar();
  ASSERT_TRUE(local_doc_id_scalar != nullptr);

  // Fatal: the cast result is dereferenced by the value check.
  auto local_doc_id_value =
      std::dynamic_pointer_cast<arrow::UInt64Scalar>(local_doc_id_scalar);
  ASSERT_TRUE(local_doc_id_value != nullptr);
  EXPECT_EQ(local_doc_id_value->value, 4);
}

// Fetches row 4 with LOCAL_ROW_ID as the last requested column and checks
// that the value at that position is a uint64 scalar holding the requested
// row index.
TEST_P(SegmentTest, FetchSingleRowWithLocalRowIDEnd) {
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 10);
  ASSERT_TRUE(segment != nullptr);

  ExecBatchPtr batch = segment->fetch({"id", "name", LOCAL_ROW_ID}, 4);
  ASSERT_TRUE(batch != nullptr);
  EXPECT_EQ(batch->length, 1);
  // Fatal: values[2] is indexed right below.
  ASSERT_EQ(batch->values.size(), 3);

  auto local_doc_id_scalar = batch->values[2].scalar();
  ASSERT_TRUE(local_doc_id_scalar != nullptr);

  // Fatal: the cast result is dereferenced by the value check.
  auto local_doc_id_value =
      std::dynamic_pointer_cast<arrow::UInt64Scalar>(local_doc_id_scalar);
  ASSERT_TRUE(local_doc_id_value != nullptr);
  EXPECT_EQ(local_doc_id_value->value, 4);
}

// With LOCAL_ROW_ID requested as the first column: checks its position and
// uint64 type in every scanned batch, then checks that a multi-row fetch
// returns the row ids in the same order as the requested indices.
TEST_P(SegmentTest, CheckOrderWithLocalRowID) {
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 10);
  ASSERT_TRUE(segment != nullptr);

  auto combined_reader = segment->scan({LOCAL_ROW_ID, "id", "name"});
  ASSERT_TRUE(combined_reader != nullptr);
  EXPECT_TRUE(combined_reader->schema() != nullptr);

  // Drain the reader; a failed status or a null batch ends the scan.
  std::shared_ptr<arrow::RecordBatch> batch;
  uint32_t total_doc = 0;
  while (true) {
    auto status = combined_reader->ReadNext(&batch);
    if (status.ok() == false) break;
    if (batch == nullptr) break;
    EXPECT_EQ(batch->num_columns(), 3);
    EXPECT_EQ(batch->column(0)->type()->id(), arrow::Type::UINT64);
    EXPECT_EQ(batch->column_name(0), LOCAL_ROW_ID);
    total_doc += batch->num_rows();
  }
  EXPECT_EQ(total_doc, 10);

  std::vector<int> indices = {0, 3, 6, 1, 0};
  auto combined_table = segment->fetch({LOCAL_ROW_ID, "id", "name"}, indices);
  ASSERT_TRUE(combined_table != nullptr);
  EXPECT_EQ(combined_table->num_columns(), 3);
  EXPECT_EQ(combined_table->num_rows(), 5);

  auto field = combined_table->schema()->field(0);
  EXPECT_EQ(field->name(), LOCAL_ROW_ID);

  // Get data from the LOCAL_ROW_ID column for each row. The chunk access and
  // the downcast are guarded so a layout or type mismatch fails the test
  // instead of dereferencing a null pointer.
  auto id_column = combined_table->column(0);
  ASSERT_TRUE(id_column != nullptr);
  ASSERT_GT(id_column->num_chunks(), 0);
  auto id_array =
      std::dynamic_pointer_cast<arrow::UInt64Array>(id_column->chunk(0));
  ASSERT_TRUE(id_array != nullptr)
      << "LOCAL_ROW_ID column is not a UInt64Array";

  std::vector<int> actual_ids;
  actual_ids.reserve(static_cast<size_t>(id_array->length()));
  for (int64_t i = 0; i < id_array->length(); ++i) {
    actual_ids.push_back(static_cast<int>(id_array->Value(i)));
  }

  EXPECT_EQ(actual_ids, indices)
      << "ID column values don't match expected order";
}

// With LOCAL_ROW_ID requested as the second column: checks its position and
// uint64 type in every scanned batch, then checks that a multi-row fetch
// returns the row ids in the same order as the requested indices.
TEST_P(SegmentTest, CheckOrderWithLocalRowIDMiddle) {
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 10);
  ASSERT_TRUE(segment != nullptr);

  auto combined_reader = segment->scan({"id", LOCAL_ROW_ID, "name"});
  ASSERT_TRUE(combined_reader != nullptr);
  EXPECT_TRUE(combined_reader->schema() != nullptr);

  // Drain the reader; a failed status or a null batch ends the scan.
  std::shared_ptr<arrow::RecordBatch> batch;
  uint32_t total_doc = 0;
  while (true) {
    auto status = combined_reader->ReadNext(&batch);
    if (status.ok() == false) break;
    if (batch == nullptr) break;

    EXPECT_EQ(batch->num_columns(), 3);
    EXPECT_EQ(batch->column(1)->type()->id(), arrow::Type::UINT64);
    EXPECT_EQ(batch->column_name(1), LOCAL_ROW_ID);

    total_doc += batch->num_rows();
  }
  EXPECT_EQ(total_doc, 10);

  std::vector<int> indices = {0, 3, 6, 1, 0};
  auto combined_table = segment->fetch({"id", LOCAL_ROW_ID, "name"}, indices);
  ASSERT_TRUE(combined_table != nullptr);
  // Fatal: column 1 is accessed right below.
  ASSERT_EQ(combined_table->num_columns(), 3);
  EXPECT_EQ(combined_table->num_rows(), 5);

  auto field = combined_table->schema()->field(1);
  EXPECT_EQ(field->name(), LOCAL_ROW_ID);

  // Get data from the LOCAL_ROW_ID column for each row. The chunk access and
  // the downcast are guarded so a layout or type mismatch fails the test
  // instead of dereferencing a null pointer.
  auto id_column = combined_table->column(1);
  ASSERT_TRUE(id_column != nullptr);
  ASSERT_GT(id_column->num_chunks(), 0);
  auto id_array =
      std::dynamic_pointer_cast<arrow::UInt64Array>(id_column->chunk(0));
  ASSERT_TRUE(id_array != nullptr)
      << "LOCAL_ROW_ID column is not a UInt64Array";

  std::vector<int> actual_ids;
  actual_ids.reserve(static_cast<size_t>(id_array->length()));
  for (int64_t i = 0; i < id_array->length(); ++i) {
    actual_ids.push_back(static_cast<int>(id_array->Value(i)));
  }

  EXPECT_EQ(actual_ids, indices)
      << "ID column values don't match expected order";
}

// With LOCAL_ROW_ID requested as the last column: checks its position and
// uint64 type in every scanned batch, then checks that a multi-row fetch
// returns the row ids in the same order as the requested indices.
TEST_P(SegmentTest, CheckOrderWithLocalRowIDEnd) {
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 10);
  ASSERT_TRUE(segment != nullptr);

  auto combined_reader = segment->scan({"id", "name", LOCAL_ROW_ID});
  ASSERT_TRUE(combined_reader != nullptr);
  EXPECT_TRUE(combined_reader->schema() != nullptr);

  // Drain the reader; a failed status or a null batch ends the scan.
  std::shared_ptr<arrow::RecordBatch> batch;
  uint32_t total_doc = 0;
  while (true) {
    auto status = combined_reader->ReadNext(&batch);
    if (status.ok() == false) break;
    if (batch == nullptr) break;

    EXPECT_EQ(batch->num_columns(), 3);
    EXPECT_EQ(batch->column(2)->type()->id(), arrow::Type::UINT64);
    EXPECT_EQ(batch->column_name(2), LOCAL_ROW_ID);

    total_doc += batch->num_rows();
  }
  EXPECT_EQ(total_doc, 10);

  std::vector<int> indices = {0, 3, 6, 1, 0};
  auto combined_table = segment->fetch({"id", "name", LOCAL_ROW_ID}, indices);
  ASSERT_TRUE(combined_table != nullptr);
  // Fatal: column 2 is accessed right below.
  ASSERT_EQ(combined_table->num_columns(), 3);
  EXPECT_EQ(combined_table->num_rows(), 5);

  auto field = combined_table->schema()->field(2);
  EXPECT_EQ(field->name(), LOCAL_ROW_ID);

  // Get data from the LOCAL_ROW_ID column for each row. The chunk access and
  // the downcast are guarded so a layout or type mismatch fails the test
  // instead of dereferencing a null pointer.
  auto id_column = combined_table->column(2);
  ASSERT_TRUE(id_column != nullptr);
  ASSERT_GT(id_column->num_chunks(), 0);
  auto id_array =
      std::dynamic_pointer_cast<arrow::UInt64Array>(id_column->chunk(0));
  ASSERT_TRUE(id_array != nullptr)
      << "LOCAL_ROW_ID column is not a UInt64Array";

  std::vector<int> actual_ids;
  actual_ids.reserve(static_cast<size_t>(id_array->length()));
  for (int64_t i = 0; i < id_array->length(); ++i) {
    actual_ids.push_back(static_cast<int>(id_array->Value(i)));
  }

  EXPECT_EQ(actual_ids, indices)
      << "ID column values don't match expected order";
}

// A single-row fetch with index -1 on a 10-doc segment must yield no batch.
TEST_P(SegmentTest, FetchSingleRowWithNegativeIndex) {
  auto ten_doc_segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 10);
  ASSERT_NE(ten_doc_segment, nullptr);

  ExecBatchPtr fetched = ten_doc_segment->fetch({"id", "name"}, -1);
  EXPECT_TRUE(fetched == nullptr);
}

// A single-row fetch with index 15 on a 10-doc segment must yield no batch.
TEST_P(SegmentTest, FetchSingleRowWithOutOfRangeIndex) {
  auto ten_doc_segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 10);
  ASSERT_NE(ten_doc_segment, nullptr);

  ExecBatchPtr fetched = ten_doc_segment->fetch({"id", "name"}, 15);
  EXPECT_TRUE(fetched == nullptr);
}

// A single-row fetch that names a column missing from the schema must yield
// no batch, even though the other requested column exists.
TEST_P(SegmentTest, FetchSingleRowWithInvalidColumn) {
  auto ten_doc_segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 10);
  ASSERT_NE(ten_doc_segment, nullptr);

  ExecBatchPtr fetched = ten_doc_segment->fetch({"id", "invalid_column"}, 0);
  EXPECT_TRUE(fetched == nullptr);
}

// A single-row fetch with an empty column list must yield no batch.
TEST_P(SegmentTest, FetchSingleRowWithEmptyColumns) {
  auto ten_doc_segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 10);
  ASSERT_NE(ten_doc_segment, nullptr);

  ExecBatchPtr fetched = ten_doc_segment->fetch({}, 0);
  EXPECT_TRUE(fetched == nullptr);
}

// A single-row fetch of row 0 from a segment created with an empty doc range
// must yield no batch.
TEST_P(SegmentTest, FetchSingleRowFromEmptySegment) {
  auto empty_segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 0);
  ASSERT_NE(empty_segment, nullptr);

  ExecBatchPtr fetched = empty_segment->fetch({"id", "name"}, 0);
  EXPECT_TRUE(fetched == nullptr);
}

// Fetches the "binary" and "array_binary" columns of row 1 and checks the
// Arrow scalar kinds they come back as: a binary scalar and a list scalar.
TEST_P(SegmentTest, FetchSingleRowWithBinaryFields) {
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 10);
  ASSERT_NE(segment, nullptr);

  ExecBatchPtr row_batch = segment->fetch({"binary", "array_binary"}, 1);
  ASSERT_NE(row_batch, nullptr);
  EXPECT_EQ(row_batch->length, 1);
  EXPECT_EQ(row_batch->values.size(), 2);

  // Column 0: plain binary value.
  auto first_scalar = row_batch->values[0].scalar();
  ASSERT_NE(first_scalar, nullptr);
  auto as_binary = std::dynamic_pointer_cast<arrow::BinaryScalar>(first_scalar);
  EXPECT_TRUE(as_binary != nullptr);

  // Column 1: array of binary values, surfaced as a list.
  auto second_scalar = row_batch->values[1].scalar();
  ASSERT_NE(second_scalar, nullptr);
  auto as_list = std::dynamic_pointer_cast<arrow::ListScalar>(second_scalar);
  EXPECT_TRUE(as_list != nullptr);
}

// Simulates crash recovery from a WAL file.
//
// 1. Writes 100 docs and releases the segment.
// 2. Hand-writes a WAL file for the writing forward block containing 100
//    INSERTs, 100 UPDATEs, 100 UPSERTs and 100 DELETEs.
// 3. Reopens the writing segment and expects a scan to see 400 rows while the
//    delete-store-filtered doc count is 100.
TEST_P(SegmentTest, Recover) {
  const int doc_count = 100;

  // Step 1: initial population.
  {
    auto writer_segment = test::TestHelper::CreateSegmentWithDoc(
        col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
        0, doc_count);
    ASSERT_NE(writer_segment, nullptr);
  }

  // Step 2: fabricate the WAL content.
  {
    Version current = version_manager->get_current_version();
    const auto writing_block_id =
        current.writing_segment_meta()->writing_forward_block_->id();
    const auto wal_path =
        FileHelper::MakeWalPath(col_path, 0, writing_block_id);

    WalOptions wal_option{0, true};
    WalFilePtr wal;
    WalFile::CreateAndOpen(wal_path, wal_option, &wal);
    ASSERT_NE(wal, nullptr);

    // INSERT records for keys [doc_count, doc_count + 100).
    const int insert_end = doc_count + 100;
    for (int key = doc_count; key < insert_end; ++key) {
      Doc record = test::TestHelper::CreateDoc(key, *schema);
      record.set_operator(Operator::INSERT);
      std::vector<uint8_t> bytes = record.serialize();
      auto rc = wal->append(std::string(bytes.begin(), bytes.end()));
      ASSERT_EQ(rc, 0);
    }

    // UPDATE records for keys [0, doc_count), carrying an explicit doc id.
    for (int key = 0; key < doc_count; ++key) {
      Doc record = test::TestHelper::CreateDoc(key, *schema);
      record.set_doc_id(key);  // global doc id
      record.set_operator(Operator::UPDATE);
      std::vector<uint8_t> bytes = record.serialize();
      auto rc = wal->append(std::string(bytes.begin(), bytes.end()));
      ASSERT_EQ(rc, 0);
    }

    // UPSERT records for keys [0, doc_count).
    for (int key = 0; key < doc_count; ++key) {
      Doc record = test::TestHelper::CreateDoc(key, *schema);
      record.set_operator(Operator::UPSERT);
      std::vector<uint8_t> bytes = record.serialize();
      auto rc = wal->append(std::string(bytes.begin(), bytes.end()));
      ASSERT_EQ(rc, 0);
    }

    // DELETE records addressing doc ids [300, 300 + doc_count).
    for (int key = 0; key < doc_count; ++key) {
      Doc record = test::TestHelper::CreateDoc(key, *schema);
      record.set_doc_id(key + 300);  // global doc id
      record.set_operator(Operator::DELETE);
      std::vector<uint8_t> bytes = record.serialize();
      auto rc = wal->append(std::string(bytes.begin(), bytes.end()));
      ASSERT_EQ(rc, 0);
    }
  }

  // Step 3: reopen and verify.
  {
    Version current = version_manager->get_current_version();

    // Deliberately separate from the fixture's `options` member: only
    // read_only_ is set, everything else keeps its default.
    SegmentOptions recover_options;
    recover_options.read_only_ = false;

    auto open_result =
        Segment::Open(col_path, *schema, *current.writing_segment_meta(),
                      id_map, delete_store, version_manager, recover_options);
    ASSERT_TRUE(open_result.has_value());
    auto segment = open_result.value();

    auto reader = segment->scan({"id"});
    std::shared_ptr<arrow::RecordBatch> current_batch;
    uint32_t scanned_rows = 0;
    for (;;) {
      auto read_status = reader->ReadNext(&current_batch);
      if (!read_status.ok() || current_batch == nullptr) {
        break;
      }

      scanned_rows += current_batch->num_rows();
      EXPECT_EQ(current_batch->num_columns(), 1);
    }
    // 400 rows are expected because the segment only marks docs as deleted;
    // their rows remain visible to a raw scan.
    EXPECT_EQ(scanned_rows, 400);

    // The live count is taken through the delete store's filter rather than
    // segment->get_filter().
    auto delete_filter = delete_store->make_filter();
    auto live_doc_count = segment->doc_count(delete_filter);
    EXPECT_EQ(live_doc_count, 100);
  }
}

// Updates one existing document and checks that the filtered document count
// is unchanged and that the modified field values can be read back.
TEST_P(SegmentTest, UpdateDoc) {
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 10);
  ASSERT_TRUE(segment != nullptr);

  // before update
  uint64_t count = segment->doc_count(segment->get_filter());
  EXPECT_EQ(count, 10);

  // Build the replacement for document 5 with modified scalar fields.
  Doc update_doc = test::TestHelper::CreateDoc(5, *schema);
  update_doc.set<std::string>("name", "updated_name");
  update_doc.set<uint32_t>("age", 99);

  // Update the document
  auto status = segment->Update(update_doc);
  EXPECT_TRUE(status.ok()) << "Update failed: " << status.message();

  // after update: an update must not change the number of live documents
  count = segment->doc_count(segment->get_filter());
  EXPECT_EQ(count, 10);

  // Fetch the updated document and verify changes.
  // Note: The parameter here is the internal global_doc_id, not user-specified.
  // NOTE(review): the test expects the updated copy under id 10, presumably
  // because the update is stored as a new row after the 10 original ones.
  auto ret_doc = segment->Fetch(10);
  // Fatal check: ret_doc is dereferenced below, so a null result must stop
  // the test instead of crashing the whole test binary.
  ASSERT_TRUE(ret_doc != nullptr);
  EXPECT_EQ(ret_doc->get<std::string>("name"), "updated_name");
  EXPECT_EQ(ret_doc->get<uint32_t>("age"), 99);
}

// Updates every document of a freshly created segment once and checks that
// the filtered document count is unchanged and the last update is readable.
TEST_P(SegmentTest, UpdateDocBatch) {
  int doc_count = 10;
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, doc_count);
  ASSERT_TRUE(segment != nullptr);
  // before update
  uint64_t count = segment->doc_count(segment->get_filter());
  EXPECT_EQ(count, doc_count);

  // Re-submit each document (unchanged content) as an update.
  for (int i = 0; i < doc_count; i++) {
    Doc update_doc = test::TestHelper::CreateDoc(i, *schema);
    // Update the document
    auto status = segment->Update(update_doc);
    EXPECT_TRUE(status.ok()) << "Update failed: " << status.message();
  }

  // after update
  count = segment->doc_count(segment->get_filter());
  EXPECT_EQ(count, doc_count);

  // Fetch the updated document and verify changes.
  // Note: The parameter here is the internal global_doc_id, not user-specified.
  auto ret_doc = segment->Fetch(doc_count * 2 - 1);
  // Fatal check: ret_doc is dereferenced right after, so abort on nullptr.
  ASSERT_TRUE(ret_doc != nullptr);
  EXPECT_EQ(ret_doc->get<std::string>("name"),
            "value_" + std::to_string(doc_count - 1));
}

// Removes one document by primary key and one by global doc id, checking the
// filtered document count after each removal.
TEST_P(SegmentTest, DeleteDoc) {
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 10);
  ASSERT_TRUE(segment != nullptr);

  // Number of documents that pass the segment's current filter.
  const auto live_docs = [&]() -> uint64_t {
    return segment->doc_count(segment->get_filter());
  };

  // Nothing deleted yet.
  EXPECT_EQ(live_docs(), 10);

  // First removal: addressed by primary key.
  auto pk_status = segment->Delete("pk_5");
  EXPECT_TRUE(pk_status.ok())
      << "Delete by pk failed: " << pk_status.message();
  EXPECT_EQ(live_docs(), 9);

  // Second removal: addressed by global doc id.
  auto id_status = segment->Delete(3);
  EXPECT_TRUE(id_status.ok())
      << "Delete by global doc id failed: " << id_status.message();
  EXPECT_EQ(live_docs(), 8);
}

// Deletes every document by primary key and expects the filtered document
// count to drop to zero.
TEST_P(SegmentTest, DeleteBatch) {
  int doc_count = 10;
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, doc_count);
  ASSERT_TRUE(segment != nullptr);

  // All documents are visible before any delete.
  uint64_t remaining = segment->doc_count(segment->get_filter());
  EXPECT_EQ(remaining, doc_count);

  // Remove the documents one at a time, addressed by primary key.
  for (int idx = 0; idx < doc_count; ++idx) {
    auto delete_status = segment->Delete("pk_" + std::to_string(idx));
    EXPECT_TRUE(delete_status.ok())
        << "Delete by pk failed: " << delete_status.message();
  }

  // No document may survive the batch.
  remaining = segment->doc_count(segment->get_filter());
  EXPECT_EQ(remaining, 0);
}


// Upserts one existing and one new document, checking the filtered document
// count and the stored "name" field after each operation.
TEST_P(SegmentTest, UpsertDoc) {
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 5);
  ASSERT_TRUE(segment != nullptr);

  // before upsert
  uint64_t count = segment->doc_count(segment->get_filter());
  EXPECT_EQ(count, 5);

  // Upsert an existing document: the live count must stay the same.
  Doc upsert_doc1 = test::TestHelper::CreateDoc(3, *schema);
  upsert_doc1.set<std::string>("name", "upserted_name");
  auto status = segment->Upsert(upsert_doc1);
  EXPECT_TRUE(status.ok()) << "Upsert existing doc failed: "
                           << status.message();

  count = segment->doc_count(segment->get_filter());
  EXPECT_EQ(count, 5);

  // Verify the update. Fetch takes the internal global doc id.
  // NOTE(review): id 5 is presumably the next free id after the 5 initial
  // documents - confirm against the segment's id allocation.
  auto ret_doc = segment->Fetch(5);
  // Fatal check: ret_doc is dereferenced below.
  ASSERT_TRUE(ret_doc != nullptr);
  EXPECT_EQ(ret_doc->get<std::string>("name"), "upserted_name");

  // Upsert a new document: the live count must grow by one.
  Doc upsert_doc2 = test::TestHelper::CreateDoc(6, *schema);
  upsert_doc2.set<std::string>("name", "new_upserted_doc");
  status = segment->Upsert(upsert_doc2);
  EXPECT_TRUE(status.ok()) << "Upsert new doc failed: " << status.message();

  count = segment->doc_count(segment->get_filter());
  EXPECT_EQ(count, 6);

  // Verify the new document was inserted
  ret_doc = segment->Fetch(6);
  // Fatal check: ret_doc is dereferenced below.
  ASSERT_TRUE(ret_doc != nullptr);
  EXPECT_EQ(ret_doc->get<std::string>("name"), "new_upserted_doc");
}

// Alternates upserts of existing and new documents, then checks the filtered
// document count and reads back every upserted document by global doc id.
TEST_P(SegmentTest, UpsertDocBatch) {
  int doc_count = 10;
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, doc_count);
  ASSERT_TRUE(segment != nullptr);

  // before upsert
  uint64_t count = segment->doc_count(segment->get_filter());
  EXPECT_EQ(count, doc_count);

  for (int i = 0; i < doc_count; i++) {
    // Upsert existing document
    Doc upsert_doc1 = test::TestHelper::CreateDoc(i, *schema);
    upsert_doc1.set<std::string>("name", "upserted_name" + std::to_string(i));
    auto status = segment->Upsert(upsert_doc1);
    EXPECT_TRUE(status.ok())
        << "Upsert existing doc failed: " << status.message();

    // Upsert new document
    Doc upsert_doc2 = test::TestHelper::CreateDoc(doc_count + i, *schema);
    upsert_doc2.set<std::string>("name",
                                 "new_upserted_doc" + std::to_string(i));
    status = segment->Upsert(upsert_doc2);
    EXPECT_TRUE(status.ok()) << "Upsert new doc failed: " << status.message();
  }

  // Each iteration added exactly one new live document.
  count = segment->doc_count(segment->get_filter());
  EXPECT_EQ(count, doc_count * 2);

  // The upserts were issued in (existing, new) pairs, so the test expects the
  // results at consecutive global doc ids starting at doc_count.
  int incr_idx = 0;
  for (int i = doc_count; i < doc_count + doc_count * 2; i += 2) {
    // Verify the update
    auto ret_doc = segment->Fetch(i);
    // Fatal check: ret_doc is dereferenced below.
    ASSERT_TRUE(ret_doc != nullptr) << "Fetch returned nullptr for id " << i;
    EXPECT_EQ(ret_doc->get<std::string>("name"),
              "upserted_name" + std::to_string(incr_idx));

    // Verify the new document was inserted
    ret_doc = segment->Fetch(i + 1);
    ASSERT_TRUE(ret_doc != nullptr)
        << "Fetch returned nullptr for id " << (i + 1);
    EXPECT_EQ(ret_doc->get<std::string>("name"),
              "new_upserted_doc" + std::to_string(incr_idx));
    incr_idx++;
  }
}

// Smoke test: flushing a segment that holds 100 documents reports success.
TEST_P(SegmentTest, Flush) {
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 100);
  ASSERT_TRUE(segment != nullptr);

  auto flush_status = segment->flush();
  EXPECT_TRUE(flush_status.ok())
      << "Flush failed: " << flush_status.message();
}

// Flushes a segment, inserts more documents afterwards, and verifies that the
// "dense_fp32" vector of every document (old and new) can still be fetched.
TEST_P(SegmentTest, FlushAfterInsert) {
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 100);
  ASSERT_TRUE(segment != nullptr);

  // Flush the segment
  auto status = segment->flush();
  EXPECT_TRUE(status.ok()) << "Flush failed: " << status.message();

  // Append documents [100, 150) after the flush.
  test::TestHelper::SegmentInsertDoc(segment, *schema, 100, 150);

  ASSERT_EQ(segment->doc_count(), 150);

  for (int i = 0; i < 150; i++) {
    auto ret_doc = segment->Fetch(i);
    // Fatal check: ret_doc is dereferenced below.
    ASSERT_TRUE(ret_doc != nullptr) << "Fetch returned nullptr for doc " << i;

    Doc verify_doc = test::TestHelper::CreateDoc(i, *schema);
    auto vv = verify_doc.get<std::vector<float>>("dense_fp32").value();
    auto v = ret_doc->get<std::vector<float>>("dense_fp32").value();
    // Guard the element-wise loop: v is indexed by vv's size.
    ASSERT_EQ(v.size(), vv.size()) << "Vector size mismatch for doc " << i;
    for (size_t j = 0; j < vv.size(); j++) {
      ASSERT_FLOAT_EQ(v[j], vv[j]);
    }
  }
}

// Dumps a segment once (expected to succeed) and a second time (expected to
// be rejected with NOT_SUPPORTED).
TEST_P(SegmentTest, Dump) {
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 100);
  ASSERT_TRUE(segment != nullptr);

  // Dump the segment
  auto status = segment->dump();
  // The message names the operation actually under test (dump, not flush).
  EXPECT_TRUE(status.ok()) << "Dump failed: " << status.message();

  // A second dump of the same segment must be refused.
  status = segment->dump();
  EXPECT_FALSE(status.ok());
  EXPECT_EQ(status.code(), StatusCode::NOT_SUPPORTED);
}

// Checks the unfiltered document count after creation and the filtered count
// after three documents have been deleted by primary key.
TEST_P(SegmentTest, DocCount) {
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 50);
  ASSERT_TRUE(segment != nullptr);

  // Get document count
  uint64_t count = segment->doc_count();
  EXPECT_EQ(count, 50);

  // Delete some documents. The statuses are checked so that a failing delete
  // is reported directly instead of only showing up as a wrong count below.
  const std::vector<std::string> deleted_pks = {"pk_10", "pk_20", "pk_30"};
  for (const std::string &pk : deleted_pks) {
    auto status = segment->Delete(pk);
    EXPECT_TRUE(status.ok())
        << "Delete of " << pk << " failed: " << status.message();
  }

  // Get document count again
  count = segment->doc_count(segment->get_filter());
  EXPECT_EQ(count, 47);
}

// TEST_P(SegmentTest, Insert100WData) {
//   options.max_buffer_size_ = 8 * 1024 * 1024;

//   auto segment = test::TestHelper::CreateSegmentWithDoc(
//       col_path, *schema, 0, 0, id_map, delete_store, version_manager,
//       options, 0, 0);
//   ASSERT_TRUE(segment != nullptr);

//   uint64_t MAX_DOC = 1000000;
//   auto start = std::chrono::system_clock::now();
//   test::TestHelper::SegmentInsertDoc(segment, *schema, 0, MAX_DOC);
//   auto end = std::chrono::system_clock::now();
//   auto cost = std::chrono::duration_cast<std::chrono::milliseconds>(end -
//   start)
//                   .count();
//   std::cout << "insert cost " << cost << "ms" << std::endl;

//   start = std::chrono::system_clock::now();
//   ;
//   auto combined_reader = segment->scan(
//       {"id", "name", "age", USER_ID, GLOBAL_DOC_ID, LOCAL_ROW_ID});
//   std::shared_ptr<arrow::RecordBatch> batch;
//   uint32_t total_doc = 0;
//   while (true) {
//     auto status = combined_reader->ReadNext(&batch);
//     if (status.ok() == false) break;
//     if (batch == nullptr) break;
//     total_doc += batch->num_rows();
//   }
//   end = std::chrono::system_clock::now();
//   cost = std::chrono::duration_cast<std::chrono::milliseconds>(end - start)
//              .count();
//   std::cout << "scan cost " << cost << "ms" << std::endl;

//   EXPECT_EQ(total_doc, MAX_DOC);
// }

// Loads documents 0..1000 into a segment with a 10 KiB write buffer, then
// uses the combined vector indexer of "dense_fp32" to fetch the last inserted
// vector and to run a top-10 search with it.
TEST_P(SegmentTest, CombinedVectorColumnIndexer) {
  options.max_buffer_size_ = 10 * 1024;

  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 0);
  ASSERT_TRUE(segment != nullptr);

  // Bulk-load the first 1000 documents.
  const uint64_t kMaxDoc = 1000;
  test::TestHelper::SegmentInsertDoc(segment, *schema, 0, kMaxDoc);

  // Document 1000 is inserted on its own so its status can be checked and its
  // vector reused as fetch reference and search query.
  Doc probe_doc = test::TestHelper::CreateDoc(1000, *schema);
  auto insert_status = segment->Insert(probe_doc);
  ASSERT_TRUE(insert_status.ok());

  auto combined_indexer = segment->get_combined_vector_indexer("dense_fp32");
  ASSERT_TRUE(combined_indexer != nullptr);

  // --- fetch: the stored vector must equal the inserted one ---
  auto fetched_data = combined_indexer->Fetch(1000);
  ASSERT_TRUE(fetched_data);
  const float *stored_vector = reinterpret_cast<const float *>(
      std::get<vector_column_params::DenseVectorBuffer>(
          fetched_data->vector_buffer)
          .data.data());

  auto expected_vector =
      probe_doc.get<std::vector<float>>("dense_fp32").value();
  for (uint32_t dim = 0; dim < expected_vector.size(); ++dim) {
    ASSERT_FLOAT_EQ(stored_vector[dim], expected_vector[dim]);
  }

  // --- query: search with the probe document's own vector ---
  auto dense_fp32_field = schema->get_field("dense_fp32");
  auto query_vector = probe_doc.get<std::vector<float>>("dense_fp32").value();
  auto query = vector_column_params::VectorData{
      vector_column_params::DenseVector{.data = query_vector.data()}};
  auto query_params = vector_column_params::QueryParams{
      .dimension = dense_fp32_field->dimension(),
      .topk = 10,
      .filter = nullptr,
      .fetch_vector = false};
  auto results = combined_indexer->Search(query, query_params);
  ASSERT_TRUE(results.has_value());

  auto vector_results =
      dynamic_cast<VectorIndexResults *>(results.value().get());
  ASSERT_TRUE(vector_results);
  ASSERT_EQ(vector_results->count(), 10);

  // The iterator must yield exactly as many hits as count() reported.
  int hits = 0;
  for (auto iter = vector_results->create_iterator(); iter->valid();
       iter->next()) {
    hits++;
  }
  ASSERT_EQ(hits, 10);
}

// Same scenario as the plain combined-indexer test, but the segment is built
// from a schema whose vector index uses HNSW with FP16 quantization, and the
// quantized combined indexer is queried with the refiner enabled.
TEST_P(SegmentTest, CombinedVectorColumnIndexerWithQuantVectorIndex) {
  options.max_buffer_size_ = 10 * 1024;

  auto quant_index_params = std::make_shared<HnswIndexParams>(
      MetricType::IP, 16, 20, QuantizeType::FP16);
  auto tmp_schema = test::TestHelper::CreateSchemaWithVectorIndex(
      false, "demo", quant_index_params);

  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *tmp_schema, 0, 0, id_map, delete_store, version_manager,
      options, 0, 0);
  ASSERT_TRUE(segment != nullptr);

  // NOTE(review): documents are generated from the fixture schema while the
  // segment was created from tmp_schema - confirm this mix is intended.
  const uint64_t kMaxDoc = 1000;
  test::TestHelper::SegmentInsertDoc(segment, *schema, 0, kMaxDoc);

  Doc probe_doc = test::TestHelper::CreateDoc(1000, *schema);
  auto insert_status = segment->Insert(probe_doc);
  ASSERT_TRUE(insert_status.ok());

  auto combined_indexer =
      segment->get_quant_combined_vector_indexer("dense_fp32");
  ASSERT_TRUE(combined_indexer != nullptr);

  // --- fetch: compared with a 0.1 tolerance instead of exact float equality
  auto fetched_data = combined_indexer->Fetch(1000);
  ASSERT_TRUE(fetched_data);
  const float *stored_vector = reinterpret_cast<const float *>(
      std::get<vector_column_params::DenseVectorBuffer>(
          fetched_data->vector_buffer)
          .data.data());

  auto expected_vector =
      probe_doc.get<std::vector<float>>("dense_fp32").value();
  for (uint32_t dim = 0; dim < expected_vector.size(); ++dim) {
    EXPECT_NEAR(stored_vector[dim], expected_vector[dim], 0.1);
  }

  // --- query: HNSW query parameters with the refiner switched on ---
  auto dense_fp32_field = schema->get_field("dense_fp32");
  auto query_vector = probe_doc.get<std::vector<float>>("dense_fp32").value();
  auto query = vector_column_params::VectorData{
      vector_column_params::DenseVector{.data = query_vector.data()}};
  auto query_params = vector_column_params::QueryParams{
      .dimension = dense_fp32_field->dimension(),
      .topk = 10,
      .filter = nullptr,
      .fetch_vector = false,
      .query_params = std::make_shared<zvec::QueryParams>(IndexType::HNSW)};
  query_params.query_params->set_is_using_refiner(true);

  auto results = combined_indexer->Search(query, query_params);
  ASSERT_TRUE(results.has_value());

  auto vector_results =
      dynamic_cast<VectorIndexResults *>(results.value().get());
  ASSERT_TRUE(vector_results);
  ASSERT_EQ(vector_results->count(), 10);

  // The iterator must yield exactly as many hits as count() reported.
  int hits = 0;
  for (auto iter = vector_results->create_iterator(); iter->valid();
       iter->next()) {
    hits++;
  }
  ASSERT_EQ(hits, 10);
}

// Runs a search restricted to an explicit candidate list (bf_pks) and expects
// the returned doc ids, once reversed, to equal that list.
TEST_P(SegmentTest, CombinedVectorColumnIndexerQueryWithPks) {
  options.max_buffer_size_ = 10 * 1024;

  auto tmp_schema = test::TestHelper::CreateSchemaWithVectorIndex(
      false, "demo", std::make_shared<HnswIndexParams>(MetricType::IP));

  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *tmp_schema, 0, 0, id_map, delete_store, version_manager,
      options, 0, 0);
  ASSERT_TRUE(segment != nullptr);

  const uint64_t kMaxDoc = 1000;
  test::TestHelper::SegmentInsertDoc(segment, *schema, 0, kMaxDoc);

  auto combined_indexer = segment->get_combined_vector_indexer("dense_fp32");
  ASSERT_TRUE(combined_indexer != nullptr);

  // The query vector is the one of document 999, which is also the last
  // entry of the candidate list.
  Doc verify_doc = test::TestHelper::CreateDoc(999, *schema);
  std::vector<std::vector<uint64_t>> bf_pks = {
      {10, 20, 30, 40, 50, 60, 70, 80, 90, 999}};

  // Build the query; every QueryParams member is spelled out explicitly.
  auto dense_fp32_field = schema->get_field("dense_fp32");
  auto query_vector = verify_doc.get<std::vector<float>>("dense_fp32").value();
  auto query = vector_column_params::VectorData{
      vector_column_params::DenseVector{.data = query_vector.data()}};
  auto query_params = vector_column_params::QueryParams{
      .data_type = dense_fp32_field->data_type(),
      .dimension = dense_fp32_field->dimension(),
      .topk = 10,
      .filter = nullptr,
      .fetch_vector = false,
      .query_params = std::make_shared<zvec::QueryParams>(IndexType::HNSW),
      .group_by = nullptr,
      .bf_pks = bf_pks,
      .refiner_param = nullptr,
      .extra_params = {}};

  auto results = combined_indexer->Search(query, query_params);
  ASSERT_TRUE(results.has_value());

  auto vector_results =
      dynamic_cast<VectorIndexResults *>(results.value().get());
  ASSERT_TRUE(vector_results);
  ASSERT_EQ(vector_results->count(), 10);

  // Collect the doc ids in the order the iterator yields them.
  int hits = 0;
  std::vector<uint64_t> result_doc_ids;
  for (auto iter = vector_results->create_iterator(); iter->valid();
       iter->next()) {
    hits++;
    result_doc_ids.push_back(iter->doc_id());
  }
  ASSERT_EQ(hits, 10);

  // The iteration order is the reverse of the candidate list, so flip it
  // before comparing.
  std::reverse(result_doc_ids.begin(), result_doc_ids.end());
  ASSERT_EQ(result_doc_ids, bf_pks[0]);
}


// Four threads insert disjoint ranges of 50 documents each into one segment;
// afterwards the segment must report all 200 documents.
TEST_P(SegmentTest, ConcurrentInsertOperations) {
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 0);
  ASSERT_TRUE(segment != nullptr);

  const int num_threads = 4;
  const int docs_per_thread = 50;

  // Each worker owns the id range [t * docs_per_thread, (t + 1) *
  // docs_per_thread), so no two workers insert the same document.
  std::vector<std::thread> workers;
  for (int t = 0; t < num_threads; ++t) {
    workers.emplace_back([&, t]() {
      const int first_id = t * docs_per_thread;
      for (int offset = 0; offset < docs_per_thread; ++offset) {
        int doc_id = first_id + offset;
        Doc doc = test::TestHelper::CreateDoc(doc_id, *schema);
        auto status = segment->Insert(doc);
        EXPECT_TRUE(status.ok())
            << "Thread " << t << " insert failed for doc " << doc_id;
      }
    });
  }

  // Block until every worker has finished.
  for (auto &worker : workers) {
    worker.join();
  }

  // All inserts from all workers must be accounted for.
  uint64_t total = segment->doc_count();
  EXPECT_EQ(total, num_threads * docs_per_thread);
}

// Runs an inserting, an updating and a deleting thread against the same
// segment at once. Each operation may succeed or fail with the status code
// the test explicitly tolerates for it.
TEST_P(SegmentTest, ConcurrentMixedOperations) {
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 100);
  ASSERT_TRUE(segment != nullptr);

  std::vector<std::thread> workers;

  // Worker 1: inserts documents 100..119.
  workers.emplace_back([&]() {
    for (int id = 100; id < 120; ++id) {
      Doc doc = test::TestHelper::CreateDoc(id, *schema);
      auto status = segment->Insert(doc);
      EXPECT_TRUE(status.ok() || status.code() == StatusCode::ALREADY_EXISTS);
    }
  });

  // Worker 2: updates every fifth document in [0, 50).
  workers.emplace_back([&]() {
    for (int id = 0; id < 50; id += 5) {
      Doc doc = test::TestHelper::CreateDoc(id, *schema);
      doc.set<std::string>("name",
                           "updated_concurrent_" + std::to_string(id));
      auto status = segment->Update(doc);
      EXPECT_TRUE(status.ok() || status.code() == StatusCode::NOT_FOUND);
    }
  });

  // Worker 3: deletes every tenth document in [50, 100) by primary key.
  workers.emplace_back([&]() {
    for (int id = 50; id < 100; id += 10) {
      auto status = segment->Delete("pk_" + std::to_string(id));
      EXPECT_TRUE(status.ok() || status.code() == StatusCode::NOT_FOUND);
    }
  });

  // Block until every worker has finished.
  for (auto &worker : workers) {
    worker.join();
  }
}

// corner cases
// Inserts a document, verifies the writing block metadata, then checks that
// inserting the same primary key again is rejected and leaves the stored
// document untouched.
TEST_P(SegmentTest, DuplicateInsert) {
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 0);
  ASSERT_TRUE(segment != nullptr);

  Doc doc1 = test::TestHelper::CreateDoc(0, *schema);
  auto status1 = segment->Insert(doc1);
  EXPECT_TRUE(status1.ok()) << "First insert failed: " << status1.message();

  // After one insert the writing forward block holds exactly doc id 0.
  auto meta = segment->meta();
  ASSERT_TRUE(meta != nullptr);
  auto &mem_block = meta->writing_forward_block().value();
  EXPECT_EQ(mem_block.doc_count_, 1);
  EXPECT_EQ(mem_block.min_doc_id_, 0);
  EXPECT_EQ(mem_block.max_doc_id_, 0);

  auto doc = segment->Fetch(0);
  // Fatal check: doc is dereferenced on the next line.
  ASSERT_TRUE(doc != nullptr);
  EXPECT_EQ(*doc, doc1);

  // Re-inserting the identical document must fail.
  auto status2 = segment->Insert(doc1);
  EXPECT_FALSE(status2.ok()) << "Duplicate insert should fail";

  // Re-inserting the same document id with a different payload must fail as
  // well. Without this step the "duplicate_name" check below could never
  // fail, because no document with that name was ever submitted.
  Doc duplicate_doc = test::TestHelper::CreateDoc(0, *schema);
  duplicate_doc.set<std::string>("name", "duplicate_name");
  auto status3 = segment->Insert(duplicate_doc);
  EXPECT_FALSE(status3.ok())
      << "Duplicate insert with modified fields should fail";

  // The stored document must not have been overwritten by the rejected insert.
  auto fetched_doc = segment->Fetch(0);
  ASSERT_TRUE(fetched_doc != nullptr);
  EXPECT_NE(fetched_doc->get<std::string>("name").value(), "duplicate_name");
}

// Deleting a document succeeds once; repeating the delete, by primary key or
// by doc id, must be rejected.
TEST_P(SegmentTest, DuplicateDelete) {
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 5);
  ASSERT_TRUE(segment != nullptr);

  // Initial delete by primary key.
  auto first_delete = segment->Delete("pk_2");
  EXPECT_TRUE(first_delete.ok())
      << "First delete failed: " << first_delete.message();

  // Same primary key a second time.
  auto repeated_delete = segment->Delete("pk_2");
  EXPECT_FALSE(repeated_delete.ok()) << "Duplicate delete should fail";

  // Same document again, this time addressed by doc id.
  auto delete_by_id = segment->Delete(2);
  EXPECT_FALSE(delete_by_id.ok())
      << "Delete by doc_id of already deleted doc should fail";
}

// Deleting a primary key that was never inserted must report a failure.
TEST_P(SegmentTest, DeleteNonExistentDoc) {
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 5);
  ASSERT_TRUE(segment != nullptr);

  // The segment was created with 5 documents; "pk_999" is not one of them.
  auto missing_pk_status = segment->Delete("pk_999");
  EXPECT_FALSE(missing_pk_status.ok()) << "Delete non-existent pk should fail";
}

// Updating a document that was never inserted must report a failure.
TEST_P(SegmentTest, UpdateNonExistentDoc) {
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 5);
  ASSERT_TRUE(segment != nullptr);

  // Document 999 is outside the 5 documents the segment was created with.
  Doc missing_doc = test::TestHelper::CreateDoc(999, *schema);
  missing_doc.set<std::string>("name", "non_existent_doc");

  auto update_status = segment->Update(missing_doc);
  EXPECT_FALSE(update_status.ok()) << "Update non-existent doc should fail";
}

// Upserting a document that does not exist yet must succeed and add one
// document to the filtered count.
TEST_P(SegmentTest, UpsertNonExistentDoc) {
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 5);
  ASSERT_TRUE(segment != nullptr);

  // Document 999 is outside the 5 documents the segment was created with.
  Doc fresh_doc = test::TestHelper::CreateDoc(999, *schema);
  fresh_doc.set<std::string>("name", "new_upserted_doc");

  auto upsert_status = segment->Upsert(fresh_doc);
  EXPECT_TRUE(upsert_status.ok())
      << "Upsert non-existent doc should succeed: "
      << upsert_status.message();

  // 5 original documents plus the upserted one.
  auto filter = segment->get_filter();
  uint64_t live_docs = segment->doc_count(filter);
  EXPECT_EQ(live_docs, 6);
}

// A scan that requests no columns at all is expected to yield no reader.
TEST_P(SegmentTest, ScanWithEmptyColumns) {
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 5);
  ASSERT_TRUE(segment != nullptr);

  // Empty column list.
  auto empty_scan_reader = segment->scan({});
  ASSERT_TRUE(empty_scan_reader == nullptr);
}

// A scan over a column name the test treats as invalid is expected to yield
// no reader.
TEST_P(SegmentTest, ScanWithInvalidColumns) {
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 10);
  ASSERT_TRUE(segment != nullptr);

  // Request a single, invalid column.
  auto invalid_scan_reader = segment->scan({"invalid_column"});
  EXPECT_TRUE(invalid_scan_reader == nullptr);
}

// Fetching a doc id beyond the inserted documents must return nullptr.
TEST_P(SegmentTest, FetchNonExistentDoc) {
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 5);
  ASSERT_TRUE(segment != nullptr);

  // Only 5 documents were inserted; id 999 does not exist.
  auto missing_doc = segment->Fetch(999);
  EXPECT_TRUE(missing_doc == nullptr)
      << "Fetch non-existent doc should return nullptr";
}

// A columnar fetch with row indices beyond the segment's 5 documents is
// expected to return no table.
TEST_P(SegmentTest, FetchWithInvalidIndices) {
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 5);
  ASSERT_TRUE(segment != nullptr);

  // Both indices lie far outside the 5 inserted rows.
  std::vector<int> out_of_range_rows = {999, 1000};
  auto fetched_table = segment->fetch({"id", "name"}, out_of_range_rows);

  ASSERT_TRUE(fetched_table == nullptr);
}

// A columnar fetch for a column name the test treats as invalid is expected
// to return no table, even though the row indices themselves are valid.
TEST_P(SegmentTest, FetchWithInvalidColumns) {
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 10);
  ASSERT_TRUE(segment != nullptr);

  // Valid rows, invalid column.
  std::vector<int> row_indices = {0, 1, 2};
  auto fetched_table = segment->fetch({"invalid_column"}, row_indices);
  EXPECT_TRUE(fetched_table == nullptr);
}

// With a schema created in nullable mode, a document that carries only a
// primary key (no field values) must be insertable.
TEST_P(SegmentTest, InsertEmptyDocWithNullableSchema) {
  auto nullable_schema = test::TestHelper::CreateNormalSchema(true, col_name);

  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *nullable_schema, 0, 0, id_map, delete_store, version_manager,
      options, 0, 0);
  ASSERT_TRUE(segment != nullptr);

  // Only the primary key is set; every field is left unset.
  Doc pk_only_doc;
  pk_only_doc.set_pk("pk_empty");

  auto insert_status = segment->Insert(pk_only_doc);
  EXPECT_TRUE(insert_status.ok());
}

// After a successful delete, ten further deletes of the same primary key must
// all fail and must not change the filtered document count.
TEST_P(SegmentTest, MultipleDuplicateDeletes) {
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, 5);
  ASSERT_TRUE(segment != nullptr);

  // The one delete that is expected to succeed.
  auto first_delete = segment->Delete("pk_1");
  EXPECT_TRUE(first_delete.ok());

  // Every repetition must be rejected.
  for (int attempt = 0; attempt < 10; ++attempt) {
    auto repeated_delete = segment->Delete("pk_1");
    EXPECT_FALSE(repeated_delete.ok())
        << "Delete iteration " << attempt << " should fail";
  }

  // Exactly one of the 5 documents is gone.
  auto filter = segment->get_filter();
  uint64_t live_docs = segment->doc_count(filter);
  EXPECT_EQ(live_docs, 4);
}

// Adds a second dense vector field to the schema, writes 1000 documents,
// tears down and recovers all collection state from disk, reopens the writing
// segment, appends 1000 more documents and verifies that every document can
// be fetched unchanged.
TEST_P(SegmentTest, FetchWithTwoVectorFields) {
  schema->add_field(std::make_shared<FieldSchema>(
      "dense2_fp32", DataType::VECTOR_FP32, 128, false,
      std::make_shared<FlatIndexParams>(MetricType::IP)));

  int doc_count = 1000;
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, doc_count);
  ASSERT_TRUE(segment != nullptr);

  // Release the in-memory state; the id map and delete store are flushed
  // first so that they can be reloaded below.
  segment.reset();
  version_manager.reset();
  id_map->flush();
  id_map.reset();

  std::string delete_store_path =
      FileHelper::MakeFilePath(col_path, FileID::DELETE_FILE, 0);
  delete_store->flush(delete_store_path);
  delete_store.reset();

  // Recover the version manager from disk.
  auto recover_version_manager = VersionManager::Recovery(col_path);
  // Check before unwrapping so a failed recovery is reported as a test
  // failure rather than surfacing from value().
  ASSERT_TRUE(recover_version_manager.has_value());
  auto recover_version_mgr = recover_version_manager.value();
  ASSERT_TRUE(recover_version_mgr != nullptr);

  auto v = recover_version_mgr->get_current_version();

  // idmap
  std::string idmap_path = FileHelper::MakeFilePath(col_path, FileID::ID_FILE,
                                                    v.id_map_path_suffix());
  IDMap::Ptr recover_id_map = std::make_shared<IDMap>(col_name);
  auto status = recover_id_map->open(idmap_path, false, false);
  ASSERT_TRUE(status.ok());

  // delete store
  delete_store_path = FileHelper::MakeFilePath(col_path, FileID::DELETE_FILE,
                                               v.delete_snapshot_path_suffix());
  auto recover_delete_store =
      DeleteStore::CreateAndLoad(col_name, delete_store_path);
  ASSERT_TRUE(recover_delete_store != nullptr);

  // Reopen the writing segment on top of the recovered state.
  int incr_doc_count = 1000;
  auto result = Segment::Open(col_path, *schema, *v.writing_segment_meta(),
                              recover_id_map, recover_delete_store,
                              recover_version_mgr, options);
  ASSERT_TRUE(result.has_value());
  segment = std::move(result).value();
  ASSERT_TRUE(segment != nullptr);

  // Append documents [doc_count, doc_count + incr_doc_count).
  auto s = test::TestHelper::SegmentInsertDoc(
      segment, *schema, doc_count, doc_count + incr_doc_count, false);
  ASSERT_TRUE(s.ok());

  // Every document, old and new, must match a freshly generated reference.
  for (int i = 0; i < doc_count + incr_doc_count; i++) {
    auto expect_doc = test::TestHelper::CreateDoc(i, *schema);
    auto ret_doc = segment->Fetch(i);
    // Fatal check: ret_doc is dereferenced below.
    ASSERT_TRUE(ret_doc != nullptr) << "Fetch returned nullptr for doc " << i;
    if (*ret_doc != expect_doc) {
      // Print both sides to make a mismatch easier to diagnose.
      std::cout << "   ret_doc: " << ret_doc->to_string() << std::endl;
      std::cout << "expect_doc: " << expect_doc.to_string() << std::endl;
    }
    ASSERT_EQ(*ret_doc, expect_doc);
  }
}

// Dumps a writing segment, registers it as a persisted segment, recovers all
// collection state from disk, reopens the segment read-only, adds a computed
// column, and checks that fetch() returns the requested rows in the requested
// order for several positions of the LOCAL_ROW_ID column.
TEST_P(SegmentTest, FetchPerf) {
  // create segment
  int doc_count = 1000;
  options.max_buffer_size_ = 100 * 1024;
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, doc_count);
  ASSERT_TRUE(segment != nullptr);

  // The rest of the test reads the dumped data, so a failed dump is fatal.
  auto dump_status = segment->dump();
  ASSERT_TRUE(dump_status.ok()) << "Dump failed: " << dump_status.message();
  auto writing_segment_meta = segment->meta();

  // convert writing segment meta to persisted segment meta
  Version version = version_manager->get_current_version();
  writing_segment_meta->remove_writing_forward_block();
  auto s = version.add_persisted_segment_meta(writing_segment_meta);
  ASSERT_TRUE(s.ok());

  s = version_manager->apply(version);
  ASSERT_TRUE(s.ok());
  s = version_manager->flush();
  ASSERT_TRUE(s.ok());

  // Release the in-memory state; the id map and delete store are flushed
  // first so that they can be reloaded below.
  segment.reset();
  version_manager.reset();
  id_map->flush();
  id_map.reset();

  std::string delete_store_path =
      FileHelper::MakeFilePath(col_path, FileID::DELETE_FILE, 0);
  delete_store->flush(delete_store_path);
  delete_store.reset();

  // Recover the version manager from disk.
  auto recover_version_manager = VersionManager::Recovery(col_path);
  // Check before unwrapping so a failed recovery is reported as a test
  // failure rather than surfacing from value().
  ASSERT_TRUE(recover_version_manager.has_value());
  auto recover_version_mgr = recover_version_manager.value();
  ASSERT_TRUE(recover_version_mgr != nullptr);

  Version v = recover_version_mgr->get_current_version();
  const auto &persist_metas = v.persisted_segment_metas();
  // persist_metas[0] is used below; make sure it exists.
  ASSERT_FALSE(persist_metas.empty());

  // idmap
  std::string idmap_path = FileHelper::MakeFilePath(col_path, FileID::ID_FILE,
                                                    v.id_map_path_suffix());
  IDMap::Ptr recover_id_map = std::make_shared<IDMap>(col_name);
  auto status = recover_id_map->open(idmap_path, false, false);
  ASSERT_TRUE(status.ok());

  // delete store
  delete_store_path = FileHelper::MakeFilePath(col_path, FileID::DELETE_FILE,
                                               v.delete_snapshot_path_suffix());
  auto recover_delete_store =
      DeleteStore::CreateAndLoad(col_name, delete_store_path);
  ASSERT_TRUE(recover_delete_store != nullptr);

  // open persist segment
  options.read_only_ = true;
  auto result =
      Segment::Open(col_path, *schema, *persist_metas[0], recover_id_map,
                    recover_delete_store, recover_version_mgr, options);
  ASSERT_TRUE(result.has_value());
  segment = std::move(result).value();
  ASSERT_TRUE(segment != nullptr);

  // Add a column computed from an expression over an existing column.
  s = segment->add_column(
      std::make_shared<FieldSchema>("add_int32", DataType::INT32, false),
      "int32 + 1", AddColumnOptions());
  EXPECT_TRUE(s.ok());

  // Row indices are deliberately unordered and contain a duplicate (0).
  std::vector<int> indices = {0, 3, 6, 1, 0, 501, 999};

  // Fetches `columns` for `indices` and verifies shape and row order.
  // `local_row_id_idx` is the position of LOCAL_ROW_ID within `columns`.
  auto func = [&](const std::vector<std::string> &columns,
                  int local_row_id_idx) -> void {
    auto combined_table = segment->fetch(columns, indices);
    ASSERT_TRUE(combined_table != nullptr);
    EXPECT_EQ(combined_table->num_columns(), columns.size());
    EXPECT_EQ(combined_table->num_rows(), indices.size());

    auto field = combined_table->schema()->field(local_row_id_idx);
    EXPECT_EQ(field->name(), LOCAL_ROW_ID);

    // Get data from the LOCAL_ROW_ID column for each row
    auto id_column = combined_table->column(local_row_id_idx);
    auto id_array =
        std::dynamic_pointer_cast<arrow::UInt64Array>(id_column->chunk(0));
    // The cast yields nullptr if the chunk is not a UInt64Array.
    ASSERT_TRUE(id_array != nullptr);

    std::vector<int32_t> actual_ids;
    for (int64_t i = 0; i < id_array->length(); ++i) {
      actual_ids.push_back(static_cast<int32_t>(id_array->Value(i)));
    }

    // The returned row ids must come back in the order they were requested.
    EXPECT_EQ(actual_ids, indices)
        << "ID column values don't match expected order";
  };

  // LOCAL_ROW_ID first, in the middle, and last.
  func({LOCAL_ROW_ID, "id", "name", "add_int32"}, 0);
  func(
      {
          "id",
          LOCAL_ROW_ID,
          "name",
          "add_int32",
      },
      1);
  func({"id", "name", "add_int32", LOCAL_ROW_ID}, 3);
}

// Exercises Segment::add_column.
//
// Flow: add_column is expected to fail on the freshly written segment. The
// segment is then dumped, registered as a persisted segment and reopened
// read-only from recovered state. On the reopened segment the test expects
// add_column to reject an empty expression for a non-nullable field and an
// UNDEFINED data type, and to accept every (type, expression) pair listed at
// the bottom of the test.
TEST_P(SegmentTest, AddColumn) {
  // create segment
  options.max_buffer_size_ = 10 * 1024 * 1024;
  const int doc_count = 1000;
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, doc_count);
  ASSERT_TRUE(segment != nullptr);

  // Adding a column before the segment has been dumped must be rejected.
  auto s = segment->add_column(
      std::make_shared<FieldSchema>("add_int32", DataType::INT32, false),
      "int32 + 1", AddColumnOptions());
  EXPECT_FALSE(s.ok());

  segment->dump();
  auto writing_segment_meta = segment->meta();

  // convert writing segment meta to persisted segment meta
  Version version = version_manager->get_current_version();
  writing_segment_meta->remove_writing_forward_block();
  s = version.add_persisted_segment_meta(writing_segment_meta);
  ASSERT_TRUE(s.ok());

  s = version_manager->apply(version);
  ASSERT_TRUE(s.ok());
  s = version_manager->flush();
  ASSERT_TRUE(s.ok());

  // Release the writing-side objects before recovering from disk.
  segment.reset();
  version_manager.reset();
  id_map->flush();
  id_map.reset();

  std::string delete_store_path =
      FileHelper::MakeFilePath(col_path, FileID::DELETE_FILE, 0);
  delete_store->flush(delete_store_path);
  delete_store.reset();

  auto recover_version_manager = VersionManager::Recovery(col_path);
  // Fail the test cleanly instead of unwrapping an empty result.
  ASSERT_TRUE(recover_version_manager.has_value());
  auto recover_version_mgr = recover_version_manager.value();
  ASSERT_TRUE(recover_version_mgr != nullptr);

  Version v = recover_version_mgr->get_current_version();
  const auto &persist_metas = v.persisted_segment_metas();
  // persist_metas[0] is dereferenced below; make sure it exists.
  ASSERT_FALSE(persist_metas.empty());

  // idmap
  std::string idmap_path = FileHelper::MakeFilePath(col_path, FileID::ID_FILE,
                                                    v.id_map_path_suffix());
  IDMap::Ptr recover_id_map = std::make_shared<IDMap>(col_name);
  auto status = recover_id_map->open(idmap_path, false, false);
  ASSERT_TRUE(status.ok());

  delete_store_path = FileHelper::MakeFilePath(col_path, FileID::DELETE_FILE,
                                               v.delete_snapshot_path_suffix());
  auto recover_delete_store =
      DeleteStore::CreateAndLoad(col_name, delete_store_path);
  ASSERT_TRUE(recover_delete_store != nullptr);

  // open persist segment
  options.read_only_ = true;
  auto result =
      Segment::Open(col_path, *schema, *persist_metas[0], recover_id_map,
                    recover_delete_store, recover_version_mgr, options);
  ASSERT_TRUE(result.has_value());
  segment = std::move(result).value();
  ASSERT_TRUE(segment != nullptr);

  // A non-nullable column with an empty expression is expected to fail.
  s = segment->add_column(
      std::make_shared<FieldSchema>("add_int32", DataType::INT32, false), "",
      AddColumnOptions());
  EXPECT_FALSE(s.ok());

  // An UNDEFINED data type is expected to fail.
  s = segment->add_column(std::make_shared<FieldSchema>(
                              "add_undefined", DataType::UNDEFINED, false),
                          "", AddColumnOptions());
  EXPECT_FALSE(s.ok());

  // before add column
  auto meta = segment->meta();
  auto &persist_blocks = meta->persisted_blocks();
  int old_scalar_blocks_cnt = 0;
  for (auto &block : persist_blocks) {
    if (block.type() == BlockType::SCALAR) {
      old_scalar_blocks_cnt++;
    }
  }

  int add_column_cnt = 0;
  // Adds one column and verifies block bookkeeping, scan and fetch results.
  auto func = [&](const std::shared_ptr<FieldSchema> &field_schema,
                  const std::string &expression) {
    auto &column_name = field_schema->name();
    AddColumnOptions add_options;
    status = segment->add_column(field_schema, expression, add_options);
    EXPECT_TRUE(status.ok());

    // after add column
    int new_scalar_blocks_cnt = 0;
    for (auto &block : persist_blocks) {
      if (block.type() == BlockType::SCALAR) {
        new_scalar_blocks_cnt++;
      }
    }
    // Every added column is expected to contribute as many scalar blocks as
    // the segment had before any column was added.
    EXPECT_EQ(
        new_scalar_blocks_cnt,
        old_scalar_blocks_cnt + old_scalar_blocks_cnt * (++add_column_cnt));
    auto combined_reader = segment->scan({"id", "name", "age", column_name});
    ASSERT_TRUE(combined_reader != nullptr);
    std::shared_ptr<arrow::RecordBatch> batch;
    // Signed 64-bit counter: matches num_rows() and avoids a signed/unsigned
    // comparison against doc_count.
    int64_t total_doc = 0;
    while (true) {
      // Named differently from the captured `status` to avoid shadowing it.
      auto read_status = combined_reader->ReadNext(&batch);
      if (!read_status.ok()) break;
      if (batch == nullptr) break;

      EXPECT_EQ(batch->num_columns(), 4);

      total_doc += batch->num_rows();
    }
    EXPECT_EQ(total_doc, doc_count);

    auto new_schema = *schema;
    new_schema.add_field(field_schema);

    // Fetches the first `count` documents through the different fetch paths.
    auto check_doc = [&](int count) {
      for (int i = 0; i < count; i++) {
        auto expect_doc = test::TestHelper::CreateDoc(i, new_schema);
        auto doc = segment->Fetch(i);
        ASSERT_EQ(doc->pk(), expect_doc.pk());

        // column in same persist block
        {
          ExecBatchPtr exec_batch = segment->fetch({"id", "name", "age"}, i);
          ASSERT_TRUE(exec_batch != nullptr);
          EXPECT_EQ(exec_batch->length, 1);
          EXPECT_EQ(exec_batch->values.size(), 3);

          auto id_scalar = exec_batch->values[0].scalar();
          ASSERT_TRUE(id_scalar != nullptr);
          auto id_value =
              std::dynamic_pointer_cast<arrow::Int32Scalar>(id_scalar);
          ASSERT_TRUE(id_value != nullptr);
          EXPECT_EQ(id_value->value, i);
        }

        // only the newly added column
        {
          ExecBatchPtr exec_batch = segment->fetch({column_name}, i);
          ASSERT_TRUE(exec_batch != nullptr);
          EXPECT_EQ(exec_batch->length, 1);
          EXPECT_EQ(exec_batch->values.size(), 1);

          auto id_scalar = exec_batch->values[0].scalar();
          ASSERT_TRUE(id_scalar != nullptr);
        }

        // column in different persist block
        {
          ExecBatchPtr exec_batch =
              segment->fetch({"id", "name", "age", column_name}, i);
          ASSERT_TRUE(exec_batch != nullptr);
          EXPECT_EQ(exec_batch->length, 1);
          EXPECT_EQ(exec_batch->values.size(), 4);

          auto id_scalar = exec_batch->values[0].scalar();
          ASSERT_TRUE(id_scalar != nullptr);
          auto id_value =
              std::dynamic_pointer_cast<arrow::Int32Scalar>(id_scalar);
          ASSERT_TRUE(id_value != nullptr);
          EXPECT_EQ(id_value->value, i);
        }
      }
    };
    check_doc(doc_count);
  };

  // Template schemas keyed by a column-name prefix; the real name is assigned
  // per expression in the loop below.
  auto index_param = std::make_shared<InvertIndexParams>();
  std::vector<std::pair<std::string, std::shared_ptr<FieldSchema>>>
      test_column_schemas = {
          {"add_int32", std::make_shared<FieldSchema>("", DataType::INT32,
                                                      false, index_param)},
          {"add_int64", std::make_shared<FieldSchema>("", DataType::INT64,
                                                      false, index_param)},
          {"add_uint32", std::make_shared<FieldSchema>("", DataType::UINT32,
                                                       false, index_param)},
          {"add_uint64", std::make_shared<FieldSchema>("", DataType::UINT64,
                                                       false, index_param)},
          {"add_float", std::make_shared<FieldSchema>("", DataType::FLOAT,
                                                      false, index_param)},
          {"add_double", std::make_shared<FieldSchema>("", DataType::DOUBLE,
                                                       false, index_param)},
          {"add_int32_nullable", std::make_shared<FieldSchema>(
                                     "", DataType::INT32, true, index_param)},
          {"add_int64_nullable", std::make_shared<FieldSchema>(
                                     "", DataType::INT64, true, index_param)},
          {"add_uint32_nullable", std::make_shared<FieldSchema>(
                                      "", DataType::UINT32, true, index_param)},
          {"add_uint64_nullable", std::make_shared<FieldSchema>(
                                      "", DataType::UINT64, true, index_param)},
          {"add_float_nullable", std::make_shared<FieldSchema>(
                                     "", DataType::FLOAT, true, index_param)},
          {"add_double_nullable", std::make_shared<FieldSchema>(
                                      "", DataType::DOUBLE, true, index_param)},
      };

  // Expressions to try for each column prefix. Nullable columns are added
  // with an empty expression.
  // NOTE(review): the "+int32" entry under add_uint32 looks like a copy-paste
  // of the int32 row ("+uint32" would match the pattern) -- confirm intent.
  std::unordered_map<std::string, std::vector<std::string>> test_expressions = {
      {"add_int32", {"int32 + 1", "-int32", "+int32", "1", "-1"}},
      {"add_int64", {"int64 + 1", "-int64", "+int64", "1", "-1"}},
      {"add_uint32", {"uint32 + 1", "-uint32", "+int32", "1", "0"}},
      {"add_uint64", {"uint64 + 1", "-uint64", "+uint64", "1", "0"}},
      {"add_float", {"float + 1.0", "-float", "+float", "0.1", "-0.1"}},
      {"add_double", {"double + 1.0", "-double", "+double", "0.1", "-0.1"}},
      {"add_int32_nullable", {""}},
      {"add_int64_nullable", {""}},
      {"add_uint32_nullable", {""}},
      {"add_uint64_nullable", {""}},
      {"add_float_nullable", {""}},
      {"add_double_nullable", {""}},
  };

  for (const auto &[column_name, field_schema] : test_column_schemas) {
    // at() avoids copying the vector and never inserts a missing key.
    auto &expressions = test_expressions.at(column_name);
    for (auto &expression : expressions) {
      // Unique column name per expression. Deliberately not called col_name
      // so the fixture's col_name is not shadowed.
      const std::string new_column_name =
          column_name + "_" +
          std::to_string(
              ailego::Crc32c::Hash(expression.data(), expression.size()));
      auto new_field_schema = std::make_shared<FieldSchema>(
          field_schema->name(), field_schema->data_type(),
          field_schema->nullable(), field_schema->index_params());
      new_field_schema->set_name(new_column_name);
      func(new_field_schema, expression);
    }
  }
}

// Exercises Segment::alter_column.
//
// Flow: alter_column is expected to fail on the freshly written segment. The
// segment is then dumped, registered as a persisted segment and reopened
// read-only from recovered state. On the reopened segment the test expects
// alter_column to fail for an unknown column and for an UNDEFINED target
// type, and to succeed when each numeric column is altered to the schema of
// every other numeric column.
TEST_P(SegmentTest, AlterColumn) {
  // create segment
  const int doc_count = 1000;
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, doc_count);
  ASSERT_TRUE(segment != nullptr);

  auto s = segment->alter_column(
      "alter_int32",
      std::make_shared<FieldSchema>("alter_int32", DataType::INT32, false),
      AlterColumnOptions());
  EXPECT_FALSE(s.ok());

  segment->dump();
  auto writing_segment_meta = segment->meta();

  // convert writing segment meta to persisted segment meta
  Version version = version_manager->get_current_version();
  writing_segment_meta->remove_writing_forward_block();
  s = version.add_persisted_segment_meta(writing_segment_meta);
  ASSERT_TRUE(s.ok());

  s = version_manager->apply(version);
  ASSERT_TRUE(s.ok());
  s = version_manager->flush();
  ASSERT_TRUE(s.ok());

  // Release the writing-side objects before recovering from disk.
  segment.reset();
  version_manager.reset();
  id_map->flush();
  id_map.reset();

  std::string delete_store_path =
      FileHelper::MakeFilePath(col_path, FileID::DELETE_FILE, 0);
  delete_store->flush(delete_store_path);
  delete_store.reset();

  auto recover_version_manager = VersionManager::Recovery(col_path);
  // Fail the test cleanly instead of unwrapping an empty result.
  ASSERT_TRUE(recover_version_manager.has_value());
  auto recover_version_mgr = recover_version_manager.value();
  ASSERT_TRUE(recover_version_mgr != nullptr);

  Version v = recover_version_mgr->get_current_version();
  const auto &persist_metas = v.persisted_segment_metas();
  // persist_metas[0] is dereferenced below; make sure it exists.
  ASSERT_FALSE(persist_metas.empty());

  // idmap
  std::string idmap_path = FileHelper::MakeFilePath(col_path, FileID::ID_FILE,
                                                    v.id_map_path_suffix());
  IDMap::Ptr recover_id_map = std::make_shared<IDMap>(col_name);
  auto status = recover_id_map->open(idmap_path, false, false);
  ASSERT_TRUE(status.ok());

  delete_store_path = FileHelper::MakeFilePath(col_path, FileID::DELETE_FILE,
                                               v.delete_snapshot_path_suffix());
  auto recover_delete_store =
      DeleteStore::CreateAndLoad(col_name, delete_store_path);
  ASSERT_TRUE(recover_delete_store != nullptr);

  // open persist segment
  options.read_only_ = true;
  auto result =
      Segment::Open(col_path, *schema, *persist_metas[0], recover_id_map,
                    recover_delete_store, recover_version_mgr, options);
  ASSERT_TRUE(result.has_value());
  segment = std::move(result).value();
  ASSERT_TRUE(segment != nullptr);

  s = segment->alter_column(
      "alter_int32",
      std::make_shared<FieldSchema>("alter_int32", DataType::INT32, false),
      AlterColumnOptions());
  EXPECT_FALSE(s.ok());  // not found

  s = segment->alter_column(
      "int32",
      std::make_shared<FieldSchema>("int32", DataType::UNDEFINED, false),
      AlterColumnOptions());
  EXPECT_FALSE(s.ok());  // undefined type

  // Alters one column and verifies that a scan still returns every document.
  auto func = [&](const std::string &column_name,
                  const std::shared_ptr<FieldSchema> &field_schema) {
    AlterColumnOptions alter_options;
    status = segment->alter_column(column_name, field_schema, alter_options);
    EXPECT_TRUE(status.ok());

    auto combined_reader = segment->scan({"id", "name", "age", column_name});
    ASSERT_TRUE(combined_reader != nullptr);
    std::shared_ptr<arrow::RecordBatch> batch;
    // Signed 64-bit counter: matches num_rows() and avoids a signed/unsigned
    // comparison against doc_count.
    int64_t total_doc = 0;
    while (true) {
      // Named differently from the captured `status` to avoid shadowing it.
      auto read_status = combined_reader->ReadNext(&batch);
      if (!read_status.ok()) break;
      if (batch == nullptr) break;

      EXPECT_EQ(batch->num_columns(), 4);

      total_doc += batch->num_rows();
    }
    EXPECT_EQ(total_doc, doc_count);
  };

  std::vector<std::string> test_alter_columns = {"int32",  "int64", "uint32",
                                                 "uint64", "float", "double"};

  // Alter every column to the definition of every other column, keeping the
  // original column name.
  for (auto &column_name : test_alter_columns) {
    for (auto &dest_column : test_alter_columns) {
      if (column_name == dest_column) continue;
      auto field_schema = schema->get_field(dest_column);
      // The lookup result is dereferenced right below.
      ASSERT_TRUE(field_schema != nullptr);
      auto new_field_schema = std::make_shared<FieldSchema>(*field_schema);
      new_field_schema->set_name(column_name);
      func(column_name, new_field_schema);
    }
  }
}

// Exercises Segment::drop_column.
//
// Flow: drop_column is expected to fail on the freshly written segment. The
// segment is then dumped, registered as a persisted segment and reopened
// read-only from recovered state. On the reopened segment each numeric column
// is dropped; afterwards no scalar block may contain it and scanning it must
// return a null reader.
TEST_P(SegmentTest, DropColumn) {
  // create segment
  int doc_count = 1000;
  auto segment = test::TestHelper::CreateSegmentWithDoc(
      col_path, *schema, 0, 0, id_map, delete_store, version_manager, options,
      0, doc_count);
  ASSERT_TRUE(segment != nullptr);

  auto s = segment->drop_column("int32");
  EXPECT_FALSE(s.ok());

  segment->dump();
  auto writing_segment_meta = segment->meta();

  // convert writing segment meta to persisted segment meta
  Version version = version_manager->get_current_version();
  writing_segment_meta->remove_writing_forward_block();
  s = version.add_persisted_segment_meta(writing_segment_meta);
  ASSERT_TRUE(s.ok());

  s = version_manager->apply(version);
  ASSERT_TRUE(s.ok());
  s = version_manager->flush();
  ASSERT_TRUE(s.ok());

  // Release the writing-side objects before recovering from disk.
  segment.reset();
  version_manager.reset();
  id_map->flush();
  id_map.reset();

  std::string delete_store_path =
      FileHelper::MakeFilePath(col_path, FileID::DELETE_FILE, 0);
  delete_store->flush(delete_store_path);
  delete_store.reset();

  auto recover_version_manager = VersionManager::Recovery(col_path);
  // Fail the test cleanly instead of unwrapping an empty result.
  ASSERT_TRUE(recover_version_manager.has_value());
  auto recover_version_mgr = recover_version_manager.value();
  ASSERT_TRUE(recover_version_mgr != nullptr);

  Version v = recover_version_mgr->get_current_version();
  const auto &persist_metas = v.persisted_segment_metas();
  // persist_metas[0] is dereferenced below; make sure it exists.
  ASSERT_FALSE(persist_metas.empty());

  // idmap
  std::string idmap_path = FileHelper::MakeFilePath(col_path, FileID::ID_FILE,
                                                    v.id_map_path_suffix());
  IDMap::Ptr recover_id_map = std::make_shared<IDMap>(col_name);
  auto status = recover_id_map->open(idmap_path, false, false);
  ASSERT_TRUE(status.ok());

  delete_store_path = FileHelper::MakeFilePath(col_path, FileID::DELETE_FILE,
                                               v.delete_snapshot_path_suffix());
  auto recover_delete_store =
      DeleteStore::CreateAndLoad(col_name, delete_store_path);
  ASSERT_TRUE(recover_delete_store != nullptr);

  // open persist segment
  options.read_only_ = true;
  auto result =
      Segment::Open(col_path, *schema, *persist_metas[0], recover_id_map,
                    recover_delete_store, recover_version_mgr, options);
  ASSERT_TRUE(result.has_value());
  segment = std::move(result).value();
  ASSERT_TRUE(segment != nullptr);

  auto meta = segment->meta();
  auto &persist_blocks = meta->persisted_blocks();

  // Drops one column and verifies it is gone from the blocks and from scan.
  auto func = [&](const std::string &column_name) {
    status = segment->drop_column(column_name);
    EXPECT_TRUE(status.ok());

    // after drop column
    bool column_exists = false;
    for (auto &block : persist_blocks) {
      if (block.type() == BlockType::SCALAR &&
          block.contain_column(column_name)) {
        column_exists = true;
        break;
      }
    }

    EXPECT_FALSE(column_exists);

    auto combined_reader = segment->scan({column_name});
    ASSERT_TRUE(combined_reader == nullptr);
  };

  std::vector<std::string> test_drop_columns = {"int32",  "int64", "uint32",
                                                "uint64", "float", "double"};

  for (auto &column_name : test_drop_columns) {
    func(column_name);
  }
}


// Runs every SegmentTest case twice, once with the bool parameter set to true
// and once with false.
// NOTE(review): the "MMapTest" prefix suggests the parameter toggles mmap --
// confirm against the SegmentTest fixture.
INSTANTIATE_TEST_SUITE_P(MMapTest, SegmentTest, testing::Values(true, false));


================================================
FILE: tests/db/index/segment/sql_expr_parser_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#include "db/index/segment/sql_expr_parser.h"
#include <arrow/array.h>
#include <arrow/compute/api.h>
#include <arrow/dataset/api.h>
#include <arrow/dataset/discovery.h>
#include <arrow/memory_pool.h>
#include <arrow/result.h>
#include <arrow/table.h>
#include <arrow/testing/gtest_util.h>
#include <gtest/gtest.h>
#include "utils/utils.h"

using namespace arrow;
using namespace arrow::dataset;
using namespace zvec;

// Stateless fixture for the ParseToExpression tests. No per-test setup or
// teardown is required, so the defaults inherited from ::testing::Test are
// used as-is.
class SqlExprParserTest : public ::testing::Test {};

// Checks that ParseToExpression accepts arithmetic expressions over every
// numeric column type in the schema, including mixed types, parentheses,
// literals and unary minus. Each failure reports the offending expression and
// the parser status.
TEST_F(SqlExprParserTest, ParseAllSupportedTypes) {
  auto schema = arrow::schema({arrow::field("int32", arrow::int32()),
                               arrow::field("uint32", arrow::uint32()),
                               arrow::field("float", arrow::float32()),
                               arrow::field("double", arrow::float64()),
                               arrow::field("int64", arrow::int64()),
                               arrow::field("uint64", arrow::uint64()),

                               arrow::field("string", arrow::utf8()),

                               arrow::field("bool", arrow::boolean())});

  static const char *const kExpressions[] = {
      // binary operators across column types
      "int32 + uint32",
      "float * double",
      "int64 - uint64",
      "int32 / float",
      "double + int64",
      "uint32 * int32",
      // chained operators
      "int32 + float - double",
      "int64 * uint32 / float",
      // parentheses
      "(int32 + float) * double",
      "int32 + (float - double) * int64",
      "((int32 + uint32) * float) - (double / int64)",
      // literals
      "int32 + 100",
      "float * 3.14",
      "double - 2.5",
      "(int64 + 10) * (uint32 - 5)",
      // unary minus
      "-int32",
      "-(float + double)",
  };

  for (const char *expression : kExpressions) {
    auto result = ParseToExpression(expression, schema);
    EXPECT_TRUE(result.ok()) << "Failed to parse \"" << expression
                             << "\": " << result.status().ToString();
  }
}

// A string comparison is expected to be rejected by ParseToExpression.
TEST_F(SqlExprParserTest, ParseStringExpression) {
  const auto name_field = arrow::field("name", arrow::utf8());
  const auto age_field = arrow::field("age", arrow::int32());
  auto schema = arrow::schema({name_field, age_field});

  EXPECT_FALSE(ParseToExpression("name = 'John'", schema).ok());
}

// A boolean/comparison expression is expected to be rejected by
// ParseToExpression.
TEST_F(SqlExprParserTest, ParseBooleanExpression) {
  const auto active_field = arrow::field("active", arrow::boolean());
  const auto age_field = arrow::field("age", arrow::int32());
  auto schema = arrow::schema({active_field, age_field});

  EXPECT_FALSE(ParseToExpression("active AND age > 18", schema).ok());
}

// Arithmetic that mixes a scalar column with a list column is expected to be
// rejected by ParseToExpression.
TEST_F(SqlExprParserTest, ParseListExpression) {
  auto schema = arrow::schema(
      {arrow::field("int32_list", arrow::list(arrow::int32())),
       arrow::field("float64_list", arrow::list(arrow::float64())),
       arrow::field("int32", arrow::int32()),
       arrow::field("float64", arrow::float64())});

  for (const char *expression :
       {"int32 + int32_list", "float64 + float64_list"}) {
    auto parsed = ParseToExpression(expression, schema);
    EXPECT_FALSE(parsed.ok());
  }
}

// A multi-column product with a parenthesised literal subtraction is expected
// to parse successfully.
TEST_F(SqlExprParserTest, ParseComplexExpression) {
  const auto price = arrow::field("price", arrow::float64());
  const auto quantity = arrow::field("quantity", arrow::int32());
  const auto discount = arrow::field("discount", arrow::float64());
  auto schema = arrow::schema({price, quantity, discount});

  auto parsed = ParseToExpression("price * quantity * (1 - discount)", schema);
  EXPECT_TRUE(parsed.ok()) << "Failed to parse SQL expression status:"
                           << parsed.status().ToString();
}

// An expression with a dangling binary operator is expected to be rejected.
TEST_F(SqlExprParserTest, ParseInvalidExpression) {
  const auto a_field = arrow::field("a", arrow::int32());
  auto schema = arrow::schema({a_field});

  EXPECT_FALSE(ParseToExpression("a + ", schema).ok());
}

// Referencing a column that is not part of the schema is expected to be
// rejected.
TEST_F(SqlExprParserTest, ParseNonExistentField) {
  const auto a_field = arrow::field("a", arrow::int32());
  auto schema = arrow::schema({a_field});

  EXPECT_FALSE(ParseToExpression("b + 1", schema).ok());
}

// A function call such as sqrt() is expected to be rejected.
TEST_F(SqlExprParserTest, ParseFunctionCall) {
  const auto value_field = arrow::field("value", arrow::float64());
  auto schema = arrow::schema({value_field});

  EXPECT_FALSE(ParseToExpression("sqrt(value)", schema).ok());
}

// Nested and mixed literal/column expressions are all expected to parse.
TEST_F(SqlExprParserTest, ParseComplexCombinations) {
  auto schema = arrow::schema(
      {arrow::field("a", arrow::int32()), arrow::field("b", arrow::float64()),
       arrow::field("c", arrow::int64()), arrow::field("d", arrow::float32())});

  static const char *const kExpressions[] = {
      // Deeply nested expressions
      "((a + b) * (c - d)) / (a + 1)",
      // Multi-level parentheses expressions
      "(((a + b) - c) * d) + (a / b)",
      // Mixed constants and variables
      "(a + 10) * (b - 2.5) / (c + 100)",
  };

  for (const char *expression : kExpressions) {
    auto parsed = ParseToExpression(expression, schema);
    EXPECT_TRUE(parsed.ok()) << "Failed to parse SQL expression status:"
                             << parsed.status().ToString();
  }
}

// Unary-minus expressions in several positions are all expected to parse.
TEST_F(SqlExprParserTest, ParseNegativeNumbers) {
  const auto id_field = arrow::field("id", arrow::int32());
  const auto value_field = arrow::field("value", arrow::float64());
  auto schema = arrow::schema({id_field, value_field});

  static const char *const kExpressions[] = {
      // Negated field
      "-id",
      // Negated field combined with another operator
      "-id + value",
      // Nested negation
      "-(-id)",
      // Negated sub-expression inside a larger expression
      "-(id + value) * 2",
  };

  for (const char *expression : kExpressions) {
    auto parsed = ParseToExpression(expression, schema);
    EXPECT_TRUE(parsed.ok()) << "Failed to parse SQL expression status:"
                             << parsed.status().ToString();
  }
}


// Create a simple Table
std::shared_ptr<arrow::Table> MakeTestTable() {
  // Create integer column
  arrow::Int32Builder int_builder;
  ARROW_EXPECT_OK(int_builder.AppendValues({1, 2, 3, 4, 5}));
  std::shared_ptr<arrow::Array> int_array;
  ARROW_EXPECT_OK(int_builder.Finish(&int_array));

  // Create double column
  arrow::DoubleBuilder double_builder;
  ARROW_EXPECT_OK(double_builder.AppendValues({1.1, 2.2, 3.3, 4.4, 5.5}));
  std::shared_ptr<arrow::Array> double_array;
  ARROW_EXPECT_OK(double_builder.Finish(&double_array));

  // Create string column
  arrow::StringBuilder string_builder;
  ARROW_EXPECT_OK(string_builder.Append("a"));
  ARROW_EXPECT_OK(string_builder.Append("b"));
  ARROW_EXPECT_OK(string_builder.Append("c"));
  ARROW_EXPECT_OK(string_builder.Append("d"));
  ARROW_EXPECT_OK(string_builder.Append("e"));
  std::shared_ptr<arrow::Array> string_array;
  ARROW_EXPECT_OK(string_builder.Finish(&string_array));

  // Create boolean column
  arrow::BooleanBuilder bool_builder;
  ARROW_EXPECT_OK(bool_builder.Append(true));
  ARROW_EXPECT_OK(bool_builder.Append(false));
  ARROW_EXPECT_OK(bool_builder.Append(true));
  ARROW_EXPECT_OK(bool_builder.Append(false));
  ARROW_EXPECT_OK(bool_builder.Append(true));
  std::shared_ptr<arrow::Array> bool_array;
  ARROW_EXPECT_OK(bool_builder.Finish(&bool_array));

  // Build table
  auto schema = arrow::schema({arrow::field("int_col", arrow::int32()),
                               arrow::field("double_col", arrow::float64()),
                               arrow::field("string_col", arrow::utf8()),
                               arrow::field("bool_col", arrow::boolean())});

  auto int_chunked = std::make_shared<arrow::ChunkedArray>(int_array);
  auto double_chunked = std::make_shared<arrow::ChunkedArray>(double_array);
  auto string_chunked = std::make_shared<arrow::ChunkedArray>(string_array);
  auto bool_chunked = std::make_shared<arrow::ChunkedArray>(bool_array);

  return arrow::Table::Make(
      schema, {int_chunked, double_chunked, string_chunked, bool_chunked});
}


// Wraps a table in an in-memory Arrow dataset so the tests can scan it.
arrow::Result<std::shared_ptr<arrow::dataset::Dataset>> MakeTestDataset(
    const std::shared_ptr<arrow::Table> &table) {
  std::shared_ptr<arrow::dataset::Dataset> in_memory =
      std::make_shared<arrow::dataset::InMemoryDataset>(table);
  return in_memory;
}

// End-to-end check: parses "int_col + double_col", projects it through an
// Arrow dataset scanner over the test table and compares every output value
// with the sum computed directly from the input columns.
TEST_F(SqlExprParserTest, ParseAndScanDataSet) {
  auto status = arrow::compute::Initialize();
  ASSERT_TRUE(status.ok()) << status.ToString();

  auto schema = arrow::schema({arrow::field("int_col", arrow::int32()),
                               arrow::field("double_col", arrow::float64()),
                               arrow::field("string_col", arrow::utf8()),
                               arrow::field("bool_col", arrow::boolean())});

  // Step 1: Create test table
  auto table = MakeTestTable();
  ASSERT_TRUE(table != nullptr);

  // Step 2: Convert to Dataset
  ASSERT_OK_AND_ASSIGN(auto dataset, MakeTestDataset(table));

  // Step 3: Create scanner and project expression A + B
  ASSERT_OK_AND_ASSIGN(auto scanner_builder, dataset->NewScan());
  ASSERT_OK_AND_ASSIGN(auto expr,
                       ParseToExpression("int_col + double_col", schema));
  // The projection status was previously assigned but never inspected.
  status = scanner_builder->Project({expr}, {"sum"});
  ASSERT_TRUE(status.ok()) << status.ToString();

  ASSERT_OK_AND_ASSIGN(auto scanner, scanner_builder->Finish());

  // Step 4: Execute and get results
  ASSERT_OK_AND_ASSIGN(auto result_table, scanner->ToTable());
  ASSERT_TRUE(result_table != nullptr);
  ASSERT_EQ(result_table->num_rows(), 5);

  auto int_col = table->column(0);         // int_col
  auto double_col = table->column(1);      // double_col
  auto sum_col = result_table->column(0);  // sum column

  // The comparison below reads chunk 0 of the result as a DoubleArray, so
  // verify that this is actually what the scanner produced.
  ASSERT_EQ(sum_col->type()->id(), arrow::Type::DOUBLE);
  ASSERT_GE(sum_col->num_chunks(), 1);

  // Cast once instead of on every loop iteration.
  auto int_values =
      std::static_pointer_cast<arrow::Int32Array>(int_col->chunk(0));
  auto double_values =
      std::static_pointer_cast<arrow::DoubleArray>(double_col->chunk(0));
  auto sum_values =
      std::static_pointer_cast<arrow::DoubleArray>(sum_col->chunk(0));
  ASSERT_EQ(sum_values->length(), table->num_rows());

  for (int64_t i = 0; i < table->num_rows(); ++i) {
    ASSERT_NEAR(int_values->Value(i) + double_values->Value(i),
                sum_values->Value(i), 1e-10);
  }
}

================================================
FILE: tests/db/index/segment/sql_expr_validator_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <arrow/array.h>
#include <arrow/builder.h>
#include <arrow/dataset/api.h>
#include <arrow/table.h>
#include <arrow/type.h>
#include <gmock/gmock-matchers.h>
#include <gtest/gtest.h>
#include "db/index/segment/sql_expr_parser.h"

using arrow::Status;
using arrow::compute::Expression;
namespace compute = arrow::compute;
using namespace zvec;

// Parses `expr` against `schema` and passes the parsed expression to
// CheckSupportedArithmeticExpression. A parse failure is returned to the
// caller without running the check.
arrow::Result<Expression> ParseAndValidate(
    const std::string &expr, const std::shared_ptr<arrow::Schema> &schema) {
  ARROW_ASSIGN_OR_RAISE(auto parsed_expression,
                        ParseToExpression(expr, schema));
  return CheckSupportedArithmeticExpression(parsed_expression, *schema);
}

// Fixture for the expression-validator tests. SetUp builds a three-column
// schema (int32, float64, utf8) and an in-memory dataset over a table whose
// columns are freshly finished, never-appended-to builders.
class ExprValidatorTest : public ::testing::Test {
 protected:
  void SetUp() override {
    schema_ = arrow::schema({arrow::field("int32_col", arrow::int32()),
                             arrow::field("double_col", arrow::float64()),
                             arrow::field("str_col", arrow::utf8())});

    // One array per schema field, produced by a builder of the field's type.
    std::vector<std::shared_ptr<arrow::Array>> columns;
    for (const auto &column_field : schema_->fields()) {
      std::unique_ptr<arrow::ArrayBuilder> column_builder;
      const auto make_status = arrow::MakeBuilder(
          arrow::default_memory_pool(), column_field->type(), &column_builder);
      ASSERT_TRUE(make_status.ok());

      std::shared_ptr<arrow::Array> column;
      const auto finish_status = column_builder->Finish(&column);
      ASSERT_TRUE(finish_status.ok());
      columns.push_back(column);
    }

    dataset_ = std::make_shared<arrow::dataset::InMemoryDataset>(
        arrow::Table::Make(schema_, columns));
  }

  std::shared_ptr<arrow::Schema> schema_;
  std::shared_ptr<arrow::dataset::Dataset> dataset_;
};

// A bare numeric column reference is expected to pass validation.
TEST_F(ExprValidatorTest, SingleNumericColumn_Valid) {
  for (const char *expression : {"int32_col", "double_col"}) {
    auto validated = ParseAndValidate(expression, schema_);
    EXPECT_TRUE(validated.ok()) << validated.status().ToString();
  }
}

// Unary plus on a numeric column is expected to pass validation.
TEST_F(ExprValidatorTest, UnaryPositive_Supported) {
  for (const char *expression : {"+int32_col", "+double_col"}) {
    auto validated = ParseAndValidate(expression, schema_);
    EXPECT_TRUE(validated.ok()) << validated.status().ToString();
  }
}

// Unary minus on a numeric column is expected to pass validation.
TEST_F(ExprValidatorTest, UnaryNegative_Supported) {
  for (const char *expression : {"-int32_col", "-double_col"}) {
    auto validated = ParseAndValidate(expression, schema_);
    EXPECT_TRUE(validated.ok()) << validated.status().ToString();
  }
}

// A binary operation between one numeric column and one literal, in either
// operand order, is expected to pass validation. Every failure reports the
// expression and the validator status (previously only the first case did).
TEST_F(ExprValidatorTest, Binary_Op_With_Literal_Valid) {
  static const char *const kExpressions[] = {
      "int32_col + 1",    "int32_col - 100", "1.5 * double_col",
      "double_col / 2.0", "100 - int32_col",
  };

  for (const char *expression : kExpressions) {
    auto result = ParseAndValidate(expression, schema_);
    EXPECT_TRUE(result.ok())
        << "\"" << expression << "\": " << result.status().ToString();
  }
}

// A string column, bare or with a unary sign, is expected to be rejected with
// a status message containing "not numeric".
TEST_F(ExprValidatorTest, NonNumericColumn_Rejected) {
  for (const char *expression : {"str_col", "+str_col", "-str_col"}) {
    auto validated = ParseAndValidate(expression, schema_);
    EXPECT_FALSE(validated.ok());
    EXPECT_THAT(validated.status().ToString(),
                ::testing::HasSubstr("not numeric"));
  }
}

// A binary operation whose operands are both columns is expected to be
// rejected, whether the columns differ or are the same.
TEST_F(ExprValidatorTest, TwoColumns_Operations_Rejected) {
  for (const char *expression :
       {"int32_col + double_col", "int32_col + int32_col"}) {
    auto validated = ParseAndValidate(expression, schema_);
    EXPECT_FALSE(validated.ok());
  }
}

// Pure numeric literals, with or without a sign, are expected to pass
// validation.
// NOTE(review): the "_Rejected" suffix contradicts the assertions below, which
// all expect success -- confirm which one reflects the intended behavior.
TEST_F(ExprValidatorTest, PureLiteral_Rejected) {
  auto unsigned_literal = ParseAndValidate("123", schema_);
  EXPECT_TRUE(unsigned_literal.ok());

  auto positive_literal = ParseAndValidate("+123", schema_);
  EXPECT_TRUE(positive_literal.ok());

  auto negative_literal = ParseAndValidate("-456", schema_);
  EXPECT_TRUE(negative_literal.ok()) << negative_literal.status().ToString();
}

// A parenthesised column-plus-literal expression is expected to pass
// validation.
// NOTE(review): the "_Rejected" suffix contradicts the assertion below, which
// expects success -- confirm which one reflects the intended behavior.
TEST_F(ExprValidatorTest, NestedExpression_Rejected) {
  auto validated = ParseAndValidate("(int32_col + 1)", schema_);
  EXPECT_TRUE(validated.ok()) << validated.status().ToString();
}

// The "||" operator and a function call are both expected to be rejected.
TEST_F(ExprValidatorTest, InvalidFunctionOrSyntax) {
  for (const char *expression : {"int32_col || 'abc'", "sqrt(int32_col)"}) {
    auto validated = ParseAndValidate(expression, schema_);
    EXPECT_FALSE(validated.ok());
  }
}


================================================
FILE: tests/db/index/storage/arrow_ipc_writer_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "db/index/storage/arrow_ipc_writer.h"
#include <iostream>
#include <arrow/array/builder_primitive.h>
#include <arrow/ipc/reader.h>
#include <arrow/record_batch.h>
#include <arrow/status.h>
#include <gtest/gtest.h>
#include "db/index/storage/store_helper.h"

using namespace zvec;

auto schema = arrow::schema(
    {arrow::field("id", arrow::int32()), arrow::field("name", arrow::utf8())});

std::shared_ptr<arrow::RecordBatchReader> CreateTestReader(int start_id,
                                                           int count) {
  arrow::Int32Builder id_builder;
  arrow::StringBuilder name_builder;

  arrow::Status s;

  for (int i = 0; i < count; ++i) {
    s = id_builder.Append(start_id + i);
    if (!s.ok()) {
      return nullptr;
    }
    s = name_builder.Append("User" + std::to_string(start_id + i));
    if (!s.ok()) {
      return nullptr;
    }
  }

  std::shared_ptr<arrow::Array> id_array, name_array;
  s = id_builder.Finish(&id_array);
  if (!s.ok()) {
    return nullptr;
  }
  s = name_builder.Finish(&name_array);
  if (!s.ok()) {
    return nullptr;
  }

  auto batch = arrow::RecordBatch::Make(schema, count, {id_array, name_array});
  auto maybe_reader = arrow::RecordBatchReader::Make({batch}, schema);
  if (!maybe_reader.ok()) {
    return nullptr;
  }
  return *maybe_reader;
}

// End-to-end check of ArrowIpcWriter: inserts three readers (3, 2 and 4 rows),
// finalizes the writer, then re-opens the output with Arrow's IPC file reader
// and verifies that it contains 3 record batches totalling 9 rows.
TEST(ArrowIpcWriter, General) {
  std::string output_file_path = "output.ipc";

  ArrowIpcWriter writer(output_file_path);
  // writer.SetMaxRowsPerGroup(1000); // Optional: limit the rows per group

  // First insert
  {
    auto reader1 = CreateTestReader(1, 3);
    ASSERT_NE(reader1, nullptr);
    auto status = writer.insert(reader1);
    ASSERT_TRUE(status.ok());
    std::cout << "Inserted batch 1" << std::endl;
  }

  // Second insert
  {
    auto reader2 = CreateTestReader(4, 2);
    ASSERT_NE(reader2, nullptr);
    auto status = writer.insert(reader2);
    ASSERT_TRUE(status.ok());
    std::cout << "Inserted batch 2" << std::endl;
  }

  // Third insert
  {
    auto reader3 = CreateTestReader(6, 4);
    ASSERT_NE(reader3, nullptr);
    auto status = writer.insert(reader3);
    ASSERT_TRUE(status.ok());
    std::cout << "Inserted batch 3" << std::endl;
  }

  // Finally close the file. A finalize failure must fail the test right here
  // instead of only being logged and surfacing later as a confusing read
  // error.
  auto status = writer.finalize();
  ASSERT_TRUE(status.ok()) << "Finalize failed: " << status.ToString();

  std::cout << "IPC file written successfully to " << output_file_path
            << std::endl;

  // Read the file back
  std::shared_ptr<arrow::io::RandomAccessFile> output_file_;
  std::string output_file_path_cp;
  auto as = CreateRandomAccessFileByUri(output_file_path, &output_file_,
                                        &output_file_path_cp);
  ASSERT_TRUE(as.ok());

  auto result = arrow::ipc::RecordBatchFileReader::Open(output_file_);
  ASSERT_TRUE(result.ok());

  auto reader = std::move(result).ValueOrDie();
  ASSERT_EQ(reader->num_record_batches(), 3);

  int num_rows = 0;
  for (int i = 0; i < reader->num_record_batches(); i++) {
    std::shared_ptr<arrow::RecordBatch> batch;
    auto res = reader->ReadRecordBatch(i);
    ASSERT_TRUE(res.ok());
    batch = std::move(res).ValueOrDie();
    num_rows += batch->num_rows();
  }

  ASSERT_EQ(num_rows, 9);
}


================================================
FILE: tests/db/index/storage/bufferpool_store_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cstdint>
#include <cstdlib>
#include <filesystem>
#include <iostream>
#include <memory>
#include <thread>
#include <arrow/api.h>
#include <arrow/result.h>
#include <arrow/table.h>
#include <gtest/gtest.h>
#include "db/index/storage/bufferpool_forward_store.h"
#include "utils/utils.h"

using namespace zvec;

// Fixture for BufferPoolForwardStore tests.
//
// SetUp writes a Parquet test file to `parquet_path` through
// test::TestHelper::WriteTestFile and initializes the ailego::BufferManager
// singleton; TearDown removes the file again if it exists.
class BufferPoolStoreTest : public testing::Test {
 protected:
  void SetUp() override {
    auto s = test::TestHelper::WriteTestFile(parquet_path, FileFormat::PARQUET);
    // Report the failure through gtest instead of exit(1): a fatal assertion
    // in SetUp skips only this test's body, whereas exit() would terminate
    // the whole test binary without a proper failure report.
    ASSERT_TRUE(s.ok()) << "err: " << s.message();
    ailego::BufferManager::Instance().init(10 * 1024 * 1024, 1);
  }

  void TearDown() override {
    if (std::filesystem::exists(parquet_path)) {
      std::filesystem::remove(parquet_path);
    }
  }

  // Path of the Parquet file created for every test.
  std::string parquet_path = "test.parquet";
};


// Fetching three columns for three row indices yields a three-row table.
TEST_F(BufferPoolStoreTest, ParquetFetch) {
  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);
  // Fatal on failure: fetching from a store that failed to open is
  // meaningless (matches the AllDataType test).
  ASSERT_TRUE(store->Open().ok());
  TablePtr table = store->fetch({"id", "name", "score"}, {0, 1, 2});
  ASSERT_TRUE(table != nullptr);
  EXPECT_EQ(table->num_rows(), 3);
}


// Fetching a two-column subset returns exactly those two columns.
TEST_F(BufferPoolStoreTest, ParquetFetchWithSelectColumns) {
  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);
  // Fatal on failure: fetching from a store that failed to open is
  // meaningless (matches the AllDataType test).
  ASSERT_TRUE(store->Open().ok());
  TablePtr table = store->fetch({"id", "name"}, {0, 1, 2});
  ASSERT_TRUE(table != nullptr);
  EXPECT_EQ(table->num_rows(), 3);
  EXPECT_EQ(table->num_columns(), 2);
}

// The USER_ID column can be fetched together with regular columns.
TEST_F(BufferPoolStoreTest, ParquetFetchWithUID) {
  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);
  // Fatal on failure: fetching from a store that failed to open is
  // meaningless (matches the AllDataType test).
  ASSERT_TRUE(store->Open().ok());
  auto table = store->fetch({USER_ID, "id", "name"}, {0, 1, 2});
  ASSERT_TRUE(table != nullptr);
  EXPECT_EQ(table->num_rows(), 3);
  EXPECT_EQ(table->num_columns(), 3);
}

// The GLOBAL_DOC_ID column can be fetched together with regular columns.
TEST_F(BufferPoolStoreTest, ParquetFetchWithGlobalDocID) {
  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);
  // Fatal on failure: fetching from a store that failed to open is
  // meaningless (matches the AllDataType test).
  ASSERT_TRUE(store->Open().ok());
  auto table = store->fetch({GLOBAL_DOC_ID, "id", "name"}, {0, 1, 2});
  ASSERT_TRUE(table != nullptr);
  EXPECT_EQ(table->num_rows(), 3);
  EXPECT_EQ(table->num_columns(), 3);
}

// Fetching with an empty column list returns nullptr.
TEST_F(BufferPoolStoreTest, ParquetFetchWitEmptyColumns) {
  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);
  // Fatal on failure: otherwise a failed Open() could make the nullptr
  // expectation below pass for the wrong reason.
  ASSERT_TRUE(store->Open().ok());
  TablePtr table = store->fetch({}, std::vector<int>{});
  EXPECT_EQ(table, nullptr);
}

// Fetching valid columns with no row indices returns an empty table that
// still carries the requested columns.
TEST_F(BufferPoolStoreTest, ParquetFetchWitEmptyIndices) {
  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);
  // Fatal on failure: fetching from a store that failed to open is
  // meaningless (matches the AllDataType test).
  ASSERT_TRUE(store->Open().ok());
  TablePtr table = store->fetch({"id", "name"}, std::vector<int>{});
  ASSERT_TRUE(table != nullptr);
  EXPECT_EQ(table->num_rows(), 0);
  EXPECT_EQ(table->num_columns(), 2);
}

// Unordered and repeated row indices are all honoured: eight indices produce
// eight rows.
TEST_F(BufferPoolStoreTest, ParquetFetchWithMoreIndices) {
  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);
  // Fatal on failure: fetching from a store that failed to open is
  // meaningless (matches the AllDataType test).
  ASSERT_TRUE(store->Open().ok());
  TablePtr table = store->fetch({"id"}, {0, 1, 2, 3, 6, 2, 1, 7});
  ASSERT_TRUE(table != nullptr);
  EXPECT_EQ(table->num_rows(), 8);
  EXPECT_EQ(table->num_columns(), 1);
}

// A row index of 30 makes the whole fetch return nullptr (the scan tests in
// this file count 10 rows in the test file).
TEST_F(BufferPoolStoreTest, ParquetFetchWithInvalidIndices) {
  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);
  // Fatal on failure: otherwise a failed Open() could make the nullptr
  // expectation below pass for the wrong reason.
  ASSERT_TRUE(store->Open().ok());
  TablePtr table = store->fetch({"id"}, {0, 1, 30});
  ASSERT_TRUE(table == nullptr);
}

// When LOCAL_ROW_ID is requested in the middle of the column list, it must
// appear at that position and its values must follow the order of the
// requested row indices.
TEST_F(BufferPoolStoreTest, ParquetFetchCheckOrderWithLocalRowIDMiddle) {
  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);
  // Fatal on failure: fetching from a store that failed to open is
  // meaningless (matches the AllDataType test).
  ASSERT_TRUE(store->Open().ok());
  TablePtr table =
      store->fetch({"id", "name", LOCAL_ROW_ID, "score"}, {0, 3, 6, 1, 0});
  ASSERT_TRUE(table != nullptr);
  EXPECT_EQ(table->num_rows(), 5);
  // Fatal: columns and fields are indexed by position below.
  ASSERT_EQ(table->num_columns(), 4);
  auto field = table->schema()->field(2);
  EXPECT_EQ(field->name(), LOCAL_ROW_ID);

  // Get data from the _zvec_row_id_ column for each row
  auto id_column = table->column(2);
  ASSERT_GT(id_column->num_chunks(), 0);
  auto id_array =
      std::dynamic_pointer_cast<arrow::UInt64Array>(id_column->chunk(0));
  // dynamic_pointer_cast yields nullptr on a type mismatch; fail cleanly
  // instead of dereferencing a null pointer.
  ASSERT_NE(id_array, nullptr) << "row id column is not a UInt64Array";

  // Compare as uint64_t, the array's element type, to avoid narrowing.
  const std::vector<uint64_t> expected_ids = {0, 3, 6, 1, 0};
  std::vector<uint64_t> actual_ids;

  for (int64_t i = 0; i < id_array->length(); ++i) {
    actual_ids.push_back(id_array->Value(i));
  }

  EXPECT_EQ(actual_ids, expected_ids)
      << "ID column values don't match expected order";
}


// When LOCAL_ROW_ID is requested as the last column, it must appear at that
// position and its values must follow the order of the requested row indices.
TEST_F(BufferPoolStoreTest, ParquetFetchCheckOrderWithLocalRowIDEnd) {
  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);
  // Fatal on failure: fetching from a store that failed to open is
  // meaningless (matches the AllDataType test).
  ASSERT_TRUE(store->Open().ok());
  TablePtr table =
      store->fetch({"id", "name", "score", LOCAL_ROW_ID}, {0, 3, 6, 1, 0});
  ASSERT_TRUE(table != nullptr);
  EXPECT_EQ(table->num_rows(), 5);
  // Fatal: columns and fields are indexed by position below.
  ASSERT_EQ(table->num_columns(), 4);
  auto field = table->schema()->field(3);
  EXPECT_EQ(field->name(), LOCAL_ROW_ID);

  // Get data from the _zvec_row_id_ column for each row
  auto id_column = table->column(3);
  ASSERT_GT(id_column->num_chunks(), 0);
  auto id_array =
      std::dynamic_pointer_cast<arrow::UInt64Array>(id_column->chunk(0));
  // dynamic_pointer_cast yields nullptr on a type mismatch; fail cleanly
  // instead of dereferencing a null pointer.
  ASSERT_NE(id_array, nullptr) << "row id column is not a UInt64Array";

  // Compare as uint64_t, the array's element type, to avoid narrowing.
  const std::vector<uint64_t> expected_ids = {0, 3, 6, 1, 0};
  std::vector<uint64_t> actual_ids;

  for (int64_t i = 0; i < id_array->length(); ++i) {
    actual_ids.push_back(id_array->Value(i));
  }

  EXPECT_EQ(actual_ids, expected_ids)
      << "ID column values don't match expected order";
}


// Scanning three columns yields non-empty three-column batches that add up
// to 10 rows.
TEST_F(BufferPoolStoreTest, ParquetScan) {
  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);
  // Fatal on failure: scanning a store that failed to open is meaningless.
  ASSERT_TRUE(store->Open().ok());
  auto reader = store->scan({"id", "name", "score"});
  // scan() can return nullptr (see ParquetScanWithInvalidColumn); guard the
  // dereference in the loop below.
  ASSERT_TRUE(reader != nullptr);
  int batch_count = 0;
  int total_rows = 0;
  while (true) {
    std::shared_ptr<arrow::RecordBatch> batch;
    auto status = reader->ReadNext(&batch);
    ASSERT_TRUE(status.ok());
    // A null batch marks the end of the stream.
    if (batch == nullptr) {
      break;
    }
    EXPECT_GT(batch->num_rows(), 0);
    EXPECT_EQ(batch->num_columns(), 3);
    batch_count++;
    total_rows += batch->num_rows();
  }
  EXPECT_GT(batch_count, 0);
  EXPECT_EQ(total_rows, 10);
}

// Scanning a two-column subset yields non-empty two-column batches that add
// up to 10 rows.
TEST_F(BufferPoolStoreTest, ParquetScanWithSelectColumns) {
  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);
  // Fatal on failure: scanning a store that failed to open is meaningless.
  ASSERT_TRUE(store->Open().ok());
  auto reader = store->scan({"id", "name"});
  // scan() can return nullptr (see ParquetScanWithInvalidColumn); guard the
  // dereference in the loop below.
  ASSERT_TRUE(reader != nullptr);
  int batch_count = 0;
  int total_rows = 0;
  while (true) {
    std::shared_ptr<arrow::RecordBatch> batch;
    auto status = reader->ReadNext(&batch);
    ASSERT_TRUE(status.ok());
    // A null batch marks the end of the stream.
    if (batch == nullptr) {
      break;
    }
    EXPECT_GT(batch->num_rows(), 0);
    EXPECT_EQ(batch->num_columns(), 2);
    batch_count++;
    total_rows += batch->num_rows();
  }
  EXPECT_GT(batch_count, 0);
  EXPECT_EQ(total_rows, 10);
}

// Requesting an unknown column makes scan() return nullptr.
TEST_F(BufferPoolStoreTest, ParquetScanWithInvalidColumn) {
  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);
  // Fatal on failure: otherwise a failed Open() could make the nullptr
  // expectation below pass for the wrong reason.
  ASSERT_TRUE(store->Open().ok());
  auto reader = store->scan({"id", "unknown_column"});
  ASSERT_TRUE(reader == nullptr);
}


// USER_ID can be scanned together with regular columns; every batch has four
// columns and the batches add up to 10 rows.
TEST_F(BufferPoolStoreTest, ParquetScanWithUserID) {
  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);
  // Fatal on failure: scanning a store that failed to open is meaningless.
  ASSERT_TRUE(store->Open().ok());
  auto reader = store->scan({USER_ID, "id", "name", "score"});
  // scan() can return nullptr (see ParquetScanWithInvalidColumn); guard the
  // dereference in the loop below.
  ASSERT_TRUE(reader != nullptr);
  int batch_count = 0;
  int total_rows = 0;
  while (true) {
    std::shared_ptr<arrow::RecordBatch> batch;
    auto status = reader->ReadNext(&batch);
    ASSERT_TRUE(status.ok());
    // A null batch marks the end of the stream.
    if (batch == nullptr) {
      break;
    }
    EXPECT_GT(batch->num_rows(), 0);
    EXPECT_EQ(batch->num_columns(), 4);
    batch_count++;
    total_rows += batch->num_rows();
  }
  EXPECT_GT(batch_count, 0);
  EXPECT_EQ(total_rows, 10);
}

// GLOBAL_DOC_ID can be scanned together with regular columns; every batch has
// four columns and the batches add up to 10 rows.
TEST_F(BufferPoolStoreTest, ParquetScanWithGlobalDocID) {
  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);
  // Fatal on failure: scanning a store that failed to open is meaningless.
  ASSERT_TRUE(store->Open().ok());
  auto reader = store->scan({GLOBAL_DOC_ID, "id", "name", "score"});
  // scan() can return nullptr (see ParquetScanWithInvalidColumn); guard the
  // dereference in the loop below.
  ASSERT_TRUE(reader != nullptr);
  int batch_count = 0;
  int total_rows = 0;
  while (true) {
    std::shared_ptr<arrow::RecordBatch> batch;
    auto status = reader->ReadNext(&batch);
    ASSERT_TRUE(status.ok());
    // A null batch marks the end of the stream.
    if (batch == nullptr) {
      break;
    }
    EXPECT_GT(batch->num_rows(), 0);
    EXPECT_EQ(batch->num_columns(), 4);
    batch_count++;
    total_rows += batch->num_rows();
  }
  EXPECT_GT(batch_count, 0);
  EXPECT_EQ(total_rows, 10);
}

// The single-row fetch overload returns a one-row batch of scalars; the "id"
// scalar of row 0 is expected to be the Int32 value 1.
TEST_F(BufferPoolStoreTest, ParquetFetchSingleRow) {
  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);
  // Fatal on failure: fetching from a store that failed to open is
  // meaningless (matches the AllDataType test).
  ASSERT_TRUE(store->Open().ok());

  ExecBatchPtr batch = store->fetch({"id", "name", "score"}, 0);
  ASSERT_TRUE(batch != nullptr);
  EXPECT_EQ(batch->length, 1);
  EXPECT_EQ(batch->values.size(), 3);

  auto id_scalar = batch->values[0].scalar();
  ASSERT_TRUE(id_scalar != nullptr);
  auto id_value = std::dynamic_pointer_cast<arrow::Int32Scalar>(id_scalar);
  ASSERT_TRUE(id_value != nullptr);
  EXPECT_EQ(id_value->value, 1);
}

// Single-row fetch of row 3: the "id" scalar is expected to be the Int32
// value 4.
TEST_F(BufferPoolStoreTest, ParquetFetchSpecificRow) {
  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);
  // Fatal on failure: fetching from a store that failed to open is
  // meaningless (matches the AllDataType test).
  ASSERT_TRUE(store->Open().ok());

  ExecBatchPtr batch = store->fetch({"id", "name", "score"}, 3);
  ASSERT_TRUE(batch != nullptr);
  EXPECT_EQ(batch->length, 1);
  EXPECT_EQ(batch->values.size(), 3);

  auto id_scalar = batch->values[0].scalar();
  ASSERT_TRUE(id_scalar != nullptr);
  auto id_value = std::dynamic_pointer_cast<arrow::Int32Scalar>(id_scalar);
  ASSERT_TRUE(id_value != nullptr);
  EXPECT_EQ(id_value->value, 4);
}

// Single-row fetch including USER_ID: the first value must be a string
// scalar.
TEST_F(BufferPoolStoreTest, ParquetFetchSingleRowWithUserID) {
  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);
  // Fatal on failure: fetching from a store that failed to open is
  // meaningless (matches the AllDataType test).
  ASSERT_TRUE(store->Open().ok());

  ExecBatchPtr batch = store->fetch({USER_ID, "id", "name"}, 1);
  ASSERT_TRUE(batch != nullptr);
  EXPECT_EQ(batch->length, 1);
  EXPECT_EQ(batch->values.size(), 3);

  auto user_id_scalar = batch->values[0].scalar();
  ASSERT_TRUE(user_id_scalar != nullptr);
  EXPECT_TRUE(std::dynamic_pointer_cast<arrow::StringScalar>(user_id_scalar) !=
              nullptr);
}

// Single-row fetch including GLOBAL_DOC_ID: the first value must be a UInt64
// scalar.
TEST_F(BufferPoolStoreTest, ParquetFetchSingleRowWithGlobalDocID) {
  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);
  // Fatal on failure: fetching from a store that failed to open is
  // meaningless (matches the AllDataType test).
  ASSERT_TRUE(store->Open().ok());

  ExecBatchPtr batch = store->fetch({GLOBAL_DOC_ID, "id", "name"}, 4);
  ASSERT_TRUE(batch != nullptr);
  EXPECT_EQ(batch->length, 1);
  EXPECT_EQ(batch->values.size(), 3);

  auto global_doc_id_scalar = batch->values[0].scalar();
  ASSERT_TRUE(global_doc_id_scalar != nullptr);
  EXPECT_TRUE(std::dynamic_pointer_cast<arrow::UInt64Scalar>(
                  global_doc_id_scalar) != nullptr);
}

// A negative row index makes the single-row fetch return nullptr.
TEST_F(BufferPoolStoreTest, ParquetFetchSingleRowWithNegativeIndex) {
  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);
  // Fatal on failure: otherwise a failed Open() could make the nullptr
  // expectation below pass for the wrong reason.
  ASSERT_TRUE(store->Open().ok());

  ExecBatchPtr batch = store->fetch({"id", "name"}, -1);
  EXPECT_EQ(batch, nullptr);
}

// Row index 15 makes the single-row fetch return nullptr (the scan tests in
// this file count 10 rows in the test file).
TEST_F(BufferPoolStoreTest, ParquetFetchSingleRowWithOutOfRangeIndex) {
  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);
  // Fatal on failure: otherwise a failed Open() could make the nullptr
  // expectation below pass for the wrong reason.
  ASSERT_TRUE(store->Open().ok());

  ExecBatchPtr batch = store->fetch({"id", "name"}, 15);
  EXPECT_EQ(batch, nullptr);
}

// An unknown column name makes the single-row fetch return nullptr.
TEST_F(BufferPoolStoreTest, ParquetFetchSingleRowWithInvalidColumn) {
  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);
  // Fatal on failure: otherwise a failed Open() could make the nullptr
  // expectation below pass for the wrong reason.
  ASSERT_TRUE(store->Open().ok());

  ExecBatchPtr batch = store->fetch({"id", "invalid_column"}, 0);
  EXPECT_EQ(batch, nullptr);
}

// An empty column list makes the single-row fetch return nullptr.
TEST_F(BufferPoolStoreTest, ParquetFetchSingleRowWithEmptyColumns) {
  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);
  // Fatal on failure: otherwise a failed Open() could make the nullptr
  // expectation below pass for the wrong reason.
  ASSERT_TRUE(store->Open().ok());

  ExecBatchPtr batch = store->fetch({}, 0);
  EXPECT_EQ(batch, nullptr);
}

// Single-row fetch of a scalar column plus a list column for row 2.
// Expected: id == 3, a 128-element Int32 list, and list[i] == 2 * 10 + i for
// the first ten elements.
TEST_F(BufferPoolStoreTest, AllDataTypeFetchSingleRow) {
  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);
  // Fatal on failure: fetching from a store that failed to open is
  // meaningless (matches the AllDataType test).
  ASSERT_TRUE(store->Open().ok());

  ExecBatchPtr batch = store->fetch({"id", "list_int32"}, 2);
  ASSERT_TRUE(batch != nullptr);
  EXPECT_EQ(batch->length, 1);
  // Fatal: values[1] is accessed below.
  ASSERT_EQ(batch->values.size(), 2);

  auto id_scalar = batch->values[0].scalar();
  ASSERT_TRUE(id_scalar != nullptr);
  auto id_value = std::dynamic_pointer_cast<arrow::Int32Scalar>(id_scalar);
  ASSERT_TRUE(id_value != nullptr);
  EXPECT_EQ(id_value->value, 3);

  auto list_scalar = batch->values[1].scalar();
  ASSERT_TRUE(list_scalar != nullptr);
  auto list_value = std::dynamic_pointer_cast<arrow::ListScalar>(list_scalar);
  ASSERT_TRUE(list_value != nullptr);
  EXPECT_EQ(list_value->value->length(), 128);

  auto list_array =
      std::dynamic_pointer_cast<arrow::Int32Array>(list_value->value);
  ASSERT_TRUE(list_array != nullptr);
  // Only the first ten list elements are spot-checked.
  for (int i = 0; i < 10 && i < list_array->length(); ++i) {
    EXPECT_EQ(list_array->Value(i), 2 * 10 + i);
  }
}

// Multi-row fetch of a scalar column plus a list column.
// For the requested row index r the test expects id == r + 1 and a
// 128-element Int32 list whose m-th element equals r * 10 + m.
TEST_F(BufferPoolStoreTest, AllDataType) {
  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);
  ASSERT_TRUE(store->Open().ok());

  std::vector<std::string> columns = {"id", "list_int32"};
  std::vector<int> indices = {0, 3, 6, 1, 0};

  TablePtr table = store->fetch(columns, indices);
  ASSERT_TRUE(table != nullptr);
  // Fatal: `indices` is indexed by table row position below, so the shape
  // must match before any element is inspected.
  ASSERT_EQ(table->num_rows(), 5);
  ASSERT_EQ(table->num_columns(), 2);

  for (size_t j = 0; j < columns.size(); ++j) {
    auto column = table->column(static_cast<int>(j));
    // Rows already consumed by earlier chunks of this column. Without it a
    // chunk-local index would be paired with the wrong entry of `indices`
    // whenever the column is split into more than one chunk.
    int64_t row_offset = 0;
    for (int k = 0; k < column->num_chunks(); ++k) {
      auto array = column->chunk(k);
      if (array->type()->id() == arrow::Type::INT32) {
        auto int_array = std::static_pointer_cast<arrow::Int32Array>(array);
        for (int64_t i = 0; i < array->length(); ++i) {
          const int row = indices[static_cast<size_t>(row_offset + i)];
          int32_t value = int_array->Value(i);
          EXPECT_EQ(value, row + 1);
        }
      } else if (array->type()->id() == arrow::Type::LIST) {
        auto list_array = std::static_pointer_cast<arrow::ListArray>(array);
        for (int64_t i = 0; i < array->length(); ++i) {
          const int row = indices[static_cast<size_t>(row_offset + i)];
          auto list_value = list_array->value_slice(i);
          auto list_value_array =
              std::static_pointer_cast<arrow::Int32Array>(list_value);
          EXPECT_EQ(list_value_array->length(), 128);
          for (int64_t m = 0; m < list_value_array->length(); ++m) {
            int32_t value = list_value_array->Value(m);
            EXPECT_EQ(value, row * 10 + m);
          }
        }
      }
      row_offset += array->length();
    }
  }
}

// Constructs a store that is never opened and destroys it immediately; the
// test has no assertions and passes as long as construction and destruction
// complete.
TEST_F(BufferPoolStoreTest, DeleteDestructs) {
  auto store = std::make_unique<BufferPoolForwardStore>(parquet_path);
  store.reset();
}

// After a successful Open(), physic_schema() must return a non-null schema.
TEST_F(BufferPoolStoreTest, PhysicSchema) {
  auto store = std::make_shared<BufferPoolForwardStore>(parquet_path);
  ASSERT_NE(store, nullptr);
  // Fatal on failure: the schema expectation below is only meaningful for a
  // store that opened successfully (matches the AllDataType test).
  ASSERT_TRUE(store->Open().ok());
  EXPECT_NE(store->physic_schema(), nullptr);
}


================================================
FILE: tests/db/index/storage/mem_store_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "zvec/db/doc.h"
#define private public
#define protected public
#include "db/index/storage/memory_forward_store.h"
#undef private
#undef protected
#include <cstdint>
#include <filesystem>
#include <future>
#include <memory>
#include <string>
#include <thread>
#include <vector>
#include <arrow/array.h>
#include <arrow/io/file.h>
#include <arrow/ipc/reader.h>
#include <arrow/result.h>
#include <arrow/table.h>
#include <gtest/gtest.h>
#include "utils/utils.h"

using namespace zvec;

// Helper: builds the "test_collection" schema used throughout this file.
// It has four non-nullable fields -- id (UINT64), name (STRING), age (INT32)
// and score (DOUBLE) -- each created with a null fourth constructor argument.
CollectionSchema::Ptr GetCollectionSchema() {
  std::vector<FieldSchema::Ptr> fields{
      std::make_shared<FieldSchema>("id", DataType::UINT64, false, nullptr),
      std::make_shared<FieldSchema>("name", DataType::STRING, false, nullptr),
      std::make_shared<FieldSchema>("age", DataType::INT32, false, nullptr),
      std::make_shared<FieldSchema>("score", DataType::DOUBLE, false, nullptr),
  };
  return std::make_shared<CollectionSchema>("test_collection",
                                            std::move(fields));
}

// Creates a document for `doc_id`: primary key "pk_<doc_id>", doc id and "id"
// field set to `doc_id`, and pseudo-random "age" (1..100),
// "name" ("user_<0..999>") and "score" (0.0..99.9) values.
Doc CreateDoc(const uint64_t doc_id) {
  Doc doc;
  doc.set_pk("pk_" + std::to_string(doc_id));
  doc.set_doc_id(doc_id);
  doc.set<uint64_t>("id", doc_id);

  // rand() is drawn in the order age, name, score.
  const int age_roll = rand() % 100;
  doc.set<int32_t>("age", age_roll + 1);

  const int name_roll = rand() % 1000;
  doc.set<std::string>("name",
                       std::string("user_") + std::to_string(name_roll));

  const int score_roll = rand() % 1000;
  doc.set<double>("score", static_cast<double>(score_roll) / 10.0);
  return doc;
}

// Reseeds rand() from the current time, then inserts one generated document
// for every id in [start_doc_id, end_doc_id) into `store`. Nothing is
// inserted when `store` is null; insert results are not checked.
void InsertDoc(const MemForwardStore::Ptr &store, const uint64_t start_doc_id,
               const uint64_t end_doc_id) {
  srand(time(nullptr));
  if (!store) {
    return;
  }
  for (uint64_t id = start_doc_id; id < end_doc_id; ++id) {
    Doc doc = CreateDoc(id);
    store->insert(doc);
  }
}

// Fixture for MemForwardStore tests.
//
// SetUp builds the shared test schema and opens an IPC-format store at
// "./scalar.block.0"; TearDown removes the file at the store's path if it
// exists and then releases the store.
class MemStoreTest : public testing::Test {
 protected:
  void SetUp() override {
    schema_ = GetCollectionSchema();
    store_ = std::make_shared<MemForwardStore>(schema_, "./scalar.block.0",
                                               FileFormat::IPC);
    // Fatal on failure so that no test body runs against a store that
    // failed to open. TearDown still runs and store_ is non-null by then.
    ASSERT_TRUE(store_->Open().ok());
  }

  void TearDown() override {
    auto path = store_->path();
    if (std::filesystem::exists(path)) {
      std::filesystem::remove(path);
    }
    store_.reset();
  }

  std::shared_ptr<CollectionSchema> schema_;  // schema used to build store_
  std::shared_ptr<MemForwardStore> store_;    // store under test
};

// A stack-constructed store built from a fresh schema can be opened.
TEST_F(MemStoreTest, ConstructorTest) {
  auto fresh_schema = GetCollectionSchema();
  MemForwardStore local_store(fresh_schema, "./scalar.block.0",
                              FileFormat::IPC);
  auto open_status = local_store.Open();
  EXPECT_TRUE(open_status.ok());
}

// SetUp has already called Open() on the fixture store, so this checks that
// a further Open() call also reports success.
TEST_F(MemStoreTest, OpenTest) {
  auto open_status = store_->Open();
  EXPECT_TRUE(open_status.ok());
}

// Inserting one fully populated document succeeds and the store then reports
// one row.
TEST_F(MemStoreTest, InsertValidData) {
  Doc sample = CreateDoc(uint64_t{0});
  EXPECT_EQ(store_->insert(sample), Status::OK());
  EXPECT_EQ(store_->num_rows(), 1);
}

// Test insert method with multiple documents: five inserts give five rows,
// and a fetch with no row indices returns an empty table.
TEST_F(MemStoreTest, InsertMultipleDoc) {
  // Insert multiple documents
  for (uint64_t i = 0; i < 5; ++i) {
    Doc doc = CreateDoc(i);
    EXPECT_EQ(store_->insert(doc), Status::OK());
  }
  EXPECT_EQ(store_->num_rows(), 5);
  auto table = store_->fetch({"id"}, std::vector<int>{});
  // fetch() can return nullptr (see FetchWithEmptyColumns); guard the
  // dereference below.
  ASSERT_NE(table, nullptr);
  EXPECT_EQ(table->num_rows(), 0);
}

// Test insert method with nullable data: once "id" is altered to be
// nullable, a document without an "id" value can be inserted.
TEST_F(MemStoreTest, InsertNullableData) {
  auto schema = GetCollectionSchema();
  std::string id = "id";
  schema->alter_field(id, FieldSchema::Ptr(new FieldSchema(
                              "id", DataType::UINT64, true, nullptr)));
  MemForwardStore::Ptr store = std::make_shared<MemForwardStore>(
      schema, "./scalar.block.0", FileFormat::IPC);
  // Fatal on failure: the rest of the test needs an opened store.
  ASSERT_TRUE(store->Open().ok());

  uint64_t doc_id = 0;
  Doc doc = CreateDoc(doc_id);
  doc.remove("id");
  EXPECT_EQ(store->insert(doc), Status::OK());
  EXPECT_EQ(store->num_rows(), 1);
  auto table = store->fetch({"id"}, std::vector<int>{});
  // fetch() can return nullptr (see FetchWithEmptyColumns); guard the
  // dereference below.
  ASSERT_NE(table, nullptr);
  EXPECT_EQ(table->num_rows(), 0);
}


// flush() on a store that has received no documents reports Status::OK().
TEST_F(MemStoreTest, FlushEmptyCache) {
  auto flush_status = store_->flush();
  EXPECT_EQ(flush_status, Status::OK());
}

// convertToBuilder() succeeds on a store holding one document, and calling
// it a second time with the same builder succeeds as well; the store's row
// count stays at 1 throughout.
TEST_F(MemStoreTest, convertToBuilder) {
  Doc sample = CreateDoc(uint64_t{0});
  EXPECT_EQ(store_->insert(sample), Status::OK());

  auto builder = store_->createBuilder();

  auto first_pass = store_->convertToBuilder(builder);
  EXPECT_TRUE(first_pass.ok());
  EXPECT_EQ(store_->num_rows(), 1);

  // Convert again into the same builder.
  auto second_pass = store_->convertToBuilder(builder);
  EXPECT_TRUE(second_pass.ok());
  EXPECT_EQ(store_->num_rows(), 1);
}

// Test convertToBuilder method with nullable data: ten documents are inserted
// into a store whose "id" field is nullable, every second one without an
// "id" value, and the store is then converted into a builder.
TEST_F(MemStoreTest, convertToBuilderWithNullableData) {
  auto schema = GetCollectionSchema();
  std::string id = "id";
  schema->alter_field(id, FieldSchema::Ptr(new FieldSchema(
                              "id", DataType::UINT64, true, nullptr)));
  MemForwardStore::Ptr store = std::make_shared<MemForwardStore>(
      schema, "./scalar.block.0", FileFormat::IPC);
  // Fatal on failure: the rest of the test needs an opened store.
  ASSERT_TRUE(store->Open().ok());

  for (size_t i = 0; i < 10; i++) {
    uint64_t doc_id = 0;
    Doc doc = CreateDoc(doc_id);
    if (i % 2 == 0) {
      doc.remove("id");
    }
    EXPECT_EQ(store->insert(doc), Status::OK());
  }

  // Convert the nullable store that was just populated. Using the fixture's
  // `store_` here would only exercise an empty, non-nullable store and leave
  // the nullable path untested.
  auto rb_builder = store->createBuilder();
  auto result = store->convertToBuilder(rb_builder);
  EXPECT_TRUE(result.ok());

  EXPECT_EQ(store->num_rows(), 10);
}

// Test convertToRecordBatch method: one inserted document converts to a
// one-row record batch, and converting a second time gives the same result.
TEST_F(MemStoreTest, ConvertToRecordBatch) {
  uint64_t doc_id = 0;
  Doc doc = CreateDoc(doc_id);
  EXPECT_EQ(store_->insert(doc), Status::OK());

  auto result = store_->convertToRecordBatch();
  // Fatal: ValueOrDie() aborts the process when the result holds an error.
  ASSERT_TRUE(result.ok());
  auto rb = result.ValueOrDie();
  ASSERT_NE(rb, nullptr);
  EXPECT_EQ(rb->num_rows(), 1);

  // re convert to record batch
  result = store_->convertToRecordBatch();
  ASSERT_TRUE(result.ok());
  rb = result.ValueOrDie();
  ASSERT_NE(rb, nullptr);
  EXPECT_EQ(rb->num_rows(), 1);
}

// Test convertToTable method: with no column filter the table is expected to
// have 2 + 4 columns (the four schema fields plus two more -- presumably
// internal columns, confirm against MemForwardStore) and one row.
TEST_F(MemStoreTest, ConvertToTable) {
  uint64_t doc_id = 0;
  Doc doc = CreateDoc(doc_id);
  EXPECT_EQ(store_->insert(doc), Status::OK());

  std::vector<std::string> columns = {};

  auto result = store_->convertToTable(columns, {});
  // Fatal: ValueOrDie() aborts the process when the result holds an error.
  ASSERT_TRUE(result.ok());
  auto table = result.ValueOrDie();
  ASSERT_NE(table, nullptr);
  EXPECT_EQ(table->num_rows(), 1);
  EXPECT_EQ(table->num_columns(), 2 + 4);

  // re convert to table
  result = store_->convertToTable(columns, {});
  ASSERT_TRUE(result.ok());
  table = result.ValueOrDie();
  ASSERT_NE(table, nullptr);
  EXPECT_EQ(table->num_rows(), 1);
  EXPECT_EQ(table->num_columns(), 2 + 4);
}

// Test convertToTable method with column filtering: only the two requested
// columns appear in the resulting table.
TEST_F(MemStoreTest, ConvertToTableWithColumnFiltering) {
  uint64_t doc_id = 0;
  Doc doc = CreateDoc(doc_id);
  EXPECT_EQ(store_->insert(doc), Status::OK());

  std::vector<std::string> columns = {"id", "name"};

  auto result = store_->convertToTable(columns, {});
  // Fatal: ValueOrDie() aborts the process when the result holds an error.
  ASSERT_TRUE(result.ok());
  auto table = result.ValueOrDie();
  ASSERT_NE(table, nullptr);
  EXPECT_EQ(table->num_rows(), 1);
  EXPECT_EQ(table->num_columns(), 2);

  // re convert to table
  result = store_->convertToTable(columns, {});
  ASSERT_TRUE(result.ok());
  table = result.ValueOrDie();
  ASSERT_NE(table, nullptr);
  EXPECT_EQ(table->num_rows(), 1);
  EXPECT_EQ(table->num_columns(), 2);
}

// Test convertToTable with index filtering: selecting three row indices out
// of 200 inserted documents gives a three-row table.
TEST_F(MemStoreTest, ConvertToTableWithIndexFiltering) {
  // Insert multiple documents
  for (size_t i = 0; i < 200; i++) {
    uint64_t doc_id = 0;
    Doc doc = CreateDoc(doc_id);
    EXPECT_EQ(store_->insert(doc), Status::OK());
  }

  std::vector<std::string> columns = {};
  std::vector<int> indices = {0, 2, 4};  // Select specific rows

  auto result = store_->convertToTable(columns, indices);
  // Fatal: ValueOrDie() aborts the process when the result holds an error.
  ASSERT_TRUE(result.ok());

  auto table = result.ValueOrDie();
  ASSERT_NE(table, nullptr);
  EXPECT_EQ(table->num_rows(), 3);  // Only selected rows
}

// Test fetch method: requesting four columns with no row indices returns an
// empty four-column table, and a repeated fetch gives the same result.
TEST_F(MemStoreTest, Fetch) {
  uint64_t doc_id = 0;
  Doc doc = CreateDoc(doc_id);
  EXPECT_EQ(store_->insert(doc), Status::OK());

  std::vector<std::string> columns = {"id", "name", "score", "age"};
  std::vector<int> indices = {};

  auto table = store_->fetch(columns, indices);
  // Fatal: the table is dereferenced right below.
  ASSERT_NE(table, nullptr);
  EXPECT_EQ(table->num_rows(), 0);
  EXPECT_EQ(table->num_columns(), 4);

  // re fetch
  table = store_->fetch(columns, indices);
  ASSERT_NE(table, nullptr);
  EXPECT_EQ(table->num_rows(), 0);
  EXPECT_EQ(table->num_columns(), 4);
}


// Test fetch method with more data: after 200 inserts into a locally created
// store, fetching three row indices returns a 3x4 table, repeatably.
TEST_F(MemStoreTest, FetchWithMoreData) {
  auto schema = GetCollectionSchema();
  MemForwardStore::Ptr store = std::make_shared<MemForwardStore>(
      schema, "./scalar.block.0", FileFormat::IPC);
  // Fatal on failure: the rest of the test needs an opened store.
  ASSERT_TRUE(store->Open().ok());

  for (size_t i = 0; i < 200; i++) {
    uint64_t doc_id = 0;
    Doc doc = CreateDoc(doc_id);
    EXPECT_EQ(store->insert(doc), Status::OK());
  }

  std::vector<std::string> columns = {"id", "name", "score", "age"};
  std::vector<int> indices = {0, 1, 2};

  auto table = store->fetch(columns, indices);
  // Fatal: the table is dereferenced right below.
  ASSERT_NE(table, nullptr);
  EXPECT_EQ(table->num_rows(), 3);
  EXPECT_EQ(table->num_columns(), 4);

  // re fetch
  table = store->fetch(columns, indices);
  ASSERT_NE(table, nullptr);
  EXPECT_EQ(table->num_rows(), 3);
  EXPECT_EQ(table->num_columns(), 4);
}

// Test fetch method for a single column and a single row index: the result
// is a 1x1 table, repeatably.
TEST_F(MemStoreTest, FetchOneField) {
  uint64_t doc_id = 0;
  Doc doc = CreateDoc(doc_id);
  EXPECT_EQ(store_->insert(doc), Status::OK());

  std::vector<std::string> columns = {"id"};
  std::vector<int> indices = {0};

  auto table = store_->fetch(columns, indices);
  // Fatal: the table is dereferenced right below.
  ASSERT_NE(table, nullptr);
  EXPECT_EQ(table->num_rows(), 1);
  EXPECT_EQ(table->num_columns(), 1);

  // re fetch
  table = store_->fetch(columns, indices);
  ASSERT_NE(table, nullptr);
  EXPECT_EQ(table->num_rows(), 1);
  EXPECT_EQ(table->num_columns(), 1);
}

// Fetching the nullable "id" column for row 0 -- a row inserted without an
// "id" value -- still returns a 1x1 table, repeatably.
TEST_F(MemStoreTest, FetchOneFieldWithNullable) {
  auto schema = GetCollectionSchema();
  std::string id = "id";
  schema->alter_field(id, FieldSchema::Ptr(new FieldSchema(
                              "id", DataType::UINT64, true, nullptr)));
  MemForwardStore::Ptr store = std::make_shared<MemForwardStore>(
      schema, "./scalar.block.0", FileFormat::IPC);
  // Fatal on failure: the rest of the test needs an opened store.
  ASSERT_TRUE(store->Open().ok());

  for (size_t i = 0; i < 10; i++) {
    uint64_t doc_id = 0;
    Doc doc = CreateDoc(doc_id);
    if (i % 2 == 0) {
      doc.remove("id");
    }
    EXPECT_EQ(store->insert(doc), Status::OK());
  }

  std::vector<std::string> columns = {"id"};
  std::vector<int> indices = {0};

  auto table = store->fetch(columns, indices);
  // Fatal: the table is dereferenced right below.
  ASSERT_NE(table, nullptr);
  EXPECT_EQ(table->num_rows(), 1);
  EXPECT_EQ(table->num_columns(), 1);

  // re fetch
  table = store->fetch(columns, indices);
  ASSERT_NE(table, nullptr);
  EXPECT_EQ(table->num_rows(), 1);
  EXPECT_EQ(table->num_columns(), 1);
}

// fetch() with an empty column list returns nullptr even though the store
// holds a document.
TEST_F(MemStoreTest, FetchWithEmptyColumns) {
  Doc sample = CreateDoc(uint64_t{0});
  EXPECT_EQ(store_->insert(sample), Status::OK());

  const std::vector<std::string> no_columns;
  auto fetched = store_->fetch(no_columns, std::vector<int>{});
  EXPECT_EQ(fetched, nullptr);
}

// Test fetch method with empty data: fetching a valid column from a store
// without documents returns an empty one-column table.
TEST_F(MemStoreTest, FetchWithEmptyData) {
  std::vector<std::string> columns = {"id"};
  auto table = store_->fetch(columns, std::vector<int>{});
  // Fatal: the table is dereferenced right below.
  ASSERT_NE(table, nullptr);
  EXPECT_EQ(table->num_rows(), 0);
  EXPECT_EQ(table->num_columns(), 1);
}

// Requesting a column name that the test expects to be unknown must yield a
// null table.
TEST_F(MemStoreTest, FetchWithInvalidColumns) {
  std::vector<std::string> unknown_columns = {"invalid_column"};
  auto result = store_->fetch(unknown_columns, std::vector<int>{});
  EXPECT_TRUE(result == nullptr);
}

// Fetches the LOCAL_ROW_ID column alongside "id" for three rows and checks
// the shape of the resulting table.
TEST_F(MemStoreTest, FetchWithLocalRowID) {
  for (size_t i = 0; i < 10; i++) {
    uint64_t doc_id = 0;
    Doc doc = CreateDoc(doc_id);
    EXPECT_EQ(store_->insert(doc), Status::OK());
  }

  auto table = store_->fetch({LOCAL_ROW_ID, "id"}, {0, 1, 2});
  // Fatal on null: the table is dereferenced by the checks that follow.
  ASSERT_NE(table, nullptr);
  EXPECT_EQ(table->num_rows(), 3);
  EXPECT_EQ(table->num_columns(), 2);
}

// Fetches the USER_ID column alongside "id" for three rows and checks the
// shape of the resulting table.
TEST_F(MemStoreTest, FetchWithUID) {
  for (size_t i = 0; i < 10; i++) {
    uint64_t doc_id = 0;
    Doc doc = CreateDoc(doc_id);
    EXPECT_EQ(store_->insert(doc), Status::OK());
  }

  auto table = store_->fetch({USER_ID, "id"}, {0, 1, 2});
  // Fatal on null: the table is dereferenced by the checks that follow.
  ASSERT_NE(table, nullptr);
  EXPECT_EQ(table->num_rows(), 3);
  EXPECT_EQ(table->num_columns(), 2);
}

// Fetches the GLOBAL_DOC_ID column alongside "id" for three rows and checks
// the shape of the resulting table.
TEST_F(MemStoreTest, FetchWithGlobalDocID) {
  for (size_t i = 0; i < 10; i++) {
    uint64_t doc_id = 0;
    Doc doc = CreateDoc(doc_id);
    EXPECT_EQ(store_->insert(doc), Status::OK());
  }

  auto table = store_->fetch({GLOBAL_DOC_ID, "id"}, {0, 1, 2});
  // Fatal on null: the table is dereferenced by the checks that follow.
  ASSERT_NE(table, nullptr);
  EXPECT_EQ(table->num_rows(), 3);
  EXPECT_EQ(table->num_columns(), 2);
}

// Requests LOCAL_ROW_ID as the third of four columns and checks that the
// column keeps that position and that its values follow the order of the
// requested row indices (including the repeated index 0).
TEST_F(MemStoreTest, FetchCheckOrderWithLocalRowIDMiddle) {
  for (size_t i = 0; i < 10; i++) {
    uint64_t doc_id = 0;
    Doc doc = CreateDoc(doc_id);
    EXPECT_EQ(store_->insert(doc), Status::OK());
  }

  auto table =
      store_->fetch({"id", "name", LOCAL_ROW_ID, "score"}, {0, 3, 6, 1, 0});
  // Fatal on null: the table is dereferenced by everything below.
  ASSERT_NE(table, nullptr);
  EXPECT_EQ(table->num_rows(), 5);
  // Column 2 is indexed below, so the column count must hold to continue.
  ASSERT_EQ(table->num_columns(), 4);
  auto field = table->schema()->field(2);
  EXPECT_EQ(field->name(), LOCAL_ROW_ID);

  // Read the LOCAL_ROW_ID column values row by row.
  auto id_column = table->column(2);
  ASSERT_GT(id_column->num_chunks(), 0);
  auto id_array =
      std::dynamic_pointer_cast<arrow::UInt64Array>(id_column->chunk(0));
  // The cast yields null when the chunk is not a UInt64Array; fail cleanly
  // rather than dereferencing a null pointer.
  ASSERT_NE(id_array, nullptr);

  // Compare as uint64_t to match the array's value type (no narrowing).
  std::vector<uint64_t> expected_ids = {0, 3, 6, 1, 0};
  std::vector<uint64_t> actual_ids;

  for (int64_t i = 0; i < id_array->length(); ++i) {
    actual_ids.push_back(id_array->Value(i));
  }

  EXPECT_EQ(actual_ids, expected_ids)
      << "ID column values don't match expected order";
}

// Requests LOCAL_ROW_ID as the last of four columns and checks that the
// column keeps that position and that its values follow the order of the
// requested row indices (including the repeated index 0).
TEST_F(MemStoreTest, FetchCheckOrderWithLocalRowIDEnd) {
  for (size_t i = 0; i < 10; i++) {
    uint64_t doc_id = 0;
    Doc doc = CreateDoc(doc_id);
    EXPECT_EQ(store_->insert(doc), Status::OK());
  }

  auto table =
      store_->fetch({"id", "name", "score", LOCAL_ROW_ID}, {0, 3, 6, 1, 0});
  // Fatal on null: the table is dereferenced by everything below.
  ASSERT_NE(table, nullptr);
  EXPECT_EQ(table->num_rows(), 5);
  // Column 3 is indexed below, so the column count must hold to continue.
  ASSERT_EQ(table->num_columns(), 4);
  auto field = table->schema()->field(3);
  EXPECT_EQ(field->name(), LOCAL_ROW_ID);

  // Read the LOCAL_ROW_ID column values row by row.
  auto id_column = table->column(3);
  ASSERT_GT(id_column->num_chunks(), 0);
  auto id_array =
      std::dynamic_pointer_cast<arrow::UInt64Array>(id_column->chunk(0));
  // The cast yields null when the chunk is not a UInt64Array; fail cleanly
  // rather than dereferencing a null pointer.
  ASSERT_NE(id_array, nullptr);

  // Compare as uint64_t to match the array's value type (no narrowing).
  std::vector<uint64_t> expected_ids = {0, 3, 6, 1, 0};
  std::vector<uint64_t> actual_ids;

  for (int64_t i = 0; i < id_array->length(); ++i) {
    actual_ids.push_back(id_array->Value(i));
  }

  EXPECT_EQ(actual_ids, expected_ids)
      << "ID column values don't match expected order";
}

// Single-row fetch of row 0: expects a batch of length 1 with four values,
// the first of which is a UInt64 scalar equal to 0.
TEST_F(MemStoreTest, FetchSingleRow) {
  constexpr uint64_t kDocCount = 5;
  for (uint64_t doc_id = 0; doc_id < kDocCount; ++doc_id) {
    Doc current = CreateDoc(doc_id);
    EXPECT_EQ(store_->insert(current), Status::OK());
  }

  ExecBatchPtr row = store_->fetch({"id", "name", "age", "score"}, 0);
  ASSERT_NE(row, nullptr);
  EXPECT_EQ(row->length, 1);
  EXPECT_EQ(row->values.size(), 4);

  // Inspect the first requested column ("id").
  auto first_scalar = row->values[0].scalar();
  ASSERT_TRUE(first_scalar != nullptr);
  auto as_uint64 = std::dynamic_pointer_cast<arrow::UInt64Scalar>(first_scalar);
  ASSERT_NE(as_uint64, nullptr);
  EXPECT_EQ(as_uint64->value, 0);
}

// Single-row fetch of row 5 out of ten: expects a batch of length 1 with four
// values, the first of which is a UInt64 scalar equal to 5.
TEST_F(MemStoreTest, FetchSpecificRowIndex) {
  constexpr uint64_t kDocCount = 10;
  for (uint64_t doc_id = 0; doc_id < kDocCount; ++doc_id) {
    Doc current = CreateDoc(doc_id);
    EXPECT_EQ(store_->insert(current), Status::OK());
  }

  ExecBatchPtr row = store_->fetch({"id", "name", "age", "score"}, 5);
  ASSERT_NE(row, nullptr);
  EXPECT_EQ(row->length, 1);
  EXPECT_EQ(row->values.size(), 4);

  // Inspect the first requested column ("id").
  auto first_scalar = row->values[0].scalar();
  ASSERT_TRUE(first_scalar != nullptr);
  auto as_uint64 = std::dynamic_pointer_cast<arrow::UInt64Scalar>(first_scalar);
  ASSERT_NE(as_uint64, nullptr);
  EXPECT_EQ(as_uint64->value, 5);
}

// A negative row index is expected to produce a null batch.
TEST_F(MemStoreTest, FetchSingleRowWithNegativeIndex) {
  Doc only_doc = CreateDoc(0);
  EXPECT_EQ(store_->insert(only_doc), Status::OK());

  ExecBatchPtr row = store_->fetch({"id", "name"}, -1);
  EXPECT_TRUE(row == nullptr);
}

// A row index beyond the five inserted rows is expected to produce a null
// batch.
TEST_F(MemStoreTest, FetchSingleRowWithOutOfRangeIndex) {
  constexpr uint64_t kDocCount = 5;
  for (uint64_t doc_id = 0; doc_id < kDocCount; ++doc_id) {
    Doc current = CreateDoc(doc_id);
    EXPECT_EQ(store_->insert(current), Status::OK());
  }

  ExecBatchPtr row = store_->fetch({"id", "name"}, 100);
  EXPECT_TRUE(row == nullptr);
}

// Mixing a valid column with "invalid_column" is expected to produce a null
// batch.
TEST_F(MemStoreTest, FetchSingleRowWithInvalidColumn) {
  Doc only_doc = CreateDoc(0);
  EXPECT_EQ(store_->insert(only_doc), Status::OK());

  ExecBatchPtr row = store_->fetch({"id", "invalid_column"}, 0);
  EXPECT_TRUE(row == nullptr);
}

// A single-row fetch with an empty column list is expected to produce a null
// batch.
TEST_F(MemStoreTest, FetchSingleRowWithEmptyColumns) {
  Doc only_doc = CreateDoc(0);
  EXPECT_EQ(store_->insert(only_doc), Status::OK());

  ExecBatchPtr row = store_->fetch({}, 0);
  EXPECT_TRUE(row == nullptr);
}

// With nothing inserted, fetching row 0 is expected to produce a null batch.
TEST_F(MemStoreTest, FetchSingleRowFromEmptyStore) {
  ExecBatchPtr row = store_->fetch({"id", "name"}, 0);
  EXPECT_TRUE(row == nullptr);
}

// Single-row fetch from a store whose only document has had its "id" field
// removed: a batch of length 1 with three values is still expected.
TEST_F(MemStoreTest, FetchSingleRowWithNullableData) {
  auto altered_schema = GetCollectionSchema();
  std::string field_name = "id";
  // Swap in a new definition for "id" before the store is created.
  altered_schema->alter_field(
      field_name, FieldSchema::Ptr(new FieldSchema("id", DataType::UINT64, true,
                                                   nullptr)));
  MemForwardStore::Ptr local_store = std::make_shared<MemForwardStore>(
      altered_schema, "./scalar.block.0", FileFormat::IPC);
  EXPECT_TRUE(local_store->Open().ok());

  uint64_t first_id = 0;
  Doc without_id = CreateDoc(first_id);
  without_id.remove("id");
  EXPECT_EQ(local_store->insert(without_id), Status::OK());

  ExecBatchPtr row = local_store->fetch({"id", "name", "age"}, 0);
  ASSERT_NE(row, nullptr);
  EXPECT_EQ(row->length, 1);
  EXPECT_EQ(row->values.size(), 3);
}


// Scans a single-document store twice; each pass is expected to deliver
// exactly one batch with one row and the four requested columns.
TEST_F(MemStoreTest, Scan) {
  uint64_t doc_id = 0;
  Doc doc = CreateDoc(doc_id);
  EXPECT_EQ(store_->insert(doc), Status::OK());

  std::vector<std::string> columns = {"id", "name", "score", "age"};

  auto table_reader = store_->scan(columns);
  // Fatal on null: the reader is dereferenced in the loop below.
  ASSERT_NE(table_reader, nullptr);

  int batch_count = 0;
  while (true) {
    std::shared_ptr<arrow::RecordBatch> batch;
    auto status = table_reader->ReadNext(&batch);
    EXPECT_TRUE(status.ok());
    // A null batch ends the loop.
    if (batch == nullptr) {
      break;
    }
    EXPECT_EQ(batch->num_rows(), 1);
    EXPECT_EQ(batch->num_columns(), 4);
    batch_count++;
  }
  EXPECT_EQ(batch_count, 1);

  // re scan
  table_reader = store_->scan(columns);
  ASSERT_NE(table_reader, nullptr);
  batch_count = 0;
  while (true) {
    std::shared_ptr<arrow::RecordBatch> batch;
    auto status = table_reader->ReadNext(&batch);
    EXPECT_TRUE(status.ok());
    if (batch == nullptr) {
      break;
    }
    EXPECT_EQ(batch->num_rows(), 1);
    EXPECT_EQ(batch->num_columns(), 4);
    batch_count++;
  }
  EXPECT_EQ(batch_count, 1);
}

// Scans a freshly created store holding 200 documents twice; each pass is
// expected to deliver a single batch with all 200 rows and four columns.
TEST_F(MemStoreTest, ScanWithMoreData) {
  auto schema = GetCollectionSchema();
  MemForwardStore::Ptr store = std::make_shared<MemForwardStore>(
      schema, "./scalar.block.0", FileFormat::IPC);
  // Nothing below is meaningful if the store failed to open.
  ASSERT_TRUE(store->Open().ok());

  for (size_t i = 0; i < 200; i++) {
    uint64_t doc_id = 0;
    Doc doc = CreateDoc(doc_id);
    EXPECT_EQ(store->insert(doc), Status::OK());
  }

  std::vector<std::string> columns = {"id", "name", "score", "age"};

  auto table_reader = store->scan(columns);
  // Fatal on null: the reader is dereferenced in the loop below.
  ASSERT_NE(table_reader, nullptr);

  int batch_count = 0;
  while (true) {
    std::shared_ptr<arrow::RecordBatch> batch;
    auto status = table_reader->ReadNext(&batch);
    EXPECT_TRUE(status.ok());
    // A null batch ends the loop.
    if (batch == nullptr) {
      break;
    }
    EXPECT_EQ(batch->num_rows(), 200);
    EXPECT_EQ(batch->num_columns(), 4);
    batch_count++;
  }
  EXPECT_EQ(batch_count, 1);

  // re scan
  table_reader = store->scan(columns);
  ASSERT_NE(table_reader, nullptr);
  batch_count = 0;
  while (true) {
    std::shared_ptr<arrow::RecordBatch> batch;
    auto status = table_reader->ReadNext(&batch);
    EXPECT_TRUE(status.ok());
    if (batch == nullptr) {
      break;
    }
    EXPECT_EQ(batch->num_rows(), 200);
    EXPECT_EQ(batch->num_columns(), 4);
    batch_count++;
  }
  EXPECT_EQ(batch_count, 1);
}

// Scanning with an empty column list is expected to produce a null reader,
// even when the store already holds a document.
TEST_F(MemStoreTest, ScanWithEmptyColumns) {
  const uint64_t seed_id = 0;
  Doc seeded = CreateDoc(seed_id);
  EXPECT_EQ(store_->insert(seeded), Status::OK());

  std::vector<std::string> no_columns;
  auto reader = store_->scan(no_columns);
  EXPECT_TRUE(reader == nullptr);
}

// Scanning an empty store is expected to return a valid reader whose first
// ReadNext succeeds and yields a null batch.
TEST_F(MemStoreTest, ScanWithEmptyData) {
  std::vector<std::string> columns = {"id"};
  auto table_reader = store_->scan(columns);
  // Fatal on null: the reader is dereferenced on the next line.
  ASSERT_NE(table_reader, nullptr);
  std::shared_ptr<arrow::RecordBatch> batch;
  auto status = table_reader->ReadNext(&batch);
  EXPECT_TRUE(status.ok());
  EXPECT_EQ(batch, nullptr);
}

// Scanning a column name that the test expects to be unknown must yield a
// null reader.
TEST_F(MemStoreTest, ScanWithInvalidColumns) {
  std::vector<std::string> unknown_columns = {"invalid_column"};
  auto reader = store_->scan(unknown_columns);
  EXPECT_TRUE(reader == nullptr);
}

// Scans three schema columns plus USER_ID from a single-document store and
// expects exactly one batch with one row and four columns.
TEST_F(MemStoreTest, ScanWithWithUID) {
  uint64_t doc_id = 0;
  Doc doc = CreateDoc(doc_id);
  EXPECT_EQ(store_->insert(doc), Status::OK());

  std::vector<std::string> columns = {"id", "name", "score", USER_ID};

  auto table_reader = store_->scan(columns);
  // Fatal on null: the reader is dereferenced in the loop below.
  ASSERT_NE(table_reader, nullptr);

  int batch_count = 0;
  while (true) {
    std::shared_ptr<arrow::RecordBatch> batch;
    auto status = table_reader->ReadNext(&batch);
    EXPECT_TRUE(status.ok());
    // A null batch ends the loop.
    if (batch == nullptr) {
      break;
    }
    EXPECT_EQ(batch->num_rows(), 1);
    EXPECT_EQ(batch->num_columns(), 4);
    batch_count++;
  }
  EXPECT_EQ(batch_count, 1);
}

// Scans three schema columns plus GLOBAL_DOC_ID from a single-document store
// and expects exactly one batch with one row and four columns.
TEST_F(MemStoreTest, ScanWithGlobalDocID) {
  uint64_t doc_id = 0;
  Doc doc = CreateDoc(doc_id);
  EXPECT_EQ(store_->insert(doc), Status::OK());

  std::vector<std::string> columns = {"id", "name", "score", GLOBAL_DOC_ID};

  auto table_reader = store_->scan(columns);
  // Fatal on null: the reader is dereferenced in the loop below.
  ASSERT_NE(table_reader, nullptr);

  int batch_count = 0;
  while (true) {
    std::shared_ptr<arrow::RecordBatch> batch;
    auto status = table_reader->ReadNext(&batch);
    EXPECT_TRUE(status.ok());
    // A null batch ends the loop.
    if (batch == nullptr) {
      break;
    }
    EXPECT_EQ(batch->num_rows(), 1);
    EXPECT_EQ(batch->num_columns(), 4);
    batch_count++;
  }
  EXPECT_EQ(batch_count, 1);
}

// Inserts 100 documents, flushes, and checks that a file exists at the
// store's path afterwards.
TEST_F(MemStoreTest, FlushWithData) {
  constexpr int kDocCount = 100;
  for (int n = 0; n < kDocCount; ++n) {
    uint64_t current_id = n;
    Doc current = CreateDoc(current_id);
    EXPECT_EQ(store_->insert(current), Status::OK());
  }

  EXPECT_EQ(store_->flush(), Status::OK());

  // The flushed file should now be present on disk.
  auto flushed_path = store_->path();
  const bool file_present = std::filesystem::exists(flushed_path);
  EXPECT_EQ(file_present, true);
}

// Runs four asynchronous tasks that each insert 100 documents into the shared
// store, then checks the final row count equals the total number of inserts.
TEST_F(MemStoreTest, ThreadSafety) {
  const int num_threads = 4;
  const int inserts_per_thread = 100;

  std::vector<std::future<void>> pending;

  for (int worker = 0; worker < num_threads; ++worker) {
    // Each task gets its own block of document ids.
    auto task = [this, worker]() {
      for (int n = 0; n < inserts_per_thread; ++n) {
        uint64_t doc_id = worker * inserts_per_thread + n;
        store_->insert(CreateDoc(doc_id));
      }
    };
    pending.push_back(std::async(std::launch::async, task));
  }

  // Block until every task has finished.
  for (auto &handle : pending) {
    handle.wait();
  }

  // Every insert from every task should be reflected in the row count.
  EXPECT_EQ(store_->num_rows(), num_threads * inserts_per_thread);
}

// A store built on a default-constructed CollectionSchema is still expected
// to open successfully.
TEST_F(MemStoreTest, EmptySchema) {
  auto blank_schema = std::make_shared<CollectionSchema>();
  auto store_on_blank = std::make_unique<MemForwardStore>(
      blank_schema, "./scalar.block.0", FileFormat::IPC);

  const bool opened = store_on_blank->Open().ok();
  EXPECT_TRUE(opened);
}

// Reads every record batch of the Arrow IPC file at `filename` and assembles
// them into one table. Any Arrow error raised while opening the file, opening
// the reader, reading a batch, or building the table is returned to the
// caller through the Result.
arrow::Result<std::shared_ptr<arrow::Table>> ReadArrowIPCFile(
    const std::string &filename) {
  ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::io::ReadableFile> source,
                        arrow::io::ReadableFile::Open(filename));
  ARROW_ASSIGN_OR_RAISE(
      std::shared_ptr<arrow::ipc::RecordBatchFileReader> reader,
      arrow::ipc::RecordBatchFileReader::Open(source));

  // Pull the batches out one at a time, in file order.
  std::vector<std::shared_ptr<arrow::RecordBatch>> collected;
  const auto batch_total = reader->num_record_batches();
  for (int index = 0; index < batch_total; ++index) {
    ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::RecordBatch> current,
                          reader->ReadRecordBatch(index));
    collected.push_back(current);
  }

  return arrow::Table::FromRecordBatches(collected);
}

// Inserts 10010 documents, flushes and closes the store, then reads the file
// at the store's path back as Arrow IPC and checks row count, column count
// and the presence of the four schema columns.
TEST_F(MemStoreTest, Flush) {
  const size_t MAX_DOC = 10010;
  for (size_t doc_id = 0; doc_id < MAX_DOC; ++doc_id) {
    EXPECT_EQ(store_->insert(CreateDoc(doc_id)), Status::OK());
  }
  EXPECT_EQ(store_->flush(), Status::OK());
  EXPECT_EQ(store_->close(), Status::OK());

  auto loaded = ReadArrowIPCFile(store_->path());
  ASSERT_TRUE(loaded.ok())
      << "Failed to read Arrow IPC file: " << loaded.status().ToString();

  auto persisted = loaded.ValueOrDie();
  EXPECT_EQ(persisted->num_rows(), MAX_DOC);
  // Four schema columns plus two additional columns in the written file.
  EXPECT_EQ(persisted->num_columns(), 2 + 4);

  auto names = persisted->ColumnNames();
  const auto names_end = names.end();
  EXPECT_NE(std::find(names.begin(), names.end(), "id"), names_end);
  EXPECT_NE(std::find(names.begin(), names.end(), "name"), names_end);
  EXPECT_NE(std::find(names.begin(), names.end(), "age"), names_end);
  EXPECT_NE(std::find(names.begin(), names.end(), "score"), names_end);
}


// Flushes three times with inserts in between (10010, then 10, then 10 more
// documents), closes the store, and checks that the file read back as Arrow
// IPC contains every inserted row and the expected columns.
TEST_F(MemStoreTest, ReFlush) {
  const size_t MAX_DOC = 10010;

  // Inserts documents with ids in [first, last) and then flushes the store.
  const auto insert_then_flush = [this](size_t first, size_t last) {
    for (size_t doc_id = first; doc_id < last; ++doc_id) {
      EXPECT_EQ(store_->insert(CreateDoc(doc_id)), Status::OK());
    }
    EXPECT_EQ(store_->flush(), Status::OK());
  };

  insert_then_flush(0, MAX_DOC);
  insert_then_flush(MAX_DOC, MAX_DOC + 10);
  insert_then_flush(MAX_DOC + 10, MAX_DOC + 20);

  EXPECT_EQ(store_->close(), Status::OK());

  auto loaded = ReadArrowIPCFile(store_->path());
  ASSERT_TRUE(loaded.ok())
      << "Failed to read Arrow IPC file: " << loaded.status().ToString();

  auto persisted = loaded.ValueOrDie();
  EXPECT_EQ(persisted->num_rows(), MAX_DOC + 20);
  // Four schema columns plus two additional columns in the written file.
  EXPECT_EQ(persisted->num_columns(), 2 + 4);

  auto names = persisted->ColumnNames();
  const auto names_end = names.end();
  EXPECT_NE(std::find(names.begin(), names.end(), "id"), names_end);
  EXPECT_NE(std::find(names.begin(), names.end(), "name"), names_end);
  EXPECT_NE(std::find(names.begin(), names.end(), "age"), names_end);
  EXPECT_NE(std::find(names.begin(), names.end(), "score"), names_end);
}

// Creates a store with an explicit max buffer size, inserts 105 documents and
// checks that the batches returned by scan() have the row counts predicted by
// the test's own model of where batches are cut.
TEST_F(MemStoreTest, MaxCacheBytesLimit) {
  uint32_t max_cache_rows = 105;
  uint32_t max_buffer_size = 260 * 100 * 100;
  // Model used by this test: a batch is closed once the accumulated
  // Doc::memory_usage() reaches max_buffer_size / 100.
  // NOTE(review): presumably mirrors the store's internal threshold -- confirm.
  uint32_t max_cache_size = max_buffer_size / 100;
  std::vector<int> batch_num_rows;

  auto schema = GetCollectionSchema();
  MemForwardStore::Ptr store = std::make_shared<MemForwardStore>(
      schema, "./scalar.block.0", FileFormat::IPC, max_buffer_size);
  // Nothing below is meaningful if the store failed to open.
  ASSERT_TRUE(store->Open().ok());

  // Insert the documents while recording the expected rows per batch.
  uint32_t cur_doc_total_bytes = 0;
  int cur_batch_num_row = 0;
  for (uint64_t i = 0; i < max_cache_rows; ++i) {
    Doc doc = CreateDoc(i);
    EXPECT_EQ(store->insert(doc), Status::OK());
    cur_doc_total_bytes += doc.memory_usage();
    cur_batch_num_row++;
    if (cur_doc_total_bytes >= max_cache_size) {
      batch_num_rows.push_back(cur_batch_num_row);
      cur_doc_total_bytes = 0;
      cur_batch_num_row = 0;
    }
  }
  // Rows left over after the last full batch form a final, partial batch.
  if (cur_batch_num_row > 0) {
    batch_num_rows.push_back(cur_batch_num_row);
  }

  EXPECT_EQ(store->num_rows(), max_cache_rows);

  std::vector<std::string> columns = {"id", "name", "score", "age"};
  auto table_reader = store->scan(columns);
  // Fatal on null: the reader is dereferenced in the loop below.
  ASSERT_NE(table_reader, nullptr);

  int total_doc_cnt = 0;
  size_t cur_batch_idx = 0;
  while (true) {
    // Declared per iteration so a stale batch from the previous round can
    // never keep the loop alive.
    std::shared_ptr<arrow::RecordBatch> batch;
    auto status = table_reader->ReadNext(&batch);
    // Stop immediately on a read error instead of looping on it.
    ASSERT_TRUE(status.ok());
    if (batch == nullptr) {
      break;
    }
    EXPECT_EQ(batch->num_columns(), 4);
    total_doc_cnt += batch->num_rows();
    // Guard the lookup: more batches than predicted would otherwise index
    // past the end of batch_num_rows.
    ASSERT_LT(cur_batch_idx, batch_num_rows.size())
        << "scan returned more batches than expected";
    EXPECT_EQ(batch->num_rows(), batch_num_rows[cur_batch_idx++]);
  }
  EXPECT_EQ(total_doc_cnt, max_cache_rows);
}


// Inserts 100 documents built from the TestHelper "normal" schema and checks
// the values of the "int32" and "array_int32" columns for rows 1, 2 and 3.
TEST_F(MemStoreTest, AllDataType) {
  uint32_t max_cache_rows = 100;
  auto all_type_schema =
      test::TestHelper::CreateNormalSchema(false, "test_collection");

  MemForwardStore::Ptr store = std::make_shared<MemForwardStore>(
      all_type_schema, "./scalar.block.0", FileFormat::IPC, 64 * 1024 * 1024);
  // Nothing below is meaningful if the store failed to open.
  ASSERT_TRUE(store->Open().ok());

  for (uint64_t i = 0; i < max_cache_rows; ++i) {
    Doc doc = test::TestHelper::CreateDoc(i, *all_type_schema);
    EXPECT_EQ(store->insert(std::move(doc)), Status::OK());
  }
  EXPECT_EQ(store->num_rows(), max_cache_rows);

  std::vector<std::string> columns = {"int32", "array_int32"};

  auto table = store->fetch(columns, {1, 2, 3});
  // Fatal on null: the table is dereferenced by everything below.
  ASSERT_NE(table, nullptr);
  EXPECT_EQ(table->num_rows(), 3);
  // Both columns are indexed below, so the count must hold to continue.
  ASSERT_EQ(table->num_columns(), 2);

  for (size_t j = 0; j < columns.size(); ++j) {
    auto column = table->column(j);
    for (int k = 0; k < column->num_chunks(); ++k) {
      auto array = column->chunk(k);
      // The expected value is derived from the position inside the chunk
      // (row i of the result corresponds to fetched index i + 1).
      // NOTE(review): this assumes the fetched rows arrive in a single chunk.
      if (array->type()->id() == arrow::Type::INT32) {
        auto int_array = std::static_pointer_cast<arrow::Int32Array>(array);
        for (int i = 0; i < array->length(); ++i) {
          int32_t value = int_array->Value(i);
          EXPECT_EQ(value, i + 1);
        }
      } else if (array->type()->id() == arrow::Type::LIST) {
        auto list_array = std::static_pointer_cast<arrow::ListArray>(array);
        for (int i = 0; i < array->length(); ++i) {
          auto list_value = list_array->value_slice(i);
          auto list_value_array =
              std::static_pointer_cast<arrow::Int32Array>(list_value);
          // Each list is expected to hold ten copies of the same value.
          EXPECT_EQ(list_value_array->length(), 10);
          for (int m = 0; m < list_value_array->length(); ++m) {
            int32_t value = list_value_array->Value(m);
            EXPECT_EQ(value, i + 1);
          }
        }
      }
    }
  }
}

// The fixture store must exist and expose a non-null physical schema.
TEST_F(MemStoreTest, PhysicSchema) {
  ASSERT_TRUE(store_ != nullptr);
  auto physical = store_->physic_schema();
  EXPECT_TRUE(physical != nullptr);
}

// With nothing inserted, the fixture store reports not-full and zero bytes.
TEST_F(MemStoreTest, IsFull) {
  ASSERT_TRUE(store_ != nullptr);
  EXPECT_FALSE(store_->is_full());
  EXPECT_EQ(store_->total_bytes(), 0);
}

// With nothing inserted, the fixture store reports zero total bytes.
TEST_F(MemStoreTest, TotalBytes) {
  ASSERT_TRUE(store_ != nullptr);
  const auto bytes_in_use = store_->total_bytes();
  EXPECT_EQ(bytes_in_use, 0);
}

// =========================== performance test ===============================
#ifdef PERFORMANCE_TEST
// Manual performance probe (compiled only when PERFORMANCE_TEST is defined).
// Times a bulk insert of one million documents, three fetches separated by
// small additional inserts, and a walk over the INT32 chunks of the last
// fetched table; timings are printed to stdout.
// NOTE(review): the fetch calls pass `{}` as the second argument, whereas the
// other tests in this file spell an empty index list as `std::vector<int>{}`.
// Confirm that `{}` selects the intended overload before relying on this test.
TEST_F(MemStoreTest, General) {
  auto collection_schema = GetCollectionSchema();
  MemForwardStore::Ptr store = std::make_shared<MemForwardStore>(
      collection_schema, "./scalar.block.0", FileFormat::IPC);
  EXPECT_TRUE(store->Open().ok());

  const size_t MAX_DOC = 1000000;

  auto start = std::chrono::system_clock::now();
  // size_t index: avoids comparing a signed int against MAX_DOC.
  for (size_t i = 0; i < MAX_DOC; i++) {
    EXPECT_EQ(store->insert(CreateDoc(i)), Status::OK());
  }
  auto end = std::chrono::system_clock::now();
  auto cost = std::chrono::duration_cast<std::chrono::milliseconds>(end - start)
                  .count();
  std::cout << "insert cost " << cost << "ms" << std::endl;

  start = std::chrono::system_clock::now();
  auto table = store->fetch({"age", "name", "score"}, {});
  end = std::chrono::system_clock::now();
  cost = std::chrono::duration_cast<std::chrono::milliseconds>(end - start)
             .count();
  std::cout << "fetch cost " << cost << "ms" << std::endl;

  int64_t num_rows = table->num_rows();
  int64_t num_cols = table->num_columns();
  // The first label previously read "num_cols" although it prints num_rows.
  std::cout << "num_rows: " << num_rows << " num_cols:" << num_cols
            << std::endl;

  for (size_t i = MAX_DOC; i < MAX_DOC + 100; i++) {
    EXPECT_EQ(store->insert(CreateDoc(i)), Status::OK());
  }

  start = std::chrono::system_clock::now();
  table = store->fetch({"age", "name", "score"}, {});
  end = std::chrono::system_clock::now();
  cost = std::chrono::duration_cast<std::chrono::milliseconds>(end - start)
             .count();
  std::cout << "re fetch cost " << cost << "ms" << std::endl;

  num_rows = table->num_rows();
  num_cols = table->num_columns();
  std::cout << "num_rows: " << num_rows << " num_cols:" << num_cols
            << std::endl;

  for (size_t i = MAX_DOC + 100; i < MAX_DOC + 200; i++) {
    EXPECT_EQ(store->insert(CreateDoc(i)), Status::OK());
  }

  start = std::chrono::system_clock::now();
  table = store->fetch({"age", "name", "score"}, {});
  end = std::chrono::system_clock::now();
  cost = std::chrono::duration_cast<std::chrono::milliseconds>(end - start)
             .count();
  std::cout << "re re fetch cost " << cost << "ms" << std::endl;

  num_rows = table->num_rows();
  num_cols = table->num_columns();
  std::cout << "num_rows: " << num_rows << " num_cols:" << num_cols
            << std::endl;

  std::shared_ptr<arrow::Schema> schema = table->schema();
  auto num_fields = schema->num_fields();
  std::cout << "num_fields: " << num_fields << std::endl;

  start = std::chrono::system_clock::now();

  // Touch every value of every INT32 chunk to time a full walk.
  for (int j = 0; j < schema->num_fields(); ++j) {
    auto column = table->column(j);
    for (int k = 0; k < column->num_chunks(); ++k) {
      auto array = column->chunk(k);
      if (array->type()->id() == arrow::Type::INT32) {
        auto int_array = std::static_pointer_cast<arrow::Int32Array>(array);
        for (int i = 0; i < array->length(); ++i) {
          // The value is read only for timing purposes.
          [[maybe_unused]] int32_t value = int_array->Value(i);
        }
      }
    }
  }
  end = std::chrono::system_clock::now();
  cost = std::chrono::duration_cast<std::chrono::milliseconds>(end - start)
             .count();
  std::cout << "scan all cost " << cost << "ms" << std::endl;

  // Print the very first value when the first column is INT32.
  auto first_column = table->column(0);
  if (first_column->num_chunks() > 0) {
    auto array = first_column->chunk(0);
    if (array->type()->id() == arrow::Type::INT32) {
      auto int_array = std::static_pointer_cast<arrow::Int32Array>(array);
      int32_t value = int_array->Value(0);
      std::cout << "Value at [0,0]: " << value << std::endl;
    }
  }

  EXPECT_EQ(store->is_full(), true);
}

#endif


================================================
FILE: tests/db/index/storage/mmap_store_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cstdint>
#include <filesystem>
#include <iostream>
#include <memory>
#include <thread>
#include <arrow/api.h>
#include <arrow/result.h>
#include <arrow/table.h>
#include <gtest/gtest.h>
#include "db/common/constants.h"
#define private public
#define protected public
#include "db/index/storage/mmap_forward_store.h"
#undef private
#undef protected
#include "utils/utils.h"

using namespace zvec;

// Fixture for the MmapForwardStore tests. Before each test it writes one IPC
// and one Parquet test file through TestHelper::WriteTestFile; after each
// test it removes both files again.
class MmapStoreTest : public testing::Test {
 protected:
  // Writes both input files. A write failure is reported as a fatal test
  // failure (carrying the helper's message) rather than terminating the whole
  // test binary, so the remaining tests still run and get reported.
  void SetUp() override {
    auto s = test::TestHelper::WriteTestFile(ipc_path, FileFormat::IPC);
    ASSERT_TRUE(s.ok()) << s.message();
    s = test::TestHelper::WriteTestFile(parquet_path, FileFormat::PARQUET);
    ASSERT_TRUE(s.ok()) << s.message();
  }

  // Deletes whichever of the two files exist.
  void TearDown() override {
    if (std::filesystem::exists(ipc_path)) {
      std::filesystem::remove(ipc_path);
    }
    if (std::filesystem::exists(parquet_path)) {
      std::filesystem::remove(parquet_path);
    }
  }

  // File names are relative, i.e. resolved against the working directory.
  std::string ipc_path = "test.ipc";
  std::string parquet_path = "test.parquet";
};


// Smoke test for the IPC file: fetches five rows (with a repeated index) and
// then scans three columns, expecting four non-empty batches.
TEST_F(MmapStoreTest, GeneralIPC) {
  auto ipc_store = std::make_shared<MmapForwardStore>(ipc_path);
  ASSERT_TRUE(ipc_store->Open().ok());
  TablePtr ipc_table =
      ipc_store->fetch({"id", "name", "score"}, {0, 3, 6, 1, 0});
  ASSERT_TRUE(ipc_table != nullptr);
  EXPECT_EQ(ipc_table->num_rows(), 5);

  auto table_reader = ipc_store->scan({"id", "name", "score"});
  // Fatal on null: the reader is dereferenced in the loop below.
  ASSERT_TRUE(table_reader != nullptr);
  int batch_count = 0;
  while (true) {
    std::shared_ptr<arrow::RecordBatch> batch;
    auto status = table_reader->ReadNext(&batch);
    ASSERT_TRUE(status.ok());
    // A null batch ends the loop.
    if (batch == nullptr) {
      break;
    }
    ASSERT_GT(batch->num_rows(), 0);
    batch_count++;
  }
  ASSERT_EQ(batch_count, 4);
}

// Fetching LOCAL_ROW_ID together with three file columns yields a 5x4 table.
TEST_F(MmapStoreTest, IPCFetchWithLocalRowID) {
  auto store = std::make_shared<MmapForwardStore>(ipc_path);
  ASSERT_TRUE(store->Open().ok());

  TablePtr fetched =
      store->fetch({LOCAL_ROW_ID, "id", "name", "score"}, {0, 3, 6, 1, 0});
  ASSERT_TRUE(fetched != nullptr);
  EXPECT_EQ(fetched->num_columns(), 4);
  EXPECT_EQ(fetched->num_rows(), 5);
}

// Requests LOCAL_ROW_ID as the third of four columns from the IPC file and
// checks that the column keeps that position and that its values follow the
// order of the requested row indices (including the repeated index 0).
TEST_F(MmapStoreTest, IPCCheckOrderWithLocalRowIDMiddle) {
  auto ipc_store = std::make_shared<MmapForwardStore>(ipc_path);
  ASSERT_TRUE(ipc_store->Open().ok());
  TablePtr mmap_table =
      ipc_store->fetch({"id", "name", LOCAL_ROW_ID, "score"}, {0, 3, 6, 1, 0});
  ASSERT_TRUE(mmap_table != nullptr);
  EXPECT_EQ(mmap_table->num_rows(), 5);
  // Column 2 is indexed below, so the column count must hold to continue.
  ASSERT_EQ(mmap_table->num_columns(), 4);
  auto field = mmap_table->schema()->field(2);
  EXPECT_EQ(field->name(), LOCAL_ROW_ID);

  // Read the LOCAL_ROW_ID column values row by row.
  auto id_column = mmap_table->column(2);
  ASSERT_GT(id_column->num_chunks(), 0);
  auto id_array =
      std::dynamic_pointer_cast<arrow::UInt64Array>(id_column->chunk(0));
  // The cast yields null when the chunk is not a UInt64Array; fail cleanly
  // rather than dereferencing a null pointer.
  ASSERT_TRUE(id_array != nullptr);

  // Compare as uint64_t to match the array's value type (no narrowing).
  std::vector<uint64_t> expected_ids = {0, 3, 6, 1, 0};
  std::vector<uint64_t> actual_ids;

  for (int64_t i = 0; i < id_array->length(); ++i) {
    actual_ids.push_back(id_array->Value(i));
  }

  EXPECT_EQ(actual_ids, expected_ids)
      << "ID column values don't match expected order";
}

// Requests LOCAL_ROW_ID as the last of four columns from the IPC file and
// checks that the column keeps that position and that its values follow the
// order of the requested row indices (including the repeated index 0).
TEST_F(MmapStoreTest, IPCCheckOrderWithLocalRowIDEnd) {
  auto ipc_store = std::make_shared<MmapForwardStore>(ipc_path);
  ASSERT_TRUE(ipc_store->Open().ok());
  TablePtr mmap_table =
      ipc_store->fetch({"id", "name", "score", LOCAL_ROW_ID}, {0, 3, 6, 1, 0});
  ASSERT_TRUE(mmap_table != nullptr);
  EXPECT_EQ(mmap_table->num_rows(), 5);
  // Column 3 is indexed below, so the column count must hold to continue.
  ASSERT_EQ(mmap_table->num_columns(), 4);
  auto field = mmap_table->schema()->field(3);
  EXPECT_EQ(field->name(), LOCAL_ROW_ID);

  // Read the LOCAL_ROW_ID column values row by row.
  auto id_column = mmap_table->column(3);
  ASSERT_GT(id_column->num_chunks(), 0);
  auto id_array =
      std::dynamic_pointer_cast<arrow::UInt64Array>(id_column->chunk(0));
  // The cast yields null when the chunk is not a UInt64Array; fail cleanly
  // rather than dereferencing a null pointer.
  ASSERT_TRUE(id_array != nullptr);

  // Compare as uint64_t to match the array's value type (no narrowing).
  std::vector<uint64_t> expected_ids = {0, 3, 6, 1, 0};
  std::vector<uint64_t> actual_ids;

  for (int64_t i = 0; i < id_array->length(); ++i) {
    actual_ids.push_back(id_array->Value(i));
  }

  EXPECT_EQ(actual_ids, expected_ids)
      << "ID column values don't match expected order";
}


// Fetching USER_ID together with three file columns yields a 5x4 table.
TEST_F(MmapStoreTest, IPCFetchWithUID) {
  auto store = std::make_shared<MmapForwardStore>(ipc_path);
  ASSERT_TRUE(store->Open().ok());

  TablePtr fetched =
      store->fetch({USER_ID, "id", "name", "score"}, {0, 3, 6, 1, 0});
  ASSERT_TRUE(fetched != nullptr);
  EXPECT_EQ(fetched->num_columns(), 4);
  EXPECT_EQ(fetched->num_rows(), 5);
}

// Fetching GLOBAL_DOC_ID together with three file columns yields a 5x4 table.
TEST_F(MmapStoreTest, IPCFetchWithGlobalDocID) {
  auto store = std::make_shared<MmapForwardStore>(ipc_path);
  ASSERT_TRUE(store->Open().ok());

  TablePtr fetched =
      store->fetch({GLOBAL_DOC_ID, "id", "name", "score"}, {0, 3, 6, 1, 0});
  ASSERT_TRUE(fetched != nullptr);
  EXPECT_EQ(fetched->num_columns(), 4);
  EXPECT_EQ(fetched->num_rows(), 5);
}

// An empty column list (with no row indices) is expected to give a null table.
TEST_F(MmapStoreTest, IPCFetchWithEmptyColumns) {
  auto store = std::make_shared<MmapForwardStore>(ipc_path);
  ASSERT_TRUE(store->Open().ok());

  TablePtr fetched = store->fetch({}, std::vector<int>{});
  EXPECT_TRUE(fetched == nullptr);
}

// Mixing a valid column with "unknown_column" is expected to give a null
// table.
TEST_F(MmapStoreTest, IPCFetchWithInvalidColumns) {
  auto store = std::make_shared<MmapForwardStore>(ipc_path);
  ASSERT_TRUE(store->Open().ok());

  TablePtr fetched = store->fetch({"id", "unknown_column"}, std::vector<int>{});
  EXPECT_TRUE(fetched == nullptr);
}

// Valid columns with no row indices are expected to give an empty table that
// still carries the three requested columns.
TEST_F(MmapStoreTest, IPCFetchWithEmptyIndices) {
  auto store = std::make_shared<MmapForwardStore>(ipc_path);
  ASSERT_TRUE(store->Open().ok());

  TablePtr fetched = store->fetch({"id", "name", "score"}, std::vector<int>{});
  ASSERT_TRUE(fetched != nullptr);
  EXPECT_EQ(fetched->num_rows(), 0);
  EXPECT_EQ(fetched->num_columns(), 3);
}

// Row indices outside the file (negative, or past the end) are expected to
// give a null table.
TEST_F(MmapStoreTest, IPCFetchWithInvalidIndices) {
  auto store = std::make_shared<MmapForwardStore>(ipc_path);
  ASSERT_TRUE(store->Open().ok());

  // Negative index.
  TablePtr fetched = store->fetch({"id"}, std::vector<int>{-1});
  EXPECT_TRUE(fetched == nullptr);

  // Index past the end of the data.
  fetched = store->fetch({"id"}, std::vector<int>{100});
  EXPECT_TRUE(fetched == nullptr);
}

// An empty column list is expected to give a null table even when the row
// indices themselves are valid.
TEST_F(MmapStoreTest, IPCFetchWithEmptyColumnsValidIndices) {
  auto store = std::make_shared<MmapForwardStore>(ipc_path);
  ASSERT_TRUE(store->Open().ok());

  TablePtr fetched = store->fetch({}, {0, 1});
  EXPECT_TRUE(fetched == nullptr);
}

// Scans three columns of the IPC file and expects at least one batch, three
// columns per batch, and ten rows overall.
TEST_F(MmapStoreTest, IPCScan) {
  auto store = std::make_shared<MmapForwardStore>(ipc_path);
  ASSERT_TRUE(store->Open().ok());

  auto reader = store->scan({"id", "name", "score"});
  ASSERT_TRUE(reader != nullptr);
  EXPECT_NE(reader->schema(), nullptr);

  int batches_seen = 0;
  int rows_seen = 0;
  for (;;) {
    std::shared_ptr<arrow::RecordBatch> current;
    auto read_status = reader->ReadNext(&current);
    ASSERT_TRUE(read_status.ok());
    // A null batch ends the loop.
    if (current == nullptr) {
      break;
    }
    EXPECT_GT(current->num_rows(), 0);
    EXPECT_EQ(current->num_columns(), 3);
    ++batches_seen;
    rows_seen += current->num_rows();
  }
  EXPECT_GT(batches_seen, 0);
  EXPECT_EQ(rows_seen, 10);
}

// Scans only "id" and "name" from the IPC file and expects at least one
// batch, two columns per batch, and ten rows overall.
TEST_F(MmapStoreTest, IPCScanWithSelectColumns) {
  auto ipc_store = std::make_shared<MmapForwardStore>(ipc_path);
  ASSERT_TRUE(ipc_store->Open().ok());
  auto table_reader = ipc_store->scan({"id", "name"});
  // Fatal on null (as in IPCScan): the reader is dereferenced below.
  ASSERT_TRUE(table_reader != nullptr);
  int batch_count = 0;
  int total_rows = 0;
  while (true) {
    std::shared_ptr<arrow::RecordBatch> batch;
    auto status = table_reader->ReadNext(&batch);
    ASSERT_TRUE(status.ok());
    // A null batch ends the loop.
    if (batch == nullptr) {
      break;
    }
    EXPECT_GT(batch->num_rows(), 0);
    EXPECT_EQ(batch->num_columns(), 2);
    batch_count++;
    total_rows += batch->num_rows();
  }
  EXPECT_GT(batch_count, 0);
  EXPECT_EQ(total_rows, 10);
}

// Scanning with "unknown_column" in the list is expected to give a null
// reader.
TEST_F(MmapStoreTest, IPCScanWithInvalidColumn) {
  auto store = std::make_shared<MmapForwardStore>(ipc_path);
  ASSERT_TRUE(store->Open().ok());

  auto reader = store->scan({"id", "unknown_column"});
  ASSERT_TRUE(reader == nullptr);
}

// Scans the IPCfile with the USER_ID column requested alongside the regular
// columns and checks that each batch has four columns and 10 rows arrive.
TEST_F(MmapStoreTest, IPCScanWithUserID) {
  auto ipc_store = std::make_shared<MmapForwardStore>(ipc_path);
  ASSERT_TRUE(ipc_store->Open().ok());
  auto table_reader = ipc_store->scan({USER_ID, "id", "name", "score"});
  // scan() can return nullptr (see IPCScanWithInvalidColumn); fail the test
  // here rather than dereferencing a null reader in the loop below.
  ASSERT_TRUE(table_reader != nullptr);
  int batch_count = 0;
  int total_rows = 0;
  while (true) {
    std::shared_ptr<arrow::RecordBatch> batch;
    auto status = table_reader->ReadNext(&batch);
    ASSERT_TRUE(status.ok());
    // A null batch marks the end of the stream.
    if (batch == nullptr) {
      break;
    }
    EXPECT_GT(batch->num_rows(), 0);
    EXPECT_EQ(batch->num_columns(), 4);
    batch_count++;
    total_rows += batch->num_rows();
  }
  EXPECT_GT(batch_count, 0);
  EXPECT_EQ(total_rows, 10);
}

// Scans the IPC file with the GLOBAL_DOC_ID column requested alongside the
// regular columns and checks that each batch has four columns and 10 rows
// arrive.
TEST_F(MmapStoreTest, IPCScanWithGlobalDocID) {
  auto ipc_store = std::make_shared<MmapForwardStore>(ipc_path);
  ASSERT_TRUE(ipc_store->Open().ok());
  auto table_reader = ipc_store->scan({GLOBAL_DOC_ID, "id", "name", "score"});
  // scan() can return nullptr (see IPCScanWithInvalidColumn); fail the test
  // here rather than dereferencing a null reader in the loop below.
  ASSERT_TRUE(table_reader != nullptr);
  int batch_count = 0;
  int total_rows = 0;
  while (true) {
    std::shared_ptr<arrow::RecordBatch> batch;
    auto status = table_reader->ReadNext(&batch);
    ASSERT_TRUE(status.ok());
    // A null batch marks the end of the stream.
    if (batch == nullptr) {
      break;
    }
    EXPECT_GT(batch->num_rows(), 0);
    EXPECT_EQ(batch->num_columns(), 4);
    batch_count++;
    total_rows += batch->num_rows();
  }
  EXPECT_GT(batch_count, 0);
  EXPECT_EQ(total_rows, 10);
}


// Smoke test: fetch three rows of three columns from the Parquet file.
TEST_F(MmapStoreTest, GeneralParquet) {
  auto store = std::make_shared<MmapForwardStore>(parquet_path);
  ASSERT_TRUE(store->Open().ok());

  TablePtr fetched = store->fetch({"id", "name", "score"}, {0, 1, 2});
  ASSERT_TRUE(fetched != nullptr);
  EXPECT_EQ(fetched->num_rows(), 3);
  EXPECT_EQ(fetched->num_columns(), 3);
}

// Fetching with neither columns nor indices must yield no table.
TEST_F(MmapStoreTest, ParquetFetchWitEmptyColumns) {
  auto store = std::make_shared<MmapForwardStore>(parquet_path);
  ASSERT_TRUE(store->Open().ok());

  TablePtr fetched = store->fetch({}, std::vector<int>{});
  EXPECT_EQ(fetched, nullptr);
}

// Row indices outside the file (negative or past the end) must be rejected
// with a null table.
TEST_F(MmapStoreTest, ParquetFetchWithInvalidIndices) {
  auto store = std::make_shared<MmapForwardStore>(parquet_path);
  ASSERT_TRUE(store->Open().ok());

  // Negative index.
  TablePtr fetched = store->fetch({"id"}, std::vector<int>{-1});
  EXPECT_EQ(fetched, nullptr);

  // Index beyond the last row.
  fetched = store->fetch({"id"}, std::vector<int>{100});
  EXPECT_EQ(fetched, nullptr);
}

// Fetches rows in a non-monotonic order (including a repeated index) and
// verifies the result preserves the requested order.
TEST_F(MmapStoreTest, ParquetCheckOrder) {
  auto mmap_store = std::make_shared<MmapForwardStore>(parquet_path);
  ASSERT_TRUE(mmap_store->Open().ok());
  TablePtr mmap_table =
      mmap_store->fetch({"id", "name", "score"}, {0, 3, 6, 1, 0});
  ASSERT_TRUE(mmap_table != nullptr);
  EXPECT_EQ(mmap_table->num_rows(), 5);
  EXPECT_EQ(mmap_table->num_columns(), 3);

  // Get data from the id column for each row
  auto id_column = mmap_table->column(0);  // id column is the first column
  // Guard chunk(0) and the downcast: a missing chunk or an unexpected array
  // type should fail the test instead of dereferencing a null pointer.
  ASSERT_GT(id_column->num_chunks(), 0);
  auto id_array =
      std::dynamic_pointer_cast<arrow::Int32Array>(id_column->chunk(0));
  ASSERT_TRUE(id_array != nullptr);

  std::vector<int32_t> expected_ids = {
      1, 4, 7, 2, 1};  // Corresponding to indices 0, 3, 6, 1, 0
  std::vector<int32_t> actual_ids;

  // Array::length() is 64-bit; use a matching index type.
  for (int64_t i = 0; i < id_array->length(); ++i) {
    actual_ids.push_back(id_array->Value(i));
  }

  EXPECT_EQ(actual_ids, expected_ids)
      << "ID column values don't match expected order";
}

// Requests LOCAL_ROW_ID in the middle of the column list and verifies that it
// lands at that position and reports the requested row indices in order.
TEST_F(MmapStoreTest, ParquetCheckOrderWithLocalRowIDMiddle) {
  auto mmap_store = std::make_shared<MmapForwardStore>(parquet_path);
  ASSERT_TRUE(mmap_store->Open().ok());
  TablePtr mmap_table =
      mmap_store->fetch({"id", "name", LOCAL_ROW_ID, "score"}, {0, 3, 6, 1, 0});
  ASSERT_TRUE(mmap_table != nullptr);
  EXPECT_EQ(mmap_table->num_rows(), 5);
  EXPECT_EQ(mmap_table->num_columns(), 4);
  auto field = mmap_table->schema()->field(2);
  EXPECT_EQ(field->name(), LOCAL_ROW_ID);

  // Get data from the row-id column for each row
  auto id_column = mmap_table->column(2);
  // Guard chunk(0) and the downcast: a missing chunk or an unexpected array
  // type should fail the test instead of dereferencing a null pointer.
  ASSERT_GT(id_column->num_chunks(), 0);
  auto id_array =
      std::dynamic_pointer_cast<arrow::UInt64Array>(id_column->chunk(0));
  ASSERT_TRUE(id_array != nullptr);

  // UInt64Array::Value() yields uint64_t; keep that width instead of
  // silently narrowing into int32_t.
  std::vector<uint64_t> expected_ids = {0, 3, 6, 1, 0};
  std::vector<uint64_t> actual_ids;

  for (int64_t i = 0; i < id_array->length(); ++i) {
    actual_ids.push_back(id_array->Value(i));
  }

  EXPECT_EQ(actual_ids, expected_ids)
      << "ID column values don't match expected order";
}

// Requests LOCAL_ROW_ID as the last column and verifies that it lands at that
// position and reports the requested row indices in order.
TEST_F(MmapStoreTest, ParquetCheckOrderWithLocalRowIDEnd) {
  auto mmap_store = std::make_shared<MmapForwardStore>(parquet_path);
  ASSERT_TRUE(mmap_store->Open().ok());
  TablePtr mmap_table =
      mmap_store->fetch({"id", "name", "score", LOCAL_ROW_ID}, {0, 3, 6, 1, 0});
  ASSERT_TRUE(mmap_table != nullptr);
  EXPECT_EQ(mmap_table->num_rows(), 5);
  EXPECT_EQ(mmap_table->num_columns(), 4);
  auto field = mmap_table->schema()->field(3);
  EXPECT_EQ(field->name(), LOCAL_ROW_ID);

  // Get data from the row-id column for each row
  auto id_column = mmap_table->column(3);
  // Guard chunk(0) and the downcast: a missing chunk or an unexpected array
  // type should fail the test instead of dereferencing a null pointer.
  ASSERT_GT(id_column->num_chunks(), 0);
  auto id_array =
      std::dynamic_pointer_cast<arrow::UInt64Array>(id_column->chunk(0));
  ASSERT_TRUE(id_array != nullptr);

  // UInt64Array::Value() yields uint64_t; keep that width instead of
  // silently narrowing into int32_t.
  std::vector<uint64_t> expected_ids = {0, 3, 6, 1, 0};
  std::vector<uint64_t> actual_ids;

  for (int64_t i = 0; i < id_array->length(); ++i) {
    actual_ids.push_back(id_array->Value(i));
  }

  EXPECT_EQ(actual_ids, expected_ids)
      << "ID column values don't match expected order";
}

// Streams the whole Parquet file through scan() and verifies the batch shape
// and the total number of rows delivered.
TEST_F(MmapStoreTest, ParquetScan) {
  auto store = std::make_shared<MmapForwardStore>(parquet_path);
  ASSERT_TRUE(store->Open().ok());

  auto reader = store->scan({"id", "name", "score"});
  ASSERT_TRUE(reader != nullptr);
  EXPECT_NE(reader->schema(), nullptr);

  int seen_batches = 0;
  int seen_rows = 0;
  for (;;) {
    std::shared_ptr<arrow::RecordBatch> current;
    auto read_status = reader->ReadNext(&current);
    ASSERT_TRUE(read_status.ok());
    // A null batch marks the end of the stream.
    if (current == nullptr) {
      break;
    }
    EXPECT_GT(current->num_rows(), 0);
    EXPECT_EQ(current->num_columns(), 3);
    seen_rows += current->num_rows();
    seen_batches++;
  }

  EXPECT_GT(seen_batches, 0);
  EXPECT_EQ(seen_rows, 10);
}

// Requesting a column that is not part of the file must make scan() return a
// null reader.
TEST_F(MmapStoreTest, ParquetScanWithInvalidColumn) {
  auto store = std::make_shared<MmapForwardStore>(parquet_path);
  ASSERT_TRUE(store->Open().ok());

  auto reader = store->scan({"id", "unknown_column"});
  ASSERT_TRUE(reader == nullptr);
}

// Scans the Parquet file with the USER_ID column requested alongside the
// regular columns and checks that each batch has four columns and 10 rows
// arrive.
TEST_F(MmapStoreTest, ParquetScanWithUserID) {
  auto mmap_store = std::make_shared<MmapForwardStore>(parquet_path);
  ASSERT_TRUE(mmap_store->Open().ok());
  auto table_reader = mmap_store->scan({USER_ID, "id", "name", "score"});
  // scan() can return nullptr (see ParquetScanWithInvalidColumn); fail the
  // test here rather than dereferencing a null reader in the loop below.
  ASSERT_TRUE(table_reader != nullptr);
  int batch_count = 0;
  int total_rows = 0;
  while (true) {
    std::shared_ptr<arrow::RecordBatch> batch;
    auto status = table_reader->ReadNext(&batch);
    ASSERT_TRUE(status.ok());
    // A null batch marks the end of the stream.
    if (batch == nullptr) {
      break;
    }
    EXPECT_GT(batch->num_rows(), 0);
    EXPECT_EQ(batch->num_columns(), 4);
    batch_count++;
    total_rows += batch->num_rows();
  }
  EXPECT_GT(batch_count, 0);
  EXPECT_EQ(total_rows, 10);
}

// Scans the Parquet file with the GLOBAL_DOC_ID column requested alongside the
// regular columns and checks that each batch has four columns and 10 rows
// arrive.
TEST_F(MmapStoreTest, ParquetScanWithGlobalDocID) {
  // This Parquet test previously opened ipc_path, which duplicated
  // IPCScanWithGlobalDocID and left the Parquet path untested.
  // NOTE(review): assumes the Parquet fixture exposes GLOBAL_DOC_ID the same
  // way the IPC fixture does — confirm against the fixture setup.
  auto mmap_store = std::make_shared<MmapForwardStore>(parquet_path);
  ASSERT_TRUE(mmap_store->Open().ok());
  auto table_reader = mmap_store->scan({GLOBAL_DOC_ID, "id", "name", "score"});
  // scan() can return nullptr (see ParquetScanWithInvalidColumn); fail the
  // test here rather than dereferencing a null reader in the loop below.
  ASSERT_TRUE(table_reader != nullptr);
  int batch_count = 0;
  int total_rows = 0;
  while (true) {
    std::shared_ptr<arrow::RecordBatch> batch;
    auto status = table_reader->ReadNext(&batch);
    ASSERT_TRUE(status.ok());
    // A null batch marks the end of the stream.
    if (batch == nullptr) {
      break;
    }
    EXPECT_GT(batch->num_rows(), 0);
    EXPECT_EQ(batch->num_columns(), 4);
    batch_count++;
    total_rows += batch->num_rows();
  }
  EXPECT_GT(batch_count, 0);
  EXPECT_EQ(total_rows, 10);
}

// Fetches each of the 10 rows individually from the IPC file and checks that
// the single-row batch has three values and the expected id (index + 1).
TEST_F(MmapStoreTest, IPCFetchSingleRow) {
  auto ipc_store = std::make_shared<MmapForwardStore>(ipc_path);
  ASSERT_TRUE(ipc_store->Open().ok());

  auto func = [&](int index) -> void {
    ExecBatchPtr ipc_batch = ipc_store->fetch({"id", "name", "score"}, index);
    ASSERT_TRUE(ipc_batch != nullptr);
    EXPECT_EQ(ipc_batch->length, 1);
    // Fatal on mismatch: values[0..2] are indexed right below.
    ASSERT_EQ(ipc_batch->values.size(), 3u);

    auto id_scalar = ipc_batch->values[0].scalar();
    auto name_scalar = ipc_batch->values[1].scalar();
    auto score_scalar = ipc_batch->values[2].scalar();
    EXPECT_NE(name_scalar, nullptr);
    EXPECT_NE(score_scalar, nullptr);

    // Check the downcast before dereferencing so a type mismatch is reported
    // as a test failure rather than a crash.
    auto id_value = std::dynamic_pointer_cast<arrow::Int32Scalar>(id_scalar);
    ASSERT_TRUE(id_value != nullptr);
    EXPECT_EQ(id_value->value, index + 1);
  };

  // The lambda takes an int; iterate with an int to avoid implicit narrowing.
  for (int i = 0; i < 10; i++) {
    func(i);
  }
}

// Fetches each of the 10 rows individually from the Parquet file and checks
// that the single-row batch has three values and the expected id (index + 1).
TEST_F(MmapStoreTest, ParquetFetchSingleRow) {
  auto parquet_store = std::make_shared<MmapForwardStore>(parquet_path);
  ASSERT_TRUE(parquet_store->Open().ok());

  auto func = [&](int index) -> void {
    ExecBatchPtr parquet_batch =
        parquet_store->fetch({"id", "name", "score"}, index);
    ASSERT_TRUE(parquet_batch != nullptr);
    EXPECT_EQ(parquet_batch->length, 1);
    // Fatal on mismatch: values[0..2] are indexed right below.
    ASSERT_EQ(parquet_batch->values.size(), 3u);

    auto id_scalar = parquet_batch->values[0].scalar();
    auto name_scalar = parquet_batch->values[1].scalar();
    auto score_scalar = parquet_batch->values[2].scalar();
    EXPECT_NE(name_scalar, nullptr);
    EXPECT_NE(score_scalar, nullptr);

    // Check the downcast before dereferencing so a type mismatch is reported
    // as a test failure rather than a crash.
    auto id_value = std::dynamic_pointer_cast<arrow::Int32Scalar>(id_scalar);
    ASSERT_TRUE(id_value != nullptr);
    EXPECT_EQ(id_value->value, index + 1);
  };

  // The lambda takes an int; iterate with an int to avoid implicit narrowing.
  for (int i = 0; i < 10; i++) {
    func(i);
  }
}

// Single-row fetch must return null for a negative index and for an index
// beyond the end of the IPC file.
TEST_F(MmapStoreTest, IPCFetchSingleRowWithInvalidIndex) {
  auto store = std::make_shared<MmapForwardStore>(ipc_path);
  ASSERT_TRUE(store->Open().ok());

  // Negative index.
  ExecBatchPtr row = store->fetch({"id", "name"}, -1);
  EXPECT_EQ(row, nullptr);

  // Index past the last row.
  row = store->fetch({"id", "name"}, 100);
  EXPECT_EQ(row, nullptr);
}

// Single-row fetch must return null when any requested column is unknown.
TEST_F(MmapStoreTest, IPCFetchSingleRowWithInvalidColumn) {
  auto store = std::make_shared<MmapForwardStore>(ipc_path);
  ASSERT_TRUE(store->Open().ok());

  ExecBatchPtr row = store->fetch({"id", "invalid_column"}, 0);
  EXPECT_EQ(row, nullptr);
}

// Single-row fetch must return null when no columns are requested.
TEST_F(MmapStoreTest, IPCFetchSingleRowWithEmptyColumns) {
  auto store = std::make_shared<MmapForwardStore>(ipc_path);
  ASSERT_TRUE(store->Open().ok());

  ExecBatchPtr row = store->fetch({}, 0);
  EXPECT_EQ(row, nullptr);
}

// Single-row fetch must return null for a negative index and for an index
// beyond the end of the Parquet file.
TEST_F(MmapStoreTest, ParquetFetchSingleRowWithInvalidIndex) {
  auto store = std::make_shared<MmapForwardStore>(parquet_path);
  ASSERT_TRUE(store->Open().ok());

  // Negative index.
  ExecBatchPtr row = store->fetch({"id", "name"}, -1);
  EXPECT_EQ(row, nullptr);

  // Index past the last row.
  row = store->fetch({"id", "name"}, 100);
  EXPECT_EQ(row, nullptr);
}

// Fetches an INT32 column and a LIST column in a shuffled row order and
// validates each value against the row index it was fetched from.
TEST_F(MmapStoreTest, AllDataType) {
  auto store = std::make_shared<MmapForwardStore>(parquet_path);
  ASSERT_TRUE(store->Open().ok());

  std::vector<std::string> columns = {"id", "list_int32"};
  std::vector<int> indices = {0, 3, 6, 1, 0};

  TablePtr fetched = store->fetch(columns, indices);
  ASSERT_TRUE(fetched != nullptr);
  EXPECT_EQ(fetched->num_rows(), 5);
  EXPECT_EQ(fetched->num_columns(), 2);

  for (size_t col = 0; col < columns.size(); ++col) {
    auto chunked = fetched->column(col);
    for (int chunk_no = 0; chunk_no < chunked->num_chunks(); ++chunk_no) {
      auto chunk = chunked->chunk(chunk_no);
      const auto type_id = chunk->type()->id();

      if (type_id == arrow::Type::INT32) {
        // Scalar column: each value is the source row index plus one.
        auto ints = std::static_pointer_cast<arrow::Int32Array>(chunk);
        for (int row = 0; row < chunk->length(); ++row) {
          int32_t got = ints->Value(row);
          EXPECT_EQ(got, indices[row] + 1);
        }
        continue;
      }

      // Any type other than LIST is left unchecked.
      if (type_id != arrow::Type::LIST) {
        continue;
      }

      // List column: 128 elements per row, element m equals index * 10 + m.
      auto lists = std::static_pointer_cast<arrow::ListArray>(chunk);
      for (int row = 0; row < chunk->length(); ++row) {
        auto slice = lists->value_slice(row);
        auto elements = std::static_pointer_cast<arrow::Int32Array>(slice);
        EXPECT_EQ(elements->length(), 128);
        for (int m = 0; m < elements->length(); ++m) {
          int32_t got = elements->Value(m);
          EXPECT_EQ(got, indices[row] * 10 + m);
        }
      }
    }
  }
}

// Checks the row -> row-group mapping of the Parquet file, including a row
// index past the end of the file.
TEST_F(MmapStoreTest, FindRowGroupForRow) {
  auto store = std::make_shared<MmapForwardStore>(parquet_path);
  ASSERT_TRUE(store->Open().ok());

  // Rows inside the file.
  EXPECT_EQ(store->FindRowGroupForRow(0), 0);
  EXPECT_EQ(store->FindRowGroupForRow(1), 0);
  EXPECT_EQ(store->FindRowGroupForRow(2), 0);
  EXPECT_EQ(store->FindRowGroupForRow(3), 1);
  EXPECT_EQ(store->FindRowGroupForRow(6), 2);
  EXPECT_EQ(store->FindRowGroupForRow(9), 3);

  // A row index beyond the file is expected to map to group 3 as well.
  EXPECT_EQ(store->FindRowGroupForRow(100), 3);
}

// Checks the row offset reported for each of the four row groups.
TEST_F(MmapStoreTest, GetRowGroupOffset) {
  auto store = std::make_shared<MmapForwardStore>(parquet_path);
  ASSERT_TRUE(store->Open().ok());

  EXPECT_EQ(store->GetRowGroupOffset(0), 0);
  EXPECT_EQ(store->GetRowGroupOffset(1), 3);
  EXPECT_EQ(store->GetRowGroupOffset(2), 6);
  EXPECT_EQ(store->GetRowGroupOffset(3), 9);
}

// Open() must fail for each of the malformed paths listed below.
TEST_F(MmapStoreTest, InvalidPath) {
  std::vector<std::string> bad_paths = {
      "err_path",
      "err_" + ipc_path,
      "err_" + parquet_path,
      ipc_path + ".unknown_file_type",
  };

  for (const auto &bad_path : bad_paths) {
    auto store = std::make_shared<MmapForwardStore>(bad_path);
    ASSERT_FALSE(store->Open().ok());
  }
}

// A path with an unrecognised suffix must be classified as UNKNOWN.
TEST_F(MmapStoreTest, InvalidFileFormat) {
  std::string unknown_path = ipc_path + ".unknown_file_format";
  EXPECT_EQ(InferFileFormat(unknown_path), FileFormat::UNKNOWN);
}

// validate() must reject an empty column list.
TEST_F(MmapStoreTest, ValidateEmptyColumns) {
  auto store = std::make_shared<MmapForwardStore>(ipc_path);
  ASSERT_TRUE(store->Open().ok());

  EXPECT_FALSE(store->validate({}));
}

// A store that has been constructed but never opened reports no physical
// schema.
TEST_F(MmapStoreTest, ConstructorAndPhysicSchema) {
  MmapForwardStore unopened_store(ipc_path);
  EXPECT_EQ(unopened_store.physic_schema(), nullptr);
}

// Heap-allocates a store and deletes it without ever opening it.
TEST_F(MmapStoreTest, DeleteDestructs) {
  auto *heap_store = new MmapForwardStore(ipc_path);
  delete heap_store;
}

================================================
FILE: tests/db/index/storage/parquet_writer_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "db/index/storage/parquet_writer.h"
#include <iostream>
#include <arrow/array/builder_primitive.h>
#include <arrow/record_batch.h>
#include <arrow/status.h>
#include <gtest/gtest.h>

using namespace zvec;

std::shared_ptr<arrow::RecordBatchReader> CreateTestReader(int start_id,
                                                           int count) {
  auto schema = arrow::schema({arrow::field("id", arrow::int32()),
                               arrow::field("name", arrow::utf8())});

  arrow::Int32Builder id_builder;
  arrow::StringBuilder name_builder;

  arrow::Status s;

  for (int i = 0; i < count; ++i) {
    s = id_builder.Append(start_id + i);
    if (!s.ok()) {
      return nullptr;
    }
    s = name_builder.Append("User" + std::to_string(start_id + i));
    if (!s.ok()) {
      return nullptr;
    }
  }

  std::shared_ptr<arrow::Array> id_array, name_array;
  s = id_builder.Finish(&id_array);
  if (!s.ok()) {
    return nullptr;
  }
  s = name_builder.Finish(&name_array);
  if (!s.ok()) {
    return nullptr;
  }

  auto batch = arrow::RecordBatch::Make(schema, count, {id_array, name_array});
  auto maybe_reader = arrow::RecordBatchReader::Make({batch}, schema);
  if (!maybe_reader.ok()) {
    return nullptr;
  }
  return *maybe_reader;
}

// Writes three consecutive batches through one ParquetWriter and finalizes the
// file. Every step, including finalize(), must succeed for the test to pass.
TEST(ParquetWriter, General) {
  ParquetWriter writer("output.parquet");
  // writer.SetMaxRowsPerGroup(1000); // Optional: limit the rows per row group

  // First insert
  {
    auto reader1 = CreateTestReader(1, 3);
    ASSERT_NE(reader1, nullptr);
    auto status = writer.insert(reader1);
    ASSERT_TRUE(status.ok());
    std::cout << "Inserted batch 1" << std::endl;
  }

  // Second insert
  {
    auto reader2 = CreateTestReader(4, 2);
    ASSERT_NE(reader2, nullptr);
    auto status = writer.insert(reader2);
    ASSERT_TRUE(status.ok());
    std::cout << "Inserted batch 2" << std::endl;
  }

  // Third insert
  {
    auto reader3 = CreateTestReader(6, 4);
    ASSERT_NE(reader3, nullptr);
    auto status = writer.insert(reader3);
    ASSERT_TRUE(status.ok());
    std::cout << "Inserted batch 3" << std::endl;
  }

  // Finally close the file. A finalize failure must fail the test; previously
  // it was only logged and the success message was printed regardless.
  auto status = writer.finalize();
  ASSERT_TRUE(status.ok()) << "Finalize failed: " << status.ToString();

  std::cout << "Parquet file written successfully to output.parquet"
            << std::endl;
}


================================================
FILE: tests/db/index/storage/wal_file_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#define private public
#define protected public
#include "db/index/storage/wal/wal_file.h"
#undef private
#undef protected

#include <sys/stat.h>
#include <sys/types.h>
#include <fcntl.h>
#include <stdio.h>
#include <gtest/gtest.h>
#include <zvec/ailego/parallel/thread_pool.h>
#include <zvec/ailego/utility/string_helper.h>
#include <zvec/ailego/utility/time_helper.h>
#include "db/common/file_helper.h"

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
#endif

using namespace zvec;
using SegmentID = uint32_t;

// Fixture for the WAL file tests: removes WAL files left in the working
// directory before each test runs.
class WalFileTest : public testing::Test {
 protected:
  void SetUp() {
    // Shell out to delete any ./data.wal.* files from earlier runs.
    system("rm -rf ./data.wal.*");
  }

  void TearDown() {}
};

// End-to-end WAL scenario: writes 400 records over four open/close cycles with
// different max_docs_wal_flush settings, then reopens the file and checks that
// all 400 records are read back in order before removing the file.
TEST_F(WalFileTest, TestGeneral) {
  std::string dir_path = "./";
  SegmentID segment_id = 0;
  std::string wal_file_path =
      FileHelper::MakeFilePath(dir_path, FileID::WAL_FILE, segment_id);
  WalFilePtr wal_file = WalFile::Create(wal_file_path);

  ASSERT_TRUE(wal_file != nullptr);

  // Cycle 1: create a new file with max_docs_wal_flush = 0.
  WalOptions wal_option;
  wal_option.create_new = true;
  wal_option.max_docs_wal_flush = 0;
  int ret = wal_file->open(wal_option);
  ASSERT_EQ(ret, 0);

  // Append 100 identical records ("hello").
  for (size_t i = 0; i < 100; i++) {
    ret = wal_file->append(std::string("hello"));
    ASSERT_EQ(ret, 0);
  }
  ret = wal_file->flush();
  ASSERT_EQ(ret, 0);
  ret = wal_file->close();
  ASSERT_EQ(ret, 0);

  // Cycle 2: reopen the existing file with max_docs_wal_flush = 1 and append
  // records 100..199 ("hello<i>").
  wal_option.create_new = false;
  wal_option.max_docs_wal_flush = 1;
  ret = wal_file->open(wal_option);
  ASSERT_EQ(ret, 0);
  for (size_t i = 100; i < 200; i++) {
    std::string record = "hello";
    ret = wal_file->append(record + std::to_string(i));
    ASSERT_EQ(ret, 0);
  }
  ret = wal_file->flush();
  ASSERT_EQ(ret, 0);
  ret = wal_file->close();
  ASSERT_EQ(ret, 0);

  // Cycle 3: reopen with the same options and append records 200..299.
  wal_option.create_new = false;
  ret = wal_file->open(wal_option);
  ASSERT_EQ(ret, 0);
  for (size_t i = 200; i < 300; i++) {
    std::string record = "hello";
    ret = wal_file->append(record + std::to_string(i));
    ASSERT_EQ(ret, 0);
  }
  ret = wal_file->flush();
  ASSERT_EQ(ret, 0);
  ret = wal_file->close();
  ASSERT_EQ(ret, 0);

  // Cycle 4: reopen with max_docs_wal_flush = 10 and append records 300..399.
  wal_option.max_docs_wal_flush = 10;
  wal_option.create_new = false;
  ret = wal_file->open(wal_option);
  ASSERT_EQ(ret, 0);
  for (size_t i = 300; i < 400; i++) {
    std::string record = "hello";
    ret = wal_file->append(record + std::to_string(i));
    ASSERT_EQ(ret, 0);
  }
  ret = wal_file->flush();
  ASSERT_EQ(ret, 0);
  ret = wal_file->close();
  ASSERT_EQ(ret, 0);

  // Reopen for reading.
  wal_option.create_new = false;
  ret = wal_file->open(wal_option);
  ASSERT_EQ(ret, 0);

  uint32_t idx = 0;
  ret = wal_file->prepare_for_read();
  ASSERT_EQ(ret, 0);
  // The loop treats an empty string from next() as the end of the log.
  std::string record = wal_file->next();
  while (!record.empty()) {
    if (idx < 100) {
      // The first 100 records carry no numeric suffix.
      ASSERT_EQ(record, "hello");
    } else {
      ASSERT_EQ(record, std::string("hello") + std::to_string(idx));
    }
    record = wal_file->next();
    idx++;
  }
  ASSERT_EQ(idx, 400);
  // Close the file.
  ret = wal_file->close();
  ASSERT_EQ(ret, 0);
  // Remove the file.
  ret = wal_file->remove();
  ASSERT_EQ(ret, 0);
}

// Thread-pool task: appends the record "hello<number>" to the WAL and asserts
// that the append succeeded.
void do_append(WalFile *wal_file, size_t number) {
  int rc = wal_file->append("hello" + std::to_string(number));
  ASSERT_EQ(rc, 0);
}

// Appends 30000 records from a 10-thread pool across three open/close cycles
// (max_docs_wal_flush = 1, 1000 and 0), then reopens the file and checks that
// exactly 30000 records can be read back.
TEST_F(WalFileTest, TestMultiThread) {
  std::string dir_path = "./";
  SegmentID segment_id = 0;
  std::string wal_file_path =
      FileHelper::MakeFilePath(dir_path, FileID::WAL_FILE, segment_id);
  WalFilePtr wal_file = WalFile::Create(wal_file_path);
  ASSERT_TRUE(wal_file != nullptr);

  WalOptions wal_option;
  wal_option.create_new = true;
  wal_option.max_docs_wal_flush = 1;
  int ret = wal_file->open(wal_option);
  ASSERT_EQ(ret, 0);

  ailego::ThreadPool pool(10, false);
  for (size_t i = 0; i < 10000; i++) {
    pool.execute(do_append, wal_file.get(), i);
  }
  pool.wait_finish();
  // Check flush()/close() results like the other WAL tests do, so a failed
  // flush is reported here instead of surfacing later as a wrong record count.
  ret = wal_file->flush();
  ASSERT_EQ(ret, 0);
  ret = wal_file->close();
  ASSERT_EQ(ret, 0);

  // Reopen with max_docs_wal_flush = 1000 and append records 10000..19999.
  wal_option.create_new = false;
  wal_option.max_docs_wal_flush = 1000;
  ret = wal_file->open(wal_option);
  ASSERT_EQ(ret, 0);
  for (size_t i = 10000; i < 20000; i++) {
    pool.execute(do_append, wal_file.get(), i);
  }
  pool.wait_finish();
  ret = wal_file->flush();
  ASSERT_EQ(ret, 0);
  ret = wal_file->close();
  ASSERT_EQ(ret, 0);

  // Reopen with max_docs_wal_flush = 0 and append records 20000..29999.
  wal_option.create_new = false;
  wal_option.max_docs_wal_flush = 0;
  ret = wal_file->open(wal_option);
  ASSERT_EQ(ret, 0);
  for (size_t i = 20000; i < 30000; i++) {
    pool.execute(do_append, wal_file.get(), i);
  }
  pool.wait_finish();
  ret = wal_file->flush();
  ASSERT_EQ(ret, 0);
  ret = wal_file->close();
  ASSERT_EQ(ret, 0);

  // Reopen for reading.
  wal_option.create_new = false;
  ret = wal_file->open(wal_option);
  ASSERT_EQ(ret, 0);

  uint32_t idx = 0;
  ret = wal_file->prepare_for_read();
  ASSERT_EQ(ret, 0);
  // Only the record count is checked; append order across threads is not
  // deterministic.
  std::string record = wal_file->next();
  while (!record.empty()) {
    record = wal_file->next();
    idx++;
  }
  ASSERT_EQ(idx, 30000);
  // Close the file.
  ret = wal_file->close();
  ASSERT_EQ(ret, 0);
  // Remove the file.
  ret = wal_file->remove();
  ASSERT_EQ(ret, 0);
}


// Boundary scenarios exercised back to back on the same WAL path:
//   1. reading a file that contains no records,
//   2. round-tripping a small binary record that includes a zero byte,
//   3. round-tripping a single 4 MiB record,
//   4. writing 99 records with max_docs_wal_flush = 100 and reading them back.
TEST_F(WalFileTest, TestBoundaryCondition) {
  // Scenario 1: read an empty file.
  std::string dir_path = "./";
  SegmentID segment_id = 0;
  std::string wal_file_path =
      FileHelper::MakeFilePath(dir_path, FileID::WAL_FILE, segment_id);
  WalFilePtr wal_file = WalFile::Create(wal_file_path);
  ASSERT_TRUE(wal_file != nullptr);

  // Create the file and close it without appending anything.
  WalOptions wal_option;
  wal_option.create_new = true;
  wal_option.max_docs_wal_flush = 1;
  int ret = wal_file->open(wal_option);
  ASSERT_EQ(ret, 0);
  ret = wal_file->flush();
  ASSERT_EQ(ret, 0);
  ret = wal_file->close();
  ASSERT_EQ(ret, 0);

  // Reopen and expect next() to yield nothing.
  // NOTE(review): unlike the later scenarios, prepare_for_read() is not called
  // before next() here — confirm this is intentional.
  wal_option.create_new = false;
  ret = wal_file->open(wal_option);
  ASSERT_EQ(ret, 0);
  uint32_t idx = 0;
  std::string record = wal_file->next();
  while (!record.empty()) {
    record = wal_file->next();
    idx++;
  }
  ASSERT_EQ(idx, 0);
  ret = wal_file->close();
  ASSERT_EQ(ret, 0);

  // Scenario 2: write and read back a binary record (bytes 0, 1, 2, 3).
  std::vector<uint8_t> bin_v{0, 1, 2, 3};
  std::string str(bin_v.begin(), bin_v.end());
  wal_option.create_new = false;
  ret = wal_file->open(wal_option);
  ASSERT_EQ(ret, 0);
  ret = wal_file->append(std::move(str));
  ASSERT_EQ(ret, 0);
  ret = wal_file->flush();
  ASSERT_EQ(ret, 0);
  ret = wal_file->close();
  ASSERT_EQ(ret, 0);

  wal_option.create_new = false;
  ret = wal_file->open(wal_option);
  ASSERT_EQ(ret, 0);
  idx = 0;
  ret = wal_file->prepare_for_read();
  ASSERT_EQ(ret, 0);
  record = wal_file->next();
  while (!record.empty()) {
    // The record must come back with all four bytes intact.
    ASSERT_EQ(record.size(), 4);
    for (size_t i = 0; i < 4; i++) {
      ASSERT_EQ(record[i], i);
    }
    record = wal_file->next();
    idx++;
  }
  ASSERT_EQ(idx, 1);
  ret = wal_file->close();
  ASSERT_EQ(ret, 0);
  // Remove the file before the next scenario.
  ret = wal_file->remove();
  ASSERT_EQ(ret, 0);


  // Scenario 3: write a very large record (4 MiB) filled with a repeating
  // 0..255 byte pattern.
  size_t BIG_DATA_SIZE = 4 * 1024 * 1024;
  std::vector<uint8_t> big_data(BIG_DATA_SIZE);
  for (size_t i = 0; i < BIG_DATA_SIZE; i++) {
    big_data[i] = i % 256;
  }
  str.clear();
  str.assign((const char *)big_data.data(), BIG_DATA_SIZE);
  wal_option.create_new = true;
  ret = wal_file->open(wal_option);
  ASSERT_EQ(ret, 0);
  ret = wal_file->append(std::move(str));
  ASSERT_EQ(ret, 0);
  ret = wal_file->flush();
  ASSERT_EQ(ret, 0);
  ret = wal_file->close();
  ASSERT_EQ(ret, 0);

  wal_option.create_new = false;
  ret = wal_file->open(wal_option);
  ASSERT_EQ(ret, 0);
  idx = 0;
  ret = wal_file->prepare_for_read();
  ASSERT_EQ(ret, 0);
  record = wal_file->next();
  while (!record.empty()) {
    // Verify both the size and every byte of the pattern.
    ASSERT_EQ(record.size(), BIG_DATA_SIZE);
    for (size_t i = 0; i < BIG_DATA_SIZE; i++) {
      ASSERT_EQ((uint8_t)record[i], i % 256);
    }
    record = wal_file->next();
    idx++;
  }
  ASSERT_EQ(idx, 1);
  ret = wal_file->close();
  ASSERT_EQ(ret, 0);
  // Remove the file before the next scenario.
  ret = wal_file->remove();
  ASSERT_EQ(ret, 0);

  // Scenario 4: with max_docs_wal_flush = 100, append only 99 records, then
  // flush and close; all 99 must still be readable afterwards.
  wal_option.max_docs_wal_flush = 100;
  wal_option.create_new = true;
  ret = wal_file->open(wal_option);
  ASSERT_EQ(ret, 0);
  for (size_t i = 0; i < 99; i++) {
    std::string record = "hello";
    ret = wal_file->append(record + std::to_string(i));
    ASSERT_EQ(ret, 0);
  }
  ret = wal_file->flush();
  ASSERT_EQ(ret, 0);
  ret = wal_file->close();
  ASSERT_EQ(ret, 0);

  wal_option.create_new = false;
  ret = wal_file->open(wal_option);
  ASSERT_EQ(ret, 0);
  idx = 0;
  ret = wal_file->prepare_for_read();
  ASSERT_EQ(ret, 0);
  record = wal_file->next();
  while (!record.empty()) {
    ASSERT_EQ(record, std::string("hello") + std::to_string(idx));
    record = wal_file->next();
    idx++;
  }
  ASSERT_EQ(idx, 99);
  ret = wal_file->close();
  ASSERT_EQ(ret, 0);
  // Remove the file.
  ret = wal_file->remove();
  ASSERT_EQ(ret, 0);
}

// Opening a WAL file that does not exist with create_new = false must fail
// with -1. SetUp() removes ./data.wal.* beforehand, so the file is absent.
TEST_F(WalFileTest, TestNotExistErrorCase) {
  std::string dir_path = "./";
  SegmentID segment_id = 0;
  std::string wal_file_path =
      FileHelper::MakeFilePath(dir_path, FileID::WAL_FILE, segment_id);
  WalFilePtr wal_file = WalFile::Create(wal_file_path);
  // Same guard as the sibling tests: never dereference a null WalFilePtr.
  ASSERT_TRUE(wal_file != nullptr);
  // Open without creating the file.
  WalOptions wal_option;
  wal_option.create_new = false;
  int ret = wal_file->open(wal_option);
  ASSERT_EQ(ret, -1);
}


// Corrupts the first record of a 10-record WAL by overwriting bytes in the raw
// file, then verifies that reading yields zero records.
// NOTE(review): the offsets assume a 64-byte file header followed by 13-byte
// records (4 + 4 + len("hello"), per the arithmetic in TestCRCErrorCase) —
// confirm against WalFile's on-disk format.
TEST_F(WalFileTest, TestFirstErrorCase) {
  std::string dir_path = "./";
  SegmentID segment_id = 0;
  std::string wal_file_path =
      FileHelper::MakeFilePath(dir_path, FileID::WAL_FILE, segment_id);
  WalFilePtr wal_file = WalFile::Create(wal_file_path);
  ASSERT_TRUE(wal_file != nullptr);

  WalOptions wal_option;
  wal_option.create_new = true;
  wal_option.max_docs_wal_flush = 1;
  int ret = wal_file->open(wal_option);
  ASSERT_EQ(ret, 0);

  // Append 10 identical records.
  for (size_t i = 0; i < 10; i++) {
    ret = wal_file->append(std::string("hello"));
    ASSERT_EQ(ret, 0);
  }
  ret = wal_file->flush();
  ASSERT_EQ(ret, 0);
  ret = wal_file->close();
  ASSERT_EQ(ret, 0);

  // Open the raw file directly to tamper with its bytes.
  std::string wal_path = ailego::StringHelper::Concat(
      dir_path, "data.wal.", std::to_string(segment_id));
  int wal_fd = open(wal_path.c_str(), O_RDWR, 0644);
  ASSERT_GT(wal_fd, 0);
  // Destroy the first record: seek to byte offset 64 + 8.
  lseek(wal_fd, 64 + 8, SEEK_SET);
  // Overwrite 5 bytes with different data.
  char buf[6] = "nihao";
  write(wal_fd, buf, 5);
  close(wal_fd);

  // Reopen for reading.
  wal_option.create_new = false;
  ret = wal_file->open(wal_option);
  ASSERT_EQ(ret, 0);

  uint32_t idx = 0;
  ret = wal_file->prepare_for_read();
  ASSERT_EQ(ret, 0);
  std::string record = wal_file->next();
  while (!record.empty()) {
    ASSERT_EQ(record, "hello");
    record = wal_file->next();
    idx++;
  }
  // No record is expected to be readable after the first one is corrupted.
  ASSERT_EQ(idx, 0);
  // Close the file.
  ret = wal_file->close();
  ASSERT_EQ(ret, 0);

  // Remove the file.
  ret = wal_file->remove();
  ASSERT_EQ(ret, 0);
}


// Corrupts a record in the middle of a 10-record WAL by overwriting bytes in
// the raw file, then verifies that exactly the 5 records before it are read.
// NOTE(review): the offsets assume a 64-byte file header followed by 13-byte
// records (4 + 4 + len("hello"), per the arithmetic in TestCRCErrorCase) —
// confirm against WalFile's on-disk format.
TEST_F(WalFileTest, TestMiddleErrorCase) {
  std::string dir_path = "./";
  SegmentID segment_id = 0;
  std::string wal_file_path =
      FileHelper::MakeFilePath(dir_path, FileID::WAL_FILE, segment_id);
  WalFilePtr wal_file = WalFile::Create(wal_file_path);
  ASSERT_TRUE(wal_file != nullptr);

  WalOptions wal_option;
  wal_option.create_new = true;
  wal_option.max_docs_wal_flush = 1;
  int ret = wal_file->open(wal_option);
  ASSERT_EQ(ret, 0);

  // Append 10 identical records.
  for (size_t i = 0; i < 10; i++) {
    ret = wal_file->append(std::string("hello"));
    ASSERT_EQ(ret, 0);
  }
  ret = wal_file->flush();
  ASSERT_EQ(ret, 0);
  ret = wal_file->close();
  ASSERT_EQ(ret, 0);

  // Open the raw file directly to tamper with its bytes.
  std::string wal_path = ailego::StringHelper::Concat(
      dir_path, "data.wal.", std::to_string(segment_id));
  int wal_fd = open(wal_path.c_str(), O_RDWR, 0644);
  ASSERT_GT(wal_fd, 0);
  // Destroy a middle record: skip five 13-byte records past offset 64, then
  // 8 more bytes.
  lseek(wal_fd, 64 + 13 * 5 + 8, SEEK_SET);
  // Overwrite 5 bytes with different data.
  char buf[6] = "nihao";
  write(wal_fd, buf, 5);
  close(wal_fd);

  // Reopen for reading.
  wal_option.create_new = false;
  ret = wal_file->open(wal_option);
  ASSERT_EQ(ret, 0);

  uint32_t idx = 0;
  ret = wal_file->prepare_for_read();
  ASSERT_EQ(ret, 0);
  std::string record = wal_file->next();
  while (!record.empty()) {
    ASSERT_EQ(record, "hello");
    record = wal_file->next();
    idx++;
  }
  // Reading is expected to stop at the corrupted record, after 5 good ones.
  ASSERT_EQ(idx, 5);
  // Close the file.
  ret = wal_file->close();
  ASSERT_EQ(ret, 0);

  // Remove the file.
  ret = wal_file->remove();
  ASSERT_EQ(ret, 0);
}


// Truncates the last 4 bytes of a 10-record WAL so the final record is
// incomplete, then verifies that only the first 9 records are read.
TEST_F(WalFileTest, TestLastErrorCase) {
  std::string dir_path = "./";
  SegmentID segment_id = 0;
  std::string wal_file_path =
      FileHelper::MakeFilePath(dir_path, FileID::WAL_FILE, segment_id);
  WalFilePtr wal_file = WalFile::Create(wal_file_path);
  ASSERT_TRUE(wal_file != nullptr);

  WalOptions wal_option;
  wal_option.create_new = true;
  wal_option.max_docs_wal_flush = 1;
  int ret = wal_file->open(wal_option);
  ASSERT_EQ(ret, 0);

  // Append 10 identical records.
  for (size_t i = 0; i < 10; i++) {
    ret = wal_file->append(std::string("hello"));
    ASSERT_EQ(ret, 0);
  }
  ret = wal_file->flush();
  ASSERT_EQ(ret, 0);
  ret = wal_file->close();
  ASSERT_EQ(ret, 0);

  // Destroy the last record: measure the file size, then cut off its final
  // 4 bytes.
  std::string wal_path = ailego::StringHelper::Concat(
      dir_path, "data.wal.", std::to_string(segment_id));
  int wal_fd = open(wal_path.c_str(), O_RDWR, 0644);
  ASSERT_GT(wal_fd, 0);
  off_t fsize = lseek(wal_fd, 0, SEEK_END);
  close(wal_fd);
  truncate(wal_path.c_str(), (fsize - 4));

  // Reopen for reading.
  wal_option.create_new = false;
  ret = wal_file->open(wal_option);
  ASSERT_EQ(ret, 0);

  uint32_t idx = 0;
  ret = wal_file->prepare_for_read();
  ASSERT_EQ(ret, 0);
  std::string record = wal_file->next();
  while (!record.empty()) {
    ASSERT_EQ(record, "hello");
    record = wal_file->next();
    idx++;
  }
  // The truncated tenth record must not be returned.
  ASSERT_EQ(idx, 9);
  // Close the file.
  ret = wal_file->close();
  ASSERT_EQ(ret, 0);

  // Remove the file.
  ret = wal_file->remove();
  ASSERT_EQ(ret, 0);
}


// Overwrites the 4 bytes at file offset 64 with the value 2 (smaller than the
// 5-byte payload that was written) and verifies that no record is read back.
// NOTE(review): offset 64 is presumably the length field of the first record,
// right after a 64-byte header — confirm against WalFile's on-disk format.
TEST_F(WalFileTest, TestLengthSmallErrorCase) {
  std::string dir_path = "./";
  SegmentID segment_id = 0;
  std::string wal_file_path =
      FileHelper::MakeFilePath(dir_path, FileID::WAL_FILE, segment_id);
  WalFilePtr wal_file = WalFile::Create(wal_file_path);
  ASSERT_TRUE(wal_file != nullptr);

  WalOptions wal_option;
  wal_option.create_new = true;
  wal_option.max_docs_wal_flush = 1;
  int ret = wal_file->open(wal_option);
  ASSERT_EQ(ret, 0);

  // Append 10 identical records.
  for (size_t i = 0; i < 10; i++) {
    ret = wal_file->append(std::string("hello"));
    ASSERT_EQ(ret, 0);
  }
  ret = wal_file->flush();
  ASSERT_EQ(ret, 0);
  ret = wal_file->close();
  ASSERT_EQ(ret, 0);

  // Write a wrong (too small) length directly into the raw file.
  std::string wal_path = ailego::StringHelper::Concat(
      dir_path, "data.wal.", std::to_string(segment_id));
  int wal_fd = open(wal_path.c_str(), O_RDWR, 0644);
  ASSERT_GT(wal_fd, 0);
  uint32_t err_length = 2;
  lseek(wal_fd, 64, SEEK_SET);
  write(wal_fd, (const void *)&err_length, 4);
  close(wal_fd);

  // Reopen for reading.
  wal_option.create_new = false;
  ret = wal_file->open(wal_option);
  ASSERT_EQ(ret, 0);

  uint32_t idx = 0;
  ret = wal_file->prepare_for_read();
  ASSERT_EQ(ret, 0);
  std::string record = wal_file->next();
  while (!record.empty()) {
    ASSERT_EQ(record, "hello");
    record = wal_file->next();
    idx++;
  }
  // No record is expected to be readable.
  ASSERT_EQ(idx, 0);
  // Close the file.
  ret = wal_file->close();
  ASSERT_EQ(ret, 0);

  // Remove the file.
  ret = wal_file->remove();
  ASSERT_EQ(ret, 0);
}


// Overwrites the 4 bytes at file offset 64 with the value 200 (larger than the
// data that was written) and verifies that no record is read back.
// NOTE(review): offset 64 is presumably the length field of the first record,
// right after a 64-byte header — confirm against WalFile's on-disk format.
TEST_F(WalFileTest, TestLengthBigErrorCase) {
  std::string dir_path = "./";
  SegmentID segment_id = 0;
  std::string wal_file_path =
      FileHelper::MakeFilePath(dir_path, FileID::WAL_FILE, segment_id);
  WalFilePtr wal_file = WalFile::Create(wal_file_path);
  ASSERT_TRUE(wal_file != nullptr);

  WalOptions wal_option;
  wal_option.create_new = true;
  wal_option.max_docs_wal_flush = 1;
  int ret = wal_file->open(wal_option);
  ASSERT_EQ(ret, 0);

  // Append 10 identical records.
  for (size_t i = 0; i < 10; i++) {
    ret = wal_file->append(std::string("hello"));
    ASSERT_EQ(ret, 0);
  }
  ret = wal_file->flush();
  ASSERT_EQ(ret, 0);
  ret = wal_file->close();
  ASSERT_EQ(ret, 0);

  // Write a wrong (too large) length directly into the raw file.
  std::string wal_path = ailego::StringHelper::Concat(
      dir_path, "data.wal.", std::to_string(segment_id));
  int wal_fd = open(wal_path.c_str(), O_RDWR, 0644);
  ASSERT_GT(wal_fd, 0);
  uint32_t err_length = 200;  // exceeds the 130 bytes of record data

  lseek(wal_fd, 64, SEEK_SET);
  write(wal_fd, (const void *)&err_length, 4);
  close(wal_fd);

  // Reopen for reading.
  wal_option.create_new = false;
  ret = wal_file->open(wal_option);
  ASSERT_EQ(ret, 0);

  uint32_t idx = 0;
  ret = wal_file->prepare_for_read();
  ASSERT_EQ(ret, 0);
  std::string record = wal_file->next();
  while (!record.empty()) {
    ASSERT_EQ(record, "hello");
    record = wal_file->next();
    idx++;
  }
  // No record is expected to be readable.
  ASSERT_EQ(idx, 0);
  // Close the file.
  ret = wal_file->close();
  ASSERT_EQ(ret, 0);

  // Remove the file.
  ret = wal_file->remove();
  ASSERT_EQ(ret, 0);
}


// Overwrites the CRC of the second record in a 10-record WAL and verifies that
// only the first record is read back.
TEST_F(WalFileTest, TestCRCErrorCase) {
  std::string dir_path = "./";
  SegmentID segment_id = 0;
  std::string wal_file_path =
      FileHelper::MakeFilePath(dir_path, FileID::WAL_FILE, segment_id);
  WalFilePtr wal_file = WalFile::Create(wal_file_path);
  ASSERT_TRUE(wal_file != nullptr);

  WalOptions wal_option;
  wal_option.create_new = true;
  wal_option.max_docs_wal_flush = 1;
  int ret = wal_file->open(wal_option);
  ASSERT_EQ(ret, 0);

  // Append 10 identical records.
  for (size_t i = 0; i < 10; i++) {
    ret = wal_file->append(std::string("hello"));
    ASSERT_EQ(ret, 0);
  }
  ret = wal_file->flush();
  ASSERT_EQ(ret, 0);
  ret = wal_file->close();
  ASSERT_EQ(ret, 0);

  // Write a wrong CRC directly into the raw file.
  std::string wal_path = ailego::StringHelper::Concat(
      dir_path, "data.wal.", std::to_string(segment_id));
  int wal_fd = open(wal_path.c_str(), O_RDWR, 0644);
  ASSERT_GT(wal_fd, 0);
  // CRC of the second record sits at 64 + (4 + 4 + len("hello")) + 4 = 64 + 17.
  lseek(wal_fd, 64 + 17, SEEK_SET);
  uint32_t err_crc = 123;
  write(wal_fd, (const void *)&err_crc, 4);
  close(wal_fd);

  // Reopen for reading.
  wal_option.create_new = false;
  ret = wal_file->open(wal_option);
  ASSERT_EQ(ret, 0);

  uint32_t idx = 0;
  ret = wal_file->prepare_for_read();
  ASSERT_EQ(ret, 0);
  std::string record = wal_file->next();
  while (!record.empty()) {
    ASSERT_EQ(record, "hello");
    record = wal_file->next();
    idx++;
  }
  // Reading is expected to stop at the record with the bad CRC.
  ASSERT_EQ(idx, 1);
  // Close the file.
  ret = wal_file->close();
  ASSERT_EQ(ret, 0);
  // Remove the file.
  ret = wal_file->remove();
  ASSERT_EQ(ret, 0);
}

#if defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif

================================================
FILE: tests/db/index/utils/utils.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "utils.h"
#include <cstdint>
#include <memory>
#include <vector>
#include <zvec/ailego/logger/logger.h>
#include "zvec/db/collection.h"
#include "zvec/db/doc.h"
#include "zvec/db/index_params.h"
#include "zvec/db/schema.h"
#include "zvec/db/status.h"
#include "zvec/db/type.h"

using namespace zvec;
using namespace zvec::test;

// Returns a schema named "demo", capped at 1000 docs per segment, with the
// fields "id", "name", "weight", "dense" and "sparse" added in that order.
CollectionSchema::Ptr TestHelper::CreateTempSchema() {
  auto demo_schema = std::make_shared<CollectionSchema>("demo");
  demo_schema->set_max_doc_count_per_segment(1000);

  // Scalar fields: "id" and "name" carry InvertIndexParams, "weight" has no
  // index argument at all.
  auto id_field = std::make_shared<FieldSchema>(
      "id", DataType::INT64, false, std::make_shared<InvertIndexParams>(true));
  demo_schema->add_field(std::move(id_field));

  auto name_field = std::make_shared<FieldSchema>(
      "name", DataType::STRING, false,
      std::make_shared<InvertIndexParams>(false));
  demo_schema->add_field(std::move(name_field));

  auto weight_field =
      std::make_shared<FieldSchema>("weight", DataType::FLOAT, true);
  demo_schema->add_field(std::move(weight_field));

  // Vector fields: both use HnswIndexParams with the IP metric.
  auto dense_field = std::make_shared<FieldSchema>(
      "dense", DataType::VECTOR_FP32, 128, false,
      std::make_shared<HnswIndexParams>(MetricType::IP));
  demo_schema->add_field(std::move(dense_field));

  auto sparse_field = std::make_shared<FieldSchema>(
      "sparse", DataType::SPARSE_VECTOR_FP32, 0, false,
      std::make_shared<HnswIndexParams>(MetricType::IP));
  demo_schema->add_field(std::move(sparse_field));

  return demo_schema;
}

// Returns a schema named "demo" holding only two scalar fields, "int32" and
// "string", each created from just a name and a data type.
CollectionSchema::Ptr TestHelper::CreateScalarSchema() {
  auto scalar_schema = std::make_shared<CollectionSchema>("demo");

  auto int32_field = std::make_shared<FieldSchema>("int32", DataType::INT32);
  scalar_schema->add_field(std::move(int32_field));

  auto string_field =
      std::make_shared<FieldSchema>("string", DataType::STRING);
  scalar_schema->add_field(std::move(string_field));

  return scalar_schema;
}

// Helper function: builds a schema called `name` that contains eight scalar
// fields, eight array fields, three dense vector fields and two sparse vector
// fields.
//
//   nullable            - passed to every scalar and array field. Vector
//                         fields always pass `false` in the position after
//                         the dimension (presumably the nullable flag).
//   scalar_index_params - index parameters handed to every scalar and array
//                         field; may be null.
//   vector_index_params - used as-is for "dense_fp32"; a clone with the metric
//                         forced to IP is used for "sparse_fp32". When null,
//                         FlatIndexParams(IP) is used for both. All other
//                         vector fields always use FlatIndexParams(IP).
//   max_doc_count       - forwarded to set_max_doc_count_per_segment().
CollectionSchema::Ptr TestHelper::CreateNormalSchema(
    bool nullable, std::string name, IndexParams::Ptr scalar_index_params,
    IndexParams::Ptr vector_index_params, uint64_t max_doc_count) {
  auto schema = std::make_shared<CollectionSchema>(name);
  schema->set_max_doc_count_per_segment(max_doc_count);

  // Scalar fields followed by array fields. The order is significant because
  // fields are added to the schema in exactly this sequence.
  struct PlainFieldSpec {
    const char *field_name;
    DataType type;
  };
  static const PlainFieldSpec kPlainFields[] = {
      // scalar
      {"int32", DataType::INT32},
      {"string", DataType::STRING},
      {"uint32", DataType::UINT32},
      {"bool", DataType::BOOL},
      {"float", DataType::FLOAT},
      {"double", DataType::DOUBLE},
      {"int64", DataType::INT64},
      {"uint64", DataType::UINT64},
      // array
      {"array_int32", DataType::ARRAY_INT32},
      {"array_string", DataType::ARRAY_STRING},
      {"array_uint32", DataType::ARRAY_UINT32},
      {"array_bool", DataType::ARRAY_BOOL},
      {"array_float", DataType::ARRAY_FLOAT},
      {"array_double", DataType::ARRAY_DOUBLE},
      {"array_int64", DataType::ARRAY_INT64},
      {"array_uint64", DataType::ARRAY_UINT64},
  };
  for (const auto &spec : kPlainFields) {
    schema->add_field(std::make_shared<FieldSchema>(
        spec.field_name, spec.type, nullable, scalar_index_params));
  }

  // Dense vectors.
  schema->add_field(std::make_shared<FieldSchema>(
      "dense_fp32", DataType::VECTOR_FP32, 128, false,
      vector_index_params ? vector_index_params
                          : std::make_shared<FlatIndexParams>(MetricType::IP)));
  schema->add_field(std::make_shared<FieldSchema>(
      "dense_fp16", DataType::VECTOR_FP16, 128, false,
      std::make_shared<FlatIndexParams>(MetricType::IP)));
  schema->add_field(std::make_shared<FieldSchema>(
      "dense_int8", DataType::VECTOR_INT8, 128, false,
      std::make_shared<FlatIndexParams>(MetricType::IP)));

  // Sparse vectors. The caller's parameters are cloned so that forcing the
  // metric does not modify the object used for "dense_fp32".
  IndexParams::Ptr sparse_index_params;
  if (vector_index_params) {
    sparse_index_params = vector_index_params->clone();
    auto v = std::dynamic_pointer_cast<VectorIndexParams>(sparse_index_params);
    if (v) {
      // sparse always use IP
      v->set_metric_type(MetricType::IP);
    } else {
      // The clone is null or not a VectorIndexParams, so its metric cannot be
      // forced to IP. Drop it and let the FlatIndexParams(IP) default below
      // apply instead of dereferencing a null pointer.
      sparse_index_params.reset();
    }
  }
  schema->add_field(std::make_shared<FieldSchema>(
      "sparse_fp32", DataType::SPARSE_VECTOR_FP32, 128, false,
      sparse_index_params ? sparse_index_params
                          : std::make_shared<FlatIndexParams>(MetricType::IP)));
  schema->add_field(std::make_shared<FieldSchema>(
      "sparse_fp16", DataType::SPARSE_VECTOR_FP16, 128, false,
      std::make_shared<FlatIndexParams>(MetricType::IP)));

  return schema;
}

// Convenience wrapper around CreateNormalSchema: supplies
// InvertIndexParams(enable_optimize) as the scalar index parameters and
// leaves the remaining arguments at their declared defaults.
CollectionSchema::Ptr TestHelper::CreateSchemaWithScalarIndex(
    bool nullable, bool enable_optimize, std::string name) {
  IndexParams::Ptr invert_params =
      std::make_shared<InvertIndexParams>(enable_optimize);
  return CreateNormalSchema(nullable, name, invert_params);
}

// Convenience wrapper around CreateNormalSchema: passes no scalar index and
// the given vector index parameters, substituting HnswIndexParams(IP) when
// the caller supplies a null pointer.
CollectionSchema::Ptr TestHelper::CreateSchemaWithVectorIndex(
    bool nullable, std::string name, IndexParams::Ptr vector_index_params) {
  if (!vector_index_params) {
    vector_index_params = std::make_shared<HnswIndexParams>(MetricType::IP);
  }
  return CreateNormalSchema(nullable, name, nullptr, vector_index_params);
}

// Convenience wrapper around CreateNormalSchema: a non-nullable "demo" schema
// with no explicit index parameters and the given per-segment doc limit.
CollectionSchema::Ptr TestHelper::CreateSchemaWithMaxDocCount(
    uint64_t doc_count) {
  const bool nullable = false;
  const std::string schema_name = "demo";
  return CreateNormalSchema(nullable, schema_name, nullptr, nullptr,
                            doc_count);
}

// Formats a primary key as "pk_" followed by the decimal doc id.
std::string TestHelper::MakePK(const uint64_t doc_id) {
  std::string pk("pk_");
  pk += std::to_string(doc_id);
  return pk;
}

// Parses the doc id out of a primary key by skipping its first three
// characters (the length of the "pk_" prefix) and converting the rest with
// std::stoull. The prefix itself is not validated; substr/stoull throw if the
// key is shorter than three characters or the remainder is not a number.
uint64_t TestHelper::ExtractDocId(const std::string &pk) {
  const std::string id_digits = pk.substr(3);
  return std::stoull(id_digits);
}

// Builds a document whose every field is populated with a value derived from
// `doc_id`, according to the field's data type in `schema`.
//
//   doc_id - seed for all generated values.
//   schema - supplies the fields (name, data type, dimension) to populate.
//   pk     - primary key to use; when empty, MakePK(doc_id) is used instead.
//
// Throws std::runtime_error for a data type not covered by the switch below.
Doc TestHelper::CreateDoc(const uint64_t doc_id, const CollectionSchema &schema,
                          std::string pk) {
  Doc doc;
  const std::string doc_pk = pk.empty() ? MakePK(doc_id) : pk;
  doc.set_pk(doc_pk);

  for (const auto &field : schema.fields()) {
    const auto &field_name = field->name();

    // Stores a vector holding `count` copies of `value` under this field.
    const auto set_filled = [&doc, &field_name](auto count, auto value) {
      using Elem = decltype(value);
      doc.set<std::vector<Elem>>(field_name, std::vector<Elem>(count, value));
    };

    switch (field->data_type()) {
      // ---- scalars ----
      case DataType::BINARY: {
        const std::string payload("binary_" + std::to_string(doc_id));
        doc.set<std::string>(field_name, payload);
        break;
      }
      case DataType::BOOL:
        doc.set<bool>(field_name, doc_id % 10 == 0);
        break;
      case DataType::INT32:
        doc.set<int32_t>(field_name, (int32_t)doc_id);
        break;
      case DataType::INT64:
        doc.set<int64_t>(field_name, (int64_t)doc_id);
        break;
      case DataType::UINT32:
        doc.set<uint32_t>(field_name, (uint32_t)doc_id);
        break;
      case DataType::UINT64:
        doc.set<uint64_t>(field_name, (uint64_t)doc_id);
        break;
      case DataType::FLOAT:
        doc.set<float>(field_name, (float)doc_id);
        break;
      case DataType::DOUBLE:
        doc.set<double>(field_name, (double)doc_id);
        break;
      case DataType::STRING:
        doc.set<std::string>(field_name, "value_" + std::to_string(doc_id));
        break;

      // ---- arrays ----
      case DataType::ARRAY_BINARY: {
        // Unlike the other arrays, the length here is doc_id % 10 (0..9).
        std::vector<std::string> blobs;
        const size_t blob_count = doc_id % 10;
        for (size_t n = 0; n < blob_count; ++n) {
          blobs.push_back("bin_" + std::to_string(n));
        }
        doc.set<std::vector<std::string>>(field_name, blobs);
        break;
      }
      case DataType::ARRAY_BOOL:
        set_filled(10, doc_id % 10 == 0);
        break;
      case DataType::ARRAY_INT32:
        set_filled(10, (int32_t)doc_id);
        break;
      case DataType::ARRAY_INT64:
        set_filled(10, (int64_t)doc_id);
        break;
      case DataType::ARRAY_UINT32:
        set_filled(10, (uint32_t)doc_id);
        break;
      case DataType::ARRAY_UINT64:
        set_filled(10, (uint64_t)doc_id);
        break;
      case DataType::ARRAY_FLOAT:
        set_filled(10, (float)doc_id);
        break;
      case DataType::ARRAY_DOUBLE:
        set_filled(10, (double)doc_id);
        break;
      case DataType::ARRAY_STRING:
        set_filled(10, "value_" + std::to_string(doc_id));
        break;

      // ---- dense vectors: field->dimension() copies of one value ----
      case DataType::VECTOR_BINARY32:
        set_filled(field->dimension(), uint32_t(doc_id + 0.1));
        break;
      case DataType::VECTOR_BINARY64:
        set_filled(field->dimension(), uint64_t(doc_id + 0.1));
        break;
      case DataType::VECTOR_FP32:
        set_filled(field->dimension(), float(doc_id + 0.1));
        break;
      case DataType::VECTOR_FP64:
        set_filled(field->dimension(), double(doc_id + 0.1));
        break;
      case DataType::VECTOR_FP16:
        set_filled(field->dimension(),
                   static_cast<float16_t>(float(doc_id + 0.1)));
        break;
      case DataType::VECTOR_INT8:
        set_filled(field->dimension(), (int8_t)doc_id);
        break;
      case DataType::VECTOR_INT16:
        set_filled(field->dimension(), (int16_t)doc_id);
        break;

      // ---- sparse vectors: indices 0..99, all with the same value ----
      case DataType::SPARSE_VECTOR_FP16: {
        std::pair<std::vector<uint32_t>, std::vector<float16_t>> sparse_vec;
        for (uint32_t slot = 0; slot < 100; ++slot) {
          sparse_vec.first.push_back(slot);
          sparse_vec.second.push_back(float16_t(float(doc_id + 0.1)));
        }
        doc.set<std::pair<std::vector<uint32_t>, std::vector<float16_t>>>(
            field_name, sparse_vec);
        break;
      }
      case DataType::SPARSE_VECTOR_FP32: {
        std::pair<std::vector<uint32_t>, std::vector<float>> sparse_vec;
        for (uint32_t slot = 0; slot < 100; ++slot) {
          sparse_vec.first.push_back(slot);
          sparse_vec.second.push_back(float(doc_id + 0.1));
        }
        doc.set<std::pair<std::vector<uint32_t>, std::vector<float>>>(
            field_name, sparse_vec);
        break;
      }

      default:
        std::cout << "Unsupported data type: " << field_name << std::endl;
        throw std::runtime_error("Unsupported vector data type");
    }
  }

  return doc;
}

// Builds a document in which scalar and array fields are left unset and only
// vector fields receive values derived from `doc_id`.
//
//   doc_id - seed for the generated vector values.
//   schema - supplies the fields (name, data type, dimension) to inspect.
//   pk     - primary key to use; when empty, MakePK(doc_id) is used instead.
//
// Throws std::runtime_error for any data type not listed in the switch below
// (this includes VECTOR_BINARY32 / VECTOR_BINARY64, which have no case here).
Doc TestHelper::CreateDocNull(const uint64_t doc_id,
                              const CollectionSchema &schema, std::string pk) {
  Doc new_doc;
  if (pk.empty()) {
    // Use the shared helper so the key format is defined in one place.
    pk = MakePK(doc_id);
  }
  new_doc.set_pk(pk);

  for (auto &field : schema.fields()) {
    switch (field->data_type()) {
      // Scalar and array types: nothing is set for these fields.
      case DataType::BINARY:
      case DataType::BOOL:
      case DataType::INT32:
      case DataType::INT64:
      case DataType::UINT32:
      case DataType::UINT64:
      case DataType::FLOAT:
      case DataType::DOUBLE:
      case DataType::STRING:
      case DataType::ARRAY_BINARY:
      case DataType::ARRAY_BOOL:
      case DataType::ARRAY_INT32:
      case DataType::ARRAY_INT64:
      case DataType::ARRAY_UINT32:
      case DataType::ARRAY_UINT64:
      case DataType::ARRAY_FLOAT:
      case DataType::ARRAY_DOUBLE:
      case DataType::ARRAY_STRING:
        break;

      // Dense vectors: field->dimension() copies of a single value.
      case DataType::VECTOR_FP32:
        new_doc.set<std::vector<float>>(
            field->name(),
            std::vector<float>(field->dimension(), float(doc_id + 0.1)));
        break;
      case DataType::VECTOR_FP64:
        new_doc.set<std::vector<double>>(
            field->name(),
            std::vector<double>(field->dimension(), double(doc_id + 0.1)));
        break;
      case DataType::VECTOR_FP16:
        new_doc.set<std::vector<float16_t>>(
            field->name(), std::vector<float16_t>(
                               field->dimension(),
                               static_cast<float16_t>(float(doc_id + 0.1))));
        break;
      case DataType::VECTOR_INT8:
        new_doc.set<std::vector<int8_t>>(
            field->name(),
            std::vector<int8_t>(field->dimension(), (int8_t)doc_id));
        break;
      case DataType::VECTOR_INT16:
        new_doc.set<std::vector<int16_t>>(
            field->name(),
            std::vector<int16_t>(field->dimension(), (int16_t)doc_id));
        break;

      // Sparse vectors: indices 0..99, every entry carrying the same value.
      // The pair is filled in place to avoid copying two temporary vectors.
      case DataType::SPARSE_VECTOR_FP16: {
        std::pair<std::vector<uint32_t>, std::vector<float16_t>>
            sparse_float_vec;
        sparse_float_vec.first.reserve(100);
        sparse_float_vec.second.reserve(100);
        for (uint32_t i = 0; i < 100; i++) {
          sparse_float_vec.first.push_back(i);
          sparse_float_vec.second.push_back(float16_t(float(doc_id + 0.1)));
        }
        new_doc.set<std::pair<std::vector<uint32_t>, std::vector<float16_t>>>(
            field->name(), sparse_float_vec);
        break;
      }
      case DataType::SPARSE_VECTOR_FP32: {
        std::pair<std::vector<uint32_t>, std::vector<float>> sparse_float_vec;
        sparse_float_vec.first.reserve(100);
        sparse_float_vec.second.reserve(100);
        for (uint32_t i = 0; i < 100; i++) {
          sparse_float_vec.first.push_back(i);
          sparse_float_vec.second.push_back(float(doc_id + 0.1));
        }
        new_doc.set<std::pair<std::vector<uint32_t>, std::vector<float>>>(
            field->name(), sparse_float_vec);
        break;
      }
      default:
        throw std::runtime_error("Unsupported vector data type");
    }
  }

  return new_doc;
}

// Writes one generated document per id in [start_doc_id, end_doc_id) into
// `segment`, one call per document.
//
//   nullable - true: documents come from CreateDocNull; false: from CreateDoc.
//   upsert   - true: Segment::Upsert is used; false: Segment::Insert.
//   batch    - not consulted by this function.
//
// Returns the first failing status (via CHECK_RETURN_STATUS), otherwise OK.
// A null `segment` results in OK without any document being created.
Status TestHelper::SegmentInsertDoc(const Segment::Ptr &segment,
                                    const CollectionSchema &schema,
                                    const uint64_t start_doc_id,
                                    const uint64_t end_doc_id, bool nullable,
                                    bool upsert, bool batch) {
  if (!segment) {
    return Status::OK();
  }

  uint64_t current_id = start_doc_id;
  while (current_id < end_doc_id) {
    Doc generated = nullable ? CreateDocNull(current_id, schema)
                             : CreateDoc(current_id, schema);

    Status write_status;
    if (upsert) {
      write_status = segment->Upsert(generated);
    } else {
      write_status = segment->Insert(generated);
    }
    CHECK_RETURN_STATUS(write_status);

    ++current_id;
  }
  return Status::OK();
}

// Writes one generated document per id in [start_doc_id, end_doc_id) into
// `collection`.
//
//   nullable - true: documents come from CreateDocNull; false: from CreateDoc.
//   upsert   - true: Collection::Upsert is used; false: Collection::Insert.
//   batch    - true: all documents are sent in a single write call;
//              false: one write call per document, stopping at the first
//              failure.
//
// Returns InvalidArgument for a null collection, the error of a failed write
// call, the first non-OK per-document result, InternalError when a write
// returns an empty result list, and OK otherwise. Note that in batch mode an
// empty id range still issues one write call with an empty document list.
Status TestHelper::CollectionInsertDoc(const Collection::Ptr &collection,
                                       const uint64_t start_doc_id,
                                       const uint64_t end_doc_id, bool nullable,
                                       bool upsert, bool batch) {
  if (!collection) {
    return Status::InvalidArgument("collection is nullptr");
  }
  auto schema = collection->Schema().value();
  auto make_doc = [&](uint64_t doc_id) -> Doc {
    return nullable ? CreateDocNull(doc_id, schema) : CreateDoc(doc_id, schema);
  };
  // Performs one write call and folds the per-document results into a single
  // status.
  auto exec_write = [&](std::vector<Doc> &docs) -> Status {
    Result<WriteResults> result =
        upsert ? collection->Upsert(docs) : collection->Insert(docs);

    if (!result.has_value()) {
      LOG_ERROR("Failed to %s docs (count=%zu), error: %s.",
                upsert ? "upsert" : "insert", docs.size(),
                result.error().message().c_str());
      return result.error();
    }

    const auto &write_results = result.value();
    if (write_results.empty()) {
      return Status::InternalError("WriteResults is unexpectedly empty");
    }

    for (const auto &wr : write_results) {
      if (!wr.ok()) {
        return wr;
      }
    }
    return Status::OK();
  };

  if (batch) {
    std::vector<Doc> docs;
    // Only reserve for a forward range: with end_doc_id < start_doc_id the
    // unsigned difference wraps to a huge value and reserve() would throw.
    if (end_doc_id > start_doc_id) {
      docs.reserve(end_doc_id - start_doc_id);
    }
    for (uint64_t doc_id = start_doc_id; doc_id < end_doc_id; ++doc_id) {
      docs.emplace_back(make_doc(doc_id));
    }
    return exec_write(docs);
  } else {
    // The same one-element vector is reused for every write call.
    std::vector<Doc> single_doc;
    single_doc.reserve(1);  // optional optimization

    for (uint64_t doc_id = start_doc_id; doc_id < end_doc_id; ++doc_id) {
      single_doc.clear();
      single_doc.push_back(make_doc(doc_id));
      Status s = exec_write(single_doc);
      if (!s.ok()) {
        LOG_ERROR("Failed at doc_id=%" PRIu64 ", doc: %s", doc_id,
                  single_doc[0].to_detail_string().c_str());
        return s;
      }
    }
  }
  return Status::OK();
}

// Same as CollectionInsertDoc with the upsert flag fixed to true.
Status TestHelper::CollectionUpsertDoc(const Collection::Ptr &collection,
                                       const uint64_t start_doc_id,
                                       const uint64_t end_doc_id, bool nullable,
                                       bool batch) {
  const bool use_upsert = true;
  return CollectionInsertDoc(collection, start_doc_id, end_doc_id, nullable,
                             use_upsert, batch);
}

// Creates and opens a segment via Segment::CreateAndOpen, then fills it with
// `doc_count` generated documents starting at `start_doc_id` (see
// SegmentInsertDoc for the meaning of `nullable` and `upsert`).
//
// Returns the populated segment, or nullptr when either creation or any
// document write fails; both failures are logged.
Segment::Ptr TestHelper::CreateSegmentWithDoc(
    const std::string &col_path, const CollectionSchema &schema,
    SegmentID segment_id, uint64_t min_doc_id, const IDMap::Ptr &id_map,
    const DeleteStore::Ptr &delete_store,
    const VersionManager::Ptr &version_manager, const SegmentOptions &options,
    uint64_t start_doc_id, uint32_t doc_count, bool nullable, bool upsert) {
  auto result =
      Segment::CreateAndOpen(col_path, schema, segment_id, min_doc_id, id_map,
                             delete_store, version_manager, options);

  if (!result.has_value()) {
    // Log the reason, as the collection counterpart does, so a nullptr return
    // can be diagnosed from the test output.
    LOG_ERROR("Failed to create segment, err: %s",
              result.error().message().c_str());
    return nullptr;
  }

  auto segment = std::move(result).value();

  auto s = SegmentInsertDoc(segment, schema, start_doc_id,
                            start_doc_id + doc_count, nullable, upsert);
  if (!s.ok()) {
    LOG_ERROR("Failed to insert doc, err: %s", s.message().c_str());
    return nullptr;
  }

  return segment;
}

// Creates and opens a collection at `path`, then writes `doc_count` generated
// documents starting at `start_doc_id` through CollectionInsertDoc.
//
// Returns the populated collection, or nullptr (after logging the error) when
// creation or the document write fails.
Collection::Ptr TestHelper::CreateCollectionWithDoc(
    const std::string &path, const CollectionSchema &schema,
    const CollectionOptions &options, uint64_t start_doc_id, uint32_t doc_count,
    bool nullable, bool upsert) {
  auto created = Collection::CreateAndOpen(path, schema, options);
  if (!created.has_value()) {
    LOG_ERROR("Failed to create collection, err: %s",
              created.error().message().c_str());
    return nullptr;
  }

  auto opened = std::move(created).value();

  const uint64_t end_doc_id = start_doc_id + doc_count;
  auto write_status =
      CollectionInsertDoc(opened, start_doc_id, end_doc_id, nullable, upsert);
  if (write_status.ok()) {
    return opened;
  }

  LOG_ERROR("Failed to insert doc, err: %s", write_status.message().c_str());
  return nullptr;
}

// Writes a synthetic table with one row per id in [start_doc_id, end_doc_id)
// to `filepath`.
//
// Columns, in order: GLOBAL_DOC_ID (uint64), USER_ID (utf8), "id" (int32),
// "name" (utf8), "score" (float64), followed by nine list columns
// (binary, utf8, boolean, int32, int64, uint32, uint64, float32, float64),
// each holding 128 elements per row.
//
//   format     - FileFormat::PARQUET writes a single table with row groups of
//                at most `batch_size` rows; FileFormat::IPC writes the rows as
//                consecutive record batches of at most `batch_size` rows. For
//                any other value the file is opened and closed without data.
//   batch_size - rows per row group / record batch. Must be non-zero when the
//                id range is non-empty.
//
// Returns Invalid for a zero batch_size with a non-empty range, the first
// failing Arrow/Parquet status otherwise, or OK on success.
arrow::Status TestHelper::WriteTestFile(const std::string &filepath,
                                        FileFormat format,
                                        uint32_t start_doc_id,
                                        uint32_t end_doc_id,
                                        uint32_t batch_size) {
  // A zero batch size can never advance through a non-empty range (the
  // batching loop would spin forever), so reject it before doing any work.
  if (batch_size == 0 && start_doc_id < end_doc_id) {
    return arrow::Status::Invalid("batch_size must be greater than 0");
  }

  // Define schema with additional list types
  auto schema = arrow::schema(
      {arrow::field(GLOBAL_DOC_ID, arrow::uint64()),
       arrow::field(USER_ID, arrow::utf8()), arrow::field("id", arrow::int32()),
       arrow::field("name", arrow::utf8()),
       arrow::field("score", arrow::float64()),
       arrow::field("list_binary", arrow::list(arrow::binary())),
       arrow::field("list_utf8", arrow::list(arrow::utf8())),
       arrow::field("list_boolean", arrow::list(arrow::boolean())),
       arrow::field("list_int32", arrow::list(arrow::int32())),
       arrow::field("list_int64", arrow::list(arrow::int64())),
       arrow::field("list_uint32", arrow::list(arrow::uint32())),
       arrow::field("list_uint64", arrow::list(arrow::uint64())),
       arrow::field("list_float32", arrow::list(arrow::float32())),
       arrow::field("list_float64", arrow::list(arrow::float64()))});

  // Create builders
  auto g_doc_id_builder = std::make_shared<arrow::UInt64Builder>();
  auto uid_builder = std::make_shared<arrow::StringBuilder>();
  auto id_builder = std::make_shared<arrow::Int32Builder>();
  auto name_builder = std::make_shared<arrow::StringBuilder>();
  auto score_builder = std::make_shared<arrow::DoubleBuilder>();

  // Array field builders
  auto list_binary_builder = std::make_shared<arrow::ListBuilder>(
      arrow::default_memory_pool(), std::make_shared<arrow::BinaryBuilder>());
  auto list_utf8_builder = std::make_shared<arrow::ListBuilder>(
      arrow::default_memory_pool(), std::make_shared<arrow::StringBuilder>());
  auto list_boolean_builder = std::make_shared<arrow::ListBuilder>(
      arrow::default_memory_pool(), std::make_shared<arrow::BooleanBuilder>());
  auto list_int32_builder = std::make_shared<arrow::ListBuilder>(
      arrow::default_memory_pool(), std::make_shared<arrow::Int32Builder>());
  auto list_int64_builder = std::make_shared<arrow::ListBuilder>(
      arrow::default_memory_pool(), std::make_shared<arrow::Int64Builder>());
  auto list_uint32_builder = std::make_shared<arrow::ListBuilder>(
      arrow::default_memory_pool(), std::make_shared<arrow::UInt32Builder>());
  auto list_uint64_builder = std::make_shared<arrow::ListBuilder>(
      arrow::default_memory_pool(), std::make_shared<arrow::UInt64Builder>());
  auto list_float32_builder = std::make_shared<arrow::ListBuilder>(
      arrow::default_memory_pool(), std::make_shared<arrow::FloatBuilder>());
  auto list_float64_builder = std::make_shared<arrow::ListBuilder>(
      arrow::default_memory_pool(), std::make_shared<arrow::DoubleBuilder>());

  // Cast child builders for easier access
  auto binary_builder =
      static_cast<arrow::BinaryBuilder *>(list_binary_builder->value_builder());
  auto utf8_child_builder =
      static_cast<arrow::StringBuilder *>(list_utf8_builder->value_builder());
  auto boolean_child_builder = static_cast<arrow::BooleanBuilder *>(
      list_boolean_builder->value_builder());
  auto int32_child_builder =
      static_cast<arrow::Int32Builder *>(list_int32_builder->value_builder());
  auto int64_child_builder =
      static_cast<arrow::Int64Builder *>(list_int64_builder->value_builder());
  auto uint32_child_builder =
      static_cast<arrow::UInt32Builder *>(list_uint32_builder->value_builder());
  auto uint64_child_builder =
      static_cast<arrow::UInt64Builder *>(list_uint64_builder->value_builder());
  auto float32_child_builder =
      static_cast<arrow::FloatBuilder *>(list_float32_builder->value_builder());
  auto float64_child_builder = static_cast<arrow::DoubleBuilder *>(
      list_float64_builder->value_builder());

  // Fill data
  for (uint32_t i = start_doc_id; i < end_doc_id; ++i) {
    ARROW_RETURN_NOT_OK(g_doc_id_builder->Append(i + 1));
    ARROW_RETURN_NOT_OK(uid_builder->Append("user_" + std::to_string(i + 1)));
    ARROW_RETURN_NOT_OK(id_builder->Append(i + 1));
    ARROW_RETURN_NOT_OK(name_builder->Append("Name" + std::to_string(i)));
    ARROW_RETURN_NOT_OK(score_builder->Append(80.0 + i));

    // Number of elements appended to every list column for this row.
    const int dim = 128;
    // Append list_binary data
    ARROW_RETURN_NOT_OK(list_binary_builder->Append());
    for (int j = 0; j < dim; ++j) {
      std::string binary_data =
          "binary_" + std::to_string(i) + "_" + std::to_string(j);
      ARROW_RETURN_NOT_OK(binary_builder->Append(binary_data));
    }

    // Append list_utf8 data
    ARROW_RETURN_NOT_OK(list_utf8_builder->Append());
    for (int j = 0; j < dim; ++j) {
      ARROW_RETURN_NOT_OK(utf8_child_builder->Append(
          "string_" + std::to_string(i) + "_" + std::to_string(j)));
    }

    // Append list_boolean data
    ARROW_RETURN_NOT_OK(list_boolean_builder->Append());
    for (int j = 0; j < dim; ++j) {
      ARROW_RETURN_NOT_OK(boolean_child_builder->Append((i + j) % 2 == 0));
    }

    // Append list_int32 data
    ARROW_RETURN_NOT_OK(list_int32_builder->Append());
    for (int j = 0; j < dim; ++j) {
      ARROW_RETURN_NOT_OK(int32_child_builder->Append(i * 10 + j));
    }

    // Append list_int64 data
    ARROW_RETURN_NOT_OK(list_int64_builder->Append());
    for (int j = 0; j < dim; ++j) {
      ARROW_RETURN_NOT_OK(
          int64_child_builder->Append(static_cast<int64_t>(i) * 100 + j));
    }

    // Append list_uint32 data
    ARROW_RETURN_NOT_OK(list_uint32_builder->Append());
    for (int j = 0; j < dim; ++j) {
      ARROW_RETURN_NOT_OK(
          uint32_child_builder->Append(static_cast<uint32_t>(i) * 10 + j));
    }

    // Append list_uint64 data
    ARROW_RETURN_NOT_OK(list_uint64_builder->Append());
    for (int j = 0; j < dim; ++j) {
      ARROW_RETURN_NOT_OK(
          uint64_child_builder->Append(static_cast<uint64_t>(i) * 100 + j));
    }

    // Append list_float32 data
    ARROW_RETURN_NOT_OK(list_float32_builder->Append());
    for (int j = 0; j < dim; ++j) {
      ARROW_RETURN_NOT_OK(
          float32_child_builder->Append(static_cast<float>(i) + j * 0.1f));
    }

    // Append list_float64 data
    ARROW_RETURN_NOT_OK(list_float64_builder->Append());
    for (int j = 0; j < dim; ++j) {
      ARROW_RETURN_NOT_OK(
          float64_child_builder->Append(static_cast<double>(i) + j * 0.01));
    }
  }

  // Construct arrays. The builders are listed in schema column order so the
  // resulting `columns` vector lines up with `schema` position by position.
  const std::vector<std::shared_ptr<arrow::ArrayBuilder>> builders = {
      g_doc_id_builder,     uid_builder,          id_builder,
      name_builder,         score_builder,        list_binary_builder,
      list_utf8_builder,    list_boolean_builder, list_int32_builder,
      list_int64_builder,   list_uint32_builder,  list_uint64_builder,
      list_float32_builder, list_float64_builder};
  std::vector<std::shared_ptr<arrow::Array>> columns;
  columns.reserve(builders.size());
  for (const auto &builder : builders) {
    std::shared_ptr<arrow::Array> column;
    ARROW_RETURN_NOT_OK(builder->Finish(&column));
    columns.push_back(std::move(column));
  }

  // Row count as a signed 64-bit value (the type Arrow uses for lengths); an
  // inverted range is treated as empty.
  const int64_t doc_count =
      end_doc_id > start_doc_id
          ? static_cast<int64_t>(end_doc_id) - start_doc_id
          : 0;

  // Open output stream
  ARROW_ASSIGN_OR_RAISE(auto out, arrow::io::FileOutputStream::Open(filepath));

  if (format == FileFormat::PARQUET) {
    // Parquet write logic - one table, split into row groups by the writer
    auto table = arrow::Table::Make(schema, columns);

    parquet::WriterProperties::Builder builder;
    builder.data_pagesize(1024);
    // at most batch_size rows per row group
    builder.max_row_group_length(batch_size);
    auto props = builder.build();

    auto status = parquet::arrow::WriteTable(
        *table, arrow::default_memory_pool(), out, batch_size, props);
    if (!status.ok()) {
      std::cerr << "Write failed: " << status.ToString() << std::endl;
      return status;
    }

    std::cout << "Wrote test Parquet file with multiple row groups: "
              << filepath << std::endl;
  } else if (format == FileFormat::IPC) {
    // IPC write logic - write multiple record batches
    ARROW_ASSIGN_OR_RAISE(auto writer,
                          arrow::ipc::MakeFileWriter(out, schema));

    // Slice every column into windows of at most batch_size rows and write
    // each window as its own record batch.
    size_t batch_count = 0;
    for (int64_t start = 0; start < doc_count; start += batch_size) {
      const int64_t rows = std::min<int64_t>(batch_size, doc_count - start);

      std::vector<std::shared_ptr<arrow::Array>> slices;
      slices.reserve(columns.size());
      for (const auto &column : columns) {
        slices.push_back(column->Slice(start, rows));
      }

      auto batch = arrow::RecordBatch::Make(schema, rows, std::move(slices));
      ARROW_RETURN_NOT_OK(writer->WriteRecordBatch(*batch));
      ++batch_count;
    }

    ARROW_RETURN_NOT_OK(writer->Close());

    std::cout << "Wrote test IPC file with " << batch_count
              << " batches: " << filepath << std::endl;
  }

  ARROW_RETURN_NOT_OK(out->Close());
  return arrow::Status::OK();
}

================================================
FILE: tests/db/index/utils/utils.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <cstdint>
#include <iostream>
#include <memory>
#include <string>
#include <arrow/array/array_binary.h>
#include <arrow/io/file.h>
#include <arrow/ipc/reader.h>
#include <arrow/ipc/writer.h>
#include <arrow/pretty_print.h>
#include <arrow/result.h>
#include <arrow/table.h>
#include <gtest/gtest.h>
#include <parquet/arrow/writer.h>
#include "db/common/constants.h"
#include "db/common/typedef.h"
#include "db/index/common/meta.h"
#include "db/index/segment/segment.h"
#include "db/index/storage/store_helper.h"
#include "zvec/db/collection.h"
#include "zvec/db/doc.h"
#include "zvec/db/schema.h"
#include "zvec/db/type.h"

namespace zvec::test {

// Order-insensitive comparison: reports whether `a` and `b` contain the same
// elements with the same multiplicities. Both arguments are taken by value,
// so sorting happens on local copies and the caller's vectors are untouched.
template <typename T>
bool vectors_equal_when_sorted(std::vector<T> a, std::vector<T> b) {
  const bool same_length = (a.size() == b.size());
  if (same_length) {
    // Sorting is only worthwhile once the lengths are known to agree.
    std::sort(a.begin(), a.end());
    std::sort(b.begin(), b.end());
  }
  // The length guard short-circuits first, so std::equal never walks past
  // the end of `b`.
  return same_length && std::equal(a.begin(), a.end(), b.begin());
}

// Dot product of `vec1` and `vec2`, accumulated in double precision.
// The iteration count is taken from vec1.size(); `vec2` is indexed with the
// same positions, so it must be at least as long as `vec1`.
template <typename T>
double inner_produce_double(const std::vector<T> &vec1,
                            const std::vector<T> &vec2) {
  const size_t dim = vec1.size();
  double acc = 0.0;
  for (size_t k = 0; k != dim; ++k) {
    // Each product is formed in the element type's arithmetic and only then
    // widened into the double accumulator.
    const auto product = vec1[k] * vec2[k];
    acc += product;
  }
  return acc;
}


// Cosine distance between two dense vectors: 1 - dot(a, b) / (|a| * |b|).
// Returns 0 when either vector has zero norm, which avoids a division by
// zero. All intermediate math is done in double; the result is narrowed to
// float on return.
template <typename T>
inline float cosine_distance_dense(const std::vector<T> &vec1,
                                   const std::vector<T> &vec2) {
  const double length1 = std::sqrt(inner_produce_double(vec1, vec1));
  const double length2 = std::sqrt(inner_produce_double(vec2, vec2));
  if (length1 == 0.0f || length2 == 0.0f) {
    return 0.0f;
  }

  const double similarity =
      inner_produce_double(vec1, vec2) / (length1 * length2);
  return 1.0f - similarity;
}

// Inner-product "distance" between two dense vectors. The sum is accumulated
// in double and narrowed to float on return. `vec2` is read at every index
// of `vec1`, so it must be at least as long.
template <typename T>
inline float dp_distance_dense(const std::vector<T> &vec1,
                               const std::vector<T> &vec2) {
  const size_t dim = vec1.size();
  double acc = 0.0;
  size_t k = 0;
  while (k != dim) {
    acc += vec1[k] * vec2[k];
    ++k;
  }
  return acc;
}

// Sum of squared component differences between two dense vectors. No square
// root is applied, so the value returned is the squared Euclidean distance.
// Components are converted to float before subtracting; the running sum is
// kept in double and narrowed to float on return.
template <typename T>
inline float euclidean_distance_dense(const std::vector<T> &vec1,
                                      const std::vector<T> &vec2) {
  double squared_total = 0.0f;
  for (size_t k = 0, dim = vec1.size(); k < dim; ++k) {
    const float lhs = static_cast<float>(vec1[k]);
    const float rhs = static_cast<float>(vec2[k]);
    const float delta = lhs - rhs;
    squared_total += delta * delta;
  }
  return squared_total;
}

// Dispatches to the dense distance helper that matches `metric`:
//   COSINE -> cosine_distance_dense
//   L2     -> euclidean_distance_dense
//   IP     -> dp_distance_dense
// Any other metric raises std::invalid_argument.
template <typename T>
inline float distance_dense(const std::vector<T> &vec1,
                            const std::vector<T> &vec2, MetricType metric) {
  if (metric == MetricType::COSINE) {
    return cosine_distance_dense(vec1, vec2);
  }
  if (metric == MetricType::L2) {
    return euclidean_distance_dense(vec1, vec2);
  }
  if (metric == MetricType::IP) {
    return dp_distance_dense(vec1, vec2);
  }
  throw std::invalid_argument("Unsupported metric for FP32");
}

// Sparse vector representations used by the helpers below: `first` is the
// list of indices and `second` the list of values (distance_sparse passes
// them to sparse_dot_product in that order).
using SparseVecFP32 = std::pair<std::vector<uint32_t>, std::vector<float>>;
using SparseVecFP16 = std::pair<std::vector<uint32_t>, std::vector<float16_t>>;
// Default sparse vector type is the FP32 flavour.
using SparseVec = SparseVecFP32;

// Dot product of two sparse vectors given as parallel (index, value) lists.
// Both index lists are walked in lockstep, advancing whichever side holds the
// smaller index; this only pairs up every shared index when both lists are
// sorted ascending. Values are converted to float before multiplying, the sum
// is kept in double and narrowed to float on return.
template <typename T>
inline float sparse_dot_product(const std::vector<uint32_t> &idx1,
                                const std::vector<T> &val1,
                                const std::vector<uint32_t> &idx2,
                                const std::vector<T> &val2) {
  const size_t count1 = idx1.size();
  const size_t count2 = idx2.size();
  double acc = 0.0f;

  size_t lhs = 0;
  size_t rhs = 0;
  while (lhs < count1 && rhs < count2) {
    const uint32_t left_index = idx1[lhs];
    const uint32_t right_index = idx2[rhs];
    if (left_index < right_index) {
      ++lhs;
      continue;
    }
    if (right_index < left_index) {
      ++rhs;
      continue;
    }
    // Same index on both sides: contribute the product and advance both.
    acc += static_cast<float>(val1[lhs]) * static_cast<float>(val2[rhs]);
    ++lhs;
    ++rhs;
  }
  return acc;
}

inline float distance_sparse(const SparseVecFP32 &vec1,
                             const SparseVecFP32 &vec2) {
  return sparse_dot_product(vec1.first, vec1.second, vec2.first, vec2.second);
}

inline float distance_sparse(const SparseVecFP16 &vec1,
                             const SparseVecFP16 &vec2) {
  return sparse_dot_product(vec1.first, vec1.second, vec2.first, vec2.second);
}


// Collection of static helpers for the index tests. This header only
// declares them; no definitions appear in the class body.
// NOTE(review): behaviour notes below are inferred from names and signatures
// unless stated otherwise - confirm against the definitions.
class TestHelper {
 public:
  // --- Schema factories: each returns a shared CollectionSchema pointer. ---

  static CollectionSchema::Ptr CreateTempSchema();

  static CollectionSchema::Ptr CreateScalarSchema();

  // `max_doc_count` defaults to MAX_DOC_COUNT_PER_SEGMENT; both index-params
  // arguments default to nullptr.
  static CollectionSchema::Ptr CreateNormalSchema(
      bool nullable = false, std::string name = "demo",
      IndexParams::Ptr scalar_index_params = nullptr,
      IndexParams::Ptr vector_index_params = nullptr,
      uint64_t max_doc_count = MAX_DOC_COUNT_PER_SEGMENT);

  static CollectionSchema::Ptr CreateSchemaWithScalarIndex(
      bool nullable = false, bool enable_optimize = false,
      std::string name = "demo");

  static CollectionSchema::Ptr CreateSchemaWithVectorIndex(
      bool nullable = false, std::string name = "demo",
      IndexParams::Ptr vector_index_params = nullptr);

  static CollectionSchema::Ptr CreateSchemaWithMaxDocCount(uint64_t doc_count);

  // --- Primary-key helpers. ---
  // Presumably MakePK and ExtractDocId are inverses of each other - TODO
  // confirm.

  static std::string MakePK(const uint64_t doc_id);

  static uint64_t ExtractDocId(const std::string &pk);

  // --- Document factories. ---
  // `pk` defaults to an empty string; presumably a key is then derived from
  // `doc_id` - TODO confirm.

  static Doc CreateDoc(const uint64_t doc_id, const CollectionSchema &schema,
                       std::string pk = "");

  static Doc CreateDocNull(const uint64_t doc_id,
                           const CollectionSchema &schema, std::string pk = "");

  // --- Bulk insert / upsert helpers over a doc-id range. ---
  // NOTE(review): whether `end_doc_id` is inclusive or exclusive is not
  // visible from the declarations - confirm in the definitions.

  static Status SegmentInsertDoc(const Segment::Ptr &segment,
                                 const CollectionSchema &schema,
                                 const uint64_t start_doc_id,
                                 const uint64_t end_doc_id,
                                 bool nullable = false, bool upsert = false,
                                 bool batch = false);

  static Status CollectionInsertDoc(const Collection::Ptr &collection,
                                    const uint64_t start_doc_id,
                                    const uint64_t end_doc_id,
                                    bool nullable = false, bool upsert = false,
                                    bool batch = false);

  static Status CollectionUpsertDoc(const Collection::Ptr &collection,
                                    const uint64_t start_doc_id,
                                    const uint64_t end_doc_id,
                                    bool nullable = false, bool batch = false);

  // --- Factories that return a segment / collection; by their names they
  // are pre-populated with `doc_count` documents starting at `start_doc_id`
  // (TODO confirm). ---

  static Segment::Ptr CreateSegmentWithDoc(
      const std::string &col_path, const CollectionSchema &schema,
      SegmentID segment_id, uint64_t min_doc_id, const IDMap::Ptr &id_map,
      const DeleteStore::Ptr &delete_store,
      const VersionManager::Ptr &version_manager, const SegmentOptions &options,
      uint64_t start_doc_id, uint32_t doc_count, bool nullable = false,
      bool upsert = false);

  static Collection::Ptr CreateCollectionWithDoc(
      const std::string &path, const CollectionSchema &schema,
      const CollectionOptions &options, uint64_t start_doc_id,
      uint32_t doc_count, bool nullable = false, bool upsert = false);


  // Writes a test data file to `filepath` in the given `format` and returns
  // an arrow::Status. Defaults: doc ids 0..10 and a batch size of 3.
  // NOTE(review): `batch_size` presumably controls the Parquet row-group
  // length / IPC record-batch size - confirm in the definition.
  static arrow::Status WriteTestFile(const std::string &filepath,
                                     FileFormat format,
                                     uint32_t start_doc_id = 0,
                                     uint32_t end_doc_id = 10,
                                     uint32_t batch_size = 3);
};


}  // namespace zvec::test

================================================
FILE: tests/db/sqlengine/CMakeLists.txt
================================================

include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

# Extra linker flags used only on Apple platforms; they are passed to every
# test target below through LDFLAGS. On other platforms the variable stays
# unset and expands to nothing.
if(APPLE)
  set(APPLE_FRAMEWORK_LIBS
    -framework CoreFoundation
    -framework CoreGraphics
    -framework CoreData
    -framework CoreText
    -framework Security
    -framework Foundation
    # NOTE(review): the -Wl,-U,<symbol> entries presumably let the linker
    # accept these symbols as undefined at link time - confirm against ld docs.
    -Wl,-U,_MallocExtension_ReleaseFreeMemory
    -Wl,-U,_ProfilerStart
    -Wl,-U,_ProfilerStop
    -Wl,-U,_RegisterThriftProtocol
  )
endif()

# Create one test target per *_test.cc file in this directory. The target
# name is the source file name without directory or extension (NAME_WE).
file(GLOB ALL_TEST_SRCS *_test.cc)
foreach(CC_SRCS ${ALL_TEST_SRCS})
  get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE)
  # cc_gmock / cc_test_suite are not defined in this file; presumably they
  # come from the included cmake/bazel.cmake - TODO confirm.
  cc_gmock(
    NAME ${CC_TARGET} STRICT
    LIBS zvec_common
    zvec_proto
    zvec_sqlengine
    zvec_ailego
    core_metric
    core_utility
    core_quantizer
    core_knn_hnsw core_knn_hnsw_sparse sparsehash
    core_knn_flat core_knn_flat_sparse core_knn_ivf
    core_knn_hnsw_rabitq core_mix_reducer
    ${CMAKE_THREAD_LIBS_INIT}
    ${CMAKE_DL_LIBS}
    SRCS ${CC_SRCS}
    INCS . ${PROJECT_ROOT_DIR}/src ${PROJECT_ROOT_DIR}/src/db ${PROJECT_ROOT_DIR}/src/db/common
    LDFLAGS ${APPLE_FRAMEWORK_LIBS}
  )
  # Register the target under the zvec_sqlengine test suite.
  cc_test_suite(zvec_sqlengine ${CC_TARGET})
endforeach()


================================================
FILE: tests/db/sqlengine/contain_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License

#include <cstdint>
#include <cstdlib>
#include <memory>
#include <arrow/api.h>
#include <arrow/io/api.h>
#include <arrow/ipc/api.h>
#include <gtest/gtest.h>
#include "db/common/file_helper.h"
#include "db/index/segment/segment.h"
#include "db/sqlengine/sqlengine.h"
#include "zvec/db/index_params.h"
#include "zvec/db/schema.h"
#include "zvec/db/type.h"
#include "test_helper.h"

namespace zvec::sqlengine {

// Builds the document for `doc_id`: primary key "pk_<doc_id>" plus eight
// array fields. Every array holds the values 1..(doc_id % 100) in its own
// element type (strings are "name<i>", bools are true for even i). When
// doc_id is a multiple of 100 no array field is set at all.
static Doc create_doc(const uint64_t doc_id) {
  Doc doc;
  doc.set_pk("pk_" + std::to_string(doc_id));
  doc.set_doc_id(doc_id);

  const uint64_t element_count = doc_id % 100;
  if (element_count == 0) {
    return doc;
  }

  std::vector<std::string> names;
  std::vector<int32_t> int32_values;
  std::vector<int64_t> int64_values;
  std::vector<uint32_t> uint32_values;
  std::vector<uint64_t> uint64_values;
  std::vector<float> float_values;
  std::vector<double> double_values;
  std::vector<bool> flags;

  for (uint32_t value = 1; value <= element_count; ++value) {
    int32_values.push_back(value);
    int64_values.push_back(value);
    uint32_values.push_back(value);
    uint64_values.push_back(value);
    float_values.push_back(value);
    double_values.push_back(value);
    flags.push_back(value % 2 == 0);
    names.push_back("name" + std::to_string(value));
  }

  doc.set("i32_array", int32_values);
  doc.set("i64_array", int64_values);
  doc.set("u32_array", uint32_values);
  doc.set("u64_array", uint64_values);
  doc.set("fp32_array", float_values);
  doc.set("fp64_array", double_values);
  doc.set("bool_array", flags);
  doc.set("str_array", names);
  return doc;
}

// Fixture shared by the contain_all / contain_any tests. The suite-level
// setup builds one segment under seg_path_ with eight array fields and fills
// it with the documents produced by create_doc(); teardown drops the segment
// reference and removes the directory again.
class ContainTest : public testing::Test {
 protected:
  // Runs once before the first test of the suite.
  static void SetUpTestSuite() {
    // Start from a clean directory so leftovers of a previous run cannot
    // leak into this one.
    FileHelper::RemoveDirectory(seg_path_);
    FileHelper::CreateDirectory(seg_path_);

    // NOTE(review): invert_params is never referenced below - every
    // FieldSchema receives nullptr as its last argument. Confirm whether the
    // fields were meant to use it or whether this local can be dropped.
    auto invert_params = std::make_shared<InvertIndexParams>(true);
    collection_schema_ = std::make_shared<CollectionSchema>(
        "test_collection",
        std::vector<FieldSchema::Ptr>{
            std::make_shared<FieldSchema>("str_array", DataType::ARRAY_STRING,
                                          true, nullptr),
            std::make_shared<FieldSchema>("i32_array", DataType::ARRAY_INT32,
                                          true, nullptr),
            std::make_shared<FieldSchema>("i64_array", DataType::ARRAY_INT64,
                                          true, nullptr),
            std::make_shared<FieldSchema>("u32_array", DataType::ARRAY_UINT32,
                                          true, nullptr),
            std::make_shared<FieldSchema>("u64_array", DataType::ARRAY_UINT64,
                                          true, nullptr),
            std::make_shared<FieldSchema>("fp32_array", DataType::ARRAY_FLOAT,
                                          true, nullptr),
            std::make_shared<FieldSchema>("fp64_array", DataType::ARRAY_DOUBLE,
                                          true, nullptr),
            std::make_shared<FieldSchema>("bool_array", DataType::ARRAY_BOOL,
                                          true, nullptr),

        });

    // A broken fixture makes every test meaningless, so setup failures log,
    // record a gtest failure and then terminate the whole process.
    auto segment = create_segment(seg_path_, *collection_schema_);
    if (segment == nullptr) {
      LOG_ERROR("create segment failed");
      EXPECT_TRUE(segment != nullptr);
      std::exit(EXIT_FAILURE);
    }
    // Populate the segment via create_doc for the id range (0, 10000).
    // NOTE(review): the tests below expect ids 0..9999, i.e. an exclusive
    // upper bound - confirm against InsertDoc.
    auto status = InsertDoc(segment, 0, 10000, &create_doc);
    if (!status.ok()) {
      LOG_ERROR("insert doc failed: %s", status.c_str());
      EXPECT_TRUE(status.ok());
      std::exit(EXIT_FAILURE);
    }
    segments_.push_back(segment);
  }

  // Runs once after the last test of the suite.
  static void TearDownTestSuite() {
    // Drop the segment references before deleting the files they point at.
    segments_.clear();
    FileHelper::RemoveDirectory(seg_path_);
  }

 protected:
  // Directory that holds the test segment.
  static inline std::string seg_path_ = "./test_collection";
  // Schema handed to SQLEngine::execute by every test.
  static inline CollectionSchema::Ptr collection_schema_;
  // Segments queried by the tests (exactly one is added during setup).
  static inline std::vector<Segment::Ptr> segments_;
};


// contain_all on the int32 array field with the values 1..32.
TEST_F(ContainTest, ContainAllInt32) {
  VectorQuery query;
  query.topk_ = 200;
  query.output_fields_ = std::vector<std::string>{};

  // Build "i32_array contain_all (1, 2, ..., 32)" in place.
  auto &filter = query.filter_;
  filter = "i32_array contain_all (";
  for (int value = 1; value <= 32; ++value) {
    filter += std::to_string(value);
    if (value != 32) {
      filter += ", ";
    }
  }
  filter += ")";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  EXPECT_EQ(docs.size(), 200);

  // create_doc() stores 1..(doc_id % 100), so only ids whose remainder is at
  // least 32 qualify; they are expected back in ascending id order.
  int expected_id = 32;
  for (size_t pos = 0; pos < docs.size(); ++pos) {
    EXPECT_EQ(docs[pos]->pk(), "pk_" + std::to_string(expected_id));
    do {
      ++expected_id;
    } while (expected_id % 100 < 32);
  }
}

// contain_all on the int64 array field with the values 1..32.
TEST_F(ContainTest, ContainAllInt64) {
  VectorQuery query;
  query.topk_ = 200;
  query.output_fields_ = std::vector<std::string>{};

  // Build "i64_array contain_all (1, 2, ..., 32)" in place.
  auto &filter = query.filter_;
  filter = "i64_array contain_all (";
  for (int value = 1; value <= 32; ++value) {
    filter += std::to_string(value);
    if (value != 32) {
      filter += ", ";
    }
  }
  filter += ")";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  EXPECT_EQ(docs.size(), 200);

  // create_doc() stores 1..(doc_id % 100), so only ids whose remainder is at
  // least 32 qualify; they are expected back in ascending id order.
  int expected_id = 32;
  for (size_t pos = 0; pos < docs.size(); ++pos) {
    EXPECT_EQ(docs[pos]->pk(), "pk_" + std::to_string(expected_id));
    do {
      ++expected_id;
    } while (expected_id % 100 < 32);
  }
}

// contain_all on the uint32 array field with the values 1..32.
TEST_F(ContainTest, ContainAllUint32) {
  VectorQuery query;
  query.topk_ = 200;
  query.output_fields_ = std::vector<std::string>{};

  // Build "u32_array contain_all (1, 2, ..., 32)" in place.
  auto &filter = query.filter_;
  filter = "u32_array contain_all (";
  for (int value = 1; value <= 32; ++value) {
    filter += std::to_string(value);
    if (value != 32) {
      filter += ", ";
    }
  }
  filter += ")";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  EXPECT_EQ(docs.size(), 200);

  // create_doc() stores 1..(doc_id % 100), so only ids whose remainder is at
  // least 32 qualify; they are expected back in ascending id order.
  int expected_id = 32;
  for (size_t pos = 0; pos < docs.size(); ++pos) {
    EXPECT_EQ(docs[pos]->pk(), "pk_" + std::to_string(expected_id));
    do {
      ++expected_id;
    } while (expected_id % 100 < 32);
  }
}

// contain_all on the uint64 array field with the values 1..32.
TEST_F(ContainTest, ContainAllUint64) {
  VectorQuery query;
  query.topk_ = 200;
  query.output_fields_ = std::vector<std::string>{};

  // Build "u64_array contain_all (1, 2, ..., 32)" in place.
  auto &filter = query.filter_;
  filter = "u64_array contain_all (";
  for (int value = 1; value <= 32; ++value) {
    filter += std::to_string(value);
    if (value != 32) {
      filter += ", ";
    }
  }
  filter += ")";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  EXPECT_EQ(docs.size(), 200);

  // create_doc() stores 1..(doc_id % 100), so only ids whose remainder is at
  // least 32 qualify; they are expected back in ascending id order.
  int expected_id = 32;
  for (size_t pos = 0; pos < docs.size(); ++pos) {
    EXPECT_EQ(docs[pos]->pk(), "pk_" + std::to_string(expected_id));
    do {
      ++expected_id;
    } while (expected_id % 100 < 32);
  }
}

// contain_all on the float array field with the values 1..32.
TEST_F(ContainTest, ContainAllFp32) {
  VectorQuery query;
  query.topk_ = 200;
  query.output_fields_ = std::vector<std::string>{};

  // Build "fp32_array contain_all (1, 2, ..., 32)" in place.
  auto &filter = query.filter_;
  filter = "fp32_array contain_all (";
  for (int value = 1; value <= 32; ++value) {
    filter += std::to_string(value);
    if (value != 32) {
      filter += ", ";
    }
  }
  filter += ")";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  EXPECT_EQ(docs.size(), 200);

  // create_doc() stores 1..(doc_id % 100), so only ids whose remainder is at
  // least 32 qualify; they are expected back in ascending id order.
  int expected_id = 32;
  for (size_t pos = 0; pos < docs.size(); ++pos) {
    EXPECT_EQ(docs[pos]->pk(), "pk_" + std::to_string(expected_id));
    do {
      ++expected_id;
    } while (expected_id % 100 < 32);
  }
}

// contain_all on the double array field with the values 1..32.
TEST_F(ContainTest, ContainAllFp64) {
  VectorQuery query;
  query.topk_ = 200;
  query.output_fields_ = std::vector<std::string>{};

  // Build "fp64_array contain_all (1, 2, ..., 32)" in place.
  auto &filter = query.filter_;
  filter = "fp64_array contain_all (";
  for (int value = 1; value <= 32; ++value) {
    filter += std::to_string(value);
    if (value != 32) {
      filter += ", ";
    }
  }
  filter += ")";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  EXPECT_EQ(docs.size(), 200);

  // create_doc() stores 1..(doc_id % 100), so only ids whose remainder is at
  // least 32 qualify; they are expected back in ascending id order.
  int expected_id = 32;
  for (size_t pos = 0; pos < docs.size(); ++pos) {
    EXPECT_EQ(docs[pos]->pk(), "pk_" + std::to_string(expected_id));
    do {
      ++expected_id;
    } while (expected_id % 100 < 32);
  }
}

// contain_all on the string array field with 'name1'..'name32'.
TEST_F(ContainTest, ContainAllString) {
  VectorQuery query;
  query.topk_ = 200;
  query.output_fields_ = std::vector<std::string>{};

  // Build "str_array contain_all ('name1', ..., 'name32')" in place.
  auto &filter = query.filter_;
  filter = "str_array contain_all (";
  for (int value = 1; value <= 32; ++value) {
    filter += "'name" + std::to_string(value) + "'";
    if (value != 32) {
      filter += ", ";
    }
  }
  filter += ")";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  EXPECT_EQ(docs.size(), 200);

  // create_doc() stores name1..name(doc_id % 100), so only ids whose
  // remainder is at least 32 qualify; expected back in ascending id order.
  int expected_id = 32;
  for (size_t pos = 0; pos < docs.size(); ++pos) {
    EXPECT_EQ(docs[pos]->pk(), "pk_" + std::to_string(expected_id));
    do {
      ++expected_id;
    } while (expected_id % 100 < 32);
  }
}

// contain_any on the int32 array field with the values 98, 99 and 100.
TEST_F(ContainTest, ContainAnyInt32) {
  VectorQuery query;
  query.filter_ = "i32_array contain_any (98,99,100)";
  query.topk_ = 200;
  query.output_fields_ = std::vector<std::string>{};

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  EXPECT_EQ(docs.size(), 200);

  // create_doc() stores 1..(doc_id % 100), so only ids with remainder 98 or
  // 99 can match; they are expected back in ascending id order.
  int expected_id = 98;
  for (size_t pos = 0; pos < docs.size(); ++pos) {
    EXPECT_EQ(docs[pos]->pk(), "pk_" + std::to_string(expected_id));
    do {
      ++expected_id;
    } while (expected_id % 100 < 98);
  }
}

// contain_any on the int64 array field with the values 98, 99 and 100.
TEST_F(ContainTest, ContainAnyInt64) {
  VectorQuery query;
  query.filter_ = "i64_array contain_any (98,99,100)";
  query.topk_ = 200;
  query.output_fields_ = std::vector<std::string>{};

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  EXPECT_EQ(docs.size(), 200);

  // create_doc() stores 1..(doc_id % 100), so only ids with remainder 98 or
  // 99 can match; they are expected back in ascending id order.
  int expected_id = 98;
  for (size_t pos = 0; pos < docs.size(); ++pos) {
    EXPECT_EQ(docs[pos]->pk(), "pk_" + std::to_string(expected_id));
    do {
      ++expected_id;
    } while (expected_id % 100 < 98);
  }
}

// contain_any on the uint32 array field with the values 98, 99 and 100.
TEST_F(ContainTest, ContainAnyUint32) {
  VectorQuery query;
  query.filter_ = "u32_array contain_any (98,99,100)";
  query.topk_ = 200;
  query.output_fields_ = std::vector<std::string>{};

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  EXPECT_EQ(docs.size(), 200);

  // create_doc() stores 1..(doc_id % 100), so only ids with remainder 98 or
  // 99 can match; they are expected back in ascending id order.
  int expected_id = 98;
  for (size_t pos = 0; pos < docs.size(); ++pos) {
    EXPECT_EQ(docs[pos]->pk(), "pk_" + std::to_string(expected_id));
    do {
      ++expected_id;
    } while (expected_id % 100 < 98);
  }
}

// contain_any on the uint64 array field with the values 98, 99 and 100.
TEST_F(ContainTest, ContainAnyUint64) {
  VectorQuery query;
  query.filter_ = "u64_array contain_any (98,99,100)";
  query.topk_ = 200;
  query.output_fields_ = std::vector<std::string>{};

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  EXPECT_EQ(docs.size(), 200);

  // create_doc() stores 1..(doc_id % 100), so only ids with remainder 98 or
  // 99 can match; they are expected back in ascending id order.
  int expected_id = 98;
  for (size_t pos = 0; pos < docs.size(); ++pos) {
    EXPECT_EQ(docs[pos]->pk(), "pk_" + std::to_string(expected_id));
    do {
      ++expected_id;
    } while (expected_id % 100 < 98);
  }
}

// contain_any on the float array field with the values 98, 99 and 100.
TEST_F(ContainTest, ContainAnyFp32) {
  VectorQuery query;
  query.filter_ = "fp32_array contain_any (98,99,100)";
  query.topk_ = 200;
  query.output_fields_ = std::vector<std::string>{};

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  EXPECT_EQ(docs.size(), 200);

  // create_doc() stores 1..(doc_id % 100), so only ids with remainder 98 or
  // 99 can match; they are expected back in ascending id order.
  int expected_id = 98;
  for (size_t pos = 0; pos < docs.size(); ++pos) {
    EXPECT_EQ(docs[pos]->pk(), "pk_" + std::to_string(expected_id));
    do {
      ++expected_id;
    } while (expected_id % 100 < 98);
  }
}

// contain_any on the double array field with the values 98, 99 and 100.
TEST_F(ContainTest, ContainAnyFp64) {
  VectorQuery query;
  query.filter_ = "fp64_array contain_any (98,99,100)";
  query.topk_ = 200;
  query.output_fields_ = std::vector<std::string>{};

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  EXPECT_EQ(docs.size(), 200);

  // create_doc() stores 1..(doc_id % 100), so only ids with remainder 98 or
  // 99 can match; they are expected back in ascending id order.
  int expected_id = 98;
  for (size_t pos = 0; pos < docs.size(); ++pos) {
    EXPECT_EQ(docs[pos]->pk(), "pk_" + std::to_string(expected_id));
    do {
      ++expected_id;
    } while (expected_id % 100 < 98);
  }
}

// contain_any on the string array field with 'name98', 'name99', 'name100'.
TEST_F(ContainTest, ContainAnyString) {
  VectorQuery query;
  query.filter_ = "str_array contain_any ('name98','name99','name100')";
  query.topk_ = 200;
  query.output_fields_ = std::vector<std::string>{};

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  EXPECT_EQ(docs.size(), 200);

  // create_doc() stores name1..name(doc_id % 100), so only ids with remainder
  // 98 or 99 can match; they are expected back in ascending id order.
  int expected_id = 98;
  for (size_t pos = 0; pos < docs.size(); ++pos) {
    EXPECT_EQ(docs[pos]->pk(), "pk_" + std::to_string(expected_id));
    do {
      ++expected_id;
    } while (expected_id % 100 < 98);
  }
}


}  // namespace zvec::sqlengine

================================================
FILE: tests/db/sqlengine/forward_recall_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License

#include <cstdint>
#include <memory>
#include <gtest/gtest.h>
#include "db/sqlengine/sqlengine.h"
#include "zvec/db/schema.h"
#include "recall_base.h"

namespace zvec::sqlengine {

// Fixture for the forward (no vector search) recall tests. It adds nothing of
// its own; everything the tests use (collection_schema_, segments_) is
// inherited from RecallTest, declared in recall_base.h.
class ForwardRecallTest : public RecallTest {};

// Unfiltered query returning the scalar fields: expects the first topk_
// documents in id order, with age == id % 100 and name == "user_<id % 100>".
TEST_F(ForwardRecallTest, Basic) {
  VectorQuery query;
  query.output_fields_ = {"id", "name", "age"};
  query.topk_ = 200;

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  if (!ret) {
    LOG_ERROR("execute failed: [%s]", ret.error().c_str());
  }
  ASSERT_TRUE(ret.has_value());
  auto docs = ret.value();
  // Must be fatal: the loop below indexes docs[0..topk_), so continuing with
  // a shorter result would read past the end of the container.
  ASSERT_EQ(docs.size(), query.topk_);
  for (int i = 0; i < query.topk_; i++) {
    auto &doc = docs[i];
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(i));
    auto age = doc->get<int32_t>("age");
    // Check presence before dereferencing, mirroring the name check below.
    ASSERT_TRUE(age.has_value());
    EXPECT_EQ(age.value(), i % 100);
    auto name = doc->get<std::string>("name");
    ASSERT_TRUE(name);
    EXPECT_EQ(name.value(), "user_" + std::to_string(i % 100));
  }
}

// Same as Basic, but with include_doc_id_ set: additionally expects
// doc_id() to equal the position-derived id.
TEST_F(ForwardRecallTest, BasicWithDocId) {
  VectorQuery query;
  query.output_fields_ = {"id", "name", "age"};
  query.topk_ = 200;
  query.include_doc_id_ = true;

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  if (!ret) {
    LOG_ERROR("execute failed: [%s]", ret.error().c_str());
  }
  ASSERT_TRUE(ret.has_value());
  auto docs = ret.value();
  // Must be fatal: the loop below indexes docs[0..topk_), so continuing with
  // a shorter result would read past the end of the container.
  ASSERT_EQ(docs.size(), query.topk_);
  for (int i = 0; i < query.topk_; i++) {
    auto &doc = docs[i];
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(i));
    EXPECT_EQ(doc->doc_id(), i);
    auto age = doc->get<int32_t>("age");
    // Check presence before dereferencing, mirroring the name check below.
    ASSERT_TRUE(age.has_value());
    EXPECT_EQ(age.value(), i % 100);
    auto name = doc->get<std::string>("name");
    ASSERT_TRUE(name);
    EXPECT_EQ(name.value(), "user_" + std::to_string(i % 100));
  }
}

// Query with an explicitly empty output field list: expects the first topk_
// documents in id order, each carrying no fields at all.
TEST_F(ForwardRecallTest, OutputNoFields) {
  VectorQuery query;
  query.output_fields_ = std::vector<std::string>{};
  query.topk_ = 200;

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  if (!ret) {
    LOG_ERROR("execute failed: [%s]", ret.error().c_str());
  }
  ASSERT_TRUE(ret.has_value());
  auto docs = ret.value();
  // Must be fatal: the loop below indexes docs[0..topk_), so continuing with
  // a shorter result would read past the end of the container.
  ASSERT_EQ(docs.size(), query.topk_);
  for (int i = 0; i < query.topk_; i++) {
    auto &doc = docs[i];
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(i));
    EXPECT_EQ(doc->field_names().size(), 0);
  }
}

// Query returning the dense vector: expects a 4-component vector per
// document whose components all equal the document id.
TEST_F(ForwardRecallTest, DenseVector) {
  VectorQuery query;
  query.output_fields_ = {"id", "dense"};
  query.topk_ = 200;
  query.include_vector_ = true;

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  if (!ret) {
    LOG_ERROR("execute failed: [%s]", ret.error().c_str());
  }
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  // Must be fatal: the loop below indexes docs[0..topk_), so continuing with
  // a shorter result would read past the end of the container.
  ASSERT_EQ(docs.size(), query.topk_);
  for (int i = 0; i < query.topk_; i++) {
    auto &doc = docs[i];
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(i));
    EXPECT_EQ(i, doc->get<uint64_t>("id"));
    auto dense = doc->get<std::vector<float>>("dense");
    ASSERT_TRUE(dense.has_value());
    EXPECT_EQ(dense.value().size(), 4);
    for (auto v : dense.value()) {
      EXPECT_FLOAT_EQ(v, static_cast<float>(i));
    }
  }
}

// Query returning the sparse vector: expects id % 100 entries per document
// with indices 0..n-1 and every value equal to the document id. Documents
// whose id is a multiple of 100 are expected to have no sparse value.
TEST_F(ForwardRecallTest, SparseVector) {
  VectorQuery query;
  query.output_fields_ = {"id", "sparse"};
  query.topk_ = 200;
  query.include_vector_ = true;

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  if (!ret) {
    LOG_ERROR("execute failed: [%s]", ret.error().c_str());
  }
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  // Must be fatal: the loop below indexes docs[0..topk_), so continuing with
  // a shorter result would read past the end of the container.
  ASSERT_EQ(docs.size(), query.topk_);
  for (int i = 0; i < query.topk_; i++) {
    auto &doc = docs[i];
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(i));
    // EXPECT_EQ(doc->field_names().size(), 2);
    EXPECT_EQ(i, doc->get<uint64_t>("id"));
    auto sparse =
        doc->get<std::pair<std::vector<uint32_t>, std::vector<float>>>(
            "sparse");
    if (i % 100 == 0) {
      // set with empty vector
      ASSERT_FALSE(sparse.has_value());
      continue;
    }

    ASSERT_TRUE(sparse.has_value());
    const auto &[indices, values] = sparse.value();
    // Fatal as well: both vectors are indexed up to i % 100 right below.
    ASSERT_EQ(indices.size(), i % 100);
    ASSERT_EQ(values.size(), i % 100);
    for (int j = 0; j < i % 100; j++) {
      EXPECT_EQ(indices[j], j);
      EXPECT_FLOAT_EQ(values[j], static_cast<float>(i));
    }
  }
}

// Runs the query over a segment list that contains the fixture's first
// segment twice and expects the same first topk_ documents, including their
// dense and sparse vectors.
TEST_F(ForwardRecallTest, MultiSegment) {
  VectorQuery query;
  query.output_fields_ = std::vector<std::string>();
  query.topk_ = 200;
  query.include_vector_ = true;

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  std::vector<Segment::Ptr> segments = segments_;
  segments.push_back(segments_[0]);
  auto ret = engine->execute(collection_schema_, query, segments);
  if (!ret) {
    LOG_ERROR("execute failed: [%s]", ret.error().c_str());
  }
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  // Must be fatal: the loop below indexes docs[0..topk_), so continuing with
  // a shorter result would read past the end of the container.
  ASSERT_EQ(docs.size(), query.topk_);
  for (int i = 0; i < query.topk_; i++) {
    auto &doc = docs[i];
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(i));
    auto dense = doc->get<std::vector<float>>("dense");
    ASSERT_TRUE(dense.has_value());
    EXPECT_EQ(dense.value().size(), 4);
    for (auto v : dense.value()) {
      EXPECT_FLOAT_EQ(v, static_cast<float>(i));
    }

    auto sparse =
        doc->get<std::pair<std::vector<uint32_t>, std::vector<float>>>(
            "sparse");
    if (i % 100 == 0) {
      // set with empty vector
      ASSERT_FALSE(sparse.has_value());
      continue;
    }

    ASSERT_TRUE(sparse.has_value());
    const auto &[indices, values] = sparse.value();
    // Fatal as well: both vectors are indexed up to i % 100 right below.
    ASSERT_EQ(indices.size(), i % 100);
    ASSERT_EQ(values.size(), i % 100);
    for (int j = 0; j < i % 100; j++) {
      EXPECT_EQ(indices[j], j);
      EXPECT_FLOAT_EQ(values[j], static_cast<float>(i));
    }
  }
}

// Equality filter "age = 1": expects 100 matches with ids 1, 101, 201, ...
// in ascending order.
TEST_F(ForwardRecallTest, Eq) {
  VectorQuery query;
  query.filter_ = "age = 1";
  query.topk_ = 200;
  query.output_fields_ = {"id", "name", "age"};

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  EXPECT_EQ(docs.size(), 100);

  const int result_count = (int)docs.size();
  for (int pos = 0; pos < result_count; ++pos) {
    // Matching ids start at 1 and are spaced 100 apart.
    int expected_id = 1 + 100 * pos;
    auto &doc = docs[pos];
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, doc->get<uint64_t>("id"));
    auto age = doc->get<int32_t>("age");
    EXPECT_EQ(age.value(), expected_id % 100);
    auto name = doc->get<std::string>("name");
    ASSERT_TRUE(name);
    EXPECT_EQ(name.value(), "user_" + std::to_string(expected_id % 100));
  }
}

// Range filter "id > 1000": expects topk_ documents starting at id 1001 in
// ascending order.
TEST_F(ForwardRecallTest, Gt) {
  VectorQuery query;
  query.output_fields_ = {"id", "name", "age"};
  query.topk_ = 200;
  query.filter_ = "id > 1000";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  // Must be fatal: the loop below indexes docs[0..topk_), so continuing with
  // a shorter result would read past the end of the container.
  ASSERT_EQ(docs.size(), query.topk_);
  for (int j = 0; j < query.topk_; j++) {
    auto &doc = docs[j];
    auto i = j + 1001;
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(i));
    auto age = doc->get<int32_t>("age");
    // Check presence before dereferencing, mirroring the name check below.
    ASSERT_TRUE(age.has_value());
    EXPECT_EQ(age.value(), i % 100);
    auto name = doc->get<std::string>("name");
    ASSERT_TRUE(name);
    EXPECT_EQ(name.value(), "user_" + std::to_string(i % 100));
  }
}

// Range filter "id >= 1000": expects topk_ documents starting at id 1000 in
// ascending order.
TEST_F(ForwardRecallTest, Ge) {
  VectorQuery query;
  query.output_fields_ = {"id", "name", "age"};
  query.topk_ = 200;
  query.filter_ = "id >= 1000";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  // Must be fatal: the loop below indexes docs[0..topk_), so continuing with
  // a shorter result would read past the end of the container.
  ASSERT_EQ(docs.size(), query.topk_);
  for (int j = 0; j < query.topk_; j++) {
    auto &doc = docs[j];
    auto i = j + 1000;
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(i));
    auto age = doc->get<int32_t>("age");
    // Check presence before dereferencing, mirroring the name check below.
    ASSERT_TRUE(age.has_value());
    EXPECT_EQ(age.value(), i % 100);
    auto name = doc->get<std::string>("name");
    ASSERT_TRUE(name);
    EXPECT_EQ(name.value(), "user_" + std::to_string(i % 100));
  }
}

// Filters on `id < 100` and requires exactly 100 hits whose ids run 0..99 in
// result order.
TEST_F(ForwardRecallTest, Lt) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "id < 100";
  request.output_fields_ = {"id", "name", "age"};

  auto sql_engine = SQLEngine::create(std::make_shared<Profiler>());
  auto outcome = sql_engine->execute(collection_schema_, request, segments_);
  ASSERT_TRUE(outcome.has_value()) << outcome.error().c_str();
  auto hits = outcome.value();
  ASSERT_EQ(hits.size(), 100);

  const int hit_count = static_cast<int>(hits.size());
  // The expected id equals the position in the result list.
  for (int expected_id = 0; expected_id < hit_count; ++expected_id) {
    const int bucket = expected_id % 100;
    auto &hit = hits[expected_id];
    EXPECT_EQ(expected_id, hit->get<uint64_t>("id"));
    EXPECT_EQ(hit->pk(), "pk_" + std::to_string(expected_id));
    auto age_field = hit->get<int32_t>("age");
    EXPECT_EQ(age_field.value(), bucket);
    auto name_field = hit->get<std::string>("name");
    ASSERT_TRUE(name_field);
    EXPECT_EQ(name_field.value(), "user_" + std::to_string(bucket));
  }
}

// Filters on `id <= 100` and requires exactly 101 hits whose ids run 0..100
// in result order.
TEST_F(ForwardRecallTest, Le) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "id <= 100";
  request.output_fields_ = {"id", "name", "age"};

  auto sql_engine = SQLEngine::create(std::make_shared<Profiler>());
  auto outcome = sql_engine->execute(collection_schema_, request, segments_);
  ASSERT_TRUE(outcome.has_value()) << outcome.error().c_str();
  auto hits = outcome.value();
  ASSERT_EQ(hits.size(), 101);

  const int hit_count = static_cast<int>(hits.size());
  // The expected id equals the position in the result list.
  for (int expected_id = 0; expected_id < hit_count; ++expected_id) {
    const int bucket = expected_id % 100;
    auto &hit = hits[expected_id];
    EXPECT_EQ(expected_id, hit->get<uint64_t>("id"));
    EXPECT_EQ(hit->pk(), "pk_" + std::to_string(expected_id));
    auto age_field = hit->get<int32_t>("age");
    EXPECT_EQ(age_field.value(), bucket);
    auto name_field = hit->get<std::string>("name");
    ASSERT_TRUE(name_field);
    EXPECT_EQ(name_field.value(), "user_" + std::to_string(bucket));
  }
}

// Combines two range predicates with `and` (`id <= 100 and id > 50`) and
// requires exactly 50 hits whose ids run 51..100 in result order.
TEST_F(ForwardRecallTest, And) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "id <= 100 and id > 50";
  request.output_fields_ = {"id", "name", "age"};

  auto sql_engine = SQLEngine::create(std::make_shared<Profiler>());
  auto outcome = sql_engine->execute(collection_schema_, request, segments_);
  ASSERT_TRUE(outcome.has_value()) << outcome.error().c_str();
  auto hits = outcome.value();
  ASSERT_EQ(hits.size(), 50);

  const int hit_count = static_cast<int>(hits.size());
  for (int pos = 0; pos < hit_count; ++pos) {
    // First matching id is 51; ids are consecutive from there.
    const int expected_id = 51 + pos;
    const int bucket = expected_id % 100;
    auto &hit = hits[pos];
    EXPECT_EQ(expected_id, hit->get<uint64_t>("id"));
    EXPECT_EQ(hit->pk(), "pk_" + std::to_string(expected_id));
    auto age_field = hit->get<int32_t>("age");
    EXPECT_EQ(age_field.value(), bucket);
    auto name_field = hit->get<std::string>("name");
    ASSERT_TRUE(name_field);
    EXPECT_EQ(name_field.value(), "user_" + std::to_string(bucket));
  }
}

// Combines two range predicates with `or` (`id < 100 or id > 200`) and
// requires exactly 200 hits: ids 0..99 followed by ids 201..300.
TEST_F(ForwardRecallTest, Or) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "id < 100 or id > 200";
  request.output_fields_ = {"id", "name", "age"};

  auto sql_engine = SQLEngine::create(std::make_shared<Profiler>());
  auto outcome = sql_engine->execute(collection_schema_, request, segments_);
  ASSERT_TRUE(outcome.has_value()) << outcome.error().c_str();
  auto hits = outcome.value();
  ASSERT_EQ(hits.size(), 200);

  const int hit_count = static_cast<int>(hits.size());
  for (int pos = 0; pos < hit_count; ++pos) {
    // The first 100 positions map to themselves; later ones skip the excluded
    // range 100..200, so position 100 corresponds to id 201.
    int expected_id = pos;
    if (pos >= 100) {
      expected_id = pos + 101;
    }
    const int bucket = expected_id % 100;
    auto &hit = hits[pos];
    EXPECT_EQ(expected_id, hit->get<uint64_t>("id"));
    EXPECT_EQ(hit->pk(), "pk_" + std::to_string(expected_id));
    auto age_field = hit->get<int32_t>("age");
    EXPECT_EQ(age_field.value(), bucket);
    auto name_field = hit->get<std::string>("name");
    ASSERT_TRUE(name_field);
    EXPECT_EQ(name_field.value(), "user_" + std::to_string(bucket));
  }
}

// Filters on the string equality `name = 'user_1'` and checks that the hits
// are ids 1, 101, 201, ... in result order.
TEST_F(ForwardRecallTest, StrEq) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "name = 'user_1'";
  request.output_fields_ = {"id", "name", "age"};

  auto sql_engine = SQLEngine::create(std::make_shared<Profiler>());
  auto outcome = sql_engine->execute(collection_schema_, request, segments_);
  ASSERT_TRUE(outcome.has_value()) << outcome.error().c_str();
  auto hits = outcome.value();
  EXPECT_EQ(hits.size(), 100);

  const int hit_count = static_cast<int>(hits.size());
  for (int pos = 0; pos < hit_count; ++pos) {
    // One hit per block of 100 ids, starting at id 1.
    const int expected_id = 1 + 100 * pos;
    const int bucket = expected_id % 100;
    auto &hit = hits[pos];
    EXPECT_EQ(hit->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, hit->get<uint64_t>("id"));
    auto age_field = hit->get<int32_t>("age");
    EXPECT_EQ(age_field.value(), bucket);
    auto name_field = hit->get<std::string>("name");
    ASSERT_TRUE(name_field);
    EXPECT_EQ(name_field.value(), "user_" + std::to_string(bucket));
  }
}

// Filters on the string comparison `name >= 'user_1'` and expects 200 hits
// whose ids count upward from 1 while skipping every multiple of 100.
TEST_F(ForwardRecallTest, StrGe) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "name >= 'user_1'";
  request.output_fields_ = {"id", "name", "age"};

  auto sql_engine = SQLEngine::create(std::make_shared<Profiler>());
  auto outcome = sql_engine->execute(collection_schema_, request, segments_);
  ASSERT_TRUE(outcome.has_value()) << outcome.error().c_str();
  auto hits = outcome.value();
  EXPECT_EQ(hits.size(), 200);

  const int hit_count = static_cast<int>(hits.size());
  int expected_id = 0;
  for (int pos = 0; pos < hit_count; ++pos) {
    // Advance to the next expected id, stepping over multiples of 100.
    ++expected_id;
    if (expected_id % 100 == 0) {
      ++expected_id;
    }
    const int bucket = expected_id % 100;
    auto &hit = hits[pos];
    EXPECT_EQ(hit->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, hit->get<uint64_t>("id"));
    auto age_field = hit->get<int32_t>("age");
    EXPECT_EQ(age_field.value(), bucket);
    auto name_field = hit->get<std::string>("name");
    ASSERT_TRUE(name_field);
    EXPECT_EQ(name_field.value(), "user_" + std::to_string(bucket));
  }
}

// Filters on `name IN ('user_1', 'user_2')` and expects 200 hits alternating
// between the two names: ids 1, 2, 101, 102, 201, 202, ...
TEST_F(ForwardRecallTest, StrIn) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "name IN ('user_1', 'user_2')";
  request.output_fields_ = {"id", "name", "age"};

  auto sql_engine = SQLEngine::create(std::make_shared<Profiler>());
  auto outcome = sql_engine->execute(collection_schema_, request, segments_);
  ASSERT_TRUE(outcome.has_value()) << outcome.error().c_str();
  auto hits = outcome.value();
  EXPECT_EQ(hits.size(), 200);

  const int hit_count = static_cast<int>(hits.size());
  for (int pos = 0; pos < hit_count; ++pos) {
    // Two hits per block of 100 ids: offsets 1 and 2 within the block.
    const int expected_id = (pos / 2) * 100 + 1 + (pos % 2);
    const int bucket = expected_id % 100;
    auto &hit = hits[pos];
    EXPECT_EQ(hit->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, hit->get<uint64_t>("id"));
    auto age_field = hit->get<int32_t>("age");
    EXPECT_EQ(age_field.value(), bucket);
    auto name_field = hit->get<std::string>("name");
    ASSERT_TRUE(name_field);
    EXPECT_EQ(name_field.value(), "user_" + std::to_string(bucket));
  }
}

// Filters on `name NOT IN ('user_1', 'user_2')` and expects 200 hits whose
// ids count upward from 0 while skipping ids ending in 01 and 02 of each
// block of 100 (0, 3, 4, ..., 99, 100, 103, ...).
TEST_F(ForwardRecallTest, StrNotIn) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "name NOT IN ('user_1', 'user_2')";
  request.output_fields_ = {"id", "name", "age"};

  auto sql_engine = SQLEngine::create(std::make_shared<Profiler>());
  auto outcome = sql_engine->execute(collection_schema_, request, segments_);
  ASSERT_TRUE(outcome.has_value()) << outcome.error().c_str();
  auto hits = outcome.value();
  EXPECT_EQ(hits.size(), 200);

  const int hit_count = static_cast<int>(hits.size());
  int expected_id = 0;
  for (int pos = 0; pos < hit_count; ++pos) {
    const int bucket = expected_id % 100;
    auto &hit = hits[pos];
    EXPECT_EQ(hit->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, hit->get<uint64_t>("id"));
    auto age_field = hit->get<int32_t>("age");
    EXPECT_EQ(age_field.value(), bucket);
    auto name_field = hit->get<std::string>("name");
    ASSERT_TRUE(name_field);
    EXPECT_EQ(name_field.value(), "user_" + std::to_string(bucket));

    // Right after a multiple of 100 the next two ids are excluded.
    expected_id += (bucket == 0) ? 3 : 1;
  }
}

// Filters on the pattern `name like 'user_9%'` and expects 200 hits with ids
// 9, 90..99, 109, 190..199, 209, ... in result order.
TEST_F(ForwardRecallTest, StrLike) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "name like 'user_9%'";
  request.output_fields_ = {"id", "name", "age"};

  auto sql_engine = SQLEngine::create(std::make_shared<Profiler>());
  auto outcome = sql_engine->execute(collection_schema_, request, segments_);
  ASSERT_TRUE(outcome.has_value()) << outcome.error().c_str();
  auto hits = outcome.value();
  EXPECT_EQ(hits.size(), 200);

  const int hit_count = static_cast<int>(hits.size());
  int expected_id = 9;
  for (int pos = 0; pos < hit_count; ++pos) {
    const int bucket = expected_id % 100;
    auto &hit = hits[pos];
    EXPECT_EQ(hit->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, hit->get<uint64_t>("id"));
    auto age_field = hit->get<int32_t>("age");
    EXPECT_EQ(age_field.value(), bucket);
    auto name_field = hit->get<std::string>("name");
    ASSERT_TRUE(name_field);
    EXPECT_EQ(name_field.value(), "user_" + std::to_string(bucket));

    // Step to the next id whose in-block offset is 9 or 90..99.
    switch (bucket) {
      case 9:
        expected_id += 81;  // 9 -> 90 within the same block
        break;
      case 99:
        expected_id += 10;  // 99 -> 9 of the next block
        break;
      default:
        expected_id += 1;
        break;
    }
  }
}

// Filters on `optional_age is null` and checks that the hits are ids
// 0, 100, 200, ... in result order.
TEST_F(ForwardRecallTest, IsNull) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "optional_age is null";
  request.output_fields_ = {"id", "name", "age"};

  auto sql_engine = SQLEngine::create(std::make_shared<Profiler>());
  auto outcome = sql_engine->execute(collection_schema_, request, segments_);
  ASSERT_TRUE(outcome.has_value()) << outcome.error().c_str();
  auto hits = outcome.value();
  EXPECT_EQ(hits.size(), 100);

  const int hit_count = static_cast<int>(hits.size());
  for (int pos = 0; pos < hit_count; ++pos) {
    // One hit per block of 100 ids: the multiple of 100 itself.
    const int expected_id = 100 * pos;
    const int bucket = expected_id % 100;
    auto &hit = hits[pos];
    EXPECT_EQ(hit->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, hit->get<uint64_t>("id"));
    auto age_field = hit->get<int32_t>("age");
    EXPECT_EQ(age_field.value(), bucket);
    auto name_field = hit->get<std::string>("name");
    ASSERT_TRUE(name_field);
    EXPECT_EQ(name_field.value(), "user_" + std::to_string(bucket));
  }
}

// Filters on `optional_age is not null` and expects 200 hits whose ids count
// upward from 1 while skipping every multiple of 100.
TEST_F(ForwardRecallTest, IsNotNull) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "optional_age is not null";
  request.output_fields_ = {"id", "name", "age"};

  auto sql_engine = SQLEngine::create(std::make_shared<Profiler>());
  auto outcome = sql_engine->execute(collection_schema_, request, segments_);
  ASSERT_TRUE(outcome.has_value()) << outcome.error().c_str();
  auto hits = outcome.value();
  EXPECT_EQ(hits.size(), 200);

  const int hit_count = static_cast<int>(hits.size());
  int expected_id = 0;
  for (int pos = 0; pos < hit_count; ++pos) {
    // Advance to the next expected id, stepping over multiples of 100.
    ++expected_id;
    if (expected_id % 100 == 0) {
      ++expected_id;
    }
    const int bucket = expected_id % 100;
    auto &hit = hits[pos];
    EXPECT_EQ(hit->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, hit->get<uint64_t>("id"));
    auto age_field = hit->get<int32_t>("age");
    EXPECT_EQ(age_field.value(), bucket);
    auto name_field = hit->get<std::string>("name");
    ASSERT_TRUE(name_field);
    EXPECT_EQ(name_field.value(), "user_" + std::to_string(bucket));
  }
}

// Filters on `age is null` and requires the query to succeed with an empty
// result set.
TEST_F(ForwardRecallTest, IsNullNoResult) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "age is null";
  request.output_fields_ = {"id", "name", "age"};

  auto sql_engine = SQLEngine::create(std::make_shared<Profiler>());
  auto outcome = sql_engine->execute(collection_schema_, request, segments_);
  ASSERT_TRUE(outcome.has_value()) << outcome.error().c_str();

  auto hits = outcome.value();
  ASSERT_EQ(hits.size(), 0);
}

// Filters on `category_set contain_all (1, 2, ..., 32)` and expects 200 hits
// whose ids have an in-block offset of 32..99 (32..99, 132..199, ...).
TEST_F(ForwardRecallTest, ContainAll) {
  VectorQuery request;
  request.topk_ = 200;
  request.output_fields_ = {"id", "name", "age"};

  // Build the comma-separated operand list 1..32.
  request.filter_ = "category_set contain_all (";
  for (int value = 1; value <= 32; ++value) {
    if (value > 1) {
      request.filter_ += ", ";
    }
    request.filter_ += std::to_string(value);
  }
  request.filter_ += ")";

  auto sql_engine = SQLEngine::create(std::make_shared<Profiler>());
  auto outcome = sql_engine->execute(collection_schema_, request, segments_);
  ASSERT_TRUE(outcome.has_value()) << outcome.error().c_str();
  auto hits = outcome.value();
  EXPECT_EQ(hits.size(), 200);

  const int hit_count = static_cast<int>(hits.size());
  int expected_id = 32;
  for (int pos = 0; pos < hit_count; ++pos) {
    const int bucket = expected_id % 100;
    auto &hit = hits[pos];
    EXPECT_EQ(hit->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, hit->get<uint64_t>("id"));
    auto age_field = hit->get<int32_t>("age");
    EXPECT_EQ(age_field.value(), bucket);
    auto name_field = hit->get<std::string>("name");
    ASSERT_TRUE(name_field);
    EXPECT_EQ(name_field.value(), "user_" + std::to_string(bucket));

    // Move to the next id whose in-block offset is at least 32.
    do {
      ++expected_id;
    } while (expected_id % 100 < 32);
  }
}

// Filters on `category_set not contain_all (1, 2, ..., 32)` and expects 200
// hits whose ids have an in-block offset of 1..31 (1..31, 101..131, ...).
TEST_F(ForwardRecallTest, NotContainAll) {
  VectorQuery request;
  request.topk_ = 200;
  request.output_fields_ = {"id", "name", "age"};

  // Build the comma-separated operand list 1..32.
  request.filter_ = "category_set not contain_all (";
  for (int value = 1; value <= 32; ++value) {
    if (value > 1) {
      request.filter_ += ", ";
    }
    request.filter_ += std::to_string(value);
  }
  request.filter_ += ")";

  auto sql_engine = SQLEngine::create(std::make_shared<Profiler>());
  auto outcome = sql_engine->execute(collection_schema_, request, segments_);
  ASSERT_TRUE(outcome.has_value()) << outcome.error().c_str();
  auto hits = outcome.value();
  EXPECT_EQ(hits.size(), 200);

  const int hit_count = static_cast<int>(hits.size());
  int expected_id = 1;
  for (int pos = 0; pos < hit_count; ++pos) {
    const int bucket = expected_id % 100;
    auto &hit = hits[pos];
    EXPECT_EQ(hit->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, hit->get<uint64_t>("id"));
    auto age_field = hit->get<int32_t>("age");
    EXPECT_EQ(age_field.value(), bucket);
    auto name_field = hit->get<std::string>("name");
    ASSERT_TRUE(name_field);
    EXPECT_EQ(name_field.value(), "user_" + std::to_string(bucket));

    // Move to the next id with offset 1..31; offset 0 is skipped as well
    // because those documents have a null category.
    int next_bucket = 0;
    do {
      ++expected_id;
      next_bucket = expected_id % 100;
    } while (next_bucket == 0 || next_bucket >= 32);
  }
}

// Filters on `category_set contain_any (98,99,100)` and expects 200 hits
// whose ids have an in-block offset of 98 or 99 (98, 99, 198, 199, ...).
TEST_F(ForwardRecallTest, ContainAny) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "category_set contain_any (98,99,100)";
  request.output_fields_ = {"id", "name", "age"};

  auto sql_engine = SQLEngine::create(std::make_shared<Profiler>());
  auto outcome = sql_engine->execute(collection_schema_, request, segments_);
  ASSERT_TRUE(outcome.has_value()) << outcome.error().c_str();
  auto hits = outcome.value();
  EXPECT_EQ(hits.size(), 200);

  const int hit_count = static_cast<int>(hits.size());
  int expected_id = 98;
  for (int pos = 0; pos < hit_count; ++pos) {
    const int bucket = expected_id % 100;
    auto &hit = hits[pos];
    EXPECT_EQ(hit->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, hit->get<uint64_t>("id"));
    auto age_field = hit->get<int32_t>("age");
    EXPECT_EQ(age_field.value(), bucket);
    auto name_field = hit->get<std::string>("name");
    ASSERT_TRUE(name_field);
    EXPECT_EQ(name_field.value(), "user_" + std::to_string(bucket));

    // Move to the next id whose in-block offset is at least 98.
    do {
      ++expected_id;
    } while (expected_id % 100 < 98);
  }
}

// Filters on `category_set not contain_any (98,99,100)` and expects 200 hits
// whose ids have an in-block offset of 1..97 (1..97, 101..197, ...).
TEST_F(ForwardRecallTest, NotContainAny) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "category_set not contain_any (98,99,100)";
  request.output_fields_ = {"id", "name", "age"};

  auto sql_engine = SQLEngine::create(std::make_shared<Profiler>());
  auto outcome = sql_engine->execute(collection_schema_, request, segments_);
  ASSERT_TRUE(outcome.has_value()) << outcome.error().c_str();
  auto hits = outcome.value();
  EXPECT_EQ(hits.size(), 200);

  const int hit_count = static_cast<int>(hits.size());
  int expected_id = 1;
  for (int pos = 0; pos < hit_count; ++pos) {
    const int bucket = expected_id % 100;
    auto &hit = hits[pos];
    EXPECT_EQ(hit->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, hit->get<uint64_t>("id"));
    auto age_field = hit->get<int32_t>("age");
    EXPECT_EQ(age_field.value(), bucket);
    auto name_field = hit->get<std::string>("name");
    ASSERT_TRUE(name_field);
    EXPECT_EQ(name_field.value(), "user_" + std::to_string(bucket));

    // Move to the next id with offset 1..97; offset 0 is skipped as well
    // because those documents have a null category.
    int next_bucket = 0;
    do {
      ++expected_id;
      next_bucket = expected_id % 100;
    } while (next_bucket == 0 || next_bucket >= 98);
  }
}

// Filters on `bool_array contain_all (true, false)` and expects 200 hits
// whose ids are the multiples of 3 (0, 3, 6, ...) in result order.
TEST_F(ForwardRecallTest, BoolContainAll) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "bool_array contain_all (true, false)";
  request.output_fields_ = {"id", "name", "age"};

  auto sql_engine = SQLEngine::create(std::make_shared<Profiler>());
  auto outcome = sql_engine->execute(collection_schema_, request, segments_);
  ASSERT_TRUE(outcome.has_value()) << outcome.error().c_str();
  auto hits = outcome.value();
  EXPECT_EQ(hits.size(), 200);

  const int hit_count = static_cast<int>(hits.size());
  for (int pos = 0; pos < hit_count; ++pos) {
    // Every third id is expected, starting at 0.
    const int expected_id = 3 * pos;
    const int bucket = expected_id % 100;
    auto &hit = hits[pos];
    EXPECT_EQ(hit->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, hit->get<uint64_t>("id"));
    auto age_field = hit->get<int32_t>("age");
    EXPECT_EQ(age_field.value(), bucket);
    auto name_field = hit->get<std::string>("name");
    ASSERT_TRUE(name_field);
    EXPECT_EQ(name_field.value(), "user_" + std::to_string(bucket));
  }
}

// Filters on `bool_array contain_any (true)` and expects 200 hits whose ids
// skip every value congruent to 2 modulo 3 (0, 1, 3, 4, 6, 7, ...).
TEST_F(ForwardRecallTest, BoolContainAny) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "bool_array contain_any (true)";
  request.output_fields_ = {"id", "name", "age"};

  auto sql_engine = SQLEngine::create(std::make_shared<Profiler>());
  auto outcome = sql_engine->execute(collection_schema_, request, segments_);
  ASSERT_TRUE(outcome.has_value()) << outcome.error().c_str();
  auto hits = outcome.value();
  EXPECT_EQ(hits.size(), 200);

  const int hit_count = static_cast<int>(hits.size());
  for (int pos = 0; pos < hit_count; ++pos) {
    // Two hits per group of three ids: remainders 0 and 1.
    const int expected_id = (pos / 2) * 3 + (pos % 2);
    const int bucket = expected_id % 100;
    auto &hit = hits[pos];
    EXPECT_EQ(hit->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, hit->get<uint64_t>("id"));
    auto age_field = hit->get<int32_t>("age");
    EXPECT_EQ(age_field.value(), bucket);
    auto name_field = hit->get<std::string>("name");
    ASSERT_TRUE(name_field);
    EXPECT_EQ(name_field.value(), "user_" + std::to_string(bucket));
  }
}

// Filters on `category_set contain_all ()` (empty operand list) and expects
// 200 hits whose ids count upward from 1, skipping every multiple of 100.
TEST_F(ForwardRecallTest, ContainAllEmptySet) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "category_set contain_all ()";
  request.output_fields_ = {"id", "name", "age"};

  auto sql_engine = SQLEngine::create(std::make_shared<Profiler>());
  auto outcome = sql_engine->execute(collection_schema_, request, segments_);
  ASSERT_TRUE(outcome.has_value()) << outcome.error().c_str();
  auto hits = outcome.value();
  EXPECT_EQ(hits.size(), 200);

  const int hit_count = static_cast<int>(hits.size());
  int expected_id = 1;
  for (int pos = 0; pos < hit_count; ++pos) {
    const int bucket = expected_id % 100;
    auto &hit = hits[pos];
    EXPECT_EQ(hit->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, hit->get<uint64_t>("id"));
    auto age_field = hit->get<int32_t>("age");
    EXPECT_EQ(age_field.value(), bucket);
    auto name_field = hit->get<std::string>("name");
    ASSERT_TRUE(name_field);
    EXPECT_EQ(name_field.value(), "user_" + std::to_string(bucket));

    // Move to the next id that is not a multiple of 100.
    do {
      ++expected_id;
    } while (expected_id % 100 == 0);
  }
}

// Filters on `category_set not contain_all ()` (empty operand list) and
// expects the query to succeed with no hits.
TEST_F(ForwardRecallTest, NotContainAllEmptySet) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "category_set not contain_all ()";
  request.output_fields_ = {"id", "name", "age"};

  auto sql_engine = SQLEngine::create(std::make_shared<Profiler>());
  auto outcome = sql_engine->execute(collection_schema_, request, segments_);
  ASSERT_TRUE(outcome.has_value()) << outcome.error().c_str();

  auto hits = outcome.value();
  EXPECT_EQ(hits.size(), 0);
}

// Filters on `category_set contain_any ()` (empty operand list) and expects
// the query to succeed with no hits.
TEST_F(ForwardRecallTest, ContainAnyEmptySet) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "category_set contain_any ()";
  request.output_fields_ = {"id", "name", "age"};

  auto sql_engine = SQLEngine::create(std::make_shared<Profiler>());
  auto outcome = sql_engine->execute(collection_schema_, request, segments_);
  ASSERT_TRUE(outcome.has_value()) << outcome.error().c_str();

  auto hits = outcome.value();
  EXPECT_EQ(hits.size(), 0);
}

// Filters on `category_set not contain_any ()` (empty operand list) and
// expects 200 hits whose ids count upward from 1, skipping every multiple of
// 100.
TEST_F(ForwardRecallTest, NotContainAnyEmptySet) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "category_set not contain_any ()";
  request.output_fields_ = {"id", "name", "age"};

  auto sql_engine = SQLEngine::create(std::make_shared<Profiler>());
  auto outcome = sql_engine->execute(collection_schema_, request, segments_);
  ASSERT_TRUE(outcome.has_value()) << outcome.error().c_str();
  auto hits = outcome.value();
  EXPECT_EQ(hits.size(), 200);

  const int hit_count = static_cast<int>(hits.size());
  int expected_id = 1;
  for (int pos = 0; pos < hit_count; ++pos) {
    const int bucket = expected_id % 100;
    auto &hit = hits[pos];
    EXPECT_EQ(hit->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, hit->get<uint64_t>("id"));
    auto age_field = hit->get<int32_t>("age");
    EXPECT_EQ(age_field.value(), bucket);
    auto name_field = hit->get<std::string>("name");
    ASSERT_TRUE(name_field);
    EXPECT_EQ(name_field.value(), "user_" + std::to_string(bucket));

    // Move to the next id that is not a multiple of 100.
    do {
      ++expected_id;
    } while (expected_id % 100 == 0);
  }
}

// Filters on `bool = TRuE` (mixed-case boolean literal) and checks that the
// hits are ids 0, 100, 200, ... in result order.
TEST_F(ForwardRecallTest, BoolEqTrue) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "bool = TRuE";
  request.output_fields_ = {"id", "name", "age"};

  auto sql_engine = SQLEngine::create(std::make_shared<Profiler>());
  auto outcome = sql_engine->execute(collection_schema_, request, segments_);
  ASSERT_TRUE(outcome.has_value()) << outcome.error().c_str();
  auto hits = outcome.value();
  EXPECT_EQ(hits.size(), 100);

  const int hit_count = static_cast<int>(hits.size());
  for (int pos = 0; pos < hit_count; ++pos) {
    // One hit per block of 100 ids: the multiple of 100 itself.
    const int expected_id = 100 * pos;
    const int bucket = expected_id % 100;
    auto &hit = hits[pos];
    EXPECT_EQ(hit->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, hit->get<uint64_t>("id"));
    auto age_field = hit->get<int32_t>("age");
    EXPECT_EQ(age_field.value(), bucket);
    auto name_field = hit->get<std::string>("name");
    ASSERT_TRUE(name_field);
    EXPECT_EQ(name_field.value(), "user_" + std::to_string(bucket));
  }
}

// Filters on `bool = false` and expects 200 hits whose ids count upward from
// 1, skipping every multiple of 100.
TEST_F(ForwardRecallTest, BoolEqFalse) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "bool = false";
  request.output_fields_ = {"id", "name", "age"};

  auto sql_engine = SQLEngine::create(std::make_shared<Profiler>());
  auto outcome = sql_engine->execute(collection_schema_, request, segments_);
  ASSERT_TRUE(outcome.has_value()) << outcome.error().c_str();
  auto hits = outcome.value();
  EXPECT_EQ(hits.size(), 200);

  const int hit_count = static_cast<int>(hits.size());
  int expected_id = 1;
  for (int pos = 0; pos < hit_count; ++pos) {
    const int bucket = expected_id % 100;
    auto &hit = hits[pos];
    EXPECT_EQ(hit->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, hit->get<uint64_t>("id"));
    auto age_field = hit->get<int32_t>("age");
    EXPECT_EQ(age_field.value(), bucket);
    auto name_field = hit->get<std::string>("name");
    ASSERT_TRUE(name_field);
    EXPECT_EQ(name_field.value(), "user_" + std::to_string(bucket));

    // Step by one, or by two when the successor would be a multiple of 100.
    expected_id += ((expected_id + 1) % 100 == 0) ? 2 : 1;
  }
}

// Filters on `array_length(category_set) = 32` and checks that the hits are
// ids 32, 132, 232, ... in result order.
TEST_F(ForwardRecallTest, ArrayLengthEq) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "array_length(category_set) = 32";
  request.output_fields_ = {"id", "name", "age"};

  auto sql_engine = SQLEngine::create(std::make_shared<Profiler>());
  auto outcome = sql_engine->execute(collection_schema_, request, segments_);
  ASSERT_TRUE(outcome.has_value()) << outcome.error().c_str();
  auto hits = outcome.value();
  EXPECT_EQ(hits.size(), 100);

  const int hit_count = static_cast<int>(hits.size());
  for (int pos = 0; pos < hit_count; ++pos) {
    // One hit per block of 100 ids, at in-block offset 32.
    const int expected_id = 32 + 100 * pos;
    const int bucket = expected_id % 100;
    auto &hit = hits[pos];
    EXPECT_EQ(hit->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, hit->get<uint64_t>("id"));
    auto age_field = hit->get<int32_t>("age");
    EXPECT_EQ(age_field.value(), bucket);
    auto name_field = hit->get<std::string>("name");
    ASSERT_TRUE(name_field);
    EXPECT_EQ(name_field.value(), "user_" + std::to_string(bucket));
  }
}

// Filters on `array_length(category_set) >= 32` and expects 200 hits whose
// ids have an in-block offset of 32..99 (32..99, 132..199, ...).
TEST_F(ForwardRecallTest, ArrayLengthGe) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "array_length(category_set) >= 32";
  request.output_fields_ = {"id", "name", "age"};

  auto sql_engine = SQLEngine::create(std::make_shared<Profiler>());
  auto outcome = sql_engine->execute(collection_schema_, request, segments_);
  ASSERT_TRUE(outcome.has_value()) << outcome.error().c_str();
  auto hits = outcome.value();
  EXPECT_EQ(hits.size(), 200);

  const int hit_count = static_cast<int>(hits.size());
  int expected_id = 32;
  for (int pos = 0; pos < hit_count; ++pos) {
    const int bucket = expected_id % 100;
    auto &hit = hits[pos];
    EXPECT_EQ(hit->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, hit->get<uint64_t>("id"));
    auto age_field = hit->get<int32_t>("age");
    EXPECT_EQ(age_field.value(), bucket);
    auto name_field = hit->get<std::string>("name");
    ASSERT_TRUE(name_field);
    EXPECT_EQ(name_field.value(), "user_" + std::to_string(bucket));

    // Move to the next id whose in-block offset is at least 32.
    do {
      ++expected_id;
    } while (expected_id % 100 < 32);
  }
}


}  // namespace zvec::sqlengine


================================================
FILE: tests/db/sqlengine/invert_recall_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License

#include <cstdint>
#include <memory>
#include <gtest/gtest.h>
#include "db/sqlengine/sqlengine.h"
#include "zvec/db/schema.h"
#include "recall_base.h"

namespace zvec::sqlengine {

// Fixture for the tests in this file, whose filters reference the
// `invert_`-prefixed fields. It adds nothing of its own; everything the tests
// use (e.g. collection_schema_, segments_) is inherited from RecallTest.
class InvertRecallTest : public RecallTest {};

// Filters on `invert_age = 1` and checks that the hits are ids 1, 101, 201,
// ... in result order, each carrying the matching pk, age and name.
TEST_F(InvertRecallTest, Eq) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "invert_age = 1";
  request.output_fields_ = {"id", "name", "age"};

  auto sql_engine = SQLEngine::create(std::make_shared<Profiler>());
  auto outcome = sql_engine->execute(collection_schema_, request, segments_);
  ASSERT_TRUE(outcome.has_value()) << outcome.error().c_str();
  auto hits = outcome.value();
  EXPECT_EQ(hits.size(), 100);

  const int hit_count = static_cast<int>(hits.size());
  for (int pos = 0; pos < hit_count; ++pos) {
    // One hit per block of 100 ids, starting at id 1.
    const int expected_id = 1 + 100 * pos;
    const int bucket = expected_id % 100;
    auto &hit = hits[pos];
    EXPECT_EQ(hit->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, hit->get<uint64_t>("id"));
    auto age_field = hit->get<int32_t>("age");
    EXPECT_EQ(age_field.value(), bucket);
    auto name_field = hit->get<std::string>("name");
    ASSERT_TRUE(name_field);
    EXPECT_EQ(name_field.value(), "user_" + std::to_string(bucket));
  }
}

// Filters on `invert_id > 1000` with topk_ = 200 and verifies that exactly
// topk_ documents come back, carrying ids 1001, 1002, ... in result order.
TEST_F(InvertRecallTest, Gt) {
  VectorQuery query;
  query.output_fields_ = {"id", "name", "age"};
  query.topk_ = 200;
  query.filter_ = "invert_id > 1000";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  // Fatal on purpose: a short result set must stop the test here rather than
  // let the per-document checks run against an unexpected number of hits.
  ASSERT_EQ(docs.size(), query.topk_);
  // Bound the loop by the container itself so docs[j] is always in range.
  for (int j = 0; j < static_cast<int>(docs.size()); j++) {
    auto &doc = docs[j];
    auto i = j + 1001;
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(i));
    auto age = doc->get<int32_t>("age");
    // Check presence before calling value(), mirroring the `name` check below.
    ASSERT_TRUE(age);
    EXPECT_EQ(age.value(), i % 100);
    auto name = doc->get<std::string>("name");
    ASSERT_TRUE(name);
    EXPECT_EQ(name.value(), "user_" + std::to_string(i % 100));
  }
}

// Filters on `invert_id >= 1000` with topk_ = 200 and verifies that exactly
// topk_ documents come back, carrying ids 1000, 1001, ... in result order.
TEST_F(InvertRecallTest, Ge) {
  VectorQuery query;
  query.output_fields_ = {"id", "name", "age"};
  query.topk_ = 200;
  query.filter_ = "invert_id >= 1000";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  // Fatal on purpose: a short result set must stop the test here rather than
  // let the per-document checks run against an unexpected number of hits.
  ASSERT_EQ(docs.size(), query.topk_);
  // Bound the loop by the container itself so docs[j] is always in range.
  for (int j = 0; j < static_cast<int>(docs.size()); j++) {
    auto &doc = docs[j];
    auto i = j + 1000;
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(i));
    auto age = doc->get<int32_t>("age");
    // Check presence before calling value(), mirroring the `name` check below.
    ASSERT_TRUE(age);
    EXPECT_EQ(age.value(), i % 100);
    auto name = doc->get<std::string>("name");
    ASSERT_TRUE(name);
    EXPECT_EQ(name.value(), "user_" + std::to_string(i % 100));
  }
}

// Filters on `invert_id < 100` and requires exactly 100 hits whose ids run
// 0..99 in result order.
TEST_F(InvertRecallTest, Lt) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "invert_id < 100";
  request.output_fields_ = {"id", "name", "age"};

  auto sql_engine = SQLEngine::create(std::make_shared<Profiler>());
  auto outcome = sql_engine->execute(collection_schema_, request, segments_);
  ASSERT_TRUE(outcome.has_value()) << outcome.error().c_str();
  auto hits = outcome.value();
  ASSERT_EQ(hits.size(), 100);

  const int hit_count = static_cast<int>(hits.size());
  // The expected id equals the position in the result list.
  for (int expected_id = 0; expected_id < hit_count; ++expected_id) {
    const int bucket = expected_id % 100;
    auto &hit = hits[expected_id];
    EXPECT_EQ(expected_id, hit->get<uint64_t>("id"));
    EXPECT_EQ(hit->pk(), "pk_" + std::to_string(expected_id));
    auto age_field = hit->get<int32_t>("age");
    EXPECT_EQ(age_field.value(), bucket);
    auto name_field = hit->get<std::string>("name");
    ASSERT_TRUE(name_field);
    EXPECT_EQ(name_field.value(), "user_" + std::to_string(bucket));
  }
}

// Filters on `invert_id <= 100` and requires exactly 101 hits whose ids run
// 0..100 in result order.
TEST_F(InvertRecallTest, Le) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "invert_id <= 100";
  request.output_fields_ = {"id", "name", "age"};

  auto sql_engine = SQLEngine::create(std::make_shared<Profiler>());
  auto outcome = sql_engine->execute(collection_schema_, request, segments_);
  ASSERT_TRUE(outcome.has_value()) << outcome.error().c_str();
  auto hits = outcome.value();
  ASSERT_EQ(hits.size(), 101);

  const int hit_count = static_cast<int>(hits.size());
  // The expected id equals the position in the result list.
  for (int expected_id = 0; expected_id < hit_count; ++expected_id) {
    const int bucket = expected_id % 100;
    auto &hit = hits[expected_id];
    EXPECT_EQ(expected_id, hit->get<uint64_t>("id"));
    EXPECT_EQ(hit->pk(), "pk_" + std::to_string(expected_id));
    auto age_field = hit->get<int32_t>("age");
    EXPECT_EQ(age_field.value(), bucket);
    auto name_field = hit->get<std::string>("name");
    ASSERT_TRUE(name_field);
    EXPECT_EQ(name_field.value(), "user_" + std::to_string(bucket));
  }
}

// Combines two range predicates with `and`
// (`invert_id <= 100 and invert_id > 50`) and requires exactly 50 hits whose
// ids run 51..100 in result order.
TEST_F(InvertRecallTest, And) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "invert_id <= 100 and invert_id > 50";
  request.output_fields_ = {"id", "name", "age"};

  auto sql_engine = SQLEngine::create(std::make_shared<Profiler>());
  auto outcome = sql_engine->execute(collection_schema_, request, segments_);
  ASSERT_TRUE(outcome.has_value()) << outcome.error().c_str();
  auto hits = outcome.value();
  ASSERT_EQ(hits.size(), 50);

  const int hit_count = static_cast<int>(hits.size());
  for (int pos = 0; pos < hit_count; ++pos) {
    // First matching id is 51; ids are consecutive from there.
    const int expected_id = 51 + pos;
    const int bucket = expected_id % 100;
    auto &hit = hits[pos];
    EXPECT_EQ(expected_id, hit->get<uint64_t>("id"));
    EXPECT_EQ(hit->pk(), "pk_" + std::to_string(expected_id));
    auto age_field = hit->get<int32_t>("age");
    EXPECT_EQ(age_field.value(), bucket);
    auto name_field = hit->get<std::string>("name");
    ASSERT_TRUE(name_field);
    EXPECT_EQ(name_field.value(), "user_" + std::to_string(bucket));
  }
}

// Combines two range predicates with `or`
// (`invert_id < 100 or invert_id > 200`) and requires exactly 200 hits: ids
// 0..99 followed by ids 201..300.
TEST_F(InvertRecallTest, Or) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "invert_id < 100 or invert_id > 200";
  request.output_fields_ = {"id", "name", "age"};

  auto sql_engine = SQLEngine::create(std::make_shared<Profiler>());
  auto outcome = sql_engine->execute(collection_schema_, request, segments_);
  ASSERT_TRUE(outcome.has_value()) << outcome.error().c_str();
  auto hits = outcome.value();
  ASSERT_EQ(hits.size(), 200);

  const int hit_count = static_cast<int>(hits.size());
  for (int pos = 0; pos < hit_count; ++pos) {
    // The first 100 positions map to themselves; later ones skip the excluded
    // range 100..200, so position 100 corresponds to id 201.
    int expected_id = pos;
    if (pos >= 100) {
      expected_id = pos + 101;
    }
    const int bucket = expected_id % 100;
    auto &hit = hits[pos];
    EXPECT_EQ(expected_id, hit->get<uint64_t>("id"));
    EXPECT_EQ(hit->pk(), "pk_" + std::to_string(expected_id));
    auto age_field = hit->get<int32_t>("age");
    EXPECT_EQ(age_field.value(), bucket);
    auto name_field = hit->get<std::string>("name");
    ASSERT_TRUE(name_field);
    EXPECT_EQ(name_field.value(), "user_" + std::to_string(bucket));
  }
}

// Filters on the string equality `invert_name = 'user_1'` and checks that the
// hits are ids 1, 101, 201, ... in result order.
TEST_F(InvertRecallTest, StrEq) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "invert_name = 'user_1'";
  request.output_fields_ = {"id", "name", "age"};

  auto sql_engine = SQLEngine::create(std::make_shared<Profiler>());
  auto outcome = sql_engine->execute(collection_schema_, request, segments_);
  ASSERT_TRUE(outcome.has_value()) << outcome.error().c_str();
  auto hits = outcome.value();
  EXPECT_EQ(hits.size(), 100);

  const int hit_count = static_cast<int>(hits.size());
  for (int pos = 0; pos < hit_count; ++pos) {
    // One hit per block of 100 ids, starting at id 1.
    const int expected_id = 1 + 100 * pos;
    const int bucket = expected_id % 100;
    auto &hit = hits[pos];
    EXPECT_EQ(hit->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, hit->get<uint64_t>("id"));
    auto age_field = hit->get<int32_t>("age");
    EXPECT_EQ(age_field.value(), bucket);
    auto name_field = hit->get<std::string>("name");
    ASSERT_TRUE(name_field);
    EXPECT_EQ(name_field.value(), "user_" + std::to_string(bucket));
  }
}

// Filters on the string comparison `invert_name >= 'user_1'` and expects 200
// hits whose ids count upward from 1 while skipping every multiple of 100.
TEST_F(InvertRecallTest, StrGe) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "invert_name >= 'user_1'";
  request.output_fields_ = {"id", "name", "age"};

  auto sql_engine = SQLEngine::create(std::make_shared<Profiler>());
  auto outcome = sql_engine->execute(collection_schema_, request, segments_);
  ASSERT_TRUE(outcome.has_value()) << outcome.error().c_str();
  auto hits = outcome.value();
  EXPECT_EQ(hits.size(), 200);

  const int hit_count = static_cast<int>(hits.size());
  int expected_id = 0;
  for (int pos = 0; pos < hit_count; ++pos) {
    // Advance to the next expected id, stepping over multiples of 100.
    ++expected_id;
    if (expected_id % 100 == 0) {
      ++expected_id;
    }
    const int bucket = expected_id % 100;
    auto &hit = hits[pos];
    EXPECT_EQ(hit->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, hit->get<uint64_t>("id"));
    auto age_field = hit->get<int32_t>("age");
    EXPECT_EQ(age_field.value(), bucket);
    auto name_field = hit->get<std::string>("name");
    ASSERT_TRUE(name_field);
    EXPECT_EQ(name_field.value(), "user_" + std::to_string(bucket));
  }
}

// Filters on `invert_name IN ('user_1', 'user_2')` and expects 200 hits
// alternating between the two names: ids 1, 2, 101, 102, 201, 202, ...
TEST_F(InvertRecallTest, StrIn) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "invert_name IN ('user_1', 'user_2')";
  request.output_fields_ = {"id", "name", "age"};

  auto sql_engine = SQLEngine::create(std::make_shared<Profiler>());
  auto outcome = sql_engine->execute(collection_schema_, request, segments_);
  ASSERT_TRUE(outcome.has_value()) << outcome.error().c_str();
  auto hits = outcome.value();
  EXPECT_EQ(hits.size(), 200);

  const int hit_count = static_cast<int>(hits.size());
  for (int pos = 0; pos < hit_count; ++pos) {
    // Two hits per block of 100 ids: offsets 1 and 2 within the block.
    const int expected_id = (pos / 2) * 100 + 1 + (pos % 2);
    const int bucket = expected_id % 100;
    auto &hit = hits[pos];
    EXPECT_EQ(hit->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, hit->get<uint64_t>("id"));
    auto age_field = hit->get<int32_t>("age");
    EXPECT_EQ(age_field.value(), bucket);
    auto name_field = hit->get<std::string>("name");
    ASSERT_TRUE(name_field);
    EXPECT_EQ(name_field.value(), "user_" + std::to_string(bucket));
  }
}

// Filters on `invert_name NOT IN ('user_1', 'user_2')` and expects 200 hits
// whose ids count upward from 0 while skipping ids ending in 01 and 02 of
// each block of 100 (0, 3, 4, ..., 99, 100, 103, ...).
TEST_F(InvertRecallTest, StrNotIn) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "invert_name NOT IN ('user_1', 'user_2')";
  request.output_fields_ = {"id", "name", "age"};

  auto sql_engine = SQLEngine::create(std::make_shared<Profiler>());
  auto outcome = sql_engine->execute(collection_schema_, request, segments_);
  ASSERT_TRUE(outcome.has_value()) << outcome.error().c_str();
  auto hits = outcome.value();
  EXPECT_EQ(hits.size(), 200);

  const int hit_count = static_cast<int>(hits.size());
  int expected_id = 0;
  for (int pos = 0; pos < hit_count; ++pos) {
    const int bucket = expected_id % 100;
    auto &hit = hits[pos];
    EXPECT_EQ(hit->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, hit->get<uint64_t>("id"));
    auto age_field = hit->get<int32_t>("age");
    EXPECT_EQ(age_field.value(), bucket);
    auto name_field = hit->get<std::string>("name");
    ASSERT_TRUE(name_field);
    EXPECT_EQ(name_field.value(), "user_" + std::to_string(bucket));

    // Right after a multiple of 100 the next two ids are excluded.
    expected_id += (bucket == 0) ? 3 : 1;
  }
}

// Prefix LIKE with an escaped underscore on the inverted string column.
// The expected ids are 9, 90..99, 109, 190..199, ...
TEST_F(InvertRecallTest, StrLike) {
  VectorQuery query;
  query.topk_ = 200;
  query.filter_ = "invert_name like 'user\\_9%'";
  query.output_fields_ = {"id", "name", "age"};

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  EXPECT_EQ(docs.size(), 200);

  int expected_id = 9;
  const int doc_count = static_cast<int>(docs.size());
  for (int pos = 0; pos < doc_count; ++pos) {
    auto &row = docs[pos];
    const int bucket = expected_id % 100;
    EXPECT_EQ(row->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, row->get<uint64_t>("id"));
    auto age = row->get<int32_t>("age");
    EXPECT_EQ(age.value(), bucket);
    auto name = row->get<std::string>("name");
    ASSERT_TRUE(name);
    EXPECT_EQ(name.value(), "user_" + std::to_string(bucket));

    switch (bucket) {
      case 9:
        expected_id += 81;  // x09 -> x90
        break;
      case 99:
        expected_id += 10;  // x99 -> (x+1)09
        break;
      default:
        expected_id += 1;
        break;
    }
  }
}

// contain_all over the values 1..32 on the inverted array column. The
// expected ids are those whose remainder mod 100 is at least 32.
TEST_F(InvertRecallTest, ContainAll) {
  VectorQuery query;
  query.topk_ = 200;
  query.output_fields_ = {"id", "name", "age"};
  // Builds "invert_category_set contain_all (1, 2, ..., 32)".
  query.filter_ = "invert_category_set contain_all (";
  for (int value = 1; value <= 32; ++value) {
    if (value > 1) {
      query.filter_ += ", ";
    }
    query.filter_ += std::to_string(value);
  }
  query.filter_ += ")";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  EXPECT_EQ(docs.size(), 200);

  int expected_id = 32;
  const int doc_count = static_cast<int>(docs.size());
  for (int pos = 0; pos < doc_count; ++pos) {
    auto &row = docs[pos];
    const int bucket = expected_id % 100;
    EXPECT_EQ(row->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, row->get<uint64_t>("id"));
    auto age = row->get<int32_t>("age");
    EXPECT_EQ(age.value(), bucket);
    auto name = row->get<std::string>("name");
    ASSERT_TRUE(name);
    EXPECT_EQ(name.value(), "user_" + std::to_string(bucket));

    // Advance to the next id whose remainder is >= 32.
    do {
      ++expected_id;
    } while (expected_id % 100 < 32);
  }
}

// Negated contain_all over the values 1..32. The expected ids are those
// whose remainder mod 100 lies in 1..31.
TEST_F(InvertRecallTest, NotContainAll) {
  VectorQuery query;
  query.topk_ = 200;
  query.output_fields_ = {"id", "name", "age"};
  // Builds "invert_category_set not contain_all (1, 2, ..., 32)".
  query.filter_ = "invert_category_set not contain_all (";
  for (int value = 1; value <= 32; ++value) {
    if (value > 1) {
      query.filter_ += ", ";
    }
    query.filter_ += std::to_string(value);
  }
  query.filter_ += ")";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  EXPECT_EQ(docs.size(), 200);

  int expected_id = 1;
  const int doc_count = static_cast<int>(docs.size());
  for (int pos = 0; pos < doc_count; ++pos) {
    auto &row = docs[pos];
    const int bucket = expected_id % 100;
    EXPECT_EQ(row->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, row->get<uint64_t>("id"));
    auto age = row->get<int32_t>("age");
    EXPECT_EQ(age.value(), bucket);
    auto name = row->get<std::string>("name");
    ASSERT_TRUE(name);
    EXPECT_EQ(name.value(), "user_" + std::to_string(bucket));

    // Skip remainders >= 32, and remainder 0 which has a null category.
    do {
      ++expected_id;
    } while (expected_id % 100 >= 32 || expected_id % 100 == 0);
  }
}

// contain_any over (98, 99, 100). The expected ids are those whose
// remainder mod 100 is 98 or 99.
TEST_F(InvertRecallTest, ContainAny) {
  VectorQuery query;
  query.topk_ = 200;
  query.filter_ = "invert_category_set contain_any (98,99,100)";
  query.output_fields_ = {"id", "name", "age"};

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  EXPECT_EQ(docs.size(), 200);

  int expected_id = 98;
  const int doc_count = static_cast<int>(docs.size());
  for (int pos = 0; pos < doc_count; ++pos) {
    auto &row = docs[pos];
    const int bucket = expected_id % 100;
    EXPECT_EQ(row->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, row->get<uint64_t>("id"));
    auto age = row->get<int32_t>("age");
    EXPECT_EQ(age.value(), bucket);
    auto name = row->get<std::string>("name");
    ASSERT_TRUE(name);
    EXPECT_EQ(name.value(), "user_" + std::to_string(bucket));

    // Advance to the next id whose remainder is >= 98.
    do {
      ++expected_id;
    } while (expected_id % 100 < 98);
  }
}

// Negated contain_any over (98, 99, 100). The expected ids are those whose
// remainder mod 100 lies in 1..97.
TEST_F(InvertRecallTest, NotContainAny) {
  VectorQuery query;
  query.topk_ = 200;
  query.filter_ = "invert_category_set not contain_any (98,99,100)";
  query.output_fields_ = {"id", "name", "age"};

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  EXPECT_EQ(docs.size(), 200);

  int expected_id = 1;
  const int doc_count = static_cast<int>(docs.size());
  for (int pos = 0; pos < doc_count; ++pos) {
    auto &row = docs[pos];
    const int bucket = expected_id % 100;
    EXPECT_EQ(row->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, row->get<uint64_t>("id"));
    auto age = row->get<int32_t>("age");
    EXPECT_EQ(age.value(), bucket);
    auto name = row->get<std::string>("name");
    ASSERT_TRUE(name);
    EXPECT_EQ(name.value(), "user_" + std::to_string(bucket));

    // Skip remainders >= 98, and remainder 0 which has a null category.
    do {
      ++expected_id;
    } while (expected_id % 100 >= 98 || expected_id % 100 == 0);
  }
}

// contain_all (true, false) on the inverted bool array column. The expected
// ids are the multiples of three: 0, 3, 6, ...
TEST_F(InvertRecallTest, BoolContainAll) {
  VectorQuery query;
  query.topk_ = 200;
  query.filter_ = "invert_bool_array contain_all (true, false)";
  query.output_fields_ = {"id", "name", "age"};

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  EXPECT_EQ(docs.size(), 200);

  const int doc_count = static_cast<int>(docs.size());
  for (int pos = 0, expected_id = 0; pos < doc_count;
       ++pos, expected_id += 3) {
    auto &row = docs[pos];
    const int bucket = expected_id % 100;
    EXPECT_EQ(row->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, row->get<uint64_t>("id"));
    auto age = row->get<int32_t>("age");
    EXPECT_EQ(age.value(), bucket);
    auto name = row->get<std::string>("name");
    ASSERT_TRUE(name);
    EXPECT_EQ(name.value(), "user_" + std::to_string(bucket));
  }
}

// contain_any (true) on the inverted bool array column. The expected ids
// are 0, 1, 3, 4, 6, 7, ... (every id with id % 3 == 2 is skipped).
TEST_F(InvertRecallTest, BoolContainAny) {
  VectorQuery query;
  query.topk_ = 200;
  query.filter_ = "invert_bool_array contain_any (true)";
  query.output_fields_ = {"id", "name", "age"};

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  EXPECT_EQ(docs.size(), 200);

  int expected_id = 0;
  const int doc_count = static_cast<int>(docs.size());
  for (int pos = 0; pos < doc_count; ++pos) {
    auto &row = docs[pos];
    const int bucket = expected_id % 100;
    EXPECT_EQ(row->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, row->get<uint64_t>("id"));
    auto age = row->get<int32_t>("age");
    EXPECT_EQ(age.value(), bucket);
    auto name = row->get<std::string>("name");
    ASSERT_TRUE(name);
    EXPECT_EQ(name.value(), "user_" + std::to_string(bucket));

    // Move to the next id, stepping over ids congruent to 2 modulo 3.
    ++expected_id;
    if (expected_id % 3 == 2) {
      ++expected_id;
    }
  }
}

// contain_all with an empty value list. The expected ids are every id
// except the multiples of 100, starting at 1.
TEST_F(InvertRecallTest, ContainAllEmptySet) {
  VectorQuery query;
  query.topk_ = 200;
  query.filter_ = "invert_category_set contain_all ()";
  query.output_fields_ = {"id", "name", "age"};

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  EXPECT_EQ(docs.size(), 200);

  int expected_id = 1;
  const int doc_count = static_cast<int>(docs.size());
  for (int pos = 0; pos < doc_count; ++pos) {
    auto &row = docs[pos];
    const int bucket = expected_id % 100;
    EXPECT_EQ(row->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, row->get<uint64_t>("id"));
    auto age = row->get<int32_t>("age");
    EXPECT_EQ(age.value(), bucket);
    auto name = row->get<std::string>("name");
    ASSERT_TRUE(name);
    EXPECT_EQ(name.value(), "user_" + std::to_string(bucket));

    // Advance, stepping over multiples of 100.
    do {
      ++expected_id;
    } while (expected_id % 100 == 0);
  }
}

// Negated contain_all with an empty value list: the query must succeed and
// return no documents.
TEST_F(InvertRecallTest, NotContainAllEmptySet) {
  VectorQuery query;
  query.topk_ = 200;
  query.filter_ = "invert_category_set not contain_all ()";
  query.output_fields_ = {"id", "name", "age"};

  auto profiler = std::make_shared<Profiler>();
  auto engine = SQLEngine::create(profiler);
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();

  auto docs = ret.value();
  ASSERT_EQ(docs.size(), 0);
}

// contain_any with an empty value list: the query must succeed and return
// no documents.
TEST_F(InvertRecallTest, ContainAnyEmptySet) {
  VectorQuery query;
  query.topk_ = 200;
  query.filter_ = "invert_category_set contain_any ()";
  query.output_fields_ = {"id", "name", "age"};

  auto profiler = std::make_shared<Profiler>();
  auto engine = SQLEngine::create(profiler);
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();

  auto docs = ret.value();
  ASSERT_EQ(docs.size(), 0);
}

// Negated contain_any with an empty value list. The expected ids are every
// id except the multiples of 100, starting at 1.
TEST_F(InvertRecallTest, NotContainAnyEmptySet) {
  VectorQuery query;
  query.topk_ = 200;
  query.filter_ = "invert_category_set not contain_any ()";
  query.output_fields_ = {"id", "name", "age"};

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  EXPECT_EQ(docs.size(), 200);

  int expected_id = 1;
  const int doc_count = static_cast<int>(docs.size());
  for (int pos = 0; pos < doc_count; ++pos) {
    auto &row = docs[pos];
    const int bucket = expected_id % 100;
    EXPECT_EQ(row->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, row->get<uint64_t>("id"));
    auto age = row->get<int32_t>("age");
    EXPECT_EQ(age.value(), bucket);
    auto name = row->get<std::string>("name");
    ASSERT_TRUE(name);
    EXPECT_EQ(name.value(), "user_" + std::to_string(bucket));

    // Advance, stepping over multiples of 100.
    do {
      ++expected_id;
    } while (expected_id % 100 == 0);
  }
}

// IS NULL on the inverted optional column. Expects 100 documents whose ids
// are the multiples of 100: 0, 100, 200, ...
TEST_F(InvertRecallTest, IsNull) {
  VectorQuery query;
  query.topk_ = 200;
  query.filter_ = "invert_optional_age is null";
  query.output_fields_ = {"id", "name", "age"};

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  EXPECT_EQ(docs.size(), 100);

  const int doc_count = static_cast<int>(docs.size());
  for (int pos = 0; pos < doc_count; ++pos) {
    const int expected_id = pos * 100;
    const int bucket = expected_id % 100;
    auto &row = docs[pos];
    EXPECT_EQ(row->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, row->get<uint64_t>("id"));
    auto age = row->get<int32_t>("age");
    EXPECT_EQ(age.value(), bucket);
    auto name = row->get<std::string>("name");
    ASSERT_TRUE(name);
    EXPECT_EQ(name.value(), "user_" + std::to_string(bucket));
  }
}

// IS NOT NULL on the inverted optional column. The expected ids are
// 1, 2, ..., 99, 101, ... (multiples of 100 are skipped).
TEST_F(InvertRecallTest, IsNotNull) {
  VectorQuery query;
  query.topk_ = 200;
  query.filter_ = "invert_optional_age is not null";
  query.output_fields_ = {"id", "name", "age"};

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  EXPECT_EQ(docs.size(), 200);

  int expected_id = 0;
  const int doc_count = static_cast<int>(docs.size());
  for (int pos = 0; pos < doc_count; ++pos) {
    // Multiples of 100 are not expected in the result; step past them.
    if (expected_id % 100 == 0) {
      ++expected_id;
    }
    const int bucket = expected_id % 100;
    auto &row = docs[pos];
    EXPECT_EQ(row->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, row->get<uint64_t>("id"));
    auto age = row->get<int32_t>("age");
    EXPECT_EQ(age.value(), bucket);
    auto name = row->get<std::string>("name");
    ASSERT_TRUE(name);
    EXPECT_EQ(name.value(), "user_" + std::to_string(bucket));

    ++expected_id;
  }
}

// Equality against a mixed-case boolean literal ("TRuE") on the inverted
// bool column. Expects 100 documents whose ids are the multiples of 100.
TEST_F(InvertRecallTest, BoolEqTrue) {
  VectorQuery query;
  query.topk_ = 200;
  query.filter_ = "invert_bool = TRuE";
  query.output_fields_ = {"id", "name", "age"};

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  EXPECT_EQ(docs.size(), 100);

  const int doc_count = static_cast<int>(docs.size());
  for (int pos = 0; pos < doc_count; ++pos) {
    const int expected_id = pos * 100;
    const int bucket = expected_id % 100;
    auto &row = docs[pos];
    EXPECT_EQ(row->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, row->get<uint64_t>("id"));
    auto age = row->get<int32_t>("age");
    EXPECT_EQ(age.value(), bucket);
    auto name = row->get<std::string>("name");
    ASSERT_TRUE(name);
    EXPECT_EQ(name.value(), "user_" + std::to_string(bucket));
  }
}

// Equality against false on the inverted bool column. The expected ids are
// 1, 2, ..., 99, 101, ... (multiples of 100 are skipped).
TEST_F(InvertRecallTest, BoolEqFalse) {
  VectorQuery query;
  query.topk_ = 200;
  query.filter_ = "invert_bool = false";
  query.output_fields_ = {"id", "name", "age"};

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  EXPECT_EQ(docs.size(), 200);

  int expected_id = 1;
  const int doc_count = static_cast<int>(docs.size());
  for (int pos = 0; pos < doc_count; ++pos) {
    auto &row = docs[pos];
    const int bucket = expected_id % 100;
    EXPECT_EQ(row->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, row->get<uint64_t>("id"));
    auto age = row->get<int32_t>("age");
    EXPECT_EQ(age.value(), bucket);
    auto name = row->get<std::string>("name");
    ASSERT_TRUE(name);
    EXPECT_EQ(name.value(), "user_" + std::to_string(bucket));

    // Next id, hopping over a multiple of 100 if we land on one.
    ++expected_id;
    if (expected_id % 100 == 0) {
      ++expected_id;
    }
  }
}

// array_length(...) >= 32 on the inverted array column. The expected ids
// are those whose remainder mod 100 is at least 32.
TEST_F(InvertRecallTest, ArrayLengthGe) {
  VectorQuery query;
  query.topk_ = 200;
  query.filter_ = "array_length(invert_category_set) >= 32";
  query.output_fields_ = {"id", "name", "age"};

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  EXPECT_EQ(docs.size(), 200);

  int expected_id = 32;
  const int doc_count = static_cast<int>(docs.size());
  for (int pos = 0; pos < doc_count; ++pos) {
    auto &row = docs[pos];
    const int bucket = expected_id % 100;
    EXPECT_EQ(row->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, row->get<uint64_t>("id"));
    auto age = row->get<int32_t>("age");
    EXPECT_EQ(age.value(), bucket);
    auto name = row->get<std::string>("name");
    ASSERT_TRUE(name);
    EXPECT_EQ(name.value(), "user_" + std::to_string(bucket));

    // Advance to the next id whose remainder is >= 32.
    do {
      ++expected_id;
    } while (expected_id % 100 < 32);
  }
}

// array_length(...) = 32 on the inverted array column. Expects 100
// documents with ids 32, 132, 232, ...
TEST_F(InvertRecallTest, ArrayLengthEq) {
  VectorQuery query;
  query.topk_ = 200;
  query.filter_ = "array_length(invert_category_set) = 32";
  query.output_fields_ = {"id", "name", "age"};

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  EXPECT_EQ(docs.size(), 100);

  const int doc_count = static_cast<int>(docs.size());
  for (int pos = 0; pos < doc_count; ++pos) {
    const int expected_id = 32 + pos * 100;
    const int bucket = expected_id % 100;
    auto &row = docs[pos];
    EXPECT_EQ(row->pk(), "pk_" + std::to_string(expected_id));
    EXPECT_EQ(expected_id, row->get<uint64_t>("id"));
    auto age = row->get<int32_t>("age");
    EXPECT_EQ(age.value(), bucket);
    auto name = row->get<std::string>("name");
    ASSERT_TRUE(name);
    EXPECT_EQ(name.value(), "user_" + std::to_string(bucket));
  }
}

// Runs a filtered query over a segment list that contains the first segment
// twice, and checks the returned primary keys plus the dense and sparse
// vectors of each document (vectors are requested via include_vector_).
TEST_F(InvertRecallTest, MultiSegment) {
  VectorQuery query;
  query.output_fields_ = std::vector<std::string>();
  query.topk_ = 200;
  query.include_vector_ = true;
  query.filter_ = "invert_id <= 5000";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  std::vector<Segment::Ptr> segments = segments_;
  segments.push_back(segments_[0]);
  auto ret = engine->execute(collection_schema_, query, segments);
  if (!ret) {
    LOG_ERROR("execute failed: [%s]", ret.error().c_str());
  }
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  auto docs = ret.value();
  // Fatal on mismatch: the loop below indexes docs[i] for every i < topk_,
  // which would read out of bounds if fewer documents came back.
  ASSERT_EQ(docs.size(), query.topk_);
  for (int i = 0; i < query.topk_; i++) {
    auto &doc = docs[i];
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(i));

    // Dense vector: four components, each equal to the document index.
    auto dense = doc->get<std::vector<float>>("dense");
    ASSERT_TRUE(dense.has_value());
    EXPECT_EQ(dense.value().size(), 4);
    for (auto v : dense.value()) {
      EXPECT_FLOAT_EQ(v, static_cast<float>(i));
    }

    auto sparse =
        doc->get<std::pair<std::vector<uint32_t>, std::vector<float>>>(
            "sparse");
    if (i % 100 == 0) {
      // set with empty vector
      ASSERT_FALSE(sparse.has_value());
      continue;
    }

    // Sparse vector: i % 100 entries, indices 0..n-1, every value equal to i.
    ASSERT_TRUE(sparse.has_value());
    const auto &[indices, values] = sparse.value();
    EXPECT_EQ(indices.size(), i % 100);
    EXPECT_EQ(values.size(), i % 100);
    for (int j = 0; j < i % 100; j++) {
      EXPECT_EQ(indices[j], j);
      EXPECT_FLOAT_EQ(values[j], static_cast<float>(i));
    }
  }
}

}  // namespace zvec::sqlengine


================================================
FILE: tests/db/sqlengine/like_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <memory>
#include <arrow/api.h>
#include <arrow/io/api.h>
#include <arrow/ipc/api.h>
#include <gtest/gtest.h>
#include "db/common/file_helper.h"
#include "db/index/common/version_manager.h"
#include "db/index/segment/segment.h"
#include "db/sqlengine/sqlengine.h"
#include "zvec/db/index_params.h"
#include "zvec/db/schema.h"
#include "zvec/db/type.h"
#include "test_helper.h"

namespace zvec::sqlengine {

// Builds the test document for `doc_id`.
//
// The same string is stored in "name", "invert_name" and
// "extended_invert_name". It is "user-" followed by an optional marker and
// then doc_id % 100:
//   doc_id <  5000          -> no marker     (e.g. "user-22")
//   5000 <= doc_id < 8000   -> '%' marker    (e.g. "user-%22")
//   doc_id >= 8000          -> '_' marker    (e.g. "user-_22")
static Doc create_doc(const uint64_t doc_id) {
  std::string label = "user-";
  if (doc_id >= 8000) {
    label += '_';
  } else if (doc_id >= 5000) {
    label += "%";
  }
  label += std::to_string(doc_id % 100);

  Doc new_doc;
  new_doc.set_pk("pk_" + std::to_string(doc_id));
  new_doc.set_doc_id(doc_id);
  new_doc.set<std::string>("name", label);
  new_doc.set<std::string>("invert_name", label);
  new_doc.set<std::string>("extended_invert_name", label);
  return new_doc;
}

// Fixture for LIKE filter tests.
//
// SetUpTestSuite creates one segment under seg_path_ and fills it through
// InsertDoc(segment, 0, 10000, &create_doc). The schema has three string
// columns holding the same value:
//   - "name":                 no index params (nullptr)
//   - "invert_name":          InvertIndexParams(false, false)
//   - "extended_invert_name": InvertIndexParams(false, true)
// NOTE(review): judging by the test names, the second InvertIndexParams
// argument presumably enables the extended wildcard index - confirm
// against InvertIndexParams.
class LikeTest : public testing::Test {
 protected:
  static void SetUpTestSuite() {
    // Start from a clean directory so earlier runs cannot leak state.
    FileHelper::RemoveDirectory(seg_path_);
    FileHelper::CreateDirectory(seg_path_);

    collection_schema_ = std::make_shared<CollectionSchema>(
        "test_collection",
        std::vector<FieldSchema::Ptr>{
            std::make_shared<FieldSchema>("name", DataType::STRING, false,
                                          nullptr),
            std::make_shared<FieldSchema>(
                "invert_name", DataType::STRING, false,
                std::make_shared<InvertIndexParams>(false, false)),
            std::make_shared<FieldSchema>(
                "extended_invert_name", DataType::STRING, false,
                std::make_shared<InvertIndexParams>(false, true)),
        });
    auto segment = create_segment(seg_path_, *collection_schema_);
    if (segment == nullptr) {
      // Without a segment no test can run; record the failure and abort.
      LOG_ERROR("create segment failed");
      EXPECT_TRUE(segment != nullptr);
      std::exit(EXIT_FAILURE);
    }
    auto status = InsertDoc(segment, 0, 10000, &create_doc);
    if (!status.ok()) {
      LOG_ERROR("insert doc failed: %s", status.c_str());
      EXPECT_TRUE(status.ok());
      std::exit(EXIT_FAILURE);
    }
    segments_.push_back(segment);
  }

  static void TearDownTestSuite() {
    // Drop the segment handles before deleting the files they refer to.
    segments_.clear();
    FileHelper::RemoveDirectory(seg_path_);
  }

 protected:
  // Directory that holds the segment files for this suite.
  static inline std::string seg_path_ = "./test_collection";
  // Schema shared by all tests in the suite.
  static inline CollectionSchema::Ptr collection_schema_;
  // Segments queried by every test (a single one is created).
  static inline std::vector<Segment::Ptr> segments_;
};


// "name like '%'" on the unindexed column: every returned document is
// expected in primary-key order pk_0, pk_1, ...
TEST_F(LikeTest, ForwardLikeAll) {
  VectorQuery query;
  query.output_fields_ = {"name"};
  query.topk_ = 200;
  query.filter_ = "name like '%'";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error();
  auto docs = std::move(ret.value());
  // Without this guard an empty result would pass: the loop checks nothing.
  ASSERT_FALSE(docs.empty());
  for (size_t i = 0; i < docs.size(); i++) {
    const auto &doc = docs[i];
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(i));
  }
}

// "invert_name like '%'" on the inverted column: every returned document is
// expected in primary-key order pk_0, pk_1, ...
TEST_F(LikeTest, InvertLikeAll) {
  VectorQuery query;
  query.output_fields_ = {"name"};
  query.topk_ = 200;
  query.filter_ = "invert_name like '%'";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error();
  auto docs = std::move(ret.value());
  // Without this guard an empty result would pass: the loop checks nothing.
  ASSERT_FALSE(docs.empty());
  for (size_t i = 0; i < docs.size(); i++) {
    const auto &doc = docs[i];
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(i));
  }
}

// Prefix pattern 'user-22%' on the unindexed column. The expected primary
// keys are pk_22, pk_122, pk_222, ... (i * 100 + 22).
TEST_F(LikeTest, ForwardPrefixLike) {
  VectorQuery query;
  query.output_fields_ = {"name"};
  query.topk_ = 200;
  query.filter_ = "name like 'user-22%'";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error();
  auto docs = std::move(ret.value());
  // Without this guard an empty result would pass: the loop checks nothing.
  ASSERT_FALSE(docs.empty());
  for (size_t i = 0; i < docs.size(); i++) {
    const auto &doc = docs[i];
    const size_t doc_id = i * 100 + 22;
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(doc_id));
  }
}

// Prefix pattern 'user-22%' on the inverted column. The expected primary
// keys are pk_22, pk_122, pk_222, ... (i * 100 + 22).
TEST_F(LikeTest, InvertPrefixLike) {
  VectorQuery query;
  query.output_fields_ = {"name"};
  query.topk_ = 200;
  query.filter_ = "invert_name like 'user-22%'";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error();
  auto docs = std::move(ret.value());
  // Without this guard an empty result would pass: the loop checks nothing.
  ASSERT_FALSE(docs.empty());
  for (size_t i = 0; i < docs.size(); i++) {
    const auto &doc = docs[i];
    const size_t doc_id = i * 100 + 22;
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(doc_id));
  }
}

// Suffix pattern '%ser-22' on the unindexed column. The expected primary
// keys are pk_22, pk_122, pk_222, ... (i * 100 + 22).
TEST_F(LikeTest, ForwardSuffixLike) {
  VectorQuery query;
  query.output_fields_ = {"name"};
  query.topk_ = 200;
  query.filter_ = "name like '%ser-22'";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error();
  auto docs = std::move(ret.value());
  // Without this guard an empty result would pass: the loop checks nothing.
  ASSERT_FALSE(docs.empty());
  for (size_t i = 0; i < docs.size(); i++) {
    const auto &doc = docs[i];
    const size_t doc_id = i * 100 + 22;
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(doc_id));
  }
}

// Suffix pattern '%ser-22' on "invert_name", the column created with
// InvertIndexParams(false, false). The expected primary keys are
// pk_22, pk_122, pk_222, ... (i * 100 + 22).
// NOTE(review): per the test name the engine presumably falls back to a
// forward scan here; the test only checks the results, not the plan.
TEST_F(LikeTest, NotExtendedInvertSuffixLikeRunAsForward) {
  VectorQuery query;
  query.output_fields_ = {"name"};
  query.topk_ = 200;
  query.filter_ = "invert_name like '%ser-22'";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error();
  auto docs = std::move(ret.value());
  // Without this guard an empty result would pass: the loop checks nothing.
  ASSERT_FALSE(docs.empty());
  for (size_t i = 0; i < docs.size(); i++) {
    const auto &doc = docs[i];
    const size_t doc_id = i * 100 + 22;
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(doc_id));
  }
}

// Suffix pattern '%ser-22' on "extended_invert_name", the column created
// with InvertIndexParams(false, true). The expected primary keys are
// pk_22, pk_122, pk_222, ... (i * 100 + 22).
TEST_F(LikeTest, ExtendedInvertSuffixLike) {
  VectorQuery query;
  query.output_fields_ = {"name"};
  query.topk_ = 200;
  query.filter_ = "extended_invert_name like '%ser-22'";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error();
  auto docs = std::move(ret.value());
  // Without this guard an empty result would pass: the loop checks nothing.
  ASSERT_FALSE(docs.empty());
  for (size_t i = 0; i < docs.size(); i++) {
    const auto &doc = docs[i];
    const size_t doc_id = i * 100 + 22;
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(doc_id));
  }
}

// Pattern 'user%2' (wildcard in the middle) on the unindexed column. The
// expected ids are, in increasing order, those whose last decimal digit of
// id % 100 is 2: 2, 12, 22, ...
TEST_F(LikeTest, ForwardMiddleLike) {
  VectorQuery query;
  query.output_fields_ = {"name"};
  query.topk_ = 200;
  query.filter_ = "name like 'user%2'";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error();
  auto docs = std::move(ret.value());
  // Without this guard an empty result would pass: the loop checks nothing.
  ASSERT_FALSE(docs.empty());
  for (size_t i = 0, doc_id = 0; i < docs.size(); i++, doc_id++) {
    const auto &doc = docs[i];
    // Advance to the next id whose name ends in the digit 2.
    while (doc_id % 100 % 10 != 2) {
      doc_id++;
    }
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(doc_id));
  }
}

// Pattern 'user%2' (wildcard in the middle) on "extended_invert_name". The
// expected ids are, in increasing order, those whose last decimal digit of
// id % 100 is 2: 2, 12, 22, ...
TEST_F(LikeTest, ExtendedInvertMiddleLike) {
  VectorQuery query;
  query.output_fields_ = {"name"};
  query.topk_ = 200;
  query.filter_ = "extended_invert_name like 'user%2'";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error();
  auto docs = std::move(ret.value());
  // Without this guard an empty result would pass: the loop checks nothing.
  ASSERT_FALSE(docs.empty());
  for (size_t i = 0, doc_id = 0; i < docs.size(); i++, doc_id++) {
    const auto &doc = docs[i];
    // Advance to the next id whose name ends in the digit 2.
    while (doc_id % 100 % 10 != 2) {
      doc_id++;
    }
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(doc_id));
  }
}

// Single-character wildcard: 'user-_2' on the unindexed column. The
// expected ids are those with a two-digit id % 100 ending in 2
// (12, 22, ..., 92, 112, ...).
TEST_F(LikeTest, UnderScore) {
  VectorQuery query;
  query.output_fields_ = {"name"};
  query.topk_ = 200;
  query.filter_ = "name like 'user-_2'";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error();
  auto docs = std::move(ret.value());
  // Without this guard an empty result would pass: the loop checks nothing.
  ASSERT_FALSE(docs.empty());
  for (size_t i = 0, doc_id = 0; i < docs.size(); i++, doc_id++) {
    const auto &doc = docs[i];
    // Skip ids whose remainder does not end in 2 or has only one digit.
    while (doc_id % 100 % 10 != 2 || doc_id % 100 < 10) {
      doc_id++;
    }
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(doc_id));
  }
}

// Single-character wildcard: 'user-_2' on the inverted column. The expected
// ids are those with a two-digit id % 100 ending in 2
// (12, 22, ..., 92, 112, ...).
// NOTE(review): per the test name the engine presumably evaluates this as
// a forward scan; the test only checks the results, not the plan.
TEST_F(LikeTest, InvertUnderScoreRunAsForward) {
  VectorQuery query;
  query.output_fields_ = {"name"};
  query.topk_ = 200;
  query.filter_ = "invert_name like 'user-_2'";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error();
  auto docs = std::move(ret.value());
  // Without this guard an empty result would pass: the loop checks nothing.
  ASSERT_FALSE(docs.empty());
  for (size_t i = 0, doc_id = 0; i < docs.size(); i++, doc_id++) {
    const auto &doc = docs[i];
    // Skip ids whose remainder does not end in 2 or has only one digit.
    while (doc_id % 100 % 10 != 2 || doc_id % 100 < 10) {
      doc_id++;
    }
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(doc_id));
  }
}

// Escaped percent sign: 'user-\%%' on the unindexed column. The expected
// primary keys are consecutive, starting at pk_5000.
TEST_F(LikeTest, ForwardEscapePercent) {
  VectorQuery query;
  query.output_fields_ = {"name"};
  query.topk_ = 200;
  query.filter_ = R"(name like 'user-\%%')";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error();
  auto docs = std::move(ret.value());
  // Without this guard an empty result would pass: the loop checks nothing.
  ASSERT_FALSE(docs.empty());
  for (size_t i = 0, doc_id = 5000; i < docs.size(); i++, doc_id++) {
    const auto &doc = docs[i];
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(doc_id));
  }
}

// Escaped percent sign: 'user-\%%' on the inverted column. The expected
// primary keys are consecutive, starting at pk_5000.
TEST_F(LikeTest, InvertEscapePercent) {
  VectorQuery query;
  query.output_fields_ = {"name"};
  query.topk_ = 200;
  query.filter_ = R"(invert_name like 'user-\%%')";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error();
  auto docs = std::move(ret.value());
  // Without this guard an empty result would pass: the loop checks nothing.
  ASSERT_FALSE(docs.empty());
  for (size_t i = 0, doc_id = 5000; i < docs.size(); i++, doc_id++) {
    const auto &doc = docs[i];
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(doc_id));
  }
}

// Escaped underscore: 'user-\_%' on the unindexed column. The expected
// primary keys are consecutive, starting at pk_8000.
TEST_F(LikeTest, ForwardEscapeUnderscore) {
  VectorQuery query;
  query.output_fields_ = {"name"};
  query.topk_ = 200;
  query.filter_ = R"(name like 'user-\_%')";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error();
  auto docs = std::move(ret.value());
  // Without this guard an empty result would pass: the loop checks nothing.
  ASSERT_FALSE(docs.empty());
  for (size_t i = 0, doc_id = 8000; i < docs.size(); i++, doc_id++) {
    const auto &doc = docs[i];
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(doc_id));
  }
}

// Escaped underscore: 'user-\_%' on the inverted column. The expected
// primary keys are consecutive, starting at pk_8000.
TEST_F(LikeTest, InvertEscapeUnderscore) {
  VectorQuery query;
  query.output_fields_ = {"name"};
  query.topk_ = 200;
  query.filter_ = R"(invert_name like 'user-\_%')";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  ASSERT_TRUE(ret.has_value()) << ret.error();
  auto docs = std::move(ret.value());
  // Without this guard an empty result would pass: the loop checks nothing.
  ASSERT_FALSE(docs.empty());
  for (size_t i = 0, doc_id = 8000; i < docs.size(); i++, doc_id++) {
    const auto &doc = docs[i];
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(doc_id));
  }
}

// Pattern without any wildcard: 'user-22' on the inverted column. The
// expected primary keys are pk_22, pk_122, pk_222, ... (i * 100 + 22).
// NOTE(review): per the test name the engine presumably rewrites this into
// an equality lookup; the test only checks the results, not the plan.
TEST_F(LikeTest, NoPercentRunAsEqual) {
  VectorQuery query;
  query.output_fields_ = {"name"};
  query.topk_ = 200;
  query.filter_ = R"(invert_name like 'user-22')";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  // Must be fatal: ret.value() below is only valid when execution succeeded.
  ASSERT_TRUE(ret.has_value()) << ret.error();
  auto docs = std::move(ret.value());
  // Without this guard an empty result would pass: the loop checks nothing.
  ASSERT_FALSE(docs.empty());
  for (size_t i = 0; i < docs.size(); i++) {
    const auto &doc = docs[i];
    const size_t doc_id = i * 100 + 22;
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(doc_id));
  }
}

}  // namespace zvec::sqlengine

================================================
FILE: tests/db/sqlengine/mock_segment.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include <string>
#include <vector>
#include <arrow/api.h>
#include <arrow/compute/api.h>
#include <gtest/gtest.h>
#include <zvec/ailego/logger/logger.h>
#include "db/index/column/common/index_results.h"
#include "db/index/column/vector_column/vector_column_indexer.h"
#include "db/index/segment/segment.h"
#include "index/column/inverted_column/inverted_column_indexer.h"
#include "index/column/vector_column/vector_column_params.h"
#include "index/common/index_filter.h"
namespace zvec {


class MockIndexResult : public InvertedSearchResult {
 public:
  MockIndexResult(const std::vector<idx_t> &doc_ids,
                  const std::vector<float> &scores)
      : doc_ids_(doc_ids), scores_(scores) {}

  MockIndexResult(const std::vector<idx_t> &doc_ids,
                  const std::vector<float> &scores,
                  const std::vector<std::string> &groups)
      : doc_ids_(doc_ids), scores_(scores), group_ids_(groups) {}

  size_t count() const override {
    return doc_ids_.size();
  }

  IteratorUPtr create_iterator() override {
    return std::make_unique<MockIterator>(*this);
  }

 private:
  struct MockIterator : public IndexResults::Iterator {
    MockIterator(MockIndexResult &parent) : parent_(parent) {}

    idx_t doc_id() const override {
      return parent_.doc_ids_[current_index_];
    }

    float score() const override {
      return parent_.scores_[current_index_];
    }

    void next() override {
      ++current_index_;
    }

    bool valid() const override {
      return current_index_ < parent_.count();
    }

    const std::string &group_id() const override {
      return parent_.group_ids_[current_index_];
    }

    MockIndexResult &parent_;
    size_t current_index_{0};
  };

  std::vector<idx_t> doc_ids_;
  std::vector<float> scores_;
  std::vector<std::string> group_ids_;
};

//! Vector indexer stub: Search() always returns the same ten grouped hits and
//! Fetch() synthesizes a sparse vector from the doc id.
class MockVectorIndexer : public CombinedVectorColumnIndexer {
 public:
  //! Ignores both arguments and returns doc ids 1..10 with scores 0.1..1.0,
  //! cycling through the groups "group_0", "group_1", "group_2".
  Result<IndexResults::Ptr> Search(
      const vector_column_params::VectorData &vector_data,
      const vector_column_params::QueryParams &query_params) override {
    // To exercise the error path instead:
    //   return tl::make_unexpected(Status::InternalError("err"));
    const std::vector<idx_t> hit_ids{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
    const std::vector<float> hit_scores{0.1F, 0.2F, 0.3F, 0.4F, 0.5F,
                                        0.6F, 0.7F, 0.8F, 0.9F, 1.0F};
    const std::vector<std::string> hit_groups{
        "group_0", "group_1", "group_2", "group_0", "group_1",
        "group_2", "group_0", "group_1", "group_2", "group_0"};
    return std::make_shared<MockIndexResult>(hit_ids, hit_scores, hit_groups);
  }

  //! Returns a sparse vector with `doc_id % 5` entries: entry i has index i
  //! and value i / 100. Both arrays are handed over as raw byte strings.
  Result<vector_column_params::VectorDataBuffer> Fetch(
      uint32_t doc_id) const override {
    // Dense alternative kept for reference:
    //   std::vector<float> v(4, static_cast<float>(doc_id));
    //   std::string v_str(reinterpret_cast<char *>(v.data()),
    //                     v.size() * sizeof(float));
    //   return vector_column_params::VectorDataBuffer{
    //       vector_column_params::DenseVectorBuffer{v_str}};

    const uint32_t entry_count = doc_id % 5;
    std::vector<uint32_t> sparse_indices;
    std::vector<float> sparse_values;
    sparse_indices.reserve(entry_count);
    sparse_values.reserve(entry_count);
    for (uint32_t pos = 0; pos < entry_count; ++pos) {
      sparse_indices.push_back(pos);
      sparse_values.push_back(static_cast<float>(pos / 100.0));
    }

    std::string index_bytes(reinterpret_cast<char *>(sparse_indices.data()),
                            sparse_indices.size() * sizeof(uint32_t));
    std::string value_bytes(reinterpret_cast<char *>(sparse_values.data()),
                            sparse_values.size() * sizeof(float));
    return vector_column_params::VectorDataBuffer{
        vector_column_params::SparseVectorBuffer{std::move(index_bytes),
                                                 std::move(value_bytes)}};
  }
};

//! Inverted indexer stub: every search variant returns the same ten hits
//! (doc ids 1..10, scores 0.1..1.0) regardless of its arguments.
class MockInvertIndexer : public InvertedColumnIndexer {
 public:
  // NOTE(review): `ctx` is a data member, so it is handed to the base
  // constructor before its own constructor has run. Confirm the base only
  // keeps a reference/pointer to it and does not read it during construction.
  MockInvertIndexer() : InvertedColumnIndexer(ctx) {}

  //! `value` and `op` are ignored.
  InvertedSearchResult::Ptr search(const std::string &value,
                                   CompareOp op) const override {
    return canned_result();
  }

  InvertedSearchResult::Ptr search_null() const override {
    return canned_result();
  }

  InvertedSearchResult::Ptr search_non_null() const override {
    return canned_result();
  }

 private:
  //! Builds the fixed result shared by all search entry points.
  static InvertedSearchResult::Ptr canned_result() {
    return std::make_shared<MockIndexResult>(
        std::vector<idx_t>{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
        std::vector<float>{0.1F, 0.2F, 0.3F, 0.4F, 0.5F, 0.6F, 0.7F, 0.8F, 0.9F,
                           1.0F});
  }

  RocksdbContext ctx;
};

//   std::make_shared<FieldSchema>("id", DataType::INT32, false, 0, false,
//                                 nullptr),
//   std::make_shared<FieldSchema>("name", DataType::STRING, false, 0,
//                                 false, nullptr),
//   std::make_shared<FieldSchema>("age", DataType::INT64, false, 0,
//                                 false, nullptr),
//   std::make_shared<FieldSchema>("score", DataType::DOUBLE, false, 0,
//                                 false, nullptr),
//! Build an in-memory Arrow table with `count` rows for the mock segment.
//!
//! Columns, in order: id (int32), name (utf8), age (int64), score (float64),
//! _zvec_uid_ (utf8), _zvec_row_id_ (uint64), _zvec_g_doc_id_ (uint64) and
//! tag_list (list<int32>). In the first four columns every row whose index is
//! a multiple of 13 is null; the other columns never contain nulls.
//!
//! @param count number of rows to generate
//! @return the table, or the first Arrow error raised while building a column
inline arrow::Result<std::shared_ptr<arrow::Table>> CreateTable(
    int count = 10000000) {
  auto schema = arrow::schema({
      arrow::field("id", arrow::int32()),
      arrow::field("name", arrow::utf8()),
      arrow::field("age", arrow::int64()),
      arrow::field("score", arrow::float64()),
      arrow::field("_zvec_uid_", arrow::utf8()),
      arrow::field("_zvec_row_id_", arrow::uint64()),
      arrow::field("_zvec_g_doc_id_", arrow::uint64()),
      arrow::field("tag_list", arrow::list(arrow::int32())),
  });

  // Rows whose index is a multiple of 13 are null in the nullable columns.
  auto has_value = [](int i) { return i % 13 != 0; };

  // age: the row index as int64.
  std::shared_ptr<arrow::Array> array_age;
  arrow::NumericBuilder<arrow::Int64Type> builder_age;
  ARROW_RETURN_NOT_OK(builder_age.Reserve(count));
  for (int i = 0; i < count; i++) {
    if (has_value(i)) {
      ARROW_RETURN_NOT_OK((builder_age.Append(i)));
    } else {
      ARROW_RETURN_NOT_OK((builder_age.AppendNull()));
    }
  }
  ARROW_RETURN_NOT_OK(builder_age.Finish(&array_age));

  // id: the row index as int32.
  std::shared_ptr<arrow::Array> array_id;
  arrow::NumericBuilder<arrow::Int32Type> builder_id;
  ARROW_RETURN_NOT_OK(builder_id.Reserve(count));
  for (int i = 0; i < count; i++) {
    if (has_value(i)) {
      ARROW_RETURN_NOT_OK((builder_id.Append(i)));
    } else {
      ARROW_RETURN_NOT_OK((builder_id.AppendNull()));
    }
  }
  ARROW_RETURN_NOT_OK(builder_id.Finish(&array_id));

  // score: the row index divided by 100.
  std::shared_ptr<arrow::Array> array_score;
  arrow::NumericBuilder<arrow::DoubleType> builder_score;
  ARROW_RETURN_NOT_OK(builder_score.Reserve(count));
  for (int i = 0; i < count; i++) {
    if (has_value(i)) {
      ARROW_RETURN_NOT_OK((builder_score.Append(i / 100.0)));
    } else {
      ARROW_RETURN_NOT_OK((builder_score.AppendNull()));
    }
  }
  ARROW_RETURN_NOT_OK(builder_score.Finish(&array_score));

  // name: "name_<row index>".
  std::shared_ptr<arrow::Array> array_name;
  arrow::StringBuilder builder_name;
  ARROW_RETURN_NOT_OK(builder_name.Reserve(count));
  for (int i = 0; i < count; i++) {
    if (has_value(i)) {
      ARROW_RETURN_NOT_OK((builder_name.Append("name_" + std::to_string(i))));
    } else {
      ARROW_RETURN_NOT_OK((builder_name.AppendNull()));
    }
  }
  ARROW_RETURN_NOT_OK(builder_name.Finish(&array_name));

  // _zvec_uid_: "uid_<row index>", never null.
  std::shared_ptr<arrow::Array> array_uid;
  arrow::StringBuilder builder_uid;
  ARROW_RETURN_NOT_OK(builder_uid.Reserve(count));
  for (int i = 0; i < count; i++) {
    ARROW_RETURN_NOT_OK((builder_uid.Append("uid_" + std::to_string(i))));
  }
  ARROW_RETURN_NOT_OK(builder_uid.Finish(&array_uid));

  // _zvec_row_id_: the row index, never null.
  std::shared_ptr<arrow::Array> array_row_id;
  arrow::NumericBuilder<arrow::UInt64Type> builder_row_id;
  ARROW_RETURN_NOT_OK(builder_row_id.Reserve(count));
  for (int i = 0; i < count; i++) {
    ARROW_RETURN_NOT_OK((builder_row_id.Append(i)));
  }
  ARROW_RETURN_NOT_OK(builder_row_id.Finish(&array_row_id));

  // _zvec_g_doc_id_: the row index, never null.
  std::shared_ptr<arrow::Array> array_doc_id;
  arrow::NumericBuilder<arrow::UInt64Type> builder_doc_id;
  ARROW_RETURN_NOT_OK(builder_doc_id.Reserve(count));
  for (int i = 0; i < count; i++) {
    ARROW_RETURN_NOT_OK((builder_doc_id.Append(i)));
  }
  ARROW_RETURN_NOT_OK(builder_doc_id.Finish(&array_doc_id));

  // tag_list: row i holds the values 1 .. (i % 5) + 1.
  arrow::ListBuilder list_builder(arrow::default_memory_pool(),
                                  std::make_shared<arrow::Int32Builder>());
  auto *tag_value_builder =
      static_cast<arrow::Int32Builder *>(list_builder.value_builder());
  for (int i = 0; i < count; ++i) {
    // Start a new list entry for this row.
    ARROW_RETURN_NOT_OK(list_builder.Append());

    const int pattern = i % 5;  // selects one of five list lengths
    for (int j = 0; j < pattern + 1; ++j) {
      ARROW_RETURN_NOT_OK(tag_value_builder->Append(j + 1));
    }
  }
  std::shared_ptr<arrow::Array> tag_list_array;
  // Propagate a failed Finish instead of handing a null column to Table::Make.
  ARROW_RETURN_NOT_OK(list_builder.Finish(&tag_list_array));

  return arrow::Table::Make(
      schema, {array_id, array_name, array_age, array_score, array_uid,
               array_row_id, array_doc_id, tag_list_array});
}

//! Filter stub that reports every odd id as filtered.
class MockIndexFilter : public IndexFilter {
 public:
  //! @return true when `id` is odd, false when it is even
  bool is_filtered(uint64_t id) const override {
    const bool is_odd = (id & 1U) != 0;
    return is_odd;
  }
};

//! Select the given rows from every column of `table`.
//!
//! @param table        source table
//! @param row_indices  row positions to pick
//! @return a table with the source schema and row_indices.size() rows, or the
//!         Arrow error raised while building the index array or taking rows
inline arrow::Result<std::shared_ptr<Table>> TakeRowsByIndices(
    const std::shared_ptr<Table> &table, const std::vector<int> &row_indices) {
  // Step 1: turn the requested positions into an Int32 index array.
  std::shared_ptr<arrow::Array> take_positions;
  {
    arrow::Int32Builder position_builder(arrow::default_memory_pool());
    ARROW_RETURN_NOT_OK(
        position_builder.AppendValues(row_indices.data(), row_indices.size()));
    ARROW_RETURN_NOT_OK(position_builder.Finish(&take_positions));
  }

  // Step 2: apply Take to each column with that index array.
  std::vector<std::shared_ptr<arrow::ChunkedArray>> picked_columns;
  for (const auto &source_column : table->columns()) {
    ARROW_ASSIGN_OR_RAISE(auto picked,
                          cp::Take(source_column, take_positions));
    picked_columns.push_back(picked.chunked_array());
  }

  // Step 3: assemble the picked columns into a table with the same schema.
  return arrow::Table::Make(table->schema(), picked_columns,
                            row_indices.size());
}


//! Segment stub for SQL-engine tests.
//!
//! fetch()/scan() serve rows from tables generated by CreateTable(), the
//! combined-vector and scalar indexer getters hand out the mock indexers from
//! this header, and the remaining overrides return fixed values.
class MockSegment : public Segment {
 public:
  MockSegment() : Segment() {}

  virtual ~MockSegment() = default;

  SegmentID id() const override {
    return 0;
  }

  //! Returns the rows at `indices` of a freshly generated 1000-row table.
  //! `columns` is only logged; the result always has every generated column.
  //! @return the selected rows, or nullptr when building or taking fails
  TablePtr fetch(const std::vector<std::string> &columns,
                 const std::vector<int> &indices) const override {
    std::string index_list;
    for (const int index : indices) {
      index_list += std::to_string(index);
      index_list += ",";
    }
    LOG_INFO("Fetch indices: %s %s", get_column_names(columns).c_str(),
             index_list.c_str());

    // Check the result before taking its value: MoveValueUnsafe() on a failed
    // Result is not checked by Arrow.
    auto table_result = CreateTable(1000);
    if (!table_result.ok()) {
      LOG_ERROR("Create table error: %s",
                table_result.status().ToString().c_str());
      return nullptr;
    }
    auto table = table_result.MoveValueUnsafe();

    auto res = TakeRowsByIndices(table, indices);
    if (!res.ok()) {
      LOG_ERROR("Take error: %s", res.status().ToString().c_str());
      return nullptr;
    }
    LOG_INFO("Take: %s", res.ValueOrDie()->ToString().c_str());
    return res.MoveValueUnsafe();
  }

  //! Single-row fetch is not supported by the mock; always returns nullptr.
  ExecBatchPtr fetch(const std::vector<std::string> &columns,
                     int index) const override {
    LOG_ERROR("Not implemented");
    return nullptr;
  }

  //! Joins the column names for logging; every name is followed by a comma.
  static std::string get_column_names(const std::vector<std::string> &columns) {
    std::string joined;
    for (const auto &name : columns) {
      joined += name;
      joined += ",";
    }
    return joined;
  }

  //! Returns a reader over a freshly generated 10000-row table. `columns` is
  //! only logged. ValueOrDie() aborts if table generation fails.
  RecordBatchReaderPtr scan(
      const std::vector<std::string> &columns) const override {
    auto table = CreateTable(10000);
    LOG_INFO("Scan return: %s %s", get_column_names(columns).c_str(),
             table.ValueOrDie()->ToString().c_str());
    return std::make_shared<arrow::TableBatchReader>(table.ValueOrDie());
  }

  const IndexFilter::Ptr get_filter() override {
    return std::make_shared<MockIndexFilter>();
  }

  CombinedVectorColumnIndexer::Ptr get_quant_combined_vector_indexer(
      const std::string &field_name) const override {
    return std::make_shared<MockVectorIndexer>();
  }

  CombinedVectorColumnIndexer::Ptr get_combined_vector_indexer(
      const std::string &field_name) const override {
    return std::make_shared<MockVectorIndexer>();
  }

  InvertedColumnIndexer::Ptr get_scalar_indexer(
      const std::string &field_name) const override {
    return std::make_shared<MockInvertIndexer>();
  }

  SegmentMeta::Ptr meta() const override {
    return nullptr;
  }

  uint64_t doc_count(const IndexFilter::Ptr filter = nullptr) override {
    return 0;
  }

  // Schema changes: add/alter report an internal error, drop succeeds.

  Status add_column(FieldSchema::Ptr column_schema,
                    const std::string &expression,
                    const AddColumnOptions &options) override {
    return Status::InternalError();
  }

  Status alter_column(const std::string &column_name,
                      const FieldSchema::Ptr &new_column_schema,
                      const AlterColumnOptions &options) override {
    return Status::InternalError();
  }

  Status drop_column(const std::string &column_name) override {
    return Status::OK();
  }

  // Index management: every call is a no-op that reports success, and the
  // output parameters are left untouched.

  Status create_all_vector_index(
      int concurrency, SegmentMeta::Ptr *new_segment_meta,
      std::unordered_map<std::string, VectorColumnIndexer::Ptr>
          *vector_indexers,
      std::unordered_map<std::string, VectorColumnIndexer::Ptr>
          *quant_vector_indexers) override {
    return Status::OK();
  }

  Status create_vector_index(
      const std::string &column, const IndexParams::Ptr &index_params,
      int concurrency, SegmentMeta::Ptr *new_segment_meta,
      std::unordered_map<std::string, VectorColumnIndexer::Ptr>
          *vector_indexers,
      std::unordered_map<std::string, VectorColumnIndexer::Ptr>
          *quant_vector_indexers) override {
    return Status::OK();
  }

  Status drop_vector_index(
      const std::string &column, SegmentMeta::Ptr *new_segment_meta,
      std::unordered_map<std::string, VectorColumnIndexer::Ptr>
          *vector_indexers) override {
    return Status::OK();
  }

  Status reload_vector_index(
      const CollectionSchema &schema, const SegmentMeta::Ptr &segment_meta,
      const std::unordered_map<std::string, VectorColumnIndexer::Ptr>
          &vector_indexers,
      const std::unordered_map<std::string, VectorColumnIndexer::Ptr>
          &quant_vector_indexers) override {
    return Status::OK();
  }

  bool vector_index_ready(const std::string &column,
                          const IndexParams::Ptr &index_params) const override {
    return true;
  }

  bool all_vector_index_ready() const override {
    return true;
  }

  Status create_scalar_index(
      const std::vector<std::string> &columns,
      const IndexParams::Ptr &index_params, SegmentMeta::Ptr *new_segment_meta,
      InvertedIndexer::Ptr *new_scalar_indexer) override {
    return Status::OK();
  }

  Status drop_scalar_index(const std::vector<std::string> &columns,
                           SegmentMeta::Ptr *new_segment_meta,
                           InvertedIndexer::Ptr *new_scalar_indexer) override {
    return Status::OK();
  }

  Status reload_scalar_index(
      const CollectionSchema &schema, const SegmentMeta::Ptr &segment_meta,
      const InvertedIndexer::Ptr &scalar_indexer) override {
    return Status::OK();
  }

  // Document writes: accepted and discarded.

  Status Insert(Doc &doc) override {
    return Status::OK();
  }

  Status Upsert(Doc &doc) override {
    return Status::OK();
  }

  Status Update(Doc &doc) override {
    return Status::OK();
  }

  Status Delete(const std::string &pk) override {
    return Status::OK();
  }

  Status Delete(uint64_t doc_id) override {
    return Status::OK();
  }

  //! The mock stores no documents; always returns nullptr.
  Doc::Ptr Fetch(uint64_t doc_id) override {
    return nullptr;
  }

  std::vector<VectorColumnIndexer::Ptr> get_vector_indexer(
      const std::string &field_name) const override {
    return {};
  }

  std::vector<VectorColumnIndexer::Ptr> get_quant_vector_indexer(
      const std::string &field_name) const override {
    return {};
  }

  // Lifecycle: no state to persist or release.

  Status flush() override {
    return Status::OK();
  }

  Status dump() override {
    return Status::OK();
  }

  Status destroy() override {
    return Status::OK();
  }
};

}  // namespace zvec


================================================
FILE: tests/db/sqlengine/optimizer_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <gtest/gtest.h>
#include "db/sqlengine/analyzer/query_info_helper.h"
#include "db/sqlengine/sqlengine_impl.h"
#include "zvec/db/index_params.h"
// #define private public
#include <memory>
#include "db/sqlengine/planner/optimizer.h"
#include "mock_segment.h"
// #undef private


namespace zvec::sqlengine {

//! InvertCondOptimizer whose invert_rule() is overridden with a fixed,
//! data-independent rule so tests get deterministic decisions.
class MockInvertCondOptimizer : public InvertCondOptimizer {
 public:
  explicit MockInvertCondOptimizer(CollectionSchema *collection_schema)
      : InvertCondOptimizer(collection_schema) {}

  bool invert_rule(Segment *segment, QueryRelNode *invert_cond) override;
};

//! Test rule: an IN condition always yields true; any other condition yields
//! true only when its right-hand value, decoded as INT32, is below 100.
//! `segment` is not consulted.
bool MockInvertCondOptimizer::invert_rule(Segment *segment,
                                          QueryRelNode *invert_cond) {
  if (invert_cond->op() != QueryNodeOp::Q_IN) {
    // Decode the right operand's buffer into decimal text, then parse it.
    std::string raw_value = invert_cond->right()->text();
    std::string decoded_text;
    QueryInfoHelper::data_buf_2_text(raw_value, DataType::INT32,
                                     &decoded_text);

    const int age = atoi(decoded_text.c_str());
    std::cout << "invert cond: age is " << age << std::endl;

    // Only values below 100 yield true.
    return age < 100;
  }

  return true;
}

//! Fixture sharing one collection schema across all optimizer tests: a
//! 4-dimensional VECTOR_FP32 field "face_feature" with FlatIndexParams(IP) and
//! an INT32 field "age" with InvertIndexParams(false).
class OptimizerTest : public testing::Test {
 public:
  // Builds the shared schema once for the whole suite.
  static void SetUpTestSuite() {
    schema = std::make_shared<CollectionSchema>();
    schema->set_name("collection");

    // Vector field targeted by every query in this suite.
    auto feature_field = std::make_shared<FieldSchema>();
    auto flat_ip_params = std::make_shared<FlatIndexParams>(MetricType::IP);
    feature_field->set_name("face_feature");
    feature_field->set_index_params(flat_ip_params);
    feature_field->set_dimension(4);
    feature_field->set_data_type(DataType::VECTOR_FP32);
    schema->add_field(feature_field);

    // Scalar field used by every filter expression.
    auto age_field = std::make_shared<FieldSchema>();
    age_field->set_name("age");
    age_field->set_data_type(DataType::INT32);
    age_field->set_index_params(std::make_shared<InvertIndexParams>(false));
    schema->add_field(age_field);
  }

  // Nothing to release; the schema lives until process exit.
  static void TearDownTestSuite() {}

 protected:
  inline static CollectionSchema::Ptr schema;
  Profiler::Ptr profiler_{new Profiler};
};


TEST_F(OptimizerTest, Basic) {
  // Single condition; for the literal 200 the mock invert_rule yields false,
  // and optimize() is expected to report true.
  VectorQuery request;
  request.field_name_ = "face_feature";
  request.topk_ = 11;
  request.include_vector_ = false;
  request.output_fields_ = {"*"};
  request.filter_ = "age > 200";

  const auto engine =
      std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());
  auto parsed = engine->parse_request(schema, request, nullptr);
  ASSERT_TRUE(parsed.has_value());
  QueryInfo::Ptr query_info = parsed.value();

  Optimizer::Ptr optimizer =
      std::make_shared<MockInvertCondOptimizer>(schema.get());
  const auto segment = std::make_shared<MockSegment>();

  const bool optimized = optimizer->optimize(segment.get(), query_info.get());
  ASSERT_TRUE(optimized);
}

// case 1. invert subroot same as invert cond, do nothing
TEST_F(OptimizerTest, Case1) {
  // Single condition; for the literal 12 the mock invert_rule yields true,
  // and optimize() is expected to report false.
  VectorQuery request;
  request.field_name_ = "face_feature";
  request.topk_ = 11;
  request.include_vector_ = false;
  request.output_fields_ = {"*"};
  request.filter_ = "age > 12";

  const auto engine =
      std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());
  auto parsed = engine->parse_request(schema, request, nullptr);
  ASSERT_TRUE(parsed.has_value());
  QueryInfo::Ptr query_info = parsed.value();

  Optimizer::Ptr optimizer =
      std::make_shared<MockInvertCondOptimizer>(schema.get());
  const auto segment = std::make_shared<MockSegment>();

  const bool optimized = optimizer->optimize(segment.get(), query_info.get());
  ASSERT_FALSE(optimized);
}

// case 2.1 invert subroot is not found, all conds are forward cond
TEST_F(OptimizerTest, Case2_1) {
  // Every literal (100, 101, 102) makes the mock invert_rule yield false;
  // optimize() is expected to report true.
  VectorQuery request;
  request.field_name_ = "face_feature";
  request.topk_ = 11;
  request.include_vector_ = false;
  request.output_fields_ = {"*"};
  request.filter_ = "age > 100 and age > 101 or age > 102";

  const auto engine =
      std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());
  auto parsed = engine->parse_request(schema, request, nullptr);
  ASSERT_TRUE(parsed.has_value());
  QueryInfo::Ptr query_info = parsed.value();

  Optimizer::Ptr optimizer =
      std::make_shared<MockInvertCondOptimizer>(schema.get());
  const auto segment = std::make_shared<MockSegment>();

  const bool optimized = optimizer->optimize(segment.get(), query_info.get());
  ASSERT_TRUE(optimized);
}

// case 2.2 invert subroot is not found, some conds are forward cond
// while left invert cond cannot be invert cond any more
TEST_F(OptimizerTest, Case2_2) {
  // OR of two conditions: the mock invert_rule yields false for 100 and true
  // for 90; optimize() is expected to report false.
  VectorQuery request;
  request.field_name_ = "face_feature";
  request.topk_ = 11;
  request.include_vector_ = false;
  request.output_fields_ = {"*"};
  request.filter_ = "age > 100 or age > 90";

  const auto engine =
      std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());
  auto parsed = engine->parse_request(schema, request, nullptr);
  ASSERT_TRUE(parsed.has_value());
  QueryInfo::Ptr query_info = parsed.value();

  Optimizer::Ptr optimizer =
      std::make_shared<MockInvertCondOptimizer>(schema.get());
  const auto segment = std::make_shared<MockSegment>();

  const bool optimized = optimizer->optimize(segment.get(), query_info.get());
  ASSERT_FALSE(optimized);
}


// case 3.1 subroot is found and be part of invert cond
TEST_F(OptimizerTest, Case3_1) {
  // AND chain where only the last literal (10) makes the mock invert_rule
  // yield true; optimize() is expected to report true.
  VectorQuery request;
  request.field_name_ = "face_feature";
  request.topk_ = 11;
  request.include_vector_ = false;
  request.output_fields_ = {"*"};
  request.filter_ = "age > 100 and age > 101 and age > 10";

  const auto engine =
      std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());
  auto parsed = engine->parse_request(schema, request, nullptr);
  ASSERT_TRUE(parsed.has_value());
  QueryInfo::Ptr query_info = parsed.value();

  Optimizer::Ptr optimizer =
      std::make_shared<MockInvertCondOptimizer>(schema.get());
  const auto segment = std::make_shared<MockSegment>();

  const bool optimized = optimizer->optimize(segment.get(), query_info.get());
  ASSERT_TRUE(optimized);
  // The parse result must still hold a value after optimization.
  ASSERT_TRUE(parsed);
}

// case 3.2 subroot is found and be part of invert cond
TEST_F(OptimizerTest, Case3_2) {
  // AND chain where the first two literals (10, 11) make the mock invert_rule
  // yield true and the last (100) false; optimize() is expected to report
  // true.
  VectorQuery request;
  request.field_name_ = "face_feature";
  request.topk_ = 11;
  request.include_vector_ = false;
  request.output_fields_ = {"*"};
  request.filter_ = "age > 10 and age > 11 and age > 100";

  const auto engine =
      std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());
  auto parsed = engine->parse_request(schema, request, nullptr);
  ASSERT_TRUE(parsed.has_value());
  QueryInfo::Ptr query_info = parsed.value();

  Optimizer::Ptr optimizer =
      std::make_shared<MockInvertCondOptimizer>(schema.get());
  const auto segment = std::make_shared<MockSegment>();

  const bool optimized = optimizer->optimize(segment.get(), query_info.get());
  ASSERT_TRUE(optimized);
}

// case 3.3 subroot is found and be part of invert cond
TEST_F(OptimizerTest, Case3_3) {
  // OR group (literals 10 and 11, mock rule true) AND-ed with a condition on
  // 100 (mock rule false); optimize() is expected to report true.
  VectorQuery request;
  request.field_name_ = "face_feature";
  request.topk_ = 11;
  request.include_vector_ = false;
  request.output_fields_ = {"*"};
  request.filter_ = "(age > 10 or age > 11) and age > 100";

  const auto engine =
      std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());
  auto parsed = engine->parse_request(schema, request, nullptr);
  ASSERT_TRUE(parsed.has_value());
  QueryInfo::Ptr query_info = parsed.value();

  Optimizer::Ptr optimizer =
      std::make_shared<MockInvertCondOptimizer>(schema.get());
  const auto segment = std::make_shared<MockSegment>();

  const bool optimized = optimizer->optimize(segment.get(), query_info.get());
  ASSERT_TRUE(optimized);
}

// case 3.4 subroot is found and be part of invert cond, but others also have
// invert
TEST_F(OptimizerTest, Case3_4) {
  // Nested AND groups: the mock invert_rule yields false only for 101 and
  // true for every 10; optimize() is expected to report false.
  VectorQuery request;
  request.field_name_ = "face_feature";
  request.topk_ = 11;
  request.include_vector_ = false;
  request.output_fields_ = {"*"};
  request.filter_ = "age > 10 and (age > 101 and (age > 10 and age > 10))";

  const auto engine =
      std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());
  auto parsed = engine->parse_request(schema, request, nullptr);
  ASSERT_TRUE(parsed.has_value());
  QueryInfo::Ptr query_info = parsed.value();

  Optimizer::Ptr optimizer =
      std::make_shared<MockInvertCondOptimizer>(schema.get());
  const auto segment = std::make_shared<MockSegment>();

  const bool optimized = optimizer->optimize(segment.get(), query_info.get());
  ASSERT_FALSE(optimized);
}


// case 4, optimize with in expr
TEST_F(OptimizerTest, Case4) {
  // Three filters run against the same engine, optimizer and segment. The
  // mock invert_rule always yields true for IN conditions.
  VectorQuery request;
  request.field_name_ = "face_feature";
  request.topk_ = 11;
  request.include_vector_ = false;
  request.output_fields_ = {"*"};

  const auto engine =
      std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());
  Optimizer::Ptr optimizer =
      std::make_shared<MockInvertCondOptimizer>(schema.get());
  const auto segment = std::make_shared<MockSegment>();

  // 1) IN alone: nothing is optimized.
  request.filter_ = "age in (10, 20)";
  auto parsed = engine->parse_request(schema, request, nullptr);
  ASSERT_TRUE(parsed.has_value());
  QueryInfo::Ptr query_info = parsed.value();
  bool optimized = optimizer->optimize(segment.get(), query_info.get());
  ASSERT_FALSE(optimized);

  // 2) IN AND-ed with an optimizable condition: the latter is optimized.
  request.filter_ = "age in (10, 20) and age > 100";
  parsed = engine->parse_request(schema, request, nullptr);
  ASSERT_TRUE(parsed.has_value());
  query_info = parsed.value();
  optimized = optimizer->optimize(segment.get(), query_info.get());
  ASSERT_TRUE(optimized);

  // 3) IN OR-ed with an optimizable condition: nothing is optimized.
  request.filter_ = "age in (10, 20) or age > 100";
  parsed = engine->parse_request(schema, request, nullptr);
  ASSERT_TRUE(parsed.has_value());
  query_info = parsed.value();
  optimized = optimizer->optimize(segment.get(), query_info.get());
  ASSERT_FALSE(optimized);
}

}  // namespace zvec::sqlengine


================================================
FILE: tests/db/sqlengine/query_info_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <memory>
#include <gmock/gmock-matchers.h>
#include <gtest/gtest.h>
#include "db/sqlengine/sqlengine_impl.h"
#include "zvec/db/doc.h"
#include "zvec/db/schema.h"
#include "profiler.h"


namespace zvec::sqlengine {

// Fixture for the query-parsing tests below. The collection schema is built
// once per test suite and shared by every test through the static `schema`
// member.
class QueryInfoTest : public testing::Test {
 public:
  // Builds the shared schema: a dense vector column plus scalar and array
  // columns, registered in the order the tests expect to see them.
  static void SetUpTestSuite() {
    schema = std::make_shared<CollectionSchema>();
    schema->set_name("1collection");

    // 4-dimensional FP32 vector column with a flat inner-product index.
    {
      auto field = std::make_shared<FieldSchema>();
      auto flat_ip = std::make_shared<FlatIndexParams>(MetricType::IP);
      field->set_name("face_feature");
      field->set_index_params(flat_ip);
      field->set_dimension(4);
      field->set_data_type(DataType::VECTOR_FP32);
      schema->add_field(field);
    }

    // Unsigned integer scalar column.
    {
      auto field = std::make_shared<FieldSchema>();
      field->set_name("name");
      field->set_data_type(DataType::UINT32);
      schema->add_field(field);
    }

    // String scalar column.
    {
      auto field = std::make_shared<FieldSchema>();
      field->set_name("category");
      field->set_data_type(DataType::STRING);
      schema->add_field(field);
    }

    // Second column named "face_feature", this time without index params.
    // NOTE(review): the name duplicates the first column; the tests only ever
    // observe one "face_feature" field - confirm how add_field treats this.
    {
      auto field = std::make_shared<FieldSchema>();
      field->set_name("face_feature");
      field->set_dimension(4);
      field->set_data_type(DataType::VECTOR_FP32);
      schema->add_field(field);
    }

    // String column whose name starts with a digit and contains a dash.
    {
      auto field = std::make_shared<FieldSchema>();
      field->set_name("1-dash_score_field");
      field->set_dimension(5);
      field->set_data_type(DataType::STRING);
      schema->add_field(field);
    }

    // Array-of-UINT32 column.
    {
      auto field = std::make_shared<FieldSchema>();
      field->set_name("name_array");
      field->set_data_type(DataType::ARRAY_UINT32);
      schema->add_field(field);
    }

    // Array-of-STRING column.
    {
      auto field = std::make_shared<FieldSchema>();
      field->set_name("category_array");
      field->set_data_type(DataType::ARRAY_STRING);
      schema->add_field(field);
    }
  }

  // Nothing to release: the schema lives for the whole test program.
  static void TearDownTestSuite() {}

 protected:
  // Per-test profiler instance.
  Profiler::Ptr profiler_{new Profiler};
  // Schema shared by all tests of this suite; set in SetUpTestSuite().
  inline static CollectionSchema::Ptr schema;
};


// A top-k vector query on "face_feature" with no scalar filter and
// include_vector_ disabled: all non-vector columns are selected and no filter
// or invert condition is produced.
TEST_F(QueryInfoTest, BasicQueryRequest) {
  VectorQuery request;
  request.output_fields_ = {"*"};
  request.topk_ = 11;
  request.query_vector_ = "[0.1, 0.2, 0.3, 0.4]";
  request.field_name_ = "face_feature";
  request.include_vector_ = false;
  request.query_params_ = std::make_shared<QueryParams>(IndexType::FLAT);
  request.query_params_->set_radius(0.8F);

  auto sql_engine =
      std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());
  auto parsed = sql_engine->parse_request(schema, request, nullptr);
  ASSERT_TRUE(parsed.has_value()) << parsed.error().c_str();
  QueryInfo::Ptr info = parsed.value();

  // "*" expands to the scalar/array columns in schema order.
  const char *const kSelected[] = {"name", "category", "1-dash_score_field",
                                   "name_array", "category_array"};
  auto &fields = info->query_fields();
  EXPECT_EQ(fields.size(), 5);
  for (size_t i = 0; i < 5; ++i) {
    EXPECT_EQ(fields[i]->field_name(), kSelected[i]);
  }
  EXPECT_EQ(info->query_topn(), 11);

  // Without a filter string none of the scalar conditions exist.
  EXPECT_FALSE(info->filter_cond());
  EXPECT_FALSE(info->invert_cond());
  EXPECT_FALSE(info->post_filter_cond());
  EXPECT_FALSE(info->post_invert_cond());

  // The vector condition mirrors the request verbatim.
  ASSERT_TRUE(info->vector_cond_info());
  auto vec = info->vector_cond_info();
  EXPECT_EQ(1, vec->batch());
  EXPECT_EQ("face_feature", vec->vector_field_name());
  EXPECT_EQ(request.query_vector_, vec->vector_term());
  EXPECT_EQ(request.query_sparse_indices_, vec->vector_sparse_indices());
  EXPECT_EQ(request.query_sparse_values_, vec->vector_sparse_values());
  EXPECT_EQ(request.query_params_, vec->query_params());
}

// Parses a top-k vector query whose scalar filter chains three comparisons
// with "or", then walks the resulting condition tree and checks its shape.
//
// Every node is null-checked with a fatal ASSERT_* before it is dereferenced,
// so an unexpected tree shape fails this test instead of crashing the whole
// test binary.
TEST_F(QueryInfoTest, QueryRequestWithFilter) {
  VectorQuery query;
  query.output_fields_ = {"*"};
  query.topk_ = 11;
  query.query_vector_ = "[0.1, 0.2, 0.3, 0.4]";
  query.field_name_ = "face_feature";
  query.include_vector_ = false;
  query.query_params_ = std::make_shared<QueryParams>(IndexType::FLAT);
  query.query_params_->set_radius(0.8F);
  query.filter_ = "name<3 or name=4 or 1-dash_score_field='test'";

  auto engine = std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());
  auto ret = engine->parse_request(schema, query, nullptr);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  QueryInfo::Ptr new_query_info = ret.value();

  // Fatal size check: the indexed accesses below would otherwise read past
  // the end of the container when fewer fields come back.
  auto &query_fields = new_query_info->query_fields();
  ASSERT_EQ(query_fields.size(), 5);
  EXPECT_EQ(query_fields[0]->field_name(), "name");
  EXPECT_EQ(query_fields[1]->field_name(), "category");
  EXPECT_EQ(query_fields[2]->field_name(), "1-dash_score_field");
  EXPECT_EQ(query_fields[3]->field_name(), "name_array");
  EXPECT_EQ(query_fields[4]->field_name(), "category_array");
  EXPECT_EQ(new_query_info->query_topn(), 11);
  EXPECT_FALSE(new_query_info->invert_cond());
  EXPECT_FALSE(new_query_info->post_filter_cond());
  EXPECT_FALSE(new_query_info->post_invert_cond());

  ASSERT_TRUE(new_query_info->vector_cond_info());
  auto vector_cond = new_query_info->vector_cond_info();
  EXPECT_EQ(1, vector_cond->batch());
  EXPECT_EQ("face_feature", vector_cond->vector_field_name());
  EXPECT_EQ(query.query_vector_, vector_cond->vector_term());
  EXPECT_EQ(query.query_sparse_indices_, vector_cond->vector_sparse_indices());
  EXPECT_EQ(query.query_sparse_values_, vector_cond->vector_sparse_values());
  EXPECT_EQ(query.query_params_, vector_cond->query_params());

  // Root: (nullptr) and (scalar filter) - the left child is empty.
  auto filter_cond = new_query_info->filter_cond();
  ASSERT_TRUE(filter_cond);
  EXPECT_EQ(filter_cond->op_name(), "and");
  EXPECT_FALSE(filter_cond->left());

  // ((name<3) or (name=4)) or (1-dash_score_field=test)
  auto right = std::dynamic_pointer_cast<QueryNode>(filter_cond->right());
  ASSERT_TRUE(right);
  EXPECT_EQ(right->op_name(), "or");

  // 1-dash_score_field=test
  auto string_field_filter =
      std::dynamic_pointer_cast<QueryNode>(right->right());
  ASSERT_TRUE(string_field_filter);
  EXPECT_EQ(string_field_filter->op_name(), "=");
  auto left_key =
      std::dynamic_pointer_cast<QueryIDNode>(string_field_filter->left());
  ASSERT_TRUE(left_key);
  EXPECT_EQ(left_key->op_name(), "ID");
  EXPECT_EQ(left_key->value(), "1-dash_score_field");
  auto right_const = std::dynamic_pointer_cast<QueryConstantNode>(
      string_field_filter->right());
  ASSERT_TRUE(right_const);
  EXPECT_EQ(right_const->op_name(), "STRING_VALUE");
  EXPECT_EQ(right_const->value(), "test");

  // (name<3) or (name=4)
  auto left = std::dynamic_pointer_cast<QueryNode>(right->left());
  ASSERT_TRUE(left);
  EXPECT_EQ(left->op_name(), "or");

  // name<3
  auto or1 = std::dynamic_pointer_cast<QueryNode>(left->left());
  ASSERT_TRUE(or1);
  EXPECT_EQ(or1->op_name(), "<");
  auto id1 = std::dynamic_pointer_cast<QueryIDNode>(or1->left());
  ASSERT_TRUE(id1);
  EXPECT_EQ(id1->op_name(), "ID");
  EXPECT_EQ(id1->value(), "name");
  auto const1 = std::dynamic_pointer_cast<QueryConstantNode>(or1->right());
  ASSERT_TRUE(const1);
  EXPECT_EQ(const1->op_name(), "INT_VALUE");
  EXPECT_EQ(const1->value(), "3");

  // name=4 (only the operator is verified)
  auto or2 = std::dynamic_pointer_cast<QueryNode>(left->right());
  ASSERT_TRUE(or2);
  EXPECT_EQ(or2->op_name(), "=");
}

// Same request as the basic case but with include_vector_ enabled: the vector
// column is appended to the selected fields after the scalar/array columns.
TEST_F(QueryInfoTest, QueryRequestWithIncludeVector) {
  VectorQuery request;
  request.output_fields_ = {"*"};
  request.topk_ = 11;
  request.query_vector_ = "[0.1, 0.2, 0.3, 0.4]";
  request.field_name_ = "face_feature";
  request.include_vector_ = true;
  request.query_params_ = std::make_shared<QueryParams>(IndexType::FLAT);
  request.query_params_->set_radius(0.8F);

  auto sql_engine =
      std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());
  auto parsed = sql_engine->parse_request(schema, request, nullptr);
  ASSERT_TRUE(parsed.has_value());
  QueryInfo::Ptr info = parsed.value();

  // Five scalar/array columns followed by the vector column.
  const char *const kSelected[] = {"name",       "category",
                                   "1-dash_score_field", "name_array",
                                   "category_array",     "face_feature"};
  auto &fields = info->query_fields();
  EXPECT_EQ(fields.size(), 6);
  for (size_t i = 0; i < 6; ++i) {
    EXPECT_EQ(fields[i]->field_name(), kSelected[i]);
  }
  EXPECT_EQ(info->query_topn(), 11);

  // No filter string was supplied, so no scalar conditions exist.
  EXPECT_FALSE(info->filter_cond());
  EXPECT_FALSE(info->invert_cond());
  EXPECT_FALSE(info->post_filter_cond());
  EXPECT_FALSE(info->post_invert_cond());

  // The vector condition mirrors the request verbatim.
  ASSERT_TRUE(info->vector_cond_info());
  auto vec = info->vector_cond_info();
  EXPECT_EQ(1, vec->batch());
  EXPECT_EQ("face_feature", vec->vector_field_name());
  EXPECT_EQ(request.query_vector_, vec->vector_term());
  EXPECT_EQ(request.query_sparse_indices_, vec->vector_sparse_indices());
  EXPECT_EQ(request.query_sparse_values_, vec->vector_sparse_values());
  EXPECT_EQ(request.query_params_, vec->query_params());
}

// Smoke test: a filter that nests an "or" group under an "and" must parse
// successfully. Only the success of parse_request is checked.
TEST_F(QueryInfoTest, OR_ANCESTOR) {
  VectorQuery request;
  request.output_fields_ = {"*"};
  request.topk_ = 11;
  request.query_vector_ = "[0.1, 0.2, 0.3, 0.4]";
  request.field_name_ = "face_feature";
  request.include_vector_ = false;
  request.query_params_ = std::make_shared<QueryParams>(IndexType::FLAT);
  request.query_params_->set_radius(0.8F);
  request.filter_ = "name=1 and (name=2 or name=3)";

  auto sql_engine =
      std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());
  auto parsed = sql_engine->parse_request(schema, request, nullptr);
  ASSERT_TRUE(parsed.has_value());
  QueryInfo::Ptr info = parsed.value();
}

// Parses a filter that mixes "=", "in" and "not in", then walks the resulting
// condition tree. The assertions show that "not in" keeps the " in " operator
// name and carries the negation in the list node's text ("NOT (...)").
//
// Every node is null-checked with a fatal ASSERT_* before it is dereferenced,
// so an unexpected tree shape fails this test instead of crashing the whole
// test binary.
TEST_F(QueryInfoTest, QueryRequestWithInFilter) {
  VectorQuery query;
  query.output_fields_ = {"*"};
  query.topk_ = 10;
  query.query_vector_ = "[0.1, 0.2, 0.3, 0.4]";
  query.field_name_ = "face_feature";
  query.include_vector_ = false;
  query.query_params_ = std::make_shared<QueryParams>(IndexType::FLAT);
  query.query_params_->set_radius(0.8F);
  query.filter_ =
      "name=3 or name in (1, 2, 3) or category not in (\"a\", \"b\", \"c\")";

  auto engine = std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());
  auto ret = engine->parse_request(schema, query, nullptr);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  QueryInfo::Ptr new_query_info = ret.value();

  // Fatal size check: the indexed accesses below would otherwise read past
  // the end of the container when fewer fields come back.
  auto &query_fields = new_query_info->query_fields();
  ASSERT_EQ(query_fields.size(), 5);
  EXPECT_EQ(query_fields[0]->field_name(), "name");
  EXPECT_EQ(query_fields[1]->field_name(), "category");
  EXPECT_EQ(query_fields[2]->field_name(), "1-dash_score_field");
  EXPECT_EQ(query_fields[3]->field_name(), "name_array");
  EXPECT_EQ(query_fields[4]->field_name(), "category_array");
  EXPECT_EQ(new_query_info->query_topn(), 10);

  EXPECT_FALSE(new_query_info->invert_cond());
  EXPECT_FALSE(new_query_info->post_filter_cond());
  EXPECT_FALSE(new_query_info->post_invert_cond());

  ASSERT_TRUE(new_query_info->vector_cond_info());
  auto vector_cond = new_query_info->vector_cond_info();
  EXPECT_EQ(1, vector_cond->batch());
  EXPECT_EQ("face_feature", vector_cond->vector_field_name());
  EXPECT_EQ(query.query_vector_, vector_cond->vector_term());

  // Root: (nullptr) and (scalar filter) - the left child is empty.
  auto filter_cond = new_query_info->filter_cond();
  ASSERT_TRUE(filter_cond);
  EXPECT_EQ(filter_cond->op_name(), "and");
  EXPECT_FALSE(filter_cond->left());

  // ((name=3) or (name in (1, 2, 3))) or (category not in ("a", "b", "c"))
  auto right = std::dynamic_pointer_cast<QueryNode>(filter_cond->right());
  ASSERT_TRUE(right);
  EXPECT_EQ(right->op_name(), "or");

  // category not in ("a", "b", "c")
  auto category_filter = std::dynamic_pointer_cast<QueryNode>(right->right());
  ASSERT_TRUE(category_filter);
  EXPECT_EQ(category_filter->op_name(), " in ");
  auto left_key =
      std::dynamic_pointer_cast<QueryIDNode>(category_filter->left());
  ASSERT_TRUE(left_key);
  EXPECT_EQ(left_key->op_name(), "ID");
  EXPECT_EQ(left_key->value(), "category");
  auto right_const =
      std::dynamic_pointer_cast<QueryListNode>(category_filter->right());
  ASSERT_TRUE(right_const);
  EXPECT_EQ(right_const->op_name(), "LIST_VALUE");
  EXPECT_EQ(right_const->text(), "NOT (a, b, c)");

  // (name=3) or (name in (1, 2, 3))
  auto left = std::dynamic_pointer_cast<QueryNode>(right->left());
  ASSERT_TRUE(left);
  EXPECT_EQ(left->op_name(), "or");

  // name=3
  auto or1 = std::dynamic_pointer_cast<QueryNode>(left->left());
  ASSERT_TRUE(or1);
  EXPECT_EQ(or1->op_name(), "=");
  auto id1 = std::dynamic_pointer_cast<QueryIDNode>(or1->left());
  ASSERT_TRUE(id1);
  EXPECT_EQ(id1->op_name(), "ID");
  EXPECT_EQ(id1->value(), "name");
  auto const1 = std::dynamic_pointer_cast<QueryConstantNode>(or1->right());
  ASSERT_TRUE(const1);
  EXPECT_EQ(const1->op_name(), "INT_VALUE");
  EXPECT_EQ(const1->value(), "3");

  // name in (1, 2, 3)
  auto or2 = std::dynamic_pointer_cast<QueryNode>(left->right());
  ASSERT_TRUE(or2);
  EXPECT_EQ(or2->op_name(), " in ");
  auto id2 = std::dynamic_pointer_cast<QueryIDNode>(or2->left());
  ASSERT_TRUE(id2);
  EXPECT_EQ(id2->op_name(), "ID");
  EXPECT_EQ(id2->value(), "name");
  auto const2 = std::dynamic_pointer_cast<QueryListNode>(or2->right());
  ASSERT_TRUE(const2);
  EXPECT_EQ(const2->op_name(), "LIST_VALUE");
  EXPECT_EQ(const2->text(), "(1, 2, 3)");
}


// Malformed "in" lists must be rejected by parse_request: an empty list, and
// lists whose element types do not fit the column (UINT32 "name", STRING
// "category"). The unfiltered request is parsed first as a sanity check.
TEST_F(QueryInfoTest, QueryRequestWithInFilterWrong) {
  VectorQuery request;
  request.output_fields_ = {"*"};
  request.topk_ = 11;
  request.query_vector_ = "[0.1, 0.2, 0.3, 0.4]";
  request.field_name_ = "face_feature";
  request.include_vector_ = false;
  request.query_params_ = std::make_shared<QueryParams>(IndexType::FLAT);
  request.query_params_->set_radius(0.8F);

  auto sql_engine =
      std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());
  auto parsed = sql_engine->parse_request(schema, request, nullptr);
  ASSERT_TRUE(parsed.has_value());

  // Each entry is tried in order on the same engine; the first one that
  // unexpectedly parses aborts the test.
  const char *const kRejectedFilters[] = {
      "name in ()",
      "name in (\"a\", 2, 3)",
      "name in (1.1, 2, 3)",
      "category in (1.1, \"b\")",
  };
  for (const char *bad_filter : kRejectedFilters) {
    SCOPED_TRACE(bad_filter);
    request.filter_ = bad_filter;
    parsed = sql_engine->parse_request(schema, request, nullptr);
    ASSERT_FALSE(parsed.has_value());
  }
}

// A filter made of 1024 equality terms joined by "or" ("name=0 or name=1 or
// ... or name=1023") must still parse and yield the usual field selection.
TEST_F(QueryInfoTest, QueryRequestWithInFilterNum1024) {
  VectorQuery request;
  request.output_fields_ = {"*"};
  request.topk_ = 10;
  request.query_vector_ = "[0.1, 0.2, 0.3, 0.4]";
  request.field_name_ = "face_feature";
  request.include_vector_ = false;
  request.query_params_ = std::make_shared<QueryParams>(IndexType::FLAT);
  request.query_params_->set_radius(0.8F);

  // Seed with the first term, then append the remaining 1023 alternatives.
  std::string disjunction = "name=0";
  for (int value = 1; value < 1024; ++value) {
    disjunction += " or name=" + std::to_string(value);
  }
  request.filter_ = disjunction;

  auto sql_engine =
      std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());
  auto parsed = sql_engine->parse_request(schema, request, nullptr);
  ASSERT_TRUE(parsed.has_value());
  QueryInfo::Ptr info = parsed.value();

  const char *const kSelected[] = {"name", "category", "1-dash_score_field",
                                   "name_array", "category_array"};
  auto &fields = info->query_fields();
  EXPECT_EQ(fields.size(), 5);
  for (size_t i = 0; i < 5; ++i) {
    EXPECT_EQ(fields[i]->field_name(), kSelected[i]);
  }
  EXPECT_EQ(info->query_topn(), 10);

  EXPECT_FALSE(info->invert_cond());
  EXPECT_FALSE(info->post_filter_cond());
  EXPECT_FALSE(info->post_invert_cond());

  ASSERT_TRUE(info->vector_cond_info());
  auto vec = info->vector_cond_info();
  EXPECT_EQ(1, vec->batch());
  EXPECT_EQ("face_feature", vec->vector_field_name());
}


// Parses a filter built from contain_all / contain_any (plain and negated)
// and verifies the shape of the resulting condition tree. The assertions show
// that a negated operator keeps its " contain_* " operator name and carries
// the negation in the list node's text ("NOT (...)").
//
// Every node is bound to its own named variable and null-checked with a fatal
// ASSERT_* before it is dereferenced, so an unexpected tree shape fails this
// test instead of crashing the whole test binary.
TEST_F(QueryInfoTest, QueryRequestWithFilter_contain) {
  VectorQuery query;
  query.output_fields_ = {"*"};
  query.topk_ = 10;
  query.query_vector_ = "[0.1, 0.2, 0.3, 0.4]";
  query.field_name_ = "face_feature";
  query.include_vector_ = false;
  query.query_params_ = std::make_shared<QueryParams>(IndexType::FLAT);
  query.query_params_->set_radius(0.8F);
  query.filter_ =
      R"( name_array contain_all (1, 2, 3) and )"
      R"( (name_array not contain_all (4, 5) or category_array contain_any
      ("a", "b")) )"
      R"( or category_array not contain_any ("c", "d", "e")
      )";

  auto engine = std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());
  auto ret = engine->parse_request(schema, query, nullptr);
  ASSERT_TRUE(ret.has_value()) << ret.error().c_str();
  QueryInfo::Ptr new_query_info = ret.value();

  // Pre-defined schema fields. Fatal size check: the indexed accesses below
  // would otherwise read past the end when fewer fields come back.
  auto &query_fields = new_query_info->query_fields();
  ASSERT_EQ(query_fields.size(), 5);
  EXPECT_EQ(query_fields[0]->field_name(), "name");
  EXPECT_EQ(query_fields[1]->field_name(), "category");
  EXPECT_EQ(query_fields[2]->field_name(), "1-dash_score_field");
  EXPECT_EQ(query_fields[3]->field_name(), "name_array");
  EXPECT_EQ(query_fields[4]->field_name(), "category_array");
  EXPECT_EQ(new_query_info->query_topn(), 10);

  EXPECT_FALSE(new_query_info->invert_cond());
  EXPECT_FALSE(new_query_info->post_filter_cond());
  EXPECT_FALSE(new_query_info->post_invert_cond());

  ASSERT_TRUE(new_query_info->vector_cond_info());
  auto vector_cond = new_query_info->vector_cond_info();
  EXPECT_EQ(1, vector_cond->batch());
  EXPECT_EQ("face_feature", vector_cond->vector_field_name());

  // Root of the condition tree:
  //
  //   and
  //   +-- (nullptr)
  //   +-- <scalar filter, see below>
  auto filter_cond = new_query_info->filter_cond();
  ASSERT_TRUE(filter_cond);
  EXPECT_EQ(filter_cond->op_name(), "and");
  EXPECT_FALSE(filter_cond->left());

  // Expected scalar filter subtree:
  //
  //   or
  //   +-- and
  //   |   +-- name_array contain_all (1, 2, 3)
  //   |   +-- or
  //   |       +-- name_array not contain_all (4, 5)
  //   |       +-- category_array contain_any ("a", "b")
  //   +-- category_array not contain_any ("c", "d", "e")
  auto top_or = std::dynamic_pointer_cast<QueryNode>(filter_cond->right());
  ASSERT_TRUE(top_or);
  EXPECT_EQ(top_or->op_name(), "or");

  // Right of the top "or": category_array not contain_any ("c", "d", "e")
  {
    auto leaf = std::dynamic_pointer_cast<QueryNode>(top_or->right());
    ASSERT_TRUE(leaf);
    EXPECT_EQ(leaf->op_name(), " contain_any ");
    auto left_key = std::dynamic_pointer_cast<QueryIDNode>(leaf->left());
    ASSERT_TRUE(left_key);
    EXPECT_EQ(left_key->op_name(), "ID");
    EXPECT_EQ(left_key->value(), "category_array");
    auto right_const = std::dynamic_pointer_cast<QueryListNode>(leaf->right());
    ASSERT_TRUE(right_const);
    EXPECT_EQ(right_const->op_name(), "LIST_VALUE");
    EXPECT_EQ(right_const->text(), "NOT (c, d, e)");
  }

  // Left of the top "or":
  // name_array contain_all (1, 2, 3) and
  // (name_array not contain_all (4, 5) or category_array contain_any ("a",
  // "b"))
  auto and_node = std::dynamic_pointer_cast<QueryNode>(top_or->left());
  ASSERT_TRUE(and_node);
  EXPECT_EQ(and_node->op_name(), "and");

  // Left of "and": name_array contain_all (1, 2, 3)
  {
    auto leaf = std::dynamic_pointer_cast<QueryNode>(and_node->left());
    ASSERT_TRUE(leaf);
    EXPECT_EQ(leaf->op_name(), " contain_all ");
    auto left_key = std::dynamic_pointer_cast<QueryIDNode>(leaf->left());
    ASSERT_TRUE(left_key);
    EXPECT_EQ(left_key->op_name(), "ID");
    EXPECT_EQ(left_key->value(), "name_array");
    auto right_const = std::dynamic_pointer_cast<QueryListNode>(leaf->right());
    ASSERT_TRUE(right_const);
    EXPECT_EQ(right_const->op_name(), "LIST_VALUE");
    EXPECT_EQ(right_const->text(), "(1, 2, 3)");
  }

  // Right of "and":
  // (name_array not contain_all (4, 5) or category_array contain_any ("a",
  // "b"))
  auto inner_or = std::dynamic_pointer_cast<QueryNode>(and_node->right());
  ASSERT_TRUE(inner_or);
  EXPECT_EQ(inner_or->op_name(), "or");

  // Left of the inner "or": name_array not contain_all (4, 5)
  {
    auto leaf = std::dynamic_pointer_cast<QueryNode>(inner_or->left());
    ASSERT_TRUE(leaf);
    EXPECT_EQ(leaf->op_name(), " contain_all ");
    auto left_key = std::dynamic_pointer_cast<QueryIDNode>(leaf->left());
    ASSERT_TRUE(left_key);
    EXPECT_EQ(left_key->op_name(), "ID");
    EXPECT_EQ(left_key->value(), "name_array");
    auto right_const = std::dynamic_pointer_cast<QueryListNode>(leaf->right());
    ASSERT_TRUE(right_const);
    EXPECT_EQ(right_const->op_name(), "LIST_VALUE");
    EXPECT_EQ(right_const->text(), "NOT (4, 5)");
  }

  // Right of the inner "or": category_array contain_any ("a", "b")
  {
    auto leaf = std::dynamic_pointer_cast<QueryNode>(inner_or->right());
    ASSERT_TRUE(leaf);
    EXPECT_EQ(leaf->op_name(), " contain_any ");
    auto left_key = std::dynamic_pointer_cast<QueryIDNode>(leaf->left());
    ASSERT_TRUE(left_key);
    EXPECT_EQ(left_key->op_name(), "ID");
    EXPECT_EQ(left_key->value(), "category_array");
    auto right_const = std::dynamic_pointer_cast<QueryListNode>(leaf->right());
    ASSERT_TRUE(right_const);
    EXPECT_EQ(right_const->op_name(), "LIST_VALUE");
    EXPECT_EQ(right_const->text(), "(a, b)");
  }
}

// Selecting an output field that the schema does not declare must fail, and
// the error message must say so.
TEST_F(QueryInfoTest, SelectNonExistField) {
  VectorQuery request;
  request.output_fields_ = {"category_array", "not_exist_field"};
  request.topk_ = 11;
  request.include_vector_ = false;

  auto sql_engine =
      std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());
  auto parsed = sql_engine->parse_request(schema, request, nullptr);
  ASSERT_FALSE(parsed.has_value());

  EXPECT_THAT(parsed.error().message(),
              testing::HasSubstr("not defined in schema"));
}

// A "not contain_all" list with 33 elements (0..32) is one over the limit
// named in the expected error message, so parsing must fail.
TEST_F(QueryInfoTest, ContainAllExceedLimit) {
  // Comma-separated values "0, 1, ..., 32".
  std::string list_items = "0";
  for (int value = 1; value <= 32; ++value) {
    list_items += ", " + std::to_string(value);
  }

  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "name_array not contain_all (" + list_items + ")";

  auto sql_engine =
      std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());
  auto parsed = sql_engine->parse_request(schema, request, nullptr);
  ASSERT_FALSE(parsed.has_value());
  EXPECT_THAT(parsed.error().message(),
              testing::HasSubstr(
                  "Contain_* rel expr only support list size no more than 32"));
}

// A "not contain_any" list with 33 elements (0..32) is one over the limit
// named in the expected error message, so parsing must fail.
TEST_F(QueryInfoTest, ContainAnyExceedLimit) {
  // Comma-separated values "0, 1, ..., 32".
  std::string list_items = "0";
  for (int value = 1; value <= 32; ++value) {
    list_items += ", " + std::to_string(value);
  }

  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "name_array not contain_any (" + list_items + ")";

  auto sql_engine =
      std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());
  auto parsed = sql_engine->parse_request(schema, request, nullptr);
  ASSERT_FALSE(parsed.has_value());
  EXPECT_THAT(parsed.error().message(),
              testing::HasSubstr(
                  "Contain_* rel expr only support list size no more than 32"));
}

// array_length() applied to a field that is not in the schema must be
// rejected with a dedicated error message.
TEST_F(QueryInfoTest, ArrayLengthNonExistField) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "array_length(not_exist_field) > 1";

  auto sql_engine =
      std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());
  auto parsed = sql_engine->parse_request(schema, request, nullptr);

  ASSERT_FALSE(parsed.has_value());
  EXPECT_THAT(parsed.error().message(),
              testing::HasSubstr("array_length argument not found in schema"));
}

// array_length() applied to the scalar UINT32 column "name" must be rejected:
// the function only accepts array columns.
TEST_F(QueryInfoTest, ArrayLengthOnNonArrayField) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "array_length(name) > 1";

  auto sql_engine =
      std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());
  auto parsed = sql_engine->parse_request(schema, request, nullptr);

  ASSERT_FALSE(parsed.has_value());
  EXPECT_THAT(parsed.error().message(),
              testing::HasSubstr("array_length only support array"));
}

// Comparing array_length() against a string literal must be rejected: the
// right-hand side has to be an integer.
TEST_F(QueryInfoTest, ArrayLengthInvalidArgument) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "array_length(name_array) > '1'";

  auto sql_engine =
      std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());
  auto parsed = sql_engine->parse_request(schema, request, nullptr);

  ASSERT_FALSE(parsed.has_value());
  EXPECT_THAT(
      parsed.error().message(),
      testing::HasSubstr("array_length right side only support integer"));
}

// Using "like" with array_length() is not valid syntax and must surface as a
// syntax error.
TEST_F(QueryInfoTest, ArrayLengthInvalidOp) {
  VectorQuery request;
  request.topk_ = 200;
  request.filter_ = "array_length(name_array) like '%'";

  auto sql_engine =
      std::make_shared<SQLEngineImpl>(std::make_shared<Profiler>());
  auto parsed = sql_engine->parse_request(schema, request, nullptr);

  ASSERT_FALSE(parsed.has_value());
  EXPECT_THAT(parsed.error().message(), testing::HasSubstr("syntax error"));
}

}  // namespace zvec::sqlengine


================================================
FILE: tests/db/sqlengine/recall_base.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <chrono>
#include <cstdint>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <memory>
#include <arrow/api.h>
#include <arrow/io/api.h>
#include <arrow/ipc/api.h>
#include <gtest/gtest.h>
#include "db/common/file_helper.h"
#include "db/index/common/version_manager.h"
#include "db/index/segment/segment.h"
#include "zvec/db/index_params.h"
#include "zvec/db/schema.h"
#include "zvec/db/type.h"

namespace zvec {

// Builds the "test_collection" schema used by the recall tests. Most scalar
// and array columns come in pairs: a plain column and an "invert_"-prefixed
// twin that carries inverted-index parameters. A dense and a sparse vector
// column, both with flat indexes, complete the schema.
inline CollectionSchema::Ptr GetCollectionSchema() {
  // Column without any index parameters.
  const auto plain = [](const char *name, DataType type, bool nullable) {
    return std::make_shared<FieldSchema>(name, type, nullable, nullptr);
  };
  // Column carrying the given inverted-index parameters.
  const auto inverted = [](const char *name, DataType type, bool nullable,
                           const std::shared_ptr<InvertIndexParams> &params) {
    return std::make_shared<FieldSchema>(name, type, nullable, params);
  };

  // Shared by every inverted column except "invert_age", which receives its
  // own parameter object.
  auto invert_params = std::make_shared<InvertIndexParams>(true);

  return std::make_shared<CollectionSchema>(
      "test_collection",
      std::vector<FieldSchema::Ptr>{
          plain("id", DataType::UINT64, false),
          inverted("invert_id", DataType::UINT64, false, invert_params),

          plain("bool", DataType::BOOL, false),
          inverted("invert_bool", DataType::BOOL, false, invert_params),

          plain("bool_array", DataType::ARRAY_BOOL, false),
          inverted("invert_bool_array", DataType::ARRAY_BOOL, false,
                   invert_params),

          plain("name", DataType::STRING, false),
          inverted("invert_name", DataType::STRING, false, invert_params),

          plain("age", DataType::INT32, false),
          inverted("invert_age", DataType::INT32, false,
                   std::make_shared<InvertIndexParams>(true)),

          plain("score", DataType::DOUBLE, false),

          // Nullable columns.
          plain("optional_age", DataType::UINT32, true),
          inverted("invert_optional_age", DataType::UINT32, true,
                   invert_params),

          plain("category_set", DataType::ARRAY_INT32, true),
          inverted("invert_category_set", DataType::ARRAY_INT32, true,
                   invert_params),

          // Dense vector column: dimension 4, flat index, L2 metric.
          std::make_shared<FieldSchema>(
              "dense", DataType::VECTOR_FP32, 4, false,
              std::make_shared<FlatIndexParams>(MetricType::L2)),

          // Sparse vector column: dimension 0, flat index, IP metric.
          std::make_shared<FieldSchema>(
              "sparse", DataType::SPARSE_VECTOR_FP32, 0, false,
              std::make_shared<FlatIndexParams>(MetricType::IP)),
      });
}

// Creates the document for `doc_id`, populating the fields of the schema
// returned by GetCollectionSchema(). Apart from "score", which is drawn from
// rand(), every value is derived from doc_id; most of them depend only on
// doc_id % 100.
inline Doc CreateDoc(const uint64_t doc_id) {
  // Most generated values repeat every 100 documents.
  const uint64_t bucket = doc_id % 100;

  Doc doc;
  doc.set_pk("pk_" + std::to_string(doc_id));
  doc.set_doc_id(doc_id);

  doc.set<uint64_t>("id", doc_id);
  doc.set<uint64_t>("invert_id", doc_id);
  doc.set<bool>("bool", bucket == 0);
  doc.set<bool>("invert_bool", bucket == 0);
  doc.set<int32_t>("age", static_cast<int32_t>(bucket));
  doc.set<int32_t>("invert_age", static_cast<int32_t>(bucket));

  // The optional age fields stay unset when the bucket is zero.
  uint32_t optional_age = static_cast<uint32_t>(bucket);
  if (optional_age != 0) {
    doc.set("optional_age", optional_age);
    doc.set("invert_optional_age", optional_age);
  }

  std::string user_name = "user_" + std::to_string(bucket);
  doc.set<std::string>("name", user_name);
  doc.set<std::string>("invert_name", user_name);
  doc.set<double>("score", static_cast<double>(rand() % 1000) / 10.0);

  // Dense vector: four components, each equal to doc_id.
  std::vector<float> dense(4, static_cast<float>(doc_id));
  doc.set<std::vector<float>>("dense", dense);

  // Sparse vector: indices 0..bucket-1, every value equal to doc_id.
  {
    std::vector<uint32_t> indices(bucket);
    for (uint32_t pos = 0; pos < indices.size(); ++pos) {
      indices[pos] = pos;
    }
    std::vector<float> values(bucket, static_cast<float>(doc_id));
    doc.set<std::pair<std::vector<uint32_t>, std::vector<float>>>(
        "sparse", std::make_pair(indices, values));
  }

  // Category set 1..bucket; left unset when the bucket is zero.
  if (bucket > 0) {
    std::vector<int32_t> categories;
    for (int32_t tag = 1; tag <= static_cast<int32_t>(bucket); ++tag) {
      categories.push_back(tag);
    }
    doc.set("category_set", categories);
    doc.set("invert_category_set", categories);
  }

  // The bool arrays cycle through three fixed patterns keyed by doc_id % 3.
  switch (doc_id % 3) {
    case 0:
      doc.set<std::vector<bool>>("bool_array", {true, false, true});
      doc.set<std::vector<bool>>("invert_bool_array", {true, false, true});
      break;
    case 1:
      doc.set<std::vector<bool>>("bool_array", {true, true, true});
      doc.set<std::vector<bool>>("invert_bool_array", {true, true, true});
      break;
    default:
      doc.set<std::vector<bool>>("bool_array", {false, false, false});
      doc.set<std::vector<bool>>("invert_bool_array", {false, false, false});
      break;
  }

  return doc;
}

// Inserts the documents with ids in [start_doc_id, end_doc_id) into `segment`
// and prints the accumulated document-creation and insertion times.
//
// @param segment       Target segment. When null, nothing is inserted; the
//                      (zero) timings are still printed and OK is returned.
// @param start_doc_id  First document id to insert (inclusive).
// @param end_doc_id    One past the last document id to insert (exclusive).
// @return The first non-OK status returned by Segment::Insert, otherwise
//         Status::OK(). On an insert failure the timings are not printed.
//
// Elapsed time is taken from std::chrono::steady_clock so the reported
// durations cannot be distorted by wall-clock adjustments.
inline Status InsertDoc(const Segment::Ptr &segment,
                        const uint64_t start_doc_id,
                        const uint64_t end_doc_id) {
  using Clock = std::chrono::steady_clock;
  using Micros = std::chrono::microseconds;

  // Seed rand(), which CreateDoc uses for the "score" field.
  std::srand(static_cast<unsigned int>(std::time(nullptr)));

  long long create_total = 0;
  long long insert_total = 0;

  // The null check does not depend on the loop, so it is done once up front.
  if (segment) {
    for (uint64_t doc_id = start_doc_id; doc_id < end_doc_id; ++doc_id) {
      const auto create_begin = Clock::now();
      Doc new_doc = CreateDoc(doc_id);
      const auto create_end = Clock::now();
      create_total +=
          std::chrono::duration_cast<Micros>(create_end - create_begin)
              .count();

      const auto insert_begin = Clock::now();
      auto status = segment->Insert(new_doc);
      if (!status.ok()) {
        return status;
      }
      const auto insert_end = Clock::now();
      insert_total +=
          std::chrono::duration_cast<Micros>(insert_end - insert_begin)
              .count();
    }
  }

  std::cout << "pure create cost " << create_total << "us" << std::endl;
  std::cout << "pure insert cost " << insert_total << "us" << std::endl;
  return Status::OK();
}

// Suite-level fixture: one writable segment is created under seg_path_,
// filled with generated documents and shared by all tests through segments_.
class RecallTest : public testing::Test {
 protected:
  // Runs once before the first test: recreates the working directory, builds
  // the collection schema, opens a segment and inserts doc ids [0, 10000).
  // Any failure is logged, recorded as a test failure and terminates the
  // process.
  static void SetUpTestSuite() {
    FileHelper::RemoveDirectory(seg_path_);
    FileHelper::CreateDirectory(seg_path_);
    collection_schema_ = GetCollectionSchema();

    Segment::Ptr segment = create_segment();
    if (!segment) {
      LOG_ERROR("create segment failed");
      EXPECT_TRUE(segment != nullptr);
      std::exit(EXIT_FAILURE);
    }

    Status status = InsertDoc(segment, 0, 10000);
    if (!status.ok()) {
      LOG_ERROR("insert doc failed: %s", status.c_str());
      EXPECT_TRUE(status.ok());
      std::exit(EXIT_FAILURE);
    }

    segments_.push_back(segment);
  }

  // Runs once after the last test: drops the segment references and removes
  // the working directory.
  static void TearDownTestSuite() {
    segments_.clear();
    FileHelper::RemoveDirectory(seg_path_);
  }

 public:
  // Directory that holds everything this suite writes to disk.
  static std::string GetPath() {
    return seg_path_;
  }

  // Creates and opens a fresh segment under GetPath(); returns nullptr on
  // failure. Defined out of line.
  static Segment::Ptr create_segment();

 protected:
  // Working directory of the suite.
  static inline std::string seg_path_ = "./test_collection";
  // Schema assigned in SetUpTestSuite and read by create_segment().
  static inline CollectionSchema::Ptr collection_schema_;
  // Segments populated by SetUpTestSuite.
  static inline std::vector<Segment::Ptr> segments_;
};

// Creates and opens a writable, mmap-enabled segment (id 0) below GetPath().
//
// An id map is opened at "<path>/id_map" and a version manager is created in
// "<path>/test_manifest" from collection_schema_; both are handed to
// Segment::CreateAndOpen together with a fresh delete store.
//
// Returns the opened segment, or nullptr (after logging the error) when the
// version manager or the segment cannot be created.
inline Segment::Ptr RecallTest::create_segment() {
  // Fetch the path once; every location below is derived from it.
  const std::string seg_path = GetPath();

  // NOTE(review): segment_meta is filled in below but is never passed to
  // Segment::CreateAndOpen in this function - confirm whether it is still
  // needed.
  auto segment_meta = std::make_shared<SegmentMeta>();
  segment_meta->set_id(0);

  auto id_map = IDMap::CreateAndOpen("test_collection", seg_path + "/id_map",
                                     true, false);
  auto delete_store = std::make_shared<DeleteStore>("test_collection");

  Version v1;
  v1.set_schema(*collection_schema_);
  const std::string v_path = seg_path + "/test_manifest";
  FileHelper::CreateDirectory(v_path);
  auto vm = VersionManager::Create(v_path, v1);
  if (!vm.has_value()) {
    LOG_ERROR("create version manager failed: %s", vm.error().c_str());
    return nullptr;
  }

  // Empty scalar block registered as the forward block being written.
  BlockMeta mem_block;
  mem_block.id_ = 0;
  mem_block.type_ = BlockType::SCALAR;
  mem_block.min_doc_id_ = 0;
  mem_block.max_doc_id_ = 0;
  mem_block.doc_count_ = 0;
  segment_meta->set_writing_forward_block(mem_block);

  SegmentOptions options;
  options.read_only_ = false;
  options.enable_mmap_ = true;
  options.max_buffer_size_ = 256 * 1024;

  auto result =
      Segment::CreateAndOpen(seg_path, *collection_schema_, 0, 0, id_map,
                             delete_store, vm.value(), options);
  if (!result) {
    LOG_ERROR("create segment failed: %s", result.error().c_str());
    return nullptr;
  }
  return result.value();
}

}  // namespace zvec


================================================
FILE: tests/db/sqlengine/simple_rewriter_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "sqlengine/analyzer/simple_rewriter.h"
#include <gtest/gtest.h>
#include "db/sqlengine/analyzer/query_info.h"
#include "db/sqlengine/sqlengine_impl.h"
#include "zvec/db/doc.h"
#include "zvec/db/schema.h"

namespace zvec::sqlengine {

class SimpleRewriterTest : public testing::Test {
 public:
  // Sets up the test fixture.
  static void SetUpTestSuite() {
    schema = std::make_shared<CollectionSchema>();
    auto &param = *schema;
    param.set_name("1collection");

    auto column1 = std::make_shared<FieldSchema>();
    auto vector_params = std::make_shared<FlatIndexParams>(MetricType::IP);
    column1->set_name("face_feature");
    column1->set_index_params(vector_params);
    column1->set_dimension(4);
    column1->set_data_type(DataType::VECTOR_FP32);
    param.add_field(column1);

    auto column2 = std::make_shared<FieldSchema>();
    column2->set_name("age");
    column2->set_data_type(DataType::UINT32);
    param.add_field(column2);

    auto column_gender = std::make_shared<FieldSchema>();
    column_gender->set_name("gender");
    column_gender->set_data_type(DataType::UINT32);
    param.add_field(column_gender);

    auto column3 = std::make_shared<FieldSchema>();
    column3->set_name("category");
    column3->set_data_type(DataType::STRING);
    param.add_field(column3);

    auto column4 = std::make_shared<FieldSchema>();
    column4->set_name("face_feature");
    column4->set_dimension(4);
    column4->set_data_type(DataType::VECTOR_FP32);
    param.add_field(column4);

    auto column5 = std::make_shared<FieldSchema>();
    column5->set_name("filename");
    column5->set_dimension(5);
    column5->set_data_type(DataType::STRING);
    param.add_field(column5);

    {
      auto column = std::make_shared<FieldSchema>();
      column->set_name("loc");
      column->set_data_type(DataType::UINT32);
      param.add_field(column);
    }

    {
      auto column = std::make_shared<FieldSchema>();
      column->set_name("fid");
      column->set_data_type(DataType::UINT32);
      param.add_field(column);
    }

    {
      auto column = std::make_shared<FieldSchema>();
      column->set_name("agent_id");
      column->set_data_type(DataType::UINT32);
      param.add_field(column);
    }

    {
      auto column = std::make_shared<FieldSchema>();
      column->set_name("state");
      column->set_data_type(DataType::UINT32);
      param.add_field(column);
    }

    {
      auto column = std::make_shared<FieldSchema>();
      column->set_name("categoryId");
      column->set_data_type(DataType::UINT32);
      param.add_field(column);
    }

    {
      auto column = std::make_shared<FieldSchema>();
      column->set_name("passed_days");
      column->set_data_type(DataType::UINT32);
      param.add_field(column);
    }

    {
      auto column = std::make_shared<FieldSchema>();
      column->set_name("category_in");
      column->set_data_type(DataType::UINT32);
      param.add_field(column);
    }

    {
      auto column = std::make_shared<FieldSchema>();
      column->set_name("category_out");
      column->set_data_type(DataType::UINT32);
      param.add_field(column);
    }

    {
      auto column = std::make_shared<FieldSchema>();
      column->set_name("intAttr");
      column->set_data_type(DataType::UINT32);
      param.add_field(column);
    }

    {
      auto column = std::make_shared<FieldSchema>();
      column->set_name("intAttr");
      column->set_data_type(DataType::UINT32);
      param.add_field(column);
    }

    {
      auto column = std::make_shared<FieldSchema>();
      column->set_name("strAttr");
      column->set_data_type(DataType::STRING);
      param.add_field(column);
    }

    {
      auto column = std::make_shared<FieldSchema>();
      column->set_name("partitionName");
      column->set_data_type(DataType::STRING);
      param.add_field(column);
    }

    {
      auto column = std::make_shared<FieldSchema>();
      column->set_name("doc_id");
      column->set_data_type(DataType::UINT32);
      param.add_field(column);
    }

    {
      auto column = std::make_shared<FieldSchema>();
      column->set_name("a");
      column->set_data_type(DataType::UINT32);
      param.add_field(column);
    }

    {
      auto column = std::make_shared<FieldSchema>();
      column->set_name("is_type1");
      column->set_data_type(DataType::BOOL);
      param.add_field(column);
    }

    {
      auto column = std::make_shared<FieldSchema>();
      column->set_name("is_type2");
      column->set_data_type(DataType::BOOL);
      param.add_field(column);
    }

    {
      auto column = std::make_shared<FieldSchema>();
      column->set_name("category_array");
      column->set_data_type(DataType::ARRAY_STRING);
      param.add_field(column);
    }
  }

  // Tears down the test fixture.
  static void TearDownTestSuite() {}

  QueryInfo::Ptr parse(const std::string &filter) {
    VectorQuery query;
    query.output_fields_ = {"*"};
    query.topk_ = 11;
    query.include_vector_ = false;
    query.filter_ = filter;

    auto engine = std::make_shared<SQLEngineImpl>(profiler_);
    auto ret = engine->parse_request(schema, query, nullptr);

    // ASSERT_TRUE(ret.has_value());
    QueryInfo::Ptr new_query_info = ret.value();
    return new_query_info;
  }


 protected:
  Profiler::Ptr profiler_{new Profiler};
  inline static CollectionSchema::Ptr schema;
};

// Empty subclass of SimpleRewriterTest that groups the tests covering OR
// chains of `=` / `!=` predicates and their rewrite into `in` lists.
class EqOrRewriteTest : public SimpleRewriterTest {};

// Two equality predicates on the same field joined by OR are expected to be
// reported as one IN list.
TEST_F(EqOrRewriteTest, SimpleEqOr) {
  const std::string filter = "age = 10 or age = 20 ";
  const std::string expected = "age in (10, 20)(FORWARD)";

  const auto query_info = parse(filter);
  ASSERT_NE(query_info, nullptr);
  EXPECT_EQ(query_info->filter_cond()->text(), expected);
}

// Pins the rewrite of a flat OR chain of 100 `age = N` predicates (N = 1..100)
// into a single IN list that keeps the literals in their original order.
TEST_F(EqOrRewriteTest, SimpleManyEqOr) {
  auto info = parse(
      "age = 1 or age = 2 or age = 3 or age = 4 "
      "or age = 5 or age = 6 or age = 7 or age = 8 or age = 9 or age = 10 or "
      "age = 11 or age = 12 or age = 13 or age = 14 or age = 15 or age = 16 or "
      "age = 17 or age = 18 or age = 19 or age = 20 or age = 21 or age = 22 or "
      "age = 23 or age = 24 or age = 25 or age = 26 or age = 27 or age = 28 or "
      "age = 29 or age = 30 or age = 31 or age = 32 or age = 33 or age = 34 or "
      "age = 35 or age = 36 or age = 37 or age = 38 or age = 39 or age = 40 or "
      "age = 41 or age = 42 or age = 43 or age = 44 or age = 45 or age = 46 or "
      "age = 47 or age = 48 or age = 49 or age = 50 or age = 51 or age = 52 or "
      "age = 53 or age = 54 or age = 55 or age = 56 or age = 57 or age = 58 or "
      "age = 59 or age = 60 or age = 61 or age = 62 or age = 63 or age = 64 or "
      "age = 65 or age = 66 or age = 67 or age = 68 or age = 69 or age = 70 or "
      "age = 71 or age = 72 or age = 73 or age = 74 or age = 75 or age = 76 or "
      "age = 77 or age = 78 or age = 79 or age = 80 or age = 81 or age = 82 or "
      "age = 83 or age = 84 or age = 85 or age = 86 or age = 87 or age = 88 or "
      "age = 89 or age = 90 or age = 91 or age = 92 or age = 93 or age = 94 or "
      "age = 95 or age = 96 or age = 97 or age = 98 or age = 99 or age = 100");
  // A null result means the filter could not be parsed; stop here.
  ASSERT_NE(info, nullptr);
  EXPECT_EQ(
      info->filter_cond()->text(),
      "age in (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, "
      "19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, "
      "37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, "
      "55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, "
      "73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, "
      "91, 92, 93, 94, 95, 96, 97, 98, 99, 100)(FORWARD)");
}

// Same 100-term `age = N` OR chain as SimpleManyEqOr, but with several
// sub-ranges wrapped in parentheses (7..13, 38..85, 98..99). The expected
// text is the same single IN list with the literals in source order.
TEST_F(EqOrRewriteTest, SimpleManyEqOrParas) {
  auto info = parse(
      "age = 1 or age = 2 or age = 3 or age = 4 "
      "or age = 5 or age = 6 or (age = 7 or age = 8 or age = 9 or age = 10 or "
      "age = 11 or age = 12 or age = 13) or age = 14 or age = 15 or age = 16 "
      "or "
      "age = 17 or age = 18 or age = 19 or age = 20 or age = 21 or age = 22 or "
      "age = 23 or age = 24 or age = 25 or age = 26 or age = 27 or age = 28 or "
      "age = 29 or age = 30 or age = 31 or age = 32 or age = 33 or age = 34 or "
      "age = 35 or age = 36 or age = 37 or (age = 38 or age = 39 or age = 40 "
      "or "
      "age = 41 or age = 42 or age = 43 or age = 44 or age = 45 or age = 46 or "
      "age = 47 or age = 48 or age = 49 or age = 50 or age = 51 or age = 52 or "
      "age = 53 or age = 54 or age = 55 or age = 56 or age = 57 or age = 58 or "
      "age = 59 or age = 60 or age = 61 or age = 62 or age = 63 or age = 64 or "
      "age = 65 or age = 66 or age = 67 or age = 68 or age = 69 or age = 70 or "
      "age = 71 or age = 72 or age = 73 or age = 74 or age = 75 or age = 76 or "
      "age = 77 or age = 78 or age = 79 or age = 80 or age = 81 or age = 82 or "
      "age = 83 or age = 84 or age = 85) or age = 86 or age = 87 or age = 88 "
      "or "
      "age = 89 or age = 90 or age = 91 or age = 92 or age = 93 or age = 94 or "
      "age = 95 or age = 96 or age = 97 or (age = 98 or age = 99) or age = "
      "100");
  // A null result means the filter could not be parsed; stop here.
  ASSERT_NE(info, nullptr);
  EXPECT_EQ(
      info->filter_cond()->text(),
      "age in (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, "
      "19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, "
      "37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, "
      "55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, "
      "73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, "
      "91, 92, 93, 94, 95, 96, 97, 98, 99, 100)(FORWARD)");
}

// Two `!=` predicates on the same field joined by OR are expected to be
// reported as one "in NOT" list.
// NOTE(review): `age != 10 or age != 20` is satisfied by every age value,
// while a NOT IN list normally means `!= 10 and != 20`; this test pins the
// current rewriter output - confirm the rewrite is intended.
TEST_F(EqOrRewriteTest, SimpleNeOr) {
  const std::string filter = "age != 10 or age != 20 ";
  const std::string expected = "age in NOT (10, 20)(FORWARD)";

  const auto query_info = parse(filter);
  ASSERT_NE(query_info, nullptr);
  EXPECT_EQ(query_info->filter_cond()->text(), expected);
}

// Twenty `age != N` predicates (N = 1..20) joined by OR are expected to be
// reported as one "in NOT" list in source order.
// NOTE(review): an OR of `!=` predicates is not logically the same as
// NOT IN; this test pins the current rewriter output - confirm it is intended.
TEST_F(EqOrRewriteTest, SimpleManyNeOr) {
  const std::string filter =
      "age != 1 or age != 2 or age != 3 or age "
      "!= 4 or age != 5 or age != 6 or age != 7 or age != 8 or age != 9 or age "
      "!= 10 or age != 11 or age != 12 or age != 13 or age != 14 or age != 15 "
      "or age != 16 or age != 17 or age != 18 or age != 19 or age != 20";
  const std::string expected =
      "age in NOT (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, "
      "16, 17, 18, "
      "19, 20)(FORWARD)";

  const auto query_info = parse(filter);
  ASSERT_NE(query_info, nullptr);
  EXPECT_EQ(query_info->filter_cond()->text(), expected);
}

// A chain mixing `!=` and `=` predicates on one field is expected to yield
// two lists joined by OR: an "in NOT" list for the `!=` values and an "in"
// list for the `=` values.
TEST_F(EqOrRewriteTest, EqAndNe) {
  const std::string filter =
      "age != 10 or age != 20 or age = 30 or "
      "age = 40";
  const std::string expected =
      "(age in NOT (10, 20)(FORWARD)(OR_A)) or (age in (30, "
      "40)(FORWARD)(OR_A))";

  const auto query_info = parse(filter);
  ASSERT_NE(query_info, nullptr);
  EXPECT_EQ(query_info->filter_cond()->text(), expected);
}

// A predicate on another field placed before the `age = N` OR chain.
// Case 1: the leading predicate is OR-ed, so all four ages form one list.
// Case 2: the leading predicate is AND-ed with the first age term, so only
//         the remaining three ages form the list.
TEST_F(EqOrRewriteTest, PreEqOr) {
  struct Case {
    std::string filter;
    std::string expected;
  };
  const Case cases[] = {
      {"gender =1 or age = 10 or age = 20 or "
       "age = 30 or age = 40",
       "(gender=1(FORWARD)(OR_A)) or (age in (10, 20, 30, "
       "40)(FORWARD)(OR_A))"},
      {"gender =1 and age = 10 or age = 20 or "
       "age = 30 or age = 40",
       "((gender=1(FORWARD)(OR_A)) and (age=10(FORWARD)(OR_A))) or (age "
       "in (20, 30, 40)(FORWARD)(OR_A))"},
  };

  for (const Case &current : cases) {
    const auto query_info = parse(current.filter);
    ASSERT_NE(query_info, nullptr);
    EXPECT_EQ(query_info->filter_cond()->text(), current.expected);
  }
}

// A predicate on another field placed after the `age = N` OR chain.
// Case 1: the trailing predicate is OR-ed, so all four ages form one list.
// Case 2: the trailing predicate is AND-ed with the last age term, so only
//         the first three ages form the list.
TEST_F(EqOrRewriteTest, PostEqOr) {
  struct Case {
    std::string filter;
    std::string expected;
  };
  const Case cases[] = {
      {"age = 10 or age = 20 or "
       "age = 30 or age = 40 or gender = 1",
       "(age in (10, 20, 30, 40)(FORWARD)(OR_A)) or "
       "(gender=1(FORWARD)(OR_A))"},
      {"age = 10 or age = 20 or "
       "age = 30 or age = 40 and gender = 1",
       "(age in (10, 20, 30)(FORWARD)(OR_A)) or "
       "((age=40(FORWARD)(OR_A)) and (gender=1(FORWARD)(OR_A)))"},
  };

  for (const Case &current : cases) {
    const auto query_info = parse(current.filter);
    ASSERT_NE(query_info, nullptr);
    EXPECT_EQ(query_info->filter_cond()->text(), current.expected);
  }
}

// A parenthesised `age = N` OR chain AND-ed after another predicate is
// expected to become an IN list on the right-hand side of the AND.
TEST_F(EqOrRewriteTest, PreEqAnd) {
  const std::string filter =
      "gender =1 and (age = 10 or age = 20 or "
      "age = 30 or age = 40)";
  const std::string expected =
      "(gender=1(FORWARD)) and (age in (10, 20, 30, 40)(FORWARD))";

  const auto query_info = parse(filter);
  ASSERT_NE(query_info, nullptr);
  EXPECT_EQ(query_info->filter_cond()->text(), expected);
}

// A parenthesised `age = N` OR chain AND-ed before another predicate is
// expected to become an IN list on the left-hand side of the AND.
TEST_F(EqOrRewriteTest, PostEqAnd) {
  const std::string filter =
      "(age = 10 or age = 20 or "
      "age = 30 or age = 40) and gender=1";
  const std::string expected =
      "(age in (10, 20, 30, 40)(FORWARD)) and (gender=1(FORWARD))";

  const auto query_info = parse(filter);
  ASSERT_NE(query_info, nullptr);
  EXPECT_EQ(query_info->filter_cond()->text(), expected);
}

// A parenthesised `age = N` OR chain with AND-ed predicates on both sides is
// expected to become an IN list in the middle of the AND chain.
TEST_F(EqOrRewriteTest, PrePostEqAnd) {
  const std::string filter =
      "gender =1 and (age = 10 or age = 20 or "
      "age = 30 or age = 40) and loc != 3";
  const std::string expected =
      "((gender=1(FORWARD)) and (age in (10, 20, 30, 40)(FORWARD))) and "
      "(loc!=3(FORWARD))";

  const auto query_info = parse(filter);
  ASSERT_NE(query_info, nullptr);
  EXPECT_EQ(query_info->filter_cond()->text(), expected);
}

// Filter of the shape `(agent_id=20) and state=1 and (fid=... or fid=...)`
// with a long OR chain on `fid`. The expected text keeps the two leading AND
// terms and reports the whole `fid` chain as one IN list whose values appear
// in the same order as in the filter.
TEST_F(EqOrRewriteTest, UserCases1) {
  auto info = parse(
      "(agent_id=20) and state=1 and (fid=107 "
      "or fid=174 or fid=593 or fid=602 or fid=592 or fid=134 or fid=135 or "
      "fid=136 or fid=137 or fid=138 or fid=139 or fid=141 or fid=267 or "
      "fid=271 or fid=176 or fid=177 or fid=178 or fid=179 or fid=180 or "
      "fid=182 or fid=183 or fid=184 or fid=270 or fid=479 or fid=488 or "
      "fid=502 or fid=508 or fid=522 or fid=553 or fid=554 or fid=557 or "
      "fid=561 or fid=567 or fid=570 or fid=588 or fid=594 or fid=595 or "
      "fid=596 or fid=597 or fid=598 or fid=603 or fid=604 or fid=605 or "
      "fid=606 or fid=426 or fid=427 or fid=428 or fid=429 or fid=430 or "
      "fid=431 or fid=432 or fid=433 or fid=434 or fid=435 or fid=436 or "
      "fid=437 or fid=438 or fid=439 or fid=440 or fid=441 or fid=442 or "
      "fid=443 or fid=444 or fid=445 or fid=446 or fid=447 or fid=448 or "
      "fid=215 or fid=216 or fid=217 or fid=469 or fid=473 or fid=475 or "
      "fid=476 or fid=477 or fid=478 or fid=524 or fid=528 or fid=529 or "
      "fid=532 or fid=533 or fid=534 or fid=542 or fid=543 or fid=560 or "
      "fid=243 or fid=244 or fid=245 or fid=246 or fid=247 or fid=496 or "
      "fid=497 or fid=506 or fid=248 or fid=249 or fid=250 or fid=251 or "
      "fid=252 or fid=494 or fid=495 or fid=507 or fid=535 or fid=536 or "
      "fid=586 or fid=589 or fid=259 or fid=260 or fid=261 or fid=262 or "
      "fid=263 or fid=264 or fid=265 or fid=491 or fid=492 or fid=493 or "
      "fid=530 or fid=531 or fid=227 or fid=228 or fid=229 or fid=230 or "
      "fid=231 or fid=232 or fid=233 or fid=235 or fid=472 or fid=487 or "
      "fid=537 or fid=559 or fid=236 or fid=237 or fid=238 or fid=239 or "
      "fid=240 or fid=241 or fid=242 or fid=273 or fid=546 or fid=587 or "
      "fid=454 or fid=455 or fid=456 or fid=457 or fid=458 or fid=459 or "
      "fid=460 or fid=461 or fid=449 or fid=450 or fid=451 or fid=452 or "
      "fid=453 or fid=480 or fid=481 or fid=482 or fid=483 or fid=484 or "
      "fid=489 or fid=490 or fid=538 or fid=539 or fid=540 or fid=545 or "
      "fid=503 or fid=504 or fid=547 or fid=548 or fid=549 or fid=550 or "
      "fid=509 or fid=510 or fid=511 or fid=512 or fid=513 or fid=523 or "
      "fid=558 or fid=555 or fid=556 or fid=600 or fid=601 or fid=562 or "
      "fid=563 or fid=564 or fid=565 or fid=566 or fid=591 or fid=568 or "
      "fid=569 or fid=590 or fid=571 or fid=572 or fid=573 or fid=574 or "
      "fid=575 or fid=701 or fid=711 or fid=713 or fid=616 or fid=617 or "
      "fid=618 or fid=619 or fid=620 or fid=621 or fid=622 or fid=623 or "
      "fid=624 or fid=625 or fid=626 or fid=629 or fid=672 or fid=607 or "
      "fid=700 or fid=635 or fid=612 or fid=613 or fid=614 or fid=615 or "
      "fid=679 or fid=670 or fid=680 or fid=681 or fid=702 or fid=706 or "
      "fid=714 or fid=675 or fid=676 or fid=640 or fid=643 or fid=649 or "
      "fid=653 or fid=655 or fid=657 or fid=662 or fid=703 or fid=704 or "
      "fid=705 or fid=707 or fid=641 or fid=642 or fid=644 or fid=645 or "
      "fid=646 or fid=647 or fid=648 or fid=709 or fid=650 or fid=651 or "
      "fid=652 or fid=710 or fid=654 or fid=656 or fid=658 or fid=659 or "
      "fid=660 or fid=661 or fid=663 or fid=664 or fid=665 or fid=666 or "
      "fid=667 or fid=668 or fid=669 or fid=678)");
  // A null result means the filter could not be parsed; stop here.
  ASSERT_NE(info, nullptr);
  EXPECT_EQ(
      info->filter_cond()->text(),
      "((agent_id=20(FORWARD)) and (state=1(FORWARD))) and (fid in (107, 174, "
      "593, 602, 592, 134, 135, 136, 137, 138, 139, 141, 267, 271, 176, 177, "
      "178, 179, 180, 182, 183, 184, 270, 479, 488, 502, 508, 522, 553, 554, "
      "557, 561, 567, 570, 588, 594, 595, 596, 597, 598, 603, 604, 605, 606, "
      "426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, "
      "440, 441, 442, 443, 444, 445, 446, 447, 448, 215, 216, 217, 469, 473, "
      "475, 476, 477, 478, 524, 528, 529, 532, 533, 534, 542, 543, 560, 243, "
      "244, 245, 246, 247, 496, 497, 506, 248, 249, 250, 251, 252, 494, 495, "
      "507, 535, 536, 586, 589, 259, 260, 261, 262, 263, 264, 265, 491, 492, "
      "493, 530, 531, 227, 228, 229, 230, 231, 232, 233, 235, 472, 487, 537, "
      "559, 236, 237, 238, 239, 240, 241, 242, 273, 546, 587, 454, 455, 456, "
      "457, 458, 459, 460, 461, 449, 450, 451, 452, 453, 480, 481, 482, 483, "
      "484, 489, 490, 538, 539, 540, 545, 503, 504, 547, 548, 549, 550, 509, "
      "510, 511, 512, 513, 523, 558, 555, 556, 600, 601, 562, 563, 564, 565, "
      "566, 591, 568, 569, 590, 571, 572, 573, 574, 575, 701, 711, 713, 616, "
      "617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 629, 672, 607, 700, "
      "635, 612, 613, 614, 615, 679, 670, 680, 681, 702, 706, 714, 675, 676, "
      "640, 643, 649, 653, 655, 657, 662, 703, 704, 705, 707, 641, 642, 644, "
      "645, 646, 647, 648, 709, 650, 651, 652, 710, 654, 656, 658, 659, 660, "
      "661, 663, 664, 665, 666, 667, 668, 669, 678)(FORWARD))");
}

// OR chain of equality predicates on a string field; the expected text lists
// the values without their quotes.
TEST_F(EqOrRewriteTest, UserCases2) {
  const std::string filter =
      "partitionName = '114634' or "
      "partitionName = '114632' or partitionName = '114635' or partitionName = "
      "'114629' or partitionName = '114630' or partitionName = '114633' or "
      "partitionName = '114636' or partitionName = '114637' or partitionName = "
      "'114631'";
  const std::string expected =
      "partitionName in (114634, 114632, 114635, 114629, 114630, 114633, "
      "114636, 114637, 114631)(FORWARD)";

  const auto query_info = parse(filter);
  ASSERT_NE(query_info, nullptr);
  EXPECT_EQ(query_info->filter_cond()->text(), expected);
}

// Parenthesised OR chain of equality predicates on `doc_id` with 19-digit
// literals; the expected text is one IN list with the literals unchanged.
TEST_F(EqOrRewriteTest, UserCases3) {
  const std::string filter =
      "(doc_id=1319620650600837120 or "
      "doc_id=1319621497753739264 or doc_id=1319629144649367552 or "
      "doc_id=1319630319721377793 or doc_id=1319667286769324032 or "
      "doc_id=1319671157117808640 or doc_id=1319671403998793728 or "
      "doc_id=2319684930499055617 or doc_id=1319685259995140096)";
  const std::string expected =
      "doc_id in (1319620650600837120, 1319621497753739264, "
      "1319629144649367552, 1319630319721377793, 1319667286769324032, "
      "1319671157117808640, 1319671403998793728, 2319684930499055617, "
      "1319685259995140096)(FORWARD)";

  const auto query_info = parse(filter);
  ASSERT_NE(query_info, nullptr);
  EXPECT_EQ(query_info->filter_cond()->text(), expected);
}

// OR chain on a string field where one of the values is the empty string,
// AND-ed with a predicate on another field. The empty value shows up as an
// empty entry in the expected IN list.
TEST_F(EqOrRewriteTest, UserCases4) {
  const std::string filter =
      "(strAttr ='' or strAttr = 'prd') and "
      "categoryId = 4";
  const std::string expected =
      "(strAttr in (, prd)(FORWARD)) and (categoryId=4(FORWARD))";

  const auto query_info = parse(filter);
  ASSERT_NE(query_info, nullptr);
  EXPECT_EQ(query_info->filter_cond()->text(), expected);
}

// Upper-case OR chain whose last term is AND-ed with another predicate: the
// first three values form the IN list, the last one stays in the AND.
TEST_F(EqOrRewriteTest, UserCases5) {
  const std::string filter =
      "intAttr = 1  OR intAttr = 5  OR intAttr "
      "= 6  OR intAttr = 9  and categoryId = 1";
  const std::string expected =
      "(intAttr in (1, 5, 6)(FORWARD)(OR_A)) or "
      "((intAttr=9(FORWARD)(OR_A)) and (categoryId=1(FORWARD)(OR_A)))";

  const auto query_info = parse(filter);
  ASSERT_NE(query_info, nullptr);
  EXPECT_EQ(query_info->filter_cond()->text(), expected);
}

// OR chain of nine equality predicates on the `filename` string field; the
// expected text is one IN list with the unquoted file names in source order.
TEST_F(EqOrRewriteTest, UserCases6) {
  const std::string filter =
      "filename='OhbVrpoi.pdf' or "
      "filename='wRyoG4dB.pdf' or "
      "filename='dJ3fawFf.pdf' or "
      "filename='ZJS9dk3Q.pdf' or "
      "filename='fY2JD8dL.pdf' or "
      "filename='HnJpdoxC.pdf' or "
      "filename='Hbxm1zvi.pdf' or "
      "filename='r5Q8cxHu.pdf' or "
      "filename='dwF9cZtI.pdf'";
  const std::string expected =
      "filename in (OhbVrpoi.pdf, "
      "wRyoG4dB.pdf, "
      "dJ3fawFf.pdf, "
      "ZJS9dk3Q.pdf, "
      "fY2JD8dL.pdf, "
      "HnJpdoxC.pdf, "
      "Hbxm1zvi.pdf, "
      "r5Q8cxHu.pdf, "
      "dwF9cZtI.pdf)(FORWARD)";

  const auto query_info = parse(filter);
  ASSERT_NE(query_info, nullptr);
  EXPECT_EQ(query_info->filter_cond()->text(), expected);
}

// Range predicates on different fields joined by OR are expected to stay an
// OR of two separate conditions (no IN list).
TEST_F(EqOrRewriteTest, NotChanged1) {
  const std::string filter =
      "passed_days>3 and (loc >= "
      "500 or age > 10)";
  const std::string expected =
      "(passed_days>3(FORWARD)) and ((loc>=500(FORWARD)(OR_A)) "
      "or (age>10(FORWARD)(OR_A)))";

  const auto query_info = parse(filter);
  ASSERT_NE(query_info, nullptr);
  EXPECT_EQ(query_info->filter_cond()->text(), expected);
}

// `>` / `<` predicates on the same field joined by OR are expected to stay
// an OR of two separate conditions (no IN list).
TEST_F(EqOrRewriteTest, NotChanged2) {
  const std::string filter =
      "strAttr=\"online_252\" AND (intAttr > "
      "103775813 OR intAttr < 103775813) and categoryId = 88888888";
  const std::string expected =
      "((strAttr=online_252(FORWARD)) and ((intAttr>103775813(FORWARD)(OR_A)) "
      "or (intAttr<103775813(FORWARD)(OR_A)))) and "
      "(categoryId=88888888(FORWARD))";

  const auto query_info = parse(filter);
  ASSERT_NE(query_info, nullptr);
  EXPECT_EQ(query_info->filter_cond()->text(), expected);
}

// Equality predicates on two different boolean fields joined by OR are
// expected to stay an OR of two separate conditions.
TEST_F(EqOrRewriteTest, NotChanged3) {
  const std::string filter =
      "(is_type1 = true or is_type2 = "
      "true)";
  const std::string expected =
      "(is_type1=true(FORWARD)(OR_A)) or (is_type2=true(FORWARD)(OR_A))";

  const auto query_info = parse(filter);
  ASSERT_NE(query_info, nullptr);
  EXPECT_EQ(query_info->filter_cond()->text(), expected);
}

// A single `=` and a single `!=` on the same field joined by OR are expected
// to stay an OR of two separate conditions.
TEST_F(EqOrRewriteTest, NotChanged4) {
  const std::string filter = "(a = 1 or a != 2)";
  const std::string expected = "(a=1(FORWARD)(OR_A)) or (a!=2(FORWARD)(OR_A))";

  const auto query_info = parse(filter);
  ASSERT_NE(query_info, nullptr);
  EXPECT_EQ(query_info->filter_cond()->text(), expected);
}

// Empty subclass of SimpleRewriterTest that groups the tests covering
// `contain_all` / `contain_any` predicates with an empty value list.
class ContainRewriteTest : public SimpleRewriterTest {};

// `contain_all ()` with an empty list is expected to be reported as an
// IS_NOT_NULL condition on the field.
TEST_F(ContainRewriteTest, ContainAllEmptySet) {
  const std::string filter = "category_array contain_all ()";
  const std::string expected = "category_array IS_NOT_NULL (FORWARD)";

  const auto query_info = parse(filter);
  ASSERT_NE(query_info, nullptr);
  EXPECT_EQ(query_info->filter_cond()->text(), expected);
}

// `not contain_all ()` with an empty list is expected to mark the whole
// filter as unsatisfiable.
TEST_F(ContainRewriteTest, NotContainAllEmptySet) {
  auto info = parse("category_array not contain_all ()");
  ASSERT_NE(info, nullptr);
  // Boolean checks use EXPECT_TRUE rather than comparing against `true`.
  EXPECT_TRUE(info->is_filter_unsatisfiable());
}

// `not contain_any ()` with an empty list is expected to be reported as an
// IS_NOT_NULL condition on the field.
TEST_F(ContainRewriteTest, NotContainAnyEmptySet) {
  const std::string filter = "category_array not contain_any ()";
  const std::string expected = "category_array IS_NOT_NULL (FORWARD)";

  const auto query_info = parse(filter);
  ASSERT_NE(query_info, nullptr);
  EXPECT_EQ(query_info->filter_cond()->text(), expected);
}

// `contain_any ()` with an empty list is expected to mark the whole filter
// as unsatisfiable.
TEST_F(ContainRewriteTest, ContainAnyEmptySet) {
  auto info = parse("category_array contain_any ()");
  ASSERT_NE(info, nullptr);
  // Boolean checks use EXPECT_TRUE rather than comparing against `true`.
  EXPECT_TRUE(info->is_filter_unsatisfiable());
}

// An empty-list `not contain_all ()` AND-ed with another predicate is
// expected to make the whole filter unsatisfiable.
TEST_F(ContainRewriteTest, AlwaysFalseConditionAnd) {
  auto info = parse("category_array not contain_all () and a = 1");
  ASSERT_NE(info, nullptr);
  // Boolean checks use EXPECT_TRUE rather than comparing against `true`.
  EXPECT_TRUE(info->is_filter_unsatisfiable());
}

// An empty-list `not contain_all ()` at the head of a longer AND chain is
// expected to make the whole filter unsatisfiable.
TEST_F(ContainRewriteTest, AlwaysFalseConditionMultiAnd) {
  auto info = parse(
      "category_array not contain_all () and a > 1 and a > 2 and a > 3 and a > "
      "4");
  ASSERT_NE(info, nullptr);
  // Boolean checks use EXPECT_TRUE rather than comparing against `true`.
  EXPECT_TRUE(info->is_filter_unsatisfiable());
}

// An empty-list `not contain_all ()` OR-ed with another predicate is expected
// to leave only that other predicate in the filter text.
TEST_F(ContainRewriteTest, AlwaysFalseConditionOr) {
  const std::string filter = "category_array not contain_all () or a = 1";
  const std::string expected = "a=1(FORWARD)";

  const auto query_info = parse(filter);
  ASSERT_NE(query_info, nullptr);
  EXPECT_EQ(query_info->filter_cond()->text(), expected);
}

// An empty-list `not contain_all ()` at the head of a longer OR chain is
// expected to leave only the remaining predicates in the filter text.
TEST_F(ContainRewriteTest, AlwaysFalseConditionMultiOr) {
  const std::string filter =
      "category_array not contain_all () or a > 1 or a > 2 or a > 3";
  const std::string expected =
      "((a>1(FORWARD)(OR_A)) or (a>2(FORWARD)(OR_A))) or (a>3(FORWARD)(OR_A))";

  const auto query_info = parse(filter);
  ASSERT_NE(query_info, nullptr);
  EXPECT_EQ(query_info->filter_cond()->text(), expected);
}

// A parenthesised OR group AND-ed with an empty-list `contain_any ()` is
// expected to make the whole filter unsatisfiable.
TEST_F(ContainRewriteTest, AlwaysFalseConditionAndComplex) {
  auto info = parse("(a > 1 or a < 0) and category_array contain_any () ");
  ASSERT_NE(info, nullptr);
  // Boolean checks use EXPECT_TRUE rather than comparing against `true`.
  EXPECT_TRUE(info->is_filter_unsatisfiable());
}

// A parenthesised OR group OR-ed with an empty-list `contain_any ()` is
// expected to stay satisfiable and to keep only the OR group in the text.
TEST_F(ContainRewriteTest, AlwaysFalseConditionOrComplex) {
  auto info = parse("(a > 1 or a < 0) or category_array contain_any () ");
  ASSERT_NE(info, nullptr);
  // Fatal on purpose: when the flag is wrong the test stops here, so the
  // filter condition is not dereferenced for the text comparison below.
  ASSERT_FALSE(info->is_filter_unsatisfiable());
  EXPECT_EQ(info->filter_cond()->text(),
            "(a>1(FORWARD)(OR_A)) or (a<0(FORWARD)(OR_A))");
}

// An equality OR chain followed by an OR-ed empty-list `contain_any ()` is
// expected to stay satisfiable and to be reported as a single IN list.
TEST_F(SimpleRewriterTest, MiscOr) {
  auto info = parse("a = 1 or a = 2 or a = 3 or category_array contain_any ()");
  ASSERT_NE(info, nullptr);
  // Fatal on purpose: when the flag is wrong the test stops here, so the
  // filter condition is not dereferenced for the text comparison below.
  ASSERT_FALSE(info->is_filter_unsatisfiable());
  EXPECT_EQ(info->filter_cond()->text(), "a in (1, 2, 3)(FORWARD)");
}

// A parenthesised equality OR chain AND-ed with an empty-list
// `contain_any ()` is expected to make the whole filter unsatisfiable.
TEST_F(SimpleRewriterTest, MiscAnd) {
  auto info =
      parse("(a = 1 or a = 2 or a = 3) and category_array contain_any ()");
  ASSERT_NE(info, nullptr);
  // Boolean checks use EXPECT_TRUE rather than comparing against `true`.
  EXPECT_TRUE(info->is_filter_unsatisfiable());
}


}  // namespace zvec::sqlengine


================================================
FILE: tests/db/sqlengine/sqlengine_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "db/sqlengine/sqlengine.h"
#include <cstdint>
#include <memory>
#include <gtest/gtest.h>
#include "zvec/db//schema.h"
#include "zvec/db/query_params.h"
#include "zvec/db/type.h"
#include "mock_segment.h"

namespace zvec::sqlengine {

// Fixture that rebuilds the "test_collection" schema before every test.
class SqlEngineTest : public testing::Test {
 public:
  // Builds a schema with four scalar fields, one array field and one sparse
  // FP32 vector field that uses a FLAT index with the IP metric.
  void SetUp() override {
    auto vector_params = std::make_shared<FlatIndexParams>(MetricType::IP);

    std::vector<FieldSchema::Ptr> fields;
    fields.push_back(
        std::make_shared<FieldSchema>("id", DataType::INT32, false, 0, nullptr));
    // "name" is the only scalar field that carries an invert index.
    fields.push_back(std::make_shared<FieldSchema>(
        "name", DataType::STRING, false, 0,
        std::make_shared<InvertIndexParams>(false)));
    fields.push_back(std::make_shared<FieldSchema>("age", DataType::INT64,
                                                   false, 0, nullptr));
    fields.push_back(std::make_shared<FieldSchema>("score", DataType::DOUBLE,
                                                   false, 0, nullptr));
    fields.push_back(std::make_shared<FieldSchema>(
        "tag_list", DataType::ARRAY_INT32, false, 0, nullptr));
    fields.push_back(std::make_shared<FieldSchema>(
        "vector", DataType::SPARSE_VECTOR_FP32, false, 4, vector_params));

    schema_ = std::make_shared<CollectionSchema>("test_collection",
                                                 std::move(fields));
  }

 protected:
  // Schema handed to the engine by every test.
  CollectionSchema::Ptr schema_;
};

// Runs a filter-only query (no query vector) against one mock segment and
// expects execution to succeed. The filter can be overridden through the
// FILTER environment variable.
TEST_F(SqlEngineTest, Forward) {
  VectorQuery query;
  query.topk_ = 11;
  query.include_vector_ = true;
  query.output_fields_ = {"id", "name", "age", "tag_list"};

  // Other filters that have been tried with this test:
  //   "id > 3 and score < 0.1"
  //   "name like 'name_2%'"
  //   "name not in ('name_2','name_4')"
  //   "tag_list contain_all (1,2,3,4)"
  query.filter_ = "tag_list is null";
  if (const char *filter_override = std::getenv("FILTER")) {
    query.filter_ = filter_override;
  }

  std::vector<Segment::Ptr> segments;
  segments.push_back(std::make_shared<MockSegment>());

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(schema_, query, segments);
  if (!ret.has_value()) {
    LOG_ERROR("execute failed: [%s]", ret.error().c_str());
  }
  EXPECT_TRUE(ret.has_value());
}

// Runs a sparse-vector query with a scalar filter and a radius of 0.8 against
// one mock segment and expects execution to succeed. The filter can be
// overridden through the FILTER environment variable.
TEST_F(SqlEngineTest, Vector) {
  VectorQuery query;
  query.topk_ = 11;
  query.include_vector_ = true;
  query.output_fields_ = {"id", "name", "score"};

  // Sparse query on the "vector" field (a dense alternative would be
  // query_vector_ = "[0.1, 0.2, 0.3, 0.4]").
  query.field_name_ = "vector";
  query.query_sparse_indices_ = "[0, 1, 2, 3]";
  query.query_sparse_values_ = "[0.1, 0.2, 0.3, 0.4]";
  query.query_params_ = std::make_shared<QueryParams>(IndexType::FLAT);
  query.query_params_->set_radius(0.8F);

  query.filter_ = "id > 3 and score < 0.1";
  if (const char *filter_override = std::getenv("FILTER")) {
    query.filter_ = filter_override;
  }

  std::vector<Segment::Ptr> segments;
  segments.push_back(std::make_shared<MockSegment>());

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(schema_, query, segments);
  if (!ret.has_value()) {
    LOG_ERROR("execute failed: [%s]", ret.error().c_str());
  }
  EXPECT_TRUE(ret.has_value());
}

// Runs a filter-only query on the invert-indexed "name" field against one
// mock segment and expects execution to succeed. The filter can be overridden
// through the FILTER environment variable.
TEST_F(SqlEngineTest, Invert) {
  VectorQuery query;
  query.topk_ = 11;
  query.include_vector_ = true;
  query.output_fields_ = {"id", "age", "score"};

  // Another filter that has been tried here: "name = 'test_name'".
  query.filter_ = "name is not null";
  if (const char *filter_override = std::getenv("FILTER")) {
    query.filter_ = filter_override;
  }

  std::vector<Segment::Ptr> segments;
  segments.push_back(std::make_shared<MockSegment>());

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(schema_, query, segments);
  if (!ret.has_value()) {
    LOG_ERROR("execute failed: [%s]", ret.error().c_str());
  }
  EXPECT_TRUE(ret.has_value());
}

// Runs a vector query without a default filter against two mock segments and
// expects execution to succeed. A filter can be supplied through the FILTER
// environment variable.
TEST_F(SqlEngineTest, MultiSegments) {
  VectorQuery query;
  query.topk_ = 11;
  query.output_fields_ = {"id", "name", "age", "score"};
  query.field_name_ = "vector";
  query.query_vector_ = "[0.1, 0.2, 0.3, 0.4]";

  // No filter by default ("name = 'test_name'" has been tried here).
  if (const char *filter_override = std::getenv("FILTER")) {
    query.filter_ = filter_override;
  }

  std::vector<Segment::Ptr> segments;
  segments.push_back(std::make_shared<MockSegment>());
  segments.push_back(std::make_shared<MockSegment>());

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(schema_, query, segments);
  if (!ret.has_value()) {
    LOG_ERROR("execute failed: [%s]", ret.error().c_str());
  }
  EXPECT_TRUE(ret.has_value());
}

// Runs a group-by sparse-vector query (grouped on "name", 3 groups, top 2 per
// group) against one mock segment and expects execution to succeed. The
// filter can be overridden through the FILTER environment variable.
TEST_F(SqlEngineTest, GroupBy) {
  GroupByVectorQuery query;
  query.group_by_field_name_ = "name";
  query.group_count_ = 3;
  query.group_topk_ = 2;
  query.include_vector_ = true;
  query.output_fields_ = {"id", "name", "score"};

  // Sparse query on the "vector" field (a dense alternative would be
  // query_vector_ = "[0.1, 0.2, 0.3, 0.4]").
  query.field_name_ = "vector";
  query.query_sparse_indices_ = "[0, 1, 2, 3]";
  query.query_sparse_values_ = "[0.1, 0.2, 0.3, 0.4]";
  query.query_params_ = std::make_shared<QueryParams>(IndexType::FLAT);
  query.query_params_->set_radius(0.8F);

  query.filter_ = "id > 3 and score < 0.1";
  if (const char *filter_override = std::getenv("FILTER")) {
    query.filter_ = filter_override;
  }

  std::vector<Segment::Ptr> segments;
  segments.push_back(std::make_shared<MockSegment>());

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute_group_by(schema_, query, segments);
  if (!ret.has_value()) {
    LOG_ERROR("execute failed: [%s]", ret.error().c_str());
  }
  EXPECT_TRUE(ret.has_value());
}

}  // namespace zvec::sqlengine


================================================
FILE: tests/db/sqlengine/test_helper.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License

#pragma once

#include <chrono>
#include <cstdint>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <memory>
#include <string>
#include <arrow/api.h>
#include <arrow/io/api.h>
#include <arrow/ipc/api.h>
#include <gtest/gtest.h>
#include "db/common/file_helper.h"
#include "db/index/common/version_manager.h"
#include "db/index/segment/segment.h"
#include "db/sqlengine/sqlengine.h"
#include "zvec/db/index_params.h"
#include "zvec/db/schema.h"
#include "zvec/db/type.h"

namespace zvec::sqlengine {

using CreateDocFun = Doc (*)(const uint64_t doc_id);

/// Inserts the documents with ids in [start_doc_id, end_doc_id) into
/// `segment` and prints the accumulated creation and insertion time.
///
/// @param segment       Target segment. When null, nothing is created or
///                      inserted; the (zero) totals are still printed.
/// @param start_doc_id  First document id (inclusive).
/// @param end_doc_id    Last document id (exclusive).
/// @param create_doc    Factory producing the document for a given id.
/// @return The first non-ok status returned by Segment::Insert, otherwise
///         Status::OK().
inline Status InsertDoc(const Segment::Ptr &segment,
                        const uint64_t start_doc_id, const uint64_t end_doc_id,
                        CreateDocFun create_doc) {
  // Elapsed time is measured with the monotonic steady_clock so that
  // wall-clock adjustments cannot skew (or negate) the reported durations.
  using Clock = std::chrono::steady_clock;
  const auto elapsed_us = [](Clock::time_point from, Clock::time_point to) {
    return static_cast<long long>(
        std::chrono::duration_cast<std::chrono::microseconds>(to - from)
            .count());
  };

  // Seed rand() once per call; presumably consumed by the create_doc
  // factories -- verify against callers.
  std::srand(static_cast<unsigned int>(std::time(nullptr)));

  long long create_total = 0;
  long long insert_total = 0;
  if (segment) {
    for (uint64_t doc_id = start_doc_id; doc_id < end_doc_id; ++doc_id) {
      const auto create_begin = Clock::now();
      Doc new_doc = create_doc(doc_id);
      const auto create_end = Clock::now();
      create_total += elapsed_us(create_begin, create_end);

      const auto insert_begin = Clock::now();
      auto status = segment->Insert(new_doc);
      if (!status.ok()) {
        // Abort on the first failure; totals are not printed in this case.
        return status;
      }
      insert_total += elapsed_us(insert_begin, Clock::now());
    }
  }
  std::cout << "pure create cost " << create_total << "us" << std::endl;
  std::cout << "pure insert cost " << insert_total << "us" << std::endl;
  return Status::OK();
}

/// Creates and opens a writable, mmap-enabled segment under `seg_path` for
/// use in tests.
///
/// The helper builds the collaborators Segment::CreateAndOpen needs: an id
/// map stored at `<seg_path>/id_map`, a delete store, and a version manager
/// rooted at `<seg_path>/test_manifest` whose initial version carries
/// `schema`.
///
/// @param seg_path  Directory that holds the segment and its side files.
/// @param schema    Collection schema for the segment.
/// @return The opened segment, or nullptr (after logging the error) when the
///         version manager or the segment cannot be created.
inline Segment::Ptr create_segment(const std::string &seg_path,
                                   const CollectionSchema &schema) {
  auto id_map = IDMap::CreateAndOpen("test_collection", seg_path + "/id_map",
                                     true, false);
  auto delete_store = std::make_shared<DeleteStore>("test_collection");

  Version initial_version;
  initial_version.set_schema(schema);
  const std::string manifest_path = seg_path + "/test_manifest";
  // NOTE(review): the result of CreateDirectory is not inspected here (same
  // as before); a failure presumably surfaces through VersionManager::Create.
  FileHelper::CreateDirectory(manifest_path);
  auto vm = VersionManager::Create(manifest_path, initial_version);
  if (!vm.has_value()) {
    LOG_ERROR("create version manager failed: %s", vm.error().c_str());
    return nullptr;
  }

  SegmentOptions options;
  options.read_only_ = false;
  options.enable_mmap_ = true;

  auto result = Segment::CreateAndOpen(seg_path, schema, 0, 0, id_map,
                                       delete_store, vm.value(), options);
  if (!result) {
    LOG_ERROR("create segment failed: %s", result.error().c_str());
    return nullptr;
  }
  return result.value();
}

}  // namespace zvec::sqlengine

================================================
FILE: tests/db/sqlengine/vector_recall_test.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License

#include <cstdint>
#include <memory>
#include <gtest/gtest.h>
#include "db/sqlengine/sqlengine.h"
#include "recall_base.h"

namespace zvec::sqlengine {

// Fixture for the vector recall tests below. It adds nothing of its own; the
// tests rely on collection_schema_ and segments_ inherited from RecallTest
// (presumably declared in recall_base.h -- not visible in this file).
class VectorRecallTest : public RecallTest {};

// Unfiltered dense top-k search with an all-zero query vector. The result is
// expected to be pk_0 .. pk_199 in order, with age/name derived from
// (i % 100) and score i*i*4. The expected values mirror the fixture data,
// which is built outside this file.
TEST_F(VectorRecallTest, Basic) {
  VectorQuery query;
  query.output_fields_ = {"id", "name", "age"};
  query.topk_ = 200;
  // The query vector is passed as the raw bytes of four float zeros.
  std::vector<float> feature(4, 0.0);
  query.query_vector_.assign(reinterpret_cast<const char *>(feature.data()),
                             feature.size() * sizeof(float));
  query.field_name_ = "dense";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  if (!ret) {
    LOG_ERROR("execute failed: [%s]", ret.error().c_str());
  }
  ASSERT_TRUE(ret.has_value());
  auto docs = ret.value();
  // Fatal on mismatch: every returned doc is inspected below, and a wrong
  // result count already invalidates the positional expectations.
  ASSERT_EQ(docs.size(), query.topk_);
  for (size_t j = 0; j < docs.size(); j++) {
    auto &doc = docs[j];
    const int i = static_cast<int>(j);
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(i));
    auto age = doc->get<int32_t>("age");
    ASSERT_TRUE(age);
    EXPECT_EQ(age.value(), i % 100);
    auto name = doc->get<std::string>("name");
    ASSERT_TRUE(name);
    EXPECT_EQ(name.value(), "user_" + std::to_string(i % 100));
    EXPECT_FLOAT_EQ(doc->score(), static_cast<float>(i) * i * 4);
  }
}

// Dense top-k search combined with the filter "invert_id >= 1". With doc 0
// filtered out, the result is expected to be pk_1 .. pk_200 in order. The
// expected values mirror the fixture data, which is built outside this file.
TEST_F(VectorRecallTest, HybridInvertFilter) {
  VectorQuery query;
  query.output_fields_ = {"id", "name", "age"};
  query.filter_ = "invert_id >= 1";
  query.topk_ = 200;
  // The query vector is passed as the raw bytes of four float zeros.
  std::vector<float> feature(4, 0.0);
  query.query_vector_.assign(reinterpret_cast<const char *>(feature.data()),
                             feature.size() * sizeof(float));
  query.field_name_ = "dense";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  if (!ret) {
    LOG_ERROR("execute failed: [%s]", ret.error().c_str());
  }
  ASSERT_TRUE(ret.has_value());
  auto docs = ret.value();
  // Fatal on mismatch: every returned doc is inspected below, and a wrong
  // result count already invalidates the positional expectations.
  ASSERT_EQ(docs.size(), query.topk_);
  for (size_t j = 0; j < docs.size(); j++) {
    auto &doc = docs[j];
    // Result position j holds doc id j + 1 because id 0 is filtered out.
    const int i = static_cast<int>(j) + 1;
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(i));
    auto age = doc->get<int32_t>("age");
    ASSERT_TRUE(age);
    EXPECT_EQ(age.value(), i % 100);
    auto name = doc->get<std::string>("name");
    ASSERT_TRUE(name);
    EXPECT_EQ(name.value(), "user_" + std::to_string(i % 100));
    EXPECT_FLOAT_EQ(doc->score(), static_cast<float>(i) * i * 4);
  }
}

// Dense top-k search combined with the filter "invert_id < 199" and
// topk = 199, so the filter admits exactly as many docs as requested. The
// result is expected to be pk_0 .. pk_198 in order. Judging by the test name
// this targets a brute-force-by-keys execution path -- TODO confirm.
TEST_F(VectorRecallTest, HybridInvertFilterBfByKeys) {
  VectorQuery query;
  query.output_fields_ = {"id", "name", "age"};
  query.filter_ = "invert_id < 199";
  query.topk_ = 199;
  // The query vector is passed as the raw bytes of four float zeros.
  std::vector<float> feature(4, 0.0);
  query.query_vector_.assign(reinterpret_cast<const char *>(feature.data()),
                             feature.size() * sizeof(float));
  query.field_name_ = "dense";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  if (!ret) {
    LOG_ERROR("execute failed: [%s]", ret.error().c_str());
  }
  ASSERT_TRUE(ret.has_value());
  auto docs = ret.value();
  // Fatal on mismatch: every returned doc is inspected below, and a wrong
  // result count already invalidates the positional expectations.
  ASSERT_EQ(docs.size(), query.topk_);
  for (size_t j = 0; j < docs.size(); j++) {
    auto &doc = docs[j];
    const int i = static_cast<int>(j);
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(i));
    auto age = doc->get<int32_t>("age");
    ASSERT_TRUE(age);
    EXPECT_EQ(age.value(), i % 100);
    auto name = doc->get<std::string>("name");
    ASSERT_TRUE(name);
    EXPECT_EQ(name.value(), "user_" + std::to_string(i % 100));
    EXPECT_FLOAT_EQ(doc->score(), static_cast<float>(i) * i * 4);
  }
}

// Dense top-k search combined with the filter "id >= 1". With doc 0 filtered
// out, the result is expected to be pk_1 .. pk_200 in order. The expected
// values mirror the fixture data, which is built outside this file.
TEST_F(VectorRecallTest, HybridForwardFilter) {
  VectorQuery query;
  query.output_fields_ = {"id", "name", "age"};
  query.filter_ = "id >= 1";
  query.topk_ = 200;
  // The query vector is passed as the raw bytes of four float zeros.
  std::vector<float> feature(4, 0.0);
  query.query_vector_.assign(reinterpret_cast<const char *>(feature.data()),
                             feature.size() * sizeof(float));
  query.field_name_ = "dense";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  if (!ret) {
    LOG_ERROR("execute failed: [%s]", ret.error().c_str());
  }
  ASSERT_TRUE(ret.has_value());
  auto docs = ret.value();
  // Fatal on mismatch: every returned doc is inspected below, and a wrong
  // result count already invalidates the positional expectations.
  ASSERT_EQ(docs.size(), query.topk_);
  for (size_t j = 0; j < docs.size(); j++) {
    auto &doc = docs[j];
    // Result position j holds doc id j + 1 because id 0 is filtered out.
    const int i = static_cast<int>(j) + 1;
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(i));
    auto age = doc->get<int32_t>("age");
    ASSERT_TRUE(age);
    EXPECT_EQ(age.value(), i % 100);
    auto name = doc->get<std::string>("name");
    ASSERT_TRUE(name);
    EXPECT_EQ(name.value(), "user_" + std::to_string(i % 100));
    EXPECT_FLOAT_EQ(doc->score(), static_cast<float>(i) * i * 4);
  }
}

// Dense top-k search combined with the filter "invert_id >= 1 and id <= 100".
// Although topk is 200, exactly 100 docs are expected back: pk_1 .. pk_100
// in order. The expected values mirror the fixture data, which is built
// outside this file.
TEST_F(VectorRecallTest, HybridInvertForwardFilter) {
  VectorQuery query;
  query.output_fields_ = {"name", "age"};
  query.filter_ = "invert_id >= 1 and id <= 100";
  query.topk_ = 200;
  // The query vector is passed as the raw bytes of four float zeros.
  std::vector<float> feature(4, 0.0);
  query.query_vector_.assign(reinterpret_cast<const char *>(feature.data()),
                             feature.size() * sizeof(float));
  query.field_name_ = "dense";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  if (!ret) {
    LOG_ERROR("execute failed: [%s]", ret.error().c_str());
  }
  ASSERT_TRUE(ret.has_value());
  auto docs = ret.value();
  EXPECT_EQ(docs.size(), 100);
  for (size_t j = 0; j < docs.size(); j++) {
    auto &doc = docs[j];
    // Result position j holds doc id j + 1 because id 0 is filtered out.
    const int doc_id = static_cast<int>(j) + 1;
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(doc_id));
    auto age = doc->get<int32_t>("age");
    ASSERT_TRUE(age);
    EXPECT_EQ(age.value(), doc_id % 100);
    auto name = doc->get<std::string>("name");
    ASSERT_TRUE(name);
    EXPECT_EQ(name.value(), "user_" + std::to_string(doc_id % 100));
    EXPECT_FLOAT_EQ(doc->score(), static_cast<float>(doc_id) * doc_id * 4);
  }
}

// Sparse top-k search on field "sparse" with indices {0,1,2,3} and all-one
// values. Results are expected in descending doc-id order starting at
// pk_9999 with score doc_id * 4, skipping every doc id whose (id % 100) <= 3.
// The expected values mirror the fixture data, which is built outside this
// file.
TEST_F(VectorRecallTest, Sparse) {
  VectorQuery query;
  query.output_fields_ = {"id", "name", "age"};
  query.topk_ = 200;
  // Indices and values are passed as the raw bytes of their arrays.
  std::vector<float> feature(4, 1.0);
  std::vector<uint32_t> indices{0, 1, 2, 3};
  query.query_sparse_indices_.assign(
      reinterpret_cast<const char *>(indices.data()),
      indices.size() * sizeof(uint32_t));
  query.query_sparse_values_.assign(
      reinterpret_cast<const char *>(feature.data()),
      feature.size() * sizeof(float));
  query.field_name_ = "sparse";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  if (!ret) {
    LOG_ERROR("execute failed: [%s]", ret.error().c_str());
  }
  ASSERT_TRUE(ret.has_value());
  auto docs = ret.value();
  EXPECT_EQ(docs.size(), query.topk_);

  int doc_id = 9999;
  for (size_t j = 0; j < docs.size(); j++) {
    auto &doc = docs[j];
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(doc_id));
    auto age = doc->get<int32_t>("age");
    ASSERT_TRUE(age);
    EXPECT_EQ(age.value(), doc_id % 100);
    auto name = doc->get<std::string>("name");
    ASSERT_TRUE(name);
    EXPECT_EQ(name.value(), "user_" + std::to_string(doc_id % 100));
    EXPECT_FLOAT_EQ(doc->score(), static_cast<float>(doc_id) * 4);
    // Advance to the next expected doc id, skipping ids with (id % 100) <= 3,
    // which are not expected in the result.
    doc_id--;
    while (doc_id % 100 <= 3) {
      doc_id--;
    }
  }
}

// Deletes pk_0 .. pk_3999 from the first segment, then runs an unfiltered
// dense top-100 search. The nearest remaining docs are expected to be
// pk_4000 .. pk_4099 in order.
TEST_F(VectorRecallTest, DeleteFilter) {
  // This test uses only one segment and thus we only operate on the first one
  // NOTE(review): the result of Delete() is not checked; a failed delete
  // would only show up through the expectations further down.
  for (int i = 0; i < 4000; i++) {
    segments_[0]->Delete("pk_" + std::to_string(i));
  }

  VectorQuery query;
  query.output_fields_ = {"name", "age"};
  query.topk_ = 100;
  // The query vector is passed as the raw bytes of four float zeros.
  std::vector<float> feature(4, 0.0);
  query.query_vector_.assign(reinterpret_cast<const char *>(feature.data()),
                             feature.size() * sizeof(float));
  query.field_name_ = "dense";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  if (!ret) {
    LOG_ERROR("execute failed: [%s]", ret.error().c_str());
  }
  ASSERT_TRUE(ret.has_value());
  auto docs = ret.value();
  EXPECT_EQ(docs.size(), 100);
  for (size_t j = 0; j < docs.size(); j++) {
    auto &doc = docs[j];
    // The first surviving doc id is 4000.
    const int doc_id = static_cast<int>(j) + 4000;
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(doc_id));
    auto age = doc->get<int32_t>("age");
    ASSERT_TRUE(age);
    EXPECT_EQ(age.value(), doc_id % 100);
    auto name = doc->get<std::string>("name");
    ASSERT_TRUE(name);
    EXPECT_EQ(name.value(), "user_" + std::to_string(doc_id % 100));
    EXPECT_FLOAT_EQ(doc->score(), static_cast<float>(doc_id) * doc_id * 4);
  }
}

// Dense top-100 search combined with the filter
// "invert_id >= 6000 and id < 6080". Exactly 80 docs are expected back:
// pk_6000 .. pk_6079 in order.
TEST_F(VectorRecallTest, HybridInvertForwardDeleteFilter) {
  // In previous test, docs[0-4000) has been deleted
  VectorQuery query;
  query.output_fields_ = {"name", "age"};
  query.filter_ = "invert_id >= 6000 and id < 6080";
  query.topk_ = 100;
  // The query vector is passed as the raw bytes of four float zeros.
  std::vector<float> feature(4, 0.0);
  query.query_vector_.assign(reinterpret_cast<const char *>(feature.data()),
                             feature.size() * sizeof(float));
  query.field_name_ = "dense";

  auto engine = SQLEngine::create(std::make_shared<Profiler>());
  auto ret = engine->execute(collection_schema_, query, segments_);
  if (!ret) {
    LOG_ERROR("execute failed: [%s]", ret.error().c_str());
  }
  ASSERT_TRUE(ret.has_value());
  auto docs = ret.value();
  EXPECT_EQ(docs.size(), 80);
  for (size_t j = 0; j < docs.size(); j++) {
    auto &doc = docs[j];
    // The filter's lower bound makes 6000 the first expected doc id.
    const int doc_id = static_cast<int>(j) + 6000;
    EXPECT_EQ(doc->pk(), "pk_" + std::to_string(doc_id));
    auto age = doc->get<int32_t>("age");
    ASSERT_TRUE(age);
    EXPECT_EQ(age.value(), doc_id % 100);
    auto name = doc->get<std::string>("name");
    ASSERT_TRUE(name);
    EXPECT_EQ(name.value(), "user_" + std::to_string(doc_id % 100));
    EXPECT_FLOAT_EQ(doc->score(), static_cast<float>(doc_id) * doc_id * 4);
  }
}

}  // namespace zvec::sqlengine


================================================
FILE: thirdparty/CMakeLists.txt
================================================
# Aggregates the vendored third-party dependencies into one CMake project.
cmake_minimum_required(VERSION 3.1 FATAL_ERROR)
cmake_policy(SET CMP0048 NEW)
project(thirdparty)

# PROJECT_ROOT_DIR is not set in this file; presumably it is provided by the
# top-level CMakeLists.txt -- TODO confirm.
include(${PROJECT_ROOT_DIR}/cmake/utils.cmake)

set(CMAKE_MACOSX_RPATH ON)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
# Shared staging area under the build tree. The per-library CMakeLists files
# refer to these variables for their headers and static archives.
set(EXTERNAL_BINARY_DIR ${CMAKE_BINARY_DIR}/external)
set(EXTERNAL_INC_DIR ${CMAKE_BINARY_DIR}/external/usr/local/include)
set(EXTERNAL_LIB_DIR ${CMAKE_BINARY_DIR}/external/usr/local/lib)
# Create the directories at configure time so they exist before any build
# step writes into them.
file(MAKE_DIRECTORY ${EXTERNAL_INC_DIR})
file(MAKE_DIRECTORY ${EXTERNAL_LIB_DIR})

# EXCLUDE_FROM_ALL keeps each dependency's targets out of the default build;
# they are built only when something in the default build depends on them.
add_subdirectory(googletest googletest EXCLUDE_FROM_ALL)
add_subdirectory(gflags gflags EXCLUDE_FROM_ALL)
add_subdirectory(glog glog EXCLUDE_FROM_ALL)
add_subdirectory(sparsehash sparsehash EXCLUDE_FROM_ALL)
add_subdirectory(yaml-cpp yaml-cpp EXCLUDE_FROM_ALL)
add_subdirectory(protobuf protobuf EXCLUDE_FROM_ALL)
add_subdirectory(antlr antlr EXCLUDE_FROM_ALL)
add_subdirectory(lz4 lz4 EXCLUDE_FROM_ALL)
add_subdirectory(rocksdb rocksdb EXCLUDE_FROM_ALL)
add_subdirectory(CRoaring CRoaring EXCLUDE_FROM_ALL)
add_subdirectory(arrow arrow EXCLUDE_FROM_ALL)
add_subdirectory(magic_enum magic_enum EXCLUDE_FROM_ALL)
add_subdirectory(RaBitQ-Library RaBitQ-Library EXCLUDE_FROM_ALL)


================================================
FILE: thirdparty/CRoaring/CMakeLists.txt
================================================
# Builds the vendored CRoaring 2.0.4 sources and exports ROARING_* variables
# to the parent scope.
set(ENABLE_ROARING_TESTS OFF CACHE BOOL "Disable testing in CRoaring" FORCE)

# Temporarily redirect static-archive output to EXTERNAL_LIB_DIR while the
# CRoaring subdirectory is processed, then restore the previous value.
set(_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY})
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${EXTERNAL_LIB_DIR})
add_subdirectory(CRoaring-2.0.4)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY})
unset(_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY)

# Silence two warning classes for the roaring target when building with GCC.
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
    target_compile_options(roaring PRIVATE 
        -Wno-unused-parameter 
        -Wno-unused-but-set-variable
    )
endif()

# NOTE(review): INTERFACE_SOURCE_DIR does not appear among CMake's documented
# target properties (the gflags script in this tree queries SOURCE_DIR). If
# the property is unset, roaring_SOURCE_DIR becomes a *-NOTFOUND value and the
# include paths exported below are invalid -- confirm whether consumers rely
# on these variables or on the roaring target itself.
get_target_property(roaring_SOURCE_DIR roaring INTERFACE_SOURCE_DIR)
set(ROARING_FOUND TRUE PARENT_SCOPE)
set(ROARING_INCLUDE_DIR ${roaring_SOURCE_DIR}/include PARENT_SCOPE)
set(ROARING_INCLUDE_DIRS ${roaring_SOURCE_DIR}/include PARENT_SCOPE)
set(ROARING_LIBRARY $<TARGET_FILE:roaring> PARENT_SCOPE)
set(ROARING_LIBRARIES $<TARGET_FILE:roaring> PARENT_SCOPE)


================================================
FILE: thirdparty/RaBitQ-Library/CMakeLists.txt
================================================
# Interface-only target for the vendored RaBitQ library: nothing is compiled
# here; linking against rabitqlib only adds the include directory below.
add_library(rabitqlib INTERFACE)
target_include_directories(
    rabitqlib INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}/RaBitQ-Library-0.1/include"
  )


================================================
FILE: thirdparty/antlr/CMakeLists.txt
================================================
# Patches and builds the vendored ANTLR4 C++ runtime, then exposes it as the
# imported target `antlr4`.
set(ANTLR_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/antlr4)
set(ANTLR_PATCH ${CMAKE_CURRENT_SOURCE_DIR}/antlr4.patch)
# apply_patch_once is not defined in this file; presumably it comes from
# cmake/utils.cmake and applies the patch a single time -- TODO confirm.
apply_patch_once("antlr4_fix" "${ANTLR_SRC_DIR}" "${ANTLR_PATCH}")

add_subdirectory(antlr4/runtime/Cpp/)

# Imported wrapper around the static runtime archive. antlr4.patch redirects
# the runtime's LIB_OUTPUT_DIR to EXTERNAL_LIB_DIR, which is where
# IMPORTED_LOCATION points.
add_library(antlr4 UNKNOWN IMPORTED GLOBAL)
set_target_properties(
  antlr4 PROPERTIES
  INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}/antlr4/runtime/Cpp/runtime/src/"
  IMPORTED_LOCATION "${EXTERNAL_LIB_DIR}/libantlr4-runtime.a"
  )
# Make sure the archive is built before anything that uses the imported target.
add_dependencies(antlr4 antlr4_static)

# Suppress selected warnings for the runtime build, per compiler family.
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
    target_compile_options(antlr4_static PRIVATE -Wno-unknown-pragmas -Wno-unqualified-std-cast-call)
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
    target_compile_options(antlr4_static PRIVATE -Wno-unknown-pragmas -Wno-unqualified-std-cast-call -Wno-attributes -Wno-implicit-fallthrough)
endif()

================================================
FILE: thirdparty/antlr/antlr4.patch
================================================
diff --git a/runtime/Cpp/CMakeLists.txt b/runtime/Cpp/CMakeLists.txt
index 390078151..213258ac8 100644
--- a/runtime/Cpp/CMakeLists.txt
+++ b/runtime/Cpp/CMakeLists.txt
@@ -28,21 +28,21 @@ project(LIBANTLR4)
 if(CMAKE_VERSION VERSION_EQUAL "3.0.0" OR
    CMAKE_VERSION VERSION_GREATER "3.0.0")
   CMAKE_POLICY(SET CMP0026 NEW)
-  CMAKE_POLICY(SET CMP0054 OLD)
-  CMAKE_POLICY(SET CMP0045 OLD)
-  CMAKE_POLICY(SET CMP0042 OLD)
+  CMAKE_POLICY(SET CMP0054 NEW)
+  CMAKE_POLICY(SET CMP0045 NEW)
+  CMAKE_POLICY(SET CMP0042 NEW)
 endif()
 
 if(CMAKE_VERSION VERSION_EQUAL "3.3.0" OR
    CMAKE_VERSION VERSION_GREATER "3.3.0")
-  CMAKE_POLICY(SET CMP0059 OLD)
-  CMAKE_POLICY(SET CMP0054 OLD)
+  CMAKE_POLICY(SET CMP0059 NEW)
+  CMAKE_POLICY(SET CMP0054 NEW)
 endif()
 
-if(CMAKE_SYSTEM_NAME MATCHES "Linux")
-  find_package(PkgConfig REQUIRED)
-  pkg_check_modules(UUID REQUIRED uuid)
-endif()
+#if(CMAKE_SYSTEM_NAME MATCHES "Linux")
+#  find_package(PkgConfig REQUIRED)
+#  pkg_check_modules(UUID REQUIRED uuid)
+#endif()
 if(APPLE)
   find_library(COREFOUNDATION_LIBRARY CoreFoundation)
 endif()
diff --git a/runtime/Cpp/runtime/CMakeLists.txt b/runtime/Cpp/runtime/CMakeLists.txt
index 2c5e7376f..ae992f9cc 100644
--- a/runtime/Cpp/runtime/CMakeLists.txt
+++ b/runtime/Cpp/runtime/CMakeLists.txt
@@ -25,7 +25,7 @@ file(GLOB libantlrcpp_SRC
 add_library(antlr4_shared SHARED ${libantlrcpp_SRC})
 add_library(antlr4_static STATIC ${libantlrcpp_SRC})
 
-set(LIB_OUTPUT_DIR "${CMAKE_HOME_DIRECTORY}/dist") # put generated libraries here.
+set(LIB_OUTPUT_DIR "${EXTERNAL_LIB_DIR}")
 message(STATUS "Output libraries to ${LIB_OUTPUT_DIR}")
 
 # make sure 'make' works fine even if ${LIB_OUTPUT_DIR} is deleted.


================================================
FILE: thirdparty/arrow/CMakeLists.txt
================================================
# Select and apply the patch for the vendored Arrow 21.0.0 sources. Android
# builds use arrow.android.patch; all other builds use arrow.patch.
# apply_patch_once is not defined in this file; presumably it comes from
# cmake/utils.cmake -- TODO confirm.
set(ARROW_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/apache-arrow-21.0.0)
if(ANDROID)
        set(ARROW_PATCH ${CMAKE_CURRENT_SOURCE_DIR}/arrow.android.patch)
        apply_patch_once("arrow_android_fix" "${ARROW_SRC_DIR}" "${ARROW_PATCH}")
else()
        set(ARROW_PATCH ${CMAKE_CURRENT_SOURCE_DIR}/arrow.patch)
        apply_patch_once("arrow_fix" "${ARROW_SRC_DIR}" "${ARROW_PATCH}")
endif()

include(ExternalProject)
include(ProcessorCount)

# NPROC is passed to the native build tool as "-j ${NPROC}". ProcessorCount()
# yields 0 when the number of cores cannot be determined, and "-j 0" is
# rejected by make, so fall back to a single job in that case.
ProcessorCount(NPROC)
if(NPROC EQUAL 0)
        set(NPROC 1)
endif()

# Static archives expected from the Arrow install step. They are listed as
# BYPRODUCTS of the external build and used as IMPORTED_LOCATION of the
# imported targets further down.
set(LIB_PARQUET ${EXTERNAL_LIB_DIR}/libparquet.a)
set(LIB_ARROW ${EXTERNAL_LIB_DIR}/libarrow.a)
set(LIB_COMPUTE ${EXTERNAL_LIB_DIR}/libarrow_compute.a)
set(LIB_ACERO ${EXTERNAL_LIB_DIR}/libarrow_acero.a)
set(LIB_ARROW_DEPENDS ${EXTERNAL_LIB_DIR}/libarrow_bundled_dependencies.a)
set(LIB_ARROW_DATASET ${EXTERNAL_LIB_DIR}/libarrow_dataset.a)

# Environment variables prepended to Arrow's configure command. With
# USE_OSS_MIRROR enabled, the ARROW_*_URL entries point Arrow's bundled
# dependency downloads at the OSS mirror; otherwise the list stays empty.
set(CONFIGURE_ENV_LIST "")
if(USE_OSS_MIRROR)
        list(APPEND CONFIGURE_ENV_LIST
                "ARROW_BOOST_URL=https://zvec-bj.oss-cn-beijing.aliyuncs.com/thirdparty/boost-1.88.0-cmake.tar.gz"
                "ARROW_RAPIDJSON_URL=https://zvec-bj.oss-cn-beijing.aliyuncs.com/thirdparty/rapidjson-232389d4f1012dddec4ef84861face2d2ba85709.tar.gz"
                "ARROW_RE2_URL=https://zvec-bj.oss-cn-beijing.aliyuncs.com/thirdparty/re2-2022-06-01.tar.gz"
                "ARROW_THRIFT_URL=https://zvec-bj.oss-cn-beijing.aliyuncs.com/thirdparty/thrift-0.22.0.tar.gz"
                "ARROW_UTF8PROC_URL=https://zvec-bj.oss-cn-beijing.aliyuncs.com/thirdparty/utf8proc-2.10.0.tar.gz"
                "ARROW_XSIMD_URL=https://zvec-bj.oss-cn-beijing.aliyuncs.com/thirdparty/xsimd-13.0.0.tar.gz"
                "ARROW_ZLIB_URL=https://zvec-bj.oss-cn-beijing.aliyuncs.com/thirdparty/zlib-1.3.1.tar.gz"
        )
        message(STATUS "Using OSS mirror for third-party downloads")
endif()

# Configure, build and install Arrow as an external project. Nothing is
# downloaded (DOWNLOAD_COMMAND is empty); the vendored source tree is built
# out of source and installed under ${EXTERNAL_BINARY_DIR}/usr/local.
#
# Both branches request static libraries with Acero, Filesystem, Dataset,
# Parquet and Compute enabled, bundled dependencies, and zlib and mimalloc
# disabled. The Android branch additionally forwards the toolchain file, ABI
# and native API level, and sets -DARROW_WITH_MUSL=OFF.
# Keep the two argument lists in sync when changing shared options.
if(ANDROID)
        ExternalProject_Add(
                ARROW.BUILD PREFIX arrow
                SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/apache-arrow-21.0.0
                DOWNLOAD_COMMAND ""
                BUILD_IN_SOURCE false
                CONFIGURE_COMMAND env ${CONFIGURE_ENV_LIST} "${CMAKE_COMMAND}" ${CMAKE_CACHE_ARGS} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_DEBUG_POSTFIX= -DARROW_BUILD_SHARED=OFF -DARROW_ACERO=ON -DARROW_FILESYSTEM=ON -DARROW_DATASET=ON -DARROW_PARQUET=ON -DARROW_COMPUTE=ON -DARROW_WITH_ZLIB=OFF -DARROW_DEPENDENCY_SOURCE=BUNDLED -DARROW_MIMALLOC=OFF -DCMAKE_INSTALL_LIBDIR=lib -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} -DANDROID_ABI=${ANDROID_ABI} -DANDROID_NATIVE_API_LEVEL=${ANDROID_NATIVE_API_LEVEL} -DARROW_WITH_MUSL=OFF "<SOURCE_DIR>/cpp"
                BUILD_COMMAND "${CMAKE_COMMAND}" --build . --target all -- -j ${NPROC}
                INSTALL_COMMAND "${CMAKE_COMMAND}" --install "<BINARY_DIR>" --prefix=${EXTERNAL_BINARY_DIR}/usr/local
                BYPRODUCTS ${LIB_PARQUET} ${LIB_ARROW} ${LIB_COMPUTE} ${LIB_ACERO} ${LIB_ARROW_DEPENDS} ${LIB_ARROW_DATASET}
                LOG_DOWNLOAD ON
                LOG_CONFIGURE ON
                LOG_BUILD ON
                LOG_INSTALL ON
        )
else()
        ExternalProject_Add(
                ARROW.BUILD PREFIX arrow
                SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/apache-arrow-21.0.0
                DOWNLOAD_COMMAND ""
                BUILD_IN_SOURCE false
                CONFIGURE_COMMAND env ${CONFIGURE_ENV_LIST} "${CMAKE_COMMAND}" ${CMAKE_CACHE_ARGS} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_DEBUG_POSTFIX= -DARROW_BUILD_SHARED=OFF -DARROW_ACERO=ON -DARROW_FILESYSTEM=ON -DARROW_DATASET=ON -DARROW_PARQUET=ON -DARROW_COMPUTE=ON -DARROW_WITH_ZLIB=OFF -DARROW_DEPENDENCY_SOURCE=BUNDLED -DARROW_MIMALLOC=OFF -DCMAKE_INSTALL_LIBDIR=lib "<SOURCE_DIR>/cpp"
                BUILD_COMMAND "${CMAKE_COMMAND}" --build . --target all -- -j ${NPROC}
                INSTALL_COMMAND "${CMAKE_COMMAND}" --install "<BINARY_DIR>" --prefix=${EXTERNAL_BINARY_DIR}/usr/local
                BYPRODUCTS ${LIB_PARQUET} ${LIB_ARROW} ${LIB_COMPUTE} ${LIB_ACERO} ${LIB_ARROW_DEPENDS} ${LIB_ARROW_DATASET}
                LOG_DOWNLOAD ON
                LOG_CONFIGURE ON
                LOG_BUILD ON
                LOG_INSTALL ON
        )
endif()

# Umbrella target that only carries a dependency on the external build.
# NOTE(review): no IMPORTED_LOCATION is set for `arrow` in this file, so it
# presumably must not be linked directly -- use the Arrow::* targets below.
add_library(arrow UNKNOWN IMPORTED GLOBAL)
add_dependencies(arrow ARROW.BUILD)

# find_package-style result variables exported to the parent scope.
set(Arrow_FOUND TRUE PARENT_SCOPE)
set(Arrow_INCLUDE_DIR ${EXTERNAL_INC_DIR} PARENT_SCOPE)
set(Arrow_LIBRARIES ${EXTERNAL_LIB_DIR}/libarrow.a PARENT_SCOPE)
set(Arrow_DIR ${EXTERNAL_BINARY_DIR} PARENT_SCOPE)
set(Arrow_LIBRARY_DIR ${EXTERNAL_BINARY_DIR} PARENT_SCOPE)

# One imported target per installed static archive. Each target depends on
# ARROW.BUILD so the archive exists before it is linked, and lists the Arrow
# libraries it needs in INTERFACE_LINK_LIBRARIES.

# Arrow's bundled third-party dependencies.
add_library(Arrow::arrow_depends UNKNOWN IMPORTED GLOBAL)
set_target_properties(
        Arrow::arrow_depends PROPERTIES
        INTERFACE_INCLUDE_DIRECTORIES ${EXTERNAL_INC_DIR}
        IMPORTED_LOCATION "${LIB_ARROW_DEPENDS}"
)
add_dependencies(Arrow::arrow_depends ARROW.BUILD)


# Core Arrow library.
add_library(Arrow::arrow_static UNKNOWN IMPORTED GLOBAL)
set_target_properties(
        Arrow::arrow_static PROPERTIES
        INTERFACE_INCLUDE_DIRECTORIES ${EXTERNAL_INC_DIR}
        IMPORTED_LOCATION "${LIB_ARROW}"
        INTERFACE_LINK_LIBRARIES "Arrow::arrow_depends"
)
add_dependencies(Arrow::arrow_static ARROW.BUILD)

# Parquet.
add_library(Arrow::parquet_static UNKNOWN IMPORTED GLOBAL)
set_target_properties(
        Arrow::parquet_static PROPERTIES
        INTERFACE_INCLUDE_DIRECTORIES ${EXTERNAL_INC_DIR}
        IMPORTED_LOCATION "${LIB_PARQUET}"
        INTERFACE_LINK_LIBRARIES "Arrow::arrow_depends;Arrow::arrow_static"
)
add_dependencies(Arrow::parquet_static ARROW.BUILD)

# Compute.
add_library(Arrow::arrow_compute UNKNOWN IMPORTED GLOBAL)
set_target_properties(
        Arrow::arrow_compute PROPERTIES
        INTERFACE_INCLUDE_DIRECTORIES ${EXTERNAL_INC_DIR}
        IMPORTED_LOCATION "${LIB_COMPUTE}"
        INTERFACE_LINK_LIBRARIES "Arrow::arrow_depends;Arrow::arrow_static"
)
add_dependencies(Arrow::arrow_compute ARROW.BUILD)

# Acero (links against compute).
add_library(Arrow::arrow_acero UNKNOWN IMPORTED GLOBAL)
set_target_properties(
        Arrow::arrow_acero PROPERTIES
        INTERFACE_INCLUDE_DIRECTORIES ${EXTERNAL_INC_DIR}
        IMPORTED_LOCATION "${LIB_ACERO}"
        INTERFACE_LINK_LIBRARIES "Arrow::arrow_depends;Arrow::arrow_static;Arrow::arrow_compute"
)
add_dependencies(Arrow::arrow_acero ARROW.BUILD)

# Dataset (links against compute and Acero).
add_library(Arrow::arrow_dataset UNKNOWN IMPORTED GLOBAL)
set_target_properties(
        Arrow::arrow_dataset PROPERTIES
        INTERFACE_INCLUDE_DIRECTORIES ${EXTERNAL_INC_DIR}
        IMPORTED_LOCATION "${LIB_ARROW_DATASET}"
        INTERFACE_LINK_LIBRARIES "Arrow::arrow_depends;Arrow::arrow_static;Arrow::arrow_compute;Arrow::arrow_acero"
)
add_dependencies(Arrow::arrow_dataset ARROW.BUILD)


================================================
FILE: thirdparty/arrow/arrow.android.patch
================================================
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 7fa4b66d4b..78bcb6d47e 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -950,6 +950,13 @@ set(EP_COMMON_CMAKE_ARGS
     # https://github.com/apache/arrow/issues/45985
     -DCMAKE_POLICY_VERSION_MINIMUM=3.5)
 
+if(ANDROID)
+  list(APPEND EP_COMMON_CMAKE_ARGS 
+  -DANDROID_ABI=${ANDROID_ABI}
+  -DANDROID_NATIVE_API_LEVEL=${ANDROID_NATIVE_API_LEVEL}
+  -DANDROID_NDK=${ANDROID_NDK})
+endif()
+
 # if building with a toolchain file, pass that through
 if(CMAKE_TOOLCHAIN_FILE)
   list(APPEND EP_COMMON_CMAKE_ARGS -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE})
diff --git a/cpp/src/arrow/acero/source_node.cc b/cpp/src/arrow/acero/source_node.cc
index 0f58406760..cf68bfdcbe 100644
--- a/cpp/src/arrow/acero/source_node.cc
+++ b/cpp/src/arrow/acero/source_node.cc
@@ -407,7 +407,7 @@ struct SchemaSourceNode : public SourceNode {
 struct RecordBatchReaderSourceNode : public SourceNode {
   RecordBatchReaderSourceNode(ExecPlan* plan, std::shared_ptr<Schema> schema,
                               arrow::AsyncGenerator<std::optional<ExecBatch>> generator)
-      : SourceNode(plan, schema, generator) {}
+      : SourceNode(plan, schema, generator, Ordering::Implicit()) {}
 
   static Result<ExecNode*> Make(ExecPlan* plan, std::vector<ExecNode*> inputs,
                                 const ExecNodeOptions& options) {
diff --git a/cpp/src/arrow/vendored/datetime/tz.cpp b/cpp/src/arrow/vendored/datetime/tz.cpp
index 2cf6c62a84..9e64b62297 100644
--- a/cpp/src/arrow/vendored/datetime/tz.cpp
+++ b/cpp/src/arrow/vendored/datetime/tz.cpp
@@ -605,7 +605,9 @@ tzdb_list
 create_tzdb()
 {
     tzdb_list tz_db;
+#if !defined(ANDROID) && !defined(__ANDROID__)
     tzdb_list::undocumented_helper::push_front(tz_db, init_tzdb().release());
+#endif // !defined(ANDROID) && !defined(__ANDROID__)
     return tz_db;
 }
 
@@ -3900,7 +3902,9 @@ reload_tzdb()
     if (!v.empty() && v == remote_version())
         return get_tzdb_list().front();
 #endif  // AUTO_DOWNLOAD
+#if !defined(ANDROID) && !defined(__ANDROID__)
     tzdb_list::undocumented_helper::push_front(get_tzdb_list(), init_tzdb().release());
+#endif  // !defined(ANDROID) && !defined(__ANDROID__)
     return get_tzdb_list().front();
 }
 
diff --git a/cpp/src/arrow/vendored/datetime/tz.h b/cpp/src/arrow/vendored/datetime/tz.h
index 61ab3df106..d456d6765f 100644
--- a/cpp/src/arrow/vendored/datetime/tz.h
+++ b/cpp/src/arrow/vendored/datetime/tz.h
@@ -858,7 +858,9 @@ private:
     load_data(std::istream& inf, std::int32_t tzh_leapcnt, std::int32_t tzh_timecnt,
                                  std::int32_t tzh_typecnt, std::int32_t tzh_charcnt);
 # if defined(ANDROID) || defined(__ANDROID__)
+public:
     void parse_from_android_tzdata(std::ifstream& inf, const std::size_t off);
+private:
 # endif // defined(ANDROID) || defined(__ANDROID__)
 #else  // !USE_OS_TZDB
     DATE_API sys_info   get_info_impl(sys_seconds tp, int tz_int) const;
diff --git a/cpp/src/arrow/vendored/musl/strptime.c b/cpp/src/arrow/vendored/musl/strptime.c
index 41912fd1bb..9d0b4dc1bf 100644
--- a/cpp/src/arrow/vendored/musl/strptime.c
+++ b/cpp/src/arrow/vendored/musl/strptime.c
@@ -17,7 +17,7 @@
 
 #undef HAVE_LANGINFO
 
-#ifndef _WIN32
+#if !defined(_WIN32) && !defined(__ANDROID__)
 #define HAVE_LANGINFO 1
 #endif
 

================================================
FILE: thirdparty/arrow/arrow.patch
================================================
diff --git a/cpp/src/arrow/acero/source_node.cc b/cpp/src/arrow/acero/source_node.cc
index 0f5840676..cf68bfdcb 100644
--- a/cpp/src/arrow/acero/source_node.cc
+++ b/cpp/src/arrow/acero/source_node.cc
@@ -407,7 +407,7 @@ struct SchemaSourceNode : public SourceNode {
 struct RecordBatchReaderSourceNode : public SourceNode {
   RecordBatchReaderSourceNode(ExecPlan* plan, std::shared_ptr<Schema> schema,
                               arrow::AsyncGenerator<std::optional<ExecBatch>> generator)
-      : SourceNode(plan, schema, generator) {}
+      : SourceNode(plan, schema, generator, Ordering::Implicit()) {}
 
   static Result<ExecNode*> Make(ExecPlan* plan, std::vector<ExecNode*> inputs,
                                 const ExecNodeOptions& options) {


================================================
FILE: thirdparty/gflags/CMakeLists.txt
================================================
# Configure and add the bundled gflags 2.2.2 sources as a subdirectory.
# Both the generic BUILD_TESTING cache entry and gflags' own test switch are
# forced OFF.
set(BUILD_TESTING OFF CACHE BOOL "Disable Unit Tests" FORCE)
set(GFLAGS_BUILD_TESTING OFF CACHE BOOL "Disable unittest in gflags" FORCE)
# Mirror BUILD_SHARED_LIBS: request exactly one gflags library flavour.
if(NOT BUILD_SHARED_LIBS)
  set(GFLAGS_BUILD_SHARED_LIBS OFF)
  set(GFLAGS_BUILD_STATIC_LIBS ON)
else()
  set(GFLAGS_BUILD_SHARED_LIBS ON)
  set(GFLAGS_BUILD_STATIC_LIBS OFF)
endif()

# NOTE(review): presumably tells gflags that it is being built through
# add_subdirectory() rather than stand-alone -- confirm in gflags' CMakeLists.
set(GFLAGS_IS_SUBPROJECT ON)
# Point CMAKE_ARCHIVE_OUTPUT_DIRECTORY at EXTERNAL_LIB_DIR only while the
# gflags targets are being created, then restore the caller's value.
set(_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY})
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${EXTERNAL_LIB_DIR})
add_subdirectory(gflags-2.2.2)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY})
unset(_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY)

# Collect the pieces needed to reconstruct the full path of the gflags library
# file: <output dir>/<prefix><output name><suffix>.
get_target_property(gflags_OUTPUT_NAME gflags OUTPUT_NAME)
get_target_property(gflags_BINARY_DIR gflags BINARY_DIR)
get_target_property(gflags_SOURCE_DIR gflags SOURCE_DIR)
get_target_property(gflags_PREFIX gflags PREFIX)
get_target_property(gflags_SUFFIX gflags SUFFIX)

# Default location: the target's binary directory. It is overridden below when
# the target carries an explicit output directory.
set(gflags_OUTPUT_DIR ${gflags_BINARY_DIR})
if(NOT BUILD_SHARED_LIBS)
  # Fall back to the platform's static-library naming when the target does not
  # override PREFIX/SUFFIX (the property query then yields *-NOTFOUND).
  if (NOT gflags_PREFIX)
    set(gflags_PREFIX ${CMAKE_STATIC_LIBRARY_PREFIX})
  endif()
  if (NOT gflags_SUFFIX)
    set(gflags_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX})
  endif()
  # Ask the target where its archive goes. CMAKE_ARCHIVE_OUTPUT_DIRECTORY
  # cannot be used here: this file changes it around add_subdirectory() and
  # restores it afterwards, so its current value is not necessarily the one
  # the gflags target was created with.
  get_target_property(_gflags_ARTIFACT_DIR gflags ARCHIVE_OUTPUT_DIRECTORY)
else()
  # Same fallback for the shared-library naming.
  if (NOT gflags_PREFIX)
    set(gflags_PREFIX ${CMAKE_SHARED_LIBRARY_PREFIX})
  endif()
  if (NOT gflags_SUFFIX)
    set(gflags_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX})
  endif()
  get_target_property(_gflags_ARTIFACT_DIR gflags LIBRARY_OUTPUT_DIRECTORY)
endif()
# An unset property evaluates to <var>-NOTFOUND, which if() treats as false;
# in that case the binary-directory default above stays in effect.
if(_gflags_ARTIFACT_DIR)
  set(gflags_OUTPUT_DIR ${_gflags_ARTIFACT_DIR})
endif()
unset(_gflags_ARTIFACT_DIR)

# Publish find_package()-style result variables to the including directory,
# once with the lower-case `gflags_` prefix ...
set(gflags_FOUND TRUE PARENT_SCOPE)
set(gflags_INCLUDE_DIR ${gflags_BINARY_DIR}/include PARENT_SCOPE)
set(gflags_LIBRARIES ${gflags_OUTPUT_DIR}/${gflags_PREFIX}${gflags_OUTPUT_NAME}${gflags_SUFFIX} PARENT_SCOPE)
set(gflags_DIR ${gflags_BINARY_DIR} PARENT_SCOPE)

# ... and once with the upper-case `GFLAGS_` prefix, carrying the same values.
set(GFLAGS_FOUND TRUE PARENT_SCOPE)
set(GFLAGS_INCLUDE_DIR ${gflags_BINARY_DIR}/include PARENT_SCOPE)
set(GFLAGS_LIBRARIES ${gflags_OUTPUT_DIR}/${gflags_PREFIX}${gflags_OUTPUT_NAME}${gflags_SUFFIX} PARENT_SCOPE)
set(GFLAGS_DIR ${gflags_BINARY_DIR} PARENT_SCOPE)


================================================
FILE: thirdparty/glog/CMakeLists.txt
================================================
# Configure, patch and add the bundled glog 0.5.0 sources as a subdirectory.
# Tests are forced OFF, and glog's own lookups of gflags and libunwind are
# disabled through its WITH_GFLAGS / WITH_UNWIND options.
set(BUILD_TESTING OFF CACHE BOOL "Disable Unit Tests" FORCE)
set(WITH_GFLAGS OFF CACHE BOOL "Disable find_package(gflags) in glog" FORCE)
set(WITH_UNWIND OFF CACHE BOOL "Disable find_package(unwind) in glog" FORCE)
# NOTE(review): set although WITH_GFLAGS is OFF; presumably makes glog compile
# its gflags integration against the bundled gflags -- confirm.
set(HAVE_LIB_GFLAGS TRUE CACHE BOOL "")

# Applies to this directory and everything added below it.
add_compile_options(-Wno-deprecated-declarations)

# Patch the glog sources before they are added. Android gets its own patch
# file; every other platform uses glog.patch. apply_patch_once() is defined
# elsewhere in the project (not visible here).
set(GLOG_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/glog-0.5.0)
if (ANDROID)
    set(GLOG_ANDROID_PATCH ${CMAKE_CURRENT_SOURCE_DIR}/glog.android.patch)
    apply_patch_once("glog_android_fix" "${GLOG_SRC_DIR}" "${GLOG_ANDROID_PATCH}")
else()
    set(GLOG_PATCH ${CMAKE_CURRENT_SOURCE_DIR}/glog.patch)
    apply_patch_once("glog_fix" "${GLOG_SRC_DIR}" "${GLOG_PATCH}")
endif()

# Point CMAKE_ARCHIVE_OUTPUT_DIRECTORY at EXTERNAL_LIB_DIR only while the glog
# targets are being created, then restore the caller's value.
set(_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY})
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${EXTERNAL_LIB_DIR})
add_subdirectory(glog-0.5.0)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY})
unset(_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY)

# Make sure the gflags target is built before glog.
add_dependencies(glog gflags)

if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
    target_compile_options(glog PRIVATE -Wno-sign-compare)
endif()

get_target_property(glog_BINARY_DIR glog BINARY_DIR)
get_target_property(glog_SOURCE_DIR glog SOURCE_DIR)
# get_target_property(GLOG_INCLUDE_DIRS glog INTERFACE_INCLUDE_DIRECTORIES)

# Include directories are assembled by hand from the target's binary directory
# and its src/ directory, then published to the including directory together
# with the library location. $<TARGET_FILE:glog> is a generator expression and
# is only resolved at generate time.
set(GLOG_INCLUDE_DIRS ${glog_BINARY_DIR} ${glog_SOURCE_DIR}/src)
set(GLOG_FOUND TRUE PARENT_SCOPE)
set(GLOG_INCLUDE_DIR ${GLOG_INCLUDE_DIRS} PARENT_SCOPE)
set(GLOG_INCLUDE_DIRS ${GLOG_INCLUDE_DIRS} PARENT_SCOPE)
set(GLOG_LIBRARY $<TARGET_FILE:glog> PARENT_SCOPE)
set(GLOG_LIBRARIES $<TARGET_FILE:glog> PARENT_SCOPE)


================================================
FILE: thirdparty/glog/glog.android.patch
================================================
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 62ebbcc..e17f67e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -17,7 +17,7 @@ set (CPACK_PACKAGE_VERSION_MINOR ${PROJECT_VERSION_MINOR})
 set (CPACK_PACKAGE_VERSION_PATCH ${PROJECT_VERSION_PATCH})
 set (CPACK_PACKAGE_VERSION ${PROJECT_VERSION})
 
-option (BUILD_SHARED_LIBS "Build shared libraries" ON)
+option (BUILD_STATIC_LIBS "Build shared libraries" ON)
 option (PRINT_UNSYMBOLIZED_STACK_TRACES
   "Print file offsets in traces instead of symbolizing" OFF)
 option (WITH_CUSTOM_PREFIX "Enable support for user-generated message prefixes" OFF)
@@ -802,12 +802,12 @@ if (BUILD_TESTING)
     FIXTURES_REQUIRED "cmake_package_config;cmake_package_config_working")
 endif (BUILD_TESTING)
 
-install (TARGETS glog
-  EXPORT glog-targets
-  RUNTIME DESTINATION ${_glog_CMake_BINDIR}
-  PUBLIC_HEADER DESTINATION ${_glog_CMake_INCLUDE_DIR}/glog
-  LIBRARY DESTINATION ${_glog_CMake_LIBDIR}
-  ARCHIVE DESTINATION ${_glog_CMake_LIBDIR})
+#install (TARGETS glog
+#  EXPORT glog-targets
+#  RUNTIME DESTINATION ${_glog_CMake_BINDIR}
+#  PUBLIC_HEADER DESTINATION ${_glog_CMake_INCLUDE_DIR}/glog
+#  LIBRARY DESTINATION ${_glog_CMake_LIBDIR}
+#  ARCHIVE DESTINATION ${_glog_CMake_LIBDIR})
 
 if (WITH_PKGCONFIG)
   install (
@@ -840,8 +840,8 @@ write_basic_package_version_file (
   ${CMAKE_CURRENT_BINARY_DIR}/glog-config-version.cmake
   COMPATIBILITY SameMajorVersion)
 
-export (TARGETS glog NAMESPACE glog:: FILE glog-targets.cmake)
-export (PACKAGE glog)
+#export (TARGETS glog NAMESPACE glog:: FILE glog-targets.cmake)
+#export (PACKAGE glog)
 
 get_filename_component (_PREFIX "${CMAKE_INSTALL_PREFIX}" ABSOLUTE)
 
@@ -885,5 +885,5 @@ install (DIRECTORY ${_glog_BINARY_CMake_DATADIR}
   FILES_MATCHING PATTERN "*.cmake"
 )
 
-install (EXPORT glog-targets NAMESPACE glog:: DESTINATION
-  ${_glog_CMake_INSTALLDIR})
+#install (EXPORT glog-targets NAMESPACE glog:: DESTINATION
+#  ${_glog_CMake_INSTALLDIR})
diff --git a/src/stacktrace_generic-inl.h b/src/stacktrace_generic-inl.h
index fad81d3..67209ac 100644
--- a/src/stacktrace_generic-inl.h
+++ b/src/stacktrace_generic-inl.h
@@ -39,21 +39,7 @@ _START_GOOGLE_NAMESPACE_
 
 // If you change this function, also change GetStackFrames below.
 int GetStackTrace(void** result, int max_depth, int skip_count) {
-  static const int kStackLength = 64;
-  void * stack[kStackLength];
-  int size;
-
-  size = backtrace(stack, kStackLength);
-  skip_count++;  // we want to skip the current frame as well
-  int result_count = size - skip_count;
-  if (result_count < 0)
-    result_count = 0;
-  if (result_count > max_depth)
-    result_count = max_depth;
-  for (int i = 0; i < result_count; i++)
-    result[i] = stack[i + skip_count];
-
-  return result_count;
+  return 0;
 }
 
 _END_GOOGLE_NAMESPACE_


================================================
FILE: thirdparty/glog/glog.patch
================================================
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 62ebbcc..e17f67e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -17,7 +17,7 @@ set (CPACK_PACKAGE_VERSION_MINOR ${PROJECT_VERSION_MINOR})
 set (CPACK_PACKAGE_VERSION_PATCH ${PROJECT_VERSION_PATCH})
 set (CPACK_PACKAGE_VERSION ${PROJECT_VERSION})
 
-option (BUILD_SHARED_LIBS "Build shared libraries" ON)
+option (BUILD_STATIC_LIBS "Build shared libraries" ON)
 option (PRINT_UNSYMBOLIZED_STACK_TRACES
   "Print file offsets in traces instead of symbolizing" OFF)
 option (WITH_CUSTOM_PREFIX "Enable support for user-generated message prefixes" OFF)
@@ -802,12 +802,12 @@ if (BUILD_TESTING)
     FIXTURES_REQUIRED "cmake_package_config;cmake_package_config_working")
 endif (BUILD_TESTING)
 
-install (TARGETS glog
-  EXPORT glog-targets
-  RUNTIME DESTINATION ${_glog_CMake_BINDIR}
-  PUBLIC_HEADER DESTINATION ${_glog_CMake_INCLUDE_DIR}/glog
-  LIBRARY DESTINATION ${_glog_CMake_LIBDIR}
-  ARCHIVE DESTINATION ${_glog_CMake_LIBDIR})
+#install (TARGETS glog
+#  EXPORT glog-targets
+#  RUNTIME DESTINATION ${_glog_CMake_BINDIR}
+#  PUBLIC_HEADER DESTINATION ${_glog_CMake_INCLUDE_DIR}/glog
+#  LIBRARY DESTINATION ${_glog_CMake_LIBDIR}
+#  ARCHIVE DESTINATION ${_glog_CMake_LIBDIR})
 
 if (WITH_PKGCONFIG)
   install (
@@ -840,8 +840,8 @@ write_basic_package_version_file (
   ${CMAKE_CURRENT_BINARY_DIR}/glog-config-version.cmake
   COMPATIBILITY SameMajorVersion)
 
-export (TARGETS glog NAMESPACE glog:: FILE glog-targets.cmake)
-export (PACKAGE glog)
+#export (TARGETS glog NAMESPACE glog:: FILE glog-targets.cmake)
+#export (PACKAGE glog)
 
 get_filename_component (_PREFIX "${CMAKE_INSTALL_PREFIX}" ABSOLUTE)
 
@@ -885,5 +885,5 @@ install (DIRECTORY ${_glog_BINARY_CMake_DATADIR}
   FILES_MATCHING PATTERN "*.cmake"
 )
 
-install (EXPORT glog-targets NAMESPACE glog:: DESTINATION
-  ${_glog_CMake_INSTALLDIR})
+#install (EXPORT glog-targets NAMESPACE glog:: DESTINATION
+#  ${_glog_CMake_INSTALLDIR})


================================================
FILE: thirdparty/googletest/CMakeLists.txt
================================================
# Add the bundled googletest 1.10.0 sources as a subdirectory.
# The warning suppressions below apply to this directory and everything added
# beneath it.
add_compile_options(-Wno-deprecated-copy)
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
    add_compile_options(-Wno-maybe-uninitialized)
endif()

# Point CMAKE_ARCHIVE_OUTPUT_DIRECTORY at EXTERNAL_LIB_DIR only while the
# googletest targets are being created, then restore the caller's value.
set(_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY})
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${EXTERNAL_LIB_DIR})
add_subdirectory(googletest-1.10.0)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY})
unset(_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY)

get_target_property(GTEST_INCLUDE_DIRS gtest INTERFACE_INCLUDE_DIRECTORIES)

# Publish FindGTest-style variables to the including directory. The library
# entries are generator expressions, resolved at generate time.
set(GTEST_FOUND TRUE PARENT_SCOPE)
set(GTEST_INCLUDE_DIRS ${GTEST_INCLUDE_DIRS} PARENT_SCOPE)
set(GTEST_INCLUDE_DIR ${GTEST_INCLUDE_DIRS} PARENT_SCOPE)
set(GTEST_LIBRARIES $<TARGET_FILE:gtest> PARENT_SCOPE)
# NOTE(review): GTEST_LIBRARIES / GTEST_MAIN_LIBRARIES were just set in the
# PARENT scope only, so the ${...} references on the *_LIBRARY lines read this
# scope's (possibly empty) values -- confirm these two variables end up as
# intended.
set(GTEST_LIBRARY ${GTEST_LIBRARIES} PARENT_SCOPE)
set(GTEST_MAIN_LIBRARIES $<TARGET_FILE:gtest_main> PARENT_SCOPE)
set(GTEST_MAIN_LIBRARY ${GTEST_MAIN_LIBRARIES} PARENT_SCOPE)
set(GTEST_BOTH_LIBRARIES $<TARGET_FILE:gtest> $<TARGET_FILE:gtest_main> PARENT_SCOPE)


================================================
FILE: thirdparty/lz4/CMakeLists.txt
================================================
# Locations under EXTERNAL_BINARY_DIR where the lz4 headers and static library
# are expected after the install step.
set(lz4_INCLUDE_DIR "${EXTERNAL_BINARY_DIR}/usr/local/include")
set(lz4_LIBRARY_DIR "${EXTERNAL_BINARY_DIR}/usr/local/lib/")
# Create both directories at configure time. Presumably needed because the
# imported `lz4` target declared later in this file lists the include directory
# before lz4 has actually been built and installed.
file(MAKE_DIRECTORY ${lz4_INCLUDE_DIR})
file(MAKE_DIRECTORY ${lz4_LIBRARY_DIR})

include(ExternalProject)

# Environment assignments (NAME=value) handed to `env` in front of lz4's make
# invocation. Android cross builds need the full NDK toolchain description;
# every other platform only requests position-independent code.
set(_lz4_env "")
if(ANDROID)
  # ANDROID_PLATFORM is expected to look like "android-<N>"; keep only <N>.
  string(REGEX REPLACE "^android-([0-9]+)$" "\\1" ANDROID_API_LEVEL "${ANDROID_PLATFORM}")

  # Map the Android ABI name onto the matching clang target triple.
  if(ANDROID_ABI STREQUAL "arm64-v8a")
    set(TARGET_TRIPLE "aarch64-linux-android")
  elseif(ANDROID_ABI STREQUAL "armeabi-v7a")
    set(TARGET_TRIPLE "armv7a-linux-androideabi")
  elseif(ANDROID_ABI STREQUAL "x86")
    set(TARGET_TRIPLE "i686-linux-android")
  elseif(ANDROID_ABI STREQUAL "x86_64")
    set(TARGET_TRIPLE "x86_64-linux-android")
  else()
    message(FATAL_ERROR "Unsupported ANDROID_ABI: ${ANDROID_ABI}")
  endif()

  set(SYSROOT "${ANDROID_NDK}/toolchains/llvm/prebuilt/${ANDROID_HOST_TAG}/sysroot")
  set(COMMON_FLAGS
    "--sysroot=${SYSROOT}"
    "-target ${TARGET_TRIPLE}${ANDROID_API_LEVEL}"
    "-fPIC"
    "-D__ANDROID_API__=${ANDROID_API_LEVEL}"
  )

  # Forward the project's C flags as well. The per-configuration variable is
  # CMAKE_C_FLAGS_<CONFIG> with <CONFIG> in upper case (e.g.
  # CMAKE_C_FLAGS_RELEASE), so the build type has to be upper-cased first;
  # using the raw spelling ("Release") would expand to an unset variable and
  # silently drop those flags.
  string(TOUPPER "${CMAKE_BUILD_TYPE}" _lz4_build_type_upper)
  list(APPEND COMMON_FLAGS ${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_${_lz4_build_type_upper}})
  unset(_lz4_build_type_upper)

  # Collapse the list into the single space-separated string make expects.
  string(JOIN " " _lz4_cflags ${COMMON_FLAGS})

  list(APPEND _lz4_env
    "CC=${CMAKE_C_COMPILER}"
    "AR=${CMAKE_AR}"
    "RANLIB=${CMAKE_RANLIB}"
    "STRIP=${ANDROID_NDK}/toolchains/llvm/prebuilt/${ANDROID_HOST_TAG}/bin/llvm-strip"
    "CFLAGS=${_lz4_cflags}"
  )

else()
  list(APPEND _lz4_env "CFLAGS=-fPIC")
endif()

# Build lz4 1.9.4 from the bundled source directory with its own Makefile.
# There is no configure step; the build runs in the (copied) source tree with
# the environment prepared in _lz4_env, and `make install` stages the result
# under EXTERNAL_BINARY_DIR via DESTDIR. BUILD_SHARED=no is passed to both make
# invocations.
# NOTE(review): `make -j` without a number places no limit on parallel jobs.
ExternalProject_Add(
  Lz4.BUILD
  PREFIX lz4
  URL "${CMAKE_CURRENT_SOURCE_DIR}/lz4-1.9.4"
  CONFIGURE_COMMAND ""
    BUILD_COMMAND env ${_lz4_env} BUILD_SHARED=no make -j
  INSTALL_COMMAND make DESTDIR=${EXTERNAL_BINARY_DIR} BUILD_SHARED=no install
  BUILD_IN_SOURCE ON
  LOG_DOWNLOAD ON
  LOG_CONFIGURE ON
  LOG_BUILD ON
  LOG_INSTALL ON
  BUILD_BYPRODUCTS ${lz4_LIBRARY_DIR}/liblz4.a
)

# Imported target wrapping the installed static library, so other targets can
# link against `lz4` directly. The dependency makes sure the external build
# has run first.
add_library(lz4 STATIC IMPORTED GLOBAL)
set_target_properties(
  lz4 PROPERTIES
  INTERFACE_INCLUDE_DIRECTORIES "${lz4_INCLUDE_DIR}"
  IMPORTED_LOCATION "${lz4_LIBRARY_DIR}/liblz4.a"
)
add_dependencies(lz4 Lz4.BUILD)

# Publish find_package()-style result variables to the including directory.
set(lz4_FOUND TRUE PARENT_SCOPE)
set(lz4_LIBRARY ${lz4_LIBRARY_DIR}/liblz4.a PARENT_SCOPE)
set(lz4_LIBRARIES ${lz4_LIBRARY_DIR}/liblz4.a PARENT_SCOPE)
set(lz4_INCLUDE_DIR "${EXTERNAL_BINARY_DIR}/usr/local/include" PARENT_SCOPE)
set(lz4_INCLUDE_DIRS "${EXTERNAL_BINARY_DIR}/usr/local/include" PARENT_SCOPE)
set(lz4_VERSION 1.9.4 PARENT_SCOPE)


================================================
FILE: thirdparty/magic_enum/CMakeLists.txt
================================================
# Interface-only target for magic_enum 0.9.7: it compiles nothing and merely
# adds the bundled include directory to whatever links against it.
add_library(magic_enum INTERFACE)
target_include_directories(
    magic_enum INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}/magic_enum-0.9.7/include/"
  )


================================================
FILE: thirdparty/protobuf/CMakeLists.txt
================================================
# Configure and add the bundled protobuf 3.21.12 sources; its tests are off.
set(protobuf_BUILD_TESTS OFF CACHE BOOL "Disable testing in protobuf" FORCE)
# NOTE(review): the option is forced ON although its help text says
# "Disable zlib support" -- confirm which of the two is intended.
set(protobuf_WITH_ZLIB ON CACHE BOOL "Disable zlib support in protobuf" FORCE)

# The subdirectory added is protobuf-3.21.12/cmake, with an explicit binary
# directory name (protobuf-3.21.12). CMAKE_ARCHIVE_OUTPUT_DIRECTORY points at
# EXTERNAL_LIB_DIR only while the protobuf targets are being created and is
# restored afterwards.
set(_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY})
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${EXTERNAL_LIB_DIR})
add_subdirectory(protobuf-3.21.12/cmake protobuf-3.21.12)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY})
unset(_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY)

# Per-compiler warning suppressions for the protobuf targets. libprotoc and
# protoc always receive the same option list, so they are handled in one loop;
# libprotobuf has its own list.
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
    target_compile_options(libprotobuf PRIVATE
        -Wno-deprecated-declarations
        -Wno-invalid-noreturn
        -Wno-unused-function
    )
    foreach(_protoc_target IN ITEMS libprotoc protoc)
        target_compile_options(${_protoc_target} PRIVATE
            -Wno-unused-private-field
            -Wno-unused-function
        )
    endforeach()
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
    target_compile_options(libprotobuf PRIVATE
        -Wno-deprecated-declarations
        -Wno-unused-function
        -Wno-maybe-uninitialized
        -Wno-sign-compare
        -Wno-return-type
        -Wno-stringop-overflow
    )
    foreach(_protoc_target IN ITEMS libprotoc protoc)
        target_compile_options(${_protoc_target} PRIVATE
            -Wno-unused-private-field
            -Wno-unused-function
            -Wno-unused-but-set-variable
            -Wno-sign-compare
        )
    endforeach()
endif()

# The libprotobuf target's SOURCE_DIR is the cmake/ subdirectory added above,
# so the public headers are taken from its sibling src/ directory.
get_target_property(libprotobuf_SOURCE_DIR libprotobuf SOURCE_DIR)
get_filename_component(libprotobuf_INCLUDE_DIR ${libprotobuf_SOURCE_DIR}/../src ABSOLUTE)

# Publish FindProtobuf-style variables to the including directory. All library
# and executable entries are generator expressions, resolved at generate time.
set(PROTOBUF_FOUND TRUE PARENT_SCOPE)
set(PROTOBUF_INCLUDE_DIR ${libprotobuf_INCLUDE_DIR} PARENT_SCOPE)
set(PROTOBUF_INCLUDE_DIRS ${libprotobuf_INCLUDE_DIR} PARENT_SCOPE)

set(PROTOBUF_LIBRARY $<TARGET_FILE:libprotobuf> PARENT_SCOPE)
set(PROTOBUF_LIBRARIES $<TARGET_FILE:libprotobuf> PARENT_SCOPE)

set(PROTOBUF_LITE_LIBRARY $<TARGET_FILE:libprotobuf-lite> PARENT_SCOPE)
set(PROTOBUF_LITE_LIBRARIES $<TARGET_FILE:libprotobuf-lite> PARENT_SCOPE)

set(PROTOBUF_PROTOC_LIBRARY $<TARGET_FILE:libprotoc> PARENT_SCOPE)
set(PROTOBUF_PROTOC_LIBRARIES $<TARGET_FILE:libprotoc> PARENT_SCOPE)
set(PROTOBUF_PROTOC_EXECUTABLE $<TARGET_FILE:protoc> PARENT_SCOPE)


================================================
FILE: thirdparty/rocksdb/CMakeLists.txt
================================================
# Configure, patch (Android only) and add the bundled RocksDB 8.1.1 sources.
# apply_patch_once() is defined elsewhere in the project (not visible here).
set(ROCKSDB_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/rocksdb-8.1.1)
if (ANDROID)
    set(ROCKSDB_ANDROID_PATCH ${CMAKE_CURRENT_SOURCE_DIR}/rocksdb.android.patch)
    apply_patch_once("rocksdb_android_fix" "${ROCKSDB_SRC_DIR}" "${ROCKSDB_ANDROID_PATCH}")
endif()

# NOTE(review): the help text says "Disable install", but the option name is
# about building the shared library -- the description looks copy-pasted.
set(ROCKSDB_BUILD_SHARED OFF CACHE BOOL "Disable install in rocksdb" FORCE)

# RocksDB build options forced for this project: no tests, benchmarks or
# tools; LZ4 and RTTI on; warnings not treated as errors; portable build.
set(WITH_TESTS OFF CACHE BOOL "Disable testing in rocksdb" FORCE)
set(WITH_ALL_TESTS OFF CACHE BOOL "Build all test, rather than a small subset" FORCE)
set(WITH_BENCHMARK_TOOLS OFF CACHE BOOL "Disable benchmarks in rocksdb" FORCE)
set(WITH_CORE_TOOLS OFF CACHE BOOL "build with ldb and sst_dump" FORCE)
set(WITH_TOOLS OFF CACHE BOOL "build with tools" FORCE)
set(WITH_LZ4 ON CACHE BOOL "build with lz4" FORCE)
set(USE_RTTI ON CACHE BOOL "build with RTTI" FORCE)
set(FAIL_ON_WARNINGS OFF CACHE BOOL "build with no Werror" FORCE)
set(PORTABLE ON CACHE BOOL "build a portable lib" FORCE)

# Point CMAKE_ARCHIVE_OUTPUT_DIRECTORY at EXTERNAL_LIB_DIR only while the
# rocksdb targets are being created, then restore the caller's value.
set(_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY})
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${EXTERNAL_LIB_DIR})
add_subdirectory(rocksdb-8.1.1)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY})
unset(_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY)

get_target_property(rocksdb_SOURCE_DIR rocksdb SOURCE_DIR)
set(ROCKSDB_INCLUDE_DIR ${rocksdb_SOURCE_DIR}/include)

# Expose the public headers to consumers of the target inside this build tree,
# and make sure the external lz4 build (Lz4.BUILD) runs before rocksdb.
target_include_directories(rocksdb PUBLIC $<BUILD_INTERFACE:${ROCKSDB_INCLUDE_DIR}>)
add_dependencies(rocksdb Lz4.BUILD)


================================================
FILE: thirdparty/rocksdb/rocksdb.android.patch
================================================
diff --git a/env/io_posix.cc b/env/io_posix.cc
index 0ec0e9c83..a78ac5a13 100644
--- a/env/io_posix.cc
+++ b/env/io_posix.cc
@@ -27,7 +27,7 @@
 #include <sys/mman.h>
 #include <sys/stat.h>
 #include <sys/types.h>
-#ifdef OS_LINUX
+#if defined(OS_LINUX) || defined(OS_ANDROID)
 #include <sys/statfs.h>
 #include <sys/sysmacros.h>
 #endif


================================================
FILE: thirdparty/sparsehash/CMakeLists.txt
================================================
# Place the hand-maintained sparseconfig.h that lives next to this file into
# the bundled sparsehash 2.0.4 source tree.
set(SPARSE_SRC "${CMAKE_CURRENT_SOURCE_DIR}/sparseconfig.h")
set(SPARSE_DST "${CMAKE_CURRENT_SOURCE_DIR}/sparsehash-2.0.4/src/sparsehash/internal/sparseconfig.h")
get_filename_component(DESTINATION_DIR "${SPARSE_DST}" DIRECTORY)
# Copy only when the destination is missing: a header that already exists is
# never overwritten, so later edits to the local sparseconfig.h are not
# propagated automatically.
if(NOT EXISTS "${SPARSE_DST}")
    file(COPY "${SPARSE_SRC}" DESTINATION "${DESTINATION_DIR}")
endif()

# Interface-only target: it compiles nothing and merely adds the sparsehash
# include directory to whatever links against it.
add_library(sparsehash INTERFACE)
target_include_directories(
    sparsehash INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}/sparsehash-2.0.4/src/"
  )


================================================
FILE: thirdparty/sparsehash/sparseconfig.h
================================================
/*
 * Configuration macros for the bundled sparsehash library.
 *
 * The values are fixed by hand in this file. The CMakeLists.txt next to it
 * copies the file to sparsehash-2.0.4/src/sparsehash/internal/sparseconfig.h
 * when that destination does not exist yet.
 *
 * NOTE: This file is for internal use only.
 *       Do not use these #defines in your own program!
 */

/* Namespace for Google classes */
#define GOOGLE_NAMESPACE ::google

/* the location of the header defining hash functions */
#define HASH_FUN_H <functional>

/* the namespace of the hash<> function */
#define HASH_NAMESPACE std

/* Define to 1 if you have the <inttypes.h> header file. */
#define HAVE_INTTYPES_H 1

/* Define to 1 if the system has the type `long long'. */
#define HAVE_LONG_LONG 1

/* Define to 1 if you have the `memcpy' function. */
#define HAVE_MEMCPY 1

/* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H 1

/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1

/* Define to 1 if the system has the type `uint16_t'. */
#define HAVE_UINT16_T 1

/* Define to 1 if the system has the type `u_int16_t'. */
#define HAVE_U_INT16_T 1

/* Define to 1 if the system has the type `__uint16'. */
/* #undef HAVE___UINT16 */

/* The system-provided hash function including the namespace.
 * With the definitions above this expands to std::hash. */
#define SPARSEHASH_HASH HASH_NAMESPACE::hash

/* Stops putting the code inside the Google namespace
 * (expands to the closing brace that matches _START_GOOGLE_NAMESPACE_). */
#define _END_GOOGLE_NAMESPACE_ }

/* Puts following code inside the Google namespace
 * (expands to the opening `namespace google {`). */
#define _START_GOOGLE_NAMESPACE_ namespace google {


================================================
FILE: thirdparty/yaml-cpp/CMakeLists.txt
================================================
# Configure and add the bundled yaml-cpp 0.6.3 sources with tests, tools and
# contrib code switched off.
set(YAML_CPP_BUILD_TESTS OFF CACHE BOOL "Disable testing in yaml-cpp" FORCE)
set(YAML_CPP_BUILD_TOOLS OFF CACHE BOOL "Disable parse tools in yaml-cpp" FORCE)
set(YAML_CPP_BUILD_CONTRIB OFF CACHE BOOL "Disable contrib stuff in yaml-cpp" FORCE)

# Point CMAKE_ARCHIVE_OUTPUT_DIRECTORY at EXTERNAL_LIB_DIR only while the
# yaml-cpp targets are being created, then restore the caller's value.
set(_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY})
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${EXTERNAL_LIB_DIR})
add_subdirectory(yaml-cpp-0.6.3)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY})
unset(_SAVED_CMAKE_ARCHIVE_OUTPUT_DIRECTORY)

# Compiler-specific warning suppression, applied to the yaml-cpp target only.
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
    target_compile_options(yaml-cpp PRIVATE -Wno-shadow)
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
    target_compile_options(yaml-cpp PRIVATE -Wno-effc++)
endif()

================================================
FILE: tools/CMakeLists.txt
================================================
# Entry point for the tools/ tree. The two project modules included here are
# expected to provide the helper commands used below (git_version,
# cc_directory); their definitions are not visible in this file.
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

# Retrieve version from git repository
git_version(ZVEC_VERSION ${CMAKE_CURRENT_SOURCE_DIR})

# Add repository
cc_directory(core)

================================================
FILE: tools/core/CMakeLists.txt
================================================
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

# Libraries shared by every index tool below, in link order. The per-tool lists
# only differ in whether `roaring` is inserted before `core_interface`.
set(_CORE_TOOL_BASE_LIBS
    gflags yaml-cpp magic_enum
    core_framework core_metric core_quantizer core_utility
    core_knn_flat core_knn_flat_sparse
    core_knn_hnsw core_knn_hnsw_sparse core_knn_hnsw_rabitq
    core_knn_cluster core_knn_ivf
)
set(_CORE_BUILDER_LIBS ${_CORE_TOOL_BASE_LIBS} core_interface)
set(_CORE_SEARCH_LIBS ${_CORE_TOOL_BASE_LIBS} roaring core_interface)

# Text-to-vecs converter; it links a much smaller set of libraries.
cc_binary(
    NAME txt2vecs
    STRICT PACKED
    SRCS txt2vecs.cc
    INCS ${PROJECT_ROOT_DIR}/src/core/
    LIBS gflags core_framework zvec_ailego
)

# Index builder.
cc_binary(
    NAME local_builder
    STRICT PACKED
    SRCS local_builder.cc
    INCS ${PROJECT_ROOT_DIR}/src/core/
    LIBS ${_CORE_BUILDER_LIBS}
)

# Recall and bench tools.
cc_binary(
    NAME recall
    STRICT PACKED
    SRCS recall.cc
    INCS ${PROJECT_ROOT_DIR}/src/core/
    LIBS ${_CORE_SEARCH_LIBS}
)

cc_binary(
    NAME bench
    STRICT PACKED
    SRCS bench.cc
    INCS ${PROJECT_ROOT_DIR}/src/core/
    LIBS ${_CORE_SEARCH_LIBS}
)

# "_original" variants; the recall/bench ones additionally compile flow.cc.
cc_binary(
    NAME recall_original
    STRICT PACKED
    SRCS recall_original.cc flow.cc
    INCS ${PROJECT_ROOT_DIR}/src/core/
    LIBS ${_CORE_SEARCH_LIBS}
)

cc_binary(
    NAME bench_original
    STRICT PACKED
    SRCS bench_original.cc flow.cc
    INCS ${PROJECT_ROOT_DIR}/src/core/
    LIBS ${_CORE_SEARCH_LIBS}
)

cc_binary(
    NAME local_builder_original
    STRICT PACKED
    SRCS local_builder_original.cc
    INCS ${PROJECT_ROOT_DIR}/src/core/
    LIBS ${_CORE_BUILDER_LIBS}
)


================================================
FILE: tools/core/README.md
================================================

# Benchmarking scripts

This directory contains benchmarking scripts and reproducing steps.

## COHERE experiments

### Getting COHERE Data

Please download the COHERE 10M dataset to cohere_large_10m as follows:

```bash
... ...           
neighbors.parquet    
shuffle_train-00-of-10.parquet     
shuffle_train-01-of-10.parquet          
shuffle_train-02-of-10.parquet  
shuffle_train-03-of-10.parquet 
shuffle_train-04-of-10.parquet  
shuffle_train-05-of-10.parquet 
shuffle_train-06-of-10.parquet
shuffle_train-07-of-10.parquet
shuffle_train-08-of-10.parquet
shuffle_train-09-of-10.parquet
scalar_labels.parquet     
test.parquet      
```

For convenience, we prepared a docker image with cohere bench datasets: registry.cn-hongkong.cr.aliyuncs.com/zvec/cohere-bench-data. 

You can run a container as follows:

```bash
docker run -it --net=host -d -e DEBUG_MODE=true  --user root --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -v /home/zvec/:/home/zvec/  -w /home/zvec --name=cohere_bench zvec-registry.cn-hongkong.cr.aliyuncs.com/zvec/cohere-bench-data:0.0.1 bash

docker exec -it cohere_bench bash
```

The datasets are located at /tmp/cohere/.

### Preparing Environment 
Clone code and init:
```bash
$ git clone git@github.com:alibaba/zvec.git
$ cd zvec
$ git submodule update --init
```

Build source code:
```bash
$ cd /home/zvec/workspace/zvec
$ mkdir build
$ cd build
$ cmake -DENABLE_SKYLAKE=ON -DCMAKE_BUILD_TYPE=Release ..
$ cmake --build . -j
```

### Converting Dataset 
Export vector data using python script:
```bash
$ mkdir 10m.output
$ python3 convert_cohere_parquet.py
```

Convert vector data to binary formatted file.
```bash
$ /home/zvec/workspace/zvec/build/bin/txt2vecs --input=cohere_train_vector_10m.txt --output=cohere_train_vector_10m.zvec.vecs --dimension=768
```

We've also prepared preprocessed binary formatted files, which can be found in the container below:

```bash
root@iZj6caifjouj5yu8xgsiysZ:/home/zvec# ls -al /tmp/cohere/*zvec 
/tmp/cohere/cohere_large_10m_zvec:
total 30204572
drwxr-xr-x 2 root root        4096 Feb  5 13:12 .
drwxr-xr-x 6 root root        4096 Feb  6 03:38 ..
-rw-r--r-- 1 root root     8664837 Feb  5 13:06 cohere_test_vector_10m.1000.new.txt
-rw-r--r-- 1 root root 30920004295 Feb  5 13:04 cohere_train_vector_10m.new.zvec.vecs
-rw-r--r-- 1 root root      792835 Feb  5 13:05 neighbors.txt

/tmp/cohere/cohere_medium_1m_zvec:
total 3028688
drwxr-xr-x 2 root root       4096 Feb  5 13:14 .
drwxr-xr-x 6 root root       4096 Feb  6 03:38 ..
-rw-r--r-- 1 root root    8661108 Feb  5 13:07 cohere_test_vector_1m.1000.new.txt
-rw-r--r-- 1 root root 3092004295 Feb  5 13:08 cohere_train_vector_1m.new.zvec.vecs
-rw-r--r-- 1 root root     692969 Feb  5 13:08 neighbors.txt
```

### Preparing Bench Config 
Prepare Build Config

```yaml
BuilderCommon:
    BuilderClass: HnswStreamer
    BuildFile: /tmp/cohere/cohere_large_10m_zvec/cohere_train_vector_10m.zvec.vecs
    NeedTrain: true 
    TrainFile: /tmp/cohere/cohere_large_10m_zvec/cohere_train_vector_10m.zvec.vecs
    DumpPath:  /home/zvec/bench/config/cohere_train_vector_10m.dump.index
    IndexPath: /home/zvec/bench/config/cohere_train_vector_10m.index

    ConverterName: CosineInt8Converter
    MetricName: Cosine

    ThreadCount: 16

BuilderParams: 
    proxima.general.builder.thread_count: !!int 16
    proxima.hnsw.builder.thread_count: !!int 16
```

Prepare Search Config

```yaml
SearcherCommon:
    SearcherClass: HnswStreamer
    IndexPath: /home/zvec/bench/config/cohere_train_vector_10m.index
    TopK: 1,10,50,100 
    QueryFile: /tmp/cohere/cohere_large_10m_zvec/cohere_test_vector_1000.new.txt
    QueryType: float 
    QueryFirstSep: ";" 
    QuerySecondSep: " "
    GroundTruthFile: /tmp/cohere/cohere_large_10m_zvec/neighbors.txt
    RecallThreadCount: 1
    BenchThreadCount: 16 
    BenchIterCount: 1000000000 
    CompareById: true

SearcherParams: 
    proxima.hnsw.streamer.ef: !!int 250
```

### Building Index 
Conduct Build 
```bash
$ /home/zvec/workspace/zvec/build/bin/local_builder_original ./build.yaml 
```

### Performing Bench
Conduct Recall 
```bash
$ /home/zvec/workspace/zvec/build/bin/recall_original ./search.yaml
```

Conduct Bench 
```bash
$ /home/zvec/workspace/zvec/build/bin/bench_original ./search.yaml
```


================================================
FILE: tools/core/bench.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "bench_result.h"
#include "helper.h"

// Debug switch local to this translation unit; off by default.
static bool g_debug_mode = false;

//------------------------------------------------------------
// Bench
//------------------------------------------------------------
// Retrieval mode handed to Bench.
enum RetrievalMode {
  RM_UNDEFINED = 0,
  RM_DENSE = 1,
  RM_SPARSE = 2,
};

// Filter mode handed to Bench.
enum FilterMode {
  FM_UNDEFINED = 0,
  FM_NONE = 1,
  FM_TAG = 2,
};

// Multi-threaded dense-vector search benchmark.
//
// Typical usage (see bench()): construct, load_query(), set_tag_lists(),
// then run(). Queries are grouped into "batches" of batch_count_ vectors;
// every worker thread walks the batches with a stride of threads_ and feeds
// the elapsed time of each batch into bench_result_.
//
// T is the element type of the query vectors (float, int8_t, uint32_t or
// uint64_t -- the only types accepted by do_knn_search()).
template <typename T>
class Bench {
 public:
  // threads        worker count; 0 lets the ThreadPool choose and the pool's
  //                own count() is then used.
  // bench_secs     wall-clock budget after which run() raises STOP_NOW.
  // batch_count    queries per timed batch; values below 1 are clamped to 1.
  // retrieval_mode only RM_DENSE is executed by this class.
  // filter_mode    FM_TAG attaches a tag prefilter to every search.
  Bench(size_t threads, size_t bench_secs, size_t batch_count,
        RetrievalMode &retrieval_mode, FilterMode filter_mode)
      : threads_(threads),
        bench_secs_(bench_secs),
        batch_count_(batch_count),
        retrieval_mode_{retrieval_mode},
        filter_mode_{filter_mode} {
    if (threads_ == 0) {
      pool_ = make_shared<ThreadPool>(false);
      threads_ = pool_->count();
      cout << "Using cpu count as thread pool count[" << threads_ << "]"
           << endl;
    } else {
      pool_ = make_shared<ThreadPool>(threads_, false);
      cout << "Using thread pool count[" << threads_ << "]" << endl;
    }
    if (batch_count_ < 1) {
      batch_count_ = 1;
    }
  }

  // SIGINT handler installed by run(): the first signal requests a graceful
  // stop, a second one terminates the process.
  // NOTE(review): iostream output and exit() are not async-signal-safe, and
  // STOP_NOW is a plain bool shared with the worker threads -- consider
  // volatile sig_atomic_t / std::atomic if this ever misbehaves.
  static void stop(int signo) {
    if (STOP_NOW) {
      exit(signo);
    }
    STOP_NOW = true;
    cout << "\rTrying to stop. press [Ctrl+C] again kill immediately." << endl
         << flush;
  }

  // Reads the query file and builds the per-batch containers.
  //
  // Returns false when the file cannot be read, when it yields no usable
  // dense vector, or when T is neither float nor int8_t.
  // NOTE(review): in the last case the batches stay loaded, and bench()
  // does not check the result for the binary types, so those still run --
  // confirm whether that is intended.
  bool load_query(const std::string &query_file, const std::string &first_sep,
                  const std::string &second_sep) {
    TxtInputReader<T> reader;
    vector<vector<T>> queries;
    vector<SparseData<T>> sparse_data;
    vector<vector<uint64_t>> taglists;

    if (!reader.load_query(query_file, first_sep, second_sep, queries,
                           sparse_data, taglists)) {
      LOG_ERROR("Load query error");
      return false;
    }

    // queries[0] fixes the dimension, and dim_ is later used as a divisor in
    // do_knn_search_batch(); reject empty input before touching either.
    if (queries.empty() || queries[0].empty()) {
      LOG_ERROR("No dense query vector found in query file");
      return false;
    }
    dim_ = queries[0].size();

    if (batch_count_ == 1) {
      // One query per batch: the per-query containers are used as they are.
      batch_queries_ = queries;

      for (size_t i = 0; i < sparse_data.size(); ++i) {
        batch_sparse_counts_.push_back(
            vector<uint32_t>(1, sparse_data[i].count));
        batch_sparse_indices_.push_back(sparse_data[i].indices);
        batch_sparse_features_.push_back(sparse_data[i].features);
      }

      for (size_t i = 0; i < taglists.size(); ++i) {
        batch_taglists_.push_back(vector<vector<uint64_t>>(1, taglists[i]));
      }
    } else {
      // Several queries per batch: concatenate them back to back. The last
      // batch is padded by wrapping around to the first queries.
      const size_t num_batch =
          (queries.size() + batch_count_ - 1) / batch_count_;
      size_t idx = 0;
      for (size_t n = 0; n < num_batch; ++n) {
        vector<T> batch_query;
        vector<uint32_t> batch_sparse_count;
        vector<uint32_t> batch_sparse_indices;
        vector<T> batch_sparse_feature;
        vector<vector<uint64_t>> batch_taglists;

        for (size_t i = 0; i < batch_count_; ++i) {
          batch_query.insert(batch_query.end(), queries[idx].begin(),
                             queries[idx].end());

          // The sparse side may hold fewer entries than the dense side;
          // never index past its end.
          if (idx < sparse_data.size()) {
            const SparseData<T> &sparse = sparse_data[idx];
            batch_sparse_count.push_back(sparse.count);
            batch_sparse_indices.insert(batch_sparse_indices.end(),
                                        sparse.indices.begin(),
                                        sparse.indices.end());
            batch_sparse_feature.insert(batch_sparse_feature.end(),
                                        sparse.features.begin(),
                                        sparse.features.end());
          }

          if (idx < taglists.size()) {
            batch_taglists.push_back(taglists[idx]);
          }

          idx = (idx + 1) % queries.size();
        }

        batch_queries_.push_back(std::move(batch_query));
        batch_sparse_counts_.push_back(std::move(batch_sparse_count));
        batch_sparse_indices_.push_back(std::move(batch_sparse_indices));
        batch_sparse_features_.push_back(std::move(batch_sparse_feature));
        batch_taglists_.push_back(std::move(batch_taglists));
      }
    }

    if (typeid(T) == typeid(float)) {
      qmeta_.set_meta(IndexMeta::DataType::DT_FP32, dim_);
    } else if (typeid(T) == typeid(int8_t)) {
      qmeta_.set_meta(IndexMeta::DataType::DT_INT8, dim_);
    } else {
      LOG_ERROR("unsupported type");
      return false;
    }

    cout << "Load query done!" << endl;
    return true;
  }

  // Runs the benchmark on all worker threads and prints the result.
  //
  // Stops when every worker has consumed its share of max_iter iterations,
  // when bench_secs_ have elapsed, or when SIGINT is received. Does nothing
  // if no query batch has been loaded. Note that query_param is modified
  // (topk / is_linear) and kept as the template for per-iteration clones.
  void run(core_interface::Index::Pointer index,
           core_interface::BaseIndexQueryParam::Pointer query_param,
           int max_iter, int topk) {
    // Check
    if (batch_queries_.empty()) {
      return;
    }

    query_param_ = query_param;
    query_param_->topk = topk;
    query_param_->is_linear = false;

    // Do bench
    signal(SIGINT, stop);
    bench_result_.mark_start();
    auto start_time = Monotime::MilliSeconds();
    for (size_t i = 0; i < threads_; ++i) {
      pool_->execute(this, &Bench<T>::start_bench, index, max_iter, &STOP_NOW);
    }

    // Poll once per millisecond so the time budget is enforced promptly.
    while (!pool_->is_finished()) {
      this_thread::sleep_for(chrono::milliseconds(1));
      if (Monotime::MilliSeconds() - start_time > bench_secs_ * 1000) {
        STOP_NOW = true;
      }
    }

    pool_->wait_finish();

    bench_result_.mark_end();
    bench_result_.print();
  }

  // Stores copies of the tag tables that are handed to
  // FilterResultCache::filter() when filter_mode_ is FM_TAG.
  void set_tag_lists(const std::vector<std::vector<uint64_t>> &id_to_tags_list,
                     const std::vector<uint64_t> &tag_key_list) {
    id_to_tags_list_ = id_to_tags_list;
    tag_key_list_ = tag_key_list;
  }

 private:
  // Worker body: handles iterations thread_index, thread_index + threads_,
  // ... below max_iter, until *is_stop becomes true or an error occurs.
  void start_bench(core_interface::Index::Pointer index, size_t max_iter,
                   const bool *is_stop) {
    // The mode never changes during a run, so reject an unsupported one once
    // instead of logging (and timing nothing) on every iteration.
    if (retrieval_mode_ != RM_DENSE) {
      const char *mode = retrieval_mode_ == RM_SPARSE ? "Sparse" : "Undefined";
      LOG_ERROR("unsupported retrieval mode: %s", mode);
      return;
    }

    const size_t thread_index = pool_->indexof_this();
    const size_t batch_total = batch_queries_.size();

    for (size_t i = thread_index; i < max_iter && !*is_stop; i += threads_) {
      const size_t idx = i % batch_total;

      // prefilter
      // filter_cache is declared before query_param on purpose: the filter
      // lambda refers to it, so it has to be destroyed last.
      FilterResultCache filter_cache;
      std::shared_ptr<IndexFilter> filter_ptr = nullptr;
      if (filter_mode_ == FM_TAG) {
        // A query file without tags leaves batch_taglists_ shorter than
        // batch_queries_; check the index before dereferencing.
        if (idx >= batch_taglists_.size() ||
            batch_taglists_[idx].size() != 1) {
          LOG_ERROR("query tag list not equal to one!");
          return;
        }

        int filter_ret = filter_cache.filter(
            id_to_tags_list_, batch_taglists_[idx][0], tag_key_list_);
        if (filter_ret != 0) {
          LOG_ERROR("prefilter failed, idx: %zu", idx);
          return;
        }

        auto filterFunc = [&](uint64_t key) { return filter_cache.find(key); };

        filter_ptr = std::make_shared<IndexFilter>();
        filter_ptr->set(filterFunc);
      }

      // Each iteration works on its own clone so the filter is not shared
      // between threads.
      auto query_param = query_param_->Clone();
      query_param->filter = filter_ptr;

      // Do knn_search
      uint64_t start = Monotime::MicroSeconds();
      int ret = 0;
      if (batch_count_ == 1) {
        ret = do_knn_search<T>(index, batch_queries_[idx], query_param);
      } else {
        ret = do_knn_search_batch<T>(index, batch_queries_[idx], query_param);
      }

      if (ret != 0) {
        LOG_ERROR("Failed to knn search, ret=%d %s", ret,
                  IndexError::What(ret));
        return;
      }

      uint64_t end = Monotime::MicroSeconds();

      // Do sample
      bench_result_.add_time(batch_count_, end - start);
    }
  }

  // Searches a single dense query. Returns the negative error code from
  // Index::Search(), otherwise 0; an empty result list is only logged.
  template <typename U>
  typename std::enable_if<
      std::is_same<float, U>::value || std::is_same<int8_t, U>::value ||
          std::is_same<uint32_t, U>::value || std::is_same<uint64_t, U>::value,
      int>::type
  do_knn_search(core_interface::Index::Pointer index, const vector<U> &query,
                core_interface::BaseIndexQueryParam::Pointer query_param) {
    core_interface::DenseVector dense_query;
    dense_query.data = query.data();
    core_interface::VectorData query_data;
    query_data.vector = dense_query;

    core_interface::SearchResult search_result;
    int ret = index->Search(query_data, query_param, &search_result);
    if (ret < 0) {
      return ret;
    }

    if (search_result.doc_list_.empty()) {
      LOG_ERROR("Search results is empty");
    }

    return 0;
  }

  // Searches every dim_-sized slice of a concatenated batch, one Search()
  // call per slice. Stops at the first negative return code.
  template <typename U>
  typename std::enable_if<
      std::is_same<float, U>::value || std::is_same<int8_t, U>::value ||
          std::is_same<uint32_t, U>::value || std::is_same<uint64_t, U>::value,
      int>::type
  do_knn_search_batch(
      core_interface::Index::Pointer index, const vector<U> &query,
      core_interface::BaseIndexQueryParam::Pointer query_param) {
    // For batch search, we search each query separately
    size_t qnum = query.size() / dim_;
    for (size_t i = 0; i < qnum; ++i) {
      core_interface::DenseVector dense_query;
      dense_query.data = query.data() + i * dim_;
      core_interface::VectorData query_data;
      query_data.vector = dense_query;

      core_interface::SearchResult search_result;
      int ret = index->Search(query_data, query_param, &search_result);
      if (ret < 0) {
        return ret;
      }

      if (search_result.doc_list_.empty()) {
        LOG_ERROR("Search results is empty for batch query %zu", i);
      }
    }

    return 0;
  }

 private:
  IndexQueryMeta qmeta_{};
  size_t threads_;
  size_t bench_secs_;
  size_t batch_count_;
  size_t dim_{0};  // dimension of the first loaded query; 0 until loaded
  shared_ptr<ThreadPool> pool_;
  core_interface::BaseIndexQueryParam::Pointer query_param_;

  // One entry per batch. With batch_count_ > 1 each entry holds the
  // concatenation of the batch's queries.
  vector<vector<T>> batch_queries_;
  vector<vector<uint32_t>> batch_sparse_counts_;
  vector<vector<uint32_t>> batch_sparse_indices_;
  vector<vector<T>> batch_sparse_features_;
  vector<vector<vector<uint64_t>>> batch_taglists_;

  // Tag lists for filtering
  std::vector<std::vector<uint64_t>> id_to_tags_list_;
  std::vector<uint64_t> tag_key_list_;

  BenchResult bench_result_;
  RetrievalMode retrieval_mode_{RM_UNDEFINED};
  FilterMode filter_mode_{FM_NONE};
  // Stop request shared by the signal handler, run() and all workers.
  static bool STOP_NOW;
};

template <typename T>
bool Bench<T>::STOP_NOW = false;

//------------------------------------------------------------
// Sparse Bench
//------------------------------------------------------------
// Multi-threaded sparse-vector search benchmark.
//
// Typical usage (see bench_sparse()): construct, load_query(),
// set_tag_lists(), then run(). Queries are grouped into batches of
// batch_count_ sparse vectors; every worker thread walks the batches with a
// stride of threads_ and feeds the elapsed time of each batch into
// bench_result_.
//
// T is the feature value type. Only float is actually searched; the other
// accepted types make do_knn_search() return IndexError_Unsupported.
template <typename T>
class SparseBench {
 public:
  // threads     worker count; 0 lets the ThreadPool choose and the pool's
  //             own count() is then used.
  // bench_secs  wall-clock budget after which run() raises STOP_NOW.
  // batch_count queries per timed batch; values below 1 are clamped to 1.
  // filter_mode FM_TAG attaches a tag prefilter to every search.
  SparseBench(size_t threads, size_t bench_secs, size_t batch_count,
              FilterMode filter_mode)
      : threads_(threads),
        bench_secs_(bench_secs),
        batch_count_(batch_count),
        filter_mode_{filter_mode} {
    if (threads_ == 0) {
      pool_ = make_shared<ThreadPool>(false);
      threads_ = pool_->count();
      cout << "Using cpu count as thread pool count[" << threads_ << "]"
           << endl;
    } else {
      pool_ = make_shared<ThreadPool>(threads_, false);
      cout << "Using thread pool count[" << threads_ << "]" << endl;
    }
    if (batch_count_ < 1) {
      batch_count_ = 1;
    }
  }

  // SIGINT handler installed by run(): the first signal requests a graceful
  // stop, a second one terminates the process.
  // NOTE(review): iostream output and exit() are not async-signal-safe, and
  // STOP_NOW is a plain bool shared with the worker threads -- consider
  // volatile sig_atomic_t / std::atomic if this ever misbehaves.
  static void stop(int signo) {
    if (STOP_NOW) {
      exit(signo);
    }
    STOP_NOW = true;
    cout << "\rTrying to stop. press [Ctrl+C] again kill immediately." << endl
         << flush;
  }

  // Reads the query file and builds the per-batch sparse containers.
  // Returns false when the file cannot be read or T is neither float nor
  // int8_t.
  bool load_query(const std::string &query_file, const std::string &first_sep,
                  const std::string &second_sep) {
    TxtInputReader<T> reader;
    vector<vector<T>> queries;
    vector<SparseData<T>> sparse_data;
    vector<vector<uint64_t>> taglists;

    if (!reader.load_query(query_file, first_sep, second_sep, queries,
                           sparse_data, taglists)) {
      LOG_ERROR("Load query error");
      return false;
    }

    // Unbatched copy; do_knn_search_batch() reads the queries from here.
    linear_sparse_data_ = sparse_data;

    if (batch_count_ == 1) {
      for (size_t i = 0; i < sparse_data.size(); ++i) {
        batch_sparse_counts_.push_back(
            vector<uint32_t>(1, sparse_data[i].count));
        batch_sparse_indices_.push_back(sparse_data[i].indices);
        batch_sparse_features_.push_back(sparse_data[i].features);
      }

      for (size_t i = 0; i < taglists.size(); ++i) {
        batch_taglists_.push_back(vector<vector<uint64_t>>(1, taglists[i]));
      }
    } else {
      // Size and wrap the batches on the sparse data itself (the container
      // that is indexed below), not on the dense query list, so a size
      // mismatch between the two can neither run past the end nor drop
      // every query.
      const size_t total = sparse_data.size();
      const size_t num_batch = (total + batch_count_ - 1) / batch_count_;
      size_t idx = 0;
      for (size_t n = 0; n < num_batch; ++n) {
        vector<uint32_t> batch_sparse_count;
        vector<uint32_t> batch_sparse_indices;
        vector<T> batch_sparse_feature;
        vector<vector<uint64_t>> batch_taglists;

        for (size_t i = 0; i < batch_count_; ++i) {
          const SparseData<T> &sparse = sparse_data[idx];
          batch_sparse_count.push_back(sparse.count);
          batch_sparse_indices.insert(batch_sparse_indices.end(),
                                      sparse.indices.begin(),
                                      sparse.indices.end());
          batch_sparse_feature.insert(batch_sparse_feature.end(),
                                      sparse.features.begin(),
                                      sparse.features.end());

          if (idx < taglists.size()) {
            batch_taglists.push_back(taglists[idx]);
          }

          // The last batch is padded by wrapping around to the first query.
          idx = (idx + 1) % total;
        }

        batch_sparse_counts_.push_back(std::move(batch_sparse_count));
        batch_sparse_indices_.push_back(std::move(batch_sparse_indices));
        batch_sparse_features_.push_back(std::move(batch_sparse_feature));
        batch_taglists_.push_back(std::move(batch_taglists));
      }
    }

    if (typeid(T) == typeid(float)) {
      qmeta_.set_data_type(IndexMeta::DataType::DT_FP32);
    } else if (typeid(T) == typeid(int8_t)) {
      qmeta_.set_data_type(IndexMeta::DataType::DT_INT8);
    } else {
      LOG_ERROR("unsupported type");
      return false;
    }

    cout << "Load query done!" << endl;
    return true;
  }

  // Runs the benchmark on all worker threads and prints the result.
  //
  // Stops when every worker has consumed its share of max_iter iterations,
  // when bench_secs_ have elapsed, or when SIGINT is received. Does nothing
  // if no query batch has been loaded. Note that query_param is modified
  // (topk / is_linear) and kept as the template for per-iteration clones.
  void run(core_interface::Index::Pointer index,
           core_interface::BaseIndexQueryParam::Pointer query_param,
           int max_iter, int topk) {
    // Check
    if (batch_sparse_counts_.empty()) {
      return;
    }

    query_param_ = query_param;
    query_param_->topk = topk;
    query_param_->is_linear = false;

    // Do bench
    signal(SIGINT, stop);
    bench_result_.mark_start();
    auto start_time = Monotime::MilliSeconds();
    for (size_t i = 0; i < threads_; ++i) {
      pool_->execute(this, &SparseBench<T>::start_bench, index, max_iter,
                     &STOP_NOW);
    }

    // Poll once per millisecond so the time budget is enforced promptly.
    while (!pool_->is_finished()) {
      this_thread::sleep_for(chrono::milliseconds(1));
      if (Monotime::MilliSeconds() - start_time > bench_secs_ * 1000) {
        STOP_NOW = true;
      }
    }

    pool_->wait_finish();

    bench_result_.mark_end();
    bench_result_.print();
  }

  // Stores copies of the tag tables that are handed to
  // FilterResultCache::filter() when filter_mode_ is FM_TAG.
  void set_tag_lists(const std::vector<std::vector<uint64_t>> &id_to_tags_list,
                     const std::vector<uint64_t> &tag_key_list) {
    id_to_tags_list_ = id_to_tags_list;
    tag_key_list_ = tag_key_list;
  }

 private:
  // Worker body: handles iterations thread_index, thread_index + threads_,
  // ... below max_iter, until *is_stop becomes true or an error occurs.
  void start_bench(core_interface::Index::Pointer index, size_t max_iter,
                   const bool *is_stop) {
    const size_t thread_index = pool_->indexof_this();
    const size_t sparse_query_size = batch_sparse_indices_.size();

    for (size_t i = thread_index; i < max_iter && !*is_stop; i += threads_) {
      const size_t idx = i % sparse_query_size;

      // prefilter
      // filter_cache is declared before query_param on purpose: the filter
      // lambda refers to it, so it has to be destroyed last.
      FilterResultCache filter_cache;
      std::shared_ptr<IndexFilter> filter_ptr = nullptr;
      if (filter_mode_ == FM_TAG) {
        // A query file without tags leaves batch_taglists_ shorter than the
        // sparse batches; check the index before dereferencing.
        if (idx >= batch_taglists_.size() ||
            batch_taglists_[idx].size() != 1) {
          LOG_ERROR("query tag list not equal to one!");
          return;
        }

        int filter_ret = filter_cache.filter(
            id_to_tags_list_, batch_taglists_[idx][0], tag_key_list_);
        if (filter_ret != 0) {
          LOG_ERROR("prefilter failed, idx: %zu", idx);
          return;
        }

        auto filterFunc = [&](uint64_t key) { return filter_cache.find(key); };

        filter_ptr = std::make_shared<IndexFilter>();
        filter_ptr->set(filterFunc);
      }

      // Each iteration works on its own clone so the filter is not shared
      // between threads.
      auto query_param = query_param_->Clone();
      query_param->filter = filter_ptr;

      // Do knn_search
      uint64_t start = Monotime::MicroSeconds();
      int ret = 0;
      if (batch_count_ == 1) {
        if (batch_sparse_counts_[idx].size() != 1) {
          LOG_ERROR("Sparse count size should be 1, since batch count is 1");
          return;
        }
        ret = do_knn_search<T>(index, batch_sparse_counts_[idx][0],
                               batch_sparse_indices_[idx],
                               batch_sparse_features_[idx], query_param);
      } else {
        ret = do_knn_search_batch<T>(
            index, batch_sparse_counts_[idx], batch_sparse_indices_[idx],
            batch_sparse_features_[idx], idx, query_param);
      }

      if (ret != 0) {
        LOG_ERROR("Failed to sparse knn search, ret=%d %s", ret,
                  IndexError::What(ret));
        return;
      }

      uint64_t end = Monotime::MicroSeconds();

      // Do sample
      bench_result_.add_time(batch_count_, end - start);
    }
  }

  // sparse search - single query
  // Returns the negative error code from Index::Search(), otherwise 0; an
  // empty result list is only logged.
  template <typename U>
  typename std::enable_if<std::is_same<float, U>::value, int>::type
  do_knn_search(core_interface::Index::Pointer index,
                const uint32_t sparse_count,
                const vector<uint32_t> &sparse_indices,
                const vector<U> &sparse_feature,
                core_interface::BaseIndexQueryParam::Pointer query_param) {
    core_interface::SparseVector sparse_query;
    sparse_query.count = sparse_count;
    sparse_query.indices = sparse_indices.data();
    sparse_query.values = sparse_feature.data();
    core_interface::VectorData query_data;
    query_data.vector = sparse_query;

    core_interface::SearchResult search_result;
    int ret = index->Search(query_data, query_param, &search_result);
    if (ret < 0) {
      return ret;
    }

    if (search_result.doc_list_.empty()) {
      LOG_ERROR("Search results is empty");
    }

    return 0;
  }

  // Non-float feature types are not searchable.
  template <typename U>
  typename std::enable_if<std::is_same<int8_t, U>::value ||
                              std::is_same<uint32_t, U>::value ||
                              std::is_same<uint64_t, U>::value,
                          int>::type
  do_knn_search(core_interface::Index::Pointer /*index*/,
                const uint32_t /*sparse_count*/,
                const vector<uint32_t> & /*sparse_indices*/,
                const vector<U> & /*sparse_feature*/,
                core_interface::BaseIndexQueryParam::Pointer /*query_param*/) {
    return IndexError_Unsupported;
  }

  // sparse search - batch
  // Issues one Search() per query of batch batch_idx, reading the queries
  // from linear_sparse_data_. Stops at the first negative return code.
  template <typename U>
  typename std::enable_if<std::is_same<float, U>::value, int>::type
  do_knn_search_batch(
      core_interface::Index::Pointer index,
      const vector<uint32_t> &sparse_count,
      const vector<uint32_t> & /*sparse_indices*/,
      const vector<U> & /*sparse_feature*/, size_t batch_idx,
      core_interface::BaseIndexQueryParam::Pointer query_param) {
    const size_t total = linear_sparse_data_.size();
    if (total == 0) {
      return 0;
    }

    // For batch search, search each query separately
    for (size_t i = 0; i < sparse_count.size(); ++i) {
      // Wrap around exactly like load_query() does when it pads the last
      // batch, so every batch really performs the batch_count_ searches
      // that start_bench() reports to add_time().
      const size_t query_idx = (batch_idx * batch_count_ + i) % total;

      const auto &single_sparse = linear_sparse_data_[query_idx];
      core_interface::SparseVector sparse_query;
      sparse_query.count = single_sparse.count;
      sparse_query.indices = single_sparse.indices.data();
      sparse_query.values = single_sparse.features.data();
      core_interface::VectorData query_data;
      query_data.vector = sparse_query;

      core_interface::SearchResult search_result;
      int ret = index->Search(query_data, query_param, &search_result);
      if (ret < 0) {
        return ret;
      }

      if (search_result.doc_list_.empty()) {
        LOG_ERROR("Search results is empty for batch query %zu", i);
      }
    }

    return 0;
  }

  // Non-float feature types are not searchable.
  template <typename U>
  typename std::enable_if<std::is_same<int8_t, U>::value ||
                              std::is_same<uint32_t, U>::value ||
                              std::is_same<uint64_t, U>::value,
                          int>::type
  do_knn_search_batch(
      core_interface::Index::Pointer /*index*/,
      const vector<uint32_t> & /*sparse_count*/,
      const vector<uint32_t> & /*sparse_indices*/,
      const vector<U> & /*sparse_feature*/, size_t /*batch_idx*/,
      core_interface::BaseIndexQueryParam::Pointer /*query_param*/) {
    return IndexError_Unsupported;
  }

 private:
  IndexQueryMeta qmeta_{};
  size_t threads_;
  size_t bench_secs_;
  size_t batch_count_;
  core_interface::BaseIndexQueryParam::Pointer query_param_;
  shared_ptr<ThreadPool> pool_;

  // All queries in file order, one entry per query.
  vector<SparseData<T>> linear_sparse_data_;
  // One entry per batch.
  vector<vector<uint32_t>> batch_sparse_counts_;
  vector<vector<uint32_t>> batch_sparse_indices_;
  vector<vector<T>> batch_sparse_features_;
  vector<vector<vector<uint64_t>>> batch_taglists_;

  // Tag lists for filtering
  std::vector<std::vector<uint64_t>> id_to_tags_list_;
  std::vector<uint64_t> tag_key_list_;

  FilterMode filter_mode_{FM_NONE};
  BenchResult bench_result_;
  // Stop request shared by the signal handler, run() and all workers.
  static bool STOP_NOW;
};
template <typename T>
bool SparseBench<T>::STOP_NOW = false;

// Verifies that the YAML document carries every entry the benchmark cannot
// run without: [IndexCommon] with IndexConfig / IndexPath / TopK / QueryFile,
// and [QueryConfig] with QueryParam. The first missing entry is logged and
// false is returned; true means all of them are present.
bool check_config(YAML::Node &config_node) {
  // --- IndexCommon section ---
  auto index_common = config_node["IndexCommon"];
  if (!index_common) {
    LOG_ERROR("Can not find [IndexCommon] in config");
    return false;
  }

  auto index_config_entry = index_common["IndexConfig"];
  if (!index_config_entry) {
    LOG_ERROR("Can not find [IndexConfig] in config");
    return false;
  }

  auto index_path_entry = index_common["IndexPath"];
  if (!index_path_entry) {
    LOG_ERROR("Can not find [IndexPath] in config");
    return false;
  }

  auto topk_entry = index_common["TopK"];
  if (!topk_entry) {
    LOG_ERROR("Can not find [TopK] in config");
    return false;
  }

  auto query_file_entry = index_common["QueryFile"];
  if (!query_file_entry) {
    LOG_ERROR("Can not find [QueryFile] in config");
    return false;
  }

  // --- QueryConfig section ---
  auto query_section = config_node["QueryConfig"];
  if (!query_section) {
    LOG_ERROR("Can not find [QueryConfig] in config");
    return false;
  }

  if (query_section["QueryParam"]) {
    return true;
  }
  LOG_ERROR("Can not find [QueryConfig.QueryParam] in config");
  return false;
}

// Prints the one-line command-line synopsis to stdout.
void usage() {
  static const char kSynopsis[] = "Usage: bench CONFIG.yaml [plugin file path]";
  cout << kSynopsis << endl;
}


int bench(std::string &query_type, size_t thread_count, size_t batch_count,
          size_t top_k, string query_file, string &first_sep,
          string &second_sep, size_t bench_secs, size_t iter_count,
          core_interface::Index::Pointer index,
          core_interface::BaseIndexQueryParam::Pointer query_param,
          string &index_dir, RetrievalMode retrieval_mode,
          FilterMode filter_mode) {
  if (filter_mode == FM_TAG && batch_count > 1) {
    LOG_ERROR("filter mode can not be run in batch mode");
    return -1;
  }

  std::vector<std::vector<uint64_t>> id_to_tags_list;
  std::vector<uint64_t> tag_key_list;
  // Load tag lists if available
  load_taglists(index_dir, id_to_tags_list, tag_key_list);

  if (query_type == "float") {
    Bench<float> bench(thread_count, bench_secs, batch_count, retrieval_mode,
                       filter_mode);
    bench.load_query(query_file, first_sep, second_sep);
    bench.set_tag_lists(id_to_tags_list, tag_key_list);
    bench.run(index, query_param, iter_count, top_k);
  } else if (query_type == "int8") {
    Bench<int8_t> bench(thread_count, bench_secs, batch_count, retrieval_mode,
                        filter_mode);
    bench.load_query(query_file, first_sep, second_sep);
    bench.set_tag_lists(id_to_tags_list, tag_key_list);
    bench.run(index, query_param, iter_count, top_k);
  } else if (query_type == "binary") {
    Bench<uint32_t> bench(thread_count, bench_secs, batch_count, retrieval_mode,
                          filter_mode);
    bench.load_query(query_file, first_sep, second_sep);
    bench.set_tag_lists(id_to_tags_list, tag_key_list);
    bench.run(index, query_param, iter_count, top_k);
  } else if (query_type == "binary64") {
    Bench<uint64_t> bench(thread_count, bench_secs, batch_count, retrieval_mode,
                          filter_mode);
    bench.load_query(query_file, first_sep, second_sep);
    bench.set_tag_lists(id_to_tags_list, tag_key_list);
    bench.run(index, query_param, iter_count, top_k);
  } else {
    LOG_ERROR("Can not recognize type: %s", query_type.c_str());
  }

  return 0;
}

// Runs the sparse benchmark for the feature type named by query_type
// ("float" or "int8").
//
// Returns 0 when a benchmark was dispatched, -1 when tag filtering is
// combined with batch mode, when query_type is not recognized, or when the
// query file could not be loaded.
int bench_sparse(std::string &query_type, size_t thread_count,
                 size_t batch_count, size_t top_k, string query_file,
                 string &first_sep, string &second_sep, size_t bench_secs,
                 size_t iter_count, core_interface::Index::Pointer index,
                 core_interface::BaseIndexQueryParam::Pointer query_param,
                 string &index_dir, FilterMode filter_mode) {
  if (filter_mode == FM_TAG && batch_count > 1) {
    LOG_ERROR("filter mode can not be run in batch mode");
    return -1;
  }

  std::vector<std::vector<uint64_t>> id_to_tags_list;
  std::vector<uint64_t> tag_key_list;
  // Load tag lists if available
  load_taglists(index_dir, id_to_tags_list, tag_key_list);

  if (query_type == "float") {
    SparseBench<float> bench(thread_count, bench_secs, batch_count,
                             filter_mode);
    // load_query() has already logged the reason when it fails.
    if (!bench.load_query(query_file, first_sep, second_sep)) {
      return -1;
    }
    bench.set_tag_lists(id_to_tags_list, tag_key_list);
    bench.run(index, query_param, iter_count, top_k);
  } else if (query_type == "int8") {
    SparseBench<int8_t> bench(thread_count, bench_secs, batch_count,
                              filter_mode);
    if (!bench.load_query(query_file, first_sep, second_sep)) {
      return -1;
    }
    bench.set_tag_lists(id_to_tags_list, tag_key_list);
    bench.run(index, query_param, iter_count, top_k);
  } else {
    LOG_ERROR("Can not recognize type: %s", query_type.c_str());
    // Nothing was benchmarked; do not report success.
    return -1;
  }

  return 0;
}

int main(int argc, char *argv[]) {
  if (argc < 2) {
    usage();
    return -1;
  }

  IndexPluginBroker broker;
  std::string error;
  for (int i = 2; i < argc; ++i) {
    if (!broker.emplace(argv[i], &error)) {
      LOG_ERROR("Failed to load plugin: %s (%s)", argv[i], error.c_str());
      return -1;
    }
  }

  YAML::Node config_node;
  try {
    config_node = YAML::LoadFile(argv[1]);
  } catch (...) {
    LOG_ERROR("Load YAML file[%s] failed!", argv[1]);
    return -1;
  }

  if (!check_config(config_node)) {
    return -1;
  }
  auto config_common = config_node["IndexCommon"];

  map<string, int> LOG_LEVEL = {{"debug", IndexLogger::LEVEL_DEBUG},
                                {"info", IndexLogger::LEVEL_INFO},
                                {"warn", IndexLogger::LEVEL_WARN},
                                {"error", IndexLogger::LEVEL_ERROR},
                                {"fatal", IndexLogger::LEVEL_FATAL}};
  string log_level = config_common["LogLevel"]
                         ? config_common["LogLevel"].as<string>()
                         : "debug";
  transform(log_level.begin(), log_level.end(), log_level.begin(), ::tolower);
  if (LOG_LEVEL.find(log_level) != LOG_LEVEL.end()) {
    IndexLoggerBroker::SetLevel(LOG_LEVEL[log_level]);
    zvec::ailego::LoggerBroker::SetLevel(LOG_LEVEL[log_level]);
  }

  // Calculate Bench
  size_t thread_count = config_common["BenchThreadCount"]
                            ? config_common["BenchThreadCount"].as<uint64_t>()
                            : 0;
  size_t iter_count = config_common["BenchIterCount"]
                          ? config_common["BenchIterCount"].as<uint64_t>()
                          : 10000;
  size_t batch_count = config_common["BenchBatchCount"]
                           ? config_common["BenchBatchCount"].as<uint64_t>()
                           : 0;
  g_debug_mode = config_common["DebugMode"]
                     ? config_common["DebugMode"].as<bool>()
                     : false;
  string topk_str = config_common["TopK"].as<string>();

  RetrievalMode retrieval_mode{RM_DENSE};
  if (config_common["RetrievalMode"]) {
    std::string retrieval_mode_str =
        config_common["RetrievalMode"].as<string>();
    if (retrieval_mode_str == "dense") {
      retrieval_mode = RM_DENSE;
    } else if (retrieval_mode_str == "sparse") {
      retrieval_mode = RM_SPARSE;
    }
  }

  FilterMode filter_mode{FM_NONE};
  if (config_common["FilterMode"]) {
    std::string filter_mode_str = config_common["FilterMode"].as<string>();
    if (filter_mode_str == "tag") {
      filter_mode = FM_TAG;
    }
  }

  vector<int32_t> topk_values;
  StringHelper::Split(topk_str, ",", &topk_values);
  size_t top_k = *topk_values.rbegin();
  string query_file = config_common["QueryFile"].as<string>();
  string first_sep = config_common["QueryFirstSep"]
                         ? config_common["QueryFirstSep"].as<string>()
                         : ";";
  string second_sep = config_common["QuerySecondSep"]
                          ? config_common["QuerySecondSep"].as<string>()
                          : " ";
  string query_type = config_common["QueryType"]
                          ? config_common["QueryType"].as<string>()
                          : "float";
  size_t bench_secs = config_common["BenchSecs"]
                          ? config_common["BenchSecs"].as<uint64_t>()
                          : 60;

  string index_dir = config_common["IndexPath"].as<string>();

  core_interface::Index::Pointer index;
  core_interface::BaseIndexQueryParam::Pointer query_param;
  if (0 !=
      parse_and_load_index_param(config_node, index_dir, index, query_param)) {
    LOG_ERROR("Failed to parse and load index param");
    return -1;
  }

  if (retrieval_mode == RM_SPARSE) {
    bench_sparse(query_type, thread_count, batch_count, top_k, query_file,
                 first_sep, second_sep, bench_secs, iter_count, index,
                 query_param, index_dir, filter_mode);

    cout << "Bench Sparse done." << endl;
  } else {
    bench(query_type, thread_count, batch_count, top_k, query_file, first_sep,
          second_sep, bench_secs, iter_count, index, query_param, index_dir,
          retrieval_mode, filter_mode);

    cout << "Bench done." << endl;
  }

  // Cleanup
  index->Close();

  return 0;
}


================================================
FILE: tools/core/bench_original.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <signal.h>
#include <iostream>
#include <ailego/container/bitmap.h>
#include <zvec/ailego/utility/time_helper.h>
#include "zvec/ailego/utility/string_helper.h"
#include "zvec/core/framework/index_plugin.h"
#include "zvec/core/interface/index_factory.h"
#include "zvec/core/interface/index_param.h"
#include "bench_result.h"
#include "filter_result_cache.h"
#include "flow.h"
#include "txt_input_reader.h"

#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wshadow"
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
#elif defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#endif

#include <yaml-cpp/yaml.h>

#ifdef __clang__
#pragma clang diagnostic pop
#elif defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif

using namespace std;
using namespace zvec::core;
using namespace zvec::ailego;

using Flow = Flow;

// File-local debug switch, off by default.
static bool g_debug_mode = false;

//------------------------------------------------------------
// Bench
//------------------------------------------------------------
// Which kind of vector the benchmark queries the index with.
enum RetrievalMode {
  RM_UNDEFINED = 0,
  RM_DENSE = 1,
  RM_SPARSE = 2
};

// Filtering applied to each search (none or tag based).
enum FilterMode {
  FM_UNDEFINED = 0,
  FM_NONE = 1,
  FM_TAG = 2
};

template <typename T>
class Bench {
 public:
  Bench(size_t threads, size_t bench_secs, size_t batch_count,
        RetrievalMode &retrieval_mode, FilterMode filter_mode)
      : threads_(threads),
        bench_secs_(bench_secs),
        batch_count_(batch_count),
        retrieval_mode_{retrieval_mode},
        filter_mode_{filter_mode} {
    if (threads_ == 0) {
      pool_ = make_shared<ThreadPool>(false);
      threads_ = pool_->count();
      cout << "Using cpu count as thread pool count[" << threads_ << "]"
           << endl;
    } else {
      pool_ = make_shared<ThreadPool>(threads_, false);
      cout << "Using thread pool count[" << threads_ << "]" << endl;
    }
    if (batch_count_ < 1) {
      batch_count_ = 1;
    }
  }

  static void stop(int signo) {
    if (STOP_NOW) {
      exit(signo);
    }
    STOP_NOW = true;
    cout << "\rTrying to stop. press [Ctrl+C] again kill immediately." << endl
         << flush;
  }

  bool load_query(const std::string &query_file, const std::string &first_sep,
                  const std::string &second_sep) {
    TxtInputReader<T> reader;
    vector<vector<T>> queries;
    vector<SparseData<T>> sparse_data;
    vector<vector<uint64_t>> taglists;

    if (!reader.load_query(query_file, first_sep, second_sep, queries,
                           sparse_data, taglists)) {
      cerr << "Load query error" << endl;
      return false;
    }

    if (batch_count_ == 1) {
      batch_queries_ = queries;

      for (size_t i = 0; i < sparse_data.size(); ++i) {
        vector<uint32_t> sparse_count;
        sparse_count.push_back(sparse_data[i].count);

        batch_sparse_counts_.push_back(sparse_count);
        batch_sparse_indices_.push_back(sparse_data[i].indices);
        batch_sparse_features_.push_back(sparse_data[i].features);
      }

      for (size_t i = 0; i < taglists.size(); ++i) {
        vector<vector<uint64_t>> new_taglists;
        new_taglists.push_back(taglists[i]);

        batch_taglists_.push_back(std::move(new_taglists));
      }
    } else {
      size_t num_batch = (queries.size() + batch_count_ - 1) / batch_count_;
      size_t idx = 0;
      for (size_t n = 0; n < num_batch; ++n) {
        vector<T> batch_query;
        vector<uint32_t> batch_sparse_count;
        vector<uint32_t> batch_sparse_indices;
        vector<T> batch_sparse_feature;
        vector<vector<uint64_t>> batch_taglists;

        for (size_t i = 0; i < batch_count_; ++i) {
          for (size_t k = 0; k < queries[idx].size(); ++k) {
            batch_query.push_back(queries[idx][k]);
          }

          batch_sparse_count.push_back(sparse_data[idx].count);

          for (size_t k = 0; k < sparse_data[idx].indices.size(); ++k) {
            batch_sparse_indices.push_back(sparse_data[idx].indices[k]);
          }

          for (size_t k = 0; k < sparse_data[idx].features.size(); ++k) {
            batch_sparse_feature.push_back(sparse_data[idx].features[k]);
          }

          if (taglists.size() > idx) {
            batch_taglists.push_back(taglists[idx]);
          }

          idx = (idx + 1) % queries.size();
        }

        batch_queries_.push_back(batch_query);
        batch_sparse_counts_.push_back(batch_sparse_count);
        batch_sparse_indices_.push_back(batch_sparse_indices);
        batch_sparse_features_.push_back(batch_sparse_feature);
        batch_taglists_.push_back(batch_taglists);
      }
    }

    size_t dim = queries[0].size();
    if (typeid(T) == typeid(float)) {
      qmeta_.set_meta(IndexMeta::DataType::DT_FP32, dim);
    } else if (typeid(T) == typeid(int8_t)) {
      qmeta_.set_meta(IndexMeta::DataType::DT_INT8, dim);
    } else {
      cerr << "unsupported type";
      return false;
    }

    cout << "Load query done!" << endl;
    return true;
  }

  void run(Flow *flower, int max_iter, int topk) {
    // Check
    if (batch_queries_.size() == 0) {
      return;
    }

    for (size_t i = 0; i < threads_; i++) {
      contexts_.emplace_back(flower->create_context());
      contexts_[i]->set_topk(topk);
      contexts_[i]->set_debug_mode(g_debug_mode);
    }

    // Do bench
    signal(SIGINT, stop);
    bench_result_.mark_start();
    auto start_time = Monotime::MilliSeconds();
    for (size_t i = 0; i < threads_; ++i) {
      pool_->execute(this, &Bench<T>::start_bench, flower, max_iter, &STOP_NOW);
    }

    while (!pool_->is_finished()) {
      this_thread::sleep_for(chrono::milliseconds(1));
      if (Monotime::MilliSeconds() - start_time > bench_secs_ * 1000) {
        STOP_NOW = true;
      }
    }

    pool_->wait_finish();

    bench_result_.mark_end();
    bench_result_.print();

    // for (size_t i = 0; i < threads_; i++) {
    //   if (contexts_[i]->flow_context() != nullptr) {
    //     std::cout << "context id: " << i << ": \n" <<
    //     contexts_[i]->flow_context()->searcher_context()->profiler().display();
    //   }
    // }
  }

 private:
  void start_bench(Flow *flower, size_t max_iter, const bool *is_stop) {
    size_t thread_index = pool_->indexof_this();

    size_t i = thread_index;
    for (; i < max_iter && !*is_stop; i += threads_) {
      int idx = i % batch_queries_.size();

      // prefilter
      FilterResultCache filter_cache;
      if (filter_mode_ == FM_TAG) {
        if (batch_taglists_[idx].size() != 1) {
          cerr << "query tag list not equal to one!" << endl;
          return;
        }

        int ret = filter_cache.filter(flower->id_to_tags_list(),
                                      batch_taglists_[idx][0],
                                      flower->tag_key_list());
        if (ret != 0) {
          cerr << "prefilter failed, idx: " << idx << std::endl;

          return;
        }

        auto filterFunc = [&](uint64_t key) { return filter_cache.find(key); };

        contexts_[thread_index]->set_filter(filterFunc);
      }

      // Do knn_search
      uint64_t start = Monotime::MicroSeconds();
      int ret;
      if (retrieval_mode_ == RM_DENSE) {
        if (batch_count_ == 1) {
          ret = do_knn_search<T>(flower, contexts_[thread_index],
                                 batch_queries_[idx]);
        } else {
          ret = do_knn_search<T>(flower, contexts_[thread_index],
                                 batch_queries_[idx], batch_count_);
        }

        if (ret != 0) {
          cerr << "Failed to knn search, ret=" << ret << endl;
          return;
        }
      } else {
        std::string mode = retrieval_mode_ == 1 ? "Dense" : "Sparse";
        cerr << "unsupported retrieval mode: " << mode << endl;
      }

      uint64_t end = Monotime::MicroSeconds();

      // Check result
      auto &result = contexts_[thread_index]->result();
      if (result.empty()) {
        cerr << "Search results is small than queries" << endl;
      }

      // Do sample
      bench_result_.add_time(batch_count_, end - start);
    }
  }

  template <typename U>
  typename std::enable_if<std::is_same<float, U>::value, int>::type
  do_knn_search(Flow *flower, Flow::Context::Pointer &context,
                const vector<U> &query, size_t count) {
    // Do knn search
    return flower->search_impl(query.data(), qmeta_, count, context);
  }

  template <typename U>
  typename std::enable_if<std::is_same<int8_t, U>::value, int>::type
  do_knn_search(Flow *flower, Flow::Context::Pointer &context,
                const vector<U> &query, size_t count) {
    // Do knn search
    return flower->search_impl(query.data(), qmeta_, count, context);
  }

  template <typename U>
  typename std::enable_if<std::is_same<uint32_t, U>::value, int>::type
  do_knn_search(Flow *flower, Flow::Context::Pointer &context,
                const vector<U> &query, size_t count) {
    // Do knn search
    return flower->search_impl(query.data(), qmeta_, count, context);
  }

  template <typename U>
  typename std::enable_if<std::is_same<uint64_t, U>::value, int>::type
  do_knn_search(Flow *flower, Flow::Context::Pointer &context,
                const vector<U> &query, size_t count) {
    // Do knn search
    return flower->search_impl(query.data(), qmeta_, count, context);
  }

  template <typename U>
  typename std::enable_if<std::is_same<float, U>::value, int>::type
  do_knn_search(Flow *flower, Flow::Context::Pointer &context,
                const vector<U> &query) {
    // Do knn search
    return flower->search_impl(query.data(), qmeta_, context);
  }

  template <typename U>
  typename std::enable_if<std::is_same<int8_t, U>::value, int>::type
  do_knn_search(Flow *flower, Flow::Context::Pointer &context,
                const vector<U> &query) {
    // Do knn search
    return flower->search_impl(query.data(), qmeta_, context);
  }

  template <typename U>
  typename std::enable_if<std::is_same<uint32_t, U>::value, int>::type
  do_knn_search(Flow *flower, Flow::Context::Pointer &context,
                const vector<U> &query) {
    // Do knn search
    return flower->search_impl(query.data(), qmeta_, context);
  }

  template <typename U>
  typename std::enable_if<std::is_same<uint64_t, U>::value, int>::type
  do_knn_search(Flow *flower, Flow::Context::Pointer &context,
                const vector<U> &query) {
    // Do knn search
    return flower->search_impl(query.data(), qmeta_, context);
  }

 private:
  IndexQueryMeta qmeta_{};
  size_t threads_;
  size_t bench_secs_;
  size_t batch_count_;
  shared_ptr<ThreadPool> pool_;
  vector<Flow::Context::Pointer> contexts_;

  vector<vector<T>> batch_queries_;
  vector<vector<uint32_t>> batch_sparse_counts_;
  vector<vector<uint32_t>> batch_sparse_indices_;
  vector<vector<T>> batch_sparse_features_;
  vector<vector<vector<uint64_t>>> batch_taglists_;

  BenchResult bench_result_;
  RetrievalMode retrieval_mode_{RM_UNDEFINED};
  FilterMode filter_mode_{FM_NONE};
  static bool STOP_NOW;
};

template <typename T>
bool Bench<T>::STOP_NOW = false;

//------------------------------------------------------------
// Sparse Bench
//------------------------------------------------------------
template <typename T>
class SparseBench {
 public:
  SparseBench(size_t threads, size_t bench_secs, size_t batch_count,
              FilterMode filter_mode)
      : threads_(threads),
        bench_secs_(bench_secs),
        batch_count_(batch_count),
        filter_mode_{filter_mode} {
    if (threads_ == 0) {
      pool_ = make_shared<ThreadPool>(false);
      threads_ = pool_->count();
      cout << "Using cpu count as thread pool count[" << threads_ << "]"
           << endl;
    } else {
      pool_ = make_shared<ThreadPool>(threads_, false);
      cout << "Using thread pool count[" << threads_ << "]" << endl;
    }
    if (batch_count_ < 1) {
      batch_count_ = 1;
    }
  }

  static void stop(int signo) {
    if (STOP_NOW) {
      exit(signo);
    }
    STOP_NOW = true;
    cout << "\rTrying to stop. press [Ctrl+C] again kill immediately." << endl
         << flush;
  }

  bool load_query(const std::string &query_file, const std::string &first_sep,
                  const std::string &second_sep) {
    TxtInputReader<T> reader;
    vector<vector<T>> queries;
    vector<SparseData<T>> sparse_data;
    vector<vector<uint64_t>> taglists;

    if (!reader.load_query(query_file, first_sep, second_sep, queries,
                           sparse_data, taglists)) {
      cerr << "Load query error" << endl;
      return false;
    }

    if (batch_count_ == 1) {
      for (size_t i = 0; i < sparse_data.size(); ++i) {
        vector<uint32_t> sparse_count;
        sparse_count.push_back(sparse_data[i].count);

        batch_sparse_counts_.push_back(sparse_count);
        batch_sparse_indices_.push_back(sparse_data[i].indices);
        batch_sparse_features_.push_back(sparse_data[i].features);
      }
    } else {
      size_t num_batch = (queries.size() + batch_count_ - 1) / batch_count_;
      size_t idx = 0;
      for (size_t n = 0; n < num_batch; ++n) {
        vector<uint32_t> batch_sparse_count;
        vector<uint32_t> batch_sparse_indices;
        vector<T> batch_sparse_feature;

        for (size_t i = 0; i < batch_count_; ++i) {
          batch_sparse_count.push_back(sparse_data[idx].count);

          for (size_t k = 0; k < sparse_data[idx].indices.size(); ++k) {
            batch_sparse_indices.push_back(sparse_data[idx].indices[k]);
          }

          for (size_t k = 0; k < sparse_data[idx].features.size(); ++k) {
            batch_sparse_feature.push_back(sparse_data[idx].features[k]);
          }

          idx = (idx + 1) % queries.size();
        }

        batch_sparse_counts_.push_back(batch_sparse_count);
        batch_sparse_indices_.push_back(batch_sparse_indices);
        batch_sparse_features_.push_back(batch_sparse_feature);
      }
    }

    if (typeid(T) == typeid(float)) {
      qmeta_.set_data_type(IndexMeta::DataType::DT_FP32);
    } else if (typeid(T) == typeid(int8_t)) {
      qmeta_.set_data_type(IndexMeta::DataType::DT_INT8);
    } else {
      cerr << "unsupported type";
      return false;
    }

    cout << "Load query done!" << endl;
    return true;
  }

  void run(SparseFlow *flower, int max_iter, int topk) {
    for (size_t i = 0; i < threads_; i++) {
      contexts_.emplace_back(flower->create_context());
      contexts_[i]->set_topk(topk);
      contexts_[i]->set_debug_mode(g_debug_mode);
    }

    // Do bench
    signal(SIGINT, stop);
    bench_result_.mark_start();
    auto start_time = Monotime::MilliSeconds();
    for (size_t i = 0; i < threads_; ++i) {
      pool_->execute(this, &SparseBench<T>::start_bench, flower, max_iter,
                     &STOP_NOW);
    }

    while (!pool_->is_finished()) {
      this_thread::sleep_for(chrono::milliseconds(1));
      if (Monotime::MilliSeconds() - start_time > bench_secs_ * 1000) {
        STOP_NOW = true;
      }
    }

    pool_->wait_finish();

    bench_result_.mark_end();
    bench_result_.print();
  }

 private:
  void start_bench(SparseFlow *flower, size_t max_iter, const bool *is_stop) {
    size_t thread_index = pool_->indexof_this();

    size_t i = thread_index;
    size_t sparse_query_size = batch_sparse_indices_.size();
    for (; i < max_iter && !*is_stop; i += threads_) {
      int idx = i % sparse_query_size;

      // prefilter
      FilterResultCache filter_cache;
      if (filter_mode_ == FM_TAG) {
        if (batch_taglists_[idx].size() != 1) {
          cerr << "query tag list not equal to one!" << endl;
          return;
        }

        int ret = filter_cache.filter(flower->id_to_tags_list(),
                                      batch_taglists_[idx][0],
                                      flower->tag_key_list());
        if (ret != 0) {
          cerr << "prefilter failed, idx: " << idx << std::endl;

          return;
        }

        auto filterFunc = [&](uint64_t key) { return filter_cache.find(key); };

        contexts_[thread_index]->set_filter(filterFunc);
      }

      // Do knn_search
      uint64_t start = Monotime::MicroSeconds();
      int ret;
      if (batch_count_ == 1) {
        if (batch_sparse_counts_[idx].size() != 1) {
          cerr << "Sparse count size should be 1, since batch count is 1"
               << endl;
          return;
        }
        ret = do_knn_search<T>(
            flower, contexts_[thread_index], batch_sparse_counts_[idx][0],
            batch_sparse_indices_[idx], batch_sparse_features_[idx]);
      } else {
        ret = do_knn_search<T>(flower, contexts_[thread_index],
                               batch_sparse_counts_[idx],
                               batch_sparse_indices_[idx],
                               batch_sparse_features_[idx], batch_count_);
      }

      if (ret != 0) {
        cerr << "Failed to sparse knn search, ret=" << ret << endl;
        return;
      }

      uint64_t end = Monotime::MicroSeconds();

      // Check result
      auto &result = contexts_[thread_index]->result();
      if (result.empty()) {
        cerr << "Search results is small than queries" << endl;
      }

      // Do sample
      bench_result_.add_time(batch_count_, end - start);
    }
  }

  // sparse search
  template <typename U>
  typename std::enable_if<std::is_same<float, U>::value, int>::type
  do_knn_search(SparseFlow *flower, SparseFlow::Context::Pointer &context,
                const vector<uint32_t> &sparse_count,
                const vector<uint32_t> &sparse_indices,
                const vector<U> &sparse_feature, size_t count) {
    // Do sparse knn search
    return flower->search_impl(sparse_count.data(), sparse_indices.data(),
                               sparse_feature.data(), qmeta_, count, context);
  }

  template <typename U>
  typename std::enable_if<std::is_same<int8_t, U>::value, int>::type
  do_knn_search(SparseFlow * /*flower*/,
                SparseFlow::Context::Pointer & /*context*/,
                const vector<uint32_t> & /*sparse_count*/,
                const vector<uint32_t> & /*sparse_indices*/,
                const vector<U> & /*sparse_feature*/, size_t /*count*/) {
    // Do sparse knn search
    return IndexError_Unsupported;
  }

  template <typename U>
  typename std::enable_if<std::is_same<uint32_t, U>::value, int>::type
  do_knn_search(SparseFlow * /*flower*/,
                SparseFlow::Context::Pointer & /*context*/,
                const vector<uint32_t> & /*sparse_count*/,
                const vector<uint32_t> & /*sparse_indices*/,
                const vector<U> & /*sparse_feature*/, size_t /*count*/) {
    // Do sparse knn search
    return IndexError_Unsupported;
  }

  template <typename U>
  typename std::enable_if<std::is_same<uint64_t, U>::value, int>::type
  do_knn_search(SparseFlow * /*flower*/,
                SparseFlow::Context::Pointer & /*context*/,
                const vector<uint32_t> & /*sparse_count*/,
                const vector<uint32_t> & /*sparse_indices*/,
                const vector<U> & /*sparse_feature*/, size_t /*count*/) {
    // Do sparse knn search
    return IndexError_Unsupported;
  }

  template <typename U>
  typename std::enable_if<std::is_same<float, U>::value, int>::type
  do_knn_search(SparseFlow *flower, SparseFlow::Context::Pointer &context,
                const uint32_t sparse_count,
                const vector<uint32_t> &sparse_indices,
                const vector<U> &sparse_feature) {
    // Do sparse knn search
    return flower->search_impl(sparse_count, sparse_indices.data(),
                               sparse_feature.data(), qmeta_, context);
  }

  template <typename U>
  typename std::enable_if<std::is_same<int8_t, U>::value, int>::type
  do_knn_search(SparseFlow * /*flower*/,
                SparseFlow::Context::Pointer & /*context*/,
                const uint32_t /*sparse_count*/,
                const vector<uint32_t> & /*sparse_indices*/,
                const vector<U> & /*sparse_feature*/) {
    // Do sparse knn search
    return IndexError_Unsupported;
  }

  template <typename U>
  typename std::enable_if<std::is_same<uint32_t, U>::value, int>::type
  do_knn_search(SparseFlow * /*flower*/,
                SparseFlow::Context::Pointer & /*context*/,
                const uint32_t /*parse_count*/,
                const vector<uint32_t> & /*sparse_indices*/,
                const vector<U> & /*sparse_feature*/) {
    // Do sparse knn search
    return IndexError_Unsupported;
  }

  template <typename U>
  typename std::enable_if<std::is_same<uint64_t, U>::value, int>::type
  do_knn_search(SparseFlow * /*flower*/,
                SparseFlow::Context::Pointer & /*context*/,
                const uint32_t /*sparse_count*/,
                const vector<uint32_t> & /*sparse_indices*/,
                const vector<U> & /*sparse_feature*/) {
    // Do sparse knn search
    return IndexError_Unsupported;
  }

 private:
  IndexQueryMeta qmeta_{};
  size_t threads_;
  size_t bench_secs_;
  size_t batch_count_;
  shared_ptr<ThreadPool> pool_;
  vector<SparseFlow::Context::Pointer> contexts_;

  vector<vector<uint32_t>> batch_sparse_counts_;
  vector<vector<uint32_t>> batch_sparse_indices_;
  vector<vector<T>> batch_sparse_features_;
  vector<vector<vector<uint64_t>>> batch_taglists_;

  FilterMode filter_mode_{FM_NONE};
  BenchResult bench_result_;
  static bool STOP_NOW;
};
template <typename T>
bool SparseBench<T>::STOP_NOW = false;

// do
// Copies the entries of a YAML map into `params`, echoing each scalar to
// stdout. Values tagged as int, float or bool are stored with that type, other
// scalars are stored as strings, and nested maps are converted recursively.
// Entries that are neither scalars nor maps are skipped.
// Returns false when a nested map could not be converted.
bool prepare_params(YAML::Node &&config_params, Params &params) {
  cout << "Parse params as blow:" << endl;
  for (auto entry : config_params) {
    YAML::Node value_node = entry.second;
    string type_tag = value_node.Tag();

    if (type_tag == "tag:yaml.org,2002:int") {
      int64_t number = value_node.as<int64_t>();
      string name = entry.first.as<string>();
      params.set(name, number);
      cout << name << "=" << number << endl;
      continue;
    }
    if (type_tag == "tag:yaml.org,2002:float") {
      float real = value_node.as<float>();
      string name = entry.first.as<string>();
      params.set(name, real);
      cout << name << "=" << real << endl;
      continue;
    }
    if (type_tag == "tag:yaml.org,2002:bool") {
      bool flag = value_node.as<bool>();
      string name = entry.first.as<string>();
      params.set(name, flag);
      cout << name << "=" << flag << endl;
      continue;
    }

    // Untagged (or differently tagged) value: decide by node kind.
    if (value_node.IsScalar()) {
      string text = value_node.as<string>();
      string name = entry.first.as<string>();
      params.set(name, text);
      cout << name << "=" << text << endl;
      continue;
    }
    if (!value_node.IsMap()) {
      continue;
    }

    Params nested;
    auto nested_node = entry.second;
    if (!prepare_params(std::move(nested_node), nested)) {
      cerr << "parse params error with key[" << entry.first.as<string>() << "]"
           << endl;
      return false;
    }
    params.set(entry.first.as<string>(), nested);
  }
  return true;
}

// Verifies that the config has a [SearcherCommon] section that names a
// searcher (SearcherClass or SearcherConfig) and carries the mandatory
// IndexPath, TopK and QueryFile entries. The first missing item is reported
// on stderr and makes the function return false.
bool check_config(YAML::Node &config_node) {
  auto searcher_common = config_node["SearcherCommon"];
  if (!searcher_common) {
    cerr << "Can not find [SearcherCommon] in config" << endl;
    return false;
  }

  if (!(searcher_common["SearcherClass"] ||
        searcher_common["SearcherConfig"])) {
    cerr << "Can not find [SearcherClass] or [SearcherConfig] in config"
         << endl;
    return false;
  }

  // Mandatory entries, checked in this order.
  static const char *const kRequiredKeys[] = {"IndexPath", "TopK", "QueryFile"};
  for (const char *key : kRequiredKeys) {
    if (!searcher_common[std::string(key)]) {
      cerr << "Can not find [" << key << "] in config" << endl;
      return false;
    }
  }
  return true;
}

// Prints the command-line synopsis to stdout.
void usage(void) {
  static const char kSynopsis[] = "Usage: bench CONFIG.yaml [plugin file path]";
  std::cout << kSynopsis << std::endl;
}

// Loads the index stored at `index_dir` into `flower`.
// Returns true on success; otherwise prints the error code and returns false.
bool load_index(Flow &flower, string &index_dir) {
  const int status = flower.load(index_dir);
  if (status == 0) {
    cout << "Load index done!" << endl;
    return true;
  }
  cerr << "Flow load failed with ret " << status << endl;
  return false;
}

int bench(std::string &query_type, size_t thread_count, size_t batch_count,
          size_t top_k, string query_file, string &first_sep,
          string &second_sep, size_t bench_secs, size_t iter_count,
          Flow &flower, string &index_dir, RetrievalMode retrieval_mode,
          FilterMode filter_mode) {
  if (filter_mode == FM_TAG && batch_count > 1) {
    cerr << "filter mode can not be run in batch mode" << endl;
    return -1;
  }

  if (query_type == "float") {
    Bench<float> bench(thread_count, bench_secs, batch_count, retrieval_mode,
                       filter_mode);
    bench.load_query(query_file, first_sep, second_sep);
    if (load_index(flower, index_dir)) {
      bench.run(&flower, iter_count, top_k);
    } else {
      return -1;
    }
  } else if (query_type == "int8") {
    Bench<int8_t> bench(thread_count, bench_secs, batch_count, retrieval_mode,
                        filter_mode);
    bench.load_query(query_file, first_sep, second_sep);
    if (load_index(flower, index_dir)) {
      bench.run(&flower, iter_count, top_k);
    } else {
      return -1;
    }
  } else if (query_type == "binary") {
    Bench<uint32_t> bench(thread_count, bench_secs, batch_count, retrieval_mode,
                          filter_mode);
    bench.load_query(query_file, first_sep, second_sep);
    if (load_index(flower, index_dir)) {
      bench.run(&flower, iter_count, top_k);
    } else {
      return -1;
    }
  } else if (query_type == "binary64") {
    Bench<uint64_t> bench(thread_count, bench_secs, batch_count, retrieval_mode,
                          filter_mode);
    bench.load_query(query_file, first_sep, second_sep);
    if (load_index(flower, index_dir)) {
      bench.run(&flower, iter_count, top_k);
    } else {
      return -1;
    }
  } else {
    cerr << "Can not recognize type: " << query_type << endl;
  }

  return 0;
}

// Loads the sparse index stored at `index_dir` into `flower`.
// Returns true on success; otherwise prints the error code and returns false.
bool load_index(SparseFlow &flower, string &index_dir) {
  const int status = flower.load(index_dir);
  const bool loaded = (status == 0);
  if (!loaded) {
    cerr << "Flow load failed with ret " << status << endl;
  } else {
    cout << "Load index done!" << endl;
  }
  return loaded;
}

int bench_sparse(std::string &query_type, size_t thread_count,
                 size_t batch_count, size_t top_k, string query_file,
                 string &first_sep, string &second_sep, size_t bench_secs,
                 size_t iter_count, SparseFlow &flower, string &index_dir,
                 FilterMode filter_mode) {
  if (filter_mode == FM_TAG && batch_count > 1) {
    cerr << "filter mode can not be run in batch mode" << endl;
    return -1;
  }

  if (query_type == "float") {
    SparseBench<float> bench(thread_count, bench_secs, batch_count,
                             filter_mode);
    bench.load_query(query_file, first_sep, second_sep);
    if (load_index(flower, index_dir)) {
      bench.run(&flower, iter_count, top_k);
    } else {
      return -1;
    }
  } else if (query_type == "int8") {
    SparseBench<int8_t> bench(thread_count, bench_secs, batch_count,
                              filter_mode);
    bench.load_query(query_file, first_sep, second_sep);
    if (load_index(flower, index_dir)) {
      bench.run(&flower, iter_count, top_k);
    } else {
      return -1;
    }
  } else {
    cerr << "Can not recognize type: " << query_type << endl;
  }

  return 0;
}

int main(int argc, char *argv[]) {
  if (argc < 2) {
    usage();
    return -1;
  }

  IndexPluginBroker broker;
  std::string error;
  for (int i = 2; i < argc; ++i) {
    if (!broker.emplace(argv[i], &error)) {
      cerr << "Failed to load plugin: " << argv[i] << " (" << error << ")"
           << endl;
      return -1;
    }
  }

  YAML::Node config_node;
  try {
    config_node = YAML::LoadFile(argv[1]);
  } catch (...) {
    cerr << "Load YAML file[" << argv[1] << "] failed!" << endl;
    return -1;
  }

  if (!check_config(config_node)) {
    return -1;
  }
  auto config_common = config_node["SearcherCommon"];

  map<string, int> LOG_LEVEL = {{"debug", IndexLogger::LEVEL_DEBUG},
                                {"info", IndexLogger::LEVEL_INFO},
                                {"warn", IndexLogger::LEVEL_WARN},
                                {"error", IndexLogger::LEVEL_ERROR},
                                {"fatal", IndexLogger::LEVEL_FATAL}};
  string log_level = config_common["LogLevel"]
                         ? config_common["LogLevel"].as<string>()
                         : "debug";
  transform(log_level.begin(), log_level.end(), log_level.begin(), ::tolower);
  if (LOG_LEVEL.find(log_level) != LOG_LEVEL.end()) {
    IndexLoggerBroker::SetLevel(LOG_LEVEL[log_level]);
  }

  // Calculate Bench
  size_t thread_count = config_common["BenchThreadCount"]
                            ? config_common["BenchThreadCount"].as<uint64_t>()
                            : 0;
  size_t iter_count = config_common["BenchIterCount"]
                          ? config_common["BenchIterCount"].as<uint64_t>()
                          : 10000;
  size_t batch_count = config_common["BenchBatchCount"]
                           ? config_common["BenchBatchCount"].as<uint64_t>()
                           : 0;
  g_debug_mode = config_common["DebugMode"]
                     ? config_common["DebugMode"].as<bool>()
                     : false;
  string topk_str = config_common["TopK"].as<string>();

  RetrievalMode retrieval_mode{RM_DENSE};
  if (config_common["RetrievalMode"]) {
    std::string retrieval_mode_str =
        config_common["RetrievalMode"].as<string>();
    if (retrieval_mode_str == "dense") {
      retrieval_mode = RM_DENSE;
    } else if (retrieval_mode_str == "sparse") {
      retrieval_mode = RM_SPARSE;
    }
  }

  FilterMode filter_mode{FM_NONE};
  if (config_common["FilterMode"]) {
    std::string filter_mode_str = config_common["FilterMode"].as<string>();
    if (filter_mode_str == "tag") {
      filter_mode = FM_TAG;
    }
  }

  vector<int32_t> topk_values;
  StringHelper::Split(topk_str, ",", &topk_values);
  size_t top_k = *topk_values.rbegin();
  string query_file = config_common["QueryFile"].as<string>();
  string first_sep = config_common["QueryFirstSep"]
                         ? config_common["QueryFirstSep"].as<string>()
                         : ";";
  string second_sep = config_common["QuerySecondSep"]
                          ? config_common["QuerySecondSep"].as<string>()
                          : " ";
  string query_type = config_common["QueryType"]
                          ? config_common["QueryType"].as<string>()
                          : "float";
  string container_type = config_common["ContainerType"]
                              ? config_common["ContainerType"].as<string>()
                              : "MMapFileStorage";
  size_t bench_secs = config_common["BenchSecs"]
                          ? config_common["BenchSecs"].as<uint64_t>()
                          : 60;

  if (retrieval_mode == RM_SPARSE) {
    SparseFlow flower;

    // Create container params
    Params container_params;
    if (config_node["ContainerParams"]) {
      // Get index params of Searcher in flower object
      if (!prepare_params(config_node["ContainerParams"], container_params)) {
        return -1;
      }
      cout << "Created index params of a container in flower object " << endl;
    }

    container_params.set("proxima.mmap_file.container.memory_warmup", true);
    // Create a container
    int ret = flower.set_container(container_type, container_params);
    if (0 != ret) {
      cerr << "Create " << container_type << " failed." << endl;
      return -1;
    }

    if (config_common["SearcherClass"]) {
      Params params;
      if (config_node["SearcherParams"]) {
        // Get index params of Searcher in flower object
        if (!prepare_params(config_node["SearcherParams"], params)) {
          return -1;
        }
        cout << "Created index params of a searcher in flower object " << endl;
      }

      // Set a Searcher
      string searcher_class = config_common["SearcherClass"].as<string>();
      ret = flower.set_searcher(searcher_class, params);
      if (0 != ret) {
        cerr << "Failed to create searcher " << searcher_class << endl;
        return -1;
      }
      cout << "Created searcher " << searcher_class << endl;
    } else {  // SearcherConfig
      std::cout << config_common["SearcherConfig"].as<string>() << std::endl;
      auto params =
          zvec::core_interface::IndexFactory::DeserializeIndexParamFromJson(
              config_common["SearcherConfig"].as<string>());

      auto index =
          zvec::core_interface::IndexFactory::CreateAndInitIndex(*params);

      flower.set_searcher(index->index_searcher());
    }

    string index_dir = config_common["IndexPath"].as<string>();

    bench_sparse(query_type, thread_count, batch_count, top_k, query_file,
                 first_sep, second_sep, bench_secs, iter_count, flower,
                 index_dir, filter_mode);

    cout << "Bench Sparse done." << endl;
  } else {
    Flow flower;

    // Create container params
    Params container_params;
    if (config_node["ContainerParams"]) {
      // Get index params of Searcher in flower object
      if (!prepare_params(config_node["ContainerParams"], container_params)) {
        return -1;
      }
      cout << "Created index params of a container in flower object " << endl;
    }

    container_params.set("proxima.mmap_file.container.memory_warmup", true);
    // Create a container
    int ret = flower.set_container(container_type, container_params);
    if (0 != ret) {
      cerr << "Create " << container_type << " failed." << endl;
      return -1;
    }

    // Set a Searcher
    if (config_common["SearcherClass"]) {
      Params params;
      if (config_node["SearcherParams"]) {
        // Get index params of Searcher in flower object
        if (!prepare_params(config_node["SearcherParams"], params)) {
          return -1;
        }
        cout << "Created index params of a searcher in flower object " << endl;
      }

      string searcher_class = config_common["SearcherClass"].as<string>();
      ret = flower.set_searcher(searcher_class, params);
      if (0 != ret) {
        cerr << "Failed to create searcher " << searcher_class << endl;
        return -1;
      }
      cout << "Created searcher " << searcher_class << endl;
    } else {  // SearcherConfig
      std::cout << config_common["SearcherConfig"].as<string>() << std::endl;
      auto params =
          zvec::core_interface::IndexFactory::DeserializeIndexParamFromJson(
              config_common["SearcherConfig"].as<string>());

      auto index =
          zvec::core_interface::IndexFactory::CreateAndInitIndex(*params);

      flower.set_searcher(index->index_searcher());
    }

    string index_dir = config_common["IndexPath"].as<string>();

    bench(query_type, thread_count, batch_count, top_k, query_file, first_sep,
          second_sep, bench_secs, iter_count, flower, index_dir, retrieval_mode,
          filter_mode);

    flower.unload();

    cout << "Bench done." << endl;
  }

  return 0;
}


================================================
FILE: tools/core/bench_result.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <sys/time.h>
#include <stdio.h>
#include <string.h>
#include <limits>
#include <map>
#include <ailego/parallel/lock.h>

namespace zvec {
namespace core {

//! Accumulates latency samples reported by bench worker threads and prints a
//! summary (totals, average latency, QPS and latency percentiles).
//!
//! add_time() is guarded by a spin lock; every other member is unsynchronized
//! and is meant to be called once the workers have finished.
class BenchResult {
 public:
  BenchResult()
      : total_query_count_(0),
        total_process_time_by_us_(0),
        min_time_by_us_(std::numeric_limits<long>::max()),
        max_time_by_us_(0),
        start_(),
        end_() {}
  ~BenchResult() {}

  //! Record one measurement.
  //! @param query_count  number of queries covered by this measurement
  //! @param time_by_us   elapsed time of the measurement in microseconds
  void add_time(int query_count, long time_by_us) {
    lock_.lock();
    total_query_count_ += query_count;
    total_process_time_by_us_ += time_by_us;
    // Histogram bucket width is 100us; operator[] value-initializes a missing
    // bucket to 0 before the increment.
    ++process_time_map_[time_by_us / 100];
    // min and max are tracked independently: with an `else if` the very first
    // sample (always a new minimum) would never be considered as a maximum.
    if (time_by_us < min_time_by_us_) {
      min_time_by_us_ = time_by_us;
    }
    if (time_by_us > max_time_by_us_) {
      max_time_by_us_ = time_by_us;
    }
    lock_.unlock();
  }
  //! Remember the wall-clock time at which the benchmark started.
  void mark_start() {
    gettimeofday(&start_, NULL);
  }
  //! Remember the wall-clock time at which the benchmark ended.
  void mark_end() {
    gettimeofday(&end_, NULL);
  }
  //! Wall-clock time between mark_start() and mark_end() in milliseconds.
  long get_duration_by_ms() {
    // Combine seconds and microseconds before dividing so the sub-millisecond
    // remainder is truncated only once.
    long long duration_us =
        static_cast<long long>(end_.tv_sec - start_.tv_sec) * 1000000LL +
        static_cast<long long>(end_.tv_usec - start_.tv_usec);
    return static_cast<long>(duration_us / 1000);
  }
  //! Sum of all query_count values passed to add_time().
  long get_total_query_count() {
    return total_query_count_;
  }
  //! Latency histogram: key is the latency in units of 100us, value is the
  //! number of add_time() calls that fell into that bucket.
  std::map<long, long> &get_process_time_map() {
    return process_time_map_;
  }
  //! Sum of all recorded times, in milliseconds.
  long get_total_process_time_by_ms() {
    return total_process_time_by_us_ / 1000;
  }
  //! Print the summary and the 25/50/75/90/95/99 percentiles to stdout.
  //! NOTE: the percentile thresholds are derived from total_query_count_
  //! while the histogram counts add_time() calls, so the two only line up
  //! when every call reports a query_count of 1.
  void print() {
    fprintf(stdout,
            "Process query: %ld, total process time: %ldms, "
            "duration: %ldms, max: %ldms, min:%ldms\n",
            get_total_query_count(), get_total_process_time_by_ms(),
            get_duration_by_ms(), max_time_by_us_ / 1000,
            min_time_by_us_ / 1000);
    fprintf(stdout, "Avg latency: %0.1fms qps: %0.1f\n",
            ((float)get_total_process_time_by_ms()) / get_total_query_count(),
            get_total_query_count() / ((float)get_duration_by_ms() / 1000));

    const int percent[] = {25, 50, 75, 90, 95, 99};
    const int percent_count =
        static_cast<int>(sizeof(percent) / sizeof(percent[0]));
    int tot_num = 0;
    int index = 0;
    float max_time = 0.0;
    int last_num = 0;

    // Walk the buckets in ascending latency order; each time the running
    // count reaches the next threshold, report that bucket's latency.
    for (const auto &element : process_time_map_) {
      tot_num += element.second;
      if (tot_num >= total_query_count_ * percent[index] / 100) {
        if (last_num != tot_num) {
          // Bucket keys are in 100us units, so /10 converts to milliseconds.
          max_time = (float)element.first / 10;
          last_num = tot_num;
        }
        fprintf(stdout, "%d Percentile:\t\t %.1f ms\n", percent[index],
                max_time);
        index++;
        if (index >= percent_count) {
          break;
        }
      }
    }
    // Percentiles that were never reached reuse the last reported latency.
    for (; index < percent_count; index++) {
      fprintf(stdout, "%d Percentile:\t\t %.1f ms\n", percent[index], max_time);
    }
    fprintf(stdout, "\n");
  }

 private:
  long total_query_count_;
  long total_process_time_by_us_;
  long min_time_by_us_;
  long max_time_by_us_;
  struct timeval start_;
  struct timeval end_;
  ailego::SpinMutex lock_;
  std::map<long, long> process_time_map_;  // <processTimeBy100us, count>
};

}  // namespace core
}  // namespace zvec


================================================
FILE: tools/core/convert_cohere_parquet.py
================================================
from __future__ import annotations

import logging
import os
import pathlib
from pathlib import Path

import numpy as np
import pandas as pd
import polars as pl

# Output-file mode switch read by gen_vector_files / gen_neighbor_files at call
# time: True opens the output file in append mode, False opens it in write mode
# and aborts if the file already exists. The __main__ section reassigns it
# before each generation step.
to_append = True


def write_neighbors_file(data_frame, neighbors_file, num_neighbors=100):
    """Write one ``id;n0 n1 ... nK`` line per row of ``data_frame``.

    Args:
        data_frame: table-like object whose ``"id"`` and ``"neighbors_id"``
            columns can be stacked with ``np.stack``.
        neighbors_file: writable text file object receiving the lines.
        num_neighbors: how many leading neighbor ids to emit per row
            (defaults to 100, the value that used to be hard-coded).

    Raises:
        IndexError: if a row holds fewer than ``num_neighbors`` neighbors.
    """
    # Look the logger up by name so the function also works when the module is
    # imported; the global ``logger`` only exists when run as a script.
    log = logging.getLogger("convert_log")

    id_list = np.stack(data_frame["id"])
    neighbors_list = np.stack(data_frame["neighbors_id"])

    if len(id_list) != len(neighbors_list):
        log.error("list size not equal: %d, %d", len(id_list), len(neighbors_list))
        os._exit(1)

    for row_id, neighbors in zip(id_list, neighbors_list):
        # Index explicitly (rather than slice) so a short row still raises
        # instead of silently producing a truncated line.
        joined = " ".join(str(neighbors[j]) for j in range(num_neighbors))
        neighbors_file.write(str(row_id) + ";" + joined + "\n")

    log.info("Output neighbors file done. Total lines: %d", len(id_list))


def write_vector_file(data_frame, vector_file):
    """Write one ``id;v0 v1 ... vN;`` line per row of ``data_frame``.

    Args:
        data_frame: table-like object whose ``"id"`` and ``"emb"`` columns can
            be stacked with ``np.stack``.
        vector_file: writable text file object receiving the lines.

    Each component is rounded to 16 decimal places before formatting. Nothing
    is written when the id and embedding columns differ in length.
    """
    # Look the logger up by name so the function also works when the module is
    # imported; the global ``logger`` only exists when run as a script.
    log = logging.getLogger("convert_log")

    embeddings = np.stack(data_frame["emb"])
    ids = np.stack(data_frame["id"])

    if len(ids) != len(embeddings):
        log.error(
            "id list not matched with embedding list! : %d, %d",
            len(ids),
            len(embeddings),
        )
        return

    for case_id, (idx, vector) in enumerate(zip(ids, embeddings)):
        values = " ".join(str(round(component, 16)) for component in vector)
        # The closing ';' is only emitted after at least one component, which
        # matches the historical output for empty vectors ("id;").
        terminator = ";" if len(vector) > 0 else ""
        vector_file.write(str(idx) + ";" + values + terminator + "\n")

        # Progress heartbeat every 10000 rows.
        if case_id != 0 and case_id % 10000 == 0:
            log.info("output lines: %d", case_id)

    log.info("Output vector file done. Total lines: %d", len(ids))


def read_parquet_file(file_name: str) -> "pd.DataFrame | pl.DataFrame":
    """Load a parquet file completely into memory.

    Args:
        file_name: path of the parquet file.

    Returns:
        The frame produced by ``pl.read_parquet`` on success, or an empty
        pandas ``DataFrame`` when the file is missing or cannot be read.
        Callers only rely on ``len()`` and column access of the result.
    """
    # Look the logger up by name so the function also works when the module is
    # imported; the global ``logger`` only exists when run as a script.
    log = logging.getLogger("convert_log")

    parquet_file = pathlib.Path(file_name)
    if not parquet_file.exists():
        log.error("open error! file not found: %s", file_name)
        return pd.DataFrame()

    try:
        return pl.read_parquet(parquet_file)
    except Exception:
        # Best effort: keep going with the other inputs, but record the
        # traceback so the cause of the failure is not lost.
        log.exception("open error! error file: %s", file_name)
        return pd.DataFrame()


def gen_vector_files(input_dir, input_file_pattern, output_dir, output_file_name):
    """Convert every matching parquet file under ``input_dir`` to vector text.

    Args:
        input_dir: directory searched recursively for input files.
        input_file_pattern: glob pattern passed to ``Path.rglob``.
        output_dir: directory of the output file; created when missing.
        output_file_name: name of the text file inside ``output_dir``.

    The module-level ``to_append`` flag selects append ("a") or write ("w")
    mode; in write mode the process exits if the output file already exists.
    """
    # Look the logger up by name so the function also works when the module is
    # imported; the global ``logger`` only exists when run as a script.
    log = logging.getLogger("convert_log")

    # Sort so the processing order (and thus the output) is reproducible;
    # rglob yields entries in file-system order.
    input_file_list = sorted(Path(input_dir).rglob(input_file_pattern))

    output_file_name_full = pathlib.Path(output_dir, output_file_name)

    if not to_append and output_file_name_full.exists():
        log.error("File exists! File name: %s", output_file_name_full)
        os._exit(1)

    # Opening the file fails when the output directory does not exist yet.
    output_file_name_full.parent.mkdir(parents=True, exist_ok=True)

    write_flag = "a" if to_append else "w"

    with output_file_name_full.resolve().open(write_flag) as vector_file:
        for input_file in input_file_list:
            input_file_name = input_file.resolve()

            log.info("Load the entire file into memory. File name: %s", input_file_name)
            data_set = read_parquet_file(input_file_name)
            log.info("Read parquet file done. File name: %s", input_file_name)

            if len(data_set) > 0:
                log.info("Process parquet file. File name: %s", input_file_name)
                write_vector_file(data_set, vector_file)
                log.info("Process parquet file done. File name: %s", input_file_name)


def gen_neighbor_files(input_dir, input_file_pattern, output_dir, output_file_name):
    """Convert every matching parquet file under ``input_dir`` to neighbor text.

    Args:
        input_dir: directory searched recursively for input files.
        input_file_pattern: glob pattern passed to ``Path.rglob``.
        output_dir: directory of the output file; created when missing.
        output_file_name: name of the text file inside ``output_dir``.

    The module-level ``to_append`` flag selects append ("a") or write ("w")
    mode; in write mode the process exits if the output file already exists.
    """
    # Look the logger up by name so the function also works when the module is
    # imported; the global ``logger`` only exists when run as a script.
    log = logging.getLogger("convert_log")

    # Sort so the processing order (and thus the output) is reproducible;
    # rglob yields entries in file-system order.
    input_file_list = sorted(Path(input_dir).rglob(input_file_pattern))

    output_file_name_full = pathlib.Path(output_dir, output_file_name)

    if not to_append and output_file_name_full.exists():
        log.error("File already exists. File name: %s", output_file_name_full)
        os._exit(1)

    # Opening the file fails when the output directory does not exist yet.
    output_file_name_full.parent.mkdir(parents=True, exist_ok=True)

    write_flag = "a" if to_append else "w"

    with output_file_name_full.resolve().open(write_flag) as neighbor_file:
        for input_file in input_file_list:
            input_file_name = input_file.resolve()

            log.info("Load the entire file into memory. File name: %s", input_file_name)
            data_set = read_parquet_file(input_file_name)
            log.info("Read parquet file done. File name: %s", input_file_name)

            if len(data_set) > 0:
                log.info("Write parquet file. File name: %s", input_file_name)
                write_neighbors_file(data_set, neighbor_file)
                log.info("Write parquet file done. File name: %s", input_file_name)


if __name__ == "__main__":
    # Console logger shared by all helpers in this script.
    logger = logging.getLogger("convert_log")
    logger.setLevel(logging.DEBUG)

    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(logging.DEBUG)
    stream_handler.setFormatter(
        logging.Formatter(
            fmt="%(asctime)s [%(levelname)s] %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
        )
    )
    logger.addHandler(stream_handler)

    input_dir = "./cohere/10m"
    output_dir = "./10m.output"

    # Step 1: query (test) vectors, written to a fresh file.
    logger.info("Generate test vector files")
    to_append = False
    gen_vector_files(
        input_dir, "test.parquet", output_dir, "cohere_test_vector_1000.new.txt"
    )

    # Step 2: ground-truth neighbors, written to a fresh file.
    logger.info("Generate neighbor files")
    to_append = False
    gen_neighbor_files(input_dir, "neighbors.parquet", output_dir, "neighbors.txt")

    # Step 3: the ten training shards, appended into a single output file.
    logger.info("Generate train vector files")
    to_append = True
    for shard in range(10):
        gen_vector_files(
            input_dir,
            f"shuffle_train-0{shard}-of-10.parquet",
            output_dir,
            "cohere_768_10m_vector.train.txt",
        )


================================================
FILE: tools/core/filter_result_cache.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <vector>
#include <roaring/roaring.h>

namespace zvec {
namespace core {

struct FilterResultCache {
 public:
  FilterResultCache() {
    bitmap_ = roaring_bitmap_create();
  }

  FilterResultCache(uint32_t capacity_hint) {
    bitmap_ = roaring_bitmap_create_with_capacity(capacity_hint);
  }

  ~FilterResultCache() {
    roaring_bitmap_free(bitmap_);
    bitmap_ = nullptr;
  }

  bool find(uint64_t key) const {
    return !roaring_bitmap_contains(bitmap_, key);
  }

  void set(uint64_t key) const {
    roaring_bitmap_add(bitmap_, key);
  }

  int filter(const std::vector<std::vector<uint64_t>> &id_to_tags_list,
             const std::vector<uint64_t> &query_tag_list,
             const std::vector<uint64_t> &id_to_key_list) {
    for (size_t i = 0; i < id_to_tags_list.size(); ++i) {
      auto &id_tag_list = id_to_tags_list[i];

      size_t t_i = 0;
      size_t q_i = 0;
      while (t_i < id_tag_list.size() && q_i < query_tag_list.size()) {
        if (id_tag_list[t_i] == query_tag_list[q_i]) {
          uint64_t key = id_to_key_list[i];

          set(key);

          break;
        } else if (id_tag_list[t_i] < query_tag_list[q_i]) {
          ++t_i;
        } else {
          ++q_i;
        }
      }
    }

    return 0;
  }

 public:
  roaring_bitmap_t *bitmap_{nullptr};
};

}  // namespace core
}  // namespace zvec

================================================
FILE: tools/core/flow.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include "zvec/core/framework/index_flow.h"
#include "meta_segment_common.h"

using namespace std;

namespace zvec {
namespace core {

//! Defines a const member function `_FUNC_NAME(query, qmeta, count, context)`
//! for a batch of dense queries. The enclosing class must provide the members
//! `streamer_`, `reformer_` and `flow_` and a nested `Context` type.
//! With a streamer set, the query is first converted by the reformer (if any)
//! and then passed to `streamer_->_FUNC_NAME` with `context->context()`;
//! otherwise the call goes to `flow_._FUNC_NAME` with
//! `context->flow_context()`. A non-zero reformer result is returned as is.
#define SEARCH_DENSE_BATCH(_FUNC_NAME)                                         \
  int _FUNC_NAME(const void *query, const IndexQueryMeta &qmeta,               \
                 uint32_t count, Context::Pointer &context) const {            \
    if (streamer_) {                                                           \
      if (reformer_) {                                                         \
        std::string ovec;                                                      \
        IndexQueryMeta ometa;                                                  \
        int ret = reformer_->convert(query, qmeta, count, &ovec, &ometa);      \
        if (ret != 0) {                                                        \
          return ret;                                                          \
        }                                                                      \
        return streamer_->_FUNC_NAME(ovec.data(), ometa, count,                \
                                     context->context());                      \
      } else {                                                                 \
        return streamer_->_FUNC_NAME(query, qmeta, count, context->context()); \
      }                                                                        \
    } else {                                                                   \
      return flow_._FUNC_NAME(query, qmeta, count, context->flow_context());   \
    }                                                                          \
  }

//! Single-query counterpart of SEARCH_DENSE_BATCH: defines a const member
//! function `_FUNC_NAME(query, qmeta, context)`. With a streamer set, the
//! query is converted by the reformer (if any) and passed to
//! `streamer_->_FUNC_NAME`; otherwise the call goes to `flow_._FUNC_NAME`.
//! A non-zero reformer result is returned as is.
#define SEARCH_DENSE(_FUNC_NAME)                                              \
  int _FUNC_NAME(const void *query, const IndexQueryMeta &qmeta,              \
                 Context::Pointer &context) const {                           \
    if (streamer_) {                                                          \
      if (reformer_) {                                                        \
        std::string ovec;                                                     \
        IndexQueryMeta ometa;                                                 \
        int ret = reformer_->convert(query, qmeta, &ovec, &ometa);            \
        if (ret != 0) {                                                       \
          return ret;                                                         \
        }                                                                     \
        return streamer_->_FUNC_NAME(ovec.data(), ometa, context->context()); \
      } else {                                                                \
        return streamer_->_FUNC_NAME(query, qmeta, context->context());       \
      }                                                                       \
    } else {                                                                  \
      return flow_._FUNC_NAME(query, qmeta, context->flow_context());         \
    }                                                                         \
  }

//! Defines a const member function `_FUNC_NAME` for a batch of sparse
//! queries (per-query counts, indices and values). With a streamer set, the
//! call is forwarded to `streamer_->_FUNC_NAME`, unless a reformer is present,
//! in which case an error is logged and IndexError_Runtime is returned.
//! Without a streamer the call goes to `flow_._FUNC_NAME`.
//! NOTE: "SPRASE" is a misspelling of "SPARSE"; the name is kept because the
//! macro is referenced under this spelling where it is expanded.
#define SEARCH_SPRASE_BATCH(_FUNC_NAME)                                        \
  int _FUNC_NAME(const uint32_t *sparse_count, const uint32_t *sparse_indices, \
                 const void *sparse_query, const IndexQueryMeta &qmeta,        \
                 uint32_t count, Context::Pointer &context) const {            \
    if (streamer_) {                                                           \
      if (reformer_) {                                                         \
        LOG_ERROR("reformer not supported in sparse search");                  \
        return IndexError_Runtime;                                             \
      } else {                                                                 \
        return streamer_->_FUNC_NAME(sparse_count, sparse_indices,             \
                                     sparse_query, qmeta, count,               \
                                     context->context());                      \
      }                                                                        \
    } else {                                                                   \
      return flow_._FUNC_NAME(sparse_count, sparse_indices, sparse_query,      \
                              qmeta, count, context->flow_context());          \
    }                                                                          \
  }

//! Defines a const member function `_FUNC_NAME` for one sparse query (count,
//! indices and values). With a streamer set, the call is forwarded to
//! `streamer_->_FUNC_NAME`, unless a reformer is present, in which case an
//! error is logged and IndexError_Runtime is returned. Without a streamer the
//! call goes to `flow_._FUNC_NAME`.
#define SEARCH_SPARSE(_FUNC_NAME)                                              \
  int _FUNC_NAME(const uint32_t sparse_count, const uint32_t *sparse_indices,  \
                 const void *sparse_query, const IndexQueryMeta &qmeta,        \
                 Context::Pointer &context) const {                            \
    if (streamer_) {                                                           \
      if (reformer_) {                                                         \
        LOG_ERROR("reformer not supported in sparse search");                  \
        return IndexError_Runtime;                                             \
      } else {                                                                 \
        return streamer_->_FUNC_NAME(sparse_count, sparse_indices,             \
                                     sparse_query, qmeta, context->context()); \
      }                                                                        \
    } else {                                                                   \
      return flow_._FUNC_NAME(sparse_count, sparse_indices, sparse_query,      \
                              qmeta, context->flow_context());                 \
    }                                                                          \
  }

//! Dense-search front end used by the bench tools. It drives either an
//! IndexStreamer (when one was created or injected through set_searcher) or
//! an IndexFlow, and additionally loads the optional tag lists of an index.
class Flow {
 public:
  //! Search context wrapping either a streamer context (`ctx_`) or an
  //! IndexFlow context (`flow_ctx_`); every call goes to whichever is set.
  class Context {
   public:
    typedef std::unique_ptr<Context> Pointer;

    //! Takes ownership of both pointers (they are moved from).
    Context(IndexContext::Pointer &ctx, IndexFlow::Context::Pointer &flow_ctx)
        : ctx_(std::move(ctx)), flow_ctx_(std::move(flow_ctx)) {}

    void set_debug_mode(bool debug_mode) {
      if (ctx_) {
        ctx_->set_debug_mode(debug_mode);
      } else {
        flow_ctx_->set_debug_mode(debug_mode);
      }
    }

    std::string debug_string() {
      return ctx_ ? ctx_->debug_string() : flow_ctx_->debug_string();
    }

    void set_topk(uint32_t topk) {
      if (ctx_) {
        ctx_->set_topk(topk);
      } else {
        flow_ctx_->set_topk(topk);
      }
    }

    //! Install a filter callback; the callable is forwarded so an rvalue is
    //! not copied on the way to the underlying context.
    template <typename T>
    void set_filter(T &&func) {
      if (ctx_) {
        ctx_->set_filter(std::forward<T>(func));
      } else {
        flow_ctx_->set_filter(std::forward<T>(func));
      }
    }

    const IndexDocumentList &result(void) const {
      return ctx_ ? ctx_->result() : flow_ctx_->result();
    }

    const IndexDocumentList &result(size_t index) const {
      return ctx_ ? ctx_->result(index) : flow_ctx_->result(index);
    }

   public:
    friend class Flow;

    IndexFlow::Context::Pointer &flow_context(void) {
      return flow_ctx_;
    }

    IndexContext::Pointer &context(void) {
      return ctx_;
    }

   private:
    IndexContext::Pointer ctx_;
    IndexFlow::Context::Pointer flow_ctx_;
  };

  //! Create a context from the streamer if one is set, otherwise from the
  //! flow. Returns a null pointer if the allocation fails.
  Context::Pointer create_context(void) const {
    IndexContext::Pointer ctx;
    IndexFlow::Context::Pointer flow_ctx;
    if (streamer_) {
      ctx = streamer_->create_context();
    } else {
      flow_ctx = flow_.create_context();
    }
    return Context::Pointer(new (std::nothrow) Context(ctx, flow_ctx));
  }

  int set_container(const std::string &name, const ailego::Params &params) {
    return flow_.set_storage(name, params);
  }

  //! Load the per-document keys and tag lists stored in the index at `path`.
  //! An index without a tag list header segment is not an error: the lists
  //! are simply left empty. The member lists are only replaced once the
  //! whole load succeeded, so a failed or repeated load never leaves partial
  //! or duplicated entries behind.
  //! @return 0 on success, otherwise an error code
  int load_taglists(const std::string &path) {
    // load tag lists
    auto storage = IndexFactory::CreateStorage("MMapFileReadStorage");
    if (!storage) {
      LOG_ERROR("Failed to create storage MMapFileReadStorage");
      return IndexError_NoExist;
    }

    int ret = storage->open(path, false);
    if (ret != 0) {
      LOG_ERROR("Failed to load index with storage %s",
                storage->name().c_str());
      return ret;
    }

    auto segment_taglist_header = storage->get(TAGLIST_HEADER_SEGMENT_NAME);
    if (!segment_taglist_header) {
      LOG_INFO("No Tag Lists Found!");

      tag_key_list_.clear();
      id_to_tags_list_.clear();
      return 0;
    }

    TagListHeader taglist_header;
    void *data_ptr = nullptr;
    if (segment_taglist_header->read(0, (const void **)(&data_ptr),
                                     sizeof(TagListHeader)) !=
        sizeof(TagListHeader)) {
      LOG_ERROR("Read tag list meta failed");
      return IndexError_ReadData;
    }

    memcpy(&taglist_header, data_ptr, sizeof(TagListHeader));

    auto segment_taglist_key = storage->get(TAGLIST_KEY_SEGMENT_NAME);
    if (!segment_taglist_key) {
      LOG_ERROR("IndexStorage get segment %s failed",
                TAGLIST_KEY_SEGMENT_NAME.c_str());
      return IndexError_InvalidValue;
    }

    // One uint64 key per vector, stored back to back.
    std::vector<uint64_t> keys;
    size_t offset = 0;
    for (size_t i = 0; i < taglist_header.num_vecs; ++i) {
      if (segment_taglist_key->read(offset, (const void **)(&data_ptr),
                                    sizeof(uint64_t)) != sizeof(uint64_t)) {
        LOG_ERROR("Read tag list key failed");
        return IndexError_ReadData;
      }

      keys.push_back(*reinterpret_cast<const uint64_t *>(data_ptr));

      offset += sizeof(uint64_t);
    }

    auto segment_taglist_data = storage->get(TAGLIST_DATA_SEGMENT_NAME);
    if (!segment_taglist_data) {
      LOG_ERROR("IndexStorage get segment %s failed",
                TAGLIST_DATA_SEGMENT_NAME.c_str());
      return IndexError_InvalidValue;
    }

    // The data segment starts with one uint64 per vector (an offset table).
    // The records behind it are read sequentially, so the table is skipped
    // rather than read.
    std::vector<std::vector<uint64_t>> tags_per_id;
    offset = taglist_header.num_vecs * sizeof(uint64_t);
    for (size_t i = 0; i < taglist_header.num_vecs; ++i) {
      // Each record is a uint64 tag count followed by that many uint64 tags.
      if (segment_taglist_data->read(offset, (const void **)(&data_ptr),
                                     sizeof(uint64_t)) != sizeof(uint64_t)) {
        LOG_ERROR("Read tag list data failed");
        return IndexError_ReadData;
      }
      offset += sizeof(uint64_t);

      uint64_t tag_count = *reinterpret_cast<const uint64_t *>(data_ptr);

      if (segment_taglist_data->read(offset, (const void **)(&data_ptr),
                                     tag_count * sizeof(uint64_t)) !=
          tag_count * sizeof(uint64_t)) {
        LOG_ERROR("Read tag list data failed");
        return IndexError_ReadData;
      }
      offset += tag_count * sizeof(uint64_t);

      const uint64_t *tags = reinterpret_cast<const uint64_t *>(data_ptr);
      std::vector<uint64_t> tag_list(tags, tags + tag_count);

      // order tags
      std::sort(tag_list.begin(), tag_list.end());

      tags_per_id.push_back(std::move(tag_list));
    }

    storage->cleanup();
    storage = nullptr;

    tag_key_list_ = std::move(keys);
    id_to_tags_list_ = std::move(tags_per_id);

    return 0;
  }

  //! Load the tag lists and then open the index at `path`, through the
  //! streamer (on an mmap storage) when one is set, otherwise through the
  //! flow. A streamer created by name is initialized here from the index
  //! meta; an injected streamer (`inited_`) is only opened.
  //! @return 0 on success, otherwise an error code
  int load(const std::string &path) {
    int ret = load_taglists(path);
    if (ret != 0) {
      LOG_ERROR("Failed to load tag lists");
      return ret;
    }

    if (streamer_) {
      stg_ = IndexFactory::CreateStorage("MMapFileStorage");
      if (!stg_) {
        return IndexError_NoExist;
      }
      ailego::Params params;
      params.set("proxima.mmap_file.storage.memory_warmup", true);
      ret = stg_->init(params);
      if (ret != 0) {
        return ret;
      }
      ret = stg_->open(path, true);
      if (ret != 0) {
        return ret;
      }

      if (!inited_) {
        IndexMeta meta;
        ret = IndexHelper::DeserializeFromStorage(stg_.get(), &meta);
        if (ret != 0) {
          LOG_ERROR("Failed to get IndexMeta from Storage");
          return ret;
        }
        ret = streamer_->init(meta, searcher_params_);
        if (ret != 0) {
          return ret;
        }

        if (!meta.reformer_name().empty()) {
          reformer_ = IndexFactory::CreateReformer(meta.reformer_name());
          if (!reformer_) {
            LOG_ERROR("Failed to create reformer %s",
                      meta.reformer_name().c_str());
            return IndexError_NoExist;
          }
          // A reformer that failed to initialize must not be used to convert
          // queries later on.
          ret = reformer_->init(meta.reformer_params());
          if (ret != 0) {
            LOG_ERROR("Failed to init reformer %s",
                      meta.reformer_name().c_str());
            return ret;
          }
        }
      }

      return streamer_->open(stg_);
    } else {
      return flow_.load(path);
    }
  }

  //! Close the streamer and its storage, or unload the flow.
  int unload(void) {
    if (streamer_) {
      streamer_->close();
      // The storage only exists after load() got far enough to create it.
      if (!stg_) {
        return 0;
      }
      return stg_->close();
    } else {
      return flow_.unload();
    }
  }

  //! Select the searcher by name. If `name` is a streamer it is created and
  //! `params` are kept for its initialization in load(); otherwise the name
  //! is handed to the flow.
  int set_searcher(const std::string &name, const ailego::Params &params) {
    //! If the searcher is streamer, create it
    streamer_ = IndexFactory::CreateStreamer(name);
    if (!streamer_) {
      return flow_.set_searcher(name, params);
    }
    searcher_params_ = params;
    return 0;
  }

  //! Use a streamer supplied by the caller; load() will not call init() on
  //! it.
  int set_searcher(IndexStreamer::Pointer streamer) {
    streamer_ = streamer;

    inited_ = true;

    return 0;
  }

  //! Sorted tag list per vector, in the order they were stored.
  const std::vector<std::vector<uint64_t>> &id_to_tags_list() const {
    return id_to_tags_list_;
  }

  //! Key per vector, indexed like id_to_tags_list().
  const std::vector<uint64_t> &tag_key_list() const {
    return tag_key_list_;
  }

  SEARCH_DENSE_BATCH(search_impl);
  SEARCH_DENSE(search_impl);
  SEARCH_DENSE_BATCH(search_bf_impl);
  SEARCH_DENSE(search_bf_impl);

 private:
  IndexFlow flow_{};

  IndexStreamer::Pointer streamer_{};
  IndexReformer::Pointer reformer_{};

  bool inited_{false};

  IndexStorage::Pointer stg_{};
  ailego::Params searcher_params_{};
  std::vector<std::vector<uint64_t>> id_to_tags_list_;
  std::vector<uint64_t> tag_key_list_;
};

//! Sparse-search front end used by the bench tools. It drives either an
//! IndexStreamer (when one was created or injected through set_searcher) or
//! an IndexSparseFlow.
class SparseFlow {
 public:
  //! Search context wrapping either a streamer context (`ctx_`) or an
  //! IndexSparseFlow context (`flow_ctx_`); every call goes to whichever is
  //! set.
  class Context {
   public:
    typedef std::unique_ptr<Context> Pointer;

    //! Takes ownership of both pointers (they are moved from).
    Context(IndexContext::Pointer &ctx,
            IndexSparseFlow::Context::Pointer &flow_ctx)
        : ctx_(std::move(ctx)), flow_ctx_(std::move(flow_ctx)) {}

    void set_debug_mode(bool debug_mode) {
      if (ctx_) {
        ctx_->set_debug_mode(debug_mode);
      } else {
        flow_ctx_->set_debug_mode(debug_mode);
      }
    }

    std::string debug_string() {
      return ctx_ ? ctx_->debug_string() : flow_ctx_->debug_string();
    }

    //! Install a filter callback; the callable is forwarded so an rvalue is
    //! not copied on the way to the underlying context.
    template <typename T>
    void set_filter(T &&func) {
      if (ctx_) {
        ctx_->set_filter(std::forward<T>(func));
      } else {
        flow_ctx_->set_filter(std::forward<T>(func));
      }
    }

    void set_topk(uint32_t topk) {
      if (ctx_) {
        ctx_->set_topk(topk);
      } else {
        flow_ctx_->set_topk(topk);
      }
    }

    const IndexDocumentList &result(void) const {
      return ctx_ ? ctx_->result() : flow_ctx_->result();
    }

    const IndexDocumentList &result(size_t index) const {
      return ctx_ ? ctx_->result(index) : flow_ctx_->result(index);
    }

   private:
    friend class SparseFlow;

    IndexSparseFlow::Context::Pointer &flow_context(void) {
      return flow_ctx_;
    }

    IndexContext::Pointer &context(void) {
      return ctx_;
    }


   private:
    IndexContext::Pointer ctx_;
    IndexSparseFlow::Context::Pointer flow_ctx_;
  };

  //! Create a context from the streamer if one is set, otherwise from the
  //! flow. Returns a null pointer if the allocation fails.
  Context::Pointer create_context(void) const {
    IndexContext::Pointer ctx;
    IndexSparseFlow::Context::Pointer flow_ctx;
    if (streamer_) {
      ctx = streamer_->create_context();
    } else {
      flow_ctx = flow_.create_context();
    }
    return Context::Pointer(new (std::nothrow) Context(ctx, flow_ctx));
  }

  int set_container(const std::string &name, const ailego::Params &params) {
    return flow_.set_storage(name, params);
  }

  //! Open the index at `path`, through the streamer (on an mmap storage)
  //! when one is set, otherwise through the flow. A streamer created by name
  //! is initialized here from the index meta; an injected streamer
  //! (`inited_`) is only opened.
  //! @return 0 on success, otherwise an error code
  int load(const std::string &path) {
    if (streamer_) {
      stg_ = IndexFactory::CreateStorage("MMapFileStorage");
      if (!stg_) {
        return IndexError_NoExist;
      }
      ailego::Params params;
      params.set("proxima.mmap_file.storage.memory_warmup", true);
      int ret = stg_->init(params);
      if (ret != 0) {
        return ret;
      }
      ret = stg_->open(path, true);
      if (ret != 0) {
        return ret;
      }

      if (!inited_) {
        IndexMeta meta;
        ret = IndexHelper::DeserializeFromStorage(stg_.get(), &meta);
        if (ret != 0) {
          LOG_ERROR("Failed to get IndexMeta from Storage");
          return ret;
        }

        ret = streamer_->init(meta, searcher_params_);
        if (ret != 0) {
          return ret;
        }

        if (!meta.reformer_name().empty()) {
          reformer_ = IndexFactory::CreateReformer(meta.reformer_name());
          if (!reformer_) {
            LOG_ERROR("Failed to create reformer %s",
                      meta.reformer_name().c_str());
            return IndexError_NoExist;
          }
          // Do not carry on with a reformer that failed to initialize.
          ret = reformer_->init(meta.reformer_params());
          if (ret != 0) {
            LOG_ERROR("Failed to init reformer %s",
                      meta.reformer_name().c_str());
            return ret;
          }
        }
      }

      return streamer_->open(stg_);
    } else {
      return flow_.load(path);
    }
  }

  //! Close the streamer and its storage, or unload the flow.
  int unload(void) {
    if (streamer_) {
      streamer_->close();
      // The storage only exists after load() got far enough to create it.
      if (!stg_) {
        return 0;
      }
      return stg_->close();
    } else {
      return flow_.unload();
    }
  }

  //! Select the searcher by name. If `name` is a streamer it is created and
  //! `params` are kept for its initialization in load(); otherwise the name
  //! is handed to the flow.
  int set_searcher(const std::string &name, const ailego::Params &params) {
    //! If the searcher is streamer, create it
    streamer_ = IndexFactory::CreateStreamer(name);
    if (!streamer_) {
      return flow_.set_searcher(name, params);
    }
    searcher_params_ = params;
    return 0;
  }

  //! Use a streamer supplied by the caller; load() will not call init() on
  //! it.
  int set_searcher(IndexStreamer::Pointer streamer) {
    streamer_ = streamer;

    inited_ = true;

    return 0;
  }

  //! Tag list per vector. Nothing in this class fills the list, so it is
  //! always empty.
  const std::vector<std::vector<uint64_t>> &id_to_tags_list() const {
    return id_to_tags_list_;
  }

  //! Key per vector. Nothing in this class fills the list, so it is always
  //! empty.
  const std::vector<uint64_t> &tag_key_list() const {
    return tag_key_list_;
  }

  SEARCH_SPRASE_BATCH(search_impl);
  SEARCH_SPARSE(search_impl);
  SEARCH_SPRASE_BATCH(search_bf_impl);
  SEARCH_SPARSE(search_bf_impl);

 private:
  IndexSparseFlow flow_{};

  IndexStreamer::Pointer streamer_{};
  IndexReformer::Pointer reformer_{};

  bool inited_{false};

  IndexStorage::Pointer stg_{};
  ailego::Params searcher_params_{};
  std::vector<std::vector<uint64_t>> id_to_tags_list_;
  std::vector<uint64_t> tag_key_list_;
};

}  // namespace core
}  // namespace zvec


================================================
FILE: tools/core/helper.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <sys/stat.h>
#include <signal.h>
#include <iomanip>
#include <iostream>
#include <mutex>
#include <ailego/container/bitmap.h>
#include <ailego/parallel/lock.h>
#include <zvec/ailego/hash/crc32c.h>
#include <zvec/ailego/io/file.h>
#include <zvec/ailego/logger/logger.h>
#include <zvec/ailego/parallel/thread_pool.h>
#include <zvec/ailego/utility/string_helper.h>
#include <zvec/ailego/utility/time_helper.h>
#include "zvec/core/framework/index_error.h"
#include "zvec/core/framework/index_factory.h"
#include "zvec/core/framework/index_plugin.h"
#include "zvec/core/framework/index_storage.h"
#include "zvec/core/interface/index.h"
#include "zvec/core/interface/index_factory.h"
#include "zvec/core/interface/index_param.h"
#include "filter_result_cache.h"
#include "meta_segment_common.h"
#include "txt_input_reader.h"

#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wshadow"
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
#elif defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#endif

#include <yaml-cpp/yaml.h>

#ifdef __clang__
#pragma clang diagnostic pop
#elif defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif

using namespace std;
using namespace zvec;
using namespace zvec::core;
using namespace zvec::ailego;


// Creates and opens the index described by a YAML tool configuration and
// fills in the optional query parameters.
//
// Expected configuration layout:
//
//   IndexCommon:
//     IndexConfig: '<index params as JSON>'        (required)
//   QueryConfig:                                   (optional)
//     QueryParam: '{"ef_search":100,"index_type":"kHNSW"}'
//     RefinerConfig:                               (optional)
//       ScaleFactor: !!int 2
//       ReferenceIndex:
//         Config: '<reference index params as JSON>'
//         Path: ./cohere_train_vector_1m.2.index
//
// @param config_node  root node of the parsed YAML configuration
// @param index_dir    path handed to Index::Open() for the main index
// @param index        receives the created and opened index
// @param query_param  receives the deserialized query parameters; the refiner
//                     settings, when configured, are attached to this object
// @return 0 on success, -1 on any configuration or loading failure
int parse_and_load_index_param(
    YAML::Node &config_node, string &index_dir,
    core_interface::Index::Pointer &index,
    core_interface::BaseIndexQueryParam::Pointer &query_param) {
  // Create Index from config
  if (auto index_config = config_node["IndexCommon"]["IndexConfig"]) {
    std::cout << "IndexConfig: " << index_config.as<string>() << std::endl;
    auto params = core_interface::IndexFactory::DeserializeIndexParamFromJson(
        index_config.as<string>());
    if (!params) {
      // Dereferencing a failed deserialization result would crash below.
      LOG_ERROR("Failed to deserialize index params");
      return -1;
    }
    index = core_interface::IndexFactory::CreateAndInitIndex(*params);
    if (!index) {
      LOG_ERROR("Failed to create index");
      return -1;
    }

    // The index is only queried by the tools, so map it read-only.
    core_interface::StorageOptions storage_options;
    storage_options.type = core_interface::StorageOptions::StorageType::kMMAP;
    storage_options.create_new = false;
    storage_options.read_only = true;

    int ret = index->Open(index_dir, storage_options);
    if (0 != ret) {
      LOG_ERROR("Index open failed with ret %d", ret);
      return -1;
    }

    cout << "Load index done!" << endl;
  } else {
    LOG_ERROR("IndexCommon.IndexConfig is required");
    return -1;
  }

  // QUERY PARAM
  auto query_config = config_node["QueryConfig"];
  if (!query_config) {
    return 0;
  }

  // QueryConfig.QueryParam
  if (auto query_param_config = query_config["QueryParam"];
      query_param_config) {
    std::cout << "QueryParam: " << query_param_config.as<string>()
              << std::endl;
    query_param = core_interface::IndexFactory::QueryParamDeserializeFromJson<
        core_interface::BaseIndexQueryParam>(
        query_param_config.as<std::string>());
    if (!query_param) {
      LOG_ERROR("Failed to deserialize query params");
      return -1;
    }
  }

  // QueryConfig.RefinerConfig
  auto refiner_config = query_config["RefinerConfig"];
  if (!refiner_config) {
    return 0;
  }

  // The refiner settings are stored on the query parameters, so there must be
  // a query parameter object (from QueryParam above or supplied by the caller)
  // before the reference index is loaded.
  if (!query_param) {
    LOG_ERROR(
        "QueryConfig.QueryParam is required when QueryConfig.RefinerConfig "
        "is set");
    return -1;
  }

  auto refiner_param = std::make_shared<core_interface::RefinerParam>();

  // QueryConfig.RefinerConfig.ScaleFactor
  if (auto scale_factor_config = refiner_config["ScaleFactor"];
      scale_factor_config) {
    refiner_param->scale_factor_ = scale_factor_config.as<float>();
  } else {
    LOG_ERROR("QueryConfig.RefinerConfig.ScaleFactor config is required");
    return -1;
  }

  // QueryConfig.RefinerConfig.ReferenceIndex
  auto reference_index_config = refiner_config["ReferenceIndex"];
  if (!reference_index_config) {
    LOG_ERROR("QueryConfig.RefinerConfig.ReferenceIndex section is required");
    return -1;
  }

  // QueryConfig.RefinerConfig.ReferenceIndex.Config
  core_interface::Index::Pointer reference_index = nullptr;
  if (auto reference_index_config_config = reference_index_config["Config"];
      reference_index_config_config) {
    auto params = core_interface::IndexFactory::DeserializeIndexParamFromJson(
        reference_index_config_config.as<std::string>());
    if (!params) {
      LOG_ERROR("Failed to deserialize reference index params");
      return -1;
    }

    reference_index = core_interface::IndexFactory::CreateAndInitIndex(*params);
    if (!reference_index) {
      // Without this check the Open() call below dereferences a null index.
      LOG_ERROR("Failed to create reference index");
      return -1;
    }
  } else {
    LOG_ERROR(
        "QueryConfig.RefinerConfig.ReferenceIndex.Config config is "
        "required");
    return -1;
  }

  // QueryConfig.RefinerConfig.ReferenceIndex.Path
  if (auto reference_index_path_config = reference_index_config["Path"];
      reference_index_path_config) {
    auto reference_index_path = reference_index_path_config.as<std::string>();
    core_interface::StorageOptions storage_options;
    storage_options.type = core_interface::StorageOptions::StorageType::kMMAP;
    storage_options.create_new = false;
    storage_options.read_only = true;

    int ret = reference_index->Open(reference_index_path, storage_options);
    if (0 != ret) {
      LOG_ERROR("Index open failed with ret %d", ret);
      return -1;
    }

    cout << "Load reference index done!" << endl;
  } else {
    LOG_ERROR("QueryConfig.RefinerConfig.ReferenceIndex.Path is required");
    return -1;
  }

  refiner_param->reference_index = reference_index;
  query_param->refiner_param = refiner_param;
  return 0;
}

//--------------------------------------------------
// Helper functions for loading tag lists
//--------------------------------------------------
int load_taglists(const std::string &path,
                  std::vector<std::vector<uint64_t>> &id_to_tags_list,
                  std::vector<uint64_t> &tag_key_list) {
  // Load tag lists
  auto storage = IndexFactory::CreateStorage("MMapFileReadStorage");

  int ret = storage->open(path, false);
  if (ret != 0) {
    LOG_ERROR("Failed to load index with storage %s", storage->name().c_str());
    return ret;
  }

  auto segment_taglist_header = storage->get(TAGLIST_HEADER_SEGMENT_NAME);
  if (!segment_taglist_header) {
    LOG_INFO("No Tag Lists Found!");
    return 0;
  }

  TagListHeader taglist_header;
  void *data_ptr;
  if (segment_taglist_header->read(0, (const void **)(&data_ptr),
                                   sizeof(TagListHeader)) !=
      sizeof(TagListHeader)) {
    LOG_ERROR("Read tag list meta failed");
    return IndexError_ReadData;
  }

  memcpy(&taglist_header, data_ptr, sizeof(TagListHeader));

  auto segment_taglist_key = storage->get(TAGLIST_KEY_SEGMENT_NAME);
  if (!segment_taglist_key) {
    LOG_ERROR("IndexStorage get segment %s failed",
              TAGLIST_KEY_SEGMENT_NAME.c_str());
    return IndexError_InvalidValue;
  }

  size_t offset = 0;
  for (size_t i = 0; i < taglist_header.num_vecs; ++i) {
    if (segment_taglist_key->read(offset, (const void **)(&data_ptr),
                                  sizeof(uint64_t)) != sizeof(uint64_t)) {
      LOG_ERROR("Read tag list key failed");
      return IndexError_ReadData;
    }

    uint64_t key = *reinterpret_cast<const uint64_t *>(data_ptr);
    tag_key_list.push_back(key);

    offset += sizeof(uint64_t);
  }

  auto segment_taglist_data = storage->get(TAGLIST_DATA_SEGMENT_NAME);
  if (!segment_taglist_data) {
    LOG_ERROR("IndexStorage get segment %s failed",
              TAGLIST_DATA_SEGMENT_NAME.c_str());
    return IndexError_InvalidValue;
  }

  std::vector<uint64_t> taglist_offsets;
  offset = 0;
  for (size_t i = 0; i < taglist_header.num_vecs; ++i) {
    if (segment_taglist_data->read(offset, (const void **)(&data_ptr),
                                   sizeof(uint64_t)) != sizeof(uint64_t)) {
      LOG_ERROR("Read tag list data failed");
      return IndexError_ReadData;
    }

    uint64_t tag_offset = *reinterpret_cast<const uint64_t *>(data_ptr);
    taglist_offsets.push_back(tag_offset);

    offset += sizeof(uint64_t);
  }

  offset = taglist_header.num_vecs * sizeof(uint64_t);
  for (size_t i = 0; i < taglist_header.num_vecs; ++i) {
    if (segment_taglist_data->read(offset, (const void **)(&data_ptr),
                                   sizeof(uint64_t)) != sizeof(uint64_t)) {
      LOG_ERROR("Read tag list data failed");
      return IndexError_ReadData;
    }
    offset += sizeof(uint64_t);

    uint64_t tag_count = *reinterpret_cast<const uint64_t *>(data_ptr);

    if (segment_taglist_data->read(offset, (const void **)(&data_ptr),
                                   tag_count * sizeof(uint64_t)) !=
        tag_count * sizeof(uint64_t)) {
      LOG_ERROR("Read tag list data failed");
      return IndexError_ReadData;
    }
    offset += tag_count * sizeof(uint64_t);

    std::vector<uint64_t> tag_list;
    tag_list.reserve(tag_count);
    for (size_t j = 0; j < tag_count; ++j) {
      tag_list.push_back(reinterpret_cast<const uint64_t *>(data_ptr)[j]);
    }
    id_to_tags_list.push_back(std::move(tag_list));
  }

  return 0;
}


================================================
FILE: tools/core/index_meta_helper.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <iostream>
#include <string>
#include "zvec/core/framework/index_meta.h"

namespace zvec {
namespace core {

class IndexMetaHelper {
 public:
  static std::string to_string(IndexMeta::DataType type) {
    switch (type) {
      case IndexMeta::DataType::DT_FP32:
        return std::string("FP32");
      case IndexMeta::DataType::DT_FP64:
        return std::string("FP64");
      case IndexMeta::DataType::DT_INT16:
        return std::string("INT16");
      case IndexMeta::DataType::DT_INT8:
        return std::string("INT8");
      case IndexMeta::DataType::DT_BINARY32:
        return std::string("Binary");
      case IndexMeta::DataType::DT_BINARY64:
        return std::string("Binary64");
      case IndexMeta::DataType::DT_FP16:
        return std::string("FP16");
      default:
        return std::string("NotSupportedType");
    }
  }

  static std::string to_string(IndexMeta meta) {
    char buffer[1024];
    snprintf(buffer, 1024,
             "IndexMeta: type[%s] method[%s] dimension[%u] element_size[%u]",
             to_string(meta.data_type()).c_str(), meta.metric_name().c_str(),
             meta.dimension(), meta.element_size());
    return std::string(buffer);
  }

  static bool parse_from(const std::string &type, const std::string &method,
                         const std::string &vector_type, IndexMeta &meta) {
    return parse_from(type, method, 0, vector_type, meta);
  }

  static bool parse_from(const std::string &type, const std::string &method,
                         const size_t dimension, const std::string &vector_type,
                         IndexMeta &meta) {
    if (vector_type != "dense" && vector_type != "sparse") {
      std::cerr << "vector type should be dense or sparse!!!" << std::endl;
      return false;
    }

    auto feature_type = IndexMeta::DataType::DT_UNDEFINED;
    if (type == std::string("float")) {
      feature_type = IndexMeta::DataType::DT_FP32;
    } else if (type == std::string("double")) {
      feature_type = IndexMeta::DataType::DT_FP64;
    } else if (type == std::string("int16")) {
      feature_type = IndexMeta::DataType::DT_INT16;
    } else if (type == std::string("int8")) {
      feature_type = IndexMeta::DataType::DT_INT8;
    } else if (type == std::string("binary")) {
      feature_type = IndexMeta::DataType::DT_BINARY32;
    } else if (type == std::string("binary64")) {
      feature_type = IndexMeta::DataType::DT_BINARY64;
    } else {
      std::cerr << "Not supported type: " << type << std::endl;
      return false;
    }

    meta.set_meta(feature_type, dimension);
    ailego::Params params;
    if (method == std::string("L2")) {
      if (feature_type == IndexMeta::DataType::DT_FP32) {
        meta.set_metric("SquaredEuclidean", 0, std::move(params));
      } else if (feature_type == IndexMeta::DataType::DT_INT8) {
        meta.set_metric("SquaredEuclidean", 0, std::move(params));
      } else if (feature_type == IndexMeta::DataType::DT_FP16) {
        meta.set_metric("SquaredEuclidean", 0, std::move(params));
      } else {
        std::cerr << "Not supported type(" << type << ") for L2" << std::endl;
        return false;
      }
    } else if (method == std::string("IP")) {
      if (feature_type == IndexMeta::DataType::DT_FP32) {
        meta.set_metric("InnerProduct", 0, std::move(params));
      } else if (feature_type == IndexMeta::DataType::DT_INT8) {
        meta.set_metric("InnerProduct", 0, std::move(params));
      } else if (feature_type == IndexMeta::DataType::DT_FP16) {
        meta.set_metric("InnerProduct", 0, std::move(params));
      } else {
        std::cerr << "Not supported type(" << type << ") for IP" << std::endl;
        return false;
      }
    } else if (method == std::string("Cosine")) {
      if (feature_type == IndexMeta::DataType::DT_FP32) {
        meta.set_metric("Cosine", 0, std::move(params));
      } else if (feature_type == IndexMeta::DataType::DT_INT8) {
        meta.set_metric("Cosine", 0, std::move(params));
      } else if (feature_type == IndexMeta::DataType::DT_FP16) {
        meta.set_metric("Cosine", 0, std::move(params));
      } else {
        std::cerr << "Not supported type(" << type << ") for Cosine"
                  << std::endl;
        return false;
      }
    } else if (method == std::string("HAMMING")) {
      if (feature_type == IndexMeta::DataType::DT_BINARY32) {
        meta.set_metric("Hamming", 0, std::move(params));
      } else if (feature_type == IndexMeta::DataType::DT_BINARY64) {
        meta.set_metric("Hamming", 0, std::move(params));
      } else {
        std::cerr << "Not supported type(" << type << ") for hamming"
                  << std::endl;
        return false;
      }
    } else {
      std::cerr << "Not supported method: " << method << std::endl;
      return false;
    }

    return true;
  }
};

}  // namespace core
}  // namespace zvec

================================================
FILE: tools/core/local_builder.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <libgen.h>
#include <signal.h>
#include <iostream>
#include <memory>
#include <ailego/pattern/defer.h>
#include <zvec/ailego/container/params.h>
#include <zvec/ailego/utility/time_helper.h>
#include "algorithm/flat/flat_utility.h"
#include "algorithm/hnsw_rabitq/hnsw_rabitq_params.h"
#if RABITQ_SUPPORTED
#include "algorithm/hnsw_rabitq/hnsw_rabitq_streamer.h"
#include "algorithm/hnsw_rabitq/rabitq_converter.h"
#endif
#include "algorithm/hnsw/hnsw_params.h"
#include "zvec/ailego/logger/logger.h"
#include "zvec/core/framework/index_dumper.h"
#include "zvec/core/framework/index_factory.h"
#include "zvec/core/framework/index_logger.h"
#include "zvec/core/framework/index_plugin.h"
#include "zvec/core/framework/index_provider.h"
#include "zvec/core/framework/index_reformer.h"
#include "zvec/core/framework/index_streamer.h"
#include "index_meta_helper.h"
#include "meta_segment_common.h"
#include "vecs_index_holder.h"

#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wshadow"
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
#elif defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#endif

#include <yaml-cpp/yaml.h>

#ifdef __clang__
#pragma clang diagnostic pop
#elif defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif

using namespace std;
using namespace zvec::core;
using namespace zvec;

// When true, the streamer build paths call add_with_id_impl() with the key
// truncated to uint32_t instead of add_impl() with the 64-bit key.
bool g_disable_id_map = false;

// Kind of vectors a build operates on.
enum RetrievalMode { RM_UNDEFINED = 0, RM_DENSE = 1, RM_SPARSE = 2 };

// Source of dense vectors; read by the dense streamer build and stopped from
// the stop() handler.
VecsIndexHolder::Pointer holder;
// Source of sparse vectors; read by the sparse streamer build.
VecsIndexSparseHolder::Pointer sparse_holder;

// Stop request flag. It is set by stop() and polled by the build workers, so
// it is atomic: a plain bool written by the handler while pool threads read it
// is a data race.
std::atomic<bool> stop_now{false};

// Handler for a stop request (the message refers to Ctrl+C; the registration
// is not part of this block).
//
// The first call raises stop_now and asks the dense holder to stop; a second
// call terminates the process with the signal number as exit status.
//
// NOTE(review): sparse_holder is not stopped here; the sparse build only
// observes stop_now.
void stop(int signo) {
  // exchange() reports whether a stop had already been requested.
  if (stop_now.exchange(true)) {
    exit(signo);
  }
  cout << "\rTrying to stop. press [Ctrl+C] again kill immediately." << endl
       << flush;
  if (holder) {
    holder->stop();
  }
}

// Prints the command line synopsis to stdout.
void usage(void) {
  static const char kSynopsis[] =
      "Usage: local_builder CONFIG.yaml [plugin file path]";
  cout << kSynopsis << endl;
}

// Copies the entries of a YAML map into `params`, echoing each scalar to
// stdout as "key=value".
//
// Values carrying an explicit YAML int / float / bool tag are stored as
// int64_t / float / bool. Any other scalar is stored as a string, and a nested
// map is converted recursively into a sub-Params. Values that are neither
// scalar nor map are skipped.
//
// @return false when a nested map fails to convert, true otherwise
bool prepare_params(YAML::Node &&config_params, ailego::Params &params) {
  cout << "Parse params as blow:" << endl;
  for (const auto &entry : config_params) {
    const YAML::Node &value = entry.second;

    // The key is converted on demand, at the same points as a direct
    // entry.first.as<string>() call would be.
    const auto key_name = [&entry]() { return entry.first.as<string>(); };

    // Stores one scalar under the entry's key and echoes it.
    const auto store = [&](auto scalar) {
      params.set(key_name(), scalar);
      cout << key_name() << "=" << scalar << endl;
    };

    const string tag = value.Tag();
    if (tag == "tag:yaml.org,2002:int") {
      store(value.as<int64_t>());
      continue;
    }
    if (tag == "tag:yaml.org,2002:float") {
      store(value.as<float>());
      continue;
    }
    if (tag == "tag:yaml.org,2002:bool") {
      store(value.as<bool>());
      continue;
    }

    // Untagged (or otherwise tagged) values.
    if (value.IsScalar()) {
      store(value.as<string>());
      continue;
    }
    if (!value.IsMap()) {
      continue;
    }

    ailego::Params sub_params;
    YAML::Node sub_node = value;
    if (!prepare_params(std::move(sub_node), sub_params)) {
      LOG_ERROR("parse params error with key[%s]", key_name().c_str());
      return false;
    }
    params.set(key_name(), sub_params);
  }
  return true;
}

// Trains a RaBitQ converter on the build data and wires the resulting
// reformer and a vector provider into an HNSW RaBitQ streamer.
//
// @param streamer        streamer to configure; must be a HnswRabitqStreamer
// @param meta            index meta used to initialise the converter
// @param config_root     configuration root; the optional
//                        RabitqConverterParams section supplies the
//                        converter parameters
// @param converter_name  empty when *build_holder can be used as a provider
//                        directly; otherwise the holder is converted through
//                        convert_holder_to_provider()
// @param build_holder    in/out: the training data. When a conversion takes
//                        place it is replaced by the provider so that the
//                        original holder can be released.
// @return 0 on success, -1 on failure or when RaBitQ support is compiled out
int setup_hnsw_rabitq_streamer(const IndexStreamer::Pointer &streamer,
                               const IndexMeta &meta, YAML::Node &config_root,
                               const std::string &converter_name,
                               IndexHolder::Pointer *build_holder) {
#if RABITQ_SUPPORTED
  if (build_holder == nullptr || !*build_holder) {
    cerr << "Build holder is required to train rabitq converter" << endl;
    return -1;
  }

  // Validate the streamer type before the (potentially long) training so that
  // a mismatch is reported instead of dereferencing a failed cast.
  HnswRabitqStreamer *hnsw_rabitq_streamer =
      dynamic_cast<HnswRabitqStreamer *>(streamer.get());
  if (hnsw_rabitq_streamer == nullptr) {
    cerr << "Streamer is not a HnswRabitqStreamer" << endl;
    return -1;
  }

  RabitqConverter rabitq_converter;
  ailego::Params rabitq_converter_params;
  if (config_root["RabitqConverterParams"]) {
    auto rabitq_params_node = config_root["RabitqConverterParams"];
    if (!prepare_params(std::move(rabitq_params_node),
                        rabitq_converter_params)) {
      cerr << "Failed to prepare rabitq converter params" << endl;
      return -1;
    }
  }
  if (rabitq_converter.init(meta, rabitq_converter_params) != 0) {
    cerr << "rabitq converter init failed" << std::endl;
    return -1;
  }
  if (rabitq_converter.train(*build_holder) != 0) {
    cerr << "rabitq converter train failed" << std::endl;
    return -1;
  }

  IndexReformer::Pointer rabitq_reformer;
  rabitq_converter.to_reformer(&rabitq_reformer);
  if (!rabitq_reformer) {
    cerr << "rabitq converter produced no reformer" << endl;
    return -1;
  }
  hnsw_rabitq_streamer->set_reformer(std::move(rabitq_reformer));

  IndexProvider::Pointer provider;
  if (converter_name.empty()) {
    // build_holder is VecsIndexHolder
    provider = std::dynamic_pointer_cast<IndexProvider>(*build_holder);
  } else {
    // build_holder is ordinary IndexHolder, need to convert
    provider = convert_holder_to_provider(*build_holder);
    // reuse provider to release memory
    *build_holder = provider;
  }

  if (!provider) {
    cerr << "Failed to cast build holder to provider" << endl;
    return -1;
  }
  hnsw_rabitq_streamer->set_provider(provider);
  return 0;
#else
  // Parameters are unused when RaBitQ support is compiled out.
  (void)streamer;
  (void)meta;
  (void)config_root;
  (void)converter_name;
  (void)build_holder;
  cerr << "HNSW RaBitQ is not supported on this platform" << endl;
  return -1;
#endif
}

// Verifies that the builder configuration contains every mandatory entry.
//
// Required: BuilderCommon.{BuilderClass, BuildFile, DumpPath} and the
// top-level BuilderParams section. BuilderCommon.TrainFile is required when
// NeedTrain is true. BuilderCommon.TrainerIndexPath and the top-level
// TrainerParams section are required whenever a UseTrainer key is present.
//
// NOTE(review): UseTrainer is tested for presence only, unlike NeedTrain whose
// boolean value is evaluated; confirm this is intended.
//
// @return true when the configuration is complete; otherwise the first
//         missing entry is logged and false is returned
bool check_config(YAML::Node &config_root) {
  auto common = config_root["BuilderCommon"];
  if (!common) {
    // Name the section that is actually missing.
    LOG_ERROR("Can not find [BuilderCommon] in config");
    return false;
  }
  if (!common["BuilderClass"]) {
    LOG_ERROR("Can not find [BuilderClass] in config");
    return false;
  }
  if (!common["BuildFile"]) {
    LOG_ERROR("Can not find [BuildFile] in config");
    return false;
  }
  if (common["NeedTrain"] && common["NeedTrain"].as<bool>()) {
    if (!common["TrainFile"]) {
      LOG_ERROR("Can not find [TrainFile] in config");
      return false;
    }
  }
  if (common["UseTrainer"]) {
    if (!common["TrainerIndexPath"]) {
      LOG_ERROR("Can not find [TrainerIndexPath] in config");
      return false;
    }
    if (!config_root["TrainerParams"]) {
      LOG_ERROR("Can not find [TrainerParams] in config");
      return false;
    }
  }
  if (!common["DumpPath"]) {
    LOG_ERROR("Can not find [DumpPath] in config");
    return false;
  }
  if (!config_root["BuilderParams"]) {
    LOG_ERROR("Can not find [BuilderParams] in config");
    return false;
  }
  return true;
}

// Rounds `size` up to the next multiple of 32 bytes (values that are already
// a multiple of 32 are returned unchanged).
static inline size_t AlignSize(size_t size) {
  constexpr size_t kAlignment = 32;
  constexpr size_t kLowBits = kAlignment - 1;
  return (size + kLowBits) & ~kLowBits;
}

// Writes one segment through `dumper`: the payload, zero padding up to the
// next 32-byte boundary, and the segment record (id, size, padding, CRC32C of
// the payload).
//
// @param dumper      destination dumper
// @param segment_id  name under which the segment is registered
// @param data        payload bytes
// @param size        payload size in bytes
// @param writes      receives payload + padding bytes; only set on success
// @return true on success, false (after logging) when a write or the segment
//         registration fails
bool dump_meta_segment(const IndexDumper::Pointer &dumper,
                       const std::string &segment_id, const void *data,
                       size_t size, size_t &writes) {
  size_t len = dumper->write(data, size);
  if (len != size) {
    // size_t values are printed with %zu, as in the rest of this file.
    LOG_ERROR("Dump segment %s data failed, expect: %zu, actual: %zu",
              segment_id.c_str(), size, len);
    return false;
  }

  size_t padding_size = AlignSize(size) - size;
  if (padding_size > 0) {
    std::string padding(padding_size, '\0');
    if (dumper->write(padding.data(), padding_size) != padding_size) {
      LOG_ERROR("Append padding failed, size %zu", padding_size);
      return false;
    }
  }

  // The checksum covers the payload only, not the padding.
  uint32_t crc = ailego::Crc32c::Hash(data, size);
  int ret = dumper->append(segment_id, size, padding_size, crc);
  if (ret != 0) {
    LOG_ERROR("Dump segment %s meta failed, ret=%d", segment_id.c_str(), ret);
    return false;
  }

  writes = len + padding_size;

  return true;
}

// Dumps the tag list data as three segments: the header (carrying num_vecs),
// the key array (num_vecs uint64_t values read from `key_base`) and the raw
// tag list blob (`taglist_size` bytes read from `taglist_data`).
//
// @return 0 on success, IndexError_WriteData as soon as one segment fails
int dump_taglist(IndexDumper::Pointer dumper, size_t num_vecs,
                 const void *key_base, const void *taglist_data,
                 uint64_t taglist_size) {
  TagListHeader header;
  header.num_vecs = num_vecs;

  // Receives the byte count of each dumped segment; not used afterwards.
  size_t written;

  if (!dump_meta_segment(dumper, TAGLIST_HEADER_SEGMENT_NAME, &header,
                         sizeof(TagListHeader), written)) {
    LOG_ERROR("dump taglist meta failed");
    return IndexError_WriteData;
  }

  const size_t key_bytes = num_vecs * sizeof(uint64_t);
  if (!dump_meta_segment(dumper, TAGLIST_KEY_SEGMENT_NAME, key_base, key_bytes,
                         written)) {
    LOG_ERROR("dump taglist key failed");
    return IndexError_WriteData;
  }

  if (!dump_meta_segment(dumper, TAGLIST_DATA_SEGMENT_NAME, taglist_data,
                         taglist_size, written)) {
    LOG_ERROR("dump taglist data failed");
    return IndexError_WriteData;
  }

  return 0;
}

// Adds every sparse vector of the global `sparse_holder` to `streamer` using
// a pool of worker threads.
//
// Worker `i` handles the documents i, i + W, i + 2W, ... where W is the number
// of pool workers. When the streamer meta names a reformer, each vector is
// converted before it is added. Once a worker has added document `id` with
// id >= keep_docs (count - start_cursor), it removes the key of document
// id - keep_docs again.
//
// Workers stop early when stop_now is raised or when any worker has failed.
// The first failure code is the one returned.
//
// @param streamer      opened streamer receiving the vectors
// @param thread_count  number of pool threads to create
// @return 0 on success (including a user requested stop), otherwise the error
//         code of the first failure
int do_build_sparse_by_streamer(IndexStreamer::Pointer &streamer,
                                uint32_t thread_count) {
  auto meta = streamer->meta();
  IndexReformer::Pointer reformer;
  if (!meta.reformer_name().empty()) {
    reformer = IndexFactory::CreateReformer(meta.reformer_name());
    if (!reformer) {
      LOG_ERROR("Failed to create reformer %s", meta.reformer_name().c_str());
      return IndexError_NoExist;
    }
    // NOTE(review): the outcome of init() is not inspected here.
    reformer->init(meta.reformer_params());
  }

  IndexQueryMeta qmeta(sparse_holder->data_type());
  const uint32_t keep_docs =
      sparse_holder->count() - sparse_holder->start_cursor();

  // State shared with the workers. It is declared before the pool, and every
  // exit path below waits for the pool, so no worker can touch it after it has
  // gone out of scope.
  std::atomic<size_t> finished{0};
  std::atomic<int> errcode{0};
  std::atomic_bool error{false};
  std::mutex mutex;
  std::condition_variable cond{};
  ailego::ThreadPool pool(thread_count, false);

  // One task is launched per pool worker, so the same number is the stride.
  const size_t worker_count = pool.count();
  const uint32_t stride = static_cast<uint32_t>(worker_count);

  // Records a failure. The code is stored before the flag is published so a
  // reader that sees the flag also sees a code. Returns true for the first
  // reporter, which is the one that logs.
  auto record_failure = [&](int code) -> bool {
    int expected = 0;
    const bool first = errcode.compare_exchange_strong(expected, code);
    error.store(true, std::memory_order_release);
    return first;
  };

  // Adds one vector, keyed either by the 64-bit key or, when the id map is
  // disabled, by the key truncated to a 32-bit id.
  const bool use_raw_id = g_disable_id_map;
  auto add_to_streamer_sparse =
      [&](uint64_t pkey, const uint32_t sparse_count,
          const uint32_t *sparse_indices, const void *sparse_query,
          const IndexQueryMeta &query_meta,
          IndexContext::Pointer &context) -> int {
    if (use_raw_id) {
      return streamer->add_with_id_impl(static_cast<uint32_t>(pkey),
                                        sparse_count, sparse_indices,
                                        sparse_query, query_meta, context);
    }
    return streamer->add_impl(pkey, sparse_count, sparse_indices, sparse_query,
                              query_meta, context);
  };

  auto do_build = [&](size_t idx) {
    // Wake the waiting thread whenever a worker leaves, for any reason.
    AILEGO_DEFER([&]() {
      std::lock_guard<std::mutex> latch(mutex);
      cond.notify_one();
    });
    auto ctx = streamer->create_context();
    if (!ctx) {
      if (record_failure(IndexError_NoMemory)) {
        LOG_ERROR("Failed to create streamer context");
      }
      return;
    }
    for (uint32_t id = static_cast<uint32_t>(idx);
         id < sparse_holder->count() && !stop_now &&
         !error.load(std::memory_order_acquire);
         id += stride) {
      const uint64_t key = sparse_holder->get_key(id);
      // Per-worker result; a variable shared by all workers would be a race.
      int ret = 0;
      if (reformer) {
        std::string new_vec;
        IndexQueryMeta new_meta;
        ret = reformer->convert(sparse_holder->get_sparse_count(id),
                                sparse_holder->get_sparse_indices(id),
                                sparse_holder->get_sparse_data(id), qmeta,
                                &new_vec, &new_meta);
        if (ret != 0) {
          if (record_failure(ret)) {
            LOG_ERROR("Failed to convert sparse vector for %s",
                      IndexError::What(ret));
          }
          return;
        }
        ret = add_to_streamer_sparse(key, sparse_holder->get_sparse_count(id),
                                     sparse_holder->get_sparse_indices(id),
                                     new_vec.data(), new_meta, ctx);
      } else {
        ret = add_to_streamer_sparse(key, sparse_holder->get_sparse_count(id),
                                     sparse_holder->get_sparse_indices(id),
                                     sparse_holder->get_sparse_data(id), qmeta,
                                     ctx);
      }

      if (ailego_unlikely(ret != 0)) {
        if (record_failure(ret)) {
          LOG_ERROR("streamer add_impl failed\n");
        }
        return;
      }
      if (id >= keep_docs) {
        ret =
            streamer->remove_impl(sparse_holder->get_key(id - keep_docs), ctx);
        if (ailego_unlikely(ret != 0)) {
          if (record_failure(ret)) {
            LOG_ERROR("streamer remove_impl failed\n");
          }
          return;
        }
      }
      finished++;
    }
  };

  for (size_t i = 0; i < worker_count; ++i) {
    pool.execute(do_build, i);
  }

  // Report progress every 15 seconds, or whenever a worker leaves.
  while (!pool.is_finished()) {
    std::unique_lock<std::mutex> lk(mutex);
    cond.wait_until(
        lk, std::chrono::system_clock::now() + std::chrono::seconds(15));
    if (error.load(std::memory_order_acquire)) {
      break;
    }
    LOG_INFO("Built cnt %zu, finished percent %.3f%%", finished.load(),
             finished.load() * 100.0f / sparse_holder->count());
  }

  // Always let the workers drain before the shared state above is destroyed.
  // The mutex is not held here, so leaving workers can still notify.
  pool.wait_finish();

  if (error.load(std::memory_order_acquire)) {
    LOG_ERROR("Failed to build index while waiting finish");
    return errcode.load();
  }

  return 0;
}

// Opens the streamer on a file backed storage and builds the sparse index
// into it.
//
// Reads from `config_common`:
//   - IndexPath   (required) path of the storage file to open
//   - ThreadCount (optional) number of build threads; defaults to
//                 std::thread::hardware_concurrency()
//
// @return 0 on success, IndexError_InvalidArgument when IndexPath is missing,
//         IndexError_NoExist / IndexError_Runtime when the storage or the
//         streamer cannot be set up, otherwise the result of the build
int build_sparse_by_streamer(IndexStreamer::Pointer &streamer,
                             YAML::Node &config_common) {
  if (!config_common["IndexPath"]) {
    LOG_ERROR("Miss params IndexPath for Streamer");
    return IndexError_InvalidArgument;
  }
  string path = config_common["IndexPath"].as<string>();

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  if (!storage) {
    LOG_ERROR("Failed to create storage");
    return IndexError_NoExist;
  }
  ailego::Params params;
  int ret = storage->init(params);
  if (ret != 0) {
    LOG_ERROR("Storage Failed init");
    return IndexError_Runtime;
  }
  ret = storage->open(path, true);
  if (ret != 0) {
    LOG_ERROR("Storage Failed to open");
    return IndexError_Runtime;
  }
  ret = streamer->open(storage);
  if (ret != 0) {
    LOG_ERROR("Failed to open storage");
    return IndexError_Runtime;
  }

  size_t thread_count = config_common["ThreadCount"]
                            ? config_common["ThreadCount"].as<uint64_t>()
                            : std::thread::hardware_concurrency();
  if (thread_count == 0) {
    // hardware_concurrency() may return 0 when the value is not computable;
    // always build with at least one thread.
    thread_count = 1;
  }

  LOG_DEBUG("thread count: %zu, retrieval_mode: sparse", thread_count);

  // Propagate build failures instead of always reporting success.
  return do_build_sparse_by_streamer(streamer,
                                     static_cast<uint32_t>(thread_count));
}

/*!
 * Add every vector of the global dense `holder` to `streamer` using a pool of
 * `thread_count` workers. Worker `idx` handles ids idx, idx + thread_count, ...
 *
 * When the holder has a start cursor, each id >= keep_docs additionally
 * removes the key at (id - keep_docs) from the streamer.
 *
 * \param streamer        Opened streamer that receives the vectors.
 * \param thread_count    Number of worker threads (also the id stride).
 * \param retrieval_mode  Only RM_DENSE is supported.
 * \return 0 on success, otherwise the code of the first failure recorded by
 *         a worker (or a setup error code).
 */
int do_build_by_streamer(IndexStreamer::Pointer &streamer,
                         uint32_t thread_count, RetrievalMode retrieval_mode) {
  ailego::ThreadPool pool(thread_count, false);
  std::atomic<size_t> finished{0};
  // First failure code; written by workers, read by the waiting thread.
  std::atomic<int> errcode{0};
  std::mutex mutex;
  std::atomic_bool error{false};
  std::condition_variable cond{};

  auto meta = streamer->meta();
  IndexReformer::Pointer reformer;
  if (!meta.reformer_name().empty()) {
    if (retrieval_mode != RM_DENSE) {
      LOG_ERROR("Reformer not supported");
      return IndexError_Runtime;
    } else {
      reformer = IndexFactory::CreateReformer(meta.reformer_name());
      if (!reformer) {
        LOG_ERROR("Failed to create reformer %s", meta.reformer_name().c_str());
        return IndexError_NoExist;
      }
      reformer->init(meta.reformer_params());
    }
  }

  IndexQueryMeta qmeta(holder->data_type(), holder->dimension());
  uint32_t keep_docs = holder->count() - holder->start_cursor();

  // Insertion entry point: keyed insert by default, direct-id insert when the
  // id map is disabled.
  std::function<int(uint64_t, const void *, const IndexQueryMeta &,
                    IndexContext::Pointer &)>
      add_to_streamer = [&](uint64_t pkey, const void *query,
                            const IndexQueryMeta &query_meta,
                            IndexContext::Pointer &context) -> int {
    return streamer->add_impl(pkey, query, query_meta, context);
  };
  if (g_disable_id_map) {
    add_to_streamer = [&](uint64_t pkey, const void *query,
                          const IndexQueryMeta &query_meta,
                          IndexStreamer::Context::Pointer &context) -> int {
      return streamer->add_with_id_impl(static_cast<uint32_t>(pkey), query,
                                        query_meta, context);
    };
  }

  // Publish a failure. The code is stored before the flag is raised, so a
  // thread that observes `error` always reads a non-zero `errcode`.
  // Returns true for the first failure only, so it is logged once.
  auto claim_failure = [&](int code) -> bool {
    int expected = 0;
    const bool first = errcode.compare_exchange_strong(expected, code);
    error.store(true, std::memory_order_release);
    return first;
  };

  auto do_build = [&](size_t idx) {
    AILEGO_DEFER([&]() {
      std::lock_guard<std::mutex> latch(mutex);
      cond.notify_one();
    });
    auto ctx = streamer->create_context();
    if (!ctx) {
      if (claim_failure(IndexError_NoMemory)) {
        LOG_ERROR("Failed to create streamer context");
      }
      return;
    }
    std::string ovec;
    IndexQueryMeta ometa;
    // Stop early on Ctrl+C or as soon as any worker has failed.
    for (uint32_t id = idx; id < holder->count() && !stop_now &&
                            !error.load(std::memory_order_acquire);
         id += thread_count) {
      // Per-worker status: a variable shared between workers would be a data
      // race.
      int ret = 0;
      uint64_t key = holder->get_key(id);
      if (retrieval_mode == RM_DENSE) {
        if (reformer) {
          ret = reformer->convert(holder->get_vector_by_index(id), qmeta, &ovec,
                                  &ometa);
          if (ret != 0) {
            if (claim_failure(ret)) {
              LOG_ERROR("Failed to convert vector for %s",
                        IndexError::What(ret));
            }
            return;
          }
          ret = add_to_streamer(key, ovec.data(), ometa, ctx);
        } else {
          ret =
              add_to_streamer(key, holder->get_vector_by_index(id), qmeta, ctx);
        }
      } else {
        if (claim_failure(IndexError_Unsupported)) {
          LOG_ERROR("Retrieval mode not supported");
        }
        return;
      }

      if (ailego_unlikely(ret != 0)) {
        if (claim_failure(ret)) {
          LOG_ERROR("streamer add_impl failed");
        }
        return;
      }
      if (id >= keep_docs) {
        ret = streamer->remove_impl(holder->get_key(id - keep_docs), ctx);
        if (ailego_unlikely(ret != 0)) {
          if (claim_failure(ret)) {
            LOG_ERROR("streamer remove_impl failed");
          }
          return;
        }
      }
      finished++;
    }
    return;
  };

  for (size_t i = 0; i < pool.count(); ++i) {
    pool.execute(do_build, i);
  }

  // Report progress every 15 seconds, or whenever a worker exits.
  while (!pool.is_finished()) {
    std::unique_lock<std::mutex> lk(mutex);
    cond.wait_until(
        lk, std::chrono::system_clock::now() + std::chrono::seconds(15));
    if (error.load(std::memory_order_acquire)) {
      break;
    }
    LOG_INFO("Built cnt %zu, finished percent %.3f%%", finished.load(),
             finished.load() * 100.0f / holder->count());
  }

  // Workers capture this frame's locals by reference, so they must all have
  // exited before the function returns, including on the error path.
  pool.wait_finish();

  if (error.load(std::memory_order_acquire)) {
    LOG_ERROR("Failed to build index while waiting finish");
    return errcode.load();
  }

  return 0;
}

/*!
 * Recreate the index file at config_common["IndexPath"], open it through an
 * MMapFileStorage, attach it to the streamer and run the multi-threaded
 * build.
 *
 * \param streamer       Streamer that receives the vectors.
 * \param config_common  "BuilderCommon" section; reads "IndexPath" (required)
 *                       and "ThreadCount" (optional, defaults to
 *                       std::thread::hardware_concurrency()).
 * \return 0 on success, IndexError_InvalidArgument / IndexError_NoExist /
 *         IndexError_Runtime for setup failures, otherwise the code returned
 *         by do_build_by_streamer().
 */
int build_by_streamer(IndexStreamer::Pointer &streamer,
                      YAML::Node &config_common) {
  if (!config_common["IndexPath"]) {
    LOG_ERROR("Miss params IndexPath for Streamer");
    return IndexError_InvalidArgument;
  }
  string path = config_common["IndexPath"].as<string>();

  // Remove any previous index at this path before opening the storage.
  ailego::File::RemovePath(path);

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  if (!storage) {
    LOG_ERROR("Failed to create storage");
    return IndexError_NoExist;
  }
  ailego::Params params;
  int ret = storage->init(params);
  if (ret != 0) {
    LOG_ERROR("Storage Failed init");
    return IndexError_Runtime;
  }
  ret = storage->open(path, true);
  if (ret != 0) {
    LOG_ERROR("Storage Failed to open");
    return IndexError_Runtime;
  }
  ret = streamer->open(storage);
  if (ret != 0) {
    LOG_ERROR("Failed to open storage");
    return IndexError_Runtime;
  }

  size_t thread_count = config_common["ThreadCount"]
                            ? config_common["ThreadCount"].as<uint64_t>()
                            : std::thread::hardware_concurrency();

  auto meta = streamer->meta();

  // A positive dimension in the streamer meta selects the dense path.
  RetrievalMode retrieval_mode = RM_UNDEFINED;
  if (meta.dimension() > 0) {
    retrieval_mode = RM_DENSE;
  } else {
    retrieval_mode = RM_SPARSE;
  }

  LOG_DEBUG("thread count: %zu, retrieval mode: %s", thread_count,
            retrieval_mode == RM_DENSE ? "Dense" : "Sparse");

  // Propagate the build result: dropping it made a failed build look like a
  // success to the caller, which would then dump a partial index.
  return do_build_by_streamer(streamer, thread_count, retrieval_mode);
}

/*!
 * Run the named converter over a sparse holder.
 *
 * With an empty `name` the input holder is returned as-is and `index_meta`
 * is left untouched. Otherwise the converter is created, initialised with
 * the holder's meta, trained, applied, and its meta is written to
 * `index_meta`.
 *
 * \return the (possibly converted) holder, or an empty pointer on failure.
 */
IndexSparseHolder::Pointer convert_sparse_holder(
    const std::string &name, const ailego::Params &params,
    VecsIndexSparseHolder::Pointer &in_holder, IndexMeta &index_meta) {
  IndexSparseHolder::Pointer source =
      std::dynamic_pointer_cast<IndexSparseHolder>(in_holder);

  // No converter requested: pass the input through.
  if (name.empty()) {
    return source;
  }

  IndexConverter::Pointer conv = IndexFactory::CreateConverter(name);
  if (!conv) {
    LOG_ERROR("Failed to create sparse converter %s", name.c_str());
    return IndexSparseHolder::Pointer();
  }

  int status = conv->init(in_holder->index_meta(), params);
  if (status != 0) {
    LOG_ERROR("Failed to init converter %d", status);
    return IndexSparseHolder::Pointer();
  }

  status = conv->train(source);
  if (status != 0) {
    LOG_ERROR("Failed to train sparse converter %d", status);
    return IndexSparseHolder::Pointer();
  }

  status = conv->transform(source);
  if (status != 0) {
    LOG_ERROR("Failed to transform converter %d", status);
    return IndexSparseHolder::Pointer();
  }

  // Publish the converter's meta together with the converted data.
  index_meta = conv->meta();
  return conv->sparse_result();
}

/*!
 * Run the named converter over a dense holder.
 *
 * With an empty `name` the input holder is returned as-is and `index_meta`
 * is left untouched. Otherwise the converter is created, initialised with
 * the holder's meta, trained, applied, and its meta is written to
 * `index_meta`.
 *
 * \return the (possibly converted) holder, or an empty pointer on failure.
 */
IndexHolder::Pointer convert_holder(const std::string &name,
                                    const ailego::Params &params,
                                    VecsIndexHolder::Pointer &in_holder,
                                    IndexMeta &index_meta) {
  IndexHolder::Pointer source =
      std::dynamic_pointer_cast<IndexHolder>(in_holder);

  // No converter requested: pass the input through.
  if (name.empty()) {
    return source;
  }

  IndexConverter::Pointer conv = IndexFactory::CreateConverter(name);
  if (!conv) {
    LOG_ERROR("Failed to create converter %s", name.c_str());
    return IndexHolder::Pointer();
  }

  int status = conv->init(in_holder->index_meta(), params);
  if (status != 0) {
    LOG_ERROR("Failed to init converter %d", status);
    return IndexHolder::Pointer();
  }

  status = conv->train(source);
  if (status != 0) {
    LOG_ERROR("Failed to train converter %d", status);
    return IndexHolder::Pointer();
  }

  status = conv->transform(source);
  if (status != 0) {
    LOG_ERROR("Failed to transform converter %d", status);
    return IndexHolder::Pointer();
  }

  // Publish the converter's meta together with the converted data.
  index_meta = conv->meta();
  return conv->result();
}

/*!
 * Sparse build pipeline: load the build file, optionally convert it, then
 * init / train / build / dump with either an IndexBuilder or, when no builder
 * of that class exists, an IndexStreamer.
 *
 * \param config_root    Whole YAML document; the "MetricParams",
 *                       "ConverterParams" and "BuilderParams" sections are
 *                       read from it.
 * \param config_common  The "BuilderCommon" section.
 * \return 0 on success, -1 on any failure.
 */
int do_build_sparse(YAML::Node &config_root, YAML::Node &config_common) {
  string build_file = config_common["BuildFile"].as<string>();
  VecsIndexSparseHolder::Pointer build_holder(new VecsIndexSparseHolder);
  if (!build_holder->load(build_file)) {
    LOG_ERROR("Load input error: %s", build_file.c_str());
    return -1;
  }
  IndexMeta meta;
  meta = build_holder->index_meta();

  // Optional metric override, applied to both the holder and the meta.
  std::string metric_name;
  ailego::Params metric_params;
  if (config_common["MetricName"] &&
      !config_common["MetricName"].as<string>().empty()) {
    metric_name = config_common["MetricName"].as<string>();
    if (config_root["MetricParams"] &&
        !prepare_params(config_root["MetricParams"], metric_params)) {
      LOG_ERROR("Failed to prepare metric params");
      return -1;
    }
    build_holder->set_metric(metric_name, metric_params);
    meta.set_metric(metric_name, 0, metric_params);
  }

  // Optional converter; an empty name means "no conversion".
  string converter_name;
  ailego::Params converter_params;
  if (config_common["ConverterName"] &&
      !config_common["ConverterName"].as<string>().empty()) {
    converter_name = config_common["ConverterName"].as<string>();
    if (config_root["ConverterParams"] &&
        !prepare_params(config_root["ConverterParams"], converter_params)) {
      LOG_ERROR("Failed to prepare converter params");
      return -1;
    }
  }

  // A value of 0 for MaxDocs / KeepDocs is treated the same as "not set".
  if (config_common["MaxDocs"] && config_common["MaxDocs"].as<uint32_t>()) {
    auto max_docs = config_common["MaxDocs"].as<uint32_t>();
    build_holder->set_max_doc_count(max_docs);
  }
  if (config_common["KeepDocs"] && config_common["KeepDocs"].as<uint32_t>()) {
    auto keep_docs = config_common["KeepDocs"].as<uint32_t>();
    if (keep_docs < build_holder->count()) {
      build_holder->set_start_cursor(build_holder->count() - keep_docs);
    }
  }

  // Create a Builder
  // BuilderClass is tried as a builder first and as a streamer second.
  string builder_class = config_common["BuilderClass"].as<string>();
  IndexStreamer::Pointer streamer;
  IndexBuilder::Pointer builder =
      IndexFactory::CreateBuilder(builder_class.c_str());
  if (!builder) {
    streamer = IndexFactory::CreateStreamer(builder_class.c_str());
  }
  if (!builder && !streamer) {
    LOG_ERROR("Failed to create builder %s", builder_class.c_str());
    return -1;
  }
  cout << "Created builder " << builder_class << endl;

  // `meta` is overwritten with the converter's meta when a converter is used.
  IndexSparseHolder::Pointer cv_build_holder = convert_sparse_holder(
      converter_name, converter_params, build_holder, meta);
  if (!cv_build_holder) {
    LOG_ERROR("Convert holder failed.");
    return -1;
  }

  ailego::Params params;
  if (!prepare_params(config_root["BuilderParams"], params)) {
    LOG_ERROR("Failed to prepare params");
    return -1;
  }

  // INIT
  int ret =
      builder ? builder->init(meta, params) : streamer->init(meta, params);
  if (ret < 0) {
    LOG_ERROR("Failed to init builder, ret=%d", ret);
    return -1;
  }
  ailego::ElapsedTime timer;

  // TRAIN
  // Only builders are trained; the streamer path always skips this step.
  if (builder && config_common["NeedTrain"] &&
      config_common["NeedTrain"].as<bool>()) {
    string train_file = config_common["TrainFile"].as<string>();
    VecsIndexSparseHolder::Pointer train_holder(new VecsIndexSparseHolder);
    if (!train_holder->load(train_file)) {
      LOG_ERROR("Load input error: %s", train_file.c_str());
      return -1;
    }

    if (!metric_name.empty()) {
      train_holder->set_metric(metric_name, metric_params);
    }

    IndexSparseHolder::Pointer cv_train_holder = convert_sparse_holder(
        converter_name, converter_params, train_holder, meta);
    if (!cv_train_holder) {
      LOG_ERROR("Convert train holder failed.");
      return -1;
    }

    std::cout << "Prepare train data done!" << std::endl;
    timer.reset();
    ret = builder->train(std::move(cv_train_holder));
    size_t train_time = timer.milli_seconds();

    if (ret < 0) {
      LOG_ERROR("Failed to train in builder, ret=%d", ret);
      return -1;
    }
    cout << "Train finished, consume " << train_time << "ms." << endl;
  } else {
    cout << "Skip train procedure" << endl;
  }

  // BUILD
  // Publish the unconverted holder through the global `sparse_holder` and
  // install the SIGINT handler for the duration of the build.
  // NOTE(review): the streamer path is not handed cv_build_holder; it
  // presumably reads the global `sparse_holder` instead - confirm.
  sparse_holder = build_holder;
  signal(SIGINT, stop);
  timer.reset();
  if (builder != nullptr) {
    ret = builder->build(std::move(cv_build_holder));
  } else {
    ret = build_sparse_by_streamer(streamer, config_common);
  }
  size_t build_time = timer.milli_seconds();
  if (ret < 0) {
    LOG_ERROR("Failed to build in builder, ret=%d", ret);
    return -1;
  }
  cout << "Build finished, consume " << build_time << "ms." << endl;
  signal(SIGINT, SIG_DFL);

  // DUMP
  IndexDumper::Pointer dumper = IndexFactory::CreateDumper("FileDumper");
  if (!dumper) {
    LOG_ERROR("Failed to create FileDumper.");
    return -1;
  }
  string dump_prefix = config_common["DumpPath"].as<string>();
  ret = dumper->create(dump_prefix);
  if (ret != 0) {
    LOG_ERROR("Failed to create in dumper, ret=%d", ret);
    return -1;
  }
  timer.reset();
  ret = streamer ? streamer->dump(dumper) : builder->dump(dumper);
  size_t dump_time = timer.milli_seconds();
  // A missing dump implementation is only a warning; the run continues.
  if (ret == IndexError_NotImplemented) {
    LOG_WARN("Dump index not implemented");
  } else if (ret < 0) {
    LOG_ERROR("Failed to dump in builder, ret=%d", ret);
    return -1;
  }

  // Append the tag list to the same dump when the input carries one.
  if (build_holder->has_taglist()) {
    size_t taglist_size{0};
    const void *taglist_data = build_holder->get_taglist_data(taglist_size);
    const void *key_base = build_holder->get_key_base();

    dump_taglist(dumper, build_holder->get_num_vecs(), key_base, taglist_data,
                 taglist_size);
  }

  ret = dumper->close();
  if (ret != 0) {
    LOG_ERROR("Dumper failed to close, ret=%d", ret);
    return -1;
  }
  std::cout << "Dump to [" << dump_prefix << "] finished, consume " << dump_time
            << "ms." << std::endl;

  // Print the statistics of whichever component did the build.
  if (builder) {
    auto &stats =
        reinterpret_cast<const IndexBuilder *>(builder.get())->stats();
    std::cout << "STATS: \n\tTrained count[" << stats.trained_count()
              << "]\n\tBuilt count[" << stats.built_count()
              << "]\n\tDump count[" << stats.dumped_count()
              << "]\n\tDiscarded count[" << stats.discarded_count() << "]\n";
  } else {
    auto &stats = streamer->stats();
    std::cout << "STATS: \n\tTrained count[" << 0 << "]\n\tBuilt count["
              << stats.added_count() << "]\n\tDump size ["
              << stats.dumped_size() << "]\n\tDiscarded count["
              << stats.discarded_count() << "]\n";
  }

  // CLEANUP
  builder ? builder->cleanup() : streamer->cleanup();

  return 0;
}

/*!
 * Dense build pipeline: load the build file, optionally convert it, then
 * init / train / build / dump with either an IndexBuilder or, when no builder
 * of that class exists, an IndexStreamer.
 *
 * \param config_root    Whole YAML document; the "MetricParams",
 *                       "ConverterParams", "BuilderParams" and
 *                       "TrainerParams" sections are read from it.
 * \param config_common  The "BuilderCommon" section.
 * \return 0 on success, -1 on any failure.
 */
int do_build(YAML::Node &config_root, YAML::Node &config_common) {
  string build_file = config_common["BuildFile"].as<string>();
  VecsIndexHolder::Pointer build_holder(new VecsIndexHolder);
  if (!build_holder->load(build_file)) {
    LOG_ERROR("Load input error: %s", build_file.c_str());
    return -1;
  }
  IndexMeta meta;
  meta = build_holder->index_meta();

  // Optional metric override, applied to both the holder and the meta.
  std::string metric_name;
  ailego::Params metric_params;
  if (config_common["MetricName"] &&
      !config_common["MetricName"].as<string>().empty()) {
    metric_name = config_common["MetricName"].as<string>();
    if (config_root["MetricParams"] &&
        !prepare_params(config_root["MetricParams"], metric_params)) {
      LOG_ERROR("Failed to prepare metric params");
      return -1;
    }
    build_holder->set_metric(metric_name, metric_params);
    meta.set_metric(metric_name, 0, metric_params);
  }
  // Meta of the raw input, captured before any converter rewrites `meta`.
  IndexMeta input_meta = meta;
  string converter_name;
  ailego::Params converter_params;
  if (config_common["ConverterName"] &&
      !config_common["ConverterName"].as<string>().empty()) {
    converter_name = config_common["ConverterName"].as<string>();
    if (config_root["ConverterParams"] &&
        !prepare_params(config_root["ConverterParams"], converter_params)) {
      LOG_ERROR("Failed to prepare converter params");
      return -1;
    }
  }
  // Any MajorOrder value other than "row" selects column order.
  IndexMeta::MajorOrder order = IndexMeta::MO_UNDEFINED;
  if (config_common["MajorOrder"]) {
    std::string order_str = config_common["MajorOrder"].as<string>();
    if (order_str == "row") {
      order = IndexMeta::MajorOrder::MO_ROW;
    } else {
      order = IndexMeta::MajorOrder::MO_COLUMN;
    }
  }

  // A value of 0 for MaxDocs / KeepDocs is treated the same as "not set".
  if (config_common["MaxDocs"] && config_common["MaxDocs"].as<uint32_t>()) {
    auto max_docs = config_common["MaxDocs"].as<uint32_t>();
    build_holder->set_max_doc_count(max_docs);
  }
  if (config_common["KeepDocs"] && config_common["KeepDocs"].as<uint32_t>()) {
    auto keep_docs = config_common["KeepDocs"].as<uint32_t>();
    if (keep_docs < build_holder->count()) {
      build_holder->set_start_cursor(build_holder->count() - keep_docs);
    }
  }

  // Create a Builder
  // BuilderClass is tried as a builder first and as a streamer second.
  string builder_class = config_common["BuilderClass"].as<string>();
  IndexStreamer::Pointer streamer;
  IndexBuilder::Pointer builder =
      IndexFactory::CreateBuilder(builder_class.c_str());
  if (!builder) {
    streamer = IndexFactory::CreateStreamer(builder_class.c_str());
  }
  if (!builder && !streamer) {
    LOG_ERROR("Failed to create builder %s", builder_class.c_str());
    return -1;
  }
  cout << "Created builder " << builder_class << endl;


  // `meta` is overwritten with the converter's meta when a converter is used.
  IndexHolder::Pointer cv_build_holder =
      convert_holder(converter_name, converter_params, build_holder, meta);
  if (!cv_build_holder) {
    LOG_ERROR("Convert holder failed.");
    return -1;
  }
  meta.set_major_order(order);
  cout << IndexMetaHelper::to_string(meta) << endl;
  cout << "Prepare data done!" << endl;

  ailego::Params params;
  if (!prepare_params(config_root["BuilderParams"], params)) {
    LOG_ERROR("Failed to prepare params");
    return -1;
  }
  // Force every known "use id map" switch to follow the global setting.
  std::vector<std::string> id_map_param_list = {
      PARAM_HNSW_STREAMER_USE_ID_MAP,
      PARAM_FLAT_USE_ID_MAP,
      PARAM_HNSW_RABITQ_STREAMER_USE_ID_MAP,
  };
  for (auto &param : id_map_param_list) {
    params.set(param, !g_disable_id_map);
  }

  // INIT
  int ret =
      builder ? builder->init(meta, params) : streamer->init(meta, params);
  if (ret < 0) {
    LOG_ERROR("Failed to init builder, ret=%d", ret);
    return -1;
  }
  ailego::ElapsedTime timer;

  // TRAIN
  if (config_common["UseTrainer"] && config_common["UseTrainer"].as<bool>()) {
    // The trained model is handed to builder->train() below, so this mode
    // cannot work when only a streamer was created.
    if (!builder) {
      LOG_ERROR("UseTrainer requires an index builder, but %s is a streamer",
                builder_class.c_str());
      return -1;
    }

    ailego::Params trainer_params;
    if (!prepare_params(config_root["TrainerParams"], trainer_params)) {
      LOG_ERROR("Failed to prepare trainer params");
      return -1;
    }

    string train_index_path;
    if (config_common["TrainerIndexPath"]) {
      train_index_path = config_common["TrainerIndexPath"].as<string>();
      if (train_index_path.empty()) {
        LOG_ERROR("invalid TrainerIndexPath format");
        return -1;
      }
      cout << "Trainer index path: " << train_index_path << "\n";
    } else {
      LOG_ERROR("Need [TrainerIndexPath] config");
      return -1;
    }

    IndexTrainer::Pointer trainer =
        IndexFactory::CreateTrainer("StratifiedClusterTrainer");
    if (!trainer) {
      LOG_ERROR("Failed to create StratifiedClusterTrainer");
      return -1;
    }
    if (trainer->init(meta, trainer_params) != 0) {
      LOG_ERROR("trainer init failed");
      return -1;
    }

    if (ailego::File::IsExist(train_index_path)) {
      // Reuse a previously dumped trainer index.
      IndexStorage::Pointer container =
          IndexFactory::CreateStorage("MMapFileReadStorage");
      if (!container) {
        LOG_ERROR("Failed to create MMapFileReadStorage");
        return -1;
      }
      container->init(ailego::Params());
      if (container->open(train_index_path, false) != 0) {
        LOG_ERROR("MMapFileReadStorage failed to load %s",
                  train_index_path.c_str());
        return -1;
      }
      if (trainer->load(container) != 0) {
        LOG_ERROR("Trainer failed to load container");
        return -1;
      };
    } else {
      // Train from TrainFile and dump the result to TrainerIndexPath.
      std::cout << "Prepare trainer data..." << std::endl;
      string train_file = config_common["TrainFile"].as<string>();
      VecsIndexHolder::Pointer train_holder(new VecsIndexHolder);
      if (!train_holder->load(train_file)) {
        LOG_ERROR("Load input error: %s", train_file.c_str());
        return -1;
      }
      if (!metric_name.empty()) {
        train_holder->set_metric(metric_name, metric_params);
      }

      // support fp16 convert

      IndexHolder::Pointer cv_train_holder =
          convert_holder(converter_name, converter_params, train_holder, meta);
      if (!cv_train_holder) {
        LOG_ERROR("Convert train holder failed.");
        return -1;
      }

      std::cout << "Prepare trainer data done!" << std::endl;
      std::cout << "Prepare train data!" << std::endl;

      ret = trainer->train(cv_train_holder);
      if (ret != 0) {
        LOG_ERROR("trainer train_index failed with %d", ret);
        return -1;
      }

      std::cout << "train data done!" << std::endl;
      IndexDumper::Pointer dumper = IndexFactory::CreateDumper("FileDumper");
      if (!dumper) {
        LOG_ERROR("Failed to create FileDumper.");
        return -1;
      }
      if (dumper->init(ailego::Params()) != 0) {
        LOG_ERROR("Failed to init FileDumper.");
        return -1;
      }
      ret = dumper->create(train_index_path);
      if (ret != 0) {
        LOG_ERROR("Failed to create in dumper, ret=%d", ret);
        return -1;
      }
      if (trainer->dump(dumper) != 0) {
        LOG_ERROR("trainer dump_index failed");
        return -1;
      }
      dumper->close();
    }

    ret = builder->train(trainer);
    size_t train_time = timer.milli_seconds();
    if (ret < 0) {
      LOG_ERROR("Failed to train in builder, ret=%d", ret);
      return -1;
    }
    cout << "Train finished, consume " << train_time << "ms." << endl;
  } else if (builder && config_common["NeedTrain"] &&
             config_common["NeedTrain"].as<bool>()) {
    // Train the builder directly from TrainFile.
    string train_file = config_common["TrainFile"].as<string>();
    VecsIndexHolder::Pointer train_holder(new VecsIndexHolder);
    if (!train_holder->load(train_file)) {
      LOG_ERROR("Load input error: %s", train_file.c_str());
      return -1;
    }

    if (!metric_name.empty()) {
      train_holder->set_metric(metric_name, metric_params);
    }
    IndexHolder::Pointer cv_train_holder =
        convert_holder(converter_name, converter_params, train_holder, meta);
    if (!cv_train_holder) {
      LOG_ERROR("Convert train holder failed.");
      return -1;
    }

    std::cout << "Prepare train data done!" << std::endl;
    timer.reset();
    ret = builder->train(std::move(cv_train_holder));
    size_t train_time = timer.milli_seconds();
    if (ret < 0) {
      LOG_ERROR("Failed to train in builder, ret=%d", ret);
      return -1;
    }
    cout << "Train finished, consume " << train_time << "ms." << endl;
  } else {
    cout << "Skip train procedure" << endl;
  }

  // RaBitQ classes need extra wiring of the build data before the build.
  if (builder_class == "HnswRabitqStreamer") {
    if (setup_hnsw_rabitq_streamer(streamer, input_meta, config_root,
                                   converter_name, &cv_build_holder) != 0) {
      return -1;
    }
  } else if (builder_class == "HnswRabitqBuilder" && !converter_name.empty()) {
    cv_build_holder = convert_holder_to_provider(cv_build_holder);
  }

  // BUILD
  // Publish the unconverted holder through the global `holder` and install
  // the SIGINT handler for the duration of the build.
  holder = build_holder;
  signal(SIGINT, stop);
  timer.reset();
  if (builder != nullptr) {
    ret = builder->build(std::move(cv_build_holder));
  } else {
    ret = build_by_streamer(streamer, config_common);
  }
  size_t build_time = timer.milli_seconds();
  if (ret < 0) {
    LOG_ERROR("Failed to build in builder, ret=%d", ret);
    return -1;
  }
  cout << "Build finished, consume " << build_time << "ms." << endl;
  signal(SIGINT, SIG_DFL);

  // DUMP
  IndexDumper::Pointer dumper = IndexFactory::CreateDumper("FileDumper");
  if (!dumper) {
    LOG_ERROR("Failed to create FileDumper.");
    return -1;
  }
  string dump_prefix = config_common["DumpPath"].as<string>();
  ret = dumper->create(dump_prefix);
  if (ret != 0) {
    LOG_ERROR("Failed to create in dumper, ret=%d", ret);
    return -1;
  }
  timer.reset();
  ret = streamer ? streamer->dump(dumper) : builder->dump(dumper);
  size_t dump_time = timer.milli_seconds();
  // A missing dump implementation is only a warning; the run continues.
  if (ret == IndexError_NotImplemented) {
    LOG_WARN("Dump index not implemented");
  } else if (ret < 0) {
    LOG_ERROR("Failed to dump in builder, ret=%d", ret);
    return -1;
  }

  // Append the tag list to the same dump when the input carries one.
  if (build_holder->has_taglist()) {
    size_t taglist_size{0};
    const void *taglist_data = build_holder->get_taglist_data(taglist_size);
    const void *key_base = build_holder->get_key_base();

    dump_taglist(dumper, build_holder->get_num_vecs(), key_base, taglist_data,
                 taglist_size);
  }

  ret = dumper->close();
  if (ret != 0) {
    LOG_ERROR("Dumper failed to close, ret=%d", ret);
    return -1;
  }
  std::cout << "Dump to [" << dump_prefix << "] finished, consume " << dump_time
            << "ms." << std::endl;

  // Print the statistics of whichever component did the build.
  if (builder) {
    auto &stats =
        reinterpret_cast<const IndexBuilder *>(builder.get())->stats();
    std::cout << "STATS: \n\tTrained count[" << stats.trained_count()
              << "]\n\tBuilt count[" << stats.built_count()
              << "]\n\tDump count[" << stats.dumped_count()
              << "]\n\tDiscarded count[" << stats.discarded_count() << "]\n";
  } else {
    auto &stats = streamer->stats();
    std::cout << "STATS: \n\tTrained count[" << 0 << "]\n\tBuilt count["
              << stats.added_count() << "]\n\tDump size ["
              << stats.dumped_size() << "]\n\tDiscarded count["
              << stats.discarded_count() << "]\n";
  }

  // CLEANUP
  builder ? builder->cleanup() : streamer->cleanup();

  return 0;
}

int main(int argc, char *argv[]) {
  if (argc < 2) {
    usage();
    return -1;
  }
  IndexPluginBroker broker;
  std::string error;
  for (int i = 2; i < argc; ++i) {
    if (!broker.emplace(argv[i], &error)) {
      LOG_ERROR("Failed to load plugin: %s (%s)", argv[i], error.c_str());
      return -1;
    }
  }
  YAML::Node config_root;
  try {
    config_root = YAML::LoadFile(argv[1]);
  } catch (...) {
    LOG_ERROR("Load YAML file[%s] failed!", argv[1]);
    return -1;
  }
  if (!check_config(config_root)) {
    return -1;
  }
  auto config_common = config_root["BuilderCommon"];

  map<string, int> LOG_LEVEL = {{"debug", IndexLogger::LEVEL_DEBUG},
                                {"info", IndexLogger::LEVEL_INFO},
                                {"warn", IndexLogger::LEVEL_WARN},
                                {"error", IndexLogger::LEVEL_ERROR},
                                {"fatal", IndexLogger::LEVEL_FATAL}};

  string log_level = config_common["LogLevel"]
                         ? config_common["LogLevel"].as<string>()
                         : "debug";

  transform(log_level.begin(), log_level.end(), log_level.begin(), ::tolower);
  if (LOG_LEVEL.find(log_level) != LOG_LEVEL.end()) {
    IndexLoggerBroker::SetLevel(LOG_LEVEL[log_level]);
    zvec::ailego::LoggerBroker::SetLevel(LOG_LEVEL[log_level]);
  }

  RetrievalMode retrieval_mode{RM_DENSE};
  if (config_common["RetrievalMode"]) {
    std::string retrieval_mode_str =
        config_common["RetrievalMode"].as<string>();
    if (retrieval_mode_str == "dense") {
      retrieval_mode = RM_DENSE;
    } else if (retrieval_mode_str == "sparse") {
      retrieval_mode = RM_SPARSE;
    }
  }

  if (config_common["DisableIdMap"]) {
    g_disable_id_map = config_common["DisableIdMap"].as<bool>();
    if (g_disable_id_map) {
      cout << "Disable ID map" << endl;
    } else {
      cout << "Enable ID map" << endl;
    }
  }

  if (retrieval_mode == RM_SPARSE) {
    return do_build_sparse(config_root, config_common);
  } else {
    return do_build(config_root, config_common);
  }

  return 0;
}


================================================
FILE: tools/core/local_builder_original.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <libgen.h>
#include <signal.h>
#include <iostream>
#include <memory>
#include <ailego/pattern/defer.h>
#include <zvec/ailego/container/params.h>
#include <zvec/ailego/utility/time_helper.h>
#if RABITQ_SUPPORTED
#include "algorithm/hnsw_rabitq/hnsw_rabitq_streamer.h"
#include "algorithm/hnsw_rabitq/rabitq_converter.h"
#include "algorithm/hnsw_rabitq/rabitq_reformer.h"
#endif
#include "zvec/core/framework/index_dumper.h"
#include "zvec/core/framework/index_factory.h"
#include "zvec/core/framework/index_logger.h"
#include "zvec/core/framework/index_plugin.h"
#include "zvec/core/framework/index_provider.h"
#include "zvec/core/framework/index_reformer.h"
#include "zvec/core/framework/index_streamer.h"
#include "index_meta_helper.h"
#include "meta_segment_common.h"
#include "vecs_index_holder.h"

#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wshadow"
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
#elif defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#endif

#include <yaml-cpp/yaml.h>

#ifdef __clang__
#pragma clang diagnostic pop
#elif defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif

using namespace std;
using namespace zvec::core;
using namespace zvec;

//! Kind of data the index is built from.
enum RetrievalMode { RM_UNDEFINED = 0, RM_DENSE = 1, RM_SPARSE = 2 };

//! Dense input holder; stop() calls its stop() method when it is set.
VecsIndexHolder::Pointer holder;
//! Sparse input holder.
// NOTE(review): stop() does not touch this holder - confirm that is intended.
VecsIndexSparseHolder::Pointer sparse_holder;

//! Set by stop() on the first SIGINT to request a graceful stop.
bool stop_now = false;
/*!
 * Signal handler for a two-stage stop.
 *
 * The first call raises `stop_now`, prints a hint and stops the dense holder
 * if one is set. A later call exits the process with `signo` as exit code.
 */
void stop(int signo) {
  const bool already_requested = stop_now;
  if (already_requested) {
    exit(signo);
  }

  stop_now = true;
  cout << "\rTrying to stop. press [Ctrl+C] again kill immediately." << endl
       << flush;

  if (!holder) {
    return;
  }
  holder->stop();
}

//! Print the command-line synopsis to stdout.
void usage(void) {
  static const char *const kUsageText =
      "Usage: local_builder CONFIG.yaml [plugin file path]";
  cout << kUsageText << endl;
}

/*!
 * Copy the entries of a YAML map into `params`, echoing each one to stdout.
 *
 * Values explicitly tagged as int / float / bool are stored with that type,
 * other scalars are stored as strings, and nested maps are converted
 * recursively into nested ailego::Params. Any other value (for example a
 * sequence or a null) cannot be represented; it is skipped with a warning on
 * stderr.
 *
 * \param config_params  YAML node to read; a node without entries is fine.
 * \param params         Destination parameter set.
 * \return false if a nested map failed to convert, true otherwise.
 */
bool prepare_params(YAML::Node &&config_params, ailego::Params &params) {
  cout << "Parse params as blow:" << endl;
  for (auto it = config_params.begin(); it != config_params.end(); ++it) {
    // Take copies: the iterator hands out its key/value pair by value.
    const string key = it->first.as<string>();
    YAML::Node value = it->second;
    const string tag = value.Tag();

    if (tag == "tag:yaml.org,2002:int") {
      int64_t val = value.as<int64_t>();
      params.set(key, val);
      cout << key << "=" << val << endl;
    } else if (tag == "tag:yaml.org,2002:float") {
      float val = value.as<float>();
      params.set(key, val);
      cout << key << "=" << val << endl;
    } else if (tag == "tag:yaml.org,2002:bool") {
      bool val = value.as<bool>();
      params.set(key, val);
      cout << key << "=" << val << endl;
    } else if (value.IsScalar()) {
      string val = value.as<string>();
      params.set(key, val);
      cout << key << "=" << val << endl;
    } else if (value.IsMap()) {
      ailego::Params sub_params;
      if (!prepare_params(std::move(value), sub_params)) {
        cerr << "parse params error with key[" << key << "]" << endl;
        return false;
      }
      params.set(key, sub_params);
    } else {
      // Previously dropped without a trace; make the omission visible.
      cerr << "ignore param with key[" << key
           << "]: unsupported YAML node type" << endl;
    }
  }
  return true;
}

/*!
 * Prepare an HnswRabitqStreamer for building: train a RabitqConverter on the
 * build data, install the resulting reformer on the streamer, and hand the
 * streamer an IndexProvider view of the build data.
 *
 * \param streamer        Streamer to configure; must be an HnswRabitqStreamer.
 * \param meta            Index meta used to initialise the converter.
 * \param config_root     Whole YAML document; "RabitqConverterParams" is
 *                        optional.
 * \param converter_name  Name of the converter already applied to the build
 *                        holder (empty if none).
 * \param build_holder    In/out: build data. Replaced by the provider when a
 *                        converter was applied.
 * \return 0 on success, -1 on failure or when RaBitQ is not compiled in.
 */
int setup_hnsw_rabitq_streamer(const IndexStreamer::Pointer &streamer,
                               const IndexMeta &meta, YAML::Node &config_root,
                               const std::string &converter_name,
                               IndexHolder::Pointer *build_holder) {
#if RABITQ_SUPPORTED
  // Validate the streamer type up front, before the converter is trained.
  HnswRabitqStreamer *hnsw_rabitq_streamer =
      dynamic_cast<HnswRabitqStreamer *>(streamer.get());
  if (!hnsw_rabitq_streamer) {
    cerr << "Streamer is not a HnswRabitqStreamer" << endl;
    return -1;
  }

  RabitqConverter rabitq_converter;
  ailego::Params rabitq_converter_params;
  if (config_root["RabitqConverterParams"] &&
      !prepare_params(config_root["RabitqConverterParams"],
                      rabitq_converter_params)) {
    cerr << "Failed to prepare rabitq converter params" << endl;
    return -1;
  }
  if (rabitq_converter.init(meta, rabitq_converter_params) != 0) {
    cerr << "rabitq converter init failed" << std::endl;
    return -1;
  }
  if (rabitq_converter.train(*build_holder) != 0) {
    cerr << "rabitq converter train failed" << std::endl;
    return -1;
  }
  IndexReformer::Pointer rabitq_reformer;
  rabitq_converter.to_reformer(&rabitq_reformer);
  if (!rabitq_reformer) {
    cerr << "rabitq converter failed to create reformer" << endl;
    return -1;
  }
  hnsw_rabitq_streamer->set_reformer(std::move(rabitq_reformer));

  IndexProvider::Pointer provider;
  if (converter_name.empty()) {
    // build_holder is VecsIndexHolder
    provider = std::dynamic_pointer_cast<IndexProvider>(*build_holder);
  } else {
    // build_holder is ordinary IndexHolder, need to convert
    provider = convert_holder_to_provider(*build_holder);
    // reuse provider to release memory
    *build_holder = provider;
  }

  if (!provider) {
    cerr << "Failed to cast build holder to provider" << endl;
    return -1;
  }
  hnsw_rabitq_streamer->set_provider(provider);
  return 0;
#else
  (void)streamer;
  (void)meta;
  (void)config_root;
  (void)converter_name;
  (void)build_holder;
  cerr << "HNSW RaBitQ is not supported on this platform" << endl;
  return -1;
#endif
}

// Validates that the YAML config carries every key the build needs.
//
// Required: the "BuilderCommon" section with "BuilderClass", "BuildFile" and
// "DumpPath", plus the top-level "BuilderParams" section. "TrainFile" is
// required when "NeedTrain" is true; "TrainerIndexPath" and the top-level
// "TrainerParams" section are required when "UseTrainer" is true.
//
// Returns true when the config is complete; otherwise prints the missing key
// to stderr and returns false.
bool check_config(YAML::Node &config_root) {
  auto common = config_root["BuilderCommon"];
  if (!common) {
    cerr << "Can not find [BuilderCommon] in config" << endl;
    return false;
  }

  // Keys of BuilderCommon that are needed unconditionally.
  static const char *const kRequiredKeys[] = {"BuilderClass", "BuildFile",
                                              "DumpPath"};
  for (const char *key : kRequiredKeys) {
    if (!common[key]) {
      cerr << "Can not find [" << key << "] in config" << endl;
      return false;
    }
  }

  if (common["NeedTrain"] && common["NeedTrain"].as<bool>()) {
    if (!common["TrainFile"]) {
      cerr << "Can not find [TrainFile] in config" << endl;
      return false;
    }
  }
  // Only demand the trainer settings when the trainer is actually enabled,
  // mirroring how the NeedTrain switch is evaluated.
  if (common["UseTrainer"] && common["UseTrainer"].as<bool>()) {
    if (!common["TrainerIndexPath"]) {
      cerr << "Can not find [TrainerIndexPath] in config" << endl;
      return false;
    }
    if (!config_root["TrainerParams"]) {
      cerr << "Can not find [TrainerParams] in config" << endl;
      return false;
    }
  }
  if (!config_root["BuilderParams"]) {
    cerr << "Can not find [BuilderParams] in config" << endl;
    return false;
  }
  return true;
}

// Rounds `size` up to the next multiple of 32 bytes.
static inline size_t AlignSize(size_t size) {
  constexpr size_t kAlignMask = 0x1F;
  return (size + kAlignMask) & ~kAlignMask;
}

// Writes one segment through `dumper`: the payload, zero padding up to the
// AlignSize() boundary, and finally the segment record (id, size, padding,
// CRC32C of the payload).
//
// dumper      Destination dumper.
// segment_id  Name under which the segment is registered.
// data, size  Payload bytes.
// writes      Out: payload plus padding bytes written; set only on success.
//
// Returns 1 on success and 0 on failure (callers treat the result as bool).
int64_t dump_meta_segment(const IndexDumper::Pointer &dumper,
                          const std::string &segment_id, const void *data,
                          size_t size, size_t &writes) {
  size_t len = dumper->write(data, size);
  if (len != size) {
    // %zu is the portable conversion for size_t arguments.
    LOG_ERROR("Dump segment %s data failed, expect: %zu, actual: %zu",
              segment_id.c_str(), size, len);
    return 0;
  }

  size_t padding_size = AlignSize(size) - size;
  if (padding_size > 0) {
    std::string padding(padding_size, '\0');
    if (dumper->write(padding.data(), padding_size) != padding_size) {
      LOG_ERROR("Append padding failed, size %zu", padding_size);
      return 0;
    }
  }

  uint32_t crc = ailego::Crc32c::Hash(data, size);
  int ret = dumper->append(segment_id, size, padding_size, crc);
  if (ret != 0) {
    LOG_ERROR("Dump segment %s meta failed, ret=%d", segment_id.c_str(), ret);
    return 0;
  }

  writes = len + padding_size;

  return 1;
}

// Dumps the tag list as three consecutive segments: the header (carrying the
// vector count), the key array (`num_vecs` 64-bit keys) and the raw tag list
// data.
//
// Returns 0 on success, or IndexError_WriteData as soon as one segment fails.
int dump_taglist(IndexDumper::Pointer dumper, size_t num_vecs,
                 const void *key_base, const void *taglist_data,
                 uint64_t taglist_size) {
  TagListHeader header;
  header.num_vecs = num_vecs;

  // Receives the byte count of each segment; the value itself is not used.
  size_t written;

  if (!dump_meta_segment(dumper, TAGLIST_HEADER_SEGMENT_NAME, &header,
                         sizeof(TagListHeader), written)) {
    LOG_ERROR("dump taglist meta failed");
    return IndexError_WriteData;
  }

  if (!dump_meta_segment(dumper, TAGLIST_KEY_SEGMENT_NAME, key_base,
                         num_vecs * sizeof(uint64_t), written)) {
    LOG_ERROR("dump taglist key failed");
    return IndexError_WriteData;
  }

  if (!dump_meta_segment(dumper, TAGLIST_DATA_SEGMENT_NAME, taglist_data,
                         taglist_size, written)) {
    LOG_ERROR("dump taglist data failed");
    return IndexError_WriteData;
  }

  return 0;
}

// Feeds every sparse vector of the global `sparse_holder` into `streamer`
// using a pool of worker threads.
//
// Worker i handles ids i, i + N, i + 2N, ... where N is the number of pool
// threads. When the streamer meta names a reformer, each vector is converted
// before insertion. Once `id` reaches the keep window
// (count - start_cursor), the key inserted `keep_docs` positions earlier is
// removed again. Workers stop early when the global `stop_now` flag is
// raised or another worker has failed.
//
// Returns 0 on success (or when stopped by the user), otherwise the error
// code of the first failure. All workers are joined before returning, so
// none of them can outlive the locals they capture by reference.
int do_build_sparse_by_streamer(IndexStreamer::Pointer &streamer,
                                uint32_t thread_count) {
  ailego::ThreadPool pool(thread_count, false);
  std::atomic<size_t> finished{0};
  std::atomic<int> errcode{0};
  std::mutex mutex;
  std::atomic_bool error{false};
  std::condition_variable cond{};

  // The stride must match the number of tasks launched below, otherwise ids
  // would be skipped or inserted twice.
  const size_t stride = pool.count();
  if (stride == 0) {
    cerr << "Thread pool has no worker thread\n";
    return IndexError_InvalidArgument;
  }

  auto meta = streamer->meta();
  IndexReformer::Pointer reformer;
  if (!meta.reformer_name().empty()) {
    reformer = IndexFactory::CreateReformer(meta.reformer_name());
    if (!reformer) {
      LOG_ERROR("Failed to create reformer %s", meta.reformer_name().c_str());
      return IndexError_NoExist;
    }
    int init_ret = reformer->init(meta.reformer_params());
    if (init_ret != 0) {
      LOG_ERROR("Failed to init reformer %s for %s",
                meta.reformer_name().c_str(), IndexError::What(init_ret));
      return init_ret;
    }
  }

  IndexQueryMeta qmeta(sparse_holder->data_type());
  uint32_t keep_docs = sparse_holder->count() - sparse_holder->start_cursor();

  // Records the first failure only; returns true for the caller that won.
  auto fail = [&](int code) {
    if (error.exchange(true)) {
      return false;
    }
    errcode.store(code);
    return true;
  };

  auto do_build = [&](size_t idx) {
    AILEGO_DEFER([&]() {
      std::lock_guard<std::mutex> latch(mutex);
      cond.notify_one();
    });
    auto ctx = streamer->create_context();
    if (!ctx) {
      if (fail(IndexError_NoMemory)) {
        cerr << "Failed to create streamer context";
      }
      return;
    }
    // Per-worker status: sharing one variable between workers is a data race.
    int ret = 0;
    for (uint32_t id = idx;
         id < sparse_holder->count() && !stop_now && !error.load();
         id += stride) {
      uint64_t key = sparse_holder->get_key(id);
      if (reformer) {
        std::string new_vec;
        IndexQueryMeta new_meta;
        ret = reformer->convert(sparse_holder->get_sparse_count(id),
                                sparse_holder->get_sparse_indices(id),
                                sparse_holder->get_sparse_data(id), qmeta,
                                &new_vec, &new_meta);
        if (ret != 0) {
          LOG_ERROR("Failed to convert sparse vector for %s",
                    IndexError::What(ret));
          fail(ret);
          return;
        }
        ret = streamer->add_impl(key, sparse_holder->get_sparse_count(id),
                                 sparse_holder->get_sparse_indices(id),
                                 new_vec.data(), new_meta, ctx);
      } else {
        ret =
            streamer->add_impl(key, sparse_holder->get_sparse_count(id),
                               sparse_holder->get_sparse_indices(id),
                               sparse_holder->get_sparse_data(id), qmeta, ctx);
      }

      if (ailego_unlikely(ret != 0)) {
        if (fail(ret)) {
          LOG_ERROR("streamer add_impl failed\n");
        }
        return;
      }
      if (id >= keep_docs) {
        ret =
            streamer->remove_impl(sparse_holder->get_key(id - keep_docs), ctx);
        if (ailego_unlikely(ret != 0)) {
          if (fail(ret)) {
            LOG_ERROR("streamer remove_impl failed\n");
          }
          return;
        }
      }
      finished++;
    }
    return;
  };

  for (size_t i = 0; i < pool.count(); ++i) {
    pool.execute(do_build, i);
  }

  // Report progress every 15 seconds, or whenever a worker exits.
  while (!pool.is_finished()) {
    std::unique_lock<std::mutex> lk(mutex);
    cond.wait_until(
        lk, std::chrono::system_clock::now() + std::chrono::seconds(15));
    if (error.load(std::memory_order_acquire)) {
      break;
    }
    LOG_INFO("Built cnt %zu, finished percent %.3f%%", finished.load(),
             finished.load() * 100.0f / sparse_holder->count());
  }
  // Join every worker before any captured local goes out of scope.
  pool.wait_finish();

  if (error.load(std::memory_order_acquire)) {
    cerr << "Failed to build index while waiting finish\n";
    return errcode.load();
  }

  return 0;
}

// Opens an MMapFileStorage at BuilderCommon.IndexPath, attaches it to
// `streamer` and runs the multi-threaded sparse build.
//
// config_common  The "BuilderCommon" YAML section. "IndexPath" is mandatory;
//                "ThreadCount" is optional and defaults to the hardware
//                concurrency (at least one thread is always used).
//
// Returns 0 on success, otherwise an IndexError code: a setup failure or the
// code reported by the build itself.
int build_sparse_by_streamer(IndexStreamer::Pointer &streamer,
                             YAML::Node &config_common) {
  if (!config_common["IndexPath"]) {
    cerr << "Miss params IndexPath for Streamer\n";
    return IndexError_InvalidArgument;
  }
  string path = config_common["IndexPath"].as<string>();

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  if (!storage) {
    cerr << "Failed to create storage\n";
    return IndexError_NoExist;
  }
  ailego::Params params;
  int ret = storage->init(params);
  if (ret != 0) {
    cerr << "Storage Failed init\n";
    return IndexError_Runtime;
  }
  ret = storage->open(path, true);
  if (ret != 0) {
    cerr << "Storage Failed to open\n";
    return IndexError_Runtime;
  }
  ret = streamer->open(storage);
  if (ret != 0) {
    cerr << "Failed to open storage\n";
    return IndexError_Runtime;
  }

  size_t thread_count = config_common["ThreadCount"]
                            ? config_common["ThreadCount"].as<uint64_t>()
                            : std::thread::hardware_concurrency();
  if (thread_count == 0) {
    // Either configured as 0 or the hardware concurrency is unknown; the
    // workers stride by the thread count, so it must never be zero.
    thread_count = std::thread::hardware_concurrency();
    if (thread_count == 0) {
      thread_count = 1;
    }
  }

  LOG_DEBUG("thread count: %zu, retrieval_mode: sparse", thread_count);
  // Propagate the build result so the caller can detect a failed build.
  return do_build_sparse_by_streamer(streamer,
                                     static_cast<uint32_t>(thread_count));
}

// Feeds every dense vector of the global `holder` into `streamer` using a
// pool of worker threads.
//
// Worker i handles ids i, i + N, i + 2N, ... where N is the number of pool
// threads. When the streamer meta names a reformer (dense mode only), each
// vector is converted before insertion. Once `id` reaches the keep window
// (count - start_cursor), the key inserted `keep_docs` positions earlier is
// removed again. Workers stop early when the global `stop_now` flag is
// raised or another worker has failed.
//
// Returns 0 on success (or when stopped by the user), otherwise the error
// code of the first failure. All workers are joined before returning, so
// none of them can outlive the locals they capture by reference.
int do_build_by_streamer(IndexStreamer::Pointer &streamer,
                         uint32_t thread_count, RetrievalMode retrieval_mode) {
  ailego::ThreadPool pool(thread_count, false);
  std::atomic<size_t> finished{0};
  std::atomic<int> errcode{0};
  std::mutex mutex;
  std::atomic_bool error{false};
  std::condition_variable cond{};

  // The stride must match the number of tasks launched below, otherwise ids
  // would be skipped or inserted twice.
  const size_t stride = pool.count();
  if (stride == 0) {
    cerr << "Thread pool has no worker thread\n";
    return IndexError_InvalidArgument;
  }

  auto meta = streamer->meta();
  IndexReformer::Pointer reformer;
  if (!meta.reformer_name().empty()) {
    if (retrieval_mode != RM_DENSE) {
      cerr << "Reformer not supported";
      return IndexError_Runtime;
    }
    reformer = IndexFactory::CreateReformer(meta.reformer_name());
    if (!reformer) {
      LOG_ERROR("Failed to create reformer %s", meta.reformer_name().c_str());
      return IndexError_NoExist;
    }
    int init_ret = reformer->init(meta.reformer_params());
    if (init_ret != 0) {
      LOG_ERROR("Failed to init reformer %s for %s",
                meta.reformer_name().c_str(), IndexError::What(init_ret));
      return init_ret;
    }
  }

  IndexQueryMeta qmeta(holder->data_type(), holder->dimension());
  uint32_t keep_docs = holder->count() - holder->start_cursor();

  // Records the first failure only; returns true for the caller that won.
  auto fail = [&](int code) {
    if (error.exchange(true)) {
      return false;
    }
    errcode.store(code);
    return true;
  };

  auto do_build = [&](size_t idx) {
    AILEGO_DEFER([&]() {
      std::lock_guard<std::mutex> latch(mutex);
      cond.notify_one();
    });
    auto ctx = streamer->create_context();
    if (!ctx) {
      if (fail(IndexError_NoMemory)) {
        cerr << "Failed to create streamer context";
      }
      return;
    }
    // Per-worker state: sharing `ret` between workers is a data race.
    int ret = 0;
    std::string ovec;
    IndexQueryMeta ometa;
    for (uint32_t id = idx; id < holder->count() && !stop_now && !error.load();
         id += stride) {
      uint64_t key = holder->get_key(id);
      if (retrieval_mode != RM_DENSE) {
        if (fail(IndexError_Unsupported)) {
          cerr << "Retrieval mode not supported";
        }
        return;
      }
      if (reformer) {
        ret = reformer->convert(holder->get_vector_by_index(id), qmeta, &ovec,
                                &ometa);
        if (ret != 0) {
          LOG_ERROR("Failed to convert vector for %s", IndexError::What(ret));
          fail(ret);
          return;
        }
        ret = streamer->add_impl(key, ovec.data(), ometa, ctx);
      } else {
        ret = streamer->add_impl(key, holder->get_vector_by_index(id), qmeta,
                                 ctx);
      }

      if (ailego_unlikely(ret != 0)) {
        if (fail(ret)) {
          LOG_ERROR("streamer add_impl failed\n");
        }
        return;
      }
      if (id >= keep_docs) {
        ret = streamer->remove_impl(holder->get_key(id - keep_docs), ctx);
        if (ailego_unlikely(ret != 0)) {
          if (fail(ret)) {
            LOG_ERROR("streamer remove_impl failed\n");
          }
          return;
        }
      }
      finished++;
    }
    return;
  };

  for (size_t i = 0; i < pool.count(); ++i) {
    pool.execute(do_build, i);
  }

  // Report progress every 15 seconds, or whenever a worker exits.
  while (!pool.is_finished()) {
    std::unique_lock<std::mutex> lk(mutex);
    cond.wait_until(
        lk, std::chrono::system_clock::now() + std::chrono::seconds(15));
    if (error.load(std::memory_order_acquire)) {
      break;
    }
    LOG_INFO("Built cnt %zu, finished percent %.3f%%", finished.load(),
             finished.load() * 100.0f / holder->count());
  }
  // Join every worker before any captured local goes out of scope.
  pool.wait_finish();

  if (error.load(std::memory_order_acquire)) {
    cerr << "Failed to build index while waiting finish\n";
    return errcode.load();
  }

  return 0;
}

// Opens an MMapFileStorage at BuilderCommon.IndexPath, attaches it to
// `streamer` and runs the multi-threaded build.
//
// The retrieval mode is derived from the streamer meta: a positive dimension
// means dense, anything else sparse.
//
// config_common  The "BuilderCommon" YAML section. "IndexPath" is mandatory;
//                "ThreadCount" is optional and defaults to the hardware
//                concurrency (at least one thread is always used).
//
// Returns 0 on success, otherwise an IndexError code: a setup failure or the
// code reported by the build itself.
int build_by_streamer(IndexStreamer::Pointer &streamer,
                      YAML::Node &config_common) {
  if (!config_common["IndexPath"]) {
    cerr << "Miss params IndexPath for Streamer\n";
    return IndexError_InvalidArgument;
  }
  string path = config_common["IndexPath"].as<string>();

  auto storage = IndexFactory::CreateStorage("MMapFileStorage");
  if (!storage) {
    cerr << "Failed to create storage\n";
    return IndexError_NoExist;
  }
  ailego::Params params;
  int ret = storage->init(params);
  if (ret != 0) {
    cerr << "Storage Failed init\n";
    return IndexError_Runtime;
  }
  ret = storage->open(path, true);
  if (ret != 0) {
    cerr << "Storage Failed to open\n";
    return IndexError_Runtime;
  }
  ret = streamer->open(storage);
  if (ret != 0) {
    cerr << "Failed to open storage\n";
    return IndexError_Runtime;
  }

  size_t thread_count = config_common["ThreadCount"]
                            ? config_common["ThreadCount"].as<uint64_t>()
                            : std::thread::hardware_concurrency();
  if (thread_count == 0) {
    // Either configured as 0 or the hardware concurrency is unknown; the
    // workers stride by the thread count, so it must never be zero.
    thread_count = std::thread::hardware_concurrency();
    if (thread_count == 0) {
      thread_count = 1;
    }
  }

  auto meta = streamer->meta();
  const RetrievalMode retrieval_mode =
      meta.dimension() > 0 ? RM_DENSE : RM_SPARSE;

  LOG_DEBUG("thread count: %zu, retrieval mode: %s", thread_count,
            retrieval_mode == RM_DENSE ? "Dense" : "Sparse");
  // Propagate the build result so the caller can detect a failed build.
  return do_build_by_streamer(streamer, static_cast<uint32_t>(thread_count),
                              retrieval_mode);
}

// Runs the sparse holder through the converter called `name`.
//
// With an empty `name` the input holder is returned unchanged (cast to
// IndexSparseHolder) and `index_meta` is left untouched. Otherwise the
// converter is created, initialised with the holder's meta and `params`,
// trained on and applied to the holder; `index_meta` then receives the
// converter's meta and the converted holder is returned.
//
// Returns an empty pointer when any converter stage fails.
IndexSparseHolder::Pointer convert_sparse_holder(
    const std::string &name, const ailego::Params &params,
    VecsIndexSparseHolder::Pointer &in_holder, IndexMeta &index_meta) {
  IndexSparseHolder::Pointer source =
      std::dynamic_pointer_cast<IndexSparseHolder>(in_holder);
  if (name.empty()) {
    return source;
  }

  // Prints the failed stage with its code and yields an empty holder.
  auto fail = [](const char *message, int code) -> IndexSparseHolder::Pointer {
    cerr << message << code << endl;
    return IndexSparseHolder::Pointer();
  };

  IndexConverter::Pointer converter = IndexFactory::CreateConverter(name);
  if (!converter) {
    cerr << "Failed to create sparse converter " << name << endl;
    return IndexSparseHolder::Pointer();
  }

  int status = converter->init(in_holder->index_meta(), params);
  if (status != 0) {
    return fail("Failed to init converter ", status);
  }

  status = converter->train(source);
  if (status != 0) {
    return fail("Failed to train sparse converter ", status);
  }

  status = converter->transform(source);
  if (status != 0) {
    return fail("Failed to transform converter ", status);
  }

  index_meta = converter->meta();
  return converter->sparse_result();
}

// Runs the dense holder through the converter called `name`.
//
// With an empty `name` the input holder is returned unchanged (cast to
// IndexHolder) and `index_meta` is left untouched. Otherwise the converter is
// created, initialised with the holder's meta and `params`, trained on and
// applied to the holder; `index_meta` then receives the converter's meta and
// the converted holder is returned.
//
// Returns an empty pointer when any converter stage fails.
IndexHolder::Pointer convert_holder(const std::string &name,
                                    const ailego::Params &params,
                                    VecsIndexHolder::Pointer &in_holder,
                                    IndexMeta &index_meta) {
  IndexHolder::Pointer source =
      std::dynamic_pointer_cast<IndexHolder>(in_holder);
  if (name.empty()) {
    return source;
  }

  // Prints the failed stage with its code and yields an empty holder.
  auto fail = [](const char *message, int code) -> IndexHolder::Pointer {
    cerr << message << code << endl;
    return IndexHolder::Pointer();
  };

  IndexConverter::Pointer converter = IndexFactory::CreateConverter(name);
  if (!converter) {
    cerr << "Failed to create converter " << name << endl;
    return IndexHolder::Pointer();
  }

  int status = converter->init(in_holder->index_meta(), params);
  if (status != 0) {
    return fail("Failed to init converter ", status);
  }

  status = converter->train(source);
  if (status != 0) {
    return fail("Failed to train converter ", status);
  }

  status = converter->transform(source);
  if (status != 0) {
    return fail("Failed to transform converter ", status);
  }

  index_meta = converter->meta();
  return converter->result();
}

// Runs the complete sparse build pipeline described by the YAML config:
// load the build file, apply the optional metric and converter, create a
// builder (or, failing that, a streamer) named by BuilderClass, init, train
// (builder only, when NeedTrain is true), build, dump to DumpPath (plus the
// tag list when the build file carries one), print statistics and clean up.
//
// config_root    Root of the YAML config (parameter sections).
// config_common  The "BuilderCommon" section of the config.
//
// Returns 0 on success and -1 on any failure.
int do_build_sparse(YAML::Node &config_root, YAML::Node &config_common) {
  string build_file = config_common["BuildFile"].as<string>();
  VecsIndexSparseHolder::Pointer build_holder(new VecsIndexSparseHolder);
  if (!build_holder->load(build_file)) {
    cerr << "Load input error: " << build_file << endl;
    return -1;
  }
  IndexMeta meta;
  meta = build_holder->index_meta();

  std::string metric_name;
  ailego::Params metric_params;
  if (config_common["MetricName"] &&
      !config_common["MetricName"].as<string>().empty()) {
    metric_name = config_common["MetricName"].as<string>();
    if (config_root["MetricParams"] &&
        !prepare_params(config_root["MetricParams"], metric_params)) {
      cerr << "Failed to prepare metric params" << endl;
      return -1;
    }
    build_holder->set_metric(metric_name, metric_params);
    meta.set_metric(metric_name, 0, metric_params);
  }

  string converter_name;
  ailego::Params converter_params;
  if (config_common["ConverterName"] &&
      !config_common["ConverterName"].as<string>().empty()) {
    converter_name = config_common["ConverterName"].as<string>();
    if (config_root["ConverterParams"] &&
        !prepare_params(config_root["ConverterParams"], converter_params)) {
      cerr << "Failed to prepare converter params" << endl;
      return -1;
    }
  }

  // A zero MaxDocs / KeepDocs means "not set".
  if (config_common["MaxDocs"] && config_common["MaxDocs"].as<uint32_t>()) {
    auto max_docs = config_common["MaxDocs"].as<uint32_t>();
    build_holder->set_max_doc_count(max_docs);
  }
  if (config_common["KeepDocs"] && config_common["KeepDocs"].as<uint32_t>()) {
    auto keep_docs = config_common["KeepDocs"].as<uint32_t>();
    if (keep_docs < build_holder->count()) {
      build_holder->set_start_cursor(build_holder->count() - keep_docs);
    }
  }

  // Create a Builder; fall back to a streamer of the same name.
  string builder_class = config_common["BuilderClass"].as<string>();
  IndexStreamer::Pointer streamer;
  IndexBuilder::Pointer builder =
      IndexFactory::CreateBuilder(builder_class.c_str());
  if (!builder) {
    streamer = IndexFactory::CreateStreamer(builder_class.c_str());
  }
  if (!builder && !streamer) {
    cerr << "Failed to create builder " << builder_class << endl;
    return -1;
  }
  cout << "Created builder " << builder_class << endl;

  IndexSparseHolder::Pointer cv_build_holder = convert_sparse_holder(
      converter_name, converter_params, build_holder, meta);
  if (!cv_build_holder) {
    cerr << "Convert holder failed." << endl;
    return -1;
  }

  ailego::Params params;
  if (!prepare_params(config_root["BuilderParams"], params)) {
    cerr << "Failed to prepare params" << endl;
    return -1;
  }

  // INIT
  int ret =
      builder ? builder->init(meta, params) : streamer->init(meta, params);
  if (ret < 0) {
    cerr << "Failed to init builder, ret=" << ret << endl;
    return -1;
  }
  ailego::ElapsedTime timer;

  // TRAIN
  if (builder && config_common["NeedTrain"] &&
      config_common["NeedTrain"].as<bool>()) {
    string train_file = config_common["TrainFile"].as<string>();
    VecsIndexSparseHolder::Pointer train_holder(new VecsIndexSparseHolder);
    if (!train_holder->load(train_file)) {
      cerr << "Load input error: " << train_file << endl;
      return -1;
    }

    if (!metric_name.empty()) {
      train_holder->set_metric(metric_name, metric_params);
    }

    IndexSparseHolder::Pointer cv_train_holder = convert_sparse_holder(
        converter_name, converter_params, train_holder, meta);
    if (!cv_train_holder) {
      cerr << "Convert train holder failed." << endl;
      return -1;
    }

    std::cout << "Prepare train data done!" << std::endl;
    timer.reset();
    ret = builder->train(std::move(cv_train_holder));
    size_t train_time = timer.milli_seconds();

    if (ret < 0) {
      cerr << "Failed to train in builder, ret=" << ret << endl;
      return -1;
    }
    cout << "Train finished, consume " << train_time << "ms." << endl;
  } else {
    cout << "Skip train procedure" << endl;
  }

  // BUILD
  // The streamer workers and the SIGINT handler reach the data through the
  // global holder.
  sparse_holder = build_holder;
  signal(SIGINT, stop);
  timer.reset();
  if (builder != nullptr) {
    ret = builder->build(std::move(cv_build_holder));
  } else {
    ret = build_sparse_by_streamer(streamer, config_common);
  }
  size_t build_time = timer.milli_seconds();
  if (ret < 0) {
    cerr << "Failed to build in builder, ret=" << ret << endl;
    return -1;
  }
  cout << "Build finished, consume " << build_time << "ms." << endl;
  signal(SIGINT, SIG_DFL);

  // DUMP
  IndexDumper::Pointer dumper = IndexFactory::CreateDumper("FileDumper");
  if (!dumper) {
    cerr << "Failed to create FileDumper." << endl;
    return -1;
  }
  string dump_prefix = config_common["DumpPath"].as<string>();
  ret = dumper->create(dump_prefix);
  if (ret != 0) {
    cerr << "Failed to create in dumper, ret=" << ret << endl;
    return -1;
  }
  timer.reset();
  ret = streamer ? streamer->dump(dumper) : builder->dump(dumper);
  size_t dump_time = timer.milli_seconds();
  if (ret == IndexError_NotImplemented) {
    LOG_WARN("Dump index not implemented");
  } else if (ret < 0) {
    cerr << "Failed to dump in builder, ret=" << ret << endl;
    return -1;
  }

  if (build_holder->has_taglist()) {
    size_t taglist_size{0};
    const void *taglist_data = build_holder->get_taglist_data(taglist_size);
    const void *key_base = build_holder->get_key_base();

    // A failed tag list dump leaves an incomplete index file; report it.
    ret = dump_taglist(dumper, build_holder->get_num_vecs(), key_base,
                       taglist_data, taglist_size);
    if (ret != 0) {
      cerr << "Failed to dump taglist, ret=" << ret << endl;
      return -1;
    }
  }

  ret = dumper->close();
  if (ret != 0) {
    cerr << "Dumper failed to close, ret=" << ret << endl;
    return -1;
  }
  std::cout << "Dump to [" << dump_prefix << "] finished, consume " << dump_time
            << "ms." << std::endl;

  if (builder) {
    // Read the statistics through a const view of the builder.
    auto &stats = static_cast<const IndexBuilder *>(builder.get())->stats();
    std::cout << "STATS: \n\tTrained count[" << stats.trained_count()
              << "]\n\tBuilt count[" << stats.built_count()
              << "]\n\tDump count[" << stats.dumped_count()
              << "]\n\tDiscarded count[" << stats.discarded_count() << "]\n";
  } else {
    auto &stats = streamer->stats();
    std::cout << "STATS: \n\tTrained count[" << 0 << "]\n\tBuilt count["
              << stats.added_count() << "]\n\tDump size ["
              << stats.dumped_size() << "]\n\tDiscarded count["
              << stats.discarded_count() << "]\n";
  }

  // CLEANUP
  builder ? builder->cleanup() : streamer->cleanup();

  return 0;
}

// Runs the complete dense build pipeline described by the YAML config:
// load the build file, apply the optional metric, converter and major order,
// create a builder (or, failing that, a streamer) named by BuilderClass,
// init, train (through an external trainer when UseTrainer is true, or
// directly when NeedTrain is true), build, dump to DumpPath (plus the tag
// list when the build file carries one), print statistics and clean up.
//
// config_root    Root of the YAML config (parameter sections).
// config_common  The "BuilderCommon" section of the config.
//
// Returns 0 on success and -1 on any failure.
int do_build(YAML::Node &config_root, YAML::Node &config_common) {
  string build_file = config_common["BuildFile"].as<string>();
  VecsIndexHolder::Pointer build_holder(new VecsIndexHolder);
  if (!build_holder->load(build_file)) {
    cerr << "Load input error: " << build_file << endl;
    return -1;
  }
  IndexMeta meta;
  meta = build_holder->index_meta();

  std::string metric_name;
  ailego::Params metric_params;
  if (config_common["MetricName"] &&
      !config_common["MetricName"].as<string>().empty()) {
    metric_name = config_common["MetricName"].as<string>();
    if (config_root["MetricParams"] &&
        !prepare_params(config_root["MetricParams"], metric_params)) {
      cerr << "Failed to prepare metric params" << endl;
      return -1;
    }
    build_holder->set_metric(metric_name, metric_params);
    meta.set_metric(metric_name, 0, metric_params);
  }
  // Meta of the raw input, taken before any converter rewrites `meta`.
  IndexMeta input_meta = meta;
  string converter_name;
  ailego::Params converter_params;
  if (config_common["ConverterName"] &&
      !config_common["ConverterName"].as<string>().empty()) {
    converter_name = config_common["ConverterName"].as<string>();
    if (config_root["ConverterParams"] &&
        !prepare_params(config_root["ConverterParams"], converter_params)) {
      cerr << "Failed to prepare converter params" << endl;
      return -1;
    }
  }
  // Any MajorOrder value other than "row" selects column order.
  IndexMeta::MajorOrder order = IndexMeta::MO_UNDEFINED;
  if (config_common["MajorOrder"]) {
    std::string order_str = config_common["MajorOrder"].as<string>();
    if (order_str == "row") {
      order = IndexMeta::MajorOrder::MO_ROW;
    } else {
      order = IndexMeta::MajorOrder::MO_COLUMN;
    }
  }

  // A zero MaxDocs / KeepDocs means "not set".
  if (config_common["MaxDocs"] && config_common["MaxDocs"].as<uint32_t>()) {
    auto max_docs = config_common["MaxDocs"].as<uint32_t>();
    build_holder->set_max_doc_count(max_docs);
  }
  if (config_common["KeepDocs"] && config_common["KeepDocs"].as<uint32_t>()) {
    auto keep_docs = config_common["KeepDocs"].as<uint32_t>();
    if (keep_docs < build_holder->count()) {
      build_holder->set_start_cursor(build_holder->count() - keep_docs);
    }
  }

  // Create a Builder; fall back to a streamer of the same name.
  string builder_class = config_common["BuilderClass"].as<string>();
  IndexStreamer::Pointer streamer;
  IndexBuilder::Pointer builder =
      IndexFactory::CreateBuilder(builder_class.c_str());
  if (!builder) {
    streamer = IndexFactory::CreateStreamer(builder_class.c_str());
  }
  if (!builder && !streamer) {
    cerr << "Failed to create builder " << builder_class << endl;
    return -1;
  }
  cout << "Created builder " << builder_class << endl;


  IndexHolder::Pointer cv_build_holder =
      convert_holder(converter_name, converter_params, build_holder, meta);
  if (!cv_build_holder) {
    cerr << "Convert holder failed." << endl;
    return -1;
  }
  meta.set_major_order(order);
  cout << IndexMetaHelper::to_string(meta) << endl;
  cout << "Prepare data done!" << endl;

  ailego::Params params;
  if (!prepare_params(config_root["BuilderParams"], params)) {
    cerr << "Failed to prepare params" << endl;
    return -1;
  }

  // INIT
  int ret =
      builder ? builder->init(meta, params) : streamer->init(meta, params);
  if (ret < 0) {
    cerr << "Failed to init builder, ret=" << ret << endl;
    return -1;
  }
  ailego::ElapsedTime timer;

  // TRAIN
  if (config_common["UseTrainer"] && config_common["UseTrainer"].as<bool>()) {
    // The trained model is handed to builder->train() below, so this path
    // cannot be served by a streamer.
    if (!builder) {
      cerr << "UseTrainer requires a builder, but " << builder_class
           << " is a streamer" << endl;
      return -1;
    }

    ailego::Params trainer_params;
    if (!prepare_params(config_root["TrainerParams"], trainer_params)) {
      cerr << "Failed to prepare trainer params" << endl;
      return -1;
    }

    string train_index_path;
    if (config_common["TrainerIndexPath"]) {
      train_index_path = config_common["TrainerIndexPath"].as<string>();
      if (train_index_path.empty()) {
        cerr << "invalid TrainerIndexPath format" << std::endl;
        return -1;
      }
      cout << "Trainer index path: " << train_index_path << "\n";
    } else {
      cerr << "Need [TrainerIndexPath] config" << std::endl;
      return -1;
    }

    IndexTrainer::Pointer trainer =
        IndexFactory::CreateTrainer("StratifiedClusterTrainer");
    if (!trainer) {
      cerr << "Failed to create StratifiedClusterTrainer" << endl;
      return -1;
    }
    if (trainer->init(meta, trainer_params) != 0) {
      cerr << "trainer init failed" << std::endl;
      return -1;
    }

    if (ailego::File::IsExist(train_index_path)) {
      // Reuse a previously dumped trainer index.
      IndexStorage::Pointer container =
          IndexFactory::CreateStorage("MMapFileReadStorage");
      if (!container) {
        cerr << "Failed to create MMapFileReadStorage" << endl;
        return -1;
      }
      if (container->init(ailego::Params()) != 0) {
        cerr << "Failed to init MMapFileReadStorage" << endl;
        return -1;
      }
      if (container->open(train_index_path, false) != 0) {
        cerr << "MMapFileReadStorage failed to load "
             << train_index_path.c_str() << endl;
        return -1;
      }
      if (trainer->load(container) != 0) {
        cerr << "Trainer failed to load container" << endl;
        return -1;
      }
    } else {
      // No trainer index yet: train from TrainFile and dump the result.
      std::cout << "Prepare trainer data..." << std::endl;
      if (!config_common["TrainFile"]) {
        cerr << "Can not find [TrainFile] in config" << endl;
        return -1;
      }
      string train_file = config_common["TrainFile"].as<string>();
      VecsIndexHolder::Pointer train_holder(new VecsIndexHolder);
      if (!train_holder->load(train_file)) {
        cerr << "Load input error: " << train_file << endl;
        return -1;
      }
      if (!metric_name.empty()) {
        train_holder->set_metric(metric_name, metric_params);
      }

      // support fp16 convert

      IndexHolder::Pointer cv_train_holder =
          convert_holder(converter_name, converter_params, train_holder, meta);
      if (!cv_train_holder) {
        cerr << "Convert train holder failed." << endl;
        return -1;
      }

      std::cout << "Prepare trainer data done!" << std::endl;
      std::cout << "Prepare train data!" << std::endl;

      ret = trainer->train(cv_train_holder);
      if (ret != 0) {
        cerr << "trainer train_index failed with " << ret << std::endl;
        return -1;
      }

      std::cout << "train data done!" << std::endl;
      IndexDumper::Pointer trainer_dumper =
          IndexFactory::CreateDumper("FileDumper");
      if (!trainer_dumper) {
        cerr << "Failed to create FileDumper." << endl;
        return -1;
      }
      if (trainer_dumper->init(ailego::Params()) != 0) {
        cerr << "Failed to init FileDumper." << endl;
        return -1;
      }
      ret = trainer_dumper->create(train_index_path);
      if (ret != 0) {
        cerr << "Failed to create in dumper, ret=" << ret << endl;
        return -1;
      }
      if (trainer->dump(trainer_dumper) != 0) {
        cerr << "trainer dump_index failed" << std::endl;
        return -1;
      }
      ret = trainer_dumper->close();
      if (ret != 0) {
        cerr << "Dumper failed to close, ret=" << ret << endl;
        return -1;
      }
    }

    ret = builder->train(trainer);
    size_t train_time = timer.milli_seconds();
    if (ret < 0) {
      cerr << "Failed to train in builder, ret=" << ret << endl;
      return -1;
    }
    cout << "Train finished, consume " << train_time << "ms." << endl;
  } else if (builder && config_common["NeedTrain"] &&
             config_common["NeedTrain"].as<bool>()) {
    string train_file = config_common["TrainFile"].as<string>();
    VecsIndexHolder::Pointer train_holder(new VecsIndexHolder);
    if (!train_holder->load(train_file)) {
      cerr << "Load input error: " << train_file << endl;
      return -1;
    }

    if (!metric_name.empty()) {
      train_holder->set_metric(metric_name, metric_params);
    }
    IndexHolder::Pointer cv_train_holder =
        convert_holder(converter_name, converter_params, train_holder, meta);
    if (!cv_train_holder) {
      cerr << "Convert train holder failed." << endl;
      return -1;
    }

    std::cout << "Prepare train data done!" << std::endl;
    timer.reset();
    ret = builder->train(std::move(cv_train_holder));
    size_t train_time = timer.milli_seconds();
    if (ret < 0) {
      cerr << "Failed to train in builder, ret=" << ret << endl;
      return -1;
    }
    cout << "Train finished, consume " << train_time << "ms." << endl;
  } else {
    cout << "Skip train procedure" << endl;
  }

  if (builder_class == "HnswRabitqStreamer") {
    if (setup_hnsw_rabitq_streamer(streamer, input_meta, config_root,
                                   converter_name, &cv_build_holder) != 0) {
      return -1;
    }
  } else if (builder_class == "HnswRabitqBuilder" && !converter_name.empty()) {
    cv_build_holder = convert_holder_to_provider(cv_build_holder);
  }

  // BUILD
  // The streamer workers and the SIGINT handler reach the data through the
  // global holder.
  holder = build_holder;
  signal(SIGINT, stop);
  timer.reset();
  if (builder != nullptr) {
    ret = builder->build(std::move(cv_build_holder));
  } else {
    ret = build_by_streamer(streamer, config_common);
  }
  size_t build_time = timer.milli_seconds();
  if (ret < 0) {
    cerr << "Failed to build in builder, ret=" << ret << endl;
    return -1;
  }
  cout << "Build finished, consume " << build_time << "ms." << endl;
  signal(SIGINT, SIG_DFL);

  // DUMP
  IndexDumper::Pointer dumper = IndexFactory::CreateDumper("FileDumper");
  if (!dumper) {
    cerr << "Failed to create FileDumper." << endl;
    return -1;
  }
  string dump_prefix = config_common["DumpPath"].as<string>();
  ret = dumper->create(dump_prefix);
  if (ret != 0) {
    cerr << "Failed to create in dumper, ret=" << ret << endl;
    return -1;
  }
  timer.reset();
  ret = streamer ? streamer->dump(dumper) : builder->dump(dumper);
  size_t dump_time = timer.milli_seconds();
  if (ret == IndexError_NotImplemented) {
    LOG_WARN("Dump index not implemented");
  } else if (ret < 0) {
    cerr << "Failed to dump in builder, ret=" << ret << endl;
    return -1;
  }

  if (build_holder->has_taglist()) {
    size_t taglist_size{0};
    const void *taglist_data = build_holder->get_taglist_data(taglist_size);
    const void *key_base = build_holder->get_key_base();

    // A failed tag list dump leaves an incomplete index file; report it.
    ret = dump_taglist(dumper, build_holder->get_num_vecs(), key_base,
                       taglist_data, taglist_size);
    if (ret != 0) {
      cerr << "Failed to dump taglist, ret=" << ret << endl;
      return -1;
    }
  }

  ret = dumper->close();
  if (ret != 0) {
    cerr << "Dumper failed to close, ret=" << ret << endl;
    return -1;
  }
  std::cout << "Dump to [" << dump_prefix << "] finished, consume " << dump_time
            << "ms." << std::endl;

  if (builder) {
    // Read the statistics through a const view of the builder.
    auto &stats = static_cast<const IndexBuilder *>(builder.get())->stats();
    std::cout << "STATS: \n\tTrained count[" << stats.trained_count()
              << "]\n\tBuilt count[" << stats.built_count()
              << "]\n\tDump count[" << stats.dumped_count()
              << "]\n\tDiscarded count[" << stats.discarded_count() << "]\n";
  } else {
    auto &stats = streamer->stats();
    std::cout << "STATS: \n\tTrained count[" << 0 << "]\n\tBuilt count["
              << stats.added_count() << "]\n\tDump size ["
              << stats.dumped_size() << "]\n\tDiscarded count["
              << stats.discarded_count() << "]\n";
  }

  // CLEANUP
  builder ? builder->cleanup() : streamer->cleanup();

  return 0;
}

int main(int argc, char *argv[]) {
  if (argc < 2) {
    usage();
    return -1;
  }
  IndexPluginBroker broker;
  std::string error;
  for (int i = 2; i < argc; ++i) {
    if (!broker.emplace(argv[i], &error)) {
      cerr << "Failed to load plugin: " << argv[i] << " (" << error << ")"
           << endl;
      return -1;
    }
  }
  YAML::Node config_root;
  try {
    config_root = YAML::LoadFile(argv[1]);
  } catch (...) {
    cerr << "Load YAML file[" << argv[1] << "] failed!" << endl;
    return -1;
  }
  if (!check_config(config_root)) {
    return -1;
  }
  auto config_common = config_root["BuilderCommon"];

  map<string, int> LOG_LEVEL = {{"debug", IndexLogger::LEVEL_DEBUG},
                                {"info", IndexLogger::LEVEL_INFO},
                                {"warn", IndexLogger::LEVEL_WARN},
                                {"error", IndexLogger::LEVEL_ERROR},
                                {"fatal", IndexLogger::LEVEL_FATAL}};

  string log_level = config_common["LogLevel"]
                         ? config_common["LogLevel"].as<string>()
                         : "debug";

  transform(log_level.begin(), log_level.end(), log_level.begin(), ::tolower);
  if (LOG_LEVEL.find(log_level) != LOG_LEVEL.end()) {
    IndexLoggerBroker::SetLevel(LOG_LEVEL[log_level]);
    zvec::ailego::LoggerBroker::SetLevel(LOG_LEVEL[log_level]);
  }

  RetrievalMode retrieval_mode{RM_DENSE};
  if (config_common["RetrievalMode"]) {
    std::string retrieval_mode_str =
        config_common["RetrievalMode"].as<string>();
    if (retrieval_mode_str == "dense") {
      retrieval_mode = RM_DENSE;
    } else if (retrieval_mode_str == "sparse") {
      retrieval_mode = RM_SPARSE;
    }
  }

  if (retrieval_mode == RM_SPARSE) {
    return do_build_sparse(config_root, config_common);
  } else {
    return do_build(config_root, config_common);
  }

  return 0;
}


================================================
FILE: tools/core/meta_segment_common.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstdint>
#include <string>
#include <zvec/ailego/utility/type_helper.h>

namespace zvec {
namespace core {

// Segment names for the three tag-list sections: header, keys and data.
// Each translation unit including this header gets its own internal-linkage
// copy of these strings.
static const std::string TAGLIST_HEADER_SEGMENT_NAME = "local_taglists_header";
static const std::string TAGLIST_KEY_SEGMENT_NAME = "local_taglists_key";
static const std::string TAGLIST_DATA_SEGMENT_NAME = "local_taglists_data";

// Fixed-layout header record for tag-list data.
// NOTE(review): presumably written verbatim into the segment named by
// TAGLIST_HEADER_SEGMENT_NAME - confirm against the dump/load code.
//
// The struct is packed to 4-byte alignment so that it occupies exactly
// 8 + 252 = 260 bytes with no tail padding. push/pop is used (rather than
// `#pragma pack(4)` ... `#pragma pack()`) so that the packing state of the
// including translation unit is restored instead of being reset to the
// compiler default.
#pragma pack(push, 4)
struct TagListHeader {
  uint64_t num_vecs;      // number of vectors described by the tag lists
  uint8_t meta_buf[252];  // opaque metadata bytes; contents not defined here
};
#pragma pack(pop)

// Lock the layout in at compile time: any change in size would silently
// change the bytes produced/consumed for this record.
static_assert(sizeof(TagListHeader) == sizeof(uint64_t) + 252,
              "TagListHeader must be exactly 260 bytes");

}  // namespace core
}  // namespace zvec

================================================
FILE: tools/core/recall.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "helper.h"

// Guards the per-topk recall accumulators that worker threads update.
mutex recall_lock;
// When true, recall matches results by key; otherwise scores are compared
// using g_recall_precision as the tolerance.
bool g_compare_by_id{false};
// Score tolerance for the score-based comparison. Starts at zero; it is
// assigned outside the part of the file shown here.
float g_recall_precision{0.0f};


//--------------------------------------------------
// Recall
//--------------------------------------------------
// Kind of vector data a recall run works on.
enum RetrievalMode {
  RM_UNDEFINED = 0,
  RM_DENSE = 1,
  RM_SPARSE = 2,
};

// Filtering applied to queries: none, or a tag-based prefilter.
enum FilterMode {
  FM_UNDEFINED = 0,
  FM_NONE = 1,
  FM_TAG = 2,
};

template <typename T>
class Recall {
 public:
  // Creates the worker pool and normalizes the batch size.
  //
  // threads == 0 lets ThreadPool choose its own size (reported as the cpu
  // count); otherwise a pool with exactly `threads` workers is created.
  // batch_count == 0 is treated as "one query per task, batch API disabled";
  // any value >= 1 enables the batch code path.
  Recall(size_t threads, const string &output, size_t batch_count,
         FilterMode filter_mode)
      : threads_(threads),
        output_(output),
        batch_count_(batch_count),
        filter_mode_{filter_mode} {
    const bool auto_sized = (threads_ == 0);
    if (auto_sized) {
      pool_ = make_shared<ThreadPool>(true);
      threads_ = pool_->count();
    } else {
      pool_ = make_shared<ThreadPool>(threads_, true);
    }
    cout << (auto_sized ? "Using cpu count as thread pool count["
                        : "Using thread pool count[")
         << threads_ << "]" << endl;

    call_batch_api_ = (batch_count_ >= 1);
    if (!call_batch_api_) {
      batch_count_ = 1;
    }
  }

  // SIGINT handler installed by run_dense(). The first signal only raises
  // STOP_NOW so the dispatch loop can wind down; a second signal terminates
  // the process with the signal number as exit status.
  // NOTE(review): cout and exit() are not async-signal-safe.
  static void stop(int signo) {
    if (!STOP_NOW) {
      STOP_NOW = true;
      cout << "\rTrying to stop. press [Ctrl+C] again kill immediately."
           << endl
           << flush;
      return;
    }
    exit(signo);
  }

  // Runs the dense recall evaluation.
  //
  // index        index to search.
  // query_param  search parameters, cloned per query by recall_one_dense().
  // recall_tops  comma separated list of topk values to report ("1,10,100").
  // gt_count     requested ground truth depth; raised to the largest topk.
  //
  // Ground truth comes from a previously loaded external file, or is
  // loaded/created by load_gt_dense(). One task per entry of batch_queries_
  // is dispatched to the thread pool; if an output directory was given, each
  // worker thread logs to its own "t<N>.knn" / "t<N>.linear" file. Finally
  // the accumulated Recall@k values are printed, averaged over the number of
  // loaded queries. Errors are logged and abort the run.
  void run_dense(core_interface::Index::Pointer index,
                 core_interface::BaseIndexQueryParam::Pointer query_param,
                 const string &recall_tops, size_t gt_count) {
    StringHelper::Split(recall_tops, ",", &topk_ids_);
    std::sort(topk_ids_.begin(), topk_ids_.end());

    for (auto i : topk_ids_) {
      recall_res_[i] = 0.0f;
    }
    // rbegin() on an empty map must not be dereferenced.
    if (recall_res_.empty()) {
      LOG_ERROR("No recall topk specified!");
      return;
    }
    // Without queries there is nothing to dispatch, and the final average
    // would divide by zero.
    if (batch_queries_.empty() || linear_queries_.empty()) {
      LOG_ERROR("No query loaded!");
      return;
    }
    size_t topk = recall_res_.rbegin()->first;

    gt_count = topk < gt_count ? gt_count : topk;

    if (external_gt_file_enabled_) {
      cout << "Internal ground truth file NOT used since external ground truth "
              "file has been loaded"
           << endl;
    } else {
      cout << "Loading internal ground truth file" << endl;

      if (!load_gt_dense(index, gt_count)) {
        LOG_ERROR("Load ground truth file failed!");
        return;
      }
    }

    if (batch_queries_.size() < threads_) {
      threads_ = batch_queries_.size();
      // Same argument order as the constructor's ThreadPool(threads_, true);
      // the previous (true, threads_) call passed them swapped.
      pool_ = make_shared<ThreadPool>(threads_, true);
      cout << "Query size too small, resize thread pool count[" << threads_
           << "]" << endl;
    }

    // Prepare file handler
    vector<pair<fstream *, fstream *>> output_fs;
    if (!output_.empty()) {
      string cmd = "mkdir -p " + output_;
      int ret = system(cmd.c_str());
      if (ret != 0) {
        LOG_ERROR("execute cmd %s failed, ret=%d", cmd.c_str(), ret);
        return;
      }
      struct stat sb;
      if (stat(output_.c_str(), &sb) == 0 && S_ISDIR(sb.st_mode)) {
        cout << "logs output to : " << output_ << endl;
        // One (knn, linear) log pair per worker thread; the streams are
        // released after wait_finish() below.
        output_fs.reserve(threads_);
        for (size_t i = 0; i < threads_; ++i) {
          fstream *fs_k = new fstream();
          fs_k->open(output_ + "/t" + to_string(i) + ".knn", ios::out);
          fstream *fs_l = new fstream();
          fs_l->open(output_ + "/t" + to_string(i) + ".linear", ios::out);
          output_fs.push_back(make_pair(fs_k, fs_l));
        }
      }
    }

    signal(SIGINT, stop);
    size_t i = 0;
    for (; !STOP_NOW && i < batch_queries_.size();) {
      // Throttle: keep at most one pending task per worker.
      if (pool_->pending_count() >= pool_->count()) {
        this_thread::sleep_for(chrono::microseconds(1));
        continue;
      }

      Closure::Pointer task =
          Closure::New(this, &Recall::recall_one_dense, index, query_param,
                       topk, i, output_fs);
      pool_->enqueue_and_wake(task);

      i++;
    }
    pool_->wait_finish();

    for (const auto &fs : output_fs) {
      fs.first->close();
      fs.second->close();
      delete fs.first;
      delete fs.second;
    }
    cout << "Process query: " << i << endl;
    for (const auto &it : recall_res_) {
      cout << "Recall@" << it.first << ": "
           << it.second / linear_queries_.size() << endl;
    }
  }

  // Loads the queries from a text file and prepares the per-task batches.
  //
  // The reader fills linear_queries_, linear_sparse_data_ and
  // linear_taglists_ (one entry per query). These are then regrouped:
  //   - batch_count_ == 1: every batch holds exactly one query.
  //   - otherwise: queries are concatenated batch_count_ at a time; the last
  //     batch is padded by wrapping around to the first queries. Tag lists
  //     are not populated in this mode (each batch gets an empty list).
  // Also records the dimension (taken from the first query), the query count
  // and the query meta for T (float or int8_t only).
  //
  // Returns false if the file cannot be read, contains no query, or T is not
  // a supported element type.
  bool load_query(const std::string &query_file, const std::string &first_sep,
                  const std::string &second_sep) {
    TxtInputReader<T> reader;

    if (!reader.load_query(query_file, first_sep, second_sep, linear_queries_,
                           linear_sparse_data_, linear_taglists_)) {
      LOG_ERROR("Load query error");
      return false;
    }
    // linear_queries_[0] is read below; refuse an empty query set up front.
    if (linear_queries_.empty()) {
      LOG_ERROR("No query loaded from file: %s", query_file.c_str());
      return false;
    }

    if (batch_count_ == 1) {
      batch_queries_ = linear_queries_;

      for (const auto &sparse : linear_sparse_data_) {
        vector<uint32_t> sparse_count;
        sparse_count.push_back(sparse.count);

        batch_sparse_counts_.push_back(std::move(sparse_count));
        batch_sparse_indices_.push_back(sparse.indices);
        batch_sparse_features_.push_back(sparse.features);
      }

      for (const auto &taglist : linear_taglists_) {
        vector<vector<uint64_t>> new_taglists;
        new_taglists.push_back(taglist);

        batch_taglists_.push_back(std::move(new_taglists));
      }
    } else {
      const size_t query_total = linear_queries_.size();
      const size_t num_batch = (query_total + batch_count_ - 1) / batch_count_;
      size_t idx = 0;
      for (size_t n = 0; n < num_batch; ++n) {
        vector<T> batch_query;
        vector<uint32_t> batch_sparse_count;
        vector<uint32_t> batch_sparse_indices;
        vector<T> batch_sparse_feature;
        vector<vector<uint64_t>> batch_taglists;

        for (size_t i = 0; i < batch_count_; ++i) {
          const auto &dense = linear_queries_[idx];
          batch_query.insert(batch_query.end(), dense.begin(), dense.end());

          // Sparse data may be absent; never index past what the reader
          // actually produced.
          if (idx < linear_sparse_data_.size()) {
            const auto &sparse = linear_sparse_data_[idx];
            batch_sparse_count.push_back(sparse.count);
            batch_sparse_indices.insert(batch_sparse_indices.end(),
                                        sparse.indices.begin(),
                                        sparse.indices.end());
            batch_sparse_feature.insert(batch_sparse_feature.end(),
                                        sparse.features.begin(),
                                        sparse.features.end());
          }

          // Wrap around so the last batch is always full.
          idx = (idx + 1) % query_total;
        }

        batch_queries_.push_back(std::move(batch_query));
        batch_sparse_counts_.push_back(std::move(batch_sparse_count));
        batch_sparse_indices_.push_back(std::move(batch_sparse_indices));
        batch_sparse_features_.push_back(std::move(batch_sparse_feature));
        batch_taglists_.push_back(std::move(batch_taglists));
      }
    }

    dim_ = linear_queries_[0].size();
    total_querys_ = linear_queries_.size();
    if (typeid(T) == typeid(float)) {
      qmeta_.set_meta(IndexMeta::DataType::DT_FP32, dim_);
    } else if (typeid(T) == typeid(int8_t)) {
      qmeta_.set_meta(IndexMeta::DataType::DT_INT8, dim_);
    } else {
      LOG_ERROR("unsupported type");
      return false;
    }

    cout << "Load query done!" << endl;
    return true;
  }

  // Loads ground truth from a user supplied text file into gt_. On success
  // the external-ground-truth flag is set, which makes run_dense() skip the
  // internally generated ground truth. Returns whether loading succeeded.
  bool load_external_gt_file(const std::string &external_gt_file,
                             const std::string &first_sep,
                             const std::string &second_sep) {
    TxtInputReader<T> reader;
    if (!reader.load_external_gt(external_gt_file, first_sep, second_sep,
                                 gt_)) {
      LOG_ERROR("Failed to load ground truth file!");
      return false;
    }

    cout << "Load external ground truth file["
         << File::BaseName(external_gt_file) << "] done!" << endl;
    external_gt_file_enabled_ = true;
    return true;
  }

 private:
  std::string compute_crc(size_t gt_count) {
    uint32_t crc = 0u;
    // dense
    if (batch_queries_.size() > 0) {
      size_t one_size = dim_ * sizeof(T);
      size_t data_size = total_querys_ * one_size + sizeof(size_t);
      char *data = new char[data_size];
      size_t q = 0;
      char *p = data;
      for (; q < batch_queries_.size(); ++q) {
        memcpy(p, batch_queries_[q].data(),
               batch_queries_[q].size() * sizeof(T));
        p += batch_queries_[q].size() * sizeof(T);
      }
      memcpy(p, &gt_count, sizeof(size_t));
      crc = Crc32c::Hash(data, data_size, crc);
      delete[] data;
    }

    // sparse
    if (linear_sparse_data_.size() > 0) {
      for (size_t i = 0; i < linear_sparse_data_.size(); ++i) {
        crc = Crc32c::Hash(&(linear_sparse_data_[i].count), sizeof(uint32_t),
                           crc);
        crc =
            Crc32c::Hash(linear_sparse_data_[i].indices.data(),
                         linear_sparse_data_[i].count * sizeof(uint32_t), crc);
        crc = Crc32c::Hash(linear_sparse_data_[i].features.data(),
                           linear_sparse_data_[i].count * sizeof(T), crc);
      }
    }

    char crc_str[64];
    snprintf(crc_str, sizeof(crc_str), "%X", crc);

    return std::string(crc_str);
  }

  // Loads the dense ground truth into gt_, creating the cache file first if
  // it does not exist yet.
  //
  // The cache file is named "gt.<crc>" (see compute_crc) in the current
  // directory. Its layout, one record per query:
  //   int32   gt_count
  //   gt_count x { uint64 key, float score }
  // Records with fewer than gt_count results are padded with key = -1 and a
  // NaN score; padding entries are dropped again on load.
  //
  // Creation runs a linear search per query on the thread pool (optionally
  // with the tag prefilter), writes to "gt.<crc>.tmp" and renames the file
  // once complete.
  //
  // Returns false if a search fails, the rename fails, or an existing file
  // cannot be opened or does not match gt_count.
  bool load_gt_dense(core_interface::Index::Pointer index, size_t gt_count) {
    std::string crc_str = compute_crc(gt_count);

    string gt_file = string("gt.") + crc_str;

    File gtf;
    if (!gtf.IsRegular(gt_file.c_str())) {
      cout << "Ground truth file[" << gt_file << "] not exist, try to create it"
           << endl;
      ElapsedTime timer;

      size_t size = sizeof(uint64_t) + sizeof(float);
      size_t file_size =
          linear_queries_.size() * (sizeof(int) + size * gt_count);

      std::string gt_file_temp = gt_file + ".tmp";
      // NOTE(review): the result of create() is not checked here - confirm
      // its return type and handle failure.
      gtf.create(gt_file_temp.c_str(), file_size);

      gt_.resize(linear_queries_.size());

      atomic_bool error(false);
      size_t count = 0;
      float s = linear_queries_.size() / 100.0;
      size_t pc = 0;
      SpinMutex spin_lock;

      function<void(size_t)> fun = [&](size_t i) {
        // Progress bar; the counters are shared, so update them under lock.
        spin_lock.lock();
        count++;
        size_t process = (size_t)ceil(count / s);
        if (process > pc) {
          pc = process;
          stringstream msg;
          msg << "\r" << setw(3) << setfill(' ') << process << "% " << left
              << setfill('=') << setw(process / 2 + 1) << "[" << right
              << setfill(' ') << setw(51 - process / 2) << "]";
          cout << msg.str() << flush;
        }
        spin_lock.unlock();

        // The query is only read, so a reference avoids a copy per task.
        const auto &query = linear_queries_[i];

        FilterResultCache filter_cache;
        std::shared_ptr<IndexFilter> filter_ptr = nullptr;
        if (filter_mode_ == FM_TAG) {
          if (batch_taglists_[i].size() != 1) {
            LOG_ERROR("query tag list not equal to one!");
            return;
          }

          int ret = filter_cache.filter(id_to_tags_list_, batch_taglists_[i][0],
                                        tag_key_list_);
          if (ret != 0) {
            LOG_ERROR("prefilter failed, idx: %zu", i);
            return;
          }

          // filter_cache outlives the Search() call below, so capturing it by
          // reference is safe.
          auto filterFunc = [&](uint64_t key) {
            return filter_cache.find(key);
          };

          filter_ptr = std::make_shared<IndexFilter>();
          filter_ptr->set(filterFunc);
        }

        core_interface::DenseVector dense_query;
        dense_query.data = query.data();
        core_interface::VectorData query_data;
        query_data.vector = dense_query;

        auto query_param = std::make_shared<core_interface::FlatQueryParam>();
        query_param->topk = gt_count;
        query_param->is_linear = true;
        query_param->filter = filter_ptr;

        core_interface::SearchResult search_result;
        int ret = index->Search(query_data, query_param, &search_result);
        if (ret < 0) {
          LOG_ERROR("Failed to linear search, ret=%d %s", ret,
                    IndexError::What(ret));
          error.exchange(true);
          return;
        }
        auto &result = search_result.doc_list_;
        vector<pair<uint64_t, float>> one_gt;
        one_gt.reserve(gt_count);

        for (const auto &knn : result) {
          one_gt.emplace_back(knn.key(), knn.score());
        }
        // Each task writes its own slot, so no lock is needed here.
        gt_[i] = std::move(one_gt);
      };
      for (size_t i = 0; i < linear_queries_.size(); ++i) {
        if (error) {
          break;
        }
        pool_->enqueue_and_wake(Closure::New(fun, i));
      }
      pool_->wait_finish();

      if (error) {
        cout << endl
             << "Ground truth file[" << gt_file << "] create failed!" << endl;
        gtf.close();
        // Only the temporary file exists at this point; remove that one.
        remove(gt_file_temp.c_str());
        return false;
      }

      // The record header is a 32-bit int. Write it from a real int rather
      // than from the first bytes of the size_t, which would depend on byte
      // order.
      const int gt_count_field = static_cast<int>(gt_count);

      for (size_t i = 0; i < gt_.size(); ++i) {
        auto &gt = gt_[i];

        gtf.write(&gt_count_field, sizeof(int));

        for (size_t j = 0; j < gt.size(); j++) {
          auto &one_gt = gt[j];

          gtf.write(&one_gt.first, sizeof(uint64_t));
          gtf.write(&one_gt.second, sizeof(float));
        }

        // if ground truth is less than gt count, fill it up
        if (gt.size() != gt_count) {
          std::cout
              << "WARN: GT result count less than GT expected count, index: "
              << i << ", expected GT count: " << gt_count
              << ", actual GT count: " << gt.size() << std::endl;

          uint64_t key{-1LLU};
          float score{std::nanf("")};

          for (size_t j = gt.size(); j < gt_count; ++j) {
            gtf.write(&key, sizeof(uint64_t));
            gtf.write(&score, sizeof(float));
          }
        }
      }

      gtf.close();

      if (!File::Rename(gt_file_temp, gt_file)) {
        LOG_ERROR("failed to rename ground truth file, src: %s, dst: %s",
                  gt_file_temp.c_str(), gt_file.c_str());

        return false;
      }

      cout << endl
           << "Ground truth file create successful in "
           << timer.milli_seconds() / 1000 << "s." << endl;
    } else {
      if (!gtf.open(gt_file.c_str(), true)) {
        LOG_ERROR("Failed to open ground truth file[%s]", gt_file.c_str());
        return false;
      }
      size_t file_size = gtf.size();

      constexpr size_t LENGTH = 10240;
      constexpr size_t GT_PAIR_SIZE = sizeof(uint64_t) + sizeof(float);

      // Owned, zero-initialized buffer: released on every return path, and a
      // short read leaves a deterministic count of 0.
      std::vector<char> buffer(LENGTH);
      gtf.read(buffer.data(), sizeof(int));

      int stored_count = 0;
      memcpy(&stored_count, buffer.data(), sizeof(int));
      size_t gt_count_input = (size_t)stored_count;
      size_t one_query_line_size = sizeof(int) + GT_PAIR_SIZE * gt_count_input;

      if (gt_count != gt_count_input || file_size % one_query_line_size != 0) {
        LOG_ERROR("Ground truth file[%s] content error!", gt_file.c_str());
        gtf.close();
        return false;
      }

      size_t query_num = file_size / one_query_line_size;
      if (one_query_line_size > buffer.size()) {
        buffer.resize(one_query_line_size);
      }

      // Start from a clean slate so records always line up with query indices.
      gt_.clear();
      gt_.reserve(query_num);

      for (size_t n = 0; n < query_num; ++n) {
        gtf.read(n * one_query_line_size, buffer.data(), one_query_line_size);
        vector<pair<uint64_t, float>> one_gt;
        one_gt.reserve(gt_count);

        for (size_t i = 0; i < gt_count; ++i) {
          // Entries sit at 4 + 12 * i, which is not aligned for uint64_t, so
          // copy the bytes out instead of dereferencing a cast pointer.
          const char *entry = buffer.data() + sizeof(int) + GT_PAIR_SIZE * i;
          uint64_t key = 0;
          float score = 0.0f;
          memcpy(&key, entry, sizeof(uint64_t));
          memcpy(&score, entry + sizeof(uint64_t), sizeof(float));

          // key == -1 marks a padding entry.
          if (key != -1LLU) {
            one_gt.emplace_back(key, score);
          }
        }
        gt_.emplace_back(std::move(one_gt));
      }
      cout << "Load ground truth file[" << gt_file << "] done!" << endl;
    }

    return true;
  }


  // Thread pool task: searches one entry of batch_queries_ and folds the
  // result into the recall accumulators.
  //
  // index        index to search.
  // query_param  base search parameters; cloned so topk/filter/is_linear can
  //              be set per task. Must not be null.
  // topk         number of results requested from the index.
  // idx          index into batch_queries_ (a batch index in batch mode).
  // output_fs    per-thread (knn log, linear log) streams; may be empty.
  //
  // In batch mode each query of the batch is searched individually and
  // scored against gt_[idx * batch_count_ + i]; otherwise the single query is
  // scored against gt_[idx]. Failures are logged and end the task.
  void recall_one_dense(
      core_interface::Index::Pointer index,
      core_interface::BaseIndexQueryParam::Pointer query_param, size_t topk,
      size_t idx,
      std::vector<pair<std::fstream *, std::fstream *>> &output_fs) {
    const auto &query = batch_queries_[idx];

    // Pick this worker's log files, if logging is enabled.
    size_t thread_index = pool_->indexof_this();
    fstream *knn_fs = nullptr;
    fstream *linear_fs = nullptr;
    if (output_fs.size() > thread_index) {
      knn_fs = output_fs[thread_index].first;
      linear_fs = output_fs[thread_index].second;
    }

    // Compares the knn results of one query with its ground truth and adds
    // the per-topk recall percentages to recall_res_.
    auto cal_recall = [&, this](const std::vector<IndexDocument> &knn_res,
                                size_t query_idx) {
      // Ground truth (e.g. from an external file) may cover fewer queries
      // than were loaded; never index past it.
      if (query_idx >= gt_.size()) {
        LOG_ERROR("no ground truth for query, idx: %zu", query_idx);
        return;
      }

      vector<IndexDocument> linear_res;

      size_t result_size = std::min(topk, gt_[query_idx].size());
      if (result_size == 0) {
        return;
      }

      linear_res.reserve(result_size);
      for (size_t i = 0; i < result_size; ++i) {
        const auto &gt_node = gt_[query_idx][i];

        linear_res.emplace_back(gt_node.first, gt_node.second, gt_node.first);
      }


      if (knn_fs) {
        for (const auto &knn : knn_res) {
          string str = "query[" + to_string(query_idx) + "]\tkey[" +
                       to_string(knn.key()) + "], dist[" +
                       to_string(knn.score()) + "]\n";
          knn_fs->write(str.c_str(), str.size());
        }
      }
      size_t match = 0;
      // Score order of the ground truth: ascending unless the first score is
      // larger than the last one.
      bool asc =
          !(linear_res.size() > 1 &&
            (linear_res[0].score() > linear_res[linear_res.size() - 1].score()));

      map<int32_t, size_t> topk_matchs;
      if (g_compare_by_id) {
        for (size_t i = 0; i < topk_ids_.size(); ++i) {
          topk_matchs[topk_ids_[i]] = 0;
        }
      }
      for (size_t i = 0, j = 0; i < linear_res.size();) {
        bool m = false;       // if current doc matched in max topk
        bool changed = true;  // if i changed
        if (g_compare_by_id) {
          for (size_t k = 0; k < topk_ids_.size(); ++k) {
            // Extend the cut-off past topk while the following results tie
            // with the last accepted score. A topk of 0 has no "previous"
            // entry, so it is never extended.
            size_t dynamic_size = (size_t)topk_ids_[k];
            for (; dynamic_size > 0 && dynamic_size + 1 < knn_res.size();
                 ++dynamic_size) {
              if (fabs(knn_res[dynamic_size - 1].score() -
                       knn_res[dynamic_size].score()) >=
                  numeric_limits<float>::epsilon()) {
                break;
              }
            }
            for (size_t l = 0; l < dynamic_size && l < knn_res.size(); ++l) {
              if (linear_res[i].key() == knn_res[l].key()) {
                topk_matchs[topk_ids_[k]]++;
                if (k == topk_ids_.size() - 1) {
                  m = true;
                }
                break;
              }
            }
          }
          ++i;
          // i is now the number of ground truth entries processed; if it is
          // one of the requested topk values, record recall@i.
          auto it = recall_res_.find(i);
          if (it != recall_res_.end()) {
            lock_guard<mutex> lock(recall_lock);
            it->second += 100.0 * topk_matchs[i] / i;
          }
        } else {
          // Score based matching: walk both score-ordered lists in parallel.
          size_t cur_topk = i + 1;
          if (j < knn_res.size()) {
            if (fabs(linear_res[i].score() - knn_res[j].score()) <
                g_recall_precision) {
              ++j;
              ++i;
              match++;
              m = true;
            } else {
              if ((asc && linear_res[i].score() < knn_res[j].score()) ||
                  (!asc && linear_res[i].score() > knn_res[j].score())) {
                ++i;
              } else {
                changed = false;
                ++j;
              }
            }
          } else {
            ++i;
          }
          auto it = recall_res_.find(cur_topk);
          if (changed && it != recall_res_.end()) {
            lock_guard<mutex> lock(recall_lock);
            it->second += 100.0 * match / cur_topk;
          }
        }
        if (linear_fs && changed) {
          // Log the query index (not the batch index) so the entries line up
          // with the knn log.
          string str = string(m ? "    HIT" : "NOT HIT") + "  query[" +
                       to_string(query_idx) + "]\tkey[" +
                       to_string(linear_res[i - 1].key()) + "], dist[" +
                       to_string(linear_res[i - 1].score()) + "]\n";
          linear_fs->write(str.c_str(), str.size());
        }
      }
    };

    // prefilter
    FilterResultCache filter_cache;
    std::shared_ptr<IndexFilter> filter_ptr = nullptr;
    if (filter_mode_ == FM_TAG) {
      if (batch_taglists_[idx].size() != 1) {
        LOG_ERROR("query tag list not equal to one!");
        return;
      }

      int ret = filter_cache.filter(id_to_tags_list_, batch_taglists_[idx][0],
                                    tag_key_list_);
      if (ret != 0) {
        LOG_ERROR("prefilter failed, idx: %zu", idx);
        return;
      }

      // filter_cache lives until the end of this function, i.e. longer than
      // every Search() call that uses the filter.
      auto filterFunc = [&](uint64_t key) { return filter_cache.find(key); };

      filter_ptr = std::make_shared<core::IndexFilter>();
      filter_ptr->set(filterFunc);
    }

    core_interface::DenseVector dense_query;
    dense_query.data = query.data();
    core_interface::VectorData query_data;
    query_data.vector = dense_query;

    // query_param is required in the config, so it should not be nullptr
    auto query_param_clone = query_param->Clone();
    query_param_clone->topk = topk;
    query_param_clone->filter = filter_ptr;
    query_param_clone->is_linear = false;

    if (call_batch_api_) {
      size_t qnum = query.size() / dim_;
      // For batch search, we need to search each query separately
      // since Index::Search doesn't support batch natively in the same way
      for (size_t i = 0; i < qnum; ++i) {
        size_t query_idx = idx * batch_count_ + i;
        // The last batch may contain wrap-around padding; skip it.
        if (query_idx >= linear_queries_.size()) {
          break;
        }

        const auto &single_query = linear_queries_[query_idx];
        core_interface::DenseVector single_dense_query;
        single_dense_query.data = single_query.data();
        core_interface::VectorData single_query_data;
        single_query_data.vector = single_dense_query;

        core_interface::SearchResult search_result;
        int ret =
            index->Search(single_query_data, query_param_clone, &search_result);
        if (ret < 0) {
          LOG_ERROR("Failed to knn_search batch, ret=%d %s", ret,
                    IndexError::What(ret));
          return;
        }
        auto &knn_res = search_result.doc_list_;
        cal_recall(knn_res, query_idx);
      }
    } else {
      core_interface::SearchResult search_result;
      int ret = index->Search(query_data, query_param_clone, &search_result);
      if (ret < 0) {
        LOG_ERROR("Failed to knn_search, ret=%d %s", ret,
                  IndexError::What(ret));
        return;
      }
      auto &knn_res = search_result.doc_list_;
      cal_recall(knn_res, idx);
    }

    // std::cout << "id: " << index << ": \n" <<
    // knn_context->flow_context()->searcher_context()->profiler().display();
  }

 private:
  // Meta (data type and dimension) of the loaded queries; set by load_query().
  IndexQueryMeta qmeta_{};
  // Worker count; adjusted to the pool size / query count where needed.
  size_t threads_;
  // Whether tasks use the per-batch code path; decided in the constructor.
  bool call_batch_api_{false};
  // Directory for the per-thread log files; empty disables logging.
  string output_;
  // Queries per task (at least 1 after construction).
  size_t batch_count_;
  shared_ptr<ThreadPool> pool_;

  // for gt
  vector<vector<T>> linear_queries_;
  vector<SparseData<T>> linear_sparse_data_;
  vector<vector<uint64_t>> linear_taglists_;

  // for recall
  vector<vector<T>> batch_queries_;
  vector<vector<uint32_t>> batch_sparse_counts_;
  vector<vector<uint32_t>> batch_sparse_indices_;
  vector<vector<T>> batch_sparse_features_;
  vector<vector<vector<uint64_t>>> batch_taglists_;

  // Both are assigned by load_query(); zero until then so that an early read
  // never sees an indeterminate value.
  size_t dim_{0};
  size_t total_querys_{0};

  // topk -> accumulated recall percentage (summed over queries).
  map<size_t, float> recall_res_;
  // Requested topk values, sorted ascending by run_dense().
  vector<int32_t> topk_ids_;
  // Ground truth per query: (key, score) pairs.
  vector<vector<pair<uint64_t, float>>> gt_;

  bool external_gt_file_enabled_{false};

  FilterMode filter_mode_{FM_NONE};

  // Set by the SIGINT handler to stop dispatching new tasks.
  // NOTE(review): written from a signal handler as a plain bool - consider
  // volatile sig_atomic_t / std::atomic together with its definition.
  static bool STOP_NOW;

  // Tag lists for filtering
  std::vector<std::vector<uint64_t>> id_to_tags_list_;
  std::vector<uint64_t> tag_key_list_;

 public:
  void set_tag_lists(const std::vector<std::vector<uint64_t>> &id_to_tags_list,
                     const std::vector<uint64_t> &tag_key_list) {
    id_to_tags_list_ = id_to_tags_list;
    tag_key_list_ = tag_key_list;
  }
};

// Definition of the per-instantiation stop flag; it starts out cleared.
template <typename T>
bool Recall<T>::STOP_NOW{false};

//--------------------------------------------------
// Sparse Recall
//--------------------------------------------------
template <typename T>
class SparseRecall {
 public:
  // Creates the worker pool and decides whether the batch code path is used.
  //
  // threads     : worker count; 0 lets ThreadPool choose and the resulting
  //               count is stored back into threads_.
  // output      : directory for per-thread recall logs (may be empty).
  // batch_count : queries per batch; 0 disables the batch path and is
  //               normalised to 1.
  // filter_mode : filter mode applied to every search.
  SparseRecall(size_t threads, const string &output, size_t batch_count,
               FilterMode filter_mode)
      : threads_(threads),
        output_(output),
        batch_count_(batch_count),
        filter_mode_{filter_mode} {
    const bool size_from_pool = (threads_ == 0);
    if (size_from_pool) {
      pool_ = make_shared<ThreadPool>(true);
      threads_ = pool_->count();
    } else {
      pool_ = make_shared<ThreadPool>(threads_, true);
    }
    cout << (size_from_pool ? "Using cpu count as thread pool count["
                            : "Using thread pool count[")
         << threads_ << "]" << endl;

    call_batch_api_ = !(batch_count_ < 1);
    if (!call_batch_api_) {
      batch_count_ = 1;
    }
  }

  // Signal handler installed by run_sparse() for SIGINT. The first call raises
  // STOP_NOW so the dispatch loop stops handing out work; a call made while
  // STOP_NOW is already set exits the process with the signal number.
  static void stop(int signo) {
    if (!STOP_NOW) {
      STOP_NOW = true;
      cout << "\rTrying to stop. press [Ctrl+C] again kill immediately." << endl
           << flush;
      return;
    }
    exit(signo);
  }

  // Applies transform_query_without_hybrid_scale() to every (query, sparse
  // feature) pair and collects the results.
  //
  // queries / sparse_features : parallel inputs; sparse_features must hold at
  //                             least queries.size() entries.
  // queries_output / sparse_features_output : cleared and then filled with one
  //                             entry per query; must not be null.
  // Returns 0 on success, -1 when an output pointer is null or the inputs are
  // not parallel (the outputs are left untouched in that case).
  int transform_queries_without_hybrid_scale(
      const vector<vector<T>> &queries,
      const vector<vector<T>> &sparse_features,
      vector<vector<T>> *queries_output,
      vector<vector<T>> *sparse_features_output) {
    if (!queries_output || !sparse_features_output) {
      LOG_ERROR("input should not be empty in transfrom queries");

      return -1;
    }

    // sparse_features[i] is read for every query below, so a shorter vector
    // would be indexed out of range.
    if (sparse_features.size() < queries.size()) {
      LOG_ERROR("sparse feature count %zu is less than query count %zu",
                sparse_features.size(), queries.size());

      return -1;
    }

    queries_output->clear();
    sparse_features_output->clear();
    queries_output->reserve(queries.size());
    sparse_features_output->reserve(queries.size());

    for (size_t i = 0; i < queries.size(); ++i) {
      vector<T> query_output;
      vector<T> sparse_feature_output;

      transform_query_without_hybrid_scale(queries[i], sparse_features[i],
                                           &query_output,
                                           &sparse_feature_output);

      // The temporaries are not used again, so hand their storage over.
      queries_output->push_back(std::move(query_output));
      sparse_features_output->push_back(std::move(sparse_feature_output));
    }

    return 0;
  }

  // Runs the sparse recall measurement and prints "Recall@k" for every
  // requested k.
  //
  // index       : index to search.
  // query_param : query parameters handed to every recall_one_sparse() task.
  // recall_tops : comma separated list of top-k values to report.
  // gt_count    : number of ground truth entries per query; raised to the
  //               largest requested top-k when smaller.
  //
  // Unless an external ground truth file was loaded, ground truth is obtained
  // through load_gt_sparse(). Errors are logged and end the run early.
  void run_sparse(core_interface::Index::Pointer index,
                  core_interface::BaseIndexQueryParam::Pointer query_param,
                  const string &recall_tops, size_t gt_count) {
    StringHelper::Split(recall_tops, ",", &topk_ids_);
    std::sort(topk_ids_.begin(), topk_ids_.end());

    for (auto i : topk_ids_) {
      recall_res_[i] = 0.0f;
    }
    // rbegin() of an empty map must not be dereferenced.
    if (recall_res_.empty()) {
      LOG_ERROR("No recall top-k value given, recall_tops: %s",
                recall_tops.c_str());
      return;
    }
    size_t topk = recall_res_.rbegin()->first;

    gt_count = topk < gt_count ? gt_count : topk;

    if (external_gt_file_enabled_) {
      cout << "Internal ground truth file NOT used since external ground truth "
              "file has been loaded"
           << endl;
    } else {
      cout << "Loading internal ground truth file" << endl;

      if (!load_gt_sparse(index, gt_count)) {
        LOG_ERROR("Load ground truth file failed!");
        return;
      }
    }

    // Without queries there is nothing to dispatch, and the report below
    // would divide by zero.
    if (batch_sparse_counts_.empty()) {
      LOG_ERROR("No query loaded, nothing to recall");
      return;
    }

    if (batch_sparse_counts_.size() < threads_) {
      threads_ = batch_sparse_counts_.size();
      // Same (count, flag) argument order as the pool built in the
      // constructor; the previous (true, threads_) order passed the flag as
      // the thread count.
      pool_ = make_shared<ThreadPool>(threads_, true);
      cout << "Query size too small, resize thread pool count[" << threads_
           << "]" << endl;
    }

    // Prepare file handler: one (knn, linear) stream pair per worker thread.
    // The streams are owned by this function and released after the pool has
    // drained.
    vector<pair<fstream *, fstream *>> output_fs;
    if (!output_.empty()) {
      string cmd = "mkdir -p " + output_;
      int ret = system(cmd.c_str());
      if (ret != 0) {
        LOG_ERROR("execute cmd %s failed, ret=%d", cmd.c_str(), ret);
        return;
      }
      struct stat sb;
      if (stat(output_.c_str(), &sb) == 0 && S_ISDIR(sb.st_mode)) {
        cout << "logs output to : " << output_ << endl;
        output_fs.reserve(threads_);
        for (size_t i = 0; i < threads_; ++i) {
          fstream *fs_k = new fstream();
          fs_k->open(output_ + "/t" + to_string(i) + ".knn", ios::out);
          fstream *fs_l = new fstream();
          fs_l->open(output_ + "/t" + to_string(i) + ".linear", ios::out);
          output_fs.push_back(make_pair(fs_k, fs_l));
        }
      }
    }

    signal(SIGINT, stop);
    size_t i = 0;
    for (; !STOP_NOW && i < batch_sparse_counts_.size();) {
      // Throttle: do not queue more tasks than there are workers.
      if (pool_->pending_count() >= pool_->count()) {
        this_thread::sleep_for(chrono::microseconds(1));
        continue;
      }

      Closure::Pointer task =
          Closure::New(this, &SparseRecall::recall_one_sparse, index,
                       query_param, topk, i, output_fs);
      pool_->enqueue_and_wake(task);

      i++;
    }
    pool_->wait_finish();

    for (auto &fs : output_fs) {
      fs.first->close();
      fs.second->close();
      delete fs.first;
      delete fs.second;
    }
    cout << "Process query: " << i << endl;
    for (const auto &it : recall_res_) {
      cout << "Recall@" << it.first << ": "
           << it.second / linear_queries_.size() << endl;
    }
  }

  bool load_query(const std::string &query_file, const std::string &first_sep,
                  const std::string &second_sep) {
    TxtInputReader<T> reader;

    if (!reader.load_query(query_file, first_sep, second_sep, linear_queries_,
                           linear_sparse_data_, linear_taglists_)) {
      LOG_ERROR("Load query error");
      return false;
    }

    if (batch_count_ == 1) {
      for (size_t i = 0; i < linear_sparse_data_.size(); ++i) {
        vector<uint32_t> sparse_count;
        sparse_count.push_back(linear_sparse_data_[i].count);

        batch_sparse_counts_.push_back(sparse_count);
        batch_sparse_indices_.push_back(linear_sparse_data_[i].indices);
        batch_sparse_features_.push_back(linear_sparse_data_[i].features);
      }
    } else {
      size_t num_batch =
          (linear_queries_.size() + batch_count_ - 1) / batch_count_;
      size_t idx = 0;
      for (size_t n = 0; n < num_batch; ++n) {
        vector<uint32_t> batch_sparse_count;
        vector<uint32_t> batch_sparse_indices;
        vector<T> batch_sparse_feature;

        for (size_t i = 0; i < batch_count_; ++i) {
          batch_sparse_count.push_back(linear_sparse_data_[idx].count);

          for (size_t k = 0; k < linear_sparse_data_[idx].indices.size(); ++k) {
            batch_sparse_indices.push_back(linear_sparse_data_[idx].indices[k]);
          }

          for (size_t k = 0; k < linear_sparse_data_[idx].features.size();
               ++k) {
            batch_sparse_feature.push_back(
                linear_sparse_data_[idx].features[k]);
          }

          idx = (idx + 1) % linear_queries_.size();
        }
        batch_sparse_counts_.push_back(batch_sparse_count);
        batch_sparse_indices_.push_back(batch_sparse_indices);
        batch_sparse_features_.push_back(batch_sparse_feature);
      }
    }

    total_querys_ = linear_queries_.size();
    if (typeid(T) == typeid(float)) {
      qmeta_.set_data_type(IndexMeta::DataType::DT_FP32);
    } else if (typeid(T) == typeid(int8_t)) {
      qmeta_.set_data_type(IndexMeta::DataType::DT_INT8);
    } else {
      LOG_ERROR("unsupported type");
      return false;
    }

    cout << "Load query done!" << endl;
    return true;
  }

  // Fills gt_ with the ground truth of every loaded sparse query.
  //
  // The ground truth is cached in a file named "gt.<crc>" in the working
  // directory, where <crc> comes from compute_crc(). When that file does not
  // exist, a linear search (FlatQueryParam with is_linear set) is run per
  // query on the thread pool, the results are written to "gt.<crc>.tmp" and
  // the file is renamed into place. When it exists, it is read back instead.
  //
  // File layout, one record per query:
  //   int count, followed by `count` pairs of (uint64_t key, float score).
  // Records with fewer results are padded with key -1 / NaN score; the
  // padding is dropped again when the file is read.
  //
  // index    : index to search when the cache has to be built.
  // gt_count : number of ground truth entries per query.
  // Returns false on any search or file error.
  bool load_gt_sparse(core_interface::Index::Pointer index, size_t gt_count) {
    std::string crc_str = compute_crc();

    string gt_file = string("gt.") + crc_str;

    File gtf;
    if (!gtf.IsRegular(gt_file.c_str())) {
      cout << "Ground truth file[" << gt_file << "] not exist, try to create it"
           << endl;
      ElapsedTime timer;
      size_t size = sizeof(uint64_t) + sizeof(float);
      size_t file_size =
          linear_sparse_data_.size() * (sizeof(int) + size * gt_count);

      std::string gt_file_temp = gt_file + ".tmp";
      if (!gtf.create(gt_file_temp.c_str(), file_size)) {
        LOG_ERROR("Failed to create ground truth file[%s]",
                  gt_file_temp.c_str());
        return false;
      }

      gt_.resize(linear_sparse_data_.size());

      atomic_bool error(false);
      size_t count = 0;
      float s = linear_sparse_data_.size() / 100.0;
      size_t pc = 0;
      SpinMutex spin_lock;

      function<void(size_t)> fun = [&](size_t i) {
        // Progress bar; the counters are shared between workers.
        spin_lock.lock();
        count++;
        size_t process = (size_t)ceil(count / s);
        if (process > pc) {
          pc = process;
          stringstream msg;
          msg << "\r" << setw(3) << setfill(' ') << process << "% " << left
              << setfill('=') << setw(process / 2 + 1) << "[" << right
              << setfill(' ') << setw(51 - process / 2) << "]";
          cout << msg.str() << flush;
        }
        spin_lock.unlock();

        SparseData<T> sparse_data = linear_sparse_data_[i];

        // prefilter
        FilterResultCache filter_cache;
        std::shared_ptr<IndexFilter> filter_ptr = nullptr;
        if (filter_mode_ == FM_TAG) {
          // Guard against a missing entry before indexing batch_taglists_.
          if (i >= batch_taglists_.size() || batch_taglists_[i].size() != 1) {
            LOG_ERROR("query tag list not equal to one!");
            return;
          }

          int ret = filter_cache.filter(id_to_tags_list_, batch_taglists_[i][0],
                                        tag_key_list_);
          if (ret != 0) {
            LOG_ERROR("prefilter failed, idx: %zu", i);
            return;
          }

          // filter_cache outlives the search below, so capturing it by
          // reference is safe here.
          auto filterFunc = [&](uint64_t key) {
            return filter_cache.find(key);
          };

          filter_ptr = std::make_shared<IndexFilter>();
          filter_ptr->set(filterFunc);
        }

        core_interface::SparseVector sparse_query;
        sparse_query.count = sparse_data.count;
        sparse_query.indices = sparse_data.indices.data();
        sparse_query.values = sparse_data.features.data();
        core_interface::VectorData query_data;
        query_data.vector = sparse_query;

        auto query_param = std::make_shared<core_interface::FlatQueryParam>();
        query_param->topk = gt_count;
        query_param->is_linear = true;
        query_param->filter = filter_ptr;

        core_interface::SearchResult search_result;
        int ret = index->Search(query_data, query_param, &search_result);
        if (ret < 0) {
          LOG_ERROR("Failed to sparse linear search, ret=%d", ret);
          error.exchange(true);
          return;
        }
        auto &result = search_result.doc_list_;

        vector<pair<uint64_t, float>> one_gt;
        one_gt.reserve(gt_count);

        for (const auto &knn : result) {
          one_gt.emplace_back(knn.key(), knn.score());
        }
        gt_[i] = std::move(one_gt);
      };

      for (size_t i = 0; i < linear_sparse_data_.size(); ++i) {
        if (error) {
          break;
        }
        pool_->enqueue_and_wake(Closure::New(fun, i));
      }
      pool_->wait_finish();

      if (error) {
        cout << endl
             << "Ground truth file[" << gt_file << "] create failed!" << endl;
        gtf.close();
        // Only the temporary file exists at this point; remove that one.
        remove(gt_file_temp.c_str());
        return false;
      }

      // The record header is an int, so convert explicitly instead of writing
      // the first bytes of the size_t.
      int gt_count_header = static_cast<int>(gt_count);
      for (size_t i = 0; i < gt_.size(); ++i) {
        auto &gt = gt_[i];

        gtf.write(&gt_count_header, sizeof(int));

        // Never write more than gt_count pairs, otherwise the fixed record
        // size assumed by the reader below would be violated.
        const size_t stored = std::min(gt.size(), gt_count);
        for (size_t j = 0; j < stored; j++) {
          auto &one_gt = gt[j];

          gtf.write(&one_gt.first, sizeof(uint64_t));
          gtf.write(&one_gt.second, sizeof(float));
        }

        // if ground truth is less than gt count, fill it up
        if (stored < gt_count) {
          std::cout
              << "WARN: GT result count less than GT expected count, index: "
              << i << ", expected GT count: " << gt_count
              << ", actual GT count: " << gt.size() << std::endl;

          uint64_t key{-1LLU};
          float score{std::nanf("")};

          for (size_t j = stored; j < gt_count; ++j) {
            gtf.write(&key, sizeof(uint64_t));
            gtf.write(&score, sizeof(float));
          }
        }
      }
      gtf.close();

      if (!File::Rename(gt_file_temp, gt_file)) {
        LOG_ERROR("failed to rename ground truth file, src: %s, dst: %s",
                  gt_file_temp.c_str(), gt_file.c_str());

        return false;
      }

      cout << endl
           << "Ground truth file create successful in "
           << timer.milli_seconds() / 1000 << "s." << endl;
    } else {
      if (!gtf.open(gt_file.c_str(), true)) {
        LOG_ERROR("Failed to open ground truth file[%s]", gt_file.c_str());
        return false;
      }
      size_t file_size = gtf.size();

      constexpr size_t LENGTH = 10240;
      constexpr size_t GT_PAIR_SIZE = sizeof(uint64_t) + sizeof(float);

      if (file_size < sizeof(int)) {
        LOG_ERROR("Ground truth file[%s] content error!", gt_file.c_str());
        gtf.close();
        return false;
      }

      // Owned by a vector so that every return path releases it.
      std::vector<char> buffer(LENGTH);
      gtf.read(buffer.data(), sizeof(int));

      size_t gt_count_input = (size_t) * (int *)buffer.data();
      size_t one_query_line_size = sizeof(int) + GT_PAIR_SIZE * gt_count_input;

      if (gt_count != gt_count_input || file_size % one_query_line_size != 0) {
        LOG_ERROR("Ground truth file[%s] content error!", gt_file.c_str());
        gtf.close();
        return false;
      }

      size_t query_num = file_size / one_query_line_size;
      if (one_query_line_size > buffer.size()) {
        buffer.resize(one_query_line_size);
      }

      for (size_t n = 0; n < query_num; ++n) {
        gtf.read(n * one_query_line_size, buffer.data(), one_query_line_size);
        vector<pair<uint64_t, float>> one_gt;
        one_gt.reserve(gt_count);

        const char *record = buffer.data() + sizeof(int);
        for (size_t i = 0; i < gt_count; ++i) {
          uint64_t key = *(const uint64_t *)(record + GT_PAIR_SIZE * i);
          float score =
              *(const float *)(record + GT_PAIR_SIZE * i + sizeof(uint64_t));

          // key -1 marks padding written for short result lists.
          if (key != -1LLU) {
            one_gt.emplace_back(key, score);
          }
        }

        gt_.emplace_back(std::move(one_gt));
      }

      cout << "Load ground truth file[" << gt_file << "] done!" << endl;
    }

    return true;
  }

  // Loads ground truth from a user supplied text file into gt_.
  //
  // external_gt_file : path of the ground truth file.
  // first_sep / second_sep : separators forwarded to the reader.
  // On success external_gt_file_enabled_ is set, which makes run_sparse() skip
  // the internal ground truth. Returns the reader's result.
  bool load_external_gt_file(const std::string &external_gt_file,
                             const std::string &first_sep,
                             const std::string &second_sep) {
    TxtInputReader<T> reader;
    const bool loaded =
        reader.load_external_gt(external_gt_file, first_sep, second_sep, gt_);

    if (!loaded) {
      LOG_ERROR("Failed to load ground truth file!");
      return loaded;
    }

    cout << "Load external ground truth file["
         << File::BaseName(external_gt_file) << "] done!" << endl;
    external_gt_file_enabled_ = true;
    return loaded;
  }

 private:
  // Returns the CRC32C over all loaded sparse queries (count, indices and
  // features of each, in load order) as an upper-case hex string without
  // padding. With no sparse data loaded the result is "0".
  std::string compute_crc() {
    uint32_t checksum = 0u;
    // sparse
    for (auto &sparse : linear_sparse_data_) {
      checksum = Crc32c::Hash(&(sparse.count), sizeof(uint32_t), checksum);
      checksum = Crc32c::Hash(sparse.indices.data(),
                              sparse.count * sizeof(uint32_t), checksum);
      checksum = Crc32c::Hash(sparse.features.data(), sparse.count * sizeof(T),
                              checksum);
    }

    char hex_text[64];
    snprintf(hex_text, sizeof(hex_text), "%X", checksum);

    return std::string(hex_text);
  }


  // Searches one entry of the batch_* vectors and adds the resulting recall
  // percentages to recall_res_.
  //
  // index       : index to search.
  // query_param : template parameters; a clone is adjusted and used.
  // topk        : number of results requested per query.
  // idx         : position in batch_sparse_counts_/indices_/features_.
  // output_fs   : optional (knn, linear) log streams, indexed by the pool
  //               thread index; when it holds no entry for this thread no log
  //               is written.
  //
  // Two comparison modes exist: when g_compare_by_id is set, results are
  // matched by key, otherwise by score within g_recall_precision. Errors are
  // logged and abort the handling of this entry.
  void recall_one_sparse(
      core_interface::Index::Pointer index,
      core_interface::BaseIndexQueryParam::Pointer query_param, size_t topk,
      size_t idx,
      std::vector<pair<std::fstream *, std::fstream *>> &output_fs) {
    const auto &sparse_count = batch_sparse_counts_[idx];
    const auto &sparse_index = batch_sparse_indices_[idx];
    const auto &sparse_feature = batch_sparse_features_[idx];

    size_t thread_index = pool_->indexof_this();
    fstream *knn_fs = nullptr;
    fstream *linear_fs = nullptr;
    if (output_fs.size() > thread_index) {
      knn_fs = output_fs[thread_index].first;
      linear_fs = output_fs[thread_index].second;
    }

    auto cal_recall = [&, this](const std::vector<IndexDocument> &knn_res,
                                size_t query_idx) {
      // An external ground truth file may cover fewer queries than loaded.
      if (query_idx >= gt_.size()) {
        LOG_ERROR("no ground truth for query, idx: %zu", query_idx);
        return;
      }

      vector<IndexDocument> linear_res;

      size_t result_size = std::min(topk, gt_[query_idx].size());
      if (result_size == 0) {
        return;
      }

      linear_res.reserve(result_size);
      for (size_t i = 0; i < result_size; ++i) {
        const auto &gt_node = gt_[query_idx][i];

        linear_res.emplace_back(gt_node.first, gt_node.second, gt_node.first);
      }

      if (knn_fs) {
        for (const auto &knn : knn_res) {
          string str = "query[" + to_string(query_idx) + "]\tkey[" +
                       to_string(knn.key()) + "], dist[" +
                       to_string(knn.score()) + "]\n";
          knn_fs->write(str.c_str(), str.size());
        }
      }

      size_t match = 0;
      // Ground truth is treated as descending only when its first score is
      // larger than its last one.
      bool asc =
          (linear_res.size() > 1 &&
           (linear_res[0].score() > linear_res[linear_res.size() - 1].score()))
              ? false
              : true;

      map<int32_t, size_t> topk_matchs;
      if (g_compare_by_id) {
        for (size_t i = 0; i < topk_ids_.size(); ++i) {
          topk_matchs[topk_ids_[i]] = 0;
        }
      }

      for (size_t i = 0, j = 0; i < linear_res.size();) {
        bool m = false;       // if current doc matched in max topk
        bool changed = true;  // if i changed
        if (g_compare_by_id) {
          for (size_t k = 0; k < topk_ids_.size(); ++k) {
            // Extend the window past topk while the following results tie
            // with the previous score.
            size_t dynamic_size = (size_t)topk_ids_[k];
            for (; dynamic_size + 1 < knn_res.size(); ++dynamic_size) {
              if (fabs(knn_res[dynamic_size - 1].score() -
                       knn_res[dynamic_size].score()) >=
                  numeric_limits<float>::epsilon()) {
                break;
              }
            }
            for (size_t l = 0; l < dynamic_size && l < knn_res.size(); ++l) {
              if (linear_res[i].key() == knn_res[l].key()) {
                topk_matchs[topk_ids_[k]]++;
                if (k == topk_ids_.size() - 1) {
                  m = true;
                }
                break;
              }
            }
          }
          ++i;

          auto it = recall_res_.find(i);
          if (it != recall_res_.end()) {
            lock_guard<mutex> lock(recall_lock);
            it->second += 100.0 * topk_matchs[i] / i;
          }
        } else {
          size_t cur_topk = i + 1;
          if (j < knn_res.size()) {
            if (fabs(linear_res[i].score() - knn_res[j].score()) <
                g_recall_precision) {
              ++j;
              ++i;
              match++;
              m = true;
            } else {
              if ((asc && linear_res[i].score() < knn_res[j].score()) ||
                  (!asc && linear_res[i].score() > knn_res[j].score())) {
                ++i;
              } else {
                changed = false;
                ++j;
              }
            }
          } else {
            ++i;
          }

          auto it = recall_res_.find(cur_topk);
          if (changed && it != recall_res_.end()) {
            lock_guard<mutex> lock(recall_lock);
            it->second += 100.0 * match / cur_topk;
          }
        }

        if (linear_fs && changed) {
          // Log the query index (not the batch index) so the entry lines up
          // with the knn log written above.
          string str = string(m ? "    HIT" : "NOT HIT") + "  query[" +
                       to_string(query_idx) + "]\tkey[" +
                       to_string(linear_res[i - 1].key()) + "], dist[" +
                       to_string(linear_res[i - 1].score()) + "]\n";
          linear_fs->write(str.c_str(), str.size());
        }
      }
    };

    FilterResultCache filter_cache;
    std::shared_ptr<IndexFilter> filter_ptr = nullptr;
    if (filter_mode_ == FM_TAG) {
      // Guard against a missing entry before indexing batch_taglists_.
      if (idx >= batch_taglists_.size() || batch_taglists_[idx].size() != 1) {
        LOG_ERROR("query tag list not equal to one!");
        return;
      }

      int ret = filter_cache.filter(id_to_tags_list_, batch_taglists_[idx][0],
                                    tag_key_list_);
      if (ret != 0) {
        LOG_ERROR("prefilter failed, idx: %zu", idx);
        return;
      }

      // filter_cache lives until the end of this function, i.e. longer than
      // every search that uses the filter.
      auto filterFunc = [&](uint64_t key) { return filter_cache.find(key); };

      filter_ptr = std::make_shared<core::IndexFilter>();
      filter_ptr->set(filterFunc);
    }

    core_interface::SparseVector sparse_query;
    sparse_query.count = sparse_count[0];
    sparse_query.indices = sparse_index.data();
    sparse_query.values = sparse_feature.data();
    core_interface::VectorData query_data;
    query_data.vector = sparse_query;

    auto query_param_clone = query_param->Clone();
    query_param_clone->topk = topk;
    query_param_clone->filter = filter_ptr;
    // NOTE(review): the measured search also sets is_linear, like the ground
    // truth search in load_gt_sparse() - confirm this is intended.
    query_param_clone->is_linear = true;

    if (call_batch_api_) {
      // For batch search, we need to search each query separately
      for (size_t i = 0; i < sparse_count.size(); ++i) {
        size_t query_idx = idx * batch_count_ + i;
        if (query_idx >= linear_sparse_data_.size()) {
          break;
        }

        const auto &single_sparse = linear_sparse_data_[query_idx];
        core_interface::SparseVector single_sparse_query;
        single_sparse_query.count = single_sparse.count;
        single_sparse_query.indices = single_sparse.indices.data();
        single_sparse_query.values = single_sparse.features.data();
        core_interface::VectorData single_query_data;
        single_query_data.vector = single_sparse_query;

        core_interface::SearchResult search_result;
        int ret =
            index->Search(single_query_data, query_param_clone, &search_result);
        if (ret < 0) {
          LOG_ERROR("Failed to sparse_knn_search batch, ret=%d %s", ret,
                    IndexError::What(ret));
          return;
        }
        auto &knn_res = search_result.doc_list_;
        cal_recall(knn_res, query_idx);
      }
    } else {
      core_interface::SearchResult search_result;
      int ret = index->Search(query_data, query_param_clone, &search_result);
      if (ret < 0) {
        LOG_ERROR("Failed to sparse_knn_search, ret=%d %s", ret,
                  IndexError::What(ret));
        return;
      }
      auto &knn_res = search_result.doc_list_;
      cal_recall(knn_res, idx);
    }
  }

 private:
  // Query meta; load_query() records the element data type here.
  IndexQueryMeta qmeta_{};
  // Worker count; may be lowered by run_sparse() when there are fewer queries.
  size_t threads_;
  // True when the constructor was given a batch count of at least 1.
  bool call_batch_api_;
  // Directory for the per-thread recall logs; empty disables logging.
  string output_;
  // Queries per batch, never below 1.
  size_t batch_count_;
  // Pool running the ground truth and recall tasks.
  shared_ptr<ThreadPool> pool_;

  // for gt
  // Filled by TxtInputReader<T>::load_query; its size is used as the query
  // count when batching and when reporting recall.
  vector<vector<T>> linear_queries_;
  // One sparse vector (count, indices, features) per query.
  vector<SparseData<T>> linear_sparse_data_;
  // Not referenced by any method of this class.
  vector<uint32_t> linear_partitions_;
  // Filled by TxtInputReader<T>::load_query.
  vector<vector<uint64_t>> linear_taglists_;

  // Not referenced by any method of this class.
  std::map<std::string, vector<vector<T>>> linear_queries_scaled_;
  std::map<std::string, vector<vector<T>>> linear_sparse_features_scaled_;

  // for recall
  // Not referenced by any method of this class.
  vector<vector<T>> batch_queries_;
  // Per batch: the sparse counts, and the concatenated indices and features
  // of the queries in that batch (built by load_query()).
  vector<vector<uint32_t>> batch_sparse_counts_;
  vector<vector<uint32_t>> batch_sparse_indices_;
  vector<vector<T>> batch_sparse_features_;
  // Not referenced by any method of this class.
  vector<vector<uint32_t>> batch_partitions_;
  // Indexed by load_gt_sparse() and recall_one_sparse() when filter_mode_ is
  // FM_TAG; each entry is expected to hold exactly one tag list.
  vector<vector<vector<uint64_t>>> batch_taglists_;

  // Not referenced by any method of this class.
  std::map<std::string, vector<vector<T>>> batch_queries_scaled_;
  std::map<std::string, vector<vector<T>>> batch_sparse_features_scaled_;

  // Set by load_query() to linear_queries_.size().
  size_t total_querys_;

  // Top-k -> sum of per-query recall percentages; run_sparse() divides by the
  // query count when printing.
  map<size_t, float> recall_res_;
  // Requested top-k values, sorted ascending by run_sparse().
  vector<int32_t> topk_ids_;
  // Ground truth per query as (key, score) pairs.
  vector<vector<pair<uint64_t, float>>> gt_;

  // Not referenced by any method of this class.
  map<string, vector<vector<pair<uint64_t, float>>>> gt_hybrid_;
  // Set once load_external_gt_file() succeeded.
  bool external_gt_file_enabled_{false};

  // Filter mode given to the constructor; FM_TAG enables the tag prefilter.
  FilterMode filter_mode_{FM_NONE};

  // Tag lists for filtering
  std::vector<std::vector<uint64_t>> id_to_tags_list_;
  std::vector<uint64_t> tag_key_list_;

 public:
  void set_tag_lists(const std::vector<std::vector<uint64_t>> &id_to_tags_list,
                     const std::vector<uint64_t> &tag_key_list) {
    id_to_tags_list_ = id_to_tags_list;
    tag_key_list_ = tag_key_list;
  }

  // Raised by stop() on the first SIGINT and polled by the dispatch loop in
  // run_sparse().
  static bool STOP_NOW;
};

// Out-of-class definition of SparseRecall<T>::STOP_NOW; one instance exists
// per template instantiation and it starts out false.
template <typename T>
bool SparseRecall<T>::STOP_NOW = false;

// Verifies that the configuration contains every mandatory entry:
//   IndexCommon.{IndexConfig, IndexPath, TopK, QueryFile}
//   QueryConfig.QueryParam
// The first missing entry is logged and false is returned; true means all of
// them are present.
bool check_config(YAML::Node &config_node) {
  auto index_common = config_node["IndexCommon"];
  if (!index_common) {
    LOG_ERROR("Can not find [IndexCommon] in config");
    return false;
  }

  // Mandatory children of IndexCommon, checked in this order.
  if (!index_common["IndexConfig"]) {
    LOG_ERROR("Can not find [IndexConfig] in config");
    return false;
  }
  if (!index_common["IndexPath"]) {
    LOG_ERROR("Can not find [IndexPath] in config");
    return false;
  }
  if (!index_common["TopK"]) {
    LOG_ERROR("Can not find [TopK] in config");
    return false;
  }
  if (!index_common["QueryFile"]) {
    LOG_ERROR("Can not find [QueryFile] in config");
    return false;
  }

  auto query_section = config_node["QueryConfig"];
  if (!query_section) {
    LOG_ERROR("Can not find [QueryConfig] in config");
    return false;
  }
  if (!query_section["QueryParam"]) {
    LOG_ERROR("Can not find [QueryConfig.QueryParam] in config");
    return false;
  }

  return true;
}

// Prints the command line synopsis to standard output.
void usage() {
  std::cout << "Usage: recall CONFIG.yaml [plugin file path]" << std::endl;
}

int recall_dense(std::string &query_type, size_t thread_count,
                 size_t batch_count, string top_k, size_t gt_count,
                 string query_file, string &first_sep, string &second_sep,
                 string &ground_truth_file, string &ground_truth_first_sep,
                 string ground_truth_second_sep,
                 core_interface::Index::Pointer index,
                 core_interface::BaseIndexQueryParam::Pointer query_param,
                 string &index_dir, string &log_dir, FilterMode filter_mode) {
  std::vector<std::vector<uint64_t>> id_to_tags_list;
  std::vector<uint64_t> tag_key_list;
  // Load tag lists if available
  load_taglists(index_dir, id_to_tags_list, tag_key_list);

  if (query_type == "float") {
    Recall<float> recall(thread_count, log_dir, batch_count, filter_mode);
    if (!recall.load_query(query_file, first_sep, second_sep)) {
      return -1;
    }

    recall.set_tag_lists(id_to_tags_list, tag_key_list);

    if (ground_truth_file != "") {
      if (!recall.load_external_gt_file(ground_truth_file,
                                        ground_truth_first_sep,
                                        ground_truth_second_sep)) {
        return -1;
      }
    }

    recall.run_dense(index, query_param, top_k, gt_count);
  } else if (query_type == "int8") {
    Recall<int8_t> recall(thread_count, log_dir, batch_count, filter_mode);
    if (!recall.load_query(query_file, first_sep, second_sep)) {
      return -1;
    }

    recall.set_tag_lists(id_to_tags_list, tag_key_list);

    if (ground_truth_file != "") {
      if (!recall.load_external_gt_file(ground_truth_file,
                                        ground_truth_first_sep,
                                        ground_truth_second_sep)) {
        return -1;
      }
    }

    recall.run_dense(index, query_param, top_k, gt_count);
  } else if (query_type == "binary") {
    Recall<uint32_t> recall(thread_count, log_dir, batch_count, filter_mode);
    if (!recall.load_query(query_file, first_sep, second_sep)) {
      return -1;
    }

    recall.set_tag_lists(id_to_tags_list, tag_key_list);

    if (ground_truth_file != "") {
      if (!recall.load_external_gt_file(ground_truth_file,
                                        ground_truth_first_sep,
                                        ground_truth_second_sep)) {
        return -1;
      }
    }

    recall.run_dense(index, query_param, top_k, gt_count);
  } else if (query_type == "binary64") {
    Recall<uint64_t> recall(thread_count, log_dir, batch_count, filter_mode);
    if (!recall.load_query(query_file, first_sep, second_sep)) {
      return -1;
    }

    recall.set_tag_lists(id_to_tags_list, tag_key_list);

    if (ground_truth_file != "") {
      if (!recall.load_external_gt_file(ground_truth_file,
                                        ground_truth_first_sep,
                                        ground_truth_second_sep)) {
        return -1;
      }
    }

    recall.run_dense(index, query_param, top_k, gt_count);
  } else {
    LOG_ERROR("Can not recognize type: %s", query_type.c_str());
  }

  return 0;
}

// Runs the sparse recall measurement. Only query_type "float" is supported.
//
// Steps: load the queries, optionally load an external ground truth file,
// load the tag lists from index_dir, then run SparseRecall<float>.
// Returns 0 on success and -1 when any load step fails or query_type is not
// recognised.
int recall_sparse(std::string &query_type, size_t thread_count,
                  size_t batch_count, string top_k, size_t gt_count,
                  string &query_file, string &first_sep, string &second_sep,
                  string &ground_truth_file, string &ground_truth_first_sep,
                  string &ground_truth_second_sep,
                  core_interface::Index::Pointer index,
                  core_interface::BaseIndexQueryParam::Pointer query_param,
                  string &index_dir, string &log_dir, FilterMode filter_mode) {
  if (query_type != "float") {
    // An unknown type is a failure, so do not report success to the caller.
    LOG_ERROR("Can not recognize type: %s", query_type.c_str());
    return -1;
  }

  SparseRecall<float> recall(thread_count, log_dir, batch_count, filter_mode);
  if (!recall.load_query(query_file, first_sep, second_sep)) {
    return -1;
  }

  if (ground_truth_file != "") {
    if (!recall.load_external_gt_file(ground_truth_file, ground_truth_first_sep,
                                      ground_truth_second_sep)) {
      return -1;
    }
  }

  std::vector<std::vector<uint64_t>> id_to_tags_list;
  std::vector<uint64_t> tag_key_list;
  // Load tag lists if available
  if (load_taglists(index_dir, id_to_tags_list, tag_key_list) != 0) {
    LOG_ERROR("Failed to load tag lists");
    return -1;
  }

  recall.set_tag_lists(id_to_tags_list, tag_key_list);

  recall.run_sparse(index, query_param, top_k, gt_count);

  return 0;
}

// Sets g_recall_precision from its textual form.
//
// An empty string selects the default of 1e-6. Otherwise the text is parsed
// with std::stof and the parsed value is echoed to standard output.
// Returns `true` (i.e. 1) on success and -1 when std::stof rejects the text
// or the value is out of range.
int get_recall_precision(string &recall_precision_string) {
  constexpr float DEFAULT_RECALL_PRECISION = 1e-6;

  if (recall_precision_string.empty()) {
    g_recall_precision = DEFAULT_RECALL_PRECISION;
    return true;
  }

  try {
    // Parse first so the global is only touched when the text is valid.
    const float parsed = std::stof(recall_precision_string);
    g_recall_precision = parsed;
    std::cout << "Recall Score Precesion: " << g_recall_precision << std::endl;
  } catch (const std::invalid_argument &e) {
    LOG_ERROR("Exeception in getting recall precision: %s, value: %s", e.what(),
              recall_precision_string.c_str());
    return -1;
  } catch (const std::out_of_range &e) {
    LOG_ERROR(
        "Out of range exception in getting recall precision: %s, value: %s",
        e.what(), recall_precision_string.c_str());
    return -1;
  }

  return true;
}

/**
 * Entry point of the recall tool.
 *
 * argv[1] is the YAML configuration file; every further argument is passed
 * to the plugin broker as a plugin to load.
 *
 * The "IndexCommon" section of the configuration drives the run. Keys read
 * here: LogLevel, RecallLogDir, RecallThreadCount, RecallGTCount,
 * RecallBatchCount, CompareById, TopK, RecallScorePrecision, RetrievalMode,
 * FilterMode, QueryFile, QueryFirstSep, QuerySecondSep, QueryType,
 * ContainerType, GroundTruthFile, GroundTruthFirstSep, GroundTruthSecondSep
 * and IndexPath.
 *
 * @return 0 on success, -1 on any failure.
 */
int main(int argc, char *argv[]) {
  if (argc < 2) {
    usage();
    return -1;
  }

  IndexPluginBroker broker;
  std::string error;
  for (int i = 2; i < argc; ++i) {
    if (!broker.emplace(argv[i], &error)) {
      LOG_ERROR("Failed to load plugin: %s (%s)", argv[i], error.c_str());
      return -1;
    }
  }

  YAML::Node config_node;
  try {
    config_node = YAML::LoadFile(argv[1]);
  } catch (...) {
    LOG_ERROR("Load YAML file[%s] failed!", argv[1]);
    return -1;
  }
  if (!check_config(config_node)) {
    return -1;
  }
  auto config_common = config_node["IndexCommon"];

  map<string, int> LOG_LEVEL = {{"debug", IndexLogger::LEVEL_DEBUG},
                                {"info", IndexLogger::LEVEL_INFO},
                                {"warn", IndexLogger::LEVEL_WARN},
                                {"error", IndexLogger::LEVEL_ERROR},
                                {"fatal", IndexLogger::LEVEL_FATAL}};
  string log_level = config_common["LogLevel"]
                         ? config_common["LogLevel"].as<string>()
                         : "debug";
  transform(log_level.begin(), log_level.end(), log_level.begin(), ::tolower);
  // Unknown level names are ignored and the loggers keep their current level.
  if (LOG_LEVEL.find(log_level) != LOG_LEVEL.end()) {
    IndexLoggerBroker::SetLevel(LOG_LEVEL[log_level]);
    zvec::ailego::LoggerBroker::SetLevel(LOG_LEVEL[log_level]);
  }

  // Calculate Recall
  string log_dir = "";
  if (config_common["RecallLogDir"]) {
    log_dir = config_common["RecallLogDir"].as<string>();
  }
  size_t thread_count = config_common["RecallThreadCount"]
                            ? config_common["RecallThreadCount"].as<uint64_t>()
                            : 0;
  size_t gt_count = config_common["RecallGTCount"]
                        ? config_common["RecallGTCount"].as<uint64_t>()
                        : 100;
  size_t batch_count = config_common["RecallBatchCount"]
                           ? config_common["RecallBatchCount"].as<uint64_t>()
                           : 0;
  g_compare_by_id = config_common["CompareById"]
                        ? config_common["CompareById"].as<bool>()
                        : false;
  string top_k = config_common["TopK"].as<string>();

  string recall_precision_string =
      config_common["RecallScorePrecision"]
          ? config_common["RecallScorePrecision"].as<string>()
          : "";

  // Success is reported as a positive value; anything else (zero or a
  // negative error code) is a failure. A plain `!result` test would miss
  // negative error codes.
  if (get_recall_precision(recall_precision_string) <= 0) {
    LOG_ERROR("Get recall precision failed, value: %s",
              recall_precision_string.c_str());
    return -1;
  }

  // Any value other than "dense"/"sparse" keeps the dense default.
  RetrievalMode retrieval_mode{RM_DENSE};
  if (config_common["RetrievalMode"]) {
    std::string retrieval_mode_str =
        config_common["RetrievalMode"].as<string>();
    if (retrieval_mode_str == "dense") {
      retrieval_mode = RM_DENSE;
    } else if (retrieval_mode_str == "sparse") {
      retrieval_mode = RM_SPARSE;
    }
  }

  // Any value other than "tag" keeps filtering disabled.
  FilterMode filter_mode{FM_NONE};
  if (config_common["FilterMode"]) {
    std::string filter_mode_str = config_common["FilterMode"].as<string>();
    if (filter_mode_str == "tag") {
      filter_mode = FM_TAG;
    }
  }

  string query_file = config_common["QueryFile"].as<string>();

  string first_sep = config_common["QueryFirstSep"]
                         ? config_common["QueryFirstSep"].as<string>()
                         : ";";
  string second_sep = config_common["QuerySecondSep"]
                          ? config_common["QuerySecondSep"].as<string>()
                          : " ";
  string query_type = config_common["QueryType"]
                          ? config_common["QueryType"].as<string>()
                          : "float";
  // Parsed for configuration validation; not used further in this function.
  string container_type = config_common["ContainerType"]
                              ? config_common["ContainerType"].as<string>()
                              : "MMapFileStorage";

  string ground_truth_file = "";
  string ground_truth_first_sep = ";";
  string ground_truth_second_sep = " ";

  // The ground truth separators are only honoured when a file is configured.
  if (config_common["GroundTruthFile"]) {
    ground_truth_file = config_common["GroundTruthFile"].as<string>();

    if (config_common["GroundTruthFirstSep"]) {
      ground_truth_first_sep =
          config_common["GroundTruthFirstSep"].as<string>();
    }

    if (config_common["GroundTruthSecondSep"]) {
      ground_truth_second_sep =
          config_common["GroundTruthSecondSep"].as<string>();
    }
  }

  string index_dir = config_common["IndexPath"].as<string>();

  core_interface::Index::Pointer index;
  core_interface::BaseIndexQueryParam::Pointer query_param;
  if (parse_and_load_index_param(config_node, index_dir, index, query_param) !=
      0) {
    LOG_ERROR("Failed to parse and load index param");
    return -1;
  }

  // Keep the status of the recall run so that a failure is reflected in the
  // process exit code instead of being reported as "Recall done.".
  int ret = 0;
  if (retrieval_mode == RM_DENSE) {
    ret = recall_dense(query_type, thread_count, batch_count, top_k, gt_count,
                       query_file, first_sep, second_sep, ground_truth_file,
                       ground_truth_first_sep, ground_truth_second_sep, index,
                       query_param, index_dir, log_dir, filter_mode);
  } else if (retrieval_mode == RM_SPARSE) {
    ret = recall_sparse(query_type, thread_count, batch_count, top_k, gt_count,
                        query_file, first_sep, second_sep, ground_truth_file,
                        ground_truth_first_sep, ground_truth_second_sep, index,
                        query_param, index_dir, log_dir, filter_mode);
  } else {
    // Neither dense nor sparse: report the raw enum value.
    LOG_ERROR("unsupported retrieval mode: %d",
              static_cast<int>(retrieval_mode));
    return -1;
  }

  // Cleanup
  index->Close();

  if (ret != 0) {
    LOG_ERROR("Recall failed, ret=%d", ret);
    return -1;
  }

  cout << "Recall done." << endl;

  return 0;
}


================================================
FILE: tools/core/recall_original.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <sys/stat.h>
#include <signal.h>
#include <iomanip>
#include <iostream>
#include <mutex>
#include <ailego/parallel/lock.h>
#include <zvec/ailego/hash/crc32c.h>
#include <zvec/ailego/io/file.h>
#include <zvec/ailego/parallel/thread_pool.h>
#include <zvec/ailego/utility/string_helper.h>
#include <zvec/ailego/utility/time_helper.h>
#include "zvec/core/framework/index_plugin.h"
#include "zvec/core/interface/index_factory.h"
#include "zvec/core/interface/index_param.h"
#include "filter_result_cache.h"
#include "flow.h"
#include "txt_input_reader.h"

#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wshadow"
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
#elif defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#endif

#include <yaml-cpp/yaml.h>

#ifdef __clang__
#pragma clang diagnostic pop
#elif defined(__GNUC__) || defined(__GNUG__)
#pragma GCC diagnostic pop
#endif

using namespace std;
using namespace zvec::core;
using namespace zvec::ailego;

using Flow = Flow;
using SparseFlow = SparseFlow;

// Serializes updates of the per-top-k recall accumulators, which are
// modified from thread pool workers.
mutex recall_lock;
// When true, KNN results are matched against the ground truth by document
// key; otherwise they are matched by score.
bool g_compare_by_id = false;
// Maximum absolute score difference for which a KNN score and a ground
// truth score are treated as equal in score-based matching.
float g_recall_precision;

//--------------------------------------------------
// Recall
//--------------------------------------------------
// Kind of retrieval the recall run evaluates (dense or sparse vectors).
enum RetrievalMode { RM_UNDEFINED = 0, RM_DENSE = 1, RM_SPARSE = 2 };

// Result filtering applied to searches; FM_TAG installs a per-query tag
// based filter on the search context before searching.
enum FilterMode { FM_UNDEFINED = 0, FM_NONE = 1, FM_TAG = 2 };

template <typename T>
class Recall {
 public:
  //! Constructor
  //! \param threads      worker thread count; 0 lets the thread pool choose
  //!                     its own size, which is then adopted as threads_
  //! \param output       directory for per-thread result logs (may be empty)
  //! \param batch_count  queries per batch; 0 selects the single-query
  //!                     search API with an effective batch size of one
  //! \param filter_mode  result filter applied to searches
  Recall(size_t threads, const string &output, size_t batch_count,
         FilterMode filter_mode)
      : threads_(threads),
        output_(output),
        batch_count_(batch_count),
        filter_mode_{filter_mode} {
    const bool explicit_thread_count = (threads_ != 0);
    if (explicit_thread_count) {
      pool_ = make_shared<ThreadPool>(threads_, true);
      cout << "Using thread pool count[" << threads_ << "]" << endl;
    } else {
      pool_ = make_shared<ThreadPool>(true);
      threads_ = pool_->count();
      cout << "Using cpu count as thread pool count[" << threads_ << "]"
           << endl;
    }

    // The batch API is used whenever a batch size of at least one is given.
    call_batch_api_ = (batch_count_ >= 1);
    if (!call_batch_api_) {
      batch_count_ = 1;
    }
  }

  //! Signal handler: the first signal requests a graceful stop, a second
  //! one terminates the process with the signal number as exit status.
  static void stop(int signo) {
    if (!STOP_NOW) {
      STOP_NOW = true;
      cout << "\rTrying to stop. press [Ctrl+C] again kill immediately."
           << endl
           << flush;
      return;
    }
    exit(signo);
  }

  //! Run the dense recall evaluation and print "Recall@k" for every
  //! requested top-k.
  //! \param flower       search flow used for KNN and linear searches
  //! \param recall_tops  comma separated list of top-k values to report
  //! \param gt_count     number of ground truth entries per query; raised
  //!                     to the largest requested top-k when smaller
  //! Queries must have been loaded with load_query() beforehand. Errors are
  //! reported on stderr and abort the run.
  void run_dense(Flow *flower, const string &recall_tops, size_t gt_count) {
    StringHelper::Split(recall_tops, ",", &topk_ids_);
    std::sort(topk_ids_.begin(), topk_ids_.end());

    for (auto i : topk_ids_) {
      recall_res_[i] = 0.0f;
    }
    // Without any top-k value there is no largest key to read below.
    if (recall_res_.empty()) {
      cerr << "No recall top-k value specified!" << endl;
      return;
    }
    if (batch_queries_.empty()) {
      cerr << "No query loaded, nothing to recall!" << endl;
      return;
    }
    size_t topk = recall_res_.rbegin()->first;

    gt_count = std::max(gt_count, topk);

    if (external_gt_file_enabled_) {
      cout << "Internal ground truth file NOT used since external ground truth "
              "file has been loaded"
           << endl;
    } else {
      cout << "Loading internal ground truth file" << endl;

      if (!load_gt_dense(flower, gt_count)) {
        cerr << "Load ground truth file failed!" << endl;
        return;
      }
    }

    if (batch_queries_.size() < threads_) {
      threads_ = batch_queries_.size();
      // Same argument order as in the constructor: (thread count, flag).
      pool_ = make_shared<ThreadPool>(threads_, true);
      cout << "Query size too small, resize thread pool count[" << threads_
           << "]" << endl;
    }

    // Prepare file handler: one (.knn, .linear) stream pair per worker
    // thread. The raw pointers are released after all tasks have finished.
    vector<pair<fstream *, fstream *>> output_fs;
    if (!output_.empty()) {
      // NOTE(review): output_ is passed to the shell unescaped; it comes
      // from the tool configuration.
      string cmd = "mkdir -p " + output_;
      int ret = system(cmd.c_str());
      if (ret != 0) {
        std::cerr << "execute cmd " << cmd << " failed" << std::endl;
        return;
      }
      struct stat sb;
      if (stat(output_.c_str(), &sb) == 0 && S_ISDIR(sb.st_mode)) {
        cout << "logs output to : " << output_ << endl;
        for (size_t i = 0; i < threads_; ++i) {
          fstream *fs_k = new fstream();
          fs_k->open(output_ + "/t" + to_string(i) + ".knn", ios::out);
          fstream *fs_l = new fstream();
          fs_l->open(output_ + "/t" + to_string(i) + ".linear", ios::out);
          output_fs.push_back(make_pair(fs_k, fs_l));
        }
      }
    }

    signal(SIGINT, stop);
    size_t i = 0;
    for (; !STOP_NOW && i < batch_queries_.size();) {
      // Throttle: do not queue more tasks than the pool has threads.
      if (pool_->pending_count() >= pool_->count()) {
        this_thread::sleep_for(chrono::microseconds(1));
        continue;
      }

      Closure::Pointer task = Closure::New(this, &Recall::recall_one_dense,
                                           flower, topk, i, output_fs);
      pool_->enqueue_and_wake(task);

      i++;
    }
    pool_->wait_finish();

    for (auto &fs : output_fs) {
      fs.first->close();
      fs.second->close();
      delete fs.first;
      delete fs.second;
    }
    cout << "Process query: " << i << endl;
    for (const auto &it : recall_res_) {
      cout << "Recall@" << it.first << ": "
           << it.second / linear_queries_.size() << endl;
    }
  }

  bool load_query(const std::string &query_file, const std::string &first_sep,
                  const std::string &second_sep) {
    TxtInputReader<T> reader;

    if (!reader.load_query(query_file, first_sep, second_sep, linear_queries_,
                           linear_sparse_data_, linear_taglists_)) {
      cerr << "Load query error" << endl;
      return false;
    }

    if (batch_count_ == 1) {
      batch_queries_ = linear_queries_;

      for (size_t i = 0; i < linear_sparse_data_.size(); ++i) {
        vector<uint32_t> sparse_count;
        sparse_count.push_back(linear_sparse_data_[i].count);

        batch_sparse_counts_.push_back(sparse_count);
        batch_sparse_indices_.push_back(linear_sparse_data_[i].indices);
        batch_sparse_features_.push_back(linear_sparse_data_[i].features);
      }

      for (size_t i = 0; i < linear_taglists_.size(); ++i) {
        vector<vector<uint64_t>> new_taglists;
        new_taglists.push_back(linear_taglists_[i]);

        batch_taglists_.push_back(std::move(new_taglists));
      }
    } else {
      size_t num_batch =
          (linear_queries_.size() + batch_count_ - 1) / batch_count_;
      size_t idx = 0;
      for (size_t n = 0; n < num_batch; ++n) {
        vector<T> batch_query;
        vector<uint32_t> batch_sparse_count;
        vector<uint32_t> batch_sparse_indices;
        vector<T> batch_sparse_feature;
        vector<vector<uint64_t>> batch_taglists;

        for (size_t i = 0; i < batch_count_; ++i) {
          for (size_t k = 0; k < linear_queries_[idx].size(); ++k) {
            batch_query.push_back(linear_queries_[idx][k]);
          }

          batch_sparse_count.push_back(linear_sparse_data_[idx].count);

          for (size_t k = 0; k < linear_sparse_data_[idx].indices.size(); ++k) {
            batch_sparse_indices.push_back(linear_sparse_data_[idx].indices[k]);
          }

          for (size_t k = 0; k < linear_sparse_data_[idx].features.size();
               ++k) {
            batch_sparse_feature.push_back(
                linear_sparse_data_[idx].features[k]);
          }

          idx = (idx + 1) % linear_queries_.size();
        }

        batch_queries_.push_back(batch_query);
        batch_sparse_counts_.push_back(batch_sparse_count);
        batch_sparse_indices_.push_back(batch_sparse_indices);
        batch_sparse_features_.push_back(batch_sparse_feature);
        batch_taglists_.push_back(batch_taglists);
      }
    }

    dim_ = linear_queries_[0].size();
    total_querys_ = linear_queries_.size();
    if (typeid(T) == typeid(float)) {
      qmeta_.set_meta(IndexMeta::DataType::DT_FP32, dim_);
    } else if (typeid(T) == typeid(int8_t)) {
      qmeta_.set_meta(IndexMeta::DataType::DT_INT8, dim_);
    } else {
      cerr << "unsupported type";
      return false;
    }

    cout << "Load query done!" << endl;
    return true;
  }

  //! Load ground truth results from an externally provided text file.
  //! \param external_gt_file  path of the ground truth file
  //! \param first_sep         first-level separator handed to the reader
  //! \param second_sep        second-level separator handed to the reader
  //! \return true if the file was loaded; the internal ground truth file is
  //!         then no longer used by run_dense()
  bool load_external_gt_file(const std::string &external_gt_file,
                             const std::string &first_sep,
                             const std::string &second_sep) {
    TxtInputReader<T> reader;
    if (!reader.load_external_gt(external_gt_file, first_sep, second_sep,
                                 gt_)) {
      cerr << "Failed to load ground truth file!" << endl;
      return false;
    }

    cout << "Load external ground truth file["
         << File::BaseName(external_gt_file) << "] done!" << endl;
    external_gt_file_enabled_ = true;
    return true;
  }

 private:
  std::string compute_crc(size_t gt_count) {
    uint32_t crc = 0u;
    // dense
    if (batch_queries_.size() > 0) {
      size_t one_size = dim_ * sizeof(T);
      size_t data_size = total_querys_ * one_size + sizeof(size_t);
      char *data = new char[data_size];
      size_t q = 0;
      char *p = data;
      for (; q < batch_queries_.size(); ++q) {
        memcpy(p, batch_queries_[q].data(),
               batch_queries_[q].size() * sizeof(T));
        p += batch_queries_[q].size() * sizeof(T);
      }
      memcpy(p, &gt_count, sizeof(size_t));
      crc = Crc32c::Hash(data, data_size, crc);
      delete[] data;
    }

    // sparse
    if (linear_sparse_data_.size() > 0) {
      for (size_t i = 0; i < linear_sparse_data_.size(); ++i) {
        crc = Crc32c::Hash(&(linear_sparse_data_[i].count), sizeof(uint32_t),
                           crc);
        crc =
            Crc32c::Hash(linear_sparse_data_[i].indices.data(),
                         linear_sparse_data_[i].count * sizeof(uint32_t), crc);
        crc = Crc32c::Hash(linear_sparse_data_[i].features.data(),
                           linear_sparse_data_[i].count * sizeof(T), crc);
      }
    }

    char crc_str[64];
    snprintf(crc_str, sizeof(crc_str), "%X", crc);

    return std::string(crc_str);
  }

  //! Load the linear-search ground truth of all queries into gt_.
  //! \param flower    search flow used for the linear searches
  //! \param gt_count  number of ground truth entries stored per query
  //! \return true when gt_ has been filled, false on any error
  //! The ground truth is cached in the file "gt.<crc>" in the working
  //! directory. If that file does not exist it is computed with the thread
  //! pool, written to "gt.<crc>.tmp" and renamed afterwards. Each query
  //! record is an int holding gt_count followed by gt_count (uint64 key,
  //! float score) pairs; missing results are padded with key -1 / NaN.
  bool load_gt_dense(Flow *flower, size_t gt_count) {
    std::string crc_str = compute_crc(gt_count);

    string gt_file = string("gt.") + crc_str;

    constexpr size_t GT_PAIR_SIZE = sizeof(uint64_t) + sizeof(float);

    File gtf;
    if (!gtf.IsRegular(gt_file.c_str())) {
      cout << "Ground truth file[" << gt_file << "] not exist, try to create it"
           << endl;
      ElapsedTime timer;

      size_t file_size =
          linear_queries_.size() * (sizeof(int) + GT_PAIR_SIZE * gt_count);

      std::string gt_file_temp = gt_file + ".tmp";
      gtf.create(gt_file_temp.c_str(), file_size);

      gt_.resize(linear_queries_.size());

      atomic_bool error(false);
      size_t count = 0;
      float s = linear_queries_.size() / 100.0;
      size_t pc = 0;
      SpinMutex spin_lock;

      function<void(size_t)> fun = [&](size_t i) {
        // Progress bar, redrawn whenever the percentage advances.
        spin_lock.lock();
        count++;
        size_t process = (size_t)ceil(count / s);
        if (process > pc) {
          pc = process;
          stringstream msg;
          msg << "\r" << setw(3) << setfill(' ') << process << "% " << left
              << setfill('=') << setw(process / 2 + 1) << "[" << right
              << setfill(' ') << setw(51 - process / 2) << "]";
          cout << msg.str() << flush;
        }
        spin_lock.unlock();

        const auto &query = linear_queries_[i];
        Flow::Context::Pointer context = flower->create_context();
        if (!context) {
          cerr << "Failed to create search context" << endl;
          // An incomplete ground truth must not be cached as a valid file.
          error.store(true);
          return;
        }

        FilterResultCache filter_cache;
        if (filter_mode_ == FM_TAG) {
          if (i >= batch_taglists_.size() || batch_taglists_[i].size() != 1) {
            cerr << "query tag list not equal to one!" << endl;
            error.store(true);
            return;
          }

          int ret = filter_cache.filter(flower->id_to_tags_list(),
                                        batch_taglists_[i][0],
                                        flower->tag_key_list());
          if (ret != 0) {
            cerr << "prefilter failed, idx: " << i << std::endl;
            error.store(true);
            return;
          }

          auto filterFunc = [&](uint64_t key) {
            return filter_cache.find(key);
          };

          context->set_filter(filterFunc);
        }

        context->set_topk(gt_count);
        int ret = do_linear_search<T>(flower, context, query);
        if (ret < 0) {
          cerr << "Failed to linear search, ret=" << ret << endl;
          error.store(true);
          return;
        }
        const auto &result = context->result();
        vector<pair<uint64_t, float>> one_gt;
        one_gt.reserve(gt_count);

        for (const auto &knn : result) {
          one_gt.emplace_back(knn.key(), knn.score());
        }
        gt_[i] = std::move(one_gt);
      };
      for (size_t i = 0; i < linear_queries_.size(); ++i) {
        if (error) {
          break;
        }
        pool_->enqueue_and_wake(Closure::New(fun, i));
      }
      pool_->wait_finish();

      if (error) {
        cout << endl
             << "Ground truth file[" << gt_file << "] create failed!" << endl;
        gtf.close();
        // Only the temporary file has been created so far.
        remove(gt_file_temp.c_str());
        gt_.clear();
        return false;
      }

      // The record header is an int; convert explicitly instead of writing
      // the leading bytes of a size_t.
      int gt_count_header = static_cast<int>(gt_count);

      for (size_t i = 0; i < gt_.size(); ++i) {
        auto &gt = gt_[i];

        gtf.write(&gt_count_header, sizeof(int));

        // Never write more pairs than the record has room for.
        size_t stored = std::min(gt.size(), gt_count);
        for (size_t j = 0; j < stored; j++) {
          auto &one_gt = gt[j];

          gtf.write(&one_gt.first, sizeof(uint64_t));
          gtf.write(&one_gt.second, sizeof(float));
        }

        // if ground truth is less than gt count, fill it up
        if (gt.size() != gt_count) {
          std::cout
              << "WARN: GT result count less than GT expected count, index: "
              << i << ", expected GT count: " << gt_count
              << ", actual GT count: " << gt.size() << std::endl;

          uint64_t key{-1LLU};
          float score{std::nanf("")};

          for (size_t j = stored; j < gt_count; ++j) {
            gtf.write(&key, sizeof(uint64_t));
            gtf.write(&score, sizeof(float));
          }
        }
      }

      gtf.close();

      if (!File::Rename(gt_file_temp, gt_file)) {
        cerr << "failed to rename ground truth file, src: " << gt_file_temp
             << ", dst: " << gt_file << endl;

        return false;
      }

      cout << endl
           << "Ground truth file create successful in "
           << timer.milli_seconds() / 1000 << "s." << endl;
    } else {
      if (!gtf.open(gt_file.c_str(), true)) {
        cerr << "Failed to open ground truth file[" << gt_file << "]" << endl;
        return false;
      }
      size_t file_size = gtf.size();

      constexpr size_t LENGTH = 10240;

      // Owning buffer, released on every return path.
      std::vector<char> buffer(LENGTH);
      gtf.read(buffer.data(), sizeof(int));

      int gt_count_header = 0;
      memcpy(&gt_count_header, buffer.data(), sizeof(int));
      size_t gt_count_input = (size_t)gt_count_header;
      size_t one_query_line_size = sizeof(int) + GT_PAIR_SIZE * gt_count_input;

      if (gt_count != gt_count_input || file_size % one_query_line_size != 0) {
        cerr << "Ground truth file[" << gt_file << "] content error!" << endl;
        gtf.close();
        return false;
      }

      size_t query_num = file_size / one_query_line_size;
      // One record per query is expected; fewer records would lead to
      // out-of-range ground truth lookups during recall.
      if (query_num != linear_queries_.size()) {
        cerr << "Ground truth file[" << gt_file << "] content error!" << endl;
        gtf.close();
        return false;
      }
      if (one_query_line_size > buffer.size()) {
        buffer.resize(one_query_line_size);
      }

      gt_.clear();
      gt_.reserve(query_num);
      for (size_t n = 0; n < query_num; ++n) {
        gtf.read(n * one_query_line_size, buffer.data(), one_query_line_size);
        vector<pair<uint64_t, float>> one_gt;
        one_gt.reserve(gt_count);

        for (size_t i = 0; i < gt_count; ++i) {
          const char *entry = buffer.data() + sizeof(int) + GT_PAIR_SIZE * i;
          uint64_t key = 0;
          float score = 0.0f;
          // memcpy avoids unaligned typed reads from the byte buffer.
          memcpy(&key, entry, sizeof(uint64_t));
          memcpy(&score, entry + sizeof(uint64_t), sizeof(float));

          // Key -1 marks padding written for queries with too few results.
          if (key != -1LLU) {
            one_gt.emplace_back(key, score);
          }
        }
        gt_.emplace_back(std::move(one_gt));
      }
      cout << "Load ground truth file[" << gt_file << "] done!" << endl;
    }

    return true;
  }

  //! Batch KNN search over `count` queries stored back to back in `query`.
  //! Available for float, int8_t, uint32_t and uint64_t elements; the query
  //! layout is described by qmeta_.
  //! \return the status code of the flow's search_impl()
  template <typename U>
  typename std::enable_if<std::is_same<float, U>::value ||
                              std::is_same<int8_t, U>::value ||
                              std::is_same<uint32_t, U>::value ||
                              std::is_same<uint64_t, U>::value,
                          int>::type
  do_knn_search(Flow *flower, Flow::Context::Pointer &context,
                const vector<U> &query, size_t count) {
    const U *query_data = query.data();
    return flower->search_impl(query_data, qmeta_, count, context);
  }

  //! KNN search for a single query.
  //! Available for float, int8_t, uint32_t and uint64_t elements; the query
  //! layout is described by qmeta_.
  //! \return the status code of the flow's search_impl()
  template <typename U>
  typename std::enable_if<std::is_same<float, U>::value ||
                              std::is_same<int8_t, U>::value ||
                              std::is_same<uint32_t, U>::value ||
                              std::is_same<uint64_t, U>::value,
                          int>::type
  do_knn_search(Flow *flower, Flow::Context::Pointer &context,
                const vector<U> &query) {
    const U *query_data = query.data();
    return flower->search_impl(query_data, qmeta_, context);
  }

  //! Linear (brute force) search for a single query.
  //! Available for float, int8_t, uint32_t and uint64_t elements; the query
  //! layout is described by qmeta_.
  //! \return the status code of the flow's search_bf_impl()
  template <typename U>
  typename std::enable_if<std::is_same<float, U>::value ||
                              std::is_same<int8_t, U>::value ||
                              std::is_same<uint32_t, U>::value ||
                              std::is_same<uint64_t, U>::value,
                          int>::type
  do_linear_search(Flow *flower, Flow::Context::Pointer &context,
                   const vector<U> &query) {
    const U *query_data = query.data();
    return flower->search_bf_impl(query_data, qmeta_, context);
  }


  //! Batch linear (brute force) search over `count` queries stored back to
  //! back in `query`.
  //! Available for float, int8_t, uint32_t and uint64_t elements; the query
  //! layout is described by qmeta_.
  //! \return the status code of the flow's search_bf_impl()
  template <typename U>
  typename std::enable_if<std::is_same<float, U>::value ||
                              std::is_same<int8_t, U>::value ||
                              std::is_same<uint32_t, U>::value ||
                              std::is_same<uint64_t, U>::value,
                          int>::type
  do_linear_search(Flow *flower, Flow::Context::Pointer &context,
                   const vector<U> &query, size_t count) {
    const U *query_data = query.data();
    return flower->search_bf_impl(query_data, qmeta_, count, context);
  }

  //! Thread pool task: run the KNN search for one batch and add its recall
  //! to the per-top-k accumulators in recall_res_.
  //! \param flower     search flow used for the KNN search
  //! \param topk       number of results requested from the KNN search
  //! \param index      index of the batch in batch_queries_
  //! \param output_fs  per-thread (knn, linear) log streams; may be empty
  //! Errors are reported on stderr and leave the accumulators untouched for
  //! the affected batch.
  void recall_one_dense(
      Flow *flower, size_t topk, size_t index,
      std::vector<pair<std::fstream *, std::fstream *>> &output_fs) {
    const auto &query = batch_queries_[index];

    // Each pool thread writes to its own pair of log files, if logging is
    // enabled.
    size_t thread_index = pool_->indexof_this();
    fstream *knn_fs = nullptr;
    fstream *linear_fs = nullptr;
    if (output_fs.size() > thread_index) {
      knn_fs = output_fs[thread_index].first;
      linear_fs = output_fs[thread_index].second;
    }

    Flow::Context::Pointer knn_context = flower->create_context();
    if (!knn_context) {
      cerr << "Failed to create search context" << endl;
      return;
    }
    knn_context->set_topk(topk);

    // Compares the KNN results of query `idx` with its ground truth and
    // updates recall_res_ for every requested top-k value.
    auto cal_recall = [&, this](const std::vector<IndexDocument> &knn_res,
                                size_t idx) {
      vector<IndexDocument> linear_res;

      // Only the first `topk` ground truth entries take part.
      size_t result_size = std::min(topk, gt_[idx].size());
      if (result_size == 0) {
        return;
      }

      for (size_t i = 0; i < result_size; ++i) {
        auto gt_node = gt_[idx][i];

        linear_res.emplace_back(gt_node.first, gt_node.second, gt_node.first);
      }


      if (knn_fs) {
        for (auto knn : knn_res) {
          string str = "query[" + to_string(idx) + "]\tkey[" +
                       to_string(knn.key()) + "], dist[" +
                       to_string(knn.score()) + "]\n";
          knn_fs->write(str.c_str(), str.size());
        }
      }
      size_t match = 0;
      // Score ordering of the ground truth: descending only if the first
      // score is greater than the last one, ascending otherwise.
      bool asc =
          (linear_res.size() > 1 &&
           (linear_res[0].score() > linear_res[linear_res.size() - 1].score()))
              ? false
              : true;

      // Per requested top-k: number of ground truth keys found so far
      // (id comparison mode only).
      map<int32_t, size_t> topk_matchs;
      if (g_compare_by_id) {
        for (size_t i = 0; i < topk_ids_.size(); ++i) {
          topk_matchs[topk_ids_[i]] = 0;
        }
      }
      // i walks the ground truth, j walks the KNN results (score mode).
      for (size_t i = 0, j = 0; i < linear_res.size();) {
        bool m = false;       // if current doc matched in max topk
        bool changed = true;  // if i changed
        if (g_compare_by_id) {
          for (size_t k = 0; k < topk_ids_.size(); ++k) {
            // Extend the top-k window while the score at the window border
            // equals the next one (difference below float epsilon), so that
            // tied results are not cut off arbitrarily.
            size_t dynamic_size = (size_t)topk_ids_[k];
            for (; dynamic_size + 1 < knn_res.size(); ++dynamic_size) {
              if (fabs(knn_res[dynamic_size - 1].score() -
                       knn_res[dynamic_size].score()) >=
                  numeric_limits<float>::epsilon()) {
                break;
              }
            }
            // Look for the current ground truth key inside the window.
            for (size_t l = 0; l < dynamic_size && l < knn_res.size(); ++l) {
              if (linear_res[i].key() == knn_res[l].key()) {
                topk_matchs[topk_ids_[k]]++;
                if (k == topk_ids_.size() - 1) {
                  m = true;
                }
                break;
              }
            }
          }
          ++i;
          // After i ground truth entries: if i is a requested top-k, add
          // the recall percentage reached at this depth.
          auto it = recall_res_.find(i);
          if (it != recall_res_.end()) {
            lock_guard<mutex> lock(recall_lock);
            it->second += 100.0 * topk_matchs[i] / i;
          }
        } else {
          // Score mode: both lists are merged by score; scores closer than
          // g_recall_precision count as a match.
          size_t cur_topk = i + 1;
          if (j < knn_res.size()) {
            if (fabs(linear_res[i].score() - knn_res[j].score()) <
                g_recall_precision) {
              ++j;
              ++i;
              match++;
              m = true;
            } else {
              // The ground truth entry sorts before the current KNN result:
              // it was missed. Otherwise skip the KNN result and retry the
              // same ground truth entry.
              if ((asc && linear_res[i].score() < knn_res[j].score()) ||
                  (!asc && linear_res[i].score() > knn_res[j].score())) {
                ++i;
              } else {
                changed = false;
                ++j;
              }
            }
          } else {
            // KNN results exhausted: remaining ground truth entries are
            // misses.
            ++i;
          }
          auto it = recall_res_.find(cur_topk);
          if (changed && it != recall_res_.end()) {
            lock_guard<mutex> lock(recall_lock);
            it->second += 100.0 * match / cur_topk;
          }
        }
        // Log the ground truth entry that has just been consumed.
        if (linear_fs && changed) {
          string str = string(m ? "    HIT" : "NOT HIT") + "  query[" +
                       to_string(idx) + "]\tkey[" +
                       to_string(linear_res[i - 1].key()) + "], dist[" +
                       to_string(linear_res[i - 1].score()) + "]\n";
          linear_fs->write(str.c_str(), str.size());
        }
      }
    };

    // prefilter
    // filter_cache is captured by reference by the filter installed below,
    // so it has to stay alive until the search has finished.
    FilterResultCache filter_cache;
    if (filter_mode_ == FM_TAG) {
      if (batch_taglists_[index].size() != 1) {
        cerr << "query tag list not equal to one!" << endl;
        return;
      }

      int ret = filter_cache.filter(flower->id_to_tags_list(),
                                    batch_taglists_[index][0],
                                    flower->tag_key_list());
      if (ret != 0) {
        cerr << "prefilter failed, idx: " << index << std::endl;

        return;
      }

      auto filterFunc = [&](uint64_t key) { return filter_cache.find(key); };

      knn_context->set_filter(filterFunc);
    }

    if (call_batch_api_) {
      // Number of queries packed into this batch.
      size_t qnum = query.size() / dim_;
      int ret = do_knn_search<T>(flower, knn_context, query, qnum);
      if (ret < 0) {
        cerr << "Failed to knn_search batch, ret=" << ret << " "
             << IndexError::What(ret) << endl;
        return;
      }
      for (size_t i = 0; i < qnum; ++i) {
        // Skip the wrapped-around padding queries of the last batch.
        size_t idx = index * batch_count_ + i;
        if (idx >= linear_queries_.size()) {
          break;
        }

        auto &knn_res = knn_context->result(i);
        cal_recall(knn_res, idx);
      }
    } else {
      int ret = do_knn_search<T>(flower, knn_context, query);
      if (ret < 0) {
        cerr << "Failed to knn_search, ret=" << ret << " "
             << IndexError::What(ret) << endl;
        return;
      }
      auto &knn_res = knn_context->result();
      cal_recall(knn_res, index);
    }

    // std::cout << "id: " << index << ": \n" <<
    // knn_context->flow_context()->searcher_context()->profiler().display();
  }

 private:
  // Query meta (data type and dimension), set by load_query().
  IndexQueryMeta qmeta_{};
  // Number of worker threads; also the number of per-thread log file pairs.
  size_t threads_;
  // True when searches go through the batch search API.
  bool call_batch_api_;
  // Directory for the per-thread result logs; empty disables logging.
  string output_;
  // Queries per batch, at least one.
  size_t batch_count_;
  shared_ptr<ThreadPool> pool_;

  // for gt
  // One entry per query, in file order.
  vector<vector<T>> linear_queries_;
  vector<SparseData<T>> linear_sparse_data_;
  vector<vector<uint64_t>> linear_taglists_;

  // for recall
  // One entry per batch; a batch holds its queries back to back.
  vector<vector<T>> batch_queries_;
  vector<vector<uint32_t>> batch_sparse_counts_;
  vector<vector<uint32_t>> batch_sparse_indices_;
  vector<vector<T>> batch_sparse_features_;
  vector<vector<vector<uint64_t>>> batch_taglists_;

  // Dimension of the first query and number of loaded queries; both are
  // set by load_query().
  size_t dim_;
  size_t total_querys_;

  // Accumulated recall percentage per requested top-k; divided by the
  // query count when printed.
  map<size_t, float> recall_res_;
  // Requested top-k values, sorted ascending.
  vector<int32_t> topk_ids_;
  // Ground truth (key, score) pairs per query.
  vector<vector<pair<uint64_t, float>>> gt_;

  // True once an external ground truth file has been loaded.
  bool external_gt_file_enabled_{false};

  FilterMode filter_mode_{FM_NONE};

  // Set by the signal handler stop() to request the end of the run.
  static bool STOP_NOW;
};

// Out-of-class definition of the static Recall<T>::STOP_NOW flag. Each
// instantiation of Recall gets its own flag, and it starts out false.
template <typename T>
bool Recall<T>::STOP_NOW = false;

//--------------------------------------------------
// Sparse Recall
//--------------------------------------------------
template <typename T>
class SparseRecall {
 public:
  // Builds a sparse recall runner.
  //
  // threads      worker thread count; 0 builds the pool without an explicit
  //              size and adopts the count the pool reports
  // output       directory name used later for the per-thread log files
  // batch_count  values below 1 select the single-query search path and are
  //              normalised to 1; any other value selects the batch path
  // filter_mode  filter mode stored for the later searches
  SparseRecall(size_t threads, const string &output, size_t batch_count,
               FilterMode filter_mode)
      : threads_(threads),
        output_(output),
        batch_count_(batch_count),
        filter_mode_{filter_mode} {
    const bool auto_sized = (threads_ == 0);
    if (auto_sized) {
      pool_ = make_shared<ThreadPool>(true);
      threads_ = pool_->count();
    } else {
      pool_ = make_shared<ThreadPool>(threads_, true);
    }
    cout << (auto_sized ? "Using cpu count as thread pool count["
                        : "Using thread pool count[")
         << threads_ << "]" << endl;

    call_batch_api_ = (batch_count_ >= 1);
    if (!call_batch_api_) {
      batch_count_ = 1;
    }
  }

  // Signal handler installed for SIGINT by run_sparse(). The first call only
  // raises STOP_NOW so the dispatch loop can wind down; a call made while the
  // flag is already set exits the process with the signal number.
  static void stop(int signo) {
    if (!STOP_NOW) {
      STOP_NOW = true;
      cout << "\rTrying to stop. press [Ctrl+C] again kill immediately." << endl
           << flush;
      return;
    }
    exit(signo);
  }

  // Applies transform_query_without_hybrid_scale() to every (query, sparse
  // feature) pair and collects the results.
  //
  // queries                 dense query vectors
  // sparse_features         sparse feature vectors, one per query
  // queries_output          receives the transformed queries (cleared first)
  // sparse_features_output  receives the transformed features (cleared first)
  //
  // Returns 0 on success, -1 when an output pointer is null or when there are
  // fewer sparse feature vectors than queries.
  int transform_queries_without_hybrid_scale(
      const vector<vector<T>> &queries,
      const vector<vector<T>> &sparse_features,
      vector<vector<T>> *queries_output,
      vector<vector<T>> *sparse_features_output) {
    if (!queries_output || !sparse_features_output) {
      std::cerr << "input should not be empty in transfrom queries"
                << std::endl;

      return -1;
    }

    queries_output->clear();
    sparse_features_output->clear();

    // The loop below reads sparse_features[i] for every query index, so a
    // shorter feature list would be read out of bounds.
    if (sparse_features.size() < queries.size()) {
      std::cerr << "sparse feature count[" << sparse_features.size()
                << "] is less than query count[" << queries.size() << "]"
                << std::endl;

      return -1;
    }

    queries_output->reserve(queries.size());
    sparse_features_output->reserve(queries.size());

    for (size_t i = 0; i < queries.size(); ++i) {
      vector<T> query_output;
      vector<T> sparse_feature_output;

      transform_query_without_hybrid_scale(queries[i], sparse_features[i],
                                           &query_output,
                                           &sparse_feature_output);

      // The temporaries are not used again, so hand their storage over.
      queries_output->push_back(std::move(query_output));
      sparse_features_output->push_back(std::move(sparse_feature_output));
    }

    return 0;
  }

  // Runs the sparse recall evaluation and prints Recall@K for every requested
  // K.
  //
  // flower       flow used to create search contexts and run the searches
  // recall_tops  comma separated list of K values
  // gt_count     requested ground-truth depth; raised to the largest K when
  //              it is smaller
  //
  // Nothing is returned: failures are reported on cerr and end the run early.
  void run_sparse(SparseFlow *flower, const string &recall_tops,
                  size_t gt_count) {
    StringHelper::Split(recall_tops, ",", &topk_ids_);
    std::sort(topk_ids_.begin(), topk_ids_.end());

    for (auto i : topk_ids_) {
      recall_res_[i] = 0.0f;
    }
    // Dereferencing rbegin() of an empty map is undefined behaviour.
    if (recall_res_.empty()) {
      cerr << "No recall topk specified, recall tops: [" << recall_tops << "]"
           << endl;
      return;
    }
    size_t topk = recall_res_.rbegin()->first;

    gt_count = topk < gt_count ? gt_count : topk;

    // recall_one_sparse() performs no search on the batch path, so every
    // Recall@K printed below would silently stay at zero.
    if (call_batch_api_) {
      cerr << "WARN: batch search is not implemented for sparse recall, "
              "queries will not be searched"
           << endl;
    }

    if (external_gt_file_enabled_) {
      cout << "Internal ground truth file NOT used since external ground truth "
              "file has been loaded"
           << endl;
    } else {
      cout << "Loading internal ground truth file" << endl;

      if (!load_gt_sparse(flower, gt_count)) {
        cerr << "Load ground truth file failed!" << endl;
        return;
      }
    }

    // Without queries there is nothing to dispatch, and the final report would
    // divide by zero.
    if (batch_sparse_counts_.empty() || linear_queries_.empty()) {
      cerr << "No query loaded, nothing to recall" << endl;
      return;
    }

    if (batch_sparse_counts_.size() < threads_) {
      threads_ = batch_sparse_counts_.size();
      // Same argument order as in the constructor: (thread count, flag).
      pool_ = make_shared<ThreadPool>(threads_, true);
      cout << "Query size too small, resize thread pool count[" << threads_
           << "]" << endl;
    }

    // Prepare file handler
    vector<pair<fstream *, fstream *>> output_fs;
    if (!output_.empty()) {
      // NOTE(review): output_ is pasted into a shell command unescaped; it
      // must not come from an untrusted source.
      string cmd = "mkdir -p " + output_;
      int ret = system(cmd.c_str());
      if (ret != 0) {
        std::cerr << "execute cmd " << cmd << " failed" << std::endl;
        return;
      }
      struct stat sb;
      if (stat(output_.c_str(), &sb) == 0 && S_ISDIR(sb.st_mode)) {
        cout << "logs output to : " << output_ << endl;
        // One (.knn, .linear) pair per worker thread; recall_one_sparse()
        // picks its pair by the worker's index in the pool.
        for (size_t i = 0; i < threads_; ++i) {
          fstream *fs_k = new fstream();
          fs_k->open(output_ + "/t" + to_string(i) + ".knn", ios::out);
          fstream *fs_l = new fstream();
          fs_l->open(output_ + "/t" + to_string(i) + ".linear", ios::out);
          output_fs.push_back(make_pair(fs_k, fs_l));
        }
      }
    }

    signal(SIGINT, stop);
    size_t i = 0;
    for (; !STOP_NOW && i < batch_sparse_counts_.size();) {
      // Throttle: keep at most one pending task per worker.
      if (pool_->pending_count() >= pool_->count()) {
        this_thread::sleep_for(chrono::microseconds(1));
        continue;
      }

      Closure::Pointer task = Closure::New(
          this, &SparseRecall::recall_one_sparse, flower, topk, i, output_fs);
      pool_->enqueue_and_wake(task);

      i++;
    }
    pool_->wait_finish();

    for (auto fs : output_fs) {
      fs.first->close();
      fs.second->close();
      delete fs.first;
      delete fs.second;
    }
    cout << "Process query: " << i << endl;
    for (const auto &it : recall_res_) {
      cout << "Recall@" << it.first << ": "
           << it.second / linear_queries_.size() << endl;
    }
  }

  bool load_query(const std::string &query_file, const std::string &first_sep,
                  const std::string &second_sep) {
    TxtInputReader<T> reader;

    if (!reader.load_query(query_file, first_sep, second_sep, linear_queries_,
                           linear_sparse_data_, linear_taglists_)) {
      cerr << "Load query error" << endl;
      return false;
    }

    if (batch_count_ == 1) {
      for (size_t i = 0; i < linear_sparse_data_.size(); ++i) {
        vector<uint32_t> sparse_count;
        sparse_count.push_back(linear_sparse_data_[i].count);

        batch_sparse_counts_.push_back(sparse_count);
        batch_sparse_indices_.push_back(linear_sparse_data_[i].indices);
        batch_sparse_features_.push_back(linear_sparse_data_[i].features);
      }
    } else {
      size_t num_batch =
          (linear_queries_.size() + batch_count_ - 1) / batch_count_;
      size_t idx = 0;
      for (size_t n = 0; n < num_batch; ++n) {
        vector<uint32_t> batch_sparse_count;
        vector<uint32_t> batch_sparse_indices;
        vector<T> batch_sparse_feature;

        for (size_t i = 0; i < batch_count_; ++i) {
          batch_sparse_count.push_back(linear_sparse_data_[idx].count);

          for (size_t k = 0; k < linear_sparse_data_[idx].indices.size(); ++k) {
            batch_sparse_indices.push_back(linear_sparse_data_[idx].indices[k]);
          }

          for (size_t k = 0; k < linear_sparse_data_[idx].features.size();
               ++k) {
            batch_sparse_feature.push_back(
                linear_sparse_data_[idx].features[k]);
          }

          idx = (idx + 1) % linear_queries_.size();
        }
        batch_sparse_counts_.push_back(batch_sparse_count);
        batch_sparse_indices_.push_back(batch_sparse_indices);
        batch_sparse_features_.push_back(batch_sparse_feature);
      }
    }

    total_querys_ = linear_queries_.size();
    if (typeid(T) == typeid(float)) {
      qmeta_.set_data_type(IndexMeta::DataType::DT_FP32);
    } else if (typeid(T) == typeid(int8_t)) {
      qmeta_.set_data_type(IndexMeta::DataType::DT_INT8);
    } else {
      cerr << "unsupported type";
      return false;
    }

    cout << "Load query done!" << endl;
    return true;
  }

  // Loads the ground truth for the loaded sparse queries into gt_, creating
  // the cache file first when it does not exist yet.
  //
  // The cache file is named "gt.<crc>" where <crc> comes from compute_crc().
  // Each query occupies one fixed-size record: an int holding the ground
  // truth count, followed by that many (uint64_t key, float score) pairs.
  // Missing results are padded with key -1 / score NaN and dropped on load.
  //
  // flower    flow used for the brute-force searches when creating the file
  // gt_count  number of ground-truth results per query
  //
  // Returns false when creation fails or an existing file does not match
  // gt_count or the record layout.
  bool load_gt_sparse(SparseFlow *flower, size_t gt_count) {
    std::string crc_str = compute_crc();

    string gt_file = string("gt.") + crc_str;

    File gtf;
    if (!gtf.IsRegular(gt_file.c_str())) {
      cout << "Ground truth file[" << gt_file << "] not exist, try to create it"
           << endl;
      ElapsedTime timer;
      size_t size = sizeof(uint64_t) + sizeof(float);
      size_t file_size =
          linear_sparse_data_.size() * (sizeof(int) + size * gt_count);

      // Write to a temporary name and rename at the end, so a partially
      // written file is never picked up as a valid cache.
      // NOTE(review): the result of create() is not checked here.
      std::string gt_file_temp = gt_file + ".tmp";
      gtf.create(gt_file_temp.c_str(), file_size);

      gt_.clear();
      gt_.resize(linear_sparse_data_.size());

      atomic_bool error(false);
      size_t count = 0;
      float s = linear_sparse_data_.size() / 100.0;
      size_t pc = 0;
      SpinMutex spin_lock;

      function<void(size_t)> fun = [&](size_t i) {
        // Progress bar; the counters are shared between workers.
        spin_lock.lock();
        count++;
        size_t process = (size_t)ceil(count / s);
        if (process > pc) {
          pc = process;
          stringstream msg;
          msg << "\r" << setw(3) << setfill(' ') << process << "% " << left
              << setfill('=') << setw(process / 2 + 1) << "[" << right
              << setfill(' ') << setw(51 - process / 2) << "]";
          cout << msg.str() << flush;
        }
        spin_lock.unlock();

        // Every abort below raises `error`: otherwise an incomplete ground
        // truth would be written and reused by later runs.
        SparseFlow::Context::Pointer context = flower->create_context();
        if (!context) {
          cerr << "Failed to create search context" << endl;
          error.store(true);
          return;
        }

        context->set_topk(gt_count);
        const SparseData<T> &sparse_data = linear_sparse_data_[i];

        // prefilter
        FilterResultCache filter_cache;
        if (filter_mode_ == FM_TAG) {
          if (i >= batch_taglists_.size() || batch_taglists_[i].size() != 1) {
            cerr << "query tag list not equal to one!" << endl;
            error.store(true);
            return;
          }

          int ret = filter_cache.filter(flower->id_to_tags_list(),
                                        batch_taglists_[i][0],
                                        flower->tag_key_list());
          if (ret != 0) {
            cerr << "prefilter failed, idx: " << i << std::endl;
            error.store(true);
            return;
          }

          // filter_cache outlives the search below, so capturing it by
          // reference is safe within this task.
          auto filterFunc = [&](uint64_t key) {
            return filter_cache.find(key);
          };

          context->set_filter(filterFunc);
        }

        int ret =
            do_linear_search<T>(flower, context, sparse_data.count,
                                sparse_data.indices, sparse_data.features);
        if (ret < 0) {
          cerr << "Failed to sparse linear search, ret=" << ret << endl;
          error.store(true);
          return;
        }
        const auto &result = context->result();

        vector<pair<uint64_t, float>> one_gt;
        one_gt.reserve(gt_count);

        for (const auto &knn : result) {
          one_gt.emplace_back(knn.key(), knn.score());
        }
        gt_[i] = std::move(one_gt);
      };

      for (size_t i = 0; i < linear_sparse_data_.size(); ++i) {
        if (error) {
          break;
        }
        pool_->enqueue_and_wake(Closure::New(fun, i));
      }
      pool_->wait_finish();

      if (error) {
        cout << endl
             << "Ground truth file[" << gt_file << "] create failed!" << endl;
        gtf.close();
        // Only the temporary file exists at this point.
        remove(gt_file_temp.c_str());
        return false;
      }

      // The record header is an int; convert explicitly rather than writing
      // the first bytes of the size_t.
      int gt_count_header = static_cast<int>(gt_count);

      for (size_t i = 0; i < gt_.size(); ++i) {
        auto &gt = gt_[i];

        // Never write more pairs than the record has room for.
        const size_t valid = std::min(gt.size(), gt_count);

        gtf.write(&gt_count_header, sizeof(int));

        for (size_t j = 0; j < valid; j++) {
          auto &one_gt = gt[j];

          gtf.write(&one_gt.first, sizeof(uint64_t));
          gtf.write(&one_gt.second, sizeof(float));
        }

        // if ground truth is less than gt count, fill it up
        if (valid < gt_count) {
          std::cout
              << "WARN: GT result count less than GT expected count, index: "
              << i << ", expected GT count: " << gt_count
              << ", actual GT count: " << gt.size() << std::endl;

          uint64_t key{-1LLU};
          float score{std::nanf("")};

          for (size_t j = valid; j < gt_count; ++j) {
            gtf.write(&key, sizeof(uint64_t));
            gtf.write(&score, sizeof(float));
          }
        }
      }
      gtf.close();

      if (!File::Rename(gt_file_temp, gt_file)) {
        cerr << "failed to rename ground truth file, src: " << gt_file_temp
             << ", dst: " << gt_file << endl;

        return false;
      }

      cout << endl
           << "Ground truth file create successful in "
           << timer.milli_seconds() / 1000 << "s." << endl;
    } else {
      if (!gtf.open(gt_file.c_str(), true)) {
        cerr << "Failed to open ground truth file[" << gt_file << "]" << endl;
        return false;
      }
      size_t file_size = gtf.size();

      constexpr size_t GT_PAIR_SIZE = sizeof(uint64_t) + sizeof(float);

      // A file shorter than one header cannot hold a valid record.
      if (file_size < sizeof(int)) {
        cerr << "Ground truth file[" << gt_file << "] content error!" << endl;
        gtf.close();
        return false;
      }

      // The vector owns the buffer, so the early return below cannot leak it.
      std::vector<char> buffer(sizeof(int));
      gtf.read(buffer.data(), sizeof(int));

      size_t gt_count_input = (size_t) * (int *)buffer.data();
      size_t one_query_line_size = sizeof(int) + GT_PAIR_SIZE * gt_count_input;

      if (gt_count != gt_count_input || file_size % one_query_line_size != 0) {
        cerr << "Ground truth file[" << gt_file << "] content error!" << endl;
        gtf.close();
        return false;
      }

      size_t query_num = file_size / one_query_line_size;
      buffer.resize(one_query_line_size);

      gt_.clear();
      gt_.reserve(query_num);

      for (size_t n = 0; n < query_num; ++n) {
        gtf.read(n * one_query_line_size, buffer.data(), one_query_line_size);
        vector<pair<uint64_t, float>> one_gt;
        one_gt.reserve(gt_count);

        const char *record = buffer.data() + sizeof(int);
        for (size_t i = 0; i < gt_count; ++i) {
          uint64_t key = *(const uint64_t *)(record + GT_PAIR_SIZE * i);
          float score =
              *(const float *)(record + GT_PAIR_SIZE * i + sizeof(uint64_t));

          // Key -1 marks the padding written for missing results.
          if (key != -1LLU) {
            one_gt.emplace_back(key, score);
          }
        }

        gt_.emplace_back(std::move(one_gt));
      }

      cout << "Load ground truth file[" << gt_file << "] done!" << endl;
    }

    return true;
  }

  // Loads ground truth from an external text file into gt_ through
  // TxtInputReader::load_external_gt. On success the internal ground-truth
  // file is no longer used by run_sparse().
  //
  // Returns the reader's result.
  bool load_external_gt_file(const std::string &external_gt_file,
                             const std::string &first_sep,
                             const std::string &second_sep) {
    TxtInputReader<T> reader;
    const bool loaded =
        reader.load_external_gt(external_gt_file, first_sep, second_sep, gt_);
    if (!loaded) {
      cerr << "Failed to load ground truth file!" << endl;
      return loaded;
    }

    cout << "Load external ground truth file["
         << File::BaseName(external_gt_file) << "] done!" << endl;
    external_gt_file_enabled_ = true;
    return loaded;
  }

 private:
  // Chains a CRC32C over every loaded sparse query (its count, then `count`
  // indices, then `count` features) and returns the result as upper-case hex
  // text. With no queries loaded the result is "0".
  std::string compute_crc() {
    uint32_t crc = 0u;
    // sparse
    for (auto &entry : linear_sparse_data_) {
      const size_t index_bytes = entry.count * sizeof(uint32_t);
      const size_t feature_bytes = entry.count * sizeof(T);

      crc = Crc32c::Hash(&(entry.count), sizeof(uint32_t), crc);
      crc = Crc32c::Hash(entry.indices.data(), index_bytes, crc);
      crc = Crc32c::Hash(entry.features.data(), feature_bytes, crc);
    }

    char hex_text[64];
    snprintf(hex_text, sizeof(hex_text), "%X", crc);

    return std::string(hex_text);
  }

  // sparse search
  // Batch KNN search: hands the raw count/index/feature arrays and the query
  // count to SparseFlow::search_impl. Only available when U is float.
  template <typename U>
  typename std::enable_if<std::is_same<float, U>::value, int>::type
  do_knn_search(SparseFlow *flower, SparseFlow::Context::Pointer &context,
                const vector<uint32_t> &sparse_count,
                const vector<uint32_t> &sparse_indices,
                const vector<U> &sparse_feature, size_t count) {
    const uint32_t *counts = sparse_count.data();
    const uint32_t *indices = sparse_indices.data();
    const U *features = sparse_feature.data();

    return flower->search_impl(counts, indices, features, qmeta_, count,
                               context);
  }

  // Single-query KNN search: hands one query's count and its raw index and
  // feature arrays to SparseFlow::search_impl. Only available when U is
  // float.
  template <typename U>
  typename std::enable_if<std::is_same<float, U>::value, int>::type
  do_knn_search(SparseFlow *flower, SparseFlow::Context::Pointer &context,
                const uint32_t sparse_count,
                const vector<uint32_t> &sparse_indices,
                const vector<U> &sparse_feature) {
    const uint32_t *indices = sparse_indices.data();
    const U *features = sparse_feature.data();

    return flower->search_impl(sparse_count, indices, features, qmeta_,
                               context);
  }

  // Batch brute-force search: hands the raw count/index/feature arrays and
  // the query count to SparseFlow::search_bf_impl. Only available when U is
  // float.
  template <typename U>
  typename std::enable_if<std::is_same<float, U>::value, int>::type
  do_linear_search(SparseFlow *flower, SparseFlow::Context::Pointer &context,
                   const vector<uint32_t> &sparse_count,
                   const vector<uint32_t> &sparse_indices,
                   const vector<U> &sparse_feature, size_t count) {
    const uint32_t *counts = sparse_count.data();
    const uint32_t *indices = sparse_indices.data();
    const U *features = sparse_feature.data();

    return flower->search_bf_impl(counts, indices, features, qmeta_, count,
                                  context);
  }


  // Single-query brute-force search: hands one query's count and its raw
  // index and feature arrays to SparseFlow::search_bf_impl. Only available
  // when U is float.
  template <typename U>
  typename std::enable_if<std::is_same<float, U>::value, int>::type
  do_linear_search(SparseFlow *flower, SparseFlow::Context::Pointer &context,
                   const uint32_t sparse_count,
                   const vector<uint32_t> &sparse_indices,
                   const vector<U> &sparse_feature) {
    const uint32_t *indices = sparse_indices.data();
    const U *features = sparse_feature.data();

    return flower->search_bf_impl(sparse_count, indices, features, qmeta_,
                                  context);
  }

  // Thread-pool task: searches the query entry `index` and adds its recall
  // contribution to recall_res_.
  //
  // flower     flow used to create the context and run the search
  // topk       number of results requested from the search
  // index      entry in the batch_sparse_* arrays to search
  // output_fs  optional per-thread (.knn, .linear) log streams, selected by
  //            the calling worker's index in the pool
  //
  // Errors are reported on cerr and leave recall_res_ untouched for this
  // entry. On the batch path (call_batch_api_) no search is performed.
  void recall_one_sparse(
      SparseFlow *flower, size_t topk, size_t index,
      std::vector<pair<std::fstream *, std::fstream *>> &output_fs) {
    const auto &sparse_count = batch_sparse_counts_[index];
    const auto &sparse_index = batch_sparse_indices_[index];
    const auto &sparse_feature = batch_sparse_features_[index];

    size_t thread_index = pool_->indexof_this();
    fstream *knn_fs = nullptr;
    fstream *linear_fs = nullptr;
    if (output_fs.size() > thread_index) {
      knn_fs = output_fs[thread_index].first;
      linear_fs = output_fs[thread_index].second;
    }

    SparseFlow::Context::Pointer knn_context = flower->create_context();
    if (!knn_context) {
      cerr << "Failed to create search context" << endl;
      return;
    }
    knn_context->set_topk(topk);

    // Compares the search result of query `idx` with its ground truth and
    // accumulates the per-K recall percentages.
    auto cal_recall = [&, this](const std::vector<IndexDocument> &knn_res,
                                size_t idx) {
      vector<IndexDocument> linear_res;

      // A ground truth shorter than the query list must not be indexed past
      // its end.
      if (idx >= gt_.size()) {
        cerr << "No ground truth for query, idx: " << idx << endl;
        return;
      }

      size_t result_size = std::min(topk, gt_[idx].size());
      if (result_size == 0) {
        return;
      }

      for (size_t i = 0; i < result_size; ++i) {
        const auto &gt_node = gt_[idx][i];

        linear_res.emplace_back(gt_node.first, gt_node.second, gt_node.first);
      }

      if (knn_fs) {
        for (const auto &knn : knn_res) {
          string str = "query[" + to_string(idx) + "]\tkey[" +
                       to_string(knn.key()) + "], dist[" +
                       to_string(knn.score()) + "]\n";
          knn_fs->write(str.c_str(), str.size());
        }
      }

      size_t match = 0;
      // Ground truth is treated as descending only when the first score is
      // larger than the last one.
      bool asc =
          (linear_res.size() > 1 &&
           (linear_res[0].score() > linear_res[linear_res.size() - 1].score()))
              ? false
              : true;

      map<int32_t, size_t> topk_matchs;
      if (g_compare_by_id) {
        for (size_t i = 0; i < topk_ids_.size(); ++i) {
          topk_matchs[topk_ids_[i]] = 0;
        }
      }

      for (size_t i = 0, j = 0; i < linear_res.size();) {
        bool m = false;       // if current doc matched in max topk
        bool changed = true;  // if i changed
        if (g_compare_by_id) {
          for (size_t k = 0; k < topk_ids_.size(); ++k) {
            // Extend the window over results tied with the K-th score. The
            // `>= 1` guard keeps knn_res[dynamic_size - 1] in range when a
            // requested K is 0.
            size_t dynamic_size = (size_t)topk_ids_[k];
            for (; dynamic_size >= 1 && dynamic_size + 1 < knn_res.size();
                 ++dynamic_size) {
              if (fabs(knn_res[dynamic_size - 1].score() -
                       knn_res[dynamic_size].score()) >=
                  numeric_limits<float>::epsilon()) {
                break;
              }
            }
            for (size_t l = 0; l < dynamic_size && l < knn_res.size(); ++l) {
              if (linear_res[i].key() == knn_res[l].key()) {
                topk_matchs[topk_ids_[k]]++;
                if (k == topk_ids_.size() - 1) {
                  m = true;
                }
                break;
              }
            }
          }
          ++i;

          auto it = recall_res_.find(i);
          if (it != recall_res_.end()) {
            lock_guard<mutex> lock(recall_lock);
            it->second += 100.0 * topk_matchs[i] / i;
          }
        } else {
          // Score based comparison: walk both lists and count a match when
          // the scores differ by less than g_recall_precision.
          size_t cur_topk = i + 1;
          if (j < knn_res.size()) {
            if (fabs(linear_res[i].score() - knn_res[j].score()) <
                g_recall_precision) {
              ++j;
              ++i;
              match++;
              m = true;
            } else {
              if ((asc && linear_res[i].score() < knn_res[j].score()) ||
                  (!asc && linear_res[i].score() > knn_res[j].score())) {
                ++i;
              } else {
                changed = false;
                ++j;
              }
            }
          } else {
            ++i;
          }

          auto it = recall_res_.find(cur_topk);
          if (changed && it != recall_res_.end()) {
            lock_guard<mutex> lock(recall_lock);
            it->second += 100.0 * match / cur_topk;
          }
        }

        if (linear_fs && changed) {
          string str = string(m ? "    HIT" : "NOT HIT") + "  query[" +
                       to_string(idx) + "]\tkey[" +
                       to_string(linear_res[i - 1].key()) + "], dist[" +
                       to_string(linear_res[i - 1].score()) + "]\n";
          linear_fs->write(str.c_str(), str.size());
        }
      }
    };

    FilterResultCache filter_cache;
    if (filter_mode_ == FM_TAG) {
      if (index >= batch_taglists_.size() ||
          batch_taglists_[index].size() != 1) {
        cerr << "query tag list not equal to one!" << endl;
        return;
      }

      int ret = filter_cache.filter(flower->id_to_tags_list(),
                                    batch_taglists_[index][0],
                                    flower->tag_key_list());
      if (ret != 0) {
        cerr << "prefilter failed, idx: " << index << std::endl;

        return;
      }

      // filter_cache lives until the end of this function, past the search.
      auto filterFunc = [&](uint64_t key) { return filter_cache.find(key); };

      knn_context->set_filter(filterFunc);
    }

    if (call_batch_api_) {
      // Batch search is disabled; the former implementation is kept below.
      // size_t qnum = sparse_count.size() / dim_;
      // int ret = do_knn_search<T>(flower, knn_context, sparse_count,
      // sparse_index, sparse_feature, qnum); if (ret < 0) {
      //   cerr << "Failed to sparse_knn_search batch, ret=" << ret << " "
      //       << IndexError::What(ret) << endl;
      //   return;
      // }
      // for (size_t i = 0; i < qnum; ++i) {
      //   size_t idx = index * batch_count_ + i;

      //   if (idx >= linear_queries_.size()) {
      //     break;
      //   }

      //   auto &knn_res = knn_context->result(i);
      //   cal_recall(knn_res, idx);
      // }
    } else {
      int ret = do_knn_search<T>(flower, knn_context, sparse_count[0],
                                 sparse_index, sparse_feature);
      if (ret < 0) {
        cerr << "Failed to sparse_knn_search, ret=" << ret << " "
             << IndexError::What(ret) << endl;
        return;
      }
      auto &knn_res = knn_context->result();
      cal_recall(knn_res, index);
    }
  }

 private:
  // Query meta passed to every search call; load_query() sets its data type.
  IndexQueryMeta qmeta_{};
  // Worker thread count; run_sparse() lowers it when there are fewer query
  // entries than threads.
  size_t threads_;
  // Set by the constructor: false when the requested batch count was below 1.
  bool call_batch_api_;
  // Directory for the per-thread log files; empty disables logging.
  string output_;
  // Queries per entry of the batch_sparse_* arrays (at least 1).
  size_t batch_count_;
  // Pool that runs the ground-truth and recall tasks.
  shared_ptr<ThreadPool> pool_;

  // for gt
  // Filled by TxtInputReader::load_query() in load_query().
  vector<vector<T>> linear_queries_;
  vector<SparseData<T>> linear_sparse_data_;
  // Not referenced by the methods of this class.
  vector<uint32_t> linear_partitions_;
  vector<vector<uint64_t>> linear_taglists_;

  // Not referenced by the methods of this class.
  std::map<std::string, vector<vector<T>>> linear_queries_scaled_;
  std::map<std::string, vector<vector<T>>> linear_sparse_features_scaled_;

  // for recall
  // Not referenced by the methods of this class.
  vector<vector<T>> batch_queries_;
  // Per search entry: query counts, concatenated indices and concatenated
  // features, built by load_query().
  vector<vector<uint32_t>> batch_sparse_counts_;
  vector<vector<uint32_t>> batch_sparse_indices_;
  vector<vector<T>> batch_sparse_features_;
  // Not referenced by the methods of this class.
  vector<vector<uint32_t>> batch_partitions_;
  // Tag lists read by the FM_TAG prefilter, which requires exactly one tag
  // list in the indexed entry.
  vector<vector<vector<uint64_t>>> batch_taglists_;

  // Not referenced by the methods of this class.
  std::map<std::string, vector<vector<T>>> batch_queries_scaled_;
  std::map<std::string, vector<vector<T>>> batch_sparse_features_scaled_;

  // Set to linear_queries_.size() by load_query().
  size_t total_querys_;

  // Accumulated recall percentage per K; run_sparse() divides by the query
  // count when printing.
  map<size_t, float> recall_res_;
  // Requested K values, sorted ascending by run_sparse().
  vector<int32_t> topk_ids_;
  // Ground truth per query as (key, score) pairs.
  vector<vector<pair<uint64_t, float>>> gt_;

  // Not referenced by the methods of this class.
  map<string, vector<vector<pair<uint64_t, float>>>> gt_hybrid_;
  // True once load_external_gt_file() succeeded; run_sparse() then skips the
  // internal ground-truth file.
  bool external_gt_file_enabled_{false};

  // Selects whether the FM_TAG prefilter is applied before searching.
  FilterMode filter_mode_{FM_NONE};
  // Stop request raised by stop() and polled by the dispatch loop in
  // run_sparse().
  static bool STOP_NOW;
};

// Out-of-class definition of the static SparseRecall<T>::STOP_NOW flag. It
// starts out false and is raised by SparseRecall<T>::stop().
template <typename T>
bool SparseRecall<T>::STOP_NOW = false;

// Copies the entries of a YAML mapping into `params`, echoing each scalar as
// "key=value" on stdout.
//
// Values tagged as YAML int, float or bool are stored as int64_t, float and
// bool. Other scalars are stored as strings, nested maps are converted
// recursively into a sub-Params, and any other node kind is skipped.
//
// Returns false when a nested map fails to convert.
bool prepare_params(YAML::Node &&config_params, Params &params) {
  cout << "Parse params as blow:" << endl;
  for (auto it = config_params.begin(); it != config_params.end(); ++it) {
    const string tag = it->second.Tag();

    if (tag == "tag:yaml.org,2002:int") {
      int64_t val = it->second.as<int64_t>();
      const string key = it->first.as<string>();
      params.set(key, val);
      cout << key << "=" << val << endl;
      continue;
    }
    if (tag == "tag:yaml.org,2002:float") {
      float val = it->second.as<float>();
      const string key = it->first.as<string>();
      params.set(key, val);
      cout << key << "=" << val << endl;
      continue;
    }
    if (tag == "tag:yaml.org,2002:bool") {
      bool val = it->second.as<bool>();
      const string key = it->first.as<string>();
      params.set(key, val);
      cout << key << "=" << val << endl;
      continue;
    }

    // Untagged or otherwise tagged nodes: decide by node kind.
    if (it->second.IsScalar()) {
      string val = it->second.as<string>();
      const string key = it->first.as<string>();
      params.set(key, val);
      cout << key << "=" << val << endl;
      continue;
    }
    if (!it->second.IsMap()) {
      continue;
    }

    Params sub_params;
    auto sub_node = it->second;
    if (!prepare_params(std::move(sub_node), sub_params)) {
      cerr << "parse params error with key[" << it->first.as<string>() << "]"
           << endl;
      return false;
    }
    params.set(it->first.as<string>(), sub_params);
  }
  return true;
}

// Validates the [SearcherCommon] section of the configuration: it must exist,
// name a searcher through SearcherClass or SearcherConfig, and provide
// IndexPath, TopK and QueryFile. The first missing item is reported on cerr.
//
// Returns true when every required item is present.
bool check_config(YAML::Node &config_node) {
  auto common = config_node["SearcherCommon"];
  if (!common) {
    cerr << "Can not find [SearcherCommon] in config" << endl;
    return false;
  }

  if (!(common["SearcherClass"] || common["SearcherConfig"])) {
    cerr << "Can not find [SearcherClass] or [SearcherConfig] in config"
         << endl;
    return false;
  }

  // Checked in this order so the first missing key is the one reported.
  const string required_keys[] = {"IndexPath", "TopK", "QueryFile"};
  for (const string &key : required_keys) {
    if (!common[key]) {
      cerr << "Can not find [" << key << "] in config" << endl;
      return false;
    }
  }
  return true;
}

// Prints the command line synopsis to stdout.
void usage(void) {
  static const char kSynopsis[] = "Usage: recall CONFIG.yaml [plugin file path]";
  std::cout << kSynopsis << std::endl;
}

// Loads the index at `index_dir` into `flower`.
// Returns true when Flow::load reports 0; otherwise logs the code and returns
// false.
bool load_index(Flow &flower, string &index_dir) {
  const int ret = flower.load(index_dir);
  if (ret == 0) {
    cout << "Load index done!" << endl;
    return true;
  }

  cerr << "Flow load failed with ret " << ret << endl;
  return false;
}

int recall_dense(std::string &query_type, size_t thread_count,
                 size_t batch_count, string top_k, size_t gt_count,
                 string query_file, string &first_sep, string &second_sep,
                 string &ground_truth_file, string &ground_truth_first_sep,
                 string ground_truth_second_sep, Flow &flower,
                 string &index_dir, string &log_dir, FilterMode filter_mode) {
  if (query_type == "float") {
    Recall<float> recall(thread_count, log_dir, batch_count, filter_mode);
    if (!recall.load_query(query_file, first_sep, second_sep)) {
      return -1;
    }

    if (ground_truth_file != "") {
      if (!recall.load_external_gt_file(ground_truth_file,
                                        ground_truth_first_sep,
                                        ground_truth_second_sep)) {
        return -1;
      }
    }

    if (load_index(flower, index_dir)) {
      recall.run_dense(&flower, top_k, gt_count);
    } else {
      return -1;
    }
  } else if (query_type == "int8") {
    Recall<int8_t> recall(thread_count, log_dir, batch_count, filter_mode);
    if (!recall.load_query(query_file, first_sep, second_sep)) {
      return -1;
    }

    if (ground_truth_file != "") {
      if (!recall.load_external_gt_file(ground_truth_file,
                                        ground_truth_first_sep,
                                        ground_truth_second_sep)) {
        return -1;
      }
    }

    if (load_index(flower, index_dir)) {
      recall.run_dense(&flower, top_k, gt_count);
    } else {
      return -1;
    }
  } else if (query_type == "binary") {
    Recall<uint32_t> recall(thread_count, log_dir, batch_count, filter_mode);
    if (!recall.load_query(query_file, first_sep, second_sep)) {
      return -1;
    }

    if (ground_truth_file != "") {
      if (!recall.load_external_gt_file(ground_truth_file,
                                        ground_truth_first_sep,
                                        ground_truth_second_sep)) {
        return -1;
      }
    }

    if (load_index(flower, index_dir)) {
      recall.run_dense(&flower, top_k, gt_count);
    } else {
      return -1;
    }
  } else if (query_type == "binary64") {
    Recall<uint64_t> recall(thread_count, log_dir, batch_count, filter_mode);
    if (!recall.load_query(query_file, first_sep, second_sep)) {
      return -1;
    }

    if (ground_truth_file != "") {
      if (!recall.load_external_gt_file(ground_truth_file,
                                        ground_truth_first_sep,
                                        ground_truth_second_sep)) {
        return -1;
      }
    }

    if (load_index(flower, index_dir)) {
      recall.run_dense(&flower, top_k, gt_count);
    } else {
      return -1;
    }
  } else {
    cerr << "Can not recognize type: " << query_type << endl;
  }

  return 0;
}

// Loads the sparse index at `index_dir` into `flower`.
// Returns true when SparseFlow::load reports 0; otherwise logs the code and
// returns false.
bool load_sparse_index(SparseFlow &flower, string &index_dir) {
  const int ret = flower.load(index_dir);
  if (ret == 0) {
    cout << "Load index done!" << endl;
    return true;
  }

  cerr << "Flow load failed with ret " << ret << endl;
  return false;
}

// Entry point for sparse recall. Only the "float" query type is supported:
// the queries are loaded, an external ground-truth file is loaded when one is
// given, the index is loaded and the recall is run.
//
// Returns 0 on success and -1 on failure. An unrecognised query type is a
// failure as well: nothing was evaluated, so it must not report success.
int recall_sparse(std::string &query_type, size_t thread_count,
                  size_t batch_count, string top_k, size_t gt_count,
                  string &query_file, string &first_sep, string &second_sep,
                  string &ground_truth_file, string &ground_truth_first_sep,
                  string &ground_truth_second_sep, SparseFlow &flower,
                  string &index_dir, string &log_dir, FilterMode filter_mode) {
  if (query_type != "float") {
    cerr << "Can not recognize type: " << query_type << endl;
    return -1;
  }

  SparseRecall<float> recall(thread_count, log_dir, batch_count, filter_mode);
  if (!recall.load_query(query_file, first_sep, second_sep)) {
    return -1;
  }

  if (ground_truth_file != "") {
    if (!recall.load_external_gt_file(ground_truth_file,
                                      ground_truth_first_sep,
                                      ground_truth_second_sep)) {
      return -1;
    }
  }

  if (!load_sparse_index(flower, index_dir)) {
    return -1;
  }
  recall.run_sparse(&flower, top_k, gt_count);

  return 0;
}

// Sets g_recall_precision from its textual form.
//
// An empty string selects the default of 1e-6. Otherwise the text is parsed
// with std::stof, and a parse failure is reported on cerr.
//
// Returns non-zero on success and zero on failure.
int get_recall_precision(string &recall_precision_string) {
  constexpr float DEFAULT_RECALL_PRECISION = 1e-6;

  if (recall_precision_string.empty()) {
    g_recall_precision = DEFAULT_RECALL_PRECISION;
    return true;
  }

  bool parsed = true;
  try {
    g_recall_precision = std::stof(recall_precision_string);
    std::cout << "Recall Score Precesion: " << g_recall_precision << std::endl;
  } catch (const std::invalid_argument &e) {
    std::cerr << "Exeception in getting recall precision: " << e.what()
              << ", value: " << recall_precision_string << std::endl;
    parsed = false;
  } catch (const std::out_of_range &e) {
    std::cerr << "Out of range exception in getting recall precision: "
              << e.what() << ", value: " << recall_precision_string
              << std::endl;
    parsed = false;
  }

  return parsed;
}

int main(int argc, char *argv[]) {
  if (argc < 2) {
    usage();
    return -1;
  }

  IndexPluginBroker broker;
  std::string error;
  for (int i = 2; i < argc; ++i) {
    if (!broker.emplace(argv[i], &error)) {
      cerr << "Failed to load plugin: " << argv[i] << " (" << error << ")"
           << endl;
      return -1;
    }
  }

  YAML::Node config_node;
  try {
    config_node = YAML::LoadFile(argv[1]);
  } catch (...) {
    cerr << "Load YAML file[" << argv[1] << "] failed!" << endl;
    return -1;
  }
  if (!check_config(config_node)) {
    return -1;
  }
  auto config_common = config_node["SearcherCommon"];

  map<string, int> LOG_LEVEL = {{"debug", IndexLogger::LEVEL_DEBUG},
                                {"info", IndexLogger::LEVEL_INFO},
                                {"warn", IndexLogger::LEVEL_WARN},
                                {"error", IndexLogger::LEVEL_ERROR},
                                {"fatal", IndexLogger::LEVEL_FATAL}};
  string log_level = config_common["LogLevel"]
                         ? config_common["LogLevel"].as<string>()
                         : "debug";
  transform(log_level.begin(), log_level.end(), log_level.begin(), ::tolower);
  if (LOG_LEVEL.find(log_level) != LOG_LEVEL.end()) {
    IndexLoggerBroker::SetLevel(LOG_LEVEL[log_level]);
  }

  // Calculate Recall
  string log_dir = "";
  if (config_common["RecallLogDir"]) {
    log_dir = config_common["RecallLogDir"].as<string>();
  }
  size_t thread_count = config_common["RecallThreadCount"]
                            ? config_common["RecallThreadCount"].as<uint64_t>()
                            : 0;
  size_t gt_count = config_common["RecallGTCount"]
                        ? config_common["RecallGTCount"].as<uint64_t>()
                        : 100;
  size_t batch_count = config_common["RecallBatchCount"]
                           ? config_common["RecallBatchCount"].as<uint64_t>()
                           : 0;
  g_compare_by_id = config_common["CompareById"]
                        ? config_common["CompareById"].as<bool>()
                        : 0;
  string top_k = config_common["TopK"].as<string>();

  string recall_precision_string =
      config_common["RecallScorePrecision"]
          ? config_common["RecallScorePrecision"].as<string>()
          : "";

  if (!get_recall_precision(recall_precision_string)) {
    cerr << "Get recall precision failed, value: " << recall_precision_string
         << endl;
    return -1;
  }

  RetrievalMode retrieval_mode{RM_DENSE};
  if (config_common["RetrievalMode"]) {
    std::string retrieval_mode_str =
        config_common["RetrievalMode"].as<string>();
    if (retrieval_mode_str == "dense") {
      retrieval_mode = RM_DENSE;
    } else if (retrieval_mode_str == "sparse") {
      retrieval_mode = RM_SPARSE;
    }
  }

  FilterMode filter_mode{FM_NONE};
  if (config_common["FilterMode"]) {
    std::string filter_mode_str = config_common["FilterMode"].as<string>();
    if (filter_mode_str == "tag") {
      filter_mode = FM_TAG;
    }
  }

  string query_file = config_common["QueryFile"].as<string>();

  string first_sep = config_common["QueryFirstSep"]
                         ? config_common["QueryFirstSep"].as<string>()
                         : ";";
  string second_sep = config_common["QuerySecondSep"]
                          ? config_common["QuerySecondSep"].as<string>()
                          : " ";
  string query_type = config_common["QueryType"]
                          ? config_common["QueryType"].as<string>()
                          : "float";
  string container_type = config_common["ContainerType"]
                              ? config_common["ContainerType"].as<string>()
                              : "MMapFileStorage";

  string ground_truth_file = "";
  string ground_truth_first_sep = ";";
  string ground_truth_second_sep = " ";

  if (config_common["GroundTruthFile"]) {
    ground_truth_file = config_common["GroundTruthFile"].as<string>();

    if (config_common["GroundTruthFirstSep"]) {
      ground_truth_first_sep =
          config_common["GroundTruthFirstSep"].as<string>();
    }

    if (config_common["GroundTruthSecondSep"]) {
      ground_truth_second_sep =
          config_common["GroundTruthSecondSep"].as<string>();
    }
  }

  if (retrieval_mode == RM_SPARSE) {
    SparseFlow flower;
    Params container_params;
    if (config_node["ContainerParams"]) {
      // Get index params of Searcher in flower object
      if (!prepare_params(config_node["ContainerParams"], container_params)) {
        return -1;
      }
      cout << "Created index params of a container in flower object " << endl;
    }

    int ret = flower.set_container(container_type, container_params);
    if (0 != ret) {
      cerr << "Create" << container_type << "failed." << endl;
      return -1;
    }

    // Set a Searcher
    if (config_common["SearcherClass"]) {
      Params params;
      if (config_node["SearcherParams"]) {
        // Get index params of Searcher in flower object
        if (!prepare_params(config_node["SearcherParams"], params)) {
          return -1;
        }
        cout << "Created index params of a searcher in flower object " << endl;
      }

      string searcher_class = config_common["SearcherClass"].as<string>();
      ret = flower.set_searcher(searcher_class, params);
      if (0 != ret) {
        cerr << "Failed to create searcher " << searcher_class << endl;
        return -1;
      }
      cout << "Created searcher " << searcher_class << endl;
    } else {  // SearcherConfig
      std::cout << config_common["SearcherConfig"].as<string>() << std::endl;
      auto params =
          zvec::core_interface::IndexFactory::DeserializeIndexParamFromJson(
              config_common["SearcherConfig"].as<string>());

      auto index =
          zvec::core_interface::IndexFactory::CreateAndInitIndex(*params);

      flower.set_searcher(index->index_searcher());
    }

    string index_dir = config_common["IndexPath"].as<string>();
    recall_sparse(query_type, thread_count, batch_count, top_k, gt_count,
                  query_file, first_sep, second_sep, ground_truth_file,
                  ground_truth_first_sep, ground_truth_second_sep, flower,
                  index_dir, log_dir, filter_mode);

    flower.unload();

    cout << "Recall done." << endl;
  } else {
    Flow flower;
    Params container_params;
    if (config_node["ContainerParams"]) {
      // Get index params of Searcher in flower object
      if (!prepare_params(config_node["ContainerParams"], container_params)) {
        return -1;
      }
      cout << "Created index params of a container in flower object " << endl;
    }

    int ret = flower.set_container(container_type, container_params);
    if (0 != ret) {
      cerr << "Create" << container_type << "failed." << endl;
      return -1;
    }

    // Set a Searcher
    if (config_common["SearcherClass"]) {
      Params params;
      if (config_node["SearcherParams"]) {
        // Get index params of Searcher in flower object
        if (!prepare_params(config_node["SearcherParams"], params)) {
          return -1;
        }
        cout << "Created index params of a searcher in flower object " << endl;
      }

      string searcher_class = config_common["SearcherClass"].as<string>();
      ret = flower.set_searcher(searcher_class, params);
      if (0 != ret) {
        cerr << "Failed to create searcher " << searcher_class << endl;
        return -1;
      }
      cout << "Created searcher " << searcher_class << endl;
    } else {  // SearcherConfig
      std::cout << config_common["SearcherConfig"].as<string>() << std::endl;
      auto params =
          zvec::core_interface::IndexFactory::DeserializeIndexParamFromJson(
              config_common["SearcherConfig"].as<string>());

      auto index =
          zvec::core_interface::IndexFactory::CreateAndInitIndex(*params);

      flower.set_searcher(index->index_searcher());
    }

    string index_dir = config_common["IndexPath"].as<string>();
    if (retrieval_mode == RM_DENSE) {
      recall_dense(query_type, thread_count, batch_count, top_k, gt_count,
                   query_file, first_sep, second_sep, ground_truth_file,
                   ground_truth_first_sep, ground_truth_second_sep, flower,
                   index_dir, log_dir, filter_mode);
    } else {
      std::string mode = retrieval_mode == 1 ? "Dense" : "Sparse";
      cerr << "unsupported retrieval mode: " << mode << endl;

      return -1;
    }

    // Cleanup
    flower.unload();

    cout << "Recall done." << endl;
  }

  return 0;
}


================================================
FILE: tools/core/txt2vecs.cc
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <iostream>
#include <set>
#include "gflags/gflags.h"
#include "zvec/core/framework/index_meta.h"
#include "index_meta_helper.h"
#include "txt_input_reader.h"
#include "vecs_common.h"

using namespace std;
using namespace zvec::core;

// Command-line flags of the txt2vecs tool.

// Text input: path plus the two separators handed to TxtInputReader.
DEFINE_string(input, "input.txt", "txt input file");
DEFINE_string(input_first_sep, ";", "input first sep");
DEFINE_string(input_second_sep, " ", "input second sep");
// Path of the binary vecs file that is created (opened with "wb").
DEFINE_string(output, "output.vecs", "vecs output file");
// Element type; selects the process<T>() instantiation in main().
// NOTE(review): main() also accepts "binary64", which this help text does not
// list — confirm whether it is meant to be a documented value.
DEFINE_string(type, "float",
              "available type: float, double, int16, int8, binary");
// Forwarded to IndexMetaHelper::parse_from together with the type.
DEFINE_string(method, "L2", "available method: L2, IP, HAMMING");
// Only used on the non-sparse path (passed to parse_from and load_record).
DEFINE_int32(dimension, 256, "data dimension");
// "sparse" selects the sparse-only output layout; every other value takes the
// dense path of process<T>().
DEFINE_string(vector_type, "dense", "available type: dense, hybrid, sparse");

// Writes the vecs header followed by the serialized index meta to `wfp`.
//
// `header` is received by value: its meta_size field is set to the size of
// the serialized meta and that local copy is what gets written.
// `total_writes` is advanced by the number of bytes written by each step.
//
// Returns true on success (the file stays open). On a failed write the file
// is closed here and false is returned, so the caller must not close it again.
bool write_header_output(VecsHeader header, const IndexMeta &meta,
                         size_t &total_writes, FILE *wfp) {
  // --- header section ---
  std::cout << "Begin to Write Header Section..." << std::endl;

  std::string serialized_meta;
  meta.serialize(&serialized_meta);
  header.meta_size = serialized_meta.size();

  if (fwrite(&header, sizeof(header), 1, wfp) != 1) {
    cerr << "Write header error" << endl;
    fclose(wfp);
    return false;
  }
  total_writes += sizeof(header);

  std::cout << "Total Writes after Header Section: " << total_writes
            << std::endl
            << std::endl;

  // --- meta section ---
  std::cout << "Begin to Write Meta Section..." << std::endl;

  if (fwrite(serialized_meta.c_str(), serialized_meta.size(), 1, wfp) != 1) {
    cerr << "Write header meta_buf error" << endl;
    fclose(wfp);
    return false;
  }
  total_writes += serialized_meta.size();

  std::cout << "Total Writes after Meta Buf: " << total_writes << std::endl
            << std::endl;

  return true;
}

// Sparse-layout counterpart of the header writer: emits the vecs header and
// then the serialized index meta to `wfp`.
//
// `header` is a by-value copy whose meta_size field is filled in with the
// serialized meta size before it is written. `total_writes` grows by the
// bytes written in each step.
//
// Returns true on success (file left open). When a write fails the file is
// closed here and false is returned.
bool write_header_output_sparse(VecsHeader header, const IndexMeta &meta,
                                size_t &total_writes, FILE *wfp) {
  // --- header section ---
  std::cout << "Begin to Write Header Section..." << std::endl;

  std::string serialized_meta;
  meta.serialize(&serialized_meta);
  header.meta_size = serialized_meta.size();

  if (fwrite(&header, sizeof(header), 1, wfp) != 1) {
    cerr << "Write header error" << endl;
    fclose(wfp);
    return false;
  }
  total_writes += sizeof(header);

  std::cout << "Total Writes after Header Section: " << total_writes
            << std::endl
            << std::endl;

  // --- meta section ---
  std::cout << "Begin to Write Sparse Meta Section..." << std::endl;

  if (fwrite(serialized_meta.c_str(), serialized_meta.size(), 1, wfp) != 1) {
    cerr << "Write header meta buf error" << endl;
    fclose(wfp);
    return false;
  }
  total_writes += serialized_meta.size();

  std::cout << "Total Writes after Meta Buf: " << total_writes << std::endl
            << std::endl;

  return true;
}

// Writes the first `vec_num` dense vectors of `features` back to back as raw
// arrays of T.
//
// `total_writes` is advanced by the number of bytes written.
//
// Returns true on success (the file stays open). On any failure — a short
// write, or `vec_num` exceeding the number of available vectors — the file is
// closed here and false is returned, so the caller must not close it again.
template <typename T>
bool write_features_output(size_t vec_num,
                           const std::vector<std::vector<T>> &features,
                           size_t &total_writes, FILE *wfp) {
  // write dense vector
  std::cout << "Begin to Write Dense Vector Section..." << std::endl;

  // Refuse to index past the end of the container.
  if (vec_num > features.size()) {
    std::cerr << "Write feature error. vec_num(" << vec_num
              << ") exceeds features's size(" << features.size() << ")."
              << std::endl;
    fclose(wfp);
    return false;
  }

  for (size_t i = 0; i < vec_num; ++i) {
    const std::vector<T> &feature = features[i];
    // data() is valid for an empty vector, whereas &feature[0] is not.
    const size_t wret =
        fwrite(feature.data(), sizeof(T), feature.size(), wfp);
    if (wret != feature.size()) {
      std::cerr << "Write feature error. " << std::endl;
      fclose(wfp);
      return false;
    }

    total_writes += feature.size() * sizeof(T);
  }

  std::cout << "Total Writes after Dense Vector: " << total_writes << std::endl
            << std::endl;

  return true;
}

// Writes the first `vec_num` entries of `keys` as consecutive 64-bit values.
//
// `total_writes` is advanced by sizeof(uint64_t) per key written.
//
// Returns true on success (the file stays open). On any failure — a failed
// write, or `vec_num` exceeding the number of keys — the file is closed here
// and false is returned, so the caller must not close it again.
bool write_keys_output(size_t vec_num, const std::vector<uint64_t> &keys,
                       size_t &total_writes, FILE *wfp) {
  std::cout << "Begin to Write Key Section..." << std::endl;

  // Refuse to index past the end of the container.
  if (vec_num > keys.size()) {
    std::cerr << "Write key error. vec_num(" << vec_num
              << ") exceeds keys's size(" << keys.size() << ")." << std::endl;
    fclose(wfp);
    return false;
  }

  for (size_t i = 0; i < vec_num; ++i) {
    const uint64_t key = keys[i];
    const size_t wret = fwrite(&key, sizeof(key), 1, wfp);
    if (wret != 1) {
      std::cerr << "Write key error. key:" << key << std::endl;
      fclose(wfp);
      return false;
    }

    total_writes += sizeof(uint64_t);
  }

  std::cout << "Total Writes after Key Section: " << total_writes << std::endl
            << std::endl;

  return true;
}

// Writes the sparse section for the first `vec_num` entries of `sparse_data`.
//
// Layout produced:
//   1. an offset table: one uint64_t per vector, the running sum of
//      SparseData::get_len() of all preceding vectors;
//   2. per vector: its `count` field (uint32_t), its indices (uint32_t each)
//      and its feature values (T each).
//
// `total_writes` is advanced by the number of bytes written. Max/min/average
// non-zero counts are printed to stdout once the section is complete.
//
// Returns true on success (the file stays open). On any failure — a short
// write, or `vec_num` exceeding the number of entries — the file is closed
// here and false is returned, so the caller must not close it again.
template <typename T>
bool write_sparse_features_output(size_t vec_num,
                                  const vector<SparseData<T>> &sparse_data,
                                  size_t &total_writes, FILE *wfp) {
  uint32_t sparse_max_count = 0;
  uint32_t sparse_min_count = -1U;
  // 64-bit accumulator: the sum over all vectors can exceed 32 bits.
  uint64_t sparse_total_count = 0;

  // write sparse meta
  std::cout << "Begin to Write Sparse Meta Section..." << std::endl;

  // Refuse to index past the end of the container.
  if (vec_num > sparse_data.size()) {
    cerr << "Write sparse feature error. vec_num(" << vec_num
         << ") exceeds sparse data's size(" << sparse_data.size() << ")."
         << endl;
    fclose(wfp);
    return false;
  }

  size_t wret;
  uint64_t offset = 0;
  for (size_t i = 0; i < vec_num; ++i) {
    wret = fwrite(&offset, sizeof(uint64_t), 1, wfp);
    if (wret != 1) {
      cerr << "Write sparse feature len error. " << endl;
      fclose(wfp);
      return false;
    }
    offset += sparse_data[i].get_len();

    // Account for exactly what was written (a uint64_t offset).
    total_writes += sizeof(uint64_t);
  }
  std::cout << "Total Writes after Sparse Meta Section: " << total_writes
            << std::endl
            << std::endl;

  std::cout << "Begin to Write Sparse Vector Section..." << std::endl;
  for (size_t i = 0; i < vec_num; ++i) {
    const SparseData<T> &sparse_one_data = sparse_data[i];
    const size_t index_count = sparse_one_data.indices.size();
    const size_t feature_count = sparse_one_data.features.size();

    wret = fwrite(&(sparse_one_data.count), sizeof(uint32_t), 1, wfp);
    if (wret != 1) {
      cerr << "Write sparse feature count error. " << endl;
      fclose(wfp);
      return false;
    }

    total_writes += sizeof(uint32_t);

    // data() stays valid for empty vectors, unlike &v[0].
    wret = fwrite(sparse_one_data.indices.data(), sizeof(uint32_t),
                  index_count, wfp);
    if (wret != index_count) {
      cerr << "Write feature error. " << endl;
      fclose(wfp);
      return false;
    }

    total_writes += sizeof(uint32_t) * index_count;

    // Statistics over the number of non-zero entries per vector.
    if (index_count > sparse_max_count) {
      sparse_max_count = static_cast<uint32_t>(index_count);
    }
    if (index_count < sparse_min_count) {
      sparse_min_count = static_cast<uint32_t>(index_count);
    }
    sparse_total_count += index_count;

    wret = fwrite(sparse_one_data.features.data(), sizeof(T), feature_count,
                  wfp);
    if (wret != feature_count) {
      cerr << "Write feature error. " << endl;
      fclose(wfp);
      return false;
    }

    total_writes += sizeof(T) * feature_count;
  }

  std::cout << "Total Writes after Sparse Vector Section: " << total_writes
            << std::endl
            << std::endl;

  // With no vectors there is nothing to average and no meaningful minimum.
  const uint64_t sparse_avg_count =
      vec_num != 0 ? sparse_total_count / vec_num : 0;
  if (vec_num == 0) {
    sparse_min_count = 0;
  }

  std::cout << "Max Sparse Dimension Count: " << sparse_max_count << std::endl;
  std::cout << "Min Sparse Dimension Count: " << sparse_min_count << std::endl;
  std::cout << "Avg Sparse Dimension Count: " << sparse_avg_count << std::endl;

  return true;
}

// Writes the tag-list section for the first `vec_num` entries of `taglists`.
//
// Layout produced:
//   1. an offset table: one uint64_t per record, the running sum of the tag
//      payload bytes (tag count * sizeof(uint64_t)) of all preceding records;
//   2. per record: the tag count (uint64_t) followed by the tags themselves.
//
// NOTE(review): the offsets do not include the per-record count prefix that
// is written in step 2 — confirm against the vecs reader that this is the
// intended meaning of the table. The values are emitted unchanged here.
//
// `total_writes` is advanced by the number of bytes written.
//
// Returns true on success (the file stays open). On any failure — a short
// write, or `vec_num` exceeding the number of tag lists — the file is closed
// here and false is returned, so the caller must not close it again.
bool write_taglists_output(size_t vec_num,
                           const std::vector<std::vector<uint64_t>> &taglists,
                           size_t &total_writes, FILE *wfp) {
  std::cout << "Begin to Write Tag List Section..." << std::endl;

  // write tag list meta
  std::cout << "Begin to Write Tag List Meta Section..." << std::endl;

  // Refuse to index past the end of the container.
  if (vec_num > taglists.size()) {
    std::cerr << "Write tag list error. vec_num(" << vec_num
              << ") exceeds tag lists's size(" << taglists.size() << ")."
              << std::endl;
    fclose(wfp);
    return false;
  }

  size_t wret;
  uint64_t offset = 0;
  for (size_t i = 0; i < vec_num; ++i) {
    wret = fwrite(&offset, sizeof(uint64_t), 1, wfp);
    if (wret != 1) {
      std::cerr << "Write tag list meta error. Rec no: " << i << std::endl;
      fclose(wfp);
      return false;
    }
    offset += taglists[i].size() * sizeof(uint64_t);

    // Account for exactly what was written (a uint64_t offset).
    total_writes += sizeof(uint64_t);
  }
  std::cout << "Total Writes after Tag Meta Section: " << total_writes
            << std::endl
            << std::endl;

  for (size_t i = 0; i < vec_num; ++i) {
    // Bind by reference: no need to copy every tag list just to write it.
    const std::vector<uint64_t> &taglist = taglists[i];
    const uint64_t taglist_size = taglist.size();
    wret = fwrite(&taglist_size, sizeof(uint64_t), 1, wfp);
    if (wret != 1) {
      std::cerr << "Write tag list size error. Rec no: " << i << std::endl;
      fclose(wfp);
      return false;
    }

    // data() is valid for records without tags, whereas &taglist[0] is not.
    wret = fwrite(taglist.data(), sizeof(uint64_t), taglist.size(), wfp);
    if (wret != taglist.size()) {
      std::cerr << "Write tag list error. Rec no: " << i << std::endl;
      fclose(wfp);
      return false;
    }

    total_writes += sizeof(uint64_t) * taglist.size() + sizeof(uint64_t);
  }

  std::cout << "Total Writes after Tag List Section: " << total_writes
            << std::endl
            << std::endl;

  return true;
}

// Writes a sparse-only vecs file to FLAGS_output.
//
// Sections are written in this order: header + meta, keys, sparse vectors
// and, when the TAGLIST bit is set in `header.bitmap`, the tag lists.
//
// Returns false when `keys` is empty, when `keys` and `sparse_data` differ in
// size, when the output file cannot be opened, when any section fails to
// write, or when closing the file fails.
template <typename T>
bool write_vecs_output_sparse(VecsHeader &header, const IndexMeta &meta,
                              const vector<uint64_t> &keys,
                              const vector<SparseData<T>> &sparse_data,
                              const vector<vector<uint64_t>> &taglists) {
  if (keys.empty()) {
    cerr << "keys is empty." << endl;
    return false;
  }

  if (keys.size() != sparse_data.size()) {
    cerr << "keys's size(" << keys.size()
         << ") is not equal to sparse data's size(" << sparse_data.size()
         << ")." << endl;
    return false;
  }

  size_t vec_num = keys.size();

  FILE *wfp = fopen(FLAGS_output.c_str(), "wb");
  if (!wfp) {
    cerr << "Open file error. " << FLAGS_output << endl;
    return false;
  }

  size_t total_writes = 0;

  std::cout << "------------------------" << std::endl;
  std::cout << " Output Process         " << std::endl;
  std::cout << "------------------------" << std::endl;

  // The section writers close `wfp` themselves when they fail, so the error
  // paths below must not close it a second time.

  // write sparse header
  bool ret = write_header_output_sparse(header, meta, total_writes, wfp);
  if (!ret) {
    cerr << "write header error! " << endl;

    return false;
  }

  // write keys
  ret = write_keys_output(vec_num, keys, total_writes, wfp);
  if (!ret) {
    cerr << "write keys error! " << endl;

    return false;
  }

  // write sparse features
  ret = write_sparse_features_output(vec_num, sparse_data, total_writes, wfp);
  if (!ret) {
    cerr << "write sparse features error! " << endl;

    return false;
  }

  if ((header.bitmap & (1ULL << BITMAP_INDEX_TAGLIST)) != 0) {
    // write tag lists features
    ret = write_taglists_output(vec_num, taglists, total_writes, wfp);
    if (!ret) {
      cerr << "write tag lists error! " << endl;

      return false;
    }
  }

  // Buffered data is flushed on close; a failure here means the file on disk
  // may be incomplete, so it must not be reported as success.
  if (fclose(wfp) != 0) {
    cerr << "Close file error. " << FLAGS_output << endl;
    return false;
  }

  std::cout << "------------------------" << std::endl;
  std::cout << " Output Done            " << std::endl;
  std::cout << "------------------------" << std::endl;

  return true;
}

// Writes a dense (optionally hybrid) vecs file to FLAGS_output.
//
// Sections are written in this order: header + meta, dense features, keys,
// then — depending on the SPARSE and TAGLIST bits of `header.bitmap` — the
// sparse vectors and the tag lists. `header.num_vecs` records are written.
//
// Returns false when `keys` is empty, when `keys` and `features` differ in
// size, when `header.num_vecs` exceeds the number of keys, when the output
// file cannot be opened, when any section fails to write, or when closing the
// file fails.
template <typename T>
bool write_vecs_output(VecsHeader &header, const IndexMeta &meta,
                       const vector<uint64_t> &keys,
                       const vector<vector<T>> &features,
                       const vector<SparseData<T>> &sparse_data,
                       const vector<vector<uint64_t>> &taglists) {
  if (keys.empty()) {
    cerr << "keys is empty." << endl;
    return false;
  }

  if (keys.size() != features.size()) {
    cerr << "keys's size(" << keys.size()
         << ") is not equal to features's size(" << features.size() << ")."
         << endl;
    return false;
  }

  size_t vec_num = header.num_vecs;

  // The loops below index keys/features with vec_num, so the header count
  // must not exceed what was validated above.
  if (vec_num > keys.size()) {
    cerr << "header's num_vecs(" << vec_num << ") exceeds keys's size("
         << keys.size() << ")." << endl;
    return false;
  }

  FILE *wfp = fopen(FLAGS_output.c_str(), "wb");
  if (!wfp) {
    cerr << "Open file error. " << FLAGS_output << endl;
    return false;
  }

  size_t total_writes = 0;

  std::cout << "------------------------" << std::endl;
  std::cout << " Output Process         " << std::endl;
  std::cout << "------------------------" << std::endl;

  // The section writers close `wfp` themselves when they fail, so the error
  // paths below must not close it a second time.

  // write header
  bool ret = write_header_output(header, meta, total_writes, wfp);
  if (!ret) {
    cerr << "write header error! " << endl;

    return false;
  }

  // write features
  ret = write_features_output(vec_num, features, total_writes, wfp);
  if (!ret) {
    cerr << "write features error! " << endl;

    return false;
  }

  // write keys
  ret = write_keys_output(vec_num, keys, total_writes, wfp);
  if (!ret) {
    cerr << "write keys error! " << endl;

    return false;
  }

  // write sparse features
  if ((header.bitmap & (1ULL << BITMAP_INDEX_SPARSE)) != 0) {
    ret = write_sparse_features_output(vec_num, sparse_data, total_writes, wfp);
    if (!ret) {
      cerr << "write sparse features error! " << endl;

      return false;
    }
  }

  if ((header.bitmap & (1ULL << BITMAP_INDEX_TAGLIST)) != 0) {
    // write tag lists features
    ret = write_taglists_output(vec_num, taglists, total_writes, wfp);
    if (!ret) {
      cerr << "write tag lists error! " << endl;

      return false;
    }
  }

  // Buffered data is flushed on close; a failure here means the file on disk
  // may be incomplete, so it must not be reported as success.
  if (fclose(wfp) != 0) {
    cerr << "Close file error. " << FLAGS_output << endl;
    return false;
  }

  std::cout << "------------------------" << std::endl;
  std::cout << " Output Done            " << std::endl;
  std::cout << "------------------------" << std::endl;

  return true;
}

// Computes the offset and byte size of every data section for the dense
// layout, in the order: dense features, keys, sparse vectors, tag lists.
//
// Offsets are relative to the start of the first section (features start at
// 0). A section that has no data gets offset -1 (all bits set) and size 0.
// The sparse and tag-list sizes include an offset table of one uint64_t per
// vector in front of the per-record payload.
//
// `keys` and `features` are accepted for signature symmetry but not read.
// Always returns true.
template <typename T>
bool compute_offset(uint64_t num_vecs, const IndexMeta &meta,
                    const vector<uint64_t> & /*keys*/,
                    const vector<vector<T>> & /*features*/,
                    const vector<SparseData<T>> &sparse_data,
                    const vector<std::vector<uint64_t>> &taglists,
                    uint64_t &key_offset, uint64_t &feature_offset,
                    uint64_t &sparse_offset, uint64_t &taglist_offset,
                    uint64_t &key_size, uint64_t &feature_size,
                    uint64_t &sparse_size, uint64_t &taglist_size) {
  // Running position of the next section.
  size_t cursor = 0;

  // Dense features.
  feature_offset = 0;
  feature_size = num_vecs * meta.element_size();
  cursor += feature_size;

  // Keys.
  key_offset = cursor;
  key_size = num_vecs * sizeof(uint64_t);
  cursor += key_size;

  // Sparse vectors (optional).
  if (sparse_data.empty()) {
    sparse_offset = -1LLU;
    sparse_size = 0;
  } else {
    sparse_offset = cursor;

    // Offset table first, then count + (index, value) pairs per record.
    size_t section_bytes = num_vecs * sizeof(uint64_t);
    for (const auto &entry : sparse_data) {
      section_bytes +=
          sizeof(uint32_t) + entry.count * (sizeof(uint32_t) + sizeof(T));
    }

    sparse_size = section_bytes;
    cursor += sparse_size;
  }

  // Tag lists (optional).
  if (taglists.empty()) {
    taglist_offset = -1LLU;
    taglist_size = 0;
  } else {
    taglist_offset = cursor;

    // Offset table first, then tag count + tags per record.
    size_t section_bytes = num_vecs * sizeof(uint64_t);
    for (const auto &tags : taglists) {
      section_bytes += sizeof(uint64_t) + tags.size() * sizeof(uint64_t);
    }

    taglist_size = section_bytes;
  }

  return true;
}

// Computes the offset and byte size of every data section for the sparse-only
// layout, in the order: keys, sparse vectors, tag lists.
//
// Offsets are relative to the start of the key section (keys start at 0).
// When there are no tag lists, their offset is -1 (all bits set) and their
// size is 0. The sparse and tag-list sizes include an offset table of one
// uint64_t per vector in front of the per-record payload.
//
// `meta` and `keys` are accepted for signature symmetry but not read.
// Always returns true.
template <typename T>
bool compute_sparse_offset(uint64_t num_vecs, const IndexMeta & /*meta*/,
                           const vector<uint64_t> & /*keys*/,
                           const vector<SparseData<T>> &sparse_data,
                           const vector<std::vector<uint64_t>> &taglists,
                           uint64_t &key_offset, uint64_t &sparse_offset,
                           uint64_t &taglist_offset, uint64_t &key_size,
                           uint64_t &sparse_size, uint64_t &taglist_size) {
  // Running position of the next section.
  size_t cursor = 0;

  // Keys.
  key_offset = 0;
  key_size = num_vecs * sizeof(uint64_t);
  cursor += num_vecs * sizeof(uint64_t);

  // Sparse vectors: offset table, then count + (index, value) pairs.
  sparse_offset = cursor;
  size_t section_bytes = num_vecs * sizeof(uint64_t);
  for (const auto &entry : sparse_data) {
    section_bytes +=
        sizeof(uint32_t) + entry.count * (sizeof(uint32_t) + sizeof(T));
  }
  sparse_size = section_bytes;
  cursor += sparse_size;

  // Tag lists (optional): offset table, then tag count + tags per record.
  if (taglists.empty()) {
    taglist_offset = -1LLU;
    taglist_size = 0;
  } else {
    taglist_offset = cursor;

    section_bytes = num_vecs * sizeof(uint64_t);
    for (const auto &tags : taglists) {
      section_bytes += sizeof(uint64_t) + tags.size() * sizeof(uint64_t);
    }

    taglist_size = section_bytes;
  }

  return true;
}

template <typename T>
bool process(void) {
  if (FLAGS_vector_type == "sparse") {
    std::cout << "------------------------" << std::endl;
    std::cout << " Vector Type: sparse    " << std::endl;
    std::cout << "------------------------" << std::endl;

    IndexMeta meta;
    if (!IndexMetaHelper::parse_from(FLAGS_type, FLAGS_method,
                                     FLAGS_vector_type, meta)) {
      cerr << "Index meta parse error." << endl;
      return false;
    }
    cerr << IndexMetaHelper::to_string(meta) << endl;

    TxtInputReader<T> reader;
    vector<uint64_t> keys;
    vector<SparseData<T>> sparse_data;
    vector<std::vector<uint64_t>> taglists;

    bool ret = reader.load_record_sparse(FLAGS_input, FLAGS_input_first_sep,
                                         FLAGS_input_second_sep, keys,
                                         sparse_data, taglists);
    if (!ret) {
      cerr << "Read record failed" << endl;
      return false;
    }

    if (sparse_data.size() == 0) {
      cerr << "empty sparse data!" << endl;
      return false;
    }

    uint64_t num_vecs = keys.size();

    uint64_t key_offset{-1LLU}, sparse_offset{-1LLU}, taglist_offset{-1LLU};
    uint64_t key_size{0}, sparse_size{0}, taglist_size{0};

    compute_sparse_offset(num_vecs, meta, keys, sparse_data, taglists,
                          key_offset, sparse_offset, taglist_offset, key_size,
                          sparse_size, taglist_size);

    VecsHeader header;
    header.num_vecs = keys.size();
    header.meta_size_v1 = 0;
    header.version = 1;
    header.bitmap = 0;
    header.key_offset = key_offset;
    header.dense_offset = -1LLU;
    header.sparse_offset = sparse_offset;
    header.taglist_offset = taglist_offset;
    header.key_size = key_size;
    header.dense_size = 0;
    header.sparse_size = sparse_size;
    header.taglist_size = taglist_size;

    header.bitmap |= (1 << BITMAP_INDEX_KEY);
    header.bitmap |= (1 << BITMAP_INDEX_SPARSE);

    if (taglist_offset != -1LLU) {
      header.bitmap |= (1 << BITMAP_INDEX_TAGLIST);
    }

    ret = write_vecs_output_sparse(header, meta, keys, sparse_data, taglists);
    if (!ret) {
      cerr << "write vecs output failed" << endl;
      return false;
    }
  } else {
    std::cout << "------------------------" << std::endl;
    std::cout << " Vector Type:     " << FLAGS_vector_type << std::endl;
    std::cout << "------------------------" << std::endl;

    IndexMeta meta;
    if (!IndexMetaHelper::parse_from(FLAGS_type, FLAGS_method, FLAGS_dimension,
                                     FLAGS_vector_type, meta)) {
      cerr << "Index meta parse error." << endl;
      return false;
    }
    cerr << IndexMetaHelper::to_string(meta) << endl;

    TxtInputReader<T> reader;
    vector<uint64_t> keys;
    vector<vector<T>> features;
    vector<SparseData<T>> sparse_data;
    vector<std::vector<uint64_t>> taglists;

    bool ret = reader.load_record(FLAGS_input, FLAGS_input_first_sep,
                                  FLAGS_input_second_sep, FLAGS_dimension, keys,
                                  features, sparse_data, taglists);
    if (!ret) {
      cerr << "Read record failed" << endl;
      return false;
    }

    uint64_t num_vecs = keys.size();

    uint64_t key_offset{-1LLU}, features_offset{-1LLU}, sparse_offset{-1LLU},
        taglist_offset{-1LLU};
    uint64_t key_size{0}, feature_size{0}, sparse_size{0}, taglist_size{0};

    compute_offset(num_vecs, meta, keys, features, sparse_data, taglists,
                   key_offset, features_offset, sparse_offset, taglist_offset,
                   key_size, feature_size, sparse_size, taglist_size);

    VecsHeader header;
    header.num_vecs = num_vecs;
    header.meta_size_v1 = 0;
    header.version = 1;
    header.bitmap = 0;
    header.key_offset = key_offset;
    header.dense_offset = features_offset;
    header.sparse_offset = sparse_offset;
    header.taglist_offset = taglist_offset;
    header.key_size = key_size;
    header.dense_size = feature_size;
    header.sparse_size = sparse_size;
    header.taglist_size = taglist_size;

    header.bitmap |= (1 << BITMAP_INDEX_KEY);
    header.bitmap |= (1 << BITMAP_INDEX_DENSE);

    if (sparse_offset != -1LLU) {
      header.bitmap |= (1 << BITMAP_INDEX_SPARSE);
    }

    if (taglist_offset != -1LLU) {
      header.bitmap |= (1 << BITMAP_INDEX_TAGLIST);
    }

    ret =
        write_vecs_output(header, meta, keys, features, sparse_data, taglists);
    if (!ret) {
      cerr << "write vecs output failed" << endl;
      return false;
    }
  }

  return true;
}

// Entry point of txt2vecs: parses the gflags command line and runs the
// conversion with the element type selected by --type.
//
// Returns 0 on success and -1 when the type is not recognized or the
// conversion fails.
int main(int argc, char *argv[]) {
  // gflags
  gflags::SetUsageMessage("Usage: txt2vecs [options]");
  gflags::ParseCommandLineFlags(&argc, &argv, true);

  // Map the textual type onto the matching process<T>() instantiation.
  bool converted = false;
  if (FLAGS_type == "float") {
    converted = process<float>();
  } else if (FLAGS_type == "double") {
    converted = process<double>();
  } else if (FLAGS_type == "int16") {
    converted = process<int16_t>();
  } else if (FLAGS_type == "int8") {
    converted = process<int8_t>();
  } else if (FLAGS_type == "binary") {
    converted = process<uint32_t>();
  } else if (FLAGS_type == "binary64") {
    converted = process<uint64_t>();
  } else {
    cerr << "Can not recognize type: " << FLAGS_type << endl;
    return -1;
  }

  return converted ? 0 : -1;
}


================================================
FILE: tools/core/txt_input_reader.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <string.h>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <zvec/ailego/utility/string_helper.h>

namespace zvec {
namespace core {

// One sparse vector: a stored element count plus parallel arrays of
// dimension indices and their values of type T.
template <typename T>
struct SparseData {
 public:
  // Builds from rvalue containers, taking over their storage.
  SparseData(uint32_t count_in, std::vector<uint32_t> &&indices_in,
             std::vector<T> &&features_in)
      : count(count_in),
        indices(std::move(indices_in)),
        features(std::move(features_in)) {}

  // Builds from lvalue containers. Note that the arguments are moved from,
  // not copied: the caller's vectors give up their contents.
  SparseData(uint32_t count_in, std::vector<uint32_t> &indices_in,
             std::vector<T> &features_in)
      : SparseData(count_in, std::move(indices_in), std::move(features_in)) {}

 public:
  // Serialized size in bytes: a uint32_t count, then every index (uint32_t
  // each), then every feature value (T each).
  uint64_t get_len() const {
    const auto count_bytes = sizeof(uint32_t);
    const auto index_bytes = sizeof(uint32_t) * indices.size();
    const auto feature_bytes = sizeof(T) * features.size();
    return count_bytes + index_bytes + feature_bytes;
  }

 public:
  uint32_t count;                 // element count supplied at construction
  std::vector<uint32_t> indices;  // dimension indices
  std::vector<T> features;        // values, parallel to `indices`
};

// support type: float, binary, int16, int8
template <typename T>
class TxtInputReader {
 public:
  // Load query records from the text file `query_file`, one record per line.
  //
  // Every non-empty line is split by `first_sep` into fields:
  //   - a single field is taken as the dense feature;
  //   - with two or more fields, field 1 is the dense feature;
  //   - field 2, when present, is the sparse feature;
  //   - field 3, when present, is the tag list.
  // Field 0 is always passed through atol() to obtain the key that is used
  // in error messages. Values inside a field are separated by `second_sep`.
  //
  // One entry is appended to `features` and to `sparse_data` per accepted
  // line (an empty sparse entry when the line has no sparse field), while
  // `taglists` only grows for lines that carry a tag field.
  //
  // Returns false if the file cannot be opened, a field fails to parse, or
  // no query was read at all; true otherwise.
  bool load_query(const std::string &query_file, const std::string &first_sep,
                  const std::string &second_sep,
                  std::vector<std::vector<T>> &features,
                  std::vector<SparseData<T>> &sparse_data,
                  std::vector<std::vector<uint64_t>> &taglists) {
    std::fstream qf(query_file, std::ios::in);

    if (!qf.is_open()) {
      std::cerr << "open query file failed! [" << query_file << "]"
                << std::endl;
      return false;
    }

    std::string buffer;
    while (getline(qf, buffer)) {
      buffer.erase(buffer.find_last_not_of('\n') + 1);
      if (buffer.empty()) {
        continue;
      }
      std::vector<std::string> res;
      ailego::StringHelper::Split(buffer, first_sep, &res);
      if (res.empty()) {
        continue;
      }

      // A lone field is the feature itself; otherwise the feature follows
      // the key.
      const std::string &feature_str = res.size() > 1 ? res[1] : res[0];

      std::vector<T> feature;
      size_t dimension = 0;
      if (!load_from_string(feature_str, second_sep, feature, &dimension)) {
        return false;
      }
      features.emplace_back(std::move(feature));

      uint64_t key = atol(res[0].c_str());

      // load sparse feature
      uint32_t sparse_count = 0;
      std::vector<uint32_t> sparse_indices;
      std::vector<T> sparse_feature;

      if (res.size() >= 3) {
        if (!load_from_string_sparse(key, res[2], second_sep, sparse_indices,
                                     sparse_feature, &sparse_count)) {
          std::cerr << "load sparse failed for key: " << key << std::endl;
          return false;
        }
      }

      sparse_data.emplace_back(sparse_count, std::move(sparse_indices),
                               std::move(sparse_feature));

      if (res.size() >= 4) {
        std::vector<uint64_t> taglist;
        size_t tag_count = 0;

        // The tag list is the fourth field, i.e. index 3. Reading index 4
        // here would run past the end when the line has exactly 4 fields.
        if (!load_tags_from_string(res[3], second_sep, taglist, &tag_count)) {
          std::cerr << "load tags failed for key: " << key << std::endl;
          return false;
        }

        taglists.emplace_back(std::move(taglist));
      }
    }

    qf.close();
    if (features.size() == 0) {
      std::cerr << "Read query size is 0" << std::endl;
      return false;
    }
    return true;
  }


  bool load_record(const std::string &input, const std::string &first_sep,
                   const std::string &second_sep, const size_t dimension,
                   std::vector<uint64_t> &keys,
                   std::vector<std::vector<T>> &features,
                   std::vector<SparseData<T>> &sparse_data,
                   std::vector<std::vector<uint64_t>> &taglists) {
    std::fstream qf(input, std::ios::in);

    if (!qf.is_open()) {
      std::cerr << "open file failed! [" << input << "]" << std::endl;
      return false;
    }

    bool ret;
    uint32_t count = 0;
    std::string buffer;

    while (getline(qf, buffer)) {
      buffer.erase(buffer.find_last_not_of('\n') + 1);
      if (buffer.empty()) {
        continue;
      }
      std::vector<std::string> res;
      ailego::StringHelper::Split(buffer, first_sep, &res);
      if (res.size() < 2) {
        std::cerr << "skip record : " << buffer << std::endl;
        continue;
      }

      std::vector<T> feature;
      size_t real_dim = 0;

      // load sparse feature
      uint32_t sparse_count = 0;
      std::vector<uint32_t> sparse_indices;
      std::vector<T> sparse_feature;

      uint64_t key = atol(res[0].c_str());

      // load dense feature
      ret = load_from_string(res[1], second_sep, feature, &real_dim);
      if (!ret) {
        return false;
      }

      if (real_dim != dimension) {
        std::cerr << "real dim (" << real_dim << ") is not equal to dimension("
                  << dimension << ") key : " << res[0] << std::endl;
        continue;
      }

      features.emplace_back(feature);
      keys.emplace_back(key);

      if (res.size() >= 3) {
        ret = load_from_string_sparse(key, res[2], second_sep, sparse_indices,
                                      sparse_feature, &sparse_count);
        if (!ret) {
          std::cerr << "load sparse failed for key: " << key << std::endl;
          return false;
        }

        sparse_data.emplace_back(sparse_count, std::move(sparse_indices),
                                 std::move(sparse_feature));
      }

      if (res.size() >= 4) {
        std::vector<uint64_t> taglist;
        size_t tag_count = 0;

        ret = load_tags_from_string(res[3], second_sep, taglist, &tag_count);
        if (!ret) {
          std::cerr << "load tags failed for key: " << key << std::endl;
          return false;
        }

        taglists.emplace_back(taglist);
      }

      count++;
      if (count % 1000000 == 0) {
        std::cout << "processed " << count << " records!" << std::endl;
      }
    }

    qf.close();

    if (keys.size() == 0) {
      std::cerr << "Reading nothing from input" << std::endl;
      return false;
    }

    return true;
  }

  // Load sparse-only records from the text file `input`, one per line.
  //
  // Each non-empty line is split by `first_sep`: field 0 is the key, field 2
  // the sparse feature and field 3 (optional) the tag list; field 1 is not
  // read by this function. Lines with fewer than three fields are reported
  // and skipped. `taglists` only grows for lines that carry a tag field.
  //
  // Returns false if the file cannot be opened, a field fails to parse, or
  // no record was accepted; true otherwise.
  bool load_record_sparse(const std::string &input,
                          const std::string &first_sep,
                          const std::string &second_sep,
                          std::vector<uint64_t> &keys,
                          std::vector<SparseData<T>> &sparse_data,
                          std::vector<std::vector<uint64_t>> &taglists) {
    std::fstream qf(input, std::ios::in);

    if (!qf.is_open()) {
      std::cerr << "open file failed! [" << input << "]" << std::endl;
      return false;
    }

    bool ret;
    uint32_t count = 0;
    std::string buffer;

    while (getline(qf, buffer)) {
      buffer.erase(buffer.find_last_not_of('\n') + 1);
      if (buffer.empty()) {
        continue;
      }
      std::vector<std::string> res;
      ailego::StringHelper::Split(buffer, first_sep, &res);
      if (res.size() < 2) {
        std::cerr << "skip record : " << buffer << std::endl;
        continue;
      }

      uint64_t key = atol(res[0].c_str());

      // load sparse feature
      uint32_t sparse_count = 0;
      std::vector<uint32_t> sparse_indices;
      std::vector<T> sparse_feature;

      if (res.size() <= 2) {
        std::cerr << "field erorr, key: " << key << std::endl;
        continue;
      }

      ret = load_from_string_sparse(key, res[2], second_sep, sparse_indices,
                                    sparse_feature, &sparse_count);
      if (!ret) {
        std::cerr << "load sparse failed for key: " << key << std::endl;
        return false;
      }

      keys.emplace_back(key);

      sparse_data.emplace_back(sparse_count, std::move(sparse_indices),
                               std::move(sparse_feature));

      if (res.size() >= 4) {
        std::vector<uint64_t> taglist;
        size_t tag_count = 0;

        // The tag list is the fourth field, i.e. index 3. Reading index 4
        // here would run past the end when the line has exactly 4 fields.
        ret = load_tags_from_string(res[3], second_sep, taglist, &tag_count);
        if (!ret) {
          std::cerr << "load tags failed for key: " << key << std::endl;
          return false;
        }

        taglists.emplace_back(std::move(taglist));
      }

      count++;
      if (count % 1000000 == 0) {
        std::cout << "processed " << count << " records!" << std::endl;
      }
    }

    qf.close();

    if (keys.size() == 0) {
      std::cerr << "Reading nothing from input" << std::endl;
      return false;
    }

    return true;
  }

  // Generic dense-feature parser: splits `record` by `second_sep` directly
  // into `data` (element type U) and reports the number of parsed elements
  // through `count`. Always returns true.
  // NOTE(review): the trailing `true` passed to Split presumably controls
  // how empty tokens are handled — confirm against StringHelper.
  template <typename U>
  bool load_from_string(const std::string &record,
                        const std::string &second_sep, std::vector<U> &data,
                        size_t *count) {
    ailego::StringHelper::Split(record, second_sep, &data, true);
    *count = data.size();

    return true;
  }

  // Splits `record` by `second_sep` into float scores stored in `data` and
  // writes the number of parsed scores to `count`. Always returns true.
  bool load_scores_from_string(const std::string &record,
                               const std::string &second_sep,
                               std::vector<float> &data, size_t *count) {
    ailego::StringHelper::Split(record, second_sep, &data, true);
    *count = data.size();

    return true;
  }

  // Splits `record` by `second_sep` into uint64_t ids stored in `data` and
  // writes the number of parsed ids to `count`. Always returns true.
  // It calls Split directly rather than the uint64_t overload of
  // load_from_string, which packs a bit string instead.
  bool load_ids_from_string(const std::string &record,
                            const std::string &second_sep,
                            std::vector<uint64_t> &data, size_t *count) {
    ailego::StringHelper::Split(record, second_sep, &data, true);
    *count = data.size();

    return true;
  }

  // Splits `record` by `second_sep` into uint64_t tags, writes the number of
  // parsed tags to `count`, and leaves `tags` sorted in ascending order.
  // Always returns true.
  bool load_tags_from_string(const std::string &record,
                             const std::string &second_sep,
                             std::vector<uint64_t> &tags, size_t *count) {
    ailego::StringHelper::Split(record, second_sep, &tags, true);
    *count = tags.size();

    // Keep tags ordered. The call is qualified so that it does not depend on
    // argument-dependent lookup finding std::sort.
    std::sort(tags.begin(), tags.end());

    return true;
  }

  // overloading for binary
  // Overload for binary vectors stored as uint32_t words.
  // `record` is split by `second_sep` into one value per bit; the lowest bit
  // of each value is kept. Bits are packed eight per byte, first bit into
  // the most significant position, and the resulting bytes are copied into
  // `data`. `count` receives the number of bits.
  // Returns false when the bit count is zero or not a multiple of 32.
  bool load_from_string(const std::string &record,
                        const std::string &second_sep,
                        std::vector<uint32_t> &data, size_t *count) {
    std::vector<uint8_t> bits;
    ailego::StringHelper::Split(record, second_sep, &bits, true);

    const size_t bit_count = bits.size();
    if (bit_count == 0) {
      std::cerr << "Binary vector size is 0" << std::endl;
      return false;
    }
    if (bit_count % 32 != 0) {
      std::cerr << "Binary vector size must be 32_X" << std::endl;
      return false;
    }

    // Pack eight input bits into each output byte, MSB first.
    std::vector<uint8_t> packed;
    for (size_t base = 0; base < bit_count; base += 8) {
      uint8_t byte = 0;
      for (size_t offset = 0; offset < 8; ++offset) {
        byte |= (bits[base + offset] & 0x01) << (7 - offset);
      }
      packed.push_back(byte);
    }

    data.resize(bit_count / 32);
    memcpy(data.data(), packed.data(), packed.size());
    *count = bit_count;
    return true;
  }

  // overloading for binary
  // Overload for binary vectors stored as uint64_t words.
  // `record` is split by `second_sep` into one value per bit; the lowest bit
  // of each value is kept. Bits are packed eight per byte, first bit into
  // the most significant position, and the resulting bytes are copied into
  // `data`. `count` receives the number of bits.
  // Returns false when the bit count is zero or not a multiple of 64.
  bool load_from_string(const std::string &record,
                        const std::string &second_sep,
                        std::vector<uint64_t> &data, size_t *count) {
    // fetch split value from text file
    // NOTE(review): unlike the uint32_t overload, this call does not pass the
    // trailing `true` to Split — confirm whether that difference is intended.
    std::vector<uint8_t> vec;
    ailego::StringHelper::Split(record, second_sep, &vec);
    if (vec.size() == 0) {
      std::cerr << "Binary vector size is 0" << std::endl;
      return false;
    }
    if (vec.size() % 64 != 0) {
      // The message now matches the 64-bit check above.
      std::cerr << "Binary vector size must be 64_X" << std::endl;
      return false;
    }
    // compact into uint64_t
    size_t sz = vec.size();
    std::vector<uint8_t> tmp;
    for (size_t i = 0; i < sz; i += 8) {
      uint8_t v = 0;
      v |= (vec[i] & 0x01) << 7;
      v |= (vec[i + 1] & 0x01) << 6;
      v |= (vec[i + 2] & 0x01) << 5;
      v |= (vec[i + 3] & 0x01) << 4;
      v |= (vec[i + 4] & 0x01) << 3;
      v |= (vec[i + 5] & 0x01) << 2;
      v |= (vec[i + 6] & 0x01) << 1;
      v |= (vec[i + 7] & 0x01) << 0;
      tmp.push_back(v);
    }
    data.resize(sz / 64);
    memcpy(&data[0], &tmp[0], tmp.size());
    *count = sz;

    return true;
  }

  // Parse one sparse feature of the form "<indices>:<values>", where both
  // halves are lists separated by `second_sep`.
  //
  // When `record` does not split into exactly two parts around ':', nothing
  // is parsed, the outputs are left untouched and true is returned.
  // Otherwise the two lists must have the same length and the indices must
  // be strictly increasing; on success `sparse_count` receives the element
  // count. A length mismatch sets `sparse_count` to 0 and returns false; an
  // ordering violation returns false. `key` is only used in error messages.
  bool load_from_string_sparse(uint64_t key, const std::string &record,
                               const std::string &second_sep,
                               std::vector<uint32_t> &sparse_indices,
                               std::vector<T> &sparse_feature,
                               uint32_t *sparse_count) {
    const std::string sparse_sep = ":";
    std::vector<std::string> parts;
    ailego::StringHelper::Split(record, sparse_sep, &parts);

    if (parts.size() != 2) {
      return true;
    }

    ailego::StringHelper::Split(parts[0], second_sep, &sparse_indices);
    ailego::StringHelper::Split(parts[1], second_sep, &sparse_feature);

    const uint32_t index_count = sparse_indices.size();
    const uint32_t feature_count = sparse_feature.size();

    if (feature_count != index_count) {
      std::cerr << "sparse feature count (" << feature_count
                << ") is not equal with sparse index count(" << index_count
                << ") key : " << key << std::endl;
      *sparse_count = 0;
      return false;
    }
    *sparse_count = feature_count;

    // Indices have to be strictly ascending.
    for (size_t pos = 1; pos < sparse_indices.size(); ++pos) {
      const uint32_t previous = sparse_indices[pos - 1];
      const uint32_t current = sparse_indices[pos];
      if (previous >= current) {
        std::cerr << "sparse indices not ordered, key : " << key
                  << ", dim info: [" << previous << ", " << current << "]"
                  << std::endl;
        return false;
      }
    }

    return true;
  }

  // LINE FORMAT is as follows:
  //      key:key0 key1 key2 ... keyN:score0 score1 score2 ... scoreN
  // Load externally computed ground truth from the text file `input`.
  //
  // Each non-empty line is split by `first_sep`; field 1 holds the result
  // keys and field 2, when present, the matching scores (both separated by
  // `second_sep`). Lines with exactly two fields get a score of 0.0f for
  // every key. Lines with fewer than two fields, or with differing key and
  // score counts, are reported and skipped. One vector of (key, score)
  // pairs is appended to `ground_truth` per accepted line.
  //
  // Returns false if the file cannot be opened or nothing was loaded.
  bool load_external_gt(
      const std::string &input, const std::string &first_sep,
      const std::string &second_sep,
      std::vector<std::vector<std::pair<uint64_t, float>>> &ground_truth) {
    std::fstream gf(input, std::ios::in);
    if (!gf.is_open()) {
      std::cerr << "open file failed! [" << input << "]" << std::endl;
      return false;
    }

    uint32_t processed = 0;
    std::string line;
    while (std::getline(gf, line)) {
      line.erase(line.find_last_not_of('\n') + 1);
      if (line.empty()) {
        continue;
      }

      std::vector<std::string> fields;
      ailego::StringHelper::Split(line, first_sep, &fields);
      if (fields.size() < 2) {
        std::cerr << "skip record : " << line << std::endl;
        continue;
      }

      std::vector<uint64_t> ids;
      size_t id_num = 0;
      load_ids_from_string(fields[1], second_sep, ids, &id_num);

      std::vector<float> scores;
      if (fields.size() == 2) {
        // No score field: every key gets a zero score.
        scores.assign(ids.size(), 0.0f);
      } else {
        size_t score_num = 0;
        load_scores_from_string(fields[2], second_sep, scores, &score_num);

        if (id_num != score_num) {
          std::cerr << "key num (" << id_num << ") is not equal to ("
                    << score_num << "), line data:" << line << std::endl;
          continue;
        }
      }

      std::vector<std::pair<uint64_t, float>> entry;
      for (size_t pos = 0; pos < ids.size(); ++pos) {
        entry.emplace_back(ids[pos], scores[pos]);
      }
      ground_truth.push_back(std::move(entry));

      ++processed;
      if (processed % 1000000 == 0) {
        std::cout << "processed " << processed << " records!" << std::endl;
      }
    }
    gf.close();

    if (ground_truth.empty()) {
      std::cerr << "Reading nothing from input" << std::endl;
      return false;
    }
    return true;
  }
};

}  // namespace core
}  // namespace zvec

================================================
FILE: tools/core/vecs_common.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstdint>
namespace zvec {
namespace core {

// Bit positions of the per-section flags; a section is flagged by OR-ing
// (1 << BITMAP_INDEX_*) into the header's `bitmap` field.
// NOTE(review): value 3 is not assigned here — presumably reserved for the
// partition section; confirm against the writer.
enum VecsBitMapIndex {
  BITMAP_INDEX_KEY = 0,      // key section
  BITMAP_INDEX_DENSE = 1,    // dense vector section
  BITMAP_INDEX_SPARSE = 2,   // sparse vector section
  BITMAP_INDEX_TAGLIST = 4   // tag list section
};

// Fixed-size header found at the start of a vecs file, followed immediately
// by `meta_size` bytes of serialized index meta (addressed via `meta_buf`).
// The struct is packed to 4-byte alignment, so the field order and sizes
// below define the on-disk layout and must not be changed.
// The reader treats an offset equal to -1 (all bits set) as "section absent"
// for the sparse, partition and taglist sections, and resolves offsets
// relative to the first byte after the header and the meta.
#pragma pack(4)
struct VecsHeader {
  uint64_t num_vecs;          // number of vectors stored in the file
  uint16_t meta_size_v1;      // NOTE(review): presumably the meta size field
                              // of an older format version — confirm
  uint16_t version;           // NOTE(review): presumably the format version
  uint32_t meta_size;         // size in bytes of the serialized index meta
  uint64_t bitmap;            // set for data section
  uint64_t key_offset;        // offset for key
  uint64_t key_size;          // size for key
  uint64_t dense_offset;      // offset for dense
  uint64_t dense_size;        // size for dense
  uint64_t sparse_offset;     // offset for sparse
  uint64_t sparse_size;       // size for sparse
  uint64_t partition_offset;  // offset for partition
  uint64_t partition_size;    // size for partition
  uint64_t taglist_offset;    // offset for taglist
  uint64_t taglist_size;      // size for taglist
  uint8_t meta_buf[0];        // zero-length marker: start of the meta bytes
};
#pragma pack()

}  // namespace core
}  // namespace zvec


================================================
FILE: tools/core/vecs_index_holder.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <string>
#include <unordered_map>
#include <zvec/ailego/container/params.h>
#include "zvec/core/framework/index_error.h"
#include "zvec/core/framework/index_holder.h"
#include "zvec/core/framework/index_provider.h"
#include "zvec/core/framework/index_storage.h"
#include "vecs_reader.h"

namespace zvec {
namespace core {

/*!
 * Vecs Index Holder
 *  Exposes the contents of a vecs file (read through VecsReader) as an
 *  IndexProvider, and additionally keeps a key -> position map so vectors
 *  can be fetched by primary key.
 *
 *  The framework will use IndexHolder in this way:
 *  for (iter = create_iterator(); iter->is_valid(); iter->next()) {
 *      key = iter->key();
 *      data = iter->data();
 *  }
 */
class VecsIndexHolder : public IndexProvider {
 public:
  typedef std::shared_ptr<VecsIndexHolder> Pointer;

  //! Load the vecs file and build the key -> position map.
  //! Returns false when the reader fails to load the file.
  bool load(const std::string &file_path) {
    if (!vecs_reader_.load(file_path)) {
      return false;
    }
    build_key_index_map();
    return true;
  }

  //! Retrieve the index meta held by the reader
  const IndexMeta &index_meta(void) const {
    return vecs_reader_.index_meta();
  }

  //! Forward the metric name and parameters to the reader
  void set_metric(const std::string &name, const ailego::Params &params) {
    vecs_reader_.set_metric(name, params);
  }

  /*!
   * Index Holder Iterator
   *  Walks the reader by position. It keeps references to the holder's
   *  reader and stop flag, so it must not outlive the holder.
   */
  class Iterator : public IndexHybridHolder::Iterator {
   public:
    //! Constructor
    Iterator(const VecsIndexHolder &holder, uint32_t cursor)
        : cursor_(cursor),
          vecs_reader_(holder.vecs_reader_),
          stop_(holder.stop_) {}

    //! Test if the iterator is valid
    //! (false once the holder was stopped or the cursor passed the last
    //! vector; the holder's max doc count is not consulted here)
    virtual bool is_valid(void) const override {
      return !stop_ && cursor_ < vecs_reader_.num_vecs();
    }

    //! Retrieve primary key
    virtual uint64_t key(void) const override {
      return vecs_reader_.get_key(cursor_);
    }

    //! Retrieve pointer of data
    virtual const void *data() const override {
      return vecs_reader_.get_vector(cursor_);
    }

    //! Retrieve count of sparse elements
    virtual uint32_t sparse_count() const override {
      return vecs_reader_.get_sparse_count(cursor_);
    }

    //! Retrieve pointer of sparse indices
    virtual const uint32_t *sparse_indices() const override {
      return vecs_reader_.get_sparse_indices(cursor_);
    }

    //! Retrieve pointer of sparse data
    virtual const void *sparse_data() const override {
      return vecs_reader_.get_sparse_data(cursor_);
    }

    //! Next iterator
    virtual void next(void) override {
      ++cursor_;
    }

    //! Reset the iterator (back to position 0, not to the start cursor)
    virtual void reset(void) {
      cursor_ = 0;
    }

   private:
    size_t cursor_;
    const VecsReader &vecs_reader_;
    const bool &stop_;
  };

  //! Create an iterator positioned at the start cursor
  virtual IndexHolder::Iterator::Pointer create_iterator(void) override {
    // make sure iter has value when create_iterator finished
    IndexHolder::Iterator::Pointer iter(
        new VecsIndexHolder::Iterator(*this, start_cursor_));
    return iter;
  }

  //! Create a hybrid (dense + sparse) iterator positioned at the start cursor
  virtual IndexHybridHolder::Iterator::Pointer create_hybrid_iterator(void) {
    // make sure iter has value when create_iterator finished
    IndexHybridHolder::Iterator::Pointer iter(
        new VecsIndexHolder::Iterator(*this, start_cursor_));
    return iter;
  }

  //! Retrieve count of elements in holder
  //! (capped by the max doc count when that is non-zero)
  virtual size_t count(void) const override {
    return max_doc_count_ != 0
               ? std::min(max_doc_count_, vecs_reader_.num_vecs())
               : vecs_reader_.num_vecs();
  }

  //! Retrieve dimension
  virtual size_t dimension(void) const override {
    return vecs_reader_.index_meta().dimension();
  }

  //! Retrieve type information
  virtual IndexMeta::DataType data_type(void) const override {
    return vecs_reader_.index_meta().data_type();
  }

  //! Retrieve element size in bytes
  virtual size_t element_size(void) const override {
    return vecs_reader_.index_meta().element_size();
  }

  //! Retrieve if it can multi-pass
  virtual bool multipass(void) const override {
    return true;
  }

  //! Raise the stop flag; existing iterators become invalid
  void stop(void) {
    stop_ = true;
  }

  //! Retrieve the number of vectors in the file (ignores the max doc count)
  uint64_t get_num_vecs() const {
    return vecs_reader_.num_vecs();
  }

  //! Retrieve the primary key at position `idx`
  uint64_t get_key(size_t idx) const {
    return vecs_reader_.get_key(idx);
  }

  //! Retrieve count of sparse elements at position `idx`
  uint32_t get_sparse_count(size_t idx) const {
    return vecs_reader_.get_sparse_count(idx);
  }

  //! Retrieve pointer of sparse indices at position `idx`
  const uint32_t *get_sparse_indices(size_t idx) const {
    return vecs_reader_.get_sparse_indices(idx);
  }

  //! Retrieve pointer of sparse data at position `idx`
  const void *get_sparse_data(size_t idx) const {
    return vecs_reader_.get_sparse_data(idx);
  }

  //! Set the position new iterators start from
  void set_start_cursor(uint32_t index) {
    start_cursor_ = index;
  }

  //! Set the cap applied by count(); 0 means no cap
  void set_max_doc_count(size_t value) {
    max_doc_count_ = value;
  }

  //! Retrieve the position new iterators start from
  uint32_t start_cursor() const {
    return start_cursor_;
  }

  //! Retrieve the total sparse count reported by the reader
  size_t total_sparse_count(void) const {
    return vecs_reader_.get_total_sparse_count();
  }

  //! Test if the file carries tag lists
  bool has_taglist() const {
    return vecs_reader_.has_taglist();
  }

  //! Retrieve the tag count at position `index`
  uint64_t get_taglist_count(size_t index) const {
    return vecs_reader_.get_taglist_count(index);
  }

  //! Retrieve pointer of the tag list at position `index`
  const void *get_taglist(size_t index) const {
    return vecs_reader_.get_taglist(index);
  }

  //! Retrieve pointer of the tag list section; `size` is filled by the reader
  const void *get_taglist_data(size_t &size) const {
    return vecs_reader_.get_taglist_data(size);
  }

  //! Retrieve pointer of the key array
  const void *get_key_base() const {
    return vecs_reader_.key_base();
  }

  //! Retrieve a vector by position (not by key)
  const void *get_vector_by_index(size_t idx) const {
    return vecs_reader_.get_vector(idx);
  }

 public:  // IndexProvider interface implementation
  //! Retrieve a vector using a primary key (nullptr when the key is unknown)
  const void *get_vector(const uint64_t key) const override {
    auto it = key_to_index_map_.find(key);
    if (it == key_to_index_map_.end()) {
      return nullptr;
    }
    return vecs_reader_.get_vector(it->second);
  }

  //! Retrieve a vector using a primary key
  //! Returns IndexError_NoExist when the key is unknown, otherwise resets
  //! `block` to the vector pointer and returns 0.
  virtual int get_vector(const uint64_t key,
                         IndexStorage::MemoryBlock &block) const override {
    const void *vector = get_vector(key);
    if (vector == nullptr) {
      return IndexError_NoExist;
    }
    block.reset((void *)vector);
    return 0;
  }

  //! Retrieve the owner class
  virtual const std::string &owner_class(void) const override {
    static std::string owner_class_name = "VecsIndexHolder";
    return owner_class_name;
  }

 private:
  //! Build key to index mapping
  //! (when a key occurs more than once, the last position wins)
  void build_key_index_map() {
    key_to_index_map_.clear();
    size_t num_vecs = vecs_reader_.num_vecs();
    for (size_t i = 0; i < num_vecs; ++i) {
      uint64_t key = vecs_reader_.get_key(i);
      key_to_index_map_[key] = i;
    }
  }

  bool stop_{false};           // set by stop(); observed by iterators
  uint32_t start_cursor_{0};   // position new iterators start from
  VecsReader vecs_reader_;     // reader over the loaded vecs file
  size_t max_doc_count_{0};    // cap for count(); 0 means no cap
  std::unordered_map<uint64_t, size_t> key_to_index_map_;  // key -> position
};


/*!
 * Vecs Index Sparse Holder
 *  Exposes the sparse contents of a vecs file (read through
 *  SparseVecsReader) as an IndexSparseHolder.
 *
 *  The framework will use IndexHolder in this way:
 *  for (iter = create_iterator(); iter->is_valid(); iter->next()) {
 *      key = iter->key();
 *      data = iter->sparse_data();
 *  }
 */
class VecsIndexSparseHolder : public IndexSparseHolder {
 public:
  typedef std::shared_ptr<VecsIndexSparseHolder> Pointer;

  //! Load the vecs file; returns false when the reader fails
  bool load(const std::string &file_path) {
    return vecs_reader_.load(file_path);
  }

  //! Retrieve the index meta held by the reader
  const IndexMeta &index_meta(void) const {
    return vecs_reader_.index_meta();
  }

  //! Forward the metric name and parameters to the reader
  void set_metric(const std::string &name, const ailego::Params &params) {
    vecs_reader_.set_metric(name, params);
  }

  /*!
   * Index Holder Iterator
   *  Walks the reader by position. It keeps references to the holder's
   *  reader and stop flag, so it must not outlive the holder.
   */
  class Iterator : public IndexSparseHolder::Iterator {
   public:
    //! Constructor
    Iterator(const VecsIndexSparseHolder &holder, uint32_t cursor)
        : cursor_(cursor),
          vecs_reader_(holder.vecs_reader_),
          stop_(holder.stop_) {}

    //! Test if the iterator is valid
    //! (false once the holder was stopped or the cursor passed the last
    //! vector; the holder's max doc count is not consulted here)
    virtual bool is_valid(void) const override {
      return !stop_ && cursor_ < vecs_reader_.num_vecs();
    }

    //! Retrieve primary key
    virtual uint64_t key(void) const override {
      return vecs_reader_.get_key(cursor_);
    }

    //! Retrieve count of sparse elements
    virtual uint32_t sparse_count() const override {
      return vecs_reader_.get_sparse_count(cursor_);
    }

    //! Retrieve pointer of sparse indices
    virtual const uint32_t *sparse_indices() const override {
      return vecs_reader_.get_sparse_indices(cursor_);
    }

    //! Retrieve pointer of sparse data
    virtual const void *sparse_data() const override {
      return vecs_reader_.get_sparse_data(cursor_);
    }

    //! Next iterator
    virtual void next(void) override {
      ++cursor_;
    }

    //! Reset the iterator (back to position 0, not to the start cursor)
    virtual void reset(void) {
      cursor_ = 0;
    }

   private:
    size_t cursor_;
    const SparseVecsReader &vecs_reader_;
    const bool &stop_;
  };

  //! Create an iterator positioned at the start cursor
  virtual IndexSparseHolder::Iterator::Pointer create_iterator(void) override {
    // make sure iter has value when create_iterator finished
    IndexSparseHolder::Iterator::Pointer iter(
        new VecsIndexSparseHolder::Iterator(*this, start_cursor_));
    return iter;
  }

  //! Retrieve count of elements in holder
  //! (capped by the max doc count when that is non-zero)
  virtual size_t count(void) const override {
    return max_doc_count_ != 0
               ? std::min(max_doc_count_, vecs_reader_.num_vecs())
               : vecs_reader_.num_vecs();
  }

  //! Retrieve type information
  virtual IndexMeta::DataType data_type(void) const override {
    return vecs_reader_.index_meta().data_type();
  }

  //! Retrieve if it can multi-pass
  virtual bool multipass(void) const override {
    return true;
  }

  //! Raise the stop flag; existing iterators become invalid
  void stop(void) {
    stop_ = true;
  }

  //! Retrieve the primary key at position `idx`
  uint64_t get_key(size_t idx) const {
    return vecs_reader_.get_key(idx);
  }

  //! Retrieve count of sparse elements at position `idx`
  uint32_t get_sparse_count(size_t idx) const {
    return vecs_reader_.get_sparse_count(idx);
  }

  //! Retrieve pointer of sparse indices at position `idx`
  const uint32_t *get_sparse_indices(size_t idx) const {
    return vecs_reader_.get_sparse_indices(idx);
  }

  //! Retrieve pointer of sparse data at position `idx`
  const void *get_sparse_data(size_t idx) const {
    return vecs_reader_.get_sparse_data(idx);
  }

  //! Set the position new iterators start from
  void set_start_cursor(uint32_t index) {
    start_cursor_ = index;
  }

  //! Set the cap applied by count(); 0 means no cap
  void set_max_doc_count(size_t value) {
    max_doc_count_ = value;
  }

  //! Retrieve the number of vectors in the file (ignores the max doc count)
  uint64_t get_num_vecs() const {
    return vecs_reader_.num_vecs();
  }

  //! Retrieve the position new iterators start from
  uint32_t start_cursor() const {
    return start_cursor_;
  }

  //! Retrieve the total sparse count reported by the reader
  size_t total_sparse_count(void) const override {
    return vecs_reader_.get_total_sparse_count();
  }

  //! Test if the file carries tag lists
  bool has_taglist() const {
    return vecs_reader_.has_taglist();
  }

  //! Retrieve the tag count at position `index`
  uint64_t get_taglist_count(size_t index) const {
    return vecs_reader_.get_taglist_count(index);
  }

  //! Retrieve pointer of the tag list at position `index`
  const void *get_taglist(size_t index) const {
    return vecs_reader_.get_taglist(index);
  }

  //! Retrieve pointer of the tag list section; `size` is filled by the reader
  const void *get_taglist_data(size_t &size) const {
    return vecs_reader_.get_taglist_data(size);
  }

  //! Retrieve pointer of the key array
  const void *get_key_base() const {
    return vecs_reader_.key_base();
  }

 private:
  bool stop_{false};               // set by stop(); observed by iterators
  uint32_t start_cursor_{0};       // position new iterators start from
  SparseVecsReader vecs_reader_;   // reader over the loaded vecs file
  size_t max_doc_count_{0};        // cap for count(); 0 means no cap
};

}  // namespace core
}  // namespace zvec

================================================
FILE: tools/core/vecs_reader.h
================================================
// Copyright 2025-present the zvec project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <iostream>
#include <zvec/ailego/io/mmap_file.h>
#include "zvec/core/framework/index_meta.h"
#include "vecs_common.h"

namespace zvec {
namespace core {

// Reader for a memory-mapped dense vecs file.
//
// Layout as interpreted by load(): a VecsHeader, then `meta_size` bytes of
// serialized IndexMeta, then the data area. The data area begins with
// `num_vecs` fixed-size vectors (IndexMeta::element_size() bytes each)
// followed immediately by `num_vecs` uint64 keys. The optional sparse,
// partition and taglist sections are located through byte offsets, relative
// to the start of the data area, stored in the header; an offset equal to
// -1LLU marks a section as absent.
//
// Every pointer handed out points into the mapping held by `mmap_file_`
// (presumably valid only while this reader stays alive and is not reloaded).
//
// NOTE(review): offsets and counts read from the file are not validated
// against the size of the mapping; a truncated or corrupt file can still
// cause out-of-bounds reads.
class VecsReader {
 public:
  VecsReader()
      : mmap_file_(),
        index_meta_(),
        num_vecs_(0),
        vector_base_(nullptr),
        key_base_(nullptr),
        sparse_base_meta_{nullptr},
        sparse_base_data_{nullptr},
        partition_base_{nullptr},
        taglist_base_meta_{nullptr},
        taglist_base_data_{nullptr},
        taglist_size_{0} {}

  // Set the metric on the index meta; 0 is passed as the middle argument of
  // IndexMeta::set_metric.
  void set_metric(const std::string &name, const ailego::Params &params) {
    index_meta_.set_metric(name, 0, params);
  }

  // Open and parse the file at `fname`. Returns false on failure.
  bool load(const std::string &fname) {
    return load(fname.c_str());
  }

  // Open and parse the file at `fname`. Returns false (after logging to
  // stderr) when `fname` is null, the file cannot be opened, or parsing
  // fails.
  bool load(const char *fname) {
    if (!fname) {
      std::cerr << "Load fname is nullptr" << std::endl;
      return false;
    }
    if (!mmap_file_.open(fname, true)) {
      std::cerr << "Open file error: " << fname << std::endl;
      return false;
    }

    return load();
  }

  // Parse the currently mapped file and set up the section pointers.
  // Returns false when nothing is mapped or the index meta cannot be
  // deserialized; in that case the reader reports zero vectors and no
  // sections.
  bool load() {
    // Forget everything derived from a previous mapping so that sections
    // absent from this file are never served from stale pointers.
    num_vecs_ = 0;
    vector_base_ = nullptr;
    key_base_ = nullptr;
    sparse_base_meta_ = nullptr;
    sparse_base_data_ = nullptr;
    partition_base_ = nullptr;
    taglist_base_meta_ = nullptr;
    taglist_base_data_ = nullptr;
    taglist_size_ = 0;

    const VecsHeader *header =
        reinterpret_cast<const VecsHeader *>(mmap_file_.region());
    if (!header) {
      std::cerr << "Load error: no mapped file region." << std::endl;
      return false;
    }

    // deserialize
    bool bret = index_meta_.deserialize(&header->meta_buf, header->meta_size);
    if (!bret) {
      std::cerr << "deserialize index meta error." << std::endl;
      return false;
    }

    // Only commit the vector count once the meta is known to be usable.
    num_vecs_ = header->num_vecs;

    const char *data_base_ptr =
        reinterpret_cast<const char *>(header + 1) + header->meta_size;

    // Dense vectors come first, the key array follows right after them.
    vector_base_ = data_base_ptr;
    key_base_ = reinterpret_cast<const uint64_t *>(
        vector_base_ + num_vecs_ * index_meta_.element_size());

    if (header->sparse_offset != -1LLU) {
      // One uint64 offset per vector, then the packed sparse records.
      sparse_base_meta_ = data_base_ptr + header->sparse_offset;
      sparse_base_data_ = sparse_base_meta_ + num_vecs_ * sizeof(uint64_t);
    }

    if (header->partition_offset != -1LLU) {
      partition_base_ = reinterpret_cast<const uint32_t *>(
          data_base_ptr + header->partition_offset);
    }

    if (header->taglist_offset != -1LLU) {
      // NOTE(review): the taglist offset table is addressed as one byte per
      // vector here, unlike the 8-byte entries of the sparse offset table.
      // Confirm against the writer before changing.
      taglist_base_meta_ = data_base_ptr + header->taglist_offset;
      taglist_base_data_ = taglist_base_meta_ + num_vecs_;
      taglist_size_ = header->taglist_size;
    }

    return true;
  }

  // Number of vectors in the loaded file (0 before a successful load).
  size_t num_vecs() const {
    return num_vecs_;
  }

  // Start of the dense vector array.
  const void *vector_base() const {
    return vector_base_;
  }

  // Start of the key array.
  const uint64_t *key_base() const {
    return key_base_;
  }

  // Index meta deserialized from the file.
  const IndexMeta &index_meta() const {
    return index_meta_;
  }

  // Key of vector `index`. No bounds check is performed.
  uint64_t get_key(size_t index) const {
    return key_base_[index];
  }

  // Pointer to dense vector `index`. No bounds check is performed.
  const void *get_vector(size_t index) const {
    return vector_base_ + index * index_meta_.element_size();
  }

  // Number of sparse entries of vector `index`; 0 when the file has no
  // sparse section.
  uint32_t get_sparse_count(size_t index) const {
    const char *record = sparse_record(index);
    if (!record) {
      return 0;
    }

    uint32_t sparse_count = 0;
    std::memcpy(&sparse_count, record, sizeof(sparse_count));
    return sparse_count;
  }

  // Sparse indices of vector `index` (they follow the uint32 count);
  // nullptr when the file has no sparse section.
  const uint32_t *get_sparse_indices(size_t index) const {
    const char *record = sparse_record(index);
    if (!record) {
      return nullptr;
    }

    return reinterpret_cast<const uint32_t *>(record + sizeof(uint32_t));
  }

  // Data stored after the count and the indices of vector `index`; nullptr
  // when the file has no sparse section.
  const void *get_sparse_data(size_t index) const {
    const char *record = sparse_record(index);
    if (!record) {
      return nullptr;
    }

    uint32_t sparse_count = 0;
    std::memcpy(&sparse_count, record, sizeof(sparse_count));
    return record + sizeof(uint32_t) +
           static_cast<size_t>(sparse_count) * sizeof(uint32_t);
  }

  // Sum of get_sparse_count() over all vectors. Walks every record on each
  // call.
  size_t get_total_sparse_count(void) const {
    size_t total_sparse_count = 0;
    for (size_t i = 0; i < num_vecs_; ++i) {
      total_sparse_count += get_sparse_count(i);
    }

    return total_sparse_count;
  }

  // True when the loaded file carries a taglist section.
  bool has_taglist(void) const {
    return taglist_base_meta_ != nullptr;
  }

  // Number of tags of vector `index`; 0 when there is no taglist section.
  uint64_t get_taglist_count(size_t index) const {
    if (!taglist_base_data_ || !taglist_base_meta_) {
      return 0;
    }

    // memcpy: the record may start at any byte offset inside the mapping.
    uint64_t taglist_count = 0;
    std::memcpy(&taglist_count, taglist_base_data_ + taglist_base_meta_[index],
                sizeof(taglist_count));
    return taglist_count;
  }

  // Tags of vector `index` (they follow the uint64 count); nullptr when
  // there is no taglist section.
  const uint64_t *get_taglist(size_t index) const {
    if (!taglist_base_data_ || !taglist_base_meta_) {
      return nullptr;
    }

    return reinterpret_cast<const uint64_t *>(taglist_base_data_ +
                                              taglist_base_meta_[index]) +
           1;
  }

  // Start of the taglist section; `size` receives the header's
  // taglist_size (0 when the section is absent).
  const void *get_taglist_data(size_t &size) const {
    size = taglist_size_;

    return taglist_base_meta_;
  }

 private:
  // Start of the packed sparse record of vector `index`, or nullptr when
  // the file has no sparse section. The per-vector offset is copied out with
  // memcpy because nothing visible here guarantees its alignment.
  const char *sparse_record(size_t index) const {
    if (!sparse_base_meta_ || !sparse_base_data_) {
      return nullptr;
    }

    uint64_t sparse_offset = 0;
    std::memcpy(&sparse_offset, sparse_base_meta_ + index * sizeof(uint64_t),
                sizeof(sparse_offset));
    return sparse_base_data_ + sparse_offset;
  }

  ailego::MMapFile mmap_file_;
  IndexMeta index_meta_;
  size_t num_vecs_;
  const char *vector_base_;
  const uint64_t *key_base_;
  const char *sparse_base_meta_;
  const char *sparse_base_data_;
  const uint32_t *partition_base_;
  const char *taglist_base_meta_;
  const char *taglist_base_data_;
  uint64_t taglist_size_;
};

// Reader for a memory-mapped sparse vecs file.
//
// Layout as interpreted by load(): a VecsHeader, then `meta_size` bytes of
// serialized IndexMeta, then the data area. The data area begins with
// `num_vecs` uint64 keys, followed by `num_vecs` uint64 offsets into the
// sparse record area, followed by the packed sparse records themselves.
// Each record starts with a uint32 entry count, then that many uint32
// indices, then the associated data. The optional partition and taglist
// sections are located through byte offsets, relative to the start of the
// data area, stored in the header; an offset equal to -1LLU marks a section
// as absent.
//
// Every pointer handed out points into the mapping held by `mmap_file_`
// (presumably valid only while this reader stays alive and is not reloaded).
//
// NOTE(review): offsets and counts read from the file are not validated
// against the size of the mapping; a truncated or corrupt file can still
// cause out-of-bounds reads.
class SparseVecsReader {
 public:
  SparseVecsReader()
      : mmap_file_(),
        index_meta_(),
        num_vecs_(0),
        key_base_(nullptr),
        sparse_base_meta_(nullptr),
        sparse_base_data_{nullptr},
        partition_base_{nullptr},
        taglist_base_meta_{nullptr},
        taglist_base_data_{nullptr},
        taglist_size_{0} {}

  // Set the metric on the index meta; 0 is passed as the middle argument of
  // IndexMeta::set_metric.
  void set_metric(const std::string &name, const ailego::Params &params) {
    index_meta_.set_metric(name, 0, params);
  }

  // Open and parse the file at `fname`. Returns false on failure.
  bool load(const std::string &fname) {
    return load(fname.c_str());
  }

  // Open and parse the file at `fname`. Returns false (after logging to
  // stderr) when `fname` is null, the file cannot be opened, or parsing
  // fails.
  bool load(const char *fname) {
    if (!fname) {
      std::cerr << "Load fname is nullptr" << std::endl;
      return false;
    }
    if (!mmap_file_.open(fname, true)) {
      std::cerr << "Open file error: " << fname << std::endl;
      return false;
    }

    return load();
  }

  // Parse the currently mapped file and set up the section pointers.
  // Returns false when nothing is mapped or the index meta cannot be
  // deserialized; in that case the reader reports zero vectors and no
  // sections.
  bool load() {
    // Forget everything derived from a previous mapping so that sections
    // absent from this file are never served from stale pointers.
    num_vecs_ = 0;
    key_base_ = nullptr;
    sparse_base_meta_ = nullptr;
    sparse_base_data_ = nullptr;
    partition_base_ = nullptr;
    taglist_base_meta_ = nullptr;
    taglist_base_data_ = nullptr;
    taglist_size_ = 0;

    const VecsHeader *header =
        reinterpret_cast<const VecsHeader *>(mmap_file_.region());
    if (!header) {
      std::cerr << "Load error: no mapped file region." << std::endl;
      return false;
    }

    // deserialize
    bool bret = index_meta_.deserialize(&header->meta_buf, header->meta_size);
    if (!bret) {
      std::cerr << "deserialize index meta error." << std::endl;
      return false;
    }

    // Only commit the vector count once the meta is known to be usable.
    num_vecs_ = header->num_vecs;

    const char *data_base_ptr =
        reinterpret_cast<const char *>(header + 1) + header->meta_size;

    // Keys first, then the per-vector offset table, then the sparse records.
    key_base_ = reinterpret_cast<const uint64_t *>(data_base_ptr);
    sparse_base_meta_ = reinterpret_cast<const char *>(key_base_ + num_vecs_);
    sparse_base_data_ = sparse_base_meta_ + num_vecs_ * sizeof(uint64_t);

    if (header->partition_offset != -1LLU) {
      partition_base_ = reinterpret_cast<const uint32_t *>(
          data_base_ptr + header->partition_offset);
    }

    if (header->taglist_offset != -1LLU) {
      // NOTE(review): the taglist offset table is addressed as one byte per
      // vector here, unlike the 8-byte entries of the sparse offset table.
      // Confirm against the writer before changing.
      taglist_base_meta_ = data_base_ptr + header->taglist_offset;
      taglist_base_data_ = taglist_base_meta_ + num_vecs_;
      taglist_size_ = header->taglist_size;
    }

    return true;
  }

  // Number of vectors in the loaded file (0 before a successful load).
  size_t num_vecs() const {
    return num_vecs_;
  }

  // Start of the per-vector sparse offset table.
  const void *sparse_meta_base() const {
    return sparse_base_meta_;
  }

  // Start of the key array.
  const uint64_t *key_base() const {
    return key_base_;
  }

  // Index meta deserialized from the file.
  const IndexMeta &index_meta() const {
    return index_meta_;
  }

  // Key of vector `index`. No bounds check is performed.
  uint64_t get_key(size_t index) const {
    return key_base_[index];
  }

  // Number of sparse entries of vector `index`; 0 when nothing is loaded.
  uint32_t get_sparse_count(size_t index) const {
    const char *record = sparse_record(index);
    if (!record) {
      return 0;
    }

    uint32_t sparse_count = 0;
    std::memcpy(&sparse_count, record, sizeof(sparse_count));
    return sparse_count;
  }

  // Sparse indices of vector `index` (they follow the uint32 count);
  // nullptr when nothing is loaded.
  const uint32_t *get_sparse_indices(size_t index) const {
    const char *record = sparse_record(index);
    if (!record) {
      return nullptr;
    }

    return reinterpret_cast<const uint32_t *>(record + sizeof(uint32_t));
  }

  // Data stored after the count and the indices of vector `index`; nullptr
  // when nothing is loaded.
  const void *get_sparse_data(size_t index) const {
    const char *record = sparse_record(index);
    if (!record) {
      return nullptr;
    }

    uint32_t sparse_count = 0;
    std::memcpy(&sparse_count, record, sizeof(sparse_count));
    return record + sizeof(uint32_t) +
           static_cast<size_t>(sparse_count) * sizeof(uint32_t);
  }

  // Sum of get_sparse_count() over all vectors. Walks every record on each
  // call.
  size_t get_total_sparse_count(void) const {
    size_t total_sparse_count = 0;
    for (size_t i = 0; i < num_vecs_; ++i) {
      total_sparse_count += get_sparse_count(i);
    }

    return total_sparse_count;
  }

  // True when the loaded file carries a taglist section.
  bool has_taglist(void) const {
    return taglist_base_meta_ != nullptr;
  }

  // Number of tags of vector `index`; 0 when there is no taglist section.
  uint64_t get_taglist_count(size_t index) const {
    if (!taglist_base_data_ || !taglist_base_meta_) {
      return 0;
    }

    // memcpy: the record may start at any byte offset inside the mapping.
    uint64_t taglist_count = 0;
    std::memcpy(&taglist_count, taglist_base_data_ + taglist_base_meta_[index],
                sizeof(taglist_count));
    return taglist_count;
  }

  // Tags of vector `index` (they follow the uint64 count); nullptr when
  // there is no taglist section.
  const uint64_t *get_taglist(size_t index) const {
    if (!taglist_base_data_ || !taglist_base_meta_) {
      return nullptr;
    }

    return reinterpret_cast<const uint64_t *>(taglist_base_data_ +
                                              taglist_base_meta_[index]) +
           1;
  }

  // Start of the taglist section; `size` receives the header's
  // taglist_size (0 when the section is absent).
  const void *get_taglist_data(size_t &size) const {
    size = taglist_size_;
    return taglist_base_meta_;
  }

 private:
  // Start of the packed sparse record of vector `index`, or nullptr when
  // nothing has been loaded. The per-vector offset is copied out with memcpy
  // because nothing visible here guarantees its alignment.
  const char *sparse_record(size_t index) const {
    if (!sparse_base_meta_ || !sparse_base_data_) {
      return nullptr;
    }

    uint64_t sparse_offset = 0;
    std::memcpy(&sparse_offset, sparse_base_meta_ + index * sizeof(uint64_t),
                sizeof(sparse_offset));
    return sparse_base_data_ + sparse_offset;
  }

  ailego::MMapFile mmap_file_;
  IndexMeta index_meta_;
  size_t num_vecs_;
  const uint64_t *key_base_;
  const char *sparse_base_meta_;
  const char *sparse_base_data_;
  const uint32_t *partition_base_;
  const char *taglist_base_meta_;
  const char *taglist_base_data_;
  uint64_t taglist_size_;
};

}  // namespace core
}  // namespace zvec